diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2af096e2286a35d9f59ddf5acb7792efebac50b0 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# uvm_async_bench diff --git a/data.zip b/data.zip new file mode 100644 index 0000000000000000000000000000000000000000..76396b65bed954c7e8c4f590ec701c2d977bccfd --- /dev/null +++ b/data.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24dcfa78fe0a79ebf4b189d3b2c8449842f15a9f30b7b781c400c96ba525756a +size 8411674489 diff --git a/env.sh b/env.sh new file mode 100644 index 0000000000000000000000000000000000000000..1ad8b9402926303de8ff29c7c9f8221692b01379 --- /dev/null +++ b/env.sh @@ -0,0 +1 @@ +export UVMAsyncBench_BASE=$(pwd) diff --git a/workloads/common/Makefile b/workloads/common/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..ddd1437992e45fc080214db863d405b10abddde1 --- /dev/null +++ b/workloads/common/Makefile @@ -0,0 +1,12 @@ +include ./make.config + +NVCC = $(CUDA_DIR)/bin/nvcc +NVCC_FLAGS = -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) + +all: cpu_timestamps.o cupti_add.o + +cpu_timestamps.o: cpu_timestamps.cpp + $(NVCC) $(NVCC_FLAGS) -c cpu_timestamps.cpp + +cupti_add.o: cupti_add.cpp + $(NVCC) $(NVCC_FLAGS) -c cupti_add.cpp diff --git a/workloads/common/cpu_timestamps.cpp b/workloads/common/cpu_timestamps.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1bdab3a6039a30b06892da0737e88657141c1862 --- /dev/null +++ b/workloads/common/cpu_timestamps.cpp @@ -0,0 +1,38 @@ +#include "cpu_timestamps.h" + +void startCPU() { + struct timespec tv; + if(clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + startCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); +} + + + +void endCPU() { + struct timespec tv; + if(clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + + endCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); + //endCPUTimestamp1 = std::chrono::system_clock::now(); + 
printf("CPU_Times,%lu,%lu,%lu\n", startCPUTime, endCPUTime, endCPUTime-startCPUTime); + printf("Overlap_Times,%lu,%lu,%lu\n", overlapStartCPUTime, overlapEndCPUTime, overlapEndCPUTime - overlapStartCPUTime); +} + +void overlapStartCPU() +{ + struct timespec tv; + if (clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + overlapStartCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); +} + +void overlapEndCPU() +{ + struct timespec tv; + if (clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + + overlapEndCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); +} diff --git a/workloads/common/cpu_timestamps.h b/workloads/common/cpu_timestamps.h new file mode 100644 index 0000000000000000000000000000000000000000..aadcd19e384f06ae94e37afc8a0f78e0d96113ac --- /dev/null +++ b/workloads/common/cpu_timestamps.h @@ -0,0 +1,22 @@ +#ifndef CPU_TIMESTAMP_ +#define CPU_TIMESTAMP_ + +#include +#include +#include +#include +#include + +static uint64_t startCPUTime; +static uint64_t endCPUTime; + +static uint64_t overlapStartCPUTime = 0; +static uint64_t overlapEndCPUTime = 0; + +void startCPU(); +void endCPU(); + +void overlapStartCPU(); +void overlapEndCPU(); + +#endif diff --git a/workloads/common/cpu_timestamps.o b/workloads/common/cpu_timestamps.o new file mode 100644 index 0000000000000000000000000000000000000000..d614f30f54812a1192508b239b359a2f99fd12ff Binary files /dev/null and b/workloads/common/cpu_timestamps.o differ diff --git a/workloads/common/cupti_add.cpp b/workloads/common/cupti_add.cpp new file mode 100644 index 0000000000000000000000000000000000000000..91adcf48e628b36cbf0f97b760e8b5148557c7eb --- /dev/null +++ b/workloads/common/cupti_add.cpp @@ -0,0 +1,242 @@ +#include "cupti_add.h" + +static const char * +getMemcpyKindString(CUpti_ActivityMemcpyKind kind) +{ + switch (kind) + { + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOD: + return "HtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOH: + return "DtoH"; + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOA: + return 
"HtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOH: + return "AtoH"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOA: + return "AtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOD: + return "AtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOA: + return "DtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOD: + return "DtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOH: + return "HtoH"; + default: + break; + } + + return ""; +} + +static const char * +getUvmCounterKindString(CUpti_ActivityUnifiedMemoryCounterKind kind) +{ + switch (kind) + { + case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD: + return "BYTES_TRANSFER_HTOD"; + case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH: + return "BYTES_TRANSFER_DTOH"; + case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_CPU_PAGE_FAULT_COUNT: + return "CPU_PAGE_FAULTS"; + case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_GPU_PAGE_FAULT: + return "GPU_PAGE_FAULTS"; + default: + break; + } + return ""; +} + +static void +printActivity(CUpti_Activity *record) +{ + switch (record->kind) + { + case CUPTI_ACTIVITY_KIND_KERNEL: + { + int status; + CUpti_ActivityKernel4 *kernel = (CUpti_ActivityKernel4 *)record; + printf("KERNEL %s, %llu, %llu, %llu\n", + abi::__cxa_demangle(kernel->name, 0, 0, &status), + (unsigned long long)(kernel->start), + (unsigned long long)(kernel->end), + (unsigned long long)(kernel->end) - (kernel->start)); + break; + } + case CUPTI_ACTIVITY_KIND_RUNTIME: + { + CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record; + const char *callback_name; + cuptiGetCallbackName(CUPTI_CB_DOMAIN_RUNTIME_API, api->cbid, &callback_name); + // printf("RUNTIME %s (cbid=%u) [ %llu - %llu ] process %u, thread %u, correlation %u\n", + // callback_name, api->cbid, + // (unsigned long long)(api->start - startTimestamp), + // (unsigned long long)(api->end - startTimestamp), + // api->processId, api->threadId, api->correlationId); + printf("RUNTIME %s (cbid=%u), %llu,%llu,%llu, process %u, thread %u, correlation %u\n", + callback_name, 
api->cbid, + (unsigned long long)(api->start), + (unsigned long long)(api->end), + (unsigned long long)(api->end - api->start), + api->processId, api->threadId, api->correlationId); + break; + } + case CUPTI_ACTIVITY_KIND_MEMCPY: + { + CUpti_ActivityMemcpy4 *memcpy = (CUpti_ActivityMemcpy4 *)record; + printf("MEMCPY %s, size %llu, %llu, %llu, %llu\n", + getMemcpyKindString((CUpti_ActivityMemcpyKind)memcpy->copyKind), + (unsigned long long)memcpy->bytes, + (unsigned long long)(memcpy->start), + (unsigned long long)(memcpy->end), + (unsigned long long)(memcpy->end) - (memcpy->start)); + break; + } + case CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER: + { + CUpti_ActivityUnifiedMemoryCounter2 *uvm = (CUpti_ActivityUnifiedMemoryCounter2 *)record; + printf("UVM MEMCPY %s, size %llu, %llu, %llu, %llu \n", + getUvmCounterKindString(uvm->counterKind), + (unsigned long long)uvm->value, + (unsigned long long)(uvm->start), + (unsigned long long)(uvm->end), + (unsigned long long)(uvm->end - uvm->start)); + break; + } + } +} + +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) +{ + uint8_t *bfr = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE); + if (bfr == NULL) + { + printf("Error: out of memory\n"); + exit(-1); + } + + *size = BUF_SIZE; + *buffer = ALIGN_BUFFER(bfr, ALIGN_SIZE); + *maxNumRecords = 0; +} + +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) +{ + CUptiResult status; + CUpti_Activity *record = NULL; + if (validSize > 0) + { + do + { + status = cuptiActivityGetNextRecord(buffer, validSize, &record); + if (status == CUPTI_SUCCESS) + { + printActivity(record); + } + else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) + break; + else + { + CUPTI_CALL(status); + } + } while (1); + + // report any records dropped from the queue + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if (dropped != 0) + { + printf("Dropped %u activity records\n", 
(unsigned int)dropped); + } + } + + free(buffer); +} + +// void initTrace() { +// return; +// } + +// void finiTrace() { +// return; +// } + + +void initTrace() +{ + size_t attrValue = 0, attrValueSize = sizeof(size_t); + + CUpti_ActivityUnifiedMemoryCounterConfig config[2]; + + // configure unified memory counters + config[0].scope = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE; + config[0].kind = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD; + config[0].deviceId = 0; + config[0].enable = 1; + + config[1].scope = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE; + config[1].kind = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH; + config[1].deviceId = 0; + config[1].enable = 1; + + CUptiResult res = cuptiActivityConfigureUnifiedMemoryCounter(config, 2); + if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED) + { + printf("Test is waived, unified memory is not supported on the underlying platform.\n"); + } + else if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE) + { + printf("Test is waived, unified memory is not supported on the device.\n"); + } + else if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES) + { + printf("Test is waived, unified memory is not supported on the non-P2P multi-gpu setup.\n"); + } + else + { + CUPTI_CALL(res); + } + + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER)); + // CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_COUNT)); + + // CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_GPU_PAGE_FAULT + // CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_CPU_PAGE_FAULT_COUNT + + // Register callbacks for buffer requests and for buffers completed by CUPTI. 
+ CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + + // Optionally get and set activity attributes. + // Attributes can be set by the CUPTI client to change behavior of the activity API. + // Some attributes require to be set before any CUDA context is created to be effective, + // e.g. to be applied to all device buffer allocations (see documentation). + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + printf("%s = %llu B\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +} + +void finiTrace() +{ + // Force flush any remaining activity buffers before termination of the application + CUPTI_CALL(cuptiActivityFlushAll(1)); +} + +void GPU_argv_init() { + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, GPU_DEVICE); + printf("setting device %d with name %s\n", GPU_DEVICE, deviceProp.name); + cudaSetDevice(GPU_DEVICE); +} diff --git a/workloads/common/cupti_add.h b/workloads/common/cupti_add.h new file mode 100644 index 0000000000000000000000000000000000000000..747b6e9c9d1f58e7a36cc59064e88894e4235db1 --- /dev/null +++ b/workloads/common/cupti_add.h @@ -0,0 +1,56 @@ +#include +#include +#include +#include + +#define GPU_DEVICE 7 + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) \ + (((uintptr_t) (buffer) & ((align)-1)) ? 
((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer)) + +static uint64_t startTimestamp; +// Timestamp at trace initialization time. Used to normalized other +// timestamps + +#define CUPTI_CALL(call) \ + do { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + if(_status == CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED) \ + exit(0); \ + else \ + exit(-1); \ + } \ + } while (0) + + +extern inline __attribute__((always_inline)) unsigned long rdtsc() { + unsigned long a, d; + + __asm__ volatile("rdtsc" : "=a"(a), "=d"(d)); + + return (a | (d << 32)); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsp() { + struct timespec tms; + if (clock_gettime(CLOCK_REALTIME, &tms)) { + return -1; + } + unsigned long ns = tms.tv_sec * 1000000000; + ns += tms.tv_nsec; + return ns; +} + +void initTrace(); +void finiTrace(); +void GPU_argv_init(); +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords); +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); +static void printActivity(CUpti_Activity *record); diff --git a/workloads/common/cupti_add.o b/workloads/common/cupti_add.o new file mode 100644 index 0000000000000000000000000000000000000000..de875b500fe4665bab4a8db29ad60725550b8cb9 Binary files /dev/null and b/workloads/common/cupti_add.o differ diff --git a/workloads/common/make.config b/workloads/common/make.config new file mode 100755 index 0000000000000000000000000000000000000000..31d9ff048764881f4efcc21e33e4f3d55bf096d4 --- /dev/null +++ b/workloads/common/make.config @@ -0,0 +1,10 @@ +CUDA_DIR = /apps/cuda-11.4 + +COMPUTE = compute_80 #61 +SM_ARCH = sm_80 #61 + +CUDA_LIB_DIR := $(CUDA_DIR)/lib64 +CUPTI_LIB_DIR := $(CUDA_DIR)/extras/CUPTI/lib64/ 
+CUPTI_INCLUDE := $(CUDA_DIR)/extras/CUPTI/include/ + +CUPTI_ADD_COMMON = $(UVMAsyncBench_BASE)/workloads/common/ diff --git a/workloads/micro/async/2DCONV/2DConvolution.cu b/workloads/micro/async/2DCONV/2DConvolution.cu new file mode 100644 index 0000000000000000000000000000000000000000..acedbbf6cdcd7cd4d50171d080053fd55e6767de --- /dev/null +++ b/workloads/micro/async/2DCONV/2DConvolution.cu @@ -0,0 +1,392 @@ +/** + * 2DConvolution.cu: This file is part of the PolyBench/GPU 1.0 test suite. + * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +#define NBLOCKS 32 +#define BATCH_SIZE 4 + +uint64_t NI; +uint64_t NJ; +uint64_t nblocks; + + +/* Thread block dimensions */ +#define KERNEL 3 +#define DIM_THREAD_BLOCK 8 + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; + +void conv2D(DATA_TYPE* A, DATA_TYPE* B) +{ + uint64_t i, j; + DATA_TYPE c11, c12, c13, c21, 
c22, c23, c31, c32, c33; + + c11 = +0.2; c21 = +0.5; c31 = -0.8; + c12 = -0.3; c22 = +0.6; c32 = -0.9; + c13 = +0.4; c23 = +0.7; c33 = +0.10; + + for (i = 1; i < NI - 1; ++i) // 0 + { + for (j = 1; j < NJ - 1; ++j) // 1 + { + B[i*NJ + j] = c11 * A[(i - 1)*NJ + (j - 1)] + c12 * A[(i + 0)*NJ + (j - 1)] + c13 * A[(i + 1)*NJ + (j - 1)] + + c21 * A[(i - 1)*NJ + (j + 0)] + c22 * A[(i + 0)*NJ + (j + 0)] + c23 * A[(i + 1)*NJ + (j + 0)] + + c31 * A[(i - 1)*NJ + (j + 1)] + c32 * A[(i + 0)*NJ + (j + 1)] + c33 * A[(i + 1)*NJ + (j + 1)]; + } + } +} + + +void initGPU(DATA_TYPE* A_gpu) +{ + uint64_t i, j; + + for (i = 0; i < NI; ++i) { + for (j = 0; j < NJ; ++j) { + A_gpu[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } +} + +void initCPU(DATA_TYPE* A) +{ + uint64_t i, j; + + for (i = 0; i < NI; ++i) { + for (j = 0; j < NJ; ++j) { + A[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } +} + + +void compareResults(DATA_TYPE* B, DATA_TYPE* B_outputFromGpu) +{ + uint64_t i, j, fail; + fail = 0; + + // Compare a and b + for (i=1; i < (NI-1); i++) + { + for (j=1; j < (NJ-1); j++) + { + if (percentDiff(B[i*NJ + j], B_outputFromGpu[i*NJ + j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, CPU is %f, GPU is %f.\n", i, j, B[i * NJ + j], B_outputFromGpu[i * NJ + j]); + fail++; + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); + +} + +__global__ void Convolution2D_kernel(DATA_TYPE *A, DATA_TYPE *B, uint64_t NI, uint64_t NJ, uint64_t block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + uint64_t tile_dim_x = (NJ + DIM_THREAD_BLOCK - 1) / (DIM_THREAD_BLOCK * BATCH_SIZE); + + __shared__ DATA_TYPE tmp_A[PREFETCH_COUNT][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1]; + __shared__ DATA_TYPE tmp_B[DIM_THREAD_BLOCK * BATCH_SIZE][DIM_THREAD_BLOCK * BATCH_SIZE]; + + uint64_t total_tiles = 
tile_dim_x * tile_dim_x; + + uint64_t tiles_this_block_x = (block_size / (DIM_THREAD_BLOCK * BATCH_SIZE)); + uint64_t tiles_this_block = tiles_this_block_x * tiles_this_block_x; + + // DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + // c11 = +0.2; c21 = +0.5; c31 = -0.8; + // c12 = -0.3; c22 = +0.6; c32 = -0.9; + // c13 = +0.4; c23 = +0.7; c33 = +0.10; + + DATA_TYPE c[KERNEL][KERNEL]; + + c[0][0] = +0.2; + c[1][0] = +0.5; + c[2][0] = -0.8; + c[0][1] = -0.3; + c[1][1] = +0.6; + c[2][1] = -0.9; + c[0][2] = +0.4; + c[1][2] = +0.7; + c[2][2] = +0.10; + + uint64_t base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + uint64_t fetch = base_tile; + uint64_t end_tile = fetch + tiles_this_block; + + for (uint64_t compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + // block id + uint64_t offset = fetch - base_tile; + uint64_t block_id = fetch / tiles_this_block; + uint64_t bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + uint64_t by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + uint64_t batch_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + // thread id + uint64_t tx = threadIdx.x; + uint64_t ty = threadIdx.y; + + uint64_t index_A_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty; + uint64_t index_A_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx; + + uint64_t index_A_y_start = DIM_THREAD_BLOCK * BATCH_SIZE * by; + uint64_t index_A_x_start = DIM_THREAD_BLOCK * BATCH_SIZE * bx; + + uint64_t index_A_y_bound = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * DIM_THREAD_BLOCK; + uint64_t index_A_x_bound = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * DIM_THREAD_BLOCK; + + // fetch A + for (uint64_t i = 0; i < BATCH_SIZE; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE; j++) + { + if ((index_A_y + i) < NI && (index_A_x + j) < NJ) + { + memcpy_async(tmp_A[fetch % PREFETCH_COUNT][ty * BATCH_SIZE + 
i][tx * BATCH_SIZE + j], A[(index_A_y + i) * NJ + index_A_x + j], pipe); + tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] = 0; + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < KERNEL - 1; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; j++) + { + if ((index_A_y_bound + i) < NI && (index_A_x_start + j) < NJ) + { + memcpy_async(tmp_A[fetch % PREFETCH_COUNT][DIM_THREAD_BLOCK * BATCH_SIZE + i][j], A[(index_A_y_bound + i) * NJ + index_A_x_start + j], pipe); + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; i++) + { + for (uint64_t j = 0; j < KERNEL - 1; j++) + { + if ((index_A_y_start + i) < NI && (index_A_x_bound + j) < NJ) + { + memcpy_async(tmp_A[fetch % PREFETCH_COUNT][i][DIM_THREAD_BLOCK * BATCH_SIZE + j], A[(index_A_y_start + i) * NJ + index_A_x_bound + j], pipe); + } + } + } + pipe.commit(); + } + if (fetch == end_tile) + { + for (uint64_t i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + // block id + uint64_t offset = compute - base_tile; + uint64_t block_id = compute / tiles_this_block; + uint64_t bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + uint64_t by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + // thread id + uint64_t tx = threadIdx.x; + uint64_t ty = threadIdx.y; + + uint64_t index_B_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty + 1; + uint64_t index_B_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx + 1; + + // Computation + for (uint64_t i = 0; i < BATCH_SIZE; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE; j++) + { + tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] = 0; + } + } + block.sync(); + + for (uint64_t i = 0; i < BATCH_SIZE; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE; j++) + { + for (uint64_t m = 0; m < KERNEL; m++) + { + for (uint64_t n = 0; n < KERNEL; n++) + { + tmp_B[ty * 
BATCH_SIZE + i][tx * BATCH_SIZE + j] += tmp_A[compute % PREFETCH_COUNT][ty * BATCH_SIZE + i + m][tx * BATCH_SIZE + j + n] * c[n][m]; + } + } + } + } + block.sync(); + + // Store B + for (uint64_t i = 0; i < BATCH_SIZE; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE; j++) + { + if ((index_B_y + i) < NI && (index_B_x + j) < NJ) + { + B[(index_B_y + i) * NJ + index_B_x + j] = tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j]; + } + } + } + block.sync(); + } +} + +void convolution2DCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu) +{ + double t_start, t_end; + + uint64_t output_width = NI - KERNEL + 1; + uint64_t output_height = NJ - KERNEL + 1; + + dim3 block(DIM_THREAD_BLOCK, DIM_THREAD_BLOCK); + dim3 grid(nblocks, nblocks); + + uint64_t block_size = (NJ + (nblocks - 1)) / nblocks; + + // t_start = rtclock(); + + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyHostToDevice); + Convolution2D_kernel<<>>(A_gpu, B_gpu, NI, NJ, block_size); + cudaDeviceSynchronize(); + cudaMemcpy(B, B_gpu, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyDeviceToHost); + + // t_end = rtclock(); + // fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start);//); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + if (argc >= 4) { + NI = atoll(argv[1]); + NJ = atoll(argv[2]); + nblocks = atoi(argv[3]); + } else { + NI = SIZE; + NJ = SIZE; + nblocks = NBLOCKS; + } + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE *B_ref; + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + + A = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + B_ref = (DATA_TYPE *)malloc(NI * NJ * sizeof(DATA_TYPE)); + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI * NJ); + cudaMalloc(&B_gpu, sizeof(DATA_TYPE) * NI * NJ); + // B_outputFromGpu = 
(DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + + convolution2DCuda(A, B, A_gpu, B_gpu); + + cudaFree(A_gpu); + cudaFree(B_gpu); + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // conv2D(A, B_ref); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(B, B_ref); + free(A); + free(B); + + return 0; +} diff --git a/workloads/micro/async/2DCONV/Makefile b/workloads/micro/async/2DCONV/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f97459c40d8409dec80056ca77208c75e9d0b5e8 --- /dev/null +++ b/workloads/micro/async/2DCONV/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := 2DConvolution +CUFILES := 2DConvolution.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o 2DConvolution diff --git a/workloads/micro/async/2DCONV/run.sh b/workloads/micro/async/2DCONV/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..c4d26406f477f75d294497d89fc0d88c529f475b --- /dev/null +++ b/workloads/micro/async/2DCONV/run.sh @@ -0,0 +1,2 @@ +# ./2DConvolution 16384 16384 32 +./2DConvolution 32768 32768 32 \ No newline at end of file diff --git a/workloads/micro/async/2DCONV/run_large.sh b/workloads/micro/async/2DCONV/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..6e4e06894b252b1d547f335deef142ab01c98df9 --- /dev/null +++ b/workloads/micro/async/2DCONV/run_large.sh @@ -0,0 +1 @@ +./2DConvolution 8192 8192 32 diff --git a/workloads/micro/async/2DCONV/run_medium.sh b/workloads/micro/async/2DCONV/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..c246aa18fc41de6e48e7d9ab67f1bbf1925afff6 --- /dev/null +++ b/workloads/micro/async/2DCONV/run_medium.sh @@ -0,0 
+1 @@ +./2DConvolution 4096 4096 32 diff --git a/workloads/micro/async/2DCONV/run_mega.sh b/workloads/micro/async/2DCONV/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..aa390557108b8621ec9ef8ac80f0a8f085161cce --- /dev/null +++ b/workloads/micro/async/2DCONV/run_mega.sh @@ -0,0 +1 @@ +./2DConvolution 65536 65536 32 diff --git a/workloads/micro/async/2DCONV/run_small.sh b/workloads/micro/async/2DCONV/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..c4b192f75d30e834237d39cfc15c8c57bade3c0e --- /dev/null +++ b/workloads/micro/async/2DCONV/run_small.sh @@ -0,0 +1 @@ +./2DConvolution 1024 1024 8 diff --git a/workloads/micro/async/2DCONV/run_super.sh b/workloads/micro/async/2DCONV/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..8a0981739ace39a1104aa069d6a6f0dfa38fd5c9 --- /dev/null +++ b/workloads/micro/async/2DCONV/run_super.sh @@ -0,0 +1 @@ +./2DConvolution 32768 32768 32 diff --git a/workloads/micro/async/2DCONV/run_tiny.sh b/workloads/micro/async/2DCONV/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..d7c49df52c8e594edae710ad71032e912ccd7892 --- /dev/null +++ b/workloads/micro/async/2DCONV/run_tiny.sh @@ -0,0 +1 @@ +./2DConvolution 512 512 4 diff --git a/workloads/micro/async/3DCONV/3DConvolution.cu b/workloads/micro/async/3DCONV/3DConvolution.cu new file mode 100644 index 0000000000000000000000000000000000000000..4b62e03e782955117e573d54749fcb750a3e3820 --- /dev/null +++ b/workloads/micro/async/3DCONV/3DConvolution.cu @@ -0,0 +1,449 @@ +/** + * 3DConvolution.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +// define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +#define NBLOCKS 2 +#define BATCH_SIZE 3 + +uint64_t NI; +uint64_t NJ; +uint64_t NK; +uint64_t nblocks; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 4 + +#define KERNEL 3 + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; + + + +void conv3D(DATA_TYPE* A, DATA_TYPE* B) +{ + uint64_t i, j, k; + DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + c11 = +2; c21 = +5; c31 = -8; + c12 = -3; c22 = +6; c32 = -9; + c13 = +4; c23 = +7; c33 = +10; + + for (i = 1; i < NI - 1; ++i) // 0 + { + for (j = 1; j < NJ - 1; ++j) // 1 + { + for (k = 1; k < NK -1; ++k) // 2 + { + B[i*(NK * NJ) + j*NK + k] = c11 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + c13 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + + c21 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + c23 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + 
(k - 1)] + + c31 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + c33 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + + c12 * A[(i + 0)*(NK * NJ) + (j - 1)*NK + (k + 0)] + c22 * A[(i + 0)*(NK * NJ) + (j + 0)*NK + (k + 0)] + + c32 * A[(i + 0)*(NK * NJ) + (j + 1)*NK + (k + 0)] + c11 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k + 1)] + + c13 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k + 1)] + c21 * A[(i - 1)*(NK * NJ) + (j + 0)*NK + (k + 1)] + + c23 * A[(i + 1)*(NK * NJ) + (j + 0)*NK + (k + 1)] + c31 * A[(i - 1)*(NK * NJ) + (j + 1)*NK + (k + 1)] + + c33 * A[(i + 1)*(NK * NJ) + (j + 1)*NK + (k + 1)]; + } + } + } +} + +void initGPU(DATA_TYPE *A_gpu) +{ + uint64_t i, j, k; + + for (i = 0; i < NI; ++i) + { + for (j = 0; j < NJ; ++j) + { + for (k = 0; k < NK; ++k) + { + A_gpu[i * (NK * NJ) + j * NK + k] = i % 12 + 2 * (j % 7) + 3 * (k % 13); + } + } + } +} + +void initCPU(DATA_TYPE *A) +{ + uint64_t i, j, k; + + for (i = 0; i < NI; ++i) + { + for (j = 0; j < NJ; ++j) + { + for (k = 0; k < NK; ++k) + { + A[i*(NK * NJ) + j*NK + k] = i % 12 + 2 * (j % 7) + 3 * (k % 13); + } + } + } +} + + +void compareResults(DATA_TYPE* B, DATA_TYPE* B_outputFromGpu) +{ + uint64_t i, j, k, fail; + fail = 0; + + // Compare result from cpu and gpu... 
+ for (i = 1; i < NI - 1; ++i) // 0 + { + for (j = 1; j < NJ - 1; ++j) // 1 + { + for (k = 1; k < NK - 1; ++k) // 2 + { + if (percentDiff(B[i*(NK * NJ) + j*NK + k], B_outputFromGpu[i*(NK * NJ) + j*NK + k]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, %d, CPU is %f, GPU is %f.\n", i, j, k, B[i * (NK * NJ) + j * NK + k], B_outputFromGpu[i * (NK * NJ) + j * NK + k]); + fail++; + } + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void convolution3D_kernel(DATA_TYPE *A, DATA_TYPE *B, uint64_t NI, uint64_t NJ, uint64_t NK, uint64_t block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + c11 = +2; + c21 = +5; + c31 = -8; + c12 = -3; + c22 = +6; + c32 = -9; + c13 = +4; + c23 = +7; + c33 = +10; + + uint64_t tile_dim_x = (NJ + DIM_THREAD_BLOCK - 1) / (DIM_THREAD_BLOCK * BATCH_SIZE); + + __shared__ DATA_TYPE tmp_A[PREFETCH_COUNT][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1]; + __shared__ DATA_TYPE tmp_B[DIM_THREAD_BLOCK * BATCH_SIZE][DIM_THREAD_BLOCK * BATCH_SIZE][DIM_THREAD_BLOCK * BATCH_SIZE]; + + // uint64_t total_tiles = tile_dim_x * tile_dim_x * tile_dim_x; + + uint64_t tiles_this_block_x = (block_size / (DIM_THREAD_BLOCK * BATCH_SIZE)); + uint64_t tiles_this_block = tiles_this_block_x * tiles_this_block_x * tiles_this_block_x; + + uint64_t base_tile = (blockIdx.z * gridDim.y * gridDim.x + blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + uint64_t fetch = base_tile; + uint64_t end_tile = fetch + tiles_this_block; + + // printf("block_size is %d, tile_dim_x is %d, tiles_this_block_x is %d.\n", block_size, tile_dim_x, tiles_this_block_x); + + for (uint64_t compute = fetch; compute < end_tile; compute++) + { + for (; fetch < 
end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + // block id + uint64_t offset = fetch - base_tile; + uint64_t block_id = fetch / tiles_this_block; + + uint64_t bz = block_id / (gridDim.y * gridDim.x) * tiles_this_block_x + offset / (tiles_this_block_x * tiles_this_block_x); + uint64_t by = block_id % (gridDim.y * gridDim.x) / gridDim.x * tiles_this_block_x + offset % (tiles_this_block_x * tiles_this_block_x) / tiles_this_block_x; + uint64_t bx = block_id % (gridDim.y * gridDim.x) % gridDim.x * tiles_this_block_x + offset % (tiles_this_block_x * tiles_this_block_x) % tiles_this_block_x; + + // thread id + uint64_t tx = threadIdx.x; + uint64_t ty = threadIdx.y; + uint64_t tz = threadIdx.z; + + uint64_t index_A_z = DIM_THREAD_BLOCK * BATCH_SIZE * bz + BATCH_SIZE * tz; + uint64_t index_A_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty; + uint64_t index_A_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx; + + uint64_t index_A_z_start = DIM_THREAD_BLOCK * BATCH_SIZE * bz; + uint64_t index_A_y_start = DIM_THREAD_BLOCK * BATCH_SIZE * by; + uint64_t index_A_x_start = DIM_THREAD_BLOCK * BATCH_SIZE * bx; + + uint64_t index_A_z_bound = DIM_THREAD_BLOCK * BATCH_SIZE * bz + BATCH_SIZE * DIM_THREAD_BLOCK; + uint64_t index_A_y_bound = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * DIM_THREAD_BLOCK; + uint64_t index_A_x_bound = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * DIM_THREAD_BLOCK; + + // fetch A + for (uint64_t i = 0; i < BATCH_SIZE; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE; j++) + { + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + if ((index_A_z + i) < NI && (index_A_y + j) < NJ && (index_A_x + k) < NK) + { + memcpy_async(tmp_A[fetch % PREFETCH_COUNT][tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k], A[(index_A_z + i) * NJ * NK + (index_A_y + j) * NK + index_A_x + k], pipe); + } + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < KERNEL - 1; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE * 
DIM_THREAD_BLOCK + KERNEL - 1; j++) + { + for (uint64_t k = 0; k < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; k++) + { + if ((index_A_z_bound + i) < NI && (index_A_y_start + j) < NJ && (index_A_x_start + k) < NK) + { + memcpy_async(tmp_A[fetch % PREFETCH_COUNT][DIM_THREAD_BLOCK * BATCH_SIZE + i][j][k], A[(index_A_z_bound + i) * NJ * NK + (index_A_y_start + j) * NK + index_A_x_start + k], pipe); + } + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; i++) + { + for (uint64_t j = 0; j < KERNEL - 1; j++) + { + for (uint64_t k = 0; k < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; k++) + { + if ((index_A_z_start + i) < NI && (index_A_y_bound + j) < NJ && (index_A_x_start + k) < NK) + { + memcpy_async(tmp_A[fetch % PREFETCH_COUNT][i][DIM_THREAD_BLOCK * BATCH_SIZE + j][k], A[(index_A_z_start + i) * NJ * NK + (index_A_y_bound + j) * NK + index_A_x_start + k], pipe); + } + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; j++) + { + for (uint64_t k = 0; k < KERNEL - 1; k++) + { + if ((index_A_z_start + i) < NI && (index_A_y_start + j) < NJ && (index_A_x_bound + k) < NK) + { + memcpy_async(tmp_A[fetch % PREFETCH_COUNT][i][j][DIM_THREAD_BLOCK * BATCH_SIZE + k], A[(index_A_z_start + i) * NJ * NK + (index_A_y_start + j) * NK + index_A_x_bound + k], pipe); + } + } + } + } + pipe.commit(); + } + if (fetch == end_tile) + { + for (uint64_t i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + // block id + uint64_t offset = compute - base_tile; + uint64_t block_id = compute / tiles_this_block; + + uint64_t bz = block_id / (gridDim.y * gridDim.x) * tiles_this_block_x + offset / (tiles_this_block_x * tiles_this_block_x); + uint64_t by = block_id % (gridDim.y * gridDim.x) / gridDim.x * tiles_this_block_x + offset % 
(tiles_this_block_x * tiles_this_block_x) / tiles_this_block_x; + uint64_t bx = block_id % (gridDim.y * gridDim.x) % gridDim.x * tiles_this_block_x + offset % (tiles_this_block_x * tiles_this_block_x) % tiles_this_block_x; + + // thread id + uint64_t tx = threadIdx.x; + uint64_t ty = threadIdx.y; + uint64_t tz = threadIdx.z; + + uint64_t index_B_z = DIM_THREAD_BLOCK * BATCH_SIZE * bz + BATCH_SIZE * tz + 1; + uint64_t index_B_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty + 1; + uint64_t index_B_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx + 1; + + + // Computation + for (uint64_t i = 0; i < BATCH_SIZE; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE; j++) + { + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + tmp_B[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] = 0; + } + } + } + block.sync(); + + for (uint64_t i = 0; i < BATCH_SIZE; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE; j++) + { + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + tmp_B[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] = + c11 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c13 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c21 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c23 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c31 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c33 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c12 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 1][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k + 1] + c22 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 1][ty * BATCH_SIZE + j + 1][tx * BATCH_SIZE + k + 1] + c32 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 1][ty * BATCH_SIZE + 
j + 2][tx * BATCH_SIZE + k + 1] + c11 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k + 2] + c13 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k + 2] + c21 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i][ty * BATCH_SIZE + j + 1][tx * BATCH_SIZE + k + 2] + c23 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j + 1][tx * BATCH_SIZE + k + 2] + c31 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i][ty * BATCH_SIZE + j + 2][tx * BATCH_SIZE + k + 2] + c33 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j + 2][tx * BATCH_SIZE + k + 2]; + } + } + } + block.sync(); + + // Store B + for (uint64_t i = 0; i < BATCH_SIZE; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE; j++) + { + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + if ((index_B_z + i + 1) < NI && (index_B_y + j + 1) < NJ && (index_B_x + k + 1) < NK) + { + B[(index_B_z + i) * NJ * NK + (index_B_y + j) * NK + index_B_x + k] = tmp_B[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k]; + } + } + } + } + block.sync(); + } +} + + + +void convolution3DCuda(DATA_TYPE* A, DATA_TYPE* B, DATA_TYPE* A_gpu, DATA_TYPE* B_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK, DIM_THREAD_BLOCK, DIM_THREAD_BLOCK); + dim3 grid(nblocks, nblocks, nblocks); + + uint64_t block_size = (NI + (nblocks - 1)) / nblocks; + + // t_start = rtclock(); + + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI * NJ * NK, cudaMemcpyHostToDevice); + convolution3D_kernel<<>>(A_gpu, B_gpu, NI, NJ, NK, block_size); + cudaDeviceSynchronize(); + cudaMemcpy(B, B_gpu, sizeof(DATA_TYPE) * NI * NJ * NK, cudaMemcpyDeviceToHost); + + // t_end = rtclock(); + // fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); + +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, 
start_tsp); + if (argc >= 5) { + NI = atoll(argv[1]); + NJ = atoll(argv[2]); + NK = atoll(argv[3]); + nblocks = atoi(argv[4]); + } else { + NI = SIZE; + NJ = SIZE; + NK = SIZE; + nblocks = NBLOCKS; + } + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE *B_ref; + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + + A = (DATA_TYPE*)malloc(NI*NJ*NK*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NI*NJ*NK*sizeof(DATA_TYPE)); + B_ref = (DATA_TYPE*)malloc(NI*NJ*NK*sizeof(DATA_TYPE)); + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI * NJ * NK); + cudaMalloc(&B_gpu, sizeof(DATA_TYPE) * NI * NJ * NK); + + // initGPU(A_gpu); + + convolution3DCuda(A, B, A_gpu, B_gpu); + + cudaFree(A_gpu); + cudaFree(B_gpu); + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // conv3D(A, B_ref); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(B, B_ref); + free(A); + free(B); + + return 0; +} diff --git a/workloads/micro/async/3DCONV/Makefile b/workloads/micro/async/3DCONV/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..72aac9cb83cca03110da33f8da9119f32af90ccd --- /dev/null +++ b/workloads/micro/async/3DCONV/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := 3DConvolution +CUFILES := 3DConvolution.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o 3DConvolution diff --git a/workloads/micro/async/3DCONV/run.sh b/workloads/micro/async/3DCONV/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..0c27d78b8d3484896bd6812043b5466b074cbebf --- /dev/null +++ b/workloads/micro/async/3DCONV/run.sh @@ -0,0 +1,2 @@ +#./3DConvolution 768 768 768 8 
+./3DConvolution 1536 1536 1536 8 diff --git a/workloads/micro/async/3DCONV/run_large.sh b/workloads/micro/async/3DCONV/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..500c363302024d6080c025784efbe2e7fef74f53 --- /dev/null +++ b/workloads/micro/async/3DCONV/run_large.sh @@ -0,0 +1 @@ +./3DConvolution 384 384 384 8 diff --git a/workloads/micro/async/3DCONV/run_medium.sh b/workloads/micro/async/3DCONV/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..d0a9fb120b6de1c16ae4146d7684c5557af95152 --- /dev/null +++ b/workloads/micro/async/3DCONV/run_medium.sh @@ -0,0 +1 @@ +./3DConvolution 192 192 192 4 diff --git a/workloads/micro/async/3DCONV/run_mega.sh b/workloads/micro/async/3DCONV/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..4e21c66c3d1cf21bfe88427d92c54ff8be428d8a --- /dev/null +++ b/workloads/micro/async/3DCONV/run_mega.sh @@ -0,0 +1 @@ +./3DConvolution 1536 1536 1536 8 diff --git a/workloads/micro/async/3DCONV/run_small.sh b/workloads/micro/async/3DCONV/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..f794eec58ed56174c5d02096a9bf5acc4e948d0f --- /dev/null +++ b/workloads/micro/async/3DCONV/run_small.sh @@ -0,0 +1 @@ +./3DConvolution 96 96 96 4 diff --git a/workloads/micro/async/3DCONV/run_super.sh b/workloads/micro/async/3DCONV/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..11f8b43d3e406c466b824420d34991a5c0f876b4 --- /dev/null +++ b/workloads/micro/async/3DCONV/run_super.sh @@ -0,0 +1 @@ +./3DConvolution 768 768 768 8 diff --git a/workloads/micro/async/3DCONV/run_tiny.sh b/workloads/micro/async/3DCONV/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..791c84bc54088ea65fc7612ee94442bbbc615cc3 --- /dev/null +++ b/workloads/micro/async/3DCONV/run_tiny.sh @@ -0,0 +1 @@ +./3DConvolution 48 48 48 2 diff --git a/workloads/micro/async/gemm/Makefile 
b/workloads/micro/async/gemm/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d545b0aab3ef63260888227abd6ab99bcaddbff3 --- /dev/null +++ b/workloads/micro/async/gemm/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := gemm +CUFILES := gemm.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o gemm diff --git a/workloads/micro/async/gemm/gemm.cu b/workloads/micro/async/gemm/gemm.cu new file mode 100644 index 0000000000000000000000000000000000000000..c71dbd6b1bafbd3473bcc70886b4f81552d7a108 --- /dev/null +++ b/workloads/micro/async/gemm/gemm.cu @@ -0,0 +1,277 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. + * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" 
+#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +uint64_t NI; +uint64_t NJ; +uint64_t NK; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK_X 32 +#define DIM_THREAD_BLOCK_Y 32 + + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void gemm(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i,j,k; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i*NJ + j] *= BETA; + for (k = 0; k < NK; ++k) { + C[i*NJ + j] += ALPHA * A[i*NK + k] * B[k*NJ + j]; + } + } + } +} + +void init(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *C_ref) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) + for (j = 0; j < NK; j++) + A[i*NK + j] = ((DATA_TYPE) i*j) / NI; + + for (i = 0; i < NK; i++) + for (j = 0; j < NJ; j++) + B[i*NJ + j] = ((DATA_TYPE) i*j + 1) / NJ; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + C_ref[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + } + } + +} + + +void compareResults(DATA_TYPE* C, DATA_TYPE* C_outputFromGpu) +{ + uint64_t i, j, fail; + fail = 0; + + // Compare C1 and C2 + for (i=0; i < NI; i++) + { + for (j=0; j < NJ; j++) + { + // printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + if (percentDiff(C[i*NJ + j], C_outputFromGpu[i*NJ + j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + fail++; + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void gemm_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, uint64_t NI, uint64_t NK, uint64_t NJ) +{ + cooperative_groups::thread_block block = 
cooperative_groups::this_thread_block(); + pipeline pipe; + + uint64_t row = blockIdx.y * blockDim.y + threadIdx.y; + uint64_t col = blockIdx.x * blockDim.x + threadIdx.x; + + __shared__ DATA_TYPE s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y * PREFETCH_COUNT]; + __shared__ DATA_TYPE s_b[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y * PREFETCH_COUNT]; + + DATA_TYPE tmp = BETA * c[row * NJ + col]; + + uint64_t base_tiles = 0; + uint64_t end_tile = base_tiles + NK / blockDim.x; + + uint64_t fetch = base_tiles; + uint64_t tile_size = DIM_THREAD_BLOCK_X; + uint64_t mem_size = DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y; + + for (uint64_t compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + memcpy_async(s_a[(fetch % PREFETCH_COUNT) * mem_size + (threadIdx.y * blockDim.x + threadIdx.x)], a[row * NK + fetch * tile_size + threadIdx.x], pipe); + memcpy_async(s_b[(fetch % PREFETCH_COUNT) * mem_size + (threadIdx.y * blockDim.x + threadIdx.x)], b[(fetch * tile_size + threadIdx.y) * NJ + col], pipe); + + pipe.commit(); + } + if (fetch == end_tile) { + for (uint64_t i = 0; i < PREFETCH_COUNT-1; ++i) { pipe.commit(); } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + for (uint64_t k = 0; k < blockDim.x; k++) + { + tmp += ALPHA * s_a[(compute % PREFETCH_COUNT) * mem_size + (threadIdx.y * blockDim.x + k)] * s_b[(compute % PREFETCH_COUNT) * mem_size + (k * blockDim.x + threadIdx.x)]; + } + block.sync(); + } + + c[row * NJ + col] = tmp; +} + +void gemmCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 grid((size_t)(ceil( ((float)NI)/ ((float)block.x) )),(size_t)(ceil( ((float)NJ)/ ((float)block.y) ))); + + //t_start = rtclock(); + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI * NK, cudaMemcpyHostToDevice); + cudaMemcpy(B_gpu, B, sizeof(DATA_TYPE) * NK * NJ, 
cudaMemcpyHostToDevice); + cudaMemcpy(C_gpu, C, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyHostToDevice); + gemm_kernel<<< grid, block >>>(A_gpu, B_gpu, C_gpu, NI, NK, NJ); + cudaDeviceSynchronize(); + cudaMemcpy(C, C_gpu, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyDeviceToHost); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + if (argc >= 4) { + NI = atoll(argv[1]); + NK = atoll(argv[2]); + NJ = atoll(argv[3]); + } else { + NI = SIZE; + NK = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE* C; + DATA_TYPE *C_ref; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + + A = (DATA_TYPE*)malloc(NI*NK*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NK*NJ*sizeof(DATA_TYPE)); + C = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + C_ref = (DATA_TYPE *)malloc(NI * NJ * sizeof(DATA_TYPE)); + + //cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + //cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + //cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + init(A, B, C, C_ref); + + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + cudaMalloc(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + cudaMalloc(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + gemmCuda(A, B, C, A_gpu, B_gpu, C_gpu); + + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // gemm(A, B, C_ref); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(C, C_ref); + free(A); + free(B); + free(C); + free(C_ref); + return 0; +} + diff --git a/workloads/micro/async/gemm/run.sh b/workloads/micro/async/gemm/run.sh new file mode 100755 index 
0000000000000000000000000000000000000000..d234537561c276f291434f42a7337bc5d5a61605 --- /dev/null +++ b/workloads/micro/async/gemm/run.sh @@ -0,0 +1,3 @@ +#./gemm 1024 1024 1024 +./gemm 32768 32768 32768 +#./gemm 512 512 512 diff --git a/workloads/micro/async/gemm/run_large.sh b/workloads/micro/async/gemm/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..cc1dbe192ecafb7558b76cfa4d2ce05a15067999 --- /dev/null +++ b/workloads/micro/async/gemm/run_large.sh @@ -0,0 +1 @@ +./gemm 8192 8192 8192 diff --git a/workloads/micro/async/gemm/run_medium.sh b/workloads/micro/async/gemm/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..ebae2815e56031234ccdef0d98cb774f111e82a2 --- /dev/null +++ b/workloads/micro/async/gemm/run_medium.sh @@ -0,0 +1 @@ +./gemm 4096 4096 4096 diff --git a/workloads/micro/async/gemm/run_mega.sh b/workloads/micro/async/gemm/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..7a7e74e163fa0d0ad774327163771ce8eb23bd79 --- /dev/null +++ b/workloads/micro/async/gemm/run_mega.sh @@ -0,0 +1 @@ +./gemm 32768 32768 32768 diff --git a/workloads/micro/async/gemm/run_small.sh b/workloads/micro/async/gemm/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..df5896fd490618e0f8c5608097518b3837b9ad33 --- /dev/null +++ b/workloads/micro/async/gemm/run_small.sh @@ -0,0 +1 @@ +./gemm 1024 1024 1024 diff --git a/workloads/micro/async/gemm/run_super.sh b/workloads/micro/async/gemm/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..9736fc5d97c8ef9b55a3157882e05979f65a5011 --- /dev/null +++ b/workloads/micro/async/gemm/run_super.sh @@ -0,0 +1 @@ +./gemm 16384 16384 16384 diff --git a/workloads/micro/async/gemm/run_tiny.sh b/workloads/micro/async/gemm/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..9858f12d61d44c0f6265f7c22c27fab446500f55 --- /dev/null +++ 
b/workloads/micro/async/gemm/run_tiny.sh @@ -0,0 +1 @@ +./gemm 512 512 512 diff --git a/workloads/micro/async/gemm_perf/Makefile b/workloads/micro/async/gemm_perf/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d545b0aab3ef63260888227abd6ab99bcaddbff3 --- /dev/null +++ b/workloads/micro/async/gemm_perf/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := gemm +CUFILES := gemm.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o gemm diff --git a/workloads/micro/async/gemm_perf/gemm b/workloads/micro/async/gemm_perf/gemm new file mode 100755 index 0000000000000000000000000000000000000000..ef6f4abb8f57ed0134374b60dbf07d650009cd45 Binary files /dev/null and b/workloads/micro/async/gemm_perf/gemm differ diff --git a/workloads/micro/async/gemm_perf/gemm.cu b/workloads/micro/async/gemm_perf/gemm.cu new file mode 100644 index 0000000000000000000000000000000000000000..1850730d9f73f140fe6afcb0644059248d7826f3 --- /dev/null +++ b/workloads/micro/async/gemm_perf/gemm.cu @@ -0,0 +1,277 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +uint64_t NI; +uint64_t NJ; +uint64_t NK; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK_X 32 +#define DIM_THREAD_BLOCK_Y 32 + + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void gemm(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i,j,k; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i*NJ + j] *= BETA; + for (k = 0; k < NK; ++k) { + C[i*NJ + j] += ALPHA * A[i*NK + k] * B[k*NJ + j]; + } + } + } +} + +void init(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *C_ref) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) + for (j = 0; j < NK; j++) + A[i*NK + j] = ((DATA_TYPE) i*j) / NI; + + for (i = 0; i < NK; 
i++) + for (j = 0; j < NJ; j++) + B[i*NJ + j] = ((DATA_TYPE) i*j + 1) / NJ; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + C_ref[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + } + } + +} + + +void compareResults(DATA_TYPE* C, DATA_TYPE* C_outputFromGpu) +{ + uint64_t i, j, fail; + fail = 0; + + // Compare C1 and C2 + for (i=0; i < NI; i++) + { + for (j=0; j < NJ; j++) + { + // printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + if (percentDiff(C[i*NJ + j], C_outputFromGpu[i*NJ + j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + fail++; + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void gemm_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, uint64_t NI, uint64_t NK, uint64_t NJ) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + + uint64_t row = blockIdx.y * blockDim.y + threadIdx.y; + uint64_t col = blockIdx.x * blockDim.x + threadIdx.x; + + __shared__ DATA_TYPE s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y * PREFETCH_COUNT]; + __shared__ DATA_TYPE s_b[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y * PREFETCH_COUNT]; + + DATA_TYPE tmp = BETA * c[row * NJ + col]; + + uint64_t base_tiles = 0; + uint64_t end_tile = base_tiles + NK / blockDim.x; + + uint64_t fetch = base_tiles; + uint64_t tile_size = DIM_THREAD_BLOCK_X; + uint64_t mem_size = DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y; + + for (uint64_t compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + memcpy_async(s_a[(fetch % PREFETCH_COUNT) * mem_size + (threadIdx.y * blockDim.x + threadIdx.x)], a[row * NK + fetch * tile_size + threadIdx.x], pipe); + memcpy_async(s_b[(fetch % PREFETCH_COUNT) * 
mem_size + (threadIdx.y * blockDim.x + threadIdx.x)], b[(fetch * tile_size + threadIdx.y) * NJ + col], pipe); + + pipe.commit(); + } + if (fetch == end_tile) { + for (uint64_t i = 0; i < PREFETCH_COUNT-1; ++i) { pipe.commit(); } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + for (uint64_t k = 0; k < blockDim.x; k++) + { + tmp += ALPHA * s_a[(compute % PREFETCH_COUNT) * mem_size + (threadIdx.y * blockDim.x + k)] * s_b[(compute % PREFETCH_COUNT) * mem_size + (k * blockDim.x + threadIdx.x)]; + } + block.sync(); + } + + c[row * NJ + col] = tmp; +} + +void gemmCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 grid((size_t)(ceil( ((float)NI)/ ((float)block.x) )),(size_t)(ceil( ((float)NJ)/ ((float)block.y) ))); + + //t_start = rtclock(); + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI * NK, cudaMemcpyHostToDevice); + cudaMemcpy(B_gpu, B, sizeof(DATA_TYPE) * NK * NJ, cudaMemcpyHostToDevice); + cudaMemcpy(C_gpu, C, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyHostToDevice); + gemm_kernel<<< grid, block >>>(A_gpu, B_gpu, C_gpu, NI, NK, NJ); + cudaDeviceSynchronize(); + cudaMemcpy(C, C_gpu, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyDeviceToHost); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + if (argc >= 4) { + NI = atoll(argv[1]); + NK = atoll(argv[2]); + NJ = atoll(argv[3]); + } else { + NI = SIZE; + NK = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE* C; + DATA_TYPE *C_ref; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + + A = (DATA_TYPE*)malloc(NI*NK*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NK*NJ*sizeof(DATA_TYPE)); + C = 
(DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + C_ref = (DATA_TYPE *)malloc(NI * NJ * sizeof(DATA_TYPE)); + + //cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + //cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + //cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + init(A, B, C, C_ref); + + GPU_argv_init(); + + //initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + cudaMalloc(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + cudaMalloc(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + gemmCuda(A, B, C, A_gpu, B_gpu, C_gpu); + + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + endCPU(); + //finiTrace(); + + // t_start = rtclock(); + // gemm(A, B, C_ref); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(C, C_ref); + free(A); + free(B); + free(C); + free(C_ref); + return 0; +} + diff --git a/workloads/micro/async/gemm_perf/run.sh b/workloads/micro/async/gemm_perf/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..d234537561c276f291434f42a7337bc5d5a61605 --- /dev/null +++ b/workloads/micro/async/gemm_perf/run.sh @@ -0,0 +1,3 @@ +#./gemm 1024 1024 1024 +./gemm 32768 32768 32768 +#./gemm 512 512 512 diff --git a/workloads/micro/async/gemm_perf/run_large.sh b/workloads/micro/async/gemm_perf/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..cc1dbe192ecafb7558b76cfa4d2ce05a15067999 --- /dev/null +++ b/workloads/micro/async/gemm_perf/run_large.sh @@ -0,0 +1 @@ +./gemm 8192 8192 8192 diff --git a/workloads/micro/async/gemm_perf/run_medium.sh b/workloads/micro/async/gemm_perf/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..ebae2815e56031234ccdef0d98cb774f111e82a2 --- /dev/null +++ b/workloads/micro/async/gemm_perf/run_medium.sh @@ -0,0 +1 @@ +./gemm 4096 4096 4096 diff --git a/workloads/micro/async/gemm_perf/run_mega.sh b/workloads/micro/async/gemm_perf/run_mega.sh new file mode 
100755 index 0000000000000000000000000000000000000000..7a7e74e163fa0d0ad774327163771ce8eb23bd79 --- /dev/null +++ b/workloads/micro/async/gemm_perf/run_mega.sh @@ -0,0 +1 @@ +./gemm 32768 32768 32768 diff --git a/workloads/micro/async/gemm_perf/run_small.sh b/workloads/micro/async/gemm_perf/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..df5896fd490618e0f8c5608097518b3837b9ad33 --- /dev/null +++ b/workloads/micro/async/gemm_perf/run_small.sh @@ -0,0 +1 @@ +./gemm 1024 1024 1024 diff --git a/workloads/micro/async/gemm_perf/run_super.sh b/workloads/micro/async/gemm_perf/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..9736fc5d97c8ef9b55a3157882e05979f65a5011 --- /dev/null +++ b/workloads/micro/async/gemm_perf/run_super.sh @@ -0,0 +1 @@ +./gemm 16384 16384 16384 diff --git a/workloads/micro/async/gemm_perf/run_tiny.sh b/workloads/micro/async/gemm_perf/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..9858f12d61d44c0f6265f7c22c27fab446500f55 --- /dev/null +++ b/workloads/micro/async/gemm_perf/run_tiny.sh @@ -0,0 +1 @@ +./gemm 512 512 512 diff --git a/workloads/micro/async/gemv/Makefile b/workloads/micro/async/gemv/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..005563000dad0469dbf388d54a639e926cf5aa85 --- /dev/null +++ b/workloads/micro/async/gemv/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := gemv +CUFILES := gemv.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o gemv diff --git a/workloads/micro/async/gemv/gemv.cu b/workloads/micro/async/gemv/gemv.cu new file mode 100644 index 0000000000000000000000000000000000000000..8a136e3ae7943b4600c0452c74ac7b28250c80ce --- /dev/null +++ 
b/workloads/micro/async/gemv/gemv.cu @@ -0,0 +1,269 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. + * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +// define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 40960 +uint64_t NI; +uint64_t NJ; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 256 + +#define BATCH_SIZE 16 + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void gemv(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) + { + C[i] *= BETA; + for (j = 0; j < NJ; j++) + { + C[i] += ALPHA * A[i * NJ + j] * B[j]; + } + } +} + +void init(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *C_ref) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) + for (j = 0; j < NJ; j++) + A[i 
* NJ + j] = ((DATA_TYPE)i * j) / NI; + + for (j = 0; j < NJ; j++) + B[j] = ((DATA_TYPE)j + 1) / NJ; + + for (i = 0; i < NI; i++) + { + C[i] = ((DATA_TYPE)i + 2) / NI; + C_ref[i] = ((DATA_TYPE)i + 2) / NI; + } +} + +void compareResults(DATA_TYPE *C, DATA_TYPE *C_outputFromGpu) +{ + uint64_t i, fail; + fail = 0; + + // Compare C1 and C2 + for (i = 0; i < NI; i++) + { + if (percentDiff(C[i], C_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + fail++; + printf("%d, GPU is %f, CPU is %f.\n", i, C[i], C_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void gemv_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, uint64_t NI, uint64_t NJ) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + uint64_t row = blockIdx.x * blockDim.x + threadIdx.x; + uint64_t tx = threadIdx.x; + + __shared__ DATA_TYPE s_b[PREFETCH_COUNT][DIM_THREAD_BLOCK][BATCH_SIZE]; + + DATA_TYPE tmp = BETA * c[row]; + __syncthreads(); + + uint64_t fetch = 0; + uint64_t end_tile = NJ / BATCH_SIZE; + + for (uint64_t compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + uint64_t base_index = fetch * BATCH_SIZE; + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + memcpy_async(s_b[fetch % PREFETCH_COUNT][tx][k], b[base_index + k], pipe); + } + pipe.commit(); + } + if (fetch == end_tile) + { + for (uint64_t i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + uint64_t base_index = compute * BATCH_SIZE; + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + tmp += ALPHA * a[row * NJ + base_index + k] * s_b[compute % PREFETCH_COUNT][tx][k]; + } + block.sync(); + } + c[row] = tmp; +} + +void gemvCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE 
*C_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / (DIM_THREAD_BLOCK)); + + // t_start = rtclock(); + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyHostToDevice); + cudaMemcpy(B_gpu, B, sizeof(DATA_TYPE) * NJ, cudaMemcpyHostToDevice); + cudaMemcpy(C_gpu, C, sizeof(DATA_TYPE) * NI, cudaMemcpyHostToDevice); + gemv_kernel<<>>(A_gpu, B_gpu, C_gpu, NI, NJ); + cudaDeviceSynchronize(); + cudaMemcpy(C, C_gpu, sizeof(DATA_TYPE) * NI, cudaMemcpyDeviceToHost); + // t_end = rtclock(); + + // fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + if (argc >= 3) + { + NI = atoll(argv[1]); + NJ = atoll(argv[2]); + } + else + { + NI = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE *A; + DATA_TYPE *B; + DATA_TYPE *C; + DATA_TYPE *C_ref; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + + A = (DATA_TYPE *)malloc(NI * NJ * sizeof(DATA_TYPE)); + B = (DATA_TYPE *)malloc(NJ * sizeof(DATA_TYPE)); + C = (DATA_TYPE *)malloc(NI * sizeof(DATA_TYPE)); + C_ref = (DATA_TYPE *)malloc(NI * sizeof(DATA_TYPE)); + + // cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + // cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + // cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + init(A, B, C, C_ref); + + GPU_argv_init(); + initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI * NJ); + cudaMalloc(&B_gpu, sizeof(DATA_TYPE) * NJ); + cudaMalloc(&C_gpu, sizeof(DATA_TYPE) * NI); + + gemvCuda(A, B, C, A_gpu, B_gpu, C_gpu); + + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // gemv(A, B, C_ref); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(C, C_ref); + free(A); + free(B); + free(C); + 
free(C_ref); + return 0; +} diff --git a/workloads/micro/async/gemv/gemv.cu.bp b/workloads/micro/async/gemv/gemv.cu.bp new file mode 100644 index 0000000000000000000000000000000000000000..ecc16eeff006780068003c5076ce92698e3ebc0c --- /dev/null +++ b/workloads/micro/async/gemv/gemv.cu.bp @@ -0,0 +1,334 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. + * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +#define GPU_DEVICE 0 + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 40960 +int NI; +int NJ; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK_X 32 +#define DIM_THREAD_BLOCK_Y 32 + +#define SHMEM_SIZE (DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y) +// #define SHMEM_SIZE (NI) + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef int DATA_TYPE; + +void gemv(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + int i,j; + + for (i = 0; i < NI; i++) { + C[i] *= BETA; + for (j = 0; j < NJ; j++) { + C[i] += ALPHA * A[i*NJ + j] * B[j]; + 
} + } +} + +void init(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *C_ref) +{ + int i, j; + + for (i = 0; i < NI; i++) + for (j = 0; j < NJ; j++) + A[i*NJ + j] = ((DATA_TYPE) i*j) / NI; + + for (j = 0; j < NJ; j++) + B[j] = ((DATA_TYPE) j + 1) / NJ; + + for (i = 0; i < NI; i++) { + C[i] = ((DATA_TYPE)i + 2) / NI; + C_ref[i] = ((DATA_TYPE)i + 2) / NI; + } + + for (i = 0; i < NI; i++) + for (j = 0; j < NJ; j++) + A[i*NJ + j] = 1.0f; + + // for (j = 0; j < NJ; j++) + // B[j] = 1.0f; + + // for (i = 0; i < NI; i++) { + // C[i] = 1.0f; + // C_ref[i] = 1.0f; + // } +} + + +void compareResults(DATA_TYPE* C, DATA_TYPE* C_outputFromGpu) +{ + int i, fail; + fail = 0; + + // Compare C1 and C2 + for (i=0; i < NI; i++) { + if (percentDiff(C[i], C_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%d, GPU is %f, CPU is %f.\n", i, C[i], C_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + + +void GPU_argv_init() +{ + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, GPU_DEVICE); + printf("setting device %d with name %s\n",GPU_DEVICE,deviceProp.name); + cudaSetDevice( GPU_DEVICE ); +} + +// __global__ void gemv_kernel_reduce(DATA_TYPE *c_tmp, DATA_TYPE *c, int NI, int NJ) +// { +// int index = blockIdx.x * blockDim.x + threadIdx.x; + +// for (int i = 0; i < DIM_THREAD_BLOCK_X; i++) { +// c[index] += c_tmp[index * DIM_THREAD_BLOCK_X + i]; +// } +// } + +// __global__ void gemv_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, int NI, int NJ) +// { +// // Compute each thread's global row and column index +// int row = blockIdx.y * blockDim.y + threadIdx.y; +// __shared__ DATA_TYPE tmp[DIM_THREAD_BLOCK_X]; + +// // Sweep tile across matrix +// for (int i = threadIdx.x; i < NJ; i += blockDim.x) +// { +// tmp[threadIdx.x] = b[i]; +// } + +// __syncthreads(); + +// for (int i = threadIdx.x; i < NJ; i += blockDim.x) +// { 
+// c[row * DIM_THREAD_BLOCK_X + threadIdx.x] += ALPHA * a[row * NJ + i] * tmp[threadIdx.x]; +// } + +// __syncthreads(); +// } + +// __global__ void gemv_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c) +// { +// // Compute each thread's global row and column index +// int row = blockIdx.y * blockDim.y + threadIdx.y; + +// __shared__ DATA_TYPE tmp[DIM_THREAD_BLOCK_X]; + +// tmp[threadIdx.x] = 0; +// __syncthreads(); + +// // Sweep tile across matrix +// for (int i = 0; i < NJ; i+= blockDim.x) { +// // s_b[threadIdx.x] = b[i + threadIdx.x]; +// // __syncthreads(); +// tmp[threadIdx.x] += ALPHA * a[row * NJ + i + threadIdx.x] * b[i + threadIdx.x]; +// __syncthreads(); +// } +// __syncthreads(); + + +// DATA_TYPE tmp_c = c[row] * BETA; +// __syncthreads(); +// for (int i = 0; i < blockDim.x; i++) { +// tmp_c += tmp[i]; +// __syncthreads(); +// } +// __syncthreads(); +// c[row] = tmp_c; +// __syncthreads(); +// } + +__global__ void gemv_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, int NI, int NJ) +{ + // Compute each thread's global row and column index + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + + // Statically allocated shared memory + __shared__ DATA_TYPE s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + __shared__ DATA_TYPE s_b[DIM_THREAD_BLOCK_X]; + + // Accumulate in temporary variable + + DATA_TYPE tmp = BETA * c[row]; + + // Sweep tile across matrix + for (int i = 0; i < NJ; i += blockDim.x) + { + s_a[threadIdx.y * blockDim.x + threadIdx.x] = a[row * NJ + i + threadIdx.x]; + s_b[threadIdx.x] = b[i + threadIdx.x]; + + // = b[(i + threadIdx.y) * NJ + col]; + + // Wait for both tiles to be loaded in before doing computation + __syncthreads(); + + // Do matrix multiplication on the small matrix + for (int k = 0; k < blockDim.x; k++) + { + tmp += ALPHA * s_a[threadIdx.y * blockDim.x + k] * s_b[threadIdx.x]; + } + + // Wait for all threads to finish using current tiles before loading in new ones + 
__syncthreads(); + } + + c[row] = tmp; +} + +void gemvCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu, DATA_TYPE *C_gpu_tmp) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + // dim3 grid(1, (size_t)(ceil(((float)NJ) / ((float)block.y)))); + dim3 grid((size_t)(ceil( ((float)NI)/ ((float)block.x) )),(size_t)(ceil( ((float)NJ)/ ((float)block.y) ))); + + dim3 block_reduce(DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y); + dim3 grid_reduce((size_t)(ceil(((float)NJ) / ((float)(DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y))))); + + t_start = rtclock(); + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyHostToDevice); + cudaMemcpy(B_gpu, B, sizeof(DATA_TYPE) * NJ, cudaMemcpyHostToDevice); + cudaMemcpy(C_gpu, C, sizeof(DATA_TYPE) * NI, cudaMemcpyHostToDevice); + gemv_kernel<<>>(A_gpu, B_gpu, C_gpu, NI, NJ); + // gemv_kernel_reduce<<>>(C_gpu_tmp, C_gpu, NI, NJ); + cudaDeviceSynchronize(); + cudaMemcpy(C, C_gpu, sizeof(DATA_TYPE) * NI, cudaMemcpyDeviceToHost); + t_end = rtclock(); + + fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + + +int main(int argc, char *argv[]) +{ + if (argc >= 3) { + NI = atoi(argv[1]); + NJ = atoi(argv[2]); + } else { + NI = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE* C; + DATA_TYPE *C_ref; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + DATA_TYPE *C_gpu_tmp; + + A = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NJ*sizeof(DATA_TYPE)); + C = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + C_ref = (DATA_TYPE *)malloc(NI*sizeof(DATA_TYPE)); + + //cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + //cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + //cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI * NJ); + cudaMalloc(&B_gpu, sizeof(DATA_TYPE) * NJ); + cudaMalloc(&C_gpu, sizeof(DATA_TYPE) * NI); + 
cudaMalloc(&C_gpu_tmp, sizeof(DATA_TYPE) * NI * DIM_THREAD_BLOCK_X); + + init(A, B, C, C_ref); + + GPU_argv_init(); + + gemvCuda(A, B, C, A_gpu, B_gpu, C_gpu, C_gpu_tmp); + + t_start = rtclock(); + gemv(A, B, C_ref); + t_end = rtclock(); + fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + compareResults(C, C_ref); + + free(A); + free(B); + free(C); + free(C_ref); + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + cudaFree(C_gpu_tmp); + return 0; +} + diff --git a/workloads/micro/async/gemv/run.sh b/workloads/micro/async/gemv/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..c75d8f69375fd6c923a93ba88a4cb43238844f8b --- /dev/null +++ b/workloads/micro/async/gemv/run.sh @@ -0,0 +1,2 @@ +# ./gemv 16384 16384 +./gemv 32768 32768 diff --git a/workloads/micro/async/gemv/run_large.sh b/workloads/micro/async/gemv/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..102aa73e96b74d0d46ef261f0ffd665639679025 --- /dev/null +++ b/workloads/micro/async/gemv/run_large.sh @@ -0,0 +1 @@ +./gemv 8192 8192 diff --git a/workloads/micro/async/gemv/run_medium.sh b/workloads/micro/async/gemv/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..2cc0f68e4d4e96c36ee0d72e64f7acdb7c97233c --- /dev/null +++ b/workloads/micro/async/gemv/run_medium.sh @@ -0,0 +1 @@ +./gemv 4096 4096 diff --git a/workloads/micro/async/gemv/run_mega.sh b/workloads/micro/async/gemv/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..88ca5ba9468efc68d859100ccbd246b9b3af960b --- /dev/null +++ b/workloads/micro/async/gemv/run_mega.sh @@ -0,0 +1 @@ +./gemv 65536 65536 diff --git a/workloads/micro/async/gemv/run_small.sh b/workloads/micro/async/gemv/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..55646a647b7f53aff91c5e38562bc449d75daa9a --- /dev/null +++ b/workloads/micro/async/gemv/run_small.sh @@ -0,0 +1 @@ +./gemv 1024 1024 diff --git 
a/workloads/micro/async/gemv/run_super.sh b/workloads/micro/async/gemv/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..295d223c3c9c768e58c0fba722fa859b74564d2d --- /dev/null +++ b/workloads/micro/async/gemv/run_super.sh @@ -0,0 +1 @@ +./gemv 32768 32768 diff --git a/workloads/micro/async/gemv/run_tiny.sh b/workloads/micro/async/gemv/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..60becef20c6cc3113ff1b4897d177ff3cbd77eb8 --- /dev/null +++ b/workloads/micro/async/gemv/run_tiny.sh @@ -0,0 +1 @@ +./gemv 512 512 diff --git a/workloads/micro/async/saxpy/Makefile b/workloads/micro/async/saxpy/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..069a5001c286cb5f44c4686449f04755cd5a2e52 --- /dev/null +++ b/workloads/micro/async/saxpy/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := saxpy +CUFILES := saxpy.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o saxpy diff --git a/workloads/micro/async/saxpy/run.sh b/workloads/micro/async/saxpy/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..61775ede6c26208ae2a150958f8eec5375952773 --- /dev/null +++ b/workloads/micro/async/saxpy/run.sh @@ -0,0 +1,11 @@ +# ./vector_seq 17179869184 10 524288 + +./vector_seq 8589934592 10 262144 +#nsys profile --stats=true ./vector_seq 1073741824 10 32768 + + +# ./vector_seq 4294967296 10 131072 + +# ./vector_seq 2147483648 10 32768 +#./vector_seq 1073741824 10 32768 +# ./vector_seq 1048576 100 32768 diff --git a/workloads/micro/async/saxpy/run_large.sh b/workloads/micro/async/saxpy/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..bce2787645ea40cb94ba004ec19cf4728ba48647 --- /dev/null 
+++ b/workloads/micro/async/saxpy/run_large.sh @@ -0,0 +1 @@ +./saxpy 134217728 100 65536 diff --git a/workloads/micro/async/saxpy/run_medium.sh b/workloads/micro/async/saxpy/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..af6f429a95aa9d5ede352e30e59405aaee4ee55b --- /dev/null +++ b/workloads/micro/async/saxpy/run_medium.sh @@ -0,0 +1 @@ +./saxpy 16777216 100 32768 diff --git a/workloads/micro/async/saxpy/run_mega.sh b/workloads/micro/async/saxpy/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..5dce5483842d255b4130f7ca89b4894c65b5a44b --- /dev/null +++ b/workloads/micro/async/saxpy/run_mega.sh @@ -0,0 +1 @@ +./saxpy 4294967296 100 262144 diff --git a/workloads/micro/async/saxpy/run_small.sh b/workloads/micro/async/saxpy/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..c927e5e53c30ca58a12dbdf95e264a4b790ab0b8 --- /dev/null +++ b/workloads/micro/async/saxpy/run_small.sh @@ -0,0 +1 @@ +./saxpy 2097152 100 32768 diff --git a/workloads/micro/async/saxpy/run_super.sh b/workloads/micro/async/saxpy/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..7f8d7b99b0be6cc643cd686965806f7edbc0af27 --- /dev/null +++ b/workloads/micro/async/saxpy/run_super.sh @@ -0,0 +1 @@ +./saxpy 1073741824 100 65536 diff --git a/workloads/micro/async/saxpy/run_tiny.sh b/workloads/micro/async/saxpy/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..4604f1747023f34e81c15ce89c4be5a6fb39bc95 --- /dev/null +++ b/workloads/micro/async/saxpy/run_tiny.sh @@ -0,0 +1 @@ +./saxpy 262144 100 8192 diff --git a/workloads/micro/async/saxpy/saxpy.cu b/workloads/micro/async/saxpy/saxpy.cu new file mode 100644 index 0000000000000000000000000000000000000000..7067b1783e188a4384cce7739d9e7dc9808fbaea --- /dev/null +++ b/workloads/micro/async/saxpy/saxpy.cu @@ -0,0 +1,270 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096000 +#define ITER 100 +uint64_t NI; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 256 + +#define BATCH_SIZE 8 + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void saxpy(DATA_TYPE *A, DATA_TYPE *B, uint64_t iterations) +{ + for (uint64_t i = 0; i < NI; i++) { + for (uint64_t iter = 0; iter < iterations; iter++) { + B[i] = ALPHA * A[i] + B[i]; + } + } +} + +void init(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *B_ref) +{ + for (uint64_t i = 0; i < NI; i++) + A[i] = ((DATA_TYPE) i) / NI; + + for (uint64_t i = 0; i < NI; i++) { + B[i] = ((DATA_TYPE)i + 2) / NI; + B_ref[i] = ((DATA_TYPE)i + 2) / NI; + } +} + + +void compareResults(DATA_TYPE* 
B, DATA_TYPE* B_outputFromGpu) +{ + uint64_t fail = 0; + + // Compare C1 and C2 + for (uint64_t i = 0; i < NI; i++) { + // printf("%lld, GPU is %f, CPU is %f.\n", i, B[i], B_outputFromGpu[i]); + if (percentDiff(B[i], B_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%lld, GPU is %f, CPU is %f.\n", i, B[i], B_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void saxpy_kernel(DATA_TYPE *a, DATA_TYPE *b, uint64_t NI, uint64_t iterations, uint64_t block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + + // Compute each thread's global row and column index + const uint64_t mem_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + __shared__ DATA_TYPE tmp_a[mem_size * PREFETCH_COUNT]; + __shared__ DATA_TYPE tmp_b[mem_size * PREFETCH_COUNT]; + + uint64_t total_tiles = NI / mem_size; + uint64_t base_tiles = total_tiles / gridDim.x; + + uint64_t tiles_this_block = block_size / mem_size; + + uint64_t fetch = base_tiles * blockIdx.x; + uint64_t end_tile = fetch + tiles_this_block; + + for (uint64_t compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + memcpy_async(tmp_a[(fetch % PREFETCH_COUNT) * mem_size + i], a[fetch * mem_size + i], pipe); + memcpy_async(tmp_b[(fetch % PREFETCH_COUNT) * mem_size + i], b[fetch * mem_size + i], pipe); + } + pipe.commit(); + } + if (fetch == end_tile) { + for (uint64_t i = 0; i < PREFETCH_COUNT-1; ++i) { pipe.commit(); } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + for (uint64_t iter = 0; iter < iterations; iter++) + { + tmp_b[(compute % PREFETCH_COUNT) * mem_size + i] += ALPHA * tmp_a[(compute % PREFETCH_COUNT) 
* mem_size + i]; + } + } + block.sync(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + b[compute * mem_size + i] = tmp_b[(compute % PREFETCH_COUNT) * mem_size + i]; + } + block.sync(); + } + +} + + +void saxpyCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, uint64_t iterations, uint64_t block_size) +{ + double t_start, t_end; + if (block_size <= DIM_THREAD_BLOCK) + block_size = DIM_THREAD_BLOCK; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / block_size); + + //t_start = rtclock(); + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI, cudaMemcpyHostToDevice); + cudaMemcpy(B_gpu, B, sizeof(DATA_TYPE) * NI, cudaMemcpyHostToDevice); + saxpy_kernel<<>>(A_gpu, B_gpu, NI, iterations, block_size); + cudaDeviceSynchronize(); + cudaMemcpy(B, B_gpu, sizeof(DATA_TYPE) * NI, cudaMemcpyDeviceToHost); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + uint64_t iterations = ITER; + uint64_t block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + if (argc >= 4) { + NI = atoll(argv[1]); + iterations = atoi(argv[2]); + block_size = atoi(argv[3]); + } else { + NI = SIZE; + iterations = ITER; + block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + } + + int nblocks = NI / block_size; + if (nblocks > 64) + { + nblocks = 64; + block_size = NI / nblocks; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE *B_ref; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + + A = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + B_ref = (DATA_TYPE *)malloc(NI*sizeof(DATA_TYPE)); + + //cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + //cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + //cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + init(A, B, B_ref); + + GPU_argv_init(); + + 
initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI); + cudaMalloc(&B_gpu, sizeof(DATA_TYPE) * NI); + + saxpyCuda(A, B, A_gpu, B_gpu, iterations, block_size); + + cudaFree(A_gpu); + cudaFree(B_gpu); + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // saxpy(A, B_ref, iterations); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(B, B_ref); + free(A); + free(B); + free(B_ref); + return 0; +} + diff --git a/workloads/micro/async/vector_rand/Makefile b/workloads/micro/async/vector_rand/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..b2abd68d5e6d513a5652b845d6d822b15fc12a59 --- /dev/null +++ b/workloads/micro/async/vector_rand/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := vector_rand +CUFILES := vector_rand.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o vector_rand diff --git a/workloads/micro/async/vector_rand/run.sh b/workloads/micro/async/vector_rand/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..61775ede6c26208ae2a150958f8eec5375952773 --- /dev/null +++ b/workloads/micro/async/vector_rand/run.sh @@ -0,0 +1,11 @@ +# ./vector_seq 17179869184 10 524288 + +./vector_seq 8589934592 10 262144 +#nsys profile --stats=true ./vector_seq 1073741824 10 32768 + + +# ./vector_seq 4294967296 10 131072 + +# ./vector_seq 2147483648 10 32768 +#./vector_seq 1073741824 10 32768 +# ./vector_seq 1048576 100 32768 diff --git a/workloads/micro/async/vector_rand/run_large.sh b/workloads/micro/async/vector_rand/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..0fbcd5c0eb481d41efff934e3f19f162bc1f73e2 --- /dev/null +++ 
b/workloads/micro/async/vector_rand/run_large.sh @@ -0,0 +1 @@ +./vector_rand 134217728 100 65536 diff --git a/workloads/micro/async/vector_rand/run_medium.sh b/workloads/micro/async/vector_rand/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..84e868f85fbc74d101d476d501095afd4aa6d017 --- /dev/null +++ b/workloads/micro/async/vector_rand/run_medium.sh @@ -0,0 +1 @@ +./vector_rand 16777216 100 32768 diff --git a/workloads/micro/async/vector_rand/run_mega.sh b/workloads/micro/async/vector_rand/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..24b084a7613986acafd45cf6ca300fe52f0426d5 --- /dev/null +++ b/workloads/micro/async/vector_rand/run_mega.sh @@ -0,0 +1 @@ +./vector_rand 8589934592 100 262144 diff --git a/workloads/micro/async/vector_rand/run_small.sh b/workloads/micro/async/vector_rand/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..baf1b589adcc83b946051cf0fc2383b45746c62f --- /dev/null +++ b/workloads/micro/async/vector_rand/run_small.sh @@ -0,0 +1 @@ +./vector_rand 2097152 100 32768 diff --git a/workloads/micro/async/vector_rand/run_super.sh b/workloads/micro/async/vector_rand/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..19be53cddf75e6d6c5812e3ec337cce2fd9079ae --- /dev/null +++ b/workloads/micro/async/vector_rand/run_super.sh @@ -0,0 +1 @@ +./vector_rand 1073741824 100 65536 diff --git a/workloads/micro/async/vector_rand/run_tiny.sh b/workloads/micro/async/vector_rand/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..55ee72259022f385aabf8eedc426425e6817835f --- /dev/null +++ b/workloads/micro/async/vector_rand/run_tiny.sh @@ -0,0 +1 @@ +./vector_rand 262144 100 8192 diff --git a/workloads/micro/async/vector_rand/vector_rand.cu b/workloads/micro/async/vector_rand/vector_rand.cu new file mode 100644 index 0000000000000000000000000000000000000000..73f2600af1a85d424c05aaf6bd25c0524644c9fd --- 
/dev/null +++ b/workloads/micro/async/vector_rand/vector_rand.cu @@ -0,0 +1,265 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. + * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.005 + +/* Problem size */ +#define SIZE 4096000 +#define ITER 100 +uint64_t NI; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 256 + +#define BATCH_SIZE 16 + +#define LCG_A 1.1f +#define LCG_B 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void saxpy(DATA_TYPE *A, uint64_t iterations) +{ + for (uint64_t i = 0; i < NI; i++) { + for (uint64_t iter = 0; iter < iterations; iter++) { + A[i] = LCG_A * A[i] + LCG_B; + } + } +} + +void init(DATA_TYPE *A, DATA_TYPE *A_ref) +{ + for (uint64_t i = 0; i < NI; i++) { + A[i] = ((DATA_TYPE) i) / NI; + A_ref[i] = ((DATA_TYPE) i) / NI; + } +} + + +void compareResults(DATA_TYPE* A, DATA_TYPE* 
A_outputFromGpu) +{ + uint64_t fail = 0; + + // Compare C1 and C2 + for (uint64_t i = 0; i < NI; i++) { + // printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + if (percentDiff(A[i], A_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void vector_rand_kernel(DATA_TYPE *a, uint64_t NI, uint64_t iterations, uint64_t block_size, size_t seed) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + + const uint64_t mem_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + __shared__ DATA_TYPE tmp[mem_size * PREFETCH_COUNT]; + + curandState_t randState; + size_t tx = blockIdx.x * blockDim.x + threadIdx.x; + curand_init(seed, tx, 0, &randState); + size_t idx = 0; + + uint64_t total_tiles = NI / mem_size; + uint64_t base_tiles = total_tiles / gridDim.x; + + uint64_t tiles_this_block = block_size / mem_size; + + uint64_t fetch = base_tiles * blockIdx.x; + uint64_t end_tile = fetch + tiles_this_block; + + for (uint64_t compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + idx = curand(&randState); + idx <<= 32; + idx |= curand(&randState); + + memcpy_async(tmp[(fetch % PREFETCH_COUNT) * mem_size + i], a[fetch * mem_size + idx % mem_size], pipe); + } + pipe.commit(); + } + if (fetch == end_tile) { + for (uint64_t i = 0; i < PREFETCH_COUNT-1; ++i) { pipe.commit(); } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + for (uint64_t iter = 0; iter < iterations; iter++) + { + tmp[(compute % PREFETCH_COUNT) * mem_size + i] = LCG_A * tmp[(compute % PREFETCH_COUNT) 
* mem_size + i] + LCG_B; + } + } + block.sync(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + a[compute * mem_size + i] = tmp[(compute % PREFETCH_COUNT) * mem_size + idx % mem_size]; + } + block.sync(); + } +} + +void saxpyCuda(DATA_TYPE *A, DATA_TYPE *A_gpu, uint64_t iterations, uint64_t block_size) +{ + double t_start, t_end; + if (block_size <= DIM_THREAD_BLOCK) + block_size = DIM_THREAD_BLOCK; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / block_size); + + //t_start = rtclock(); + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI, cudaMemcpyHostToDevice); + vector_rand_kernel<<>>(A_gpu, NI, iterations, block_size, 832945); + cudaDeviceSynchronize(); + cudaMemcpy(A, A_gpu, sizeof(DATA_TYPE) * NI, cudaMemcpyDeviceToHost); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + uint64_t iterations = ITER; + uint64_t block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + if (argc >= 4) { + NI = atoll(argv[1]); + iterations = atoi(argv[2]); + block_size = atoi(argv[3]); + } + else { + NI = SIZE; + iterations = ITER; + block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + } + + int nblocks = NI / block_size; + if (nblocks > 64) + { + nblocks = 64; + block_size = NI / nblocks; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE *A_ref; + + DATA_TYPE *A_gpu; + + A = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + A_ref = (DATA_TYPE *)malloc(NI*sizeof(DATA_TYPE)); + + //cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + //cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + //cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + init(A, A_ref); + + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI); + + saxpyCuda(A, A_gpu, iterations, block_size); + + // t_start = rtclock(); + // saxpy(A_ref, 
iterations); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(A, A_ref); + cudaFree(A_gpu); + endCPU(); + finiTrace(); + + free(A); + free(A_ref); + return 0; +} + diff --git a/workloads/micro/async/vector_seq/Makefile b/workloads/micro/async/vector_seq/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..2d9857cedea5755ad6a381996c578f9cbd41424a --- /dev/null +++ b/workloads/micro/async/vector_seq/Makefile @@ -0,0 +1,109 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := vector_seq +CUFILES := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + + +EXECUTABLE_4096_256 := vector_seq_4096_256 +CUFILES_4096_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=4096 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_2048_256 := vector_seq_2048_256 +CUFILES_2048_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=2048 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_1024_256 := vector_seq_1024_256 +CUFILES_1024_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=1024 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_512_256 := vector_seq_512_256 +CUFILES_512_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=512 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_256_256 := vector_seq_256_256 +CUFILES_256_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=256 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_128_256 := vector_seq_128_256 +CUFILES_128_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=128 
-DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_64_256 := vector_seq_64_256 +CUFILES_64_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_32_256 := vector_seq_32_256 +CUFILES_32_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=32 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_16_256 := vector_seq_16_256 +CUFILES_16_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=16 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + + +EXECUTABLE_1024_4 := vector_seq_1024_4 +CUFILES_1024_4 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=1024 -DBATCH_SIZE=4 + +EXECUTABLE_512_8 := vector_seq_512_8 +CUFILES_512_8 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=512 -DBATCH_SIZE=8 + +EXECUTABLE_256_16 := vector_seq_256_16 +CUFILES_256_16 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_128_32 := vector_seq_128_32 +CUFILES_128_32 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=32 + +EXECUTABLE_64_64 := vector_seq_64_64 +CUFILES_64_64 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=64 -DBATCH_SIZE=64 + +EXECUTABLE_32_128 := vector_seq_32_128 +CUFILES_32_128 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=32 -DBATCH_SIZE=128 + + +EXECUTABLE_2 := vector_seq_2 +CUFILES_2:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=2 + 
+EXECUTABLE_4 := vector_seq_4 +CUFILES_4:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=4 + +EXECUTABLE_8 := vector_seq_8 +CUFILES_8:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=8 + +EXECUTABLE_16 := vector_seq_16 +CUFILES_16:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=16 + +EXECUTABLE_32 := vector_seq_32 +CUFILES_32:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=32 + +EXECUTABLE_64 := vector_seq_64 +CUFILES_64:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=64 + +EXECUTABLE_128 := vector_seq_128 +CUFILES_128:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=128 + + + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} + + $(NVCC) ${NVCCCFLAGS} ${CUFILES_4096_256} ${DEF} -o ${EXECUTABLE_4096_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_2048_256} ${DEF} -o ${EXECUTABLE_2048_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_1024_256} ${DEF} -o ${EXECUTABLE_1024_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_512_256} ${DEF} -o ${EXECUTABLE_512_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_256_256} ${DEF} -o ${EXECUTABLE_256_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_128_256} ${DEF} -o ${EXECUTABLE_128_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_64_256} ${DEF} -o ${EXECUTABLE_64_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_32_256} ${DEF} -o ${EXECUTABLE_32_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_16_256} ${DEF} -o ${EXECUTABLE_16_256} + + $(NVCC) ${NVCCCFLAGS} ${CUFILES_1024_4} ${DEF} -o ${EXECUTABLE_1024_4} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_512_8} ${DEF} -o ${EXECUTABLE_512_8} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_256_16} ${DEF} -o 
${EXECUTABLE_256_16} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_128_32} ${DEF} -o ${EXECUTABLE_128_32} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_64_64} ${DEF} -o ${EXECUTABLE_64_64} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_32_128} ${DEF} -o ${EXECUTABLE_32_128} + + $(NVCC) ${NVCCCFLAGS} ${CUFILES_2} ${DEF} -o ${EXECUTABLE_2} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_4} ${DEF} -o ${EXECUTABLE_4} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_8} ${DEF} -o ${EXECUTABLE_8} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_16} ${DEF} -o ${EXECUTABLE_16} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_32} ${DEF} -o ${EXECUTABLE_32} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_64} ${DEF} -o ${EXECUTABLE_64} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_128} ${DEF} -o ${EXECUTABLE_128} + +clean: + rm -f *.o vector_seq vector_seq_* diff --git a/workloads/micro/async/vector_seq/run.sh b/workloads/micro/async/vector_seq/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..61775ede6c26208ae2a150958f8eec5375952773 --- /dev/null +++ b/workloads/micro/async/vector_seq/run.sh @@ -0,0 +1,11 @@ +# ./vector_seq 17179869184 10 524288 + +./vector_seq 8589934592 10 262144 +#nsys profile --stats=true ./vector_seq 1073741824 10 32768 + + +# ./vector_seq 4294967296 10 131072 + +# ./vector_seq 2147483648 10 32768 +#./vector_seq 1073741824 10 32768 +# ./vector_seq 1048576 100 32768 diff --git a/workloads/micro/async/vector_seq/run_large.sh b/workloads/micro/async/vector_seq/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..66794fb697b9cd4165ae0e85db50fd512c3467e7 --- /dev/null +++ b/workloads/micro/async/vector_seq/run_large.sh @@ -0,0 +1 @@ +./vector_seq 134217728 100 65536 diff --git a/workloads/micro/async/vector_seq/run_medium.sh b/workloads/micro/async/vector_seq/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..9bbbb986b9351d0e1c82e9d53d86d4d0f83c7492 --- /dev/null +++ b/workloads/micro/async/vector_seq/run_medium.sh @@ -0,0 +1 @@ +./vector_seq 16777216 100 32768 diff --git 
a/workloads/micro/async/vector_seq/run_mega.sh b/workloads/micro/async/vector_seq/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..7b622d0246ef3a0a542e2422e09099850d52f3d1 --- /dev/null +++ b/workloads/micro/async/vector_seq/run_mega.sh @@ -0,0 +1 @@ +./vector_seq 8589934592 100 262144 diff --git a/workloads/micro/async/vector_seq/run_small.sh b/workloads/micro/async/vector_seq/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..da65ab8dbc61ff2e26a2638703dc97d003cb9dba --- /dev/null +++ b/workloads/micro/async/vector_seq/run_small.sh @@ -0,0 +1 @@ +./vector_seq 2097152 100 32768 diff --git a/workloads/micro/async/vector_seq/run_super.sh b/workloads/micro/async/vector_seq/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..5a98cb48e7b06fa4db1cb7db99d9a0e8ebcf7f46 --- /dev/null +++ b/workloads/micro/async/vector_seq/run_super.sh @@ -0,0 +1 @@ +./vector_seq 1073741824 100 65536 diff --git a/workloads/micro/async/vector_seq/run_tiny.sh b/workloads/micro/async/vector_seq/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..a2f7760fa3fb1a37be61193c663da2e38690bfe3 --- /dev/null +++ b/workloads/micro/async/vector_seq/run_tiny.sh @@ -0,0 +1 @@ +./vector_seq 262144 100 8192 diff --git a/workloads/micro/async/vector_seq/vector_seq.cu b/workloads/micro/async/vector_seq/vector_seq.cu new file mode 100644 index 0000000000000000000000000000000000000000..188e8a7bdc7c325a756c3c8849d1b943f83342d1 --- /dev/null +++ b/workloads/micro/async/vector_seq/vector_seq.cu @@ -0,0 +1,263 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.005 + +/* Problem size */ +#define SIZE 1073741824 +#define ITER 100 +uint64_t NI; + +/* Thread block dimensions */ +#ifndef DIM_THREAD_BLOCK +#define DIM_THREAD_BLOCK 256 +#endif + +#ifndef BATCH_SIZE +#define BATCH_SIZE 16 +#endif + +#ifndef NBLOCKS +#define NBLOCKS 64 +#endif + +#define LCG_A 1.1f +#define LCG_B 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void saxpy(DATA_TYPE *A, uint64_t iterations) +{ + for (uint64_t i = 0; i < NI; i++) { + for (uint64_t iter = 0; iter < iterations; iter++) { + A[i] = LCG_A * A[i] + LCG_B; + } + } +} + +void init(DATA_TYPE *A, DATA_TYPE *A_ref) +{ + for (uint64_t i = 0; i < NI; i++) { + A[i] = ((DATA_TYPE) i) / NI; + A_ref[i] = ((DATA_TYPE) i) / NI; + } +} + + +void compareResults(DATA_TYPE* A, DATA_TYPE* A_outputFromGpu) +{ + uint64_t fail = 0; + + // Compare C1 
and C2 + for (uint64_t i = 0; i < NI; i++) { + // printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + if (percentDiff(A[i], A_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void vector_seq_kernel(DATA_TYPE *a, uint64_t NI, uint64_t iterations, uint64_t block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + + const uint64_t mem_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + // __shared__ DATA_TYPE tmp[mem_size * PREFETCH_COUNT]; + extern __shared__ DATA_TYPE tmp[]; + + uint64_t total_tiles = NI / mem_size; + uint64_t base_tiles = total_tiles / gridDim.x; + + uint64_t tiles_this_block = block_size / mem_size; + + uint64_t fetch = base_tiles * blockIdx.x; + uint64_t end_tile = fetch + tiles_this_block; + + for (uint64_t compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + memcpy_async(tmp[(fetch % PREFETCH_COUNT) * mem_size + i], a[fetch * mem_size + i], pipe); + } + pipe.commit(); + } + if (fetch == end_tile) { + for (uint64_t i = 0; i < PREFETCH_COUNT-1; ++i) { pipe.commit(); } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + for (uint64_t iter = 0; iter < iterations; iter++) + { + tmp[(compute % PREFETCH_COUNT) * mem_size + i] = LCG_A * tmp[(compute % PREFETCH_COUNT) * mem_size + i] + LCG_B; + } + } + block.sync(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + a[compute * mem_size + i] = tmp[(compute % PREFETCH_COUNT) * mem_size + i]; + } + block.sync(); + } +} + +void saxpyCuda(DATA_TYPE *A, 
DATA_TYPE *A_gpu, uint64_t iterations, uint64_t block_size) +{ + double t_start, t_end; + if (block_size <= DIM_THREAD_BLOCK) + block_size = DIM_THREAD_BLOCK; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / block_size); + + int MaxBytesofSharedMemory = DIM_THREAD_BLOCK * BATCH_SIZE * PREFETCH_COUNT * sizeof(DATA_TYPE); + cudaFuncSetAttribute(vector_seq_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, MaxBytesofSharedMemory); + + //t_start = rtclock(); + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI, cudaMemcpyHostToDevice); + vector_seq_kernel<<>>(A_gpu, NI, iterations, block_size); + cudaDeviceSynchronize(); + cudaMemcpy(A, A_gpu, sizeof(DATA_TYPE) * NI, cudaMemcpyDeviceToHost); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + uint64_t iterations = ITER; + uint64_t block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + if (argc >= 4) { + NI = atoll(argv[1]); + iterations = atoi(argv[2]); + block_size = atoi(argv[3]); + } + else { + NI = SIZE; + iterations = ITER; + block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + } + + int nblocks = NBLOCKS; + block_size = NI / nblocks; + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE *A_ref; + + DATA_TYPE *A_gpu; + + A = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + A_ref = (DATA_TYPE *)malloc(NI*sizeof(DATA_TYPE)); + + //cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + //cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + //cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + init(A, A_ref); + + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI); + + saxpyCuda(A, A_gpu, iterations, block_size); + + cudaFree(A_gpu); + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // saxpy(A_ref, iterations); + // t_end = rtclock(); + // fprintf(stdout, "CPU 
Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(A, A_ref); + free(A); + free(A_ref); + return 0; +} + diff --git a/workloads/micro/clean_output.py b/workloads/micro/clean_output.py new file mode 100644 index 0000000000000000000000000000000000000000..6064d8836c58b6bd23ac356298cc5232d5bbddcd --- /dev/null +++ b/workloads/micro/clean_output.py @@ -0,0 +1,26 @@ +import sys, os + +file_name = sys.argv[1] +new_file_name = sys.argv[1] + +f_old = open(file_name, "r") +lines = f_old.readlines() +os.remove(file_name) +f_new = open(new_file_name, "w+") + +for line in lines: + words = (line.split(',')) + words_space = (line.split(' ')) + if words_space[0] == 'start_tsc': + words = words_space + line = line.replace(' ', ',') + if len(words) < 4: + continue + if words[0] == 'start_tsc' or words[0] == 'CPU_Times' or words[0] == 'pc' or words[0] == 'CUPTI' or "cuda" in words[1]: + # TODO: remove arguments in kernel name + f_new.write(line) + +f_new.close() + + + diff --git a/workloads/micro/large_avg.csv b/workloads/micro/large_avg.csv new file mode 100644 index 0000000000000000000000000000000000000000..b806e6964e432d8ad01405c8c34e6c8bd4e72539 --- /dev/null +++ b/workloads/micro/large_avg.csv @@ -0,0 +1,22 @@ +group,,standard,async,uvm,uvm_prefetch,uvm_prefetch_async +vector_seq,gpu_kernel,0.024406434699912882,0.014204833078548453,0.07986748760823671,0.024403063586516616,0.014200207597376833 +vector_seq,memcpy,0.25480116432082867,0.2531628301595513,0.1529562419117931,0.13030881302310887,0.12969778892028752 +vector_seq,allocation,0.7207924009792585,0.6808459983503446,0.9101183897540731,0.7608090800527739,1.0363659445607025 +vector_rand,gpu_kernel,0.020723930945328876,0.015104031694203289,0.09107229805905348,0.020725205439409297,0.015117105233849596 +vector_rand,memcpy,0.24286626818110987,0.24244371119438382,0.14969218855536703,0.12448470948611579,0.12427049120668325 
+vector_rand,allocation,0.7364098008735612,0.7449357320516788,0.9260186605179876,1.0022955962051623,0.9222776917931101 +saxpy,gpu_kernel,0.02319161170845595,0.012746653869891237,0.1319734422427582,0.02318684877724961,0.01208626181541138 +saxpy,memcpy,0.31673582912015197,0.3168945575071053,0.19408013671551602,0.1571147037437844,0.15467656137611693 +saxpy,allocation,0.6600725591713921,0.6682877200712628,0.8381947319681949,0.9485774133169648,0.871793333544497 +gemv,gpu_kernel,0.00833943738745467,0.011628462412566305,0.038126573679953805,0.008390254360557535,0.011622984913263102 +gemv,memcpy,0.064541706099724,0.06501537811879494,0.10718741157739368,0.029030111654425497,0.02899978526886025 +gemv,allocation,0.9271188565128213,0.7973063654553985,1.0291127060237264,0.9547080356065579,0.9548208589251383 +2DCONV,gpu_kernel,0.005491933942181511,0.013626216867095234,0.15420873334413798,0.004845230055899283,0.013630559041770293 +2DCONV,memcpy,0.26065079857450957,0.25500991096992387,0.11251317392597295,0.023959110914172476,0.024008468133563575 +2DCONV,allocation,0.7338572674833089,0.767560207425334,0.7321085835715029,0.776947220875579,0.8310740127437749 +3DCONV,gpu_kernel,0.024156673989482253,0.04813193987173915,0.06995650164250347,0.024534268737036704,0.04812774509160054 +3DCONV,memcpy,0.20311415839032007,0.2017871719610086,0.04212272170121406,0.019054921883261466,0.01873599960617468 +3DCONV,allocation,0.7727291676201977,0.6563420986793592,0.6906270592299225,0.7659228726548141,0.9047487825331291 +gemm,gpu_kernel,0.3787204882782183,0.3948165162291086,0.542053391515842,0.3936100592230617,0.3965891516398322 +gemm,memcpy,0.1162323624259082,0.11545839163489466,0.15095030069959625,0.0542061388892262,0.05429840333357921 +gemm,allocation,0.5050471492958735,0.3924373195870893,0.4401573226270997,0.44934862640063816,0.6024320477381075 diff --git a/workloads/micro/micro_large_avg_std.pdf b/workloads/micro/micro_large_avg_std.pdf new file mode 100644 index 
0000000000000000000000000000000000000000..539237c6b0605d953bfbf5453302c96a5bd3e5cb Binary files /dev/null and b/workloads/micro/micro_large_avg_std.pdf differ diff --git a/workloads/micro/micro_large_avg_std.png b/workloads/micro/micro_large_avg_std.png new file mode 100644 index 0000000000000000000000000000000000000000..61af004df1b05461ae0b0993d62b2b1a76d9f6e9 Binary files /dev/null and b/workloads/micro/micro_large_avg_std.png differ diff --git a/workloads/micro/micro_large_no_legend.pdf b/workloads/micro/micro_large_no_legend.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c655c2cf417b9f3f41fcd3741b979a20860c8cca Binary files /dev/null and b/workloads/micro/micro_large_no_legend.pdf differ diff --git a/workloads/micro/micro_medium_no_legend.pdf b/workloads/micro/micro_medium_no_legend.pdf new file mode 100644 index 0000000000000000000000000000000000000000..9cb54ab718187f32f6437e12c7663a098ffedab5 Binary files /dev/null and b/workloads/micro/micro_medium_no_legend.pdf differ diff --git a/workloads/micro/micro_mega_no_legend.pdf b/workloads/micro/micro_mega_no_legend.pdf new file mode 100644 index 0000000000000000000000000000000000000000..07776ec1a3a6f9723a9e295029db35a92e130f0e Binary files /dev/null and b/workloads/micro/micro_mega_no_legend.pdf differ diff --git a/workloads/micro/micro_small_no_legend.pdf b/workloads/micro/micro_small_no_legend.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c89eb950b72f798d77df0edbb7f8262a5fbcb56b Binary files /dev/null and b/workloads/micro/micro_small_no_legend.pdf differ diff --git a/workloads/micro/micro_std.pdf b/workloads/micro/micro_std.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b9a43c94ae9dfb5c78f30b5d2435c71562be54c1 Binary files /dev/null and b/workloads/micro/micro_std.pdf differ diff --git a/workloads/micro/micro_super_avg_std.pdf b/workloads/micro/micro_super_avg_std.pdf new file mode 100644 index 
0000000000000000000000000000000000000000..10669d34241214ce1d254f8c0d198d722838416b Binary files /dev/null and b/workloads/micro/micro_super_avg_std.pdf differ diff --git a/workloads/micro/micro_super_no_legend.pdf b/workloads/micro/micro_super_no_legend.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0257a9d9002d28422b9e71e24019ddf95b738f65 Binary files /dev/null and b/workloads/micro/micro_super_no_legend.pdf differ diff --git a/workloads/micro/micro_tiny_no_legend.pdf b/workloads/micro/micro_tiny_no_legend.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8b6b17620c2389267a828eec1770334b43f3ef31 Binary files /dev/null and b/workloads/micro/micro_tiny_no_legend.pdf differ diff --git a/workloads/micro/output.xlsx b/workloads/micro/output.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..1f3fa63197cfa216e4cc0f82e80cc8de586e9872 Binary files /dev/null and b/workloads/micro/output.xlsx differ diff --git a/workloads/micro/output_all.xlsx b/workloads/micro/output_all.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..0a8e5ca3e02711a1444944d3b2e479384463b6e8 Binary files /dev/null and b/workloads/micro/output_all.xlsx differ diff --git a/workloads/micro/output_std.csv b/workloads/micro/output_std.csv new file mode 100644 index 0000000000000000000000000000000000000000..a9b66f10fe69131d558d6bc5ad791f2ad437b5ac --- /dev/null +++ b/workloads/micro/output_std.csv @@ -0,0 +1,9 @@ +group,tiny,small,medium,large,super,mega +vector_seq,0.0,0.0,0.0,0.0,0.0,0.0 +vector_rand,0.0,0.0,0.0,0.0,0.0,0.0 +saxpy,0.0,0.0,0.0,0.0,0.0,0.0 +gemv,0.0,0.0,0.0,0.0,0.0,0.0 +2DCONV,0.0,0.0,0.0,0.0,0.0,0.0 +3DCONV,0.0,0.0,0.0,0.0,0.0,0.0 +gemm,0.0,0.0,0.0,0.0,0.0,0.0 +Geo-mean,0.0,0.0,0.0,0.0,0.0,0.0 diff --git a/workloads/micro/run_micro_all.py b/workloads/micro/run_micro_all.py new file mode 100644 index 0000000000000000000000000000000000000000..8378719735a9fdfce22db43a0db51be944d5f631 --- /dev/null +++ 
b/workloads/micro/run_micro_all.py @@ -0,0 +1,818 @@ +import os +import argparse +import sys +import subprocess +import psutil + +import os +import collections +import csv + +import numpy as np +import pandas as pd +import matplotlib +from matplotlib import pyplot as plt +import matplotlib.ticker as mticker +import xlsxwriter +import seaborn as sns + +from matplotlib.ticker import FormatStrFormatter +from matplotlib.legend_handler import HandlerTuple + +from subprocess import Popen, PIPE + +from scipy.stats import gmean + + +prefix = 'run_' +parameter_super_list = ['tiny', 'small', 'medium', 'large', 'super', 'mega'] +# parameter_super_list = ['large'] + +config_super_list = ['standard', 'async', 'uvm', 'uvm_prefetch', 'uvm_prefetch_async'] +workload_super_list = ['vector_seq', 'vector_rand', 'saxpy', 'gemv', 'gemm', '2DCONV', '3DCONV'] +# workload_super_list = ['2DCONV'] + + +def dict_to_list(input_dict): + return_list = [] + for elemement in input_dict: + return_list.append(elemement) + return return_list + +def addOptions(parser): + parser.add_argument("-i", "--iterations", type=int, default=1, + help="Number of iterations") + parser.add_argument("-o", "--output", type=str, default='output.xlsx', + help="output trace log file") + parser.add_argument("-f", "--figure", type=str, default='micro', + help="output pdf file") + parser.add_argument("-p", "--profiling", action='store_true', + help="whether to run profiling or just parse results") + parser.add_argument("-c", "--clean", action='store_true', + help="whether to clean all results") + + +def get_config_list(root_directory): + config_list = [] + for dict in os.listdir(root_directory): + if os.path.isdir(dict) and dict in config_super_list: + config_list.append(dict) + return config_list + + +def get_workload_dict(root_directory, config_list): + workload_list = [] + workload_dict = dict() + for config in config_list: + config_dir = root_directory + '/' + config + + for root, directories, files in 
os.walk(config_dir, topdown=False): + for dir in directories: + if dir in workload_super_list: + if dir not in workload_dict: + workload_dict[dir] = dict() + workload_dict[dir][config] = os.path.join(root, dir) + if dir not in workload_list: + workload_list.append(dir) + + return workload_list, workload_dict + +def execute_bashes(workload_dict, iterations): + for workload in workload_dict: + if workload in workload_super_list: + for config in workload_dict[workload]: + if config in config_super_list: + cur_dir = workload_dict[workload][config] + pwd = os.getcwd() + os.chdir(cur_dir) + os.system('make') + for para in parameter_super_list: + for i in range(0, iterations): + sh_file = './' + prefix + para + '.sh' + exe_cmd = sh_file + ' > ' + para + '_' + str(i) + '.log' + os.system(exe_cmd) + os.chdir(pwd) + + +def execute_clean_bashes(workload_dict): + for workload in workload_dict: + if workload in workload_super_list: + for config in workload_dict[workload]: + if config in config_super_list: + cur_dir = workload_dict[workload][config] + pwd = os.getcwd() + os.chdir(cur_dir) + os.system('make clean') + os.system('rm *.log') + os.chdir(pwd) + +def process_file(log_file, config): + result_dict = dict() + text = open(log_file, "r") + + overlap = 0 + + result_dict['gpu_kernel'] = 0 + result_dict['memcpy'] = 0 + result_dict['memcpy_HtoD'] = 0 + result_dict['memcpy_DtoH'] = 0 + result_dict['allocation'] = 0 + + result_dict['malloc'] = 0 + result_dict['free'] = 0 + + for line in text: + line = line.replace(':', '') + line = line.strip() + words = line.split(',') + + if 'KERNEL' in words[0] and len(words) >= 4: + result_dict['gpu_kernel'] += int(words[-1]) + elif 'MEMCPY' in words[0]: + if 'HTOD' in words[0] or 'HtoD' in words[0]: + result_dict['memcpy_HtoD'] += int(words[-1]) + else: + result_dict['memcpy_DtoH'] += int(words[-1]) + elif 'cudaMalloc' in words[0]: + result_dict['allocation'] += int(words[3]) + result_dict['malloc'] += int(words[3]) + elif 'cudaFree' in 
words[0]: + result_dict['allocation'] += int(words[3]) + result_dict['free'] += int(words[3]) + + return_dict = dict() + + if config == 'uvm': + return_dict['gpu_kernel'] = result_dict['gpu_kernel'] - result_dict['memcpy_HtoD'] + else: + return_dict['gpu_kernel'] = result_dict['gpu_kernel'] + return_dict['memcpy'] = result_dict['memcpy_HtoD'] + result_dict['memcpy_DtoH'] + return_dict['allocation'] = result_dict['allocation'] + return_dict['malloc'] = result_dict['malloc'] + return_dict['free'] = result_dict['free'] + + return return_dict + + +def process_results(workload_dict, iterations): + result_dict = dict() + for workload in workload_dict: + if workload in workload_super_list: + for config in workload_dict[workload]: + if config in config_super_list: + cur_dir = workload_dict[workload][config] + for para in parameter_super_list: + if para not in result_dict: + result_dict[para] = dict() + if workload not in result_dict[para]: + result_dict[para][workload] = dict() + + # if config not in result_dict[para][workload]: + result_dict[para][workload][config] = [] + for i in range(0, iterations): + log_file = cur_dir + '/' + para + '_' + str(i) + '.log' + result_dict[para][workload][config].append(process_file(log_file, config)) + sorted(result_dict[para][workload]) + sorted(result_dict[para]) + return result_dict + + +def export_xlsx(result_dict, config_list, iterations, output_file): + workbook = xlsxwriter.Workbook(output_file) + + for para in result_dict: + worksheet = workbook.add_worksheet(para) + first_col = 'B' + first_row = 3 + + col_index = 0 + row_index = first_row + for workload in result_dict[para]: + if col_index + (ord(first_col) - ord('A')) < 26: + current_col = chr(ord(first_col) + col_index) + else: + all = (ord(first_col) - ord('A')) + col_index + dig_1 = all // 26 - 1 + dig_2 = all % 26 + current_col = chr(ord('A') + dig_1) + chr(ord('A') + dig_2) + worksheet.write(current_col + '1', workload) + for config in config_list: + if col_index + 
(ord(first_col) - ord('A')) < 26: + current_col = chr(ord(first_col) + col_index) + else: + all = (ord(first_col) - ord('A')) + col_index + dig_1 = all // 26 - 1 + dig_2 = all % 26 + current_col = chr(ord('A') + dig_1) + chr(ord('A') + dig_2) + worksheet.write(current_col + '2', config) + row_index = first_row + result_list = dict_to_list(result_dict[para][workload][config][0]) + + for i in range(0, iterations): + tmp_result_dict = dict() + for result in result_dict[para][workload][config][i]: + tmp_result_dict[result] = result_dict[para][workload][config][i][result] + + if col_index == 0: + for j in range(0, len(result_list)): + worksheet.write('A' + str(j + row_index), result_list[j]) + + for j in range(0, len(result_list)): + worksheet.write(current_col + str(j + row_index), tmp_result_dict[result_list[j]]) + + row_index += len(result_list) + col_index += 1 + workbook.close() + + +def plot_results(result_dict, config_list, workload_list, iterations, output_file): + + config_ordered_list = [] + for config in config_super_list: + if config in config_list: + config_ordered_list.append(config) + + workload_ordered_list = [] + for workload in workload_super_list: + if workload in workload_list: + workload_ordered_list.append(workload) + + for para in result_dict: + pandas_list = [] + pandas_list.append('workload') + for config in config_list: + pandas_list.append(config) + + pandas_dict = dict() + for ele in pandas_list: + pandas_dict[ele] = [] + + + for workload in result_dict[para]: + for i in range(0, iterations): + pandas_dict['workload'].append(workload) + for config in config_list: + overall_time = 0 + overall_time += result_dict[para][workload][config][i]['gpu_kernel'] + overall_time += result_dict[para][workload][config][i]['memcpy'] + overall_time += result_dict[para][workload][config][i]['allocation'] + pandas_dict[config].append(overall_time) + + df = pd.DataFrame(pandas_dict) + dd=pd.melt(df,id_vars='workload',value_vars=config_list,var_name='configs') + 
+ my_pal = {"standard": '#000000', "async": '#0000ff', "uvm": '#ff0000', + "uvm_prefetch": '#ff6666', "uvm_prefetch_async": '#00ff00'} + sns.boxplot(data=dd, x='workload', y='value', hue='configs', order=workload_ordered_list, hue_order=config_ordered_list, palette=my_pal) + # sns.tight_layout() + plt.legend([], [], frameon=False) + # plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fontsize=11) + # plt.legend(fontsize=14) + + plt.xticks(fontsize=14, rotation=15) + plt.yticks(fontsize=14) + plt.grid(axis='y') + plt.xlabel("") + plt.ylabel("Execution time (ns)", fontsize=14) + plt.tight_layout() + # plt.margins(x=0.01, y=0.01) + + plt.savefig(output_file + '_' + para + '_no_legend.pdf', bbox_inches='tight') + plt.close() + + # fig = sns_plot.get_figure() + # fig.savefig(para + '.pdf') + +def export_xlsx_all(result_dict, config_list, iterations, output_file): + std_dict = dict() + + workbook = xlsxwriter.Workbook(output_file.replace(".xlsx", '') + '_all.xlsx') + + for para in result_dict: + worksheet = workbook.add_worksheet(para) + first_col = 'B' + first_row = 3 + + col_index = 0 + row_index = first_row + for workload in result_dict[para]: + if col_index + (ord(first_col) - ord('A')) < 26: + current_col = chr(ord(first_col) + col_index) + else: + all = (ord(first_col) - ord('A')) + col_index + dig_1 = all // 26 - 1 + dig_2 = all % 26 + current_col = chr(ord('A') + dig_1) + chr(ord('A') + dig_2) + worksheet.write(current_col + '1', workload) + for config in config_list: + if col_index + (ord(first_col) - ord('A')) < 26: + current_col = chr(ord(first_col) + col_index) + else: + all = (ord(first_col) - ord('A')) + col_index + dig_1 = all // 26 - 1 + dig_2 = all % 26 + current_col = chr(ord('A') + dig_1) + chr(ord('A') + dig_2) + worksheet.write(current_col + '2', config) + row_index = first_row + + all_time_list = [] + + for i in range(0, iterations): + tmp_result_dict = dict() + for result in result_dict[para][workload][config][i]: + 
tmp_result_dict[result] = result_dict[para][workload][config][i][result] + + overall_time = 0 + for result in tmp_result_dict: + overall_time += tmp_result_dict[result] + + if col_index == 0: + worksheet.write('A' + str(row_index), 'Time') + + worksheet.write(current_col + str(row_index), overall_time) + all_time_list.append(overall_time) + + row_index += 1 + + if para not in std_dict: + std_dict[para] = dict() + if workload not in std_dict[para]: + std_dict[para][workload] = dict() + + std_dict[para][workload][config] = np.std(all_time_list) / np.mean(all_time_list) + col_index += 1 + + workbook.close() + + avg_std_dict = dict() + mean_avg_std_dict = dict() + workload_list = [] + parameter_list = [] + for para in std_dict: + avg_std_dict[para] = dict() + overall_std_list = [] + for workload in std_dict[para]: + overall_std = 0 + for config in std_dict[para][workload]: + overall_std += std_dict[para][workload][config] / len(config_list) + avg_std_dict[para][workload] = overall_std + overall_std_list.append(overall_std) + # avg_std_dict[para][workload] = std_dict[para][workload]['standard'] + sorted(avg_std_dict[para]) + workload_list = dict_to_list(avg_std_dict[para]) + + mean_avg_std_dict[para] = gmean(overall_std_list) + sorted(avg_std_dict) + parameter_list = dict_to_list(avg_std_dict) + + + + avg_std_csv_file = output_file.replace(".xlsx", '') + '_std.csv' + out = open(avg_std_csv_file, "w") + + out.write('group,') + for i in range(0, len(parameter_list)): + out.write(parameter_list[i]) + if i != len(parameter_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + for i in range(0, len(workload_list)): + out.write(workload_list[i]+',') + for j in range(0, len(parameter_list)): + out.write(str(avg_std_dict[parameter_list[j]][workload_list[i]])) + if j != len(parameter_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + out.write('Geo-mean,') + for j in range(0, len(parameter_list)): + out.write(str(mean_avg_std_dict[parameter_list[j]])) + if 
j != len(parameter_list) - 1: + out.write(',') + else: + out.write(os.linesep) + out.close() + + + vector_seq_mega_csv_file = 'vector_seq_maga.csv' + out = open(vector_seq_mega_csv_file, "w") + + out.write('group,,Time') + out.write(os.linesep) + + for i in range(0, iterations): + gpu_kernel = result_dict['mega']['vector_seq']['standard'][i]['gpu_kernel'] + memcpy = result_dict['mega']['vector_seq']['standard'][i]['memcpy'] + allocation = result_dict['mega']['vector_seq']['standard'][i]['allocation'] + + out.write(str(i)+','+'gpu_kernel'+',') + out.write(str(gpu_kernel)) + out.write(os.linesep) + out.write(str(i)+','+'allocation'+',') + out.write(str(allocation)) + out.write(os.linesep) + out.write(str(i)+','+'memcpy'+',') + out.write(str(memcpy)) + out.write(os.linesep) + out.close() + + super_avg_dict = dict() + for workload in workload_list: + super_avg_dict[workload] = dict() + for c in range(0, len(config_list)): + super_avg_dict[workload][config_list[c]] = dict() + super_avg_dict[workload][config_list[c]]['gpu_kernel'] = 0 + super_avg_dict[workload][config_list[c]]['memcpy'] = 0 + super_avg_dict[workload][config_list[c]]['allocation'] = 0 + super_avg_dict[workload][config_list[c]]['all'] = 0 + for i in range(0, iterations): + super_avg_dict[workload][config_list[c]]['gpu_kernel'] += result_dict['super'][workload][config_list[c]][i]['gpu_kernel'] + super_avg_dict[workload][config_list[c]]['memcpy'] += result_dict['super'][workload][config_list[c]][i]['memcpy'] + super_avg_dict[workload][config_list[c]]['allocation'] += result_dict['super'][workload][config_list[c]][i]['allocation'] + + super_avg_dict[workload][config_list[c]]['all'] += result_dict['super'][workload][config_list[c]][i]['gpu_kernel'] + super_avg_dict[workload][config_list[c]]['all'] += result_dict['super'][workload][config_list[c]][i]['memcpy'] + super_avg_dict[workload][config_list[c]]['all'] += result_dict['super'][workload][config_list[c]][i]['allocation'] + + for c in range(0, 
len(config_list)): + normarlized_all = super_avg_dict[workload][config_list[c]]['all'] / super_avg_dict[workload]['standard']['all'] + super_avg_dict[workload][config_list[c]]['gpu_kernel'] = (super_avg_dict[workload][config_list[c]]['gpu_kernel'] / super_avg_dict[workload][config_list[c]]['all']) * normarlized_all + super_avg_dict[workload][config_list[c]]['memcpy'] = (super_avg_dict[workload][config_list[c]]['memcpy'] / super_avg_dict[workload][config_list[c]]['all']) * normarlized_all + super_avg_dict[workload][config_list[c]]['allocation'] = (super_avg_dict[workload][config_list[c]]['allocation'] / super_avg_dict[workload][config_list[c]]['all']) * normarlized_all + sorted(super_avg_dict) + + + super_avg_csv_file = 'super_avg.csv' + out = open(super_avg_csv_file, "w") + + out.write('group,,') + for i in range(0, len(config_list)): + out.write(config_list[i]) + if i != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + for i in range(0, len(workload_list)): + + out.write(workload_list[i]+',gpu_kernel,') + for j in range(0, len(config_list)): + out.write(str(super_avg_dict[workload_list[i]][config_list[j]]['gpu_kernel'])) + if j != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + out.write(workload_list[i]+',memcpy,') + for j in range(0, len(config_list)): + out.write(str(super_avg_dict[workload_list[i]][config_list[j]]['memcpy'])) + if j != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + out.write(workload_list[i]+',allocation,') + for j in range(0, len(config_list)): + out.write(str(super_avg_dict[workload_list[i]][config_list[j]]['allocation'])) + if j != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + out.close() + + + large_avg_dict = dict() + for workload in workload_list: + large_avg_dict[workload] = dict() + for c in range(0, len(config_list)): + large_avg_dict[workload][config_list[c]] = dict() + large_avg_dict[workload][config_list[c]]['gpu_kernel'] = 
0 + large_avg_dict[workload][config_list[c]]['memcpy'] = 0 + large_avg_dict[workload][config_list[c]]['allocation'] = 0 + large_avg_dict[workload][config_list[c]]['all'] = 0 + for i in range(0, iterations): + large_avg_dict[workload][config_list[c]]['gpu_kernel'] += result_dict['large'][workload][config_list[c]][i]['gpu_kernel'] + large_avg_dict[workload][config_list[c]]['memcpy'] += result_dict['large'][workload][config_list[c]][i]['memcpy'] + large_avg_dict[workload][config_list[c]]['allocation'] += result_dict['large'][workload][config_list[c]][i]['allocation'] + + large_avg_dict[workload][config_list[c]]['all'] += result_dict['large'][workload][config_list[c]][i]['gpu_kernel'] + large_avg_dict[workload][config_list[c]]['all'] += result_dict['large'][workload][config_list[c]][i]['memcpy'] + large_avg_dict[workload][config_list[c]]['all'] += result_dict['large'][workload][config_list[c]][i]['allocation'] + + for c in range(0, len(config_list)): + normarlized_all = large_avg_dict[workload][config_list[c]]['all'] / large_avg_dict[workload]['standard']['all'] + large_avg_dict[workload][config_list[c]]['gpu_kernel'] = (large_avg_dict[workload][config_list[c]]['gpu_kernel'] / large_avg_dict[workload][config_list[c]]['all']) * normarlized_all + large_avg_dict[workload][config_list[c]]['memcpy'] = (large_avg_dict[workload][config_list[c]]['memcpy'] / large_avg_dict[workload][config_list[c]]['all']) * normarlized_all + large_avg_dict[workload][config_list[c]]['allocation'] = (large_avg_dict[workload][config_list[c]]['allocation'] / large_avg_dict[workload][config_list[c]]['all']) * normarlized_all + sorted(large_avg_dict) + + + large_avg_csv_file = 'large_avg.csv' + out = open(large_avg_csv_file, "w") + + out.write('group,,') + for i in range(0, len(config_list)): + out.write(config_list[i]) + if i != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + for i in range(0, len(workload_list)): + + out.write(workload_list[i]+',gpu_kernel,') + for j in 
range(0, len(config_list)): + out.write(str(large_avg_dict[workload_list[i]][config_list[j]]['gpu_kernel'])) + if j != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + out.write(workload_list[i]+',memcpy,') + for j in range(0, len(config_list)): + out.write(str(large_avg_dict[workload_list[i]][config_list[j]]['memcpy'])) + if j != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + out.write(workload_list[i]+',allocation,') + for j in range(0, len(config_list)): + out.write(str(large_avg_dict[workload_list[i]][config_list[j]]['allocation'])) + if j != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + out.close() + + return avg_std_csv_file, vector_seq_mega_csv_file, large_avg_csv_file, super_avg_csv_file + + +def plot_std_results(csv_file, output_file): + df = pd.read_csv(csv_file, index_col=0) + + group_list = [] + for index in df.index: + if index not in group_list: + group_list.append(index) + col_list = df.columns + + ngroups = len(group_list) + x = np.arange(ngroups) + nbars = len(col_list) + width = (1 - 0.4) / (1.5 * nbars) # the width of the bars + + matplotlib.rcParams["hatch.linewidth"] = 2 + + # patterns = [ "|" , "/", "-", "", "x", "-", "\\", "+", "o", "O" ] + # patterns = [ "|" , "/", "x", "*", ".", "-", "\\", "+", "o", "O" ] + # patterns = ["//", "//", "//", "//", "//", "//", "//"] + patterns = ["", "", "", "", "", ""] + # color_tab = ['#D9D9D9', '#BFBFBF', '#A6A6A6', '#7F7F7F', '#7F7F7F', '#7F7F7F'] + color_tab = ['#ff0000', '#ff6d01','#46bdc6', '#4285f4', '#ea4335', '#34a853'] + edge_color_tab = ['#000000', '#000000', '#000000', '#000000', '#000000', '#000000'] + + fig, ax = plt.subplots(figsize=[8.8, 2.8]) + + rects = [] + + for i in range(0, nbars): + # height_cum = np.array([0.0] * ngroups) + height_total = np.array([1 for g in group_list]) # y coo + height_curr = np.array([float(df[col_list[i]][g]) for g in group_list]) # y coo + rect_base = ax.bar(x - 0.3 + (3 * i + 1.5) 
* width / 2, # x coo + height_curr / height_total, # y coo + width, label=col_list[i], + color=color_tab[i], + edgecolor=edge_color_tab[i], + linewidth=0.5 + ) + rects.append(rect_base) + # height_cum += height_curr + + hdl_pair = [(rects[i]) for i in range(nbars)] + + ax.set_xticks(x) + ax.set_xticklabels(group_list, rotation=0) + # ax.legend() + # ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fontsize=11) + # prop = dict(weight='bold') + ax.legend(fontsize=14, ncol=3) + # ax.yaxis.set_major_formatter(mticker.PercentFormatter(1.0)) + + + plt.xticks(fontsize=14, rotation=15) + plt.yticks(fontsize=14) + plt.grid(axis='y') + plt.xlabel("") + plt.ylabel("std / mean", fontsize=14) + plt.tight_layout() + # plt.margins(x=0.01, y=0.01) + + plt.savefig(output_file + '_std.pdf', bbox_inches='tight') + plt.close() + + +def plot_breakdown_results(csv_file, output_file): + df = pd.read_csv(csv_file, index_col=[0,1]) + group_list = [] + subgrp_list = [] + for index in df.index: + if index[0] not in group_list: + group_list.append(index[0]) + if index[1] not in subgrp_list: + subgrp_list.append(index[1]) + col_list = df.columns + + ngroups = len(group_list) + nsubgrps = len(subgrp_list) + x = np.arange(ngroups) + nbars = len(col_list) + width = (1 - 0.4) / (1.5 * nbars) # the width of the bars + + matplotlib.rcParams["hatch.linewidth"] = 2 + + patterns = [ "|" , "/", "-", "", "x", "-", "\\", "+", "o", "O" ] + #patterns = [ "|" , "/", "x", "*", ".", "-", "\\", "+", "o", "O" ] + color_tab = ['#000000', '#000000', '#000000', '#ff0000', '#ff6d01','#46bdc6', '#4285f4', '#ea4335', '#34a853'] + #color_tab = ['#000000', '#ff0000', '#ff6666', '#ff6d01','#46bdc6', '#4285f4', '#ea4335', '#34a853'] + + fig, ax = plt.subplots(figsize=[8.8, 2.8]) + hdl_pair = [] + + rects = [] + + for i in range(0, nbars): + height_cum = np.array([0.0] * ngroups) + height_total = np.array([df[col_list[i]][g][0] + df[col_list[i]][g][1] + df[col_list[i]][g][2] for g in group_list]) # y coo + 
height_curr = np.array([df[col_list[i]][g][0] for g in group_list]) # y coo + rect_base = ax.bar(x - 0.3 + (3 * i + 1.5) * width / 2, # x coo + height_curr, # y coo + width, label=subgrp_list[0], + color=color_tab[i], + edgecolor=color_tab[i], + linewidth=0.5 + ) + # hatch=patterns[i]*3 + rects.append(rect_base) + height_cum += height_curr + for j in range(1, 3): + height_curr = np.array([df[col_list[i]][g][j] for g in group_list]) + rect = ax.bar(x - 0.3 + (3 * i + 1.5) * width / 2, # x coo + height_curr, # y coo + width, + bottom=height_cum, + label=subgrp_list[j], + color=color_tab[i], + edgecolor=color_tab[i], + linewidth=0.5, + alpha=0.25 * (4 - j) + ) + rects.append(rect) + height_cum += height_curr + + hdl_pair = [(rects[i*nsubgrps], rects[i*nsubgrps+1]) for i in range(nbars)] + ax.set_xticks(x) + ax.set_xticklabels(group_list, rotation = 0) + + ax.legend() + # ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fontsize=11) + ax.legend(fontsize=14) + # ax.yaxis.set_major_formatter(mticker.PercentFormatter(1.0)) + # ax.set_yscale('log') + + plt.xticks(fontsize=14, rotation=15) + plt.yticks(fontsize=14) + plt.grid(axis='y') + plt.xlabel("") + plt.ylabel("Execution Time (ns)", fontsize=14) + plt.tight_layout() + # plt.margins(x=0.01, y=0.01) + + plt.savefig(output_file + '_std.pdf', bbox_inches='tight') + plt.close() + + +def plot_breakdown_avg_results(csv_file, output_file): + df = pd.read_csv(csv_file, index_col=[0, 1]) + group_list = [] + subgrp_list = [] + for index in df.index: + if index[0] not in group_list: + group_list.append(index[0]) + if index[1] not in subgrp_list: + subgrp_list.append(index[1]) + col_list = df.columns + + ngroups = len(group_list) + nsubgrps = len(subgrp_list) + x = np.arange(ngroups) + nbars = len(col_list) + width = (1 - 0.4) / (1.5 * nbars) # the width of the bars + + matplotlib.rcParams["hatch.linewidth"] = 2 + + patterns = ["", "-", "/", "|", "/", "-", "x", "-", "\\", "+", "o", "O"] + color_tab = ['#000000', 
'#0000ff', '#ff0000', '#ff6666', '#00ff00'] + + fig, ax = plt.subplots(figsize=[8.8, 3.8]) + hdl_pair = [] + + rects = [] + + for i in range(0, nbars): + height_cum = np.array([0.0] * ngroups) + height_curr = np.array([df[col_list[i]][g][0] + for g in group_list]) # y coo + rect_base = ax.bar(x - 0.3 + (3 * i + 1.5) * width / 2, # x coo + height_curr, # y coo + width, + label=col_list[i]+" "+subgrp_list[0], + color=color_tab[i], + edgecolor=color_tab[0], + linewidth=0.25 + ) + rects.append(rect_base) + height_cum += height_curr + for j in range(1, 3): + height_curr = np.array([df[col_list[i]][g][j] + for g in group_list]) + rect = ax.bar(x - 0.3 + (3 * i + 1.5) * width / 2, # x coo + height_curr, # y coo + width, + label=col_list[i]+" "+subgrp_list[j], + bottom=height_cum, + color=color_tab[i], + edgecolor=color_tab[0], + linewidth=0.25, + alpha = 0.25 * (4 - j) + ) + rects.append(rect) + height_cum += height_curr + + hdl_pair = [(rects[i*nsubgrps], rects[i*nsubgrps+1], rects[i*nsubgrps+2]) for i in range(nbars)] + ax.set_xticks(x) + ax.set_xticklabels(group_list, rotation=0) + + ax.legend(hdl_pair, col_list, loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.08), fontsize=14, handler_map={tuple: HandlerTuple(ndivide=None)}) + # ax.legend() + # ax.yaxis.set_major_formatter(mticker.PercentFormatter(1.0)) + # ax.set_yscale('log') + + plt.xticks(fontsize=14, rotation=15) + plt.yticks(fontsize=14) + plt.grid(axis='y') + plt.xlabel("") + plt.ylabel("Time (normalized to standard)", fontsize=14) + plt.tight_layout() + # plt.margins(x=0.01, y=0.01) + + plt.savefig(output_file + '_std.pdf', bbox_inches='tight') + plt.savefig(output_file + '_std.png', bbox_inches='tight') + plt.close() + + +def main(): + parser = argparse.ArgumentParser() + addOptions(parser) + + options = parser.parse_args() + + iterations = options.iterations + output_csv_file = options.output + output_figure_file = options.figure + profiling = options.profiling + clean = options.clean + + root_directory = 
'./' + + config_list = get_config_list(root_directory) + print(config_list) + workload_list, workload_dict = get_workload_dict(root_directory, config_list) + print(workload_dict) + + + if clean: + execute_clean_bashes(workload_dict) + else: + if profiling: + execute_bashes(workload_dict, iterations) + + result_dict = process_results(workload_dict, iterations) + export_xlsx(result_dict, config_list, iterations, output_csv_file) + plot_results(result_dict, config_list, workload_list, iterations, output_figure_file) + + avg_std_csv_file, vector_seq_mega_csv_file, large_avg_csv_file, super_avg_csv_file = export_xlsx_all(result_dict, config_super_list, iterations, output_csv_file) + plot_std_results(avg_std_csv_file, output_figure_file) + # plot_breakdown_results(vector_seq_mega_csv_file, 'vector_seq_mega') + plot_breakdown_avg_results(large_avg_csv_file, 'micro_large_avg') + # plot_breakdown_avg_results(super_avg_csv_file, 'micro_super_avg') + +if __name__ == '__main__': + main() + diff --git a/workloads/micro/run_micro_perf.py b/workloads/micro/run_micro_perf.py new file mode 100644 index 0000000000000000000000000000000000000000..dc459aa53db645303e717f3e257ba53f1837f527 --- /dev/null +++ b/workloads/micro/run_micro_perf.py @@ -0,0 +1,148 @@ +import os +import argparse +import sys +import subprocess +import psutil + +import os +import collections +import csv + +import numpy as np +import pandas as pd +import matplotlib +from matplotlib import pyplot as plt +import matplotlib.ticker as mticker +import xlsxwriter +import seaborn as sns + +from matplotlib.ticker import FormatStrFormatter +from matplotlib.legend_handler import HandlerTuple + +from subprocess import Popen, PIPE + +from scipy.stats import gmean + + +prefix = 'run_' +parameter_super_list = ['super'] + +config_super_list = ['standard', 'async', 'uvm', 'uvm_prefetch', 'uvm_prefetch_async'] +workload_super_list = ['gemm'] + + +def dict_to_list(input_dict): + return_list = [] + for elemement in input_dict: + 
return_list.append(elemement) + return return_list + +def addOptions(parser): + parser.add_argument("-i", "--iterations", type=int, default=1, + help="Number of iterations") + parser.add_argument("-c", "--csv", type=str, default='output.xlsx', + help="output trace log file") + parser.add_argument("-f", "--figure", type=str, default='micro', + help="output pdf file") + parser.add_argument("-p", "--profiling", action='store_true', + help="whether to run profiling or just parse results") + + +def get_config_list(root_directory): + config_list = [] + for dict in os.listdir(root_directory): + if os.path.isdir(dict) and dict in config_super_list: + config_list.append(dict) + return config_list + + +def get_workload_dict(root_directory, config_list): + workload_list = [] + workload_dict = dict() + for config in config_list: + config_dir = root_directory + '/' + config + + for root, directories, files in os.walk(config_dir, topdown=False): + for dir in directories: + if dir in workload_super_list: + if dir not in workload_dict: + workload_dict[dir] = dict() + workload_dict[dir][config] = os.path.join(root, dir + '_perf') + if dir not in workload_list: + workload_list.append(dir) + + return workload_list, workload_dict + +def get_run_cmd(bash_file): + return_txt = '' + text = open(bash_file, "r") + for line in text: + return_txt += line.rstrip() + return return_txt + +def execute_bashes(workload_dict, iterations, perf_list): + for workload in workload_dict: + if workload in workload_super_list: + for config in workload_dict[workload]: + if config in config_super_list: + cur_dir = workload_dict[workload][config] + pwd = os.getcwd() + os.chdir(cur_dir) + os.system('make') + for para in parameter_super_list: + for iter in range(0, iterations): + sh_file = './' + prefix + para + '.sh' + + perf_cmd = '' + for i in range(0, len(perf_list)): + perf_cmd += perf_list[i] + if i != len(perf_list) - 1: + perf_cmd += ',' + profile_cmd = 'ncu --metrics ' + profile_cmd += perf_cmd + 
profile_cmd += ' --csv --log-file ' + para + '_' + str(iter) + '.profile.csv ' + profile_cmd += get_run_cmd(sh_file) + os.system(profile_cmd) + os.chdir(pwd) + +def main(): + parser = argparse.ArgumentParser() + addOptions(parser) + + options = parser.parse_args() + + iterations = options.iterations + output_csv_file = options.csv + output_figure_file = options.figure + profiling = options.profiling + + perf_list = [] + + perf_list.append('smsp__inst_executed.sum') + + perf_list.append('smsp__sass_thread_inst_executed_op_memory_pred_on.sum') + perf_list.append('smsp__sass_thread_inst_executed_op_control_pred_on.sum') + perf_list.append('smsp__sass_thread_inst_executed_op_integer_pred_on.sum') + perf_list.append('smsp__sass_thread_inst_executed_op_fp16_pred_on.sum') + perf_list.append('smsp__sass_thread_inst_executed_op_fp32_pred_on.sum') + perf_list.append('smsp__sass_thread_inst_executed_op_fp64_pred_on.sum') + + perf_list.append('l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum') + perf_list.append('l1tex__t_sectors_pipe_lsu_mem_global_op_ld_lookup_hit.sum') + perf_list.append('l1tex__t_sectors_pipe_lsu_mem_global_op_st.sum') + perf_list.append('l1tex__t_sectors_pipe_lsu_mem_global_op_st_lookup_hit.sum') + + root_directory = './' + + config_list = get_config_list(root_directory) + print(config_list) + workload_list, workload_dict = get_workload_dict(root_directory, config_list) + print(workload_dict) + + if profiling: + execute_bashes(workload_dict, iterations, perf_list) + + +if __name__ == '__main__': + main() + diff --git a/workloads/micro/run_micro_sensitivity.py b/workloads/micro/run_micro_sensitivity.py new file mode 100644 index 0000000000000000000000000000000000000000..9ad77d1fbcec7cd9a6553654916660d7855eb367 --- /dev/null +++ b/workloads/micro/run_micro_sensitivity.py @@ -0,0 +1,388 @@ +import os +import argparse +import sys +import subprocess +import psutil + +import os +import collections +import csv + +import numpy as np +import pandas as pd +import 
matplotlib +from matplotlib import pyplot as plt +import matplotlib.ticker as mticker +import xlsxwriter +import seaborn as sns + +from matplotlib.ticker import FormatStrFormatter +from matplotlib.legend_handler import HandlerTuple + +from subprocess import Popen, PIPE + +from scipy.stats import gmean + +config_super_list = ['standard', 'async', 'uvm', 'uvm_prefetch', 'uvm_prefetch_async'] +workload_super_list = ['vector_seq'] + +# thread_block_batch_list = [[1024, 4], [512, 8], [256, 16], [128, 32], [64, 64], [32, 128], [4096, 256], [2048, 256], [1024, 256], [512, 256], [256, 256], [128, 256], [64, 256], [32, 256], [16, 256]] + +block_batch_list = [[4096, 256], [2048, 256], [1024, 256], [512, 256], [256, 256], [128, 256], [64, 256], [32, 256], [16, 256]] +thread_batch_list = [[1024, 4], [512, 8], [256, 16], [128, 32], [64, 64], [32, 128]] + +thread_block_batch_list = block_batch_list + thread_batch_list + +def dict_to_list(input_dict): + return_list = [] + for elemement in input_dict: + return_list.append(elemement) + return return_list + +def addOptions(parser): + parser.add_argument("-i", "--iterations", type=int, default=1, + help="Number of iterations") + parser.add_argument("-c", "--csv", type=str, default='output.xlsx', + help="output trace log file") + parser.add_argument("-f", "--figure", type=str, default='micro', + help="output pdf file") + parser.add_argument("-p", "--profiling", action='store_true', + help="whether to run profiling or just parse results") + + +def get_config_list(root_directory): + config_list = [] + for dict in os.listdir(root_directory): + if os.path.isdir(dict) and dict in config_super_list: + config_list.append(dict) + return config_list + + +def get_workload_dict(root_directory, config_list): + workload_list = [] + workload_dict = dict() + for config in config_list: + config_dir = root_directory + '/' + config + + for root, directories, files in os.walk(config_dir, topdown=False): + for dir in directories: + if dir in 
workload_super_list: + if dir not in workload_dict: + workload_dict[dir] = dict() + workload_dict[dir][config] = os.path.join(root, dir) + if dir not in workload_list: + workload_list.append(dir) + + return workload_list, workload_dict + +def execute_bashes(workload_dict, thread_batch_list, iterations): + for workload in workload_dict: + if workload in workload_super_list: + for config in workload_dict[workload]: + if config in config_super_list: + cur_dir = workload_dict[workload][config] + pwd = os.getcwd() + os.chdir(cur_dir) + os.system('make') + for thread_batch in thread_batch_list: + for i in range(0, iterations): + sh_file = './' + workload + '_' + str(thread_batch[0]) + '_' + str(thread_batch[1]) + exe_cmd = sh_file + ' > ' + str(thread_batch[0]) + '_' + str(thread_batch[1]) + '_' + str(i) + '.log' + os.system(exe_cmd) + os.chdir(pwd) + + +def process_file(log_file, config): + result_dict = dict() + text = open(log_file, "r") + + overlap = 0 + + result_dict['gpu_kernel'] = 0 + result_dict['memcpy'] = 0 + result_dict['memcpy_HtoD'] = 0 + result_dict['memcpy_DtoH'] = 0 + result_dict['allocation'] = 0 + + for line in text: + line = line.replace(':', '') + line = line.strip() + words = line.split(',') + + if 'KERNEL' in words[0] and len(words) >= 4: + result_dict['gpu_kernel'] += int(words[-1]) + elif 'MEMCPY' in words[0]: + if 'HTOD' in words[0] or 'HtoD' in words[0]: + result_dict['memcpy_HtoD'] += int(words[-1]) + else: + result_dict['memcpy_DtoH'] += int(words[-1]) + elif 'cudaMalloc' in words[0]: + result_dict['allocation'] += int(words[3]) + elif 'cudaFree' in words[0]: + result_dict['allocation'] += int(words[3]) + + return_dict = dict() + + if config == 'uvm': + return_dict['gpu_kernel'] = result_dict['gpu_kernel'] - result_dict['memcpy_HtoD'] + else: + return_dict['gpu_kernel'] = result_dict['gpu_kernel'] + return_dict['memcpy'] = result_dict['memcpy_HtoD'] + result_dict['memcpy_DtoH'] + return_dict['allocation'] = result_dict['allocation'] + + return 
return_dict + + +def process_results(workload_dict, thread_batch_list, iterations): + result_dict = dict() + for workload in workload_dict: + if workload in workload_super_list: + result_dict[workload] = dict() + for thread_batch in thread_batch_list: + thread_batch_str = str(thread_batch[0]) + '_' + str(thread_batch[1]) + result_dict[workload][thread_batch_str] = dict() + for config in workload_dict[workload]: + if config in config_super_list: + result_dict[workload][thread_batch_str][config] = [] + cur_dir = workload_dict[workload][config] + + for i in range(0, iterations): + log_file = cur_dir + '/' + thread_batch_str + '_' + str(i) + '.log' + result_dict[workload][thread_batch_str][config].append(process_file(log_file, config)) + return result_dict + +def export_csv(result_dict, thread_batch_list, config_list, iterations, isblock): + workload_list = dict_to_list(result_dict) + + csv_list = [] + for workload in workload_list: + avg_dict = dict() + for b in range(0, len(thread_batch_list)): + thread_batch_str = str(thread_batch_list[b][0]) + '_' + str(thread_batch_list[b][1]) + avg_dict[thread_batch_str] = dict() + for c in range(0, len(config_list)): + avg_dict[thread_batch_str][config_list[c]] = dict() + + metric_list = dict_to_list(result_dict[workload][thread_batch_str][config_list[c]][0]) + + for metric in metric_list: + avg_dict[thread_batch_str][config_list[c]][metric] = 0 + avg_dict[thread_batch_str][config_list[c]]['all'] = 0 + + for i in range(0, iterations): + for metric in metric_list: + avg_dict[thread_batch_str][config_list[c]][metric] += result_dict[workload][thread_batch_str][config_list[c]][i][metric] + avg_dict[thread_batch_str][config_list[c]]['all'] += result_dict[workload][thread_batch_str][config_list[c]][i][metric] + + for c in range(0, len(config_list)): + thread_batch_str_0 = str(thread_batch_list[0][0]) + '_' + str(thread_batch_list[0][1]) + normarlized_all = avg_dict[thread_batch_str][config_list[c]]['all'] / 
avg_dict[thread_batch_str_0]['standard']['all'] + for metric in metric_list: + avg_dict[thread_batch_str][config_list[c]][metric] = (avg_dict[thread_batch_str][config_list[c]][metric] / avg_dict[thread_batch_str][config_list[c]]['all']) * normarlized_all + + if isblock: + workload_csv_file = workload + '_sensitivity_block.csv' + else: + workload_csv_file = workload + '_sensitivity_thread.csv' + csv_list.append(workload_csv_file) + out = open(workload_csv_file, "w") + + out.write('group,,') + for i in range(0, len(config_list)): + out.write(config_list[i]) + if i != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + for thread_batch in thread_batch_list: + thread_batch_str = str(thread_batch[0]) + '_' + str(thread_batch[1]) + for metric in metric_list: + out.write(str(thread_batch[0]) + ',' + metric + ',') + for j in range(0, len(config_list)): + out.write(str(avg_dict[thread_batch_str][config_list[j]][metric])) + if j != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + out.close() + + return csv_list + +def plot_std_results(csv_file, output_file): + df = pd.read_csv(csv_file, index_col=0) + + group_list = [] + for index in df.index: + if index not in group_list: + group_list.append(index) + col_list = df.columns + + ngroups = len(group_list) + x = np.arange(ngroups) + nbars = len(col_list) + width = (1 - 0.4) / (1.5 * nbars) # the width of the bars + + matplotlib.rcParams["hatch.linewidth"] = 2 + + # patterns = [ "|" , "/", "-", "", "x", "-", "\\", "+", "o", "O" ] + # patterns = [ "|" , "/", "x", "*", ".", "-", "\\", "+", "o", "O" ] + # patterns = ["//", "//", "//", "//", "//", "//", "//"] + patterns = ["", "", "", "", "", ""] + # color_tab = ['#D9D9D9', '#BFBFBF', '#A6A6A6', '#7F7F7F', '#7F7F7F', '#7F7F7F'] + color_tab = ['#ff0000', '#ff6d01','#46bdc6', '#4285f4', '#ea4335', '#34a853'] + edge_color_tab = ['#000000', '#000000', '#000000', '#000000', '#000000', '#000000'] + + fig, ax = plt.subplots(figsize=[8.8, 
2.8]) + + rects = [] + + for i in range(0, nbars): + # height_cum = np.array([0.0] * ngroups) + height_total = np.array([1 for g in group_list]) # y coo + height_curr = np.array([float(df[col_list[i]][g]) for g in group_list]) # y coo + rect_base = ax.bar(x - 0.3 + (3 * i + 1.5) * width / 2, # x coo + height_curr / height_total, # y coo + width, label=col_list[i], + color=color_tab[i], + edgecolor=edge_color_tab[i], + linewidth=0.5 + ) + rects.append(rect_base) + # height_cum += height_curr + + hdl_pair = [(rects[i]) for i in range(nbars)] + + ax.set_xticks(x) + ax.set_xticklabels(group_list, rotation=0, weight='bold') + ax.legend() + # ax.yaxis.set_major_formatter(mticker.PercentFormatter(1.0)) + + plt.xticks(fontsize=11, weight='bold') + plt.yticks(fontsize=9, weight='bold') + plt.grid(axis='y') + plt.xlabel("") + plt.ylabel("Standard deviation / Mean", fontsize=11, weight='bold') + plt.tight_layout() + # plt.margins(x=0.01, y=0.01) + + plt.savefig(output_file + '_std.pdf', bbox_inches='tight') + plt.close() + + +def plot_results(csv_file, output_file): + df = pd.read_csv(csv_file, index_col=[0, 1]) + group_list = [] + subgrp_list = [] + for index in df.index: + if index[0] not in group_list: + group_list.append(index[0]) + if index[1] not in subgrp_list: + subgrp_list.append(index[1]) + col_list = df.columns + + ngroups = len(group_list) + nsubgrps = len(subgrp_list) + x = np.arange(ngroups) + nbars = len(col_list) + width = (1 - 0.4) / (1.5 * nbars) # the width of the bars + + matplotlib.rcParams["hatch.linewidth"] = 2 + + patterns = ["", "-", "/", "|", "/", "-", "x", "-", "\\", "+", "o", "O"] + color_tab = ['#000000', '#0000ff', '#ff0000', '#ff6666', '#00ff00'] + + fig, ax = plt.subplots(figsize=[8.8, 3.8]) + hdl_pair = [] + + rects = [] + + for i in range(0, nbars): + height_cum = np.array([0.0] * ngroups) + height_curr = np.array([df[col_list[i]][g][0] + for g in group_list]) # y coo + rect_base = ax.bar(x - 0.3 + (3 * i + 1.5) * width / 2, # x coo + 
height_curr, # y coo + width, + label=col_list[i]+" "+subgrp_list[0], + color=color_tab[i], + edgecolor=color_tab[0], + linewidth=0.25 + ) + rects.append(rect_base) + height_cum += height_curr + for j in range(1, 3): + height_curr = np.array([df[col_list[i]][g][j] + for g in group_list]) + rect = ax.bar(x - 0.3 + (3 * i + 1.5) * width / 2, # x coo + height_curr, # y coo + width, + label=col_list[i]+" "+subgrp_list[j], + bottom=height_cum, + color=color_tab[i], + edgecolor=color_tab[0], + linewidth=0.25, + alpha=0.25 * (4 - j) + ) + rects.append(rect) + height_cum += height_curr + + hdl_pair = [(rects[i*nsubgrps], rects[i*nsubgrps+1], + rects[i*nsubgrps+2]) for i in range(nbars)] + ax.set_xticks(x) + ax.set_xticklabels(group_list, rotation=0) + + ax.legend(hdl_pair, col_list, loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.08), fontsize=14, handler_map={tuple: HandlerTuple(ndivide=None)}) + + # ax.legend() + # ax.yaxis.set_major_formatter(mticker.PercentFormatter(1.0)) + # ax.set_yscale('log') + + plt.xticks(fontsize=14, rotation=15) + plt.yticks(fontsize=14) + plt.grid(axis='y') + plt.xlabel("# of Blocks", fontsize=14) + plt.ylabel("Normalized Time", fontsize=14) + plt.tight_layout() + # plt.margins(x=0.01, y=0.01) + + plt.savefig(output_file, bbox_inches='tight') + plt.close() + + +def main(): + parser = argparse.ArgumentParser() + addOptions(parser) + + options = parser.parse_args() + + iterations = options.iterations + output_csv_file = options.csv + output_figure_file = options.figure + profiling = options.profiling + + root_directory = './' + + config_list = get_config_list(root_directory) + print(config_list) + workload_list, workload_dict = get_workload_dict(root_directory, config_list) + print(workload_dict) + + if profiling: + execute_bashes(workload_dict, thread_block_batch_list, iterations) + + result_dict = process_results(workload_dict, block_batch_list, iterations) + csv_list = export_csv(result_dict, block_batch_list, config_super_list, 
iterations, True) + for csv_file in csv_list: + plot_results(csv_file, csv_file.replace(".csv", ".pdf")) + + result_dict = process_results(workload_dict, thread_batch_list, iterations) + csv_list = export_csv(result_dict, thread_batch_list, config_super_list, iterations, False) + for csv_file in csv_list: + plot_results(csv_file, csv_file.replace(".csv", ".pdf")) + + +if __name__ == '__main__': + main() + diff --git a/workloads/micro/run_micro_shared.py b/workloads/micro/run_micro_shared.py new file mode 100644 index 0000000000000000000000000000000000000000..15ed622c415bd00ea49cd4e115e5d40f5ae878cd --- /dev/null +++ b/workloads/micro/run_micro_shared.py @@ -0,0 +1,377 @@ +import os +import argparse +import sys +import subprocess +import psutil + +import os +import collections +import csv + +import numpy as np +import pandas as pd +import matplotlib +from matplotlib import pyplot as plt +import matplotlib.ticker as mticker +import xlsxwriter +import seaborn as sns + +from matplotlib.ticker import FormatStrFormatter +from matplotlib.legend_handler import HandlerTuple + +from subprocess import Popen, PIPE + +from scipy.stats import gmean + + +config_super_list = ['standard', 'async', 'uvm', 'uvm_prefetch', 'uvm_prefetch_async'] +workload_super_list = ['vector_seq'] + +thread_batch_list = [2, 4, 8, 16, 32, 64, 128] + +def dict_to_list(input_dict): + return_list = [] + for elemement in input_dict: + return_list.append(elemement) + return return_list + +def addOptions(parser): + parser.add_argument("-i", "--iterations", type=int, default=1, + help="Number of iterations") + parser.add_argument("-c", "--csv", type=str, default='output.xlsx', + help="output trace log file") + parser.add_argument("-f", "--figure", type=str, default='micro', + help="output pdf file") + parser.add_argument("-p", "--profiling", action='store_true', + help="whether to run profiling or just parse results") + + +def get_config_list(root_directory): + config_list = [] + for dict in 
os.listdir(root_directory): + if os.path.isdir(dict) and dict in config_super_list: + config_list.append(dict) + return config_list + + +def get_workload_dict(root_directory, config_list): + workload_list = [] + workload_dict = dict() + for config in config_list: + config_dir = root_directory + '/' + config + + for root, directories, files in os.walk(config_dir, topdown=False): + for dir in directories: + if dir in workload_super_list: + if dir not in workload_dict: + workload_dict[dir] = dict() + workload_dict[dir][config] = os.path.join(root, dir) + if dir not in workload_list: + workload_list.append(dir) + + return workload_list, workload_dict + +def execute_bashes(workload_dict, thread_batch_list, iterations): + for workload in workload_dict: + if workload in workload_super_list: + for config in workload_dict[workload]: + if config in config_super_list: + cur_dir = workload_dict[workload][config] + pwd = os.getcwd() + os.chdir(cur_dir) + os.system('make') + for thread_batch in thread_batch_list: + for i in range(0, iterations): + sh_file = './' + workload + '_' + str(thread_batch) + exe_cmd = sh_file + ' > ' + str(thread_batch) + '_' + str(i) + '.log' + os.system(exe_cmd) + os.chdir(pwd) + + +def process_file(log_file, config): + result_dict = dict() + text = open(log_file, "r") + + overlap = 0 + + result_dict['gpu_kernel'] = 0 + result_dict['memcpy'] = 0 + result_dict['memcpy_HtoD'] = 0 + result_dict['memcpy_DtoH'] = 0 + result_dict['allocation'] = 0 + + for line in text: + line = line.replace(':', '') + line = line.strip() + words = line.split(',') + + if 'KERNEL' in words[0] and len(words) >= 4: + result_dict['gpu_kernel'] += int(words[-1]) + elif 'MEMCPY' in words[0]: + if 'HTOD' in words[0] or 'HtoD' in words[0]: + result_dict['memcpy_HtoD'] += int(words[-1]) + else: + result_dict['memcpy_DtoH'] += int(words[-1]) + elif 'cudaMalloc' in words[0]: + result_dict['allocation'] += int(words[3]) + elif 'cudaFree' in words[0]: + result_dict['allocation'] += 
int(words[3]) + + return_dict = dict() + + if config == 'uvm': + return_dict['gpu_kernel'] = result_dict['gpu_kernel'] - result_dict['memcpy_HtoD'] + else: + return_dict['gpu_kernel'] = result_dict['gpu_kernel'] + return_dict['memcpy'] = result_dict['memcpy_HtoD'] + result_dict['memcpy_DtoH'] + return_dict['allocation'] = result_dict['allocation'] + + return return_dict + + +def process_results(workload_dict, thread_batch_list, iterations): + result_dict = dict() + for workload in workload_dict: + if workload in workload_super_list: + result_dict[workload] = dict() + for thread_batch in thread_batch_list: + thread_batch_str = str(thread_batch) + result_dict[workload][thread_batch_str] = dict() + for config in workload_dict[workload]: + if config in config_super_list: + result_dict[workload][thread_batch_str][config] = [] + cur_dir = workload_dict[workload][config] + + for i in range(0, iterations): + log_file = cur_dir + '/' + thread_batch_str + '_' + str(i) + '.log' + result_dict[workload][thread_batch_str][config].append(process_file(log_file, config)) + return result_dict + +def export_csv(result_dict, thread_batch_list, config_list, iterations): + workload_list = dict_to_list(result_dict) + + csv_list = [] + for workload in workload_list: + avg_dict = dict() + for b in range(0, len(thread_batch_list)): + thread_batch_str = str(thread_batch_list[b]) + avg_dict[thread_batch_str] = dict() + for c in range(0, len(config_list)): + avg_dict[thread_batch_str][config_list[c]] = dict() + + metric_list = dict_to_list(result_dict[workload][thread_batch_str][config_list[c]][0]) + + for metric in metric_list: + avg_dict[thread_batch_str][config_list[c]][metric] = 0 + avg_dict[thread_batch_str][config_list[c]]['all'] = 0 + + for i in range(0, iterations): + for metric in metric_list: + avg_dict[thread_batch_str][config_list[c]][metric] += result_dict[workload][thread_batch_str][config_list[c]][i][metric] + avg_dict[thread_batch_str][config_list[c]]['all'] += 
result_dict[workload][thread_batch_str][config_list[c]][i][metric] + + for c in range(0, len(config_list)): + thread_batch_str_0 = str(thread_batch_list[0]) + normarlized_all = avg_dict[thread_batch_str][config_list[c]]['all'] / avg_dict[thread_batch_str_0]['standard']['all'] + for metric in metric_list: + avg_dict[thread_batch_str][config_list[c]][metric] = (avg_dict[thread_batch_str][config_list[c]][metric] / avg_dict[thread_batch_str][config_list[c]]['all']) * normarlized_all + + workload_csv_file = workload + '_sensitivity_shared.csv' + + csv_list.append(workload_csv_file) + out = open(workload_csv_file, "w") + + out.write('group,,') + for i in range(0, len(config_list)): + out.write(config_list[i]) + if i != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + for thread_batch in thread_batch_list: + thread_batch_str = str(thread_batch) + for metric in metric_list: + out.write(str(int (thread_batch * 256 * 4 / 1024)) + 'KB,' + metric + ',') + for j in range(0, len(config_list)): + out.write(str(avg_dict[thread_batch_str][config_list[j]][metric])) + if j != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + out.close() + + return csv_list + +def plot_std_results(csv_file, output_file): + df = pd.read_csv(csv_file, index_col=0) + + group_list = [] + for index in df.index: + if index not in group_list: + group_list.append(index) + col_list = df.columns + + ngroups = len(group_list) + x = np.arange(ngroups) + nbars = len(col_list) + width = (1 - 0.4) / (1.5 * nbars) # the width of the bars + + matplotlib.rcParams["hatch.linewidth"] = 2 + + # patterns = [ "|" , "/", "-", "", "x", "-", "\\", "+", "o", "O" ] + # patterns = [ "|" , "/", "x", "*", ".", "-", "\\", "+", "o", "O" ] + # patterns = ["//", "//", "//", "//", "//", "//", "//"] + patterns = ["", "", "", "", "", ""] + # color_tab = ['#D9D9D9', '#BFBFBF', '#A6A6A6', '#7F7F7F', '#7F7F7F', '#7F7F7F'] + color_tab = ['#ff0000', '#ff6d01','#46bdc6', '#4285f4', '#ea4335', 
'#34a853'] + edge_color_tab = ['#000000', '#000000', '#000000', '#000000', '#000000', '#000000'] + + fig, ax = plt.subplots(figsize=[8.8, 2.8]) + + rects = [] + + for i in range(0, nbars): + # height_cum = np.array([0.0] * ngroups) + height_total = np.array([1 for g in group_list]) # y coo + height_curr = np.array([float(df[col_list[i]][g]) for g in group_list]) # y coo + rect_base = ax.bar(x - 0.3 + (3 * i + 1.5) * width / 2, # x coo + height_curr / height_total, # y coo + width, label=col_list[i], + color=color_tab[i], + edgecolor=edge_color_tab[i], + linewidth=0.5 + ) + rects.append(rect_base) + # height_cum += height_curr + + hdl_pair = [(rects[i]) for i in range(nbars)] + + ax.set_xticks(x) + ax.set_xticklabels(group_list, rotation=0, weight='bold') + ax.legend() + # ax.yaxis.set_major_formatter(mticker.PercentFormatter(1.0)) + + plt.xticks(fontsize=11) + plt.yticks(fontsize=9, weight='bold') + plt.grid(axis='y') + plt.xlabel("") + plt.ylabel("Standard deviation / Mean") + plt.tight_layout() + # plt.margins(x=0.01, y=0.01) + + plt.savefig(output_file + '_std.pdf', bbox_inches='tight') + plt.close() + + +def plot_results(csv_file, output_file): + df = pd.read_csv(csv_file, index_col=[0, 1]) + group_list = [] + subgrp_list = [] + for index in df.index: + if index[0] not in group_list: + group_list.append(index[0]) + if index[1] not in subgrp_list: + subgrp_list.append(index[1]) + col_list = df.columns + + ngroups = len(group_list) + nsubgrps = len(subgrp_list) + x = np.arange(ngroups) + nbars = len(col_list) + width = (1 - 0.4) / (1.5 * nbars) # the width of the bars + + matplotlib.rcParams["hatch.linewidth"] = 2 + + patterns = ["", "-", "/", "|", "/", "-", "x", "-", "\\", "+", "o", "O"] + color_tab = ['#000000', '#0000ff', '#ff0000', '#ff6666', '#00ff00'] + + fig, ax = plt.subplots(figsize=[8.8, 3.8]) + hdl_pair = [] + + rects = [] + + for i in range(0, nbars): + height_cum = np.array([0.0] * ngroups) + height_curr = np.array([df[col_list[i]][g][0] + for g in 
group_list]) # y coo + rect_base = ax.bar(x - 0.3 + (3 * i + 1.5) * width / 2, # x coo + height_curr, # y coo + width, + label=col_list[i]+" "+subgrp_list[0], + color=color_tab[i], + edgecolor=color_tab[0], + linewidth=0.25 + ) + rects.append(rect_base) + height_cum += height_curr + for j in range(1, 3): + height_curr = np.array([df[col_list[i]][g][j] + for g in group_list]) + rect = ax.bar(x - 0.3 + (3 * i + 1.5) * width / 2, # x coo + height_curr, # y coo + width, + label=col_list[i]+" "+subgrp_list[j], + bottom=height_cum, + color=color_tab[i], + edgecolor=color_tab[0], + linewidth=0.25, + alpha=0.25 * (4 - j) + ) + rects.append(rect) + height_cum += height_curr + + hdl_pair = [(rects[i*nsubgrps], rects[i*nsubgrps+1], + rects[i*nsubgrps+2]) for i in range(nbars)] + ax.set_xticks(x) + ax.set_xticklabels(group_list, rotation=0) + + ax.legend(hdl_pair, col_list, loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.2), fontsize=14, handler_map={tuple: HandlerTuple(ndivide=None)}) + # ax.legend() + # ax.yaxis.set_major_formatter(mticker.PercentFormatter(1.0)) + # ax.set_yscale('log') + + plt.xticks(fontsize=14, rotation=15) + plt.yticks(fontsize=14) + plt.grid(axis='y') + plt.xlabel("") + plt.ylabel("Normalized Time", fontsize=14) + plt.xlabel("Shared Memory Capacity", fontsize=14) + plt.tight_layout() + # plt.margins(x=0.01, y=0.01) + + plt.savefig(output_file, bbox_inches='tight') + plt.close() + + +def main(): + parser = argparse.ArgumentParser() + addOptions(parser) + + options = parser.parse_args() + + iterations = options.iterations + output_csv_file = options.csv + output_figure_file = options.figure + profiling = options.profiling + + root_directory = './' + + config_list = get_config_list(root_directory) + print(config_list) + workload_list, workload_dict = get_workload_dict(root_directory, config_list) + print(workload_dict) + + if profiling: + execute_bashes(workload_dict, thread_batch_list, iterations) + + result_dict = process_results(workload_dict, 
thread_batch_list, iterations) + csv_list = export_csv(result_dict, thread_batch_list, config_super_list, iterations) + for csv_file in csv_list: + plot_results(csv_file, csv_file.replace(".csv", ".pdf")) + + +if __name__ == '__main__': + main() + diff --git a/workloads/micro/standard/2DCONV/2DConvolution.cu b/workloads/micro/standard/2DCONV/2DConvolution.cu new file mode 100644 index 0000000000000000000000000000000000000000..4d9d30a6eb9863f8d803745ba4ab20a375aa32ce --- /dev/null +++ b/workloads/micro/standard/2DCONV/2DConvolution.cu @@ -0,0 +1,345 @@ +/** + * 2DConvolution.cu: This file is part of the PolyBench/GPU 1.0 test suite. + * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +#define GPU_DEVICE 5 + +/* Problem size */ +#define SIZE 4096 +#define NBLOCKS 32 +#define BATCH_SIZE 4 + +uint64_t NI; +uint64_t NJ; +int nblocks; + + +/* Thread block dimensions */ +#define KERNEL 3 +#define DIM_THREAD_BLOCK 8 + +/* Can switch 
DATA_TYPE between float and double */ +typedef float DATA_TYPE; + +void conv2D(DATA_TYPE* A, DATA_TYPE* B) +{ + int i, j; + DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + c11 = +0.2; c21 = +0.5; c31 = -0.8; + c12 = -0.3; c22 = +0.6; c32 = -0.9; + c13 = +0.4; c23 = +0.7; c33 = +0.10; + + for (i = 1; i < NI - 1; ++i) // 0 + { + for (j = 1; j < NJ - 1; ++j) // 1 + { + B[i*NJ + j] = c11 * A[(i - 1)*NJ + (j - 1)] + c12 * A[(i + 0)*NJ + (j - 1)] + c13 * A[(i + 1)*NJ + (j - 1)] + + c21 * A[(i - 1)*NJ + (j + 0)] + c22 * A[(i + 0)*NJ + (j + 0)] + c23 * A[(i + 1)*NJ + (j + 0)] + + c31 * A[(i - 1)*NJ + (j + 1)] + c32 * A[(i + 0)*NJ + (j + 1)] + c33 * A[(i + 1)*NJ + (j + 1)]; + } + } +} + + +void initGPU(DATA_TYPE* A_gpu) +{ + int i, j; + + for (i = 0; i < NI; ++i) { + for (j = 0; j < NJ; ++j) { + A_gpu[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } +} + +void initCPU(DATA_TYPE* A) +{ + int i, j; + + for (i = 0; i < NI; ++i) { + for (j = 0; j < NJ; ++j) { + A[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } +} + + +void compareResults(DATA_TYPE* B, DATA_TYPE* B_outputFromGpu) +{ + int i, j, fail; + fail = 0; + + // Compare a and b + for (i=1; i < (NI-1); i++) + { + for (j=1; j < (NJ-1); j++) + { + if (percentDiff(B[i*NJ + j], B_outputFromGpu[i*NJ + j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, CPU is %f, GPU is %f.\n", i, j, B[i * NJ + j], B_outputFromGpu[i * NJ + j]); + fail++; + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); + +} + +__global__ void Convolution2D_kernel(DATA_TYPE *A, DATA_TYPE *B, uint64_t NI, uint64_t NJ, int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + uint64_t tile_dim_x = (NJ + DIM_THREAD_BLOCK - 1) / (DIM_THREAD_BLOCK * BATCH_SIZE); + + __shared__ DATA_TYPE tmp_A[DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1]; + __shared__ 
DATA_TYPE tmp_B[DIM_THREAD_BLOCK * BATCH_SIZE][DIM_THREAD_BLOCK * BATCH_SIZE]; + + uint64_t total_tiles = tile_dim_x * tile_dim_x; + + uint64_t tiles_this_block_x = (block_size / (DIM_THREAD_BLOCK * BATCH_SIZE)); + uint64_t tiles_this_block = tiles_this_block_x * tiles_this_block_x; + + uint64_t base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + uint64_t tile = base_tile; + uint64_t end_tile = tile + tiles_this_block; + + // DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + // c11 = +0.2; c21 = +0.5; c31 = -0.8; + // c12 = -0.3; c22 = +0.6; c32 = -0.9; + // c13 = +0.4; c23 = +0.7; c33 = +0.10; + + DATA_TYPE c[KERNEL][KERNEL]; + + c[0][0] = +0.2; + c[1][0] = +0.5; + c[2][0] = -0.8; + c[0][1] = -0.3; + c[1][1] = +0.6; + c[2][1] = -0.9; + c[0][2] = +0.4; + c[1][2] = +0.7; + c[2][2] = +0.10; + + for (; tile < end_tile; tile += 1) + { + // block id + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + int bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + int batch_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + // thread id + int tx = threadIdx.x; + int ty = threadIdx.y; + + int index_B_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty + 1; + int index_B_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx + 1; + + int index_A_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty; + int index_A_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx; + + int index_A_y_start = DIM_THREAD_BLOCK * BATCH_SIZE * by; + int index_A_x_start = DIM_THREAD_BLOCK * BATCH_SIZE * bx; + + int index_A_y_bound = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * DIM_THREAD_BLOCK; + int index_A_x_bound = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * DIM_THREAD_BLOCK; + + // fetch A + for (int i = 0; i < BATCH_SIZE; i++) { + for (int j = 0; j < BATCH_SIZE; j++) { + if ((index_A_y + i) < NI && (index_A_x + j) < 
NJ) { + tmp_A[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] = A[(index_A_y + i) * NJ + index_A_x + j]; + tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] = 0; + } + } + } + + // fetch A -- padding + for (int i = 0; i < KERNEL - 1; i++) { + for (int j = 0; j < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; j++) { + if ((index_A_y_bound + i) < NI && (index_A_x_start + j) < NJ) { + tmp_A[DIM_THREAD_BLOCK * BATCH_SIZE + i][j] = A[(index_A_y_bound + i) * NJ + index_A_x_start + j]; + } + } + } + + // fetch A -- padding + for (int i = 0; i < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; i++) { + for (int j = 0; j < KERNEL - 1; j++) { + if ((index_A_y_start + i) < NI && (index_A_x_bound + j) < NJ) { + tmp_A[i][DIM_THREAD_BLOCK * BATCH_SIZE + j] = A[(index_A_y_start + i) * NJ + index_A_x_bound + j]; + } + } + } + block.sync(); + + // Computation + for (int i = 0; i < BATCH_SIZE; i++) { + for (int j = 0; j < BATCH_SIZE; j++) { + for (int m = 0; m < KERNEL; m++) { + for (int n = 0; n < KERNEL; n++) { + tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] += tmp_A[ty * BATCH_SIZE + i + m][tx * BATCH_SIZE + j + n] * c[n][m]; + } + } + } + } + block.sync(); + + // Store B + for (int i = 0; i < BATCH_SIZE; i++) { + for (int j = 0; j < BATCH_SIZE; j++) { + if ((index_B_y + i + 1) < NI && (index_B_x + j + 1) < NJ) { + B[(index_B_y + i) * NJ + index_B_x + j] = tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j]; + } + } + } + block.sync(); + } +} + +void convolution2DCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu) +{ + double t_start, t_end; + + int output_width = NI - KERNEL + 1; + int output_height = NJ - KERNEL + 1; + + dim3 block(DIM_THREAD_BLOCK, DIM_THREAD_BLOCK); + dim3 grid(nblocks, nblocks); + + int block_size = (NJ + (nblocks - 1)) / nblocks; + + // t_start = rtclock(); + + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyHostToDevice); + Convolution2D_kernel<<>>(A_gpu, B_gpu, NI, NJ, block_size); + cudaDeviceSynchronize(); + cudaMemcpy(B, B_gpu, 
sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyDeviceToHost); + + // t_end = rtclock(); + // fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start);//); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + if (argc >= 4) { + NI = atoll(argv[1]); + NJ = atoll(argv[2]); + nblocks = atoi(argv[3]); + } else { + NI = SIZE; + NJ = SIZE; + nblocks = NBLOCKS; + } + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE *B_ref; + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + + A = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + B_ref = (DATA_TYPE *)malloc(NI * NJ * sizeof(DATA_TYPE)); + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI * NJ); + cudaMalloc(&B_gpu, sizeof(DATA_TYPE) * NI * NJ); + // B_outputFromGpu = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + + convolution2DCuda(A, B, A_gpu, B_gpu); + + cudaFree(A_gpu); + cudaFree(B_gpu); + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // conv2D(A, B_ref); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);//); + + // compareResults(B, B_ref); + free(A); + free(B); + + return 0; +} diff --git a/workloads/micro/standard/2DCONV/2DConvolution.cu.nopadding b/workloads/micro/standard/2DCONV/2DConvolution.cu.nopadding new file mode 100644 index 0000000000000000000000000000000000000000..997d0214d35f2010497a17c07f82cbb836f75ae8 --- /dev/null +++ b/workloads/micro/standard/2DCONV/2DConvolution.cu.nopadding @@ -0,0 +1,362 @@ +/** + * 2DConvolution.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +#define GPU_DEVICE 0 + +/* Problem size */ +#define SIZE 4096 +#define NBLOCKS 32 +#define BATCH_SIZE 4 + +int NI; +int NJ; +int nblocks; + + +/* Thread block dimensions */ +#define KERNEL 3 +#define DIM_THREAD_BLOCK 8 + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; + +void conv2D(DATA_TYPE* A, DATA_TYPE* B) +{ + int i, j; + DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + c11 = +0.2; c21 = +0.5; c31 = -0.8; + c12 = -0.3; c22 = +0.6; c32 = -0.9; + c13 = +0.4; c23 = +0.7; c33 = +0.10; + + for (i = 1; i < NI - 1; ++i) // 0 + { + for (j = 1; j < NJ - 1; ++j) // 1 + { + B[i*NJ + j] = c11 * A[(i - 1)*NJ + (j - 1)] + c12 * A[(i + 0)*NJ + (j - 1)] + c13 * A[(i + 1)*NJ + (j - 1)] + + c21 * A[(i - 1)*NJ + (j + 0)] + c22 * A[(i + 0)*NJ + (j + 0)] + c23 * A[(i + 1)*NJ + (j + 0)] + + c31 * A[(i - 1)*NJ + (j + 1)] + c32 * A[(i + 0)*NJ + (j + 1)] 
+ c33 * A[(i + 1)*NJ + (j + 1)]; + } + } +} + + +void initGPU(DATA_TYPE* A_gpu) +{ + int i, j; + + for (i = 0; i < NI; ++i) { + for (j = 0; j < NJ; ++j) { + A_gpu[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } +} + +void initCPU(DATA_TYPE* A) +{ + int i, j; + + for (i = 0; i < NI; ++i) { + for (j = 0; j < NJ; ++j) { + A[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } +} + + +void compareResults(DATA_TYPE* B, DATA_TYPE* B_outputFromGpu) +{ + int i, j, fail; + fail = 0; + + // Compare a and b + for (i=1; i < (NI-1); i++) + { + for (j=1; j < (NJ-1); j++) + { + if (percentDiff(B[i*NJ + j], B_outputFromGpu[i*NJ + j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, CPU is %f, GPU is %f.\n", i, j, B[i * NJ + j], B_outputFromGpu[i * NJ + j]); + fail++; + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); + +} + + +void GPU_argv_init() +{ + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, GPU_DEVICE); + printf("setting device %d with name %s\n",GPU_DEVICE,deviceProp.name); + cudaSetDevice( GPU_DEVICE ); +} + +__global__ void Convolution2D_kernel(DATA_TYPE *A, DATA_TYPE *B, int NI, int NJ, int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + int tile_dim_x = (NJ + DIM_THREAD_BLOCK - 1) / (DIM_THREAD_BLOCK * BATCH_SIZE); + + __shared__ DATA_TYPE tmp_A[DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1]; + __shared__ DATA_TYPE tmp_B[DIM_THREAD_BLOCK * BATCH_SIZE][DIM_THREAD_BLOCK * BATCH_SIZE]; + + int total_tiles = tile_dim_x * tile_dim_x; + + int tiles_this_block_x = (block_size / (DIM_THREAD_BLOCK * BATCH_SIZE)); + int tiles_this_block = tiles_this_block_x * tiles_this_block_x; + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + // DATA_TYPE c11, c12, c13, c21, c22, 
c23, c31, c32, c33; + + // c11 = +0.2; c21 = +0.5; c31 = -0.8; + // c12 = -0.3; c22 = +0.6; c32 = -0.9; + // c13 = +0.4; c23 = +0.7; c33 = +0.10; + + DATA_TYPE c[KERNEL][KERNEL]; + + c[0][0] = +0.2; + c[1][0] = +0.5; + c[2][0] = -0.8; + c[0][1] = -0.3; + c[1][1] = +0.6; + c[2][1] = -0.9; + c[0][2] = +0.4; + c[1][2] = +0.7; + c[2][2] = +0.10; + + for (; tile < end_tile; tile += 1) + { + // block id + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + int bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + int batch_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + // thread id + int tx = threadIdx.x; + int ty = threadIdx.y; + + int index_B_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty + 1; + int index_B_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx + 1; + + int index_A_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty; + int index_A_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx; + + int index_A_y_start = DIM_THREAD_BLOCK * BATCH_SIZE * by; + int index_A_x_start = DIM_THREAD_BLOCK * BATCH_SIZE * bx; + + int index_A_y_bound = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * DIM_THREAD_BLOCK; + int index_A_x_bound = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * DIM_THREAD_BLOCK; + + // fetch A + for (int i = 0; i < BATCH_SIZE; i++) { + for (int j = 0; j < BATCH_SIZE; j++) { + if ((index_A_y + i) < NI && (index_A_x + j) < NJ) { + tmp_A[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] = A[(index_A_y + i) * NJ + index_A_x + j]; + tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] = 0; + } + } + } + block.sync(); + + if (tx < DIM_THREAD_BLOCK && ty < DIM_THREAD_BLOCK) { + // Computation + for (int i = 0; i < BATCH_SIZE; i++) { + for (int j = 0; j < BATCH_SIZE; j++) { + for (int m = 0; m < KERNEL; m++) { + for (int n = 0; n < KERNEL; n++) { + tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] += tmp_A[ty * 
BATCH_SIZE + i + m][tx * BATCH_SIZE + j + n] * c[n][m]; + } + } + } + } + block.sync(); + + // Store B + for (int i = 0; i < BATCH_SIZE; i++) { + for (int j = 0; j < BATCH_SIZE; j++) { + if ((index_B_y + i + 1) < NI && (index_B_x + j + 1) < NJ) { + B[(index_B_y + i) * NJ + index_B_x + j] = tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j]; + } + } + } + block.sync(); + } + } +} + +void convolution2DCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu) +{ + double t_start, t_end; + + int output_width = NI - KERNEL + 1; + int output_height = NJ - KERNEL + 1; + + dim3 block(DIM_THREAD_BLOCK + KERNEL - 1, DIM_THREAD_BLOCK + KERNEL - 1); + dim3 grid(nblocks, nblocks); + + int block_size = (NJ + (nblocks - 1)) / nblocks; + + // t_start = rtclock(); + + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyHostToDevice); + Convolution2D_kernel<<>>(A_gpu, B_gpu, NI, NJ, block_size); + cudaDeviceSynchronize(); + cudaMemcpy(B, B_gpu, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyDeviceToHost); + + // t_end = rtclock(); + // fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start);//); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsc() +{ + unsigned long a, d; + + __asm__ volatile("rdtsc" + : "=a"(a), "=d"(d)); + + return (a | (d << 32)); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsp() +{ + struct timespec tms; + if (clock_gettime(CLOCK_REALTIME, &tms)) + { + return -1; + } + unsigned long ns = tms.tv_sec * 1000000000; + ns += tms.tv_nsec; + return ns; +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + if (argc >= 4) { + NI = atoi(argv[1]); + NJ = atoi(argv[2]); + nblocks = atoi(argv[3]); + } else { + NI = SIZE; + NJ = SIZE; + nblocks = NBLOCKS; + } + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE *B_ref; + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + + A = 
(DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + B_ref = (DATA_TYPE *)malloc(NI * NJ * sizeof(DATA_TYPE)); + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI * NJ); + cudaMalloc(&B_gpu, sizeof(DATA_TYPE) * NI * NJ); + // B_outputFromGpu = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + + convolution2DCuda(A, B, A_gpu, B_gpu); + + cudaFree(A_gpu); + cudaFree(B_gpu); + + endCPU(); + finiTrace(); + + t_start = rtclock(); + conv2D(A, B_ref); + t_end = rtclock(); + fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);//); + + compareResults(B, B_ref); + + free(A); + free(B); + + return 0; +} diff --git a/workloads/micro/standard/2DCONV/Makefile b/workloads/micro/standard/2DCONV/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f97459c40d8409dec80056ca77208c75e9d0b5e8 --- /dev/null +++ b/workloads/micro/standard/2DCONV/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := 2DConvolution +CUFILES := 2DConvolution.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o 2DConvolution diff --git a/workloads/micro/standard/2DCONV/run.sh b/workloads/micro/standard/2DCONV/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..c4d26406f477f75d294497d89fc0d88c529f475b --- /dev/null +++ b/workloads/micro/standard/2DCONV/run.sh @@ -0,0 +1,2 @@ +# ./2DConvolution 16384 16384 32 +./2DConvolution 32768 32768 32 \ No newline at end of file diff --git a/workloads/micro/standard/2DCONV/run_large.sh b/workloads/micro/standard/2DCONV/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..6e4e06894b252b1d547f335deef142ab01c98df9 --- /dev/null +++ 
b/workloads/micro/standard/2DCONV/run_large.sh @@ -0,0 +1 @@ +./2DConvolution 8192 8192 32 diff --git a/workloads/micro/standard/2DCONV/run_medium.sh b/workloads/micro/standard/2DCONV/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..c246aa18fc41de6e48e7d9ab67f1bbf1925afff6 --- /dev/null +++ b/workloads/micro/standard/2DCONV/run_medium.sh @@ -0,0 +1 @@ +./2DConvolution 4096 4096 32 diff --git a/workloads/micro/standard/2DCONV/run_mega.sh b/workloads/micro/standard/2DCONV/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..aa390557108b8621ec9ef8ac80f0a8f085161cce --- /dev/null +++ b/workloads/micro/standard/2DCONV/run_mega.sh @@ -0,0 +1 @@ +./2DConvolution 65536 65536 32 diff --git a/workloads/micro/standard/2DCONV/run_small.sh b/workloads/micro/standard/2DCONV/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..c4b192f75d30e834237d39cfc15c8c57bade3c0e --- /dev/null +++ b/workloads/micro/standard/2DCONV/run_small.sh @@ -0,0 +1 @@ +./2DConvolution 1024 1024 8 diff --git a/workloads/micro/standard/2DCONV/run_super.sh b/workloads/micro/standard/2DCONV/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..8a0981739ace39a1104aa069d6a6f0dfa38fd5c9 --- /dev/null +++ b/workloads/micro/standard/2DCONV/run_super.sh @@ -0,0 +1 @@ +./2DConvolution 32768 32768 32 diff --git a/workloads/micro/standard/2DCONV/run_tiny.sh b/workloads/micro/standard/2DCONV/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..d7c49df52c8e594edae710ad71032e912ccd7892 --- /dev/null +++ b/workloads/micro/standard/2DCONV/run_tiny.sh @@ -0,0 +1 @@ +./2DConvolution 512 512 4 diff --git a/workloads/micro/standard/3DCONV/3DConvolution.cu b/workloads/micro/standard/3DCONV/3DConvolution.cu new file mode 100644 index 0000000000000000000000000000000000000000..f6605e3877967e5cae6a45109ac439f35207915a --- /dev/null +++ b/workloads/micro/standard/3DCONV/3DConvolution.cu 
@@ -0,0 +1,387 @@ +/** + * 3DConvolution.cu: This file is part of the PolyBench/GPU 1.0 test suite. + * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +// define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +#define NBLOCKS 2 +#define BATCH_SIZE 3 + +uint64_t NI; +uint64_t NJ; +uint64_t NK; +int nblocks; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 4 + +#define KERNEL 3 + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; + + + +void conv3D(DATA_TYPE* A, DATA_TYPE* B) +{ + int i, j, k; + DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + c11 = +2; c21 = +5; c31 = -8; + c12 = -3; c22 = +6; c32 = -9; + c13 = +4; c23 = +7; c33 = +10; + + for (i = 1; i < NI - 1; ++i) // 0 + { + for (j = 1; j < NJ - 1; ++j) // 1 + { + for (k = 1; k < NK -1; ++k) // 2 + { + B[i*(NK * NJ) + j*NK + k] = c11 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + c13 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + + c21 * 
A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + c23 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + + c31 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + c33 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + + c12 * A[(i + 0)*(NK * NJ) + (j - 1)*NK + (k + 0)] + c22 * A[(i + 0)*(NK * NJ) + (j + 0)*NK + (k + 0)] + + c32 * A[(i + 0)*(NK * NJ) + (j + 1)*NK + (k + 0)] + c11 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k + 1)] + + c13 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k + 1)] + c21 * A[(i - 1)*(NK * NJ) + (j + 0)*NK + (k + 1)] + + c23 * A[(i + 1)*(NK * NJ) + (j + 0)*NK + (k + 1)] + c31 * A[(i - 1)*(NK * NJ) + (j + 1)*NK + (k + 1)] + + c33 * A[(i + 1)*(NK * NJ) + (j + 1)*NK + (k + 1)]; + } + } + } +} + +void initGPU(DATA_TYPE *A_gpu) +{ + int i, j, k; + + for (i = 0; i < NI; ++i) + { + for (j = 0; j < NJ; ++j) + { + for (k = 0; k < NK; ++k) + { + A_gpu[i * (NK * NJ) + j * NK + k] = i % 12 + 2 * (j % 7) + 3 * (k % 13); + } + } + } +} + +void initCPU(DATA_TYPE *A) +{ + int i, j, k; + + for (i = 0; i < NI; ++i) + { + for (j = 0; j < NJ; ++j) + { + for (k = 0; k < NK; ++k) + { + A[i*(NK * NJ) + j*NK + k] = i % 12 + 2 * (j % 7) + 3 * (k % 13); + } + } + } +} + + +void compareResults(DATA_TYPE* B, DATA_TYPE* B_outputFromGpu) +{ + int i, j, k, fail; + fail = 0; + + // Compare result from cpu and gpu... 
+ for (i = 1; i < NI - 1; ++i) // 0 + { + for (j = 1; j < NJ - 1; ++j) // 1 + { + for (k = 1; k < NK - 1; ++k) // 2 + { + if (percentDiff(B[i*(NK * NJ) + j*NK + k], B_outputFromGpu[i*(NK * NJ) + j*NK + k]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, %d, CPU is %f, GPU is %f.\n", i, j, k, B[i * (NK * NJ) + j * NK + k], B_outputFromGpu[i * (NK * NJ) + j * NK + k]); + fail++; + } + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void convolution3D_kernel(DATA_TYPE *A, DATA_TYPE *B, uint64_t NI, uint64_t NJ, uint64_t NK, int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + c11 = +2; c21 = +5; c31 = -8; + c12 = -3; c22 = +6; c32 = -9; + c13 = +4; c23 = +7; c33 = +10; + + uint64_t tile_dim_x = (NJ + DIM_THREAD_BLOCK - 1) / (DIM_THREAD_BLOCK * BATCH_SIZE); + + __shared__ DATA_TYPE tmp_A[DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1]; + __shared__ DATA_TYPE tmp_B[DIM_THREAD_BLOCK * BATCH_SIZE][DIM_THREAD_BLOCK * BATCH_SIZE][DIM_THREAD_BLOCK * BATCH_SIZE]; + + // int total_tiles = tile_dim_x * tile_dim_x * tile_dim_x; + + uint64_t tiles_this_block_x = (block_size / (DIM_THREAD_BLOCK * BATCH_SIZE)); + uint64_t tiles_this_block = tiles_this_block_x * tiles_this_block_x * tiles_this_block_x; + + uint64_t base_tile = (blockIdx.z * gridDim.y * gridDim.x + blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + uint64_t tile = base_tile; + uint64_t end_tile = tile + tiles_this_block; + + // printf("block_size is %d, tile_dim_x is %d, tiles_this_block_x is %d.\n", block_size, tile_dim_x, tiles_this_block_x); + + for (; tile < end_tile; tile += 1) + { + // block id + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + + int bz = 
block_id / (gridDim.y * gridDim.x) * tiles_this_block_x + offset / (tiles_this_block_x * tiles_this_block_x); + int by = block_id % (gridDim.y * gridDim.x) / gridDim.x * tiles_this_block_x + offset % (tiles_this_block_x * tiles_this_block_x) / tiles_this_block_x; + int bx = block_id % (gridDim.y * gridDim.x) % gridDim.x * tiles_this_block_x + offset % (tiles_this_block_x * tiles_this_block_x) % tiles_this_block_x; + + // thread id + int tx = threadIdx.x; + int ty = threadIdx.y; + int tz = threadIdx.z; + + int index_B_z = DIM_THREAD_BLOCK * BATCH_SIZE * bz + BATCH_SIZE * tz + 1; + int index_B_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty + 1; + int index_B_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx + 1; + + int index_A_z = DIM_THREAD_BLOCK * BATCH_SIZE * bz + BATCH_SIZE * tz; + int index_A_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty; + int index_A_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx; + + int index_A_z_start = DIM_THREAD_BLOCK * BATCH_SIZE * bz; + int index_A_y_start = DIM_THREAD_BLOCK * BATCH_SIZE * by; + int index_A_x_start = DIM_THREAD_BLOCK * BATCH_SIZE * bx; + + int index_A_z_bound = DIM_THREAD_BLOCK * BATCH_SIZE * bz + BATCH_SIZE * DIM_THREAD_BLOCK; + int index_A_y_bound = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * DIM_THREAD_BLOCK; + int index_A_x_bound = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * DIM_THREAD_BLOCK; + + // fetch A + for (int i = 0; i < BATCH_SIZE; i++) { + for (int j = 0; j < BATCH_SIZE; j++) { + for (int k = 0; k < BATCH_SIZE; k++) { + if ((index_A_z + i) < NI && (index_A_y + j) < NJ && (index_A_x + k) < NK) { + tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] = A[(index_A_z + i) * NJ * NK + (index_A_y + j) * NK + index_A_x + k]; + tmp_B[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] = 0; + } + } + } + } + + // fetch A -- padding + for (int i = 0; i < KERNEL - 1; i++) { + for (int j = 0; j < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; 
j++) { + for (int k = 0; k < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; k++) { + if ((index_A_z_bound + i) < NI && (index_A_y_start + j) < NJ && (index_A_x_start + k) < NK) { + tmp_A[DIM_THREAD_BLOCK * BATCH_SIZE + i][j][k] = A[(index_A_z_bound + i) * NJ * NK + (index_A_y_start + j) * NK + index_A_x_start + k]; + } + } + } + } + + // fetch A -- padding + for (int i = 0; i < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; i++) { + for (int j = 0; j < KERNEL - 1; j++) { + for (int k = 0; k < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; k++) { + if ((index_A_z_start + i) < NI && (index_A_y_bound + j) < NJ && (index_A_x_start + k) < NK) { + tmp_A[i][DIM_THREAD_BLOCK * BATCH_SIZE + j][k] = A[(index_A_z_start + i) * NJ * NK + (index_A_y_bound + j) * NK + index_A_x_start + k]; + } + } + } + } + + // fetch A -- padding + for (int i = 0; i < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; i++) { + for (int j = 0; j < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; j++) { + for (int k = 0; k < KERNEL - 1; k++) { + if ((index_A_z_start + i) < NI && (index_A_y_start + j) < NJ && (index_A_x_bound + k) < NK) { + tmp_A[i][j][DIM_THREAD_BLOCK * BATCH_SIZE + k] = A[(index_A_z_start + i) * NJ * NK + (index_A_y_start + j) * NK + index_A_x_bound + k]; + } + } + } + } + block.sync(); + + // Computation + for (int i = 0; i < BATCH_SIZE; i++) { + for (int j = 0; j < BATCH_SIZE; j++) { + for (int k = 0; k < BATCH_SIZE; k++) { + tmp_B[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] = c11 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c13 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + + c21 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c23 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + + c31 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c33 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + + c12 * tmp_A[tz * BATCH_SIZE + i 
+ 1][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k + 1] + c22 * tmp_A[tz * BATCH_SIZE + i + 1][ty * BATCH_SIZE + j + 1][tx * BATCH_SIZE + k + 1] + + c32 * tmp_A[tz * BATCH_SIZE + i + 1][ty * BATCH_SIZE + j + 2][tx * BATCH_SIZE + k + 1] + c11 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k + 2] + + c13 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k + 2] + c21 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j + 1][tx * BATCH_SIZE + k + 2] + + c23 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j + 1][tx * BATCH_SIZE + k + 2] + c31 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j + 2][tx * BATCH_SIZE + k + 2] + + c33 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j + 2][tx * BATCH_SIZE + k + 2]; + } + } + } + block.sync(); + + // Store B + for (int i = 0; i < BATCH_SIZE; i++) { + for (int j = 0; j < BATCH_SIZE; j++) { + for (int k = 0; k < BATCH_SIZE; k++) { + if ((index_B_z + i + 1) < NI && (index_B_y + j + 1) < NJ && (index_B_x + k + 1) < NK) + { + B[(index_B_z + i) * NJ * NK + (index_B_y + j) * NK + index_B_x + k] = tmp_B[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k]; + } + } + } + } + block.sync(); + } +} + + +void convolution3DCuda(DATA_TYPE* A, DATA_TYPE* B, DATA_TYPE* A_gpu, DATA_TYPE* B_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK, DIM_THREAD_BLOCK, DIM_THREAD_BLOCK); + dim3 grid(nblocks, nblocks, nblocks); + + int block_size = (NI + (nblocks - 1)) / nblocks; + + // t_start = rtclock(); + + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI * NJ * NK, cudaMemcpyHostToDevice); + convolution3D_kernel<<>>(A_gpu, B_gpu, NI, NJ, NK, block_size); + cudaDeviceSynchronize(); + cudaMemcpy(B, B_gpu, sizeof(DATA_TYPE) * NI * NJ * NK, cudaMemcpyDeviceToHost); + + // t_end = rtclock(); + // fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); + +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp 
%lu\n", start_tsc, start_tsp); + if (argc >= 5) { + NI = atoll(argv[1]); + NJ = atoll(argv[2]); + NK = atoll(argv[3]); + nblocks = atoi(argv[4]); + } else { + NI = SIZE; + NJ = SIZE; + NK = SIZE; + nblocks = NBLOCKS; + } + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE *B_ref; + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + + A = (DATA_TYPE*)malloc(NI*NJ*NK*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NI*NJ*NK*sizeof(DATA_TYPE)); + B_ref = (DATA_TYPE*)malloc(NI*NJ*NK*sizeof(DATA_TYPE)); + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI * NJ * NK); + cudaMalloc(&B_gpu, sizeof(DATA_TYPE) * NI * NJ * NK); + + // initGPU(A_gpu); + + convolution3DCuda(A, B, A_gpu, B_gpu); + + cudaFree(A_gpu); + cudaFree(B_gpu); + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // conv3D(A, B_ref); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(B, B_ref); + free(A); + free(B); + + return 0; +} diff --git a/workloads/micro/standard/3DCONV/Makefile b/workloads/micro/standard/3DCONV/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..72aac9cb83cca03110da33f8da9119f32af90ccd --- /dev/null +++ b/workloads/micro/standard/3DCONV/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := 3DConvolution +CUFILES := 3DConvolution.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o 3DConvolution diff --git a/workloads/micro/standard/3DCONV/run.sh b/workloads/micro/standard/3DCONV/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..0c27d78b8d3484896bd6812043b5466b074cbebf --- /dev/null +++ b/workloads/micro/standard/3DCONV/run.sh @@ -0,0 +1,2 @@ 
+#./3DConvolution 768 768 768 8 +./3DConvolution 1536 1536 1536 8 diff --git a/workloads/micro/standard/3DCONV/run_large.sh b/workloads/micro/standard/3DCONV/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..500c363302024d6080c025784efbe2e7fef74f53 --- /dev/null +++ b/workloads/micro/standard/3DCONV/run_large.sh @@ -0,0 +1 @@ +./3DConvolution 384 384 384 8 diff --git a/workloads/micro/standard/3DCONV/run_medium.sh b/workloads/micro/standard/3DCONV/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..d0a9fb120b6de1c16ae4146d7684c5557af95152 --- /dev/null +++ b/workloads/micro/standard/3DCONV/run_medium.sh @@ -0,0 +1 @@ +./3DConvolution 192 192 192 4 diff --git a/workloads/micro/standard/3DCONV/run_mega.sh b/workloads/micro/standard/3DCONV/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..4e21c66c3d1cf21bfe88427d92c54ff8be428d8a --- /dev/null +++ b/workloads/micro/standard/3DCONV/run_mega.sh @@ -0,0 +1 @@ +./3DConvolution 1536 1536 1536 8 diff --git a/workloads/micro/standard/3DCONV/run_small.sh b/workloads/micro/standard/3DCONV/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..f794eec58ed56174c5d02096a9bf5acc4e948d0f --- /dev/null +++ b/workloads/micro/standard/3DCONV/run_small.sh @@ -0,0 +1 @@ +./3DConvolution 96 96 96 4 diff --git a/workloads/micro/standard/3DCONV/run_super.sh b/workloads/micro/standard/3DCONV/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..11f8b43d3e406c466b824420d34991a5c0f876b4 --- /dev/null +++ b/workloads/micro/standard/3DCONV/run_super.sh @@ -0,0 +1 @@ +./3DConvolution 768 768 768 8 diff --git a/workloads/micro/standard/3DCONV/run_tiny.sh b/workloads/micro/standard/3DCONV/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..791c84bc54088ea65fc7612ee94442bbbc615cc3 --- /dev/null +++ b/workloads/micro/standard/3DCONV/run_tiny.sh @@ -0,0 +1 @@ +./3DConvolution 
48 48 48 2 diff --git a/workloads/micro/standard/gemm/Makefile b/workloads/micro/standard/gemm/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d545b0aab3ef63260888227abd6ab99bcaddbff3 --- /dev/null +++ b/workloads/micro/standard/gemm/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := gemm +CUFILES := gemm.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o gemm diff --git a/workloads/micro/standard/gemm/gemm.cu b/workloads/micro/standard/gemm/gemm.cu new file mode 100644 index 0000000000000000000000000000000000000000..b2e414e2af3fee5933b58e27b2eafa2bf8b1d17a --- /dev/null +++ b/workloads/micro/standard/gemm/gemm.cu @@ -0,0 +1,266 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +uint64_t NI; +uint64_t NJ; +uint64_t NK; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK_X 32 +#define DIM_THREAD_BLOCK_Y 32 + + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void gemm(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i,j,k; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i*NJ + j] *= BETA; + for (k = 0; k < NK; ++k) { + C[i*NJ + j] += ALPHA * A[i*NK + k] * B[k*NJ + j]; + } + } + } +} + +void init(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *C_ref) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) + for (j = 0; j < NK; j++) + A[i*NK + j] = ((DATA_TYPE) i*j) / NI; + + for (i = 0; i < NK; 
i++) + for (j = 0; j < NJ; j++) + B[i*NJ + j] = ((DATA_TYPE) i*j + 1) / NJ; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + C_ref[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + } + } + +} + + +void compareResults(DATA_TYPE* C, DATA_TYPE* C_outputFromGpu) +{ + uint64_t i, j, fail; + fail = 0; + + // Compare C1 and C2 + for (i=0; i < NI; i++) + { + for (j=0; j < NJ; j++) + { + // printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + if (percentDiff(C[i*NJ + j], C_outputFromGpu[i*NJ + j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + fail++; + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void gemm_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, uint64_t NI, uint64_t NK, uint64_t NJ) +{ + // Compute each thread's global row and column index + uint64_t row = blockIdx.y * blockDim.y + threadIdx.y; + uint64_t col = blockIdx.x * blockDim.x + threadIdx.x; + + // Statically allocated shared memory + __shared__ DATA_TYPE s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + __shared__ DATA_TYPE s_b[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + // Accumulate in temporary variable + + DATA_TYPE tmp = BETA * c[row * NJ + col]; + + // Sweep tile across matrix + for (uint64_t i = 0; i < NK; i += blockDim.x) + { + s_a[threadIdx.y * blockDim.x + threadIdx.x] = a[row * NK + i + threadIdx.x]; + s_b[threadIdx.y * blockDim.x + threadIdx.x] = b[(i + threadIdx.y) * NJ + col]; + + // Wait for both tiles to be loaded in before doing computation + __syncthreads(); + + // Do matrix multiplication on the small matrix + for (uint64_t k = 0; k < blockDim.x; k++) + { + tmp += ALPHA * s_a[threadIdx.y * blockDim.x + k] * s_b[k * blockDim.x + threadIdx.x]; + } + + // Wait for all threads to finish using 
current tiles before loading in new ones + __syncthreads(); + } + + c[row * NJ + col] = tmp; +} + +void gemmCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 grid((size_t)(ceil( ((float)NI)/ ((float)block.x) )),(size_t)(ceil( ((float)NJ)/ ((float)block.y) ))); + + //t_start = rtclock(); + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI * NK, cudaMemcpyHostToDevice); + cudaMemcpy(B_gpu, B, sizeof(DATA_TYPE) * NK * NJ, cudaMemcpyHostToDevice); + cudaMemcpy(C_gpu, C, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyHostToDevice); + gemm_kernel<<< grid, block >>>(A_gpu, B_gpu, C_gpu, NI, NK, NJ); + cudaDeviceSynchronize(); + cudaMemcpy(C, C_gpu, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyDeviceToHost); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + if (argc >= 4) { + NI = atoll(argv[1]); + NK = atoll(argv[2]); + NJ = atoll(argv[3]); + } else { + NI = SIZE; + NK = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE* C; + DATA_TYPE *C_ref; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + + A = (DATA_TYPE*)malloc(NI*NK*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NK*NJ*sizeof(DATA_TYPE)); + C = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + C_ref = (DATA_TYPE *)malloc(NI * NJ * sizeof(DATA_TYPE)); + + //cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + //cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + //cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + init(A, B, C, C_ref); + + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + cudaMalloc(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + cudaMalloc(&C_gpu, 
sizeof(DATA_TYPE) * NI * NJ); + + gemmCuda(A, B, C, A_gpu, B_gpu, C_gpu); + + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // gemm(A, B, C_ref); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(C, C_ref); + free(A); + free(B); + free(C); + free(C_ref); + return 0; +} + diff --git a/workloads/micro/standard/gemm/run.sh b/workloads/micro/standard/gemm/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..d234537561c276f291434f42a7337bc5d5a61605 --- /dev/null +++ b/workloads/micro/standard/gemm/run.sh @@ -0,0 +1,3 @@ +#./gemm 1024 1024 1024 +./gemm 32768 32768 32768 +#./gemm 512 512 512 diff --git a/workloads/micro/standard/gemm/run_large.sh b/workloads/micro/standard/gemm/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..cc1dbe192ecafb7558b76cfa4d2ce05a15067999 --- /dev/null +++ b/workloads/micro/standard/gemm/run_large.sh @@ -0,0 +1 @@ +./gemm 8192 8192 8192 diff --git a/workloads/micro/standard/gemm/run_medium.sh b/workloads/micro/standard/gemm/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..ebae2815e56031234ccdef0d98cb774f111e82a2 --- /dev/null +++ b/workloads/micro/standard/gemm/run_medium.sh @@ -0,0 +1 @@ +./gemm 4096 4096 4096 diff --git a/workloads/micro/standard/gemm/run_mega.sh b/workloads/micro/standard/gemm/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..7a7e74e163fa0d0ad774327163771ce8eb23bd79 --- /dev/null +++ b/workloads/micro/standard/gemm/run_mega.sh @@ -0,0 +1 @@ +./gemm 32768 32768 32768 diff --git a/workloads/micro/standard/gemm/run_small.sh b/workloads/micro/standard/gemm/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..df5896fd490618e0f8c5608097518b3837b9ad33 --- /dev/null +++ b/workloads/micro/standard/gemm/run_small.sh @@ -0,0 +1 @@ +./gemm 1024 1024 1024 diff --git 
a/workloads/micro/standard/gemm/run_super.sh b/workloads/micro/standard/gemm/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..9736fc5d97c8ef9b55a3157882e05979f65a5011 --- /dev/null +++ b/workloads/micro/standard/gemm/run_super.sh @@ -0,0 +1 @@ +./gemm 16384 16384 16384 diff --git a/workloads/micro/standard/gemm/run_tiny.sh b/workloads/micro/standard/gemm/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..9858f12d61d44c0f6265f7c22c27fab446500f55 --- /dev/null +++ b/workloads/micro/standard/gemm/run_tiny.sh @@ -0,0 +1 @@ +./gemm 512 512 512 diff --git a/workloads/micro/standard/gemm_perf/Makefile b/workloads/micro/standard/gemm_perf/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d545b0aab3ef63260888227abd6ab99bcaddbff3 --- /dev/null +++ b/workloads/micro/standard/gemm_perf/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := gemm +CUFILES := gemm.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o gemm diff --git a/workloads/micro/standard/gemm_perf/gemm b/workloads/micro/standard/gemm_perf/gemm new file mode 100755 index 0000000000000000000000000000000000000000..e0537370873af02028b2816a58882af6e9af22d0 Binary files /dev/null and b/workloads/micro/standard/gemm_perf/gemm differ diff --git a/workloads/micro/standard/gemm_perf/gemm.cu b/workloads/micro/standard/gemm_perf/gemm.cu new file mode 100644 index 0000000000000000000000000000000000000000..9abbb9ccb87e76fac010d1ff039df69ec409da19 --- /dev/null +++ b/workloads/micro/standard/gemm_perf/gemm.cu @@ -0,0 +1,266 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +uint64_t NI; +uint64_t NJ; +uint64_t NK; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK_X 32 +#define DIM_THREAD_BLOCK_Y 32 + + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void gemm(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i,j,k; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i*NJ + j] *= BETA; + for (k = 0; k < NK; ++k) { + C[i*NJ + j] += ALPHA * A[i*NK + k] * B[k*NJ + j]; + } + } + } +} + +void init(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *C_ref) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) + for (j = 0; j < NK; j++) + A[i*NK + j] = ((DATA_TYPE) i*j) / NI; + + for (i = 0; i < NK; 
i++) + for (j = 0; j < NJ; j++) + B[i*NJ + j] = ((DATA_TYPE) i*j + 1) / NJ; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + C_ref[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + } + } + +} + + +void compareResults(DATA_TYPE* C, DATA_TYPE* C_outputFromGpu) +{ + uint64_t i, j, fail; + fail = 0; + + // Compare C1 and C2 + for (i=0; i < NI; i++) + { + for (j=0; j < NJ; j++) + { + // printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + if (percentDiff(C[i*NJ + j], C_outputFromGpu[i*NJ + j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + fail++; + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void gemm_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, uint64_t NI, uint64_t NK, uint64_t NJ) +{ + // Compute each thread's global row and column index + uint64_t row = blockIdx.y * blockDim.y + threadIdx.y; + uint64_t col = blockIdx.x * blockDim.x + threadIdx.x; + + // Statically allocated shared memory + __shared__ DATA_TYPE s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + __shared__ DATA_TYPE s_b[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + // Accumulate in temporary variable + + DATA_TYPE tmp = BETA * c[row * NJ + col]; + + // Sweep tile across matrix + for (uint64_t i = 0; i < NK; i += blockDim.x) + { + s_a[threadIdx.y * blockDim.x + threadIdx.x] = a[row * NK + i + threadIdx.x]; + s_b[threadIdx.y * blockDim.x + threadIdx.x] = b[(i + threadIdx.y) * NJ + col]; + + // Wait for both tiles to be loaded in before doing computation + __syncthreads(); + + // Do matrix multiplication on the small matrix + for (uint64_t k = 0; k < blockDim.x; k++) + { + tmp += ALPHA * s_a[threadIdx.y * blockDim.x + k] * s_b[k * blockDim.x + threadIdx.x]; + } + + // Wait for all threads to finish using 
current tiles before loading in new ones + __syncthreads(); + } + + c[row * NJ + col] = tmp; +} + +void gemmCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 grid((size_t)(ceil( ((float)NI)/ ((float)block.x) )),(size_t)(ceil( ((float)NJ)/ ((float)block.y) ))); + + //t_start = rtclock(); + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI * NK, cudaMemcpyHostToDevice); + cudaMemcpy(B_gpu, B, sizeof(DATA_TYPE) * NK * NJ, cudaMemcpyHostToDevice); + cudaMemcpy(C_gpu, C, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyHostToDevice); + gemm_kernel<<< grid, block >>>(A_gpu, B_gpu, C_gpu, NI, NK, NJ); + cudaDeviceSynchronize(); + cudaMemcpy(C, C_gpu, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyDeviceToHost); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + if (argc >= 4) { + NI = atoll(argv[1]); + NK = atoll(argv[2]); + NJ = atoll(argv[3]); + } else { + NI = SIZE; + NK = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE* C; + DATA_TYPE *C_ref; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + + A = (DATA_TYPE*)malloc(NI*NK*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NK*NJ*sizeof(DATA_TYPE)); + C = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + C_ref = (DATA_TYPE *)malloc(NI * NJ * sizeof(DATA_TYPE)); + + //cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + //cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + //cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + init(A, B, C, C_ref); + + GPU_argv_init(); + + //initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + cudaMalloc(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + cudaMalloc(&C_gpu, 
sizeof(DATA_TYPE) * NI * NJ); + + gemmCuda(A, B, C, A_gpu, B_gpu, C_gpu); + + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + endCPU(); + //finiTrace(); + + // t_start = rtclock(); + // gemm(A, B, C_ref); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(C, C_ref); + free(A); + free(B); + free(C); + free(C_ref); + return 0; +} + diff --git a/workloads/micro/standard/gemm_perf/run.sh b/workloads/micro/standard/gemm_perf/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..d234537561c276f291434f42a7337bc5d5a61605 --- /dev/null +++ b/workloads/micro/standard/gemm_perf/run.sh @@ -0,0 +1,3 @@ +#./gemm 1024 1024 1024 +./gemm 32768 32768 32768 +#./gemm 512 512 512 diff --git a/workloads/micro/standard/gemm_perf/run_large.sh b/workloads/micro/standard/gemm_perf/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..cc1dbe192ecafb7558b76cfa4d2ce05a15067999 --- /dev/null +++ b/workloads/micro/standard/gemm_perf/run_large.sh @@ -0,0 +1 @@ +./gemm 8192 8192 8192 diff --git a/workloads/micro/standard/gemm_perf/run_medium.sh b/workloads/micro/standard/gemm_perf/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..ebae2815e56031234ccdef0d98cb774f111e82a2 --- /dev/null +++ b/workloads/micro/standard/gemm_perf/run_medium.sh @@ -0,0 +1 @@ +./gemm 4096 4096 4096 diff --git a/workloads/micro/standard/gemm_perf/run_mega.sh b/workloads/micro/standard/gemm_perf/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..7a7e74e163fa0d0ad774327163771ce8eb23bd79 --- /dev/null +++ b/workloads/micro/standard/gemm_perf/run_mega.sh @@ -0,0 +1 @@ +./gemm 32768 32768 32768 diff --git a/workloads/micro/standard/gemm_perf/run_small.sh b/workloads/micro/standard/gemm_perf/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..df5896fd490618e0f8c5608097518b3837b9ad33 --- /dev/null +++ 
b/workloads/micro/standard/gemm_perf/run_small.sh @@ -0,0 +1 @@ +./gemm 1024 1024 1024 diff --git a/workloads/micro/standard/gemm_perf/run_super.sh b/workloads/micro/standard/gemm_perf/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..9736fc5d97c8ef9b55a3157882e05979f65a5011 --- /dev/null +++ b/workloads/micro/standard/gemm_perf/run_super.sh @@ -0,0 +1 @@ +./gemm 16384 16384 16384 diff --git a/workloads/micro/standard/gemm_perf/run_tiny.sh b/workloads/micro/standard/gemm_perf/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..9858f12d61d44c0f6265f7c22c27fab446500f55 --- /dev/null +++ b/workloads/micro/standard/gemm_perf/run_tiny.sh @@ -0,0 +1 @@ +./gemm 512 512 512 diff --git a/workloads/micro/standard/gemv/Makefile b/workloads/micro/standard/gemv/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..005563000dad0469dbf388d54a639e926cf5aa85 --- /dev/null +++ b/workloads/micro/standard/gemv/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := gemv +CUFILES := gemv.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o gemv diff --git a/workloads/micro/standard/gemv/gemv.cu b/workloads/micro/standard/gemv/gemv.cu new file mode 100644 index 0000000000000000000000000000000000000000..c9a1a457873f4e53cc515f5bf1af0a1b1b57818e --- /dev/null +++ b/workloads/micro/standard/gemv/gemv.cu @@ -0,0 +1,253 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +// define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 40960 +uint64_t NI; +uint64_t NJ; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 256 + +#define BATCH_SIZE 16 + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void gemv(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) + { + C[i] *= BETA; + for (j = 0; j < NJ; j++) + { + C[i] += ALPHA * A[i * NJ + j] * B[j]; + } + } +} + +void init(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *C_ref) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) + for (j = 0; j < NJ; j++) + A[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + + for (j = 0; j < NJ; j++) + B[j] = ((DATA_TYPE)j + 1) / NJ; + + for (i = 0; i < NI; i++) 
+ { + C[i] = ((DATA_TYPE)i + 2) / NI; + C_ref[i] = ((DATA_TYPE)i + 2) / NI; + } +} + +void compareResults(DATA_TYPE *C, DATA_TYPE *C_outputFromGpu) +{ + uint64_t i, fail; + fail = 0; + + // Compare C1 and C2 + for (i = 0; i < NI; i++) + { + if (percentDiff(C[i], C_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + fail++; + printf("%d, GPU is %f, CPU is %f.\n", i, C[i], C_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void gemv_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, uint64_t NI, uint64_t NJ) +{ + uint64_t row = blockIdx.x * blockDim.x + threadIdx.x; + uint64_t tx = threadIdx.x; + + __shared__ DATA_TYPE s_b[DIM_THREAD_BLOCK][BATCH_SIZE]; + + DATA_TYPE tmp = BETA * c[row]; + __syncthreads(); + + uint64_t tile = 0; + uint64_t end_tile = NJ / BATCH_SIZE; + + for (; tile < end_tile; tile += 1) + { + uint64_t base_index = tile * BATCH_SIZE; + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + s_b[tx][k] = b[base_index + k]; + } + __syncthreads(); + + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + tmp += ALPHA * a[row * NJ + base_index + k] * s_b[tx][k]; + } + __syncthreads(); + } + c[row] = tmp; +} + +void gemvCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / (DIM_THREAD_BLOCK)); + + // t_start = rtclock(); + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyHostToDevice); + cudaMemcpy(B_gpu, B, sizeof(DATA_TYPE) * NJ, cudaMemcpyHostToDevice); + cudaMemcpy(C_gpu, C, sizeof(DATA_TYPE) * NI, cudaMemcpyHostToDevice); + gemv_kernel<<>>(A_gpu, B_gpu, C_gpu, NI, NJ); + cudaDeviceSynchronize(); + cudaMemcpy(C, C_gpu, sizeof(DATA_TYPE) * NI, cudaMemcpyDeviceToHost); + // t_end = rtclock(); + + // fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char 
*argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + if (argc >= 3) + { + NI = atoll(argv[1]); + NJ = atoll(argv[2]); + } + else + { + NI = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE *A; + DATA_TYPE *B; + DATA_TYPE *C; + DATA_TYPE *C_ref; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + + A = (DATA_TYPE *)malloc(NI * NJ * sizeof(DATA_TYPE)); + B = (DATA_TYPE *)malloc(NJ * sizeof(DATA_TYPE)); + C = (DATA_TYPE *)malloc(NI * sizeof(DATA_TYPE)); + C_ref = (DATA_TYPE *)malloc(NI * sizeof(DATA_TYPE)); + + // cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + // cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + // cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + init(A, B, C, C_ref); + + GPU_argv_init(); + initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI * NJ); + cudaMalloc(&B_gpu, sizeof(DATA_TYPE) * NJ); + cudaMalloc(&C_gpu, sizeof(DATA_TYPE) * NI); + + gemvCuda(A, B, C, A_gpu, B_gpu, C_gpu); + + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // gemv(A, B, C_ref); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(C, C_ref); + free(A); + free(B); + free(C); + free(C_ref); + return 0; +} diff --git a/workloads/micro/standard/gemv/gemv.cu.bp b/workloads/micro/standard/gemv/gemv.cu.bp new file mode 100644 index 0000000000000000000000000000000000000000..ecc16eeff006780068003c5076ce92698e3ebc0c --- /dev/null +++ b/workloads/micro/standard/gemv/gemv.cu.bp @@ -0,0 +1,334 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +#define GPU_DEVICE 0 + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 40960 +int NI; +int NJ; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK_X 32 +#define DIM_THREAD_BLOCK_Y 32 + +#define SHMEM_SIZE (DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y) +// #define SHMEM_SIZE (NI) + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef int DATA_TYPE; + +void gemv(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + int i,j; + + for (i = 0; i < NI; i++) { + C[i] *= BETA; + for (j = 0; j < NJ; j++) { + C[i] += ALPHA * A[i*NJ + j] * B[j]; + } + } +} + +void init(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *C_ref) +{ + int i, j; + + for (i = 0; i < NI; i++) + for (j = 0; j < NJ; j++) + A[i*NJ + j] = ((DATA_TYPE) i*j) / NI; + + for (j = 0; j < NJ; j++) + B[j] = ((DATA_TYPE) j + 1) / NJ; + + for (i = 0; i < NI; i++) { + C[i] = ((DATA_TYPE)i + 2) / NI; + C_ref[i] = ((DATA_TYPE)i + 2) / NI; + } + + for (i = 
0; i < NI; i++) + for (j = 0; j < NJ; j++) + A[i*NJ + j] = 1.0f; + + // for (j = 0; j < NJ; j++) + // B[j] = 1.0f; + + // for (i = 0; i < NI; i++) { + // C[i] = 1.0f; + // C_ref[i] = 1.0f; + // } +} + + +void compareResults(DATA_TYPE* C, DATA_TYPE* C_outputFromGpu) +{ + int i, fail; + fail = 0; + + // Compare C1 and C2 + for (i=0; i < NI; i++) { + if (percentDiff(C[i], C_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%d, GPU is %f, CPU is %f.\n", i, C[i], C_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + + +void GPU_argv_init() +{ + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, GPU_DEVICE); + printf("setting device %d with name %s\n",GPU_DEVICE,deviceProp.name); + cudaSetDevice( GPU_DEVICE ); +} + +// __global__ void gemv_kernel_reduce(DATA_TYPE *c_tmp, DATA_TYPE *c, int NI, int NJ) +// { +// int index = blockIdx.x * blockDim.x + threadIdx.x; + +// for (int i = 0; i < DIM_THREAD_BLOCK_X; i++) { +// c[index] += c_tmp[index * DIM_THREAD_BLOCK_X + i]; +// } +// } + +// __global__ void gemv_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, int NI, int NJ) +// { +// // Compute each thread's global row and column index +// int row = blockIdx.y * blockDim.y + threadIdx.y; +// __shared__ DATA_TYPE tmp[DIM_THREAD_BLOCK_X]; + +// // Sweep tile across matrix +// for (int i = threadIdx.x; i < NJ; i += blockDim.x) +// { +// tmp[threadIdx.x] = b[i]; +// } + +// __syncthreads(); + +// for (int i = threadIdx.x; i < NJ; i += blockDim.x) +// { +// c[row * DIM_THREAD_BLOCK_X + threadIdx.x] += ALPHA * a[row * NJ + i] * tmp[threadIdx.x]; +// } + +// __syncthreads(); +// } + +// __global__ void gemv_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c) +// { +// // Compute each thread's global row and column index +// int row = blockIdx.y * blockDim.y + threadIdx.y; + +// __shared__ DATA_TYPE tmp[DIM_THREAD_BLOCK_X]; + 
+// tmp[threadIdx.x] = 0; +// __syncthreads(); + +// // Sweep tile across matrix +// for (int i = 0; i < NJ; i+= blockDim.x) { +// // s_b[threadIdx.x] = b[i + threadIdx.x]; +// // __syncthreads(); +// tmp[threadIdx.x] += ALPHA * a[row * NJ + i + threadIdx.x] * b[i + threadIdx.x]; +// __syncthreads(); +// } +// __syncthreads(); + + +// DATA_TYPE tmp_c = c[row] * BETA; +// __syncthreads(); +// for (int i = 0; i < blockDim.x; i++) { +// tmp_c += tmp[i]; +// __syncthreads(); +// } +// __syncthreads(); +// c[row] = tmp_c; +// __syncthreads(); +// } + +__global__ void gemv_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, int NI, int NJ) +{ + // Compute each thread's global row and column index + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + + // Statically allocated shared memory + __shared__ DATA_TYPE s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + __shared__ DATA_TYPE s_b[DIM_THREAD_BLOCK_X]; + + // Accumulate in temporary variable + + DATA_TYPE tmp = BETA * c[row]; + + // Sweep tile across matrix + for (int i = 0; i < NJ; i += blockDim.x) + { + s_a[threadIdx.y * blockDim.x + threadIdx.x] = a[row * NJ + i + threadIdx.x]; + s_b[threadIdx.x] = b[i + threadIdx.x]; + + // = b[(i + threadIdx.y) * NJ + col]; + + // Wait for both tiles to be loaded in before doing computation + __syncthreads(); + + // Do matrix multiplication on the small matrix + for (int k = 0; k < blockDim.x; k++) + { + tmp += ALPHA * s_a[threadIdx.y * blockDim.x + k] * s_b[threadIdx.x]; + } + + // Wait for all threads to finish using current tiles before loading in new ones + __syncthreads(); + } + + c[row] = tmp; +} + +void gemvCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu, DATA_TYPE *C_gpu_tmp) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + // dim3 grid(1, (size_t)(ceil(((float)NJ) / ((float)block.y)))); + dim3 grid((size_t)(ceil( ((float)NI)/ 
((float)block.x) )),(size_t)(ceil( ((float)NJ)/ ((float)block.y) ))); + + dim3 block_reduce(DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y); + dim3 grid_reduce((size_t)(ceil(((float)NJ) / ((float)(DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y))))); + + t_start = rtclock(); + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI * NJ, cudaMemcpyHostToDevice); + cudaMemcpy(B_gpu, B, sizeof(DATA_TYPE) * NJ, cudaMemcpyHostToDevice); + cudaMemcpy(C_gpu, C, sizeof(DATA_TYPE) * NI, cudaMemcpyHostToDevice); + gemv_kernel<<>>(A_gpu, B_gpu, C_gpu, NI, NJ); + // gemv_kernel_reduce<<>>(C_gpu_tmp, C_gpu, NI, NJ); + cudaDeviceSynchronize(); + cudaMemcpy(C, C_gpu, sizeof(DATA_TYPE) * NI, cudaMemcpyDeviceToHost); + t_end = rtclock(); + + fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + + +int main(int argc, char *argv[]) +{ + if (argc >= 3) { + NI = atoi(argv[1]); + NJ = atoi(argv[2]); + } else { + NI = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE* C; + DATA_TYPE *C_ref; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + DATA_TYPE *C_gpu_tmp; + + A = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NJ*sizeof(DATA_TYPE)); + C = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + C_ref = (DATA_TYPE *)malloc(NI*sizeof(DATA_TYPE)); + + //cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + //cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + //cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI * NJ); + cudaMalloc(&B_gpu, sizeof(DATA_TYPE) * NJ); + cudaMalloc(&C_gpu, sizeof(DATA_TYPE) * NI); + cudaMalloc(&C_gpu_tmp, sizeof(DATA_TYPE) * NI * DIM_THREAD_BLOCK_X); + + init(A, B, C, C_ref); + + GPU_argv_init(); + + gemvCuda(A, B, C, A_gpu, B_gpu, C_gpu, C_gpu_tmp); + + t_start = rtclock(); + gemv(A, B, C_ref); + t_end = rtclock(); + fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + compareResults(C, C_ref); + + free(A); + free(B); + free(C); + 
free(C_ref); + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + cudaFree(C_gpu_tmp); + return 0; +} + diff --git a/workloads/micro/standard/gemv/run.sh b/workloads/micro/standard/gemv/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..c75d8f69375fd6c923a93ba88a4cb43238844f8b --- /dev/null +++ b/workloads/micro/standard/gemv/run.sh @@ -0,0 +1,2 @@ +# ./gemv 16384 16384 +./gemv 32768 32768 diff --git a/workloads/micro/standard/gemv/run_large.sh b/workloads/micro/standard/gemv/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..102aa73e96b74d0d46ef261f0ffd665639679025 --- /dev/null +++ b/workloads/micro/standard/gemv/run_large.sh @@ -0,0 +1 @@ +./gemv 8192 8192 diff --git a/workloads/micro/standard/gemv/run_medium.sh b/workloads/micro/standard/gemv/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..2cc0f68e4d4e96c36ee0d72e64f7acdb7c97233c --- /dev/null +++ b/workloads/micro/standard/gemv/run_medium.sh @@ -0,0 +1 @@ +./gemv 4096 4096 diff --git a/workloads/micro/standard/gemv/run_mega.sh b/workloads/micro/standard/gemv/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..88ca5ba9468efc68d859100ccbd246b9b3af960b --- /dev/null +++ b/workloads/micro/standard/gemv/run_mega.sh @@ -0,0 +1 @@ +./gemv 65536 65536 diff --git a/workloads/micro/standard/gemv/run_small.sh b/workloads/micro/standard/gemv/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..55646a647b7f53aff91c5e38562bc449d75daa9a --- /dev/null +++ b/workloads/micro/standard/gemv/run_small.sh @@ -0,0 +1 @@ +./gemv 1024 1024 diff --git a/workloads/micro/standard/gemv/run_super.sh b/workloads/micro/standard/gemv/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..295d223c3c9c768e58c0fba722fa859b74564d2d --- /dev/null +++ b/workloads/micro/standard/gemv/run_super.sh @@ -0,0 +1 @@ +./gemv 32768 32768 diff --git 
a/workloads/micro/standard/gemv/run_tiny.sh b/workloads/micro/standard/gemv/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..60becef20c6cc3113ff1b4897d177ff3cbd77eb8 --- /dev/null +++ b/workloads/micro/standard/gemv/run_tiny.sh @@ -0,0 +1 @@ +./gemv 512 512 diff --git a/workloads/micro/standard/saxpy/Makefile b/workloads/micro/standard/saxpy/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..069a5001c286cb5f44c4686449f04755cd5a2e52 --- /dev/null +++ b/workloads/micro/standard/saxpy/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := saxpy +CUFILES := saxpy.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o saxpy diff --git a/workloads/micro/standard/saxpy/run.sh b/workloads/micro/standard/saxpy/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..61775ede6c26208ae2a150958f8eec5375952773 --- /dev/null +++ b/workloads/micro/standard/saxpy/run.sh @@ -0,0 +1,11 @@ +# ./vector_seq 17179869184 10 524288 + +./vector_seq 8589934592 10 262144 +#nsys profile --stats=true ./vector_seq 1073741824 10 32768 + + +# ./vector_seq 4294967296 10 131072 + +# ./vector_seq 2147483648 10 32768 +#./vector_seq 1073741824 10 32768 +# ./vector_seq 1048576 100 32768 diff --git a/workloads/micro/standard/saxpy/run_large.sh b/workloads/micro/standard/saxpy/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..bce2787645ea40cb94ba004ec19cf4728ba48647 --- /dev/null +++ b/workloads/micro/standard/saxpy/run_large.sh @@ -0,0 +1 @@ +./saxpy 134217728 100 65536 diff --git a/workloads/micro/standard/saxpy/run_medium.sh b/workloads/micro/standard/saxpy/run_medium.sh new file mode 100755 index 
0000000000000000000000000000000000000000..af6f429a95aa9d5ede352e30e59405aaee4ee55b --- /dev/null +++ b/workloads/micro/standard/saxpy/run_medium.sh @@ -0,0 +1 @@ +./saxpy 16777216 100 32768 diff --git a/workloads/micro/standard/saxpy/run_mega.sh b/workloads/micro/standard/saxpy/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..5dce5483842d255b4130f7ca89b4894c65b5a44b --- /dev/null +++ b/workloads/micro/standard/saxpy/run_mega.sh @@ -0,0 +1 @@ +./saxpy 4294967296 100 262144 diff --git a/workloads/micro/standard/saxpy/run_small.sh b/workloads/micro/standard/saxpy/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..c927e5e53c30ca58a12dbdf95e264a4b790ab0b8 --- /dev/null +++ b/workloads/micro/standard/saxpy/run_small.sh @@ -0,0 +1 @@ +./saxpy 2097152 100 32768 diff --git a/workloads/micro/standard/saxpy/run_super.sh b/workloads/micro/standard/saxpy/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..7f8d7b99b0be6cc643cd686965806f7edbc0af27 --- /dev/null +++ b/workloads/micro/standard/saxpy/run_super.sh @@ -0,0 +1 @@ +./saxpy 1073741824 100 65536 diff --git a/workloads/micro/standard/saxpy/run_tiny.sh b/workloads/micro/standard/saxpy/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..4604f1747023f34e81c15ce89c4be5a6fb39bc95 --- /dev/null +++ b/workloads/micro/standard/saxpy/run_tiny.sh @@ -0,0 +1 @@ +./saxpy 262144 100 8192 diff --git a/workloads/micro/standard/saxpy/saxpy.cu b/workloads/micro/standard/saxpy/saxpy.cu new file mode 100644 index 0000000000000000000000000000000000000000..fa34385f5fc3f8a5774b8ac2ede71632668593c1 --- /dev/null +++ b/workloads/micro/standard/saxpy/saxpy.cu @@ -0,0 +1,255 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096000 +#define ITER 100 +uint64_t NI; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 256 + +#define BATCH_SIZE 8 + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void saxpy(DATA_TYPE *A, DATA_TYPE *B, uint64_t iterations) +{ + for (uint64_t i = 0; i < NI; i++) { + for (uint64_t iter = 0; iter < iterations; iter++) { + B[i] = ALPHA * A[i] + B[i]; + } + } +} + +void init(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *B_ref) +{ + for (uint64_t i = 0; i < NI; i++) + A[i] = ((DATA_TYPE) i) / NI; + + for (uint64_t i = 0; i < NI; i++) { + B[i] = ((DATA_TYPE)i + 2) / NI; + B_ref[i] = ((DATA_TYPE)i + 2) / NI; + } +} + + +void compareResults(DATA_TYPE* 
B, DATA_TYPE* B_outputFromGpu) +{ + uint64_t fail = 0; + + // Compare C1 and C2 + for (uint64_t i = 0; i < NI; i++) { + // printf("%lld, GPU is %f, CPU is %f.\n", i, B[i], B_outputFromGpu[i]); + if (percentDiff(B[i], B_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%lld, GPU is %f, CPU is %f.\n", i, B[i], B_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void saxpy_kernel(DATA_TYPE *a, DATA_TYPE *b, uint64_t NI, uint64_t iterations, uint64_t block_size) +{ + // Compute each thread's global row and column index + const uint64_t mem_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + __shared__ DATA_TYPE tmp_a[mem_size]; + __shared__ DATA_TYPE tmp_b[mem_size]; + + uint64_t total_tiles = NI / mem_size; + uint64_t base_tiles = total_tiles / gridDim.x; + + uint64_t tiles_this_block = block_size / mem_size; + + uint64_t tile = base_tiles * blockIdx.x; + uint64_t end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) { + tmp_a[i] = a[tile * mem_size + i]; + tmp_b[i] = b[tile * mem_size + i]; + } + + __syncthreads(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + for (uint64_t iter = 0; iter < iterations; iter++) + { + tmp_b[i] += ALPHA * tmp_a[i]; + } + } + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + b[tile * mem_size + i] = tmp_b[i]; + } + } +} + + +void saxpyCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, uint64_t iterations, uint64_t block_size) +{ + double t_start, t_end; + if (block_size <= DIM_THREAD_BLOCK) + block_size = DIM_THREAD_BLOCK; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / block_size); + + //t_start = rtclock(); + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI, cudaMemcpyHostToDevice); + cudaMemcpy(B_gpu, B, sizeof(DATA_TYPE) * NI, 
cudaMemcpyHostToDevice); + saxpy_kernel<<>>(A_gpu, B_gpu, NI, iterations, block_size); + cudaDeviceSynchronize(); + cudaMemcpy(B, B_gpu, sizeof(DATA_TYPE) * NI, cudaMemcpyDeviceToHost); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + uint64_t iterations = ITER; + uint64_t block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + if (argc >= 4) { + NI = atoll(argv[1]); + iterations = atoi(argv[2]); + block_size = atoi(argv[3]); + } else { + NI = SIZE; + iterations = ITER; + block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + } + + int nblocks = NI / block_size; + if (nblocks > 64) + { + nblocks = 64; + block_size = NI / nblocks; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE *B_ref; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + + A = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + B_ref = (DATA_TYPE *)malloc(NI*sizeof(DATA_TYPE)); + + //cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + //cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + //cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + init(A, B, B_ref); + + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI); + cudaMalloc(&B_gpu, sizeof(DATA_TYPE) * NI); + + saxpyCuda(A, B, A_gpu, B_gpu, iterations, block_size); + + cudaFree(A_gpu); + cudaFree(B_gpu); + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // saxpy(A, B_ref, iterations); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(B, B_ref); + free(A); + free(B); + free(B_ref); + return 0; +} + diff --git a/workloads/micro/standard/vector_rand/Makefile b/workloads/micro/standard/vector_rand/Makefile new file mode 100644 index 
0000000000000000000000000000000000000000..b2abd68d5e6d513a5652b845d6d822b15fc12a59 --- /dev/null +++ b/workloads/micro/standard/vector_rand/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := vector_rand +CUFILES := vector_rand.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o vector_rand diff --git a/workloads/micro/standard/vector_rand/run.sh b/workloads/micro/standard/vector_rand/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..61775ede6c26208ae2a150958f8eec5375952773 --- /dev/null +++ b/workloads/micro/standard/vector_rand/run.sh @@ -0,0 +1,11 @@ +# ./vector_seq 17179869184 10 524288 + +./vector_seq 8589934592 10 262144 +#nsys profile --stats=true ./vector_seq 1073741824 10 32768 + + +# ./vector_seq 4294967296 10 131072 + +# ./vector_seq 2147483648 10 32768 +#./vector_seq 1073741824 10 32768 +# ./vector_seq 1048576 100 32768 diff --git a/workloads/micro/standard/vector_rand/run_large.sh b/workloads/micro/standard/vector_rand/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..0fbcd5c0eb481d41efff934e3f19f162bc1f73e2 --- /dev/null +++ b/workloads/micro/standard/vector_rand/run_large.sh @@ -0,0 +1 @@ +./vector_rand 134217728 100 65536 diff --git a/workloads/micro/standard/vector_rand/run_medium.sh b/workloads/micro/standard/vector_rand/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..84e868f85fbc74d101d476d501095afd4aa6d017 --- /dev/null +++ b/workloads/micro/standard/vector_rand/run_medium.sh @@ -0,0 +1 @@ +./vector_rand 16777216 100 32768 diff --git a/workloads/micro/standard/vector_rand/run_mega.sh b/workloads/micro/standard/vector_rand/run_mega.sh new file mode 100755 index 
0000000000000000000000000000000000000000..24b084a7613986acafd45cf6ca300fe52f0426d5 --- /dev/null +++ b/workloads/micro/standard/vector_rand/run_mega.sh @@ -0,0 +1 @@ +./vector_rand 8589934592 100 262144 diff --git a/workloads/micro/standard/vector_rand/run_small.sh b/workloads/micro/standard/vector_rand/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..baf1b589adcc83b946051cf0fc2383b45746c62f --- /dev/null +++ b/workloads/micro/standard/vector_rand/run_small.sh @@ -0,0 +1 @@ +./vector_rand 2097152 100 32768 diff --git a/workloads/micro/standard/vector_rand/run_super.sh b/workloads/micro/standard/vector_rand/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..19be53cddf75e6d6c5812e3ec337cce2fd9079ae --- /dev/null +++ b/workloads/micro/standard/vector_rand/run_super.sh @@ -0,0 +1 @@ +./vector_rand 1073741824 100 65536 diff --git a/workloads/micro/standard/vector_rand/run_tiny.sh b/workloads/micro/standard/vector_rand/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..55ee72259022f385aabf8eedc426425e6817835f --- /dev/null +++ b/workloads/micro/standard/vector_rand/run_tiny.sh @@ -0,0 +1 @@ +./vector_rand 262144 100 8192 diff --git a/workloads/micro/standard/vector_rand/vector_rand.cu b/workloads/micro/standard/vector_rand/vector_rand.cu new file mode 100644 index 0000000000000000000000000000000000000000..444d1c107fd903fa4006edb8942855c93f808ce6 --- /dev/null +++ b/workloads/micro/standard/vector_rand/vector_rand.cu @@ -0,0 +1,254 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.005 + +/* Problem size */ +#define SIZE 4096000 +#define ITER 100 +uint64_t NI; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 256 + +#define BATCH_SIZE 16 + +#define LCG_A 1.1f +#define LCG_B 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void saxpy(DATA_TYPE *A, uint64_t iterations) +{ + for (uint64_t i = 0; i < NI; i++) { + for (uint64_t iter = 0; iter < iterations; iter++) { + A[i] = LCG_A * A[i] + LCG_B; + } + } +} + +void init(DATA_TYPE *A, DATA_TYPE *A_ref) +{ + for (uint64_t i = 0; i < NI; i++) { + A[i] = ((DATA_TYPE) i) / NI; + A_ref[i] = ((DATA_TYPE) i) / NI; + } +} + + +void compareResults(DATA_TYPE* A, DATA_TYPE* A_outputFromGpu) +{ + uint64_t fail = 0; + + // Compare C1 and C2 + for (uint64_t i = 0; i < NI; i++) { + // printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], 
A_outputFromGpu[i]); + if (percentDiff(A[i], A_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void vector_rand_kernel(DATA_TYPE *a, uint64_t NI, uint64_t iterations, uint64_t block_size, size_t seed) +{ + // Compute each thread's global row and column index + const uint64_t mem_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + __shared__ DATA_TYPE tmp[mem_size]; + + curandState_t randState; + size_t tx = blockIdx.x * blockDim.x + threadIdx.x; + curand_init(seed, tx, 0, &randState); + size_t idx = 0; + + uint64_t total_tiles = NI / mem_size; + uint64_t base_tiles = total_tiles / gridDim.x; + + uint64_t tiles_this_block = block_size / mem_size; + + uint64_t tile = base_tiles * blockIdx.x; + uint64_t end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + // tmp[i] = a[tile * mem_size + i]; + + idx = curand(&randState); + idx <<= 32; + idx |= curand(&randState); + tmp[i] = a[tile * mem_size + idx % mem_size]; + } + + __syncthreads(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + for (uint64_t iter = 0; iter < iterations; iter++) + { + tmp[i] = LCG_A * tmp[i] + LCG_B; + } + } + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + a[tile * mem_size + idx % mem_size] = tmp[i]; + } + } +} + +void saxpyCuda(DATA_TYPE *A, DATA_TYPE *A_gpu, uint64_t iterations, uint64_t block_size) +{ + double t_start, t_end; + if (block_size <= DIM_THREAD_BLOCK) + block_size = DIM_THREAD_BLOCK; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / block_size); + + //t_start = rtclock(); + cudaMemcpy(A_gpu, A, sizeof(DATA_TYPE) * NI, cudaMemcpyHostToDevice); + vector_rand_kernel<<>>(A_gpu, NI, iterations, block_size, 
832945); + cudaDeviceSynchronize(); + cudaMemcpy(A, A_gpu, sizeof(DATA_TYPE) * NI, cudaMemcpyDeviceToHost); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + uint64_t iterations = ITER; + uint64_t block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + if (argc >= 4) { + NI = atoll(argv[1]); + iterations = atoi(argv[2]); + block_size = atoi(argv[3]); + } + else { + NI = SIZE; + iterations = ITER; + block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + } + + int nblocks = NI / block_size; + if (nblocks > 64) + { + nblocks = 64; + block_size = NI / nblocks; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE *A_ref; + + DATA_TYPE *A_gpu; + + A = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + A_ref = (DATA_TYPE *)malloc(NI*sizeof(DATA_TYPE)); + + //cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + //cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + //cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + init(A, A_ref); + + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI); + + saxpyCuda(A, A_gpu, iterations, block_size); + + // t_start = rtclock(); + // saxpy(A_ref, iterations); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(A, A_ref); + cudaFree(A_gpu); + endCPU(); + finiTrace(); + + free(A); + free(A_ref); + return 0; +} + diff --git a/workloads/micro/standard/vector_seq/Makefile b/workloads/micro/standard/vector_seq/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..2d9857cedea5755ad6a381996c578f9cbd41424a --- /dev/null +++ b/workloads/micro/standard/vector_seq/Makefile @@ -0,0 +1,109 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 
-O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := vector_seq +CUFILES := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + + +EXECUTABLE_4096_256 := vector_seq_4096_256 +CUFILES_4096_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=4096 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_2048_256 := vector_seq_2048_256 +CUFILES_2048_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=2048 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_1024_256 := vector_seq_1024_256 +CUFILES_1024_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=1024 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_512_256 := vector_seq_512_256 +CUFILES_512_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=512 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_256_256 := vector_seq_256_256 +CUFILES_256_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=256 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_128_256 := vector_seq_128_256 +CUFILES_128_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=128 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_64_256 := vector_seq_64_256 +CUFILES_64_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_32_256 := vector_seq_32_256 +CUFILES_32_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=32 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_16_256 := vector_seq_16_256 +CUFILES_16_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=16 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + + 
+EXECUTABLE_1024_4 := vector_seq_1024_4 +CUFILES_1024_4 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=1024 -DBATCH_SIZE=4 + +EXECUTABLE_512_8 := vector_seq_512_8 +CUFILES_512_8 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=512 -DBATCH_SIZE=8 + +EXECUTABLE_256_16 := vector_seq_256_16 +CUFILES_256_16 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_128_32 := vector_seq_128_32 +CUFILES_128_32 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=32 + +EXECUTABLE_64_64 := vector_seq_64_64 +CUFILES_64_64 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=64 -DBATCH_SIZE=64 + +EXECUTABLE_32_128 := vector_seq_32_128 +CUFILES_32_128 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=32 -DBATCH_SIZE=128 + + +EXECUTABLE_2 := vector_seq_2 +CUFILES_2:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=2 + +EXECUTABLE_4 := vector_seq_4 +CUFILES_4:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=4 + +EXECUTABLE_8 := vector_seq_8 +CUFILES_8:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=8 + +EXECUTABLE_16 := vector_seq_16 +CUFILES_16:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=16 + +EXECUTABLE_32 := vector_seq_32 +CUFILES_32:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp 
$(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=32 + +EXECUTABLE_64 := vector_seq_64 +CUFILES_64:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=64 + +EXECUTABLE_128 := vector_seq_128 +CUFILES_128:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=128 + + + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} + + $(NVCC) ${NVCCCFLAGS} ${CUFILES_4096_256} ${DEF} -o ${EXECUTABLE_4096_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_2048_256} ${DEF} -o ${EXECUTABLE_2048_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_1024_256} ${DEF} -o ${EXECUTABLE_1024_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_512_256} ${DEF} -o ${EXECUTABLE_512_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_256_256} ${DEF} -o ${EXECUTABLE_256_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_128_256} ${DEF} -o ${EXECUTABLE_128_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_64_256} ${DEF} -o ${EXECUTABLE_64_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_32_256} ${DEF} -o ${EXECUTABLE_32_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_16_256} ${DEF} -o ${EXECUTABLE_16_256} + + $(NVCC) ${NVCCCFLAGS} ${CUFILES_1024_4} ${DEF} -o ${EXECUTABLE_1024_4} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_512_8} ${DEF} -o ${EXECUTABLE_512_8} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_256_16} ${DEF} -o ${EXECUTABLE_256_16} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_128_32} ${DEF} -o ${EXECUTABLE_128_32} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_64_64} ${DEF} -o ${EXECUTABLE_64_64} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_32_128} ${DEF} -o ${EXECUTABLE_32_128} + + $(NVCC) ${NVCCCFLAGS} ${CUFILES_2} ${DEF} -o ${EXECUTABLE_2} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_4} ${DEF} -o ${EXECUTABLE_4} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_8} ${DEF} -o ${EXECUTABLE_8} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_16} ${DEF} -o ${EXECUTABLE_16} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_32} ${DEF} -o ${EXECUTABLE_32} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_64} ${DEF} -o ${EXECUTABLE_64} 
+ $(NVCC) ${NVCCCFLAGS} ${CUFILES_128} ${DEF} -o ${EXECUTABLE_128} + +clean: + rm -f *.o vector_seq vector_seq_* diff --git a/workloads/micro/standard/vector_seq/run.sh b/workloads/micro/standard/vector_seq/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..61775ede6c26208ae2a150958f8eec5375952773 --- /dev/null +++ b/workloads/micro/standard/vector_seq/run.sh @@ -0,0 +1,11 @@ +# ./vector_seq 17179869184 10 524288 + +./vector_seq 8589934592 10 262144 +#nsys profile --stats=true ./vector_seq 1073741824 10 32768 + + +# ./vector_seq 4294967296 10 131072 + +# ./vector_seq 2147483648 10 32768 +#./vector_seq 1073741824 10 32768 +# ./vector_seq 1048576 100 32768 diff --git a/workloads/micro/standard/vector_seq/run_large.sh b/workloads/micro/standard/vector_seq/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..66794fb697b9cd4165ae0e85db50fd512c3467e7 --- /dev/null +++ b/workloads/micro/standard/vector_seq/run_large.sh @@ -0,0 +1 @@ +./vector_seq 134217728 100 65536 diff --git a/workloads/micro/standard/vector_seq/run_medium.sh b/workloads/micro/standard/vector_seq/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..9bbbb986b9351d0e1c82e9d53d86d4d0f83c7492 --- /dev/null +++ b/workloads/micro/standard/vector_seq/run_medium.sh @@ -0,0 +1 @@ +./vector_seq 16777216 100 32768 diff --git a/workloads/micro/standard/vector_seq/run_mega.sh b/workloads/micro/standard/vector_seq/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..7b622d0246ef3a0a542e2422e09099850d52f3d1 --- /dev/null +++ b/workloads/micro/standard/vector_seq/run_mega.sh @@ -0,0 +1 @@ +./vector_seq 8589934592 100 262144 diff --git a/workloads/micro/standard/vector_seq/run_small.sh b/workloads/micro/standard/vector_seq/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..da65ab8dbc61ff2e26a2638703dc97d003cb9dba --- /dev/null +++ 
b/workloads/micro/standard/vector_seq/run_small.sh @@ -0,0 +1 @@ +./vector_seq 2097152 100 32768 diff --git a/workloads/micro/standard/vector_seq/run_super.sh b/workloads/micro/standard/vector_seq/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..5a98cb48e7b06fa4db1cb7db99d9a0e8ebcf7f46 --- /dev/null +++ b/workloads/micro/standard/vector_seq/run_super.sh @@ -0,0 +1 @@ +./vector_seq 1073741824 100 65536 diff --git a/workloads/micro/standard/vector_seq/run_tiny.sh b/workloads/micro/standard/vector_seq/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..a2f7760fa3fb1a37be61193c663da2e38690bfe3 --- /dev/null +++ b/workloads/micro/standard/vector_seq/run_tiny.sh @@ -0,0 +1 @@ +./vector_seq 262144 100 8192 diff --git a/workloads/micro/standard/vector_seq/vector_seq.cu b/workloads/micro/standard/vector_seq/vector_seq.cu new file mode 100644 index 0000000000000000000000000000000000000000..87840dcb2747c0c3287c41768dd4a230faf0702a --- /dev/null +++ b/workloads/micro/standard/vector_seq/vector_seq.cu @@ -0,0 +1,251 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.005 + +/* Problem size */ +#define SIZE 1073741824 +#define ITER 100 +uint64_t NI; + +/* Thread block dimensions */ +#ifndef DIM_THREAD_BLOCK +#define DIM_THREAD_BLOCK 256 +#endif + +#ifndef BATCH_SIZE +#define BATCH_SIZE 16 +#endif + +#ifndef NBLOCKS +#define NBLOCKS 64 +#endif + +#define LCG_A 1.1f +#define LCG_B 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void saxpy(DATA_TYPE *A, uint64_t iterations) +{ + for (uint64_t i = 0; i < NI; i++) { + for (uint64_t iter = 0; iter < iterations; iter++) { + A[i] = LCG_A * A[i] + LCG_B; + } + } +} + +void init(DATA_TYPE *A, DATA_TYPE *A_ref) +{ + for (uint64_t i = 0; i < NI; i++) { + A[i] = ((DATA_TYPE) i) / NI; + A_ref[i] = ((DATA_TYPE) i) / NI; + } +} + + +void compareResults(DATA_TYPE* A, DATA_TYPE* A_outputFromGpu) +{ + uint64_t fail = 0; + + // Compare C1 
and C2 + for (uint64_t i = 0; i < NI; i++) { + // printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + if (percentDiff(A[i], A_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void vector_seq_kernel(DATA_TYPE *a, uint64_t NI, uint64_t iterations, uint64_t block_size) +{ + // Compute each thread's global row and column index + const uint64_t mem_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + // __shared__ DATA_TYPE tmp[mem_size]; + extern __shared__ DATA_TYPE tmp[]; + + uint64_t total_tiles = NI / mem_size; + uint64_t base_tiles = total_tiles / gridDim.x; + + uint64_t tiles_this_block = block_size / mem_size; + + uint64_t tile = base_tiles * blockIdx.x; + uint64_t end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + tmp[i] = a[tile * mem_size + i]; + } + + __syncthreads(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + for (uint64_t iter = 0; iter < iterations; iter++) + { + tmp[i] = LCG_A * tmp[i] + LCG_B; + } + } + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + a[tile * mem_size + i] = tmp[i]; + } + } +} + +void saxpyCuda(DATA_TYPE *A, DATA_TYPE *A_gpu, uint64_t iterations, uint64_t block_size) +{ + double t_start, t_end; + if (block_size <= DIM_THREAD_BLOCK) + block_size = DIM_THREAD_BLOCK; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / block_size); + + int MaxBytesofSharedMemory = DIM_THREAD_BLOCK * BATCH_SIZE * sizeof(DATA_TYPE); + cudaFuncSetAttribute(vector_seq_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, MaxBytesofSharedMemory); + + //t_start = rtclock(); + cudaMemcpy(A_gpu, A, NI * sizeof(DATA_TYPE), cudaMemcpyHostToDevice); + 
vector_seq_kernel<<>>(A_gpu, NI, iterations, block_size); + cudaDeviceSynchronize(); + cudaMemcpy(A, A_gpu, NI * sizeof(DATA_TYPE), cudaMemcpyDeviceToHost); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + uint64_t iterations = ITER; + uint64_t block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + if (argc >= 4) { + NI = atoll(argv[1]); + iterations = atoi(argv[2]); + block_size = atoi(argv[3]); + } + else { + NI = SIZE; + iterations = ITER; + block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + } + + int nblocks = NBLOCKS; + block_size = NI / nblocks; + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE *A_ref; + + DATA_TYPE *A_gpu; + + A = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + A_ref = (DATA_TYPE *)malloc(NI*sizeof(DATA_TYPE)); + + //cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + //cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + //cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + init(A, A_ref); + + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMalloc(&A_gpu, sizeof(DATA_TYPE) * NI); + + saxpyCuda(A, A_gpu, iterations, block_size); + + cudaFree(A_gpu); + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // saxpy(A_ref, iterations); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(A, A_ref); + + free(A); + free(A_ref); + return 0; +} diff --git a/workloads/micro/super_avg.csv b/workloads/micro/super_avg.csv new file mode 100644 index 0000000000000000000000000000000000000000..e6208f30cebc4e773cc09cdb8c4c7d7a58ecc365 --- /dev/null +++ b/workloads/micro/super_avg.csv @@ -0,0 +1,22 @@ +group,,standard,async,uvm,uvm_prefetch,uvm_prefetch_async 
+vector_seq,gpu_kernel,0.05805017425680148,0.03173757332555969,0.21329270550725832,0.0586754827768831,0.03118878558025065 +vector_seq,memcpy,0.717360365981429,0.7185794112766591,0.39943356297819815,0.33789583456699746,0.3379199594585987 +vector_seq,allocation,0.22458945976176947,0.2301877611571537,0.5219579205742119,0.6222651127680494,0.46573701828224945 +vector_rand,gpu_kernel,0.04850459369114966,0.03494442915499904,0.25597967690765394,0.052701443481255335,0.03352365280267823 +vector_rand,memcpy,0.7080178823569716,0.7076241829673889,0.4078670044825631,0.3315590028106266,0.3314020915369003 +vector_rand,allocation,0.24347752395187874,0.2832024587103021,0.6555308332218751,0.5198576476211102,0.48098753719400555 +saxpy,gpu_kernel,0.03349993360954776,0.016660026580555497,0.2242962855183901,0.031558818751110454,0.01668245308671386 +saxpy,memcpy,0.8268232180290235,0.6151823380095137,0.31066537201255073,0.2500596116010173,0.25092528999516023 +saxpy,allocation,0.1396768483614288,0.13699393999785936,0.3734173539258691,0.3661937300322661,0.374398166620341 +gemv,gpu_kernel,0.02867622869775037,0.03695373547178976,0.37762379125723405,0.03444411979294944,0.0383047518764697 +gemv,memcpy,0.5703649187069172,0.5732037869997497,0.6036267315764592,0.2178541889220221,0.2174248271984349 +gemv,allocation,0.4009588525953325,0.39084850851066877,0.7196380744329455,0.7173040675887135,0.6934178925011326 +2DCONV,gpu_kernel,0.016966114507647043,0.03817781207240045,0.28133043400986885,0.01522274170579749,0.03707065146498869 +2DCONV,memcpy,0.8488492808059975,0.9916308894783414,0.4115606099344337,0.08244177056990615,0.08240950516368743 +2DCONV,allocation,0.13418460468635549,0.16370812047034441,0.3317212052371825,0.33281055104048485,0.3183306048665641 +3DCONV,gpu_kernel,0.07404101581588855,0.13833976170356355,0.28546073417932794,0.0789080133417091,0.13819353367843776 +3DCONV,memcpy,0.6290238889842874,0.6431300799648962,0.20152914480899037,0.05700373786894915,0.05710092696675891 
+3DCONV,allocation,0.2969350951998241,0.2515155671215799,0.4131152187867928,0.4253670910167462,0.4374900824963273 +gemm,gpu_kernel,0.7286444306961865,0.7891829654552334,0.7594224966629366,0.7270803165936371,0.7829551812884327 +gemm,memcpy,0.15716135904382605,0.15788682204271773,0.1417815495844003,0.0582486215304029,0.057265310266941406 +gemm,allocation,0.11419421025998751,0.12573271185354737,0.16086794793818573,0.16271552837896275,0.15312625839669744 diff --git a/workloads/micro/super_avg_profile.csv b/workloads/micro/super_avg_profile.csv new file mode 100644 index 0000000000000000000000000000000000000000..b0203921970eb4b989f349bd5a379f11be17268e --- /dev/null +++ b/workloads/micro/super_avg_profile.csv @@ -0,0 +1,5 @@ +group,,uvm_prefetch,async,standard,uvm,uvm_prefetch_async +gemm,memory,0.3048728164266013,0.300398406374502,0.3048728164266013,0.3048728164266013,0.300398406374502 +gemm,control,0.022424587364826408,0.031390919837134976,0.022424587364826408,0.022424587364826408,0.031390919837134976 +gemm,int,0.38127927849043386,0.3970053850531939,0.38127927849043386,0.38127927849043386,0.3970053850531939 +gemm,fp,0.29142331771813845,0.2869401514819842,0.29142331771813845,0.29142331771813845,0.2869401514819842 diff --git a/workloads/micro/uvm/2DCONV/2DConvolution.cu b/workloads/micro/uvm/2DCONV/2DConvolution.cu new file mode 100644 index 0000000000000000000000000000000000000000..d750468d53c7e95e64c217d4e1c9a812487d0846 --- /dev/null +++ b/workloads/micro/uvm/2DCONV/2DConvolution.cu @@ -0,0 +1,362 @@ +/** + * 2DConvolution.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +#define NBLOCKS 32 +#define BATCH_SIZE 4 + +uint64_t NI; +uint64_t NJ; +uint64_t nblocks; + + +/* Thread block dimensions */ +#define KERNEL 3 +#define DIM_THREAD_BLOCK 16 + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; + +void conv2D(DATA_TYPE* A, DATA_TYPE* B) +{ + uint64_t i, j; + DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + c11 = +0.2; c21 = +0.5; c31 = -0.8; + c12 = -0.3; c22 = +0.6; c32 = -0.9; + c13 = +0.4; c23 = +0.7; c33 = +0.10; + + for (i = 1; i < NI - 1; ++i) // 0 + { + for (j = 1; j < NJ - 1; ++j) // 1 + { + B[i*NJ + j] = c11 * A[(i - 1)*NJ + (j - 1)] + c12 * A[(i + 0)*NJ + (j - 1)] + c13 * A[(i + 1)*NJ + (j - 1)] + + c21 * A[(i - 1)*NJ + (j + 0)] + c22 * A[(i + 0)*NJ + (j + 0)] + c23 * A[(i + 1)*NJ + (j + 0)] + + c31 * A[(i - 1)*NJ + (j + 1)] + c32 * A[(i + 0)*NJ + (j + 
1)] + c33 * A[(i + 1)*NJ + (j + 1)]; + } + } +} + +void initGPU(DATA_TYPE* A_gpu) +{ + uint64_t i, j; + + for (i = 0; i < NI; ++i) { + for (j = 0; j < NJ; ++j) { + A_gpu[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } +} + +void initCPU(DATA_TYPE* A) +{ + uint64_t i, j; + + for (i = 0; i < NI; ++i) { + for (j = 0; j < NJ; ++j) { + A[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } +} + + +void compareResults(DATA_TYPE* B, DATA_TYPE* B_outputFromGpu) +{ + uint64_t i, j, fail; + fail = 0; + + // Compare a and b + for (i=1; i < (NI-1); i++) + { + for (j=1; j < (NJ-1); j++) + { + if (percentDiff(B[i*NJ + j], B_outputFromGpu[i*NJ + j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, CPU is %f, GPU is %f.\n", i, j, B[i * NJ + j], B_outputFromGpu[i * NJ + j]); + fail++; + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); + +} + +__global__ void Convolution2D_kernel(DATA_TYPE *A, DATA_TYPE *B, uint64_t NI, uint64_t NJ, uint64_t block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + uint64_t tile_dim_x = (NJ + DIM_THREAD_BLOCK - 1) / (DIM_THREAD_BLOCK * BATCH_SIZE); + + __shared__ DATA_TYPE tmp_A[DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1]; + __shared__ DATA_TYPE tmp_B[DIM_THREAD_BLOCK * BATCH_SIZE][DIM_THREAD_BLOCK * BATCH_SIZE]; + + uint64_t total_tiles = tile_dim_x * tile_dim_x; + + uint64_t tiles_this_block_x = (block_size / (DIM_THREAD_BLOCK * BATCH_SIZE)); + uint64_t tiles_this_block = tiles_this_block_x * tiles_this_block_x; + + uint64_t base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + uint64_t tile = base_tile; + uint64_t end_tile = tile + tiles_this_block; + + // DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + // c11 = +0.2; c21 = +0.5; c31 = -0.8; + // c12 = -0.3; c22 = +0.6; c32 = -0.9; + // c13 = +0.4; c23 = +0.7; c33 = +0.10; + + 
DATA_TYPE c[KERNEL][KERNEL]; + + c[0][0] = +0.2; + c[1][0] = +0.5; + c[2][0] = -0.8; + c[0][1] = -0.3; + c[1][1] = +0.6; + c[2][1] = -0.9; + c[0][2] = +0.4; + c[1][2] = +0.7; + c[2][2] = +0.10; + + for (; tile < end_tile; tile += 1) + { + // block id + uint64_t offset = tile - base_tile; + uint64_t block_id = tile / tiles_this_block; + uint64_t bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + uint64_t by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + uint64_t batch_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + // thread id + uint64_t tx = threadIdx.x; + uint64_t ty = threadIdx.y; + + uint64_t index_B_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty + 1; + uint64_t index_B_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx + 1; + + uint64_t index_A_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty; + uint64_t index_A_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx; + + uint64_t index_A_y_start = DIM_THREAD_BLOCK * BATCH_SIZE * by; + uint64_t index_A_x_start = DIM_THREAD_BLOCK * BATCH_SIZE * bx; + + uint64_t index_A_y_bound = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * DIM_THREAD_BLOCK; + uint64_t index_A_x_bound = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * DIM_THREAD_BLOCK; + + // fetch A + for (uint64_t i = 0; i < BATCH_SIZE; i++) { + for (uint64_t j = 0; j < BATCH_SIZE; j++) { + if ((index_A_y + i) < NI && (index_A_x + j) < NJ) { + tmp_A[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] = A[(index_A_y + i) * NJ + index_A_x + j]; + tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] = 0; + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < KERNEL - 1; i++) { + for (uint64_t j = 0; j < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; j++) { + if ((index_A_y_bound + i) < NI && (index_A_x_start + j) < NJ) { + tmp_A[DIM_THREAD_BLOCK * BATCH_SIZE + i][j] = A[(index_A_y_bound + i) * NJ + index_A_x_start + j]; + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < 
BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; i++) { + for (uint64_t j = 0; j < KERNEL - 1; j++) { + if ((index_A_y_start + i) < NI && (index_A_x_bound + j) < NJ) { + tmp_A[i][DIM_THREAD_BLOCK * BATCH_SIZE + j] = A[(index_A_y_start + i) * NJ + index_A_x_bound + j]; + } + } + } + block.sync(); + + // Computation + for (uint64_t i = 0; i < BATCH_SIZE; i++) { + for (uint64_t j = 0; j < BATCH_SIZE; j++) { + for (uint64_t m = 0; m < KERNEL; m++) { + for (uint64_t n = 0; n < KERNEL; n++) { + tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] += tmp_A[ty * BATCH_SIZE + i + m][tx * BATCH_SIZE + j + n] * c[n][m]; + } + } + } + } + block.sync(); + + // Store B + for (uint64_t i = 0; i < BATCH_SIZE; i++) { + for (uint64_t j = 0; j < BATCH_SIZE; j++) { + if ((index_B_y + i + 1) < NI && (index_B_x + j + 1) < NJ) { + B[(index_B_y + i) * NJ + index_B_x + j] = tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j]; + } + } + } + block.sync(); + } +} + +void convolution2DCuda(DATA_TYPE *A_gpu, DATA_TYPE *B_gpu) +{ + double t_start, t_end; + + uint64_t output_width = NI - KERNEL + 1; + uint64_t output_height = NJ - KERNEL + 1; + + dim3 block(DIM_THREAD_BLOCK, DIM_THREAD_BLOCK); + dim3 grid(nblocks, nblocks); + + uint64_t block_size = (NJ + (nblocks - 1)) / nblocks; + + // t_start = rtclock(); + +#ifdef PREF + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(A_gpu, NI * NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(B_gpu, NI * NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + // cudaMemset(B_gpu,0 ,NI*NJ*sizeof(DATA_TYPE)); + Convolution2D_kernel<<>>(A_gpu, B_gpu); + cudaDeviceSynchronize(); + + t_end = rtclock(); + fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); //); +#else + Convolution2D_kernel<<>>(A_gpu, B_gpu, NI, NJ, block_size); + cudaDeviceSynchronize(); + + // t_end = rtclock(); + // fprintf(stdout, "GPU 
Runtime: %0.6lfs\n", t_end - t_start); //); +#endif +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + if (argc >= 4) { + NI = atoll(argv[1]); + NJ = atoll(argv[2]); + nblocks = atoi(argv[3]); + } else { + NI = SIZE; + NJ = SIZE; + nblocks = NBLOCKS; + } + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + + A = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NJ); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NI * NJ); + + // initGPU(A_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI*NJ*sizeof(DATA_TYPE)); + // overlapEndCPU(); + + convolution2DCuda(A_gpu, B_gpu); + + memcpy(B, B_gpu, NI * NJ * sizeof(DATA_TYPE)); + + cudaFree(A_gpu); + cudaFree(B_gpu); + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // conv2D(A, B); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);//); + + // compareResults(B, B_gpu); + free(A); + free(B); + + return 0; +} diff --git a/workloads/micro/uvm/2DCONV/Makefile b/workloads/micro/uvm/2DCONV/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f97459c40d8409dec80056ca77208c75e9d0b5e8 --- /dev/null +++ b/workloads/micro/uvm/2DCONV/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := 2DConvolution +CUFILES := 2DConvolution.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o 2DConvolution diff --git a/workloads/micro/uvm/2DCONV/run.sh b/workloads/micro/uvm/2DCONV/run.sh new 
file mode 100755 index 0000000000000000000000000000000000000000..c4d26406f477f75d294497d89fc0d88c529f475b --- /dev/null +++ b/workloads/micro/uvm/2DCONV/run.sh @@ -0,0 +1,2 @@ +# ./2DConvolution 16384 16384 32 +./2DConvolution 32768 32768 32 \ No newline at end of file diff --git a/workloads/micro/uvm/2DCONV/run_large.sh b/workloads/micro/uvm/2DCONV/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..6e4e06894b252b1d547f335deef142ab01c98df9 --- /dev/null +++ b/workloads/micro/uvm/2DCONV/run_large.sh @@ -0,0 +1 @@ +./2DConvolution 8192 8192 32 diff --git a/workloads/micro/uvm/2DCONV/run_medium.sh b/workloads/micro/uvm/2DCONV/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..c246aa18fc41de6e48e7d9ab67f1bbf1925afff6 --- /dev/null +++ b/workloads/micro/uvm/2DCONV/run_medium.sh @@ -0,0 +1 @@ +./2DConvolution 4096 4096 32 diff --git a/workloads/micro/uvm/2DCONV/run_mega.sh b/workloads/micro/uvm/2DCONV/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..aa390557108b8621ec9ef8ac80f0a8f085161cce --- /dev/null +++ b/workloads/micro/uvm/2DCONV/run_mega.sh @@ -0,0 +1 @@ +./2DConvolution 65536 65536 32 diff --git a/workloads/micro/uvm/2DCONV/run_small.sh b/workloads/micro/uvm/2DCONV/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..c4b192f75d30e834237d39cfc15c8c57bade3c0e --- /dev/null +++ b/workloads/micro/uvm/2DCONV/run_small.sh @@ -0,0 +1 @@ +./2DConvolution 1024 1024 8 diff --git a/workloads/micro/uvm/2DCONV/run_super.sh b/workloads/micro/uvm/2DCONV/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..8a0981739ace39a1104aa069d6a6f0dfa38fd5c9 --- /dev/null +++ b/workloads/micro/uvm/2DCONV/run_super.sh @@ -0,0 +1 @@ +./2DConvolution 32768 32768 32 diff --git a/workloads/micro/uvm/2DCONV/run_tiny.sh b/workloads/micro/uvm/2DCONV/run_tiny.sh new file mode 100755 index 
0000000000000000000000000000000000000000..d7c49df52c8e594edae710ad71032e912ccd7892 --- /dev/null +++ b/workloads/micro/uvm/2DCONV/run_tiny.sh @@ -0,0 +1 @@ +./2DConvolution 512 512 4 diff --git a/workloads/micro/uvm/3DCONV/3DConvolution.cu b/workloads/micro/uvm/3DCONV/3DConvolution.cu new file mode 100644 index 0000000000000000000000000000000000000000..419996a0d5f492e204b2ba5e60d19d3de403342d --- /dev/null +++ b/workloads/micro/uvm/3DCONV/3DConvolution.cu @@ -0,0 +1,386 @@ +/** + * 3DConvolution.cu: This file is part of the PolyBench/GPU 1.0 test suite. + * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +// define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +#define NBLOCKS 2 +#define BATCH_SIZE 3 + +uint64_t NI; +uint64_t NJ; +uint64_t NK; +uint64_t nblocks; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 4 + +#define KERNEL 3 + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; + + + +void 
conv3D(DATA_TYPE* A, DATA_TYPE* B) +{ + uint64_t i, j, k; + DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + c11 = +2; c21 = +5; c31 = -8; + c12 = -3; c22 = +6; c32 = -9; + c13 = +4; c23 = +7; c33 = +10; + + for (i = 1; i < NI - 1; ++i) // 0 + { + for (j = 1; j < NJ - 1; ++j) // 1 + { + for (k = 1; k < NK -1; ++k) // 2 + { + B[i*(NK * NJ) + j*NK + k] = c11 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + c13 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + + c21 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + c23 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + + c31 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + c33 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + + c12 * A[(i + 0)*(NK * NJ) + (j - 1)*NK + (k + 0)] + c22 * A[(i + 0)*(NK * NJ) + (j + 0)*NK + (k + 0)] + + c32 * A[(i + 0)*(NK * NJ) + (j + 1)*NK + (k + 0)] + c11 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k + 1)] + + c13 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k + 1)] + c21 * A[(i - 1)*(NK * NJ) + (j + 0)*NK + (k + 1)] + + c23 * A[(i + 1)*(NK * NJ) + (j + 0)*NK + (k + 1)] + c31 * A[(i - 1)*(NK * NJ) + (j + 1)*NK + (k + 1)] + + c33 * A[(i + 1)*(NK * NJ) + (j + 1)*NK + (k + 1)]; + } + } + } +} + +void initGPU(DATA_TYPE *A_gpu) +{ + uint64_t i, j, k; + + for (i = 0; i < NI; ++i) + { + for (j = 0; j < NJ; ++j) + { + for (k = 0; k < NK; ++k) + { + A_gpu[i * (NK * NJ) + j * NK + k] = i % 12 + 2 * (j % 7) + 3 * (k % 13); + } + } + } +} + +void initCPU(DATA_TYPE *A) +{ + uint64_t i, j, k; + + for (i = 0; i < NI; ++i) + { + for (j = 0; j < NJ; ++j) + { + for (k = 0; k < NK; ++k) + { + A[i*(NK * NJ) + j*NK + k] = i % 12 + 2 * (j % 7) + 3 * (k % 13); + } + } + } +} + + +void compareResults(DATA_TYPE* B, DATA_TYPE* B_outputFromGpu) +{ + uint64_t i, j, k, fail; + fail = 0; + + // Compare result from cpu and gpu... 
+ for (i = 1; i < NI - 1; ++i) // 0 + { + for (j = 1; j < NJ - 1; ++j) // 1 + { + for (k = 1; k < NK - 1; ++k) // 2 + { + if (percentDiff(B[i*(NK * NJ) + j*NK + k], B_outputFromGpu[i*(NK * NJ) + j*NK + k]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, %d, CPU is %f, GPU is %f.\n", i, j, k, B[i * (NK * NJ) + j * NK + k], B_outputFromGpu[i * (NK * NJ) + j * NK + k]); + fail++; + } + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void convolution3D_kernel(DATA_TYPE *A, DATA_TYPE *B, uint64_t NI, uint64_t NJ, uint64_t NK, uint64_t block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + c11 = +2; c21 = +5; c31 = -8; + c12 = -3; c22 = +6; c32 = -9; + c13 = +4; c23 = +7; c33 = +10; + + uint64_t tile_dim_x = (NJ + DIM_THREAD_BLOCK - 1) / (DIM_THREAD_BLOCK * BATCH_SIZE); + + __shared__ DATA_TYPE tmp_A[DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1]; + __shared__ DATA_TYPE tmp_B[DIM_THREAD_BLOCK * BATCH_SIZE][DIM_THREAD_BLOCK * BATCH_SIZE][DIM_THREAD_BLOCK * BATCH_SIZE]; + + // uint64_t total_tiles = tile_dim_x * tile_dim_x * tile_dim_x; + + uint64_t tiles_this_block_x = (block_size / (DIM_THREAD_BLOCK * BATCH_SIZE)); + uint64_t tiles_this_block = tiles_this_block_x * tiles_this_block_x * tiles_this_block_x; + + uint64_t base_tile = (blockIdx.z * gridDim.y * gridDim.x + blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + uint64_t tile = base_tile; + uint64_t end_tile = tile + tiles_this_block; + + // printf("block_size is %d, tile_dim_x is %d, tiles_this_block_x is %d.\n", block_size, tile_dim_x, tiles_this_block_x); + + for (; tile < end_tile; tile += 1) + { + // block id + uint64_t offset = tile - base_tile; + uint64_t block_id = tile / 
tiles_this_block; + + uint64_t bz = block_id / (gridDim.y * gridDim.x) * tiles_this_block_x + offset / (tiles_this_block_x * tiles_this_block_x); + uint64_t by = block_id % (gridDim.y * gridDim.x) / gridDim.x * tiles_this_block_x + offset % (tiles_this_block_x * tiles_this_block_x) / tiles_this_block_x; + uint64_t bx = block_id % (gridDim.y * gridDim.x) % gridDim.x * tiles_this_block_x + offset % (tiles_this_block_x * tiles_this_block_x) % tiles_this_block_x; + + // thread id + uint64_t tx = threadIdx.x; + uint64_t ty = threadIdx.y; + uint64_t tz = threadIdx.z; + + uint64_t index_B_z = DIM_THREAD_BLOCK * BATCH_SIZE * bz + BATCH_SIZE * tz + 1; + uint64_t index_B_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty + 1; + uint64_t index_B_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx + 1; + + uint64_t index_A_z = DIM_THREAD_BLOCK * BATCH_SIZE * bz + BATCH_SIZE * tz; + uint64_t index_A_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty; + uint64_t index_A_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx; + + uint64_t index_A_z_start = DIM_THREAD_BLOCK * BATCH_SIZE * bz; + uint64_t index_A_y_start = DIM_THREAD_BLOCK * BATCH_SIZE * by; + uint64_t index_A_x_start = DIM_THREAD_BLOCK * BATCH_SIZE * bx; + + uint64_t index_A_z_bound = DIM_THREAD_BLOCK * BATCH_SIZE * bz + BATCH_SIZE * DIM_THREAD_BLOCK; + uint64_t index_A_y_bound = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * DIM_THREAD_BLOCK; + uint64_t index_A_x_bound = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * DIM_THREAD_BLOCK; + + // fetch A + for (uint64_t i = 0; i < BATCH_SIZE; i++) { + for (uint64_t j = 0; j < BATCH_SIZE; j++) { + for (uint64_t k = 0; k < BATCH_SIZE; k++) { + if ((index_A_z + i) < NI && (index_A_y + j) < NJ && (index_A_x + k) < NK) { + tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] = A[(index_A_z + i) * NJ * NK + (index_A_y + j) * NK + index_A_x + k]; + tmp_B[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] = 0; + } + } + } + 
} + + // fetch A -- padding + for (uint64_t i = 0; i < KERNEL - 1; i++) { + for (uint64_t j = 0; j < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; j++) { + for (uint64_t k = 0; k < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; k++) { + if ((index_A_z_bound + i) < NI && (index_A_y_start + j) < NJ && (index_A_x_start + k) < NK) { + tmp_A[DIM_THREAD_BLOCK * BATCH_SIZE + i][j][k] = A[(index_A_z_bound + i) * NJ * NK + (index_A_y_start + j) * NK + index_A_x_start + k]; + } + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; i++) { + for (uint64_t j = 0; j < KERNEL - 1; j++) { + for (uint64_t k = 0; k < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; k++) { + if ((index_A_z_start + i) < NI && (index_A_y_bound + j) < NJ && (index_A_x_start + k) < NK) { + tmp_A[i][DIM_THREAD_BLOCK * BATCH_SIZE + j][k] = A[(index_A_z_start + i) * NJ * NK + (index_A_y_bound + j) * NK + index_A_x_start + k]; + } + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; i++) { + for (uint64_t j = 0; j < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; j++) { + for (uint64_t k = 0; k < KERNEL - 1; k++) { + if ((index_A_z_start + i) < NI && (index_A_y_start + j) < NJ && (index_A_x_bound + k) < NK) { + tmp_A[i][j][DIM_THREAD_BLOCK * BATCH_SIZE + k] = A[(index_A_z_start + i) * NJ * NK + (index_A_y_start + j) * NK + index_A_x_bound + k]; + } + } + } + } + block.sync(); + + // Computation + for (uint64_t i = 0; i < BATCH_SIZE; i++) { + for (uint64_t j = 0; j < BATCH_SIZE; j++) { + for (uint64_t k = 0; k < BATCH_SIZE; k++) { + tmp_B[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] = c11 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c13 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + + c21 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c23 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + 
+ c31 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c33 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + + c12 * tmp_A[tz * BATCH_SIZE + i + 1][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k + 1] + c22 * tmp_A[tz * BATCH_SIZE + i + 1][ty * BATCH_SIZE + j + 1][tx * BATCH_SIZE + k + 1] + + c32 * tmp_A[tz * BATCH_SIZE + i + 1][ty * BATCH_SIZE + j + 2][tx * BATCH_SIZE + k + 1] + c11 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k + 2] + + c13 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k + 2] + c21 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j + 1][tx * BATCH_SIZE + k + 2] + + c23 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j + 1][tx * BATCH_SIZE + k + 2] + c31 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j + 2][tx * BATCH_SIZE + k + 2] + + c33 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j + 2][tx * BATCH_SIZE + k + 2]; + } + } + } + block.sync(); + + // Store B + for (uint64_t i = 0; i < BATCH_SIZE; i++) { + for (uint64_t j = 0; j < BATCH_SIZE; j++) { + for (uint64_t k = 0; k < BATCH_SIZE; k++) { + if ((index_B_z + i + 1) < NI && (index_B_y + j + 1) < NJ && (index_B_x + k + 1) < NK) + { + B[(index_B_z + i) * NJ * NK + (index_B_y + j) * NK + index_B_x + k] = tmp_B[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k]; + } + } + } + } + block.sync(); + } +} + + +void convolution3DCuda(DATA_TYPE* A_gpu, DATA_TYPE* B_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK, DIM_THREAD_BLOCK, DIM_THREAD_BLOCK); + dim3 grid(nblocks, nblocks, nblocks); + + uint64_t block_size = (NI + (nblocks - 1)) / nblocks; + + // t_start = rtclock(); + convolution3D_kernel<<>>(A_gpu, B_gpu, NI, NJ, NK, block_size); + + cudaDeviceSynchronize(); + // t_end = rtclock(); + // fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); + +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc 
%lu start_tsp %lu\n", start_tsc, start_tsp); + if (argc >= 5) { + NI = atoll(argv[1]); + NJ = atoll(argv[2]); + NK = atoll(argv[3]); + nblocks = atoi(argv[4]); + } else { + NI = SIZE; + NJ = SIZE; + NK = SIZE; + nblocks = NBLOCKS; + } + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + + A = (DATA_TYPE*)malloc(NI*NJ*NK*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NI*NJ*NK*sizeof(DATA_TYPE)); + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NJ * NK); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NI * NJ * NK); + + // initGPU(A_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * NJ * NK * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + convolution3DCuda(A_gpu, B_gpu); + memcpy(B, B_gpu, NI * NJ * NK * sizeof(DATA_TYPE)); + + cudaFree(A_gpu); + cudaFree(B_gpu); + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // conv3D(A, B); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(B, B_gpu); + free(A); + free(B); + + return 0; +} diff --git a/workloads/micro/uvm/3DCONV/Makefile b/workloads/micro/uvm/3DCONV/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..72aac9cb83cca03110da33f8da9119f32af90ccd --- /dev/null +++ b/workloads/micro/uvm/3DCONV/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := 3DConvolution +CUFILES := 3DConvolution.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o 3DConvolution diff --git a/workloads/micro/uvm/3DCONV/run.sh b/workloads/micro/uvm/3DCONV/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..0c27d78b8d3484896bd6812043b5466b074cbebf --- /dev/null 
+++ b/workloads/micro/uvm/3DCONV/run.sh @@ -0,0 +1,2 @@ +#./3DConvolution 768 768 768 8 +./3DConvolution 1536 1536 1536 8 diff --git a/workloads/micro/uvm/3DCONV/run_large.sh b/workloads/micro/uvm/3DCONV/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..500c363302024d6080c025784efbe2e7fef74f53 --- /dev/null +++ b/workloads/micro/uvm/3DCONV/run_large.sh @@ -0,0 +1 @@ +./3DConvolution 384 384 384 8 diff --git a/workloads/micro/uvm/3DCONV/run_medium.sh b/workloads/micro/uvm/3DCONV/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..d0a9fb120b6de1c16ae4146d7684c5557af95152 --- /dev/null +++ b/workloads/micro/uvm/3DCONV/run_medium.sh @@ -0,0 +1 @@ +./3DConvolution 192 192 192 4 diff --git a/workloads/micro/uvm/3DCONV/run_mega.sh b/workloads/micro/uvm/3DCONV/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..4e21c66c3d1cf21bfe88427d92c54ff8be428d8a --- /dev/null +++ b/workloads/micro/uvm/3DCONV/run_mega.sh @@ -0,0 +1 @@ +./3DConvolution 1536 1536 1536 8 diff --git a/workloads/micro/uvm/3DCONV/run_small.sh b/workloads/micro/uvm/3DCONV/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..f794eec58ed56174c5d02096a9bf5acc4e948d0f --- /dev/null +++ b/workloads/micro/uvm/3DCONV/run_small.sh @@ -0,0 +1 @@ +./3DConvolution 96 96 96 4 diff --git a/workloads/micro/uvm/3DCONV/run_super.sh b/workloads/micro/uvm/3DCONV/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..11f8b43d3e406c466b824420d34991a5c0f876b4 --- /dev/null +++ b/workloads/micro/uvm/3DCONV/run_super.sh @@ -0,0 +1 @@ +./3DConvolution 768 768 768 8 diff --git a/workloads/micro/uvm/3DCONV/run_tiny.sh b/workloads/micro/uvm/3DCONV/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..791c84bc54088ea65fc7612ee94442bbbc615cc3 --- /dev/null +++ b/workloads/micro/uvm/3DCONV/run_tiny.sh @@ -0,0 +1 @@ +./3DConvolution 48 48 48 2 diff --git 
a/workloads/micro/uvm/gemm/Makefile b/workloads/micro/uvm/gemm/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d545b0aab3ef63260888227abd6ab99bcaddbff3 --- /dev/null +++ b/workloads/micro/uvm/gemm/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := gemm +CUFILES := gemm.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o gemm diff --git a/workloads/micro/uvm/gemm/gemm.cu b/workloads/micro/uvm/gemm/gemm.cu new file mode 100644 index 0000000000000000000000000000000000000000..9bf58a82aa996a9a02cc532b0494f6c044c5be2d --- /dev/null +++ b/workloads/micro/uvm/gemm/gemm.cu @@ -0,0 +1,289 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. + * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ 
+#define SIZE 4096 +uint64_t NI; +uint64_t NJ; +uint64_t NK; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK_X 32 +#define DIM_THREAD_BLOCK_Y 32 + + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void gemm(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i,j,k; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i*NJ + j] *= BETA; + for (k = 0; k < NK; ++k) { + C[i*NJ + j] += ALPHA * A[i*NK + k] * B[k*NJ + j]; + } + } + } +} + +void initGPU(DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NK; j++) { + A_gpu[i * NK + j] = ((DATA_TYPE)i * j) / NI; + } + } + + + for (i = 0; i < NK; i++) { + for (j = 0; j < NJ; j++) { + B_gpu[i * NJ + j] = ((DATA_TYPE)i * j + 1) / NJ; + } + } + + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C_gpu[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + } + } + +} + +void initCPU(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NK; j++) { + A[i * NK + j] = ((DATA_TYPE)i * j) / NI; + } + } + + + for (i = 0; i < NK; i++) { + for (j = 0; j < NJ; j++) { + B[i * NJ + j] = ((DATA_TYPE)i * j + 1) / NJ; + } + } + + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + } + } + +} + + +void compareResults(DATA_TYPE* C, DATA_TYPE* C_outputFromGpu) +{ + uint64_t i, j, fail; + fail = 0; + + // Compare C1 and C2 + for (i=0; i < NI; i++) + { + for (j=0; j < NJ; j++) + { + // printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + if (percentDiff(C[i*NJ + j], C_outputFromGpu[i*NJ + j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); 
+ fail++; + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void gemm_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, uint64_t NI, uint64_t NK, uint64_t NJ) +{ + // Compute each thread's global row and column index + uint64_t row = blockIdx.y * blockDim.y + threadIdx.y; + uint64_t col = blockIdx.x * blockDim.x + threadIdx.x; + + // Statically allocated shared memory + __shared__ DATA_TYPE s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + __shared__ DATA_TYPE s_b[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + // Accumulate in temporary variable + + DATA_TYPE tmp = BETA * c[row * NJ + col]; + + // Sweep tile across matrix + for (uint64_t i = 0; i < NK; i += blockDim.x) + { + s_a[threadIdx.y * blockDim.x + threadIdx.x] = a[row * NK + i + threadIdx.x]; + s_b[threadIdx.y * blockDim.x + threadIdx.x] = b[(i + threadIdx.y) * NJ + col]; + + // Wait for both tiles to be loaded in before doing computation + __syncthreads(); + + // Do matrix multiplication on the small matrix + for (uint64_t k = 0; k < blockDim.x; k++) + { + tmp += ALPHA * s_a[threadIdx.y * blockDim.x + k] * s_b[k * blockDim.x + threadIdx.x]; + } + + // Wait for all threads to finish using current tiles before loading in new ones + __syncthreads(); + } + + c[row * NJ + col] = tmp; +} + +void gemmCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 grid((size_t)(ceil( ((float)NI)/ ((float)block.x) )),(size_t)(ceil( ((float)NJ)/ ((float)block.y) ))); + + //t_start = rtclock(); + gemm_kernel<<< grid, block >>>(A_gpu, B_gpu, C_gpu, NI, NK, NJ); + cudaDeviceSynchronize(); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = 
rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + if (argc >= 4) { + NI = atoll(argv[1]); + NK = atoll(argv[2]); + NJ = atoll(argv[3]); + } else { + NI = SIZE; + NK = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE* C; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + + A = (DATA_TYPE*)malloc(NI*NK*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NK*NJ*sizeof(DATA_TYPE)); + C = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + + initCPU(A,B,C); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + // initGPU(A_gpu, B_gpu, C_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * NK * sizeof(DATA_TYPE)); + memcpy(B_gpu, B, NK * NJ * sizeof(DATA_TYPE)); + memcpy(C_gpu, C, NI * NJ * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + gemmCuda(A, B, C, A_gpu, B_gpu, C_gpu); + memcpy(C, C_gpu, NI * NJ * sizeof(DATA_TYPE)); + + //t_start = rtclock(); + //gemm(A, B, C); // needed to keep benchmark accurate + //t_end = rtclock(); + //fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + //compareResults(C_gpu, C); + + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + endCPU(); + finiTrace(); + + free(A); + free(B); + free(C); + return 0; +} + diff --git a/workloads/micro/uvm/gemm/run.sh b/workloads/micro/uvm/gemm/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..d234537561c276f291434f42a7337bc5d5a61605 --- /dev/null +++ b/workloads/micro/uvm/gemm/run.sh @@ -0,0 +1,3 @@ +#./gemm 1024 1024 1024 +./gemm 32768 32768 32768 +#./gemm 512 512 512 diff --git a/workloads/micro/uvm/gemm/run_large.sh b/workloads/micro/uvm/gemm/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..cc1dbe192ecafb7558b76cfa4d2ce05a15067999 --- /dev/null +++ 
b/workloads/micro/uvm/gemm/run_large.sh @@ -0,0 +1 @@ +./gemm 8192 8192 8192 diff --git a/workloads/micro/uvm/gemm/run_medium.sh b/workloads/micro/uvm/gemm/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..ebae2815e56031234ccdef0d98cb774f111e82a2 --- /dev/null +++ b/workloads/micro/uvm/gemm/run_medium.sh @@ -0,0 +1 @@ +./gemm 4096 4096 4096 diff --git a/workloads/micro/uvm/gemm/run_mega.sh b/workloads/micro/uvm/gemm/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..7a7e74e163fa0d0ad774327163771ce8eb23bd79 --- /dev/null +++ b/workloads/micro/uvm/gemm/run_mega.sh @@ -0,0 +1 @@ +./gemm 32768 32768 32768 diff --git a/workloads/micro/uvm/gemm/run_small.sh b/workloads/micro/uvm/gemm/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..df5896fd490618e0f8c5608097518b3837b9ad33 --- /dev/null +++ b/workloads/micro/uvm/gemm/run_small.sh @@ -0,0 +1 @@ +./gemm 1024 1024 1024 diff --git a/workloads/micro/uvm/gemm/run_super.sh b/workloads/micro/uvm/gemm/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..9736fc5d97c8ef9b55a3157882e05979f65a5011 --- /dev/null +++ b/workloads/micro/uvm/gemm/run_super.sh @@ -0,0 +1 @@ +./gemm 16384 16384 16384 diff --git a/workloads/micro/uvm/gemm/run_tiny.sh b/workloads/micro/uvm/gemm/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..9858f12d61d44c0f6265f7c22c27fab446500f55 --- /dev/null +++ b/workloads/micro/uvm/gemm/run_tiny.sh @@ -0,0 +1 @@ +./gemm 512 512 512 diff --git a/workloads/micro/uvm/gemm_perf/Makefile b/workloads/micro/uvm/gemm_perf/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d545b0aab3ef63260888227abd6ab99bcaddbff3 --- /dev/null +++ b/workloads/micro/uvm/gemm_perf/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + 
+EXECUTABLE := gemm +CUFILES := gemm.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o gemm diff --git a/workloads/micro/uvm/gemm_perf/gemm b/workloads/micro/uvm/gemm_perf/gemm new file mode 100755 index 0000000000000000000000000000000000000000..3dfd2807a3e2d149eb0aa688131a826092d6d492 Binary files /dev/null and b/workloads/micro/uvm/gemm_perf/gemm differ diff --git a/workloads/micro/uvm/gemm_perf/gemm.cu b/workloads/micro/uvm/gemm_perf/gemm.cu new file mode 100644 index 0000000000000000000000000000000000000000..90aaba4c2f350b3d8f7b974b8407c1049d604beb --- /dev/null +++ b/workloads/micro/uvm/gemm_perf/gemm.cu @@ -0,0 +1,289 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. + * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +uint64_t NI; +uint64_t NJ; +uint64_t NK; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK_X 32 +#define 
DIM_THREAD_BLOCK_Y 32 + + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void gemm(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i,j,k; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i*NJ + j] *= BETA; + for (k = 0; k < NK; ++k) { + C[i*NJ + j] += ALPHA * A[i*NK + k] * B[k*NJ + j]; + } + } + } +} + +void initGPU(DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NK; j++) { + A_gpu[i * NK + j] = ((DATA_TYPE)i * j) / NI; + } + } + + + for (i = 0; i < NK; i++) { + for (j = 0; j < NJ; j++) { + B_gpu[i * NJ + j] = ((DATA_TYPE)i * j + 1) / NJ; + } + } + + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C_gpu[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + } + } + +} + +void initCPU(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NK; j++) { + A[i * NK + j] = ((DATA_TYPE)i * j) / NI; + } + } + + + for (i = 0; i < NK; i++) { + for (j = 0; j < NJ; j++) { + B[i * NJ + j] = ((DATA_TYPE)i * j + 1) / NJ; + } + } + + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + } + } + +} + + +void compareResults(DATA_TYPE* C, DATA_TYPE* C_outputFromGpu) +{ + uint64_t i, j, fail; + fail = 0; + + // Compare C1 and C2 + for (i=0; i < NI; i++) + { + for (j=0; j < NJ; j++) + { + // printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + if (percentDiff(C[i*NJ + j], C_outputFromGpu[i*NJ + j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + fail++; + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", 
PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void gemm_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, uint64_t NI, uint64_t NK, uint64_t NJ) +{ + // Compute each thread's global row and column index + uint64_t row = blockIdx.y * blockDim.y + threadIdx.y; + uint64_t col = blockIdx.x * blockDim.x + threadIdx.x; + + // Statically allocated shared memory + __shared__ DATA_TYPE s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + __shared__ DATA_TYPE s_b[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + // Accumulate in temporary variable + + DATA_TYPE tmp = BETA * c[row * NJ + col]; + + // Sweep tile across matrix + for (uint64_t i = 0; i < NK; i += blockDim.x) + { + s_a[threadIdx.y * blockDim.x + threadIdx.x] = a[row * NK + i + threadIdx.x]; + s_b[threadIdx.y * blockDim.x + threadIdx.x] = b[(i + threadIdx.y) * NJ + col]; + + // Wait for both tiles to be loaded in before doing computation + __syncthreads(); + + // Do matrix multiplication on the small matrix + for (uint64_t k = 0; k < blockDim.x; k++) + { + tmp += ALPHA * s_a[threadIdx.y * blockDim.x + k] * s_b[k * blockDim.x + threadIdx.x]; + } + + // Wait for all threads to finish using current tiles before loading in new ones + __syncthreads(); + } + + c[row * NJ + col] = tmp; +} + +void gemmCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 grid((size_t)(ceil( ((float)NI)/ ((float)block.x) )),(size_t)(ceil( ((float)NJ)/ ((float)block.y) ))); + + //t_start = rtclock(); + gemm_kernel<<< grid, block >>>(A_gpu, B_gpu, C_gpu, NI, NK, NJ); + cudaDeviceSynchronize(); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + if (argc >= 4) { + NI = atoll(argv[1]); + NK = 
atoll(argv[2]); + NJ = atoll(argv[3]); + } else { + NI = SIZE; + NK = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE* C; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + + A = (DATA_TYPE*)malloc(NI*NK*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NK*NJ*sizeof(DATA_TYPE)); + C = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + + initCPU(A,B,C); + GPU_argv_init(); + + //initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + // initGPU(A_gpu, B_gpu, C_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * NK * sizeof(DATA_TYPE)); + memcpy(B_gpu, B, NK * NJ * sizeof(DATA_TYPE)); + memcpy(C_gpu, C, NI * NJ * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + gemmCuda(A, B, C, A_gpu, B_gpu, C_gpu); + memcpy(C, C_gpu, NI * NJ * sizeof(DATA_TYPE)); + + //t_start = rtclock(); + //gemm(A, B, C); // needed to keep benchmark accurate + //t_end = rtclock(); + //fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + //compareResults(C_gpu, C); + + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + endCPU(); + //finiTrace(); + + free(A); + free(B); + free(C); + return 0; +} + diff --git a/workloads/micro/uvm/gemm_perf/run.sh b/workloads/micro/uvm/gemm_perf/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..d234537561c276f291434f42a7337bc5d5a61605 --- /dev/null +++ b/workloads/micro/uvm/gemm_perf/run.sh @@ -0,0 +1,3 @@ +#./gemm 1024 1024 1024 +./gemm 32768 32768 32768 +#./gemm 512 512 512 diff --git a/workloads/micro/uvm/gemm_perf/run_large.sh b/workloads/micro/uvm/gemm_perf/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..cc1dbe192ecafb7558b76cfa4d2ce05a15067999 --- /dev/null +++ b/workloads/micro/uvm/gemm_perf/run_large.sh @@ -0,0 +1 @@ +./gemm 8192 8192 8192 diff --git 
a/workloads/micro/uvm/gemm_perf/run_medium.sh b/workloads/micro/uvm/gemm_perf/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..ebae2815e56031234ccdef0d98cb774f111e82a2 --- /dev/null +++ b/workloads/micro/uvm/gemm_perf/run_medium.sh @@ -0,0 +1 @@ +./gemm 4096 4096 4096 diff --git a/workloads/micro/uvm/gemm_perf/run_mega.sh b/workloads/micro/uvm/gemm_perf/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..7a7e74e163fa0d0ad774327163771ce8eb23bd79 --- /dev/null +++ b/workloads/micro/uvm/gemm_perf/run_mega.sh @@ -0,0 +1 @@ +./gemm 32768 32768 32768 diff --git a/workloads/micro/uvm/gemm_perf/run_small.sh b/workloads/micro/uvm/gemm_perf/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..df5896fd490618e0f8c5608097518b3837b9ad33 --- /dev/null +++ b/workloads/micro/uvm/gemm_perf/run_small.sh @@ -0,0 +1 @@ +./gemm 1024 1024 1024 diff --git a/workloads/micro/uvm/gemm_perf/run_super.sh b/workloads/micro/uvm/gemm_perf/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..9736fc5d97c8ef9b55a3157882e05979f65a5011 --- /dev/null +++ b/workloads/micro/uvm/gemm_perf/run_super.sh @@ -0,0 +1 @@ +./gemm 16384 16384 16384 diff --git a/workloads/micro/uvm/gemm_perf/run_tiny.sh b/workloads/micro/uvm/gemm_perf/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..9858f12d61d44c0f6265f7c22c27fab446500f55 --- /dev/null +++ b/workloads/micro/uvm/gemm_perf/run_tiny.sh @@ -0,0 +1 @@ +./gemm 512 512 512 diff --git a/workloads/micro/uvm/gemv/Makefile b/workloads/micro/uvm/gemv/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..005563000dad0469dbf388d54a639e926cf5aa85 --- /dev/null +++ b/workloads/micro/uvm/gemv/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := gemv 
+CUFILES := gemv.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o gemv diff --git a/workloads/micro/uvm/gemv/gemv.cu b/workloads/micro/uvm/gemv/gemv.cu new file mode 100644 index 0000000000000000000000000000000000000000..680800c0d9c54758ec655402ff2bc13dfeb80e2c --- /dev/null +++ b/workloads/micro/uvm/gemv/gemv.cu @@ -0,0 +1,272 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. + * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +// define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 40960 +uint64_t NI; +uint64_t NJ; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 256 + +#define BATCH_SIZE 16 + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void gemv(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + 
uint64_t i, j; + + for (i = 0; i < NI; i++) + { + C[i] *= BETA; + for (j = 0; j < NJ; j++) + { + C[i] += ALPHA * A[i * NJ + j] * B[j]; + } + } +} + +void initGPU(DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) + { + for (j = 0; j < NJ; j++) + { + A_gpu[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } + + for (j = 0; j < NJ; j++) + { + B_gpu[j] = ((DATA_TYPE)j + 1) / NJ; + } + + for (i = 0; i < NI; i++) + { + C_gpu[i] = ((DATA_TYPE)i + 2) / NI; + } +} + +void initCPU(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) + { + for (j = 0; j < NJ; j++) + { + A[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } + + for (j = 0; j < NJ; j++) + { + B[j] = ((DATA_TYPE)j + 1) / NJ; + } + + for (i = 0; i < NI; i++) + { + C[i] = ((DATA_TYPE)i + 2) / NI; + } +} + +void compareResults(DATA_TYPE *C, DATA_TYPE *C_outputFromGpu) +{ + uint64_t i, fail; + fail = 0; + + // Compare C1 and C2 + for (i = 0; i < NI; i++) + { + if (percentDiff(C[i], C_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + fail++; + printf("%d, GPU is %f, CPU is %f.\n", i, C[i], C_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void gemv_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, uint64_t NI, uint64_t NJ) +{ + uint64_t row = blockIdx.x * blockDim.x + threadIdx.x; + uint64_t tx = threadIdx.x; + + __shared__ DATA_TYPE s_b[DIM_THREAD_BLOCK][BATCH_SIZE]; + + DATA_TYPE tmp = BETA * c[row]; + __syncthreads(); + + uint64_t tile = 0; + uint64_t end_tile = NJ / BATCH_SIZE; + + for (; tile < end_tile; tile += 1) + { + uint64_t base_index = tile * BATCH_SIZE; + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + s_b[tx][k] = b[base_index + k]; + } + __syncthreads(); + + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + tmp += ALPHA * a[row * NJ + base_index + k] * s_b[tx][k]; + } + 
__syncthreads(); + } + c[row] = tmp; +} + +void gemvCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / (DIM_THREAD_BLOCK)); + + // t_start = rtclock(); + gemv_kernel<<>>(A_gpu, B_gpu, C_gpu, NI, NJ); + cudaDeviceSynchronize(); + // t_end = rtclock(); + + // fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + if (argc >= 3) + { + NI = atoll(argv[1]); + NJ = atoll(argv[2]); + } + else + { + NI = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE *A; + DATA_TYPE *B; + DATA_TYPE *C; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + + A = (DATA_TYPE *)malloc(NI * NJ * sizeof(DATA_TYPE)); + B = (DATA_TYPE *)malloc(NJ * sizeof(DATA_TYPE)); + C = (DATA_TYPE *)malloc(NI * sizeof(DATA_TYPE)); + + initCPU(A, B, C); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NJ); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NJ); + cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI); + + // initGPU(A_gpu, B_gpu, C_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * NJ * sizeof(DATA_TYPE)); + memcpy(B_gpu, B, NJ * sizeof(DATA_TYPE)); + memcpy(C_gpu, C, NI * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + gemvCuda(A, B, C, A_gpu, B_gpu, C_gpu); + memcpy(C, C_gpu, NI * sizeof(DATA_TYPE)); + + // t_start = rtclock(); + // gemv(A, B, C); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(C_gpu, C); + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + endCPU(); + finiTrace(); + + free(A); + free(B); + free(C); + return 0; +} diff --git a/workloads/micro/uvm/gemv/run.sh b/workloads/micro/uvm/gemv/run.sh new file mode 100755 index 
0000000000000000000000000000000000000000..c75d8f69375fd6c923a93ba88a4cb43238844f8b --- /dev/null +++ b/workloads/micro/uvm/gemv/run.sh @@ -0,0 +1,2 @@ +# ./gemv 16384 16384 +./gemv 32768 32768 diff --git a/workloads/micro/uvm/gemv/run_large.sh b/workloads/micro/uvm/gemv/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..102aa73e96b74d0d46ef261f0ffd665639679025 --- /dev/null +++ b/workloads/micro/uvm/gemv/run_large.sh @@ -0,0 +1 @@ +./gemv 8192 8192 diff --git a/workloads/micro/uvm/gemv/run_medium.sh b/workloads/micro/uvm/gemv/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..2cc0f68e4d4e96c36ee0d72e64f7acdb7c97233c --- /dev/null +++ b/workloads/micro/uvm/gemv/run_medium.sh @@ -0,0 +1 @@ +./gemv 4096 4096 diff --git a/workloads/micro/uvm/gemv/run_mega.sh b/workloads/micro/uvm/gemv/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..88ca5ba9468efc68d859100ccbd246b9b3af960b --- /dev/null +++ b/workloads/micro/uvm/gemv/run_mega.sh @@ -0,0 +1 @@ +./gemv 65536 65536 diff --git a/workloads/micro/uvm/gemv/run_small.sh b/workloads/micro/uvm/gemv/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..55646a647b7f53aff91c5e38562bc449d75daa9a --- /dev/null +++ b/workloads/micro/uvm/gemv/run_small.sh @@ -0,0 +1 @@ +./gemv 1024 1024 diff --git a/workloads/micro/uvm/gemv/run_super.sh b/workloads/micro/uvm/gemv/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..295d223c3c9c768e58c0fba722fa859b74564d2d --- /dev/null +++ b/workloads/micro/uvm/gemv/run_super.sh @@ -0,0 +1 @@ +./gemv 32768 32768 diff --git a/workloads/micro/uvm/gemv/run_tiny.sh b/workloads/micro/uvm/gemv/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..60becef20c6cc3113ff1b4897d177ff3cbd77eb8 --- /dev/null +++ b/workloads/micro/uvm/gemv/run_tiny.sh @@ -0,0 +1 @@ +./gemv 512 512 diff --git a/workloads/micro/uvm/saxpy/Makefile 
b/workloads/micro/uvm/saxpy/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..069a5001c286cb5f44c4686449f04755cd5a2e52 --- /dev/null +++ b/workloads/micro/uvm/saxpy/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := saxpy +CUFILES := saxpy.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o saxpy diff --git a/workloads/micro/uvm/saxpy/run.sh b/workloads/micro/uvm/saxpy/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..61775ede6c26208ae2a150958f8eec5375952773 --- /dev/null +++ b/workloads/micro/uvm/saxpy/run.sh @@ -0,0 +1,11 @@ +# ./vector_seq 17179869184 10 524288 + +./vector_seq 8589934592 10 262144 +#nsys profile --stats=true ./vector_seq 1073741824 10 32768 + + +# ./vector_seq 4294967296 10 131072 + +# ./vector_seq 2147483648 10 32768 +#./vector_seq 1073741824 10 32768 +# ./vector_seq 1048576 100 32768 diff --git a/workloads/micro/uvm/saxpy/run_large.sh b/workloads/micro/uvm/saxpy/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..bce2787645ea40cb94ba004ec19cf4728ba48647 --- /dev/null +++ b/workloads/micro/uvm/saxpy/run_large.sh @@ -0,0 +1 @@ +./saxpy 134217728 100 65536 diff --git a/workloads/micro/uvm/saxpy/run_medium.sh b/workloads/micro/uvm/saxpy/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..af6f429a95aa9d5ede352e30e59405aaee4ee55b --- /dev/null +++ b/workloads/micro/uvm/saxpy/run_medium.sh @@ -0,0 +1 @@ +./saxpy 16777216 100 32768 diff --git a/workloads/micro/uvm/saxpy/run_mega.sh b/workloads/micro/uvm/saxpy/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..5dce5483842d255b4130f7ca89b4894c65b5a44b --- /dev/null +++ 
b/workloads/micro/uvm/saxpy/run_mega.sh @@ -0,0 +1 @@ +./saxpy 4294967296 100 262144 diff --git a/workloads/micro/uvm/saxpy/run_small.sh b/workloads/micro/uvm/saxpy/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..c927e5e53c30ca58a12dbdf95e264a4b790ab0b8 --- /dev/null +++ b/workloads/micro/uvm/saxpy/run_small.sh @@ -0,0 +1 @@ +./saxpy 2097152 100 32768 diff --git a/workloads/micro/uvm/saxpy/run_super.sh b/workloads/micro/uvm/saxpy/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..7f8d7b99b0be6cc643cd686965806f7edbc0af27 --- /dev/null +++ b/workloads/micro/uvm/saxpy/run_super.sh @@ -0,0 +1 @@ +./saxpy 1073741824 100 65536 diff --git a/workloads/micro/uvm/saxpy/run_tiny.sh b/workloads/micro/uvm/saxpy/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..4604f1747023f34e81c15ce89c4be5a6fb39bc95 --- /dev/null +++ b/workloads/micro/uvm/saxpy/run_tiny.sh @@ -0,0 +1 @@ +./saxpy 262144 100 8192 diff --git a/workloads/micro/uvm/saxpy/saxpy.cu b/workloads/micro/uvm/saxpy/saxpy.cu new file mode 100644 index 0000000000000000000000000000000000000000..b2e461c459e6ffc5ee69f71a820f8e81cc107b10 --- /dev/null +++ b/workloads/micro/uvm/saxpy/saxpy.cu @@ -0,0 +1,256 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096000 +#define ITER 100 +uint64_t NI; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 256 + +#define BATCH_SIZE 8 + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void saxpy(DATA_TYPE *A, DATA_TYPE *B, uint64_t iterations) +{ + for (uint64_t i = 0; i < NI; i++) { + for (uint64_t iter = 0; iter < iterations; iter++) { + B[i] = ALPHA * A[i] + B[i]; + } + } +} + +void initGPU(DATA_TYPE *A_gpu, DATA_TYPE *B_gpu) +{ + for (uint64_t i = 0; i < NI; i++) { + A_gpu[i] = ((DATA_TYPE)i) / NI; + } + for (uint64_t i = 0; i < NI; i++) { + B_gpu[i] = ((DATA_TYPE)i + 2) / NI; + } + +} + +void initCPU(DATA_TYPE *A, DATA_TYPE *B) +{ + for (uint64_t i = 0; i < NI; i++) { + A[i] = ((DATA_TYPE)i) / NI; + } + for (uint64_t i = 
0; i < NI; i++) { + B[i] = ((DATA_TYPE)i + 2) / NI; + } +} + +void compareResults(DATA_TYPE* B, DATA_TYPE* B_outputFromGpu) +{ + uint64_t fail = 0; + + // Compare C1 and C2 + for (uint64_t i = 0; i < NI; i++) { + // printf("%lld, GPU is %f, CPU is %f.\n", i, B[i], B_outputFromGpu[i]); + if (percentDiff(B[i], B_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%lld, GPU is %f, CPU is %f.\n", i, B[i], B_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void saxpy_kernel(DATA_TYPE *a, DATA_TYPE *b, uint64_t NI, uint64_t iterations, uint64_t block_size) +{ + // Compute each thread's global row and column index + const uint64_t mem_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + __shared__ DATA_TYPE tmp_a[mem_size]; + __shared__ DATA_TYPE tmp_b[mem_size]; + + uint64_t total_tiles = NI / mem_size; + uint64_t base_tiles = total_tiles / gridDim.x; + + uint64_t tiles_this_block = block_size / mem_size; + + uint64_t tile = base_tiles * blockIdx.x; + uint64_t end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + tmp_a[i] = a[tile * mem_size + i]; + tmp_b[i] = b[tile * mem_size + i]; + } + + __syncthreads(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + for (uint64_t iter = 0; iter < iterations; iter++) + { + tmp_b[i] += ALPHA * tmp_a[i]; + } + } + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + b[tile * mem_size + i] = tmp_b[i]; + } + } +} + +void saxpyCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, uint64_t iterations, uint64_t block_size) +{ + double t_start, t_end; + if (block_size <= DIM_THREAD_BLOCK) + block_size = DIM_THREAD_BLOCK; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / block_size); + + //t_start = rtclock(); + saxpy_kernel<<>>(A_gpu, B_gpu, NI, 
iterations, block_size); + cudaDeviceSynchronize(); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + uint64_t iterations = ITER; + uint64_t block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + if (argc >= 4) { + NI = atoll(argv[1]); + iterations = atoi(argv[2]); + block_size = atoi(argv[3]); + } else { + NI = SIZE; + iterations = ITER; + block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + } + + int nblocks = NI / block_size; + if (nblocks > 64) + { + nblocks = 64; + block_size = NI / nblocks; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + + A = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + + initCPU(A,B); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NI); + + // initGPU(A_gpu, B_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * sizeof(DATA_TYPE)); + memcpy(B_gpu, B, NI * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + saxpyCuda(A, B, A_gpu, B_gpu, iterations, block_size); + memcpy(B, B_gpu, NI * sizeof(DATA_TYPE)); + + //t_start = rtclock(); + //saxpy(A, B, iterations); + //t_end = rtclock(); + //fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + //compareResults(B_gpu, B); + cudaFree(A_gpu); + cudaFree(B_gpu); + endCPU(); + finiTrace(); + + free(A); + free(B); + return 0; +} diff --git a/workloads/micro/uvm/vector_rand/Makefile b/workloads/micro/uvm/vector_rand/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..b2abd68d5e6d513a5652b845d6d822b15fc12a59 --- /dev/null +++ b/workloads/micro/uvm/vector_rand/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) 
-L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := vector_rand +CUFILES := vector_rand.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o vector_rand diff --git a/workloads/micro/uvm/vector_rand/run.sh b/workloads/micro/uvm/vector_rand/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..61775ede6c26208ae2a150958f8eec5375952773 --- /dev/null +++ b/workloads/micro/uvm/vector_rand/run.sh @@ -0,0 +1,11 @@ +# ./vector_seq 17179869184 10 524288 + +./vector_seq 8589934592 10 262144 +#nsys profile --stats=true ./vector_seq 1073741824 10 32768 + + +# ./vector_seq 4294967296 10 131072 + +# ./vector_seq 2147483648 10 32768 +#./vector_seq 1073741824 10 32768 +# ./vector_seq 1048576 100 32768 diff --git a/workloads/micro/uvm/vector_rand/run_large.sh b/workloads/micro/uvm/vector_rand/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..0fbcd5c0eb481d41efff934e3f19f162bc1f73e2 --- /dev/null +++ b/workloads/micro/uvm/vector_rand/run_large.sh @@ -0,0 +1 @@ +./vector_rand 134217728 100 65536 diff --git a/workloads/micro/uvm/vector_rand/run_medium.sh b/workloads/micro/uvm/vector_rand/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..84e868f85fbc74d101d476d501095afd4aa6d017 --- /dev/null +++ b/workloads/micro/uvm/vector_rand/run_medium.sh @@ -0,0 +1 @@ +./vector_rand 16777216 100 32768 diff --git a/workloads/micro/uvm/vector_rand/run_mega.sh b/workloads/micro/uvm/vector_rand/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..24b084a7613986acafd45cf6ca300fe52f0426d5 --- /dev/null +++ b/workloads/micro/uvm/vector_rand/run_mega.sh @@ -0,0 +1 @@ +./vector_rand 8589934592 100 262144 diff --git a/workloads/micro/uvm/vector_rand/run_small.sh b/workloads/micro/uvm/vector_rand/run_small.sh new file mode 100755 
index 0000000000000000000000000000000000000000..baf1b589adcc83b946051cf0fc2383b45746c62f --- /dev/null +++ b/workloads/micro/uvm/vector_rand/run_small.sh @@ -0,0 +1 @@ +./vector_rand 2097152 100 32768 diff --git a/workloads/micro/uvm/vector_rand/run_super.sh b/workloads/micro/uvm/vector_rand/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..19be53cddf75e6d6c5812e3ec337cce2fd9079ae --- /dev/null +++ b/workloads/micro/uvm/vector_rand/run_super.sh @@ -0,0 +1 @@ +./vector_rand 1073741824 100 65536 diff --git a/workloads/micro/uvm/vector_rand/run_tiny.sh b/workloads/micro/uvm/vector_rand/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..55ee72259022f385aabf8eedc426425e6817835f --- /dev/null +++ b/workloads/micro/uvm/vector_rand/run_tiny.sh @@ -0,0 +1 @@ +./vector_rand 262144 100 8192 diff --git a/workloads/micro/uvm/vector_rand/vector_rand.cu b/workloads/micro/uvm/vector_rand/vector_rand.cu new file mode 100644 index 0000000000000000000000000000000000000000..6273b67221c086a62980a252e8c3b8a38a8e4b66 --- /dev/null +++ b/workloads/micro/uvm/vector_rand/vector_rand.cu @@ -0,0 +1,250 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.005 + +/* Problem size */ +#define SIZE 4096000 +#define ITER 100 +uint64_t NI; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 256 + +#define BATCH_SIZE 16 + +#define LCG_A 1.1f +#define LCG_B 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void saxpy(DATA_TYPE *A, uint64_t iterations) +{ + for (uint64_t i = 0; i < NI; i++) { + for (uint64_t iter = 0; iter < iterations; iter++) { + A[i] = LCG_A * A[i] + LCG_B; + } + } +} + +void initGPU(DATA_TYPE *A_gpu) +{ + for (uint64_t i = 0; i < NI; i++) + { + A_gpu[i] = ((DATA_TYPE)i) / NI; + } + +} + +void initCPU(DATA_TYPE *A) +{ + for (uint64_t i = 0; i < NI; i++) + { + A[i] = ((DATA_TYPE)i) / NI; + } + +} + +void compareResults(DATA_TYPE* A, DATA_TYPE* A_outputFromGpu) +{ + uint64_t fail = 0; + + // Compare C1 and C2 + for (uint64_t i = 0; i < NI; i++) { + // printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + 
if (percentDiff(A[i], A_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void vector_rand_kernel(DATA_TYPE *a, uint64_t NI, uint64_t iterations, uint64_t block_size, size_t seed) +{ + // Compute each thread's global row and column index + const uint64_t mem_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + __shared__ DATA_TYPE tmp[mem_size]; + + curandState_t randState; + size_t tx = blockIdx.x * blockDim.x + threadIdx.x; + curand_init(seed, tx, 0, &randState); + size_t idx = 0; + + uint64_t total_tiles = NI / mem_size; + uint64_t base_tiles = total_tiles / gridDim.x; + + uint64_t tiles_this_block = block_size / mem_size; + + uint64_t tile = base_tiles * blockIdx.x; + uint64_t end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + // tmp[i] = a[tile * mem_size + i]; + + idx = curand(&randState); + idx <<= 32; + idx |= curand(&randState); + tmp[i] = a[tile * mem_size + idx % mem_size]; + } + + __syncthreads(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + for (uint64_t iter = 0; iter < iterations; iter++) + { + tmp[i] = LCG_A * tmp[i] + LCG_B; + } + } + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + a[tile * mem_size + idx % mem_size] = tmp[i]; + } + } +} + +void saxpyCuda(DATA_TYPE *A, DATA_TYPE *A_gpu, uint64_t iterations, uint64_t block_size) +{ + double t_start, t_end; + if (block_size <= DIM_THREAD_BLOCK) + block_size = DIM_THREAD_BLOCK; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / block_size); + + //t_start = rtclock(); + vector_rand_kernel<<>>(A_gpu, NI, iterations, block_size, 832945); + cudaDeviceSynchronize(); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: 
%0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + uint64_t iterations = ITER; + uint64_t block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + if (argc >= 4) { + NI = atoll(argv[1]); + iterations = atoi(argv[2]); + block_size = atoi(argv[3]); + } + else { + NI = SIZE; + iterations = ITER; + block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + } + + int nblocks = NI / block_size; + if (nblocks > 64) + { + nblocks = 64; + block_size = NI / nblocks; + } + + double t_start, t_end; + + DATA_TYPE *A; + DATA_TYPE *A_gpu; + + A = (DATA_TYPE *)malloc(NI * sizeof(DATA_TYPE)); + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI); + + // initGPU(A_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + saxpyCuda(A, A_gpu, iterations, block_size); + memcpy(A, A_gpu, NI * sizeof(DATA_TYPE)); + + // t_start = rtclock(); + //saxpy(A, iterations); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + //compareResults(A_gpu, A); + cudaFree(A_gpu); + endCPU(); + finiTrace(); + + free(A); + return 0; +} diff --git a/workloads/micro/uvm/vector_seq/Makefile b/workloads/micro/uvm/vector_seq/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..2d9857cedea5755ad6a381996c578f9cbd41424a --- /dev/null +++ b/workloads/micro/uvm/vector_seq/Makefile @@ -0,0 +1,109 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := vector_seq +CUFILES := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + + +EXECUTABLE_4096_256 := vector_seq_4096_256 +CUFILES_4096_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp 
$(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=4096 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_2048_256 := vector_seq_2048_256 +CUFILES_2048_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=2048 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_1024_256 := vector_seq_1024_256 +CUFILES_1024_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=1024 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_512_256 := vector_seq_512_256 +CUFILES_512_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=512 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_256_256 := vector_seq_256_256 +CUFILES_256_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=256 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_128_256 := vector_seq_128_256 +CUFILES_128_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=128 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_64_256 := vector_seq_64_256 +CUFILES_64_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_32_256 := vector_seq_32_256 +CUFILES_32_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=32 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_16_256 := vector_seq_16_256 +CUFILES_16_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=16 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + + +EXECUTABLE_1024_4 := vector_seq_1024_4 +CUFILES_1024_4 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=1024 -DBATCH_SIZE=4 + +EXECUTABLE_512_8 := vector_seq_512_8 +CUFILES_512_8 := vector_seq.cu 
$(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=512 -DBATCH_SIZE=8 + +EXECUTABLE_256_16 := vector_seq_256_16 +CUFILES_256_16 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_128_32 := vector_seq_128_32 +CUFILES_128_32 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=32 + +EXECUTABLE_64_64 := vector_seq_64_64 +CUFILES_64_64 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=64 -DBATCH_SIZE=64 + +EXECUTABLE_32_128 := vector_seq_32_128 +CUFILES_32_128 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=32 -DBATCH_SIZE=128 + + +EXECUTABLE_2 := vector_seq_2 +CUFILES_2:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=2 + +EXECUTABLE_4 := vector_seq_4 +CUFILES_4:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=4 + +EXECUTABLE_8 := vector_seq_8 +CUFILES_8:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=8 + +EXECUTABLE_16 := vector_seq_16 +CUFILES_16:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=16 + +EXECUTABLE_32 := vector_seq_32 +CUFILES_32:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=32 + +EXECUTABLE_64 := vector_seq_64 +CUFILES_64:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=64 + +EXECUTABLE_128 := vector_seq_128 +CUFILES_128:= 
vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=128 + + + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} + + $(NVCC) ${NVCCCFLAGS} ${CUFILES_4096_256} ${DEF} -o ${EXECUTABLE_4096_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_2048_256} ${DEF} -o ${EXECUTABLE_2048_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_1024_256} ${DEF} -o ${EXECUTABLE_1024_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_512_256} ${DEF} -o ${EXECUTABLE_512_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_256_256} ${DEF} -o ${EXECUTABLE_256_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_128_256} ${DEF} -o ${EXECUTABLE_128_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_64_256} ${DEF} -o ${EXECUTABLE_64_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_32_256} ${DEF} -o ${EXECUTABLE_32_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_16_256} ${DEF} -o ${EXECUTABLE_16_256} + + $(NVCC) ${NVCCCFLAGS} ${CUFILES_1024_4} ${DEF} -o ${EXECUTABLE_1024_4} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_512_8} ${DEF} -o ${EXECUTABLE_512_8} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_256_16} ${DEF} -o ${EXECUTABLE_256_16} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_128_32} ${DEF} -o ${EXECUTABLE_128_32} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_64_64} ${DEF} -o ${EXECUTABLE_64_64} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_32_128} ${DEF} -o ${EXECUTABLE_32_128} + + $(NVCC) ${NVCCCFLAGS} ${CUFILES_2} ${DEF} -o ${EXECUTABLE_2} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_4} ${DEF} -o ${EXECUTABLE_4} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_8} ${DEF} -o ${EXECUTABLE_8} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_16} ${DEF} -o ${EXECUTABLE_16} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_32} ${DEF} -o ${EXECUTABLE_32} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_64} ${DEF} -o ${EXECUTABLE_64} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_128} ${DEF} -o ${EXECUTABLE_128} + +clean: + rm -f *.o vector_seq vector_seq_* diff --git a/workloads/micro/uvm/vector_seq/run.sh b/workloads/micro/uvm/vector_seq/run.sh new file mode 100755 index 
0000000000000000000000000000000000000000..61775ede6c26208ae2a150958f8eec5375952773 --- /dev/null +++ b/workloads/micro/uvm/vector_seq/run.sh @@ -0,0 +1,11 @@ +# ./vector_seq 17179869184 10 524288 + +./vector_seq 8589934592 10 262144 +#nsys profile --stats=true ./vector_seq 1073741824 10 32768 + + +# ./vector_seq 4294967296 10 131072 + +# ./vector_seq 2147483648 10 32768 +#./vector_seq 1073741824 10 32768 +# ./vector_seq 1048576 100 32768 diff --git a/workloads/micro/uvm/vector_seq/run_large.sh b/workloads/micro/uvm/vector_seq/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..66794fb697b9cd4165ae0e85db50fd512c3467e7 --- /dev/null +++ b/workloads/micro/uvm/vector_seq/run_large.sh @@ -0,0 +1 @@ +./vector_seq 134217728 100 65536 diff --git a/workloads/micro/uvm/vector_seq/run_medium.sh b/workloads/micro/uvm/vector_seq/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..9bbbb986b9351d0e1c82e9d53d86d4d0f83c7492 --- /dev/null +++ b/workloads/micro/uvm/vector_seq/run_medium.sh @@ -0,0 +1 @@ +./vector_seq 16777216 100 32768 diff --git a/workloads/micro/uvm/vector_seq/run_mega.sh b/workloads/micro/uvm/vector_seq/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..7b622d0246ef3a0a542e2422e09099850d52f3d1 --- /dev/null +++ b/workloads/micro/uvm/vector_seq/run_mega.sh @@ -0,0 +1 @@ +./vector_seq 8589934592 100 262144 diff --git a/workloads/micro/uvm/vector_seq/run_small.sh b/workloads/micro/uvm/vector_seq/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..da65ab8dbc61ff2e26a2638703dc97d003cb9dba --- /dev/null +++ b/workloads/micro/uvm/vector_seq/run_small.sh @@ -0,0 +1 @@ +./vector_seq 2097152 100 32768 diff --git a/workloads/micro/uvm/vector_seq/run_super.sh b/workloads/micro/uvm/vector_seq/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..5a98cb48e7b06fa4db1cb7db99d9a0e8ebcf7f46 --- /dev/null +++ 
b/workloads/micro/uvm/vector_seq/run_super.sh @@ -0,0 +1 @@ +./vector_seq 1073741824 100 65536 diff --git a/workloads/micro/uvm/vector_seq/run_tiny.sh b/workloads/micro/uvm/vector_seq/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..a2f7760fa3fb1a37be61193c663da2e38690bfe3 --- /dev/null +++ b/workloads/micro/uvm/vector_seq/run_tiny.sh @@ -0,0 +1 @@ +./vector_seq 262144 100 8192 diff --git a/workloads/micro/uvm/vector_seq/vector_seq.cu b/workloads/micro/uvm/vector_seq/vector_seq.cu new file mode 100644 index 0000000000000000000000000000000000000000..49c4d19ec99f7cc10cc323386499fb1a79958220 --- /dev/null +++ b/workloads/micro/uvm/vector_seq/vector_seq.cu @@ -0,0 +1,245 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. + * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.005 + +/* Problem size */ +#define SIZE 1073741824 +#define ITER 100 +uint64_t NI; + +/* Thread block dimensions */ +#ifndef DIM_THREAD_BLOCK +#define DIM_THREAD_BLOCK 256 +#endif + +#ifndef 
BATCH_SIZE +#define BATCH_SIZE 16 +#endif + +#ifndef NBLOCKS +#define NBLOCKS 64 +#endif + +#define LCG_A 1.1f +#define LCG_B 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void saxpy(DATA_TYPE *A, uint64_t iterations) +{ + for (uint64_t i = 0; i < NI; i++) { + for (uint64_t iter = 0; iter < iterations; iter++) { + A[i] = LCG_A * A[i] + LCG_B; + } + } +} + +void initCPU(DATA_TYPE *A) +{ + for (uint64_t i = 0; i < NI; i++) { + A[i] = ((DATA_TYPE) i) / NI; + } +} + +void initGPU(DATA_TYPE *A_gpu) +{ + for (uint64_t i = 0; i < NI; i++) { + A_gpu[i] = ((DATA_TYPE)i) / NI; + } +} + + +void compareResults(DATA_TYPE* A, DATA_TYPE* A_outputFromGpu) +{ + uint64_t fail = 0; + + // Compare C1 and C2 + for (uint64_t i = 0; i < NI; i++) { + // printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + if (percentDiff(A[i], A_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void vector_seq_kernel(DATA_TYPE *a, uint64_t NI, uint64_t iterations, uint64_t block_size) +{ + // Compute each thread's global row and column index + const uint64_t mem_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + // __shared__ DATA_TYPE tmp[mem_size]; + extern __shared__ DATA_TYPE tmp[]; + + uint64_t total_tiles = NI / mem_size; + uint64_t base_tiles = total_tiles / gridDim.x; + + uint64_t tiles_this_block = block_size / mem_size; + + uint64_t tile = base_tiles * blockIdx.x; + uint64_t end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + tmp[i] = a[tile * mem_size + i]; + } + + __syncthreads(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + for (uint64_t 
iter = 0; iter < iterations; iter++) + { + tmp[i] = LCG_A * tmp[i] + LCG_B; + } + } + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + a[tile * mem_size + i] = tmp[i]; + } + } +} + +void saxpyCuda(DATA_TYPE *A, DATA_TYPE *A_gpu, uint64_t iterations, uint64_t block_size) +{ + double t_start, t_end; + if (block_size <= DIM_THREAD_BLOCK) + block_size = DIM_THREAD_BLOCK; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / block_size); + + int MaxBytesofSharedMemory = DIM_THREAD_BLOCK * BATCH_SIZE * sizeof(DATA_TYPE); + cudaFuncSetAttribute(vector_seq_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, MaxBytesofSharedMemory); + + //t_start = rtclock(); + vector_seq_kernel<<>>(A_gpu, NI, iterations, block_size); + cudaDeviceSynchronize(); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + uint64_t iterations = ITER; + uint64_t block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + if (argc >= 4) { + NI = atoll(argv[1]); + iterations = atoi(argv[2]); + block_size = atoi(argv[3]); + } + else { + NI = SIZE; + iterations = ITER; + block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + } + + int nblocks = NBLOCKS; + block_size = NI / nblocks; + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE *A_gpu; + + A = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI); + + // initGPU(A_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + saxpyCuda(A, A_gpu, iterations, block_size); + memcpy(A, A_gpu, NI * sizeof(DATA_TYPE)); + + //t_start = rtclock(); + //saxpy(A, iterations); + //t_end = rtclock(); + //fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + //compareResults(A_gpu, A); + cudaFree(A_gpu); + 
endCPU(); + finiTrace(); + + free(A); + return 0; +} + diff --git a/workloads/micro/uvm_prefetch/2DCONV/2DConvolution.cu b/workloads/micro/uvm_prefetch/2DCONV/2DConvolution.cu new file mode 100644 index 0000000000000000000000000000000000000000..3705f71122ae2efdfe6e2678b7658c508b541ba8 --- /dev/null +++ b/workloads/micro/uvm_prefetch/2DCONV/2DConvolution.cu @@ -0,0 +1,356 @@ +/** + * 2DConvolution.cu: This file is part of the PolyBench/GPU 1.0 test suite. + * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +#define NBLOCKS 32 +#define BATCH_SIZE 4 + +uint64_t NI; +uint64_t NJ; +uint64_t nblocks; + + +/* Thread block dimensions */ +#define KERNEL 3 +#define DIM_THREAD_BLOCK 8 + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; + +void conv2D(DATA_TYPE* A, DATA_TYPE* B) +{ + uint64_t i, j; + DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + c11 = 
+0.2; c21 = +0.5; c31 = -0.8; + c12 = -0.3; c22 = +0.6; c32 = -0.9; + c13 = +0.4; c23 = +0.7; c33 = +0.10; + + for (i = 1; i < NI - 1; ++i) // 0 + { + for (j = 1; j < NJ - 1; ++j) // 1 + { + B[i*NJ + j] = c11 * A[(i - 1)*NJ + (j - 1)] + c12 * A[(i + 0)*NJ + (j - 1)] + c13 * A[(i + 1)*NJ + (j - 1)] + + c21 * A[(i - 1)*NJ + (j + 0)] + c22 * A[(i + 0)*NJ + (j + 0)] + c23 * A[(i + 1)*NJ + (j + 0)] + + c31 * A[(i - 1)*NJ + (j + 1)] + c32 * A[(i + 0)*NJ + (j + 1)] + c33 * A[(i + 1)*NJ + (j + 1)]; + } + } +} + +void initGPU(DATA_TYPE* A_gpu) +{ + uint64_t i, j; + + for (i = 0; i < NI; ++i) { + for (j = 0; j < NJ; ++j) { + A_gpu[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } +} + +void initCPU(DATA_TYPE* A) +{ + uint64_t i, j; + + for (i = 0; i < NI; ++i) { + for (j = 0; j < NJ; ++j) { + A[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } +} + + +void compareResults(DATA_TYPE* B, DATA_TYPE* B_outputFromGpu) +{ + uint64_t i, j, fail; + fail = 0; + + // Compare a and b + for (i=1; i < (NI-1); i++) + { + for (j=1; j < (NJ-1); j++) + { + if (percentDiff(B[i*NJ + j], B_outputFromGpu[i*NJ + j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, CPU is %f, GPU is %f.\n", i, j, B[i * NJ + j], B_outputFromGpu[i * NJ + j]); + fail++; + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); + +} + +__global__ void Convolution2D_kernel(DATA_TYPE *A, DATA_TYPE *B, uint64_t NI, uint64_t NJ, uint64_t block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + uint64_t tile_dim_x = (NJ + DIM_THREAD_BLOCK - 1) / (DIM_THREAD_BLOCK * BATCH_SIZE); + + __shared__ DATA_TYPE tmp_A[DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1]; + __shared__ DATA_TYPE tmp_B[DIM_THREAD_BLOCK * BATCH_SIZE][DIM_THREAD_BLOCK * BATCH_SIZE]; + + uint64_t total_tiles = tile_dim_x * tile_dim_x; + + uint64_t tiles_this_block_x = (block_size / 
(DIM_THREAD_BLOCK * BATCH_SIZE)); + uint64_t tiles_this_block = tiles_this_block_x * tiles_this_block_x; + + uint64_t base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + uint64_t tile = base_tile; + uint64_t end_tile = tile + tiles_this_block; + + // DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + // c11 = +0.2; c21 = +0.5; c31 = -0.8; + // c12 = -0.3; c22 = +0.6; c32 = -0.9; + // c13 = +0.4; c23 = +0.7; c33 = +0.10; + + DATA_TYPE c[KERNEL][KERNEL]; + + c[0][0] = +0.2; + c[1][0] = +0.5; + c[2][0] = -0.8; + c[0][1] = -0.3; + c[1][1] = +0.6; + c[2][1] = -0.9; + c[0][2] = +0.4; + c[1][2] = +0.7; + c[2][2] = +0.10; + + for (; tile < end_tile; tile += 1) + { + // block id + uint64_t offset = tile - base_tile; + uint64_t block_id = tile / tiles_this_block; + uint64_t bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + uint64_t by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + uint64_t batch_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + // thread id + uint64_t tx = threadIdx.x; + uint64_t ty = threadIdx.y; + + uint64_t index_B_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty + 1; + uint64_t index_B_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx + 1; + + uint64_t index_A_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty; + uint64_t index_A_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx; + + uint64_t index_A_y_start = DIM_THREAD_BLOCK * BATCH_SIZE * by; + uint64_t index_A_x_start = DIM_THREAD_BLOCK * BATCH_SIZE * bx; + + uint64_t index_A_y_bound = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * DIM_THREAD_BLOCK; + uint64_t index_A_x_bound = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * DIM_THREAD_BLOCK; + + // fetch A + for (uint64_t i = 0; i < BATCH_SIZE; i++) { + for (uint64_t j = 0; j < BATCH_SIZE; j++) { + if ((index_A_y + i) < NI && (index_A_x + j) < NJ) { + tmp_A[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] = A[(index_A_y + i) * NJ + index_A_x + 
j]; + tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] = 0; + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < KERNEL - 1; i++) { + for (uint64_t j = 0; j < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; j++) { + if ((index_A_y_bound + i) < NI && (index_A_x_start + j) < NJ) { + tmp_A[DIM_THREAD_BLOCK * BATCH_SIZE + i][j] = A[(index_A_y_bound + i) * NJ + index_A_x_start + j]; + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; i++) { + for (uint64_t j = 0; j < KERNEL - 1; j++) { + if ((index_A_y_start + i) < NI && (index_A_x_bound + j) < NJ) { + tmp_A[i][DIM_THREAD_BLOCK * BATCH_SIZE + j] = A[(index_A_y_start + i) * NJ + index_A_x_bound + j]; + } + } + } + block.sync(); + + // Computation + for (uint64_t i = 0; i < BATCH_SIZE; i++) { + for (uint64_t j = 0; j < BATCH_SIZE; j++) { + for (uint64_t m = 0; m < KERNEL; m++) { + for (uint64_t n = 0; n < KERNEL; n++) { + tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] += tmp_A[ty * BATCH_SIZE + i + m][tx * BATCH_SIZE + j + n] * c[n][m]; + } + } + } + } + block.sync(); + + // Store B + for (uint64_t i = 0; i < BATCH_SIZE; i++) { + for (uint64_t j = 0; j < BATCH_SIZE; j++) { + if ((index_B_y + i + 1) < NI && (index_B_x + j + 1) < NJ) { + B[(index_B_y + i) * NJ + index_B_x + j] = tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j]; + } + } + } + block.sync(); + } +} + +void convolution2DCuda(DATA_TYPE *A_gpu, DATA_TYPE *B_gpu) +{ + double t_start, t_end; + + uint64_t output_width = NI - KERNEL + 1; + uint64_t output_height = NJ - KERNEL + 1; + + dim3 block(DIM_THREAD_BLOCK, DIM_THREAD_BLOCK); + dim3 grid(nblocks, nblocks); + + uint64_t block_size = (NJ + (nblocks - 1)) / nblocks; + + // t_start = rtclock(); + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(A_gpu, NI * NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(B_gpu, NI * 
NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + Convolution2D_kernel<<>>(A_gpu, B_gpu, NI, NJ, block_size); + cudaDeviceSynchronize(); + + // Convolution2D_kernel<<>>(A_gpu, B_gpu, NI, NJ, block_size); + // cudaDeviceSynchronize(); + + // t_end = rtclock(); + // fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); //); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + if (argc >= 4) { + NI = atoll(argv[1]); + NJ = atoll(argv[2]); + nblocks = atoi(argv[3]); + } else { + NI = SIZE; + NJ = SIZE; + nblocks = NBLOCKS; + } + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + + A = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NJ); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NI * NJ); + + // initGPU(A_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI*NJ*sizeof(DATA_TYPE)); + // overlapEndCPU(); + + convolution2DCuda(A_gpu, B_gpu); + + memcpy(B, B_gpu, NI * NJ * sizeof(DATA_TYPE)); + + cudaFree(A_gpu); + cudaFree(B_gpu); + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // conv2D(A, B); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);//); + + // compareResults(B, B_gpu); + free(A); + free(B); + + return 0; +} diff --git a/workloads/micro/uvm_prefetch/2DCONV/Makefile b/workloads/micro/uvm_prefetch/2DCONV/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f97459c40d8409dec80056ca77208c75e9d0b5e8 --- /dev/null +++ b/workloads/micro/uvm_prefetch/2DCONV/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = 
$(CUDA_DIR)/bin/nvcc + +EXECUTABLE := 2DConvolution +CUFILES := 2DConvolution.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o 2DConvolution diff --git a/workloads/micro/uvm_prefetch/2DCONV/run.sh b/workloads/micro/uvm_prefetch/2DCONV/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..c4d26406f477f75d294497d89fc0d88c529f475b --- /dev/null +++ b/workloads/micro/uvm_prefetch/2DCONV/run.sh @@ -0,0 +1,2 @@ +# ./2DConvolution 16384 16384 32 +./2DConvolution 32768 32768 32 \ No newline at end of file diff --git a/workloads/micro/uvm_prefetch/2DCONV/run_large.sh b/workloads/micro/uvm_prefetch/2DCONV/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..6e4e06894b252b1d547f335deef142ab01c98df9 --- /dev/null +++ b/workloads/micro/uvm_prefetch/2DCONV/run_large.sh @@ -0,0 +1 @@ +./2DConvolution 8192 8192 32 diff --git a/workloads/micro/uvm_prefetch/2DCONV/run_medium.sh b/workloads/micro/uvm_prefetch/2DCONV/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..c246aa18fc41de6e48e7d9ab67f1bbf1925afff6 --- /dev/null +++ b/workloads/micro/uvm_prefetch/2DCONV/run_medium.sh @@ -0,0 +1 @@ +./2DConvolution 4096 4096 32 diff --git a/workloads/micro/uvm_prefetch/2DCONV/run_mega.sh b/workloads/micro/uvm_prefetch/2DCONV/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..aa390557108b8621ec9ef8ac80f0a8f085161cce --- /dev/null +++ b/workloads/micro/uvm_prefetch/2DCONV/run_mega.sh @@ -0,0 +1 @@ +./2DConvolution 65536 65536 32 diff --git a/workloads/micro/uvm_prefetch/2DCONV/run_small.sh b/workloads/micro/uvm_prefetch/2DCONV/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..c4b192f75d30e834237d39cfc15c8c57bade3c0e --- /dev/null +++ b/workloads/micro/uvm_prefetch/2DCONV/run_small.sh @@ -0,0 +1 @@ +./2DConvolution 1024 1024 8 diff --git 
a/workloads/micro/uvm_prefetch/2DCONV/run_super.sh b/workloads/micro/uvm_prefetch/2DCONV/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..8a0981739ace39a1104aa069d6a6f0dfa38fd5c9 --- /dev/null +++ b/workloads/micro/uvm_prefetch/2DCONV/run_super.sh @@ -0,0 +1 @@ +./2DConvolution 32768 32768 32 diff --git a/workloads/micro/uvm_prefetch/2DCONV/run_tiny.sh b/workloads/micro/uvm_prefetch/2DCONV/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..d7c49df52c8e594edae710ad71032e912ccd7892 --- /dev/null +++ b/workloads/micro/uvm_prefetch/2DCONV/run_tiny.sh @@ -0,0 +1 @@ +./2DConvolution 512 512 4 diff --git a/workloads/micro/uvm_prefetch/3DCONV/3DConvolution.cu b/workloads/micro/uvm_prefetch/3DCONV/3DConvolution.cu new file mode 100644 index 0000000000000000000000000000000000000000..86f14d94a150a511414c116bc2d9efb712d176ca --- /dev/null +++ b/workloads/micro/uvm_prefetch/3DCONV/3DConvolution.cu @@ -0,0 +1,398 @@ +/** + * 3DConvolution.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +// define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +#define NBLOCKS 2 +#define BATCH_SIZE 3 + +uint64_t NI; +uint64_t NJ; +uint64_t NK; +uint64_t nblocks; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 4 + +#define KERNEL 3 + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; + + + +void conv3D(DATA_TYPE* A, DATA_TYPE* B) +{ + uint64_t i, j, k; + DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + c11 = +2; c21 = +5; c31 = -8; + c12 = -3; c22 = +6; c32 = -9; + c13 = +4; c23 = +7; c33 = +10; + + for (i = 1; i < NI - 1; ++i) // 0 + { + for (j = 1; j < NJ - 1; ++j) // 1 + { + for (k = 1; k < NK -1; ++k) // 2 + { + B[i*(NK * NJ) + j*NK + k] = c11 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + c13 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + + c21 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + c23 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + 
(k - 1)] + + c31 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + c33 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + + c12 * A[(i + 0)*(NK * NJ) + (j - 1)*NK + (k + 0)] + c22 * A[(i + 0)*(NK * NJ) + (j + 0)*NK + (k + 0)] + + c32 * A[(i + 0)*(NK * NJ) + (j + 1)*NK + (k + 0)] + c11 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k + 1)] + + c13 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k + 1)] + c21 * A[(i - 1)*(NK * NJ) + (j + 0)*NK + (k + 1)] + + c23 * A[(i + 1)*(NK * NJ) + (j + 0)*NK + (k + 1)] + c31 * A[(i - 1)*(NK * NJ) + (j + 1)*NK + (k + 1)] + + c33 * A[(i + 1)*(NK * NJ) + (j + 1)*NK + (k + 1)]; + } + } + } +} + +void initGPU(DATA_TYPE *A_gpu) +{ + uint64_t i, j, k; + + for (i = 0; i < NI; ++i) + { + for (j = 0; j < NJ; ++j) + { + for (k = 0; k < NK; ++k) + { + A_gpu[i * (NK * NJ) + j * NK + k] = i % 12 + 2 * (j % 7) + 3 * (k % 13); + } + } + } +} + +void initCPU(DATA_TYPE *A) +{ + uint64_t i, j, k; + + for (i = 0; i < NI; ++i) + { + for (j = 0; j < NJ; ++j) + { + for (k = 0; k < NK; ++k) + { + A[i*(NK * NJ) + j*NK + k] = i % 12 + 2 * (j % 7) + 3 * (k % 13); + } + } + } +} + + +void compareResults(DATA_TYPE* B, DATA_TYPE* B_outputFromGpu) +{ + uint64_t i, j, k, fail; + fail = 0; + + // Compare result from cpu and gpu... 
+ for (i = 1; i < NI - 1; ++i) // 0 + { + for (j = 1; j < NJ - 1; ++j) // 1 + { + for (k = 1; k < NK - 1; ++k) // 2 + { + if (percentDiff(B[i*(NK * NJ) + j*NK + k], B_outputFromGpu[i*(NK * NJ) + j*NK + k]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, %d, CPU is %f, GPU is %f.\n", i, j, k, B[i * (NK * NJ) + j * NK + k], B_outputFromGpu[i * (NK * NJ) + j * NK + k]); + fail++; + } + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void convolution3D_kernel(DATA_TYPE *A, DATA_TYPE *B, uint64_t NI, uint64_t NJ, uint64_t NK, uint64_t block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + c11 = +2; c21 = +5; c31 = -8; + c12 = -3; c22 = +6; c32 = -9; + c13 = +4; c23 = +7; c33 = +10; + + uint64_t tile_dim_x = (NJ + DIM_THREAD_BLOCK - 1) / (DIM_THREAD_BLOCK * BATCH_SIZE); + + __shared__ DATA_TYPE tmp_A[DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1]; + __shared__ DATA_TYPE tmp_B[DIM_THREAD_BLOCK * BATCH_SIZE][DIM_THREAD_BLOCK * BATCH_SIZE][DIM_THREAD_BLOCK * BATCH_SIZE]; + + // uint64_t total_tiles = tile_dim_x * tile_dim_x * tile_dim_x; + + uint64_t tiles_this_block_x = (block_size / (DIM_THREAD_BLOCK * BATCH_SIZE)); + uint64_t tiles_this_block = tiles_this_block_x * tiles_this_block_x * tiles_this_block_x; + + uint64_t base_tile = (blockIdx.z * gridDim.y * gridDim.x + blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + uint64_t tile = base_tile; + uint64_t end_tile = tile + tiles_this_block; + + // printf("block_size is %d, tile_dim_x is %d, tiles_this_block_x is %d.\n", block_size, tile_dim_x, tiles_this_block_x); + + for (; tile < end_tile; tile += 1) + { + // block id + uint64_t offset = tile - base_tile; + uint64_t block_id = tile / 
tiles_this_block; + + uint64_t bz = block_id / (gridDim.y * gridDim.x) * tiles_this_block_x + offset / (tiles_this_block_x * tiles_this_block_x); + uint64_t by = block_id % (gridDim.y * gridDim.x) / gridDim.x * tiles_this_block_x + offset % (tiles_this_block_x * tiles_this_block_x) / tiles_this_block_x; + uint64_t bx = block_id % (gridDim.y * gridDim.x) % gridDim.x * tiles_this_block_x + offset % (tiles_this_block_x * tiles_this_block_x) % tiles_this_block_x; + + // thread id + uint64_t tx = threadIdx.x; + uint64_t ty = threadIdx.y; + uint64_t tz = threadIdx.z; + + uint64_t index_B_z = DIM_THREAD_BLOCK * BATCH_SIZE * bz + BATCH_SIZE * tz + 1; + uint64_t index_B_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty + 1; + uint64_t index_B_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx + 1; + + uint64_t index_A_z = DIM_THREAD_BLOCK * BATCH_SIZE * bz + BATCH_SIZE * tz; + uint64_t index_A_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty; + uint64_t index_A_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx; + + uint64_t index_A_z_start = DIM_THREAD_BLOCK * BATCH_SIZE * bz; + uint64_t index_A_y_start = DIM_THREAD_BLOCK * BATCH_SIZE * by; + uint64_t index_A_x_start = DIM_THREAD_BLOCK * BATCH_SIZE * bx; + + uint64_t index_A_z_bound = DIM_THREAD_BLOCK * BATCH_SIZE * bz + BATCH_SIZE * DIM_THREAD_BLOCK; + uint64_t index_A_y_bound = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * DIM_THREAD_BLOCK; + uint64_t index_A_x_bound = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * DIM_THREAD_BLOCK; + + // fetch A + for (uint64_t i = 0; i < BATCH_SIZE; i++) { + for (uint64_t j = 0; j < BATCH_SIZE; j++) { + for (uint64_t k = 0; k < BATCH_SIZE; k++) { + if ((index_A_z + i) < NI && (index_A_y + j) < NJ && (index_A_x + k) < NK) { + tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] = A[(index_A_z + i) * NJ * NK + (index_A_y + j) * NK + index_A_x + k]; + tmp_B[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] = 0; + } + } + } + 
} + + // fetch A -- padding + for (uint64_t i = 0; i < KERNEL - 1; i++) { + for (uint64_t j = 0; j < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; j++) { + for (uint64_t k = 0; k < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; k++) { + if ((index_A_z_bound + i) < NI && (index_A_y_start + j) < NJ && (index_A_x_start + k) < NK) { + tmp_A[DIM_THREAD_BLOCK * BATCH_SIZE + i][j][k] = A[(index_A_z_bound + i) * NJ * NK + (index_A_y_start + j) * NK + index_A_x_start + k]; + } + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; i++) { + for (uint64_t j = 0; j < KERNEL - 1; j++) { + for (uint64_t k = 0; k < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; k++) { + if ((index_A_z_start + i) < NI && (index_A_y_bound + j) < NJ && (index_A_x_start + k) < NK) { + tmp_A[i][DIM_THREAD_BLOCK * BATCH_SIZE + j][k] = A[(index_A_z_start + i) * NJ * NK + (index_A_y_bound + j) * NK + index_A_x_start + k]; + } + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; i++) { + for (uint64_t j = 0; j < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; j++) { + for (uint64_t k = 0; k < KERNEL - 1; k++) { + if ((index_A_z_start + i) < NI && (index_A_y_start + j) < NJ && (index_A_x_bound + k) < NK) { + tmp_A[i][j][DIM_THREAD_BLOCK * BATCH_SIZE + k] = A[(index_A_z_start + i) * NJ * NK + (index_A_y_start + j) * NK + index_A_x_bound + k]; + } + } + } + } + block.sync(); + + // Computation + for (uint64_t i = 0; i < BATCH_SIZE; i++) { + for (uint64_t j = 0; j < BATCH_SIZE; j++) { + for (uint64_t k = 0; k < BATCH_SIZE; k++) { + tmp_B[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] = c11 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c13 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + + c21 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c23 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + 
+ c31 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c33 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + + c12 * tmp_A[tz * BATCH_SIZE + i + 1][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k + 1] + c22 * tmp_A[tz * BATCH_SIZE + i + 1][ty * BATCH_SIZE + j + 1][tx * BATCH_SIZE + k + 1] + + c32 * tmp_A[tz * BATCH_SIZE + i + 1][ty * BATCH_SIZE + j + 2][tx * BATCH_SIZE + k + 1] + c11 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k + 2] + + c13 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k + 2] + c21 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j + 1][tx * BATCH_SIZE + k + 2] + + c23 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j + 1][tx * BATCH_SIZE + k + 2] + c31 * tmp_A[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j + 2][tx * BATCH_SIZE + k + 2] + + c33 * tmp_A[tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j + 2][tx * BATCH_SIZE + k + 2]; + } + } + } + block.sync(); + + // Store B + for (uint64_t i = 0; i < BATCH_SIZE; i++) { + for (uint64_t j = 0; j < BATCH_SIZE; j++) { + for (uint64_t k = 0; k < BATCH_SIZE; k++) { + if ((index_B_z + i + 1) < NI && (index_B_y + j + 1) < NJ && (index_B_x + k + 1) < NK) + { + B[(index_B_z + i) * NJ * NK + (index_B_y + j) * NK + index_B_x + k] = tmp_B[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k]; + } + } + } + } + block.sync(); + } +} + + +void convolution3DCuda(DATA_TYPE* A_gpu, DATA_TYPE* B_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK, DIM_THREAD_BLOCK, DIM_THREAD_BLOCK); + dim3 grid(nblocks, nblocks, nblocks); + + uint64_t block_size = (NI + (nblocks - 1)) / nblocks; + + // t_start = rtclock(); + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(A_gpu, NI * NJ * NK * sizeof(DATA_TYPE), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(B_gpu, NI * NJ * NK * sizeof(DATA_TYPE), GPU_DEVICE, 
stream2); + cudaStreamSynchronize(stream2); + convolution3D_kernel<<>>(A_gpu, B_gpu, NI, NJ, NK, block_size); + cudaDeviceSynchronize(); + + // convolution3D_kernel<<>>(A_gpu, B_gpu, NI, NJ, NK, block_size); + // cudaDeviceSynchronize(); + // t_end = rtclock(); + // fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); + +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + if (argc >= 5) { + NI = atoll(argv[1]); + NJ = atoll(argv[2]); + NK = atoll(argv[3]); + nblocks = atoi(argv[4]); + } else { + NI = SIZE; + NJ = SIZE; + NK = SIZE; + nblocks = NBLOCKS; + } + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + + A = (DATA_TYPE*)malloc(NI*NJ*NK*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NI*NJ*NK*sizeof(DATA_TYPE)); + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NJ * NK); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NI * NJ * NK); + + // initGPU(A_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * NJ * NK * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + convolution3DCuda(A_gpu, B_gpu); + memcpy(B, B_gpu, NI * NJ * NK * sizeof(DATA_TYPE)); + + cudaFree(A_gpu); + cudaFree(B_gpu); + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // conv3D(A, B); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(B, B_gpu); + free(A); + free(B); + + return 0; +} diff --git a/workloads/micro/uvm_prefetch/3DCONV/Makefile b/workloads/micro/uvm_prefetch/3DCONV/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..72aac9cb83cca03110da33f8da9119f32af90ccd --- /dev/null +++ b/workloads/micro/uvm_prefetch/3DCONV/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti 
-arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := 3DConvolution +CUFILES := 3DConvolution.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o 3DConvolution diff --git a/workloads/micro/uvm_prefetch/3DCONV/run.sh b/workloads/micro/uvm_prefetch/3DCONV/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..0c27d78b8d3484896bd6812043b5466b074cbebf --- /dev/null +++ b/workloads/micro/uvm_prefetch/3DCONV/run.sh @@ -0,0 +1,2 @@ +#./3DConvolution 768 768 768 8 +./3DConvolution 1536 1536 1536 8 diff --git a/workloads/micro/uvm_prefetch/3DCONV/run_large.sh b/workloads/micro/uvm_prefetch/3DCONV/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..500c363302024d6080c025784efbe2e7fef74f53 --- /dev/null +++ b/workloads/micro/uvm_prefetch/3DCONV/run_large.sh @@ -0,0 +1 @@ +./3DConvolution 384 384 384 8 diff --git a/workloads/micro/uvm_prefetch/3DCONV/run_medium.sh b/workloads/micro/uvm_prefetch/3DCONV/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..d0a9fb120b6de1c16ae4146d7684c5557af95152 --- /dev/null +++ b/workloads/micro/uvm_prefetch/3DCONV/run_medium.sh @@ -0,0 +1 @@ +./3DConvolution 192 192 192 4 diff --git a/workloads/micro/uvm_prefetch/3DCONV/run_mega.sh b/workloads/micro/uvm_prefetch/3DCONV/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..4e21c66c3d1cf21bfe88427d92c54ff8be428d8a --- /dev/null +++ b/workloads/micro/uvm_prefetch/3DCONV/run_mega.sh @@ -0,0 +1 @@ +./3DConvolution 1536 1536 1536 8 diff --git a/workloads/micro/uvm_prefetch/3DCONV/run_small.sh b/workloads/micro/uvm_prefetch/3DCONV/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..f794eec58ed56174c5d02096a9bf5acc4e948d0f --- /dev/null +++ b/workloads/micro/uvm_prefetch/3DCONV/run_small.sh @@ -0,0 +1 @@ +./3DConvolution 96 96 96 4 diff --git 
a/workloads/micro/uvm_prefetch/3DCONV/run_super.sh b/workloads/micro/uvm_prefetch/3DCONV/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..11f8b43d3e406c466b824420d34991a5c0f876b4 --- /dev/null +++ b/workloads/micro/uvm_prefetch/3DCONV/run_super.sh @@ -0,0 +1 @@ +./3DConvolution 768 768 768 8 diff --git a/workloads/micro/uvm_prefetch/3DCONV/run_tiny.sh b/workloads/micro/uvm_prefetch/3DCONV/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..791c84bc54088ea65fc7612ee94442bbbc615cc3 --- /dev/null +++ b/workloads/micro/uvm_prefetch/3DCONV/run_tiny.sh @@ -0,0 +1 @@ +./3DConvolution 48 48 48 2 diff --git a/workloads/micro/uvm_prefetch/gemm/Makefile b/workloads/micro/uvm_prefetch/gemm/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d545b0aab3ef63260888227abd6ab99bcaddbff3 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := gemm +CUFILES := gemm.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o gemm diff --git a/workloads/micro/uvm_prefetch/gemm/gemm.cu b/workloads/micro/uvm_prefetch/gemm/gemm.cu new file mode 100644 index 0000000000000000000000000000000000000000..aaff3427d6c1acd9ca9c208bb1c9682f5bfd72ce --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm/gemm.cu @@ -0,0 +1,305 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +uint64_t NI; +uint64_t NJ; +uint64_t NK; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK_X 32 +#define DIM_THREAD_BLOCK_Y 32 + + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void gemm(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i,j,k; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i*NJ + j] *= BETA; + for (k = 0; k < NK; ++k) { + C[i*NJ + j] += ALPHA * A[i*NK + k] * B[k*NJ + j]; + } + } + } +} + +void initGPU(DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NK; j++) { + A_gpu[i * NK + j] = ((DATA_TYPE)i * j) / NI; + } + } + + + for (i = 0; i < NK; i++) { + for (j = 0; j < NJ; j++) { + B_gpu[i * NJ + j] = ((DATA_TYPE)i * 
j + 1) / NJ; + } + } + + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C_gpu[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + } + } + +} + +void initCPU(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NK; j++) { + A[i * NK + j] = ((DATA_TYPE)i * j) / NI; + } + } + + + for (i = 0; i < NK; i++) { + for (j = 0; j < NJ; j++) { + B[i * NJ + j] = ((DATA_TYPE)i * j + 1) / NJ; + } + } + + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + } + } + +} + + +void compareResults(DATA_TYPE* C, DATA_TYPE* C_outputFromGpu) +{ + uint64_t i, j, fail; + fail = 0; + + // Compare C1 and C2 + for (i=0; i < NI; i++) + { + for (j=0; j < NJ; j++) + { + // printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + if (percentDiff(C[i*NJ + j], C_outputFromGpu[i*NJ + j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + fail++; + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void gemm_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, uint64_t NI, uint64_t NK, uint64_t NJ) +{ + // Compute each thread's global row and column index + uint64_t row = blockIdx.y * blockDim.y + threadIdx.y; + uint64_t col = blockIdx.x * blockDim.x + threadIdx.x; + + // Statically allocated shared memory + __shared__ DATA_TYPE s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + __shared__ DATA_TYPE s_b[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + // Accumulate in temporary variable + + DATA_TYPE tmp = BETA * c[row * NJ + col]; + + // Sweep tile across matrix + for (uint64_t i = 0; i < NK; i += blockDim.x) + { + s_a[threadIdx.y * blockDim.x + threadIdx.x] = a[row * NK + i + threadIdx.x]; + s_b[threadIdx.y * blockDim.x + threadIdx.x] = b[(i + threadIdx.y) * NJ + 
col]; + + // Wait for both tiles to be loaded in before doing computation + __syncthreads(); + + // Do matrix multiplication on the small matrix + for (uint64_t k = 0; k < blockDim.x; k++) + { + tmp += ALPHA * s_a[threadIdx.y * blockDim.x + k] * s_b[k * blockDim.x + threadIdx.x]; + } + + // Wait for all threads to finish using current tiles before loading in new ones + __syncthreads(); + } + + c[row * NJ + col] = tmp; +} + +void gemmCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 grid((size_t)(ceil( ((float)NI)/ ((float)block.x) )),(size_t)(ceil( ((float)NJ)/ ((float)block.y) ))); + + //t_start = rtclock(); + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(A_gpu, NI * NK * sizeof(DATA_TYPE), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(B_gpu, NK * NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(C_gpu, NI * NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + gemm_kernel<<>>(A_gpu, B_gpu, C_gpu, NI, NK, NJ); + cudaDeviceSynchronize(); + + + // gemm_kernel<<< grid, block >>>(A_gpu, B_gpu, C_gpu, NI, NK, NJ); + // cudaDeviceSynchronize(); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + if (argc >= 4) { + NI = atoll(argv[1]); + NK = atoll(argv[2]); + NJ = atoll(argv[3]); + } else { + NI = SIZE; + NK = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE* C; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + + A 
= (DATA_TYPE*)malloc(NI*NK*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NK*NJ*sizeof(DATA_TYPE)); + C = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + + initCPU(A,B,C); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + // initGPU(A_gpu, B_gpu, C_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * NK * sizeof(DATA_TYPE)); + memcpy(B_gpu, B, NK * NJ * sizeof(DATA_TYPE)); + memcpy(C_gpu, C, NI * NJ * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + gemmCuda(A, B, C, A_gpu, B_gpu, C_gpu); + memcpy(C, C_gpu, NI * NJ * sizeof(DATA_TYPE)); + + // t_start = rtclock(); + // gemm(A, B, C); // needed to keep benchmark accurate + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(C_gpu, C); + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + endCPU(); + finiTrace(); + + free(A); + free(B); + free(C); + return 0; +} + diff --git a/workloads/micro/uvm_prefetch/gemm/run.sh b/workloads/micro/uvm_prefetch/gemm/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..d234537561c276f291434f42a7337bc5d5a61605 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm/run.sh @@ -0,0 +1,3 @@ +#./gemm 1024 1024 1024 +./gemm 32768 32768 32768 +#./gemm 512 512 512 diff --git a/workloads/micro/uvm_prefetch/gemm/run_large.sh b/workloads/micro/uvm_prefetch/gemm/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..cc1dbe192ecafb7558b76cfa4d2ce05a15067999 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm/run_large.sh @@ -0,0 +1 @@ +./gemm 8192 8192 8192 diff --git a/workloads/micro/uvm_prefetch/gemm/run_medium.sh b/workloads/micro/uvm_prefetch/gemm/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..ebae2815e56031234ccdef0d98cb774f111e82a2 --- /dev/null +++ 
b/workloads/micro/uvm_prefetch/gemm/run_medium.sh @@ -0,0 +1 @@ +./gemm 4096 4096 4096 diff --git a/workloads/micro/uvm_prefetch/gemm/run_mega.sh b/workloads/micro/uvm_prefetch/gemm/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..7a7e74e163fa0d0ad774327163771ce8eb23bd79 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm/run_mega.sh @@ -0,0 +1 @@ +./gemm 32768 32768 32768 diff --git a/workloads/micro/uvm_prefetch/gemm/run_small.sh b/workloads/micro/uvm_prefetch/gemm/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..df5896fd490618e0f8c5608097518b3837b9ad33 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm/run_small.sh @@ -0,0 +1 @@ +./gemm 1024 1024 1024 diff --git a/workloads/micro/uvm_prefetch/gemm/run_super.sh b/workloads/micro/uvm_prefetch/gemm/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..9736fc5d97c8ef9b55a3157882e05979f65a5011 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm/run_super.sh @@ -0,0 +1 @@ +./gemm 16384 16384 16384 diff --git a/workloads/micro/uvm_prefetch/gemm/run_tiny.sh b/workloads/micro/uvm_prefetch/gemm/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..9858f12d61d44c0f6265f7c22c27fab446500f55 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm/run_tiny.sh @@ -0,0 +1 @@ +./gemm 512 512 512 diff --git a/workloads/micro/uvm_prefetch/gemm_perf/Makefile b/workloads/micro/uvm_prefetch/gemm_perf/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d545b0aab3ef63260888227abd6ab99bcaddbff3 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm_perf/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := gemm +CUFILES := gemm.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} 
${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o gemm diff --git a/workloads/micro/uvm_prefetch/gemm_perf/gemm b/workloads/micro/uvm_prefetch/gemm_perf/gemm new file mode 100755 index 0000000000000000000000000000000000000000..209eb9e8f7babab36963d1ebf8a771aab8aff38e Binary files /dev/null and b/workloads/micro/uvm_prefetch/gemm_perf/gemm differ diff --git a/workloads/micro/uvm_prefetch/gemm_perf/gemm.cu b/workloads/micro/uvm_prefetch/gemm_perf/gemm.cu new file mode 100644 index 0000000000000000000000000000000000000000..146cc7340714d8ac908595e5bafa4a2c5b9963b3 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm_perf/gemm.cu @@ -0,0 +1,305 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. + * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +uint64_t NI; +uint64_t NJ; +uint64_t NK; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK_X 32 +#define DIM_THREAD_BLOCK_Y 32 + + +/* Declared constant values for ALPHA and BETA (same as values in 
PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void gemm(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i,j,k; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i*NJ + j] *= BETA; + for (k = 0; k < NK; ++k) { + C[i*NJ + j] += ALPHA * A[i*NK + k] * B[k*NJ + j]; + } + } + } +} + +void initGPU(DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NK; j++) { + A_gpu[i * NK + j] = ((DATA_TYPE)i * j) / NI; + } + } + + + for (i = 0; i < NK; i++) { + for (j = 0; j < NJ; j++) { + B_gpu[i * NJ + j] = ((DATA_TYPE)i * j + 1) / NJ; + } + } + + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C_gpu[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + } + } + +} + +void initCPU(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NK; j++) { + A[i * NK + j] = ((DATA_TYPE)i * j) / NI; + } + } + + + for (i = 0; i < NK; i++) { + for (j = 0; j < NJ; j++) { + B[i * NJ + j] = ((DATA_TYPE)i * j + 1) / NJ; + } + } + + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + } + } + +} + + +void compareResults(DATA_TYPE* C, DATA_TYPE* C_outputFromGpu) +{ + uint64_t i, j, fail; + fail = 0; + + // Compare C1 and C2 + for (i=0; i < NI; i++) + { + for (j=0; j < NJ; j++) + { + // printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + if (percentDiff(C[i*NJ + j], C_outputFromGpu[i*NJ + j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + fail++; + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void gemm_kernel(DATA_TYPE *a, DATA_TYPE 
*b, DATA_TYPE *c, uint64_t NI, uint64_t NK, uint64_t NJ) +{ + // Compute each thread's global row and column index + uint64_t row = blockIdx.y * blockDim.y + threadIdx.y; + uint64_t col = blockIdx.x * blockDim.x + threadIdx.x; + + // Statically allocated shared memory + __shared__ DATA_TYPE s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + __shared__ DATA_TYPE s_b[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + // Accumulate in temporary variable + + DATA_TYPE tmp = BETA * c[row * NJ + col]; + + // Sweep tile across matrix + for (uint64_t i = 0; i < NK; i += blockDim.x) + { + s_a[threadIdx.y * blockDim.x + threadIdx.x] = a[row * NK + i + threadIdx.x]; + s_b[threadIdx.y * blockDim.x + threadIdx.x] = b[(i + threadIdx.y) * NJ + col]; + + // Wait for both tiles to be loaded in before doing computation + __syncthreads(); + + // Do matrix multiplication on the small matrix + for (uint64_t k = 0; k < blockDim.x; k++) + { + tmp += ALPHA * s_a[threadIdx.y * blockDim.x + k] * s_b[k * blockDim.x + threadIdx.x]; + } + + // Wait for all threads to finish using current tiles before loading in new ones + __syncthreads(); + } + + c[row * NJ + col] = tmp; +} + +void gemmCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 grid((size_t)(ceil( ((float)NI)/ ((float)block.x) )),(size_t)(ceil( ((float)NJ)/ ((float)block.y) ))); + + //t_start = rtclock(); + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(A_gpu, NI * NK * sizeof(DATA_TYPE), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(B_gpu, NK * NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(C_gpu, NI * NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + 
gemm_kernel<<>>(A_gpu, B_gpu, C_gpu, NI, NK, NJ); + cudaDeviceSynchronize(); + + + // gemm_kernel<<< grid, block >>>(A_gpu, B_gpu, C_gpu, NI, NK, NJ); + // cudaDeviceSynchronize(); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + if (argc >= 4) { + NI = atoll(argv[1]); + NK = atoll(argv[2]); + NJ = atoll(argv[3]); + } else { + NI = SIZE; + NK = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE* C; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + + A = (DATA_TYPE*)malloc(NI*NK*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NK*NJ*sizeof(DATA_TYPE)); + C = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + + initCPU(A,B,C); + GPU_argv_init(); + + //initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + // initGPU(A_gpu, B_gpu, C_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * NK * sizeof(DATA_TYPE)); + memcpy(B_gpu, B, NK * NJ * sizeof(DATA_TYPE)); + memcpy(C_gpu, C, NI * NJ * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + gemmCuda(A, B, C, A_gpu, B_gpu, C_gpu); + memcpy(C, C_gpu, NI * NJ * sizeof(DATA_TYPE)); + + // t_start = rtclock(); + // gemm(A, B, C); // needed to keep benchmark accurate + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(C_gpu, C); + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + endCPU(); + //finiTrace(); + + free(A); + free(B); + free(C); + return 0; +} + diff --git a/workloads/micro/uvm_prefetch/gemm_perf/run.sh b/workloads/micro/uvm_prefetch/gemm_perf/run.sh new file mode 100755 index 
0000000000000000000000000000000000000000..d234537561c276f291434f42a7337bc5d5a61605 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm_perf/run.sh @@ -0,0 +1,3 @@ +#./gemm 1024 1024 1024 +./gemm 32768 32768 32768 +#./gemm 512 512 512 diff --git a/workloads/micro/uvm_prefetch/gemm_perf/run_large.sh b/workloads/micro/uvm_prefetch/gemm_perf/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..cc1dbe192ecafb7558b76cfa4d2ce05a15067999 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm_perf/run_large.sh @@ -0,0 +1 @@ +./gemm 8192 8192 8192 diff --git a/workloads/micro/uvm_prefetch/gemm_perf/run_medium.sh b/workloads/micro/uvm_prefetch/gemm_perf/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..ebae2815e56031234ccdef0d98cb774f111e82a2 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm_perf/run_medium.sh @@ -0,0 +1 @@ +./gemm 4096 4096 4096 diff --git a/workloads/micro/uvm_prefetch/gemm_perf/run_mega.sh b/workloads/micro/uvm_prefetch/gemm_perf/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..7a7e74e163fa0d0ad774327163771ce8eb23bd79 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm_perf/run_mega.sh @@ -0,0 +1 @@ +./gemm 32768 32768 32768 diff --git a/workloads/micro/uvm_prefetch/gemm_perf/run_small.sh b/workloads/micro/uvm_prefetch/gemm_perf/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..df5896fd490618e0f8c5608097518b3837b9ad33 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm_perf/run_small.sh @@ -0,0 +1 @@ +./gemm 1024 1024 1024 diff --git a/workloads/micro/uvm_prefetch/gemm_perf/run_super.sh b/workloads/micro/uvm_prefetch/gemm_perf/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..9736fc5d97c8ef9b55a3157882e05979f65a5011 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm_perf/run_super.sh @@ -0,0 +1 @@ +./gemm 16384 16384 16384 diff --git a/workloads/micro/uvm_prefetch/gemm_perf/run_tiny.sh 
b/workloads/micro/uvm_prefetch/gemm_perf/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..9858f12d61d44c0f6265f7c22c27fab446500f55 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemm_perf/run_tiny.sh @@ -0,0 +1 @@ +./gemm 512 512 512 diff --git a/workloads/micro/uvm_prefetch/gemv/Makefile b/workloads/micro/uvm_prefetch/gemv/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..005563000dad0469dbf388d54a639e926cf5aa85 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemv/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := gemv +CUFILES := gemv.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o gemv diff --git a/workloads/micro/uvm_prefetch/gemv/gemv.cu b/workloads/micro/uvm_prefetch/gemv/gemv.cu new file mode 100644 index 0000000000000000000000000000000000000000..bebc282c23043c99fc4a0a679206a6fe032fc92d --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemv/gemv.cu @@ -0,0 +1,288 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +// define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 40960 +uint64_t NI; +uint64_t NJ; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 256 + +#define BATCH_SIZE 16 + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void gemv(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) + { + C[i] *= BETA; + for (j = 0; j < NJ; j++) + { + C[i] += ALPHA * A[i * NJ + j] * B[j]; + } + } +} + +void initGPU(DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) + { + for (j = 0; j < NJ; j++) + { + A_gpu[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } + + for (j = 0; j < NJ; j++) + { + B_gpu[j] = ((DATA_TYPE)j + 1) / NJ; + } + + for (i = 0; i < NI; i++) + { + C_gpu[i] = ((DATA_TYPE)i + 2) / NI; + } +} + +void 
initCPU(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) + { + for (j = 0; j < NJ; j++) + { + A[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } + + for (j = 0; j < NJ; j++) + { + B[j] = ((DATA_TYPE)j + 1) / NJ; + } + + for (i = 0; i < NI; i++) + { + C[i] = ((DATA_TYPE)i + 2) / NI; + } +} + +void compareResults(DATA_TYPE *C, DATA_TYPE *C_outputFromGpu) +{ + uint64_t i, fail; + fail = 0; + + // Compare C1 and C2 + for (i = 0; i < NI; i++) + { + if (percentDiff(C[i], C_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + fail++; + printf("%d, GPU is %f, CPU is %f.\n", i, C[i], C_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void gemv_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, uint64_t NI, uint64_t NJ) +{ + uint64_t row = blockIdx.x * blockDim.x + threadIdx.x; + uint64_t tx = threadIdx.x; + + __shared__ DATA_TYPE s_b[DIM_THREAD_BLOCK][BATCH_SIZE]; + + DATA_TYPE tmp = BETA * c[row]; + __syncthreads(); + + uint64_t tile = 0; + uint64_t end_tile = NJ / BATCH_SIZE; + + for (; tile < end_tile; tile += 1) + { + uint64_t base_index = tile * BATCH_SIZE; + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + s_b[tx][k] = b[base_index + k]; + } + __syncthreads(); + + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + tmp += ALPHA * a[row * NJ + base_index + k] * s_b[tx][k]; + } + __syncthreads(); + } + c[row] = tmp; +} + +void gemvCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / (DIM_THREAD_BLOCK)); + + // t_start = rtclock(); + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(A_gpu, NI * NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream1); + 
cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(B_gpu, NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(C_gpu, NI * sizeof(DATA_TYPE), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + gemv_kernel<<>>(A_gpu, B_gpu, C_gpu, NI, NJ); + cudaDeviceSynchronize(); + + // gemv_kernel<<>>(A_gpu, B_gpu, C_gpu, NI, NJ); + // cudaDeviceSynchronize(); + // t_end = rtclock(); + + // fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + if (argc >= 3) + { + NI = atoll(argv[1]); + NJ = atoll(argv[2]); + } + else + { + NI = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE *A; + DATA_TYPE *B; + DATA_TYPE *C; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + + A = (DATA_TYPE *)malloc(NI * NJ * sizeof(DATA_TYPE)); + B = (DATA_TYPE *)malloc(NJ * sizeof(DATA_TYPE)); + C = (DATA_TYPE *)malloc(NI * sizeof(DATA_TYPE)); + + initCPU(A, B, C); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NJ); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NJ); + cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI); + + // initGPU(A_gpu, B_gpu, C_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * NJ * sizeof(DATA_TYPE)); + memcpy(B_gpu, B, NJ * sizeof(DATA_TYPE)); + memcpy(C_gpu, C, NI * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + gemvCuda(A, B, C, A_gpu, B_gpu, C_gpu); + memcpy(C, C_gpu, NI * sizeof(DATA_TYPE)); + + // t_start = rtclock(); + // gemv(A, B, C); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(C_gpu, C); + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + endCPU(); + finiTrace(); + + free(A); + free(B); + free(C); + return 0; +} diff --git a/workloads/micro/uvm_prefetch/gemv/run.sh 
b/workloads/micro/uvm_prefetch/gemv/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..c75d8f69375fd6c923a93ba88a4cb43238844f8b --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemv/run.sh @@ -0,0 +1,2 @@ +# ./gemv 16384 16384 +./gemv 32768 32768 diff --git a/workloads/micro/uvm_prefetch/gemv/run_large.sh b/workloads/micro/uvm_prefetch/gemv/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..102aa73e96b74d0d46ef261f0ffd665639679025 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemv/run_large.sh @@ -0,0 +1 @@ +./gemv 8192 8192 diff --git a/workloads/micro/uvm_prefetch/gemv/run_medium.sh b/workloads/micro/uvm_prefetch/gemv/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..2cc0f68e4d4e96c36ee0d72e64f7acdb7c97233c --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemv/run_medium.sh @@ -0,0 +1 @@ +./gemv 4096 4096 diff --git a/workloads/micro/uvm_prefetch/gemv/run_mega.sh b/workloads/micro/uvm_prefetch/gemv/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..88ca5ba9468efc68d859100ccbd246b9b3af960b --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemv/run_mega.sh @@ -0,0 +1 @@ +./gemv 65536 65536 diff --git a/workloads/micro/uvm_prefetch/gemv/run_small.sh b/workloads/micro/uvm_prefetch/gemv/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..55646a647b7f53aff91c5e38562bc449d75daa9a --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemv/run_small.sh @@ -0,0 +1 @@ +./gemv 1024 1024 diff --git a/workloads/micro/uvm_prefetch/gemv/run_super.sh b/workloads/micro/uvm_prefetch/gemv/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..295d223c3c9c768e58c0fba722fa859b74564d2d --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemv/run_super.sh @@ -0,0 +1 @@ +./gemv 32768 32768 diff --git a/workloads/micro/uvm_prefetch/gemv/run_tiny.sh b/workloads/micro/uvm_prefetch/gemv/run_tiny.sh new file mode 100755 index 
0000000000000000000000000000000000000000..60becef20c6cc3113ff1b4897d177ff3cbd77eb8 --- /dev/null +++ b/workloads/micro/uvm_prefetch/gemv/run_tiny.sh @@ -0,0 +1 @@ +./gemv 512 512 diff --git a/workloads/micro/uvm_prefetch/saxpy/Makefile b/workloads/micro/uvm_prefetch/saxpy/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..069a5001c286cb5f44c4686449f04755cd5a2e52 --- /dev/null +++ b/workloads/micro/uvm_prefetch/saxpy/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := saxpy +CUFILES := saxpy.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o saxpy diff --git a/workloads/micro/uvm_prefetch/saxpy/run.sh b/workloads/micro/uvm_prefetch/saxpy/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..61775ede6c26208ae2a150958f8eec5375952773 --- /dev/null +++ b/workloads/micro/uvm_prefetch/saxpy/run.sh @@ -0,0 +1,11 @@ +# ./vector_seq 17179869184 10 524288 + +./vector_seq 8589934592 10 262144 +#nsys profile --stats=true ./vector_seq 1073741824 10 32768 + + +# ./vector_seq 4294967296 10 131072 + +# ./vector_seq 2147483648 10 32768 +#./vector_seq 1073741824 10 32768 +# ./vector_seq 1048576 100 32768 diff --git a/workloads/micro/uvm_prefetch/saxpy/run_large.sh b/workloads/micro/uvm_prefetch/saxpy/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..bce2787645ea40cb94ba004ec19cf4728ba48647 --- /dev/null +++ b/workloads/micro/uvm_prefetch/saxpy/run_large.sh @@ -0,0 +1 @@ +./saxpy 134217728 100 65536 diff --git a/workloads/micro/uvm_prefetch/saxpy/run_medium.sh b/workloads/micro/uvm_prefetch/saxpy/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..af6f429a95aa9d5ede352e30e59405aaee4ee55b --- /dev/null +++ 
b/workloads/micro/uvm_prefetch/saxpy/run_medium.sh @@ -0,0 +1 @@ +./saxpy 16777216 100 32768 diff --git a/workloads/micro/uvm_prefetch/saxpy/run_mega.sh b/workloads/micro/uvm_prefetch/saxpy/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..5dce5483842d255b4130f7ca89b4894c65b5a44b --- /dev/null +++ b/workloads/micro/uvm_prefetch/saxpy/run_mega.sh @@ -0,0 +1 @@ +./saxpy 4294967296 100 262144 diff --git a/workloads/micro/uvm_prefetch/saxpy/run_small.sh b/workloads/micro/uvm_prefetch/saxpy/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..c927e5e53c30ca58a12dbdf95e264a4b790ab0b8 --- /dev/null +++ b/workloads/micro/uvm_prefetch/saxpy/run_small.sh @@ -0,0 +1 @@ +./saxpy 2097152 100 32768 diff --git a/workloads/micro/uvm_prefetch/saxpy/run_super.sh b/workloads/micro/uvm_prefetch/saxpy/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..7f8d7b99b0be6cc643cd686965806f7edbc0af27 --- /dev/null +++ b/workloads/micro/uvm_prefetch/saxpy/run_super.sh @@ -0,0 +1 @@ +./saxpy 1073741824 100 65536 diff --git a/workloads/micro/uvm_prefetch/saxpy/run_tiny.sh b/workloads/micro/uvm_prefetch/saxpy/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..4604f1747023f34e81c15ce89c4be5a6fb39bc95 --- /dev/null +++ b/workloads/micro/uvm_prefetch/saxpy/run_tiny.sh @@ -0,0 +1 @@ +./saxpy 262144 100 8192 diff --git a/workloads/micro/uvm_prefetch/saxpy/saxpy.cu b/workloads/micro/uvm_prefetch/saxpy/saxpy.cu new file mode 100644 index 0000000000000000000000000000000000000000..cac333f333f8e8c3523a720ae5f0859785e64207 --- /dev/null +++ b/workloads/micro/uvm_prefetch/saxpy/saxpy.cu @@ -0,0 +1,269 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096000 +#define ITER 100 +uint64_t NI; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 256 + +#define BATCH_SIZE 8 + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void saxpy(DATA_TYPE *A, DATA_TYPE *B, uint64_t iterations) +{ + for (uint64_t i = 0; i < NI; i++) { + for (uint64_t iter = 0; iter < iterations; iter++) { + B[i] = ALPHA * A[i] + B[i]; + } + } +} + +void initGPU(DATA_TYPE *A_gpu, DATA_TYPE *B_gpu) +{ + for (uint64_t i = 0; i < NI; i++) { + A_gpu[i] = ((DATA_TYPE)i) / NI; + } + for (uint64_t i = 0; i < NI; i++) { + B_gpu[i] = ((DATA_TYPE)i + 2) / NI; + } + +} + +void initCPU(DATA_TYPE *A, DATA_TYPE *B) +{ + for (uint64_t i = 0; i < NI; i++) { + A[i] = ((DATA_TYPE)i) / NI; + } + for (uint64_t i = 
0; i < NI; i++) { + B[i] = ((DATA_TYPE)i + 2) / NI; + } +} + +void compareResults(DATA_TYPE* B, DATA_TYPE* B_outputFromGpu) +{ + uint64_t fail = 0; + + // Compare C1 and C2 + for (uint64_t i = 0; i < NI; i++) { + // printf("%lld, GPU is %f, CPU is %f.\n", i, B[i], B_outputFromGpu[i]); + if (percentDiff(B[i], B_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%lld, GPU is %f, CPU is %f.\n", i, B[i], B_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void saxpy_kernel(DATA_TYPE *a, DATA_TYPE *b, uint64_t NI, uint64_t iterations, uint64_t block_size) +{ + // Compute each thread's global row and column index + const uint64_t mem_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + __shared__ DATA_TYPE tmp_a[mem_size]; + __shared__ DATA_TYPE tmp_b[mem_size]; + + uint64_t total_tiles = NI / mem_size; + uint64_t base_tiles = total_tiles / gridDim.x; + + uint64_t tiles_this_block = block_size / mem_size; + + uint64_t tile = base_tiles * blockIdx.x; + uint64_t end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + tmp_a[i] = a[tile * mem_size + i]; + tmp_b[i] = b[tile * mem_size + i]; + } + + __syncthreads(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + for (uint64_t iter = 0; iter < iterations; iter++) + { + tmp_b[i] += ALPHA * tmp_a[i]; + } + } + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + b[tile * mem_size + i] = tmp_b[i]; + } + } +} + +void saxpyCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, uint64_t iterations, uint64_t block_size) +{ + double t_start, t_end; + if (block_size <= DIM_THREAD_BLOCK) + block_size = DIM_THREAD_BLOCK; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / block_size); + + //t_start = rtclock(); + cudaStream_t stream1; + 
cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(A_gpu, NI * sizeof(DATA_TYPE), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(B_gpu, NI * sizeof(DATA_TYPE), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + saxpy_kernel<<>>(A_gpu, B_gpu, NI, iterations, block_size); + cudaDeviceSynchronize(); + + // saxpy_kernel<<>>(A_gpu, B_gpu, NI, iterations, block_size); + // cudaDeviceSynchronize(); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + uint64_t iterations = ITER; + uint64_t block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + if (argc >= 4) { + NI = atoll(argv[1]); + iterations = atoi(argv[2]); + block_size = atoi(argv[3]); + } else { + NI = SIZE; + iterations = ITER; + block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + } + + int nblocks = NI / block_size; + if (nblocks > 64) + { + nblocks = 64; + block_size = NI / nblocks; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + + A = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + + initCPU(A,B); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NI); + + // initGPU(A_gpu, B_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * sizeof(DATA_TYPE)); + memcpy(B_gpu, B, NI * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + saxpyCuda(A, B, A_gpu, B_gpu, iterations, block_size); + memcpy(B, B_gpu, NI * sizeof(DATA_TYPE)); + + // t_start = rtclock(); + // saxpy(A, B, iterations); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(B_gpu, B); + cudaFree(A_gpu); + 
cudaFree(B_gpu); + endCPU(); + finiTrace(); + + + free(A); + free(B); + return 0; +} diff --git a/workloads/micro/uvm_prefetch/vector_rand/Makefile b/workloads/micro/uvm_prefetch/vector_rand/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..b2abd68d5e6d513a5652b845d6d822b15fc12a59 --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_rand/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := vector_rand +CUFILES := vector_rand.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o vector_rand diff --git a/workloads/micro/uvm_prefetch/vector_rand/run.sh b/workloads/micro/uvm_prefetch/vector_rand/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..61775ede6c26208ae2a150958f8eec5375952773 --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_rand/run.sh @@ -0,0 +1,11 @@ +# ./vector_seq 17179869184 10 524288 + +./vector_seq 8589934592 10 262144 +#nsys profile --stats=true ./vector_seq 1073741824 10 32768 + + +# ./vector_seq 4294967296 10 131072 + +# ./vector_seq 2147483648 10 32768 +#./vector_seq 1073741824 10 32768 +# ./vector_seq 1048576 100 32768 diff --git a/workloads/micro/uvm_prefetch/vector_rand/run_large.sh b/workloads/micro/uvm_prefetch/vector_rand/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..0fbcd5c0eb481d41efff934e3f19f162bc1f73e2 --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_rand/run_large.sh @@ -0,0 +1 @@ +./vector_rand 134217728 100 65536 diff --git a/workloads/micro/uvm_prefetch/vector_rand/run_medium.sh b/workloads/micro/uvm_prefetch/vector_rand/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..84e868f85fbc74d101d476d501095afd4aa6d017 --- /dev/null +++ 
b/workloads/micro/uvm_prefetch/vector_rand/run_medium.sh @@ -0,0 +1 @@ +./vector_rand 16777216 100 32768 diff --git a/workloads/micro/uvm_prefetch/vector_rand/run_mega.sh b/workloads/micro/uvm_prefetch/vector_rand/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..24b084a7613986acafd45cf6ca300fe52f0426d5 --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_rand/run_mega.sh @@ -0,0 +1 @@ +./vector_rand 8589934592 100 262144 diff --git a/workloads/micro/uvm_prefetch/vector_rand/run_small.sh b/workloads/micro/uvm_prefetch/vector_rand/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..baf1b589adcc83b946051cf0fc2383b45746c62f --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_rand/run_small.sh @@ -0,0 +1 @@ +./vector_rand 2097152 100 32768 diff --git a/workloads/micro/uvm_prefetch/vector_rand/run_super.sh b/workloads/micro/uvm_prefetch/vector_rand/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..19be53cddf75e6d6c5812e3ec337cce2fd9079ae --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_rand/run_super.sh @@ -0,0 +1 @@ +./vector_rand 1073741824 100 65536 diff --git a/workloads/micro/uvm_prefetch/vector_rand/run_tiny.sh b/workloads/micro/uvm_prefetch/vector_rand/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..55ee72259022f385aabf8eedc426425e6817835f --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_rand/run_tiny.sh @@ -0,0 +1 @@ +./vector_rand 262144 100 8192 diff --git a/workloads/micro/uvm_prefetch/vector_rand/vector_rand.cu b/workloads/micro/uvm_prefetch/vector_rand/vector_rand.cu new file mode 100644 index 0000000000000000000000000000000000000000..80017e788147f3775afd6775857a8a8cad1efa0a --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_rand/vector_rand.cu @@ -0,0 +1,259 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.005 + +/* Problem size */ +#define SIZE 4096000 +#define ITER 100 +uint64_t NI; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 256 + +#define BATCH_SIZE 16 + +#define LCG_A 1.1f +#define LCG_B 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void saxpy(DATA_TYPE *A, uint64_t iterations) +{ + for (uint64_t i = 0; i < NI; i++) { + for (uint64_t iter = 0; iter < iterations; iter++) { + A[i] = LCG_A * A[i] + LCG_B; + } + } +} + +void initGPU(DATA_TYPE *A_gpu) +{ + for (uint64_t i = 0; i < NI; i++) + { + A_gpu[i] = ((DATA_TYPE)i) / NI; + } + +} + +void initCPU(DATA_TYPE *A) +{ + for (uint64_t i = 0; i < NI; i++) + { + A[i] = ((DATA_TYPE)i) / NI; + } + +} + +void compareResults(DATA_TYPE* A, DATA_TYPE* A_outputFromGpu) +{ + uint64_t fail = 0; + + // Compare C1 and C2 + for (uint64_t i = 0; i < NI; i++) { + // printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + 
if (percentDiff(A[i], A_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void vector_rand_kernel(DATA_TYPE *a, uint64_t NI, uint64_t iterations, uint64_t block_size, size_t seed) +{ + // Compute each thread's global row and column index + const uint64_t mem_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + __shared__ DATA_TYPE tmp[mem_size]; + + curandState_t randState; + size_t tx = blockIdx.x * blockDim.x + threadIdx.x; + curand_init(seed, tx, 0, &randState); + size_t idx = 0; + + uint64_t total_tiles = NI / mem_size; + uint64_t base_tiles = total_tiles / gridDim.x; + + uint64_t tiles_this_block = block_size / mem_size; + + uint64_t tile = base_tiles * blockIdx.x; + uint64_t end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + // tmp[i] = a[tile * mem_size + i]; + + idx = curand(&randState); + idx <<= 32; + idx |= curand(&randState); + tmp[i] = a[tile * mem_size + idx % mem_size]; + } + + __syncthreads(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + for (uint64_t iter = 0; iter < iterations; iter++) + { + tmp[i] = LCG_A * tmp[i] + LCG_B; + } + } + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + a[tile * mem_size + idx % mem_size] = tmp[i]; + } + } +} + +void saxpyCuda(DATA_TYPE *A, DATA_TYPE *A_gpu, uint64_t iterations, uint64_t block_size) +{ + double t_start, t_end; + if (block_size <= DIM_THREAD_BLOCK) + block_size = DIM_THREAD_BLOCK; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / block_size); + + //t_start = rtclock(); + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(A_gpu, NI * sizeof(DATA_TYPE), GPU_DEVICE, stream1); + 
cudaStreamSynchronize(stream1); + vector_rand_kernel<<>>(A_gpu, NI, iterations, block_size, 832945); + cudaDeviceSynchronize(); + + // vector_rand_kernel<<>>(A_gpu, NI, iterations, block_size, 832945); + // cudaDeviceSynchronize(); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + uint64_t iterations = ITER; + uint64_t block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + if (argc >= 4) { + NI = atoll(argv[1]); + iterations = atoi(argv[2]); + block_size = atoi(argv[3]); + } + else { + NI = SIZE; + iterations = ITER; + block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + } + + int nblocks = NI / block_size; + if (nblocks > 64) + { + nblocks = 64; + block_size = NI / nblocks; + } + + double t_start, t_end; + + DATA_TYPE *A; + DATA_TYPE *A_gpu; + + A = (DATA_TYPE *)malloc(NI * sizeof(DATA_TYPE)); + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI); + + // initGPU(A_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + saxpyCuda(A, A_gpu, iterations, block_size); + memcpy(A, A_gpu, NI * sizeof(DATA_TYPE)); + + // t_start = rtclock(); + //saxpy(A, iterations); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + //compareResults(A_gpu, A); + + cudaFree(A_gpu); + endCPU(); + finiTrace(); + + free(A); + return 0; +} diff --git a/workloads/micro/uvm_prefetch/vector_seq/Makefile b/workloads/micro/uvm_prefetch/vector_seq/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..2d9857cedea5755ad6a381996c578f9cbd41424a --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_seq/Makefile @@ -0,0 +1,109 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda 
-lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := vector_seq +CUFILES := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + + +EXECUTABLE_4096_256 := vector_seq_4096_256 +CUFILES_4096_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=4096 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_2048_256 := vector_seq_2048_256 +CUFILES_2048_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=2048 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_1024_256 := vector_seq_1024_256 +CUFILES_1024_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=1024 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_512_256 := vector_seq_512_256 +CUFILES_512_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=512 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_256_256 := vector_seq_256_256 +CUFILES_256_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=256 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_128_256 := vector_seq_128_256 +CUFILES_128_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=128 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_64_256 := vector_seq_64_256 +CUFILES_64_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_32_256 := vector_seq_32_256 +CUFILES_32_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=32 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_16_256 := vector_seq_16_256 +CUFILES_16_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=16 -DDIM_THREAD_BLOCK=256 
-DBATCH_SIZE=16 + + +EXECUTABLE_1024_4 := vector_seq_1024_4 +CUFILES_1024_4 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=1024 -DBATCH_SIZE=4 + +EXECUTABLE_512_8 := vector_seq_512_8 +CUFILES_512_8 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=512 -DBATCH_SIZE=8 + +EXECUTABLE_256_16 := vector_seq_256_16 +CUFILES_256_16 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_128_32 := vector_seq_128_32 +CUFILES_128_32 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=32 + +EXECUTABLE_64_64 := vector_seq_64_64 +CUFILES_64_64 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=64 -DBATCH_SIZE=64 + +EXECUTABLE_32_128 := vector_seq_32_128 +CUFILES_32_128 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=32 -DBATCH_SIZE=128 + + +EXECUTABLE_2 := vector_seq_2 +CUFILES_2:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=2 + +EXECUTABLE_4 := vector_seq_4 +CUFILES_4:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=4 + +EXECUTABLE_8 := vector_seq_8 +CUFILES_8:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=8 + +EXECUTABLE_16 := vector_seq_16 +CUFILES_16:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=16 + +EXECUTABLE_32 := vector_seq_32 +CUFILES_32:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp 
$(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=32 + +EXECUTABLE_64 := vector_seq_64 +CUFILES_64:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=64 + +EXECUTABLE_128 := vector_seq_128 +CUFILES_128:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=128 + + + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} + + $(NVCC) ${NVCCCFLAGS} ${CUFILES_4096_256} ${DEF} -o ${EXECUTABLE_4096_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_2048_256} ${DEF} -o ${EXECUTABLE_2048_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_1024_256} ${DEF} -o ${EXECUTABLE_1024_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_512_256} ${DEF} -o ${EXECUTABLE_512_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_256_256} ${DEF} -o ${EXECUTABLE_256_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_128_256} ${DEF} -o ${EXECUTABLE_128_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_64_256} ${DEF} -o ${EXECUTABLE_64_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_32_256} ${DEF} -o ${EXECUTABLE_32_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_16_256} ${DEF} -o ${EXECUTABLE_16_256} + + $(NVCC) ${NVCCCFLAGS} ${CUFILES_1024_4} ${DEF} -o ${EXECUTABLE_1024_4} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_512_8} ${DEF} -o ${EXECUTABLE_512_8} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_256_16} ${DEF} -o ${EXECUTABLE_256_16} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_128_32} ${DEF} -o ${EXECUTABLE_128_32} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_64_64} ${DEF} -o ${EXECUTABLE_64_64} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_32_128} ${DEF} -o ${EXECUTABLE_32_128} + + $(NVCC) ${NVCCCFLAGS} ${CUFILES_2} ${DEF} -o ${EXECUTABLE_2} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_4} ${DEF} -o ${EXECUTABLE_4} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_8} ${DEF} -o ${EXECUTABLE_8} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_16} ${DEF} -o ${EXECUTABLE_16} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_32} ${DEF} -o ${EXECUTABLE_32} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_64} ${DEF} -o ${EXECUTABLE_64} 
+ $(NVCC) ${NVCCCFLAGS} ${CUFILES_128} ${DEF} -o ${EXECUTABLE_128} + +clean: + rm -f *.o vector_seq vector_seq_* diff --git a/workloads/micro/uvm_prefetch/vector_seq/page_fault.sh b/workloads/micro/uvm_prefetch/vector_seq/page_fault.sh new file mode 100644 index 0000000000000000000000000000000000000000..2ed3a2da6356c7406b116da1c2fda5c004492992 --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_seq/page_fault.sh @@ -0,0 +1 @@ +nsys profile --force-overwrite=true --cuda-um-gpu-page-faults=true --cuda-um-cpu-page-faults=true ./vector_seq_2 diff --git a/workloads/micro/uvm_prefetch/vector_seq/run.sh b/workloads/micro/uvm_prefetch/vector_seq/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..61775ede6c26208ae2a150958f8eec5375952773 --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_seq/run.sh @@ -0,0 +1,11 @@ +# ./vector_seq 17179869184 10 524288 + +./vector_seq 8589934592 10 262144 +#nsys profile --stats=true ./vector_seq 1073741824 10 32768 + + +# ./vector_seq 4294967296 10 131072 + +# ./vector_seq 2147483648 10 32768 +#./vector_seq 1073741824 10 32768 +# ./vector_seq 1048576 100 32768 diff --git a/workloads/micro/uvm_prefetch/vector_seq/run_large.sh b/workloads/micro/uvm_prefetch/vector_seq/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..66794fb697b9cd4165ae0e85db50fd512c3467e7 --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_seq/run_large.sh @@ -0,0 +1 @@ +./vector_seq 134217728 100 65536 diff --git a/workloads/micro/uvm_prefetch/vector_seq/run_medium.sh b/workloads/micro/uvm_prefetch/vector_seq/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..9bbbb986b9351d0e1c82e9d53d86d4d0f83c7492 --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_seq/run_medium.sh @@ -0,0 +1 @@ +./vector_seq 16777216 100 32768 diff --git a/workloads/micro/uvm_prefetch/vector_seq/run_mega.sh b/workloads/micro/uvm_prefetch/vector_seq/run_mega.sh new file mode 100755 index 
0000000000000000000000000000000000000000..7b622d0246ef3a0a542e2422e09099850d52f3d1 --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_seq/run_mega.sh @@ -0,0 +1 @@ +./vector_seq 8589934592 100 262144 diff --git a/workloads/micro/uvm_prefetch/vector_seq/run_small.sh b/workloads/micro/uvm_prefetch/vector_seq/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..da65ab8dbc61ff2e26a2638703dc97d003cb9dba --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_seq/run_small.sh @@ -0,0 +1 @@ +./vector_seq 2097152 100 32768 diff --git a/workloads/micro/uvm_prefetch/vector_seq/run_super.sh b/workloads/micro/uvm_prefetch/vector_seq/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..5a98cb48e7b06fa4db1cb7db99d9a0e8ebcf7f46 --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_seq/run_super.sh @@ -0,0 +1 @@ +./vector_seq 1073741824 100 65536 diff --git a/workloads/micro/uvm_prefetch/vector_seq/run_tiny.sh b/workloads/micro/uvm_prefetch/vector_seq/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..a2f7760fa3fb1a37be61193c663da2e38690bfe3 --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_seq/run_tiny.sh @@ -0,0 +1 @@ +./vector_seq 262144 100 8192 diff --git a/workloads/micro/uvm_prefetch/vector_seq/vector_seq.cu b/workloads/micro/uvm_prefetch/vector_seq/vector_seq.cu new file mode 100644 index 0000000000000000000000000000000000000000..857af3114965fd2ab5c02c4a43fd26536170261d --- /dev/null +++ b/workloads/micro/uvm_prefetch/vector_seq/vector_seq.cu @@ -0,0 +1,253 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.005 + +/* Problem size */ +#define SIZE 1073741824 +#define ITER 100 +uint64_t NI; + +/* Thread block dimensions */ +#ifndef DIM_THREAD_BLOCK +#define DIM_THREAD_BLOCK 256 +#endif + +#ifndef BATCH_SIZE +#define BATCH_SIZE 16 +#endif + +#ifndef NBLOCKS +#define NBLOCKS 64 +#endif + +#define LCG_A 1.1f +#define LCG_B 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void saxpy(DATA_TYPE *A, uint64_t iterations) +{ + for (uint64_t i = 0; i < NI; i++) { + for (uint64_t iter = 0; iter < iterations; iter++) { + A[i] = LCG_A * A[i] + LCG_B; + } + } +} + +void initCPU(DATA_TYPE *A) +{ + for (uint64_t i = 0; i < NI; i++) { + A[i] = ((DATA_TYPE) i) / NI; + } +} + +void initGPU(DATA_TYPE *A_gpu) +{ + for (uint64_t i = 0; i < NI; i++) { + A_gpu[i] = ((DATA_TYPE)i) / NI; + } +} + + +void compareResults(DATA_TYPE* A, DATA_TYPE* A_outputFromGpu) +{ + uint64_t fail = 0; + + // Compare C1 and C2 + for (uint64_t i 
= 0; i < NI; i++) { + // printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + if (percentDiff(A[i], A_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void vector_seq_kernel(DATA_TYPE *a, uint64_t NI, uint64_t iterations, uint64_t block_size) +{ + // Compute each thread's global row and column index + const uint64_t mem_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + // __shared__ DATA_TYPE tmp[mem_size]; + extern __shared__ DATA_TYPE tmp[]; + + uint64_t total_tiles = NI / mem_size; + uint64_t base_tiles = total_tiles / gridDim.x; + + uint64_t tiles_this_block = block_size / mem_size; + + uint64_t tile = base_tiles * blockIdx.x; + uint64_t end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + tmp[i] = a[tile * mem_size + i]; + } + + __syncthreads(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + for (uint64_t iter = 0; iter < iterations; iter++) + { + tmp[i] = LCG_A * tmp[i] + LCG_B; + } + } + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + a[tile * mem_size + i] = tmp[i]; + } + } +} + +void saxpyCuda(DATA_TYPE *A, DATA_TYPE *A_gpu, uint64_t iterations, uint64_t block_size) +{ + double t_start, t_end; + if (block_size <= DIM_THREAD_BLOCK) + block_size = DIM_THREAD_BLOCK; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / block_size); + + int MaxBytesofSharedMemory = DIM_THREAD_BLOCK * BATCH_SIZE * sizeof(DATA_TYPE); + cudaFuncSetAttribute(vector_seq_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, MaxBytesofSharedMemory); + + //t_start = rtclock(); + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(A_gpu, NI * sizeof(DATA_TYPE), 
GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + vector_seq_kernel<<>>(A_gpu, NI, iterations, block_size); + cudaDeviceSynchronize(); + + // vector_seq_kernel<<>>(A_gpu, NI, iterations, block_size); + // cudaDeviceSynchronize(); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + uint64_t iterations = ITER; + uint64_t block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + if (argc >= 4) { + NI = atoll(argv[1]); + iterations = atoi(argv[2]); + block_size = atoi(argv[3]); + } + else { + NI = SIZE; + iterations = ITER; + block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + } + + int nblocks = NBLOCKS; + block_size = NI / nblocks; + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE *A_gpu; + + A = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI); + + // initGPU(A_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + saxpyCuda(A, A_gpu, iterations, block_size); + memcpy(A, A_gpu, NI * sizeof(DATA_TYPE)); + + // t_start = rtclock(); + // saxpy(A, iterations); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(A_gpu, A); + cudaFree(A_gpu); + endCPU(); + finiTrace(); + + free(A); + return 0; +} + diff --git a/workloads/micro/uvm_prefetch_async/2DCONV/2DConvolution.cu b/workloads/micro/uvm_prefetch_async/2DCONV/2DConvolution.cu new file mode 100644 index 0000000000000000000000000000000000000000..a8f04eb44c0ca5810ec3c25297908ba74ac6cafc --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/2DCONV/2DConvolution.cu @@ -0,0 +1,407 @@ +/** + * 2DConvolution.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +#define NBLOCKS 32 +#define BATCH_SIZE 4 + +uint64_t NI; +uint64_t NJ; +uint64_t nblocks; + + +/* Thread block dimensions */ +#define KERNEL 3 +#define DIM_THREAD_BLOCK 8 + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; + +void conv2D(DATA_TYPE* A, DATA_TYPE* B) +{ + uint64_t i, j; + DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + c11 = +0.2; c21 = +0.5; c31 = -0.8; + c12 = -0.3; c22 = +0.6; c32 = -0.9; + c13 = +0.4; c23 = +0.7; c33 = +0.10; + + for (i = 1; i < NI - 1; ++i) // 0 + { + for (j = 1; j < NJ - 1; ++j) // 1 + { + B[i*NJ + j] = c11 * A[(i - 1)*NJ + (j - 1)] + c12 * A[(i + 0)*NJ + (j - 1)] + c13 * A[(i + 1)*NJ + (j - 1)] + + c21 * A[(i - 1)*NJ + (j + 0)] + c22 * A[(i + 0)*NJ + (j + 0)] + c23 * A[(i + 1)*NJ + (j + 0)] + + c31 * A[(i - 1)*NJ + (j + 1)] + c32 * A[(i + 0)*NJ + (j + 
1)] + c33 * A[(i + 1)*NJ + (j + 1)]; + } + } +} + +void initGPU(DATA_TYPE* A_gpu) +{ + uint64_t i, j; + + for (i = 0; i < NI; ++i) { + for (j = 0; j < NJ; ++j) { + A_gpu[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } +} + +void initCPU(DATA_TYPE* A) +{ + uint64_t i, j; + + for (i = 0; i < NI; ++i) { + for (j = 0; j < NJ; ++j) { + A[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } +} + + +void compareResults(DATA_TYPE* B, DATA_TYPE* B_outputFromGpu) +{ + uint64_t i, j, fail; + fail = 0; + + // Compare a and b + for (i=1; i < (NI-1); i++) + { + for (j=1; j < (NJ-1); j++) + { + if (percentDiff(B[i*NJ + j], B_outputFromGpu[i*NJ + j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, CPU is %f, GPU is %f.\n", i, j, B[i * NJ + j], B_outputFromGpu[i * NJ + j]); + fail++; + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); + +} + +__global__ void Convolution2D_kernel(DATA_TYPE *A, DATA_TYPE *B, uint64_t NI, uint64_t NJ, uint64_t block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + uint64_t tile_dim_x = (NJ + DIM_THREAD_BLOCK - 1) / (DIM_THREAD_BLOCK * BATCH_SIZE); + + __shared__ DATA_TYPE tmp_A[PREFETCH_COUNT][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1]; + __shared__ DATA_TYPE tmp_B[DIM_THREAD_BLOCK * BATCH_SIZE][DIM_THREAD_BLOCK * BATCH_SIZE]; + + uint64_t total_tiles = tile_dim_x * tile_dim_x; + + uint64_t tiles_this_block_x = (block_size / (DIM_THREAD_BLOCK * BATCH_SIZE)); + uint64_t tiles_this_block = tiles_this_block_x * tiles_this_block_x; + + // DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + // c11 = +0.2; c21 = +0.5; c31 = -0.8; + // c12 = -0.3; c22 = +0.6; c32 = -0.9; + // c13 = +0.4; c23 = +0.7; c33 = +0.10; + + DATA_TYPE c[KERNEL][KERNEL]; + + c[0][0] = +0.2; + c[1][0] = +0.5; + c[2][0] = -0.8; + c[0][1] = -0.3; + c[1][1] = +0.6; + 
c[2][1] = -0.9; + c[0][2] = +0.4; + c[1][2] = +0.7; + c[2][2] = +0.10; + + uint64_t base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + uint64_t fetch = base_tile; + uint64_t end_tile = fetch + tiles_this_block; + + for (uint64_t compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + // block id + uint64_t offset = fetch - base_tile; + uint64_t block_id = fetch / tiles_this_block; + uint64_t bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + uint64_t by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + uint64_t batch_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + // thread id + uint64_t tx = threadIdx.x; + uint64_t ty = threadIdx.y; + + uint64_t index_A_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty; + uint64_t index_A_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx; + + uint64_t index_A_y_start = DIM_THREAD_BLOCK * BATCH_SIZE * by; + uint64_t index_A_x_start = DIM_THREAD_BLOCK * BATCH_SIZE * bx; + + uint64_t index_A_y_bound = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * DIM_THREAD_BLOCK; + uint64_t index_A_x_bound = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * DIM_THREAD_BLOCK; + + // fetch A + for (uint64_t i = 0; i < BATCH_SIZE; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE; j++) + { + if ((index_A_y + i) < NI && (index_A_x + j) < NJ) + { + memcpy_async(tmp_A[fetch % PREFETCH_COUNT][ty * BATCH_SIZE + i][tx * BATCH_SIZE + j], A[(index_A_y + i) * NJ + index_A_x + j], pipe); + tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] = 0; + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < KERNEL - 1; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; j++) + { + if ((index_A_y_bound + i) < NI && (index_A_x_start + j) < NJ) + { + memcpy_async(tmp_A[fetch % PREFETCH_COUNT][DIM_THREAD_BLOCK * BATCH_SIZE + i][j], A[(index_A_y_bound + i) * NJ + index_A_x_start + 
j], pipe); + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; i++) + { + for (uint64_t j = 0; j < KERNEL - 1; j++) + { + if ((index_A_y_start + i) < NI && (index_A_x_bound + j) < NJ) + { + memcpy_async(tmp_A[fetch % PREFETCH_COUNT][i][DIM_THREAD_BLOCK * BATCH_SIZE + j], A[(index_A_y_start + i) * NJ + index_A_x_bound + j], pipe); + } + } + } + pipe.commit(); + } + if (fetch == end_tile) + { + for (uint64_t i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + // block id + uint64_t offset = compute - base_tile; + uint64_t block_id = compute / tiles_this_block; + uint64_t bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + uint64_t by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + // thread id + uint64_t tx = threadIdx.x; + uint64_t ty = threadIdx.y; + + uint64_t index_B_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty + 1; + uint64_t index_B_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx + 1; + + // Computation + for (uint64_t i = 0; i < BATCH_SIZE; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE; j++) + { + tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] = 0; + } + } + block.sync(); + + for (uint64_t i = 0; i < BATCH_SIZE; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE; j++) + { + for (uint64_t m = 0; m < KERNEL; m++) + { + for (uint64_t n = 0; n < KERNEL; n++) + { + tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j] += tmp_A[compute % PREFETCH_COUNT][ty * BATCH_SIZE + i + m][tx * BATCH_SIZE + j + n] * c[n][m]; + } + } + } + } + block.sync(); + + // Store B + for (uint64_t i = 0; i < BATCH_SIZE; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE; j++) + { + if ((index_B_y + i) < NI && (index_B_x + j) < NJ) + { + B[(index_B_y + i) * NJ + index_B_x + j] = tmp_B[ty * BATCH_SIZE + i][tx * BATCH_SIZE + j]; + } + } + } + block.sync(); + } +} + +void convolution2DCuda(DATA_TYPE 
*A_gpu, DATA_TYPE *B_gpu) +{ + double t_start, t_end; + + uint64_t output_width = NI - KERNEL + 1; + uint64_t output_height = NJ - KERNEL + 1; + + dim3 block(DIM_THREAD_BLOCK, DIM_THREAD_BLOCK); + dim3 grid(nblocks, nblocks); + + uint64_t block_size = (NJ + (nblocks - 1)) / nblocks; + + // t_start = rtclock(); + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(A_gpu, NI * NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(B_gpu, NI * NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + Convolution2D_kernel<<>>(A_gpu, B_gpu, NI, NJ, block_size); + cudaDeviceSynchronize(); + + // Convolution2D_kernel<<>>(A_gpu, B_gpu, NI, NJ, block_size); + // cudaDeviceSynchronize(); + + // t_end = rtclock(); + // fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); //); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + if (argc >= 4) { + NI = atoll(argv[1]); + NJ = atoll(argv[2]); + nblocks = atoi(argv[3]); + } else { + NI = SIZE; + NJ = SIZE; + nblocks = NBLOCKS; + } + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + + A = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NJ); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NI * NJ); + + // initGPU(A_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI*NJ*sizeof(DATA_TYPE)); + // overlapEndCPU(); + + convolution2DCuda(A_gpu, B_gpu); + + memcpy(B, B_gpu, NI * NJ * sizeof(DATA_TYPE)); + + cudaFree(A_gpu); + cudaFree(B_gpu); + + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // conv2D(A, B); + // t_end = rtclock(); + // 
fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);//); + + // compareResults(B, B_gpu); + + free(A); + free(B); + + return 0; +} diff --git a/workloads/micro/uvm_prefetch_async/2DCONV/Makefile b/workloads/micro/uvm_prefetch_async/2DCONV/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f97459c40d8409dec80056ca77208c75e9d0b5e8 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/2DCONV/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := 2DConvolution +CUFILES := 2DConvolution.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o 2DConvolution diff --git a/workloads/micro/uvm_prefetch_async/2DCONV/run.sh b/workloads/micro/uvm_prefetch_async/2DCONV/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..c4d26406f477f75d294497d89fc0d88c529f475b --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/2DCONV/run.sh @@ -0,0 +1,2 @@ +# ./2DConvolution 16384 16384 32 +./2DConvolution 32768 32768 32 \ No newline at end of file diff --git a/workloads/micro/uvm_prefetch_async/2DCONV/run_large.sh b/workloads/micro/uvm_prefetch_async/2DCONV/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..6e4e06894b252b1d547f335deef142ab01c98df9 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/2DCONV/run_large.sh @@ -0,0 +1 @@ +./2DConvolution 8192 8192 32 diff --git a/workloads/micro/uvm_prefetch_async/2DCONV/run_medium.sh b/workloads/micro/uvm_prefetch_async/2DCONV/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..c246aa18fc41de6e48e7d9ab67f1bbf1925afff6 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/2DCONV/run_medium.sh @@ -0,0 +1 @@ +./2DConvolution 4096 4096 32 diff --git 
a/workloads/micro/uvm_prefetch_async/2DCONV/run_mega.sh b/workloads/micro/uvm_prefetch_async/2DCONV/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..aa390557108b8621ec9ef8ac80f0a8f085161cce --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/2DCONV/run_mega.sh @@ -0,0 +1 @@ +./2DConvolution 65536 65536 32 diff --git a/workloads/micro/uvm_prefetch_async/2DCONV/run_small.sh b/workloads/micro/uvm_prefetch_async/2DCONV/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..c4b192f75d30e834237d39cfc15c8c57bade3c0e --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/2DCONV/run_small.sh @@ -0,0 +1 @@ +./2DConvolution 1024 1024 8 diff --git a/workloads/micro/uvm_prefetch_async/2DCONV/run_super.sh b/workloads/micro/uvm_prefetch_async/2DCONV/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..8a0981739ace39a1104aa069d6a6f0dfa38fd5c9 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/2DCONV/run_super.sh @@ -0,0 +1 @@ +./2DConvolution 32768 32768 32 diff --git a/workloads/micro/uvm_prefetch_async/2DCONV/run_tiny.sh b/workloads/micro/uvm_prefetch_async/2DCONV/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..d7c49df52c8e594edae710ad71032e912ccd7892 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/2DCONV/run_tiny.sh @@ -0,0 +1 @@ +./2DConvolution 512 512 4 diff --git a/workloads/micro/uvm_prefetch_async/3DCONV/3DConvolution.cu b/workloads/micro/uvm_prefetch_async/3DCONV/3DConvolution.cu new file mode 100644 index 0000000000000000000000000000000000000000..3c886a267cd4224768917af258e8e95e1e8c9157 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/3DCONV/3DConvolution.cu @@ -0,0 +1,459 @@ +/** + * 3DConvolution.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +// define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +#define NBLOCKS 2 +#define BATCH_SIZE 3 + +uint64_t NI; +uint64_t NJ; +uint64_t NK; +uint64_t nblocks; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 4 + +#define KERNEL 3 + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; + + + +void conv3D(DATA_TYPE* A, DATA_TYPE* B) +{ + uint64_t i, j, k; + DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + c11 = +2; c21 = +5; c31 = -8; + c12 = -3; c22 = +6; c32 = -9; + c13 = +4; c23 = +7; c33 = +10; + + for (i = 1; i < NI - 1; ++i) // 0 + { + for (j = 1; j < NJ - 1; ++j) // 1 + { + for (k = 1; k < NK -1; ++k) // 2 + { + B[i*(NK * NJ) + j*NK + k] = c11 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + c13 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + + c21 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + c23 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + 
(k - 1)] + + c31 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + c33 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k - 1)] + + c12 * A[(i + 0)*(NK * NJ) + (j - 1)*NK + (k + 0)] + c22 * A[(i + 0)*(NK * NJ) + (j + 0)*NK + (k + 0)] + + c32 * A[(i + 0)*(NK * NJ) + (j + 1)*NK + (k + 0)] + c11 * A[(i - 1)*(NK * NJ) + (j - 1)*NK + (k + 1)] + + c13 * A[(i + 1)*(NK * NJ) + (j - 1)*NK + (k + 1)] + c21 * A[(i - 1)*(NK * NJ) + (j + 0)*NK + (k + 1)] + + c23 * A[(i + 1)*(NK * NJ) + (j + 0)*NK + (k + 1)] + c31 * A[(i - 1)*(NK * NJ) + (j + 1)*NK + (k + 1)] + + c33 * A[(i + 1)*(NK * NJ) + (j + 1)*NK + (k + 1)]; + } + } + } +} + +void initGPU(DATA_TYPE *A_gpu) +{ + uint64_t i, j, k; + + for (i = 0; i < NI; ++i) + { + for (j = 0; j < NJ; ++j) + { + for (k = 0; k < NK; ++k) + { + A_gpu[i * (NK * NJ) + j * NK + k] = i % 12 + 2 * (j % 7) + 3 * (k % 13); + } + } + } +} + +void initCPU(DATA_TYPE *A) +{ + uint64_t i, j, k; + + for (i = 0; i < NI; ++i) + { + for (j = 0; j < NJ; ++j) + { + for (k = 0; k < NK; ++k) + { + A[i*(NK * NJ) + j*NK + k] = i % 12 + 2 * (j % 7) + 3 * (k % 13); + } + } + } +} + + +void compareResults(DATA_TYPE* B, DATA_TYPE* B_outputFromGpu) +{ + uint64_t i, j, k, fail; + fail = 0; + + // Compare result from cpu and gpu... 
+ for (i = 1; i < NI - 1; ++i) // 0 + { + for (j = 1; j < NJ - 1; ++j) // 1 + { + for (k = 1; k < NK - 1; ++k) // 2 + { + if (percentDiff(B[i*(NK * NJ) + j*NK + k], B_outputFromGpu[i*(NK * NJ) + j*NK + k]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, %d, CPU is %f, GPU is %f.\n", i, j, k, B[i * (NK * NJ) + j * NK + k], B_outputFromGpu[i * (NK * NJ) + j * NK + k]); + fail++; + } + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void convolution3D_kernel(DATA_TYPE *A, DATA_TYPE *B, uint64_t NI, uint64_t NJ, uint64_t NK, uint64_t block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + DATA_TYPE c11, c12, c13, c21, c22, c23, c31, c32, c33; + + c11 = +2; + c21 = +5; + c31 = -8; + c12 = -3; + c22 = +6; + c32 = -9; + c13 = +4; + c23 = +7; + c33 = +10; + + uint64_t tile_dim_x = (NJ + DIM_THREAD_BLOCK - 1) / (DIM_THREAD_BLOCK * BATCH_SIZE); + + __shared__ DATA_TYPE tmp_A[PREFETCH_COUNT][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1][DIM_THREAD_BLOCK * BATCH_SIZE + KERNEL - 1]; + __shared__ DATA_TYPE tmp_B[DIM_THREAD_BLOCK * BATCH_SIZE][DIM_THREAD_BLOCK * BATCH_SIZE][DIM_THREAD_BLOCK * BATCH_SIZE]; + + // uint64_t total_tiles = tile_dim_x * tile_dim_x * tile_dim_x; + + uint64_t tiles_this_block_x = (block_size / (DIM_THREAD_BLOCK * BATCH_SIZE)); + uint64_t tiles_this_block = tiles_this_block_x * tiles_this_block_x * tiles_this_block_x; + + uint64_t base_tile = (blockIdx.z * gridDim.y * gridDim.x + blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + uint64_t fetch = base_tile; + uint64_t end_tile = fetch + tiles_this_block; + + // printf("block_size is %d, tile_dim_x is %d, tiles_this_block_x is %d.\n", block_size, tile_dim_x, tiles_this_block_x); + + for (uint64_t compute = fetch; compute < end_tile; compute++) + { + for (; fetch < 
end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + // block id + uint64_t offset = fetch - base_tile; + uint64_t block_id = fetch / tiles_this_block; + + uint64_t bz = block_id / (gridDim.y * gridDim.x) * tiles_this_block_x + offset / (tiles_this_block_x * tiles_this_block_x); + uint64_t by = block_id % (gridDim.y * gridDim.x) / gridDim.x * tiles_this_block_x + offset % (tiles_this_block_x * tiles_this_block_x) / tiles_this_block_x; + uint64_t bx = block_id % (gridDim.y * gridDim.x) % gridDim.x * tiles_this_block_x + offset % (tiles_this_block_x * tiles_this_block_x) % tiles_this_block_x; + + // thread id + uint64_t tx = threadIdx.x; + uint64_t ty = threadIdx.y; + uint64_t tz = threadIdx.z; + + uint64_t index_A_z = DIM_THREAD_BLOCK * BATCH_SIZE * bz + BATCH_SIZE * tz; + uint64_t index_A_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty; + uint64_t index_A_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx; + + uint64_t index_A_z_start = DIM_THREAD_BLOCK * BATCH_SIZE * bz; + uint64_t index_A_y_start = DIM_THREAD_BLOCK * BATCH_SIZE * by; + uint64_t index_A_x_start = DIM_THREAD_BLOCK * BATCH_SIZE * bx; + + uint64_t index_A_z_bound = DIM_THREAD_BLOCK * BATCH_SIZE * bz + BATCH_SIZE * DIM_THREAD_BLOCK; + uint64_t index_A_y_bound = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * DIM_THREAD_BLOCK; + uint64_t index_A_x_bound = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * DIM_THREAD_BLOCK; + + // fetch A + for (uint64_t i = 0; i < BATCH_SIZE; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE; j++) + { + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + if ((index_A_z + i) < NI && (index_A_y + j) < NJ && (index_A_x + k) < NK) + { + memcpy_async(tmp_A[fetch % PREFETCH_COUNT][tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k], A[(index_A_z + i) * NJ * NK + (index_A_y + j) * NK + index_A_x + k], pipe); + } + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < KERNEL - 1; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE * 
DIM_THREAD_BLOCK + KERNEL - 1; j++) + { + for (uint64_t k = 0; k < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; k++) + { + if ((index_A_z_bound + i) < NI && (index_A_y_start + j) < NJ && (index_A_x_start + k) < NK) + { + memcpy_async(tmp_A[fetch % PREFETCH_COUNT][DIM_THREAD_BLOCK * BATCH_SIZE + i][j][k], A[(index_A_z_bound + i) * NJ * NK + (index_A_y_start + j) * NK + index_A_x_start + k], pipe); + } + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; i++) + { + for (uint64_t j = 0; j < KERNEL - 1; j++) + { + for (uint64_t k = 0; k < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; k++) + { + if ((index_A_z_start + i) < NI && (index_A_y_bound + j) < NJ && (index_A_x_start + k) < NK) + { + memcpy_async(tmp_A[fetch % PREFETCH_COUNT][i][DIM_THREAD_BLOCK * BATCH_SIZE + j][k], A[(index_A_z_start + i) * NJ * NK + (index_A_y_bound + j) * NK + index_A_x_start + k], pipe); + } + } + } + } + + // fetch A -- padding + for (uint64_t i = 0; i < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE * DIM_THREAD_BLOCK + KERNEL - 1; j++) + { + for (uint64_t k = 0; k < KERNEL - 1; k++) + { + if ((index_A_z_start + i) < NI && (index_A_y_start + j) < NJ && (index_A_x_bound + k) < NK) + { + memcpy_async(tmp_A[fetch % PREFETCH_COUNT][i][j][DIM_THREAD_BLOCK * BATCH_SIZE + k], A[(index_A_z_start + i) * NJ * NK + (index_A_y_start + j) * NK + index_A_x_bound + k], pipe); + } + } + } + } + pipe.commit(); + } + if (fetch == end_tile) + { + for (uint64_t i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + // block id + uint64_t offset = compute - base_tile; + uint64_t block_id = compute / tiles_this_block; + + uint64_t bz = block_id / (gridDim.y * gridDim.x) * tiles_this_block_x + offset / (tiles_this_block_x * tiles_this_block_x); + uint64_t by = block_id % (gridDim.y * gridDim.x) / gridDim.x * tiles_this_block_x + offset % 
(tiles_this_block_x * tiles_this_block_x) / tiles_this_block_x; + uint64_t bx = block_id % (gridDim.y * gridDim.x) % gridDim.x * tiles_this_block_x + offset % (tiles_this_block_x * tiles_this_block_x) % tiles_this_block_x; + + // thread id + uint64_t tx = threadIdx.x; + uint64_t ty = threadIdx.y; + uint64_t tz = threadIdx.z; + + uint64_t index_B_z = DIM_THREAD_BLOCK * BATCH_SIZE * bz + BATCH_SIZE * tz + 1; + uint64_t index_B_y = DIM_THREAD_BLOCK * BATCH_SIZE * by + BATCH_SIZE * ty + 1; + uint64_t index_B_x = DIM_THREAD_BLOCK * BATCH_SIZE * bx + BATCH_SIZE * tx + 1; + + // Computation + for (uint64_t i = 0; i < BATCH_SIZE; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE; j++) + { + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + tmp_B[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] = 0; + } + } + } + block.sync(); + + for (uint64_t i = 0; i < BATCH_SIZE; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE; j++) + { + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + tmp_B[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] = + c11 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c13 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c21 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c23 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c31 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c33 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k] + c12 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 1][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k + 1] + c22 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 1][ty * BATCH_SIZE + j + 1][tx * BATCH_SIZE + k + 1] + c32 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 1][ty * BATCH_SIZE + j 
+ 2][tx * BATCH_SIZE + k + 1] + c11 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k + 2] + c13 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k + 2] + c21 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i][ty * BATCH_SIZE + j + 1][tx * BATCH_SIZE + k + 2] + c23 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j + 1][tx * BATCH_SIZE + k + 2] + c31 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i][ty * BATCH_SIZE + j + 2][tx * BATCH_SIZE + k + 2] + c33 * tmp_A[compute % PREFETCH_COUNT][tz * BATCH_SIZE + i + 2][ty * BATCH_SIZE + j + 2][tx * BATCH_SIZE + k + 2]; + } + } + } + block.sync(); + + // Store B + for (uint64_t i = 0; i < BATCH_SIZE; i++) + { + for (uint64_t j = 0; j < BATCH_SIZE; j++) + { + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + if ((index_B_z + i + 1) < NI && (index_B_y + j + 1) < NJ && (index_B_x + k + 1) < NK) + { + B[(index_B_z + i) * NJ * NK + (index_B_y + j) * NK + index_B_x + k] = tmp_B[tz * BATCH_SIZE + i][ty * BATCH_SIZE + j][tx * BATCH_SIZE + k]; + } + } + } + } + block.sync(); + } +} + +void convolution3DCuda(DATA_TYPE* A_gpu, DATA_TYPE* B_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK, DIM_THREAD_BLOCK, DIM_THREAD_BLOCK); + dim3 grid(nblocks, nblocks, nblocks); + + uint64_t block_size = (NI + (nblocks - 1)) / nblocks; + + // t_start = rtclock(); + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(A_gpu, NI * NJ * NK * sizeof(DATA_TYPE), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(B_gpu, NI * NJ * NK * sizeof(DATA_TYPE), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + convolution3D_kernel<<>>(A_gpu, B_gpu, NI, NJ, NK, block_size); + cudaDeviceSynchronize(); + + // convolution3D_kernel<<>>(A_gpu, B_gpu, NI, NJ, NK, block_size); + // cudaDeviceSynchronize(); + // 
t_end = rtclock(); + // fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); + +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + if (argc >= 5) { + NI = atoll(argv[1]); + NJ = atoll(argv[2]); + NK = atoll(argv[3]); + nblocks = atoi(argv[4]); + } else { + NI = SIZE; + NJ = SIZE; + NK = SIZE; + nblocks = NBLOCKS; + } + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + + A = (DATA_TYPE*)malloc(NI*NJ*NK*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NI*NJ*NK*sizeof(DATA_TYPE)); + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NJ * NK); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NI * NJ * NK); + + // initGPU(A_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * NJ * NK * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + convolution3DCuda(A_gpu, B_gpu); + memcpy(B, B_gpu, NI * NJ * NK * sizeof(DATA_TYPE)); + + cudaFree(A_gpu); + cudaFree(B_gpu); + + endCPU(); + finiTrace(); + + // t_start = rtclock(); + // conv3D(A, B); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(B, B_gpu); + + free(A); + free(B); + + return 0; +} diff --git a/workloads/micro/uvm_prefetch_async/3DCONV/Makefile b/workloads/micro/uvm_prefetch_async/3DCONV/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..72aac9cb83cca03110da33f8da9119f32af90ccd --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/3DCONV/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := 3DConvolution +CUFILES := 3DConvolution.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} 
${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o 3DConvolution diff --git a/workloads/micro/uvm_prefetch_async/3DCONV/run.sh b/workloads/micro/uvm_prefetch_async/3DCONV/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..0c27d78b8d3484896bd6812043b5466b074cbebf --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/3DCONV/run.sh @@ -0,0 +1,2 @@ +#./3DConvolution 768 768 768 8 +./3DConvolution 1536 1536 1536 8 diff --git a/workloads/micro/uvm_prefetch_async/3DCONV/run_large.sh b/workloads/micro/uvm_prefetch_async/3DCONV/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..500c363302024d6080c025784efbe2e7fef74f53 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/3DCONV/run_large.sh @@ -0,0 +1 @@ +./3DConvolution 384 384 384 8 diff --git a/workloads/micro/uvm_prefetch_async/3DCONV/run_medium.sh b/workloads/micro/uvm_prefetch_async/3DCONV/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..d0a9fb120b6de1c16ae4146d7684c5557af95152 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/3DCONV/run_medium.sh @@ -0,0 +1 @@ +./3DConvolution 192 192 192 4 diff --git a/workloads/micro/uvm_prefetch_async/3DCONV/run_mega.sh b/workloads/micro/uvm_prefetch_async/3DCONV/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..4e21c66c3d1cf21bfe88427d92c54ff8be428d8a --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/3DCONV/run_mega.sh @@ -0,0 +1 @@ +./3DConvolution 1536 1536 1536 8 diff --git a/workloads/micro/uvm_prefetch_async/3DCONV/run_small.sh b/workloads/micro/uvm_prefetch_async/3DCONV/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..f794eec58ed56174c5d02096a9bf5acc4e948d0f --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/3DCONV/run_small.sh @@ -0,0 +1 @@ +./3DConvolution 96 96 96 4 diff --git a/workloads/micro/uvm_prefetch_async/3DCONV/run_super.sh b/workloads/micro/uvm_prefetch_async/3DCONV/run_super.sh new file mode 100755 
index 0000000000000000000000000000000000000000..11f8b43d3e406c466b824420d34991a5c0f876b4 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/3DCONV/run_super.sh @@ -0,0 +1 @@ +./3DConvolution 768 768 768 8 diff --git a/workloads/micro/uvm_prefetch_async/3DCONV/run_tiny.sh b/workloads/micro/uvm_prefetch_async/3DCONV/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..791c84bc54088ea65fc7612ee94442bbbc615cc3 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/3DCONV/run_tiny.sh @@ -0,0 +1 @@ +./3DConvolution 48 48 48 2 diff --git a/workloads/micro/uvm_prefetch_async/gemm/Makefile b/workloads/micro/uvm_prefetch_async/gemm/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d545b0aab3ef63260888227abd6ab99bcaddbff3 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := gemm +CUFILES := gemm.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o gemm diff --git a/workloads/micro/uvm_prefetch_async/gemm/gemm.cu b/workloads/micro/uvm_prefetch_async/gemm/gemm.cu new file mode 100644 index 0000000000000000000000000000000000000000..786bee258fdce3413eca9c47b08c877ad47e7106 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm/gemm.cu @@ -0,0 +1,328 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +uint64_t NI; +uint64_t NJ; +uint64_t NK; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK_X 32 +#define DIM_THREAD_BLOCK_Y 32 + + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void gemm(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i,j,k; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i*NJ + j] *= BETA; + for (k = 0; k < NK; ++k) { + C[i*NJ + j] += ALPHA * A[i*NK + k] * B[k*NJ + j]; + } + } + } +} + +void initGPU(DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NK; j++) { + A_gpu[i * NK + j] = ((DATA_TYPE)i * j) / NI; + } + } + + + 
for (i = 0; i < NK; i++) { + for (j = 0; j < NJ; j++) { + B_gpu[i * NJ + j] = ((DATA_TYPE)i * j + 1) / NJ; + } + } + + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C_gpu[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + } + } + +} + +void initCPU(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NK; j++) { + A[i * NK + j] = ((DATA_TYPE)i * j) / NI; + } + } + + + for (i = 0; i < NK; i++) { + for (j = 0; j < NJ; j++) { + B[i * NJ + j] = ((DATA_TYPE)i * j + 1) / NJ; + } + } + + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + } + } + +} + + +void compareResults(DATA_TYPE* C, DATA_TYPE* C_outputFromGpu) +{ + uint64_t i, j, fail; + fail = 0; + + // Compare C1 and C2 + for (i=0; i < NI; i++) + { + for (j=0; j < NJ; j++) + { + // printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + if (percentDiff(C[i*NJ + j], C_outputFromGpu[i*NJ + j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + fail++; + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void gemm_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, uint64_t NI, uint64_t NK, uint64_t NJ) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + + uint64_t row = blockIdx.y * blockDim.y + threadIdx.y; + uint64_t col = blockIdx.x * blockDim.x + threadIdx.x; + + __shared__ DATA_TYPE s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y * PREFETCH_COUNT]; + __shared__ DATA_TYPE s_b[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y * PREFETCH_COUNT]; + + DATA_TYPE tmp = BETA * c[row * NJ + col]; + + uint64_t base_tiles = 0; + uint64_t end_tile = base_tiles + NK / blockDim.x; + + uint64_t fetch = base_tiles; + uint64_t tile_size = 
DIM_THREAD_BLOCK_X; + uint64_t mem_size = DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y; + + for (uint64_t compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + memcpy_async(s_a[(fetch % PREFETCH_COUNT) * mem_size + (threadIdx.y * blockDim.x + threadIdx.x)], a[row * NK + fetch * tile_size + threadIdx.x], pipe); + memcpy_async(s_b[(fetch % PREFETCH_COUNT) * mem_size + (threadIdx.y * blockDim.x + threadIdx.x)], b[(fetch * tile_size + threadIdx.y) * NJ + col], pipe); + + pipe.commit(); + } + if (fetch == end_tile) + { + for (uint64_t i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + for (uint64_t k = 0; k < blockDim.x; k++) + { + tmp += ALPHA * s_a[(compute % PREFETCH_COUNT) * mem_size + (threadIdx.y * blockDim.x + k)] * s_b[(compute % PREFETCH_COUNT) * mem_size + (k * blockDim.x + threadIdx.x)]; + } + block.sync(); + } + + c[row * NJ + col] = tmp; +} + +void gemmCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 grid((size_t)(ceil( ((float)NI)/ ((float)block.x) )),(size_t)(ceil( ((float)NJ)/ ((float)block.y) ))); + + //t_start = rtclock(); + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(A_gpu, NI * NK * sizeof(DATA_TYPE), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(B_gpu, NK * NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(C_gpu, NI * NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + gemm_kernel<<>>(A_gpu, B_gpu, C_gpu, NI, NK, NJ); + cudaDeviceSynchronize(); + + + // gemm_kernel<<< grid, block >>>(A_gpu, B_gpu, C_gpu, NI, NK, NJ); + // 
cudaDeviceSynchronize(); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + if (argc >= 4) { + NI = atoll(argv[1]); + NK = atoll(argv[2]); + NJ = atoll(argv[3]); + } else { + NI = SIZE; + NK = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE* C; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + + A = (DATA_TYPE*)malloc(NI*NK*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NK*NJ*sizeof(DATA_TYPE)); + C = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + + initCPU(A,B,C); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + // initGPU(A_gpu, B_gpu, C_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * NK * sizeof(DATA_TYPE)); + memcpy(B_gpu, B, NK * NJ * sizeof(DATA_TYPE)); + memcpy(C_gpu, C, NI * NJ * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + gemmCuda(A, B, C, A_gpu, B_gpu, C_gpu); + memcpy(C, C_gpu, NI * NJ * sizeof(DATA_TYPE)); + + // t_start = rtclock(); + // gemm(A, B, C); // needed to keep benchmark accurate + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(C_gpu, C); + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + + endCPU(); + finiTrace(); + + free(A); + free(B); + free(C); + return 0; +} + diff --git a/workloads/micro/uvm_prefetch_async/gemm/run.sh b/workloads/micro/uvm_prefetch_async/gemm/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..d234537561c276f291434f42a7337bc5d5a61605 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm/run.sh @@ -0,0 +1,3 @@ +#./gemm 1024 1024 1024 +./gemm 32768 32768 32768 +#./gemm 
512 512 512 diff --git a/workloads/micro/uvm_prefetch_async/gemm/run_large.sh b/workloads/micro/uvm_prefetch_async/gemm/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..cc1dbe192ecafb7558b76cfa4d2ce05a15067999 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm/run_large.sh @@ -0,0 +1 @@ +./gemm 8192 8192 8192 diff --git a/workloads/micro/uvm_prefetch_async/gemm/run_medium.sh b/workloads/micro/uvm_prefetch_async/gemm/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..ebae2815e56031234ccdef0d98cb774f111e82a2 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm/run_medium.sh @@ -0,0 +1 @@ +./gemm 4096 4096 4096 diff --git a/workloads/micro/uvm_prefetch_async/gemm/run_mega.sh b/workloads/micro/uvm_prefetch_async/gemm/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..7a7e74e163fa0d0ad774327163771ce8eb23bd79 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm/run_mega.sh @@ -0,0 +1 @@ +./gemm 32768 32768 32768 diff --git a/workloads/micro/uvm_prefetch_async/gemm/run_small.sh b/workloads/micro/uvm_prefetch_async/gemm/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..df5896fd490618e0f8c5608097518b3837b9ad33 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm/run_small.sh @@ -0,0 +1 @@ +./gemm 1024 1024 1024 diff --git a/workloads/micro/uvm_prefetch_async/gemm/run_super.sh b/workloads/micro/uvm_prefetch_async/gemm/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..9736fc5d97c8ef9b55a3157882e05979f65a5011 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm/run_super.sh @@ -0,0 +1 @@ +./gemm 16384 16384 16384 diff --git a/workloads/micro/uvm_prefetch_async/gemm/run_tiny.sh b/workloads/micro/uvm_prefetch_async/gemm/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..9858f12d61d44c0f6265f7c22c27fab446500f55 --- /dev/null +++ 
b/workloads/micro/uvm_prefetch_async/gemm/run_tiny.sh @@ -0,0 +1 @@ +./gemm 512 512 512 diff --git a/workloads/micro/uvm_prefetch_async/gemm_perf/Makefile b/workloads/micro/uvm_prefetch_async/gemm_perf/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d545b0aab3ef63260888227abd6ab99bcaddbff3 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm_perf/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := gemm +CUFILES := gemm.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o gemm diff --git a/workloads/micro/uvm_prefetch_async/gemm_perf/gemm b/workloads/micro/uvm_prefetch_async/gemm_perf/gemm new file mode 100755 index 0000000000000000000000000000000000000000..ad07e5d904355b63f966171e0731ba059b3a3a8b Binary files /dev/null and b/workloads/micro/uvm_prefetch_async/gemm_perf/gemm differ diff --git a/workloads/micro/uvm_prefetch_async/gemm_perf/gemm.cu b/workloads/micro/uvm_prefetch_async/gemm_perf/gemm.cu new file mode 100644 index 0000000000000000000000000000000000000000..0dc800e559a90d1a6f314514f57be8d472121d8c --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm_perf/gemm.cu @@ -0,0 +1,328 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096 +uint64_t NI; +uint64_t NJ; +uint64_t NK; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK_X 32 +#define DIM_THREAD_BLOCK_Y 32 + + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void gemm(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i,j,k; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i*NJ + j] *= BETA; + for (k = 0; k < NK; ++k) { + C[i*NJ + j] += ALPHA * A[i*NK + k] * B[k*NJ + j]; + } + } + } +} + +void initGPU(DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NK; j++) { + A_gpu[i * NK + j] = ((DATA_TYPE)i * j) / NI; + } + } + + + 
for (i = 0; i < NK; i++) { + for (j = 0; j < NJ; j++) { + B_gpu[i * NJ + j] = ((DATA_TYPE)i * j + 1) / NJ; + } + } + + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C_gpu[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + } + } + +} + +void initCPU(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) { + for (j = 0; j < NK; j++) { + A[i * NK + j] = ((DATA_TYPE)i * j) / NI; + } + } + + + for (i = 0; i < NK; i++) { + for (j = 0; j < NJ; j++) { + B[i * NJ + j] = ((DATA_TYPE)i * j + 1) / NJ; + } + } + + + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + C[i * NJ + j] = ((DATA_TYPE)i * j + 2) / NJ; + } + } + +} + + +void compareResults(DATA_TYPE* C, DATA_TYPE* C_outputFromGpu) +{ + uint64_t i, j, fail; + fail = 0; + + // Compare C1 and C2 + for (i=0; i < NI; i++) + { + for (j=0; j < NJ; j++) + { + // printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + if (percentDiff(C[i*NJ + j], C_outputFromGpu[i*NJ + j]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + printf("%d, %d, GPU is %f, CPU is %f.\n", i, j, C[i*NJ + j], C_outputFromGpu[i*NJ + j]); + fail++; + } + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void gemm_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, uint64_t NI, uint64_t NK, uint64_t NJ) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + + uint64_t row = blockIdx.y * blockDim.y + threadIdx.y; + uint64_t col = blockIdx.x * blockDim.x + threadIdx.x; + + __shared__ DATA_TYPE s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y * PREFETCH_COUNT]; + __shared__ DATA_TYPE s_b[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y * PREFETCH_COUNT]; + + DATA_TYPE tmp = BETA * c[row * NJ + col]; + + uint64_t base_tiles = 0; + uint64_t end_tile = base_tiles + NK / blockDim.x; + + uint64_t fetch = base_tiles; + uint64_t tile_size = 
DIM_THREAD_BLOCK_X; + uint64_t mem_size = DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y; + + for (uint64_t compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + memcpy_async(s_a[(fetch % PREFETCH_COUNT) * mem_size + (threadIdx.y * blockDim.x + threadIdx.x)], a[row * NK + fetch * tile_size + threadIdx.x], pipe); + memcpy_async(s_b[(fetch % PREFETCH_COUNT) * mem_size + (threadIdx.y * blockDim.x + threadIdx.x)], b[(fetch * tile_size + threadIdx.y) * NJ + col], pipe); + + pipe.commit(); + } + if (fetch == end_tile) + { + for (uint64_t i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + for (uint64_t k = 0; k < blockDim.x; k++) + { + tmp += ALPHA * s_a[(compute % PREFETCH_COUNT) * mem_size + (threadIdx.y * blockDim.x + k)] * s_b[(compute % PREFETCH_COUNT) * mem_size + (k * blockDim.x + threadIdx.x)]; + } + block.sync(); + } + + c[row * NJ + col] = tmp; +} + +void gemmCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 grid((size_t)(ceil( ((float)NI)/ ((float)block.x) )),(size_t)(ceil( ((float)NJ)/ ((float)block.y) ))); + + //t_start = rtclock(); + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(A_gpu, NI * NK * sizeof(DATA_TYPE), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(B_gpu, NK * NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(C_gpu, NI * NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + gemm_kernel<<>>(A_gpu, B_gpu, C_gpu, NI, NK, NJ); + cudaDeviceSynchronize(); + + + // gemm_kernel<<< grid, block >>>(A_gpu, B_gpu, C_gpu, NI, NK, NJ); + // 
cudaDeviceSynchronize(); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + if (argc >= 4) { + NI = atoll(argv[1]); + NK = atoll(argv[2]); + NJ = atoll(argv[3]); + } else { + NI = SIZE; + NK = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + DATA_TYPE* C; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + + A = (DATA_TYPE*)malloc(NI*NK*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NK*NJ*sizeof(DATA_TYPE)); + C = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); + + initCPU(A,B,C); + GPU_argv_init(); + + //initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NK); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NK * NJ); + cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI * NJ); + + // initGPU(A_gpu, B_gpu, C_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * NK * sizeof(DATA_TYPE)); + memcpy(B_gpu, B, NK * NJ * sizeof(DATA_TYPE)); + memcpy(C_gpu, C, NI * NJ * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + gemmCuda(A, B, C, A_gpu, B_gpu, C_gpu); + memcpy(C, C_gpu, NI * NJ * sizeof(DATA_TYPE)); + + // t_start = rtclock(); + // gemm(A, B, C); // needed to keep benchmark accurate + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(C_gpu, C); + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + + endCPU(); + //finiTrace(); + + free(A); + free(B); + free(C); + return 0; +} + diff --git a/workloads/micro/uvm_prefetch_async/gemm_perf/run.sh b/workloads/micro/uvm_prefetch_async/gemm_perf/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..d234537561c276f291434f42a7337bc5d5a61605 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm_perf/run.sh @@ -0,0 +1,3 @@ +#./gemm 1024 1024 1024 +./gemm 32768 
32768 32768 +#./gemm 512 512 512 diff --git a/workloads/micro/uvm_prefetch_async/gemm_perf/run_large.sh b/workloads/micro/uvm_prefetch_async/gemm_perf/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..cc1dbe192ecafb7558b76cfa4d2ce05a15067999 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm_perf/run_large.sh @@ -0,0 +1 @@ +./gemm 8192 8192 8192 diff --git a/workloads/micro/uvm_prefetch_async/gemm_perf/run_medium.sh b/workloads/micro/uvm_prefetch_async/gemm_perf/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..ebae2815e56031234ccdef0d98cb774f111e82a2 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm_perf/run_medium.sh @@ -0,0 +1 @@ +./gemm 4096 4096 4096 diff --git a/workloads/micro/uvm_prefetch_async/gemm_perf/run_mega.sh b/workloads/micro/uvm_prefetch_async/gemm_perf/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..7a7e74e163fa0d0ad774327163771ce8eb23bd79 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm_perf/run_mega.sh @@ -0,0 +1 @@ +./gemm 32768 32768 32768 diff --git a/workloads/micro/uvm_prefetch_async/gemm_perf/run_small.sh b/workloads/micro/uvm_prefetch_async/gemm_perf/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..df5896fd490618e0f8c5608097518b3837b9ad33 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm_perf/run_small.sh @@ -0,0 +1 @@ +./gemm 1024 1024 1024 diff --git a/workloads/micro/uvm_prefetch_async/gemm_perf/run_super.sh b/workloads/micro/uvm_prefetch_async/gemm_perf/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..9736fc5d97c8ef9b55a3157882e05979f65a5011 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm_perf/run_super.sh @@ -0,0 +1 @@ +./gemm 16384 16384 16384 diff --git a/workloads/micro/uvm_prefetch_async/gemm_perf/run_tiny.sh b/workloads/micro/uvm_prefetch_async/gemm_perf/run_tiny.sh new file mode 100755 index 
0000000000000000000000000000000000000000..9858f12d61d44c0f6265f7c22c27fab446500f55 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemm_perf/run_tiny.sh @@ -0,0 +1 @@ +./gemm 512 512 512 diff --git a/workloads/micro/uvm_prefetch_async/gemv/Makefile b/workloads/micro/uvm_prefetch_async/gemv/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..005563000dad0469dbf388d54a639e926cf5aa85 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemv/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := gemv +CUFILES := gemv.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o gemv diff --git a/workloads/micro/uvm_prefetch_async/gemv/gemv.cu b/workloads/micro/uvm_prefetch_async/gemv/gemv.cu new file mode 100644 index 0000000000000000000000000000000000000000..9d8dcc080c76c3d8113348eca11adb5bbb6bfc27 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemv/gemv.cu @@ -0,0 +1,312 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +// define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 40960 +uint64_t NI; +uint64_t NJ; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 256 + +#define BATCH_SIZE 16 + +/* Declared constant values for ALPHA and BETA (same as values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void gemv(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) + { + C[i] *= BETA; + for (j = 0; j < NJ; j++) + { + C[i] += ALPHA * A[i * NJ + j] * B[j]; + } + } +} + +void initGPU(DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) + { + for (j = 0; j < NJ; j++) + { + A_gpu[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } + + for (j = 0; j < NJ; j++) + { + B_gpu[j] = ((DATA_TYPE)j + 1) / NJ; + } 
+ + for (i = 0; i < NI; i++) + { + C_gpu[i] = ((DATA_TYPE)i + 2) / NI; + } +} + +void initCPU(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C) +{ + uint64_t i, j; + + for (i = 0; i < NI; i++) + { + for (j = 0; j < NJ; j++) + { + A[i * NJ + j] = ((DATA_TYPE)i * j) / NI; + } + } + + for (j = 0; j < NJ; j++) + { + B[j] = ((DATA_TYPE)j + 1) / NJ; + } + + for (i = 0; i < NI; i++) + { + C[i] = ((DATA_TYPE)i + 2) / NI; + } +} + +void compareResults(DATA_TYPE *C, DATA_TYPE *C_outputFromGpu) +{ + uint64_t i, fail; + fail = 0; + + // Compare C1 and C2 + for (i = 0; i < NI; i++) + { + if (percentDiff(C[i], C_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) + { + fail++; + printf("%d, GPU is %f, CPU is %f.\n", i, C[i], C_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void gemv_kernel(DATA_TYPE *a, DATA_TYPE *b, DATA_TYPE *c, uint64_t NI, uint64_t NJ) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + uint64_t row = blockIdx.x * blockDim.x + threadIdx.x; + uint64_t tx = threadIdx.x; + + __shared__ DATA_TYPE s_b[PREFETCH_COUNT][DIM_THREAD_BLOCK][BATCH_SIZE]; + + DATA_TYPE tmp = BETA * c[row]; + __syncthreads(); + + uint64_t fetch = 0; + uint64_t end_tile = NJ / BATCH_SIZE; + + for (uint64_t compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + uint64_t base_index = fetch * BATCH_SIZE; + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + memcpy_async(s_b[fetch % PREFETCH_COUNT][tx][k], b[base_index + k], pipe); + } + pipe.commit(); + } + if (fetch == end_tile) + { + for (uint64_t i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + uint64_t base_index = compute * BATCH_SIZE; + for (uint64_t k = 0; k < BATCH_SIZE; k++) + { + tmp += ALPHA * a[row * NJ + 
base_index + k] * s_b[compute % PREFETCH_COUNT][tx][k]; + } + block.sync(); + } + c[row] = tmp; +} + +void gemvCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *C, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, DATA_TYPE *C_gpu) +{ + double t_start, t_end; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / (DIM_THREAD_BLOCK)); + + // t_start = rtclock(); + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(A_gpu, NI * NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(B_gpu, NJ * sizeof(DATA_TYPE), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(C_gpu, NI * sizeof(DATA_TYPE), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + gemv_kernel<<>>(A_gpu, B_gpu, C_gpu, NI, NJ); + cudaDeviceSynchronize(); + + // gemv_kernel<<>>(A_gpu, B_gpu, C_gpu, NI, NJ); + // cudaDeviceSynchronize(); + // t_end = rtclock(); + + // fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + if (argc >= 3) + { + NI = atoll(argv[1]); + NJ = atoll(argv[2]); + } + else + { + NI = SIZE; + NJ = SIZE; + } + + double t_start, t_end; + + DATA_TYPE *A; + DATA_TYPE *B; + DATA_TYPE *C; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + DATA_TYPE *C_gpu; + + A = (DATA_TYPE *)malloc(NI * NJ * sizeof(DATA_TYPE)); + B = (DATA_TYPE *)malloc(NJ * sizeof(DATA_TYPE)); + C = (DATA_TYPE *)malloc(NI * sizeof(DATA_TYPE)); + + initCPU(A, B, C); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI * NJ); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NJ); + cudaMallocManaged(&C_gpu, sizeof(DATA_TYPE) * NI); + + // initGPU(A_gpu, B_gpu, C_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * NJ * 
sizeof(DATA_TYPE)); + memcpy(B_gpu, B, NJ * sizeof(DATA_TYPE)); + memcpy(C_gpu, C, NI * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + gemvCuda(A, B, C, A_gpu, B_gpu, C_gpu); + memcpy(C, C_gpu, NI * sizeof(DATA_TYPE)); + + // t_start = rtclock(); + // gemv(A, B, C); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(C_gpu, C); + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); + + endCPU(); + finiTrace(); + + free(A); + free(B); + free(C); + return 0; +} diff --git a/workloads/micro/uvm_prefetch_async/gemv/run.sh b/workloads/micro/uvm_prefetch_async/gemv/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..c75d8f69375fd6c923a93ba88a4cb43238844f8b --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemv/run.sh @@ -0,0 +1,2 @@ +# ./gemv 16384 16384 +./gemv 32768 32768 diff --git a/workloads/micro/uvm_prefetch_async/gemv/run_large.sh b/workloads/micro/uvm_prefetch_async/gemv/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..102aa73e96b74d0d46ef261f0ffd665639679025 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemv/run_large.sh @@ -0,0 +1 @@ +./gemv 8192 8192 diff --git a/workloads/micro/uvm_prefetch_async/gemv/run_medium.sh b/workloads/micro/uvm_prefetch_async/gemv/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..2cc0f68e4d4e96c36ee0d72e64f7acdb7c97233c --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemv/run_medium.sh @@ -0,0 +1 @@ +./gemv 4096 4096 diff --git a/workloads/micro/uvm_prefetch_async/gemv/run_mega.sh b/workloads/micro/uvm_prefetch_async/gemv/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..88ca5ba9468efc68d859100ccbd246b9b3af960b --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemv/run_mega.sh @@ -0,0 +1 @@ +./gemv 65536 65536 diff --git a/workloads/micro/uvm_prefetch_async/gemv/run_small.sh b/workloads/micro/uvm_prefetch_async/gemv/run_small.sh 
new file mode 100755 index 0000000000000000000000000000000000000000..55646a647b7f53aff91c5e38562bc449d75daa9a --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemv/run_small.sh @@ -0,0 +1 @@ +./gemv 1024 1024 diff --git a/workloads/micro/uvm_prefetch_async/gemv/run_super.sh b/workloads/micro/uvm_prefetch_async/gemv/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..295d223c3c9c768e58c0fba722fa859b74564d2d --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemv/run_super.sh @@ -0,0 +1 @@ +./gemv 32768 32768 diff --git a/workloads/micro/uvm_prefetch_async/gemv/run_tiny.sh b/workloads/micro/uvm_prefetch_async/gemv/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..60becef20c6cc3113ff1b4897d177ff3cbd77eb8 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/gemv/run_tiny.sh @@ -0,0 +1 @@ +./gemv 512 512 diff --git a/workloads/micro/uvm_prefetch_async/saxpy/Makefile b/workloads/micro/uvm_prefetch_async/saxpy/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..069a5001c286cb5f44c4686449f04755cd5a2e52 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/saxpy/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := saxpy +CUFILES := saxpy.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o saxpy diff --git a/workloads/micro/uvm_prefetch_async/saxpy/run.sh b/workloads/micro/uvm_prefetch_async/saxpy/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..61775ede6c26208ae2a150958f8eec5375952773 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/saxpy/run.sh @@ -0,0 +1,11 @@ +# ./vector_seq 17179869184 10 524288 + +./vector_seq 8589934592 10 262144 +#nsys profile --stats=true ./vector_seq 
1073741824 10 32768 + + +# ./vector_seq 4294967296 10 131072 + +# ./vector_seq 2147483648 10 32768 +#./vector_seq 1073741824 10 32768 +# ./vector_seq 1048576 100 32768 diff --git a/workloads/micro/uvm_prefetch_async/saxpy/run_large.sh b/workloads/micro/uvm_prefetch_async/saxpy/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..bce2787645ea40cb94ba004ec19cf4728ba48647 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/saxpy/run_large.sh @@ -0,0 +1 @@ +./saxpy 134217728 100 65536 diff --git a/workloads/micro/uvm_prefetch_async/saxpy/run_medium.sh b/workloads/micro/uvm_prefetch_async/saxpy/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..af6f429a95aa9d5ede352e30e59405aaee4ee55b --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/saxpy/run_medium.sh @@ -0,0 +1 @@ +./saxpy 16777216 100 32768 diff --git a/workloads/micro/uvm_prefetch_async/saxpy/run_mega.sh b/workloads/micro/uvm_prefetch_async/saxpy/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..5dce5483842d255b4130f7ca89b4894c65b5a44b --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/saxpy/run_mega.sh @@ -0,0 +1 @@ +./saxpy 4294967296 100 262144 diff --git a/workloads/micro/uvm_prefetch_async/saxpy/run_small.sh b/workloads/micro/uvm_prefetch_async/saxpy/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..c927e5e53c30ca58a12dbdf95e264a4b790ab0b8 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/saxpy/run_small.sh @@ -0,0 +1 @@ +./saxpy 2097152 100 32768 diff --git a/workloads/micro/uvm_prefetch_async/saxpy/run_super.sh b/workloads/micro/uvm_prefetch_async/saxpy/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..7f8d7b99b0be6cc643cd686965806f7edbc0af27 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/saxpy/run_super.sh @@ -0,0 +1 @@ +./saxpy 1073741824 100 65536 diff --git a/workloads/micro/uvm_prefetch_async/saxpy/run_tiny.sh 
b/workloads/micro/uvm_prefetch_async/saxpy/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..4604f1747023f34e81c15ce89c4be5a6fb39bc95 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/saxpy/run_tiny.sh @@ -0,0 +1 @@ +./saxpy 262144 100 8192 diff --git a/workloads/micro/uvm_prefetch_async/saxpy/saxpy.cu b/workloads/micro/uvm_prefetch_async/saxpy/saxpy.cu new file mode 100644 index 0000000000000000000000000000000000000000..19fc166ff46d47cd414ec1dd2904b04f9ed4b2d8 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/saxpy/saxpy.cu @@ -0,0 +1,289 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. + * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.05 + +/* Problem size */ +#define SIZE 4096000 +#define ITER 100 +uint64_t NI; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 256 + +#define BATCH_SIZE 8 + +/* Declared constant values for ALPHA and BETA (same as 
values in PolyBench 2.0) */ +#define ALPHA 1.1f +#define BETA 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void saxpy(DATA_TYPE *A, DATA_TYPE *B, uint64_t iterations) +{ + for (uint64_t i = 0; i < NI; i++) { + for (uint64_t iter = 0; iter < iterations; iter++) { + B[i] = ALPHA * A[i] + B[i]; + } + } +} + +void initGPU(DATA_TYPE *A_gpu, DATA_TYPE *B_gpu) +{ + for (uint64_t i = 0; i < NI; i++) { + A_gpu[i] = ((DATA_TYPE)i) / NI; + } + for (uint64_t i = 0; i < NI; i++) { + B_gpu[i] = ((DATA_TYPE)i + 2) / NI; + } + +} + +void initCPU(DATA_TYPE *A, DATA_TYPE *B) +{ + for (uint64_t i = 0; i < NI; i++) { + A[i] = ((DATA_TYPE)i) / NI; + } + for (uint64_t i = 0; i < NI; i++) { + B[i] = ((DATA_TYPE)i + 2) / NI; + } +} + +void compareResults(DATA_TYPE* B, DATA_TYPE* B_outputFromGpu) +{ + uint64_t fail = 0; + + // Compare C1 and C2 + for (uint64_t i = 0; i < NI; i++) { + // printf("%lld, GPU is %f, CPU is %f.\n", i, B[i], B_outputFromGpu[i]); + if (percentDiff(B[i], B_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%lld, GPU is %f, CPU is %f.\n", i, B[i], B_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void saxpy_kernel(DATA_TYPE *a, DATA_TYPE *b, uint64_t NI, uint64_t iterations, uint64_t block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + + // Compute each thread's global row and column index + const uint64_t mem_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + __shared__ DATA_TYPE tmp_a[mem_size * PREFETCH_COUNT]; + __shared__ DATA_TYPE tmp_b[mem_size * PREFETCH_COUNT]; + + uint64_t total_tiles = NI / mem_size; + uint64_t base_tiles = total_tiles / gridDim.x; + + uint64_t tiles_this_block = block_size / mem_size; + + uint64_t fetch = base_tiles * blockIdx.x; + uint64_t end_tile 
= fetch + tiles_this_block; + + for (uint64_t compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + memcpy_async(tmp_a[(fetch % PREFETCH_COUNT) * mem_size + i], a[fetch * mem_size + i], pipe); + memcpy_async(tmp_b[(fetch % PREFETCH_COUNT) * mem_size + i], b[fetch * mem_size + i], pipe); + } + pipe.commit(); + } + if (fetch == end_tile) + { + for (uint64_t i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + for (uint64_t iter = 0; iter < iterations; iter++) + { + tmp_b[(compute % PREFETCH_COUNT) * mem_size + i] += ALPHA * tmp_a[(compute % PREFETCH_COUNT) * mem_size + i]; + } + } + block.sync(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + b[compute * mem_size + i] = tmp_b[(compute % PREFETCH_COUNT) * mem_size + i]; + } + block.sync(); + } +} + +void saxpyCuda(DATA_TYPE *A, DATA_TYPE *B, DATA_TYPE *A_gpu, DATA_TYPE *B_gpu, uint64_t iterations, uint64_t block_size) +{ + double t_start, t_end; + if (block_size <= DIM_THREAD_BLOCK) + block_size = DIM_THREAD_BLOCK; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / block_size); + + //t_start = rtclock(); + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(A_gpu, NI * sizeof(DATA_TYPE), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(B_gpu, NI * sizeof(DATA_TYPE), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + saxpy_kernel<<>>(A_gpu, B_gpu, NI, iterations, block_size); + cudaDeviceSynchronize(); + + // saxpy_kernel<<>>(A_gpu, B_gpu, NI, iterations, block_size); + // cudaDeviceSynchronize(); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, 
char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + uint64_t iterations = ITER; + uint64_t block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + if (argc >= 4) { + NI = atoll(argv[1]); + iterations = atoi(argv[2]); + block_size = atoi(argv[3]); + } else { + NI = SIZE; + iterations = ITER; + block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + } + + int nblocks = NI / block_size; + if (nblocks > 64) + { + nblocks = 64; + block_size = NI / nblocks; + } + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE* B; + + DATA_TYPE *A_gpu; + DATA_TYPE *B_gpu; + + A = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + B = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + + initCPU(A,B); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI); + cudaMallocManaged(&B_gpu, sizeof(DATA_TYPE) * NI); + + // initGPU(A_gpu, B_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * sizeof(DATA_TYPE)); + memcpy(B_gpu, B, NI * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + saxpyCuda(A, B, A_gpu, B_gpu, iterations, block_size); + memcpy(B, B_gpu, NI * sizeof(DATA_TYPE)); + + // t_start = rtclock(); + // saxpy(A, B, iterations); + // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(B_gpu, B); + cudaFree(A_gpu); + cudaFree(B_gpu); + endCPU(); + finiTrace(); + + free(A); + free(B); + return 0; +} diff --git a/workloads/micro/uvm_prefetch_async/vector_rand/Makefile b/workloads/micro/uvm_prefetch_async/vector_rand/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..b2abd68d5e6d513a5652b845d6d822b15fc12a59 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_rand/Makefile @@ -0,0 +1,12 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := vector_rand +CUFILES := 
vector_rand.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o vector_rand diff --git a/workloads/micro/uvm_prefetch_async/vector_rand/run.sh b/workloads/micro/uvm_prefetch_async/vector_rand/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..61775ede6c26208ae2a150958f8eec5375952773 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_rand/run.sh @@ -0,0 +1,11 @@ +# ./vector_seq 17179869184 10 524288 + +./vector_seq 8589934592 10 262144 +#nsys profile --stats=true ./vector_seq 1073741824 10 32768 + + +# ./vector_seq 4294967296 10 131072 + +# ./vector_seq 2147483648 10 32768 +#./vector_seq 1073741824 10 32768 +# ./vector_seq 1048576 100 32768 diff --git a/workloads/micro/uvm_prefetch_async/vector_rand/run_large.sh b/workloads/micro/uvm_prefetch_async/vector_rand/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..0fbcd5c0eb481d41efff934e3f19f162bc1f73e2 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_rand/run_large.sh @@ -0,0 +1 @@ +./vector_rand 134217728 100 65536 diff --git a/workloads/micro/uvm_prefetch_async/vector_rand/run_medium.sh b/workloads/micro/uvm_prefetch_async/vector_rand/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..84e868f85fbc74d101d476d501095afd4aa6d017 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_rand/run_medium.sh @@ -0,0 +1 @@ +./vector_rand 16777216 100 32768 diff --git a/workloads/micro/uvm_prefetch_async/vector_rand/run_mega.sh b/workloads/micro/uvm_prefetch_async/vector_rand/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..24b084a7613986acafd45cf6ca300fe52f0426d5 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_rand/run_mega.sh @@ -0,0 +1 @@ +./vector_rand 8589934592 100 262144 diff --git a/workloads/micro/uvm_prefetch_async/vector_rand/run_small.sh 
b/workloads/micro/uvm_prefetch_async/vector_rand/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..baf1b589adcc83b946051cf0fc2383b45746c62f --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_rand/run_small.sh @@ -0,0 +1 @@ +./vector_rand 2097152 100 32768 diff --git a/workloads/micro/uvm_prefetch_async/vector_rand/run_super.sh b/workloads/micro/uvm_prefetch_async/vector_rand/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..19be53cddf75e6d6c5812e3ec337cce2fd9079ae --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_rand/run_super.sh @@ -0,0 +1 @@ +./vector_rand 1073741824 100 65536 diff --git a/workloads/micro/uvm_prefetch_async/vector_rand/run_tiny.sh b/workloads/micro/uvm_prefetch_async/vector_rand/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..55ee72259022f385aabf8eedc426425e6817835f --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_rand/run_tiny.sh @@ -0,0 +1 @@ +./vector_rand 262144 100 8192 diff --git a/workloads/micro/uvm_prefetch_async/vector_rand/vector_rand.cu b/workloads/micro/uvm_prefetch_async/vector_rand/vector_rand.cu new file mode 100644 index 0000000000000000000000000000000000000000..37e825cb898d71328641a2854d08da444a22fdf3 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_rand/vector_rand.cu @@ -0,0 +1,281 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.005 + +/* Problem size */ +#define SIZE 4096000 +#define ITER 100 +uint64_t NI; + +/* Thread block dimensions */ +#define DIM_THREAD_BLOCK 256 + +#define BATCH_SIZE 16 + +#define LCG_A 1.1f +#define LCG_B 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void saxpy(DATA_TYPE *A, uint64_t iterations) +{ + for (uint64_t i = 0; i < NI; i++) { + for (uint64_t iter = 0; iter < iterations; iter++) { + A[i] = LCG_A * A[i] + LCG_B; + } + } +} + +void initGPU(DATA_TYPE *A_gpu) +{ + for (uint64_t i = 0; i < NI; i++) + { + A_gpu[i] = ((DATA_TYPE)i) / NI; + } + +} + +void initCPU(DATA_TYPE *A) +{ + for (uint64_t i = 0; i < NI; i++) + { + A[i] = ((DATA_TYPE)i) / NI; + } + +} + +void compareResults(DATA_TYPE* A, DATA_TYPE* A_outputFromGpu) +{ + uint64_t fail = 0; + + // Compare C1 and C2 + for (uint64_t i = 0; i 
< NI; i++) { + // printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + if (percentDiff(A[i], A_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void vector_rand_kernel(DATA_TYPE *a, uint64_t NI, uint64_t iterations, uint64_t block_size, size_t seed) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + + const uint64_t mem_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + __shared__ DATA_TYPE tmp[mem_size * PREFETCH_COUNT]; + + curandState_t randState; + size_t tx = blockIdx.x * blockDim.x + threadIdx.x; + curand_init(seed, tx, 0, &randState); + size_t idx = 0; + + uint64_t total_tiles = NI / mem_size; + uint64_t base_tiles = total_tiles / gridDim.x; + + uint64_t tiles_this_block = block_size / mem_size; + + uint64_t fetch = base_tiles * blockIdx.x; + uint64_t end_tile = fetch + tiles_this_block; + + for (uint64_t compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + idx = curand(&randState); + idx <<= 32; + idx |= curand(&randState); + + memcpy_async(tmp[(fetch % PREFETCH_COUNT) * mem_size + i], a[fetch * mem_size + idx % mem_size], pipe); + } + pipe.commit(); + } + if (fetch == end_tile) + { + for (uint64_t i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + for (uint64_t iter = 0; iter < iterations; iter++) + { + tmp[(compute % PREFETCH_COUNT) * mem_size + i] = LCG_A * tmp[(compute % PREFETCH_COUNT) * mem_size + i] + LCG_B; + } + } + block.sync(); + + for (uint64_t i = 
threadIdx.x; i < mem_size; i += blockDim.x) + { + a[compute * mem_size + i] = tmp[(compute % PREFETCH_COUNT) * mem_size + idx % mem_size]; + } + block.sync(); + } +} + +void saxpyCuda(DATA_TYPE *A, DATA_TYPE *A_gpu, uint64_t iterations, uint64_t block_size) +{ + double t_start, t_end; + if (block_size <= DIM_THREAD_BLOCK) + block_size = DIM_THREAD_BLOCK; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / block_size); + + //t_start = rtclock(); + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(A_gpu, NI * sizeof(DATA_TYPE), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + vector_rand_kernel<<>>(A_gpu, NI, iterations, block_size, 832945); + cudaDeviceSynchronize(); + + // vector_rand_kernel<<>>(A_gpu, NI, iterations, block_size, 832945); + // cudaDeviceSynchronize(); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + uint64_t iterations = ITER; + uint64_t block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + if (argc >= 4) { + NI = atoll(argv[1]); + iterations = atoi(argv[2]); + block_size = atoi(argv[3]); + } + else { + NI = SIZE; + iterations = ITER; + block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + } + + int nblocks = NI / block_size; + if (nblocks > 64) + { + nblocks = 64; + block_size = NI / nblocks; + } + + double t_start, t_end; + + DATA_TYPE *A; + DATA_TYPE *A_gpu; + + A = (DATA_TYPE *)malloc(NI * sizeof(DATA_TYPE)); + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI); + + // initGPU(A_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + saxpyCuda(A, A_gpu, iterations, block_size); + memcpy(A, A_gpu, NI * sizeof(DATA_TYPE)); + + // t_start = rtclock(); + //saxpy(A, iterations); + // t_end = rtclock(); + // 
fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + //compareResults(A_gpu, A); + + cudaFree(A_gpu); + endCPU(); + finiTrace(); + + free(A); + return 0; +} diff --git a/workloads/micro/uvm_prefetch_async/vector_seq/Makefile b/workloads/micro/uvm_prefetch_async/vector_seq/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..2d9857cedea5755ad6a381996c578f9cbd41424a --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_seq/Makefile @@ -0,0 +1,109 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := vector_seq +CUFILES := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + + +EXECUTABLE_4096_256 := vector_seq_4096_256 +CUFILES_4096_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=4096 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_2048_256 := vector_seq_2048_256 +CUFILES_2048_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=2048 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_1024_256 := vector_seq_1024_256 +CUFILES_1024_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=1024 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_512_256 := vector_seq_512_256 +CUFILES_512_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=512 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_256_256 := vector_seq_256_256 +CUFILES_256_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=256 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_128_256 := vector_seq_128_256 +CUFILES_128_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=128 
-DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_64_256 := vector_seq_64_256 +CUFILES_64_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_32_256 := vector_seq_32_256 +CUFILES_32_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=32 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_16_256 := vector_seq_16_256 +CUFILES_16_256 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=16 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + + +EXECUTABLE_1024_4 := vector_seq_1024_4 +CUFILES_1024_4 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=1024 -DBATCH_SIZE=4 + +EXECUTABLE_512_8 := vector_seq_512_8 +CUFILES_512_8 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=512 -DBATCH_SIZE=8 + +EXECUTABLE_256_16 := vector_seq_256_16 +CUFILES_256_16 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 + +EXECUTABLE_128_32 := vector_seq_128_32 +CUFILES_128_32 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=32 + +EXECUTABLE_64_64 := vector_seq_64_64 +CUFILES_64_64 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=64 -DBATCH_SIZE=64 + +EXECUTABLE_32_128 := vector_seq_32_128 +CUFILES_32_128 := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DNBLOCK=64 -DDIM_THREAD_BLOCK=32 -DBATCH_SIZE=128 + + +EXECUTABLE_2 := vector_seq_2 +CUFILES_2:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=2 + 
+EXECUTABLE_4 := vector_seq_4 +CUFILES_4:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=4 + +EXECUTABLE_8 := vector_seq_8 +CUFILES_8:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=8 + +EXECUTABLE_16 := vector_seq_16 +CUFILES_16:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=16 + +EXECUTABLE_32 := vector_seq_32 +CUFILES_32:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=32 + +EXECUTABLE_64 := vector_seq_64 +CUFILES_64:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=64 + +EXECUTABLE_128 := vector_seq_128 +CUFILES_128:= vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=128 + + + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} + + $(NVCC) ${NVCCCFLAGS} ${CUFILES_4096_256} ${DEF} -o ${EXECUTABLE_4096_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_2048_256} ${DEF} -o ${EXECUTABLE_2048_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_1024_256} ${DEF} -o ${EXECUTABLE_1024_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_512_256} ${DEF} -o ${EXECUTABLE_512_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_256_256} ${DEF} -o ${EXECUTABLE_256_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_128_256} ${DEF} -o ${EXECUTABLE_128_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_64_256} ${DEF} -o ${EXECUTABLE_64_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_32_256} ${DEF} -o ${EXECUTABLE_32_256} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_16_256} ${DEF} -o ${EXECUTABLE_16_256} + + $(NVCC) ${NVCCCFLAGS} ${CUFILES_1024_4} ${DEF} -o ${EXECUTABLE_1024_4} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_512_8} ${DEF} -o ${EXECUTABLE_512_8} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_256_16} ${DEF} -o 
${EXECUTABLE_256_16} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_128_32} ${DEF} -o ${EXECUTABLE_128_32} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_64_64} ${DEF} -o ${EXECUTABLE_64_64} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_32_128} ${DEF} -o ${EXECUTABLE_32_128} + + $(NVCC) ${NVCCCFLAGS} ${CUFILES_2} ${DEF} -o ${EXECUTABLE_2} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_4} ${DEF} -o ${EXECUTABLE_4} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_8} ${DEF} -o ${EXECUTABLE_8} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_16} ${DEF} -o ${EXECUTABLE_16} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_32} ${DEF} -o ${EXECUTABLE_32} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_64} ${DEF} -o ${EXECUTABLE_64} + $(NVCC) ${NVCCCFLAGS} ${CUFILES_128} ${DEF} -o ${EXECUTABLE_128} + +clean: + rm -f *.o vector_seq vector_seq_* diff --git a/workloads/micro/uvm_prefetch_async/vector_seq/run.sh b/workloads/micro/uvm_prefetch_async/vector_seq/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..61775ede6c26208ae2a150958f8eec5375952773 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_seq/run.sh @@ -0,0 +1,11 @@ +# ./vector_seq 17179869184 10 524288 + +./vector_seq 8589934592 10 262144 +#nsys profile --stats=true ./vector_seq 1073741824 10 32768 + + +# ./vector_seq 4294967296 10 131072 + +# ./vector_seq 2147483648 10 32768 +#./vector_seq 1073741824 10 32768 +# ./vector_seq 1048576 100 32768 diff --git a/workloads/micro/uvm_prefetch_async/vector_seq/run_large.sh b/workloads/micro/uvm_prefetch_async/vector_seq/run_large.sh new file mode 100755 index 0000000000000000000000000000000000000000..66794fb697b9cd4165ae0e85db50fd512c3467e7 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_seq/run_large.sh @@ -0,0 +1 @@ +./vector_seq 134217728 100 65536 diff --git a/workloads/micro/uvm_prefetch_async/vector_seq/run_medium.sh b/workloads/micro/uvm_prefetch_async/vector_seq/run_medium.sh new file mode 100755 index 0000000000000000000000000000000000000000..9bbbb986b9351d0e1c82e9d53d86d4d0f83c7492 --- /dev/null +++ 
b/workloads/micro/uvm_prefetch_async/vector_seq/run_medium.sh @@ -0,0 +1 @@ +./vector_seq 16777216 100 32768 diff --git a/workloads/micro/uvm_prefetch_async/vector_seq/run_mega.sh b/workloads/micro/uvm_prefetch_async/vector_seq/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..7b622d0246ef3a0a542e2422e09099850d52f3d1 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_seq/run_mega.sh @@ -0,0 +1 @@ +./vector_seq 8589934592 100 262144 diff --git a/workloads/micro/uvm_prefetch_async/vector_seq/run_small.sh b/workloads/micro/uvm_prefetch_async/vector_seq/run_small.sh new file mode 100755 index 0000000000000000000000000000000000000000..da65ab8dbc61ff2e26a2638703dc97d003cb9dba --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_seq/run_small.sh @@ -0,0 +1 @@ +./vector_seq 2097152 100 32768 diff --git a/workloads/micro/uvm_prefetch_async/vector_seq/run_super.sh b/workloads/micro/uvm_prefetch_async/vector_seq/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..5a98cb48e7b06fa4db1cb7db99d9a0e8ebcf7f46 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_seq/run_super.sh @@ -0,0 +1 @@ +./vector_seq 1073741824 100 65536 diff --git a/workloads/micro/uvm_prefetch_async/vector_seq/run_tiny.sh b/workloads/micro/uvm_prefetch_async/vector_seq/run_tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..a2f7760fa3fb1a37be61193c663da2e38690bfe3 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_seq/run_tiny.sh @@ -0,0 +1 @@ +./vector_seq 262144 100 8192 diff --git a/workloads/micro/uvm_prefetch_async/vector_seq/vector_seq.cu b/workloads/micro/uvm_prefetch_async/vector_seq/vector_seq.cu new file mode 100644 index 0000000000000000000000000000000000000000..f1d72a79c24a242baa6ab2253eb7654fc116fe00 --- /dev/null +++ b/workloads/micro/uvm_prefetch_async/vector_seq/vector_seq.cu @@ -0,0 +1,277 @@ +/** + * gemm.cu: This file is part of the PolyBench/GPU 1.0 test suite. 
+ * + * + * Contact: Scott Grauer-Gray + * Louis-Noel Pouchet + * Web address: http://www.cse.ohio-state.edu/~pouchet/software/polybench/GPU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SMALL_FLOAT_VAL 0.00000001f + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + uint64_t stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +float absVal(float a) +{ + if (a < 0) + { + return (a * -1); + } + else + { + return a; + } +} + +float percentDiff(double val1, double val2) +{ + if ((absVal(val1) < 0.01) && (absVal(val2) < 0.01)) + { + return 0.0f; + } + + else + { + return 100.0f * (absVal(absVal(val1 - val2) / absVal(val1 + SMALL_FLOAT_VAL))); + } +} + +//define the error threshold for the results "not matching" +#define PERCENT_DIFF_ERROR_THRESHOLD 0.005 + +/* Problem size */ +#define SIZE 1073741824 +#define ITER 100 +uint64_t NI; + +/* Thread block dimensions */ +#ifndef DIM_THREAD_BLOCK +#define DIM_THREAD_BLOCK 256 +#endif + +#ifndef BATCH_SIZE +#define BATCH_SIZE 16 +#endif + +#ifndef NBLOCKS +#define NBLOCKS 64 +#endif + +#define LCG_A 1.1f +#define LCG_B 1.1f + +/* Can switch DATA_TYPE between float and double */ +typedef float DATA_TYPE; +// typedef uint64_t DATA_TYPE; + +void saxpy(DATA_TYPE *A, uint64_t iterations) +{ + for (uint64_t i = 0; i < NI; i++) { + for (uint64_t iter = 0; iter < iterations; iter++) { + A[i] = LCG_A * A[i] + LCG_B; + } + } +} + +void initCPU(DATA_TYPE *A) +{ + for (uint64_t i = 0; i < NI; i++) { + A[i] = ((DATA_TYPE) i) / NI; + } +} + +void initGPU(DATA_TYPE *A_gpu) +{ + for (uint64_t i = 0; i < NI; i++) { + A_gpu[i] = ((DATA_TYPE)i) / NI; + } +} + + +void compareResults(DATA_TYPE* A, 
DATA_TYPE* A_outputFromGpu) +{ + uint64_t fail = 0; + + // Compare C1 and C2 + for (uint64_t i = 0; i < NI; i++) { + // printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + if (percentDiff(A[i], A_outputFromGpu[i]) > PERCENT_DIFF_ERROR_THRESHOLD) { + fail++; + printf("%lld, GPU is %f, CPU is %f.\n", i, A[i], A_outputFromGpu[i]); + } + } + + // Print results + printf("Non-Matching CPU-GPU Outputs Beyond Error Threshold of %4.2f Percent: %d\n", PERCENT_DIFF_ERROR_THRESHOLD, fail); +} + +__global__ void vector_seq_kernel(DATA_TYPE *a, uint64_t NI, uint64_t iterations, uint64_t block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + + const uint64_t mem_size = DIM_THREAD_BLOCK * BATCH_SIZE; + + // __shared__ DATA_TYPE tmp[mem_size * PREFETCH_COUNT]; + extern __shared__ DATA_TYPE tmp[]; + + uint64_t total_tiles = NI / mem_size; + uint64_t base_tiles = total_tiles / gridDim.x; + + uint64_t tiles_this_block = block_size / mem_size; + + uint64_t fetch = base_tiles * blockIdx.x; + uint64_t end_tile = fetch + tiles_this_block; + + for (uint64_t compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + memcpy_async(tmp[(fetch % PREFETCH_COUNT) * mem_size + i], a[fetch * mem_size + i], pipe); + } + pipe.commit(); + } + if (fetch == end_tile) + { + for (uint64_t i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + for (uint64_t iter = 0; iter < iterations; iter++) + { + tmp[(compute % PREFETCH_COUNT) * mem_size + i] = LCG_A * tmp[(compute % PREFETCH_COUNT) * mem_size + i] + LCG_B; + } + } + block.sync(); + + for (uint64_t i = threadIdx.x; i < mem_size; i += blockDim.x) + { + a[compute * mem_size + i] = tmp[(compute % 
PREFETCH_COUNT) * mem_size + i]; + } + block.sync(); + } +} + +void saxpyCuda(DATA_TYPE *A, DATA_TYPE *A_gpu, uint64_t iterations, uint64_t block_size) +{ + double t_start, t_end; + if (block_size <= DIM_THREAD_BLOCK) + block_size = DIM_THREAD_BLOCK; + + dim3 block(DIM_THREAD_BLOCK); + dim3 grid(NI / block_size); + + int MaxBytesofSharedMemory = DIM_THREAD_BLOCK * BATCH_SIZE * PREFETCH_COUNT * sizeof(DATA_TYPE); + cudaFuncSetAttribute(vector_seq_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, MaxBytesofSharedMemory); + + //t_start = rtclock(); + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(A_gpu, NI * sizeof(DATA_TYPE), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + vector_seq_kernel<<>>(A_gpu, NI, iterations, block_size); + cudaDeviceSynchronize(); + + // vector_seq_kernel<<>>(A_gpu, NI, iterations, block_size); + // cudaDeviceSynchronize(); + //t_end = rtclock(); + + //fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + uint64_t iterations = ITER; + uint64_t block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + if (argc >= 4) { + NI = atoll(argv[1]); + iterations = atoi(argv[2]); + block_size = atoi(argv[3]); + } + else { + NI = SIZE; + iterations = ITER; + block_size = DIM_THREAD_BLOCK * BATCH_SIZE; + } + + int nblocks = NBLOCKS; + block_size = NI / nblocks; + + double t_start, t_end; + + DATA_TYPE* A; + DATA_TYPE *A_gpu; + + A = (DATA_TYPE*)malloc(NI*sizeof(DATA_TYPE)); + + initCPU(A); + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMallocManaged(&A_gpu, sizeof(DATA_TYPE) * NI); + + // initGPU(A_gpu); + // overlapStartCPU(); + memcpy(A_gpu, A, NI * sizeof(DATA_TYPE)); + // overlapEndCPU(); + + saxpyCuda(A, A_gpu, iterations, block_size); + memcpy(A, A_gpu, NI * sizeof(DATA_TYPE)); + + // t_start = rtclock(); + // saxpy(A, iterations); 
+ // t_end = rtclock(); + // fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); + + // compareResults(A_gpu, A); + cudaFree(A_gpu); + endCPU(); + finiTrace(); + + + free(A); + return 0; +} + diff --git a/workloads/micro/vector_seq_maga.csv b/workloads/micro/vector_seq_maga.csv new file mode 100644 index 0000000000000000000000000000000000000000..eb0b067b5d5d0fd45fa4d5413c59776a0e543d7a --- /dev/null +++ b/workloads/micro/vector_seq_maga.csv @@ -0,0 +1,4 @@ +group,,Time +0,gpu_kernel,524232718 +0,allocation,503586197 +0,memcpy,27093354623 diff --git a/workloads/micro/vector_seq_mega_std.pdf b/workloads/micro/vector_seq_mega_std.pdf new file mode 100644 index 0000000000000000000000000000000000000000..03fbd019888b6f1b5659ae194303945215021b4c Binary files /dev/null and b/workloads/micro/vector_seq_mega_std.pdf differ diff --git a/workloads/micro/vector_seq_sensitivity_block.csv b/workloads/micro/vector_seq_sensitivity_block.csv new file mode 100644 index 0000000000000000000000000000000000000000..4b7460a6c4825a174efeea56e90e799f34569906 --- /dev/null +++ b/workloads/micro/vector_seq_sensitivity_block.csv @@ -0,0 +1,28 @@ +group,,standard,async,uvm,uvm_prefetch,uvm_prefetch_async +4096,gpu_kernel,0.07781171430186219,0.04528451769591008,0.26325272580797293,0.07778291487076493,0.045195617360919656 +4096,memcpy,0.7261005098361408,0.7389050288813662,0.5002853681650768,0.42205274695403217,0.4240542524666642 +4096,allocation,0.196087775861997,0.1927525839227595,0.5300699318992702,0.4994666063327884,0.5425935229104059 +2048,gpu_kernel,0.07778833252682275,0.045278475906383196,0.2687569356416639,0.0777750354125106,0.04520848428190736 +2048,memcpy,0.7326888691506528,0.7245371727155359,0.49547625727272254,0.41970085853632194,0.4214155339296313 +2048,allocation,0.18867725629160995,0.19242603057867785,0.5373973395967646,0.503914894923179,0.551721397818463 +1024,gpu_kernel,0.07780750428445295,0.04527817268456338,0.2678892370021932,0.07773856028018697,0.04518813278112882 
+1024,memcpy,0.719261361194239,0.7245406240142169,0.499687959719211,0.41935609432281734,0.42358191850182053 +1024,allocation,0.18968735054142352,0.19384491031664072,0.5408406691069109,0.5159624646768015,0.5616245245117903 +512,gpu_kernel,0.07780806410599636,0.04526664886225509,0.26278402062328576,0.07774350449031539,0.04519591789362621 +512,memcpy,0.7212868767300469,0.7182623101282364,0.501143984567488,0.4209652854576375,0.42360568326490694 +512,allocation,0.18720021229728886,0.19708090543579274,0.5738981930943752,0.5181387234137075,0.53516768871038 +256,gpu_kernel,0.077812088088606,0.0452665633290139,0.2526646159370329,0.07777386477346862,0.0451742067059033 +256,memcpy,0.7286767935133605,0.7179676401421823,0.4976898224019404,0.42019431415750347,0.4207435213496624 +256,allocation,0.19053935457631124,0.198442758023521,0.5363438953183717,0.5067967271511732,0.5488311511282704 +128,gpu_kernel,0.07780012526045751,0.04526362785409531,0.2591956665219924,0.07780593792457464,0.045186631737543795 +128,memcpy,0.7266905780395604,0.7254105038485349,0.500163534681119,0.4222763960008101,0.42361650046600674 +128,allocation,0.19363335865087142,0.1961874412033707,0.551564993645155,0.5010421466493888,0.5397833326448571 +64,gpu_kernel,0.07780344203867745,0.045260558150306156,0.26258054290669763,0.07773630690046793,0.04520785217829536 +64,memcpy,0.7477351597257211,0.7216808653886562,0.4995768661972442,0.42040734444678096,0.42199359018808663 +64,allocation,0.19542004681266623,0.19755435855773895,0.5433511578610934,0.5145916200831033,0.5506197298338873 +32,gpu_kernel,0.07781129687372601,0.04526024297327194,0.2573701737754086,0.07775525672525642,0.045196395616217654 +32,memcpy,0.7359860712092238,0.7243509569029787,0.4982409473530436,0.4218515061584939,0.4212928254146968 +32,allocation,0.1903124254593294,0.19931421772808158,0.552503528873848,0.5204518999724379,0.5498060551883156 
+16,gpu_kernel,0.07783208553360431,0.04525624562260331,0.25977888497074536,0.07778151089444907,0.0451859974632018 +16,memcpy,0.7307501065146991,0.718099120900975,0.4974590388784794,0.4216104143891171,0.42233502142389373 +16,allocation,0.1958643878535995,0.1992644526740138,0.5575306224625439,0.5216127822959022,0.5293240700484507 diff --git a/workloads/micro/vector_seq_sensitivity_block.pdf b/workloads/micro/vector_seq_sensitivity_block.pdf new file mode 100644 index 0000000000000000000000000000000000000000..6be7286f151e44e97b71d20e1512373dff0e72ea Binary files /dev/null and b/workloads/micro/vector_seq_sensitivity_block.pdf differ diff --git a/workloads/micro/vector_seq_sensitivity_shared.csv b/workloads/micro/vector_seq_sensitivity_shared.csv new file mode 100644 index 0000000000000000000000000000000000000000..528dc6b10a6ebcf166236e9b49ba516509bbb7b4 --- /dev/null +++ b/workloads/micro/vector_seq_sensitivity_shared.csv @@ -0,0 +1,22 @@ +group,,standard,async,uvm,uvm_prefetch,uvm_prefetch_async +2KB,gpu_kernel,0.10033841881031709,0.06159370594384277,0.2089160826774829,0.10250029311764199,0.06420476816293794 +2KB,memcpy,0.6444026761943288,0.6197389685588808,0.3776916622507978,0.3183604822797509,0.32044503558462667 +2KB,allocation,0.2552589049953541,0.22099350037025844,0.4446709362755676,0.4136497065282931,0.4171331730152742 +4KB,gpu_kernel,0.09812173482987978,0.05577621149481183,0.20486799218294846,0.10027106671068131,0.05757624165989899 +4KB,memcpy,0.6432619475013237,0.610246375809435,0.38638866221070645,0.3293057376277322,0.32523799253942215 +4KB,allocation,0.23684658540617093,0.21686857378032326,0.4480928610074074,0.4008834811386749,0.41625118070149275 +8KB,gpu_kernel,0.09680117003272354,0.052952728297652886,0.20677112797602837,0.09878178296775614,0.054418924358404806 +8KB,memcpy,0.6396925191180018,0.639540986887264,0.3829909902395469,0.32612374933290694,0.32958925092820723 
+8KB,allocation,0.2147487153529928,0.21885531299154448,0.4411661880514101,0.41563514854692307,0.43066531833512106 +16KB,gpu_kernel,0.09697939553345757,0.05128885862209411,0.20420389926023344,0.09772340091136529,0.05288333308560271 +16KB,memcpy,0.6509262154468788,0.6169834552541844,0.3806990847284905,0.3201305624443415,0.32215373265444813 +16KB,allocation,0.2187517037557754,0.22348667119344515,0.44453341400262525,0.40830116316221243,0.43452508599043427 +32KB,gpu_kernel,0.0959495800095362,0.05092095873653742,0.20757380646369922,0.09761058729044154,0.052407471324812376 +32KB,memcpy,0.6142834541989206,0.6067368170995155,0.38422588962535736,0.3235367459991315,0.3277512764972068 +32KB,allocation,0.2091238678042312,0.21640196255201372,0.44216707653295373,0.42322993603391823,0.4221811996689958 +64KB,gpu_kernel,0.09582010241937919,0.05054769027581663,0.20440883691175377,0.09692986072608388,0.052033957002250456 +64KB,memcpy,0.6379742945802966,0.6484145377452993,0.3756307222806087,0.32831960427783596,0.32277572792328363 +64KB,allocation,0.21934421067743765,0.21597537682373602,0.4483655611713775,0.42528199904889374,0.4413671986786598 +128KB,gpu_kernel,0.09558043445485213,0.05007902399955239,0.20674680923547534,0.09644422897957777,0.05146208794166637 +128KB,memcpy,0.6156379734989633,0.6204850208562286,0.3916979885809955,0.3295893065416515,0.33330459368408305 +128KB,allocation,0.20154372741166507,0.21283810711426066,0.4451488469364804,0.42797963663566757,0.4223491869455651 diff --git a/workloads/micro/vector_seq_sensitivity_shared.pdf b/workloads/micro/vector_seq_sensitivity_shared.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c74e94279c6f1e0ed829f6aeb2c5e6280b9d8ebf Binary files /dev/null and b/workloads/micro/vector_seq_sensitivity_shared.pdf differ diff --git a/workloads/micro/vector_seq_sensitivity_thread.csv b/workloads/micro/vector_seq_sensitivity_thread.csv new file mode 100644 index 
0000000000000000000000000000000000000000..fb9a8d62aa35417253fe9b3223f398ac298e2522 --- /dev/null +++ b/workloads/micro/vector_seq_sensitivity_thread.csv @@ -0,0 +1,19 @@ +group,,standard,async,uvm,uvm_prefetch,uvm_prefetch_async +1024,gpu_kernel,0.043523330679422094,0.034633119774798625,0.2924846158646235,0.043505753151103936,0.03460768523023579 +1024,memcpy,0.7527654233285772,0.7549365692709031,0.5214994100369522,0.44471433033355345,0.4416672373366119 +1024,allocation,0.20371124599200074,0.20139239723964183,0.5743778122304851,0.5110132483922324,0.6040871678905618 +512,gpu_kernel,0.05376920075139251,0.035414546712763055,0.2813219616279899,0.053770901115654926,0.035362698017901074 +512,memcpy,0.7552916183903086,0.7543198056217182,0.5215884697700037,0.4403237027630689,0.4431458875007721 +512,allocation,0.20575651708707096,0.20085529523097556,0.5603130266115857,0.5125495413810862,0.5866709251601967 +256,gpu_kernel,0.08169405673755913,0.04752114441098629,0.28131247591516634,0.08161498258836944,0.04746430965898285 +256,memcpy,0.7585259130230356,0.7568598457438419,0.5249555186673261,0.44302041966609407,0.44364709008849884 +256,allocation,0.21114227734886762,0.20455826297626503,0.5483049279300072,0.5055085769874297,0.5869840046024639 +128,gpu_kernel,0.1561018245461435,0.0862568369207757,0.2987069540169865,0.156050598649234,0.08621691376927426 +128,memcpy,0.7605281789500847,0.75566655241771,0.5212276690383768,0.4418095702065054,0.44410748750540646 +128,allocation,0.2053468074301855,0.20225007981741652,0.5766931243000091,0.5121767502072188,0.5776935078481383 +64,gpu_kernel,0.3097942623546093,0.17114090898931597,0.3534551538974396,0.3097294997690298,0.1709831198116784 +64,memcpy,0.7673178493699814,0.7700582356681395,0.5237463376667854,0.44487955784802563,0.4433862067269751 +64,allocation,0.20317623238031357,0.2061116600173311,0.5656071637706259,0.5016585514412594,0.576424814336511 
+32,gpu_kernel,0.6168899520655767,0.34002507575375773,0.48877502316138727,0.617051505718397,0.3398291940628384 +32,memcpy,0.7750054820208114,0.766421707368957,0.5247320640681695,0.4459661102378475,0.4399371063252922 +32,allocation,0.218159458615951,0.2058566525214926,0.5671310230796576,0.5101348977660979,0.5686376106470228 diff --git a/workloads/micro/vector_seq_sensitivity_thread.pdf b/workloads/micro/vector_seq_sensitivity_thread.pdf new file mode 100644 index 0000000000000000000000000000000000000000..5555b69e72913db4e3ff5dfcd6e7c815119ad5e5 Binary files /dev/null and b/workloads/micro/vector_seq_sensitivity_thread.pdf differ diff --git a/workloads/process_perf.py b/workloads/process_perf.py new file mode 100644 index 0000000000000000000000000000000000000000..773c4c3e5c379cf79cef7fa09c4f9b2fc00d1d8b --- /dev/null +++ b/workloads/process_perf.py @@ -0,0 +1,388 @@ +import os +import argparse +import sys +import subprocess +import psutil + +import os +import collections +import csv + +import numpy as np +import pandas as pd +import matplotlib +from matplotlib import pyplot as plt +import matplotlib.ticker as mticker +import xlsxwriter +import seaborn as sns + +from matplotlib.ticker import FormatStrFormatter +from matplotlib.legend_handler import HandlerTuple + +from subprocess import Popen, PIPE + +from scipy.stats import gmean + + +prefix = 'run_' +parameter_super_list = ['super'] + +config_super_list = ['standard', 'async', 'uvm', 'uvm_prefetch', 'uvm_prefetch_async'] + +workload_super_list = ['gemm', 'lud'] +darknet_super_list = ['yolov3'] + +def dict_to_list(input_dict): + return_list = [] + for elemement in input_dict: + return_list.append(elemement) + return return_list + +def addOptions(parser): + parser.add_argument("-i", "--iterations", type=int, default=1, + help="Number of iterations") + parser.add_argument("-c", "--csv", type=str, default='output.xlsx', + help="output trace log file") + parser.add_argument("-f", "--figure", type=str, default='micro', + 
help="output pdf file") + + +def get_config_list(root_directory): + config_list = [] + for dict in os.listdir(root_directory): + if os.path.isdir(os.path.join(root_directory, dict)) and dict in config_super_list: + print(dict) + config_list.append(dict) + return config_list + + +def get_workload_dict(root_directory, config_list): + workload_list = [] + workload_dict = dict() + for config in config_list: + config_dir = root_directory + '/' + config + + for root, directories, files in os.walk(config_dir, topdown=False): + for dir in directories: + if dir in workload_super_list: + if dir not in workload_dict: + workload_dict[dir] = dict() + workload_dict[dir][config] = os.path.join(root, dir + '_perf') + if dir not in workload_list: + workload_list.append(dir) + if dir == 'darknet': + for root_darnet, directories_darknet, files_darknet in os.walk(config_dir + '/darknet_perf', topdown=False): + for dir in directories_darknet: + if dir in darknet_super_list: + if dir not in workload_dict: + workload_dict[dir] = dict() + workload_dict[dir][config] = os.path.join(root_darnet, dir) + if dir not in workload_list: + workload_list.append(dir) + + return workload_list, workload_dict + +def get_run_cmd(bash_file): + return_txt = '' + text = open(bash_file, "r") + # line = text[0] + # print(line) + for line in text: + return_txt += line.rstrip() + return return_txt + +def process_file(log_file, perf_list): + result_dict = dict() + result_map = dict() + + result_map['memory'] = [1] + result_map['control'] = [2] + result_map['int'] = [3] + result_map['fp'] = [4, 5, 6] + + result_map['load'] = [7] + result_map['load_hit'] = [8] + + result_map['store'] = [9] + result_map['store_hit'] = [10] + + perf_list.append('l1tex__t_sectors_pipe_lsu_mem_global_op_ld_lookup_hit.sum') + perf_list.append('l1tex__t_sectors_pipe_lsu_mem_global_op_st_lookup_hit.sum') + perf_list.append('l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum') + 
perf_list.append('l1tex__t_sectors_pipe_lsu_mem_global_op_st.sum') + + lines = [] + print(log_file) + text = open(log_file, "r") + index = 0 + for line in text: + if "==PROF==" not in line: + lines.append(line) + text.close() + os.remove(log_file) + + + out = open(log_file, "w") + for line in lines: + out.write(line) + out.close() + + content_dict = dict() + content = pd.read_csv(log_file) + + # print(content) + content = content[["Metric Name", "Metric Value"]].to_numpy() + # print(content) + + for ele in content: + if ele[0] in perf_list: + if ele[0] not in content_dict: + content_dict[ele[0]] = [] + content_dict[ele[0]].append(int(ele[1].replace(',', ''))) + + # print(content_dict) + + for ele in result_map: + result_dict[ele] = 0 + for index in result_map[ele]: + result_dict[ele] += sum(content_dict[perf_list[index]]) + + return_dict = dict() + return_dict['memory'] = result_dict['memory'] + return_dict['control'] = result_dict['control'] + return_dict['int'] = result_dict['int'] + return_dict['fp'] = result_dict['fp'] + + return_dict['load_miss_rate'] = (result_dict['load'] - result_dict['load_hit']) / result_dict['load'] + return_dict['store_miss_rate'] = (result_dict['store'] - result_dict['store_hit']) / result_dict['store'] + + print(return_dict) + return return_dict + + +def process_results(workload_dict, workload_list, iterations, perf_list): + result_dict = dict() + for workload in workload_dict: + if workload in workload_list: + for config in workload_dict[workload]: + if config in config_super_list: + cur_dir = workload_dict[workload][config] + for para in parameter_super_list: + if para not in result_dict: + result_dict[para] = dict() + if workload not in result_dict[para]: + result_dict[para][workload] = dict() + + # if config not in result_dict[para][workload]: + result_dict[para][workload][config] = [] + for i in range(0, iterations): + log_file = cur_dir + '/' + para + '_' + str(i) + '.profile.csv' + 
result_dict[para][workload][config].append(process_file(log_file, perf_list)) + print(workload, config, para, i, result_dict[para][workload][config][i]) + sorted(result_dict[para][workload]) + sorted(result_dict[para]) + return result_dict + + +def export_csv(result_dict, config_list, iterations, sub_metric): + workload_list = dict_to_list(result_dict['super']) + + super_avg_dict = dict() + for workload in workload_list: + super_avg_dict[workload] = dict() + for c in range(0, len(config_list)): + super_avg_dict[workload][config_list[c]] = dict() + + metric_list = dict_to_list(result_dict['super'][workload][config_list[c]][0]) + + for metric in metric_list: + super_avg_dict[workload][config_list[c]][metric] = 0 + super_avg_dict[workload][config_list[c]]['all'] = 0 + + for i in range(0, iterations): + for metric in metric_list: + super_avg_dict[workload][config_list[c]][metric] += result_dict['super'][workload][config_list[c]][i][metric] / iterations + super_avg_dict[workload][config_list[c]]['all'] += result_dict['super'][workload][config_list[c]][i][metric] / iterations + + # for c in range(0, len(config_list)): + # normarlized_all = super_avg_dict[workload][config_list[c]]['all'] / super_avg_dict[workload]['standard']['all'] + # print(super_avg_dict[workload][config_list[c]]) + # for metric in metric_list: + # super_avg_dict[workload][config_list[c]][metric] = (super_avg_dict[workload][config_list[c]][metric] / super_avg_dict[workload][config_list[c]]['all']) * normarlized_all + + csv_list = [] + + for metric in sub_metric: + profile_csv_file = 'super_profile_' + metric + '.csv' + csv_list.append(profile_csv_file) + out = open(profile_csv_file, "w") + + out.write('group,') + for i in range(0, len(config_list)): + out.write(config_list[i]) + if i != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + + for i in range(0, len(workload_list)): + out.write(workload_list[i] + ',') + for j in range(0, len(config_list)): + 
out.write(str(super_avg_dict[workload_list[i]][config_list[j]][metric])) + if j != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + out.close() + + return csv_list + + +def normalize(arr, t_min, t_max): + norm_arr = [] + diff = t_max - t_min + diff_arr = max(arr) - min(arr) + for i in arr: + temp = (((i - min(arr))*diff)/diff_arr) + t_min + norm_arr.append(temp) + return norm_arr + +def plot_results(csv_file, output_file): + df = pd.read_csv(csv_file, index_col=0) + + group_list = [] + for index in df.index: + if index not in group_list: + group_list.append(index) + col_list = df.columns + + ngroups = len(group_list) + x = np.arange(ngroups) + nbars = len(col_list) + width = (1 - 0.4) / (1.5 * nbars) # the width of the bars + print(group_list) + + matplotlib.rcParams["hatch.linewidth"] = 2 + + # patterns = [ "|" , "/", "-", "", "x", "-", "\\", "+", "o", "O" ] + # patterns = [ "|" , "/", "x", "*", ".", "-", "\\", "+", "o", "O" ] + # patterns = ["//", "//", "//", "//", "//", "//", "//"] + patterns = ["", "", "", "", "", ""] + # color_tab = ['#D9D9D9', '#BFBFBF', '#A6A6A6', '#7F7F7F', '#7F7F7F', '#7F7F7F'] + color_tab = ['#000000', '#0000ff', '#ff0000', '#ff6666', '#00ff00'] + edge_color_tab = ['#000000', '#000000', '#000000', '#000000', '#000000', '#000000'] + + if "rate" in csv_file: + fig, ax = plt.subplots(figsize=[5, 6]) + else: + fig, ax = plt.subplots(figsize=[5, 4]) + + rects = [] + + print(nbars) + print(col_list) + + for i in range(0, nbars): + # height_cum = np.array([0.0] * ngroups) + height_total = np.array([1 for g in group_list]) # y coo + height_curr = np.array([float(df[col_list[i]][g]) for g in group_list]) # y coo + print(height_total) + print(height_curr) + rect_base = ax.bar(x - 0.3 + (3 * i + 1.5) * width / 2, # x coo + height_curr / height_total, # y coo + width, label=col_list[i], + color=color_tab[i], + edgecolor=edge_color_tab[i], + linewidth=0.5 + ) + rects.append(rect_base) + # height_cum += height_curr + + 
hdl_pair = [(rects[i]) for i in range(nbars)] + + ax.set_xticks(x) + ax.set_xticklabels(group_list, rotation=0) + # ax.legend() + if "rate" in csv_file: + ax.legend(loc='upper left', ncol=1, bbox_to_anchor=(0.3, 1.4), fontsize=14) + else: + ax.legend(fontsize=14) + + ax.set_yscale('log') + # ax.yaxis.set_major_formatter(mticker.PercentFormatter(1.0)) + + + plt.xticks(fontsize=15, rotation=15) + plt.yticks(fontsize=15) + plt.grid(axis='y') + plt.xlabel("") + if "rate" in csv_file: + plt.ylabel("Miss rate", fontsize=15) + else: + plt.ylabel("Inst count", fontsize=15) + plt.tight_layout() + # plt.margins(x=0.01, y=0.01) + + plt.savefig(output_file, bbox_inches='tight') + plt.close() + +def main(): + parser = argparse.ArgumentParser() + addOptions(parser) + + options = parser.parse_args() + + iterations = options.iterations + output_csv_file = options.csv + output_figure_file = options.figure + + perf_list = [] + + perf_list.append('smsp__inst_executed.sum') + + perf_list.append('smsp__sass_thread_inst_executed_op_memory_pred_on.sum') + perf_list.append('smsp__sass_thread_inst_executed_op_control_pred_on.sum') + perf_list.append('smsp__sass_thread_inst_executed_op_fp16_pred_on.sum') + perf_list.append('smsp__sass_thread_inst_executed_op_fp32_pred_on.sum') + perf_list.append('smsp__sass_thread_inst_executed_op_fp64_pred_on.sum') + perf_list.append('smsp__sass_thread_inst_executed_op_integer_pred_on.sum') + + perf_list.append('l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum') + perf_list.append('l1tex__t_sectors_pipe_lsu_mem_global_op_ld_lookup_hit.sum') + perf_list.append('l1tex__t_sectors_pipe_lsu_mem_global_op_st.sum') + perf_list.append('l1tex__t_sectors_pipe_lsu_mem_global_op_st_lookup_hit.sum') + + + micro_root_directory = './micro/' + real_root_directory = './realworld/' + + config_list = get_config_list(micro_root_directory) + + micro_workload_list, micro_workload_dict = get_workload_dict(micro_root_directory, config_list) + real_workload_list, real_workload_dict 
= get_workload_dict(real_root_directory, config_list) + + workload_list = micro_workload_list + real_workload_list + + workload_dict = dict() + for workload in workload_list: + if workload in micro_workload_dict: + workload_dict[workload] = micro_workload_dict[workload] + else: + workload_dict[workload] = real_workload_dict[workload] + + print(workload_dict) + + metric_list = ['memory', 'control', 'fp', 'int', 'load_miss_rate', 'store_miss_rate'] + + result_dict = process_results(workload_dict, workload_list, iterations, perf_list) + csv_list = export_csv(result_dict, config_super_list, iterations, metric_list) + for csv_file in csv_list: + plot_results(csv_file, csv_file.replace(".csv", ".pdf")) + + + +if __name__ == '__main__': + main() + diff --git a/workloads/realworld/async/BN/.clang-format b/workloads/realworld/async/BN/.clang-format new file mode 100644 index 0000000000000000000000000000000000000000..3a5940ef65bf1e40df9511da805a7a0440184e84 --- /dev/null +++ b/workloads/realworld/async/BN/.clang-format @@ -0,0 +1,90 @@ +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: false +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: false +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false 
+ IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IndentCaseLabels: false +IndentWidth: 2 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 8 +UseTab: Never +... 
+ diff --git a/workloads/realworld/async/BN/LICENSE b/workloads/realworld/async/BN/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/workloads/realworld/async/BN/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/workloads/realworld/async/BN/Makefile b/workloads/realworld/async/BN/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..864b8e45401b0fe12162389c2e9d33fa86f4fc9f --- /dev/null +++ b/workloads/realworld/async/BN/Makefile @@ -0,0 +1,169 @@ +################################################################################ +# +# Copyright 1993-2013 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. 
NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. 
+# +################################################################################ +# +# Makefile project only supported on Mac OS X and Linux Platforms) +# +################################################################################ + +include ../../../common/make.config +include ./findcudalib.mk + +# Location of the CUDA Toolkit +CUDA_PATH ?= $(CUDA_DIR) + +# internal flags +NVCCFLAGS := -m${OS_SIZE} +CCFLAGS := -Wno-narrowing +NVCCLDFLAGS := +LDFLAGS := + +# Extra user flags +EXTRA_NVCCFLAGS ?= +EXTRA_NVCCLDFLAGS ?= +EXTRA_LDFLAGS ?= +EXTRA_CCFLAGS ?= + +# OS-specific build flags +ifneq ($(DARWIN),) + LDFLAGS += -rpath $(CUDA_PATH)/lib + CCFLAGS += -arch $(OS_ARCH) $(STDLIB) +else + ifeq ($(OS_ARCH),armv7l) + ifeq ($(abi),gnueabi) + CCFLAGS += -mfloat-abi=softfp + else + # default to gnueabihf + override abi := gnueabihf + LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3 + CCFLAGS += -mfloat-abi=hard + endif + endif +endif + +ifeq ($(ARMv7),1) +NVCCFLAGS += -target-cpu-arch ARM +ifneq ($(TARGET_FS),) +CCFLAGS += --sysroot=$(TARGET_FS) +LDFLAGS += --sysroot=$(TARGET_FS) +LDFLAGS += -rpath-link=$(TARGET_FS)/lib +LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib +LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-$(abi) +endif +endif + +# Debug build flags +ifeq ($(dbg),1) + NVCCFLAGS += -g -G + TARGET := debug +else + TARGET := release +endif + +ALL_CCFLAGS := +ALL_CCFLAGS += $(NVCCFLAGS) +ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS)) +ALL_CCFLAGS += $(EXTRA_NVCCFLAGS) +ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS)) + +ALL_LDFLAGS := +ALL_LDFLAGS += $(ALL_CCFLAGS) +ALL_LDFLAGS += $(NVCCLDFLAGS) +ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS)) +ALL_LDFLAGS += $(EXTRA_NVCCLDFLAGS) +ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS)) + +# Common includes and paths for CUDA +INCLUDES := -I../../common/inc -I$(INCLUDE) -I$(CUPTI_INCLUDE) +LIBRARIES := -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + 
+################################################################################ + +# CUDA code generation flags +ifneq ($(OS_ARCH),armv7l) +GENCODE_SM10 := -gencode arch=compute_10,code=sm_10 +endif +GENCODE_SM20 := -gencode arch=compute_20,code=sm_20 +GENCODE_SM30 := -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=\"sm_35,compute_35\" +GENCODE_SM80 := -gencode arch=compute_80,code=sm_80 +GENCODE_FLAGS := $(GENCODE_SM80) + +################################################################################ + +# Target rules +all: build + +build: ordergraph_30 ordergraph_40 ordergraph_45 ordergraph_50 + +# ordergraph_25.o: ordergraph.cu ordergraph_kernel.cu +# $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_25 -o $@ -c $< + +ordergraph_30.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_30 -o $@ -c $< + +ordergraph_40.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_40 -o $@ -c $< + +ordergraph_45.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_45 -o $@ -c $< + +ordergraph_50.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_50 -o $@ -c $< + +# ordergraph_125.o: ordergraph.cu ordergraph_kernel.cu +# $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_125 -o $@ -c $< + + + +# ordergraph_25: ordergraph_25.o +# $(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES) + +ordergraph_30: ordergraph_30.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +ordergraph_40: ordergraph_40.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +ordergraph_45: ordergraph_45.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp 
$(LIBRARIES) + +ordergraph_50: ordergraph_50.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +# ordergraph_125: ordergraph_125.o +# $(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES) + +run: build + ./ordergraph + +clean: + rm -f ordergraph_30 ordergraph_40 ordergraph_45 ordergraph_50 *.o *.bin *.out + +clobber: clean diff --git a/workloads/realworld/async/BN/README.md b/workloads/realworld/async/BN/README.md new file mode 100644 index 0000000000000000000000000000000000000000..07158a0bd52af63032c860ff04e243e0d7c76ef1 --- /dev/null +++ b/workloads/realworld/async/BN/README.md @@ -0,0 +1,21 @@ +The code works with CUDA 6.0. +If you are using this code for your project, please cite [our paper](https://yuemmawang.github.io/publications/wang-tpds2016.pdf): + +``` +Wang Y, Qian W, Zhang S, et al. A Learning Algorithm for Bayesian Networks and +Its Efficient Implementation on GPUs[J]. Parallel and Distributed Systems, IEEE +Transactions on, 2016, 27(1): 17-30. +``` + +``` +@article{wang2015learning, + title={A learning algorithm for Bayesian networks and its efficient implementation on GPUs}, + author={Wang, Yu and Qian, Weikang and Zhang, Shuchang and Liang, Xiaoyao and Yuan, Bo}, + journal={IEEE Transactions on Parallel and Distributed Systems}, + volume={27}, + number={1}, + pages={17--30}, + year={2015}, + publisher={IEEE} +} +``` diff --git a/workloads/realworld/async/BN/data125.cu b/workloads/realworld/async/BN/data125.cu new file mode 100644 index 0000000000000000000000000000000000000000..6bb370a636a330992e083f0b52f1f67a9a86040e --- /dev/null +++ b/workloads/realworld/async/BN/data125.cu @@ -0,0 +1,610 @@ +// The data are synthesized. 
+ +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=25; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0, 
+1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,1, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,1, 
+0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0, 
+1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,0,1,0, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, 
+0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, 
+1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,0,1,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0, 
+1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0, 
+0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0, 
+1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, 
+0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,0,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0, 
+0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, 
+1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,0,1,0,0,0,1, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0, 
+1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0, 
+0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0, 
+0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,1,0,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0, 
+0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, 
+1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1, 
+1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, 
+1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,1,0, 
+1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1, 
+1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1, 
+0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,0,0,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1, 
+1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,0,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,0, 
+1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0, 
+1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,0, 
+1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0, 
+1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,0, 
+1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,0, 
+1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,0, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,0,0,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1, 
+1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0, 
+1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,1,0, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0, 
+0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,1,1,0,0,0,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0, 
+1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,0,0,1,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,1,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0, 
+0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,1,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,1,1, 
+1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0, 
+1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,1, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,0, 
+1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,1,0,1, 
+0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,0,0, 
+1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,0,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,1,1, 
+1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,1,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,1,1, 
+0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, 
+1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0, 
+1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,1,0, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, 
+1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,0, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,1,0,0,0,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, 
+1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,1,1,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,1,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,1,0, 
+1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,0,1,1,0, 
+1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,1,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0, 
+0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0, 
+1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0, 
+1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,0,1,0,1,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0, 
+1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,1,1,1,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,1, +} +#endif diff --git a/workloads/realworld/async/BN/data25.cu b/workloads/realworld/async/BN/data25.cu new file mode 100644 index 0000000000000000000000000000000000000000..6af94f79766c6e36ee121f6c537f987f841bf7c0 --- /dev/null +++ b/workloads/realworld/async/BN/data25.cu @@ -0,0 +1,612 @@ +// The data are synthesized. 
+ +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=25; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1, 
+0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1, 
+1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1, 
+1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1, 
+1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0, 
+1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0, 
+1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0, 
+1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1, 
+1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1, 
+1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0, 
+0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0, 
+1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0, 
+1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +} + +#endif + diff --git a/workloads/realworld/async/BN/data30.cu b/workloads/realworld/async/BN/data30.cu new file mode 100644 index 0000000000000000000000000000000000000000..bf89729e319e920533f9d134c8a2dff9aa4bc022 --- /dev/null +++ b/workloads/realworld/async/BN/data30.cu @@ 
-0,0 +1,612 @@ +// The data are synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=30; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1, 
+1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0, 
+1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0, 
+1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0, 
+1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1, 
+1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1, 
+1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0, 
+1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1, 
+1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1, 
+1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0, 
+1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1, 
+0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, 
+1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +} + +#endif + diff --git a/workloads/realworld/async/BN/data40.cu b/workloads/realworld/async/BN/data40.cu new file mode 100644 index 0000000000000000000000000000000000000000..16d34d9dc4860d1dd24d604e501bccb43cae8095 --- /dev/null +++ b/workloads/realworld/async/BN/data40.cu @@ -0,0 +1,612 @@ +// The data are synthesized. 
+ +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=40; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1, 
+1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0, 
+1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1, 
+1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1, 
+1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0, 
+0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0, 
+1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0, 
+0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1, 
+1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1, 
+1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, 
+1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1, 
+1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1, 
+1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1, 
+1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0, 
+0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0, 
+1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0, 
+1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1, +} + +#endif + diff --git a/workloads/realworld/async/BN/data45.cu b/workloads/realworld/async/BN/data45.cu new file mode 100644 index 0000000000000000000000000000000000000000..b23e9a35e7c27948c0853710a06be462694df57d --- /dev/null +++ b/workloads/realworld/async/BN/data45.cu @@ -0,0 +1,616 @@ +// The data are synthesized. +#include +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=45; +const int STATE_N=2; +const int DATA_N=600; + + + +int data[DATA_N*NODE_N]= { +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0, 
+0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0, 
+1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0, 
+1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1, 
+1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1, 
+0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1, 
+1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0, 
+0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1, 
+1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0, 
+1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0, 
+0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1, 
+1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1, 
+1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0, 
+1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, 
+1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1, 
+0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0, 
+1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0, 
+1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1, 
+1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0, 
+1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, 
+1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1, 
+1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0, 
+0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1, 
+1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0, +} + + +#endif + diff --git a/workloads/realworld/async/BN/data50.cu b/workloads/realworld/async/BN/data50.cu new file mode 100644 index 0000000000000000000000000000000000000000..936a7aa4a67a1f949f1264477388a7eb5a93a1b4 --- /dev/null +++ 
b/workloads/realworld/async/BN/data50.cu @@ -0,0 +1,612 @@ +// The data are synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=50; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,0,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1, 
+1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0, 
+1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1, 
+1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1, 
+1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,0,1,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,1,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0, 
+0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,0,0,1,1,1, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0, 
+1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1, 
+1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1, 
+1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1, 
+0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,1,0,0,0,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0, 
+1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0, 
+0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1, 
+1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,1,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0, 
+1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0, 
+1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1, 
+1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0, 
+1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1, 
+1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,0,1,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0, 
+0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,0,1,0,0,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,1, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0, 
+1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0, +} + +#endif + diff --git a/workloads/realworld/async/BN/file_process.py b/workloads/realworld/async/BN/file_process.py new file mode 100644 index 0000000000000000000000000000000000000000..eeebbee70e59153ca0f1a960f4e8ffa0437693c3 --- /dev/null +++ b/workloads/realworld/async/BN/file_process.py @@ -0,0 +1,15 @@ +import os +filename = "data125.cu" +file_write = "data45.cu" + +f_w = open(file_write,"w") +with open(filename) as f: + content = f.readlines() + + for i, line in enumerate(content): + if i < 8: + f_w.write(line) + elif i >= 8: + f_w.write(line[0:90]) + f_w.write("\n") +f_w.close() \ No newline at end of file diff --git 
a/workloads/realworld/async/BN/findcudalib.mk b/workloads/realworld/async/BN/findcudalib.mk new file mode 100644 index 0000000000000000000000000000000000000000..f40c2c38e5510fdee2fdf59df00160b547c056c1 --- /dev/null +++ b/workloads/realworld/async/BN/findcudalib.mk @@ -0,0 +1,226 @@ +################################################################################ +# +# Copyright 1993-2013 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# findcudalib.mk is used to find the locations for CUDA libraries and other +# Unix Platforms. 
This is supported Mac OS X and Linux. +# +################################################################################ + +# OS Name (Linux or Darwin) +OSUPPER = $(shell uname -s 2>/dev/null | tr "[:lower:]" "[:upper:]") +OSLOWER = $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]") + +# Flags to detect 32-bit or 64-bit OS platform +OS_SIZE = $(shell uname -m | sed -e "s/i.86/32/" -e "s/x86_64/64/" -e "s/armv7l/32/") +OS_ARCH = $(shell uname -m | sed -e "s/i386/i686/") + +# Determine OS platform and unix distribution +ifeq ("$(OSLOWER)","linux") + # first search lsb_release + DISTRO = $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]") + DISTVER = $(shell lsb_release -r -s 2>/dev/null) + ifeq ("$(DISTRO)",'') + # second search and parse /etc/issue + DISTRO = $(shell more /etc/issue | awk '{print $$1}' | sed '1!d' | sed -e "/^$$/d" 2>/dev/null | tr "[:upper:]" "[:lower:]") + DISTVER= $(shell more /etc/issue | awk '{print $$2}' | sed '1!d' 2>/dev/null + endif + ifeq ("$(DISTRO)",'') + # third, we can search in /etc/os-release or /etc/{distro}-release + DISTRO = $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB") + DISTVER= $(shell awk '/DISTRIB_RELEASE/' /etc/*-release | sed 's/DISTRIB_RELEASE=//' | grep -v "DISTRIB_RELEASE") + endif +endif + +# search at Darwin (unix based info) +DARWIN = $(strip $(findstring DARWIN, $(OSUPPER))) +ifneq ($(DARWIN),) + SNOWLEOPARD = $(strip $(findstring 10.6, $(shell egrep "10\.6" /System/Library/CoreServices/SystemVersion.plist))) + LION = $(strip $(findstring 10.7, $(shell egrep "10\.7" /System/Library/CoreServices/SystemVersion.plist))) + MOUNTAIN = $(strip $(findstring 10.8, $(shell egrep "10\.8" /System/Library/CoreServices/SystemVersion.plist))) + MAVERICKS = $(strip $(findstring 10.9, $(shell egrep "10\.9" /System/Library/CoreServices/SystemVersion.plist))) +endif + +# Common binaries +GCC ?= g++ +CLANG ?= /usr/bin/clang + +ifeq 
("$(OSUPPER)","LINUX") + NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(GCC) +else + # for some newer versions of XCode, CLANG is the default compiler, so we need to include this + ifneq ($(MAVERICKS),) + NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(CLANG) + STDLIB ?= -stdlib=libstdc++ + else + NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(GCC) + endif +endif + +# Take command line flags that override any of these settings +ifeq ($(i386),1) + OS_SIZE = 32 + OS_ARCH = i686 +endif +ifeq ($(x86_64),1) + OS_SIZE = 64 + OS_ARCH = x86_64 +endif +ifeq ($(ARMv7),1) + OS_SIZE = 32 + OS_ARCH = armv7l +endif + +ifeq ("$(OSUPPER)","LINUX") + # Each Linux Distribuion has a set of different paths. This applies especially when using the Linux RPM/debian packages + ifeq ("$(DISTRO)","ubuntu") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","kubuntu") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","debian") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","suse") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","suse linux") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","opensuse") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","fedora") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","redhat") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= 
/usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","red") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","redhatenterpriseworkstation") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH ?= /usr/lib + endif + endif + ifeq ("$(DISTRO)","centos") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + + ifeq ($(ARMv7),1) + CUDAPATH := /usr/arm-linux-gnueabihf/lib + CUDALINK := -L/usr/arm-linux-gnueabihf/lib + ifneq ($(TARGET_FS),) + CUDAPATH += $(TARGET_FS)/usr/lib/nvidia-current + CUDALINK += -L$(TARGET_FS)/usr/lib/nvidia-current + endif + endif + + # Search for Linux distribution path for libcuda.so + CUDALIB ?= $(shell find $(CUDAPATH) $(DFLT_PATH) -name libcuda.so -print 2>/dev/null) + + ifeq ("$(CUDALIB)",'') + $(info >>> WARNING - CUDA Driver libcuda.so is not found. Please check and re-install the NVIDIA driver. 
<<<) + EXEC=@echo "[@]" + endif +else + # This would be the Mac OS X path if we had to do anything special +endif + diff --git a/workloads/realworld/async/BN/ordergraph.cu b/workloads/realworld/async/BN/ordergraph.cu new file mode 100644 index 0000000000000000000000000000000000000000..d96a0b85460d0648a4e632a86b1c6e1aab15633b --- /dev/null +++ b/workloads/realworld/async/BN/ordergraph.cu @@ -0,0 +1,733 @@ +#include +#include +#include +#include +#include +//#include +// includes CUDA +#include +// includes, kernels +#include "ordergraph_kernel.cu" +; + +#include "../../../common/cpu_timestamps.h" +#include "../../../common/cupti_add.h" + +const int HIGHEST = 3; +int taskperthr = 1; +int sizepernode; +int ITER = 100; + +// global var +float preScore = -99999999999.0f; +float score = 0.0; +float maxScore[HIGHEST] = {-999999999.0f}; +bool orders[NODE_N][NODE_N]; +bool preOrders[NODE_N][NODE_N]; +bool preGraph[NODE_N][NODE_N]; +bool bestGraph[HIGHEST][NODE_N][NODE_N]; +bool graph[NODE_N][NODE_N]; +// float prior[NODE_N][NODE_N]; +float *localscore, *D_localscore, *D_Score, *scores; +float *LG; +bool *D_parent; +int *D_resP, *parents; + +void initial(); // initial orders and data +int genOrders(); // swap +int ConCore(); // discard new order or not +bool getparent(int *bit, int *pre, int posN, int *parent, int *parN, + int time); // get every possible set of parents for a node +void incr(int *bit, int n); // binary code increases 1 each time +void incrS(int *bit, int n); // STATE_N code increases 1 each time +bool getState( + int parN, int *state, + int time); // get every possible combination of state for a parent set +float logGamma(int N); // log and gamma +float findBestGraph(); +void genScore(); +int convert(int *parent, int parN); +void sortGraph(); +void swap(int a, int b); +void Pre_logGamma(); +int findindex(int *arr, int size); +int C(int n, int a); + +FILE *fpout; + +int main(int argc, char *argv[]) { + /* + for(i=0;i maxScore[HIGHEST - 1]) { + 
maxScore[HIGHEST - 1] = preScore; + for (a = 0; a < NODE_N; a++) { + for (b = 0; b < NODE_N; b++) { + bestGraph[HIGHEST - 1][a][b] = preGraph[a][b]; + } + } + b = HIGHEST - 1; + for (a = HIGHEST - 2; a >= 0; a--) { + if (maxScore[b] > maxScore[a]) { + swap(a, b); + tmpd = maxScore[a]; + maxScore[a] = maxScore[b]; + maxScore[b] = tmpd; + b = a; + } + } + } + } + + } // endwhile + + cudaFreeHost(localscore); + cudaFree(D_localscore); + cudaFree(D_parent); + + cudaFreeHost(scores); + cudaFreeHost(parents); + cudaFree(D_Score); + cudaFree(D_resP); + + /* + for(j=0;j max) { + max = maxScore[i]; + maxi = i; + } + } + + swap(j, maxi); + tmp = maxScore[j]; + maxScore[j] = max; + maxScore[maxi] = tmp; + } +} + +void swap(int a, int b) { + int i, j; + bool tmp; + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + + tmp = bestGraph[a][i][j]; + bestGraph[a][i][j] = bestGraph[b][i][j]; + bestGraph[b][i][j] = tmp; + } + } +} + +void initial() { + int i, j, tmp, a, b, r; + bool tmpd; + tmp = 1; + for (i = 1; i <= 4; i++) { + tmp += C(NODE_N - 1, i); + } + sizepernode = tmp; + tmp *= NODE_N; + + cudaMallocHost((void **)&localscore, tmp * sizeof(float)); + + for (i = 0; i < tmp; i++) + localscore[i] = 0; + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) + orders[i][j] = 0; + } + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < i; j++) + orders[i][j] = 1; + } + r = rand() % 10000; + for (i = 0; i < r; i++) { + a = rand() % NODE_N; + b = rand() % NODE_N; + for (j = 0; j < NODE_N; j++) { + tmpd = orders[j][a]; + orders[j][a] = orders[j][b]; + orders[j][b] = tmpd; + } + + for (j = 0; j < NODE_N; j++) { + tmpd = orders[a][j]; + orders[a][j] = orders[b][j]; + orders[b][j] = tmpd; + } + } + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + preOrders[i][j] = orders[i][j]; + } + } +} + +// generate ramdom order +int genOrders() { + + int a, b, j; + bool tmp; + a = rand() % NODE_N; + b = rand() % NODE_N; + + for (j = 0; j < NODE_N; j++) { + 
tmp = orders[a][j]; + orders[a][j] = orders[b][j]; + orders[b][j] = tmp; + } + for (j = 0; j < NODE_N; j++) { + tmp = orders[j][a]; + orders[j][a] = orders[j][b]; + orders[j][b] = tmp; + } + + return 1; +} + +// decide leave or discard an order +int ConCore() { + int i, j; + float tmp; + tmp = log((rand() % 100000) / 100000.0); + if (tmp < (score - preScore)) { + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + preOrders[i][j] = orders[i][j]; + preGraph[i][j] = graph[i][j]; + } + } + preScore = score; + + return 1; + } + + return 0; +} + +void genScore() { + int *D_data; + float *D_LG; + dim3 grid(sizepernode / 256 + 1, 1, 1); + dim3 threads(256, 1, 1); + + Pre_logGamma(); + // cudaPrintfInit(); + cudaMalloc((void **)&D_data, NODE_N * DATA_N * sizeof(int)); + cudaMalloc((void **)&D_localscore, NODE_N * sizepernode * sizeof(float)); + cudaMalloc((void **)&D_LG, (DATA_N + 2) * sizeof(float)); + cudaMemset(D_localscore, 0.0, NODE_N * sizepernode * sizeof(float)); + cudaMemcpy(D_data, data, NODE_N * DATA_N * sizeof(int), + cudaMemcpyHostToDevice); + cudaMemcpy(D_LG, LG, (DATA_N + 2) * sizeof(float), cudaMemcpyHostToDevice); + genScoreKernel<<>>(sizepernode, D_localscore, D_data, D_LG); + cudaDeviceSynchronize(); + cudaMemcpy(localscore, D_localscore, NODE_N * sizepernode * sizeof(float), + cudaMemcpyDeviceToHost); + + // cudaPrintfDisplay(stdout, true); + // cudaPrintfEnd(); + + cudaFreeHost(LG); + cudaFree(D_LG); + cudaFree(D_data); + + cudaMallocHost((void **)&scores, + (sizepernode / (256 * taskperthr) + 1) * sizeof(float)); + cudaMallocHost((void **)&parents, + (sizepernode / (256 * taskperthr) + 1) * 4 * sizeof(int)); + cudaMalloc((void **)&D_Score, + (sizepernode / (256 * taskperthr) + 1) * sizeof(float)); + cudaMalloc((void **)&D_parent, NODE_N * sizeof(bool)); + cudaMalloc((void **)&D_resP, + (sizepernode / (256 * taskperthr) + 1) * 4 * sizeof(int)); +} + +int convert(int *parent, int parN) { + int i, j, w = 1, tmp = 0; + j = 0; + for (i = 0; 
parN > 0 && i <= parent[parN - 1]; i++) { + if (parent[j] == i) { + j++; + tmp += w; + } + w *= 2; + } + + return tmp; +} + +void Pre_logGamma() { + + cudaMallocHost((void **)&LG, (DATA_N + 2) * sizeof(float)); + + LG[1] = log(1.0); + float i; + for (i = 2; i <= DATA_N + 1; i++) { + LG[(int)i] = LG[(int)i - 1] + log((float)i); + } +} + +void incr(int *bit, int n) { + + bit[n]++; + if (bit[n] >= 2) { + bit[n] = 0; + incr(bit, n + 1); + } + + return; +} + +void incrS(int *bit, int n) { + + bit[n]++; + if (bit[n] >= STATE_N) { + bit[n] = 0; + incr(bit, n + 1); + } + + return; +} + +bool getState(int parN, int *state, int time) { + int j = 1; + + j = pow(STATE_N, (float)parN) - 1; + + if (time > j) + return false; + + if (time >= 1) + incrS(state, 0); + + return true; +} + +bool getparent(int *bit, int *pre, int posN, int *parent, int *parN, int time) { + int i, j = 1; + + *parN = 0; + if (time == 0) + return true; + + for (i = 0; i < posN; i++) { + j = j * 2; + } + j--; + + if (time > j) + return false; + + incr(bit, 0); + + for (i = 0; i < posN; i++) { + if (bit[i] == 1) { + parent[(*parN)++] = pre[i]; + } + } + + return true; +} + +float findBestGraph() { + float bestls = -99999999; + int bestparent[5]; + int bestpN, total; + int node, index; + int pre[NODE_N] = {0}; + int parent[NODE_N] = {0}; + int posN = 0, i, j, parN, tmp, k, l; + float ls = -99999999999, score = 0; + int blocknum; + + for (i = 0; i < NODE_N; i++) + for (j = 0; j < NODE_N; j++) + graph[i][j] = 0; + + for (node = 0; node < NODE_N; node++) { + + bestls = -99999999; + posN = 0; + + for (i = 0; i < NODE_N; i++) { + if (orders[node][i] == 1) { + pre[posN++] = i; + } + } + + if (posN >= 0) { + total = C(posN, 4) + C(posN, 3) + C(posN, 2) + posN + 1; + taskperthr = 1; + blocknum = total / (256 * taskperthr) + 1; + + int nbatches = MIN_NBATCHES; + + int blocknum_max = total / (BLOCK_SIZE * MIN_NBATCHES * taskperthr) + 1; + if (blocknum_max >= MAX_NBLOCKS) { + blocknum = MAX_NBLOCKS; + nbatches = (total 
+ 1) / (BLOCK_SIZE * MAX_NBLOCKS * taskperthr); + } else { + blocknum = blocknum_max; + } + + cudaMemset(D_resP, 0, blocknum * 4 * sizeof(int)); + cudaMemset(D_Score, -999999.0, blocknum * nbatches * sizeof(float)); + cudaMemcpy(D_parent, orders[node], NODE_N * sizeof(bool), + cudaMemcpyHostToDevice); + + computeKernel<<>>( + taskperthr, sizepernode, D_localscore, D_parent, node, total, D_Score, + D_resP, nbatches); + cudaDeviceSynchronize(); + cudaMemcpy(parents, D_resP, blocknum * 4 * sizeof(int), + cudaMemcpyDeviceToHost); + cudaMemcpy(scores, D_Score, blocknum * sizeof(float), + cudaMemcpyDeviceToHost); + + for (i = 0; i < blocknum * nbatches; i++) { + + if (scores[i] > bestls) { + + bestls = scores[i]; + + parN = 0; + for (tmp = 0; tmp < 4; tmp++) { + if (parents[i * 4 + tmp] < 0) + break; + + bestparent[tmp] = parents[i * 4 + tmp]; + + parN++; + } + + bestpN = parN; + } + } + } else { + if (posN >= 4) { + for (i = 0; i < posN; i++) { + for (j = i + 1; j < posN; j++) { + for (k = j + 1; k < posN; k++) { + for (l = k + 1; l < posN; l++) { + parN = 4; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + if (pre[j] > node) + parent[2] = pre[j]; + else + parent[2] = pre[j] + 1; + if (pre[k] > node) + parent[3] = pre[k]; + else + parent[3] = pre[k] + 1; + if (pre[l] > node) + parent[4] = pre[l]; + else + parent[4] = pre[l] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + } + } + } + + if (posN >= 3) { + for (i = 0; i < posN; i++) { + for (j = i + 1; j < posN; j++) { + for (k = j + 1; k < posN; k++) { + + parN = 3; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + if (pre[j] > node) + parent[2] = pre[j]; + else + parent[2] = pre[j] + 1; + if (pre[k] > node) + parent[3] = pre[k]; + else + parent[3] = pre[k] + 1; + + index = 
findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + } + } + + if (posN >= 2) { + for (i = 0; i < posN; i++) { + for (j = i + 1; j < posN; j++) { + + parN = 2; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + if (pre[j] > node) + parent[2] = pre[j]; + else + parent[2] = pre[j] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + } + + if (posN >= 1) { + for (i = 0; i < posN; i++) { + + parN = 1; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + + parN = 0; + index = sizepernode * node; + + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = 0; + } + } + if (bestls > -99999999) { + + for (i = 0; i < bestpN; i++) { + if (bestparent[i] < node) + graph[node][bestparent[i] - 1] = 1; + else + graph[node][bestparent[i]] = 1; + } + score += bestls; + } + } + + return score; +} + +int findindex(int *arr, int size) { // reminder: arr[0] has to be 0 && size == + // array size-1 && index start from 0 + int i, j, index = 0; + + for (i = 1; i < size; i++) { + index += C(NODE_N - 1, i); + } + + for (i = 1; i <= size - 1; i++) { + for (j = arr[i - 1] + 1; j <= arr[i] - 1; j++) { + index += C(NODE_N - 1 - j, size - i); + } + } + + index += arr[size] - arr[size - 1]; + + return index; +} + +int C(int n, int a) { + int i, res = 1, atmp = a; + + for (i = 0; i < atmp; i++) { + res *= n; + n--; + } + + for (i = 0; i < atmp; i++) { 
+ res /= a; + a--; + } + + return res; +} \ No newline at end of file diff --git a/workloads/realworld/async/BN/ordergraph_kernel.cu b/workloads/realworld/async/BN/ordergraph_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..f6cbe5f9465edf7aa0f1d3f3a04bddc4d4f72def --- /dev/null +++ b/workloads/realworld/async/BN/ordergraph_kernel.cu @@ -0,0 +1,352 @@ +#ifndef _ORDERGRAPH_KERNEL_H_ +#define _ORDERGRAPH_KERNEL_H_ + +#include + +#ifdef DATA_25 +#include "data25.cu" +#endif +#ifdef DATA_30 +#include "data30.cu" +#endif +#ifdef DATA_40 +#include "data40.cu" +#endif +#ifdef DATA_45 +#include "data45.cu" +#endif +#ifdef DATA_50 +#include "data50.cu" +#endif +#ifdef DATA_125 +#include "data125.cu" +#endif +; + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define BLOCK_SIZE 256 +#define MAX_NBLOCKS 1024 +#define MIN_NBATCHES 16 + + +__device__ void Dincr(int *bit, int n); +__device__ void DincrS(int *bit, int n); +__device__ bool D_getState(int parN, int *sta, int time); +__device__ void D_findComb(int *comb, int l, int n); +__device__ int D_findindex(int *arr, int size); +__device__ int D_C(int n, int a); + +__global__ void genScoreKernel(int sizepernode, float *D_localscore, + int *D_data, float *D_LG) { + int id = blockIdx.x * BLOCK_SIZE + threadIdx.x; + int node, index; + bool flag; + int parent[5] = {0}; + int pre[NODE_N] = {0}; + int state[5] = {0}; + int i, j, parN = 0, tmp, t; + int t1 = 0, t2 = 0; + float ls = 0; + int Nij[STATE_N] = {0}; + + if (id < sizepernode) { + + D_findComb(parent, id, NODE_N - 1); + + for (i = 0; i < 4; i++) { + if (parent[i] > 0) + parN++; + } + + for (node = 0; node < NODE_N; node++) { + + j = 1; + for (i = 0; i < NODE_N; i++) { + if (i != node) + pre[j++] = i; + } + + for (tmp = 0; tmp < parN; tmp++) + state[tmp] = 0; + + index = sizepernode * node + id; + + // priors + /* + for(tmp=1;tmp<=4;tmp++){ + 
localscore[index]+=100*(prior[node][pre[parent[tmp]]]-0.5)*(prior[node][pre[parent[tmp]]]-0.5)*(prior[node][pre[parent[tmp]]]-0.5); + } + */ + t = 0; + while (D_getState(parN, state, t++)) { // for get state + // printf("test %u\n",id); + ls = 0; + for (tmp = 0; tmp < STATE_N; tmp++) + Nij[tmp] = 0; + + for (t1 = 0; t1 < DATA_N; t1++) { + flag = true; + for (t2 = 0; t2 < parN; t2++) { + if (D_data[t1 * NODE_N + pre[parent[t2]]] != state[t2]) { + flag = false; + break; + } + } + if (!flag) + continue; + + Nij[D_data[t1 * NODE_N + node]]++; + } + + tmp = STATE_N - 1; + + for (t1 = 0; t1 < STATE_N; t1++) { + ls += D_LG[Nij[t1]]; + tmp += Nij[t1]; + } + + ls -= D_LG[tmp]; + ls += D_LG[STATE_N - 1]; + + D_localscore[index] += ls; + } + } + } +} + +__global__ void computeKernel(int taskperthr, int sizepernode, + float *D_localscore, bool *D_parent, int node, + int total, float *D_Score, int *D_resP, + int nbatches) { + cooperative_groups::thread_block block = + cooperative_groups::this_thread_block(); + pipeline pipe; + __shared__ float lsinblock[PREFETCH_COUNT][BLOCK_SIZE]; + + int fetch = 0; + int end_tile = fetch + nbatches; + int bestparent[4] = {0}, parent[5] = {-1}; + + for (int compute = fetch; compute < end_tile; compute++) { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) { + unsigned int bid = blockIdx.x * nbatches + fetch; + unsigned int tid = threadIdx.x; + unsigned int id = bid * (BLOCK_SIZE * nbatches) + tid; + + int posN = 1, i, index, tmp; + int pre[NODE_N] = {0}; + int parN = 0; + + float bestls = -999999999999999, ls; + + for (i = 0; i < NODE_N; i++) { + if (D_parent[i] == 1) { + pre[posN++] = i; + } + } + + for (i = 0; i < taskperthr && ((id * taskperthr + i) < total); i++) { + + D_findComb(parent, id * taskperthr + i, posN); + + for (parN = 0; parN < 4; parN++) { + if (parent[parN] < 0) + break; + if (pre[parent[parN]] > node) + parent[parN] = pre[parent[parN]]; + else + parent[parN] = pre[parent[parN]] + 1; + } + + for (tmp = 
parN; tmp > 0; tmp--) { + parent[tmp] = parent[tmp - 1]; + } + parent[0] = 0; + + index = D_findindex(parent, parN); + index += sizepernode * node; + + ls = D_localscore[index]; + + if (ls > bestls) { + bestls = ls; + for (tmp = 0; tmp < 4; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + + memcpy_async(lsinblock[fetch % PREFETCH_COUNT][tid], bestls, pipe); + pipe.commit(); + } + if (fetch == end_tile) { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + int i, t; + unsigned int bid = blockIdx.x * nbatches + compute; + unsigned int tid = threadIdx.x; + // unsigned int id = bid * (BLOCK_SIZE * nbatches) + tid; + + for (i = BLOCK_SIZE / 2; i >= 1; i /= 2) { + if (tid < i) { + if (lsinblock[compute % PREFETCH_COUNT][tid + i] > + lsinblock[compute % PREFETCH_COUNT][tid] && + lsinblock[compute % PREFETCH_COUNT][tid + i] < 0) { + lsinblock[compute % PREFETCH_COUNT][tid] = + lsinblock[compute % PREFETCH_COUNT][tid + i]; + lsinblock[compute % PREFETCH_COUNT][tid + i] = (float)(tid + i); + } else if (lsinblock[compute % PREFETCH_COUNT][tid + i] < + lsinblock[compute % PREFETCH_COUNT][tid] && + lsinblock[compute % PREFETCH_COUNT][tid] < 0) { + lsinblock[compute % PREFETCH_COUNT][tid + i] = (float)tid; + } else if (lsinblock[tid] > 0 && lsinblock[tid + i] < 0) { + lsinblock[compute % PREFETCH_COUNT][tid] = + lsinblock[compute % PREFETCH_COUNT][tid + i]; + lsinblock[compute % PREFETCH_COUNT][tid + i] = (float)(tid + i); + } else if (lsinblock[compute % PREFETCH_COUNT][tid] < 0 && + lsinblock[compute % PREFETCH_COUNT][tid + i] > 0) { + lsinblock[compute % PREFETCH_COUNT][tid + i] = (float)tid; + } + } + block.sync(); + } + block.sync(); + + if (tid == 0) { + D_Score[bid] = lsinblock[compute % PREFETCH_COUNT][0]; + t = 0; + for (i = 0; i < 7 && t < 128 && t >= 0; i++) { + t = (int)lsinblock[compute % PREFETCH_COUNT][(int)powf(2.0, i) + t]; + } + lsinblock[compute % PREFETCH_COUNT][0] = (float)t; + } 
+ block.sync(); + + if (tid == (int)lsinblock[compute % PREFETCH_COUNT][0]) { + for (i = 0; i < 4; i++) { + D_resP[bid * 4 + i] = bestparent[i]; + } + } + } +} + +__device__ void Dincr(int *bit, int n) { + + while (n <= NODE_N) { + bit[n]++; + if (bit[n] >= 2) { + bit[n] = 0; + n++; + } else { + break; + } + } + + return; +} + +__device__ void DincrS(int *bit, int n) { + + bit[n]++; + if (bit[n] >= STATE_N) { + bit[n] = 0; + Dincr(bit, n + 1); + } + + return; +} + +__device__ bool D_getState(int parN, int *sta, int time) { + int i, j = 1; + + for (i = 0; i < parN; i++) { + j *= STATE_N; + } + j--; + if (time > j) + return false; + + if (time >= 1) + DincrS(sta, 0); + + return true; +} + +__device__ void D_findComb(int *comb, int l, int n) { + const int len = 4; + if (l == 0) { + for (int i = 0; i < len; i++) + comb[i] = -1; + return; + } + int sum = 0; + int k = 1; + + while (sum < l) + sum += D_C(n, k++); + l -= sum - D_C(n, --k); + int low = 0; + int pos = 0; + while (k > 1) { + sum = 0; + int s = 1; + while (sum < l) + sum += D_C(n - s++, k - 1); + l -= sum - D_C(n - (--s), --k); + low += s; + comb[pos++] = low; + n -= s; + } + comb[pos] = low + l; + for (int i = pos + 1; i < 4; i++) + comb[i] = -1; +} + +__device__ int D_findindex(int *arr, + int size) { // reminder: arr[0] has to be 0 && size + // == array size-1 && index start from 0 + int i, j, index = 0; + + for (i = 1; i < size; i++) { + index += D_C(NODE_N - 1, i); + } + + for (i = 1; i <= size - 1; i++) { + for (j = arr[i - 1] + 1; j <= arr[i] - 1; j++) { + index += D_C(NODE_N - 1 - j, size - i); + } + } + + index += arr[size] - arr[size - 1]; + + return index; +} + +__device__ int D_C(int n, int a) { + int i, res = 1, atmp = a; + + for (i = 0; i < atmp; i++) { + res *= n; + n--; + } + + for (i = 0; i < atmp; i++) { + res /= a; + a--; + } + + return res; +} + +#endif diff --git a/workloads/realworld/async/BN/run.sh b/workloads/realworld/async/BN/run.sh new file mode 100755 index 
0000000000000000000000000000000000000000..f87817975334cff247b1bdf91651d41062aa8320 --- /dev/null +++ b/workloads/realworld/async/BN/run.sh @@ -0,0 +1,5 @@ +# ./ordergraph_25 +# ./ordergraph_30 +# ./ordergraph_40 +# ./ordergraph_45 +./ordergraph_50 \ No newline at end of file diff --git a/workloads/realworld/async/BN/run_super.sh b/workloads/realworld/async/BN/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..5c45d88db0716b0b4b0828ba397cbd918d1612c0 --- /dev/null +++ b/workloads/realworld/async/BN/run_super.sh @@ -0,0 +1 @@ +./ordergraph_50 diff --git a/workloads/realworld/async/backprop/Makefile b/workloads/realworld/async/backprop/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..286cd40af79bbb80b6d86aad9bd0d2c0d1a846e0 --- /dev/null +++ b/workloads/realworld/async/backprop/Makefile @@ -0,0 +1,47 @@ +include ../../../common/make.config + +# C compiler +CC = gcc +CC_FLAGS = -g -O2 + +# CUDA compiler +NVCC = $(CUDA_DIR)/bin/nvcc +NVCC_FLAGS = -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -arch=sm_80 + +# 'make dbg=1' enables NVCC debugging +ifeq ($(dbg),1) + NVCC_FLAGS += -g -O0 +else + NVCC_FLAGS += -O2 +endif + +# 'make emu=1' compiles the CUDA kernels for emulation +ifeq ($(emu),1) + NVCC_FLAGS += -deviceemu +endif + + +backprop: backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + $(NVCC) $(NVCC_FLAGS) backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -o backprop $(NVCC_FLAGS) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++ + +# backprop: backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp +# $(CC) $(CC_FLAGS) backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -o backprop -L$(CUDA_LIB_DIR) 
-L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++ + +%.o: %.[ch] + $(CC) $(CC_FLAGS) $< -c + +facetrain.o: facetrain.c backprop.h + $(CC) $(CC_FLAGS) facetrain.c -c + +backprop.o: backprop.c backprop.h + $(CC) $(CC_FLAGS) backprop.c -c + +backprop_cuda.o: backprop_cuda.cu backprop.h $(CUPTI_ADD_COMMON)/cupti_add.h $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + $(NVCC) $(NVCC_FLAGS) -c backprop_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +imagenet.o: imagenet.c backprop.h + $(CC) $(CC_FLAGS) imagenet.c -c + + +clean: + rm -f *.o *~ backprop backprop_cuda.linkinfo diff --git a/workloads/realworld/async/backprop/backprop.c b/workloads/realworld/async/backprop/backprop.c new file mode 100644 index 0000000000000000000000000000000000000000..3a38f012b785f8cbaec7f9c33e9ae58b9ee92ae5 --- /dev/null +++ b/workloads/realworld/async/backprop/backprop.c @@ -0,0 +1,502 @@ +/* + ****************************************************************** + * HISTORY + * 15-Oct-94 Jeff Shufelt (js), Carnegie Mellon University + * Prepared for 15-681, Fall 1994. + * Modified by Shuai Che + ****************************************************************** + */ +#include +#include +#include +#include +#include "backprop.h" +#include +//#define OPEN + +#define ABS(x) (((x) > 0.0) ? (x) : (-(x))) + +#define fastcopy(to,from,len)\ +{\ + register char *_to,*_from;\ + register int _i,_l;\ + _to = (char *)(to);\ + _from = (char *)(from);\ + _l = (len);\ + for (_i = 0; _i < _l; _i++) *_to++ = *_from++;\ +} + +/*** Return random number between 0.0 and 1.0 ***/ +float drnd() +{ + return ((float) rand() / (float) BIGRND); +} + +/*** Return random number between -1.0 and 1.0 ***/ +float dpn1() +{ + return ((drnd() * 2.0) - 1.0); +} + +/*** The squashing function. Currently, it's a sigmoid. 
***/ + +float squash(x) +float x; +{ + float m; + //x = -x; + //m = 1 + x + x*x/2 + x*x*x/6 + x*x*x*x/24 + x*x*x*x*x/120; + //return(1.0 / (1.0 + m)); + return (1.0 / (1.0 + exp(-x))); +} + + +/*** Allocate 1d array of floats ***/ + +float *alloc_1d_dbl(n) +int n; +{ + float *new; + + new = (float *) malloc ((unsigned) (n * sizeof (float))); + if (new == NULL) { + printf("ALLOC_1D_DBL: Couldn't allocate array of floats\n"); + return (NULL); + } + return (new); +} + + +/*** Allocate 2d array of floats ***/ + +float **alloc_2d_dbl(m, n) +int m, n; +{ + int i; + float **new; + + new = (float **) malloc ((unsigned) (m * sizeof (float *))); + if (new == NULL) { + printf("ALLOC_2D_DBL: Couldn't allocate array of dbl ptrs\n"); + return (NULL); + } + + for (i = 0; i < m; i++) { + new[i] = alloc_1d_dbl(n); + } + + return (new); +} + + +bpnn_randomize_weights(w, m, n) +float **w; +int m, n; +{ + int i, j; + + for (i = 0; i <= m; i++) { + for (j = 0; j <= n; j++) { + w[i][j] = (float) rand()/RAND_MAX; + // w[i][j] = dpn1(); + } + } +} + +bpnn_randomize_row(w, m) +float *w; +int m; +{ + int i; + for (i = 0; i <= m; i++) { + //w[i] = (float) rand()/RAND_MAX; + w[i] = 0.1; + } +} + + +bpnn_zero_weights(w, m, n) +float **w; +int m, n; +{ + int i, j; + + for (i = 0; i <= m; i++) { + for (j = 0; j <= n; j++) { + w[i][j] = 0.0; + } + } +} + + +void bpnn_initialize(seed) +{ + printf("Random number generator seed: %d\n", seed); + srand(seed); +} + + +BPNN *bpnn_internal_create(n_in, n_hidden, n_out) +int n_in, n_hidden, n_out; +{ + BPNN *newnet; + + newnet = (BPNN *) malloc (sizeof (BPNN)); + if (newnet == NULL) { + printf("BPNN_CREATE: Couldn't allocate neural network\n"); + return (NULL); + } + + newnet->input_n = n_in; + newnet->hidden_n = n_hidden; + newnet->output_n = n_out; + newnet->input_units = alloc_1d_dbl(n_in + 1); + newnet->hidden_units = alloc_1d_dbl(n_hidden + 1); + newnet->output_units = alloc_1d_dbl(n_out + 1); + + newnet->hidden_delta = alloc_1d_dbl(n_hidden + 1); + 
newnet->output_delta = alloc_1d_dbl(n_out + 1); + newnet->target = alloc_1d_dbl(n_out + 1); + + newnet->input_weights = alloc_2d_dbl(n_in + 1, n_hidden + 1); + newnet->hidden_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); + + newnet->input_prev_weights = alloc_2d_dbl(n_in + 1, n_hidden + 1); + newnet->hidden_prev_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); + + return (newnet); +} + + +void bpnn_free(net) +BPNN *net; +{ + int n1, n2, i; + + n1 = net->input_n; + n2 = net->hidden_n; + + free((char *) net->input_units); + free((char *) net->hidden_units); + free((char *) net->output_units); + + free((char *) net->hidden_delta); + free((char *) net->output_delta); + free((char *) net->target); + + for (i = 0; i <= n1; i++) { + free((char *) net->input_weights[i]); + free((char *) net->input_prev_weights[i]); + } + free((char *) net->input_weights); + free((char *) net->input_prev_weights); + + for (i = 0; i <= n2; i++) { + free((char *) net->hidden_weights[i]); + free((char *) net->hidden_prev_weights[i]); + } + free((char *) net->hidden_weights); + free((char *) net->hidden_prev_weights); + + free((char *) net); +} + + +/*** Creates a new fully-connected network from scratch, + with the given numbers of input, hidden, and output units. + Threshold units are automatically included. All weights are + randomly initialized. + + Space is also allocated for temporary storage (momentum weights, + error computations, etc). 
+***/ + +BPNN *bpnn_create(n_in, n_hidden, n_out) +int n_in, n_hidden, n_out; +{ + + BPNN *newnet; + + newnet = bpnn_internal_create(n_in, n_hidden, n_out); + +#ifdef INITZERO + bpnn_zero_weights(newnet->input_weights, n_in, n_hidden); +#else + bpnn_randomize_weights(newnet->input_weights, n_in, n_hidden); +#endif + bpnn_randomize_weights(newnet->hidden_weights, n_hidden, n_out); + bpnn_zero_weights(newnet->input_prev_weights, n_in, n_hidden); + bpnn_zero_weights(newnet->hidden_prev_weights, n_hidden, n_out); + bpnn_randomize_row(newnet->target, n_out); + return (newnet); +} + + +void bpnn_layerforward(l1, l2, conn, n1, n2) +float *l1, *l2, **conn; +int n1, n2; +{ + float sum; + int j, k; + + /*** Set up thresholding unit ***/ + l1[0] = 1.0; +#ifdef OPEN + omp_set_num_threads(NUM_THREAD); + #pragma omp parallel for shared(conn, n1, n2, l1) private(k, j) reduction(+: sum) schedule(static) +#endif + /*** For each unit in second layer ***/ + for (j = 1; j <= n2; j++) { + + /*** Compute weighted sum of its inputs ***/ + sum = 0.0; + for (k = 0; k <= n1; k++) { + sum += conn[k][j] * l1[k]; + } + l2[j] = squash(sum); + } +} + +//extern "C" +void bpnn_output_error(delta, target, output, nj, err) +float *delta, *target, *output, *err; +int nj; +{ + int j; + float o, t, errsum; + errsum = 0.0; + for (j = 1; j <= nj; j++) { + o = output[j]; + t = target[j]; + delta[j] = o * (1.0 - o) * (t - o); + errsum += ABS(delta[j]); + } + *err = errsum; +} + + +void bpnn_hidden_error(delta_h, + nh, + delta_o, + no, + who, + hidden, + err) +float *delta_h, *delta_o, *hidden, **who, *err; +int nh, no; +{ + int j, k; + float h, sum, errsum; + + errsum = 0.0; + for (j = 1; j <= nh; j++) { + h = hidden[j]; + sum = 0.0; + for (k = 1; k <= no; k++) { + sum += delta_o[k] * who[j][k]; + } + delta_h[j] = h * (1.0 - h) * sum; + errsum += ABS(delta_h[j]); + } + *err = errsum; +} + + +void bpnn_adjust_weights(delta, ndelta, ly, nly, w, oldw) +float *delta, *ly, **w, **oldw; +{ + float new_dw; + int 
k, j; + ly[0] = 1.0; + //eta = 0.3; + //momentum = 0.3; + +#ifdef OPEN + omp_set_num_threads(NUM_THREAD); + #pragma omp parallel for \ + shared(oldw, w, delta) \ + private(j, k, new_dw) \ + firstprivate(ndelta, nly, momentum) +#endif + for (j = 1; j <= ndelta; j++) { + for (k = 0; k <= nly; k++) { + new_dw = ((ETA * delta[j] * ly[k]) + (MOMENTUM * oldw[k][j])); + w[k][j] += new_dw; + oldw[k][j] = new_dw; + } + } +} + + +void bpnn_feedforward(net) +BPNN *net; +{ + int in, hid, out; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + + /*** Feed forward input activations. ***/ + bpnn_layerforward(net->input_units, net->hidden_units, + net->input_weights, in, hid); + bpnn_layerforward(net->hidden_units, net->output_units, + net->hidden_weights, hid, out); + +} + + +void bpnn_train(net, eo, eh) +BPNN *net; +float *eo, *eh; +{ + int in, hid, out; + float out_err, hid_err; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + + /*** Feed forward input activations. ***/ + bpnn_layerforward(net->input_units, net->hidden_units, + net->input_weights, in, hid); + bpnn_layerforward(net->hidden_units, net->output_units, + net->hidden_weights, hid, out); + + /*** Compute error on output and hidden units. ***/ + bpnn_output_error(net->output_delta, net->target, net->output_units, + out, &out_err); + bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, + net->hidden_weights, net->hidden_units, &hid_err); + *eo = out_err; + *eh = hid_err; + + /*** Adjust input and hidden weights. 
***/ + bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, + net->hidden_weights, net->hidden_prev_weights); + bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in, + net->input_weights, net->input_prev_weights); + +} + + + + +void bpnn_save(net, filename) +BPNN *net; +char *filename; +{ + int n1, n2, n3, i, j, memcnt; + float dvalue, **w; + char *mem; + ///add// + FILE *pFile; + pFile = fopen( filename, "w+" ); + /////// + /* + if ((fd = creat(filename, 0644)) == -1) { + printf("BPNN_SAVE: Cannot create '%s'\n", filename); + return; + } + */ + + n1 = net->input_n; n2 = net->hidden_n; n3 = net->output_n; + printf("Saving %dx%dx%d network to '%s'\n", n1, n2, n3, filename); + //fflush(stdout); + + //write(fd, (char *) &n1, sizeof(int)); + //write(fd, (char *) &n2, sizeof(int)); + //write(fd, (char *) &n3, sizeof(int)); + + fwrite( (char *) &n1 , sizeof(char), sizeof(char), pFile); + fwrite( (char *) &n2 , sizeof(char), sizeof(char), pFile); + fwrite( (char *) &n3 , sizeof(char), sizeof(char), pFile); + + + + memcnt = 0; + w = net->input_weights; + mem = (char *) malloc ((unsigned) ((n1+1) * (n2+1) * sizeof(float))); + for (i = 0; i <= n1; i++) { + for (j = 0; j <= n2; j++) { + dvalue = w[i][j]; + fastcopy(&mem[memcnt], &dvalue, sizeof(float)); + memcnt += sizeof(float); + } + } + //write(fd, mem, (n1+1) * (n2+1) * sizeof(float)); + fwrite( mem , (unsigned)(sizeof(float)), (unsigned) ((n1+1) * (n2+1) * sizeof(float)) , pFile); + free(mem); + + memcnt = 0; + w = net->hidden_weights; + mem = (char *) malloc ((unsigned) ((n2+1) * (n3+1) * sizeof(float))); + for (i = 0; i <= n2; i++) { + for (j = 0; j <= n3; j++) { + dvalue = w[i][j]; + fastcopy(&mem[memcnt], &dvalue, sizeof(float)); + memcnt += sizeof(float); + } + } + //write(fd, mem, (n2+1) * (n3+1) * sizeof(float)); + fwrite( mem , sizeof(float), (unsigned) ((n2+1) * (n3+1) * sizeof(float)) , pFile); + free(mem); + + fclose(pFile); + return; +} + + +BPNN *bpnn_read(filename) +char 
*filename; +{ + char *mem; + BPNN *new; + int fd, n1, n2, n3, i, j, memcnt; + + if ((fd = open(filename, 0, 0644)) == -1) { + return (NULL); + } + + printf("Reading '%s'\n", filename); //fflush(stdout); + + read(fd, (char *) &n1, sizeof(int)); + read(fd, (char *) &n2, sizeof(int)); + read(fd, (char *) &n3, sizeof(int)); + new = bpnn_internal_create(n1, n2, n3); + + printf("'%s' contains a %dx%dx%d network\n", filename, n1, n2, n3); + printf("Reading input weights..."); //fflush(stdout); + + memcnt = 0; + mem = (char *) malloc ((unsigned) ((n1+1) * (n2+1) * sizeof(float))); + read(fd, mem, (n1+1) * (n2+1) * sizeof(float)); + for (i = 0; i <= n1; i++) { + for (j = 0; j <= n2; j++) { + fastcopy(&(new->input_weights[i][j]), &mem[memcnt], sizeof(float)); + memcnt += sizeof(float); + } + } + free(mem); + + printf("Done\nReading hidden weights..."); //fflush(stdout); + + memcnt = 0; + mem = (char *) malloc ((unsigned) ((n2+1) * (n3+1) * sizeof(float))); + read(fd, mem, (n2+1) * (n3+1) * sizeof(float)); + for (i = 0; i <= n2; i++) { + for (j = 0; j <= n3; j++) { + fastcopy(&(new->hidden_weights[i][j]), &mem[memcnt], sizeof(float)); + memcnt += sizeof(float); + } + } + free(mem); + close(fd); + + printf("Done\n"); //fflush(stdout); + + bpnn_zero_weights(new->input_prev_weights, n1, n2); + bpnn_zero_weights(new->hidden_prev_weights, n2, n3); + + return (new); +} diff --git a/workloads/realworld/async/backprop/backprop.h b/workloads/realworld/async/backprop/backprop.h new file mode 100644 index 0000000000000000000000000000000000000000..dfaafe39b76a1c9c1455e38bbe0a14d5461d5d2b --- /dev/null +++ b/workloads/realworld/async/backprop/backprop.h @@ -0,0 +1,53 @@ +#ifndef _BACKPROP_H_ +#define _BACKPROP_H_ + +#define BIGRND 0x7fffffff + +#define GPU +#define THREADS 256 +#define WIDTH 16 // shared memory width +#define HEIGHT 16 // shared memory height + +#define ETA 0.3 //eta value +#define MOMENTUM 0.3 //momentum value +#define NUM_THREAD 4 //OpenMP threads + + +typedef struct { 
+ int input_n; /* number of input units */ + int hidden_n; /* number of hidden units */ + int output_n; /* number of output units */ + + float *input_units; /* the input units */ + float *hidden_units; /* the hidden units */ + float *output_units; /* the output units */ + + float *hidden_delta; /* storage for hidden unit error */ + float *output_delta; /* storage for output unit error */ + + float *target; /* storage for target vector */ + + float **input_weights; /* weights from input to hidden layer */ + float **hidden_weights; /* weights from hidden to output layer */ + + /*** The next two are for momentum ***/ + float **input_prev_weights; /* previous change on input to hidden wgt */ + float **hidden_prev_weights; /* previous change on hidden to output wgt */ +} BPNN; + + +/*** User-level functions ***/ + +void bpnn_initialize(); + +BPNN *bpnn_create(); +void bpnn_free(); + +void bpnn_train(); +void bpnn_feedforward(); + +void bpnn_save(); +BPNN *bpnn_read(); + + +#endif diff --git a/workloads/realworld/async/backprop/backprop_cuda.cu b/workloads/realworld/async/backprop/backprop_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..df07c19e4dc40cd5aefb10f711c4104b5378a4b5 --- /dev/null +++ b/workloads/realworld/async/backprop/backprop_cuda.cu @@ -0,0 +1,246 @@ + + +// includes, system +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +double t_start, t_end; + +// includes, kernels +#include "backprop_cuda_kernel.cu" +#include "backprop.h" + +//////////////////////////////////////////////////////////////////////////////// + +extern "C" void bpnn_layerforward(float *l1, float *l2, float **conn, int n1, int n2); + 
+extern "C" void bpnn_output_error(float *delta, float *target, float *output, int nj, float *err); + +extern "C" void bpnn_hidden_error(float *delta_h, int nh, float *delta_o, int no, float **who, float *hidden, float *err); + +extern "C" void bpnn_adjust_weights(float *delta, int ndelta, float *ly, int nly, float **w, float **oldw); + +extern "C" int setup(int argc, char **argv); + +extern "C" float **alloc_2d_dbl(int m, int n); + +extern "C" float squash(float x); + +double gettime() +{ + struct timeval t; + gettimeofday(&t, NULL); + return t.tv_sec + t.tv_usec * 1e-6; +} + +unsigned int num_threads = 0; +unsigned int num_blocks = 0; + +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + num_blocks = atoi(argv[2]); + setup(argc, argv); +} + +extern "C" void bpnn_train_cuda(BPNN *net, float *eo, float *eh) +{ + int in, hid, out; + float out_err, hid_err; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + +#ifdef GPU + int m = 0; + float *input_hidden_cuda; + float *input_cuda; + float *output_hidden_cuda; + float *partial_sum; + float *hidden_partial_sum; + float *hidden_delta_cuda; + float *input_prev_weights_cuda; + float sum; + float *input_weights_one_dim; + float *input_weights_prev_one_dim; + // ruihao + // num_blocks = in / 16; + // dim3 grid(1, num_blocks); + // dim3 threads(16, 16); + + int tile_size = in / num_blocks; + dim3 grid(1, num_blocks); + dim3 threads(16, 16); + // ruihao + + input_weights_one_dim = (float *)malloc((in + 1) * (hid + 1) * sizeof(float)); + input_weights_prev_one_dim = (float *)malloc((in + 1) * (hid + 1) * sizeof(float)); + // ruihao + // partial_sum = (float *) malloc(num_blocks * WIDTH * sizeof(float)); + partial_sum 
= (float *)malloc(in * sizeof(float)); + // ruihao + + // this preprocessing stage is added to correct the bugs of wrong memcopy using two-dimensional net->inputweights + for (int k = 0; k <= in; k++) + { + for (int j = 0; j <= hid; j++) + { + input_weights_one_dim[m] = net->input_weights[k][j]; + input_weights_prev_one_dim[m] = net->input_prev_weights[k][j]; + m++; + } + } + + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMalloc((void **)&input_cuda, (in + 1) * sizeof(float)); + cudaMalloc((void **)&output_hidden_cuda, (hid + 1) * sizeof(float)); + cudaMalloc((void **)&input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float)); + // ruihao + // cudaMalloc((void**) &hidden_partial_sum, num_blocks * WIDTH * sizeof(float)); + cudaMalloc((void **)&hidden_partial_sum, in * sizeof(float)); + // ruihao + +#endif + +#ifdef CPU + + printf("Performing CPU computation\n"); + bpnn_layerforward(net->input_units, net->hidden_units, net->input_weights, in, hid); + +#endif + +#ifdef GPU + + //printf("Performing GPU computation\n"); + + // printf("in= %d, hid = %d, numblocks = %d\n", in, hid, num_blocks); + + + cudaMemcpy(input_cuda, net->input_units, (in + 1) * sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(input_hidden_cuda, input_weights_one_dim, (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyHostToDevice); + + // ruihao + //t_start = rtclock(); + // ruihao + bpnn_layerforward_CUDA<<>>(input_cuda, + output_hidden_cuda, + input_hidden_cuda, + hidden_partial_sum, + in, + hid, + tile_size); + + cudaDeviceSynchronize(); + + // ruihao + // cudaMemcpy(partial_sum, hidden_partial_sum, num_blocks * WIDTH * sizeof(float), cudaMemcpyDeviceToHost); + //t_end = rtclock(); + //fprintf(stdout, "bpnn_layerforward_CUDA GPU Runtime: %0.6lfs\n", t_end - t_start); + cudaMemcpy(partial_sum, hidden_partial_sum, in * sizeof(float), cudaMemcpyDeviceToHost); + // ruihao + + cudaError_t error = cudaGetLastError(); + if (error != cudaSuccess) + { + printf("bpnn kernel error: %s\n", 
cudaGetErrorString(error)); + exit(EXIT_FAILURE); + } + + for (int j = 1; j <= hid; j++) + { + sum = 0.0; + // ruihao + // for (int k = 0; k < num_blocks; k++) { + // sum += partial_sum[k * hid + j-1] ; + // } + for (int k = 0; k < in / WIDTH; k++) + { + sum += partial_sum[k * hid + j - 1]; + } + // ruihao + sum += net->input_weights[0][j]; + net->hidden_units[j] = float(1.0 / (1.0 + exp(-sum))); + } +#endif + + bpnn_layerforward(net->hidden_units, net->output_units, net->hidden_weights, hid, out); + bpnn_output_error(net->output_delta, net->target, net->output_units, out, &out_err); + bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, net->hidden_weights, net->hidden_units, &hid_err); + bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, net->hidden_weights, net->hidden_prev_weights); + +#ifdef CPU + + bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in, net->input_weights, net->input_prev_weights); + +#endif + +#ifdef GPU + + cudaMalloc((void **)&hidden_delta_cuda, (hid + 1) * sizeof(float)); + cudaMalloc((void **)&input_prev_weights_cuda, (in + 1) * (hid + 1) * sizeof(float)); + + cudaMemcpy(hidden_delta_cuda, net->hidden_delta, (hid + 1) * sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(input_prev_weights_cuda, input_weights_prev_one_dim, (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(input_hidden_cuda, input_weights_one_dim, (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyHostToDevice); + // ruihao + //t_start = rtclock(); + // ruihao + bpnn_adjust_weights_cuda<<>>(hidden_delta_cuda, + hid, + input_cuda, + in, + input_hidden_cuda, + input_prev_weights_cuda, + tile_size); + // ruihao + cudaDeviceSynchronize(); + //t_end = rtclock(); + //fprintf(stdout, "bpnn_adjust_weights_cuda GPU Runtime: %0.6lfs\n", t_end - t_start); + // ruihao + cudaMemcpy(net->input_units, input_cuda, (in + 1) * sizeof(float), cudaMemcpyDeviceToHost); + cudaMemcpy(input_weights_one_dim, input_hidden_cuda, (in + 
1) * (hid + 1) * sizeof(float), cudaMemcpyDeviceToHost); + + + cudaFree(input_cuda); + cudaFree(output_hidden_cuda); + cudaFree(input_hidden_cuda); + cudaFree(hidden_partial_sum); + cudaFree(input_prev_weights_cuda); + cudaFree(hidden_delta_cuda); + + endCPU(); + finiTrace(); + + free(partial_sum); + free(input_weights_one_dim); + free(input_weights_prev_one_dim); + +#endif +} diff --git a/workloads/realworld/async/backprop/backprop_cuda_kernel.cu b/workloads/realworld/async/backprop/backprop_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..a8a29034fe018cc3d40480bc205e7618bcd5379e --- /dev/null +++ b/workloads/realworld/async/backprop/backprop_cuda_kernel.cu @@ -0,0 +1,182 @@ + + +#ifndef _BACKPROP_CUDA_KERNEL_H_ +#define _BACKPROP_CUDA_KERNEL_H_ + +#include +#include "backprop.h" +#include "math.h" +#include "cuda.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +__global__ void +bpnn_layerforward_CUDA(float *input_cuda, + float *output_hidden_cuda, + float *input_hidden_cuda, + float *hidden_partial_sum, + int in, + int hid, + int tile_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + + int by = blockIdx.y; + int tx = threadIdx.x; + int ty = threadIdx.y; + + __shared__ float input_node[HEIGHT * PREFETCH_COUNT]; + __shared__ float weight_matrix[HEIGHT * WIDTH * PREFETCH_COUNT]; + + int batches = tile_size / WIDTH; + + int fetch = batches * by; + int end_tile = fetch + batches; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + int fetch_index = (hid + 1) * HEIGHT * fetch + (hid + 1) * ty + tx + 1 + (hid + 1); + int index_in = HEIGHT * fetch + ty + 1; + + if (tx == 0) + memcpy_async(input_node[(fetch % PREFETCH_COUNT) * HEIGHT + ty], input_cuda[index_in], pipe); + + memcpy_async(weight_matrix[(fetch % PREFETCH_COUNT) * HEIGHT * WIDTH 
+ ty * WIDTH + tx], input_hidden_cuda[fetch_index], pipe); + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + + + int compute_index = (hid + 1) * HEIGHT * compute + (hid + 1) * ty + tx + 1 + (hid + 1); + weight_matrix[(compute % PREFETCH_COUNT) * HEIGHT * WIDTH + ty * WIDTH + tx] *= input_node[(compute % PREFETCH_COUNT) * HEIGHT + ty]; + block.sync(); + + for (int i = 1; i <= __log2f(HEIGHT); i++) { + int power_two = __powf(2, i); + if (ty % power_two == 0) + weight_matrix[(compute % PREFETCH_COUNT) * HEIGHT * WIDTH + ty * WIDTH + tx] += weight_matrix[(compute % PREFETCH_COUNT) * HEIGHT * WIDTH + (ty + power_two / 2) * WIDTH + tx]; + block.sync(); + } + + input_hidden_cuda[compute_index] = weight_matrix[(compute % PREFETCH_COUNT) * HEIGHT * WIDTH + ty * WIDTH + tx]; + block.sync(); + + if (tx == 0) + { + hidden_partial_sum[compute * hid + ty] = weight_matrix[(compute % PREFETCH_COUNT) * HEIGHT * WIDTH + tx * WIDTH + ty]; + } + } +} + +// __global__ void +// bpnn_layerforward_CUDA(float *input_cuda, +// float *output_hidden_cuda, +// float *input_hidden_cuda, +// float *hidden_partial_sum, +// int in, +// int hid, +// int tile_size) +// { +// int by = blockIdx.y; +// int tx = threadIdx.x; +// int ty = threadIdx.y; + +// int batches = tile_size / WIDTH; + +// __shared__ float input_node[HEIGHT]; +// __shared__ float weight_matrix[HEIGHT * WIDTH]; + +// for (int b = 0; b < batches; b++) +// { +// int index = (hid + 1) * HEIGHT * (batches * by + b) + (hid + 1) * ty + tx + 1 + (hid + 1); + +// int index_in = HEIGHT * (batches * by + b) + ty + 1; + +// if (tx == 0) +// input_node[ty] = input_cuda[index_in]; + +// __syncthreads(); + +// weight_matrix[ty * WIDTH + tx] = input_hidden_cuda[index]; + +// __syncthreads(); + +// weight_matrix[ty * WIDTH + tx] = weight_matrix[ty * WIDTH + tx] * input_node[ty]; + +// __syncthreads(); + +// for (int i = 1; 
i <= __log2f(HEIGHT); i++) +// { + +// int power_two = __powf(2, i); + +// if (ty % power_two == 0) +// weight_matrix[ty * WIDTH + tx] += weight_matrix[(ty + power_two / 2) * WIDTH + tx]; + +// __syncthreads(); +// } + +// input_hidden_cuda[index] = weight_matrix[ty * WIDTH + tx]; + +// __syncthreads(); + +// if (tx == 0) +// { +// hidden_partial_sum[(batches * by + b) * hid + ty] = weight_matrix[tx * WIDTH + ty]; +// } +// } +// } + +__global__ void bpnn_adjust_weights_cuda(float * delta, + int hid, + float * ly, + int in, + float * w, + float * oldw, + int tile_size) +{ + int by = blockIdx.y; + + int tx = threadIdx.x; + int ty = threadIdx.y; + + int batches = tile_size / WIDTH; + + for (int b = 0; b < batches; b++) { + int index = (hid + 1) * HEIGHT * (batches * by + b) + (hid + 1) * ty + tx + 1 + (hid + 1); + int index_y = HEIGHT * (batches * by + b) + ty + 1; + int index_x = tx + 1; + // eta = 0.3; + // momentum = 0.3; + + w[index] += ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index])); + oldw[index] = ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index])); + + __syncthreads(); + + if (ty == 0 && by == 0 && b == 0) + { + w[index_x] += ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x])); + oldw[index_x] = ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x])); + } + } +} +#endif diff --git a/workloads/realworld/async/backprop/backprop_cuda_kernel.cu.bp b/workloads/realworld/async/backprop/backprop_cuda_kernel.cu.bp new file mode 100644 index 0000000000000000000000000000000000000000..dcce64f86286e850c3bf17d7ac150cd757a0d175 --- /dev/null +++ b/workloads/realworld/async/backprop/backprop_cuda_kernel.cu.bp @@ -0,0 +1,100 @@ + + +#ifndef _BACKPROP_CUDA_KERNEL_H_ +#define _BACKPROP_CUDA_KERNEL_H_ + +#include +#include "backprop.h" +#include "math.h" +#include "cuda.h" + + +__global__ void +bpnn_layerforward_CUDA(float *input_cuda, + float *output_hidden_cuda, + float *input_hidden_cuda, + float *hidden_partial_sum, + int in, + int hid) +{ + 
int by = blockIdx.y; + int tx = threadIdx.x; + int ty = threadIdx.y; + + int index = ( hid + 1 ) * HEIGHT * by + ( hid + 1 ) * ty + tx + 1 + ( hid + 1 ) ; + + int index_in = HEIGHT * by + ty + 1; + + __shared__ float input_node[HEIGHT]; + __shared__ float weight_matrix[HEIGHT][WIDTH]; + + + if ( tx == 0 ) + input_node[ty] = input_cuda[index_in] ; + + __syncthreads(); + + weight_matrix[ty][tx] = input_hidden_cuda[index]; + + __syncthreads(); + + weight_matrix[ty][tx] = weight_matrix[ty][tx] * input_node[ty]; + + __syncthreads(); + + for ( int i = 1 ; i <= __log2f(HEIGHT) ; i++){ + + int power_two = __powf(2, i); + + if( ty % power_two == 0 ) + weight_matrix[ty][tx] = weight_matrix[ty][tx] + weight_matrix[ty + power_two/2][tx]; + + __syncthreads(); + + } + + input_hidden_cuda[index] = weight_matrix[ty][tx]; + + __syncthreads(); + + if ( tx == 0 ) { + hidden_partial_sum[by * hid + ty] = weight_matrix[tx][ty]; + } + +} + + +__global__ void bpnn_adjust_weights_cuda(float * delta, + int hid, + float * ly, + int in, + float * w, + float * oldw) +{ + + + int by = blockIdx.y; + + int tx = threadIdx.x; + int ty = threadIdx.y; + + int index = ( hid + 1 ) * HEIGHT * by + ( hid + 1 ) * ty + tx + 1 + ( hid + 1 ) ; + int index_y = HEIGHT * by + ty + 1; + int index_x = tx + 1; + //eta = 0.3; + //momentum = 0.3; + + w[index] += ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index])); + oldw[index] = ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index])); + + + __syncthreads(); + + if (ty == 0 && by ==0){ + w[index_x] += ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x])); + oldw[index_x] = ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x])); + } + + +} +#endif diff --git a/workloads/realworld/async/backprop/facetrain.c b/workloads/realworld/async/backprop/facetrain.c new file mode 100644 index 0000000000000000000000000000000000000000..cbf83810934b68551d7dd4b7b94fda5eb6837276 --- /dev/null +++ b/workloads/realworld/async/backprop/facetrain.c @@ -0,0 +1,54 @@ 
+ +#include +#include +#include +#include +#include "backprop.h" +#include "omp.h" + +extern char *strcpy(); +extern void exit(); + +int layer_size = 0; + +backprop_face() +{ + BPNN *net; + int i; + float out_err, hid_err; + net = bpnn_create(layer_size, 16, 1); // (16, 1 can not be changed) + + printf("Input layer size : %d\n", layer_size); + load(net); + // entering the training kernel, only one iteration + printf("Starting training kernel\n"); + bpnn_train_cuda(net, &out_err, &hid_err); + bpnn_free(net); + printf("Training done\n"); +} + +int setup(argc, argv) +int argc; +char *argv[]; +{ + + int seed; + + if (argc != 3) + { + fprintf(stderr, "usage: backprop \n"); + exit(0); + } + layer_size = atoi(argv[1]); + if (layer_size % 16 != 0) + { + fprintf(stderr, "The number of input points must be divided by 16\n"); + exit(0); + } + + seed = 7; + bpnn_initialize(seed); + backprop_face(); + + exit(0); +} diff --git a/workloads/realworld/async/backprop/imagenet.c b/workloads/realworld/async/backprop/imagenet.c new file mode 100644 index 0000000000000000000000000000000000000000..255b0d5d8ca67508f6732e266299e7c58012906f --- /dev/null +++ b/workloads/realworld/async/backprop/imagenet.c @@ -0,0 +1,24 @@ + +#include +#include +#include "backprop.h" + +extern layer_size; + +load(net) +BPNN *net; +{ + float *units; + int nr, nc, imgsize, i, j, k; + + nr = layer_size; + + imgsize = nr * nc; + units = net->input_units; + + k = 1; + for (i = 0; i < nr; i++) { + units[k] = (float) rand()/RAND_MAX ; + k++; + } +} diff --git a/workloads/realworld/async/backprop/run.sh b/workloads/realworld/async/backprop/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..874cbb88032622578f319cce3800a3793151cb92 --- /dev/null +++ b/workloads/realworld/async/backprop/run.sh @@ -0,0 +1,5 @@ +# ./backprop 524288 +# ./backprop 8388608 128 + +# ./backprop 66708864 128 +./backprop 66708864 1024 \ No newline at end of file diff --git 
a/workloads/realworld/async/backprop/run_super.sh b/workloads/realworld/async/backprop/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..d9e8a3d42354f597bbcd153d180d9b23bed71192 --- /dev/null +++ b/workloads/realworld/async/backprop/run_super.sh @@ -0,0 +1,3 @@ +# ./backprop 1073741824 1024 +# ./backprop 134217728 1024 +./backprop 67108864 1024 \ No newline at end of file diff --git a/workloads/realworld/async/darknet/LICENSE b/workloads/realworld/async/darknet/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..a50f7d700ba02bfacd50f59b315311cf4d0bbda2 --- /dev/null +++ b/workloads/realworld/async/darknet/LICENSE @@ -0,0 +1,12 @@ + YOLO LICENSE + Version 2, July 29 2016 + +THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER +SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN +TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES +LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY. NOW HERE'S +THE REAL LICENSE: + +0. Darknet is public domain. +1. Do whatever you want with it. +2. Stop emailing me about it! diff --git a/workloads/realworld/async/darknet/LICENSE.fuck b/workloads/realworld/async/darknet/LICENSE.fuck new file mode 100644 index 0000000000000000000000000000000000000000..8b1a9d8189b3b9f4479221d52882ce36fdc73a62 --- /dev/null +++ b/workloads/realworld/async/darknet/LICENSE.fuck @@ -0,0 +1,13 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + +Copyright (C) 2004 Sam Hocevar + +Everyone is permitted to copy and distribute verbatim or modified +copies of this license document, and changing it is allowed as long +as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. 
diff --git a/workloads/realworld/async/darknet/LICENSE.gen b/workloads/realworld/async/darknet/LICENSE.gen new file mode 100644 index 0000000000000000000000000000000000000000..c54113271e15057c4def6676693eb96fd6362b28 --- /dev/null +++ b/workloads/realworld/async/darknet/LICENSE.gen @@ -0,0 +1,91 @@ +RNN LICENSE Version 3, June 21 2017 + +Copyright (c) 1990, 1989, 1999 Free87337 May 48 THIRD PARTIES OR ANY OTHER THE +COMPLAIN OR CONSEQUENTIAL DAMAGES AND REGARDLESS OF WHETHER IN CONTRACT, TO THE +EXTENT REPAIR OR AGENTS (NOT THE IN ANY EVENT). THE SOFTWARE WILL BE +UNINTERRUPTED OR ERROR-FREE OR ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF ALL THE WORK (GOVERNED CODE) HIM RESPONSES, OR OF FINES, +SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR ANY OTHER OR OTHER HARL UNDER NO +CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), +PATENT PERMITTED BY THE INSTAGRAM PARENT STATE OR TORT (INCLUDING NEGLIGENCE), +PRODUCT LIABILITY OR OTHERWISE, ARISING OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR ANYTHING PROVIDED IN THIS PRODUCT, COMMIS AND SERVICES +ARE LICENSED SOFTWARE AND ANY RESULE OR ANY OTHER THE COPYRIGHT HOLDERS BE +LIABLE FOR ANY SPECIAL, INCIDENTAL, CASE, SUCH WARRANTIES, EXPRESS OR IMPLIED, +INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COPYRIGHT HOLDERS AND/OR ANY +PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY +EXPRESS OR DISTRIBUTE THAT ALL CLAIMS ARE SHALL CREATE DERAVE BE LIABLE TO YOU +WILL HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +6\. TERMINATION. TO THE EXTENT PERMITTED BY LAW, NO USE OF THE COVERED CODE IS +WITH YOU. 
SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE +INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY +SERVICING, REPAIR OR COULT OR IN ANY WAY OUT OF THE USE OF THE WEBSITES OR +SERVICE WILL BE CONSEQUENTIAL DAMAGES OF ANY KIND HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + +This paragraph Agreement constitutes the entire agreement between the parties +with respect to the Work licensed here. However, if you place the name of the +fact that the arbitration was the consultation of the parties as a "patent is". +Subject to the terms and conditions of this License, Contributor has knowledge +that a license under a third party may also be used to endorse or promote +products derived from the Work, and there is no warranty on the Software and +Science Fees. For the purposes of this Agreement, attach the following +disclaimers (without liabilities of written notice to the Subject Software) in a +manner that a product is under common control with you. The Free Software +Foundation may publish revised and/or new versions of the License for the +Modifications made by the applicable terms. The Recipient shall promptly retain +the covered works for any reason be entered in any federal or state or login +Restricted Laws appearing in the United States or any of its own information +that is not disabled from a derivative work except as expressly permitted in +this License, to the extent that they are in receiving the Software and Source +Code or any exercise of the rights granted to You by this License or a +Contributor made by the Licensor or are authorized to make a reasonable +retirement by the courts of the courts located in Santa Clara County, California +printed and related to the Work or “Company” and Apache Software Foundation. 
If +the Licensor shall be entitled to reflect your rights to use the Software and +the Software to exercise the rights granted to the recipient without a +requirement to exercise the rights granted by the Agreement to the provision +will begin will appear in such cases, you will use such information without such +corporation shall be an officer with respect to any part of the Software or any +portion thereof. Capitalized terms are included in the Initial Contributor and +under no circumstances will license the Service at any time and for any direct, +indirect, special, incidental, or consequential damages of or assist in +connection with any Services or the registration purposes only to the extent +that it includes any or all means including the processing of which you download +any derivative work. Any of the purchases’ transmission purposes are made +available, if any, in other circumstances, we may review the copyright notice. +In the event that this Agreement is required to give us strict content. The +inclusion of the other party hereunder may also notify you Intellectual Property +Rights to any third party. This means that the Source Code exists of the Work +will not charge a program available to you at any time. You must include a +prominent statement that the Software is governed under a particular version of +this Agreement. You must include a provision to the extent that there is no +warranty for the content of others. You agree that the Recipient was appointed +as a Contributor, (c) are effective until terminated by hereunder, then the +registration are not disabled and not limited to, submit any Customer Data +without the updated use of the Software and that no fee is released. You grant +to Use Other Arbitration Rules for Diagnostic or Services may use or modify the +Apple Software and Consolidated Apple Software or Services. The Company may have +full risk as a product of the Compatible Source. 
A Contribution by the Licensor +or by the updated Software under the following conditions we can redistribute +any General Provision of this Agreement. If the Program is used in accordance +with the terms of this Agreement, Customer may provide advertisements from your +devices that clause you can your employer or a transaction or country that has +been controlled by the arbitrator, that they will be useful of this Agreement. +The term "Open Source Software is available in connection with the program, and +you may not protect the combination of the Covered Code. You should like to +select a user's rights to charge a copy of this License. I are Contributor's +confidentiality of the exercise of the rights granted herein. Such a covered +work is released as a consequence, the Licensor shall be eligible for a purpose +or subcontractor of the person or entity to the user of the user, then the word +"Application" means having the original fee for any reason; and that no patent +license to more than fifty stated close of the license term. The terms of this +License will the license terms and conditions set forth in Section 2.2 (OPEC) +and You will not use the Software or any set of responsibility for any resulting +information that the Original Code warrants that you have the right to disclose +these information (or in the notification; or (iii) late use of the software or +any third party to the three (50) days before such belief to the extent that it +includes a court court obtains the rights granted by this License. diff --git a/workloads/realworld/async/darknet/LICENSE.gpl b/workloads/realworld/async/darknet/LICENSE.gpl new file mode 100644 index 0000000000000000000000000000000000000000..9cecc1d4669ee8af2ca727a5d8cde10cd8b2d7cc --- /dev/null +++ b/workloads/realworld/async/darknet/LICENSE.gpl @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. 
+ Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. 
+ + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. 
+ + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + {one line to give the program's name and a brief idea of what it does.} + Copyright (C) {year} {name of author} + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + {project} Copyright (C) {year} {fullname} + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>. 
diff --git a/workloads/realworld/async/darknet/LICENSE.meta b/workloads/realworld/async/darknet/LICENSE.meta new file mode 100644 index 0000000000000000000000000000000000000000..6728bd28d319c68ae04944fb034118dcc4c9aa09 --- /dev/null +++ b/workloads/realworld/async/darknet/LICENSE.meta @@ -0,0 +1,8 @@ + META-LICENSE + Version 1, June 21 2017 + +Any and all licenses may be applied to the software either individually +or in concert. Any issues, ambiguities, paradoxes, or metaphysical quandries +arising from this combination should be discussed with a local faith leader, +hermit, or guru. The Oxford comma shall be used. + diff --git a/workloads/realworld/async/darknet/LICENSE.mit b/workloads/realworld/async/darknet/LICENSE.mit new file mode 100644 index 0000000000000000000000000000000000000000..5bd806ce16ea5053c8631793787362439375026e --- /dev/null +++ b/workloads/realworld/async/darknet/LICENSE.mit @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2017 Joseph Redmon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/workloads/realworld/async/darknet/LICENSE.v1 b/workloads/realworld/async/darknet/LICENSE.v1 new file mode 100644 index 0000000000000000000000000000000000000000..5b8709acc43e7b76ed69758a52a9eaffaba775e6 --- /dev/null +++ b/workloads/realworld/async/darknet/LICENSE.v1 @@ -0,0 +1,13 @@ + YOLO LICENSE + Version 1, July 10 2015 + +THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER +SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN +TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES +LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY SUBJECT TO +THE FOLLOWING CONDITIONS: + +1. #yolo +2. #swag +3. #blazeit + diff --git a/workloads/realworld/async/darknet/Makefile b/workloads/realworld/async/darknet/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..5022f68377d7626ab768f2501883d72b013dafba --- /dev/null +++ b/workloads/realworld/async/darknet/Makefile @@ -0,0 +1,114 @@ +GPU=1 +CUDNN=0 +OPENCV=0 +OPENMP=0 +DEBUG=0 + +#ARCH= -gencode arch=compute_30,code=sm_30 \ +# -gencode arch=compute_35,code=sm_35 \ +# -gencode arch=compute_50,code=[sm_50,compute_50] \ +# -gencode arch=compute_52,code=[sm_52,compute_52] +# -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated? 
+ +# This is what I use, uncomment if you know your arch and want to specify +ARCH= -gencode arch=compute_80,code=sm_80 \ +#ARCH= -arch=sm_80 + +VPATH=./src/:./examples:$(CUPTI_ADD_COMMON) +SLIB=libdarknet.so +ALIB=libdarknet.a +EXEC=darknet +OBJDIR=./obj/ + +CC=gcc +CPP=g++ +# NVCC=nvcc --default-stream per-thread +NVCC=nvcc +AR=ar +ARFLAGS=rcs +OPTS=-Ofast +LDFLAGS= -lm -pthread +COMMON= -Iinclude/ -Isrc/ +CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC +ifeq ($(PROFILE), 1) +CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -DPROFILE +endif + + +ifeq ($(OPENMP), 1) +CFLAGS+= -fopenmp +endif + +ifeq ($(DEBUG), 1) +OPTS=-O0 -g +endif + +CFLAGS+=$(OPTS) + +ifeq ($(OPENCV), 1) +COMMON+= -DOPENCV +CFLAGS+= -DOPENCV +LDFLAGS+= `pkg-config --libs opencv` -lstdc++ +COMMON+= `pkg-config --cflags opencv` +endif + +ifeq ($(GPU), 1) +include ../../../common/make.config +COMMON+= -DGPU -I$(CUDA_DIR)/include/ -I$(CUDA_DIR)/extras/CUPTI/include/ +CFLAGS+= -DGPU +LDFLAGS+= -L$(CUDA_DIR)/lib64 -L$(CUDA_DIR)/extras/CUPTI/lib64/ -lcuda -lcudart -lcublas -lcurand -lcupti +endif + + +ifeq ($(CUDNN), 1) +COMMON+= -DCUDNN +CFLAGS+= -DCUDNN +LDFLAGS+= -lcudnn +endif + +OBJ=gemm.o utils.o cuda_dark.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o l2norm_layer.o yolo_layer.o iseg_layer.o image_opencv.o +EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o instance-segmenter.o darknet.o +ifeq 
($(GPU), 1) +LDFLAGS+= -lstdc++ +OBJ+=gemm_kernel.o convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o +endif + +# cpu_timestamps.o +# cupti_add.o + +EXECOBJ = $(addprefix $(OBJDIR), $(EXECOBJA)) +OBJS = $(addprefix $(OBJDIR), $(OBJ)) +DEPS = $(wildcard src/*.h) Makefile include/darknet.h + +all: obj backup results $(SLIB) $(ALIB) $(EXEC) + +$(EXEC): $(EXECOBJ) $(ALIB) + $(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS) $(ALIB) + +$(ALIB): $(OBJS) + $(AR) $(ARFLAGS) $@ $^ + +$(SLIB): $(OBJS) + $(CC) $(CFLAGS) -shared $^ -o $@ $(LDFLAGS) + +$(OBJDIR)%.o: %.cpp $(DEPS) + $(CPP) $(COMMON) $(CFLAGS) -c $< -o $@ + +$(OBJDIR)%.o: %.c $(DEPS) + $(CC) $(COMMON) $(CFLAGS) -c $< -o $@ + +$(OBJDIR)%.o: %.cu $(DEPS) + $(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@ + +obj: + mkdir -p obj +backup: + mkdir -p backup +results: + mkdir -p results + +.PHONY: clean + +clean: + rm -rf $(OBJS) $(SLIB) $(ALIB) $(EXEC) $(EXECOBJ) $(OBJDIR)/* + diff --git a/workloads/realworld/async/darknet/README.md b/workloads/realworld/async/darknet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fb58c2640038a963cd573d121e4fab59399f67dc --- /dev/null +++ b/workloads/realworld/async/darknet/README.md @@ -0,0 +1,124 @@ +![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png) + +# Darknet # +Darknet is an open source neural network framework written in C and CUDA. It is fast, easy to install, and supports CPU and GPU computation. 
+ +**Discord** invite link for communication and questions: https://discord.gg/zSq8rtW + +## YOLOv7: + +* **paper** - YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors: https://arxiv.org/abs/2207.02696 + +* **source code - Pytorch (use to reproduce results):** https://github.com/WongKinYiu/yolov7 + +---- + +Official YOLOv7 is more accurate and faster than YOLOv5 by **120%** FPS, than YOLOX by **180%** FPS, than Dual-Swin-T by **1200%** FPS, than ConvNext by **550%** FPS, than SWIN-L by **500%** FPS. + +YOLOv7 surpasses all known object detectors in both speed and accuracy in the range from 5 FPS to 160 FPS and has the highest accuracy 56.8% AP among all known real-time object detectors with 30 FPS or higher on GPU V100, batch=1. + +* YOLOv7-e6 (55.9% AP, 56 FPS V100 b=1) by `+500%` FPS faster than SWIN-L Cascade-Mask R-CNN (53.9% AP, 9.2 FPS A100 b=1) +* YOLOv7-e6 (55.9% AP, 56 FPS V100 b=1) by `+550%` FPS faster than ConvNeXt-XL C-M-RCNN (55.2% AP, 8.6 FPS A100 b=1) +* YOLOv7-w6 (54.6% AP, 84 FPS V100 b=1) by `+120%` FPS faster than YOLOv5-X6-r6.1 (55.0% AP, 38 FPS V100 b=1) +* YOLOv7-w6 (54.6% AP, 84 FPS V100 b=1) by `+1200%` FPS faster than Dual-Swin-T C-M-RCNN (53.6% AP, 6.5 FPS V100 b=1) +* YOLOv7x (52.9% AP, 114 FPS V100 b=1) by `+150%` FPS faster than PPYOLOE-X (51.9% AP, 45 FPS V100 b=1) +* YOLOv7 (51.2% AP, 161 FPS V100 b=1) by `+180%` FPS faster than YOLOX-X (51.1% AP, 58 FPS V100 b=1) + +---- + +![more5](https://user-images.githubusercontent.com/4096485/179425274-f55a36d4-8450-4471-816b-8c105841effd.jpg) + +---- + +![image](https://user-images.githubusercontent.com/4096485/177675030-a929ee00-0eba-4d93-95c2-225231d0fd61.png) + + +---- + +![yolov7_640_1280](https://user-images.githubusercontent.com/4096485/177688869-d75e0c36-63af-46ec-bdbd-81dbb281f257.png) + +---- + +## Scaled-YOLOv4: + +* **paper (CVPR 2021)**: 
https://openaccess.thecvf.com/content/CVPR2021/html/Wang_Scaled-YOLOv4_Scaling_Cross_Stage_Partial_Network_CVPR_2021_paper.html + +* **source code - Pytorch (use to reproduce results):** https://github.com/WongKinYiu/ScaledYOLOv4 + +* **source code - Darknet:** https://github.com/AlexeyAB/darknet + +* **Medium:** https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982?source=friends_link&sk=c8553bfed861b1a7932f739d26f487c8 + +## YOLOv4: + +* **paper:** https://arxiv.org/abs/2004.10934 + +* **source code:** https://github.com/AlexeyAB/darknet + +* **Wiki:** https://github.com/AlexeyAB/darknet/wiki + +* **useful links:** https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7 + +For more information see the [Darknet project website](http://pjreddie.com/darknet). + + +
Expand + +![yolo_progress](https://user-images.githubusercontent.com/4096485/146988929-1ed0cbec-1e01-4ad0-b42c-808dcef32994.png) https://paperswithcode.com/sota/object-detection-on-coco + +---- + +![scaled_yolov4](https://user-images.githubusercontent.com/4096485/112776361-281d8380-9048-11eb-8083-8728b12dcd55.png) AP50:95 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2011.08036 + +---- + +![YOLOv4Tiny](https://user-images.githubusercontent.com/4096485/101363015-e5c21200-38b1-11eb-986f-b3e516e05977.png) + +---- + +![YOLOv4](https://user-images.githubusercontent.com/4096485/90338826-06114c80-dff5-11ea-9ba2-8eb63a7409b3.png) + +
+ +---- + +![OpenCV_TRT](https://user-images.githubusercontent.com/4096485/90338805-e5e18d80-dff4-11ea-8a68-5710956256ff.png) + + +## Citation + + +``` +@misc{https://doi.org/10.48550/arxiv.2207.02696, + doi = {10.48550/ARXIV.2207.02696}, + url = {https://arxiv.org/abs/2207.02696}, + author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + publisher = {arXiv}, + year = {2022}, + copyright = {arXiv.org perpetual, non-exclusive license} +} +``` + +``` +@misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +``` +@InProceedings{Wang_2021_CVPR, + author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + title = {{Scaled-YOLOv4}: Scaling Cross Stage Partial Network}, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2021}, + pages = {13029-13038} +} +``` diff --git a/workloads/realworld/async/darknet/cfg/alexnet.cfg b/workloads/realworld/async/darknet/cfg/alexnet.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e2ed4bb8e7b1bad7859aef0d802cb4084753cb7a --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/alexnet.cfg @@ -0,0 +1,96 @@ +[net] +# Training +# batch=128 +# subdivisions=1 +# Testing +batch=1 +subdivisions=1 +height=227 +width=227 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=256 + +learning_rate=0.01 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue = .1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +filters=96 +size=11 +stride=4 
+pad=0 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[convolutional] +filters=256 +size=5 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[convolutional] +filters=384 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=384 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=1000 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/cifar.cfg b/workloads/realworld/async/darknet/cfg/cifar.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b2f69f53903e55c24718ed12d9adaaa1557e3647 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/cifar.cfg @@ -0,0 +1,121 @@ +[net] +batch=128 +subdivisions=1 +height=28 +width=28 +channels=3 +max_crop=32 +min_crop=32 + +hue=.1 +saturation=.75 +exposure=.75 + +learning_rate=0.4 +policy=poly +power=4 +max_batches = 5000 +momentum=0.9 +decay=0.0005 + + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[dropout] +probability=.5 + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 diff --git a/workloads/realworld/async/darknet/cfg/cifar.data b/workloads/realworld/async/darknet/cfg/cifar.data new file mode 100644 index 0000000000000000000000000000000000000000..a52208db1b203b5e1f24d5afaf8c7002adfd71a3 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/cifar.data @@ -0,0 +1,7 @@ +classes=10 +train = data/cifar/train.list +valid = data/cifar/test.list +test = data/cifar/test.list +labels = data/cifar/labels.txt +backup = backup/ +top=2 diff --git a/workloads/realworld/async/darknet/cfg/cifar.test.cfg b/workloads/realworld/async/darknet/cfg/cifar.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..18b6c54c909152b1201d6320b85fafc5c36ba1ef --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/cifar.test.cfg @@ -0,0 +1,117 @@ +[net] +batch=128 +subdivisions=1 +height=32 +width=32 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.4 +policy=poly +power=4 +max_batches = 50000 + + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=512 
+size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[dropout] +probability=.5 + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 +temperature=3 + diff --git a/workloads/realworld/async/darknet/cfg/cifar_small.cfg b/workloads/realworld/async/darknet/cfg/cifar_small.cfg new file mode 100644 index 0000000000000000000000000000000000000000..d48b1231f0131faaa187b18df6705411c3d16a76 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/cifar_small.cfg @@ -0,0 +1,86 @@ +[net] +batch=128 +subdivisions=1 +height=28 +width=28 +channels=3 +max_crop=32 +min_crop=32 + +hue=.1 +saturation=.75 +exposure=.75 + +learning_rate=0.1 +policy=poly +power=4 +max_batches = 5000 +momentum=0.9 +decay=0.0005 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] diff --git a/workloads/realworld/async/darknet/cfg/coco.data b/workloads/realworld/async/darknet/cfg/coco.data new file mode 100644 index 0000000000000000000000000000000000000000..5951d5245a7895e8418bc3155de3b03049e76c42 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/coco.data @@ -0,0 +1,6 @@ +classes= 80 +train = 
/data/darknet/coco/valid.list +valid = /data/darknet/coco/valid.list +backup = /data/darknet/backup/ +names = /data/darknet/coco/coco.names +eval=coco diff --git a/workloads/realworld/async/darknet/cfg/coco.names b/workloads/realworld/async/darknet/cfg/coco.names new file mode 100644 index 0000000000000000000000000000000000000000..16315f2becec9705017bfaf1b9fb81ca2a83c0b0 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/coco.names @@ -0,0 +1,80 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +sofa +pottedplant +bed +diningtable +toilet +tvmonitor +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush \ No newline at end of file diff --git a/workloads/realworld/async/darknet/cfg/combine9k.data b/workloads/realworld/async/darknet/cfg/combine9k.data new file mode 100644 index 0000000000000000000000000000000000000000..06a3e76aefac9c1074c3dfe159bc115a92b0791e --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/combine9k.data @@ -0,0 +1,10 @@ +classes= 9418 +#train = /home/pjreddie/data/coco/trainvalno5k.txt +train = data/combine9k.train.list +valid = /home/pjreddie/data/imagenet/det.val.files +labels = data/9k.labels +names = data/9k.names +backup = backup/ +map = data/inet9k.map +eval = imagenet +results = results diff --git a/workloads/realworld/async/darknet/cfg/darknet.cfg b/workloads/realworld/async/darknet/cfg/darknet.cfg new file mode 100644 index 
0000000000000000000000000000000000000000..375107f7c196baf7adf229a7cfffc84739875828 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/darknet.cfg @@ -0,0 +1,120 @@ +[net] +# Training +# batch=128 +# subdivisions=1 +# Testing +batch=1 +subdivisions=1 +height=256 +width=256 +min_crop=128 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/darknet19.cfg b/workloads/realworld/async/darknet/cfg/darknet19.cfg new file mode 100644 index 0000000000000000000000000000000000000000..28ac9669ef686b4d638a5bf462451962fec45a4e --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/darknet19.cfg @@ -0,0 +1,205 @@ +[net] +# Training +#batch=128 +#subdivisions=2 + +# Testing + batch=1 + subdivisions=1 + +height=256 +width=256 +min_crop=128 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly 
+power=4 +max_batches=800000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 
+activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/darknet19_448.cfg b/workloads/realworld/async/darknet/cfg/darknet19_448.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c6df7306d3ef0622e0a0e0cbd6a5603699344e56 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/darknet19_448.cfg @@ -0,0 +1,197 @@ +[net] +batch=128 +subdivisions=4 +height=448 +width=448 +max_crop=512 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 + +angle=7 +hue = .1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 
+stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/darknet53.cfg b/workloads/realworld/async/darknet/cfg/darknet53.cfg new file mode 100644 index 0000000000000000000000000000000000000000..7b6d5766e9ec48ee19a74321583b44621c1e07b3 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/darknet53.cfg @@ -0,0 +1,566 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/darknet53_448.cfg b/workloads/realworld/async/darknet/cfg/darknet53_448.cfg new file mode 100644 index 0000000000000000000000000000000000000000..dedab1b97c7e5d4226f061e6c983046d7a278dd0 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/darknet53_448.cfg @@ -0,0 +1,559 @@ +[net] +# Training - start training with darknet53.weights +# batch=128 +# subdivisions=8 + +# Testing +batch=1 +subdivisions=1 + +height=448 +width=448 +channels=3 +min_crop=448 +max_crop=512 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 +momentum=0.9 +decay=0.0005 + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 
+stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + 
+[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/darknet9000.cfg b/workloads/realworld/async/darknet/cfg/darknet9000.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9dd2dfbbf5a7137faada4e091b8e6d48233f09bf --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/darknet9000.cfg @@ -0,0 +1,205 @@ +[net] +# Training +# batch=128 +# subdivisions=4 +# Testing +batch = 1 +subdivisions = 1 +height=448 +width=448 +max_crop=512 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] 
+size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=9418 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 +tree=data/9k.tree + +[cost] +type=masked + diff --git a/workloads/realworld/async/darknet/cfg/densenet201.cfg b/workloads/realworld/async/darknet/cfg/densenet201.cfg new file mode 100644 index 0000000000000000000000000000000000000000..65b4aecc52d45075f2913e3d63b9aec0527fa44c --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/densenet201.cfg @@ -0,0 +1,1951 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 
+activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky 
+ +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 
+size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] 
+layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 
+pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 
+filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/extraction.cfg b/workloads/realworld/async/darknet/cfg/extraction.cfg new file mode 100644 index 0000000000000000000000000000000000000000..66cb15f80e9a5e811223299594882a3b5d9485dc --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/extraction.cfg @@ -0,0 +1,209 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=224 +width=224 +max_crop=320 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/extraction.conv.cfg b/workloads/realworld/async/darknet/cfg/extraction.conv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..2a7d09ec80fa2f47e1ebb4134b7845d5cae2b828 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/extraction.conv.cfg @@ -0,0 +1,179 @@ +[net] +batch=1 +subdivisions=1 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.5 +policy=poly +power=6 +max_batches=500000 + +[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + 
+[convolutional] +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[connected] +output=1000 +activation=leaky + +[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/extraction22k.cfg b/workloads/realworld/async/darknet/cfg/extraction22k.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b5f54090d00537fdca72f54bb2eed69dd78f5b00 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/extraction22k.cfg @@ -0,0 +1,206 @@ +[net] +batch=128 +subdivisions=1 +height=224 +width=224 
+max_crop=320 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +max_batches = 0 +policy=steps +steps=444000,590000,970000 +scales=.5,.2,.1 + +#policy=sigmoid +#gamma=.00008 +#step=100000 +#max_batches=200000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 
+size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[connected] +output=21842 +activation=leaky + +[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/go.cfg b/workloads/realworld/async/darknet/cfg/go.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c730092ff3ffda0124baeace050bd382c442d88d --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/go.cfg @@ -0,0 +1,132 @@ +[net] +batch=512 +subdivisions=1 +height=19 +width=19 +channels=1 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=10000000 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 
+ +[convolutional] +filters=1 +size=1 +stride=1 +pad=1 +activation=linear + +[reorg] +extra=1 +stride=1 + +[softmax] + diff --git a/workloads/realworld/async/darknet/cfg/go.test.cfg b/workloads/realworld/async/darknet/cfg/go.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..1e4e43809bf3ede20a67b5020fcca0f61612e8f7 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/go.test.cfg @@ -0,0 +1,132 @@ +[net] +batch=1 +subdivisions=1 +height=19 +width=19 +channels=1 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +policy=poly +power=4 +max_batches=100000 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=1 +size=1 +stride=1 +pad=1 +activation=linear + +[reorg] +extra=1 +stride=1 + +[softmax] + + diff --git a/workloads/realworld/async/darknet/cfg/gru.cfg 
b/workloads/realworld/async/darknet/cfg/gru.cfg new file mode 100644 index 0000000000000000000000000000000000000000..6064221289d41dc3ee464a715ae05593a02f34f4 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/gru.cfg @@ -0,0 +1,29 @@ +[net] +inputs=256 +momentum=0.9 +decay=0.0 +subdivisions=1 +batch = 1 +time_steps=1 +learning_rate=.002 +adam=1 + +policy=constant +power=4 +max_batches=1000000 + +[gru] +output = 256 + +[gru] +output = 256 + +[gru] +output = 256 + +[connected] +output=256 +activation=linear + +[softmax] + diff --git a/workloads/realworld/async/darknet/cfg/imagenet.labels.list b/workloads/realworld/async/darknet/cfg/imagenet.labels.list new file mode 100644 index 0000000000000000000000000000000000000000..e73d41762d311df7f7eefec0f65ab12a7bacc023 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/imagenet.labels.list @@ -0,0 +1,21842 @@ +n02119789 +n02100735 +n02110185 +n02096294 +n02102040 +n02066245 +n02509815 +n02124075 +n02417914 +n02123394 +n02125311 +n02423022 +n02346627 +n02077923 +n02110063 +n02447366 +n02109047 +n02089867 +n02102177 +n02091134 +n02092002 +n02071294 +n02442845 +n02504458 +n02092339 +n02098105 +n02096437 +n02114712 +n02105641 +n02128925 +n02091635 +n02088466 +n02096051 +n02117135 +n02138441 +n02097130 +n02493509 +n02457408 +n02389026 +n02443484 +n02110341 +n02089078 +n02086910 +n02445715 +n02093256 +n02113978 +n02106382 +n02441942 +n02113712 +n02113186 +n02105162 +n02415577 +n02356798 +n02488702 +n02123159 +n02098413 +n02422699 +n02114855 +n02094433 +n02111277 +n02132136 +n02119022 +n02091467 +n02106550 +n02422106 +n02091831 +n02120505 +n02104365 +n02086079 +n02112706 +n02098286 +n02095889 +n02484975 +n02137549 +n02500267 +n02129604 +n02090721 +n02396427 +n02108000 +n02391049 +n02412080 +n02108915 +n02480495 +n02110806 +n02128385 +n02107683 +n02085936 +n02094114 +n02087046 +n02100583 +n02096177 +n02494079 +n02105056 +n02101556 +n02123597 +n02481823 +n02105505 +n02088094 +n02085782 +n02489166 +n02364673 +n02114548 
+n02134084 +n02480855 +n02090622 +n02113624 +n02093859 +n02403003 +n02097298 +n02108551 +n02493793 +n02107142 +n02096585 +n02107574 +n02107908 +n02086240 +n02102973 +n02112018 +n02093647 +n02397096 +n02437312 +n02483708 +n02097047 +n02106030 +n02099601 +n02093991 +n02110627 +n02106166 +n02326432 +n02108089 +n02097658 +n02088364 +n02111129 +n02100236 +n02486261 +n02115913 +n02486410 +n02487347 +n02099849 +n02108422 +n02104029 +n02492035 +n02110958 +n02099429 +n02094258 +n02099267 +n02395406 +n02112350 +n02109961 +n02101388 +n02113799 +n02095570 +n02128757 +n02101006 +n02115641 +n02097209 +n02342885 +n02097474 +n02120079 +n02095314 +n02088238 +n02408429 +n02133161 +n02328150 +n02410509 +n02492660 +n02398521 +n02112137 +n02510455 +n02093428 +n02105855 +n02111500 +n02085620 +n02123045 +n02490219 +n02099712 +n02109525 +n02454379 +n02111889 +n02088632 +n02090379 +n02443114 +n02361337 +n02105412 +n02483362 +n02437616 +n02107312 +n02325366 +n02091032 +n02129165 +n02102318 +n02100877 +n02074367 +n02504013 +n02363005 +n02102480 +n02113023 +n02086646 +n02497673 +n02087394 +n02127052 +n02116738 +n02488291 +n02091244 +n02114367 +n02130308 +n02089973 +n02105251 +n02134418 +n02093754 +n02106662 +n02444819 +n01882714 +n01871265 +n01872401 +n01877812 +n01873310 +n01883070 +n04086273 +n04507155 +n04147183 +n04254680 +n02672831 +n02219486 +n02317335 +n01968897 +n03452741 +n03642806 +n07745940 +n02690373 +n04552348 +n02692877 +n02782093 +n04266014 +n03344393 +n03447447 +n04273569 +n03662601 +n02951358 +n04612504 +n02981792 +n04483307 +n03095699 +n03673027 +n03947888 +n02687172 +n04347754 +n04606251 +n03478589 +n04389033 +n03773504 +n02860847 +n03218198 +n02835271 +n03792782 +n03393912 +n03895866 +n02797295 +n04204347 +n03791053 +n03384352 +n03272562 +n04310018 +n02704792 +n02701002 +n02814533 +n02930766 +n03100240 +n03594945 +n03670208 +n03770679 +n03777568 +n04037443 +n04285008 +n03444034 +n03445924 +n03785016 +n04252225 +n03345487 +n03417042 +n03930630 +n04461696 +n04467665 
+n03796401 +n03977966 +n04065272 +n04335435 +n04252077 +n04465501 +n03776460 +n04482393 +n04509417 +n03538406 +n03599486 +n03868242 +n02804414 +n03125729 +n03131574 +n03388549 +n02870880 +n03018349 +n03742115 +n03016953 +n04380533 +n03337140 +n03891251 +n02791124 +n04429376 +n03376595 +n04099969 +n04344873 +n04447861 +n03179701 +n03982430 +n03201208 +n03290653 +n04550184 +n07742313 +n07747607 +n07749582 +n07753113 +n07753275 +n07753592 +n07754684 +n07760859 +n07768694 +n12267677 +n12620546 +n13133613 +n11879895 +n12144580 +n12768682 +n03854065 +n04515003 +n03017168 +n03249569 +n03447721 +n03720891 +n03721384 +n04311174 +n02787622 +n02992211 +n04536866 +n03495258 +n02676566 +n03272010 +n03110669 +n03394916 +n04487394 +n03494278 +n03840681 +n03884397 +n02804610 +n03838899 +n04141076 +n03372029 +n11939491 +n12057211 +n09246464 +n09468604 +n09193705 +n09472597 +n09399592 +n09421951 +n09256479 +n09332890 +n09428293 +n09288635 +n03498962 +n03041632 +n03658185 +n03954731 +n03995372 +n03649909 +n03481172 +n03109150 +n02951585 +n03970156 +n04154565 +n04208210 +n03967562 +n03000684 +n01514668 +n01514859 +n01518878 +n01530575 +n01531178 +n01532829 +n01534433 +n01537544 +n01558993 +n01560419 +n01580077 +n01582220 +n01592084 +n01601694 +n01608432 +n01614925 +n01616318 +n01622779 +n01795545 +n01796340 +n01797886 +n01798484 +n01806143 +n01806567 +n01807496 +n01817953 +n01818515 +n01819313 +n01820546 +n01824575 +n01828970 +n01829413 +n01833805 +n01843065 +n01843383 +n01847000 +n01855032 +n01855672 +n01860187 +n02002556 +n02002724 +n02006656 +n02007558 +n02009912 +n02009229 +n02011460 +n02012849 +n02013706 +n02018207 +n02018795 +n02025239 +n02027492 +n02028035 +n02033041 +n02037110 +n02017213 +n02051845 +n02056570 +n02058221 +n01484850 +n01491361 +n01494475 +n01496331 +n01498041 +n02514041 +n02536864 +n01440764 +n01443537 +n02526121 +n02606052 +n02607072 +n02643566 +n02655020 +n02640242 +n02641379 +n01664065 +n01665541 +n01667114 +n01667778 +n01669191 +n01675722 +n01677366 
+n01682714 +n01685808 +n01687978 +n01688243 +n01689811 +n01692333 +n01693334 +n01694178 +n01695060 +n01704323 +n01697457 +n01698640 +n01728572 +n01728920 +n01729322 +n01729977 +n01734418 +n01735189 +n01737021 +n01739381 +n01740131 +n01742172 +n01744401 +n01748264 +n01749939 +n01751748 +n01753488 +n01755581 +n01756291 +n01629819 +n01630670 +n01631663 +n01632458 +n01632777 +n01641577 +n01644373 +n01644900 +n04579432 +n04592741 +n03876231 +n03483316 +n03868863 +n04251144 +n03691459 +n03759954 +n04152593 +n03793489 +n03271574 +n03843555 +n04332243 +n04265275 +n04330267 +n03467068 +n02794156 +n04118776 +n03841143 +n04141975 +n02708093 +n03196217 +n04548280 +n03544143 +n04355338 +n03891332 +n04328186 +n03197337 +n04317175 +n04376876 +n03706229 +n02841315 +n04009552 +n04356056 +n03692522 +n04044716 +n02879718 +n02950826 +n02749479 +n04090263 +n04008634 +n03085013 +n04505470 +n03126707 +n03666591 +n02666196 +n02977058 +n04238763 +n03180011 +n03485407 +n03832673 +n06359193 +n03496892 +n04428191 +n04004767 +n04243546 +n04525305 +n04179913 +n03602883 +n04372370 +n03532672 +n02974003 +n03874293 +n03944341 +n03992509 +n03425413 +n02966193 +n04371774 +n04067472 +n04040759 +n04019541 +n03492542 +n04355933 +n03929660 +n02965783 +n04258138 +n04074963 +n03208938 +n02910353 +n03476684 +n03627232 +n03075370 +n03874599 +n03804744 +n04127249 +n04153751 +n03803284 +n04162706 +n04228054 +n02948072 +n03590841 +n04286575 +n04456115 +n03814639 +n03933933 +n04485082 +n03733131 +n03794056 +n04275548 +n01768244 +n01770081 +n01770393 +n01773157 +n01773549 +n01773797 +n01774384 +n01774750 +n01775062 +n01776313 +n01784675 +n01990800 +n01978287 +n01978455 +n01980166 +n01981276 +n01983481 +n01984695 +n01985128 +n01986214 +n02165105 +n02165456 +n02167151 +n02168699 +n02169497 +n02172182 +n02174001 +n02177972 +n02190166 +n02206856 +n02226429 +n02229544 +n02231487 +n02233338 +n02236044 +n02256656 +n02259212 +n02264363 +n02268443 +n02268853 +n02276258 +n02277742 +n02279972 +n02280649 +n02281406 
+n02281787 +n01910747 +n01914609 +n01917289 +n01924916 +n01930112 +n01943899 +n01944390 +n01945685 +n01950731 +n01955084 +n02319095 +n02321529 +n03584829 +n03297495 +n03761084 +n03259280 +n04111531 +n04442312 +n04542943 +n04517823 +n03207941 +n04070727 +n04554684 +n03133878 +n03400231 +n04596742 +n02939185 +n03063689 +n04398044 +n04270147 +n02699494 +n04486054 +n03899768 +n04311004 +n04366367 +n04532670 +n02793495 +n03457902 +n03877845 +n03781244 +n03661043 +n02727426 +n02859443 +n03028079 +n03788195 +n04346328 +n03956157 +n04081281 +n03032252 +n03529860 +n03697007 +n03065424 +n03837869 +n04458633 +n02980441 +n04005630 +n03461385 +n02776631 +n02791270 +n02871525 +n02927161 +n03089624 +n04200800 +n04443257 +n04462240 +n03388043 +n03042490 +n04613696 +n03216828 +n02892201 +n03743016 +n02788148 +n02894605 +n03160309 +n03000134 +n03930313 +n04604644 +n04326547 +n03459775 +n04239074 +n04501370 +n03792972 +n04149813 +n03530642 +n03961711 +n03903868 +n02814860 +n07711569 +n07720875 +n07714571 +n07714990 +n07715103 +n07716358 +n07716906 +n07717410 +n07717556 +n07718472 +n07718747 +n07730033 +n07734744 +n04209239 +n03594734 +n02971356 +n03485794 +n04133789 +n02747177 +n04125021 +n07579787 +n03814906 +n03134739 +n03404251 +n04423845 +n03877472 +n04120489 +n03062245 +n03014705 +n03717622 +n03777754 +n04493381 +n04476259 +n02777292 +n07693725 +n03998194 +n03617480 +n07590611 +n04579145 +n03623198 +n07248320 +n04277352 +n04229816 +n02823428 +n03127747 +n02877765 +n04435653 +n03724870 +n03710637 +n03920288 +n03379051 +n02807133 +n04399382 +n03527444 +n03983396 +n03924679 +n04532106 +n06785654 +n03445777 +n07613480 +n04350905 +n04562935 +n03325584 +n03045698 +n07892512 +n03250847 +n04192698 +n03026506 +n03534580 +n07565083 +n04296562 +n02869837 +n07871810 +n02799071 +n03314780 +n04141327 +n04357314 +n02823750 +n13052670 +n07583066 +n03637318 +n04599235 +n07802026 +n02883205 +n03709823 +n04560804 +n02909870 +n03207743 +n04263257 +n07932039 +n03786901 +n04479046 +n03873416 
+n02999410 +n04367480 +n03775546 +n07875152 +n04591713 +n04201297 +n02916936 +n03240683 +n02840245 +n02963159 +n04370456 +n03991062 +n02843684 +n03482405 +n03942813 +n03908618 +n03902125 +n07584110 +n02730930 +n04023962 +n02769748 +n10148035 +n02817516 +n03908714 +n02906734 +n03788365 +n02667093 +n03787032 +n03980874 +n03141823 +n03976467 +n04264628 +n07930864 +n04039381 +n06874185 +n04033901 +n04041544 +n07860988 +n03146219 +n03763968 +n03676483 +n04209133 +n03782006 +n03857828 +n03775071 +n02892767 +n07684084 +n04522168 +n03764736 +n04118538 +n03887697 +n13044778 +n03291819 +n03770439 +n03124170 +n04487081 +n03916031 +n02808440 +n07697537 +n12985857 +n02917067 +n03938244 +n15075141 +n02978881 +n02966687 +n03633091 +n13040303 +n03690938 +n03476991 +n02669723 +n03220513 +n03127925 +n04584207 +n07880968 +n03937543 +n03000247 +n04418357 +n04590129 +n02795169 +n04553703 +n02783161 +n02802426 +n02808304 +n03124043 +n03450230 +n04589890 +n12998815 +n02992529 +n03825788 +n02790996 +n03710193 +n03630383 +n03347037 +n03769881 +n03871628 +n03733281 +n03976657 +n03535780 +n04259630 +n03929855 +n04049303 +n04548362 +n02979186 +n06596364 +n03935335 +n06794110 +n02825657 +n03388183 +n04591157 +n04540053 +n03866082 +n04136333 +n04026417 +n02865351 +n02834397 +n03888257 +n04235860 +n04404412 +n04371430 +n03733805 +n07920052 +n07873807 +n02895154 +n04204238 +n04597913 +n04131690 +n07836838 +n09835506 +n03443371 +n13037406 +n04336792 +n04557648 +n03187595 +n04254120 +n03595614 +n04146614 +n03598930 +n03958227 +n04069434 +n03188531 +n02786058 +n07615774 +n04525038 +n04409515 +n03424325 +n03223299 +n03680355 +n07614500 +n07695742 +n04033995 +n03710721 +n04392985 +n03047690 +n03584254 +n13054560 +n10565667 +n03950228 +n03729826 +n02837789 +n04254777 +n02988304 +n03657121 +n04417672 +n04523525 +n02815834 +n09229709 +n07697313 +n03888605 +n03355925 +n03063599 +n04116512 +n04325704 +n07831146 +n03255030 +n00483313 +n02432291 +n02356381 +n02377388 +n04028764 +n04381587 +n02279257 
+n04168199 +n00445055 +n02461128 +n03626760 +n04313503 +n00451635 +n02509515 +n04224842 +n09403734 +n02769290 +n13054073 +n03163222 +n00464478 +n03087069 +n04477219 +n03445617 +n00449054 +n00483705 +n04395106 +n03389611 +n04285965 +n04166281 +n04003856 +n03696301 +n00475787 +n04587404 +n09218641 +n02276355 +n03592669 +n04459909 +n04492375 +n09447666 +n00463543 +n04148703 +n04591517 +n03970546 +n04297750 +n02782778 +n02383231 +n03693474 +n02277094 +n03766044 +n02056228 +n03394272 +n03047052 +n00434075 +n04185946 +n02411999 +n03858418 +n12833149 +n02836035 +n03108853 +n04587559 +n04138261 +n02278024 +n03063485 +n02774921 +n09475044 +n02811204 +n03329302 +n04026813 +n03986562 +n03379204 +n03426134 +n02790669 +n03487090 +n03548402 +n08614632 +n04054361 +n03421485 +n03302671 +n03098959 +n02970408 +n03772584 +n03064935 +n09415584 +n11715430 +n12024445 +n02710201 +n03475581 +n13142504 +n03396074 +n03211789 +n03914337 +n03678558 +n03233123 +n00453396 +n00454395 +n00440382 +n04289027 +n00445226 +n11953610 +n04128413 +n00480211 +n00470966 +n12547503 +n03085219 +n02275773 +n02692086 +n04257790 +n00448748 +n02686379 +n12328567 +n03432129 +n03859000 +n12091377 +n02124313 +n00442847 +n04603399 +n03114379 +n02920369 +n03818343 +n02946127 +n02978055 +n12914923 +n02705429 +n00448232 +n12882945 +n04289690 +n07606669 +n02056728 +n11848479 +n03046921 +n12282933 +n02867966 +n12821505 +n02812949 +n04545305 +n02699770 +n04395651 +n02900160 +n04099003 +n02054711 +n12606545 +n03356858 +n01859190 +n03643737 +n02962200 +n03123553 +n09361517 +n02793089 +n00449517 +n02783994 +n10117851 +n12038585 +n04383839 +n10142391 +n07719213 +n03536122 +n02472987 +n03454536 +n11728099 +n02392824 +n03795758 +n04282872 +n00448872 +n02404432 +n03797182 +n03029197 +n03665924 +n12477163 +n02769963 +n03863262 +n01532325 +n04165409 +n04593077 +n04473108 +n03577090 +n09988063 +n00446804 +n03931765 +n00475014 +n02700064 +n03240892 +n12475242 +n11735053 +n04053508 +n02852173 +n02382750 +n03823111 +n04543772 
+n04112147 +n04433585 +n03175189 +n03596543 +n00445685 +n03307792 +n04589593 +n01814217 +n02993368 +n04303497 +n02811350 +n03355768 +n03699591 +n04590553 +n01893825 +n12726670 +n09916348 +n11544015 +n01318894 +n02133704 +n02367492 +n04506289 +n02069974 +n01900150 +n03207835 +n03363549 +n02831595 +n04970470 +n04160847 +n03767203 +n03928814 +n02302969 +n02918595 +n10401331 +n04231272 +n03717447 +n03063968 +n03380724 +n00825773 +n09988493 +n02740300 +n04539794 +n04121511 +n01323599 +n12937130 +n02428508 +n02980036 +n12061380 +n01887787 +n04214046 +n01787835 +n00466630 +n02979290 +n03927091 +n03231368 +n03904657 +n04469003 +n04196502 +n02122948 +n04544325 +n07868340 +n13876561 +n11925898 +n12158443 +n01595450 +n12454705 +n02069412 +n09618957 +n02357111 +n00451563 +n04197110 +n02276902 +n03111296 +n03909020 +n12303083 +n02082791 +n01956764 +n04269822 +n04207343 +n02433318 +n01888181 +n12682668 +n01592387 +n09793141 +n00466273 +n04026180 +n06255081 +n12172364 +n10145590 +n12311579 +n12173912 +n03822171 +n03140292 +n03027625 +n02739427 +n02060133 +n02431785 +n03219010 +n00447957 +n11910271 +n03620967 +n12547215 +n02409508 +n04290079 +n12329260 +n13901858 +n02008497 +n10304914 +n04524142 +n04279462 +n04233124 +n09733793 +n12822115 +n09475179 +n10151760 +n03418618 +n12858397 +n07735510 +n03549473 +n10098245 +n03653583 +n10604380 +n03375575 +n03885293 +n01527347 +n03237340 +n02760658 +n11953038 +n03187268 +n03004275 +n02393161 +n11965218 +n08580944 +n03938725 +n03900979 +n04144241 +n03760310 +n02376679 +n03237992 +n09432283 +n02379908 +n09918554 +n04041747 +n12012111 +n10331167 +n01612122 +n10147935 +n07691539 +n11669786 +n09403427 +n01935395 +n09903501 +n04439585 +n04459018 +n02780704 +n03720163 +n12899752 +n04118635 +n03404149 +n02429456 +n00449168 +n04516354 +n04317833 +n12075299 +n07878647 +n09438940 +n03361550 +n02027357 +n04317976 +n03092883 +n04526964 +n03985069 +n03610682 +n04028581 +n02277268 +n09433839 +n03846431 +n03919289 +n10146104 +n10260706 +n02686227 
+n03321103 +n00444846 +n01558307 +n01595168 +n03919096 +n11844892 +n04260364 +n02750070 +n03034244 +n03002096 +n04273972 +n11814584 +n04605321 +n07745466 +n02922798 +n03361380 +n12651229 +n08521623 +n04498389 +n00453313 +n04967882 +n12024690 +n03934656 +n02685082 +n04501550 +n09972458 +n03055418 +n07763629 +n03902756 +n09938449 +n09712696 +n02387346 +n03133415 +n07711080 +n03129753 +n03524150 +n02275560 +n03993053 +n03438661 +n11939180 +n00466524 +n11753355 +n03456024 +n03421324 +n07890540 +n11720643 +n02057035 +n00453126 +n04453037 +n01540832 +n03546235 +n03370387 +n02041875 +n02871439 +n03262072 +n01786646 +n02430830 +n02799175 +n05262422 +n03854722 +n12817694 +n04449966 +n01564773 +n02034971 +n03490119 +n02822579 +n07879953 +n04110178 +n04963588 +n04252653 +n01565078 +n02389128 +n02779435 +n10645017 +n04582205 +n08573842 +n10146002 +n03892178 +n03119396 +n03813078 +n07866868 +n03160740 +n03371875 +n02417387 +n03904782 +n03098688 +n02902687 +n01828556 +n04401680 +n04590933 +n01575401 +n07693048 +n02901114 +n03047941 +n04355511 +n11849871 +n10738111 +n03122073 +n12052787 +n01594004 +n01549886 +n02824058 +n03506184 +n11487732 +n12574866 +n12948053 +n10091450 +n00470554 +n00326094 +n12093329 +n04438897 +n07818995 +n12828791 +n13901321 +n10613996 +n10159533 +n02669295 +n02843158 +n06415688 +n14858292 +n09813219 +n12485653 +n03200231 +n02089468 +n03935234 +n01539925 +n12428076 +n10439373 +n01536644 +n02694662 +n02123242 +n03002711 +n03363749 +n02669534 +n03451798 +n11927215 +n02679257 +n09475925 +n10015485 +n12422129 +n03946162 +n02377291 +n07871720 +n12622297 +n12782915 +n01579260 +n11838916 +n10267311 +n12824053 +n03340723 +n02276749 +n04439712 +n02126139 +n04188179 +n02386853 +n07942152 +n02499316 +n04324387 +n10635788 +n04234887 +n12237641 +n03713436 +n04960582 +n04076713 +n01646292 +n03947798 +n02840134 +n04476972 +n09822830 +n03551395 +n04533802 +n02918964 +n00474657 +n12932966 +n01615458 +n01806364 +n12458550 +n11784497 +n03557360 +n10638922 +n09889941 
+n10689306 +n03358172 +n04295571 +n06596607 +n11853356 +n00482122 +n11760785 +n03150232 +n11778257 +n03059685 +n10105733 +n04104384 +n07691237 +n04326676 +n07684938 +n12666965 +n04177820 +n13918387 +n03398153 +n03914438 +n09932098 +n02988486 +n02977619 +n03317788 +n03484487 +n02988679 +n04062428 +n02568087 +n12866162 +n04227144 +n07875436 +n04082886 +n11753700 +n00470682 +n02122298 +n10145239 +n12755727 +n04214282 +n01852671 +n02378969 +n04108822 +n10382825 +n12392549 +n03973839 +n12258885 +n11782761 +n12389501 +n02940570 +n03405595 +n02969323 +n03207630 +n10169147 +n03805725 +n09847543 +n02415253 +n07880080 +n04305572 +n02042180 +n07565161 +n02871147 +n04438507 +n04445154 +n07842433 +n12029635 +n09750282 +n09621232 +n01858906 +n02761206 +n03986355 +n12591351 +n13916721 +n02905036 +n11894770 +n02377603 +n12924623 +n03950899 +n09454153 +n10247358 +n05261310 +n11943660 +n10804287 +n03560430 +n01756089 +n10618342 +n04283378 +n13926786 +n04238321 +n04393549 +n04461879 +n03502200 +n00440941 +n03494706 +n04148579 +n13902336 +n02780815 +n10726031 +n04124098 +n12344483 +n04384910 +n07681450 +n02030837 +n04059157 +n09247410 +n02714751 +n08633683 +n04520784 +n10141732 +n12371439 +n04499062 +n02931148 +n07609632 +n04536335 +n02874537 +n03013438 +n11786539 +n11690455 +n07600696 +n00478262 +n00466712 +n03399677 +n12441183 +n07895962 +n11966083 +n02990373 +n04241249 +n02068541 +n12513933 +n02356977 +n04252560 +n04087826 +n03455488 +n07619409 +n09787534 +n03680942 +n00446980 +n12384839 +n03416900 +n07821758 +n11853813 +n01606522 +n11780148 +n04969242 +n12413880 +n04130257 +n01322604 +n03061211 +n01959492 +n02842573 +n04313628 +n03815149 +n02445394 +n08547544 +n03222176 +n04070003 +n03075768 +n09695979 +n02877266 +n08583292 +n02870676 +n03657511 +n01621635 +n04284341 +n04136161 +n02836174 +n10247880 +n01744100 +n02882894 +n03408444 +n03411079 +n02366959 +n04399158 +n04542715 +n02787435 +n04251701 +n13863020 +n07890226 +n12245319 +n12849952 +n11626826 +n00887544 +n03140431 
+n03519387 +n03855604 +n07906111 +n02054036 +n11954161 +n03038281 +n00450998 +n12136392 +n02119477 +n04356925 +n02406647 +n04450133 +n12545635 +n01565599 +n02028900 +n07817024 +n02971167 +n04309049 +n02678897 +n12795555 +n11769803 +n01904886 +n02079851 +n12189987 +n04581829 +n12098403 +n01839330 +n12587803 +n03652932 +n08628141 +n03544238 +n04513827 +n01847806 +n03132076 +n10243137 +n03621377 +n10530959 +n14765422 +n04968139 +n12950314 +n02064816 +n02846511 +n10513823 +n11772408 +n03341297 +n03492922 +n03683606 +n02894337 +n02365480 +n09846755 +n03495039 +n01317813 +n12610328 +n02157206 +n01588002 +n03914831 +n03659686 +n10406765 +n09205509 +n02870526 +n07954211 +n10578471 +n11646694 +n03115762 +n07762913 +n12056758 +n12305986 +n11845913 +n02835915 +n02831237 +n07927512 +n12171098 +n02073831 +n07605040 +n02885462 +n02768114 +n04450994 +n11844371 +n03963645 +n02956699 +n02029378 +n01528396 +n10005934 +n04465666 +n04390977 +n11882074 +n03831382 +n04605163 +n06276501 +n02944075 +n05258051 +n07901457 +n12683571 +n02205219 +n13235503 +n02388735 +n03941231 +n14919819 +n12816508 +n11536673 +n13895262 +n02903204 +n10137825 +n07841345 +n07893253 +n01850192 +n07769731 +n11773987 +n03539678 +n12938193 +n10802507 +n03089879 +n00477392 +n01828096 +n09263912 +n13653902 +n04579667 +n01322983 +n08579352 +n07587023 +n07756951 +n07870167 +n10588357 +n01606809 +n13864035 +n02802544 +n07591961 +n02979399 +n04144539 +n02416820 +n11769176 +n09743792 +n09732170 +n04972451 +n13918274 +n01847089 +n01859689 +n04208065 +n07617051 +n10674713 +n07914271 +n07887461 +n03736064 +n03644858 +n03878963 +n04040247 +n07891433 +n01611969 +n07587618 +n02689144 +n10049363 +n04059516 +n10313239 +n03115400 +n01519563 +n01533893 +n03850245 +n11733548 +n03372549 +n01884834 +n02839110 +n07887192 +n03617312 +n07886463 +n03103396 +n07764847 +n01855476 +n07808587 +n12858871 +n03632729 +n10209731 +n04141712 +n03978686 +n03225988 +n00475273 +n09224725 +n04966543 +n01322221 +n03649674 +n13154494 +n03948830 
+n03320519 +n03723267 +n07869611 +n12342498 +n01827793 +n03145719 +n11821184 +n11956348 +n11857875 +n10339717 +n09450163 +n10756148 +n01591301 +n07915094 +n04422727 +n09719309 +n03349469 +n03389889 +n10718131 +n04298661 +n09747495 +n03676623 +n03547229 +n03062015 +n10734394 +n07817315 +n02852360 +n01850553 +n02952585 +n03587205 +n02009750 +n01540090 +n02947660 +n03656957 +n03378174 +n02508213 +n01572489 +n12008487 +n12185859 +n11691046 +n01323355 +n05262534 +n00448126 +n02432983 +n12038406 +n03883385 +n02411206 +n01643896 +n10159045 +n11675025 +n01803362 +n02009508 +n07920349 +n04098513 +n11617272 +n09913455 +n12390314 +n04171208 +n02995345 +n10634849 +n03173929 +n02749953 +n11845793 +n12796022 +n11955153 +n11816829 +n03032453 +n11984542 +n02992795 +n03712111 +n02873733 +n02759387 +n14915184 +n02381364 +n12686274 +n07857731 +n04518764 +n03010473 +n02418465 +n02359556 +n07894799 +n04104770 +n04335209 +n01848976 +n02006063 +n04454908 +n03002948 +n04220250 +n09923561 +n04102162 +n11958080 +n04598965 +n10173410 +n03067339 +n02003204 +n12686676 +n11986511 +n02311617 +n03367059 +n02761557 +n05578095 +n04041069 +n10575463 +n03325941 +n10082043 +n01806297 +n09691729 +n04593866 +n01813088 +n01625562 +n03906224 +n01652026 +n10236304 +n04102618 +n04321453 +n07820145 +n01575117 +n12788854 +n07823698 +n04206225 +n03216710 +n02421449 +n03343737 +n07560903 +n02872529 +n11989869 +n12071744 +n06278475 +n04492749 +n02920259 +n03798061 +n02420509 +n03316105 +n12052447 +n03974915 +n02904803 +n03430418 +n12291959 +n06892775 +n03875806 +n07903841 +n10282482 +n02683323 +n07862348 +n01849157 +n04469813 +n09944022 +n03342127 +n07592481 +n02936402 +n02405929 +n10002760 +n02537716 +n05259914 +n01560280 +n12694486 +n07879350 +n02377063 +n03637181 +n03409297 +n01607812 +n02808185 +n09239302 +n12055516 +n09712448 +n02859184 +n12772908 +n02735538 +n10333838 +n12336092 +n02386968 +n04613939 +n00452864 +n04535524 +n03174731 +n04189816 +n07607605 +n12909917 +n02387722 +n02960690 +n07715221 
+n02407071 +n10667477 +n09398076 +n04236809 +n01904806 +n01610552 +n12373100 +n12771390 +n04122685 +n07804771 +n15102455 +n03469175 +n03746005 +n02536456 +n03505667 +n11816336 +n09376198 +n10572706 +n03464053 +n02869155 +n07816164 +n04969798 +n02942349 +n14820180 +n01623615 +n12676703 +n03369276 +n03650551 +n02010272 +n02976123 +n01852400 +n02196119 +n04132158 +n03238586 +n07639069 +n03313333 +n10542761 +n12215022 +n00455173 +n10019406 +n12899537 +n04277826 +n09906449 +n04549629 +n11508382 +n15090065 +n10289462 +n04540255 +n02723165 +n04335693 +n01536334 +n03107488 +n12782530 +n14785065 +n02974348 +n09874862 +n04479939 +n03309465 +n09902954 +n12092417 +n03425595 +n12433081 +n07806774 +n12462805 +n01314781 +n10192839 +n01622120 +n07807171 +n03261019 +n02843553 +n04287747 +n02324587 +n09915434 +n01818299 +n01592694 +n03826186 +n03607659 +n01527917 +n03628511 +n02005399 +n04204081 +n02052775 +n04403413 +n03914106 +n12811027 +n01872772 +n04555700 +n02004855 +n04602762 +n02713003 +n04406817 +n11934807 +n03336282 +n09684901 +n03836976 +n11959862 +n03062336 +n03506028 +n04503413 +n07819896 +n03205669 +n11902200 +n07685218 +n03046133 +n10261624 +n10303814 +n03676087 +n04023695 +n07587111 +n07764155 +n01504179 +n03794136 +n03389761 +n13901211 +n02784124 +n04488530 +n02807731 +n07898443 +n04981658 +n04177755 +n03649161 +n04125257 +n10135129 +n03653110 +n10560106 +n07735687 +n03511333 +n11960245 +n03301568 +n03878066 +n10746931 +n04223299 +n04237423 +n07888229 +n01819734 +n12312728 +n09981939 +n03727465 +n13882276 +n02993194 +n11971927 +n09713108 +n03581125 +n09718936 +n14698884 +n03005285 +n03540914 +n03359436 +n03934042 +n07569644 +n04964878 +n07890068 +n07580253 +n01538630 +n03132666 +n03259009 +n02796318 +n12703190 +n01464844 +n11792029 +n04270371 +n13102775 +n02933649 +n02387254 +n02890188 +n04335886 +n04358491 +n02786837 +n03885194 +n04001265 +n03438071 +n10375402 +n02997910 +n03326795 +n00470830 +n02734725 +n03494537 +n08376250 +n07743544 +n02991847 +n04246271 
+n04156140 +n04381073 +n07732168 +n04951071 +n07977870 +n04334599 +n02838728 +n03326948 +n11723227 +n08182379 +n03686924 +n03821518 +n02382204 +n02080415 +n11788727 +n07732636 +n03860404 +n03898395 +n07867324 +n04392113 +n13237188 +n03263076 +n07843636 +n04968056 +n04397027 +n03320421 +n06267564 +n02880842 +n04115456 +n13862407 +n10289039 +n03128248 +n01457852 +n01536035 +n04579056 +n03937931 +n03036022 +n01804163 +n09913593 +n12841007 +n03115897 +n03256032 +n02475669 +n07924443 +n03061505 +n10001481 +n03600722 +n07842308 +n10696508 +n04215402 +n10588074 +n03614782 +n03995535 +n12091953 +n04113194 +n10092978 +n03011741 +n04381860 +n07819769 +n07905474 +n03288500 +n04225987 +n13223710 +n02879087 +n02920083 +n08640739 +n03362890 +n03996849 +n03849814 +n09694664 +n02407390 +n02910864 +n02388917 +n01668665 +n07616046 +n02932891 +n10553235 +n03652729 +n01615703 +n12801781 +n12164656 +n05302499 +n03801760 +n03332271 +n02901793 +n03941417 +n09833441 +n01623110 +n02807523 +n10598181 +n03725600 +n10368528 +n04116098 +n12719944 +n02045864 +n02173373 +n02811059 +n04479823 +n07816398 +n10572889 +n04142731 +n07687381 +n02799323 +n07865484 +n01858845 +n12684379 +n01842235 +n09242389 +n02028727 +n03527565 +n03438863 +n15019030 +n13907272 +n09659039 +n04251791 +n03683995 +n04137217 +n04389430 +n09785659 +n02016816 +n03124590 +n01859325 +n03138669 +n02999936 +n11926365 +n12686077 +n03517760 +n09734450 +n04563413 +n12074867 +n01564217 +n12521394 +n06267893 +n03594148 +n04139395 +n12369309 +n01544389 +n12048056 +n04524941 +n03016868 +n03653740 +n02795528 +n03687137 +n03766935 +n03361297 +n04263502 +n10043491 +n03446268 +n01994910 +n03891538 +n10091564 +n10226413 +n02755140 +n03500389 +n10237196 +n03625646 +n06596474 +n03360300 +n09730824 +n10732010 +n04469514 +n02904927 +n04961331 +n02936570 +n03680858 +n07585758 +n09199101 +n04050933 +n03712337 +n03911513 +n01556182 +n03102371 +n07928887 +n12133462 +n03974070 +n03971218 +n03292475 +n03425241 +n03440216 +n11995092 +n02894158 
+n02918112 +n10568358 +n11524451 +n03169176 +n04100519 +n07588193 +n06883725 +n02860640 +n07762114 +n04082710 +n07896893 +n10167152 +n03287351 +n02788021 +n08494231 +n01560935 +n03249342 +n04564581 +n09349648 +n07704205 +n03510244 +n12127460 +n09945745 +n11719286 +n11613459 +n12656369 +n03824381 +n07655263 +n09894143 +n04964001 +n02161457 +n07654298 +n07930433 +n02979074 +n02026948 +n13914608 +n07611267 +n02843276 +n09827363 +n10259780 +n04432662 +n11715678 +n12388858 +n03057920 +n10465451 +n03855214 +n07728181 +n09835348 +n03549732 +n04589325 +n03491032 +n00452034 +n03948242 +n01456756 +n07921615 +n02809105 +n12889713 +n07586894 +n07734879 +n07905979 +n12847374 +n12129134 +n02122580 +n04028074 +n02911332 +n09251407 +n07697825 +n04597309 +n02800213 +n03480579 +n07621618 +n04170933 +n03743279 +n01916481 +n04037220 +n10748620 +n02708433 +n12007196 +n02561381 +n04103769 +n03030880 +n04413969 +n03911658 +n04590746 +n00476389 +n04331639 +n07725789 +n01792429 +n02949542 +n07686720 +n04064862 +n04447028 +n01713764 +n09854218 +n04032603 +n04405907 +n15093298 +n04385536 +n11954345 +n01560793 +n09249034 +n03784270 +n03436549 +n01324610 +n02379183 +n07616487 +n04119478 +n03309356 +n12865037 +n12850168 +n04250850 +n03024064 +n04412097 +n02982515 +n00450070 +n10175248 +n11847169 +n12276872 +n12870891 +n10229883 +n10505613 +n03482252 +n09300905 +n02919890 +n07617611 +n10283170 +n01607962 +n01671125 +n07894551 +n04561287 +n00005787 +n10025635 +n02850732 +n03732020 +n02036711 +n07907429 +n03797896 +n03004824 +n12011620 +n10300303 +n03105467 +n03767745 +n07868508 +n07868200 +n03788047 +n07886057 +n04559451 +n09845401 +n04373704 +n02676938 +n02565324 +n02667478 +n02122878 +n03244047 +n01747589 +n04320973 +n13205058 +n02379430 +n11959632 +n10183931 +n07683490 +n10055410 +n04370288 +n03273551 +n13900422 +n07899434 +n04053677 +n07740461 +n11879722 +n04282494 +n02981911 +n03449451 +n07581249 +n03965456 +n11808468 +n13881644 +n11725973 +n12091213 +n13193856 +n02873520 +n02754656 
+n02431976 +n01324431 +n02385214 +n01888411 +n12680864 +n07731284 +n04337287 +n07631926 +n02549248 +n04395024 +n07585557 +n02776825 +n09460046 +n12023108 +n00475403 +n10098517 +n07902336 +n03683708 +n02412210 +n04397452 +n04583212 +n13869547 +n03632577 +n01616086 +n02763901 +n08256735 +n03015478 +n02084732 +n12178896 +n11966215 +n07605380 +n13869788 +n01847170 +n07744811 +n01854700 +n00444937 +n10422405 +n07801892 +n09688804 +n11879054 +n02802215 +n07908411 +n07822518 +n01558594 +n07935737 +n10730728 +n04436329 +n04294879 +n04972350 +n12911440 +n13886260 +n07578093 +n02537525 +n03703730 +n09607630 +n13865904 +n02360282 +n11731659 +n04126066 +n04212165 +n11618290 +n07588574 +n09269472 +n11896722 +n02892304 +n03487642 +n02028342 +n03321563 +n03135030 +n03522100 +n03253886 +n04095109 +n06470073 +n12603449 +n10644598 +n10260800 +n01535469 +n09696456 +n03553019 +n03963198 +n11918473 +n10314517 +n03002341 +n07574923 +n10421470 +n05716342 +n03244231 +n01730563 +n11691857 +n12807251 +n12345899 +n03142679 +n01531512 +n12307240 +n07835457 +n04535370 +n00451186 +n12481458 +n03434188 +n09734185 +n04578934 +n04167346 +n02747802 +n03459328 +n03301940 +n01562014 +n07690431 +n10642596 +n03696065 +n12781940 +n02759257 +n04392764 +n04218564 +n03499907 +n01536780 +n09751895 +n03235042 +n04570815 +n12070381 +n09448690 +n07625061 +n10178216 +n04560113 +n09457979 +n03858085 +n02421792 +n02944579 +n10085869 +n09718811 +n04103206 +n04239786 +n04501947 +n01321123 +n02390015 +n03964495 +n01554448 +n02925107 +n03028596 +n12483625 +n03227317 +n10701644 +n11968704 +n03900393 +n01851038 +n02276078 +n03132776 +n07585906 +n04480033 +n07880458 +n12887293 +n07921239 +n03307037 +n04595028 +n04244379 +n13131028 +n10313724 +n09436708 +n02694045 +n09941787 +n00449796 +n01817346 +n07928696 +n03401279 +n12901724 +n11646167 +n07682477 +n09415671 +n07900225 +n03607029 +n02692232 +n11834654 +n07935379 +n12437930 +n03762434 +n07922764 +n03595523 +n04546340 +n10686885 +n03516844 +n03767112 +n09896685 
+n03859608 +n03149686 +n07920872 +n12388143 +n10406391 +n04233715 +n04373089 +n02023992 +n01947396 +n12115180 +n00479616 +n03962852 +n02392434 +n12414035 +n14976871 +n03201776 +n10665587 +n03600285 +n04402449 +n08539072 +n03629231 +n12860365 +n03488438 +n03337383 +n12455950 +n10384392 +n02953455 +n03101796 +n07919572 +n03233744 +n01578180 +n01756508 +n04556533 +n02962843 +n02882190 +n03731483 +n01850873 +n05260240 +n03111177 +n09836519 +n03030557 +n11789066 +n02788572 +n07903101 +n04067818 +n07840804 +n01567678 +n12427184 +n03333610 +n02416964 +n10607291 +n07936548 +n05451384 +n02968074 +n07605597 +n02704949 +n07609215 +n01951274 +n07696977 +n03180384 +n04303357 +n03291741 +n02207805 +n10123844 +n03420345 +n12384227 +n02758863 +n02047975 +n03978966 +n03549199 +n04275175 +n09294877 +n09836343 +n11970586 +n02010728 +n10369317 +n12681893 +n03192543 +n12413165 +n12174521 +n11916696 +n10042845 +n07822197 +n04968749 +n10323634 +n12849416 +n02814774 +n05538625 +n03078802 +n12230794 +n07726095 +n03051249 +n12005656 +n11876432 +n12164881 +n09711435 +n01622483 +n09896170 +n07684289 +n03368352 +n07910048 +n03159535 +n00466377 +n01541386 +n11647703 +n09752023 +n07903731 +n12249542 +n03794798 +n11786131 +n02852043 +n10493685 +n09846894 +n01752585 +n01536186 +n07618432 +n09859152 +n02065026 +n02382635 +n07867616 +n03885788 +n04255586 +n03275681 +n11961100 +n12485981 +n04495698 +n03293741 +n13902048 +n03254862 +n07903962 +n01594787 +n11962272 +n03284886 +n07842202 +n10157128 +n02405302 +n04443766 +n06266633 +n02519862 +n01487506 +n03373943 +n04247876 +n04327204 +n03349771 +n09260907 +n10092794 +n12223764 +n03504723 +n11926833 +n01820052 +n13032381 +n03889871 +n03209359 +n04608923 +n15093137 +n15091304 +n03688405 +n09905185 +n03543112 +n11611356 +n03885028 +n03234164 +n07594066 +n02396014 +n03456186 +n09874725 +n11601333 +n02917521 +n03055857 +n02804123 +n12352844 +n12866002 +n09858165 +n12037691 +n02565072 +n04477387 +n02008643 +n07867021 +n04119360 +n09893191 +n02944146 
+n12435649 +n13197274 +n04974859 +n07751004 +n12003696 +n02762508 +n02680512 +n01743086 +n06998748 +n10607478 +n07613815 +n01559477 +n01859852 +n03239054 +n04466871 +n05263183 +n13173882 +n07897438 +n12427757 +n04400737 +n03291963 +n07682808 +n11692265 +n04130143 +n09445289 +n07696839 +n03835197 +n12821895 +n09734639 +n03365374 +n04305210 +n04962240 +n09871867 +n07897750 +n07616386 +n09443281 +n03641569 +n13882563 +n07680761 +n10498816 +n04034262 +n03533014 +n07928790 +n07690152 +n10060352 +n04124370 +n12453186 +n04509171 +n03013580 +n10604979 +n12515711 +n04971211 +n07693223 +n03786715 +n07894703 +n02761834 +n04232800 +n03437741 +n04045644 +n14976759 +n03042697 +n12557681 +n06275095 +n11678010 +n01586941 +n07684517 +n07822845 +n03483823 +n09951616 +n03180865 +n07861557 +n03644378 +n12848499 +n11962667 +n03886762 +n04238128 +n11979964 +n13915113 +n12791329 +n12457091 +n03341153 +n10267865 +n03484576 +n10186216 +n07612137 +n03843438 +n11807525 +n11931540 +n02027897 +n07614730 +n04116294 +n03469903 +n10017272 +n03688605 +n07860103 +n03981566 +n01888045 +n03345837 +n11998888 +n02071636 +n02726017 +n04310157 +n04607869 +n01622959 +n08524735 +n03119203 +n12031927 +n03610524 +n02807616 +n04056180 +n03233905 +n03374473 +n14810561 +n11944954 +n03121431 +n09750891 +n08505018 +n10727171 +n12357485 +n12571781 +n12067193 +n07586604 +n02086753 +n03548086 +n02560110 +n07804900 +n02880393 +n04208427 +n12931542 +n01594968 +n05218119 +n03520493 +n03727605 +n12687698 +n03612965 +n04135315 +n07730320 +n10540114 +n07599911 +n01323493 +n02115096 +n04590263 +n12043836 +n02861387 +n09836786 +n04966941 +n02816768 +n13131618 +n10701962 +n02919792 +n03442597 +n04325041 +n03333129 +n04091693 +n04950952 +n10631309 +n04177931 +n13234678 +n01970667 +n07748416 +n07893642 +n07691650 +n03660909 +n04145863 +n11945514 +n10334009 +n12336973 +n03954393 +n04558478 +n09899929 +n03487533 +n07816575 +n07877187 +n07863547 +n01603812 +n02098906 +n04973585 +n03674440 +n04371050 +n12243109 +n07871234 
+n02928049 +n07574504 +n07889274 +n12141167 +n04543996 +n03080633 +n03423479 +n07879659 +n04380916 +n10514429 +n07584423 +n04009801 +n12479537 +n07606538 +n07698543 +n12353754 +n10132035 +n03367545 +n04245508 +n09811852 +n02024763 +n04052442 +n10120330 +n12352639 +n12606438 +n07752966 +n09772930 +n02535759 +n11737534 +n10345015 +n12427566 +n09705784 +n04112654 +n02985963 +n03758089 +n12953484 +n07906572 +n02881757 +n12739332 +n03718458 +n03407865 +n07775050 +n03210552 +n09452395 +n09789566 +n10566072 +n10559996 +n07826930 +n12414932 +n01887474 +n03026907 +n07751148 +n10223177 +n03957420 +n03788601 +n12244819 +n12421137 +n04266162 +n10038409 +n02981024 +n03228967 +n11825351 +n12058822 +n11963932 +n03041449 +n03046029 +n07590502 +n02932523 +n02152881 +n04970398 +n07887967 +n12812478 +n12421917 +n02708711 +n11870747 +n04290507 +n07934282 +n01608265 +n12070583 +n03205574 +n02305085 +n07866015 +n02960903 +n10098624 +n00481803 +n07938007 +n02693246 +n03923379 +n04103665 +n11792742 +n12489815 +n04971313 +n01668892 +n01055165 +n03215508 +n12104501 +n07899292 +n12822955 +n07713074 +n03842012 +n02449350 +n07868955 +n02835829 +n12283542 +n04525584 +n07910656 +n11625003 +n03987266 +n02805983 +n15091846 +n09736945 +n04973816 +n02439398 +n01519873 +n07899003 +n03019938 +n07582152 +n01885498 +n12108871 +n02934451 +n04327682 +n07696625 +n09750770 +n12084890 +n03960374 +n07585107 +n01570839 +n11905392 +n06277135 +n07842044 +n03751269 +n04398951 +n12861892 +n12649539 +n07596967 +n07580592 +n12845413 +n07690739 +n07804657 +n04334105 +n03779128 +n03268918 +n03066359 +n02744323 +n12596148 +n04272389 +n07832416 +n10210911 +n01548865 +n03221351 +n15091669 +n07878926 +n07607967 +n12171966 +n02846141 +n07576781 +n02922292 +n10092643 +n01732614 +n02578771 +n02864593 +n03537241 +n09635534 +n03268645 +n07852833 +n13873917 +n12640839 +n03506727 +n10536416 +n09976429 +n10692482 +n07600285 +n04156946 +n07818689 +n02605703 +n02710429 +n02890351 +n03408054 +n03121298 +n02731629 +n12450840 
+n04061681 +n10153414 +n07648913 +n07891309 +n01562265 +n14973585 +n01610226 +n06267991 +n03302938 +n07822323 +n07826091 +n02764398 +n10406266 +n09282208 +n01734104 +n04283096 +n03530910 +n11542137 +n02610664 +n03856012 +n01531811 +n07862611 +n11625632 +n12643313 +n02469248 +n03333711 +n02907082 +n02122430 +n01559804 +n09744161 +n10187990 +n12015525 +n07844867 +n07887304 +n02878425 +n02009380 +n11448153 +n10655594 +n12566954 +n11901977 +n03999160 +n02389779 +n07928488 +n12785889 +n04281375 +n03745146 +n03224603 +n04594828 +n12835331 +n09715427 +n11615026 +n09972010 +n04038231 +n02379329 +n03445326 +n10753442 +n04249882 +n11727738 +n07866723 +n04282992 +n11621281 +n01566645 +n03919430 +n11980682 +n03480719 +n11625804 +n10467395 +n09436444 +n07867751 +n03684611 +n03788498 +n12062626 +n07808904 +n07690585 +n03865557 +n10711766 +n10465831 +n04380255 +n12166128 +n04432203 +n07892418 +n10432441 +n12991184 +n04209613 +n04459773 +n09666883 +n07807472 +n09873899 +n12939874 +n04545748 +n09637339 +n07919441 +n03987376 +n03645577 +n03437430 +n10671613 +n02964843 +n09707289 +n11700058 +n03877351 +n03518445 +n07643200 +n02140049 +n12683791 +n12418221 +n04154152 +n03397947 +n03238131 +n11851839 +n04545858 +n07744682 +n02995871 +n07593199 +n03543394 +n10293332 +n12658481 +n11599324 +n02705201 +n03920867 +n08249459 +n02876084 +n03937835 +n01397871 +n03849679 +n12016567 +n04208936 +n07696728 +n13148208 +n01904029 +n08659861 +n07878785 +n07827130 +n03390983 +n02624807 +n03319745 +n03994614 +n00446493 +n12477583 +n02920658 +n04602956 +n02688273 +n07577538 +n04350581 +n09283405 +n04074185 +n04495843 +n03538179 +n03454885 +n03878211 +n10308168 +n08518171 +n02660208 +n07904760 +n07928367 +n10174445 +n02137015 +n02863426 +n07700003 +n04015908 +n03946076 +n11725821 +n01794344 +n04364160 +n01663782 +n04283255 +n02822064 +n04406239 +n02782681 +n11990313 +n03563460 +n02957008 +n07889814 +n07896060 +n03683079 +n04278447 +n13011595 +n11810358 +n03836451 +n12827537 +n03545470 +n03213538 
+n07929351 +n03471190 +n02882301 +n03625943 +n03397087 +n11955896 +n04097373 +n03145522 +n03034405 +n02889646 +n02928299 +n09652149 +n01641391 +n04593524 +n07651025 +n03719343 +n03884778 +n03452594 +n02174659 +n12345280 +n03039827 +n03309687 +n11635433 +n02057330 +n01664990 +n09779790 +n02011016 +n09689958 +n07770763 +n03010915 +n03443912 +n02946509 +n13050397 +n03031012 +n04217546 +n04124202 +n12766869 +n04177041 +n12050533 +n03251932 +n03086580 +n03918737 +n04386792 +n03176594 +n01577035 +n01669654 +n01818832 +n10441962 +n03885904 +n03724756 +n02925666 +n03549589 +n03062122 +n02828427 +n12604228 +n03624400 +n07725888 +n03873699 +n01503976 +n02887079 +n03610098 +n02940385 +n04610013 +n03652100 +n04496872 +n04008385 +n02583890 +n10476467 +n03395514 +n03306385 +n04228581 +n02389261 +n12576323 +n01579149 +n01623425 +n02593019 +n03995265 +n02124484 +n12745386 +n04355267 +n02643836 +n01614343 +n03810952 +n04058594 +n12278650 +n03474779 +n02823510 +n00442437 +n12039317 +n04574067 +n03762602 +n02153109 +n03518943 +n04289827 +n02288268 +n07749969 +n04132985 +n03213826 +n04307986 +n03567066 +n02049088 +n04408871 +n03522003 +n09305898 +n04266375 +n08571898 +n03039259 +n01587526 +n03261603 +n00464277 +n02627532 +n02992368 +n03640850 +n03037404 +n04525191 +n02106854 +n07772147 +n04173511 +n12761284 +n03257210 +n02813544 +n07740342 +n04066270 +n03070059 +n03616428 +n02904233 +n03209910 +n04389854 +n03078995 +n03193260 +n01488038 +n01754533 +n12629305 +n02055107 +n11664418 +n04228693 +n03353951 +n03440682 +n03025250 +n03300216 +n02042046 +n04226826 +n03342015 +n03090000 +n02050313 +n03492250 +n01535690 +n01572654 +n03465718 +n02879309 +n06278338 +n04113406 +n03695857 +n09720256 +n01860002 +n02851939 +n09828216 +n02564270 +n03528901 +n02542432 +n11978961 +n01670802 +n03956623 +n01612275 +n09376786 +n03222318 +n02813645 +n02213543 +n13898207 +n03616763 +n03616979 +n11904109 +n04212282 +n04608435 +n02042472 +n04198453 +n03216402 +n02015357 +n12282737 +n02699629 +n12866635 
+n02048353 +n02933340 +n01793715 +n12001707 +n02878222 +n03187037 +n03105306 +n04080705 +n04254009 +n01623880 +n02839592 +n03436182 +n01591123 +n01318279 +n03002816 +n13155095 +n03141702 +n03775388 +n12165170 +n03322836 +n03259401 +n04471148 +n03911767 +n12585629 +n04317325 +n04257986 +n03133050 +n02035210 +n12891305 +n11882426 +n04491388 +n12948251 +n03498781 +n04262161 +n03775636 +n09915651 +n07584332 +n07852614 +n11626152 +n03901750 +n09723067 +n04265904 +n09920283 +n02397744 +n03253796 +n07712959 +n03898129 +n01743936 +n02075612 +n04560292 +n03479397 +n04334365 +n04357121 +n10145902 +n03844673 +n09854421 +n12687957 +n12598027 +n03944138 +n01839750 +n07722888 +n04258859 +n03088389 +n03351434 +n03509608 +n01677747 +n03145147 +n12046815 +n03505133 +n01629962 +n03333252 +n03993703 +n02962061 +n04529962 +n03463666 +n07681691 +n12160857 +n04187233 +n09331251 +n11614713 +n04376400 +n12301445 +n12633994 +n03883524 +n11614420 +n13062421 +n03645011 +n03293863 +n11640132 +n02579928 +n02854739 +n04461437 +n07729384 +n02977936 +n02836392 +n03593122 +n01666228 +n07820683 +n07568502 +n11910460 +n09348460 +n09712324 +n02403740 +n03482877 +n04370774 +n07750146 +n12992177 +n03152303 +n04134008 +n09805324 +n01611800 +n04374315 +n07586099 +n02032222 +n01979874 +n04350769 +n02907873 +n03016609 +n02543565 +n03256166 +n03016737 +n02419336 +n03268790 +n03559999 +n07765999 +n04607035 +n02416104 +n02123917 +n12484784 +n03225108 +n10739391 +n03506880 +n02918831 +n03045228 +n12516828 +n01314663 +n04172342 +n02768226 +n12368028 +n01500476 +n01558149 +n03604156 +n04035912 +n02359915 +n12261571 +n03875955 +n01887623 +n03871371 +n03390786 +n12494794 +n03826039 +n04465358 +n03838298 +n03165466 +n04229737 +n01321770 +n04354026 +n02998003 +n04114844 +n10611613 +n03600475 +n01909906 +n00466880 +n04284869 +n07722485 +n04496614 +n03298716 +n02285801 +n04081699 +n07765208 +n12659539 +n11618525 +n11757653 +n07727048 +n03913343 +n12070016 +n02697675 +n04284572 +n02595702 +n04482297 +n03516996 
+n03704549 +n02040266 +n04476116 +n01323261 +n03823216 +n07696403 +n03226880 +n09734535 +n03950537 +n01671479 +n03049924 +n12593994 +n04568841 +n03604400 +n01837072 +n01754370 +n03122202 +n12338454 +n04094720 +n04150980 +n03429682 +n03884926 +n03378005 +n02434954 +n03461288 +n02893692 +n04472563 +n10472129 +n04590021 +n07739344 +n04162433 +n03395859 +n12059314 +n03498662 +n03678729 +n02927764 +n02770211 +n11710393 +n07730207 +n04178190 +n07772935 +n03801880 +n04414675 +n12729521 +n12203529 +n04122578 +n04575824 +n06267655 +n03698360 +n02804515 +n02431337 +n08598568 +n02893608 +n02270623 +n00479440 +n11616662 +n02884994 +n04305323 +n02407625 +n04476831 +n04222307 +n03179910 +n11623967 +n00446311 +n00454983 +n02886434 +n12279458 +n03723781 +n11816121 +n02403231 +n11808299 +n07816296 +n03219483 +n02657694 +n00453478 +n02816656 +n02625851 +n04112752 +n03339529 +n12171316 +n02044517 +n04137773 +n01486838 +n03015149 +n12911673 +n03967270 +n03498441 +n11672269 +n03386870 +n11615967 +n02580679 +n01681653 +n02793199 +n02824319 +n10727458 +n02555863 +n01533000 +n02175916 +n12064389 +n04383015 +n02469472 +n03101664 +n03623338 +n12295796 +n02869249 +n01792042 +n03447075 +n04453390 +n04382438 +n04112252 +n03332393 +n12729729 +n01851207 +n04269270 +n12333771 +n06272612 +n03135532 +n02927887 +n11711537 +n12301180 +n04107743 +n01813948 +n03282295 +n09714694 +n00483409 +n01504344 +n04279353 +n04040373 +n12658308 +n04134523 +n10104064 +n12056601 +n04525417 +n07819166 +n12263038 +n02072798 +n03125057 +n03367410 +n04000592 +n03549897 +n01877606 +n01564914 +n12307076 +n02855925 +n03176763 +n12271933 +n04121728 +n07690511 +n02825442 +n04442441 +n01630901 +n03088580 +n02499808 +n10675010 +n01531971 +n02273392 +n01526521 +n01531344 +n03667664 +n02888270 +n04412416 +n07733394 +n04559910 +n04105704 +n11792341 +n04201064 +n01693175 +n04555291 +n02908773 +n01976868 +n03529175 +n03365231 +n03622839 +n04258333 +n03327133 +n03425769 +n12477747 +n03718935 +n11727540 +n07933799 +n03030262 
+n12043673 +n02619550 +n07937461 +n12198286 +n08560295 +n12402348 +n01733957 +n12344700 +n02763604 +n11925303 +n01557962 +n03927299 +n11611758 +n03035252 +n09454412 +n04004990 +n03456299 +n02175569 +n03668279 +n12352990 +n03507241 +n01534155 +n12278371 +n02499022 +n03822767 +n01318381 +n04024983 +n04277493 +n11934616 +n02027075 +n11611561 +n03454442 +n02236355 +n01732789 +n07722052 +n01489501 +n04409625 +n10563403 +n01817263 +n07757511 +n03770316 +n02977438 +n01840775 +n03607923 +n03322704 +n02375302 +n01614038 +n01646555 +n03952576 +n02946824 +n12847008 +n03016389 +n11809594 +n03165096 +n03839671 +n02687821 +n01689081 +n03822656 +n02597608 +n12336727 +n01579578 +n03631922 +n03904909 +n11658331 +n04224543 +n12621410 +n03870672 +n04252331 +n09720842 +n01396048 +n11988596 +n00483205 +n02871005 +n01597022 +n02382039 +n07743902 +n02358890 +n07877961 +n05263448 +n01862399 +n04136800 +n10624540 +n11990167 +n02731398 +n03366974 +n03490006 +n01561732 +n02626265 +n10627252 +n12402051 +n08517676 +n10488656 +n03099274 +n03718581 +n11806219 +n01830042 +n07728585 +n03732114 +n10755080 +n03359285 +n07720277 +n03354207 +n01596273 +n04416005 +n01847253 +n07733567 +n09725653 +n04274985 +n00449977 +n07772274 +n12063639 +n01530439 +n01322508 +n04397768 +n07273802 +n04261281 +n10524076 +n01678343 +n03410938 +n01797020 +n02388832 +n07719616 +n03639497 +n09787765 +n07721018 +n11818069 +n04185529 +n11644462 +n12074408 +n00483848 +n01583495 +n11891175 +n03347617 +n03308481 +n02535258 +n07750872 +n07748157 +n02855701 +n04584373 +n02461830 +n02912557 +n12277578 +n03604311 +n03643253 +n03031152 +n04039742 +n03435743 +n13908201 +n04150153 +n03250405 +n01410457 +n02357401 +n12588780 +n12729315 +n01690149 +n02538216 +n03171228 +n02424909 +n06274760 +n03775747 +n04211857 +n12429352 +n12272239 +n11759853 +n03401129 +n12649317 +n02625258 +n12651611 +n03603442 +n02803934 +n03861271 +n02605936 +n02018368 +n12711984 +n02811936 +n04612026 +n01339471 +n02923682 +n09194227 +n04346157 +n03939178 
+n12635532 +n01593028 +n01793249 +n02380464 +n12400720 +n07708398 +n12020941 +n12492106 +n12850336 +n12749679 +n02892948 +n12591017 +n03193423 +n01791463 +n11979527 +n12134025 +n12167075 +n09308743 +n13108545 +n01618503 +n07827284 +n07724492 +n02338145 +n04533946 +n01586020 +n07598256 +n01603953 +n12646740 +n03067518 +n04046277 +n01532511 +n07769584 +n11644046 +n12753573 +n02681392 +n08492461 +n07749446 +n04409384 +n01791954 +n12330891 +n04560882 +n10145480 +n04250473 +n02655848 +n02903126 +n11736851 +n11901294 +n12865824 +n03870105 +n00449892 +n04240752 +n11851258 +n04200537 +n12049562 +n01521399 +n03565830 +n07860447 +n03067212 +n01664674 +n07561590 +n02727141 +n02324514 +n02372952 +n01584853 +n07766173 +n11811706 +n03097362 +n04200258 +n02732572 +n01853195 +n12282527 +n09838621 +n02764505 +n04256891 +n12337617 +n12635955 +n07831267 +n11628793 +n12316572 +n07807834 +n02037869 +n01821869 +n02820556 +n04517211 +n01839086 +n03842986 +n07698401 +n02386224 +n07841800 +n01830915 +n11616486 +n11902389 +n03427202 +n12727101 +n01851573 +n02125494 +n07746186 +n11628087 +n07746551 +n03943115 +n11892029 +n02861022 +n11733312 +n01852329 +n09392402 +n12336224 +n07887099 +n03403643 +n04414199 +n07895100 +n02264232 +n02317781 +n07823460 +n07755929 +n02524202 +n04324297 +n11627512 +n01585715 +n02922578 +n00479887 +n02687423 +n02416880 +n11784126 +n12073991 +n01853870 +n01561452 +n04187970 +n10300154 +n02520147 +n12294124 +n07743224 +n12066018 +n11634736 +n02041678 +n11626585 +n02386141 +n03986949 +n07860331 +n12356023 +n12072722 +n03082280 +n12083113 +n12979829 +n01448594 +n03007444 +n07858978 +n01641739 +n02043333 +n12020736 +n02751215 +n04528079 +n01538200 +n07925608 +n12091550 +n03742019 +n03518305 +n01642539 +n03414029 +n04363991 +n03767966 +n02596067 +n01586374 +n02885882 +n04080138 +n11617631 +n02033779 +n09451237 +n02310585 +n12648045 +n03955489 +n01752736 +n07899899 +n02299505 +n01579410 +n02156871 +n02998841 +n03759661 +n02050809 +n02683454 +n11621950 +n02910145 
+n04967801 +n07896661 +n11906917 +n12275675 +n11611233 +n07736692 +n02312640 +n12588320 +n04399537 +n12757303 +n04197781 +n12717224 +n11635152 +n03122295 +n01792955 +n13133932 +n02518324 +n01584695 +n02915904 +n02967294 +n04345201 +n03019434 +n02470238 +n03049782 +n03101517 +n12709688 +n03716887 +n02422391 +n12638753 +n00288384 +n02162561 +n02053584 +n01317294 +n03334291 +n07814634 +n12273768 +n12406715 +n11644226 +n01646802 +n03460147 +n12338796 +n01972541 +n02147947 +n03890093 +n04127395 +n01581984 +n01681328 +n02213239 +n04582869 +n03254189 +n03274265 +n03186285 +n11839823 +n01624833 +n09792969 +n07891189 +n12023726 +n07619208 +n03466600 +n01849676 +n12190869 +n03079136 +n12317296 +n13001930 +n00477639 +n02944459 +n03903733 +n04131208 +n12710295 +n12180885 +n11612349 +n03443149 +n03982331 +n04264765 +n12642090 +n03237416 +n13868944 +n04046400 +n11705171 +n11979715 +n12597134 +n01609956 +n01568294 +n01469103 +n00443692 +n01606672 +n04556408 +n07690019 +n03977592 +n03358726 +n12696492 +n01573240 +n11632619 +n01772664 +n03453231 +n04179712 +n03646020 +n01812662 +n04306592 +n07724654 +n13908580 +n02903852 +n04284438 +n13132656 +n04317063 +n07829248 +n01589718 +n02654745 +n12294331 +n12515925 +n07900825 +n07721195 +n04189282 +n11907689 +n01624537 +n12333530 +n07762244 +n11757851 +n01599159 +n04038338 +n01568892 +n12691661 +n09744834 +n04307767 +n03120778 +n07920540 +n03781683 +n04185804 +n12080820 +n04354182 +n07574426 +n02579303 +n03046802 +n12078172 +n03210245 +n01614556 +n02304432 +n07713267 +n09724656 +n02861147 +n12755387 +n01483830 +n12921868 +n12026018 +n07817871 +n12062781 +n04241573 +n11621727 +n03376159 +n11815721 +n13007034 +n03540090 +n00450866 +n11619455 +n01528845 +n01568720 +n12743352 +n02871314 +n03606251 +n01490670 +n04246060 +n02053425 +n10780284 +n01915700 +n04510706 +n00456465 +n01563945 +n11809094 +n09855433 +n04112579 +n03855333 +n09809925 +n03413684 +n02123478 +n12070712 +n03651843 +n02032355 +n01591005 +n01646648 +n02752615 +n02415829 
+n03283221 +n04368496 +n01573360 +n02321170 +n10348526 +n04446844 +n07763792 +n12077944 +n04431025 +n02895438 +n10082687 +n07714188 +n02262449 +n03090172 +n12491017 +n01558461 +n12754781 +n04070415 +n04297098 +n03424862 +n01970164 +n09833536 +n01793435 +n01670535 +n09894445 +n09676247 +n01548492 +n12501202 +n03250089 +n03358380 +n02578928 +n12020184 +n02301935 +n03393017 +n12340755 +n01849863 +n01748906 +n03075946 +n01810268 +n01984245 +n04555400 +n12286988 +n04097760 +n02050586 +n12104238 +n01679962 +n02709101 +n01569060 +n12790430 +n01757901 +n13199717 +n11815918 +n07827410 +n02970534 +n12942572 +n07924276 +n04103918 +n11704093 +n07908647 +n07601686 +n12172906 +n04084889 +n02381261 +n02299157 +n11978713 +n12460957 +n02963503 +n03272810 +n12469517 +n03443005 +n01797307 +n02952237 +n11908549 +n13912540 +n03428226 +n10276477 +n01757343 +n01443243 +n01607600 +n03580518 +n12709103 +n07579688 +n04329834 +n12710415 +n11808932 +n10583790 +n02213788 +n11622184 +n12596709 +n02216211 +n07721942 +n07765361 +n01848453 +n11724109 +n02028451 +n02935017 +n12046028 +n10629939 +n00441073 +n07900958 +n12451399 +n02823964 +n04210120 +n01848840 +n10485883 +n07767709 +n02432704 +n11622591 +n03210372 +n07848196 +n11992806 +n02953197 +n07620689 +n01521756 +n03571625 +n03158186 +n12647560 +n02065407 +n01572782 +n09890749 +n05581932 +n07754451 +n03350204 +n13044375 +n12294723 +n12482893 +n04434531 +n12989938 +n12196336 +n01701859 +n07746334 +n11941924 +n02047411 +n12650379 +n10486166 +n01599556 +n01567879 +n12675876 +n01682435 +n02043808 +n12362668 +n12306089 +n02999138 +n01679626 +n03557270 +n01546039 +n11901759 +n01549053 +n11883328 +n06596727 +n03193107 +n11612018 +n03300443 +n03612010 +n03668488 +n12648888 +n01448291 +n11632167 +n10262445 +n09742101 +n09717233 +n04299370 +n03094159 +n04536595 +n03514693 +n02029706 +n02886321 +n07816052 +n04045255 +n01851731 +n02627292 +n01841288 +n02739889 +n02932693 +n03784896 +n04569063 +n07902799 +n03863108 +n02607470 +n13200651 +n07916183 
+n01573898 +n04347119 +n10076604 +n13033577 +n01824035 +n03630262 +n04426316 +n03064250 +n12262018 +n12048399 +n12279772 +n04143140 +n07829331 +n12891643 +n01826680 +n12646605 +n13103877 +n02023855 +n03086868 +n04163530 +n03736470 +n04358117 +n13872822 +n03159640 +n01680655 +n11611087 +n03980478 +n02978478 +n01555004 +n12402840 +n07763987 +n04387706 +n04979002 +n03258330 +n09856671 +n11624192 +n01538059 +n02003839 +n12552309 +n10469874 +n01576076 +n03643149 +n04419868 +n04586581 +n00483508 +n03131967 +n01847407 +n07929172 +n09683757 +n03786621 +n04369282 +n12733870 +n11612575 +n11619227 +n03301833 +n02176439 +n01569971 +n07935043 +n02563792 +n02051059 +n04482177 +n11859472 +n11710136 +n04115144 +n07864934 +n07691758 +n02620167 +n07748276 +n03415486 +n07835921 +n00452152 +n01848323 +n12906214 +n12075010 +n01563449 +n01499396 +n01570267 +n12047345 +n07920989 +n07601572 +n02683558 +n04428634 +n04345028 +n12161969 +n03460040 +n02561514 +n02006364 +n03582959 +n11812910 +n13185269 +n04297847 +n07896165 +n01552813 +n12361946 +n02031585 +n12766595 +n11622368 +n11695599 +n11615387 +n02509197 +n12409470 +n01314388 +n11758799 +n09846469 +n02675219 +n04253057 +n04041243 +n12276628 +n04381724 +n01855188 +n02203152 +n04403925 +n11895092 +n11924849 +n04172904 +n11888800 +n01546506 +n07906718 +n01489920 +n03436417 +n03615655 +n07765073 +n02434190 +n02004492 +n12282235 +n12406488 +n11981192 +n10373390 +n13183056 +n04332074 +n12818346 +n07731006 +n02598573 +n02438580 +n01957335 +n03356982 +n10288964 +n02629230 +n02042759 +n12319414 +n01451426 +n03521675 +n02016066 +n01813532 +n13207335 +n11805544 +n04401828 +n02952109 +n03963294 +n10013811 +n12058630 +n01551711 +n01574560 +n01858780 +n10093818 +n03858183 +n01550172 +n03571280 +n02309242 +n10258786 +n01569423 +n10134178 +n08578517 +n04445327 +n03250279 +n02584449 +n03223553 +n04523831 +n04485423 +n02050442 +n04474035 +n04528968 +n02649546 +n01913166 +n09971273 +n04517408 +n02437482 +n03824713 +n03778817 +n07643026 +n01613177 
+n12022054 +n07714448 +n07592768 +n00454493 +n03296328 +n02305929 +n03084834 +n03698815 +n12093600 +n08649711 +n03466493 +n04067658 +n03041114 +n03514451 +n01491006 +n04178329 +n03790953 +n03938401 +n02048115 +n07768858 +n03273740 +n10333601 +n05418717 +n12754003 +n02098806 +n03314608 +n01565930 +n12113195 +n12284821 +n12483427 +n04332580 +n10382710 +n03416094 +n02837887 +n03917198 +n14131950 +n04414476 +n11861641 +n11903671 +n01841441 +n09872066 +n01806467 +n04964799 +n00467320 +n01595974 +n03220692 +n01339083 +n01825278 +n11727358 +n04518343 +n11984144 +n07724269 +n02292692 +n02324850 +n01753032 +n01624115 +n11816649 +n07930062 +n02460451 +n12319204 +n04340521 +n12325234 +n01541102 +n02979836 +n00141669 +n01822300 +n11658544 +n12272883 +n03334382 +n11726707 +n03639077 +n07904934 +n03516367 +n03698723 +n03553248 +n11812094 +n03724417 +n01540566 +n02341974 +n11819912 +n07734555 +n02987379 +n03580845 +n12546962 +n02548247 +n12753245 +n07768423 +n12849279 +n11617090 +n02912894 +n07840027 +n12295033 +n12703383 +n02696165 +n10419785 +n04426427 +n03694639 +n11712282 +n04142999 +n01597737 +n03801533 +n01495493 +n07774719 +n03267113 +n01742821 +n03859170 +n03416640 +n03320959 +n12733218 +n02017725 +n13229543 +n09344324 +n04965451 +n01490112 +n10069296 +n12084555 +n04554406 +n04086446 +n02976249 +n02656032 +n02424486 +n02381609 +n09934337 +n04573937 +n07685399 +n02800497 +n02905152 +n02951703 +n07760153 +n03609397 +n00447463 +n03680512 +n02046939 +n03288886 +n11870418 +n03386544 +n07767171 +n07847453 +n12687044 +n01664492 +n03099147 +n03463381 +n02125081 +n12920204 +n03517647 +n02603540 +n12267411 +n11933546 +n11947802 +n04387095 +n12975804 +n02973904 +n13195341 +n04048441 +n11753143 +n03212114 +n03298858 +n04366116 +n01424420 +n10450161 +n01442972 +n07877299 +n04503593 +n04349306 +n12969425 +n12597466 +n03092656 +n07914995 +n03487886 +n12223569 +n01756733 +n13919919 +n04175147 +n02029087 +n03530511 +n02425887 +n03572107 +n03927539 +n03383099 +n04130907 +n01632601 
+n07823105 +n10378026 +n02382850 +n07613266 +n03235180 +n02810782 +n12708654 +n11636835 +n02823124 +n03402941 +n12121610 +n03715114 +n04052658 +n00480366 +n12493208 +n04255163 +n12145477 +n01489709 +n12402596 +n01598074 +n03837606 +n02628062 +n04103364 +n03247083 +n02032480 +n07736256 +n12578916 +n09218315 +n02218371 +n03730334 +n02080146 +n03836906 +n02868638 +n02198859 +n12744387 +n02942460 +n11754893 +n12274358 +n02725872 +n09218494 +n03942920 +n07574780 +n02921756 +n01757115 +n02763306 +n11758122 +n10508141 +n02303284 +n04083800 +n13879049 +n12765115 +n12075830 +n02666943 +n11980318 +n07907037 +n12794135 +n02333909 +n03870980 +n07586718 +n11923174 +n10782471 +n01493146 +n12294871 +n11726269 +n12932173 +n07825972 +n12732009 +n03572321 +n07682197 +n03423306 +n12495895 +n03545756 +n03557692 +n03785237 +n07902937 +n09899671 +n12061614 +n07902443 +n01449374 +n12632335 +n03474896 +n03539433 +n04310904 +n03902482 +n12006930 +n03285578 +n04200000 +n03912218 +n07821260 +n03548626 +n03223686 +n11826198 +n03165616 +n02104280 +n09981278 +n09382099 +n03732458 +n03987990 +n09946814 +n12270741 +n07737745 +n04172776 +n10189278 +n03543012 +n12629666 +n02180875 +n04087432 +n12961879 +n03321954 +n12528549 +n02424085 +n09843443 +n03846677 +n12304703 +n09873473 +n03410571 +n03041810 +n02425228 +n01562451 +n03615790 +n10081204 +n03985881 +n07842130 +n02890513 +n03649797 +n02381004 +n12560621 +n12523475 +n07687626 +n11905749 +n11759404 +n12905412 +n03542605 +n03983612 +n12573474 +n11972291 +n03767459 +n02698634 +n12713866 +n13084834 +n02202006 +n13108323 +n02631475 +n10737103 +n03637898 +n03069752 +n12400489 +n09692915 +n10242328 +n02794664 +n12465557 +n12085267 +n03348868 +n12754981 +n02745611 +n10504206 +n12073554 +n02835724 +n04605572 +n02825961 +n03528523 +n12116429 +n02973805 +n12708941 +n01544704 +n04180229 +n09403211 +n08242223 +n02146371 +n12127768 +n09770359 +n03295246 +n01757677 +n04385799 +n02584145 +n07909593 +n12587132 +n13029326 +n04184316 +n07903643 +n01848555 
+n10750031 +n02332156 +n12703557 +n03196990 +n12406902 +n02768973 +n12416073 +n02147591 +n09724533 +n09693982 +n12687462 +n01982068 +n03435991 +n03272125 +n07713763 +n03018712 +n03648431 +n03336575 +n07854184 +n12806015 +n07879174 +n03984643 +n03147280 +n02699915 +n07617708 +n01533651 +n12483841 +n01697611 +n02576906 +n03724066 +n03935116 +n09782397 +n01599269 +n10672371 +n12066630 +n03178674 +n15086247 +n03523987 +n02826068 +n12580654 +n02358390 +n01647640 +n10259997 +n03738066 +n13915023 +n02639605 +n03174450 +n12269406 +n09874428 +n03432061 +n04386051 +n03923918 +n04592465 +n12480456 +n10333439 +n04206790 +n01443831 +n02967626 +n07733712 +n03746155 +n12947313 +n11690254 +n12244650 +n12670758 +n08658309 +n12710693 +n11860555 +n03485198 +n03047799 +n04461570 +n07600177 +n02126640 +n12704343 +n02866386 +n03008976 +n04532831 +n03465426 +n12691428 +n01641206 +n04962062 +n03254046 +n04425804 +n02014524 +n03439348 +n02538010 +n11603246 +n12265600 +n12277800 +n04016240 +n12086192 +n09650729 +n01549641 +n03112719 +n04961062 +n02710324 +n12049282 +n12362274 +n11969607 +n12856680 +n02201000 +n07863802 +n03360622 +n07601809 +n04354487 +n12898774 +n12939282 +n03109693 +n12867826 +n12441390 +n12915811 +n12879527 +n04137355 +n04131368 +n03527149 +n10164492 +n09932508 +n12426623 +n12575812 +n02557318 +n10263790 +n04309548 +n00476235 +n04194127 +n11876634 +n10327987 +n03499354 +n02616851 +n04464615 +n03615406 +n02744844 +n11732567 +n10347446 +n09752519 +n04228215 +n10004718 +n07899533 +n12030908 +n15102894 +n12044467 +n11711764 +n02610066 +n03415749 +n04562496 +n02034295 +n02297442 +n03566193 +n12506991 +n07774842 +n12827270 +n14908027 +n12242409 +n04072960 +n02829596 +n12496427 +n02266050 +n13108481 +n12473840 +n08677424 +n12076223 +n15091473 +n02815749 +n04549028 +n12558425 +n12023407 +n04179824 +n02378541 +n03188725 +n12517445 +n07573347 +n02004131 +n11921395 +n12570972 +n10602470 +n12095647 +n03854421 +n02450295 +n02792409 +n03543735 +n12836337 +n12204175 +n12152722 
+n07900734 +n12517642 +n02775039 +n12607456 +n03376938 +n12179122 +n09873348 +n01847978 +n07888816 +n10453184 +n09675922 +n01851895 +n12865562 +n01797601 +n03711044 +n02738859 +n12064591 +n04033425 +n08551296 +n01650690 +n01537895 +n04207151 +n10087434 +n12261808 +n09438844 +n10364198 +n01814755 +n01583209 +n12270946 +n11892817 +n03344642 +n04117464 +n07847917 +n04003241 +n10362319 +n10477713 +n03495570 +n07560542 +n04363777 +n04534359 +n02404906 +n03349892 +n07712267 +n02960352 +n07866277 +n07857170 +n00324978 +n02755823 +n03150511 +n04211528 +n01899894 +n07588299 +n11874081 +n03425325 +n04506506 +n11949402 +n02952374 +n03309110 +n12159388 +n07591049 +n03068998 +n03228254 +n10279018 +n04173046 +n07728053 +n13052931 +n01597906 +n12368451 +n02767665 +n09435739 +n03915900 +n09728285 +n03292603 +n03331077 +n07817160 +n07917392 +n12540250 +n04153025 +n10209082 +n03968581 +n12676534 +n11824146 +n03521899 +n01853666 +n04292921 +n12332030 +n03984759 +n02863014 +n07801091 +n07723177 +n03289660 +n01533481 +n04488202 +n03468821 +n02382338 +n03543254 +n01961985 +n07915918 +n03703862 +n02771004 +n02047045 +n03877674 +n13141415 +n03529629 +n02240517 +n03675235 +n04491638 +n12384037 +n04419642 +n03019685 +n07591586 +n04496726 +n12985420 +n12927013 +n12196694 +n03473227 +n11621547 +n02988066 +n10451450 +n07729828 +n09618760 +n12196527 +n01555305 +n12830222 +n11950877 +n13190747 +n12160303 +n12390099 +n02818135 +n03163381 +n04554211 +n03244919 +n07897975 +n03386726 +n04290615 +n02011281 +n12407890 +n04123448 +n07904865 +n03447358 +n02393940 +n07931870 +n02937958 +n04318787 +n04587327 +n12807409 +n04112430 +n07560193 +n12774299 +n02618827 +n07854982 +n03757604 +n03817191 +n12793494 +n02324431 +n03013850 +n04113641 +n01612476 +n03127408 +n02038466 +n03799876 +n04257684 +n03382292 +n10449664 +n04394630 +n10275395 +n07698250 +n12329473 +n07694659 +n07642742 +n02563648 +n08583455 +n02557182 +n02775178 +n09274152 +n03189083 +n12570703 +n04211219 +n12486574 +n03073694 +n11969166 
+n02475078 +n02976350 +n08584914 +n07899660 +n10116702 +n01613807 +n12461109 +n04025508 +n12451240 +n12596849 +n12079963 +n03541269 +n04561422 +n11699442 +n07725255 +n03460297 +n07616748 +n12757458 +n03103563 +n02813752 +n07698782 +n12840362 +n01543632 +n01602832 +n01875313 +n12472024 +n02926591 +n02872333 +n10728624 +n12532564 +n03882960 +n12333053 +n03684224 +n13146583 +n03436075 +n04154340 +n03868643 +n02598878 +n04139140 +n03266371 +n04083309 +n12506341 +n12200143 +n03503477 +n12807773 +n03123917 +n13029760 +n10173771 +n03659809 +n12047884 +n12759273 +n04193377 +n04258438 +n04597400 +n04579986 +n03719743 +n04299963 +n02864504 +n10510245 +n03417970 +n09719794 +n03138344 +n02085272 +n07694516 +n12665857 +n01642257 +n03229244 +n10581890 +n10318293 +n03635108 +n10652605 +n12189429 +n09934774 +n11709205 +n04207903 +n10296176 +n10603851 +n03450734 +n13223588 +n12754648 +n09886403 +n07751280 +n11950686 +n07814390 +n12799776 +n01646902 +n09796809 +n12819728 +n01938454 +n02410011 +n07607138 +n02119634 +n10332861 +n09230202 +n02757061 +n02849885 +n15092227 +n12151615 +n03111041 +n02413050 +n03506560 +n07744057 +n04030518 +n12544539 +n04089836 +n02038993 +n13882201 +n12099342 +n01946630 +n10095769 +n02982416 +n12957924 +n13215586 +n07726525 +n12452836 +n03801671 +n04598318 +n01449712 +n12428747 +n04119751 +n10509063 +n07694839 +n02782602 +n11626409 +n02573704 +n12399384 +n12388989 +n01601068 +n11971406 +n04367011 +n07930315 +n12925179 +n04967674 +n03497352 +n03653833 +n01819465 +n03688192 +n02802990 +n03393761 +n04430475 +n13107694 +n10384496 +n07867164 +n12449526 +n01515303 +n12574320 +n01444339 +n07919310 +n03453443 +n04173907 +n02887489 +n07772788 +n03629520 +n02580830 +n11705387 +n12069679 +n01956344 +n02406533 +n03973402 +n03938037 +n04969952 +n04103094 +n04393808 +n07715407 +n04172107 +n01917882 +n12085664 +n07608429 +n09835230 +n04135024 +n07842605 +n12568186 +n04339879 +n07691091 +n01801876 +n00474568 +n01807105 +n12128071 +n01673282 +n11948864 +n03991837 
+n09659188 +n02070174 +n02670683 +n12454949 +n10385566 +n11631854 +n12305293 +n12002428 +n12948495 +n12757816 +n11852028 +n10690648 +n09283866 +n03214582 +n03423877 +n04127521 +n03006626 +n09283193 +n07712559 +n01447331 +n02981321 +n02658531 +n11947629 +n02419634 +n02420828 +n11923637 +n12570394 +n11968931 +n12731029 +n09749386 +n07736813 +n03967396 +n11908846 +n03029445 +n02426481 +n01964271 +n13198914 +n04484432 +n12656685 +n10806113 +n11849983 +n03236423 +n10649197 +n07688624 +n03057541 +n12015221 +n02094931 +n02014237 +n07560331 +n02801450 +n04206570 +n07556406 +n11627908 +n11889619 +n07852229 +n04063154 +n02713364 +n02783459 +n12877838 +n02930214 +n02125010 +n02407276 +n07815424 +n12855494 +n12530818 +n07750449 +n01963317 +n10082997 +n03245724 +n03012013 +n03555006 +n02421136 +n03332989 +n04375405 +n03746486 +n12636224 +n03278914 +n07917133 +n12504783 +n09416890 +n03896526 +n02258198 +n12983048 +n03837698 +n12869061 +n04541987 +n01637615 +n04401949 +n02241426 +n13220122 +n07876651 +n03729308 +n02364840 +n01339801 +n03418915 +n09257843 +n11614039 +n09731343 +n03809603 +n05399243 +n01569262 +n11901597 +n03124474 +n01566207 +n03796522 +n12595699 +n04573281 +n09689435 +n11859737 +n03201529 +n12902662 +n03374372 +n03760944 +n09189157 +n01517966 +n10431625 +n02898269 +n03693707 +n04369025 +n07834618 +n04095342 +n02786331 +n03822504 +n02284611 +n09862621 +n03436891 +n07688898 +n12435777 +n03949317 +n12443323 +n12273114 +n12623077 +n04333869 +n07907831 +n07774596 +n05450617 +n03320262 +n04190376 +n12671651 +n11819509 +n07588111 +n09756049 +n07611046 +n04973291 +n11602873 +n00120010 +n03500699 +n03844815 +n03708843 +n04452528 +n04387261 +n09889065 +n10147121 +n03318294 +n12599435 +n04164406 +n01965529 +n11636204 +n11791569 +n12275131 +n02977330 +n07851443 +n04132603 +n07824191 +n09760609 +n12190410 +n07915491 +n12665271 +n10120671 +n02570164 +n10208950 +n02163297 +n02244797 +n09842528 +n08645104 +n01841679 +n11603835 +n04488857 +n07814487 +n01953762 +n04612373 
+n11877193 +n03198500 +n03981924 +n01943087 +n11552806 +n04414909 +n03005033 +n02457945 +n10500217 +n10375314 +n04607242 +n07914777 +n09832456 +n12915568 +n12813189 +n10578021 +n03519081 +n07801779 +n12026476 +n03296081 +n03850492 +n07902121 +n09881265 +n12562785 +n03290195 +n10131151 +n10078719 +n01558765 +n03917814 +n02045596 +n07734183 +n03414676 +n07933154 +n02126787 +n12148757 +n12263987 +n07684164 +n03406966 +n01492569 +n02988963 +n12963628 +n09964202 +n03417749 +n01854838 +n02921029 +n02183096 +n11762433 +n11722466 +n02387093 +n02768655 +n12519089 +n09871229 +n07938313 +n10502329 +n11989393 +n03768916 +n13145040 +n11813077 +n04457910 +n03655720 +n03703945 +n11876803 +n01438581 +n07910379 +n07847827 +n02300797 +n09245515 +n10754189 +n04581102 +n12513172 +n02458135 +n03762332 +n11789589 +n09695620 +n03850053 +n07911249 +n12342852 +n12753007 +n07748574 +n07727458 +n03696568 +n04304680 +n07723039 +n07775197 +n07577144 +n03043693 +n04374735 +n01858281 +n09228055 +n09466678 +n01949085 +n02024479 +n11623815 +n02704645 +n07894451 +n01751472 +n01646388 +n01317916 +n13880994 +n10300500 +n11794024 +n03735963 +n04610274 +n11854479 +n07754894 +n02639087 +n02122510 +n02262803 +n12732966 +n04529108 +n13194036 +n09990777 +n10009276 +n12088223 +n12155009 +n07886176 +n04278247 +n04222723 +n11707229 +n01999186 +n07851641 +n12741792 +n01315213 +n10033412 +n04249582 +n03586631 +n03237839 +n12037499 +n12014085 +n07756325 +n01636352 +n03905947 +n08611339 +n07693590 +n03724538 +n09791816 +n01666585 +n10588965 +n11613219 +n10542608 +n12913791 +n10528023 +n03171635 +n11923397 +n12854600 +n10410246 +n12698598 +n04135118 +n09844457 +n04441790 +n03882611 +n02337001 +n07907342 +n12561169 +n12027658 +n10719132 +n09851165 +n02801823 +n12330587 +n01683558 +n12162181 +n04387932 +n11704620 +n09679170 +n07601290 +n04028221 +n10277027 +n09877750 +n11758483 +n10027246 +n03819336 +n10205231 +n12478768 +n03451711 +n12973443 +n01923025 +n03262717 +n07807594 +n00475535 +n07744430 +n02341475 
+n04614655 +n07924747 +n03388323 +n12680402 +n03202940 +n04534520 +n09661873 +n15092059 +n11832480 +n04198355 +n12529220 +n12389130 +n12304115 +n03234952 +n07610620 +n02868975 +n04442741 +n05282652 +n02820675 +n12795352 +n12675299 +n08547468 +n04189651 +n04141198 +n04513998 +n12273939 +n12482668 +n12858618 +n01958346 +n03172038 +n10280674 +n04301760 +n02631330 +n12433178 +n07763107 +n03068181 +n07565259 +n03605598 +n13177884 +n04005197 +n09751496 +n12737383 +n07648997 +n09839702 +n09442595 +n07925229 +n12150722 +n11898775 +n09904208 +n02207345 +n07642361 +n07685918 +n03205458 +n10574538 +n09742315 +n02599557 +n03585682 +n04273659 +n02200850 +n03410740 +n03391301 +n07726672 +n09782167 +n13155305 +n02067240 +n07561848 +n07728708 +n12463134 +n12228229 +n09743487 +n12225563 +n03421669 +n03226375 +n03973945 +n12498055 +n04483925 +n04564278 +n11890150 +n12519563 +n12754468 +n04353573 +n11615607 +n04430896 +n04585128 +n10395828 +n10773665 +n02772435 +n09881895 +n12663023 +n01615303 +n12803754 +n09445008 +n03955296 +n05245192 +n05486510 +n07899769 +n07575510 +n02307681 +n03814817 +n02670186 +n03598515 +n12797860 +n03518135 +n07587962 +n12630763 +n06273743 +n09843824 +n03226254 +n12407222 +n02961544 +n12951835 +n06417096 +n02016659 +n01441117 +n07735404 +n09411189 +n13896217 +n03262248 +n03451120 +n02525382 +n03375329 +n04155068 +n12916179 +n10297234 +n11907100 +n03423568 +n04360914 +n12027222 +n12199790 +n01744270 +n09896401 +n07925116 +n03693860 +n04414319 +n07767549 +n03555564 +n04043411 +n07872593 +n03774461 +n03129471 +n04497801 +n11756870 +n09776346 +n04530283 +n01520576 +n12828220 +n01583828 +n04120842 +n09676021 +n04344734 +n01916388 +n12513613 +n09861863 +n02310334 +n03318983 +n04533499 +n02427576 +n12727518 +n04502059 +n11725480 +n11987126 +n11876204 +n03504205 +n09720595 +n12315999 +n12935609 +n04452757 +n12201331 +n01603152 +n10772092 +n03156279 +n12723610 +n02003037 +n03244775 +n07802963 +n11954642 +n07770034 +n09931165 +n10559508 +n01745902 +n07654148 
+n10070108 +n01585287 +n13196003 +n04389718 +n10253122 +n03730893 +n02983357 +n02783900 +n01680813 +n03072440 +n03109253 +n03274435 +n11655974 +n10048612 +n07849733 +n07896994 +n03792334 +n03035832 +n03819448 +n03105088 +n11943992 +n01485479 +n01699675 +n11795049 +n12086778 +n01840120 +n07753980 +n10685398 +n04346428 +n04532398 +n07709172 +n02146700 +n09461069 +n03853924 +n01321456 +n12068432 +n09757449 +n03206282 +n03751757 +n13053608 +n11695974 +n12123741 +n03500209 +n04367371 +n02890940 +n01917611 +n07835331 +n02907656 +n04136045 +n12059625 +n03862862 +n12864160 +n00440039 +n03448590 +n12628986 +n04115802 +n03949145 +n12916511 +n12647893 +n09706255 +n13181811 +n07752109 +n04375615 +n01648620 +n04403524 +n09967967 +n12911079 +n03857687 +n02803539 +n01551080 +n10734891 +n13235159 +n04127633 +n07935878 +n12853482 +n10191001 +n03126385 +n10076224 +n01812866 +n12919403 +n03769610 +n09283767 +n03462110 +n11770256 +n12038898 +n09889170 +n11894558 +n10298647 +n02592055 +n02795670 +n11701066 +n12762049 +n02890662 +n07918193 +n02976455 +n03100897 +n13127843 +n12184912 +n00468299 +n12407079 +n12496949 +n03541537 +n05260127 +n01535140 +n01541760 +n11945783 +n07687053 +n07745046 +n12083847 +n02382132 +n12270027 +n10140597 +n03788914 +n01790711 +n02197689 +n03173270 +n10368624 +n04449290 +n01579729 +n07834872 +n07734417 +n02379630 +n01636829 +n12549192 +n12951146 +n13579829 +n03268142 +n11761202 +n02769669 +n09452760 +n04095577 +n12031139 +n02003577 +n12891469 +n03931885 +n01577941 +n04176295 +n12046428 +n03418402 +n13145250 +n11865874 +n12473608 +n11797321 +n01798168 +n09923186 +n02786736 +n01698782 +n09976283 +n03975788 +n14685296 +n01682172 +n07838441 +n02771286 +n03429137 +n03948950 +n02512830 +n02298218 +n10141364 +n02823848 +n02077384 +n12584715 +n11748811 +n02214773 +n03667552 +n04121426 +n04135710 +n07579917 +n12275888 +n07826453 +n12167436 +n04586072 +n09877288 +n04248396 +n02761696 +n03038870 +n01490360 +n12353203 +n09785891 +n12057660 +n04146343 +n12557556 
+n02081798 +n02917964 +n07898617 +n12597798 +n07574176 +n07764630 +n03008177 +n04255899 +n04434207 +n07897600 +n09929577 +n11811921 +n12415595 +n02893941 +n12276110 +n02821202 +n09690621 +n02508742 +n02077787 +n02390640 +n03764822 +n02257985 +n13033134 +n04559166 +n07865196 +n10506915 +n12051103 +n10473917 +n12775919 +n02971579 +n12880462 +n11837970 +n02063662 +n09840520 +n12019827 +n09208496 +n12836508 +n02982232 +n04219185 +n03332005 +n07914128 +n07862461 +n04250692 +n09267854 +n04561734 +n02076402 +n12344837 +n02919148 +n06592281 +n03668803 +n03062985 +n04246731 +n12112609 +n04012482 +n03558633 +n03982642 +n01998741 +n07665438 +n04209509 +n07913882 +n01749244 +n07801342 +n02611561 +n04488742 +n01897536 +n10624437 +n13128976 +n07931612 +n04300643 +n03727067 +n03360431 +n07593471 +n10253296 +n03297226 +n03854506 +n07879450 +n10562283 +n12557438 +n13154388 +n12862512 +n02126028 +n07752514 +n02387887 +n12066261 +n07666176 +n02806530 +n09988703 +n03721252 +n03221540 +n12195533 +n02682569 +n03622058 +n03943266 +n04207596 +n11721337 +n02427032 +n07910152 +n01551300 +n12861345 +n11660300 +n03786313 +n12966945 +n02046171 +n02797535 +n03546112 +n07711232 +n02044908 +n02998563 +n02652132 +n12634986 +n12187247 +n11645590 +n07582892 +n03065243 +n09911226 +n04396902 +n10763075 +n02359047 +n10400108 +n04294614 +n03991646 +n11728945 +n07766891 +n12277150 +n13141564 +n10563314 +n12426749 +n07827750 +n12403994 +n12627119 +n03420801 +n10203949 +n12830568 +n12280060 +n13180875 +n12659064 +n04239436 +n03823312 +n04367746 +n12448700 +n01896844 +n07581931 +n09384106 +n11625223 +n04198722 +n01477875 +n09932336 +n03477512 +n12281974 +n10117739 +n07759194 +n12281788 +n01405007 +n03077616 +n02304036 +n12947544 +n03140126 +n12356960 +n07807002 +n07877849 +n02956795 +n04373795 +n07925500 +n10359546 +n09730077 +n01694955 +n10611267 +n04316498 +n07849912 +n12841354 +n07903543 +n10026976 +n04050313 +n03939844 +n03260849 +n07917507 +n12228387 +n03199775 +n01569566 +n02403920 +n04261638 
+n02986160 +n03724623 +n01960177 +n03783430 +n07877675 +n10401639 +n04215153 +n03077741 +n02589623 +n12934985 +n03233624 +n04506688 +n12194147 +n09975425 +n07818825 +n12641007 +n10036692 +n02771750 +n12285900 +n01472303 +n10033663 +n10707134 +n03219966 +n11772879 +n10146416 +n10435169 +n10304086 +n12385566 +n03126580 +n12904314 +n03619196 +n02299846 +n03574243 +n12368257 +n03690473 +n01748686 +n09834378 +n07750736 +n02930645 +n01679307 +n03721047 +n02710044 +n07563207 +n02930080 +n09309168 +n03127203 +n02863536 +n02536165 +n01559639 +n09654518 +n02961035 +n12007406 +n12773651 +n04351699 +n03114504 +n06273414 +n02017475 +n01733466 +n02175014 +n07920663 +n03953901 +n09670521 +n09400987 +n11791341 +n02284884 +n12919646 +n07880325 +n03801353 +n01982347 +n07828642 +n01570421 +n03998333 +n03449309 +n10482220 +n12850906 +n12805561 +n12926689 +n03232543 +n04248851 +n03195959 +n04082562 +n03846100 +n07682952 +n07695652 +n11809271 +n09895561 +n04287898 +n09740724 +n02859955 +n09830400 +n03674731 +n02825153 +n04571686 +n13107891 +n10318607 +n07848093 +n13226871 +n08555710 +n03137473 +n02776978 +n03141455 +n12514138 +n01809371 +n09405078 +n01753180 +n02184473 +n11610215 +n03539546 +n12731835 +n04485884 +n03590588 +n10221312 +n04049753 +n03441345 +n02302244 +n12262185 +n15092650 +n11877646 +n10377185 +n01684578 +n03796605 +n07897116 +n03164344 +n12135049 +n10757050 +n01692523 +n04566756 +n07697699 +n07575392 +n10262655 +n04064747 +n07914006 +n12433769 +n07873348 +n04457767 +n10019072 +n02921195 +n03856465 +n04041408 +n12639584 +n12920955 +n11781176 +n07864756 +n03941013 +n03646148 +n04401578 +n11692792 +n02757714 +n02286089 +n04253168 +n03890514 +n07855510 +n03507458 +n04123026 +n11661909 +n12435152 +n04330746 +n09481120 +n03731019 +n03717285 +n03271030 +n02772101 +n07740597 +n02847852 +n12825497 +n12263738 +n03342262 +n03603594 +n07804543 +n12932365 +n12695975 +n10297531 +n04054670 +n03175081 +n12703856 +n03832144 +n03966206 +n02414290 +n03619275 +n09738121 +n03290096 
+n10585077 +n07731767 +n12409840 +n12026981 +n02278980 +n02752810 +n01654637 +n02654112 +n10314836 +n13023134 +n01823414 +n07461050 +n11902982 +n04543636 +n02204907 +n04049585 +n12304899 +n03073545 +n04272928 +n10315456 +n03975657 +n09899782 +n12288005 +n07005523 +n03795269 +n09823832 +n02242137 +n02907391 +n03643491 +n03245889 +n12285369 +n03061345 +n03797264 +n07838073 +n09219233 +n02859343 +n07608098 +n03920641 +n12578626 +n10688356 +n04542858 +n07834065 +n00443803 +n04181561 +n04570214 +n02047517 +n03295012 +n01633781 +n10610850 +n04035836 +n03001115 +n04593376 +n02393807 +n13061348 +n10123122 +n11800236 +n13207094 +n10140929 +n12167602 +n01809752 +n10421956 +n02764935 +n03424489 +n12889219 +n04046091 +n07714287 +n07708685 +n07736087 +n04142434 +n11961446 +n04521863 +n02414763 +n02901377 +n00467536 +n13085747 +n03855756 +n11846765 +n02530999 +n03063199 +n04258618 +n12204032 +n04424692 +n11758276 +n02653497 +n03766508 +n02026629 +n02572484 +n12339831 +n01635027 +n01668436 +n07821919 +n01543175 +n02689748 +n12528974 +n04024862 +n04184880 +n11720891 +n13869896 +n01678043 +n01647303 +n11532682 +n03236217 +n04963307 +n03012897 +n11682659 +n03191286 +n07643891 +n12737898 +n10680609 +n07924955 +n03879705 +n10461060 +n02523427 +n02013567 +n09893344 +n04124488 +n09863031 +n12454436 +n12305089 +n07709046 +n03805180 +n11940599 +n01691217 +n04198562 +n03978421 +n02357585 +n07818572 +n12870682 +n03798442 +n04154938 +n10550369 +n11957678 +n01958531 +n09936825 +n02334201 +n07910538 +n11978551 +n10562135 +n12700088 +n12784889 +n04480853 +n03281673 +n07588419 +n02968333 +n11935469 +n13046669 +n11730602 +n09643799 +n11849467 +n01758757 +n09638454 +n03267468 +n07914586 +n12104734 +n02961225 +n09827246 +n09917214 +n13079073 +n12634734 +n04089376 +n13034062 +n11714382 +n12753762 +n07683039 +n11840067 +n07689842 +n12173069 +n12172481 +n04182152 +n07869522 +n10356877 +n02771166 +n03154895 +n07615289 +n12986227 +n12361135 +n03456447 +n12706410 +n12895811 +n02988156 +n03130761 
+n10639359 +n03628215 +n02738741 +n01643507 +n07730708 +n03232309 +n02846733 +n04969540 +n03051041 +n12890928 +n03235327 +n04289576 +n07588817 +n10325774 +n03973285 +n09703485 +n02358584 +n03061674 +n03195332 +n02901259 +n07849619 +n04486934 +n07908812 +n01588725 +n03682877 +n11949015 +n04146504 +n04146862 +n07898247 +n03318865 +n04367950 +n07880213 +n04247011 +n01447658 +n12711817 +n03146687 +n02926426 +n12856091 +n11966896 +n02413593 +n09764900 +n03009794 +n03314227 +n10499232 +n10075693 +n04451318 +n12320806 +n11933728 +n07764315 +n12133682 +n09904837 +n12832538 +n03816530 +n07802863 +n04391445 +n09728137 +n03887330 +n04436012 +n03957991 +n07771731 +n06266973 +n10407310 +n10290919 +n07862244 +n01842504 +n10262561 +n12726159 +n07691954 +n07618119 +n03437829 +n11966617 +n03629100 +n04231905 +n04208760 +n03344305 +n03684143 +n12934174 +n08645212 +n03556679 +n12109365 +n03751458 +n02380875 +n02025389 +n02770721 +n09830629 +n02800675 +n04951186 +n04483073 +n12710577 +n12789054 +n12058192 +n11777080 +n07716203 +n09618880 +n04525821 +n04016846 +n02918330 +n10375052 +n13158512 +n13090871 +n02929582 +n02308735 +n10487182 +n02213663 +n07608339 +n04384593 +n12890490 +n03992436 +n02994573 +n13231078 +n12880244 +n01651059 +n02925009 +n09686401 +n13219976 +n09981540 +n04582771 +n06267758 +n09893502 +n13214340 +n03272940 +n12554911 +n02214341 +n04137089 +n03874487 +n04573513 +n12003167 +n12004547 +n13065089 +n01903346 +n04373428 +n02216365 +n02024185 +n12577895 +n11698042 +n07586318 +n11705776 +n03030353 +n04486213 +n07885705 +n07928163 +n02356612 +n02767038 +n02897097 +n11662371 +n04128710 +n09842395 +n07683360 +n11533212 +n08495908 +n12841193 +n03669886 +n07768068 +n02381831 +n12081215 +n02757337 +n02811618 +n10144338 +n01379389 +n09698644 +n12779851 +n10400618 +n11801891 +n12322099 +n12408077 +n02767956 +n08640962 +n07816839 +n03021228 +n10346015 +n07868830 +n07917272 +n10076957 +n12865708 +n04290259 +n03595264 +n03986224 +n07825194 +n01610100 +n04417086 +n12995601 
+n12734070 +n15091129 +n12428412 +n07587331 +n02405101 +n03108455 +n03594523 +n04489695 +n03892425 +n13032618 +n04409011 +n07590752 +n15092942 +n03914583 +n13066448 +n03532919 +n10639637 +n04566561 +n13223843 +n07904637 +n12347158 +n02720048 +n03901229 +n03936466 +n10574840 +n03782794 +n12397431 +n07908567 +n12580896 +n02697221 +n09791419 +n02577403 +n07870069 +n02136103 +n04318892 +n01462544 +n09747191 +n12287836 +n03067093 +n03934565 +n03543945 +n13126856 +n02240068 +n01585422 +n12413301 +n03246454 +n01876034 +n03635330 +n11680596 +n03228365 +n03082656 +n11609862 +n12859986 +n03934229 +n10233248 +n03166514 +n12166793 +n10115430 +n03327553 +n03373611 +n02967782 +n12338258 +n01604968 +n01323155 +n02590094 +n03044934 +n07866409 +n12291143 +n14900342 +n12094612 +n07845702 +n07926250 +n10750640 +n04359500 +n09797873 +n09953350 +n03561047 +n12122725 +n12725738 +n01453087 +n04977412 +n04575723 +n13219833 +n12161056 +n04273285 +n12482437 +n12863624 +n04953296 +n03390075 +n10188957 +n02874442 +n04236935 +n09990690 +n12866459 +n04075715 +n09725000 +n12794367 +n12461673 +n03050453 +n03677115 +n12427391 +n07736371 +n02973236 +n02406749 +n12322699 +n12815198 +n10680796 +n03268311 +n02405799 +n12302248 +n09791014 +n01545574 +n07740033 +n07862095 +n09901337 +n04390577 +n03597916 +n12110085 +n11802586 +n04205505 +n07696527 +n12076852 +n04344003 +n03326660 +n02823586 +n03042139 +n01565345 +n07905296 +n01454545 +n07650903 +n07905386 +n12530629 +n02841187 +n02943964 +n03329536 +n09681234 +n03479121 +n03770085 +n04147793 +n11552133 +n03774327 +n13197507 +n07901355 +n10400437 +n07837912 +n02310941 +n07845087 +n02239774 +n04976319 +n03960490 +n05239437 +n06275471 +n01633406 +n04257223 +n12009420 +n10483138 +n02775897 +n07866151 +n07922512 +n02666624 +n03944024 +n03842377 +n01832493 +n07855907 +n03968728 +n04492060 +n07879072 +n11635830 +n11802800 +n02357911 +n02431628 +n03730494 +n13099999 +n07768230 +n13147270 +n12331655 +n10237676 +n11855553 +n09759501 +n10620586 +n13181055 
+n12309277 +n13183489 +n04382695 +n07679034 +n10495756 +n02173113 +n12764202 +n03683457 +n10298912 +n07680313 +n10160280 +n02205673 +n12053690 +n11653904 +n02931294 +n04093775 +n12856479 +n02427470 +n07608866 +n09954639 +n11639445 +n03364599 +n09924106 +n09683924 +n10419472 +n03089753 +n12620969 +n07604956 +n12940609 +n12564083 +n03514894 +n10343355 +n13068255 +n03805280 +n12793284 +n03140652 +n02666501 +n11717577 +n04267435 +n04593185 +n12820853 +n03934311 +n02630615 +n07767002 +n07723968 +n01631354 +n07931452 +n12414818 +n03097673 +n09944430 +n04457474 +n11850521 +n12227658 +n10131815 +n12408717 +n03566730 +n12777680 +n06273555 +n04357531 +n03759243 +n09861599 +n03015851 +n04175039 +n03392741 +n07859796 +n07741138 +n04474187 +n02266864 +n04553561 +n02667244 +n12720200 +n12432356 +n07806120 +n10362557 +n11929743 +n07765862 +n02963987 +n02762371 +n02747672 +n04289195 +n04056413 +n03039493 +n03894677 +n12338655 +n04422409 +n12079120 +n10252222 +n10168837 +n12919847 +n10297841 +n01340014 +n11710827 +n10167838 +n12278107 +n01384164 +n10498986 +n02742468 +n02899439 +n11752937 +n12107710 +n12315598 +n03985441 +n07605804 +n07686202 +n12884100 +n13121349 +n11725311 +n10420507 +n11706761 +n01381044 +n03331599 +n12336333 +n10185483 +n07880880 +n01782516 +n12615232 +n03175457 +n12657082 +n01750437 +n07918879 +n13213066 +n12927494 +n02910542 +n06273986 +n02161338 +n10235024 +n12180168 +n03659950 +n02160947 +n11861853 +n09866817 +n09279986 +n12393269 +n01552034 +n05526957 +n02956883 +n12818966 +n09753792 +n03114236 +n12273344 +n12546617 +n13177048 +n02129991 +n01731941 +n01628770 +n12774641 +n07685546 +n03253279 +n10678937 +n12579038 +n08673039 +n01392275 +n02379081 +n10530150 +n12851469 +n12414449 +n11694664 +n11877283 +n09708889 +n03585438 +n00483605 +n12332555 +n03323096 +n07851767 +n02417663 +n10667863 +n02856237 +n09269341 +n01596608 +n09720033 +n13160604 +n04443164 +n02814428 +n11622771 +n10328123 +n04338963 +n01794651 +n12069217 +n07762740 +n02935387 +n11897116 
+n10569179 +n12749852 +n10745006 +n07823280 +n12162425 +n09801533 +n03772269 +n04518643 +n07916319 +n12771597 +n02147173 +n10342992 +n03795123 +n11646344 +n12847927 +n07686021 +n12383894 +n04465050 +n14564779 +n04212467 +n12274863 +n02380052 +n04329958 +n12034384 +n04213353 +n04366033 +n04955160 +n02778294 +n12890685 +n03028785 +n03097535 +n04533594 +n01750167 +n01415626 +n12276477 +n07729926 +n07711371 +n12843970 +n10500419 +n12891093 +n03840823 +n12509665 +n11878101 +n04315342 +n07685031 +n12305819 +n10039271 +n12264512 +n03911866 +n13919547 +n12413419 +n03785721 +n02599347 +n03786194 +n04018155 +n12856287 +n09607903 +n02396088 +n10212501 +n10313000 +n07683617 +n03586219 +n03890233 +n03156767 +n12033709 +n01648139 +n04399846 +n10671736 +n07698672 +n10791115 +n07708124 +n02709908 +n04266968 +n01758141 +n10058962 +n09444783 +n03668067 +n02838345 +n02388143 +n12893993 +n12590499 +n01462042 +n02689434 +n13209808 +n04075291 +n02412629 +n01953594 +n03906463 +n03043423 +n02200509 +n10152763 +n12504570 +n04396808 +n03382413 +n03618101 +n02767147 +n02390101 +n03450974 +n12778398 +n03625539 +n02574271 +n04113316 +n07572616 +n11809437 +n04119230 +n03829954 +n10500603 +n04258732 +n02731900 +n10174330 +n01574801 +n08663703 +n12558230 +n03981760 +n07732904 +n11875523 +n11823436 +n03238286 +n03079494 +n04281260 +n07873057 +n11686912 +n10568608 +n07593004 +n04271531 +n10037922 +n07838551 +n03615300 +n12624568 +n12940226 +n05242928 +n03680734 +n01589893 +n11652376 +n11893640 +n04119091 +n09696763 +n07851554 +n02660640 +n12124818 +n10370955 +n02663211 +n02414209 +n13187367 +n03258577 +n04375241 +n07617932 +n12240477 +n03417202 +n07595649 +n03839424 +n03087245 +n02431441 +n04396335 +n03484809 +n03426285 +n03592931 +n02912319 +n03488887 +n12187891 +n07592400 +n12918609 +n07858114 +n07567980 +n01548694 +n02726210 +n02406859 +n10147262 +n05458576 +n02848921 +n03503233 +n02587618 +n03465151 +n03582508 +n11654293 +n03695452 +n02197185 +n04223170 +n10243273 +n03149135 +n02842809 
+n03669534 +n03857291 +n02147328 +n12278865 +n12733428 +n03264906 +n09924195 +n10432189 +n12203896 +n03892728 +n12360958 +n10418735 +n01650901 +n12420722 +n03341606 +n02557909 +n07751858 +n03483971 +n12019035 +n03991202 +n02072040 +n03129848 +n04505345 +n02405440 +n03901974 +n11656123 +n11552976 +n10291822 +n10108018 +n09902731 +n03325691 +n12646072 +n04134170 +n12097396 +n07564008 +n01624305 +n03421117 +n02776007 +n10792856 +n07818133 +n03227184 +n10198437 +n04157099 +n12743009 +n07820960 +n12749456 +n13035925 +n05262698 +n03422771 +n02878628 +n12140903 +n07820297 +n03524745 +n09901921 +n03170872 +n10039946 +n12638964 +n11989087 +n03461988 +n04287451 +n04298053 +n07882420 +n04002262 +n02734835 +n11707827 +n07756641 +n12808007 +n10069981 +n12637123 +n12947895 +n04363082 +n04292080 +n11858077 +n04535252 +n12646397 +n12283147 +n12321077 +n02746595 +n02895328 +n07624924 +n12537253 +n11952541 +n02181477 +n01440160 +n03878828 +n12861541 +n02869563 +n04242084 +n03197201 +n09396608 +n04291992 +n07845863 +n04314522 +n12843557 +n04029647 +n12146654 +n13147386 +n12954799 +n11920133 +n03038480 +n03213715 +n02971473 +n04149374 +n04230387 +n00444340 +n11859275 +n07564796 +n02948403 +n10186068 +n04315713 +n02366002 +n02670935 +n13208302 +n10225931 +n07826340 +n04102872 +n02259708 +n11855842 +n09941089 +n08896327 +n10237464 +n12084158 +n03764995 +n03627954 +n12384375 +n10341343 +n07876189 +n04573379 +n07904293 +n07840520 +n12038038 +n03005147 +n10483799 +n02978367 +n01484285 +n13094273 +n04539053 +n01748389 +n10146816 +n07815839 +n12991837 +n03294604 +n03588841 +n04055180 +n03209477 +n09917345 +n04393913 +n12337391 +n12126084 +n01882125 +n07688130 +n02814116 +n09640715 +n12679593 +n12596345 +n03029925 +n11761650 +n04457157 +n12683096 +n07709881 +n03841290 +n13157684 +n07927836 +n03523134 +n03690279 +n10187491 +n12451070 +n02682311 +n03978815 +n11806679 +n07808022 +n01386354 +n03622526 +n02369293 +n11885856 +n02289610 +n12663359 +n02624987 +n13173488 +n03027001 +n07896765 
+n11935330 +n07814790 +n04242704 +n09959142 +n07589543 +n03551582 +n07843117 +n03556992 +n02060569 +n04000998 +n03825271 +n11946918 +n02874750 +n03479502 +n09919451 +n02176747 +n02080713 +n03400972 +n10222170 +n07926785 +n07852302 +n03012373 +n10438842 +n12868019 +n03634034 +n04210591 +n07853560 +n12374862 +n09248399 +n04355115 +n12908093 +n12906498 +n12875269 +n02791665 +n03146777 +n02854378 +n12414159 +n07821610 +n07595180 +n12238913 +n12141385 +n10761190 +n12165758 +n01653223 +n12956367 +n03695753 +n12416703 +n12346813 +n03405111 +n04304215 +n01624212 +n12674895 +n09850760 +n12407715 +n04156040 +n11610437 +n03395256 +n09970822 +n04229959 +n02530831 +n07870894 +n12098524 +n12828379 +n04057215 +n10751152 +n10053439 +n03674270 +n07869291 +n12256920 +n02535163 +n04282231 +n02136452 +n02365108 +n10328328 +n02315487 +n03325403 +n09231117 +n03342657 +n09980985 +n10702167 +n11961871 +n02065263 +n12857779 +n03219612 +n07805966 +n10699981 +n07691863 +n12831932 +n04179126 +n10208189 +n09765118 +n07922147 +n01631512 +n01947997 +n01405616 +n01892030 +n07827896 +n12964920 +n07749870 +n03276696 +n10020670 +n11828577 +n07624666 +n10590146 +n02407521 +n10253703 +n03270854 +n11610047 +n12981443 +n12413642 +n12302565 +n03177059 +n04594114 +n10227985 +n07728391 +n10395073 +n02810270 +n03569293 +n07812046 +n03843316 +n12477401 +n03802643 +n07618029 +n10755648 +n12837803 +n12454556 +n01636127 +n02809241 +n03270165 +n12035631 +n02962414 +n09750641 +n01793085 +n04346003 +n07922041 +n04164002 +n12499979 +n03301291 +n07921834 +n09656077 +n07599161 +n13155611 +n10194231 +n10063635 +n03601442 +n10366276 +n00475661 +n03943714 +n10377291 +n02624551 +n02568447 +n07589458 +n09691858 +n02685995 +n11919975 +n01690466 +n13211020 +n04114069 +n10530383 +n04200908 +n12631932 +n07916437 +n03219859 +n07918309 +n10368291 +n10253479 +n03317889 +n13206178 +n02821415 +n10592811 +n12557064 +n12872458 +n10212231 +n07926346 +n09695514 +n09741816 +n03964611 +n07812913 +n09703708 +n02587479 +n10593521 
+n03485309 +n03776877 +n12289433 +n07716504 +n10580030 +n03061893 +n03206158 +n09710041 +n04266849 +n07864065 +n12767648 +n02333190 +n12295429 +n02406432 +n01799679 +n07861983 +n02201626 +n03441582 +n03653975 +n02834506 +n12263204 +n10672662 +n03072682 +n03410423 +n11620389 +n04542095 +n07910970 +n03697913 +n02706806 +n09736798 +n12318965 +n07938594 +n12032429 +n03191776 +n04210288 +n01422335 +n03236093 +n11881189 +n02247216 +n12338146 +n03104512 +n00474881 +n04172230 +n01461315 +n04400109 +n10646140 +n02215621 +n10096126 +n03019806 +n11809754 +n02492948 +n10741367 +n10308504 +n07875560 +n02523110 +n07738224 +n02015797 +n10499631 +n03025165 +n03284308 +n03508881 +n10441037 +n10757492 +n07608721 +n09755241 +n04264361 +n04394421 +n03776997 +n03175843 +n04476526 +n02523877 +n13196369 +n10190122 +n03172738 +n02709763 +n02070624 +n04563560 +n04017807 +n03824589 +n07817758 +n03222722 +n01542433 +n13173259 +n04458201 +n12869668 +n12580786 +n02407763 +n09760913 +n10530571 +n11752798 +n09612700 +n07601175 +n11632376 +n10641223 +n03158668 +n03411208 +n01413457 +n03684740 +n10248008 +n12656528 +n11849271 +n07771891 +n12067433 +n12389727 +n11734698 +n04042204 +n07825399 +n12621945 +n07624757 +n03180732 +n09741331 +n10246317 +n04030414 +n07821107 +n04524716 +n03789603 +n12867449 +n10249869 +n02434415 +n07614103 +n03333349 +n04602840 +n09923996 +n02658811 +n13033879 +n03663433 +n02873623 +n07837545 +n12436907 +n02675077 +n01500854 +n04435552 +n01790304 +n11687789 +n03443543 +n09733459 +n01606177 +n12245885 +n11721642 +n02201497 +n12010815 +n04594742 +n02755984 +n07927716 +n04245218 +n03134118 +n13214485 +n12294542 +n12713521 +n03556173 +n12650038 +n07719058 +n04319774 +n10443830 +n10019187 +n09720702 +n07926442 +n10402709 +n03989777 +n11699751 +n09613118 +n02965122 +n04221076 +n01861330 +n12837052 +n02975589 +n09668437 +n03012499 +n01418498 +n12451566 +n03585778 +n07692517 +n09672590 +n09741999 +n09748648 +n07621264 +n03482001 +n10185148 +n01542168 +n12536291 +n07846557 
+n11840476 +n03130866 +n02631775 +n11730015 +n03715275 +n07680168 +n12175370 +n05427346 +n03665232 +n08611421 +n11730458 +n02413484 +n09783884 +n07888378 +n04611351 +n02247655 +n02136794 +n11649359 +n01382033 +n07889193 +n10405540 +n03510384 +n04420720 +n03585875 +n03812789 +n01835769 +n12139921 +n09762011 +n10103228 +n03477410 +n11930788 +n10064831 +n12311045 +n07681805 +n03136504 +n12887713 +n03886940 +n03130233 +n10197392 +n12333961 +n07672914 +n12723062 +n12599661 +n04268799 +n03696909 +n12809868 +n12452256 +n10710778 +n02571652 +n12117326 +n02450677 +n03041265 +n12544240 +n01966377 +n10252354 +n02378625 +n09814488 +n10569011 +n13067330 +n07928998 +n07890970 +n02187279 +n02592371 +n07846802 +n03475961 +n05448704 +n10410996 +n02851795 +n10093167 +n12468719 +n09876701 +n03057724 +n03469031 +n02344270 +n04248209 +n02687682 +n04467899 +n12897788 +n03436656 +n12539832 +n09906704 +n03190458 +n11843441 +n12130549 +n11823756 +n03153246 +n03684489 +n04160036 +n02908951 +n12855365 +n03518230 +n12225222 +n12933274 +n10432957 +n02921406 +n10156831 +n12239647 +n02826812 +n03411927 +n11602091 +n13200986 +n04244847 +n01330126 +n14938389 +n03001540 +n04387531 +n03423099 +n07608533 +n11723986 +n07600394 +n12529500 +n02403820 +n02587300 +n10333317 +n07935288 +n12680652 +n01449980 +n12153914 +n07803310 +n11741797 +n01881857 +n13081999 +n08644045 +n02061217 +n02173784 +n02660519 +n03104019 +n13137951 +n04538403 +n02621258 +n04515729 +n04165945 +n11919761 +n13078021 +n07861247 +n11959259 +n11801665 +n04070545 +n13210597 +n10218043 +n10717337 +n01365885 +n10718952 +n11979187 +n03880032 +n03798610 +n03477303 +n01876667 +n11860208 +n03401721 +n03360133 +n13230843 +n13194758 +n13190060 +n02564935 +n13894154 +n12754311 +n07697408 +n13171210 +n02035402 +n03736147 +n10396337 +n04554998 +n02793930 +n04126852 +n03654826 +n09411295 +n06255613 +n01680983 +n10261862 +n01581874 +n10378780 +n10646641 +n03539103 +n03351151 +n04349913 +n03906106 +n02370525 +n03319576 +n04113968 +n09693244 
+n02945964 +n03344509 +n04117216 +n03889626 +n03557840 +n09800469 +n04280487 +n07890890 +n12147835 +n12295237 +n03883664 +n04436992 +n02922877 +n10099002 +n01988203 +n10056719 +n11646517 +n03672521 +n04568713 +n10111358 +n03606347 +n04047733 +n12320627 +n10251612 +n10460033 +n01742447 +n11917835 +n10443032 +n13079567 +n04363671 +n10788852 +n10482587 +n03308614 +n12741586 +n12938667 +n04539407 +n01630148 +n02303777 +n13050940 +n04552551 +n02341288 +n04098169 +n04110439 +n11625391 +n12259316 +n02822762 +n10631131 +n04089152 +n03571439 +n04558199 +n12656909 +n03170292 +n02877642 +n12771890 +n03033267 +n12658603 +n13354021 +n12855886 +n11840246 +n03619050 +n07727252 +n12932706 +n13874073 +n01315805 +n02948942 +n12048928 +n03146449 +n10656969 +n09872557 +n03906590 +n04454792 +n12500309 +n04239333 +n01815036 +n09644657 +n10497645 +n02918455 +n07812662 +n04240434 +n10804636 +n11967878 +n04184095 +n11834272 +n05244755 +n02299039 +n12665659 +n12144987 +n07607492 +n11887750 +n13083461 +n04577139 +n09670909 +n07876893 +n02875948 +n04069582 +n10458111 +n10361194 +n09389867 +n01651778 +n11933387 +n13193143 +n12834190 +n03516266 +n02184589 +n10041373 +n02809605 +n04064213 +n04957589 +n12643113 +n02582721 +n07911061 +n07921360 +n10369417 +n10527147 +n04104925 +n03707372 +n01386182 +n10374849 +n09902851 +n08559155 +n02332447 +n11649150 +n11722036 +n01823740 +n04592356 +n10002257 +n10661732 +n07562379 +n07597263 +n04036776 +n13112201 +n09842288 +n07738105 +n04545984 +n09635973 +n02885233 +n02756854 +n07808479 +n03029296 +n01543383 +n02884450 +n09843716 +n04224395 +n10576676 +n10140051 +n07919894 +n07806879 +n10212780 +n09478210 +n12017127 +n03770224 +n07606191 +n03555217 +n09715165 +n12270460 +n12129738 +n11739365 +n02303585 +n07818029 +n05314075 +n03019304 +n09859975 +n09454744 +n13151082 +n12586989 +n00455076 +n07741357 +n04957356 +n08659242 +n04577293 +n04126244 +n03131193 +n12428242 +n03569494 +n03781594 +n07743384 +n02892392 +n12576695 +n12199982 +n07693439 +n07719756 
+n11884384 +n03043798 +n12351091 +n03690168 +n02214499 +n01839949 +n01831360 +n12642964 +n02957862 +n03125588 +n12883628 +n04002371 +n10747965 +n09744462 +n02853745 +n13030337 +n12156679 +n02761034 +n12587487 +n03374570 +n12728322 +n01731764 +n07918706 +n03696445 +n03185868 +n02805283 +n03868763 +n02202124 +n12369665 +n12449934 +n12650229 +n02656301 +n07743723 +n11702713 +n02927053 +n03916385 +n01486010 +n03986071 +n04188064 +n13897528 +n12414329 +n07718068 +n07837755 +n11735570 +n10464542 +n04091466 +n01315581 +n10374943 +n03989898 +n13220525 +n04076052 +n04062179 +n02414442 +n04414101 +n04446162 +n00480885 +n03536568 +n03773835 +n10728998 +n12643877 +n02255391 +n03799610 +n07847585 +n00446411 +n11910666 +n03139998 +n02296276 +n02889996 +n02786611 +n10363445 +n07854348 +n08583682 +n09912681 +n07896422 +n02368821 +n11935953 +n12185254 +n11738547 +n03809211 +n02448318 +n13066979 +n01987076 +n12009047 +n12839574 +n13174823 +n07902520 +n03369866 +n13209129 +n02593191 +n03853291 +n02620578 +n10071332 +n01813658 +n09895480 +n10134760 +n01316734 +n07845166 +n03175983 +n13132156 +n12814960 +n12883265 +n03637787 +n04310507 +n04133114 +n03900194 +n04129688 +n04449550 +n01805321 +n01717467 +n01573627 +n12271451 +n11722621 +n09976917 +n12232280 +n12905135 +n03451253 +n01655344 +n12346986 +n11987511 +n10517283 +n02941845 +n12730370 +n03121190 +n07917874 +n10023656 +n10151133 +n07695187 +n03258456 +n10639238 +n10682713 +n02085019 +n12343753 +n10749928 +n04595611 +n04410565 +n08500819 +n07719980 +n04016479 +n03232417 +n03469832 +n09834885 +n07925327 +n10094782 +n03632100 +n12734215 +n09845849 +n04047139 +n10743124 +n02604954 +n12270278 +n03036244 +n11991777 +n10168012 +n02561803 +n10531109 +n10344319 +n03804211 +n10513938 +n10732967 +n09917481 +n02950482 +n03148808 +n07910245 +n07925423 +n07889990 +n04302988 +n07745357 +n04346511 +n07573563 +n02564403 +n12084400 +n10030277 +n09815455 +n04388473 +n12404729 +n10576316 +n12072210 +n11811059 +n01824344 +n03556811 +n03175301 
+n07586485 +n13137010 +n11986729 +n04967561 +n03881404 +n07692114 +n07874995 +n02770585 +n07853345 +n02775689 +n04328580 +n01323781 +n07773428 +n02414043 +n02794474 +n02352932 +n07569873 +n12374705 +n03606106 +n04267246 +n04369485 +n11934239 +n12705698 +n11841247 +n07868045 +n03525693 +n12358293 +n02937010 +n09658398 +n12711182 +n03516647 +n04591631 +n10228712 +n11930353 +n03471779 +n12594324 +n02251593 +n04455579 +n02542017 +n03381450 +n03320845 +n12364940 +n09657748 +n12412987 +n01840412 +n10570704 +n10117267 +n03251280 +n10195261 +n12178129 +n12285049 +n02177775 +n10117415 +n03707766 +n04475309 +n05604434 +n03999064 +n12127575 +n01972131 +n09793946 +n01635176 +n02791532 +n07564101 +n07876460 +n02813981 +n10764719 +n03638743 +n12761702 +n02125689 +n11657585 +n09923003 +n13069773 +n02683183 +n04324515 +n11936946 +n12862828 +n02659808 +n02619861 +n13175682 +n11648039 +n07768139 +n12512674 +n12108613 +n02947977 +n12899971 +n03845107 +n07689490 +n02081927 +n07619508 +n10248377 +n10300041 +n10761326 +n09655213 +n02675522 +n04963111 +n01995686 +n03256631 +n10684630 +n04471912 +n12728864 +n03870546 +n02829246 +n09725546 +n03409920 +n13194918 +n10055297 +n02513248 +n01462803 +n11782266 +n13094145 +n07839478 +n13916363 +n07932454 +n09722817 +n07774479 +n10386874 +n12832822 +n01599388 +n02964295 +n04349189 +n07689313 +n11653126 +n02309841 +n02064000 +n04410663 +n04562122 +n02358712 +n09901786 +n10441124 +n12882158 +n12815668 +n10159289 +n01641930 +n03315990 +n12271187 +n10277638 +n07815163 +n12903014 +n07915366 +n04412300 +n01324799 +n03408264 +n09452291 +n03019198 +n11890884 +n10355806 +n03186199 +n04013600 +n12541157 +n06259898 +n06273294 +n11946051 +n01671705 +n04415257 +n01905321 +n04050600 +n12604460 +n04051439 +n02929184 +n11765568 +n10025060 +n02396796 +n04033287 +n13027557 +n03127531 +n10308066 +n09729062 +n01593553 +n02476567 +n07609728 +n12970293 +n01419888 +n03215749 +n01684741 +n13067672 +n03870290 +n07846359 +n12961536 +n03356559 +n07727140 +n09843602 
+n02378755 +n12044041 +n01977485 +n07718920 +n12060546 +n04265428 +n12237855 +n04006067 +n10227266 +n04361937 +n12134486 +n10097842 +n02264591 +n03912821 +n07594155 +n03116163 +n11771924 +n04155457 +n12394118 +n10507380 +n01844746 +n11901452 +n03024233 +n03383562 +n11806814 +n10062716 +n04204755 +n08613733 +n12907671 +n03533654 +n09826605 +n03109033 +n07606419 +n03537085 +n11615812 +n07695504 +n11694300 +n04520962 +n09971839 +n02664285 +n03402511 +n02061560 +n13133140 +n03548195 +n12877493 +n02425086 +n12845187 +n12488454 +n02975994 +n02071028 +n01457407 +n03685486 +n07605282 +n07771405 +n07827554 +n10538733 +n03438780 +n04379096 +n12686496 +n10001764 +n11848867 +n12125001 +n09886540 +n03275566 +n01442710 +n12789554 +n07858197 +n12722071 +n12868880 +n10441694 +n12409651 +n07727741 +n12289585 +n04069166 +n12686877 +n03723439 +n07815956 +n12543455 +n10778044 +n02200630 +n10074841 +n12640284 +n12589841 +n07592317 +n07866571 +n12712626 +n04228422 +n11711289 +n03590475 +n13081229 +n03045800 +n03639230 +n02874214 +n07615954 +n03204134 +n12053962 +n12769219 +n15006012 +n09873769 +n11818636 +n01959029 +n03349599 +n12227909 +n07576969 +n03638180 +n07742224 +n03390673 +n02344175 +n03770520 +n00447361 +n13235319 +n01983674 +n10061882 +n04267165 +n12493868 +n12713358 +n02930339 +n10493419 +n12918810 +n02582220 +n12248359 +n02644501 +n04596492 +n04538249 +n07905618 +n13230190 +n07808268 +n15005577 +n09351905 +n12730544 +n11937023 +n04024137 +n02238358 +n11646955 +n11618079 +n09849990 +n04060448 +n04220805 +n12725940 +n12004120 +n01484562 +n02669442 +n12132956 +n01756916 +n03980986 +n02256172 +n07716750 +n12119390 +n04047834 +n11934041 +n12828977 +n03648219 +n11873612 +n12909614 +n04397860 +n03908111 +n03261395 +n03695616 +n11668117 +n12014355 +n02896074 +n03988758 +n04426184 +n10328696 +n02477028 +n04507326 +n04320871 +n03256472 +n01919385 +n03988926 +n13182164 +n07826250 +n03207548 +n01396617 +n04369618 +n07913774 +n13229951 +n03410022 +n12728508 +n01997119 +n03598783 
+n01341090 +n03879456 +n01736796 +n02864122 +n13879816 +n02684962 +n12246037 +n02433729 +n04364397 +n09881358 +n02950120 +n03326371 +n02243878 +n01790812 +n12990597 +n03330947 +n07764486 +n03332173 +n10006177 +n03347472 +n07619301 +n10106509 +n12365285 +n01732989 +n07678586 +n04098795 +n07733847 +n03994297 +n12872914 +n02762909 +n07766530 +n13198482 +n02395855 +n12273515 +n04487894 +n07847047 +n12488709 +n02859557 +n04255768 +n02360933 +n03267696 +n03152951 +n10188715 +n10520544 +n13065514 +n02900594 +n03699754 +n01319187 +n01949499 +n10417424 +n01603000 +n12062105 +n09683180 +n09863339 +n01880716 +n10702615 +n03893935 +n10495555 +n04131499 +n02957252 +n02113892 +n07724078 +n12246941 +n04303095 +n01751215 +n04213530 +n12117695 +n12418507 +n01922948 +n12131405 +n13188767 +n01481498 +n03174079 +n02407172 +n11613867 +n10152616 +n10119609 +n04158250 +n11695085 +n07855105 +n02854630 +n03768683 +n12739966 +n12266984 +n12819141 +n12732605 +n13205249 +n11917407 +n01607429 +n02694279 +n07815294 +n06614901 +n07846471 +n12119717 +n02595339 +n12366186 +n10693235 +n12263410 +n12484244 +n10337488 +n04146976 +n01469723 +n07872748 +n03238879 +n12000191 +n07846938 +n03116008 +n12139196 +n04013176 +n10317963 +n12140511 +n02065726 +n01649556 +n10316862 +n01755952 +n04385079 +n12770529 +n02814338 +n01675352 +n11874423 +n01369484 +n10537708 +n07618281 +n07821404 +n02297819 +n03238762 +n03357081 +n02628600 +n07830986 +n12507823 +n04431925 +n11955532 +n03429771 +n10281896 +n12383737 +n12760875 +n09673091 +n12892013 +n06625062 +n04503269 +n03674842 +n12338979 +n04268275 +n12033139 +n11767877 +n07812790 +n12676134 +n04037873 +n10097477 +n12310638 +n12258101 +n09391386 +n13196738 +n13866626 +n12720354 +n10106995 +n07843220 +n03878294 +n04101375 +n07733217 +n10220080 +n04601938 +n10778148 +n12973937 +n10556825 +n12256708 +n12583855 +n04259202 +n07628181 +n04226962 +n02777402 +n09674412 +n12188635 +n03776167 +n04504038 +n04156591 +n02270945 +n02264021 +n07826653 +n02980203 +n02059852 
+n02102806 +n12921660 +n04477725 +n10107173 +n12837466 +n02697022 +n04350688 +n12110236 +n02177196 +n07899976 +n12639910 +n02368399 +n10009162 +n03950647 +n09248153 +n02425532 +n04044955 +n11933257 +n03460899 +n10147710 +n02379743 +n02413917 +n02890804 +n12915140 +n02146879 +n07915800 +n01787006 +n03646809 +n11677902 +n04065909 +n02088992 +n02887832 +n10115946 +n02306825 +n03719560 +n10456696 +n03758220 +n12625003 +n04021503 +n07563366 +n02531625 +n10304650 +n12855710 +n09735654 +n07853762 +n03512030 +n12898342 +n02297938 +n12618727 +n04082344 +n12953712 +n12617559 +n03035715 +n02532451 +n05399356 +n03602686 +n10082423 +n04607759 +n07581607 +n07594737 +n04030965 +n03464628 +n12103894 +n03039353 +n03522990 +n02964934 +n03169063 +n10153865 +n09653144 +n09941571 +n12907057 +n07768318 +n02600798 +n02187150 +n01811243 +n12252383 +n04495555 +n07678953 +n13181244 +n13069224 +n13184394 +n12765402 +n03471347 +n10208847 +n03697366 +n09840435 +n02506947 +n09709673 +n07928578 +n11935715 +n07848936 +n02757927 +n01999767 +n02245443 +n10260473 +n13898645 +n02701260 +n07840219 +n11785875 +n12385830 +n12017664 +n12145148 +n04530456 +n01929186 +n02384741 +n04113038 +n03296217 +n09723819 +n03766697 +n12143215 +n09929202 +n02684248 +n12119539 +n03566555 +n12941220 +n04124573 +n10750188 +n07733005 +n04230707 +n03829857 +n07756838 +n12244458 +n12543826 +n03514129 +n02762169 +n04435870 +n03342863 +n09745324 +n12369476 +n11652039 +n03915320 +n07746749 +n07608641 +n12642600 +n02389943 +n12137791 +n04111962 +n12493426 +n12454793 +n01455317 +n10728117 +n03281524 +n12195734 +n12353431 +n02477329 +n02678010 +n04557522 +n10162354 +n14942411 +n07806043 +n12274151 +n09835153 +n03983499 +n04086663 +n07851926 +n07868684 +n11926976 +n03972146 +n04310604 +n09675799 +n13880704 +n13173132 +n07577918 +n10720964 +n11937102 +n03349020 +n12340581 +n03725506 +n03477143 +n10578162 +n01731137 +n03382104 +n11616852 +n01493829 +n09327077 +n03856335 +n03321843 +n02375757 +n02118643 +n08500989 +n03496486 
+n04140777 +n12858987 +n02845293 +n04093157 +n07819682 +n10394786 +n12289310 +n02901620 +n01559160 +n07919165 +n12648196 +n11774972 +n11995396 +n10543937 +n10154013 +n03977158 +n01884476 +n12266528 +n11906127 +n12661538 +n04396650 +n12761905 +n04175574 +n10181878 +n12017326 +n12876899 +n09744346 +n07741706 +n04451636 +n07735981 +n03751590 +n03140546 +n03070396 +n03091223 +n12071477 +n07562017 +n09981092 +n09847344 +n12552893 +n12371202 +n02245111 +n01598271 +n04400499 +n02298095 +n15048888 +n02967170 +n04030161 +n10676434 +n01556514 +n13235766 +n02538562 +n12603672 +n03941586 +n02449183 +n07567611 +n12923257 +n02296021 +n11730933 +n12497669 +n02917742 +n07875926 +n02714535 +n13142182 +n02878107 +n07861334 +n02682811 +n03730655 +n03681813 +n12970733 +n02132320 +n12436090 +n07931280 +n04295353 +n12982590 +n01783017 +n13164501 +n02424589 +n01499732 +n12650805 +n04543509 +n10369699 +n03439631 +n13160116 +n07831663 +n05449196 +n13025854 +n10169241 +n02847461 +n10734963 +n13213397 +n03343234 +n12275317 +n02793414 +n04300509 +n01803893 +n11617878 +n02179192 +n03637480 +n04514648 +n03087521 +n10478827 +n11757190 +n12919195 +n04532504 +n01736375 +n04015786 +n04545471 +n12668131 +n04472961 +n14786943 +n07584938 +n02498743 +n07744559 +n10010062 +n10101308 +n07832099 +n02601767 +n10473453 +n02451575 +n02496052 +n03696746 +n12669803 +n07904072 +n04290762 +n11737125 +n07760755 +n12553742 +n12068138 +n12630999 +n02390938 +n02202678 +n02216740 +n02679961 +n13173697 +n11828973 +n02287987 +n04585318 +n10360366 +n07745661 +n03474352 +n07934800 +n12677612 +n03692272 +n13092240 +n04230487 +n11846312 +n12433952 +n11793403 +n03056873 +n05454833 +n12517077 +n12682882 +n02649218 +n09425344 +n07878283 +n02795978 +n10064977 +n12754174 +n02945813 +n01750743 +n03150661 +n13880415 +n12337800 +n04017571 +n09754907 +n04456734 +n02967540 +n10621400 +n11744471 +n01971620 +n04148285 +n10781817 +n11991549 +n12305654 +n03943833 +n10330931 +n12918991 +n01783706 +n11933099 +n12931231 +n07589967 
+n09666349 +n07853445 +n12714949 +n03548533 +n04158672 +n03809802 +n03080309 +n12800049 +n02578454 +n02834027 +n10067600 +n03044671 +n04198233 +n07930205 +n04357930 +n12221522 +n11957317 +n03085781 +n03723885 +n03614383 +n02661618 +n04292221 +n03426574 +n03838024 +n10442093 +n12399534 +n01450950 +n07876550 +n11937446 +n09870096 +n02631628 +n05460759 +n01710177 +n03660562 +n04283784 +n01497738 +n02232223 +n04209811 +n12837259 +n02864987 +n04499810 +n12654857 +n03493792 +n09688233 +n02312912 +n10057271 +n07606058 +n03258192 +n10507565 +n11930038 +n08679269 +n03812263 +n11662128 +n04085574 +n07643577 +n03981094 +n02796412 +n02513939 +n07686634 +n07936979 +n03168774 +n03816394 +n07625324 +n04138131 +n10383094 +n10222716 +n10381981 +n12254168 +n13223090 +n03056583 +n09910556 +n03277004 +n12649866 +n02089725 +n03688707 +n09665367 +n07849506 +n02843909 +n13141797 +n02477516 +n09710886 +n03835941 +n11734493 +n10778711 +n10007809 +n02038141 +n12766043 +n02353172 +n02030224 +n10762212 +n06274921 +n13033396 +n03560860 +n01961234 +n13868515 +n03216199 +n01553527 +n04429038 +n10211036 +n02150885 +n02435517 +n02755675 +n09699020 +n12566331 +n03909516 +n02903727 +n02594942 +n04173172 +n04125692 +n12251001 +n02412787 +n01649412 +n01411450 +n01774097 +n09912907 +n03162556 +n07566231 +n12267534 +n03928589 +n04142327 +n11771147 +n07832592 +n04155177 +n07937621 +n07839864 +n03201895 +n13095013 +n10298271 +n03059103 +n03784793 +n11925450 +n03288742 +n02809364 +n04108999 +n04449449 +n03726233 +n07854455 +n03692136 +n12018447 +n03374282 +n06008896 +n07598928 +n03577312 +n04604806 +n09892513 +n04370600 +n08238463 +n01793159 +n07822687 +n03242390 +n07685303 +n03822361 +n01996280 +n10505942 +n06596845 +n04219580 +n12056990 +n10579062 +n10240082 +n10298202 +n07711907 +n03905730 +n12222900 +n07598622 +n04415815 +n12389932 +n12154114 +n04210012 +n12500751 +n03729402 +n12122918 +n04572121 +n12804352 +n02415130 +n12780325 +n11639084 +n12768933 +n02253494 +n13217005 +n03567788 +n12304286 
+n10703480 +n07766723 +n05455113 +n07741804 +n12186839 +n01687128 +n01350701 +n03260206 +n07876026 +n12528382 +n04125541 +n10457444 +n01606097 +n11717399 +n04598416 +n12899166 +n09748101 +n12160125 +n07608980 +n07843348 +n02409038 +n02571167 +n09980805 +n09706029 +n02495242 +n12765846 +n10373525 +n12321873 +n03047171 +n12365462 +n03752398 +n02662993 +n10316527 +n10728233 +n06273207 +n01733214 +n12297846 +n12755876 +n02428842 +n02289307 +n04536465 +n03253187 +n02297294 +n05584746 +n03117642 +n12189779 +n10338231 +n07599649 +n04559994 +n12710917 +n09966470 +n12470907 +n04499300 +n12403075 +n11837743 +n02269657 +n12599185 +n07618587 +n03996004 +n12851094 +n03392648 +n01319001 +n12826143 +n12369845 +n01814549 +n10056103 +n12854193 +n02267483 +n04019881 +n03490649 +n04268142 +n10801802 +n12315060 +n10149436 +n04563790 +n09865068 +n03000530 +n10657556 +n07840672 +n12118414 +n02856013 +n02900459 +n04094859 +n12079523 +n11827541 +n12236160 +n02904505 +n02846619 +n09842823 +n12926039 +n02146201 +n03195799 +n12815838 +n09899289 +n01483021 +n02519340 +n05453815 +n10329035 +n02494383 +n09742927 +n13220355 +n03212406 +n11759609 +n10061431 +n12095281 +n04262530 +n03799240 +n02426176 +n04608809 +n12230540 +n13880551 +n11741175 +n11858814 +n11723452 +n07590841 +n12604845 +n10342543 +n12760539 +n09270657 +n02563079 +n10643937 +n12843316 +n01651641 +n07838811 +n04359034 +n07758260 +n02762725 +n11726433 +n03114743 +n01952029 +n12321395 +n11930571 +n12337922 +n12427946 +n12001294 +n12551457 +n13235011 +n02290340 +n06419354 +n12408873 +n01741442 +n12308447 +n10243872 +n03658635 +n03694761 +n02570484 +n12912801 +n04158002 +n02417785 +n01332181 +n03703075 +n10283366 +n03142431 +n02779609 +n02300554 +n09868782 +n10323752 +n03166809 +n03394149 +n02827148 +n02186717 +n01350226 +n03344784 +n03555996 +n04498873 +n13157481 +n04519887 +n12028424 +n12349711 +n10471640 +n07741235 +n04032936 +n12357968 +n10228592 +n13178284 +n04168840 +n13239177 +n03561573 +n02566489 +n11807696 +n07681264 
+n02566665 +n10456070 +n10063919 +n10492727 +n01788579 +n11977660 +n02036228 +n02738978 +n03989349 +n10332953 +n12949361 +n09901502 +n07839730 +n13146928 +n10152306 +n04170515 +n11602478 +n02522722 +n01333610 +n13030852 +n02143891 +n12807624 +n04542329 +n12243693 +n12036226 +n13917690 +n02553028 +n02752199 +n10594857 +n11627714 +n04348070 +n13171797 +n04612257 +n07934373 +n04536765 +n02244515 +n04526800 +n04546595 +n02551668 +n12143405 +n07871588 +n07858484 +n03628728 +n13179804 +n03242264 +n12089846 +n07588688 +n07620047 +n01647466 +n09685233 +n03467254 +n12666369 +n05449661 +n10694939 +n12886600 +n12256522 +n04006330 +n03317673 +n04316815 +n12222090 +n04022866 +n04088441 +n07617526 +n10782362 +n04355821 +n13901490 +n12508618 +n03849943 +n04503499 +n13193466 +n09754633 +n07583978 +n13911045 +n07643679 +n12054195 +n10692090 +n04032509 +n10146927 +n02031298 +n04002629 +n04035748 +n10712229 +n02866106 +n07909504 +n04540397 +n06266878 +n10219879 +n12567950 +n07853648 +n03191561 +n07856045 +n12646197 +n03317510 +n10515863 +n13198054 +n02808829 +n12889579 +n02698473 +n09924437 +n03595055 +n12306270 +n07857356 +n09715303 +n03024518 +n04323519 +n09629065 +n04178668 +n12748248 +n02308618 +n07873198 +n10564098 +n03007297 +n04036155 +n02143439 +n10507482 +n12267931 +n03956331 +n12888234 +n04066476 +n07813107 +n02736396 +n10306496 +n12324388 +n01744555 +n01649726 +n06596179 +n03616091 +n07754279 +n02072493 +n12408280 +n04314632 +n02412700 +n04030846 +n09833997 +n03599964 +n05258627 +n12572759 +n12136581 +n02419056 +n12453714 +n11652217 +n03878511 +n03907908 +n12223160 +n10514121 +n04153330 +n12163279 +n12623818 +n03495671 +n13222985 +n10354754 +n04365112 +n12384680 +n12538209 +n03105214 +n12534862 +n13869045 +n03945928 +n11613692 +n11892181 +n13002209 +n02685253 +n07598529 +n02629716 +n13202355 +n07927070 +n02176916 +n04370955 +n11988132 +n03246197 +n01440467 +n07620145 +n03940894 +n01897667 +n03408340 +n12602612 +n02539424 +n03863657 +n04559620 +n02604480 +n11822300 
+n03518829 +n11619845 +n10504090 +n03341035 +n02908123 +n04281998 +n03277602 +n03865288 +n10074578 +n13902793 +n03054605 +n04404200 +n12786836 +n12235051 +n04035231 +n12009792 +n12705458 +n04378489 +n02476870 +n11954798 +n03573848 +n02087314 +n03162460 +n04363412 +n02261063 +n09953615 +n01947139 +n03044801 +n04287351 +n04479287 +n03861596 +n12510343 +n07854066 +n03027505 +n12161577 +n04197878 +n01812187 +n10015792 +n08685188 +n11737009 +n10333044 +n02730568 +n10290813 +n13096779 +n05257476 +n07917951 +n12121187 +n03517509 +n07932762 +n02336275 +n12159942 +n12105981 +n02562971 +n13882961 +n12016777 +n02793684 +n12717644 +n01380754 +n07724173 +n04055861 +n11831297 +n03059934 +n03370646 +n10065758 +n09459979 +n07913644 +n04322531 +n03457451 +n02567633 +n04240867 +n10693334 +n10556704 +n04614844 +n07909362 +n12082131 +n09268007 +n04359217 +n09883807 +n02292085 +n04052346 +n03431570 +n02843465 +n04584056 +n04432043 +n09846142 +n07864317 +n04475749 +n04227050 +n04280845 +n03535284 +n07890617 +n03217889 +n02806762 +n11967315 +n11762927 +n02501923 +n03442487 +n09690083 +n02964634 +n02920164 +n07855317 +n10196725 +n03042829 +n11662937 +n12183816 +n12311224 +n13884261 +n02243209 +n03140771 +n02385002 +n03071288 +n12936826 +n04583022 +n07859142 +n04578112 +n04467506 +n12938081 +n09982152 +n12555255 +n03335333 +n10104888 +n12151170 +n12709349 +n10456138 +n02237868 +n07620327 +n12561309 +n12341931 +n12350032 +n01775730 +n12950796 +n01440242 +n04261767 +n10568915 +n12285195 +n07589872 +n13112035 +n07840395 +n11750508 +n12286197 +n03336168 +n03325288 +n02551134 +n04293258 +n13130014 +n07733124 +n04451139 +n11985903 +n03602365 +n11722342 +n11944751 +n12897999 +n02277422 +n03101302 +n07608245 +n03531982 +n01997825 +n11713370 +n04442582 +n02833403 +n04427857 +n01648356 +n10645223 +n10414865 +n10696101 +n12885045 +n10037080 +n12218274 +n07570530 +n04493259 +n10659042 +n10577710 +n03141612 +n10582604 +n00446632 +n02834642 +n07568389 +n04583888 +n04096848 +n12879068 +n04495051 
+n09837459 +n12216215 +n03702440 +n10174695 +n10559009 +n10577182 +n07686299 +n04269668 +n02404028 +n03720665 +n09885866 +n03082450 +n12492682 +n12780563 +n03703463 +n02644360 +n02307910 +n01374703 +n04402342 +n04264134 +n03158414 +n04443433 +n12522894 +n10803978 +n11706942 +n10751026 +n13143758 +n02972934 +n04174234 +n12718995 +n11994150 +n11545350 +n12526754 +n07753448 +n02870772 +n11942659 +n11744108 +n12735160 +n12229887 +n04970312 +n02874336 +n10721819 +n13193269 +n03330665 +n09865162 +n10306595 +n12161744 +n03303669 +n07846688 +n02168427 +n01961600 +n03559531 +n09826821 +n03413124 +n09695019 +n03783873 +n11863877 +n13874558 +n02283617 +n11895472 +n13182799 +n07854614 +n03283827 +n01397690 +n02650413 +n09809279 +n10290541 +n10383505 +n11724660 +n07689757 +n10181547 +n07620597 +n11979354 +n02771547 +n13061471 +n12631637 +n11966385 +n03969510 +n11735977 +n07621497 +n12956588 +n03217653 +n04546081 +n11696450 +n10300654 +n02032769 +n01654863 +n09779280 +n02390258 +n03887512 +n10489426 +n10745770 +n10713843 +n03602194 +n10710913 +n07864475 +n04486322 +n07915213 +n08663051 +n10236842 +n02390738 +n02388453 +n03598385 +n12228689 +n11771746 +n12803226 +n11242849 +n02378149 +n10427223 +n05448827 +n11870044 +n12477983 +n12311413 +n03500090 +n10280034 +n02685365 +n03652389 +n12728656 +n07695284 +n09961198 +n03780799 +n03935883 +n01612955 +n12475774 +n02701730 +n07833535 +n12584365 +n03902220 +n12727960 +n10619492 +n04450465 +n10646780 +n10110731 +n04142175 +n12296735 +n09337048 +n12681579 +n12819354 +n12541403 +n04305016 +n12798910 +n10321126 +n08618831 +n09721244 +n02225798 +n01637338 +n12218868 +n05545879 +n12022382 +n03972372 +n02505063 +n01694311 +n10695450 +n10081842 +n12297507 +n07592922 +n12118661 +n01952712 +n10517137 +n01340522 +n07719330 +n03729482 +n04168541 +n03090710 +n07873679 +n07828378 +n07728284 +n10343088 +n07869937 +n14585392 +n01453475 +n12095412 +n04973020 +n12810007 +n07564515 +n01599741 +n11629047 +n09937802 +n12450607 +n12460146 +n02292401 
+n03632963 +n09617696 +n12545232 +n02874642 +n09934488 +n10091349 +n01447946 +n05469861 +n11830400 +n03382533 +n02608547 +n12697152 +n03542727 +n10716576 +n03664159 +n07568625 +n02976815 +n13147532 +n02336826 +n12432574 +n07686461 +n04107598 +n02505998 +n09849167 +n03688066 +n02836513 +n01576358 +n01893021 +n12017511 +n12065649 +n01714231 +n11662585 +n12827907 +n12954353 +n11936199 +n01368672 +n03843883 +n12184095 +n10058411 +n11684654 +n08506347 +n10579549 +n01423302 +n11604046 +n07613158 +n03605504 +n02090129 +n02284224 +n01958435 +n12664469 +n04459122 +n09617161 +n09780828 +n11830252 +n12870048 +n04247544 +n09871095 +n02962938 +n09933020 +n13064457 +n10341243 +n07694169 +n13200193 +n07765728 +n01524761 +n07730562 +n07751737 +n07740855 +n04192521 +n12593122 +n07841037 +n02809736 +n10604275 +n12512095 +n01907287 +n04592596 +n09823153 +n03181667 +n12449784 +n07908923 +n12365900 +n03053976 +n15060688 +n04165675 +n02530637 +n09816654 +n12540966 +n07934152 +n09290350 +n03455802 +n10111779 +n01351315 +n10281770 +n13862552 +n12435486 +n12370174 +n12296045 +n03493219 +n12363301 +n11973749 +n03939565 +n02938321 +n13209270 +n12604639 +n12657755 +n03604536 +n10328941 +n04278932 +n10376890 +n01884203 +n02061853 +n04256318 +n07831821 +n10585217 +n07591813 +n10210648 +n07739035 +n01632308 +n10319313 +n02861777 +n03821145 +n13029610 +n04239900 +n10313441 +n04951716 +n10628097 +n02368116 +n08571275 +n04433377 +n10458596 +n12435965 +n12448136 +n12129986 +n04295777 +n07898895 +n07854266 +n12327846 +n12318782 +n07825850 +n10414239 +n11731157 +n04409911 +n10655442 +n11829205 +n01738306 +n02840515 +n04150371 +n03369512 +n02645538 +n12773917 +n07818422 +n03227010 +n10303037 +n12942025 +n12406304 +n06616216 +n02435216 +n12981954 +n03683341 +n09703809 +n07722666 +n11817160 +n10110893 +n10228468 +n03572631 +n01378545 +n02130086 +n04388574 +n11960673 +n12956922 +n11924014 +n09895902 +n03426462 +n07759576 +n02563949 +n03466947 +n02522637 +n09480959 +n02033882 +n02451415 +n12677120 
+n10580437 +n04425977 +n03057841 +n12285512 +n07614348 +n03144873 +n03391613 +n12366870 +n02304657 +n07863935 +n07909714 +n02413717 +n12591702 +n07838659 +n02967407 +n12016914 +n02735268 +n09470027 +n10222259 +n03899100 +n10513509 +n11620016 +n12600267 +n04368840 +n03016209 +n04085017 +n03215076 +n10238272 +n09782855 +n07586179 +n12434483 +n12452480 +n01990516 +n12030092 +n11739978 +n12714254 +n13036804 +n07727377 +n07879560 +n03710421 +n12128490 +n11968519 +n03250588 +n10173579 +n03114041 +n02942015 +n12729164 +n07871065 +n02591330 +n09353815 +n10138472 +n02712545 +n12866333 +n07835823 +n03508485 +n01758895 +n02925385 +n03321419 +n09931418 +n02846874 +n12500518 +n07587819 +n03160186 +n04974340 +n13067532 +n11940349 +n13027879 +n02878534 +n10055566 +n07925708 +n12628356 +n11958499 +n03472672 +n04233295 +n04563020 +n03426871 +n04330109 +n03677682 +n04129766 +n02884859 +n12692521 +n10188856 +n03500971 +n10355306 +n12407545 +n11955040 +n10028541 +n10345659 +n14720833 +n09641578 +n12613706 +n11718296 +n03380301 +n01334217 +n03890358 +n03583419 +n12447121 +n09660010 +n11826569 +n11837351 +n12096089 +n03871860 +n01821554 +n12834938 +n02738449 +n02644665 +n03316873 +n12548564 +n03605417 +n12094401 +n13152339 +n03004531 +n03080904 +n03535647 +n12349315 +n04213264 +n07860208 +n01526766 +n03710937 +n11806521 +n10618234 +n12306938 +n10473562 +n10050880 +n04596116 +n02577164 +n04479694 +n07936093 +n07834286 +n12175181 +n03986857 +n02919648 +n12055073 +n04567593 +n07585015 +n12771085 +n10551576 +n09778783 +n01593282 +n02406952 +n12331263 +n10629329 +n12287195 +n07729225 +n07828041 +n01880473 +n12257725 +n02696246 +n07853232 +n11936864 +n09745229 +n03364156 +n04503155 +n03194297 +n04003359 +n07607361 +n10106387 +n10306890 +n10455619 +n01647180 +n07740115 +n12106323 +n03626272 +n11685621 +n11866706 +n04321121 +n01606978 +n12621619 +n11615259 +n07840304 +n02841847 +n05459769 +n03432360 +n04604276 +n12356395 +n12468545 +n03645168 +n00477827 +n03459591 +n04202142 +n12959074 
+n07881625 +n12382233 +n02405692 +n12299640 +n12247202 +n12628705 +n12534625 +n09264803 +n12176953 +n09835017 +n10390807 +n04975739 +n12474418 +n11931135 +n07917791 +n10636488 +n09690496 +n11993675 +n03703203 +n11794139 +n13015688 +n04168084 +n01948446 +n10169419 +n04455048 +n04973669 +n12840502 +n12120578 +n10448455 +n01386007 +n02288122 +n01441910 +n02278463 +n03108759 +n02753710 +n03143400 +n13080866 +n13917785 +n13124358 +n13220663 +n02475358 +n01925916 +n02684649 +n10451590 +n03869976 +n03881305 +n07928264 +n01422185 +n04035634 +n11996677 +n04261369 +n12925583 +n12764008 +n09972587 +n03708962 +n01791388 +n02892626 +n04098399 +n07823369 +n07752874 +n13225244 +n03376771 +n01771766 +n13146403 +n12157179 +n13897198 +n07770869 +n13240362 +n07610502 +n03688504 +n02896856 +n12543186 +n09967063 +n05453412 +n12590600 +n02378870 +n07568241 +n01687290 +n00474769 +n11694866 +n02338722 +n02637977 +n04567746 +n10586444 +n11907405 +n03421960 +n07605693 +n10384214 +n12877637 +n12018363 +n10056611 +n13882487 +n12140759 +n04114301 +n11762018 +n12678794 +n11817501 +n02116450 +n12018530 +n03324629 +n12726528 +n03155502 +n10493199 +n04181083 +n10609198 +n04328703 +n03045074 +n07769886 +n01892385 +n12828520 +n03165211 +n11800565 +n07567139 +n13877547 +n12829582 +n02949084 +n07589724 +n01746191 +n12395463 +n05459457 +n10565502 +n11981475 +n09310616 +n12327022 +n02313709 +n12957803 +n11865276 +n12955414 +n12939479 +n13225365 +n07936459 +n03139089 +n07577772 +n12057895 +n03620353 +n12152031 +n01885158 +n04096733 +n12626674 +n10464711 +n10675609 +n07752782 +n03709960 +n02540983 +n02285179 +n01903234 +n07835701 +n04421083 +n02352290 +n09421031 +n03349367 +n02539894 +n04052235 +n07922955 +n03941887 +n04234260 +n04423552 +n11975254 +n08501887 +n12489676 +n04574348 +n10602119 +n02163008 +n02748491 +n10024937 +n10033888 +n12605683 +n01790398 +n10128519 +n14977188 +n10293590 +n12077244 +n09741074 +n11694469 +n12692714 +n12159804 +n12533437 +n03831203 +n03692004 +n09462600 +n04537436 
+n06618653 +n07913537 +n12783316 +n10038119 +n10236521 +n01486540 +n07875267 +n04345787 +n07681355 +n13028937 +n03607186 +n07863107 +n12387103 +n09830926 +n03574416 +n04478383 +n11685091 +n03197446 +n03225458 +n09741722 +n07736527 +n02857907 +n10177150 +n12711398 +n10308275 +n02418770 +n02577662 +n09935107 +n03362639 +n12446908 +n04329681 +n04114428 +n09624899 +n12913144 +n12338034 +n02341616 +n12360817 +n12907857 +n02414904 +n05482922 +n11974888 +n04127117 +n12581110 +n04368365 +n01699254 +n12525753 +n04254450 +n11951052 +n12458874 +n12721477 +n07562651 +n02239192 +n10533874 +n12006306 +n09537660 +n10008123 +n02788386 +n03248835 +n04491312 +n11795580 +n04025633 +n10166189 +n07703889 +n11824747 +n07605198 +n12134836 +n03591116 +n02946753 +n13212025 +n11742310 +n02328820 +n02985606 +n09955944 +n12679432 +n10020366 +n12013035 +n02942147 +n04172512 +n11802410 +n10789709 +n03385295 +n02039497 +n01416213 +n11940750 +n12178780 +n01967963 +n12662379 +n12217851 +n02812631 +n12432069 +n09991740 +n03089477 +n12458713 +n03876111 +n10311661 +n12286068 +n02838958 +n11936369 +n03716228 +n13228017 +n06276902 +n12677331 +n04330189 +n10488016 +n12011370 +n04343740 +n07893792 +n02171164 +n03963483 +n12080588 +n07577657 +n12936155 +n03809686 +n04223066 +n04086066 +n12776558 +n07813579 +n01841943 +n12285705 +n02581482 +n11653570 +n10010632 +n04305947 +n12228886 +n12797368 +n01404495 +n09697986 +n11882237 +n10077879 +n07607832 +n09779461 +n13212379 +n10769188 +n10715789 +n01480106 +n02145910 +n04275093 +n01983829 +n01978010 +n09937903 +n11976314 +n11785276 +n12386945 +n04445782 +n10712374 +n10706812 +n10194775 +n12655062 +n10739135 +n02597972 +n02307176 +n04121342 +n02350670 +n12698027 +n02805845 +n02895008 +n13149970 +n03451365 +n04542595 +n07803895 +n07864198 +n09690864 +n03844550 +n12378249 +n10345422 +n13163553 +n10457903 +n10783539 +n10539015 +n11757017 +n10274173 +n08652376 +n10283546 +n04541777 +n02824152 +n12945177 +n02082056 +n03695957 +n07936015 +n07591162 +n03628071 
+n02990758 +n07685118 +n04023422 +n04951875 +n03541393 +n10289176 +n04039209 +n07913180 +n07910799 +n12017853 +n03732543 +n10656120 +n10512859 +n04556664 +n12464649 +n12927758 +n12078451 +n07878145 +n10561320 +n12467592 +n07689217 +n07619881 +n11935187 +n09837720 +n03642144 +n12220019 +n02983507 +n03271260 +n02778588 +n10193650 +n01654083 +n02746978 +n10202763 +n02953552 +n07924366 +n08583554 +n02905886 +n07855603 +n09745834 +n12366053 +n04140539 +n03383211 +n11648268 +n03352961 +n12116734 +n07771539 +n07836077 +n03842754 +n11683838 +n03004409 +n11730750 +n13098962 +n12292463 +n02867592 +n01653026 +n07583865 +n12548804 +n12702124 +n03917048 +n12677841 +n12511488 +n04217387 +n12495670 +n03554375 +n12403513 +n08558770 +n02781764 +n12339526 +n12742290 +n01404365 +n03591798 +n12446737 +n10494195 +n12110352 +n01672611 +n10493922 +n03638623 +n09910840 +n02238594 +n02575325 +n13186546 +n11873182 +n10344774 +n04094060 +n10417682 +n02749169 +n02428089 +n04549721 +n03824284 +n12107002 +n12784371 +n09986904 +n01634227 +n07826544 +n12253487 +n01679005 +n12516165 +n09339810 +n03126090 +n07803408 +n11883945 +n03842276 +n03397412 +n03280216 +n12264786 +n02545841 +n11877860 +n01830479 +n13207923 +n12490490 +n02542958 +n04114719 +n12590715 +n13226320 +n11644872 +n04119630 +n10176913 +n04213105 +n11652966 +n12546420 +n12625823 +n11897466 +n02092173 +n10567613 +n04953678 +n10059067 +n12408466 +n03056288 +n13036116 +n04169597 +n12467197 +n02569905 +n02758490 +n12623211 +n04077889 +n04959061 +n04183957 +n11689815 +n03777126 +n03306869 +n07720084 +n02659478 +n12947756 +n04341288 +n04448185 +n04037076 +n09828988 +n03346289 +n04174705 +n13126050 +n04255346 +n09764732 +n11773628 +n14891255 +n04314107 +n02184720 +n02646892 +n04320598 +n01979526 +n03191451 +n03662452 +n10290422 +n01739094 +n02305636 +n04202282 +n05459101 +n02766168 +n09994808 +n03528100 +n10475940 +n03005619 +n12639168 +n02144936 +n13202125 +n10703221 +n03770834 +n12324056 +n03474167 +n02609302 +n12166929 +n12852570 
+n12920719 +n12508762 +n11983375 +n01422450 +n12616630 +n09681107 +n10486561 +n13038577 +n12266644 +n02478875 +n02547014 +n02249809 +n03336742 +n12038760 +n01672432 +n09861287 +n03678879 +n01949973 +n09928845 +n02310149 +n12648693 +n10533983 +n12812801 +n04550676 +n01800633 +n12128306 +n12744142 +n13140367 +n07803213 +n07688265 +n13068434 +n02030568 +n12955840 +n01625121 +n13215258 +n04270576 +n02680638 +n02817251 +n01539272 +n04066023 +n12969927 +n10280598 +n04001661 +n09774167 +n10358575 +n01836673 +n02290664 +n09940725 +n12447581 +n07803779 +n04561965 +n10151261 +n01538362 +n10170060 +n13160365 +n09823287 +n12554729 +n10620212 +n11935027 +n03465605 +n03227856 +n08519299 +n07785487 +n03522863 +n02861286 +n12200905 +n04269502 +n02104184 +n07612273 +n01390763 +n11872658 +n12981086 +n10244359 +n01738731 +n12117235 +n12846690 +n02861658 +n08782627 +n09832633 +n02531114 +n01394492 +n03269073 +n03077442 +n09794668 +n13884384 +n08659331 +n02556373 +n02587877 +n03523506 +n03723153 +n12024805 +n13061172 +n03978575 +n07914686 +n13134844 +n12183026 +n03573574 +n03765128 +n03319167 +n01920438 +n07852452 +n07680655 +n03017698 +n12959538 +n04261506 +n01793340 +n03292362 +n12817855 +n03593222 +n01962506 +n12453018 +n04027367 +n12518481 +n09223487 +n07871335 +n03779246 +n09668562 +n01889849 +n02492356 +n07830841 +n03277149 +n09968652 +n03092476 +n10400205 +n06263202 +n07595368 +n12767208 +n02196896 +n12580012 +n10265801 +n02103181 +n02922461 +n01731277 +n12422559 +n04278605 +n02250280 +n03283413 +n11829922 +n10191613 +n02493224 +n04427559 +n12181352 +n12742878 +n10683675 +n04503705 +n03785142 +n12816942 +n10723230 +n11936707 +n12360534 +n12909759 +n03766218 +n02696843 +n11935877 +n07828156 +n10617397 +n12921499 +n13158714 +n10166394 +n12370549 +n03505015 +n12769065 +n02636550 +n10781236 +n09869317 +n10275249 +n04234763 +n10735173 +n13137225 +n02070776 +n04232312 +n07575226 +n03471030 +n07909954 +n02633677 +n01662060 +n07563642 +n04263950 +n11824344 +n13178707 +n02972714 
+n10417288 +n12092930 +n11993203 +n10170681 +n03726116 +n03215337 +n12564613 +n14975598 +n07758125 +n03123666 +n07717714 +n01421333 +n02359667 +n09403086 +n03857026 +n12759668 +n02628259 +n02307515 +n12146488 +n09777870 +n07819303 +n12105353 +n10784113 +n11802995 +n12561594 +n02845130 +n12100187 +n03507658 +n02141611 +n01800195 +n03470005 +n12444898 +n02203592 +n09707061 +n00475142 +n12216628 +n01732093 +n02581642 +n03803780 +n12114590 +n04541662 +n12267133 +n11652753 +n07859951 +n04524594 +n12843144 +n04040540 +n10604880 +n12559044 +n03063834 +n12394328 +n12704513 +n10230216 +n10756641 +n02101670 +n12309630 +n03070587 +n11626010 +n04239639 +n01638329 +n01928517 +n13144084 +n10420649 +n03102516 +n12395289 +n09833111 +n01651285 +n11688069 +n12881913 +n12783730 +n07716649 +n03618678 +n10344203 +n03626502 +n10718665 +n03577474 +n01683201 +n03246653 +n12153224 +n02519472 +n02470709 +n15090238 +n03129636 +n07774295 +n04577567 +n09995829 +n09662038 +n10297367 +n03555862 +n12531727 +n09947127 +n12533190 +n04062807 +n00479734 +n12860978 +n01884104 +n09866559 +n12069009 +n04595501 +n12088495 +n02909053 +n12283790 +n02180427 +n10697282 +n07562881 +n13092078 +n11706325 +n01746952 +n01978136 +n07731436 +n02386746 +n12648424 +n12726357 +n10314182 +n07839172 +n11753562 +n12903503 +n12589687 +n02375438 +n03604763 +n11549895 +n13202602 +n12304420 +n10738215 +n12220829 +n10095420 +n12177455 +n11887476 +n04006411 +n09838370 +n02853218 +n12688372 +n03335461 +n02800940 +n03036701 +n09885059 +n10206629 +n11922926 +n01678657 +n12192132 +n12248141 +n03108624 +n01936671 +n02417242 +n03222857 +n03768823 +n04343511 +n03538817 +n12655726 +n12521186 +n01330497 +n12767423 +n12965951 +n09695132 +n04410886 +n12599874 +n07865700 +n07596160 +n10227698 +n03224490 +n11598886 +n02948293 +n09906293 +n12247963 +n03301175 +n03895170 +n04259468 +n07808806 +n13147689 +n09856827 +n13882639 +n02241008 +n03842585 +n02883101 +n12182276 +n13918717 +n12728164 +n10634464 +n02477187 +n03107716 +n02342250 
+n01479213 +n12793695 +n09808080 +n10707707 +n04161010 +n02836607 +n10076483 +n07726386 +n03872273 +n10250712 +n07688412 +n13884930 +n12301766 +n10196404 +n07591330 +n03814727 +n09610255 +n12757115 +n09814381 +n02397987 +n07886317 +n03959123 +n02185167 +n03533845 +n11838413 +n10227393 +n07704305 +n03580615 +n02663485 +n10101981 +n04346855 +n10067011 +n04464125 +n02829510 +n10007995 +n07845775 +n03004713 +n02450561 +n09905530 +n10361060 +n12394638 +n12095934 +n10479135 +n03145277 +n12246773 +n13194212 +n04475900 +n03252787 +n14867545 +n10485298 +n09961739 +n02149653 +n01553762 +n03931980 +n02344408 +n11676850 +n04034367 +n04235646 +n12867184 +n12625670 +n12763529 +n07593107 +n04351550 +n02571810 +n13899735 +n03652826 +n09495962 +n03421768 +n04205062 +n11918808 +n07745197 +n07752264 +n01892744 +n04609811 +n10278456 +n11790936 +n09754152 +n13234519 +n09820044 +n00440643 +n02350357 +n03779884 +n07803992 +n03305953 +n01836087 +n10068234 +n10690421 +n03134394 +n12380761 +n12801966 +n03134232 +n02596720 +n07591236 +n11882821 +n02312175 +n02387983 +n01912152 +n10805501 +n12718074 +n03188290 +n02776505 +n10528148 +n09971385 +n10524223 +n09958292 +n02721813 +n10300829 +n12007766 +n12107191 +n04449700 +n02987950 +n11878633 +n12328801 +n04551833 +n10567722 +n11654984 +n02808968 +n12066451 +n02964075 +n11633284 +n02434712 +n03070854 +n07926540 +n01543936 +n10091861 +n09938080 +n11976511 +n03342432 +n12886831 +n12509993 +n12958261 +n12730776 +n10066206 +n07846014 +n13176714 +n03332591 +n04607640 +n02513727 +n12138248 +n11964848 +n01318053 +n10553140 +n07839055 +n02632039 +n11865429 +n02286654 +n02367812 +n12093885 +n10774329 +n02296912 +n01729672 +n10353928 +n12033504 +n11936113 +n03263338 +n07822053 +n09737050 +n13875884 +n13212559 +n11690088 +n05468739 +n09344724 +n02507148 +n01377694 +n04172607 +n10464870 +n07804152 +n02825872 +n03139640 +n11858703 +n10227490 +n12334153 +n03616225 +n12018188 +n12399656 +n10235269 +n11840764 +n01995514 +n03326475 +n12704041 +n10684827 
+n03006788 +n13906484 +n02868240 +n03614887 +n03491724 +n12124172 +n03675907 +n13170840 +n03983712 +n03254737 +n07836269 +n01784293 +n02095212 +n12470512 +n12219668 +n12920521 +n04492157 +n02950018 +n01922717 +n11797981 +n12601805 +n02744961 +n07814925 +n09798096 +n03939062 +n13891547 +n07564292 +n01590220 +n09295210 +n03997875 +n03479266 +n01491661 +n03781055 +n12528768 +n10657306 +n12014923 +n10094320 +n02532272 +n02224023 +n04541136 +n12067672 +n02661473 +n04233027 +n12399899 +n12889412 +n01736032 +n12551173 +n01337734 +n10104487 +n02921592 +n02148512 +n10216403 +n03276839 +n01781570 +n03999621 +n02505238 +n12537569 +n10433452 +n02351343 +n12365158 +n08539276 +n01897257 +n12221801 +n10557246 +n10437698 +n01803641 +n11836327 +n07813833 +n03468570 +n06277025 +n10040240 +n03692842 +n03017835 +n01881564 +n10487363 +n07937069 +n10597505 +n01638722 +n10160412 +n09825096 +n12611640 +n03098515 +n10654211 +n13196234 +n03436990 +n04058486 +n09814567 +n10758337 +n03515934 +n07688757 +n10269199 +n12627347 +n04521571 +n01636510 +n03220095 +n09982525 +n12768809 +n02340930 +n02473857 +n12336586 +n12125584 +n02833040 +n02498153 +n01467804 +n12120347 +n11650430 +n11953339 +n12592058 +n05102764 +n10575594 +n09722064 +n01966586 +n10619888 +n07852376 +n12650915 +n10321882 +n11974557 +n09847267 +n13201423 +n12337131 +n13185658 +n02150134 +n10538853 +n10471732 +n07836600 +n03526062 +n02512752 +n04232437 +n03367321 +n04308915 +n07600895 +n11539289 +n03539293 +n12699922 +n07817599 +n02781213 +n03594010 +n12035907 +n04075813 +n05233741 +n07863229 +n10735984 +n12095543 +n12272735 +n04229620 +n12240965 +n07768590 +n04420024 +n12111627 +n02861509 +n02595056 +n12183452 +n04607982 +n13213577 +n07741888 +n03750614 +n10043024 +n03372933 +n10051861 +n10199251 +n03249956 +n03984125 +n02956393 +n11619687 +n03356279 +n07833951 +n10715030 +n02340358 +n10768272 +n01494041 +n02592734 +n03323319 +n02136285 +n03995661 +n09945223 +n03547397 +n10044682 +n12878784 +n02803809 +n13160254 +n12726902 
+n12196954 +n03161016 +n03105645 +n04218921 +n09493983 +n10719036 +n12263588 +n12565102 +n10684146 +n03148518 +n04287986 +n02340640 +n04331443 +n10727016 +n03369407 +n07824863 +n07844786 +n12467433 +n07582811 +n02964196 +n02197877 +n10758445 +n03271376 +n13212175 +n03260504 +n12777778 +n11973634 +n05467054 +n11946313 +n02462213 +n13906669 +n10520286 +n02074726 +n01771100 +n13880199 +n09811568 +n13883763 +n02334728 +n11831100 +n12025220 +n12751172 +n03858837 +n10127186 +n12831535 +n07823591 +n02513805 +n03662301 +n09913329 +n02749670 +n10655986 +n01787191 +n03199488 +n12732252 +n12253664 +n07735294 +n03440876 +n09650839 +n03844965 +n10341446 +n12688187 +n12961242 +n03423224 +n13157346 +n09802951 +n11948044 +n03489048 +n12279060 +n03664840 +n03731882 +n07742605 +n07870734 +n03949761 +n10759331 +n07739923 +n02737351 +n01788291 +n11780424 +n03722646 +n12297110 +n12363768 +n04495310 +n10008254 +n03934890 +n01318478 +n03609959 +n10070377 +n04123228 +n13068735 +n02909706 +n10671042 +n10491998 +n07650792 +n12664710 +n10213034 +n03455642 +n10411867 +n09903936 +n10121800 +n02622955 +n03647423 +n07596566 +n09654898 +n12248780 +n02684515 +n04255670 +n06273890 +n03495941 +n12960552 +n09724234 +n03861048 +n03293095 +n11835251 +n12852428 +n04084517 +n01814620 +n13159890 +n03147156 +n02311748 +n10237799 +n07584859 +n01946827 +n09651968 +n12241192 +n03669245 +n07858336 +n11932927 +n04444218 +n10526534 +n03642573 +n09470222 +n10731732 +n12001924 +n03786096 +n01359762 +n03824999 +n13877667 +n10591811 +n10574311 +n03275125 +n11631985 +n10539160 +n10502950 +n12499757 +n12432707 +n12068615 +n07689624 +n02610373 +n03204436 +n13051346 +n13134531 +n07610890 +n04021164 +n03502897 +n02299378 +n10417843 +n10050043 +n07929940 +n02593453 +n10577820 +n12870225 +n03333851 +n09463226 +n11741575 +n09193551 +n12012510 +n11987349 +n09215023 +n07924655 +n10060075 +n11999278 +n03933391 +n02602059 +n11993444 +n02337902 +n10149867 +n04441093 +n02868429 +n10629647 +n04192361 +n12029039 +n02768433 
+n12078747 +n12730143 +n03255167 +n12492900 +n01709876 +n09672725 +n07870620 +n02315821 +n12277334 +n12204730 +n07852712 +n01319685 +n07802246 +n13031193 +n00812526 +n09658815 +n11982939 +n04264485 +n07893425 +n04094438 +n03285730 +n13182338 +n10724570 +n07832741 +n13210350 +n10654015 +n04058721 +n07875086 +n03462747 +n03994417 +n02889856 +n11957514 +n10109443 +n10478462 +n03064562 +n02477782 +n11920998 +n02138169 +n04227787 +n11797508 +n10753339 +n12928307 +n11921792 +n12643688 +n01833112 +n03919808 +n09817386 +n01903498 +n03848033 +n12031547 +n01035504 +n12324906 +n01911063 +n02588794 +n03749634 +n03539754 +n02242455 +n03079616 +n03246312 +n09705671 +n07860629 +n10458356 +n10051761 +n09709531 +n02867401 +n12522678 +n13150378 +n04462576 +n03462315 +n03712981 +n07607027 +n10581648 +n02957427 +n04271793 +n02253913 +n12824735 +n11697802 +n02161588 +n12463975 +n02361090 +n09784564 +n09680908 +n03512452 +n13214217 +n10712690 +n04023119 +n07814007 +n09833751 +n12885265 +n02259987 +n11933903 +n03628831 +n11967142 +n02533545 +n03900301 +n07919787 +n12793886 +n10768148 +n03071552 +n02780315 +n12193665 +n03378442 +n04486616 +n07832307 +n03164192 +n12786273 +n04261868 +n12655351 +n12320414 +n04371979 +n10630093 +n13052014 +n01357328 +n07879821 +n09753348 +n03796974 +n11701302 +n11678299 +n04022434 +n11610823 +n07726009 +n04117639 +n10474343 +n11888061 +n01842788 +n10435251 +n03343047 +n03383378 +n12750767 +n09662661 +n05241485 +n10000459 +n12220496 +n02246941 +n12676370 +n02253264 +n07766409 +n02940289 +n12089320 +n10363573 +n12922119 +n09783537 +n11695285 +n12331066 +n12573647 +n10218164 +n12509821 +n07862946 +n12818601 +n02589316 +n13191620 +n03758992 +n12112337 +n10733820 +n02898093 +n02645953 +n10150794 +n04595762 +n02344918 +n13132756 +n12859153 +n12138444 +n04211001 +n12935166 +n07830493 +n10142166 +n11951820 +n03018848 +n01453742 +n11985321 +n10000294 +n01362336 +n02328009 +n12639376 +n03090437 +n02204249 +n04312916 +n13127666 +n09684082 +n03432509 +n10274318 
+n09704057 +n07593972 +n10074249 +n13157971 +n01638194 +n04036963 +n11708857 +n03418749 +n12589458 +n11899762 +n07683138 +n01601410 +n07854707 +n04279063 +n03239607 +n10302700 +n12520406 +n12576451 +n03881534 +n07565608 +n02349390 +n12569851 +n12249294 +n04059399 +n03530189 +n09357346 +n04325208 +n13159691 +n04045941 +n13898315 +n11992479 +n02353411 +n07825496 +n12922458 +n03115014 +n11761836 +n03323211 +n02793296 +n03492087 +n05241662 +n05491154 +n10419630 +n04506895 +n10546428 +n02907296 +n10769459 +n11647868 +n13188462 +n03825442 +n13209460 +n10742005 +n07599242 +n12361754 +n04570532 +n04131811 +n07756499 +n02598134 +n01910252 +n02910701 +n10129338 +n13871717 +n12673588 +n12565912 +n07562172 +n02711237 +n10775003 +n07695410 +n02637179 +n12930951 +n10261211 +n02906963 +n01366700 +n10642705 +n09846586 +n02779719 +n04978561 +n01369358 +n12114010 +n03521771 +n10667709 +n02296612 +n10722029 +n03500557 +n01365474 +n10472447 +n07585644 +n07609316 +n04013060 +n04505888 +n09726811 +n12692160 +n12378963 +n03585551 +n13139837 +n10167565 +n03799375 +n11990920 +n09640327 +n04502989 +n10108832 +n10561736 +n01897426 +n11766189 +n12462582 +n12913524 +n02684356 +n13200542 +n10466198 +n04331892 +n01478969 +n07837234 +n07692248 +n04552097 +n12382875 +n01484447 +n04120695 +n12681376 +n10293861 +n11965962 +n11788039 +n03959227 +n01832813 +n09918867 +n09942697 +n07587206 +n10459882 +n01347583 +n02267208 +n03951453 +n03006903 +n12126736 +n10286749 +n03395401 +n04605057 +n03467887 +n12755559 +n04020744 +n11629354 +n01647033 +n02780445 +n10205714 +n09439032 +n03138128 +n02763083 +n07835547 +n12251278 +n11949857 +n01635480 +n10675142 +n07845335 +n07751977 +n10332110 +n11871496 +n11764814 +n12229651 +n07760297 +n09865672 +n02919308 +n12218490 +n03782929 +n12231709 +n11909864 +n03144982 +n11799331 +n10433610 +n10483395 +n03206023 +n05442594 +n03626418 +n07870478 +n10171456 +n11964446 +n12796849 +n02126317 +n03797062 +n01412694 +n07610746 +n03581897 +n04479526 +n12447891 +n11906514 
+n09699642 +n12873984 +n10586903 +n13234114 +n02436353 +n11889205 +n01460303 +n04400899 +n11884967 +n02140491 +n12215824 +n03586911 +n01394040 +n10691937 +n12371704 +n09668988 +n04362624 +n01740885 +n01337191 +n09714120 +n02185481 +n08555333 +n10704238 +n12430471 +n12034594 +n10012484 +n12088909 +n03205903 +n04129490 +n13090018 +n10712474 +n12234669 +n13016076 +n00454855 +n13882713 +n02644817 +n03192907 +n03519226 +n01561181 +n04583967 +n11732052 +n10732854 +n04480303 +n07934908 +n03825673 +n10621294 +n04354387 +n03374102 +n02922159 +n13158815 +n04000716 +n09685806 +n04427216 +n12051514 +n09712967 +n12081649 +n09748889 +n03252231 +n10704886 +n12897118 +n12525168 +n11728769 +n02731251 +n02548884 +n12403276 +n09627807 +n08679167 +n09663999 +n04247440 +n07711683 +n09909929 +n03415868 +n05244421 +n07680416 +n12757668 +n11935794 +n03483086 +n01860864 +n10755164 +n03675076 +n12004987 +n07566092 +n04078955 +n03379719 +n01916588 +n10138369 +n09755893 +n03649003 +n03977430 +n02309120 +n10616578 +n12242850 +n12388293 +n03292085 +n09919061 +n10302576 +n01497413 +n01936858 +n01377278 +n04358256 +n02667693 +n12125183 +n07758582 +n07813324 +n09737453 +n12745564 +n03855464 +n03166685 +n01446152 +n09801102 +n10561222 +n10576818 +n13915209 +n10474446 +n03845990 +n04237174 +n12531328 +n07855812 +n10763245 +n04614505 +n07905770 +n12051792 +n12653633 +n03593862 +n10359659 +n10436334 +n07853125 +n12911264 +n12265083 +n03638014 +n04444121 +n02706221 +n10563711 +n07808166 +n11799732 +n04093915 +n10451858 +n04410760 +n10075299 +n12740967 +n12635359 +n09611722 +n12902466 +n13915305 +n05542893 +n04440597 +n03675445 +n12315245 +n10646032 +n10047199 +n12775717 +n10365514 +n10590452 +n11616260 +n02812342 +n07856756 +n04570416 +n03565991 +n12215210 +n04330896 +n02388588 +n02266269 +n10760199 +n14714645 +n02742070 +n03565710 +n12609379 +n03420935 +n03441465 +n00453631 +n01963479 +n04362972 +n09863936 +n03961394 +n03009269 +n12297280 +n04561010 +n12192877 +n02981565 +n12134695 +n07855413 
+n03232815 +n10180791 +n09932788 +n10571907 +n02109256 +n02660091 +n07865788 +n13228536 +n10306279 +n02635580 +n03634899 +n10262343 +n12296929 +n04393301 +n06281175 +n04485586 +n13103660 +n10510974 +n04166436 +n01634522 +n07596362 +n12700357 +n08597579 +n11744011 +n12238756 +n01790171 +n04571800 +n11867311 +n03464467 +n12241880 +n09961605 +n12592544 +n03170459 +n09938991 +n02692680 +n10295371 +n04331765 +n02612167 +n02520810 +n11977887 +n04094608 +n07722390 +n07832202 +n12448361 +n04612159 +n12186352 +n13161151 +n12654227 +n09868899 +n10104756 +n09920106 +n12981301 +n02610980 +n12545865 +n10673296 +n04110841 +n01704626 +n04055700 +n12117912 +n10519126 +n12443736 +n01697978 +n02148088 +n03012644 +n12091697 +n10395390 +n10509810 +n10462751 +n02896949 +n03836602 +n03928994 +n07718195 +n02473983 +n08571642 +n02648916 +n11970298 +n06274292 +n04613158 +n09856401 +n12811713 +n13111340 +n12122442 +n10095265 +n04445610 +n11631619 +n07863644 +n12022821 +n10315217 +n12549799 +n03386343 +n03121040 +n03558007 +n12272432 +n11798496 +n02522866 +n02952935 +n10741493 +n12143065 +n07883156 +n09616573 +n02289988 +n13161904 +n02588945 +n00451768 +n12375769 +n10777299 +n04495183 +n11930994 +n09970088 +n02254246 +n12276314 +n07857598 +n04428382 +n03789794 +n03383821 +n12980080 +n01447139 +n12880799 +n03501520 +n10764465 +n13143285 +n12727729 +n12444095 +n02354621 +n13174354 +n01691652 +n07732525 +n10437014 +n04368235 +n10371052 +n02611898 +n03597147 +n09912431 +n03135788 +n07888058 +n02409202 +n14582716 +n11934463 +n04395332 +n12558680 +n05257967 +n11798978 +n10617024 +n04102760 +n12132092 +n12988572 +n10390698 +n11887310 +n12063211 +n12952717 +n13141972 +n12176453 +n10245863 +n10509161 +n10389976 +n10333165 +n01474864 +n09274305 +n11888424 +n10368711 +n13222877 +n10469611 +n07582970 +n09700125 +n12805762 +n07865575 +n07853852 +n03628421 +n04482975 +n03099622 +n01349735 +n11943133 +n12736603 +n12197601 +n10597745 +n04418644 +n12689305 +n07755262 +n10598459 +n04312020 +n03195485 
+n09776642 +n10596517 +n10223606 +n01923890 +n12703716 +n03465040 +n12372233 +n12528109 +n03571853 +n10802621 +n10204177 +n02320465 +n03976105 +n02214096 +n02148991 +n10377542 +n10697135 +n03538542 +n07582027 +n04517999 +n12180456 +n02838014 +n03977266 +n03818001 +n12191240 +n11648776 +n10773800 +n04475496 +n03945817 +n04682018 +n02994743 +n02787269 +n11650160 +n03834472 +n03389983 +n09797742 +n06209940 +n12525513 +n12672289 +n01893164 +n10710259 +n01892145 +n11773408 +n10554024 +n09864968 +n10699752 +n11631405 +n10414768 +n04430605 +n10742546 +n10738871 +n12857204 +n09309046 +n01724840 +n04123317 +n07881525 +n03868044 +n02140268 +n10708292 +n09838295 +n09797998 +n10710171 +n11814996 +n11938556 +n03543511 +n02151230 +n01515217 +n03533392 +n02039780 +n12810151 +n02335231 +n12152251 +n13225617 +n09801275 +n01978587 +n14821852 +n11742878 +n12679023 +n03521431 +n09679028 +n02021281 +n10784544 +n04421258 +n12492460 +n03720005 +n02541257 +n03889397 +n02888898 +n10659762 +n12045157 +n12712320 +n10369095 +n09721444 +n12769318 +n01703161 +n12697514 +n07836456 +n03905361 +n10660883 +n07769306 +n11893916 +n07846274 +n04110281 +n03655470 +n07740744 +n01363719 +n12540647 +n09896311 +n12842642 +n07755619 +n07754155 +n11548870 +n02868546 +n04215588 +n04288165 +n13201566 +n07721118 +n12018271 +n11903333 +n02909165 +n02662559 +n11658709 +n13063514 +n07725663 +n10179069 +n10776887 +n12637485 +n03814528 +n12542043 +n07833333 +n07820036 +n02746683 +n07925808 +n10349750 +n03154316 +n04155625 +n03232923 +n02116185 +n09998788 +n02821543 +n03410303 +n10656223 +n07916582 +n12880638 +n10408809 +n04612840 +n11805255 +n12044784 +n10497534 +n03458422 +n12873341 +n07808675 +n09476123 +n07611733 +n10598013 +n02214660 +n05469664 +n03952150 +n11855435 +n04375926 +n08523340 +n01642391 +n04007415 +n09756961 +n12891824 +n02894847 +n11698245 +n12906771 +n02894024 +n04131015 +n11882636 +n04386456 +n03291551 +n07837110 +n12462221 +n08540532 +n10299875 +n12705978 +n10448322 +n10487592 +n12175598 
+n02272552 +n03833907 +n10383237 +n12758176 +n12729950 +n10061195 +n07816726 +n03241903 +n12239880 +n10380499 +n07855188 +n10207077 +n02770078 +n12961393 +n03778459 +n10734741 +n03485575 +n09958447 +n12337246 +n11830045 +n09866354 +n03209666 +n01470145 +n10395209 +n03872016 +n04267091 +n12888457 +n12104104 +n04088229 +n01964957 +n12002651 +n02503756 +n00481938 +n01908042 +n03378765 +n04193883 +n09862183 +n11861487 +n02520525 +n02081060 +n10386754 +n12693865 +n04514095 +n01325060 +n02460817 +n07568095 +n03651605 +n02561937 +n12844409 +n12888016 +n02974565 +n12439154 +n13018906 +n12071259 +n03897634 +n02863176 +n10603528 +n03493911 +n12887532 +n12944095 +n12794568 +n09980458 +n03503567 +n11783162 +n13123309 +n11729860 +n03702582 +n04280373 +n10086744 +n01790557 +n12627526 +n10552393 +n12092629 +n03888998 +n12751675 +n01442450 +n02479332 +n07726230 +n03642341 +n03142325 +n06263895 +n12088327 +n09703344 +n10528493 +n02820085 +n07737594 +n04090781 +n09901642 +n02328942 +n02724722 +n09866115 +n12658715 +n10481167 +n13135692 +n11850918 +n10205344 +n12361560 +n03698123 +n03284482 +n12106134 +n04441528 +n02591613 +n02581108 +n07856186 +n12197359 +n12900783 +n01725713 +n12012253 +n03907475 +n02170738 +n03694949 +n13238654 +n04611795 +n02782432 +n13191148 +n02741367 +n04170694 +n12770892 +n01973148 +n10080508 +n10161622 +n09808591 +n07912093 +n02059541 +n02779971 +n03857156 +n12945366 +n03055159 +n12758325 +n10067305 +n02597818 +n07808352 +n13147153 +n10679723 +n02271222 +n04012665 +n12942729 +n10349243 +n01377510 +n07800636 +n10654321 +n10219453 +n09961469 +n10732521 +n04479405 +n11632929 +n03856728 +n08658918 +n10327143 +n10754281 +n02085118 +n09691604 +n09952163 +n10082299 +n03872167 +n03733465 +n04138869 +n01425223 +n12066821 +n02177506 +n09892262 +n02896694 +n12983654 +n13224922 +n09658921 +n12744850 +n03639880 +n02943686 +n10660621 +n11936539 +n03698226 +n04519536 +n12392765 +n09319604 +n07567039 +n04160261 +n01802159 +n02838178 +n07746910 +n02266421 +n10240417 
+n12542240 +n12550408 +n01445857 +n04132465 +n03569014 +n12666050 +n12362514 +n10676569 +n09702673 +n12885510 +n04447156 +n04396226 +n12240150 +n11639306 +n02249134 +n01340785 +n02833140 +n10027590 +n02142407 +n11996251 +n07874531 +n04340019 +n03166120 +n10420277 +n04465203 +n12738259 +n12831141 +n03998673 +n01385017 +n12842519 +n02587051 +n10753061 +n12505253 +n13906936 +n01989516 +n12640435 +n07852532 +n04243142 +n10261511 +n12853287 +n12239240 +n03973003 +n09983889 +n10345302 +n14804958 +n02354162 +n03049326 +n10443659 +n01318660 +n12787364 +n04253304 +n11941094 +n09283514 +n09393524 +n11865574 +n01531639 +n04409279 +n02859729 +n10712835 +n03694196 +n04343630 +n10331098 +n12929600 +n02826259 +n10171219 +n07735179 +n07594840 +n03709644 +n09950728 +n09859285 +n07718329 +n01418620 +n09858299 +n12395068 +n10011360 +n07763290 +n02643316 +n03596099 +n04422566 +n11958888 +n09650989 +n10318686 +n01333082 +n12886402 +n03781467 +n12667582 +n02923535 +n09988311 +n08663860 +n02508346 +n13885011 +n03939281 +n10772937 +n04485750 +n09871952 +n10291942 +n07759324 +n10174971 +n03666238 +n01937579 +n02308033 +n07847706 +n10371330 +n04124887 +n11853079 +n11941478 +n12647231 +n04601041 +n12718483 +n02902816 +n01941340 +n04066767 +n07617839 +n02254901 +n03488784 +n07834774 +n02524659 +n03367969 +n10783734 +n03422484 +n09776807 +n03970363 +n10131590 +n03433247 +n02622712 +n10206506 +n12061104 +n11936287 +n07874674 +n10061043 +n07828275 +n03764606 +n12236768 +n01826844 +n09741904 +n05454978 +n03591592 +n01441272 +n03736372 +n07585474 +n12762405 +n12943912 +n01894522 +n03218446 +n11846425 +n11689678 +n04147916 +n02375862 +n10409459 +n09287415 +n10113583 +n03261263 +n02817386 +n09869578 +n10550252 +n02532786 +n12031388 +n07937344 +n11612235 +n01571410 +n09402944 +n04234670 +n02603862 +n04196925 +n09999135 +n10468750 +n15093049 +n03003633 +n11650307 +n12312110 +n02525703 +n10501635 +n09751622 +n10114550 +n10103155 +n12829975 +n04004099 +n12419878 +n02082190 +n03328201 +n03093427 
+n07845571 +n12655498 +n02558206 +n12563045 +n07573453 +n12324558 +n13016289 +n10601234 +n10310783 +n03531691 +n02135610 +n03168543 +n09985978 +n10615334 +n07839312 +n09985809 +n10142537 +n10417969 +n07869111 +n12514992 +n04327544 +n10326776 +n12583681 +n01476418 +n12840168 +n03852544 +n11713763 +n07824502 +n07858841 +n12256325 +n03036149 +n07883661 +n04500390 +n10170866 +n01835918 +n10760951 +n10720197 +n12330239 +n02135844 +n10210512 +n03217739 +n10802953 +n03136254 +n02161225 +n03961630 +n12927194 +n02251233 +n13891937 +n09945603 +n02695762 +n12181612 +n13234857 +n10175725 +n11346873 +n07934678 +n02318687 +n10251329 +n04112921 +n04001132 +n03042984 +n11704791 +n04246459 +n12193334 +n10718509 +n10371221 +n05278922 +n03265754 +n12186554 +n12481289 +n10521853 +n10748506 +n11729142 +n10143595 +n09422631 +n07562984 +n07850219 +n04193742 +n11997160 +n12002826 +n12820113 +n04132829 +n10272913 +n03358841 +n12610740 +n12384569 +n10725280 +n02746008 +n13148384 +n12635151 +n02337171 +n10350774 +n12308907 +n04542474 +n04339062 +n03549350 +n10240235 +n10556033 +n10214390 +n01791314 +n02801047 +n07817465 +n11610602 +n10315730 +n14592309 +n10249191 +n12453857 +n12579822 +n09833275 +n04051269 +n11552594 +n04088343 +n04565039 +n03930431 +n10679503 +n11899921 +n10295479 +n01357507 +n13036312 +n03404900 +n12523141 +n01816017 +n02020578 +n12661045 +n06262943 +n02775813 +n12921315 +n09751076 +n09834258 +n10585628 +n12885754 +n04411019 +n10342367 +n10368798 +n09672840 +n12729023 +n04578329 +n10325549 +n03680248 +n11920663 +n10416567 +n10011486 +n01643255 +n03193754 +n07823814 +n04055447 +n10660128 +n07765612 +n07612530 +n04205613 +n09677427 +n03989199 +n11100798 +n12721122 +n10000787 +n10382157 +n07724819 +n12928819 +n11631159 +n02608996 +n10516527 +n09703101 +n12290975 +n03470222 +n03810412 +n03729131 +n03356038 +n12692024 +n12614625 +n10789415 +n02333819 +n01722670 +n03885410 +n12038208 +n02294097 +n02608860 +n02500596 +n07909231 +n03254625 +n09681973 +n12221368 +n01893399 
+n10025295 +n03194812 +n13181406 +n12249122 +n03447894 +n09795010 +n02187900 +n10139651 +n10631654 +n01792530 +n02569631 +n07853946 +n09907804 +n03263758 +n04214649 +n02450829 +n02431542 +n11998492 +n02651060 +n04101860 +n01806061 +n13901423 +n12903964 +n03968479 +n04268565 +n12601494 +n02083780 +n04570118 +n12247407 +n03337822 +n09878921 +n02369935 +n10022908 +n09667358 +n13160938 +n11937360 +n07741623 +n03705808 +n12241426 +n10478118 +n03805933 +n10343869 +n09391774 +n03482128 +n10357737 +n10334461 +n09675045 +n09662951 +n10174253 +n01815270 +n13873361 +n04432785 +n09778927 +n10671898 +n05571341 +n10033572 +n09864632 +n10618465 +n03437184 +n12786464 +n01723579 +n11798270 +n07742415 +n02143142 +n10548419 +n03695122 +n02518622 +n04605446 +n10218292 +n11832671 +n12646950 +n03382708 +n09844898 +n09674786 +n01472502 +n07616906 +n09763272 +n03982767 +n10005006 +n03059236 +n01816474 +n03725869 +n01979269 +n04226322 +n13236100 +n03920384 +n11852148 +n04373563 +n04324120 +n11686652 +n03036341 +n02142898 +n09783776 +n13147918 +n03465320 +n07855721 +n10336411 +n10438619 +n07750299 +n12237152 +n03559373 +n10077106 +n10169796 +n09828403 +n09959658 +n12464128 +n12934685 +n04221673 +n02617537 +n11689367 +n10180580 +n07813717 +n12529905 +n02340186 +n01400247 +n11749112 +n04404072 +n03135656 +n12098827 +n12481150 +n10023506 +n03500838 +n01564101 +n04009923 +n10023264 +n03908456 +n03206405 +n07590068 +n09958133 +n10755394 +n01423617 +n11511327 +n10536274 +n01965252 +n11549245 +n11935627 +n09635635 +n03752071 +n07585997 +n03147084 +n12666159 +n09748408 +n03796848 +n01501948 +n02345078 +n12430675 +n03103128 +n11710987 +n03393199 +n09233603 +n10465002 +n04298765 +n01351170 +n02720576 +n03966582 +n10643837 +n12420124 +n10793799 +n01652297 +n09281252 +n11983606 +n10222497 +n11832899 +n02391617 +n12434106 +n03987674 +n02140179 +n07896560 +n04325804 +n10647745 +n01924800 +n10156629 +n03545961 +n03906789 +n01890564 +n10699558 +n12332218 +n03247495 +n11839460 +n03527675 +n12586725 
+n13208965 +n02714315 +n02750320 +n04615149 +n12679876 +n12863234 +n03304323 +n12139793 +n11922755 +n12321669 +n04979307 +n01921059 +n09657206 +n13042134 +n04045787 +n11700279 +n02337598 +n01415920 +n01400391 +n13207572 +n10785480 +n02515713 +n12018100 +n02634545 +n03292736 +n02881546 +n12655605 +n03105810 +n10545792 +n03894933 +n09796974 +n10320484 +n12308112 +n11549009 +n13047862 +n14941787 +n12379531 +n10540252 +n11696935 +n12184468 +n12851860 +n12908854 +n10586265 +n12369066 +n10426630 +n12523850 +n03916289 +n04538878 +n09908769 +n02828115 +n07560422 +n10266016 +n03569174 +n06423496 +n10495167 +n03617834 +n09327538 +n10195056 +n10508379 +n13031323 +n11659248 +n04242315 +n10742111 +n10700963 +n12032686 +n09877587 +n07825597 +n07568991 +n11736362 +n12169099 +n13103750 +n03263640 +n12248941 +n10665302 +n01920051 +n09704283 +n11533999 +n04503073 +n11645163 +n10639817 +n09920901 +n06340977 +n03251100 +n10378113 +n03226090 +n10131268 +n02877513 +n13191884 +n02787120 +n11709045 +n02740061 +n12323665 +n02831998 +n10342180 +n12716594 +n04498275 +n09905050 +n03745487 +n07642833 +n10294020 +n10211666 +n12205460 +n02981198 +n01642943 +n07679140 +n04390483 +n10432875 +n09214269 +n10792506 +n10243483 +n13099833 +n10221520 +n13177768 +n04091584 +n10672540 +n10200246 +n13889331 +n02345340 +n10237556 +n01833415 +n01335218 +n09804230 +n09957523 +n05235879 +n10070449 +n10308653 +n10721708 +n04312654 +n10394434 +n12201938 +n12434775 +n07601025 +n02672152 +n10157271 +n02635154 +n12572858 +n13182937 +n10160188 +n03396997 +n10344656 +n02968210 +n10190516 +n07684422 +n03706939 +n07618871 +n02290870 +n03817331 +n03275311 +n12698774 +n04375080 +n07837630 +n04314216 +n11833373 +n07618684 +n03742238 +n12532886 +n03712444 +n11750989 +n10038620 +n09617577 +n03807334 +n10108089 +n01816140 +n10715347 +n02648035 +n13127303 +n02809491 +n02430748 +n12235479 +n01451863 +n01514926 +n10010864 +n01913440 +n09660240 +n11806369 +n01470479 +n12655245 +n07655067 +n03436772 +n11778092 +n03951800 
+n10277815 +n07931733 +n01479820 +n03576955 +n07609549 +n12568649 +n05263316 +n02636405 +n01384084 +n03298352 +n07617344 +n09987045 +n10573957 +n07801709 +n02589062 +n02534165 +n02748359 +n09607782 +n07590974 +n02199170 +n02696569 +n09678747 +n12795209 +n13176363 +n10663315 +n10588724 +n09772330 +n10174589 +n12366313 +n11883628 +n07617447 +n01334690 +n03168663 +n11764478 +n08599174 +n03942028 +n12153033 +n03448696 +n12096674 +n10037588 +n03548320 +n09760290 +n10374541 +n09653438 +n10294139 +n10276942 +n12279293 +n12764507 +n12803958 +n10764622 +n02140858 +n07599068 +n10245507 +n12351790 +n12818004 +n10118301 +n03945459 +n09912995 +n12176709 +n03873996 +n10339179 +n10614507 +n10114662 +n10784922 +n03821424 +n04959230 +n13015509 +n12573911 +n11948469 +n09775907 +n12758014 +n01780142 +n09956578 +n12165384 +n10088200 +n10382480 +n04131113 +n09930628 +n09784160 +n11750173 +n13064111 +n03817522 +n12662074 +n03176238 +n12310021 +n11679378 +n09961331 +n02385580 +n11904274 +n03113505 +n10244913 +n02836900 +n09986700 +n11963572 +n13158605 +n10321632 +n02179891 +n02189670 +n10097995 +n10774756 +n10783240 +n10605737 +n02530052 +n10386196 +n10184505 +n09788237 +n03589672 +n12509109 +n10658304 +n12966804 +n12559518 +n03189311 +n01451295 +n12179632 +n12301613 +n10496489 +n03402785 +n10244108 +n02385676 +n03552001 +n03092053 +n02313360 +n02547733 +n02109391 +n01327909 +n04574606 +n03060728 +n07840124 +n10567848 +n10062176 +n02703124 +n10804732 +n12699301 +n04515890 +n07919665 +n10457214 +n09663248 +n03165955 +n12988341 +n03987865 +n03031756 +n10277912 +n10172080 +n09325824 +n03198223 +n09605110 +n10113869 +n11603462 +n03352366 +n11930203 +n09769929 +n12979316 +n02579762 +n09953052 +n03105974 +n00476140 +n11598287 +n02830157 +n10512201 +n09746936 +n10668666 +n02919976 +n09993651 +n02149861 +n09705003 +n10389865 +n11655152 +n10010767 +n10070563 +n03688832 +n10590239 +n11936027 +n02939763 +n03163488 +n03171910 +n09955406 +n03266195 +n10217208 +n09338013 +n07594250 +n03215930 
+n09725935 +n10592049 +n03732658 +n12498457 +n09966554 +n10668450 +n10361525 +n04060198 +n11936624 +n02602760 +n03942600 +n03708425 +n10020533 +n12067817 +n07590177 +n01891274 +n11837204 +n01419332 +n03860234 +n12616248 +n07834160 +n09867154 +n09788073 +n12222493 +n03388990 +n04245412 +n10182402 +n11675404 +n10450038 +n13045594 +n13158167 +n13082568 +n12052267 +n12707199 +n07810531 +n07914887 +n13127001 +n02573249 +n08619112 +n10471859 +n09919899 +n03635516 +n12067029 +n03352232 +n07765517 +n10519984 +n02742194 +n03062798 +n13124654 +n09958569 +n02370137 +n10121714 +n04019335 +n07732433 +n02559383 +n12585137 +n09729156 +n10744078 +n09954355 +n03078506 +n10062042 +n10688811 +n02668613 +n03142205 +n10347204 +n10518349 +n09898020 +n12563702 +n05468098 +n10116370 +n07838905 +n03127024 +n03545585 +n12801072 +n09940818 +n04480995 +n10466564 +n02606751 +n10032987 +n10771066 +n01587278 +n11852531 +n01455461 +n10397392 +n02349205 +n10180923 +n09778266 +n04366832 +n10051975 +n10538629 +n09865744 +n12554029 +n13118330 +n12952590 +n04187751 +n09924313 +n10062594 +n01980655 +n10028402 +n02567334 +n10590903 +n10265891 +n10739297 +n01457082 +n03437581 +n03713151 +n03475674 +n05464534 +n11863467 +n06592421 +n12491435 +n14914945 +n10279778 +n03388711 +n10483890 +n10612373 +n03332784 +n02332954 +n02952798 +n13041943 +n01607309 +n04356772 +n07711799 +n12670962 +n12229111 +n07878479 +n12401893 +n07772413 +n12138110 +n09781504 +n07902698 +n02750652 +n13042316 +n12400924 +n02304797 +n03066464 +n12852234 +n10155222 +n05541509 +n10711483 +n04210858 +n02835551 +n12859679 +n02935490 +n03540476 +n05279953 +n09807075 +n09617435 +n03566860 +n10549510 +n10025391 +n10754449 +n11927740 +n03554645 +n01837526 +n02656969 +n08648917 +n07860548 +n01452345 +n04021704 +n07783827 +n10080117 +n02187554 +n03214966 +n10036444 +n04291069 +n12407396 +n02170599 +n09896826 +n12417836 +n07845495 +n02749292 +n03061819 +n03682380 +n10756261 +n10369955 +n09692125 +n09978442 +n04277669 +n10539278 +n09703932 
+n01879837 +n02746225 +n13159357 +n11763874 +n10540656 +n07933530 +n12987535 +n02371344 +n10654827 +n09723944 +n12775393 +n11856573 +n12626878 +n12716400 +n09903639 +n09784043 +n03906894 +n10775128 +n03124313 +n10396727 +n02841641 +n10211830 +n12283395 +n03490784 +n14175579 +n04027935 +n12396091 +n02609823 +n01414216 +n09880741 +n11976933 +n03073384 +n09270160 +n11768816 +n12073217 +n11597657 +n09994878 +n11756329 +n12579404 +n03161893 +n01451115 +n07736971 +n02949356 +n03878418 +n12653436 +n10626630 +n12777892 +n13061704 +n10498699 +n03609786 +n03199358 +n10776339 +n10762480 +n13179056 +n10113249 +n04029913 +n12640081 +n10493835 +n11683216 +n03524287 +n04585626 +n02969527 +n12976554 +n08569482 +n10204833 +n12442548 +n02577952 +n09357447 +n10202225 +n02198129 +n11882972 +n10404426 +n01600341 +n12016434 +n09867069 +n10576223 +n09893600 +n01702479 +n04274686 +n04406552 +n02848118 +n02258629 +n03260733 +n03685640 +n11751974 +n09967555 +n06274546 +n09649067 +n10681557 +n07606933 +n03110202 +n11982545 +n10803031 +n02679142 +n04086937 +n10514255 +n04506402 +n03884554 +n09970192 +n10117017 +n12642435 +n10186686 +n02097967 +n03956531 +n11834890 +n02677436 +n10040698 +n11796188 +n03348142 +n04168472 +n02294407 +n12483282 +n09429630 +n04423687 +n09819477 +n09755555 +n10157016 +n03344935 +n07762373 +n12871859 +n09853541 +n09875979 +n13050705 +n02251067 +n10637483 +n03823673 +n10357012 +n03424204 +n04431648 +n01475940 +n02339282 +n10248198 +n07683265 +n13150592 +n10359117 +n10096508 +n03473078 +n13052248 +n10743356 +n03710079 +n10634990 +n04507689 +n07921090 +n02352002 +n03924407 +n03609147 +n02837567 +n03406759 +n03909658 +n10286282 +n12135576 +n01912809 +n10801561 +n10717055 +n03473465 +n03761588 +n03144156 +n09474412 +n10253611 +n12549420 +n02499568 +n09910222 +n10431122 +n12699031 +n01697749 +n11786843 +n03888808 +n12089496 +n10066314 +n10302905 +n12696830 +n09965787 +n11969806 +n04066388 +n13080306 +n03913930 +n09968259 +n10490421 +n10714195 +n07570021 +n10343449 
+n10401204 +n03472796 +n10779897 +n11787190 +n03503097 +n10439523 +n12123648 +n04279858 +n10511771 +n09755788 +n08253141 +n02616397 +n12248574 +n01645466 +n04334504 +n07729142 +n05451099 +n10503818 +n10354265 +n09707735 +n02633422 +n11999656 +n01324916 +n02088745 +n09354511 +n10705448 +n09756195 +n10136615 +n10427359 +n09702134 +n12600095 +n04122262 +n10791820 +n03330002 +n02713496 +n11710658 +n09664908 +n02550203 +n02349847 +n12835766 +n04098260 +n11536567 +n11686780 +n12875861 +n12758471 +n09806944 +n11810030 +n10400003 +n10098388 +n11663263 +n10559683 +n07833672 +n10753989 +n10643095 +n01988869 +n03112240 +n12911914 +n09979913 +n09785236 +n09790047 +n02676097 +n01653509 +n04601159 +n01938735 +n10748142 +n12978076 +n11990627 +n10437262 +n12972136 +n04077594 +n10148825 +n02269340 +n12886185 +n03608504 +n11677485 +n10612518 +n12267265 +n10649308 +n05458173 +n10650162 +n03213361 +n02747063 +n01611674 +n02322992 +n01554017 +n03512624 +n12773142 +n12747120 +n09902128 +n03162714 +n03924532 +n10299125 +n12378753 +n02778131 +n09976024 +n13093629 +n10778999 +n07721833 +n12232851 +n07876775 +n10097590 +n03194170 +n13029122 +n04573832 +n12859272 +n09639382 +n07688021 +n02878796 +n10751710 +n03633632 +n07762534 +n10779995 +n13914265 +n13093275 +n10729330 +n10433077 +n03663910 +n10499110 +n02272286 +n10371450 +n01967308 +n12633061 +n11659627 +n12982915 +n10344121 +n10268629 +n02697876 +n09879552 +n10167361 +n10719807 +n04042076 +n01632952 +n03243625 +n02125872 +n10105906 +n12194613 +n03149810 +n10721124 +n03947343 +n02020219 +n10122531 +n01315330 +n08647264 +n00452734 +n03607527 +n10010243 +n09863749 +n04473275 +n11782878 +n03585337 +n09655466 +n12989007 +n11711971 +n10716864 +n10475835 +n10704712 +n01894956 +n10568443 +n12881105 +n10387836 +n10403633 +n08645318 +n03500457 +n10377633 +n10108464 +n09933972 +n02618094 +n11798688 +n04155735 +n09780395 +n12822466 +n04302200 +n11899223 +n10633298 +n02760298 +n12142450 +n10803282 +n10769321 +n10514051 +n10597889 +n11837562 
+n02261757 +n01458746 +n09830759 +n10003476 +n09817174 +n10738670 +n10118743 +n12096563 +n03054491 +n12155773 +n10439727 +n04170384 +n03223923 +n12632733 +n07845421 +n10062905 +n11831521 +n04267985 +n12796385 +n04154854 +n00444142 +n09778537 +n03115663 +n04385157 +n10109826 +n02337332 +n09996304 +n09880189 +n12871696 +n11823305 +n02516776 +n12377494 +n08511017 +n04421417 +n10765305 +n09675673 +n03488111 +n03076623 +n11829672 +n10292316 +n10758949 +n13031474 +n02829353 +n10090745 +n09186592 +n12736999 +n12715195 +n11684499 +n03168933 +n09890192 +n10596899 +n12527081 +n10496393 +n10497135 +n02137302 +n03266620 +n12958615 +n12664187 +n02633977 +n04262869 +n04215800 +n13133233 +n02392555 +n09858733 +n10186350 +n01715888 +n03142099 +n08573674 +n11687071 +n02690715 +n03146342 +n12331788 +n08079613 +n10609092 +n12943049 +n12234318 +n02312325 +n12618146 +n10135197 +n11705573 +n02794368 +n02850358 +n09464486 +n01993525 +n03187153 +n10097262 +n02976641 +n12198793 +n12941717 +n10219240 +n12434634 +n03827420 +n10437137 +n10342893 +n04174026 +n10265281 +n07757874 +n10765885 +n01470895 +n02349557 +n11716698 +n03765467 +n10227793 +n07824268 +n12994892 +n10486236 +n02974454 +n10718349 +n11726145 +n09909660 +n03378593 +n07805006 +n09875025 +n02645691 +n10223069 +n03722944 +n04389999 +n02544274 +n10239928 +n04456011 +n10382302 +n01552333 +n10082562 +n12952469 +n09883047 +n10442573 +n01891013 +n10690268 +n13111504 +n02287352 +n03567635 +n10331347 +n09762385 +n09933842 +n02369555 +n12291459 +n09919200 +n01492860 +n02067768 +n10713254 +n10550468 +n12846335 +n03835729 +n12467018 +n11676743 +n03629643 +n12987423 +n10655730 +n08678783 +n10349836 +n10087736 +n10246703 +n10338391 +n04585456 +n04158138 +n10500942 +n09850974 +n10791890 +n10020807 +n03315805 +n02752917 +n04033801 +n10492086 +n04427473 +n02940706 +n12110475 +n09832978 +n12515393 +n07800487 +n09848110 +n02659176 +n09967406 +n10536134 +n10760622 +n09736485 +n07830690 +n07835173 +n09814252 +n10311506 +n10341955 +n03869838 
+n07760673 +n09970402 +n12526178 +n11687964 +n09968741 +n10719267 +n07851054 +n10116478 +n10599215 +n09951524 +n03855908 +n03997274 +n02986348 +n08599292 +n02474282 +n04155889 +n09983314 +n01987727 +n10280130 +n10404998 +n02294577 +n02998696 +n08586978 +n11652578 +n13867005 +n12663254 +n10524869 +n02287622 +n10220924 +n03279918 +n02626089 +n10291110 +n12820669 +n07861681 +n08643267 +n07720185 +n12555859 +n03225616 +n09769525 +n03295140 +n12489046 +n10615179 +n12150969 +n02888429 +n10753182 +n10267166 +n03675558 +n12693352 +n02378299 +n02788462 +n03622401 +n12236977 +n10730542 +n12758099 +n10502046 +n11937195 +n10366145 +n10307114 +n12984595 +n10128748 +n09362316 +n09789898 +n09654079 +n04260192 +n10114476 +n08623676 +n10331841 +n05265736 +n10269289 +n03090856 +n12764978 +n02825240 +n10358032 +n09825750 +n03062651 +n11196627 +n11825749 +n04148464 +n04439505 +n07572858 +n04561857 +n12904562 +n03643907 +n10723597 +n01492708 +n10071557 +n10140683 +n01739871 +n12984267 +n03072056 +n10772580 +n10462588 +n11936448 +n10494373 +n12845908 +n09793352 +n10717196 +n12577362 +n09779124 +n10663549 +n02286425 +n10380126 +n01890144 +n02751490 +n03361109 +n01781875 +n13128278 +n09994400 +n09883452 +n13881512 +n02833275 +n10362003 +n01376543 +n12366675 +n09984960 +n10173665 +n10673776 +n02057898 +n01934844 +n04057673 +n10018747 +n02916065 +n13024653 +n05539947 +n09648911 +n04150273 +n01393486 +n10411356 +n12232114 +n02436224 +n12757930 +n03095965 +n10555059 +n01577458 +n09666476 +n10598904 +n11656549 +n02591911 +n13092385 +n10506336 +n13103023 +n09658254 +n04095938 +n11936782 +n07824383 +n09781650 +n10240821 +n01780426 +n02850060 +n02863340 +n13914141 +n12138578 +n13034555 +n12291671 +n12133151 +n04515444 +n04591359 +n02589196 +n02689819 +n11740414 +n07610295 +n10246395 +n09921034 +n12447346 +n12641180 +n01419573 +n04242587 +n07760395 +n03399579 +n09866661 +n02549376 +n11861238 +n01588996 +n04319545 +n09789150 +n03288643 +n10312491 +n03353281 +n02345997 +n09711132 +n03043173 
+n02558860 +n03703590 +n03188871 +n12589142 +n12113323 +n09987161 +n05242239 +n09686262 +n09780984 +n09668199 +n09716933 +n11675738 +n04459243 +n11833749 +n10646942 +n07760070 +n10286539 +n04469684 +n13030616 +n03939440 +n01725086 +n09967816 +n10500824 +n13026015 +n03983928 +n02936921 +n04115542 +n10245029 +n12105828 +n12452673 +n10498046 +n10737264 +n11766046 +n04079603 +n10072054 +n12569037 +n10153155 +n09867311 +n02806992 +n10258602 +n10164025 +n10520964 +n02258508 +n12199399 +n05266096 +n08496334 +n10351064 +n12441552 +n12878325 +n13102648 +n02980625 +n03462972 +n12395906 +n13022903 +n11895714 +n03324814 +n11318824 +n01728266 +n07883510 +n10731013 +n10181799 +n12142357 +n09671089 +n11531334 +n01718414 +n04573625 +n10390600 +n11553522 +n01314910 +n04227519 +n10514784 +n02944256 +n12103680 +n03081859 +n11655592 +n12569616 +n10700105 +n09755086 +n03865820 +n01456137 +n10442232 +n02900987 +n04491934 +n07849026 +n04519728 +n09986450 +n03305300 +n10186143 +n02879422 +n03018614 +n10747548 +n10562509 +n10068425 +n12593341 +n11937692 +n08679562 +n09613690 +n10646433 +n12251740 +n10994097 +n13048447 +n03848537 +n12153741 +n12614096 +n11654438 +n09985470 +n10562968 +n02923915 +n10740594 +n07802767 +n12514592 +n10335801 +n03878674 +n12586499 +n10255459 +n02413824 +n10312600 +n02616128 +n12644283 +n04238953 +n04526520 +n01898593 +n09737161 +n03372822 +n09781398 +n10339251 +n02502807 +n10198832 +n10679610 +n13136781 +n11974373 +n11680457 +n10083677 +n04037298 +n09945021 +n09987239 +n02708885 +n13107807 +n10130877 +n12507379 +n08651104 +n12116058 +n10135297 +n04269086 +n03858533 +n10477955 +n04394031 +n10442417 +n10074735 +n03618797 +n03460455 +n04374521 +n10756061 +n08517010 +n12923108 +n02362194 +n01704103 +n10062492 +n01394771 +n10473789 +n10330593 +n02748183 +n12562141 +n09745933 +n02505485 +n11922661 +n12018014 +n09866922 +n04067143 +n13161254 +n07813495 +n01374846 +n10213429 +n03253071 +n02546028 +n01642097 +n01475232 +n03212247 +n10155600 +n11689957 +n11738997 
+n10525878 +n03301389 +n10589666 +n01908958 +n10289766 +n03900028 +n03437295 +n02987823 +n02739123 +n10505347 +n02546627 +n10381804 +n10132502 +n10336904 +n10189597 +n09786115 +n12875697 +n10761519 +n01470733 +n02875626 +n12111238 +n07862770 +n07856895 +n09996039 +n03368048 +n07913300 +n10062996 +n10555430 +n04302863 +n12758555 +n10740732 +n02385898 +n02385098 +n12162758 +n03887899 +n03976268 +n04234160 +n03641947 +n07857076 +n10578656 +n12135729 +n12675515 +n09032191 +n12969670 +n02600503 +n12518013 +n10227166 +n10121026 +n01801672 +n10661216 +n03244388 +n04147291 +n09664556 +n02539573 +n04480141 +n10601362 +n02613572 +n10537906 +n02613820 +n11656771 +n03841011 +n02845985 +n12534208 +n10241024 +n03645290 +n12743976 +n11922839 +n07709701 +n03066232 +n03467380 +n09266604 +n09663786 +n12775070 +n02427183 +n04083113 +n12896615 +n10501453 +n02345774 +n09965515 +n09704157 +n10666752 +n03846970 +n04167661 +n03991321 +n09556121 +n10686517 +n02586238 +n03594277 +n03591313 +n10391416 +n10756837 +n13163649 +n03971960 +n10245341 +n02577041 +n04481642 +n12373739 +n10214062 +n10091997 +n10275848 +n02090253 +n03514340 +n04593629 +n11795216 +n03126927 +n11871748 +n10272782 +n12056099 +n04484024 +n03101375 +n12255225 +n10724372 +n10531838 +n02354781 +n02389865 +n02853336 +n01477080 +n01779939 +n10776052 +n10724132 +n10284871 +n10554141 +n03898787 +n02366301 +n10721612 +n04421740 +n04256758 +n01445593 +n10103921 +n02729222 +n02530188 +n02387452 +n02601921 +n01711160 +n02474110 +n09869447 +n12789977 +n10158506 +n10396908 +n07839593 +n02662825 +n02473720 +n13034788 +n07752602 +n03762238 +n10262880 +n07770180 +n04030054 +n10151367 +n03525252 +n10252075 +n10747424 +n10191388 +n04130566 +n03951068 +n13239921 +n03733547 +n10358124 +n11549779 +n09203827 +n04043168 +n10359422 +n04286960 +n04237287 +n10130686 +n02338449 +n12912274 +n10586998 +n02812785 +n10364502 +n03955941 +n12324222 +n09743601 +n03766600 +n01427399 +n12968309 +n11776234 +n01501777 +n10051026 +n10397001 +n01516212 
+n02596252 +n02225081 +n10479328 +n02109687 +n10181445 +n02248062 +n03802973 +n01639187 +n02142734 +n02342534 +n02410141 +n02743426 +n03950359 +n12253835 +n07805478 +n03706415 +n03578981 +n04560619 +n09761753 +n03524425 +n01962788 +n04350235 +n10686694 +n13139321 +n10195155 +n12335937 +n12758399 +n03805374 +n12895298 +n03800371 +n11972959 +n11530008 +n03178538 +n02217839 +n10591072 +n04033557 +n01880813 +n12292877 +n02430643 +n07599383 +n01954516 +n09894909 +n02474605 +n03576443 +n07595051 +n03367875 +n12945549 +n02360480 +n14583400 +n04208582 +n02405577 +n02550655 +n02513355 +n04381450 +n00444490 +n03567912 +n09937688 +n07932323 +n04029416 +n01913346 +n13237508 +n04437276 +n12938445 +n03042384 +n12543639 +n03194992 +n04094250 +n12045514 +n03825913 +n03504293 +n12758250 +n03547861 +n03649288 +n04572235 +n07569423 +n03534695 +n03253714 +n01501641 +n13906767 +n12578255 +n11749603 +n07742513 +n07609083 +n04214413 +n07595751 +n12013701 +n12592839 +n12949160 +n04093223 +n02983072 +n03510072 +n02966068 +n03867854 +n01747285 +n10691318 +n13091982 +n12574470 +n02255023 +n03449217 +n03153585 +n04006227 +n13140049 +n02965024 +n03805503 +n03911406 +n13120958 +n12203699 +n01456454 +n10397142 +n12920043 +n02412977 +n08674344 +n07801007 +n03037590 +n10361296 +n13133316 +n03483637 +n04435759 +n12983873 +n02627037 +n03783304 +n07725158 +n02921292 +n01788864 +n01705010 +n12616996 +n03903290 +n08662427 +n03667060 +n07856992 +n03252422 +n02449699 +n12137954 +n10024025 +n07891095 +n04337157 +n04368109 +n03015631 +n02363996 +n12824289 +n03206602 +n12799269 +n02333733 +n01793565 +n01721898 +n03178173 +n02844056 +n11688378 +n13889066 +n02637475 +n03750437 +n01403457 +n01717229 +n02677136 +n12512294 +n03736269 +n02838577 +n08661878 +n01993830 +n02777638 +n02900857 +n04023021 +n03843092 +n07770439 +n12928491 +n03697812 +n02639922 +n13139482 +n07771082 +n12487058 +n07774182 +n02122810 +n02856362 +n11686195 +n11687432 +n02853870 +n04239218 +n02665250 +n02938218 +n11746600 +n10183347 
+n10681194 +n04164199 +n04407257 +n12549005 +n02331842 +n03862379 +n02863638 +n11962994 +n03091907 +n04177654 +n02252972 +n02403153 +n01376437 +n02848806 +n08579266 +n07616265 +n10331258 +n10765587 +n09433312 +n03412387 +n10178077 +n13123841 +n02532918 +n04144651 +n03296963 +n03450881 +n04348988 +n10425946 +n03257065 +n02354320 +n11689197 +n04084682 +n10140783 +n03637027 +n02346170 +n02559144 +n01705591 +n09400584 +n03840327 +n03918074 +n04053767 +n02406046 +n00288190 +n03160001 +n03366464 +n09249155 +n01324305 +n07556872 +n03381565 +n12705220 +n11874878 +n02632494 +n02502006 +n03146560 +n02179340 +n04312756 +n10162016 +n03800563 +n04140853 +n07933652 +n03075248 +n04421582 +n10652703 +n02218134 +n12233249 +n04578559 +n01781071 +n02615298 +n04436832 +n04054566 +n02608284 +n11674019 +n03505764 +n02662397 +n09422190 +n04382537 +n04355684 +n04383923 +n09888635 +n03783575 +n03228796 +n07772026 +n02381119 +n15060326 +n10586166 +n12647787 +n02458517 +n10281546 +n03498866 +n02485988 +n10121246 +n09391644 +n03103904 +n08676253 +n02203978 +n04092168 +n03213014 +n03138217 +n04135933 +n12612811 +n04478066 +n02157285 +n02543255 +n03863783 +n01502101 +n03930229 +n12439830 +n09425019 +n02618513 +n02910241 +n12261359 +n03648667 +n04365229 +n03461651 +n04388040 +n03295928 +n03581531 +n04203356 +n02622249 +n13142907 +n04497249 +n11678377 +n02366579 +n02931013 +n02837134 +n03132438 +n13092987 +n04196803 +n03056215 +n03255322 +n02130925 +n10291469 +n02971940 +n01718096 +n12510774 +n11766432 +n04271891 +n03366721 +n03154616 +n03694356 +n10478293 +n11763142 +n07763483 +n03037228 +n09201998 +n01517389 +n00443517 +n12693244 +n03580990 +n03519848 +n10238375 +n10783646 +n03564849 +n03975926 +n02473554 +n02450426 +n03464952 +n04411835 +n04573045 +n10505732 +n04337650 +n10621514 +n10334782 +n12434985 +n07769102 +n10594523 +n05475397 +n01875610 +n03299406 +n10507692 +n02593679 +n03317233 +n13239736 +n03550420 +n03247351 +n03819047 +n03633341 +n03154745 +n04073425 +n04532022 +n02910964 
+n04301242 +n04378651 +n13098515 +n11775626 +n14603798 +n10263146 +n01886045 +n03761731 +n02224713 +n04591249 +n02144251 +n03849412 +n11548728 +n04051705 +n12298165 +n03150795 +n03989447 +n02826459 +n07602650 +n03155915 +n09891730 +n02067603 +n01523105 +n03618339 +n03897130 +n02711780 +n05285623 +n03533486 +n04085873 +n01923404 +n10139077 +n01709484 +n02183507 +n03216562 +n01971850 +n03136051 +n02948834 +n03589313 +n03665851 +n02937336 +n02035656 +n07769465 +n07849186 +n12585373 +n12280364 +n02846260 +n02511730 +n02614653 +n04193179 +n11718681 +n09467696 +n01522450 +n03040836 +n03162297 +n11896141 +n04000480 +n10350220 +n07746038 +n02124157 +n10655169 +n03476542 +n03895038 +n00443917 +n07757753 +n01726203 +n02987706 +n12750076 +n03012734 +n02941228 +n04194009 +n04501127 +n09794550 +n03510487 +n08589670 +n03166951 +n03673270 +n09792125 +n08492354 +n02396157 +n01628331 +n03993878 +n07833816 +n04958865 +n13650447 +n04339191 +n02826683 +n02893269 +n02810139 +n02626471 +n02589796 +n08677801 +n04325968 +n03275864 +n02622547 +n04406687 +n04097085 +n02998107 +n07831450 +n03658102 +n02575590 +n03523398 +n02412909 +n02953850 +n04337503 +n03510987 +n12664005 +n03710294 +n13138155 +n10110093 +n07831955 +n03932080 +n12971804 +n03943623 +n03726371 +n10531445 +n12984489 +n07835051 +n12097556 +n02685701 +n03038041 +n02451125 +n04594919 +n02372140 +n02665985 +n03496183 +n03961828 +n03802800 +n01713170 +n03602790 +n04974145 +n02780588 +n04031884 +n03588216 +n02614140 +n04578708 +n04501281 +n03166600 +n03992975 +n04206070 +n03227721 +n02582349 +n02664642 +n07805389 +n09226869 +n02459190 +n12216968 +n03628984 +n02524928 +n09209025 +n04078002 +n03167153 +n03562565 +n07599554 +n10252547 +n03279804 +n07692887 +n14909584 +n02529293 +n04444953 +n04156814 +n07616174 +n03415626 +n03331244 +n03868324 +n03644073 +n02818687 +n10085101 +n02953056 +n03202481 +n02118707 +n03591901 +n12602434 +n02943465 +n02818254 +n07922607 +n02597004 +n04212810 +n04056073 +n12327528 +n02207647 +n01792808 
+n03002555 +n03951213 +n12242123 +n10062275 +n12325787 +n10048117 +n11937278 +n03624767 +n04039041 +n04059298 +n03707171 +n07758407 +n01333483 +n02219015 +n02436645 +n02478239 +n04457638 +n01781698 +n09474765 +n03686363 +n10769084 +n09456207 +n02385776 +n13555775 +n03962685 +n13129078 +n03463185 +n01429172 +n04243251 +n12177129 +n03143754 +n03958338 +n02791795 +n04560502 +n12776774 +n02745816 +n03009111 +n02976552 +n03008817 +n03211413 +n03537550 +n12200504 +n01909788 +n11790089 +n03480973 +n10507070 +n01707294 +n04374907 +n04281571 +n00006024 +n03823906 +n12603273 +n03503358 +n04027820 +n12645530 +n02535080 +n04143365 +n08385989 +n12661227 +n12814857 +n11871059 +n04268418 +n13128582 +n01928865 +n04359124 +n12670334 +n03610836 +n04543924 +n02252799 +n15102359 +n04437380 +n04316924 +n11872324 +n09330378 +n10122300 +n03784139 +n00443375 +n14993378 +n01721174 +n00004475 +n00006484 +n00007846 +n00015388 +n00017222 +n00021265 +n00021939 +n00288000 +n00433458 +n00433661 +n00433802 +n00439826 +n00440218 +n00440509 +n00440747 +n00441824 +n00442115 +n00442981 +n00443231 +n00444651 +n00445351 +n00445802 +n00447073 +n00447221 +n00447540 +n00448466 +n00448640 +n00448958 +n00449295 +n00449695 +n00450335 +n00450700 +n00451370 +n00451866 +n00452293 +n00453935 +n00454237 +n00454624 +n00463246 +n00464651 +n00464894 +n00467719 +n00467995 +n00468480 +n00469651 +n00471437 +n00471613 +n00479076 +n00480508 +n00480993 +n00482298 +n00523513 +n01035667 +n01316422 +n01316579 +n01316949 +n01317089 +n01317391 +n01317541 +n01319467 +n01320872 +n01321230 +n01321579 +n01321854 +n01322343 +n01322685 +n01322898 +n01323068 +n01326291 +n01329186 +n01338685 +n01339336 +n01340935 +n01342269 +n01358572 +n01367772 +n01375204 +n01376237 +n01380610 +n01384687 +n01385330 +n01387065 +n01389507 +n01390123 +n01392380 +n01395254 +n01397114 +n01402600 +n01407798 +n01421807 +n01438208 +n01439121 +n01439514 +n01439808 +n01441425 +n01444783 +n01445429 +n01446589 +n01446760 +n01448951 +n01450661 +n01454856 
+n01455778 +n01458842 +n01459791 +n01461646 +n01466257 +n01467336 +n01468238 +n01468712 +n01471682 +n01473806 +n01474283 +n01477525 +n01478511 +n01480516 +n01480880 +n01481331 +n01482071 +n01482330 +n01483522 +n01484097 +n01488918 +n01491874 +n01492357 +n01493541 +n01494757 +n01494882 +n01495006 +n01495701 +n01497118 +n01498406 +n01498699 +n01498989 +n01500091 +n01501160 +n01503061 +n01514752 +n01515078 +n01517565 +n01524359 +n01525720 +n01527194 +n01527617 +n01528654 +n01529672 +n01533339 +n01534582 +n01534762 +n01537134 +n01538955 +n01539573 +n01540233 +n01541922 +n01542786 +n01544208 +n01546921 +n01547832 +n01548301 +n01549430 +n01550761 +n01553142 +n01555809 +n01557185 +n01560105 +n01560636 +n01563128 +n01563746 +n01564394 +n01567133 +n01568132 +n01569836 +n01570676 +n01571904 +n01572328 +n01573074 +n01574045 +n01574390 +n01575745 +n01576695 +n01577659 +n01578575 +n01579028 +n01580379 +n01580490 +n01580772 +n01580870 +n01581166 +n01581434 +n01581730 +n01582398 +n01582498 +n01582856 +n01584225 +n01585121 +n01587834 +n01588431 +n01589286 +n01591697 +n01592257 +n01592540 +n01594372 +n01595624 +n01597336 +n01598588 +n01598988 +n01600085 +n01600657 +n01602080 +n01602209 +n01602630 +n01603600 +n01604330 +n01605630 +n01608814 +n01609062 +n01609391 +n01609751 +n01610955 +n01611472 +n01612628 +n01613294 +n01613615 +n01615121 +n01616551 +n01616764 +n01617095 +n01617443 +n01617766 +n01618082 +n01618922 +n01619310 +n01619536 +n01619835 +n01620135 +n01620414 +n01620735 +n01621127 +n01622352 +n01623706 +n01627424 +n01629276 +n01630284 +n01631175 +n01632047 +n01637112 +n01637932 +n01639765 +n01640846 +n01645776 +n01649170 +n01650167 +n01651487 +n01653773 +n01661091 +n01661592 +n01661818 +n01662622 +n01662784 +n01663401 +n01664369 +n01665932 +n01667432 +n01668091 +n01669372 +n01670092 +n01672032 +n01674216 +n01674464 +n01674990 +n01676755 +n01680264 +n01680478 +n01681940 +n01684133 +n01685439 +n01686044 +n01686220 +n01686403 +n01686609 +n01686808 +n01687665 +n01688961 
+n01689411 +n01691951 +n01692864 +n01693783 +n01694709 +n01696633 +n01697178 +n01698434 +n01699040 +n01701551 +n01702256 +n01703011 +n01703569 +n01705934 +n01708106 +n01708998 +n01712008 +n01712752 +n01717016 +n01719403 +n01722998 +n01724231 +n01726692 +n01727646 +n01730185 +n01730307 +n01730812 +n01730960 +n01731545 +n01732244 +n01733757 +n01734637 +n01734808 +n01735439 +n01735577 +n01735728 +n01737472 +n01737728 +n01737875 +n01738065 +n01738601 +n01739647 +n01740551 +n01741232 +n01741562 +n01741943 +n01743605 +n01745125 +n01745484 +n01746359 +n01747885 +n01749582 +n01749742 +n01751036 +n01752165 +n01753959 +n01754876 +n01755740 +n01767661 +n01769347 +n01770795 +n01771417 +n01772222 +n01775370 +n01776192 +n01776705 +n01777304 +n01777467 +n01777649 +n01777909 +n01778217 +n01778487 +n01778621 +n01778801 +n01779148 +n01779463 +n01779629 +n01780696 +n01782209 +n01785667 +n01789386 +n01789740 +n01791107 +n01791625 +n01792158 +n01792640 +n01794158 +n01795088 +n01795735 +n01795900 +n01796019 +n01796105 +n01796519 +n01796729 +n01798706 +n01798839 +n01798979 +n01799302 +n01800424 +n01801088 +n01801479 +n01802721 +n01803078 +n01804478 +n01804653 +n01804921 +n01805070 +n01805801 +n01806847 +n01807828 +n01808140 +n01808291 +n01808596 +n01809106 +n01810700 +n01811909 +n01812337 +n01813385 +n01814370 +n01814921 +n01815601 +n01816887 +n01819115 +n01820348 +n01820801 +n01821076 +n01821203 +n01822602 +n01823013 +n01824749 +n01825930 +n01826364 +n01827403 +n01829869 +n01831712 +n01832167 +n01834177 +n01834540 +n01835276 +n01838038 +n01838598 +n01839598 +n01841102 +n01843719 +n01844231 +n01844551 +n01844917 +n01845132 +n01845477 +n01846331 +n01848123 +n01848648 +n01849466 +n01850373 +n01851375 +n01852142 +n01852861 +n01853498 +n01854415 +n01856072 +n01856155 +n01856380 +n01856553 +n01856890 +n01857079 +n01857325 +n01857512 +n01857632 +n01857851 +n01858441 +n01859496 +n01860497 +n01861148 +n01861778 +n01871543 +n01871875 +n01874434 +n01874928 +n01876326 +n01877134 +n01878061 
+n01878335 +n01878639 +n01878929 +n01879217 +n01879509 +n01880152 +n01881171 +n01883513 +n01883920 +n01886756 +n01887896 +n01888264 +n01889074 +n01889520 +n01890860 +n01891633 +n01892551 +n01894207 +n01905661 +n01906749 +n01907738 +n01909422 +n01911403 +n01911839 +n01912454 +n01914163 +n01914830 +n01915811 +n01916187 +n01916925 +n01918744 +n01922303 +n01925270 +n01925695 +n01926379 +n01926689 +n01927159 +n01927456 +n01927928 +n01928215 +n01930852 +n01931140 +n01931520 +n01931714 +n01932151 +n01932936 +n01933151 +n01933478 +n01933988 +n01934440 +n01935176 +n01936391 +n01937909 +n01940736 +n01941223 +n01942177 +n01942869 +n01943541 +n01944118 +n01944812 +n01944955 +n01945143 +n01945340 +n01945845 +n01946277 +n01948573 +n01951613 +n01953361 +n01955933 +n01956481 +n01958038 +n01959985 +n01960459 +n01963571 +n01964049 +n01964441 +n01965889 +n01967094 +n01968315 +n01969726 +n01971094 +n01971280 +n01974773 +n01975687 +n01976146 +n01976957 +n01978930 +n01981702 +n01982650 +n01983048 +n01985493 +n01985797 +n01986806 +n01987545 +n01988701 +n01989869 +n01990007 +n01991028 +n01991520 +n01992262 +n01992423 +n01992773 +n01996585 +n01998183 +n02000954 +n02002075 +n02005790 +n02006985 +n02007284 +n02008041 +n02008796 +n02010453 +n02011805 +n02011943 +n02012185 +n02013177 +n02014941 +n02015554 +n02016358 +n02016956 +n02018027 +n02019190 +n02019438 +n02019929 +n02021050 +n02021795 +n02022684 +n02023341 +n02025043 +n02026059 +n02028175 +n02030035 +n02030287 +n02030996 +n02031934 +n02033208 +n02033324 +n02033561 +n02034129 +n02034661 +n02036053 +n02037464 +n02039171 +n02040505 +n02041085 +n02041246 +n02043063 +n02044178 +n02044778 +n02045369 +n02046759 +n02047260 +n02047614 +n02048698 +n02049532 +n02050004 +n02051474 +n02052204 +n02052365 +n02053083 +n02054502 +n02055658 +n02055803 +n02057731 +n02058594 +n02058747 +n02059162 +n02060411 +n02060889 +n02062017 +n02062430 +n02062744 +n02063224 +n02064338 +n02066707 +n02068206 +n02068974 +n02069701 +n02070430 +n02073250 +n02075296 
+n02075927 +n02076196 +n02076779 +n02077152 +n02077658 +n02078292 +n02078574 +n02078738 +n02079005 +n02079389 +n02081571 +n02083346 +n02083672 +n02084071 +n02084861 +n02085374 +n02086346 +n02086478 +n02087122 +n02087551 +n02088839 +n02089232 +n02089555 +n02090475 +n02090827 +n02092468 +n02093056 +n02094562 +n02094721 +n02095050 +n02095412 +n02095727 +n02096756 +n02097786 +n02098550 +n02099029 +n02099997 +n02100399 +n02101108 +n02101861 +n02102605 +n02103406 +n02103841 +n02104523 +n02104882 +n02106966 +n02107420 +n02108254 +n02108672 +n02109150 +n02109811 +n02110532 +n02111626 +n02112497 +n02112826 +n02113335 +n02114100 +n02115012 +n02115335 +n02117512 +n02117646 +n02117900 +n02118176 +n02118333 +n02119247 +n02119359 +n02120278 +n02120997 +n02121620 +n02121808 +n02122725 +n02123785 +n02124623 +n02127292 +n02127381 +n02127482 +n02127586 +n02127678 +n02127808 +n02128598 +n02128669 +n02129463 +n02129530 +n02129837 +n02129923 +n02130545 +n02131653 +n02132466 +n02132580 +n02132788 +n02133400 +n02134971 +n02135220 +n02137722 +n02137888 +n02138647 +n02138777 +n02139199 +n02139671 +n02141306 +n02141713 +n02144593 +n02145424 +n02148835 +n02149420 +n02150482 +n02152740 +n02152991 +n02153203 +n02153809 +n02156732 +n02159955 +n02164464 +n02165877 +n02166229 +n02166567 +n02166826 +n02167505 +n02167820 +n02167944 +n02168245 +n02169023 +n02169218 +n02169705 +n02169974 +n02170400 +n02170993 +n02171453 +n02171869 +n02172518 +n02172678 +n02172761 +n02172870 +n02174355 +n02176261 +n02178411 +n02178717 +n02179012 +n02180233 +n02181235 +n02181724 +n02182045 +n02182355 +n02182642 +n02182930 +n02183857 +n02186153 +n02188699 +n02189363 +n02190790 +n02191273 +n02191773 +n02191979 +n02192252 +n02192513 +n02192814 +n02193009 +n02193163 +n02194249 +n02194750 +n02195091 +n02195526 +n02195819 +n02196344 +n02198532 +n02199502 +n02200198 +n02202287 +n02204722 +n02206270 +n02207179 +n02207449 +n02208280 +n02208498 +n02208848 +n02208979 +n02209111 +n02209354 +n02209624 +n02209964 +n02210427 
+n02210921 +n02211444 +n02211627 +n02211896 +n02212062 +n02212602 +n02212958 +n02213107 +n02215161 +n02215770 +n02217563 +n02218713 +n02220055 +n02220225 +n02220518 +n02220804 +n02221083 +n02221414 +n02221571 +n02221715 +n02221820 +n02222035 +n02222321 +n02222582 +n02223266 +n02223520 +n02226183 +n02226821 +n02226970 +n02227247 +n02227604 +n02227966 +n02228341 +n02228697 +n02229156 +n02229765 +n02230023 +n02230187 +n02230480 +n02230634 +n02231052 +n02231803 +n02233943 +n02234355 +n02234570 +n02234848 +n02235205 +n02236241 +n02236896 +n02237424 +n02237581 +n02238235 +n02238887 +n02239528 +n02241569 +n02241799 +n02243562 +n02244173 +n02246011 +n02246628 +n02247511 +n02248368 +n02248510 +n02248887 +n02249515 +n02250822 +n02251775 +n02252226 +n02253127 +n02253715 +n02254697 +n02257003 +n02257284 +n02257715 +n02259377 +n02260421 +n02260863 +n02261419 +n02262178 +n02263378 +n02264885 +n02265330 +n02268148 +n02269196 +n02269522 +n02270011 +n02270200 +n02271570 +n02271897 +n02272871 +n02274024 +n02274259 +n02274822 +n02278210 +n02278839 +n02279637 +n02280458 +n02281015 +n02281136 +n02281267 +n02282257 +n02282385 +n02282553 +n02282903 +n02283077 +n02283201 +n02283951 +n02285548 +n02287004 +n02287799 +n02288789 +n02291220 +n02291572 +n02291748 +n02293352 +n02293868 +n02295064 +n02295390 +n02295870 +n02298541 +n02300173 +n02301452 +n02302459 +n02302620 +n02305407 +n02306433 +n02307325 +n02308139 +n02308471 +n02309337 +n02310000 +n02310717 +n02311060 +n02312006 +n02312427 +n02313008 +n02316707 +n02318167 +n02319308 +n02319555 +n02319829 +n02320127 +n02322047 +n02323449 +n02323902 +n02324045 +n02325722 +n02325884 +n02326074 +n02326763 +n02326862 +n02327028 +n02327175 +n02327435 +n02327656 +n02327842 +n02328429 +n02329401 +n02330245 +n02331046 +n02331309 +n02332755 +n02333546 +n02334460 +n02335127 +n02336011 +n02336641 +n02338901 +n02339376 +n02339922 +n02343058 +n02343320 +n02343772 +n02344528 +n02345600 +n02346998 +n02347274 +n02347573 +n02347744 +n02348173 +n02348788 
+n02350105 +n02350989 +n02351870 +n02352591 +n02353861 +n02355227 +n02355477 +n02358091 +n02359324 +n02360781 +n02361587 +n02361706 +n02361850 +n02363245 +n02363351 +n02364520 +n02369680 +n02370806 +n02372584 +n02373336 +n02374149 +n02374451 +n02376542 +n02376791 +n02376918 +n02377181 +n02377480 +n02377703 +n02378415 +n02380335 +n02380583 +n02380745 +n02381460 +n02382437 +n02382948 +n02384858 +n02386014 +n02386310 +n02386496 +n02388276 +n02389346 +n02389559 +n02390454 +n02390834 +n02391234 +n02391373 +n02391508 +n02391994 +n02393580 +n02394477 +n02395003 +n02395694 +n02395931 +n02397529 +n02399000 +n02401031 +n02402010 +n02402175 +n02402425 +n02403325 +n02403454 +n02404186 +n02404573 +n02406174 +n02407959 +n02408660 +n02408817 +n02409870 +n02410702 +n02410900 +n02411705 +n02412440 +n02413131 +n02414578 +n02415435 +n02416519 +n02417070 +n02417534 +n02418064 +n02419796 +n02423218 +n02423362 +n02423589 +n02424305 +n02424695 +n02426813 +n02427724 +n02428349 +n02430045 +n02430559 +n02431122 +n02432511 +n02433546 +n02433925 +n02435853 +n02437136 +n02437971 +n02438173 +n02438272 +n02439033 +n02441326 +n02442172 +n02442336 +n02442446 +n02442572 +n02442668 +n02443015 +n02443346 +n02443808 +n02443959 +n02444251 +n02445004 +n02445171 +n02446206 +n02446352 +n02446645 +n02447021 +n02447762 +n02448060 +n02448633 +n02448885 +n02450034 +n02453108 +n02453611 +n02454794 +n02455135 +n02455428 +n02455720 +n02456008 +n02456275 +n02456962 +n02460009 +n02469914 +n02470325 +n02470899 +n02471300 +n02471762 +n02472293 +n02473307 +n02474777 +n02476219 +n02480153 +n02481103 +n02481235 +n02481366 +n02481500 +n02482060 +n02482286 +n02482474 +n02482650 +n02483092 +n02484322 +n02484473 +n02485225 +n02485371 +n02485536 +n02485688 +n02486657 +n02486908 +n02487079 +n02487547 +n02487675 +n02487847 +n02488003 +n02488415 +n02488894 +n02489589 +n02490597 +n02490811 +n02491107 +n02491329 +n02491474 +n02496913 +n02501583 +n02502514 +n02503127 +n02503517 +n02504770 +n02507649 +n02508021 +n02512053 
+n02512938 +n02513560 +n02515214 +n02516188 +n02517442 +n02517938 +n02519148 +n02519686 +n02521646 +n02522399 +n02524524 +n02526425 +n02526818 +n02527057 +n02527271 +n02527622 +n02528163 +n02529772 +n02530421 +n02532028 +n02532602 +n02533209 +n02533834 +n02534559 +n02534734 +n02535537 +n02537085 +n02537319 +n02538406 +n02538985 +n02540412 +n02541687 +n02546331 +n02548689 +n02549989 +n02550460 +n02552171 +n02554730 +n02556846 +n02557591 +n02557749 +n02559862 +n02561108 +n02561661 +n02562315 +n02562796 +n02563182 +n02564720 +n02565573 +n02566109 +n02568959 +n02569484 +n02570838 +n02572196 +n02574910 +n02576223 +n02576575 +n02578233 +n02579557 +n02580336 +n02581957 +n02583567 +n02585872 +n02586543 +n02588286 +n02590495 +n02590702 +n02590987 +n02594250 +n02596381 +n02597367 +n02599052 +n02599958 +n02600298 +n02601344 +n02602405 +n02603317 +n02604157 +n02605316 +n02606384 +n02607201 +n02607862 +n02613181 +n02614482 +n02614978 +n02619165 +n02621908 +n02623445 +n02624167 +n02625612 +n02626762 +n02627835 +n02630281 +n02630739 +n02631041 +n02636170 +n02636854 +n02638596 +n02640626 +n02640857 +n02642107 +n02642644 +n02643112 +n02644113 +n02646667 +n02648625 +n02650050 +n02650541 +n02652668 +n02653145 +n02653786 +n02654425 +n02655523 +n02656670 +n02657368 +n02658079 +n02661017 +n02662239 +n02663849 +n02667379 +n02667576 +n02668393 +n02670382 +n02671780 +n02672371 +n02676261 +n02676670 +n02677028 +n02677718 +n02678384 +n02680110 +n02680754 +n02682407 +n02682922 +n02683791 +n02686121 +n02686568 +n02687992 +n02688443 +n02689274 +n02691156 +n02692513 +n02693413 +n02693540 +n02694426 +n02694966 +n02695627 +n02697576 +n02698244 +n02700258 +n02700895 +n02702989 +n02703275 +n02705944 +n02708224 +n02708555 +n02709367 +n02709637 +n02710600 +n02712643 +n02713218 +n02715229 +n02715513 +n02715712 +n02716626 +n02726305 +n02726681 +n02727016 +n02727825 +n02728440 +n02729837 +n02729965 +n02730265 +n02732072 +n02732827 +n02733213 +n02733524 +n02735361 +n02735688 +n02736798 +n02737660 
+n02738031 +n02738271 +n02738535 +n02739550 +n02739668 +n02740533 +n02740764 +n02741475 +n02742322 +n02742753 +n02745492 +n02746365 +n02749790 +n02750169 +n02751067 +n02751295 +n02752496 +n02753044 +n02753394 +n02754103 +n02755352 +n02755529 +n02756098 +n02756977 +n02757462 +n02757810 +n02758134 +n02758960 +n02759700 +n02759963 +n02760099 +n02760199 +n02760429 +n02760855 +n02761392 +n02763198 +n02763714 +n02764044 +n02764614 +n02764779 +n02765028 +n02766320 +n02766534 +n02766792 +n02767433 +n02769075 +n02770830 +n02772554 +n02772700 +n02773037 +n02773838 +n02774152 +n02774630 +n02775483 +n02776205 +n02777100 +n02777734 +n02777927 +n02778456 +n02778669 +n02781121 +n02781338 +n02781517 +n02783035 +n02783324 +n02784998 +n02785648 +n02786198 +n02786463 +n02788689 +n02789487 +n02790823 +n02792552 +n02792948 +n02793842 +n02794008 +n02794779 +n02794972 +n02795783 +n02796207 +n02796623 +n02796995 +n02797692 +n02797881 +n02799897 +n02801184 +n02801525 +n02801938 +n02802721 +n02803349 +n02803666 +n02804252 +n02806088 +n02806379 +n02806875 +n02810471 +n02811468 +n02811719 +n02812201 +n02813252 +n02813399 +n02815478 +n02815950 +n02816494 +n02817031 +n02817650 +n02817799 +n02818832 +n02819697 +n02820210 +n02821627 +n02821943 +n02822220 +n02822399 +n02822865 +n02823335 +n02824448 +n02826589 +n02826886 +n02827606 +n02828299 +n02828884 +n02831335 +n02831724 +n02831894 +n02833793 +n02834778 +n02835412 +n02836268 +n02839351 +n02839910 +n02840619 +n02841063 +n02841506 +n02842133 +n02843029 +n02843777 +n02844214 +n02844307 +n02844714 +n02847631 +n02848216 +n02848523 +n02849154 +n02850950 +n02851099 +n02853016 +n02854532 +n02854926 +n02855089 +n02855390 +n02855793 +n02857365 +n02857477 +n02857644 +n02858304 +n02860415 +n02861886 +n02862048 +n02862916 +n02863750 +n02865665 +n02865931 +n02866578 +n02867715 +n02869737 +n02871631 +n02871824 +n02871963 +n02872752 +n02873839 +n02874086 +n02875436 +n02876326 +n02876457 +n02876657 +n02877962 +n02879517 +n02880189 +n02880546 +n02880940 
+n02881193 +n02881906 +n02882483 +n02882647 +n02883004 +n02883344 +n02884225 +n02885108 +n02885338 +n02886599 +n02887209 +n02887970 +n02888569 +n02889425 +n02891188 +n02891788 +n02892499 +n02893418 +n02896294 +n02896442 +n02897389 +n02897820 +n02898173 +n02898369 +n02898585 +n02898711 +n02900705 +n02901481 +n02901901 +n02902079 +n02902916 +n02903006 +n02904109 +n02904640 +n02908217 +n02909285 +n02911485 +n02912065 +n02913152 +n02914991 +n02916179 +n02916350 +n02917377 +n02917607 +n02919414 +n02920503 +n02921884 +n02923129 +n02924116 +n02925519 +n02928413 +n02928608 +n02929289 +n02929462 +n02929923 +n02931417 +n02931836 +n02932019 +n02932400 +n02933112 +n02933462 +n02933750 +n02933990 +n02934168 +n02935658 +n02935891 +n02936176 +n02936281 +n02936714 +n02938886 +n02939866 +n02941095 +n02942699 +n02943241 +n02943871 +n02944826 +n02945161 +n02946270 +n02946348 +n02946921 +n02947212 +n02947818 +n02948557 +n02949202 +n02950186 +n02950256 +n02950632 +n02950943 +n02951843 +n02952485 +n02952674 +n02953673 +n02954163 +n02954340 +n02954938 +n02955065 +n02955247 +n02955540 +n02955767 +n02957135 +n02957755 +n02958343 +n02959942 +n02961451 +n02961947 +n02963302 +n02963692 +n02963821 +n02965216 +n02965300 +n02965529 +n02966545 +n02966786 +n02966942 +n02967081 +n02967991 +n02968473 +n02969010 +n02969163 +n02969634 +n02969886 +n02970685 +n02970849 +n02971691 +n02972397 +n02973017 +n02974697 +n02975212 +n02976939 +n02978205 +n02978753 +n02979516 +n02982599 +n02983189 +n02983904 +n02984061 +n02984203 +n02984469 +n02984699 +n02985137 +n02985828 +n02986066 +n02987047 +n02987492 +n02989099 +n02991048 +n02991302 +n02992032 +n02993546 +n02995998 +n02997391 +n02997607 +n03001282 +n03001627 +n03002210 +n03003091 +n03004620 +n03005515 +n03007130 +n03007591 +n03010656 +n03010795 +n03011018 +n03011355 +n03012159 +n03013006 +n03014440 +n03015254 +n03017070 +n03018209 +n03020034 +n03020416 +n03020692 +n03024333 +n03025070 +n03025886 +n03027108 +n03027250 +n03029066 +n03031422 +n03032811 
+n03033362 +n03033986 +n03034516 +n03034663 +n03035510 +n03036469 +n03036866 +n03037108 +n03037709 +n03038685 +n03039015 +n03039947 +n03040229 +n03040376 +n03043274 +n03043958 +n03045337 +n03046257 +n03048883 +n03049066 +n03049457 +n03050026 +n03050546 +n03050655 +n03050864 +n03051396 +n03051540 +n03052464 +n03052917 +n03053047 +n03054901 +n03055670 +n03056097 +n03056493 +n03057021 +n03057636 +n03058107 +n03058603 +n03058949 +n03059366 +n03061050 +n03063073 +n03063338 +n03064350 +n03064758 +n03065708 +n03066849 +n03070193 +n03071021 +n03071160 +n03072201 +n03073296 +n03073977 +n03074380 +n03074855 +n03075097 +n03075500 +n03075634 +n03076411 +n03076708 +n03078287 +n03078670 +n03079230 +n03079741 +n03080497 +n03080731 +n03081986 +n03082127 +n03082807 +n03082979 +n03084420 +n03085333 +n03085602 +n03085915 +n03086183 +n03086457 +n03086670 +n03087366 +n03087643 +n03087816 +n03088707 +n03091044 +n03091374 +n03092166 +n03092314 +n03093792 +n03094503 +n03096439 +n03096960 +n03098140 +n03098806 +n03099454 +n03099771 +n03099945 +n03100346 +n03100490 +n03101156 +n03101986 +n03102654 +n03102859 +n03106722 +n03106898 +n03107046 +n03109881 +n03111690 +n03112869 +n03113152 +n03113657 +n03113835 +n03114839 +n03115180 +n03116530 +n03116767 +n03117199 +n03118346 +n03118969 +n03119510 +n03120198 +n03120491 +n03121897 +n03122748 +n03123809 +n03125870 +n03128085 +n03128427 +n03128519 +n03129001 +n03130066 +n03130563 +n03131669 +n03132261 +n03134853 +n03135917 +n03136369 +n03137579 +n03139464 +n03140900 +n03141065 +n03141327 +n03143572 +n03145384 +n03145843 +n03146846 +n03147509 +n03148324 +n03148727 +n03149401 +n03151077 +n03153948 +n03154073 +n03154446 +n03155178 +n03156071 +n03156405 +n03157348 +n03158796 +n03158885 +n03161450 +n03162818 +n03163798 +n03163973 +n03164605 +n03164722 +n03164929 +n03165823 +n03167978 +n03168107 +n03168217 +n03170635 +n03171356 +n03172965 +n03173387 +n03175604 +n03176386 +n03177165 +n03177708 +n03178000 +n03178430 +n03180504 +n03180969 +n03181293 
+n03182140 +n03182232 +n03182912 +n03183080 +n03186818 +n03187751 +n03189818 +n03193597 +n03196062 +n03196324 +n03196598 +n03199647 +n03199901 +n03200357 +n03200539 +n03200701 +n03200906 +n03201035 +n03201638 +n03201996 +n03202354 +n03202760 +n03203089 +n03203806 +n03204306 +n03204558 +n03204955 +n03205143 +n03205304 +n03206718 +n03206908 +n03207305 +n03208556 +n03210683 +n03211117 +n03211616 +n03212811 +n03214253 +n03214450 +n03215191 +n03219135 +n03220237 +n03221059 +n03221720 +n03222516 +n03223162 +n03223441 +n03224753 +n03224893 +n03225777 +n03226538 +n03228016 +n03228533 +n03228692 +n03229115 +n03229526 +n03231160 +n03231819 +n03235796 +n03235979 +n03236580 +n03236735 +n03237212 +n03237639 +n03239259 +n03239726 +n03240140 +n03241093 +n03241335 +n03241496 +n03242120 +n03242506 +n03242995 +n03243218 +n03245271 +n03245421 +n03246933 +n03250952 +n03251533 +n03251766 +n03252324 +n03252637 +n03254374 +n03255488 +n03255899 +n03256788 +n03256928 +n03257586 +n03258905 +n03259505 +n03261776 +n03262519 +n03262809 +n03262932 +n03265032 +n03266749 +n03267821 +n03269203 +n03269401 +n03270695 +n03271765 +n03271865 +n03272239 +n03272383 +n03273061 +n03273913 +n03274561 +n03274796 +n03276179 +n03277459 +n03277771 +n03278248 +n03279153 +n03279364 +n03279508 +n03280394 +n03280644 +n03281145 +n03282060 +n03282401 +n03284743 +n03284981 +n03285912 +n03286572 +n03287733 +n03288003 +n03289985 +n03291413 +n03292960 +n03294048 +n03294833 +n03296478 +n03297103 +n03297644 +n03297735 +n03298089 +n03302790 +n03303217 +n03303831 +n03304197 +n03304465 +n03305522 +n03307573 +n03308152 +n03309808 +n03314378 +n03314884 +n03315644 +n03316406 +n03318136 +n03319457 +n03320046 +n03322570 +n03322940 +n03323703 +n03324928 +n03325088 +n03326073 +n03327234 +n03327691 +n03327841 +n03329663 +n03330792 +n03334017 +n03334492 +n03334912 +n03335030 +n03335846 +n03336839 +n03337494 +n03338287 +n03338821 +n03339296 +n03339643 +n03340009 +n03340923 +n03342961 +n03343354 +n03343560 +n03343853 +n03346135 
+n03346455 +n03349296 +n03350352 +n03350456 +n03350602 +n03351262 +n03351979 +n03352628 +n03354903 +n03355468 +n03356446 +n03357267 +n03357716 +n03359137 +n03359566 +n03360731 +n03361683 +n03362771 +n03363363 +n03364008 +n03364937 +n03365592 +n03365991 +n03366823 +n03373237 +n03374649 +n03374838 +n03375171 +n03376279 +n03378342 +n03379343 +n03379828 +n03379989 +n03380647 +n03380867 +n03381126 +n03381231 +n03381776 +n03382856 +n03382969 +n03383468 +n03384167 +n03384891 +n03385557 +n03386011 +n03387323 +n03387653 +n03390327 +n03391770 +n03393324 +n03394480 +n03394649 +n03396580 +n03396654 +n03397266 +n03397532 +n03398228 +n03399761 +n03399971 +n03402188 +n03402369 +n03404012 +n03404360 +n03404449 +n03405265 +n03405725 +n03407369 +n03409393 +n03409591 +n03410147 +n03411339 +n03412058 +n03412220 +n03412511 +n03412906 +n03413264 +n03413428 +n03413828 +n03414162 +n03415252 +n03416489 +n03416775 +n03417345 +n03418158 +n03418242 +n03419014 +n03422072 +n03422589 +n03423719 +n03424630 +n03427296 +n03428090 +n03428349 +n03429003 +n03429288 +n03429914 +n03430091 +n03430313 +n03430551 +n03430959 +n03431243 +n03431745 +n03433637 +n03433877 +n03434285 +n03434830 +n03435593 +n03437941 +n03438257 +n03439814 +n03441112 +n03442288 +n03442756 +n03446070 +n03446832 +n03448031 +n03448956 +n03449564 +n03449858 +n03450516 +n03452267 +n03452449 +n03453320 +n03454110 +n03454211 +n03454707 +n03455355 +n03456548 +n03456665 +n03457008 +n03457686 +n03458271 +n03459914 +n03461882 +n03465500 +n03465818 +n03466162 +n03466839 +n03467517 +n03467796 +n03467984 +n03468696 +n03469493 +n03470387 +n03470629 +n03470948 +n03472232 +n03472535 +n03472937 +n03473817 +n03473966 +n03475823 +n03476083 +n03476313 +n03477773 +n03477902 +n03478756 +n03478907 +n03481521 +n03482523 +n03483230 +n03483531 +n03484083 +n03484931 +n03487331 +n03487444 +n03487774 +n03488188 +n03488603 +n03489162 +n03490324 +n03490449 +n03490884 +n03491988 +n03496296 +n03496612 +n03497100 +n03497657 +n03498536 +n03499468 +n03500295 
+n03501152 +n03501288 +n03501614 +n03502331 +n03502509 +n03502777 +n03503718 +n03503997 +n03505383 +n03505504 +n03506370 +n03507963 +n03508101 +n03509394 +n03509843 +n03510583 +n03510866 +n03511175 +n03512147 +n03512911 +n03513137 +n03513376 +n03515338 +n03517899 +n03517982 +n03518631 +n03519674 +n03521076 +n03521544 +n03522634 +n03524574 +n03524976 +n03525074 +n03525454 +n03525827 +n03528263 +n03529444 +n03531281 +n03531447 +n03531546 +n03532342 +n03534776 +n03535024 +n03536761 +n03537412 +n03538037 +n03538300 +n03538634 +n03538957 +n03540267 +n03540595 +n03541091 +n03541696 +n03541923 +n03542333 +n03542860 +n03543603 +n03544360 +n03545150 +n03546340 +n03547054 +n03547530 +n03548930 +n03550153 +n03550289 +n03551084 +n03551790 +n03552449 +n03552749 +n03553486 +n03554460 +n03555426 +n03555662 +n03557590 +n03558176 +n03558404 +n03558739 +n03561169 +n03563200 +n03563710 +n03563967 +n03565288 +n03565565 +n03566329 +n03568117 +n03568818 +n03571942 +n03572205 +n03574555 +n03574816 +n03575958 +n03576215 +n03577672 +n03577818 +n03578055 +n03578251 +n03578656 +n03579538 +n03579982 +n03583621 +n03584400 +n03585073 +n03588951 +n03589513 +n03589791 +n03590306 +n03590932 +n03592245 +n03592773 +n03593526 +n03595409 +n03595860 +n03596285 +n03597317 +n03598151 +n03598299 +n03598646 +n03600977 +n03601638 +n03601840 +n03602081 +n03603722 +n03604629 +n03604843 +n03605722 +n03605915 +n03606465 +n03609235 +n03609542 +n03610418 +n03610992 +n03612814 +n03613294 +n03613592 +n03614007 +n03614532 +n03615563 +n03617095 +n03617594 +n03618546 +n03618982 +n03619396 +n03619650 +n03619793 +n03619890 +n03620052 +n03621049 +n03621694 +n03622931 +n03623556 +n03624134 +n03625355 +n03626115 +n03631177 +n03631811 +n03632852 +n03633886 +n03635032 +n03635668 +n03635932 +n03636248 +n03636649 +n03638883 +n03639675 +n03640988 +n03642444 +n03646296 +n03646916 +n03647520 +n03651388 +n03653220 +n03653454 +n03654576 +n03655072 +n03656484 +n03657239 +n03658858 +n03659292 +n03660124 +n03661340 +n03662719 
+n03662887 +n03663531 +n03664675 +n03664943 +n03665366 +n03666362 +n03666917 +n03667235 +n03667829 +n03671914 +n03672827 +n03673450 +n03673767 +n03676759 +n03677766 +n03679384 +n03679712 +n03681477 +n03682487 +n03684823 +n03685307 +n03685820 +n03686130 +n03686470 +n03687928 +n03688943 +n03689157 +n03689570 +n03690851 +n03691817 +n03692379 +n03693293 +n03697552 +n03698604 +n03699280 +n03699975 +n03700963 +n03701191 +n03701391 +n03701640 +n03701790 +n03702248 +n03704834 +n03705379 +n03706653 +n03707597 +n03708036 +n03709206 +n03709363 +n03709545 +n03710528 +n03711711 +n03711999 +n03712887 +n03713069 +n03714235 +n03715386 +n03715669 +n03715892 +n03716966 +n03717131 +n03718212 +n03718335 +n03718699 +n03718789 +n03719053 +n03721590 +n03722007 +n03722288 +n03724176 +n03725035 +n03725717 +n03726516 +n03726760 +n03726993 +n03727837 +n03727946 +n03728437 +n03728982 +n03729647 +n03729951 +n03730153 +n03730788 +n03731695 +n03733644 +n03733925 +n03735637 +n03736970 +n03738241 +n03738472 +n03739518 +n03739693 +n03743902 +n03744276 +n03744684 +n03744840 +n03745571 +n03746330 +n03748162 +n03749504 +n03749807 +n03750206 +n03751065 +n03752185 +n03752922 +n03753077 +n03753514 +n03758894 +n03759432 +n03760671 +n03762982 +n03763727 +n03764276 +n03765561 +n03765934 +n03766322 +n03768132 +n03769722 +n03770954 +n03772077 +n03772674 +n03773035 +n03775199 +n03775847 +n03779000 +n03779370 +n03780047 +n03781787 +n03782190 +n03785499 +n03787523 +n03789171 +n03789400 +n03789946 +n03790230 +n03790512 +n03790755 +n03791235 +n03792048 +n03792526 +n03793850 +n03795976 +n03796181 +n03797390 +n03798982 +n03799113 +n03800485 +n03800772 +n03800933 +n03802007 +n03802228 +n03802393 +n03803116 +n03809312 +n03811295 +n03811444 +n03811847 +n03811965 +n03812382 +n03812924 +n03813176 +n03813946 +n03815278 +n03815482 +n03815615 +n03816005 +n03816136 +n03816849 +n03817647 +n03819595 +n03819994 +n03820154 +n03820318 +n03820728 +n03820950 +n03824197 +n03825080 +n03827536 +n03828020 +n03829340 +n03831757 
+n03834040 +n03834604 +n03836062 +n03837422 +n03838748 +n03839172 +n03839276 +n03839795 +n03841666 +n03842156 +n03844045 +n03844233 +n03845190 +n03846234 +n03846772 +n03847471 +n03847823 +n03848168 +n03848348 +n03849275 +n03850613 +n03851341 +n03851787 +n03852280 +n03852688 +n03854815 +n03859280 +n03859495 +n03859958 +n03861430 +n03861842 +n03862676 +n03863923 +n03864139 +n03864356 +n03864692 +n03865371 +n03865949 +n03868406 +n03871083 +n03871524 +n03871724 +n03873848 +n03874138 +n03874823 +n03875218 +n03880129 +n03880323 +n03880531 +n03883054 +n03883773 +n03883944 +n03884639 +n03885535 +n03885669 +n03886053 +n03886641 +n03887185 +n03888022 +n03889503 +n03889726 +n03891051 +n03892557 +n03894051 +n03894379 +n03896103 +n03896233 +n03896419 +n03896628 +n03896984 +n03897943 +n03898271 +n03898633 +n03899612 +n03899933 +n03901338 +n03903133 +n03903424 +n03904060 +n03904183 +n03904433 +n03905540 +n03906997 +n03907654 +n03908204 +n03909160 +n03909406 +n03915118 +n03915437 +n03916470 +n03916720 +n03917327 +n03918480 +n03920737 +n03923564 +n03923692 +n03924069 +n03926148 +n03926412 +n03926876 +n03927792 +n03928116 +n03929091 +n03929202 +n03929443 +n03930515 +n03932670 +n03936269 +n03938522 +n03939677 +n03940256 +n03941684 +n03943920 +n03945615 +n03947111 +n03947466 +n03948459 +n03951971 +n03953020 +n03953416 +n03955809 +n03956785 +n03956922 +n03957315 +n03957762 +n03958630 +n03958752 +n03959014 +n03959701 +n03961939 +n03962525 +n03962932 +n03963028 +n03965907 +n03966325 +n03966751 +n03966976 +n03967942 +n03968293 +n03971321 +n03972524 +n03973520 +n03973628 +n03975035 +n03979377 +n03979492 +n03980026 +n03981340 +n03982232 +n03982895 +n03984234 +n03984381 +n03985232 +n03986704 +n03988170 +n03989665 +n03990474 +n03991443 +n03992325 +n03992703 +n03993180 +n03993403 +n03994008 +n03994757 +n03995018 +n03995856 +n03996145 +n03996416 +n03997484 +n03999992 +n04000311 +n04001397 +n04001499 +n04001845 +n04004210 +n04004475 +n04005912 +n04007664 +n04010057 +n04010779 +n04010927 
+n04011827 +n04012084 +n04013729 +n04014297 +n04015204 +n04016576 +n04016684 +n04018399 +n04018667 +n04019101 +n04019696 +n04020087 +n04020298 +n04020912 +n04021028 +n04021362 +n04021798 +n04022332 +n04022708 +n04023249 +n04024274 +n04026053 +n04026918 +n04027023 +n04027706 +n04028315 +n04029734 +n04030274 +n04036303 +n04037964 +n04038440 +n04038727 +n04039848 +n04042358 +n04042632 +n04042795 +n04042985 +n04043733 +n04044307 +n04044498 +n04045085 +n04045397 +n04046590 +n04046974 +n04047401 +n04049405 +n04050066 +n04051549 +n04051825 +n04052757 +n04056932 +n04057047 +n04057435 +n04057846 +n04057981 +n04058096 +n04058239 +n04059947 +n04060647 +n04060904 +n04061793 +n04061969 +n04062644 +n04063373 +n04063868 +n04064401 +n04065464 +n04065789 +n04067231 +n04067353 +n04067921 +n04068441 +n04068601 +n04069276 +n04069777 +n04070207 +n04070964 +n04071102 +n04071263 +n04071393 +n04072193 +n04072551 +n04073948 +n04075468 +n04075916 +n04076284 +n04077430 +n04077734 +n04078574 +n04079106 +n04079244 +n04079933 +n04080454 +n04080833 +n04081844 +n04083649 +n04086794 +n04087126 +n04087709 +n04088696 +n04088797 +n04089666 +n04089976 +n04090548 +n04091097 +n04093625 +n04095210 +n04096066 +n04097622 +n04097866 +n04099175 +n04099429 +n04100174 +n04101497 +n04101701 +n04102037 +n04102285 +n04102406 +n04102962 +n04104147 +n04104500 +n04105068 +n04105438 +n04105893 +n04107984 +n04108268 +n04110068 +n04110654 +n04110955 +n04111190 +n04111414 +n04111668 +n04113765 +n04114996 +n04115256 +n04115996 +n04116389 +n04118021 +n04121228 +n04122349 +n04122492 +n04122825 +n04123123 +n04123567 +n04123740 +n04125116 +n04125853 +n04126541 +n04126659 +n04126980 +n04127904 +n04128499 +n04128837 +n04131929 +n04134632 +n04136510 +n04137444 +n04137897 +n04138977 +n04139859 +n04140064 +n04140631 +n04141838 +n04143897 +n04146050 +n04147495 +n04148054 +n04149083 +n04151108 +n04151581 +n04151940 +n04152387 +n04154753 +n04156297 +n04156411 +n04157320 +n04158807 +n04158956 +n04160372 +n04160586 +n04161358 
+n04161981 +n04164757 +n04164868 +n04166111 +n04167489 +n04169437 +n04170037 +n04171459 +n04171629 +n04171831 +n04174101 +n04174500 +n04176068 +n04176190 +n04176528 +n04177329 +n04177545 +n04180063 +n04180888 +n04181228 +n04181718 +n04182322 +n04183217 +n04183329 +n04184435 +n04184600 +n04185071 +n04186051 +n04186268 +n04186455 +n04186624 +n04186848 +n04187061 +n04187547 +n04187885 +n04189092 +n04190052 +n04190464 +n04190747 +n04190997 +n04191150 +n04191595 +n04191943 +n04192238 +n04192858 +n04194289 +n04196080 +n04197391 +n04198015 +n04198797 +n04199027 +n04201733 +n04202417 +n04205318 +n04206356 +n04207763 +n04210390 +n04211356 +n04211970 +n04215910 +n04216634 +n04216860 +n04216963 +n04217718 +n04217882 +n04219424 +n04221823 +n04222210 +n04222470 +n04222847 +n04225031 +n04225222 +n04225729 +n04226464 +n04226537 +n04227900 +n04229007 +n04229107 +n04229480 +n04230603 +n04230808 +n04231693 +n04232153 +n04233832 +n04234455 +n04235291 +n04235771 +n04236001 +n04236377 +n04236702 +n04238617 +n04241042 +n04241394 +n04242408 +n04243003 +n04243941 +n04244997 +n04245847 +n04246855 +n04247630 +n04247736 +n04248507 +n04249415 +n04250224 +n04250599 +n04253931 +n04255499 +n04256520 +n04260589 +n04261116 +n04262678 +n04263336 +n04263760 +n04264233 +n04264914 +n04266486 +n04267577 +n04269944 +n04270891 +n04271148 +n04272054 +n04272782 +n04273064 +n04273796 +n04275283 +n04275661 +n04275904 +n04278353 +n04279172 +n04279987 +n04280259 +n04280970 +n04283585 +n04283905 +n04284002 +n04285146 +n04285622 +n04285803 +n04286128 +n04288272 +n04288533 +n04288673 +n04289449 +n04291242 +n04291759 +n04292414 +n04292572 +n04293119 +n04293744 +n04294212 +n04294426 +n04295081 +n04295881 +n04299215 +n04300358 +n04301000 +n04301474 +n04303258 +n04304375 +n04305471 +n04306080 +n04306847 +n04307419 +n04307878 +n04308084 +n04308273 +n04308397 +n04308583 +n04308807 +n04309348 +n04309833 +n04310721 +n04311595 +n04312154 +n04312432 +n04313220 +n04314914 +n04315828 +n04315948 +n04317420 +n04318131 
+n04318982 +n04319937 +n04320405 +n04322026 +n04322692 +n04322801 +n04323819 +n04326799 +n04326896 +n04328054 +n04328329 +n04328946 +n04329477 +n04330340 +n04330669 +n04330998 +n04331277 +n04332987 +n04333129 +n04338517 +n04339638 +n04340750 +n04340935 +n04341133 +n04341414 +n04341686 +n04346679 +n04347519 +n04348184 +n04348359 +n04349401 +n04350104 +n04350458 +n04354589 +n04356595 +n04358707 +n04358874 +n04359335 +n04359589 +n04360501 +n04360798 +n04361095 +n04361260 +n04362821 +n04363210 +n04363874 +n04364545 +n04364827 +n04364994 +n04365328 +n04365484 +n04365751 +n04368695 +n04370048 +n04371563 +n04373894 +n04375775 +n04377057 +n04378956 +n04379243 +n04379964 +n04380346 +n04381994 +n04382334 +n04382880 +n04383130 +n04383301 +n04386664 +n04387201 +n04387400 +n04388162 +n04388743 +n04389521 +n04390873 +n04391838 +n04392526 +n04393095 +n04394261 +n04395875 +n04397168 +n04397261 +n04397645 +n04398497 +n04398688 +n04398834 +n04399046 +n04400289 +n04401088 +n04402057 +n04402580 +n04402746 +n04402984 +n04403638 +n04404817 +n04404997 +n04405540 +n04405762 +n04407435 +n04407686 +n04409128 +n04409806 +n04410086 +n04410365 +n04410485 +n04411264 +n04411966 +n04413151 +n04413419 +n04415663 +n04416901 +n04417180 +n04417361 +n04417809 +n04419073 +n04421872 +n04422875 +n04427715 +n04428008 +n04431436 +n04431745 +n04434932 +n04435180 +n04436185 +n04436401 +n04436542 +n04437670 +n04437953 +n04438304 +n04438643 +n04440963 +n04441662 +n04444749 +n04445040 +n04445952 +n04446276 +n04447276 +n04447443 +n04448070 +n04448361 +n04450243 +n04450640 +n04450749 +n04451818 +n04452615 +n04452848 +n04453156 +n04453666 +n04453910 +n04454654 +n04455250 +n04455652 +n04456472 +n04457326 +n04458843 +n04459362 +n04459610 +n04460130 +n04462011 +n04463679 +n04464852 +n04467099 +n04467307 +n04468005 +n04469251 +n04470741 +n04471315 +n04471632 +n04472243 +n04472726 +n04473884 +n04474466 +n04475411 +n04475631 +n04477548 +n04478512 +n04478657 +n04480527 +n04481524 +n04487724 +n04488427 +n04489008 
+n04489817 +n04490091 +n04491769 +n04493109 +n04494204 +n04495450 +n04497442 +n04497570 +n04498523 +n04499446 +n04499554 +n04500060 +n04501837 +n04502197 +n04502502 +n04502670 +n04502851 +n04504141 +n04504770 +n04505036 +n04506994 +n04507453 +n04508163 +n04508489 +n04508949 +n04509260 +n04509592 +n04511002 +n04514241 +n04516116 +n04516214 +n04516672 +n04518132 +n04519153 +n04520170 +n04520382 +n04521987 +n04524313 +n04527648 +n04529681 +n04530566 +n04531098 +n04531873 +n04533042 +n04533199 +n04533700 +n04534127 +n04534895 +n04536153 +n04538552 +n04539203 +n04540761 +n04541320 +n04543158 +n04544450 +n04546194 +n04546855 +n04547592 +n04549122 +n04549919 +n04551055 +n04552696 +n04553389 +n04554871 +n04555600 +n04555897 +n04556948 +n04557308 +n04557751 +n04558059 +n04558804 +n04559023 +n04559730 +n04562262 +n04563204 +n04565375 +n04566257 +n04567098 +n04568069 +n04568557 +n04569520 +n04569822 +n04570958 +n04571292 +n04571566 +n04571958 +n04572935 +n04574471 +n04574999 +n04576002 +n04576211 +n04576971 +n04577426 +n04577769 +n04578801 +n04579230 +n04580493 +n04581595 +n04582349 +n04583620 +n04585745 +n04585980 +n04586932 +n04587648 +n04588739 +n04589190 +n04589434 +n04591056 +n04591887 +n04592005 +n04592099 +n04594218 +n04594489 +n04595285 +n04595855 +n04596852 +n04597066 +n04597804 +n04598136 +n04598582 +n04599124 +n04600312 +n04600486 +n04600912 +n04603729 +n04603872 +n04605726 +n04606574 +n04608329 +n04608567 +n04609531 +n04609651 +n04610176 +n04610503 +n04610676 +n04611916 +n04613015 +n04615226 +n04615644 +n04950713 +n04951373 +n04958634 +n04959672 +n04960277 +n04961691 +n04963740 +n04965179 +n04965661 +n04967191 +n04968895 +n04970059 +n04970631 +n04970916 +n04972801 +n04973386 +n04976952 +n05238282 +n05241218 +n05242070 +n05244934 +n05266879 +n05399034 +n05447757 +n05449959 +n05453657 +n05467758 +n05586759 +n06254669 +n06262567 +n06263369 +n06263609 +n06263762 +n06266417 +n06266710 +n06267145 +n06271778 +n06272290 +n06272803 +n06274092 +n06275353 +n06276697 
+n06277280 +n06281040 +n06359467 +n06359657 +n06418693 +n06591815 +n06592078 +n06595351 +n06613686 +n06793231 +n07556637 +n07556970 +n07557165 +n07557434 +n07560652 +n07561112 +n07562495 +n07563800 +n07564629 +n07564971 +n07565725 +n07565945 +n07566340 +n07566863 +n07567390 +n07567707 +n07568818 +n07569106 +n07569543 +n07570720 +n07572353 +n07572957 +n07573103 +n07573696 +n07574602 +n07575076 +n07575726 +n07575984 +n07576182 +n07576438 +n07576577 +n07577374 +n07579575 +n07580053 +n07580359 +n07580470 +n07581346 +n07581775 +n07582277 +n07582441 +n07582609 +n07583197 +n07584228 +n07584593 +n07585208 +n07587441 +n07587700 +n07588947 +n07590320 +n07591473 +n07592094 +n07592656 +n07593774 +n07595914 +n07596046 +n07596452 +n07596684 +n07597145 +n07597365 +n07598734 +n07599468 +n07599783 +n07599998 +n07600506 +n07601407 +n07605474 +n07605944 +n07606278 +n07606764 +n07607707 +n07609407 +n07609840 +n07611148 +n07611358 +n07611839 +n07611991 +n07612367 +n07612632 +n07612996 +n07613671 +n07614198 +n07614825 +n07615052 +n07615190 +n07615460 +n07615569 +n07615671 +n07616590 +n07617188 +n07619004 +n07623136 +n07624466 +n07627931 +n07628068 +n07641928 +n07642471 +n07642933 +n07643306 +n07643474 +n07643764 +n07643981 +n07644244 +n07663899 +n07678729 +n07679356 +n07680517 +n07680932 +n07681926 +n07682316 +n07682624 +n07683786 +n07684600 +n07685730 +n07686873 +n07687211 +n07687469 +n07687789 +n07689003 +n07690273 +n07690892 +n07692405 +n07692614 +n07693889 +n07693972 +n07694403 +n07695878 +n07695965 +n07697100 +n07704054 +n07705931 +n07707451 +n07708512 +n07708798 +n07709333 +n07710007 +n07710283 +n07710616 +n07710952 +n07712063 +n07712382 +n07712748 +n07712856 +n07713395 +n07713895 +n07714078 +n07714802 +n07714895 +n07715561 +n07715721 +n07716034 +n07717070 +n07717858 +n07718671 +n07719437 +n07719839 +n07720442 +n07720615 +n07721325 +n07721456 +n07721678 +n07722217 +n07722763 +n07723330 +n07723559 +n07723753 +n07724943 +n07725376 +n07725531 +n07726796 +n07727578 +n07727868 
+n07728804 +n07729000 +n07729485 +n07730406 +n07730855 +n07731122 +n07731587 +n07731952 +n07732302 +n07732747 +n07734017 +n07734292 +n07735052 +n07735803 +n07737081 +n07739125 +n07739506 +n07740220 +n07740954 +n07741461 +n07742012 +n07742704 +n07744246 +n07747055 +n07747811 +n07747951 +n07748753 +n07748912 +n07749095 +n07749192 +n07749312 +n07749731 +n07750586 +n07751451 +n07752377 +n07752664 +n07753743 +n07755089 +n07755411 +n07755707 +n07756096 +n07757132 +n07757312 +n07757602 +n07757990 +n07758680 +n07758950 +n07759424 +n07759691 +n07759816 +n07760501 +n07761141 +n07761309 +n07761611 +n07761777 +n07761954 +n07767344 +n07767847 +n07770571 +n07771212 +n07800091 +n07800740 +n07801508 +n07802152 +n07802417 +n07803093 +n07803545 +n07804323 +n07805254 +n07805594 +n07805731 +n07806221 +n07806633 +n07807317 +n07807710 +n07807922 +n07809096 +n07809368 +n07810907 +n07811416 +n07812184 +n07814203 +n07815588 +n07818277 +n07819480 +n07820497 +n07820814 +n07823951 +n07824702 +n07824988 +n07825717 +n07828987 +n07829412 +n07830593 +n07832902 +n07834507 +n07836731 +n07837002 +n07837362 +n07838233 +n07841495 +n07841639 +n07841907 +n07842753 +n07842972 +n07843464 +n07843775 +n07844042 +n07844604 +n07846143 +n07847198 +n07848338 +n07848771 +n07849336 +n07850083 +n07850329 +n07851298 +n07852045 +n07852919 +n07854813 +n07856270 +n07857959 +n07858595 +n07859284 +n07859583 +n07860805 +n07861158 +n07861813 +n07863374 +n07864638 +n07865105 +n07867421 +n07867883 +n07869391 +n07869775 +n07870313 +n07871436 +n07873464 +n07874063 +n07874159 +n07874259 +n07874343 +n07874441 +n07874780 +n07875693 +n07875835 +n07876281 +n07880751 +n07881117 +n07881205 +n07881404 +n07881800 +n07882497 +n07882886 +n07883031 +n07883251 +n07883384 +n07884567 +n07886572 +n07886849 +n07887634 +n07888465 +n07888909 +n07889510 +n07890352 +n07890750 +n07891726 +n07892813 +n07893528 +n07893891 +n07894102 +n07894298 +n07894965 +n07895237 +n07895435 +n07895595 +n07895710 +n07895839 +n07896287 +n07897200 +n07897865 
+n07898117 +n07898333 +n07898745 +n07899108 +n07900406 +n07900616 +n07901587 +n07903208 +n07904395 +n07905038 +n07906284 +n07906877 +n07907161 +n07907548 +n07907943 +n07909129 +n07909811 +n07911371 +n07911677 +n07912211 +n07913393 +n07914413 +n07915618 +n07916041 +n07917618 +n07918028 +n07920222 +n07921455 +n07921948 +n07923748 +n07924033 +n07924560 +n07924834 +n07925966 +n07926920 +n07927197 +n07927931 +n07929519 +n07930554 +n07931001 +n07931096 +n07932614 +n07932841 +n07933274 +n07933891 +n07934032 +n07934530 +n07935152 +n07935504 +n07936263 +n07936745 +n07938149 +n07951464 +n08554440 +n08558963 +n08596076 +n08598301 +n08616050 +n08640531 +n08659446 +n09191635 +n09206896 +n09206985 +n09210862 +n09213434 +n09213565 +n09214060 +n09214916 +n09215437 +n09217230 +n09230041 +n09233446 +n09238926 +n09255070 +n09259025 +n09259219 +n09262690 +n09265620 +n09269882 +n09270735 +n09287968 +n09289331 +n09289596 +n09290444 +n09295946 +n09300306 +n09302616 +n09303008 +n09303528 +n09304750 +n09305031 +n09308572 +n09309292 +n09315159 +n09326662 +n09335693 +n09335809 +n09336555 +n09337253 +n09344198 +n09352849 +n09359803 +n09362945 +n09366017 +n09366317 +n09375606 +n09376526 +n09381242 +n09393605 +n09396465 +n09398677 +n09405787 +n09406793 +n09409512 +n09409752 +n09410224 +n09416076 +n09421799 +n09428628 +n09432990 +n09433442 +n09437454 +n09439213 +n09443641 +n09453008 +n09458269 +n09472413 +n09474010 +n09505153 +n09606009 +n09606527 +n09608709 +n09610405 +n09613191 +n09615336 +n09616922 +n09619168 +n09619452 +n09620078 +n09620794 +n09622049 +n09622302 +n09624168 +n09624559 +n09625401 +n09626238 +n09627906 +n09629246 +n09629752 +n09631129 +n09632274 +n09632518 +n09633969 +n09636339 +n09638875 +n09639919 +n09641002 +n09644152 +n09648743 +n09651123 +n09665545 +n09669631 +n09670280 +n09676884 +n09679925 +n09690208 +n09694771 +n09696585 +n09697401 +n09700964 +n09701148 +n09701833 +n09705124 +n09708750 +n09710164 +n09716047 +n09718217 +n09722658 +n09724785 +n09725229 +n09725772 
+n09726621 +n09727440 +n09727826 +n09730204 +n09731436 +n09731571 +n09735258 +n09738400 +n09744679 +n09754217 +n09758173 +n09758885 +n09761068 +n09763784 +n09764201 +n09764598 +n09765278 +n09767197 +n09769076 +n09770179 +n09771435 +n09772746 +n09773962 +n09774783 +n09790482 +n09792555 +n09795124 +n09795334 +n09800964 +n09802445 +n09802641 +n09805151 +n09805475 +n09809538 +n09809749 +n09810166 +n09811712 +n09814660 +n09815790 +n09816771 +n09818022 +n09820263 +n09821831 +n09823502 +n09824135 +n09824609 +n09826204 +n09830194 +n09831962 +n09834699 +n09836160 +n09840217 +n09841188 +n09841515 +n09841696 +n09842047 +n09848489 +n09851575 +n09853645 +n09853881 +n09854915 +n09857007 +n09861946 +n09865398 +n09868270 +n09871681 +n09877951 +n09889691 +n09892693 +n09894654 +n09895222 +n09895701 +n09902353 +n09903153 +n09910374 +n09917593 +n09918248 +n09923418 +n09923673 +n09924996 +n09927089 +n09927451 +n09928136 +n09928451 +n09929298 +n09930257 +n09930876 +n09931640 +n09933098 +n09935434 +n09936892 +n09937056 +n09941964 +n09942970 +n09943239 +n09943811 +n09944160 +n09945319 +n09950457 +n09951070 +n09951274 +n09960688 +n09962966 +n09964411 +n09968845 +n09974648 +n09976728 +n09979321 +n09983572 +n09989502 +n09990415 +n09991867 +n09992538 +n09992837 +n09993252 +n09994673 +n09996481 +n09997622 +n10001217 +n10006748 +n10007684 +n10009484 +n10009671 +n10015215 +n10015897 +n10017422 +n10018861 +n10020890 +n10024362 +n10029068 +n10034201 +n10034614 +n10035952 +n10036266 +n10036929 +n10037385 +n10040945 +n10041887 +n10042690 +n10043643 +n10044879 +n10047459 +n10048367 +n10048836 +n10052694 +n10053808 +n10054657 +n10055730 +n10055847 +n10060175 +n10067968 +n10070711 +n10077593 +n10078131 +n10078806 +n10079399 +n10079893 +n10080869 +n10083823 +n10084043 +n10084295 +n10086383 +n10091651 +n10092488 +n10093475 +n10094584 +n10095869 +n10098710 +n10098862 +n10099375 +n10101634 +n10102800 +n10105085 +n10107303 +n10109662 +n10111903 +n10112129 +n10118844 +n10126177 +n10126424 +n10126708 
+n10127689 +n10129825 +n10134396 +n10134982 +n10136959 +n10142747 +n10142946 +n10143172 +n10143725 +n10145340 +n10145774 +n10148305 +n10150071 +n10150940 +n10151570 +n10153594 +n10154186 +n10154601 +n10155849 +n10162194 +n10164233 +n10165448 +n10168183 +n10168584 +n10171567 +n10182190 +n10185793 +n10186774 +n10187130 +n10195593 +n10200781 +n10202624 +n10205457 +n10206173 +n10207169 +n10210137 +n10215623 +n10216106 +n10224578 +n10225219 +n10228278 +n10235385 +n10237069 +n10241300 +n10243664 +n10245639 +n10249270 +n10249459 +n10249950 +n10257221 +n10259348 +n10263411 +n10266328 +n10266848 +n10271677 +n10273064 +n10274815 +n10276045 +n10282672 +n10284064 +n10284965 +n10296444 +n10299250 +n10299700 +n10305635 +n10305802 +n10306004 +n10308732 +n10312287 +n10314054 +n10315561 +n10316360 +n10317007 +n10317500 +n10320863 +n10321340 +n10322238 +n10323999 +n10324560 +n10328437 +n10332385 +n10335246 +n10335931 +n10340312 +n10341573 +n10343554 +n10345100 +n10353016 +n10353355 +n10355142 +n10355449 +n10355688 +n10356450 +n10357613 +n10360747 +n10366966 +n10369528 +n10370381 +n10376523 +n10377021 +n10379376 +n10380672 +n10383816 +n10386984 +n10387196 +n10387324 +n10393909 +n10396106 +n10399130 +n10400998 +n10402824 +n10403876 +n10405694 +n10407954 +n10409752 +n10411551 +n10415037 +n10417551 +n10418101 +n10419047 +n10420031 +n10421016 +n10426454 +n10427764 +n10428004 +n10433737 +n10435716 +n10435988 +n10438172 +n10439851 +n10444194 +n10450303 +n10462860 +n10464052 +n10466918 +n10467179 +n10470779 +n10474064 +n10474645 +n10478960 +n10481268 +n10482054 +n10482921 +n10484858 +n10488309 +n10495421 +n10499355 +n10499857 +n10506544 +n10508710 +n10512372 +n10512708 +n10519494 +n10521100 +n10521662 +n10522035 +n10522324 +n10522759 +n10523341 +n10525134 +n10525436 +n10525617 +n10527334 +n10529231 +n10541833 +n10542888 +n10543161 +n10544232 +n10544748 +n10546633 +n10548537 +n10548681 +n10554846 +n10556518 +n10557854 +n10559288 +n10560637 +n10568200 +n10570019 +n10575787 +n10576962 
+n10577284 +n10580535 +n10582746 +n10583387 +n10594147 +n10595164 +n10595647 +n10599806 +n10602985 +n10604634 +n10605253 +n10610465 +n10612210 +n10614629 +n10617193 +n10618685 +n10618848 +n10619642 +n10620758 +n10622053 +n10624074 +n10624310 +n10625860 +n10628644 +n10630188 +n10632576 +n10633450 +n10648237 +n10648696 +n10654932 +n10657835 +n10661002 +n10661563 +n10665698 +n10669991 +n10674130 +n10676018 +n10679174 +n10682953 +n10686073 +n10692883 +n10693824 +n10694258 +n10698368 +n10700201 +n10700640 +n10701180 +n10703336 +n10703692 +n10705615 +n10707233 +n10708454 +n10709529 +n10713686 +n10720453 +n10721321 +n10722575 +n10722965 +n10726786 +n10735298 +n10740868 +n10741152 +n10742997 +n10744164 +n10747119 +n10751265 +n10752480 +n10759151 +n10759982 +n10763383 +n10763620 +n10765679 +n10766260 +n10768903 +n10779610 +n10780632 +n10782791 +n10782940 +n10787470 +n10791221 +n10792335 +n10793570 +n10794014 +n11531193 +n11537327 +n11542640 +n11545524 +n11545714 +n11547562 +n11547855 +n11552386 +n11553240 +n11596108 +n11598686 +n11600372 +n11601177 +n11601918 +n11608250 +n11609475 +n11609684 +n11612923 +n11614250 +n11618861 +n11620673 +n11621029 +n11623105 +n11624531 +n11627168 +n11628456 +n11630017 +n11630489 +n11643835 +n11645914 +n11647306 +n11649878 +n11650558 +n11650759 +n11661372 +n11665372 +n11666854 +n11669921 +n11672400 +n11674332 +n11676500 +n11684264 +n11689483 +n11693981 +n11697560 +n11700864 +n11703669 +n11708658 +n11709674 +n11713164 +n11720353 +n11722982 +n11723770 +n11725015 +n11725623 +n11727091 +n11729478 +n11733054 +n11736694 +n11741350 +n11745817 +n11747468 +n11748002 +n11751765 +n11752578 +n11756092 +n11756669 +n11759224 +n11763625 +n11767354 +n11769621 +n11771539 +n11774513 +n11775340 +n11779300 +n11782036 +n11783920 +n11785668 +n11789438 +n11789962 +n11790788 +n11793779 +n11794519 +n11796005 +n11801392 +n11805956 +n11807108 +n11807979 +n11808721 +n11811473 +n11815491 +n11817914 +n11820965 +n11823043 +n11830714 +n11830906 +n11832214 +n11836722 
+n11839568 +n11845557 +n11851578 +n11855274 +n11857696 +n11862835 +n11865071 +n11866248 +n11868814 +n11869351 +n11869689 +n11872146 +n11875691 +n11875938 +n11877473 +n11878283 +n11887119 +n11890022 +n11892637 +n11894327 +n11898639 +n11900569 +n11902709 +n11915214 +n11915658 +n11915899 +n11916467 +n11918286 +n11919447 +n11920498 +n11924445 +n11928352 +n11928858 +n11931918 +n11932745 +n11939699 +n11940006 +n11943407 +n11944196 +n11945367 +n11946727 +n11947251 +n11948264 +n11950345 +n11951511 +n11952346 +n11953884 +n11954484 +n11956850 +n11965627 +n11967744 +n11970101 +n11971248 +n11971783 +n11972759 +n11973341 +n11976170 +n11977303 +n11978233 +n11982115 +n11985053 +n11985739 +n11988893 +n11991263 +n11997032 +n11997969 +n12006766 +n12008252 +n12008749 +n12010628 +n12013511 +n12015959 +n12018760 +n12020507 +n12024176 +n12030654 +n12034141 +n12036067 +n12036939 +n12041446 +n12043444 +n12045860 +n12050959 +n12053405 +n12056217 +n12057447 +n12062468 +n12065316 +n12065777 +n12075151 +n12076577 +n12080395 +n12083591 +n12086012 +n12086539 +n12087961 +n12090890 +n12092262 +n12094244 +n12095020 +n12096395 +n12101870 +n12102133 +n12105125 +n12107970 +n12108432 +n12109827 +n12110778 +n12112008 +n12112918 +n12113657 +n12117017 +n12119099 +n12119238 +n12121033 +n12124627 +n12126360 +n12131550 +n12135898 +n12136720 +n12137120 +n12137569 +n12139575 +n12141495 +n12142085 +n12143676 +n12144313 +n12146311 +n12147226 +n12152532 +n12153580 +n12154773 +n12155583 +n12156819 +n12157056 +n12157769 +n12158031 +n12158798 +n12159055 +n12159555 +n12160490 +n12161285 +n12163035 +n12164363 +n12166424 +n12168565 +n12170585 +n12173664 +n12174311 +n12174926 +n12182049 +n12187663 +n12188289 +n12195391 +n12196129 +n12199266 +n12201580 +n12202936 +n12205694 +n12214789 +n12215579 +n12217453 +n12221191 +n12224978 +n12225349 +n12226932 +n12231192 +n12236546 +n12237486 +n12244153 +n12245695 +n12246232 +n12252168 +n12252866 +n12253229 +n12256112 +n12257570 +n12260799 +n12262553 +n12265394 +n12266217 
+n12266796 +n12268246 +n12269241 +n12269652 +n12271643 +n12274630 +n12275489 +n12281241 +n12284262 +n12286826 +n12287642 +n12288823 +n12290748 +n12293723 +n12296432 +n12300840 +n12302071 +n12303462 +n12305475 +n12306717 +n12307756 +n12310349 +n12316444 +n12318378 +n12320010 +n12322501 +n12328398 +n12330469 +n12334293 +n12334891 +n12335483 +n12335664 +n12335800 +n12340383 +n12341542 +n12342299 +n12343480 +n12344283 +n12346578 +n12350758 +n12352287 +n12355760 +n12360108 +n12360684 +n12364604 +n12367611 +n12374418 +n12377198 +n12381511 +n12385429 +n12387633 +n12387839 +n12392070 +n12396924 +n12399132 +n12401335 +n12401684 +n12405714 +n12409231 +n12411461 +n12412355 +n12412606 +n12416423 +n12419037 +n12420535 +n12421467 +n12421683 +n12425281 +n12430198 +n12431434 +n12437513 +n12437769 +n12441958 +n12446200 +n12446519 +n12449296 +n12450344 +n12451915 +n12454159 +n12459629 +n12460697 +n12461466 +n12462032 +n12463743 +n12464476 +n12466727 +n12470092 +n12474167 +n12475035 +n12476510 +n12480895 +n12491826 +n12495146 +n12499163 +n12506181 +n12508309 +n12509476 +n12511856 +n12516584 +n12522188 +n12524188 +n12526516 +n12527738 +n12539074 +n12539306 +n12546183 +n12548280 +n12550210 +n12554526 +n12556656 +n12560282 +n12560775 +n12562577 +n12572546 +n12573256 +n12575322 +n12582231 +n12582665 +n12582846 +n12583126 +n12583401 +n12584191 +n12586298 +n12590232 +n12594989 +n12595964 +n12602262 +n12602980 +n12612170 +n12614477 +n12615710 +n12620196 +n12622875 +n12624381 +n12625383 +n12631331 +n12633638 +n12634211 +n12634429 +n12635744 +n12636885 +n12638218 +n12638556 +n12639736 +n12640607 +n12641413 +n12641931 +n12642200 +n12643473 +n12644902 +n12645174 +n12647376 +n12649065 +n12650556 +n12651821 +n12653218 +n12655869 +n12658118 +n12658846 +n12659356 +n12660601 +n12662772 +n12663804 +n12665048 +n12667406 +n12667964 +n12674120 +n12674685 +n12682411 +n12683407 +n12685431 +n12685831 +n12688716 +n12690653 +n12695144 +n12698435 +n12705013 +n12707781 +n12708293 +n12709901 +n12711596 
+n12713063 +n12714755 +n12715914 +n12717072 +n12719684 +n12724942 +n12725521 +n12727301 +n12731401 +n12732491 +n12732756 +n12733647 +n12741222 +n12742741 +n12743823 +n12746884 +n12749049 +n12752205 +n12755225 +n12756457 +n12762896 +n12768369 +n12771192 +n12772753 +n12777436 +n12778605 +n12779603 +n12785724 +n12791064 +n12793015 +n12794985 +n12798284 +n12800586 +n12801520 +n12805146 +n12806732 +n12810595 +n12812235 +n12814643 +n12817464 +n12822769 +n12823717 +n12823859 +n12832315 +n12833985 +n12834798 +n12836212 +n12836862 +n12839979 +n12840749 +n12842302 +n12842887 +n12844939 +n12849061 +n12853080 +n12854048 +n12858150 +n12866968 +n12869478 +n12870535 +n12871272 +n12877244 +n12878169 +n12879963 +n12882779 +n12884260 +n12890265 +n12893463 +n12903367 +n12904938 +n12908645 +n12909421 +n12912670 +n12917901 +n12922763 +n12926480 +n12928071 +n12929403 +n12930778 +n12931906 +n12934036 +n12934479 +n12939104 +n12941536 +n12942395 +n12943443 +n12946849 +n12950126 +n12952165 +n12953206 +n12956170 +n12957608 +n12960378 +n12960863 +n12965626 +n12968136 +n12969131 +n12970193 +n12971400 +n12973791 +n12974987 +n12976198 +n12980840 +n12982468 +n12983961 +n12985773 +n12987056 +n12988158 +n12992868 +n12997654 +n12997919 +n13000891 +n13001041 +n13001206 +n13001366 +n13001529 +n13002750 +n13002925 +n13003061 +n13003254 +n13003522 +n13003712 +n13004423 +n13004640 +n13004826 +n13004992 +n13005329 +n13005984 +n13006171 +n13006631 +n13006894 +n13007417 +n13007629 +n13008157 +n13008315 +n13008485 +n13008689 +n13008839 +n13009085 +n13009244 +n13009429 +n13009656 +n13010694 +n13010951 +n13011221 +n13012253 +n13012469 +n13012973 +n13013534 +n13013764 +n13013965 +n13014097 +n13014265 +n13014409 +n13014581 +n13014741 +n13014879 +n13017102 +n13017240 +n13017439 +n13017610 +n13017789 +n13017979 +n13018088 +n13018232 +n13018407 +n13019496 +n13019643 +n13019835 +n13020191 +n13020481 +n13020964 +n13021166 +n13021332 +n13021543 +n13021689 +n13021867 +n13022210 +n13022709 +n13024012 +n13024500 
+n13025647 +n13028611 +n13032115 +n13032923 +n13035241 +n13035389 +n13035707 +n13037585 +n13037805 +n13038068 +n13038376 +n13038744 +n13039349 +n13040629 +n13040796 +n13041312 +n13042982 +n13043926 +n13045210 +n13045975 +n13046130 +n13049953 +n13055423 +n13055577 +n13055792 +n13055949 +n13056135 +n13056349 +n13056607 +n13056799 +n13057054 +n13057242 +n13057422 +n13057639 +n13058037 +n13058272 +n13058608 +n13059298 +n13059657 +n13060017 +n13060190 +n13063269 +n13066129 +n13067191 +n13068917 +n13070308 +n13070875 +n13071371 +n13071553 +n13071815 +n13072031 +n13072209 +n13072350 +n13072528 +n13072706 +n13072863 +n13073055 +n13073703 +n13074619 +n13074814 +n13075020 +n13075272 +n13075441 +n13075684 +n13075847 +n13076041 +n13076405 +n13076643 +n13076831 +n13077033 +n13077295 +n13079419 +n13083023 +n13084184 +n13085113 +n13091620 +n13091774 +n13100156 +n13100677 +n13104059 +n13108131 +n13108662 +n13108841 +n13109733 +n13110915 +n13111174 +n13111881 +n13118707 +n13119870 +n13120211 +n13121104 +n13122364 +n13123431 +n13125117 +n13130161 +n13130726 +n13132034 +n13132338 +n13132486 +n13132940 +n13134302 +n13134947 +n13135832 +n13136316 +n13136556 +n13137409 +n13137672 +n13138308 +n13138658 +n13138842 +n13139055 +n13139647 +n13141141 +n13145444 +n13149296 +n13150894 +n13154841 +n13156986 +n13157137 +n13160831 +n13163991 +n13172923 +n13174670 +n13177529 +n13180534 +n13186388 +n13188096 +n13188268 +n13192625 +n13193642 +n13194572 +n13195761 +n13199970 +n13201969 +n13206817 +n13207736 +n13208705 +n13211790 +n13219422 +n13221529 +n13224673 +n13230662 +n13231678 +n13231919 +n13232106 +n13232363 +n13232779 +n13233727 +n13238375 +n13238988 +n13252672 +n13862780 +n13863186 +n13863473 +n13863771 +n13864153 +n13864965 +n13865298 +n13865483 +n13866144 +n13866827 +n13867492 +n13868248 +n13868371 +n13872592 +n13873502 +n13875392 +n13875571 +n13878306 +n13879320 +n13883603 +n13888491 +n13893786 +n13894434 +n13896100 +n13897996 +n13900287 +n13903079 +n13905121 +n13905275 +n13905792 
+n13912260 +n13915999 +n14633206 +n14696793 +n14844693 +n14853210 +n14899328 +n14900184 +n14974264 +n14977504 +n14992287 +n15062057 +n15067877 +n15089258 +n15089472 +n15089645 +n15089803 +n15090742 +n15092409 +n15092751 \ No newline at end of file diff --git a/workloads/realworld/async/darknet/cfg/imagenet.shortnames.list b/workloads/realworld/async/darknet/cfg/imagenet.shortnames.list new file mode 100644 index 0000000000000000000000000000000000000000..e7a18d44b543086958eaf60e6dc0b7deb0df9400 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/imagenet.shortnames.list @@ -0,0 +1,21842 @@ +kit fox +English setter +Siberian husky +Australian terrier +English springer +grey whale +lesser panda +Egyptian cat +ibex +Persian cat +cougar +gazelle +porcupine +sea lion +malamute +badger +Great Dane +Walker hound +Welsh springer spaniel +whippet +Scottish deerhound +killer whale +mink +African elephant +Weimaraner +soft-coated wheaten terrier +Dandie Dinmont +red wolf +Old English sheepdog +jaguar +otterhound +bloodhound +Airedale +hyena +meerkat +giant schnauzer +titi +three-toed sloth +sorrel +black-footed ferret +dalmatian +black-and-tan coonhound +papillon +skunk +Staffordshire bullterrier +Mexican hairless +Bouvier des Flandres +weasel +miniature poodle +Cardigan +malinois +bighorn +fox squirrel +colobus +tiger cat +Lhasa +impala +coyote +Yorkshire terrier +Newfoundland +brown bear +red fox +Norwegian elkhound +Rottweiler +hartebeest +Saluki +grey fox +schipperke +Pekinese +Brabancon griffon +West Highland white terrier +Sealyham terrier +guenon +mongoose +indri +tiger +Irish wolfhound +wild boar +EntleBucher +zebra +ram +French bulldog +orangutan +basenji +leopard +Bernese mountain dog +Maltese dog +Norfolk terrier +toy terrier +vizsla +cairn +squirrel monkey +groenendael +clumber +Siamese cat +chimpanzee +komondor +Afghan hound +Japanese spaniel +proboscis monkey +guinea pig +white wolf +ice bear +gorilla +borzoi +toy poodle +Kerry blue terrier +ox +Scotch 
terrier +Tibetan mastiff +spider monkey +Doberman +Boston bull +Greater Swiss Mountain dog +Appenzeller +Shih-Tzu +Irish water spaniel +Pomeranian +Bedlington terrier +warthog +Arabian camel +siamang +miniature schnauzer +collie +golden retriever +Irish terrier +affenpinscher +Border collie +hare +boxer +silky terrier +beagle +Leonberg +German short-haired pointer +patas +dhole +baboon +macaque +Chesapeake Bay retriever +bull mastiff +kuvasz +capuchin +pug +curly-coated retriever +Norwich terrier +flat-coated retriever +hog +keeshond +Eskimo dog +Brittany spaniel +standard poodle +Lakeland terrier +snow leopard +Gordon setter +dingo +standard schnauzer +hamster +Tibetan terrier +Arctic fox +wire-haired fox terrier +basset +water buffalo +American black bear +Angora +bison +howler monkey +hippopotamus +chow +giant panda +American Staffordshire terrier +Shetland sheepdog +Great Pyrenees +Chihuahua +tabby +marmoset +Labrador retriever +Saint Bernard +armadillo +Samoyed +bluetick +redbone +polecat +marmot +kelpie +gibbon +llama +miniature pinscher +wood rabbit +Italian greyhound +lion +cocker spaniel +Irish setter +dugong +Indian elephant +beaver +Sussex spaniel +Pembroke +Blenheim spaniel +Madagascar cat +Rhodesian ridgeback +lynx +African hunting dog +langur +Ibizan hound +timber wolf +cheetah +English foxhound +briard +sloth bear +Border terrier +German shepherd +otter +koala +tusker +echidna +wallaby +platypus +wombat +revolver +umbrella +schooner +soccer ball +accordion +ant +starfish +chambered nautilus +grand piano +laptop +strawberry +airliner +warplane +airship +balloon +space shuttle +fireboat +gondola +speedboat +lifeboat +canoe +yawl +catamaran +trimaran +container ship +liner +pirate +aircraft carrier +submarine +wreck +half track +tank +missile +bobsled +dogsled +bicycle-built-for-two +mountain bike +freight car +passenger car +barrow +shopping cart +motor scooter +forklift +electric locomotive +steam locomotive +amphibian +ambulance +beach wagon +cab 
+convertible +jeep +limousine +minivan +Model T +racer +sports car +go-kart +golfcart +moped +snowplow +fire engine +garbage truck +pickup +tow truck +trailer truck +moving van +police van +recreational vehicle +streetcar +snowmobile +tractor +mobile home +tricycle +unicycle +horse cart +jinrikisha +oxcart +bassinet +cradle +crib +four-poster +bookcase +china cabinet +medicine chest +chiffonier +table lamp +file +park bench +barber chair +throne +folding chair +rocking chair +studio couch +toilet seat +desk +pool table +dining table +entertainment center +wardrobe +Granny Smith +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +acorn +hip +ear +rapeseed +corn +buckeye +organ +upright +chime +drum +gong +maraca +marimba +steel drum +banjo +cello +violin +harp +acoustic guitar +electric guitar +cornet +French horn +trombone +harmonica +ocarina +panpipe +bassoon +oboe +sax +flute +daisy +yellow lady's slipper +cliff +valley +alp +volcano +promontory +sandbar +coral reef +lakeside +seashore +geyser +hatchet +cleaver +letter opener +plane +power drill +lawn mower +hammer +corkscrew +can opener +plunger +screwdriver +shovel +plow +chain saw +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +white stork +black stork +spoonbill +flamingo +American egret +little blue heron +bittern +crane +limpkin +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +European gallinule +pelican +king penguin +albatross +great white shark +tiger shark +hammerhead +electric ray +stingray +barracouta +coho +tench +goldfish +eel +rock beauty +anemone fish 
+lionfish +puffer +sturgeon +gar +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +triceratops +African crocodile +American alligator +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +whistle +wing +paintbrush +hand blower +oxygen mask +snorkel +loudspeaker +microphone +screen +mouse +electric fan +oil filter +strainer +space heater +stove +guillotine +barometer +rule +odometer +scale +analog clock +digital clock +wall clock +hourglass +sundial +parking meter +stopwatch +digital watch +stethoscope +syringe +magnetic compass +binoculars +projector +sunglasses +loupe +radio telescope +bow +cannon +assault rifle +rifle +projectile +computer keyboard +typewriter keyboard +crane +lighter +abacus +cash machine +slide rule +desktop computer +hand-held computer +notebook +web site +harvester +thresher +printer +slot +vending machine +sewing machine +joystick +switch +hook +car wheel +paddlewheel +pinwheel +potter's wheel +gas pump +carousel +swing +reel +radiator +puck +hard disc +sunglass +pick +car mirror +solar dish +remote control +disk brake +buckle +hair slide +knot +combination lock +padlock +nail +safety pin +screw +muzzle +seat belt +ski +candle +jack-o'-lantern +spotlight +torch +neck brace +pier +tripod +maypole +mousetrap +spider web +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +isopod +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +tiger beetle 
+ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +sea urchin +sea cucumber +iron +espresso maker +microwave +Dutch oven +rotisserie +toaster +waffle iron +vacuum +dishwasher +refrigerator +washer +Crock Pot +frying pan +wok +caldron +coffeepot +teapot +spatula +altar +triumphal arch +patio +steel arch bridge +suspension bridge +viaduct +barn +greenhouse +palace +monastery +library +apiary +boathouse +church +mosque +stupa +planetarium +restaurant +cinema +home theater +lumbermill +coil +obelisk +totem pole +castle +prison +grocery store +bakery +barbershop +bookshop +butcher shop +confectionery +shoe shop +tobacco shop +toyshop +fountain +cliff dwelling +yurt +dock +brass +megalith +bannister +breakwater +dam +chainlink fence +picket fence +worm fence +stone wall +grille +sliding door +turnstile +mountain tent +scoreboard +honeycomb +plate rack +pedestal +beacon +mashed potato +bell pepper +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +cardoon +mushroom +shower curtain +jean +carton +handkerchief +sandal +ashcan +safe +plate +necklace +croquet ball +fur coat +thimble +pajama +running shoe +cocktail shaker +chest +manhole cover +modem +tub +tray +balance beam +bagel +prayer rug +kimono +hot pot +whiskey jug +knee pad +book jacket +spindle +ski mask +beer bottle +crash helmet +bottlecap +tile roof +mask +maillot +Petri dish +football helmet +bathing cap +teddy +holster +pop bottle +photocopier +vestment +crossword puzzle +golf ball +trifle +suit +water tower +feather boa +cloak +red wine +drumstick +shield +Christmas stocking +hoopskirt +menu +stage +bonnet +meat 
loaf +baseball +face powder +scabbard +sunscreen +beer glass +hen-of-the-woods +guacamole +lampshade +wool +hay +bow tie +mailbag +water jug +bucket +dishrag +soup bowl +eggnog +mortar +trench coat +paddle +chain +swab +mixing bowl +potpie +wine bottle +shoji +bulletproof vest +drilling platform +binder +cardigan +sweatshirt +pot +birdhouse +hamper +ping-pong ball +pencil box +pay-phone +consomme +apron +punching bag +backpack +groom +bearskin +pencil sharpener +broom +mosquito net +abaya +mortarboard +poncho +crutch +Polaroid camera +space bar +cup +racket +traffic light +quill +radio +dough +cuirass +military uniform +lipstick +shower cap +monitor +oscilloscope +mitten +brassiere +French loaf +vase +milk can +rugby ball +paper towel +earthstar +envelope +miniskirt +cowboy hat +trolleybus +perfume +bathtub +hotdog +coral fungus +bullet train +pillow +toilet tissue +cassette +carpenter's kit +ladle +stinkhorn +lotion +hair spray +academic gown +dome +crate +wig +burrito +pill bottle +chain mail +theater curtain +window shade +barrel +washbasin +ballpoint +basketball +bath towel +cowboy boot +gown +window screen +agaric +cellular telephone +nipple +barbell +mailbox +lab coat +fire screen +minibus +packet +maze +pole +horizontal bar +sombrero +pickelhaube +rain barrel +wallet +cassette player +comic book +piggy bank +street sign +bell cote +fountain pen +Windsor tie +volleyball +overskirt +sarong +purse +bolo tie +bib +parachute +sleeping bag +television +swimming trunks +measuring cup +espresso +pizza +breastplate +shopping basket +wooden spoon +saltshaker +chocolate sauce +ballplayer +goblet +gyromitra +stretcher +water bottle +dial telephone +soap dispenser +jersey +school bus +jigsaw puzzle +plastic bag +reflex camera +diaper +Band Aid +ice lolly +velvet +tennis ball +gasmask +doormat +Loafer +ice cream +pretzel +quilt +maillot +tape player +clog +iPod +bolete +scuba diver +pitcher +matchstick +bikini +sock +CD player +lens cap +thatch +vault +beaker +bubble 
+cheeseburger +parallel bars +flagpole +coffee mug +rubber eraser +stole +carbonara +dumbbell +singles +Virginia deer +eastern grey squirrel +gelding +pylon +table-tennis table +peacock +Segway +surfing +tamandua +knocker +steering wheel +motorcycling +coati +sitar +range +backhoe +agaric +dashboard +water polo +concrete mixer +treadmill +golf bag +skateboarding +royal tennis +tartan +four-wheel drive +sport utility +sedan +print +luggage rack +softball +windmill +ben +red admiral +jalousie +towel rail +truss +strand +ice hockey +sconce +wind turbine +plush +stained-glass window +ballpark +thoroughbred +love seat +red-spotted purple +miller +Adelie +freight liner +clock tower +acrobatics +shaving brush +ewe +ottoman +African violet +bicycle wheel +cork +windmill +satin +comma +coffee mill +baggage +wasp's nest +batting glove +Ferris wheel +push-bike +porthole +football stadium +gas tank +barbecue +handlebar +hula-hoop +fairground +rapier +garter stitch +exercise bike +control tower +carryall +minute hand +cog +riverbank +water nymph +common dandelion +android +hairbrush +redberry +fret +display window +pepper mill +litterbin +drapery +ducking +fly-fishing +broad jump +sprinkler +water-skiing +chicory +sail +volleyball +rugby +Texas bluebonnet +computer monitor +tortoiseshell +airplane propeller +solar array +figure skating +air conditioner +purple loosestrife +gearshift +outboard motor +cowslip +Abyssinian +dip +workstation +cosy +bunker +neon lamp +campanile +casket +verbena +amphora +sumo +common foxglove +sprocket +jelly bean +emperor penguin +night-blooming cereus +clock radio +black birch +bomber jacket +Virginia bluebell +bayonet +walker +altarpiece +tattoo +bridle +rocker arm +water turkey +spiderwort +flange +mute swan +laser printer +carburetor +coverlet +mountainside +baritone +auto racing +baluster +gal +peach bells +taffeta +grandfather +asparagus +horizontal stabilizer +world +grate +marsh marigold +white rhinoceros +movement +split rail +rollerblading 
+longhorn +muffler +church tower +light bulb +American agave +backpacking tent +overall +New World goldfinch +sectional +wing chair +transom +integrated circuit +dad +spar +picture frame +no-hit game +alternator +drill press +strawflower +hepatica +rangefinder +blinker +Welsh pony +nib +wagon wheel +rotor +tie +denim +jetliner +sculling +external drive +window frame +mourning dove +censer +stapler +batting helmet +flagon +machete +windshield +hedgehog +weeping willow +chief executive officer +hepatica +pet +Asiatic black bear +chinchilla +uke +Atlantic bottlenose dolphin +hair +dishtowel +flintlock +Bermuda shorts +lavender +searchlight +millwheel +piano keyboard +luna moth +bumper +parrot +skirt +manhole +coffee table +footstool +judo +Dalai Lama +armored personnel carrier +voile +saber +thoroughbred +wild carrot +gemsbok +caster +butterfly orchid +cow +sideboard +horseshoe crab +match play +cassette recorder +photomicrograph +drafting table +pediment +tramline +shipping +kitten +wainscoting +fried rice +helix +marguerite +pumpkin +white-bellied swallow +Tulipa gesneriana +common dolphin +face +red squirrel +bicycling +shipwreck +banded purple +cornice +pendant earring +forsythia +aardvark +seashell +spat +shoulder bag +fallow deer +yearling +common teasel +tufted titmouse +ancient +professional golf +purl +vehicle +okra +great grandmother +common lilac +rose mallow +newspaper +crucifix +chukka +armlet +fulmar +wapiti +doily +Greco-Roman wrestling +bleeding heart +kitchen table +bluebonnet +Cape buffalo +spun yarn +crape myrtle +dewdrop +great blue heron +medalist +vaulting horse +spinning wheel +skyscraper +Tahitian +forget-me-not +watercourse +guitarist +gargoyle +bee balm +pumpkin +hunting knife +flutist +lectern +skateboarder +foil +pant leg +hedge sparrow +dresser +automatic pistol +chicory +dialog box +chamberpot +black rhinoceros +fireweed +half-mast +pillow sham +pavilion +scarf joint +microprocessor +filly +dressing gown +shell +Arabian +child +radio 
antenna +butterweed +morris dancer +sparrow hawk +groom +brioche +floret +rainbow +earthworm +cellist +tine +toupee +balldress +map +angel's trumpet +ruin +fur +pronghorn +speed skating +used-car +stick +early spider orchid +stuffed peppers +snowdrift +flats +least sandpiper +stick +console table +ventilator +portable +kepi +pylon +viceroy +shoreline +Olympian Zeus +pestle +great-niece +life +air compressor +fanjet +scuba diving +fieldfare +tree swallow +personnel carrier +night-blooming cereus +sonogram +assembly hall +circuit breaker +chair +speed skate +soapwort +worsted +raspberry +burlap +flat panel display +Pyracantha +cemetery +turban +deer hunting +bottle green +dandelion green +pieta +aigrette +turntable +cover girl +clutch bag +kiwi +pea jacket +color guard +Malay +shire +crock +french fries +credenza +hockey stick +mourning cloak +potty seat +glass +balsamroot +medal play +red clover +gravy boat +garter belt +Guinness +meadow buttercup +jackass penguin +coursing +tooth +hawfinch +housetop +fluorescent lamp +black-backed gull +bookshelf +earplug +millipede +fawn +baseball bat +soup-strainer +organ loft +bugloss +tomahawk +blackcap +black-necked stilt +hand truck +bedstead +tempura +rose window +crimson +snow thrower +lesser whitethroat +palomino +ball +staff sergeant +wicker +garbage heap +great-nephew +parquet +coupe +nave +eggs Benedict +damask +flush toilet +Angora +pedometer +control room +bristle brush +kookaburra +telephone booth +Windsor chair +red-winged blackbird +cinnamon roll +briefs +cloister +sundress +mammillaria +unicyclist +covered bridge +coelogyne +fairy bluebird +phoebe +beer mug +headstock +parhelion +gorse +common European dogwood +fire-eater +professional football +rock climbing +cyclamen +tin +marjoram +Japanese morning glory +pipe +smasher +hang glider +abutment +birdbath +jotter +litter +artist's model +butterfly bush +dining area +sausage dog +piggery +English sparrow +Turk's-cap +platinum blond +song sparrow +alarm clock 
+tortoiseshell +chaise longue +flintlock +academic costume +graffito +Arnica montana +adding machine +waterside +director +jonquil +pipefitting +stud +Swedish meatball +musk rose +Venus's flytrap +raven +bougainvillea +little brother +field bindweed +finder +white admiral +tinfoil +serval +sheet +carthorse +people +potto +stockroom +sphinx +slate roof +mountain laurel +majolica +coal black +repository +bufo +pique +binder +tread +attorney general +hydraulic press +videocassette recorder +bumper car +professional baseball +cow parsley +ern +blue peafowl +common hyacinth +jack-in-the-pulpit +ice hockey rink +sport +camper +tailback +flash +stacks +pulp +Christmas cactus +netball +calliandra +curler +large periwinkle +cobweb +forward +Roman arch +cross bun +stoneware +banana bread +cape jasmine +settle +tongue +frock +pepper shaker +pitching coach +CD-R +casing +faience +hand cream +CD-ROM +recliner +striped bass +clary +sketch +risotto +reticle +white clover +touch football +kitty +great-aunt +Japanese maple +sidecar +muscovy duck +hack +rope bridge +organist +stinging nettle +pocket watch +Indian pipe +amorphophallus +bird's-foot violet +caller ID +furnishing +carriageway +dish rack +heiress +nail polish +beldam +Dall sheep +teriyaki +stateroom +laughing gull +chow +bookmark +timer +toga virilis +deviled egg +coltsfoot +Papuan +native +cygnet +automation +portfolio +cabbage palm +cube +broiler +radish +broodmare +castor-oil plant +pith hat +talus +lass +thatch +common marigold +young buck +igloo +prairie rattlesnake +soccer player +spoke +place +slide fastener +tapestry +toy +headboard +cross-country skiing +harness +sconce +rim +ballet skirt +transvestite +saddlebag +common evening primrose +taillight +challah +willet +ready-to-wear +cloud +answering machine +waterfront +vane +granddaughter +Chinese gooseberry +tureen +cab +truffle +viola +bootlace +chemise +taro +petal +candied apple +soccer +miniature golf +front porch +asparagus +Sauvignon blanc +daisy fleabane 
+ceiling +slip-on +bottle-nosed whale +redbud +black squirrel +snowsuit +ribbing +gravestone +creme brulee +ambassador +local +archery +love-in-a-mist +garbage +thyme +night-blooming cereus +goshawk +cuckoopint +azure +German iris +salad bowl +puppy +cockhorse +giant clam +biplane +stele +necklet +sea otter +crest +door +reformer +comforter +Byelorussian +bottle +hemline +book bag +leotard +owlet +spoon +sari +bidet +Latin +reticulated python +bowling shoe +futon +gaiter +coypu +tea urn +waders +bangle +snowbank +pencil +porter +azalea +English lavender +red spruce +team sport +cruet +high-rise +O ring +vodka +cormorant +Canada thistle +clasp +showjumping +rattan +red fox +sun parlor +Charolais +Tommy gun +bird's foot trefoil +sedge warbler +knot +chives +car tire +steam engine +adapter +spirea +common allamanda +oyster shell +harbor seal +baobab +wick +plumbago +downy woodpecker +coconut +leash +kasbah +hour hand +upholstery +mallard +cricket bat +lady +kitchenware +right-hander +leopard +olive green +common valerian +blue whale +blackboard +redhead +periwinkle +fingerboard +hard hat +locker +breakfast table +capybara +beekeeper +harness +feeder +water hyacinth +hexapod +brown thrasher +percale +lever +patriarch +arete +book +book +senator +bunya bunya +couch +durian +common lady's-slipper +mountain ash +golden barrel cactus +bicycle seat +beret +pop +musk mallow +manatee +cotton candy +boxing glove +backboard +tongue +saguaro +playground +capitol +sanderling +wagtail +deputy +tractor +tap +lady's smock +noseband +worsted +radiotelephone +camisole +forelock +muscat +sweet scabious +crane fly +butterfly weed +chestnut +pinata +inositol +borage +aquatic +belly +broadcaster +gondolier +egg yolk +blush wine +bufflehead +rambutan +oleander +horse-trail +sea holly +yard bird +conference room +lacrosse +belted kingfisher +defile +extremum +whistle +bear cub +grainfield +potage +watermelon +lasagna +sheik +Cooper's hawk +bulb +basketball court +paella +cassette tape 
+scatter rug +kid +impala lily +Minnesotan +Sudanese +chocolate +tail +quack-quack +whistling swan +shoulder patch +frozen custard +sumo wrestler +smoothie +bock +meat grinder +latch +palisade +radial +sake +kestrel +corn chowder +airframe +electrician +reamer +metropolitan +cotton flannel +cassowary +crossbill +operating room +winter aconite +flute +Tasmanian devil +billboard +suds +kilt +aperitif +cooling tower +avocado +hooded merganser +coleslaw +bee balm +ladder-back +insurance broker +scaffolding +polo mallet +double bed +two-hitter +bluff +gamboge +baby +lawn chair +frond +pistol grip +fancy dress +marquetry +jambalaya +fireweed +Eurasian kingfisher +cue ball +ice plant +horseweed +rose moss +musher +sun +viscount +white-breasted nuthatch +gin and tonic +thermos +Kenyan +first-aid kit +four-wheeler +tourist +stairwell +Gambian +liqueur glass +hovercraft +cocktail dress +twin +coriander +blister pack +Barrow's goldeneye +canteen +irrigation ditch +great white heron +tree sparrow +canal boat +lens +food processor +common raccoon +Baltimore oriole +black-eyed Susan +bush hibiscus +corolla +sire +mustachio +professional wrestling +elk +clustered bellflower +pannier +musk ox +crapaud +animal trainer +rosebud +ring-necked pheasant +little egret +cappuccino +rocker +bristlecone pine +cheerleader +hedge violet +semaphore +central processing unit +speedskater +delivery truck +assembly +hedgehog cactus +bergenia +bull thistle +bladder campion +cinquefoil +inula +cellulose tape +main rotor +bootee +autogiro +ice +grey +meadow cranesbill +hummus +valise +chassis +mountain goat +blacktail prairie dog +Chardonnay +romper +street +shoveler +wood ibis +topiary +chalice +silo +circus acrobat +Rollerblade +cosmos +woof +heroine +cold cream +marabou +herb robert +garden lettuce +nymph +floor lamp +automobile engine +heel +radiator +seeded player +fedora +father-in-law +peahen +Bahamian +wiper +wood pigeon +barn owl +pegboard +chorus frog +kin +roller skate +stob +rosemary 
+cowbird +hortensia +cranberry sauce +shot glass +Dixie cup +gnu +fire alarm +diet +booster +oxeye daisy +twayblade +high-definition television +truss bridge +bunk bed +mule +blackbuck +facsimile +frog orchid +point-and-shoot camera +brocade +gazebo +prairie gentian +concert +paintball +Cognac +maid +afghan +barbecued spareribs +pintail +tramway +commissioner +finger-painting +beef stew +caftan +Aberdeen Angus +demonstrator +sea trout +pigtail +thrush nightingale +barbados cherry +sashimi +ridgeling +lamppost +gabardine +red-shouldered hawk +bath salts +cavern +cymbid +Haitian +boater +southern buckthorn +arctic +motorcycle cop +red gum +Clydesdale +Zamboni +beagling +villa +demitasse +Sheetrock +lollipop +hybrid petunia +post horse +carabiner +brussels sprouts +Durham +stylist +pothole +sleigh bed +scallop shell +harrier eagle +papaya +Japanese persimmon +sachet +wild rice +chipboard +gun enclosure +menorah +chinook +headset +white campion +ocean +Secretary of State +G-string +bone china +basil +greenish blue +camcorder +concrete +screech owl +trumpet honeysuckle +flugelhorn +layette +cattle egret +case knife +mandarin duck +robber fly +salwar +dressing table +doughnut +facade +runner +honeypot +surf casting +diver +angel's trumpet +spin dryer +chameleon +wand +snow +vitamin A1 +manageress +volleyball net +antiperspirant +street clothes +tree sparrow +cords +sundew +bricks and mortar +caryatid +bridesmaid +trestle bridge +eyepiece +celebrant +scarlet pimpernel +gas range +onion +green salad +squill +creepy-crawly +hunk +little owl +salad nicoise +earflap +bird feeder +spray gun +bunny +Cheops +amazon +blue tit +Nissen hut +Kalashnikov +skylark +kremlin +shoebill +shopping bag +frigate bird +telephoto lens +peplum +moss pink +echidna +wastepaper basket +wood ibis +workroom +ankle brace +telpherage +Michaelmas daisy +figure skate +swami +nylons +cardoon +cocotte +headstall +twin bed +parsley +dirndl +corn poppy +nut bread +cloche +light heavyweight +mayor +lip-gloss 
+punch bowl +pottage +mango +fledgling +mousse +four-wheel drive +barrel +banana boat +trouser +bathroom +Sauterne +ring +settee +lavaliere +safe-deposit +godson +leatherette +schoolmate +radish +hedge trimmer +dahlia +euphonium +palace +vaulter +singlet +slicer +Pilsner +cockateel +kangaroo paw +Cub Scout +master bedroom +hexagon +cenotaph +Barberton daisy +Netherlander +intersection +Korean +gravel +chandelier +hospital bed +flash memory +pier +whole wheat flour +maroon +pale ale +special +snow bunting +crinoline +dustpan +barrette +common wood sorrel +yolk +pothos +speakerphone +tendril +cabinetwork +farm horse +brake disk +streetlight +superhighway +bandsaw +panting +pressure cooker +girdle +old man +cereal bowl +felt +hurling +architecture +harmonium +chain +blueberry +cellar +smocking +scrub brush +tablespoon +sweet corn +graining +library +street +bill +felt-tip pen +monkshood +crowd +log cabin +newel post +hack +elephant seal +golden pothos +popcorn +outhouse +patch pocket +fish and chips +tape +wax plant +eaves +fried egg +emerald +tea cart +fan blade +daily +Bowie knife +rowing boat +leaf shape +man +crayon +trumpetfish +chipping sparrow +whiskey bottle +pillion +city hall +golden pheasant +cheerleader +creeping bugle +couch +Dumpster +Homo sapiens sapiens +cranberry juice +cockpit +demagogue +joinery +scrambled eggs +technician +sidewalk +sheep +keyhole +power line +polyanthus +roulette +first lieutenant +checkout +tabletop +nasturtium +schnapps +engineering +skateboard +ground fir +bouquet +bunk +resort area +fleur-de-lis +power steering +opera +Bolivian +Friesian +buckskins +bay +slider +frozen yogurt +cabin cruiser +saunterer +lean-to +fishing eagle +bog star +cantaloupe +mouth +music stand +fiddlestick +brilliantine +pinball machine +bairn +barred owl +bath oil +signorina +Mason jar +nymph +rubber band +garden nasturtium +razorbill +Japanese beetle +batting cage +trestle +borage +Secretary of the Interior +scanner +baguet +baseball cap +chow mein 
+pen +jewelweed +barbet +chasm +pectoral sandpiper +holster +glasses case +sand +crevice +Kickapoo +snowboard +locket +satchel +tankard +alpinist +moorhen +cow pen +whooper +crown +chain +silversword +wild geranium +hi-fi +Tibetan +waterwheel +bee orchid +ruby-crowned kinglet +common broom +tabloid +javelin +sauna +klammath weed +zebra finch +spider orchid +velour +chiffon +lecture room +barrel +loggia +millstone +flatlet +soupspoon +econometrician +golf-club head +daphnia +parlor +fire-eater +juggler +attache case +hay bale +kisser +knitting needle +news magazine +flatbed +Senegalese +trumpeter +trampoline +brogan +bone +caftan +lobster pot +gazpacho +anthill +ramekin +mainsail +penitentiary +spotted flycatcher +cookstove +root beer +broom beard grass +pogo stick +plywood +epee +gas oven +Global Positioning System +sweet false chamomile +breakfast area +bullring +second cousin +wave +decolletage +rodeo +won ton +swastika +bobby pin +papaw +retaining wall +Muscadet +heavyweight +energizer +banner +amusement park +whinchat +drugstore +waxwork +meander +congee +heat sink +switch grass +commuter +peony +western white pine +wild raspberry +nightgown +saute +cardinal +claret +pollinator +biryani +pina colada +cassette deck +European sandpiper +block +flan +birdcage +baby +lieutenant colonel +ticking +European white lily +dog violet +coat hanger +premature baby +organza +string bean +balloonist +hurricane deck +window box +hang glider +bullfighting +piste +seahorse +hard cider +batik +common mullein +petite marmite +stuffed mushroom +tequila +ground ivy +fountain grass +stray +putter +buffer +comet +bomber +woodcarving +baseball glove +halter +garnish +selvage +megaphone +sea fan +rabbit hutch +very important person +analog watch +long-head coneflower +northern pike +roll-on +cigarette butt +terraced house +penknife +windshield wiper +cricket +straightener +snow pea +cockerel +canister +sour bread +recovery room +toilet bowl +tyrannosaur +big sister +quartz battery 
+television receiver +vitamin C +tailpipe +field thistle +stonechat +col +monstrance +gift wrapping +herbivore +quarter horse +ice-cream sundae +rumpus room +eyepatch +clary sage +French lavender +snorkel +choir +tent-fly +cat box +horse racing +high priest +barrel cactus +pin oak +wild thyme +keyboardist +raiser +hammock +hail +bungee +chocolate mousse +major +buzzard +gopher tortoise +Chablis +water meter +benthos +donna +blender +Mauser +avocet +rye +mulch +chancel +dusty miller +mate +corbel +minaret +frittata +French toast +mosaic +home brew +water faucet +beard +swivel chair +acropolis +largemouth +abbey +tabby +driver +copperhead +stirrup +Boston fern +Tennessee walker +artichoke +honor guard +chapatti +enchantress +sweat pants +electric organ +column +dry vermouth +range hood +Red Delicious +rape +splint +catapult +gourd +antipasto +plaza +carnation +star +wood anemone +English primrose +male fern +boot +atrium +Japanese deer +carnivore +yearling +doe +guelder rose +chicory +stretch pants +ice-cream cake +frogfish +tarpaulin +chicken soup +balaclava +tor +feverfew +three-hitter +flyweight +aqua vitae +locker room +wether +teacup +wide-angle lens +hook +ladder-back +osprey +awning +wedding +chest protector +pooch +rose mallow +orange daisy +fondant +envelope +duckling +blackberry +goosander +snorkeling +philatelist +broad bean +Frank +bok choy +basket +absinth +cayenne +blackbird +bottled water +trooper +timber +stable +chestnut +tomatillo +bell +banquet +rainbow trout +macrame +appointee +heart +chipmunk +purple clematis +safety bicycle +shuttle bus +Japanese black pine +lentil soup +downhill +field mustard +brass +hand-me-down +greater yellowlegs +fanny pack +croquet mallet +hip roof +duffel bag +Ritz +document +pie plant +staff member +lifeguard +white-throated sparrow +Cameroonian +hydrofoil +platter +common ageratum +middleweight +chairlift +brunch +pharmacist +lemon +driveshaft +green snake +lip +London plane +mangrove +crystal +siskin +common jasmine 
+hollandaise +villa +cross-country riding +mother-in-law's tongue +generator +Tanzanian +whisk +seeder +ashtray +griddle +evening bag +bluebird +bran muffin +square dancer +luggage compartment +tropical pitcher plant +autofocus +tape drive +silencer +Hawaiian guitar +swamp sparrow +Zimbabwean +drawing room +weekender +liparis +streambed +samosa +hitter +water heater +tidal basin +ossuary +dik-dik +camouflage +fiance +Jordanian +rolling pin +slingback +turret +hen +jennet +playpen +woodhewer +bushing +church bell +bear grass +double knit +tennis pro +Joe-Pye weed +pave +pochard +painted beauty +crinoline +gumbo +trestle table +schnitzel +balloon flower +Turkish coffee +extension cord +wireless local area network +sluice +umbel +microeconomist +sky +aisle +commander in chief +hydroplane racing +poll +Coca Cola +fuel injection +bird pepper +monkey puzzle +English muffin +riverbed +varietal +kachina +airport +saltwort +oolong +red-hot poker +mihrab +cocoa +jersey +Walkman +syndic +Hessian boot +millstone +carpenter +outfall +curbstone +mocha +field pansy +patriarch +slacks +switchblade +killdeer +whelk +pampas grass +racquetball +platform bed +Indian rhinoceros +Japanese iris +blacktop +dinner jacket +stud +jodhpurs +telephone pole +business district +kurta +basil +handset +file folder +gloriosa +orphan +cantle +cookie sheet +cafe au lait +drawbridge +hill myna +Western diamondback +watch case +cardcase +bowling alley +mattress cover +canvasback +pompadour +cornice +matador +cigar cutter +skunk cabbage +baptismal font +bitters +refectory +egg +parula warbler +tiger lily +field house +nanny +skin-diver +soda water +lymphocyte +carport +chocolate fudge +amphitheater +sugar candy +sea hare +open-face sandwich +dessert spoon +staple gun +envelope +worker bee +general +garment bag +maypop +autobahn +Atlantic puffin +polo shirt +Humvee +spice rack +grotto +banderillero +gaillardia +black-crowned night heron +oboist +weigela +Dictaphone +dwarf iris +marsh mallow +yarrow 
+eccentric +catsup +jade green +mistress +henbit +beachwear +head +commuter +strawberry tree +chickpea +clothespin +fleabane +brussels sprout +winter melon +Laconian +great horned owl +caricaturist +nan +flowerbed +triple sec +dairy +round of golf +cardinal +kauri +Zulu +Armagnac +cowberry +mouthpiece +wild calla +bling +puppeteer +beer drinker +adder +field sparrow +chocolate pudding +blacksmith +finback +Shetland pony +cheese fondue +panty girdle +soda can +electrolytic +florist's chrysanthemum +yellow jasmine +tudung +equalizer +ridge +dulcimer +grappa +barn swallow +coneflower +enamel +poached egg +halfback +yak +toby +Fleet Street +blue catfish +sand tiger +flying buttress +snaffle +stoop +first base +cultivated land +first lady +waratah +headquarters +arnica +lovebird +common morel +parasol +disk clutch +Xerox +vitamin P +vitamin B12 +long sleeve +certified public accountant +hot pants +pitch pine +pantie +drawers +cake mix +boar +grey +bride +false sago +bullion +coach house +bass guitar +Japanese banana +meadow clary +black belt +Canterbury bell +smallmouth +treadmill +great white heron +enchilada +rummer +captain +camisole +wild garlic +oak fern +ultramarine +peach +hawkweed +autostrada +adit +anaconda +artwork +skinhead +jello +hermit thrush +Bewick's swan +dress suit +trail bike +stubble +common polypody +Riesling +Easter lily +telegraph key +envelope +garlic bread +perianth +salad bar +steppe +club sandwich +nude +garden forget-me-not +Tuareg +flood +Statehouse +charcoal +boy scout +Rhone wine +parfait +spoor +lanyard +octagon +brown bread +quarterback +quilted bedspread +hookah +Pepsi +hamburger bun +entrepreneur +saddle oxford +snake's head fritillary +undies +chemise +skidder +chickpea +carnation +honey bun +mortar +Montrachet +automobile horn +skylight +gingham +rafter +pantile +climbing frame +scarlet runner +cable +cornstalk +mockingbird +raisin bread +chili sauce +hand calculator +concert-goer +detached house +coq au vin +lasso +hyssop +globe 
thistle +paper clip +slide +Jerusalem artichoke +tetrahedron +mock orange +lemon lily +finger +little sister +handcuff +horse wrangler +pavlova +oilcloth +snow-in-summer +common mugwort +greenshank +ice-cream cone +rubber boot +gunnysack +disk jockey +long trousers +sorghum +pontoon +calf +fire extinguisher +cotton thistle +pilot whale +ao dai +steamroller +wristwatch +tawny owl +city +country store +ironweed +kennel +bathrobe +rattan +drawer +fly tent +choline +musk thistle +courthouse +Yugoslav +bush +trawler +shellflower +jade vine +ragged orchid +pea soup +King Charles spaniel +hubcap +snook +paddy +bow and arrow +shovel +dill +cliff swallow +cadaver +hijab +masterpiece +fish geranium +kettle +sanitary napkin +carrot stick +Mountie +peanut brittle +dam +jackal +windowsill +butterfly orchid +bodice +picador +pale yellow +beanie +petiole +tenor saxophonist +bungalow +gnomon +stock saddle +field glass +rigging +wood grain +Speaker +settlement house +swamp milkweed +paper nautilus +tangerine +champagne +crescent roll +library +Schmidt telescope +stemless carline thistle +motorcyclist +alpine ash +planchet +water closet +casuist +hand luggage +hyssop +spaghetti and meatballs +cannelloni +cedar waxwing +water dog +brick red +linkage +sweep hand +purple heather +macaroni and cheese +butter knife +refreshment +malt +St. 
Augustine grass +wainscot +compass +gas heater +tamale +table saw +referee +borsch +projector +dracaena +peppermint +Reuben +Abyssinian banana +glassblower +floss +small stores +artilleryman +lapwing +ranch +garbage man +dwarf banana +commelina +currant +adulteress +landlocked salmon +pasqueflower +nan +tiger lily +Eritrean +rotunda +catsup bottle +mezzanine +royal fern +blended whiskey +bowler hat +mistletoe +manor +fusee drive +pistachio +dispensary +swamp +amputee +sculptor +schoolmaster +Chinese anise +dwarf iris +livestock +chronograph +nectarine +jockey +plaster +motel room +swamp azalea +hippeastrum +space station +duchess +catacomb +dovetail +cockscomb +common spotted orchid +brittlebush +cleats +cloche +hotchpotch +cabin car +prey +indigo +light beer +bear's breech +jonquil +analyzer +alyssum +spur gear +ice tea +honey buzzard +twayblade +dirndl +atlas moth +croquette +carafe +flyweight +professional basketball +multivitamin +air terminal +phial +roll-on +skunk cabbage +bird of paradise +rose +cooter +camping +divided highway +herbage +sweet vermouth +common comfrey +eggplant +office building +glutton +gefilte fish +bicycle rack +swamp birch +Venetian blind +Pernod +Norway spruce +portrait camera +bastion +vitamin Bc +Ugandan +Indian red +okapi +emu +vin ordinaire +chintz +shrimp cocktail +numbat +tall oat grass +cable car +stopcock +ham sandwich +Yemeni +stanhopea +plate +chicken broth +common yellowthroat +California poppy +radio +chocolate egg +mess jacket +tea table +physostegia +Japanese flowering cherry +confectionery +chicken cacciatore +painted nettle +popover +white rice +strapless +mohair +electrical cable +coil spring +arterial road +miniature fan palm +spectator pump +pesto +interlocutor +eastern kingbird +dongle +vitamin B6 +stuffed tomato +cough drop +okra +black +barbecue +burial mound +firstborn +corn snake +amberjack +bollard +horn +Black African +elbow pad +Camembert +circle +Japanese apricot +hearing aid +rock star +creature +taster 
+bubble gum +scull +lemon balm +chaetodon +anemometer +brake drum +fuselage +courthouse +aqualung +yellow adder's tongue +reception desk +guy +buffalo wing +ginger beer +robin +pantothenic acid +marsh hawk +yellow journalism +exhaust +cardamom +Tabasco +ax handle +patriarch +floor +pine snake +spoiler +hood +sphagnum +parrotfish +orphanage +redpoll +beef Wellington +white spruce +cherry plum +scapular +field lens +broomstick +mouser +wood thrush +Nebraskan +hotelier +milk thistle +soya milk +Munich beer +boucle +snowy egret +dust storm +steward +kudzu +oriental poppy +presbytery +burro +orange soda +stonecrop +splashboard +menagerie +dormer +wire cutter +yellow bells +Dubliner +shore pine +cousin +racing gig +Morgan +gold plate +villager +snifter +granny's bonnets +egg roll +Spode +amabilis fir +babbler +pestle +heliopsis +halter +black spruce +President of the United States +ski slope +chocolate fondue +lockstitch +motel +Epipactis helleborine +tabbouleh +Yorkshire pudding +overpass +Timorese +presbyter +tablefork +bottle gourd +tiara +vintage +pilgrim +reindeer moss +shower stall +towel rack +kachina +chef's salad +breeder +cow parsnip +walker +Black woman +Irish coffee +portrait lens +lateen +gilt +successor +cargo container +Lithuanian +mayapple +paisley +highchair +strawberry jam +flying fox +field scabious +blue-eyed grass +screw +Frisbee +dressing room +cholla +walkie-talkie +red currant +centrifugal pump +smorgasbord +hot rod +marcher +rowanberry +welwitschia +amphitheater +pew +concert band +bosom +pillbox +seagrass +openwork +meadow goldenrod +shower +chicken sandwich +Boston ivy +plastron +oilfield +stuffed tomato +juniper berries +frame +Spanish mackerel +family room +powder horn +fight +maguey +bunker +work-shirt +air filter +nosh +sugar bowl +foothill +reliquary +tugboat +horsebox +grater +palace +board member +campsite +halibut +geneva +ginger ale +high commissioner +genet +bodywork +spaghetti +protractor +pipe cutter +wood anemone +turkey cock 
+surge suppressor +green turtle +spoiler +bedsitting room +television room +ballot box +shasta daisy +impeller +capote +bitter +California wine +lock +spinnaker +gill fungus +baby's breath +nut and bolt +moonflower +houseboat +distributor cap +coffee bean +gusset +bowling ball +knitwear +frieze +mistflower +roadster +cue +circuitry +brake +butt hinge +Chickasaw +leopard frog +wing tip +puree +mantel +pantheon +grandfather clock +cockchafer +pomegranate +cleaners +eyeshadow +Oregon cedar +rock hopper +hawksbill turtle +agriculturist +yellow-crowned night heron +Albanian +pumpkin seed +chateau +goggles +camper trailer +bracket fungus +cigarette case +signal box +saddle blanket +poison ivy +set gun +cattleya +dry fly +concert hall +personal digital assistant +talcum +deodorant +common starling +painted turtle +kea +plenipotentiary +pantyhose +masjid +buskin +hurdle +cocktail lounge +belting +sour dock +knife blade +sugar snap pea +paddle +dickeybird +brace +keep +call center +yacht +lead pencil +tumbler +production line +tetra +private +French window +express +ski boot +pinto +broad bean +American crow +screech owl +snapper +power cord +Manx +rambutan +sun deck +stonefish +golden eagle +national monument +readout +cork oak +hacksaw +beer can +bathe +tussock bellflower +wet suit +mihrab +big game +highlighter +sprocket +measuring worm +grapefruit +samovar +distributor point +steak knife +incubator +loon +temporary hookup +hippodrome +hot spring +spacesuit +flea market +clay pigeon +catbird +earmuff +tetherball +yellowfin +cellophane +lanolin +clapperboard +velveteen +police dog +cashew +sequencer +mango +duplex house +bazaar +Golden Delicious +red carpet +collet +kickstand +broadloom +diskette +tank engine +compact +diesel-electric locomotive +whale shark +water moccasin +mountain avens +tropic bird +ginkgo +ski cap +fixative +glockenspiel +chopine +ethernet +herring gull +skeleton key +finger paint +conference table +great crested grebe +harbor +white-crowned sparrow 
+Bullock's oriole +guestroom +boutique +cable television +roulette wheel +Luger +Latin American +trumpeter +blindfold +baby +freshwater bass +home plate +bonefish +giant sunflower +giant tortoise +planking +pigeon hawk +oceanfront +door +bazaar +common wasp +conformation +kick starter +kid glove +corydalis +shuttlecock +writing desk +ivory gull +shirttail +diving suit +weka +downy birch +altar +wild sage +tufted puffin +cabinet +Orpington +cineraria +bottom +dial +coracle +resort hotel +soap dish +spotted owl +billiard room +ghetto blaster +red-breasted nuthatch +hatchling +chalet +bracteole +crusher +mixer +net melon +farmhouse +Dutch oven +transept +penlight +palmyra +stewing pan +solar cell +crochet needle +black-winged stilt +germander speedwell +crinkleroot +truncheon +bunchberry +hatchback +sounding board +mixing faucet +chess master +bisque +Brie +Sitka spruce +pawn +Mexican-American +space rocket +choreographer +collared peccary +duffel +nacho +patchcord +carpet snake +omnivore +watering can +hall of residence +streamer fly +sunroof +great grandson +oil refinery +billiard player +ivy geranium +key palm +pinwheel +yellow-shafted flicker +purple onion +soldering iron +condominium +fishing gear +heat pump +marine iguana +cuckoo clock +Bletilla striata +headrest +spotted salamander +field hockey ball +pound +carboy +vertical stabilizer +groundsheet +cinnamon bread +acorn squash +sheathing +lakefront +Jeffrey pine +synthesizer +olive +apple +pannier +ponderosa +Jew's-ear +latch +equatorial +metasequoia +permit +bloomers +town hall +fava bean +casino +bier +jampot +common snapping turtle +clary sage +oatmeal +Dutchman's breeches +massif +Guyanese +heifer +handball +sweat suit +pomelo +Iceland moss +customhouse +sandbag +archer +gyrfalcon +sword cane +marmite +whole snipe +blue crab +sugar spoon +brownstone +chicken wire +lizardfish +dump truck +chicken yard +chamois +electric +idle pulley +jujube +wrestling mat +aoudad +Burmese cat +water shamrock +dormitory 
+Unknown Soldier +hearse +bumper +clipper +desert pea +critter +semitrailer +backboard +common St John's wort +Atlantic manta +song thrush +jukebox +quoin +eastern chipmunk +copper beech +paintball gun +bull +package store +fraise +royal poinciana +niqab +traction engine +objective +day nursery +ski lodge +orphan +summer house +cereal box +router +sleuth +jodhpur +polyp +croquet +sport kite +green onion +tulle +etagere +tussock caterpillar +rest house +elderberry +bridal wreath +Torrey pine +silver wattle +kidney bean +pentode +laelia +Allen wrench +sporran +red drum +tricot +heterodyne receiver +magazine rack +stone curlew +trawler +suckling +niblick +sandwich plate +double door +Togolese +pitching wedge +desert tortoise +cloth cap +date palm +webbing +jumper +frogmouth +copperhead +covered couch +black mallee +riser +scraper +gauntlet +pantheon +food court +muntjac +grocery bag +bread-bin +transmission shaft +primigravida +window seat +crab apple +seat +Fresnel lens +dendrobium +hatchback +little theater +butter dish +back porch +umbrella tree +carrot +seventy-eight +coconut +music stool +Tesla coil +bay willow +American basswood +sabot +wheel and axle +gazette +lute +bassinet +hart +mecca +breadbasket +silverfish +handball +Scotch pine +box camera +stately home +Hereford +tread +single-breasted jacket +desk phone +deodar +professional boxing +fly casting +box wrench +black oak +martello tower +red campion +bullock +sweet William +bay leaf +dollhouse +flounder +fox hunting +beanbag +king mackerel +rouge +film advance +common mallow +parasitic jaeger +satellite receiver +nurse shark +chesterfield +tomatillo +plimsoll +hatbox +bloomer +foul-weather gear +longleaf pine +horse mackerel +tree lizard +bark +belfry +Treasury +perch +purple finch +stag beetle +fragrant orchid +tachymeter +tadpole +cookie jar +knee piece +agueweed +bones +chick +golf glove +toothpick +taboret +rotor blade +field artillery +purple willow +redhead +spark plug +guava +voice mail +cross 
+butterfly valve +star magnolia +olive +room light +Australian turtledove +embassy +Iraqi +singles +nestling +spinning rod +radial engine +rowan +sandbox +boss +moccasin flower +veneer +mint +American chestnut +white whale +CPU board +florist +press box +hurricane lamp +giant kangaroo +greater whitethroat +winter jasmine +blue +department store +southern red oak +saber saw +corn muffin +bellbottom trousers +toaster oven +red eft +condominium +galago +sunbather +redpoll +common European earwig +songbird +linnet +light meter +bracer +tepee +gumbo +water glass +roofing +spathiphyllum +shofar +sand lizard +washroom +Brussels carpet +brachyuran +home room +floatplane +knee brace +solar heater +felucca +gas ring +maguey +manse +blue columbine +cuppa +cigar band +male orchis +mudskipper +couscous +Chinese parasol tree +dude ranch +banyan +gopher snake +sundrops +aviary +African daisy +missel thrush +Photostat +stone pine +circus tent +tangle +printer cable +grease-gun +rose chafer +light pen +plantain +hearth +bullfinch +post oak +slow loris +Newtonian telescope +head +punt +spindle +New England aster +spotted sandpiper +pond pine +grass skirt +bug +black rat snake +tabasco +bull shark +tennis camp +scrambler +popinjay +bing cherry +ministry +cash register +redheaded woodpecker +kameez +farmer's market +roan +harpy +European toad +pizzeria +camshaft +hemp nettle +chicken coop +cottage pink +daybed +observatory +airdock +mountain devil +newsstand +kingfish +snow gum +jackdaw +lacquerware +peeler +miro +sister ship +damask rose +pack +snowshoe +Liberian +paramecium +tidytips +professional tennis +bookend +wood swallow +cayuse +cranberry +rock squirrel +steak au poivre +soul patch +female mammal +sash fastener +songwriter +oxeye daisy +apse +floor joist +hand towel +wheatear +cero +soul mate +golden fig +bus stop +psycholinguist +convenience store +manor hall +mountain sandwort +Euopean hoopoe +haricot vert +mausoleum +violist +flashlight battery +chard +fixer-upper +bank 
martin +testudo +diving duck +kohlrabi +Omani +sphygmomanometer +greyhound racing +chestnut +rattlesnake plantain +chaffinch +wolf pup +teakettle +cairn +souk +resident commissioner +chuckwalla +gaiter +capercaillie +liver chestnut +bean sprout +land line +ambassador +green pepper +common chickweed +Sharpie +Oriental arborvitae +oncidium +pallone +currawong +sweet alyssum +fire tower +eyebrow pencil +redfish +apricot +clementine +blucher +wigwam +pangolin +buggy +common oak +jumbojet +laser +cigarette holder +racquetball +georgette +cleft +scouring pad +drum printer +pond scum +American red squirrel +caranday +swamp willow +blindworm +brook trout +defense system +nyala +three-way calling +mizzen +shuttle +African lily +Oregon white oak +rain tree +fuel gauge +oriental cherry +wahoo +pear +jungle gym +bass fiddle +outrigger +angelfish +Old World coot +lime +battlement +yarmulke +herpes varicella zoster +burp gun +Alpine glacier +stun gun +pilot boat +Southern crab apple +bushtit +pullet +polo pony +jackfruit +raw vegetable +French marigold +golden shower tree +spike lavender +wahoo +brass knucks +cabbage palm +diesel-hydraulic locomotive +red jungle fowl +prairie sunflower +rye +loofa +icecap +shade tree +secretary bird +saffron +cos +muskrat +videodisk +Carolina wren +candy bar +Bohemian waxwing +flowering almond +cold frame +raglan +pine siskin +quince +western red cedar +red maple +adobe +agora +kumquat +tenement +bantam +bayberry +water jump +great granddaughter +snips +porcupinefish +brochette +love-in-a-mist +Iceland poppy +common sage +pace car +camel racing +slipcover +nopal +shoehorn +calypso +rhea +in-basket +maple syrup +cold chisel +Pacific ridley +dietary +aperture +lapin +rock hyrax +house wren +litchi +ragged robin +control center +shoebox +arabesque +eider +silver birch +bantamweight +ax head +softball +blue gum +Bechtel crab +tomato sauce +green douglas fir +sweet gum +macaroni salad +red phalarope +budgerigar +Bedford cord +Uzi +green woodpecker 
+ohmmeter +bacon-lettuce-tomato sandwich +hackney +Easter egg +motmot +red pine +opium poppy +gat +pussy willow +greater scaup +ocelot +persimmon +western hemlock +carambola +pinion +Malcolm stock +bobsled +larkspur +wood drake +pinetum +red gum +draft beer +funnel +terrarium +Pinot blanc +doodlebug +brittle star +salsa +cantaloup +pollack +stockpot +eastern hemlock +rock wren +burqa +squash +aircraft engine +billy +flamingo flower +odontoglossum +old squaw +redstart +sheepskin coat +mate +flathead catfish +gentianella +bilberry +bog rein orchid +incense cedar +mew +Colorado spruce +cob +portmanteau +grenadine +common ginger +masdevallia +compound microscope +sobralia +white fungus +guppy +chapterhouse +honey +green frog +sea swallow +African marigold +astrolabe +verdigris +yellowhammer +carrot juice +oxlip +medicine ball +highboy +grass frog +gamebag +surgery +mincer +mulloway +cactus wren +box office +resonator +table-mountain pine +European curlew +supernova +cabbageworm +peach +plane seat +asp +Yquem +tomato hornworm +rook +quadruped +chador +micrometer +dabchick +Afro-wig +balsam fir +bucket seat +sage green +macon +blue poppy +chinquapin oak +black pine +spinach +chrysalis +carnauba +tee +bearberry +shirt button +tree of heaven +southern white cedar +covered wagon +brood hen +spadix +European catfish +winter wren +bulldog clip +carpetbag +study hall +chino +simian +closeup lens +cookie cutter +grapefruit +mandola +sassaby +Allegheny plum +piaffe +scorpion fly +booby +draft animal +field tent +cumin +laurel oak +smooth-leaved elm +American arborvitae +American toad +grinding wheel +mountain ash +cuttlefish +pipistrelle +parer +safety rail +Clark's nutcracker +side-blotched lizard +giant hornet +wicket +dugout +electric toothbrush +dhow +common four-o'clock +long-eared owl +anchor +near beer +tansy +creme caramel +guided missile frigate +shelduck +durian +compact +iron tree +shiitake +polo +camouflage +pedal pusher +salon +tangerine +lacebark +Swiss mountain 
pine +goalpost +poolroom +space capsule +wild cherry +dress hat +wave +raglan sleeve +cassia +Jerusalem artichoke +cabbage palmetto +marsh harrier +American redstart +sea squirt +cliff diving +sparrow hawk +watch cap +frankfurter bun +police boat +flash camera +neem +eastern meadowlark +Italian cypress +orb-weaving spider +graniteware +sewing basket +latex paint +rock dove +stator +leaf lettuce +roulette +broadcloth +Spork +panicle +sternwheeler +cider vinegar +brown creeper +cowfish +closed gentian +chickpea +port +pimento +sheeting +matilija poppy +hawk owl +guava +papaya +huisache +European shrike +racing skiff +yellow warbler +gumbo-limbo +North Carolinian +staysail +court +iced coffee +money belt +shaver +Psychopsis papilio +sumo ring +refection +kingfish +clock pendulum +greater butterfly orchid +disk harrow +tawny eagle +polyphemus moth +pieplant +Nicaraguan +bocce ball +California box elder +porbeagle +crown of thorns +Mexican sunflower +fennel +stream orchid +slip ring +white fir +fold +moss campion +fairy ring +hose +pony-trekking +western larch +meadow pipit +Cape May warbler +longan +bookmobile +junk shop +lemon shark +smelling bottle +solan +widow +sea pen +universal joint +day game +goldcrest +maiden pink +biographer +rotunda +oriel +arranger +gambrel +Angora +fen orchid +leading rein +Wilson's snipe +European nuthatch +natterjack +athletic supporter +mouflon +emergency room +swallow-tailed coat +western meadowlark +feather star +Navy SEAL +toilet bag +loquat +lesser butterfly orchid +thumbhole +breathalyzer +featherweight +collards +mayfly +confessional +mountain ebony +redwing +Norway maple +refractometer +stagecoach +gasoline gauge +octopus +baker +Rhode Island red +European tortoise +cardiologist +Punjabi +Arkansas kingbird +tamarind +drum brake +flash +yellowtail +stokes' aster +emperor +free house +sour gum +ruddy duck +hamadryad +command module +tinamou +Norway lobster +washstand +European hornbeam +roaster +black-necked grebe +tallgrass 
+leopard lizard +anastigmat +Blackburn +deutzia +ground rattler +Christmas fern +wild pink +sesame seed +carrycot +Italian parsley +nectar +roll-on roll-off +true laurel +anisette +candy corn +flowering maple +revers +dun +tobacco hornworm +common sunflower +common grape hyacinth +cardiograph +electric meter +herb Paris +goalmouth +spruce grouse +canopy +wind poppy +stemma +gateleg table +lumper +speckled rattlesnake +gudgeon +rough-legged hawk +internal drive +pomelo +piece de resistance +storm door +clementine +Japanese pink +settler +yellow jacket +Fraser fir +royal palm +cicada killer +cayenne +guava +bluewing +red baneberry +lesser yellowlegs +cache +bog rose +sparring partner +ski jumping +sherry +glacier lily +beer mat +shredder +American widgeon +protectionist +green olive +black-tailed deer +Alpine fir +dispatch case +whipping cream +African daisy +cantilever bridge +maraschino +rhea +ink bottle +dacha +hagberry tree +lesser rorqual +orchard oriole +candidate +cuticle +breadfruit +fishbowl +giant puffball +closed gentian +Joshua tree +tie rod +beard lichen +flame tree +stegosaur +acerola +Swan River daisy +common murre +flowering almond +protegee +loggerhead shrike +Wilson's warbler +Japanese honeysuckle +basilisk +skimmer +hybrid tuberous begonia +pumpkin ash +chafing dish +collared lizard +iced-tea spoon +scrubbird +Iceland poppy +grey kingbird +wallflower +slick +diesel +Swiss pine +ethernet cable +ketch +lightship +black cherry +swordtail +Monterey cypress +lightweight +Floridian +Sabine +stall +contact +viola da gamba +hemstitch +upland sandpiper +box spring +sassafras +radome +lesser scaup +bluefin +yellow-bellied sapsucker +armored car +cabin class +Moorish arch +webcam +aquavit +overall +sergeant major +soft shield fern +gin and it +bobolink +subcompact +falconer +black morel +roadrunner +lab bench +thong +coffee urn +weeping beech +caladenia +southern live oak +scanner +wine vinegar +common speedwell +European roller +fuji +snag +piping plover 
+concertina +secateurs +meat thermometer +supercomputer +funnel +dais +western fence lizard +spruce pine +pommel horse +Cassegrainian telescope +pitta +India-rubber tree +mangosteen +tamp +aposematic coloration +dustcloth +birth +Atlas cedar +reed bunting +jabiru +sainfoin +press photographer +golden oriole +laryngoscope +thermal printer +winder +doubles +cricket ball +dabbling duck +tonic +Buddhist +Morris chair +swatter +quaking aspen +ancient pine +American larch +evaporative cooler +click beetle +yellow-breasted chat +souchong +bluegill +pied-billed grebe +tricorn +spring beauty +southern magnolia +rowel +chili +hard roll +flathead +satsuma +gangplank +bourguignon +cockfighting +greenwing +plum tomato +fly orchid +gnatcatcher +spotted eagle ray +ovenbird +brassavola +mocha +candy cane +afterburner +thriftshop +study +winter crookneck +grinder +muskellunge +sacred ibis +inverter +sandwort +deer fern +stair-carpet +Cotes de Provence +ovenbird +rex begonia +American woodcock +poison ash +lowland fir +pawpaw +loblolly pine +kinkajou +European hackberry +pest +coralwood +Bedouin +acetate rayon +snuffbox +radiator cap +basket oak +table-tennis racquet +smew +midge +telescopic sight +radish +great burdock +separate +damask violet +broadbill +bourbon +blacktip shark +gift shop +khimar +date +woodland caribou +policeman bird +grey birch +American elm +strawflower +officiant +hart's-tongue +straight razor +Spanish elm +radicchio +white croaker +vicuna +soft-shell clam +flannel +adonis +bonito +kittiwake +English walnut +soldierfish +hipflask +spotted crake +Streptopelia turtur +American maidenhair fern +corn cockle +telephone cord +canopy +playback +diocesan +marsh orchid +manakin +purple grackle +cob +fishmonger +otoscope +vermillion flycatcher +inhaler +instar +licentiate +myrtle warbler +goat herder +benthos +toggle +drumhead +piranha +doorplate +vault +triptych +red-necked grebe +transporter +vernier caliper +flathead +Portuguese man-of-war +countrywoman +vacation 
home +Bactrian camel +night-light +module +lemon curd +carancha +painted daisy +bok choy +ratatouille +troll +escarpment +cinnabar +computerized axial tomography scanner +lychgate +sowbread +bedside +guided missile cruiser +reel +cleat +hemostat +blue shark +Seven Wonders of the Ancient World +motorized wheelchair +pillow block +horned puffin +prickly pear +electric range +mother's daughter +vein +Oregon maple +bird dog +faceplate +wren warbler +feather reed grass +common alder +Adam's needle +straitjacket +organ-grinder +gantry +bikini pants +peristyle +herpes +terry +toad lily +celandine +red-breasted sapsucker +bragger +green peafowl +fuschia +quoits +house martin +dome +herpes simplex 1 +touraco +meeting house +vacuum gauge +cat's-ear +crisphead lettuce +carpet moth +European rabbit +puff adder +Old World scops owl +fire pink +fruit punch +ant bear +black walnut +stroboscope +white mangrove +pine grosbeak +cast +check-in +ring-necked parakeet +matai +shingle oak +fieldwork +rue anemone +landing net +ouzo +herringbone +lyceum +hydrogen bomb +mullein pink +masher +evening grosbeak +water vole +livingstone daisy +tomatillo +cavalier hat +interphone +wild lupine +goosefish +sugar maple +plantain +white dead nettle +Monterey pine +bugle +veloute +marsh gentian +Bermuda buttercup +alehouse +Peter Pan +thong +LP +tulip tree +scanner +scarlet tanager +music hall +angel shark +pecan +eight ball +rosy boa +outboard motorboat +garage +fanlight +black cottonwood +notornis +mountain fern +lunar crater +reddish orange +whitetip shark +executant +European ladies' tresses +washboard +revolving door +case knife +balloonfish +greater kudu +tarpan +cog +wet fly +Irish soda bread +basement +broken arch +canopic jar +muscat +kazoo +bobsledding +loaner +black guillemot +English saddle +garlic mustard +Foucault pendulum +mulberry +clotted cream +dove's foot geranium +Atlantic ridley +convector +ground floor +European wildcat +poinsettia +hideaway +great barracuda +black beech +bushy 
aster +cornflower +tam +true slime mold +carving knife +holly fern +railroad tunnel +crimson clover +disposal +etamine +suspension +plasmodium +political scientist +minnow +Spanish rice +twist bit +subway train +Scleroderma citrinum +saw palmetto +console +gimlet +hand pump +waratah +rock rattlesnake +keel +server +curlew sandpiper +hone +sable antelope +inkle +photostat +foresail +sallet +tiger salamander +chutney +onlooker +Exmoor +tiramisu +drawing room +battery +sour orange +juniper berry +beeper +funeral home +fescue +Maksutov telescope +ranch house +jai alai +carob +socket +popcorn +sandbar shark +pipal +summer tanager +oast +skipjack +rolling stock +dropper +great snipe +turnip greens +cowpea +honeycomb +ichneumon fly +maternity hospital +harp seal +nylon +bomb shelter +horse tick +litchi +camel's hair +mimosa +bur oak +anvil +belay +pinhead +continental breakfast +burglar alarm +Mojave rattlesnake +auxiliary storage +lightwood +ratepayer +cecropia +retractor +quadrate +pepper tree +Venus' slipper +abattoir +strawflower +firewater +purple saxifrage +black rat +pack +pepper pot +mayweed +winger +whitetip shark +great yellow gentian +snowdrop anemone +garden angelica +soy sauce +white poplar +inkwell +crouton +gas gun +honey locust +house of cards +ice maker +moquette +arrack +casualty +butterfly orchid +eau de vie +mosquitofish +prairie smoke +haft +horseshoe +steel +peach orchard +Mexican hat +encaustic +shoe +pennywhistle +sweet woodruff +hull +doorsill +globe amaranth +day school +housedog +crown princess +oxbow +maxi +positron emission tomography scanner +compere +European turkey oak +peanut +sentry box +house physician +hot line +loquat +rove beetle +riband +flowering fern +fan vaulting +ceibo +bongo +bat boy +omelet pan +European ash +breadwinner +gaff topsail +clerestory +bushbuck +bluethroat +khukuri +Father +portcullis +candy egg +brake lining +lawn furniture +buckskin +garden pea +Brazilian rosewood +Italian bread +horn poppy +silk tree 
+Christmasberry +hotel-casino +poplin +false lupine +desert sunflower +mimeograph +alpenstock +cork tree +cultivar +common mosquito +pollard +black marlin +understudy +lancet window +college +breadfruit +Herero +Labourite +bar printer +squaw grass +stelis +firing chamber +sycamore +artificial horizon +radiologist +pansy orchid +bicycle pump +wraparound +bell gable +home computer +orchard grass +carving fork +bergamot +honeycreeper +sewing room +radiator +core +brown bat +goose grass +adjutant general +Erlenmeyer flask +massasauga +tail rotor +cardinal tetra +Drambuie +wine palm +Sarcoscypha coccinea +shantung +Calvados +garganey +vicar +house mouse +creeping oxalis +digital subscriber line +cedar elm +backgammon board +blackberry-lily +pallid bat +New Zealander +Barbadian +rose geranium +European spider crab +gharry +electric hammer +mustard +Chinese lantern +laundry cart +filament +mozzarella +gooseberry +sukiyaki +porkpie +culvert +altazimuth +plum pudding +serin +Spanish dagger +Asian crocodile +crevalle jack +mascara +pig bed +alderman +northern shrike +Sufi +purple-fringed orchid +derringer +linseed +hockey skate +bell jar +Japanese wistaria +mantled ground squirrel +western toad +lieutenant commander +mechanical piano +ovoid +paddlefish +demijohn +coast live oak +brick +gearset +tailstock +phonograph needle +winery +tuberose +mother's boy +shot tower +crucian carp +carpet pad +lamb's-quarter +Menorah +common white dogwood +hypanthium +rosebay +wild medlar +soil horizon +sweet orange +bitterroot +hand glass +cloisonne +towpath +gum ball +margay +carambola +bolt cutter +charger +vibraphone +gueridon +elephant tree +wood-frog +ash grey +duffel coat +third base +chunga +glebe house +lake trout +encephalartos +Japanese oak +northern red oak +pruner +blue orchid +Biloxi +western wood pewee +corselet +alabaster +anechoic chamber +grass pink +wax begonia +blue daisy +pennyroyal +Asian tiger mosquito +cheese souffle +flat bench +caramel +sump pump +bush violet +common 
fennel +corner +skullcap +asparagus fern +white mangrove +calceolaria +sateen +saltbox +hollowware +head nurse +coal miner +mountain lily +tufted vetch +European perch +line officer +steamer +stickball +shin guard +cauliflower +Monegasque +hatpin +wolffish +trackball +khaki +arthrogram +rocket larkspur +naval commander +Gemini +ski binding +department head +Chenin blanc +wingstem +knothole +aerides +sweet bay +tautog +gangway +waterspout +Hudsonian godwit +armyworm +incinerator +kidney vetch +pine nut +cypress vine +hip tile +sorrel tree +relay +bench press +Kentucky coffee tree +dobson +sapling +false lily of the valley +veld +phaius +vitamin B2 +beaker +wall tent +sieva bean +dusty miller +sewing kit +cavalry horse +diaper +butterfly pea +Spam +saddlebill +pearly everlasting +kowhai +Sister +moneywort +organdy +pine marten +bareboat +hot-water bottle +baby blue-eyes +silver lime +common cotton grass +malmsey +blue pea +baggage car +pineapple +folding saw +cotton rose +brawler +black duck +Weizenbock +pool player +Gujarati +wild duck +purple sage +sage grouse +mail train +arm guard +short-spurred fragrant orchid +queen +eparchy +spring peeper +ortolan +shoulder +fighter pilot +American beech +snowcap +novitiate +roller +butcherbird +canyon oak +brompton stock +firebrick +rudder +light cream +Primus stove +nonsmoker +probationer +harp +kosher +surcoat +videotape +zebu +first class +yam +car +rissole +miso +funambulism +attic +curling iron +shutter +encolure +split-pea soup +yellow rocket +gas oven +ultracentrifuge +chamomile +canteen +eyeliner +yellow squash +Irish stew +collar +doublet +machinist +septic tank +snap bean +Polyporus squamosus +western tanager +creeping St John's wort +back +sinkhole +perforation +Romanian +epergne +fez +comfrey +sidecar +beach pea +screen door +instigator +plughole +woodbine +pigweed +hip pocket +common scoter +squeegee +Surinam cherry +porringer +body stocking +eatage +shallot +enlarger +common canary +trophy case +gun case +plow 
horse +hot plate +pearl oyster +margarita +madras +backspace key +pigeon guillemot +pajama +buckthorn berry +homestead +bedbug +Linotype +trundle bed +granadilla +theremin +chin rest +bouillabaisse +tumble-dryer +truffle +cassava +kurrajong +gyroscope +European silver fir +C-clamp +politician +green soybean +exponent +flame tree +scissortail +achimenes +crown daisy +soft tree fern +spaghetti squash +pale violet +beaver +dashiki +washboard +driving wheel +sack +foulard +sputnik +boatbill +English elm +sack coat +grog +golliwog +Malayan tapir +May wine +calash +stile +windjammer +American sycamore +rotor head +fast food +balata +dragonet +Emmenthal +metronome +negative +meadow saxifrage +rabbit ears +chenille +round +hobby +crankshaft +Wilson's phalarope +Murphy bed +soil pipe +forecourt +policyholder +tarmacadam +loyalist +gyro +Queen's crape myrtle +shortcake +apple butter +pumpkinseed +heronry +yellow perch +baggage claim +escarpment +diaphragm +mescal bean +shunter +flax +columbarium +Joe-Pye weed +Neandertal man +casement +hole-in-the-wall +Verdicchio +futurist +eaglet +tassel hyacinth +pup tent +fawn lily +cabbage palm +pogonia +hospital ship +water mill +Oregon grape +lentil +grindstone +banana split +inkberry +coonskin cap +bazooka +wrap +anise hyssop +Java sparrow +red-eyed vireo +common opossum +clintonia +bustle +booster +tribesman +soy +panhandle +jaboticaba +locking pliers +Sauvignon grape +ghat +screw +oximeter +white croaker +saucepot +eggbeater +reticule +cabbage bark +looking-glass plant +head gasket +California sycamore +cowbell +Aleuria aurantia +Herr +lever +spider orchid +cashew +shift key +solar house +wood chisel +white +mantilla +stamp +bolero +rear admiral +garden rake +Lao +crowbar +lapdog +buttermilk biscuit +yellow bedstraw +pickerel frog +dowel +serjeant-at-law +mill-hand +lambrequin +state treasurer +red silk-cotton tree +coiffeur +star anise +shoulder pad +marshal +sitar player +gown +ground cedar +hedge maple +caddie +pitahaya +corn 
marigold +stick cinnamon +woodland star +Eurasian green toad +anti +blueweed +medicinal leech +gaur +chocolate kiss +kit fox +mother +butte +audio CD +blast furnace +vitamin D +nutgrass +cornice +black sheep +hearing aid +lingonberry +quad +lentil +riding crop +pratincole +pentagon +sea lavender +nerita +flatmate +catboat +water clover +angiopteris +mushy peas +crown imperial +music school +woodshed +platy +Turk's-cap +rundle +reading teacher +hardtack +balloon sail +oriental spruce +bluefish +white mulberry +horned violet +satin bowerbird +treasure flower +sustaining pedal +mimosa +spurge nettle +sea green +hasp +lederhosen +pink cockatoo +long johns +basket weave +freewheel +thrust bearing +timber tree +orphan +falafel +common camas +bird of passage +bird's foot trefoil +electric eel +fizz +grape arbor +serape +brace +hazelnut +kylix +horse mackerel +cassia bark +lizard orchid +spat +Brown Swiss +pocket flap +pillory +purplish blue +rolling mill +tappet +broccoli rabe +semi-detached house +mushroom coral +fly orchid +nougat bar +ball hawk +sand wedge +shirred egg +black locust +strip lighting +drop scone +brush turkey +ball +tragopan +dallisgrass +tuatara +great knapweed +potentiometer +Kiliwa +Pacific bottlenose dolphin +accelerator +Darwin tulip +osteopath +Arizona cypress +manna ash +butterbur +cornelian cherry +American holly +nopal +tanker +foreshore +ditty bag +gas lamp +safety razor +chanter +fomite +chip +striped killifish +catalytic converter +plaice +dusty miller +takin +gerenuk +corn chamomile +Japanese pagoda tree +boneset +common osier +Guinean +taro +plotter +celandine poppy +churn +steenbok +edible mussel +sensitive fern +triode +black raspberry +zoo keeper +feather ball +dredger +starlet +cornpone +coat button +rosinweed +toy Manchester +crested cariama +finger food +basilisk +shotgun shell +comfort food +mountain hemlock +candytuft +Stilton +record changer +anklet +ball valve +Mediterranean snapdragon +BVD +sand cat +Galloway +nutmeg +water-mint 
+woodwaxen +citron +ark shell +federalist +drone +cheekpiece +hyperbaric chamber +addax +field-emission microscope +synchronous converter +men's room +medlar +electronic fetal monitor +Sazerac +false indigo +roof +passe-partout +meadow spittlebug +Phytophthora infestans +oast house +hedge nettle +voting booth +slender salamander +telephone jack +true bug +scouring rush +Scotch egg +matchbook +aperea +cytomegalovirus +garlic press +cove +whitebark pine +Slovene +narrow wale +mother's milk +Audubon's warbler +prickly poppy +cowl +tailorbird +mud brick +bamboo palm +welt +Afghan +Virginia spring beauty +dinner bell +night jasmine +fly rod +microtome +aerie +carinate +picker +brick trowel +loving cup +swathe +green mayonnaise +rivet +bandbox +newsroom +tea tortrix +bobby +gig +hush puppy +garlic chive +piston rod +aspidistra +bluejack oak +harvest-lice +strap hinge +sour mash +macadamia nut +histiocyte +fan belt +shelf bracket +abelia +Hottentot fig +fish chowder +abettor +compote +beige +dioon +hop +haymaker +oilskin +magnetometer +tool bag +tambour +call girl +gringo +fairy light +broad-leaved plantain +second base +zebra mussel +Japanese cedar +pistia +swamp chestnut oak +cashmere +double cream +samisen +lamb curry +companion +kapok +julep +sweet woodruff +gardener +jewfish +inspector general +collembolan +wheel bug +bass +scrubland +wryneck +macrozamia +trouser press +clove +tiger cowrie +yawl +collard +dildo +pony cart +ormer +annual +tessera +chancellery +two-toed sloth +queen +old lady +wringer +spritzer +baggage +black mangrove +black-eyed Susan +semifinalist +highlighter +alfalfa +Easter daisy +escapement +operating table +neutral spirits +bursar +roble +entablature +girl wonder +farm boy +ring ouzel +permanent press +auklet +beefsteak tomato +gaming table +tea bag +manul +giant bamboo +Ozark chinkapin +matzo +furrow +smoothhound +CD-ROM drive +powdery mildew +copilot +garden +American merganser +bunsen burner +Asian longhorned beetle +lead tree +creeping 
buttercup +Percheron +back brace +axseed +cub +soul food +rabbi +edelweiss +mineshaft +fox grape +sandwort +torque wrench +leisure wear +Mae West +broccoli +loach +maraschino +heavy cream +silkworm +cirque +vintner +whitewash +butterfly pea +two-toed sloth +midiron +ceriman +Bulgarian +operating microscope +sambuca +California fuchsia +silver maple +tangelo +black bean +lugsail +starting gate +leek +sunflower seed +fish fry +clinker +synagogue +coscoroba +brae +uphill +common limpet +golden plover +cedar of Lebanon +amphibian +Canary wine +taipan +agua +feeder +parallel +mater +pink calla +meat counter +yagi +crab cactus +cacao bean +bowfin +alley cat +stonefly +Eastern cottonwood +vernier scale +marginal wood fern +dancing-master +detective +yam +textile screw pine +hooch +spinet +single prop +sassafras +goose barnacle +triple cream +China tree +peeper +dressmaker +snatch block +ironmongery +dressing case +creeping bellflower +silver sage +honeydew +eastern red-backed salamander +peg +nombril +danish +mashie +anarchist +alligator snapping turtle +shepherd +American white pine +runner +chalice vine +rheumatologist +defibrillator +yellow chamomile +lemon balm +peacekeeper +native beech +sandwich board +Bavarian +titrator +paneling +deer mouse +poteen +sugar snap pea +meadow salsify +town crier +best +basinet +common myrtle +night lizard +cushaw +Tampax +camphor tree +gentile +orange peel +putty knife +pyromaniac +Brummie +fever tree +double +nest +inferior +cabbage tree +graduated cylinder +mucor +woodborer +earthwork +potato salad +four-hitter +gooseberry +water vole +ziggurat +grapefruit juice +four-in-hand +cranberry bush +diode +videotape +Mohican +niacin +beetroot +shirtsleeve +cork tree +two-eyed violet +white ash +drawing chalk +baked Alaska +bone-ash cup +toastrack +diastema +bed jacket +dwarf astilbe +yellow honeysuckle +cow pasture +sheet pile +saxhorn +upholstery material +California white oak +Spanish bayonet +horsemint +littleneck +deflector +magician 
+standard transmission +blue marlin +shallot +feijoa +collar +board +jump suit +common staghorn fern +priory +Xhosa +Loranthaceae +barbecued wing +barmaid +spit +lemon juice +umbrella plant +field pennycress +centenarian +queen bee +fish stick +black bread +dirk +secularist +German American +spotted weakfish +iron foundry +speed bump +yellow-fever mosquito +gag +frame +black-eyed pea +alcoholic +involucre +sperm whale +balanced diet +wax bean +butcher's broom +winter heath +Mainer +Australian pine +gas guzzler +double-breasted jacket +pod +palo verde +trimmer +wattmeter +dyer's woad +crotalaria +vine maple +sulky +jack pine +thumb +Wilton +Panchen Lama +welder +badminton court +business editor +Arabian coffee +Kamchatkan sea eagle +foamflower +steep +plane +freckle +cerebral cortex +Vouvray +tea +forest tent caterpillar +neckerchief +accelerator +jig +bridal wreath +highball glass +New England clam chowder +beach strawberry +call waiting +baton twirler +double boiler +Dutch elm +car bomb +filmy fern +breviary +Florida gallinule +dace +parsnip +riparian forest +crescent +earplug +grab bar +cusk +foglamp +screwtop +black mangrove +mascot +Welsh poppy +gas holder +support hose +salsify +red beech +Indian python +caroler +pineapple juice +lowboy +terra sigillata +black olive +hypodermic needle +radio-phonograph +moussaka +miter joint +creche +tuning fork +black wattle +affiliate +vertical tail +kiwi +red morning-glory +piping crow +runway +Kashmiri +studio apartment +sea feather +Judas tree +boatbuilder +corn earworm +fallboard +Victrola +lechwe +goat willow +turret clock +Canada anemone +leaf lettuce +savoy cabbage +headpiece +Lebanese +fothergilla +hemlock +toolshed +silver tree +blue-headed vireo +weatherman +cylinder +caltrop +adjutant bird +driving iron +millet +European woolly thistle +rose apple +clown +schoolfriend +eastern coral snake +barbecue +executive vice president +long-billed marsh wren +brittle bladder fern +tank destroyer +left-hander +matting 
+catchment +balsa raft +eastern fence lizard +color tube +corncrib +electric typewriter +westland pine +elder statesman +whey +plonk +mound +cittern +nest egg +copyholder +China aster +basking shark +gavial +common duckweed +vanilla orchid +red-shafted flicker +granadilla +sylph +sty +vest pocket +potherb +little brown bat +Trapezium +ordinary +adult +purple-fringed orchid +abseiler +disco +metal detector +beefsteak fungus +ilang-ilang +barley grass +hawser +suture +brake shoe +staghorn coral +barbecue sauce +Browning machine gun +sarcophagus +disa +oven thermometer +rosemary +track +gorget +quince +royal +piston ring +teak +pin cherry +Komi +walking fern +sloe +synchronous motor +fire-bellied toad +Teleprompter +co-star +cape gooseberry +oscillograph +bass clarinet +cock of the rock +Tyke +showy milkweed +safety valve +branch water +sweet marjoram +hugger +crampon +fairy godmother +band-tailed pigeon +snow-on-the-mountain +minibar +foreland +grosgrain +dita +rampion +calligrapher +jointed charlock +master +sheepshead +barrelhouse +Carolina allspice +mastic +brake pad +whiskey sour +casement window +conveyer belt +stolon +pavonia +shinny +witch elm +logwood +hostel +pageboy +vesper sparrow +pyrrhuloxia +common carline thistle +wafer +boysenberry +screw augur +hack +American white oak +governor general +Mother Hubbard +game fowl +drosophila +delft +nymphet +tollbooth +chough +Russian dressing +plum tomato +American saddle horse +dusky salamander +black medick +red valerian +cordage +Elastoplast +conacaste +backlighting +swell +riveting machine +cowpen daisy +openbill +water speedwell +picture hat +crested myna +servo +bletia +garden trowel +muscadine +common caper +false lily of the valley +aralia +sharp-tailed grouse +cigar smoker +bandoneon +Chinese alligator +crazy +point lace +charcoal +Texas horned lizard +marinara +backstay +Gatling gun +piston +game fish +fall armyworm +grammarian +beer hall +guadalupe fur seal +sugar palm +peanut +velvet ant +light machine 
gun +rya +cling film +adobo +myrtle oak +angelica +balsam apple +windbreak +brother-in-law +snap brim +automobile factory +clavichord +dusky shark +edible banana +altar boy +California lady's slipper +schoolbag +wax bean +Atlantic walrus +bullpen +straw wine +thatch palm +potluck +tamarind +charcuterie +sod house +tie rack +liebfraumilch +clinician +scarlet lychnis +Spanish iris +bread knife +water oak +bedpan +Angolan +bassarisk +Alaska fur seal +African wild ass +milk float +froghopper +Verpa bohemica +water cooler +chop suey +ranker +red helleborine +Prince of Wales +marmalade tree +car train +giant red paintbrush +desert sand verbena +right whale +baron +stevia +asterism +five-spot +catapult +Silex +fiberscope +refresher +beef Bourguignonne +snood +divot +waterproof +crabeater seal +Missouri primrose +bumper guard +rock opera +Lilo +coffee can +smokehouse +buffalo grass +propjet +ice tongs +poop deck +acorn barnacle +veal parmesan +shower room +collins +ringhals +silage +jawfish +trouser cuff +contour feather +songstress +rachis +White Russian +stanchion +mastaba +flatbed press +viand +legal representative +espalier +organic light-emitting diode +sushi +scorer +haricot +pinna +plectranthus +jungle cat +dried apricot +coach horse +white fringed orchis +veal cordon bleu +bath +dallier +marching order +donkey jacket +Panama tree +aerator +klaxon +pinnacle +shouldered arch +lesser celandine +common eland +Grand Marnier +cock of the rock +phlomis +Japanese umbrella pine +morning room +dead-man's-fingers +little auk +bascule +house paint +home fries +great skua +cesspool +flying gurnard +wild crab +checkerbloom +Wollemi pine +cheese dip +coif +charwoman +tea ball +waif +Arctic ground squirrel +parishioner +stabilizer bar +potentiometer +black cohosh +medlar +willow oak +cascara buckthorn +scoutmaster +Canada lily +poppy seed +paper mulberry +blackthorn +garrison cap +inductee +aeschynanthus +interior live oak +black spleenwort +wild service tree +sling +nicad +swab 
+sego lily +eiderdown +fruit cocktail +pallasite +weeping spruce +shiv +sea lamprey +coachman +half binding +American white birch +gainer +Concord grape +yellow birch +fucus +common room +io moth +red osier +crucible +galangal +salmagundi +pepper steak +cap opener +swizzle stick +tomato juice +Nobelist +Sarawakian +African monitor +sleeping beauty +stereoscope +curd +pyramid bugle +applejack +dosser +rake handle +pilot light +Eames chair +Scotch and soda +bell heather +dinette +blackpoll +dogie +sound camera +cattle guard +mashie niblick +edible cockle +monocle +steak tartare +partaker +sidesaddle +communications satellite +porkfish +water hemlock +drawbar +ultramicroscope +Jamaican cherry +craftsman +lovage +common apricot +drum majorette +backsword +smooth alder +Amniota +dribbler +theosophist +dolman +ivory tree +Green Beret +pipe smoker +mayoress +mignonette +crampon +henbane +kirtle +death's-head moth +instep +great St John's wort +lorry +black-necked cobra +ball carrier +Jordan almond +byway +earless lizard +marble +andiron +high-protein diet +buzzer +ice floe +crankcase +Bofors gun +sockeye +veery +Delaware +caravansary +prairie coneflower +star apple +suiting +cot +call forwarding +American gallinule +glossy snake +rose chafer +instant coffee +placket +Tarahumara +pulsar +philodendron +orange tortrix +cypress spurge +Welsh rarebit +music box +giant crab +vanilla bean +water thrush +prayer shawl +gouge +promoter +dagga +black currant +bitter cassava +drain basket +snare +digital audiotape +retainer +olive drab +gluten bread +graham cracker +cheddar pink +caregiver +spray paint +Anglo-American +boatyard +backbencher +Link trainer +bell arch +weir +arbor +millionairess +sour cream +earthtongue +crawlspace +crossjack +balalaika +crupper +western redbud +guinea hen +rangeland +gaboon viper +common louse +single-leaf +horseshoe +balsam poplar +triskelion +jack-in-the-box +jester +rain stick +glove compartment +imperial moth +Japanese beech +biotin +turnip 
+oligarch +western skink +mudguard +retsina +data system +green bristlegrass +visiting professor +beaded lizard +weathercock +Sloppy Joe +high tea +lightweight +record sleeve +cooler +nodding onion +pigs in blankets +torque converter +district attorney +bunting +orrery +radiator hose +common plum +wood spurge +calamus +chicken Kiev +pin +lath +telephone bell +thistledown +audiotape +gypsy moth +snuffer +pari-mutuel machine +peanut butter +hearthrug +sack +Old World yew +chives +stovepipe +xenolith +mattock +mangle +electric chair +backup system +Empire +blackwash +dodder +Allegheny chinkapin +finger plate +junk +brown rice +wild angelica +chinaberry +mason +rasp +den +violet wood sorrel +nosewheel +plenum +merino +kirtle +Igbo +ensign +sex symbol +Belgian endive +sugarberry +yellow salsify +purple emperor +atlas +African clawed frog +leatherjacket +midwife +sac fungus +European cuckoo +three-day event +Mexican poppy +wagon tire +armyworm +rain gauge +Oregon ash +columbarium +spectrophotometer +Milanese +pointing trowel +casualty +Eastern hop hornbeam +lobe +mouthpiece +au pair girl +giant water bug +Browning automatic rifle +laser-guided bomb +drone +white alder +cockleshell +mufti +gravy +berm +boat hook +marshmallow +pet shop +cowpea +tactician +wading pool +anchovy dressing +flip +shackle +Wedgwood +thick-billed murre +erecting prism +giant salamander +sleeper +quiver +chain store +wing tip +New World tapir +witches' butter +gendarme +ginseng +common maidenhair +graduate nurse +balsam pear +hoatzin +philanthropist +axle bar +gas meter +moth mullein +ragbag +Chinese cabbage +celery stick +rutabaga +scalpel +cape marigold +variometer +argali +brig +shuffleboard +wort +Orlon +epiphyllum +allice shad +coffee filter +solar telescope +Japanese linden +thinning shears +golden wattle +queen triggerfish +millinery +surfbird +flame fish +clove +dicamptodon +red-bellied terrapin +turmeric +baya +air horn +Indian coral tree +punnet +sharkskin +water crowfoot +bight +desert 
iguana +Texas toad +volva +dredge +Turkey red +chemical plant +gemma +dice cup +orange marmalade +mistletoe +surveyor +frozen orange juice +pallette +poultryman +burbot +courlan +captain +saddlery +bodyguard +dwarf tulip +black ash +pulse +nailbrush +tickseed sunflower +legless lizard +shirtwaist +polling booth +chickeree +garlic chive +common thyme +multichannel recorder +screw thread +sangoma +calliopsis +geoduck +colleen +bandicoot rat +pastis +swamp sunflower +scorekeeper +Honduras mahogany +Australian pitcher plant +triangle +elevator shaft +green pea soup +carrel +prairie aster +bird's-nest fungus +scarlet clematis +gook +mescal button +carcase +mulatto +ejection seat +strawberry daiquiri +goat grass +car battery +babu +chief of staff +monilia +Siberian crab +ridge rope +Morchella semilibera +nutmeg +moosewood +graham bread +California four o'clock +zwieback +velvetleaf +abelmosk +shadow box +corned beef hash +newsreader +backstairs +cutwork +sherbert +tooth fungus +angel-wing begonia +greasepaint +common milkwort +potato vine +CD drive +crepe de Chine +sporting man +koto +armet +barking frog +celeriac +drainage ditch +black box +steel blue +clotheshorse +corn speedwell +drawknife +spritsail +vichyssoise +modeler +pocketcomb +limey +suslik +cockpit +digester +brig +raita +troll +benedictine +rock wren +lock +Barnaby's thistle +school bell +school ship +Soave +falchion +swaddling clothes +terrine +smoke screen +rivulus +sweet lemon +cullis +bustier +peppermint +Philadelphia fleabane +Hampshire +active +charnel house +face guard +Quebecois +facilitator +tongue depressor +bitternut +heath aster +sapodilla +bluestem +centrist +Canterbury bell +needlenose pliers +groats +tapa +Qatari +paper feed +tilt-top table +plastering trowel +brazil nut +rotogravure +patriot +manicurist +bacon and eggs +puffbird +lightweight +golden willow +kaiser roll +duff +girandole +seaside daisy +Kurdistan +Skivvies +showboat +fire bell +lock-gate +greater masterwort +weald +ice ax 
+toetoe +mess kit +bucking bronco +black turnstone +backscratcher +backpacker +basement +marbleization +trigger +satsuma +fall-blooming hydrangea +mountain lady's slipper +yellow oleander +crookneck +ex-president +Venn diagram +psaltery +bulwark +old boy +linear leaf +aril +butt weld +fall webworm +pruner +bald-faced hornet +nougat +tailgate +field speedwell +potsherd +center punch +long beech fern +desert paintbrush +canyon treefrog +bushel basket +Eurasian +swamp horsetail +cryptanalyst +wicket +school newspaper +captive +spider brake +electric mixer +tumbleweed +mason wasp +sash window +paddock +wet bar +oxtongue +stevia +wheat rust +scute +switch engine +mud dauber +dotterel +snailflower +common barberry +mulligatawny +cinnamon bark +cigar box +trivet +proof spirit +cream soda +western grey squirrel +baby powder +Bren +Japanese yew +sailcloth +Basket Maker +bannock +basidiocarp +aphelion +erect bugle +limiter +bosc +Przewalski's horse +helmet orchid +audiometer +battle cruiser +grass widower +staphylococcus +Congolese +common pitcher plant +parliamentary agent +Virginia snakeroot +mockernut +Siberian elm +backbench +rough +chervil +chlamys +nationalist +galantine +screwdriver +falsifier +cancerweed +spur +jerkin +porte-cochere +dill pickle +Montagu's harrier +tetrode +true fungus +American quaking aspen +vitamin B1 +leopard lily +eggdrop soup +aurochs +core bit +Jaws of Life +trousseau +parquetry +Disciotis venosa +tender +beef goulash +vitamin K1 +pepper spray +covered smut +hook +sports announcer +weapons carrier +foxtail grass +sloe gin +mezereon +antifouling paint +pavior +pile driver +security consultant +monkey-wrench +Indian hemp +amaretto +American wistaria +A-line +market strategist +rainbow runner +souvlaki +binturong +stiletto +gastrula +Vietnamese +Old World hop hornbeam +cold cathode +pier table +houndstooth check +prop root +leaf-footed bug +sedge wren +Dutch iris +drop curtain +opossum rat +lame +pollen tube +doubletree +compression bandage 
+pinon pine +catmint +pier arch +kingmaker +deanery +loofah +fullback +fencing mask +flying boat +carpet sweeper +lemon-scented gum +Accipitriformes +kit +pigfish +clipper +dolmas +lesser centaury +blood agar +water violet +raw milk +lemonade +vicar-general +supply closet +Anzac +confectioner +ignition key +velvet grass +white willow +John Dory +ruddiness +wheel +common horsetail +hubbard squash +speculum +Spanish bayonet +mountain mint +glint +foxhole +housemate +bootjack +sleigh bell +clog dancer +Mexican mint +rendering +Hausa +star saxifrage +spring squill +clothesbrush +liquid metal reactor +Columbia tiger lily +sorrel +cartwheel +Jersey +Caucasian walnut +desert willow +surveyor +elbow +Santa Gertrudis +fringe bush +industry analyst +lyrebird +Cortland +arroz con pollo +catechist +tank top +jew's harp +cereal oat +heartleaf +short sleeve +butty +butterfly plant +stud finder +felloe +beer garden +clevis +wood warbler +demerara +cornetfish +mince +Jamaica rum +Spanish broom +binnacle +camise +ferrule +Copt +hall +minicar +scimitar +cryptogam +miter box +limestone fern +Marsala +Parliamentarian +gravy +woolly bear moth +formula +squash bug +pigmentation +plate +skin graft +radiotelegraph +hellbender +soft pedal +lavender cotton +propagator +Bailey bridge +cottage pie +rotgut +A battery +pintle +off-line equipment +European swift +shrimp butter +plumb bob +trunk lid +succotash +yellow cypress +heartleaf +antelope squirrel +sambar +maternity ward +deciduous plant +bartlett +Riesling +sour cherry +Klansman +poke +academician +sociolinguist +bird's nest fern +common privet +scale fern +tachograph +oyster stuffing +pusher +green June beetle +staghorn sumac +lockage +master +bap +harlequin +blackfly +spotted coral root +kahikatea +cabana +riot gun +apple mint +kob +praline +confidant +pahautea +float +city father +Zen Buddhist +pessimist +conference center +banksia rose +comfit +sweet cicely +winged bean +henroost +myope +bunt +nailfile +yellow mountain saxifrage 
+cruise control +abandoned ship +water chinquapin +spanker +wing nut +puccoon +pier glass +Atlantic sailfish +medlar +buttercrunch +rough-skinned newt +planter's punch +Dutch iris +control key +committeewoman +torpedo-boat destroyer +garambulla +tree heath +gladiator +September elm +inclinometer +snowbell +call-in +sunsuit +microfiche +bluestocking +cheval glass +server +franking machine +sugar syrup +Macoun +transport ship +alderfly +wash-and-wear +Abbe condenser +bush nasturtium +wild leek +canary seed +Northern Baptist +sweet wormwood +jaboticaba +cardroom +autoradiograph +ash-pan +sprinkler system +rattrap +claymore +parts bin +forest red gum +thermonuclear reactor +Indian crocus +lector +heir apparent +leafy spurge +masquerader +varicella zoster virus +cucumber tree +hedger +Shumard oak +zooplankton +quartermaster +arrester +bridge +hop clover +meadow foxtail +winter hazel +portable circular saw +penuche +limpa +blue toadflax +mesophyte +Alpine anemone +pet sitter +avocado +streptococcus +fiber optic cable +river red gum +hornist +chicken taco +red spider +tape grass +densitometer +salmonberry +tiger snake +hot toddy +silver fern +candlenut +buckram +local call +defoliator +king +mahoe +lever lock +social insect +winter purslane +bootblack +fireball +ramie +bellbird +prepuce +capote +Chinese forget-me-not +Pisces +costume +California black oak +tree lupine +golden polypody +liger +California whipsnake +urodele +sapodilla +skillet bread +duckpin +supremo +asparagus bean +kampong +endameba +cow pony +rider +motherwort +Persian iris +soursop +kohlrabi +Parisienne +irons +doubles +feijoa +farmplace +cottage cheese +bezoar goat +subcontractor +blunderbuss +down +purple martin +Lapp +crenate leaf +tobacco pouch +beach towel +Santa Lucia fir +monetarist +stringer +ocellated turkey +Texas purple spike +ackee +caddy +hedge mustard +second-rater +strawberry bush +valedictorian +steak sauce +prairie gourd +aspirant +mint +Valenciennes +vodka martini +American persimmon 
+big brown bat +Mycenaen +mouthpiece +norfolk island pine +pennyroyal +Jewish rye bread +granadilla +tract house +wall +shuttle helicopter +blackjack oak +Lippizan +storm window +white zinnia +sickle +sushi bar +polish +baldric +brooklime +church hat +control circuit +vicuna +death adder +eukaryote +durmast +field soybean +jacket potato +wild basil +queen consort +brooklime +octant +blue false indigo +broccoli raab +step-down transformer +date bread +blue ash +duffer +oak chestnut +pennant +wedge +Florentine iris +morion +weakfish +morning dress +public address system +spearmint +Ashkenazi +sow +interpreter +Metis +pita +iron lung +parfait glass +cylinder lock +immortelle +obstetrical toad +tee hinge +successor +western +working girl +julienne +AND circuit +spaghetti junction +fer-de-lance +enlisted woman +star +lightning rod +bilge pump +pacer +horse nettle +African oil palm +blastocyst +air hammer +bamboo fern +remote terminal +lambkin +money cowrie +Pelham +clinical thermometer +wiggler +guru +false indigo +tea bag +foredeck +king +baby shoe +mule +grab bag +silver-bell tree +knitting machine +cobia +roulette ball +larder +button pink +rumble seat +noria +queen mother +solar thermal system +aquaplane +highbrow +rusty blackbird +desktop +lima bean +pontoon bridge +watercress +wild cabbage +tumbleweed +dressing sack +compact-disk burner +spittoon +marrow +sporophyte +second fiddle +pot-au-feu +specialty store +dry +mole +khadi +japonica +lovage +squamous cell +lobe +European creeper +brown pine +bladderpod +rumble +French Canadian +mascarpone +Pacific halibut +perennial ryegrass +wine lover +turbot +longwool +silver tree fern +dust cover +synchromesh +corn pudding +alpine azalea +garboard +cane sugar +observation dome +condensation pump +hind +taximeter +hand drill +gas thermometer +jammer +buffing wheel +handstamp +prairie mallow +turkey stew +sun spurge +duck pate +kibble +Cassin's kingbird +apadana +Devon +grinner +oocyte +blank +header +schoolmaster +guard 
ship +intravenous pyelogram +rimu +luff +Mediterranean fruit fly +singlestick +lady-in-waiting +curb +birch +limekiln +orthoscope +serotine +Spanish oak +swamp cottonwood +edger +city man +picnicker +white basswood +Parsons table +Christmas begonia +perspirer +Pacific tree toad +Cape tulip +finger bowl +blue pike +greengage +handcar +milkweed +potbelly +river dolphin +creel +typewriter carriage +banteng +pawnbroker's shop +huon pine +biennial +man of action +foundress +caveman +featheredge +jordan almond +sandblaster +coralberry +low-calorie diet +hoot owl +garter +bain-marie +wrecker +fenugreek +double-hung window +idol +scullery +balloon vine +summer savory +winged spindle tree +Helvella crispa +walrus mustache +gas engine +boulle +rush grass +rue +hoe handle +cat fancier +deerstalker +dunker +American red plum +fall dandelion +groover +sprag +stair-rod +wish-wash +pricket +architrave +California laurel +net melon +Arizona sycamore +executive secretary +silverweed +silky cornel +surface ship +square sail +common purslane +villa +holly-leaved cherry +sweet birch +pecan +artillery shell +breast pocket +pirogi +scarlet runner +rabbit brush +mealworm +leather carp +palette knife +Jerusalem sage +boneshaker +slit lamp +digital voltmeter +polar glacier +square-rigger +homogenized milk +Sten gun +lesser calamint +pyrograph +Korean lawn grass +Zinfandel +crepe fern +western ragweed +clasp knife +distributor housing +cartouche +scooter +ski parka +jackknife +Carolina spring beauty +soft diet +candlesnuffer +horse trader +step stool +agouti +accelerometer +annual fern +judge advocate +angelica +roll film +treehopper +ombu +comer +sultanate +kitchen help +hooded ladies' tresses +milking machine +knuckle joint +Jamaica honeysuckle +music teacher +sauerkraut +Weston cell +slivovitz +Worcester sauce +tall bellflower +chancery +prophetess +casquet +shortfin mako +sorus +visual display unit +asp +grenadier +black pepper +crottle +erasable programmable read-only memory +jabot 
+ratchet +disk controller +chief petty officer +tap wrench +white mountain ash +cultivated rice +flying phalanger +skillet corn bread +BB gun +Elamite +European red elder +reed rhapis +ciderpress +inga +torpedo +wild teasel +bean curd +oeil de boeuf +acuminate leaf +bitter lemon +hitchrack +Lorraine cross +hostess +European dogtooth +adz +polonaise +rock sandwort +Waldorf salad +myrmecophile +klystron +mole rat +draba +corn borer +robusta coffee +chub mackerel +leatherleaf +chronometer +Moselle +sea aster +fennel +slop basin +constable +Brunswick stew +hydraulic pump +French omelet +icebreaker +Manx shearwater +press of sail +ninepin +blue succory +bootstrap +hallstand +chit +firefly +bearded seal +fuel filter +jezebel +mate +Roquefort +cheesecloth +plasterer +blue pimpernel +lake dwelling +shrink-wrap +goat cheese +common gum cistus +coastland +Sunday best +wild tobacco +mandrake +common unicorn plant +barbican +culotte +blockhouse +German iris +tarragon +caramel +wild rosemary +grain +voyager +squirting cucumber +eastern narrow-mouthed toad +creeping fern +luge +saffron +garland flower +furnace room +starship +Oriental scops owl +Italian honeysuckle +berserker +Chinese elm +scrubber +bishop pine +French polish +compromiser +skimmer +river shad +lobster thermidor +leadwort +man-of-the-earth +razorblade +vicegerent +empress +link +ham and eggs +wild lily of the valley +blackfish +splicer +fossa +mara +moneygrubber +brachiopod +fauteuil +caldera +finish coat +croupier +termer +leopard's-bane +sei whale +molucca balm +dolly +dog food +term infant +soft roll +episcia +sewer +inquiry agent +active citizen +perry +California newt +moon shell +bladderwrack +common shrew +dill +Dutch elm fungus +key lime +electrometer +divorce lawyer +lamb's-quarters +apple turnover +shipmate +Guernsey +legionnaire +electric blanket +Rocky mountain pinon +tobacco mildew +stinking iris +forestiera +departure lounge +wiper motor +jurist +scarlet runner +pallbearer +batter's box +inertial 
guidance system +fines herbes +oilcan +sisal +mustache cup +steamed pudding +Visayan +fiesta flower +lady tulip +lungless salamander +batiste +electrical system +blazing star +car carrier +Walloon +mother hen +stump +mulled cider +secondary coil +Alexandria senna +etui +scrumpy +Havasupai +jawbreaker +glume +ex-husband +Eskimo +Joint Direct Attack Munition +number theorist +five-hitter +pinstripe +Olympian +common mackerel +stone bass +bigos +Bahraini +airbrush +great ragweed +glass lizard +hand fern +roundel +riding master +shoetree +yellow avens +old fashioned +dolman +stinger +nursling +legate +faille +golden fern +bedpost +shop steward +kidney bean +bladderwort +internist +limeade +Bruneian +Coloradan +playsuit +wintergreen oil +Cantabrigian +mutton snapper +shot putter +hand grenade +moccasin +cobnut +marrow +separatist +cockscomb +discharge pipe +Gabonese +spade bit +chicken cordon bleu +varnish tree +European wood mouse +striped gentian +Ayrshire +curassow +moo goo gai pan +malarial mosquito +glow tube +ledger board +bib-and-tucker +European chestnut +suffragette +color wash +gaffsail +golden larch +voting machine +Kahlua +lungi +amusement arcade +Uzbek +butternut +mold +mule's ears +dickey +shrimper +trophozoite +dreadnought +shepherd's purse +greenhouse whitefly +spotted gum +copperware +perfect game +semigloss +spawn +telecom hotel +stakeholder +mason wasp +flibbertigibbet +chin strap +fringed pink +saki +urchin +memorizer +roulade +whiting +cling +corncrake +Queen of England +choo-choo +empty +heating pad +playmate +visualizer +popcorn ball +absconder +sou'wester +target acquisition system +mock-up +dental floss +tray cloth +haddock +bulblet fern +housing commissioner +delayed action +anchor light +harbor porpoise +water wings +PT boat +night latch +fennel +doorframe +green-tailed towhee +grey polypody +torture chamber +American germander +Chinese wistaria +cattalo +accompanist +rifleman +alpine clover +contrarian +lemon peel +Mexican cypress +sprog 
+dado +Galilean telescope +desmid +lockup +Latin +American raspberry +mescal +butternut +prairie orchid +downy yellow violet +green hellebore +radio compass +bread and butter pickle +Cherokee rose +knish +destroyer escort +Arkansan +langlaufer +pyxis +winter savory +velocipede +motley +winter savory +law student +barren ground caribou +apple dumpling +field hospital +works +city editor +European flatfish +Morchella crassipes +life office +boot camp +cream sauce +cape aloe +acetate disk +devil ray +tile cutter +Plymouth Rock +microspore +godown +Syrian +tiercel +American cranberry +lesser spearwort +anopheline +Spanish oyster plant +wire cloth +attic fan +birch beer +small computer system interface +crook +ribbon fern +explorer's gentian +nagami +I-beam +rosebud cherry +Jerusalem artichoke +Stillson wrench +pluralist +district manager +Levantine +orangeade +part-timer +post horn +Oregon grape +contadino +cargo helicopter +silverpoint +chaja +California bluebell +case +Shasta +cheese cutter +Leishmania +avalanche lily +iron horse +bialy +Yana +Delawarean +Prussian +nonpareil +hammer +hoper +chewink +anil +skim milk +desert four o'clock +crescent wrench +white marlin +blue jasmine +malacca +anadama bread +purple poppy mallow +ganglion cell +ligature +no-parking zone +golden clematis +Cotswold +aliterate +shebeen +yardarm +superbug +fanaloka +stinking cedar +spirochete +wort +pater +heaume +thermocouple +ironing +naval tactical data system +European goatsucker +prairie cordgrass +accused +foreign agent +halberd +western mugwort +esthetician +Persian lilac +cracked-wheat bread +crosscut saw +rock penstemon +paper cutter +crematory +ideologist +cattley guava +margarine +creosote bush +hoary plantain +spark gap +lumberjack +Greek valerian +mission bells +tight end +bigeye +large crabgrass +stone marten +cleat +lentil +bay scallop +lector +charger +assemblywoman +second lieutenant +boil smut +sarsaparilla +hydromel +cat flea +pinfish +whole milk +hairnet +myeloblast 
+peasant +blind curve +first offender +dwarf-white trillium +Brother +coatdress +gun emplacement +tamarisk gerbil +snap +air cushion +trailing edge +potato vine +gig +everlasting pea +champion +dibble +rattail cactus +timothy +prince's-feather +cutlas +lockring +sealing wax +Brussels lace +corn mint +highboard +she-oak +wild celery +pillar +Burberry +Hakka +leucothoe +bell tent +gallery +coontie +leather fern +smack +adenovirus +linoleum +chain wrench +tammy +gas fixture +nut bar +baneberry +butterscotch +goat's rue +bullock +grey snapper +mother-in-law +hyson +wayfaring tree +mollie +needle spike rush +buckwheat +bayberry +brush-tailed phalanger +dry rot +harborage +stormy petrel +Oriental beetle +Atlantic halibut +coping saw +simple fruit +viscose rayon +surgeonfish +upstairs +security system +common ragweed +verticillium +pancake batter +hawk's-beard +Dutchman's-pipe +refrigeration system +European parsley fern +Ivy Leaguer +totalitarian +gonococcus +towhead +showy sunflower +pallium +multiengine airplane +hair trigger +rabbit-eared bandicoot +siskiyou lewisia +fuel system +flat arch +broad beech fern +Alpine lady fern +bracken +Kentucky black bass +rut +mountain maple +tunaburger +umbrella fern +white-headed stilt +meat hook +panhandler +washhouse +barnyard +safety lamp +leg +ripple mark +paper +sagebrush lizard +light heavyweight +common nutcracker +operator +stalking-horse +horseless carriage +fishhook +suction cup +peg +Ungulata +false teeth +round-bottom flask +Luba +campaign hat +firebox +rudder +parapet +ice pack +appellant +spirit stove +metheglin +common bamboo +soapwort gentian +pannikin +time capsule +burn bag +folk poet +tropical prawn +end man +new caledonian pine +linen +web +free trader +jury box +railing +pignut +leaker +potboy +rubber boa +white snakeroot +plumber +Candida albicans +surfboat +woman +promulgator +eyecup +wild China tree +rattlesnake master +Viyella +alpine salamander +ailanthus silkworm +Albatrellus ovinus +war room +meadow vole 
+robotics equipment +rotary actuator +Engelmann spruce +pinesap +beefcake +native speaker +ridge +injector +water chute +salmonberry +decoupage +bottlebrush +date plum +circlet +American mountain ash +pocketbook +horsemint +sweet four o'clock +kirpan +pinto bean +chervil +equator +range animal +candy thermometer +calanthe +cul +stipendiary +brahman +pelican crossing +topgallant +wild senna +sliding window +carrier pigeon +Tatar +quadruplet +bumboat +spearmint oil +slip clutch +young Turk +golden yarrow +shank +glasswort +dental plaque +Manduca sexta +Northern bedstraw +dent corn +Life Saver +western wall flower +bedder +wherry +Tuscarora +scrapple +borstal +reflux condenser +problem solver +nondriver +perforation +eastern cricket frog +white wood aster +broad buckler-fern +Cape primrose +herringbone +head louse +earl +baton +recording system +primary color for light +cherry laurel +pomfret +ratafia +chocolate milk +obscurantist +revisionist +rood screen +magnetic needle +commensal +oil tycoon +celebrant +domicile +harvest mouse +California nutmeg +greater spearwort +black-billed cuckoo +winepress +demographer +straw boss +diabetic diet +sweetmeat +rabbet +ming tree +basketweaver +freestone +walk-in +Aryan +box coat +audio amplifier +chicken salad +churidars +whydah +box +batman +siren +selectman +gouger +drip coffee +Caesar salad +interpreter +whinstone +grey goldenrod +minicomputer +honey crisp +hypercoaster +Irishman +swamp white oak +reed canary grass +globeflower +cynthia moth +fennel seed +canthus +chino +blind date +tar pit +watermelon begonia +fishtail palm +overcast +Pearmain +primary color for pigments +coal seam +wherry +safety bolt +cretonne +Michigan lily +inflater +moneybag +huckleberry +brassard +bush vetch +looking glass tree +pinwheel roll +alfalfa sprout +sea kale +clinometer +achira +lorgnette +potter wasp +gilded flicker +tody +capulin +captain's chair +crackle +gerardia +prie-dieu +venture capitalist +New Jerseyan +block and tackle +elf cup +bur 
reed +automatic transmission +wax palm +flytrap +crack willow +coachwhip +swizzle +lugger +Dewar flask +baster +oxyacetylene torch +Culex quinquefasciatus +St Peter's wort +wild hyacinth +Russian almond +burrfish +wintergreen +katsura tree +butcher knife +perfumery +thresher +porte-cochere +sheepwalk +hypotenuse +Dalmatian iris +buttercup squash +demiglace +goldenseal +preceptor +rigger +poikilotherm +old-age pensioner +posthouse +wood horsetail +repeater +reciprocating engine +Rambouillet +terra cotta +togs +battledore +horizontal tail +missile defense system +trier +morello +woolly adelgid +munition +double creme +in-fighting +squirrel corn +crow's nest +antler moth +brake cylinder +bandoleer +noticer +Parmesan +hipline +cheapskate +Dubonnet +mole rat +bog aster +ribbon tree +meadow rue +nard +ratel +loose smut +snapping shrimp +golden glow +basil thyme +Florida strap fern +moonshine +flume +lace fern +black bream +orchestra pit +archerfish +exile +ringdove +career man +godfather +bottom-feeder +pasteurized milk +dental implant +pedicel +Catalpa speciosa +yellow foxglove +lancet arch +steam shovel +sampan +patrol boat +sailor cap +tollgate +monal +velociraptor +cacique +jack oak +cursed crowfoot +creep +Parry manzanita +common matrimony vine +grace cup +caecilian +spurge laurel +prickly lettuce +Regius professor +camail +Sitka willow +Courtelle +gin sling +dogmatist +guest +saltine +dust cover +sport +sweeper +feist +lady's-eardrop +vibist +wire stripper +tenpin +interplanetary space +beet green +pruning knife +drainage system +gunnery +ballet master +lime juice +flak catcher +lacrosse ball +Canadian aspen +beatnik +railhead +utilizer +spadefish +Arizona white oak +city university +dense blazing star +hedger +chain pickerel +right-hand man +namby-pamby +nacelle +redneck +tumbler +Chief Secretary +cannon +cupola +kummel +papaya juice +Burton +Stanley Steamer +loganberry +stylus +square meal +rock bass +western ladies' tresses +dramatist +assignee +tandoor 
+trumpetwood +segregator +green adder's mouth +coral necklace +ani +iceboat +densimeter +oxtail soup +kernel +cos lettuce +greenishness +panchromatic film +Parker House roll +oatmeal +backsaw +double Gloucester +bailey +storage cell +giant +coconut milk +broadtail +barouche +loir +soybean meal +white-leaved rockrose +junction barrier +spandrel +sweat bag +goldilocks +flowering wintergreen +cockspur +beef fondue +holding cell +cardamom +cagoule +Kamia +tangelo +Herschelian telescope +wine bar +kachina +sand sage +guy +ivory palm +citrus mealybug +topper +ladyfish +force pump +fanion +calaba +Iowa +orrisroot +ivorybill +Secretary of Agriculture +gagman +dry cell +hypnotist +kenaf +grey alder +deathwatch beetle +gagman +magnetic stripe +trap door +abdominal wall +prefab +broomcorn millet +architeuthis +angler +Pacific giant salamander +barbette carriage +low-fat diet +veal scallopini +B battery +wallah +landing flap +pistachio +jaguarundi +nagi +cicerone +felt fungus +Aertex +stocks +smooth aster +patchouli +lemon sole +sleeper +basket fern +dundathu pine +anjou +Moreton Bay chestnut +broom sedge +candid camera +red angel's trumpet +oilstone +cinnamon toast +Pacific walrus +fruit custard +Jehovah's Witness +mate +voyeur +Esselen +achromatic lens +sanguine +brine shrimp +dunce cap +swot +transit instrument +grey willow +pack +bench clamp +Nova Scotian +gadgetry +silvery spleenwort +enchantress +rough fish +morula +giant taro +sorus +roux +polyhedral angle +spruce beer +Chicano +cola extract +outfielder +kohleria +white-rumped shrike +car-ferry +subway token +spoon bread +totara +corn borer +bowhead +tensimeter +water scooter +flickertail +Catholicos +pleaser +blue-eyed Mary +calabash +handyman +cascades frog +facing +scarlet oak +lutist +ginger +tree tomato +Harvey Wallbanger +tent peg +insectivore +fusil +swale +chinning bar +bladderpod +New Dealer +dhoti +proscenium arch +common vetchling +channel +collect call +safflower +Texas tortoise +test equipment +theca +RAM 
disk +sheep sorrel +rammer +buttonhook +honey mesquite +dominus +babirusa +queen +Aspergillus fumigatus +crash barrier +nonmember +Muscovite +verdin +Australopithecus afarensis +Turkish Delight +stalked puffball +giardia +divider +mountain skink +head smut +pacemaker +evaporated milk +rattlesnake fern +flamethrower +navy bean +bather +steed +showy orchis +stone crab +artichoke heart +phantom orchid +space helmet +swamp laurel +privateer +junior +surcoat +bristlegrass +flower girl +aphid lion +penthouse +lemonade mix +coude telescope +natal plum +scriber +wood nettle +rape suspect +resplendent quetzel +western poppy +choir loft +fore-topsail +thyme-leaved sandwort +erotic +short circuit +outdoors +flowering tobacco +hookup +aviatrix +corker +horehound +horn +swamp pine +water biscuit +cherimoya +vaporizer +courtier +European sole +full skirt +Mother Carey's chicken +cymule +huck +white snapdragon +mountain nyala +country borage +bonduc +casein paint +grampus +shrimpfish +lodge +dragee +black walnut +caraway seed +roper +glass cutter +tab key +Richardson's geranium +demigod +chichipe +Italian ryegrass +cadet +electrograph +rudd +carpenteria +foie gras +lignum vitae +hedge nettle +pledger +American hackberry +flageolet +beaked hazelnut +reflectometer +sticky geranium +marriage bed +white pepper +japanese clover +whiteface +gnat +extrovert +Canada plum +talipot +chicken stew +egg foo yong +fraxinella +skibob +saucer magnolia +jacket +green smut fungus +cloakroom +landing skid +booth +ice milk +dipole +striped coral root +red buckeye +roughcast +breaststroker +cowherb +razor clam +first-aid station +briarroot +clambake +lander +Bramley's Seedling +frail +jird +minisub +luging +poison milkweed +European lobster +epidemiologist +spandex +paloverde +marumi +bypass condenser +punter +petty spurge +Coryphaena hippurus +bilberry +vermillion rockfish +witness box +viscometer +pulque +Massachusetts fern +herring salad +ridge tile +mesa +dwarf grey willow +southern aster +punch 
pliers +tarnished plant bug +hoop pine +Japanese red pine +benedick +rebozo +silver plate +silver willow +mouse-ear hawkweed +bonito shark +abutment arch +noble cane +tiger rattlesnake +pongee +jumping plant louse +pattypan squash +giant ryegrass +railroad bed +stiff aster +imperial Japanese morning glory +laundry +winter cress +large white petunia +tea maker +pen-and-ink +early warning system +lug +monocot +sea wormwood +breechblock +postage meter +third rail +Mongoloid +Australopithecus boisei +umbrella tent +stirrer +Dumpy level +beroe +post and lintel +green spleenwort +tomato paste +dishpan +stentor +sweatband +cobbler +New York fern +gaff +prairie willow +cyclops +jigsaw +rotavirus +pallet +eastern ground snake +boiling water reactor +acute triangle +agora +European cranberry +roebuck +surgical dressing +busboy +cannikin +feedlot +common pond-skater +cochin +horsehair lichen +fetter +sapote +fichu +dermatologist +fire tongs +creme anglais +foster-mother +laurelwood +chicken snake +mincemeat +rocker +wild spinach +powder and shot +butterwort +auxiliary engine +mamey +hart's-tongue +sucking pig +American turkey oak +troopship +buttermilk +divi-divi +boatswain's chair +soda fountain +southern flying squirrel +elastic +cutaway +housekeeper +renegade +apple rust +bridoon +machicolation +stunt +keyhole limpet +personality +solitary vireo +epidendron +Jihadist +boffin +bettong +terror +partial denture +pusher +saltcellar +capstan +large poodle +Bibb lettuce +low-bush blueberry +staple +banded krait +sickroom +barnyard grass +wandflower +woodworm +bluegrass +squirrel's-foot fern +rabbitfish +delta wing +milking shorthorn +limber pine +guru +gamine +scythe +sweetsop +Gruyere +bloodmobile +mine detector +American mistletoe +silver beech +hound's-tongue +Lombardy poplar +basket fern +pink-and-white everlasting +redtail +Aladdin's lamp +mace +outtake +condensed milk +Canada wild rye +silver perch +waxflower +taxer +Chinese chestnut +Our Lord's candle +mugwump +school 
system +salp +osso buco +dress shirt +butterweed +low-fat milk +couchette +broomcorn +proscenium +mill agent +smut grass +humpback +southern spadefoot +military leader +canebrake rattlesnake +tailor-made +ebony +beach house +flying gecko +hoary alison +typhoid bacillus +Romanov +vanilla pudding +sweet cicely +Spodoptera exigua +dress rack +flannel +skipjack +bolognese pasta sauce +rooibos +thunderer +blessed thistle +gauntlet +mahatma +granadilla +laurel sumac +Yuma +thyme-leaved speedwell +encyclical +twill +linocut +manna gum +spark arrester +cocklebur +Indian hemp +lemon oil +Hall's honeysuckle +raceway +flop +Himalayan lilac +one-flowered wintergreen +photosphere +silvery spleenwort +convex polygon +canarybird flower +foster-sister +fluffy omelet +palanquin +roll +dandelion green +Javanese +workpiece +Carmelite +bread mold +schlemiel +wild lily of the valley +grugru +solenoid +puff batter +skep +balance wheel +Gadaba +portia tree +mobcap +two-man tent +scuffle +firebrat +ant lion +anise +caster +giant petrel +American water spaniel +naboom +treasure ship +foster-son +fiddleneck +alidade +sugar refinery +wild oat +water beetle +generic +damson plum +abrocome +detainee +pitch pipe +coast +nilgai +radiotherapy equipment +heart-leaved aster +gristmill +grocer +Appaloosa +Cheviot +brake pedal +lantana +cave myotis +Rob Roy +sea spider +latrine +carpophore +recycling plant +coondog +brace and bit +funambulist +eggar +mantelet +postdoc +mezzanine +coco plum +pulse generator +high-vitamin diet +menhaden +mechanical engineer +bergamot mint +Chuvash +grated cheese +helicon +belladonna +beet armyworm +eelgrass +resuscitator +interrupted fern +arrow grass +cistern +Pacific herring +colostrum +journal bearing +Fauve +wrist pin +canape +choice morsel +quadraphony +guard boat +shortgrass +claymore mine +hitching post +cargo door +decoder +gym rat +Cocopa +commander +apple of Peru +seckel +yellow goatfish +dog flea +dodo +oconee bells +Tudor arch +turkey stuffing +ebony 
spleenwort +wheat flag smut +scolopendrium +Brazilian pepper tree +gusset +inspector +lunar excursion module +baron +plantigrade mammal +Creole +phosphate +aromatic aster +ghee +audiovisual +onychophoran +cotton stainer +lieutenant junior grade +spheroid +amen corner +caper sauce +Caladium bicolor +dyer's rocket +seaside goldenrod +flint corn +Very pistol +rotifer +steeplechaser +rouleau +escape wheel +Namibian +millivoltmeter +emmer +climatologist +agateware +sea lyme grass +inclinometer +water fennel +saddle seat +vicar +garden cress +ski rack +Norfolk jacket +casaba +coast rhododendron +sericea lespedeza +hematocrit +autopilot +tilter +finish coat +Pennsylvanian +shrubby St John's wort +podocarp +percussion cap +ceriman +peanut bar +gean +jack +durra +rotor +carob +cottage tulip +three-spined stickleback +trencher +elevator +kalumpang +abaca +Australopithecus robustus +active matrix screen +water bed +hatmaker +lodestone +cat food +overcup oak +balletomane +popgun +rheometer +process cheese +frog legs +heartleaf arnica +p-n-p transistor +steam turbine +Tulu +scalene triangle +licorice fern +coffee break +trade unionist +starved aster +firing pin +water gum +Masonite +hairspring +seminarian +blue racer +forecastle +scrub pine +Atlantic spiny dogfish +kopje +orphrey +fan tracery +gee-gee +vixen +interstellar space +Harris Tweed +sawmill +lemon mint +bitewing +ringlet +Chinese mustard +paleontologist +American hazel +brigantine +clay-colored robin +zombie +nectarine +West Indian jasmine +pineapple weed +rusher +gynecologist +pole +thylacine +myrtle beech +golden cup +woodruff +T-bar lift +terebinth +service club +homegirl +Blue Mountain tea +figwort +New Hampshirite +Stayman +tonometer +white turnip +messuage +cruet-stand +colliery +connecting room +lesser twayblade +bland diet +crown prince +beggarwoman +restharrow +bower actinidia +firebug +hepatic tanager +telegraph +Spodoptera frugiperda +spackle +carpenter's square +pyx +supermom +thickhead +whorled milkweed 
+Arctic char +Chinese rhubarb +pince-nez +wolverine +tomato concentrate +cascarilla bark +red underwing +leather flower +Jerusalem thorn +bullpen +Salisbury steak +anode +coffeeberry +bottling plant +fritter batter +aerial torpedo +matrix +local oscillator +stalked puffball +bruin +three-cornered leek +wassail +stabling +damping off fungus +myriapod +osier +lesser kudu +cownose ray +chokecherry +wagon +obstetrician +Glengarry +even-pinnate leaf +wine sauce +osteocyte +baker's yeast +heir presumptive +blackjack +tympanist +golden fern +fipple +Japanese oak +bar mask +stamping machine +argus +knobcone pine +oil beetle +lanai +upper berth +condenser +proctologist +catechu +wild spurge +vestry +ground snake +proton accelerator +walker +scarlet bush +transom +lagging +bouillon +slender loris +black currant +developer +football hero +plum sauce +striped mullet +prince charming +fictional animal +prosimian +lug wrench +lemonwood +kirsch +spy satellite +black caraway +Thompson Seedless +bead tree +purple fringeless orchid +Virginia strawberry +chigetai +punkie +gall wasp +addressing machine +rock polypody +good-king-henry +spring cankerworm +wimple +noncandidate +saskatoon +hacienda +Darjeeling +snowberry +lounging pajama +ascospore +ski-plane +hedgehog cereus +Welsh onion +yautia +coaster brake +sickle cell +parrot's beak +fuller's teasel +painted greenling +scablands +stuffed cabbage +barrel organ +etcher +dwarf maple +camp +Australian blacksnake +currycomb +obtuse triangle +rose gum +psychrometer +abridger +torpedo +carpet loom +sodalist +slender rush +loligo +sclerometer +wimp +dotted gayfeather +green ash +pinstripe +moralist +medusa's head +garden centipede +heath aster +fool's parsley +olla podrida +Potawatomi +Edam +toothache tree +hulk +seabag +narthex +compartment +prairie star +lookdown +B-flat clarinet +event planner +clip lead +shirting +milk punch +supercharger +macadamia nut +giant coreopsis +computer store +martingale +keyboard buffer +summer flounder 
+squash ball +gas turbine +object ball +plier +black mulberry +reef squirrelfish +scampi +willow aster +bowler +striped marlin +smooth muscle cell +diplodocus +Liberty ship +sponge cloth +guitarfish +walking leaf +showroom +California bluebell +bolo +turnbuckle +boysenberry +hardware +Gael +imago +endorser +jujube +dust bag +rapporteur +field wormwood +low-water mark +naval missile +Pacific yew +reversible +crabapple jelly +poniard +barricade +spawner +simnel +seltzer +deckle edge +needle +timbale +satellite transmitter +organization man +job candidate +orderly +native cranberry +fir clubmoss +coaming +chartered accountant +electron accelerator +Sierra plum +American foxhound +long underwear +Penobscot +blueberry yogurt +biretta +cascara +Paranthropus +Dorian +nun's habit +lenten rose +Augustinian +designer +northern phalarope +mombin +hazel mouse +reeve +waffler +telegraphy +Verpa conica +ignition coil +Japanese oyster +S-shape +divining rod +ant thrush +throat protector +interlocutor +Desmodus rotundus +pere david's deer +attenuator +Cypriot +red sandalwood +pendulum watch +broadcloth +striped drum +sequence +safety arch +diapensia +hog +western spadefoot +chlorella +comb-footed spider +Chechen +darning needle +C-ration +hard beech +piano action +scaling ladder +Nepal trumpet flower +ravigote +screw wrench +ramekin +Lyonnaise sauce +dinner napkin +partial veil +masseuse +coatrack +mooring tower +blue-eyed African daisy +English horn +baton +rope tow +toll bridge +massage parlor +quark cheese +lounging jacket +tall goldenrod +flying jib +coordinate axis +barley-sugar +integrator +worm gear +captain +sweatshop +class +layer +chili powder +dripping pan +oatcake +newsroom +tadpole shrimp +rake +trade magazine +silks +ram's-head +senior +knower +masseur +yam +peg +wheel tree +hardbake +test room +long-spurred violet +creeping spike rush +shrapnel +coffee senna +matchbox +creeping soft grass +welder's mask +pickaback plant +urial +hooded pitcher plant +incense cedar 
+Ohio buckeye +ant cow +skeleton fork fern +Indiaman +swamp ash +testatrix +marang +spherocyte +Winesap +Indian mallow +teju +Yersinia pestis +dye-works +sauerbraten +coral bean tree +safe house +postulator +eyas +lotus +wood vise +lady-of-the-night +East German +cymling +rock candy +western omelet +anoa +rainbow seaperch +crossover voter +Finn +tree shrew +hog plum +Federal +shagbark +clockwork +Alexandrian laurel +metal wood +brill +military chaplain +trend-setter +call-back +Indian rat snake +spurred gentian +Japanese maple +forest goat +bee moth +viola da braccio +duckboard +armyworm +hangnail +counterbore +cream-of-tartar tree +Mullah +bonbon +water hazard +temple orange +corporatist +rough bindweed +Turkish bath +mistletoe fig +beach sand verbena +caddisworm +English plantain +brown Betty +power pack +lion's-ear +Francis turbine +stayer +dichondra +marsh St-John's wort +squab +energizer +common horehound +mantispid +pullback +handwheel +spark arrester +yakuza +Virginian witch hazel +grunter +waterworks +bondwoman +chain printer +stockjobber +coconut milk +yardgrass +blue chip +bridle path +riser +pleurothallis +saltwort +salal +broadside +blackboard eraser +bastard +Para rubber tree +red bat +digital-analog converter +calabash +cashier +cow shark +horned pout +microphage +monologist +woolly monkey +Illinoisan +marsh horsetail +distaff +siris +eparch +gooseneck loosestrife +sounding rocket +multiprocessor +saiga +xerographic printer +madrona +right triangle +sweet gale +red maids +wolfsbane +pork-and-veal goulash +French sorrel +mutterer +Venetian sumac +drumlin +white crappie +squire +large-flowered calamint +northern cricket frog +mushroom sauce +supertanker +morello +auxiliary boiler +Virginia thimbleweed +cottage tent +bubble shell +big shellbark +wormwood sage +cider gum +coast lily +American feverfew +Peruvian balsam +purple silkweed +tobacco moth +desk dictionary +rock elm +eastern indigo snake +Japanese privet +lamb +levee +L-plate +soapfish +painted 
tongue +scuttle +markhor +Marburg virus +mackinaw +major +crypt +ball and chain +domestic silkworm moth +bottom feeder +mistress +death house +freight elevator +bellyband +Pulex irritans +Bacillus anthracis +fire control radar +hysterosalpingogram +turbogenerator +decompound leaf +vambrace +scentless camomile +Medinilla magnifica +prima ballerina +Northern Spy +quartz lamp +grains of paradise +justiciar +felt fern +seismograph +Madagascar jasmine +imaret +white perch +Alpine mouse-ear +tea bread +yellow bass +poseuse +espionage agent +punching bag +eurypterid +orange sneezeweed +banded stilt +armhole +postern +mother +kapuka +catechumen +Soubise +Sauvignon blanc +gunnery sergeant +self-starter +ceratozamia +Atlantic cod +Reoviridae +blood cup +horseshoe bat +oriental plane +voussoir +fetterbush +samara +truncated pyramid +lingcod +athenaeum +shyster +Carolina hemlock +submarine torpedo +floating fern +yataghan +sun tea +viola d'amore +conenose +ventilation shaft +walk-up apartment +saury +wild wheat +porcupine ball +tahini +kris +grass fern +drip pan +black bryony +Scotch broth +tapioca pudding +southwestern toad +Hare Krishna +guimpe +wild madder +megalocyte +teaching fellow +shrubby penstemon +lesser wintergreen +privet hedge +Fahrenheit thermometer +stern chaser +prickly ash +pump room +ricer +chicken mousse +wing commander +sun gear +bolus +alpine milk vetch +opera cloak +twinjet +Goldie's fern +abnegator +alphabet soup +node +grape jelly +early coral root +Tarzan +quarterstaff +greeter +Eurasian woodcock +primary coil +quirt +tinkerer +bolt +creme de fraise +voltage regulator +news photography +Jat +bristly locust +Gouda +dickey +lobster butter +dwarf flowering almond +fagot stitch +Reform Jew +ostrich fern +bathyscaphe +purple mullein +alpaca +civic leader +jellaba +Arizona ash +wasabi +Irishwoman +choke +stockinet +religionist +sewage disposal plant +bittersweet +Hyphantria cunea +pheasant under glass +screen actor +chapterhouse +quoit +horseshoe bat +rapper 
+cupule +planetary gear +cascade penstemon +redoubt +salt +areaway +megalomaniac +bush willow +amethystine python +plains spadefoot +colour supplement +kick pleat +bell apple +narwhal +slippery elm +stenograph +baa-lamb +quadrant +balker +jobcentre +spit curl +bastard indigo +malacca +serow +adobe lily +yacca +palestra +penalty box +scrub beefwood +reenactor +screening +white bryony +alderleaf Juneberry +harpoon +alpine clubmoss +neurosurgeon +surrey +sweet calabash +Scotch laburnum +coquille +French honeysuckle +extrados +pipe cleaner +southwestern white pine +Virginian stock +scaly lentinus +aileron +carob bar +swordfish +Alpine woodsia +negus +wireworm +sweep +goldfields +drop arch +European bream +roly-poly +pin +bastard wing +fustian +wild buckwheat +lake whitefish +overcoat +water filter +Bermuda chub +New Zealand spinach +high-hat cymbal +European larch +radiologic technologist +fine-tooth comb +brunch coat +splice +electronic converter +overmantel +extern +taper +cluster bomb +teletypewriter +pinwheel +trailing arbutus +quipu +creeping zinnia +orange milkwort +tabard +Australopithecus africanus +melancholy thistle +insole +courser +darkroom +surface-to-air missile system +bark-louse +Confederate +neritina +clip-on +spouter +trench knife +outside caliper +dhak +Limburger +chuck wagon +buttercup squash +shirtdress +pouter pigeon +dirty old man +zodiac +fennel flower +mother figure +appointment +Manichaean +lignum +bouffant +rum sling +Ravenna grass +hibachi +gin rickey +American harvest mouse +cocozelle +western wheatgrass +black crappie +rhombus +Missouri goldenrod +barndoor +wild mango +pneumococcus +Boston lettuce +ratline +desert holly +cobweb +fluoroscope +ethnologist +tor +bullshot +stockade +greave +rock sea bass +slip-joint pliers +taxi dancer +schizophrenic +zill +creme de menthe +orange-blossom orchid +divot +supplejack +busybody +casemaking clothes moth +ramrod +gearbox +birdcall +Wiffle +thwart +beauty consultant +chicken paprika +trawl +skep 
+spirometer +hopper +kvass +doggie bag +bath chair +showy daisy +wild tamarind +Tarsius syrichta +glyptics +Algerian +cargo area +bunk +Velveeta +iconoclast +clinch +New Caledonian yew +false mallow +Japanese tree lilac +convex polyhedron +water boatman +cruise missile +finisher +colonoscope +cumin +wickiup +saccharin +whipcord +trailer camp +eryngo +cuckold +yam bean +fighting chair +forewoman +galingale +citron +positivist +four-lined plant bug +suet pudding +field pea +Circaea lutetiana +deer grass +trap-door spider +common corn salad +mirror carp +sounder +second-in-command +seaside alder +burgoo +ming tree +curry sauce +courbaril +green alder +figure loom +fauld +halfbeak +squelch circuit +cladode +winter cress +tongue and groove joint +dwarf dandelion +joss house +western buttercup +welted thistle +potato tree +anglewing +cookfire +marzipan +hood latch +seed shrimp +common moonseed +toasting fork +bevel +three-quarter binding +midwife toad +stage director +Pentecostal +technical sergeant +golden-beard penstemon +drunk +silky oak +corn gluten feed +T-square +stoker +selling agent +cruse +server +rope-a-dope +bicorn +matzo meal +wide wale +roadblock +false foxglove +tuck box +bandsman +smoke bush +machinist's vise +Highlander +scholiast +self-starter +Swedish rye bread +spark transmitter +maverick +maquiladora +cabinetmaker +compress +rainbow shower +huntsman's horn +mackinaw +copper rockfish +lappet +nitrate bacterium +telephone plug +soutache +Dacron +toboggan +sissoo +yogi +laurel-tree +vice chancellor +Christ's-thorn +cartridge fuse +serial port +quassia +tarweed +pecopteris +beggarweed +anchovy pear +bookbindery +woodland oxeye +toad rush +sandalwood tree +marsh andromeda +Tyrian purple +boothose +tragedienne +fragrant cliff fern +festoon +bondwoman +melancholic +butternut squash +exhaust valve +semi-skimmed milk +glowworm +Virginia oyster +Identikit +ayah +gallows tree +Carioca +monoplane +jewels-of-opar +scallop +moth miller +marsh cress +lobed 
spleenwort +ricotta +emitter +arame +tub gurnard +army attache +maniac +organizer +pheasant's-eye +Melba toast +homeboy +Bavarian cream +Maximilian's sunflower +backstop +Tremella foliacea +yellow avens +spreading fleabane +plumb level +false rue anemone +zabaglione +climbing maidenhair +doeskin +walking shoe +lancewood +material +jacksnipe +South American poison toad +agonist +hinny +paper mill +psychophysicist +valley girl +toast mistress +jorum +tiler +chicken Tetrazzini +trivet +grasshopper +three-mile limit +kink +kiang +pole horse +jig +Cornish heath +hedge thorn +false alumroot +Popper +remount +photojournalist +sideroblast +stonecress +Agave tequilana +Japanese lilac +hawse +maenad +air bag +leaf spring +dwarf willow +soda cracker +contralto +moleskin +pilaster +Audubon's caracara +pia +American organ +bleu cheese dressing +betel palm +PC board +almond willow +socializer +tone arm +stammerer +free-liver +scaler +Gentianopsis crinita +leak +black haw +hound's-tongue +grass pea +Stassano furnace +coralbells +ministrant +perihelion +Luxemburger +powder-post termite +arboreal salamander +cushion flower +foramen magnum +pyrethrum +poacher +woolly mammoth +horned chameleon +tearaway +father-figure +tufted gentian +salmi +finger millet +physa +registrar +polyoma +bamboo shoot +matchlock +seine +congress boot +bulgur pilaf +monosodium glutamate +Kentucky wonder +mycologist +kedgeree +ragweed pollen +boarfish +yellow pimpernel +tan +northern Jacob's ladder +macrobiotic diet +migrant shrike +big-cone spruce +colonialist +white dogtooth violet +bath asparagus +webbing clothes moth +ladies' room +experimenter +prairie bird's-foot trefoil +bootleg +cognitive neuroscientist +fire chief +flagfish +dendrite +stinking goosefoot +fore edge +hogfish +Spanish cedar +hotel-casino +Tory +life-support system +pea flour +cash bar +Chenin blanc +white-footed mouse +Canada garlic +salt-rising bread +roomette +mastodon +bell founder +long iron +bi-fold door +fig-bird +European water 
shrew +dyer's weed +frog orchid +allosaur +Florida yew +wild potato vine +crape fern +flat-topped white aster +klebsiella +oil heater +waxmallow +enjoyer +mesocarp +semidesert +senior vice president +coccidium +burrawong +syllabub +jump suit +harrier +leaf roller +cherrystone +cinchona tree +touring car +eulogist +air force officer +red goosefoot +cat thyme +smoothbore +slugger +cardiac monitor +cobber +blister rust +musicologist +rolled biscuit +Braun's holly fern +hog plum +nonpasserine bird +pascal celery +damson +Jonathan +Sheraton +cohune palm +egg white +baton +sixth-former +Siberian pea tree +choanocyte +wineskin +auditor +detention home +Leichtlin's camas +Chartreuse +clusia +club car +wattle and daub +security blanket +common American shad +assistant professor +marsh pea +camomile tea +gopher hole +gravure +Freudian +spirillum +maharani +equilateral +crow garlic +mammee apple +felwort +hardtop +dillenia +curlycup gumweed +pilot engine +calcimine +wooly lip fern +bitter dock +wineberry +jumper +monolingual +spinning frame +old-timer +native cat +diving petrel +sodium-vapor lamp +marchand de vin +sexton +matelote +interior designer +windfall +mole salamander +minder +bodkin +neutron bomb +Caloscypha fulgens +slinger ring +mezzo-soprano +aura +Southern Baptist +viscacha +midfield +tie +prosthetist +round-headed leek +yellow mariposa tulip +canary grass +staddle +Tokay +Muenster +brazil nut +California black walnut +applesauce +penologist +virgin's bower +tenon +steward +Jerusalem oak +red-bellied snake +bindery +scow +fluid flywheel +bullhead +satinleaf +clove +double glazing +matron +wild parsnip +winged elm +shoot-'em-up +musk deer +white rust +lock +Cornishman +Vidalia onion +corn spurry +freeloader +justice of the peace +inlay +myxobacteria +tiglon +tangram +German ivy +scented fern +woolly daisy +caretaker +gastroscope +scuppernong +spotted sunfish +guilloche +codling +wormcast +Eskimo curlew +tayra +European fly honeysuckle +septuagenarian +third gear 
+coatee +red alder +water ice +cubitiere +frame buffer +gamboge tree +pernyi moth +chicken Marengo +Galliano +Lincoln +true sago palm +hunter's sauce +carpet beater +alpine goldenrod +arch support +vehicle-borne transmission +jilt +paternoster +redcap +Siberian larch +hoary plantain +swan's down +chicane +reverse +divan +kneeler +alexic +mock turtle soup +daffodil garlic +mission bells +squilla +ursinia +winter's bark +trifoliate orange +discina +frijole +Swiss steak +maildrop +knotgrass +dog fennel +drum sander +heroin addict +costume +camber arch +shining willow +lutefisk +red porgy +microfossil +good old boy +angle bracket +pitcher sage +bordelaise +heat exchanger +carrion +bush jacket +fanjet +coach +blackface +sicklepod +Manhattan clam chowder +daisywheel printer +olive +Sphacelotheca +Spanish needles +brown root rot fungus +boudoir +encyclopedist +V-8 juice +red haw +brass buttons +gym suit +skywalk +water wagon +gas-turbine ship +stoup +lisle +sailor suit +box beam +balm of gilead +housemaster +hayrack +neutralist +water elm +brook thistle +doyenne +nark +alpha-tocopheral +WASP +hydrilla +water-shield +footlocker +variola major +pargeting +ion engine +yellow globe lily +Malecite +bloodleaf +yellow sand verbena +whorled loosestrife +packinghouse +Carolina parakeet +Virginia waterleaf +armband +red rockfish +factory ship +moon trefoil +jump seat +water gillyflower +yerba mansa +chamfer bit +compass saw +hopsacking +Indian rhododendron +sickbed +treacle +honey eater +mailsorter +seabeach sandwort +sob sister +primrose jasmine +prince consort +elocutionist +wishing cap +runner +trestle +sugar water +half-and-half dressing +fringed poppy mallow +portiere +bung +swan orchid +weather satellite +beef broth +marblewood +sapper +agitator +wren-tit +grade +allspice tree +spacewalker +American hornbeam +sieva bean +dill seed +potoroo +love-in-winter +alembic +Cheshire cheese +small white aster +Oregonian +flipper +twill +differential gear +Prince Albert +licorice 
+foster-father +Melkite +portraitist +Yosemite toad +Cox's Orange Pippin +slender wheatgrass +knob +silique +Rocky Mountain bee plant +stirrup pump +chicken hawk +sweetbrier +Sierra lodgepole pine +poulette +biohazard suit +striated muscle cell +Geiger counter +World Wide Web +turmeric +prairie wake-robin +latchet +pushball +grill +shooting lodge +floating-moss +refried beans +boojum tree +red poll +toothbrush tree +rabbiteye blueberry +red haw +sweet vetch +delta +upland cotton +ballet mistress +padrone +complementary color +great Solomon's-seal +bud brush +brandy sling +spinster +Andorran +Mojave aster +mackinaw +golden calla +bottom rot fungus +segmental arch +periwinkle +hellion +topknot +copper +Mexican hyssop +weeping love grass +point woman +pathogen +fall cankerworm +common shiner +silverspot +corer +atomic pile +crystal detector +yellow spot fungus +truncated cone +saprobe +variegated horsetail +Cro-magnon +cercaria +aglet +pollster +oyster bed +pancake turner +egg cream +sporozoite +quirk molding +mutisia +sound bow +physic nut +sugar-bush +cow +magnetron +jungle hen +brassie +rock bit +taco sauce +seeded raisin +desert selaginella +folding door +vinegarroon +Pinot blanc +rye +ellipsoid +betel nut +tree of knowledge +ambrosia +long tom +breechloader +bicolor lespediza +cosmetician +monoblast +American oil palm +prancer +farina +caiman lizard +hardball +bullock's heart +cotton rat +whiting +weather ship +sharecropper +creamcups +gas bracket +divinity +ornithologist +yellow twining snapdragon +showy goldenrod +end man +heptagon +sand dropseed +round file +guama +blue elder +sand spurry +raccoon dog +zigzag goldenrod +fast reactor +arctic willow +cyclopean masonry +punter +sgraffito +slattern +storage ring +clipper +pulasan +short-tailed shrew +scammony +daybook +umbrella tree +coloring +element of a cone +gesneriad +cane +burgoo +western coral snake +friendship plant +Leydig cell +scrutineer +hairy golden aster +inclined fault +water milfoil +bryozoan 
+nardoo +native pomegranate +curly grass +Florence fennel +resurrection plant +ice water +crown +ploughman's lunch +clustered lady's slipper +kitchenette +sand sedge +pouched mouse +roadbed +parsley haw +predecessor +super heavyweight +seedless raisin +mailbag +sparling +codling moth +squama +Bercy +thermoelectric thermometer +Jaculus jaculus +saltpan +firmer chisel +round whitefish +ramrod +criollo +pinch bar +slash pocket +thigh pad +velvet plant +intergalactic space +brazilian ironwood +whaleboat +sirrah +hanging fly +aspirator +Dominican +dribbler +yellow-eyed grass +Cornish +geophysicist +tarmacadam +marchioness +rattlesnake orchid +Alaska Native +ilama +myrrh tree +zucchini +licorice root +nosebag +lounger +troposphere +virginal +spaghetti Western +Virgin Mary +waterwheel plant +dry nurse +enate +carpet shark +rijsttaffel +stuffing nut +caraway seed bread +Leotia lubrica +kaffiyeh +Boston baked beans +halophyte +backscratcher +instillator +trefoil arch +pip +digitizer +dosemeter +Carolinian +French sorrel +boards +historian +rangpur +clansman +goral +leatherjacket +coiner +fleece +white globe lily +storm cellar +roundhouse +mediatrix +butterfly flower +swamp gum +prairie vole +rhizomatous begonia +common tobacco +Marco Polo sheep +subarachnoid space +broomweed +safety net +silky wisteria +swagger stick +spectacled caiman +derris root +soap pad +chop-suey greens +summer hyacinth +palo santo +carbohydrate loading +chinch bug +roadman +sheep plant +messiah +desk officer +banquette +drugget +trumpet arch +great duckweed +purdah +heartbreaker +hasty pudding +alligator weed +dragee +yellow bristlegrass +Jacob's ladder +campstool +coffee fern +sweet fern +little chief hare +cat-o'-nine-tails +rep +American red elder +divorcee +black salsify +cambric +sennit +Canada ginger +wonderer +Formica +cream-colored courser +zooid +European beggar-ticks +sorrel tree +piddock +blolly +red-flowered silky oak +bay +Hooker's onion +dark horse +cone clutch +Roman hyacinth +paintbox 
+mestiza +green alder +bill +panicled aster +mammogram +snuffbox fern +Rediffusion +swamp fly honeysuckle +stoup +psychiatrist +nodding groundsel +student union +cold duck +bee beetle +playbox +Psychopsis krameriana +nosh-up +earthnut +narthex +single-rotor helicopter +revetment +sweetleaf +seasoned salt +piculet +speckled alder +mackerel scad +common yellowwood +devisee +static tube +Spanish heath +umbrella plant +fucoid +Chilean +coral-root bittercress +fanatic +cachou +agony aunt +bird's-foot fern +washwoman +torchbearer +placoderm +frosted bat +spicemill +Cape lobster +hard-shell crab +colonizer +camphor daisy +friar's-cowl +false tamarisk +toggle joint +tinsmith +theorist +hydrologist +loganberry +universal donor +northern whiting +tent-caterpillar moth +russet +kangaroo mouse +African scented mahogany +bastinado +breast implant +betel +grade separation +vox humana +stodge +Maryland chicken +Anguillan +oil pump +governor's plum +narcissist +deadwood +private citizen +winker +ropewalker +gidgee +Lothario +ski resort +major-domo +von Neumann machine +belaying pin +water parsnip +Fissipedia +luggage carrier +spring water +oyster stew +kohl +celesta +date-nut bread +punchboard +sunniness +hospital train +man +rack and pinion +mixer +pousse-cafe +narrow goldenrod +Maxim gun +stiff +recruiting-sergeant +watch glass +white hellebore +tung tree +prairie white-fringed orchid +beef Stroganoff +scoffer +grassy death camas +Shawnee cake +tapioca +Short's aster +banker +laparoscope +honeyflower +Caterpillar +electric clock +baling wire +huntress +Surinam toad +art school +incurable +Canton crepe +apple juice +hipline +bronchoscope +marshmallow fluff +Texan +wild fig +sawed-off shotgun +forestay +red kauri +fish slice +Egyptian grass +English walnut +brown sauce +ogee arch +nectary +chambray +leather flower +phloem +Persian violet +bomb calorimeter +western narrow-mouthed toad +soup du jour +sickle alfalfa +caracolito +periscope +coralberry +sword bean +sigmoidoscope +water 
locust +hygrodeik +sycamore +sheikdom +ballistocardiograph +clove +akee +fucoid +jacquard +cat's-ear +puritan +slender wild oat +smooth softshell +purchasing agent +landing craft +chartist +lace bug +sharksucker +Virginia chain fern +horseradish +namer +ripcord +personage +aspirin powder +puku +Wankel engine +nightcap +velvet bent +roridula +cytogeneticist +olm +almond extract +common heath +fringe-toed lizard +Kentucky yellowwood +lithosphere +cramp +bulgur +scurvy grass +officer's mess +frigate +electroscope +giant chinkapin +opah +rutabaga +wood hoopoe +Farley maidenhair +shingle tree +argentine +router +palm nut +quillwort +hiba arborvitae +runcible spoon +hireling +sickbay +alpine totara +white lupine +Cotoneaster horizontalis +desert plume +staghound +Sea Scout +opalescence +enophile +Jersey elm +coal house +Helvella acetabulum +selenium cell +white camas +creole-fish +auger +fragrant agrimony +research center +achromia +shank +cottonseed +mod con +extension +sugar beet +winter flounder +silky dogwood +strop +tokamak +rabbit ears +baby farmer +fireman's ax +serration +taproot +socket wrench +action officer +Chilean jasmine +Greek fire +stem-winder +body louse +lumpsucker +stink bomb +American lady crab +dicer +lie detector +maneuverer +black-headed snake +tiger moth +shooting stick +spermatid +babushka +deaconess +home +prior +chanfron +chickasaw plum +big-eared bat +rusty woodsia +tertigravida +miniver +combretum +habit +bluehead +angled loofah +gipsywort +fire-on-the-mountain +purple milk vetch +alpine gold +merozoite +loddon pondweed +Uniat +provost marshal +Gyromitra fastigiata +Coigue +proconsul +oarfish +San Jose scale +filature +chimney plant +spiny softshell +bluecoat +live axle +river limpet +clever Dick +pink bollworm +Japanese plum +roarer +caricature plant +wardroom +Texas chachalaca +Bahia grass +Moreton Bay tulipwood +accessory fruit +pearl barley +ashcake +bunt +Polynesian tattler +pine fern +laughing owl +potato fern +speaking trumpet 
+adjoining room +bearing rein +banana quit +redbrick university +Scleroderma bovista +magdalen +pressurized water reactor +advisee +NIMBY +poorwill +almond moth +comedian +star tulip +cracked wheat +water pump +guest of honor +yellow-breasted bunting +hire +pedate leaf +augur +purple locoweed +Socinian +upland white aster +guesthouse +double reed +detention basin +rollmops +hitch +bodega +mayeng +sparkplug wrench +attack dog +peach melba +heliozoan +tower mustard +blue mold fungus +lamplighter +banded sand snake +smooth crabgrass +elsholtzia +bodkin +Aegean island +bag lady +alewife +arcella +electrical contact +common ax +animist +concave polyhedron +coalface +climbing perch +yellowtail +hobble skirt +marquee +Russian dandelion +snow mushroom +polo ball +NADA daiquiri +cormous plant +chaparral mallow +inside caliper +milking stool +fallout shelter +sea gooseberry +Danish blue +grissino +chimney breast +mosquito fern +soundbox +spring chicken +epauliere +cape forget-me-not +japan +saddle oyster +white fritillary +push-button radio +bladder senna +bladder stone +macedoine +moire +Shawnee +starnose mole +douroucouli +horseradish sauce +electron gun +cotter +console +park commissioner +free press +lump sugar +western poison oak +apple maggot +keurboom +lisper +griffon +burin +horseshoe whipsnake +Jacobean lily +spinner +cochineal insect +emesis basin +sowbane +humanitarian +uakari +three-dimensional radar +wild hollyhock +heartseed +swinger +two-by-four +mop handle +common amsinckia +traitress +rush aster +fibrous-rooted begonia +violet-flowered petunia +milliammeter +alidade +azure aster +celery seed +snorer +scarlet plume +obtuse leaf +heathen +rose chestnut +headrace +dwarf buckeye +Pacific tripletail +wiggler +bounty hunter +Lowlander +slate pencil +typist +syconium +vaquita +skybox +business lunch +gusher +curacao +palometa +Diapsida +light diet +sourdine +thorny amaranth +potato fern +cartridge extractor +peshmerga +chaffweed +tahoka daisy +hematologist +massage 
parlor +diverging lens +breadroot +papyrus +amarelle +cover plate +hubbard squash +cryptomonad +whitetail prairie dog +rabbit burrow +orthochromatic film +goncalo alves +Chile bonito +tent-caterpillar moth +Manila grass +buck sergeant +mustard seed +crested wheatgrass +wise guy +asarabacca +field pea +bite plate +barbasco +heart-lung machine +mouse-eared bat +piping guan +gun pendulum +climbing onion +fungus gnat +Livonian +one-hitter +Chilean firebush +Sonoran whipsnake +round scad +myelogram +Rhodes grass +vomitory +roble beech +South-African yellowwood +molasses +Velcro +common calamint +radiation pyrometer +sketcher +chaparral pea +coffee stall +Australian nettle +bilimbi +Khedive +visionary +field spaniel +devilwood +collimator +Siberian spruce +sling +limestone salamander +ribbon worm +hazel +petter +coolant system +artillery plant +bailiff +chameleon tree frog +microsporophyll +maiden blue-eyed Mary +Drosophyllum lusitanicum +cocozelle +king post +nailer +knobkerrie +tovarich +Intelnet +worm lizard +drop forge +wool grass +brown bullhead +anthropoid +vitamin A2 +creche +hickory nut +whiffletree +deipnosophist +Muskhogean +masochist +hypsometer +gliricidia +complexifier +wild licorice +reconnaissance vehicle +fives +beefsteak plant +eastern dasyure +bookworm +crested coral root +wire recorder +cinnamon vine +bubble +Newfoundland dwarf birch +spruce bark beetle +teetotaler +fad diet +ascus +spicebush +African coral snake +soft-shell crab +Postum +packhorse +sand cherry +cricket-bat willow +middlebrow +Hungarian sauce +buffalo clover +jimsonweed +latanier +stablemate +jumper +zoospore +smooth woodsia +flowering ash +unilateralist +lomatia +flapper +wild cotton +Siberian wall flower +probe +bankrupt +blockade +lemon geranium +fig leaf +basic point defense missile system +clack valve +buttinsky +ingenue +mountain everlasting +zebra-tailed lizard +shaving-brush tree +evergreen huckleberry +core drill +lugworm +Cashmere goat +doorjamb +minelayer +student center 
+horsehair +European dewberry +white broom +arenavirus +eastern poison oak +rye ergot +Tupi +tensiometer +fleawort +coquille +icing sugar +junior lightweight +Doppler radar +mahuang +candlepin +chambermaid +evergreen blueberry +Eton jacket +parvis +solleret +molded salad +malvasia +birth-control campaigner +nonagon +backswimmer +ogee +bowstring +salt marsh mallow +trapezohedron +hoary willow +speech therapist +Zinjanthropus +core +red-backed mouse +eptatretus +mossy saxifrage +Aristotelian +Thessalonian +searing iron +bifocals +falangist +field pea +packsaddle +lay reader +hoecake +cuboid +white maire +iceman +lobscouse +neckcloth +color-blind person +Chinese holly +assemblyman +white-lipped peccary +kava +plastron +crab louse +hook wrench +trailing four o'clock +junior +skilly +internet +tonguefish +footman +sub-assembly +evangelist +track +bench lathe +desk clerk +scalded milk +chamois cloth +American marten +chachka +nondescript +pellitory-of-the-wall +swamp candles +procurator +cuddy +farkleberry +mountain male fern +trawl +dual scan display +fish meal +prospector +convener +guano bat +ant shrike +picture rail +sand rat +gynophore +quilting +sleeper +summer savory +Cotoneaster dammeri +smooth sumac +slumgullion +suite +catalufa +spherule +lean-to tent +gryphon +gas shell +short iron +sweet sultan +dewberry +Victoria plum +American water shrew +X-ray tube +macebearer +green arrow arum +abbe +poke milkweed +atheist +Fosbury flop +Ord kangaroo rat +moldboard +wheat germ +explosive trace detection +whippoorwill +examiner +tallyman +Crookes tube +wild peach +fringed grass of Parnassus +Crookes radiometer +Atlantic croaker +lobster stew +spring cress +maggot +pacer +hydra +Zionist +pepper tree +diamante +baize +Rhodesian man +county agent +respecter +Anglican +antimacassar +materialist +Swan River everlasting +cloud grass +toll line +C battery +chinese mustard +grass poly +warming pan +seasonal worker +common sickle pine +bathysphere +elegant Habenaria +card table 
+Chilean cedar +brocket +collimator +malted milk +avadavat +fire marshall +coloratura +yellow spiny daisy +fingerstall +narrow-leaf penstemon +indigo broom +pillwort +bearberry willow +Etonian +certified milk +climbing bird's nest fern +field coil +wrist pad +parr +kaoliang +engelmannia +stocker +satrap +Nantua +spearfish +caper tree +gold-tail moth +mountain chinchilla +sea milkwort +westerner +army cutworm +leaf-nosed snake +neurobiologist +xeranthemum +Eastern silvery aster +ecclesiastical attire +caper +Ukranian +bight +button fern +peach pit +oligodendrocyte +maar +digitigrade mammal +streptobacillus +sensitometer +preemptor +oat +bell foundry +crown lens +rock purslane +Junior +Brazilian guava +kicksorter +Ohio goldenrod +red mulberry +King's Counsel +mountain four o'clock +fairy shrimp +fell +oca +sycophant +chantry +dermatoglyphic +bomblet +keyhole saw +hangman's rope +little barley +lion-jaw forceps +giant scrambling fern +popper +dulcimer +Espagnole +tardigrade +smooth-haired fox terrier +bullbrier +rewa-rewa +Japanese poinsettia +trunk line +cannery +helminth +American spikenard +prince's-feather +arthroscope +ginger +aphakic +pilot bit +angle of refraction +low-sodium diet +wall creeper +growler +praetorium +Hall of Fame +soupfin shark +Molotov cocktail +kaffir boom +stitcher +sawwort +flagellant +Atlantic herring +Reticulitermes lucifugus +voltaic pile +snowy orchid +southern flounder +skysail +osage orange +white mullein +lined snake +tolu tree +poliovirus +foreman +burette +jackass bat +invigilator +electromyograph +acarus +presence chamber +columbian mammoth +hyacinth bean +pilot +meadow jumping mouse +Maria +outskirts +aftershaft +Queensland nut +schlockmeister +plainsman +afropavo +scarlet musk flower +five spice powder +gunboat +multiplex +Dutch uncle +louvered window +chimney corner +cuscus +psalmist +Vichy water +signer +amphiuma +harmonizer +authorizer +naiad +control rod +stentor +mountain bladder fern +gig +read-only memory chip +assenter 
+vixen +hermitage +corn dab +locksmith +cockspur thorn +variable-pitch propeller +western red-backed salamander +dolman sleeve +cultist +sweet buckeye +pine vole +Peking man +mountain swamp gum +nimblewill +bethel +aye-aye +lancelet +teff +Alpine celery pine +endive +nipa palm +center of curvature +seeder +Sabahan +sea scallop +social secretary +gorgonzola +western chokecherry +misanthrope +rabbitweed +beggarman +button fern +white mallee +doodia +mastiff bat +roper +prima donna +blanc +holding pen +fingerling +skyhook +flophouse +steam chest +crystallized ginger +acrocarp +horse pistol +true mahogany +costmary +ballistic galvanometer +jaunting car +bartonia +rep +mandibular notch +bubble and squeak +umpire +fringed loosestrife +bear oak +ski jump +staggerbush +plumcot +thermal reactor +field brome +bodkin +jackknife-fish +malope +writing arm +gold fern +Stayman Winesap +merlon +eclectic +fluxmeter +emeritus +imam +drum +pop tent +capital ship +subalpine larch +flail +Lorenzo dressing +tomboy +eastern woodrat +warrantee +Pacific spiny dogfish +sheepshead porgy +farthingale +Cryptoprocta +power loom +communicant +howdah +ectomorph +false foxglove +basset horn +odd-pinnate leaf +Wisconsin weeping willow +Queensland bottletree +dampener +corbel arch +silent butler +Circe +town clerk +Japanese chestnut +bloodwood tree +switcher +cup hook +spreader +rice rat +straightedge +traverser +fluid drive +Spanish paprika +sour milk +poison camas +bean dip +card table +vinegar fly +vizier +electric-discharge lamp +purple rock brake +dynamo +Japanese snowbell +Grindelia robusta +neuroglia +safflower seed +coronet +frown line +Renaissance man +Steller's sea cow +book scorpion +isosceles triangle +arthritic +spherical triangle +kangaroo mouse +garden orache +stemless hymenoxys +titi +out-basket +gent +columnea +mint sauce +mouthbreeder +Liebig condenser +cheerer +assegai +stickler +Merostomata +dimmer +grey poplar +common heath +scorzonera +glory hole +Blackfoot +oil slick 
+musketeer +apple geranium +daisyleaf grape fern +gas furnace +bijugate leaf +Arabist +star-thistle +hand throttle +huckleberry oak +lift pump +maulstick +Rome Beauty +Newburg sauce +pit +volunteer +Baldwin +ark +Asian horseshoe crab +black calla +marlinespike +Gentianopsid procera +guinea gold vine +tucker-bag +desk sergeant +piezometer +migrator +keelson +executrix +sackcloth +onion smut +buckboard +substitute +pudge +mess +cinchona +intervenor +gravimeter +pederast +censor +gastroenterologist +cutlassfish +launch +demerara +Diegueno +bog bilberry +aglet +soda fountain +crank call +harpoon gun +ribbon fern +Gurkha +output device +epilating wax +greasewood +water horehound +return key +fairy swallow +spatulate leaf +culverin +leptocephalus +kleptomaniac +barley water +bleeding tooth +Cheyenne +maleberry +limber +tapenade +whorled aster +toe +revenant +lap joint +vein +truant +florest's cineraria +morning dress +trichodesmium +nightshirt +element of a cylinder +shopaholic +section hand +electrodynamometer +Guadalupe cypress +rosebud +racist +avaram +keeled garlic +Alaska rein orchid +orange toast +cunner +dipstick +Neolentinus ponderosus +bulbil +charlotte +pull-through +header +Manduca quinquemaculata +persona grata +elegist +cafe royale +scup +semanticist +wood sage +field magnet +tundra +bay myrtle +alluvial flat +arrowleaf groundsel +celtuce +baryon +must +entrant +othonna +pied-a-terre +liza +sticky aster +grasshopper mouse +prison guard +tire iron +bomb rack +Spanish American +sheltered workshop +turfing daisy +backbone +tangle orchid +creeping willow +dumb bomb +horse cassia +barosaur +Yavapai +shrimp Newburg +peanut worm +dwarf chinkapin oak +corchorus +brick cheese +by-catch +stover +Urnula craterium +clasp +Kekchi +alpine coltsfoot +soybean future +altar wine +ripping chisel +encephalogram +mountain spleenwort +transferee +remoulade sauce +American rock brake +stenographer +read/write head +loblolly +ground +powdered mustard +brake band +sea dahlia +freak 
+proconsul +Coffey still +Sivapithecus +pellitory +palm cat +skew arch +American angelica tree +vigilante +candelilla +andryala +amarelle +swiftlet +petcock +associate professor +sclerite +open circuit +Virginia crownbeard +Last Supper +button tree +scyphozoan +margate +mercury cell +horsewhip +water scorpion +companionway +drop cloth +Amhara +miraculous food +pro-lifer +embryologist +Creole +bombazine +Indian blackwood +cubeb +trace detector +gros point +main-topsail +meringue kiss +spree killer +capstone +specimen bottle +woolly apple aphid +silverweed +American barberry +gallfly +European bog asphodel +northern flying squirrel +alliterator +Old Catholic +heliograph +Pteris cretica +tippler +pump well +allspice +balancer +scarlet bugler +lantern fly +white prairie aster +krummhorn +robin's plantain +Pacific sardine +patty-pan +decaffeinated coffee +western saxifrage +warrantee +colorimeter +ball bearing +makomako +foot +troika +apricot sauce +data multiplexer +rose-root +sound film +Northern dewberry +water hickory +swing door +spastic +Oligoporus leucospongia +botulinus +tamale pie +Sagittarius +muff +spicebush +petiolule +pump action +Parry's pinyon +split-pea +rudder blade +princess royal +wormseed mustard +honey guide +pip-squeak +fin keel +foretop +cyrilla +Navaho +melanocyte +deist +silver tree +citrus whitefly +Morrow's honeysuckle +green peach aphid +longanberry +call-board +wild yam +novelist +toothed spurge +alienee +pond apple +allspice +Carolina lupine +Jack of all trades +white false indigo +boiled dinner +princewood +sailor's-choice +false bracken +microbrewery +black grama +tutee +brickkiln +sea raven +guesser +wirework +European lemming +thyrse +plains lemon monarda +milo +shunt +spotted cowbane +anchovy sauce +grande dame +Maryland golden aster +Chinese puzzle +boarfish +burweed marsh elder +defense contractor +nitric bacteria +Belgian hare +beach plum +conformal projection +sand fly +steering linkage +quickset +Mahayanist +Geiger tube +loudmouth 
+Lancastrian +brownie mix +ex-spouse +deltoid leaf +Shasta salamander +rabbet joint +purple anise +garibaldi +gebang palm +bladderpod +Host +great bowerbird +string cheese +spinning jenny +drift net +matriarch +guar +bitter betch +panda car +mess +plains pocket mouse +scarlet wisteria tree +deerberry +reamer +homing torpedo +molehill +stockyard +reniform leaf +rag +symmetry +Texas star +lerot +pickle relish +three-seeded mercury +cotter pin +ice-cream bean +farmyard +bar magnet +hansom +prickle cell +renal cortex +pest +Ultrasuede +sailing master +brougham +wastrel +amboina pine +Canary Island hare's foot fern +ninepin ball +southwestern lip fern +usherette +lemon drop +star begonia +weeds +saltworks +Persian melon +corbina +medusa +bucksaw +Gibson girl +diameter +American twinflower +kino +clear liquid diet +angiocardiogram +wetter +oyster cracker +yellowfin mojarra +wild parsley +life tenant +broom closet +Corynebacterium diphtheriae +square shooter +bedwetter +ball-and-socket joint +nonsolid color +Salmonella typhimurium +buffel grass +hip pad +subaltern +heliothis moth +trail boss +hayloft +Francisella +primordial dwarf +cock-a-leekie +sugarplum +propulsion system +tyrolean +Carib +salai +ketembilla +ironclad +cornhusk +heckler +multistage rocket +north island edelweiss +Chaldean +twenty-two pistol +Francophobe +scofflaw +sickle feather +screw bean +sea squill +Scopolia carniolica +agglomerator +western holly fern +presenter +straight pin +Myxine glutinosa +Colbert +clover-leaf roll +war paint +bird's-eye bush +longfin mako +running suit +arrow wood +margrave +blue fleabane +dracontium +plastron +chimney swift +child prodigy +commissar +turtle soup +postulant +archaebacteria +snakefly +Pitot tube +chap +smilo +Malthusian +French roof +worm wheel +gulag +pointed-leaf maple +pull-off +Cathaya +American green toad +ball cartridge +infiltrator +snowfield +crotchet +auxiliary pump +bearnaise +galax +chaenactis +olympic salamander +sundowner +cows' milk +beach plum 
+moss-trooper +Arabidopsis thaliana +cat's-claw +bog rosemary +ribier +book agent +bumper jack +beefwood +monk's cloth +alpine bearberry +climbing fumitory +cucking stool +puka +Piltdown man +property man +discharge lamp +X chromosome +knobble +lobster Newburg +herbalist +sunray +golden saxifrage +leopard cat +muffle +stonewort +blancmange +intraocular lens +trepan +desert mariposa tulip +plume poppy +Dane +martynia +shaver +white milkweed +napu +tansy-leaved rocket +abortus +telemeter +tansy mustard +harpy +honeysuckle +ironworks +testacean +Tartuffe +silvervine +Sihasapa +surface gauge +western blind snake +paramyxovirus +Icelander +bird louse +stockbroker belt +test-tube baby +ague root +little golden zinnia +dietician +elephant's-foot +dirty bomb +sailing warship +brier +tinter +Connemara heath +potato fungus +bait casting +decagon +rosefish +die +high-pass filter +solitaire +widow's walk +goldthread +Tudor +trews +orange pekoe +ninon +soda jerk +sump +flying carpet +burial garment +oblanceolate leaf +press gallery +Shintoist +three-centered arch +spreading pogonia +Moro +foxtail orchid +Ghanian +dry kiln +thane +naranjilla +bitter pea +American bugbane +apron string +oyster fish +Port Jackson fig +prize winner +high-water mark +Oneida +smoking room +potato skin +charge d'affaires +gantlet +amyloid plaque +barmbrack +mate +arrow leaved aster +handbarrow +horned screamer +virago +linoleum knife +rattlesnake root +K ration +reset +foot brake +red coral +good guy +aberrant +lavalava +poleax +garden webworm +sneezer +mountain heath +American dog violet +eolith +chimneysweeper +matriarch +smalltooth sawfish +sea mouse +tubercle bacillus +superconducting supercollider +Abney level +darnel +gherkin +celery salt +Tungus +pulasan +oriflamme +death camp +redhorse +apprehender +scion +selectwoman +pentahedron +principal +old school tie +slice bar +chanar +pimento butter +wailer +zero +mescal +rosebud orchid +stone bramble +Jarvik heart +NOC +pitchman +rat cheese 
+strawberry tomato +dwarf golden chinkapin +landau +tocsin +ampulla +scratcher +crab Louis +ginseng +ripcord +polluter +tensiometer +eyewitness +aalii +Oregon crab apple +conservator +day jessamine +hexahedron +suture +tippet +linsey-woolsey +vernal witch hazel +stainer +egocentric +canistel +nudger +shipping agent +shortleaf pine +battle sight +cheese spread +weeder +incendiary bomb +honeyflower +stovepipe iron +stepper +hellgrammiate +votary +aflatoxin +arquebus +impulse turbine +pipewort +garrote +glow lamp +pigsticking +blood clam +surface search radar +Bolshevik +platen +chariot +Gentianopsis thermalis +water level +quandong +catalytic cracker +giant foxtail +nut butter +drainplug +holdover +coastguardsman +Secretary of Health and Human Services +Seeing Eye dog +American plaice +coquilles Saint-Jacques +christella +medium +clingfish +lally +light-o'-love +Gentianopsis detonsa +taper file +signal detection +trip wire +lignosae +receiver +sedan +mud puppy +corn sugar +Philippine mahogany +magnetic pole +jointed rush +trapper's tea +Dorking +welcome wagon +clammyweed +guard +false azalea +convalescent +babassu +dedicated file server +colossus +air search radar +marquess +straight flute +sand stargazer +sea catfish +rosilla +ripsaw +Bermuda onion +peach sauce +sagebrush mariposa tulip +yashmak +Virginia mallow +erose leaf +sand blackberry +boulevardier +forester +choragus +onion mildew +threadfin +winged pea +sugar daddy +rotary press +styracosaur +rathskeller +Japanese millet +anchorite +coral drops +false gavial +eastern pipistrel +cheese press +Chinese primrose +pamperer +real estate broker +power worker +breeder reactor +nutcracker +piano wire +cushaw +Sinanthropus +firebreak +kelp greenling +herba impia +toll call +yoke +bird fancier +evening-snow +fever tree +reed meadow grass +flanker back +toggle bolt +Santa Cruz cypress +carbonnade flamande +northern dune tansy +mikado +millettia +forty-five +court +icepick +holm oak +Japanese angelica tree +Pacific cod 
+cant hook +urologist +spelt +lekvar +enologist +Mediterranean flour moth +prickly-edged leaf +Spanish grunt +dune cycling +frostweed +whisperer +tucker +Roman wormwood +counterterrorist +woolly alder aphid +Nuttall oak +snail butter +threshing floor +motley +forge +water mold +mummichog +sulfur paintbrush +head +walking delegate +jujube +peachleaf willow +Christmas bells +valley pocket gopher +bear's-paw fern +Lanthanotus borneensis +pearl hominy +placeman +swage block +offerer +stargazer +jeweler's glass +male chauvinist +crossbar +Oktoberfest +tamarau +micronutrient +large-leaved aster +tasset +tepary bean +sausage curl +ivy +snob +roller towel +wood meadowgrass +archil +padrone +prairie rocket +tongueflower +kidney fern +Carolina buckthorn +sea island cotton +landscape architect +realist +oyabun +mother hen +ostracoderm +esker +heliophila +nympholept +shining clubmoss +press agent +clam dip +Djiboutian +white currant +codfish ball +hand cheese +kraal +trident +conventicle +bacteroid +Indian plantain +quandong +kola nut +signor +theater light +musk clover +canistel +silent partner +steel-wool pad +diggings +affluent +sightreader +John Doe +arrowworm +goatsfoot +guardroom +wild cinnamon +kaffir boom +ink eraser +yardie +industrialist +sea lily +polarimeter +Polistes annularis +western big-eared bat +omnivore +Ted +horsecloth +crab cocktail +vacuum chamber +flower-of-an-hour +bilge +poleax +neolith +Montezuma +plum-yew +welfare case +trave +pipe bomb +shading +Centigrade thermometer +bangalore torpedo +celery top pine +nuclear rocket +fowling piece +anti-Semite +landscape +derris +bush honeysuckle +Mediterranean water shrew +ticket collector +masked shrew +white dipladenia +Savoyard +bondman +tempter +pygmy cypress +pentathlete +thruster +usurper +Arminian +yerba buena +ice field +ichthyosaurus +sackcloth +bean tostada +Oxbridge +Pteropus hypomelanus +thinker +bank robber +ape-man +thurifer +knawel +mule fat +hot spot +hairy-legged vampire bat +night raven +hook 
and eye +crocodile bird +skunkweed +beaver rat +cypress sedge +florida selaginella +April fool +Jonah crab +glass wool +corkwood +dwarf elder +hinging post +gentile +Brazilian trumpeter +witch doctor +thermograph +pink shower +Mao jacket +capelin +parang +bradawl +stooper +jewel orchid +citrange +oarswoman +Macedonian +particolored buckeye +pachycephalosaur +satinwood +Chinese brown sauce +peep sight +straight man +quandong +chamois cress +nonfat dry milk +rosin bag +Leiden jar +Grimes' golden +spirillum +grass vetch +carillonneur +downy wood mint +melon ball +sweet calabash +chlamydospore +bombshell +sidewall +sprig +Indian button fern +globe pepper +rough-stemmed goldenrod +bocconia +bubble chamber +sand dab +plum-fruited yew +aecium +marrowfat pea +hobbyist +whipper-in +salad burnet +neckband +Tangier pea +sauce Louis +salad burnet +artist's loft +koumiss +Nazarene +cutter +scrim +drape +crab-eating dog +deckhand +bedroll +gaff +stifler +pink lady +great plains paintbrush +patternmaker +yoke +caryophyllaceous plant +angrecum +quadriplegic +grid +genlisea +aspic +water table +junket +signore +Mutillidae +proprioceptor +pivoting window +Indian poke +synchroscope +trichion +tarahumara frog +proctoscope +abomination +purslane speedwell +breast drill +Japanese barberry +mandrake root +breakable +salon +American watercress +take-up +entrenchment +cocktail sauce +Scotch asphodel +borough +matchmaker +Seneca snakeroot +pointsman +psephologist +clustered poppy mallow +onion thrips +nuclear-powered ship +organizer +deciduous holly +balsam willow +enzymologist +caraway +drip loop +dog laurel +Orangeman +sapsago +polymath +backplate +leathery grape fern +modillion +two-timer +handhold +consignee +white stringybark +nettle-leaved goosefoot +bookmaker +disk drive +doliolum +palmist +packinghouse +Spandau +Whipple's penstemon +sword grass +ribbon development +pearly-shelled mussel +winter heliotrope +rogue elephant +deck tennis +Venus's flower basket +football +shim +boatswain 
+blinks +armored catfish +hooded seal +outdoorswoman +water starwort +upholstery needle +pleurodont +silky anteater +cornmeal +lead-in +redfin pickerel +horse balm +Rydberg's penstemon +cascade transformer +fly poison +Volvaria bombycina +broad-leaved twayblade +pastry cart +body plethysmograph +waverer +hardware store +Parry's penstemon +European sanicle +strawberry geranium +cross-examiner +head gate +devil's tongue +hemiepiphyte +pine hyacinth +machmeter +spirit lamp +field judge +Rock Cornish +mayhaw +Sassenach +bog pimpernel +parallel interface +crowberry +roach +Aegyptopithecus +cajan pea +lapboard +cryostat +magnetic storage medium +white yam +Lombard +rhymer +bed and breakfast +bunya bunya +rifle grenade +caterer +collared pika +anti-submarine rocket +bookkeeper +Western mountain ash +profit taker +fruitlet +Knowlton's cactus +infernal +beefsteak begonia +lunula +emulsion +intermediate wheatgrass +titfer +European sea bream +bigeye scad +yak butter +kola +cone pepper +plesiosaur +ragwort +penal colony +black carpet beetle +lubber's hole +Stapelias asterias +yard marker +balloon bomb +Scythian lamb +armory +selsyn +marblewood +spirula +fatalist +hash head +armiger +Dom Pedro +white-chinned petrel +ballast +orthopter +greater water parsnip +clutch +largeleaf holly +Evangelist +king whiting +tuna fish salad +Muscadet +surpriser +jumping bristletail +proportional counter tube +Hamburg parsley +obstructionist +pus-forming bacteria +creep feed +stepbrother +janissary +control freak +trusty +trepan +King William pine +orthicon +geological horizon +molecular biologist +violator +pariah dog +Austrian +conciliator +Fauntleroy +packing needle +mazer +Saturday night special +leucocytozoan +coastal rein orchid +whirligig beetle +capitalist +breeches buoy +clubroot fungus +meadow spikemoss +Kichai +Spanish lime +land office +camera obscura +strafer +purple-stemmed aster +lusterware +valve +Roman nettle +isthmus +breadstuff +sealskin +maleo +bilge keel +carissa plum +fish 
fly +kolkhoznik +heath pea +cowage +hog sucker +Sam Browne belt +inductor +wild licorice +Socotra begonia +supernumerary +Angle +red shrubby penstemon +toilet kit +tawse +sweet bells +kawaka +brown soft scale +lyssavirus +betting shop +double-crosser +macrotus +climbing hempweed +poi +strip mall +deadhead +petit juror +tract housing +American mistletoe +lace-flower vine +precipitator +endoparasite +hairy wood mint +red snapper +Victorian +hog peanut +line of heart +opossum shrimp +plumcot +Bavarian blue +slops +light flyweight +oregano +sand myrtle +pocket battleship +curator +narc +hydraulic cement +plains pocket gopher +closed loop +pluralist +molter +Christmas bush +snuffers +slender knapweed +footwall +plage +caper tree +red siskin +tender +boat train +tipster +low-pass filter +student lamp +morosoph +japonica +bellows +herald +oyster plant +savory +mail +computational linguist +blade +winter crookneck squash +zoomastigote +blackmailer +richweed +dialectician +genip +plumed scorpionfish +jet bridge +thermopile +billy buttons +Brule +millwright +Arenaviridae +Jones' penstemon +monastic habit +genipap fruit +burnous +dairyman +top +crab-eating raccoon +quadrangular prism +pilot burner +weeder +trireme +boy wonder +man of letters +Catawba +high-muck-a-muck +light circuit +bloodworm +lappet caterpillar +half-and-half +office boy +saddle stitch +mistletoe cactus +false chamomile +Catalina cherry +workhouse +Jamaica quassia +britches +tooth shell +reduction gear +carrot pudding +balsam woolly aphid +handspike +aioli +silver hake +flour bin +wireman +gas-cooled reactor +aficionado +plus fours +gitano +gene chip +oilfish +ingenue +tulip orchid +late purple aster +pork and beans +envoy +lemon extract +milk bar +black huckleberry +ground roller +Connecticuter +siderocyte +Jacquard loom +chub +meat safe +stock cube +Australian sumac +purple sanicle +tailless tenrec +dog wrench +rainbow cactus +castor bean +scintillation counter +eohippus +pawnbroker +gauge boson +front 
man +early warning radar +bearing wall +Bourbon +sandwichman +sild +gravelweed +perishable +cembra nut +riflebird +quicksand +slate +sweeper +ship-towed long-range acoustic detection system +defamer +president +vitamin K3 +challis +tanekaha +bloodwort +grenadier +quietist +Zairese +fucker +foremother +gesneria +print buffer +salsilla +fissiped mammal +fender +consulate +acidophilus milk +Southern dewberry +snail darter +Panama redwood tree +dehydrated food +bush willow +coffee fungus +Sinologist +Mesoamerican +hood +large civet +deck-house +cyborg +smuggler +pepper sauce +cyberpunk +Grand Inquisitor +persona non grata +haggis +weeping tree broom +stop bath +modifier +coyol +conodont +yellow giant hyssop +optical pyrometer +Carolina moonseed +marinade +aspartame +false wintergreen +cityscape +philter +turnery +hemiplegic +chuck-will's-widow +vower +track star +myrtaceous tree +small civet +intelligence analyst +dogcart +yardman +cross bit +holometabola +platen +sweet cassava +Comstock mealybug +acute angle +Communist +alcohol thermometer +mountain hollyhock +Mead's milkweed +highjacker +Townes +congou +Astrophyton muricatum +lazybones +roughcast +pressure cabin +clinch +cinnamon +smoke bomb +quandong +tout +office-bearer +punctum +efficiency apartment +Queensland hemp +Ceylon bowstring hemp +newswoman +vermin +fetid bugbane +grantee +sanitary landfill +gluten-free diet +clabber +shillelagh +white lettuce +sweet coltsfoot +beggar's lice +samite +loser +flasher +water star grass +banana passion fruit +translator +artificial kidney +Virginia creeper +American crab apple +cactus mouse +nebbish +Ligustrum obtusifolium +vox angelica +stringer +hunter +know-it-all +scene painter +invalidator +jungle cock +basilica +coriander +California single-leaf pinyon +miles gloriosus +pina cloth +law agent +scarlet fritillary +keurboom +bailor +ramjet +seedling +rib joint pliers +ways +picket ship +Surgeon General +wasabi +marquis +clostridium perfringens +Helvella sulcata +furnace 
lining +kingwood +painted sandgrouse +plain wanderer +Indian madder +silver screen +bailey +dwarf spurge +Serbian +ball-buster +shaheed +Platte River penstemon +tensiometer +mute +nymphomaniac +Yokuts +arroyo willow +whipping post +class act +load +winged everlasting +periodontist +diarist +robber frog +diestock +curry powder +ratchet wheel +store detective +hog plum +prune whip +shortwave diathermy machine +Anabaptist +post chaise +Kennan +bean caper +delegate +orderly sergeant +celtuce +jumping bean +gowen cypress +puddingwife +registered nurse +West Saxon +rosita +gun room +nasotracheal tube +matchboard +flagship +Boswellia carteri +Canadian pondweed +wonder boy +sewer rat +dimetrodon +pantograph +marsh bellflower +angoumois moth +slippery dick +woolly indris +creme de cacao +dulciana +Jewess +Macadamia integrifolia +least shrew +don +diffuser +black-stem spleenwort +grouseberry +goniometer +annotator +sticktight +gossip columnist +speechwriter +capon +rock hind +Liederkranz +chandler +echocardiograph +sidelight +fisher +brocket +New Zealand daisybush +northern sea robin +roller bandage +peachick +pellet +pichi +plug fuse +spark coil +buckwheat +brood bitch +wedgie +dwarf bilberry +filigree +bull +queen +dodo +Salish +denticulate leaf +Western silvery aster +Prima +magnetic bottle +fetterbush +process-server +nainsook +mythologist +Piedmont glacier +hammerhead +niggard +Mound Builder +Kui +Nootka +highbinder +passenger pigeon +oblong +tickler coil +agnostic +succorer +esophagogastric junction +dressmaker's model +bombshell +social anthropologist +gildhall +orpine +pterodactyl +bristly sarsaparilla +Lane's Prince Albert +hognose bat +salesgirl +lubricating system +electric catfish +wrap +Jacksonian +chard +cherry laurel +foreground +beadsman +Kolam +amniote +frozen pudding +acid head +poor box +depositor +coattail +pallas's sandgrouse +mason's level +English lady crab +skeg +cruel plant +petrolatum gauze +tuna +swivel +stock-in-trade +perisperm +civies 
+Phyllostomus hastatus +alienor +Verdicchio +guard's van +onion butter +moviegoer +planter +citrange +box huckleberry +iconoscope +familiar +helmsman +baby boomer +constructivist +American bog asphodel +whorled caraway +simple pendulum +viviparous eelpout +Job's tears +holdout +sour salt +poison bush +dusky-footed woodrat +golden algae +granadilla tree +telethermometer +crossbar +thrift +African bowstring hemp +dog in the manger +hayrack +gold-crowned kinglet +prolonge +doge +pencil +discount house +mulligan stew +Nonconformist +virologist +gregarine +facula +rocket scientist +thin-shelled mussel +oospore +annual salt-marsh aster +Afrikaner +metallic +julienne +culverin +cleavers +Berliner +mudhif +thorny skate +brown lemming +yellow colicroot +cooling system +large-leaved magnolia +free-reed +canyonside +preemptor +stake +Brucella +anti-G suit +pleximeter +squire +salsilla +write-in candidate +lowland burrowing treefrog +flare star +dwarf hulsea +jobber +mangel-wurzel +quagga +red-skinned onion +positive pole +Pteropus capestratus +jug wine +stomacher +standee +bladder worm +hakim +house of correction +pelisse +golden mole +temporizer +rose apple +drove +umbrellawort +holy of holies +lawyer cane +smooth lip fern +anode +astatic coils +zip gun +feverroot +self-heal +expansion bit +salt reed grass +field pussytoes +nutmeg hickory +cryptic coloration +Venus's girdle +Hunkpapa +Calostoma cinnabarina +raft foundation +May apple +pygmy mouse +prokaryote +yellow-green algae +Bermuda maidenhair +withdrawer +coelacanth +Elliott's goldenrod +driftfish +epicyclic train +bowl +swamp dewberry +corbel step +sadist +party line +anti-American +mining engineer +Amur privet +conidium +Gastrocybe lateritia +lithia water +chaulmoogra +Rough Rider +Guinea pepper +glade mallow +pitcher sage +whitecup +shanghaier +low St Andrew's cross +phonologist +cocobolo +perfumery +visor +prison chaplain +belt +ingesta +literary critic +industrial watercourse +reckoner +pursuer +Kinetoscope +Kuiper 
belt +hyperope +raw recruit +Galiella rufa +Prince Albert yew +slit trench +usher +tenderfoot +white-rayed mule's ears +browser +piccalilli +bran +giant buttercup +water lobelia +arborescent plant +echinus +dryland blueberry +struggler +platyctenean +Geordie +domatium +twenty-two rifle +keteleeria +sports editor +chorus girl +Hakham +dry-bulb thermometer +onomancer +double-bitted ax +Girondist +bottle bank +thyrsopteris +bandwagon +star anise +armored car +dhawa +Bessemer converter +mutineer +paradise tree +tupik +centurion +mending +chowchow +margrave +International Grandmaster +African hemp +catafalque +leptodactylid frog +forcemeat +tank shell +pill +barbecue pit +worthy +lady's maid +evergreen +Jesuit +South American staghorn +rigger +suffragan +imperialist +spherical angle +grey lemming +kitchen police +tree swift +coliphage +archaist +Conservative +rib +exegete +Mendelian +tragedian +steerage +Paleo-American +obeche +garlic +grapefruit peel +accommodating lens implant +half blood +barrelfish +catgut +lanceolate spleenwort +hardliner +frieze +name dropper +carrack +huckster +onion bread +magnetic head +pease pudding +raisin moth +negative magnetic pole +electroencephalograph +bunji-bunji +synchroflash +Mornay sauce +stencil +winged pigweed +Nesselrode +MEDLINE +licorice +mainspring +melilotus +duke +experimenter +Napier's bones +four-minute man +pin-tailed sandgrouse +toolmaker +pogge +rootstock +baton +pricket +creeping snowberry +anomalops +nester +devourer +apolemia +Maricopa +pine-barren sandwort +larvacean +American dewberry +escalope de veau Orloff +gig +myrtle +pitsaw +Lutheran +fish house punch +gnathostome +intake valve +molasses taffy +clammy locust +vandyke beard +Atlantic tripletail +planktonic algae +estradiol patch +flummery +cytologist +sectarian +oil meal +tomtate +mediterranean anchovy +aspersorium +argonaut +porkholt +sheep ked +algometer +Adventist +false goatsbeard +snake polypody +streetwalker +shelver +adoptee +highflier +pitch apple 
+prairie rocket +fish mousse +viroid +deckle +manila tamarind +observer's meridian +pincurl clip +hardstem bulrush +gossamer +brookweed +Druze +hug-me-tight +accessory before the fact +oilman +Comanche +Marine +bedlamite +Chinese cork oak +squawbush +false miterwort +walk-on +Cynopterus sphinx +brandyball +landlubber +arrowroot +cape forget-me-not +galoot +tabor pipe +checker +Levant cotton +paddle box +murderess +smirker +fuddy-duddy +withdrawer +newel +shade +pink disease fungus +tipu +sweet sultan +aeronautical engineer +tall gallberry holly +acarid +conqueror +cucumber +film director +ordinary +salon +closet queen +allegorizer +tonka bean +flax rust +negative pole +dagame +dentist's drill +mock privet +micropyle +contributor +dark horse +climbing corydalis +cosmotron +land agent +Big Blue +Cynic +tassel flower +lyrate leaf +Minuteman +Dutch-elm beetle +Hessian fly +flower girl +West-sider +window dresser +skinny-dipper +whitebait +out-and-outer +hooker +amicus curiae +jack +camwood +stockist +black root rot fungus +Jamaica dogwood +diaphragm +Holocentrus ascensionis +roselle +black maire +Pygmy +fumigator +lame duck +mudder +hydraulic transmission +conning tower +phoronid +batfish +hearing dog +monohybrid +whaling gun +Cockcroft and Walton accelerator +allemande +seasoner +epileptic +ammonia clock +Young Turk +lanseh tree +urceole +cafe noir +poster girl +Oglala +deadeye +manna lichen +positive pole +cinch +lyricist +hermaphrodite +kidney stone +dilator +number one +frotteur +kaffir bread +fish knife +tarragon +adjuster +potato wart fungus +Florida pompano +conductor +corbie gable +rounders +Catha edulis +bender +recruit +Uruguayan +subject +bunghole +day boarder +pocketed bat +Oxonian +owner-occupier +yellow-leaf sickle pine +devisor +exhibitor +looking glass +shipowner +crooked-stemmed aster +calico +dash-pot +defilade +Confucian +egg-and-dart +irreligionist +lepton +self-rising flour +diving bell +Brahui +shop girl +maximum and minimum thermometer +Dalmatian 
laburnum +correspondent +subduer +nonperson +Reaumur thermometer +rough-leaved aster +jacksmelt +pinfold +magneto +ex-wife +round-leaved rein orchid +purloo +American shrew mole +sweet sand verbena +polymastigote +outfitter +curled leaf pondweed +Italian dressing +borderer +ambusher +geebung +four-stroke engine +small ship +homeopath +gynostegium +political prisoner +Radiigera fuscogleba +ensiform leaf +rhizoctinia +satyr orchid +rue +bouillon cube +flip +prophyll +tilefish +periselene +prima donna +choker +laminar flow clean room +Hooker's orchid +fish joint +mombin +remover +array +coelostat +autophyte +consigner +Damaraland mole rat +gasman +public works +lye hominy +pearlfish +piassava palm +Georgian +uxoricide +confessor +community center +epigone +tagger +abrading stone +cryoscope +nautch girl +reliever +Cartesian +Indian beech +protoplasmic astrocyte +fundamentalist +mustard sauce +crank +houselights +five-point bishop's cap +comedienne +triangle +presentist +beaugregory +dreamer +Wave +blue mockingbird +Barbados gooseberry +ten-spined stickleback +papoose +silky pocket mouse +holdup man +agent-in-place +suspensory +emigrant +ropemaker +bookbinder +jumby bead +undershrub +Killarney fern +sheep bell +city slicker +equerry +pea crab +down-and-out +blackmouth bass +shirtmaker +lister +UNIX guru +snipefish +gimbal +maisonette +haircloth +Ranvier's nodes +pigmy talinum +tribute album +msasa +hydroxide ion +madame +four-pounder +prophet +sloganeer +field-effect transistor +nude mouse +canteen +Calostoma lutescens +buteonine +sunlamp +Uruguay potato +Spanish tamarind +Prince-of-Wales'-heath +kishke +caprifig +chincapin +hegari +alarmist +bathtub gin +astatic galvanometer +Calostoma ravenelii +marang +tussah +coin box +bugleweed +hacker +frontal eminence +timekeeper +shunt +bicycle clip +mustang mint +caesium clock +hospice +glenoid fossa +archpriest +ex-gambler +incrustation +salvager +Donatist +violator +lamb succory +hygroscope +oilbird +sharptail mola +showplace 
+corn syrup +flashlight fish +pulse timing circuit +anchovy paste +fascista +chigoe +divan +Druid +squad room +Huntingdon elm +buffalo carpet beetle +carper +corn lily +goats' milk +assault gun +cockpit +Lochaber ax +Visigoth +occupier +Basotho +criminologist +spindle +Rosicrucian +Cornishwoman +musk kangaroo +artificial skin +pandurate leaf +Parkia javanica +roundhead +tea-like drink +basidiolichen +unguiculate +stepmother +Nauruan +gutta-percha tree +bloodberry +scarlet haw +marupa +censor +algebraist +pelvimeter +whaler +cowhide +paparazzo +biochip +internationalist +Yukon white birch +hangar queen +chlamydia +puttee +Pipturus albidus +pearly razorfish +sea moss +burglar +hoary golden bush +colter +drey +bushman's poison +maxillaria +gnetum +deadeye +shittah +swamp oak +damper block +deepwater squirrelfish +truffle +cangue +paleolith +lawyerbush +sorehead +Texas snowbell +Tremella reticulata +quarter +keelboat +dimity +whiner +Wagnerian +myrmecophyte +frontierswoman +pyrometric cone +big-tree plum +puppy +galbulus +hod +winceyette +carriage wrench +dictostylium +farmland +infanticide +Jacob's rod +threadfish +monocline +inamorato +leaf miner +purple cress +passer +black-fronted bush shrike +silverrod +bootmaker +segregate +captive +Edmontonia +spherometer +television transmitter +bladder +Saratoga spittlebug +dynamometer +lodge +smooth darling pea +Cossack +wake-up call +Olmec +sutler +molasses kiss +corner post +rattlesnake weed +yardmaster +adder +rhinoscope +referral +ulster +pantaloon +counterspy +gadgeteer +heart cherry +hospital chaplain +Clydesdale terrier +plank-bed +Russian thistle +actinometer +dyspeptic +common wolffia +firewall +seidel +potato moth +soapweed +seif dune +thill +cosmographer +absolver +halberdier +fire control system +kai apple +bastard pennyroyal +Big Brother +broadcast journalist +Albatrellus dispansus +citrophilous mealybug +split end +nickel-iron battery +Newtonian +gas maser +thumbstall +anaspid +dusky-footed wood rat 
+latitudinarian +flatbrod +schizocarp +niqaabi +flight surgeon +gyrocompass +Polyporus tenuiculus +Utopian +mailboat +spellbinder +undercoat +cassareep +typical jerboa +photocathode +katharometer +bight +fur-piece +penetration bomb +malik +Siberian millet +nanomia +Wykehamist +tosser +gyrostabilizer +microwave diathermy machine +crystal set +wall +legatee +alfalfa +angwantibo +charioteer +piano maker +African mahogany +Morlett's crocodile +taro +parallel circuit +cush-cush +etymologist +matriculate +neem seed +cornerback +kingfisher daisy +redoubt +blastomycete +peplos +costumier +publican +tobogganist +semolina +myrmidon +parricide +gymslip +whoremaster +cryptocoryne +header +platitudinarian +barleycorn +spiral bandage +reciter +abecedarian +dance +wrymouth +bilberry +Liopelma hamiltoni +streamliner +Fordhooks +fixed phagocyte +radiobiologist +neurologist +Selkup +dollarfish +cascade everlasting +acrodont +boarhound +midstream +theatrical producer +abhorrer +goldsmith +photometrist +Anglo-Saxon +rugel's plantain +sable +workmate +ferule +ankus +earleaved umbrella tree +Passamaquody +timucu +Mexican pocket mouse +yerba santa +Rochon prism +apomict +monocarp +sweet unicorn plant +common winterberry holly +archivist +drypis +paretic +fly-by-night +white-berry yew +Schoolman +blue cheese dressing +vintager +squatter +Euphausia pacifica +corrugated fastener +yellow henbane +Croesus +almoner +analphabet +acoustic delay line +sheep frog +workhouse +horseleech +venturer +pond-scum parasite +Pyrenees daisy +plagiarist +Truncocolumella citrina +rerebrace +group captain +caddis fly +hot-rock penstemon +kanzu +stylopodium +slopseller +rauli beech +starter +ootid +statesman +distributor cam +ascot +falcon-gentle +Duplicidentata +spotted antbird +heliometer +false buckthorn +Allegheny spurge +Cavalier +dart +photocoagulator +master-at-arms +kei apple +baldachin +crapshooter +gametangium +white hope +chipotle +spike heath +Scotch woodcock +Florentine +differential analyzer 
+Mitrula elegans +wet cell +basil balm +Circassian +corn cake +bouncing betty +vice-regent +lagerphone +ketembilla +whoremaster +fork +tetrasporangium +trifler +pill head +life-support system +quartermaster general +tobacco thrips +officeholder +teredo +toyon +Sundacarpus amara +Phytophthora citrophthora +naif +lobbyist +alligator wrench +bully +heavy +toxicologist +radio chassis +waterdog +drive line +kaffir cat +foster-brother +breakax +curette +traditionalist +pipe vise +striped button quail +gawker +homeotherm +schoolyard +battue +kalansuwa +deviationist +Bolshevik +transponder +pungapung +iron +Eyeish +roccella +manglietia +Tory +print seller +Texas Ranger +otter shrew +seconder +shellflower +outlier +party man +wold +hayfork +oncologist +framer +co-beneficiary +ocean pout +Chinese angelica +scrimshaw +air attache +false gromwell +standing press +fringepod +specifier +automatic choke +durum +yenta +wassailer +reeler +signora +beach pancake +common booklouse +pellicle +backroom boy +den mother +associate +Unitarian +gambist +brookweed +clubroom +cat's-tail +playboy +self-registering thermometer +doorstop +bennet +yak's milk +escapee +quail bush +sparge pipe +coast boykinia +screw key +half gainer +aggravator +cotton mill +tailor's chalk +free agent +cotton mouse +deadhead +bunny +turpentine camphor weed +amaranth +ceratodus +red lauan +beam-ends +thermograph +wally +Toda +handrest +commissary +oak-leaved goosefoot +manufacturer +voicer +Jafnea semitosta +bench hook +finder +abyssal zone +rabbitwood +Hercules'-club +epicarp +declinometer +camp follower +signaler +Australian pea +putz +qadi +banded palm civet +egg timer +regnellidium +calisaya +harvestfish +sound spectrograph +side-wheeler +glomerule +woolly rhinoceros +Black Muslim +horticulturist +ornithomimid +cryometer +battlefront +gametophyte +airmailer +cuisse +nakedwood +baseball club +slasher +anise +leatherleaf +leatherjacket +horned pondweed +gofer +Saigon cinnamon +barong +blazer +twinkler +skeleton 
shrimp +dial +floorwalker +case shot +flannelbush +cultivated parsnip +Jane Doe +few-flowered leek +nogging +placer miner +muzzler +serge +lion-hunter +capulin +Wandering Jew +ascidian tadpole +hispid pocket mouse +southern spatterdock +milk wagon +junior middleweight +duck sauce +promycelium +protozoologist +cascade liquefier +tout +longheaded thimbleweed +charcoal burner +footage +slop +bridge agent +miller's-thumb +Job's comforter +marocain +tanker plane +lancetfish +knocker +toque +ordinand +umbrella bird +favorite son +hare's-foot bristle fern +business traveler +plotter +Asiatic shrew mole +tallyman +stump +Paleacrita vernata +index register +mortgagee +accuser +codger +sand rat +seaside centaury +chiropractor +Florida smoothhound +dwarf sperm whale +T-man +sannup +dragonhead +numdah +alkali grass +gynobase +kymograph +ascolichen +steward +waterline +Nazarene +filer +lapidary +muncher +wincey +scyphus +question master +besieger +worldling +docent +facing +atmometer +quern +puerpera +three-decker +calliope +wild red oat +bailee +flame pea +cattle cake +theist +yellowtail flounder +cosmopolitan +rocket engineer +vouchee +Turkoman +hard sauce +Thousand Island dressing +assayer +messmate +mutilator +oyster bar +flame tokay +countess +prairie mimosa +microsporangium +cotter +townsman +paring +fundraiser +simperer +Comrade +orlop deck +power takeoff +cattleship +prime meridian +Javanthropus +scriptorium +curandera +long-clawed prawn +maestro +paster +potato tuberworm +chachka +junkyard +cape yellowwood +reentrant polygon +Liberian coffee +restaurateur +Alsophila pometaria +Jekyll and Hyde +electrophorus +Scomberomorus maculatus +manipulator +gromwell +chicken provencale +ashram +mangel-wurzel +shamrock pea +dossal +adducer +erection +Mysore thorn +smoothie +chufa +brace wrench +victualer +litterer +linstock +Protium guianense +palfrey +banyan +klieg light +dangleberry +trooper +yaupon holly +quitter +tradescant's aster +nullipara +melter +devil's urn +ghostwriter 
+mouth +analogist +Creek +sonic depth finder +fucker +locus of infection +mortician +esophageal smear +locum tenens +conic projection +aroeira blanca +bellarmine +night porter +automobile mechanic +codpiece +Munro +cottonweed +scoinson arch +tinderbox +frozen food +waterproofing +Egyptian henbane +lash +transactor +American smooth dogfish +existentialist +grabber +Sonoran lyre snake +Rufous rubber cup +colors +weekend warrior +power user +perennial salt marsh aster +Puritan +Apalachicola rosemary +anecdotist +tosser +moth bean +agnostic +stretcher-bearer +browntail +optimist +brewer's mole +astronomy satellite +flat file +rust mite +tuberous plant +day laborer +buster +trapezoid +bevatron +nonresident +Streptomyces griseus +mangosteen +customer agent +hero worshiper +suicide bomber +procellariiform seabird +archiannelid +reaction turbine +distortionist +bulldog wrench +grainy club +scalp +Aztec +scow +globigerina +pedant +heartleaf manzanita +kanchil +low gallberry holly +containment +scandalmonger +rose-colored starling +Powhatan +addle-head +Chilean rimu +Atlantic sea bream +arthrospore +ramrod +root climber +Kalapooia +roach clip +Schreiber's aster +horseradish +albino +Kshatriya +trombidiid +blasting cap +body pad +brachium +shallu +Wynnea americana +slender centaury +munj +upset +wind tunnel +cottonwick +airing cupboard +pepper shrub +ambrosia +languisher +chosen +rose globe lily +purple apricot +costia +sloop of war +sultana +frontlet +booster +sargassum fish +broad-leaved montia +rifleman bird +stillroom +amoralist +enginery +meter maid +fitment +southern bog lemming +Athenian +clincher +cusk-eel +mackintosh +diaphone +corozo +Australian reed grass +czar +spongioblast +Eurafrican +airhead +Shahaptian +Roman +pollinium +tourist class +halogeton +stamper +emperor +malingerer +tramp steamer +Peziza domicilina +pilot cloth +stenopterygius +cost accountant +Queen's Counsel +wine-maker's yeast +poppet +cage +rowlock arch +landgrave +bearded wheatgrass +stink bell 
+quaker +undesirable +algarroba +resistance pyrometer +exorcist +carib wood +guvnor +border patrolman +bathhouse +licenser +headman +rentier +pine spittlebug +nut-leaved screw tree +paraduodenal smear +apron +necker +smilax +Alpine besseya +creeper +castle +ground bait +Queensland grass-cloth plant +sclerotium +great yellowcress +fat farm +Stoker +hoop snake +elixir of life +Trotskyite +home buyer +wheat berry +Tutelo +semi-climber +utahraptor +wet-bulb thermometer +packrat +hygrophyte +darter +sketcher +refiner +camlet +midgrass +compound +tarwood +Colorado River hemp +toiler +abstractor +override +dwarf pipefish +plodder +briefcase computer +trunk hose +brown butter +valve-in-head engine +cymbalist +explosive detection system +horsewoman +boutonniere +chinchilla +venerator +scourer +exarch +cohune nut +ayapana +continental divide +cosigner +stalker +pyxie +Genet +Macowanites americanus +open-hearth furnace +water chestnut +American frogbit +tarwood +cutter +scout +burr +upsetter +grist +tagasaste +mouthpiece +palette +rattan +letterman +Exmoor +Methodist +eelblenny +marasca +slide valve +ventilation +saddle hackle +Yakut +flux applicator +air traveler +murder suspect +Cynocephalus variegatus +idolizer +Surgeon General +nutlet +little-head snakeweed +germ tube +fellow traveler +raceabout +commodore +czar +anamorphosis +treelet +girlfriend +groundnut +sideline +giant star grass +goffer +spark lever +oubliette +processor +tare +plodder +extremist +Kipp's apparatus +gripsack +S wrench +viscountess +bridgehead +cascarilla +Asiatic flying squirrel +protoceratops +equerry +difflugia +princeling +moonlighter +aspergill +common flat pea +Utahan +imperial mammoth +plantain-leaved pussytoes +Boott's goldenrod +bootlegger +reed pipe +runcinate leaf +onion salt +nitrite bacterium +introvert +duck +New World opah +goliath frog +heterostracan +disrupting explosive +haggler +candlenut +false bugbane +returning officer +eudiometer +ship-breaker +metazoan +mandarin +patka +gill 
net +cavity wall +armilla +rainmaker +dealfish +orderly +gleaner +muffin man +house sitter +alto +sand devil's claw +vulcanizer +appendicularia +boron chamber +chess +bitok +anchovy butter +dropout +flour mill +bishop +escapist +scapegrace +stanhope +smooth winterberry holly +upstager +stalking-horse +pony +prairie gourd +parabolic mirror +Polaroid +slasher +lap +garlic butter +sendee +German millet +hairy honeysuckle +Swiss canton +Scleroderma flavidium +red goatfish +telegraph plant +Jungian +garment cutter +mallee hen +stranger +driveway +schooner +Paiute +cisco +trestlework +sipper +shanny +romanticist +Molly Miller +mountain rimu +odd-leg caliper +bitumastic +Western Australia coral pea +labor coach +latchkey +harpulla +solitary pussytoes +chop-suey greens +coil +guimpe +diapir +Osage +gutta-percha tree +giant eland +reticulation +garden huckleberry +quick study +Hudson bay collared lemming +coreligionist +Lancastrian +stumblebum +omnirange +seersucker +Potemkin village +Rhea Silvia +symphonist +bolti +jaw +jaconet +page +visiting fireman +haulm +p-n junction +landlubber +yellow jack +triclinium +souari +invader +fire walker +Luddite +Plott hound +hemming-stitch +winker +star-duckweed +craniometer +Arabidopsis lyrata +loser +cypripedia +trimmer arch +cookhouse +pink fivecorner +transfer +ringleader +northern pocket gopher +moke +blockade-runner +cyclostome +web-spinning mite +Whig +transcriber +malahini +sawyer +patent log +paca +tragedian +thermojunction +soffit +black buffalo +foreigner +applecart +brit +pole horse +white mullet +argentinosaur +Homo soloensis +bounty hunter +decumary +hand +paperboy +Smitane +windowpane +Java man +Wynnea sparassoides +prune +middy +lilliputian +sorb +pyrostat +guest worker +hold +leaseholder +vegan +humanist +salinometer +piton +zygospore +means +night rider +tetraspore +archipelago +radiomicrometer +nitpicker +spot weld +slicer +girlfriend +round-tailed muskrat +cock's eggs +Shavian +bay +nuclear chemist +planetarium 
+hiccup nut +Marylander +milling +microsporidian +brown cup +Strophanthus kombe +little skate +emancipator +paperhanger +archaeopteryx +maigre +Mastotermes electrodominicus +procurer +seizure-alert dog +homeboy +cotton strain +mute +siren +spearnose bat +phenacomys +gayal +arsenal +pitchfork +Port Jackson heath +cud +magnetic core memory +interferometer +water jacket +account executive +hodoscope +window oyster +sudatorium +syncopator +loment +hypertensive +smoothbark +Geogia holly +nailhead +African holly +musette +chafeweed +microflora +derrick +strawworm +shogun +queen post +jerboa kangaroo +columbo +royal +sourball +solenogaster +cardsharp +Homo habilis +intaglio +calf's-foot jelly +flotsam +skirret +baronduki +chyme +shovel hat +Welsh +monoplane flying fish +groundfish +tablet-armed chair +swan dive +Indian club +colonial +cassiri +pyramidal tent +praya +silk vine +time clock +button snakeroot +clews +Korean lespedeza +diffuser +ripping bar +puttyroot +nipple shield +headpin +juneberry holly +hub-and-spoke +laver +weldment +plain flour +hoosegow +dudeen +grey skate +line of life +mung +arariba +Newtown Wonder +rock candy +side chapel +castor sugar +narrow-leaved white-topped aster +babassu nut +puka +rings +catchall +heat shield +caroche +oxbow +Australian coral snake +tapper +sporangiophore +fenugreek +spruce gall aphid +gouache +cutoff +private line +pod +cargo hatch +nailhead +penile implant +geophyte +small-leaved linden +deepwater pipefish +paperhanger +hairy spurge +Persian lamb +subtropics +feed grain +clarence +nonparticipant +scorpioid cyme +hand brake +tiller +Geglossaceae +albacore +monochrome +goa bean +bur +tongue worm +psittacosaur +frog's lettuce +pectoral +terreplein +light filter +fishpaste +dry point +grison +feterita +dolichocephalic +oenomel +stretcher +swag +cheval-de-frise +mountain beaver +scammony +discus +leatherleaf saxifrage +wharf rat +Dominique +pelycosaur +depth gauge +bishop +archespore +true anomaly +silver jenny +mercy seat 
+kelp +oviraptorid +acrylic +Chinese pea tree +meat house +bilge well +Temperate Zone +whale louse +balbriggan +briefcase bomb +pump-type pliers +oil +sour gourd +Jewbush +lunette +Chinese paddlefish +pyxidium +beechnut +calabar bean +grugru nut +gib +blunt file +cataphyll +megasporangium +blockbuster +sliding seat +hogchoker +calceus +Connarus guianensis +honest woman +survivor +second balcony +tempera +Calvary clover +murine +outwork +bogy +elephant's-foot +conning tower +set square +blackfly +stirk +Streptomyces erythreus +blade +goldfield +snowball +mortal enemy +waltzer +shoal +galley +hitchhiker +lithophyte +brisling +scauper +esophagoscope +grab +subtracter +philosopher +duplex apartment +southeastern pocket gopher +bonduc nut +reverberatory furnace +grader +lamp house +northern bog lemming +brotula +ornithopod +ptyalith +obturator +perpetual motion machine +range pole +Africander +curvet +daisy print wheel +floor +collector +mutant +tuck +fore-and-after +senega +buckler mustard +louvar +Tarsius glis +culdoscope +Spanish fly +steering gear +hatchet man +museum +saw set +cambric tea +comber +thermohydrometer +stationer +chalcis fly +bryanthus +whipstitch +harvest mite +rock gunnel +time bomb +rariora +pigfish +apetalous flower +head shop +horned whiff +sandpit +tachistoscope +sundries +taffrail +caller +monofocal lens implant +Dover's powder +souari nut +crowbait +render +Shakespearian +hagberry +megatherian +magus +hatchel +mangabey +garroter +piedmont +cope +barrio +psychodid +rigout +distributor +croupier's rake +sarcenet +narrow-leaved water plantain +treenail +biped +lanternfish +overdrive +barndoor skate +picket boat +amber lily +sawpit +sand lance +bucket shop +common beech +laundry truck +surtout +grogram +tampion +escape hatch +interstice +shop bell +snake mackerel +nakedwood +tumbrel +mericarp +mountain paca +cab +big board +cringle +eusporangium +shipping room +coal chute +dumbwaiter +Smiledon californicus +man-at-arms +cartridge +deinonychus 
+pigeon pea +screw bean +spectacle +floorboard +cutting room +low-warp-loom +proconsul +sabicu +genipap +clapper +aquifer +archaeornis +belly flop +Protium heptaphyllum +interrupter +high-warp loom +knight +wiper +impression +poker +Pithecanthropus +sable +guardroom +tenter +wellhead +raja +strickle +sodomite +mountebank +sand leek +Barbados gooseberry +shuffler +sensory fiber +crab-eating opossum +etching +rare bird +scup +fagot +negro vine +hutment +droshky +nephoscope +lady chapel +cutty stool +release +vestiture +buff +standard +Tabernacle +vascular ray +snakewood +chlorobenzylidenemalononitrile +limnologist +pouched mole +microwave linear accelerator +Mastotermes darwiniensis +wind tee +orange bat +open sight +carpospore +rampant arch +sabbatia +cursor +post exchange +bellpull +center +cyclostyle +canonist +pygmy sperm whale +moa +king +pass-through +angioscope +marrow +hookup +revetment +acanthocephalan +good Samaritan +apatosaur +web spinner +dixie +ommastrephes +crossbench +candlewick +jack +light arm +caisson +kaki +quandong nut +Meuniere butter +coquilla nut +mast +black +twitterer +bluethroat pikeblenny +shielding +water-shield +urolith +elephant bird +clearway +dark lantern +schizopetalon +press +Nazi +sugarberry +Maltese +stevedore +hair shirt +party wall +gainer +blackheart +nothosaur +cavetto +evergreen bittersweet +chemical bomb +calpac +shingle +turnpike +animator +heaver +isoclinic line +death knell +liner +anathema +aerie +razorback +Ichyostega +pound net +French dressing +mottle +yard +string tie +bell seat +brattice +battering ram +sierra +pompon +vertex +stomach pump +electrolytic cell +escolar +telpher +roadhouse +cerecloth +tartare sauce +letter case +whale sucker +hob +teg +canvas +strickle +hectograph +Cartagena bark +mail car +acinus +freedom rider +bread sauce +picture window +Rhizopogon idahoensis +pinprick +mass spectrograph +ringer +devil's cigar +salad cream +marlberry +airbrake +Clark cell +yellow-throated marten +wire gauge 
+dinoceras +aba +harpoon log +plate rail +mustard plaster +coelophysis +journal box +puce +ballcock +quartering +izar +clinid +whirler +turnspit +deathbed +pottle +shot +doubler +Coryphaena equisetis +English sole +chicken feed +borrow pit +mylodontid +Chilean nut +Kundt's tube +ling +asthenosphere +reseau +death seat +immovable bandage +peppermint patty +lecturer +electron multiplier +bear claw +hyacinth +beaked salmon +toehold +scull +snowball +gangsaw +fiber +oxeye +lashing +Beckman thermometer +fence +cantilever +dinner theater +Reynard +jag +umbrella plant +camera lucida +beaver +slug +yellowfin croaker +Sibley tent +rat-tail file +anchovy pear +soldier +cackler +chaise +Pitot-static tube +minniebush +Episcopalian +oleaster +ejaculator +wavy-leaved aster +knight +rack +real storage +magnetic mine +cocoa plum +vesiculovirus +birch leaf miner +water chevrotain +rudapithecus +torpedo tube +itch mite +warren +loft +washerman +terrace +nonstarter +shit +platform +caudex +ground control +Ostariophysi +slopshop +Peruvian cotton +crystal oscillator +plastic bomb +bar bit +watering cart +Asiatic sweetleaf +artificial joint +chariot +casern +charge-exchange accelerator +display adapter +hornpipe +honey bell +planula +Nephthytis afzelii +hame +ranter +trachodon +synchrocyclotron +splasher +heterotroph +Nicol prism +Himalayan rhubarb +headfast +put-put +bitter almond +parr +scantling +power breakfast +madder +Catalpa bignioides +rose of Jericho +spark chamber +rhizome +beard worm +supper club +negro peach +keratoscope +wain +apple aphid +planking +time-delay measuring instrument +sternpost +sicklepod +lake bed +gatherer +monotype +dead-man's float +poison gas +dicynodont +organism +cell +person +animal +plant +food +artifact +dressage +contact sport +outdoor sport +gymnastics +track and field +jumping +high jump +skiing +water sport +swimming +dive +floating +skin diving +rowing +boxing +sledding +tobogganing +wrestling +skating +ice skating +roller skating +racing +boat 
racing +riding +equestrian sport +cycling +blood sport +hunt +fishing +angling +casting +athletic game +outdoor game +golf +field game +field hockey +football +American football +ball game +baseball +court game +badminton +basketball +tennis +sport +Seder +scavenger +bottom-feeder +work animal +beast of burden +pack animal +domestic animal +marine animal +female +male +young +young mammal +pup +cub +lion cub +tiger cub +microorganism +arbovirus +herpes +herpes zoster +reovirus +moneran +cyanobacteria +enteric bacteria +actinomycete +streptomyces +diplococcus +parasite +ectoparasite +protoctist +protozoan +sarcodinian +ameba +ciliate +alga +brown algae +green algae +sporozoan +cypriniform fish +cyprinid +carp +domestic carp +shiner +catostomid +buffalo fish +cyprinodont +killifish +topminnow +squirrelfish +stickleback +pipefish +embryo +fetus +blastula +chordate +cephalochordate +tunicate +ascidian +vertebrate +aquatic vertebrate +jawless vertebrate +lamprey +hagfish +cartilaginous fish +holocephalan +chimaera +elasmobranch +shark +mackerel shark +mako +requiem shark +dogfish +smooth dogfish +spiny dogfish +smooth hammerhead +smalleye hammerhead +shovelhead +ray +sawfish +roughtail stingray +butterfly ray +eagle ray +manta +skate +bird +gamecock +night bird +ratite +passerine +oscine +accentor +lark +pipit +finch +canary +dark-eyed junco +New World sparrow +bunting +honeycreeper +sparrow +grosbeak +towhee +weaver +grassfinch +tyrannid +New World flycatcher +kingbird +pewee +cotinga +antbird +Old World flycatcher +thrush +nightingale +Old World chat +warbler +kinglet +Old World warbler +New World warbler +flycatching warbler +New World chat +yellowthroat +New World oriole +northern oriole +meadowlark +New World blackbird +grackle +Old World oriole +starling +myna +corvine bird +crow +Old World jay +common European jay +New World jay +blue jay +Canada jay +Rocky Mountain jay +nutcracker +European magpie +American magpie +Australian magpie +wren +marsh wren +thrasher 
+New Zealand wren +creeper +titmouse +black-capped chickadee +Carolina chickadee +swallow +martin +tanager +shrike +butcherbird +bush shrike +bowerbird +European water ouzel +American water ouzel +vireo +waxwing +bird of prey +hawk +black kite +swallow-tailed kite +white-tailed kite +harrier +falcon +peregrine +caracara +eagle +young bird +sea eagle +Aegypiidae +Old World vulture +griffon vulture +bearded vulture +Egyptian vulture +black vulture +New World vulture +buzzard +condor +Andean condor +California condor +black vulture +king vulture +owl +horned owl +scops owl +amphibian +salamander +newt +Pacific newt +ambystomid +climbing salamander +web-toed salamander +frog +true frog +true toad +spadefoot +tree toad +cricket frog +tongueless frog +reptile +anapsid +diapsid +chelonian +turtle +sea turtle +ridley +snapping turtle +musk turtle +diamondback terrapin +Western box turtle +tortoise +soft-shelled turtle +saurian +lizard +gecko +iguanid +spiny lizard +fence lizard +horned lizard +skink +teiid lizard +racerunner +plateau striped whiptail +Chihuahuan spotted whiptail +western whiptail +checkered whiptail +agamid +moloch +anguid lizard +venomous lizard +lacertid lizard +chameleon +monitor +crocodilian reptile +crocodile +alligator +caiman +armored dinosaur +ankylosaur +bone-headed dinosaur +ceratopsian +hadrosaur +saurischian +sauropod +theropod +ceratosaur +maniraptor +synapsid +pterosaur +ichthyosaur +snake +colubrid snake +smooth green snake +rough green snake +racer +blacksnake +whip-snake +rat snake +bull snake +common kingsnake +milk snake +common garter snake +ribbon snake +Western ribbon snake +common water snake +water moccasin +grass snake +viperine grass snake +sand snake +lyre snake +blind snake +indigo snake +constrictor +boa +python +elapid +coral snake +coral snake +cobra +mamba +black mamba +krait +viper +pit viper +rattlesnake +timber rattlesnake +arthropod +arachnid +false scorpion +whip-scorpion +spider +European wolf spider +acarine +hard 
tick +Ixodes dammini +Ixodes neotomae +Ixodes pacificus +Ixodes scapularis +sheep-tick +Ixodes persulcatus +Ixodes dentatus +Ixodes spinipalpis +wood tick +soft tick +mite +trombiculid +spider mite +house centipede +gallinaceous bird +domestic fowl +jungle fowl +chicken +cock +hen +turkey +grouse +European black grouse +Asian black grouse +blackcock +greyhen +red grouse +moorhen +greater prairie chicken +lesser prairie chicken +heath hen +guan +chachalaca +megapode +mallee fowl +phasianid +pheasant +bobwhite +northern bobwhite +Old World quail +migratory quail +peafowl +California quail +Hungarian partridge +red-legged partridge +Greek partridge +mountain quail +guinea fowl +columbiform bird +pigeon +dove +turtledove +domestic pigeon +homing pigeon +sandgrouse +parrot +cockatoo +lory +varied Lorikeet +rainbow lorikeet +parakeet +cuculiform bird +cuckoo +crow pheasant +coraciiform bird +roller +kingfisher +hoopoe +apodiform bird +swift +Archilochus colubris +thornbill +goatsucker +piciform bird +woodpecker +flicker +sapsucker +toucanet +trogon +quetzal +aquatic bird +waterfowl +anseriform bird +duck +teal +widgeon +sheldrake +goldeneye +scaup +wood duck +sea duck +scoter +merganser +gosling +gander +Chinese goose +greylag +blue goose +snow goose +brant +common brant goose +honker +barnacle goose +swan +tundra swan +screamer +crested screamer +mammal +prototherian +monotreme +marsupial +opossum +bandicoot +kangaroo +common wallaby +hare wallaby +nail-tailed wallaby +rock wallaby +pademelon +tree wallaby +rat kangaroo +phalanger +dasyurid marsupial +dasyure +placental +calf +buck +insectivore +mole +shrew mole +shrew +water shrew +tenrec +invertebrate +sponge +glass sponge +coelenterate +Chrysaora quinquecirrha +hydrozoan +siphonophore +anthozoan +actinia +coral +gorgonian +stony coral +ctenophore +worm +planarian +fluke +liver fluke +Fasciolopsis buski +schistosome +tapeworm +echinococcus +taenia +common roundworm +chicken roundworm +pinworm +eelworm +vinegar eel 
+trichina +hookworm +filaria +Guinea worm +annelid +oligochaete +polychaete +leech +mollusk +scaphopod +gastropod +abalone +scorpion shell +giant conch +edible snail +garden snail +brown snail +Helix hortensis +seasnail +neritid +limpet +Hermissenda crassicornis +cowrie +bivalve +clam +quahog +cockle +oyster +mussel +marine mussel +freshwater mussel +scallop +shipworm +cephalopod +octopod +decapod +squid +crustacean +malacostracan crustacean +decapod crustacean +crab +swimming crab +spider crab +lobster +true lobster +Old World crayfish +American crayfish +shrimp +prawn +krill +stomatopod +mantis shrimp +woodlouse +pill bug +sow bug +sea louse +amphipod +copepod +barnacle +wading bird +stork +ibis +common spoonbill +roseate spoonbill +heron +egret +night heron +American bittern +European bittern +least bittern +whooping crane +rail +crake +gallinule +purple gallinule +coot +great bustard +plain turkey +button quail +trumpeter +seabird +shorebird +plover +turnstone +sandpiper +yellowlegs +ruff +tattler +woodcock +snipe +greyback +red-breasted snipe +curlew +godwit +stilt +stilt +phalarope +courser +coastal diving bird +larid +gull +tern +jaeger +skua +auk +guillemot +murre +puffin +gaviiform seabird +podicipitiform seabird +grebe +pelecaniform seabird +white pelican +Old world white pelican +gannet +snakebird +sphenisciform seabird +penguin +pelagic bird +wandering albatross +black-footed albatross +petrel +shearwater +storm petrel +aquatic mammal +cetacean +whale +baleen whale +rorqual +toothed whale +beaked whale +dolphin +bottlenose dolphin +porpoise +sea cow +carnivore +pinniped mammal +seal +eared seal +fur seal +fur seal +South American sea lion +California sea lion +Australian sea lion +Steller sea lion +earless seal +walrus +canine +bitch +dog +cur +toy dog +toy spaniel +English toy spaniel +hunting dog +hound +coonhound +dachshund +foxhound +wolfhound +greyhound +terrier +bullterrier +rat terrier +Manchester terrier +fox terrier +wirehair +Welsh terrier 
+schnauzer +Skye terrier +sporting dog +retriever +pointer +setter +spaniel +springer spaniel +water spaniel +working dog +watchdog +shepherd dog +Belgian sheepdog +pinscher +Sennenhunde +mastiff +bulldog +guide dog +sled dog +liver-spotted dalmatian +spitz +griffon +corgi +poodle +wolf +coydog +wild dog +striped hyena +brown hyena +spotted hyena +aardwolf +fox +black fox +silver fox +blue fox +feline +cat +domestic cat +tom +blue point Siamese +wildcat +common lynx +Canada lynx +bobcat +spotted lynx +caracal +big cat +leopardess +panther +lioness +lionet +Bengal tiger +tigress +saber-toothed tiger +bear +Syrian bear +grizzly +Alaskan brown bear +cinnamon bear +viverrine +civet +Indian mongoose +ichneumon +slender-tailed meerkat +suricate +bat +fruit bat +carnivorous bat +leafnose bat +false vampire +vespertilian bat +long-eared bat +freetail +vampire bat +predator +game +game bird +fossorial mammal +tetrapod +insect +beetle +two-spotted ladybug +Mexican bean beetle +Hippodamia convergens +vedalia +bombardier beetle +calosoma +searcher +firefly +sawyer +pine sawyer +flea beetle +Colorado potato beetle +carpet beetle +clerid beetle +lamellicorn beetle +scarabaeid beetle +scarab +tumblebug +dorbeetle +June beetle +melolonthid beetle +elaterid beetle +snout beetle +boll weevil +blister beetle +bark beetle +darkling beetle +flour beetle +seed beetle +pea weevil +bean weevil +rice weevil +louse +flea +dipterous insect +gall midge +housefly +tsetse fly +blowfly +bluebottle +greenbottle +flesh fly +tachina fly +gadfly +botfly +human botfly +sheep botfly +warble fly +horsefly +bee fly +fruit fly +louse fly +horn fly +mosquito +gnat +fungus gnat +hymenopterous insect +drone +worker +honeybee +Africanized bee +black bee +Carniolan bee +Italian bee +carpenter bee +bumblebee +cuckoo-bumblebee +andrena +Nomia melanderi +leaf-cutting bee +mason bee +potter bee +wasp +vespid +paper wasp +hornet +sphecoid wasp +digger wasp +chalcid fly +sawfly +pharaoh ant +little black ant +army 
ant +carpenter ant +fire ant +wood ant +slave ant +Formica fusca +slave-making ant +sanguinary ant +bulldog ant +Amazon ant +termite +dry-wood termite +orthopterous insect +short-horned grasshopper +locust +migratory locust +migratory grasshopper +long-horned grasshopper +katydid +mormon cricket +sand cricket +mole cricket +European house cricket +field cricket +tree cricket +snowy tree cricket +phasmid +diapheromera +oriental cockroach +American cockroach +Australian cockroach +German cockroach +giant cockroach +praying mantis +hemipterous insect +leaf bug +mirid bug +lygus bug +lygaeid +coreid bug +heteropterous insect +water bug +water strider +assassin bug +homopterous insect +whitefly +sweet-potato whitefly +coccid insect +scale insect +soft scale +armored scale +mealybug +plant louse +aphid +greenfly +woolly aphid +adelgid +dog-day cicada +seventeen-year locust +spittle insect +plant hopper +psocopterous insect +psocid +booklouse +ephemerid +neuropteron +green lacewing +brown lacewing +odonate +trichopterous insect +caseworm +thysanuran insect +bristletail +thysanopter +thrips +earwig +lepidopterous insect +butterfly +nymphalid +fritillary +emperor butterfly +danaid +pierid +small white +large white +southern cabbage butterfly +blue +copper +American copper +hairstreak +Strymon melinus +moth +tortricid +lymantriid +geometrid +cankerworm +pyralid +tineoid +tineid +clothes moth +gelechiid +grain moth +noctuid moth +cutworm +underwing +hawkmoth +bombycid +saturniid +giant silkworm moth +silkworm +arctiid +lasiocampid +tent caterpillar +webworm +webworm moth +caterpillar +bollworm +woolly bear +larva +grub +pupa +queen +echinoderm +basket star +edible sea urchin +sand dollar +heart urchin +crinoid +trepang +lagomorph +leporid +rabbit +eastern cottontail +swamp rabbit +marsh hare +leveret +European hare +jackrabbit +white-tailed jackrabbit +blacktail jackrabbit +polar hare +snowshoe hare +pika +rodent +mouse +rat +pocket rat +field mouse +brown rat +jerboa rat 
+water rat +New World mouse +wood mouse +wood rat +vole +packrat +Eurasian hamster +golden hamster +gerbil +lemming +pied lemming +Old World porcupine +brush-tailed porcupine +long-tailed porcupine +New World porcupine +Canada porcupine +pocket mouse +kangaroo rat +jumping mouse +jerboa +dormouse +gopher +squirrel +tree squirrel +ground squirrel +prairie dog +American flying squirrel +groundhog +hoary marmot +yellowbelly marmot +Old World beaver +New World beaver +cavy +naked mole rat +ungulate +hyrax +odd-toed ungulate +equine +horse +foal +colt +male horse +stallion +mare +saddle horse +warhorse +pony +mustang +bronco +wild horse +pony +racehorse +racer +harness horse +workhorse +draft horse +trotting horse +ass +domestic ass +wild ass +onager +common zebra +mountain zebra +grevy's zebra +rhinoceros +tapir +even-toed ungulate +swine +piglet +porker +peccary +ruminant +bovid +bovine +ox +cattle +bull +cow +beef +Brahman +dairy cattle +Old World buffalo +Indian buffalo +carabao +Asian wild ox +American bison +wisent +sheep +lamb +domestic sheep +wild sheep +mountain sheep +goat +domestic goat +wild goat +goat antelope +antelope +Thomson's gazelle +Gazella subgutturosa +springbok +kudu +harnessed antelope +eland +waterbuck +oryx +deer +stag +red deer +mule deer +roe deer +caribou +chevrotain +camel +domestic llama +guanaco +alpaca +giraffe +musteline mammal +ermine +stoat +New World least weasel +Old World least weasel +longtail weasel +American mink +ferret +muishond +snake muishond +striped muishond +river otter +Eurasian otter +striped skunk +hooded skunk +hog-nosed skunk +spotted skunk +American badger +Eurasian badger +ferret badger +hog badger +marten +pachyderm +edentate +peba +apar +tatouay +peludo +giant armadillo +pichiciago +sloth +anteater +primate +ape +anthropoid ape +hominoid +hominid +homo +Homo erectus +Homo sapiens +australopithecine +great ape +western lowland gorilla +eastern lowland gorilla +mountain gorilla +silverback +western chimpanzee 
+eastern chimpanzee +central chimpanzee +pygmy chimpanzee +lesser ape +monkey +Old World monkey +talapoin +grivet +vervet +green monkey +chacma +mandrill +drill +rhesus +bonnet macaque +Barbary ape +crab-eating macaque +entellus +guereza +New World monkey +true marmoset +pygmy marmoset +tamarin +silky tamarin +pinche +lemur +tarsier +flying lemur +proboscidean +elephant +mammoth +procyonid +raccoon +fish +food fish +young fish +crossopterygian +lungfish +catfish +silurid +bullhead +channel catfish +gadoid +cod +hake +elver +common eel +tuna +moray +conger +teleost fish +clupeid fish +shad +herring +sardine +pilchard +anchovy +salmonid +salmon +Atlantic salmon +trout +brown trout +char +whitefish +smelt +tarpon +ribbonfish +toadfish +needlefish +flying fish +spiny-finned fish +percoid fish +perch +pike-perch +walleye +robalo +pike +pickerel +sunfish +crappie +freshwater bream +black bass +bass +serranid fish +grouper +hind +surfperch +cardinalfish +remora +carangid fish +jack +moonfish +pompano +scad +dolphinfish +characin +cichlid +snapper +grunt +sparid +sea bream +porgy +sciaenid fish +croaker +whiting +sea trout +mullet +goatfish +mullet +silversides +barracuda +sea chub +butterfly fish +damselfish +clown anemone fish +wrasse +blenny +pikeblenny +gunnel +goby +gempylid +scombroid +mackerel +Spanish mackerel +tuna +bonito +sailfish +billfish +marlin +tripletail +mojarra +ganoid +Pacific sturgeon +beluga +scorpaenoid +scorpaenid +scorpionfish +rockfish +lumpfish +greenling +gurnard +sea robin +plectognath +triggerfish +filefish +boxfish +spiny puffer +ocean sunfish +flatfish +righteye flounder +lefteye flounder +whiff +sole +abbey +abbey +abrader +accelerator +accessory +accommodation +acoustic device +acoustic modem +acrylic +action +actuator +adhesive bandage +adjustable wrench +aeolian harp +aerosol +after-shave +airbus +aircraft +airfield +airfoil +air gun +airplane +air pump +air-to-air missile +air-to-ground missile +alarm +alb +alcazar +Allen screw +alms 
dish +altimeter +Amati +ammeter +ammunition +amplifier +analog computer +analytical balance +anchor +anchor chain +aneroid barometer +angledozer +anklet +antenna +anteroom +antiaircraft +antiballistic missile +apartment +apartment building +aperture +apparatus +apparel +appliance +appliance +applicator +aquarium +arbor +arcade +arch +arc lamp +area +argyle +arm +armament +armature +armchair +armoire +armor +armored vehicle +armor plate +armrest +array +arrow +artificial heart +artillery +assembly +assembly plant +astrodome +astronomical telescope +athletic sock +atom bomb +atomic clock +atomizer +attachment +attack submarine +attire +audiocassette +audio system +audiotape +auditorium +autoclave +autoinjector +autoloader +automat +automat +automatic firearm +automatic rifle +automaton +auxiliary research submarine +awl +ax +axis +axle +axletree +baby bed +baby buggy +baby grand +back +background +backseat +badminton equipment +badminton racket +bag +bag +bag +baggage +bagpipe +bait +balance +balcony +balcony +bale +ball +ball gown +ballistic missile +ballistic pendulum +ball-peen hammer +ballroom +band +bandage +bandanna +banderilla +bar +bar +barbed wire +barge +barge pole +barn door +barograph +barrack +barrage balloon +barrel knot +barrel vault +barrier +barroom +base +base +baseball equipment +basilica +basin +basket +basketball equipment +bass +bass drum +bass horn +bastion +bat +bathhouse +battery +battle-ax +battle dress +battleship +bay rum +bay window +beading plane +beam +beam balance +bearing +beater +beating-reed instrument +bed +bed +bedclothes +bedroom +bedroom furniture +bedspread +bedspring +beehive +beer barrel +bell +bell push +bell tower +belt +belt buckle +bench +berlin +berth +besom +bevel gear +bicycle +bicycle chain +bier +billiard ball +bin +binding +bin liner +binocular microscope +bioscope +birchbark canoe +bird shot +bistro +bit +bit +black tie +blade +blade +blanket +blimp +blind +block +block plane +blouse +blower +blowtorch +bludgeon 
+boarding +boarding house +boardroom +boat +bobbin +body +body armor +body lotion +boiler +bolt +bolt +bomb +bomber +bongo +boom +boom +boomerang +boot +booth +booth +bore bit +Boston rocker +bota +bottle +bottle opener +bow +bow +bowed stringed instrument +bowl +bowl +bowline +bowling equipment +bowling pin +bowsprit +box +box +boxcar +boxing equipment +brace +brace +bracelet +bracket +brake +brake system +brass +brasserie +brazier +breechcloth +breeches +brewpub +brick +bricklayer's hammer +brickwork +bridal gown +bridge +briefcase +brigandine +brilliant pebble +brim +broad arrow +broadax +broad hatchet +broadsword +brush +bubble jet printer +buffer +buffet +building +building complex +bulldozer +bullet +bullhorn +bullnose +bundle +bunker +burial chamber +burner +bus +business suit +butt joint +button +buttress +butt shaft +buzz bomb +cabaret +caber +cabin +cabin +cabinet +cabinet +cabin liner +cable +cable +cafe +cafeteria +cafeteria tray +caff +cage +calculator +caliper +calorimeter +camera +camera lens +camera tripod +camp +camp +camp chair +camper +can +canal +candelabrum +candlestick +cane +cannikin +cannon +cannon +cannonball +canopy +canteen +canteen +canvas +canvas tent +cap +cap +cap +capacitor +caparison +cape +cap screw +capsule +car +car +carbine +carbon arc lamp +card index +cardioid microphone +car door +cargo liner +cargo ship +carillon +carpenter's hammer +carpenter's level +carpenter's mallet +carpenter's rule +carpet tack +carriage +carriage +carriage bolt +carrick bend +carrier +car seat +cart +cartridge +cartridge belt +cartridge holder +case +case +cashbox +casque +casserole +cassock +catch +catcher's mask +cathedra +cathedral +cathedral +catheter +cathode +cathode-ray tube +cat's-paw +cattle car +cautery +cavalry sword +cedar chest +cell +cell +cellblock +center +centrifuge +ceramic +ceramic ware +chain tongs +chair +chair of state +chalk +chamfer plane +chandlery +chapel +character printer +chassis +chasuble +chatelaine +checker 
+cheeseboard +chemical reactor +chessman +chest of drawers +child's room +china +chip +chip +chisel +choke +chokey +chordophone +chronoscope +chuck +church key +cigar lighter +circle +circuit +circuit board +circular plane +circular saw +cistern +civilian clothing +clamp +clamshell +clarinet +classroom +clavier +cleaning implement +cleaning pad +clean room +clinic +clip +cloak +clock +closed circuit +closed-circuit television +closet +cloth covering +clothes closet +clothes dryer +clothes hamper +clothes tree +clothing +clothing store +clout nail +clove hitch +clutch +coach +coal car +coal shovel +coat +coat closet +coating +coating +coat of paint +coaxial cable +cocked hat +coffee cup +coffee maker +coffer +coffin +coil +colander +collider +cologne +colonnade +color television +Colt +column +column +comb +comb +combination plane +combine +commissary +commodity +communication system +commutator +compact disk +compartment +compass +compass card +compound lens +compound lever +compressor +computer +computer circuit +computer network +computer screen +computer system +concentration camp +concert grand +concertina +condenser +condenser +condenser microphone +conductor +connecting rod +connection +conservatory +conservatory +contact +container +contrabassoon +control +control panel +control system +convent +converging lens +converter +convertible +conveyance +cooker +cooking utensil +cooler +cooling system +cord +cord +cordage +corner +correctional institution +corset +cosmetic +costume +costume +cotter +cotton +counter +counter +counter +counter tube +country house +coupling +court +court +coverall +covering +cowbarn +craft +cravat +crazy quilt +cream +cream pitcher +crematory +crepe +crib +cricket equipment +croquet equipment +crossbar +crossbow +crosspiece +crown jewels +cruiser +cruiser +cruise ship +crystal microphone +cudgel +cuff +cultivator +cup +cupboard +cupola +curb roof +curtain +cutout +cutter +cutting implement +cybercafe +cyclotron +cylinder +cymbal +dado 
plane +dagger +damper +dart +data converter +data input device +davenport +davenport +davit +dead axle +deck +deck +deck chair +deep-freeze +defensive structure +delay line +delicatessen +dental appliance +denture +depilatory +depressor +depth finder +derrick +destroyer +detector +detector +detonating fuse +detonator +developer +device +dial +dialyzer +diathermy machine +diesel locomotive +digital camera +digital computer +digital display +diner +dinghy +dining car +dining-hall +dining room +dining-room furniture +dining-room table +dinner dress +dinner pail +dinner table +diode +dip +diplomatic building +dipper +DIP switch +directional antenna +directional microphone +direction finder +disguise +dish +dish +disk +dispenser +display +display panel +distillery +ditch +ditch spade +dive bomber +doll +dolmen +domino +door +doorbell +doorlock +doornail +dormer window +dormitory +dot matrix printer +double-breasted suit +double-reed instrument +douche +dovecote +dovetail plane +downstage +drafting instrument +Dragunov +drawstring bag +dray +dredging bucket +dress +dress blues +dressing +dress uniform +drill +electric drill +drill rig +drinking fountain +drinking vessel +drip mat +drip pot +drive +drive +drogue +drogue parachute +drop-leaf table +dry battery +dry dock +dryer +dry masonry +dry wall +dugout canoe +dumdum +dumpcart +dune buggy +dungeon +duplicator +dustmop +dwelling +earphone +earthenware +easel +easy chair +edge tool +eiderdown +elastic bandage +electrical converter +electrical device +electric bell +electric frying pan +electric furnace +electric heater +electric lamp +electric motor +electric refrigerator +electro-acoustic transducer +electrode +electromagnet +electronic balance +electronic device +electronic equipment +electronic instrument +electronic voltmeter +electron microscope +electrostatic generator +electrostatic printer +elevator +embankment +embellishment +enamel +enamelware +enclosure +endoscope +engine +engine +ensemble +entrenching tool 
+epidiascope +equipment +eraser +escutcheon +espadrille +espresso shop +establishment +estaminet +exercise device +exhaust fan +exhibition hall +Exocet +expansion bolt +explosive device +external-combustion engine +extractor +fabric +face mask +face veil +facing +factory +fairlead +false face +fan +farm building +farm machine +fastener +fatigues +faucet +feedback circuit +fence +fencing sword +fender +ferry +fetoscope +field-sequential color television +fife +fifth wheel +fighter +figure eight +file +file server +filling +film +film +filter +filter +finery +finisher +fipple flute +fire +firearm +fire iron +fireplace +firkin +fisherman's bend +fisherman's knot +fisherman's lure +fishing boat +fishing rod +fishnet +flag +flageolet +flambeau +flannelette +flap +flashlight +flask +flatcar +flat tip screwdriver +fleet ballistic missile submarine +flight simulator +flip-flop +floating dock +floor +floor +floor cover +fly +flywheel +fob +foghorn +folder +food hamper +footbath +footbridge +foothold +foot rule +footwear +footwear +forceps +fore-and-aft sail +foremast +fore plane +fore-topmast +fork +formalwear +fortification +fortress +foundation garment +foundry +fragmentation bomb +framework +free-reed instrument +freight train +French door +friary +friction clutch +frigate +frill +frock coat +front projector +fruit machine +full-dress uniform +full metal jacket +funny wagon +fur hat +furnace +furnishing +furniture +fuse +gable +gable roof +gaff +galleon +gallery +galley +galley +gallows +galvanometer +gambling house +game +game equipment +gamp +garage +Garand rifle +garden +garden spade +garden tool +garment +gas burner +gas-discharge tube +gasket +gasoline engine +gate +gatehouse +gatepost +gathered skirt +gauge +gauze +gauze +gavel +gear +gear +gear +gearing +general-purpose bomb +generator +generator +Geneva gown +geodesic dome +girder +glass +glider +glove +glyptic art +goal +golf club +golf equipment +Gordian knot +Gothic arch +government building +government office 
+gown +gramophone +granary +granny knot +grapnel +grapnel +grate +graver +greasy spoon +greatcoat +great hall +greengrocery +grenade +grillroom +groined vault +Guarnerius +guidance system +guided missile +guildhall +guitar +guitar pick +gun +gun carriage +gunlock +gunsight +gun trigger +gurney +gymnastic apparatus +gym shoe +gypsy cab +habergeon +habit +hairdressing +hairpiece +hairpin +half hatchet +half hitch +hall +hall +hammer +hand +handbell +handbow +handcart +hand glass +handloom +hand lotion +hand mower +handsaw +hand shovel +hand tool +handwear +handwheel +hanger +hank +harpsichord +harrow +hash house +hat +hatch +hauberk +hawser bend +hazard +head +head +head covering +headdress +header +headgear +headlight +headsail +headscarf +health spa +heat engine +heater +heat lamp +heat-seeking missile +heavier-than-air craft +heckelphone +hedge +helicopter +helm +helmet +helmet +heraldry +high altar +high-angle gun +high gear +high table +hinge +hip boot +hitch +hoe +hogshead +hoist +holder +holding device +home appliance +homespun +hood +hood +hood +hook +Hoover +hope chest +horn +horn button +horse +horsecar +horse-drawn vehicle +horsehair wig +hosiery +hospital +hospital room +hostel +hot-air balloon +hotel +hotel room +hot tub +house +house +housing +hovel +huarache +humeral veil +hut +hutch +hydraulic brake +hydraulic system +hydroelectric turbine +hydrofoil +hydrometer +hygrometer +hypermarket +hypodermic syringe +ice machine +ice rink +ice skate +icetray +ignition switch +impact printer +implant +implement +imprint +improvised explosive device +inclined plane +indicator +induction coil +ink-jet printer +inkstand +institution +instrument +instrument of punishment +instrument of torture +interceptor +interchange +intercommunication system +intercontinental ballistic missile +interface +interior door +internal-combustion engine +ionization chamber +video iPod +iron +jack +jack +jacket +jacket +jack plane +jail +jamb +jar +jeroboam +jet +jet engine +jewelled 
headdress +jib +jibboom +jiggermast +joint +jointer +joist +jolly boat +jug +jumper +jumper cable +junction +junction +jury mast +kayak +keel +keg +kerchief +kettle +key +key +keyboard +keyboard instrument +khakis +kiln +kinescope +kingbolt +kirk +kit +kit +kitbag +kitchen +kitchen appliance +kitchen utensil +kite balloon +knee-high +knife +knife +knit +knob +lace +lacquer +ladder truck +lag screw +lamasery +laminate +lamination +lamp +lamp +landing gear +land mine +lantern +lapel +lathe +lattice +launcher +lead-acid battery +leather strip +Leclanche cell +leg +legging +lens +lens implant +level +lever +Levi's +lid +life buoy +life jacket +life preserver +lifting device +ligament +light +light-emitting diode +lighter-than-air craft +lighting +light microscope +linear accelerator +line printer +lingerie +lining +liquid crystal display +lister +living quarters +living room +local area network +lock +locomotive +lodge +lodging house +loft +loft +longbow +lookout +loom +loop knot +lota +lounge +loungewear +love knot +lunchroom +luxury liner +lyre +machine +machine +machine bolt +machine gun +machinery +machine screw +machine tool +magic lantern +magnet +magnetic disk +magnetic recorder +magnetic tape +magnifier +magnum +magnus hitch +mailer +mainframe +mainmast +main-topmast +main yard +makeup +mallet +mallet +mallet +mandolin +manger +man-of-war +manometer +MANPAD +mansard +mansion +marina +marker +marketplace +maser +mask +masonry +mass spectrometer +mast +mast +mat +mat +match +match +match plane +material +materiel +Matthew Walker +maul +measure +measuring instrument +measuring stick +mechanical device +mechanical system +mechanism +medical building +medical instrument +memorial +memory +memory chip +memory device +menhir +man's clothing +mercantile establishment +mercury barometer +mercury thermometer +mercury-vapor lamp +mess +metal screw +meteorological balloon +meter +meterstick +microbalance +microfilm +microscope +military hospital +military quarters 
+military vehicle +mill +milldam +millinery +mine +minibike +mink +minster +Minuteman +mirror +mixer +mizzenmast +module +mold +moldboard plow +monitor +monitor +morgue +mortise joint +motion-picture camera +motion-picture film +motor +motorboat +motorcycle +motor hotel +motor vehicle +mound +mount +mouse button +movie projector +moving-coil galvanometer +mug +multiplex +multiplexer +musette pipe +mushroom anchor +musical instrument +musket +musket ball +muslin +muzzle loader +narrowbody aircraft +nautilus +navigational system +naval equipment +naval gun +naval radar +naval weaponry +navigational instrument +nebuchadnezzar +neckline +neckpiece +necktie +neckwear +needle +needlework +negligee +net +net +net +net +network +network +night bell +nightwear +noisemaker +nonsmoker +non-volatile storage +nose flute +nuclear reactor +nuclear weapon +nursery +oar +oblique bandage +oboe da caccia +oboe d'amore +obstacle +office +office furniture +oil lamp +oil paint +oil tanker +olive drab +omnidirectional antenna +onion dome +open-air market +open-end wrench +opener +openside plane +ophthalmoscope +optical device +optical disk +optical instrument +optical telescope +organ pipe +outbuilding +outerwear +outfit +outrigger canoe +outside mirror +oven +overgarment +overhand knot +overhang +overhead projector +overnighter +overshoe +oxford +package +packaging +packing box +paddle +paddle steamer +page printer +paint +pallium +pan +pan +panic button +panopticon +panopticon +pantechnicon +pantry +pants suit +panzer +paper chain +paper fastener +parabolic reflector +parapet +parasail +parka +parsonage +particle detector +partition +passenger ship +passenger train +passenger van +passive matrix display +passkey +patch +patchouli +patchwork +patina +patisserie +pavis +peavey +pedal +pedestal table +pedestrian crossing +pedicab +peg +pen +penal institution +pencil +pendulum +pendulum clock +percolator +percussion instrument +perfumery +peripheral +periwig +personal computer +petticoat 
+Phillips screw +Phillips screwdriver +phonograph record +photographic equipment +photographic paper +photometer +physical pendulum +piano +piccolo +pick +pick +pickle barrel +piece of cloth +pile +pillow lace +pilothouse +pin +pincer +pinstripe +pipe +pipet +pipe wrench +pistol +pivot +place of business +place of worship +planetarium +planner +plant +planter +plasterboard +plastic laminate +plastic wrap +plastron +plate +platform +platform +platform rocker +plating +pleat +plethysmograph +plexor +pliers +plug +plug +pneumatic drill +pocket +pocket-handkerchief +pocketknife +pointed arch +polyester +polygraph +pomade +pontifical +pool ball +poorhouse +porcelain +porch +portable computer +portico +post +posthole digger +pot +potential divider +potpourri +pottery +pouch +poultice +powder +powder keg +power brake +power mower +power saw +power shovel +power tool +press +press +pressure dome +pressure gauge +pressure suit +printed circuit +printer +prison camp +prod +prolonge knot +prompter +prong +propeller +propeller plane +prosthesis +protective covering +protective garment +pruning saw +pruning shears +public house +public toilet +public transport +pull +pull chain +pulley +Pullman +pullover +pulse counter +pump +pump +pump house +punch +punch press +purifier +push broom +push button +pusher +puzzle +pyrometer +pyx +QWERTY keyboard +racing boat +rack +rack +radar +radiogram +radio interferometer +radio link +radiometer +radio receiver +radiotelegraph +radiotelephone +radio transmitter +raft +rail +rail fence +railing +raincoat +rake +ramp +rampart +random-access memory +rayon +razor +reaction-propulsion engine +reactor +reading lamp +reading room +read-only memory +rearview mirror +receiver +receptacle +reception room +recess +reconnaissance plane +recorder +recording +record player +recreation room +recycling bin +reed stop +reef knot +refectory table +refinery +reflecting telescope +reflector +reformatory +refracting telescope +refrigerator car +refuge +regalia 
+regimentals +regulator +rein +religious residence +removable disk +repair shop +repeating firearm +reproducer +rescue equipment +reservoir +reset button +residence +resistor +resonator +respirator +restraint +retort +rheostat +rib +ribbed vault +riddle +ride +riding boot +riding mower +rifle ball +rig +rink +river boat +road +roadway +robe +rocket +rocket +rod +roller +roller +in-line skate +roller blind +roller coaster +rolling hitch +Rolodex +Roman building +roof +roof +room +roost +rope +rose water +rotary engine +rotating mechanism +rotating shaft +rotisserie +rotor +round arch +router plane +row house +royal mast +rubber bullet +rug +rushlight +sable +sable coat +sack +sackbut +sacking +saddle +safe +safety belt +safety curtain +safety fuse +safety match +sail +sailboat +sailing vessel +salver +sandglass +sash +satellite +satellite television +saucepan +savings bank +saw +sawhorse +scale +scarf +school +scientific instrument +scissors +scoop +scratcher +screen +screen +screen +screw eye +scrub plane +scuffer +sculpture +sea boat +sea chest +seam +seaplane +seat +seat +second hand +secretary +security system +seeker +selector +self-propelled vehicle +semiautomatic firearm +semiautomatic pistol +semiconductor device +serger +serpent +serving cart +serving dish +set +setscrew +setscrew +sewing needle +sextant +shackle +shade +shaft +shag rug +shaker +shaper +shaping tool +sharpener +shaving cream +shaving foam +shawl +shawm +shears +sheath +shed +sheepshank +sheet bend +shelf +shell +shell +shell +shellac +shelter +shelter +shelter +shield +ship +shipboard system +shirt +shirtfront +shock absorber +shoe +shooting brake +shop +short pants +shotgun +shoulder holster +shrine +shutter +shuttle +sidewinder +sieve +sifter +sights +signaling device +signboard +silk +simulator +single bed +single-breasted suit +single-reed instrument +sitz bath +six-pack +skate +skein +skeleton +skewer +skidder +skid lid +skiff +ski pole +skirt +ski tow +skullcap +slack suit +slat +sled 
+sleeper +sleeping car +sleeve +sleeve +slide projector +slipknot +slipper +sloop +slop pail +slot machine +small boat +smart bomb +smoker +smooth plane +snack bar +snap-brim hat +snare drum +sniper rifle +Sno-cat +soapbox +socle +sofa +sonograph +sorter +sound recording +soup ladle +source of illumination +soutane +spacecraft +spade +spar +spatula +spear +spear +spectacles +spectrograph +spectroscope +speedometer +spider +spike +spike +spinet +spinning machine +spiral ratchet screwdriver +spiral spring +spit +spokeshave +sponge mop +spoon +sports equipment +sports implement +sportswear +spot +spring +spring balance +springboard +sprit +square +square knot +squash racket +squawk box +squeezer +squinch +stabilizer +stabilizer +stable gear +stadium +stall +stamp mill +stand +standard cell +staple +starter +state prison +station +statue +stay +steakhouse +stealth aircraft +stealth bomber +stealth fighter +steam bath +steamboat +steamer +steam iron +steam whistle +steel mill +steelyard +steeple +steering system +step +step-up transformer +stereo +stick +stick +still +stilt +Stinger +stock +stockcar +stock car +stocking +stonework +stool +stopper knot +storage battery +storage space +storeroom +stove +stove bolt +Stradavarius +straight chair +strap +strap +stringed instrument +strip +strongbox +stronghold +strongroom +structural member +structure +stylus +submachine gun +submersible +submersible +subwoofer +suction pump +suede cloth +sunbonnet +sunhat +supermarket +superstructure +supply chamber +support +support +support column +supporting structure +supporting tower +surface lift +surface-to-air missile +surgeon's knot +surgical instrument +surgical knife +surplice +surveillance system +surveying instrument +surveyor's level +swamp buggy +sweater +swimsuit +sword +synchrotron +system +tabi +table +table +table knife +tableware +tabor +tachometer +tack +tack hammer +talaria +tambour +tambourine +tampon +tank +tank car +tannoy +tape +tape deck +tape recorder +target 
+tavern +tea chest +teaching aid +tea gown +teashop +teaspoon +tea-strainer +tea tray +telecommunication system +telephone +telephone line +telephone receiver +telephone system +telephone wire +telescope +television antenna +television camera +television equipment +television monitor +temple +temple +tender +tennis racket +tenor drum +tenoroon +tenpenny nail +tent +tenterhook +terminal +terminal +test rocket +tetraskelion +textile machine +textile mill +theater +theodolite +thermometer +thermostat +three-piece suit +three-way switch +thumbscrew +thumbtack +tights +tile +timber +timber hitch +timbrel +time-fuse +timepiece +timer +time-switch +tire chain +tithe barn +toecap +toga +toggle switch +toilet +toilet powder +toiletry +toilet water +token +tomograph +toner +tongs +tool +toolbox +tooth +toothbrush +top +top +topgallant +topmast +topsail +torpedo +torpedo boat +touch screen +towel +toweling +tower +toy box +track +tracked vehicle +trailer +trailer +train +trammel +transdermal patch +transformer +transistor +transmission +transmitter +transporter +trap +trapeze +travel iron +treasure chest +trellis +trench +trial balloon +triclinium +troop carrier +trough +trouser +trowel +truck +trunk +try square +tube +tuck shop +tun +tunic +turbine +Turkish towel +Turk's head +turner +turntable +turtleneck +tweed +tweeter +twenty-two +two-piece +typesetting machine +typewriter +ultraviolet lamp +undercarriage +undergarment +underpants +underwear +uneven parallel bars +uniform +university +uplift +urn +urn +utensil +vacuum flask +valve +van +van +varnish +vehicle +veranda +vertical file +vessel +vessel +vest +vibrator +vibrator +videocassette +video recording +vigil light +viol +vise +vivarium +voltaic cell +voltmeter +wagon +waist pack +walking stick +wall +wall +wall unit +ward +warehouse +warship +wash +washer +washtub +watch +watchtower +water-base paint +water butt +water cart +watercolor +water-cooled reactor +water gauge +water ski +waterwheel +weapon +weaponry 
+weatherglass +weathervane +web +wedge +wedge +weighbridge +weight +weir +weld +well +whaler +wheel +wheelchair +wheeled vehicle +wheelwork +whetstone +whip +whisk +whispering gallery +white goods +whorehouse +wicker basket +widebody aircraft +winch +Winchester +wind instrument +window +window +window blind +window envelope +Windsor knot +wine bucket +wine cask +wineglass +wire +wire +wire matrix printer +wiring +woman's clothing +wood +woodenware +woodscrew +woodwind +woofer +workbasket +workbench +work-clothing +worktable +workwear +wrapping +wrench +writing desk +writing implement +X-ray film +X-ray machine +yacht chair +yard +yard +yardstick +yoke +zither +zoot suit +grain +light +colorlessness +chromatic color +black +gray +dark red +orange +yellow +green +blue +purple +reddish purple +pink +light brown +reddish brown +complexion +skin +epidermal cell +columnar cell +macule +specimen +milk +embryonic cell +leukocyte +neutrophil +astrocyte +exoskeleton +medium +film +press +print media +storage medium +journalism +photojournalism +newspaper +telecommunication +telephone +call +long distance +wireless +broadcasting +television +reception +chat room +portal site +wordbook +album +concept album +magazine +movie +sign +comestible +course +dainty +dish +fare +diet +dietary supplement +liquid diet +reducing diet +vegetarianism +ration +field ration +foodstuff +starches +concentrate +meal +roughage +flour +wheat flour +nutriment +commissariat +canned food +canned meat +meal +breakfast +lunch +dinner +supper +buffet +picnic +cookout +bite +entree +side dish +casserole +chicken casserole +appetizer +cocktail +hors d'oeuvre +relish +dip +soup +madrilene +broth +broth +chowder +clam chowder +stew +goulash +fish stew +fricassee +ragout +ready-mix +powdered sugar +granulated sugar +brown sugar +sweet +confiture +candy +hard candy +patty +brittle +chewing gum +candied fruit +candied citrus peel +fudge +gumdrop +mint +kiss +lozenge +taffy +dessert +dumpling +frozen dessert 
+mousse +mousse +whip +pudding +pudding +tipsy cake +ice +chocolate ice cream +Neapolitan ice cream +peach ice cream +strawberry ice cream +tutti-frutti +vanilla ice cream +split +pudding +custard +pastry +turnover +puff paste +phyllo +fish cake +conserve +jam +jelly +apple jelly +marmalade +gelatin +gelatin dessert +patty +stuffing +bread +breadstick +bun +cracker +dark bread +flatbread +loaf of bread +quick bread +rye bread +toast +white bread +French bread +cornbread +johnnycake +muffin +scone +onion roll +sweet roll +onion bagel +biscuit +baking-powder biscuit +soft pretzel +sandwich +hamburger +gruel +edible fruit +vegetable +crudites +legume +greens +solanaceous vegetable +root vegetable +potato +baked potato +sweet potato +snack food +corn chip +tortilla chip +cruciferous vegetable +cabbage +kale +red cabbage +savoy cabbage +squash +summer squash +yellow squash +winter squash +turban squash +gherkin +sprout +beet +pepper +sweet pepper +hot pepper +chili +jalapeno +onion +Spanish onion +salad green +lettuce +butterhead lettuce +bean +pea +green pea +common bean +fresh bean +green bean +shell bean +lima bean +soy +celery +chicory +coffee substitute +chicory escarole +corn +hominy +cress +tomato +cherry tomato +salsify +turnip +edible nut +apple +eating apple +Delicious +McIntosh +Pippin +cooking apple +berry +currant +citrus +temple orange +mandarin +bitter orange +sweet orange +Jaffa orange +navel orange +Valencia orange +lime +almond +plum +dried fruit +raisin +passion fruit +cocoa +melon +muskmelon +winter melon +cherry +sweet cherry +heart cherry +sour cherry +grape +fox grape +muscadine +slipskin grape +vinifera grape +Tokay +cherimoya +soursop +sweetsop +ilama +pond apple +olive +pear +edible seed +walnut +feed +fodder +oil cake +timothy +grain +barley +wheat +rice +mash +bird feed +petfood +salad +tossed salad +combination salad +pasta salad +fruit salad +ingredient +flavorer +condiment +herb +spice +cinnamon +pepper +garlic +mustard +sage +savory 
+curry +paprika +pickle +sweet pickle +vinegar +sauce +hot sauce +dressing +mayonnaise +cheese sauce +hot-fudge sauce +white sauce +spaghetti sauce +boiled egg +hard-boiled egg +Easter egg +omelet +firm omelet +souffle +dairy product +milk +milk +powdered milk +cream +butter +clarified butter +yogurt +curd +cheese +cream cheese +bleu +cheddar +Swiss cheese +spread +pate +sweetening +sugar +syrup +batter +bread dough +chicken and rice +pasta +Tetrazzini +chili dog +fondue +fondue +hash +kabob +seafood Newburg +meatball +pilaf +sausage pizza +pepperoni pizza +cheese pizza +anchovy pizza +Sicilian pizza +porridge +fish loaf +salmon loaf +scallopine +taco +beef burrito +quesadilla +tostada +beverage +concoction +mix +filling +potion +elixir +alcohol +brew +beer +lager +Weissbier +malt +ale +stout +mead +wine +white wine +sparkling wine +Burgundy +Beaujolais +Medoc +Pinot noir +Bordeaux +claret +Chianti +Cabernet +Merlot +dessert wine +Rhine wine +Rioja +Saint Emilion +zinfandel +table wine +vermouth +fortified wine +Madeira +liquor +brandy +gin +rum +whiskey +corn whiskey +Irish +Scotch +liqueur +coffee liqueur +orange liqueur +mixed drink +cocktail +highball +Bloody Mary +daiquiri +manhattan +martini +sling +sour +caffe latte +cider +sweet cider +juice +fruit juice +grape juice +orange juice +fruit drink +mulled wine +soft drink +cola +coffee +punch +champagne cup +claret cup +rickey +tea +tea +herb tea +tisane +black tea +green tea +water +drinking water +mineral water +vitamin pill +collection +suburb +residence +littoral +grassland +pasture +resort +field +air bubble +arroyo +ascent +atoll +bank +bank +bar +barrier reef +basin +beach +burrow +canyon +cave +continental glacier +crag +crater +dale +descent +draw +dune +geological formation +glacier +glen +gorge +gulch +gully +highland +hill +hillside +hole +hollow +iceberg +ice mass +ion +knoll +landfall +landfill +lather +ledge +lowland +meteorite +mountain +mull +natural depression +natural elevation +nullah +ocean 
floor +outcrop +plain +point +precipice +ravine +reef +ridge +ridge +rift valley +rock +sandbank +seaside +shiner +shore +slope +soapsuds +spume +tableland +tideland +volcanic crater +wadi +spiritual leader +adventurer +anomaly +benefactor +commoner +contestant +discussant +entertainer +female +finisher +inhabitant +native +juvenile +lover +male +mediator +national +peer +recipient +sensualist +traveler +unwelcome person +unskilled person +worker +wrongdoer +Black +White +Semite +white man +Mongol +Nahuatl +Caddo +Penutian +Teton +Taracahitian +Slav +Catholic +Altaic +Bornean +Canadian +Central American +Britisher +English person +Englishwoman +Ethiopian +Parisian +Greek +Italian +Japanese +Mexican +Nigerian +North American +Pakistani +South American Indian +Filipino +Polynesian +Scandinavian +South African +South American +Turki +American +New Yorker +abbess +abstainer +academic administrator +accomplice +acquaintance +acquirer +aerialist +actor +actor +addict +adjutant +admirer +adulterer +advertiser +advocate +analyst +ancestor +announcer +announcer +appointee +appreciator +appropriator +archbishop +architect +army engineer +army officer +arrival +articulator +asserter +assistant +associate +astronaut +athlete +attendant +aunt +authoritarian +authority +aviator +back +bad person +ballet dancer +bullfighter +baron +bartender +baseball coach +base runner +basketball player +believer +betrothed +bigot +big shot +biochemist +bisexual +boatman +bond servant +botanist +Boy Scout +buddy +campaigner +captain +card player +careerist +caretaker +cavalryman +celebrity +charmer +child +child +cipher +citizen +civil rights leader +cleaner +clergyman +cleric +clerk +climber +closer +clown +coach +cobbler +collaborator +college student +collegian +commanding officer +commissioned officer +commissioned military officer +commissioner +committee member +communist +compulsive +computer scientist +computer user +contractor +convict +copycat +counselor +craftsman +creditor +critic 
+curate +dancer +dancer +darling +date +daughter +dawdler +deacon +deaf person +debtor +deliveryman +descender +designated hitter +detective +detractor +director +disbeliever +dispatcher +distributor +doctor +domestic partner +draftsman +drinker +drinker +drug addict +drug user +drummer +drunkard +eager beaver +earner +eavesdropper +economist +editor +egotist +elder +elected official +emissary +employee +employer +endomorph +enemy +entrant +examiner +exhibitionist +fan +fancier +farmer +farmhand +fascist +father +female aristocrat +female offspring +female child +fielder +fireman +first baseman +first sergeant +flag officer +flatterer +foe +folk dancer +follower +football player +forefather +forger +founder +free agent +friar +monk +gambler +generator +geneticist +genitor +geologist +girl +godchild +godparent +golfer +grandma +grandmaster +grandparent +granter +great grandchild +great grandparent +grouch +guard +guest +guide +gymnast +Gypsy +hack +hairdresser +hater +headmaster +hearer +hedonist +heir +herder +homeless +horseman +host +host +hypocrite +important person +incumbent +infielder +informer +in-law +insurgent +investigator +investor +journalist +judge +juror +Counsel to the Crown +kinswoman +laborer +lama +landowner +lawgiver +lawman +lawyer +liberator +lieutenant +lineman +literate +litigant +Lord +failure +lowerclassman +lumberman +maid +maker +malcontent +martinet +master of ceremonies +masturbator +medical officer +medical practitioner +medical scientist +mender +meteorologist +middle-aged man +miler +military attache +military officer +military policeman +minister +minor leaguer +misfit +mixed-blood +model +moneymaker +mother +mourner +mover +musician +Muslimah +mystic +nanny +neonate +nephew +neutral +newcomer +newcomer +newspaper editor +niece +noncommissioned officer +nurse +observer +occultist +oldster +old woman +opportunist +orator +originator +outfielder +right fielder +right-handed pitcher +painter +panelist +pardoner +parodist +party 
+passenger +patient +patron +payer +peddler +percussionist +personal representative +personification +pervert +petitioner +Pharaoh +phonetician +physical therapist +physicist +pimp +pisser +pitcher +planner +player +poet +politician +practitioner +prayer +preserver +president +priest +princess +principal +proctor +programmer +promiser +propagandist +prosecutor +psychic +pusher +queen +queen +ranch hand +reader +recruit +recruiter +religious leader +repairman +reporter +representative +reprobate +rescuer +reservist +restrainer +retailer +retiree +revolutionist +rich person +civil authority +runner +running back +rustic +saboteur +sailor +salesman +salesperson +scalper +schemer +scholar +schoolchild +scientist +second baseman +secretary +seeker +selfish person +seller +serf +serviceman +settler +shrew +sibling +sick person +singer +sister +skeptic +skier +sleeper +slob +smith +snoop +social climber +socialist +social scientist +sociologist +soldier +son +songster +sorcerer +sovereign +speaker +specialist +spectator +stand-in +star +stepparent +stock trader +stranger +strategist +student +subordinate +suitor +superior +surgeon +sweetheart +sympathizer +tax assessor +taxonomist +teacher +television reporter +tenant +tenant +tennis player +testator +testee +theologian +therapist +thinker +thrower +toastmaster +trader +traffic cop +trainer +traitor +traveling salesman +tyrant +upstart +upstart +utility man +vacationer +vegetarian +vice president +victim +volunteer +votary +waiter +waitress +wanderer +wanton +washer +white supremacist +wife +winner +winner +woman +workman +worshiper +wright +writer +wilding +bryophyte +liverwort +pteridophyte +fern +fern ally +spore +spermatophyte +perennial +gymnosperm +ephedra +cycad +sago palm +zamia +pine +pinon +nut pine +white pine +yellow pine +larch +fir +silver fir +cedar +spruce +hemlock +douglas fir +cedar +cypress +arborvitae +araucaria +kauri pine +celery pine +yellowwood +gymnospermous yellowwood +yew +angiosperm +dicot 
+flower +wildflower +inflorescence +pistil +pericarp +oilseed +custard apple +barberry +allspice +laurel +anise tree +magnolia +moonseed +buttercup +aconite +baneberry +anemone +thimbleweed +columbine +clematis +delphinium +nigella +wax myrtle +zebrawood +legume +legume +darling pea +clover +acacia +wattle +albizzia +nitta tree +dogbane +allamanda +carissa +frangipani +rauwolfia +arum +alocasia +anthurium +caladium +monstera +nephthytis +arrow arum +calla lily +duckweed +watermeal +birthwort +sandwort +mouse-ear chickweed +pink +china pink +lychnis +silene +chickweed +fig marigold +amaranth +orach +saltbush +beet +sand verbena +four o'clock +echinocactus +prickly pear +pokeweed +portulaca +flame flower +caper +spiderflower +crucifer +cress +watercress +rock cress +cabbage +head cabbage +turnip plant +mustard +wallflower +woad +stock +radish plant +pennycress +poppy +prickly poppy +composite +compass plant +everlasting +achillea +ageratum +ragweed +ammobium +burdock +artemisia +mugwort +aster +wood aster +common daisy +bur marigold +calendula +thistle +carline thistle +catananche +centaury +knapweed +chrysanthemum +golden aster +goldenbush +plume thistle +woolly thistle +coreopsis +fleabane +woolly sunflower +cotton rose +gazania +African daisy +cudweed +gumweed +goldenbush +sneezeweed +sunflower +hawkweed +marsh elder +krigia +hawkbit +blazing star +rattlesnake root +daisybush +coneflower +coneflower +cutleaved coneflower +golden thistle +white-topped aster +goldenrod +sow thistle +marigold +dandelion +crownbeard +zinnia +achene +campanula +orchid +orchis +arethusa +helleborine +coral root +lady's slipper +large yellow lady's slipper +helleborine +fringed orchis +rein orchid +spider orchid +moth orchid +butterfly orchid +ladies' tresses +vanda +vanilla +yam +primrose +pimpernel +featherfoil +loosestrife +water pimpernel +gramineous plant +grass +wheatgrass +foxtail +broom grass +oat +brome +grama +reed grass +burgrass +crabgrass +lyme grass +wild rye +plume grass 
+rye grass +ricegrass +meadowgrass +millet +reed +sorghum +grain sorghum +cordgrass +cereal +wheat +corn +mealie +zoysia +bamboo +cotton grass +spike rush +pandanus +cattail +grain +kernel +gourd +gourd +squash +summer squash +marrow +winter squash +turban squash +bryony +sweet melon +luffa +lobelia +mallow +hollyhock +althea +poppy mallow +seashore mallow +globe mallow +tulipwood tree +sterculia +bottle-tree +screw tree +cacao +linden +herb +protea +banksia +grevillea +macadamia +casuarina +beefwood +heath +bearberry +huckleberry +kalmia +rhododendron +cranberry +blueberry +shortia +Australian heath +epacris +wintergreen +pipsissewa +beech +chestnut +tanbark oak +southern beech +New Zealand beech +oak +live oak +white oak +red oak +scrub oak +chestnut oak +birch +alder +hornbeam +hop hornbeam +hazelnut +centaury +gentian +fringed gentian +olive tree +fringe tree +ash +red ash +jasmine +privet +lilac +liquidambar +walnut +hickory +wing nut +loosestrife +myrtle +gum tree +eucalyptus +flooded gum +mallee +stringybark +tupelo +enchanter's nightshade +willowherb +fuchsia +evening primrose +daphne +canna +banana +ginger +begonia +tuberous begonia +poon +St John's wort +rockrose +dipterocarp +candlewood +reseda +viola +violet +nettle +cannabis +mulberry +fig tree +fig +elm +hackberry +iridaceous plant +bearded iris +beardless iris +crocus +amaryllis +blood lily +narcissus +daffodil +liliaceous plant +colicroot +alliaceous plant +kniphofia +poker plant +asphodel +mariposa +globe lily +camas +dogtooth violet +fritillary +tulip +star-of-Bethlehem +grape hyacinth +scilla +false asphodel +bog asphodel +hellebore +death camas +sarsaparilla +Solomon's-seal +bellwort +agave +sansevieria +cassia +locust tree +senna +angelim +milk vetch +wild indigo +pea tree +glory pea +rosewood +blackwood +tick trefoil +coral tree +vetchling +wild pea +lupine +medic +mucuna +locoweed +pole bean +pea +edible-pod pea +quira +hoary pea +bush pea +vetch +palm +sago palm +feather palm +fan palm 
+palmetto +areca +calamus +oil palm +raffia palm +lady palm +eriogonum +rhubarb +water plantain +waterweed +pondweed +rose +agrimonia +flowering quince +cotoneaster +avens +apple tree +wild apple +crab apple +Iowa crab +cinquefoil +plum +wild plum +bullace +apricot +cherry +wild cherry +sweet cherry +sour cherry +almond tree +almond +bird cherry +flowering cherry +chokecherry +fruit tree +bramble bush +raspberry +mountain ash +service tree +spirea +madderwort +coffee +cinchona +bedstraw +genipa +hamelia +honeysuckle +American fly honeysuckle +teasel +scabious +geranium +cranesbill +storksbill +incense tree +mahogany +silver ash +milkwort +citrus +orange +mandarin +lemon +kumquat +prickly ash +bitterwood tree +ailanthus +nasturtium +willow +osier +sallow +poplar +black poplar +cottonwood +aspen +soapberry +soapberry vine +harpullia +pachysandra +spindle tree +maple +box elder +holly +sumac +horse chestnut +persimmon +buckthorn +styrax +carnivorous plant +pitcher plant +sedum +philadelphus +saxifrage +astilbe +alumroot +miterwort +parnassia +currant +plane tree +phlox +acanthus +catalpa +anchusa +comfrey +convolvulus +bindweed +gloxinia +streptocarpus +waterleaf +nemophila +scorpionweed +giant hyssop +bugle +wood mint +calamint +coleus +dead nettle +origanum +horehound +monarda +savory +germander +thyme +blue curls +snapdragon +kitten-tails +Indian paintbrush +foxglove +toadflax +veronica +nightshade +thorn apple +matrimony vine +cupflower +petunia +salpiglossis +spurge +croton +cassava +slipper spurge +camellia +umbellifer +angelica +astrantia +caraway +fennel +parsnip +parsley +sanicle +dogwood +valerian +bristle fern +flowering fern +climbing fern +clover fern +adder's tongue +grape fern +ergot +sclerotinia +earthball +Podaxaceae +false truffle +rhizopus +slime mold +cellular slime mold +downy mildew +pythium +Sarcosomataceae +club fungus +lichen +lecanora +fungus +basidiomycete +mushroom +mushroom +mushroom +toadstool +horse mushroom +meadow mushroom +royal 
agaric +false deathcap +fly agaric +death cap +blushing mushroom +destroying angel +chanterelle +floccose chanterelle +pig's ears +cinnabar chanterelle +jack-o-lantern fungus +inky cap +shaggymane +milkcap +fairy-ring mushroom +oyster mushroom +olive-tree agaric +Pholiota astragalina +Pholiota aurea +Pholiota destruens +Pholiota flammans +Pholiota flavida +nameko +Pholiota squarrosa-adiposa +Pholiota squarrosa +Pholiota squarrosoides +Stropharia ambigua +Stropharia hornemannii +Stropharia rugoso-annulata +Entoloma lividum +Entoloma aprile +Chlorophyllum molybdites +lepiota +parasol mushroom +poisonous parasol +Lepiota naucina +Lepiota rhacodes +American parasol +Lepiota rubrotincta +Lepiota clypeolaria +onion stem +blewits +sandy mushroom +Tricholoma pessundatum +Tricholoma sejunctum +man-on-a-horse +Tricholoma venenata +Tricholoma pardinum +Tricholoma vaccinum +Tricholoma aurantium +Pluteus aurantiorugosus +Pluteus magnus +deer mushroom +straw mushroom +Volvariella bombycina +Clitocybe clavipes +Clitocybe dealbata +Clitocybe inornata +Clitocybe robusta +Clitocybe irina +Clitocybe subconnexa +winter mushroom +mycelium +ascomycete +Clavicipitaceae +yeast +discomycete +morel +Verpa +false morel +lorchel +helvella +Gyromitra californica +Gyromitra sphaerospora +Gyromitra esculenta +Gyromitra infula +Gyromitra gigas +gasteromycete +common stinkhorn +Phallus ravenelii +dog stinkhorn +stinky squid +puffball +Geastrum coronatum +Astreus pteridis +Astreus hygrometricus +polypore +Boletus chrysenteron +Boletus edulis +Frost's bolete +Boletus luridus +Boletus mirabilis +Boletus pallidus +Boletus pulcherrimus +Boletus pulverulentus +Boletus roxanae +Boletus subvelutipes +Boletus variipes +Boletus zelleri +Fuscoboletinus paluster +Fuscoboletinus serotinus +Leccinum fibrillosum +Suillus albivelatus +old-man-of-the-woods +Boletellus russellii +jelly fungus +rust +smut +cornsmut +flag smut fungus +waxycap +Hygrocybe acutoconica +Hygrophorus borealis +Hygrophorus caeruleus 
+Hygrophorus inocybiformis +Hygrophorus kauffmanii +Hygrophorus marzuolus +Hygrophorus purpurascens +Hygrophorus russula +Hygrophorus sordidus +Hygrophorus tennesseensis +Hygrophorus turundus +Neohygrophorus angelesianus +Cortinarius armillatus +Cortinarius atkinsonianus +Cortinarius corrugatus +Cortinarius gentilis +Cortinarius mutabilis +Cortinarius semisanguineus +Cortinarius subfoetidus +Cortinarius violaceus +Gymnopilus spectabilis +Gymnopilus validipes +Gymnopilus ventricosus +mold +mildew +candida +houseplant +succulent +weed +sporophyll +sporangium +poisonous plant +vine +tree +bean tree +gymnospermous tree +conifer +angiospermous tree +nut tree +spice tree +bonsai +subshrub +bramble +liana +desert plant +marsh plant +strangler +root +receptacle +scape +peduncle +flower cluster +raceme +cyme +bulbous plant +fruit +seed +bean +nut +berry +aggregate fruit +drupe +drupelet +pome +pod +husk +buckthorn +vinifera +true pepper +peperomia +bract +palmate leaf +pinnate leaf +dentate leaf +branchlet +polypody +strap fern +staghorn fern +spleenwort +chain fern +davallia +hare's-foot fern +shield fern +wood fern +lady fern +bladder fern +holly fern +woodsia +maidenhair +brittle maidenhair +lip fern +cliff brake +horsetail +club moss +spikemoss +beech fern +shoestring fungus +Armillaria caligata +Armillaria ponderosa +Armillaria zelleri +honey mushroom +milkweed +stapelia +stephanotis +orangery +figure +plane figure +solid figure +line +convex shape +concave shape +cylinder +round shape +polygon +concave polygon +amorphous shape +closed curve +simple closed curve +cone +circle +ring +loop +ellipse +triangle +spherical polygon +angular distance +groove +bulge +bow +balance +toroid +boundary +incisure +notch +wrinkle +tree +regular polyhedron +carbon +rock +soil +high explosive +culture medium +agar +paper +paving +plaster +stucco +tear gas +vitamin +fat-soluble vitamin +water-soluble vitamin +vitamin A +B-complex vitamin +vitamin E +vitamin K \ No newline at end of file 
diff --git a/workloads/realworld/async/darknet/cfg/imagenet1k.data b/workloads/realworld/async/darknet/cfg/imagenet1k.data new file mode 100644 index 0000000000000000000000000000000000000000..daf120a3c020003e8ed08096c51304272ca3ba27 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/imagenet1k.data @@ -0,0 +1,9 @@ +classes=1000 +train = /data/darknet/imagenet_mini/valid.list +valid = /data/darknet/imagenet_mini/valid.list +test = /data/darknet/imagenet_mini/valid.list +backup = /data/darknet/backup/ +labels = /data/darknet/imagenet_mini/imagenet.labels.list +names = /data/darknet/imagenet_mini/imagenet.shortnames.list +top=5 + diff --git a/workloads/realworld/async/darknet/cfg/imagenet22k.dataset b/workloads/realworld/async/darknet/cfg/imagenet22k.dataset new file mode 100644 index 0000000000000000000000000000000000000000..e25ef007ecceb096e5846ee7cacd1fd54fb8f9e4 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/imagenet22k.dataset @@ -0,0 +1,9 @@ +classes=21842 +train = /data/imagenet/imagenet22k.train.list +valid = /data/imagenet/imagenet22k.valid.list +#valid = /data/imagenet/imagenet1k.valid.list +backup = /home/pjreddie/backup/ +labels = data/imagenet.labels.list +names = data/imagenet.shortnames.list +top = 5 + diff --git a/workloads/realworld/async/darknet/cfg/imagenet9k.hierarchy.dataset b/workloads/realworld/async/darknet/cfg/imagenet9k.hierarchy.dataset new file mode 100644 index 0000000000000000000000000000000000000000..41fb71b065544b919bc8ed7d723afb5d04ad85ac --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/imagenet9k.hierarchy.dataset @@ -0,0 +1,9 @@ +classes=9418 +train = data/9k.train.list +valid = /data/imagenet/imagenet1k.valid.list +leaves = data/imagenet1k.labels +backup = /home/pjreddie/backup/ +labels = data/9k.labels +names = data/9k.names +top=5 + diff --git a/workloads/realworld/async/darknet/cfg/jnet-conv.cfg b/workloads/realworld/async/darknet/cfg/jnet-conv.cfg new file mode 100644 index 
0000000000000000000000000000000000000000..056f82aa6e2a0710a664c4740ab763961e4de33d --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/jnet-conv.cfg @@ -0,0 +1,118 @@ +[net] +batch=1 +subdivisions=1 +height=10 +width=10 +channels=3 +learning_rate=0.01 +momentum=0.9 +decay=0.0005 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + diff --git a/workloads/realworld/async/darknet/cfg/openimages.data b/workloads/realworld/async/darknet/cfg/openimages.data new file mode 100644 index 0000000000000000000000000000000000000000..fa80e5ab7d8576d391c7cac9dfc8367aab704139 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/openimages.data @@ -0,0 +1,8 @@ +classes= 601 +train = /home/pjreddie/data/openimsv4/openimages.train.list +#valid = coco_testdev +valid = data/coco_val_5k.list +names = data/openimages.names +backup = /home/pjreddie/backup/ +eval=coco + diff --git a/workloads/realworld/async/darknet/cfg/resnet101.cfg 
b/workloads/realworld/async/darknet/cfg/resnet101.cfg new file mode 100644 index 0000000000000000000000000000000000000000..de458820bcd35f5e65d858f9f661e42653ed0184 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/resnet101.cfg @@ -0,0 +1,990 @@ +[net] +# Training +# batch=128 +# subdivisions=2 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + +[cost] +type=sse + diff --git a/workloads/realworld/async/darknet/cfg/resnet152.cfg b/workloads/realworld/async/darknet/cfg/resnet152.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e8e3297ac2364b95f28fa0a0bdd4ca71f14ac82c --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/resnet152.cfg @@ -0,0 +1,1460 @@ +[net] +# Training +# batch=128 +# subdivisions=8 + +# Testing 
+batch=1 +subdivisions=1 + +height=256 +width=256 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 
+size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 
+size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/resnet18.cfg b/workloads/realworld/async/darknet/cfg/resnet18.cfg new file mode 100644 index 0000000000000000000000000000000000000000..275f4bdb5962d77c16f353cd3d2751e189b9344c --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/resnet18.cfg @@ -0,0 +1,228 @@ +[net] +# Training +# batch=128 +# subdivisions=1 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/resnet18_b.cfg b/workloads/realworld/async/darknet/cfg/resnet18_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c0712e9cdfaf1f28dbe3a358355405d2758e6d2b --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/resnet18_b.cfg @@ -0,0 +1,228 @@ 
+[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/resnet18_t.cfg b/workloads/realworld/async/darknet/cfg/resnet18_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c0712e9cdfaf1f28dbe3a358355405d2758e6d2b --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/resnet18_t.cfg @@ -0,0 +1,228 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/resnet34.cfg b/workloads/realworld/async/darknet/cfg/resnet34.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9f68f096741ae3b4898f40b76af7569d4697729f --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/resnet34.cfg @@ -0,0 +1,392 @@ +[net] +# Training +# batch=128 +# subdivisions=2 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly 
+power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided 
Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block 
+[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/resnet50.cfg b/workloads/realworld/async/darknet/cfg/resnet50.cfg new file mode 100644 index 0000000000000000000000000000000000000000..d0d7c511516e997a392bb5ba77682740c0494972 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/resnet50.cfg @@ -0,0 +1,510 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/async/darknet/cfg/resnet50_b.cfg b/workloads/realworld/async/darknet/cfg/resnet50_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..462479918fd608c4a0761b84c7299e51081193c6 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/resnet50_b.cfg @@ -0,0 +1,510 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 
+size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky 
+ +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/async/darknet/cfg/resnet50_t.cfg b/workloads/realworld/async/darknet/cfg/resnet50_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..462479918fd608c4a0761b84c7299e51081193c6 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/resnet50_t.cfg @@ -0,0 +1,510 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/async/darknet/cfg/resnext101-32x4d.cfg b/workloads/realworld/async/darknet/cfg/resnext101-32x4d.cfg new file mode 100644 index 0000000000000000000000000000000000000000..8538ccc3daee2e3de589eb4e2edf868340d4924b --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/resnext101-32x4d.cfg @@ -0,0 +1,1053 @@ +[net] +# Training +# batch=128 +# subdivisions=8 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 
+size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] 
+from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/resnext152-32x4d.cfg b/workloads/realworld/async/darknet/cfg/resnext152-32x4d.cfg new file mode 100644 index 0000000000000000000000000000000000000000..48279fd28eb0dbe23c7b0c593f67051cb6a62374 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/resnext152-32x4d.cfg @@ -0,0 +1,1562 @@ +[net] +# Training +# batch=128 +# subdivisions=16 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 
+filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+ +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 
+activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] 
+from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/async/darknet/cfg/resnext50.cfg b/workloads/realworld/async/darknet/cfg/resnext50.cfg new file mode 100644 index 0000000000000000000000000000000000000000..12aebdf6fbd48bde40ee22c4257e06f2e0cf46eb --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/resnext50.cfg @@ -0,0 +1,523 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 
+filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 
+[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/async/darknet/cfg/rnn.cfg b/workloads/realworld/async/darknet/cfg/rnn.cfg new file mode 100644 index 0000000000000000000000000000000000000000..61b202f3a441b701f76d9b007c6276467c639e11 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/rnn.cfg @@ -0,0 +1,38 @@ +[net] +subdivisions=1 +inputs=256 +batch = 1 +momentum=0.9 +decay=0.001 +max_batches = 2000 +time_steps=1 +learning_rate=0.1 +policy=steps +steps=1000,1500 +scales=.1,.1 + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[connected] +output=256 +activation=leaky + +[softmax] + + diff --git a/workloads/realworld/async/darknet/cfg/rnn.train.cfg 
b/workloads/realworld/async/darknet/cfg/rnn.train.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b9748990aceaa85cc2e43358073114606725dcbd --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/rnn.train.cfg @@ -0,0 +1,38 @@ +[net] +subdivisions=1 +inputs=256 +batch = 128 +momentum=0.9 +decay=0.001 +max_batches = 2000 +time_steps=576 +learning_rate=0.1 +policy=steps +steps=1000,1500 +scales=.1,.1 + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[connected] +output=256 +activation=leaky + +[softmax] + + diff --git a/workloads/realworld/async/darknet/cfg/strided.cfg b/workloads/realworld/async/darknet/cfg/strided.cfg new file mode 100644 index 0000000000000000000000000000000000000000..2f745085adc268a3e99bd7895bd4dda28227bffd --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/strided.cfg @@ -0,0 +1,182 @@ +[net] +batch=128 +subdivisions=4 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +policy=steps +scales=.1,.1,.1 +steps=200000,300000,400000 +max_batches=800000 + + +[crop] +crop_height=224 +crop_width=224 +flip=1 +angle=0 +saturation=1 +exposure=1 +shift=.2 + +[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=192 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + 
+[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=1024 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=ramp + +[maxpool] +size=3 +stride=2 + +[connected] +output=4096 +activation=ramp + +[dropout] +probability=0.5 + +[connected] +output=1000 +activation=ramp + +[softmax] + diff --git a/workloads/realworld/async/darknet/cfg/t1.test.cfg b/workloads/realworld/async/darknet/cfg/t1.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b3628114e048dc78f4797342afd95a757c81c977 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/t1.test.cfg @@ -0,0 +1,117 @@ +[net] +batch=1 +subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,20000,30000 +scales=2.5,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[connected] +output= 1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/async/darknet/cfg/tiny.cfg b/workloads/realworld/async/darknet/cfg/tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..f97327cfceebf868998bf2bb16224bd0994ebd82 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/tiny.cfg @@ -0,0 +1,174 @@ +[net] +# Train +batch=128 +subdivisions=1 +# Test +# batch=1 +# subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=320 + +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + + diff --git a/workloads/realworld/async/darknet/cfg/vgg-16.cfg b/workloads/realworld/async/darknet/cfg/vgg-16.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c73b17b6ccfdcc9cae9b67591b662571463569ab --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/vgg-16.cfg @@ -0,0 +1,157 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +learning_rate=0.00001 +momentum=0.9 +decay=0.0005 + +[crop] +crop_height=224 +crop_width=224 +flip=1 +exposure=1 +saturation=1 +angle=0 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 
+size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=1000 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/async/darknet/cfg/vgg-conv.cfg b/workloads/realworld/async/darknet/cfg/vgg-conv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..21e1d724c9418107f9cf82f9bffb9ae64d3e2084 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/vgg-conv.cfg @@ -0,0 +1,121 @@ +[net] +batch=1 +subdivisions=1 +width=224 +height=224 +channels=3 +learning_rate=0.00001 +momentum=0.9 +decay=0.0005 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + 
+[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + diff --git a/workloads/realworld/async/darknet/cfg/voc.data b/workloads/realworld/async/darknet/cfg/voc.data new file mode 100644 index 0000000000000000000000000000000000000000..7807b5d2a8fd0f855a8c68e82c064dc320551da1 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/voc.data @@ -0,0 +1,6 @@ +classes= 20 +train = /home/pjreddie/data/voc/train.txt +valid = /home/pjreddie/data/voc/2007_test.txt +names = data/voc.names +backup = backup + diff --git a/workloads/realworld/async/darknet/cfg/writing.cfg b/workloads/realworld/async/darknet/cfg/writing.cfg new file mode 100644 index 0000000000000000000000000000000000000000..1ed899bcd63d6354e8320ace7e7f513ba1174886 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/writing.cfg @@ -0,0 +1,41 @@ +[net] +batch=128 +subdivisions=2 +height=256 +width=256 +channels=3 +learning_rate=0.00000001 +momentum=0.9 +decay=0.0005 +seen=0 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1 +size=3 +stride=1 +pad=1 +activation=logistic + +[cost] + diff --git a/workloads/realworld/async/darknet/cfg/yolo9000.cfg b/workloads/realworld/async/darknet/cfg/yolo9000.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e745f78a6e37611fb0f13c2d848c292cea1a89d3 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/yolo9000.cfg @@ -0,0 +1,218 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +batch=1 +subdivisions=1 +height=544 +width=544 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +burn_in=1000 +max_batches = 
500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +hue=.1 +saturation=.75 +exposure=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=28269 +size=1 +stride=1 
+pad=1 +activation=linear + +[region] +anchors = 0.77871, 1.14074, 3.00525, 4.31277, 9.22725, 9.61974 +bias_match=1 +classes=9418 +coords=4 +num=3 +softmax=1 +jitter=.2 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +thresh = .6 +absolute=1 +random=1 + +tree=data/9k.tree +map = data/coco9k.map diff --git a/workloads/realworld/async/darknet/cfg/yolov1-tiny.cfg b/workloads/realworld/async/darknet/cfg/yolov1-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a5e7b4920289ccb507a3a0356a33362bc7633581 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/yolov1-tiny.cfg @@ -0,0 +1,130 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 + +saturation=.75 +exposure=.75 +hue = .1 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,800,20000,30000 +scales=2.5,2,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[connected] +output= 1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 
+num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/async/darknet/cfg/yolov1.cfg b/workloads/realworld/async/darknet/cfg/yolov1.cfg new file mode 100644 index 0000000000000000000000000000000000000000..06cf6e676170e41d24e63ec08d7b27a31c411718 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/yolov1.cfg @@ -0,0 +1,261 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 +saturation=1.5 +exposure=1.5 +hue=.1 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,20000,30000 +scales=2.5,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[local] +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[dropout] +probability=.5 + +[connected] +output= 1715 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=3 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/async/darknet/cfg/yolov2-tiny-voc.cfg b/workloads/realworld/async/darknet/cfg/yolov2-tiny-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c4c127cdd352bd98b3b7a3336d5c3b2efc6fadcd --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/yolov2-tiny-voc.cfg @@ -0,0 +1,138 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +max_batches = 
40200 +policy=steps +steps=-1,100,20000,30000 +scales=.1,10,.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=125 +activation=linear + +[region] +anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 +bias_match=1 +classes=20 +coords=4 +num=5 +softmax=1 +jitter=.2 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/async/darknet/cfg/yolov2-tiny.cfg b/workloads/realworld/async/darknet/cfg/yolov2-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..81d0ac45d6dca10f50875bfe85f7496ded8e0f63 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/yolov2-tiny.cfg @@ -0,0 +1,139 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + 
+[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=425 +activation=linear + +[region] +anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 +bias_match=1 +classes=80 +coords=4 +num=5 +softmax=1 +jitter=.2 +rescore=0 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/async/darknet/cfg/yolov2-voc.cfg b/workloads/realworld/async/darknet/cfg/yolov2-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..dbf2de281c1200cb4889409c616e775080823268 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/yolov2-voc.cfg @@ -0,0 +1,258 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=416 +width=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 80200 +policy=steps +steps=40000,60000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 
+size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky 
+ +[route] +layers=-9 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=64 +activation=leaky + +[reorg] +stride=2 + +[route] +layers=-1,-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=125 +activation=linear + + +[region] +anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 +bias_match=1 +classes=20 +coords=4 +num=5 +softmax=1 +jitter=.3 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/async/darknet/cfg/yolov2.cfg b/workloads/realworld/async/darknet/cfg/yolov2.cfg new file mode 100644 index 0000000000000000000000000000000000000000..088edf81573e83c59edd7137cbc07b6fe1433591 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/yolov2.cfg @@ -0,0 +1,258 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[route] +layers=-9 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=64 +activation=leaky + +[reorg] +stride=2 + +[route] +layers=-1,-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=425 +activation=linear + + +[region] +anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 +bias_match=1 +classes=80 +coords=4 +num=5 +softmax=1 +jitter=.3 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/async/darknet/cfg/yolov3-openimages.cfg 
b/workloads/realworld/async/darknet/cfg/yolov3-openimages.cfg new file mode 100644 index 0000000000000000000000000000000000000000..65d241a74c4c4995dbd997b1750575a83b0a17d4 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/yolov3-openimages.cfg @@ -0,0 +1,789 @@ +[net] +# Testing + batch=1 + subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=5000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 
+size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/async/darknet/cfg/yolov3-spp.cfg b/workloads/realworld/async/darknet/cfg/yolov3-spp.cfg new file mode 100644 index 0000000000000000000000000000000000000000..4ad2a052d88328a79cff5686ff4dd1df6993a2fd --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/yolov3-spp.cfg @@ -0,0 +1,822 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 
+policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 
+stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear 
+ +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 + +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors 
= 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/async/darknet/cfg/yolov3-tiny.cfg b/workloads/realworld/async/darknet/cfg/yolov3-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..cfca3cfa6415b7b61eae238aa71107dedbe5d607 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/yolov3-tiny.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky 
+ +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/async/darknet/cfg/yolov3-tiny_b.cfg b/workloads/realworld/async/darknet/cfg/yolov3-tiny_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..0d8ded69bcf909f4cca02ab202cc99b1800a1562 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/yolov3-tiny_b.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=416 
+height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git 
a/workloads/realworld/async/darknet/cfg/yolov3-tiny_t.cfg b/workloads/realworld/async/darknet/cfg/yolov3-tiny_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..0d8ded69bcf909f4cca02ab202cc99b1800a1562 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/yolov3-tiny_t.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 
+pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/async/darknet/cfg/yolov3-voc.cfg b/workloads/realworld/async/darknet/cfg/yolov3-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..3f3e8dfb31b7103cf7ca00cd0bef83d6d426bb8d --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/yolov3-voc.cfg @@ -0,0 +1,785 @@ +[net] +# Testing + batch=1 + subdivisions=1 +# Training +# batch=64 +# subdivisions=16 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 50200 +policy=steps +steps=40000,45000 +scales=.1,.1 + + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# 
Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] 
+batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + 
+[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/async/darknet/cfg/yolov3.cfg b/workloads/realworld/async/darknet/cfg/yolov3.cfg new file mode 100644 index 
0000000000000000000000000000000000000000..938ffff23f106d65290faae217f6a9b0a715c023 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/yolov3.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 
+activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 
+activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 
+pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/async/darknet/cfg/yolov3_b.cfg b/workloads/realworld/async/darknet/cfg/yolov3_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a86d73b42225ef65d8ff8692b0d72827ba3a8484 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/yolov3_b.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 
+activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 
+filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/async/darknet/cfg/yolov3_t.cfg b/workloads/realworld/async/darknet/cfg/yolov3_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a86d73b42225ef65d8ff8692b0d72827ba3a8484 --- /dev/null +++ b/workloads/realworld/async/darknet/cfg/yolov3_t.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 
+activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 
+activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/async/darknet/examples/art.c b/workloads/realworld/async/darknet/examples/art.c new file mode 100644 index 0000000000000000000000000000000000000000..932688e7b9ecbfd1a359a5d373dddf52815da9bb --- /dev/null +++ b/workloads/realworld/async/darknet/examples/art.c @@ -0,0 +1,59 @@ +#include "darknet.h" + +#include + +void demo_art(char *cfgfile, char *weightfile, int cam_index) +{ +#ifdef OPENCV + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + + void * cap = open_video_stream(0, cam_index, 0,0,0); + + char *window = "ArtJudgementBot9000!!!"; + if(!cap) error("Couldn't connect to webcam.\n"); + int i; + int idx[] = {37, 401, 434}; + int n = sizeof(idx)/sizeof(idx[0]); + + while(1){ + image in = get_image_from_stream(cap); + image in_s = resize_image(in, net->w, net->h); + + float *p = network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + + float score = 0; + for(i = 0; i < n; ++i){ + float s = p[idx[i]]; + if (s > score) score = s; + } + score = score; + printf("I APPRECIATE THIS ARTWORK: %10.7f%%\n", score*100); + printf("["); + int upper = 30; + for(i = 0; i < upper; ++i){ + printf("%c", ((i+.5) < score*upper) ? 
219 : ' '); + } + printf("]\n"); + + show_image(in, window, 1); + free_image(in_s); + free_image(in); + } +#endif +} + + +void run_art(int argc, char **argv) +{ + int cam_index = find_int_arg(argc, argv, "-c", 0); + char *cfg = argv[2]; + char *weights = argv[3]; + demo_art(cfg, weights, cam_index); +} + diff --git a/workloads/realworld/async/darknet/examples/attention.c b/workloads/realworld/async/darknet/examples/attention.c new file mode 100644 index 0000000000000000000000000000000000000000..cd1e579d375be8ffed5620c70180f0a59a927159 --- /dev/null +++ b/workloads/realworld/async/darknet/examples/attention.c @@ -0,0 +1,459 @@ +#include "darknet.h" + +#include +#include + +void extend_data_truth(data *d, int n, float val) +{ + int i, j; + for(i = 0; i < d->y.rows; ++i){ + d->y.vals[i] = realloc(d->y.vals[i], (d->y.cols+n)*sizeof(float)); + for(j = 0; j < n; ++j){ + d->y.vals[i][d->y.cols + j] = val; + } + } + d->y.cols += n; +} + +matrix network_loss_data(network *net, data test) +{ + int i,b; + int k = 1; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.cols, sizeof(float)); + float *y = calloc(net->batch*test.y.cols, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + memcpy(y+b*test.y.cols, test.y.vals[i+b], test.y.cols*sizeof(float)); + } + + network orig = *net; + net->input = X; + net->truth = y; + net->train = 0; + net->delta = 0; + forward_network(net); + *net = orig; + + float *delta = net->layers[net->n-1].output; + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + int t = max_index(y + b*test.y.cols, 1000); + float err = sum_array(delta + b*net->outputs, net->outputs); + pred.vals[i+b][0] = -err; + //pred.vals[i+b][0] = 1-delta[b*net->outputs + t]; + } + } + free(X); + free(y); + return pred; +} + +void train_attention(char *datacfg, char *cfgfile, char 
*weightfile, int *gpus, int ngpus, int clear) +{ + int i, j; + + float avg_cls_loss = -1; + float avg_att_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *train_list = option_find_str(options, "train", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + + char **labels = get_labels(label_list); + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + double time; + + int divs=3; + int size=2; + + load_args args = {0}; + args.w = divs*net->w/size; + args.h = divs*net->h/size; + args.size = divs*net->w/size; + args.threads = 32; + args.hierarchy = net->hierarchy; + + args.min = net->min_ratio*args.w; + args.max = net->max_ratio*args.w; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + args.paths = paths; + args.classes = classes; + args.n = imgs; + args.m = N; + args.labels = labels; + args.type = CLASSIFICATION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + 
time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + data resized = resize_data(train, net->w, net->h); + extend_data_truth(&resized, divs*divs, 0); + data *tiles = tile_data(train, divs, size); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float aloss = 0; + float closs = 0; + int z; + for (i = 0; i < divs*divs/ngpus; ++i) { +#pragma omp parallel for + for(j = 0; j < ngpus; ++j){ + int index = i*ngpus + j; + extend_data_truth(tiles+index, divs*divs, SECRET_NUM); + matrix deltas = network_loss_data(nets[j], tiles[index]); + for(z = 0; z < resized.y.rows; ++z){ + resized.y.vals[z][train.y.cols + index] = deltas.vals[z][0]; + } + free_matrix(deltas); + } + } + int *inds = calloc(resized.y.rows, sizeof(int)); + for(z = 0; z < resized.y.rows; ++z){ + int index = max_index(resized.y.vals[z] + train.y.cols, divs*divs); + inds[z] = index; + for(i = 0; i < divs*divs; ++i){ + resized.y.vals[z][train.y.cols + i] = (i == index)? 
1 : 0; + } + } + data best = select_data(tiles, inds); + free(inds); + #ifdef GPU + if (ngpus == 1) { + closs = train_network(net, best); + } else { + closs = train_networks(nets, ngpus, best, 4); + } + #endif + for (i = 0; i < divs*divs; ++i) { + printf("%.2f ", resized.y.vals[0][train.y.cols + i]); + if((i+1)%divs == 0) printf("\n"); + free_data(tiles[i]); + } + free_data(best); + printf("\n"); + image im = float_to_image(64,64,3,resized.X.vals[0]); + //show_image(im, "orig"); + //cvWaitKey(100); + /* + image im1 = float_to_image(64,64,3,tiles[i].X.vals[0]); + image im2 = float_to_image(64,64,3,resized.X.vals[0]); + show_image(im1, "tile"); + show_image(im2, "res"); + */ +#ifdef GPU + if (ngpus == 1) { + aloss = train_network(net, resized); + } else { + aloss = train_networks(nets, ngpus, resized, 4); + } +#endif + for(i = 0; i < divs*divs; ++i){ + printf("%f ", nets[0]->output[1000 + i]); + if ((i+1) % divs == 0) printf("\n"); + } + printf("\n"); + + free_data(resized); + free_data(train); + if(avg_cls_loss == -1) avg_cls_loss = closs; + if(avg_att_loss == -1) avg_att_loss = aloss; + avg_cls_loss = avg_cls_loss*.9 + closs*.1; + avg_att_loss = avg_att_loss*.9 + aloss*.1; + + printf("%ld, %.3f: Att: %f, %f avg, Class: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, aloss, avg_att_loss, closs, avg_cls_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + pthread_join(load_thread, 0); + + free_network(net); + free_ptrs((void**)labels, classes); + free_ptrs((void**)paths, plist->size); + 
free_list(plist); + free(base); +} + +void validate_attention_single(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *leaf_list = option_find_str(options, "leaves", 0); + if(leaf_list) change_leaves(net->hierarchy, leaf_list); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + int divs = 4; + int size = 2; + int extra = 0; + float *avgs = calloc(classes, sizeof(float)); + int *inds = calloc(divs*divs, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w*divs/size); + image crop = crop_image(resized, (resized.w - net->w*divs/size)/2, (resized.h - net->h*divs/size)/2, net->w*divs/size, net->h*divs/size); + image rcrop = resize_image(crop, net->w, net->h); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, rcrop.data); + //pred[classes + 56] = 0; + for(j = 0; j < divs*divs; ++j){ + printf("%.2f ", pred[classes + j]); + if((j+1)%divs == 0) printf("\n"); + } + printf("\n"); + copy_cpu(classes, pred, 1, avgs, 1); + top_k(pred + classes, divs*divs, divs*divs, inds); + show_image(crop, "crop"); + for(j = 0; j < extra; ++j){ + int index = inds[j]; + int row = index / 
divs; + int col = index % divs; + int y = row * crop.h / divs - (net->h - crop.h/divs)/2; + int x = col * crop.w / divs - (net->w - crop.w/divs)/2; + printf("%d %d %d %d\n", row, col, y, x); + image tile = crop_image(crop, x, y, net->w, net->h); + float *pred = network_predict(net, tile.data); + axpy_cpu(classes, 1., pred, 1, avgs, 1); + show_image(tile, "tile"); + //cvWaitKey(10); + } + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + if(rcrop.data != resized.data) free_image(rcrop); + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_attention_multi(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + int scales[] = {224, 288, 320, 352, 384}; + int nscales = sizeof(scales)/sizeof(scales[0]); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + float *pred = calloc(classes, sizeof(float)); + image im = load_image_color(paths[i], 0, 0); + for(j = 0; j 
< nscales; ++j){ + image r = resize_min(im, scales[j]); + resize_network(net, r.w, r.h); + float *p = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + flip_image(r); + p = network_predict(net, r.data); + axpy_cpu(classes, 1, p, 1, pred, 1); + if(r.data != im.data) free_image(r); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void predict_attention(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + if(top == 0) top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = letterbox_image(im, net->w, net->h); + //resize_network(&net, r.w, r.h); + //printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + float *predictions = network_predict(net, X); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + 
//if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? names[net->hierarchy->parent[index]] : "Root"); + //else printf("%s: %f\n",names[index], predictions[index]); + printf("%5.2f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void run_attention(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int ngpus; + int *gpus = read_intlist(gpu_list, &ngpus, gpu_index); + + + int top = find_int_arg(argc, argv, "-t", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + char *layer_s = (argc > 7) ? argv[7]: 0; + if(0==strcmp(argv[2], "predict")) predict_attention(data, cfg, weights, filename, top); + else if(0==strcmp(argv[2], "train")) train_attention(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) validate_attention_single(data, cfg, weights); + else if(0==strcmp(argv[2], "validmulti")) validate_attention_multi(data, cfg, weights); +} + + diff --git a/workloads/realworld/async/darknet/examples/captcha.c b/workloads/realworld/async/darknet/examples/captcha.c new file mode 100644 index 0000000000000000000000000000000000000000..41d6d07c30801b35da34c05984be488e6f6767e9 --- /dev/null +++ b/workloads/realworld/async/darknet/examples/captcha.c @@ -0,0 +1,353 @@ +#include "darknet.h" + +void fix_data_captcha(data d, int mask) +{ + matrix labels = d.y; + int i, j; + for(i = 0; i < d.y.rows; ++i){ + for(j = 0; j < d.y.cols; j += 2){ + if (mask){ + if(!labels.vals[i][j]){ + labels.vals[i][j] = SECRET_NUM; + labels.vals[i][j+1] = SECRET_NUM; + }else if(labels.vals[i][j+1]){ + 
labels.vals[i][j] = 0; + } + } else{ + if (labels.vals[i][j]) { + labels.vals[i][j+1] = 0; + } else { + labels.vals[i][j+1] = 1; + } + } + } + } +} + +void train_captcha(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + int i = *net->seen/imgs; + int solved = 1; + list *plist; + char **labels = get_labels("/data/captcha/reimgs.labels.list"); + if (solved){ + plist = get_paths("/data/captcha/reimgs.solved.list"); + }else{ + plist = get_paths("/data/captcha/reimgs.raw.list"); + } + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = 26; + args.n = imgs; + args.m = plist->size; + args.labels = labels; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + + load_thread = load_data_in_thread(args); + while(1){ + ++i; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + fix_data_captcha(train, solved); + + /* + image im = float_to_image(256, 256, 3, train.X.vals[114]); + show_image(im, "training"); + cvWaitKey(0); + */ + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net->seen); + free_data(train); + if(i%100==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } + } +} + +void test_captcha(char *cfgfile, char *weightfile, char *filename) +{ + network *net = 
load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + int i = 0; + char **names = get_labels("/data/captcha/reimgs.labels.list"); + char buff[256]; + char *input = buff; + int indexes[26]; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + //printf("Enter Image Path: "); + //fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, net->w, net->h); + float *X = im.data; + float *predictions = network_predict(net, X); + top_predictions(net, 26, indexes); + //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < 26; ++i){ + int index = indexes[i]; + if(i != 0) printf(", "); + printf("%s %f", names[index], predictions[index]); + } + printf("\n"); + fflush(stdout); + free_image(im); + if (filename) break; + } +} + +void valid_captcha(char *cfgfile, char *weightfile, char *filename) +{ + char **labels = get_labels("/data/captcha/reimgs.labels.list"); + network *net = load_network(cfgfile, weightfile, 0); + list *plist = get_paths("/data/captcha/reimgs.fg.list"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + int outputs = net->outputs; + + set_batch_network(net, 1); + srand(2222222); + int i, j; + for(i = 0; i < N; ++i){ + if (i%100 == 0) fprintf(stderr, "%d\n", i); + image im = load_image_color(paths[i], net->w, net->h); + float *X = im.data; + float *predictions = network_predict(net, X); + //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + int truth = -1; + for(j = 0; j < 13; ++j){ + if (strstr(paths[i], labels[j])) truth = j; + } + if (truth == -1){ + fprintf(stderr, "bad: %s\n", paths[i]); + return; + } + printf("%d, ", truth); + for(j = 0; j < outputs; ++j){ + if (j != 0) printf(", "); + printf("%f", predictions[j]); + } + printf("\n"); + fflush(stdout); + free_image(im); + if (filename) break; + } +} + +/* + void train_captcha(char *cfgfile, char 
*weightfile) + { + float avg_loss = -1; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + int i = net->seen/imgs; + list *plist = get_paths("/data/captcha/train.auto5"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + while(1){ + ++i; + time=clock(); + data train = load_data_captcha(paths, imgs, plist->size, 10, 200, 60); + translate_data_rows(train, -128); + scale_data_rows(train, 1./128); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + net->seen += imgs; + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net->seen); + free_data(train); + if(i%10==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } + } + } + + void decode_captcha(char *cfgfile, char *weightfile) + { + setbuf(stdout, NULL); + srand(time(0)); + network net = parse_network_cfg(cfgfile); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + char filename[256]; + while(1){ + printf("Enter filename: "); + fgets(filename, 256, stdin); + strtok(filename, "\n"); + image im = load_image_color(filename, 300, 57); + scale_image(im, 1./255.); + float *X = im.data; + float *predictions = network_predict(net, X); + image out = float_to_image(300, 57, 1, predictions); + show_image(out, "decoded"); +#ifdef OPENCV +cvWaitKey(0); +#endif +free_image(im); +} +} + +void encode_captcha(char *cfgfile, char *weightfile) +{ +float avg_loss = -1; +srand(time(0)); +char *base = basecfg(cfgfile); +printf("%s\n", base); +network net = 
parse_network_cfg(cfgfile); +if(weightfile){ + load_weights(&net, weightfile); +} +printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); +int imgs = 1024; +int i = net->seen/imgs; +list *plist = get_paths("/data/captcha/encode.list"); +char **paths = (char **)list_to_array(plist); +printf("%d\n", plist->size); +clock_t time; +while(1){ + ++i; + time=clock(); + data train = load_data_captcha_encode(paths, imgs, plist->size, 300, 57); + scale_data_rows(train, 1./255); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + net->seen += imgs; + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net->seen); + free_matrix(train.X); + if(i%100==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } +} +} + +void validate_captcha(char *cfgfile, char *weightfile) +{ + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + int numchars = 37; + list *plist = get_paths("/data/captcha/solved.hard"); + char **paths = (char **)list_to_array(plist); + int imgs = plist->size; + data valid = load_data_captcha(paths, imgs, 0, 10, 200, 60); + translate_data_rows(valid, -128); + scale_data_rows(valid, 1./128); + matrix pred = network_predict_data(net, valid); + int i, k; + int correct = 0; + int total = 0; + int accuracy = 0; + for(i = 0; i < imgs; ++i){ + int allcorrect = 1; + for(k = 0; k < 10; ++k){ + char truth = int_to_alphanum(max_index(valid.y.vals[i]+k*numchars, numchars)); + char prediction = int_to_alphanum(max_index(pred.vals[i]+k*numchars, numchars)); + if (truth != prediction) allcorrect=0; + if (truth != '.' && truth == prediction) ++correct; + if (truth != '.' 
|| truth != prediction) ++total; + } + accuracy += allcorrect; + } + printf("Word Accuracy: %f, Char Accuracy %f\n", (float)accuracy/imgs, (float)correct/total); + free_data(valid); +} + +void test_captcha(char *cfgfile, char *weightfile) +{ + setbuf(stdout, NULL); + srand(time(0)); + //char *base = basecfg(cfgfile); + //printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + char filename[256]; + while(1){ + //printf("Enter filename: "); + fgets(filename, 256, stdin); + strtok(filename, "\n"); + image im = load_image_color(filename, 200, 60); + translate_image(im, -128); + scale_image(im, 1/128.); + float *X = im.data; + float *predictions = network_predict(net, X); + print_letters(predictions, 10); + free_image(im); + } +} + */ +void run_captcha(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_captcha(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights, filename); + else if(0==strcmp(argv[2], "valid")) valid_captcha(cfg, weights, filename); + //if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "encode")) encode_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "decode")) decode_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "valid")) validate_captcha(cfg, weights); +} + diff --git a/workloads/realworld/async/darknet/examples/cifar.c b/workloads/realworld/async/darknet/examples/cifar.c new file mode 100644 index 0000000000000000000000000000000000000000..a5f5f240b9f680acd9b5890042300d3b683e0f82 --- /dev/null +++ b/workloads/realworld/async/darknet/examples/cifar.c @@ -0,0 +1,251 @@ +#include "darknet.h" + +void train_cifar(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + int classes = 10; + int N = 50000; + + char **labels = get_labels("data/cifar/labels.txt"); + int epoch = (*net->seen)/N; + data train = load_all_cifar10(); + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + clock_t time=clock(); + + float loss = train_network_sgd(net, train, 1); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 
0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)labels, classes); + free(base); + free_data(train); +} + +void train_cifar_distill(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + int classes = 10; + int N = 50000; + + char **labels = get_labels("data/cifar/labels.txt"); + int epoch = (*net->seen)/N; + + data train = load_all_cifar10(); + matrix soft = csv_to_matrix("results/ensemble.csv"); + + float weight = .9; + scale_matrix(soft, weight); + scale_matrix(train.y, 1. - weight); + matrix_add_matrix(soft, train.y); + + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + clock_t time=clock(); + + float loss = train_network_sgd(net, train, 1); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)labels, classes); + free(base); + free_data(train); +} + +void test_cifar_multi(char 
*filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + float avg_acc = 0; + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + + float pred[10] = {0}; + + float *p = network_predict(net, im.data); + axpy_cpu(10, 1, p, 1, pred, 1); + flip_image(im); + p = network_predict(net, im.data); + axpy_cpu(10, 1, p, 1, pred, 1); + + int index = max_index(pred, 10); + int class = max_index(test.y.vals[i], 10); + if(index == class) avg_acc += 1; + free_image(im); + printf("%4d: %.2f%%\n", i, 100.*avg_acc/(i+1)); + } +} + +void test_cifar(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + clock_t time; + float avg_acc = 0; + float avg_top5 = 0; + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + time=clock(); + + float *acc = network_accuracies(net, test, 2); + avg_acc += acc[0]; + avg_top5 += acc[1]; + printf("top1: %f, %lf seconds, %d images\n", avg_acc, sec(clock()-time), test.X.rows); + free_data(test); +} + +void extract_cifar() +{ +char *labels[] = {"airplane","automobile","bird","cat","deer","dog","frog","horse","ship","truck"}; + int i; + data train = load_all_cifar10(); + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + for(i = 0; i < train.X.rows; ++i){ + image im = float_to_image(32, 32, 3, train.X.vals[i]); + int class = max_index(train.y.vals[i], 10); + char buff[256]; + sprintf(buff, "data/cifar/train/%d_%s",i,labels[class]); + save_image_options(im, buff, PNG, 0); + } + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + int class = max_index(test.y.vals[i], 10); + char buff[256]; + sprintf(buff, "data/cifar/test/%d_%s",i,labels[class]); + save_image_options(im, buff, PNG, 0); + 
} +} + +void test_cifar_csv(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + matrix pred = network_predict_data(net, test); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + flip_image(im); + } + matrix pred2 = network_predict_data(net, test); + scale_matrix(pred, .5); + scale_matrix(pred2, .5); + matrix_add_matrix(pred2, pred); + + matrix_to_csv(pred); + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); +} + +void test_cifar_csvtrain(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + srand(time(0)); + + data test = load_all_cifar10(); + + matrix pred = network_predict_data(net, test); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + flip_image(im); + } + matrix pred2 = network_predict_data(net, test); + scale_matrix(pred, .5); + scale_matrix(pred2, .5); + matrix_add_matrix(pred2, pred); + + matrix_to_csv(pred); + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); +} + +void eval_cifar_csv() +{ + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + matrix pred = csv_to_matrix("results/combined.csv"); + fprintf(stderr, "%d %d\n", pred.rows, pred.cols); + + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); + free_matrix(pred); +} + + +void run_cifar(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? 
argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_cifar(cfg, weights); + else if(0==strcmp(argv[2], "extract")) extract_cifar(); + else if(0==strcmp(argv[2], "distill")) train_cifar_distill(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_cifar(cfg, weights); + else if(0==strcmp(argv[2], "multi")) test_cifar_multi(cfg, weights); + else if(0==strcmp(argv[2], "csv")) test_cifar_csv(cfg, weights); + else if(0==strcmp(argv[2], "csvtrain")) test_cifar_csvtrain(cfg, weights); + else if(0==strcmp(argv[2], "eval")) eval_cifar_csv(); +} + + diff --git a/workloads/realworld/async/darknet/examples/classifier.c b/workloads/realworld/async/darknet/examples/classifier.c new file mode 100644 index 0000000000000000000000000000000000000000..e8779836dc01a2e476104132acd2dbfdd6ed29aa --- /dev/null +++ b/workloads/realworld/async/darknet/examples/classifier.c @@ -0,0 +1,1123 @@ +#include "darknet.h" + +#include +#include + +float *get_regression_values(char **labels, int n) +{ + float *v = calloc(n, sizeof(float)); + int i; + for(i = 0; i < n; ++i){ + char *p = strchr(labels[i], ' '); + *p = 0; + v[i] = atof(p+1); + } + return v; +} + +void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + // Ruihao + int tag = option_find_int_quiet(options, "tag", 0); + + char *backup_directory_cfg = 
option_find_str(options, "backup", "/backup/"); + char *label_list_cfg = option_find_str(options, "labels", "data/labels.list"); + char *train_list_cfg = option_find_str(options, "train", "data/train.list"); + + char *env = getenv("UVMAsyncBench_BASE"); + char backup_directory[256]; + char label_list[256]; + char train_list[256]; + sprintf(backup_directory, "%s/%s", env, backup_directory_cfg); + sprintf(label_list, "%s/%s", env, label_list_cfg); + sprintf(train_list, "%s/%s", env, train_list_cfg); + // Ruihao + char *tree = option_find_str(options, "tree", 0); + if (tree) net->hierarchy = read_tree(tree); + int classes = option_find_int(options, "classes", 2); + + char **labels = 0; + if(!tag){ + labels = get_labels(label_list); + } + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + double time; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.hierarchy = net->hierarchy; + + args.min = net->min_ratio*net->w; + args.max = net->max_ratio*net->w; + printf("%d %d\n", args.min, args.max); + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + + args.paths = paths; + args.classes = classes; + args.n = imgs; + args.m = N; + args.labels = labels; + if (tag){ + args.type = TAG_DATA; + } else { + args.type = CLASSIFICATION_DATA; + } + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int count = 0; + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + if(net->random && count++%40 == 0){ + printf("Resizing\n"); + int dim = (rand() % 11 + 4) * 32; + //if (get_current_batch(net)+200 > net->max_batches) dim = 608; + //int dim = (rand() % 4 + 16) * 32; + printf("%d\n", dim); + args.w = dim; + args.h = dim; + args.size 
= dim; + args.min = net->min_ratio*dim; + args.max = net->max_ratio*dim; + printf("%d %d\n", args.min, args.max); + + pthread_join(load_thread, 0); + train = buffer; + free_data(train); + load_thread = load_data(args); + + for(i = 0; i < ngpus; ++i){ + resize_network(nets[i], dim, dim); + } + net = nets[0]; + } + time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + pthread_join(load_thread, 0); + + // free_network(net); + if(labels) free_ptrs((void**)labels, classes); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void validate_classifier_crop(char *datacfg, char *filename, char *weightfile) +{ + int i = 0; + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int 
classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + clock_t time; + float avg_acc = 0; + float avg_topk = 0; + int splits = m/1000; + int num = (i+1)*m/splits - i*m/splits; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + + args.paths = paths; + args.classes = classes; + args.n = num; + args.m = 0; + args.labels = labels; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(i = 1; i <= splits; ++i){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + num = (i+1)*m/splits - i*m/splits; + char **part = paths+(i*m/splits); + if(i != splits){ + args.paths = part; + load_thread = load_data_in_thread(args); + } + printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + float *acc = network_accuracies(net, val, topk); + avg_acc += acc[0]; + avg_topk += acc[1]; + printf("%d: top 1: %f, top %d: %f, %lf seconds, %d images\n", i, avg_acc/i, topk, avg_topk/i, sec(clock()-time), val.X.rows); + free_data(val); + } +} + +void validate_classifier_10(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float 
avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + int w = net->w; + int h = net->h; + int shift = 32; + image im = load_image_color(paths[i], w+shift, h+shift); + image images[10]; + images[0] = crop_image(im, -shift, -shift, w, h); + images[1] = crop_image(im, shift, -shift, w, h); + images[2] = crop_image(im, 0, 0, w, h); + images[3] = crop_image(im, -shift, shift, w, h); + images[4] = crop_image(im, shift, shift, w, h); + flip_image(im); + images[5] = crop_image(im, -shift, -shift, w, h); + images[6] = crop_image(im, shift, -shift, w, h); + images[7] = crop_image(im, 0, 0, w, h); + images[8] = crop_image(im, -shift, shift, w, h); + images[9] = crop_image(im, shift, shift, w, h); + float *pred = calloc(classes, sizeof(float)); + for(j = 0; j < 10; ++j){ + float *p = network_predict(net, images[j].data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1, 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + free_image(images[j]); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_classifier_full(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist 
= get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + int size = net->w; + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, size); + resize_network(net, resized.w, resized.h); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, resized.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + free_image(im); + free_image(resized); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + + +void validate_classifier_single(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *leaf_list = option_find_str(options, "leaves", 0); + if(leaf_list) change_leaves(net->hierarchy, leaf_list); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 
0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image crop = center_crop_image(im, net->w, net->h); + //grayscale_image_3c(crop); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, crop.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + free_image(im); + free_image(crop); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%s, %d, %f, %f, \n", paths[i], class, pred[0], pred[1]); + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_classifier_multi(char *datacfg, char *cfg, char *weights) +{ + int i, j; + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + //int scales[] = {224, 288, 320, 352, 384}; + int scales[] = {224, 256, 288, 320}; + int nscales = sizeof(scales)/sizeof(scales[0]); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + float *pred = calloc(classes, sizeof(float)); + image im = load_image_color(paths[i], 0, 0); + for(j = 0; j < nscales; ++j){ + image r = resize_max(im, scales[j]); + 
resize_network(net, r.w, r.h); + float *p = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + flip_image(r); + p = network_predict(net, r.data); + axpy_cpu(classes, 1, p, 1, pred, 1); + if(r.data != im.data) free_image(r); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + int top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image orig = load_image_color(input, 0, 0); + image r = resize_min(orig, 256); + image im = crop_image(r, (r.w - 224 - 1)/2 + 1, (r.h - 224 - 1)/2 + 1, 224, 224); + float mean[] = {0.48263312050943, 0.45230225481413, 0.40099074308742}; + float std[] = {0.22590347483426, 0.22120921437787, 0.22103996251583}; + float var[3]; + var[0] = std[0]*std[0]; + var[1] = std[1]*std[1]; + var[2] = std[2]*std[2]; + + normalize_cpu(im.data, mean, var, 1, 3, im.w*im.h); + + float *X = im.data; + time=clock(); + float *predictions = network_predict(net, X); + + layer l = net->layers[layer_num]; + 
for(i = 0; i < l.c; ++i){ + if(l.rolling_mean) printf("%f %f %f\n", l.rolling_mean[i], l.rolling_variance[i], l.scales[i]); + } +#ifdef GPU + cuda_pull_array(l.output_gpu, l.output, l.outputs); +#endif + for(i = 0; i < l.outputs; ++i){ + printf("%f\n", l.output[i]); + } + /* + + printf("\n\nWeights\n"); + for(i = 0; i < l.n*l.size*l.size*l.c; ++i){ + printf("%f\n", l.filters[i]); + } + + printf("\n\nBiases\n"); + for(i = 0; i < l.n; ++i){ + printf("%f\n", l.biases[i]); + } + */ + + top_predictions(net, top, indexes); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + printf("%s: %f\n", names[index], predictions[index]); + } + free_image(im); + if (filename) break; + } +} + +void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *name_list_cfg = option_find_str(options, "names", 0); + char *env = getenv("UVMAsyncBench_BASE"); + char name_list[256]; + sprintf(name_list, "%s/%s", env, name_list_cfg); + // if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + // Ruihao + if(top == 0) top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = letterbox_image(im, net->w, net->h); + //image r = resize_min(im, 320); + //printf("%d %d\n", r.w, r.h); + //resize_network(net, r.w, r.h); + //printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + startCPU(); + 
float *predictions = network_predict(net, X); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + endCPU(); + fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + //if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? names[net->hierarchy->parent[index]] : "Root"); + //else printf("%s: %f\n",names[index], predictions[index]); + printf("%5.2f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void label_classifier(char *datacfg, char *filename, char *weightfile) +{ + int i; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "names", "data/labels.list"); + char *test_list = option_find_str(options, "test", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + + char **labels = get_labels(label_list); + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + for(i = 0; i < m; ++i){ + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + float *pred = network_predict(net, crop.data); + + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + int ind = max_index(pred, classes); + + printf("%s\n", labels[ind]); + } +} + +void csv_classifier(char *datacfg, char *cfgfile, char *weightfile) +{ + int i,j; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + 
char *test_list = option_find_str(options, "test", "data/test.list"); + int top = option_find_int(options, "top", 1); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + int *indexes = calloc(top, sizeof(int)); + + for(i = 0; i < m; ++i){ + double time = what_time_is_it_now(); + char *path = paths[i]; + image im = load_image_color(path, 0, 0); + image r = letterbox_image(im, net->w, net->h); + float *predictions = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + + printf("%s", path); + for(j = 0; j < top; ++j){ + printf("\t%d", indexes[j]); + } + printf("\n"); + + free_image(im); + free_image(r); + + fprintf(stderr, "%lf seconds, %d images, %d total\n", what_time_is_it_now() - time, i+1, m); + } +} + +void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_layer) +{ + int curr = 0; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *test_list_cfg = option_find_str(options, "test", "data/test.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char test_list[256]; + sprintf(test_list, "%s/%s", env, test_list_cfg); + // Ruihao + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + m = net->max_batches * net->batch; + free_list(plist); + + clock_t time; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = classes; + args.n = net->batch; + args.m = 0; + args.labels = 0; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(curr = net->batch; curr <= m; curr += net->batch){ + // 
while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + if(curr < m){ + args.paths = paths + curr; + if (curr + net->batch > m) args.n = m - curr; + load_thread = load_data_in_thread(args); + } + fprintf(stderr, "Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + + int i, j; + if (target_layer >= 0){ + //layer l = net->layers[target_layer]; + } + + // for(i = 0; i < pred.rows; ++i){ + // printf("%s", paths[curr-net->batch+i]); + // for(j = 0; j < pred.cols; ++j){ + // printf("\t%g", pred.vals[i][j]); + // } + // printf("\n"); + // } + + fprintf(stderr, "%lf seconds, %d images, %d total\n", sec(clock()-time), val.X.rows, curr); + free_matrix(pred); + free_data(val); + } +} + + +void file_output_classifier(char *datacfg, char *filename, char *weightfile, char *listfile) +{ + int i,j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + //char *label_list = option_find_str(options, "names", "data/labels.list"); + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(listfile); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + for(i = 0; i < m; ++i){ + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + + float *pred = network_predict(net, crop.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 0, 1); + + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + + printf("%s", paths[i]); + for(j = 0; j < classes; ++j){ + printf("\t%g", pred[j]); + } + printf("\n"); + } +} + + +void threat_classifier(char *datacfg, char *cfgfile, 
char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + float threat = 0; + float roll = .2; + + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + int top = option_find_int(options, "top", 1); + + char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + //cvNamedWindow("Threat", CV_WINDOW_NORMAL); + //cvResizeWindow("Threat", 512, 512); + float fps = 0; + int i; + + int count = 0; + + while(1){ + ++count; + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + if(!in.data) break; + image in_s = resize_image(in, net->w, net->h); + + image out = in; + int x1 = out.w / 20; + int y1 = out.h / 20; + int x2 = 2*x1; + int y2 = out.h - out.h/20; + + int border = .01*out.h; + int h = y2 - y1 - 2*border; + int w = x2 - x1 - 2*border; + + float *predictions = network_predict(net, in_s.data); + float curr_threat = 0; + if(1){ + curr_threat = predictions[0] * 0 + + predictions[1] * .6 + + predictions[2]; + } else { + curr_threat = predictions[218] + + predictions[539] + + predictions[540] + + predictions[368] + + predictions[369] + + predictions[370]; + } + threat = roll * curr_threat + (1-roll) * threat; + + draw_box_width(out, x2 + border, y1 + .02*h, x2 + .5 * w, y1 + .02*h + border, border, 0,0,0); + if(threat > .97) { + draw_box_width(out, x2 + .5 * w + border, + y1 + .02*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .02*h + 3*border, 3*border, 1,0,0); + } + draw_box_width(out, x2 + .5 * w + border, + y1 + .02*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .02*h + 3*border, .5*border, 0,0,0); + draw_box_width(out, x2 + border, y1 + .42*h, x2 + .5 * w, 
y1 + .42*h + border, border, 0,0,0); + if(threat > .57) { + draw_box_width(out, x2 + .5 * w + border, + y1 + .42*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .42*h + 3*border, 3*border, 1,1,0); + } + draw_box_width(out, x2 + .5 * w + border, + y1 + .42*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .42*h + 3*border, .5*border, 0,0,0); + + draw_box_width(out, x1, y1, x2, y2, border, 0,0,0); + for(i = 0; i < threat * h ; ++i){ + float ratio = (float) i / h; + float r = (ratio < .5) ? (2*(ratio)) : 1; + float g = (ratio < .5) ? 1 : 1 - 2*(ratio - .5); + draw_box_width(out, x1 + border, y2 - border - i, x2 - border, y2 - border - i, 1, r, g, 0); + } + top_predictions(net, top, indexes); + char buff[256]; + sprintf(buff, "/home/pjreddie/tmp/threat_%06d", count); + //save_image(out, buff); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + for(i = 0; i < top; ++i){ + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + } + + if(1){ + show_image(out, "Threat", 10); + } + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + int bad_cats[] = {218, 539, 540, 1213, 1501, 1742, 1911, 2415, 4348, 19223, 368, 369, 370, 1133, 1200, 1306, 2122, 2301, 2537, 2823, 3179, 3596, 3639, 4489, 5107, 5140, 5289, 6240, 6631, 6762, 7048, 7171, 7969, 7984, 7989, 8824, 8927, 9915, 10270, 10448, 13401, 15205, 18358, 18894, 18895, 19249, 19697}; + + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + int top = option_find_int(options, "top", 1); + 
+ char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + int i; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = resize_image(in, net->w, net->h); + + float *predictions = network_predict(net, in_s.data); + top_predictions(net, top, indexes); + + printf("\033[2J"); + printf("\033[1;1H"); + + int threat = 0; + for(i = 0; i < sizeof(bad_cats)/sizeof(bad_cats[0]); ++i){ + int index = bad_cats[i]; + if(predictions[index] > .01){ + printf("Threat Detected!\n"); + threat = 1; + break; + } + } + if(!threat) printf("Scanning...\n"); + for(i = 0; i < sizeof(bad_cats)/sizeof(bad_cats[0]); ++i){ + int index = bad_cats[i]; + if(predictions[index] > .01){ + printf("%s\n", names[index]); + } + } + + show_image(in, "Threat Detection", 10); + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + +void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + image **alphabet = load_alphabet(); + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + + int w = 1280; + int h = 720; + void * cap = open_video_stream(filename, cam_index, w, h, 0); + + int top = option_find_int(options, "top", 1); + + char *label_list = option_find_str(options, "labels", 0); + char *name_list = option_find_str(options, "names", label_list); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't 
connect to webcam.\n"); + float fps = 0; + int i; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + + float *predictions = network_predict(net, in_s.data); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_predictions(net, top, indexes); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + int lh = in.h*.03; + int toph = 3*lh; + + float rgb[3] = {1,1,1}; + for(i = 0; i < top; ++i){ + printf("%d\n", toph); + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + + char buff[1024]; + sprintf(buff, "%3.1f%%: %s\n", predictions[index]*100, names[index]); + image label = get_label(alphabet, buff, lh); + draw_label(in, toph, lh, label, rgb); + toph += 2*lh; + free_image(label); + } + + show_image(in, base, 10); + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_classifier(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int ngpus; + int *gpus = read_intlist(gpu_list, &ngpus, gpu_index); + + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int top = find_int_arg(argc, argv, "-t", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + char *layer_s = (argc > 7) ? argv[7]: 0; + int layer = layer_s ? 
atoi(layer_s) : -1; + if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top); + else if(0==strcmp(argv[2], "fout")) file_output_classifier(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s)); + else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer); + else if(0==strcmp(argv[2], "csv")) csv_classifier(data, cfg, weights); + else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights); + else if(0==strcmp(argv[2], "validmulti")) validate_classifier_multi(data, cfg, weights); + else if(0==strcmp(argv[2], "valid10")) validate_classifier_10(data, cfg, weights); + else if(0==strcmp(argv[2], "validcrop")) validate_classifier_crop(data, cfg, weights); + else if(0==strcmp(argv[2], "validfull")) validate_classifier_full(data, cfg, weights); +} + + diff --git a/workloads/realworld/async/darknet/examples/coco.c b/workloads/realworld/async/darknet/examples/coco.c new file mode 100644 index 0000000000000000000000000000000000000000..6a50b89abd2abc7fb217b5118034a746f790f690 --- /dev/null +++ b/workloads/realworld/async/darknet/examples/coco.c @@ -0,0 +1,357 @@ +#include "darknet.h" + +#include + +char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking 
meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"}; + +int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; + +void train_coco(char *cfgfile, char *weightfile) +{ + //char *train_images = "/home/pjreddie/data/voc/test/train.txt"; + //char *train_images = "/home/pjreddie/data/coco/train.txt"; + char *train_images = "data/coco.trainval.txt"; + //char *train_images = "data/bags.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + layer l = net->layers[net->n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = 
jitter; + args.num_boxes = side; + args.d = &buffer; + args.type = REGION_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + /* + image im = float_to_image(net->w, net->h, 3, train.X.vals[113]); + image copy = copy_image(im); + draw_coco(copy, train.y.vals[113], 7, "truth"); + cvWaitKey(0); + free_image(copy); + */ + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || (i < 1000 && i%100 == 0)){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +static void print_cocos(FILE *fp, int image_id, detection *dets, int num_boxes, int classes, int w, int h) +{ + int i, j; + for(i = 0; i < num_boxes; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + float bx = xmin; + float by = ymin; + float bw = xmax - xmin; + float bh = ymax - ymin; + + for(j = 0; j < classes; 
++j){ + if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]); + } + } +} + +int get_coco_image_id(char *filename) +{ + char *p = strrchr(filename, '_'); + return atoi(p+1); +} + +void validate_coco(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/"; + list *plist = get_paths("data/coco_val_5k.list"); + //list *plist = get_paths("/home/pjreddie/data/people-art/test.txt"); + //list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + snprintf(buff, 1024, "%s/coco_results.json", base); + FILE *fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + + int m = plist->size; + int i=0; + int t; + + float thresh = .01; + int nms = 1; + float iou_thresh = .5; + + int nthreads = 8; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.type = IMAGE_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + time_t start = time(0); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = 
&buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + int image_id = get_coco_image_id(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh); + print_cocos(fp, image_id, dets, l.side*l.side*l.n, classes, w, h); + free_detections(dets, nboxes); + free_image(val[t]); + free_image(val_resized[t]); + } + } + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); +} + +void validate_coco_recall(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + int side = l.side; + + int j, k; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + + float thresh = .001; + int nms = 0; + float iou_thresh = .5; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + + int nboxes = 0; + detection *dets = get_network_boxes(net, orig.w, 
orig.h, thresh, 0, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, side*side*l.n, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < side*side*l.n; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < side*side*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + free_detections(dets, nboxes); + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free(id); + free_image(orig); + free_image(sized); + } +} + +void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) +{ + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + layer l = net->layers[net->n-1]; + set_batch_network(net, 1); + srand(2222222); + float nms = .4; + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = resize_image(im, net->w, net->h); + float *X = sized.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + + int nboxes = 0; + detection *dets = get_network_boxes(net, 1, 1, thresh, 0, 0, 0, &nboxes); + if 
(nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms); + + draw_detections(im, dets, l.side*l.side*l.n, thresh, coco_classes, alphabet, 80); + save_image(im, "prediction"); + show_image(im, "predictions", 0); + free_detections(dets, nboxes); + free_image(im); + free_image(sized); + if (filename) break; + } +} + +void run_coco(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .2); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5]: 0; + int avg = find_int_arg(argc, argv, "-avg", 1); + if(0==strcmp(argv[2], "test")) test_coco(cfg, weights, filename, thresh); + else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights); + else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix, avg, .5, 0,0,0,0); +} diff --git a/workloads/realworld/async/darknet/examples/darknet.c b/workloads/realworld/async/darknet/examples/darknet.c new file mode 100644 index 0000000000000000000000000000000000000000..f1c5e43b66391a9674c13a193e329cf3dfc26439 --- /dev/null +++ b/workloads/realworld/async/darknet/examples/darknet.c @@ -0,0 +1,559 @@ +#include "darknet.h" + +// #include "../../../../common/cupti_add.h" +// #include "../../../../common/cpu_timestamps.h" +// #include "cpu_timestamps.h" + +static uint64_t startCPUTime; +static uint64_t endCPUTime; + +void startCPU() +{ + struct timespec tv; + if (clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + startCPUTime = (tv.tv_sec * 
1.0e9 + tv.tv_nsec); +} + +void endCPU() +{ + struct timespec tv; + if (clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + + endCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); + // endCPUTimestamp1 = std::chrono::system_clock::now(); + printf("CPU_Times,%lu,%lu,%lu\n", startCPUTime, endCPUTime, endCPUTime - startCPUTime); +} + +#ifdef __cplusplus +extern "C" { +#endif +extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top); +extern void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen); +extern void run_yolo(int argc, char **argv); +extern void run_detector(int argc, char **argv); +extern void run_coco(int argc, char **argv); +extern void run_nightmare(int argc, char **argv); +extern void run_classifier(int argc, char **argv); +extern void run_regressor(int argc, char **argv); +extern void run_segmenter(int argc, char **argv); +extern void run_isegmenter(int argc, char **argv); +extern void run_char_rnn(int argc, char **argv); +extern void run_tag(int argc, char **argv); +extern void run_cifar(int argc, char **argv); +extern void run_go(int argc, char **argv); +extern void run_art(int argc, char **argv); +extern void run_super(int argc, char **argv); +extern void run_lsd(int argc, char **argv); +#ifdef __cplusplus +} +#endif + +void average(int argc, char *argv[]) +{ + char *cfgfile = argv[2]; + char *outfile = argv[3]; + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + network *sum = parse_network_cfg(cfgfile); + + char *weightfile = argv[4]; + load_weights(sum, weightfile); + + int i, j; + int n = argc - 5; + for(i = 0; i < n; ++i){ + weightfile = argv[i+5]; + load_weights(net, weightfile); + for(j = 0; j < net->n; ++j){ + layer l = net->layers[j]; + layer out = sum->layers[j]; + if(l.type == CONVOLUTIONAL){ + int num = l.n*l.c*l.size*l.size; + axpy_cpu(l.n, 1, l.biases, 1, out.biases, 1); + 
axpy_cpu(num, 1, l.weights, 1, out.weights, 1); + if(l.batch_normalize){ + axpy_cpu(l.n, 1, l.scales, 1, out.scales, 1); + axpy_cpu(l.n, 1, l.rolling_mean, 1, out.rolling_mean, 1); + axpy_cpu(l.n, 1, l.rolling_variance, 1, out.rolling_variance, 1); + } + } + if(l.type == CONNECTED){ + axpy_cpu(l.outputs, 1, l.biases, 1, out.biases, 1); + axpy_cpu(l.outputs*l.inputs, 1, l.weights, 1, out.weights, 1); + } + } + } + n = n+1; + for(j = 0; j < net->n; ++j){ + layer l = sum->layers[j]; + if(l.type == CONVOLUTIONAL){ + int num = l.n*l.c*l.size*l.size; + scal_cpu(l.n, 1./n, l.biases, 1); + scal_cpu(num, 1./n, l.weights, 1); + if(l.batch_normalize){ + scal_cpu(l.n, 1./n, l.scales, 1); + scal_cpu(l.n, 1./n, l.rolling_mean, 1); + scal_cpu(l.n, 1./n, l.rolling_variance, 1); + } + } + if(l.type == CONNECTED){ + scal_cpu(l.outputs, 1./n, l.biases, 1); + scal_cpu(l.outputs*l.inputs, 1./n, l.weights, 1); + } + } + save_weights(sum, outfile); +} + +long numops(network *net) +{ + int i; + long ops = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + ops += 2l * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w; + } else if(l.type == CONNECTED){ + ops += 2l * l.inputs * l.outputs; + } else if (l.type == RNN){ + ops += 2l * l.input_layer->inputs * l.input_layer->outputs; + ops += 2l * l.self_layer->inputs * l.self_layer->outputs; + ops += 2l * l.output_layer->inputs * l.output_layer->outputs; + } else if (l.type == GRU){ + ops += 2l * l.uz->inputs * l.uz->outputs; + ops += 2l * l.uh->inputs * l.uh->outputs; + ops += 2l * l.ur->inputs * l.ur->outputs; + ops += 2l * l.wz->inputs * l.wz->outputs; + ops += 2l * l.wh->inputs * l.wh->outputs; + ops += 2l * l.wr->inputs * l.wr->outputs; + } else if (l.type == LSTM){ + ops += 2l * l.uf->inputs * l.uf->outputs; + ops += 2l * l.ui->inputs * l.ui->outputs; + ops += 2l * l.ug->inputs * l.ug->outputs; + ops += 2l * l.uo->inputs * l.uo->outputs; + ops += 2l * l.wf->inputs * l.wf->outputs; + ops += 2l * 
l.wi->inputs * l.wi->outputs; + ops += 2l * l.wg->inputs * l.wg->outputs; + ops += 2l * l.wo->inputs * l.wo->outputs; + } + } + return ops; +} + +void speed(char *cfgfile, int tics) +{ + if (tics == 0) tics = 1000; + network *net = parse_network_cfg(cfgfile); + set_batch_network(net, 1); + int i; + double time=what_time_is_it_now(); + image im = make_image(net->w, net->h, net->c*net->batch); + for(i = 0; i < tics; ++i){ + network_predict(net, im.data); + } + double t = what_time_is_it_now() - time; + long ops = numops(net); + printf("\n%d evals, %f Seconds\n", tics, t); + printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); + printf("FLOPS: %.2f Bn\n", (float)ops/1000000000.*tics/t); + printf("Speed: %f sec/eval\n", t/tics); + printf("Speed: %f Hz\n", tics/t); +} + +void operations(char *cfgfile) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + long ops = numops(net); + printf("Floating Point Operations: %ld\n", ops); + printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); +} + +void oneoff(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + int oldn = net->layers[net->n - 2].n; + int c = net->layers[net->n - 2].c; + scal_cpu(oldn*c, .1, net->layers[net->n - 2].weights, 1); + scal_cpu(oldn, 0, net->layers[net->n - 2].biases, 1); + net->layers[net->n - 2].n = 11921; + net->layers[net->n - 2].biases += 5; + net->layers[net->n - 2].weights += 5*c; + if(weightfile){ + load_weights(net, weightfile); + } + net->layers[net->n - 2].biases -= 5; + net->layers[net->n - 2].weights -= 5*c; + net->layers[net->n - 2].n = oldn; + printf("%d\n", oldn); + layer l = net->layers[net->n - 2]; + copy_cpu(l.n/3, l.biases, 1, l.biases + l.n/3, 1); + copy_cpu(l.n/3, l.biases, 1, l.biases + 2*l.n/3, 1); + copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + l.n/3*l.c, 1); + copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + 2*l.n/3*l.c, 1); + *net->seen = 0; + save_weights(net, 
outfile); +} + +void oneoff2(char *cfgfile, char *weightfile, char *outfile, int l) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights_upto(net, weightfile, 0, net->n); + load_weights_upto(net, weightfile, l, net->n); + } + *net->seen = 0; + save_weights_upto(net, outfile, net->n); +} + +void partial(char *cfgfile, char *weightfile, char *outfile, int max) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 1); + save_weights_upto(net, outfile, max); +} + +void print_weights(char *cfgfile, char *weightfile, int n) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 1); + layer l = net->layers[n]; + int i, j; + //printf("["); + for(i = 0; i < l.n; ++i){ + //printf("["); + for(j = 0; j < l.size*l.size*l.c; ++j){ + //if(j > 0) printf(","); + printf("%g ", l.weights[i*l.size*l.size*l.c + j]); + } + printf("\n"); + //printf("]%s\n", (i == l.n-1)?"":","); + } + //printf("]"); +} + +void rescale_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + rescale_weights(l, 2, -.5); + break; + } + } + save_weights(net, outfile); +} + +void rgbgr_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + rgbgr_weights(l); + break; + } + } + save_weights(net, outfile); +} + +void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for (i = 0; i < net->n; ++i) { + layer l = net->layers[i]; + if (l.type == CONVOLUTIONAL && l.batch_normalize) { + denormalize_convolutional_layer(l); + } + if (l.type == CONNECTED && l.batch_normalize) { + 
denormalize_connected_layer(l); + } + if (l.type == GRU && l.batch_normalize) { + denormalize_connected_layer(*l.input_z_layer); + denormalize_connected_layer(*l.input_r_layer); + denormalize_connected_layer(*l.input_h_layer); + denormalize_connected_layer(*l.state_z_layer); + denormalize_connected_layer(*l.state_r_layer); + denormalize_connected_layer(*l.state_h_layer); + } + } + save_weights(net, outfile); +} + +layer normalize_layer(layer l, int n) +{ + int j; + l.batch_normalize=1; + l.scales = (float *) calloc(n, sizeof(float)); + for(j = 0; j < n; ++j){ + l.scales[j] = 1; + } + l.rolling_mean = (float *) calloc(n, sizeof(float)); + l.rolling_variance = (float *) calloc(n, sizeof(float)); + return l; +} + +void normalize_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL && !l.batch_normalize){ + net->layers[i] = normalize_layer(l, l.n); + } + if (l.type == CONNECTED && !l.batch_normalize) { + net->layers[i] = normalize_layer(l, l.outputs); + } + if (l.type == GRU && l.batch_normalize) { + *l.input_z_layer = normalize_layer(*l.input_z_layer, l.input_z_layer->outputs); + *l.input_r_layer = normalize_layer(*l.input_r_layer, l.input_r_layer->outputs); + *l.input_h_layer = normalize_layer(*l.input_h_layer, l.input_h_layer->outputs); + *l.state_z_layer = normalize_layer(*l.state_z_layer, l.state_z_layer->outputs); + *l.state_r_layer = normalize_layer(*l.state_r_layer, l.state_r_layer->outputs); + *l.state_h_layer = normalize_layer(*l.state_h_layer, l.state_h_layer->outputs); + net->layers[i].batch_normalize=1; + } + } + save_weights(net, outfile); +} + +void statistics_net(char *cfgfile, char *weightfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for (i = 0; i < net->n; ++i) { + layer l = net->layers[i]; + if (l.type == CONNECTED && 
l.batch_normalize) { + printf("Connected Layer %d\n", i); + statistics_connected_layer(l); + } + if (l.type == GRU && l.batch_normalize) { + printf("GRU Layer %d\n", i); + printf("Input Z\n"); + statistics_connected_layer(*l.input_z_layer); + printf("Input R\n"); + statistics_connected_layer(*l.input_r_layer); + printf("Input H\n"); + statistics_connected_layer(*l.input_h_layer); + printf("State Z\n"); + statistics_connected_layer(*l.state_z_layer); + printf("State R\n"); + statistics_connected_layer(*l.state_r_layer); + printf("State H\n"); + statistics_connected_layer(*l.state_h_layer); + } + printf("\n"); + } +} + +void denormalize_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for (i = 0; i < net->n; ++i) { + layer l = net->layers[i]; + if ((l.type == DECONVOLUTIONAL || l.type == CONVOLUTIONAL) && l.batch_normalize) { + denormalize_convolutional_layer(l); + net->layers[i].batch_normalize=0; + } + if (l.type == CONNECTED && l.batch_normalize) { + denormalize_connected_layer(l); + net->layers[i].batch_normalize=0; + } + if (l.type == GRU && l.batch_normalize) { + denormalize_connected_layer(*l.input_z_layer); + denormalize_connected_layer(*l.input_r_layer); + denormalize_connected_layer(*l.input_h_layer); + denormalize_connected_layer(*l.state_z_layer); + denormalize_connected_layer(*l.state_r_layer); + denormalize_connected_layer(*l.state_h_layer); + l.input_z_layer->batch_normalize = 0; + l.input_r_layer->batch_normalize = 0; + l.input_h_layer->batch_normalize = 0; + l.state_z_layer->batch_normalize = 0; + l.state_r_layer->batch_normalize = 0; + l.state_h_layer->batch_normalize = 0; + net->layers[i].batch_normalize=0; + } + } + save_weights(net, outfile); +} + +void mkimg(char *cfgfile, char *weightfile, int h, int w, int num, char *prefix) +{ + network *net = load_network(cfgfile, weightfile, 0); + image *ims = get_weights(net->layers[0]); + int n = net->layers[0].n; + 
int z; + for(z = 0; z < num; ++z){ + image im = make_image(h, w, 3); + fill_image(im, .5); + int i; + for(i = 0; i < 100; ++i){ + image r = copy_image(ims[rand()%n]); + rotate_image_cw(r, rand()%4); + random_distort_image(r, 1, 1.5, 1.5); + int dx = rand()%(w-r.w); + int dy = rand()%(h-r.h); + ghost_image(r, im, dx, dy); + free_image(r); + } + char buff[256]; + sprintf(buff, "%s/gen_%d", prefix, z); + save_image(im, buff); + free_image(im); + } +} + +void visualize(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + visualize_network(net); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsc() +{ + unsigned long a, d; + + __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); + + return (a | (d << 32)); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsp() { + struct timespec tms; + if (clock_gettime(CLOCK_REALTIME, &tms)) { + return -1; + } + unsigned long ns = tms.tv_sec * 1000000000; + ns += tms.tv_nsec; + return ns; +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + //test_resize("data/bad.jpg"); + //test_box(); + //test_convolutional_layer(); + if(argc < 2){ + fprintf(stderr, "usage: %s \n", argv[0]); + return 0; + } + gpu_index = find_int_arg(argc, argv, "-i", GPU_DEVICE); + if(find_arg(argc, argv, "-nogpu")) { + gpu_index = -1; + } + +#ifndef GPU + gpu_index = -1; +#else + if(gpu_index >= 0){ + cuda_set_device(gpu_index); + } + initTrace(); +#endif + + if (0 == strcmp(argv[1], "average")){ + average(argc, argv); + } else if (0 == strcmp(argv[1], "yolo")){ + run_yolo(argc, argv); + } else if (0 == strcmp(argv[1], "super")){ + run_super(argc, argv); + } else if (0 == strcmp(argv[1], "lsd")){ + run_lsd(argc, argv); + } else if (0 == strcmp(argv[1], "detector")){ + run_detector(argc, argv); + } else if (0 == strcmp(argv[1], "detect")){ + float thresh = 
find_float_arg(argc, argv, "-thresh", .5); + char *filename = (argc > 4) ? argv[4]: 0; + char *outfile = find_char_arg(argc, argv, "-out", 0); + int fullscreen = find_arg(argc, argv, "-fullscreen"); + char *value = getenv("UVMAsyncBench_BASE"); + char buff[256]; + sprintf(buff, "%s/workloads/realworld/standard/darknet/cfg/coco.data", value); + test_detector(buff, argv[2], argv[3], filename, thresh, .5, outfile, fullscreen); + } else if (0 == strcmp(argv[1], "cifar")){ + run_cifar(argc, argv); + } else if (0 == strcmp(argv[1], "go")){ + run_go(argc, argv); + } else if (0 == strcmp(argv[1], "rnn")){ + run_char_rnn(argc, argv); + } else if (0 == strcmp(argv[1], "coco")){ + run_coco(argc, argv); + } else if (0 == strcmp(argv[1], "classify")){ + predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5); + } else if (0 == strcmp(argv[1], "classifier")){ + run_classifier(argc, argv); + } else if (0 == strcmp(argv[1], "regressor")){ + run_regressor(argc, argv); + } else if (0 == strcmp(argv[1], "isegmenter")){ + run_isegmenter(argc, argv); + } else if (0 == strcmp(argv[1], "segmenter")){ + run_segmenter(argc, argv); + } else if (0 == strcmp(argv[1], "art")){ + run_art(argc, argv); + } else if (0 == strcmp(argv[1], "tag")){ + run_tag(argc, argv); + } else if (0 == strcmp(argv[1], "3d")){ + composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? 
atof(argv[5]) : 0); + } else if (0 == strcmp(argv[1], "test")){ + test_resize(argv[2]); + } else if (0 == strcmp(argv[1], "nightmare")){ + run_nightmare(argc, argv); + } else if (0 == strcmp(argv[1], "rgbgr")){ + rgbgr_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "reset")){ + reset_normalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "denormalize")){ + denormalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "statistics")){ + statistics_net(argv[2], argv[3]); + } else if (0 == strcmp(argv[1], "normalize")){ + normalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "rescale")){ + rescale_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "ops")){ + operations(argv[2]); + } else if (0 == strcmp(argv[1], "speed")){ + speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0); + } else if (0 == strcmp(argv[1], "oneoff")){ + oneoff(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "oneoff2")){ + oneoff2(argv[2], argv[3], argv[4], atoi(argv[5])); + } else if (0 == strcmp(argv[1], "print")){ + print_weights(argv[2], argv[3], atoi(argv[4])); + } else if (0 == strcmp(argv[1], "partial")){ + partial(argv[2], argv[3], argv[4], atoi(argv[5])); + } else if (0 == strcmp(argv[1], "average")){ + average(argc, argv); + } else if (0 == strcmp(argv[1], "visualize")){ + visualize(argv[2], (argc > 3) ? 
argv[3] : 0); + } else if (0 == strcmp(argv[1], "mkimg")){ + mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]); + } else if (0 == strcmp(argv[1], "imtest")){ + test_resize(argv[2]); + } else { + fprintf(stderr, "Not an option: %s\n", argv[1]); + } + finiTrace(); + return 0; +} + diff --git a/workloads/realworld/async/darknet/examples/detector-scipy-opencv.py b/workloads/realworld/async/darknet/examples/detector-scipy-opencv.py new file mode 100644 index 0000000000000000000000000000000000000000..3bfc591312ad89ff2b026ffac0daecd461c80447 --- /dev/null +++ b/workloads/realworld/async/darknet/examples/detector-scipy-opencv.py @@ -0,0 +1,56 @@ +# Stupid python path shit. +# Instead just add darknet.py to somewhere in your python path +# OK actually that might not be a great idea, idk, work in progress +# Use at your own risk. or don't, i don't care + +from scipy.misc import imread +import cv2 + +def array_to_image(arr): + arr = arr.transpose(2,0,1) + c = arr.shape[0] + h = arr.shape[1] + w = arr.shape[2] + arr = (arr/255.0).flatten() + data = dn.c_array(dn.c_float, arr) + im = dn.IMAGE(w,h,c,data) + return im + +def detect2(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): + boxes = dn.make_boxes(net) + probs = dn.make_probs(net) + num = dn.num_boxes(net) + dn.network_detect(net, image, thresh, hier_thresh, nms, boxes, probs) + res = [] + for j in range(num): + for i in range(meta.classes): + if probs[j][i] > 0: + res.append((meta.names[i], probs[j][i], (boxes[j].x, boxes[j].y, boxes[j].w, boxes[j].h))) + res = sorted(res, key=lambda x: -x[1]) + dn.free_ptrs(dn.cast(probs, dn.POINTER(dn.c_void_p)), num) + return res + +import sys, os +sys.path.append(os.path.join(os.getcwd(),'python/')) + +import darknet as dn + +# Darknet +net = dn.load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0) +meta = dn.load_meta("cfg/coco.data") +r = dn.detect(net, meta, "data/dog.jpg") +print r + +# scipy +arr= imread('data/dog.jpg') +im = array_to_image(arr) 
+r = detect2(net, meta, im) +print r + +# OpenCV +arr = cv2.imread('data/dog.jpg') +im = array_to_image(arr) +dn.rgbgr_image(im) +r = detect2(net, meta, im) +print r + diff --git a/workloads/realworld/async/darknet/examples/detector.c b/workloads/realworld/async/darknet/examples/detector.c new file mode 100644 index 0000000000000000000000000000000000000000..6ff1fcdff3d3d81abb458e001091cf2757b8d837 --- /dev/null +++ b/workloads/realworld/async/darknet/examples/detector.c @@ -0,0 +1,931 @@ +#include "darknet.h" + +static int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; + + +void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + list *options = read_data_cfg(datacfg); + // Ruihao + char *train_images_cfg = option_find_str(options, "train", "data/train.list"); + char *backup_directory_cfg = option_find_str(options, "backup", "/backup/"); + + char *env = getenv("UVMAsyncBench_BASE"); + char train_images[256]; + char backup_directory[256]; + sprintf(train_images, "%s/%s", env, train_images_cfg); + sprintf(backup_directory, "%s/%s", env, backup_directory_cfg); + // Ruihao + + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network **nets = calloc(ngpus, sizeof(network)); + + srand(time(0)); + int seed = rand(); + int i; + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + data train, buffer; + + layer l = net->layers[net->n - 1]; + + int 
classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = get_base_args(net); + args.coords = l.coords; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes = l.max_boxes; + args.d = &buffer; + args.type = DETECTION_DATA; + //args.type = INSTANCE_DATA; + args.threads = 64; + + pthread_t load_thread = load_data(args); + double time; + int count = 0; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + if(l.random && count++%10 == 0){ + printf("Resizing\n"); + int dim = (rand() % 10 + 10) * 32; + if (get_current_batch(net)+200 > net->max_batches) dim = 608; + //int dim = (rand() % 4 + 16) * 32; + printf("%d\n", dim); + args.w = dim; + args.h = dim; + + pthread_join(load_thread, 0); + train = buffer; + free_data(train); + load_thread = load_data(args); + + #pragma omp parallel for + for(i = 0; i < ngpus; ++i){ + resize_network(nets[i], dim, dim); + } + net = nets[0]; + } + time=what_time_is_it_now(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + /* + int k; + for(k = 0; k < l.max_boxes; ++k){ + box b = float_to_box(train.y.vals[10] + 1 + k*5); + if(!b.x) break; + printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h); + } + */ + /* + int zz; + for(zz = 0; zz < train.X.cols; ++zz){ + image im = float_to_image(net->w, net->h, 3, train.X.vals[zz]); + int k; + for(k = 0; k < l.max_boxes; ++k){ + box b = float_to_box(train.y.vals[zz] + k*5, 1); + printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + draw_bbox(im, b, 1, 1,0,0); + } + show_image(im, "truth11"); + cvWaitKey(0); + save_image(im, "truth11"); + } + */ + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + + time=what_time_is_it_now(); + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = 
train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + i = get_current_batch(net); + printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, i*imgs); + if(i%100==0){ +#ifdef GPU + if(ngpus != 1) sync_nets(nets, ngpus, 0); +#endif + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + if(i%10000==0 || (i < 1000 && i%100 == 0)){ +#ifdef GPU + if(ngpus != 1) sync_nets(nets, ngpus, 0); +#endif + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } +#ifdef GPU + if(ngpus != 1) sync_nets(nets, ngpus, 0); +#endif + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + + +static int get_coco_image_id(char *filename) +{ + char *p = strrchr(filename, '/'); + char *c = strrchr(filename, '_'); + if(c) p = c; + return atoi(p+1); +} + +static void print_cocos(FILE *fp, char *image_path, detection *dets, int num_boxes, int classes, int w, int h) +{ + int i, j; + int image_id = get_coco_image_id(image_path); + for(i = 0; i < num_boxes; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + float bx = xmin; + float by = ymin; + float bw = xmax - xmin; + float bh = ymax - ymin; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]); + } + } +} + +void 
print_detector_detections(FILE **fps, char *id, detection *dets, int total, int classes, int w, int h) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2. + 1; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2. + 1; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2. + 1; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2. + 1; + + if (xmin < 1) xmin = 1; + if (ymin < 1) ymin = 1; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], + xmin, ymin, xmax, ymax); + } + } +} + +void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int classes, int w, int h) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + int class = j; + if (dets[i].prob[class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j+1, dets[i].prob[class], + xmin, ymin, xmax, ymax); + } + } +} + +void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char *outfile) +{ + int j; + list *options = read_data_cfg(datacfg); + char *valid_images = option_find_str(options, "valid", "data/train.list"); + char *name_list = option_find_str(options, "names", "data/names.list"); + char *prefix = option_find_str(options, "results", "results"); + char **names = get_labels(name_list); + char *mapf = option_find_str(options, "map", 0); + int *map = 0; + if (mapf) map = read_map(mapf); + + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 2); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + 
+ list *plist = get_paths(valid_images); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + char *type = option_find_str(options, "eval", "voc"); + FILE *fp = 0; + FILE **fps = 0; + int coco = 0; + int imagenet = 0; + if(0==strcmp(type, "coco")){ + if(!outfile) outfile = "coco_results"; + snprintf(buff, 1024, "%s/%s.json", prefix, outfile); + fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + coco = 1; + } else if(0==strcmp(type, "imagenet")){ + if(!outfile) outfile = "imagenet-detection"; + snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); + fp = fopen(buff, "w"); + imagenet = 1; + classes = 200; + } else { + if(!outfile) outfile = "comp4_det_test_"; + fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); + fps[j] = fopen(buff, "w"); + } + } + + int m = plist->size; + int i=0; + int t; + + float thresh = .005; + float nms = .45; + + int nthreads = 4; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + image input = make_image(net->w, net->h, net->c*2); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + //args.type = IMAGE_DATA; + args.type = LETTERBOX_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + double start = what_time_is_it_now(); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = 
&buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id = basecfg(path); + copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data, 1); + flip_image(val_resized[t]); + copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data + net->w*net->h*net->c, 1); + + network_predict(net, input.data); + int w = val[t].w; + int h = val[t].h; + int num = 0; + detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &num); + if (nms) do_nms_sort(dets, num, classes, nms); + if (coco){ + print_cocos(fp, path, dets, num, classes, w, h); + } else if (imagenet){ + print_imagenet_detections(fp, i+t-nthreads+1, dets, num, classes, w, h); + } else { + print_detector_detections(fps, id, dets, num, classes, w, h); + } + free_detections(dets, num); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + for(j = 0; j < classes; ++j){ + if(fps) fclose(fps[j]); + } + if(coco){ + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start); +} + + +void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile) +{ + int j; + list *options = read_data_cfg(datacfg); + char *valid_images = option_find_str(options, "valid", "data/train.list"); + char *name_list = option_find_str(options, "names", "data/names.list"); + char *prefix = option_find_str(options, "results", "results"); + char **names = get_labels(name_list); + char *mapf = option_find_str(options, "map", 0); + int *map = 0; + if (mapf) map = read_map(mapf); + + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + list *plist = get_paths(valid_images); + char **paths = (char **)list_to_array(plist); + + layer l = 
net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + char *type = option_find_str(options, "eval", "voc"); + FILE *fp = 0; + FILE **fps = 0; + int coco = 0; + int imagenet = 0; + if(0==strcmp(type, "coco")){ + if(!outfile) outfile = "coco_results"; + snprintf(buff, 1024, "%s/%s.json", prefix, outfile); + fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + coco = 1; + } else if(0==strcmp(type, "imagenet")){ + if(!outfile) outfile = "imagenet-detection"; + snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); + fp = fopen(buff, "w"); + imagenet = 1; + classes = 200; + } else { + if(!outfile) outfile = "comp4_det_test_"; + fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); + fps[j] = fopen(buff, "w"); + } + } + + + int m = plist->size; + int i=0; + int t; + + float thresh = .005; + float nms = .45; + + int nthreads = 4; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + //args.type = IMAGE_DATA; + args.type = LETTERBOX_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + double start = what_time_is_it_now(); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id 
= basecfg(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &nboxes); + if (nms) do_nms_sort(dets, nboxes, classes, nms); + if (coco){ + print_cocos(fp, path, dets, nboxes, classes, w, h); + } else if (imagenet){ + print_imagenet_detections(fp, i+t-nthreads+1, dets, nboxes, classes, w, h); + } else { + print_detector_detections(fps, id, dets, nboxes, classes, w, h); + } + free_detections(dets, nboxes); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + for(j = 0; j < classes; ++j){ + if(fps) fclose(fps[j]); + } + if(coco){ + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start); +} + +void validate_detector_recall(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + list *plist = get_paths("data/coco_val_5k.list"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + + int j, k; + + int m = plist->size; + int i=0; + + float thresh = .001; + float iou_thresh = .5; + float nms = .4; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + int nboxes = 0; + detection *dets = get_network_boxes(net, sized.w, sized.h, thresh, .5, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, nboxes, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + 
find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < nboxes; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < l.w*l.h*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free(id); + free_image(orig); + free_image(sized); + } +} + + +void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen) +{ + list *options = read_data_cfg(datacfg); + // Ruihao + char *name_list_cfg = option_find_str(options, "names", "data/names.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char name_list[256]; + sprintf(name_list, "%s/%s", env, name_list_cfg); + // Ruihao + char **names = get_labels(name_list); + + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + double time; + char buff[256]; + char *input = buff; + float nms=.45; + while(1){ + printf("fine name is %s\n", filename); + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = letterbox_image(im, net->w, net->h); + //image sized = resize_image(im, net->w, net->h); + //image sized2 = resize_max(im, net->w); + //image sized = 
crop_image(sized2, -((net->w - sized2.w)/2), -((net->h - sized2.h)/2), net->w, net->h); + //resize_network(net, sized.w, sized.h); + layer l = net->layers[net->n-1]; + + + float *X = sized.data; + time=what_time_is_it_now(); + startCPU(); + network_predict(net, X); + endCPU(); + printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time); + int nboxes = 0; + detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes); + //printf("%d\n", nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes); + free_detections(dets, nboxes); + if(outfile){ + save_image(im, outfile); + } + else{ + save_image(im, "predictions"); +#ifdef OPENCV + make_window("predictions", 512, 512, 0); + show_image(im, "predictions", 0); +#endif + } + + free_image(im); + free_image(sized); + if (filename) break; + } +} + +/* +void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + CvCapture * cap; + + int w = 1280; + int h = 720; + + if(filename){ + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + } + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + cvNamedWindow(base, CV_WINDOW_NORMAL); + cvResizeWindow(base, 512, 512); + float fps = 0; + int i; + float nms = .45; + + while(1){ + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + layer l = net->layers[net->n-1]; + + float *X = in_s.data; + 
network_predict(net, X); + int nboxes = 0; + detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 0, &nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + + for(i = 0; i < nboxes; ++i){ + if(dets[i].prob[class] > thresh){ + box b = dets[i].bbox; + int left = b.x-b.w/2.; + int top = b.y-b.h/2.; + censor_image(in, left, top, b.w, b.h); + } + } + show_image(in, base); + cvWaitKey(10); + free_detections(dets, nboxes); + + + free_image(in_s); + free_image(in); + + + float curr = 0; + fps = .9*fps + .1*curr; + for(i = 0; i < skip; ++i){ + image in = get_image_from_stream(cap); + free_image(in); + } + } + #endif +} + +void extract_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + CvCapture * cap; + + int w = 1280; + int h = 720; + + if(filename){ + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + } + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + cvNamedWindow(base, CV_WINDOW_NORMAL); + cvResizeWindow(base, 512, 512); + float fps = 0; + int i; + int count = 0; + float nms = .45; + + while(1){ + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + layer l = net->layers[net->n-1]; + + show_image(in, base); + + int nboxes = 0; + float *X = in_s.data; + network_predict(net, X); + detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 1, &nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + + 
for(i = 0; i < nboxes; ++i){ + if(dets[i].prob[class] > thresh){ + box b = dets[i].bbox; + int size = b.w*in.w > b.h*in.h ? b.w*in.w : b.h*in.h; + int dx = b.x*in.w-size/2.; + int dy = b.y*in.h-size/2.; + image bim = crop_image(in, dx, dy, size, size); + char buff[2048]; + sprintf(buff, "results/extract/%07d", count); + ++count; + save_image(bim, buff); + free_image(bim); + } + } + free_detections(dets, nboxes); + + + free_image(in_s); + free_image(in); + + + float curr = 0; + fps = .9*fps + .1*curr; + for(i = 0; i < skip; ++i){ + image in = get_image_from_stream(cap); + free_image(in); + } + } + #endif +} +*/ + +/* +void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets) +{ + network_predict_image(net, im); + layer l = net->layers[net->n-1]; + int nboxes = num_boxes(net); + fill_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 0, dets); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); +} +*/ + +void infer_detector(char *datacfg, char *cfgfile, char *weightfile) +{ + int curr = 0; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *test_list_cfg = option_find_str(options, "valid", "data/valid.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char test_list[256]; + sprintf(test_list, "%s/%s", env, test_list_cfg); + // Ruihao + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + m = net->max_batches * net->batch; + free_list(plist); + + clock_t time; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = classes; + args.n = net->batch; + args.m = 0; + args.labels = 0; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(curr = net->batch; curr <= m; curr += net->batch){ 
+ // while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + if(curr < m){ + args.paths = paths + curr; + if (curr + net->batch > m) args.n = m - curr; + load_thread = load_data_in_thread(args); + } + fprintf(stderr, "Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + + fprintf(stderr, "%lf seconds, %d images, %d total\n", sec(clock()-time), val.X.rows, curr); + free_matrix(pred); + free_data(val); + } +} + +void run_detector(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .5); + float hier_thresh = find_float_arg(argc, argv, "-hier", .5); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + int avg = find_int_arg(argc, argv, "-avg", 3); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + char *outfile = find_char_arg(argc, argv, "-out", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int clear = find_arg(argc, argv, "-clear"); + int fullscreen = find_arg(argc, argv, "-fullscreen"); + int width = find_int_arg(argc, argv, "-w", 0); + int height = find_int_arg(argc, argv, "-h", 0); + int fps = find_int_arg(argc, argv, "-fps", 0); + //int class = find_int_arg(argc, argv, "-class", 0); + + char *datacfg = argv[3]; + char *cfg = argv[4]; + char *weights 
= (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen); + else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile); + // Ruihao + else if(0==strcmp(argv[2], "infer")) infer_detector(datacfg, cfg, weights); + // Ruihao + else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile); + else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) { + list *options = read_data_cfg(datacfg); + int classes = option_find_int(options, "classes", 20); + char *name_list = option_find_str(options, "names", "data/names.list"); + char **names = get_labels(name_list); + demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen); + } + //else if(0==strcmp(argv[2], "extract")) extract_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip); + //else if(0==strcmp(argv[2], "censor")) censor_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip); +} diff --git a/workloads/realworld/async/darknet/examples/detector.py b/workloads/realworld/async/darknet/examples/detector.py new file mode 100644 index 0000000000000000000000000000000000000000..40bb365e68211c513db9d63847ac95070f5eab98 --- /dev/null +++ b/workloads/realworld/async/darknet/examples/detector.py @@ -0,0 +1,27 @@ +# Stupid python path shit. +# Instead just add darknet.py to somewhere in your python path +# OK actually that might not be a great idea, idk, work in progress +# Use at your own risk. 
or don't, i don't care + +import sys, os +sys.path.append(os.path.join(os.getcwd(),'python/')) + +import darknet as dn +import pdb + +dn.set_gpu(0) +net = dn.load_net("cfg/yolo-thor.cfg", "/home/pjreddie/backup/yolo-thor_final.weights", 0) +meta = dn.load_meta("cfg/thor.data") +r = dn.detect(net, meta, "data/bedroom.jpg") +print r + +# And then down here you could detect a lot more images like: +r = dn.detect(net, meta, "data/eagle.jpg") +print r +r = dn.detect(net, meta, "data/giraffe.jpg") +print r +r = dn.detect(net, meta, "data/horses.jpg") +print r +r = dn.detect(net, meta, "data/person.jpg") +print r + diff --git a/workloads/realworld/async/darknet/examples/dice.c b/workloads/realworld/async/darknet/examples/dice.c new file mode 100644 index 0000000000000000000000000000000000000000..f56d76c0bb66c7f630ba1c4d1dc9195398b87cfb --- /dev/null +++ b/workloads/realworld/async/darknet/examples/dice.c @@ -0,0 +1,116 @@ +#include "darknet.h" + +char *dice_labels[] = {"face1","face2","face3","face4","face5","face6"}; + +void train_dice(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + int i = *net.seen/imgs; + char **labels = dice_labels; + list *plist = get_paths("data/dice/dice.train.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + while(1){ + ++i; + time=clock(); + data train = load_data_old(paths, imgs, plist->size, labels, 6, net.w, net.h); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf 
seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); + free_data(train); + if((i % 100) == 0) net.learning_rate *= .1; + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, i); + save_weights(net, buff); + } + } +} + +void validate_dice(char *filename, char *weightfile) +{ + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + char **labels = dice_labels; + list *plist = get_paths("data/dice/dice.val.list"); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + data val = load_data_old(paths, m, 0, labels, 6, net.w, net.h); + float *acc = network_accuracies(net, val, 2); + printf("Validation Accuracy: %f, %d images\n", acc[0], m); + free_data(val); +} + +void test_dice(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + int i = 0; + char **names = dice_labels; + char buff[256]; + char *input = buff; + int indexes[6]; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, net.w, net.h); + float *X = im.data; + float *predictions = network_predict(net, X); + top_predictions(net, 6, indexes); + for(i = 0; i < 6; ++i){ + int index = indexes[i]; + printf("%s: %f\n", names[index], predictions[index]); + } + free_image(im); + if (filename) break; + } +} + +void run_dice(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "test")) test_dice(cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_dice(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_dice(cfg, weights); +} + diff --git a/workloads/realworld/async/darknet/examples/go.c b/workloads/realworld/async/darknet/examples/go.c new file mode 100644 index 0000000000000000000000000000000000000000..688579dcb3a3e35e9a79b8fb8aa684f28f44290d --- /dev/null +++ b/workloads/realworld/async/darknet/examples/go.c @@ -0,0 +1,1370 @@ +#include "darknet.h" + +#include +#include +#include + +int inverted = 1; +int noi = 1; +static const int nind = 10; +int legal_go(float *b, float *ko, int p, int r, int c); +int check_ko(float *x, float *ko); + +typedef struct { + char **data; + int n; +} moves; + +char *fgetgo(FILE *fp) +{ + if(feof(fp)) return 0; + size_t size = 96; + char *line = malloc(size*sizeof(char)); + if(size != fread(line, sizeof(char), size, fp)){ + free(line); + return 0; + } + + return line; +} + +moves load_go_moves(char *filename) +{ + moves m; + m.n = 128; + m.data = calloc(128, sizeof(char*)); + FILE *fp = fopen(filename, "rb"); + int count = 0; + char *line = 0; + while ((line = fgetgo(fp))) { + if (count >= m.n) { + m.n *= 2; + m.data = realloc(m.data, m.n*sizeof(char*)); + } + m.data[count] = line; + ++count; + } + printf("%d\n", count); + m.n = count; + m.data = realloc(m.data, count*sizeof(char*)); + return m; +} + +void string_to_board(char *s, float *board) +{ + int i, j; + memset(board, 0, 2*19*19*sizeof(float)); + int count = 0; + for(i = 0; i < 91; ++i){ + char c = s[i]; + for(j = 0; j < 4; ++j){ + int me = (c >> (2*j)) & 1; + int you = (c >> (2*j + 1)) & 1; + if (me) board[count] = 1; + else if (you) board[count + 19*19] = 1; + ++count; + if(count >= 19*19) break; + } + } +} + +void board_to_string(char *s, float *board) +{ + int i, j; + memset(s, 0, (19*19/4+1)*sizeof(char)); + int count = 0; + for(i = 0; i < 91; ++i){ + for(j = 0; j < 4; ++j){ + int 
me = (board[count] == 1); + int you = (board[count + 19*19] == 1); + if (me) s[i] = s[i] | (1<<(2*j)); + if (you) s[i] = s[i] | (1<<(2*j + 1)); + ++count; + if(count >= 19*19) break; + } + } +} + +static int occupied(float *b, int i) +{ + if (b[i]) return 1; + if (b[i+19*19]) return -1; + return 0; +} + +data random_go_moves(moves m, int n) +{ + data d = {0}; + d.X = make_matrix(n, 19*19*3); + d.y = make_matrix(n, 19*19+2); + int i, j; + for(i = 0; i < n; ++i){ + float *board = d.X.vals[i]; + float *label = d.y.vals[i]; + char *b = m.data[rand()%m.n]; + int player = b[0] - '0'; + int result = b[1] - '0'; + int row = b[2]; + int col = b[3]; + string_to_board(b+4, board); + if(player > 0) for(j = 0; j < 19*19; ++j) board[19*19*2 + j] = 1; + label[19*19+1] = (player==result); + if(row >= 19 || col >= 19){ + label[19*19] = 1; + } else { + label[col + 19*row] = 1; + if(occupied(board, col + 19*row)) printf("hey\n"); + } + + int flip = rand()%2; + int rotate = rand()%4; + image in = float_to_image(19, 19, 3, board); + image out = float_to_image(19, 19, 1, label); + if(flip){ + flip_image(in); + flip_image(out); + } + rotate_image_cw(in, rotate); + rotate_image_cw(out, rotate); + } + return d; +} + + +void train_go(char *cfgfile, char *weightfile, char *filename, int *gpus, int ngpus, int clear) +{ + int i; + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + network *net = nets[0]; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + + char buff[256]; + moves m = load_go_moves(filename); + //moves m = load_go_moves("games.txt"); + + int 
N = m.n; + printf("Moves: %d\n", N); + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time=what_time_is_it_now(); + + data train = random_go_moves(m, net->batch*net->subdivisions*ngpus); + printf("Loaded: %lf seconds\n", what_time_is_it_now() - time); + time=what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 10); + } +#else + loss = train_network(net, train); +#endif + free_data(train); + + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory,base, epoch); + save_weights(net, buff); + + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + if(get_current_batch(net)%10000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_%ld.backup",backup_directory,base,get_current_batch(net)); + save_weights(net, buff); + } + } + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free(base); +} + +static void propagate_liberty(float *board, int *lib, int *visited, int row, int col, int side) +{ + if (row < 0 || row > 18 || col < 0 || col > 18) return; + int index = row*19 + col; + if (occupied(board,index) != side) return; + if (visited[index]) return; + visited[index] = 1; + lib[index] += 1; + propagate_liberty(board, lib, visited, row+1, col, side); + propagate_liberty(board, lib, visited, row-1, col, side); + propagate_liberty(board, lib, visited, row, col+1, side); + propagate_liberty(board, lib, visited, row, col-1, 
side); +} + + +static int *calculate_liberties(float *board) +{ + int *lib = calloc(19*19, sizeof(int)); + int visited[19*19]; + int i, j; + for(j = 0; j < 19; ++j){ + for(i = 0; i < 19; ++i){ + memset(visited, 0, 19*19*sizeof(int)); + int index = j*19 + i; + if(!occupied(board,index)){ + if ((i > 0) && occupied(board,index - 1)) propagate_liberty(board, lib, visited, j, i-1, occupied(board,index-1)); + if ((i < 18) && occupied(board,index + 1)) propagate_liberty(board, lib, visited, j, i+1, occupied(board,index+1)); + if ((j > 0) && occupied(board,index - 19)) propagate_liberty(board, lib, visited, j-1, i, occupied(board,index-19)); + if ((j < 18) && occupied(board,index + 19)) propagate_liberty(board, lib, visited, j+1, i, occupied(board,index+19)); + } + } + } + return lib; +} + +void print_board(FILE *stream, float *board, int player, int *indexes) +{ + int i,j,n; + fprintf(stream, " "); + for(i = 0; i < 19; ++i){ + fprintf(stream, "%c ", 'A' + i + 1*(i > 7 && noi)); + } + fprintf(stream, "\n"); + for(j = 0; j < 19; ++j){ + fprintf(stream, "%2d", (inverted) ? 
19-j : j+1); + for(i = 0; i < 19; ++i){ + int index = j*19 + i; + if(indexes){ + int found = 0; + for(n = 0; n < nind; ++n){ + if(index == indexes[n]){ + found = 1; + /* + if(n == 0) fprintf(stream, "\uff11"); + else if(n == 1) fprintf(stream, "\uff12"); + else if(n == 2) fprintf(stream, "\uff13"); + else if(n == 3) fprintf(stream, "\uff14"); + else if(n == 4) fprintf(stream, "\uff15"); + */ + fprintf(stream, " %d", n+1); + } + } + if(found) continue; + } + //if(board[index]*-swap > 0) fprintf(stream, "\u25C9 "); + //else if(board[index]*-swap < 0) fprintf(stream, "\u25EF "); + if (occupied(board, index) == player) fprintf(stream, " X"); + else if (occupied(board, index) ==-player) fprintf(stream, " O"); + else fprintf(stream, " ."); + } + fprintf(stream, "\n"); + } +} + +void flip_board(float *board) +{ + int i; + for(i = 0; i < 19*19; ++i){ + float swap = board[i]; + board[i] = board[i+19*19]; + board[i+19*19] = swap; + board[i+19*19*2] = 1-board[i+19*19*2]; + } +} + +float predict_move2(network *net, float *board, float *move, int multi) +{ + float *output = network_predict(net, board); + copy_cpu(19*19+1, output, 1, move, 1); + float result = output[19*19 + 1]; + int i; + if(multi){ + image bim = float_to_image(19, 19, 3, board); + for(i = 1; i < 8; ++i){ + rotate_image_cw(bim, i); + if(i >= 4) flip_image(bim); + + float *output = network_predict(net, board); + image oim = float_to_image(19, 19, 1, output); + result += output[19*19 + 1]; + + if(i >= 4) flip_image(oim); + rotate_image_cw(oim, -i); + + axpy_cpu(19*19+1, 1, output, 1, move, 1); + + if(i >= 4) flip_image(bim); + rotate_image_cw(bim, -i); + } + result = result/8; + scal_cpu(19*19+1, 1./8., move, 1); + } + for(i = 0; i < 19*19; ++i){ + if(board[i] || board[i+19*19]) move[i] = 0; + } + return result; +} + +static void remove_connected(float *b, int *lib, int p, int r, int c) +{ + if (r < 0 || r >= 19 || c < 0 || c >= 19) return; + if (occupied(b, r*19 + c) != p) return; + if (lib[r*19 + c] != 1) 
return; + b[r*19 + c] = 0; + b[19*19 + r*19 + c] = 0; + remove_connected(b, lib, p, r+1, c); + remove_connected(b, lib, p, r-1, c); + remove_connected(b, lib, p, r, c+1); + remove_connected(b, lib, p, r, c-1); +} + + +void move_go(float *b, int p, int r, int c) +{ + int *l = calculate_liberties(b); + if(p > 0) b[r*19 + c] = 1; + else b[19*19 + r*19 + c] = 1; + remove_connected(b, l, -p, r+1, c); + remove_connected(b, l, -p, r-1, c); + remove_connected(b, l, -p, r, c+1); + remove_connected(b, l, -p, r, c-1); + free(l); +} + +int compare_board(float *a, float *b) +{ + if(memcmp(a, b, 19*19*3*sizeof(float)) == 0) return 1; + return 0; +} + +typedef struct mcts_tree{ + float *board; + struct mcts_tree **children; + float *prior; + int *visit_count; + float *value; + float *mean; + float *prob; + int total_count; + float result; + int done; + int pass; +} mcts_tree; + +void free_mcts(mcts_tree *root) +{ + if(!root) return; + int i; + free(root->board); + for(i = 0; i < 19*19+1; ++i){ + if(root->children[i]) free_mcts(root->children[i]); + } + free(root->children); + free(root->prior); + free(root->visit_count); + free(root->value); + free(root->mean); + free(root->prob); + free(root); +} + +float *network_predict_rotations(network *net, float *next) +{ + int n = net->batch; + float *in = calloc(19*19*3*n, sizeof(float)); + image im = float_to_image(19, 19, 3, next); + int i,j; + int *inds = random_index_order(0, 8); + for(j = 0; j < n; ++j){ + i = inds[j]; + rotate_image_cw(im, i); + if(i >= 4) flip_image(im); + memcpy(in + 19*19*3*j, im.data, 19*19*3*sizeof(float)); + if(i >= 4) flip_image(im); + rotate_image_cw(im, -i); + } + float *pred = network_predict(net, in); + for(j = 0; j < n; ++j){ + i = inds[j]; + image im = float_to_image(19, 19, 1, pred + j*(19*19 + 2)); + if(i >= 4) flip_image(im); + rotate_image_cw(im, -i); + if(j > 0){ + axpy_cpu(19*19+2, 1, im.data, 1, pred, 1); + } + } + free(in); + free(inds); + scal_cpu(19*19+2, 1./n, pred, 1); + return pred; +} + 
+mcts_tree *expand(float *next, float *ko, network *net) +{ + mcts_tree *root = calloc(1, sizeof(mcts_tree)); + root->board = next; + root->children = calloc(19*19+1, sizeof(mcts_tree*)); + root->prior = calloc(19*19 + 1, sizeof(float)); + root->prob = calloc(19*19 + 1, sizeof(float)); + root->mean = calloc(19*19 + 1, sizeof(float)); + root->value = calloc(19*19 + 1, sizeof(float)); + root->visit_count = calloc(19*19 + 1, sizeof(int)); + root->total_count = 1; + int i; + float *pred = network_predict_rotations(net, next); + copy_cpu(19*19+1, pred, 1, root->prior, 1); + float val = 2*pred[19*19 + 1] - 1; + root->result = val; + for(i = 0; i < 19*19+1; ++i) { + root->visit_count[i] = 0; + root->value[i] = 0; + root->mean[i] = val; + if(i < 19*19 && occupied(next, i)){ + root->value[i] = -1; + root->mean[i] = -1; + root->prior[i] = 0; + } + } + //print_board(stderr, next, flip?-1:1, 0); + return root; +} + +float *copy_board(float *board) +{ + float *next = calloc(19*19*3, sizeof(float)); + copy_cpu(19*19*3, board, 1, next, 1); + return next; +} + +float select_mcts(mcts_tree *root, network *net, float *prev, float cpuct) +{ + if(root->done) return -root->result; + int i; + float max = -1000; + int max_i = 0; + for(i = 0; i < 19*19+1; ++i){ + root->prob[i] = root->mean[i] + cpuct*root->prior[i] * sqrt(root->total_count) / (1. 
+ root->visit_count[i]); + if(root->prob[i] > max){ + max = root->prob[i]; + max_i = i; + } + } + float val; + i = max_i; + root->visit_count[i]++; + root->total_count++; + if (root->children[i]) { + val = select_mcts(root->children[i], net, root->board, cpuct); + } else { + if(max_i < 19*19 && !legal_go(root->board, prev, 1, max_i/19, max_i%19)) { + root->mean[i] = -1; + root->value[i] = -1; + root->prior[i] = 0; + --root->total_count; + return select_mcts(root, net, prev, cpuct); + //printf("Detected ko\n"); + //getchar(); + } else { + float *next = copy_board(root->board); + if (max_i < 19*19) { + move_go(next, 1, max_i / 19, max_i % 19); + } + flip_board(next); + root->children[i] = expand(next, root->board, net); + val = -root->children[i]->result; + if(max_i == 19*19){ + root->children[i]->pass = 1; + if (root->pass){ + root->children[i]->done = 1; + } + } + } + } + root->value[i] += val; + root->mean[i] = root->value[i]/root->visit_count[i]; + return -val; +} + +mcts_tree *run_mcts(mcts_tree *tree, network *net, float *board, float *ko, int player, int n, float cpuct, float secs) +{ + int i; + double t = what_time_is_it_now(); + if(player < 0) flip_board(board); + if(!tree) tree = expand(copy_board(board), ko, net); + assert(compare_board(tree->board, board)); + for(i = 0; i < n; ++i){ + if (secs > 0 && (what_time_is_it_now() - t) > secs) break; + int max_i = max_int_index(tree->visit_count, 19*19+1); + if (tree->visit_count[max_i] >= n) break; + select_mcts(tree, net, ko, cpuct); + } + if(player < 0) flip_board(board); + //fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + return tree; +} + +mcts_tree *move_mcts(mcts_tree *tree, int index) +{ + if(index < 0 || index > 19*19 || !tree || !tree->children[index]) { + free_mcts(tree); + tree = 0; + } else { + mcts_tree *swap = tree; + tree = tree->children[index]; + swap->children[index] = 0; + free_mcts(swap); + } + return tree; +} + +typedef struct { + float value; + float mcts; + int row; + int 
col; +} move; + +move pick_move(mcts_tree *tree, float temp, int player) +{ + int i; + float probs[19*19+1] = {0}; + move m = {0}; + double sum = 0; + /* + for(i = 0; i < 19*19+1; ++i){ + probs[i] = tree->visit_count[i]; + } + */ + //softmax(probs, 19*19+1, temp, 1, probs); + for(i = 0; i < 19*19+1; ++i){ + sum += pow(tree->visit_count[i], 1./temp); + } + for(i = 0; i < 19*19+1; ++i){ + probs[i] = pow(tree->visit_count[i], 1./temp) / sum; + } + + int index = sample_array(probs, 19*19+1); + m.row = index / 19; + m.col = index % 19; + m.value = (tree->result+1.)/2.; + m.mcts = (tree->mean[index]+1.)/2.; + + int indexes[nind]; + top_k(probs, 19*19+1, nind, indexes); + print_board(stderr, tree->board, player, indexes); + + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", index/19, index%19, tree->result, tree->prior[index], probs[index], tree->mean[index], (tree->children[index])?tree->children[index]->result:0, tree->visit_count[index]); + int ind = max_index(probs, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, tree->result, tree->prior[ind], probs[ind], tree->mean[ind], (tree->children[ind])?tree->children[ind]->result:0, tree->visit_count[ind]); + ind = max_index(tree->prior, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, tree->result, tree->prior[ind], probs[ind], tree->mean[ind], (tree->children[ind])?tree->children[ind]->result:0, tree->visit_count[ind]); + return m; +} + +/* + float predict_move(network *net, float *board, float *move, int multi, float *ko, float temp) + { + + int i; + + int max_v = 0; + int max_i = 0; + for(i = 0; i < 19*19+1; ++i){ + if(root->visit_count[i] > max_v){ + max_v = root->visit_count[i]; + max_i = i; + } + } + fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + int ind = max_index(root->mean, 
19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", max_i/19, max_i%19, root->result, root->prior[max_i], root->prob[max_i], root->mean[max_i], (root->children[max_i])?root->children[max_i]->result:0, root->visit_count[max_i]); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, root->result, root->prior[ind], root->prob[ind], root->mean[ind], (root->children[ind])?root->children[ind]->result:0, root->visit_count[ind]); + ind = max_index(root->prior, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, root->result, root->prior[ind], root->prob[ind], root->mean[ind], (root->children[ind])?root->children[ind]->result:0, root->visit_count[ind]); + if(root->result < -.9 && root->mean[max_i] < -.9) return -1000.f; + + float val = root->result; + free_mcts(root); + return val; + } + */ + +static int makes_safe_go(float *b, int *lib, int p, int r, int c){ + if (r < 0 || r >= 19 || c < 0 || c >= 19) return 0; + if (occupied(b,r*19 + c) == -p){ + if (lib[r*19 + c] > 1) return 0; + else return 1; + } + if (!occupied(b,r*19 + c)) return 1; + if (lib[r*19 + c] > 1) return 1; + return 0; +} + +int suicide_go(float *b, int p, int r, int c) +{ + int *l = calculate_liberties(b); + int safe = 0; + safe = safe || makes_safe_go(b, l, p, r+1, c); + safe = safe || makes_safe_go(b, l, p, r-1, c); + safe = safe || makes_safe_go(b, l, p, r, c+1); + safe = safe || makes_safe_go(b, l, p, r, c-1); + free(l); + return !safe; +} + +int check_ko(float *x, float *ko) +{ + if(!ko) return 0; + float curr[19*19*3]; + copy_cpu(19*19*3, x, 1, curr, 1); + if(curr[19*19*2] != ko[19*19*2]) flip_board(curr); + if(compare_board(curr, ko)) return 1; + return 0; +} + +int legal_go(float *b, float *ko, int p, int r, int c) +{ + if (occupied(b, r*19+c)) return 0; + float curr[19*19*3]; + 
copy_cpu(19*19*3, b, 1, curr, 1); + move_go(curr, p, r, c); + if(check_ko(curr, ko)) return 0; + if(suicide_go(b, p, r, c)) return 0; + return 1; +} + +/* + move generate_move(mcts_tree *root, network *net, int player, float *board, int multi, float temp, float *ko, int print) + { + move m = {0}; +//root = run_mcts(tree, network *net, float *board, float *ko, int n, float cpuct) +int i, j; +int empty = 1; +for(i = 0; i < 19*19; ++i){ +if (occupied(board, i)) { +empty = 0; +break; +} +} +if(empty) { +m.value = .5; +m.mcts = .5; +m.row = 3; +m.col = 15; +return m; +} + +float move[362]; +if (player < 0) flip_board(board); +float result = predict_move(net, board, move, multi, ko, temp); +if (player < 0) flip_board(board); +if(result == -1000.f) return -2; + +for(i = 0; i < 19; ++i){ +for(j = 0; j < 19; ++j){ +if (!legal_go(board, ko, player, i, j)) move[i*19 + j] = 0; +} +} + +int indexes[nind]; +top_k(move, 19*19+1, nind, indexes); + + +int max = max_index(move, 19*19+1); +int row = max / 19; +int col = max % 19; +int index = sample_array(move, 19*19+1); + +if(print){ +top_k(move, 19*19+1, nind, indexes); +for(i = 0; i < nind; ++i){ +if (!move[indexes[i]]) indexes[i] = -1; +} +print_board(stderr, board, 1, indexes); +fprintf(stderr, "%s To Move\n", player > 0 ? 
"X" : "O"); +fprintf(stderr, "%.2f%% Win Chance\n", (result+1)/2*100); +for(i = 0; i < nind; ++i){ +int index = indexes[i]; +int row = index / 19; +int col = index % 19; +if(row == 19){ +fprintf(stderr, "%d: Pass, %.2f%%\n", i+1, move[index]*100); +} else { +fprintf(stderr, "%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100); +} +} +} +if (row == 19) return -1; + +if (suicide_go(board, player, row, col)){ +return -1; +} + +if (suicide_go(board, player, index/19, index%19)){ +index = max; +} +if (index == 19*19) return -1; +return index; +} +*/ + +void valid_go(char *cfgfile, char *weightfile, int multi, char *filename) +{ + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + float *board = calloc(19*19*3, sizeof(float)); + float *move = calloc(19*19+2, sizeof(float)); + // moves m = load_go_moves("/home/pjreddie/backup/go.test"); + moves m = load_go_moves(filename); + + int N = m.n; + int i,j; + int correct = 0; + for (i = 0; i 0) for(j = 0; j < 19*19; ++j) board[19*19*2 + j] = 1; + predict_move2(net, board, move, multi); + int index = max_index(move, 19*19+1); + if(index == truth) ++correct; + printf("%d Accuracy %f\n", i, (float) correct/(i+1)); + } +} + +int print_game(float *board, FILE *fp) +{ + int i, j; + int count = 3; + fprintf(fp, "komi 6.5\n"); + fprintf(fp, "boardsize 19\n"); + fprintf(fp, "clear_board\n"); + for(j = 0; j < 19; ++j){ + for(i = 0; i < 19; ++i){ + if(occupied(board,j*19 + i) == 1) fprintf(fp, "play black %c%d\n", 'A'+i+(i>=8), 19-j); + if(occupied(board,j*19 + i) == -1) fprintf(fp, "play white %c%d\n", 'A'+i+(i>=8), 19-j); + if(occupied(board,j*19 + i)) ++count; + } + } + return count; +} + + +int stdin_ready() +{ + fd_set readfds; + FD_ZERO(&readfds); + + struct timeval 
timeout; + timeout.tv_sec = 0; + timeout.tv_usec = 0; + FD_SET(STDIN_FILENO, &readfds); + + if (select(1, &readfds, NULL, NULL, &timeout)){ + return 1; + } + return 0; +} + +mcts_tree *ponder(mcts_tree *tree, network *net, float *b, float *ko, int player, float cpuct) +{ + double t = what_time_is_it_now(); + int count = 0; + if (tree) count = tree->total_count; + while(!stdin_ready()){ + if (what_time_is_it_now() - t > 120) break; + tree = run_mcts(tree, net, b, ko, player, 100000, cpuct, .1); + } + fprintf(stderr, "Pondered %d moves...\n", tree->total_count - count); + return tree; +} + +void engine_go(char *filename, char *weightfile, int mcts_iters, float secs, float temp, float cpuct, int anon, int resign) +{ + mcts_tree *root = 0; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *one = calloc(19*19*3, sizeof(float)); + float *two = calloc(19*19*3, sizeof(float)); + int ponder_player = 0; + int passed = 0; + int move_num = 0; + int main_time = 0; + int byo_yomi_time = 0; + int byo_yomi_stones = 0; + int black_time_left = 0; + int black_stones_left = 0; + int white_time_left = 0; + int white_stones_left = 0; + float orig_time = secs; + int old_ponder = 0; + while(1){ + if(ponder_player){ + root = ponder(root, net, board, two, ponder_player, cpuct); + } + old_ponder = ponder_player; + ponder_player = 0; + char buff[256]; + int id = 0; + int has_id = (scanf("%d", &id) == 1); + scanf("%s", buff); + if (feof(stdin)) break; + fprintf(stderr, "%s\n", buff); + char ids[256]; + sprintf(ids, "%d", id); + //fprintf(stderr, "%s\n", buff); + if (!has_id) ids[0] = 0; + if (!strcmp(buff, "protocol_version")){ + printf("=%s 2\n\n", ids); + } else if (!strcmp(buff, "name")){ + if(anon){ + printf("=%s The Fool!\n\n", ids); + }else{ + printf("=%s DarkGo\n\n", ids); + } + } else if (!strcmp(buff, "time_settings")){ + ponder_player = old_ponder; + 
scanf("%d %d %d", &main_time, &byo_yomi_time, &byo_yomi_stones); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "time_left")){ + ponder_player = old_ponder; + char color[256]; + int time = 0, stones = 0; + scanf("%s %d %d", color, &time, &stones); + if (color[0] == 'b' || color[0] == 'B'){ + black_time_left = time; + black_stones_left = stones; + } else { + white_time_left = time; + white_stones_left = stones; + } + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "version")){ + if(anon){ + printf("=%s :-DDDD\n\n", ids); + }else { + printf("=%s 1.0. Want more DarkGo? You can find me on OGS, unlimited games, no waiting! https://online-go.com/user/view/434218\n\n", ids); + } + } else if (!strcmp(buff, "known_command")){ + char comm[256]; + scanf("%s", comm); + int known = (!strcmp(comm, "protocol_version") || + !strcmp(comm, "name") || + !strcmp(comm, "version") || + !strcmp(comm, "known_command") || + !strcmp(comm, "list_commands") || + !strcmp(comm, "quit") || + !strcmp(comm, "boardsize") || + !strcmp(comm, "clear_board") || + !strcmp(comm, "komi") || + !strcmp(comm, "final_status_list") || + !strcmp(comm, "play") || + !strcmp(comm, "genmove_white") || + !strcmp(comm, "genmove_black") || + !strcmp(comm, "fixed_handicap") || + !strcmp(comm, "genmove")); + if(known) printf("=%s true\n\n", ids); + else printf("=%s false\n\n", ids); + } else if (!strcmp(buff, "list_commands")){ + printf("=%s protocol_version\nshowboard\nname\nversion\nknown_command\nlist_commands\nquit\nboardsize\nclear_board\nkomi\nplay\ngenmove_black\ngenmove_white\ngenmove\nfinal_status_list\nfixed_handicap\n\n", ids); + } else if (!strcmp(buff, "quit")){ + break; + } else if (!strcmp(buff, "boardsize")){ + int boardsize = 0; + scanf("%d", &boardsize); + //fprintf(stderr, "%d\n", boardsize); + if(boardsize != 19){ + printf("?%s unacceptable size\n\n", ids); + } else { + root = move_mcts(root, -1); + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + move_num = 0; + printf("=%s 
\n\n", ids); + } + } else if (!strcmp(buff, "fixed_handicap")){ + int handicap = 0; + scanf("%d", &handicap); + int indexes[] = {72, 288, 300, 60, 180, 174, 186, 66, 294}; + int i; + for(i = 0; i < handicap; ++i){ + board[indexes[i]] = 1; + ++move_num; + } + root = move_mcts(root, -1); + } else if (!strcmp(buff, "clear_board")){ + passed = 0; + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + move_num = 0; + root = move_mcts(root, -1); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "komi")){ + float komi = 0; + scanf("%f", &komi); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "showboard")){ + printf("=%s \n", ids); + print_board(stdout, board, 1, 0); + printf("\n"); + } else if (!strcmp(buff, "play") || !strcmp(buff, "black") || !strcmp(buff, "white")){ + ++move_num; + char color[256]; + if(!strcmp(buff, "play")) + { + scanf("%s ", color); + } else { + scanf(" "); + color[0] = buff[0]; + } + char c; + int r; + int count = scanf("%c%d", &c, &r); + int player = (color[0] == 'b' || color[0] == 'B') ? 
1 : -1; + if((c == 'p' || c == 'P') && count < 2) { + passed = 1; + printf("=%s \n\n", ids); + char *line = fgetl(stdin); + free(line); + fflush(stdout); + fflush(stderr); + root = move_mcts(root, 19*19); + continue; + } else { + passed = 0; + } + if(c >= 'A' && c <= 'Z') c = c - 'A'; + if(c >= 'a' && c <= 'z') c = c - 'a'; + if(c >= 8) --c; + r = 19 - r; + fprintf(stderr, "move: %d %d\n", r, c); + + float *swap = two; + two = one; + one = swap; + move_go(board, player, r, c); + copy_cpu(19*19*3, board, 1, one, 1); + if(root) fprintf(stderr, "Prior: %f\n", root->prior[r*19 + c]); + if(root) fprintf(stderr, "Mean: %f\n", root->mean[r*19 + c]); + if(root) fprintf(stderr, "Result: %f\n", root->result); + root = move_mcts(root, r*19 + c); + if(root) fprintf(stderr, "Visited: %d\n", root->total_count); + else fprintf(stderr, "NOT VISITED\n"); + + printf("=%s \n\n", ids); + //print_board(stderr, board, 1, 0); + } else if (!strcmp(buff, "genmove") || !strcmp(buff, "genmove_black") || !strcmp(buff, "genmove_white")){ + ++move_num; + int player = 0; + if(!strcmp(buff, "genmove")){ + char color[256]; + scanf("%s", color); + player = (color[0] == 'b' || color[0] == 'B') ? 
1 : -1; + } else if (!strcmp(buff, "genmove_black")){ + player = 1; + } else { + player = -1; + } + if(player > 0){ + if(black_time_left <= 30) secs = 2.5; + else secs = orig_time; + } else { + if(white_time_left <= 30) secs = 2.5; + else secs = orig_time; + } + ponder_player = -player; + + //tree = generate_move(net, player, board, multi, .1, two, 1); + double t = what_time_is_it_now(); + root = run_mcts(root, net, board, two, player, mcts_iters, cpuct, secs); + fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + move m = pick_move(root, temp, player); + root = move_mcts(root, m.row*19 + m.col); + + + if(move_num > resign && m.value < .1 && m.mcts < .1){ + printf("=%s resign\n\n", ids); + } else if(m.row == 19){ + printf("=%s pass\n\n", ids); + passed = 0; + } else { + int row = m.row; + int col = m.col; + + float *swap = two; + two = one; + one = swap; + + move_go(board, player, row, col); + copy_cpu(19*19*3, board, 1, one, 1); + row = 19 - row; + if (col >= 8) ++col; + printf("=%s %c%d\n\n", ids, 'A' + col, row); + } + + } else if (!strcmp(buff, "p")){ + //print_board(board, 1, 0); + } else if (!strcmp(buff, "final_status_list")){ + char type[256]; + scanf("%s", type); + fprintf(stderr, "final_status\n"); + char *line = fgetl(stdin); + free(line); + if(type[0] == 'd' || type[0] == 'D'){ + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, "%s final_status_list dead\n", ids); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + while((l = fgetl(p))){ + printf("%s\n", l); + free(l); + } + } else { + printf("?%s unknown command\n\n", ids); + } + } else if (!strcmp(buff, "kgs-genmove_cleanup")){ + char type[256]; + scanf("%s", type); + fprintf(stderr, "kgs-genmove_cleanup\n"); + char *line = fgetl(stdin); + free(line); + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, 
"%s kgs-genmove_cleanup %s\n", ids, type); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + while((l = fgetl(p))){ + printf("%s\n", l); + free(l); + } + } else { + char *line = fgetl(stdin); + free(line); + printf("?%s unknown command\n\n", ids); + } + fflush(stdout); + fflush(stderr); + } + printf("%d %d %d\n",passed, black_stones_left, white_stones_left); +} + +void test_go(char *cfg, char *weights, int multi) +{ + int i; + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(time(0)); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *move = calloc(19*19+1, sizeof(float)); + int color = 1; + while(1){ + float result = predict_move2(net, board, move, multi); + printf("%.2f%% Win Chance\n", (result+1)/2*100); + + int indexes[nind]; + int row, col; + top_k(move, 19*19+1, nind, indexes); + print_board(stderr, board, color, indexes); + for(i = 0; i < nind; ++i){ + int index = indexes[i]; + row = index / 19; + col = index % 19; + if(row == 19){ + printf("%d: Pass, %.2f%%\n", i+1, move[index]*100); + } else { + printf("%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100); + } + } + //if(color == 1) printf("\u25EF Enter move: "); + //else printf("\u25C9 Enter move: "); + if(color == 1) printf("X Enter move: "); + else printf("O Enter move: "); + + char c; + char *line = fgetl(stdin); + int picked = 1; + int dnum = sscanf(line, "%d", &picked); + int cnum = sscanf(line, "%c", &c); + if (strlen(line) == 0 || dnum) { + --picked; + if (picked < nind){ + int index = indexes[picked]; + row = index / 19; + col = index % 19; + if(row < 19){ + move_go(board, 1, row, col); + } + } + } else if (cnum){ + if (c <= 'T' && c >= 'A'){ + int num = sscanf(line, "%c %d", &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num 
== 2) move_go(board, 1, row, col); + } else if (c == 'p') { + // Pass + } else if(c=='b' || c == 'w'){ + char g; + int num = sscanf(line, "%c %c %d", &g, &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num == 3) { + int mc = (g == 'b') ? 1 : -1; + if (mc == color) { + board[row*19 + col] = 1; + } else { + board[19*19 + row*19 + col] = 1; + } + } + } else if(c == 'c'){ + char g; + int num = sscanf(line, "%c %c %d", &g, &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num == 3) { + board[row*19 + col] = 0; + board[19*19 + row*19 + col] = 0; + } + } + } + free(line); + flip_board(board); + color = -color; + } +} + +float score_game(float *board) +{ + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, "final_score\n"); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + float score = 0; + char player = 0; + while((l = fgetl(p))){ + fprintf(stderr, "%s \t", l); + int n = sscanf(l, "= %c+%f", &player, &score); + free(l); + if (n == 2) break; + } + if(player == 'W') score = -score; + pclose(p); + return score; +} + +void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi) +{ + mcts_tree *tree1 = 0; + mcts_tree *tree2 = 0; + network *net = load_network(filename, weightfile, 0); + //set_batch_network(net, 1); + + network *net2; + if (f2) { + net2 = parse_network_cfg(f2); + if(w2){ + load_weights(net2, w2); + } + } else { + net2 = calloc(1, sizeof(network)); + *net2 = *net; + } + srand(time(0)); + char boards[600][93]; + int count = 0; + //set_batch_network(net, 1); + //set_batch_network(net2, 1); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *one = calloc(19*19*3, sizeof(float)); + float *two = calloc(19*19*3, sizeof(float)); + int done = 0; + int player = 1; + int p1 = 
0; + int p2 = 0; + int total = 0; + float temp = .1; + int mcts_iters = 500; + float cpuct = 5; + while(1){ + if (done){ + tree1 = move_mcts(tree1, -1); + tree2 = move_mcts(tree2, -1); + float score = score_game(board); + if((score > 0) == (total%2==0)) ++p1; + else ++p2; + ++total; + fprintf(stderr, "Total: %d, Player 1: %f, Player 2: %f\n", total, (float)p1/total, (float)p2/total); + sleep(1); + /* + int i = (score > 0)? 0 : 1; + int j; + for(; i < count; i += 2){ + for(j = 0; j < 93; ++j){ + printf("%c", boards[i][j]); + } + printf("\n"); + } + */ + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + player = 1; + done = 0; + count = 0; + fflush(stdout); + fflush(stderr); + } + //print_board(stderr, board, 1, 0); + //sleep(1); + + if ((total%2==0) == (player==1)){ + //mcts_iters = 4500; + cpuct = 5; + } else { + //mcts_iters = 500; + cpuct = 1; + } + network *use = ((total%2==0) == (player==1)) ? net : net2; + mcts_tree *t = ((total%2==0) == (player==1)) ? tree1 : tree2; + t = run_mcts(t, use, board, two, player, mcts_iters, cpuct, 0); + move m = pick_move(t, temp, player); + if(((total%2==0) == (player==1))) tree1 = t; + else tree2 = t; + + tree1 = move_mcts(tree1, m.row*19 + m.col); + tree2 = move_mcts(tree2, m.row*19 + m.col); + + if(m.row == 19){ + done = 1; + continue; + } + int row = m.row; + int col = m.col; + + float *swap = two; + two = one; + one = swap; + + if(player < 0) flip_board(board); + boards[count][0] = row; + boards[count][1] = col; + board_to_string(boards[count] + 2, board); + if(player < 0) flip_board(board); + ++count; + + move_go(board, player, row, col); + copy_cpu(19*19*3, board, 1, one, 1); + + player = -player; + } +} + +void run_go(int argc, char **argv) +{ + //boards_go(); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + 
if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + int clear = find_arg(argc, argv, "-clear"); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *c2 = (argc > 5) ? argv[5] : 0; + char *w2 = (argc > 6) ? argv[6] : 0; + int multi = find_arg(argc, argv, "-multi"); + int anon = find_arg(argc, argv, "-anon"); + int iters = find_int_arg(argc, argv, "-iters", 500); + int resign = find_int_arg(argc, argv, "-resign", 175); + float cpuct = find_float_arg(argc, argv, "-cpuct", 5); + float temp = find_float_arg(argc, argv, "-temp", .1); + float time = find_float_arg(argc, argv, "-time", 0); + if(0==strcmp(argv[2], "train")) train_go(cfg, weights, c2, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) valid_go(cfg, weights, multi, c2); + else if(0==strcmp(argv[2], "self")) self_go(cfg, weights, c2, w2, multi); + else if(0==strcmp(argv[2], "test")) test_go(cfg, weights, multi); + else if(0==strcmp(argv[2], "engine")) engine_go(cfg, weights, iters, time, temp, cpuct, anon, resign); +} + + diff --git a/workloads/realworld/async/darknet/examples/instance-segmenter.c b/workloads/realworld/async/darknet/examples/instance-segmenter.c new file mode 100644 index 0000000000000000000000000000000000000000..664e71426d58e19f758bab198783eac178a3cdc4 --- /dev/null +++ b/workloads/realworld/async/darknet/examples/instance-segmenter.c @@ -0,0 +1,267 @@ +#include "darknet.h" +#include +#include + +void normalize_image2(image p); +void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", 
ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + image pred = get_network_image(net); + + image embed = pred; + embed.c = 3; + embed.data += embed.w*embed.h*80; + + int div = net->w/pred.w; + assert(pred.w * div == net->w); + assert(pred.h * div == net->h); + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.scale = div; + args.num_boxes = 90; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.classes = 80; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = ISEG_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = 
train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(display){ + image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]); + image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]); + pred.c = 80; + image mask = mask_to_rgb(tr); + image prmask = mask_to_rgb(pred); + image ecopy = copy_image(embed); + normalize_image2(ecopy); + show_image(ecopy, "embed", 1); + free_image(ecopy); + + show_image(im, "input", 1); + show_image(prmask, "pred", 1); + show_image(mask, "truth", 100); + free_image(mask); + free_image(prmask); + } + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_isegmenter(char *datafile, char *cfg, char *weights, char *filename) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 
0); + image sized = letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + show_image(sized, "orig", 1); + show_image(prmask, "pred", 0); + free_image(im); + free_image(sized); + free_image(prmask); + if (filename) break; + } +} + + +void demo_isegmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Classifier Demo\n"); + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = letterbox_image(in, net->w, net->h); + + network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + show_image(prmask, "Segmenter", 10); + + free_image(in_s); + free_image(in); + free_image(prmask); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_isegmenter(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') 
++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_isegmenter(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_isegmenter(data, cfg, weights, gpus, ngpus, clear, display); + else if(0==strcmp(argv[2], "demo")) demo_isegmenter(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/async/darknet/examples/lsd.c b/workloads/realworld/async/darknet/examples/lsd.c new file mode 100644 index 0000000000000000000000000000000000000000..4ab944c884b9df422cd2b273b1faee128f2ab112 --- /dev/null +++ b/workloads/realworld/async/darknet/examples/lsd.c @@ -0,0 +1,1378 @@ +#include +#include "darknet.h" + +/* +void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg, char *aweight, int clear) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + //char *style_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *style_images = "/home/pjreddie/zelda.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + network fnet = load_network(fcfg, fweight, clear); + network gnet = load_network(gcfg, gweight, clear); + network anet = load_network(acfg, aweight, clear); + char *gbase = basecfg(gcfg); + char *abase = basecfg(acfg); + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + int i = 
*gnet->seen/imgs; + data train, tbuffer; + data style, sbuffer; + + + list *slist = get_paths(style_images); + char **spaths = (char **)list_to_array(slist); + + list *tlist = get_paths(train_images); + char **tpaths = (char **)list_to_array(tlist); + + load_args targs= get_base_args(gnet); + targs.paths = tpaths; + targs.n = imgs; + targs.m = tlist->size; + targs.d = &tbuffer; + targs.type = CLASSIFICATION_DATA; + targs.classes = 1; + char *ls[1] = {"zelda"}; + targs.labels = ls; + + load_args sargs = get_base_args(gnet); + sargs.paths = spaths; + sargs.n = imgs; + sargs.m = slist->size; + sargs.d = &sbuffer; + sargs.type = CLASSIFICATION_DATA; + sargs.classes = 1; + sargs.labels = ls; + + pthread_t tload_thread = load_data_in_thread(targs); + pthread_t sload_thread = load_data_in_thread(sargs); + clock_t time; + + float aloss_avg = -1; + float floss_avg = -1; + + fnet->train=1; + int x_size = fnet->inputs*fnet->batch; + int y_size = fnet->truths*fnet->batch; + float *X = calloc(x_size, sizeof(float)); + float *y = calloc(y_size, sizeof(float)); + + + int ax_size = anet->inputs*anet->batch; + int ay_size = anet->truths*anet->batch; + fill_gpu(ay_size, .9, anet->truth_gpu, 1); + anet->delta_gpu = cuda_make_array(0, ax_size); + anet->train = 1; + + int gx_size = gnet->inputs*gnet->batch; + int gy_size = gnet->truths*gnet->batch; + gstate.input = cuda_make_array(0, gx_size); + gstate.truth = 0; + gstate.delta = 0; + gstate.train = 1; + + while (get_current_batch(gnet) < gnet->max_batches) { + i += 1; + time=clock(); + pthread_join(tload_thread, 0); + pthread_join(sload_thread, 0); + train = tbuffer; + style = sbuffer; + tload_thread = load_data_in_thread(targs); + sload_thread = load_data_in_thread(sargs); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data generated = copy_data(train); + time=clock(); + + int j, k; + float floss = 0; + for(j = 0; j < fnet->subdivisions; ++j){ + layer imlayer = gnet->layers[gnet->n - 1]; + get_next_batch(train, 
fnet->batch, j*fnet->batch, X, y); + + cuda_push_array(fstate.input, X, x_size); + cuda_push_array(gstate.input, X, gx_size); + *gnet->seen += gnet->batch; + + forward_network_gpu(fnet, fstate); + float *feats = fnet->layers[fnet->n - 2].output_gpu; + copy_gpu(y_size, feats, 1, fstate.truth, 1); + + forward_network_gpu(gnet, gstate); + float *gen = gnet->layers[gnet->n-1].output_gpu; + copy_gpu(x_size, gen, 1, fstate.input, 1); + + fill_gpu(x_size, 0, fstate.delta, 1); + forward_network_gpu(fnet, fstate); + backward_network_gpu(fnet, fstate); + //HERE + + astate.input = gen; + fill_gpu(ax_size, 0, astate.delta, 1); + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + float *delta = imlayer.delta_gpu; + fill_gpu(x_size, 0, delta, 1); + scal_gpu(x_size, 100, astate.delta, 1); + scal_gpu(x_size, .001, fstate.delta, 1); + axpy_gpu(x_size, 1, fstate.delta, 1, delta, 1); + axpy_gpu(x_size, 1, astate.delta, 1, delta, 1); + + //fill_gpu(x_size, 0, delta, 1); + //cuda_push_array(delta, X, x_size); + //axpy_gpu(x_size, -1, imlayer.output_gpu, 1, delta, 1); + //printf("pix error: %f\n", cuda_mag_array(delta, x_size)); + printf("fea error: %f\n", cuda_mag_array(fstate.delta, x_size)); + printf("adv error: %f\n", cuda_mag_array(astate.delta, x_size)); + //axpy_gpu(x_size, 1, astate.delta, 1, delta, 1); + + backward_network_gpu(gnet, gstate); + + floss += get_network_cost(fnet) /(fnet->subdivisions*fnet->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1); + generated.y.vals[index][0] = .1; + style.y.vals[index][0] = .9; + } + } + +*/ +/* + image sim = float_to_image(anet->w, anet->h, anet->c, style.X.vals[j]); + show_image(sim, "style"); + cvWaitKey(0); + */ + /* + + harmless_update_network_gpu(anet); + + data merge = concat_data(style, generated); 
+ randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(generated); + free_data(style); + if (aloss_avg < 0) aloss_avg = aloss; + if (floss_avg < 0) floss_avg = floss; + aloss_avg = aloss_avg*.9 + aloss*.1; + floss_avg = floss_avg*.9 + floss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, floss, aloss, floss_avg, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, gbase, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, gbase); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } +#endif +} +*/ + +/* +void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/train1.txt"; + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network net = load_network(cfg, weight, clear); + network anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = net->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char 
**)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.type = CLASSIFICATION_DATA; + args.classes = 1; + char *ls[1] = {"coco"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + network_state gstate = {0}; + gstate.index = 0; + gstate.net = net; + int x_size = get_network_input_size(net)*net->batch; + int y_size = x_size; + gstate.input = cuda_make_array(0, x_size); + gstate.truth = cuda_make_array(0, y_size); + gstate.delta = 0; + gstate.train = 1; + float *pixs = calloc(x_size, sizeof(float)); + float *graypixs = calloc(x_size, sizeof(float)); + float *y = calloc(y_size, sizeof(float)); + + network_state astate = {0}; + astate.index = 0; + astate.net = anet; + int ay_size = get_network_output_size(anet)*anet->batch; + astate.input = 0; + astate.truth = 0; + astate.delta = 0; + astate.train = 1; + + float *imerror = cuda_make_array(0, imlayer.outputs); + float *ones_gpu = cuda_make_array(0, ay_size); + fill_gpu(ay_size, .9, ones_gpu, 1); + + float aloss_avg = -1; + float gloss_avg = -1; + + //data generated = copy_data(train); + + while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gray = copy_data(train); + for(j = 0; j < imgs; ++j){ + image gim = float_to_image(net->w, net->h, net->c, gray.X.vals[j]); + grayscale_image_3c(gim); + train.y.vals[j][0] = .9; + + image yim = float_to_image(net->w, net->h, net->c, train.X.vals[j]); + //rgb_to_yuv(yim); + } + time=clock(); + float gloss = 0; + + 
for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, pixs, y); + get_next_batch(gray, net->batch, j*net->batch, graypixs, y); + cuda_push_array(gstate.input, graypixs, x_size); + cuda_push_array(gstate.truth, pixs, y_size); + */ + /* + image origi = float_to_image(net->w, net->h, 3, pixs); + image grayi = float_to_image(net->w, net->h, 3, graypixs); + show_image(grayi, "gray"); + show_image(origi, "orig"); + cvWaitKey(0); + */ + /* + *net->seen += net->batch; + forward_network_gpu(net, gstate); + + fill_gpu(imlayer.outputs, 0, imerror, 1); + astate.input = imlayer.output_gpu; + astate.delta = imerror; + astate.truth = ones_gpu; + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + scal_gpu(imlayer.outputs, .1, net->layers[net->n-1].delta_gpu, 1); + + backward_network_gpu(net, gstate); + + scal_gpu(imlayer.outputs, 1000, imerror, 1); + + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs)); + printf("features %f\n", cuda_mag_array(net->layers[net->n-1].delta_gpu, imlayer.outputs)); + + axpy_gpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1); + + gloss += get_network_cost(net) /(net->subdivisions*net->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1); + gray.y.vals[index][0] = .1; + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gray); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + update_network_gpu(anet); + free_data(merge); + free_data(train); + free_data(gray); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, 
aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +#endif +} +*/ + +void slerp(float *start, float *end, float s, int n, float *out) +{ + float omega = acos(dot_cpu(n, start, 1, end, 1)); + float so = sin(omega); + fill_cpu(n, 0, out, 1); + axpy_cpu(n, sin((1-s)*omega)/so, start, 1, out, 1); + axpy_cpu(n, sin(s*omega)/so, end, 1, out, 1); + + float mag = mag_array(out, n); + scale_array(out, n, 1./mag); +} + +image random_unit_vector_image(int w, int h, int c) +{ + image im = make_image(w, h, c); + int i; + for(i = 0; i < im.w*im.h*im.c; ++i){ + im.data[i] = rand_normal(); + } + float mag = mag_array(im.data, im.w*im.h*im.c); + scale_array(im.data, im.w*im.h*im.c, 1./mag); + return im; +} + +void inter_dcgan(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int i, imlayer = 0; + + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = i; + printf("%d\n", i); + break; + } + } + image start = random_unit_vector_image(net->w, net->h, net->c); + image end = random_unit_vector_image(net->w, net->h, net->c); + image im = make_image(net->w, net->h, net->c); + image orig = copy_image(start); + + int c = 0; + int count = 0; + int max_count = 15; + while(1){ + ++c; + + if(count == max_count){ + count = 0; + free_image(start); + start = end; + end = 
random_unit_vector_image(net->w, net->h, net->c); + if(c > 300){ + end = orig; + } + if(c>300 + max_count) return; + } + ++count; + + slerp(start.data, end.data, (float)count / max_count, im.w*im.h*im.c, im.data); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + normalize_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + //char buff[256]; + sprintf(buff, "out%05d", c); + save_image(out, "out"); + save_image(out, buff); + show_image(out, "out", 0); + } +} + +void test_dcgan(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int imlayer = 0; + + imlayer = net->n-1; + + while(1){ + image im = make_image(net->w, net->h, net->c); + int i; + for(i = 0; i < im.w*im.h*im.c; ++i){ + im.data[i] = rand_normal(); + } + //float mag = mag_array(im.data, im.w*im.h*im.c); + //scale_array(im.data, im.w*im.h*im.c, 1./mag); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + normalize_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 0); + + free_image(im); + } +} + +void set_network_alpha_beta(network *net, float alpha, float beta) +{ + int i; + for(i = 0; i < net->n; ++i){ + if(net->layers[i].type == SHORTCUT){ + net->layers[i].alpha = alpha; + net->layers[i].beta = beta; + } + } +} + +void train_prog(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) +{ +#ifdef GPU + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network *gnet = load_network(cfg, weight, clear); + network 
*anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = gnet->layers[gnet->n-1]; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + i = *gnet->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(anet); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + args.threads=16; + args.classes = 1; + char *ls[2] = {"imagenet", "zzzzzzzz"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + gnet->train = 1; + anet->train = 1; + + int x_size = gnet->inputs*gnet->batch; + int y_size = gnet->truths*gnet->batch; + float *imerror = cuda_make_array(0, y_size); + + float aloss_avg = -1; + + if (maxbatch == 0) maxbatch = gnet->max_batches; + while (get_current_batch(gnet) < maxbatch) { + { + int cb = get_current_batch(gnet); + float alpha = (float) cb / (maxbatch/2); + if(alpha > 1) alpha = 1; + float beta = 1 - alpha; + printf("%f %f\n", alpha, beta); + set_network_alpha_beta(gnet, alpha, beta); + set_network_alpha_beta(anet, beta, alpha); + } + + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gen = copy_data(train); + for (j = 0; j < imgs; ++j) { + train.y.vals[j][0] = 1; + gen.y.vals[j][0] = 0; + } + time=clock(); + + for (j = 0; j < gnet->subdivisions; ++j) { + get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0); + int z; + for(z = 0; z < x_size; ++z){ + gnet->input[z] = rand_normal(); + } + /* + for(z = 0; z < gnet->batch; ++z){ + float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs); + scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag); + } + */ + *gnet->seen += 
gnet->batch; + forward_network(gnet); + + fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); + fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1); + copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1); + anet->delta_gpu = imerror; + forward_network(anet); + backward_network(anet); + + //float genaloss = *anet->cost / anet->batch; + + scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); + scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1); + + axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1); + + backward_network(gnet); + + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gen); + float aloss = train_network(anet, merge); + +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + show_image(im, "gen", 1); + show_image(im2, "train", 1); + save_image(im, "gen"); + save_image(im2, "train"); + } +#endif + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(gen); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + + printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + 
sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(gnet, buff); +#endif +} + +void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) +{ +#ifdef GPU + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network *gnet = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + //float orig_rate = anet->learning_rate; + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < gnet->n; ++i) { + if (gnet->layers[i].out_c == 3) { + imlayer = gnet->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + i = *gnet->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(anet); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + args.threads=16; + args.classes = 1; + char *ls[2] = {"imagenet", "zzzzzzzz"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + gnet->train = 1; + anet->train = 1; + + int x_size = gnet->inputs*gnet->batch; + int y_size = gnet->truths*gnet->batch; + float *imerror = cuda_make_array(0, y_size); + + //int ay_size = anet->truths*anet->batch; + + float aloss_avg = -1; + + //data generated = copy_data(train); + + if (maxbatch == 0) maxbatch = gnet->max_batches; + while (get_current_batch(gnet) < maxbatch) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + //translate_data_rows(train, -.5); + //scale_data_rows(train, 2); + + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gen 
= copy_data(train); + for (j = 0; j < imgs; ++j) { + train.y.vals[j][0] = 1; + gen.y.vals[j][0] = 0; + } + time=clock(); + + for(j = 0; j < gnet->subdivisions; ++j){ + get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0); + int z; + for(z = 0; z < x_size; ++z){ + gnet->input[z] = rand_normal(); + } + for(z = 0; z < gnet->batch; ++z){ + float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs); + scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag); + } + /* + for(z = 0; z < 100; ++z){ + printf("%f, ", gnet->input[z]); + } + printf("\n"); + printf("input: %f %f\n", mean_array(gnet->input, x_size), variance_array(gnet->input, x_size)); + */ + + //cuda_push_array(gnet->input_gpu, gnet->input, x_size); + //cuda_push_array(gnet->truth_gpu, gnet->truth, y_size); + *gnet->seen += gnet->batch; + forward_network(gnet); + + fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); + fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1); + copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1); + anet->delta_gpu = imerror; + forward_network(anet); + backward_network(anet); + + //float genaloss = *anet->cost / anet->batch; + //printf("%f\n", genaloss); + + scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); + scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1); + + //printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch)); + //printf("features %f\n", cuda_mag_array(gnet->layers[gnet->n-1].delta_gpu, imlayer.outputs*imlayer.batch)); + + axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1); + + backward_network(gnet); + + /* + for(k = 0; k < gnet->n; ++k){ + layer l = gnet->layers[k]; + cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); + printf("%d: %f %f\n", k, mean_array(l.output, l.outputs*l.batch), variance_array(l.output, l.outputs*l.batch)); + } + */ + + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + 
copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gen); + //randomize_data(merge); + float aloss = train_network(anet, merge); + + //translate_image(im, 1); + //scale_image(im, .5); + //translate_image(im2, 1); + //scale_image(im2, .5); +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + show_image(im, "gen", 1); + show_image(im2, "train", 1); + save_image(im, "gen"); + save_image(im2, "train"); + } +#endif + + /* + if(aloss < .1){ + anet->learning_rate = 0; + } else if (aloss > .3){ + anet->learning_rate = orig_rate; + } + */ + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(gen); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + + printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(gnet, buff); +#endif +} + +void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/train1.txt"; + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + char *backup_directory 
= "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network *net = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = net->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(net); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + + args.type = CLASSIFICATION_DATA; + args.classes = 1; + char *ls[2] = {"imagenet"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + int x_size = net->inputs*net->batch; + //int y_size = x_size; + net->delta = 0; + net->train = 1; + float *pixs = calloc(x_size, sizeof(float)); + float *graypixs = calloc(x_size, sizeof(float)); + //float *y = calloc(y_size, sizeof(float)); + + //int ay_size = anet->outputs*anet->batch; + anet->delta = 0; + anet->train = 1; + + float *imerror = cuda_make_array(0, imlayer.outputs*imlayer.batch); + + float aloss_avg = -1; + float gloss_avg = -1; + + //data generated = copy_data(train); + + while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gray = copy_data(train); + for(j = 0; j < imgs; ++j){ + image gim = float_to_image(net->w, net->h, net->c, gray.X.vals[j]); + grayscale_image_3c(gim); + train.y.vals[j][0] = .95; + gray.y.vals[j][0] = .05; + } + time=clock(); + float gloss = 0; + + for(j = 0; j < 
net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, pixs, 0); + get_next_batch(gray, net->batch, j*net->batch, graypixs, 0); + cuda_push_array(net->input_gpu, graypixs, net->inputs*net->batch); + cuda_push_array(net->truth_gpu, pixs, net->truths*net->batch); + /* + image origi = float_to_image(net->w, net->h, 3, pixs); + image grayi = float_to_image(net->w, net->h, 3, graypixs); + show_image(grayi, "gray"); + show_image(origi, "orig"); + cvWaitKey(0); + */ + *net->seen += net->batch; + forward_network_gpu(net); + + fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); + copy_gpu(anet->inputs*anet->batch, imlayer.output_gpu, 1, anet->input_gpu, 1); + fill_gpu(anet->inputs*anet->batch, .95, anet->truth_gpu, 1); + anet->delta_gpu = imerror; + forward_network_gpu(anet); + backward_network_gpu(anet); + + scal_gpu(imlayer.outputs*imlayer.batch, 1./100., net->layers[net->n-1].delta_gpu, 1); + + scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); + + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch)); + printf("features %f\n", cuda_mag_array(net->layers[net->n-1].delta_gpu, imlayer.outputs*imlayer.batch)); + + axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, net->layers[net->n-1].delta_gpu, 1); + + backward_network_gpu(net); + + + gloss += *net->cost /(net->subdivisions*net->batch); + + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gray); + //randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gray.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + show_image(im, "gen", 1); + show_image(im2, "train", 1); + } +#endif + free_data(merge); + free_data(train); + 
free_data(gray); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +#endif +} + +/* + void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfile, int clear) + { +#ifdef GPU +char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; +char *backup_directory = "/home/pjreddie/backup/"; +srand(time(0)); +char *base = basecfg(cfgfile); +printf("%s\n", base); +network net = parse_network_cfg(cfgfile); +if(weightfile){ +load_weights(&net, weightfile); +} +if(clear) *net->seen = 0; + +char *abase = basecfg(acfgfile); +network anet = parse_network_cfg(acfgfile); +if(aweightfile){ +load_weights(&anet, aweightfile); +} +if(clear) *anet->seen = 0; + +int i, j, k; +layer imlayer = {0}; +for (i = 0; i < net->n; ++i) { +if (net->layers[i].out_c == 3) { +imlayer = net->layers[i]; +break; +} +} + +printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); +int imgs = net->batch*net->subdivisions; +i = *net->seen/imgs; +data train, buffer; + + +list *plist = get_paths(train_images); +//int N = plist->size; +char **paths = (char **)list_to_array(plist); + +load_args args = {0}; +args.w = net->w; +args.h = net->h; +args.paths 
= paths; +args.n = imgs; +args.m = plist->size; +args.d = &buffer; + +args.min = net->min_crop; +args.max = net->max_crop; +args.angle = net->angle; +args.aspect = net->aspect; +args.exposure = net->exposure; +args.saturation = net->saturation; +args.hue = net->hue; +args.size = net->w; +args.type = CLASSIFICATION_DATA; +args.classes = 1; +char *ls[1] = {"coco"}; +args.labels = ls; + +pthread_t load_thread = load_data_in_thread(args); +clock_t time; + +network_state gstate = {0}; +gstate.index = 0; +gstate.net = net; +int x_size = get_network_input_size(net)*net->batch; +int y_size = 1*net->batch; +gstate.input = cuda_make_array(0, x_size); +gstate.truth = 0; +gstate.delta = 0; +gstate.train = 1; +float *X = calloc(x_size, sizeof(float)); +float *y = calloc(y_size, sizeof(float)); + +network_state astate = {0}; +astate.index = 0; +astate.net = anet; +int ay_size = get_network_output_size(anet)*anet->batch; +astate.input = 0; +astate.truth = 0; +astate.delta = 0; +astate.train = 1; + +float *imerror = cuda_make_array(0, imlayer.outputs); +float *ones_gpu = cuda_make_array(0, ay_size); +fill_gpu(ay_size, 1, ones_gpu, 1); + +float aloss_avg = -1; +float gloss_avg = -1; + +//data generated = copy_data(train); + +while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data generated = copy_data(train); + time=clock(); + float gloss = 0; + + for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, X, y); + cuda_push_array(gstate.input, X, x_size); + *net->seen += net->batch; + forward_network_gpu(net, gstate); + + fill_gpu(imlayer.outputs, 0, imerror, 1); + astate.input = imlayer.output_gpu; + astate.delta = imerror; + astate.truth = ones_gpu; + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + scal_gpu(imlayer.outputs, 1, imerror, 1); + 
axpy_gpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1); + + backward_network_gpu(net, gstate); + + printf("features %f\n", cuda_mag_array(imlayer.delta_gpu, imlayer.outputs)); + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs)); + + gloss += get_network_cost(net) /(net->subdivisions*net->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1); + generated.y.vals[index][0] = 0; + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, generated); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + update_network_gpu(anet); + free_data(merge); + free_data(train); + free_data(generated); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } +} +char buff[256]; +sprintf(buff, "%s/%s_final.weights", backup_directory, base); +save_weights(net, buff); +#endif +} +*/ + +/* + void train_lsd(char *cfgfile, char *weightfile, int clear) + { + char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + 
printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + if(clear) *net->seen = 0; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); +//int N = plist->size; +char **paths = (char **)list_to_array(plist); + +load_args args = {0}; +args.w = net->w; +args.h = net->h; +args.paths = paths; +args.n = imgs; +args.m = plist->size; +args.d = &buffer; + +args.min = net->min_crop; +args.max = net->max_crop; +args.angle = net->angle; +args.aspect = net->aspect; +args.exposure = net->exposure; +args.saturation = net->saturation; +args.hue = net->hue; +args.size = net->w; +args.type = CLASSIFICATION_DATA; +args.classes = 1; +char *ls[1] = {"coco"}; +args.labels = ls; + +pthread_t load_thread = load_data_in_thread(args); +clock_t time; +//while(i*imgs < N*120){ +while(get_current_batch(net) < net->max_batches){ +i += 1; +time=clock(); +pthread_join(load_thread, 0); +train = buffer; +load_thread = load_data_in_thread(args); + +printf("Loaded: %lf seconds\n", sec(clock()-time)); + +time=clock(); +float loss = train_network(net, train); +if (avg_loss < 0) avg_loss = loss; +avg_loss = avg_loss*.9 + loss*.1; + +printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); +if(i%1000==0){ +char buff[256]; +sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); +save_weights(net, buff); +} +if(i%100==0){ +char buff[256]; +sprintf(buff, "%s/%s.backup", backup_directory, base); +save_weights(net, buff); +} +free_data(train); +} +char buff[256]; +sprintf(buff, "%s/%s_final.weights", backup_directory, base); +save_weights(net, buff); +} +*/ + +void test_lsd(char *cfg, char *weights, char *filename, int gray) +{ + network *net = 
load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int i, imlayer = 0; + + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = i; + printf("%d\n", i); + break; + } + } + + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + if(gray) grayscale_image_3c(crop); + + float *X = crop.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + constrain_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 1); + show_image(crop, "crop", 0); + + free_image(im); + free_image(resized); + free_image(crop); + if (filename) break; + } +} + + +void run_lsd(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + int batches = find_int_arg(argc, argv, "-b", 0); + char *file = find_char_arg(argc, argv, "-file", "/home/pjreddie/data/imagenet/imagenet1k.train.list"); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + char *acfg = argv[5]; + char *aweights = (argc > 6) ? 
argv[6] : 0; + //if(0==strcmp(argv[2], "train")) train_lsd(cfg, weights, clear); + //else if(0==strcmp(argv[2], "train2")) train_lsd2(cfg, weights, acfg, aweights, clear); + //else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear); + //else if(0==strcmp(argv[2], "train3")) train_lsd3(argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], clear); + if(0==strcmp(argv[2], "traingan")) train_dcgan(cfg, weights, acfg, aweights, clear, display, file, batches); + else if(0==strcmp(argv[2], "trainprog")) train_prog(cfg, weights, acfg, aweights, clear, display, file, batches); + else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear, display); + else if(0==strcmp(argv[2], "gan")) test_dcgan(cfg, weights); + else if(0==strcmp(argv[2], "inter")) inter_dcgan(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_lsd(cfg, weights, filename, 0); + else if(0==strcmp(argv[2], "color")) test_lsd(cfg, weights, filename, 1); + /* + else if(0==strcmp(argv[2], "valid")) validate_lsd(cfg, weights); + */ +} diff --git a/workloads/realworld/async/darknet/examples/nightmare.c b/workloads/realworld/async/darknet/examples/nightmare.c new file mode 100644 index 0000000000000000000000000000000000000000..2978eb61193e96325441c5b830a786eccb203569 --- /dev/null +++ b/workloads/realworld/async/darknet/examples/nightmare.c @@ -0,0 +1,414 @@ +#include "darknet.h" + +#include + +// ./darknet nightmare cfg/extractor.recon.cfg ~/trained/yolo-coco.conv frame6.png -reconstruct -iters 500 -i 3 -lambda .1 -rate .01 -smooth 2 + +float abs_mean(float *x, int n) +{ + int i; + float sum = 0; + for (i = 0; i < n; ++i){ + sum += fabs(x[i]); + } + return sum/n; +} + +void calculate_loss(float *output, float *delta, int n, float thresh) +{ + int i; + float mean = mean_array(output, n); + float var = variance_array(output, n); + for(i = 0; i < n; ++i){ + if(delta[i] > mean + thresh*sqrt(var)) delta[i] = output[i]; + else delta[i] = 0; + 
} +} + +void optimize_picture(network *net, image orig, int max_layer, float scale, float rate, float thresh, int norm) +{ + //scale_image(orig, 2); + //translate_image(orig, -1); + net->n = max_layer + 1; + + int dx = rand()%16 - 8; + int dy = rand()%16 - 8; + int flip = rand()%2; + + image crop = crop_image(orig, dx, dy, orig.w, orig.h); + image im = resize_image(crop, (int)(orig.w * scale), (int)(orig.h * scale)); + if(flip) flip_image(im); + + resize_network(net, im.w, im.h); + layer last = net->layers[net->n-1]; + //net->layers[net->n - 1].activation = LINEAR; + + image delta = make_image(im.w, im.h, im.c); + +#ifdef GPU + net->delta_gpu = cuda_make_array(delta.data, im.w*im.h*im.c); + copy_cpu(net->inputs, im.data, 1, net->input, 1); + + forward_network_gpu(net); + copy_gpu(last.outputs, last.output_gpu, 1, last.delta_gpu, 1); + + cuda_pull_array(last.delta_gpu, last.delta, last.outputs); + calculate_loss(last.delta, last.delta, last.outputs, thresh); + cuda_push_array(last.delta_gpu, last.delta, last.outputs); + + backward_network_gpu(net); + + cuda_pull_array(net->delta_gpu, delta.data, im.w*im.h*im.c); + cuda_free(net->delta_gpu); + net->delta_gpu = 0; +#else + printf("\nnet: %d %d %d im: %d %d %d\n", net->w, net->h, net->inputs, im.w, im.h, im.c); + copy_cpu(net->inputs, im.data, 1, net->input, 1); + net->delta = delta.data; + forward_network(net); + copy_cpu(last.outputs, last.output, 1, last.delta, 1); + calculate_loss(last.output, last.delta, last.outputs, thresh); + backward_network(net); +#endif + + if(flip) flip_image(delta); + //normalize_array(delta.data, delta.w*delta.h*delta.c); + image resized = resize_image(delta, orig.w, orig.h); + image out = crop_image(resized, -dx, -dy, orig.w, orig.h); + + /* + image g = grayscale_image(out); + free_image(out); + out = g; + */ + + //rate = rate / abs_mean(out.data, out.w*out.h*out.c); + image gray = make_image(out.w, out.h, out.c); + fill_image(gray, .5); + axpy_cpu(orig.w*orig.h*orig.c, -1, orig.data, 1, 
gray.data, 1); + axpy_cpu(orig.w*orig.h*orig.c, .1, gray.data, 1, out.data, 1); + + if(norm) normalize_array(out.data, out.w*out.h*out.c); + axpy_cpu(orig.w*orig.h*orig.c, rate, out.data, 1, orig.data, 1); + + /* + normalize_array(orig.data, orig.w*orig.h*orig.c); + scale_image(orig, sqrt(var)); + translate_image(orig, mean); + */ + + //translate_image(orig, 1); + //scale_image(orig, .5); + //normalize_image(orig); + + constrain_image(orig); + + free_image(crop); + free_image(im); + free_image(delta); + free_image(resized); + free_image(out); + +} + +void smooth(image recon, image update, float lambda, int num) +{ + int i, j, k; + int ii, jj; + for(k = 0; k < recon.c; ++k){ + for(j = 0; j < recon.h; ++j){ + for(i = 0; i < recon.w; ++i){ + int out_index = i + recon.w*(j + recon.h*k); + for(jj = j-num; jj <= j + num && jj < recon.h; ++jj){ + if (jj < 0) continue; + for(ii = i-num; ii <= i + num && ii < recon.w; ++ii){ + if (ii < 0) continue; + int in_index = ii + recon.w*(jj + recon.h*k); + update.data[out_index] += lambda * (recon.data[in_index] - recon.data[out_index]); + } + } + } + } + } +} + +void reconstruct_picture(network *net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters) +{ + int iter = 0; + for (iter = 0; iter < iters; ++iter) { + image delta = make_image(recon.w, recon.h, recon.c); + +#ifdef GPU + layer l = get_network_output_layer(net); + cuda_push_array(net->input_gpu, recon.data, recon.w*recon.h*recon.c); + //cuda_push_array(net->truth_gpu, features, net->truths); + net->delta_gpu = cuda_make_array(delta.data, delta.w*delta.h*delta.c); + + forward_network_gpu(net); + cuda_push_array(l.delta_gpu, features, l.outputs); + axpy_gpu(l.outputs, -1, l.output_gpu, 1, l.delta_gpu, 1); + backward_network_gpu(net); + + cuda_pull_array(net->delta_gpu, delta.data, delta.w*delta.h*delta.c); + + cuda_free(net->delta_gpu); +#else + net->input = recon.data; + net->delta = delta.data; + net->truth = 
features; + + forward_network(net); + backward_network(net); +#endif + + //normalize_array(delta.data, delta.w*delta.h*delta.c); + axpy_cpu(recon.w*recon.h*recon.c, 1, delta.data, 1, update.data, 1); + //smooth(recon, update, lambda, smooth_size); + + axpy_cpu(recon.w*recon.h*recon.c, rate, update.data, 1, recon.data, 1); + scal_cpu(recon.w*recon.h*recon.c, momentum, update.data, 1); + + float mag = mag_array(delta.data, recon.w*recon.h*recon.c); + printf("mag: %f\n", mag); + //scal_cpu(recon.w*recon.h*recon.c, 600/mag, recon.data, 1); + + constrain_image(recon); + free_image(delta); + } +} + +/* +void run_lsd(int argc, char **argv) +{ + srand(0); + if(argc < 3){ + fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [options! (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[2]; + char *weights = argv[3]; + char *input = argv[4]; + + int norm = find_int_arg(argc, argv, "-norm", 1); + int rounds = find_int_arg(argc, argv, "-rounds", 1); + int iters = find_int_arg(argc, argv, "-iters", 10); + float rate = find_float_arg(argc, argv, "-rate", .04); + float momentum = find_float_arg(argc, argv, "-momentum", .9); + float lambda = find_float_arg(argc, argv, "-lambda", .01); + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + int reconstruct = find_arg(argc, argv, "-reconstruct"); + int smooth_size = find_int_arg(argc, argv, "-smooth", 1); + + network net = parse_network_cfg(cfg); + load_weights(&net, weights); + char *cfgbase = basecfg(cfg); + char *imbase = basecfg(input); + + set_batch_network(&net, 1); + image im = load_image_color(input, 0, 0); + + float *features = 0; + image update; + if (reconstruct){ + im = letterbox_image(im, net->w, net->h); + + int zz = 0; + network_predict(net, im.data); + image out_im = get_network_image(net); + image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz); + //flip_image(crop); + image f_im = resize_image(crop, out_im.w, out_im.h); + free_image(crop); + printf("%d features\n", 
out_im.w*out_im.h*out_im.c); + + + im = resize_image(im, im.w, im.h); + f_im = resize_image(f_im, f_im.w, f_im.h); + features = f_im.data; + + int i; + for(i = 0; i < 14*14*512; ++i){ + features[i] += rand_uniform(-.19, .19); + } + + free_image(im); + im = make_random_image(im.w, im.h, im.c); + update = make_image(im.w, im.h, im.c); + + } + + int e; + int n; + for(e = 0; e < rounds; ++e){ + fprintf(stderr, "Iteration: "); + fflush(stderr); + for(n = 0; n < iters; ++n){ + fprintf(stderr, "%d, ", n); + fflush(stderr); + if(reconstruct){ + reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1); + //if ((n+1)%30 == 0) rate *= .5; + show_image(im, "reconstruction"); +#ifdef OPENCV + cvWaitKey(10); +#endif + }else{ + int layer = max_layer + rand()%range - range/2; + int octave = rand()%octaves; + optimize_picture(&net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm); + } + } + fprintf(stderr, "done\n"); + char buff[256]; + if (prefix){ + sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e); + }else{ + sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e); + } + printf("%d %s\n", e, buff); + save_image(im, buff); + //show_image(im, buff); + //cvWaitKey(0); + + if(rotate){ + image rot = rotate_image(im, rotate); + free_image(im); + im = rot; + } + image crop = crop_image(im, im.w * (1. - zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom); + image resized = resize_image(crop, im.w, im.h); + free_image(im); + free_image(crop); + im = resized; + } +} +*/ + +void run_nightmare(int argc, char **argv) +{ + srand(0); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [layer] [options! 
(optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[2]; + char *weights = argv[3]; + char *input = argv[4]; + int max_layer = atoi(argv[5]); + + int range = find_int_arg(argc, argv, "-range", 1); + int norm = find_int_arg(argc, argv, "-norm", 1); + int rounds = find_int_arg(argc, argv, "-rounds", 1); + int iters = find_int_arg(argc, argv, "-iters", 10); + int octaves = find_int_arg(argc, argv, "-octaves", 4); + float zoom = find_float_arg(argc, argv, "-zoom", 1.); + float rate = find_float_arg(argc, argv, "-rate", .04); + float thresh = find_float_arg(argc, argv, "-thresh", 1.); + float rotate = find_float_arg(argc, argv, "-rotate", 0); + float momentum = find_float_arg(argc, argv, "-momentum", .9); + float lambda = find_float_arg(argc, argv, "-lambda", .01); + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + int reconstruct = find_arg(argc, argv, "-reconstruct"); + int smooth_size = find_int_arg(argc, argv, "-smooth", 1); + + network *net = load_network(cfg, weights, 0); + char *cfgbase = basecfg(cfg); + char *imbase = basecfg(input); + + set_batch_network(net, 1); + image im = load_image_color(input, 0, 0); + if(0){ + float scale = 1; + if(im.w > 512 || im.h > 512){ + if(im.w > im.h) scale = 512.0/im.w; + else scale = 512.0/im.h; + } + image resized = resize_image(im, scale*im.w, scale*im.h); + free_image(im); + im = resized; + } + //im = letterbox_image(im, net->w, net->h); + + float *features = 0; + image update; + if (reconstruct){ + net->n = max_layer; + im = letterbox_image(im, net->w, net->h); + //resize_network(&net, im.w, im.h); + + network_predict(net, im.data); + if(net->layers[net->n-1].type == REGION){ + printf("region!\n"); + zero_objectness(net->layers[net->n-1]); + } + image out_im = copy_image(get_network_image(net)); + /* + image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz); + //flip_image(crop); + image f_im = resize_image(crop, out_im.w, out_im.h); + free_image(crop); + */ + printf("%d features\n", 
out_im.w*out_im.h*out_im.c); + + features = out_im.data; + + /* + int i; + for(i = 0; i < 14*14*512; ++i){ + //features[i] += rand_uniform(-.19, .19); + } + free_image(im); + im = make_random_image(im.w, im.h, im.c); + */ + update = make_image(im.w, im.h, im.c); + } + + int e; + int n; + for(e = 0; e < rounds; ++e){ + fprintf(stderr, "Iteration: "); + fflush(stderr); + for(n = 0; n < iters; ++n){ + fprintf(stderr, "%d, ", n); + fflush(stderr); + if(reconstruct){ + reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1); + //if ((n+1)%30 == 0) rate *= .5; + show_image(im, "reconstruction", 10); + }else{ + int layer = max_layer + rand()%range - range/2; + int octave = rand()%octaves; + optimize_picture(net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm); + } + } + fprintf(stderr, "done\n"); + if(0){ + image g = grayscale_image(im); + free_image(im); + im = g; + } + char buff[256]; + if (prefix){ + sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e); + }else{ + sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e); + } + printf("%d %s\n", e, buff); + save_image(im, buff); + //show_image(im, buff, 0); + + if(rotate){ + image rot = rotate_image(im, rotate); + free_image(im); + im = rot; + } + image crop = crop_image(im, im.w * (1. 
- zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom); + image resized = resize_image(crop, im.w, im.h); + free_image(im); + free_image(crop); + im = resized; + } +} + diff --git a/workloads/realworld/async/darknet/examples/regressor.c b/workloads/realworld/async/darknet/examples/regressor.c new file mode 100644 index 0000000000000000000000000000000000000000..20cec0fad9f0a2ccb2c46a30d0a01793119b43ce --- /dev/null +++ b/workloads/realworld/async/darknet/examples/regressor.c @@ -0,0 +1,240 @@ +#include "darknet.h" +#include +#include + +void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + int classes = option_find_int(options, "classes", 1); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + clock_t time; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.classes = classes; + + args.min = net->min_ratio*net->w; + args.max = net->max_ratio*net->w; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + 
args.size = net->w; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = REGRESSION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_regressor(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image sized = 
letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + free_image(im); + free_image(sized); + if (filename) break; + } +} + + +void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Regressor Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + list *options = read_data_cfg(datacfg); + int classes = option_find_int(options, "classes", 1); + char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); + + void * cap = open_video_stream(filename, cam_index, 0,0,0); + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image crop = center_crop_image(in, net->w, net->h); + grayscale_image_3c(crop); + + float *predictions = network_predict(net, crop.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + int i; + for(i = 0; i < classes; ++i){ + printf("%s: %f\n", names[i], predictions[i]); + } + + show_image(crop, "Regressor", 10); + free_image(in); + free_image(crop); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_regressor(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus 
= 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_regressor(data, cfg, weights); + else if(0==strcmp(argv[2], "train")) train_regressor(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "demo")) demo_regressor(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/async/darknet/examples/rnn.c b/workloads/realworld/async/darknet/examples/rnn.c new file mode 100644 index 0000000000000000000000000000000000000000..5d49eaae7070eb1dc9a87b5627b7ec6f7cb09e46 --- /dev/null +++ b/workloads/realworld/async/darknet/examples/rnn.c @@ -0,0 +1,542 @@ +#include "darknet.h" + +#include + +typedef struct { + float *x; + float *y; +} float_pair; + +unsigned char **load_files(char *filename, int *n) +{ + list *paths = get_paths(filename); + *n = paths->size; + unsigned char **contents = calloc(*n, sizeof(char *)); + int i; + node *x = paths->front; + for(i = 0; i < *n; ++i){ + contents[i] = read_file((char *)x->val); + x = x->next; + } + return contents; +} + +int *read_tokenized_data(char *filename, size_t *read) +{ + size_t size = 512; + size_t count = 0; + FILE *fp = fopen(filename, "r"); + int *d = calloc(size, sizeof(int)); + int n, one; + one = fscanf(fp, "%d", &n); + while(one == 1){ + ++count; + if(count > size){ + size = size*2; + d = realloc(d, size*sizeof(int)); + } + d[count-1] = n; + one = fscanf(fp, "%d", &n); + } + fclose(fp); + d = realloc(d, count*sizeof(int)); + *read = count; + return d; +} + +char **read_tokens(char 
*filename, size_t *read) +{ + size_t size = 512; + size_t count = 0; + FILE *fp = fopen(filename, "r"); + char **d = calloc(size, sizeof(char *)); + char *line; + while((line=fgetl(fp)) != 0){ + ++count; + if(count > size){ + size = size*2; + d = realloc(d, size*sizeof(char *)); + } + if(0==strcmp(line, "")) line = "\n"; + d[count-1] = line; + } + fclose(fp); + d = realloc(d, count*sizeof(char *)); + *read = count; + return d; +} + + +float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size_t len, int batch, int steps) +{ + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + int i,j; + for(i = 0; i < batch; ++i){ + for(j = 0; j < steps; ++j){ + int curr = tokens[(offsets[i])%len]; + int next = tokens[(offsets[i] + 1)%len]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + offsets[i] = (offsets[i] + 1) % len; + + if(curr >= characters || curr < 0 || next >= characters || next < 0){ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} + +float_pair get_seq2seq_data(char **source, char **dest, int n, int characters, size_t len, int batch, int steps) +{ + int i,j; + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + for(i = 0; i < batch; ++i){ + int index = rand()%n; + //int slen = strlen(source[index]); + //int dlen = strlen(dest[index]); + for(j = 0; j < steps; ++j){ + unsigned char curr = source[index][j]; + unsigned char next = dest[index][j]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + if(curr > 255 || curr <= 0 || next > 255 || next <= 0){ + /*text[(index+j+2)%len] = 0; + printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]); + printf("%s", text+index); + */ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} 
+ +float_pair get_rnn_data(unsigned char *text, size_t *offsets, int characters, size_t len, int batch, int steps) +{ + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + int i,j; + for(i = 0; i < batch; ++i){ + for(j = 0; j < steps; ++j){ + unsigned char curr = text[(offsets[i])%len]; + unsigned char next = text[(offsets[i] + 1)%len]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + offsets[i] = (offsets[i] + 1) % len; + + if(curr > 255 || curr <= 0 || next > 255 || next <= 0){ + /*text[(index+j+2)%len] = 0; + printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]); + printf("%s", text+index); + */ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} + +void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear, int tokenized) +{ + srand(time(0)); + unsigned char *text = 0; + int *tokens = 0; + size_t size; + if(tokenized){ + tokens = read_tokenized_data(filename, &size); + } else { + text = read_file(filename); + size = strlen((const char*)text); + } + + char *backup_directory = "/home/pjreddie/backup/"; + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, clear); + + int inputs = net->inputs; + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g, Inputs: %d %d %d\n", net->learning_rate, net->momentum, net->decay, inputs, net->batch, net->time_steps); + int batch = net->batch; + int steps = net->time_steps; + if(clear) *net->seen = 0; + int i = (*net->seen)/net->batch; + + int streams = batch/steps; + size_t *offsets = calloc(streams, sizeof(size_t)); + int j; + for(j = 0; j < streams; ++j){ + offsets[j] = rand_size_t()%size; + } + + clock_t time; + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + float_pair p; + if(tokenized){ + p = 
get_rnn_token_data(tokens, offsets, inputs, size, streams, steps); + }else{ + p = get_rnn_data(text, offsets, inputs, size, streams, steps); + } + + copy_cpu(net->inputs*net->batch, p.x, 1, net->input, 1); + copy_cpu(net->truths*net->batch, p.y, 1, net->truth, 1); + float loss = train_network_datum(net) / (batch); + free(p.x); + free(p.y); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + size_t chars = get_current_batch(net)*batch; + fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds, %f epochs\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), (float) chars/size); + + for(j = 0; j < streams; ++j){ + //printf("%d\n", j); + if(rand()%64 == 0){ + //fprintf(stderr, "Reset\n"); + offsets[j] = rand_size_t()%size; + reset_network_state(net, j); + } + } + + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void print_symbol(int n, char **tokens){ + if(tokens){ + printf("%s ", tokens[n]); + } else { + printf("%c", n); + } +} + +void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float temp, int rseed, char *token_file) +{ + char **tokens = 0; + if(token_file){ + size_t n; + tokens = read_tokens(token_file, &n); + } + + srand(rseed); + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int i, j; + for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp; + int c = 0; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + + /* + fill_cpu(inputs, 0, input, 1); + for(i = 0; i < 10; ++i){ + network_predict(net, input); + } + fill_cpu(inputs, 0, input, 1); + */ + + for(i = 
0; i < len-1; ++i){ + c = seed[i]; + input[c] = 1; + network_predict(net, input); + input[c] = 0; + print_symbol(c, tokens); + } + if(len) c = seed[len-1]; + print_symbol(c, tokens); + for(i = 0; i < num; ++i){ + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + for(j = 32; j < 127; ++j){ + //printf("%d %c %f\n",j, j, out[j]); + } + for(j = 0; j < inputs; ++j){ + if (out[j] < .0001) out[j] = 0; + } + c = sample_array(out, inputs); + print_symbol(c, tokens); + } + printf("\n"); +} + +void test_tactic_rnn_multi(char *cfgfile, char *weightfile, int num, float temp, int rseed, char *token_file) +{ + char **tokens = 0; + if(token_file){ + size_t n; + tokens = read_tokens(token_file, &n); + } + + srand(rseed); + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int i, j; + for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp; + int c = 0; + float *input = calloc(inputs, sizeof(float)); + float *out = 0; + + while(1){ + reset_network_state(net, 0); + while((c = getc(stdin)) != EOF && c != 0){ + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + for(i = 0; i < num; ++i){ + for(j = 0; j < inputs; ++j){ + if (out[j] < .0001) out[j] = 0; + } + int next = sample_array(out, inputs); + if(c == '.' 
&& next == '\n') break; + c = next; + print_symbol(c, tokens); + + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + printf("\n"); + } +} + +void test_tactic_rnn(char *cfgfile, char *weightfile, int num, float temp, int rseed, char *token_file) +{ + char **tokens = 0; + if(token_file){ + size_t n; + tokens = read_tokens(token_file, &n); + } + + srand(rseed); + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int i, j; + for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp; + int c = 0; + float *input = calloc(inputs, sizeof(float)); + float *out = 0; + + while((c = getc(stdin)) != EOF){ + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + for(i = 0; i < num; ++i){ + for(j = 0; j < inputs; ++j){ + if (out[j] < .0001) out[j] = 0; + } + int next = sample_array(out, inputs); + if(c == '.' && next == '\n') break; + c = next; + print_symbol(c, tokens); + + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + printf("\n"); +} + +void valid_tactic_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int count = 0; + int words = 1; + int c; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + for(i = 0; i < len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + float sum = 0; + c = getc(stdin); + float log2 = log(2); + int in = 0; + while(c != EOF){ + int next = getc(stdin); + if(next == EOF) break; + if(next < 0 || next >= 255) error("Out of range character"); + + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + + if(c == '.' 
&& next == '\n') in = 0; + if(!in) { + if(c == '>' && next == '>'){ + in = 1; + ++words; + } + c = next; + continue; + } + ++count; + sum += log(out[next])/log2; + c = next; + printf("%d %d Perplexity: %4.4f Word Perplexity: %4.4f\n", count, words, pow(2, -sum/count), pow(2, -sum/words)); + } +} + +void valid_char_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int count = 0; + int words = 1; + int c; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + for(i = 0; i < len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + float sum = 0; + c = getc(stdin); + float log2 = log(2); + while(c != EOF){ + int next = getc(stdin); + if(next == EOF) break; + if(next < 0 || next >= 255) error("Out of range character"); + ++count; + if(next == ' ' || next == '\n' || next == '\t') ++words; + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + sum += log(out[next])/log2; + c = next; + printf("%d BPC: %4.4f Perplexity: %4.4f Word Perplexity: %4.4f\n", count, -sum/count, pow(2, -sum/count), pow(2, -sum/words)); + } +} + +void vec_char_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int c; + int seed_len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + char *line; + while((line=fgetl(stdin)) != 0){ + reset_network_state(net, 0); + for(i = 0; i < seed_len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + strip(line); + int str_len = strlen(line); + for(i = 0; i < str_len; ++i){ + c = line[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + c = ' '; + 
input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + + layer l = net->layers[0]; + #ifdef GPU + cuda_pull_array(l.output_gpu, l.output, l.outputs); + #endif + printf("%s", line); + for(i = 0; i < l.outputs; ++i){ + printf(",%g", l.output[i]); + } + printf("\n"); + } +} + +void run_char_rnn(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + char *filename = find_char_arg(argc, argv, "-file", "data/shakespeare.txt"); + char *seed = find_char_arg(argc, argv, "-seed", "\n\n"); + int len = find_int_arg(argc, argv, "-len", 1000); + float temp = find_float_arg(argc, argv, "-temp", .7); + int rseed = find_int_arg(argc, argv, "-srand", time(0)); + int clear = find_arg(argc, argv, "-clear"); + int tokenized = find_arg(argc, argv, "-tokenized"); + char *tokens = find_char_arg(argc, argv, "-tokens", 0); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_char_rnn(cfg, weights, filename, clear, tokenized); + else if(0==strcmp(argv[2], "valid")) valid_char_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "validtactic")) valid_tactic_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "vec")) vec_char_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "generate")) test_char_rnn(cfg, weights, len, seed, temp, rseed, tokens); + else if(0==strcmp(argv[2], "generatetactic")) test_tactic_rnn(cfg, weights, len, temp, rseed, tokens); +} diff --git a/workloads/realworld/async/darknet/examples/rnn_vid.c b/workloads/realworld/async/darknet/examples/rnn_vid.c new file mode 100644 index 0000000000000000000000000000000000000000..e88792352311438d0fcb25bb7befd0677f70bae5 --- /dev/null +++ b/workloads/realworld/async/darknet/examples/rnn_vid.c @@ -0,0 +1,208 @@ +#include "darknet.h" + +#ifdef OPENCV +image get_image_from_stream(CvCapture *cap); +image ipl_to_image(IplImage* src); + +void 
reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters); + + +typedef struct { + float *x; + float *y; +} float_pair; + +float_pair get_rnn_vid_data(network net, char **files, int n, int batch, int steps) +{ + int b; + assert(net.batch == steps + 1); + image out_im = get_network_image(net); + int output_size = out_im.w*out_im.h*out_im.c; + printf("%d %d %d\n", out_im.w, out_im.h, out_im.c); + float *feats = calloc(net.batch*batch*output_size, sizeof(float)); + for(b = 0; b < batch; ++b){ + int input_size = net.w*net.h*net.c; + float *input = calloc(input_size*net.batch, sizeof(float)); + char *filename = files[rand()%n]; + CvCapture *cap = cvCaptureFromFile(filename); + int frames = cvGetCaptureProperty(cap, CV_CAP_PROP_FRAME_COUNT); + int index = rand() % (frames - steps - 2); + if (frames < (steps + 4)){ + --b; + free(input); + continue; + } + + printf("frames: %d, index: %d\n", frames, index); + cvSetCaptureProperty(cap, CV_CAP_PROP_POS_FRAMES, index); + + int i; + for(i = 0; i < net.batch; ++i){ + IplImage* src = cvQueryFrame(cap); + image im = ipl_to_image(src); + rgbgr_image(im); + image re = resize_image(im, net.w, net.h); + //show_image(re, "loaded"); + //cvWaitKey(10); + memcpy(input + i*input_size, re.data, input_size*sizeof(float)); + free_image(im); + free_image(re); + } + float *output = network_predict(net, input); + + free(input); + + for(i = 0; i < net.batch; ++i){ + memcpy(feats + (b + i*batch)*output_size, output + i*output_size, output_size*sizeof(float)); + } + + cvReleaseCapture(&cap); + } + + //printf("%d %d %d\n", out_im.w, out_im.h, out_im.c); + float_pair p = {0}; + p.x = feats; + p.y = feats + output_size*batch; //+ out_im.w*out_im.h*out_im.c; + + return p; +} + + +void train_vid_rnn(char *cfgfile, char *weightfile) +{ + char *train_videos = "data/vid/train.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = 
basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + + list *plist = get_paths(train_videos); + int N = plist->size; + char **paths = (char **)list_to_array(plist); + clock_t time; + int steps = net.time_steps; + int batch = net.batch / net.time_steps; + + network extractor = parse_network_cfg("cfg/extractor.cfg"); + load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv"); + + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + float_pair p = get_rnn_vid_data(extractor, paths, N, batch, steps); + + copy_cpu(net.inputs*net.batch, p.x, 1, net.input, 1); + copy_cpu(net.truths*net.batch, p.y, 1, net.truth, 1); + float loss = train_network_datum(net) / (net.batch); + + + free(p.x); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time)); + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%10==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + + +image save_reconstruction(network net, image *init, float *feat, char *name, int i) +{ + image recon; + if (init) { + recon = copy_image(*init); + } else { + recon = make_random_image(net.w, net.h, 3); + } + + image update = make_image(net.w, net.h, 3); + reconstruct_picture(net, feat, recon, update, .01, .9, .1, 2, 50); + char buff[256]; + sprintf(buff, "%s%d", name, i); + save_image(recon, buff); + free_image(update); + 
return recon; +} + +void generate_vid_rnn(char *cfgfile, char *weightfile) +{ + network extractor = parse_network_cfg("cfg/extractor.recon.cfg"); + load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv"); + + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&extractor, 1); + set_batch_network(&net, 1); + + int i; + CvCapture *cap = cvCaptureFromFile("/extra/vid/ILSVRC2015/Data/VID/snippets/val/ILSVRC2015_val_00007030.mp4"); + float *feat; + float *next; + image last; + for(i = 0; i < 25; ++i){ + image im = get_image_from_stream(cap); + image re = resize_image(im, extractor.w, extractor.h); + feat = network_predict(extractor, re.data); + if(i > 0){ + printf("%f %f\n", mean_array(feat, 14*14*512), variance_array(feat, 14*14*512)); + printf("%f %f\n", mean_array(next, 14*14*512), variance_array(next, 14*14*512)); + printf("%f\n", mse_array(feat, 14*14*512)); + axpy_cpu(14*14*512, -1, feat, 1, next, 1); + printf("%f\n", mse_array(next, 14*14*512)); + } + next = network_predict(net, feat); + + free_image(im); + + free_image(save_reconstruction(extractor, 0, feat, "feat", i)); + free_image(save_reconstruction(extractor, 0, next, "next", i)); + if (i==24) last = copy_image(re); + free_image(re); + } + for(i = 0; i < 30; ++i){ + next = network_predict(net, next); + image new = save_reconstruction(extractor, &last, next, "new", i); + free_image(last); + last = new; + } +} + +void run_vid_rnn(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + //char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_vid_rnn(cfg, weights); + else if(0==strcmp(argv[2], "generate")) generate_vid_rnn(cfg, weights); +} +#else +void run_vid_rnn(int argc, char **argv){} +#endif + diff --git a/workloads/realworld/async/darknet/examples/segmenter.c b/workloads/realworld/async/darknet/examples/segmenter.c new file mode 100644 index 0000000000000000000000000000000000000000..2e7cea0b730754b74a125bcd865aa12f0bdd3be0 --- /dev/null +++ b/workloads/realworld/async/darknet/examples/segmenter.c @@ -0,0 +1,255 @@ +#include "darknet.h" +#include +#include + +void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + image pred = get_network_image(net); + + int div = net->w/pred.w; + assert(pred.w * div == net->w); + assert(pred.h * div == net->h); + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.scale = div; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + 
args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.classes = 80; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = SEGMENTATION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(display){ + image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]); + image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]); + image mask = mask_to_rgb(tr); + image prmask = mask_to_rgb(pred); + show_image(im, "input", 1); + show_image(prmask, "pred", 1); + show_image(mask, "truth", 100); + free_image(mask); + free_image(prmask); + } + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, 
buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_segmenter(char *datafile, char *cfg, char *weights, char *filename) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image sized = letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + show_image(sized, "orig", 1); + show_image(prmask, "pred", 0); + free_image(im); + free_image(sized); + free_image(prmask); + if (filename) break; + } +} + + +void demo_segmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Classifier Demo\n"); + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = letterbox_image(in, net->w, net->h); + + network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + show_image(prmask, "Segmenter", 10); + + free_image(in_s); + free_image(in); + free_image(prmask); + + gettimeofday(&tval_after, NULL); + 
timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_segmenter(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? 
argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_segmenter(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_segmenter(data, cfg, weights, gpus, ngpus, clear, display); + else if(0==strcmp(argv[2], "demo")) demo_segmenter(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/async/darknet/examples/super.c b/workloads/realworld/async/darknet/examples/super.c new file mode 100644 index 0000000000000000000000000000000000000000..d34406b1f2ce70cd36eecb8298bf1ca3e736f01b --- /dev/null +++ b/workloads/realworld/async/darknet/examples/super.c @@ -0,0 +1,120 @@ +#include "darknet.h" + +void train_super(char *cfgfile, char *weightfile, int clear) +{ + char *train_images = "/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, clear); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.scale = 4; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = SUPER_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d 
images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void test_super(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + resize_network(net, im.w, im.h); + printf("%d %d\n", im.w, im.h); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image(net); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 0); + + free_image(im); + if (filename) break; + } +} + + +void run_super(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5] : 0; + int clear = find_arg(argc, argv, "-clear"); + if(0==strcmp(argv[2], "train")) train_super(cfg, weights, clear); + else if(0==strcmp(argv[2], "test")) test_super(cfg, weights, filename); + /* + else if(0==strcmp(argv[2], "valid")) validate_super(cfg, weights); + */ +} diff --git a/workloads/realworld/async/darknet/examples/swag.c b/workloads/realworld/async/darknet/examples/swag.c new file mode 100644 index 0000000000000000000000000000000000000000..c22d7855c46a975ecd1e94a60f9b7059bc288fee --- /dev/null +++ b/workloads/realworld/async/darknet/examples/swag.c @@ -0,0 +1,83 @@ +#include "darknet.h" +#include + +void train_swag(char *cfgfile, char *weightfile) +{ + char *train_images = "data/voc.0712.trainval"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + data train, buffer; + + layer l = net.layers[net.n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes = side; + args.d = &buffer; + args.type = REGION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float 
loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || i == 600){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void run_swag(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_swag(cfg, weights); +} diff --git a/workloads/realworld/async/darknet/examples/tag.c b/workloads/realworld/async/darknet/examples/tag.c new file mode 100644 index 0000000000000000000000000000000000000000..4caf8cba18f39f62deb54ea913fd40c194b3e33c --- /dev/null +++ b/workloads/realworld/async/darknet/examples/tag.c @@ -0,0 +1,140 @@ +#include "darknet.h" + +void train_tag(char *cfgfile, char *weightfile, int clear) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, clear); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + list *plist = get_paths("/home/pjreddie/tag/train.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + + args.min = net->w; + args.max = net->max_crop; + args.size = net->w; + + args.paths = paths; + args.classes = net->outputs; + args.n = imgs; + 
args.m = N; + args.d = &buffer; + args.type = TAG_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + fprintf(stderr, "%d classes\n", net->outputs); + + load_thread = load_data_in_thread(args); + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + pthread_join(load_thread, 0); + free_data(buffer); + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void test_tag(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + int i = 0; + char **names = get_labels("data/tags.txt"); + clock_t time; + int indexes[10]; + char buff[256]; + char *input = buff; + int size = net->w; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = 
load_image_color(input, 0, 0); + image r = resize_min(im, size); + resize_network(net, r.w, r.h); + printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + float *predictions = network_predict(net, X); + top_predictions(net, 10, indexes); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < 10; ++i){ + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void run_tag(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int clear = find_arg(argc, argv, "-clear"); + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_tag(cfg, weights, clear); + else if(0==strcmp(argv[2], "test")) test_tag(cfg, weights, filename); +} + diff --git a/workloads/realworld/async/darknet/examples/voxel.c b/workloads/realworld/async/darknet/examples/voxel.c new file mode 100644 index 0000000000000000000000000000000000000000..01ea9bb98987590227758364bbfff50996cf9a2d --- /dev/null +++ b/workloads/realworld/async/darknet/examples/voxel.c @@ -0,0 +1,161 @@ +#include "darknet.h" + +void extract_voxel(char *lfile, char *rfile, char *prefix) +{ +#ifdef OPENCV + int w = 1920; + int h = 1080; + int shift = 0; + int count = 0; + CvCapture *lcap = cvCaptureFromFile(lfile); + CvCapture *rcap = cvCaptureFromFile(rfile); + while(1){ + image l = get_image_from_stream(lcap); + image r = get_image_from_stream(rcap); + if(!l.w || !r.w) break; + if(count%100 == 0) { + shift = best_3d_shift_r(l, r, -l.h/100, l.h/100); + printf("%d\n", shift); + } + image ls = crop_image(l, (l.w - w)/2, (l.h - h)/2, w, h); + image rs = crop_image(r, 105 + (r.w - w)/2, (r.h - h)/2 + shift, w, h); + char buff[256]; + sprintf(buff, "%s_%05d_l", prefix, 
count); + save_image(ls, buff); + sprintf(buff, "%s_%05d_r", prefix, count); + save_image(rs, buff); + free_image(l); + free_image(r); + free_image(ls); + free_image(rs); + ++count; + } + +#else + printf("need OpenCV for extraction\n"); +#endif +} + +void train_voxel(char *cfgfile, char *weightfile) +{ + char *train_images = "/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.scale = 4; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = SUPER_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); + } + char 
buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void test_voxel(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + resize_network(&net, im.w, im.h); + printf("%d %d\n", im.w, im.h); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image(net); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + + free_image(im); + if (filename) break; + } +} + + +void run_voxel(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_voxel(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_voxel(cfg, weights, filename); + else if(0==strcmp(argv[2], "extract")) extract_voxel(argv[3], argv[4], argv[5]); + /* + else if(0==strcmp(argv[2], "valid")) validate_voxel(cfg, weights); + */ +} diff --git a/workloads/realworld/async/darknet/examples/writing.c b/workloads/realworld/async/darknet/examples/writing.c new file mode 100644 index 0000000000000000000000000000000000000000..1b6ff83b5838b654e0fd1b6664156daf6d7a889b --- /dev/null +++ b/workloads/realworld/async/darknet/examples/writing.c @@ -0,0 +1,144 @@ +#include "darknet.h" + +void train_writing(char *cfgfile, char *weightfile) +{ + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + list *plist = get_paths("figures.list"); + char **paths = (char **)list_to_array(plist); + clock_t time; + int N = plist->size; + printf("N: %d\n", N); + image out = get_network_image(net); + + data train, buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.out_w = out.w; + args.out_h = out.h; + args.paths = paths; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = WRITING_DATA; + + pthread_t load_thread = load_data_in_thread(args); + int epoch = (*net.seen)/N; + while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + printf("Loaded %lf seconds\n",sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + + /* + image pred = float_to_image(64, 64, 1, out); + print_image(pred); + */ + + /* 
+ image im = float_to_image(256, 256, 3, train.X.vals[0]); + image lab = float_to_image(64, 64, 1, train.y.vals[0]); + image pred = float_to_image(64, 64, 1, out); + show_image(im, "image"); + show_image(lab, "label"); + print_image(lab); + show_image(pred, "pred"); + cvWaitKey(0); + */ + + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + free_data(train); + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_batch_%ld.weights", backup_directory, base, get_current_batch(net)); + save_weights(net, buff); + } + if(*net.seen/N > epoch){ + epoch = *net.seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + } +} + +void test_writing(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + + image im = load_image_color(input, 0, 0); + resize_network(&net, im.w, im.h); + printf("%d %d %d\n", im.h, im.w, im.c); + float *X = im.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + image pred = get_network_image(net); + + image upsampled = resize_image(pred, im.w, im.h); + image thresh = threshold_image(upsampled, .5); + pred = thresh; + + show_image(pred, "prediction"); + show_image(im, "orig"); +#ifdef OPENCV + cvWaitKey(0); + cvDestroyAllWindows(); +#endif + + free_image(upsampled); + free_image(thresh); + 
free_image(im); + if (filename) break; + } +} + +void run_writing(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_writing(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_writing(cfg, weights, filename); +} + diff --git a/workloads/realworld/async/darknet/examples/yolo.c b/workloads/realworld/async/darknet/examples/yolo.c new file mode 100644 index 0000000000000000000000000000000000000000..4ddb69a3e53b2123ccb89026645a66c044047faa --- /dev/null +++ b/workloads/realworld/async/darknet/examples/yolo.c @@ -0,0 +1,327 @@ +#include "darknet.h" + +char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; + +void train_yolo(char *cfgfile, char *weightfile) +{ + char *train_images = "/data/voc/train.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + layer l = net->layers[net->n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes = side; + args.d = &buffer; + 
args.type = REGION_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || (i < 1000 && i%100 == 0)){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void print_yolo_detections(FILE **fps, char *id, int total, int classes, int w, int h, detection *dets) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], + xmin, ymin, xmax, ymax); + } + } +} + +void validate_yolo(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + //list *plist = 
get_paths("data/voc.2007.test"); + list *plist = get_paths("/home/pjreddie/data/voc/2007_test.txt"); + //list *plist = get_paths("data/voc.2012.test"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + int j; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + int t; + + float thresh = .001; + int nms = 1; + float iou_thresh = .5; + + int nthreads = 8; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.type = IMAGE_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + time_t start = time(0); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id = basecfg(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh); + print_yolo_detections(fps, id, l.side*l.side*l.n, classes, w, h, dets); + free_detections(dets, 
nboxes); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); +} + +void validate_yolo_recall(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + list *plist = get_paths("data/voc.2007.test"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + int side = l.side; + + int j, k; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + + float thresh = .001; + float iou_thresh = .5; + float nms = 0; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + + int nboxes = 0; + detection *dets = get_network_boxes(net, orig.w, orig.h, thresh, 0, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, side*side*l.n, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < side*side*l.n; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < 
side*side*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free_detections(dets, nboxes); + free(id); + free_image(orig); + free_image(sized); + } +} + +void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) +{ + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + layer l = net->layers[net->n-1]; + set_batch_network(net, 1); + srand(2222222); + clock_t time; + char buff[256]; + char *input = buff; + float nms=.4; + while(1){ + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = resize_image(im, net->w, net->h); + float *X = sized.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + + int nboxes = 0; + detection *dets = get_network_boxes(net, 1, 1, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms); + + draw_detections(im, dets, l.side*l.side*l.n, thresh, voc_names, alphabet, 20); + save_image(im, "predictions"); + show_image(im, "predictions", 0); + free_detections(dets, nboxes); + free_image(im); + free_image(sized); + if (filename) break; + } +} + +void run_yolo(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .2); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] 
[weights (optional)]\n", argv[0], argv[1]); + return; + } + + int avg = find_int_arg(argc, argv, "-avg", 1); + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5]: 0; + if(0==strcmp(argv[2], "test")) test_yolo(cfg, weights, filename, thresh); + else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights); + else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix, avg, .5, 0,0,0,0); +} diff --git a/workloads/realworld/async/darknet/include/darknet.h b/workloads/realworld/async/darknet/include/darknet.h new file mode 100644 index 0000000000000000000000000000000000000000..7be8225e2d39f079ca0a15da6980b42f8966af40 --- /dev/null +++ b/workloads/realworld/async/darknet/include/darknet.h @@ -0,0 +1,810 @@ +#ifndef DARKNET_API +#define DARKNET_API +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef GPU + #define GPU_DEVICE 7 + #define BLOCK 512 + + #include "cuda_runtime.h" + #include "curand.h" + #include "cublas_v2.h" + + #ifdef CUDNN + #include "cudnn.h" + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define SECRET_NUM -1234 +extern int gpu_index; + +typedef struct{ + int classes; + char **names; +} metadata; + +metadata get_metadata(char *file); + +typedef struct{ + int *leaf; + int n; + int *parent; + int *child; + int *group; + char **name; + + int groups; + int *group_size; + int *group_offset; +} tree; +tree *read_tree(char *filename); + +typedef enum{ + LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU +} ACTIVATION; + +typedef enum{ + PNG, BMP, TGA, JPG +} IMTYPE; + +typedef enum{ + MULT, ADD, SUB, DIV +} BINARY_ACTIVATION; + +typedef enum { + CONVOLUTIONAL, + DECONVOLUTIONAL, + CONNECTED, + MAXPOOL, + 
SOFTMAX, + DETECTION, + DROPOUT, + CROP, + ROUTE, + COST, + NORMALIZATION, + AVGPOOL, + LOCAL, + SHORTCUT, + ACTIVE, + RNN, + GRU, + LSTM, + CRNN, + BATCHNORM, + NETWORK, + XNOR, + REGION, + YOLO, + ISEG, + REORG, + UPSAMPLE, + LOGXENT, + L2NORM, + BLANK +} LAYER_TYPE; + +typedef enum{ + SSE, MASKED, L1, SEG, SMOOTH,WGAN +} COST_TYPE; + +typedef struct{ + int batch; + float learning_rate; + float momentum; + float decay; + int adam; + float B1; + float B2; + float eps; + int t; +} update_args; + +struct network; +typedef struct network network; + +struct layer; +typedef struct layer layer; + +struct layer{ + LAYER_TYPE type; + ACTIVATION activation; + COST_TYPE cost_type; + void (*forward) (struct layer, struct network); + void (*backward) (struct layer, struct network); + void (*update) (struct layer, update_args); + void (*forward_gpu) (struct layer, struct network); + void (*backward_gpu) (struct layer, struct network); + void (*update_gpu) (struct layer, update_args); + int batch_normalize; + int shortcut; + int batch; + int forced; + int flipped; + int inputs; + int outputs; + int nweights; + int nbiases; + int extra; + int truths; + int h,w,c; + int out_h, out_w, out_c; + int n; + int max_boxes; + int groups; + int size; + int side; + int stride; + int reverse; + int flatten; + int spatial; + int pad; + int sqrt; + int flip; + int index; + int binary; + int xnor; + int steps; + int hidden; + int truth; + float smooth; + float dot; + float angle; + float jitter; + float saturation; + float exposure; + float shift; + float ratio; + float learning_rate_scale; + float clip; + int noloss; + int softmax; + int classes; + int coords; + int background; + int rescore; + int objectness; + int joint; + int noadjust; + int reorg; + int log; + int tanh; + int *mask; + int total; + + float alpha; + float beta; + float kappa; + + float coord_scale; + float object_scale; + float noobject_scale; + float mask_scale; + float class_scale; + int bias_match; + int random; + float 
ignore_thresh; + float truth_thresh; + float thresh; + float focus; + int classfix; + int absolute; + + int onlyforward; + int stopbackward; + int dontload; + int dontsave; + int dontloadscales; + int numload; + + float temperature; + float probability; + float scale; + + char * cweights; + int * indexes; + int * input_layers; + int * input_sizes; + int * map; + int * counts; + float ** sums; + float * rand; + float * cost; + float * state; + float * prev_state; + float * forgot_state; + float * forgot_delta; + float * state_delta; + float * combine_cpu; + float * combine_delta_cpu; + + float * concat; + float * concat_delta; + + float * binary_weights; + + float * biases; + float * bias_updates; + + float * scales; + float * scale_updates; + + float * weights; + float * weight_updates; + + float * delta; + float * output; + float * loss; + float * squared; + float * norms; + + float * spatial_mean; + float * mean; + float * variance; + + float * mean_delta; + float * variance_delta; + + float * rolling_mean; + float * rolling_variance; + + float * x; + float * x_norm; + + float * m; + float * v; + + float * bias_m; + float * bias_v; + float * scale_m; + float * scale_v; + + + float *z_cpu; + float *r_cpu; + float *h_cpu; + float * prev_state_cpu; + + float *temp_cpu; + float *temp2_cpu; + float *temp3_cpu; + + float *dh_cpu; + float *hh_cpu; + float *prev_cell_cpu; + float *cell_cpu; + float *f_cpu; + float *i_cpu; + float *g_cpu; + float *o_cpu; + float *c_cpu; + float *dc_cpu; + + float * binary_input; + + struct layer *input_layer; + struct layer *self_layer; + struct layer *output_layer; + + struct layer *reset_layer; + struct layer *update_layer; + struct layer *state_layer; + + struct layer *input_gate_layer; + struct layer *state_gate_layer; + struct layer *input_save_layer; + struct layer *state_save_layer; + struct layer *input_state_layer; + struct layer *state_state_layer; + + struct layer *input_z_layer; + struct layer *state_z_layer; + + struct layer 
*input_r_layer; + struct layer *state_r_layer; + + struct layer *input_h_layer; + struct layer *state_h_layer; + + struct layer *wz; + struct layer *uz; + struct layer *wr; + struct layer *ur; + struct layer *wh; + struct layer *uh; + struct layer *uo; + struct layer *wo; + struct layer *uf; + struct layer *wf; + struct layer *ui; + struct layer *wi; + struct layer *ug; + struct layer *wg; + + tree *softmax_tree; + + size_t workspace_size; + +#ifdef GPU + int *indexes_gpu; + + float *z_gpu; + float *r_gpu; + float *h_gpu; + + float *temp_gpu; + float *temp2_gpu; + float *temp3_gpu; + + float *dh_gpu; + float *hh_gpu; + float *prev_cell_gpu; + float *cell_gpu; + float *f_gpu; + float *i_gpu; + float *g_gpu; + float *o_gpu; + float *c_gpu; + float *dc_gpu; + + float *m_gpu; + float *v_gpu; + float *bias_m_gpu; + float *scale_m_gpu; + float *bias_v_gpu; + float *scale_v_gpu; + + float * combine_gpu; + float * combine_delta_gpu; + + float * prev_state_gpu; + float * forgot_state_gpu; + float * forgot_delta_gpu; + float * state_gpu; + float * state_delta_gpu; + float * gate_gpu; + float * gate_delta_gpu; + float * save_gpu; + float * save_delta_gpu; + float * concat_gpu; + float * concat_delta_gpu; + + float * binary_input_gpu; + float * binary_weights_gpu; + + float * mean_gpu; + float * variance_gpu; + + float * rolling_mean_gpu; + float * rolling_variance_gpu; + + float * variance_delta_gpu; + float * mean_delta_gpu; + + float * x_gpu; + float * x_norm_gpu; + float * weights_gpu; + float * weight_updates_gpu; + float * weight_change_gpu; + + float * biases_gpu; + float * bias_updates_gpu; + float * bias_change_gpu; + + float * scales_gpu; + float * scale_updates_gpu; + float * scale_change_gpu; + + float * output_gpu; + float * loss_gpu; + float * delta_gpu; + float * rand_gpu; + float * squared_gpu; + float * norms_gpu; +#ifdef CUDNN + cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc; + cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc; + 
cudnnTensorDescriptor_t normTensorDesc; + cudnnFilterDescriptor_t weightDesc; + cudnnFilterDescriptor_t dweightDesc; + cudnnConvolutionDescriptor_t convDesc; + cudnnConvolutionFwdAlgo_t fw_algo; + cudnnConvolutionBwdDataAlgo_t bd_algo; + cudnnConvolutionBwdFilterAlgo_t bf_algo; +#endif +#endif +}; + +void free_layer(layer); + +typedef enum { + CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM +} learning_rate_policy; + +typedef struct network{ + int n; + int batch; + size_t *seen; + int *t; + float epoch; + int subdivisions; + layer *layers; + float *output; + learning_rate_policy policy; + + float learning_rate; + float momentum; + float decay; + float gamma; + float scale; + float power; + int time_steps; + int step; + int max_batches; + float *scales; + int *steps; + int num_steps; + int burn_in; + + int adam; + float B1; + float B2; + float eps; + + int inputs; + int outputs; + int truths; + int notruth; + int h, w, c; + int max_crop; + int min_crop; + float max_ratio; + float min_ratio; + int center; + float angle; + float aspect; + float exposure; + float saturation; + float hue; + int random; + + int gpu_index; + tree *hierarchy; + + float *input; + float *truth; + float *delta; + float *workspace; + int train; + int index; + float *cost; + float clip; + +#ifdef GPU + float *input_gpu; + float *truth_gpu; + float *delta_gpu; + float *output_gpu; +#endif + +} network; + +typedef struct { + int w; + int h; + float scale; + float rad; + float dx; + float dy; + float aspect; +} augment_args; + +typedef struct { + int w; + int h; + int c; + float *data; +} image; + +typedef struct{ + float x, y, w, h; +} box; + +typedef struct detection{ + box bbox; + int classes; + float *prob; + float *mask; + float objectness; + int sort_class; +} detection; + +typedef struct matrix{ + int rows, cols; + float **vals; +} matrix; + + +typedef struct{ + int w, h; + matrix X; + matrix y; + int shallow; + int *num_boxes; + box **boxes; +} data; + +typedef enum { + CLASSIFICATION_DATA, 
DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA, ISEG_DATA +} data_type; + +typedef struct load_args{ + int threads; + char **paths; + char *path; + int n; + int m; + char **labels; + int h; + int w; + int out_w; + int out_h; + int nh; + int nw; + int num_boxes; + int min, max, size; + int classes; + int background; + int scale; + int center; + int coords; + float jitter; + float angle; + float aspect; + float saturation; + float exposure; + float hue; + data *d; + image *im; + image *resized; + data_type type; + tree *hierarchy; +} load_args; + +typedef struct{ + int id; + float x,y,w,h; + float left, right, top, bottom; +} box_label; + + +network *load_network(char *cfg, char *weights, int clear); +load_args get_base_args(network *net); + +void free_data(data d); + +typedef struct node{ + void *val; + struct node *next; + struct node *prev; +} node; + +typedef struct list{ + int size; + node *front; + node *back; +} list; + +pthread_t load_data(load_args args); +list *read_data_cfg(char *filename); +list *read_cfg(char *filename); +unsigned char *read_file(char *filename); +data resize_data(data orig, int w, int h); +data *tile_data(data orig, int divs, int size); +data select_data(data *orig, int *inds); + +void forward_network(network *net); +void backward_network(network *net); +void update_network(network *net); + + +float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); +void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); +void scal_cpu(int N, float ALPHA, float *X, int INCX); +void fill_cpu(int N, float ALPHA, float * X, int INCX); +void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); +void softmax(float *input, int n, float temp, int stride, 
float *output); + +int best_3d_shift_r(image a, image b, int min, int max); +#ifdef GPU +void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); +void fill_gpu(int N, float ALPHA, float * X, int INCX); +void scal_gpu(int N, float ALPHA, float * X, int INCX); +void copy_gpu(int N, float * X, int INCX, float * Y, int INCY); + +void cuda_set_device(int n); +void cuda_free(float *x_gpu); +float *cuda_make_array(float *x, size_t n); +void cuda_pull_array(float *x_gpu, float *x, size_t n); +float cuda_mag_array(float *x_gpu, size_t n); +void cuda_push_array(float *x_gpu, float *x, size_t n); + +void forward_network_gpu(network *net); +void backward_network_gpu(network *net); +void update_network_gpu(network *net); + +float train_networks(network **nets, int n, data d, int interval); +void sync_nets(network **nets, int n, int interval); +void harmless_update_network_gpu(network *net); +#endif +image get_label(image **characters, char *string, int size); +void draw_label(image a, int r, int c, image label, const float *rgb); +void save_image(image im, const char *name); +void save_image_options(image im, const char *name, IMTYPE f, int quality); +void get_next_batch(data d, int n, int offset, float *X, float *y); +void grayscale_image_3c(image im); +void normalize_image(image p); +void matrix_to_csv(matrix m); +float train_network_sgd(network *net, data d, int n); +void rgbgr_image(image im); +data copy_data(data d); +data concat_data(data d1, data d2); +data load_cifar10_data(char *filename); +float matrix_topk_accuracy(matrix truth, matrix guess, int k); +void matrix_add_matrix(matrix from, matrix to); +void scale_matrix(matrix m, float scale); +matrix csv_to_matrix(char *filename); +float *network_accuracies(network *net, data d, int n); +float train_network_datum(network *net); +image make_random_image(int w, int h, int c); + +void denormalize_connected_layer(layer l); +void denormalize_convolutional_layer(layer l); +void 
statistics_connected_layer(layer l); +void rescale_weights(layer l, float scale, float trans); +void rgbgr_weights(layer l); +image *get_weights(layer l); + +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, int avg, float hier_thresh, int w, int h, int fps, int fullscreen); +void get_detection_detections(layer l, int w, int h, float thresh, detection *dets); + +char *option_find_str(list *l, char *key, char *def); +int option_find_int(list *l, char *key, int def); +int option_find_int_quiet(list *l, char *key, int def); + +network *parse_network_cfg(char *filename); +void save_weights(network *net, char *filename); +void load_weights(network *net, char *filename); +void save_weights_upto(network *net, char *filename, int cutoff); +void load_weights_upto(network *net, char *filename, int start, int cutoff); + +void zero_objectness(layer l); +void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets); +int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets); +void free_network(network *net); +void set_batch_network(network *net, int b); +void set_temp_network(network *net, float t); +image load_image(char *filename, int w, int h, int c); +image load_image_color(char *filename, int w, int h); +image make_image(int w, int h, int c); +image resize_image(image im, int w, int h); +void censor_image(image im, int dx, int dy, int w, int h); +image letterbox_image(image im, int w, int h); +image crop_image(image im, int dx, int dy, int w, int h); +image center_crop_image(image im, int w, int h); +image resize_min(image im, int min); +image resize_max(image im, int max); +image threshold_image(image im, float thresh); +image mask_to_rgb(image mask); +int resize_network(network *net, int w, int h); +void free_matrix(matrix m); 
+void test_resize(char *filename); +int show_image(image p, const char *name, int ms); +image copy_image(image p); +void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b); +float get_current_rate(network *net); +void composite_3d(char *f1, char *f2, char *out, int delta); +data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h); +size_t get_current_batch(network *net); +void constrain_image(image im); +image get_network_image_layer(network *net, int i); +layer get_network_output_layer(network *net); +void top_predictions(network *net, int n, int *index); +void flip_image(image a); +image float_to_image(int w, int h, int c, float *data); +void ghost_image(image source, image dest, int dx, int dy); +float network_accuracy(network *net, data d); +void random_distort_image(image im, float hue, float saturation, float exposure); +void fill_image(image m, float s); +image grayscale_image(image im); +void rotate_image_cw(image im, int times); +double what_time_is_it_now(); +image rotate_image(image m, float rad); +void visualize_network(network *net); +float box_iou(box a, box b); +data load_all_cifar10(); +box_label *read_boxes(char *filename, int *n); +box float_to_box(float *f, int stride); +void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes); + +matrix network_predict_data(network *net, data test); +image **load_alphabet(); +image get_network_image(network *net); +float *network_predict(network *net, float *input); + +int network_width(network *net); +int network_height(network *net); +float *network_predict_image(network *net, image im); +void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets); +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num); +void free_detections(detection *dets, int n); + +void 
reset_network_state(network *net, int b); + +char **get_labels(char *filename); +void do_nms_obj(detection *dets, int total, int classes, float thresh); +void do_nms_sort(detection *dets, int total, int classes, float thresh); + +matrix make_matrix(int rows, int cols); + +#ifdef OPENCV +void *open_video_stream(const char *f, int c, int w, int h, int fps); +image get_image_from_stream(void *p); +void make_window(char *name, int w, int h, int fullscreen); +#endif + +void free_image(image m); +float train_network(network *net, data d); +pthread_t load_data_in_thread(load_args args); +void load_data_blocking(load_args args); +list *get_paths(char *filename); +void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride); +void change_leaves(tree *t, char *leaf_list); + +int find_int_arg(int argc, char **argv, char *arg, int def); +float find_float_arg(int argc, char **argv, char *arg, float def); +int find_arg(int argc, char* argv[], char *arg); +char *find_char_arg(int argc, char **argv, char *arg, char *def); +char *basecfg(char *cfgfile); +void find_replace(char *str, char *orig, char *rep, char *output); +void free_ptrs(void **ptrs, int n); +char *fgetl(FILE *fp); +void strip(char *s); +float sec(clock_t clocks); +void **list_to_array(list *l); +void top_k(float *a, int n, int k, int *index); +int *read_map(char *filename); +void error(const char *s); +int max_index(float *a, int n); +int max_int_index(int *a, int n); +int sample_array(float *a, int n); +int *random_index_order(int min, int max); +void free_list(list *l); +float mse_array(float *a, int n); +float variance_array(float *a, int n); +float mag_array(float *a, int n); +void scale_array(float *a, int n, float s); +float mean_array(float *a, int n); +float sum_array(float *a, int n); +void normalize_array(float *a, int n); +int *read_intlist(char *s, int *n, int d); +size_t rand_size_t(); +float rand_normal(); +float rand_uniform(float min, float max); + +#ifdef 
__cplusplus +} +#endif +#endif diff --git a/workloads/realworld/async/darknet/predictions.jpg b/workloads/realworld/async/darknet/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c92d70d77e70e11853e9838ca90b46eb71a18ffa Binary files /dev/null and b/workloads/realworld/async/darknet/predictions.jpg differ diff --git a/workloads/realworld/async/darknet/python/darknet.py b/workloads/realworld/async/darknet/python/darknet.py new file mode 100644 index 0000000000000000000000000000000000000000..b14d24485d86aa69f3991be79ec4f25c2b8e5a59 --- /dev/null +++ b/workloads/realworld/async/darknet/python/darknet.py @@ -0,0 +1,156 @@ +from ctypes import * +import math +import random + +def sample(probs): + s = sum(probs) + probs = [a/s for a in probs] + r = random.uniform(0, 1) + for i in range(len(probs)): + r = r - probs[i] + if r <= 0: + return i + return len(probs)-1 + +def c_array(ctype, values): + arr = (ctype*len(values))() + arr[:] = values + return arr + +class BOX(Structure): + _fields_ = [("x", c_float), + ("y", c_float), + ("w", c_float), + ("h", c_float)] + +class DETECTION(Structure): + _fields_ = [("bbox", BOX), + ("classes", c_int), + ("prob", POINTER(c_float)), + ("mask", POINTER(c_float)), + ("objectness", c_float), + ("sort_class", c_int)] + + +class IMAGE(Structure): + _fields_ = [("w", c_int), + ("h", c_int), + ("c", c_int), + ("data", POINTER(c_float))] + +class METADATA(Structure): + _fields_ = [("classes", c_int), + ("names", POINTER(c_char_p))] + + + +#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL) +lib = CDLL("libdarknet.so", RTLD_GLOBAL) +lib.network_width.argtypes = [c_void_p] +lib.network_width.restype = c_int +lib.network_height.argtypes = [c_void_p] +lib.network_height.restype = c_int + +predict = lib.network_predict +predict.argtypes = [c_void_p, POINTER(c_float)] +predict.restype = POINTER(c_float) + +set_gpu = lib.cuda_set_device +set_gpu.argtypes = [c_int] + +make_image = 
lib.make_image +make_image.argtypes = [c_int, c_int, c_int] +make_image.restype = IMAGE + +get_network_boxes = lib.get_network_boxes +get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)] +get_network_boxes.restype = POINTER(DETECTION) + +make_network_boxes = lib.make_network_boxes +make_network_boxes.argtypes = [c_void_p] +make_network_boxes.restype = POINTER(DETECTION) + +free_detections = lib.free_detections +free_detections.argtypes = [POINTER(DETECTION), c_int] + +free_ptrs = lib.free_ptrs +free_ptrs.argtypes = [POINTER(c_void_p), c_int] + +network_predict = lib.network_predict +network_predict.argtypes = [c_void_p, POINTER(c_float)] + +reset_rnn = lib.reset_rnn +reset_rnn.argtypes = [c_void_p] + +load_net = lib.load_network +load_net.argtypes = [c_char_p, c_char_p, c_int] +load_net.restype = c_void_p + +do_nms_obj = lib.do_nms_obj +do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +do_nms_sort = lib.do_nms_sort +do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +free_image = lib.free_image +free_image.argtypes = [IMAGE] + +letterbox_image = lib.letterbox_image +letterbox_image.argtypes = [IMAGE, c_int, c_int] +letterbox_image.restype = IMAGE + +load_meta = lib.get_metadata +lib.get_metadata.argtypes = [c_char_p] +lib.get_metadata.restype = METADATA + +load_image = lib.load_image_color +load_image.argtypes = [c_char_p, c_int, c_int] +load_image.restype = IMAGE + +rgbgr_image = lib.rgbgr_image +rgbgr_image.argtypes = [IMAGE] + +predict_image = lib.network_predict_image +predict_image.argtypes = [c_void_p, IMAGE] +predict_image.restype = POINTER(c_float) + +def classify(net, meta, im): + out = predict_image(net, im) + res = [] + for i in range(meta.classes): + res.append((meta.names[i], out[i])) + res = sorted(res, key=lambda x: -x[1]) + return res + +def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): + im = load_image(image, 0, 0) + num = c_int(0) + pnum 
= pointer(num) + predict_image(net, im) + dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum) + num = pnum[0] + if (nms): do_nms_obj(dets, num, meta.classes, nms); + + res = [] + for j in range(num): + for i in range(meta.classes): + if dets[j].prob[i] > 0: + b = dets[j].bbox + res.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h))) + res = sorted(res, key=lambda x: -x[1]) + free_image(im) + free_detections(dets, num) + return res + +if __name__ == "__main__": + #net = load_net("cfg/densenet201.cfg", "/home/pjreddie/trained/densenet201.weights", 0) + #im = load_image("data/wolf.jpg", 0, 0) + #meta = load_meta("cfg/imagenet1k.data") + #r = classify(net, meta, im) + #print r[:10] + net = load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0) + meta = load_meta("cfg/coco.data") + r = detect(net, meta, "data/dog.jpg") + print(r) + + diff --git a/workloads/realworld/async/darknet/python/proverbot.py b/workloads/realworld/async/darknet/python/proverbot.py new file mode 100644 index 0000000000000000000000000000000000000000..095aae8f8bf8bbe47ea1768a6e2c948bb0ff8f85 --- /dev/null +++ b/workloads/realworld/async/darknet/python/proverbot.py @@ -0,0 +1,37 @@ +from darknet import * + +def predict_tactic(net, s): + prob = 0 + d = c_array(c_float, [0.0]*256) + tac = '' + if not len(s): + s = '\n' + for c in s[:-1]: + d[ord(c)] = 1 + pred = predict(net, d) + d[ord(c)] = 0 + c = s[-1] + while 1: + d[ord(c)] = 1 + pred = predict(net, d) + d[ord(c)] = 0 + pred = [pred[i] for i in range(256)] + ind = sample(pred) + c = chr(ind) + prob += math.log(pred[ind]) + if len(tac) and tac[-1] == '.': + break + tac = tac + c + return (tac, prob) + +def predict_tactics(net, s, n): + tacs = [] + for i in range(n): + reset_rnn(net) + tacs.append(predict_tactic(net, s)) + tacs = sorted(tacs, key=lambda x: -x[1]) + return tacs + +net = load_net("cfg/coq.test.cfg", "/home/pjreddie/backup/coq.backup", 0) +t = predict_tactics(net, "+++++\n", 10) +print t diff --git 
a/workloads/realworld/async/darknet/resnet18/run_resnet18.sh b/workloads/realworld/async/darknet/resnet18/run_resnet18.sh new file mode 100755 index 0000000000000000000000000000000000000000..10257ad02affc17f8287ea42917813fe320f5f23 --- /dev/null +++ b/workloads/realworld/async/darknet/resnet18/run_resnet18.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg diff --git a/workloads/realworld/async/darknet/resnet18/run_super.sh b/workloads/realworld/async/darknet/resnet18/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..10257ad02affc17f8287ea42917813fe320f5f23 --- /dev/null +++ b/workloads/realworld/async/darknet/resnet18/run_super.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg diff --git a/workloads/realworld/async/darknet/resnet18_b/run_super.sh b/workloads/realworld/async/darknet/resnet18_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..012635a1ce64ecda462e50097be554185989ae7a --- /dev/null +++ b/workloads/realworld/async/darknet/resnet18_b/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier test ../cfg/imagenet1k.data ../cfg/resnet18_b.cfg diff --git a/workloads/realworld/async/darknet/resnet18_t/run_super.sh b/workloads/realworld/async/darknet/resnet18_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..0eb59b3bd65cf0186c5ed5f36eff5ec34d54298c --- /dev/null +++ b/workloads/realworld/async/darknet/resnet18_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier train ../cfg/imagenet1k.data 
../cfg/resnet18_t.cfg \ No newline at end of file diff --git a/workloads/realworld/async/darknet/resnet50/run_resnet50.sh b/workloads/realworld/async/darknet/resnet50/run_resnet50.sh new file mode 100755 index 0000000000000000000000000000000000000000..ce88111af06600203c1ae94421134eb3404f51a4 --- /dev/null +++ b/workloads/realworld/async/darknet/resnet50/run_resnet50.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet50.cfg ../../../../../data/darknet/resnet50.weights ../data/dog.jpg diff --git a/workloads/realworld/async/darknet/resnet50/run_super.sh b/workloads/realworld/async/darknet/resnet50/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ce88111af06600203c1ae94421134eb3404f51a4 --- /dev/null +++ b/workloads/realworld/async/darknet/resnet50/run_super.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet50.cfg ../../../../../data/darknet/resnet50.weights ../data/dog.jpg diff --git a/workloads/realworld/async/darknet/resnet50_b/run_super.sh b/workloads/realworld/async/darknet/resnet50_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..e6f1b1d59b612bef36d04af547bf61808261eb12 --- /dev/null +++ b/workloads/realworld/async/darknet/resnet50_b/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier test ../cfg/imagenet1k.data ../cfg/resnet50_b.cfg diff --git a/workloads/realworld/async/darknet/resnet50_t/run_super.sh b/workloads/realworld/async/darknet/resnet50_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..4d4c7feebd1bd5bdcded72e3d4cf58045949ac90 --- /dev/null +++ b/workloads/realworld/async/darknet/resnet50_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg 
+../darknet classifier train ../cfg/imagenet1k.data ../cfg/resnet50_t.cfg diff --git a/workloads/realworld/async/darknet/scripts/dice_label.sh b/workloads/realworld/async/darknet/scripts/dice_label.sh new file mode 100644 index 0000000000000000000000000000000000000000..f19f8a49481b46d5a04dd18b1b05af8928b21957 --- /dev/null +++ b/workloads/realworld/async/darknet/scripts/dice_label.sh @@ -0,0 +1,20 @@ +mkdir -p images +mkdir -p images/orig +mkdir -p images/train +mkdir -p images/val + +ffmpeg -i Face1.mp4 images/orig/face1_%6d.jpg +ffmpeg -i Face2.mp4 images/orig/face2_%6d.jpg +ffmpeg -i Face3.mp4 images/orig/face3_%6d.jpg +ffmpeg -i Face4.mp4 images/orig/face4_%6d.jpg +ffmpeg -i Face5.mp4 images/orig/face5_%6d.jpg +ffmpeg -i Face6.mp4 images/orig/face6_%6d.jpg + +mogrify -resize 100x100^ -gravity center -crop 100x100+0+0 +repage images/orig/* + +ls images/orig/* | shuf | head -n 1000 | xargs mv -t images/val +mv images/orig/* images/train + +find `pwd`/images/train > dice.train.list -name \*.jpg +find `pwd`/images/val > dice.val.list -name \*.jpg + diff --git a/workloads/realworld/async/darknet/scripts/gen_tactic.sh b/workloads/realworld/async/darknet/scripts/gen_tactic.sh new file mode 100755 index 0000000000000000000000000000000000000000..ffa30d27754dacdd03bd5996d41cbfab14db0f39 --- /dev/null +++ b/workloads/realworld/async/darknet/scripts/gen_tactic.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Usage: +# wget http://pjreddie.com/media/files/peek.weights +# scripts/gen_tactic.sh < data/goal.txt +./darknet rnn generatetactic cfg/gru.cfg peek.weights 2>/dev/null diff --git a/workloads/realworld/async/darknet/scripts/get_coco_dataset.sh b/workloads/realworld/async/darknet/scripts/get_coco_dataset.sh new file mode 100644 index 0000000000000000000000000000000000000000..28463015d1748fd331e071a0a778c6d4500b29ef --- /dev/null +++ b/workloads/realworld/async/darknet/scripts/get_coco_dataset.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Clone COCO API +git clone 
https://github.com/pdollar/coco +cd coco + +mkdir images +cd images + +# Download Images +wget -c https://pjreddie.com/media/files/train2014.zip +wget -c https://pjreddie.com/media/files/val2014.zip + +# Unzip +unzip -q train2014.zip +unzip -q val2014.zip + +cd .. + +# Download COCO Metadata +wget -c https://pjreddie.com/media/files/instances_train-val2014.zip +wget -c https://pjreddie.com/media/files/coco/5k.part +wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part +wget -c https://pjreddie.com/media/files/coco/labels.tgz +tar xzf labels.tgz +unzip -q instances_train-val2014.zip + +# Set Up Image Lists +paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt +paste <(awk "{print \"$PWD\"}" trainvalno5k.txt + diff --git a/workloads/realworld/async/darknet/scripts/imagenet_label.sh b/workloads/realworld/async/darknet/scripts/imagenet_label.sh new file mode 100644 index 0000000000000000000000000000000000000000..01e4306ee3cf7322427374f01c766bcdef970922 --- /dev/null +++ b/workloads/realworld/async/darknet/scripts/imagenet_label.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +mkdir -p labelled +wd=`pwd` + +for f in val/*.xml; +do +label=`grep -m1 "" $f | grep -oP '\K[^<]*'` +im=`echo $f | sed 's/val/imgs/; s/xml/JPEG/'` +out=`echo $im | sed 's/JPEG/'${label}'.JPEG/; s/imgs/labelled/'` +ln -s ${wd}/$im ${wd}/$out +done + +find ${wd}/labelled -name \*.JPEG > inet.val.list + diff --git a/workloads/realworld/async/darknet/scripts/voc_label.py b/workloads/realworld/async/darknet/scripts/voc_label.py new file mode 100644 index 0000000000000000000000000000000000000000..679fc366890d9eccf15124f950a274d8ad24fc83 --- /dev/null +++ b/workloads/realworld/async/darknet/scripts/voc_label.py @@ -0,0 +1,59 @@ +import xml.etree.ElementTree as ET +import pickle +import os +from os import listdir, getcwd +from os.path import join + +sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')] + +classes = ["aeroplane", "bicycle", "bird", 
"boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] + + +def convert(size, box): + dw = 1./(size[0]) + dh = 1./(size[1]) + x = (box[0] + box[1])/2.0 - 1 + y = (box[2] + box[3])/2.0 - 1 + w = box[1] - box[0] + h = box[3] - box[2] + x = x*dw + w = w*dw + y = y*dh + h = h*dh + return (x,y,w,h) + +def convert_annotation(year, image_id): + in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id)) + out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w') + tree=ET.parse(in_file) + root = tree.getroot() + size = root.find('size') + w = int(size.find('width').text) + h = int(size.find('height').text) + + for obj in root.iter('object'): + difficult = obj.find('difficult').text + cls = obj.find('name').text + if cls not in classes or int(difficult)==1: + continue + cls_id = classes.index(cls) + xmlbox = obj.find('bndbox') + b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) + bb = convert((w,h), b) + out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') + +wd = getcwd() + +for year, image_set in sets: + if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)): + os.makedirs('VOCdevkit/VOC%s/labels/'%(year)) + image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split() + list_file = open('%s_%s.txt'%(year, image_set), 'w') + for image_id in image_ids: + list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id)) + convert_annotation(year, image_id) + list_file.close() + +os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt") +os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt") + diff --git a/workloads/realworld/async/darknet/src/activation_kernels.cu 
b/workloads/realworld/async/darknet/src/activation_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..659b44fb85fba664e37b6e8d6aa1abee39accdd2 --- /dev/null +++ b/workloads/realworld/async/darknet/src/activation_kernels.cu @@ -0,0 +1,206 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "activations.h" +#include "cuda_dark.h" +} + + +__device__ float lhtan_activate_kernel(float x) +{ + if(x < 0) return .001f*x; + if(x > 1) return .001f*(x-1.f) + 1.f; + return x; +} +__device__ float lhtan_gradient_kernel(float x) +{ + if(x > 0 && x < 1) return 1; + return .001; +} + +__device__ float hardtan_activate_kernel(float x) +{ + if (x < -1) return -1; + if (x > 1) return 1; + return x; +} +__device__ float linear_activate_kernel(float x){return x;} +__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));} +__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;} +__device__ float relu_activate_kernel(float x){return x*(x>0);} +__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);} +__device__ float selu_activate_kernel(float x){return (x >= 0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x)-1);} +__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;} +__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;} +__device__ float leaky_activate_kernel(float x){return (x>0) ? 
x : .1f*x;} +__device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);} +__device__ float plse_activate_kernel(float x) +{ + if(x < -4) return .01f * (x + 4); + if(x > 4) return .01f * (x - 4) + 1; + return .125f*x + .5f; +} +__device__ float stair_activate_kernel(float x) +{ + int n = floorf(x); + if (n%2 == 0) return floorf(x/2); + else return (x - n) + floorf(x/2); +} + + +__device__ float hardtan_gradient_kernel(float x) +{ + if (x > -1 && x < 1) return 1; + return 0; +} +__device__ float linear_gradient_kernel(float x){return 1;} +__device__ float logistic_gradient_kernel(float x){return (1-x)*x;} +__device__ float loggy_gradient_kernel(float x) +{ + float y = (x+1)/2; + return 2*(1-y)*y; +} +__device__ float relu_gradient_kernel(float x){return (x>0);} +__device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);} +__device__ float selu_gradient_kernel(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);} +__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;} +__device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;} +__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;} +__device__ float tanh_gradient_kernel(float x){return 1-x*x;} +__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? 
.01f : .125f;} +__device__ float stair_gradient_kernel(float x) +{ + if (floorf(x) == x) return 0; + return 1; +} + +__device__ float activate_kernel(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_activate_kernel(x); + case LOGISTIC: + return logistic_activate_kernel(x); + case LOGGY: + return loggy_activate_kernel(x); + case RELU: + return relu_activate_kernel(x); + case ELU: + return elu_activate_kernel(x); + case SELU: + return selu_activate_kernel(x); + case RELIE: + return relie_activate_kernel(x); + case RAMP: + return ramp_activate_kernel(x); + case LEAKY: + return leaky_activate_kernel(x); + case TANH: + return tanh_activate_kernel(x); + case PLSE: + return plse_activate_kernel(x); + case STAIR: + return stair_activate_kernel(x); + case HARDTAN: + return hardtan_activate_kernel(x); + case LHTAN: + return lhtan_activate_kernel(x); + } + return 0; +} + +__device__ float gradient_kernel(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_gradient_kernel(x); + case LOGISTIC: + return logistic_gradient_kernel(x); + case LOGGY: + return loggy_gradient_kernel(x); + case RELU: + return relu_gradient_kernel(x); + case ELU: + return elu_gradient_kernel(x); + case SELU: + return selu_gradient_kernel(x); + case RELIE: + return relie_gradient_kernel(x); + case RAMP: + return ramp_gradient_kernel(x); + case LEAKY: + return leaky_gradient_kernel(x); + case TANH: + return tanh_gradient_kernel(x); + case PLSE: + return plse_gradient_kernel(x); + case STAIR: + return stair_gradient_kernel(x); + case HARDTAN: + return hardtan_gradient_kernel(x); + case LHTAN: + return lhtan_gradient_kernel(x); + } + return 0; +} + +__global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int i = id % s; + int b = id / s; + float x1 = x[b*s + i]; + float x2 = x[b*s + s/2 + i]; + if(id < n) { + float de = dy[id]; + dx[b*s 
+ i] = x2*de; + dx[b*s + s/2 + i] = x1*de; + } +} + +extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y) +{ + binary_gradient_array_kernel<<>>(x, dx, n/2, size, a, y); + check_error(cudaPeekAtLastError()); +} +__global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int i = id % s; + int b = id / s; + float x1 = x[b*s + i]; + float x2 = x[b*s + s/2 + i]; + if(id < n) y[id] = x1*x2; +} + +extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y) +{ + binary_activate_array_kernel<<>>(x, n/2, size, a, y); + check_error(cudaPeekAtLastError()); +} + +__global__ void activate_array_kernel(float *x, int n, ACTIVATION a) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n) x[i] = activate_kernel(x[i], a); +} + +__global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n) delta[i] *= gradient_kernel(x[i], a); +} + +extern "C" void activate_array_gpu(float *x, int n, ACTIVATION a) +{ + activate_array_kernel<<>>(x, n, a); + check_error(cudaPeekAtLastError()); +} + +extern "C" void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta) +{ + gradient_array_kernel<<>>(x, n, a, delta); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/async/darknet/src/activation_layer.c b/workloads/realworld/async/darknet/src/activation_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..0791772336e4d1b001ed1b76bbbf21ee8d6fa24f --- /dev/null +++ b/workloads/realworld/async/darknet/src/activation_layer.c @@ -0,0 +1,63 @@ +#include "activation_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + 
+layer make_activation_layer(int batch, int inputs, ACTIVATION activation) +{ + layer l = {0}; + l.type = ACTIVE; + + l.inputs = inputs; + l.outputs = inputs; + l.batch=batch; + + l.output = calloc(batch*inputs, sizeof(float*)); + l.delta = calloc(batch*inputs, sizeof(float*)); + + l.forward = forward_activation_layer; + l.backward = backward_activation_layer; +#ifdef GPU + l.forward_gpu = forward_activation_layer_gpu; + l.backward_gpu = backward_activation_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); +#endif + l.activation = activation; + fprintf(stderr, "Activation Layer: %d inputs\n", inputs); + return l; +} + +void forward_activation_layer(layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_activation_layer(layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_activation_layer_gpu(layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_activation_layer_gpu(layer l, network net) +{ + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/async/darknet/src/activation_layer.h b/workloads/realworld/async/darknet/src/activation_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..42118a84e83f59a8997e354959404d1283a3004c --- /dev/null +++ b/workloads/realworld/async/darknet/src/activation_layer.h @@ -0,0 +1,19 @@ +#ifndef ACTIVATION_LAYER_H +#define ACTIVATION_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_activation_layer(int 
batch, int inputs, ACTIVATION activation); + +void forward_activation_layer(layer l, network net); +void backward_activation_layer(layer l, network net); + +#ifdef GPU +void forward_activation_layer_gpu(layer l, network net); +void backward_activation_layer_gpu(layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/async/darknet/src/activations.c b/workloads/realworld/async/darknet/src/activations.c new file mode 100644 index 0000000000000000000000000000000000000000..da1a17a89b46b6c41fa80b5dd113e1b30c910712 --- /dev/null +++ b/workloads/realworld/async/darknet/src/activations.c @@ -0,0 +1,150 @@ +#include "activations.h" + +#include +#include +#include +#include + +char *get_activation_string(ACTIVATION a) +{ + switch(a){ + case LOGISTIC: + return "logistic"; + case LOGGY: + return "loggy"; + case RELU: + return "relu"; + case ELU: + return "elu"; + case SELU: + return "selu"; + case RELIE: + return "relie"; + case RAMP: + return "ramp"; + case LINEAR: + return "linear"; + case TANH: + return "tanh"; + case PLSE: + return "plse"; + case LEAKY: + return "leaky"; + case STAIR: + return "stair"; + case HARDTAN: + return "hardtan"; + case LHTAN: + return "lhtan"; + default: + break; + } + return "relu"; +} + +ACTIVATION get_activation(char *s) +{ + if (strcmp(s, "logistic")==0) return LOGISTIC; + if (strcmp(s, "loggy")==0) return LOGGY; + if (strcmp(s, "relu")==0) return RELU; + if (strcmp(s, "elu")==0) return ELU; + if (strcmp(s, "selu")==0) return SELU; + if (strcmp(s, "relie")==0) return RELIE; + if (strcmp(s, "plse")==0) return PLSE; + if (strcmp(s, "hardtan")==0) return HARDTAN; + if (strcmp(s, "lhtan")==0) return LHTAN; + if (strcmp(s, "linear")==0) return LINEAR; + if (strcmp(s, "ramp")==0) return RAMP; + if (strcmp(s, "leaky")==0) return LEAKY; + if (strcmp(s, "tanh")==0) return TANH; + if (strcmp(s, "stair")==0) return STAIR; + fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s); + return RELU; +} + +float 
activate(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_activate(x); + case LOGISTIC: + return logistic_activate(x); + case LOGGY: + return loggy_activate(x); + case RELU: + return relu_activate(x); + case ELU: + return elu_activate(x); + case SELU: + return selu_activate(x); + case RELIE: + return relie_activate(x); + case RAMP: + return ramp_activate(x); + case LEAKY: + return leaky_activate(x); + case TANH: + return tanh_activate(x); + case PLSE: + return plse_activate(x); + case STAIR: + return stair_activate(x); + case HARDTAN: + return hardtan_activate(x); + case LHTAN: + return lhtan_activate(x); + } + return 0; +} + +void activate_array(float *x, const int n, const ACTIVATION a) +{ + int i; + for(i = 0; i < n; ++i){ + x[i] = activate(x[i], a); + } +} + +float gradient(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_gradient(x); + case LOGISTIC: + return logistic_gradient(x); + case LOGGY: + return loggy_gradient(x); + case RELU: + return relu_gradient(x); + case ELU: + return elu_gradient(x); + case SELU: + return selu_gradient(x); + case RELIE: + return relie_gradient(x); + case RAMP: + return ramp_gradient(x); + case LEAKY: + return leaky_gradient(x); + case TANH: + return tanh_gradient(x); + case PLSE: + return plse_gradient(x); + case STAIR: + return stair_gradient(x); + case HARDTAN: + return hardtan_gradient(x); + case LHTAN: + return lhtan_gradient(x); + } + return 0; +} + +void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta) +{ + int i; + for(i = 0; i < n; ++i){ + delta[i] *= gradient(x[i], a); + } +} + diff --git a/workloads/realworld/async/darknet/src/activations.h b/workloads/realworld/async/darknet/src/activations.h new file mode 100644 index 0000000000000000000000000000000000000000..eec28d5b692ede3975e01a4d454ace20e8a9fdd8 --- /dev/null +++ b/workloads/realworld/async/darknet/src/activations.h @@ -0,0 +1,87 @@ +#ifndef ACTIVATIONS_H +#define ACTIVATIONS_H +#include 
"darknet.h" +#include "cuda_dark.h" +#include "math.h" + +ACTIVATION get_activation(char *s); + +char *get_activation_string(ACTIVATION a); +float activate(float x, ACTIVATION a); +float gradient(float x, ACTIVATION a); +void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta); +void activate_array(float *x, const int n, const ACTIVATION a); +#ifdef GPU +void activate_array_gpu(float *x, int n, ACTIVATION a); +void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta); +#endif + +static inline float stair_activate(float x) +{ + int n = floor(x); + if (n%2 == 0) return floor(x/2.); + else return (x - n) + floor(x/2.); +} +static inline float hardtan_activate(float x) +{ + if (x < -1) return -1; + if (x > 1) return 1; + return x; +} +static inline float linear_activate(float x){return x;} +static inline float logistic_activate(float x){return 1./(1. + exp(-x));} +static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;} +static inline float relu_activate(float x){return x*(x>0);} +static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);} +static inline float selu_activate(float x){return (x >= 0)*1.0507*x + (x < 0)*1.0507*1.6732*(exp(x)-1);} +static inline float relie_activate(float x){return (x>0) ? x : .01*x;} +static inline float ramp_activate(float x){return x*(x>0)+.1*x;} +static inline float leaky_activate(float x){return (x>0) ? 
x : .1*x;} +static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);} +static inline float plse_activate(float x) +{ + if(x < -4) return .01 * (x + 4); + if(x > 4) return .01 * (x - 4) + 1; + return .125*x + .5; +} + +static inline float lhtan_activate(float x) +{ + if(x < 0) return .001*x; + if(x > 1) return .001*(x-1) + 1; + return x; +} +static inline float lhtan_gradient(float x) +{ + if(x > 0 && x < 1) return 1; + return .001; +} + +static inline float hardtan_gradient(float x) +{ + if (x > -1 && x < 1) return 1; + return 0; +} +static inline float linear_gradient(float x){return 1;} +static inline float logistic_gradient(float x){return (1-x)*x;} +static inline float loggy_gradient(float x) +{ + float y = (x+1.)/2.; + return 2*(1-y)*y; +} +static inline float stair_gradient(float x) +{ + if (floor(x) == x) return 0; + return 1; +} +static inline float relu_gradient(float x){return (x>0);} +static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);} +static inline float selu_gradient(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);} +static inline float relie_gradient(float x){return (x>0) ? 1 : .01;} +static inline float ramp_gradient(float x){return (x>0)+.1;} +static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;} +static inline float tanh_gradient(float x){return 1-x*x;} +static inline float plse_gradient(float x){return (x < 0 || x > 1) ? 
.01 : .125;} + +#endif + diff --git a/workloads/realworld/async/darknet/src/avgpool_layer.c b/workloads/realworld/async/darknet/src/avgpool_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..7d17fa8f829aba43652117c141fb8b54ef4cf5dc --- /dev/null +++ b/workloads/realworld/async/darknet/src/avgpool_layer.c @@ -0,0 +1,71 @@ +#include "avgpool_layer.h" +#include "cuda_dark.h" +#include + +avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) +{ + fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c); + avgpool_layer l = {0}; + l.type = AVGPOOL; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.out_w = 1; + l.out_h = 1; + l.out_c = c; + l.outputs = l.out_c; + l.inputs = h*w*c; + int output_size = l.outputs * batch; + l.output = calloc(output_size, sizeof(float)); + l.delta = calloc(output_size, sizeof(float)); + l.forward = forward_avgpool_layer; + l.backward = backward_avgpool_layer; + #ifdef GPU + l.forward_gpu = forward_avgpool_layer_gpu; + l.backward_gpu = backward_avgpool_layer_gpu; + l.output_gpu = cuda_make_array(l.output, output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); + #endif + return l; +} + +void resize_avgpool_layer(avgpool_layer *l, int w, int h) +{ + l->w = w; + l->h = h; + l->inputs = h*w*l->c; +} + +void forward_avgpool_layer(const avgpool_layer l, network net) +{ + int b,i,k; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < l.c; ++k){ + int out_index = k + b*l.c; + l.output[out_index] = 0; + for(i = 0; i < l.h*l.w; ++i){ + int in_index = i + l.h*l.w*(k + b*l.c); + l.output[out_index] += net.input[in_index]; + } + l.output[out_index] /= l.h*l.w; + } + } +} + +void backward_avgpool_layer(const avgpool_layer l, network net) +{ + int b,i,k; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < l.c; ++k){ + int out_index = k + b*l.c; + for(i = 0; i < l.h*l.w; ++i){ + int in_index = i + l.h*l.w*(k + b*l.c); + net.delta[in_index] += l.delta[out_index] / (l.h*l.w); + } + } + } +} + diff --git 
a/workloads/realworld/async/darknet/src/avgpool_layer.h b/workloads/realworld/async/darknet/src/avgpool_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..576ad1db9e9cb87640b0c3f764e2bbfbaae4b2b3 --- /dev/null +++ b/workloads/realworld/async/darknet/src/avgpool_layer.h @@ -0,0 +1,23 @@ +#ifndef AVGPOOL_LAYER_H +#define AVGPOOL_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +typedef layer avgpool_layer; + +image get_avgpool_image(avgpool_layer l); +avgpool_layer make_avgpool_layer(int batch, int w, int h, int c); +void resize_avgpool_layer(avgpool_layer *l, int w, int h); +void forward_avgpool_layer(const avgpool_layer l, network net); +void backward_avgpool_layer(const avgpool_layer l, network net); + +#ifdef GPU +void forward_avgpool_layer_gpu(avgpool_layer l, network net); +void backward_avgpool_layer_gpu(avgpool_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/async/darknet/src/avgpool_layer_kernels.cu b/workloads/realworld/async/darknet/src/avgpool_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..55e5ec372d251e1d4b0c501563f9240437595795 --- /dev/null +++ b/workloads/realworld/async/darknet/src/avgpool_layer_kernels.cu @@ -0,0 +1,61 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "avgpool_layer.h" +#include "cuda_dark.h" +} + +__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int k = id % c; + id /= c; + int b = id; + + int i; + int out_index = (k + c*b); + output[out_index] = 0; + for(i = 0; i < w*h; ++i){ + int in_index = i + h*w*(k + b*c); + output[out_index] += input[in_index]; + } + output[out_index] /= w*h; +} + +__global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float 
*out_delta) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int k = id % c; + id /= c; + int b = id; + + int i; + int out_index = (k + c*b); + for(i = 0; i < w*h; ++i){ + int in_index = i + h*w*(k + b*c); + in_delta[in_index] += out_delta[out_index] / (w*h); + } +} + +extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network net) +{ + size_t n = layer.c*layer.batch; + + forward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, net.input_gpu, layer.output_gpu); + check_error(cudaPeekAtLastError()); +} + +extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network net) +{ + size_t n = layer.c*layer.batch; + + backward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, net.delta_gpu, layer.delta_gpu); + check_error(cudaPeekAtLastError()); +} + diff --git a/workloads/realworld/async/darknet/src/batchnorm_layer.c b/workloads/realworld/async/darknet/src/batchnorm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..ebff387cc4b0365173fb6727efd80ebc80bfbd41 --- /dev/null +++ b/workloads/realworld/async/darknet/src/batchnorm_layer.c @@ -0,0 +1,279 @@ +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "blas.h" +#include + +layer make_batchnorm_layer(int batch, int w, int h, int c) +{ + fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c); + layer l = {0}; + l.type = BATCHNORM; + l.batch = batch; + l.h = l.out_h = h; + l.w = l.out_w = w; + l.c = l.out_c = c; + l.output = calloc(h * w * c * batch, sizeof(float)); + l.delta = calloc(h * w * c * batch, sizeof(float)); + l.inputs = w*h*c; + l.outputs = l.inputs; + + l.scales = calloc(c, sizeof(float)); + l.scale_updates = calloc(c, sizeof(float)); + l.biases = calloc(c, sizeof(float)); + l.bias_updates = calloc(c, sizeof(float)); + int i; + for(i = 0; i < c; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(c, sizeof(float)); + l.variance = calloc(c, sizeof(float)); + + 
l.rolling_mean = calloc(c, sizeof(float)); + l.rolling_variance = calloc(c, sizeof(float)); + + l.forward = forward_batchnorm_layer; + l.backward = backward_batchnorm_layer; +#ifdef GPU + l.forward_gpu = forward_batchnorm_layer_gpu; + l.backward_gpu = backward_batchnorm_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, h * w * c * batch); + l.delta_gpu = cuda_make_array(l.delta, h * w * c * batch); + + l.biases_gpu = cuda_make_array(l.biases, c); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, c); + + l.scales_gpu = cuda_make_array(l.scales, c); + l.scale_updates_gpu = cuda_make_array(l.scale_updates, c); + + l.mean_gpu = cuda_make_array(l.mean, c); + l.variance_gpu = cuda_make_array(l.variance, c); + + l.rolling_mean_gpu = cuda_make_array(l.mean, c); + l.rolling_variance_gpu = cuda_make_array(l.variance, c); + + l.mean_delta_gpu = cuda_make_array(l.mean, c); + l.variance_delta_gpu = cuda_make_array(l.variance, c); + + l.x_gpu = cuda_make_array(l.output, l.batch*l.outputs); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*l.outputs); + #ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); + + #endif +#endif + return l; +} + +void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + int i,b,f; + for(f = 0; f < n; ++f){ + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int index = i + size*(f + n*b); + sum += delta[index] * x_norm[index]; + } + } + scale_updates[f] += sum; + } +} + +void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + + int i,j,k; + for(i = 0; i < filters; ++i){ + mean_delta[i] = 0; + for (j = 0; j < batch; ++j) { + for (k = 
0; k < spatial; ++k) { + int index = j*filters*spatial + i*spatial + k; + mean_delta[i] += delta[index]; + } + } + mean_delta[i] *= (-1./sqrt(variance[i] + .00001f)); + } +} +void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + + int i,j,k; + for(i = 0; i < filters; ++i){ + variance_delta[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance_delta[i] += delta[index]*(x[index] - mean[i]); + } + } + variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.)); + } +} +void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + int f, j, k; + for(j = 0; j < batch; ++j){ + for(f = 0; f < filters; ++f){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + f*spatial + k; + delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f)) + variance_delta[f] * 2. 
* (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); + } + } + } +} + +void resize_batchnorm_layer(layer *layer, int w, int h) +{ + fprintf(stderr, "Not implemented\n"); +} + +void forward_batchnorm_layer(layer l, network net) +{ + if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1); + if(net.train){ + mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean); + variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance); + + scal_cpu(l.out_c, .99, l.rolling_mean, 1); + axpy_cpu(l.out_c, .01, l.mean, 1, l.rolling_mean, 1); + scal_cpu(l.out_c, .99, l.rolling_variance, 1); + axpy_cpu(l.out_c, .01, l.variance, 1, l.rolling_variance, 1); + + normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w); + copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1); + } else { + normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h*l.out_w); + } + scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h*l.out_w); + add_bias(l.output, l.biases, l.batch, l.out_c, l.out_h*l.out_w); +} + +void backward_batchnorm_layer(layer l, network net) +{ + if(!net.train){ + l.mean = l.rolling_mean; + l.variance = l.rolling_variance; + } + backward_bias(l.bias_updates, l.delta, l.batch, l.out_c, l.out_w*l.out_h); + backward_scale_cpu(l.x_norm, l.delta, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates); + + scale_bias(l.delta, l.scales, l.batch, l.out_c, l.out_h*l.out_w); + + mean_delta_cpu(l.delta, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta); + variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta); + normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.out_c, l.out_w*l.out_h, l.delta); + if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_batchnorm_layer(layer l) 
+{ + cuda_pull_array(l.scales_gpu, l.scales, l.c); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.c); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.c); +} +void push_batchnorm_layer(layer l) +{ + cuda_push_array(l.scales_gpu, l.scales, l.c); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.c); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.c); +} + +void forward_batchnorm_layer_gpu(layer l, network net) +{ + if(l.type == BATCHNORM) copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); + if (net.train) { +#ifdef CUDNN + float one = 1; + float zero = 0; + cudnnBatchNormalizationForwardTraining(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + l.dstTensorDesc, + l.x_gpu, + l.dstTensorDesc, + l.output_gpu, + l.normTensorDesc, + l.scales_gpu, + l.biases_gpu, + .01, + l.rolling_mean_gpu, + l.rolling_variance_gpu, + .00001, + l.mean_gpu, + l.variance_gpu); +#else + fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu); + fast_variance_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.variance_gpu); + + scal_gpu(l.out_c, .99, l.rolling_mean_gpu, 1); + axpy_gpu(l.out_c, .01, l.mean_gpu, 1, l.rolling_mean_gpu, 1); + scal_gpu(l.out_c, .99, l.rolling_variance_gpu, 1); + axpy_gpu(l.out_c, .01, l.variance_gpu, 1, l.rolling_variance_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); + normalize_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_norm_gpu, 1); + + scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); +#endif + } else { + normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); + scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, 
l.out_h*l.out_w); + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); + } + +} + +void backward_batchnorm_layer_gpu(layer l, network net) +{ + if(!net.train){ + l.mean_gpu = l.rolling_mean_gpu; + l.variance_gpu = l.rolling_variance_gpu; + } +#ifdef CUDNN + float one = 1; + float zero = 0; + cudnnBatchNormalizationBackward(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + &one, + &one, + l.dstTensorDesc, + l.x_gpu, + l.dstTensorDesc, + l.delta_gpu, + l.dstTensorDesc, + l.x_norm_gpu, + l.normTensorDesc, + l.scales_gpu, + l.scale_updates_gpu, + l.bias_updates_gpu, + .00001, + l.mean_gpu, + l.variance_gpu); + copy_gpu(l.outputs*l.batch, l.x_norm_gpu, 1, l.delta_gpu, 1); +#else + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h); + backward_scale_gpu(l.x_norm_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates_gpu); + + scale_bias_gpu(l.delta_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + + fast_mean_delta_gpu(l.delta_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta_gpu); + fast_variance_delta_gpu(l.x_gpu, l.delta_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta_gpu); + normalize_delta_gpu(l.x_gpu, l.mean_gpu, l.variance_gpu, l.mean_delta_gpu, l.variance_delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu); +#endif + if(l.type == BATCHNORM) copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/async/darknet/src/batchnorm_layer.h b/workloads/realworld/async/darknet/src/batchnorm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..25a18a3c8f2569bab135b088501248159e1cae11 --- /dev/null +++ b/workloads/realworld/async/darknet/src/batchnorm_layer.h @@ -0,0 +1,19 @@ +#ifndef BATCHNORM_LAYER_H +#define BATCHNORM_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +layer make_batchnorm_layer(int batch, int w, int h, 
int c); +void forward_batchnorm_layer(layer l, network net); +void backward_batchnorm_layer(layer l, network net); + +#ifdef GPU +void forward_batchnorm_layer_gpu(layer l, network net); +void backward_batchnorm_layer_gpu(layer l, network net); +void pull_batchnorm_layer(layer l); +void push_batchnorm_layer(layer l); +#endif + +#endif diff --git a/workloads/realworld/async/darknet/src/blas.c b/workloads/realworld/async/darknet/src/blas.c new file mode 100644 index 0000000000000000000000000000000000000000..9e1604449ba9aeb9decdc7f0395a38bd3b478671 --- /dev/null +++ b/workloads/realworld/async/darknet/src/blas.c @@ -0,0 +1,351 @@ +#include "blas.h" + +#include +#include +#include +#include +#include +#include +void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int b,i,j,k; + int out_c = c/(stride*stride); + + for(b = 0; b < batch; ++b){ + for(k = 0; k < c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int in_index = i + w*(j + h*(k + c*b)); + int c2 = k % out_c; + int offset = k / out_c; + int w2 = i*stride + offset % stride; + int h2 = j*stride + offset / stride; + int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); + if(forward) out[out_index] = x[in_index]; + else out[in_index] = x[out_index]; + } + } + } + } +} + +void flatten(float *x, int size, int layers, int batch, int forward) +{ + float *swap = calloc(size*layers*batch, sizeof(float)); + int i,c,b; + for(b = 0; b < batch; ++b){ + for(c = 0; c < layers; ++c){ + for(i = 0; i < size; ++i){ + int i1 = b*layers*size + c*size + i; + int i2 = b*layers*size + i*layers + c; + if (forward) swap[i2] = x[i1]; + else swap[i1] = x[i2]; + } + } + } + memcpy(x, swap, size*layers*batch*sizeof(float)); + free(swap); +} + +void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c) +{ + int i; + for(i = 0; i < n; ++i){ + c[i] = s[i]*a[i] + (1-s[i])*(b ? 
b[i] : 0); + } +} + +void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc) +{ + int i; + for(i = 0; i < n; ++i){ + if(da) da[i] += dc[i] * s[i]; + if(db) db[i] += dc[i] * (1-s[i]); + ds[i] += dc[i] * (a[i] - b[i]); + } +} + +void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) +{ + int stride = w1/w2; + int sample = w2/w1; + assert(stride == h1/h2); + assert(sample == h2/h1); + if(stride < 1) stride = 1; + if(sample < 1) sample = 1; + int minw = (w1 < w2) ? w1 : w2; + int minh = (h1 < h2) ? h1 : h2; + int minc = (c1 < c2) ? c1 : c2; + + int i,j,k,b; + for(b = 0; b < batch; ++b){ + for(k = 0; k < minc; ++k){ + for(j = 0; j < minh; ++j){ + for(i = 0; i < minw; ++i){ + int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); + int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); + out[out_index] = s1*out[out_index] + s2*add[add_index]; + } + } + } + } +} + +void mean_cpu(float *x, int batch, int filters, int spatial, float *mean) +{ + float scale = 1./(batch * spatial); + int i,j,k; + for(i = 0; i < filters; ++i){ + mean[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + mean[i] += x[index]; + } + } + mean[i] *= scale; + } +} + +void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + float scale = 1./(batch * spatial - 1); + int i,j,k; + for(i = 0; i < filters; ++i){ + variance[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance[i] += pow((x[index] - mean[i]), 2); + } + } + variance[i] *= scale; + } +} + +void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial) +{ + int b,f,i; + for(b = 0; b < batch; ++b){ + for(i = 0; i < spatial; ++i){ + float sum = 0; + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + 
sum += powf(x[index], 2); + } + sum = sqrtf(sum); + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + x[index] /= sum; + dx[index] = (1 - x[index]) / sum; + } + } + } +} + + +void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) +{ + int b, f, i; + for(b = 0; b < batch; ++b){ + for(f = 0; f < filters; ++f){ + for(i = 0; i < spatial; ++i){ + int index = b*filters*spatial + f*spatial + i; + x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + .000001f); + } + } + } +} + +void const_cpu(int N, float ALPHA, float *X, int INCX) +{ + int i; + for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; +} + +void mul_cpu(int N, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] *= X[i*INCX]; +} + +void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] = pow(X[i*INCX], ALPHA); +} + +void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX]; +} + +void scal_cpu(int N, float ALPHA, float *X, int INCX) +{ + int i; + for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA; +} + +void fill_cpu(int N, float ALPHA, float *X, int INCX) +{ + int i; + for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; +} + +void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i, j; + int index = 0; + for(j = 0; j < B; ++j) { + for(i = 0; i < NX; ++i){ + if(X) X[j*NX + i] += OUT[index]; + ++index; + } + for(i = 0; i < NY; ++i){ + if(Y) Y[j*NY + i] += OUT[index]; + ++index; + } + } +} + +void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i, j; + int index = 0; + for(j = 0; j < B; ++j) { + for(i = 0; i < NX; ++i){ + OUT[index++] = X[j*NX + i]; + } + for(i = 0; i < NY; ++i){ + OUT[index++] = Y[j*NY + i]; + } + } +} + +void copy_cpu(int N, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX]; 
+} + +void mult_add_into_cpu(int N, float *X, float *Y, float *Z) +{ + int i; + for(i = 0; i < N; ++i) Z[i] += X[i]*Y[i]; +} + +void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float diff = truth[i] - pred[i]; + float abs_val = fabs(diff); + if(abs_val < 1) { + error[i] = diff * diff; + delta[i] = diff; + } + else { + error[i] = 2*abs_val - 1; + delta[i] = (diff < 0) ? 1 : -1; + } + } +} + +void l1_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float diff = truth[i] - pred[i]; + error[i] = fabs(diff); + delta[i] = diff > 0 ? 1 : -1; + } +} + +void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float t = truth[i]; + float p = pred[i]; + error[i] = (t) ? -log(p) : 0; + delta[i] = t-p; + } +} + +void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float t = truth[i]; + float p = pred[i]; + error[i] = -t*log(p) - (1-t)*log(1-p); + delta[i] = t-p; + } +} + +void l2_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float diff = truth[i] - pred[i]; + error[i] = diff * diff; + delta[i] = diff; + } +} + +float dot_cpu(int N, float *X, int INCX, float *Y, int INCY) +{ + int i; + float dot = 0; + for(i = 0; i < N; ++i) dot += X[i*INCX] * Y[i*INCY]; + return dot; +} + +void softmax(float *input, int n, float temp, int stride, float *output) +{ + int i; + float sum = 0; + float largest = -FLT_MAX; + for(i = 0; i < n; ++i){ + if(input[i*stride] > largest) largest = input[i*stride]; + } + for(i = 0; i < n; ++i){ + float e = exp(input[i*stride]/temp - largest/temp); + sum += e; + output[i*stride] = e; + } + for(i = 0; i < n; ++i){ + output[i*stride] /= sum; + } +} + + +void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int 
group_offset, int stride, float temp, float *output) +{ + int g, b; + for(b = 0; b < batch; ++b){ + for(g = 0; g < groups; ++g){ + softmax(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset); + } + } +} + +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + int i, j, k, b; + for(b = 0; b < batch; ++b){ + for(k = 0; k < c; ++k){ + for(j = 0; j < h*stride; ++j){ + for(i = 0; i < w*stride; ++i){ + int in_index = b*w*h*c + k*w*h + (j/stride)*w + i/stride; + int out_index = b*w*h*c*stride*stride + k*w*h*stride*stride + j*w*stride + i; + if(forward) out[out_index] = scale*in[in_index]; + else in[in_index] += scale*out[out_index]; + } + } + } + } +} + + diff --git a/workloads/realworld/async/darknet/src/blas.h b/workloads/realworld/async/darknet/src/blas.h new file mode 100644 index 0000000000000000000000000000000000000000..5d24a9aea70d8050b05098aa7a5634576444a32c --- /dev/null +++ b/workloads/realworld/async/darknet/src/blas.h @@ -0,0 +1,105 @@ +#ifndef BLAS_H +#define BLAS_H +#include "darknet.h" + +void flatten(float *x, int size, int layers, int batch, int forward); +void pm(int M, int N, float *A); +float *random_matrix(int rows, int cols); +void time_random_matrix(int TA, int TB, int m, int k, int n); +void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); + +void test_blas(); + +void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void mult_add_into_cpu(int N, float *X, float *Y, float *Z); + +void const_cpu(int N, float ALPHA, float *X, int INCX); +void constrain_gpu(int N, float ALPHA, float * X, int INCX); +void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void mul_cpu(int N, float *X, int INCX, float *Y, int INCY); + +int test_gpu_blas(); +void shortcut_cpu(int batch, int w1, int h1, int c1, 
float *add, int w2, int h2, int c2, float s1, float s2, float *out); + +void mean_cpu(float *x, int batch, int filters, int spatial, float *mean); +void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); + +void scale_bias(float *output, float *scales, int batch, int n, int size); +void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); +void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); +void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); +void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); +void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial); + +void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error); +void l2_cpu(int n, float *pred, float *truth, float *delta, float *error); +void l1_cpu(int n, float *pred, float *truth, float *delta, float *error); +void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); +void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); +void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c); +void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc); + +void softmax(float *input, int n, float temp, int stride, float *output); +void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); + +#ifdef GPU +#include "cuda_dark.h" +#include "tree.h" + +void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); +void axpy_gpu_offset(int 
N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); +void copy_gpu(int N, float * X, int INCX, float * Y, int INCY); +void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); +void add_gpu(int N, float ALPHA, float * X, int INCX); +void supp_gpu(int N, float ALPHA, float * X, int INCX); +void mask_gpu(int N, float * X, float mask_num, float * mask, float val); +void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale); +void const_gpu(int N, float ALPHA, float *X, int INCX); +void pow_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void mul_gpu(int N, float *X, int INCX, float *Y, int INCY); + +void mean_gpu(float *x, int batch, int filters, int spatial, float *mean); +void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); +void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); +void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial); + +void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); + +void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); +void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); + +void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); +void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean); +void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out); +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); +void scale_bias_gpu(float 
*output, float *biases, int batch, int n, int size); +void add_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); + +void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); +void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); +void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error); +void l2_gpu(int n, float *pred, float *truth, float *delta, float *error); +void l1_gpu(int n, float *pred, float *truth, float *delta, float *error); +void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error); +void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc); +void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c); +void mult_add_into_gpu(int num, float *a, float *b, float *c); +void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT); + +void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); + +void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); +void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t); +void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t); + +void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out); +void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier); +void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); + +#endif +#endif diff --git a/workloads/realworld/async/darknet/src/blas_kernels.cu 
b/workloads/realworld/async/darknet/src/blas_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..3db34a057b318e87769058c4b7fdc81f02780a9d --- /dev/null +++ b/workloads/realworld/async/darknet/src/blas_kernels.cu @@ -0,0 +1,1035 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" +#include + +extern "C" { +#include "blas.h" +#include "cuda_dark.h" +#include "utils.h" +} + +__global__ void scale_bias_kernel(float *output, float *biases, int n, int size) +{ + int offset = blockIdx.x * blockDim.x + threadIdx.x; + int filter = blockIdx.y; + int batch = blockIdx.z; + + if(offset < size) output[(batch*n+filter)*size + offset] *= biases[filter]; +} + +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size) +{ + dim3 dimGrid((size-1)/BLOCK + 1, n, batch); + dim3 dimBlock(BLOCK, 1, 1); + + scale_bias_kernel<<>>(output, biases, n, size); + check_error(cudaPeekAtLastError()); +} + +__global__ void backward_scale_kernel(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + __shared__ float part[BLOCK]; + int i,b; + int filter = blockIdx.x; + int p = threadIdx.x; + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; i += BLOCK){ + int index = p + i + size*(filter + n*b); + sum += (p+i < size) ? 
delta[index]*x_norm[index] : 0; + } + } + part[p] = sum; + __syncthreads(); + if (p == 0) { + for(i = 0; i < BLOCK; ++i) scale_updates[filter] += part[i]; + } +} + +void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + backward_scale_kernel<<>>(x_norm, delta, batch, n, size, scale_updates); + check_error(cudaPeekAtLastError()); +} + +__global__ void add_bias_kernel(float *output, float *biases, int batch, int n, int size) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= n*size*batch) return; + int i = index % size; + index /= size; + int j = index % n; + index /= n; + int k = index; + + output[(k*n+j)*size + i] += biases[j]; +} + +void add_bias_gpu(float *output, float *biases, int batch, int n, int size) +{ + int num = n*size*batch; + + add_bias_kernel<<>>(output, biases, batch, n, size); + check_error(cudaPeekAtLastError()); +} + +__global__ void backward_bias_conn_kernel(float *bias_updates, float *delta, int batch, int n) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= n) return; + int b; + float sum = 0; + for(b = 0; b < batch; ++b){ + int i = b*n + index; + sum += delta[i]; + } + bias_updates[index] += sum; +} + +__global__ void backward_bias_kernel(float *bias_updates, float *delta, int batch, int n, int size) +{ + __shared__ float part[BLOCK]; + int i,b; + int filter = blockIdx.x; + int p = threadIdx.x; + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; i += BLOCK){ + int index = p + i + size*(filter + n*b); + sum += (p+i < size) ? 
delta[index] : 0; + } + } + part[p] = sum; + __syncthreads(); + if (p == 0) { + for(i = 0; i < BLOCK; ++i) bias_updates[filter] += part[i]; + } +} + +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size) +{ + if(size == 1){ + backward_bias_conn_kernel<<>>(bias_updates, delta, batch, n); + }else{ + backward_bias_kernel<<>>(bias_updates, delta, batch, n, size); + } + check_error(cudaPeekAtLastError()); +} + +/* +__global__ void dot_kernel(float *output, float scale, int batch, int n, int size, float *delta) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int f1 = index / n; + int f2 = index % n; + if (f2 <= f1) return; + + float sum = 0; + float norm1 = 0; + float norm2 = 0; + int b, i; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int i1 = b * size * n + f1 * size + i; + int i2 = b * size * n + f2 * size + i; + sum += output[i1] * output[i2]; + norm1 += output[i1] * output[i1]; + norm2 += output[i2] * output[i2]; + } + } + norm1 = sqrt(norm1); + norm2 = sqrt(norm2); + float norm = norm1 * norm2; + sum = sum / norm; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int i1 = b * size * n + f1 * size + i; + int i2 = b * size * n + f2 * size + i; + delta[i1] += - scale * sum * output[i2] / norm; + delta[i2] += - scale * sum * output[i1] / norm; + } + } +} + +void dot_error_gpu(layer l) +{ + dot_kernel<<>>(l.output_gpu, l.dot, l.batch, l.n, l.out_w * l.out_h, l.delta_gpu); + check_error(cudaPeekAtLastError()); +} +*/ + + +__global__ void adam_kernel(int N, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + + float mhat = m[index] / (1.f - powf(B1, t)); + float vhat = v[index] / (1.f - powf(B2, t)); + + x[index] = x[index] + rate * mhat / (sqrtf(vhat) + eps); +} + +extern "C" void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, 
float rate, float eps, int t) +{ + adam_kernel<<>>(n, x, m, v, B1, B2, rate, eps, t); + check_error(cudaPeekAtLastError()); +} + +extern "C" void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t) +{ + scal_gpu(n, B1, m, 1); + scal_gpu(n, B2, v, 1); + axpy_gpu(n, -decay*batch, w, 1, d, 1); + + axpy_gpu(n, (1-B1), d, 1, m, 1); + mul_gpu(n, d, 1, d, 1); + axpy_gpu(n, (1-B2), d, 1, v, 1); + + adam_gpu(n, w, m, v, B1, B2, rate, eps, t); + fill_gpu(n, 0, d, 1); +} + +__global__ void normalize_kernel(int N, float *x, float *mean, float *variance, int batch, int filters, int spatial) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int f = (index/spatial)%filters; + + x[index] = (x[index] - mean[f])/(sqrtf(variance[f] + .00001f)); +} + +__global__ void normalize_delta_kernel(int N, float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int f = (index/spatial)%filters; + + delta[index] = delta[index] * 1.f/(sqrtf(variance[f] + .00001f)) + variance_delta[f] * 2.f * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); +} + +extern "C" void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + size_t N = batch*filters*spatial; + normalize_delta_kernel<<>>(N, x, mean, variance, mean_delta, variance_delta, batch, filters, spatial, delta); + check_error(cudaPeekAtLastError()); +} + +__global__ void variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) 
return; + int j,k; + variance_delta[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance_delta[i] += delta[index]*(x[index] - mean[i]); + } + } + variance_delta[i] *= -.5f * powf(variance[i] + .00001f, (float)(-3.f/2.f)); +} + +__global__ void accumulate_kernel(float *x, int n, int groups, float *sum) +{ + int k; + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= groups) return; + sum[i] = 0; + for(k = 0; k < n; ++k){ + sum[i] += x[k*groups + i]; + } +} + +__global__ void fast_mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + local[id] += (i+id < spatial) ? delta[index] : 0; + } + } + + __syncthreads(); + + if(id == 0){ + mean_delta[filter] = 0; + for(i = 0; i < threads; ++i){ + mean_delta[filter] += local[i]; + } + mean_delta[filter] *= (-1.f/sqrtf(variance[filter] + .00001f)); + } +} + +__global__ void fast_variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + + local[id] += (i+id < spatial) ? 
delta[index]*(x[index] - mean[filter]) : 0; + } + } + + __syncthreads(); + + if(id == 0){ + variance_delta[filter] = 0; + for(i = 0; i < threads; ++i){ + variance_delta[filter] += local[i]; + } + variance_delta[filter] *= -.5f * powf(variance[filter] + .00001f, (float)(-3.f/2.f)); + } +} + + +__global__ void mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + int j,k; + mean_delta[i] = 0; + for (j = 0; j < batch; ++j) { + for (k = 0; k < spatial; ++k) { + int index = j*filters*spatial + i*spatial + k; + mean_delta[i] += delta[index]; + } + } + mean_delta[i] *= (-1.f/sqrtf(variance[i] + .00001f)); +} + +extern "C" void mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + mean_delta_kernel<<>>(delta, variance, batch, filters, spatial, mean_delta); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + fast_mean_delta_kernel<<>>(delta, variance, batch, filters, spatial, mean_delta); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + fast_variance_delta_kernel<<>>(x, delta, mean, variance, batch, filters, spatial, variance_delta); + check_error(cudaPeekAtLastError()); +} + +__global__ void mean_kernel(float *x, int batch, int filters, int spatial, float *mean) +{ + float scale = 1.f/(batch * spatial); + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + int j,k; + mean[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + mean[i] += x[index]; + } + } + mean[i] *= scale; +} + 
+__global__ void variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + float scale = 1.f/(batch * spatial - 1); + int j,k; + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + variance[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance[i] += powf((x[index] - mean[i]), 2); + } + } + variance[i] *= scale; +} + +__global__ void reorg_kernel(int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int in_index = i; + int in_w = i%w; + i = i/w; + int in_h = i%h; + i = i/h; + int in_c = i%c; + i = i/c; + int b = i%batch; + + int out_c = c/(stride*stride); + + int c2 = in_c % out_c; + int offset = in_c / out_c; + int w2 = in_w*stride + offset % stride; + int h2 = in_h*stride + offset / stride; + //printf("%d\n", offset); + int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); + + // printf("%d %d %d\n", w2, h2, c2); + //printf("%d %d\n", in_index, out_index); + //if(out_index >= N || out_index < 0) printf("bad bad bad \n"); + + if(forward) out[out_index] = x[in_index]; + else out[in_index] = x[out_index]; + //if(forward) out[1] = x[1]; + //else out[0] = x[0]; +} + +__global__ void axpy_kernel(int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[OFFY+i*INCY] += ALPHA*X[OFFX+i*INCX]; +} + +__global__ void pow_kernel(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[i*INCY] = pow(X[i*INCX], ALPHA); +} + +__global__ void const_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i 
< N) X[i*INCX] = ALPHA; +} + +__global__ void constrain_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] = fminf(ALPHA, fmaxf(-ALPHA, X[i*INCX])); +} + +__global__ void supp_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) { + if((X[i*INCX] * X[i*INCX]) < (ALPHA * ALPHA)) X[i*INCX] = 0; + } +} + +__global__ void add_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] += ALPHA; +} + +__global__ void scal_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] *= ALPHA; +} + +__global__ void fill_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] = ALPHA; +} + +__global__ void copy_kernel(int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[i*INCY + OFFY] = X[i*INCX + OFFX]; +} + +__global__ void mul_kernel(int N, float *X, int INCX, float *Y, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[i*INCY] *= X[i*INCX]; +} + + +extern "C" void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) +{ + size_t N = batch*filters*spatial; + normalize_kernel<<>>(N, x, mean, variance, batch, filters, spatial); + check_error(cudaPeekAtLastError()); +} + +__global__ void l2norm_kernel(int N, float *x, float *dx, int batch, int filters, int spatial) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int b = index / spatial; + int i = index % spatial; + int f; + float sum = 0; + 
for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + sum += powf(x[index], 2); + } + sum = sqrtf(sum); + if(sum == 0) sum = 1; + //printf("%f\n", sum); + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + x[index] /= sum; + dx[index] = (1 - x[index]) / sum; + } +} + +extern "C" void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial) +{ + size_t N = batch*spatial; + l2norm_kernel<<>>(N, x, dx, batch, filters, spatial); + check_error(cudaPeekAtLastError()); +} + +__global__ void fast_mean_kernel(float *x, int batch, int filters, int spatial, float *mean) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + local[id] += (i+id < spatial) ? x[index] : 0; + } + } + + __syncthreads(); + + if(id == 0){ + mean[filter] = 0; + for(i = 0; i < threads; ++i){ + mean[filter] += local[i]; + } + mean[filter] /= spatial * batch; + } +} + +__global__ void fast_variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + + local[id] += (i+id < spatial) ? 
powf((x[index] - mean[filter]), 2) : 0; + } + } + + __syncthreads(); + + if(id == 0){ + variance[filter] = 0; + for(i = 0; i < threads; ++i){ + variance[filter] += local[i]; + } + variance[filter] /= (spatial * batch - 1); + } +} + +extern "C" void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean) +{ + fast_mean_kernel<<>>(x, batch, filters, spatial, mean); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + fast_variance_kernel<<>>(x, mean, batch, filters, spatial, variance); + check_error(cudaPeekAtLastError()); +} + + +extern "C" void mean_gpu(float *x, int batch, int filters, int spatial, float *mean) +{ + mean_kernel<<>>(x, batch, filters, spatial, mean); + check_error(cudaPeekAtLastError()); +} + +extern "C" void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + variance_kernel<<>>(x, mean, batch, filters, spatial, variance); + check_error(cudaPeekAtLastError()); +} + +extern "C" void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY) +{ + axpy_gpu_offset(N, ALPHA, X, 0, INCX, Y, 0, INCY); +} + +extern "C" void pow_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY) +{ + pow_kernel<<>>(N, ALPHA, X, INCX, Y, INCY); + check_error(cudaPeekAtLastError()); +} + +extern "C" void axpy_gpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY) +{ + axpy_kernel<<>>(N, ALPHA, X, OFFX, INCX, Y, OFFY, INCY); + check_error(cudaPeekAtLastError()); +} + +extern "C" void copy_gpu(int N, float * X, int INCX, float * Y, int INCY) +{ + copy_gpu_offset(N, X, 0, INCX, Y, 0, INCY); +} + +extern "C" void mul_gpu(int N, float * X, int INCX, float * Y, int INCY) +{ + mul_kernel<<>>(N, X, INCX, Y, INCY); + check_error(cudaPeekAtLastError()); +} + +extern "C" void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int 
INCY) +{ + copy_kernel<<>>(N, X, OFFX, INCX, Y, OFFY, INCY); + check_error(cudaPeekAtLastError()); +} + +__global__ void flatten_kernel(int N, float *x, int spatial, int layers, int batch, int forward, float *out) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int in_s = i%spatial; + i = i/spatial; + int in_c = i%layers; + i = i/layers; + int b = i; + + int i1 = b*layers*spatial + in_c*spatial + in_s; + int i2 = b*layers*spatial + in_s*layers + in_c; + + if (forward) out[i2] = x[i1]; + else out[i1] = x[i2]; +} + +extern "C" void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out) +{ + int size = spatial*batch*layers; + flatten_kernel<<>>(size, x, spatial, layers, batch, forward, out); + check_error(cudaPeekAtLastError()); +} + +extern "C" void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int size = w*h*c*batch; + reorg_kernel<<>>(size, x, w, h, c, batch, stride, forward, out); + check_error(cudaPeekAtLastError()); +} + +__global__ void mask_kernel(int n, float *x, float mask_num, float *mask, float val) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n && mask[i] == mask_num) x[i] = val; +} + +extern "C" void mask_gpu(int N, float * X, float mask_num, float * mask, float val) +{ + mask_kernel<<>>(N, X, mask_num, mask, val); + check_error(cudaPeekAtLastError()); +} + +__global__ void scale_mask_kernel(int n, float *x, float mask_num, float *mask, float scale) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n && mask[i] == mask_num) x[i] *= scale; +} + +extern "C" void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale) +{ + scale_mask_kernel<<>>(N, X, mask_num, mask, scale); + check_error(cudaPeekAtLastError()); +} + +extern "C" void const_gpu(int N, float ALPHA, float * X, int INCX) +{ + const_kernel<<>>(N, ALPHA, X, INCX); + 
check_error(cudaPeekAtLastError()); +} + +extern "C" void constrain_gpu(int N, float ALPHA, float * X, int INCX) +{ + constrain_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + + +extern "C" void add_gpu(int N, float ALPHA, float * X, int INCX) +{ + add_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void scal_gpu(int N, float ALPHA, float * X, int INCX) +{ + scal_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void supp_gpu(int N, float ALPHA, float * X, int INCX) +{ + supp_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fill_gpu(int N, float ALPHA, float * X, int INCX) +{ + fill_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +__global__ void shortcut_kernel(int size, int minw, int minh, int minc, int stride, int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= size) return; + int i = id % minw; + id /= minw; + int j = id % minh; + id /= minh; + int k = id % minc; + id /= minc; + int b = id % batch; + + int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); + int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); + out[out_index] = s1*out[out_index] + s2*add[add_index]; + //out[out_index] += add[add_index]; +} + +extern "C" void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) +{ + int minw = (w1 < w2) ? w1 : w2; + int minh = (h1 < h2) ? h1 : h2; + int minc = (c1 < c2) ? 
c1 : c2; + + int stride = w1/w2; + int sample = w2/w1; + assert(stride == h1/h2); + assert(sample == h2/h1); + if(stride < 1) stride = 1; + if(sample < 1) sample = 1; + + int size = batch * minw * minh * minc; + shortcut_kernel<<>>(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, s1, s2, out); + check_error(cudaPeekAtLastError()); +} + +__global__ void smooth_l1_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float diff = truth[i] - pred[i]; + float abs_val = fabsf(diff); + if(abs_val < 1) { + error[i] = diff * diff; + delta[i] = diff; + } + else { + error[i] = 2*abs_val - 1; + delta[i] = (diff > 0) ? 1 : -1; + } + } +} + +extern "C" void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + smooth_l1_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void softmax_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float t = truth[i]; + float p = pred[i]; + error[i] = (t) ? 
-log(p) : 0; + delta[i] = t-p; + } +} + +extern "C" void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + softmax_x_ent_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void logistic_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float t = truth[i]; + float p = pred[i]; + error[i] = -t*log(p+.0000001) - (1-t)*log(1-p+.0000001); + delta[i] = t-p; + } +} + +extern "C" void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + logistic_x_ent_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void l2_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float diff = truth[i] - pred[i]; + error[i] = diff * diff; //I know this is technically wrong, deal with it. + delta[i] = diff; + } +} + +extern "C" void l2_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + l2_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void l1_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float diff = truth[i] - pred[i]; + error[i] = abs(diff); + delta[i] = (diff > 0) ? 1 : -1; + } +} + +extern "C" void l1_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + l1_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void wgan_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + error[i] = truth[i] ? -pred[i] : pred[i]; + delta[i] = (truth[i] > 0) ? 
1 : -1; + } +} + +extern "C" void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + wgan_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + + + + +__global__ void weighted_sum_kernel(int n, float *a, float *b, float *s, float *c) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + c[i] = s[i]*a[i] + (1-s[i])*(b ? b[i] : 0); + } +} + +__global__ void deinter_kernel(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < (NX+NY)*B){ + int b = i / (NX+NY); + int j = i % (NX+NY); + if (j < NX){ + if(X) X[b*NX + j] += OUT[i]; + } else { + if(Y) Y[b*NY + j - NX] += OUT[i]; + } + } +} + +extern "C" void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + deinter_kernel<<>>(NX, X, NY, Y, B, OUT); + check_error(cudaPeekAtLastError()); +} + +__global__ void inter_kernel(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < (NX+NY)*B){ + int b = i / (NX+NY); + int j = i % (NX+NY); + if (j < NX){ + OUT[i] = X[b*NX + j]; + } else { + OUT[i] = Y[b*NY + j - NX]; + } + } +} + +extern "C" void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + inter_kernel<<>>(NX, X, NY, Y, B, OUT); + check_error(cudaPeekAtLastError()); +} + +extern "C" void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c) +{ + weighted_sum_kernel<<>>(num, a, b, s, c); + check_error(cudaPeekAtLastError()); +} + +__global__ void weighted_delta_kernel(int n, float *a, float *b, float *s, float *da, float *db, float *ds, float *dc) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + if(da) da[i] += dc[i] * s[i]; + if(db) db[i] += dc[i] * (1-s[i]); + ds[i] += dc[i] * (a[i] - b[i]); + } +} + +extern "C" void weighted_delta_gpu(float *a, float *b, float *s, 
float *da, float *db, float *ds, int num, float *dc) +{ + weighted_delta_kernel<<>>(num, a, b, s, da, db, ds, dc); + check_error(cudaPeekAtLastError()); +} + +__global__ void mult_add_into_kernel(int n, float *a, float *b, float *c) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + c[i] += a[i]*b[i]; + } +} + +extern "C" void mult_add_into_gpu(int num, float *a, float *b, float *c) +{ + mult_add_into_kernel<<>>(num, a, b, c); + check_error(cudaPeekAtLastError()); +} + + +__device__ void softmax_device(float *input, int n, float temp, int stride, float *output) +{ + int i; + float sum = 0; + float largest = -INFINITY; + for(i = 0; i < n; ++i){ + int val = input[i*stride]; + largest = (val>largest) ? val : largest; + } + for(i = 0; i < n; ++i){ + float e = expf(input[i*stride]/temp - largest/temp); + sum += e; + output[i*stride] = e; + } + for(i = 0; i < n; ++i){ + output[i*stride] /= sum; + } +} + + +__global__ void softmax_tree_kernel(float *input, int spatial, int batch, int stride, float temp, float *output, int groups, int *group_size, int *group_offset) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= spatial*batch*groups) return; + int s = id % spatial; + id = id / spatial; + int g = id % groups; + int b = id / groups; + int goff = group_offset[g]*spatial; + int boff = b*stride; + softmax_device(input + goff + boff + s, group_size[g], temp, spatial, output + goff + boff + s); +} + +extern "C" void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier) +{ + int *tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups); + int *tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups); + /* + static int *tree_groups_size = 0; + static int *tree_groups_offset = 0; + if(!tree_groups_size){ + tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups); + tree_groups_offset = 
cuda_make_int_array(hier.group_offset, hier.groups); + } + */ + int num = spatial*batch*hier.groups; + softmax_tree_kernel<<>>(input, spatial, batch, stride, temp, output, hier.groups, tree_groups_size, tree_groups_offset); + check_error(cudaPeekAtLastError()); + cuda_free((float *)tree_groups_size); + cuda_free((float *)tree_groups_offset); +} + +__global__ void softmax_kernel(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= batch*groups) return; + int b = id / groups; + int g = id % groups; + softmax_device(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset); +} + +extern "C" void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) +{ + softmax_kernel<<>>(input, n, batch, batch_offset, groups, group_offset, stride, temp, output); + check_error(cudaPeekAtLastError()); +} + + +__global__ void upsample_kernel(size_t N, float *x, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + size_t i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int out_index = i; + int out_w = i%(w*stride); + i = i/(w*stride); + int out_h = i%(h*stride); + i = i/(h*stride); + int out_c = i%c; + i = i/c; + int b = i%batch; + + int in_w = out_w / stride; + int in_h = out_h / stride; + int in_c = out_c; + + int in_index = b*w*h*c + in_c*w*h + in_h*w + in_w; + + + if(forward) out[out_index] += scale * x[in_index]; + else atomicAdd(x+in_index, scale * out[out_index]); +} +extern "C" void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + size_t size = w*h*c*batch*stride*stride; + upsample_kernel<<>>(size, in, w, h, c, batch, stride, forward, scale, out); + 
check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/async/darknet/src/box.c b/workloads/realworld/async/darknet/src/box.c new file mode 100644 index 0000000000000000000000000000000000000000..8a1772c9ae05dede6ddc83d9b6465f64cf974ae8 --- /dev/null +++ b/workloads/realworld/async/darknet/src/box.c @@ -0,0 +1,357 @@ +#include "box.h" +#include +#include +#include + +int nms_comparator(const void *pa, const void *pb) +{ + detection a = *(detection *)pa; + detection b = *(detection *)pb; + float diff = 0; + if(b.sort_class >= 0){ + diff = a.prob[b.sort_class] - b.prob[b.sort_class]; + } else { + diff = a.objectness - b.objectness; + } + if(diff < 0) return 1; + else if(diff > 0) return -1; + return 0; +} + +void do_nms_obj(detection *dets, int total, int classes, float thresh) +{ + int i, j, k; + k = total-1; + for(i = 0; i <= k; ++i){ + if(dets[i].objectness == 0){ + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k+1; + + for(i = 0; i < total; ++i){ + dets[i].sort_class = -1; + } + + qsort(dets, total, sizeof(detection), nms_comparator); + for(i = 0; i < total; ++i){ + if(dets[i].objectness == 0) continue; + box a = dets[i].bbox; + for(j = i+1; j < total; ++j){ + if(dets[j].objectness == 0) continue; + box b = dets[j].bbox; + if (box_iou(a, b) > thresh){ + dets[j].objectness = 0; + for(k = 0; k < classes; ++k){ + dets[j].prob[k] = 0; + } + } + } + } +} + + +void do_nms_sort(detection *dets, int total, int classes, float thresh) +{ + int i, j, k; + k = total-1; + for(i = 0; i <= k; ++i){ + if(dets[i].objectness == 0){ + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k+1; + + for(k = 0; k < classes; ++k){ + for(i = 0; i < total; ++i){ + dets[i].sort_class = k; + } + qsort(dets, total, sizeof(detection), nms_comparator); + for(i = 0; i < total; ++i){ + if(dets[i].prob[k] == 0) continue; + box a = dets[i].bbox; + for(j = i+1; j < total; ++j){ + box b = 
dets[j].bbox; + if (box_iou(a, b) > thresh){ + dets[j].prob[k] = 0; + } + } + } + } +} + +box float_to_box(float *f, int stride) +{ + box b = {0}; + b.x = f[0]; + b.y = f[1*stride]; + b.w = f[2*stride]; + b.h = f[3*stride]; + return b; +} + +dbox derivative(box a, box b) +{ + dbox d; + d.dx = 0; + d.dw = 0; + float l1 = a.x - a.w/2; + float l2 = b.x - b.w/2; + if (l1 > l2){ + d.dx -= 1; + d.dw += .5; + } + float r1 = a.x + a.w/2; + float r2 = b.x + b.w/2; + if(r1 < r2){ + d.dx += 1; + d.dw += .5; + } + if (l1 > r2) { + d.dx = -1; + d.dw = 0; + } + if (r1 < l2){ + d.dx = 1; + d.dw = 0; + } + + d.dy = 0; + d.dh = 0; + float t1 = a.y - a.h/2; + float t2 = b.y - b.h/2; + if (t1 > t2){ + d.dy -= 1; + d.dh += .5; + } + float b1 = a.y + a.h/2; + float b2 = b.y + b.h/2; + if(b1 < b2){ + d.dy += 1; + d.dh += .5; + } + if (t1 > b2) { + d.dy = -1; + d.dh = 0; + } + if (b1 < t2){ + d.dy = 1; + d.dh = 0; + } + return d; +} + +float overlap(float x1, float w1, float x2, float w2) +{ + float l1 = x1 - w1/2; + float l2 = x2 - w2/2; + float left = l1 > l2 ? l1 : l2; + float r1 = x1 + w1/2; + float r2 = x2 + w2/2; + float right = r1 < r2 ? 
r1 : r2; + return right - left; +} + +float box_intersection(box a, box b) +{ + float w = overlap(a.x, a.w, b.x, b.w); + float h = overlap(a.y, a.h, b.y, b.h); + if(w < 0 || h < 0) return 0; + float area = w*h; + return area; +} + +float box_union(box a, box b) +{ + float i = box_intersection(a, b); + float u = a.w*a.h + b.w*b.h - i; + return u; +} + +float box_iou(box a, box b) +{ + return box_intersection(a, b)/box_union(a, b); +} + +float box_rmse(box a, box b) +{ + return sqrt(pow(a.x-b.x, 2) + + pow(a.y-b.y, 2) + + pow(a.w-b.w, 2) + + pow(a.h-b.h, 2)); +} + +dbox dintersect(box a, box b) +{ + float w = overlap(a.x, a.w, b.x, b.w); + float h = overlap(a.y, a.h, b.y, b.h); + dbox dover = derivative(a, b); + dbox di; + + di.dw = dover.dw*h; + di.dx = dover.dx*h; + di.dh = dover.dh*w; + di.dy = dover.dy*w; + + return di; +} + +dbox dunion(box a, box b) +{ + dbox du; + + dbox di = dintersect(a, b); + du.dw = a.h - di.dw; + du.dh = a.w - di.dh; + du.dx = -di.dx; + du.dy = -di.dy; + + return du; +} + + +void test_dunion() +{ + box a = {0, 0, 1, 1}; + box dxa= {0+.0001, 0, 1, 1}; + box dya= {0, 0+.0001, 1, 1}; + box dwa= {0, 0, 1+.0001, 1}; + box dha= {0, 0, 1, 1+.0001}; + + box b = {.5, .5, .2, .2}; + dbox di = dunion(a,b); + printf("Union: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); + float inter = box_union(a, b); + float xinter = box_union(dxa, b); + float yinter = box_union(dya, b); + float winter = box_union(dwa, b); + float hinter = box_union(dha, b); + xinter = (xinter - inter)/(.0001); + yinter = (yinter - inter)/(.0001); + winter = (winter - inter)/(.0001); + hinter = (hinter - inter)/(.0001); + printf("Union Manual %f %f %f %f\n", xinter, yinter, winter, hinter); +} +void test_dintersect() +{ + box a = {0, 0, 1, 1}; + box dxa= {0+.0001, 0, 1, 1}; + box dya= {0, 0+.0001, 1, 1}; + box dwa= {0, 0, 1+.0001, 1}; + box dha= {0, 0, 1, 1+.0001}; + + box b = {.5, .5, .2, .2}; + dbox di = dintersect(a,b); + printf("Inter: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); 
+ float inter = box_intersection(a, b); + float xinter = box_intersection(dxa, b); + float yinter = box_intersection(dya, b); + float winter = box_intersection(dwa, b); + float hinter = box_intersection(dha, b); + xinter = (xinter - inter)/(.0001); + yinter = (yinter - inter)/(.0001); + winter = (winter - inter)/(.0001); + hinter = (hinter - inter)/(.0001); + printf("Inter Manual %f %f %f %f\n", xinter, yinter, winter, hinter); +} + +void test_box() +{ + test_dintersect(); + test_dunion(); + box a = {0, 0, 1, 1}; + box dxa= {0+.00001, 0, 1, 1}; + box dya= {0, 0+.00001, 1, 1}; + box dwa= {0, 0, 1+.00001, 1}; + box dha= {0, 0, 1, 1+.00001}; + + box b = {.5, 0, .2, .2}; + + float iou = box_iou(a,b); + iou = (1-iou)*(1-iou); + printf("%f\n", iou); + dbox d = diou(a, b); + printf("%f %f %f %f\n", d.dx, d.dy, d.dw, d.dh); + + float xiou = box_iou(dxa, b); + float yiou = box_iou(dya, b); + float wiou = box_iou(dwa, b); + float hiou = box_iou(dha, b); + xiou = ((1-xiou)*(1-xiou) - iou)/(.00001); + yiou = ((1-yiou)*(1-yiou) - iou)/(.00001); + wiou = ((1-wiou)*(1-wiou) - iou)/(.00001); + hiou = ((1-hiou)*(1-hiou) - iou)/(.00001); + printf("manual %f %f %f %f\n", xiou, yiou, wiou, hiou); +} + +dbox diou(box a, box b) +{ + float u = box_union(a,b); + float i = box_intersection(a,b); + dbox di = dintersect(a,b); + dbox du = dunion(a,b); + dbox dd = {0,0,0,0}; + + if(i <= 0 || 1) { + dd.dx = b.x - a.x; + dd.dy = b.y - a.y; + dd.dw = b.w - a.w; + dd.dh = b.h - a.h; + return dd; + } + + dd.dx = 2*pow((1-(i/u)),1)*(di.dx*u - du.dx*i)/(u*u); + dd.dy = 2*pow((1-(i/u)),1)*(di.dy*u - du.dy*i)/(u*u); + dd.dw = 2*pow((1-(i/u)),1)*(di.dw*u - du.dw*i)/(u*u); + dd.dh = 2*pow((1-(i/u)),1)*(di.dh*u - du.dh*i)/(u*u); + return dd; +} + + +void do_nms(box *boxes, float **probs, int total, int classes, float thresh) +{ + int i, j, k; + for(i = 0; i < total; ++i){ + int any = 0; + for(k = 0; k < classes; ++k) any = any || (probs[i][k] > 0); + if(!any) { + continue; + } + for(j = i+1; j < total; 
++j){ + if (box_iou(boxes[i], boxes[j]) > thresh){ + for(k = 0; k < classes; ++k){ + if (probs[i][k] < probs[j][k]) probs[i][k] = 0; + else probs[j][k] = 0; + } + } + } + } +} + +box encode_box(box b, box anchor) +{ + box encode; + encode.x = (b.x - anchor.x) / anchor.w; + encode.y = (b.y - anchor.y) / anchor.h; + encode.w = log2(b.w / anchor.w); + encode.h = log2(b.h / anchor.h); + return encode; +} + +box decode_box(box b, box anchor) +{ + box decode; + decode.x = b.x * anchor.w + anchor.x; + decode.y = b.y * anchor.h + anchor.y; + decode.w = pow(2., b.w) * anchor.w; + decode.h = pow(2., b.h) * anchor.h; + return decode; +} diff --git a/workloads/realworld/async/darknet/src/box.h b/workloads/realworld/async/darknet/src/box.h new file mode 100644 index 0000000000000000000000000000000000000000..dda3e59100c3d9e0a6bb05a80070155d9fcbc876 --- /dev/null +++ b/workloads/realworld/async/darknet/src/box.h @@ -0,0 +1,14 @@ +#ifndef BOX_H +#define BOX_H +#include "darknet.h" + +typedef struct{ + float dx, dy, dw, dh; +} dbox; + +float box_rmse(box a, box b); +dbox diou(box a, box b); +box decode_box(box b, box anchor); +box encode_box(box b, box anchor); + +#endif diff --git a/workloads/realworld/async/darknet/src/classifier.h b/workloads/realworld/async/darknet/src/classifier.h new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/workloads/realworld/async/darknet/src/classifier.h @@ -0,0 +1 @@ + diff --git a/workloads/realworld/async/darknet/src/col2im.c b/workloads/realworld/async/darknet/src/col2im.c new file mode 100644 index 0000000000000000000000000000000000000000..5c4605e197439f79fe05c41337a5f2b8103f63ba --- /dev/null +++ b/workloads/realworld/async/darknet/src/col2im.c @@ -0,0 +1,39 @@ +#include +#include +void col2im_add_pixel(float *im, int height, int width, int channels, + int row, int col, int channel, int pad, float val) +{ + row -= pad; + col -= pad; + + if (row < 0 || col < 0 || + 
row >= height || col >= width) return; + im[col + width*(row + height*channel)] += val; +} +//This one might be too, can't remember. +void col2im_cpu(float* data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_im) +{ + int c,h,w; + int height_col = (height + 2*pad - ksize) / stride + 1; + int width_col = (width + 2*pad - ksize) / stride + 1; + + int channels_col = channels * ksize * ksize; + for (c = 0; c < channels_col; ++c) { + int w_offset = c % ksize; + int h_offset = (c / ksize) % ksize; + int c_im = c / ksize / ksize; + for (h = 0; h < height_col; ++h) { + for (w = 0; w < width_col; ++w) { + int im_row = h_offset + h * stride; + int im_col = w_offset + w * stride; + int col_index = (c * height_col + h) * width_col + w; + double val = data_col[col_index]; + col2im_add_pixel(data_im, height, width, channels, + im_row, im_col, c_im, pad, val); + } + } + } +} + diff --git a/workloads/realworld/async/darknet/src/col2im.h b/workloads/realworld/async/darknet/src/col2im.h new file mode 100644 index 0000000000000000000000000000000000000000..3fbe05307db65a1f511f801670a23734e21b7dff --- /dev/null +++ b/workloads/realworld/async/darknet/src/col2im.h @@ -0,0 +1,13 @@ +#ifndef COL2IM_H +#define COL2IM_H + +void col2im_cpu(float* data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_im); + +#ifdef GPU +void col2im_gpu(float *data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_im); +#endif +#endif diff --git a/workloads/realworld/async/darknet/src/col2im_kernels.cu b/workloads/realworld/async/darknet/src/col2im_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..30ea71e2c6ac0bb81235729c37568abbaa987d3d --- /dev/null +++ b/workloads/realworld/async/darknet/src/col2im_kernels.cu @@ -0,0 +1,58 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "col2im.h" +#include "cuda_dark.h" 
+} + +// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu +// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE + +__global__ void col2im_gpu_kernel(const int n, const float* data_col, + const int height, const int width, const int ksize, + const int pad, + const int stride, + const int height_col, const int width_col, + float *data_im) { + int index = blockIdx.x*blockDim.x+threadIdx.x; + for(; index < n; index += blockDim.x*gridDim.x){ + float val = 0; + int w = index % width + pad; + int h = (index / width) % height + pad; + int c = index / (width * height); + // compute the start and end of the output + int w_col_start = (w < ksize) ? 0 : (w - ksize) / stride + 1; + int w_col_end = min(w / stride + 1, width_col); + int h_col_start = (h < ksize) ? 0 : (h - ksize) / stride + 1; + int h_col_end = min(h / stride + 1, height_col); + // equivalent implementation + int offset = + (c * ksize * ksize + h * ksize + w) * height_col * width_col; + int coeff_h_col = (1 - stride * ksize * height_col) * width_col; + int coeff_w_col = (1 - stride * height_col * width_col); + for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { + for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { + val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col]; + } + } + data_im[index] += val; + } +} + +void col2im_gpu(float *data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_im){ + // We are going to launch channels * height_col * width_col kernels, each + // kernel responsible for copying a single-channel grid. 
+ int height_col = (height + 2 * pad - ksize) / stride + 1; + int width_col = (width + 2 * pad - ksize) / stride + 1; + int num_kernels = channels * height * width; + col2im_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, + BLOCK>>>( + num_kernels, data_col, height, width, ksize, pad, + stride, height_col, + width_col, data_im); +} + diff --git a/workloads/realworld/async/darknet/src/compare.c b/workloads/realworld/async/darknet/src/compare.c new file mode 100644 index 0000000000000000000000000000000000000000..d2d2b3bdc675cf808f483d1607550e072e245396 --- /dev/null +++ b/workloads/realworld/async/darknet/src/compare.c @@ -0,0 +1,352 @@ +#include + +#include "network.h" +#include "detection_layer.h" +#include "cost_layer.h" +#include "utils.h" +#include "parser.h" +#include "box.h" + +void train_compare(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + list *plist = get_paths("data/compare.train.list"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + printf("%d\n", N); + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.classes = 20; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = COMPARE_DATA; + + load_thread = load_data_in_thread(args); + int epoch = *net.seen/N; + int i = 0; + while(1){ + ++i; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = 
avg_loss*.9 + loss*.1; + printf("%.3f: %f, %f avg, %lf seconds, %ld images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen); + free_data(train); + if(i%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_%d_minor_%d.weights",backup_directory,base, epoch, i); + save_weights(net, buff); + } + if(*net.seen/N > epoch){ + epoch = *net.seen/N; + i = 0; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + if(epoch%22 == 0) net.learning_rate *= .1; + } + } + pthread_join(load_thread, 0); + free_data(buffer); + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void validate_compare(char *filename, char *weightfile) +{ + int i = 0; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + list *plist = get_paths("data/compare.val.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size/2; + free_list(plist); + + clock_t time; + int correct = 0; + int total = 0; + int splits = 10; + int num = (i+1)*N/splits - i*N/splits; + + data val, buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.classes = 20; + args.n = num; + args.m = 0; + args.d = &buffer; + args.type = COMPARE_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(i = 1; i <= splits; ++i){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + num = (i+1)*N/splits - i*N/splits; + char **part = paths+(i*N/splits); + if(i != splits){ + args.paths = part; + load_thread = load_data_in_thread(args); + } + printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + int j,k; + for(j = 0; j < val.y.rows; ++j){ + for(k = 0; k < 20; ++k){ + if(val.y.vals[j][k*2] != val.y.vals[j][k*2+1]){ + ++total; + 
if((val.y.vals[j][k*2] < val.y.vals[j][k*2+1]) == (pred.vals[j][k*2] < pred.vals[j][k*2+1])){ + ++correct; + } + } + } + } + free_matrix(pred); + printf("%d: Acc: %f, %lf seconds, %d images\n", i, (float)correct/total, sec(clock()-time), val.X.rows); + free_data(val); + } +} + +typedef struct { + network net; + char *filename; + int class; + int classes; + float elo; + float *elos; +} sortable_bbox; + +int total_compares = 0; +int current_class = 0; + +int elo_comparator(const void*a, const void *b) +{ + sortable_bbox box1 = *(sortable_bbox*)a; + sortable_bbox box2 = *(sortable_bbox*)b; + if(box1.elos[current_class] == box2.elos[current_class]) return 0; + if(box1.elos[current_class] > box2.elos[current_class]) return -1; + return 1; +} + +int bbox_comparator(const void *a, const void *b) +{ + ++total_compares; + sortable_bbox box1 = *(sortable_bbox*)a; + sortable_bbox box2 = *(sortable_bbox*)b; + network net = box1.net; + int class = box1.class; + + image im1 = load_image_color(box1.filename, net.w, net.h); + image im2 = load_image_color(box2.filename, net.w, net.h); + float *X = calloc(net.w*net.h*net.c, sizeof(float)); + memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); + memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); + float *predictions = network_predict(net, X); + + free_image(im1); + free_image(im2); + free(X); + if (predictions[class*2] > predictions[class*2+1]){ + return 1; + } + return -1; +} + +void bbox_update(sortable_bbox *a, sortable_bbox *b, int class, int result) +{ + int k = 32; + float EA = 1./(1+pow(10, (b->elos[class] - a->elos[class])/400.)); + float EB = 1./(1+pow(10, (a->elos[class] - b->elos[class])/400.)); + float SA = result ? 1 : 0; + float SB = result ? 
0 : 1; + a->elos[class] += k*(SA - EA); + b->elos[class] += k*(SB - EB); +} + +void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, int class) +{ + image im1 = load_image_color(a->filename, net.w, net.h); + image im2 = load_image_color(b->filename, net.w, net.h); + float *X = calloc(net.w*net.h*net.c, sizeof(float)); + memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); + memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); + float *predictions = network_predict(net, X); + ++total_compares; + + int i; + for(i = 0; i < classes; ++i){ + if(class < 0 || class == i){ + int result = predictions[i*2] > predictions[i*2+1]; + bbox_update(a, b, i, result); + } + } + + free_image(im1); + free_image(im2); + free(X); +} + +void SortMaster3000(char *filename, char *weightfile) +{ + int i = 0; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + set_batch_network(&net, 1); + + list *plist = get_paths("data/compare.sort.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + free_list(plist); + sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); + printf("Sorting %d boxes...\n", N); + for(i = 0; i < N; ++i){ + boxes[i].filename = paths[i]; + boxes[i].net = net; + boxes[i].class = 7; + boxes[i].elo = 1500; + } + clock_t time=clock(); + qsort(boxes, N, sizeof(sortable_bbox), bbox_comparator); + for(i = 0; i < N; ++i){ + printf("%s\n", boxes[i].filename); + } + printf("Sorted in %d compares, %f secs\n", total_compares, sec(clock()-time)); +} + +void BattleRoyaleWithCheese(char *filename, char *weightfile) +{ + int classes = 20; + int i,j; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + set_batch_network(&net, 1); + + list *plist = get_paths("data/compare.sort.list"); + //list *plist = 
get_paths("data/compare.small.list"); + //list *plist = get_paths("data/compare.cat.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + int total = N; + free_list(plist); + sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); + printf("Battling %d boxes...\n", N); + for(i = 0; i < N; ++i){ + boxes[i].filename = paths[i]; + boxes[i].net = net; + boxes[i].classes = classes; + boxes[i].elos = calloc(classes, sizeof(float));; + for(j = 0; j < classes; ++j){ + boxes[i].elos[j] = 1500; + } + } + int round; + clock_t time=clock(); + for(round = 1; round <= 4; ++round){ + clock_t round_time=clock(); + printf("Round: %d\n", round); + shuffle(boxes, N, sizeof(sortable_bbox)); + for(i = 0; i < N/2; ++i){ + bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, -1); + } + printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); + } + + int class; + + for (class = 0; class < classes; ++class){ + + N = total; + current_class = class; + qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + N /= 2; + + for(round = 1; round <= 100; ++round){ + clock_t round_time=clock(); + printf("Round: %d\n", round); + + sorta_shuffle(boxes, N, sizeof(sortable_bbox), 10); + for(i = 0; i < N/2; ++i){ + bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, class); + } + qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + if(round <= 20) N = (N*9/10)/2*2; + + printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); + } + char buff[256]; + sprintf(buff, "results/battle_%d.log", class); + FILE *outfp = fopen(buff, "w"); + for(i = 0; i < N; ++i){ + fprintf(outfp, "%s %f\n", boxes[i].filename, boxes[i].elos[class]); + } + fclose(outfp); + } + printf("Tournament in %d compares, %f secs\n", total_compares, sec(clock()-time)); +} + +void run_compare(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + 
return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + //char *filename = (argc > 5) ? argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_compare(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_compare(cfg, weights); + else if(0==strcmp(argv[2], "sort")) SortMaster3000(cfg, weights); + else if(0==strcmp(argv[2], "battle")) BattleRoyaleWithCheese(cfg, weights); + /* + else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); + else if(0==strcmp(argv[2], "extract")) extract_boxes(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_recall(cfg, weights); + */ +} diff --git a/workloads/realworld/async/darknet/src/connected_layer.c b/workloads/realworld/async/darknet/src/connected_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..ec25b44d998661c4735cd9a8a86f2355a0ae0080 --- /dev/null +++ b/workloads/realworld/async/darknet/src/connected_layer.c @@ -0,0 +1,336 @@ +#include "connected_layer.h" +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam) +{ + int i; + layer l = {0}; + l.learning_rate_scale = 1; + l.type = CONNECTED; + + l.inputs = inputs; + l.outputs = outputs; + l.batch=batch; + l.batch_normalize = batch_normalize; + l.h = 1; + l.w = 1; + l.c = inputs; + l.out_h = 1; + l.out_w = 1; + l.out_c = outputs; + + l.output = calloc(batch*outputs, sizeof(float)); + l.delta = calloc(batch*outputs, sizeof(float)); + + l.weight_updates = calloc(inputs*outputs, sizeof(float)); + l.bias_updates = calloc(outputs, sizeof(float)); + + l.weights = calloc(outputs*inputs, sizeof(float)); + l.biases = calloc(outputs, sizeof(float)); + + l.forward = forward_connected_layer; + l.backward = backward_connected_layer; + l.update = update_connected_layer; 
+ + //float scale = 1./sqrt(inputs); + float scale = sqrt(2./inputs); + for(i = 0; i < outputs*inputs; ++i){ + l.weights[i] = scale*rand_uniform(-1, 1); + } + + for(i = 0; i < outputs; ++i){ + l.biases[i] = 0; + } + + if(adam){ + l.m = calloc(l.inputs*l.outputs, sizeof(float)); + l.v = calloc(l.inputs*l.outputs, sizeof(float)); + l.bias_m = calloc(l.outputs, sizeof(float)); + l.scale_m = calloc(l.outputs, sizeof(float)); + l.bias_v = calloc(l.outputs, sizeof(float)); + l.scale_v = calloc(l.outputs, sizeof(float)); + } + if(batch_normalize){ + l.scales = calloc(outputs, sizeof(float)); + l.scale_updates = calloc(outputs, sizeof(float)); + for(i = 0; i < outputs; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(outputs, sizeof(float)); + l.mean_delta = calloc(outputs, sizeof(float)); + l.variance = calloc(outputs, sizeof(float)); + l.variance_delta = calloc(outputs, sizeof(float)); + + l.rolling_mean = calloc(outputs, sizeof(float)); + l.rolling_variance = calloc(outputs, sizeof(float)); + + l.x = calloc(batch*outputs, sizeof(float)); + l.x_norm = calloc(batch*outputs, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_connected_layer_gpu; + l.backward_gpu = backward_connected_layer_gpu; + l.update_gpu = update_connected_layer_gpu; + + l.weights_gpu = cuda_make_array(l.weights, outputs*inputs); + l.biases_gpu = cuda_make_array(l.biases, outputs); + + l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs); + + l.output_gpu = cuda_make_array(l.output, outputs*batch); + l.delta_gpu = cuda_make_array(l.delta, outputs*batch); + if (adam) { + l.m_gpu = cuda_make_array(0, inputs*outputs); + l.v_gpu = cuda_make_array(0, inputs*outputs); + l.bias_m_gpu = cuda_make_array(0, outputs); + l.bias_v_gpu = cuda_make_array(0, outputs); + l.scale_m_gpu = cuda_make_array(0, outputs); + l.scale_v_gpu = cuda_make_array(0, outputs); + } + + if(batch_normalize){ + l.mean_gpu = 
cuda_make_array(l.mean, outputs); + l.variance_gpu = cuda_make_array(l.variance, outputs); + + l.rolling_mean_gpu = cuda_make_array(l.mean, outputs); + l.rolling_variance_gpu = cuda_make_array(l.variance, outputs); + + l.mean_delta_gpu = cuda_make_array(l.mean, outputs); + l.variance_delta_gpu = cuda_make_array(l.variance, outputs); + + l.scales_gpu = cuda_make_array(l.scales, outputs); + l.scale_updates_gpu = cuda_make_array(l.scale_updates, outputs); + + l.x_gpu = cuda_make_array(l.output, l.batch*outputs); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*outputs); +#ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); +#endif + } +#endif + l.activation = activation; + fprintf(stderr, "connected %4d -> %4d\n", inputs, outputs); + return l; +} + +void update_connected_layer(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.outputs, momentum, l.bias_updates, 1); + + if(l.batch_normalize){ + axpy_cpu(l.outputs, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.outputs, momentum, l.scale_updates, 1); + } + + axpy_cpu(l.inputs*l.outputs, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(l.inputs*l.outputs, momentum, l.weight_updates, 1); +} + +void forward_connected_layer(layer l, network net) +{ + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + int m = l.batch; + int k = l.inputs; + int n = l.outputs; + float *a = net.input; + float *b = l.weights; + float *c = l.output; 
+ gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); + if(l.batch_normalize){ + forward_batchnorm_layer(l, net); + } else { + add_bias(l.output, l.biases, l.batch, l.outputs, 1); + } + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_connected_layer(layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l.bias_updates, l.delta, l.batch, l.outputs, 1); + } + + int m = l.outputs; + int k = l.batch; + int n = l.inputs; + float *a = l.delta; + float *b = net.input; + float *c = l.weight_updates; + gemm(1,0,m,n,k,1,a,m,b,n,1,c,n); + + m = l.batch; + k = l.outputs; + n = l.inputs; + + a = l.delta; + b = l.weights; + c = net.delta; + + if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); +} + + +void denormalize_connected_layer(layer l) +{ + int i, j; + for(i = 0; i < l.outputs; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .000001); + for(j = 0; j < l.inputs; ++j){ + l.weights[i*l.inputs + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + + +void statistics_connected_layer(layer l) +{ + if(l.batch_normalize){ + printf("Scales "); + print_statistics(l.scales, l.outputs); + /* + printf("Rolling Mean "); + print_statistics(l.rolling_mean, l.outputs); + printf("Rolling Variance "); + print_statistics(l.rolling_variance, l.outputs); + */ + } + printf("Biases "); + print_statistics(l.biases, l.outputs); + printf("Weights "); + print_statistics(l.weights, l.outputs); +} + +#ifdef GPU + +void pull_connected_layer(layer l) +{ + cuda_pull_array(l.weights_gpu, l.weights, l.inputs*l.outputs); + cuda_pull_array(l.biases_gpu, l.biases, l.outputs); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs); + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + if (l.batch_normalize){ + cuda_pull_array(l.scales_gpu, 
l.scales, l.outputs); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs); + } +} + +void push_connected_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.inputs*l.outputs); + cuda_push_array(l.biases_gpu, l.biases, l.outputs); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs); + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + if (l.batch_normalize){ + cuda_push_array(l.scales_gpu, l.scales, l.outputs); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs); + } +} + +void update_connected_layer_gpu(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + if(a.adam){ + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.inputs*l.outputs, batch, a.t); + adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t); + if(l.scales_gpu){ + adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t); + } + }else{ + axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1); + + if(l.batch_normalize){ + axpy_gpu(l.outputs, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.outputs, momentum, l.scale_updates_gpu, 1); + } + + axpy_gpu(l.inputs*l.outputs, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.inputs*l.outputs, momentum, l.weight_updates_gpu, 1); + } +} + +void 
forward_connected_layer_gpu(layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + + int m = l.batch; + int k = l.inputs; + int n = l.outputs; + float * a = net.input_gpu; + float * b = l.weights_gpu; + float * c = l.output_gpu; + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (l.batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.outputs, 1); + } + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_connected_layer_gpu(layer l, network net) +{ + constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + if(l.batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.outputs, 1); + } + + int m = l.outputs; + int k = l.batch; + int n = l.inputs; + float * a = l.delta_gpu; + float * b = net.input_gpu; + float * c = l.weight_updates_gpu; + gemm_gpu(1,0,m,n,k,1,a,m,b,n,1,c,n); + + m = l.batch; + k = l.outputs; + n = l.inputs; + + a = l.delta_gpu; + b = l.weights_gpu; + c = net.delta_gpu; + + if(c) gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); +} +#endif diff --git a/workloads/realworld/async/darknet/src/connected_layer.h b/workloads/realworld/async/darknet/src/connected_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..6727a964eaa923906b202ff337aa69ad91817117 --- /dev/null +++ b/workloads/realworld/async/darknet/src/connected_layer.h @@ -0,0 +1,23 @@ +#ifndef CONNECTED_LAYER_H +#define CONNECTED_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam); + +void forward_connected_layer(layer l, network net); +void backward_connected_layer(layer l, network net); +void update_connected_layer(layer l, update_args a); + +#ifdef GPU +void 
forward_connected_layer_gpu(layer l, network net); +void backward_connected_layer_gpu(layer l, network net); +void update_connected_layer_gpu(layer l, update_args a); +void push_connected_layer(layer l); +void pull_connected_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/async/darknet/src/convolutional_kernels.cu b/workloads/realworld/async/darknet/src/convolutional_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..ed9d76e49548e4636c545d4e5d33ddc9b63e5905 --- /dev/null +++ b/workloads/realworld/async/darknet/src/convolutional_kernels.cu @@ -0,0 +1,330 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "gemm.h" +#include "blas.h" +#include "im2col.h" +#include "col2im.h" +#include "utils.h" +#include "cuda_dark.h" +} + +__global__ void binarize_kernel(float *x, int n, float *binary) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= n) return; + binary[i] = (x[i] >= 0) ? 1 : -1; +} + +void binarize_gpu(float *x, int n, float *binary) +{ + binarize_kernel<<>>(x, n, binary); + check_error(cudaPeekAtLastError()); +} + +__global__ void binarize_input_kernel(float *input, int n, int size, float *binary) +{ + int s = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (s >= size) return; + int i = 0; + float mean = 0; + for(i = 0; i < n; ++i){ + mean += fabsf(input[i*size + s]); + } + mean = mean / n; + for(i = 0; i < n; ++i){ + binary[i*size + s] = (input[i*size + s] > 0) ? 
mean : -mean; + } +} + +void binarize_input_gpu(float *input, int n, int size, float *binary) +{ + binarize_input_kernel<<>>(input, n, size, binary); + check_error(cudaPeekAtLastError()); +} + + +__global__ void binarize_weights_kernel(float *weights, int n, int size, float *binary) +{ + int f = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (f >= n) return; + int i = 0; + float mean = 0; + for(i = 0; i < size; ++i){ + mean += fabsf(weights[f*size + i]); + } + mean = mean / size; + for(i = 0; i < size; ++i){ + binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean; + //binary[f*size + i] = weights[f*size + i]; + } +} + +void binarize_weights_gpu(float *weights, int n, int size, float *binary) +{ + binarize_weights_kernel<<>>(weights, n, size, binary); + check_error(cudaPeekAtLastError()); +} + +void forward_convolutional_layer_gpu(convolutional_layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + if(l.binary){ + binarize_weights_gpu(l.weights_gpu, l.n, l.c/l.groups*l.size*l.size, l.binary_weights_gpu); + swap_binary(&l); + } + + if(l.xnor){ + binarize_weights_gpu(l.weights_gpu, l.n, l.c/l.groups*l.size*l.size, l.binary_weights_gpu); + swap_binary(&l); + binarize_gpu(net.input_gpu, l.c*l.h*l.w*l.batch, l.binary_input_gpu); + net.input_gpu = l.binary_input_gpu; + } + +#ifdef CUDNN + float one = 1; + cudnnConvolutionForward(cudnn_handle(), + &one, + l.srcTensorDesc, + net.input_gpu, + l.weightDesc, + l.weights_gpu, + l.convDesc, + l.fw_algo, + net.workspace, + l.workspace_size, + &one, + l.dstTensorDesc, + l.output_gpu); + +#else + int i, j; + int m = l.n/l.groups; + int k = l.size*l.size*l.c/l.groups; + int n = l.out_w*l.out_h; + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.weights_gpu + j*l.nweights/l.groups; + float *b = net.workspace; + float *c = l.output_gpu + (i*l.groups + j)*n*m; + float *im = net.input_gpu + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + + if (l.size == 1){ + b = 
im; + } else { + im2col_gpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + } + gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } +#endif + + if (l.batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); + } + + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); + //if(l.dot > 0) dot_error_gpu(l); + if(l.binary || l.xnor) swap_binary(&l); +} + +__global__ void smooth_kernel(float *x, int n, int w, int h, int c, int size, float rate, float *delta) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int j = id % w; + id /= w; + int i = id % h; + id /= h; + int k = id % c; + id /= c; + int b = id; + + int w_offset = -(size/2.f); + int h_offset = -(size/2.f); + + int out_index = j + w*(i + h*(k + c*b)); + int l, m; + for(l = 0; l < size; ++l){ + for(m = 0; m < size; ++m){ + int cur_h = h_offset + i + l; + int cur_w = w_offset + j + m; + int index = cur_w + w*(cur_h + h*(k + b*c)); + int valid = (cur_h >= 0 && cur_h < h && + cur_w >= 0 && cur_w < w); + delta[out_index] += valid ? 
rate*(x[index] - x[out_index]) : 0; + } + } +} + +extern "C" void smooth_layer(layer l, int size, float rate) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.out_c; + + size_t n = h*w*c*l.batch; + + smooth_kernel<<>>(l.output_gpu, n, l.w, l.h, l.c, size, rate, l.delta_gpu); + check_error(cudaPeekAtLastError()); +} + +void backward_convolutional_layer_gpu(convolutional_layer l, network net) +{ + if(l.smooth){ + smooth_layer(l, 5, l.smooth); + } + //constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + + + if(l.batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); + } + float *original_input = net.input_gpu; + + if(l.xnor) net.input_gpu = l.binary_input_gpu; +#ifdef CUDNN + float one = 1; + cudnnConvolutionBackwardFilter(cudnn_handle(), + &one, + l.srcTensorDesc, + net.input_gpu, + l.ddstTensorDesc, + l.delta_gpu, + l.convDesc, + l.bf_algo, + net.workspace, + l.workspace_size, + &one, + l.dweightDesc, + l.weight_updates_gpu); + + if(net.delta_gpu){ + if(l.binary || l.xnor) swap_binary(&l); + cudnnConvolutionBackwardData(cudnn_handle(), + &one, + l.weightDesc, + l.weights_gpu, + l.ddstTensorDesc, + l.delta_gpu, + l.convDesc, + l.bd_algo, + net.workspace, + l.workspace_size, + &one, + l.dsrcTensorDesc, + net.delta_gpu); + if(l.binary || l.xnor) swap_binary(&l); + if(l.xnor) gradient_array_gpu(original_input, l.batch*l.c*l.h*l.w, HARDTAN, net.delta_gpu); + } + +#else + int m = l.n/l.groups; + int n = l.size*l.size*l.c/l.groups; + int k = l.out_w*l.out_h; + + int i, j; + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.delta_gpu + (i*l.groups + j)*m*k; + float *b = net.workspace; + float *c = l.weight_updates_gpu + j*l.nweights/l.groups; + + float *im = net.input_gpu+(i*l.groups + j)*l.c/l.groups*l.h*l.w; + float *imd = net.delta_gpu+(i*l.groups + 
j)*l.c/l.groups*l.h*l.w; + + im2col_gpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (net.delta_gpu) { + if (l.binary || l.xnor) swap_binary(&l); + a = l.weights_gpu + j*l.nweights/l.groups; + b = l.delta_gpu + (i*l.groups + j)*m*k; + c = net.workspace; + if (l.size == 1) { + c = imd; + } + + gemm_gpu(1,0,n,k,m,1,a,n,b,k,0,c,k); + + if (l.size != 1) { + col2im_gpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd); + } + if(l.binary || l.xnor) { + swap_binary(&l); + } + } + if(l.xnor) gradient_array_gpu(original_input + i*l.c*l.h*l.w, l.c*l.h*l.w, HARDTAN, net.delta_gpu + i*l.c*l.h*l.w); + } + } +#endif +} + +void pull_convolutional_layer(layer l) +{ + cuda_pull_array(l.weights_gpu, l.weights, l.nweights); + cuda_pull_array(l.biases_gpu, l.biases, l.n); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_pull_array(l.scales_gpu, l.scales, l.n); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + +void push_convolutional_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.nweights); + cuda_push_array(l.biases_gpu, l.biases, l.n); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_push_array(l.scales_gpu, l.scales, l.n); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + +void update_convolutional_layer_gpu(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + if(a.adam){ + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, 
learning_rate, l.nweights, batch, a.t); + adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + if(l.scales_gpu){ + adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + } + }else{ + axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1); + + axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.n, momentum, l.bias_updates_gpu, 1); + + if(l.scales_gpu){ + axpy_gpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.n, momentum, l.scale_updates_gpu, 1); + } + } + if(l.clip){ + constrain_gpu(l.nweights, l.clip, l.weights_gpu, 1); + } +} + + diff --git a/workloads/realworld/async/darknet/src/convolutional_layer.c b/workloads/realworld/async/darknet/src/convolutional_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..1fb58b0933b06f2b27ec89f9f7c05f0b2b8a87eb --- /dev/null +++ b/workloads/realworld/async/darknet/src/convolutional_layer.c @@ -0,0 +1,622 @@ +#include "convolutional_layer.h" +#include "utils.h" +#include "batchnorm_layer.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" +#include +#include + +#ifdef AI2 +#include "xnor_layer.h" +#endif + +void swap_binary(convolutional_layer *l) +{ + float *swap = l->weights; + l->weights = l->binary_weights; + l->binary_weights = swap; + +#ifdef GPU + swap = l->weights_gpu; + l->weights_gpu = l->binary_weights_gpu; + l->binary_weights_gpu = swap; +#endif +} + +void binarize_weights(float *weights, int n, int size, float *binary) +{ + int i, f; + for(f = 0; f < n; ++f){ + float mean = 0; + for(i = 0; i < size; ++i){ + mean += fabs(weights[f*size + i]); + } + mean = 
mean / size; + for(i = 0; i < size; ++i){ + binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean; + } + } +} + +void binarize_cpu(float *input, int n, float *binary) +{ + int i; + for(i = 0; i < n; ++i){ + binary[i] = (input[i] > 0) ? 1 : -1; + } +} + +void binarize_input(float *input, int n, int size, float *binary) +{ + int i, s; + for(s = 0; s < size; ++s){ + float mean = 0; + for(i = 0; i < n; ++i){ + mean += fabs(input[i*size + s]); + } + mean = mean / n; + for(i = 0; i < n; ++i){ + binary[i*size + s] = (input[i*size + s] > 0) ? mean : -mean; + } + } +} + +int convolutional_out_height(convolutional_layer l) +{ + return (l.h + 2*l.pad - l.size) / l.stride + 1; +} + +int convolutional_out_width(convolutional_layer l) +{ + return (l.w + 2*l.pad - l.size) / l.stride + 1; +} + +image get_convolutional_image(convolutional_layer l) +{ + return float_to_image(l.out_w,l.out_h,l.out_c,l.output); +} + +image get_convolutional_delta(convolutional_layer l) +{ + return float_to_image(l.out_w,l.out_h,l.out_c,l.delta); +} + +static size_t get_workspace_size(layer l){ +#ifdef CUDNN + if(gpu_index >= 0){ + size_t most = 0; + size_t s = 0; + cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(), + l.srcTensorDesc, + l.weightDesc, + l.convDesc, + l.dstTensorDesc, + l.fw_algo, + &s); + if (s > most) most = s; + cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnn_handle(), + l.srcTensorDesc, + l.ddstTensorDesc, + l.convDesc, + l.dweightDesc, + l.bf_algo, + &s); + if (s > most) most = s; + cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(), + l.weightDesc, + l.ddstTensorDesc, + l.convDesc, + l.dsrcTensorDesc, + l.bd_algo, + &s); + if (s > most) most = s; + return most; + } +#endif + return (size_t)l.out_h*l.out_w*l.size*l.size*l.c/l.groups*sizeof(float); +} + +#ifdef GPU +#ifdef CUDNN +void cudnn_convolutional_setup(layer *l) +{ + cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); + 
cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + + cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + + cudnnSetFilter4dDescriptor(l->dweightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size); + cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size); + #if CUDNN_MAJOR >= 6 + cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT); + #else + cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION); + #endif + + #if CUDNN_MAJOR >= 7 + cudnnSetConvolutionGroupCount(l->convDesc, l->groups); + #else + if(l->groups > 1){ + error("CUDNN < 7 doesn't support groups, please upgrade!"); + } + #endif + + cudnnGetConvolutionForwardAlgorithm(cudnn_handle(), + l->srcTensorDesc, + l->weightDesc, + l->convDesc, + l->dstTensorDesc, + CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->fw_algo); + cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(), + l->weightDesc, + l->ddstTensorDesc, + l->convDesc, + l->dsrcTensorDesc, + CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->bd_algo); + cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(), + l->srcTensorDesc, + l->ddstTensorDesc, + l->convDesc, + l->dweightDesc, + CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->bf_algo); +} +#endif +#endif + +convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, 
ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam) +{ + int i; + convolutional_layer l = {0}; + l.type = CONVOLUTIONAL; + + l.groups = groups; + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.binary = binary; + l.xnor = xnor; + l.batch = batch; + l.stride = stride; + l.size = size; + l.pad = padding; + l.batch_normalize = batch_normalize; + + l.weights = calloc(c/groups*n*size*size, sizeof(float)); + l.weight_updates = calloc(c/groups*n*size*size, sizeof(float)); + + l.biases = calloc(n, sizeof(float)); + l.bias_updates = calloc(n, sizeof(float)); + + l.nweights = c/groups*n*size*size; + l.nbiases = n; + + // float scale = 1./sqrt(size*size*c); + float scale = sqrt(2./(size*size*c/l.groups)); + //printf("convscale %f\n", scale); + //scale = .02; + //for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1); + for(i = 0; i < l.nweights; ++i) l.weights[i] = scale*rand_normal(); + int out_w = convolutional_out_width(l); + int out_h = convolutional_out_height(l); + l.out_h = out_h; + l.out_w = out_w; + l.out_c = n; + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = l.w * l.h * l.c; + + l.output = calloc(l.batch*l.outputs, sizeof(float)); + l.delta = calloc(l.batch*l.outputs, sizeof(float)); + + l.forward = forward_convolutional_layer; + l.backward = backward_convolutional_layer; + l.update = update_convolutional_layer; + if(binary){ + l.binary_weights = calloc(l.nweights, sizeof(float)); + l.cweights = calloc(l.nweights, sizeof(char)); + l.scales = calloc(n, sizeof(float)); + } + if(xnor){ + l.binary_weights = calloc(l.nweights, sizeof(float)); + l.binary_input = calloc(l.inputs*l.batch, sizeof(float)); + } + + if(batch_normalize){ + l.scales = calloc(n, sizeof(float)); + l.scale_updates = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(n, sizeof(float)); + l.variance = calloc(n, sizeof(float)); + + l.mean_delta = calloc(n, sizeof(float)); + l.variance_delta = calloc(n, 
sizeof(float)); + + l.rolling_mean = calloc(n, sizeof(float)); + l.rolling_variance = calloc(n, sizeof(float)); + l.x = calloc(l.batch*l.outputs, sizeof(float)); + l.x_norm = calloc(l.batch*l.outputs, sizeof(float)); + } + if(adam){ + l.m = calloc(l.nweights, sizeof(float)); + l.v = calloc(l.nweights, sizeof(float)); + l.bias_m = calloc(n, sizeof(float)); + l.scale_m = calloc(n, sizeof(float)); + l.bias_v = calloc(n, sizeof(float)); + l.scale_v = calloc(n, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_convolutional_layer_gpu; + l.backward_gpu = backward_convolutional_layer_gpu; + l.update_gpu = update_convolutional_layer_gpu; + + if(gpu_index >= 0){ + if (adam) { + l.m_gpu = cuda_make_array(l.m, l.nweights); + l.v_gpu = cuda_make_array(l.v, l.nweights); + l.bias_m_gpu = cuda_make_array(l.bias_m, n); + l.bias_v_gpu = cuda_make_array(l.bias_v, n); + l.scale_m_gpu = cuda_make_array(l.scale_m, n); + l.scale_v_gpu = cuda_make_array(l.scale_v, n); + } + + l.weights_gpu = cuda_make_array(l.weights, l.nweights); + l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights); + + l.biases_gpu = cuda_make_array(l.biases, n); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + + if(binary){ + l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); + } + if(xnor){ + l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); + l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch); + } + + if(batch_normalize){ + l.mean_gpu = cuda_make_array(l.mean, n); + l.variance_gpu = cuda_make_array(l.variance, n); + + l.rolling_mean_gpu = cuda_make_array(l.mean, n); + l.rolling_variance_gpu = cuda_make_array(l.variance, n); + + l.mean_delta_gpu = cuda_make_array(l.mean, n); + l.variance_delta_gpu = cuda_make_array(l.variance, n); + + l.scales_gpu = cuda_make_array(l.scales, n); + l.scale_updates_gpu = 
cuda_make_array(l.scale_updates, n); + + l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + } +#ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.srcTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnCreateFilterDescriptor(&l.weightDesc); + cudnnCreateTensorDescriptor(&l.dsrcTensorDesc); + cudnnCreateTensorDescriptor(&l.ddstTensorDesc); + cudnnCreateFilterDescriptor(&l.dweightDesc); + cudnnCreateConvolutionDescriptor(&l.convDesc); + cudnn_convolutional_setup(&l); +#endif + } +#endif + l.workspace_size = get_workspace_size(l); + l.activation = activation; + + fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BFLOPs\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, (2.0 * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w)/1000000000.); + + return l; +} + +void denormalize_convolutional_layer(convolutional_layer l) +{ + int i, j; + for(i = 0; i < l.n; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001); + for(j = 0; j < l.c/l.groups*l.size*l.size; ++j){ + l.weights[i*l.c/l.groups*l.size*l.size + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + +/* +void test_convolutional_layer() +{ + convolutional_layer l = make_convolutional_layer(1, 5, 5, 3, 2, 5, 2, 1, LEAKY, 1, 0, 0, 0); + l.batch_normalize = 1; + float data[] = {1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3}; + //net.input = data; + //forward_convolutional_layer(l); +} +*/ + +void resize_convolutional_layer(convolutional_layer *l, int w, int h) +{ + l->w = w; + l->h = h; + int out_w = convolutional_out_width(*l); + int out_h = convolutional_out_height(*l); + + l->out_w = out_w; + 
l->out_h = out_h; + + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->w * l->h * l->c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + if(l->batch_normalize){ + l->x = realloc(l->x, l->batch*l->outputs*sizeof(float)); + l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float)); + } + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); + + if(l->batch_normalize){ + cuda_free(l->x_gpu); + cuda_free(l->x_norm_gpu); + + l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); + l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); + } +#ifdef CUDNN + cudnn_convolutional_setup(l); +#endif +#endif + l->workspace_size = get_workspace_size(*l); +} + +void add_bias(float *output, float *biases, int batch, int n, int size) +{ + int i,j,b; + for(b = 0; b < batch; ++b){ + for(i = 0; i < n; ++i){ + for(j = 0; j < size; ++j){ + output[(b*n + i)*size + j] += biases[i]; + } + } + } +} + +void scale_bias(float *output, float *scales, int batch, int n, int size) +{ + int i,j,b; + for(b = 0; b < batch; ++b){ + for(i = 0; i < n; ++i){ + for(j = 0; j < size; ++j){ + output[(b*n + i)*size + j] *= scales[i]; + } + } + } +} + +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size) +{ + int i,b; + for(b = 0; b < batch; ++b){ + for(i = 0; i < n; ++i){ + bias_updates[i] += sum_array(delta+size*(i+b*n), size); + } + } +} + +void forward_convolutional_layer(convolutional_layer l, network net) +{ + int i, j; + + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + + if(l.xnor){ + binarize_weights(l.weights, l.n, l.c/l.groups*l.size*l.size, l.binary_weights); + swap_binary(&l); + binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input); + net.input = l.binary_input; + } + + int m = l.n/l.groups; + 
int k = l.size*l.size*l.c/l.groups; + int n = l.out_w*l.out_h; + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.weights + j*l.nweights/l.groups; + float *b = net.workspace; + float *c = l.output + (i*l.groups + j)*n*m; + float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + + if (l.size == 1) { + b = im; + } else { + im2col_cpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + } + gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } + + if(l.batch_normalize){ + forward_batchnorm_layer(l, net); + } else { + add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w); + } + + activate_array(l.output, l.outputs*l.batch, l.activation); + if(l.binary || l.xnor) swap_binary(&l); +} + +void backward_convolutional_layer(convolutional_layer l, network net) +{ + int i, j; + int m = l.n/l.groups; + int n = l.size*l.size*l.c/l.groups; + int k = l.out_w*l.out_h; + + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l.bias_updates, l.delta, l.batch, l.n, k); + } + + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.delta + (i*l.groups + j)*m*k; + float *b = net.workspace; + float *c = l.weight_updates + j*l.nweights/l.groups; + + float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + float *imd = net.delta + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + + if(l.size == 1){ + b = im; + } else { + im2col_cpu(im, l.c/l.groups, l.h, l.w, + l.size, l.stride, l.pad, b); + } + + gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (net.delta) { + a = l.weights + j*l.nweights/l.groups; + b = l.delta + (i*l.groups + j)*m*k; + c = net.workspace; + if (l.size == 1) { + c = imd; + } + + gemm(1,0,n,k,m,1,a,n,b,k,0,c,k); + + if (l.size != 1) { + col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd); + } + } + } + } +} + +void update_convolutional_layer(convolutional_layer l, update_args a) +{ + float 
learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.n, momentum, l.bias_updates, 1); + + if(l.scales){ + axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.n, momentum, l.scale_updates, 1); + } + + axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(l.nweights, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(l.nweights, momentum, l.weight_updates, 1); +} + + +image get_convolutional_weight(convolutional_layer l, int i) +{ + int h = l.size; + int w = l.size; + int c = l.c/l.groups; + return float_to_image(w,h,c,l.weights+i*h*w*c); +} + +void rgbgr_weights(convolutional_layer l) +{ + int i; + for(i = 0; i < l.n; ++i){ + image im = get_convolutional_weight(l, i); + if (im.c == 3) { + rgbgr_image(im); + } + } +} + +void rescale_weights(convolutional_layer l, float scale, float trans) +{ + int i; + for(i = 0; i < l.n; ++i){ + image im = get_convolutional_weight(l, i); + if (im.c == 3) { + scale_image(im, scale); + float sum = sum_array(im.data, im.w*im.h*im.c); + l.biases[i] += sum*trans; + } + } +} + +image *get_weights(convolutional_layer l) +{ + image *weights = calloc(l.n, sizeof(image)); + int i; + for(i = 0; i < l.n; ++i){ + weights[i] = copy_image(get_convolutional_weight(l, i)); + normalize_image(weights[i]); + /* + char buff[256]; + sprintf(buff, "filter%d", i); + save_image(weights[i], buff); + */ + } + //error("hey"); + return weights; +} + +image *visualize_convolutional_layer(convolutional_layer l, char *window, image *prev_weights) +{ + image *single_weights = get_weights(l); + show_images(single_weights, l.n, window); + + image delta = get_convolutional_image(l); + image dc = collapse_image_layers(delta, 1); + char buff[256]; + sprintf(buff, "%s: Output", window); + //show_image(dc, buff); + //save_image(dc, 
buff); + free_image(dc); + return single_weights; +} + diff --git a/workloads/realworld/async/darknet/src/convolutional_layer.h b/workloads/realworld/async/darknet/src/convolutional_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..baacf38f4127a42abe009ef8aa3b59543433a286 --- /dev/null +++ b/workloads/realworld/async/darknet/src/convolutional_layer.h @@ -0,0 +1,50 @@ +#ifndef CONVOLUTIONAL_LAYER_H +#define CONVOLUTIONAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +typedef layer convolutional_layer; + +#ifdef GPU +void forward_convolutional_layer_gpu(convolutional_layer layer, network net); +void backward_convolutional_layer_gpu(convolutional_layer layer, network net); +void update_convolutional_layer_gpu(convolutional_layer layer, update_args a); + +void push_convolutional_layer(convolutional_layer layer); +void pull_convolutional_layer(convolutional_layer layer); + +void add_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); +void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t); +#ifdef CUDNN +void cudnn_convolutional_setup(layer *l); +#endif +#endif + +convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam); +void resize_convolutional_layer(convolutional_layer *layer, int w, int h); +void forward_convolutional_layer(const convolutional_layer layer, network net); +void update_convolutional_layer(convolutional_layer layer, update_args a); +image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_weights); +void binarize_weights(float *weights, int n, int size, float *binary); +void 
swap_binary(convolutional_layer *l); +void binarize_weights2(float *weights, int n, int size, char *binary, float *scales); + +void backward_convolutional_layer(convolutional_layer layer, network net); + +void add_bias(float *output, float *biases, int batch, int n, int size); +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); + +image get_convolutional_image(convolutional_layer layer); +image get_convolutional_delta(convolutional_layer layer); +image get_convolutional_weight(convolutional_layer layer, int i); + +int convolutional_out_height(convolutional_layer layer); +int convolutional_out_width(convolutional_layer layer); + +#endif + diff --git a/workloads/realworld/async/darknet/src/cost_layer.c b/workloads/realworld/async/darknet/src/cost_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..85fa85daf306dda03c113a6bbdc2d92b25d0b00d --- /dev/null +++ b/workloads/realworld/async/darknet/src/cost_layer.c @@ -0,0 +1,176 @@ +#include "cost_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include +#include +#include +#include + +COST_TYPE get_cost_type(char *s) +{ + if (strcmp(s, "seg")==0) return SEG; + if (strcmp(s, "sse")==0) return SSE; + if (strcmp(s, "masked")==0) return MASKED; + if (strcmp(s, "smooth")==0) return SMOOTH; + if (strcmp(s, "L1")==0) return L1; + if (strcmp(s, "wgan")==0) return WGAN; + fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s); + return SSE; +} + +char *get_cost_string(COST_TYPE a) +{ + switch(a){ + case SEG: + return "seg"; + case SSE: + return "sse"; + case MASKED: + return "masked"; + case SMOOTH: + return "smooth"; + case L1: + return "L1"; + case WGAN: + return "wgan"; + } + return "sse"; +} + +cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) +{ + fprintf(stderr, "cost %4d\n", inputs); + cost_layer l = {0}; + l.type = COST; + + l.scale = scale; + l.batch = batch; + l.inputs = inputs; + l.outputs = 
inputs; + l.cost_type = cost_type; + l.delta = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_cost_layer; + l.backward = backward_cost_layer; + #ifdef GPU + l.forward_gpu = forward_cost_layer_gpu; + l.backward_gpu = backward_cost_layer_gpu; + + l.delta_gpu = cuda_make_array(l.output, inputs*batch); + l.output_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void resize_cost_layer(cost_layer *l, int inputs) +{ + l->inputs = inputs; + l->outputs = inputs; + l->delta = realloc(l->delta, inputs*l->batch*sizeof(float)); + l->output = realloc(l->output, inputs*l->batch*sizeof(float)); +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch); + l->output_gpu = cuda_make_array(l->output, inputs*l->batch); +#endif +} + +void forward_cost_layer(cost_layer l, network net) +{ + if (!net.truth) return; + if(l.cost_type == MASKED){ + int i; + for(i = 0; i < l.batch*l.inputs; ++i){ + if(net.truth[i] == SECRET_NUM) net.input[i] = SECRET_NUM; + } + } + if(l.cost_type == SMOOTH){ + smooth_l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + }else if(l.cost_type == L1){ + l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + } else { + l2_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + } + l.cost[0] = sum_array(l.output, l.batch*l.inputs); +} + +void backward_cost_layer(const cost_layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_cost_layer(cost_layer l) +{ + cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +void push_cost_layer(cost_layer l) +{ + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +int float_abs_compare (const void * a, const void * b) +{ + float fa = *(const float*) a; + if(fa < 0) fa = -fa; + float fb = *(const float*) b; + if(fb < 
0) fb = -fb; + return (fa > fb) - (fa < fb); +} + +void forward_cost_layer_gpu(cost_layer l, network net) +{ + if (!net.truth) return; + if(l.smooth){ + scal_gpu(l.batch*l.inputs, (1-l.smooth), net.truth_gpu, 1); + add_gpu(l.batch*l.inputs, l.smooth * 1./l.inputs, net.truth_gpu, 1); + } + + if(l.cost_type == SMOOTH){ + smooth_l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } else if (l.cost_type == L1){ + l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } else if (l.cost_type == WGAN){ + wgan_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } else { + l2_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } + + if (l.cost_type == SEG && l.noobject_scale != 1) { + scale_mask_gpu(l.batch*l.inputs, l.delta_gpu, 0, net.truth_gpu, l.noobject_scale); + scale_mask_gpu(l.batch*l.inputs, l.output_gpu, 0, net.truth_gpu, l.noobject_scale); + } + if (l.cost_type == MASKED) { + mask_gpu(l.batch*l.inputs, net.delta_gpu, SECRET_NUM, net.truth_gpu, 0); + } + + if(l.ratio){ + cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); + qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare); + int n = (1-l.ratio) * l.batch*l.inputs; + float thresh = l.delta[n]; + thresh = 0; + printf("%f\n", thresh); + supp_gpu(l.batch*l.inputs, thresh, l.delta_gpu, 1); + } + + if(l.thresh){ + supp_gpu(l.batch*l.inputs, l.thresh*1./l.inputs, l.delta_gpu, 1); + } + + cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs); + l.cost[0] = sum_array(l.output, l.batch*l.inputs); +} + +void backward_cost_layer_gpu(const cost_layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/async/darknet/src/cost_layer.h b/workloads/realworld/async/darknet/src/cost_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..ceb64de00bf66839c2f34852a05ea71114608a35 --- 
/dev/null +++ b/workloads/realworld/async/darknet/src/cost_layer.h @@ -0,0 +1,20 @@ +#ifndef COST_LAYER_H +#define COST_LAYER_H +#include "layer.h" +#include "network.h" + +typedef layer cost_layer; + +COST_TYPE get_cost_type(char *s); +char *get_cost_string(COST_TYPE a); +cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale); +void forward_cost_layer(const cost_layer l, network net); +void backward_cost_layer(const cost_layer l, network net); +void resize_cost_layer(cost_layer *l, int inputs); + +#ifdef GPU +void forward_cost_layer_gpu(cost_layer l, network net); +void backward_cost_layer_gpu(const cost_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/async/darknet/src/cpu_timestamps.c b/workloads/realworld/async/darknet/src/cpu_timestamps.c new file mode 100644 index 0000000000000000000000000000000000000000..35114479c7a9cce3debe2204b6886ad5528041d5 --- /dev/null +++ b/workloads/realworld/async/darknet/src/cpu_timestamps.c @@ -0,0 +1,20 @@ +#include "cpu_timestamps.h" + +void startCPU() { + struct timespec tv; + if(clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + startCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); +} + + + +void endCPU() { + struct timespec tv; + if(clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + + endCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); + //endCPUTimestamp1 = std::chrono::system_clock::now(); + printf("CPU_Times,%lu,%lu,%lu\n", startCPUTime, endCPUTime, endCPUTime-startCPUTime); +} diff --git a/workloads/realworld/async/darknet/src/cpu_timestamps.h b/workloads/realworld/async/darknet/src/cpu_timestamps.h new file mode 100644 index 0000000000000000000000000000000000000000..e53e995a5603b4610759c02a4a179eb9f0124e48 --- /dev/null +++ b/workloads/realworld/async/darknet/src/cpu_timestamps.h @@ -0,0 +1,21 @@ +#ifndef CPU_TIMESTAMP_ +#define CPU_TIMESTAMP_ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +static 
uint64_t startCPUTime; +static uint64_t endCPUTime; + +void startCPU(); +void endCPU(); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/workloads/realworld/async/darknet/src/crnn_layer.c b/workloads/realworld/async/darknet/src/crnn_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..159e17f92d45693461c92d482bf3aa7354a148d8 --- /dev/null +++ b/workloads/realworld/async/darknet/src/crnn_layer.c @@ -0,0 +1,283 @@ +#include "crnn_layer.h" +#include "convolutional_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize) +{ + fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = CRNN; + l.steps = steps; + l.h = h; + l.w = w; + l.c = c; + l.out_h = h; + l.out_w = w; + l.out_c = output_filters; + l.inputs = h*w*c; + l.hidden = h * w * hidden_filters; + l.outputs = l.out_h * l.out_w * l.out_c; + + l.state = calloc(l.hidden*batch*(steps+1), sizeof(float)); + + l.input_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.input_layer) = make_convolutional_layer(batch*steps, h, w, c, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.input_layer->batch = batch; + + l.self_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.self_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.self_layer->batch = batch; + + 
l.output_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.output_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, output_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.output_layer->batch = batch; + + l.output = l.output_layer->output; + l.delta = l.output_layer->delta; + + l.forward = forward_crnn_layer; + l.backward = backward_crnn_layer; + l.update = update_crnn_layer; + +#ifdef GPU + l.forward_gpu = forward_crnn_layer_gpu; + l.backward_gpu = backward_crnn_layer_gpu; + l.update_gpu = update_crnn_layer_gpu; + + l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1)); + l.output_gpu = l.output_layer->output_gpu; + l.delta_gpu = l.output_layer->delta_gpu; +#endif + + return l; +} + +void update_crnn_layer(layer l, update_args a) +{ + update_convolutional_layer(*(l.input_layer), a); + update_convolutional_layer(*(l.self_layer), a); + update_convolutional_layer(*(l.output_layer), a); +} + +void forward_crnn_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1); + fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1); + fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1); + if(net.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1); + + for (i = 0; i < l.steps; ++i) { + s.input = net.input; + forward_convolutional_layer(input_layer, s); + + s.input = l.state; + forward_convolutional_layer(self_layer, s); + + float *old_state = l.state; + if(net.train) l.state += l.hidden*l.batch; + if(l.shortcut){ + copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1); + }else{ + fill_cpu(l.hidden * l.batch, 0, l.state, 1); + } + axpy_cpu(l.hidden * l.batch, 1, input_layer.output, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + 
forward_convolutional_layer(output_layer, s); + + net.input += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_crnn_layer(layer l, network net) +{ + network s = net; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + increment_layer(&input_layer, l.steps-1); + increment_layer(&self_layer, l.steps-1); + increment_layer(&output_layer, l.steps-1); + + l.state += l.hidden*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + s.delta = self_layer.delta; + backward_convolutional_layer(output_layer, s); + + l.state -= l.hidden*l.batch; + /* + if(i > 0){ + copy_cpu(l.hidden * l.batch, input_layer.output - l.hidden*l.batch, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output - l.hidden*l.batch, 1, l.state, 1); + }else{ + fill_cpu(l.hidden * l.batch, 0, l.state, 1); + } + */ + + s.input = l.state; + s.delta = self_layer.delta - l.hidden*l.batch; + if (i == 0) s.delta = 0; + backward_convolutional_layer(self_layer, s); + + copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1); + if (i > 0 && l.shortcut) axpy_cpu(l.hidden*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.hidden*l.batch, 1); + s.input = net.input + i*l.inputs*l.batch; + if(net.delta) s.delta = net.delta + i*l.inputs*l.batch; + else s.delta = 0; + backward_convolutional_layer(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} + +#ifdef GPU + +void pull_crnn_layer(layer l) +{ + pull_convolutional_layer(*(l.input_layer)); + pull_convolutional_layer(*(l.self_layer)); + pull_convolutional_layer(*(l.output_layer)); +} + +void push_crnn_layer(layer l) +{ + 
push_convolutional_layer(*(l.input_layer)); + push_convolutional_layer(*(l.self_layer)); + push_convolutional_layer(*(l.output_layer)); +} + +void update_crnn_layer_gpu(layer l, update_args a) +{ + update_convolutional_layer_gpu(*(l.input_layer), a); + update_convolutional_layer_gpu(*(l.self_layer), a); + update_convolutional_layer_gpu(*(l.output_layer), a); +} + +void forward_crnn_layer_gpu(layer l, network net) +{ + network s = net; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_gpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); + fill_gpu(l.hidden * l.batch * l.steps, 0, self_layer.delta_gpu, 1); + fill_gpu(l.hidden * l.batch * l.steps, 0, input_layer.delta_gpu, 1); + if(net.train) fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1); + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = net.input_gpu; + forward_convolutional_layer_gpu(input_layer, s); + + s.input_gpu = l.state_gpu; + forward_convolutional_layer_gpu(self_layer, s); + + float *old_state = l.state_gpu; + if(net.train) l.state_gpu += l.hidden*l.batch; + if(l.shortcut){ + copy_gpu(l.hidden * l.batch, old_state, 1, l.state_gpu, 1); + }else{ + fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1); + } + axpy_gpu(l.hidden * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + forward_convolutional_layer_gpu(output_layer, s); + + net.input_gpu += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_crnn_layer_gpu(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + increment_layer(&input_layer, l.steps - 1); + increment_layer(&self_layer, l.steps - 1); + 
increment_layer(&output_layer, l.steps - 1); + l.state_gpu += l.hidden*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_gpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu; + backward_convolutional_layer_gpu(output_layer, s); + + l.state_gpu -= l.hidden*l.batch; + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu - l.hidden*l.batch; + if (i == 0) s.delta_gpu = 0; + backward_convolutional_layer_gpu(self_layer, s); + + copy_gpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); + if (i > 0 && l.shortcut) axpy_gpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1); + s.input_gpu = net.input_gpu + i*l.inputs*l.batch; + if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch; + else s.delta_gpu = 0; + backward_convolutional_layer_gpu(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} +#endif diff --git a/workloads/realworld/async/darknet/src/crnn_layer.h b/workloads/realworld/async/darknet/src/crnn_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..515f378354e9cc6149e7a1ac60ffc86ace112991 --- /dev/null +++ b/workloads/realworld/async/darknet/src/crnn_layer.h @@ -0,0 +1,24 @@ + +#ifndef CRNN_LAYER_H +#define CRNN_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize); + +void forward_crnn_layer(layer l, network net); +void backward_crnn_layer(layer l, network net); +void update_crnn_layer(layer l, update_args a); + +#ifdef GPU +void forward_crnn_layer_gpu(layer l, network net); +void backward_crnn_layer_gpu(layer l, network net); +void 
update_crnn_layer_gpu(layer l, update_args a); +void push_crnn_layer(layer l); +void pull_crnn_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/async/darknet/src/crop_layer.c b/workloads/realworld/async/darknet/src/crop_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..3c8c9650bda6dcf4485ce8da8e2fa1984f2b244d --- /dev/null +++ b/workloads/realworld/async/darknet/src/crop_layer.c @@ -0,0 +1,103 @@ +#include "crop_layer.h" +#include "cuda_dark.h" +#include + +image get_crop_image(crop_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.out_c; + return float_to_image(w,h,c,l.output); +} + +void backward_crop_layer(const crop_layer l, network net){} +void backward_crop_layer_gpu(const crop_layer l, network net){} + +crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure) +{ + fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c); + crop_layer l = {0}; + l.type = CROP; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.scale = (float)crop_height / h; + l.flip = flip; + l.angle = angle; + l.saturation = saturation; + l.exposure = exposure; + l.out_w = crop_width; + l.out_h = crop_height; + l.out_c = c; + l.inputs = l.w * l.h * l.c; + l.outputs = l.out_w * l.out_h * l.out_c; + l.output = calloc(l.outputs*batch, sizeof(float)); + l.forward = forward_crop_layer; + l.backward = backward_crop_layer; + + #ifdef GPU + l.forward_gpu = forward_crop_layer_gpu; + l.backward_gpu = backward_crop_layer_gpu; + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + l.rand_gpu = cuda_make_array(0, l.batch*8); + #endif + return l; +} + +void resize_crop_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->out_w = l->scale*w; + l->out_h = l->scale*h; + + l->inputs = l->w * l->h * l->c; + l->outputs = l->out_h * l->out_w * l->out_c; + + l->output = realloc(l->output, 
l->batch*l->outputs*sizeof(float)); + #ifdef GPU + cuda_free(l->output_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + #endif +} + + +void forward_crop_layer(const crop_layer l, network net) +{ + int i,j,c,b,row,col; + int index; + int count = 0; + int flip = (l.flip && rand()%2); + int dh = rand()%(l.h - l.out_h + 1); + int dw = rand()%(l.w - l.out_w + 1); + float scale = 2; + float trans = -1; + if(l.noadjust){ + scale = 1; + trans = 0; + } + if(!net.train){ + flip = 0; + dh = (l.h - l.out_h)/2; + dw = (l.w - l.out_w)/2; + } + for(b = 0; b < l.batch; ++b){ + for(c = 0; c < l.c; ++c){ + for(i = 0; i < l.out_h; ++i){ + for(j = 0; j < l.out_w; ++j){ + if(flip){ + col = l.w - dw - j - 1; + }else{ + col = j + dw; + } + row = i + dh; + index = col+l.w*(row+l.h*(c + l.c*b)); + l.output[count++] = net.input[index]*scale + trans; + } + } + } + } +} + diff --git a/workloads/realworld/async/darknet/src/crop_layer.h b/workloads/realworld/async/darknet/src/crop_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..3b5883c47d6df0987700e1b0434010eebd6312af --- /dev/null +++ b/workloads/realworld/async/darknet/src/crop_layer.h @@ -0,0 +1,20 @@ +#ifndef CROP_LAYER_H +#define CROP_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +typedef layer crop_layer; + +image get_crop_image(crop_layer l); +crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure); +void forward_crop_layer(const crop_layer l, network net); +void resize_crop_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_crop_layer_gpu(crop_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/async/darknet/src/crop_layer_kernels.cu b/workloads/realworld/async/darknet/src/crop_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..7e262fce4ff8beb52de23d7c79bd1917410ef136 --- /dev/null +++ 
b/workloads/realworld/async/darknet/src/crop_layer_kernels.cu @@ -0,0 +1,225 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "crop_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "image.h" +} + +__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c) +{ + if(x < 0 || x >= w || y < 0 || y >= h) return 0; + return image[x + w*(y + c*h)]; +} + +__device__ float3 rgb_to_hsv_kernel(float3 rgb) +{ + float r = rgb.x; + float g = rgb.y; + float b = rgb.z; + + float h, s, v; + float max = (r > g) ? ( (r > b) ? r : b) : ( (g > b) ? g : b); + float min = (r < g) ? ( (r < b) ? r : b) : ( (g < b) ? g : b); + float delta = max - min; + v = max; + if(max == 0){ + s = 0; + h = -1; + }else{ + s = delta/max; + if(r == max){ + h = (g - b) / delta; + } else if (g == max) { + h = 2 + (b - r) / delta; + } else { + h = 4 + (r - g) / delta; + } + if (h < 0) h += 6; + } + return make_float3(h, s, v); +} + +__device__ float3 hsv_to_rgb_kernel(float3 hsv) +{ + float h = hsv.x; + float s = hsv.y; + float v = hsv.z; + + float r, g, b; + float f, p, q, t; + + if (s == 0) { + r = g = b = v; + } else { + int index = (int) floorf(h); + f = h - index; + p = v*(1-s); + q = v*(1-s*f); + t = v*(1-s*(1-f)); + if(index == 0){ + r = v; g = t; b = p; + } else if(index == 1){ + r = q; g = v; b = p; + } else if(index == 2){ + r = p; g = v; b = t; + } else if(index == 3){ + r = p; g = q; b = v; + } else if(index == 4){ + r = t; g = p; b = v; + } else { + r = v; g = p; b = q; + } + } + r = (r < 0) ? 0 : ((r > 1) ? 1 : r); + g = (g < 0) ? 0 : ((g > 1) ? 1 : g); + b = (b < 0) ? 0 : ((b > 1) ? 
1 : b); + return make_float3(r, g, b); +} + +__device__ float bilinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c) +{ + int ix = (int) floorf(x); + int iy = (int) floorf(y); + + float dx = x - ix; + float dy = y - iy; + + float val = (1-dy) * (1-dx) * get_pixel_kernel(image, w, h, ix, iy, c) + + dy * (1-dx) * get_pixel_kernel(image, w, h, ix, iy+1, c) + + (1-dy) * dx * get_pixel_kernel(image, w, h, ix+1, iy, c) + + dy * dx * get_pixel_kernel(image, w, h, ix+1, iy+1, c); + return val; +} + +__global__ void levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure, float translate, float scale, float shift) +{ + int size = batch * w * h; + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= size) return; + int x = id % w; + id /= w; + int y = id % h; + id /= h; + float rshift = rand[0]; + float gshift = rand[1]; + float bshift = rand[2]; + float r0 = rand[8*id + 0]; + float r1 = rand[8*id + 1]; + float r2 = rand[8*id + 2]; + float r3 = rand[8*id + 3]; + + saturation = r0*(saturation - 1) + 1; + saturation = (r1 > .5f) ? 1.f/saturation : saturation; + exposure = r2*(exposure - 1) + 1; + exposure = (r3 > .5f) ? 
1.f/exposure : exposure; + + size_t offset = id * h * w * 3; + image += offset; + float r = image[x + w*(y + h*0)]; + float g = image[x + w*(y + h*1)]; + float b = image[x + w*(y + h*2)]; + float3 rgb = make_float3(r,g,b); + if(train){ + float3 hsv = rgb_to_hsv_kernel(rgb); + hsv.y *= saturation; + hsv.z *= exposure; + rgb = hsv_to_rgb_kernel(hsv); + } else { + shift = 0; + } + image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5f)*shift; + image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5f)*shift; + image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5f)*shift; +} + +__global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= size) return; + + float cx = w/2.f; + float cy = h/2.f; + + int count = id; + int j = id % crop_width; + id /= crop_width; + int i = id % crop_height; + id /= crop_height; + int k = id % c; + id /= c; + int b = id; + + float r4 = rand[8*b + 4]; + float r5 = rand[8*b + 5]; + float r6 = rand[8*b + 6]; + float r7 = rand[8*b + 7]; + + float dw = (w - crop_width)*r4; + float dh = (h - crop_height)*r5; + flip = (flip && (r6 > .5f)); + angle = 2*angle*r7 - angle; + if(!train){ + dw = (w - crop_width)/2.f; + dh = (h - crop_height)/2.f; + flip = 0; + angle = 0; + } + + input += w*h*c*b; + + float x = (flip) ? 
w - dw - j - 1 : j + dw; + float y = i + dh; + + float rx = cosf(angle)*(x-cx) - sinf(angle)*(y-cy) + cx; + float ry = sinf(angle)*(x-cx) + cosf(angle)*(y-cy) + cy; + + output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k); +} + +extern "C" void forward_crop_layer_gpu(crop_layer layer, network net) +{ + cuda_random(layer.rand_gpu, layer.batch*8); + + float radians = layer.angle*3.14159265f/180.f; + + float scale = 2; + float translate = -1; + if(layer.noadjust){ + scale = 1; + translate = 0; + } + + int size = layer.batch * layer.w * layer.h; + + levels_image_kernel<<>>(net.input_gpu, layer.rand_gpu, layer.batch, layer.w, layer.h, net.train, layer.saturation, layer.exposure, translate, scale, layer.shift); + check_error(cudaPeekAtLastError()); + + size = layer.batch*layer.c*layer.out_w*layer.out_h; + + forward_crop_layer_kernel<<>>(net.input_gpu, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, net.train, layer.flip, radians, layer.output_gpu); + check_error(cudaPeekAtLastError()); + +/* + cuda_pull_array(layer.output_gpu, layer.output, size); + image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 0*(size/layer.batch)); + image im2 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 1*(size/layer.batch)); + image im3 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 2*(size/layer.batch)); + + translate_image(im, -translate); + scale_image(im, 1/scale); + translate_image(im2, -translate); + scale_image(im2, 1/scale); + translate_image(im3, -translate); + scale_image(im3, 1/scale); + + show_image(im, "cropped"); + show_image(im2, "cropped2"); + show_image(im3, "cropped3"); + cvWaitKey(0); + */ +} + diff --git a/workloads/realworld/async/darknet/src/cuda_dark.cu b/workloads/realworld/async/darknet/src/cuda_dark.cu new file mode 100644 index 0000000000000000000000000000000000000000..e478ff79abe59a1b9203c30b5a6564a3e2ca0dc5 --- /dev/null +++ 
b/workloads/realworld/async/darknet/src/cuda_dark.cu @@ -0,0 +1,419 @@ +int gpu_index = 0; + +#ifdef GPU + +#include "cuda.h" +#include "utils.h" +#include "blas.h" +#include +#include +#include + + +#include + +void cuda_set_device(int n) +{ + gpu_index = n; + cudaError_t status = cudaSetDevice(n); + check_error(status); +} + +int cuda_get_device() +{ + int n = 0; + cudaError_t status = cudaGetDevice(&n); + check_error(status); + return n; +} + +void check_error(cudaError_t status) +{ + cudaDeviceSynchronize(); + cudaError_t status2 = cudaGetLastError(); + if (status != cudaSuccess) + { + const char *s = cudaGetErrorString(status); + char buffer[256]; + printf("CUDA Error: %s\n", s); + assert(0); + snprintf(buffer, 256, "CUDA Error: %s", s); + error(buffer); + } + if (status2 != cudaSuccess) + { + const char *s = cudaGetErrorString(status); + char buffer[256]; + printf("CUDA Error Prev: %s\n", s); + assert(0); + snprintf(buffer, 256, "CUDA Error Prev: %s", s); + error(buffer); + } +} + +dim3 cuda_gridsize(size_t n){ + size_t k = (n-1) / BLOCK + 1; + size_t x = k; + size_t y = 1; + if(x > 65535){ + x = ceil(sqrt(k)); + y = (n-1)/(x*BLOCK) + 1; + } + dim3 d = {x, y, 1}; + //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); + return d; +} + +#ifdef CUDNN +cudnnHandle_t cudnn_handle() +{ + static int init[16] = {0}; + static cudnnHandle_t handle[16]; + int i = cuda_get_device(); + if(!init[i]) { + cudnnCreate(&handle[i]); + init[i] = 1; + } + return handle[i]; +} +#endif + +cublasHandle_t blas_handle() +{ + static int init[16] = {0}; + static cublasHandle_t handle[16]; + int i = cuda_get_device(); + if(!init[i]) { + cublasCreate(&handle[i]); + init[i] = 1; + } + return handle[i]; +} + +float *cuda_make_array(float *x, size_t n) +{ + float *x_gpu; + size_t size = sizeof(float)*n; + cudaError_t status = cudaMalloc((void **)&x_gpu, size); + check_error(status); + if(x){ + status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + check_error(status); + } else { + 
fill_gpu(n, 0, x_gpu, 1); + } + if(!x_gpu) error("Cuda malloc failed\n"); + return x_gpu; +} + +void cuda_random(float *x_gpu, size_t n) +{ + static curandGenerator_t gen[16]; + static int init[16] = {0}; + int i = cuda_get_device(); + if(!init[i]){ + curandCreateGenerator(&gen[i], CURAND_RNG_PSEUDO_DEFAULT); + curandSetPseudoRandomGeneratorSeed(gen[i], time(0)); + init[i] = 1; + } + curandGenerateUniform(gen[i], x_gpu, n); + check_error(cudaPeekAtLastError()); +} + +float cuda_compare(float *x_gpu, float *x, size_t n, char *s) +{ + float *tmp = (float *) calloc(n, sizeof(float)); + cuda_pull_array(x_gpu, tmp, n); + //int i; + //for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]); + axpy_cpu(n, -1, x, 1, tmp, 1); + float err = dot_cpu(n, tmp, 1, tmp, 1); + printf("Error %s: %f\n", s, sqrt(err/n)); + free(tmp); + return err; +} + +int *cuda_make_int_array(int *x, size_t n) +{ + int *x_gpu; + size_t size = sizeof(int)*n; + cudaError_t status = cudaMalloc((void **)&x_gpu, size); + check_error(status); + if(x){ + status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + check_error(status); + } + if(!x_gpu) error("Cuda malloc failed\n"); + return x_gpu; +} + +void cuda_free(float *x_gpu) +{ + cudaError_t status = cudaFree(x_gpu); + check_error(status); +} + +void cuda_push_array(float *x_gpu, float *x, size_t n) +{ + size_t size = sizeof(float)*n; + cudaError_t status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + check_error(status); +} + +void cuda_pull_array(float *x_gpu, float *x, size_t n) +{ + size_t size = sizeof(float)*n; + cudaError_t status = cudaMemcpy(x, x_gpu, size, cudaMemcpyDeviceToHost); + check_error(status); +} + +float cuda_mag_array(float *x_gpu, size_t n) +{ + float *temp = (float *) calloc(n, sizeof(float)); + cuda_pull_array(x_gpu, temp, n); + float m = mag_array(temp, n); + free(temp); + return m; +} + +static const char * +getMemcpyKindString(CUpti_ActivityMemcpyKind kind) +{ + switch (kind) + { + case 
CUPTI_ACTIVITY_MEMCPY_KIND_HTOD: + return "HtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOH: + return "DtoH"; + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOA: + return "HtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOH: + return "AtoH"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOA: + return "AtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOD: + return "AtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOA: + return "DtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOD: + return "DtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOH: + return "HtoH"; + default: + break; + } + + return ""; +} + +static const char * +getUvmCounterKindString(CUpti_ActivityUnifiedMemoryCounterKind kind) +{ + switch (kind) + { + case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD: + return "BYTES_TRANSFER_HTOD"; + case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH: + return "BYTES_TRANSFER_DTOH"; + default: + break; + } + return ""; +} + +static void +printActivity(CUpti_Activity *record) +{ + switch (record->kind) + { + case CUPTI_ACTIVITY_KIND_KERNEL: + { + int status; + CUpti_ActivityKernel4 *kernel = (CUpti_ActivityKernel4 *)record; + printf("KERNEL %s, %llu, %llu, %llu\n", + abi::__cxa_demangle(kernel->name, 0, 0, &status), + (unsigned long long)(kernel->start), + (unsigned long long)(kernel->end), + (unsigned long long)(kernel->end) - (kernel->start)); + break; + } + case CUPTI_ACTIVITY_KIND_RUNTIME: + { + CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record; + const char *callback_name; + cuptiGetCallbackName(CUPTI_CB_DOMAIN_RUNTIME_API, api->cbid, &callback_name); + // printf("RUNTIME %s (cbid=%u) [ %llu - %llu ] process %u, thread %u, correlation %u\n", + // callback_name, api->cbid, + // (unsigned long long)(api->start - startTimestamp), + // (unsigned long long)(api->end - startTimestamp), + // api->processId, api->threadId, api->correlationId); + printf("RUNTIME %s (cbid=%u), %llu,%llu,%llu, process %u, thread %u, correlation %u\n", + callback_name, api->cbid, + (unsigned long 
long)(api->start), + (unsigned long long)(api->end), + (unsigned long long)(api->end - api->start), + api->processId, api->threadId, api->correlationId); + break; + } + case CUPTI_ACTIVITY_KIND_MEMCPY: + { + CUpti_ActivityMemcpy4 *memcpy = (CUpti_ActivityMemcpy4 *)record; + printf("MEMCPY %s, size %llu, %llu, %llu, %llu\n", + getMemcpyKindString((CUpti_ActivityMemcpyKind)memcpy->copyKind), + (unsigned long long)memcpy->bytes, + (unsigned long long)(memcpy->start), + (unsigned long long)(memcpy->end), + (unsigned long long)(memcpy->end) - (memcpy->start)); + break; + } + case CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER: + { + CUpti_ActivityUnifiedMemoryCounter2 *uvm = (CUpti_ActivityUnifiedMemoryCounter2 *)record; + printf("UVM MEMCPY %s, size %llu, %llu, %llu, %llu \n", + getUvmCounterKindString(uvm->counterKind), + (unsigned long long)uvm->value, + (unsigned long long)(uvm->start), + (unsigned long long)(uvm->end), + (unsigned long long)(uvm->end - uvm->start)); + break; + } + } +} + +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) +{ + uint8_t *bfr = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE); + if (bfr == NULL) + { + printf("Error: out of memory\n"); + exit(-1); + } + + *size = BUF_SIZE; + *buffer = ALIGN_BUFFER(bfr, ALIGN_SIZE); + *maxNumRecords = 0; +} + +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) +{ + CUptiResult status; + CUpti_Activity *record = NULL; + if (validSize > 0) + { + do + { + status = cuptiActivityGetNextRecord(buffer, validSize, &record); + if (status == CUPTI_SUCCESS) + { + printActivity(record); + } + else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) + break; + else + { + CUPTI_CALL(status); + } + } while (1); + + // report any records dropped from the queue + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if (dropped != 0) + { + printf("Dropped %u activity records\n", (unsigned int)dropped); + } + 
} + + free(buffer); +} + +#ifndef PROFILE +void initTrace() { + printf("not Profile initTrace()\n"); + return; +} + +void finiTrace() { + return; +} + +#else +void initTrace() +{ + printf("Profile initTrace()\n"); + size_t attrValue = 0, attrValueSize = sizeof(size_t); + + CUpti_ActivityUnifiedMemoryCounterConfig config[2]; + + // configure unified memory counters + config[0].scope = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE; + config[0].kind = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD; + config[0].deviceId = 0; + config[0].enable = 1; + + config[1].scope = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE; + config[1].kind = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH; + config[1].deviceId = 0; + config[1].enable = 1; + + CUptiResult res = cuptiActivityConfigureUnifiedMemoryCounter(config, 2); + if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED) + { + printf("Test is waived, unified memory is not supported on the underlying platform.\n"); + } + else if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE) + { + printf("Test is waived, unified memory is not supported on the device.\n"); + } + else if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES) + { + printf("Test is waived, unified memory is not supported on the non-P2P multi-gpu setup.\n"); + } + else + { + CUPTI_CALL(res); + } + + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER)); + + // Register callbacks for buffer requests and for buffers completed by CUPTI. + CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + + // Optionally get and set activity attributes. + // Attributes can be set by the CUPTI client to change behavior of the activity API. 
+ // Some attributes require to be set before any CUDA context is created to be effective, + // e.g. to be applied to all device buffer allocations (see documentation). + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + printf("%s = %llu B\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +} + +void finiTrace() +{ + // Force flush any remaining activity buffers before termination of the application + CUPTI_CALL(cuptiActivityFlushAll(1)); +} +#endif + +void GPU_argv_init() +{ + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, GPU_DEVICE); + printf("setting device %d with name %s\n", GPU_DEVICE, deviceProp.name); + cudaSetDevice(GPU_DEVICE); +} +#else +void cuda_set_device(int n){} + +#endif diff --git a/workloads/realworld/async/darknet/src/cuda_dark.h b/workloads/realworld/async/darknet/src/cuda_dark.h new file mode 100644 index 0000000000000000000000000000000000000000..ac6b60bc3d27ec1ebc8190463648b946f6c809ef --- /dev/null +++ b/workloads/realworld/async/darknet/src/cuda_dark.h @@ -0,0 +1,63 @@ +#ifndef CUDA_H +#define CUDA_H + +#include "darknet.h" + +#ifdef GPU + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) (((uintptr_t)(buffer) & ((align)-1)) ? 
((buffer) + (align) - ((uintptr_t)(buffer) & ((align)-1))) : (buffer)) + +static uint64_t startTimestamp; +// Timestamp at trace initialization time. Used to normalized other +// timestamps + +#define CUPTI_CALL(call) \ + do \ + { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) \ + { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + if (_status == CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED) \ + exit(0); \ + else \ + exit(-1); \ + } \ + } while (0) + +#include + +#ifdef __cplusplus +extern "C" { +#endif +void check_error(cudaError_t status); +cublasHandle_t blas_handle(); +int *cuda_make_int_array(int *x, size_t n); +void cuda_random(float *x_gpu, size_t n); +float cuda_compare(float *x_gpu, float *x, size_t n, char *s); +dim3 cuda_gridsize(size_t n); + +void GPU_argv_init(); +void initTrace(); +void finiTrace(); +void startCPU(); +void endCPU(); +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords); +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); +static void printActivity(CUpti_Activity *record); + +#ifdef __cplusplus +} +#endif + +#ifdef CUDNN +cudnnHandle_t cudnn_handle(); +#endif + +#endif +#endif diff --git a/workloads/realworld/async/darknet/src/cupti_add.cpp b/workloads/realworld/async/darknet/src/cupti_add.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a0d16eb72f41f8e858a59354d2de9d6b470c0e76 --- /dev/null +++ b/workloads/realworld/async/darknet/src/cupti_add.cpp @@ -0,0 +1,112 @@ +#include "cupti_add.h" + +static void +printActivity(CUpti_Activity *record) +{ + switch (record->kind) + { + case CUPTI_ACTIVITY_KIND_KERNEL: + { + int status; + CUpti_ActivityKernel4 *kernel = (CUpti_ActivityKernel4 *)record; + printf("CUPTI,%s,%llu,%llu,%llu\n", + abi::__cxa_demangle(kernel->name, 0, 0, 
&status), + (unsigned long long)(kernel->start), + (unsigned long long)(kernel->end), + (unsigned long long)(kernel->end - startTimestamp) - (kernel->start - startTimestamp)); + break; + } + case CUPTI_ACTIVITY_KIND_RUNTIME: + { + CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record; + const char *callback_name; + cuptiGetCallbackName(CUPTI_CB_DOMAIN_RUNTIME_API, api->cbid, &callback_name); + printf("RUNTIME %s (cbid=%u) [ %llu - %llu ] process %u, thread %u, correlation %u\n", + callback_name, api->cbid, + (unsigned long long)(api->start - startTimestamp), + (unsigned long long)(api->end - startTimestamp), + api->processId, api->threadId, api->correlationId); + break; + } + } +} + +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) +{ + uint8_t *bfr = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE); + if (bfr == NULL) + { + printf("Error: out of memory\n"); + exit(-1); + } + + *size = BUF_SIZE; + *buffer = ALIGN_BUFFER(bfr, ALIGN_SIZE); + *maxNumRecords = 0; +} + +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) +{ + CUptiResult status; + CUpti_Activity *record = NULL; + if (validSize > 0) + { + do + { + status = cuptiActivityGetNextRecord(buffer, validSize, &record); + if (status == CUPTI_SUCCESS) + { + printActivity(record); + } + else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) + break; + else + { + CUPTI_CALL(status); + } + } while (1); + + // report any records dropped from the queue + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if (dropped != 0) + { + printf("Dropped %u activity records\n", (unsigned int)dropped); + } + } + + free(buffer); +} + +void initTrace() +{ + size_t attrValue = 0, attrValueSize = sizeof(size_t); + + // CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); + + // Register callbacks for buffer requests and for buffers completed 
by CUPTI. + CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + + // Optionally get and set activity attributes. + // Attributes can be set by the CUPTI client to change behavior of the activity API. + // Some attributes require to be set before any CUDA context is created to be effective, + // e.g. to be applied to all device buffer allocations (see documentation). + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + printf("%s = %llu B\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +} + +void finiTrace() +{ + // Force flush any remaining activity buffers before termination of the application + CUPTI_CALL(cuptiActivityFlushAll(1)); +} \ No newline at end of file diff --git a/workloads/realworld/async/darknet/src/cupti_add.h b/workloads/realworld/async/darknet/src/cupti_add.h new file mode 100644 index 0000000000000000000000000000000000000000..a30b7b847ad13381032d2f60eac2955d30146485 --- /dev/null +++ b/workloads/realworld/async/darknet/src/cupti_add.h @@ -0,0 +1,36 @@ +#include +#include +#include + + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) \ + (((uintptr_t) (buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer)) + +static uint64_t startTimestamp; +// Timestamp at trace initialization time. 
Used to normalized other +// timestamps + +#define CUPTI_CALL(call) \ + do { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + if(_status == CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED) \ + exit(0); \ + else \ + exit(-1); \ + } \ + } while (0) + +void initTrace(); +void finiTrace(); +void startCPU(); +void endCPU(); +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords); +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); +static void printActivity(CUpti_Activity *record); diff --git a/workloads/realworld/async/darknet/src/data.c b/workloads/realworld/async/darknet/src/data.c new file mode 100644 index 0000000000000000000000000000000000000000..d50f1346c5cdcfe1dbeb2d0f70ec408fb4f33960 --- /dev/null +++ b/workloads/realworld/async/darknet/src/data.c @@ -0,0 +1,1685 @@ +#include "data.h" +#include "utils.h" +#include "image.h" +#include "cuda_dark.h" + +#include +#include +#include + +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + +list *get_paths(char *filename) +{ + char *path; + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + list *lines = make_list(); + while((path=fgetl(file))){ + list_insert(lines, path); + } + fclose(file); + return lines; +} + +/* +char **get_random_paths_indexes(char **paths, int n, int m, int *indexes) +{ + char **random_paths = calloc(n, sizeof(char*)); + int i; + pthread_mutex_lock(&mutex); + for(i = 0; i < n; ++i){ + int index = rand()%m; + indexes[i] = index; + random_paths[i] = paths[index]; + if(i == 0) printf("%s\n", paths[index]); + } + pthread_mutex_unlock(&mutex); + return random_paths; +} +*/ + +char **get_random_paths(char **paths, int n, int m) +{ + char **random_paths = calloc(n, sizeof(char*)); + int i; + 
pthread_mutex_lock(&mutex); + for(i = 0; i < n; ++i){ + int index = rand()%m; + random_paths[i] = paths[index]; + //if(i == 0) printf("%s\n", paths[index]); + } + pthread_mutex_unlock(&mutex); + return random_paths; +} + +char **find_replace_paths(char **paths, int n, char *find, char *replace) +{ + char **replace_paths = calloc(n, sizeof(char*)); + int i; + for(i = 0; i < n; ++i){ + char replaced[4096]; + find_replace(paths[i], find, replace, replaced); + replace_paths[i] = copy_string(replaced); + } + return replace_paths; +} + +matrix load_image_paths_gray(char **paths, int n, int w, int h) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image(paths[i], w, h, 3); + + image gray = grayscale_image(im); + free_image(im); + im = gray; + + X.vals[i] = im.data; + X.cols = im.h*im.w*im.c; + } + return X; +} + +matrix load_image_paths(char **paths, int n, int w, int h) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], w, h); + X.vals[i] = im.data; + X.cols = im.h*im.w*im.c; + } + return X; +} + +matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], 0, 0); + image crop; + if(center){ + crop = center_crop_image(im, size, size); + } else { + crop = random_augment_image(im, angle, aspect, min, max, size, size); + } + int flip = rand()%2; + if (flip) flip_image(crop); + random_distort_image(crop, hue, saturation, exposure); + + /* + show_image(im, "orig"); + show_image(crop, "crop"); + cvWaitKey(0); + */ + //grayscale_image_3c(crop); + free_image(im); + X.vals[i] = crop.data; + X.cols = 
crop.h*crop.w*crop.c; + } + return X; +} + + +box_label *read_boxes(char *filename, int *n) +{ + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + float x, y, h, w; + int id; + int count = 0; + int size = 64; + box_label *boxes = calloc(size, sizeof(box_label)); + while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){ + if(count == size) { + size = size * 2; + boxes = realloc(boxes, size*sizeof(box_label)); + } + boxes[count].id = id; + boxes[count].x = x; + boxes[count].y = y; + boxes[count].h = h; + boxes[count].w = w; + boxes[count].left = x - w/2; + boxes[count].right = x + w/2; + boxes[count].top = y - h/2; + boxes[count].bottom = y + h/2; + ++count; + } + fclose(file); + *n = count; + return boxes; +} + +void randomize_boxes(box_label *b, int n) +{ + int i; + for(i = 0; i < n; ++i){ + box_label swap = b[i]; + int index = rand()%n; + b[i] = b[index]; + b[index] = swap; + } +} + +void correct_boxes(box_label *boxes, int n, float dx, float dy, float sx, float sy, int flip) +{ + int i; + for(i = 0; i < n; ++i){ + if(boxes[i].x == 0 && boxes[i].y == 0) { + boxes[i].x = 999999; + boxes[i].y = 999999; + boxes[i].w = 999999; + boxes[i].h = 999999; + continue; + } + boxes[i].left = boxes[i].left * sx - dx; + boxes[i].right = boxes[i].right * sx - dx; + boxes[i].top = boxes[i].top * sy - dy; + boxes[i].bottom = boxes[i].bottom* sy - dy; + + if(flip){ + float swap = boxes[i].left; + boxes[i].left = 1. - boxes[i].right; + boxes[i].right = 1. 
- swap; + } + + boxes[i].left = constrain(0, 1, boxes[i].left); + boxes[i].right = constrain(0, 1, boxes[i].right); + boxes[i].top = constrain(0, 1, boxes[i].top); + boxes[i].bottom = constrain(0, 1, boxes[i].bottom); + + boxes[i].x = (boxes[i].left+boxes[i].right)/2; + boxes[i].y = (boxes[i].top+boxes[i].bottom)/2; + boxes[i].w = (boxes[i].right - boxes[i].left); + boxes[i].h = (boxes[i].bottom - boxes[i].top); + + boxes[i].w = constrain(0, 1, boxes[i].w); + boxes[i].h = constrain(0, 1, boxes[i].h); + } +} + +void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int count = 0; + box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + float x,y,w,h; + int id; + int i; + + for (i = 0; i < count && i < 90; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if (w < .0 || h < .0) continue; + + int index = (4+classes) * i; + + truth[index++] = x; + truth[index++] = y; + truth[index++] = w; + truth[index++] = h; + + if (id < classes) truth[index+id] = 1; + } + free(boxes); +} + +void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + int count = 0; + 
box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + float x,y,w,h; + int id; + int i; + + for (i = 0; i < count; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if (w < .005 || h < .005) continue; + + int col = (int)(x*num_boxes); + int row = (int)(y*num_boxes); + + x = x*num_boxes - col; + y = y*num_boxes - row; + + int index = (col+row*num_boxes)*(5+classes); + if (truth[index]) continue; + truth[index++] = 1; + + if (id < classes) truth[index+id] = 1; + index += classes; + + truth[index++] = x; + truth[index++] = y; + truth[index++] = w; + truth[index++] = h; + } + free(boxes); +} + +void load_rle(image im, int *rle, int n) +{ + int count = 0; + int curr = 0; + int i,j; + for(i = 0; i < n; ++i){ + for(j = 0; j < rle[i]; ++j){ + im.data[count++] = curr; + } + curr = 1 - curr; + } + for(; count < im.h*im.w*im.c; ++count){ + im.data[count] = curr; + } +} + +void or_image(image src, image dest, int c) +{ + int i; + for(i = 0; i < src.w*src.h; ++i){ + if(src.data[i]) dest.data[dest.w*dest.h*c + i] = 1; + } +} + +void exclusive_image(image src) +{ + int k, j, i; + int s = src.w*src.h; + for(k = 0; k < src.c-1; ++k){ + for(i = 0; i < s; ++i){ + if (src.data[k*s + i]){ + for(j = k+1; j < src.c; ++j){ + src.data[j*s + i] = 0; + } + } + } + } +} + +box bound_image(image im) +{ + int x,y; + int minx = im.w; + int miny = im.h; + int maxx = 0; + int maxy = 0; + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + if(im.data[y*im.w + x]){ + minx = (x < minx) ? x : minx; + miny = (y < miny) ? y : miny; + maxx = (x > maxx) ? x : maxx; + maxy = (y > maxy) ? 
y : maxy; + } + } + } + box b = {minx, miny, maxx-minx + 1, maxy-miny + 1}; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + return b; +} + +void fill_truth_iseg(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + int i = 0; + int j; + image part = make_image(w, h, 1); + while((fscanf(file, "%d %s", &id, buff) == 2) && i < num_boxes){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect); + if(flip) flip_image(sized); + + image mask = resize_image(sized, mw, mh); + truth[i*(mw*mh+1)] = id; + for(j = 0; j < mw*mh; ++j){ + truth[i*(mw*mh + 1) + 1 + j] = mask.data[j]; + } + ++i; + + free_image(mask); + free_image(sized); + free(rle); + } + if(i < num_boxes) truth[i*(mw*mh+1)] = -1; + fclose(file); + free_image(part); +} + +void fill_truth_mask(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + int i = 0; + image part = make_image(w, h, 1); + while((fscanf(file, "%d %s", &id, buff) == 2) && i < num_boxes){ + int n = 0; + int 
*rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect); + if(flip) flip_image(sized); + box b = bound_image(sized); + if(b.w > 0){ + image crop = crop_image(sized, b.x, b.y, b.w, b.h); + image mask = resize_image(crop, mw, mh); + truth[i*(4 + mw*mh + 1) + 0] = (b.x + b.w/2.)/sized.w; + truth[i*(4 + mw*mh + 1) + 1] = (b.y + b.h/2.)/sized.h; + truth[i*(4 + mw*mh + 1) + 2] = b.w/sized.w; + truth[i*(4 + mw*mh + 1) + 3] = b.h/sized.h; + int j; + for(j = 0; j < mw*mh; ++j){ + truth[i*(4 + mw*mh + 1) + 4 + j] = mask.data[j]; + } + truth[i*(4 + mw*mh + 1) + 4 + mw*mh] = id; + free_image(crop); + free_image(mask); + ++i; + } + free_image(sized); + free(rle); + } + fclose(file); + free_image(part); +} + + +void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + + find_replace(labelpath, "raw", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + int count = 0; + box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + if(count > num_boxes) count = num_boxes; + float x,y,w,h; + int id; + int i; + int sub = 0; + + for (i = 0; i < count; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if ((w < .001 || h < .001)) { + ++sub; + continue; + } + + truth[(i-sub)*5+0] = x; + truth[(i-sub)*5+1] = y; + truth[(i-sub)*5+2] = w; + truth[(i-sub)*5+3] = h; + truth[(i-sub)*5+4] = id; + } + free(boxes); +} + +#define NUMCHARS 37 + +void print_letters(float 
*pred, int n) +{ + int i; + for(i = 0; i < n; ++i){ + int index = max_index(pred+i*NUMCHARS, NUMCHARS); + printf("%c", int_to_alphanum(index)); + } + printf("\n"); +} + +void fill_truth_captcha(char *path, int n, float *truth) +{ + char *begin = strrchr(path, '/'); + ++begin; + int i; + for(i = 0; i < strlen(begin) && i < n && begin[i] != '.'; ++i){ + int index = alphanum_to_int(begin[i]); + if(index > 35) printf("Bad %c\n", begin[i]); + truth[i*NUMCHARS+index] = 1; + } + for(;i < n; ++i){ + truth[i*NUMCHARS + NUMCHARS-1] = 1; + } +} + +data load_data_captcha(char **paths, int n, int m, int k, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = make_matrix(n, k*NUMCHARS); + int i; + for(i = 0; i < n; ++i){ + fill_truth_captcha(paths[i], k, d.y.vals[i]); + } + if(m) free(paths); + return d; +} + +data load_data_captcha_encode(char **paths, int n, int m, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.X.cols = 17100; + d.y = d.X; + if(m) free(paths); + return d; +} + +void fill_truth(char *path, char **labels, int k, float *truth) +{ + int i; + memset(truth, 0, k*sizeof(float)); + int count = 0; + for(i = 0; i < k; ++i){ + if(strstr(path, labels[i])){ + truth[i] = 1; + ++count; + //printf("%s %s %d\n", path, labels[i], i); + } + } + if(count != 1 && (k != 1 || count != 0)) printf("Too many or too few labels: %d, %s\n", count, path); +} + +void fill_hierarchy(float *truth, int k, tree *hierarchy) +{ + int j; + for(j = 0; j < k; ++j){ + if(truth[j]){ + int parent = hierarchy->parent[j]; + while(parent >= 0){ + truth[parent] = 1; + parent = hierarchy->parent[parent]; + } + } + } + int i; + int count = 0; + for(j = 0; j < hierarchy->groups; ++j){ + //printf("%d\n", count); + int mask = 1; + for(i = 0; i < hierarchy->group_size[j]; ++i){ + if(truth[count + i]){ + mask = 0; + break; + } 
+ } + if (mask) { + for(i = 0; i < hierarchy->group_size[j]; ++i){ + truth[count + i] = SECRET_NUM; + } + } + count += hierarchy->group_size[j]; + } +} + +matrix load_regression_labels_paths(char **paths, int n, int k) +{ + matrix y = make_matrix(n, k); + int i,j; + for(i = 0; i < n; ++i){ + char labelpath[4096]; + find_replace(paths[i], "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".BMP", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPeG", ".txt", labelpath); + find_replace(labelpath, ".Jpeg", ".txt", labelpath); + find_replace(labelpath, ".PNG", ".txt", labelpath); + find_replace(labelpath, ".TIF", ".txt", labelpath); + find_replace(labelpath, ".bmp", ".txt", labelpath); + find_replace(labelpath, ".jpeg", ".txt", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".tif", ".txt", labelpath); + + FILE *file = fopen(labelpath, "r"); + for(j = 0; j < k; ++j){ + fscanf(file, "%f", &(y.vals[i][j])); + } + fclose(file); + } + return y; +} + +matrix load_labels_paths(char **paths, int n, char **labels, int k, tree *hierarchy) +{ + matrix y = make_matrix(n, k); + int i; + for(i = 0; i < n && labels; ++i){ + fill_truth(paths[i], labels, k, y.vals[i]); + if(hierarchy){ + fill_hierarchy(y.vals[i], k, hierarchy); + } + } + return y; +} + +matrix load_tags_paths(char **paths, int n, int k) +{ + matrix y = make_matrix(n, k); + int i; + //int count = 0; + for(i = 0; i < n; ++i){ + char label[4096]; + find_replace(paths[i], "images", "labels", label); + find_replace(label, ".jpg", ".txt", label); + FILE *file = fopen(label, "r"); + if (!file) continue; + //++count; + int tag; + while(fscanf(file, "%d", &tag) == 1){ + if(tag < k){ + y.vals[i][tag] = 1; + } + } + fclose(file); + } + //printf("%d/%d\n", 
count, n); + return y; +} + +char **get_labels(char *filename) +{ + list *plist = get_paths(filename); + char **labels = (char **)list_to_array(plist); + free_list(plist); + return labels; +} + +void free_data(data d) +{ + if(!d.shallow){ + free_matrix(d.X); + free_matrix(d.y); + }else{ + free(d.X.vals); + free(d.y.vals); + } +} + +image get_segmentation_image(char *path, int w, int h, int classes) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + image mask = make_image(w, h, classes); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + image part = make_image(w, h, 1); + while(fscanf(file, "%d %s", &id, buff) == 2){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + or_image(part, mask, id); + free(rle); + } + //exclusive_image(mask); + fclose(file); + free_image(part); + return mask; +} + +image get_segmentation_image2(char *path, int w, int h, int classes) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + image mask = make_image(w, h, classes+1); + int i; + for(i = 0; i < w*h; ++i){ + mask.data[w*h*classes + i] = 1; + } + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + image part = make_image(w, h, 1); + while(fscanf(file, "%d %s", &id, buff) == 2){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + or_image(part, mask, id); + for(i = 0; i < w*h; ++i){ + if(part.data[i]) mask.data[w*h*classes + i] = 
0; + } + free(rle); + } + //exclusive_image(mask); + fclose(file); + free_image(part); + return mask; +} + +data load_data_seg(int n, char **paths, int m, int w, int h, int classes, int min, int max, float angle, float aspect, float hue, float saturation, float exposure, int div) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + + d.y.rows = n; + d.y.cols = h*w*classes/div/div; + d.y.vals = calloc(d.X.rows, sizeof(float*)); + + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + + image mask = get_segmentation_image(random_paths[i], orig.w, orig.h, classes); + //image mask = make_image(orig.w, orig.h, classes+1); + image sized_m = rotate_crop_image(mask, a.rad, a.scale/div, a.w/div, a.h/div, a.dx/div, a.dy/div, a.aspect); + + if(flip) flip_image(sized_m); + d.y.vals[i] = sized_m.data; + + free_image(orig); + free_image(mask); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_iseg(int n, char **paths, int m, int w, int h, int classes, int boxes, int div, int min, int max, float angle, float aspect, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, (((w/div)*(h/div))+1)*boxes); + + for(i = 0; i < n; ++i){ + image orig = 
load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + //show_image(sized, "image"); + + fill_truth_iseg(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, w/div, h/div); + + free_image(orig); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_mask(int n, char **paths, int m, int w, int h, int classes, int boxes, int coords, int min, int max, float angle, float aspect, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, (coords+1)*boxes); + + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + //show_image(sized, "image"); + + fill_truth_mask(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, 14, 14); + + free_image(orig); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter, float hue, float saturation, float exposure) 
+{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + + int k = size*size*(5+classes); + d.y = make_matrix(n, k); + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + + int oh = orig.h; + int ow = orig.w; + + int dw = (ow*jitter); + int dh = (oh*jitter); + + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); + + int swidth = ow - pleft - pright; + int sheight = oh - ptop - pbot; + + float sx = (float)swidth / ow; + float sy = (float)sheight / oh; + + int flip = rand()%2; + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + + float dx = ((float)pleft/ow)/sx; + float dy = ((float)ptop /oh)/sy; + + image sized = resize_image(cropped, w, h); + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + + fill_truth_region(random_paths[i], d.y.vals[i], classes, size, flip, dx, dy, 1./sx, 1./sy); + + free_image(orig); + free_image(cropped); + } + free(random_paths); + return d; +} + +data load_data_compare(int n, char **paths, int m, int classes, int w, int h) +{ + if(m) paths = get_random_paths(paths, 2*n, m); + int i,j; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*6; + + int k = 2*(classes); + d.y = make_matrix(n, k); + for(i = 0; i < n; ++i){ + image im1 = load_image_color(paths[i*2], w, h); + image im2 = load_image_color(paths[i*2+1], w, h); + + d.X.vals[i] = calloc(d.X.cols, sizeof(float)); + memcpy(d.X.vals[i], im1.data, h*w*3*sizeof(float)); + memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float)); + + int id; + float iou; + + char imlabel1[4096]; + char imlabel2[4096]; + find_replace(paths[i*2], "imgs", "labels", imlabel1); + find_replace(imlabel1, "jpg", "txt", 
imlabel1); + FILE *fp1 = fopen(imlabel1, "r"); + + while(fscanf(fp1, "%d %f", &id, &iou) == 2){ + if (d.y.vals[i][2*id] < iou) d.y.vals[i][2*id] = iou; + } + + find_replace(paths[i*2+1], "imgs", "labels", imlabel2); + find_replace(imlabel2, "jpg", "txt", imlabel2); + FILE *fp2 = fopen(imlabel2, "r"); + + while(fscanf(fp2, "%d %f", &id, &iou) == 2){ + if (d.y.vals[i][2*id + 1] < iou) d.y.vals[i][2*id + 1] = iou; + } + + for (j = 0; j < classes; ++j){ + if (d.y.vals[i][2*j] > .5 && d.y.vals[i][2*j+1] < .5){ + d.y.vals[i][2*j] = 1; + d.y.vals[i][2*j+1] = 0; + } else if (d.y.vals[i][2*j] < .5 && d.y.vals[i][2*j+1] > .5){ + d.y.vals[i][2*j] = 0; + d.y.vals[i][2*j+1] = 1; + } else { + d.y.vals[i][2*j] = SECRET_NUM; + d.y.vals[i][2*j+1] = SECRET_NUM; + } + } + fclose(fp1); + fclose(fp2); + + free_image(im1); + free_image(im2); + } + if(m) free(paths); + return d; +} + +data load_data_swag(char **paths, int n, int classes, float jitter) +{ + int index = rand()%n; + char *random_path = paths[index]; + + image orig = load_image_color(random_path, 0, 0); + int h = orig.h; + int w = orig.w; + + data d = {0}; + d.shallow = 0; + d.w = w; + d.h = h; + + d.X.rows = 1; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + int k = (4+classes)*90; + d.y = make_matrix(1, k); + + int dw = w*jitter; + int dh = h*jitter; + + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); + + int swidth = w - pleft - pright; + int sheight = h - ptop - pbot; + + float sx = (float)swidth / w; + float sy = (float)sheight / h; + + int flip = rand()%2; + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + + float dx = ((float)pleft/w)/sx; + float dy = ((float)ptop /h)/sy; + + image sized = resize_image(cropped, w, h); + if(flip) flip_image(sized); + d.X.vals[0] = sized.data; + + fill_truth_swag(random_path, d.y.vals[0], classes, flip, dx, dy, 1./sx, 1./sy); + + free_image(orig); + 
free_image(cropped); + + return d; +} + +data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, 5*boxes); + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + image sized = make_image(w, h, orig.c); + fill_image(sized, .5); + + float dw = jitter * orig.w; + float dh = jitter * orig.h; + + float new_ar = (orig.w + rand_uniform(-dw, dw)) / (orig.h + rand_uniform(-dh, dh)); + //float scale = rand_uniform(.25, 2); + float scale = 1; + + float nw, nh; + + if(new_ar < 1){ + nh = scale * h; + nw = nh * new_ar; + } else { + nw = scale * w; + nh = nw / new_ar; + } + + float dx = rand_uniform(0, w - nw); + float dy = rand_uniform(0, h - nh); + + place_image(orig, nw, nh, dx, dy, sized); + + random_distort_image(sized, hue, saturation, exposure); + + int flip = rand()%2; + if(flip) flip_image(sized); + d.X.vals[i] = sized.data; + + + fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, -dx/w, -dy/h, nw/w, nh/h); + + free_image(orig); + } + free(random_paths); + return d; +} + +void *load_thread(void *ptr) +{ + //printf("Loading data: %d\n", rand()); + load_args a = *(struct load_args*)ptr; + if(a.exposure == 0) a.exposure = 1; + if(a.saturation == 0) a.saturation = 1; + if(a.aspect == 0) a.aspect = 1; + + if (a.type == OLD_CLASSIFICATION_DATA){ + *a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h); + } else if (a.type == REGRESSION_DATA){ + *a.d = load_data_regression(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == CLASSIFICATION_DATA){ + *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.min, a.max, 
a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.center); + } else if (a.type == SUPER_DATA){ + *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale); + } else if (a.type == WRITING_DATA){ + *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h); + } else if (a.type == ISEG_DATA){ + *a.d = load_data_iseg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.scale, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == INSTANCE_DATA){ + *a.d = load_data_mask(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.coords, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == SEGMENTATION_DATA){ + *a.d = load_data_seg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.scale); + } else if (a.type == REGION_DATA){ + *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); + } else if (a.type == DETECTION_DATA){ + *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); + } else if (a.type == SWAG_DATA){ + *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter); + } else if (a.type == COMPARE_DATA){ + *a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h); + } else if (a.type == IMAGE_DATA){ + *(a.im) = load_image_color(a.path, 0, 0); + *(a.resized) = resize_image(*(a.im), a.w, a.h); + } else if (a.type == LETTERBOX_DATA){ + *(a.im) = load_image_color(a.path, 0, 0); + *(a.resized) = letterbox_image(*(a.im), a.w, a.h); + } else if (a.type == TAG_DATA){ + *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } + free(ptr); + return 0; +} + +pthread_t load_data_in_thread(load_args args) +{ + pthread_t thread; + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + 
if(pthread_create(&thread, 0, load_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void *load_threads(void *ptr) +{ + int i; + load_args args = *(load_args *)ptr; + if (args.threads == 0) args.threads = 1; + data *out = args.d; + int total = args.n; + free(ptr); + data *buffers = calloc(args.threads, sizeof(data)); + pthread_t *threads = calloc(args.threads, sizeof(pthread_t)); + for(i = 0; i < args.threads; ++i){ + args.d = buffers + i; + args.n = (i+1) * total/args.threads - i * total/args.threads; + threads[i] = load_data_in_thread(args); + } + for(i = 0; i < args.threads; ++i){ + pthread_join(threads[i], 0); + } + *out = concat_datas(buffers, args.threads); + out->shallow = 0; + for(i = 0; i < args.threads; ++i){ + buffers[i].shallow = 1; + free_data(buffers[i]); + } + free(buffers); + free(threads); + return 0; +} + +void load_data_blocking(load_args args) +{ + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + load_thread(ptr); +} + +pthread_t load_data(load_args args) +{ + pthread_t thread; + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + if(pthread_create(&thread, 0, load_threads, ptr)) error("Thread creation failed"); + return thread; +} + +data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h) +{ + if(m) paths = get_random_paths(paths, n, m); + char **replace_paths = find_replace_paths(paths, n, ".png", "-label.png"); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = load_image_paths_gray(replace_paths, n, out_w, out_h); + if(m) free(paths); + int i; + for(i = 0; i < n; ++i) free(replace_paths[i]); + free(replace_paths); + return d; +} + +data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = load_labels_paths(paths, n, labels, k, 0); + if(m) free(paths); 
+ return d; +} + +/* + data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) + { + data d = {0}; + d.indexes = calloc(n, sizeof(int)); + if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes); + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure); + d.y = load_labels_paths(paths, n, labels, k); + if(m) free(paths); + return d; + } + */ + +data load_data_super(char **paths, int n, int m, int w, int h, int scale) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + + int i; + d.X.rows = n; + d.X.vals = calloc(n, sizeof(float*)); + d.X.cols = w*h*3; + + d.y.rows = n; + d.y.vals = calloc(n, sizeof(float*)); + d.y.cols = w*scale * h*scale * 3; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], 0, 0); + image crop = random_crop_image(im, w*scale, h*scale); + int flip = rand()%2; + if (flip) flip_image(crop); + image resize = resize_image(crop, w, h); + d.X.vals[i] = resize.data; + d.y.vals[i] = crop.data; + free_image(im); + } + + if(m) free(paths); + return d; +} + +data load_data_regression(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0); + d.y = load_regression_labels_paths(paths, n, k); + if(m) free(paths); + return d; +} + +data select_data(data *orig, int *inds) +{ + data d = {0}; + d.shallow = 1; + d.w = orig[0].w; + d.h = orig[0].h; + + d.X.rows = orig[0].X.rows; + d.y.rows = orig[0].X.rows; + + d.X.cols = orig[0].X.cols; + d.y.cols = orig[0].y.cols; + + d.X.vals = calloc(orig[0].X.rows, sizeof(float *)); + d.y.vals = calloc(orig[0].y.rows, sizeof(float *)); + 
int i; + for(i = 0; i < d.X.rows; ++i){ + d.X.vals[i] = orig[inds[i]].X.vals[i]; + d.y.vals[i] = orig[inds[i]].y.vals[i]; + } + return d; +} + +data *tile_data(data orig, int divs, int size) +{ + data *ds = calloc(divs*divs, sizeof(data)); + int i, j; +#pragma omp parallel for + for(i = 0; i < divs*divs; ++i){ + data d; + d.shallow = 0; + d.w = orig.w/divs * size; + d.h = orig.h/divs * size; + d.X.rows = orig.X.rows; + d.X.cols = d.w*d.h*3; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + + d.y = copy_matrix(orig.y); +#pragma omp parallel for + for(j = 0; j < orig.X.rows; ++j){ + int x = (i%divs) * orig.w / divs - (d.w - orig.w/divs)/2; + int y = (i/divs) * orig.h / divs - (d.h - orig.h/divs)/2; + image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[j]); + d.X.vals[j] = crop_image(im, x, y, d.w, d.h).data; + } + ds[i] = d; + } + return ds; +} + +data resize_data(data orig, int w, int h) +{ + data d = {0}; + d.shallow = 0; + d.w = w; + d.h = h; + int i; + d.X.rows = orig.X.rows; + d.X.cols = w*h*3; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + + d.y = copy_matrix(orig.y); +#pragma omp parallel for + for(i = 0; i < orig.X.rows; ++i){ + image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[i]); + d.X.vals[i] = resize_image(im, w, h).data; + } + return d; +} + +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.w=size; + d.h=size; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, center); + d.y = load_labels_paths(paths, n, labels, k, hierarchy); + if(m) free(paths); + return d; +} + +data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +{ + if(m) paths = 
get_random_paths(paths, n, m); + data d = {0}; + d.w = size; + d.h = size; + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0); + d.y = load_tags_paths(paths, n, k); + if(m) free(paths); + return d; +} + +matrix concat_matrix(matrix m1, matrix m2) +{ + int i, count = 0; + matrix m; + m.cols = m1.cols; + m.rows = m1.rows+m2.rows; + m.vals = calloc(m1.rows + m2.rows, sizeof(float*)); + for(i = 0; i < m1.rows; ++i){ + m.vals[count++] = m1.vals[i]; + } + for(i = 0; i < m2.rows; ++i){ + m.vals[count++] = m2.vals[i]; + } + return m; +} + +data concat_data(data d1, data d2) +{ + data d = {0}; + d.shallow = 1; + d.X = concat_matrix(d1.X, d2.X); + d.y = concat_matrix(d1.y, d2.y); + d.w = d1.w; + d.h = d1.h; + return d; +} + +data concat_datas(data *d, int n) +{ + int i; + data out = {0}; + for(i = 0; i < n; ++i){ + data new = concat_data(d[i], out); + free_data(out); + out = new; + } + return out; +} + +data load_categorical_data_csv(char *filename, int target, int k) +{ + data d = {0}; + d.shallow = 0; + matrix X = csv_to_matrix(filename); + float *truth_1d = pop_column(&X, target); + float **truth = one_hot_encode(truth_1d, X.rows, k); + matrix y; + y.rows = X.rows; + y.cols = k; + y.vals = truth; + d.X = X; + d.y = y; + free(truth_1d); + return d; +} + +data load_cifar10_data(char *filename) +{ + data d = {0}; + d.shallow = 0; + long i,j; + matrix X = make_matrix(10000, 3072); + matrix y = make_matrix(10000, 10); + d.X = X; + d.y = y; + + FILE *fp = fopen(filename, "rb"); + if(!fp) file_error(filename); + for(i = 0; i < 10000; ++i){ + unsigned char bytes[3073]; + fread(bytes, 1, 3073, fp); + int class = bytes[0]; + y.vals[i][class] = 1; + for(j = 0; j < X.cols; ++j){ + X.vals[i][j] = (double)bytes[j+1]; + } + } + scale_data_rows(d, 1./255); + //normalize_data_rows(d); + fclose(fp); + return d; +} + +void get_random_batch(data d, int n, float *X, float *y) +{ + int j; + for(j = 0; j < n; ++j){ + int 
index = rand()%d.X.rows; + memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); + memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); + } +} + +void get_next_batch(data d, int n, int offset, float *X, float *y) +{ + int j; + for(j = 0; j < n; ++j){ + int index = offset + j; + memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); + if(y) memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); + } +} + +void smooth_data(data d) +{ + int i, j; + float scale = 1. / d.y.cols; + float eps = .1; + for(i = 0; i < d.y.rows; ++i){ + for(j = 0; j < d.y.cols; ++j){ + d.y.vals[i][j] = eps * scale + (1-eps) * d.y.vals[i][j]; + } + } +} + +data load_all_cifar10() +{ + data d = {0}; + d.shallow = 0; + int i,j,b; + matrix X = make_matrix(50000, 3072); + matrix y = make_matrix(50000, 10); + d.X = X; + d.y = y; + + + for(b = 0; b < 5; ++b){ + char buff[256]; + sprintf(buff, "data/cifar/cifar-10-batches-bin/data_batch_%d.bin", b+1); + FILE *fp = fopen(buff, "rb"); + if(!fp) file_error(buff); + for(i = 0; i < 10000; ++i){ + unsigned char bytes[3073]; + fread(bytes, 1, 3073, fp); + int class = bytes[0]; + y.vals[i+b*10000][class] = 1; + for(j = 0; j < X.cols; ++j){ + X.vals[i+b*10000][j] = (double)bytes[j+1]; + } + } + fclose(fp); + } + //normalize_data_rows(d); + scale_data_rows(d, 1./255); + smooth_data(d); + return d; +} + +data load_go(char *filename) +{ + FILE *fp = fopen(filename, "rb"); + matrix X = make_matrix(3363059, 361); + matrix y = make_matrix(3363059, 361); + int row, col; + + if(!fp) file_error(filename); + char *label; + int count = 0; + while((label = fgetl(fp))){ + int i; + if(count == X.rows){ + X = resize_matrix(X, count*2); + y = resize_matrix(y, count*2); + } + sscanf(label, "%d %d", &row, &col); + char *board = fgetl(fp); + + int index = row*19 + col; + y.vals[count][index] = 1; + + for(i = 0; i < 19*19; ++i){ + float val = 0; + if(board[i] == '1') val = 1; + else if(board[i] == '2') val = -1; + X.vals[count][i] = val; + } + 
++count; + free(label); + free(board); + } + X = resize_matrix(X, count); + y = resize_matrix(y, count); + + data d = {0}; + d.shallow = 0; + d.X = X; + d.y = y; + + + fclose(fp); + + return d; +} + + +void randomize_data(data d) +{ + int i; + for(i = d.X.rows-1; i > 0; --i){ + int index = rand()%i; + float *swap = d.X.vals[index]; + d.X.vals[index] = d.X.vals[i]; + d.X.vals[i] = swap; + + swap = d.y.vals[index]; + d.y.vals[index] = d.y.vals[i]; + d.y.vals[i] = swap; + } +} + +void scale_data_rows(data d, float s) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + scale_array(d.X.vals[i], d.X.cols, s); + } +} + +void translate_data_rows(data d, float s) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + translate_array(d.X.vals[i], d.X.cols, s); + } +} + +data copy_data(data d) +{ + data c = {0}; + c.w = d.w; + c.h = d.h; + c.shallow = 0; + c.num_boxes = d.num_boxes; + c.boxes = d.boxes; + c.X = copy_matrix(d.X); + c.y = copy_matrix(d.y); + return c; +} + +void normalize_data_rows(data d) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + normalize_array(d.X.vals[i], d.X.cols); + } +} + +data get_data_part(data d, int part, int total) +{ + data p = {0}; + p.shallow = 1; + p.X.rows = d.X.rows * (part + 1) / total - d.X.rows * part / total; + p.y.rows = d.y.rows * (part + 1) / total - d.y.rows * part / total; + p.X.cols = d.X.cols; + p.y.cols = d.y.cols; + p.X.vals = d.X.vals + d.X.rows * part / total; + p.y.vals = d.y.vals + d.y.rows * part / total; + return p; +} + +data get_random_data(data d, int num) +{ + data r = {0}; + r.shallow = 1; + + r.X.rows = num; + r.y.rows = num; + + r.X.cols = d.X.cols; + r.y.cols = d.y.cols; + + r.X.vals = calloc(num, sizeof(float *)); + r.y.vals = calloc(num, sizeof(float *)); + + int i; + for(i = 0; i < num; ++i){ + int index = rand()%d.X.rows; + r.X.vals[i] = d.X.vals[index]; + r.y.vals[i] = d.y.vals[index]; + } + return r; +} + +data *split_data(data d, int part, int total) +{ + data *split = calloc(2, sizeof(data)); + int i; + int start = 
part*d.X.rows/total; + int end = (part+1)*d.X.rows/total; + data train; + data test; + train.shallow = test.shallow = 1; + + test.X.rows = test.y.rows = end-start; + train.X.rows = train.y.rows = d.X.rows - (end-start); + train.X.cols = test.X.cols = d.X.cols; + train.y.cols = test.y.cols = d.y.cols; + + train.X.vals = calloc(train.X.rows, sizeof(float*)); + test.X.vals = calloc(test.X.rows, sizeof(float*)); + train.y.vals = calloc(train.y.rows, sizeof(float*)); + test.y.vals = calloc(test.y.rows, sizeof(float*)); + + for(i = 0; i < start; ++i){ + train.X.vals[i] = d.X.vals[i]; + train.y.vals[i] = d.y.vals[i]; + } + for(i = start; i < end; ++i){ + test.X.vals[i-start] = d.X.vals[i]; + test.y.vals[i-start] = d.y.vals[i]; + } + for(i = end; i < d.X.rows; ++i){ + train.X.vals[i-(end-start)] = d.X.vals[i]; + train.y.vals[i-(end-start)] = d.y.vals[i]; + } + split[0] = train; + split[1] = test; + return split; +} + diff --git a/workloads/realworld/async/darknet/src/data.h b/workloads/realworld/async/darknet/src/data.h new file mode 100644 index 0000000000000000000000000000000000000000..781906f8743c7d88c0fa134403d0ae020b544053 --- /dev/null +++ b/workloads/realworld/async/darknet/src/data.h @@ -0,0 +1,50 @@ +#ifndef DATA_H +#define DATA_H +#include + +#include "darknet.h" +#include "matrix.h" +#include "list.h" +#include "image.h" +#include "tree.h" + +static inline float distance_from_edge(int x, int max) +{ + int dx = (max/2) - x; + if (dx < 0) dx = -dx; + dx = (max/2) + 1 - dx; + dx *= 2; + float dist = (float)dx/max; + if (dist > 1) dist = 1; + return dist; +} +void load_data_blocking(load_args args); + + +void print_letters(float *pred, int n); +data load_data_captcha(char **paths, int n, int m, int k, int w, int h); +data load_data_captcha_encode(char **paths, int n, int m, int w, int h); +data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure); +data load_data_tag(char 
**paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); +matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); +data load_data_super(char **paths, int n, int m, int w, int h, int scale); +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); +data load_data_regression(char **paths, int n, int m, int classes, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); +data load_go(char *filename); + + +data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h); + +void get_random_batch(data d, int n, float *X, float *y); +data get_data_part(data d, int part, int total); +data get_random_data(data d, int num); +data load_categorical_data_csv(char *filename, int target, int k); +void normalize_data_rows(data d); +void scale_data_rows(data d, float s); +void translate_data_rows(data d, float s); +void randomize_data(data d); +data *split_data(data d, int part, int total); +data concat_datas(data *d, int n); +void fill_truth(char *path, char **labels, int k, float *truth); + +#endif diff --git a/workloads/realworld/async/darknet/src/deconvolutional_kernels.cu b/workloads/realworld/async/darknet/src/deconvolutional_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..ed12e7a3dc5148b1cbff746f13901a9653bc0f6d --- /dev/null +++ b/workloads/realworld/async/darknet/src/deconvolutional_kernels.cu @@ -0,0 +1,139 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "convolutional_layer.h" +#include "deconvolutional_layer.h" +#include "batchnorm_layer.h" +#include "gemm.h" +#include "blas.h" +#include 
"im2col.h" +#include "col2im.h" +#include "utils.h" +#include "cuda_dark.h" +} + +extern "C" void forward_deconvolutional_layer_gpu(layer l, network net) +{ + int i; + + int m = l.size*l.size*l.n; + int n = l.h*l.w; + int k = l.c; + + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + + for(i = 0; i < l.batch; ++i){ + float *a = l.weights_gpu; + float *b = net.input_gpu + i*l.c*l.h*l.w; + float *c = net.workspace; + + gemm_gpu(1,0,m,n,k,1,a,m,b,n,0,c,n); + + col2im_gpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output_gpu+i*l.outputs); + } + if (l.batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); + } + activate_array_gpu(l.output_gpu, l.batch*l.n*l.out_w*l.out_h, l.activation); +} + +extern "C" void backward_deconvolutional_layer_gpu(layer l, network net) +{ + int i; + + //constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + + if(l.batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); + } + + //if(net.delta_gpu) memset(net.delta_gpu, 0, l.batch*l.h*l.w*l.c*sizeof(float)); + + for(i = 0; i < l.batch; ++i){ + int m = l.c; + int n = l.size*l.size*l.n; + int k = l.h*l.w; + + float *a = net.input_gpu + i*m*k; + float *b = net.workspace; + float *c = l.weight_updates_gpu; + + im2col_gpu(l.delta_gpu + i*l.outputs, l.out_c, l.out_h, l.out_w, + l.size, l.stride, l.pad, b); + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if(net.delta_gpu){ + int m = l.c; + int n = l.h*l.w; + int k = l.size*l.size*l.n; + + float *a = l.weights_gpu; + float *b = net.workspace; + float *c = net.delta_gpu + i*n*m; + + gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } +} + +extern "C" void pull_deconvolutional_layer(layer l) +{ + cuda_pull_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size); + 
cuda_pull_array(l.biases_gpu, l.biases, l.n); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size); + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_pull_array(l.scales_gpu, l.scales, l.n); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + +extern "C" void push_deconvolutional_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size); + cuda_push_array(l.biases_gpu, l.biases, l.n); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size); + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_push_array(l.scales_gpu, l.scales, l.n); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + +void update_deconvolutional_layer_gpu(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + if(a.adam){ + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t); + adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + if(l.scales_gpu){ + adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + } + }else{ + axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1); + + axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.n, momentum, l.bias_updates_gpu, 1); + + if(l.scales_gpu){ + axpy_gpu(l.n, 
learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.n, momentum, l.scale_updates_gpu, 1); + } + } +} + diff --git a/workloads/realworld/async/darknet/src/deconvolutional_layer.c b/workloads/realworld/async/darknet/src/deconvolutional_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..00c0e85771d42f99de969f9fd03e5f0f359d405c --- /dev/null +++ b/workloads/realworld/async/darknet/src/deconvolutional_layer.c @@ -0,0 +1,312 @@ +#include "deconvolutional_layer.h" +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "utils.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" + +#include +#include + + +static size_t get_workspace_size(layer l){ + return (size_t)l.h*l.w*l.size*l.size*l.n*sizeof(float); +} + +void bilinear_init(layer l) +{ + int i,j,f; + float center = (l.size-1) / 2.; + for(f = 0; f < l.n; ++f){ + for(j = 0; j < l.size; ++j){ + for(i = 0; i < l.size; ++i){ + float val = (1 - fabs(i - center)) * (1 - fabs(j - center)); + int c = f%l.c; + int ind = f*l.size*l.size*l.c + c*l.size*l.size + j*l.size + i; + l.weights[ind] = val; + } + } + } +} + + +layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam) +{ + int i; + layer l = {0}; + l.type = DECONVOLUTIONAL; + + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.batch = batch; + l.stride = stride; + l.size = size; + + l.nweights = c*n*size*size; + l.nbiases = n; + + l.weights = calloc(c*n*size*size, sizeof(float)); + l.weight_updates = calloc(c*n*size*size, sizeof(float)); + + l.biases = calloc(n, sizeof(float)); + l.bias_updates = calloc(n, sizeof(float)); + //float scale = n/(size*size*c); + //printf("scale: %f\n", scale); + float scale = .02; + for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal(); + //bilinear_init(l); + for(i = 0; i < n; ++i){ + l.biases[i] = 0; + } + l.pad = padding; + + 
l.out_h = (l.h - 1) * l.stride + l.size - 2*l.pad; + l.out_w = (l.w - 1) * l.stride + l.size - 2*l.pad; + l.out_c = n; + l.outputs = l.out_w * l.out_h * l.out_c; + l.inputs = l.w * l.h * l.c; + + scal_cpu(l.nweights, (float)l.out_w*l.out_h/(l.w*l.h), l.weights, 1); + + l.output = calloc(l.batch*l.outputs, sizeof(float)); + l.delta = calloc(l.batch*l.outputs, sizeof(float)); + + l.forward = forward_deconvolutional_layer; + l.backward = backward_deconvolutional_layer; + l.update = update_deconvolutional_layer; + + l.batch_normalize = batch_normalize; + + if(batch_normalize){ + l.scales = calloc(n, sizeof(float)); + l.scale_updates = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(n, sizeof(float)); + l.variance = calloc(n, sizeof(float)); + + l.mean_delta = calloc(n, sizeof(float)); + l.variance_delta = calloc(n, sizeof(float)); + + l.rolling_mean = calloc(n, sizeof(float)); + l.rolling_variance = calloc(n, sizeof(float)); + l.x = calloc(l.batch*l.outputs, sizeof(float)); + l.x_norm = calloc(l.batch*l.outputs, sizeof(float)); + } + if(adam){ + l.m = calloc(c*n*size*size, sizeof(float)); + l.v = calloc(c*n*size*size, sizeof(float)); + l.bias_m = calloc(n, sizeof(float)); + l.scale_m = calloc(n, sizeof(float)); + l.bias_v = calloc(n, sizeof(float)); + l.scale_v = calloc(n, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_deconvolutional_layer_gpu; + l.backward_gpu = backward_deconvolutional_layer_gpu; + l.update_gpu = update_deconvolutional_layer_gpu; + + if(gpu_index >= 0){ + + if (adam) { + l.m_gpu = cuda_make_array(l.m, c*n*size*size); + l.v_gpu = cuda_make_array(l.v, c*n*size*size); + l.bias_m_gpu = cuda_make_array(l.bias_m, n); + l.bias_v_gpu = cuda_make_array(l.bias_v, n); + l.scale_m_gpu = cuda_make_array(l.scale_m, n); + l.scale_v_gpu = cuda_make_array(l.scale_v, n); + } + l.weights_gpu = cuda_make_array(l.weights, c*n*size*size); + l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size); 
+ + l.biases_gpu = cuda_make_array(l.biases, n); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*l.out_h*l.out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n); + + if(batch_normalize){ + l.mean_gpu = cuda_make_array(0, n); + l.variance_gpu = cuda_make_array(0, n); + + l.rolling_mean_gpu = cuda_make_array(0, n); + l.rolling_variance_gpu = cuda_make_array(0, n); + + l.mean_delta_gpu = cuda_make_array(0, n); + l.variance_delta_gpu = cuda_make_array(0, n); + + l.scales_gpu = cuda_make_array(l.scales, n); + l.scale_updates_gpu = cuda_make_array(0, n); + + l.x_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n); + l.x_norm_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n); + } + } + #ifdef CUDNN + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); + #endif +#endif + + l.activation = activation; + l.workspace_size = get_workspace_size(l); + + fprintf(stderr, "deconv%5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); + + return l; +} + +void denormalize_deconvolutional_layer(layer l) +{ + int i, j; + for(i = 0; i < l.n; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001); + for(j = 0; j < l.c*l.size*l.size; ++j){ + l.weights[i*l.c*l.size*l.size + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + +void resize_deconvolutional_layer(layer *l, int h, int w) +{ + l->h = h; + l->w = w; + l->out_h = (l->h - 1) * l->stride + l->size - 2*l->pad; + l->out_w = (l->w - 1) * l->stride + l->size - 2*l->pad; + + l->outputs = l->out_h * l->out_w * l->out_c; + 
l->inputs = l->w * l->h * l->c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + if(l->batch_normalize){ + l->x = realloc(l->x, l->batch*l->outputs*sizeof(float)); + l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float)); + } + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); + + if(l->batch_normalize){ + cuda_free(l->x_gpu); + cuda_free(l->x_norm_gpu); + + l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); + l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); + } + #ifdef CUDNN + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + #endif +#endif + l->workspace_size = get_workspace_size(*l); +} + +void forward_deconvolutional_layer(const layer l, network net) +{ + int i; + + int m = l.size*l.size*l.n; + int n = l.h*l.w; + int k = l.c; + + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + + for(i = 0; i < l.batch; ++i){ + float *a = l.weights; + float *b = net.input + i*l.c*l.h*l.w; + float *c = net.workspace; + + gemm_cpu(1,0,m,n,k,1,a,m,b,n,0,c,n); + + col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output+i*l.outputs); + } + if (l.batch_normalize) { + forward_batchnorm_layer(l, net); + } else { + add_bias(l.output, l.biases, l.batch, l.n, l.out_w*l.out_h); + } + activate_array(l.output, l.batch*l.n*l.out_w*l.out_h, l.activation); +} + +void backward_deconvolutional_layer(layer l, network net) +{ + int i; + + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l.bias_updates, l.delta, 
l.batch, l.n, l.out_w*l.out_h); + } + + //if(net.delta) memset(net.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float)); + + for(i = 0; i < l.batch; ++i){ + int m = l.c; + int n = l.size*l.size*l.n; + int k = l.h*l.w; + + float *a = net.input + i*m*k; + float *b = net.workspace; + float *c = l.weight_updates; + + im2col_cpu(l.delta + i*l.outputs, l.out_c, l.out_h, l.out_w, + l.size, l.stride, l.pad, b); + gemm_cpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if(net.delta){ + int m = l.c; + int n = l.h*l.w; + int k = l.size*l.size*l.n; + + float *a = l.weights; + float *b = net.workspace; + float *c = net.delta + i*n*m; + + gemm_cpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } +} + +void update_deconvolutional_layer(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int size = l.size*l.size*l.c*l.n; + axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.n, momentum, l.bias_updates, 1); + + if(l.scales){ + axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.n, momentum, l.scale_updates, 1); + } + + axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(size, momentum, l.weight_updates, 1); +} + + + diff --git a/workloads/realworld/async/darknet/src/deconvolutional_layer.h b/workloads/realworld/async/darknet/src/deconvolutional_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..722a1a58feec4ef13dac2b811df98e3f9960d4ef --- /dev/null +++ b/workloads/realworld/async/darknet/src/deconvolutional_layer.h @@ -0,0 +1,25 @@ +#ifndef DECONVOLUTIONAL_LAYER_H +#define DECONVOLUTIONAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +#ifdef GPU +void forward_deconvolutional_layer_gpu(layer l, network net); +void 
backward_deconvolutional_layer_gpu(layer l, network net); +void update_deconvolutional_layer_gpu(layer l, update_args a); +void push_deconvolutional_layer(layer l); +void pull_deconvolutional_layer(layer l); +#endif + +layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam); +void resize_deconvolutional_layer(layer *l, int h, int w); +void forward_deconvolutional_layer(const layer l, network net); +void update_deconvolutional_layer(layer l, update_args a); +void backward_deconvolutional_layer(layer l, network net); + +#endif + diff --git a/workloads/realworld/async/darknet/src/demo.c b/workloads/realworld/async/darknet/src/demo.c new file mode 100644 index 0000000000000000000000000000000000000000..b89efb8dc4c044c0240b7442e39222405409a676 --- /dev/null +++ b/workloads/realworld/async/darknet/src/demo.c @@ -0,0 +1,349 @@ +#include "network.h" +#include "detection_layer.h" +#include "region_layer.h" +#include "cost_layer.h" +#include "utils.h" +#include "parser.h" +#include "box.h" +#include "image.h" +#include "demo.h" +#include + +#define DEMO 1 + +#ifdef OPENCV + +static char **demo_names; +static image **demo_alphabet; +static int demo_classes; + +static network *net; +static image buff [3]; +static image buff_letter[3]; +static int buff_index = 0; +static void * cap; +static float fps = 0; +static float demo_thresh = 0; +static float demo_hier = .5; +static int running = 0; + +static int demo_frame = 3; +static int demo_index = 0; +static float **predictions; +static float *avg; +static int demo_done = 0; +static int demo_total = 0; +double demo_time; + +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num); + +int size_network(network *net) +{ + int i; + int count = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + 
count += l.outputs; + } + } + return count; +} + +void remember_network(network *net) +{ + int i; + int count = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + memcpy(predictions[demo_index] + count, net->layers[i].output, sizeof(float) * l.outputs); + count += l.outputs; + } + } +} + +detection *avg_predictions(network *net, int *nboxes) +{ + int i, j; + int count = 0; + fill_cpu(demo_total, 0, avg, 1); + for(j = 0; j < demo_frame; ++j){ + axpy_cpu(demo_total, 1./demo_frame, predictions[j], 1, avg, 1); + } + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + memcpy(l.output, avg + count, sizeof(float) * l.outputs); + count += l.outputs; + } + } + detection *dets = get_network_boxes(net, buff[0].w, buff[0].h, demo_thresh, demo_hier, 0, 1, nboxes); + return dets; +} + +void *detect_in_thread(void *ptr) +{ + running = 1; + float nms = .4; + + layer l = net->layers[net->n-1]; + float *X = buff_letter[(buff_index+2)%3].data; + network_predict(net, X); + + /* + if(l.type == DETECTION){ + get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0); + } else */ + remember_network(net); + detection *dets = 0; + int nboxes = 0; + dets = avg_predictions(net, &nboxes); + + + /* + int i,j; + box zero = {0}; + int classes = l.classes; + for(i = 0; i < demo_detections; ++i){ + avg[i].objectness = 0; + avg[i].bbox = zero; + memset(avg[i].prob, 0, classes*sizeof(float)); + for(j = 0; j < demo_frame; ++j){ + axpy_cpu(classes, 1./demo_frame, dets[j][i].prob, 1, avg[i].prob, 1); + avg[i].objectness += dets[j][i].objectness * 1./demo_frame; + avg[i].bbox.x += dets[j][i].bbox.x * 1./demo_frame; + avg[i].bbox.y += dets[j][i].bbox.y * 1./demo_frame; + avg[i].bbox.w += dets[j][i].bbox.w * 1./demo_frame; + avg[i].bbox.h += dets[j][i].bbox.h * 1./demo_frame; + } + //copy_cpu(classes, dets[0][i].prob, 1, avg[i].prob, 1); + 
//avg[i].objectness = dets[0][i].objectness; + } + */ + + if (nms > 0) do_nms_obj(dets, nboxes, l.classes, nms); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.1f\n",fps); + printf("Objects:\n\n"); + image display = buff[(buff_index+2) % 3]; + draw_detections(display, dets, nboxes, demo_thresh, demo_names, demo_alphabet, demo_classes); + free_detections(dets, nboxes); + + demo_index = (demo_index + 1)%demo_frame; + running = 0; + return 0; +} + +void *fetch_in_thread(void *ptr) +{ + free_image(buff[buff_index]); + buff[buff_index] = get_image_from_stream(cap); + if(buff[buff_index].data == 0) { + demo_done = 1; + return 0; + } + letterbox_image_into(buff[buff_index], net->w, net->h, buff_letter[buff_index]); + return 0; +} + +void *display_in_thread(void *ptr) +{ + int c = show_image(buff[(buff_index + 1)%3], "Demo", 1); + if (c != -1) c = c%256; + if (c == 27) { + demo_done = 1; + return 0; + } else if (c == 82) { + demo_thresh += .02; + } else if (c == 84) { + demo_thresh -= .02; + if(demo_thresh <= .02) demo_thresh = .02; + } else if (c == 83) { + demo_hier += .02; + } else if (c == 81) { + demo_hier -= .02; + if(demo_hier <= .0) demo_hier = .0; + } + return 0; +} + +void *display_loop(void *ptr) +{ + while(1){ + display_in_thread(0); + } +} + +void *detect_loop(void *ptr) +{ + while(1){ + detect_in_thread(0); + } +} + +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) +{ + //demo_frame = avg_frames; + image **alphabet = load_alphabet(); + demo_names = names; + demo_alphabet = alphabet; + demo_classes = classes; + demo_thresh = thresh; + demo_hier = hier; + printf("Demo\n"); + net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + pthread_t detect_thread; + pthread_t fetch_thread; + + srand(2222222); + + int i; + demo_total = size_network(net); + predictions = 
calloc(demo_frame, sizeof(float*)); + for (i = 0; i < demo_frame; ++i){ + predictions[i] = calloc(demo_total, sizeof(float)); + } + avg = calloc(demo_total, sizeof(float)); + + if(filename){ + printf("video file: %s\n", filename); + cap = open_video_stream(filename, 0, 0, 0, 0); + }else{ + cap = open_video_stream(0, cam_index, w, h, frames); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + + buff[0] = get_image_from_stream(cap); + buff[1] = copy_image(buff[0]); + buff[2] = copy_image(buff[0]); + buff_letter[0] = letterbox_image(buff[0], net->w, net->h); + buff_letter[1] = letterbox_image(buff[0], net->w, net->h); + buff_letter[2] = letterbox_image(buff[0], net->w, net->h); + + int count = 0; + if(!prefix){ + make_window("Demo", 1352, 1013, fullscreen); + } + + demo_time = what_time_is_it_now(); + + while(!demo_done){ + buff_index = (buff_index + 1) %3; + if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); + if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); + if(!prefix){ + fps = 1./(what_time_is_it_now() - demo_time); + demo_time = what_time_is_it_now(); + display_in_thread(0); + }else{ + char name[256]; + sprintf(name, "%s_%08d", prefix, count); + save_image(buff[(buff_index + 1)%3], name); + } + pthread_join(fetch_thread, 0); + pthread_join(detect_thread, 0); + ++count; + } +} + +/* + void demo_compare(char *cfg1, char *weight1, char *cfg2, char *weight2, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) + { + demo_frame = avg_frames; + predictions = calloc(demo_frame, sizeof(float*)); + image **alphabet = load_alphabet(); + demo_names = names; + demo_alphabet = alphabet; + demo_classes = classes; + demo_thresh = thresh; + demo_hier = hier; + printf("Demo\n"); + net = load_network(cfg1, weight1, 0); + set_batch_network(net, 1); + pthread_t detect_thread; 
+ pthread_t fetch_thread; + + srand(2222222); + + if(filename){ + printf("video file: %s\n", filename); + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + if(frames){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FPS, frames); + } + } + + if(!cap) error("Couldn't connect to webcam.\n"); + + layer l = net->layers[net->n-1]; + demo_detections = l.n*l.w*l.h; + int j; + + avg = (float *) calloc(l.outputs, sizeof(float)); + for(j = 0; j < demo_frame; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float)); + + boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box)); + probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *)); + for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes+1, sizeof(float)); + + buff[0] = get_image_from_stream(cap); + buff[1] = copy_image(buff[0]); + buff[2] = copy_image(buff[0]); + buff_letter[0] = letterbox_image(buff[0], net->w, net->h); + buff_letter[1] = letterbox_image(buff[0], net->w, net->h); + buff_letter[2] = letterbox_image(buff[0], net->w, net->h); + ipl = cvCreateImage(cvSize(buff[0].w,buff[0].h), IPL_DEPTH_8U, buff[0].c); + + int count = 0; + if(!prefix){ + cvNamedWindow("Demo", CV_WINDOW_NORMAL); + if(fullscreen){ + cvSetWindowProperty("Demo", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); + } else { + cvMoveWindow("Demo", 0, 0); + cvResizeWindow("Demo", 1352, 1013); + } + } + + demo_time = what_time_is_it_now(); + + while(!demo_done){ +buff_index = (buff_index + 1) %3; +if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); +if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); +if(!prefix){ + fps = 1./(what_time_is_it_now() - demo_time); + demo_time = what_time_is_it_now(); + display_in_thread(0); +}else{ + char name[256]; + sprintf(name, "%s_%08d", prefix, count); + 
save_image(buff[(buff_index + 1)%3], name); +} +pthread_join(fetch_thread, 0); +pthread_join(detect_thread, 0); +++count; +} +} +*/ +#else +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg, float hier, int w, int h, int frames, int fullscreen) +{ + fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); +} +#endif + diff --git a/workloads/realworld/async/darknet/src/demo.h b/workloads/realworld/async/darknet/src/demo.h new file mode 100644 index 0000000000000000000000000000000000000000..86e46541d1a7473b22373b29bc6ff9cc281d4939 --- /dev/null +++ b/workloads/realworld/async/darknet/src/demo.h @@ -0,0 +1,6 @@ +#ifndef DEMO_H +#define DEMO_H + +#include "image.h" + +#endif diff --git a/workloads/realworld/async/darknet/src/detection_layer.c b/workloads/realworld/async/darknet/src/detection_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..565fa3c3f7d123736d65661d3be8ea91e26b3d5c --- /dev/null +++ b/workloads/realworld/async/darknet/src/detection_layer.c @@ -0,0 +1,275 @@ +#include "detection_layer.h" +#include "activations.h" +#include "softmax_layer.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore) +{ + detection_layer l = {0}; + l.type = DETECTION; + + l.n = n; + l.batch = batch; + l.inputs = inputs; + l.classes = classes; + l.coords = coords; + l.rescore = rescore; + l.side = side; + l.w = side; + l.h = side; + assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs); + l.cost = calloc(1, sizeof(float)); + l.outputs = l.inputs; + l.truths = l.side*l.side*(1+l.coords+l.classes); + l.output = calloc(batch*l.outputs, sizeof(float)); + l.delta = calloc(batch*l.outputs, sizeof(float)); + + l.forward = forward_detection_layer; + l.backward = 
backward_detection_layer; +#ifdef GPU + l.forward_gpu = forward_detection_layer_gpu; + l.backward_gpu = backward_detection_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "Detection Layer\n"); + srand(0); + + return l; +} + +void forward_detection_layer(const detection_layer l, network net) +{ + int locations = l.side*l.side; + int i,j; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + //if(l.reorg) reorg(l.output, l.w*l.h, size*l.n, l.batch, 1); + int b; + if (l.softmax){ + for(b = 0; b < l.batch; ++b){ + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + int offset = i*l.classes; + softmax(l.output + index + offset, l.classes, 1, 1, + l.output + index + offset); + } + } + } + if(net.train){ + float avg_iou = 0; + float avg_cat = 0; + float avg_allcat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + *(l.cost) = 0; + int size = l.inputs * l.batch; + memset(l.delta, 0, size * sizeof(float)); + for (b = 0; b < l.batch; ++b){ + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + int truth_index = (b*locations + i)*(1+l.coords+l.classes); + int is_obj = net.truth[truth_index]; + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + l.delta[p_index] = l.noobject_scale*(0 - l.output[p_index]); + *(l.cost) += l.noobject_scale*pow(l.output[p_index], 2); + avg_anyobj += l.output[p_index]; + } + + int best_index = -1; + float best_iou = 0; + float best_rmse = 20; + + if (!is_obj){ + continue; + } + + int class_index = index + i*l.classes; + for(j = 0; j < l.classes; ++j) { + l.delta[class_index+j] = l.class_scale * (net.truth[truth_index+1+j] - l.output[class_index+j]); + *(l.cost) += l.class_scale * pow(net.truth[truth_index+1+j] - l.output[class_index+j], 2); + if(net.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j]; + avg_allcat += l.output[class_index+j]; + } 
+ + box truth = float_to_box(net.truth + truth_index + 1 + l.classes, 1); + truth.x /= l.side; + truth.y /= l.side; + + for(j = 0; j < l.n; ++j){ + int box_index = index + locations*(l.classes + l.n) + (i*l.n + j) * l.coords; + box out = float_to_box(l.output + box_index, 1); + out.x /= l.side; + out.y /= l.side; + + if (l.sqrt){ + out.w = out.w*out.w; + out.h = out.h*out.h; + } + + float iou = box_iou(out, truth); + //iou = 0; + float rmse = box_rmse(out, truth); + if(best_iou > 0 || iou > 0){ + if(iou > best_iou){ + best_iou = iou; + best_index = j; + } + }else{ + if(rmse < best_rmse){ + best_rmse = rmse; + best_index = j; + } + } + } + + if(l.forced){ + if(truth.w*truth.h < .1){ + best_index = 1; + }else{ + best_index = 0; + } + } + if(l.random && *(net.seen) < 64000){ + best_index = rand()%l.n; + } + + int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords; + int tbox_index = truth_index + 1 + l.classes; + + box out = float_to_box(l.output + box_index, 1); + out.x /= l.side; + out.y /= l.side; + if (l.sqrt) { + out.w = out.w*out.w; + out.h = out.h*out.h; + } + float iou = box_iou(out, truth); + + //printf("%d,", best_index); + int p_index = index + locations*l.classes + i*l.n + best_index; + *(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2); + *(l.cost) += l.object_scale * pow(1-l.output[p_index], 2); + avg_obj += l.output[p_index]; + l.delta[p_index] = l.object_scale * (1.-l.output[p_index]); + + if(l.rescore){ + l.delta[p_index] = l.object_scale * (iou - l.output[p_index]); + } + + l.delta[box_index+0] = l.coord_scale*(net.truth[tbox_index + 0] - l.output[box_index + 0]); + l.delta[box_index+1] = l.coord_scale*(net.truth[tbox_index + 1] - l.output[box_index + 1]); + l.delta[box_index+2] = l.coord_scale*(net.truth[tbox_index + 2] - l.output[box_index + 2]); + l.delta[box_index+3] = l.coord_scale*(net.truth[tbox_index + 3] - l.output[box_index + 3]); + if(l.sqrt){ + l.delta[box_index+2] = 
l.coord_scale*(sqrt(net.truth[tbox_index + 2]) - l.output[box_index + 2]); + l.delta[box_index+3] = l.coord_scale*(sqrt(net.truth[tbox_index + 3]) - l.output[box_index + 3]); + } + + *(l.cost) += pow(1-iou, 2); + avg_iou += iou; + ++count; + } + } + + if(0){ + float *costs = calloc(l.batch*locations*l.n, sizeof(float)); + for (b = 0; b < l.batch; ++b) { + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + costs[b*locations*l.n + i*l.n + j] = l.delta[p_index]*l.delta[p_index]; + } + } + } + int indexes[100]; + top_k(costs, l.batch*locations*l.n, 100, indexes); + float cutoff = costs[indexes[99]]; + for (b = 0; b < l.batch; ++b) { + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + if (l.delta[p_index]*l.delta[p_index] < cutoff) l.delta[p_index] = 0; + } + } + } + free(costs); + } + + + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + + + printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count); + //if(l.reorg) reorg(l.delta, l.w*l.h, size*l.n, l.batch, 0); + } +} + +void backward_detection_layer(const detection_layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +void get_detection_detections(layer l, int w, int h, float thresh, detection *dets) +{ + int i,j,n; + float *predictions = l.output; + //int per_cell = 5*num+classes; + for (i = 0; i < l.side*l.side; ++i){ + int row = i / l.side; + int col = i % l.side; + for(n = 0; n < l.n; ++n){ + int index = i*l.n + n; + int p_index = l.side*l.side*l.classes + i*l.n + n; + float scale = predictions[p_index]; + int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n)*4; + box b; + b.x = (predictions[box_index + 0] + 
col) / l.side * w; + b.y = (predictions[box_index + 1] + row) / l.side * h; + b.w = pow(predictions[box_index + 2], (l.sqrt?2:1)) * w; + b.h = pow(predictions[box_index + 3], (l.sqrt?2:1)) * h; + dets[index].bbox = b; + dets[index].objectness = scale; + for(j = 0; j < l.classes; ++j){ + int class_index = i*l.classes; + float prob = scale*predictions[class_index+j]; + dets[index].prob[j] = (prob > thresh) ? prob : 0; + } + } + } +} + +#ifdef GPU + +void forward_detection_layer_gpu(const detection_layer l, network net) +{ + if(!net.train){ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + return; + } + + cuda_pull_array(net.input_gpu, net.input, l.batch*l.inputs); + forward_detection_layer(l, net); + cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +void backward_detection_layer_gpu(detection_layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); + //copy_gpu(l.batch*l.inputs, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/async/darknet/src/detection_layer.h b/workloads/realworld/async/darknet/src/detection_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1c818535700c770c7a5d9387534b199b58876198 --- /dev/null +++ b/workloads/realworld/async/darknet/src/detection_layer.h @@ -0,0 +1,18 @@ +#ifndef DETECTION_LAYER_H +#define DETECTION_LAYER_H + +#include "layer.h" +#include "network.h" + +typedef layer detection_layer; + +detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore); +void forward_detection_layer(const detection_layer l, network net); +void backward_detection_layer(const detection_layer l, network net); + +#ifdef GPU +void forward_detection_layer_gpu(const detection_layer l, network net); +void backward_detection_layer_gpu(detection_layer l, network net); +#endif + +#endif diff --git 
a/workloads/realworld/async/darknet/src/dropout_layer.c b/workloads/realworld/async/darknet/src/dropout_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..8fefa22caeddd174b2a7010274fadae854c742c1 --- /dev/null +++ b/workloads/realworld/async/darknet/src/dropout_layer.c @@ -0,0 +1,60 @@ +#include "dropout_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include +#include + +dropout_layer make_dropout_layer(int batch, int inputs, float probability) +{ + dropout_layer l = {0}; + l.type = DROPOUT; + l.probability = probability; + l.inputs = inputs; + l.outputs = inputs; + l.batch = batch; + l.rand = calloc(inputs*batch, sizeof(float)); + l.scale = 1./(1.-probability); + l.forward = forward_dropout_layer; + l.backward = backward_dropout_layer; + #ifdef GPU + l.forward_gpu = forward_dropout_layer_gpu; + l.backward_gpu = backward_dropout_layer_gpu; + l.rand_gpu = cuda_make_array(l.rand, inputs*batch); + #endif + fprintf(stderr, "dropout p = %.2f %4d -> %4d\n", probability, inputs, inputs); + return l; +} + +void resize_dropout_layer(dropout_layer *l, int inputs) +{ + l->rand = realloc(l->rand, l->inputs*l->batch*sizeof(float)); + #ifdef GPU + cuda_free(l->rand_gpu); + + l->rand_gpu = cuda_make_array(l->rand, inputs*l->batch); + #endif +} + +void forward_dropout_layer(dropout_layer l, network net) +{ + int i; + if (!net.train) return; + for(i = 0; i < l.batch * l.inputs; ++i){ + float r = rand_uniform(0, 1); + l.rand[i] = r; + if(r < l.probability) net.input[i] = 0; + else net.input[i] *= l.scale; + } +} + +void backward_dropout_layer(dropout_layer l, network net) +{ + int i; + if(!net.delta) return; + for(i = 0; i < l.batch * l.inputs; ++i){ + float r = l.rand[i]; + if(r < l.probability) net.delta[i] = 0; + else net.delta[i] *= l.scale; + } +} + diff --git a/workloads/realworld/async/darknet/src/dropout_layer.h b/workloads/realworld/async/darknet/src/dropout_layer.h new file mode 100644 index 
0000000000000000000000000000000000000000..01f94d4d7d10b732fb0e558089579e95128a70bd --- /dev/null +++ b/workloads/realworld/async/darknet/src/dropout_layer.h @@ -0,0 +1,20 @@ +#ifndef DROPOUT_LAYER_H +#define DROPOUT_LAYER_H + +#include "layer.h" +#include "network.h" + +typedef layer dropout_layer; + +dropout_layer make_dropout_layer(int batch, int inputs, float probability); + +void forward_dropout_layer(dropout_layer l, network net); +void backward_dropout_layer(dropout_layer l, network net); +void resize_dropout_layer(dropout_layer *l, int inputs); + +#ifdef GPU +void forward_dropout_layer_gpu(dropout_layer l, network net); +void backward_dropout_layer_gpu(dropout_layer l, network net); + +#endif +#endif diff --git a/workloads/realworld/async/darknet/src/dropout_layer_kernels.cu b/workloads/realworld/async/darknet/src/dropout_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..23aef8f12ffc390504e05f0839878f7787a5381f --- /dev/null +++ b/workloads/realworld/async/darknet/src/dropout_layer_kernels.cu @@ -0,0 +1,41 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "dropout_layer.h" +#include "cuda_dark.h" +#include "utils.h" +} + +__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id < size) input[id] = (rand[id] < prob) ? 
0 : input[id]*scale; +} + +void forward_dropout_layer_gpu(dropout_layer layer, network net) +{ + if (!net.train) return; + int size = layer.inputs*layer.batch; + cuda_random(layer.rand_gpu, size); + /* + int i; + for(i = 0; i < size; ++i){ + layer.rand[i] = rand_uniform(); + } + cuda_push_array(layer.rand_gpu, layer.rand, size); + */ + + yoloswag420blazeit360noscope<<>>(net.input_gpu, size, layer.rand_gpu, layer.probability, layer.scale); + check_error(cudaPeekAtLastError()); +} + +void backward_dropout_layer_gpu(dropout_layer layer, network net) +{ + if(!net.delta_gpu) return; + int size = layer.inputs*layer.batch; + + yoloswag420blazeit360noscope<<>>(net.delta_gpu, size, layer.rand_gpu, layer.probability, layer.scale); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/async/darknet/src/gemm.c b/workloads/realworld/async/darknet/src/gemm.c new file mode 100644 index 0000000000000000000000000000000000000000..756ae595d7348fc2d343a48715b05ea882d6aa7c --- /dev/null +++ b/workloads/realworld/async/darknet/src/gemm.c @@ -0,0 +1,324 @@ +#include "gemm.h" +#include "utils.h" +#include "cuda_dark.h" +#include +#include +#include + +void gemm_bin(int M, int N, int K, float ALPHA, + char *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + char A_PART = A[i*lda+k]; + if(A_PART){ + for(j = 0; j < N; ++j){ + C[i*ldc+j] += B[k*ldb+j]; + } + } else { + for(j = 0; j < N; ++j){ + C[i*ldc+j] -= B[k*ldb+j]; + } + } + } + } +} + +float *random_matrix(int rows, int cols) +{ + int i; + float *m = calloc(rows*cols, sizeof(float)); + for(i = 0; i < rows*cols; ++i){ + m[i] = (float)rand()/RAND_MAX; + } + return m; +} + +void time_random_matrix(int TA, int TB, int m, int k, int n) +{ + float *a; + if(!TA) a = random_matrix(m,k); + else a = random_matrix(k,m); + int lda = (!TA)?k:m; + float *b; + if(!TB) b = random_matrix(k,n); + else b = random_matrix(n,k); + int ldb = (!TB)?n:k; + + float *c = 
random_matrix(m,n); + int i; + clock_t start = clock(), end; + for(i = 0; i<10; ++i){ + gemm_cpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); + } + end = clock(); + printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf ms\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); + free(a); + free(b); + free(c); +} + + +void gemm(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + gemm_cpu( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); +} + +void gemm_nn(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + register float A_PART = ALPHA*A[i*lda+k]; + for(j = 0; j < N; ++j){ + C[i*ldc+j] += A_PART*B[k*ldb+j]; + } + } + } +} + +void gemm_nt(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + register float sum = 0; + for(k = 0; k < K; ++k){ + sum += ALPHA*A[i*lda+k]*B[j*ldb + k]; + } + C[i*ldc+j] += sum; + } + } +} + +void gemm_tn(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + register float A_PART = ALPHA*A[k*lda+i]; + for(j = 0; j < N; ++j){ + C[i*ldc+j] += A_PART*B[k*ldb+j]; + } + } + } +} + +void gemm_tt(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + register float sum = 0; + for(k = 0; k < K; ++k){ + sum += ALPHA*A[i+k*lda]*B[k+j*ldb]; + } + C[i*ldc+j] += sum; + } + } +} + + +void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + 
//printf("cpu: %d %d %d %d %d %f %d %d %f %d\n",TA, TB, M, N, K, ALPHA, lda, ldb, BETA, ldc); + int i, j; + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + C[i*ldc + j] *= BETA; + } + } + if(!TA && !TB) + gemm_nn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else if(TA && !TB) + gemm_tn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else if(!TA && TB) + gemm_nt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else + gemm_tt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); +} + + +// #ifdef GPU + +// #include + +// void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, +// float *A_gpu, int lda, +// float *B_gpu, int ldb, +// float BETA, +// float *C_gpu, int ldc) +// { +// cublasHandle_t handle = blas_handle(); +// cudaError_t status = (cudaError_t) cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), +// (TA ? CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); +// check_error(status); +// } + +// #include +// #include +// #include +// #include + +// void time_gpu_random_matrix(int TA, int TB, int m, int k, int n) +// { +// float *a; +// if(!TA) a = random_matrix(m,k); +// else a = random_matrix(k,m); +// int lda = (!TA)?k:m; +// float *b; +// if(!TB) b = random_matrix(k,n); +// else b = random_matrix(n,k); +// int ldb = (!TB)?n:k; + +// float *c = random_matrix(m,n); +// int i; +// clock_t start = clock(), end; +// for(i = 0; i<32; ++i){ +// gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); +// } +// end = clock(); +// printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); +// free(a); +// free(b); +// free(c); +// } + +// void time_gpu(int TA, int TB, int m, int k, int n) +// { +// int iter = 10; +// float *a = random_matrix(m,k); +// float *b = random_matrix(k,n); + +// int lda = (!TA)?k:m; +// int ldb = (!TB)?n:k; + +// float *c = random_matrix(m,n); + +// float *a_cl = cuda_make_array(a, m*k); +// float *b_cl = cuda_make_array(b, k*n); +// float *c_cl = cuda_make_array(c, m*n); + +// int 
i; +// clock_t start = clock(), end; +// for(i = 0; i +#include +#include +#include +#include + +#include "gemm.h" +#include "utils.h" +#include "cuda_dark.h" + +#include +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK_X 16 +#define DIM_THREAD_BLOCK_Y 16 + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +__global__ void gemm_kernel(float *a, float *b, float *c, int M, int K, int N, float alpha, float beta) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + // Compute each thread's global row and column index + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + + // Statically allocated shared memory + __shared__ float s_a[PREFETCH_COUNT * DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + __shared__ float s_b[PREFETCH_COUNT * DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + // Accumulate in temporary variable + + if (row < M && col < N) { + float tmp = beta * c[row * N + col]; + + int base_tiles = 0; + int end_tile = base_tiles + (K + blockDim.x - 1) / blockDim.x; + + int fetch = base_tiles; + int tile_size = DIM_THREAD_BLOCK_X; + int mem_size = DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + if ((fetch * tile_size + threadIdx.x) < K) + memcpy_async(s_a[(fetch % PREFETCH_COUNT) * mem_size + (threadIdx.y * blockDim.x + threadIdx.x)], a[row * K + fetch * tile_size + threadIdx.x], pipe); + if ((fetch * tile_size + threadIdx.y) < K) + memcpy_async(s_b[(fetch % PREFETCH_COUNT) * mem_size + (threadIdx.y * blockDim.x + threadIdx.x)], b[(fetch * tile_size + threadIdx.y) * N + col], pipe); + 
pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + int left = K - compute * blockDim.x; + for (int k = 0; k < blockDim.x && k < left; k++) + { + tmp += alpha * s_a[(compute % PREFETCH_COUNT) * mem_size + threadIdx.y * blockDim.x + k] * s_b[(compute % PREFETCH_COUNT) * mem_size + k * blockDim.x + threadIdx.x]; + } + block.sync(); + } + c[row * N + col] = tmp; + } + block.sync(); +} + +void gemmCuda(float *A, float *B, float *C, int M, int N, int K, float alpha, float beta) +{ + // float *A_gpu; + // float *B_gpu; + // float *C_gpu; + + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 grid((size_t)(ceil(((float)N) / ((float)block.y))), (size_t)(ceil(((float)M) / ((float)block.x)))); + + // cudaMalloc(&A_gpu, sizeof(float) * M * K); + // cudaMalloc(&B_gpu, sizeof(float) * K * N); + // cudaMalloc(&C_gpu, sizeof(float) * M * N); + + // cudaMemcpy(A_gpu, A, sizeof(float) * M * K, cudaMemcpyHostToDevice); + // cudaMemcpy(B_gpu, B, sizeof(float) * K * N, cudaMemcpyHostToDevice); + // cudaMemcpy(C_gpu, C, sizeof(float) * M * N, cudaMemcpyHostToDevice); + gemm_kernel<<>>(A, B, C, M, K, N, alpha, beta); + // cudaDeviceSynchronize(); + // cudaMemcpy(C, C_gpu, sizeof(float) * M * N, cudaMemcpyDeviceToHost); + + // cudaFree(A_gpu); + // cudaFree(B_gpu); + // cudaFree(C_gpu); +} + +// void gemmCuda(float *A, float *B, float *C, int M, int N, int K, float alpha, float beta) +// { +// float *A_gpu; +// float *B_gpu; +// float *C_gpu; + +// dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); +// dim3 grid((size_t)(ceil(((float)N) / ((float)block.y))), (size_t)(ceil(((float)M) / ((float)block.x)))); + +// cudaMalloc(&A_gpu, sizeof(float) * M * K); +// cudaMalloc(&B_gpu, sizeof(float) * K * N); +// cudaMalloc(&C_gpu, sizeof(float) * M * N); + +// cudaMemcpy(A_gpu, A, sizeof(float) * M * K, cudaMemcpyHostToDevice); +// cudaMemcpy(B_gpu, B, sizeof(float) 
* K * N, cudaMemcpyHostToDevice); +// cudaMemcpy(C_gpu, C, sizeof(float) * M * N, cudaMemcpyHostToDevice); +// gemm_kernel<<>>(A_gpu, B_gpu, C_gpu, M, K, N, alpha, beta); +// // cudaDeviceSynchronize(); +// cudaMemcpy(C, C_gpu, sizeof(float) * M * N, cudaMemcpyDeviceToHost); + +// cudaFree(A_gpu); +// cudaFree(B_gpu); +// cudaFree(C_gpu); +// } + +// void gemmCuda(float *A, float *B, float *C, int M, int N, int K, float alpha, float beta) +// { +// float *A_gpu; +// float *B_gpu; +// float *C_gpu; + +// dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); +// dim3 grid((size_t)(ceil(((float)N) / ((float)block.y))), (size_t)(ceil(((float)M) / ((float)block.x)))); + +// cudaMallocManaged(&A_gpu, sizeof(float) * M * K); +// cudaMallocManaged(&B_gpu, sizeof(float) * K * N); +// cudaMallocManaged(&C_gpu, sizeof(float) * M * N); +// printf("allocation succeed!\n"); + +// memcpy(A_gpu, A, sizeof(float) * M * K); +// memcpy(B_gpu, B, sizeof(float) * K * N); +// printf("memcpy succeed!\n"); +// // cudaMemcpy(C_gpu, C, sizeof(float) * M * N, cudaMemcpyHostToDevice); +// gemm_kernel<<>>(A_gpu, B_gpu, C_gpu, M, K, N, alpha, beta); +// cudaDeviceSynchronize(); +// memcpy(C, C_gpu, sizeof(float) * M * N); + +// cudaFree(A_gpu); +// cudaFree(B_gpu); +// cudaFree(C_gpu); +// } + +void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, + float *A_gpu, int lda, + float *B_gpu, int ldb, + float BETA, + float *C_gpu, int ldc) +{ + // printf("TA is %d, TB is %d, M is %d, N is %d, K is %d, lda is %d, ldb is %d, ldc is %d.\n", TA, TB, M, N, K, lda, ldb, ldc); + if (TA == 0 && TB == 0) { + gemmCuda(A_gpu, B_gpu, C_gpu, M, N, K, ALPHA, BETA); + } else { + cublasHandle_t handle = blas_handle(); + cudaError_t status = (cudaError_t) cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), + (TA ? 
CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); + check_error(status); + } +} + + +void time_gpu_random_matrix(int TA, int TB, int m, int k, int n) +{ + float *a; + if(!TA) a = random_matrix(m,k); + else a = random_matrix(k,m); + int lda = (!TA)?k:m; + float *b; + if(!TB) b = random_matrix(k,n); + else b = random_matrix(n,k); + int ldb = (!TB)?n:k; + + float *c = random_matrix(m,n); + int i; + clock_t start = clock(), end; + for(i = 0; i<32; ++i){ + gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); + } + end = clock(); + printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); + free(a); + free(b); + free(c); +} + +void time_gpu(int TA, int TB, int m, int k, int n) +{ + int iter = 10; + float *a = random_matrix(m,k); + float *b = random_matrix(k,n); + + int lda = (!TA)?k:m; + int ldb = (!TB)?n:k; + + float *c = random_matrix(m,n); + + float *a_cl = cuda_make_array(a, m*k); + float *b_cl = cuda_make_array(b, k*n); + float *c_cl = cuda_make_array(c, m*n); + + int i; + clock_t start = clock(), end; + for(i = 0; i +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam) +{ + fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = GRU; + l.steps = steps; + l.inputs = inputs; + + l.uz = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uz) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uz->batch = batch; + + l.wz = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wz) = 
make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wz->batch = batch; + + l.ur = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.ur) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.ur->batch = batch; + + l.wr = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wr) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wr->batch = batch; + + + + l.uh = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uh) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uh->batch = batch; + + l.wh = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wh) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wh->batch = batch; + + l.batch_normalize = batch_normalize; + + + l.outputs = outputs; + l.output = calloc(outputs*batch*steps, sizeof(float)); + l.delta = calloc(outputs*batch*steps, sizeof(float)); + l.state = calloc(outputs*batch, sizeof(float)); + l.prev_state = calloc(outputs*batch, sizeof(float)); + l.forgot_state = calloc(outputs*batch, sizeof(float)); + l.forgot_delta = calloc(outputs*batch, sizeof(float)); + + l.r_cpu = calloc(outputs*batch, sizeof(float)); + l.z_cpu = calloc(outputs*batch, sizeof(float)); + l.h_cpu = calloc(outputs*batch, sizeof(float)); + + l.forward = forward_gru_layer; + l.backward = backward_gru_layer; + l.update = update_gru_layer; + +#ifdef GPU + l.forward_gpu = forward_gru_layer_gpu; + l.backward_gpu = backward_gru_layer_gpu; + l.update_gpu = update_gru_layer_gpu; + + l.forgot_state_gpu = cuda_make_array(0, batch*outputs); + l.forgot_delta_gpu = cuda_make_array(0, batch*outputs); + l.prev_state_gpu = cuda_make_array(0, batch*outputs); + l.state_gpu = cuda_make_array(0, batch*outputs); + l.output_gpu = cuda_make_array(0, batch*outputs*steps); + l.delta_gpu = cuda_make_array(0, batch*outputs*steps); + l.r_gpu = cuda_make_array(0, 
batch*outputs); + l.z_gpu = cuda_make_array(0, batch*outputs); + l.h_gpu = cuda_make_array(0, batch*outputs); + +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l.uz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uz->out_c, l.uz->out_h, l.uz->out_w); + cudnnSetTensor4dDescriptor(l.uh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uh->out_c, l.uh->out_h, l.uh->out_w); + cudnnSetTensor4dDescriptor(l.ur->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ur->out_c, l.ur->out_h, l.ur->out_w); + cudnnSetTensor4dDescriptor(l.wz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wz->out_c, l.wz->out_h, l.wz->out_w); + cudnnSetTensor4dDescriptor(l.wh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wh->out_c, l.wh->out_h, l.wh->out_w); + cudnnSetTensor4dDescriptor(l.wr->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wr->out_c, l.wr->out_h, l.wr->out_w); +#endif +#endif + + return l; +} + +void update_gru_layer(layer l, update_args a) +{ + update_connected_layer(*(l.ur), a); + update_connected_layer(*(l.uz), a); + update_connected_layer(*(l.uh), a); + update_connected_layer(*(l.wr), a); + update_connected_layer(*(l.wz), a); + update_connected_layer(*(l.wh), a); +} + +void forward_gru_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + fill_cpu(l.outputs * l.batch * l.steps, 0, uz.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, ur.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, uh.delta, 1); + + fill_cpu(l.outputs * l.batch * l.steps, 0, wz.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wr.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wh.delta, 1); + if(net.train) { + fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1); + copy_cpu(l.outputs*l.batch, l.state, 1, l.prev_state, 1); + } + + for (i = 0; i < 
l.steps; ++i) { + s.input = l.state; + forward_connected_layer(wz, s); + forward_connected_layer(wr, s); + + s.input = net.input; + forward_connected_layer(uz, s); + forward_connected_layer(ur, s); + forward_connected_layer(uh, s); + + + copy_cpu(l.outputs*l.batch, uz.output, 1, l.z_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wz.output, 1, l.z_cpu, 1); + + copy_cpu(l.outputs*l.batch, ur.output, 1, l.r_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wr.output, 1, l.r_cpu, 1); + + activate_array(l.z_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.r_cpu, l.outputs*l.batch, LOGISTIC); + + copy_cpu(l.outputs*l.batch, l.state, 1, l.forgot_state, 1); + mul_cpu(l.outputs*l.batch, l.r_cpu, 1, l.forgot_state, 1); + + s.input = l.forgot_state; + forward_connected_layer(wh, s); + + copy_cpu(l.outputs*l.batch, uh.output, 1, l.h_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wh.output, 1, l.h_cpu, 1); + + if(l.tanh){ + activate_array(l.h_cpu, l.outputs*l.batch, TANH); + } else { + activate_array(l.h_cpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_sum_cpu(l.state, l.h_cpu, l.z_cpu, l.outputs*l.batch, l.output); + + copy_cpu(l.outputs*l.batch, l.output, 1, l.state, 1); + + net.input += l.inputs*l.batch; + l.output += l.outputs*l.batch; + increment_layer(&uz, 1); + increment_layer(&ur, 1); + increment_layer(&uh, 1); + + increment_layer(&wz, 1); + increment_layer(&wr, 1); + increment_layer(&wh, 1); + } +} + +void backward_gru_layer(layer l, network net) +{ +} + +#ifdef GPU + +void pull_gru_layer(layer l) +{ +} + +void push_gru_layer(layer l) +{ +} + +void update_gru_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.ur), a); + update_connected_layer_gpu(*(l.uz), a); + update_connected_layer_gpu(*(l.uh), a); + update_connected_layer_gpu(*(l.wr), a); + update_connected_layer_gpu(*(l.wz), a); + update_connected_layer_gpu(*(l.wh), a); +} + +void forward_gru_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer 
ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + fill_gpu(l.outputs * l.batch * l.steps, 0, uz.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, ur.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, uh.delta_gpu, 1); + + fill_gpu(l.outputs * l.batch * l.steps, 0, wz.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wr.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wh.delta_gpu, 1); + if(net.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.prev_state_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(wz, s); + forward_connected_layer_gpu(wr, s); + + s.input_gpu = net.input_gpu; + forward_connected_layer_gpu(uz, s); + forward_connected_layer_gpu(ur, s); + forward_connected_layer_gpu(uh, s); + + copy_gpu(l.outputs*l.batch, uz.output_gpu, 1, l.z_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wz.output_gpu, 1, l.z_gpu, 1); + + copy_gpu(l.outputs*l.batch, ur.output_gpu, 1, l.r_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wr.output_gpu, 1, l.r_gpu, 1); + + activate_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.forgot_state_gpu, 1); + mul_gpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1); + + s.input_gpu = l.forgot_state_gpu; + forward_connected_layer_gpu(wh, s); + + copy_gpu(l.outputs*l.batch, uh.output_gpu, 1, l.h_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wh.output_gpu, 1, l.h_gpu, 1); + + if(l.tanh){ + activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH); + } else { + activate_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_sum_gpu(l.state_gpu, l.h_gpu, l.z_gpu, l.outputs*l.batch, l.output_gpu); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.state_gpu, 1); + + net.input_gpu += l.inputs*l.batch; + l.output_gpu += 
l.outputs*l.batch; + increment_layer(&uz, 1); + increment_layer(&ur, 1); + increment_layer(&uh, 1); + + increment_layer(&wz, 1); + increment_layer(&wr, 1); + increment_layer(&wh, 1); + } +} + +void backward_gru_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + increment_layer(&uz, l.steps - 1); + increment_layer(&ur, l.steps - 1); + increment_layer(&uh, l.steps - 1); + + increment_layer(&wz, l.steps - 1); + increment_layer(&wr, l.steps - 1); + increment_layer(&wh, l.steps - 1); + + net.input_gpu += l.inputs*l.batch*(l.steps-1); + if(net.delta_gpu) net.delta_gpu += l.inputs*l.batch*(l.steps-1); + l.output_gpu += l.outputs*l.batch*(l.steps-1); + l.delta_gpu += l.outputs*l.batch*(l.steps-1); + float *end_state = l.output_gpu; + for (i = l.steps-1; i >= 0; --i) { + if(i != 0) copy_gpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + else copy_gpu(l.outputs*l.batch, l.prev_state_gpu, 1, l.state_gpu, 1); + float *prev_delta_gpu = (i == 0) ? 
0 : l.delta_gpu - l.outputs*l.batch; + + copy_gpu(l.outputs*l.batch, uz.output_gpu, 1, l.z_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wz.output_gpu, 1, l.z_gpu, 1); + + copy_gpu(l.outputs*l.batch, ur.output_gpu, 1, l.r_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wr.output_gpu, 1, l.r_gpu, 1); + + activate_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, uh.output_gpu, 1, l.h_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wh.output_gpu, 1, l.h_gpu, 1); + + if(l.tanh){ + activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH); + } else { + activate_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_delta_gpu(l.state_gpu, l.h_gpu, l.z_gpu, prev_delta_gpu, uh.delta_gpu, uz.delta_gpu, l.outputs*l.batch, l.delta_gpu); + + if(l.tanh){ + gradient_array_gpu(l.h_gpu, l.outputs*l.batch, TANH, uh.delta_gpu); + } else { + gradient_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC, uh.delta_gpu); + } + + copy_gpu(l.outputs*l.batch, uh.delta_gpu, 1, wh.delta_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.forgot_state_gpu, 1); + mul_gpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1); + fill_gpu(l.outputs*l.batch, 0, l.forgot_delta_gpu, 1); + + s.input_gpu = l.forgot_state_gpu; + s.delta_gpu = l.forgot_delta_gpu; + + backward_connected_layer_gpu(wh, s); + if(prev_delta_gpu) mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.r_gpu, prev_delta_gpu); + mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.state_gpu, ur.delta_gpu); + + gradient_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC, ur.delta_gpu); + copy_gpu(l.outputs*l.batch, ur.delta_gpu, 1, wr.delta_gpu, 1); + + gradient_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC, uz.delta_gpu); + copy_gpu(l.outputs*l.batch, uz.delta_gpu, 1, wz.delta_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = prev_delta_gpu; + + backward_connected_layer_gpu(wr, s); + backward_connected_layer_gpu(wz, s); + + 
s.input_gpu = net.input_gpu; + s.delta_gpu = net.delta_gpu; + + backward_connected_layer_gpu(uh, s); + backward_connected_layer_gpu(ur, s); + backward_connected_layer_gpu(uz, s); + + + net.input_gpu -= l.inputs*l.batch; + if(net.delta_gpu) net.delta_gpu -= l.inputs*l.batch; + l.output_gpu -= l.outputs*l.batch; + l.delta_gpu -= l.outputs*l.batch; + increment_layer(&uz, -1); + increment_layer(&ur, -1); + increment_layer(&uh, -1); + + increment_layer(&wz, -1); + increment_layer(&wr, -1); + increment_layer(&wh, -1); + } + copy_gpu(l.outputs*l.batch, end_state, 1, l.state_gpu, 1); +} +#endif diff --git a/workloads/realworld/async/darknet/src/gru_layer.h b/workloads/realworld/async/darknet/src/gru_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9067942e9499d53c8d54f7728d64a5030200f4de --- /dev/null +++ b/workloads/realworld/async/darknet/src/gru_layer.h @@ -0,0 +1,24 @@ + +#ifndef GRU_LAYER_H +#define GRU_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam); + +void forward_gru_layer(layer l, network state); +void backward_gru_layer(layer l, network state); +void update_gru_layer(layer l, update_args a); + +#ifdef GPU +void forward_gru_layer_gpu(layer l, network state); +void backward_gru_layer_gpu(layer l, network state); +void update_gru_layer_gpu(layer l, update_args a); +void push_gru_layer(layer l); +void pull_gru_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/async/darknet/src/im2col.c b/workloads/realworld/async/darknet/src/im2col.c new file mode 100644 index 0000000000000000000000000000000000000000..69ec98a9d12b2e21a3859611ad709d62fc80dcf3 --- /dev/null +++ b/workloads/realworld/async/darknet/src/im2col.c @@ -0,0 +1,40 @@ +#include "im2col.h" +#include +float im2col_get_pixel(float *im, int height, int width, int channels, + int row, int col, int channel, int pad) +{ + row -= pad; + col 
-= pad; + + if (row < 0 || col < 0 || + row >= height || col >= width) return 0; + return im[col + width*(row + height*channel)]; +} + +//From Berkeley Vision's Caffe! +//https://github.com/BVLC/caffe/blob/master/LICENSE +void im2col_cpu(float* data_im, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_col) +{ + int c,h,w; + int height_col = (height + 2*pad - ksize) / stride + 1; + int width_col = (width + 2*pad - ksize) / stride + 1; + + int channels_col = channels * ksize * ksize; + for (c = 0; c < channels_col; ++c) { + int w_offset = c % ksize; + int h_offset = (c / ksize) % ksize; + int c_im = c / ksize / ksize; + for (h = 0; h < height_col; ++h) { + for (w = 0; w < width_col; ++w) { + int im_row = h_offset + h * stride; + int im_col = w_offset + w * stride; + int col_index = (c * height_col + h) * width_col + w; + data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, + im_row, im_col, c_im, pad); + } + } + } +} + diff --git a/workloads/realworld/async/darknet/src/im2col.h b/workloads/realworld/async/darknet/src/im2col.h new file mode 100644 index 0000000000000000000000000000000000000000..02c4247fad9b8428a8e89fc8caec0b5b6ba5b36a --- /dev/null +++ b/workloads/realworld/async/darknet/src/im2col.h @@ -0,0 +1,15 @@ +#ifndef IM2COL_H +#define IM2COL_H + +void im2col_cpu(float* data_im, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_col); + +#ifdef GPU + +void im2col_gpu(float *im, + int channels, int height, int width, + int ksize, int stride, int pad,float *data_col); + +#endif +#endif diff --git a/workloads/realworld/async/darknet/src/im2col_kernels.cu b/workloads/realworld/async/darknet/src/im2col_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..decbc1202a9ebd3916312527cc0cdad3fef9b264 --- /dev/null +++ b/workloads/realworld/async/darknet/src/im2col_kernels.cu @@ -0,0 +1,61 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include 
"cublas_v2.h" + +extern "C" { +#include "im2col.h" +#include "cuda_dark.h" +} + +// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu +// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE + +__global__ void im2col_gpu_kernel(const int n, const float* data_im, + const int height, const int width, const int ksize, + const int pad, + const int stride, + const int height_col, const int width_col, + float *data_col) { + int index = blockIdx.x*blockDim.x+threadIdx.x; + for(; index < n; index += blockDim.x*gridDim.x){ + int w_out = index % width_col; + int h_index = index / width_col; + int h_out = h_index % height_col; + int channel_in = h_index / height_col; + int channel_out = channel_in * ksize * ksize; + int h_in = h_out * stride - pad; + int w_in = w_out * stride - pad; + float* data_col_ptr = data_col; + data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; + const float* data_im_ptr = data_im; + data_im_ptr += (channel_in * height + h_in) * width + w_in; + for (int i = 0; i < ksize; ++i) { + for (int j = 0; j < ksize; ++j) { + int h = h_in + i; + int w = w_in + j; + + *data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ? + data_im_ptr[i * width + j] : 0; + + //*data_col_ptr = data_im_ptr[ii * width + jj]; + + data_col_ptr += height_col * width_col; + } + } + } +} + +void im2col_gpu(float *im, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_col){ + // We are going to launch channels * height_col * width_col kernels, each + // kernel responsible for copying a single-channel grid. 
+ int height_col = (height + 2 * pad - ksize) / stride + 1; + int width_col = (width + 2 * pad - ksize) / stride + 1; + int num_kernels = channels * height_col * width_col; + im2col_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, + BLOCK>>>( + num_kernels, im, height, width, ksize, pad, + stride, height_col, + width_col, data_col); +} diff --git a/workloads/realworld/async/darknet/src/image.c b/workloads/realworld/async/darknet/src/image.c new file mode 100644 index 0000000000000000000000000000000000000000..3edf6d1045f4637d7bb108440302fd36d5ef9a18 --- /dev/null +++ b/workloads/realworld/async/darknet/src/image.c @@ -0,0 +1,1467 @@ +#include "image.h" +#include "utils.h" +#include "blas.h" +#include "cuda_dark.h" +#include +#include + +#define STB_IMAGE_IMPLEMENTATION +#include "stb_image.h" +#define STB_IMAGE_WRITE_IMPLEMENTATION +#include "stb_image_write.h" + +int windows = 0; + +float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} }; + +float get_color(int c, int x, int max) +{ + float ratio = ((float)x/max)*5; + int i = floor(ratio); + int j = ceil(ratio); + ratio -= i; + float r = (1-ratio) * colors[i][c] + ratio*colors[j][c]; + //printf("%f\n", r); + return r; +} + +image mask_to_rgb(image mask) +{ + int n = mask.c; + image im = make_image(mask.w, mask.h, 3); + int i, j; + for(j = 0; j < n; ++j){ + int offset = j*123457 % n; + float red = get_color(2,offset,n); + float green = get_color(1,offset,n); + float blue = get_color(0,offset,n); + for(i = 0; i < im.w*im.h; ++i){ + im.data[i + 0*im.w*im.h] += mask.data[j*im.h*im.w + i]*red; + im.data[i + 1*im.w*im.h] += mask.data[j*im.h*im.w + i]*green; + im.data[i + 2*im.w*im.h] += mask.data[j*im.h*im.w + i]*blue; + } + } + return im; +} + +static float get_pixel(image m, int x, int y, int c) +{ + assert(x < m.w && y < m.h && c < m.c); + return m.data[c*m.h*m.w + y*m.w + x]; +} +static float get_pixel_extend(image m, int x, int y, int c) +{ + if(x < 0 || x >= m.w || y < 0 || y >= m.h) return 0; + /* + 
if(x < 0) x = 0; + if(x >= m.w) x = m.w-1; + if(y < 0) y = 0; + if(y >= m.h) y = m.h-1; + */ + if(c < 0 || c >= m.c) return 0; + return get_pixel(m, x, y, c); +} +static void set_pixel(image m, int x, int y, int c, float val) +{ + if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return; + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] = val; +} +static void add_pixel(image m, int x, int y, int c, float val) +{ + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] += val; +} + +static float bilinear_interpolate(image im, float x, float y, int c) +{ + int ix = (int) floorf(x); + int iy = (int) floorf(y); + + float dx = x - ix; + float dy = y - iy; + + float val = (1-dy) * (1-dx) * get_pixel_extend(im, ix, iy, c) + + dy * (1-dx) * get_pixel_extend(im, ix, iy+1, c) + + (1-dy) * dx * get_pixel_extend(im, ix+1, iy, c) + + dy * dx * get_pixel_extend(im, ix+1, iy+1, c); + return val; +} + + +void composite_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float val = get_pixel(source, x, y, k); + float val2 = get_pixel_extend(dest, dx+x, dy+y, k); + set_pixel(dest, dx+x, dy+y, k, val * val2); + } + } + } +} + +image border_image(image a, int border) +{ + image b = make_image(a.w + 2*border, a.h + 2*border, a.c); + int x,y,k; + for(k = 0; k < b.c; ++k){ + for(y = 0; y < b.h; ++y){ + for(x = 0; x < b.w; ++x){ + float val = get_pixel_extend(a, x - border, y - border, k); + if(x - border < 0 || x - border >= a.w || y - border < 0 || y - border >= a.h) val = 1; + set_pixel(b, x, y, k, val); + } + } + } + return b; +} + +image tile_images(image a, image b, int dx) +{ + if(a.w == 0) return copy_image(b); + image c = make_image(a.w + b.w + dx, (a.h > b.h) ? a.h : b.h, (a.c > b.c) ? 
a.c : b.c); + fill_cpu(c.w*c.h*c.c, 1, c.data, 1); + embed_image(a, c, 0, 0); + composite_image(b, c, a.w + dx, 0); + return c; +} + +image get_label(image **characters, char *string, int size) +{ + size = size/10; + if(size > 7) size = 7; + image label = make_empty_image(0,0,0); + while(*string){ + image l = characters[size][(int)*string]; + image n = tile_images(label, l, -size - 1 + (size+1)/2); + free_image(label); + label = n; + ++string; + } + image b = border_image(label, label.h*.25); + free_image(label); + return b; +} + +void draw_label(image a, int r, int c, image label, const float *rgb) +{ + int w = label.w; + int h = label.h; + if (r - h >= 0) r = r - h; + + int i, j, k; + for(j = 0; j < h && j + r < a.h; ++j){ + for(i = 0; i < w && i + c < a.w; ++i){ + for(k = 0; k < label.c; ++k){ + float val = get_pixel(label, i, j, k); + set_pixel(a, i+c, j+r, k, rgb[k] * val); + } + } + } +} + +void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b) +{ + //normalize_image(a); + int i; + if(x1 < 0) x1 = 0; + if(x1 >= a.w) x1 = a.w-1; + if(x2 < 0) x2 = 0; + if(x2 >= a.w) x2 = a.w-1; + + if(y1 < 0) y1 = 0; + if(y1 >= a.h) y1 = a.h-1; + if(y2 < 0) y2 = 0; + if(y2 >= a.h) y2 = a.h-1; + + for(i = x1; i <= x2; ++i){ + a.data[i + y1*a.w + 0*a.w*a.h] = r; + a.data[i + y2*a.w + 0*a.w*a.h] = r; + + a.data[i + y1*a.w + 1*a.w*a.h] = g; + a.data[i + y2*a.w + 1*a.w*a.h] = g; + + a.data[i + y1*a.w + 2*a.w*a.h] = b; + a.data[i + y2*a.w + 2*a.w*a.h] = b; + } + for(i = y1; i <= y2; ++i){ + a.data[x1 + i*a.w + 0*a.w*a.h] = r; + a.data[x2 + i*a.w + 0*a.w*a.h] = r; + + a.data[x1 + i*a.w + 1*a.w*a.h] = g; + a.data[x2 + i*a.w + 1*a.w*a.h] = g; + + a.data[x1 + i*a.w + 2*a.w*a.h] = b; + a.data[x2 + i*a.w + 2*a.w*a.h] = b; + } +} + +void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b) +{ + int i; + for(i = 0; i < w; ++i){ + draw_box(a, x1+i, y1+i, x2-i, y2-i, r, g, b); + } +} + +void draw_bbox(image a, box bbox, int w, 
float r, float g, float b) +{ + int left = (bbox.x-bbox.w/2)*a.w; + int right = (bbox.x+bbox.w/2)*a.w; + int top = (bbox.y-bbox.h/2)*a.h; + int bot = (bbox.y+bbox.h/2)*a.h; + + int i; + for(i = 0; i < w; ++i){ + draw_box(a, left+i, top+i, right-i, bot-i, r, g, b); + } +} + +image **load_alphabet() +{ + char *value = getenv("UVMAsyncBench_BASE"); + int i, j; + const int nsize = 8; + image **alphabets = calloc(nsize, sizeof(image)); + for(j = 0; j < nsize; ++j){ + alphabets[j] = calloc(128, sizeof(image)); + for(i = 32; i < 127; ++i){ + char buff[256]; + sprintf(buff, "%s/workloads/realworld/standard/darknet/data/labels/%d_%d.png", value, i, j); + alphabets[j][i] = load_image_color(buff, 0, 0); + } + } + return alphabets; +} + +void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes) +{ + int i,j; + + for(i = 0; i < num; ++i){ + char labelstr[4096] = {0}; + int class = -1; + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j] > thresh){ + if (class < 0) { + strcat(labelstr, names[j]); + class = j; + } else { + strcat(labelstr, ", "); + strcat(labelstr, names[j]); + } + printf("%s: %.0f%%\n", names[j], dets[i].prob[j]*100); + } + } + if(class >= 0){ + int width = im.h * .006; + + /* + if(0){ + width = pow(prob, 1./2.)*10+1; + alphabet = 0; + } + */ + + //printf("%d %s: %.0f%%\n", i, names[class], prob*100); + int offset = class*123457 % classes; + float red = get_color(2,offset,classes); + float green = get_color(1,offset,classes); + float blue = get_color(0,offset,classes); + float rgb[3]; + + //width = prob*20+2; + + rgb[0] = red; + rgb[1] = green; + rgb[2] = blue; + box b = dets[i].bbox; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + + int left = (b.x-b.w/2.)*im.w; + int right = (b.x+b.w/2.)*im.w; + int top = (b.y-b.h/2.)*im.h; + int bot = (b.y+b.h/2.)*im.h; + + if(left < 0) left = 0; + if(right > im.w-1) right = im.w-1; + if(top < 0) top = 0; + if(bot > im.h-1) bot = im.h-1; + + draw_box_width(im, 
left, top, right, bot, width, red, green, blue); + if (alphabet) { + image label = get_label(alphabet, labelstr, (im.h*.03)); + draw_label(im, top + width, left, label, rgb); + free_image(label); + } + if (dets[i].mask){ + image mask = float_to_image(14, 14, 1, dets[i].mask); + image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h); + image tmask = threshold_image(resized_mask, .5); + embed_image(tmask, im, left, top); + free_image(mask); + free_image(resized_mask); + free_image(tmask); + } + } + } +} + +void transpose_image(image im) +{ + assert(im.w == im.h); + int n, m; + int c; + for(c = 0; c < im.c; ++c){ + for(n = 0; n < im.w-1; ++n){ + for(m = n + 1; m < im.w; ++m){ + float swap = im.data[m + im.w*(n + im.h*c)]; + im.data[m + im.w*(n + im.h*c)] = im.data[n + im.w*(m + im.h*c)]; + im.data[n + im.w*(m + im.h*c)] = swap; + } + } + } +} + +void rotate_image_cw(image im, int times) +{ + assert(im.w == im.h); + times = (times + 400) % 4; + int i, x, y, c; + int n = im.w; + for(i = 0; i < times; ++i){ + for(c = 0; c < im.c; ++c){ + for(x = 0; x < n/2; ++x){ + for(y = 0; y < (n-1)/2 + 1; ++y){ + float temp = im.data[y + im.w*(x + im.h*c)]; + im.data[y + im.w*(x + im.h*c)] = im.data[n-1-x + im.w*(y + im.h*c)]; + im.data[n-1-x + im.w*(y + im.h*c)] = im.data[n-1-y + im.w*(n-1-x + im.h*c)]; + im.data[n-1-y + im.w*(n-1-x + im.h*c)] = im.data[x + im.w*(n-1-y + im.h*c)]; + im.data[x + im.w*(n-1-y + im.h*c)] = temp; + } + } + } + } +} + +void flip_image(image a) +{ + int i,j,k; + for(k = 0; k < a.c; ++k){ + for(i = 0; i < a.h; ++i){ + for(j = 0; j < a.w/2; ++j){ + int index = j + a.w*(i + a.h*(k)); + int flip = (a.w - j - 1) + a.w*(i + a.h*(k)); + float swap = a.data[flip]; + a.data[flip] = a.data[index]; + a.data[index] = swap; + } + } + } +} + +image image_distance(image a, image b) +{ + int i,j; + image dist = make_image(a.w, a.h, 1); + for(i = 0; i < a.c; ++i){ + for(j = 0; j < a.h*a.w; ++j){ + dist.data[j] += pow(a.data[i*a.h*a.w+j]-b.data[i*a.h*a.w+j],2); + } + } 
+ for(j = 0; j < a.h*a.w; ++j){ + dist.data[j] = sqrt(dist.data[j]); + } + return dist; +} + +void ghost_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + float max_dist = sqrt((-source.w/2. + .5)*(-source.w/2. + .5)); + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float dist = sqrt((x - source.w/2. + .5)*(x - source.w/2. + .5) + (y - source.h/2. + .5)*(y - source.h/2. + .5)); + float alpha = (1 - dist/max_dist); + if(alpha < 0) alpha = 0; + float v1 = get_pixel(source, x,y,k); + float v2 = get_pixel(dest, dx+x,dy+y,k); + float val = alpha*v1 + (1-alpha)*v2; + set_pixel(dest, dx+x, dy+y, k, val); + } + } + } +} + +void blocky_image(image im, int s) +{ + int i,j,k; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + im.data[i + im.w*(j + im.h*k)] = im.data[i/s*s + im.w*(j/s*s + im.h*k)]; + } + } + } +} + +void censor_image(image im, int dx, int dy, int w, int h) +{ + int i,j,k; + int s = 32; + if(dx < 0) dx = 0; + if(dy < 0) dy = 0; + + for(k = 0; k < im.c; ++k){ + for(j = dy; j < dy + h && j < im.h; ++j){ + for(i = dx; i < dx + w && i < im.w; ++i){ + im.data[i + im.w*(j + im.h*k)] = im.data[i/s*s + im.w*(j/s*s + im.h*k)]; + //im.data[i + j*im.w + k*im.w*im.h] = 0; + } + } + } +} + +void embed_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float val = get_pixel(source, x,y,k); + set_pixel(dest, dx+x, dy+y, k, val); + } + } + } +} + +image collapse_image_layers(image source, int border) +{ + int h = source.h; + h = (h+border)*source.c - border; + image dest = make_image(source.w, h, 1); + int i; + for(i = 0; i < source.c; ++i){ + image layer = get_image_layer(source, i); + int h_offset = i*(source.h+border); + embed_image(layer, dest, 0, h_offset); + free_image(layer); + } + return dest; +} + +void constrain_image(image im) +{ + int i; + for(i = 
0; i < im.w*im.h*im.c; ++i){ + if(im.data[i] < 0) im.data[i] = 0; + if(im.data[i] > 1) im.data[i] = 1; + } +} + +void normalize_image(image p) +{ + int i; + float min = 9999999; + float max = -999999; + + for(i = 0; i < p.h*p.w*p.c; ++i){ + float v = p.data[i]; + if(v < min) min = v; + if(v > max) max = v; + } + if(max - min < .000000001){ + min = 0; + max = 1; + } + for(i = 0; i < p.c*p.w*p.h; ++i){ + p.data[i] = (p.data[i] - min)/(max-min); + } +} + +void normalize_image2(image p) +{ + float *min = calloc(p.c, sizeof(float)); + float *max = calloc(p.c, sizeof(float)); + int i,j; + for(i = 0; i < p.c; ++i) min[i] = max[i] = p.data[i*p.h*p.w]; + + for(j = 0; j < p.c; ++j){ + for(i = 0; i < p.h*p.w; ++i){ + float v = p.data[i+j*p.h*p.w]; + if(v < min[j]) min[j] = v; + if(v > max[j]) max[j] = v; + } + } + for(i = 0; i < p.c; ++i){ + if(max[i] - min[i] < .000000001){ + min[i] = 0; + max[i] = 1; + } + } + for(j = 0; j < p.c; ++j){ + for(i = 0; i < p.w*p.h; ++i){ + p.data[i+j*p.h*p.w] = (p.data[i+j*p.h*p.w] - min[j])/(max[j]-min[j]); + } + } + free(min); + free(max); +} + +void copy_image_into(image src, image dest) +{ + memcpy(dest.data, src.data, src.h*src.w*src.c*sizeof(float)); +} + +image copy_image(image p) +{ + image copy = p; + copy.data = calloc(p.h*p.w*p.c, sizeof(float)); + memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float)); + return copy; +} + +void rgbgr_image(image im) +{ + int i; + for(i = 0; i < im.w*im.h; ++i){ + float swap = im.data[i]; + im.data[i] = im.data[i+im.w*im.h*2]; + im.data[i+im.w*im.h*2] = swap; + } +} + +int show_image(image p, const char *name, int ms) +{ +#ifdef OPENCV + int c = show_image_cv(p, name, ms); + return c; +#else + fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name); + save_image(p, name); + return -1; +#endif +} + +void save_image_options(image im, const char *name, IMTYPE f, int quality) +{ + char buff[256]; + //sprintf(buff, "%s (%d)", name, windows); + if(f == PNG) sprintf(buff, "%s.png", 
name); + else if (f == BMP) sprintf(buff, "%s.bmp", name); + else if (f == TGA) sprintf(buff, "%s.tga", name); + else if (f == JPG) sprintf(buff, "%s.jpg", name); + else sprintf(buff, "%s.png", name); + unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char)); + int i,k; + for(k = 0; k < im.c; ++k){ + for(i = 0; i < im.w*im.h; ++i){ + data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]); + } + } + int success = 0; + if(f == PNG) success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c); + else if (f == BMP) success = stbi_write_bmp(buff, im.w, im.h, im.c, data); + else if (f == TGA) success = stbi_write_tga(buff, im.w, im.h, im.c, data); + else if (f == JPG) success = stbi_write_jpg(buff, im.w, im.h, im.c, data, quality); + free(data); + if(!success) fprintf(stderr, "Failed to write image %s\n", buff); +} + +void save_image(image im, const char *name) +{ + save_image_options(im, name, JPG, 80); +} + +void show_image_layers(image p, char *name) +{ + int i; + char buff[256]; + for(i = 0; i < p.c; ++i){ + sprintf(buff, "%s - Layer %d", name, i); + image layer = get_image_layer(p, i); + show_image(layer, buff, 1); + free_image(layer); + } +} + +void show_image_collapsed(image p, char *name) +{ + image c = collapse_image_layers(p, 1); + show_image(c, name, 1); + free_image(c); +} + +image make_empty_image(int w, int h, int c) +{ + image out; + out.data = 0; + out.h = h; + out.w = w; + out.c = c; + return out; +} + +image make_image(int w, int h, int c) +{ + image out = make_empty_image(w,h,c); + out.data = calloc(h*w*c, sizeof(float)); + return out; +} + +image make_random_image(int w, int h, int c) +{ + image out = make_empty_image(w,h,c); + out.data = calloc(h*w*c, sizeof(float)); + int i; + for(i = 0; i < w*h*c; ++i){ + out.data[i] = (rand_normal() * .25) + .5; + } + return out; +} + +image float_to_image(int w, int h, int c, float *data) +{ + image out = make_empty_image(w,h,c); + out.data = data; + return out; +} + +void place_image(image im, 
int w, int h, int dx, int dy, image canvas) +{ + int x, y, c; + for(c = 0; c < im.c; ++c){ + for(y = 0; y < h; ++y){ + for(x = 0; x < w; ++x){ + float rx = ((float)x / w) * im.w; + float ry = ((float)y / h) * im.h; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(canvas, x + dx, y + dy, c, val); + } + } + } +} + +image center_crop_image(image im, int w, int h) +{ + int m = (im.w < im.h) ? im.w : im.h; + image c = crop_image(im, (im.w - m) / 2, (im.h - m)/2, m, m); + image r = resize_image(c, w, h); + free_image(c); + return r; +} + +image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect) +{ + int x, y, c; + float cx = im.w/2.; + float cy = im.h/2.; + image rot = make_image(w, h, im.c); + for(c = 0; c < im.c; ++c){ + for(y = 0; y < h; ++y){ + for(x = 0; x < w; ++x){ + float rx = cos(rad)*((x - w/2.)/s*aspect + dx/s*aspect) - sin(rad)*((y - h/2.)/s + dy/s) + cx; + float ry = sin(rad)*((x - w/2.)/s*aspect + dx/s*aspect) + cos(rad)*((y - h/2.)/s + dy/s) + cy; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(rot, x, y, c, val); + } + } + } + return rot; +} + +image rotate_image(image im, float rad) +{ + int x, y, c; + float cx = im.w/2.; + float cy = im.h/2.; + image rot = make_image(im.w, im.h, im.c); + for(c = 0; c < im.c; ++c){ + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx; + float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(rot, x, y, c, val); + } + } + } + return rot; +} + +void fill_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] = s; +} + +void translate_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s; +} + +void scale_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s; +} + +image crop_image(image im, int dx, int dy, int w, int h) +{ + image cropped = 
make_image(w, h, im.c); + int i, j, k; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int r = j + dy; + int c = i + dx; + float val = 0; + r = constrain_int(r, 0, im.h-1); + c = constrain_int(c, 0, im.w-1); + val = get_pixel(im, c, r, k); + set_pixel(cropped, i, j, k, val); + } + } + } + return cropped; +} + +int best_3d_shift_r(image a, image b, int min, int max) +{ + if(min == max) return min; + int mid = floor((min + max) / 2.); + image c1 = crop_image(b, 0, mid, b.w, b.h); + image c2 = crop_image(b, 0, mid+1, b.w, b.h); + float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 10); + float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 10); + free_image(c1); + free_image(c2); + if(d1 < d2) return best_3d_shift_r(a, b, min, mid); + else return best_3d_shift_r(a, b, mid+1, max); +} + +int best_3d_shift(image a, image b, int min, int max) +{ + int i; + int best = 0; + float best_distance = FLT_MAX; + for(i = min; i <= max; i += 2){ + image c = crop_image(b, 0, i, b.w, b.h); + float d = dist_array(c.data, a.data, a.w*a.h*a.c, 100); + if(d < best_distance){ + best_distance = d; + best = i; + } + printf("%d %f\n", i, d); + free_image(c); + } + return best; +} + +void composite_3d(char *f1, char *f2, char *out, int delta) +{ + if(!out) out = "out"; + image a = load_image(f1, 0,0,0); + image b = load_image(f2, 0,0,0); + int shift = best_3d_shift_r(a, b, -a.h/100, a.h/100); + + image c1 = crop_image(b, 10, shift, b.w, b.h); + float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 100); + image c2 = crop_image(b, -10, shift, b.w, b.h); + float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 100); + + if(d2 < d1 && 0){ + image swap = a; + a = b; + b = swap; + shift = -shift; + printf("swapped, %d\n", shift); + } + else{ + printf("%d\n", shift); + } + + image c = crop_image(b, delta, shift, a.w, a.h); + int i; + for(i = 0; i < c.w*c.h; ++i){ + c.data[i] = a.data[i]; + } + save_image(c, out); +} + +void letterbox_image_into(image im, int w, int h, 
image boxed) +{ + int new_w = im.w; + int new_h = im.h; + if (((float)w/im.w) < ((float)h/im.h)) { + new_w = w; + new_h = (im.h * w)/im.w; + } else { + new_h = h; + new_w = (im.w * h)/im.h; + } + image resized = resize_image(im, new_w, new_h); + embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2); + free_image(resized); +} + +image letterbox_image(image im, int w, int h) +{ + int new_w = im.w; + int new_h = im.h; + if (((float)w/im.w) < ((float)h/im.h)) { + new_w = w; + new_h = (im.h * w)/im.w; + } else { + new_h = h; + new_w = (im.w * h)/im.h; + } + image resized = resize_image(im, new_w, new_h); + image boxed = make_image(w, h, im.c); + fill_image(boxed, .5); + //int i; + //for(i = 0; i < boxed.w*boxed.h*boxed.c; ++i) boxed.data[i] = 0; + embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2); + free_image(resized); + return boxed; +} + +image resize_max(image im, int max) +{ + int w = im.w; + int h = im.h; + if(w > h){ + h = (h * max) / w; + w = max; + } else { + w = (w * max) / h; + h = max; + } + if(w == im.w && h == im.h) return im; + image resized = resize_image(im, w, h); + return resized; +} + +image resize_min(image im, int min) +{ + int w = im.w; + int h = im.h; + if(w < h){ + h = (h * min) / w; + w = min; + } else { + w = (w * min) / h; + h = min; + } + if(w == im.w && h == im.h) return im; + image resized = resize_image(im, w, h); + return resized; +} + +image random_crop_image(image im, int w, int h) +{ + int dx = rand_int(0, im.w - w); + int dy = rand_int(0, im.h - h); + image crop = crop_image(im, dx, dy, w, h); + return crop; +} + +augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h) +{ + augment_args a = {0}; + aspect = rand_scale(aspect); + int r = rand_int(low, high); + int min = (im.h < im.w*aspect) ? 
im.h : im.w*aspect; + float scale = (float)r / min; + + float rad = rand_uniform(-angle, angle) * TWO_PI / 360.; + + float dx = (im.w*scale/aspect - w) / 2.; + float dy = (im.h*scale - w) / 2.; + //if(dx < 0) dx = 0; + //if(dy < 0) dy = 0; + dx = rand_uniform(-dx, dx); + dy = rand_uniform(-dy, dy); + + a.rad = rad; + a.scale = scale; + a.w = w; + a.h = h; + a.dx = dx; + a.dy = dy; + a.aspect = aspect; + return a; +} + +image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h) +{ + augment_args a = random_augment_args(im, angle, aspect, low, high, w, h); + image crop = rotate_crop_image(im, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + return crop; +} + +float three_way_max(float a, float b, float c) +{ + return (a > b) ? ( (a > c) ? a : c) : ( (b > c) ? b : c) ; +} + +float three_way_min(float a, float b, float c) +{ + return (a < b) ? ( (a < c) ? a : c) : ( (b < c) ? b : c) ; +} + +void yuv_to_rgb(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float y, u, v; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + y = get_pixel(im, i , j, 0); + u = get_pixel(im, i , j, 1); + v = get_pixel(im, i , j, 2); + + r = y + 1.13983*v; + g = y + -.39465*u + -.58060*v; + b = y + 2.03211*u; + + set_pixel(im, i, j, 0, r); + set_pixel(im, i, j, 1, g); + set_pixel(im, i, j, 2, b); + } + } +} + +void rgb_to_yuv(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float y, u, v; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + r = get_pixel(im, i , j, 0); + g = get_pixel(im, i , j, 1); + b = get_pixel(im, i , j, 2); + + y = .299*r + .587*g + .114*b; + u = -.14713*r + -.28886*g + .436*b; + v = .615*r + -.51499*g + -.10001*b; + + set_pixel(im, i, j, 0, y); + set_pixel(im, i, j, 1, u); + set_pixel(im, i, j, 2, v); + } + } +} + +// http://www.cs.rit.edu/~ncs/color/t_convert.html +void rgb_to_hsv(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float h, s, v; + for(j = 0; j < im.h; 
++j){ + for(i = 0; i < im.w; ++i){ + r = get_pixel(im, i , j, 0); + g = get_pixel(im, i , j, 1); + b = get_pixel(im, i , j, 2); + float max = three_way_max(r,g,b); + float min = three_way_min(r,g,b); + float delta = max - min; + v = max; + if(max == 0){ + s = 0; + h = 0; + }else{ + s = delta/max; + if(r == max){ + h = (g - b) / delta; + } else if (g == max) { + h = 2 + (b - r) / delta; + } else { + h = 4 + (r - g) / delta; + } + if (h < 0) h += 6; + h = h/6.; + } + set_pixel(im, i, j, 0, h); + set_pixel(im, i, j, 1, s); + set_pixel(im, i, j, 2, v); + } + } +} + +void hsv_to_rgb(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float h, s, v; + float f, p, q, t; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + h = 6 * get_pixel(im, i , j, 0); + s = get_pixel(im, i , j, 1); + v = get_pixel(im, i , j, 2); + if (s == 0) { + r = g = b = v; + } else { + int index = floor(h); + f = h - index; + p = v*(1-s); + q = v*(1-s*f); + t = v*(1-s*(1-f)); + if(index == 0){ + r = v; g = t; b = p; + } else if(index == 1){ + r = q; g = v; b = p; + } else if(index == 2){ + r = p; g = v; b = t; + } else if(index == 3){ + r = p; g = q; b = v; + } else if(index == 4){ + r = t; g = p; b = v; + } else { + r = v; g = p; b = q; + } + } + set_pixel(im, i, j, 0, r); + set_pixel(im, i, j, 1, g); + set_pixel(im, i, j, 2, b); + } + } +} + +void grayscale_image_3c(image im) +{ + assert(im.c == 3); + int i, j, k; + float scale[] = {0.299, 0.587, 0.114}; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float val = 0; + for(k = 0; k < 3; ++k){ + val += scale[k]*get_pixel(im, i, j, k); + } + im.data[0*im.h*im.w + im.w*j + i] = val; + im.data[1*im.h*im.w + im.w*j + i] = val; + im.data[2*im.h*im.w + im.w*j + i] = val; + } + } +} + +image grayscale_image(image im) +{ + assert(im.c == 3); + int i, j, k; + image gray = make_image(im.w, im.h, 1); + float scale[] = {0.299, 0.587, 0.114}; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ 
+ gray.data[i+im.w*j] += scale[k]*get_pixel(im, i, j, k); + } + } + } + return gray; +} + +image threshold_image(image im, float thresh) +{ + int i; + image t = make_image(im.w, im.h, im.c); + for(i = 0; i < im.w*im.h*im.c; ++i){ + t.data[i] = im.data[i]>thresh ? 1 : 0; + } + return t; +} + +image blend_image(image fore, image back, float alpha) +{ + assert(fore.w == back.w && fore.h == back.h && fore.c == back.c); + image blend = make_image(fore.w, fore.h, fore.c); + int i, j, k; + for(k = 0; k < fore.c; ++k){ + for(j = 0; j < fore.h; ++j){ + for(i = 0; i < fore.w; ++i){ + float val = alpha * get_pixel(fore, i, j, k) + + (1 - alpha)* get_pixel(back, i, j, k); + set_pixel(blend, i, j, k, val); + } + } + } + return blend; +} + +void scale_image_channel(image im, int c, float v) +{ + int i, j; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float pix = get_pixel(im, i, j, c); + pix = pix*v; + set_pixel(im, i, j, c, pix); + } + } +} + +void translate_image_channel(image im, int c, float v) +{ + int i, j; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float pix = get_pixel(im, i, j, c); + pix = pix+v; + set_pixel(im, i, j, c, pix); + } + } +} + +image binarize_image(image im) +{ + image c = copy_image(im); + int i; + for(i = 0; i < im.w * im.h * im.c; ++i){ + if(c.data[i] > .5) c.data[i] = 1; + else c.data[i] = 0; + } + return c; +} + +void saturate_image(image im, float sat) +{ + rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + hsv_to_rgb(im); + constrain_image(im); +} + +void hue_image(image im, float hue) +{ + rgb_to_hsv(im); + int i; + for(i = 0; i < im.w*im.h; ++i){ + im.data[i] = im.data[i] + hue; + if (im.data[i] > 1) im.data[i] -= 1; + if (im.data[i] < 0) im.data[i] += 1; + } + hsv_to_rgb(im); + constrain_image(im); +} + +void exposure_image(image im, float sat) +{ + rgb_to_hsv(im); + scale_image_channel(im, 2, sat); + hsv_to_rgb(im); + constrain_image(im); +} + +void distort_image(image im, float hue, float sat, float val) +{ + 
rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + scale_image_channel(im, 2, val); + int i; + for(i = 0; i < im.w*im.h; ++i){ + im.data[i] = im.data[i] + hue; + if (im.data[i] > 1) im.data[i] -= 1; + if (im.data[i] < 0) im.data[i] += 1; + } + hsv_to_rgb(im); + constrain_image(im); +} + +void random_distort_image(image im, float hue, float saturation, float exposure) +{ + float dhue = rand_uniform(-hue, hue); + float dsat = rand_scale(saturation); + float dexp = rand_scale(exposure); + distort_image(im, dhue, dsat, dexp); +} + +void saturate_exposure_image(image im, float sat, float exposure) +{ + rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + scale_image_channel(im, 2, exposure); + hsv_to_rgb(im); + constrain_image(im); +} + +image resize_image(image im, int w, int h) +{ + image resized = make_image(w, h, im.c); + image part = make_image(w, im.h, im.c); + int r, c, k; + float w_scale = (float)(im.w - 1) / (w - 1); + float h_scale = (float)(im.h - 1) / (h - 1); + for(k = 0; k < im.c; ++k){ + for(r = 0; r < im.h; ++r){ + for(c = 0; c < w; ++c){ + float val = 0; + if(c == w-1 || im.w == 1){ + val = get_pixel(im, im.w-1, r, k); + } else { + float sx = c*w_scale; + int ix = (int) sx; + float dx = sx - ix; + val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k); + } + set_pixel(part, c, r, k, val); + } + } + } + for(k = 0; k < im.c; ++k){ + for(r = 0; r < h; ++r){ + float sy = r*h_scale; + int iy = (int) sy; + float dy = sy - iy; + for(c = 0; c < w; ++c){ + float val = (1-dy) * get_pixel(part, c, iy, k); + set_pixel(resized, c, r, k, val); + } + if(r == h-1 || im.h == 1) continue; + for(c = 0; c < w; ++c){ + float val = dy * get_pixel(part, c, iy+1, k); + add_pixel(resized, c, r, k, val); + } + } + } + + free_image(part); + return resized; +} + + +void test_resize(char *filename) +{ + image im = load_image(filename, 0,0, 3); + float mag = mag_array(im.data, im.w*im.h*im.c); + printf("L2 Norm: %f\n", mag); + image gray = grayscale_image(im); + 
+ image c1 = copy_image(im); + image c2 = copy_image(im); + image c3 = copy_image(im); + image c4 = copy_image(im); + distort_image(c1, .1, 1.5, 1.5); + distort_image(c2, -.1, .66666, .66666); + distort_image(c3, .1, 1.5, .66666); + distort_image(c4, .1, .66666, 1.5); + + + show_image(im, "Original", 1); + show_image(gray, "Gray", 1); + show_image(c1, "C1", 1); + show_image(c2, "C2", 1); + show_image(c3, "C3", 1); + show_image(c4, "C4", 1); +#ifdef OPENCV + while(1){ + image aug = random_augment_image(im, 0, .75, 320, 448, 320, 320); + show_image(aug, "aug", 1); + free_image(aug); + + + float exposure = 1.15; + float saturation = 1.15; + float hue = .05; + + image c = copy_image(im); + + float dexp = rand_scale(exposure); + float dsat = rand_scale(saturation); + float dhue = rand_uniform(-hue, hue); + + distort_image(c, dhue, dsat, dexp); + show_image(c, "rand", 1); + printf("%f %f %f\n", dhue, dsat, dexp); + free_image(c); + } +#endif +} + + +image load_image_stb(char *filename, int channels) +{ + int w, h, c; + unsigned char *data = stbi_load(filename, &w, &h, &c, channels); + if (!data) { + fprintf(stderr, "Cannot load image \"%s\"\nSTB Reason: %s\n", filename, stbi_failure_reason()); + exit(0); + } + if(channels) c = channels; + int i,j,k; + image im = make_image(w, h, c); + for(k = 0; k < c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int dst_index = i + w*j + w*h*k; + int src_index = k + c*i + c*w*j; + im.data[dst_index] = (float)data[src_index]/255.; + } + } + } + free(data); + return im; +} + +image load_image(char *filename, int w, int h, int c) +{ +#ifdef OPENCV + image out = load_image_cv(filename, c); +#else + image out = load_image_stb(filename, c); +#endif + + if((h && w) && (h != out.h || w != out.w)){ + image resized = resize_image(out, w, h); + free_image(out); + out = resized; + } + return out; +} + +image load_image_color(char *filename, int w, int h) +{ + return load_image(filename, w, h, 3); +} + +image get_image_layer(image m, 
int l) +{ + image out = make_image(m.w, m.h, 1); + int i; + for(i = 0; i < m.h*m.w; ++i){ + out.data[i] = m.data[i+l*m.h*m.w]; + } + return out; +} +void print_image(image m) +{ + int i, j, k; + for(i =0 ; i < m.c; ++i){ + for(j =0 ; j < m.h; ++j){ + for(k = 0; k < m.w; ++k){ + printf("%.2lf, ", m.data[i*m.h*m.w + j*m.w + k]); + if(k > 30) break; + } + printf("\n"); + if(j > 30) break; + } + printf("\n"); + } + printf("\n"); +} + +image collapse_images_vert(image *ims, int n) +{ + int color = 1; + int border = 1; + int h,w,c; + w = ims[0].w; + h = (ims[0].h + border) * n - border; + c = ims[0].c; + if(c != 3 || !color){ + w = (w+border)*c - border; + c = 1; + } + + image filters = make_image(w, h, c); + int i,j; + for(i = 0; i < n; ++i){ + int h_offset = i*(ims[0].h+border); + image copy = copy_image(ims[i]); + //normalize_image(copy); + if(c == 3 && color){ + embed_image(copy, filters, 0, h_offset); + } + else{ + for(j = 0; j < copy.c; ++j){ + int w_offset = j*(ims[0].w+border); + image layer = get_image_layer(copy, j); + embed_image(layer, filters, w_offset, h_offset); + free_image(layer); + } + } + free_image(copy); + } + return filters; +} + +image collapse_images_horz(image *ims, int n) +{ + int color = 1; + int border = 1; + int h,w,c; + int size = ims[0].h; + h = size; + w = (ims[0].w + border) * n - border; + c = ims[0].c; + if(c != 3 || !color){ + h = (h+border)*c - border; + c = 1; + } + + image filters = make_image(w, h, c); + int i,j; + for(i = 0; i < n; ++i){ + int w_offset = i*(size+border); + image copy = copy_image(ims[i]); + //normalize_image(copy); + if(c == 3 && color){ + embed_image(copy, filters, w_offset, 0); + } + else{ + for(j = 0; j < copy.c; ++j){ + int h_offset = j*(size+border); + image layer = get_image_layer(copy, j); + embed_image(layer, filters, w_offset, h_offset); + free_image(layer); + } + } + free_image(copy); + } + return filters; +} + +void show_image_normalized(image im, const char *name) +{ + image c = copy_image(im); + 
normalize_image(c); + show_image(c, name, 1); + free_image(c); +} + +void show_images(image *ims, int n, char *window) +{ + image m = collapse_images_vert(ims, n); + /* + int w = 448; + int h = ((float)m.h/m.w) * 448; + if(h > 896){ + h = 896; + w = ((float)m.w/m.h) * 896; + } + image sized = resize_image(m, w, h); + */ + normalize_image(m); + save_image(m, window); + show_image(m, window, 1); + free_image(m); +} + +void free_image(image m) +{ + if(m.data){ + free(m.data); + } +} diff --git a/workloads/realworld/async/darknet/src/image.h b/workloads/realworld/async/darknet/src/image.h new file mode 100644 index 0000000000000000000000000000000000000000..3392bb9787fc542929cda064bcefa0f3f893b76c --- /dev/null +++ b/workloads/realworld/async/darknet/src/image.h @@ -0,0 +1,69 @@ +#ifndef IMAGE_H +#define IMAGE_H + +#include +#include +#include +#include +#include +#include "box.h" +#include "darknet.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef OPENCV +void *open_video_stream(const char *f, int c, int w, int h, int fps); +image get_image_from_stream(void *p); +image load_image_cv(char *filename, int channels); +int show_image_cv(image im, const char* name, int ms); +#endif + +float get_color(int c, int x, int max); +void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); +void draw_bbox(image a, box bbox, int w, float r, float g, float b); +void write_label(image a, int r, int c, image *characters, char *string, float *rgb); +image image_distance(image a, image b); +void scale_image(image m, float s); +image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect); +image random_crop_image(image im, int w, int h); +image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h); +augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h); +void letterbox_image_into(image im, int w, int h, image boxed); +image 
resize_max(image im, int max); +void translate_image(image m, float s); +void embed_image(image source, image dest, int dx, int dy); +void place_image(image im, int w, int h, int dx, int dy, image canvas); +void saturate_image(image im, float sat); +void exposure_image(image im, float sat); +void distort_image(image im, float hue, float sat, float val); +void saturate_exposure_image(image im, float sat, float exposure); +void rgb_to_hsv(image im); +void hsv_to_rgb(image im); +void yuv_to_rgb(image im); +void rgb_to_yuv(image im); + + +image collapse_image_layers(image source, int border); +image collapse_images_horz(image *ims, int n); +image collapse_images_vert(image *ims, int n); + +void show_image_normalized(image im, const char *name); +void show_images(image *ims, int n, char *window); +void show_image_layers(image p, char *name); +void show_image_collapsed(image p, char *name); + +void print_image(image m); + +image make_empty_image(int w, int h, int c); +void copy_image_into(image src, image dest); + +image get_image_layer(image m, int l); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/workloads/realworld/async/darknet/src/image_opencv.cpp b/workloads/realworld/async/darknet/src/image_opencv.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7511280be07ca987fd51fa54aea55910cd34a706 --- /dev/null +++ b/workloads/realworld/async/darknet/src/image_opencv.cpp @@ -0,0 +1,135 @@ +#ifdef OPENCV + +#include "stdio.h" +#include "stdlib.h" +#include "opencv2/opencv.hpp" +#include "image.h" + +using namespace cv; + +extern "C" { + +IplImage *image_to_ipl(image im) +{ + int x,y,c; + IplImage *disp = cvCreateImage(cvSize(im.w,im.h), IPL_DEPTH_8U, im.c); + int step = disp->widthStep; + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + for(c= 0; c < im.c; ++c){ + float val = im.data[c*im.h*im.w + y*im.w + x]; + disp->imageData[y*step + x*im.c + c] = (unsigned char)(val*255); + } + } + } + return disp; +} + +image 
ipl_to_image(IplImage* src) +{ + int h = src->height; + int w = src->width; + int c = src->nChannels; + image im = make_image(w, h, c); + unsigned char *data = (unsigned char *)src->imageData; + int step = src->widthStep; + int i, j, k; + + for(i = 0; i < h; ++i){ + for(k= 0; k < c; ++k){ + for(j = 0; j < w; ++j){ + im.data[k*w*h + i*w + j] = data[i*step + j*c + k]/255.; + } + } + } + return im; +} + +Mat image_to_mat(image im) +{ + image copy = copy_image(im); + constrain_image(copy); + if(im.c == 3) rgbgr_image(copy); + + IplImage *ipl = image_to_ipl(copy); + Mat m = cvarrToMat(ipl, true); + cvReleaseImage(&ipl); + free_image(copy); + return m; +} + +image mat_to_image(Mat m) +{ + IplImage ipl = m; + image im = ipl_to_image(&ipl); + rgbgr_image(im); + return im; +} + +void *open_video_stream(const char *f, int c, int w, int h, int fps) +{ + VideoCapture *cap; + if(f) cap = new VideoCapture(f); + else cap = new VideoCapture(c); + if(!cap->isOpened()) return 0; + if(w) cap->set(CV_CAP_PROP_FRAME_WIDTH, w); + if(h) cap->set(CV_CAP_PROP_FRAME_HEIGHT, w); + if(fps) cap->set(CV_CAP_PROP_FPS, w); + return (void *) cap; +} + +image get_image_from_stream(void *p) +{ + VideoCapture *cap = (VideoCapture *)p; + Mat m; + *cap >> m; + if(m.empty()) return make_empty_image(0,0,0); + return mat_to_image(m); +} + +image load_image_cv(char *filename, int channels) +{ + int flag = -1; + if (channels == 0) flag = -1; + else if (channels == 1) flag = 0; + else if (channels == 3) flag = 1; + else { + fprintf(stderr, "OpenCV can't force load with %d channels\n", channels); + } + Mat m; + m = imread(filename, flag); + if(!m.data){ + fprintf(stderr, "Cannot load image \"%s\"\n", filename); + char buff[256]; + sprintf(buff, "echo %s >> bad.list", filename); + system(buff); + return make_image(10,10,3); + //exit(0); + } + image im = mat_to_image(m); + return im; +} + +int show_image_cv(image im, const char* name, int ms) +{ + Mat m = image_to_mat(im); + imshow(name, m); + int c = 
waitKey(ms); + if (c != -1) c = c%256; + return c; +} + +void make_window(char *name, int w, int h, int fullscreen) +{ + namedWindow(name, WINDOW_NORMAL); + if (fullscreen) { + setWindowProperty(name, CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); + } else { + resizeWindow(name, w, h); + if(strcmp(name, "Demo") == 0) moveWindow(name, 0, 0); + } +} + +} + +#endif diff --git a/workloads/realworld/async/darknet/src/iseg_layer.c b/workloads/realworld/async/darknet/src/iseg_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..a1b822a5797a6d04b0f3756f106cb2b20ba31a5b --- /dev/null +++ b/workloads/realworld/async/darknet/src/iseg_layer.c @@ -0,0 +1,225 @@ +#include "iseg_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_iseg_layer(int batch, int w, int h, int classes, int ids) +{ + layer l = {0}; + l.type = ISEG; + + l.h = h; + l.w = w; + l.c = classes + ids; + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.batch = batch; + l.extra = ids; + l.cost = calloc(1, sizeof(float)); + l.outputs = h*w*l.c; + l.inputs = l.outputs; + l.truths = 90*(l.w*l.h+1); + l.delta = calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + + l.counts = calloc(90, sizeof(int)); + l.sums = calloc(90, sizeof(float*)); + if(ids){ + int i; + for(i = 0; i < 90; ++i){ + l.sums[i] = calloc(ids, sizeof(float)); + } + } + + l.forward = forward_iseg_layer; + l.backward = backward_iseg_layer; +#ifdef GPU + l.forward_gpu = forward_iseg_layer_gpu; + l.backward_gpu = backward_iseg_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "iseg\n"); + srand(0); + + return l; +} + +void resize_iseg_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->c; + l->inputs = 
l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +void forward_iseg_layer(const layer l, network net) +{ + + double time = what_time_is_it_now(); + int i,b,j,k; + int ids = l.extra; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + int index = b*l.outputs; + activate_array(l.output + index, l.classes*l.w*l.h, LOGISTIC); + } +#endif + + for (b = 0; b < l.batch; ++b){ + // a priori, each pixel has no class + for(i = 0; i < l.classes; ++i){ + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + i*l.w*l.h + k; + l.delta[index] = 0 - l.output[index]; + } + } + + // a priori, embedding should be small magnitude + for(i = 0; i < ids; ++i){ + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + (i+l.classes)*l.w*l.h + k; + l.delta[index] = .1 * (0 - l.output[index]); + } + } + + + memset(l.counts, 0, 90*sizeof(int)); + for(i = 0; i < 90; ++i){ + fill_cpu(ids, 0, l.sums[i], 1); + + int c = net.truth[b*l.truths + i*(l.w*l.h+1)]; + if(c < 0) break; + // add up metric embeddings for each instance + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + c*l.w*l.h + k; + float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k]; + if(v){ + l.delta[index] = v - l.output[index]; + axpy_cpu(ids, 1, l.output + b*l.outputs + l.classes*l.w*l.h + k, l.w*l.h, l.sums[i], 1); + ++l.counts[i]; + } + } + } + + float *mse = calloc(90, sizeof(float)); + for(i = 0; i < 90; ++i){ + int c = net.truth[b*l.truths + i*(l.w*l.h+1)]; + if(c < 0) break; + for(k = 0; k < l.w*l.h; ++k){ + float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k]; + if(v){ + 
int z; + float sum = 0; + for(z = 0; z < ids; ++z){ + int index = b*l.outputs + (l.classes + z)*l.w*l.h + k; + sum += pow(l.sums[i][z]/l.counts[i] - l.output[index], 2); + } + mse[i] += sum; + } + } + mse[i] /= l.counts[i]; + } + + // Calculate average embedding + for(i = 0; i < 90; ++i){ + if(!l.counts[i]) continue; + scal_cpu(ids, 1.f/l.counts[i], l.sums[i], 1); + if(b == 0 && net.gpu_index == 0){ + printf("%4d, %6.3f, ", l.counts[i], mse[i]); + for(j = 0; j < ids; ++j){ + printf("%6.3f,", l.sums[i][j]); + } + printf("\n"); + } + } + free(mse); + + // Calculate embedding loss + for(i = 0; i < 90; ++i){ + if(!l.counts[i]) continue; + for(k = 0; k < l.w*l.h; ++k){ + float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k]; + if(v){ + for(j = 0; j < 90; ++j){ + if(!l.counts[j])continue; + int z; + for(z = 0; z < ids; ++z){ + int index = b*l.outputs + (l.classes + z)*l.w*l.h + k; + float diff = l.sums[j][z] - l.output[index]; + if (j == i) l.delta[index] += diff < 0? -.1 : .1; + else l.delta[index] += -(diff < 0? 
-.1 : .1); + } + } + } + } + } + + for(i = 0; i < ids; ++i){ + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + (i+l.classes)*l.w*l.h + k; + l.delta[index] *= .01; + } + } + } + + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("took %lf sec\n", what_time_is_it_now() - time); +} + +void backward_iseg_layer(const layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_iseg_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b; + for (b = 0; b < l.batch; ++b){ + activate_array_gpu(l.output_gpu + b*l.outputs, l.classes*l.w*l.h, LOGISTIC); + //if(l.extra) activate_array_gpu(l.output_gpu + b*l.outputs + l.classes*l.w*l.h, l.extra*l.w*l.h, LOGISTIC); + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_iseg_layer(l, net); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_iseg_layer_gpu(const layer l, network net) +{ + int b; + for (b = 0; b < l.batch; ++b){ + //if(l.extra) gradient_array_gpu(l.output_gpu + b*l.outputs + l.classes*l.w*l.h, l.extra*l.w*l.h, LOGISTIC, l.delta_gpu + b*l.outputs + l.classes*l.w*l.h); + } + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/async/darknet/src/iseg_layer.h b/workloads/realworld/async/darknet/src/iseg_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..dd8e64e023caf1e1fd0c30af57f9983f24ddd691 --- /dev/null +++ b/workloads/realworld/async/darknet/src/iseg_layer.h @@ -0,0 +1,19 @@ +#ifndef ISEG_LAYER_H +#define ISEG_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_iseg_layer(int batch, int w, int h, int classes, int ids); +void forward_iseg_layer(const layer l, network net); +void backward_iseg_layer(const layer l, network net); +void resize_iseg_layer(layer *l, int w, int h); +int iseg_num_detections(layer 
l, float thresh); + +#ifdef GPU +void forward_iseg_layer_gpu(const layer l, network net); +void backward_iseg_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/async/darknet/src/l2norm_layer.c b/workloads/realworld/async/darknet/src/l2norm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..0cf7f844170cb2c3dba15be94d4a435aaa63067c --- /dev/null +++ b/workloads/realworld/async/darknet/src/l2norm_layer.c @@ -0,0 +1,63 @@ +#include "l2norm_layer.h" +#include "activations.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +layer make_l2norm_layer(int batch, int inputs) +{ + fprintf(stderr, "l2norm %4d\n", inputs); + layer l = {0}; + l.type = L2NORM; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.output = calloc(inputs*batch, sizeof(float)); + l.scales = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + + l.forward = forward_l2norm_layer; + l.backward = backward_l2norm_layer; + #ifdef GPU + l.forward_gpu = forward_l2norm_layer_gpu; + l.backward_gpu = backward_l2norm_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.scales_gpu = cuda_make_array(l.output, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_l2norm_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + l2normalize_cpu(l.output, l.scales, l.batch, l.out_c, l.out_w*l.out_h); +} + +void backward_l2norm_layer(const layer l, network net) +{ + //axpy_cpu(l.inputs*l.batch, 1, l.scales, 1, l.delta, 1); + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_l2norm_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + l2normalize_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_w*l.out_h); +} + +void backward_l2norm_layer_gpu(const 
layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.scales_gpu, 1, l.delta_gpu, 1); + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/async/darknet/src/l2norm_layer.h b/workloads/realworld/async/darknet/src/l2norm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1ca6f710f017f2857f566eaed90634698d72b26d --- /dev/null +++ b/workloads/realworld/async/darknet/src/l2norm_layer.h @@ -0,0 +1,15 @@ +#ifndef L2NORM_LAYER_H +#define L2NORM_LAYER_H +#include "layer.h" +#include "network.h" + +layer make_l2norm_layer(int batch, int inputs); +void forward_l2norm_layer(const layer l, network net); +void backward_l2norm_layer(const layer l, network net); + +#ifdef GPU +void forward_l2norm_layer_gpu(const layer l, network net); +void backward_l2norm_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/async/darknet/src/layer.c b/workloads/realworld/async/darknet/src/layer.c new file mode 100644 index 0000000000000000000000000000000000000000..3bffe436f06a455e2d1043158ff6da9b07bbb61f --- /dev/null +++ b/workloads/realworld/async/darknet/src/layer.c @@ -0,0 +1,97 @@ +#include "layer.h" +#include "cuda_dark.h" + +#include + +void free_layer(layer l) +{ + if(l.type == DROPOUT){ + if(l.rand) free(l.rand); +#ifdef GPU + if(l.rand_gpu) cuda_free(l.rand_gpu); +#endif + return; + } + if(l.cweights) free(l.cweights); + if(l.indexes) free(l.indexes); + if(l.input_layers) free(l.input_layers); + if(l.input_sizes) free(l.input_sizes); + if(l.map) free(l.map); + if(l.rand) free(l.rand); + if(l.cost) free(l.cost); + if(l.state) free(l.state); + if(l.prev_state) free(l.prev_state); + if(l.forgot_state) free(l.forgot_state); + if(l.forgot_delta) free(l.forgot_delta); + if(l.state_delta) free(l.state_delta); + if(l.concat) free(l.concat); + if(l.concat_delta) free(l.concat_delta); + if(l.binary_weights) free(l.binary_weights); + if(l.biases) free(l.biases); + 
if(l.bias_updates) free(l.bias_updates); + if(l.scales) free(l.scales); + if(l.scale_updates) free(l.scale_updates); + if(l.weights) free(l.weights); + if(l.weight_updates) free(l.weight_updates); + if(l.delta) free(l.delta); + if(l.output) free(l.output); + if(l.squared) free(l.squared); + if(l.norms) free(l.norms); + if(l.spatial_mean) free(l.spatial_mean); + if(l.mean) free(l.mean); + if(l.variance) free(l.variance); + if(l.mean_delta) free(l.mean_delta); + if(l.variance_delta) free(l.variance_delta); + if(l.rolling_mean) free(l.rolling_mean); + if(l.rolling_variance) free(l.rolling_variance); + if(l.x) free(l.x); + if(l.x_norm) free(l.x_norm); + if(l.m) free(l.m); + if(l.v) free(l.v); + if(l.z_cpu) free(l.z_cpu); + if(l.r_cpu) free(l.r_cpu); + if(l.h_cpu) free(l.h_cpu); + if(l.binary_input) free(l.binary_input); + +#ifdef GPU + if(l.indexes_gpu) cuda_free((float *)l.indexes_gpu); + + if(l.z_gpu) cuda_free(l.z_gpu); + if(l.r_gpu) cuda_free(l.r_gpu); + if(l.h_gpu) cuda_free(l.h_gpu); + if(l.m_gpu) cuda_free(l.m_gpu); + if(l.v_gpu) cuda_free(l.v_gpu); + if(l.prev_state_gpu) cuda_free(l.prev_state_gpu); + if(l.forgot_state_gpu) cuda_free(l.forgot_state_gpu); + if(l.forgot_delta_gpu) cuda_free(l.forgot_delta_gpu); + if(l.state_gpu) cuda_free(l.state_gpu); + if(l.state_delta_gpu) cuda_free(l.state_delta_gpu); + if(l.gate_gpu) cuda_free(l.gate_gpu); + if(l.gate_delta_gpu) cuda_free(l.gate_delta_gpu); + if(l.save_gpu) cuda_free(l.save_gpu); + if(l.save_delta_gpu) cuda_free(l.save_delta_gpu); + if(l.concat_gpu) cuda_free(l.concat_gpu); + if(l.concat_delta_gpu) cuda_free(l.concat_delta_gpu); + if(l.binary_input_gpu) cuda_free(l.binary_input_gpu); + if(l.binary_weights_gpu) cuda_free(l.binary_weights_gpu); + if(l.mean_gpu) cuda_free(l.mean_gpu); + if(l.variance_gpu) cuda_free(l.variance_gpu); + if(l.rolling_mean_gpu) cuda_free(l.rolling_mean_gpu); + if(l.rolling_variance_gpu) cuda_free(l.rolling_variance_gpu); + if(l.variance_delta_gpu) cuda_free(l.variance_delta_gpu); + 
if(l.mean_delta_gpu) cuda_free(l.mean_delta_gpu); + if(l.x_gpu) cuda_free(l.x_gpu); + if(l.x_norm_gpu) cuda_free(l.x_norm_gpu); + if(l.weights_gpu) cuda_free(l.weights_gpu); + if(l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); + if(l.biases_gpu) cuda_free(l.biases_gpu); + if(l.bias_updates_gpu) cuda_free(l.bias_updates_gpu); + if(l.scales_gpu) cuda_free(l.scales_gpu); + if(l.scale_updates_gpu) cuda_free(l.scale_updates_gpu); + if(l.output_gpu) cuda_free(l.output_gpu); + if(l.delta_gpu) cuda_free(l.delta_gpu); + if(l.rand_gpu) cuda_free(l.rand_gpu); + if(l.squared_gpu) cuda_free(l.squared_gpu); + if(l.norms_gpu) cuda_free(l.norms_gpu); +#endif +} diff --git a/workloads/realworld/async/darknet/src/layer.h b/workloads/realworld/async/darknet/src/layer.h new file mode 100644 index 0000000000000000000000000000000000000000..af6cd2ab5054f5ef3bbdfca2da45f08d710a7bd0 --- /dev/null +++ b/workloads/realworld/async/darknet/src/layer.h @@ -0,0 +1 @@ +#include "darknet.h" diff --git a/workloads/realworld/async/darknet/src/list.c b/workloads/realworld/async/darknet/src/list.c new file mode 100644 index 0000000000000000000000000000000000000000..0e4165d37800e1b4c7c33992cd64a6634fe4688c --- /dev/null +++ b/workloads/realworld/async/darknet/src/list.c @@ -0,0 +1,92 @@ +#include +#include +#include "list.h" + +list *make_list() +{ + list *l = malloc(sizeof(list)); + l->size = 0; + l->front = 0; + l->back = 0; + return l; +} + +/* +void transfer_node(list *s, list *d, node *n) +{ + node *prev, *next; + prev = n->prev; + next = n->next; + if(prev) prev->next = next; + if(next) next->prev = prev; + --s->size; + if(s->front == n) s->front = next; + if(s->back == n) s->back = prev; +} +*/ + +void *list_pop(list *l){ + if(!l->back) return 0; + node *b = l->back; + void *val = b->val; + l->back = b->prev; + if(l->back) l->back->next = 0; + free(b); + --l->size; + + return val; +} + +void list_insert(list *l, void *val) +{ + node *new = malloc(sizeof(node)); + new->val = val; + 
new->next = 0; + + if(!l->back){ + l->front = new; + new->prev = 0; + }else{ + l->back->next = new; + new->prev = l->back; + } + l->back = new; + ++l->size; +} + +void free_node(node *n) +{ + node *next; + while(n) { + next = n->next; + free(n); + n = next; + } +} + +void free_list(list *l) +{ + free_node(l->front); + free(l); +} + +void free_list_contents(list *l) +{ + node *n = l->front; + while(n){ + free(n->val); + n = n->next; + } +} + +void **list_to_array(list *l) +{ + void **a = calloc(l->size, sizeof(void*)); + int count = 0; + node *n = l->front; + while(n){ + a[count++] = n->val; + n = n->next; + } + return a; +} diff --git a/workloads/realworld/async/darknet/src/list.h b/workloads/realworld/async/darknet/src/list.h new file mode 100644 index 0000000000000000000000000000000000000000..6b445c717c2937b9c90536654ba82a33e14bb4ec --- /dev/null +++ b/workloads/realworld/async/darknet/src/list.h @@ -0,0 +1,13 @@ +#ifndef LIST_H +#define LIST_H +#include "darknet.h" + +list *make_list(); +int list_find(list *l, void *val); + +void list_insert(list *, void *); + + +void free_list_contents(list *l); + +#endif diff --git a/workloads/realworld/async/darknet/src/local_layer.c b/workloads/realworld/async/darknet/src/local_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..74f6910a8fd751ad9f3b41fc67be737399a151d0 --- /dev/null +++ b/workloads/realworld/async/darknet/src/local_layer.c @@ -0,0 +1,293 @@ +#include "local_layer.h" +#include "utils.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" +#include +#include + +int local_out_height(local_layer l) +{ + int h = l.h; + if (!l.pad) h -= l.size; + else h -= 1; + return h/l.stride + 1; +} + +int local_out_width(local_layer l) +{ + int w = l.w; + if (!l.pad) w -= l.size; + else w -= 1; + return w/l.stride + 1; +} + +local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation) +{ + int i; + local_layer l = 
{0}; + l.type = LOCAL; + + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.batch = batch; + l.stride = stride; + l.size = size; + l.pad = pad; + + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int locations = out_h*out_w; + l.out_h = out_h; + l.out_w = out_w; + l.out_c = n; + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = l.w * l.h * l.c; + + l.weights = calloc(c*n*size*size*locations, sizeof(float)); + l.weight_updates = calloc(c*n*size*size*locations, sizeof(float)); + + l.biases = calloc(l.outputs, sizeof(float)); + l.bias_updates = calloc(l.outputs, sizeof(float)); + + // float scale = 1./sqrt(size*size*c); + float scale = sqrt(2./(size*size*c)); + for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1,1); + + l.output = calloc(l.batch*out_h * out_w * n, sizeof(float)); + l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float)); + + l.workspace_size = out_h*out_w*size*size*c; + + l.forward = forward_local_layer; + l.backward = backward_local_layer; + l.update = update_local_layer; + +#ifdef GPU + l.forward_gpu = forward_local_layer_gpu; + l.backward_gpu = backward_local_layer_gpu; + l.update_gpu = update_local_layer_gpu; + + l.weights_gpu = cuda_make_array(l.weights, c*n*size*size*locations); + l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size*locations); + + l.biases_gpu = cuda_make_array(l.biases, l.outputs); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, l.outputs); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + +#endif + l.activation = activation; + + fprintf(stderr, "Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n); + + return l; +} + +void forward_local_layer(const local_layer l, network net) +{ + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int i, j; + int locations = out_h * out_w; + + for(i = 0; i < l.batch; ++i){ + 
copy_cpu(l.outputs, l.biases, 1, l.output + i*l.outputs, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input + i*l.w*l.h*l.c; + im2col_cpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + float *output = l.output + i*l.outputs; + for(j = 0; j < locations; ++j){ + float *a = l.weights + j*l.size*l.size*l.c*l.n; + float *b = net.workspace + j; + float *c = output + j; + + int m = l.n; + int n = 1; + int k = l.size*l.size*l.c; + + gemm(0,0,m,n,k,1,a,k,b,locations,1,c,locations); + } + } + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_local_layer(local_layer l, network net) +{ + int i, j; + int locations = l.out_w*l.out_h; + + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + for(i = 0; i < l.batch; ++i){ + axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input + i*l.w*l.h*l.c; + im2col_cpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + + for(j = 0; j < locations; ++j){ + float *a = l.delta + i*l.outputs + j; + float *b = net.workspace + j; + float *c = l.weight_updates + j*l.size*l.size*l.c*l.n; + int m = l.n; + int n = l.size*l.size*l.c; + int k = 1; + + gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n); + } + + if(net.delta){ + for(j = 0; j < locations; ++j){ + float *a = l.weights + j*l.size*l.size*l.c*l.n; + float *b = l.delta + i*l.outputs + j; + float *c = net.workspace + j; + + int m = l.size*l.size*l.c; + int n = 1; + int k = l.n; + + gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations); + } + + col2im_cpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta+i*l.c*l.h*l.w); + } + } +} + +void update_local_layer(local_layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + 
axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.outputs, momentum, l.bias_updates, 1); + + axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(size, momentum, l.weight_updates, 1); +} + +#ifdef GPU + +void forward_local_layer_gpu(const local_layer l, network net) +{ + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int i, j; + int locations = out_h * out_w; + + for(i = 0; i < l.batch; ++i){ + copy_gpu(l.outputs, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input_gpu + i*l.w*l.h*l.c; + im2col_gpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + float *output = l.output_gpu + i*l.outputs; + for(j = 0; j < locations; ++j){ + float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n; + float *b = net.workspace + j; + float *c = output + j; + + int m = l.n; + int n = 1; + int k = l.size*l.size*l.c; + + gemm_gpu(0,0,m,n,k,1,a,k,b,locations,1,c,locations); + } + } + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_local_layer_gpu(local_layer l, network net) +{ + int i, j; + int locations = l.out_w*l.out_h; + + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + for(i = 0; i < l.batch; ++i){ + axpy_gpu(l.outputs, 1, l.delta_gpu + i*l.outputs, 1, l.bias_updates_gpu, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input_gpu + i*l.w*l.h*l.c; + im2col_gpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + + for(j = 0; j < locations; ++j){ + float *a = l.delta_gpu + i*l.outputs + j; + float *b = net.workspace + j; + float *c = l.weight_updates_gpu + j*l.size*l.size*l.c*l.n; + int m = l.n; + int n = l.size*l.size*l.c; + int k = 1; + + gemm_gpu(0,1,m,n,k,1,a,locations,b,locations,1,c,n); + } + + if(net.delta_gpu){ + for(j = 0; j < locations; ++j){ + float 
*a = l.weights_gpu + j*l.size*l.size*l.c*l.n; + float *b = l.delta_gpu + i*l.outputs + j; + float *c = net.workspace + j; + + int m = l.size*l.size*l.c; + int n = 1; + int k = l.n; + + gemm_gpu(1,0,m,n,k,1,a,m,b,locations,0,c,locations); + } + + col2im_gpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta_gpu+i*l.c*l.h*l.w); + } + } +} + +void update_local_layer_gpu(local_layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1); + + axpy_gpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(size, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(size, momentum, l.weight_updates_gpu, 1); +} + +void pull_local_layer(local_layer l) +{ + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + cuda_pull_array(l.weights_gpu, l.weights, size); + cuda_pull_array(l.biases_gpu, l.biases, l.outputs); +} + +void push_local_layer(local_layer l) +{ + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + cuda_push_array(l.weights_gpu, l.weights, size); + cuda_push_array(l.biases_gpu, l.biases, l.outputs); +} +#endif diff --git a/workloads/realworld/async/darknet/src/local_layer.h b/workloads/realworld/async/darknet/src/local_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..56805c4f1cb51fed9ef0e771d2befb430df60df5 --- /dev/null +++ b/workloads/realworld/async/darknet/src/local_layer.h @@ -0,0 +1,31 @@ +#ifndef LOCAL_LAYER_H +#define LOCAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +typedef layer local_layer; + +#ifdef GPU +void 
forward_local_layer_gpu(local_layer layer, network net); +void backward_local_layer_gpu(local_layer layer, network net); +void update_local_layer_gpu(local_layer layer, update_args a); + +void push_local_layer(local_layer layer); +void pull_local_layer(local_layer layer); +#endif + +local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation); + +void forward_local_layer(const local_layer layer, network net); +void backward_local_layer(local_layer layer, network net); +void update_local_layer(local_layer layer, update_args a); + +void bias_output(float *output, float *biases, int batch, int n, int size); +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); + +#endif + diff --git a/workloads/realworld/async/darknet/src/logistic_layer.c b/workloads/realworld/async/darknet/src/logistic_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..8d98986f67a17da70df75e3d56a46615cfc8eaf1 --- /dev/null +++ b/workloads/realworld/async/darknet/src/logistic_layer.c @@ -0,0 +1,71 @@ +#include "logistic_layer.h" +#include "activations.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +layer make_logistic_layer(int batch, int inputs) +{ + fprintf(stderr, "logistic x entropy %4d\n", inputs); + layer l = {0}; + l.type = LOGXENT; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.loss = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_logistic_layer; + l.backward = backward_logistic_layer; + #ifdef GPU + l.forward_gpu = forward_logistic_layer_gpu; + l.backward_gpu = backward_logistic_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.loss_gpu = cuda_make_array(l.loss, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + 
#endif + return l; +} + +void forward_logistic_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + activate_array(l.output, l.outputs*l.batch, LOGISTIC); + if(net.truth){ + logistic_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_logistic_layer(const layer l, network net) +{ + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_logistic_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, LOGISTIC); + if(net.truth){ + logistic_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); + cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_logistic_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/async/darknet/src/logistic_layer.h b/workloads/realworld/async/darknet/src/logistic_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9c25bee3c2a6eb1013ed43ce0c4aeaa63b7a293f --- /dev/null +++ b/workloads/realworld/async/darknet/src/logistic_layer.h @@ -0,0 +1,15 @@ +#ifndef LOGISTIC_LAYER_H +#define LOGISTIC_LAYER_H +#include "layer.h" +#include "network.h" + +layer make_logistic_layer(int batch, int inputs); +void forward_logistic_layer(const layer l, network net); +void backward_logistic_layer(const layer l, network net); + +#ifdef GPU +void forward_logistic_layer_gpu(const layer l, network net); +void backward_logistic_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/async/darknet/src/lstm_layer.c b/workloads/realworld/async/darknet/src/lstm_layer.c new file mode 100644 index 
0000000000000000000000000000000000000000..16f43914df8f35fb7f7b16bd93ff1d83f513dda0 --- /dev/null +++ b/workloads/realworld/async/darknet/src/lstm_layer.c @@ -0,0 +1,626 @@ +#include "lstm_layer.h" +#include "connected_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam) +{ + fprintf(stderr, "LSTM Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer l = { 0 }; + l.batch = batch; + l.type = LSTM; + l.steps = steps; + l.inputs = inputs; + + l.uf = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uf) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uf->batch = batch; + + l.ui = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.ui) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.ui->batch = batch; + + l.ug = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.ug) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.ug->batch = batch; + + l.uo = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uo) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uo->batch = batch; + + l.wf = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wf) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wf->batch = batch; + + l.wi = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wi) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + 
l.wi->batch = batch; + + l.wg = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wg) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wg->batch = batch; + + l.wo = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wo) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wo->batch = batch; + + l.batch_normalize = batch_normalize; + l.outputs = outputs; + + l.output = calloc(outputs*batch*steps, sizeof(float)); + l.state = calloc(outputs*batch, sizeof(float)); + + l.forward = forward_lstm_layer; + l.update = update_lstm_layer; + + l.prev_state_cpu = calloc(batch*outputs, sizeof(float)); + l.prev_cell_cpu = calloc(batch*outputs, sizeof(float)); + l.cell_cpu = calloc(batch*outputs*steps, sizeof(float)); + + l.f_cpu = calloc(batch*outputs, sizeof(float)); + l.i_cpu = calloc(batch*outputs, sizeof(float)); + l.g_cpu = calloc(batch*outputs, sizeof(float)); + l.o_cpu = calloc(batch*outputs, sizeof(float)); + l.c_cpu = calloc(batch*outputs, sizeof(float)); + l.h_cpu = calloc(batch*outputs, sizeof(float)); + l.temp_cpu = calloc(batch*outputs, sizeof(float)); + l.temp2_cpu = calloc(batch*outputs, sizeof(float)); + l.temp3_cpu = calloc(batch*outputs, sizeof(float)); + l.dc_cpu = calloc(batch*outputs, sizeof(float)); + l.dh_cpu = calloc(batch*outputs, sizeof(float)); + +#ifdef GPU + l.forward_gpu = forward_lstm_layer_gpu; + l.backward_gpu = backward_lstm_layer_gpu; + l.update_gpu = update_lstm_layer_gpu; + + l.output_gpu = cuda_make_array(0, batch*outputs*steps); + l.delta_gpu = cuda_make_array(0, batch*l.outputs*steps); + + l.prev_state_gpu = cuda_make_array(0, batch*outputs); + l.prev_cell_gpu = cuda_make_array(0, batch*outputs); + l.cell_gpu = cuda_make_array(0, batch*outputs*steps); + + l.f_gpu = cuda_make_array(0, batch*outputs); + l.i_gpu = cuda_make_array(0, batch*outputs); + l.g_gpu = cuda_make_array(0, batch*outputs); + l.o_gpu = cuda_make_array(0, batch*outputs); + l.c_gpu = 
cuda_make_array(0, batch*outputs); + l.h_gpu = cuda_make_array(0, batch*outputs); + l.temp_gpu = cuda_make_array(0, batch*outputs); + l.temp2_gpu = cuda_make_array(0, batch*outputs); + l.temp3_gpu = cuda_make_array(0, batch*outputs); + l.dc_gpu = cuda_make_array(0, batch*outputs); + l.dh_gpu = cuda_make_array(0, batch*outputs); +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l.wf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wf->out_c, l.wf->out_h, l.wf->out_w); + cudnnSetTensor4dDescriptor(l.wi->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wi->out_c, l.wi->out_h, l.wi->out_w); + cudnnSetTensor4dDescriptor(l.wg->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wg->out_c, l.wg->out_h, l.wg->out_w); + cudnnSetTensor4dDescriptor(l.wo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wo->out_c, l.wo->out_h, l.wo->out_w); + + cudnnSetTensor4dDescriptor(l.uf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uf->out_c, l.uf->out_h, l.uf->out_w); + cudnnSetTensor4dDescriptor(l.ui->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ui->out_c, l.ui->out_h, l.ui->out_w); + cudnnSetTensor4dDescriptor(l.ug->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ug->out_c, l.ug->out_h, l.ug->out_w); + cudnnSetTensor4dDescriptor(l.uo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uo->out_c, l.uo->out_h, l.uo->out_w); +#endif + +#endif + + return l; +} + +void update_lstm_layer(layer l, update_args a) +{ + update_connected_layer(*(l.wf), a); + update_connected_layer(*(l.wi), a); + update_connected_layer(*(l.wg), a); + update_connected_layer(*(l.wo), a); + update_connected_layer(*(l.uf), a); + update_connected_layer(*(l.ui), a); + update_connected_layer(*(l.ug), a); + update_connected_layer(*(l.uo), a); +} + +void forward_lstm_layer(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo 
= *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + fill_cpu(l.outputs * l.batch * l.steps, 0, wf.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wi.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wg.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wo.delta, 1); + + fill_cpu(l.outputs * l.batch * l.steps, 0, uf.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, ui.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, ug.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, uo.delta, 1); + if (state.train) { + fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input = l.h_cpu; + forward_connected_layer(wf, s); + forward_connected_layer(wi, s); + forward_connected_layer(wg, s); + forward_connected_layer(wo, s); + + s.input = state.input; + forward_connected_layer(uf, s); + forward_connected_layer(ui, s); + forward_connected_layer(ug, s); + forward_connected_layer(uo, s); + + copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); + + copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); + + copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); + + copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); + + activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.g_cpu, l.outputs*l.batch, TANH); + activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); + + copy_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.c_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, l.temp_cpu, 1, l.c_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.h_cpu, 
1); + activate_array(l.h_cpu, l.outputs*l.batch, TANH); + mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.h_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.cell_cpu, 1); + copy_cpu(l.outputs*l.batch, l.h_cpu, 1, l.output, 1); + + state.input += l.inputs*l.batch; + l.output += l.outputs*l.batch; + l.cell_cpu += l.outputs*l.batch; + + increment_layer(&wf, 1); + increment_layer(&wi, 1); + increment_layer(&wg, 1); + increment_layer(&wo, 1); + + increment_layer(&uf, 1); + increment_layer(&ui, 1); + increment_layer(&ug, 1); + increment_layer(&uo, 1); + } +} + +void backward_lstm_layer(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + increment_layer(&wf, l.steps - 1); + increment_layer(&wi, l.steps - 1); + increment_layer(&wg, l.steps - 1); + increment_layer(&wo, l.steps - 1); + + increment_layer(&uf, l.steps - 1); + increment_layer(&ui, l.steps - 1); + increment_layer(&ug, l.steps - 1); + increment_layer(&uo, l.steps - 1); + + state.input += l.inputs*l.batch*(l.steps - 1); + if (state.delta) state.delta += l.inputs*l.batch*(l.steps - 1); + + l.output += l.outputs*l.batch*(l.steps - 1); + l.cell_cpu += l.outputs*l.batch*(l.steps - 1); + l.delta += l.outputs*l.batch*(l.steps - 1); + + for (i = l.steps - 1; i >= 0; --i) { + if (i != 0) copy_cpu(l.outputs*l.batch, l.cell_cpu - l.outputs*l.batch, 1, l.prev_cell_cpu, 1); + copy_cpu(l.outputs*l.batch, l.cell_cpu, 1, l.c_cpu, 1); + if (i != 0) copy_cpu(l.outputs*l.batch, l.output - l.outputs*l.batch, 1, l.prev_state_cpu, 1); + copy_cpu(l.outputs*l.batch, l.output, 1, l.h_cpu, 1); + + l.dh_cpu = (i == 0) ? 
0 : l.delta - l.outputs*l.batch; + + copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); + + copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); + + copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); + + copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); + + activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.g_cpu, l.outputs*l.batch, TANH); + activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); + + copy_cpu(l.outputs*l.batch, l.delta, 1, l.temp3_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); + activate_array(l.temp_cpu, l.outputs*l.batch, TANH); + + copy_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp2_cpu, 1); + mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.temp2_cpu, 1); + + gradient_array(l.temp_cpu, l.outputs*l.batch, TANH, l.temp2_cpu); + axpy_cpu(l.outputs*l.batch, 1, l.dc_cpu, 1, l.temp2_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); + activate_array(l.temp_cpu, l.outputs*l.batch, TANH); + mul_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp_cpu, 1); + gradient_array(l.o_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wo.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wo, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uo.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(uo, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); + gradient_array(l.g_cpu, l.outputs*l.batch, TANH, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wg.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + 
backward_connected_layer(wg, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ug.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(ug, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); + gradient_array(l.i_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wi.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wi, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ui.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(ui, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.prev_cell_cpu, 1, l.temp_cpu, 1); + gradient_array(l.f_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wf.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wf, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uf.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(uf, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.temp_cpu, 1); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, l.dc_cpu, 1); + + state.input -= l.inputs*l.batch; + if (state.delta) state.delta -= l.inputs*l.batch; + l.output -= l.outputs*l.batch; + l.cell_cpu -= l.outputs*l.batch; + l.delta -= l.outputs*l.batch; + + increment_layer(&wf, -1); + increment_layer(&wi, -1); + increment_layer(&wg, -1); + increment_layer(&wo, -1); + + increment_layer(&uf, -1); + increment_layer(&ui, -1); + increment_layer(&ug, -1); + increment_layer(&uo, -1); + } +} + +#ifdef GPU +void update_lstm_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.wf), a); + update_connected_layer_gpu(*(l.wi), a); + update_connected_layer_gpu(*(l.wg), a); + update_connected_layer_gpu(*(l.wo), a); + 
update_connected_layer_gpu(*(l.uf), a); + update_connected_layer_gpu(*(l.ui), a); + update_connected_layer_gpu(*(l.ug), a); + update_connected_layer_gpu(*(l.uo), a); +} + +void forward_lstm_layer_gpu(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + fill_gpu(l.outputs * l.batch * l.steps, 0, wf.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wi.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wg.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wo.delta_gpu, 1); + + fill_gpu(l.outputs * l.batch * l.steps, 0, uf.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, ui.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, ug.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, uo.delta_gpu, 1); + if (state.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = l.h_gpu; + forward_connected_layer_gpu(wf, s); + forward_connected_layer_gpu(wi, s); + forward_connected_layer_gpu(wg, s); + forward_connected_layer_gpu(wo, s); + + s.input_gpu = state.input_gpu; + forward_connected_layer_gpu(uf, s); + forward_connected_layer_gpu(ui, s); + forward_connected_layer_gpu(ug, s); + forward_connected_layer_gpu(uo, s); + + copy_gpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1); + + copy_gpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1); + + copy_gpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1); + + copy_gpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1); + + activate_array_gpu(l.f_gpu, 
l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.g_gpu, l.outputs*l.batch, TANH); + activate_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.f_gpu, 1, l.c_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, l.temp_gpu, 1, l.c_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.h_gpu, 1); + activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH); + mul_gpu(l.outputs*l.batch, l.o_gpu, 1, l.h_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.cell_gpu, 1); + copy_gpu(l.outputs*l.batch, l.h_gpu, 1, l.output_gpu, 1); + + state.input_gpu += l.inputs*l.batch; + l.output_gpu += l.outputs*l.batch; + l.cell_gpu += l.outputs*l.batch; + + increment_layer(&wf, 1); + increment_layer(&wi, 1); + increment_layer(&wg, 1); + increment_layer(&wo, 1); + + increment_layer(&uf, 1); + increment_layer(&ui, 1); + increment_layer(&ug, 1); + increment_layer(&uo, 1); + } +} + +void backward_lstm_layer_gpu(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + increment_layer(&wf, l.steps - 1); + increment_layer(&wi, l.steps - 1); + increment_layer(&wg, l.steps - 1); + increment_layer(&wo, l.steps - 1); + + increment_layer(&uf, l.steps - 1); + increment_layer(&ui, l.steps - 1); + increment_layer(&ug, l.steps - 1); + increment_layer(&uo, l.steps - 1); + + state.input_gpu += l.inputs*l.batch*(l.steps - 1); + if (state.delta_gpu) state.delta_gpu += l.inputs*l.batch*(l.steps - 1); + + l.output_gpu += l.outputs*l.batch*(l.steps - 1); + l.cell_gpu += l.outputs*l.batch*(l.steps - 1); + l.delta_gpu += l.outputs*l.batch*(l.steps - 1); + + for (i = l.steps - 1; i >= 0; --i) { + if (i != 0) 
copy_gpu(l.outputs*l.batch, l.cell_gpu - l.outputs*l.batch, 1, l.prev_cell_gpu, 1); + copy_gpu(l.outputs*l.batch, l.cell_gpu, 1, l.c_gpu, 1); + if (i != 0) copy_gpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, 1, l.prev_state_gpu, 1); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.h_gpu, 1); + + l.dh_gpu = (i == 0) ? 0 : l.delta_gpu - l.outputs*l.batch; + + copy_gpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1); + + copy_gpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1); + + copy_gpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1); + + copy_gpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1); + + activate_array_gpu(l.f_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.g_gpu, l.outputs*l.batch, TANH); + activate_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, l.temp3_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.temp_gpu, 1); + activate_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH); + + copy_gpu(l.outputs*l.batch, l.temp3_gpu, 1, l.temp2_gpu, 1); + mul_gpu(l.outputs*l.batch, l.o_gpu, 1, l.temp2_gpu, 1); + + gradient_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH, l.temp2_gpu); + axpy_gpu(l.outputs*l.batch, 1, l.dc_gpu, 1, l.temp2_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.temp_gpu, 1); + activate_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH); + mul_gpu(l.outputs*l.batch, l.temp3_gpu, 1, l.temp_gpu, 1); + gradient_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wo.delta_gpu, 1); + s.input_gpu = l.prev_state_gpu; + s.delta_gpu = l.dh_gpu; + backward_connected_layer_gpu(wo, s); + + 
copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, uo.delta_gpu, 1); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(uo, s); + + copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1); + gradient_array_gpu(l.g_gpu, l.outputs*l.batch, TANH, l.temp_gpu); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wg.delta_gpu, 1); + s.input_gpu = l.prev_state_gpu; + s.delta_gpu = l.dh_gpu; + backward_connected_layer_gpu(wg, s); + + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, ug.delta_gpu, 1); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(ug, s); + + copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1); + gradient_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wi.delta_gpu, 1); + s.input_gpu = l.prev_state_gpu; + s.delta_gpu = l.dh_gpu; + backward_connected_layer_gpu(wi, s); + + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, ui.delta_gpu, 1); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(ui, s); + + copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.prev_cell_gpu, 1, l.temp_gpu, 1); + gradient_array_gpu(l.f_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wf.delta_gpu, 1); + s.input_gpu = l.prev_state_gpu; + s.delta_gpu = l.dh_gpu; + backward_connected_layer_gpu(wf, s); + + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, uf.delta_gpu, 1); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(uf, s); + + copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.f_gpu, 1, l.temp_gpu, 1); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, l.dc_gpu, 1); + + state.input_gpu -= l.inputs*l.batch; + if (state.delta_gpu) 
state.delta_gpu -= l.inputs*l.batch; + l.output_gpu -= l.outputs*l.batch; + l.cell_gpu -= l.outputs*l.batch; + l.delta_gpu -= l.outputs*l.batch; + + increment_layer(&wf, -1); + increment_layer(&wi, -1); + increment_layer(&wg, -1); + increment_layer(&wo, -1); + + increment_layer(&uf, -1); + increment_layer(&ui, -1); + increment_layer(&ug, -1); + increment_layer(&uo, -1); + } +} +#endif diff --git a/workloads/realworld/async/darknet/src/lstm_layer.h b/workloads/realworld/async/darknet/src/lstm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..b9f07e6424b55c336e692aa6f1028d0bc1cae0b3 --- /dev/null +++ b/workloads/realworld/async/darknet/src/lstm_layer.h @@ -0,0 +1,20 @@ +#ifndef LSTM_LAYER_H +#define LSTM_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" +#define USET + +layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam); + +void forward_lstm_layer(layer l, network net); +void update_lstm_layer(layer l, update_args a); + +#ifdef GPU +void forward_lstm_layer_gpu(layer l, network net); +void backward_lstm_layer_gpu(layer l, network net); +void update_lstm_layer_gpu(layer l, update_args a); + +#endif +#endif diff --git a/workloads/realworld/async/darknet/src/matrix.c b/workloads/realworld/async/darknet/src/matrix.c new file mode 100644 index 0000000000000000000000000000000000000000..799916bff017180e220ae48748f495007793d168 --- /dev/null +++ b/workloads/realworld/async/darknet/src/matrix.c @@ -0,0 +1,196 @@ +#include "matrix.h" +#include "utils.h" +#include "blas.h" +#include +#include +#include +#include +#include + +void free_matrix(matrix m) +{ + int i; + for(i = 0; i < m.rows; ++i) free(m.vals[i]); + free(m.vals); +} + +float matrix_topk_accuracy(matrix truth, matrix guess, int k) +{ + int *indexes = calloc(k, sizeof(int)); + int n = truth.cols; + int i,j; + int correct = 0; + for(i = 0; i < truth.rows; ++i){ + top_k(guess.vals[i], n, k, indexes); + for(j = 0; j < 
k; ++j){ + int class = indexes[j]; + if(truth.vals[i][class]){ + ++correct; + break; + } + } + } + free(indexes); + return (float)correct/truth.rows; +} + +void scale_matrix(matrix m, float scale) +{ + int i,j; + for(i = 0; i < m.rows; ++i){ + for(j = 0; j < m.cols; ++j){ + m.vals[i][j] *= scale; + } + } +} + +matrix resize_matrix(matrix m, int size) +{ + int i; + if (m.rows == size) return m; + if (m.rows < size) { + m.vals = realloc(m.vals, size*sizeof(float*)); + for (i = m.rows; i < size; ++i) { + m.vals[i] = calloc(m.cols, sizeof(float)); + } + } else if (m.rows > size) { + for (i = size; i < m.rows; ++i) { + free(m.vals[i]); + } + m.vals = realloc(m.vals, size*sizeof(float*)); + } + m.rows = size; + return m; +} + +void matrix_add_matrix(matrix from, matrix to) +{ + assert(from.rows == to.rows && from.cols == to.cols); + int i,j; + for(i = 0; i < from.rows; ++i){ + for(j = 0; j < from.cols; ++j){ + to.vals[i][j] += from.vals[i][j]; + } + } +} + +matrix copy_matrix(matrix m) +{ + matrix c = {0}; + c.rows = m.rows; + c.cols = m.cols; + c.vals = calloc(c.rows, sizeof(float *)); + int i; + for(i = 0; i < c.rows; ++i){ + c.vals[i] = calloc(c.cols, sizeof(float)); + copy_cpu(c.cols, m.vals[i], 1, c.vals[i], 1); + } + return c; +} + +matrix make_matrix(int rows, int cols) +{ + int i; + matrix m; + m.rows = rows; + m.cols = cols; + m.vals = calloc(m.rows, sizeof(float *)); + for(i = 0; i < m.rows; ++i){ + m.vals[i] = calloc(m.cols, sizeof(float)); + } + return m; +} + +matrix hold_out_matrix(matrix *m, int n) +{ + int i; + matrix h; + h.rows = n; + h.cols = m->cols; + h.vals = calloc(h.rows, sizeof(float *)); + for(i = 0; i < n; ++i){ + int index = rand()%m->rows; + h.vals[i] = m->vals[index]; + m->vals[index] = m->vals[--(m->rows)]; + } + return h; +} + +float *pop_column(matrix *m, int c) +{ + float *col = calloc(m->rows, sizeof(float)); + int i, j; + for(i = 0; i < m->rows; ++i){ + col[i] = m->vals[i][c]; + for(j = c; j < m->cols-1; ++j){ + m->vals[i][j] = 
m->vals[i][j+1]; + } + } + --m->cols; + return col; +} + +matrix csv_to_matrix(char *filename) +{ + FILE *fp = fopen(filename, "r"); + if(!fp) file_error(filename); + + matrix m; + m.cols = -1; + + char *line; + + int n = 0; + int size = 1024; + m.vals = calloc(size, sizeof(float*)); + while((line = fgetl(fp))){ + if(m.cols == -1) m.cols = count_fields(line); + if(n == size){ + size *= 2; + m.vals = realloc(m.vals, size*sizeof(float*)); + } + m.vals[n] = parse_fields(line, m.cols); + free(line); + ++n; + } + m.vals = realloc(m.vals, n*sizeof(float*)); + m.rows = n; + return m; +} + +void matrix_to_csv(matrix m) +{ + int i, j; + + for(i = 0; i < m.rows; ++i){ + for(j = 0; j < m.cols; ++j){ + if(j > 0) printf(","); + printf("%.17g", m.vals[i][j]); + } + printf("\n"); + } +} + +void print_matrix(matrix m) +{ + int i, j; + printf("%d X %d Matrix:\n",m.rows, m.cols); + printf(" __"); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf("__ \n"); + + printf("| "); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf(" |\n"); + + for(i = 0; i < m.rows; ++i){ + printf("| "); + for(j = 0; j < m.cols; ++j){ + printf("%15.7f ", m.vals[i][j]); + } + printf(" |\n"); + } + printf("|__"); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf("__|\n"); +} diff --git a/workloads/realworld/async/darknet/src/matrix.h b/workloads/realworld/async/darknet/src/matrix.h new file mode 100644 index 0000000000000000000000000000000000000000..879acd70d26c084931b30067ddcc77057068e58c --- /dev/null +++ b/workloads/realworld/async/darknet/src/matrix.h @@ -0,0 +1,13 @@ +#ifndef MATRIX_H +#define MATRIX_H +#include "darknet.h" + +matrix copy_matrix(matrix m); +void print_matrix(matrix m); + +matrix hold_out_matrix(matrix *m, int n); +matrix resize_matrix(matrix m, int size); + +float *pop_column(matrix *m, int c); + +#endif diff --git a/workloads/realworld/async/darknet/src/maxpool_layer.c b/workloads/realworld/async/darknet/src/maxpool_layer.c new file mode 100644 index 
0000000000000000000000000000000000000000..b54be838662ebfc53abc539da22413becc1805a3 --- /dev/null +++ b/workloads/realworld/async/darknet/src/maxpool_layer.c @@ -0,0 +1,127 @@ +#include "maxpool_layer.h" +#include "cuda_dark.h" +#include + +image get_maxpool_image(maxpool_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.c; + return float_to_image(w,h,c,l.output); +} + +image get_maxpool_delta(maxpool_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.c; + return float_to_image(w,h,c,l.delta); +} + +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding) +{ + maxpool_layer l = {0}; + l.type = MAXPOOL; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.pad = padding; + l.out_w = (w + padding - size)/stride + 1; + l.out_h = (h + padding - size)/stride + 1; + l.out_c = c; + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = h*w*c; + l.size = size; + l.stride = stride; + int output_size = l.out_h * l.out_w * l.out_c * batch; + l.indexes = calloc(output_size, sizeof(int)); + l.output = calloc(output_size, sizeof(float)); + l.delta = calloc(output_size, sizeof(float)); + l.forward = forward_maxpool_layer; + l.backward = backward_maxpool_layer; + #ifdef GPU + l.forward_gpu = forward_maxpool_layer_gpu; + l.backward_gpu = backward_maxpool_layer_gpu; + l.indexes_gpu = cuda_make_int_array(0, output_size); + l.output_gpu = cuda_make_array(l.output, output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); + #endif + fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); + return l; +} + +void resize_maxpool_layer(maxpool_layer *l, int w, int h) +{ + l->h = h; + l->w = w; + l->inputs = h*w*l->c; + + l->out_w = (w + l->pad - l->size)/l->stride + 1; + l->out_h = (h + l->pad - l->size)/l->stride + 1; + l->outputs = l->out_w * l->out_h * l->c; + int output_size = l->outputs * l->batch; + + l->indexes = realloc(l->indexes, 
output_size * sizeof(int)); + l->output = realloc(l->output, output_size * sizeof(float)); + l->delta = realloc(l->delta, output_size * sizeof(float)); + + #ifdef GPU + cuda_free((float *)l->indexes_gpu); + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->indexes_gpu = cuda_make_int_array(0, output_size); + l->output_gpu = cuda_make_array(l->output, output_size); + l->delta_gpu = cuda_make_array(l->delta, output_size); + #endif +} + +void forward_maxpool_layer(const maxpool_layer l, network net) +{ + int b,i,j,k,m,n; + int w_offset = -l.pad/2; + int h_offset = -l.pad/2; + + int h = l.out_h; + int w = l.out_w; + int c = l.c; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < c; ++k){ + for(i = 0; i < h; ++i){ + for(j = 0; j < w; ++j){ + int out_index = j + w*(i + h*(k + c*b)); + float max = -FLT_MAX; + int max_i = -1; + for(n = 0; n < l.size; ++n){ + for(m = 0; m < l.size; ++m){ + int cur_h = h_offset + i*l.stride + n; + int cur_w = w_offset + j*l.stride + m; + int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c)); + int valid = (cur_h >= 0 && cur_h < l.h && + cur_w >= 0 && cur_w < l.w); + float val = (valid != 0) ? net.input[index] : -FLT_MAX; + max_i = (val > max) ? index : max_i; + max = (val > max) ? 
val : max; + } + } + l.output[out_index] = max; + l.indexes[out_index] = max_i; + } + } + } + } +} + +void backward_maxpool_layer(const maxpool_layer l, network net) +{ + int i; + int h = l.out_h; + int w = l.out_w; + int c = l.c; + for(i = 0; i < h*w*c*l.batch; ++i){ + int index = l.indexes[i]; + net.delta[index] += l.delta[i]; + } +} + diff --git a/workloads/realworld/async/darknet/src/maxpool_layer.h b/workloads/realworld/async/darknet/src/maxpool_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..f01adb957e8bd8ce01a06e5a1ff14a988ae07149 --- /dev/null +++ b/workloads/realworld/async/darknet/src/maxpool_layer.h @@ -0,0 +1,23 @@ +#ifndef MAXPOOL_LAYER_H +#define MAXPOOL_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +typedef layer maxpool_layer; + +image get_maxpool_image(maxpool_layer l); +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding); +void resize_maxpool_layer(maxpool_layer *l, int w, int h); +void forward_maxpool_layer(const maxpool_layer l, network net); +void backward_maxpool_layer(const maxpool_layer l, network net); + +#ifdef GPU +void forward_maxpool_layer_gpu(maxpool_layer l, network net); +void backward_maxpool_layer_gpu(maxpool_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/async/darknet/src/maxpool_layer_kernels.cu b/workloads/realworld/async/darknet/src/maxpool_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..23302f8295682c5d9112fb12a7f63cd47a82954b --- /dev/null +++ b/workloads/realworld/async/darknet/src/maxpool_layer_kernels.cu @@ -0,0 +1,106 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "maxpool_layer.h" +#include "cuda_dark.h" +} + +__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes) +{ + int h = 
(in_h + pad - size)/stride + 1; + int w = (in_w + pad - size)/stride + 1; + int c = in_c; + + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int j = id % w; + id /= w; + int i = id % h; + id /= h; + int k = id % c; + id /= c; + int b = id; + + int w_offset = -pad/2; + int h_offset = -pad/2; + + int out_index = j + w*(i + h*(k + c*b)); + float max = -INFINITY; + int max_i = -1; + int l, m; + for(l = 0; l < size; ++l){ + for(m = 0; m < size; ++m){ + int cur_h = h_offset + i*stride + l; + int cur_w = w_offset + j*stride + m; + int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c)); + int valid = (cur_h >= 0 && cur_h < in_h && + cur_w >= 0 && cur_w < in_w); + float val = (valid != 0) ? input[index] : -INFINITY; + max_i = (val > max) ? index : max_i; + max = (val > max) ? val : max; + } + } + output[out_index] = max; + indexes[out_index] = max_i; +} + +__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes) +{ + int h = (in_h + pad - size)/stride + 1; + int w = (in_w + pad - size)/stride + 1; + int c = in_c; + int area = (size-1)/stride; + + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int index = id; + int j = id % in_w; + id /= in_w; + int i = id % in_h; + id /= in_h; + int k = id % in_c; + id /= in_c; + int b = id; + + int w_offset = -pad/2; + int h_offset = -pad/2; + + float d = 0; + int l, m; + for(l = -area; l < area+1; ++l){ + for(m = -area; m < area+1; ++m){ + int out_w = (j-w_offset)/stride + m; + int out_h = (i-h_offset)/stride + l; + int out_index = out_w + w*(out_h + h*(k + c*b)); + int valid = (out_w >= 0 && out_w < w && + out_h >= 0 && out_h < h); + d += (valid && indexes[out_index] == index) ? 
delta[out_index] : 0; + } + } + prev_delta[index] += d; +} + +extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network net) +{ + int h = layer.out_h; + int w = layer.out_w; + int c = layer.c; + + size_t n = h*w*c*layer.batch; + + forward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, net.input_gpu, layer.output_gpu, layer.indexes_gpu); + check_error(cudaPeekAtLastError()); +} + +extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network net) +{ + size_t n = layer.h*layer.w*layer.c*layer.batch; + + backward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, net.delta_gpu, layer.indexes_gpu); + check_error(cudaPeekAtLastError()); +} + diff --git a/workloads/realworld/async/darknet/src/network.c b/workloads/realworld/async/darknet/src/network.c new file mode 100644 index 0000000000000000000000000000000000000000..aaab7997b5ee7da829289fa153f942a066b43d8c --- /dev/null +++ b/workloads/realworld/async/darknet/src/network.c @@ -0,0 +1,1129 @@ +#include +#include +#include +#include "network.h" +#include "image.h" +#include "data.h" +#include "utils.h" +#include "blas.h" + +#include "crop_layer.h" +#include "connected_layer.h" +#include "gru_layer.h" +#include "rnn_layer.h" +#include "crnn_layer.h" +#include "local_layer.h" +#include "convolutional_layer.h" +#include "activation_layer.h" +#include "detection_layer.h" +#include "region_layer.h" +#include "yolo_layer.h" +#include "normalization_layer.h" +#include "batchnorm_layer.h" +#include "maxpool_layer.h" +#include "reorg_layer.h" +#include "avgpool_layer.h" +#include "cost_layer.h" +#include "softmax_layer.h" +#include "dropout_layer.h" +#include "route_layer.h" +#include "upsample_layer.h" +#include "shortcut_layer.h" +#include "parser.h" +#include "data.h" + +load_args get_base_args(network *net) +{ + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.size = net->w; + + 
args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.center = net->center; + args.saturation = net->saturation; + args.hue = net->hue; + return args; +} + +network *load_network(char *cfg, char *weights, int clear) +{ + network *net = parse_network_cfg(cfg); + if(weights && weights[0] != 0){ + load_weights(net, weights); + } + if(clear) (*net->seen) = 0; + return net; +} + +size_t get_current_batch(network *net) +{ + size_t batch_num = (*net->seen)/(net->batch*net->subdivisions); + return batch_num; +} + +void reset_network_state(network *net, int b) +{ + int i; + for (i = 0; i < net->n; ++i) { + #ifdef GPU + layer l = net->layers[i]; + if(l.state_gpu){ + fill_gpu(l.outputs, 0, l.state_gpu + l.outputs*b, 1); + } + if(l.h_gpu){ + fill_gpu(l.outputs, 0, l.h_gpu + l.outputs*b, 1); + } + #endif + } +} + +void reset_rnn(network *net) +{ + reset_network_state(net, 0); +} + +float get_current_rate(network *net) +{ + size_t batch_num = get_current_batch(net); + int i; + float rate; + if (batch_num < net->burn_in) return net->learning_rate * pow((float)batch_num / net->burn_in, net->power); + switch (net->policy) { + case CONSTANT: + return net->learning_rate; + case STEP: + return net->learning_rate * pow(net->scale, batch_num/net->step); + case STEPS: + rate = net->learning_rate; + for(i = 0; i < net->num_steps; ++i){ + if(net->steps[i] > batch_num) return rate; + rate *= net->scales[i]; + } + return rate; + case EXP: + return net->learning_rate * pow(net->gamma, batch_num); + case POLY: + return net->learning_rate * pow(1 - (float)batch_num / net->max_batches, net->power); + case RANDOM: + return net->learning_rate * pow(rand_uniform(0,1), net->power); + case SIG: + return net->learning_rate * (1./(1.+exp(net->gamma*(batch_num - net->step)))); + default: + fprintf(stderr, "Policy is weird!\n"); + return net->learning_rate; + } +} + +char *get_layer_string(LAYER_TYPE a) +{ + 
switch(a){ + case CONVOLUTIONAL: + return "convolutional"; + case ACTIVE: + return "activation"; + case LOCAL: + return "local"; + case DECONVOLUTIONAL: + return "deconvolutional"; + case CONNECTED: + return "connected"; + case RNN: + return "rnn"; + case GRU: + return "gru"; + case LSTM: + return "lstm"; + case CRNN: + return "crnn"; + case MAXPOOL: + return "maxpool"; + case REORG: + return "reorg"; + case AVGPOOL: + return "avgpool"; + case SOFTMAX: + return "softmax"; + case DETECTION: + return "detection"; + case REGION: + return "region"; + case YOLO: + return "yolo"; + case DROPOUT: + return "dropout"; + case CROP: + return "crop"; + case COST: + return "cost"; + case ROUTE: + return "route"; + case SHORTCUT: + return "shortcut"; + case NORMALIZATION: + return "normalization"; + case BATCHNORM: + return "batchnorm"; + default: + break; + } + return "none"; +} + +network *make_network(int n) +{ + network *net = calloc(1, sizeof(network)); + net->n = n; + net->layers = calloc(net->n, sizeof(layer)); + net->seen = calloc(1, sizeof(size_t)); + net->t = calloc(1, sizeof(int)); + net->cost = calloc(1, sizeof(float)); + return net; +} + +void forward_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + forward_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + for(i = 0; i < net.n; ++i){ + net.index = i; + layer l = net.layers[i]; + if(l.delta){ + fill_cpu(l.outputs * l.batch, 0, l.delta, 1); + } + l.forward(l, net); + net.input = l.output; + if(l.truth) { + net.truth = l.output; + } + } + calc_network_cost(netp); +} + +void update_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + update_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + update_args a = {0}; + a.batch = net.batch*net.subdivisions; + a.learning_rate = get_current_rate(netp); + a.momentum = net.momentum; + a.decay = net.decay; + a.adam = net.adam; + a.B1 = net.B1; + a.B2 = net.B2; + a.eps = net.eps; + ++*net.t; + a.t = 
*net.t; + + for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.update){ + l.update(l, a); + } + } +} + +void calc_network_cost(network *netp) +{ + network net = *netp; + int i; + float sum = 0; + int count = 0; + for(i = 0; i < net.n; ++i){ + if(net.layers[i].cost){ + sum += net.layers[i].cost[0]; + ++count; + } + } + *net.cost = sum/count; +} + +int get_predicted_class_network(network *net) +{ + return max_index(net->output, net->outputs); +} + +void backward_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + backward_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + network orig = net; + for(i = net.n-1; i >= 0; --i){ + layer l = net.layers[i]; + if(l.stopbackward) break; + if(i == 0){ + net = orig; + }else{ + layer prev = net.layers[i-1]; + net.input = prev.output; + net.delta = prev.delta; + } + net.index = i; + l.backward(l, net); + } +} + +float train_network_datum(network *net) +{ + *net->seen += net->batch; + net->train = 1; + forward_network(net); + backward_network(net); + float error = *net->cost; + if(((*net->seen)/net->batch)%net->subdivisions == 0) update_network(net); + return error; +} + +float train_network_sgd(network *net, data d, int n) +{ + int batch = net->batch; + + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + get_random_batch(d, batch, net->input, net->truth); + float err = train_network_datum(net); + sum += err; + } + return (float)sum/(n*batch); +} + +float train_network(network *net, data d) +{ + assert(d.X.rows % net->batch == 0); + int batch = net->batch; + int n = d.X.rows / batch; + + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + get_next_batch(d, batch, i*batch, net->input, net->truth); + float err = train_network_datum(net); + sum += err; + } + return (float)sum/(n*batch); +} + +void set_temp_network(network *net, float t) +{ + int i; + for(i = 0; i < net->n; ++i){ + net->layers[i].temperature = t; + } +} + + +void set_batch_network(network *net, int b) +{ + 
net->batch = b; + int i; + for(i = 0; i < net->n; ++i){ + net->layers[i].batch = b; +#ifdef CUDNN + if(net->layers[i].type == CONVOLUTIONAL){ + cudnn_convolutional_setup(net->layers + i); + } + if(net->layers[i].type == DECONVOLUTIONAL){ + layer *l = net->layers + i; + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + } +#endif + } +} + +int resize_network(network *net, int w, int h) +{ +#ifdef GPU + cuda_set_device(net->gpu_index); + cuda_free(net->workspace); +#endif + int i; + //if(w == net->w && h == net->h) return 0; + net->w = w; + net->h = h; + int inputs = 0; + size_t workspace_size = 0; + //fprintf(stderr, "Resizing to %d x %d...\n", w, h); + //fflush(stderr); + for (i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + resize_convolutional_layer(&l, w, h); + }else if(l.type == CROP){ + resize_crop_layer(&l, w, h); + }else if(l.type == MAXPOOL){ + resize_maxpool_layer(&l, w, h); + }else if(l.type == REGION){ + resize_region_layer(&l, w, h); + }else if(l.type == YOLO){ + resize_yolo_layer(&l, w, h); + }else if(l.type == ROUTE){ + resize_route_layer(&l, net); + }else if(l.type == SHORTCUT){ + resize_shortcut_layer(&l, w, h); + }else if(l.type == UPSAMPLE){ + resize_upsample_layer(&l, w, h); + }else if(l.type == REORG){ + resize_reorg_layer(&l, w, h); + }else if(l.type == AVGPOOL){ + resize_avgpool_layer(&l, w, h); + }else if(l.type == NORMALIZATION){ + resize_normalization_layer(&l, w, h); + }else if(l.type == COST){ + resize_cost_layer(&l, inputs); + }else{ + error("Cannot resize this type of layer"); + } + if(l.workspace_size > workspace_size) workspace_size = l.workspace_size; + if(l.workspace_size > 2000000000) assert(0); + inputs = l.outputs; + net->layers[i] = l; + w = l.out_w; + h = l.out_h; + if(l.type == AVGPOOL) break; + } + layer out = 
get_network_output_layer(net); + net->inputs = net->layers[0].inputs; + net->outputs = out.outputs; + net->truths = out.outputs; + if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths; + net->output = out.output; + free(net->input); + free(net->truth); + net->input = calloc(net->inputs*net->batch, sizeof(float)); + net->truth = calloc(net->truths*net->batch, sizeof(float)); +#ifdef GPU + if(gpu_index >= 0){ + cuda_free(net->input_gpu); + cuda_free(net->truth_gpu); + net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch); + net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch); + if(workspace_size){ + net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1); + } + }else { + free(net->workspace); + net->workspace = calloc(1, workspace_size); + } +#else + free(net->workspace); + net->workspace = calloc(1, workspace_size); +#endif + //fprintf(stderr, " Done!\n"); + return 0; +} + +layer get_network_detection_layer(network *net) +{ + int i; + for(i = 0; i < net->n; ++i){ + if(net->layers[i].type == DETECTION){ + return net->layers[i]; + } + } + fprintf(stderr, "Detection layer not found!!\n"); + layer l = {0}; + return l; +} + +image get_network_image_layer(network *net, int i) +{ + layer l = net->layers[i]; +#ifdef GPU + //cuda_pull_array(l.output_gpu, l.output, l.outputs); +#endif + if (l.out_w && l.out_h && l.out_c){ + return float_to_image(l.out_w, l.out_h, l.out_c, l.output); + } + image def = {0}; + return def; +} + +image get_network_image(network *net) +{ + int i; + for(i = net->n-1; i >= 0; --i){ + image m = get_network_image_layer(net, i); + if(m.h != 0) return m; + } + image def = {0}; + return def; +} + +void visualize_network(network *net) +{ + image *prev = 0; + int i; + char buff[256]; + for(i = 0; i < net->n; ++i){ + sprintf(buff, "Layer %d", i); + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + prev = visualize_convolutional_layer(l, buff, prev); + } + } +} + +void 
top_predictions(network *net, int k, int *index) +{ + top_k(net->output, net->outputs, k, index); +} + + +float *network_predict(network *net, float *input) +{ + network orig = *net; + net->input = input; + net->truth = 0; + net->train = 0; + net->delta = 0; + forward_network(net); + float *out = net->output; + *net = orig; + return out; +} + +int num_detections(network *net, float thresh) +{ + int i; + int s = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO){ + s += yolo_num_detections(l, thresh); + } + if(l.type == DETECTION || l.type == REGION){ + s += l.w*l.h*l.n; + } + } + return s; +} + +detection *make_network_boxes(network *net, float thresh, int *num) +{ + layer l = net->layers[net->n - 1]; + int i; + int nboxes = num_detections(net, thresh); + if(num) *num = nboxes; + detection *dets = calloc(nboxes, sizeof(detection)); + for(i = 0; i < nboxes; ++i){ + dets[i].prob = calloc(l.classes, sizeof(float)); + if(l.coords > 4){ + dets[i].mask = calloc(l.coords-4, sizeof(float)); + } + } + return dets; +} + +void fill_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets) +{ + int j; + for(j = 0; j < net->n; ++j){ + layer l = net->layers[j]; + if(l.type == YOLO){ + int count = get_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets); + dets += count; + } + if(l.type == REGION){ + get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets); + dets += l.w*l.h*l.n; + } + if(l.type == DETECTION){ + get_detection_detections(l, w, h, thresh, dets); + dets += l.w*l.h*l.n; + } + } +} + +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num) +{ + detection *dets = make_network_boxes(net, thresh, num); + fill_network_boxes(net, w, h, thresh, hier, map, relative, dets); + return dets; +} + +void free_detections(detection *dets, int n) +{ + int i; + for(i = 0; i < n; ++i){ + 
free(dets[i].prob); + if(dets[i].mask) free(dets[i].mask); + } + free(dets); +} + +float *network_predict_image(network *net, image im) +{ + image imr = letterbox_image(im, net->w, net->h); + set_batch_network(net, 1); + float *p = network_predict(net, imr.data); + free_image(imr); + return p; +} + +int network_width(network *net){return net->w;} +int network_height(network *net){return net->h;} + +matrix network_predict_data_multi(network *net, data test, int n) +{ + int i,j,b,m; + int k = net->outputs; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.rows, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + } + for(m = 0; m < n; ++m){ + float *out = network_predict(net, X); + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + for(j = 0; j < k; ++j){ + pred.vals[i+b][j] += out[j+b*k]/n; + } + } + } + } + free(X); + return pred; +} + +matrix network_predict_data(network *net, data test) +{ + int i,j,b; + int k = net->outputs; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.cols, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + } + float *out = network_predict(net, X); + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + for(j = 0; j < k; ++j){ + pred.vals[i+b][j] = out[j+b*k]; + } + } + } + free(X); + return pred; +} + +void print_network(network *net) +{ + int i,j; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + float *output = l.output; + int n = l.outputs; + float mean = mean_array(output, n); + float vari = variance_array(output, n); + fprintf(stderr, "Layer %d - Mean: %f, Variance: %f\n",i,mean, vari); + if(n > 100) n = 100; + for(j = 0; j < n; 
++j) fprintf(stderr, "%f, ", output[j]); + if(n == 100)fprintf(stderr,".....\n"); + fprintf(stderr, "\n"); + } +} + +void compare_networks(network *n1, network *n2, data test) +{ + matrix g1 = network_predict_data(n1, test); + matrix g2 = network_predict_data(n2, test); + int i; + int a,b,c,d; + a = b = c = d = 0; + for(i = 0; i < g1.rows; ++i){ + int truth = max_index(test.y.vals[i], test.y.cols); + int p1 = max_index(g1.vals[i], g1.cols); + int p2 = max_index(g2.vals[i], g2.cols); + if(p1 == truth){ + if(p2 == truth) ++d; + else ++c; + }else{ + if(p2 == truth) ++b; + else ++a; + } + } + printf("%5d %5d\n%5d %5d\n", a, b, c, d); + float num = pow((abs(b - c) - 1.), 2.); + float den = b + c; + printf("%f\n", num/den); +} + +float network_accuracy(network *net, data d) +{ + matrix guess = network_predict_data(net, d); + float acc = matrix_topk_accuracy(d.y, guess,1); + free_matrix(guess); + return acc; +} + +float *network_accuracies(network *net, data d, int n) +{ + static float acc[2]; + matrix guess = network_predict_data(net, d); + acc[0] = matrix_topk_accuracy(d.y, guess, 1); + acc[1] = matrix_topk_accuracy(d.y, guess, n); + free_matrix(guess); + return acc; +} + +layer get_network_output_layer(network *net) +{ + int i; + for(i = net->n - 1; i >= 0; --i){ + if(net->layers[i].type != COST) break; + } + return net->layers[i]; +} + +float network_accuracy_multi(network *net, data d, int n) +{ + matrix guess = network_predict_data_multi(net, d, n); + float acc = matrix_topk_accuracy(d.y, guess,1); + free_matrix(guess); + return acc; +} + +void free_network(network *net) +{ + int i; + for(i = 0; i < net->n; ++i){ + free_layer(net->layers[i]); + } + free(net->layers); + if(net->input) free(net->input); + if(net->truth) free(net->truth); +#ifdef GPU + if(net->input_gpu) cuda_free(net->input_gpu); + if(net->truth_gpu) cuda_free(net->truth_gpu); +#endif + free(net); +} + +// Some day... +// ^ What the hell is this comment for? 
+ + +layer network_output_layer(network *net) +{ + int i; + for(i = net->n - 1; i >= 0; --i){ + if(net->layers[i].type != COST) break; + } + return net->layers[i]; +} + +int network_inputs(network *net) +{ + return net->layers[0].inputs; +} + +int network_outputs(network *net) +{ + return network_output_layer(net).outputs; +} + +float *network_output(network *net) +{ + return network_output_layer(net).output; +} + +#ifdef GPU + +void forward_network_gpu(network *netp) +{ + network net = *netp; + cuda_set_device(net.gpu_index); + cuda_push_array(net.input_gpu, net.input, net.inputs*net.batch); + if(net.truth){ + cuda_push_array(net.truth_gpu, net.truth, net.truths*net.batch); + } + + int i; + for(i = 0; i < net.n; ++i){ + net.index = i; + layer l = net.layers[i]; + if(l.delta_gpu){ + fill_gpu(l.outputs * l.batch, 0, l.delta_gpu, 1); + } + l.forward_gpu(l, net); + net.input_gpu = l.output_gpu; + net.input = l.output; + if(l.truth) { + net.truth_gpu = l.output_gpu; + net.truth = l.output; + } + } + pull_network_output(netp); + calc_network_cost(netp); +} + +void backward_network_gpu(network *netp) +{ + int i; + network net = *netp; + network orig = net; + cuda_set_device(net.gpu_index); + for(i = net.n-1; i >= 0; --i){ + layer l = net.layers[i]; + if(l.stopbackward) break; + if(i == 0){ + net = orig; + }else{ + layer prev = net.layers[i-1]; + net.input = prev.output; + net.delta = prev.delta; + net.input_gpu = prev.output_gpu; + net.delta_gpu = prev.delta_gpu; + } + net.index = i; + l.backward_gpu(l, net); + } +} + +void update_network_gpu(network *netp) +{ + network net = *netp; + cuda_set_device(net.gpu_index); + int i; + update_args a = {0}; + a.batch = net.batch*net.subdivisions; + a.learning_rate = get_current_rate(netp); + a.momentum = net.momentum; + a.decay = net.decay; + a.adam = net.adam; + a.B1 = net.B1; + a.B2 = net.B2; + a.eps = net.eps; + ++*net.t; + a.t = (*net.t); + + for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.update_gpu){ + 
l.update_gpu(l, a); + } + } +} + +void harmless_update_network_gpu(network *netp) +{ + network net = *netp; + cuda_set_device(net.gpu_index); + int i; + for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.weight_updates_gpu) fill_gpu(l.nweights, 0, l.weight_updates_gpu, 1); + if(l.bias_updates_gpu) fill_gpu(l.nbiases, 0, l.bias_updates_gpu, 1); + if(l.scale_updates_gpu) fill_gpu(l.nbiases, 0, l.scale_updates_gpu, 1); + } +} + +typedef struct { + network *net; + data d; + float *err; +} train_args; + +void *train_thread(void *ptr) +{ + train_args args = *(train_args*)ptr; + free(ptr); + cuda_set_device(args.net->gpu_index); + *args.err = train_network(args.net, args.d); + return 0; +} + +pthread_t train_network_in_thread(network *net, data d, float *err) +{ + pthread_t thread; + train_args *ptr = (train_args *)calloc(1, sizeof(train_args)); + ptr->net = net; + ptr->d = d; + ptr->err = err; + if(pthread_create(&thread, 0, train_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void merge_weights(layer l, layer base) +{ + if (l.type == CONVOLUTIONAL) { + axpy_cpu(l.n, 1, l.bias_updates, 1, base.biases, 1); + axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weights, 1); + if (l.scales) { + axpy_cpu(l.n, 1, l.scale_updates, 1, base.scales, 1); + } + } else if(l.type == CONNECTED) { + axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.biases, 1); + axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weights, 1); + } +} + +void scale_weights(layer l, float s) +{ + if (l.type == CONVOLUTIONAL) { + scal_cpu(l.n, s, l.biases, 1); + scal_cpu(l.nweights, s, l.weights, 1); + if (l.scales) { + scal_cpu(l.n, s, l.scales, 1); + } + } else if(l.type == CONNECTED) { + scal_cpu(l.outputs, s, l.biases, 1); + scal_cpu(l.outputs*l.inputs, s, l.weights, 1); + } +} + + +void pull_weights(layer l) +{ + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + cuda_pull_array(l.biases_gpu, l.bias_updates, l.n); + cuda_pull_array(l.weights_gpu, 
l.weight_updates, l.nweights); + if(l.scales) cuda_pull_array(l.scales_gpu, l.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_pull_array(l.biases_gpu, l.bias_updates, l.outputs); + cuda_pull_array(l.weights_gpu, l.weight_updates, l.outputs*l.inputs); + } +} + +void push_weights(layer l) +{ + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + cuda_push_array(l.biases_gpu, l.biases, l.n); + cuda_push_array(l.weights_gpu, l.weights, l.nweights); + if(l.scales) cuda_push_array(l.scales_gpu, l.scales, l.n); + } else if(l.type == CONNECTED){ + cuda_push_array(l.biases_gpu, l.biases, l.outputs); + cuda_push_array(l.weights_gpu, l.weights, l.outputs*l.inputs); + } +} + +void distribute_weights(layer l, layer base) +{ + if (l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL) { + cuda_push_array(l.biases_gpu, base.biases, l.n); + cuda_push_array(l.weights_gpu, base.weights, l.nweights); + if (base.scales) cuda_push_array(l.scales_gpu, base.scales, l.n); + } else if (l.type == CONNECTED) { + cuda_push_array(l.biases_gpu, base.biases, l.outputs); + cuda_push_array(l.weights_gpu, base.weights, l.outputs*l.inputs); + } +} + + +/* + + void pull_updates(layer l) + { + if(l.type == CONVOLUTIONAL){ + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + if(l.scale_updates) cuda_pull_array(l.scale_updates_gpu, l.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs); + } + } + + void push_updates(layer l) + { + if(l.type == CONVOLUTIONAL){ + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + if(l.scale_updates) cuda_push_array(l.scale_updates_gpu, l.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_push_array(l.bias_updates_gpu, l.bias_updates, 
l.outputs); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs); + } + } + + void update_layer(layer l, network net) + { + int update_batch = net.batch*net.subdivisions; + float rate = get_current_rate(net); + l.t = get_current_batch(net); + if(l.update_gpu){ + l.update_gpu(l, update_batch, rate*l.learning_rate_scale, net.momentum, net.decay); + } + } + void merge_updates(layer l, layer base) + { + if (l.type == CONVOLUTIONAL) { + axpy_cpu(l.n, 1, l.bias_updates, 1, base.bias_updates, 1); + axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weight_updates, 1); + if (l.scale_updates) { + axpy_cpu(l.n, 1, l.scale_updates, 1, base.scale_updates, 1); + } + } else if(l.type == CONNECTED) { + axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.bias_updates, 1); + axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weight_updates, 1); + } + } + + void distribute_updates(layer l, layer base) + { + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.n); + cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.nweights); + if(base.scale_updates) cuda_push_array(l.scale_updates_gpu, base.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.outputs); + cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.outputs*l.inputs); + } + } + */ + +/* + void sync_layer(network *nets, int n, int j) + { + int i; + network net = nets[0]; + layer base = net.layers[j]; + scale_weights(base, 0); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i].gpu_index); + layer l = nets[i].layers[j]; + pull_weights(l); + merge_weights(l, base); + } + scale_weights(base, 1./n); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i].gpu_index); + layer l = nets[i].layers[j]; + distribute_weights(l, base); + } + } + */ + +void sync_layer(network **nets, int n, int j) +{ + int i; + network *net = nets[0]; + layer base = net->layers[j]; + 
scale_weights(base, 0); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i]->gpu_index); + layer l = nets[i]->layers[j]; + pull_weights(l); + merge_weights(l, base); + } + scale_weights(base, 1./n); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i]->gpu_index); + layer l = nets[i]->layers[j]; + distribute_weights(l, base); + } +} + +typedef struct{ + network **nets; + int n; + int j; +} sync_args; + +void *sync_layer_thread(void *ptr) +{ + sync_args args = *(sync_args*)ptr; + sync_layer(args.nets, args.n, args.j); + free(ptr); + return 0; +} + +pthread_t sync_layer_in_thread(network **nets, int n, int j) +{ + pthread_t thread; + sync_args *ptr = (sync_args *)calloc(1, sizeof(sync_args)); + ptr->nets = nets; + ptr->n = n; + ptr->j = j; + if(pthread_create(&thread, 0, sync_layer_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void sync_nets(network **nets, int n, int interval) +{ + int j; + int layers = nets[0]->n; + pthread_t *threads = (pthread_t *) calloc(layers, sizeof(pthread_t)); + + *(nets[0]->seen) += interval * (n-1) * nets[0]->batch * nets[0]->subdivisions; + for (j = 0; j < n; ++j){ + *(nets[j]->seen) = *(nets[0]->seen); + } + for (j = 0; j < layers; ++j) { + threads[j] = sync_layer_in_thread(nets, n, j); + } + for (j = 0; j < layers; ++j) { + pthread_join(threads[j], 0); + } + free(threads); +} + +float train_networks(network **nets, int n, data d, int interval) +{ + int i; + int batch = nets[0]->batch; + int subdivisions = nets[0]->subdivisions; + assert(batch * subdivisions * n == d.X.rows); + pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t)); + float *errors = (float *) calloc(n, sizeof(float)); + + float sum = 0; + for(i = 0; i < n; ++i){ + data p = get_data_part(d, i, n); + threads[i] = train_network_in_thread(nets[i], p, errors + i); + } + for(i = 0; i < n; ++i){ + pthread_join(threads[i], 0); + //printf("%f\n", errors[i]); + sum += errors[i]; + } + //cudaDeviceSynchronize(); + if (get_current_batch(nets[0]) 
% interval == 0) { + printf("Syncing... "); + fflush(stdout); + sync_nets(nets, n, interval); + printf("Done!\n"); + } + //cudaDeviceSynchronize(); + free(threads); + free(errors); + return (float)sum/(n); +} + +void pull_network_output(network *net) +{ + layer l = get_network_output_layer(net); + cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); +} + +#endif diff --git a/workloads/realworld/async/darknet/src/network.h b/workloads/realworld/async/darknet/src/network.h new file mode 100644 index 0000000000000000000000000000000000000000..1b0dfd1aaa3e090c6ce276d26f24d127de2cb66d --- /dev/null +++ b/workloads/realworld/async/darknet/src/network.h @@ -0,0 +1,29 @@ +// Oh boy, why am I about to do this.... +#ifndef NETWORK_H +#define NETWORK_H +#include "darknet.h" + +#include "image.h" +#include "layer.h" +#include "data.h" +#include "tree.h" + + +#ifdef GPU +void pull_network_output(network *net); +#endif + +void compare_networks(network *n1, network *n2, data d); +char *get_layer_string(LAYER_TYPE a); + +network *make_network(int n); + + +float network_accuracy_multi(network *net, data d, int n); +int get_predicted_class_network(network *net); +void print_network(network *net); +int resize_network(network *net, int w, int h); +void calc_network_cost(network *net); + +#endif + diff --git a/workloads/realworld/async/darknet/src/normalization_layer.c b/workloads/realworld/async/darknet/src/normalization_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..424714fe8653f79b57fd4cde625997749d8eff83 --- /dev/null +++ b/workloads/realworld/async/darknet/src/normalization_layer.c @@ -0,0 +1,151 @@ +#include "normalization_layer.h" +#include "blas.h" + +#include + +layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa) +{ + fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size); + layer layer = {0}; + layer.type = NORMALIZATION; + layer.batch = 
batch; + layer.h = layer.out_h = h; + layer.w = layer.out_w = w; + layer.c = layer.out_c = c; + layer.kappa = kappa; + layer.size = size; + layer.alpha = alpha; + layer.beta = beta; + layer.output = calloc(h * w * c * batch, sizeof(float)); + layer.delta = calloc(h * w * c * batch, sizeof(float)); + layer.squared = calloc(h * w * c * batch, sizeof(float)); + layer.norms = calloc(h * w * c * batch, sizeof(float)); + layer.inputs = w*h*c; + layer.outputs = layer.inputs; + + layer.forward = forward_normalization_layer; + layer.backward = backward_normalization_layer; + #ifdef GPU + layer.forward_gpu = forward_normalization_layer_gpu; + layer.backward_gpu = backward_normalization_layer_gpu; + + layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch); + layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch); + layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch); + layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch); + #endif + return layer; +} + +void resize_normalization_layer(layer *layer, int w, int h) +{ + int c = layer->c; + int batch = layer->batch; + layer->h = h; + layer->w = w; + layer->out_h = h; + layer->out_w = w; + layer->inputs = w*h*c; + layer->outputs = layer->inputs; + layer->output = realloc(layer->output, h * w * c * batch * sizeof(float)); + layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float)); + layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float)); + layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float)); +#ifdef GPU + cuda_free(layer->output_gpu); + cuda_free(layer->delta_gpu); + cuda_free(layer->squared_gpu); + cuda_free(layer->norms_gpu); + layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch); + layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch); + layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch); + layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch); +#endif +} 
+ +void forward_normalization_layer(const layer layer, network net) +{ + int k,b; + int w = layer.w; + int h = layer.h; + int c = layer.c; + scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1); + + for(b = 0; b < layer.batch; ++b){ + float *squared = layer.squared + w*h*c*b; + float *norms = layer.norms + w*h*c*b; + float *input = net.input + w*h*c*b; + pow_cpu(w*h*c, 2, input, 1, squared, 1); + + const_cpu(w*h, layer.kappa, norms, 1); + for(k = 0; k < layer.size/2; ++k){ + axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); + } + + for(k = 1; k < layer.c; ++k){ + copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); + int prev = k - ((layer.size-1)/2) - 1; + int next = k + (layer.size/2); + if(prev >= 0) axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); + if(next < layer.c) axpy_cpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); + } + } + pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1); + mul_cpu(w*h*c*layer.batch, net.input, 1, layer.output, 1); +} + +void backward_normalization_layer(const layer layer, network net) +{ + // TODO This is approximate ;-) + // Also this should add in to delta instead of overwritting. 
+ + int w = layer.w; + int h = layer.h; + int c = layer.c; + pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, net.delta, 1); + mul_cpu(w*h*c*layer.batch, layer.delta, 1, net.delta, 1); +} + +#ifdef GPU +void forward_normalization_layer_gpu(const layer layer, network net) +{ + int k,b; + int w = layer.w; + int h = layer.h; + int c = layer.c; + scal_gpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1); + + for(b = 0; b < layer.batch; ++b){ + float *squared = layer.squared_gpu + w*h*c*b; + float *norms = layer.norms_gpu + w*h*c*b; + float *input = net.input_gpu + w*h*c*b; + pow_gpu(w*h*c, 2, input, 1, squared, 1); + + const_gpu(w*h, layer.kappa, norms, 1); + for(k = 0; k < layer.size/2; ++k){ + axpy_gpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); + } + + for(k = 1; k < layer.c; ++k){ + copy_gpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); + int prev = k - ((layer.size-1)/2) - 1; + int next = k + (layer.size/2); + if(prev >= 0) axpy_gpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); + if(next < layer.c) axpy_gpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); + } + } + pow_gpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1); + mul_gpu(w*h*c*layer.batch, net.input_gpu, 1, layer.output_gpu, 1); +} + +void backward_normalization_layer_gpu(const layer layer, network net) +{ + // TODO This is approximate ;-) + + int w = layer.w; + int h = layer.h; + int c = layer.c; + pow_gpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, net.delta_gpu, 1); + mul_gpu(w*h*c*layer.batch, layer.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/async/darknet/src/normalization_layer.h b/workloads/realworld/async/darknet/src/normalization_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..665baa5066282335b6625618ce07c2fcc833d952 --- /dev/null +++ b/workloads/realworld/async/darknet/src/normalization_layer.h @@ -0,0 +1,19 @@ +#ifndef NORMALIZATION_LAYER_H +#define 
NORMALIZATION_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa); +void resize_normalization_layer(layer *layer, int h, int w); +void forward_normalization_layer(const layer layer, network net); +void backward_normalization_layer(const layer layer, network net); +void visualize_normalization_layer(layer layer, char *window); + +#ifdef GPU +void forward_normalization_layer_gpu(const layer layer, network net); +void backward_normalization_layer_gpu(const layer layer, network net); +#endif + +#endif diff --git a/workloads/realworld/async/darknet/src/option_list.c b/workloads/realworld/async/darknet/src/option_list.c new file mode 100644 index 0000000000000000000000000000000000000000..2f52781f8096fecc5e9d1db3cfbfa10685506b93 --- /dev/null +++ b/workloads/realworld/async/darknet/src/option_list.c @@ -0,0 +1,140 @@ +#include +#include +#include +#include "option_list.h" +#include "utils.h" + +list *read_data_cfg(char *filename) +{ + FILE *file = fopen(filename, "r"); + if(file == 0) file_error(filename); + char *line; + int nu = 0; + list *options = make_list(); + while((line=fgetl(file)) != 0){ + ++ nu; + strip(line); + switch(line[0]){ + case '\0': + case '#': + case ';': + free(line); + break; + default: + if(!read_option(line, options)){ + fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); + free(line); + } + break; + } + } + fclose(file); + return options; +} + +metadata get_metadata(char *file) +{ + metadata m = {0}; + list *options = read_data_cfg(file); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", 0); + if(!name_list) { + fprintf(stderr, "No names or labels found\n"); + } else { + m.names = get_labels(name_list); + } + m.classes = option_find_int(options, "classes", 2); + free_list(options); + return m; +} + +int read_option(char 
*s, list *options) +{ + size_t i; + size_t len = strlen(s); + char *val = 0; + for(i = 0; i < len; ++i){ + if(s[i] == '='){ + s[i] = '\0'; + val = s+i+1; + break; + } + } + if(i == len-1) return 0; + char *key = s; + option_insert(options, key, val); + return 1; +} + +void option_insert(list *l, char *key, char *val) +{ + kvp *p = malloc(sizeof(kvp)); + p->key = key; + p->val = val; + p->used = 0; + list_insert(l, p); +} + +void option_unused(list *l) +{ + node *n = l->front; + while(n){ + kvp *p = (kvp *)n->val; + if(!p->used){ + fprintf(stderr, "Unused field: '%s = %s'\n", p->key, p->val); + } + n = n->next; + } +} + +char *option_find(list *l, char *key) +{ + node *n = l->front; + while(n){ + kvp *p = (kvp *)n->val; + if(strcmp(p->key, key) == 0){ + p->used = 1; + return p->val; + } + n = n->next; + } + return 0; +} +char *option_find_str(list *l, char *key, char *def) +{ + char *v = option_find(l, key); + if(v) return v; + if(def) fprintf(stderr, "%s: Using default '%s'\n", key, def); + return def; +} + +int option_find_int(list *l, char *key, int def) +{ + char *v = option_find(l, key); + if(v) return atoi(v); + fprintf(stderr, "%s: Using default '%d'\n", key, def); + return def; +} + +int option_find_int_quiet(list *l, char *key, int def) +{ + char *v = option_find(l, key); + if(v) return atoi(v); + return def; +} + +float option_find_float_quiet(list *l, char *key, float def) +{ + char *v = option_find(l, key); + if(v) return atof(v); + return def; +} + +float option_find_float(list *l, char *key, float def) +{ + char *v = option_find(l, key); + if(v) return atof(v); + fprintf(stderr, "%s: Using default '%lf'\n", key, def); + return def; +} diff --git a/workloads/realworld/async/darknet/src/option_list.h b/workloads/realworld/async/darknet/src/option_list.h new file mode 100644 index 0000000000000000000000000000000000000000..844bd8724b77889d9ab6e6e70f62305e3339048c --- /dev/null +++ b/workloads/realworld/async/darknet/src/option_list.h @@ -0,0 +1,19 @@ 
#ifndef OPTION_LIST_H
#define OPTION_LIST_H
#include "list.h"

/*
 * Key/value pair stored in an options list.
 * key/val are borrowed pointers (see option_insert); `used` tracks whether
 * the entry was ever looked up, so unused cfg fields can be reported.
 */
typedef struct{
    char *key;
    char *val;
    int used;
} kvp;


/* Split "key=value" in place and insert it into options; 0 on parse failure. */
int read_option(char *s, list *options);
/* Append a key/value pair (pointers are stored, not copied). */
void option_insert(list *l, char *key, char *val);
/* Return the value for key (marking it used), or NULL if absent. */
char *option_find(list *l, char *key);
/* Float lookup with default; logs to stderr when the default is used. */
float option_find_float(list *l, char *key, float def);
/* Float lookup with default; silent fallback. */
float option_find_float_quiet(list *l, char *key, float def);
/* Warn about options that were inserted but never looked up. */
void option_unused(list *l);

#endif
/*
 * Release one cfg section and everything it owns.
 * Note: only pair->key is freed, not pair->val — read_option() splits a
 * single heap-allocated line in place (val points just past the '=' inside
 * the same buffer as key), so freeing key releases both.
 */
void free_section(section *s)
{
    free(s->type);
    node *n = s->options->front;
    while(n){
        kvp *pair = (kvp *)n->val;
        free(pair->key);   /* frees the whole "key\0value" line buffer */
        free(pair);
        node *next = n->next;
        free(n);
        n = next;
    }
    free(s->options);
    free(s);
}
/*
 * Running state threaded through layer parsing: the geometry/batch of the
 * previous layer's output, which becomes the next layer's input.
 */
typedef struct size_params{
    int batch;
    int inputs;
    int h;
    int w;
    int c;
    int index;       /* index of the layer currently being parsed */
    int time_steps;
    network *net;
} size_params;

/*
 * Build a locally-connected layer from its cfg section.
 * Requires a spatial (h,w,c) input; exits via error() otherwise.
 */
local_layer parse_local(list *options, size_params params)
{
    int n = option_find_int(options, "filters",1);
    int size = option_find_int(options, "size",1);
    int stride = option_find_int(options, "stride",1);
    int pad = option_find_int(options, "pad",0);
    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation = get_activation(activation_s);

    int batch,h,w,c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch=params.batch;
    if(!(h && w && c)) error("Layer before local layer must output image.");

    local_layer layer = make_local_layer(batch,h,w,c,n,size,stride,pad,activation);

    return layer;
}

/*
 * Build a deconvolutional (transposed convolution) layer from its cfg section.
 * "pad=1" overrides any explicit "padding" with size/2 (same-style padding).
 */
layer parse_deconvolutional(list *options, size_params params)
{
    int n = option_find_int(options, "filters",1);
    int size = option_find_int(options, "size",1);
    int stride = option_find_int(options, "stride",1);

    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation = get_activation(activation_s);

    int batch,h,w,c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch=params.batch;
    if(!(h && w && c)) error("Layer before deconvolutional layer must output image.");
    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
    int pad = option_find_int_quiet(options, "pad",0);
    int padding = option_find_int_quiet(options, "padding",0);
    if(pad) padding = size/2;

    layer l = make_deconvolutional_layer(batch,h,w,c,n,size,stride,padding, activation, batch_normalize, params.net->adam);

    return l;
}
option_find_int(options, "stride",1); + int pad = option_find_int_quiet(options, "pad",0); + int padding = option_find_int_quiet(options, "padding",0); + int groups = option_find_int_quiet(options, "groups", 1); + if(pad) padding = size/2; + + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before convolutional layer must output image."); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + int binary = option_find_int_quiet(options, "binary", 0); + int xnor = option_find_int_quiet(options, "xnor", 0); + + convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,groups,size,stride,padding,activation, batch_normalize, binary, xnor, params.net->adam); + layer.flipped = option_find_int_quiet(options, "flipped", 0); + layer.dot = option_find_float_quiet(options, "dot", 0); + + return layer; +} + +layer parse_crnn(list *options, size_params params) +{ + int output_filters = option_find_int(options, "output_filters",1); + int hidden_filters = option_find_int(options, "hidden_filters",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, activation, batch_normalize); + + l.shortcut = option_find_int_quiet(options, "shortcut", 0); + + return l; +} + +layer parse_rnn(list *options, size_params params) +{ + int output = option_find_int(options, "output",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 
/*
 * Build a GRU layer from its cfg section.
 * "tanh" selects the tanh output variant (off by default).
 */
layer parse_gru(list *options, size_params params)
{
    int output = option_find_int(options, "output",1);
    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);

    layer l = make_gru_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net->adam);
    l.tanh = option_find_int_quiet(options, "tanh", 0);

    return l;
}

/* Build an LSTM layer from its cfg section. */
layer parse_lstm(list *options, size_params params)
{
    int output = option_find_int(options, "output", 1);
    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);

    layer l = make_lstm_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net->adam);

    return l;
}

/* Build a fully-connected layer from its cfg section. */
layer parse_connected(list *options, size_params params)
{
    int output = option_find_int(options, "output",1);
    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation = get_activation(activation_s);
    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);

    layer l = make_connected_layer(params.batch, params.inputs, output, activation, batch_normalize, params.net->adam);
    return l;
}

/*
 * Build a softmax layer from its cfg section.
 * An optional "tree" file enables hierarchical softmax; the spatial
 * dimensions are copied through so spatial softmax can be applied.
 */
layer parse_softmax(list *options, size_params params)
{
    int groups = option_find_int_quiet(options, "groups",1);
    layer l = make_softmax_layer(params.batch, params.inputs, groups);
    l.temperature = option_find_float_quiet(options, "temperature", 1);
    char *tree_file = option_find_str(options, "tree", 0);
    if (tree_file) l.softmax_tree = read_tree(tree_file);
    l.w = params.w;
    l.h = params.h;
    l.c = params.c;
    l.spatial = option_find_float_quiet(options, "spatial", 0);
    l.noloss = option_find_int_quiet(options, "noloss", 0);
    return l;
}
/*
 * Build a YOLO detection layer from its cfg section.
 * "num" is the total anchor count; an optional "mask" list selects which
 * anchors this layer predicts (parse_yolo_mask updates num accordingly).
 * Asserts that the previous layer's output size matches this layer's
 * expected input size — a mismatch means the cfg geometry is wrong.
 */
layer parse_yolo(list *options, size_params params)
{
    int classes = option_find_int(options, "classes", 20);
    int total = option_find_int(options, "num", 1);
    int num = total;

    char *a = option_find_str(options, "mask", 0);
    int *mask = parse_yolo_mask(a, &num);
    layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes);
    assert(l.outputs == params.inputs);

    l.max_boxes = option_find_int_quiet(options, "max",90);
    l.jitter = option_find_float(options, "jitter", .2);

    l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
    l.truth_thresh = option_find_float(options, "truth_thresh", 1);
    l.random = option_find_int_quiet(options, "random", 0);

    char *map_file = option_find_str(options, "map", 0);
    if (map_file) l.map = read_map(map_file);

    /* Parse the comma-separated anchor list into l.biases.
     * NOTE(review): on the last element strchr() returns NULL and a becomes
     * NULL+1; it is never dereferenced because the loop ends, but this is
     * fragile — the same idiom appears in parse_region. */
    a = option_find_str(options, "anchors", 0);
    if(a){
        int len = strlen(a);
        int n = 1;
        int i;
        for(i = 0; i < len; ++i){
            if (a[i] == ',') ++n;
        }
        for(i = 0; i < n; ++i){
            float bias = atof(a);
            l.biases[i] = bias;
            a = strchr(a, ',')+1;
        }
    }
    return l;
}

/*
 * Build an instance-segmentation layer from its cfg section.
 * Asserts the input size matches the previous layer's output size.
 */
layer parse_iseg(list *options, size_params params)
{
    int classes = option_find_int(options, "classes", 20);
    int ids = option_find_int(options, "ids", 32);
    layer l = make_iseg_layer(params.batch, params.w, params.h, classes, ids);
    assert(l.outputs == params.inputs);
    return l;
}
params.inputs); + + l.log = option_find_int_quiet(options, "log", 0); + l.sqrt = option_find_int_quiet(options, "sqrt", 0); + + l.softmax = option_find_int(options, "softmax", 0); + l.background = option_find_int_quiet(options, "background", 0); + l.max_boxes = option_find_int_quiet(options, "max",30); + l.jitter = option_find_float(options, "jitter", .2); + l.rescore = option_find_int_quiet(options, "rescore",0); + + l.thresh = option_find_float(options, "thresh", .5); + l.classfix = option_find_int_quiet(options, "classfix", 0); + l.absolute = option_find_int_quiet(options, "absolute", 0); + l.random = option_find_int_quiet(options, "random", 0); + + l.coord_scale = option_find_float(options, "coord_scale", 1); + l.object_scale = option_find_float(options, "object_scale", 1); + l.noobject_scale = option_find_float(options, "noobject_scale", 1); + l.mask_scale = option_find_float(options, "mask_scale", 1); + l.class_scale = option_find_float(options, "class_scale", 1); + l.bias_match = option_find_int_quiet(options, "bias_match",0); + + char *tree_file = option_find_str(options, "tree", 0); + if (tree_file) l.softmax_tree = read_tree(tree_file); + char *map_file = option_find_str(options, "map", 0); + if (map_file) l.map = read_map(map_file); + + char *a = option_find_str(options, "anchors", 0); + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + for(i = 0; i < n; ++i){ + float bias = atof(a); + l.biases[i] = bias; + a = strchr(a, ',')+1; + } + } + return l; +} + +detection_layer parse_detection(list *options, size_params params) +{ + int coords = option_find_int(options, "coords", 1); + int classes = option_find_int(options, "classes", 1); + int rescore = option_find_int(options, "rescore", 0); + int num = option_find_int(options, "num", 1); + int side = option_find_int(options, "side", 7); + detection_layer layer = make_detection_layer(params.batch, params.inputs, num, side, classes, coords, rescore); + 
+ layer.softmax = option_find_int(options, "softmax", 0); + layer.sqrt = option_find_int(options, "sqrt", 0); + + layer.max_boxes = option_find_int_quiet(options, "max",90); + layer.coord_scale = option_find_float(options, "coord_scale", 1); + layer.forced = option_find_int(options, "forced", 0); + layer.object_scale = option_find_float(options, "object_scale", 1); + layer.noobject_scale = option_find_float(options, "noobject_scale", 1); + layer.class_scale = option_find_float(options, "class_scale", 1); + layer.jitter = option_find_float(options, "jitter", .2); + layer.random = option_find_int_quiet(options, "random", 0); + layer.reorg = option_find_int_quiet(options, "reorg", 0); + return layer; +} + +cost_layer parse_cost(list *options, size_params params) +{ + char *type_s = option_find_str(options, "type", "sse"); + COST_TYPE type = get_cost_type(type_s); + float scale = option_find_float_quiet(options, "scale",1); + cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale); + layer.ratio = option_find_float_quiet(options, "ratio",0); + layer.noobject_scale = option_find_float_quiet(options, "noobj", 1); + layer.thresh = option_find_float_quiet(options, "thresh",0); + return layer; +} + +crop_layer parse_crop(list *options, size_params params) +{ + int crop_height = option_find_int(options, "crop_height",1); + int crop_width = option_find_int(options, "crop_width",1); + int flip = option_find_int(options, "flip",0); + float angle = option_find_float(options, "angle",0); + float saturation = option_find_float(options, "saturation",1); + float exposure = option_find_float(options, "exposure",1); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before crop layer must output image."); + + int noadjust = option_find_int_quiet(options, "noadjust",0); + + crop_layer l = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip, angle, saturation, exposure); + l.shift = 
/*
 * Build a reorg layer (space-to-depth reshuffle) from its cfg section.
 * Requires a spatial (h,w,c) input; exits via error() otherwise.
 */
layer parse_reorg(list *options, size_params params)
{
    int stride = option_find_int(options, "stride",1);
    int reverse = option_find_int_quiet(options, "reverse",0);
    int flatten = option_find_int_quiet(options, "flatten",0);
    int extra = option_find_int_quiet(options, "extra",0);

    int batch,h,w,c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch=params.batch;
    if(!(h && w && c)) error("Layer before reorg layer must output image.");

    layer layer = make_reorg_layer(batch,w,h,c,stride,reverse, flatten, extra);
    return layer;
}

/*
 * Build a maxpool layer from its cfg section.
 * "size" defaults to the stride, and "padding" defaults to size-1.
 */
maxpool_layer parse_maxpool(list *options, size_params params)
{
    int stride = option_find_int(options, "stride",1);
    int size = option_find_int(options, "size",stride);
    int padding = option_find_int_quiet(options, "padding", size-1);

    int batch,h,w,c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch=params.batch;
    if(!(h && w && c)) error("Layer before maxpool layer must output image.");

    maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding);
    return layer;
}

/* Build a global average-pool layer; it takes no cfg options of its own. */
avgpool_layer parse_avgpool(list *options, size_params params)
{
    int batch,w,h,c;
    w = params.w;
    h = params.h;
    c = params.c;
    batch=params.batch;
    if(!(h && w && c)) error("Layer before avgpool layer must output image.");

    avgpool_layer layer = make_avgpool_layer(batch,w,h,c);
    return layer;
}

/*
 * Build a dropout layer from its cfg section.
 * Output dimensions are passed through unchanged from the previous layer.
 */
dropout_layer parse_dropout(list *options, size_params params)
{
    float probability = option_find_float(options, "probability", .5);
    dropout_layer layer = make_dropout_layer(params.batch, params.inputs, probability);
    layer.out_w = params.w;
    layer.out_h = params.h;
    layer.out_c = params.c;
    return layer;
}
/* Build a batch-normalization layer; geometry comes from the previous layer. */
layer parse_batchnorm(list *options, size_params params)
{
    layer l = make_batchnorm_layer(params.batch, params.w, params.h, params.c);
    return l;
}

/*
 * Build a shortcut (residual add) layer from its cfg section.
 * "from" is the source layer index; negative values are relative to the
 * current layer index.
 * NOTE(review): option_find() returns NULL when "from" is absent, and
 * atoi(NULL) is undefined behavior — a cfg missing "from" crashes here
 * instead of producing a diagnostic.
 */
layer parse_shortcut(list *options, size_params params, network *net)
{
    char *l = option_find(options, "from");
    int index = atoi(l);
    if(index < 0) index = params.index + index;

    int batch = params.batch;
    layer from = net->layers[index];

    layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c);

    char *activation_s = option_find_str(options, "activation", "linear");
    ACTIVATION activation = get_activation(activation_s);
    s.activation = activation;
    s.alpha = option_find_float_quiet(options, "alpha", 1);
    s.beta = option_find_float_quiet(options, "beta", 1);
    return s;
}


/* Build an L2-normalization layer; spatial dims pass through unchanged. */
layer parse_l2norm(list *options, size_params params)
{
    layer l = make_l2norm_layer(params.batch, params.inputs);
    l.h = l.out_h = params.h;
    l.w = l.out_w = params.w;
    l.c = l.out_c = params.c;
    return l;
}


/* Build a logistic (cross-entropy) layer; spatial dims pass through. */
layer parse_logistic(list *options, size_params params)
{
    layer l = make_logistic_layer(params.batch, params.inputs);
    l.h = l.out_h = params.h;
    l.w = l.out_w = params.w;
    l.c = l.out_c = params.c;
    return l;
}

/*
 * Build a standalone activation layer from its cfg section;
 * spatial dims pass through unchanged.
 */
layer parse_activation(list *options, size_params params)
{
    char *activation_s = option_find_str(options, "activation", "linear");
    ACTIVATION activation = get_activation(activation_s);

    layer l = make_activation_layer(params.batch, params.inputs, activation);

    l.h = l.out_h = params.h;
    l.w = l.out_w = params.w;
    l.c = l.out_c = params.c;

    return l;
}
make_upsample_layer(params.batch, params.w, params.h, params.c, stride); + l.scale = option_find_float_quiet(options, "scale", 1); + return l; +} + +route_layer parse_route(list *options, size_params params, network *net) +{ + char *l = option_find(options, "layers"); + int len = strlen(l); + if(!l) error("Route Layer must specify input layers"); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (l[i] == ',') ++n; + } + + int *layers = calloc(n, sizeof(int)); + int *sizes = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + int index = atoi(l); + l = strchr(l, ',')+1; + if(index < 0) index = params.index + index; + layers[i] = index; + sizes[i] = net->layers[index].outputs; + } + int batch = params.batch; + + route_layer layer = make_route_layer(batch, n, layers, sizes); + + convolutional_layer first = net->layers[layers[0]]; + layer.out_w = first.out_w; + layer.out_h = first.out_h; + layer.out_c = first.out_c; + for(i = 1; i < n; ++i){ + int index = layers[i]; + convolutional_layer next = net->layers[index]; + if(next.out_w == first.out_w && next.out_h == first.out_h){ + layer.out_c += next.out_c; + }else{ + layer.out_h = layer.out_w = layer.out_c = 0; + } + } + + return layer; +} + +learning_rate_policy get_policy(char *s) +{ + if (strcmp(s, "random")==0) return RANDOM; + if (strcmp(s, "poly")==0) return POLY; + if (strcmp(s, "constant")==0) return CONSTANT; + if (strcmp(s, "step")==0) return STEP; + if (strcmp(s, "exp")==0) return EXP; + if (strcmp(s, "sigmoid")==0) return SIG; + if (strcmp(s, "steps")==0) return STEPS; + fprintf(stderr, "Couldn't find policy %s, going with constant\n", s); + return CONSTANT; +} + +void parse_net_options(list *options, network *net) +{ + net->batch = option_find_int(options, "batch",1); + net->learning_rate = option_find_float(options, "learning_rate", .001); + net->momentum = option_find_float(options, "momentum", .9); + net->decay = option_find_float(options, "decay", .0001); + int subdivs = option_find_int(options, 
"subdivisions",1); + net->time_steps = option_find_int_quiet(options, "time_steps",1); + net->notruth = option_find_int_quiet(options, "notruth",0); + net->batch /= subdivs; + net->batch *= net->time_steps; + net->subdivisions = subdivs; + net->random = option_find_int_quiet(options, "random", 0); + + net->adam = option_find_int_quiet(options, "adam", 0); + if(net->adam){ + net->B1 = option_find_float(options, "B1", .9); + net->B2 = option_find_float(options, "B2", .999); + net->eps = option_find_float(options, "eps", .0000001); + } + + net->h = option_find_int_quiet(options, "height",0); + net->w = option_find_int_quiet(options, "width",0); + net->c = option_find_int_quiet(options, "channels",0); + net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c); + net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2); + net->min_crop = option_find_int_quiet(options, "min_crop",net->w); + net->max_ratio = option_find_float_quiet(options, "max_ratio", (float) net->max_crop / net->w); + net->min_ratio = option_find_float_quiet(options, "min_ratio", (float) net->min_crop / net->w); + net->center = option_find_int_quiet(options, "center",0); + net->clip = option_find_float_quiet(options, "clip", 0); + + net->angle = option_find_float_quiet(options, "angle", 0); + net->aspect = option_find_float_quiet(options, "aspect", 1); + net->saturation = option_find_float_quiet(options, "saturation", 1); + net->exposure = option_find_float_quiet(options, "exposure", 1); + net->hue = option_find_float_quiet(options, "hue", 0); + + if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied"); + + char *policy_s = option_find_str(options, "policy", "constant"); + net->policy = get_policy(policy_s); + net->burn_in = option_find_int_quiet(options, "burn_in", 0); + net->power = option_find_float_quiet(options, "power", 4); + if(net->policy == STEP){ + net->step = option_find_int(options, "step", 1); + net->scale = 
option_find_float(options, "scale", 1); + } else if (net->policy == STEPS){ + char *l = option_find(options, "steps"); + char *p = option_find(options, "scales"); + if(!l || !p) error("STEPS policy must have steps and scales in cfg file"); + + int len = strlen(l); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (l[i] == ',') ++n; + } + int *steps = calloc(n, sizeof(int)); + float *scales = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + int step = atoi(l); + float scale = atof(p); + l = strchr(l, ',')+1; + p = strchr(p, ',')+1; + steps[i] = step; + scales[i] = scale; + } + net->scales = scales; + net->steps = steps; + net->num_steps = n; + } else if (net->policy == EXP){ + net->gamma = option_find_float(options, "gamma", 1); + } else if (net->policy == SIG){ + net->gamma = option_find_float(options, "gamma", 1); + net->step = option_find_int(options, "step", 1); + } else if (net->policy == POLY || net->policy == RANDOM){ + } + net->max_batches = option_find_int(options, "max_batches", 0); +} + +int is_network(section *s) +{ + return (strcmp(s->type, "[net]")==0 + || strcmp(s->type, "[network]")==0); +} + +network *parse_network_cfg(char *filename) +{ + list *sections = read_cfg(filename); + node *n = sections->front; + if(!n) error("Config file has no sections"); + network *net = make_network(sections->size - 1); + net->gpu_index = gpu_index; + size_params params; + + section *s = (section *)n->val; + list *options = s->options; + if(!is_network(s)) error("First section must be [net] or [network]"); + parse_net_options(options, net); + + params.h = net->h; + params.w = net->w; + params.c = net->c; + params.inputs = net->inputs; + params.batch = net->batch; + params.time_steps = net->time_steps; + params.net = net; + + size_t workspace_size = 0; + n = n->next; + int count = 0; + free_section(s); + fprintf(stderr, "layer filters size input output\n"); + while(n){ + params.index = count; + fprintf(stderr, "%5d ", count); + s = (section *)n->val; + options 
= s->options; + layer l = {0}; + LAYER_TYPE lt = string_to_layer_type(s->type); + if(lt == CONVOLUTIONAL){ + l = parse_convolutional(options, params); + }else if(lt == DECONVOLUTIONAL){ + l = parse_deconvolutional(options, params); + }else if(lt == LOCAL){ + l = parse_local(options, params); + }else if(lt == ACTIVE){ + l = parse_activation(options, params); + }else if(lt == LOGXENT){ + l = parse_logistic(options, params); + }else if(lt == L2NORM){ + l = parse_l2norm(options, params); + }else if(lt == RNN){ + l = parse_rnn(options, params); + }else if(lt == GRU){ + l = parse_gru(options, params); + }else if (lt == LSTM) { + l = parse_lstm(options, params); + }else if(lt == CRNN){ + l = parse_crnn(options, params); + }else if(lt == CONNECTED){ + l = parse_connected(options, params); + }else if(lt == CROP){ + l = parse_crop(options, params); + }else if(lt == COST){ + l = parse_cost(options, params); + }else if(lt == REGION){ + l = parse_region(options, params); + }else if(lt == YOLO){ + l = parse_yolo(options, params); + }else if(lt == ISEG){ + l = parse_iseg(options, params); + }else if(lt == DETECTION){ + l = parse_detection(options, params); + }else if(lt == SOFTMAX){ + l = parse_softmax(options, params); + net->hierarchy = l.softmax_tree; + }else if(lt == NORMALIZATION){ + l = parse_normalization(options, params); + }else if(lt == BATCHNORM){ + l = parse_batchnorm(options, params); + }else if(lt == MAXPOOL){ + l = parse_maxpool(options, params); + }else if(lt == REORG){ + l = parse_reorg(options, params); + }else if(lt == AVGPOOL){ + l = parse_avgpool(options, params); + }else if(lt == ROUTE){ + l = parse_route(options, params, net); + }else if(lt == UPSAMPLE){ + l = parse_upsample(options, params, net); + }else if(lt == SHORTCUT){ + l = parse_shortcut(options, params, net); + }else if(lt == DROPOUT){ + l = parse_dropout(options, params); + l.output = net->layers[count-1].output; + l.delta = net->layers[count-1].delta; +#ifdef GPU + l.output_gpu = 
net->layers[count-1].output_gpu; + l.delta_gpu = net->layers[count-1].delta_gpu; +#endif + }else{ + fprintf(stderr, "Type not recognized: %s\n", s->type); + } + l.clip = net->clip; + l.truth = option_find_int_quiet(options, "truth", 0); + l.onlyforward = option_find_int_quiet(options, "onlyforward", 0); + l.stopbackward = option_find_int_quiet(options, "stopbackward", 0); + l.dontsave = option_find_int_quiet(options, "dontsave", 0); + l.dontload = option_find_int_quiet(options, "dontload", 0); + l.numload = option_find_int_quiet(options, "numload", 0); + l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0); + l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1); + l.smooth = option_find_float_quiet(options, "smooth", 0); + option_unused(options); + net->layers[count] = l; + if (l.workspace_size > workspace_size) workspace_size = l.workspace_size; + free_section(s); + n = n->next; + ++count; + if(n){ + params.h = l.out_h; + params.w = l.out_w; + params.c = l.out_c; + params.inputs = l.outputs; + } + } + free_list(sections); + layer out = get_network_output_layer(net); + net->outputs = out.outputs; + net->truths = out.outputs; + if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths; + net->output = out.output; + net->input = calloc(net->inputs*net->batch, sizeof(float)); + net->truth = calloc(net->truths*net->batch, sizeof(float)); +#ifdef GPU + net->output_gpu = out.output_gpu; + net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch); + net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch); +#endif + if(workspace_size){ + //printf("%ld\n", workspace_size); +#ifdef GPU + if(gpu_index >= 0){ + net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1); + }else { + net->workspace = calloc(1, workspace_size); + } +#else + net->workspace = calloc(1, workspace_size); +#endif + } + return net; +} + +list *read_cfg(char *filename) +{ + FILE *file = fopen(filename, "r"); 
+ if(file == 0) file_error(filename); + char *line; + int nu = 0; + list *options = make_list(); + section *current = 0; + while((line=fgetl(file)) != 0){ + ++ nu; + strip(line); + switch(line[0]){ + case '[': + current = malloc(sizeof(section)); + list_insert(options, current); + current->options = make_list(); + current->type = line; + break; + case '\0': + case '#': + case ';': + free(line); + break; + default: + if(!read_option(line, current->options)){ + fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); + free(line); + } + break; + } + } + fclose(file); + return options; +} + +void save_convolutional_weights_binary(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_convolutional_layer(l); + } +#endif + binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.binary_weights); + int size = l.c*l.size*l.size; + int i, j, k; + fwrite(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.n, fp); + fwrite(l.rolling_mean, sizeof(float), l.n, fp); + fwrite(l.rolling_variance, sizeof(float), l.n, fp); + } + for(i = 0; i < l.n; ++i){ + float mean = l.binary_weights[i*size]; + if(mean < 0) mean = -mean; + fwrite(&mean, sizeof(float), 1, fp); + for(j = 0; j < size/8; ++j){ + int index = i*size + j*8; + unsigned char c = 0; + for(k = 0; k < 8; ++k){ + if (j*8 + k >= size) break; + if (l.binary_weights[index + k] > 0) c = (c | 1<= 0){ + pull_convolutional_layer(l); + } +#endif + int num = l.nweights; + fwrite(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.n, fp); + fwrite(l.rolling_mean, sizeof(float), l.n, fp); + fwrite(l.rolling_variance, sizeof(float), l.n, fp); + } + fwrite(l.weights, sizeof(float), num, fp); +} + +void save_batchnorm_weights(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_batchnorm_layer(l); + } +#endif + fwrite(l.scales, sizeof(float), l.c, fp); + fwrite(l.rolling_mean, sizeof(float), l.c, fp); + 
/*
 * Serialize a connected layer's parameters to fp in darknet weight order:
 * biases, weights, then (if batch-normalized) scales, rolling mean,
 * rolling variance. Under GPU builds the parameters are pulled from the
 * device first so the host copies are current.
 * NOTE(review): fwrite return values are not checked, so short writes
 * (e.g. disk full) go unreported — consistent with the rest of this file.
 */
void save_connected_weights(layer l, FILE *fp)
{
#ifdef GPU
    if(gpu_index >= 0){
        pull_connected_layer(l);
    }
#endif
    fwrite(l.biases, sizeof(float), l.outputs, fp);
    fwrite(l.weights, sizeof(float), l.outputs*l.inputs, fp);
    if (l.batch_normalize){
        fwrite(l.scales, sizeof(float), l.outputs, fp);
        fwrite(l.rolling_mean, sizeof(float), l.outputs, fp);
        fwrite(l.rolling_variance, sizeof(float), l.outputs, fp);
    }
}
/*
 * Transpose a rows x cols row-major matrix in place.
 * `a` must hold rows*cols floats; on return it contains the cols x rows
 * transpose. Uses a temporary buffer of the same size.
 */
void transpose_matrix(float *a, int rows, int cols)
{
    float *scratch = (float*)calloc(rows*cols, sizeof(float));
    int r, c;
    /* write the transpose column-by-column into scratch */
    for(c = 0; c < cols; ++c){
        for(r = 0; r < rows; ++r){
            scratch[c*rows + r] = a[r*cols + c];
        }
    }
    memcpy(a, scratch, rows*cols*sizeof(float));
    free(scratch);
}
//printf("rolling_mean: %f mean %f variance\n", mean_array(l.rolling_mean, l.outputs), variance_array(l.rolling_mean, l.outputs)); + //printf("rolling_variance: %f mean %f variance\n", mean_array(l.rolling_variance, l.outputs), variance_array(l.rolling_variance, l.outputs)); + } +#ifdef GPU + if(gpu_index >= 0){ + push_connected_layer(l); + } +#endif +} + +void load_batchnorm_weights(layer l, FILE *fp) +{ + fread(l.scales, sizeof(float), l.c, fp); + fread(l.rolling_mean, sizeof(float), l.c, fp); + fread(l.rolling_variance, sizeof(float), l.c, fp); +#ifdef GPU + if(gpu_index >= 0){ + push_batchnorm_layer(l); + } +#endif +} + +void load_convolutional_weights_binary(layer l, FILE *fp) +{ + fread(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.n, fp); + fread(l.rolling_mean, sizeof(float), l.n, fp); + fread(l.rolling_variance, sizeof(float), l.n, fp); + } + int size = l.c*l.size*l.size; + int i, j, k; + for(i = 0; i < l.n; ++i){ + float mean = 0; + fread(&mean, sizeof(float), 1, fp); + for(j = 0; j < size/8; ++j){ + int index = i*size + j*8; + unsigned char c = 0; + fread(&c, sizeof(char), 1, fp); + for(k = 0; k < 8; ++k){ + if (j*8 + k >= size) break; + l.weights[index + k] = (c & 1<= 0){ + push_convolutional_layer(l); + } +#endif +} + +void load_convolutional_weights(layer l, FILE *fp) +{ + if(l.binary){ + //load_convolutional_weights_binary(l, fp); + //return; + } + if(l.numload) l.n = l.numload; + int num = l.c/l.groups*l.n*l.size*l.size; + fread(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.n, fp); + fread(l.rolling_mean, sizeof(float), l.n, fp); + fread(l.rolling_variance, sizeof(float), l.n, fp); + if(0){ + int i; + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_mean[i]); + } + printf("\n"); + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_variance[i]); + } + printf("\n"); + } + if(0){ + fill_cpu(l.n, 0, 
l.rolling_mean, 1); + fill_cpu(l.n, 0, l.rolling_variance, 1); + } + if(0){ + int i; + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_mean[i]); + } + printf("\n"); + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_variance[i]); + } + printf("\n"); + } + } + fread(l.weights, sizeof(float), num, fp); + //if(l.c == 3) scal_cpu(num, 1./256, l.weights, 1); + if (l.flipped) { + transpose_matrix(l.weights, l.c*l.size*l.size, l.n); + } + //if (l.binary) binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.weights); +#ifdef GPU + if(gpu_index >= 0){ + push_convolutional_layer(l); + } +#endif +} + + +void load_weights_upto(network *net, char *filename, int start, int cutoff) +{ +#ifdef GPU + if(net->gpu_index >= 0){ + cuda_set_device(net->gpu_index); + } +#endif + fprintf(stderr, "Loading weights from %s...", filename); + fflush(stdout); + FILE *fp = fopen(filename, "rb"); + if(!fp) file_error(filename); + + int major; + int minor; + int revision; + fread(&major, sizeof(int), 1, fp); + fread(&minor, sizeof(int), 1, fp); + fread(&revision, sizeof(int), 1, fp); + if ((major*10 + minor) >= 2 && major < 1000 && minor < 1000){ + fread(net->seen, sizeof(size_t), 1, fp); + } else { + int iseen = 0; + fread(&iseen, sizeof(int), 1, fp); + *net->seen = iseen; + } + int transpose = (major > 1000) || (minor > 1000); + + int i; + for(i = start; i < net->n && i < cutoff; ++i){ + layer l = net->layers[i]; + if (l.dontload) continue; + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + load_convolutional_weights(l, fp); + } + if(l.type == CONNECTED){ + load_connected_weights(l, fp, transpose); + } + if(l.type == BATCHNORM){ + load_batchnorm_weights(l, fp); + } + if(l.type == CRNN){ + load_convolutional_weights(*(l.input_layer), fp); + load_convolutional_weights(*(l.self_layer), fp); + load_convolutional_weights(*(l.output_layer), fp); + } + if(l.type == RNN){ + load_connected_weights(*(l.input_layer), fp, transpose); + load_connected_weights(*(l.self_layer), fp, 
transpose); + load_connected_weights(*(l.output_layer), fp, transpose); + } + if (l.type == LSTM) { + load_connected_weights(*(l.wi), fp, transpose); + load_connected_weights(*(l.wf), fp, transpose); + load_connected_weights(*(l.wo), fp, transpose); + load_connected_weights(*(l.wg), fp, transpose); + load_connected_weights(*(l.ui), fp, transpose); + load_connected_weights(*(l.uf), fp, transpose); + load_connected_weights(*(l.uo), fp, transpose); + load_connected_weights(*(l.ug), fp, transpose); + } + if (l.type == GRU) { + if(1){ + load_connected_weights(*(l.wz), fp, transpose); + load_connected_weights(*(l.wr), fp, transpose); + load_connected_weights(*(l.wh), fp, transpose); + load_connected_weights(*(l.uz), fp, transpose); + load_connected_weights(*(l.ur), fp, transpose); + load_connected_weights(*(l.uh), fp, transpose); + }else{ + load_connected_weights(*(l.reset_layer), fp, transpose); + load_connected_weights(*(l.update_layer), fp, transpose); + load_connected_weights(*(l.state_layer), fp, transpose); + } + } + if(l.type == LOCAL){ + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + fread(l.biases, sizeof(float), l.outputs, fp); + fread(l.weights, sizeof(float), size, fp); +#ifdef GPU + if(gpu_index >= 0){ + push_local_layer(l); + } +#endif + } + } + fprintf(stderr, "Done!\n"); + fclose(fp); +} + +void load_weights(network *net, char *filename) +{ + load_weights_upto(net, filename, 0, net->n); +} + diff --git a/workloads/realworld/async/darknet/src/parser.h b/workloads/realworld/async/darknet/src/parser.h new file mode 100644 index 0000000000000000000000000000000000000000..81aef2c86f3e6cb362f8bde9695ce9d5699ca77f --- /dev/null +++ b/workloads/realworld/async/darknet/src/parser.h @@ -0,0 +1,9 @@ +#ifndef PARSER_H +#define PARSER_H +#include "darknet.h" +#include "network.h" + +void save_network(network net, char *filename); +void save_weights_double(network net, char *filename); + +#endif diff --git 
a/workloads/realworld/async/darknet/src/region_layer.c b/workloads/realworld/async/darknet/src/region_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..9df1b8bc252239ca520fa3dabeff55e5eb5959b8 --- /dev/null +++ b/workloads/realworld/async/darknet/src/region_layer.c @@ -0,0 +1,507 @@ +#include "region_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_region_layer(int batch, int w, int h, int n, int classes, int coords) +{ + layer l = {0}; + l.type = REGION; + + l.n = n; + l.batch = batch; + l.h = h; + l.w = w; + l.c = n*(classes + coords + 1); + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.coords = coords; + l.cost = calloc(1, sizeof(float)); + l.biases = calloc(n*2, sizeof(float)); + l.bias_updates = calloc(n*2, sizeof(float)); + l.outputs = h*w*n*(classes + coords + 1); + l.inputs = l.outputs; + l.truths = 30*(l.coords + 1); + l.delta = calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + int i; + for(i = 0; i < n*2; ++i){ + l.biases[i] = .5; + } + + l.forward = forward_region_layer; + l.backward = backward_region_layer; +#ifdef GPU + l.forward_gpu = forward_region_layer_gpu; + l.backward_gpu = backward_region_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "detection\n"); + srand(0); + + return l; +} + +void resize_region_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->n*(l->classes + l->coords + 1); + l->inputs = l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + 
l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride) +{ + box b; + b.x = (i + x[index + 0*stride]) / w; + b.y = (j + x[index + 1*stride]) / h; + b.w = exp(x[index + 2*stride]) * biases[2*n] / w; + b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h; + return b; +} + +float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale, int stride) +{ + box pred = get_region_box(x, biases, n, index, i, j, w, h, stride); + float iou = box_iou(pred, truth); + + float tx = (truth.x*w - i); + float ty = (truth.y*h - j); + float tw = log(truth.w*w / biases[2*n]); + float th = log(truth.h*h / biases[2*n + 1]); + + delta[index + 0*stride] = scale * (tx - x[index + 0*stride]); + delta[index + 1*stride] = scale * (ty - x[index + 1*stride]); + delta[index + 2*stride] = scale * (tw - x[index + 2*stride]); + delta[index + 3*stride] = scale * (th - x[index + 3*stride]); + return iou; +} + +void delta_region_mask(float *truth, float *x, int n, int index, float *delta, int stride, int scale) +{ + int i; + for(i = 0; i < n; ++i){ + delta[index + i*stride] = scale*(truth[i] - x[index + i*stride]); + } +} + + +void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, int stride, float *avg_cat, int tag) +{ + int i, n; + if(hier){ + float pred = 1; + while(class >= 0){ + pred *= output[index + stride*class]; + int g = hier->group[class]; + int offset = hier->group_offset[g]; + for(i = 0; i < hier->group_size[g]; ++i){ + delta[index + stride*(offset + i)] = scale * (0 - output[index + stride*(offset + i)]); + } + delta[index + stride*class] = scale * (1 - output[index + stride*class]); + + class = hier->parent[class]; + } + *avg_cat += pred; + } else { + if (delta[index] && tag){ + delta[index + stride*class] = scale * (1 - output[index + 
stride*class]); + return; + } + for(n = 0; n < classes; ++n){ + delta[index + stride*n] = scale * (((n == class)?1 : 0) - output[index + stride*n]); + if(n == class) *avg_cat += output[index + stride*n]; + } + } +} + +float logit(float x) +{ + return log(x/(1.-x)); +} + +float tisnan(float x) +{ + return (x != x); +} + +int entry_index(layer l, int batch, int location, int entry) +{ + int n = location / (l.w*l.h); + int loc = location % (l.w*l.h); + return batch*l.outputs + n*l.w*l.h*(l.coords+l.classes+1) + entry*l.w*l.h + loc; +} + +void forward_region_layer(const layer l, network net) +{ + int i,j,b,t,n; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) activate_array(l.output + index, l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords + 1); + if(!l.softmax && !l.softmax_tree) activate_array(l.output + index, l.classes*l.w*l.h, LOGISTIC); + } + } + if (l.softmax_tree){ + int i; + int count = l.coords + 1; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_cpu(net.input + count, group_size, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + count); + count += group_size; + } + } else if (l.softmax){ + int index = entry_index(l, 0, 0, l.coords + !l.background); + softmax_cpu(net.input + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output + index); + } +#endif + + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + if(!net.train) return; + float avg_iou = 0; + float recall = 0; + float avg_cat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + int class_count = 0; + *(l.cost) = 0; + for (b = 0; b < l.batch; ++b) { + if(l.softmax_tree){ + int 
onlyclass = 0; + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + if(!truth.x) break; + int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; + float maxp = 0; + int maxi = 0; + if(truth.x > 100000 && truth.y > 100000){ + for(n = 0; n < l.n*l.w*l.h; ++n){ + int class_index = entry_index(l, b, n, l.coords + 1); + int obj_index = entry_index(l, b, n, l.coords); + float scale = l.output[obj_index]; + l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]); + float p = scale*get_hierarchy_probability(l.output + class_index, l.softmax_tree, class, l.w*l.h); + if(p > maxp){ + maxp = p; + maxi = n; + } + } + int class_index = entry_index(l, b, maxi, l.coords + 1); + int obj_index = entry_index(l, b, maxi, l.coords); + delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax); + if(l.output[obj_index] < .3) l.delta[obj_index] = l.object_scale * (.3 - l.output[obj_index]); + else l.delta[obj_index] = 0; + l.delta[obj_index] = 0; + ++class_count; + onlyclass = 1; + break; + } + } + if(onlyclass) continue; + } + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h); + float best_iou = 0; + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + if(!truth.x) break; + float iou = box_iou(pred, truth); + if (iou > best_iou) { + best_iou = iou; + } + } + int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, l.coords); + avg_anyobj += l.output[obj_index]; + l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]); + if(l.background) l.delta[obj_index] = l.noobject_scale * (1 - l.output[obj_index]); + if (best_iou > l.thresh) { + l.delta[obj_index] = 0; + } + + if(*(net.seen) < 12800){ + box truth = 
{0}; + truth.x = (i + .5)/l.w; + truth.y = (j + .5)/l.h; + truth.w = l.biases[2*n]/l.w; + truth.h = l.biases[2*n+1]/l.h; + delta_region_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, l.delta, .01, l.w*l.h); + } + } + } + } + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + + if(!truth.x) break; + float best_iou = 0; + int best_n = 0; + i = (truth.x * l.w); + j = (truth.y * l.h); + box truth_shift = truth; + truth_shift.x = 0; + truth_shift.y = 0; + for(n = 0; n < l.n; ++n){ + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h); + if(l.bias_match){ + pred.w = l.biases[2*n]/l.w; + pred.h = l.biases[2*n+1]/l.h; + } + pred.x = 0; + pred.y = 0; + float iou = box_iou(pred, truth_shift); + if (iou > best_iou){ + best_iou = iou; + best_n = n; + } + } + + int box_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 0); + float iou = delta_region_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, l.delta, l.coord_scale * (2 - truth.w*truth.h), l.w*l.h); + if(l.coords > 4){ + int mask_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 4); + delta_region_mask(net.truth + t*(l.coords + 1) + b*l.truths + 5, l.output, l.coords - 4, mask_index, l.delta, l.w*l.h, l.mask_scale); + } + if(iou > .5) recall += 1; + avg_iou += iou; + + int obj_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords); + avg_obj += l.output[obj_index]; + l.delta[obj_index] = l.object_scale * (1 - l.output[obj_index]); + if (l.rescore) { + l.delta[obj_index] = l.object_scale * (iou - l.output[obj_index]); + } + if(l.background){ + l.delta[obj_index] = l.object_scale * (0 - l.output[obj_index]); + } + + int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords + 1); + delta_region_class(l.output, l.delta, 
class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax); + ++count; + ++class_count; + } + } + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count); +} + +void backward_region_layer(const layer l, network net) +{ + /* + int b; + int size = l.coords + l.classes + 1; + for (b = 0; b < l.batch*l.n; ++b){ + int index = (b*size + 4)*l.w*l.h; + gradient_array(l.output + index, l.w*l.h, LOGISTIC, l.delta + index); + } + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); + */ +} + +void correct_region_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +{ + int i; + int new_w=0; + int new_h=0; + if (((float)netw/w) < ((float)neth/h)) { + new_w = netw; + new_h = (h * netw)/w; + } else { + new_h = neth; + new_w = (w * neth)/h; + } + for (i = 0; i < n; ++i){ + box b = dets[i].bbox; + b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); + b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); + b.w *= (float)netw/new_w; + b.h *= (float)neth/new_h; + if(!relative){ + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + dets[i].bbox = b; + } +} + +void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets) +{ + int i,j,n,z; + float *predictions = l.output; + if (l.batch == 2) { + float *flip = l.output + l.outputs; + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w/2; ++i) { + for (n = 0; n < l.n; ++n) { + for(z = 0; z < l.classes + l.coords + 1; ++z){ + int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; + int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); + float swap = flip[i1]; + flip[i1] = flip[i2]; + flip[i2] = swap; + if(z == 0){ + flip[i1] = -flip[i1]; + flip[i2] = -flip[i2]; + } + } + } + } + } + 
for(i = 0; i < l.outputs; ++i){ + l.output[i] = (l.output[i] + flip[i])/2.; + } + } + for (i = 0; i < l.w*l.h; ++i){ + int row = i / l.w; + int col = i % l.w; + for(n = 0; n < l.n; ++n){ + int index = n*l.w*l.h + i; + for(j = 0; j < l.classes; ++j){ + dets[index].prob[j] = 0; + } + int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); + int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); + int mask_index = entry_index(l, 0, n*l.w*l.h + i, 4); + float scale = l.background ? 1 : predictions[obj_index]; + dets[index].bbox = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h); + dets[index].objectness = scale > thresh ? scale : 0; + if(dets[index].mask){ + for(j = 0; j < l.coords - 4; ++j){ + dets[index].mask[j] = l.output[mask_index + j*l.w*l.h]; + } + } + + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + !l.background); + if(l.softmax_tree){ + + hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0, l.w*l.h); + if(map){ + for(j = 0; j < 200; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + map[j]); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? prob : 0; + } + } else { + int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh, l.w*l.h); + dets[index].prob[j] = (scale > thresh) ? scale : 0; + } + } else { + if(dets[index].objectness){ + for(j = 0; j < l.classes; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + j); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? 
prob : 0; + } + } + } + } + } + correct_region_boxes(dets, l.w*l.h*l.n, w, h, netw, neth, relative); +} + +#ifdef GPU + +void forward_region_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + if(l.coords > 4){ + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC); + } + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) activate_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords + 1); + if(!l.softmax && !l.softmax_tree) activate_array_gpu(l.output_gpu + index, l.classes*l.w*l.h, LOGISTIC); + } + } + if (l.softmax_tree){ + int index = entry_index(l, 0, 0, l.coords + 1); + softmax_tree(net.input_gpu + index, l.w*l.h, l.batch*l.n, l.inputs/l.n, 1, l.output_gpu + index, *l.softmax_tree); + } else if (l.softmax) { + int index = entry_index(l, 0, 0, l.coords + !l.background); + softmax_gpu(net.input_gpu + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index); + } + if(!net.train || l.onlyforward){ + cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + return; + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_region_layer(l, net); + //cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs); + if(!net.train) return; + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_region_layer_gpu(const layer l, network net) +{ + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + gradient_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC, l.delta_gpu + index); + if(l.coords > 4){ + index = entry_index(l, b, n*l.w*l.h, 4); + 
gradient_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC, l.delta_gpu + index); + } + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) gradient_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC, l.delta_gpu + index); + } + } + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + +void zero_objectness(layer l) +{ + int i, n; + for (i = 0; i < l.w*l.h; ++i){ + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); + l.output[obj_index] = 0; + } + } +} + diff --git a/workloads/realworld/async/darknet/src/region_layer.h b/workloads/realworld/async/darknet/src/region_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9f12fd187fd490d10cbc21af8251e0e2a870b7cb --- /dev/null +++ b/workloads/realworld/async/darknet/src/region_layer.h @@ -0,0 +1,18 @@ +#ifndef REGION_LAYER_H +#define REGION_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_region_layer(int batch, int w, int h, int n, int classes, int coords); +void forward_region_layer(const layer l, network net); +void backward_region_layer(const layer l, network net); +void resize_region_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_region_layer_gpu(const layer l, network net); +void backward_region_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/async/darknet/src/reorg_layer.c b/workloads/realworld/async/darknet/src/reorg_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..b3634d53a5e01a8bbcf00e62a90f70f40108e1d7 --- /dev/null +++ b/workloads/realworld/async/darknet/src/reorg_layer.c @@ -0,0 +1,173 @@ +#include "reorg_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + + +layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra) +{ + layer l = {0}; + l.type = REORG; + l.batch = batch; + l.stride = stride; + l.extra = extra; + l.h = h; + 
l.w = w; + l.c = c; + l.flatten = flatten; + if(reverse){ + l.out_w = w*stride; + l.out_h = h*stride; + l.out_c = c/(stride*stride); + }else{ + l.out_w = w/stride; + l.out_h = h/stride; + l.out_c = c*(stride*stride); + } + l.reverse = reverse; + + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = h*w*c; + if(l.extra){ + l.out_w = l.out_h = l.out_c = 0; + l.outputs = l.inputs + l.extra; + } + + if(extra){ + fprintf(stderr, "reorg %4d -> %4d\n", l.inputs, l.outputs); + } else { + fprintf(stderr, "reorg /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); + } + int output_size = l.outputs * batch; + l.output = calloc(output_size, sizeof(float)); + l.delta = calloc(output_size, sizeof(float)); + + l.forward = forward_reorg_layer; + l.backward = backward_reorg_layer; +#ifdef GPU + l.forward_gpu = forward_reorg_layer_gpu; + l.backward_gpu = backward_reorg_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); +#endif + return l; +} + +void resize_reorg_layer(layer *l, int w, int h) +{ + int stride = l->stride; + int c = l->c; + + l->h = h; + l->w = w; + + if(l->reverse){ + l->out_w = w*stride; + l->out_h = h*stride; + l->out_c = c/(stride*stride); + }else{ + l->out_w = w/stride; + l->out_h = h/stride; + l->out_c = c*(stride*stride); + } + + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->outputs; + int output_size = l->outputs * l->batch; + + l->output = realloc(l->output, output_size * sizeof(float)); + l->delta = realloc(l->delta, output_size * sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, output_size); + l->delta_gpu = cuda_make_array(l->delta, output_size); +#endif +} + +void forward_reorg_layer(const layer l, network net) +{ + int i; + if(l.flatten){ + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + if(l.reverse){ + flatten(l.output, l.w*l.h, l.c, 
l.batch, 0); + }else{ + flatten(l.output, l.w*l.h, l.c, l.batch, 1); + } + } else if (l.extra) { + for(i = 0; i < l.batch; ++i){ + copy_cpu(l.inputs, net.input + i*l.inputs, 1, l.output + i*l.outputs, 1); + } + } else if (l.reverse){ + reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output); + } else { + reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output); + } +} + +void backward_reorg_layer(const layer l, network net) +{ + int i; + if(l.flatten){ + memcpy(net.delta, l.delta, l.outputs*l.batch*sizeof(float)); + if(l.reverse){ + flatten(net.delta, l.w*l.h, l.c, l.batch, 1); + }else{ + flatten(net.delta, l.w*l.h, l.c, l.batch, 0); + } + } else if(l.reverse){ + reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta); + } else if (l.extra) { + for(i = 0; i < l.batch; ++i){ + copy_cpu(l.inputs, l.delta + i*l.outputs, 1, net.delta + i*l.inputs, 1); + } + }else{ + reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta); + } +} + +#ifdef GPU +void forward_reorg_layer_gpu(layer l, network net) +{ + int i; + if(l.flatten){ + if(l.reverse){ + flatten_gpu(net.input_gpu, l.w*l.h, l.c, l.batch, 0, l.output_gpu); + }else{ + flatten_gpu(net.input_gpu, l.w*l.h, l.c, l.batch, 1, l.output_gpu); + } + } else if (l.extra) { + for(i = 0; i < l.batch; ++i){ + copy_gpu(l.inputs, net.input_gpu + i*l.inputs, 1, l.output_gpu + i*l.outputs, 1); + } + } else if (l.reverse) { + reorg_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu); + }else { + reorg_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu); + } +} + +void backward_reorg_layer_gpu(layer l, network net) +{ + if(l.flatten){ + if(l.reverse){ + flatten_gpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 1, net.delta_gpu); + }else{ + flatten_gpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 0, net.delta_gpu); + } + } else if (l.extra) { + int i; + for(i = 0; i < l.batch; ++i){ + copy_gpu(l.inputs, l.delta_gpu + i*l.outputs, 1, net.delta_gpu + i*l.inputs, 1); + } + } else 
if(l.reverse){ + reorg_gpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta_gpu); + } else { + reorg_gpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta_gpu); + } +} +#endif diff --git a/workloads/realworld/async/darknet/src/reorg_layer.h b/workloads/realworld/async/darknet/src/reorg_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1d1445f17d2874835ee19d033b50e09761374de3 --- /dev/null +++ b/workloads/realworld/async/darknet/src/reorg_layer.h @@ -0,0 +1,20 @@ +#ifndef REORG_LAYER_H +#define REORG_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra); +void resize_reorg_layer(layer *l, int w, int h); +void forward_reorg_layer(const layer l, network net); +void backward_reorg_layer(const layer l, network net); + +#ifdef GPU +void forward_reorg_layer_gpu(layer l, network net); +void backward_reorg_layer_gpu(layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/async/darknet/src/rnn_layer.c b/workloads/realworld/async/darknet/src/rnn_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..c07e338caee5418657eb1127058419566d9ef787 --- /dev/null +++ b/workloads/realworld/async/darknet/src/rnn_layer.c @@ -0,0 +1,292 @@ +#include "rnn_layer.h" +#include "connected_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTIVATION activation, int batch_normalize, int adam) +{ + fprintf(stderr, "RNN Layer: %d 
inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = RNN; + l.steps = steps; + l.inputs = inputs; + + l.state = calloc(batch*outputs, sizeof(float)); + l.prev_state = calloc(batch*outputs, sizeof(float)); + + l.input_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.input_layer) = make_connected_layer(batch*steps, inputs, outputs, activation, batch_normalize, adam); + l.input_layer->batch = batch; + + l.self_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.self_layer) = make_connected_layer(batch*steps, outputs, outputs, activation, batch_normalize, adam); + l.self_layer->batch = batch; + + l.output_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.output_layer) = make_connected_layer(batch*steps, outputs, outputs, activation, batch_normalize, adam); + l.output_layer->batch = batch; + + l.outputs = outputs; + l.output = l.output_layer->output; + l.delta = l.output_layer->delta; + + l.forward = forward_rnn_layer; + l.backward = backward_rnn_layer; + l.update = update_rnn_layer; +#ifdef GPU + l.forward_gpu = forward_rnn_layer_gpu; + l.backward_gpu = backward_rnn_layer_gpu; + l.update_gpu = update_rnn_layer_gpu; + l.state_gpu = cuda_make_array(0, batch*outputs); + l.prev_state_gpu = cuda_make_array(0, batch*outputs); + l.output_gpu = l.output_layer->output_gpu; + l.delta_gpu = l.output_layer->delta_gpu; +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l.input_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.input_layer->out_c, l.input_layer->out_h, l.input_layer->out_w); + cudnnSetTensor4dDescriptor(l.self_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.self_layer->out_c, l.self_layer->out_h, l.self_layer->out_w); + cudnnSetTensor4dDescriptor(l.output_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.output_layer->out_c, l.output_layer->out_h, l.output_layer->out_w); +#endif +#endif + + return l; +} + +void 
update_rnn_layer(layer l, update_args a) +{ + update_connected_layer(*(l.input_layer), a); + update_connected_layer(*(l.self_layer), a); + update_connected_layer(*(l.output_layer), a); +} + +void forward_rnn_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, self_layer.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, input_layer.delta, 1); + if(net.train) fill_cpu(l.outputs * l.batch, 0, l.state, 1); + + for (i = 0; i < l.steps; ++i) { + s.input = net.input; + forward_connected_layer(input_layer, s); + + s.input = l.state; + forward_connected_layer(self_layer, s); + + float *old_state = l.state; + if(net.train) l.state += l.outputs*l.batch; + if(l.shortcut){ + copy_cpu(l.outputs * l.batch, old_state, 1, l.state, 1); + }else{ + fill_cpu(l.outputs * l.batch, 0, l.state, 1); + } + axpy_cpu(l.outputs * l.batch, 1, input_layer.output, 1, l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + forward_connected_layer(output_layer, s); + + net.input += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_rnn_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + increment_layer(&input_layer, l.steps-1); + increment_layer(&self_layer, l.steps-1); + increment_layer(&output_layer, l.steps-1); + + l.state += l.outputs*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_cpu(l.outputs * l.batch, input_layer.output, 1, l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, self_layer.output, 1, l.state, 1); + + 
s.input = l.state; + s.delta = self_layer.delta; + backward_connected_layer(output_layer, s); + + l.state -= l.outputs*l.batch; + /* + if(i > 0){ + copy_cpu(l.outputs * l.batch, input_layer.output - l.outputs*l.batch, 1, l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, self_layer.output - l.outputs*l.batch, 1, l.state, 1); + }else{ + fill_cpu(l.outputs * l.batch, 0, l.state, 1); + } + */ + + s.input = l.state; + s.delta = self_layer.delta - l.outputs*l.batch; + if (i == 0) s.delta = 0; + backward_connected_layer(self_layer, s); + + copy_cpu(l.outputs*l.batch, self_layer.delta, 1, input_layer.delta, 1); + if (i > 0 && l.shortcut) axpy_cpu(l.outputs*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.outputs*l.batch, 1); + s.input = net.input + i*l.inputs*l.batch; + if(net.delta) s.delta = net.delta + i*l.inputs*l.batch; + else s.delta = 0; + backward_connected_layer(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} + +#ifdef GPU + +void pull_rnn_layer(layer l) +{ + pull_connected_layer(*(l.input_layer)); + pull_connected_layer(*(l.self_layer)); + pull_connected_layer(*(l.output_layer)); +} + +void push_rnn_layer(layer l) +{ + push_connected_layer(*(l.input_layer)); + push_connected_layer(*(l.self_layer)); + push_connected_layer(*(l.output_layer)); +} + +void update_rnn_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.input_layer), a); + update_connected_layer_gpu(*(l.self_layer), a); + update_connected_layer_gpu(*(l.output_layer), a); +} + +void forward_rnn_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_gpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, self_layer.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, 
input_layer.delta_gpu, 1); + + if(net.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.prev_state_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = net.input_gpu; + forward_connected_layer_gpu(input_layer, s); + + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(self_layer, s); + + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(output_layer, s); + + net.input_gpu += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_rnn_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + increment_layer(&input_layer, l.steps - 1); + increment_layer(&self_layer, l.steps - 1); + increment_layer(&output_layer, l.steps - 1); + float *last_input = input_layer.output_gpu; + float *last_self = self_layer.output_gpu; + for (i = l.steps-1; i >= 0; --i) { + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu; + backward_connected_layer_gpu(output_layer, s); + + if(i != 0) { + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + }else { + copy_gpu(l.outputs*l.batch, l.prev_state_gpu, 1, l.state_gpu, 1); + } + + 
copy_gpu(l.outputs*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = (i > 0) ? self_layer.delta_gpu - l.outputs*l.batch : 0; + if (i == 0) s.delta_gpu = 0; + backward_connected_layer_gpu(self_layer, s); + + s.input_gpu = net.input_gpu + i*l.inputs*l.batch; + if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch; + else s.delta_gpu = 0; + backward_connected_layer_gpu(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, last_input, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, last_self, 1, l.state_gpu, 1); +} +#endif diff --git a/workloads/realworld/async/darknet/src/rnn_layer.h b/workloads/realworld/async/darknet/src/rnn_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..270a63ffafca9a9adb7b995ed674f93c70bdeb51 --- /dev/null +++ b/workloads/realworld/async/darknet/src/rnn_layer.h @@ -0,0 +1,25 @@ + +#ifndef RNN_LAYER_H +#define RNN_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" +#define USET + +layer make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTIVATION activation, int batch_normalize, int adam); + +void forward_rnn_layer(layer l, network net); +void backward_rnn_layer(layer l, network net); +void update_rnn_layer(layer l, update_args a); + +#ifdef GPU +void forward_rnn_layer_gpu(layer l, network net); +void backward_rnn_layer_gpu(layer l, network net); +void update_rnn_layer_gpu(layer l, update_args a); +void push_rnn_layer(layer l); +void pull_rnn_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/async/darknet/src/route_layer.c b/workloads/realworld/async/darknet/src/route_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..608abe9a1c729eb6bdfd5e0d65c58196b51da496 --- /dev/null +++ 
b/workloads/realworld/async/darknet/src/route_layer.c @@ -0,0 +1,134 @@ +#include "route_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + +route_layer make_route_layer(int batch, int n, int *input_layers, int *input_sizes) +{ + fprintf(stderr,"route "); + route_layer l = {0}; + l.type = ROUTE; + l.batch = batch; + l.n = n; + l.input_layers = input_layers; + l.input_sizes = input_sizes; + int i; + int outputs = 0; + for(i = 0; i < n; ++i){ + fprintf(stderr," %d", input_layers[i]); + outputs += input_sizes[i]; + } + fprintf(stderr, "\n"); + l.outputs = outputs; + l.inputs = outputs; + l.delta = calloc(outputs*batch, sizeof(float)); + l.output = calloc(outputs*batch, sizeof(float));; + + l.forward = forward_route_layer; + l.backward = backward_route_layer; + #ifdef GPU + l.forward_gpu = forward_route_layer_gpu; + l.backward_gpu = backward_route_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, outputs*batch); + l.output_gpu = cuda_make_array(l.output, outputs*batch); + #endif + return l; +} + +void resize_route_layer(route_layer *l, network *net) +{ + int i; + layer first = net->layers[l->input_layers[0]]; + l->out_w = first.out_w; + l->out_h = first.out_h; + l->out_c = first.out_c; + l->outputs = first.outputs; + l->input_sizes[0] = first.outputs; + for(i = 1; i < l->n; ++i){ + int index = l->input_layers[i]; + layer next = net->layers[index]; + l->outputs += next.outputs; + l->input_sizes[i] = next.outputs; + if(next.out_w == first.out_w && next.out_h == first.out_h){ + l->out_c += next.out_c; + }else{ + printf("%d %d, %d %d\n", next.out_w, next.out_h, first.out_w, first.out_h); + l->out_h = l->out_w = l->out_c = 0; + } + } + l->inputs = l->outputs; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = 
cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + +void forward_route_layer(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *input = net.layers[index].output; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1); + } + offset += input_size; + } +} + +void backward_route_layer(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *delta = net.layers[index].delta; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1); + } + offset += input_size; + } +} + +#ifdef GPU +void forward_route_layer_gpu(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *input = net.layers[index].output_gpu; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + copy_gpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1); + } + offset += input_size; + } +} + +void backward_route_layer_gpu(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *delta = net.layers[index].delta_gpu; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + axpy_gpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1); + } + offset += input_size; + } +} +#endif diff --git a/workloads/realworld/async/darknet/src/route_layer.h b/workloads/realworld/async/darknet/src/route_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1d40330ff30c9c93a2180a696d5f67f628ea481c --- /dev/null +++ b/workloads/realworld/async/darknet/src/route_layer.h @@ -0,0 +1,18 @@ +#ifndef 
ROUTE_LAYER_H +#define ROUTE_LAYER_H +#include "network.h" +#include "layer.h" + +typedef layer route_layer; + +route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size); +void forward_route_layer(const route_layer l, network net); +void backward_route_layer(const route_layer l, network net); +void resize_route_layer(route_layer *l, network *net); + +#ifdef GPU +void forward_route_layer_gpu(const route_layer l, network net); +void backward_route_layer_gpu(const route_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/async/darknet/src/shortcut_layer.c b/workloads/realworld/async/darknet/src/shortcut_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..e5b9e14301c0a6b7e03b270824352f1ba40163cd --- /dev/null +++ b/workloads/realworld/async/darknet/src/shortcut_layer.c @@ -0,0 +1,90 @@ +#include "shortcut_layer.h" +#include "cuda_dark.h" +#include "blas.h" +#include "activations.h" + +#include +#include + +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) +{ + fprintf(stderr, "res %3d %4d x%4d x%4d -> %4d x%4d x%4d\n",index, w2,h2,c2, w,h,c); + layer l = {0}; + l.type = SHORTCUT; + l.batch = batch; + l.w = w2; + l.h = h2; + l.c = c2; + l.out_w = w; + l.out_h = h; + l.out_c = c; + l.outputs = w*h*c; + l.inputs = l.outputs; + + l.index = index; + + l.delta = calloc(l.outputs*batch, sizeof(float)); + l.output = calloc(l.outputs*batch, sizeof(float));; + + l.forward = forward_shortcut_layer; + l.backward = backward_shortcut_layer; + #ifdef GPU + l.forward_gpu = forward_shortcut_layer_gpu; + l.backward_gpu = backward_shortcut_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + #endif + return l; +} + +void resize_shortcut_layer(layer *l, int w, int h) +{ + assert(l->w == l->out_w); + assert(l->h == l->out_h); + l->w = l->out_w = w; + l->h = l->out_h = h; + l->outputs = 
w*h*l->out_c; + l->inputs = l->outputs; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + + +void forward_shortcut_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + shortcut_cpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output); + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_shortcut_layer(const layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + axpy_cpu(l.outputs*l.batch, l.alpha, l.delta, 1, net.delta, 1); + shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta); +} + +#ifdef GPU +void forward_shortcut_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + shortcut_gpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output_gpu); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_shortcut_layer_gpu(const layer l, network net) +{ + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + axpy_gpu(l.outputs*l.batch, l.alpha, l.delta_gpu, 1, net.delta_gpu, 1); + shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta_gpu); +} +#endif diff --git a/workloads/realworld/async/darknet/src/shortcut_layer.h b/workloads/realworld/async/darknet/src/shortcut_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..5f684fc1eadea2c6902be96bf4a4bf9a3b533da9 --- /dev/null +++ 
b/workloads/realworld/async/darknet/src/shortcut_layer.h @@ -0,0 +1,17 @@ +#ifndef SHORTCUT_LAYER_H +#define SHORTCUT_LAYER_H + +#include "layer.h" +#include "network.h" + +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); +void forward_shortcut_layer(const layer l, network net); +void backward_shortcut_layer(const layer l, network net); +void resize_shortcut_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_shortcut_layer_gpu(const layer l, network net); +void backward_shortcut_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/async/darknet/src/softmax_layer.c b/workloads/realworld/async/darknet/src/softmax_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..569b62b14097ed226d9939d8e1f1fd2899083ee6 --- /dev/null +++ b/workloads/realworld/async/darknet/src/softmax_layer.c @@ -0,0 +1,107 @@ +#include "softmax_layer.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +softmax_layer make_softmax_layer(int batch, int inputs, int groups) +{ + assert(inputs%groups == 0); + fprintf(stderr, "softmax %4d\n", inputs); + softmax_layer l = {0}; + l.type = SOFTMAX; + l.batch = batch; + l.groups = groups; + l.inputs = inputs; + l.outputs = inputs; + l.loss = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_softmax_layer; + l.backward = backward_softmax_layer; + #ifdef GPU + l.forward_gpu = forward_softmax_layer_gpu; + l.backward_gpu = backward_softmax_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.loss_gpu = cuda_make_array(l.loss, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_softmax_layer(const softmax_layer l, network net) +{ + if(l.softmax_tree){ + int i; + int count = 0; + 
for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_cpu(net.input + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output + count); + count += group_size; + } + } else { + softmax_cpu(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output); + } + + if(net.truth && !l.noloss){ + softmax_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_softmax_layer(const softmax_layer l, network net) +{ + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_softmax_layer_output(const softmax_layer layer) +{ + cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch); +} + +void forward_softmax_layer_gpu(const softmax_layer l, network net) +{ + if(l.softmax_tree){ + softmax_tree(net.input_gpu, 1, l.batch, l.inputs, l.temperature, l.output_gpu, *l.softmax_tree); + /* + int i; + int count = 0; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_gpu(net.input_gpu + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count); + count += group_size; + } + */ + } else { + if(l.spatial){ + softmax_gpu(net.input_gpu, l.c, l.batch*l.c, l.inputs/l.c, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu); + }else{ + softmax_gpu(net.input_gpu, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu); + } + } + if(net.truth && !l.noloss){ + softmax_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); + if(l.softmax_tree){ + mask_gpu(l.batch*l.inputs, l.delta_gpu, SECRET_NUM, net.truth_gpu, 0); + mask_gpu(l.batch*l.inputs, l.loss_gpu, SECRET_NUM, net.truth_gpu, 0); + } + cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void 
backward_softmax_layer_gpu(const softmax_layer layer, network net) +{ + axpy_gpu(layer.batch*layer.inputs, 1, layer.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/async/darknet/src/softmax_layer.h b/workloads/realworld/async/darknet/src/softmax_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..2e3ffe01a6c5d273a9f6139bc9f265cd7e2bc860 --- /dev/null +++ b/workloads/realworld/async/darknet/src/softmax_layer.h @@ -0,0 +1,19 @@ +#ifndef SOFTMAX_LAYER_H +#define SOFTMAX_LAYER_H +#include "layer.h" +#include "network.h" + +typedef layer softmax_layer; + +void softmax_array(float *input, int n, float temp, float *output); +softmax_layer make_softmax_layer(int batch, int inputs, int groups); +void forward_softmax_layer(const softmax_layer l, network net); +void backward_softmax_layer(const softmax_layer l, network net); + +#ifdef GPU +void pull_softmax_layer_output(const softmax_layer l); +void forward_softmax_layer_gpu(const softmax_layer l, network net); +void backward_softmax_layer_gpu(const softmax_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/async/darknet/src/stb_image.h b/workloads/realworld/async/darknet/src/stb_image.h new file mode 100644 index 0000000000000000000000000000000000000000..d9c21bc813f1f24de2a25ee3cc82bdce9413eaa5 --- /dev/null +++ b/workloads/realworld/async/darknet/src/stb_image.h @@ -0,0 +1,7462 @@ +/* stb_image - v2.19 - public domain image loader - http://nothings.org/stb + no warranty implied; use at your own risk + + Do this: + #define STB_IMAGE_IMPLEMENTATION + before you include this file in *one* C or C++ file to create the implementation. + + // i.e. it should look like this: + #include ... + #include ... + #include ... + #define STB_IMAGE_IMPLEMENTATION + #include "stb_image.h" + + You can #define STBI_ASSERT(x) before the #include to avoid using assert.h. 
+ And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free + + + QUICK NOTES: + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface + + JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) + PNG 1/2/4/8/16-bit-per-channel + + TGA (not sure what subset, if a subset) + BMP non-1bpp, non-RLE + PSD (composited view only, no extra channels, 8/16 bit-per-channel) + + GIF (*comp always reports as 4-channel) + HDR (radiance rgbE format) + PIC (Softimage PIC) + PNM (PPM and PGM binary only) + + Animated GIF still needs a proper API, but here's one way to do it: + http://gist.github.com/urraka/685d9a6340b26b830d49 + + - decode from memory or through FILE (define STBI_NO_STDIO to remove code) + - decode from arbitrary I/O callbacks + - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) + + Full documentation under "DOCUMENTATION" below. + + +LICENSE + + See end of file for license information. + +RECENT REVISION HISTORY: + + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings + 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes + 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 + RGB-format JPEG; remove white matting in PSD; + allocate large structures on the stack; + correct channel count for PNG & BMP + 2.10 (2016-01-22) avoid warning introduced in 2.09 + 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED + + See end of file for full revision history. 
+ + + ============================ Contributors ========================= + + Image formats Extensions, features + Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) + Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) + Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) + Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) + Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) + Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) + Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) + github:urraka (animated gif) Junggon Kim (PNM comments) + Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) + socks-the-fox (16-bit PNG) + Jeremy Sawicki (handle all ImageNet JPGs) + Optimizations & bugfixes Mikhail Morozov (1-bit BMP) + Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query) + Arseny Kapoulkine + John-Mark Allen + + Bug & warning fixes + Marc LeBlanc David Woo Guillaume George Martins Mozeiko + Christpher Lloyd Jerry Jansson Joseph Thomson Phil Jordan + Dave Moore Roy Eltham Hayaki Saito Nathan Reed + Won Chun Luke Graham Johan Duparc Nick Verigakis + the Horde3D community Thomas Ruf Ronny Chevalier github:rlyeh + Janez Zemva John Bartholomew Michal Cichon github:romigrou + Jonathan Blow Ken Hamada Tero Hanninen github:svdijk + Laurent Gomila Cort Stratton Sergio Gonzalez github:snagar + Aruelien Pocheville Thibault Reuille Cass Everitt github:Zelex + Ryamond Barbiero Paul Du Bois Engin Manap github:grim210 + Aldo Culquicondor Philipp Wiesemann Dale Weiler github:sammyhw + Oriol Ferrer Mesia Josh Tobin Matthew Gregan github:phprus + Julian Raschke Gregory Mullen Baldur Karlsson github:poppolopoppo + Christian Floisand Kevin Schmidt github:darealshinji + Blazej Dariusz Roszkowski github:Michaelangel007 +*/ + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +// DOCUMENTATION +// +// Limitations: +// - no 12-bit-per-channel JPEG +// - no JPEGs with arithmetic coding +// - GIF always returns *comp=4 +// +// Basic 
usage (see HDR discussion below for HDR usage): +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// // ... but 'n' will always be the number that it would have been if you said 0 +// stbi_image_free(data) +// +// Standard parameters: +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *channels_in_file -- outputs # of image components in image file +// int desired_channels -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data, or NULL on an allocation failure or if the image is +// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'desired_channels' if desired_channels is non-zero, or +// *channels_in_file otherwise. If desired_channels is non-zero, +// *channels_in_file has the number of components that _would_ have been +// output otherwise. E.g. if you set desired_channels to 4, you will always +// get RGBA output, but you can check *channels_in_file to see if it's trivially +// opaque because e.g. there were only 3 channels in the source image. +// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *channels_in_file will be unchanged. 
The function +// stbi_failure_reason() can be queried for an extremely brief, end-user +// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS +// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. +// +// =========================================================================== +// +// Philosophy +// +// stb libraries are designed with the following priorities: +// +// 1. easy to use +// 2. easy to maintain +// 3. good performance +// +// Sometimes I let "good performance" creep up in priority over "easy to maintain", +// and for best performance I may provide less-easy-to-use APIs that give higher +// performance, in addition to the easy to use ones. Nevertheless, it's important +// to keep in mind that from the standpoint of you, a client of this library, +// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. +// +// Some secondary priorities arise directly from the first two, some of which +// make more explicit reasons why performance can't be emphasized. +// +// - Portable ("ease of use") +// - Small source code footprint ("easy to maintain") +// - No dependencies ("ease of use") +// +// =========================================================================== +// +// I/O callbacks +// +// I/O callbacks allow you to read from arbitrary sources, like packaged +// files or some other source. Data read from callbacks are processed +// through a small internal buffer (currently 128 bytes) to try to reduce +// overhead. +// +// The three functions you must define are "read" (reads some bytes of data), +// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end). 
+// +// =========================================================================== +// +// SIMD support +// +// The JPEG decoder will try to automatically use SIMD kernels on x86 when +// supported by the compiler. For ARM Neon support, you must explicitly +// request it. +// +// (The old do-it-yourself SIMD API is no longer supported in the current +// code.) +// +// On x86, SSE2 will automatically be used when available based on a run-time +// test; if not, the generic C versions are used as a fall-back. On ARM targets, +// the typical path is to have separate builds for NEON and non-NEON devices +// (at least this is true for iOS and Android). Therefore, the NEON support is +// toggled by a build flag: define STBI_NEON to get NEON loops. +// +// If for some reason you do not want to use any of SIMD code, or if +// you have issues compiling it, you can disable it entirely by +// defining STBI_NO_SIMD. +// +// =========================================================================== +// +// HDR image support (disable by defining STBI_NO_HDR) +// +// stb_image now supports loading HDR images in general, and currently +// the Radiance .HDR file format, although the support is provided +// generically. You can still load any file through the existing interface; +// if you attempt to load an HDR file, it will be automatically remapped to +// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). 
+// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +// float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +// stbi_ldr_to_hdr_scale(1.0f); +// stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +// stbi_is_hdr(char *filename); +// +// =========================================================================== +// +// iPhone PNG support: +// +// By default we convert iphone-formatted PNGs back to RGB, even though +// they are internally encoded differently. You can disable this conversion +// by by calling stbi_convert_iphone_png_to_rgb(0), in which case +// you will always just get the native iphone "format" through (which +// is BGR stored in RGB). +// +// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per +// pixel to remove any premultiplied alpha *only* if the image file explicitly +// says there's premultiplied data (currently only happens in iPhone images, +// and only if iPhone convert-to-rgb processing is on). +// +// =========================================================================== +// +// ADDITIONAL CONFIGURATION +// +// - You can suppress implementation of any of the decoders to reduce +// your code footprint by #defining one or more of the following +// symbols before creating the implementation. 
+// +// STBI_NO_JPEG +// STBI_NO_PNG +// STBI_NO_BMP +// STBI_NO_PSD +// STBI_NO_TGA +// STBI_NO_GIF +// STBI_NO_HDR +// STBI_NO_PIC +// STBI_NO_PNM (.ppm and .pgm) +// +// - You can request *only* certain decoders and suppress all other ones +// (this will be more forward-compatible, as addition of new decoders +// doesn't require you to disable them explicitly): +// +// STBI_ONLY_JPEG +// STBI_ONLY_PNG +// STBI_ONLY_BMP +// STBI_ONLY_PSD +// STBI_ONLY_TGA +// STBI_ONLY_GIF +// STBI_ONLY_HDR +// STBI_ONLY_PIC +// STBI_ONLY_PNM (.ppm and .pgm) +// +// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still +// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB +// + + +#ifndef STBI_NO_STDIO +#include +#endif // STBI_NO_STDIO + +#define STBI_VERSION 1 + +enum +{ + STBI_default = 0, // only used for desired_channels + + STBI_grey = 1, + STBI_grey_alpha = 2, + STBI_rgb = 3, + STBI_rgb_alpha = 4 +}; + +typedef unsigned char stbi_uc; +typedef unsigned short stbi_us; + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef STB_IMAGE_STATIC +#define STBIDEF static +#else +#define STBIDEF extern +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// PRIMARY API - works on images of any type +// + +// +// load image by filename, open file, or memory buffer +// + +typedef struct +{ + int (*read) (void *user,char *data,int size); // fill 'data' with 'size' bytes. 
return number of bytes actually read + void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative + int (*eof) (void *user); // returns nonzero if we are at end of file/data +} stbi_io_callbacks; + +//////////////////////////////////// +// +// 8-bits-per-channel interface +// + +STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels); +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +#endif + + +#ifndef STBI_NO_STDIO +STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +// for stbi_load_from_file, file pointer is left pointing immediately after image +#endif + +//////////////////////////////////// +// +// 16-bits-per-channel interface +// + +STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +#endif + +//////////////////////////////////// +// +// float-per-channel interface +// +#ifndef STBI_NO_LINEAR + STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF 
float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + + #ifndef STBI_NO_STDIO + STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); + #endif +#endif + +#ifndef STBI_NO_HDR + STBIDEF void stbi_hdr_to_ldr_gamma(float gamma); + STBIDEF void stbi_hdr_to_ldr_scale(float scale); +#endif // STBI_NO_HDR + +#ifndef STBI_NO_LINEAR + STBIDEF void stbi_ldr_to_hdr_gamma(float gamma); + STBIDEF void stbi_ldr_to_hdr_scale(float scale); +#endif // STBI_NO_LINEAR + +// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename); +STBIDEF int stbi_is_hdr_from_file(FILE *f); +#endif // STBI_NO_STDIO + + +// get a VERY brief reason for failure +// NOT THREADSAFE +STBIDEF const char *stbi_failure_reason (void); + +// free the loaded image -- this is just free() +STBIDEF void stbi_image_free (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit (char const *filename); +STBIDEF int stbi_is_16_bit_from_file(FILE 
*f); +#endif + + + +// for image formats that explicitly notate that they have premultiplied alpha, +// we just return the colors as stored in the file. set this flag to force +// unpremultiplication. results are undefined if the unpremultiply overflow. +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); + +// indicate whether we should process iphone images back to canonical format, +// or just pass them through "as-is" +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); + +// flip the image vertically, so the first pixel in the output array is the bottom left +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); + +// ZLIB client - used by PNG, available for other purposes + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); +STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + + +#ifdef __cplusplus +} +#endif + +// +// +//// end header file ///////////////////////////////////////////////////// +#endif // STBI_INCLUDE_STB_IMAGE_H + +#ifdef STB_IMAGE_IMPLEMENTATION + +#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \ + || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \ + || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \ + || defined(STBI_ONLY_ZLIB) + #ifndef STBI_ONLY_JPEG + #define STBI_NO_JPEG + #endif + #ifndef STBI_ONLY_PNG + #define STBI_NO_PNG + #endif + #ifndef STBI_ONLY_BMP + 
#define STBI_NO_BMP + #endif + #ifndef STBI_ONLY_PSD + #define STBI_NO_PSD + #endif + #ifndef STBI_ONLY_TGA + #define STBI_NO_TGA + #endif + #ifndef STBI_ONLY_GIF + #define STBI_NO_GIF + #endif + #ifndef STBI_ONLY_HDR + #define STBI_NO_HDR + #endif + #ifndef STBI_ONLY_PIC + #define STBI_NO_PIC + #endif + #ifndef STBI_ONLY_PNM + #define STBI_NO_PNM + #endif +#endif + +#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB) +#define STBI_NO_ZLIB +#endif + + +#include +#include // ptrdiff_t on osx +#include +#include +#include + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +#include // ldexp, pow +#endif + +#ifndef STBI_NO_STDIO +#include +#endif + +#ifndef STBI_ASSERT +#include +#define STBI_ASSERT(x) assert(x) +#endif + + +#ifndef _MSC_VER + #ifdef __cplusplus + #define stbi_inline inline + #else + #define stbi_inline + #endif +#else + #define stbi_inline __forceinline +#endif + + +#ifdef _MSC_VER +typedef unsigned short stbi__uint16; +typedef signed short stbi__int16; +typedef unsigned int stbi__uint32; +typedef signed int stbi__int32; +#else +#include +typedef uint16_t stbi__uint16; +typedef int16_t stbi__int16; +typedef uint32_t stbi__uint32; +typedef int32_t stbi__int32; +#endif + +// should produce compiler error if size is wrong +typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 
1 : -1]; + +#ifdef _MSC_VER +#define STBI_NOTUSED(v) (void)(v) +#else +#define STBI_NOTUSED(v) (void)sizeof(v) +#endif + +#ifdef _MSC_VER +#define STBI_HAS_LROTL +#endif + +#ifdef STBI_HAS_LROTL + #define stbi_lrot(x,y) _lrotl(x,y) +#else + #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y)))) +#endif + +#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) +// ok +#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)." +#endif + +#ifndef STBI_MALLOC +#define STBI_MALLOC(sz) malloc(sz) +#define STBI_REALLOC(p,newsz) realloc(p,newsz) +#define STBI_FREE(p) free(p) +#endif + +#ifndef STBI_REALLOC_SIZED +#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz) +#endif + +// x86/x64 detection +#if defined(__x86_64__) || defined(_M_X64) +#define STBI__X64_TARGET +#elif defined(__i386) || defined(_M_IX86) +#define STBI__X86_TARGET +#endif + +#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) +// gcc doesn't support sse2 intrinsics unless you compile with -msse2, +// which in turn means it gets to use SSE2 everywhere. This is unfortunate, +// but previous attempts to provide the SSE2 functions with runtime +// detection caused numerous issues. The way architecture extensions are +// exposed in GCC/Clang is, sadly, not really suited for one-file libs. +// New behavior: if compiled with -msse2, we use SSE2 without any +// detection; if not, we don't use it at all. 
+#define STBI_NO_SIMD +#endif + +#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD) +// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET +// +// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the +// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant. +// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not +// simultaneously enabling "-mstackrealign". +// +// See https://github.com/nothings/stb/issues/81 for more information. +// +// So default to no SSE2 on 32-bit MinGW. If you've read this far and added +// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2. +#define STBI_NO_SIMD +#endif + +#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) +#define STBI_SSE2 +#include + +#ifdef _MSC_VER + +#if _MSC_VER >= 1400 // not VC6 +#include // __cpuid +static int stbi__cpuid3(void) +{ + int info[4]; + __cpuid(info,1); + return info[3]; +} +#else +static int stbi__cpuid3(void) +{ + int res; + __asm { + mov eax,1 + cpuid + mov res,edx + } + return res; +} +#endif + +#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name + +static int stbi__sse2_available(void) +{ + int info3 = stbi__cpuid3(); + return ((info3 >> 26) & 1) != 0; +} +#else // assume GCC-style if not VC++ +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) + +static int stbi__sse2_available(void) +{ + // If we're even attempting to compile this on GCC/Clang, that means + // -msse2 is on, which means the compiler is allowed to use SSE2 + // instructions at will, and so are we. 
   return 1;
}
#endif
#endif

// ARM NEON
#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
#undef STBI_NEON
#endif

#ifdef STBI_NEON
#include <arm_neon.h>   // NOTE(review): include target was lost in extraction; <arm_neon.h> restored — confirm against upstream
// assume GCC or Clang on ARM targets
#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
#endif

#ifndef STBI_SIMD_ALIGN
#define STBI_SIMD_ALIGN(type, name) type name
#endif

///////////////////////////////////////////////
//
//  stbi__context struct and start_xxx functions

// stbi__context structure is our basic context used by all images, so it
// contains all the IO context, plus some basic image information
typedef struct
{
   stbi__uint32 img_x, img_y;          // image dimensions in pixels
   int img_n, img_out_n;               // components in file / components requested out

   stbi_io_callbacks io;               // user-supplied read/skip/eof; io.read==NULL means memory mode
   void *io_user_data;

   int read_from_callbacks;            // nonzero while callback data may still be pulled
   int buflen;
   stbi_uc buffer_start[128];          // staging buffer for callback-based reads

   stbi_uc *img_buffer, *img_buffer_end;                     // current read window
   stbi_uc *img_buffer_original, *img_buffer_original_end;   // saved for stbi__rewind
} stbi__context;


static void stbi__refill_buffer(stbi__context *s);

// initialize a memory-decode context
static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
{
   s->io.read = NULL;
   s->read_from_callbacks = 0;
   s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
   s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
}

// initialize a callback-based context
static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
{
   s->io = *c;
   s->io_user_data = user;
   s->buflen = sizeof(s->buffer_start);
   s->read_from_callbacks = 1;
   s->img_buffer_original = s->buffer_start;
   stbi__refill_buffer(s);    // prime the window so img_buffer/img_buffer_end are valid
   s->img_buffer_original_end = s->img_buffer_end;
}

#ifndef STBI_NO_STDIO

// stdio adapters so FILE* decoding reuses the callback machinery
static int stbi__stdio_read(void *user, char *data, int size)
{
   return (int) fread(data,1,size,(FILE*) user);
}

static void stbi__stdio_skip(void *user, int n)
{
   fseek((FILE*) user, n, SEEK_CUR);
}

static int stbi__stdio_eof(void *user)
{
   return feof((FILE*) user);
}

static stbi_io_callbacks stbi__stdio_callbacks =
{
   stbi__stdio_read,
   stbi__stdio_skip,
   stbi__stdio_eof,
};

static void stbi__start_file(stbi__context *s, FILE *f)
{
   stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
}

//static void stop_file(stbi__context *s) { }

#endif // !STBI_NO_STDIO

static void stbi__rewind(stbi__context *s)
{
   // conceptually rewind SHOULD rewind to the beginning of the stream,
   // but we just rewind to the beginning of the initial buffer, because
   // we only use it after doing 'test', which only ever looks at at most 92 bytes
   s->img_buffer = s->img_buffer_original;
   s->img_buffer_end = s->img_buffer_original_end;
}

enum
{
   STBI_ORDER_RGB,
   STBI_ORDER_BGR
};

// per-decode result metadata filled in by each format loader
typedef struct
{
   int bits_per_channel;
   int num_channels;
   int channel_order;
} stbi__result_info;

// per-format test/load/info prototypes; each format can be compiled out
#ifndef STBI_NO_JPEG
static int      stbi__jpeg_test(stbi__context *s);
static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PNG
static int      stbi__png_test(stbi__context *s);
static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
static int      stbi__png_is16(stbi__context *s);
#endif

#ifndef STBI_NO_BMP
static int      stbi__bmp_test(stbi__context *s);
static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_TGA
static int      stbi__tga_test(stbi__context *s);
static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PSD
static int      stbi__psd_test(stbi__context *s);
static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
static int      stbi__psd_is16(stbi__context *s);
#endif

#ifndef STBI_NO_HDR
static int      stbi__hdr_test(stbi__context *s);
static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PIC
static int      stbi__pic_test(stbi__context *s);
static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_GIF
static int      stbi__gif_test(stbi__context *s);
static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static void    *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PNM
static int      stbi__pnm_test(stbi__context *s);
static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
#endif

// this is not threadsafe
static const char *stbi__g_failure_reason;

STBIDEF const char *stbi_failure_reason(void)
{
   return stbi__g_failure_reason;
}

// record the failure string and return 0 so callers can 'return stbi__err(...)'
static int stbi__err(const char *str)
{
   stbi__g_failure_reason = str;
   return 0;
}

static void *stbi__malloc(size_t size)
{
    return STBI_MALLOC(size);
}

// stb_image uses ints pervasively, including for offset calculations.
// therefore the largest decoded image size we can support with the
// current code, even on 64-bit targets, is INT_MAX. this is not a
// significant limitation for the intended use case.
//
// we do, however, need to make sure our size calculations don't
// overflow. hence a few helper functions for size calculations that
// multiply integers together, making sure that they're non-negative
// and no overflow occurs.

// return 1 if the sum is valid, 0 on overflow.
// negative terms are considered invalid.
static int stbi__addsizes_valid(int a, int b)
{
   if (b < 0) return 0;
   // now 0 <= b <= INT_MAX, hence also
   // 0 <= INT_MAX - b <= INTMAX.
   // And "a + b <= INT_MAX" (which might overflow) is the
   // same as a <= INT_MAX - b (no overflow)
   return a <= INT_MAX - b;
}

// returns 1 if the product is valid, 0 on overflow.
// negative factors are considered invalid.
static int stbi__mul2sizes_valid(int a, int b)
{
   if (a < 0 || b < 0) return 0;
   if (b == 0) return 1; // mul-by-0 is always safe
   // portable way to check for no overflows in a*b
   return a <= INT_MAX/b;
}

// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add);
}

// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
      stbi__addsizes_valid(a*b*c, add);
}

// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
      stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add);
}
#endif

// mallocs with size overflow checking
static void *stbi__malloc_mad2(int a, int b, int add)
{
   if (!stbi__mad2sizes_valid(a, b, add)) return NULL;
   return stbi__malloc(a*b + add);
}

static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL;
   return stbi__malloc(a*b*c + add);
}

#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
   return stbi__malloc(a*b*c*d + add);
}
#endif

// stbi__err - error
// stbi__errpf - error returning pointer to float
// stbi__errpuc - error returning pointer to unsigned char

#ifdef STBI_NO_FAILURE_STRINGS
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

// both expand to NULL; the stbi__err call records the reason as a side effect
#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))

STBIDEF void stbi_image_free(void *retval_from_stbi_load)
{
   STBI_FREE(retval_from_stbi_load);
}

#ifndef STBI_NO_LINEAR
static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
#endif

#ifndef STBI_NO_HDR
static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp);
#endif

static int stbi__vertically_flip_on_load = 0;

STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
{
   stbi__vertically_flip_on_load = flag_true_if_should_flip;
}

// dispatch to the first format whose 'test' recognizes the stream;
// bpc is the caller's preferred bits-per-channel (8 or 16), honored by PSD
static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
   ri->num_channels = 0;

   #ifndef STBI_NO_JPEG
   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNG
   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_BMP
   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_GIF
   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PSD
   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
   #endif
   #ifndef STBI_NO_PIC
   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNM
   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
   #endif

   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
      return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
   }
   #endif

   #ifndef STBI_NO_TGA
   // test tga last because it's a crappy test!
   if (stbi__tga_test(s))
      return stbi__tga_load(s,x,y,comp,req_comp, ri);
   #endif

   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
}

// narrow a 16-bit-per-channel buffer to 8; frees the input buffer
static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
{
   int i;
   int img_len = w * h * channels;
   stbi_uc *reduced;

   reduced = (stbi_uc *) stbi__malloc(img_len);
   if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");

   for (i = 0; i < img_len; ++i)
      reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling

   STBI_FREE(orig);
   return reduced;
}

// widen an 8-bit-per-channel buffer to 16; frees the input buffer
// NOTE(review): img_len*2 is not routed through the stbi__mad helpers above —
// presumably w*h*channels was already validated by the decoder; confirm upstream
static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
{
   int i;
   int img_len = w * h * channels;
   stbi__uint16 *enlarged;

   enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
   if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");

   for (i = 0; i < img_len; ++i)
      enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff

   STBI_FREE(orig);
   return enlarged;
}

static void
stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
{
   int row;
   size_t bytes_per_row = (size_t)w * bytes_per_pixel;
   stbi_uc temp[2048];    // fixed scratch; rows wider than this are swapped in chunks
   stbi_uc *bytes = (stbi_uc *)image;

   for (row = 0; row < (h>>1); row++) {
      stbi_uc *row0 = bytes + row*bytes_per_row;
      stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
      // swap row0 with row1
      size_t bytes_left = bytes_per_row;
      while (bytes_left) {
         size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
         memcpy(temp, row0, bytes_copy);
         memcpy(row0, row1, bytes_copy);
         memcpy(row1, temp, bytes_copy);
         row0 += bytes_copy;
         row1 += bytes_copy;
         bytes_left -= bytes_copy;
      }
   }
}

// flip each of the z slices (e.g. GIF frames) of a layered image in place
static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel)
{
   int slice;
   int slice_size = w * h * bytes_per_pixel;

   stbi_uc *bytes = (stbi_uc *)image;
   for (slice = 0; slice < z; ++slice) {
      stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
      bytes += slice_size;
   }
}

// decode, then force the result to 8 bits/channel and apply the global flip flag
static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   stbi__result_info ri;
   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);

   if (result == NULL)
      return NULL;

   if (ri.bits_per_channel != 8) {
      STBI_ASSERT(ri.bits_per_channel == 16);
      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
      ri.bits_per_channel = 8;
   }

   // @TODO: move stbi__convert_format to here

   if (stbi__vertically_flip_on_load) {
      int channels = req_comp ? req_comp : *comp;
      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
   }

   return (unsigned char *) result;
}

// decode, then force the result to 16 bits/channel and apply the global flip flag
static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   stbi__result_info ri;
   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);

   if (result == NULL)
      return NULL;

   if (ri.bits_per_channel != 16) {
      STBI_ASSERT(ri.bits_per_channel == 8);
      result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
      ri.bits_per_channel = 16;
   }

   // @TODO: move stbi__convert_format16 to here
   // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision

   if (stbi__vertically_flip_on_load) {
      int channels = req_comp ? req_comp : *comp;
      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
   }

   return (stbi__uint16 *) result;
}

#if !defined(STBI_NO_HDR) || !defined(STBI_NO_LINEAR)
// apply the global flip flag to a float image (HDR path)
static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
{
   if (stbi__vertically_flip_on_load && result != NULL) {
      int channels = req_comp ? req_comp : *comp;
      stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
   }
}
#endif

#ifndef STBI_NO_STDIO

// fopen wrapper: uses fopen_s on MSVC 2005+ to avoid the deprecation warning
static FILE *stbi__fopen(char const *filename, char const *mode)
{
   FILE *f;
#if defined(_MSC_VER) && _MSC_VER >= 1400
   if (0 != fopen_s(&f, filename, mode))
      f=0;
#else
   f = fopen(filename, mode);
#endif
   return f;
}


STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   FILE *f = stbi__fopen(filename, "rb");
   unsigned char *result;
   if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
   result = stbi_load_from_file(f,x,y,comp,req_comp);
   fclose(f);
   return result;
}

STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *result;
   stbi__context s;
   stbi__start_file(&s,f);
   result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
   if (result) {
      // need to 'unget' all the characters in the IO buffer
      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
   }
   return result;
}

STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
{
   stbi__uint16 *result;
   stbi__context s;
   stbi__start_file(&s,f);
   result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
   if (result) {
      // need to 'unget' all the characters in the IO buffer
      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
   }
   return result;
}

STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   FILE *f = stbi__fopen(filename, "rb");
   stbi__uint16 *result;
   if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
   result = stbi_load_from_file_16(f,x,y,comp,req_comp);
   fclose(f);
   return result;
}


#endif //!STBI_NO_STDIO

STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
{
   stbi__context s;
   stbi__start_mem(&s,buffer,len);
   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
}

STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
{
   stbi__context s;
   stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
}

STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_mem(&s,buffer,len);
   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
}

STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
}

#ifndef STBI_NO_GIF
// multi-frame GIF load; *z receives the frame count, *delays the per-frame delays
STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
{
   unsigned char *result;
   stbi__context s;
   stbi__start_mem(&s,buffer,len);

   result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
   if (stbi__vertically_flip_on_load) {
      stbi__vertical_flip_slices( result, *x, *y, *z, *comp );
   }

   return result;
}
#endif

#ifndef STBI_NO_LINEAR
// float decode: native path for HDR files, LDR->HDR conversion otherwise
static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *data;
   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      stbi__result_info ri;
      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
      if (hdr_data)
         stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
      return hdr_data;
   }
   #endif
   data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
   if (data)
      return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
   return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
}

STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_mem(&s,buffer,len);
   return stbi__loadf_main(&s,x,y,comp,req_comp);
}

STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
   return stbi__loadf_main(&s,x,y,comp,req_comp);
}

#ifndef STBI_NO_STDIO
STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   float *result;
   FILE *f = stbi__fopen(filename, "rb");
   if (!f) return stbi__errpf("can't fopen", "Unable to open file");
   result = stbi_loadf_from_file(f,x,y,comp,req_comp);
   fclose(f);
   return result;
}

STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_file(&s,f);
   return stbi__loadf_main(&s,x,y,comp,req_comp);
}
#endif // !STBI_NO_STDIO

#endif // !STBI_NO_LINEAR

// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is
// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always
// reports false!

STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
{
   #ifndef STBI_NO_HDR
   stbi__context s;
   stbi__start_mem(&s,buffer,len);
   return stbi__hdr_test(&s);
   #else
   STBI_NOTUSED(buffer);
   STBI_NOTUSED(len);
   return 0;
   #endif
}

#ifndef STBI_NO_STDIO
STBIDEF int stbi_is_hdr(char const *filename)
{
   FILE *f = stbi__fopen(filename, "rb");
   int result=0;
   if (f) {
      result = stbi_is_hdr_from_file(f);
      fclose(f);
   }
   return result;
}

STBIDEF int stbi_is_hdr_from_file(FILE *f)
{
   #ifndef STBI_NO_HDR
   long pos = ftell(f);   // remember position so the test doesn't consume the stream
   int res;
   stbi__context s;
   stbi__start_file(&s,f);
   res = stbi__hdr_test(&s);
   fseek(f, pos, SEEK_SET);
   return res;
   #else
   STBI_NOTUSED(f);
   return 0;
   #endif
}
#endif // !STBI_NO_STDIO

STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
{
   #ifndef STBI_NO_HDR
   stbi__context s;
   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
   return stbi__hdr_test(&s);
   #else
   STBI_NOTUSED(clbk);
   STBI_NOTUSED(user);
   return 0;
   #endif
}

// global gamma/scale settings for LDR<->HDR conversion (not threadsafe)
#ifndef STBI_NO_LINEAR
static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;

STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
#endif

static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;

STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }


//////////////////////////////////////////////////////////////////////////////
//
// Common code used by all image loaders
//

enum
{
   STBI__SCAN_load=0,
   STBI__SCAN_type,
   STBI__SCAN_header
};

// pull the next chunk of data from the user callback into buffer_start
static void stbi__refill_buffer(stbi__context *s)
{
   int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
   if (n == 0) {
      // at end of file, treat same as if from memory, but need to handle case
      // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
      s->read_from_callbacks = 0;
      s->img_buffer = s->buffer_start;
      s->img_buffer_end = s->buffer_start+1;
      *s->img_buffer = 0;
   } else {
      s->img_buffer = s->buffer_start;
      s->img_buffer_end = s->buffer_start + n;
   }
}

// read one byte; returns 0 once the stream is exhausted
stbi_inline static stbi_uc stbi__get8(stbi__context *s)
{
   if (s->img_buffer < s->img_buffer_end)
      return *s->img_buffer++;
   if (s->read_from_callbacks) {
      stbi__refill_buffer(s);
      return *s->img_buffer++;
   }
   return 0;
}

stbi_inline static int stbi__at_eof(stbi__context *s)
{
   if (s->io.read) {
      if (!(s->io.eof)(s->io_user_data)) return 0;
      // if feof() is true, check if buffer = end
      // special case: we've only got the special 0 character at the end
      if (s->read_from_callbacks == 0) return 1;
   }

   return s->img_buffer >= s->img_buffer_end;
}

// advance n bytes; negative n drains the buffered window (see header comment on skip)
static void stbi__skip(stbi__context *s, int n)
{
   if (n < 0) {
      s->img_buffer = s->img_buffer_end;
      return;
   }
   if (s->io.read) {
      int blen = (int) (s->img_buffer_end - s->img_buffer);
      if (blen < n) {
         s->img_buffer = s->img_buffer_end;
         (s->io.skip)(s->io_user_data, n - blen);
         return;
      }
   }
   s->img_buffer += n;
}

// read exactly n bytes into buffer; returns 1 on success, 0 on short read
static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
{
   if (s->io.read) {
      int blen = (int) (s->img_buffer_end - s->img_buffer);
      if (blen < n) {
         int res, count;

         memcpy(buffer, s->img_buffer, blen);

         count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
         res = (count == (n-blen));
         s->img_buffer = s->img_buffer_end;
         return res;
      }
   }

   if (s->img_buffer+n <= s->img_buffer_end) {
      memcpy(buffer, s->img_buffer, n);
      s->img_buffer += n;
      return 1;
   } else
      return 0;
}

// big-endian 16-bit read
static int stbi__get16be(stbi__context *s)
{
   int z = stbi__get8(s);
   return (z << 8) + stbi__get8(s);
}

// big-endian 32-bit read
static stbi__uint32 stbi__get32be(stbi__context *s)
{
   stbi__uint32 z = stbi__get16be(s);
   return (z << 16) + stbi__get16be(s);
}

#if
defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) +// nothing +#else +static int stbi__get16le(stbi__context *s) +{ + int z = stbi__get8(s); + return z + (stbi__get8(s) << 8); +} +#endif + +#ifndef STBI_NO_BMP +static stbi__uint32 stbi__get32le(stbi__context *s) +{ + stbi__uint32 z = stbi__get16le(s); + return z + (stbi__get16le(s) << 16); +} +#endif + +#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings + + +////////////////////////////////////////////////////////////////////////////// +// +// generic converter from built-in img_n to req_comp +// individual types do this automatically as much as possible (e.g. jpeg +// does all cases internally since it needs to colorspace convert anyway, +// and it never has alpha, so very few cases ). png can automatically +// interleave an alpha=255 channel, but falls back to this for other cases +// +// assume data buffer is malloced, so malloc a new one and free that one +// only failure mode is malloc failing + +static stbi_uc stbi__compute_y(int r, int g, int b) +{ + return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + unsigned char *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); + if (good == NULL) { + STBI_FREE(data); + return stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, 
req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; + default: STBI_ASSERT(0); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} + +static stbi__uint16 stbi__compute_y_16(int r, int g, int b) +{ + return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + stbi__uint16 *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); + if (good == NULL) { + STBI_FREE(data); + return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + stbi__uint16 *src = data + j * x * img_n ; + stbi__uint16 *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + 
STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; + default: STBI_ASSERT(0); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) +{ + int i,k,n; + float *output; + if (!data) return NULL; + output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); + } + if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f; + } + STBI_FREE(data); + return output; +} +#endif + +#ifndef STBI_NO_HDR +#define stbi__float2int(x) ((int) (x)) +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) +{ + int i,k,n; + stbi_uc *output; + if (!data) return NULL; + output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of 
memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + if (k < comp) { + float z = data[i*comp+k] * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + } + STBI_FREE(data); + return output; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// "baseline" JPEG/JFIF decoder +// +// simple implementation +// - doesn't support delayed output of y-dimension +// - simple interface (only one output format: 8-bit interleaved RGB) +// - doesn't try to recover corrupt jpegs +// - doesn't allow partial loading, loading multiple at once +// - still fast on x86 (copying globals into locals doesn't help x86) +// - allocates lots of intermediate memory (full size of all components) +// - non-interleaved case requires this anyway +// - allows good upsampling (see next) +// high-quality +// - upsampled channels are bilinearly interpolated, even across blocks +// - quality integer IDCT derived from IJG's 'slow' +// performance +// - fast huffman; reasonable integer IDCT +// - some SIMD kernels for common paths on targets with SSE2/NEON +// - uses a lot of intermediate memory, could cache poorly + +#ifndef STBI_NO_JPEG + +// huffman decoding acceleration +#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache + +typedef struct +{ + stbi_uc fast[1 << FAST_BITS]; + // weirdly, repacking this into AoS is a 10% speed loss, instead of a win + stbi__uint16 code[256]; + stbi_uc values[256]; + stbi_uc size[257]; + unsigned int maxcode[18]; + int delta[17]; // old 'firstsymbol' - old 'firstcode' +} stbi__huffman; + +typedef struct +{ + stbi__context *s; + stbi__huffman 
huff_dc[4]; + stbi__huffman huff_ac[4]; + stbi__uint16 dequant[4][64]; + stbi__int16 fast_ac[4][1 << FAST_BITS]; + +// sizes for components, interleaved MCUs + int img_h_max, img_v_max; + int img_mcu_x, img_mcu_y; + int img_mcu_w, img_mcu_h; + +// definition of jpeg image component + struct + { + int id; + int h,v; + int tq; + int hd,ha; + int dc_pred; + + int x,y,w2,h2; + stbi_uc *data; + void *raw_data, *raw_coeff; + stbi_uc *linebuf; + short *coeff; // progressive only + int coeff_w, coeff_h; // number of 8x8 coefficient blocks + } img_comp[4]; + + stbi__uint32 code_buffer; // jpeg entropy-coded buffer + int code_bits; // number of valid bits + unsigned char marker; // marker seen while filling entropy buffer + int nomore; // flag if we saw a marker so must stop + + int progressive; + int spec_start; + int spec_end; + int succ_high; + int succ_low; + int eob_run; + int jfif; + int app14_color_transform; // Adobe APP14 tag + int rgb; + + int scan_n, order[4]; + int restart_interval, todo; + +// kernels + void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]); + void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step); + stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs); +} stbi__jpeg; + +static int stbi__build_huffman(stbi__huffman *h, int *count) +{ + int i,j,k=0; + unsigned int code; + // build size list for each symbol (from JPEG spec) + for (i=0; i < 16; ++i) + for (j=0; j < count[i]; ++j) + h->size[k++] = (stbi_uc) (i+1); + h->size[k] = 0; + + // compute actual symbols (from jpeg spec) + code = 0; + k = 0; + for(j=1; j <= 16; ++j) { + // compute delta to add to code to compute symbol id + h->delta[j] = k - code; + if (h->size[k] == j) { + while (h->size[k] == j) + h->code[k++] = (stbi__uint16) (code++); + if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); + } + // compute largest code + 1 for this 
size, preshifted as needed later + h->maxcode[j] = code << (16-j); + code <<= 1; + } + h->maxcode[j] = 0xffffffff; + + // build non-spec acceleration table; 255 is flag for not-accelerated + memset(h->fast, 255, 1 << FAST_BITS); + for (i=0; i < k; ++i) { + int s = h->size[i]; + if (s <= FAST_BITS) { + int c = h->code[i] << (FAST_BITS-s); + int m = 1 << (FAST_BITS-s); + for (j=0; j < m; ++j) { + h->fast[c+j] = (stbi_uc) i; + } + } + } + return 1; +} + +// build a table that decodes both magnitude and value of small ACs in +// one go. +static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) +{ + int i; + for (i=0; i < (1 << FAST_BITS); ++i) { + stbi_uc fast = h->fast[i]; + fast_ac[i] = 0; + if (fast < 255) { + int rs = h->values[fast]; + int run = (rs >> 4) & 15; + int magbits = rs & 15; + int len = h->size[fast]; + + if (magbits && len + magbits <= FAST_BITS) { + // magnitude code followed by receive_extend code + int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); + int m = 1 << (magbits - 1); + if (k < m) k += (~0U << magbits) + 1; + // if the result is small enough, we can fit it in fast_ac table + if (k >= -128 && k <= 127) + fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); + } + } + } +} + +static void stbi__grow_buffer_unsafe(stbi__jpeg *j) +{ + do { + unsigned int b = j->nomore ? 
0 : stbi__get8(j->s); + if (b == 0xff) { + int c = stbi__get8(j->s); + while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes + if (c != 0) { + j->marker = (unsigned char) c; + j->nomore = 1; + return; + } + } + j->code_buffer |= b << (24 - j->code_bits); + j->code_bits += 8; + } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) +{ + unsigned int temp; + int c,k; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + // look at the top FAST_BITS and determine what symbol ID it is, + // if the code is <= FAST_BITS + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + k = h->fast[c]; + if (k < 255) { + int s = h->size[k]; + if (s > j->code_bits) + return -1; + j->code_buffer <<= s; + j->code_bits -= s; + return h->values[k]; + } + + // naive test is to shift the code_buffer down so k bits are + // valid, then test against maxcode. To speed this up, we've + // preshifted maxcode left so that it has (16-k) 0s at the + // end; in other words, regardless of the number of bits, it + // wants to be compared against something shifted to have 16; + // that way we don't need to shift inside the loop. + temp = j->code_buffer >> 16; + for (k=FAST_BITS+1 ; ; ++k) + if (temp < h->maxcode[k]) + break; + if (k == 17) { + // error! 
code not found + j->code_bits -= 16; + return -1; + } + + if (k > j->code_bits) + return -1; + + // convert the huffman code to the symbol id + c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; + STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]); + + // convert the id to a symbol + j->code_bits -= k; + j->code_buffer <<= k; + return h->values[c]; +} + +// bias[n] = (-1<code_bits < n) stbi__grow_buffer_unsafe(j); + + sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB + k = stbi_lrot(j->code_buffer, n); + STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask))); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k + (stbi__jbias[n] & ~sgn); +} + +// get some unsigned bits +stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n) +{ + unsigned int k; + if (j->code_bits < n) stbi__grow_buffer_unsafe(j); + k = stbi_lrot(j->code_buffer, n); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k; +} + +stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) +{ + unsigned int k; + if (j->code_bits < 1) stbi__grow_buffer_unsafe(j); + k = j->code_buffer; + j->code_buffer <<= 1; + --j->code_bits; + return k & 0x80000000; +} + +// given a value that's at position X in the zigzag stream, +// where does it appear in the 8x8 matrix coded as row-major? 
+static const stbi_uc stbi__jpeg_dezigzag[64+15] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // let corrupt input sample past end + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63 +}; + +// decode one 64-entry block-- +static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant) +{ + int diff,dc,k; + int t; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + t = stbi__jpeg_huff_decode(j, hdc); + if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + + // 0 all the ac values now so we can do it 32-bits at a time + memset(data,0,64*sizeof(data[0])); + + diff = t ? stbi__extend_receive(j, t) : 0; + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc * dequant[0]); + + // decode AC components, see JPEG spec + k = 1; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) * dequant[zig]); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (rs != 0xf0) break; // end block + k += 16; + } else { + k += r; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); + } + } + } while (k < 64); + return 1; +} + +static int 
stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) +{ + int diff,dc; + int t; + if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + if (j->succ_high == 0) { + // first scan for DC coefficient, must be first + memset(data,0,64*sizeof(data[0])); // 0 all the ac values now + t = stbi__jpeg_huff_decode(j, hdc); + diff = t ? stbi__extend_receive(j, t) : 0; + + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc << j->succ_low); + } else { + // refinement scan for DC coefficient + if (stbi__jpeg_get_bit(j)) + data[0] += (short) (1 << j->succ_low); + } + return 1; +} + +// @OPTIMIZE: store non-zigzagged during the decode passes, +// and only de-zigzag when dequantizing +static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac) +{ + int k; + if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->succ_high == 0) { + int shift = j->succ_low; + + if (j->eob_run) { + --j->eob_run; + return 1; + } + + k = j->spec_start; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) << shift); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r); + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + --j->eob_run; + break; + } + k += 16; + } else { + k += r; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) << shift); + } + } + } while 
(k <= j->spec_end); + } else { + // refinement scan for these AC coefficients + + short bit = (short) (1 << j->succ_low); + + if (j->eob_run) { + --j->eob_run; + for (k = j->spec_start; k <= j->spec_end; ++k) { + short *p = &data[stbi__jpeg_dezigzag[k]]; + if (*p != 0) + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } + } else { + k = j->spec_start; + do { + int r,s; + int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r) - 1; + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + r = 64; // force end of block + } else { + // r=15 s=0 should write 16 0s, so we just do + // a run of 15 0s and then write s (which is 0), + // so we don't have to do anything special here + } + } else { + if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); + // sign bit + if (stbi__jpeg_get_bit(j)) + s = bit; + else + s = -bit; + } + + // advance by r + while (k <= j->spec_end) { + short *p = &data[stbi__jpeg_dezigzag[k++]]; + if (*p != 0) { + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } else { + if (r == 0) { + *p = (short) s; + break; + } + --r; + } + } + } while (k <= j->spec_end); + } + } + return 1; +} + +// take a -128..127 value and stbi__clamp it and convert to 0..255 +stbi_inline static stbi_uc stbi__clamp(int x) +{ + // trick to use a single test to catch both cases + if ((unsigned int) x > 255) { + if (x < 0) return 0; + if (x > 255) return 255; + } + return (stbi_uc) x; +} + +#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5))) +#define stbi__fsh(x) ((x) * 4096) + +// derived from jidctint -- DCT_ISLOW +#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ + int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = 
(p2+p3) * stbi__f2f(0.5411961f); \ + t2 = p1 + p3*stbi__f2f(-1.847759065f); \ + t3 = p1 + p2*stbi__f2f( 0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = stbi__fsh(p2+p3); \ + t1 = stbi__fsh(p2-p3); \ + x0 = t0+t3; \ + x3 = t0-t3; \ + x1 = t1+t2; \ + x2 = t1-t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0+t2; \ + p4 = t1+t3; \ + p1 = t0+t3; \ + p2 = t1+t2; \ + p5 = (p3+p4)*stbi__f2f( 1.175875602f); \ + t0 = t0*stbi__f2f( 0.298631336f); \ + t1 = t1*stbi__f2f( 2.053119869f); \ + t2 = t2*stbi__f2f( 3.072711026f); \ + t3 = t3*stbi__f2f( 1.501321110f); \ + p1 = p5 + p1*stbi__f2f(-0.899976223f); \ + p2 = p5 + p2*stbi__f2f(-2.562915447f); \ + p3 = p3*stbi__f2f(-1.961570560f); \ + p4 = p4*stbi__f2f(-0.390180644f); \ + t3 += p1+p4; \ + t2 += p2+p3; \ + t1 += p2+p4; \ + t0 += p1+p3; + +static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) +{ + int i,val[64],*v=val; + stbi_uc *o; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0]*4; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + 
STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + x0 += 65536 + (128<<17); + x1 += 65536 + (128<<17); + x2 += 65536 + (128<<17); + x3 += 65536 + (128<<17); + // tried computing the shifts into temps, or'ing the temps to see + // if any were out of range, but that was slower + o[0] = stbi__clamp((x0+t3) >> 17); + o[7] = stbi__clamp((x0-t3) >> 17); + o[1] = stbi__clamp((x1+t2) >> 17); + o[6] = stbi__clamp((x1-t2) >> 17); + o[2] = stbi__clamp((x2+t1) >> 17); + o[5] = stbi__clamp((x2-t1) >> 17); + o[3] = stbi__clamp((x3+t0) >> 17); + o[4] = stbi__clamp((x3-t0) >> 17); + } +} + +#ifdef STBI_SSE2 +// sse2 integer IDCT. not the fastest possible implementation but it +// produces bit-identical results to the generic C version so it's +// fully "transparent". +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + // This is constructed to match our regular (generic) integer IDCT exactly. 
+ __m128i row0, row1, row2, row3, row4, row5, row6, row7; + __m128i tmp; + + // dot product constant: even elems=x, odd elems=y + #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y)) + + // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit) + // out(1) = c1[even]*x + c1[odd]*y + #define dct_rot(out0,out1, x,y,c0,c1) \ + __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \ + __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \ + __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ + __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ + __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ + __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) + + // out = in << 12 (in 16-bit, out 32-bit) + #define dct_widen(out, in) \ + __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ + __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) + + // wide add + #define dct_wadd(out, a, b) \ + __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_add_epi32(a##_h, b##_h) + + // wide sub + #define dct_wsub(out, a, b) \ + __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) + + // butterfly a/b, add bias, then shift by "s" and pack + #define dct_bfly32o(out0, out1, a,b,bias,s) \ + { \ + __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ + __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ + dct_wadd(sum, abiased, b); \ + dct_wsub(dif, abiased, b); \ + out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ + out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ + } + + // 8-bit interleave step (for transposes) + #define dct_interleave8(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi8(a, b); \ + b = _mm_unpackhi_epi8(tmp, b) + + // 16-bit interleave step (for transposes) + #define dct_interleave16(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi16(a, b); \ + b = _mm_unpackhi_epi16(tmp, b) + + #define dct_pass(bias,shift) \ + { \ + 
/* even part */ \ + dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \ + __m128i sum04 = _mm_add_epi16(row0, row4); \ + __m128i dif04 = _mm_sub_epi16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ + dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \ + __m128i sum17 = _mm_add_epi16(row1, row7); \ + __m128i sum35 = _mm_add_epi16(row3, row5); \ + dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \ + dct_wadd(x4, y0o, y4o); \ + dct_wadd(x5, y1o, y5o); \ + dct_wadd(x6, y2o, y5o); \ + dct_wadd(x7, y3o, y4o); \ + dct_bfly32o(row0,row7, x0,x7,bias,shift); \ + dct_bfly32o(row1,row6, x1,x6,bias,shift); \ + dct_bfly32o(row2,row5, x2,x5,bias,shift); \ + dct_bfly32o(row3,row4, x3,x4,bias,shift); \ + } + + __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); + __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f)); + __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f)); + __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f)); + __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f)); + __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f)); + __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f)); + __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f)); + + // rounding biases in column/row passes, see stbi__idct_block for explanation. 
+ __m128i bias_0 = _mm_set1_epi32(512); + __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17)); + + // load + row0 = _mm_load_si128((const __m128i *) (data + 0*8)); + row1 = _mm_load_si128((const __m128i *) (data + 1*8)); + row2 = _mm_load_si128((const __m128i *) (data + 2*8)); + row3 = _mm_load_si128((const __m128i *) (data + 3*8)); + row4 = _mm_load_si128((const __m128i *) (data + 4*8)); + row5 = _mm_load_si128((const __m128i *) (data + 5*8)); + row6 = _mm_load_si128((const __m128i *) (data + 6*8)); + row7 = _mm_load_si128((const __m128i *) (data + 7*8)); + + // column pass + dct_pass(bias_0, 10); + + { + // 16bit 8x8 transpose pass 1 + dct_interleave16(row0, row4); + dct_interleave16(row1, row5); + dct_interleave16(row2, row6); + dct_interleave16(row3, row7); + + // transpose pass 2 + dct_interleave16(row0, row2); + dct_interleave16(row1, row3); + dct_interleave16(row4, row6); + dct_interleave16(row5, row7); + + // transpose pass 3 + dct_interleave16(row0, row1); + dct_interleave16(row2, row3); + dct_interleave16(row4, row5); + dct_interleave16(row6, row7); + } + + // row pass + dct_pass(bias_1, 17); + + { + // pack + __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 + __m128i p1 = _mm_packus_epi16(row2, row3); + __m128i p2 = _mm_packus_epi16(row4, row5); + __m128i p3 = _mm_packus_epi16(row6, row7); + + // 8bit 8x8 transpose pass 1 + dct_interleave8(p0, p2); // a0e0a1e1... + dct_interleave8(p1, p3); // c0g0c1g1... + + // transpose pass 2 + dct_interleave8(p0, p1); // a0c0e0g0... + dct_interleave8(p2, p3); // b0d0f0h0... + + // transpose pass 3 + dct_interleave8(p0, p2); // a0b0c0d0... + dct_interleave8(p1, p3); // a4b4c4d4... 
+ + // store + _mm_storel_epi64((__m128i *) out, p0); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p2); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p1); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p3); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e)); + } + +#undef dct_const +#undef dct_rot +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_interleave8 +#undef dct_interleave16 +#undef dct_pass +} + +#endif // STBI_SSE2 + +#ifdef STBI_NEON + +// NEON integer IDCT. should produce bit-identical +// results to the generic C version. +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + int16x8_t row0, row1, row2, row3, row4, row5, row6, row7; + + int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f)); + int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f)); + int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f)); + int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f)); + int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f)); + int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f)); + int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f)); + int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f)); + int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f)); + int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f)); + int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f)); + int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f)); + +#define dct_long_mul(out, inq, coeff) \ + int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff) + +#define dct_long_mac(out, acc, inq, coeff) \ + int32x4_t out##_l = vmlal_s16(acc##_l, 
vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff) + +#define dct_widen(out, inq) \ + int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \ + int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12) + +// wide add +#define dct_wadd(out, a, b) \ + int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vaddq_s32(a##_h, b##_h) + +// wide sub +#define dct_wsub(out, a, b) \ + int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vsubq_s32(a##_h, b##_h) + +// butterfly a/b, then shift using "shiftop" by "s" and pack +#define dct_bfly32o(out0,out1, a,b,shiftop,s) \ + { \ + dct_wadd(sum, a, b); \ + dct_wsub(dif, a, b); \ + out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ + out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ + } + +#define dct_pass(shiftop, shift) \ + { \ + /* even part */ \ + int16x8_t sum26 = vaddq_s16(row2, row6); \ + dct_long_mul(p1e, sum26, rot0_0); \ + dct_long_mac(t2e, p1e, row6, rot0_1); \ + dct_long_mac(t3e, p1e, row2, rot0_2); \ + int16x8_t sum04 = vaddq_s16(row0, row4); \ + int16x8_t dif04 = vsubq_s16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + int16x8_t sum15 = vaddq_s16(row1, row5); \ + int16x8_t sum17 = vaddq_s16(row1, row7); \ + int16x8_t sum35 = vaddq_s16(row3, row5); \ + int16x8_t sum37 = vaddq_s16(row3, row7); \ + int16x8_t sumodd = vaddq_s16(sum17, sum35); \ + dct_long_mul(p5o, sumodd, rot1_0); \ + dct_long_mac(p1o, p5o, sum17, rot1_1); \ + dct_long_mac(p2o, p5o, sum35, rot1_2); \ + dct_long_mul(p3o, sum37, rot2_0); \ + dct_long_mul(p4o, sum15, rot2_1); \ + dct_wadd(sump13o, p1o, p3o); \ + dct_wadd(sump24o, p2o, p4o); \ + dct_wadd(sump23o, p2o, p3o); \ + dct_wadd(sump14o, p1o, p4o); \ + dct_long_mac(x4, sump13o, row7, rot3_0); \ + dct_long_mac(x5, sump24o, row5, rot3_1); \ + 
dct_long_mac(x6, sump23o, row3, rot3_2); \ + dct_long_mac(x7, sump14o, row1, rot3_3); \ + dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \ + dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \ + dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \ + dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \ + } + + // load + row0 = vld1q_s16(data + 0*8); + row1 = vld1q_s16(data + 1*8); + row2 = vld1q_s16(data + 2*8); + row3 = vld1q_s16(data + 3*8); + row4 = vld1q_s16(data + 4*8); + row5 = vld1q_s16(data + 5*8); + row6 = vld1q_s16(data + 6*8); + row7 = vld1q_s16(data + 7*8); + + // add DC bias + row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0)); + + // column pass + dct_pass(vrshrn_n_s32, 10); + + // 16bit 8x8 transpose + { +// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively. +// whether compilers actually get this is another story, sadly. +#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); } +#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); } + + // pass 1 + dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 + dct_trn16(row2, row3); + dct_trn16(row4, row5); + dct_trn16(row6, row7); + + // pass 2 + dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 + dct_trn32(row1, row3); + dct_trn32(row4, row6); + dct_trn32(row5, row7); + + // pass 3 + dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 + dct_trn64(row1, row5); + dct_trn64(row2, row6); + dct_trn64(row3, row7); + +#undef dct_trn16 +#undef dct_trn32 +#undef dct_trn64 + } + + // row pass + // vrshrn_n_s32 only supports shifts up to 16, we need + // 17. so do a non-rounding shift of 16 first then follow + // up with a rounding shift by 1. 
+ dct_pass(vshrn_n_s32, 16); + + { + // pack and round + uint8x8_t p0 = vqrshrun_n_s16(row0, 1); + uint8x8_t p1 = vqrshrun_n_s16(row1, 1); + uint8x8_t p2 = vqrshrun_n_s16(row2, 1); + uint8x8_t p3 = vqrshrun_n_s16(row3, 1); + uint8x8_t p4 = vqrshrun_n_s16(row4, 1); + uint8x8_t p5 = vqrshrun_n_s16(row5, 1); + uint8x8_t p6 = vqrshrun_n_s16(row6, 1); + uint8x8_t p7 = vqrshrun_n_s16(row7, 1); + + // again, these can translate into one instruction, but often don't. +#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); } +#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); } + + // sadly can't use interleaved stores here since we only write + // 8 bytes to each scan line! 
+ + // 8x8 8-bit transpose pass 1 + dct_trn8_8(p0, p1); + dct_trn8_8(p2, p3); + dct_trn8_8(p4, p5); + dct_trn8_8(p6, p7); + + // pass 2 + dct_trn8_16(p0, p2); + dct_trn8_16(p1, p3); + dct_trn8_16(p4, p6); + dct_trn8_16(p5, p7); + + // pass 3 + dct_trn8_32(p0, p4); + dct_trn8_32(p1, p5); + dct_trn8_32(p2, p6); + dct_trn8_32(p3, p7); + + // store + vst1_u8(out, p0); out += out_stride; + vst1_u8(out, p1); out += out_stride; + vst1_u8(out, p2); out += out_stride; + vst1_u8(out, p3); out += out_stride; + vst1_u8(out, p4); out += out_stride; + vst1_u8(out, p5); out += out_stride; + vst1_u8(out, p6); out += out_stride; + vst1_u8(out, p7); + +#undef dct_trn8_8 +#undef dct_trn8_16 +#undef dct_trn8_32 + } + +#undef dct_long_mul +#undef dct_long_mac +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_pass +} + +#endif // STBI_NEON + +#define STBI__MARKER_none 0xff +// if there's a pending marker from the entropy stream, return that +// otherwise, fetch from the stream and get a marker. if there's no +// marker, return 0xff, which is never a valid marker value +static stbi_uc stbi__get_marker(stbi__jpeg *j) +{ + stbi_uc x; + if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; } + x = stbi__get8(j->s); + if (x != 0xff) return STBI__MARKER_none; + while (x == 0xff) + x = stbi__get8(j->s); // consume repeated 0xff fill bytes + return x; +} + +// in each scan, we'll have scan_n components, and the order +// of the components is specified by order[] +#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) + +// after a restart interval, stbi__jpeg_reset the entropy decoder and +// the dc prediction +static void stbi__jpeg_reset(stbi__jpeg *j) +{ + j->code_bits = 0; + j->code_buffer = 0; + j->nomore = 0; + j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0; + j->marker = STBI__MARKER_none; + j->todo = j->restart_interval ? 
j->restart_interval : 0x7fffffff; + j->eob_run = 0; + // no more than 1<<31 MCUs if no restart_interal? that's plenty safe, + // since we don't even allow 1<<30 pixels +} + +static int stbi__parse_entropy_coded_data(stbi__jpeg *z) +{ + stbi__jpeg_reset(z); + if (!z->progressive) { + if (z->scan_n == 1) { + int i,j; + STBI_SIMD_ALIGN(short, data[64]); + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int i,j,k,x,y; + STBI_SIMD_ALIGN(short, data[64]); + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... 
process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x)*8; + int y2 = (j*z->img_comp[n].v + y)*8; + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data); + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } else { + if (z->scan_n == 1) { + int i,j; + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + if (z->spec_start == 0) { + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } else { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha])) + return 0; + } + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int 
i,j,k,x,y; + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x); + int y2 = (j*z->img_comp[n].v + y); + short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } +} + +static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) +{ + int i; + for (i=0; i < 64; ++i) + data[i] *= dequant[i]; +} + +static void stbi__jpeg_finish(stbi__jpeg *z) +{ + if (z->progressive) { + // dequantize and idct the data + int i,j,n; + for (n=0; n < z->s->img_n; ++n) { + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + } + } + } + } +} + +static int stbi__process_marker(stbi__jpeg *z, int m) +{ + int L; + switch (m) { + case STBI__MARKER_none: // no marker found + return stbi__err("expected marker","Corrupt JPEG"); + + case 0xDD: // DRI - specify restart interval + if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); + z->restart_interval = 
stbi__get16be(z->s); + return 1; + + case 0xDB: // DQT - define quantization table + L = stbi__get16be(z->s)-2; + while (L > 0) { + int q = stbi__get8(z->s); + int p = q >> 4, sixteen = (p != 0); + int t = q & 15,i; + if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); + if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); + + for (i=0; i < 64; ++i) + z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); + L -= (sixteen ? 129 : 65); + } + return L==0; + + case 0xC4: // DHT - define huffman table + L = stbi__get16be(z->s)-2; + while (L > 0) { + stbi_uc *v; + int sizes[16],i,n=0; + int q = stbi__get8(z->s); + int tc = q >> 4; + int th = q & 15; + if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG"); + for (i=0; i < 16; ++i) { + sizes[i] = stbi__get8(z->s); + n += sizes[i]; + } + L -= 17; + if (tc == 0) { + if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; + v = z->huff_dc[th].values; + } else { + if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0; + v = z->huff_ac[th].values; + } + for (i=0; i < n; ++i) + v[i] = stbi__get8(z->s); + if (tc != 0) + stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); + L -= n; + } + return L==0; + } + + // check for comment block or APP blocks + if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { + L = stbi__get16be(z->s); + if (L < 2) { + if (m == 0xFE) + return stbi__err("bad COM len","Corrupt JPEG"); + else + return stbi__err("bad APP len","Corrupt JPEG"); + } + L -= 2; + + if (m == 0xE0 && L >= 5) { // JFIF APP0 segment + static const unsigned char tag[5] = {'J','F','I','F','\0'}; + int ok = 1; + int i; + for (i=0; i < 5; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 5; + if (ok) + z->jfif = 1; + } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment + static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; + int ok = 1; + int i; + for (i=0; i < 6; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 6; 
+ if (ok) { + stbi__get8(z->s); // version + stbi__get16be(z->s); // flags0 + stbi__get16be(z->s); // flags1 + z->app14_color_transform = stbi__get8(z->s); // color transform + L -= 6; + } + } + + stbi__skip(z->s, L); + return 1; + } + + return stbi__err("unknown marker","Corrupt JPEG"); +} + +// after we see SOS +static int stbi__process_scan_header(stbi__jpeg *z) +{ + int i; + int Ls = stbi__get16be(z->s); + z->scan_n = stbi__get8(z->s); + if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG"); + if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG"); + for (i=0; i < z->scan_n; ++i) { + int id = stbi__get8(z->s), which; + int q = stbi__get8(z->s); + for (which = 0; which < z->s->img_n; ++which) + if (z->img_comp[which].id == id) + break; + if (which == z->s->img_n) return 0; // no match + z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG"); + z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG"); + z->order[i] = which; + } + + { + int aa; + z->spec_start = stbi__get8(z->s); + z->spec_end = stbi__get8(z->s); // should be 63, but might be 0 + aa = stbi__get8(z->s); + z->succ_high = (aa >> 4); + z->succ_low = (aa & 15); + if (z->progressive) { + if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13) + return stbi__err("bad SOS", "Corrupt JPEG"); + } else { + if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG"); + if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG"); + z->spec_end = 63; + } + } + + return 1; +} + +static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why) +{ + int i; + for (i=0; i < ncomp; ++i) { + if (z->img_comp[i].raw_data) { + STBI_FREE(z->img_comp[i].raw_data); + z->img_comp[i].raw_data = NULL; + z->img_comp[i].data = NULL; + 
} + if (z->img_comp[i].raw_coeff) { + STBI_FREE(z->img_comp[i].raw_coeff); + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].coeff = 0; + } + if (z->img_comp[i].linebuf) { + STBI_FREE(z->img_comp[i].linebuf); + z->img_comp[i].linebuf = NULL; + } + } + return why; +} + +static int stbi__process_frame_header(stbi__jpeg *z, int scan) +{ + stbi__context *s = z->s; + int Lf,p,i,q, h_max=1,v_max=1,c; + Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG + p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline + s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires + c = stbi__get8(s); + if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); + s->img_n = c; + for (i=0; i < c; ++i) { + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; + } + + if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); + + z->rgb = 0; + for (i=0; i < s->img_n; ++i) { + static const unsigned char rgb[3] = { 'R', 'G', 'B' }; + z->img_comp[i].id = stbi__get8(s); + if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) + ++z->rgb; + q = stbi__get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); + z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); + } + + if (scan != STBI__SCAN_load) return 1; + + if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); + + for (i=0; i < s->img_n; ++i) { + if 
(z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; + if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; + } + + // compute interleaved mcu info + z->img_h_max = h_max; + z->img_v_max = v_max; + z->img_mcu_w = h_max * 8; + z->img_mcu_h = v_max * 8; + // these sizes can't be more than 17 bits + z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; + z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; + + for (i=0; i < s->img_n; ++i) { + // number of effective pixels (e.g. for non-interleaved MCU) + z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; + z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; + // to simplify generation, we'll allocate enough memory to decode + // the bogus oversized data from using interleaved MCUs and their + // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't + // discard the extra data until colorspace conversion + // + // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) + // so these muls can't overflow with 32-bit ints (which we require) + z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; + z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; + z->img_comp[i].coeff = 0; + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].linebuf = NULL; + z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); + if (z->img_comp[i].raw_data == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); + // align blocks for idct using mmx/sse + z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); + if (z->progressive) { + // w2, h2 are multiples of 8 (see above) + z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; + z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; + z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); + if (z->img_comp[i].raw_coeff == NULL) + return stbi__free_jpeg_components(z, i+1, 
stbi__err("outofmem", "Out of memory")); + z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); + } + } + + return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. SOF) +#define stbi__DNL(x) ((x) == 0xdc) +#define stbi__SOI(x) ((x) == 0xd8) +#define stbi__EOI(x) ((x) == 0xd9) +#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) +#define stbi__SOS(x) ((x) == 0xda) + +#define stbi__SOF_progressive(x) ((x) == 0xc2) + +static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) +{ + int m; + z->jfif = 0; + z->app14_color_transform = -1; // valid values are 0,1,2 + z->marker = STBI__MARKER_none; // initialize cached marker to empty + m = stbi__get_marker(z); + if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG"); + if (scan == STBI__SCAN_type) return 1; + m = stbi__get_marker(z); + while (!stbi__SOF(m)) { + if (!stbi__process_marker(z,m)) return 0; + m = stbi__get_marker(z); + while (m == STBI__MARKER_none) { + // some files have extra padding after their blocks, so ok, we'll scan + if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); + m = stbi__get_marker(z); + } + } + z->progressive = stbi__SOF_progressive(m); + if (!stbi__process_frame_header(z, scan)) return 0; + return 1; +} + +// decode image to YCbCr format +static int stbi__decode_jpeg_image(stbi__jpeg *j) +{ + int m; + for (m = 0; m < 4; m++) { + j->img_comp[m].raw_data = NULL; + j->img_comp[m].raw_coeff = NULL; + } + j->restart_interval = 0; + if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0; + m = stbi__get_marker(j); + while (!stbi__EOI(m)) { + if (stbi__SOS(m)) { + if (!stbi__process_scan_header(j)) return 0; + if (!stbi__parse_entropy_coded_data(j)) return 0; + if (j->marker == STBI__MARKER_none ) { + // handle 0s at the end of image data from IP Kamera 9060 + while (!stbi__at_eof(j->s)) { + int x = stbi__get8(j->s); + if (x == 255) { + j->marker = stbi__get8(j->s); + break; + } + } + // if we 
reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 + } + } else if (stbi__DNL(m)) { + int Ld = stbi__get16be(j->s); + stbi__uint32 NL = stbi__get16be(j->s); + if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); + if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); + } else { + if (!stbi__process_marker(j, m)) return 0; + } + m = stbi__get_marker(j); + } + if (j->progressive) + stbi__jpeg_finish(j); + return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1, + int w, int hs); + +#define stbi__div4(x) ((stbi_uc) ((x) >> 2)) + +static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + STBI_NOTUSED(out); + STBI_NOTUSED(in_far); + STBI_NOTUSED(w); + STBI_NOTUSED(hs); + return in_near; +} + +static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples vertically for every one in input + int i; + STBI_NOTUSED(hs); + for (i=0; i < w; ++i) + out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); + return out; +} + +static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples horizontally for every one in input + int i; + stbi_uc *input = in_near; + + if (w == 1) { + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + return out; + } + + out[0] = input[0]; + out[1] = stbi__div4(input[0]*3 + input[1] + 2); + for (i=1; i < w-1; ++i) { + int n = 3*input[i]+2; + out[i*2+0] = stbi__div4(n+input[i-1]); + out[i*2+1] = stbi__div4(n+input[i+1]); + } + out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2); + out[i*2+1] = input[w-1]; + + STBI_NOTUSED(in_far); + STBI_NOTUSED(hs); + + return out; +} + +#define stbi__div16(x) ((stbi_uc) ((x) >> 4)) + +static stbi_uc 
*stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i,t0,t1; + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + out[0] = stbi__div4(t1+2); + for (i=1; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i=0,t0,t1; + + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + // process groups of 8 pixels for as long as we can. + // note we can't handle the last pixel in a row in this loop + // because we need to handle the filter boundary conditions. + for (; i < ((w-1) & ~7); i += 8) { +#if defined(STBI_SSE2) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + __m128i zero = _mm_setzero_si128(); + __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i)); + __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i)); + __m128i farw = _mm_unpacklo_epi8(farb, zero); + __m128i nearw = _mm_unpacklo_epi8(nearb, zero); + __m128i diff = _mm_sub_epi16(farw, nearw); + __m128i nears = _mm_slli_epi16(nearw, 2); + __m128i curr = _mm_add_epi16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. 
+ __m128i prv0 = _mm_slli_si128(curr, 2); + __m128i nxt0 = _mm_srli_si128(curr, 2); + __m128i prev = _mm_insert_epi16(prv0, t1, 0); + __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + __m128i bias = _mm_set1_epi16(8); + __m128i curs = _mm_slli_epi16(curr, 2); + __m128i prvd = _mm_sub_epi16(prev, curr); + __m128i nxtd = _mm_sub_epi16(next, curr); + __m128i curb = _mm_add_epi16(curs, bias); + __m128i even = _mm_add_epi16(prvd, curb); + __m128i odd = _mm_add_epi16(nxtd, curb); + + // interleave even and odd pixels, then undo scaling. + __m128i int0 = _mm_unpacklo_epi16(even, odd); + __m128i int1 = _mm_unpackhi_epi16(even, odd); + __m128i de0 = _mm_srli_epi16(int0, 4); + __m128i de1 = _mm_srli_epi16(int1, 4); + + // pack and write output + __m128i outv = _mm_packus_epi16(de0, de1); + _mm_storeu_si128((__m128i *) (out + i*2), outv); +#elif defined(STBI_NEON) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + uint8x8_t farb = vld1_u8(in_far + i); + uint8x8_t nearb = vld1_u8(in_near + i); + int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); + int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); + int16x8_t curr = vaddq_s16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. 
+ int16x8_t prv0 = vextq_s16(curr, curr, 7); + int16x8_t nxt0 = vextq_s16(curr, curr, 1); + int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); + int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + int16x8_t curs = vshlq_n_s16(curr, 2); + int16x8_t prvd = vsubq_s16(prev, curr); + int16x8_t nxtd = vsubq_s16(next, curr); + int16x8_t even = vaddq_s16(curs, prvd); + int16x8_t odd = vaddq_s16(curs, nxtd); + + // undo scaling and round, then store with even/odd phases interleaved + uint8x8x2_t o; + o.val[0] = vqrshrun_n_s16(even, 4); + o.val[1] = vqrshrun_n_s16(odd, 4); + vst2_u8(out + i*2, o); +#endif + + // "previous" value for next iter + t1 = 3*in_near[i+7] + in_far[i+7]; + } + + t0 = t1; + t1 = 3*in_near[i] + in_far[i]; + out[i*2] = stbi__div16(3*t1 + t0 + 8); + + for (++i; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} +#endif + +static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // resample with nearest-neighbor + int i,j; + STBI_NOTUSED(in_far); + for (i=0; i < w; ++i) + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; + return out; +} + +// this is a reduced-precision calculation of YCbCr-to-RGB introduced +// to make sure the code produces the same results in both SIMD and scalar +#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8) +static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) +{ + int i; + for (i=0; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int 
cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) +{ + int i = 0; + +#ifdef STBI_SSE2 + // step == 3 is pretty ugly on the final interleave, and i'm not convinced + // it's useful in practice (you wouldn't use it for textures, for example). + // so just accelerate step == 4 case. + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. 
+ __m128i signflip = _mm_set1_epi8(-0x80); + __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f)); + __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); + __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); + __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f)); + __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128); + __m128i xw = _mm_set1_epi16(255); // alpha channel + + for (; i+7 < count; i += 8) { + // load + __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i)); + __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i)); + __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i)); + __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 + __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 + + // unpack to short (and left-shift cr, cb by 8) + __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes); + __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); + __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); + + // color transform + __m128i yws = _mm_srli_epi16(yw, 4); + __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); + __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); + __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); + __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); + __m128i rws = _mm_add_epi16(cr0, yws); + __m128i gwt = _mm_add_epi16(cb0, yws); + __m128i bws = _mm_add_epi16(yws, cb1); + __m128i gws = _mm_add_epi16(gwt, cr1); + + // descale + __m128i rw = _mm_srai_epi16(rws, 4); + __m128i bw = _mm_srai_epi16(bws, 4); + __m128i gw = _mm_srai_epi16(gws, 4); + + // back to byte, set up for transpose + __m128i brb = _mm_packus_epi16(rw, bw); + __m128i gxb = _mm_packus_epi16(gw, xw); + + // transpose to interleave channels + __m128i t0 = _mm_unpacklo_epi8(brb, gxb); + __m128i t1 = _mm_unpackhi_epi8(brb, gxb); + __m128i o0 = _mm_unpacklo_epi16(t0, t1); + __m128i o1 = _mm_unpackhi_epi16(t0, t1); + + // store + _mm_storeu_si128((__m128i *) (out + 0), o0); + 
      _mm_storeu_si128((__m128i *) (out + 16), o1);
         out += 32;
      }
   }
#endif

#ifdef STBI_NEON
   // in this version, step=3 support would be easy to add. but is there demand?
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      uint8x8_t signflip = vdup_n_u8(0x80);
      // YCbCr->RGB coefficients in 4.12 fixed point (scaled by 4096, rounded)
      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));

      for (; i+7 < count; i += 8) {
         // load 8 pixels' worth of each plane
         uint8x8_t y_bytes  = vld1_u8(y + i);
         uint8x8_t cr_bytes = vld1_u8(pcr + i);
         uint8x8_t cb_bytes = vld1_u8(pcb + i);
         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));

         // expand to s16
         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
         int16x8_t crw = vshll_n_s8(cr_biased, 7);
         int16x8_t cbw = vshll_n_s8(cb_biased, 7);

         // color transform
         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
         int16x8_t rws = vaddq_s16(yws, cr0);
         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
         int16x8_t bws = vaddq_s16(yws, cb1);

         // undo scaling, round, convert to byte
         uint8x8x4_t o;
         o.val[0] = vqrshrun_n_s16(rws, 4);
         o.val[1] = vqrshrun_n_s16(gws, 4);
         o.val[2] = vqrshrun_n_s16(bws, 4);
         o.val[3] = vdup_n_u8(255);

         // store, interleaving r/g/b/a
         vst4_u8(out, o);
         out += 8*4;
      }
   }
#endif

   // scalar tail: same transform in 12.20 fixed point for leftover pixels
   for (; i < count; ++i) {
      int y_fixed = (y[i] << 20) + (1<<19); // rounding
      int r,g,b;
      int cr = pcr[i] - 128;
      int cb = pcb[i] - 128;
      r = y_fixed + cr* stbi__float2fixed(1.40200f);
      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      b = y_fixed + cb* stbi__float2fixed(1.77200f);
      r >>= 20;
      g >>= 20;
      b >>= 20;
      // clamp to 0..255 (single unsigned compare catches both ends)
      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255;
      out += step;
   }
}
#endif

// set up the kernels: install scalar defaults, then override with SIMD
// variants when the build/platform provides them
static void stbi__setup_jpeg(stbi__jpeg *j)
{
   j->idct_block_kernel = stbi__idct_block;
   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;

#ifdef STBI_SSE2
   if (stbi__sse2_available()) {
      j->idct_block_kernel = stbi__idct_simd;
      j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
      j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
   }
#endif

#ifdef STBI_NEON
   j->idct_block_kernel = stbi__idct_simd;
   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
#endif
}

// clean up the temporary component buffers
static void stbi__cleanup_jpeg(stbi__jpeg *j)
{
   stbi__free_jpeg_components(j, j->s->img_n, 0);
}

// per-component upsampling state for chroma subsampled JPEGs
typedef struct
{
   resample_row_func resample;
   stbi_uc *line0,*line1;
   int hs,vs;   // expansion factor in each axis
   int w_lores; // horizontal pixels pre-expansion
   int ystep;   // how far through vertical expansion we are
   int ypos;    // which pre-expansion row we're on
} stbi__resample;

// fast 0..255 * 0..255 => 0..255 rounded multiplication
static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
{
   unsigned int t = x*y + 128;
   return (stbi_uc) ((t + (t >>8)) >> 8);
}

// decode, upsample and color-convert a JPEG into an interleaved 8-bit buffer
// with req_comp channels (0 = natural channel count)
static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
{
   int n, decode_n, is_rgb;
   z->s->img_n = 0; // make stbi__cleanup_jpeg safe

   // validate req_comp
   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");

   // load a jpeg image from
   // whichever source, but leave in YCbCr format
   if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }

   // determine actual number of components to generate
   n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;

   // 3-channel data is genuinely RGB (not YCbCr) if the scan said so or
   // Adobe APP14 transform==0 without a JFIF marker
   is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));

   // grayscale output from YCbCr only needs the luma plane decoded
   if (z->s->img_n == 3 && n < 3 && !is_rgb)
      decode_n = 1;
   else
      decode_n = z->s->img_n;

   // resample and color-convert
   {
      int k;
      unsigned int i,j;
      stbi_uc *output;
      stbi_uc *coutput[4];

      stbi__resample res_comp[4];

      for (k=0; k < decode_n; ++k) {
         stbi__resample *r = &res_comp[k];

         // allocate line buffer big enough for upsampling off the edges
         // with upsample factor of 4
         z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

         r->hs      = z->img_h_max / z->img_comp[k].h;
         r->vs      = z->img_v_max / z->img_comp[k].v;
         r->ystep   = r->vs >> 1;
         r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
         r->ypos    = 0;
         r->line0   = r->line1 = z->img_comp[k].data;

         // pick the fastest resampler for this component's expansion factors
         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
         else                               r->resample = stbi__resample_row_generic;
      }

      // can't error after this so, this is safe
      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
      if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

      // now go ahead and resample
      for (j=0; j < z->s->img_y; ++j) {
         stbi_uc *out = output + n * z->s->img_x * j;
         for (k=0; k < decode_n; ++k) {
            stbi__resample *r = &res_comp[k];
            int y_bot = r->ystep >= (r->vs >> 1);
            coutput[k] = r->resample(z->img_comp[k].linebuf,
                                     y_bot ? r->line1 : r->line0,
                                     y_bot ? r->line0 : r->line1,
                                     r->w_lores, r->hs);
            if (++r->ystep >= r->vs) {
               r->ystep = 0;
               r->line0 = r->line1;
               if (++r->ypos < z->img_comp[k].y)
                  r->line1 += z->img_comp[k].w2;
            }
         }
         if (n >= 3) {
            stbi_uc *y = coutput[0];
            if (z->s->img_n == 3) {
               if (is_rgb) {
                  for (i=0; i < z->s->img_x; ++i) {
                     out[0] = y[i];
                     out[1] = coutput[1][i];
                     out[2] = coutput[2][i];
                     out[3] = 255;
                     out += n;
                  }
               } else {
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else if (z->s->img_n == 4) {
               if (z->app14_color_transform == 0) { // CMYK
                  for (i=0; i < z->s->img_x; ++i) {
                     stbi_uc m = coutput[3][i];
                     out[0] = stbi__blinn_8x8(coutput[0][i], m);
                     out[1] = stbi__blinn_8x8(coutput[1][i], m);
                     out[2] = stbi__blinn_8x8(coutput[2][i], m);
                     out[3] = 255;
                     out += n;
                  }
               } else if (z->app14_color_transform == 2) { // YCCK
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
                  for (i=0; i < z->s->img_x; ++i) {
                     stbi_uc m = coutput[3][i];
                     out[0] = stbi__blinn_8x8(255 - out[0], m);
                     out[1] = stbi__blinn_8x8(255 - out[1], m);
                     out[2] = stbi__blinn_8x8(255 - out[2], m);
                     out += n;
                  }
               } else { // YCbCr + alpha?
                  // Ignore the fourth channel for now
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = out[1] = out[2] = y[i];
                  out[3] = 255; // not used if n==3
                  out += n;
               }
         } else {
            if (is_rgb) {
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i)
                     *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
               else {
                  for (i=0; i < z->s->img_x; ++i, out += 2) {
                     out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
                     out[1] = 255;
                  }
               }
            } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
               // CMYK -> gray
               for (i=0; i < z->s->img_x; ++i) {
                  stbi_uc m = coutput[3][i];
                  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
                  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
                  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
                  out[0] = stbi__compute_y(r, g, b);
                  out[1] = 255;
                  out += n;
               }
            } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
               // YCCK -> gray (luma only)
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
                  out[1] = 255;
                  out += n;
               }
            } else {
               stbi_uc *y = coutput[0];
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
               else
                  for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
            }
         }
      }
      stbi__cleanup_jpeg(z);
      *out_x = z->s->img_x;
      *out_y = z->s->img_y;
      if (comp) *comp = z->s->img_n >= 3 ?
3 : 1; // report original components, not output + return output; + } +} + +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + unsigned char* result; + stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg)); + STBI_NOTUSED(ri); + j->s = s; + stbi__setup_jpeg(j); + result = load_jpeg_image(j, x,y,comp,req_comp); + STBI_FREE(j); + return result; +} + +static int stbi__jpeg_test(stbi__context *s) +{ + int r; + stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg)); + j->s = s; + stbi__setup_jpeg(j); + r = stbi__decode_jpeg_header(j, STBI__SCAN_type); + stbi__rewind(s); + STBI_FREE(j); + return r; +} + +static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) +{ + if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) { + stbi__rewind( j->s ); + return 0; + } + if (x) *x = j->s->img_x; + if (y) *y = j->s->img_y; + if (comp) *comp = j->s->img_n >= 3 ? 3 : 1; + return 1; +} + +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) +{ + int result; + stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg))); + j->s = s; + result = stbi__jpeg_info_raw(j, x, y, comp); + STBI_FREE(j); + return result; +} +#endif + +// public domain zlib decode v0.2 Sean Barrett 2006-11-18 +// simple implementation +// - all input must be provided in an upfront buffer +// - all output is written to a single output buffer (can malloc/realloc) +// performance +// - fast huffman + +#ifndef STBI_NO_ZLIB + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables +#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1) + +// zlib-style huffman encoding +// (jpegs packs from left, zlib from right, so can't share code) +typedef struct +{ + stbi__uint16 fast[1 << STBI__ZFAST_BITS]; + stbi__uint16 firstcode[16]; + int maxcode[17]; + stbi__uint16 firstsymbol[16]; + stbi_uc size[288]; + stbi__uint16 
value[288]; +} stbi__zhuffman; + +stbi_inline static int stbi__bitreverse16(int n) +{ + n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); + n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); + n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); + n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); + return n; +} + +stbi_inline static int stbi__bit_reverse(int v, int bits) +{ + STBI_ASSERT(bits <= 16); + // to bit reverse n bits, reverse 16 and shift + // e.g. 11 bits, bit reverse and shift away 5 + return stbi__bitreverse16(v) >> (16-bits); +} + +static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num) +{ + int i,k=0; + int code, next_code[16], sizes[17]; + + // DEFLATE spec for generating codes + memset(sizes, 0, sizeof(sizes)); + memset(z->fast, 0, sizeof(z->fast)); + for (i=0; i < num; ++i) + ++sizes[sizelist[i]]; + sizes[0] = 0; + for (i=1; i < 16; ++i) + if (sizes[i] > (1 << i)) + return stbi__err("bad sizes", "Corrupt PNG"); + code = 0; + for (i=1; i < 16; ++i) { + next_code[i] = code; + z->firstcode[i] = (stbi__uint16) code; + z->firstsymbol[i] = (stbi__uint16) k; + code = (code + sizes[i]); + if (sizes[i]) + if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG"); + z->maxcode[i] = code << (16-i); // preshift for inner loop + code <<= 1; + k += sizes[i]; + } + z->maxcode[16] = 0x10000; // sentinel + for (i=0; i < num; ++i) { + int s = sizelist[i]; + if (s) { + int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; + stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); + z->size [c] = (stbi_uc ) s; + z->value[c] = (stbi__uint16) i; + if (s <= STBI__ZFAST_BITS) { + int j = stbi__bit_reverse(next_code[s],s); + while (j < (1 << STBI__ZFAST_BITS)) { + z->fast[j] = fastv; + j += (1 << s); + } + } + ++next_code[s]; + } + } + return 1; +} + +// zlib-from-memory implementation for PNG reading +// because PNG allows splitting the zlib stream arbitrarily, +// and it's annoying structurally to have PNG call ZLIB call PNG, +// we require 
PNG read all the IDATs and combine them into a single +// memory buffer + +typedef struct +{ + stbi_uc *zbuffer, *zbuffer_end; + int num_bits; + stbi__uint32 code_buffer; + + char *zout; + char *zout_start; + char *zout_end; + int z_expandable; + + stbi__zhuffman z_length, z_distance; +} stbi__zbuf; + +stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z) +{ + if (z->zbuffer >= z->zbuffer_end) return 0; + return *z->zbuffer++; +} + +static void stbi__fill_bits(stbi__zbuf *z) +{ + do { + STBI_ASSERT(z->code_buffer < (1U << z->num_bits)); + z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; + z->num_bits += 8; + } while (z->num_bits <= 24); +} + +stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n) +{ + unsigned int k; + if (z->num_bits < n) stbi__fill_bits(z); + k = z->code_buffer & ((1 << n) - 1); + z->code_buffer >>= n; + z->num_bits -= n; + return k; +} + +static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s,k; + // not resolved by fast table, so compute it the slow way + // use jpeg approach, which requires MSbits at top + k = stbi__bit_reverse(a->code_buffer, 16); + for (s=STBI__ZFAST_BITS+1; ; ++s) + if (k < z->maxcode[s]) + break; + if (s == 16) return -1; // invalid code! 
+ // code size is s, so: + b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; + STBI_ASSERT(z->size[b] == s); + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; +} + +stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s; + if (a->num_bits < 16) stbi__fill_bits(a); + b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; + if (b) { + s = b >> 9; + a->code_buffer >>= s; + a->num_bits -= s; + return b & 511; + } + return stbi__zhuffman_decode_slowpath(a, z); +} + +static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes +{ + char *q; + int cur, limit, old_limit; + z->zout = zout; + if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); + cur = (int) (z->zout - z->zout_start); + limit = old_limit = (int) (z->zout_end - z->zout_start); + while (cur + n > limit) + limit *= 2; + q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); + STBI_NOTUSED(old_limit); + if (q == NULL) return stbi__err("outofmem", "Out of memory"); + z->zout_start = q; + z->zout = q + cur; + z->zout_end = q + limit; + return 1; +} + +static const int stbi__zlength_base[31] = { + 3,4,5,6,7,8,9,10,11,13, + 15,17,19,23,27,31,35,43,51,59, + 67,83,99,115,131,163,195,227,258,0,0 }; + +static const int stbi__zlength_extra[31]= +{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + +static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + +static const int stbi__zdist_extra[32] = +{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int stbi__parse_huffman_block(stbi__zbuf *a) +{ + char *zout = a->zout; + for(;;) { + int z = stbi__zhuffman_decode(a, &a->z_length); + if (z < 256) { + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes + if (zout >= a->zout_end) { + if (!stbi__zexpand(a, zout, 1)) return 
0; + zout = a->zout; + } + *zout++ = (char) z; + } else { + stbi_uc *p; + int len,dist; + if (z == 256) { + a->zout = zout; + return 1; + } + z -= 257; + len = stbi__zlength_base[z]; + if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); + z = stbi__zhuffman_decode(a, &a->z_distance); + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); + dist = stbi__zdist_base[z]; + if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); + if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); + if (zout + len > a->zout_end) { + if (!stbi__zexpand(a, zout, len)) return 0; + zout = a->zout; + } + p = (stbi_uc *) (zout - dist); + if (dist == 1) { // run of one byte; common in images. + stbi_uc v = *p; + if (len) { do *zout++ = v; while (--len); } + } else { + if (len) { do *zout++ = *p++; while (--len); } + } + } + } +} + +static int stbi__compute_huffman_codes(stbi__zbuf *a) +{ + static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + stbi__zhuffman z_codelength; + stbi_uc lencodes[286+32+137];//padding for maximum single op + stbi_uc codelength_sizes[19]; + int i,n; + + int hlit = stbi__zreceive(a,5) + 257; + int hdist = stbi__zreceive(a,5) + 1; + int hclen = stbi__zreceive(a,4) + 4; + int ntot = hlit + hdist; + + memset(codelength_sizes, 0, sizeof(codelength_sizes)); + for (i=0; i < hclen; ++i) { + int s = stbi__zreceive(a,3); + codelength_sizes[length_dezigzag[i]] = (stbi_uc) s; + } + if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; + + n = 0; + while (n < ntot) { + int c = stbi__zhuffman_decode(a, &z_codelength); + if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG"); + if (c < 16) + lencodes[n++] = (stbi_uc) c; + else { + stbi_uc fill = 0; + if (c == 16) { + c = stbi__zreceive(a,2)+3; + if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); + fill = lencodes[n-1]; + } else if (c == 17) + c = 
stbi__zreceive(a,3)+3; + else { + STBI_ASSERT(c == 18); + c = stbi__zreceive(a,7)+11; + } + if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); + memset(lencodes+n, fill, c); + n += c; + } + } + if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG"); + if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; + return 1; +} + +static int stbi__parse_uncompressed_block(stbi__zbuf *a) +{ + stbi_uc header[4]; + int len,nlen,k; + if (a->num_bits & 7) + stbi__zreceive(a, a->num_bits & 7); // discard + // drain the bit-packed data into header + k = 0; + while (a->num_bits > 0) { + header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check + a->code_buffer >>= 8; + a->num_bits -= 8; + } + STBI_ASSERT(a->num_bits == 0); + // now fill header the normal way + while (k < 4) + header[k++] = stbi__zget8(a); + len = header[1] * 256 + header[0]; + nlen = header[3] * 256 + header[2]; + if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); + if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG"); + if (a->zout + len > a->zout_end) + if (!stbi__zexpand(a, a->zout, len)) return 0; + memcpy(a->zout, a->zbuffer, len); + a->zbuffer += len; + a->zout += len; + return 1; +} + +static int stbi__parse_zlib_header(stbi__zbuf *a) +{ + int cmf = stbi__zget8(a); + int cm = cmf & 15; + /* int cinfo = cmf >> 4; */ + int flg = stbi__zget8(a); + if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec + if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png + if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png + // window = 1 << (8 + cinfo)... 
but who cares, we fully buffer output + return 1; +} + +static const stbi_uc stbi__zdefault_length[288] = +{ + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8 +}; +static const stbi_uc stbi__zdefault_distance[32] = +{ + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 +}; +/* +Init algorithm: +{ + int i; // use <= to match clearly with spec + for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8; + for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9; + for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7; + for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8; + + for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5; +} +*/ + +static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) +{ + int final, type; + if (parse_header) + if (!stbi__parse_zlib_header(a)) return 0; + a->num_bits = 0; + a->code_buffer = 0; + do { + final = stbi__zreceive(a,1); + type = stbi__zreceive(a,2); + if (type == 0) { + if (!stbi__parse_uncompressed_block(a)) return 0; + } else if (type == 3) { + return 0; + } else { + if (type == 1) { + // use fixed code lengths + if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0; + } else { + if (!stbi__compute_huffman_codes(a)) return 0; + } + if (!stbi__parse_huffman_block(a)) return 0; + } + } while (!final); + return 1; +} + +static int stbi__do_zlib(stbi__zbuf *a, char 
*obuf, int olen, int exp, int parse_header) +{ + a->zout_start = obuf; + a->zout = obuf; + a->zout_end = obuf + olen; + a->z_expandable = exp; + + return stbi__parse_zlib(a, parse_header); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, 1)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) +{ + return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 1)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(16384); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer+len; + if (stbi__do_zlib(&a, p, 16384, 1, 0)) { + if (outlen) *outlen = (int) (a.zout - 
a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 0)) + return (int) (a.zout - a.zout_start); + else + return -1; +} +#endif + +// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 +// simple implementation +// - only 8-bit samples +// - no CRC checking +// - allocates lots of intermediate memory +// - avoids problem of streaming data between subsystems +// - avoids explicit window management +// performance +// - uses stb_zlib, a PD zlib implementation with fast huffman decoding + +#ifndef STBI_NO_PNG +typedef struct +{ + stbi__uint32 length; + stbi__uint32 type; +} stbi__pngchunk; + +static stbi__pngchunk stbi__get_chunk_header(stbi__context *s) +{ + stbi__pngchunk c; + c.length = stbi__get32be(s); + c.type = stbi__get32be(s); + return c; +} + +static int stbi__check_png_header(stbi__context *s) +{ + static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; + int i; + for (i=0; i < 8; ++i) + if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); + return 1; +} + +typedef struct +{ + stbi__context *s; + stbi_uc *idata, *expanded, *out; + int depth; +} stbi__png; + + +enum { + STBI__F_none=0, + STBI__F_sub=1, + STBI__F_up=2, + STBI__F_avg=3, + STBI__F_paeth=4, + // synthetic filters used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first, + STBI__F_paeth_first +}; + +static stbi_uc first_row_filter[5] = +{ + STBI__F_none, + STBI__F_sub, + STBI__F_none, + STBI__F_avg_first, + STBI__F_paeth_first +}; + +static int stbi__paeth(int a, int b, int c) +{ + int p = a + b - c; + int pa = abs(p-a); + int pb = abs(p-b); + int pc = abs(p-c); + if (pa <= pb && pa <= pc) return a; + if (pb <= pc) return b; + return c; +} + 
// multipliers that spread a 1/2/4-bit grayscale sample across 8 bits
static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };

// create the png data from post-deflated data
// Undo per-scanline filtering into a->out (out_n channels, 8- or 16-bit),
// then expand sub-byte depths in a second pass. Decodes in place where
// possible. Returns 1 on success.
static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
{
   int bytes = (depth == 16? 2 : 1);
   stbi__context *s = a->s;
   stbi__uint32 i,j,stride = x*out_n*bytes;
   stbi__uint32 img_len, img_width_bytes;
   int k;
   int img_n = s->img_n; // copy it into a local for later

   int output_bytes = out_n*bytes;
   int filter_bytes = img_n*bytes;
   int width = x;

   STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
   a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
   if (!a->out) return stbi__err("outofmem", "Out of memory");

   if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
   img_width_bytes = (((img_n * x * depth) + 7) >> 3);
   img_len = (img_width_bytes + 1) * y;

   // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
   // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
   // so just check for raw_len < img_len always.
   if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");

   for (j=0; j < y; ++j) {
      stbi_uc *cur = a->out + stride*j;
      stbi_uc *prior;
      int filter = *raw++;

      if (filter > 4)
         return stbi__err("invalid filter","Corrupt PNG");

      if (depth < 8) {
         STBI_ASSERT(img_width_bytes <= x);
         cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
         filter_bytes = 1;
         width = img_width_bytes;
      }
      prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above

      // if first row, use special filter that doesn't sample previous row
      if (j == 0) filter = first_row_filter[filter];

      // handle first byte explicitly
      for (k=0; k < filter_bytes; ++k) {
         switch (filter) {
            case STBI__F_none       : cur[k] = raw[k]; break;
            case STBI__F_sub        : cur[k] = raw[k]; break;
            case STBI__F_up         : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
            case STBI__F_avg        : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
            case STBI__F_paeth      : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
            case STBI__F_avg_first  : cur[k] = raw[k]; break;
            case STBI__F_paeth_first: cur[k] = raw[k]; break;
         }
      }

      // advance past the first pixel, inserting opaque alpha if expanding
      if (depth == 8) {
         if (img_n != out_n)
            cur[img_n] = 255; // first pixel
         raw += img_n;
         cur += out_n;
         prior += out_n;
      } else if (depth == 16) {
         if (img_n != out_n) {
            cur[filter_bytes]   = 255; // first pixel top byte
            cur[filter_bytes+1] = 255; // first pixel bottom byte
         }
         raw += filter_bytes;
         cur += output_bytes;
         prior += output_bytes;
      } else {
         raw += 1;
         cur += 1;
         prior += 1;
      }

      // this is a little gross, so that we don't switch per-pixel or per-component
      if (depth < 8 || img_n == out_n) {
         int nk = (width - 1)*filter_bytes;
         #define STBI__CASE(f) \
             case f:     \
                for (k=0; k < nk; ++k)
         switch (filter) {
            // "none" filter turns into a memcpy here; make that explicit.
            case STBI__F_none:         memcpy(cur, raw, nk); break;
            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
         }
         #undef STBI__CASE
         raw += nk;
      } else {
         // img_n -> img_n+1 expansion: filter and widen in one pass,
         // setting alpha to 255 as each pixel is emitted
         STBI_ASSERT(img_n+1 == out_n);
         #define STBI__CASE(f) \
             case f:     \
                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
                   for (k=0; k < filter_bytes; ++k)
         switch (filter) {
            STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
         }
         #undef STBI__CASE

         // the loop above sets the high byte of the pixels' alpha, but for
         // 16 bit png files we also need the low byte set. we'll do that here.
         if (depth == 16) {
            cur = a->out + stride*j; // start at the beginning of the row again
            for (i=0; i < x; ++i,cur+=output_bytes) {
               cur[filter_bytes+1] = 255;
            }
         }
      }
   }

   // we make a separate pass to expand bits to pixels; for performance,
   // this could run two scanlines behind the above code, so it won't
   // intefere with filtering but will still be in the cache.
   if (depth < 8) {
      for (j=0; j < y; ++j) {
         stbi_uc *cur = a->out + stride*j;
         stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes;
         // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
         // png guarantee byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
         stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range

         // note that the final byte might overshoot and write more data than desired.
         // we can allocate enough data that this never writes out of memory, but it
         // could also overwrite the next scanline. can it overwrite non-empty data
         // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
         // so we need to explicitly clamp the final ones

         if (depth == 4) {
            for (k=x*img_n; k >= 2; k-=2, ++in) {
               *cur++ = scale * ((*in >> 4)       );
               *cur++ = scale * ((*in     ) & 0x0f);
            }
            if (k > 0) *cur++ = scale * ((*in >> 4)       );
         } else if (depth == 2) {
            for (k=x*img_n; k >= 4; k-=4, ++in) {
               *cur++ = scale * ((*in >> 6)       );
               *cur++ = scale * ((*in >> 4) & 0x03);
               *cur++ = scale * ((*in >> 2) & 0x03);
               *cur++ = scale * ((*in     ) & 0x03);
            }
            if (k > 0) *cur++ = scale * ((*in >> 6)       );
            if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
            if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
         } else if (depth == 1) {
            for (k=x*img_n; k >= 8; k-=8, ++in) {
               *cur++ = scale * ((*in >> 7)       );
               *cur++ = scale * ((*in >> 6) & 0x01);
               *cur++ = scale * ((*in >> 5) & 0x01);
               *cur++ = scale * ((*in >> 4) & 0x01);
               *cur++ = scale * ((*in >> 3) & 0x01);
               *cur++ = scale * ((*in >> 2) & 0x01);
               *cur++ = scale * ((*in >> 1) & 0x01);
               *cur++ = scale * ((*in     ) & 0x01);
            }
            if (k > 0) *cur++ = scale * ((*in >> 7)       );
            if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
            if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
            if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
            if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
            if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
            if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
         }
         if (img_n != out_n) {
            int q;
            // insert alpha = 255 (walk backwards so the widening is in place)
            cur = a->out + stride*j;
            if (img_n == 1) {
               for (q=x-1; q >= 0; --q) {
                  cur[q*2+1] = 255;
                  cur[q*2+0] = cur[q];
               }
            } else {
               STBI_ASSERT(img_n == 3);
               for (q=x-1; q >= 0; --q) {
                  cur[q*4+3] = 255;
                  cur[q*4+2] = cur[q*3+2];
                  cur[q*4+1] = cur[q*3+1];
                  cur[q*4+0] = cur[q*3+0];
               }
            }
         }
      }
   } else if (depth == 16) {
      // force the image data from big-endian to platform-native.
      // this is done in a separate pass due to the decoding relying
      // on the data being untouched, but could probably be done
      // per-line during decode if care is taken.
+ stbi_uc *cur = a->out; + stbi__uint16 *cur16 = (stbi__uint16*)cur; + + for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { + *cur16 = (cur[0] << 8) | cur[1]; + } + } + + return 1; +} + +static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) +{ + int bytes = (depth == 16 ? 2 : 1); + int out_bytes = out_n * bytes; + stbi_uc *final; + int p; + if (!interlaced) + return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); + + // de-interlacing + final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); + for (p=0; p < 7; ++p) { + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; + if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { + STBI_FREE(final); + return 0; + } + for (j=0; j < y; ++j) { + for (i=0; i < x; ++i) { + int out_y = j*yspc[p]+yorig[p]; + int out_x = i*xspc[p]+xorig[p]; + memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, + a->out + (j*x+i)*out_bytes, out_bytes); + } + } + STBI_FREE(a->out); + image_data += img_len; + image_data_len -= img_len; + } + } + a->out = final; + + return 1; +} + +static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + // compute color-based transparency, assuming we've + // already got 255 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i=0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] 
? 0 : 255); + p += 2; + } + } else { + for (i=0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi__uint16 *p = (stbi__uint16*) z->out; + + // compute color-based transparency, assuming we've + // already got 65535 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i = 0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 65535); + p += 2; + } + } else { + for (i = 0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n) +{ + stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y; + stbi_uc *p, *temp_out, *orig = a->out; + + p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0); + if (p == NULL) return stbi__err("outofmem", "Out of memory"); + + // between here and free(out) below, exitting would leak + temp_out = p; + + if (pal_img_n == 3) { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p += 3; + } + } else { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p[3] = palette[n+3]; + p += 4; + } + } + STBI_FREE(a->out); + a->out = temp_out; + + STBI_NOTUSED(len); + + return 1; +} + +static int stbi__unpremultiply_on_load = 0; +static int stbi__de_iphone_flag = 0; + +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) +{ + stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply; +} + +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) +{ + stbi__de_iphone_flag = 
flag_true_if_should_convert; +} + +static void stbi__de_iphone(stbi__png *z) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + if (s->img_out_n == 3) { // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 3; + } + } else { + STBI_ASSERT(s->img_out_n == 4); + if (stbi__unpremultiply_on_load) { + // convert bgr to rgb and unpremultiply + for (i=0; i < pixel_count; ++i) { + stbi_uc a = p[3]; + stbi_uc t = p[0]; + if (a) { + stbi_uc half = a / 2; + p[0] = (p[2] * 255 + half) / a; + p[1] = (p[1] * 255 + half) / a; + p[2] = ( t * 255 + half) / a; + } else { + p[0] = p[2]; + p[2] = t; + } + p += 4; + } + } else { + // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 4; + } + } + } +} + +#define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) + +static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) +{ + stbi_uc palette[1024], pal_img_n=0; + stbi_uc has_trans=0, tc[3]; + stbi__uint16 tc16[3]; + stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; + int first=1,k,interlace=0, color=0, is_iphone=0; + stbi__context *s = z->s; + + z->expanded = NULL; + z->idata = NULL; + z->out = NULL; + + if (!stbi__check_png_header(s)) return 0; + + if (scan == STBI__SCAN_type) return 1; + + for (;;) { + stbi__pngchunk c = stbi__get_chunk_header(s); + switch (c.type) { + case STBI__PNG_TYPE('C','g','B','I'): + is_iphone = 1; + stbi__skip(s, c.length); + break; + case STBI__PNG_TYPE('I','H','D','R'): { + int comp,filter; + if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); + first = 0; + if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); + s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)"); + s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) 
return stbi__err("too large","Very large image (corrupt?)"); + z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); + color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); + comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG"); + filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG"); + interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); + if (!pal_img_n) { + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); + if (scan == STBI__SCAN_header) return 1; + } else { + // if paletted, then pal_n is our final components, and + // img_n is # components to decompress/filter. 
+ s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); + // if SCAN_header, have to scan to see if we have a tRNS + } + break; + } + + case STBI__PNG_TYPE('P','L','T','E'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); + pal_len = c.length / 3; + if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); + for (i=0; i < pal_len; ++i) { + palette[i*4+0] = stbi__get8(s); + palette[i*4+1] = stbi__get8(s); + palette[i*4+2] = stbi__get8(s); + palette[i*4+3] = 255; + } + break; + } + + case STBI__PNG_TYPE('t','R','N','S'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); + if (pal_img_n) { + if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } + if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); + if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); + pal_img_n = 4; + for (i=0; i < c.length; ++i) + palette[i*4+3] = stbi__get8(s); + } else { + if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); + if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); + has_trans = 1; + if (z->depth == 16) { + for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is + } else { + for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger + } + } + break; + } + + case STBI__PNG_TYPE('I','D','A','T'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); + if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; } + if ((int)(ioff + c.length) < (int)ioff) return 0; + if (ioff + c.length > idata_limit) { + stbi__uint32 idata_limit_old = 
idata_limit; + stbi_uc *p; + if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096; + while (ioff + c.length > idata_limit) + idata_limit *= 2; + STBI_NOTUSED(idata_limit_old); + p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); + z->idata = p; + } + if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); + ioff += c.length; + break; + } + + case STBI__PNG_TYPE('I','E','N','D'): { + stbi__uint32 raw_len, bpl; + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (scan != STBI__SCAN_load) return 1; + if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); + // initial guess for decoded data size to avoid unnecessary reallocs + bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component + raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; + z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); + if (z->expanded == NULL) return 0; // zlib should set error + STBI_FREE(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; + else + s->img_out_n = s->img_n; + if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; + if (has_trans) { + if (z->depth == 16) { + if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; + } else { + if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; + } + } + if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) + stbi__de_iphone(z); + if (pal_img_n) { + // pal_img_n == 3 or 4 + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) + return 0; + } else if 
(has_trans) { + // non-paletted image with tRNS -> source image has (constant) alpha + ++s->img_n; + } + STBI_FREE(z->expanded); z->expanded = NULL; + return 1; + } + + default: + // if critical, fail + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if ((c.type & (1 << 29)) == 0) { + #ifndef STBI_NO_FAILURE_STRINGS + // not threadsafe + static char invalid_chunk[] = "XXXX PNG chunk not known"; + invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); + invalid_chunk[1] = STBI__BYTECAST(c.type >> 16); + invalid_chunk[2] = STBI__BYTECAST(c.type >> 8); + invalid_chunk[3] = STBI__BYTECAST(c.type >> 0); + #endif + return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type"); + } + stbi__skip(s, c.length); + break; + } + // end of PNG chunk, read and skip CRC + stbi__get32be(s); + } +} + +static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri) +{ + void *result=NULL; + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { + if (p->depth < 8) + ri->bits_per_channel = 8; + else + ri->bits_per_channel = p->depth; + result = p->out; + p->out = NULL; + if (req_comp && req_comp != p->s->img_out_n) { + if (ri->bits_per_channel == 8) + result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + else + result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + p->s->img_out_n = req_comp; + if (result == NULL) return result; + } + *x = p->s->img_x; + *y = p->s->img_y; + if (n) *n = p->s->img_n; + } + STBI_FREE(p->out); p->out = NULL; + STBI_FREE(p->expanded); p->expanded = NULL; + STBI_FREE(p->idata); p->idata = NULL; + + return result; +} + +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi__png p; + p.s = s; + return stbi__do_png(&p, 
x,y,comp,req_comp, ri); +} + +static int stbi__png_test(stbi__context *s) +{ + int r; + r = stbi__check_png_header(s); + stbi__rewind(s); + return r; +} + +static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp) +{ + if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) { + stbi__rewind( p->s ); + return 0; + } + if (x) *x = p->s->img_x; + if (y) *y = p->s->img_y; + if (comp) *comp = p->s->img_n; + return 1; +} + +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__png p; + p.s = s; + return stbi__png_info_raw(&p, x, y, comp); +} + +static int stbi__png_is16(stbi__context *s) +{ + stbi__png p; + p.s = s; + if (!stbi__png_info_raw(&p, NULL, NULL, NULL)) + return 0; + if (p.depth != 16) { + stbi__rewind(p.s); + return 0; + } + return 1; +} +#endif + +// Microsoft/Windows BMP image + +#ifndef STBI_NO_BMP +static int stbi__bmp_test_raw(stbi__context *s) +{ + int r; + int sz; + if (stbi__get8(s) != 'B') return 0; + if (stbi__get8(s) != 'M') return 0; + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + stbi__get32le(s); // discard data offset + sz = stbi__get32le(s); + r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124); + return r; +} + +static int stbi__bmp_test(stbi__context *s) +{ + int r = stbi__bmp_test_raw(s); + stbi__rewind(s); + return r; +} + + +// returns 0..31 for the highest set bit +static int stbi__high_bit(unsigned int z) +{ + int n=0; + if (z == 0) return -1; + if (z >= 0x10000) n += 16, z >>= 16; + if (z >= 0x00100) n += 8, z >>= 8; + if (z >= 0x00010) n += 4, z >>= 4; + if (z >= 0x00004) n += 2, z >>= 2; + if (z >= 0x00002) n += 1, z >>= 1; + return n; +} + +static int stbi__bitcount(unsigned int a) +{ + a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 + a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4 + a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits + a = (a + (a >> 8)); // max 16 per 8 bits + a = 
(a + (a >> 16)); // max 32 per 8 bits + return a & 0xff; +} + +// extract an arbitrarily-aligned N-bit value (N=bits) +// from v, and then make it 8-bits long and fractionally +// extend it to full full range. +static int stbi__shiftsigned(int v, int shift, int bits) +{ + static unsigned int mul_table[9] = { + 0, + 0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/, + 0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/, + }; + static unsigned int shift_table[9] = { + 0, 0,0,1,0,2,4,6,0, + }; + if (shift < 0) + v <<= -shift; + else + v >>= shift; + STBI_ASSERT(v >= 0 && v < 256); + v >>= (8-bits); + STBI_ASSERT(bits >= 0 && bits <= 8); + return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits]; +} + +typedef struct +{ + int bpp, offset, hsz; + unsigned int mr,mg,mb,ma, all_a; +} stbi__bmp_data; + +static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) +{ + int hsz; + if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP"); + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + info->offset = stbi__get32le(s); + info->hsz = hsz = stbi__get32le(s); + info->mr = info->mg = info->mb = info->ma = 0; + + if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown"); + if (hsz == 12) { + s->img_x = stbi__get16le(s); + s->img_y = stbi__get16le(s); + } else { + s->img_x = stbi__get32le(s); + s->img_y = stbi__get32le(s); + } + if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); + info->bpp = stbi__get16le(s); + if (hsz != 12) { + int compress = stbi__get32le(s); + if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); + stbi__get32le(s); // discard sizeof + stbi__get32le(s); // discard hres + stbi__get32le(s); // discard vres + stbi__get32le(s); // 
discard colorsused + stbi__get32le(s); // discard max important + if (hsz == 40 || hsz == 56) { + if (hsz == 56) { + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + } + if (info->bpp == 16 || info->bpp == 32) { + if (compress == 0) { + if (info->bpp == 32) { + info->mr = 0xffu << 16; + info->mg = 0xffu << 8; + info->mb = 0xffu << 0; + info->ma = 0xffu << 24; + info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 + } else { + info->mr = 31u << 10; + info->mg = 31u << 5; + info->mb = 31u << 0; + } + } else if (compress == 3) { + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + // not documented, but generated by photoshop and handled by mspaint + if (info->mr == info->mg && info->mg == info->mb) { + // ?!?!? + return stbi__errpuc("bad BMP", "bad BMP"); + } + } else + return stbi__errpuc("bad BMP", "bad BMP"); + } + } else { + int i; + if (hsz != 108 && hsz != 124) + return stbi__errpuc("bad BMP", "bad BMP"); + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + info->ma = stbi__get32le(s); + stbi__get32le(s); // discard color space + for (i=0; i < 12; ++i) + stbi__get32le(s); // discard color space parameters + if (hsz == 124) { + stbi__get32le(s); // discard rendering intent + stbi__get32le(s); // discard offset of profile data + stbi__get32le(s); // discard size of profile data + stbi__get32le(s); // discard reserved + } + } + } + return (void *) 1; +} + + +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + unsigned int mr=0,mg=0,mb=0,ma=0, all_a; + stbi_uc pal[256][4]; + int psize=0,i,j,width; + int flip_vertically, pad, target; + stbi__bmp_data info; + STBI_NOTUSED(ri); + + info.all_a = 255; + if (stbi__bmp_parse_header(s, &info) == NULL) + return NULL; // error code already set + + flip_vertically = ((int) s->img_y) > 0; + s->img_y = 
abs((int) s->img_y); + + mr = info.mr; + mg = info.mg; + mb = info.mb; + ma = info.ma; + all_a = info.all_a; + + if (info.hsz == 12) { + if (info.bpp < 24) + psize = (info.offset - 14 - 24) / 3; + } else { + if (info.bpp < 16) + psize = (info.offset - 14 - info.hsz) >> 2; + } + + s->img_n = ma ? 4 : 3; + if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 + target = req_comp; + else + target = s->img_n; // if they want monochrome, we'll post-convert + + // sanity-check size + if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "Corrupt BMP"); + + out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + if (info.bpp < 16) { + int z=0; + if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); } + for (i=0; i < psize; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + if (info.hsz != 12) stbi__get8(s); + pal[i][3] = 255; + } + stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 
3 : 4)); + if (info.bpp == 1) width = (s->img_x + 7) >> 3; + else if (info.bpp == 4) width = (s->img_x + 1) >> 1; + else if (info.bpp == 8) width = s->img_x; + else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); } + pad = (-width)&3; + if (info.bpp == 1) { + for (j=0; j < (int) s->img_y; ++j) { + int bit_offset = 7, v = stbi__get8(s); + for (i=0; i < (int) s->img_x; ++i) { + int color = (v>>bit_offset)&0x1; + out[z++] = pal[color][0]; + out[z++] = pal[color][1]; + out[z++] = pal[color][2]; + if((--bit_offset) < 0) { + bit_offset = 7; + v = stbi__get8(s); + } + } + stbi__skip(s, pad); + } + } else { + for (j=0; j < (int) s->img_y; ++j) { + for (i=0; i < (int) s->img_x; i += 2) { + int v=stbi__get8(s),v2=0; + if (info.bpp == 4) { + v2 = v & 15; + v >>= 4; + } + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + v = (info.bpp == 8) ? stbi__get8(s) : v2; + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + } + stbi__skip(s, pad); + } + } + } else { + int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; + int z = 0; + int easy=0; + stbi__skip(s, info.offset - 14 - info.hsz); + if (info.bpp == 24) width = 3 * s->img_x; + else if (info.bpp == 16) width = 2*s->img_x; + else /* bpp = 32 and pad = 0 */ width=0; + pad = (-width) & 3; + if (info.bpp == 24) { + easy = 1; + } else if (info.bpp == 32) { + if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000) + easy = 2; + } + if (!easy) { + if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } + // right shift amt to put high bit in position #7 + rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr); + gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg); + bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb); + ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma); 
+ } + for (j=0; j < (int) s->img_y; ++j) { + if (easy) { + for (i=0; i < (int) s->img_x; ++i) { + unsigned char a; + out[z+2] = stbi__get8(s); + out[z+1] = stbi__get8(s); + out[z+0] = stbi__get8(s); + z += 3; + a = (easy == 2 ? stbi__get8(s) : 255); + all_a |= a; + if (target == 4) out[z++] = a; + } + } else { + int bpp = info.bpp; + for (i=0; i < (int) s->img_x; ++i) { + stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s)); + unsigned int a; + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount)); + a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255); + all_a |= a; + if (target == 4) out[z++] = STBI__BYTECAST(a); + } + } + stbi__skip(s, pad); + } + } + + // if alpha channel is all 0s, replace with all 255s + if (target == 4 && all_a == 0) + for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) + out[i] = 255; + + if (flip_vertically) { + stbi_uc t; + for (j=0; j < (int) s->img_y>>1; ++j) { + stbi_uc *p1 = out + j *s->img_x*target; + stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; + for (i=0; i < (int) s->img_x*target; ++i) { + t = p1[i], p1[i] = p2[i], p2[i] = t; + } + } + } + + if (req_comp && req_comp != target) { + out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + return out; +} +#endif + +// Targa Truevision - TGA +// by Jonathan Dummer +#ifndef STBI_NO_TGA +// returns STBI_rgb or whatever, 0 on error +static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) +{ + // only RGB or RGBA (incl. 
16bit) or grey allowed + if (is_rgb16) *is_rgb16 = 0; + switch(bits_per_pixel) { + case 8: return STBI_grey; + case 16: if(is_grey) return STBI_grey_alpha; + // fallthrough + case 15: if(is_rgb16) *is_rgb16 = 1; + return STBI_rgb; + case 24: // fallthrough + case 32: return bits_per_pixel/8; + default: return 0; + } +} + +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp) +{ + int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp; + int sz, tga_colormap_type; + stbi__get8(s); // discard Offset + tga_colormap_type = stbi__get8(s); // colormap type + if( tga_colormap_type > 1 ) { + stbi__rewind(s); + return 0; // only RGB or indexed allowed + } + tga_image_type = stbi__get8(s); // image type + if ( tga_colormap_type == 1 ) { // colormapped (paletted) image + if (tga_image_type != 1 && tga_image_type != 9) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip image x and y origin + tga_colormap_bpp = sz; + } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE + if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) { + stbi__rewind(s); + return 0; // only RGB or grey allowed, +/- RLE + } + stbi__skip(s,9); // skip colormap specification and image x/y origin + tga_colormap_bpp = 0; + } + tga_w = stbi__get16le(s); + if( tga_w < 1 ) { + stbi__rewind(s); + return 0; // test width + } + tga_h = stbi__get16le(s); + if( tga_h < 1 ) { + stbi__rewind(s); + return 0; // test height + } + tga_bits_per_pixel = stbi__get8(s); // bits per pixel + stbi__get8(s); // ignore alpha bits + if (tga_colormap_bpp != 0) { + if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) { + // when using a colormap, 
tga_bits_per_pixel is the size of the indexes + // I don't think anything but 8 or 16bit indexes makes sense + stbi__rewind(s); + return 0; + } + tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); + } else { + tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); + } + if(!tga_comp) { + stbi__rewind(s); + return 0; + } + if (x) *x = tga_w; + if (y) *y = tga_h; + if (comp) *comp = tga_comp; + return 1; // seems to have passed everything +} + +static int stbi__tga_test(stbi__context *s) +{ + int res = 0; + int sz, tga_color_type; + stbi__get8(s); // discard Offset + tga_color_type = stbi__get8(s); // color type + if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed + sz = stbi__get8(s); // image type + if ( tga_color_type == 1 ) { // colormapped (paletted) image + if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9 + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + stbi__skip(s,4); // skip image x and y origin + } else { // "normal" image w/o colormap + if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE + stbi__skip(s,9); // skip colormap specification and image x/y origin + } + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height + sz = stbi__get8(s); // bits per pixel + if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + + res = 1; // if we got this far, everything's good and we can return 1 instead of 0 + +errorEnd: + stbi__rewind(s); + return res; +} + +// read 16bit value and convert to 24bit RGB 
+static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) +{ + stbi__uint16 px = (stbi__uint16)stbi__get16le(s); + stbi__uint16 fiveBitMask = 31; + // we have 3 channels with 5bits each + int r = (px >> 10) & fiveBitMask; + int g = (px >> 5) & fiveBitMask; + int b = px & fiveBitMask; + // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later + out[0] = (stbi_uc)((r * 255)/31); + out[1] = (stbi_uc)((g * 255)/31); + out[2] = (stbi_uc)((b * 255)/31); + + // some people claim that the most significant bit might be used for alpha + // (possibly if an alpha-bit is set in the "image descriptor byte") + // but that only made 16bit test images completely translucent.. + // so let's treat all 15 and 16bit TGAs as RGB with no alpha. +} + +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + // read in the TGA header stuff + int tga_offset = stbi__get8(s); + int tga_indexed = stbi__get8(s); + int tga_image_type = stbi__get8(s); + int tga_is_RLE = 0; + int tga_palette_start = stbi__get16le(s); + int tga_palette_len = stbi__get16le(s); + int tga_palette_bits = stbi__get8(s); + int tga_x_origin = stbi__get16le(s); + int tga_y_origin = stbi__get16le(s); + int tga_width = stbi__get16le(s); + int tga_height = stbi__get16le(s); + int tga_bits_per_pixel = stbi__get8(s); + int tga_comp, tga_rgb16=0; + int tga_inverted = stbi__get8(s); + // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?) 
+ // image data + unsigned char *tga_data; + unsigned char *tga_palette = NULL; + int i, j; + unsigned char raw_data[4] = {0}; + int RLE_count = 0; + int RLE_repeating = 0; + int read_next_pixel = 1; + STBI_NOTUSED(ri); + + // do a tiny bit of precessing + if ( tga_image_type >= 8 ) + { + tga_image_type -= 8; + tga_is_RLE = 1; + } + tga_inverted = 1 - ((tga_inverted >> 5) & 1); + + // If I'm paletted, then I'll use the number of bits from the palette + if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16); + else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16); + + if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency + return stbi__errpuc("bad format", "Can't find out TGA pixelformat"); + + // tga info + *x = tga_width; + *y = tga_height; + if (comp) *comp = tga_comp; + + if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0)) + return stbi__errpuc("too large", "Corrupt TGA"); + + tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0); + if (!tga_data) return stbi__errpuc("outofmem", "Out of memory"); + + // skip to the data's starting position (offset usually = 0) + stbi__skip(s, tga_offset ); + + if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) { + for (i=0; i < tga_height; ++i) { + int row = tga_inverted ? tga_height -i - 1 : i; + stbi_uc *tga_row = tga_data + row*tga_width*tga_comp; + stbi__getn(s, tga_row, tga_width * tga_comp); + } + } else { + // do I need to load a palette? + if ( tga_indexed) + { + // any data to skip? 
(offset usually = 0) + stbi__skip(s, tga_palette_start ); + // load the palette + tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0); + if (!tga_palette) { + STBI_FREE(tga_data); + return stbi__errpuc("outofmem", "Out of memory"); + } + if (tga_rgb16) { + stbi_uc *pal_entry = tga_palette; + STBI_ASSERT(tga_comp == STBI_rgb); + for (i=0; i < tga_palette_len; ++i) { + stbi__tga_read_rgb16(s, pal_entry); + pal_entry += tga_comp; + } + } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) { + STBI_FREE(tga_data); + STBI_FREE(tga_palette); + return stbi__errpuc("bad palette", "Corrupt TGA"); + } + } + // load the data + for (i=0; i < tga_width * tga_height; ++i) + { + // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk? + if ( tga_is_RLE ) + { + if ( RLE_count == 0 ) + { + // yep, get the next byte as a RLE command + int RLE_cmd = stbi__get8(s); + RLE_count = 1 + (RLE_cmd & 127); + RLE_repeating = RLE_cmd >> 7; + read_next_pixel = 1; + } else if ( !RLE_repeating ) + { + read_next_pixel = 1; + } + } else + { + read_next_pixel = 1; + } + // OK, if I need to read a pixel, do it now + if ( read_next_pixel ) + { + // load however much data we did have + if ( tga_indexed ) + { + // read in index, then perform the lookup + int pal_idx = (tga_bits_per_pixel == 8) ? 
stbi__get8(s) : stbi__get16le(s); + if ( pal_idx >= tga_palette_len ) { + // invalid index + pal_idx = 0; + } + pal_idx *= tga_comp; + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = tga_palette[pal_idx+j]; + } + } else if(tga_rgb16) { + STBI_ASSERT(tga_comp == STBI_rgb); + stbi__tga_read_rgb16(s, raw_data); + } else { + // read in the data raw + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = stbi__get8(s); + } + } + // clear the reading flag for the next pixel + read_next_pixel = 0; + } // end of reading a pixel + + // copy data + for (j = 0; j < tga_comp; ++j) + tga_data[i*tga_comp+j] = raw_data[j]; + + // in case we're in RLE mode, keep counting down + --RLE_count; + } + // do I need to invert the image? + if ( tga_inverted ) + { + for (j = 0; j*2 < tga_height; ++j) + { + int index1 = j * tga_width * tga_comp; + int index2 = (tga_height - 1 - j) * tga_width * tga_comp; + for (i = tga_width * tga_comp; i > 0; --i) + { + unsigned char temp = tga_data[index1]; + tga_data[index1] = tga_data[index2]; + tga_data[index2] = temp; + ++index1; + ++index2; + } + } + } + // clear my palette, if I had one + if ( tga_palette != NULL ) + { + STBI_FREE( tga_palette ); + } + } + + // swap RGB - if the source data was RGB16, it already is in the right order + if (tga_comp >= 3 && !tga_rgb16) + { + unsigned char* tga_pixel = tga_data; + for (i=0; i < tga_width * tga_height; ++i) + { + unsigned char temp = tga_pixel[0]; + tga_pixel[0] = tga_pixel[2]; + tga_pixel[2] = temp; + tga_pixel += tga_comp; + } + } + + // convert to target component count + if (req_comp && req_comp != tga_comp) + tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); + + // the things I do to get rid of an error message, and yet keep + // Microsoft's C compilers happy... 
[8^(
   tga_palette_start = tga_palette_len = tga_palette_bits =
         tga_x_origin = tga_y_origin = 0;
   //   OK, done
   return tga_data;
}
#endif

// *************************************************************************************************
// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB

#ifndef STBI_NO_PSD

// Cheap sniff test: a PSD file starts with the big-endian magic "8BPS"
// (0x38425053). Always rewinds the stream so other format tests can run.
static int stbi__psd_test(stbi__context *s)
{
   int r = (stbi__get32be(s) == 0x38425053);
   stbi__rewind(s);
   return r;
}

// Decode one channel's PackBits-style RLE stream into `p`, writing
// `pixelCount` bytes with a stride of 4 (the output image is interleaved
// RGBA, so each channel writes every 4th byte). Returns 1 on success,
// 0 if the stream claims more bytes than remain (corrupt data).
static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
{
   int count, nleft, len;

   count = 0;
   // nleft = bytes still owed for this channel
   while ((nleft = pixelCount - count) > 0) {
      len = stbi__get8(s);
      if (len == 128) {
         // No-op.
      } else if (len < 128) {
         // Copy next len+1 bytes literally.
         len++;
         if (len > nleft) return 0; // corrupt data
         count += len;
         while (len) {
            *p = stbi__get8(s);
            p += 4; // stride 4: interleaved RGBA output
            len--;
         }
      } else if (len > 128) {
         stbi_uc   val;
         // Next -len+1 bytes in the dest are replicated from next source byte.
         // (Interpret len as a negative 8-bit int.)
         len = 257 - len;
         if (len > nleft) return 0; // corrupt data
         val = stbi__get8(s);
         count += len;
         while (len) {
            *p = val;
            p += 4;
            len--;
         }
      }
   }

   return 1;
}

// Load a PSD image: validates the header (8-bit or 16-bit RGB only),
// then reads planar channel data (raw or RLE) into an interleaved
// 4-channel buffer. `bpc` is the caller-requested bytes-per-channel hint;
// 16-bit output is produced only for uncompressed 16-bit files.
// Returns the pixel buffer or NULL (stbi error string set) on failure.
static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   int pixelCount;
   int channelCount, compression;
   int channel, i;
   int bitdepth;
   int w,h;
   stbi_uc *out;
   STBI_NOTUSED(ri);

   // Check identifier
   if (stbi__get32be(s) != 0x38425053)   // "8BPS"
      return stbi__errpuc("not PSD", "Corrupt PSD image");

   // Check file type version.
   if (stbi__get16be(s) != 1)
      return stbi__errpuc("wrong version", "Unsupported version of PSD image");

   // Skip 6 reserved bytes.
   stbi__skip(s, 6 );

   // Read the number of channels (R, G, B, A, etc).
+ channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) + return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image"); + + // Read the rows and columns of the image. + h = stbi__get32be(s); + w = stbi__get32be(s); + + // Make sure the depth is 8 bits. + bitdepth = stbi__get16be(s); + if (bitdepth != 8 && bitdepth != 16) + return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit"); + + // Make sure the color mode is RGB. + // Valid options are: + // 0: Bitmap + // 1: Grayscale + // 2: Indexed color + // 3: RGB color + // 4: CMYK color + // 7: Multichannel + // 8: Duotone + // 9: Lab color + if (stbi__get16be(s) != 3) + return stbi__errpuc("wrong color format", "PSD is not in RGB color format"); + + // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) + stbi__skip(s,stbi__get32be(s) ); + + // Skip the image resources. (resolution, pen tool paths, etc) + stbi__skip(s, stbi__get32be(s) ); + + // Skip the reserved data. + stbi__skip(s, stbi__get32be(s) ); + + // Find out if the data is compressed. + // Known values: + // 0: no compression + // 1: RLE compressed + compression = stbi__get16be(s); + if (compression > 1) + return stbi__errpuc("bad compression", "PSD has an unknown compression format"); + + // Check size + if (!stbi__mad3sizes_valid(4, w, h, 0)) + return stbi__errpuc("too large", "Corrupt PSD"); + + // Create the destination image. + + if (!compression && bitdepth == 16 && bpc == 16) { + out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); + ri->bits_per_channel = 16; + } else + out = (stbi_uc *) stbi__malloc(4 * w*h); + + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + pixelCount = w*h; + + // Initialize the data to zero. + //memset( out, 0, pixelCount * 4 ); + + // Finally, the image data. 
+ if (compression) { + // RLE as used by .PSD and .TIFF + // Loop until you get the number of unpacked bytes you are expecting: + // Read the next source byte into n. + // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. + // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. + // Else if n is 128, noop. + // Endloop + + // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data, + // which we're going to just skip. + stbi__skip(s, h * channelCount * 2 ); + + // Read the RLE data by channel. + for (channel = 0; channel < 4; channel++) { + stbi_uc *p; + + p = out+channel; + if (channel >= channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++, p += 4) + *p = (channel == 3 ? 255 : 0); + } else { + // Read the RLE data. + if (!stbi__psd_decode_rle(s, p, pixelCount)) { + STBI_FREE(out); + return stbi__errpuc("corrupt", "bad RLE data"); + } + } + } + + } else { + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. + + // Read the data by channel. + for (channel = 0; channel < 4; channel++) { + if (channel >= channelCount) { + // Fill this channel with default data. + if (bitdepth == 16 && bpc == 16) { + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + stbi__uint16 val = channel == 3 ? 65535 : 0; + for (i = 0; i < pixelCount; i++, q += 4) + *q = val; + } else { + stbi_uc *p = out+channel; + stbi_uc val = channel == 3 ? 
255 : 0; + for (i = 0; i < pixelCount; i++, p += 4) + *p = val; + } + } else { + if (ri->bits_per_channel == 16) { // output bpc + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + for (i = 0; i < pixelCount; i++, q += 4) + *q = (stbi__uint16) stbi__get16be(s); + } else { + stbi_uc *p = out+channel; + if (bitdepth == 16) { // input bpc + for (i = 0; i < pixelCount; i++, p += 4) + *p = (stbi_uc) (stbi__get16be(s) >> 8); + } else { + for (i = 0; i < pixelCount; i++, p += 4) + *p = stbi__get8(s); + } + } + } + } + } + + // remove weird white matte from PSD + if (channelCount >= 4) { + if (ri->bits_per_channel == 16) { + for (i=0; i < w*h; ++i) { + stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; + if (pixel[3] != 0 && pixel[3] != 65535) { + float a = pixel[3] / 65535.0f; + float ra = 1.0f / a; + float inv_a = 65535.0f * (1 - ra); + pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); + pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); + pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); + } + } + } else { + for (i=0; i < w*h; ++i) { + unsigned char *pixel = out + 4*i; + if (pixel[3] != 0 && pixel[3] != 255) { + float a = pixel[3] / 255.0f; + float ra = 1.0f / a; + float inv_a = 255.0f * (1 - ra); + pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); + pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); + pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); + } + } + } + } + + // convert to desired output format + if (req_comp && req_comp != 4) { + if (ri->bits_per_channel == 16) + out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h); + else + out = stbi__convert_format(out, 4, req_comp, w, h); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + if (comp) *comp = 4; + *y = h; + *x = w; + + return out; +} +#endif + +// ************************************************************************************************* +// Softimage PIC loader +// by Tom Seddon +// +// See 
http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/

#ifndef STBI_NO_PIC

// Read 4 bytes and compare them to `str`; returns 1 on an exact match.
// Used to check the PIC magic number and the "PICT" id field.
static int stbi__pic_is4(stbi__context *s,const char *str)
{
   int i;
   for (i=0; i<4; ++i)
      if (stbi__get8(s) != (stbi_uc)str[i])
         return 0;

   return 1;
}

// Full PIC sniff: magic bytes, then 84 bytes of header we don't care
// about, then the "PICT" identifier. Does NOT rewind; the caller
// (stbi__pic_test) is responsible for that.
static int stbi__pic_test_core(stbi__context *s)
{
   int i;

   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
      return 0;

   for(i=0;i<84;++i)
      stbi__get8(s);

   if (!stbi__pic_is4(s,"PICT"))
      return 0;

   return 1;
}

// One PIC packet descriptor: pixel size in bits, compression type
// (0=uncompressed, 1=pure RLE, 2=mixed RLE), and a channel bitmask
// (0x80=R, 0x40=G, 0x20=B, 0x10=A).
typedef struct
{
   stbi_uc size,type,channel;
} stbi__pic_packet;

// Read one pixel's worth of data for the channels selected by the
// `channel` bitmask into dest[0..3]. Unselected channels are left
// untouched. Returns dest, or NULL (error set) on premature EOF.
static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
{
   int mask=0x80, i;

   for (i=0; i<4; ++i, mask>>=1) {
      if (channel & mask) {
         if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
         dest[i]=stbi__get8(s);
      }
   }

   return dest;
}

// Copy the channels selected by the `channel` bitmask from src to dest;
// the RLE decoder uses this to replicate one decoded value across a run.
static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
{
   int mask=0x80,i;

   for (i=0;i<4; ++i, mask>>=1)
      if (channel&mask)
         dest[i]=src[i];
}

// Decode the PIC packet table and scanline data into `result`
// (a width*height*4 RGBA buffer the caller allocated). On return,
// *comp is 3 or 4 depending on whether any packet carried alpha.
static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
{
   int act_comp=0,num_packets=0,y,chained;
   stbi__pic_packet packets[10];

   // this will (should...) cater for even some bizarre stuff like having data
   //  for the same channel in multiple packets.
   do {
      stbi__pic_packet *packet;

      if (num_packets==sizeof(packets)/sizeof(packets[0]))
         return stbi__errpuc("bad format","too many packets");

      packet = &packets[num_packets++];

      chained = stbi__get8(s);
      packet->size    = stbi__get8(s);
      packet->type    = stbi__get8(s);
      packet->channel = stbi__get8(s);

      act_comp |= packet->channel;

      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)");
      if (packet->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
   } while (chained);

   *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
+ + for(y=0; ytype) { + default: + return stbi__errpuc("bad format","packet has bad compression type"); + + case 0: {//uncompressed + int x; + + for(x=0;xchannel,dest)) + return 0; + break; + } + + case 1://Pure RLE + { + int left=width, i; + + while (left>0) { + stbi_uc count,value[4]; + + count=stbi__get8(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)"); + + if (count > left) + count = (stbi_uc) left; + + if (!stbi__readval(s,packet->channel,value)) return 0; + + for(i=0; ichannel,dest,value); + left -= count; + } + } + break; + + case 2: {//Mixed RLE + int left=width; + while (left>0) { + int count = stbi__get8(s), i; + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)"); + + if (count >= 128) { // Repeated + stbi_uc value[4]; + + if (count==128) + count = stbi__get16be(s); + else + count -= 127; + if (count > left) + return stbi__errpuc("bad file","scanline overrun"); + + if (!stbi__readval(s,packet->channel,value)) + return 0; + + for(i=0;ichannel,dest,value); + } else { // Raw + ++count; + if (count>left) return stbi__errpuc("bad file","scanline overrun"); + + for(i=0;ichannel,dest)) + return 0; + } + left-=count; + } + break; + } + } + } + } + + return result; +} + +static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri) +{ + stbi_uc *result; + int i, x,y, internal_comp; + STBI_NOTUSED(ri); + + if (!comp) comp = &internal_comp; + + for (i=0; i<92; ++i) + stbi__get8(s); + + x = stbi__get16be(s); + y = stbi__get16be(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)"); + if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode"); + + stbi__get32be(s); //skip `ratio' + stbi__get16be(s); //skip `fields' + stbi__get16be(s); //skip `pad' + + // intermediate buffer is RGBA + result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0); + memset(result, 0xff, 
x*y*4); + + if (!stbi__pic_load_core(s,x,y,comp, result)) { + STBI_FREE(result); + result=0; + } + *px = x; + *py = y; + if (req_comp == 0) req_comp = *comp; + result=stbi__convert_format(result,4,req_comp,x,y); + + return result; +} + +static int stbi__pic_test(stbi__context *s) +{ + int r = stbi__pic_test_core(s); + stbi__rewind(s); + return r; +} +#endif + +// ************************************************************************************************* +// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb + +#ifndef STBI_NO_GIF +typedef struct +{ + stbi__int16 prefix; + stbi_uc first; + stbi_uc suffix; +} stbi__gif_lzw; + +typedef struct +{ + int w,h; + stbi_uc *out; // output buffer (always 4 components) + stbi_uc *background; // The current "background" as far as a gif is concerned + stbi_uc *history; + int flags, bgindex, ratio, transparent, eflags; + stbi_uc pal[256][4]; + stbi_uc lpal[256][4]; + stbi__gif_lzw codes[8192]; + stbi_uc *color_table; + int parse, step; + int lflags; + int start_x, start_y; + int max_x, max_y; + int cur_x, cur_y; + int line_size; + int delay; +} stbi__gif; + +static int stbi__gif_test_raw(stbi__context *s) +{ + int sz; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0; + sz = stbi__get8(s); + if (sz != '9' && sz != '7') return 0; + if (stbi__get8(s) != 'a') return 0; + return 1; +} + +static int stbi__gif_test(stbi__context *s) +{ + int r = stbi__gif_test_raw(s); + stbi__rewind(s); + return r; +} + +static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp) +{ + int i; + for (i=0; i < num_entries; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + pal[i][3] = transp == i ? 
0 : 255;
   }
}

// Parse the GIF logical screen descriptor: signature ("GIF87a"/"GIF89a"),
// canvas size, flags, background index, aspect ratio, and — unless
// is_info is set — the global color table if present. Returns 1 on
// success, 0 (stbi error set) on a corrupt header.
static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
{
   stbi_uc version;
   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
      return stbi__err("not GIF", "Corrupt GIF");

   version = stbi__get8(s);
   if (version != '7' && version != '9')    return stbi__err("not GIF", "Corrupt GIF");
   if (stbi__get8(s) != 'a')                return stbi__err("not GIF", "Corrupt GIF");

   stbi__g_failure_reason = "";
   g->w = stbi__get16le(s);
   g->h = stbi__get16le(s);
   g->flags = stbi__get8(s);
   g->bgindex = stbi__get8(s);
   g->ratio = stbi__get8(s);
   g->transparent = -1; // no transparent index until a GCE says otherwise

   if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments

   if (is_info) return 1;

   // flags bit 7 = global color table present; low 3 bits encode its size
   if (g->flags & 0x80)
      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);

   return 1;
}

// Header-only probe used by stbi__gif_info: heap-allocates a scratch
// stbi__gif (it is large — 8192 LZW codes), parses just the header,
// reports w/h, and rewinds on failure.
static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   if (!stbi__gif_header(s, g, comp, 1)) {
      STBI_FREE(g);
      stbi__rewind( s );
      return 0;
   }
   if (x) *x = g->w;
   if (y) *y = g->h;
   STBI_FREE(g);
   return 1;
}

// Emit one LZW code's pixel chain into the output image. Cursor state
// (cur_x/cur_y, byte offsets scaled by 4 for RGBA) lives in `g`;
// `step`/`parse` implement interlaced row ordering.
static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
{
   stbi_uc *p, *c;
   int idx;

   // recurse to decode the prefixes, since the linked-list is backwards,
   // and working backwards through an interleaved image would be nasty
   if (g->codes[code].prefix >= 0)
      stbi__out_gif_code(g, g->codes[code].prefix);

   if (g->cur_y >= g->max_y) return;

   idx = g->cur_x + g->cur_y;
   p = &g->out[idx];
   g->history[idx / 4] = 1; // mark pixel as written this frame

   c = &g->color_table[g->codes[code].suffix * 4];
   if (c[3] > 128) { // don't render transparent pixels;
      p[0] = c[2];
      p[1] = c[1];
      p[2] = c[0];
      p[3] = c[3];
   }
   g->cur_x += 4;

   if (g->cur_x >= g->max_x) {
      // wrap to the next row; for interlaced images fall through the
      // passes (8,4,2,1 spacing) as each one is exhausted
      g->cur_x = g->start_x;
      g->cur_y += g->step;

      while (g->cur_y >= g->max_y && g->parse > 0) {
         g->step = (1 << g->parse) *
g->line_size; + g->cur_y = g->start_y + (g->step >> 1); + --g->parse; + } + } +} + +static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) +{ + stbi_uc lzw_cs; + stbi__int32 len, init_code; + stbi__uint32 first; + stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear; + stbi__gif_lzw *p; + + lzw_cs = stbi__get8(s); + if (lzw_cs > 12) return NULL; + clear = 1 << lzw_cs; + first = 1; + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + bits = 0; + valid_bits = 0; + for (init_code = 0; init_code < clear; init_code++) { + g->codes[init_code].prefix = -1; + g->codes[init_code].first = (stbi_uc) init_code; + g->codes[init_code].suffix = (stbi_uc) init_code; + } + + // support no starting clear code + avail = clear+2; + oldcode = -1; + + len = 0; + for(;;) { + if (valid_bits < codesize) { + if (len == 0) { + len = stbi__get8(s); // start new block + if (len == 0) + return g->out; + } + --len; + bits |= (stbi__int32) stbi__get8(s) << valid_bits; + valid_bits += 8; + } else { + stbi__int32 code = bits & codemask; + bits >>= codesize; + valid_bits -= codesize; + // @OPTIMIZE: is there some way we can accelerate the non-clear path? + if (code == clear) { // clear code + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + avail = clear + 2; + oldcode = -1; + first = 0; + } else if (code == clear + 1) { // end of stream code + stbi__skip(s, len); + while ((len = stbi__get8(s)) > 0) + stbi__skip(s,len); + return g->out; + } else if (code <= avail) { + if (first) { + return stbi__errpuc("no clear code", "Corrupt GIF"); + } + + if (oldcode >= 0) { + p = &g->codes[avail++]; + if (avail > 8192) { + return stbi__errpuc("too many codes", "Corrupt GIF"); + } + + p->prefix = (stbi__int16) oldcode; + p->first = g->codes[oldcode].first; + p->suffix = (code == avail) ? 
p->first : g->codes[code].first; + } else if (code == avail) + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + + stbi__out_gif_code(g, (stbi__uint16) code); + + if ((avail & codemask) == 0 && avail <= 0x0FFF) { + codesize++; + codemask = (1 << codesize) - 1; + } + + oldcode = code; + } else { + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + } + } + } +} + +// this function is designed to support animated gifs, although stb_image doesn't support it +// two back is the image from two frames ago, used for a very specific disposal format +static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back) +{ + int dispose; + int first_frame; + int pi; + int pcount; + + // on first frame, any non-written pixels get the background colour (non-transparent) + first_frame = 0; + if (g->out == 0) { + if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header + g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h); + g->background = (stbi_uc *) stbi__malloc(4 * g->w * g->h); + g->history = (stbi_uc *) stbi__malloc(g->w * g->h); + if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory"); + + // image is treated as "tranparent" at the start - ie, nothing overwrites the current background; + // background colour is only used for pixels that are not rendered first frame, after that "background" + // color refers to teh color that was there the previous frame. + memset( g->out, 0x00, 4 * g->w * g->h ); + memset( g->background, 0x00, 4 * g->w * g->h ); // state of the background (starts transparent) + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + first_frame = 1; + } else { + // second frame - how do we dispoase of the previous one? 
+ dispose = (g->eflags & 0x1C) >> 2; + pcount = g->w * g->h; + + if ((dispose == 3) && (two_back == 0)) { + dispose = 2; // if I don't have an image to revert back to, default to the old background + } + + if (dispose == 3) { // use previous graphic + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); + } + } + } else if (dispose == 2) { + // restore what was changed last frame to background before that frame; + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); + } + } + } else { + // This is a non-disposal case eithe way, so just + // leave the pixels as is, and they will become the new background + // 1: do not dispose + // 0: not specified. + } + + // background is what out is after the undoing of the previou frame; + memcpy( g->background, g->out, 4 * g->w * g->h ); + } + + // clear my history; + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + + for (;;) { + int tag = stbi__get8(s); + switch (tag) { + case 0x2C: /* Image Descriptor */ + { + stbi__int32 x, y, w, h; + stbi_uc *o; + + x = stbi__get16le(s); + y = stbi__get16le(s); + w = stbi__get16le(s); + h = stbi__get16le(s); + if (((x + w) > (g->w)) || ((y + h) > (g->h))) + return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); + + g->line_size = g->w * 4; + g->start_x = x * 4; + g->start_y = y * g->line_size; + g->max_x = g->start_x + w * 4; + g->max_y = g->start_y + h * g->line_size; + g->cur_x = g->start_x; + g->cur_y = g->start_y; + + g->lflags = stbi__get8(s); + + if (g->lflags & 0x40) { + g->step = 8 * g->line_size; // first interlaced spacing + g->parse = 3; + } else { + g->step = g->line_size; + g->parse = 0; + } + + if (g->lflags & 0x80) { + stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? 
g->transparent : -1); + g->color_table = (stbi_uc *) g->lpal; + } else if (g->flags & 0x80) { + g->color_table = (stbi_uc *) g->pal; + } else + return stbi__errpuc("missing color table", "Corrupt GIF"); + + o = stbi__process_gif_raster(s, g); + if (o == NULL) return NULL; + + // if this was the first frame, + pcount = g->w * g->h; + if (first_frame && (g->bgindex > 0)) { + // if first frame, any pixel not drawn to gets the background color + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi] == 0) { + g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; + memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); + } + } + } + + return o; + } + + case 0x21: // Comment Extension. + { + int len; + int ext = stbi__get8(s); + if (ext == 0xF9) { // Graphic Control Extension. + len = stbi__get8(s); + if (len == 4) { + g->eflags = stbi__get8(s); + g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths. 
+ + // unset old transparent + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 255; + } + if (g->eflags & 0x01) { + g->transparent = stbi__get8(s); + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 0; + } + } else { + // don't need transparent + stbi__skip(s, 1); + g->transparent = -1; + } + } else { + stbi__skip(s, len); + break; + } + } + while ((len = stbi__get8(s)) != 0) { + stbi__skip(s, len); + } + break; + } + + case 0x3B: // gif stream termination code + return (stbi_uc *) s; // using '1' causes warning on some compilers + + default: + return stbi__errpuc("unknown code", "Corrupt GIF"); + } + } +} + +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + if (stbi__gif_test(s)) { + int layers = 0; + stbi_uc *u = 0; + stbi_uc *out = 0; + stbi_uc *two_back = 0; + stbi__gif g; + int stride; + memset(&g, 0, sizeof(g)); + if (delays) { + *delays = 0; + } + + do { + u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + + if (u) { + *x = g.w; + *y = g.h; + ++layers; + stride = g.w * g.h * 4; + + if (out) { + out = (stbi_uc*) STBI_REALLOC( out, layers * stride ); + if (delays) { + *delays = (int*) STBI_REALLOC( *delays, sizeof(int) * layers ); + } + } else { + out = (stbi_uc*)stbi__malloc( layers * stride ); + if (delays) { + *delays = (int*) stbi__malloc( layers * sizeof(int) ); + } + } + memcpy( out + ((layers - 1) * stride), u, stride ); + if (layers >= 2) { + two_back = out - 2 * stride; + } + + if (delays) { + (*delays)[layers - 1U] = g.delay; + } + } + } while (u != 0); + + // free temp buffer; + STBI_FREE(g.out); + STBI_FREE(g.history); + STBI_FREE(g.background); + + // do the final conversion after loading everything; + if (req_comp && req_comp != 4) + out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); + + *z = layers; + return out; + } else { + return stbi__errpuc("not GIF", "Image was not as a 
gif type.");
   }
}

// Load the first frame of a GIF as a single image. Delegates to
// stbi__gif_load_next with no two-back frame; converts to req_comp only
// after a successful load. Frees the multi-frame scratch buffers
// (history/background) before returning; the returned buffer is g.out.
static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *u = 0;
   stbi__gif g;
   memset(&g, 0, sizeof(g));

   u = stbi__gif_load_next(s, &g, comp, req_comp, 0);
   if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
   if (u) {
      *x = g.w;
      *y = g.h;

      // moved conversion to after successful load so that the same
      // can be done for multiple frames.
      if (req_comp && req_comp != 4)
         u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
   }

   // free buffers needed for multiple frame loading;
   STBI_FREE(g.history);
   STBI_FREE(g.background);

   return u;
}

// Info probe: dimensions/components only, via the header-only path.
static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
{
   return stbi__gif_info_raw(s,x,y,comp);
}
#endif

// *************************************************************************************************
// Radiance RGBE HDR loader
// originally by Nicolas Schulz
#ifndef STBI_NO_HDR

// Compare the stream's first bytes against a NUL-terminated signature;
// rewinds afterwards so the caller can try another signature.
static int stbi__hdr_test_core(stbi__context *s, const char *signature)
{
   int i;
   for (i=0; signature[i]; ++i)
      if (stbi__get8(s) != signature[i])
          return 0;
   stbi__rewind(s);
   return 1;
}

// HDR sniff: accepts either the "#?RADIANCE" or the "#?RGBE" signature.
static int stbi__hdr_test(stbi__context* s)
{
   int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
   stbi__rewind(s);
   if(!r) {
       r = stbi__hdr_test_core(s, "#?RGBE\n");
       stbi__rewind(s);
   }
   return r;
}

#define STBI__HDR_BUFLEN  1024
// Read one newline-terminated header token into `buffer` (capacity
// STBI__HDR_BUFLEN). Overlong lines are truncated and the remainder of
// the line is discarded. Returns `buffer` (always NUL-terminated).
static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
{
   int len=0;
   char c = '\0';

   c = (char) stbi__get8(z);

   while (!stbi__at_eof(z) && c != '\n') {
      buffer[len++] = c;
      if (len == STBI__HDR_BUFLEN-1) {
         // flush to end of line
         while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
            ;
         break;
      }
      c = (char) stbi__get8(z);
   }

   buffer[len] = 0;
   return buffer;
}

// Convert one RGBE-encoded pixel (input[0..2]=mantissas, input[3]=shared
// exponent; exponent 0 means the pixel is black) to req_comp floats in
// `output`. Grey output averages RGB; alpha, when requested, is 1.
static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
{
   if ( input[3] != 0 ) {
      float f1;
      // Exponent
      f1 = (float) ldexp(1.0f, input[3] -
(int)(128 + 8)); + if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; + } else { + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } + } +} + +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + float *hdr_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + const char *headerToken; + STBI_NOTUSED(ri); + + // Check identifier + headerToken = stbi__hdr_gettoken(s,buffer); + if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) + return stbi__errpf("not HDR", "Corrupt HDR image"); + + // Parse header + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! 
+ token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + height = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + width = (int) strtol(token, NULL, 10); + + *x = width; + *y = height; + + if (comp) *comp = 3; + if (req_comp == 0) req_comp = 3; + + if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0)) + return stbi__errpf("too large", "HDR image is too large"); + + // Read data + hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0); + if (!hdr_data) + return stbi__errpf("outofmem", "Out of memory"); + + // Load image data + // image data is stored as some number of sca + if ( width < 8 || width >= 32768) { + // Read flat data + for (j=0; j < height; ++j) { + for (i=0; i < width; ++i) { + stbi_uc rgbe[4]; + main_decode_loop: + stbi__getn(s, rgbe, 4); + stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); + } + } + } else { + // Read RLE-encoded data + scanline = NULL; + + for (j = 0; j < height; ++j) { + c1 = stbi__get8(s); + c2 = stbi__get8(s); + len = stbi__get8(s); + if (c1 != 2 || c2 != 2 || (len & 0x80)) { + // not run-length encoded, so we have to actually use THIS data as a decoded + // pixel (note this can't be a valid pixel--one of RGB must be >= 128) + stbi_uc rgbe[4]; + rgbe[0] = (stbi_uc) c1; + rgbe[1] = (stbi_uc) c2; + rgbe[2] = (stbi_uc) len; + rgbe[3] = (stbi_uc) stbi__get8(s); + stbi__hdr_convert(hdr_data, rgbe, req_comp); + i = 1; + j = 0; + STBI_FREE(scanline); + goto main_decode_loop; // yes, this makes no sense + } + len <<= 8; + len |= stbi__get8(s); + if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); } + if (scanline == NULL) { + scanline = (stbi_uc *) 
stbi__malloc_mad2(width, 4, 0); + if (!scanline) { + STBI_FREE(hdr_data); + return stbi__errpf("outofmem", "Out of memory"); + } + } + + for (k = 0; k < 4; ++k) { + int nleft; + i = 0; + while ((nleft = width - i) > 0) { + count = stbi__get8(s); + if (count > 128) { + // Run + value = stbi__get8(s); + count -= 128; + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = value; + } else { + // Dump + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = stbi__get8(s); + } + } + } + for (i=0; i < width; ++i) + stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); + } + if (scanline) + STBI_FREE(scanline); + } + + return hdr_data; +} + +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int dummy; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (stbi__hdr_test(s) == 0) { + stbi__rewind( s ); + return 0; + } + + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) { + stbi__rewind( s ); + return 0; + } + token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *y = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *x = (int) strtol(token, NULL, 10); + *comp = 3; + return 1; +} +#endif // STBI_NO_HDR + +#ifndef STBI_NO_BMP +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) +{ + void *p; + stbi__bmp_data info; + + info.all_a = 255; + p = stbi__bmp_parse_header(s, &info); + stbi__rewind( s ); + 
if (p == NULL)
      return 0;
   if (x) *x = s->img_x;
   if (y) *y = s->img_y;
   // ma is the alpha mask from the BMP header; nonzero means 4 channels
   if (comp) *comp = info.ma ? 4 : 3;
   return 1;
}
#endif

#ifndef STBI_NO_PSD
// PSD info probe: re-parses the fixed header fields (magic, version,
// channel count, dimensions, depth, color mode) without decoding pixels.
// Rewinds and returns 0 on any mismatch; on success reports *comp = 4
// (PSD output is always expanded to RGBA).
static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
{
   int channelCount, dummy, depth;
   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;
   if (stbi__get32be(s) != 0x38425053) { // "8BPS" magic
       stbi__rewind( s );
       return 0;
   }
   if (stbi__get16be(s) != 1) { // version must be 1
       stbi__rewind( s );
       return 0;
   }
   stbi__skip(s, 6); // reserved bytes
   channelCount = stbi__get16be(s);
   if (channelCount < 0 || channelCount > 16) {
       stbi__rewind( s );
       return 0;
   }
   *y = stbi__get32be(s); // rows first, then columns
   *x = stbi__get32be(s);
   depth = stbi__get16be(s);
   if (depth != 8 && depth != 16) {
       stbi__rewind( s );
       return 0;
   }
   if (stbi__get16be(s) != 3) { // color mode 3 = RGB
       stbi__rewind( s );
       return 0;
   }
   *comp = 4;
   return 1;
}

// Returns 1 if the stream is a PSD with 16-bit channel depth (used to
// decide whether a 16-bit load path is available). Rewinds on failure;
// note it does NOT rewind on success.
static int stbi__psd_is16(stbi__context *s)
{
   int channelCount, depth;
   if (stbi__get32be(s) != 0x38425053) {
       stbi__rewind( s );
       return 0;
   }
   if (stbi__get16be(s) != 1) {
       stbi__rewind( s );
       return 0;
   }
   stbi__skip(s, 6);
   channelCount = stbi__get16be(s);
   if (channelCount < 0 || channelCount > 16) {
       stbi__rewind( s );
       return 0;
   }
   (void) stbi__get32be(s); // skip rows
   (void) stbi__get32be(s); // skip columns
   depth = stbi__get16be(s);
   if (depth != 16) {
       stbi__rewind( s );
       return 0;
   }
   return 1;
}
#endif

#ifndef STBI_NO_PIC
// PIC info probe: validates the magic, reads dimensions, sanity-checks
// the total size against a 2^28-byte cap, then walks the packet table
// to determine whether an alpha channel is present (3 vs 4 components).
static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
{
   int act_comp=0,num_packets=0,chained,dummy;
   stbi__pic_packet packets[10];

   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;

   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
      stbi__rewind(s);
      return 0;
   }

   stbi__skip(s, 88);

   *x = stbi__get16be(s);
   *y = stbi__get16be(s);
   if (stbi__at_eof(s)) {
      stbi__rewind( s);
      return 0;
   }
   // reject images whose RGBA size would overflow the decode cap
   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
      stbi__rewind( s );
      return 0;
   }

   stbi__skip(s, 8);

   do {
      stbi__pic_packet
*packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return 0; + + packet = &packets[num_packets++]; + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + act_comp |= packet->channel; + + if (stbi__at_eof(s)) { + stbi__rewind( s ); + return 0; + } + if (packet->size != 8) { + stbi__rewind( s ); + return 0; + } + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); + + return 1; +} +#endif + +// ************************************************************************************************* +// Portable Gray Map and Portable Pixel Map loader +// by Ken Miller +// +// PGM: http://netpbm.sourceforge.net/doc/pgm.html +// PPM: http://netpbm.sourceforge.net/doc/ppm.html +// +// Known limitations: +// Does not support comments in the header section +// Does not support ASCII image data (formats P2 and P3) +// Does not support 16-bit-per-channel + +#ifndef STBI_NO_PNM + +static int stbi__pnm_test(stbi__context *s) +{ + char p, t; + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind( s ); + return 0; + } + return 1; +} + +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + STBI_NOTUSED(ri); + + if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n)) + return 0; + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + + if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "PNM too large"); + + out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + stbi__getn(s, out, s->img_n * s->img_x * s->img_y); + + if (req_comp && req_comp != s->img_n) { + out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure 
+ } + return out; +} + +static int stbi__pnm_isspace(char c) +{ + return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; +} + +static void stbi__pnm_skip_whitespace(stbi__context *s, char *c) +{ + for (;;) { + while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) + *c = (char) stbi__get8(s); + + if (stbi__at_eof(s) || *c != '#') + break; + + while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) + *c = (char) stbi__get8(s); + } +} + +static int stbi__pnm_isdigit(char c) +{ + return c >= '0' && c <= '9'; +} + +static int stbi__pnm_getinteger(stbi__context *s, char *c) +{ + int value = 0; + + while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { + value = value*10 + (*c - '0'); + *c = (char) stbi__get8(s); + } + + return value; +} + +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) +{ + int maxv, dummy; + char c, p, t; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + stbi__rewind(s); + + // Get identifier + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind(s); + return 0; + } + + *comp = (t == '6') ? 
3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm + + c = (char) stbi__get8(s); + stbi__pnm_skip_whitespace(s, &c); + + *x = stbi__pnm_getinteger(s, &c); // read width + stbi__pnm_skip_whitespace(s, &c); + + *y = stbi__pnm_getinteger(s, &c); // read height + stbi__pnm_skip_whitespace(s, &c); + + maxv = stbi__pnm_getinteger(s, &c); // read max value + + if (maxv > 255) + return stbi__err("max value > 255", "PPM image not 8-bit"); + else + return 1; +} +#endif + +static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) +{ + #ifndef STBI_NO_JPEG + if (stbi__jpeg_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNG + if (stbi__png_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_GIF + if (stbi__gif_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_BMP + if (stbi__bmp_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PIC + if (stbi__pic_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNM + if (stbi__pnm_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_info(s, x, y, comp)) return 1; + #endif + + // test tga last because it's a crappy test! 
+ #ifndef STBI_NO_TGA + if (stbi__tga_info(s, x, y, comp)) + return 1; + #endif + return stbi__err("unknown image type", "Image not of any known type, or corrupt"); +} + +static int stbi__is_16_main(stbi__context *s) +{ + #ifndef STBI_NO_PNG + if (stbi__png_is16(s)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_is16(s)) return 1; + #endif + + return 0; +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_info_from_file(f, x, y, comp); + fclose(f); + return result; +} + +STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__info_main(&s,x,y,comp); + fseek(f,pos,SEEK_SET); + return r; +} + +STBIDEF int stbi_is_16_bit(char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_is_16_bit_from_file(f); + fclose(f); + return result; +} + +STBIDEF int stbi_is_16_bit_from_file(FILE *f) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__is_16_main(&s); + fseek(f,pos,SEEK_SET); + return r; +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__is_16_main(&s); +} + +STBIDEF int 
stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__is_16_main(&s); +} + +#endif // STB_IMAGE_IMPLEMENTATION + +/* + revision history: + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug + 1-bit BMP + *_is_16_bit api + avoid warnings + 2.16 (2017-07-23) all functions have 16-bit variants; + STBI_NO_STDIO works again; + compilation fixes; + fix rounding in unpremultiply; + optimize vertical flip; + disable raw_len validation; + documentation fixes + 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; + warning fixes; disable run-time SSE detection on gcc; + uniform handling of optional "return" values; + thread-safe initialization of zlib tables + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) allocate large structures on the stack + remove white matting for transparent PSD + fix reported channel count for PNG & BMP + re-enable SSE2 in non-gcc 64-bit + support RGB-formatted JPEG + read 16-bit PNGs (only as 8-bit) + 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED + 2.09 (2016-01-16) allow comments in PNM files + 16-bit-per-pixel TGA (not bit-per-component) + info() for TGA could break due to .hdr handling + info() for BMP to shares code instead of sloppy parse + can use STBI_REALLOC_SIZED if allocator doesn't support realloc + code cleanup + 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA + 2.07 (2015-09-13) fix compiler warnings + partial animated GIF support + limited 16-bpc PSD support + #ifdef unused functions + bug with < 92 byte PIC,PNM,HDR,TGA + 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value + 2.05 (2015-04-19) fix bug 
in progressive JPEG handling, fix warning + 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit + 2.03 (2015-04-12) extra corruption checking (mmozeiko) + stbi_set_flip_vertically_on_load (nguillemot) + fix NEON support; fix mingw support + 2.02 (2015-01-19) fix incorrect assert, fix warning + 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 + 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG + 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) + progressive JPEG (stb) + PGM/PPM support (Ken Miller) + STBI_MALLOC,STBI_REALLOC,STBI_FREE + GIF bugfix -- seemingly never worked + STBI_NO_*, STBI_ONLY_* + 1.48 (2014-12-14) fix incorrectly-named assert() + 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) + optimize PNG (ryg) + fix bug in interlaced PNG with user-specified channel count (stb) + 1.46 (2014-08-26) + fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG + 1.45 (2014-08-16) + fix MSVC-ARM internal compiler error by wrapping malloc + 1.44 (2014-08-07) + various warning fixes from Ronny Chevalier + 1.43 (2014-07-15) + fix MSVC-only compiler problem in code changed in 1.42 + 1.42 (2014-07-09) + don't define _CRT_SECURE_NO_WARNINGS (affects user code) + fixes to stbi__cleanup_jpeg path + added STBI_ASSERT to avoid requiring assert.h + 1.41 (2014-06-25) + fix search&replace from 1.36 that messed up comments/error messages + 1.40 (2014-06-22) + fix gcc struct-initialization warning + 1.39 (2014-06-15) + fix to TGA optimization when req_comp != number of components in TGA; + fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite) + add support for BMP version 5 (more ignored fields) + 1.38 (2014-06-06) + suppress MSVC warnings on integer casts truncating values + fix accidental rename of 'skip' field of I/O + 1.37 (2014-06-04) + remove duplicate typedef + 1.36 (2014-06-03) + convert to header file single-file library + if 
de-iphone isn't set, load iphone images color-swapped instead of returning NULL + 1.35 (2014-05-27) + various warnings + fix broken STBI_SIMD path + fix bug where stbi_load_from_file no longer left file pointer in correct place + fix broken non-easy path for 32-bit BMP (possibly never used) + TGA optimization by Arseny Kapoulkine + 1.34 (unknown) + use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case + 1.33 (2011-07-14) + make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements + 1.32 (2011-07-13) + support for "info" function for all supported filetypes (SpartanJ) + 1.31 (2011-06-20) + a few more leak fixes, bug in PNG handling (SpartanJ) + 1.30 (2011-06-11) + added ability to load files via callbacks to accomidate custom input streams (Ben Wenger) + removed deprecated format-specific test/load functions + removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway + error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) + fix inefficiency in decoding 32-bit BMP (David Woo) + 1.29 (2010-08-16) + various warning fixes from Aurelien Pocheville + 1.28 (2010-08-01) + fix bug in GIF palette transparency (SpartanJ) + 1.27 (2010-08-01) + cast-to-stbi_uc to fix warnings + 1.26 (2010-07-24) + fix bug in file buffering for PNG reported by SpartanJ + 1.25 (2010-07-17) + refix trans_data warning (Won Chun) + 1.24 (2010-07-12) + perf improvements reading from files on platforms with lock-heavy fgetc() + minor perf improvements for jpeg + deprecated type-specific functions so we'll get feedback if they're needed + attempt to fix trans_data warning (Won Chun) + 1.23 fixed bug in iPhone support + 1.22 (2010-07-10) + removed image *writing* support + stbi_info support from Jetro Lauha + GIF support from Jean-Marc Lienher + iPhone PNG-extensions from James Brown + warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. 
Janez Žemva) + 1.21 fix use of 'stbi_uc' in header (reported by jon blow) + 1.20 added support for Softimage PIC, by Tom Seddon + 1.19 bug in interlaced PNG corruption check (found by ryg) + 1.18 (2008-08-02) + fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - stbi__convert_format converted one too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug + header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) + 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... 
not sure + fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available + 0.56 fix bug: zlib uncompressed mode len vs. nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant + 0.50 (2006-11-19) + first released version +*/ + + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/workloads/realworld/async/darknet/src/stb_image_write.h b/workloads/realworld/async/darknet/src/stb_image_write.h new file mode 100644 index 0000000000000000000000000000000000000000..c05e95812b96232abd3617f98255832cc3fe4716 --- /dev/null +++ b/workloads/realworld/async/darknet/src/stb_image_write.h @@ -0,0 +1,1568 @@ +/* stb_image_write - v1.09 - public domain - http://nothings.org/stb/stb_image_write.h + writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 + no warranty implied; use at your own risk + + Before #including, + + #define STB_IMAGE_WRITE_IMPLEMENTATION + + in the file that you want to have the implementation. + + Will probably not work correctly with strict-aliasing optimizations. + + If using a modern Microsoft Compiler, non-safe versions of CRT calls may cause + compilation warnings or even errors. To avoid this, also before #including, + + #define STBI_MSC_SECURE_CRT + +ABOUT: + + This header file is a library for writing images to C stdio. It could be + adapted to write to memory or a general streaming interface; let me know. + + The PNG output is not optimal; it is 20-50% larger than the file + written by a decent optimizing implementation; though providing a custom + zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that. 
+ This library is designed for source code compactness and simplicity, + not optimal image file size or run-time performance. + +BUILDING: + + You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h. + You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace + malloc,realloc,free. + You can #define STBIW_MEMMOVE() to replace memmove() + You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function + for PNG compression (instead of the builtin one), it must have the following signature: + unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality); + The returned data will be freed with STBIW_FREE() (free() by default), + so it must be heap allocated with STBIW_MALLOC() (malloc() by default), + +USAGE: + + There are five functions, one for each image file format: + + int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality); + int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); + + void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically + + There are also five equivalent functions that use an arbitrary write function. 
You are + expected to open/close your file-equivalent before and after calling these: + + int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); + int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + + where the callback is: + void stbi_write_func(void *context, void *data, int size); + + You can configure it with these global variables: + int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE + int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression + int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode + + + You can define STBI_WRITE_NO_STDIO to disable the file variant of these + functions, so the library will not use stdio.h at all. However, this will + also disable HDR writing, because it requires stdio for formatted output. + + Each function returns 0 on failure and non-0 on success. + + The functions create an image file defined by the parameters. The image + is a rectangle of pixels stored from left-to-right, top-to-bottom. + Each pixel contains 'comp' channels of data stored interleaved with 8-bits + per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is + monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall. + The *data pointer points to the first byte of the top-left-most pixel. + For PNG, "stride_in_bytes" is the distance in bytes from the first byte of + a row of pixels to the first byte of the next row of pixels. 
+ + PNG creates output files with the same number of components as the input. + The BMP format expands Y to RGB in the file format and does not + output alpha. + + PNG supports writing rectangles of data even when the bytes storing rows of + data are not consecutive in memory (e.g. sub-rectangles of a larger image), + by supplying the stride between the beginning of adjacent rows. The other + formats do not. (Thus you cannot write a native-format BMP through the BMP + writer, both because it is in BGR order and because it may have padding + at the end of the line.) + + PNG allows you to set the deflate compression level by setting the global + variable 'stbi_write_png_compression_level' (it defaults to 8). + + HDR expects linear float data. Since the format is always 32-bit rgb(e) + data, alpha (if provided) is discarded, and for monochrome data it is + replicated across all three channels. + + TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed + data, set the global variable 'stbi_write_tga_with_rle' to 0. + + JPEG does ignore alpha channels in input data; quality is between 1 and 100. + Higher quality looks better but results in a bigger image. + JPEG baseline (no JPEG progressive). + +CREDITS: + + + Sean Barrett - PNG/BMP/TGA + Baldur Karlsson - HDR + Jean-Sebastien Guay - TGA monochrome + Tim Kelsey - misc enhancements + Alan Hickman - TGA RLE + Emmanuel Julien - initial file IO callback implementation + Jon Olick - original jo_jpeg.cpp code + Daniel Gibson - integrate JPEG, allow external zlib + Aarni Koskela - allow choosing PNG filter + + bugfixes: + github:Chribba + Guillaume Chereau + github:jry2 + github:romigrou + Sergio Gonzalez + Jonas Karlsson + Filip Wasil + Thatcher Ulrich + github:poppolopoppo + Patrick Boettcher + github:xeekworx + Cap Petschulat + Simon Rodriguez + Ivan Tikhonov + github:ignotion + Adam Schackart + +LICENSE + + See end of file for license information. 
+ +*/ + +#ifndef INCLUDE_STB_IMAGE_WRITE_H +#define INCLUDE_STB_IMAGE_WRITE_H + +// if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline' +#ifndef STBIWDEF +#ifdef STB_IMAGE_WRITE_STATIC +#define STBIWDEF static +#else +#ifdef __cplusplus +#define STBIWDEF extern "C" +#else +#define STBIWDEF extern +#endif +#endif +#endif + +#ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations +extern int stbi_write_tga_with_rle; +extern int stbi_write_png_compression_level; +extern int stbi_write_force_png_filter; +#endif + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); +#endif + +typedef void stbi_write_func(void *context, void *data, int size); + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + +STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); + +#endif//INCLUDE_STB_IMAGE_WRITE_H + +#ifdef STB_IMAGE_WRITE_IMPLEMENTATION + +#ifdef _WIN32 + #ifndef 
_CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS + #endif + #ifndef _CRT_NONSTDC_NO_DEPRECATE + #define _CRT_NONSTDC_NO_DEPRECATE + #endif +#endif + +#ifndef STBI_WRITE_NO_STDIO +#include +#endif // STBI_WRITE_NO_STDIO + +#include +#include +#include +#include + +#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED)) +// ok +#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)." +#endif + +#ifndef STBIW_MALLOC +#define STBIW_MALLOC(sz) malloc(sz) +#define STBIW_REALLOC(p,newsz) realloc(p,newsz) +#define STBIW_FREE(p) free(p) +#endif + +#ifndef STBIW_REALLOC_SIZED +#define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz) +#endif + + +#ifndef STBIW_MEMMOVE +#define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz) +#endif + + +#ifndef STBIW_ASSERT +#include +#define STBIW_ASSERT(x) assert(x) +#endif + +#define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff) + +#ifdef STB_IMAGE_WRITE_STATIC +static int stbi__flip_vertically_on_write=0; +static int stbi_write_png_compression_level = 8; +static int stbi_write_tga_with_rle = 1; +static int stbi_write_force_png_filter = -1; +#else +int stbi_write_png_compression_level = 8; +int stbi__flip_vertically_on_write=0; +int stbi_write_tga_with_rle = 1; +int stbi_write_force_png_filter = -1; +#endif + +STBIWDEF void stbi_flip_vertically_on_write(int flag) +{ + stbi__flip_vertically_on_write = flag; +} + +typedef struct +{ + stbi_write_func *func; + void *context; +} stbi__write_context; + +// initialize a callback-based context +static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context) +{ + s->func = c; + s->context = context; +} + +#ifndef STBI_WRITE_NO_STDIO + +static void stbi__stdio_write(void *context, void *data, int size) +{ + fwrite(data,1,size,(FILE*) 
context); +} + +static int stbi__start_write_file(stbi__write_context *s, const char *filename) +{ + FILE *f; +#ifdef STBI_MSC_SECURE_CRT + if (fopen_s(&f, filename, "wb")) + f = NULL; +#else + f = fopen(filename, "wb"); +#endif + stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f); + return f != NULL; +} + +static void stbi__end_write_file(stbi__write_context *s) +{ + fclose((FILE *)s->context); +} + +#endif // !STBI_WRITE_NO_STDIO + +typedef unsigned int stbiw_uint32; +typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1]; + +static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) +{ + while (*fmt) { + switch (*fmt++) { + case ' ': break; + case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); + s->func(s->context,&x,1); + break; } + case '2': { int x = va_arg(v,int); + unsigned char b[2]; + b[0] = STBIW_UCHAR(x); + b[1] = STBIW_UCHAR(x>>8); + s->func(s->context,b,2); + break; } + case '4': { stbiw_uint32 x = va_arg(v,int); + unsigned char b[4]; + b[0]=STBIW_UCHAR(x); + b[1]=STBIW_UCHAR(x>>8); + b[2]=STBIW_UCHAR(x>>16); + b[3]=STBIW_UCHAR(x>>24); + s->func(s->context,b,4); + break; } + default: + STBIW_ASSERT(0); + return; + } + } +} + +static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) 
+{ + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); +} + +static void stbiw__putc(stbi__write_context *s, unsigned char c) +{ + s->func(s->context, &c, 1); +} + +static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) +{ + unsigned char arr[3]; + arr[0] = a, arr[1] = b, arr[2] = c; + s->func(s->context, arr, 3); +} + +static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d) +{ + unsigned char bg[3] = { 255, 0, 255}, px[3]; + int k; + + if (write_alpha < 0) + s->func(s->context, &d[comp - 1], 1); + + switch (comp) { + case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case + case 1: + if (expand_mono) + stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp + else + s->func(s->context, d, 1); // monochrome TGA + break; + case 4: + if (!write_alpha) { + // composite against pink background + for (k = 0; k < 3; ++k) + px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255; + stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]); + break; + } + /* FALLTHROUGH */ + case 3: + stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); + break; + } + if (write_alpha > 0) + s->func(s->context, &d[comp - 1], 1); +} + +static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) +{ + stbiw_uint32 zero = 0; + int i,j, j_end; + + if (y <= 0) + return; + + if (stbi__flip_vertically_on_write) + vdir *= -1; + + if (vdir < 0) + j_end = -1, j = y-1; + else + j_end = y, j = 0; + + for (; j != j_end; j += vdir) { + for (i=0; i < x; ++i) { + unsigned char *d = (unsigned char *) data + (j*x+i)*comp; + stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); + } + s->func(s->context, &zero, scanline_pad); + } +} + +static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int 
comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) +{ + if (y < 0 || x < 0) { + return 0; + } else { + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); + stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono); + return 1; + } +} + +static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data) +{ + int pad = (-x*3) & 3; + return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad, + "11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header +} + +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_bmp_core(&s, x, y, comp, data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_bmp_core(&s, x, y, comp, data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif //!STBI_WRITE_NO_STDIO + +static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data) +{ + int has_alpha = (comp == 2 || comp == 4); + int colorbytes = has_alpha ? comp-1 : comp; + int format = colorbytes < 2 ? 
3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3 + + if (y < 0 || x < 0) + return 0; + + if (!stbi_write_tga_with_rle) { + return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0, + "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); + } else { + int i,j,k; + int jend, jdir; + + stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); + + if (stbi__flip_vertically_on_write) { + j = 0; + jend = y; + jdir = 1; + } else { + j = y-1; + jend = -1; + jdir = -1; + } + for (; j != jend; j += jdir) { + unsigned char *row = (unsigned char *) data + j * x * comp; + int len; + + for (i = 0; i < x; i += len) { + unsigned char *begin = row + i * comp; + int diff = 1; + len = 1; + + if (i < x - 1) { + ++len; + diff = memcmp(begin, row + (i + 1) * comp, comp); + if (diff) { + const unsigned char *prev = begin; + for (k = i + 2; k < x && len < 128; ++k) { + if (memcmp(prev, row + k * comp, comp)) { + prev += comp; + ++len; + } else { + --len; + break; + } + } + } else { + for (k = i + 2; k < x && len < 128; ++k) { + if (!memcmp(begin, row + k * comp, comp)) { + ++len; + } else { + break; + } + } + } + } + + if (diff) { + unsigned char header = STBIW_UCHAR(len - 1); + s->func(s->context, &header, 1); + for (k = 0; k < len; ++k) { + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); + } + } else { + unsigned char header = STBIW_UCHAR(len - 129); + s->func(s->context, &header, 1); + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); + } + } + } + } + return 1; +} + +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_tga_core(&s, x, y, comp, (void *) data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, 
const void *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_tga_core(&s, x, y, comp, (void *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR writer +// by Baldur Karlsson + +#define stbiw__max(a, b) ((a) > (b) ? (a) : (b)) + +void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear) +{ + int exponent; + float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2])); + + if (maxcomp < 1e-32f) { + rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0; + } else { + float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp; + + rgbe[0] = (unsigned char)(linear[0] * normalize); + rgbe[1] = (unsigned char)(linear[1] * normalize); + rgbe[2] = (unsigned char)(linear[2] * normalize); + rgbe[3] = (unsigned char)(exponent + 128); + } +} + +void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte) +{ + unsigned char lengthbyte = STBIW_UCHAR(length+128); + STBIW_ASSERT(length+128 <= 255); + s->func(s->context, &lengthbyte, 1); + s->func(s->context, &databyte, 1); +} + +void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data) +{ + unsigned char lengthbyte = STBIW_UCHAR(length); + STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code + s->func(s->context, &lengthbyte, 1); + s->func(s->context, data, length); +} + +void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline) +{ + unsigned char scanlineheader[4] = { 2, 2, 0, 0 }; + unsigned char rgbe[4]; + float linear[3]; + int x; + + scanlineheader[2] = (width&0xff00)>>8; + scanlineheader[3] = (width&0x00ff); + + /* skip RLE for images too small or large */ + if (width < 8 || width >= 32768) { + for (x=0; x < width; x++) { + switch (ncomp) { + case 4: /* 
fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + s->func(s->context, rgbe, 4); + } + } else { + int c,r; + /* encode into scratch buffer */ + for (x=0; x < width; x++) { + switch(ncomp) { + case 4: /* fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + scratch[x + width*0] = rgbe[0]; + scratch[x + width*1] = rgbe[1]; + scratch[x + width*2] = rgbe[2]; + scratch[x + width*3] = rgbe[3]; + } + + s->func(s->context, scanlineheader, 4); + + /* RLE each component separately */ + for (c=0; c < 4; c++) { + unsigned char *comp = &scratch[width*c]; + + x = 0; + while (x < width) { + // find first run + r = x; + while (r+2 < width) { + if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) + break; + ++r; + } + if (r+2 >= width) + r = width; + // dump up to first run + while (x < r) { + int len = r-x; + if (len > 128) len = 128; + stbiw__write_dump_data(s, len, &comp[x]); + x += len; + } + // if there's a run, output it + if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd + // find next byte after run + while (r < width && comp[r] == comp[x]) + ++r; + // output run up to r + while (x < r) { + int len = r-x; + if (len > 127) len = 127; + stbiw__write_run_data(s, len, comp[x]); + x += len; + } + } + } + } + } +} + +static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data) +{ + if (y <= 0 || x <= 0 || data == NULL) + return 0; + else { + // Each component is stored separately. Allocate scratch space for full output scanline. 
+ unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); + int i, len; + char buffer[128]; + char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; + s->func(s->context, header, sizeof(header)-1); + +#ifdef STBI_MSC_SECURE_CRT + len = sprintf_s(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#else + len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#endif + s->func(s->context, buffer, len); + + for(i=0; i < y; i++) + stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i)); + STBIW_FREE(scratch); + return 1; + } +} + +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_hdr_core(&s, x, y, comp, (float *) data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif // STBI_WRITE_NO_STDIO + + +////////////////////////////////////////////////////////////////////////////// +// +// PNG writer +// + +#ifndef STBIW_ZLIB_COMPRESS +// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() +#define stbiw__sbraw(a) ((int *) (a) - 2) +#define stbiw__sbm(a) stbiw__sbraw(a)[0] +#define stbiw__sbn(a) stbiw__sbraw(a)[1] + +#define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) +#define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0) +#define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) + +#define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) +#define stbiw__sbcount(a) ((a) ? 
stbiw__sbn(a) : 0) +#define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0) + +static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) +{ + int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1; + void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2); + STBIW_ASSERT(p); + if (p) { + if (!*arr) ((int *) p)[1] = 0; + *arr = (void *) ((int *) p + 2); + stbiw__sbm(*arr) = m; + } + return *arr; +} + +static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount) +{ + while (*bitcount >= 8) { + stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer)); + *bitbuffer >>= 8; + *bitcount -= 8; + } + return data; +} + +static int stbiw__zlib_bitrev(int code, int codebits) +{ + int res=0; + while (codebits--) { + res = (res << 1) | (code & 1); + code >>= 1; + } + return res; +} + +static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit) +{ + int i; + for (i=0; i < limit && i < 258; ++i) + if (a[i] != b[i]) break; + return i; +} + +static unsigned int stbiw__zhash(unsigned char *data) +{ + stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16); + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + return hash; +} + +#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount)) +#define stbiw__zlib_add(code,codebits) \ + (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) +#define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) +// default huffman tables +#define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8) +#define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9) +#define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7) +#define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8) +#define stbiw__zlib_huff(n) ((n) <= 143 ? 
stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) +#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) + +#define stbiw__ZHASH 16384 + +#endif // STBIW_ZLIB_COMPRESS + +unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality) +{ +#ifdef STBIW_ZLIB_COMPRESS + // user provided a zlib compress implementation, use that + return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality); +#else // use builtin + static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; + static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; + static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; + static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; + unsigned int bitbuf=0; + int i,j, bitcount=0; + unsigned char *out = NULL; + unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(char**)); + if (hash_table == NULL) + return NULL; + if (quality < 5) quality = 5; + + stbiw__sbpush(out, 0x78); // DEFLATE 32K window + stbiw__sbpush(out, 0x5e); // FLEVEL = 1 + stbiw__zlib_add(1,1); // BFINAL = 1 + stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman + + for (i=0; i < stbiw__ZHASH; ++i) + hash_table[i] = NULL; + + i=0; + while (i < data_len-3) { + // hash next 3 bytes of data to be compressed + int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3; + unsigned char *bestloc = 0; + unsigned char **hlist = hash_table[h]; + int n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32768) { // if entry lies within window + int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i); + if (d >= best) best=d,bestloc=hlist[j]; + } + 
} + // when hash table entry is too long, delete half the entries + if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) { + STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality); + stbiw__sbn(hash_table[h]) = quality; + } + stbiw__sbpush(hash_table[h],data+i); + + if (bestloc) { + // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal + h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1); + hlist = hash_table[h]; + n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32767) { + int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1); + if (e > best) { // if next match is better, bail on current match + bestloc = NULL; + break; + } + } + } + } + + if (bestloc) { + int d = (int) (data+i - bestloc); // distance back + STBIW_ASSERT(d <= 32767 && best <= 258); + for (j=0; best > lengthc[j+1]-1; ++j); + stbiw__zlib_huff(j+257); + if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); + for (j=0; d > distc[j+1]-1; ++j); + stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); + if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]); + i += best; + } else { + stbiw__zlib_huffb(data[i]); + ++i; + } + } + // write out final bytes + for (;i < data_len; ++i) + stbiw__zlib_huffb(data[i]); + stbiw__zlib_huff(256); // end of block + // pad with 0 bits to byte boundary + while (bitcount) + stbiw__zlib_add(0,1); + + for (i=0; i < stbiw__ZHASH; ++i) + (void) stbiw__sbfree(hash_table[i]); + STBIW_FREE(hash_table); + + { + // compute adler32 on input + unsigned int s1=1, s2=0; + int blocklen = (int) (data_len % 5552); + j=0; + while (j < data_len) { + for (i=0; i < blocklen; ++i) s1 += data[j+i], s2 += s1; + s1 %= 65521, s2 %= 65521; + j += blocklen; + blocklen = 5552; + } + stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s2)); + stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s1)); + } + *out_len = stbiw__sbn(out); + // make 
returned pointer freeable + STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len); + return (unsigned char *) stbiw__sbraw(out); +#endif // STBIW_ZLIB_COMPRESS +} + +static unsigned int stbiw__crc32(unsigned char *buffer, int len) +{ + static unsigned int crc_table[256] = + { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 
0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + }; + + unsigned int crc = ~0u; + int i; + for (i=0; i < len; ++i) + crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; + return ~crc; +} + +#define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4) +#define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v)); +#define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3]) + +static void stbiw__wpcrc(unsigned char **data, int len) +{ + unsigned int crc = stbiw__crc32(*data - len - 4, len+4); + stbiw__wp32(*data, crc); +} + +static unsigned 
char stbiw__paeth(int a, int b, int c) +{ + int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c); + if (pa <= pb && pa <= pc) return STBIW_UCHAR(a); + if (pb <= pc) return STBIW_UCHAR(b); + return STBIW_UCHAR(c); +} + +// @OPTIMIZE: provide an option that always forces left-predict or paeth predict +static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer) +{ + static int mapping[] = { 0,1,2,3,4 }; + static int firstmap[] = { 0,1,0,5,6 }; + int *mymap = (y != 0) ? mapping : firstmap; + int i; + int type = mymap[filter_type]; + unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y); + int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes; + for (i = 0; i < n; ++i) { + switch (type) { + case 0: line_buffer[i] = z[i]; break; + case 1: line_buffer[i] = z[i]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break; + case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break; + case 5: line_buffer[i] = z[i]; break; + case 6: line_buffer[i] = z[i]; break; + } + } + for (i=n; i < width*n; ++i) { + switch (type) { + case 0: line_buffer[i] = z[i]; break; + case 1: line_buffer[i] = z[i] - z[i-n]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break; + case 4: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break; + case 5: line_buffer[i] = z[i] - (z[i-n]>>1); break; + case 6: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; + } + } +} + +unsigned char *stbi_write_png_to_mem(unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len) +{ + int force_filter = stbi_write_force_png_filter; + int ctype[5] = { -1, 0, 4, 2, 6 }; + unsigned char sig[8] 
= { 137,80,78,71,13,10,26,10 }; + unsigned char *out,*o, *filt, *zlib; + signed char *line_buffer; + int j,zlen; + + if (stride_bytes == 0) + stride_bytes = x * n; + + if (force_filter >= 5) { + force_filter = -1; + } + + filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; + line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; } + for (j=0; j < y; ++j) { + int filter_type; + if (force_filter > -1) { + filter_type = force_filter; + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, force_filter, line_buffer); + } else { // Estimate the best filter by running through all of them: + int best_filter = 0, best_filter_val = 0x7fffffff, est, i; + for (filter_type = 0; filter_type < 5; filter_type++) { + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, filter_type, line_buffer); + + // Estimate the entropy of the line using this filter; the less, the better. + est = 0; + for (i = 0; i < x*n; ++i) { + est += abs((signed char) line_buffer[i]); + } + if (est < best_filter_val) { + best_filter_val = est; + best_filter = filter_type; + } + } + if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, best_filter, line_buffer); + filter_type = best_filter; + } + } + // when we get here, filter_type contains the filter type, and line_buffer contains the data + filt[j*(x*n+1)] = (unsigned char) filter_type; + STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); + } + STBIW_FREE(line_buffer); + zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level); + STBIW_FREE(filt); + if (!zlib) return 0; + + // each tag requires 12 bytes of overhead + out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12); + if (!out) return 0; + *out_len = 8 + 12+13 + 12+zlen + 12; + + o=out; + STBIW_MEMMOVE(o,sig,8); o+= 8; + stbiw__wp32(o, 13); // header length + stbiw__wptag(o, "IHDR"); + 
stbiw__wp32(o, x); + stbiw__wp32(o, y); + *o++ = 8; + *o++ = STBIW_UCHAR(ctype[n]); + *o++ = 0; + *o++ = 0; + *o++ = 0; + stbiw__wpcrc(&o,13); + + stbiw__wp32(o, zlen); + stbiw__wptag(o, "IDAT"); + STBIW_MEMMOVE(o, zlib, zlen); + o += zlen; + STBIW_FREE(zlib); + stbiw__wpcrc(&o, zlen); + + stbiw__wp32(o,0); + stbiw__wptag(o, "IEND"); + stbiw__wpcrc(&o,0); + + STBIW_ASSERT(o == out + *out_len); + + return out; +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes) +{ + FILE *f; + int len; + unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; +#ifdef STBI_MSC_SECURE_CRT + if (fopen_s(&f, filename, "wb")) + f = NULL; +#else + f = fopen(filename, "wb"); +#endif + if (!f) { STBIW_FREE(png); return 0; } + fwrite(png, 1, len, f); + fclose(f); + STBIW_FREE(png); + return 1; +} +#endif + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes) +{ + int len; + unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; + func(context, png, len); + STBIW_FREE(png); + return 1; +} + + +/* *************************************************************************** + * + * JPEG writer + * + * This is based on Jon Olick's jo_jpeg.cpp: + * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html + */ + +static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18, + 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; + +static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) { + int bitBuf = *bitBufP, bitCnt = *bitCntP; + bitCnt += bs[1]; + bitBuf |= bs[0] << (24 - 
bitCnt); + while(bitCnt >= 8) { + unsigned char c = (bitBuf >> 16) & 255; + stbiw__putc(s, c); + if(c == 255) { + stbiw__putc(s, 0); + } + bitBuf <<= 8; + bitCnt -= 8; + } + *bitBufP = bitBuf; + *bitCntP = bitCnt; +} + +static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) { + float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p; + float z1, z2, z3, z4, z5, z11, z13; + + float tmp0 = d0 + d7; + float tmp7 = d0 - d7; + float tmp1 = d1 + d6; + float tmp6 = d1 - d6; + float tmp2 = d2 + d5; + float tmp5 = d2 - d5; + float tmp3 = d3 + d4; + float tmp4 = d3 - d4; + + // Even part + float tmp10 = tmp0 + tmp3; // phase 2 + float tmp13 = tmp0 - tmp3; + float tmp11 = tmp1 + tmp2; + float tmp12 = tmp1 - tmp2; + + d0 = tmp10 + tmp11; // phase 3 + d4 = tmp10 - tmp11; + + z1 = (tmp12 + tmp13) * 0.707106781f; // c4 + d2 = tmp13 + z1; // phase 5 + d6 = tmp13 - z1; + + // Odd part + tmp10 = tmp4 + tmp5; // phase 2 + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + // The rotator is modified from fig 4-8 to avoid extra negations. + z5 = (tmp10 - tmp12) * 0.382683433f; // c6 + z2 = tmp10 * 0.541196100f + z5; // c2-c6 + z4 = tmp12 * 1.306562965f + z5; // c2+c6 + z3 = tmp11 * 0.707106781f; // c4 + + z11 = tmp7 + z3; // phase 5 + z13 = tmp7 - z3; + + *d5p = z13 + z2; // phase 6 + *d3p = z13 - z2; + *d1p = z11 + z4; + *d7p = z11 - z4; + + *d0p = d0; *d2p = d2; *d4p = d4; *d6p = d6; +} + +static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) { + int tmp1 = val < 0 ? -val : val; + val = val < 0 ? 
val-1 : val; + bits[1] = 1; + while(tmp1 >>= 1) { + ++bits[1]; + } + bits[0] = val & ((1<<bits[1])-1); +} + +static int stbiw__jpg_processDU(stbi__write_context *s, int *bitBuf, int *bitCnt, float *CDU, float *fdtbl, int DC, const unsigned short HTDC[256][2], const unsigned short HTAC[256][2]) { + const unsigned short EOB[2] = { HTAC[0x00][0], HTAC[0x00][1] }; + const unsigned short M16zeroes[2] = { HTAC[0xF0][0], HTAC[0xF0][1] }; + int dataOff, i, diff, end0pos; + int DU[64]; + + // DCT rows + for(dataOff=0; dataOff<64; dataOff+=8) { + stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff+1], &CDU[dataOff+2], &CDU[dataOff+3], &CDU[dataOff+4], &CDU[dataOff+5], &CDU[dataOff+6], &CDU[dataOff+7]); + } + // DCT columns + for(dataOff=0; dataOff<8; ++dataOff) { + stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff+8], &CDU[dataOff+16], &CDU[dataOff+24], &CDU[dataOff+32], &CDU[dataOff+40], &CDU[dataOff+48], &CDU[dataOff+56]); + } + // Quantize/descale/zigzag the coefficients + for(i = 0; i < 64; ++i) { + float v = CDU[i]*fdtbl[i]; + DU[stbiw__jpg_ZigZag[i]] = (int)(v < 0 ? v - 0.5f : v + 0.5f); + } + + // Encode DC + diff = DU[0] - DC; + if (diff == 0) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[0]); + } else { + unsigned short bits[2]; + stbiw__jpg_calcBits(diff, bits); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[bits[1]]); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); + } + // Encode ACs + end0pos = 63; + for(; (end0pos>0)&&(DU[end0pos]==0); --end0pos) { + } + // end0pos = first element in reverse order !=0 + if(end0pos == 0) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + return DU[0]; + } + for(i = 1; i <= end0pos; ++i) { + int startpos = i; + int nrzeroes; + unsigned short bits[2]; + for (; DU[i]==0 && i<=end0pos; ++i) { + } + nrzeroes = i-startpos; + if ( nrzeroes >= 16 ) { + int lng = nrzeroes>>4; + int nrmarker; + for (nrmarker=1; nrmarker <= lng; ++nrmarker) + stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); + nrzeroes &= 15; + } + stbiw__jpg_calcBits(DU[i], bits); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); + } + if(end0pos != 63) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + } + return DU[0]; +} + +static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) { + // Constants that don't pollute global namespace + static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0}; + static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d}; + static const unsigned char std_ac_luminance_values[] = { + 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, + 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, + 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, + 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, + 
0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, + 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, + 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0}; + static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77}; + static const unsigned char std_ac_chrominance_values[] = { + 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, + 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26, + 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, + 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, + 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, + 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, + 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + // Huffman tables + static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}}; + static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}}; + static const unsigned short YAC_HT[256][2] = { + {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const unsigned short UVAC_HT[256][2] = { + {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22, + 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99}; + static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99, + 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99}; + static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, + 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; + + int row, col, i, k; + float fdtbl_Y[64], fdtbl_UV[64]; + unsigned char YTable[64], UVTable[64]; + + if(!data || !width || !height || comp > 4 || comp < 1) { + return 0; + } + + quality = quality ? quality : 90; + quality = quality < 1 ? 1 : quality > 100 ? 100 : quality; + quality = quality < 50 ? 5000 / quality : 200 - quality * 2; + + for(i = 0; i < 64; ++i) { + int uvti, yti = (YQT[i]*quality+50)/100; + YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 
255 : yti); + uvti = (UVQT[i]*quality+50)/100; + UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti); + } + + for(row = 0, k = 0; row < 8; ++row) { + for(col = 0; col < 8; ++col, ++k) { + fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + } + } + + // Write Headers + { + static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; + static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; + const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width), + 3,1,0x11,0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; + s->func(s->context, (void*)head0, sizeof(head0)); + s->func(s->context, (void*)YTable, sizeof(YTable)); + stbiw__putc(s, 1); + s->func(s->context, UVTable, sizeof(UVTable)); + s->func(s->context, (void*)head1, sizeof(head1)); + s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1); + s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); + stbiw__putc(s, 0x10); // HTYACinfo + s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1); + s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); + stbiw__putc(s, 1); // HTUDCinfo + s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values)); + stbiw__putc(s, 0x11); // HTUACinfo + s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); + s->func(s->context, (void*)head2, sizeof(head2)); + } + + // Encode 8x8 macroblocks 
+ { + static const unsigned short fillBits[] = {0x7F, 7}; + const unsigned char *imageData = (const unsigned char *)data; + int DCY=0, DCU=0, DCV=0; + int bitBuf=0, bitCnt=0; + // comp == 2 is grey+alpha (alpha is ignored) + int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; + int x, y, pos; + for(y = 0; y < height; y += 8) { + for(x = 0; x < width; x += 8) { + float YDU[64], UDU[64], VDU[64]; + for(row = y, pos = 0; row < y+8; ++row) { + for(col = x; col < x+8; ++col, ++pos) { + int p = (stbi__flip_vertically_on_write ? height-1-row : row)*width*comp + col*comp; + float r, g, b; + if(row >= height) { + p -= width*comp*(row+1 - height); + } + if(col >= width) { + p -= comp*(col+1 - width); + } + + r = imageData[p+0]; + g = imageData[p+ofsG]; + b = imageData[p+ofsB]; + YDU[pos]=+0.29900f*r+0.58700f*g+0.11400f*b-128; + UDU[pos]=-0.16874f*r-0.33126f*g+0.50000f*b; + VDU[pos]=+0.50000f*r-0.41869f*g-0.08131f*b; + } + } + + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, YDU, fdtbl_Y, DCY, YDC_HT, YAC_HT); + DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, UDU, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); + DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, VDU, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); + } + } + + // Do the bit alignment of the EOI marker + stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); + } + + // EOI + stbiw__putc(s, 0xFF); + stbiw__putc(s, 0xD9); + + return 1; +} + +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality); +} + + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); + stbi__end_write_file(&s); + return r; + } else + return 
0; +} +#endif + +#endif // STB_IMAGE_WRITE_IMPLEMENTATION + +/* Revision history + 1.09 (2018-02-11) + fix typo in zlib quality API, improve STB_I_W_STATIC in C++ + 1.08 (2018-01-29) + add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter + 1.07 (2017-07-24) + doc fix + 1.06 (2017-07-23) + writing JPEG (using Jon Olick's code) + 1.05 ??? + 1.04 (2017-03-03) + monochrome BMP expansion + 1.03 ??? + 1.02 (2016-04-02) + avoid allocating large structures on the stack + 1.01 (2016-01-16) + STBIW_REALLOC_SIZED: support allocators with no realloc support + avoid race-condition in crc initialization + minor compile issues + 1.00 (2015-09-14) + installable file IO function + 0.99 (2015-09-13) + warning fixes; TGA rle support + 0.98 (2015-04-08) + added STBIW_MALLOC, STBIW_ASSERT etc + 0.97 (2015-01-18) + fixed HDR asserts, rewrote HDR rle logic + 0.96 (2015-01-17) + add HDR output + fix monochrome BMP + 0.95 (2014-08-17) + add monochrome TGA output + 0.94 (2014-05-31) + rename private functions to avoid conflicts with stb_image.h + 0.93 (2014-05-27) + warning fixes + 0.92 (2010-08-01) + casts to unsigned char to fix warnings + 0.91 (2010-07-17) + first public release + 0.90 first internal release +*/ + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/workloads/realworld/async/darknet/src/tree.c b/workloads/realworld/async/darknet/src/tree.c new file mode 100644 index 0000000000000000000000000000000000000000..67b6d431f6f7e92ede234c71ecae9bd9146dc71f --- /dev/null +++ b/workloads/realworld/async/darknet/src/tree.c @@ -0,0 +1,139 @@ +#include +#include +#include "tree.h" +#include "utils.h" +#include "data.h" + +void change_leaves(tree *t, char *leaf_list) +{ + list *llist = get_paths(leaf_list); + char **leaves = (char **)list_to_array(llist); + int n = llist->size; + int i,j; + int found = 0; + for(i = 0; i < t->n; ++i){ + t->leaf[i] = 0; + for(j = 0; j < n; ++j){ + if (0==strcmp(t->name[i], leaves[j])){ + t->leaf[i] = 1; + ++found; + break; + } + } + } + fprintf(stderr, "Found %d leaves.\n", found); +} + +float get_hierarchy_probability(float *x, tree *hier, int c, int stride) +{ + float p = 1; + while(c >= 0){ + p = p * x[c*stride]; + c = hier->parent[c]; + } + return p; +} + +void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride) +{ + int j; + for(j = 0; j < n; ++j){ + int parent = hier->parent[j]; + if(parent >= 0){ + predictions[j*stride] *= predictions[parent*stride]; + } + } + if(only_leaves){ + for(j = 0; j < n; ++j){ + if(!hier->leaf[j]) predictions[j*stride] = 0; + } + } +} + +int 
hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride) +{ + float p = 1; + int group = 0; + int i; + while(1){ + float max = 0; + int max_i = 0; + + for(i = 0; i < hier->group_size[group]; ++i){ + int index = i + hier->group_offset[group]; + float val = predictions[(i + hier->group_offset[group])*stride]; + if(val > max){ + max_i = index; + max = val; + } + } + if(p*max > thresh){ + p = p*max; + group = hier->child[max_i]; + if(hier->child[max_i] < 0) return max_i; + } else if (group == 0){ + return max_i; + } else { + return hier->parent[hier->group_offset[group]]; + } + } + return 0; +} + +tree *read_tree(char *filename) +{ + tree t = {0}; + FILE *fp = fopen(filename, "r"); + + char *line; + int last_parent = -1; + int group_size = 0; + int groups = 0; + int n = 0; + while((line=fgetl(fp)) != 0){ + char *id = calloc(256, sizeof(char)); + int parent = -1; + sscanf(line, "%s %d", id, &parent); + t.parent = realloc(t.parent, (n+1)*sizeof(int)); + t.parent[n] = parent; + + t.child = realloc(t.child, (n+1)*sizeof(int)); + t.child[n] = -1; + + t.name = realloc(t.name, (n+1)*sizeof(char *)); + t.name[n] = id; + if(parent != last_parent){ + ++groups; + t.group_offset = realloc(t.group_offset, groups * sizeof(int)); + t.group_offset[groups - 1] = n - group_size; + t.group_size = realloc(t.group_size, groups * sizeof(int)); + t.group_size[groups - 1] = group_size; + group_size = 0; + last_parent = parent; + } + t.group = realloc(t.group, (n+1)*sizeof(int)); + t.group[n] = groups; + if (parent >= 0) { + t.child[parent] = groups; + } + ++n; + ++group_size; + } + ++groups; + t.group_offset = realloc(t.group_offset, groups * sizeof(int)); + t.group_offset[groups - 1] = n - group_size; + t.group_size = realloc(t.group_size, groups * sizeof(int)); + t.group_size[groups - 1] = group_size; + t.n = n; + t.groups = groups; + t.leaf = calloc(n, sizeof(int)); + int i; + for(i = 0; i < n; ++i) t.leaf[i] = 1; + for(i = 0; i < n; ++i) if(t.parent[i] >= 0) 
t.leaf[t.parent[i]] = 0; + + fclose(fp); + tree *tree_ptr = calloc(1, sizeof(tree)); + *tree_ptr = t; + //error(0); + return tree_ptr; +} diff --git a/workloads/realworld/async/darknet/src/tree.h b/workloads/realworld/async/darknet/src/tree.h new file mode 100644 index 0000000000000000000000000000000000000000..3802b8ead806266edd291de5407b08c2d7ed5dd1 --- /dev/null +++ b/workloads/realworld/async/darknet/src/tree.h @@ -0,0 +1,8 @@ +#ifndef TREE_H +#define TREE_H +#include "darknet.h" + +int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride); +float get_hierarchy_probability(float *x, tree *hier, int c, int stride); + +#endif diff --git a/workloads/realworld/async/darknet/src/upsample_layer.c b/workloads/realworld/async/darknet/src/upsample_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..83f32ea5f41b4c787c38e5324e3e7dd4909ca928 --- /dev/null +++ b/workloads/realworld/async/darknet/src/upsample_layer.c @@ -0,0 +1,106 @@ +#include "upsample_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + +layer make_upsample_layer(int batch, int w, int h, int c, int stride) +{ + layer l = {0}; + l.type = UPSAMPLE; + l.batch = batch; + l.w = w; + l.h = h; + l.c = c; + l.out_w = w*stride; + l.out_h = h*stride; + l.out_c = c; + if(stride < 0){ + stride = -stride; + l.reverse=1; + l.out_w = w/stride; + l.out_h = h/stride; + } + l.stride = stride; + l.outputs = l.out_w*l.out_h*l.out_c; + l.inputs = l.w*l.h*l.c; + l.delta = calloc(l.outputs*batch, sizeof(float)); + l.output = calloc(l.outputs*batch, sizeof(float));; + + l.forward = forward_upsample_layer; + l.backward = backward_upsample_layer; + #ifdef GPU + l.forward_gpu = forward_upsample_layer_gpu; + l.backward_gpu = backward_upsample_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + #endif + if(l.reverse) fprintf(stderr, "downsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", 
stride, w, h, c, l.out_w, l.out_h, l.out_c); + else fprintf(stderr, "upsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); + return l; +} + +void resize_upsample_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + l->out_w = w*l->stride; + l->out_h = h*l->stride; + if(l->reverse){ + l->out_w = w/l->stride; + l->out_h = h/l->stride; + } + l->outputs = l->out_w*l->out_h*l->out_c; + l->inputs = l->h*l->w*l->c; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + +void forward_upsample_layer(const layer l, network net) +{ + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + if(l.reverse){ + upsample_cpu(l.output, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input); + }else{ + upsample_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output); + } +} + +void backward_upsample_layer(const layer l, network net) +{ + if(l.reverse){ + upsample_cpu(l.delta, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, net.delta); + }else{ + upsample_cpu(net.delta, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta); + } +} + +#ifdef GPU +void forward_upsample_layer_gpu(const layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + if(l.reverse){ + upsample_gpu(l.output_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input_gpu); + }else{ + upsample_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output_gpu); + } +} + +void backward_upsample_layer_gpu(const layer l, network net) +{ + if(l.reverse){ + upsample_gpu(l.delta_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, net.delta_gpu); + }else{ + upsample_gpu(net.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 
0, l.scale, l.delta_gpu); + } +} +#endif diff --git a/workloads/realworld/async/darknet/src/upsample_layer.h b/workloads/realworld/async/darknet/src/upsample_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..86790d1088354ea9c46a4b20fbe1dacf36925ca8 --- /dev/null +++ b/workloads/realworld/async/darknet/src/upsample_layer.h @@ -0,0 +1,15 @@ +#ifndef UPSAMPLE_LAYER_H +#define UPSAMPLE_LAYER_H +#include "darknet.h" + +layer make_upsample_layer(int batch, int w, int h, int c, int stride); +void forward_upsample_layer(const layer l, network net); +void backward_upsample_layer(const layer l, network net); +void resize_upsample_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_upsample_layer_gpu(const layer l, network net); +void backward_upsample_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/async/darknet/src/utils.c b/workloads/realworld/async/darknet/src/utils.c new file mode 100644 index 0000000000000000000000000000000000000000..626b4678c1e2779552ed9d34f19ce4b0f57d9ded --- /dev/null +++ b/workloads/realworld/async/darknet/src/utils.c @@ -0,0 +1,726 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + + +/* +// old timing. is it better? who knows!! 
+double get_wall_time() +{ + struct timeval time; + if (gettimeofday(&time,NULL)){ + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * .000001; +} +*/ + +double what_time_is_it_now() +{ + struct timeval time; + if (gettimeofday(&time,NULL)){ + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * .000001; +} + +int *read_intlist(char *gpu_list, int *ngpus, int d) +{ + int *gpus = 0; + if(gpu_list){ + int len = strlen(gpu_list); + *ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++*ngpus; + } + gpus = calloc(*ngpus, sizeof(int)); + for(i = 0; i < *ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpus = calloc(1, sizeof(float)); + *gpus = d; + *ngpus = 1; + } + return gpus; +} + +int *read_map(char *filename) +{ + int n = 0; + int *map = 0; + char *str; + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + while((str=fgetl(file))){ + ++n; + map = realloc(map, n*sizeof(int)); + map[n-1] = atoi(str); + } + return map; +} + +void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections) +{ + size_t i; + for(i = 0; i < sections; ++i){ + size_t start = n*i/sections; + size_t end = n*(i+1)/sections; + size_t num = end-start; + shuffle(arr+(start*size), num, size); + } +} + +void shuffle(void *arr, size_t n, size_t size) +{ + size_t i; + void *swp = calloc(1, size); + for(i = 0; i < n-1; ++i){ + size_t j = i + rand()/(RAND_MAX / (n-i)+1); + memcpy(swp, arr+(j*size), size); + memcpy(arr+(j*size), arr+(i*size), size); + memcpy(arr+(i*size), swp, size); + } +} + +int *random_index_order(int min, int max) +{ + int *inds = calloc(max-min, sizeof(int)); + int i; + for(i = min; i < max; ++i){ + inds[i] = i; + } + for(i = min; i < max-1; ++i){ + int swap = inds[i]; + int index = i + rand()%(max-i); + inds[i] = inds[index]; + inds[index] = swap; + } + return inds; +} + +void del_arg(int argc, char **argv, int index) +{ + int i; + for(i = index; i < 
argc-1; ++i) argv[i] = argv[i+1]; + argv[i] = 0; +} + +int find_arg(int argc, char* argv[], char *arg) +{ + int i; + for(i = 0; i < argc; ++i) { + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)) { + del_arg(argc, argv, i); + return 1; + } + } + return 0; +} + +int find_int_arg(int argc, char **argv, char *arg, int def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = atoi(argv[i+1]); + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + +float find_float_arg(int argc, char **argv, char *arg, float def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = atof(argv[i+1]); + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + +char *find_char_arg(int argc, char **argv, char *arg, char *def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = argv[i+1]; + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + + +char *basecfg(char *cfgfile) +{ + char *c = cfgfile; + char *next; + while((next = strchr(c, '/'))) + { + c = next+1; + } + c = copy_string(c); + next = strchr(c, '.'); + if (next) *next = 0; + return c; +} + +int alphanum_to_int(char c) +{ + return (c < 58) ? c - 48 : c-87; +} +char int_to_alphanum(int i) +{ + if (i == 36) return '.'; + return (i < 10) ? i + 48 : i + 87; +} + +void pm(int M, int N, float *A) +{ + int i,j; + for(i =0 ; i < M; ++i){ + printf("%d ", i+1); + for(j = 0; j < N; ++j){ + printf("%2.4f, ", A[i*N+j]); + } + printf("\n"); + } + printf("\n"); +} + +void find_replace(char *str, char *orig, char *rep, char *output) +{ + char buffer[4096] = {0}; + char *p; + + sprintf(buffer, "%s", str); + if(!(p = strstr(buffer, orig))){ // Is 'orig' even in 'str'? 
+ sprintf(output, "%s", str); + return; + } + + *p = '\0'; + + sprintf(output, "%s%s%s", buffer, rep, p+strlen(orig)); +} + +float sec(clock_t clocks) +{ + return (float)clocks/CLOCKS_PER_SEC; +} + +void top_k(float *a, int n, int k, int *index) +{ + int i,j; + for(j = 0; j < k; ++j) index[j] = -1; + for(i = 0; i < n; ++i){ + int curr = i; + for(j = 0; j < k; ++j){ + if((index[j] < 0) || a[curr] > a[index[j]]){ + int swap = curr; + curr = index[j]; + index[j] = swap; + } + } + } +} + +void error(const char *s) +{ + perror(s); + assert(0); + exit(-1); +} + +unsigned char *read_file(char *filename) +{ + FILE *fp = fopen(filename, "rb"); + size_t size; + + fseek(fp, 0, SEEK_END); + size = ftell(fp); + fseek(fp, 0, SEEK_SET); + + unsigned char *text = calloc(size+1, sizeof(char)); + fread(text, 1, size, fp); + fclose(fp); + return text; +} + +void malloc_error() +{ + fprintf(stderr, "Malloc error\n"); + exit(-1); +} + +void file_error(char *s) +{ + fprintf(stderr, "Couldn't open file: %s\n", s); + exit(0); +} + +list *split_str(char *s, char delim) +{ + size_t i; + size_t len = strlen(s); + list *l = make_list(); + list_insert(l, s); + for(i = 0; i < len; ++i){ + if(s[i] == delim){ + s[i] = '\0'; + list_insert(l, &(s[i+1])); + } + } + return l; +} + +void strip(char *s) +{ + size_t i; + size_t len = strlen(s); + size_t offset = 0; + for(i = 0; i < len; ++i){ + char c = s[i]; + if(c==' '||c=='\t'||c=='\n') ++offset; + else s[i-offset] = c; + } + s[len-offset] = '\0'; +} + +void strip_char(char *s, char bad) +{ + size_t i; + size_t len = strlen(s); + size_t offset = 0; + for(i = 0; i < len; ++i){ + char c = s[i]; + if(c==bad) ++offset; + else s[i-offset] = c; + } + s[len-offset] = '\0'; +} + +void free_ptrs(void **ptrs, int n) +{ + int i; + for(i = 0; i < n; ++i) free(ptrs[i]); + free(ptrs); +} + +char *fgetl(FILE *fp) +{ + if(feof(fp)) return 0; + size_t size = 512; + char *line = malloc(size*sizeof(char)); + if(!fgets(line, size, fp)){ + free(line); + return 0; + } + + 
size_t curr = strlen(line); + + while((line[curr-1] != '\n') && !feof(fp)){ + if(curr == size-1){ + size *= 2; + line = realloc(line, size*sizeof(char)); + if(!line) { + printf("%ld\n", size); + malloc_error(); + } + } + size_t readsize = size-curr; + if(readsize > INT_MAX) readsize = INT_MAX-1; + fgets(&line[curr], readsize, fp); + curr = strlen(line); + } + if(line[curr-1] == '\n') line[curr-1] = '\0'; + + return line; +} + +int read_int(int fd) +{ + int n = 0; + int next = read(fd, &n, sizeof(int)); + if(next <= 0) return -1; + return n; +} + +void write_int(int fd, int n) +{ + int next = write(fd, &n, sizeof(int)); + if(next <= 0) error("read failed"); +} + +int read_all_fail(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + int next = read(fd, buffer + n, bytes-n); + if(next <= 0) return 1; + n += next; + } + return 0; +} + +int write_all_fail(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + size_t next = write(fd, buffer + n, bytes-n); + if(next <= 0) return 1; + n += next; + } + return 0; +} + +void read_all(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + int next = read(fd, buffer + n, bytes-n); + if(next <= 0) error("read failed"); + n += next; + } +} + +void write_all(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + size_t next = write(fd, buffer + n, bytes-n); + if(next <= 0) error("write failed"); + n += next; + } +} + + +char *copy_string(char *s) +{ + char *copy = malloc(strlen(s)+1); + strncpy(copy, s, strlen(s)+1); + return copy; +} + +list *parse_csv_line(char *line) +{ + list *l = make_list(); + char *c, *p; + int in = 0; + for(c = line, p = line; *c != '\0'; ++c){ + if(*c == '"') in = !in; + else if(*c == ',' && !in){ + *c = '\0'; + list_insert(l, copy_string(p)); + p = c+1; + } + } + list_insert(l, copy_string(p)); + return l; +} + +int count_fields(char *line) +{ + int count = 0; + int done = 0; + char *c; + for(c = line; !done; ++c){ 
+ done = (*c == '\0'); + if(*c == ',' || done) ++count; + } + return count; +} + +float *parse_fields(char *line, int n) +{ + float *field = calloc(n, sizeof(float)); + char *c, *p, *end; + int count = 0; + int done = 0; + for(c = line, p = line; !done; ++c){ + done = (*c == '\0'); + if(*c == ',' || done){ + *c = '\0'; + field[count] = strtod(p, &end); + if(p == c) field[count] = nan(""); + if(end != c && (end != c-1 || *end != '\r')) field[count] = nan(""); //DOS file formats! + p = c+1; + ++count; + } + } + return field; +} + +float sum_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i) sum += a[i]; + return sum; +} + +float mean_array(float *a, int n) +{ + return sum_array(a,n)/n; +} + +void mean_arrays(float **a, int n, int els, float *avg) +{ + int i; + int j; + memset(avg, 0, els*sizeof(float)); + for(j = 0; j < n; ++j){ + for(i = 0; i < els; ++i){ + avg[i] += a[j][i]; + } + } + for(i = 0; i < els; ++i){ + avg[i] /= n; + } +} + +void print_statistics(float *a, int n) +{ + float m = mean_array(a, n); + float v = variance_array(a, n); + printf("MSE: %.6f, Mean: %.6f, Variance: %.6f\n", mse_array(a, n), m, v); +} + +float variance_array(float *a, int n) +{ + int i; + float sum = 0; + float mean = mean_array(a, n); + for(i = 0; i < n; ++i) sum += (a[i] - mean)*(a[i]-mean); + float variance = sum/n; + return variance; +} + +int constrain_int(int a, int min, int max) +{ + if (a < min) return min; + if (a > max) return max; + return a; +} + +float constrain(float min, float max, float a) +{ + if (a < min) return min; + if (a > max) return max; + return a; +} + +float dist_array(float *a, float *b, int n, int sub) +{ + int i; + float sum = 0; + for(i = 0; i < n; i += sub) sum += pow(a[i]-b[i], 2); + return sqrt(sum); +} + +float mse_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i) sum += a[i]*a[i]; + return sqrt(sum/n); +} + +void normalize_array(float *a, int n) +{ + int i; + float mu = mean_array(a,n); + float 
sigma = sqrt(variance_array(a,n)); + for(i = 0; i < n; ++i){ + a[i] = (a[i] - mu)/sigma; + } + mu = mean_array(a,n); + sigma = sqrt(variance_array(a,n)); +} + +void translate_array(float *a, int n, float s) +{ + int i; + for(i = 0; i < n; ++i){ + a[i] += s; + } +} + +float mag_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + sum += a[i]*a[i]; + } + return sqrt(sum); +} + +void scale_array(float *a, int n, float s) +{ + int i; + for(i = 0; i < n; ++i){ + a[i] *= s; + } +} + +int sample_array(float *a, int n) +{ + float sum = sum_array(a, n); + scale_array(a, n, 1./sum); + float r = rand_uniform(0, 1); + int i; + for(i = 0; i < n; ++i){ + r = r - a[i]; + if (r <= 0) return i; + } + return n-1; +} + +int max_int_index(int *a, int n) +{ + if(n <= 0) return -1; + int i, max_i = 0; + int max = a[0]; + for(i = 1; i < n; ++i){ + if(a[i] > max){ + max = a[i]; + max_i = i; + } + } + return max_i; +} + +int max_index(float *a, int n) +{ + if(n <= 0) return -1; + int i, max_i = 0; + float max = a[0]; + for(i = 1; i < n; ++i){ + if(a[i] > max){ + max = a[i]; + max_i = i; + } + } + return max_i; +} + +int int_index(int *a, int val, int n) +{ + int i; + for(i = 0; i < n; ++i){ + if(a[i] == val) return i; + } + return -1; +} + +int rand_int(int min, int max) +{ + if (max < min){ + int s = min; + min = max; + max = s; + } + int r = (rand()%(max - min + 1)) + min; + return r; +} + +// From http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform +float rand_normal() +{ + static int haveSpare = 0; + static double rand1, rand2; + + if(haveSpare) + { + haveSpare = 0; + return sqrt(rand1) * sin(rand2); + } + + haveSpare = 1; + + rand1 = rand() / ((double) RAND_MAX); + if(rand1 < 1e-100) rand1 = 1e-100; + rand1 = -2 * log(rand1); + rand2 = (rand() / ((double) RAND_MAX)) * TWO_PI; + + return sqrt(rand1) * cos(rand2); +} + +/* + float rand_normal() + { + int n = 12; + int i; + float sum= 0; + for(i = 0; i < n; ++i) sum += (float)rand()/RAND_MAX; + return 
sum-n/2.; + } + */ + +size_t rand_size_t() +{ + return ((size_t)(rand()&0xff) << 56) | + ((size_t)(rand()&0xff) << 48) | + ((size_t)(rand()&0xff) << 40) | + ((size_t)(rand()&0xff) << 32) | + ((size_t)(rand()&0xff) << 24) | + ((size_t)(rand()&0xff) << 16) | + ((size_t)(rand()&0xff) << 8) | + ((size_t)(rand()&0xff) << 0); +} + +float rand_uniform(float min, float max) +{ + if(max < min){ + float swap = min; + min = max; + max = swap; + } + return ((float)rand()/RAND_MAX * (max - min)) + min; +} + +float rand_scale(float s) +{ + float scale = rand_uniform(1, s); + if(rand()%2) return scale; + return 1./scale; +} + +float **one_hot_encode(float *a, int n, int k) +{ + int i; + float **t = calloc(n, sizeof(float*)); + for(i = 0; i < n; ++i){ + t[i] = calloc(k, sizeof(float)); + int index = (int)a[i]; + t[i][index] = 1; + } + return t; +} + diff --git a/workloads/realworld/async/darknet/src/utils.h b/workloads/realworld/async/darknet/src/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..ef24da79888612f5b48fbb4dc233c483590e0c34 --- /dev/null +++ b/workloads/realworld/async/darknet/src/utils.h @@ -0,0 +1,53 @@ +#ifndef UTILS_H +#define UTILS_H +#include +#include +#include "darknet.h" +#include "list.h" + +#define TIME(a) \ + do { \ + double start = what_time_is_it_now(); \ + a; \ + printf("%s took: %f seconds\n", #a, what_time_is_it_now() - start); \ + } while (0) + +#define TWO_PI 6.2831853071795864769252866f + +double what_time_is_it_now(); +void shuffle(void *arr, size_t n, size_t size); +void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections); +void free_ptrs(void **ptrs, int n); +int alphanum_to_int(char c); +char int_to_alphanum(int i); +int read_int(int fd); +void write_int(int fd, int n); +void read_all(int fd, char *buffer, size_t bytes); +void write_all(int fd, char *buffer, size_t bytes); +int read_all_fail(int fd, char *buffer, size_t bytes); +int write_all_fail(int fd, char *buffer, size_t bytes); +void 
find_replace(char *str, char *orig, char *rep, char *output); +void malloc_error(); +void file_error(char *s); +void strip(char *s); +void strip_char(char *s, char bad); +list *split_str(char *s, char delim); +char *fgetl(FILE *fp); +list *parse_csv_line(char *line); +char *copy_string(char *s); +int count_fields(char *line); +float *parse_fields(char *line, int n); +void translate_array(float *a, int n, float s); +float constrain(float min, float max, float a); +int constrain_int(int a, int min, int max); +float rand_scale(float s); +int rand_int(int min, int max); +void mean_arrays(float **a, int n, int els, float *avg); +float dist_array(float *a, float *b, int n, int sub); +float **one_hot_encode(float *a, int n, int k); +float sec(clock_t clocks); +void print_statistics(float *a, int n); +int int_index(int *a, int val, int n); + +#endif + diff --git a/workloads/realworld/async/darknet/src/yolo_layer.c b/workloads/realworld/async/darknet/src/yolo_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..049a4d6a92cf7fea667b8de2340822834408bb05 --- /dev/null +++ b/workloads/realworld/async/darknet/src/yolo_layer.c @@ -0,0 +1,374 @@ +#include "yolo_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes) +{ + int i; + layer l = {0}; + l.type = YOLO; + + l.n = n; + l.total = total; + l.batch = batch; + l.h = h; + l.w = w; + l.c = n*(classes + 4 + 1); + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.cost = calloc(1, sizeof(float)); + l.biases = calloc(total*2, sizeof(float)); + if(mask) l.mask = mask; + else{ + l.mask = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + l.mask[i] = i; + } + } + l.bias_updates = calloc(n*2, sizeof(float)); + l.outputs = h*w*n*(classes + 4 + 1); + l.inputs = l.outputs; + l.truths = 90*(4 + 1); + 
l.delta = calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + for(i = 0; i < total*2; ++i){ + l.biases[i] = .5; + } + + l.forward = forward_yolo_layer; + l.backward = backward_yolo_layer; +#ifdef GPU + l.forward_gpu = forward_yolo_layer_gpu; + l.backward_gpu = backward_yolo_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "yolo\n"); + srand(0); + + return l; +} + +void resize_yolo_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->n*(l->classes + 4 + 1); + l->inputs = l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride) +{ + box b; + b.x = (i + x[index + 0*stride]) / lw; + b.y = (j + x[index + 1*stride]) / lh; + b.w = exp(x[index + 2*stride]) * biases[2*n] / w; + b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h; + return b; +} + +float delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride) +{ + box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); + float iou = box_iou(pred, truth); + + float tx = (truth.x*lw - i); + float ty = (truth.y*lh - j); + float tw = log(truth.w*w / biases[2*n]); + float th = log(truth.h*h / biases[2*n + 1]); + + delta[index + 0*stride] = scale * (tx - x[index + 0*stride]); + delta[index + 1*stride] = scale * (ty - x[index + 1*stride]); + delta[index + 2*stride] = scale * (tw - x[index + 2*stride]); + delta[index + 
3*stride] = scale * (th - x[index + 3*stride]); + return iou; +} + + +void delta_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat) +{ + int n; + if (delta[index]){ + delta[index + stride*class] = 1 - output[index + stride*class]; + if(avg_cat) *avg_cat += output[index + stride*class]; + return; + } + for(n = 0; n < classes; ++n){ + delta[index + stride*n] = ((n == class)?1 : 0) - output[index + stride*n]; + if(n == class && avg_cat) *avg_cat += output[index + stride*n]; + } +} + +static int entry_index(layer l, int batch, int location, int entry) +{ + int n = location / (l.w*l.h); + int loc = location % (l.w*l.h); + return batch*l.outputs + n*l.w*l.h*(4+l.classes+1) + entry*l.w*l.h + loc; +} + +void forward_yolo_layer(const layer l, network net) +{ + int i,j,b,t,n; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array(l.output + index, (1+l.classes)*l.w*l.h, LOGISTIC); + } + } +#endif + + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + if(!net.train) return; + float avg_iou = 0; + float recall = 0; + float recall75 = 0; + float avg_cat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + int class_count = 0; + *(l.cost) = 0; + for (b = 0; b < l.batch; ++b) { + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.w*l.h); + float best_iou = 0; + int best_t = 0; + for(t = 0; t < l.max_boxes; ++t){ + box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1); + if(!truth.x) break; + float iou = box_iou(pred, truth); + if (iou > best_iou) { + 
best_iou = iou; + best_t = t; + } + } + int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4); + avg_anyobj += l.output[obj_index]; + l.delta[obj_index] = 0 - l.output[obj_index]; + if (best_iou > l.ignore_thresh) { + l.delta[obj_index] = 0; + } + if (best_iou > l.truth_thresh) { + l.delta[obj_index] = 1 - l.output[obj_index]; + + int class = net.truth[best_t*(4 + 1) + b*l.truths + 4]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); + delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, 0); + box truth = float_to_box(net.truth + best_t*(4 + 1) + b*l.truths, 1); + delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + } + } + } + } + for(t = 0; t < l.max_boxes; ++t){ + box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1); + + if(!truth.x) break; + float best_iou = 0; + int best_n = 0; + i = (truth.x * l.w); + j = (truth.y * l.h); + box truth_shift = truth; + truth_shift.x = truth_shift.y = 0; + for(n = 0; n < l.total; ++n){ + box pred = {0}; + pred.w = l.biases[2*n]/net.w; + pred.h = l.biases[2*n+1]/net.h; + float iou = box_iou(pred, truth_shift); + if (iou > best_iou){ + best_iou = iou; + best_n = n; + } + } + + int mask_n = int_index(l.mask, best_n, l.n); + if(mask_n >= 0){ + int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); + float iou = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + + int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4); + avg_obj += l.output[obj_index]; + l.delta[obj_index] = 1 - l.output[obj_index]; + + int class = net.truth[t*(4 + 1) + b*l.truths + 4]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4 + 1); + delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, &avg_cat); + + 
++count; + ++class_count; + if(iou > .5) recall += 1; + if(iou > .75) recall75 += 1; + avg_iou += iou; + } + } + } + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", net.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count); +} + +void backward_yolo_layer(const layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +{ + int i; + int new_w=0; + int new_h=0; + if (((float)netw/w) < ((float)neth/h)) { + new_w = netw; + new_h = (h * netw)/w; + } else { + new_h = neth; + new_w = (w * neth)/h; + } + for (i = 0; i < n; ++i){ + box b = dets[i].bbox; + b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); + b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); + b.w *= (float)netw/new_w; + b.h *= (float)neth/new_h; + if(!relative){ + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + dets[i].bbox = b; + } +} + +int yolo_num_detections(layer l, float thresh) +{ + int i, n; + int count = 0; + for (i = 0; i < l.w*l.h; ++i){ + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4); + if(l.output[obj_index] > thresh){ + ++count; + } + } + } + return count; +} + +void avg_flipped_yolo(layer l) +{ + int i,j,n,z; + float *flip = l.output + l.outputs; + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w/2; ++i) { + for (n = 0; n < l.n; ++n) { + for(z = 0; z < l.classes + 4 + 1; ++z){ + int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; + int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); + float swap = flip[i1]; + flip[i1] = flip[i2]; + flip[i2] = swap; + if(z == 0){ + flip[i1] = -flip[i1]; + flip[i2] = -flip[i2]; + } + } + } + } + } + for(i = 0; i < l.outputs; ++i){ + l.output[i] = (l.output[i] + flip[i])/2.; + } +} + +int 
get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets) +{ + int i,j,n; + float *predictions = l.output; + if (l.batch == 2) avg_flipped_yolo(l); + int count = 0; + for (i = 0; i < l.w*l.h; ++i){ + int row = i / l.w; + int col = i % l.w; + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4); + float objectness = predictions[obj_index]; + if(objectness <= thresh) continue; + int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); + dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); + dets[count].objectness = objectness; + dets[count].classes = l.classes; + for(j = 0; j < l.classes; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, 4 + 1 + j); + float prob = objectness*predictions[class_index]; + dets[count].prob[j] = (prob > thresh) ? prob : 0; + } + ++count; + } + } + correct_yolo_boxes(dets, count, w, h, netw, neth, relative); + return count; +} + +#ifdef GPU + +void forward_yolo_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array_gpu(l.output_gpu + index, (1+l.classes)*l.w*l.h, LOGISTIC); + } + } + if(!net.train || l.onlyforward){ + cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + return; + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_yolo_layer(l, net); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_yolo_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/async/darknet/src/yolo_layer.h 
b/workloads/realworld/async/darknet/src/yolo_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..d2a0243268146e00ebff2b4b11bce23f830689d1 --- /dev/null +++ b/workloads/realworld/async/darknet/src/yolo_layer.h @@ -0,0 +1,19 @@ +#ifndef YOLO_LAYER_H +#define YOLO_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes); +void forward_yolo_layer(const layer l, network net); +void backward_yolo_layer(const layer l, network net); +void resize_yolo_layer(layer *l, int w, int h); +int yolo_num_detections(layer l, float thresh); + +#ifdef GPU +void forward_yolo_layer_gpu(const layer l, network net); +void backward_yolo_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/async/darknet/yolov3-tiny/predictions.jpg b/workloads/realworld/async/darknet/yolov3-tiny/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9ac52ad293314b2654fa1bb577fc1a3249e51228 Binary files /dev/null and b/workloads/realworld/async/darknet/yolov3-tiny/predictions.jpg differ diff --git a/workloads/realworld/async/darknet/yolov3-tiny/run_super.sh b/workloads/realworld/async/darknet/yolov3-tiny/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..d890a7c581dc59167fab1b93778d143fc7e679a9 --- /dev/null +++ b/workloads/realworld/async/darknet/yolov3-tiny/run_super.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg diff --git a/workloads/realworld/async/darknet/yolov3-tiny/run_yolov3-tiny.sh b/workloads/realworld/async/darknet/yolov3-tiny/run_yolov3-tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..d890a7c581dc59167fab1b93778d143fc7e679a9 --- /dev/null +++ b/workloads/realworld/async/darknet/yolov3-tiny/run_yolov3-tiny.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3-tiny.cfg 
../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg diff --git a/workloads/realworld/async/darknet/yolov3-tiny_b/run_super.sh b/workloads/realworld/async/darknet/yolov3-tiny_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..31669e62fb94142e7dc24b3f905c8f1d25950367 --- /dev/null +++ b/workloads/realworld/async/darknet/yolov3-tiny_b/run_super.sh @@ -0,0 +1,2 @@ +#../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg +../darknet detector infer ../cfg/coco.data ../cfg/yolov3-tiny_b.cfg diff --git a/workloads/realworld/async/darknet/yolov3-tiny_t/run_super.sh b/workloads/realworld/async/darknet/yolov3-tiny_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..6cc56bc601476fa212f615b5aec964f12e044473 --- /dev/null +++ b/workloads/realworld/async/darknet/yolov3-tiny_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg +../darknet detector train ../cfg/coco.data ../cfg/yolov3-tiny_t.cfg diff --git a/workloads/realworld/async/darknet/yolov3/predictions.jpg b/workloads/realworld/async/darknet/yolov3/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..49c1abe30cdcdceadad4353da30ce5660c96be1a Binary files /dev/null and b/workloads/realworld/async/darknet/yolov3/predictions.jpg differ diff --git a/workloads/realworld/async/darknet/yolov3/run_super.sh b/workloads/realworld/async/darknet/yolov3/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..440a4b456a80a434243dffa9614d5a501790990f --- /dev/null +++ b/workloads/realworld/async/darknet/yolov3/run_super.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg diff --git a/workloads/realworld/async/darknet/yolov3/run_yolov3.sh b/workloads/realworld/async/darknet/yolov3/run_yolov3.sh new file mode 100755 index 
0000000000000000000000000000000000000000..440a4b456a80a434243dffa9614d5a501790990f --- /dev/null +++ b/workloads/realworld/async/darknet/yolov3/run_yolov3.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg diff --git a/workloads/realworld/async/darknet/yolov3_b/run_super.sh b/workloads/realworld/async/darknet/yolov3_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..913790c1ee53a0d442a89306fbd8bda93faa2581 --- /dev/null +++ b/workloads/realworld/async/darknet/yolov3_b/run_super.sh @@ -0,0 +1,2 @@ +#../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg +../darknet detector infer ../cfg/coco.data ../cfg/yolov3_b.cfg diff --git a/workloads/realworld/async/darknet/yolov3_t/run_super.sh b/workloads/realworld/async/darknet/yolov3_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ee7df07c1a4172f25c1129d8027095d2e3861e28 --- /dev/null +++ b/workloads/realworld/async/darknet/yolov3_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg +../darknet detector train ../cfg/coco.data ../cfg/yolov3_t.cfg diff --git a/workloads/realworld/async/hotspot/Makefile b/workloads/realworld/async/hotspot/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..793c7c13d91b4f492c5df5801f2a9ddadf023470 --- /dev/null +++ b/workloads/realworld/async/hotspot/Makefile @@ -0,0 +1,24 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include + +SRC = hotspot.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = hotspot + +release: $(SRC) + $(CC) $(KERNEL_DIM) $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +enum: $(SRC) + $(CC) $(KERNEL_DIM) -deviceemu $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + 
+debug: $(SRC) + $(CC) $(KERNEL_DIM) -g $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +debugenum: $(SRC) + $(CC) $(KERNEL_DIM) -g -deviceemu $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt diff --git a/workloads/realworld/async/hotspot/Makefile_nvidia b/workloads/realworld/async/hotspot/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..1f7ae25a90c968563e96208c693b927b6765490a --- /dev/null +++ b/workloads/realworld/async/hotspot/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. 
Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := hotspot +# CUDA source files (compiled with cudacc) +CUFILES := hotspot.cu +# CUDA dependency files +# CU_DEPS := needle_kernel.cu +# C/C++ source files (compiled with gcc / c++) +# CCFILES := + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/async/hotspot/README b/workloads/realworld/async/hotspot/README new file mode 100644 index 0000000000000000000000000000000000000000..f24239abebe938fe3e8d3c1a7a97f915bd09a90b --- /dev/null +++ b/workloads/realworld/async/hotspot/README @@ -0,0 +1,8 @@ +******Adjustable work group size***** +The kernel has square shape +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe one dimension +The actually dimension = RD_WG_SIZE_0 * RD_WG_SIZE_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" \ No newline at end of file diff --git a/workloads/realworld/async/hotspot/hotspot.cu b/workloads/realworld/async/hotspot/hotspot.cu new file mode 100644 index 0000000000000000000000000000000000000000..9d8081303421a495d420bb60a7818b160a797a75 --- /dev/null +++ b/workloads/realworld/async/hotspot/hotspot.cu @@ -0,0 +1,421 @@ +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#ifdef RD_WG_SIZE_0_0 +#define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) +#define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) +#define BLOCK_SIZE RD_WG_SIZE +#else +#define BLOCK_SIZE 16 +#endif + +#define STR_SIZE 256 + +/* maximum 
power density possible (say 300W for a 10mm x 10mm chip) */ +#define MAX_PD (3.0e6) +/* required precision in degrees */ +#define PRECISION 0.001 +#define SPEC_HEAT_SI 1.75e6 +#define K_SI 100 +/* capacitance fitting factor */ +#define FACTOR_CHIP 0.5 + +/* chip parameters */ +float t_chip = 0.0005; +float chip_height = 0.016; +float chip_width = 0.016; +/* ambient temperature, assuming no package at all */ +float amb_temp = 80.0; + +void run(int argc, char **argv); + +/* define timer macros */ +#define pin_stats_reset() startCycle() +#define pin_stats_pause(cycles) stopCycle(cycles) +#define pin_stats_dump(cycles) printf("timer: %Lu\n", cycles) + +void fatal(char *s) +{ + fprintf(stderr, "error: %s\n", s); +} + +void writeoutput(float *vect, int grid_rows, int grid_cols, char *file) +{ + + int i, j, index = 0; + FILE *fp; + char str[STR_SIZE]; + + if ((fp = fopen(file, "w")) == 0) + printf("The file was not opened\n"); + + for (i = 0; i < grid_rows; i++) + for (j = 0; j < grid_cols; j++) + { + + sprintf(str, "%d\t%g\n", index, vect[i * grid_cols + j]); + fputs(str, fp); + index++; + } + + fclose(fp); +} + +void readinput(float *vect, int grid_rows, int grid_cols, char *file) +{ + + int i, j; + FILE *fp; + char str[STR_SIZE]; + float val; + + if ((fp = fopen(file, "r")) == 0) + printf("The file was not opened\n"); + + for (i = 0; i <= grid_rows - 1; i++) + for (j = 0; j <= grid_cols - 1; j++) + { + fgets(str, STR_SIZE, fp); + if (feof(fp)) + fatal("not enough lines in file"); + // if ((sscanf(str, "%d%f", &index, &val) != 2) || (index != ((i-1)*(grid_cols-2)+j-1))) + if ((sscanf(str, "%f", &val) != 1)) + fatal("invalid file format"); + vect[i * grid_cols + j] = val; + } + + fclose(fp); +} + +#define IN_RANGE(x, min, max) ((x) >= (min) && (x) <= (max)) +#define CLAMP_RANGE(x, min, max) x = (x < (min)) ? min : ((x > (max)) ? max : x) +#define MIN(a, b) ((a) <= (b) ? 
(a) : (b)) + +__global__ void calculate_temp(int iteration, // number of iteration + float *power, // power input + float *temp_src, // temperature input/output + float *temp_dst, // temperature input/output + int grid_cols, // Col of grid + int grid_rows, // Row of grid + int border_cols, // border offset + int border_rows, // border offset + float Cap, // Capacitance + float Rx, + float Ry, + float Rz, + float step, + int batch_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + __shared__ float temp_on_cuda[PREFETCH_COUNT][BLOCK_SIZE][BLOCK_SIZE]; + __shared__ float power_on_cuda[PREFETCH_COUNT][BLOCK_SIZE][BLOCK_SIZE]; + __shared__ float temp_t[BLOCK_SIZE][BLOCK_SIZE]; // saving temparary temperature result + + float amb_temp = 80.0; + float step_div_Cap; + float Rx_1, Ry_1, Rz_1; + + // int bx = blockIdx.x; + // int by = blockIdx.y; + + int tx = threadIdx.x; + int ty = threadIdx.y; + + step_div_Cap = step / Cap; + + Rx_1 = 1 / Rx; + Ry_1 = 1 / Ry; + Rz_1 = 1 / Rz; + + // each block finally computes result for a small block + // after N iterations. 
+ // it is the non-overlapping small blocks that cover + // all the input data + + // calculate the small block size + int small_block_rows = BLOCK_SIZE - iteration * 2; // EXPAND_RATE + int small_block_cols = BLOCK_SIZE - iteration * 2; // EXPAND_RATE + + // if (bx == 0 && by == 0 && tx == 0 && ty == 0) + // printf("iteration is %d, small_block_rows is %d\n", iteration, small_block_rows); + + int tile_dim_x = gridDim.x * batch_size; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = batch_size * batch_size; + int tiles_this_block_x = batch_size; + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int fetch = base_tile; + int end_tile = fetch + tiles_this_block; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + // block id + int offset = fetch - base_tile; + int block_id = fetch / tiles_this_block; + int bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + // calculate the boundary for the block according to + // the boundary of its small block + int blkY = small_block_rows * by - border_rows; + int blkX = small_block_cols * bx - border_cols; + int blkYmax = blkY + BLOCK_SIZE - 1; + int blkXmax = blkX + BLOCK_SIZE - 1; + + // calculate the global thread coordination + int yidx = blkY + ty; + int xidx = blkX + tx; + + // load data if it is within the valid input range + int loadYidx = yidx, loadXidx = xidx; + int index = grid_cols * loadYidx + loadXidx; + + if (IN_RANGE(loadYidx, 0, grid_rows - 1) && IN_RANGE(loadXidx, 0, grid_cols - 1)) + { + memcpy_async(temp_on_cuda[fetch % PREFETCH_COUNT][ty][tx], temp_src[index], pipe); // Load the temperature data from global memory to shared memory + memcpy_async(power_on_cuda[fetch % PREFETCH_COUNT][ty][tx], power[index], pipe); // Load the power data from global memory to 
shared memory + } + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + // block id + int offset = compute - base_tile; + int block_id = compute / tiles_this_block; + int bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + // calculate the boundary for the block according to + // the boundary of its small block + int blkY = small_block_rows * by - border_rows; + int blkX = small_block_cols * bx - border_cols; + int blkYmax = blkY + BLOCK_SIZE - 1; + int blkXmax = blkX + BLOCK_SIZE - 1; + + // calculate the global thread coordination + int yidx = blkY + ty; + int xidx = blkX + tx; + + // load data if it is within the valid input range + int loadYidx = yidx, loadXidx = xidx; + int index = grid_cols * loadYidx + loadXidx; + + // effective range within this block that falls within + // the valid range of the input data + // used to rule out computation outside the boundary. + int validYmin = (blkY < 0) ? -blkY : 0; + int validYmax = (blkYmax > grid_rows - 1) ? BLOCK_SIZE - 1 - (blkYmax - grid_rows + 1) : BLOCK_SIZE - 1; + int validXmin = (blkX < 0) ? -blkX : 0; + int validXmax = (blkXmax > grid_cols - 1) ? BLOCK_SIZE - 1 - (blkXmax - grid_cols + 1) : BLOCK_SIZE - 1; + + int N = ty - 1; + int S = ty + 1; + int W = tx - 1; + int E = tx + 1; + + N = (N < validYmin) ? validYmin : N; + S = (S > validYmax) ? validYmax : S; + W = (W < validXmin) ? validXmin : W; + E = (E > validXmax) ? 
validXmax : E; + + bool computed; + for (int i = 0; i < iteration; i++) + { + computed = false; + if (IN_RANGE(tx, i + 1, BLOCK_SIZE - i - 2) && + IN_RANGE(ty, i + 1, BLOCK_SIZE - i - 2) && + IN_RANGE(tx, validXmin, validXmax) && + IN_RANGE(ty, validYmin, validYmax)) + { + computed = true; + temp_t[ty][tx] = temp_on_cuda[compute % PREFETCH_COUNT][ty][tx] + step_div_Cap * (power_on_cuda[compute % PREFETCH_COUNT][ty][tx] + + (temp_on_cuda[compute % PREFETCH_COUNT][S][tx] + temp_on_cuda[compute % PREFETCH_COUNT][N][tx] - 2.0 * temp_on_cuda[compute % PREFETCH_COUNT][ty][tx]) * Ry_1 + + (temp_on_cuda[compute % PREFETCH_COUNT][ty][E] + temp_on_cuda[compute % PREFETCH_COUNT][ty][W] - 2.0 * temp_on_cuda[compute % PREFETCH_COUNT][ty][tx]) * Rx_1 + + (amb_temp - temp_on_cuda[compute % PREFETCH_COUNT][ty][tx]) * Rz_1); + } + block.sync(); + if (i == iteration - 1) + break; + if (computed) // Assign the computation range + temp_on_cuda[compute % PREFETCH_COUNT][ty][tx] = temp_t[ty][tx]; + block.sync(); + } + + // update the global memory + // after the last iteration, only threads coordinated within the + // small block perform the calculation and switch on ``computed'' + if (computed) + { + temp_dst[index] = temp_t[ty][tx]; + } + } +} + +/* + compute N time steps +*/ + +int compute_tran_temp(float *MatrixPower, float *MatrixTemp[2], int col, int row, + int total_iterations, int num_iterations, int blockCols, int blockRows, int borderCols, int borderRows, int batch_size) +{ + dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE); + dim3 dimGrid(blockCols, blockRows); + + float grid_height = chip_height / row; + float grid_width = chip_width / col; + + float Cap = FACTOR_CHIP * SPEC_HEAT_SI * t_chip * grid_width * grid_height; + float Rx = grid_width / (2.0 * K_SI * t_chip * grid_height); + float Ry = grid_height / (2.0 * K_SI * t_chip * grid_width); + float Rz = t_chip / (K_SI * grid_height * grid_width); + + float max_slope = MAX_PD / (FACTOR_CHIP * t_chip * SPEC_HEAT_SI); + float step = 
PRECISION / max_slope; + float t; + + int src = 1, dst = 0; + + for (t = 0; t < total_iterations; t += num_iterations) + { + int temp = src; + src = dst; + dst = temp; + calculate_temp<<>>(MIN(num_iterations, total_iterations - t), MatrixPower, MatrixTemp[src], MatrixTemp[dst], + col, row, borderCols, borderRows, Cap, Rx, Ry, Rz, step, batch_size); + } + return dst; +} + +void usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "\t - number of rows/cols in the grid (positive integer)\n"); + fprintf(stderr, "\t - pyramid heigh(positive integer)\n"); + fprintf(stderr, "\t - number of iterations\n"); + fprintf(stderr, "\t - name of the file containing the initial temperature values of each cell\n"); + fprintf(stderr, "\t - name of the file containing the dissipated power values of each cell\n"); + fprintf(stderr, "\t - name of the output file\n"); + fprintf(stderr, "\t - batch_size * batch_size per block\n"); + exit(1); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE); + + run(argc, argv); + + return EXIT_SUCCESS; +} + +void run(int argc, char **argv) +{ + int size; + int grid_rows, grid_cols; + float *FilesavingTemp, *FilesavingPower, *MatrixOut; + char *tfile, *pfile, *ofile; + + int total_iterations = 60; + int pyramid_height = 1; // number of iterations + + if (argc != 8) + usage(argc, argv); + if ((grid_rows = atoi(argv[1])) <= 0 || + (grid_cols = atoi(argv[1])) <= 0 || + (pyramid_height = atoi(argv[2])) <= 0 || + (total_iterations = atoi(argv[3])) <= 0) + usage(argc, argv); + + tfile = argv[4]; + pfile = argv[5]; + ofile = argv[6]; + + int batch_size = atoi(argv[7]); + + size = grid_rows * grid_cols; + +/* --------------- pyramid parameters --------------- */ +#define EXPAND_RATE 2 // add one iteration will extend the pyramid base by 2 
per each borderline + int borderCols = (pyramid_height)*EXPAND_RATE / 2; + int borderRows = (pyramid_height)*EXPAND_RATE / 2; + int smallBlockCol = BLOCK_SIZE - (pyramid_height)*EXPAND_RATE; + int smallBlockRow = BLOCK_SIZE - (pyramid_height)*EXPAND_RATE; + // int blockCols = grid_cols / smallBlockCol + ((grid_cols % smallBlockCol == 0) ? 0 : 1); + // int blockRows = grid_rows / smallBlockRow + ((grid_rows % smallBlockRow == 0) ? 0 : 1); + + int blockCols = (grid_cols + smallBlockCol * batch_size - 1) / (smallBlockCol * batch_size); + int blockRows = (grid_rows + smallBlockRow * batch_size - 1) / (smallBlockRow * batch_size); + + // printf("borderCols is %d, smallBlockCol is %d, blockCols is %d, grid_cols is %d \n", borderCols, smallBlockCol, blockCols, grid_cols); + + FilesavingTemp = (float *)malloc(size * sizeof(float)); + FilesavingPower = (float *)malloc(size * sizeof(float)); + MatrixOut = (float *)calloc(size, sizeof(float)); + + if (!FilesavingPower || !FilesavingTemp || !MatrixOut) + fatal("unable to allocate memory"); + + printf("pyramidHeight: %d\ngridSize: [%d, %d]\nborder:[%d, %d]\nblockGrid:[%d, %d]\ntargetBlock:[%d, %d]\n", + pyramid_height, grid_cols, grid_rows, borderCols, borderRows, blockCols, blockRows, smallBlockCol, smallBlockRow); + + readinput(FilesavingTemp, grid_rows, grid_cols, tfile); + readinput(FilesavingPower, grid_rows, grid_cols, pfile); + + GPU_argv_init(); + + initTrace(); + startCPU(); + float *MatrixTemp[2], *MatrixPower; + cudaMalloc((void **)&MatrixTemp[0], sizeof(float) * size); + cudaMalloc((void **)&MatrixTemp[1], sizeof(float) * size); + cudaMemcpy(MatrixTemp[0], FilesavingTemp, sizeof(float) * size, cudaMemcpyHostToDevice); + + cudaMalloc((void **)&MatrixPower, sizeof(float) * size); + cudaMemcpy(MatrixPower, FilesavingPower, sizeof(float) * size, cudaMemcpyHostToDevice); + // printf("Start computing the transient temperature\n"); + int ret = compute_tran_temp(MatrixPower, MatrixTemp, grid_cols, grid_rows, + 
total_iterations, pyramid_height, blockCols, blockRows, borderCols, borderRows, batch_size); + // printf("Ending simulation\n"); + cudaMemcpy(MatrixOut, MatrixTemp[ret], sizeof(float) * size, cudaMemcpyDeviceToHost); + + cudaFree(MatrixPower); + cudaFree(MatrixTemp[0]); + cudaFree(MatrixTemp[1]); + + endCPU(); + finiTrace(); + + writeoutput(MatrixOut, grid_rows, grid_cols, ofile); + free(MatrixOut); +} diff --git a/workloads/realworld/async/hotspot/run.sh b/workloads/realworld/async/hotspot/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..66b814725286fb6699d2b893740518ad43dc307a --- /dev/null +++ b/workloads/realworld/async/hotspot/run.sh @@ -0,0 +1 @@ +./hotspot 512 2 2 ../../../../data/hotspot/temp_512 ../../../../data/hotspot/power_512 output.out 4 diff --git a/workloads/realworld/async/hotspot/run_super.sh b/workloads/realworld/async/hotspot/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ad31b9dcd0ce3e1d53df5aaf445082ab86cf2366 --- /dev/null +++ b/workloads/realworld/async/hotspot/run_super.sh @@ -0,0 +1 @@ +./hotspot 8192 2 2 ../../../../data/hotspot/temp_8192.txt ../../../../data/hotspot/power_8192.txt output.out 8 diff --git a/workloads/realworld/async/kmeans/Makefile b/workloads/realworld/async/kmeans/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..e473b45be17e5805399c15b04dda57742451d913 --- /dev/null +++ b/workloads/realworld/async/kmeans/Makefile @@ -0,0 +1,33 @@ +include ../../../common/make.config + +# C compiler +CC = gcc +CC_FLAGS = -g -fopenmp -O2 + +# CUDA compiler +NVCC = $(CUDA_DIR)/bin/nvcc +NVCC_FLAGS = -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) + +# 'make dbg=1' enables NVCC debugging + + +# 'make emu=1' compiles the CUDA kernels for emulation +ifeq ($(emu),1) + NVCC_FLAGS += -deviceemu +endif + + +kmeans: cluster.o getopt.o kmeans.o kmeans_clustering.o kmeans_cuda.o rmse.o $(CUPTI_ADD_COMMON)/cpu_timestamps.o + $(CC) $(CC_FLAGS) cluster.o getopt.o 
kmeans.o kmeans_clustering.o kmeans_cuda.o rmse.o $(CUPTI_ADD_COMMON)/cpu_timestamps.o $(CUPTI_ADD_COMMON)/cupti_add.cpp -o kmeans $(NVCC_FLAGS) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++ + +kmeans.o: kmeans.c + $(CC) $(CC_FLAGS) $< -c $(NVCC_FLAGS) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +%.o: %.[ch] + $(CC) $(CC_FLAGS) $< -c -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lstdc++ + +kmeans_cuda.o: kmeans_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp + $(NVCC) -O2 -c kmeans_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(NVCC_FLAGS) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +clean: + rm -f *.o *~ kmeans kmeans_cuda.linkinfo diff --git a/workloads/realworld/async/kmeans/Makefile_nvidia b/workloads/realworld/async/kmeans/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..5d612b9dc8b5c19310162e1f721e1d2d4e33fb72 --- /dev/null +++ b/workloads/realworld/async/kmeans/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
+# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := kmeans +# CUDA source files (compiled with cudacc) +CUFILES := kmeans_cuda.cu +# CUDA dependency files +CU_DEPS := kmeans_cuda_kernel.cu +# C/C++ source files (compiled with gcc / c++) +CFILES := cluster.c getopt.c kmeans.c kmeans_clustering.c rmse.c + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/async/kmeans/README b/workloads/realworld/async/kmeans/README new file mode 100755 index 0000000000000000000000000000000000000000..bebae52d716986889b30a435214e095346424190 --- /dev/null +++ b/workloads/realworld/async/kmeans/README @@ -0,0 +1,10 @@ +Usage: ./kmeans [switches] -i filename + + -i filename :file containing data to be clustered + -m max_nclusters :maximum number of clusters allowed [default=5] + -n min_nclusters :minimum 
number of clusters allowed [default=5] + -t threshold :threshold value [default=0.001] + -l nloops :iteration for each number of clusters [default=1] + -b :input file is in binary format + -r :calculate RMSE [default=off] + -o :output cluster center coordinates [default=off] \ No newline at end of file diff --git a/workloads/realworld/async/kmeans/cluster.c b/workloads/realworld/async/kmeans/cluster.c new file mode 100755 index 0000000000000000000000000000000000000000..3d2d78f817e01b19f8eca1dbea3e05dea8a318f6 --- /dev/null +++ b/workloads/realworld/async/kmeans/cluster.c @@ -0,0 +1,159 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. 
*/ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: cluster.c **/ +/** Description: Takes as input a file, containing 1 data point per **/ +/** per line, and performs a fuzzy c-means clustering **/ +/** on the data. Fuzzy clustering is performed using **/ +/** min to max clusters and the clustering that gets **/ +/** the best score according to a compactness and **/ +/** separation criterion are returned. **/ +/** Author: Brendan McCane **/ +/** James Cook University of North Queensland. **/ +/** Australia. email: mccane@cs.jcu.edu.au **/ +/** **/ +/** Edited by: Jay Pisharath, Wei-keng Liao **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. 
**/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "kmeans.h" + +extern double wtime(void); +float min_rmse_ref = FLT_MAX; /* reference min_rmse value */ + +/*---< cluster() >-----------------------------------------------------------*/ +int cluster(int npoints, /* number of data points */ + int nfeatures, /* number of attributes for each point */ + float **features, /* array: [npoints][nfeatures] */ + int min_nclusters, /* range of min to max number of clusters */ + int max_nclusters, + float threshold, /* loop terminating factor */ + int *best_nclusters, /* out: number between min and max with lowest RMSE */ + float ***cluster_centres, /* out: [best_nclusters][nfeatures] */ + float *min_rmse, /* out: minimum RMSE */ + int isRMSE, /* calculate RMSE */ + int nloops /* number of iteration for each number of clusters */ + ) +{ + + //initTrace(); + + int nclusters; /* number of clusters k */ + int index =0; /* number of iteration to reach the best RMSE */ + int rmse; /* RMSE for each clustering */ + int *membership; /* which cluster a data point belongs to */ + float **tmp_cluster_centres; /* hold coordinates of cluster centers */ + int i; + + /* allocate memory for membership */ + membership = (int*) malloc(npoints * sizeof(int)); + + /* sweep k from min to max_nclusters to find the best number of clusters */ + for(nclusters = min_nclusters; nclusters <= max_nclusters; nclusters++) + { + if (nclusters > npoints) break; /* cannot have more clusters than points */ + + /* allocate device memory, invert data array (@ kmeans_cuda.cu) */ + allocateMemory(npoints, nfeatures, nclusters, features); + + /* iterate nloops times for each number of clusters */ + for(i = 0; i < nloops; i++) + { + 
/* initialize initial cluster centers, CUDA calls (@ kmeans_cuda.cu) */ + tmp_cluster_centres = kmeans_clustering(features, + nfeatures, + npoints, + nclusters, + threshold, + membership); + + if (*cluster_centres) { + free((*cluster_centres)[0]); + free(*cluster_centres); + } + *cluster_centres = tmp_cluster_centres; + + + /* find the number of clusters with the best RMSE */ + if(isRMSE) + { + rmse = rms_err(features, + nfeatures, + npoints, + tmp_cluster_centres, + nclusters); + + if(rmse < min_rmse_ref){ + min_rmse_ref = rmse; //update reference min RMSE + *min_rmse = min_rmse_ref; //update return min RMSE + *best_nclusters = nclusters; //update optimum number of clusters + index = i; //update number of iteration to reach best RMSE + } + } + } + + deallocateMemory(); /* free device memory (@ kmeans_cuda.cu) */ + } + + free(membership); + + return index; +} + diff --git a/workloads/realworld/async/kmeans/cp.sh b/workloads/realworld/async/kmeans/cp.sh new file mode 100755 index 0000000000000000000000000000000000000000..243885047459789eb8eae5d6c5b2b4822eb3aabf --- /dev/null +++ b/workloads/realworld/async/kmeans/cp.sh @@ -0,0 +1,27 @@ +cp super_0.log super_3.log +cp super_0.log super_4.log +cp super_0.log super_5.log +cp super_0.log super_6.log +cp super_0.log super_7.log +cp super_0.log super_8.log +cp super_0.log super_9.log +cp super_0.log super_10.log +cp super_0.log super_11.log +cp super_0.log super_12.log +cp super_0.log super_13.log +cp super_0.log super_14.log +cp super_0.log super_15.log +cp super_0.log super_17.log +cp super_0.log super_17.log +cp super_0.log super_18.log +cp super_0.log super_19.log +cp super_0.log super_20.log +cp super_0.log super_21.log +cp super_0.log super_22.log +cp super_0.log super_23.log +cp super_0.log super_24.log +cp super_0.log super_25.log +cp super_0.log super_26.log +cp super_0.log super_27.log +cp super_0.log super_28.log +cp super_0.log super_29.log diff --git a/workloads/realworld/async/kmeans/getopt.c 
b/workloads/realworld/async/kmeans/getopt.c new file mode 100755 index 0000000000000000000000000000000000000000..fa2f31378fb2978f65267ba2e810aae3ff1ee016 --- /dev/null +++ b/workloads/realworld/async/kmeans/getopt.c @@ -0,0 +1,1184 @@ +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to drepper@gnu.org + before changing it! + Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . */ +#ifndef _NO_PROTO +# define _NO_PROTO +#endif + +#ifdef HAVE_CONFIG_H +# include +#endif + +#if !defined __STDC__ || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +# ifndef const +# define const +# endif +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. 
Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +# include +# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +# define ELIDE_CODE +# endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +# include +# include +#endif /* GNU C library. */ + +#ifdef VMS +# include +# if HAVE_STRING_H - 0 +# include +# endif +#endif + +#ifndef _ +/* This is for other GNU distributions with internationalized messages. */ +# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include +# ifndef _ +# define _(msgid) gettext (msgid) +# endif +# else +# define _(msgid) (msgid) +# endif +# if defined _LIBC && defined USE_IN_LIBIO +# include +# endif +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. 
+ When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. */ +int optind = 1; + +/* Formerly, initialization of getopt depended on optind==0, which + causes problems with re-calling getopt as programs generally don't + know that. */ + +int __getopt_initialized; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. 
+ This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +/* Value of POSIXLY_CORRECT environment variable. */ +static char *posixly_correct; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +# include +# define my_index strchr +#else + +//# if HAVE_STRING_H || WIN32 /* Pete Wilson mod 7/28/02 */ +# include +//# else +//# include +//# endif + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +#ifndef getenv +extern char *getenv (); +#endif + +static char * +my_index (str, chr) + const char *str; + int chr; +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. 
*/ +#ifdef __GNUC__ +/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. + That was relevant to code that was here before. */ +# if (!defined __STDC__ || !__STDC__) && !defined strlen +/* gcc with -traditional declares the built-in strlen to return int, + and has done so at least since version 2.4.5. -- rms. */ +extern int strlen (const char *); +# endif /* not __STDC__ */ +#endif /* __GNUC__ */ + +#endif /* not __GNU_LIBRARY__ */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +#ifdef _LIBC +/* Stored original parameters. + XXX This is no good solution. We should rather copy the args so + that we can compare them later. But we must not use malloc(3). */ +extern int __libc_argc; +extern char **__libc_argv; + +/* Bash 2.0 gives us an environment variable containing flags + indicating ARGV elements that should not be considered arguments. */ + +# ifdef USE_NONOPTION_FLAGS +/* Defined in getopt_init.c */ +extern char *__getopt_nonoption_flags; + +static int nonoption_flags_max_len; +static int nonoption_flags_len; +# endif + +# ifdef USE_NONOPTION_FLAGS +# define SWAP_FLAGS(ch1, ch2) \ + if (nonoption_flags_len > 0) \ + { \ + char __tmp = __getopt_nonoption_flags[ch1]; \ + __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ + __getopt_nonoption_flags[ch2] = __tmp; \ + } +# else +# define SWAP_FLAGS(ch1, ch2) +# endif +#else /* !_LIBC */ +# define SWAP_FLAGS(ch1, ch2) +#endif /* _LIBC */ + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. 
+ + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +#if defined __STDC__ && __STDC__ +static void exchange (char **); +#endif + +static void +exchange (argv) + char **argv; +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + /* First make sure the handling of the `__getopt_nonoption_flags' + string can work normally. Our top argument must be in the range + of the string. */ + if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) + { + /* We must extend the array. The user plays games with us and + presents new arguments. */ + char *new_str = malloc (top + 1); + if (new_str == NULL) + nonoption_flags_len = nonoption_flags_max_len = 0; + else + { + memset (__mempcpy (new_str, __getopt_nonoption_flags, + nonoption_flags_max_len), + '\0', top + 1 - nonoption_flags_max_len); + nonoption_flags_max_len = top + 1; + __getopt_nonoption_flags = new_str; + } + } +#endif + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. 
*/ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + SWAP_FLAGS (bottom + i, middle + i); + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Initialize the internal data when the first call is made. */ + +#if defined __STDC__ && __STDC__ +static const char *_getopt_initialize (int, char *const *, const char *); +#endif +static const char * +_getopt_initialize (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + first_nonopt = last_nonopt = optind; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. 
*/ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + if (posixly_correct == NULL + && argc == __libc_argc && argv == __libc_argv) + { + if (nonoption_flags_max_len == 0) + { + if (__getopt_nonoption_flags == NULL + || __getopt_nonoption_flags[0] == '\0') + nonoption_flags_max_len = -1; + else + { + const char *orig_str = __getopt_nonoption_flags; + int len = nonoption_flags_max_len = strlen (orig_str); + if (nonoption_flags_max_len < argc) + nonoption_flags_max_len = argc; + __getopt_nonoption_flags = + (char *) malloc (nonoption_flags_max_len); + if (__getopt_nonoption_flags == NULL) + nonoption_flags_max_len = -1; + else + memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), + '\0', nonoption_flags_max_len - len); + } + } + nonoption_flags_len = nonoption_flags_max_len; + } + else + nonoption_flags_len = 0; +#endif + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns -1. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) 
+ + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. 
*/ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + int print_errors = opterr; + if (optstring[0] == ':') + print_errors = 0; + + if (argc < 1) + return -1; + + optarg = NULL; + + if (optind == 0 || !__getopt_initialized) + { + if (optind == 0) + optind = 1; /* Don't scan ARGV[0], the program name. */ + optstring = _getopt_initialize (argc, argv, optstring); + __getopt_initialized = 1; + } + + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The later information + is only used when the used in the GNU libc. */ +#if defined _LIBC && defined USE_NONOPTION_FLAGS +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ + || (optind < nonoption_flags_len \ + && __getopt_nonoption_flags[optind] == '1')) +#else +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') +#endif + + if (nextchar == NULL || *nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT and LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (last_nonopt > optind) + last_nonopt = optind; + if (first_nonopt > optind) + first_nonopt = optind; + + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. 
*/ + + while (optind < argc && NONOPTION_P) + optind++; + last_nonopt = optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (ordering == REQUIRE_ORDER) + return -1; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. 
*/ + + if (longopts != NULL + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) + == (unsigned int) strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else if (long_only + || pfound->has_arg != p->has_arg + || pfound->flag != p->flag + || pfound->val != p->val) + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + optopt = 0; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind - 1][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#else + fprintf (stderr, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], + pfound->name); +#else + fprintf (stderr, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + + nextchar += strlen (nextchar); + + optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. 
If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' + || my_index (optstring, *nextchar) == NULL) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + nextchar = (char *) ""; + optind++; + optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (posixly_correct) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: illegal option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); +#endif + } + else + { +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: invalid option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + optopt = c; + return '?'; + } + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';') + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) + /* Do nothing. 
*/ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = NULL; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#endif /* Not ELIDE_CODE. */ + + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ \ No newline at end of file diff --git a/workloads/realworld/async/kmeans/getopt.h b/workloads/realworld/async/kmeans/getopt.h new file mode 100755 index 0000000000000000000000000000000000000000..bae04bf7d418206d73892a94ff94923a36549362 --- /dev/null +++ b/workloads/realworld/async/kmeans/getopt.h @@ -0,0 +1,191 @@ + + +/* getopt.h */ +/* Declarations for getopt. + Copyright (C) 1989-1994, 1996-1999, 2001 Free Software + Foundation, Inc. This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute + it and/or modify it under the terms of the GNU Lesser + General Public License as published by the Free Software + Foundation; either version 2.1 of the License, or + (at your option) any later version. + + The GNU C Library is distributed in the hope that it will + be useful, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A + PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. 
+ + You should have received a copy of the GNU Lesser General + Public License along with the GNU C Library; if not, write + to the Free Software Foundation, Inc., 59 Temple Place, + Suite 330, Boston, MA 02111-1307 USA. */ + + + + + +#ifndef _GETOPT_H + +#ifndef __need_getopt +# define _GETOPT_H 1 +#endif + +/* If __GNU_LIBRARY__ is not already defined, either we are being used + standalone, or this is the first header included in the source file. + If we are being used with glibc, we need to include , but + that does not exist if we are standalone. So: if __GNU_LIBRARY__ is + not defined, include , which will pull in for us + if it's from glibc. (Why ctype.h? It's guaranteed to exist and it + doesn't flood the namespace with stuff the way some other headers do.) */ +#if !defined __GNU_LIBRARY__ +# include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +#ifndef __need_getopt +/* Describe the long-named options requested by the application. 
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +# if (defined __STDC__ && __STDC__) || defined __cplusplus + const char *name; +# else + char *name; +# endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +# define no_argument 0 +# define required_argument 1 +# define optional_argument 2 +#endif /* need getopt */ + + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, `optopt' is set to the option letter, and '?' is + returned. 
+ + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in `optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU `getopt'. + + The argument `--' causes premature termination of argument + scanning, explicitly telling `getopt' that there are no more + options. + + If OPTS begins with `--', then non-option arguments are treated as + arguments to the option '\0'. This behavior is specific to the GNU + `getopt'. */ + +#if (defined __STDC__ && __STDC__) || defined __cplusplus +# ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int ___argc, char *const *___argv, const char *__shortopts); +# else /* not __GNU_LIBRARY__ */ +extern int getopt (); +# endif /* __GNU_LIBRARY__ */ + +# ifndef __need_getopt +extern int getopt_long (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); +extern int getopt_long_only (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); + +/* Internal only. Users should not call this directly. */ +extern int _getopt_internal (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only); +# endif +#else /* not __STDC__ */ +extern int getopt (); +# ifndef __need_getopt +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +# endif +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +/* Make sure we later can get all the definitions and declarations. 
*/ +#undef __need_getopt + +#endif /* getopt.h */ + diff --git a/workloads/realworld/async/kmeans/kmeans.c b/workloads/realworld/async/kmeans/kmeans.c new file mode 100755 index 0000000000000000000000000000000000000000..e0725dfaab140d5cbbf60aa80cae1e50fe613b7d --- /dev/null +++ b/workloads/realworld/async/kmeans/kmeans.c @@ -0,0 +1,308 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: example.c **/ +/** Description: Takes as input a file: **/ +/** ascii file: containing 1 data point per line **/ +/** binary file: first int is the number of objects **/ +/** 2nd int is the no. of features of each **/ +/** object **/ +/** This example performs a fuzzy c-means clustering **/ +/** on the data. Fuzzy clustering is performed using **/ +/** min to max clusters and the clustering that gets **/ +/** the best score according to a compactness and **/ +/** separation criterion are returned. **/ +/** Author: Wei-keng Liao **/ +/** ECE Department Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. 
**/ +/** **/ +/*************************************************************************/ +#define _CRT_SECURE_NO_DEPRECATE 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#define _POSIX_C_SOURCE 200809L +#include +#include +#include "kmeans.h" + +extern double wtime(void); + + + +/*---< usage() >------------------------------------------------------------*/ +void usage(char *argv0) { + char *help = + "\nUsage: %s [switches] -i filename\n\n" + " -i filename :file containing data to be clustered\n" + " -m max_nclusters :maximum number of clusters allowed [default=5]\n" + " -n min_nclusters :minimum number of clusters allowed [default=5]\n" + " -t threshold :threshold value [default=0.001]\n" + " -l nloops :iteration for each number of clusters [default=1]\n" + " -b :input file is in binary format\n" + " -r :calculate RMSE [default=off]\n" + " -o :output cluster center coordinates [default=off]\n"; + fprintf(stderr, help, argv0); + exit(-1); +} + +/*---< main() >-------------------------------------------------------------*/ +int setup(int argc, char **argv) { + int opt; + extern char *optarg; + char *filename = 0; + float *buf; + char line[1024]; + int isBinaryFile = 0; + + float threshold = 0.001; /* default value */ + int max_nclusters=5; /* default value */ + int min_nclusters=5; /* default value */ + int best_nclusters = 0; + int nfeatures = 0; + int npoints = 0; + float len; + + float **features; + float **cluster_centres=NULL; + int i, j, index; + int nloops = 1; /* default value */ + + int isRMSE = 0; + float rmse; + + int isOutput = 0; + //float cluster_timing, io_timing; + + /* obtain command line arguments and change appropriate options */ + while ( (opt=getopt(argc,argv,"i:t:m:n:l:bro"))!= EOF) { + switch (opt) { + case 'i': filename=optarg; + break; + case 'b': isBinaryFile = 1; + break; + case 't': threshold=atof(optarg); + break; + case 'm': max_nclusters = atoi(optarg); + break; + case 'n': min_nclusters = 
atoi(optarg); + break; + case 'r': isRMSE = 1; + break; + case 'o': isOutput = 1; + break; + case 'l': nloops = atoi(optarg); + break; + case '?': usage(argv[0]); + break; + default: usage(argv[0]); + break; + } + } + + if (filename == 0) usage(argv[0]); + + /* ============== I/O begin ==============*/ + /* get nfeatures and npoints */ + //io_timing = omp_get_wtime(); + if (isBinaryFile) { //Binary file input + int infile; + if ((infile = open(filename, O_RDONLY, "0600")) == -1) { + fprintf(stderr, "Error: no such file (%s)\n", filename); + exit(1); + } + read(infile, &npoints, sizeof(int)); + read(infile, &nfeatures, sizeof(int)); + + /* allocate space for features[][] and read attributes of all objects */ + buf = (float*) malloc(npoints*nfeatures*sizeof(float)); + features = (float**)malloc(npoints* sizeof(float*)); + features[0] = (float*) malloc(npoints*nfeatures*sizeof(float)); + for (i=1; i npoints(%d) -- cannot proceed\n", min_nclusters, npoints); + exit(0); + } + + srand(7); /* seed for future random number generator */ + memcpy(features[0], buf, npoints*nfeatures*sizeof(float)); /* now features holds 2-dimensional array of features */ + free(buf); + + /* ======================= core of the clustering ===================*/ + + //cluster_timing = omp_get_wtime(); /* Total clustering time */ + cluster_centres = NULL; + index = cluster(npoints, /* number of data points */ + nfeatures, /* number of features for each point */ + features, /* array: [npoints][nfeatures] */ + min_nclusters, /* range of min to max number of clusters */ + max_nclusters, + threshold, /* loop termination factor */ + &best_nclusters, /* return: number between min and max */ + &cluster_centres, /* return: [best_nclusters][nfeatures] */ + &rmse, /* Root Mean Squared Error */ + isRMSE, /* calculate RMSE */ + nloops); /* number of iteration for each number of clusters */ + //cluster_timing = omp_get_wtime() - cluster_timing; + + + /* =============== Command Line Output =============== */ + 
+ /* cluster center coordinates + :displayed only for when k=1*/ + if((min_nclusters == max_nclusters) && (isOutput == 1)) { + printf("\n================= Centroid Coordinates =================\n"); + for(i = 0; i < max_nclusters; i++){ + printf("%d:", i); + for(j = 0; j < nfeatures; j++){ + printf(" %.2f", cluster_centres[i][j]); + } + printf("\n\n"); + } + } + + len = (float) ((max_nclusters - min_nclusters + 1)*nloops); + + printf("Number of Iteration: %d\n", nloops); + //printf("Time for I/O: %.5fsec\n", io_timing); + //printf("Time for Entire Clustering: %.5fsec\n", cluster_timing); + + if(min_nclusters != max_nclusters){ + if(nloops != 1){ //range of k, multiple iteration + //printf("Average Clustering Time: %fsec\n", + // cluster_timing / len); + printf("Best number of clusters is %d\n", best_nclusters); + } + else{ //range of k, single iteration + //printf("Average Clustering Time: %fsec\n", + // cluster_timing / len); + printf("Best number of clusters is %d\n", best_nclusters); + } + } + else{ + if(nloops != 1){ // single k, multiple iteration + //printf("Average Clustering Time: %.5fsec\n", + // cluster_timing / nloops); + if(isRMSE) // if calculated RMSE + printf("Number of trials to approach the best RMSE of %.3f is %d\n", rmse, index + 1); + } + else{ // single k, single iteration + if(isRMSE) // if calculated RMSE + printf("Root Mean Squared Error: %.3f\n", rmse); + } + } + + + /* free up memory */ + free(features[0]); + free(features); + return(0); +} + diff --git a/workloads/realworld/async/kmeans/kmeans.h b/workloads/realworld/async/kmeans/kmeans.h new file mode 100755 index 0000000000000000000000000000000000000000..28b6c34732313f04c02b59b095361bc8142d4b05 --- /dev/null +++ b/workloads/realworld/async/kmeans/kmeans.h @@ -0,0 +1,60 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
*/ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. 
*/ +/******************************************************************************/ + +#ifndef _H_FUZZY_KMEANS +#define _H_FUZZY_KMEANS + +#ifndef FLT_MAX +#define FLT_MAX 3.40282347e+38 +#endif + +#include + +/* rmse.c */ +float euclid_dist_2 (float*, float*, int); +int find_nearest_point (float* , int, float**, int); +float rms_err(float**, int, int, float**, int); + +/* cluster.c */ +int cluster(int, int, float**, int, int, float, int*, float***, float*, int, int); + +/* kmeans_clustering.c */ +float **kmeans_clustering(float**, int, int, int, float, int*); + +#endif diff --git a/workloads/realworld/async/kmeans/kmeans_clustering.c b/workloads/realworld/async/kmeans/kmeans_clustering.c new file mode 100755 index 0000000000000000000000000000000000000000..54ddcd6d8ff6f0d075ff16e6b6aef84fda4716d2 --- /dev/null +++ b/workloads/realworld/async/kmeans/kmeans_clustering.c @@ -0,0 +1,178 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. 
*/ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: kmeans_clustering.c **/ +/** Description: Implementation of regular k-means clustering **/ +/** algorithm **/ +/** Author: Wei-keng Liao **/ +/** ECE Department, Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. 
**/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include + +#include "kmeans.h" + +#define RANDOM_MAX 2147483647 + +extern double wtime(void); + +/*----< kmeans_clustering() >---------------------------------------------*/ +float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */ + int nfeatures, + int npoints, + int nclusters, + float threshold, + int *membership) /* out: [npoints] */ +{ + int i, j, n = 0; /* counters */ + int loop=0, temp; + int *new_centers_len; /* [nclusters]: no. of points in each cluster */ + float delta; /* if the point moved */ + float **clusters; /* out: [nclusters][nfeatures] */ + float **new_centers; /* [nclusters][nfeatures] */ + + int *initial; /* used to hold the index of points not yet selected + prevents the "birthday problem" of dual selection (?) 
+ considered holding initial cluster indices, but changed due to + possible, though unlikely, infinite loops */ + int initial_points; + int c = 0; + + /* nclusters should never be > npoints + that would guarantee a cluster without points */ + if (nclusters > npoints) + nclusters = npoints; + + /* allocate space for and initialize returning variable clusters[] */ + clusters = (float**) malloc(nclusters * sizeof(float*)); + clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float)); + for (i=1; i= 0; i++) { + //n = (int)rand() % initial_points; + + for (j=0; j 0) + clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */ + new_centers[i][j] = 0.0; /* set back to 0 */ + } + new_centers_len[i] = 0; /* set back to 0 */ + } + c++; + } while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */ + printf("iterated %d times\n", c); + free(new_centers[0]); + free(new_centers); + free(new_centers_len); + + return clusters; +} + diff --git a/workloads/realworld/async/kmeans/kmeans_cuda.cu b/workloads/realworld/async/kmeans/kmeans_cuda.cu new file mode 100755 index 0000000000000000000000000000000000000000..81e3b6dda591e8198764aa505d6527b1a31a6feb --- /dev/null +++ b/workloads/realworld/async/kmeans/kmeans_cuda.cu @@ -0,0 +1,220 @@ +#include +#include +#include +#include +#include +#include + +#include + +#define THREADS_PER_DIM 16 +#define BLOCKS_PER_DIM 64 +#define THREADS_PER_BLOCK THREADS_PER_DIM*THREADS_PER_DIM +#include +#include "kmeans_cuda_kernel.cu" +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + + +//#define BLOCK_DELTA_REDUCE +//#define BLOCK_CENTER_REDUCE + +#define CPU_DELTA_REDUCE +#define CPU_CENTER_REDUCE + +extern "C" +int setup(int argc, char** argv); /* function prototype */ + +// GLOBAL!!!!! 
+unsigned int num_threads_perdim = THREADS_PER_DIM; /* sqrt(256) -- see references for this choice */ +unsigned int num_blocks_perdim = BLOCKS_PER_DIM; /* temporary */ +unsigned int num_threads = num_threads_perdim*num_threads_perdim; /* number of threads */ +unsigned int num_blocks = num_blocks_perdim*num_blocks_perdim; /* number of blocks */ + +/* _d denotes it resides on the device */ +int *membership_new; /* newly assignment membership */ +float *feature_d; /* inverted data array */ +float *feature_flipped_d; /* original (not inverted) data array */ +int *membership_d; /* membership on the device */ +float *block_new_centers; /* sum of points in a cluster (per block) */ +float *clusters_d; /* cluster centers on the device */ +float *block_clusters_d; /* per block calculation of cluster centers */ +int *block_deltas_d; /* per block calculation of deltas */ + + +/* -------------- allocateMemory() ------------------- */ +/* allocate device memory, calculate number of blocks and threads, and invert the data array */ +extern "C" +void allocateMemory(int npoints, int nfeatures, int nclusters, float **features) +{ + // printf("npoints is %d, num_threads is %d\n", npoints, num_threads); + // num_blocks = npoints / num_threads; + // if (npoints % num_threads > 0) /* defeat truncation */ + // num_blocks++; + + // num_blocks_perdim = sqrt((double) num_blocks); + // while (num_blocks_perdim * num_blocks_perdim < num_blocks) // defeat truncation (should run once) + // num_blocks_perdim++; + + num_blocks = num_blocks_perdim*num_blocks_perdim; + + /* allocate memory for memory_new[] and initialize to -1 (host) */ + membership_new = (int*) malloc(npoints * sizeof(int)); + for(int i=0;i>>(feature_flipped_d, feature_d, npoints, (num_blocks_perdim * num_blocks_perdim * num_threads_perdim * num_threads_perdim), nfeatures); + + /* allocate memory for membership_d[] and clusters_d[][] (device) */ + cudaMalloc((void**) &membership_d, npoints*sizeof(int)); + cudaMalloc((void**) 
&clusters_d, nclusters*nfeatures*sizeof(float)); +} +/* -------------- allocateMemory() end ------------------- */ + +/* -------------- deallocateMemory() ------------------- */ +/* free host and device memory */ +extern "C" +void deallocateMemory() +{ + free(membership_new); + free(block_new_centers); + cudaFree(feature_d); + cudaFree(feature_flipped_d); + cudaFree(membership_d); + + cudaFree(clusters_d); + + endCPU(); +} +/* -------------- deallocateMemory() end ------------------- */ + +//////////////////////////////////////////////////////////////////////////////// +// Program main // + +int +main( int argc, char** argv) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + // make sure we're running on the big card + GPU_argv_init(); + // as done in the CUDA start/help document provided + initTrace(); + setup(argc, argv); + finiTrace(); +} + +// // +//////////////////////////////////////////////////////////////////////////////// + +/* ------------------- kmeansCuda() ------------------------ */ +extern "C" +int // delta -- had problems when return value was of float type +kmeansCuda(float **feature, /* in: [npoints][nfeatures] */ + int nfeatures, /* number of attributes for each point */ + int npoints, /* number of data points */ + int nclusters, /* number of clusters */ + int *membership, /* which cluster the point belongs to */ + float **clusters, /* coordinates of cluster centers */ + int *new_centers_len, /* number of elements in each cluster */ + float **new_centers /* sum of elements in each cluster */ + ) +{ + int delta = 0; /* if point has moved */ + int i,j; /* counters */ + + // cudaSetDevice(1); + + + /* copy membership (host to device) */ + cudaMemcpy(membership_d, membership_new, npoints*sizeof(int), cudaMemcpyHostToDevice); + + // /* copy clusters (host to device) */ + // cudaMemcpy(clusters_d, clusters[0], nclusters*nfeatures*sizeof(float), cudaMemcpyHostToDevice); 
+ + // /* set up texture */ + // cudaChannelFormatDesc chDesc0 = cudaCreateChannelDesc(); + // t_features.filterMode = cudaFilterModePoint; + // t_features.normalized = false; + // t_features.channelDesc = chDesc0; + + // if(cudaBindTexture(NULL, &t_features, feature_d, &chDesc0, npoints*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind features array to texture!\n"); + + // cudaChannelFormatDesc chDesc1 = cudaCreateChannelDesc(); + // t_features_flipped.filterMode = cudaFilterModePoint; + // t_features_flipped.normalized = false; + // t_features_flipped.channelDesc = chDesc1; + + // if(cudaBindTexture(NULL, &t_features_flipped, feature_flipped_d, &chDesc1, npoints*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind features_flipped array to texture!\n"); + + // cudaChannelFormatDesc chDesc2 = cudaCreateChannelDesc(); + // t_clusters.filterMode = cudaFilterModePoint; + // t_clusters.normalized = false; + // t_clusters.channelDesc = chDesc2; + + // if(cudaBindTexture(NULL, &t_clusters, clusters_d, &chDesc2, nclusters*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind clusters array to texture!\n"); + + // /* copy clusters to constant memory */ + // cudaMemcpyToSymbol("c_clusters",clusters[0],nclusters*nfeatures*sizeof(float),0,cudaMemcpyHostToDevice); + + // cudaMemcpy(feature_d, feature, npoints * nfeatures * sizeof(float), cudaMemcpyHostToDevice); + + /* setup execution parameters. 
+ changed to 2d (source code on NVIDIA CUDA Programming Guide) */ + dim3 grid( num_blocks_perdim, num_blocks_perdim ); + dim3 threads( num_threads_perdim*num_threads_perdim ); + /* execute the kernel */ + kmeansPoint<<>>(feature_d, + nfeatures, + npoints, + (num_blocks_perdim * num_blocks_perdim * num_threads_perdim * num_threads_perdim), + nclusters, + membership_d); + cudaDeviceSynchronize(); + + /* copy back membership (device to host) */ + cudaMemcpy(membership_new, membership_d, npoints * sizeof(int), cudaMemcpyDeviceToHost); + + /* for each point, sum data points in each cluster + and see if membership has changed: + if so, increase delta and change old membership, and update new_centers; + otherwise, update new_centers */ + delta = 0; + for (i = 0; i < npoints; i++) + { + int cluster_id = membership_new[i]; + new_centers_len[cluster_id]++; + if (membership_new[i] != membership[i]) + { +#ifdef CPU_DELTA_REDUCE + delta++; +#endif + membership[i] = membership_new[i]; + } +#ifdef CPU_CENTER_REDUCE + for (j = 0; j < nfeatures; j++) + { + new_centers[cluster_id][j] += feature[i][j]; + } +#endif + } + return delta; + +} +/* ------------------- kmeansCuda() end ------------------------ */ + diff --git a/workloads/realworld/async/kmeans/kmeans_cuda_kernel.cu b/workloads/realworld/async/kmeans/kmeans_cuda_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..4150c36621af95fb81ffe1e965b5e08941fff234 --- /dev/null +++ b/workloads/realworld/async/kmeans/kmeans_cuda_kernel.cu @@ -0,0 +1,150 @@ +#ifndef _KMEANS_CUDA_KERNEL_H_ +#define _KMEANS_CUDA_KERNEL_H_ + +#include +#include + +#include "kmeans.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +// FIXME: Make this a runtime selectable variable! +#define ASSUMED_NR_CLUSTERS 32 + +#define SDATA(index) CUT_BANK_CHECKER(sdata, index) + +// t_features has the layout dim0[points 0-m-1]dim1[ points 0-m-1]... 
+texture t_features; +// t_features_flipped has the layout point0[dim 0-n-1]point1[dim 0-n-1] +texture t_features_flipped; +texture t_clusters; + +__constant__ float c_clusters[ASSUMED_NR_CLUSTERS * 34]; /* constant memory for cluster centers */ + +/* ----------------- invert_mapping() --------------------- */ +/* inverts data array from row-major to column-major. + + [p0,dim0][p0,dim1][p0,dim2] ... + [p1,dim0][p1,dim1][p1,dim2] ... + [p2,dim0][p2,dim1][p2,dim2] ... + to + [dim0,p0][dim0,p1][dim0,p2] ... + [dim1,p0][dim1,p1][dim1,p2] ... + [dim2,p0][dim2,p1][dim2,p2] ... +*/ +__global__ void invert_mapping(float *input, /* original */ + float *output, /* inverted */ + int npoints, /* npoints */ + int batch_size, + int nfeatures) /* nfeatures */ +{ + int point_id = threadIdx.x + blockDim.x * blockIdx.x; /* id of thread */ + + int batches = npoints / batch_size; + + for (int b = 0; b < batches; b++) + { + for (int i = 0; i < nfeatures; i++) + { + output[b * batch_size + point_id + npoints * i] = input[(b * batch_size + point_id) * nfeatures + i]; + } + } + + + + return; +} +/* ----------------- invert_mapping() end --------------------- */ + +/* to turn on the GPU delta and center reduction */ +// #define GPU_DELTA_REDUCTION +// #define GPU_NEW_CENTER_REDUCTION + +/* ----------------- kmeansPoint() --------------------- */ +/* find the index of nearest cluster centers and change membership*/ +__global__ void +kmeansPoint(float *features, /* in: [npoints*nfeatures] */ + int nfeatures, + int npoints, + int batch_size, + int nclusters, + int *membership) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + // block ID + const unsigned int block_id = gridDim.x * blockIdx.y + blockIdx.x; + // point/thread ID + const unsigned int point_id = block_id * blockDim.x * blockDim.y + threadIdx.x; + + __shared__ float tmp_features[PREFETCH_COUNT * THREADS_PER_DIM][THREADS_PER_DIM][16]; + + int batches = npoints / batch_size; + 
int fetch = 0; + int end_tile = fetch + batches; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + for (int i = 0; i < 16; i++) + { + int addr = fetch * batch_size + point_id + i * npoints; + memcpy_async(tmp_features[(fetch % PREFETCH_COUNT) * THREADS_PER_DIM + threadIdx.y][threadIdx.x][i], features[addr], pipe); + } + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + int index = -1; + + float min_dist = FLT_MAX; + float dist; /* distance square between a point to cluster center */ + + /* find the cluster center id with min distance to pt */ + for (int i = 0; i < nclusters; i++) + { + int cluster_base_index = i * nfeatures; /* base index of cluster centers for inverted array */ + float ans = 0.0; /* Euclidean distance sqaure */ + + for (int j = 0; j < nfeatures; j++) + { + // int addr = point_id + j * npoints; /* appropriate index of data point */ + // float diff = (tex1Dfetch(t_features,addr) - c_clusters[cluster_base_index + j]); /* distance between a data point to cluster centers */ + + // int addr = point_id + j * npoints; /* appropriate index of data point */ + // float diff = features[addr] - c_clusters[cluster_base_index + j]; /* distance between a data point to cluster centers */ + float diff = tmp_features[(compute % PREFETCH_COUNT) * THREADS_PER_DIM + threadIdx.y][threadIdx.x][j] - c_clusters[cluster_base_index + j]; /* distance between a data point to cluster centers */ + ans += diff * diff; /* sum of squares */ + } + dist = ans; + block.sync(); + + /* see if distance is smaller than previous ones: + if so, change minimum distance and save index of cluster center */ + if (dist < min_dist) + { + min_dist = dist; + index = i; + } + } + membership[compute * batch_size + point_id] = index; + block.sync(); + } + +} +#endif // #ifndef 
_KMEANS_CUDA_KERNEL_H_ diff --git a/workloads/realworld/async/kmeans/kmeans_cuda_kernel.cu.old b/workloads/realworld/async/kmeans/kmeans_cuda_kernel.cu.old new file mode 100755 index 0000000000000000000000000000000000000000..dd0dec27eebf197d811a58063b13a86c8f72e1ed --- /dev/null +++ b/workloads/realworld/async/kmeans/kmeans_cuda_kernel.cu.old @@ -0,0 +1,185 @@ +#ifndef _KMEANS_CUDA_KERNEL_H_ +#define _KMEANS_CUDA_KERNEL_H_ + +#include +#include + +#include "kmeans.h" + +// FIXME: Make this a runtime selectable variable! +#define ASSUMED_NR_CLUSTERS 32 + +#define SDATA( index) CUT_BANK_CHECKER(sdata, index) + +// t_features has the layout dim0[points 0-m-1]dim1[ points 0-m-1]... +texture t_features; +// t_features_flipped has the layout point0[dim 0-n-1]point1[dim 0-n-1] +texture t_features_flipped; +texture t_clusters; + + +__constant__ float c_clusters[ASSUMED_NR_CLUSTERS*34]; /* constant memory for cluster centers */ + +/* ----------------- invert_mapping() --------------------- */ +/* inverts data array from row-major to column-major. + + [p0,dim0][p0,dim1][p0,dim2] ... + [p1,dim0][p1,dim1][p1,dim2] ... + [p2,dim0][p2,dim1][p2,dim2] ... + to + [dim0,p0][dim0,p1][dim0,p2] ... + [dim1,p0][dim1,p1][dim1,p2] ... + [dim2,p0][dim2,p1][dim2,p2] ... 
+*/ +__global__ void invert_mapping(float *input, /* original */ + float *output, /* inverted */ + int npoints, /* npoints */ + int nfeatures) /* nfeatures */ +{ + int point_id = threadIdx.x + blockDim.x*blockIdx.x; /* id of thread */ + int i; + + if(point_id < npoints){ + for(i=0;i 1; threadids_participating /= 2) { + if(threadIdx.x < threadids_participating) { + deltas[threadIdx.x] += deltas[threadIdx.x + threadids_participating]; + } + __syncthreads(); + } + if(threadIdx.x < 1) {deltas[threadIdx.x] += deltas[threadIdx.x + 1];} + __syncthreads(); + // propagate number of changes to global counter + if(threadIdx.x == 0) { + block_deltas[blockIdx.y * gridDim.x + blockIdx.x] = deltas[0]; + //printf("original id: %d, modified: %d\n", blockIdx.y*gridDim.x+blockIdx.x, blockIdx.x); + + } + +#endif + + +#ifdef GPU_NEW_CENTER_REDUCTION + int center_id = threadIdx.x / nfeatures; + int dim_id = threadIdx.x - nfeatures*center_id; + + __shared__ int new_center_ids[THREADS_PER_BLOCK]; + + new_center_ids[threadIdx.x] = index; + __syncthreads(); + + /*** + determine which dimension calculte the sum for + mapping of threads is + center0[dim0,dim1,dim2,...]center1[dim0,dim1,dim2,...]... + ***/ + + int new_base_index = (point_id - threadIdx.x)*nfeatures + dim_id; + float accumulator = 0.f; + + if(threadIdx.x < nfeatures * nclusters) { + // accumulate over all the elements of this threadblock + for(int i = 0; i< (THREADS_PER_BLOCK); i++) { + float val = tex1Dfetch(t_features_flipped,new_base_index+i*nfeatures); + if(new_center_ids[i] == center_id) + accumulator += val; + } + + // now store the sum for this threadblock + /*** + mapping to global array is + block0[center0[dim0,dim1,dim2,...]center1[dim0,dim1,dim2,...]...]block1[...]... 
+ ***/ + block_clusters[(blockIdx.y*gridDim.x + blockIdx.x) * nclusters * nfeatures + threadIdx.x] = accumulator; + } +#endif + +} +#endif // #ifndef _KMEANS_CUDA_KERNEL_H_ diff --git a/workloads/realworld/async/kmeans/rmse.c b/workloads/realworld/async/kmeans/rmse.c new file mode 100755 index 0000000000000000000000000000000000000000..fe7786342bf77cab12958e630cb8d99834312f0d --- /dev/null +++ b/workloads/realworld/async/kmeans/rmse.c @@ -0,0 +1,95 @@ +/*************************************************************************/ +/** File: rmse.c **/ +/** Description: calculate root mean squared error of particular **/ +/** clustering. **/ +/** Author: Sang-Ha Lee **/ +/** University of Virginia. **/ +/** **/ +/** Note: euclid_dist_2() and find_nearest_point() adopted from **/ +/** Minebench code. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include + +#include "kmeans.h" + +extern double wtime(void); + +/*----< euclid_dist_2() >----------------------------------------------------*/ +/* multi-dimensional spatial Euclid distance square */ +__inline +float euclid_dist_2(float *pt1, + float *pt2, + int numdims) +{ + int i; + float ans=0.0; + + for (i=0; i-----------------------------------------------*/ +__inline +int find_nearest_point(float *pt, /* [nfeatures] */ + int nfeatures, + float **pts, /* [npts][nfeatures] */ + int npts) +{ + int index, i; + float max_dist=FLT_MAX; + + /* find the cluster center id with min distance to pt */ + for (i=0; i-------------------------------------*/ +float rms_err (float **feature, /* [npoints][nfeatures] */ + int nfeatures, + int npoints, + float **cluster_centres, /* [nclusters][nfeatures] */ + int nclusters) +{ + int i; + int nearest_cluster_index; /* cluster center id with min distance to pt */ + float sum_euclid = 0.0; /* sum of Euclidean distance squares */ + float ret; /* return value */ + + /* calculate and sum the sqaure of euclidean 
distance*/ + #pragma omp parallel for \ + shared(feature,cluster_centres) \ + firstprivate(npoints,nfeatures,nclusters) \ + private(i, nearest_cluster_index) \ + schedule (static) + for (i=0; i +#endif + +#include + +#ifndef _H_TYPES +#include +#endif + +#include + +#ifndef _H_ACCESS +#include /* for the "access" function */ +#endif + +/* + * POSIX requires that certain values be included in unistd.h. It also + * requires that when _POSIX_SOURCE is defined only those standard + * specific values are present. This header includes all the POSIX + * required entries. + */ + +#ifdef _POSIX_SOURCE +#ifdef _LARGE_FILES +#define lseek lseek64 +#endif + + +/* Symbolic constants for the "lseek" function: */ +#ifndef SEEK_SET +#define SEEK_SET 0 /* Set file pointer to "offset" */ +#define SEEK_CUR 1 /* Set file pointer to current plus "offset" */ +#define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif /* SEEK_SET */ + +#ifdef _NO_PROTO + +#ifndef _KERNEL +extern int access(); +extern unsigned int alarm(); +extern int chdir(); +extern int chown(); +extern int close(); +extern char *ctermid(); +extern int dup(); +extern int dup2(); +extern int execl(); +extern int execv(); +extern int execle(); +extern int execve(); +extern int execlp(); +extern int execvp(); +extern void _exit(); +extern pid_t fork(); +extern long fpathconf(); +extern char *getcwd(); +extern gid_t getegid(); +extern uid_t geteuid(); +extern gid_t getgid(); +extern int getgroups(); +extern char *getlogin(); +extern pid_t getpgrp(); +extern pid_t getpid(); +extern pid_t getppid(); +extern uid_t getuid(); +extern int isatty(); +extern int link(); +extern off_t lseek(); +extern long pathconf(); +extern int pause(); +extern int pipe(); +#if defined(_XOPEN_SOURCE) && ( _XOPEN_SOURCE >= 500 ) +extern int pthread_atfork(); +#endif +extern int read(); +extern int rmdir(); +extern int setgid(); +extern int setpgid(); +extern int setsid(); +extern int setuid(); +extern unsigned int sleep(); +extern long 
sysconf(); +extern pid_t tcgetpgrp(); +extern int tcsetpgrp(); +extern char *ttyname(); +extern int unlink(); +extern int write(); +#endif /* !_KERNEL */ + +#else /* POSIX required prototypes */ + +#ifndef _KERNEL +extern int access(const char *, int); +extern unsigned int alarm(unsigned int); +extern int chdir(const char *); +extern int chown(const char *, uid_t, gid_t); +extern int close(int); +extern char *ctermid(char *); +extern int dup(int); +extern int dup2(int, int); +extern int execl(const char *, const char *, ...); +extern int execv(const char *, char *const []); +extern int execle(const char *, const char *, ...); +extern int execve(const char *, char *const [], char *const []); +extern int execlp(const char *, const char *, ...); +extern int execvp(const char *, char *const []); +extern void _exit(int); +extern pid_t fork(void); +extern long fpathconf(int, int); +extern char *getcwd(char *, size_t); +extern gid_t getegid(void); +extern uid_t geteuid(void); +extern gid_t getgid(void); +extern int getgroups(int, gid_t []); +extern char *getlogin(void); +#ifndef _BSD +extern pid_t getpgrp(void); +#endif /* _BSD */ +extern pid_t getpid(void); +extern pid_t getppid(void); +extern uid_t getuid(void); +extern int isatty(int); +extern int link(const char *, const char *); +extern off_t lseek(int, off_t, int); +#ifdef _LARGE_FILE_API +extern off64_t lseek64(int, off64_t, int); +#endif +extern long pathconf(const char *, int); +extern int pause(void); +extern int pipe(int []); +#if defined(_XOPEN_SOURCE) && ( _XOPEN_SOURCE >= 500 ) +extern int pthread_atfork(void (*)(void), void (*)(void), void (*)(void)); +#endif +extern ssize_t read(int, void *, size_t); +extern int rmdir(const char *); +extern int setgid(gid_t); +extern int setpgid(pid_t, pid_t); +extern pid_t setsid(void); +extern int setuid(uid_t); +extern unsigned int sleep(unsigned int); +extern long sysconf(int); +extern pid_t tcgetpgrp(int); +extern int tcsetpgrp(int, pid_t); +extern char *ttyname(int); 
+extern int unlink(const char *); +extern ssize_t write(int, const void *, size_t); +#endif /* !_KERNEL */ +#endif /* !_NO_PROTO */ + +#define STDIN_FILENO 0 +#define STDOUT_FILENO 1 +#define STDERR_FILENO 2 + +#define _POSIX_JOB_CONTROL 1 +#define _POSIX_SAVED_IDS 1 + +#define _POSIX_VERSION 200112L +#define _POSIX2_VERSION 200112L +#define _POSIX2_C_VERSION 200112L + + +#ifdef _XOPEN_SOURCE + +#define _XOPEN_VERSION 600 +#define _XOPEN_XCU_VERSION 4 +#define _XOPEN_XPG3 1 +#define _XOPEN_XPG4 1 +#define _XOPEN_UNIX 1 + +#define _XOPEN_REALTIME (-1) +#define _XOPEN_REALTIME_THREADS (-1) + +#if (_XOPEN_SOURCE >= 600) +#define _XOPEN_STREAMS 1 +#endif + +#define _XBS5_ILP32_OFF32 1 +#define _XBS5_ILP32_OFFBIG 1 +#define _XBS5_LP64_OFF64 1 +#define _XBS5_LPBIG_OFFBIG 1 + +#define _POSIX2_C_BIND 200112L +#define _POSIX2_C_DEV 200112L +#define _POSIX2_CHAR_TERM 1 +#define _POSIX2_LOCALEDEF 200112L +#define _POSIX2_UPE 200112L +#define _POSIX2_FORT_DEV (-1) +#define _POSIX2_FORT_RUN (-1) +#define _POSIX2_SW_DEV (-1) + +#if (_POSIX_C_SOURCE >= 200112L) +#define _POSIX_REGEXP 1 +#define _POSIX_SHELL 1 +#define _POSIX2_PBS (-1) +#define _POSIX2_PBS_ACCOUNTING (-1) +#define _POSIX2_PBS_CHECKPOINT (-1) +#define _POSIX2_PBS_LOCATE (-1) +#define _POSIX2_PBS_MESSAGE (-1) +#define _POSIX2_PBS_TRACK (-1) +#define _V6_ILP32_OFF32 1 +#define _V6_ILP32_OFFBIG 1 +#define _V6_LP64_OFF64 1 +#define _V6_LPBIG_OFFBIG 1 + +#define _POSIX_ADVISORY_INFO 200112L +#define _POSIX_BARRIERS 200112L +#define _POSIX_CLOCK_SELECTION 200112L +#define _POSIX_CPUTIME 200112L +#define _POSIX_MONOTONIC_CLOCK 200112L + +#ifdef _POSIX_RAW_SOCKETS +#undef _POSIX_RAW_SOCKETS +#endif + +#define _POSIX_SPAWN 200112L +#define _POSIX_SPIN_LOCKS 200112L +#define _POSIX_SPORADIC_SERVER (-1) +#define _POSIX_THREAD_CPUTIME 200112L +#define _POSIX_THREAD_SPORADIC_SERVER (-1) +#define _POSIX_TIMEOUTS 200112L +#define _POSIX_TRACE (-1) +#define _POSIX_TRACE_EVENT_FILTER (-1) +#define _POSIX_TRACE_INHERIT (-1) +#define 
_POSIX_TRACE_LOG (-1) +#define _POSIX_TYPED_MEMORY_OBJECTS (-1) + +#endif /* _POSIX_C_SOURCE >= 200112L */ + +#define _XOPEN_CRYPT 1 +#define _XOPEN_SHM 1 +#define _XOPEN_ENH_I18N 1 +#define _XOPEN_LEGACY (-1) +#ifndef __64BIT__ +#define _UNIX_ABI (-1) +#define _UNIX_ABI_IA64 (-1) +#define _UNIX_ABI_BIG_ENDIAN (-1) +#define _UNIX_ABI_LITTLE_ENDIAN (-1) +#endif /* __64BIT__ */ + +extern char *optarg; +extern int optind, opterr, optopt; + +#ifdef _NO_PROTO + extern size_t confstr(); + extern char *crypt(); + extern void encrypt(); + extern int fsync(); + extern int getopt(); + extern int nice(); + extern void swab(); +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern char *getpass(); + extern int chroot(); +#endif +#else + extern size_t confstr(int, char*, size_t); + extern char *crypt(const char *, const char *); + extern void encrypt(char *, int); + extern int fsync(int); + extern int getopt(int, char* const*, const char*); + extern int nice(int); + extern void swab(const void *, void *, ssize_t); + extern int fdatasync(int); +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern char *getpass(const char *); + extern int chroot(const char *); +#endif +#endif + +#endif /* _XOPEN _SOURCE */ + +/* Threads options for 1003.1c and XPG UNIX98 */ +#define _POSIX_THREADS 200112L +#define _POSIX_THREAD_ATTR_STACKADDR 200112L +#define _POSIX_THREAD_ATTR_STACKSIZE 200112L +#define _POSIX_THREAD_PROCESS_SHARED 200112L +#define _POSIX_THREAD_SAFE_FUNCTIONS 200112L +#ifdef _ALL_SOURCE +#define _POSIX_REENTRANT_FUNCTIONS _POSIX_THREAD_SAFE_FUNCTIONS +#endif + +/* Realtime threads options for 1003.1c and XPG UNIX98 */ +#define _POSIX_THREAD_PRIORITY_SCHEDULING (-1) +#define _POSIX_THREAD_PRIO_INHERIT (-1) +#define _POSIX_THREAD_PRIO_PROTECT (-1) + +#undef _POSIX_THREAD_FORKALL + +/* Realtime options for 1003.1c and XPG UNIX98 */ +#define _POSIX_ASYNCHRONOUS_IO 200112L +#define _POSIX_FSYNC 200112L 
+#define _POSIX_MAPPED_FILES 200112L +#define _POSIX_MEMLOCK 200112L +#define _POSIX_MEMLOCK_RANGE 200112L +#define _POSIX_MEMORY_PROTECTION 200112L +#define _POSIX_MESSAGE_PASSING 200112L +#define _POSIX_PRIORITIZED_IO 200112L +#define _POSIX_PRIORITY_SCHEDULING 200112L +#define _POSIX_REALTIME_SIGNALS 200112L +#define _POSIX_SEMAPHORES 200112L +#define _POSIX_SHARED_MEMORY_OBJECTS 200112L +#define _POSIX_SYNCHRONIZED_IO 200112L +#define _POSIX_TIMERS 200112L + +#define _POSIX_ASYNC_IO (-1) +#undef _POSIX_SYNC_IO +#define _POSIX_PRIO_IO (-1) + +#define _POSIX_CHOWN_RESTRICTED 0 +#define _POSIX_VDISABLE 0xFF +#define _POSIX_NO_TRUNC 0 + + /* UNIX03 and POSIX01 */ + /* Always enabled */ +#define _POSIX_IPV6 200112L +#define _POSIX_RAW_SOCKETS 200112L + + +#ifndef NULL +#define NULL 0 +#endif + +#if (_POSIX_C_SOURCE >= 200112L) +#define _POSIX_READER_WRITER_LOCKS 200112L +#endif + +/* arguments for the confstr() function */ + +#define _CS_PATH 1 + + /* compile,link,lib,lint flags for 32bit, no_LARGE_FILES system */ +#define _CS_XBS5_ILP32_OFF32_CFLAGS 2 +#define _CS_XBS5_ILP32_OFF32_LDFLAGS 3 +#define _CS_XBS5_ILP32_OFF32_LIBS 4 +#define _CS_XBS5_ILP32_OFF32_LINTFLAGS 5 + + /* compile,link,lib,lint flags for 32bit, _LARGE_FILES system */ +#define _CS_XBS5_ILP32_OFFBIG_CFLAGS 6 +#define _CS_XBS5_ILP32_OFFBIG_LDFLAGS 7 +#define _CS_XBS5_ILP32_OFFBIG_LIBS 8 +#define _CS_XBS5_ILP32_OFFBIG_LINTFLAGS 9 + + /* compile,link,lib,lint flags for LP64 64bit system */ +#define _CS_XBS5_LP64_OFF64_CFLAGS 10 +#define _CS_XBS5_LP64_OFF64_LDFLAGS 11 +#define _CS_XBS5_LP64_OFF64_LIBS 12 +#define _CS_XBS5_LP64_OFF64_LINTFLAGS 13 + + /* compile,link,lib,lint flags for ILP64 64bit system */ + /* AIX does not currently support this */ +#define _CS_XBS5_LPBIG_OFFBIG_CFLAGS 14 +#define _CS_XBS5_LPBIG_OFFBIG_LDFLAGS 15 +#define _CS_XBS5_LPBIG_OFFBIG_LIBS 16 +#define _CS_XBS5_LPBIG_OFFBIG_LINTFLAGS 17 + +#define _CS_AIX_BOOTDEV 24 +#define _CS_AIX_MODEL_CODE 25 +#define _CS_AIX_ARCHITECTURE 
26 +#define _CS_AIX_MODEL_CLASS 40 + +#if (_POSIX_C_SOURCE >= 200112L) +#define _CS_POSIX_V6_ILP32_OFF32_CFLAGS 27 +#define _CS_POSIX_V6_ILP32_OFF32_LDFLAGS 28 +#define _CS_POSIX_V6_ILP32_OFF32_LIBS 29 +#define _CS_POSIX_V6_ILP32_OFFBIG_CFLAGS 30 +#define _CS_POSIX_V6_ILP32_OFFBIG_LDFLAGS 31 +#define _CS_POSIX_V6_ILP32_OFFBIG_LIBS 32 +#define _CS_POSIX_V6_LP64_OFF64_CFLAGS 33 +#define _CS_POSIX_V6_LP64_OFF64_LDFLAGS 34 +#define _CS_POSIX_V6_LP64_OFF64_LIBS 35 +#define _CS_POSIX_V6_LPBIG_OFFBIG_CFLAGS 36 +#define _CS_POSIX_V6_LPBIG_OFFBIG_LDFLAGS 37 +#define _CS_POSIX_V6_LPBIG_OFFBIG_LIBS 38 +#define _CS_POSIX_V6_WIDTH_RESTRICTED_ENVS 39 +#endif + + /* Values for the above */ +#define _CSPATH "/usr/bin:/usr/vac/bin" + + /* ILP32_OFF32 */ +#define _CSPOSIX_V6_ILP32_OFF32_CFLAGS "-q32" +#define _CSXBS5_ILP32_OFF32_CFLAGS _CSPOSIX_V6_ILP32_OFF32_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_ILP32_OFF32_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_ILP32_OFF32_LDFLAGS "-b32" +#define _CSXBS5_ILP32_OFF32_LDFLAGS _CSPOSIX_V6_ILP32_OFF32_LDFLAGS +#endif + +#define _CSPOSIX_V6_ILP32_OFF32_LIBS "-lc -lpthread -lm" +#define _CSXBS5_ILP32_OFF32_LIBS _CSPOSIX_V6_ILP32_OFF32_LIBS + +#define _CSXBS5_ILP32_OFF32_LINTFLAGS "" + + /* ILP32_OFFOFFBIG */ +#define _CSPOSIX_V6_ILP32_OFFBIG_CFLAGS "-q32 -D_LARGE_FILES -qlonglong" +#define _CSXBS5_ILP32_OFFBIG_CFLAGS _CSPOSIX_V6_ILP32_OFFBIG_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_ILP32_OFFBIG_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_ILP32_OFFBIG_LDFLAGS "-b32" +#define _CSXBS5_ILP32_OFFBIG_LDFLAGS _CSPOSIX_V6_ILP32_OFFBIG_LDFLAGS +#endif + +#define _CSPOSIX_V6_ILP32_OFFBIG_LIBS "-lc -lpthread -lm" +#define _CSXBS5_ILP32_OFFBIG_LIBS _CSPOSIX_V6_ILP32_OFFBIG_LIBS + +#define _CSXBS5_ILP32_OFFBIG_LINTFLAGS "-D_LARGE_FILES -qlonglong" + + /* LP64_OFF64 */ +#define _CSPOSIX_V6_LP64_OFF64_CFLAGS "-q64" +#define _CSXBS5_LP64_OFF64_CFLAGS _CSPOSIX_V6_LP64_OFF64_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_LP64_OFF64_LDFLAGS "" +#else /* 
POWER */ +#define _CSPOSIX_V6_LP64_OFF64_LDFLAGS "-b64" +#define _CSXBS5_LP64_OFF64_LDFLAGS _CSPOSIX_V6_LP64_OFF64_LDFLAGS +#endif + +#define _CSPOSIX_V6_LP64_OFF64_LIBS "-lc -lpthread -lm" +#define _CSXBS5_LP64_OFF64_LIBS _CSPOSIX_V6_LP64_OFF64_LIBS + +#define _CSXBS5_LP64_OFF64_LINTFLAGS "-D__64BIT__" + + /* LPBIG_OFFBIG */ +#define _CSPOSIX_V6_LPBIG_OFFBIG_CFLAGS "-q64" +#define _CSXBS5_LPBIG_OFFBIG_CFLAGS _CSPOSIX_V6_LPBIG_OFFBIG_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_LPBIG_OFFBIG_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_LPBIG_OFFBIG_LDFLAGS "-b64" +#define _CSXBS5_LPBIG_OFFBIG_LDFLAGS _CSPOSIX_V6_LPBIG_OFFBIG_LDFLAGS +#endif + +#define _CSPOSIX_V6_LPBIG_OFFBIG_LIBS "-lc -lpthread -lm" +#define _CSXBS5_LPBIG_OFFBIG_LIBS _CSPOSIX_V6_LPBIG_OFFBIG_LIBS + +#define _CSXBS5_LPBIG_OFFBIG_LINTFLAGS "-D__64BIT__" + +#if (_POSIX_C_SOURCE >= 200112L) +#define _CSPOSIX_V6_WIDTH_RESTRICTED_ENVS \ + "POSIX_V6_ILP32_OFF32\n" \ + "POSIX_V6_ILP32_OFFBIG\n" \ + "POSIX_V6_LP64_OFF64\n" \ + "POSIX_V6_LPBIG_OFFBIG" +#endif + +/* arguments for the pathconf() function */ + +#define _PC_CHOWN_RESTRICTED 10 +#define _PC_LINK_MAX 11 +#define _PC_MAX_CANON 12 +#define _PC_MAX_INPUT 13 +#define _PC_NAME_MAX 14 +#define _PC_NO_TRUNC 15 +#define _PC_PATH_MAX 16 +#define _PC_PIPE_BUF 17 +#define _PC_VDISABLE 18 +#define _PC_ASYNC_IO 19 +#define _PC_SYNC_IO 20 +#define _PC_PRIO_IO 21 +#define _PC_FILESIZEBITS 22 /* # bits needed to hold offset */ +#define _PC_AIX_DISK_PARTITION 23 +#define _PC_AIX_DISK_SIZE 24 +#if (_POSIX_C_SOURCE >= 200112L) +#define _PC_SYMLINK_MAX 25 +#define _PC_ALLOC_SIZE_MIN 26 +#define _PC_REC_INCR_XFER_SIZE 27 +#define _PC_REC_MAX_XFER_SIZE 28 +#define _PC_REC_MIN_XFER_SIZE 29 +#define _PC_REC_XFER_ALIGN 30 +#define _PC_2_SYMLINKS 31 +#endif + +/* arguments for the sysconf() function, the defined numbers are used as + * array index in sysconf(). 
+ * + * POSIX.1(1990), Table 4-2 + */ +#define _SC_ARG_MAX 0 +#define _SC_CHILD_MAX 1 +#define _SC_CLK_TCK 2 +#define _SC_NGROUPS_MAX 3 +#define _SC_OPEN_MAX 4 +#define _SC_STREAM_MAX 5 +#define _SC_TZNAME_MAX 6 +#define _SC_JOB_CONTROL 7 +#define _SC_SAVED_IDS 8 +#define _SC_VERSION 9 + +/* POSIX.1(1990), Table 2-3, required by command getconf */ + +#define _SC_POSIX_ARG_MAX 10 +#define _SC_POSIX_CHILD_MAX 11 +#define _SC_POSIX_LINK_MAX 12 +#define _SC_POSIX_MAX_CANON 13 +#define _SC_POSIX_MAX_INPUT 14 +#define _SC_POSIX_NAME_MAX 15 +#define _SC_POSIX_NGROUPS_MAX 16 +#define _SC_POSIX_OPEN_MAX 17 +#define _SC_POSIX_PATH_MAX 18 +#define _SC_POSIX_PIPE_BUF 19 +#define _SC_POSIX_SSIZE_MAX 20 +#define _SC_POSIX_STREAM_MAX 21 +#define _SC_POSIX_TZNAME_MAX 22 + +/* POSIX.2 (Draft 10), Table 41) */ + +#define _SC_BC_BASE_MAX 23 +#define _SC_BC_DIM_MAX 24 +#define _SC_BC_SCALE_MAX 25 +#define _SC_BC_STRING_MAX 26 +#define _SC_EQUIV_CLASS_MAX 27 +#define _SC_EXPR_NEST_MAX 28 +#define _SC_LINE_MAX 29 +#define _SC_RE_DUP_MAX 30 +#define _SC_2_VERSION 31 +#define _SC_2_C_DEV 32 +#define _SC_2_FORT_DEV 33 +#define _SC_2_FORT_RUN 34 +#define _SC_2_LOCALEDEF 35 +#define _SC_2_SW_DEV 36 + +/* POSIX.2 (Draft 10), Table 13) */ + +#define _SC_POSIX2_BC_BASE_MAX 37 +#define _SC_POSIX2_BC_DIM_MAX 38 +#define _SC_POSIX2_BC_SCALE_MAX 39 +#define _SC_POSIX2_BC_STRING_MAX 40 +#define _SC_POSIX2_EQUIV_CLASS_MAX 41 +#define _SC_POSIX2_EXPR_NEST_MAX 42 +#define _SC_POSIX2_LINE_MAX 43 +#define _SC_POSIX2_RE_DUP_MAX 44 +#define _SC_PASS_MAX 45 +#define _SC_XOPEN_VERSION 46 +#define _SC_ATEXIT_MAX 47 +#if _XOPEN_SOURCE_EXTENDED==1 +#define _SC_PAGE_SIZE 48 +#endif /* _XOPEN_SOURCE_EXTENDED */ +#define _SC_AES_OS_VERSION 49 +#define _SC_COLL_WEIGHTS_MAX 50 +#define _SC_2_C_BIND 51 +#define _SC_2_C_VERSION 52 +#define _SC_2_UPE 53 +#define _SC_2_CHAR_TERM 54 +#define _SC_XOPEN_SHM 55 +#define _SC_XOPEN_CRYPT 56 +#define _SC_XOPEN_ENH_I18N 57 +#if _XOPEN_SOURCE_EXTENDED==1 +#define _SC_PAGESIZE 
_SC_PAGE_SIZE +#define _SC_IOV_MAX 58 +#endif /* _XOPEN_SOURCE_EXTENDED */ +#define _SC_THREAD_SAFE_FUNCTIONS 59 +#define _SC_THREADS 60 +#define _SC_THREAD_ATTR_STACKADDR 61 +#define _SC_THREAD_ATTR_STACKSIZE 62 +#define _SC_THREAD_FORKALL 63 +#define _SC_THREAD_PRIORITY_SCHEDULING 64 +#define _SC_THREAD_PRIO_INHERIT 65 +#define _SC_THREAD_PRIO_PROTECT 66 +#define _SC_THREAD_PROCESS_SHARED 67 +#define _SC_THREAD_KEYS_MAX 68 +#define _SC_THREAD_DATAKEYS_MAX _SC_THREAD_KEYS_MAX +#define _SC_THREAD_STACK_MIN 69 +#define _SC_THREAD_THREADS_MAX 70 +#ifdef _ALL_SOURCE +#define _SC_NPROCESSORS_CONF 71 +#define _SC_NPROCESSORS_ONLN 72 +#endif /* _ALL_SOURCE */ +#define _SC_XOPEN_UNIX 73 + +#if (_XOPEN_SOURCE >= 500) + +/* POSIX 1003.1c and XPG UNIX98 */ +/* look to defines above for meanings */ +#define _SC_AIO_LISTIO_MAX 75 +#define _SC_AIO_MAX 76 +#define _SC_AIO_PRIO_DELTA_MAX 77 +#define _SC_ASYNCHRONOUS_IO 78 +#define _SC_DELAYTIMER_MAX 79 +#define _SC_FSYNC 80 +#define _SC_GETGR_R_SIZE_MAX 81 +#define _SC_GETPW_R_SIZE_MAX 82 +#define _SC_LOGIN_NAME_MAX 83 +#define _SC_MAPPED_FILES 84 +#define _SC_MEMLOCK 85 +#define _SC_MEMLOCK_RANGE 86 +#define _SC_MEMORY_PROTECTION 87 +#define _SC_MESSAGE_PASSING 88 +#define _SC_MQ_OPEN_MAX 89 +#define _SC_MQ_PRIO_MAX 90 +#define _SC_PRIORITIZED_IO 91 +#define _SC_PRIORITY_SCHEDULING 92 +#define _SC_REALTIME_SIGNALS 93 +#define _SC_RTSIG_MAX 94 +#define _SC_SEMAPHORES 95 +#define _SC_SEM_NSEMS_MAX 96 +#define _SC_SEM_VALUE_MAX 97 +#define _SC_SHARED_MEMORY_OBJECTS 98 +#define _SC_SIGQUEUE_MAX 99 +#define _SC_SYNCHRONIZED_IO 100 +#define _SC_THREAD_DESTRUCTOR_ITERATIONS 101 +#define _SC_TIMERS 102 +#define _SC_TIMER_MAX 103 +#define _SC_TTY_NAME_MAX 104 +#define _SC_XBS5_ILP32_OFF32 105 +#define _SC_XBS5_ILP32_OFFBIG 106 +#define _SC_XBS5_LP64_OFF64 107 +#define _SC_XBS5_LPBIG_OFFBIG 108 +#define _SC_XOPEN_XCU_VERSION 109 +#define _SC_XOPEN_REALTIME 110 +#define _SC_XOPEN_REALTIME_THREADS 111 +#define _SC_XOPEN_LEGACY 112 +#endif 
/* _XOPEN_SOURCE >= 500 */ + +#ifdef _ALL_SOURCE +#define _SC_REENTRANT_FUNCTIONS _SC_THREAD_SAFE_FUNCTIONS +#define _SC_PHYS_PAGES 113 +#define _SC_AVPHYS_PAGES 114 +#define _SC_LPAR_ENABLED 115 +#define _SC_LARGE_PAGESIZE 116 +#endif /* _ALL_SOURCE */ + +#define _SC_AIX_KERNEL_BITMODE 117 +#define _SC_AIX_REALMEM 118 +#define _SC_AIX_HARDWARE_BITMODE 119 +#define _SC_AIX_MP_CAPABLE 120 + +#define _SC_V6_ILP32_OFF32 121 +#define _SC_V6_ILP32_OFFBIG 122 +#define _SC_V6_LP64_OFF64 123 +#define _SC_V6_LPBIG_OFFBIG 124 + +#define _SC_XOPEN_STREAMS 125 + +#if (_POSIX_C_SOURCE >= 200112L) +#define _SC_HOST_NAME_MAX 126 +#define _SC_REGEXP 127 +#define _SC_SHELL 128 +#define _SC_SYMLOOP_MAX 129 +#define _SC_ADVISORY_INFO 130 +#define _SC_FILE_LOCKING 131 +#define _SC_2_PBS 132 +#define _SC_2_PBS_ACCOUNTING 133 +#define _SC_2_PBS_CHECKPOINT 134 +#define _SC_2_PBS_LOCATE 135 +#define _SC_2_PBS_MESSAGE 136 +#define _SC_2_PBS_TRACK 137 +#define _SC_BARRIERS 138 +#define _SC_CLOCK_SELECTION 139 +#define _SC_CPUTIME 140 +#define _SC_MONOTONIC_CLOCK 141 +#define _SC_READER_WRITER_LOCKS 142 +#define _SC_SPAWN 143 +#define _SC_SPIN_LOCKS 144 +#define _SC_SPORADIC_SERVER 145 +#define _SC_THREAD_CPUTIME 146 +#define _SC_THREAD_SPORADIC_SERVER 147 +#define _SC_TIMEOUTS 148 +#define _SC_TRACE 149 +#define _SC_TRACE_EVENT_FILTER 150 +#define _SC_TRACE_INHERIT 151 +#define _SC_TRACE_LOG 152 +#define _SC_TYPED_MEMORY_OBJECTS 153 +#define _SC_IPV6 154 +#define _SC_RAW_SOCKETS 155 +#define _SC_SS_REPL_MAX 156 +#define _SC_TRACE_EVENT_NAME_MAX 157 +#define _SC_TRACE_NAME_MAX 158 +#define _SC_TRACE_SYS_MAX 159 +#define _SC_TRACE_USER_EVENT_MAX 160 +#endif /* _POSIX_C_SOURCE >= 200112L */ + +#ifdef _ALL_SOURCE +#define _SC_AIX_UKEYS 161 +#endif /* _ALL_SOURCE */ + +#endif /* _POSIX_SOURCE */ + + +#if _XOPEN_SOURCE_EXTENDED==1 +#ifdef _LARGE_FILES +#define ftruncate ftruncate64 +#define truncate truncate64 +#endif + +#ifndef _H_LOCKF +#include /* lockf definitions for portability */ +#endif + 
+#ifdef _NO_PROTO +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern int brk(); + extern int getpagesize(); +#ifndef _MSGQSUPPORT + extern int __fd_getdtablesize(); + static int getdtablesize() + { + return __fd_getdtablesize(); + } +#else + extern int getdtablesize(); +#endif /* _MSGQSUPPORT */ + + extern void *sbrk(); +#endif /* _POSIX_C_SOURCE<200112L */ + extern int fchdir(); + extern int fchown(); + extern int ftruncate(); + extern long gethostid(); + extern int gethostname(); + extern pid_t getpgid(); + extern pid_t getsid(); + extern char *getwd(); + extern int lchown(); + extern int readlink(); + extern pid_t setpgrp(); + extern int setregid(); + extern int setreuid(); + extern int symlink(); + extern void sync(); + extern int truncate(); + extern useconds_t ualarm(); + extern int usleep(); + extern pid_t vfork(); +#else /* _NO_PROTO */ +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern int brk(void *); + extern int getpagesize(void); +#ifndef _MSGQSUPPORT + extern int __fd_getdtablesize(void); + static int getdtablesize() + { + return __fd_getdtablesize(); + } +#else + extern int getdtablesize(void); +#endif /* _MSGQSUPPORT */ +#ifdef _LINUX_SOURCE_COMPAT + extern void *sbrk(ptrdiff_t); +#elif (_XOPEN_SOURCE >= 500) || defined(__64BIT__) + extern void *sbrk(intptr_t); +#else + extern void *sbrk(int); +#endif +#endif /* _POSIX_C_SOURCE<200112L */ + extern int fchdir(int); + extern int fchown(int, uid_t, gid_t); + extern int ftruncate(int, off_t); +#ifdef _LARGE_FILE_API + extern int ftruncate64(int, off64_t); +#endif + extern int gethostname(char *, size_t); + extern long gethostid(void); + extern pid_t getpgid(pid_t); + extern pid_t getsid(pid_t); + extern char *getwd(char *); + extern int lchown(const char *, uid_t, gid_t); + +#if (defined(_SUSV3_READLINK) || \ + (!defined(_ALL_SOURCE) && (_POSIX_C_SOURCE >= 200112L))) + /* If SUSV3 readlink specifically requested or if 
strict SUSv3 + * environment requested */ +#ifdef __64BIT__ +static ssize_t readlink(const char *__restrict__ __path, + char *__restrict__ __buf, size_t __bufsize) +{ + extern ssize_t __readlink64(const char *__restrict__, char *__restrict__, size_t); + return __readlink64(__path, __buf, __bufsize); +} +#else + extern ssize_t readlink(const char *__restrict__, char *__restrict__, size_t); +#endif /* __64BIT__ */ +#else + extern int readlink(const char *, char *, size_t); +#endif /* _SUSV3_READLINK || !_ALL_SOURCE && _POSIX_C_SOURCE >= 200112L */ + +#ifndef _BSD + extern pid_t setpgrp(void); +#endif /* _BSD */ + extern int setregid(gid_t, gid_t); + extern int setreuid(uid_t, uid_t); + extern int symlink(const char *, const char *); + extern void sync(void); + extern int truncate(const char *, off_t); +#ifdef _LARGE_FILE_API + extern int truncate64(const char *, off64_t); +#endif + extern useconds_t ualarm(useconds_t, useconds_t); + extern int usleep(useconds_t); + extern pid_t vfork(void); +#if _XOPEN_SOURCE>=500 + extern int getlogin_r(char *, size_t); + extern int ttyname_r(int, char *, size_t); + +#ifdef _LARGE_FILES +#define pread pread64 +#define pwrite pwrite64 +#endif /* _LARGE_FILES */ + + extern ssize_t pread(int, void *, size_t, off_t); + extern ssize_t pwrite(int, const void *, size_t, off_t); +#ifdef _LARGE_FILE_API + extern ssize_t pread64(int, void *, size_t, off64_t); + extern ssize_t pwrite64(int, const void *, size_t, off64_t); +#endif /* _LARGE_FILE_API */ +#endif /* _XOPEN_SOURCE>=500 */ + +#endif /* _NO_PROTO */ + +#endif /* _XOPEN_SOURCE_EXTENDED */ + +#ifdef _ALL_SOURCE + +extern char **environ; + +#ifndef _KERNEL +#ifdef _NO_PROTO + extern pid_t f_fork(); +#else /* _NO_PROTO */ + extern pid_t f_fork(void); +#endif /* _NO_PROTO */ +#endif /* _KERNEL */ + +#ifdef _NO_PROTO + extern char * cuserid(); + extern int ioctl(); +#ifdef __64BIT__ + extern int ioctlx(); + extern int ioctl32(); + extern int ioctl32x(); +#endif /* __64BIT__ */ + extern int 
readx(); + extern int setgroups(); + extern int writex(); + extern int setegid(); + extern int seteuid(); + extern int setrgid(); + extern int setruid(); + extern offset_t llseek(); + extern char * getusershell(); + extern void setusershell(); + extern void endusershell(); + extern char * get_current_dir_name(); + extern int sysfs(); +#else + extern char * cuserid(char *); + extern int setegid(gid_t); + extern int seteuid(uid_t); + extern int setrgid(gid_t); + extern int setruid(uid_t); +#ifndef _BSD + extern int ioctl(int, int, ...); +#endif /* _BSD */ +#ifdef __64BIT__ + extern int ioctlx(int, int, void *, long); + extern int ioctl32(int, int, ...); + extern int ioctl32x(int, int, unsigned int, unsigned int); +#endif /* __64BIT__ */ + extern int setgroups(int, gid_t []); +#ifndef _KERNEL + extern int readx(int, char*, unsigned, long); + extern int writex(int, char*, unsigned, long); + +#ifdef _LARGE_FILES +#define fclear fclear64 +#define fsync_range fsync_range64 +#endif + extern off_t fclear(int, off_t); + extern int fsync_range(int, int, off_t, off_t); +#ifdef _LARGE_FILE_API + extern off64_t fclear64(int, off64_t); + extern int fsync_range64(int, int, off64_t, off64_t); +#endif + extern offset_t llseek(int, offset_t, int); + extern char * getusershell(void); + extern void setusershell(void); + extern void endusershell(void); + extern char * get_current_dir_name(void); + extern int sysfs(int, ...); + extern int finfo(const char *, int, void *, int32long64_t); + extern int ffinfo(int, int, void *, int32long64_t); + +#endif /* ndef _KERNEL */ + +#endif /* _NO_PROTO */ + +#define _AES_OS_VERSION 1 /* OSF, AES version */ + +#endif /* _ALL_SOURCE */ + +#ifdef __cplusplus +} +#endif + +#endif /* _H_UNISTD */ diff --git a/workloads/realworld/async/knn/Makefile b/workloads/realworld/async/knn/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..6ebd397ea3d2a2082eb070de41b7f1eb452dbf53 --- /dev/null +++ b/workloads/realworld/async/knn/Makefile 
@@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := knn +CUFILES := knn_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o knn + diff --git a/workloads/realworld/async/knn/knn_cuda.cu b/workloads/realworld/async/knn/knn_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..005fafe0fdbd970baf8153f5dc3453e84064bfdb --- /dev/null +++ b/workloads/realworld/async/knn/knn_cuda.cu @@ -0,0 +1,619 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +int ref_nb = 4096; // Reference point number, max=65535 +int query_nb = 4096; // Query point number, max=65535 +int dim = 128; // Dimension of points +int k = 20; // Nearest neighbors to consider +int iterations = 100; + +//-----------------------------------------------------------------------------------------------// +// KERNELS // +//-----------------------------------------------------------------------------------------------// +__global__ void extract_with_interpolation(int nthreads, float *data, + float *n_xy_coords, + float *extracted_data, + int n_max_coord, int channels, + int height, int width) { + + int x0, x1, y0, y1, nc; + float wx0, wx1, wy0, wy1; + int n, nd; + float x, y; + + for (int index = 
blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + n = (index / n_max_coord); + nd = n * n_max_coord * channels; + x = n_xy_coords[index * 2]; + y = n_xy_coords[index * 2 + 1]; + + x0 = static_cast(floor(x)); + x1 = x0 + 1; + y0 = static_cast(floor(y)); + y1 = y0 + 1; + + x0 = x0 <= 0 ? 0 : (x0 >= (width - 1) ? (width - 1) : x0); + y0 = y0 <= 0 ? 0 : (y0 >= (height - 1) ? (height - 1) : y0); + x1 = x1 <= 0 ? 0 : (x1 >= (width - 1) ? (width - 1) : x1); + y1 = y1 <= 0 ? 0 : (y1 >= (height - 1) ? (height - 1) : y1); + + wx0 = static_cast(x1) - x; + wx1 = x - x0; + wy0 = static_cast(y1) - y; + wy1 = y - y0; + + if (x0 == x1) { + wx0 = 1; + wx1 = 0; + } + if (y0 == y1) { + wy0 = 1; + wy1 = 0; + } + for (int c = 0; c < channels; c++) { + nc = (n * channels + c) * height; + // extracted_data[index * channels + c] = wy0 * wx0 * data[(nc + y0) * + // width + x0] + // extracted_data[nd + index % n_max_coord + n_max_coord * c] = index; + extracted_data[nd + index % n_max_coord + n_max_coord * c] = + wy0 * wx0 * data[(nc + y0) * width + x0] + + wy1 * wx0 * data[(nc + y1) * width + x0] + + wy0 * wx1 * data[(nc + y0) * width + x1] + + wy1 * wx1 * data[(nc + y1) * width + x1]; + } + } +} + +/** + * Computes the distance between two matrix A (reference points) and + * B (query points) containing respectively wA and wB points. 
+ * + * @param A pointer on the matrix A + * @param wA width of the matrix A = number of points in A + * @param B pointer on the matrix B + * @param wB width of the matrix B = number of points in B + * @param dim dimension of points = height of matrices A and B + * @param AB pointer on the matrix containing the wA*wB distances computed + */ +__global__ void cuComputeDistanceGlobal(float *A, int wA, float *B, int wB, + int dim, float *AB) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + // Declaration of the shared memory arrays As and Bs used to store the + // sub-matrix of A and B + __shared__ float shared_A[PREFETCH_COUNT][BLOCK_DIM][BLOCK_DIM]; + __shared__ float shared_B[PREFETCH_COUNT][BLOCK_DIM][BLOCK_DIM]; + + // Sub-matrix of A (begin, step, end) and Sub-matrix of B (begin, step) + __shared__ int begin_A; + __shared__ int begin_B; + __shared__ int step_A; + __shared__ int step_B; + __shared__ int end_A; + + // Thread index + int tx = threadIdx.x; + int ty = threadIdx.y; + + // Other variables + float tmp; + float ssd = 0; + + // Loop parameters + begin_A = BLOCK_DIM * blockIdx.y; + begin_B = BLOCK_DIM * blockIdx.x; + step_A = BLOCK_DIM * wA; + step_B = BLOCK_DIM * wB; + end_A = begin_A + (dim - 1) * wA; + + // if (blockIdx.x == 0 && blockIdx.y == 0 && tx == 0 && ty == 0) + // printf("begin_A is %d, end_A is %d, step_A is %d, begin_B is %d, step_B is %d\n", begin_A, end_A, step_A, begin_B, step_B); + + // Conditions + int cond0 = (begin_A + tx < wA); // used to write in shared memory + int cond1 = (begin_B + tx < wB); // used to write in shared memory & to + // computations and to write in output matrix + int cond2 = + (begin_A + ty < wA); // used to computations and to write in output matrix + // Loop over all the sub-matrices of A and B required to compute the block + // sub-matrix + + // int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int fetch = 0; + int end_tile = fetch 
+ (end_A - begin_A + 1) / step_A; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + int a = begin_A + step_A * fetch; + int b = begin_B + step_B * fetch; + if (a / wA + ty < dim) + { + // shared_A[fetch % PREFETCH_COUNT][ty][tx] = (cond0) ? A[a + wA * ty + tx] : 0; + // shared_B[fetch % PREFETCH_COUNT][ty][tx] = (cond1) ? B[b + wB * ty + tx] : 0; + if (cond0) + memcpy_async(shared_A[fetch % PREFETCH_COUNT][ty][tx], A[a + wA * ty + tx], pipe); + else + shared_A[fetch % PREFETCH_COUNT][ty][tx] = 0; + if (cond1) + memcpy_async(shared_B[fetch % PREFETCH_COUNT][ty][tx], B[b + wB * ty + tx], pipe); + else + shared_B[fetch % PREFETCH_COUNT][ty][tx] = 0; + } + else + { + shared_A[fetch % PREFETCH_COUNT][ty][tx] = 0; + shared_B[fetch % PREFETCH_COUNT][ty][tx] = 0; + } + // block.sync(); + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + if (cond2 && cond1) + { + for (int k = 0; k < BLOCK_DIM; ++k) + { + tmp = shared_A[compute % PREFETCH_COUNT][k][ty] - shared_B[compute % PREFETCH_COUNT][k][tx]; + ssd += tmp * tmp; + } + } + + // Synchronize to make sure that the preceding computation is done before + // loading two new sub-matrices of A and B in the next iteration + block.sync(); + } + + // Write the block sub-matrix to device memory; each thread writes one element + if (cond2 && cond1) + { + AB[(begin_A + ty) * wB + begin_B + tx] = ssd; + } +} + +/** + * Gathers k-th smallest distances for each column of the distance matrix in + * the top. 
+ * + * @param dist distance matrix + * @param ind index matrix + * @param width width of the distance matrix and of the index matrix + * @param height height of the distance matrix and of the index matrix + * @param k number of neighbors to consider + */ +__global__ void cuInsertionSort(float *dist, int *ind, int width, int height, + int k) { + // printf("test2\n"); + // Variables + int l, i, j; + float *p_dist; + int *p_ind; + float curr_dist, max_dist; + int curr_row, max_row; + unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x; + + if (xIndex < width) { + // Pointer shift, initialization, and max value + p_dist = dist + xIndex; + p_ind = ind + xIndex; + max_dist = p_dist[0]; + p_ind[0] = 0; + + // Part 1 : sort kth firt elementZ + for (l = 1; l < k; l++) { + curr_row = l * width; + curr_dist = p_dist[curr_row]; + if (curr_dist < max_dist) { + i = l - 1; + for (int a = 0; a < l - 1; a++) { + if (p_dist[a * width] > curr_dist) { + i = a; + break; + } + } + for (j = l; j > i; j--) { + p_dist[j * width] = p_dist[(j - 1) * width]; + p_ind[j * width] = p_ind[(j - 1) * width]; + } + p_dist[i * width] = curr_dist; + p_ind[i * width] = l; + } else { + p_ind[l * width] = l; + } + max_dist = p_dist[curr_row]; + } + + // Part 2 : insert element in the k-th first lines + max_row = (k - 1) * width; + for (l = k; l < height; l++) { + curr_dist = p_dist[l * width]; + if (curr_dist < max_dist) { + i = k - 1; + for (int a = 0; a < k - 1; a++) { + if (p_dist[a * width] > curr_dist) { + i = a; + break; + } + } + for (j = k - 1; j > i; j--) { + p_dist[j * width] = p_dist[(j - 1) * width]; + p_ind[j * width] = p_ind[(j - 1) * width]; + } + p_dist[i * width] = curr_dist; + p_ind[i * width] = l; + max_dist = p_dist[max_row]; + } + } + } +} + +/** + * Computes the square root of the first line (width-th first element) + * of the distance matrix. 
+ * + * @param dist distance matrix + * @param width width of the distance matrix + * @param k number of neighbors to consider + */ +__global__ void cuParallelSqrt(float *dist, int width, int k) { + unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int yIndex = blockIdx.y * blockDim.y + threadIdx.y; + // printf("test3\n"); + if (xIndex < width && yIndex < k) + dist[yIndex * width + xIndex] = sqrt(dist[yIndex * width + xIndex]); +} + +//-----------------------------------------------------------------------------------------------// +// K-th NEAREST NEIGHBORS // +//-----------------------------------------------------------------------------------------------// + +/** + * Prints the error message return during the memory allocation. + * + * @param error error value return by the memory allocation function + * @param memorySize size of memory tried to be allocated + */ +void printErrorMessage(cudaError_t error, int memorySize) { + printf("==================================================\n"); + printf("MEMORY ALLOCATION ERROR : %s\n", cudaGetErrorString(error)); + printf("Whished allocated memory : %d\n", memorySize); + printf("==================================================\n"); +} + +/** + * K nearest neighbor algorithm + * - Initialize CUDA + * - Allocate device memory + * - Copy point sets (reference and query points) from host to device memory + * - Compute the distances + indexes to the k nearest neighbors for each query + * point + * - Copy distances from device to host memory + * + * @param ref_host reference points ; pointer to linear matrix + * @param ref_width number of reference points ; width of the matrix + * @param query_host query points ; pointer to linear matrix + * @param query_width number of query points ; width of the matrix + * @param height dimension of points ; height of the matrices + * @param k number of neighbor to consider + * @param dist_host distances to k nearest neighbors ; pointer to linear + * matrix + * 
@param dist_host indexes of the k nearest neighbors ; pointer to linear + * matrix + * + */ +// void knn_cuda(float *ref_host, int ref_width, float *query_host, +// int query_width, int height, int k, float *dist_host, +// int *ind_host) { +void knn_cuda(float *ref_device, int ref_width, float *query_device, + int query_width, int height, int k, float *dist_device, + int *ind_device) { + // Grids ans threads + dim3 g_16x16(query_width / 16, ref_width / 16, 1); + dim3 t_16x16(16, 16, 1); + if (query_width % 16 != 0) + g_16x16.x += 1; + if (ref_width % 16 != 0) + g_16x16.y += 1; + // + dim3 g_256x1(query_width / 256, 1, 1); + dim3 t_256x1(256, 1, 1); + if (query_width % 256 != 0) + g_256x1.x += 1; + + dim3 g_k_16x16(query_width / 16, k / 16, 1); + dim3 t_k_16x16(16, 16, 1); + if (query_width % 16 != 0) + g_k_16x16.x += 1; + if (k % 16 != 0) + g_k_16x16.y += 1; + + // printf("ref_width is %d, query_width is %d, height is %d\n", ref_width, query_width, height); + + // Kernel 1: Compute all the distances + cuComputeDistanceGlobal<<>>(ref_device, ref_width, query_device, + query_width, height, dist_device); + // Kernel 2: Sort each column + cuInsertionSort<<>>(dist_device, ind_device, query_width, + ref_width, k); + // Kernel 3: Compute square root of k first elements + cuParallelSqrt<<>>(dist_device, query_width, k); + cudaDeviceSynchronize(); +} + +float compute_distance(const float *ref, int ref_nb, const float *query, + int query_nb, int dim, int ref_index, int query_index) { + float sum = 0.f; + for (int d = 0; d < dim; ++d) { + const float diff = + ref[d * ref_nb + ref_index] - query[d * query_nb + query_index]; + sum += diff * diff; + } + return sqrtf(sum); +} + +void modified_insertion_sort(float *dist, int *index, int length, int k) { + + // Initialise the first index + index[0] = 0; + + // Go through all points + for (int i = 1; i < length; ++i) { + + // Store current distance and associated index + float curr_dist = dist[i]; + int curr_index = i; + + // Skip 
the current value if its index is >= k and if it's higher the k-th + // slready sorted mallest value + if (i >= k && curr_dist >= dist[k - 1]) { + continue; + } + + // Shift values (and indexes) higher that the current distance to the right + int j = min(i, k - 1); + while (j > 0 && dist[j - 1] > curr_dist) { + dist[j] = dist[j - 1]; + index[j] = index[j - 1]; + --j; + } + + // Write the current distance and index at their position + dist[j] = curr_dist; + index[j] = curr_index; + } +} + +bool knn_c(const float *ref, int ref_nb, const float *query, int query_nb, + int dim, int k, float *knn_dist, int *knn_index) { + // Allocate local array to store all the distances / indexes for a given query + // point + float *dist = (float *)malloc(ref_nb * sizeof(float)); + int *index = (int *)malloc(ref_nb * sizeof(int)); + + // Allocation checks + if (!dist || !index) { + printf("Memory allocation error\n"); + free(dist); + free(index); + return false; + } + + // Process one query point at the time + for (int i = 0; i < query_nb; ++i) { + + // Compute all distances / indexes + for (int j = 0; j < ref_nb; ++j) { + dist[j] = compute_distance(ref, ref_nb, query, query_nb, dim, j, i); + index[j] = j; + } + + // Sort distances / indexes + modified_insertion_sort(dist, index, ref_nb, k); + + // Copy k smallest distances and their associated index + for (int j = 0; j < k; ++j) { + knn_dist[j * query_nb + i] = dist[j]; + knn_index[j * query_nb + i] = index[j]; + } + } + + // Memory clean-up + free(dist); + free(index); + return true; +} + +/** + * Example of use of kNN search CUDA. 
+ */ +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + // Variables and parameters + float *ref; // Pointer to reference point array + float *query; // Pointer to query point array + float *dist, *dist_c; // Pointer to distance array + int *ind, *ind_c; // Pointer to index array + + + if (argc >= 4) { + ref_nb = atoi(argv[1]); + query_nb = atoi(argv[2]); + dim = atoi(argv[3]); + } + + int c_iterations = 10; + int i; + const float precision = 0.001f; // distance error max + int nb_correct_precisions = 0; + int nb_correct_indexes = 0; + float *knn_dist = (float *)malloc(query_nb * k * sizeof(float)); + int *knn_index = (int *)malloc(query_nb * k * sizeof(int)); + + // Memory allocation + ref = (float *)malloc(ref_nb * dim * sizeof(float)); + query = (float *)malloc(query_nb * dim * sizeof(float)); + dist = (float *)malloc(query_nb * ref_nb * sizeof(float)); + ind = (int *)malloc(query_nb * k * sizeof(int)); + // cudaMallocManaged(&ref, ref_nb * dim * sizeof(float)); + // cudaMallocManaged(&query, query_nb * dim * sizeof(float)); + // cudaMallocManaged(&dist, query_nb * ref_nb * sizeof(float)); + // cudaMallocManaged(&ind, query_nb * k * sizeof(int)); + dist_c = (float *)malloc(query_nb * k * sizeof(float)); + ind_c = (int *)malloc(query_nb * k * sizeof(float)); + + // Init + srand(time(NULL)); + for (i = 0; i < ref_nb * dim; i++) + ref[i] = (float)rand() / (float)RAND_MAX; + for (i = 0; i < query_nb * dim; i++) + query[i] = (float)rand() / (float)RAND_MAX; + + // printf("Ground truth computation in progress...\n\n"); + // if (!knn_c(ref, ref_nb, query, query_nb, dim, k, knn_dist, knn_index)) { + // free(knn_dist); + // free(knn_index); + // return EXIT_FAILURE; + // } + + // Variables for duration evaluation + float elapsed_time; + + // Display informations + printf("Number of reference points : %6d\n", ref_nb); + printf("Number of query points : 
%6d\n", query_nb); + printf("Dimension of points : %4d\n", dim); + printf("Number of neighbors to consider : %4d\n", k); + printf("Processing kNN search :\n"); + + float precision_accuracy = 0.0f; + float index_accuracy = 0.0f; + /* + printf("On CPU: \n"); + struct timeval tic; + gettimeofday(&tic, NULL); + for (i = 0; i < c_iterations; i++) { + knn_c(ref, ref_nb, query, query_nb, dim, k, dist_c, ind_c); + } + + for (int i = 0; i < query_nb * k; ++i) { + if (fabs(dist_c[i] - knn_dist[i]) <= precision) { + nb_correct_precisions++; + } + if (ind_c[i] == knn_index[i]) { + nb_correct_indexes++; + } + } + + struct timeval toc; + gettimeofday(&toc, NULL); + elapsed_time = toc.tv_sec - tic.tv_sec; + elapsed_time += (toc.tv_usec - tic.tv_usec) / 1000000.; + precision_accuracy = nb_correct_precisions / ((float)query_nb * k); + index_accuracy = nb_correct_indexes / ((float)query_nb * k); + printf("%f, %f\n", precision_accuracy, index_accuracy); + printf(" done in %f s for %d iterations (%f s by iteration)\n", elapsed_time, + c_iterations, elapsed_time / (c_iterations)); + */ + printf("on GPU: \n"); + + GPU_argv_init(); + + // Call kNN search CUDA + initTrace(); + startCPU(); + + float *ref_device; + float *query_device; + float *dist_device; + int *ind_device; + + cudaMalloc(&ref_device, ref_nb * dim * sizeof(float)); + cudaMalloc(&query_device, query_nb * dim * sizeof(float)); + cudaMalloc(&dist_device, query_nb * ref_nb * sizeof(float)); + cudaMalloc(&ind_device, query_nb * k * sizeof(int)); + + cudaMemcpy(ref_device, ref, ref_nb * dim * sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(query_device, query, query_nb * dim * sizeof(float), cudaMemcpyHostToDevice); + + for (i = 0; i < iterations; i++) { + // knn_cuda(ref, ref_nb, query, query_nb, dim, k, dist, ind); + knn_cuda(ref_device, ref_nb, query_device, query_nb, dim, k, dist_device, ind_device); + } + + cudaMemcpy(dist, dist_device, query_nb * ref_nb * sizeof(float), cudaMemcpyDeviceToHost); + cudaMemcpy(ind, 
ind_device, query_nb * k * sizeof(int), cudaMemcpyDeviceToHost); + + cudaFree(ind_device); + cudaFree(dist_device); + cudaFree(query_device); + cudaFree(ref_device); + + endCPU(); + finiTrace(); + + nb_correct_precisions = 0; + nb_correct_indexes = 0; + for (int i = 0; i < query_nb * k; ++i) { + if (fabs(dist[i] - knn_dist[i]) <= precision) { + nb_correct_precisions++; + } + if (ind[i] == knn_index[i]) { + nb_correct_indexes++; + } + } + + precision_accuracy = nb_correct_precisions / ((float)query_nb * k); + index_accuracy = nb_correct_indexes / ((float)query_nb * k); + printf("%f, %f\n", precision_accuracy, index_accuracy); + + // Destroy cuda event object and free memory + // cudaFree(ind); + // cudaFree(dist); + // cudaFree(query); + // cudaFree(ref); + free(ind); + free(dist); + free(query); + free(ref); + free(dist_c); + free(ind_c); +} \ No newline at end of file diff --git a/workloads/realworld/async/knn/run.sh b/workloads/realworld/async/knn/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..541db1387ce3ebe87b1338f079609b8b4a2736c6 --- /dev/null +++ b/workloads/realworld/async/knn/run.sh @@ -0,0 +1 @@ +./knn 4096 4096 128 \ No newline at end of file diff --git a/workloads/realworld/async/knn/run_super.sh b/workloads/realworld/async/knn/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..86ad9321b470072e5e84e706e1619ee200cf2b31 --- /dev/null +++ b/workloads/realworld/async/knn/run_super.sh @@ -0,0 +1 @@ +./knn 32768 32768 128 \ No newline at end of file diff --git a/workloads/realworld/async/lavaMD/README b/workloads/realworld/async/lavaMD/README new file mode 100755 index 0000000000000000000000000000000000000000..27b526ff669e9632b11193634307bfe778a2dfff --- /dev/null +++ b/workloads/realworld/async/lavaMD/README @@ -0,0 +1,50 @@ +//======================================================================================================================================================150 +// DESCRIPTION 
+//======================================================================================================================================================150 + +This is the CUDA version of the code. + +The code calculates particle potential and relocation due to mutual forces between particles within a large 3D space. This space is +divided into cubes, or large boxes, that are allocated to individual cluster nodes. The large box at each node is further divided into +cubes, called boxes. 26 neighbor boxes surround each box (the home box). Home boxes at the boundaries of the particle space have fewer neighbors. +Particles only interact with those other particles that are within a cutoff radius since ones at larger distances exert negligible forces. Thus the +box size s chosen so that cutoff radius does not span beyond any neighbor box for any particle in a home box, thus limiting the reference space to +a finite number of boxes. + +This code [1] was derived from the ddcMD application [2] by rewriting the front end and structuring it for parallelization. This code represents MPI +task that runs on a single cluster node. While the details of the code are somewhat different than the original, the code retains the structure of the +MPI task in the original code. Since the rest of MPI code is not included here, the application first emulates MPI partitioning of the particle space +into boxes. Then, for every particle in the home box, the nested loop processes interactions first with other particles in the home box and then with +particles in all neighbor boxes. The processing of each particle consists of a single stage of calculation that is enclosed in the innermost loop. The +nested loops in the application were parallelized in such a way that at any point of time GPU warp/wavefront accesses adjacent memory locations. The +speedup depends on the number of boxes, particles (fixed) and the actualcal culation for each particle (fixed). 
The application is memory bound, and +GPU speedup seems to saturate at about 16x when compared to single-core CPU. + +More information about the parallel version of this code can be found in: +[1] L. G. Szafaryn, T. Gamblin, B. deSupinski and K. Skadron. "Experiences with Achieving Portability across Heterogeneous Architectures." Submitted to +WOLFHPC workshop at 25th International Conference on Supercomputing (ICS). Tucson, AZ. 2010. +More about the original ddcMD application can be found in: +[2] F. H. Streitz, J. N. Glosli, M. V. Patel, B. Chan, R. K. Yates, B. R. de Supinski, J. Sexton, J and A. Gunnels. "100+ TFlop Solidification Simulations +on BlueGene/L." In Proceedings of the 2005 Supercomputing Conference (SC 05). Seattle, WA. 2005. + +//======================================================================================================================================================150 +// USE +//======================================================================================================================================================150 + +The code takes the followint parameters: +-boxes1d (number of boxes in one dimension, the total number of boxes will be that^3) + +The code can be run as follows: +./lavaMD -boxes1d 10 + +******Adjustable work group size***** +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=128" + +######OUTPUT FOR VALIDATION######## +USAGE: +make clean +make OUTPUT=Y \ No newline at end of file diff --git a/workloads/realworld/async/lavaMD/kernel/kernel_gpu_cuda.cu b/workloads/realworld/async/lavaMD/kernel/kernel_gpu_cuda.cu new file mode 100755 index 0000000000000000000000000000000000000000..c931ae7f7eb429c844ac7722059faeedae8c9fbe --- /dev/null +++ b/workloads/realworld/async/lavaMD/kernel/kernel_gpu_cuda.cu @@ -0,0 +1,210 @@ 
+//----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------200 +// plasmaKernel_gpu_2 +//----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------200 + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +__global__ void kernel_gpu_cuda(par_str d_par_gpu, + dim_str d_dim_gpu, + box_str *d_box_gpu, + FOUR_VECTOR *d_rv_gpu, + fp *d_qv_gpu, + FOUR_VECTOR *d_fv_gpu, + int boxes_per_block) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + // THREAD PARAMETERS + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + int bx = blockIdx.x; // get current horizontal block index (0-n) + int tx = threadIdx.x; // get current horizontal thread index (0-n) + int wtx = tx; + + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + // Extract input parameters + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + + // parameters + fp a2 = 2.0 * d_par_gpu.alpha * d_par_gpu.alpha; + + // home box + int first_i; + FOUR_VECTOR *rA; + FOUR_VECTOR *fA; + __shared__ FOUR_VECTOR rA_shared[100]; + + // nei box + int pointer; + int k = 0; + int first_j; + FOUR_VECTOR *rB; + fp *qB; + int j = 0; + __shared__ FOUR_VECTOR 
rB_shared[NUMBER_PAR_PER_BOX * PREFETCH_COUNT]; + __shared__ double qB_shared[NUMBER_PAR_PER_BOX * PREFETCH_COUNT]; + + // common + fp r2; + fp u2; + fp vij; + fp fs; + fp fxij; + fp fyij; + fp fzij; + THREE_VECTOR d; + + int box = bx * boxes_per_block; + int end_box = box + boxes_per_block; + + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + // DO FOR THE NUMBER OF BOXES + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + for (; box < end_box; box++) + { + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + // Home box + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Setup parameters + //----------------------------------------------------------------------------------------------------------------------------------140 + + // home box - box parameters + first_i = d_box_gpu[box].offset; + + // home box - distance, force, charge and type parameters + rA = &d_rv_gpu[first_i]; + fA = &d_fv_gpu[first_i]; + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Copy to shared memory + //----------------------------------------------------------------------------------------------------------------------------------140 + + // home box - shared memory + while (wtx < NUMBER_PAR_PER_BOX) + { + rA_shared[wtx] = rA[wtx]; + wtx = wtx + NUMBER_THREADS; + } + wtx = tx; + + // 
synchronize threads - not needed, but just to be safe + block.sync(); + + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + // nei box loop + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + + // if (wtx == 0) + // printf("d_box_gpu[%d].nn is %d\n", bx, d_box_gpu[bx].nn); + + int fetch = 0; + int end_tile = 1 + d_box_gpu[box].nn; + + // loop over neiing boxes of home box + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + //----------------------------------------50 + // nei box - get pointer to the right box + //----------------------------------------50 + + if (fetch == 0) + { + pointer = box; // set first box to be processed to home box + } + else + { + pointer = d_box_gpu[box].nei[fetch - 1].number; // remaining boxes are nei boxes + } + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Setup parameters + //----------------------------------------------------------------------------------------------------------------------------------140 + + // nei box - box parameters + first_j = d_box_gpu[pointer].offset; + + // nei box - distance, (force), charge and (type) parameters + rB = &d_rv_gpu[first_j]; + qB = &d_qv_gpu[first_j]; + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Setup parameters + //----------------------------------------------------------------------------------------------------------------------------------140 + + // nei box - shared memory + while (wtx < NUMBER_PAR_PER_BOX) + { + memcpy_async(rB_shared[(fetch % PREFETCH_COUNT) * NUMBER_PAR_PER_BOX + wtx], 
rB[wtx], pipe); + memcpy_async(qB_shared[(fetch % PREFETCH_COUNT) * NUMBER_PAR_PER_BOX + wtx], qB[wtx], pipe); + wtx = wtx + NUMBER_THREADS; + } + wtx = tx; + + // synchronize threads because in next section each thread accesses data brought in by different threads here + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Calculation + //----------------------------------------------------------------------------------------------------------------------------------140 + + // loop for the number of particles in the home box + // for (int i=0; i +#include "../../../../common/cupti_add.h" +#include "../../../../common/cpu_timestamps.h" + +void +kernel_gpu_cuda_wrapper(par_str par_cpu, + dim_str dim_cpu, + box_str* box_cpu, + FOUR_VECTOR* rv_cpu, + fp* qv_cpu, + FOUR_VECTOR* fv_cpu, + int nblocks) +{ + + //======================================================================================================================================================150 + // CPU VARIABLES + //======================================================================================================================================================150 + + // timer + long long time0; + long long time1; + long long time2; + long long time3; + long long time4; + long long time5; + long long time6; + + time0 = get_time(); + + //======================================================================================================================================================150 + // GPU SETUP + //======================================================================================================================================================150 + + 
//====================================================================================================100 + // INITIAL DRIVER OVERHEAD + //====================================================================================================100 + + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaThreadSynchronize(); + + //====================================================================================================100 + // VARIABLES + //====================================================================================================100 + + box_str* d_box_gpu; + FOUR_VECTOR* d_rv_gpu; + fp* d_qv_gpu; + FOUR_VECTOR* d_fv_gpu; + + dim3 threads; + dim3 blocks; + + //====================================================================================================100 + // EXECUTION PARAMETERS + //====================================================================================================100 + + // blocks.x = dim_cpu.number_boxes; + blocks.x = nblocks * nblocks * nblocks; + blocks.y = 1; + threads.x = NUMBER_THREADS; // define the number of threads in the block + threads.y = 1; + + int boxes_per_block = 1; + if (dim_cpu.number_boxes >= blocks.x) + { + boxes_per_block = (dim_cpu.number_boxes + blocks.x - 1) / blocks.x; + } + + time1 = get_time(); + + //======================================================================================================================================================150 + // GPU MEMORY (MALLOC) + //======================================================================================================================================================150 + + //====================================================================================================100 + // GPU MEMORY (MALLOC) COPY IN + //====================================================================================================100 + + //==================================================50 + // boxes + 
//==================================================50 + + cudaMalloc( (void **)&d_box_gpu, + dim_cpu.box_mem); + + //==================================================50 + // rv + //==================================================50 + + cudaMalloc( (void **)&d_rv_gpu, + dim_cpu.space_mem); + + //==================================================50 + // qv + //==================================================50 + + cudaMalloc( (void **)&d_qv_gpu, + dim_cpu.space_mem2); + + //====================================================================================================100 + // GPU MEMORY (MALLOC) COPY + //====================================================================================================100 + + //==================================================50 + // fv + //==================================================50 + + cudaMalloc( (void **)&d_fv_gpu, + dim_cpu.space_mem); + + time2 = get_time(); + + //======================================================================================================================================================150 + // GPU MEMORY COPY + //======================================================================================================================================================150 + + //====================================================================================================100 + // GPU MEMORY (MALLOC) COPY IN + //====================================================================================================100 + + //==================================================50 + // boxes + //==================================================50 + + cudaMemcpy( d_box_gpu, + box_cpu, + dim_cpu.box_mem, + cudaMemcpyHostToDevice); + + //==================================================50 + // rv + //==================================================50 + + cudaMemcpy( d_rv_gpu, + rv_cpu, + dim_cpu.space_mem, + cudaMemcpyHostToDevice); + + //==================================================50 
+ // qv + //==================================================50 + + cudaMemcpy( d_qv_gpu, + qv_cpu, + dim_cpu.space_mem2, + cudaMemcpyHostToDevice); + + //====================================================================================================100 + // GPU MEMORY (MALLOC) COPY + //====================================================================================================100 + + //==================================================50 + // fv + //==================================================50 + + cudaMemcpy( d_fv_gpu, + fv_cpu, + dim_cpu.space_mem, + cudaMemcpyHostToDevice); + + time3 = get_time(); + + //======================================================================================================================================================150 + // KERNEL + //======================================================================================================================================================150 + // launch kernel - all boxes + kernel_gpu_cuda<<>>(par_cpu, + dim_cpu, + d_box_gpu, + d_rv_gpu, + d_qv_gpu, + d_fv_gpu, + boxes_per_block); + + checkCUDAError("Start"); + cudaDeviceSynchronize(); + + + time4 = get_time(); + + //======================================================================================================================================================150 + // GPU MEMORY COPY (CONTD.) 
+ //======================================================================================================================================================150 + + cudaMemcpy( fv_cpu, + d_fv_gpu, + dim_cpu.space_mem, + cudaMemcpyDeviceToHost); + + time5 = get_time(); + + //======================================================================================================================================================150 + // GPU MEMORY DEALLOCATION + //======================================================================================================================================================150 + + cudaFree(d_rv_gpu); + cudaFree(d_qv_gpu); + cudaFree(d_fv_gpu); + cudaFree(d_box_gpu); + + endCPU(); + finiTrace(); + + time6 = get_time(); + + //======================================================================================================================================================150 + // DISPLAY TIMING + //======================================================================================================================================================150 + + printf("Time spent in different stages of GPU_CUDA KERNEL:\n"); + + printf("%15.12f s, %15.12f % : GPU: SET DEVICE / DRIVER INIT\n", (float) (time1-time0) / 1000000, (float) (time1-time0) / (float) (time6-time0) * 100); + printf("%15.12f s, %15.12f % : GPU MEM: ALO\n", (float) (time2-time1) / 1000000, (float) (time2-time1) / (float) (time6-time0) * 100); + printf("%15.12f s, %15.12f % : GPU MEM: COPY IN\n", (float) (time3-time2) / 1000000, (float) (time3-time2) / (float) (time6-time0) * 100); + + printf("%15.12f s, %15.12f % : GPU: KERNEL\n", (float) (time4-time3) / 1000000, (float) (time4-time3) / (float) (time6-time0) * 100); + + printf("%15.12f s, %15.12f % : GPU MEM: COPY OUT\n", (float) (time5-time4) / 1000000, (float) (time5-time4) / (float) (time6-time0) * 100); + printf("%15.12f s, %15.12f % : GPU MEM: FRE\n", (float) (time6-time5) / 1000000, (float) (time6-time5) / (float) 
(time6-time0) * 100); + + printf("Total time:\n"); + printf("%.12f s\n", (float) (time6-time0) / 1000000); + +} diff --git a/workloads/realworld/async/lavaMD/kernel/kernel_gpu_cuda_wrapper.h b/workloads/realworld/async/lavaMD/kernel/kernel_gpu_cuda_wrapper.h new file mode 100755 index 0000000000000000000000000000000000000000..cf499f1480469569c649eccf174cc8ba0655ddbd --- /dev/null +++ b/workloads/realworld/async/lavaMD/kernel/kernel_gpu_cuda_wrapper.h @@ -0,0 +1,19 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//========================================================================================================================================================================================================200 +// KERNEL_GPU_CUDA_WRAPPER HEADER +//========================================================================================================================================================================================================200 + +void kernel_gpu_cuda_wrapper( par_str parms_cpu, + dim_str dim_cpu, + box_str* box_cpu, + FOUR_VECTOR* rv_cpu, + fp* qv_cpu, + FOUR_VECTOR* fv_cpu, + int nblocks); + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/async/lavaMD/main.c b/workloads/realworld/async/lavaMD/main.c new file mode 100755 index 0000000000000000000000000000000000000000..a7c88472e3939414bbdc314e2bfb1c46bc345bea --- /dev/null +++ b/workloads/realworld/async/lavaMD/main.c @@ -0,0 +1,318 @@ +//========================================================================================================================================================================================================200 +//======================================================================================================================================================150 +//====================================================================================================100 +//==================================================50 + 
+//========================================================================================================================================================================================================200 +// UPDATE +//========================================================================================================================================================================================================200 + +// 14 APR 2011 Lukasz G. Szafaryn + +//========================================================================================================================================================================================================200 +// DEFINE/INCLUDE +//========================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// LIBRARIES +//======================================================================================================================================================150 + +#include // (in path known to compiler) needed by printf +#include // (in path known to compiler) needed by malloc +#include // (in path known to compiler) needed by true/false + +//======================================================================================================================================================150 +// UTILITIES +//======================================================================================================================================================150 + +#include "./util/timer/timer.h" // (in path specified here) +#include "./util/num/num.h" // (in path specified here) + 
+//======================================================================================================================================================150 +// MAIN FUNCTION HEADER +//======================================================================================================================================================150 + +#include "./main.h" // (in the current directory) + +//======================================================================================================================================================150 +// KERNEL +//======================================================================================================================================================150 + +#include "./kernel/kernel_gpu_cuda_wrapper.h" // (in library path specified here) + +//========================================================================================================================================================================================================200 +// MAIN FUNCTION +//========================================================================================================================================================================================================200 +#define _POSIX_C_SOURCE 200809L +#include +#include +#include +extern inline __attribute__((always_inline)) unsigned long rdtsc() +{ + unsigned long a, d; + + __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); + + return (a | (d << 32)); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsp() { + struct timespec tms; + if (clock_gettime(CLOCK_REALTIME, &tms)) { + return -1; + } + unsigned long ns = tms.tv_sec * 1000000000; + ns += tms.tv_nsec; + return ns; +} + + +int +main( int argc, + char *argv []) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + + printf("thread block size of kernel = %d \n", NUMBER_THREADS); + 
//======================================================================================================================================================150 + // CPU/MCPU VARIABLES + //======================================================================================================================================================150 + + // timer + long long time0; + + time0 = get_time(); + + // timer + long long time1; + long long time2; + long long time3; + long long time4; + long long time5; + long long time6; + long long time7; + + // counters + int i, j, k, l, m, n; + + // system memory + par_str par_cpu; + dim_str dim_cpu; + box_str* box_cpu; + FOUR_VECTOR* rv_cpu; + fp* qv_cpu; + FOUR_VECTOR* fv_cpu; + int nh; + + time1 = get_time(); + + //======================================================================================================================================================150 + // CHECK INPUT ARGUMENTS + //======================================================================================================================================================150 + + // assing default values + dim_cpu.boxes1d_arg = 1; + + // go through arguments + dim_cpu.boxes1d_arg = atoi(argv[1]); + int nblocks = atoi(argv[2]); + + // Print configuration + printf("Configuration used: boxes1d = %d\n", dim_cpu.boxes1d_arg); + + time2 = get_time(); + + //======================================================================================================================================================150 + // INPUTS + //======================================================================================================================================================150 + + par_cpu.alpha = 0.5; + + time3 = get_time(); + + //======================================================================================================================================================150 + // DIMENSIONS + 
//======================================================================================================================================================150 + + // total number of boxes + dim_cpu.number_boxes = dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg; + + // how many particles space has in each direction + dim_cpu.space_elem = dim_cpu.number_boxes * NUMBER_PAR_PER_BOX; + dim_cpu.space_mem = dim_cpu.space_elem * sizeof(FOUR_VECTOR); + dim_cpu.space_mem2 = dim_cpu.space_elem * sizeof(fp); + + // box array + dim_cpu.box_mem = dim_cpu.number_boxes * sizeof(box_str); + + time4 = get_time(); + + //======================================================================================================================================================150 + // SYSTEM MEMORY + //======================================================================================================================================================150 + + //====================================================================================================100 + // BOX + //====================================================================================================100 + + // allocate boxes + box_cpu = (box_str*)malloc(dim_cpu.box_mem); + + // initialize number of home boxes + nh = 0; + + // home boxes in z direction + for(i=0; i=0 && (j+m)>=0 && (k+n)>=0)==true && ((i+l) 1) { + + // variables + int max_multiprocessors; + int max_device; + cudaDeviceProp properties; + + // initialize variables + max_multiprocessors = 0; + max_device = 0; + + for (device = 0; device < num_devices; device++) { + cudaGetDeviceProperties(&properties, device); + if (max_multiprocessors < properties.multiProcessorCount) { + max_multiprocessors = properties.multiProcessorCount; + max_device = device; + } + } + cudaSetDevice(max_device); + } + +} + +//====================================================================================================100 +// GET LAST ERROR 
+//====================================================================================================100 + +void checkCUDAError(const char *msg) +{ + cudaError_t err = cudaGetLastError(); + if( cudaSuccess != err) { + // fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + printf("Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + fflush(NULL); + exit(EXIT_FAILURE); + } +} + +//===============================================================================================================================================================================================================200 +// END SET_DEVICE CODE +//===============================================================================================================================================================================================================200 diff --git a/workloads/realworld/async/lavaMD/util/device/device.h b/workloads/realworld/async/lavaMD/util/device/device.h new file mode 100755 index 0000000000000000000000000000000000000000..23bb31d26c1bc0e607c9b2faf7bddaa5a5c06d98 --- /dev/null +++ b/workloads/realworld/async/lavaMD/util/device/device.h @@ -0,0 +1,29 @@ +//===============================================================================================================================================================================================================200 +// SET_DEVICE HEADER +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// INCLUDE/DEFINE +//======================================================================================================================================================150 + +#include // (in library 
path known to compiler) needed by printf + +//======================================================================================================================================================150 +// FUNCTION PROTOTYPES +//======================================================================================================================================================150 + +//====================================================================================================100 +// SET DEVICE +//====================================================================================================100 + +void setdevice(void); + +//====================================================================================================100 +// GET LAST ERROR +//====================================================================================================100 + +void checkCUDAError(const char *msg); + +//===============================================================================================================================================================================================================200 +// END SET_DEVICE HEADER +//===============================================================================================================================================================================================================200 diff --git a/workloads/realworld/async/lavaMD/util/num/num.c b/workloads/realworld/async/lavaMD/util/num/num.c new file mode 100755 index 0000000000000000000000000000000000000000..980ff7498832c784eab718a8b886e82891047599 --- /dev/null +++ b/workloads/realworld/async/lavaMD/util/num/num.c @@ -0,0 +1,53 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// DESCRIPTION 
+//===============================================================================================================================================================================================================200 + +// Returns: 0 if string does not represent integer +// 1 if string represents integer + +//===============================================================================================================================================================================================================200 +// NUM CODE +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// ISINTEGER FUNCTION +//======================================================================================================================================================150 + +int isInteger(char *str){ + + //====================================================================================================100 + // make sure it's not empty + //====================================================================================================100 + + if (*str == '\0'){ + return 0; + } + + //====================================================================================================100 + // if any digit is not a number, return false + //====================================================================================================100 + + for(; *str != '\0'; str++){ + if (*str < 48 || *str > 57){ // digit characters (need to include . 
if checking for float) + return 0; + } + } + + //====================================================================================================100 + // it got past all my checks so I think it's a number + //====================================================================================================100 + + return 1; +} + +//===============================================================================================================================================================================================================200 +// END NUM CODE +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/async/lavaMD/util/num/num.h b/workloads/realworld/async/lavaMD/util/num/num.h new file mode 100755 index 0000000000000000000000000000000000000000..27a5e42fe2819d9ecc2f569b5979fb451985976f --- /dev/null +++ b/workloads/realworld/async/lavaMD/util/num/num.h @@ -0,0 +1,21 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// FILE HEADER +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// ISINTEGER FUNCTION PROTOTYPE +//======================================================================================================================================================150 + +int isInteger(char *str); + 
+//===============================================================================================================================================================================================================200 +// END FILE HEADER +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/async/lavaMD/util/timer/timer.c b/workloads/realworld/async/lavaMD/util/timer/timer.c new file mode 100755 index 0000000000000000000000000000000000000000..c7cc252b4e67b3a868722b7b2c58f5b863ae0cfc --- /dev/null +++ b/workloads/realworld/async/lavaMD/util/timer/timer.c @@ -0,0 +1,36 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// TIMER CODE +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// INCLUDE/DEFINE +//======================================================================================================================================================150 + +#include + +//======================================================================================================================================================150 +// FUNCTIONS +//======================================================================================================================================================150 + 
+//====================================================================================================100 +// DISPLAY TIME +//====================================================================================================100 + + // Returns the current system time in microseconds +long long get_time() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000000) + tv.tv_usec; +} + +//===============================================================================================================================================================================================================200 +// END TIMER CODE +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/async/lavaMD/util/timer/timer.h b/workloads/realworld/async/lavaMD/util/timer/timer.h new file mode 100755 index 0000000000000000000000000000000000000000..1744df4b8607f95c057ac4db6e9ced5ff84c4ab7 --- /dev/null +++ b/workloads/realworld/async/lavaMD/util/timer/timer.h @@ -0,0 +1,21 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// TIMER HEADER +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// FUNCTION PROTOTYPES 
+//======================================================================================================================================================150 + +long long get_time(); + +//===============================================================================================================================================================================================================200 +// END TIMER HEADER +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/async/lud/Makefile b/workloads/realworld/async/lud/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1dfc37ac7fa0db46535d0583970dba1cb5cfb80e --- /dev/null +++ b/workloads/realworld/async/lud/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := lud +CUFILES := lud_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o lud + diff --git a/workloads/realworld/async/lud/lud_cuda.cu b/workloads/realworld/async/lud/lud_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..7d21a53dd3ca2568dbbce6144932b716518f3e42 --- /dev/null +++ b/workloads/realworld/async/lud/lud_cuda.cu @@ -0,0 +1,302 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. 
Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK 256 + +#ifndef SIZE +#define SIZE 4096 +#endif + +// __global__ void add(float *a, float *b, float *c) +// { +// int tid = blockIdx.x; // Handle the data at the index + +// c[tid] = a[tid] + b[tid]; +// } + +// __global__ void scale(float *a, int size, int index) +// { +// int i; +// int start = (index * size + index); +// int end = (index * size + size); + +// for (i = start + 1; i < end; i++) +// { +// a[i] = (a[i] / a[start]); +// } +// } + +// __global__ void reduce(float *a, int size, int index, int b_size) +// { +// extern __shared__ float pivot[]; +// int i; + +// int tid = threadIdx.x; +// int bid = blockIdx.x; +// int block_size = b_size; + +// int pivot_start = (index * size + index); +// int pivot_end = (index * size + size); + +// int start; +// int end; +// int pivot_row; +// int my_row; + +// if (tid == 0) +// { +// for (i = index; i < size; i++) +// pivot[i] = a[(index * size) + i]; +// } + +// __syncthreads(); + +// pivot_row = (index * size); +// my_row = (((block_size * bid) + tid) * size); +// start = my_row + index; +// end = my_row + size; + +// if (my_row > pivot_row) +// { +// for (i = start + 1; i < end; i++) +// { +// a[i] = a[i] - (a[start] * pivot[(i - my_row)]); +// } +// } +// } + +void initCPU(float *a, int N) +{ + srand((unsigned)2); + // fill the arrays 'a' on the CPU + for (int i = 0; i < (N * N); i++) + { + a[i] = ((rand() % 10) + 1); + } +} + +void initGPU(float *a_dev, int N) +{ + srand((unsigned)2); + for (int i = 0; i < (N * N); i++) + { + a_dev[i] = ((rand() % 10) + 1); + } +} + +__global__ void lud_kernel(float *a, int N) +{ + cooperative_groups::thread_block block = 
cooperative_groups::this_thread_block(); + pipeline pipe; + // extern __shared__ float pivot[]; + __shared__ float pivot[SIZE * PREFETCH_COUNT]; + + int fetch = 0; + int end_tile = fetch + N; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = blockDim.x; + + if (tid == 0 && bid == 0) + { + int start = (compute * N + compute); + int end = (compute * N + N); + + for (int i = start + 1; i < end; i++) + a[i] = (a[i] / a[start]); + } + block.sync(); + + if (tid == 0) + { + for (int i = compute; i < N; i++) + memcpy_async(pivot[(fetch % PREFETCH_COUNT) * N + i], a[(compute * N) + i], pipe); + } + // block.sync(); + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = blockDim.x; + + int pivot_row = (compute * N); + int my_row = (((block_size * bid) + tid) * N); + int start = my_row + compute; + int end = my_row + N; + + if (my_row > pivot_row) + { + for (int i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(compute % PREFETCH_COUNT) * N + (i - my_row)]); + } + } + block.sync(); + } +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + float *a; + float *c; + float error; + int N; + int flag = 0; + + float **result; + float **a_ref; + int blocks; + + float *dev_a; + int i; + int j; + int k; + float l1; + float u1; + + N = SIZE; + // allocate memory on CPU + a = (float *)malloc(sizeof(float) * N * N); + c = (float *)malloc(sizeof(float) * N * N); + + result = (float **)malloc(sizeof(float *) * N); + a_ref = (float **)malloc(sizeof(float *) * N); + + for (i = 0; i < N; i++) + { + result[i] = (float 
*)malloc(sizeof(float) * N); + a_ref[i] = (float *)malloc(sizeof(float) * N); + } + + GPU_argv_init(); + initCPU(a, N); + + initTrace(); + startCPU(); + // allocate the memory on the GPU + cudaMalloc((void **)&dev_a, N * N * sizeof(float)); + + cudaMemcpy(dev_a, a, N * N * sizeof(float), cudaMemcpyHostToDevice); // copy array to device memory + + /*Perform LU Decomposition*/ + // for (i = 0; i < N; i++) + // { + // scale<<<1, 1>>>(dev_a, N, i); + // // blocks= ((N-i-1)/512)+1; + // blocks = ((N / 512)); + // // printf("Number of blocks rxd : %d \n",blocks); + // reduce<<>>(dev_a, N, i, 512); + // } + blocks = ((N / DIM_THREAD_BLOCK)); + // lud_kernel<<>>(dev_a, N); + lud_kernel<<>>(dev_a, N); + /*LU decomposition ends here*/ + + cudaMemcpy(c, dev_a, N * N * sizeof(float), cudaMemcpyDeviceToHost); // copy array back to host + + // free the memory allocated on the GPU + cudaFree(dev_a); + endCPU(); + finiTrace(); + + /*copy the result matrix into explicit 2D matrix for verification*/ + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + result[i][j] = c[i * N + j]; + } + } + + printf("======================================================="); + printf("\n Performing inplace verification \n"); + + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + a_ref[i][j] = 0; + for (k = 0; k < N; k++) + { + if (i >= k) + l1 = result[i][k]; + else + l1 = 0; + + if (k == j) + u1 = 1; + else if (k < j) + u1 = result[k][j]; // figured it out + else + u1 = 0.0; + + a_ref[i][j] = a_ref[i][j] + (l1 * u1); + } + } + } + + // for (i = 0; i < N; i++) + // { + // for (j = 0; j < N; j++) + // { + // error = abs(a[(i * N + j)] - a_ref[i][j]); + // if (error > 1) + // { + // // printf("No match occured at %d %d Error is %lf \n ", i, j, abs(a[(i*N+j)]-b[i][j])); + // flag = flag + 1; + // } + // } + // } + + // if (flag == 0) + // printf("Match \n"); + // else + // printf("No Matchs %d \n", flag); + + + + return 0; +} diff --git a/workloads/realworld/async/lud/run.sh 
b/workloads/realworld/async/lud/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..ea7db937489e328f5e923d2b18774e4256eef123 --- /dev/null +++ b/workloads/realworld/async/lud/run.sh @@ -0,0 +1 @@ +./lud 1024 diff --git a/workloads/realworld/async/lud/run_super.sh b/workloads/realworld/async/lud/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2791fe07c43d75b40894206ba79ed441f207ee26 --- /dev/null +++ b/workloads/realworld/async/lud/run_super.sh @@ -0,0 +1 @@ +./lud 4096 diff --git a/workloads/realworld/async/lud_perf/Makefile b/workloads/realworld/async/lud_perf/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1dfc37ac7fa0db46535d0583970dba1cb5cfb80e --- /dev/null +++ b/workloads/realworld/async/lud_perf/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := lud +CUFILES := lud_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o lud + diff --git a/workloads/realworld/async/lud_perf/lud b/workloads/realworld/async/lud_perf/lud new file mode 100755 index 0000000000000000000000000000000000000000..5bb98d704b71e58f6ea2cf398443e68057c17db2 Binary files /dev/null and b/workloads/realworld/async/lud_perf/lud differ diff --git a/workloads/realworld/async/lud_perf/lud_cuda.cu b/workloads/realworld/async/lud_perf/lud_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..89d2fcf1de96170d17ce3e761c9c59bc634669b8 --- /dev/null +++ b/workloads/realworld/async/lud_perf/lud_cuda.cu @@ -0,0 +1,302 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN computation + * added feature extraction with 
bilinear interpolation + * + * Last modified by Christopher B. Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK 256 + +#ifndef SIZE +#define SIZE 4096 +#endif + +// __global__ void add(float *a, float *b, float *c) +// { +// int tid = blockIdx.x; // Handle the data at the index + +// c[tid] = a[tid] + b[tid]; +// } + +// __global__ void scale(float *a, int size, int index) +// { +// int i; +// int start = (index * size + index); +// int end = (index * size + size); + +// for (i = start + 1; i < end; i++) +// { +// a[i] = (a[i] / a[start]); +// } +// } + +// __global__ void reduce(float *a, int size, int index, int b_size) +// { +// extern __shared__ float pivot[]; +// int i; + +// int tid = threadIdx.x; +// int bid = blockIdx.x; +// int block_size = b_size; + +// int pivot_start = (index * size + index); +// int pivot_end = (index * size + size); + +// int start; +// int end; +// int pivot_row; +// int my_row; + +// if (tid == 0) +// { +// for (i = index; i < size; i++) +// pivot[i] = a[(index * size) + i]; +// } + +// __syncthreads(); + +// pivot_row = (index * size); +// my_row = (((block_size * bid) + tid) * size); +// start = my_row + index; +// end = my_row + size; + +// if (my_row > pivot_row) +// { +// for (i = start + 1; i < end; i++) +// { +// a[i] = a[i] - (a[start] * pivot[(i - my_row)]); +// } +// } +// } + +void initCPU(float *a, int N) +{ + srand((unsigned)2); + // fill the arrays 'a' on the CPU + for (int i = 0; i < (N * N); i++) + { + a[i] = ((rand() % 10) + 1); + } +} + +void initGPU(float *a_dev, int N) +{ + srand((unsigned)2); + for (int i = 0; i < (N * N); i++) + { + a_dev[i] = ((rand() % 10) + 1); + } +} + +__global__ void lud_kernel(float *a, int N) 
+{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + // extern __shared__ float pivot[]; + __shared__ float pivot[SIZE * PREFETCH_COUNT]; + + int fetch = 0; + int end_tile = fetch + N; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = blockDim.x; + + if (tid == 0 && bid == 0) + { + int start = (compute * N + compute); + int end = (compute * N + N); + + for (int i = start + 1; i < end; i++) + a[i] = (a[i] / a[start]); + } + block.sync(); + + if (tid == 0) + { + for (int i = compute; i < N; i++) + memcpy_async(pivot[(fetch % PREFETCH_COUNT) * N + i], a[(compute * N) + i], pipe); + } + // block.sync(); + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = blockDim.x; + + int pivot_row = (compute * N); + int my_row = (((block_size * bid) + tid) * N); + int start = my_row + compute; + int end = my_row + N; + + if (my_row > pivot_row) + { + for (int i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(compute % PREFETCH_COUNT) * N + (i - my_row)]); + } + } + block.sync(); + } +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + float *a; + float *c; + float error; + int N; + int flag = 0; + + float **result; + float **a_ref; + int blocks; + + float *dev_a; + int i; + int j; + int k; + float l1; + float u1; + + N = SIZE; + // allocate memory on CPU + a = (float *)malloc(sizeof(float) * N * N); + c = (float *)malloc(sizeof(float) * N * N); + + result = (float **)malloc(sizeof(float *) * N); + a_ref = (float **)malloc(sizeof(float *) * N); + + for 
(i = 0; i < N; i++) + { + result[i] = (float *)malloc(sizeof(float) * N); + a_ref[i] = (float *)malloc(sizeof(float) * N); + } + + GPU_argv_init(); + initCPU(a, N); + + // initTrace(); + startCPU(); + // allocate the memory on the GPU + cudaMalloc((void **)&dev_a, N * N * sizeof(float)); + + cudaMemcpy(dev_a, a, N * N * sizeof(float), cudaMemcpyHostToDevice); // copy array to device memory + + /*Perform LU Decomposition*/ + // for (i = 0; i < N; i++) + // { + // scale<<<1, 1>>>(dev_a, N, i); + // // blocks= ((N-i-1)/512)+1; + // blocks = ((N / 512)); + // // printf("Number of blocks rxd : %d \n",blocks); + // reduce<<>>(dev_a, N, i, 512); + // } + blocks = ((N / DIM_THREAD_BLOCK)); + // lud_kernel<<>>(dev_a, N); + lud_kernel<<>>(dev_a, N); + /*LU decomposition ends here*/ + + cudaMemcpy(c, dev_a, N * N * sizeof(float), cudaMemcpyDeviceToHost); // copy array back to host + + // free the memory allocated on the GPU + cudaFree(dev_a); + endCPU(); + // finiTrace(); + + /*copy the result matrix into explicit 2D matrix for verification*/ + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + result[i][j] = c[i * N + j]; + } + } + + printf("======================================================="); + printf("\n Performing inplace verification \n"); + + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + a_ref[i][j] = 0; + for (k = 0; k < N; k++) + { + if (i >= k) + l1 = result[i][k]; + else + l1 = 0; + + if (k == j) + u1 = 1; + else if (k < j) + u1 = result[k][j]; // figured it out + else + u1 = 0.0; + + a_ref[i][j] = a_ref[i][j] + (l1 * u1); + } + } + } + + // for (i = 0; i < N; i++) + // { + // for (j = 0; j < N; j++) + // { + // error = abs(a[(i * N + j)] - a_ref[i][j]); + // if (error > 1) + // { + // // printf("No match occured at %d %d Error is %lf \n ", i, j, abs(a[(i*N+j)]-b[i][j])); + // flag = flag + 1; + // } + // } + // } + + // if (flag == 0) + // printf("Match \n"); + // else + // printf("No Matchs %d \n", flag); + + + + return 0; +} diff 
--git a/workloads/realworld/async/lud_perf/run.sh b/workloads/realworld/async/lud_perf/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..ea7db937489e328f5e923d2b18774e4256eef123 --- /dev/null +++ b/workloads/realworld/async/lud_perf/run.sh @@ -0,0 +1 @@ +./lud 1024 diff --git a/workloads/realworld/async/lud_perf/run_super.sh b/workloads/realworld/async/lud_perf/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2791fe07c43d75b40894206ba79ed441f207ee26 --- /dev/null +++ b/workloads/realworld/async/lud_perf/run_super.sh @@ -0,0 +1 @@ +./lud 4096 diff --git a/workloads/realworld/async/nw/Makefile b/workloads/realworld/async/nw/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..b33ae6462826357f9665bfd7fc9929ed176f9b35 --- /dev/null +++ b/workloads/realworld/async/nw/Makefile @@ -0,0 +1,15 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include + +SRC = needle.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = needle + +release: $(SRC) + $(CC) ${KERNEL_DIM} $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt diff --git a/workloads/realworld/async/nw/Makefile_nvidia b/workloads/realworld/async/nw/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..2fd0b98d07beea56ae69a96a0c8cb3af87d602f6 --- /dev/null +++ b/workloads/realworld/async/nw/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. 
IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. 
+# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := needle +# CUDA source files (compiled with cudacc) +CUFILES := needle.cu +# CUDA dependency files +CU_DEPS := needle_kernel.cu +# C/C++ source files (compiled with gcc / c++) +# CCFILES := BlackScholes_gold.cpp + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/async/nw/README b/workloads/realworld/async/nw/README new file mode 100755 index 0000000000000000000000000000000000000000..683cbd53db81f0ece4f926fa01316582ea0d5fc9 --- /dev/null +++ b/workloads/realworld/async/nw/README @@ -0,0 +1,12 @@ +Note: This program generate two sequences randomly. Please specify your own sequences for different uses. + At the current stage, the program only supports two sequences with the same lengh, which can be divided by 16. 
+Usage: needle 32 10 + 32 //the length of both sequences + 10 //penalty value + +******Adjustable work group size***** +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" diff --git a/workloads/realworld/async/nw/needle.cu b/workloads/realworld/async/nw/needle.cu new file mode 100755 index 0000000000000000000000000000000000000000..13f28e9f2de37a368a03c41c4dcc2fa2ba181427 --- /dev/null +++ b/workloads/realworld/async/nw/needle.cu @@ -0,0 +1,284 @@ +#define LIMIT -999 +#include +#include +#include +#include +#include "needle.h" +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + + +// includes, kernels +#include "needle_kernel.cu" + +#ifdef TIMING +#include "timing.h" + +struct timeval tv; +struct timeval tv_total_start, tv_total_end; +struct timeval tv_h2d_start, tv_h2d_end; +struct timeval tv_d2h_start, tv_d2h_end; +struct timeval tv_kernel_start, tv_kernel_end; +struct timeval tv_mem_alloc_start, tv_mem_alloc_end; +struct timeval tv_close_start, tv_close_end; +float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, + d2h_time = 0, close_time = 0, total_time = 0; +#endif + +//////////////////////////////////////////////////////////////////////////////// +// declaration, forward +void runTest( int argc, char** argv); + + +int blosum62[24][24] = { +{ 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0, -4}, +{-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1, -4}, +{-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1, -4}, +{-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1, -4}, +{ 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4}, +{-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1, -4}, +{-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, 
-3, -2, -2, 1, 4, -1, -4}, +{ 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -4}, +{-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1, -4}, +{-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1, -4}, +{-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1, -4}, +{-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1, -4}, +{-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1, -4}, +{-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1, -4}, +{-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2, -4}, +{ 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0, -4}, +{ 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0, -4}, +{-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2, -4}, +{-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1, -4}, +{ 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1, -4}, +{-2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1, -4}, +{-1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4}, +{ 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1, -4}, +{-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1} +}; + +double gettime() { + struct timeval t; + gettimeofday(&t,NULL); + return t.tv_sec+t.tv_usec*1e-6; +} + +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int +main( int argc, char** argv) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp 
= rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + + printf("WG size of kernel = %d \n", BLOCK_SIZE); + + runTest( argc, argv); + + return EXIT_SUCCESS; +} + +void usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "\t - x and y dimensions\n"); + fprintf(stderr, "\t - penalty(positive integer)\n"); + exit(1); +} + +void runTest( int argc, char** argv) +{ + int max_rows, max_cols, penalty, nblocks; + int *input_itemsets, *output_itemsets, *referrence; + int *matrix_cuda, *referrence_cuda; + int size; + + + // the lengths of the two sequences should be able to divided by 16. + // And at current stage max_rows needs to equal max_cols + if (argc == 4) + { + max_rows = atoi(argv[1]); + max_cols = atoi(argv[1]); + penalty = atoi(argv[2]); + nblocks = atoi(argv[3]); + } + else{ + usage(argc, argv); + } + + if(atoi(argv[1])%16!=0){ + fprintf(stderr,"The dimension values must be a multiple of 16\n"); + exit(1); + } + + + max_rows = max_rows + 1; + max_cols = max_cols + 1; + referrence = (int *)malloc( max_rows * max_cols * sizeof(int) ); + input_itemsets = (int *)malloc( max_rows * max_cols * sizeof(int) ); + output_itemsets = (int *)malloc( max_rows * max_cols * sizeof(int) ); + + + if (!input_itemsets) + fprintf(stderr, "error: can not allocate memory"); + + srand ( 7 ); + + + for (int i = 0 ; i < max_cols; i++){ + for (int j = 0 ; j < max_rows; j++){ + input_itemsets[i*max_cols+j] = 0; + } + } + + printf("Start Needleman-Wunsch\n"); + + for( int i=1; i< max_rows ; i++){ //please define your own sequence. + input_itemsets[i*max_cols] = rand() % 10 + 1; + } + for( int j=1; j< max_cols ; j++){ //please define your own sequence. 
+ input_itemsets[j] = rand() % 10 + 1; + } + + + for (int i = 1 ; i < max_cols; i++){ + for (int j = 1 ; j < max_rows; j++){ + referrence[i*max_cols+j] = blosum62[input_itemsets[i*max_cols]][input_itemsets[j]]; + } + } + + for( int i = 1; i< max_rows ; i++) + input_itemsets[i*max_cols] = -i * penalty; + for( int j = 1; j< max_cols ; j++) + input_itemsets[j] = -j * penalty; + + + size = max_cols * max_rows; + + GPU_argv_init(); + initTrace(); + startCPU(); + + cudaMalloc((void**)& referrence_cuda, sizeof(int)*size); + cudaMalloc((void**)& matrix_cuda, sizeof(int)*size); + + cudaMemcpy(referrence_cuda, referrence, sizeof(int) * size, cudaMemcpyHostToDevice); + cudaMemcpy(matrix_cuda, input_itemsets, sizeof(int) * size, cudaMemcpyHostToDevice); + + dim3 dimGrid; + dim3 dimBlock(BLOCK_SIZE, 1); + // int block_width = ( max_cols - 1 )/BLOCK_SIZE; + int block_width = nblocks - 1; + int block_size = (max_cols - 1) / (nblocks * BLOCK_SIZE); + +#ifdef TIMING + gettimeofday(&tv_kernel_start, NULL); +#endif + + //printf("Processing top-left matrix\n"); + //process top-left matrix + for( int i = 1 ; i <= block_width ; i++) { + dimGrid.x = i; + dimGrid.y = 1; + needle_cuda_shared_1<<>>(referrence_cuda, matrix_cuda + ,max_cols, penalty, i, block_width, block_size); + } + //printf("Processing bottom-right matrix\n"); + //process bottom-right matrix + for( int i = block_width - 1 ; i >= 1 ; i--){ + dimGrid.x = i; + dimGrid.y = 1; + needle_cuda_shared_2<<>>(referrence_cuda, matrix_cuda + ,max_cols, penalty, i, block_width, block_size); + } + cudaDeviceSynchronize(); + +#ifdef TIMING + gettimeofday(&tv_kernel_end, NULL); + tvsub(&tv_kernel_end, &tv_kernel_start, &tv); + kernel_time += tv.tv_sec * 1000.0 + (float) tv.tv_usec / 1000.0; +#endif + + cudaMemcpy(output_itemsets, matrix_cuda, sizeof(int) * size, cudaMemcpyDeviceToHost); + + cudaFree(referrence_cuda); + cudaFree(matrix_cuda); + endCPU(); + finiTrace(); + +//#define TRACEBACK +#ifdef TRACEBACK + + FILE *fpo = 
fopen("result.txt","w"); + fprintf(fpo, "print traceback value GPU:\n"); + + for (int i = max_rows - 2, j = max_rows - 2; i>=0, j>=0;){ + int nw, n, w, traceback; + if ( i == max_rows - 2 && j == max_rows - 2 ) + fprintf(fpo, "%d ", output_itemsets[ i * max_cols + j]); //print the first element + if ( i == 0 && j == 0 ) + break; + if ( i > 0 && j > 0 ){ + nw = output_itemsets[(i - 1) * max_cols + j - 1]; + w = output_itemsets[ i * max_cols + j - 1 ]; + n = output_itemsets[(i - 1) * max_cols + j]; + } + else if ( i == 0 ){ + nw = n = LIMIT; + w = output_itemsets[ i * max_cols + j - 1 ]; + } + else if ( j == 0 ){ + nw = w = LIMIT; + n = output_itemsets[(i - 1) * max_cols + j]; + } + else{ + } + + //traceback = maximum(nw, w, n); + int new_nw, new_w, new_n; + new_nw = nw + referrence[i * max_cols + j]; + new_w = w - penalty; + new_n = n - penalty; + + traceback = maximum(new_nw, new_w, new_n); + if(traceback == new_nw) + traceback = nw; + if(traceback == new_w) + traceback = w; + if(traceback == new_n) + traceback = n; + + fprintf(fpo, "%d ", traceback); + + if(traceback == nw ) + {i--; j--; continue;} + + else if(traceback == w ) + {j--; continue;} + + else if(traceback == n ) + {i--; continue;} + + else + ; + } + + fclose(fpo); + +#endif + + free(referrence); + free(input_itemsets); + free(output_itemsets); + +#ifdef TIMING + printf("Exec: %f\n", kernel_time); +#endif +} + diff --git a/workloads/realworld/async/nw/needle.h b/workloads/realworld/async/nw/needle.h new file mode 100755 index 0000000000000000000000000000000000000000..e73320d6496262665592117d242e9bc383298b5b --- /dev/null +++ b/workloads/realworld/async/nw/needle.h @@ -0,0 +1,11 @@ +#ifdef RD_WG_SIZE_0_0 + #define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) + #define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) + #define BLOCK_SIZE RD_WG_SIZE +#else + #define BLOCK_SIZE 16 +#endif +//#define TRACE + diff --git a/workloads/realworld/async/nw/needle_kernel.cu 
b/workloads/realworld/async/nw/needle_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..8581753101a99cad9c9695db670e466e5c24d5f5 --- /dev/null +++ b/workloads/realworld/async/nw/needle_kernel.cu @@ -0,0 +1,237 @@ + +#include "needle.h" +#include + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SDATA( index) CUT_BANK_CHECKER(sdata, index) + +__device__ __host__ int +maximum( int a, + int b, + int c){ + +int k; +if( a <= b ) +k = b; +else +k = a; + +if( k <=c ) +return(c); +else +return(k); + +} + +__global__ void +needle_cuda_shared_1( int* referrence, + int* matrix_cuda, + int cols, + int penalty, + int i, + int block_width, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + int bx = blockIdx.x; + int tx = threadIdx.x; + + int b_index_x = bx; + int b_index_y = i - 1 - bx; + + __shared__ int temp[PREFETCH_COUNT * (BLOCK_SIZE+1)][BLOCK_SIZE+1]; + __shared__ int ref[PREFETCH_COUNT * BLOCK_SIZE][BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (b_index_y * gridDim.x + b_index_x) * tiles_this_block; + int fetch = base_tile; + int end_tile = fetch + tiles_this_block; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + int offset = fetch - base_tile; + int block_id = fetch / tiles_this_block; + int b_index_x = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int b_index_y = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + int index = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( cols + 1 ); + int index_n = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * 
b_index_x + tx + ( 1 ); + int index_w = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + ( cols ); + int index_nw = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x; + + if (tx == 0) + memcpy_async(temp[(fetch % PREFETCH_COUNT) * (BLOCK_SIZE+1) + tx][0], matrix_cuda[index_nw], pipe); + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + memcpy_async(ref[(fetch % PREFETCH_COUNT) * (BLOCK_SIZE) + ty][tx], referrence[index + cols * ty], pipe); + block.sync(); + + memcpy_async(temp[(fetch % PREFETCH_COUNT) * (BLOCK_SIZE + 1) + tx + 1][0], matrix_cuda[index_w + cols * tx], pipe); + block.sync(); + + memcpy_async(temp[(fetch % PREFETCH_COUNT) * (BLOCK_SIZE + 1) + 0][tx + 1], matrix_cuda[index_n], pipe); + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + + int b_index_x = compute % tile_dim_x; + int b_index_y = compute / tile_dim_x; + + int index = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( cols + 1 ); + + for( int m = 0 ; m < BLOCK_SIZE ; m++){ + if ( tx <= m ){ + int t_index_x = tx + 1; + int t_index_y = (compute % PREFETCH_COUNT) * (BLOCK_SIZE+1) + m - tx + 1; + + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[(compute % PREFETCH_COUNT) * BLOCK_SIZE + m - tx][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + } + block.sync(); + } + + for( int m = BLOCK_SIZE - 2 ; m >=0 ; m--){ + if ( tx <= m){ + int t_index_x = tx + BLOCK_SIZE - m ; + int t_index_y = (compute % PREFETCH_COUNT) * (BLOCK_SIZE+1) + BLOCK_SIZE - tx; + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[(compute % PREFETCH_COUNT) * BLOCK_SIZE + m - tx][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + + } + block.sync(); + } + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + matrix_cuda[index + ty * cols] 
= temp[(compute % PREFETCH_COUNT) * (BLOCK_SIZE+1) + ty+1][tx+1]; + } +} + + +__global__ void +needle_cuda_shared_2( int* referrence, + int* matrix_cuda, + int cols, + int penalty, + int i, + int block_width, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + int bx = blockIdx.x; + int tx = threadIdx.x; + + int b_index_x = bx + block_width - i; + int b_index_y = block_width - bx -1; + + __shared__ int temp[PREFETCH_COUNT * (BLOCK_SIZE+1)][BLOCK_SIZE+1]; + __shared__ int ref[PREFETCH_COUNT * BLOCK_SIZE][BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (b_index_y * gridDim.x + b_index_x) * tiles_this_block; + int fetch = base_tile; + int end_tile = fetch + tiles_this_block; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + int offset = fetch - base_tile; + int block_id = fetch / tiles_this_block; + int b_index_x = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int b_index_y = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + int index = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( cols + 1 ); + int index_n = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( 1 ); + int index_w = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + ( cols ); + int index_nw = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x; + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + ref[(fetch % PREFETCH_COUNT) * BLOCK_SIZE + ty][tx] = referrence[index + cols * ty]; + block.sync(); + + if (tx == 0) + temp[(fetch % PREFETCH_COUNT) * (BLOCK_SIZE+1) + tx][0] = matrix_cuda[index_nw]; + temp[(fetch % PREFETCH_COUNT) * (BLOCK_SIZE+1) + tx + 
1][0] = matrix_cuda[index_w + cols * tx]; + block.sync(); + + temp[(fetch % PREFETCH_COUNT) * (BLOCK_SIZE+1) + 0][tx + 1] = matrix_cuda[index_n]; + block.sync(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + + int b_index_x = compute % tile_dim_x; + int b_index_y = compute / tile_dim_x; + + int index = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( cols + 1 ); + + for( int m = 0 ; m < BLOCK_SIZE ; m++){ + if ( tx <= m ){ + int t_index_x = tx + 1; + int t_index_y = (compute % PREFETCH_COUNT) * (BLOCK_SIZE+1) + m - tx + 1; + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[(compute % PREFETCH_COUNT) * BLOCK_SIZE + m - tx][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + + } + block.sync(); + } + + for( int m = BLOCK_SIZE - 2 ; m >=0 ; m--){ + if ( tx <= m){ + int t_index_x = tx + BLOCK_SIZE - m ; + int t_index_y = (compute % PREFETCH_COUNT) * (BLOCK_SIZE+1) + BLOCK_SIZE - tx; + + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[(compute % PREFETCH_COUNT) * BLOCK_SIZE + BLOCK_SIZE - tx -1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + } + block.sync(); + } + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + matrix_cuda[index + ty * cols] = temp[(compute % PREFETCH_COUNT) * (BLOCK_SIZE+1) + ty+1][tx+1]; + } +} + diff --git a/workloads/realworld/async/nw/run.sh b/workloads/realworld/async/nw/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..e3d20f9f4402de57c2a49c4db5c04d917907d741 --- /dev/null +++ b/workloads/realworld/async/nw/run.sh @@ -0,0 +1 @@ +./needle 32768 10 256 diff --git a/workloads/realworld/async/nw/run_super.sh b/workloads/realworld/async/nw/run_super.sh new file mode 100755 index 
0000000000000000000000000000000000000000..23b570be1ac96cce67094a9469de1c6d24c03b08 --- /dev/null +++ b/workloads/realworld/async/nw/run_super.sh @@ -0,0 +1 @@ +./needle 32768 10 64 diff --git a/workloads/realworld/async/pathfinder/Makefile b/workloads/realworld/async/pathfinder/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d740e79027f3651c458e229179bbbd46fb4fcbec --- /dev/null +++ b/workloads/realworld/async/pathfinder/Makefile @@ -0,0 +1,14 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc +INCLUDE := $(CUDA_DIR)/include + +SRC = pathfinder.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = pathfinder + +release: + $(CC) $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +clean: + rm -f pathfinder diff --git a/workloads/realworld/async/pathfinder/README b/workloads/realworld/async/pathfinder/README new file mode 100644 index 0000000000000000000000000000000000000000..9af75abe201eb95c5c89a038c6d79f54b276f94e --- /dev/null +++ b/workloads/realworld/async/pathfinder/README @@ -0,0 +1,6 @@ +To compile the program: + +nvcc -cuda dynproc.cu +nvcc -o dynproc dynproc.cu.cpp + +Usage: dynproc row_len col_len pyramid_height diff --git a/workloads/realworld/async/pathfinder/pathfinder.cu b/workloads/realworld/async/pathfinder/pathfinder.cu new file mode 100644 index 0000000000000000000000000000000000000000..2397d9f3ae96bf94efd1e0be86132a2b3a140d32 --- /dev/null +++ b/workloads/realworld/async/pathfinder/pathfinder.cu @@ -0,0 +1,338 @@ +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#ifdef TIMING +#include "timing.h" + +struct timeval tv; +struct timeval tv_total_start, tv_total_end; +struct timeval tv_h2d_start, tv_h2d_end; +struct timeval tv_d2h_start, tv_d2h_end; 
+struct timeval tv_kernel_start, tv_kernel_end; +struct timeval tv_mem_alloc_start, tv_mem_alloc_end; +struct timeval tv_close_start, tv_close_end; +float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, + d2h_time = 0, close_time = 0, total_time = 0; +#endif + +#define BLOCK_SIZE 256 +#define STR_SIZE 256 +#define DEVICE 0 +#define HALO 1 // halo width along one direction when advancing to the next iteration + +// #define BENCH_PRINT + +void run(int argc, char **argv); + +int rows, cols; +int *data; +int **wall; +int *result; +#define M_SEED 9 +int pyramid_height; +int nblocks; + +void init(int argc, char **argv) +{ + if (argc == 5) + { + cols = atoi(argv[1]); + rows = atoi(argv[2]); + pyramid_height = atoi(argv[3]); + nblocks = atoi(argv[4]); + } + else + { + printf("Usage: dynproc row_len col_len pyramid_height\n"); + exit(0); + } + data = new int[rows * cols]; + wall = new int *[rows]; + for (int n = 0; n < rows; n++) + wall[n] = data + cols * n; + result = new int[cols]; + + int seed = M_SEED; + srand(seed); + + for (int i = 0; i < rows; i++) + { + for (int j = 0; j < cols; j++) + { + wall[i][j] = rand() % 10; + } + } +#ifdef BENCH_PRINT + for (int i = 0; i < rows; i++) + { + for (int j = 0; j < cols; j++) + { + printf("%d ", wall[i][j]); + } + printf("\n"); + } +#endif +} + +void fatal(char *s) +{ + fprintf(stderr, "error: %s\n", s); +} + +#define IN_RANGE(x, min, max) ((x) >= (min) && (x) <= (max)) +#define CLAMP_RANGE(x, min, max) x = (x < (min)) ? min : ((x > (max)) ? max : x) +#define MIN(a, b) ((a) <= (b) ? 
(a) : (b)) + +__global__ void dynproc_kernel( + int iteration, + int *gpuWall, + int *gpuSrc, + int *gpuResults, + int cols, + int rows, + int startStep, + int border, + int small_block_cols, + int tile_size, + int batches) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + __shared__ int prev[BLOCK_SIZE * PREFETCH_COUNT]; + __shared__ int result[BLOCK_SIZE]; + + int bx = blockIdx.x; + int tx = threadIdx.x; + + int fetch = 0; + + for (int compute = fetch; compute < batches; compute++) + { + for (; fetch < batches && fetch < compute + PREFETCH_COUNT; fetch++) + { + // each block finally computes result for a small block + // after N iterations. + // it is the non-overlapping small blocks that cover + // all the input data + + // calculate the boundary for the block according to + // the boundary of its small block + int blkX = bx * tile_size + small_block_cols * fetch - border; + int blkXmax = blkX + BLOCK_SIZE - 1; + + // calculate the global thread coordination + int xidx = blkX + tx; + + // effective range within this block that falls within + // the valid range of the input data + // used to rule out computation outside the boundary. + int validXmin = (blkX < 0) ? -blkX : 0; + int validXmax = (blkXmax > cols - 1) ? BLOCK_SIZE - 1 - (blkXmax - cols + 1) : BLOCK_SIZE - 1; + + int W = tx - 1; + int E = tx + 1; + + W = (W < validXmin) ? validXmin : W; + E = (E > validXmax) ? validXmax : E; + + bool isValid = IN_RANGE(tx, validXmin, validXmax); + + if (IN_RANGE(xidx, 0, cols - 1)) + { + memcpy_async(prev[(fetch % PREFETCH_COUNT) * BLOCK_SIZE + tx], gpuSrc[xidx], pipe); + } + // block.sync(); // [Ronny] Added sync to avoid race on prev Aug. 
14 2012 + pipe.commit(); + } + if (fetch == batches) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + // calculate the boundary for the block according to + // the boundary of its small block + int blkX = bx * tile_size + small_block_cols * compute - border; + int blkXmax = blkX + BLOCK_SIZE - 1; + + // calculate the global thread coordination + int xidx = blkX + tx; + + // effective range within this block that falls within + // the valid range of the input data + // used to rule out computation outside the boundary. + int validXmin = (blkX < 0) ? -blkX : 0; + int validXmax = (blkXmax > cols - 1) ? BLOCK_SIZE - 1 - (blkXmax - cols + 1) : BLOCK_SIZE - 1; + + int W = tx - 1; + int E = tx + 1; + + W = (W < validXmin) ? validXmin : W; + E = (E > validXmax) ? validXmax : E; + + bool isValid = IN_RANGE(tx, validXmin, validXmax); + + bool computed; + for (int i = 0; i < iteration; i++) + { + computed = false; + if (IN_RANGE(tx, i + 1, BLOCK_SIZE - i - 2) && + isValid) + { + computed = true; + int left = prev[(compute % PREFETCH_COUNT) * BLOCK_SIZE + W]; + int up = prev[(compute % PREFETCH_COUNT) * BLOCK_SIZE + tx]; + int right = prev[(compute % PREFETCH_COUNT) * BLOCK_SIZE + E]; + int shortest = MIN(left, up); + shortest = MIN(shortest, right); + int index = cols * (startStep + i) + xidx; + result[tx] = shortest + gpuWall[index]; + } + block.sync(); + if (i == iteration - 1) + break; + if (computed) // Assign the computation range + prev[(compute % PREFETCH_COUNT) * BLOCK_SIZE + tx] = result[tx]; + block.sync(); // [Ronny] Added sync to avoid race on prev Aug. 
14 2012 + } + + // update the global memory + // after the last iteration, only threads coordinated within the + // small block perform the calculation and switch on ``computed'' + if (computed) + { + gpuResults[xidx] = result[tx]; + } + } +} + +/* + compute N time steps +*/ +int calc_path(int *gpuWall, int *gpuResult[2], int rows, int cols, + int pyramid_height, int blockCols, int borderCols, int tile_size, int batches) +{ + dim3 dimBlock(BLOCK_SIZE); + dim3 dimGrid(nblocks); + + int src = 1, dst = 0; + for (int t = 0; t < rows - 1; t += pyramid_height) + { + int temp = src; + src = dst; + dst = temp; + + int iteration = MIN(pyramid_height, rows - t - 1); + int small_block_cols = BLOCK_SIZE - iteration * HALO * 2; + dynproc_kernel<<>>( + iteration, gpuWall, gpuResult[src], gpuResult[dst], + cols, rows, t, borderCols, small_block_cols, tile_size, batches); + + // for the measurement fairness + cudaDeviceSynchronize(); + } + return dst; +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + GPU_argv_init(); + + run(argc, argv); + + return EXIT_SUCCESS; +} + +void run(int argc, char **argv) +{ + init(argc, argv); + + /* --------------- pyramid parameters --------------- */ + int borderCols = (pyramid_height)*HALO; + int smallBlockCol = BLOCK_SIZE - (pyramid_height)*HALO * 2; + int blockCols = cols / smallBlockCol + ((cols % smallBlockCol == 0) ? 0 : 1); + + // ruihao + int cols_per_block = cols / nblocks; + if (cols_per_block < BLOCK_SIZE) + cols_per_block = BLOCK_SIZE; + int batches = cols_per_block / smallBlockCol + ((cols_per_block % smallBlockCol == 0) ? 
0 : 1); + + // printf("pyramidHeight: %d\ngridSize: [%d]\nborder:[%d]\nblockSize: %d\nblockGrid:[%d]\ntargetBlock:[%d]\n", + // pyramid_height, cols, borderCols, BLOCK_SIZE, blockCols, smallBlockCol); + printf("pyramidHeight: %d\ngridSize: [%d]\nborder:[%d]\nblockSize: %d\nblockGrid:[%d]\ntargetBlock:[%d]\n", + pyramid_height, cols, borderCols, BLOCK_SIZE, nblocks, smallBlockCol); + + int *gpuWall, *gpuResult[2]; + int size = rows * cols; + + initTrace(); + startCPU(); + + cudaMalloc((void **)&gpuResult[0], sizeof(int) * cols); + cudaMalloc((void **)&gpuResult[1], sizeof(int) * cols); + cudaMemcpy(gpuResult[0], data, sizeof(int) * cols, cudaMemcpyHostToDevice); + cudaMalloc((void **)&gpuWall, sizeof(int) * (size - cols)); + cudaMemcpy(gpuWall, data + cols, sizeof(int) * (size - cols), cudaMemcpyHostToDevice); + +#ifdef TIMING + gettimeofday(&tv_kernel_start, NULL); +#endif + + // int final_ret = calc_path(gpuWall, gpuResult, rows, cols, + // pyramid_height, blockCols, borderCols); + int final_ret = calc_path(gpuWall, gpuResult, rows, cols, + pyramid_height, blockCols, borderCols, cols_per_block, batches); + +#ifdef TIMING + gettimeofday(&tv_kernel_end, NULL); + tvsub(&tv_kernel_end, &tv_kernel_start, &tv); + kernel_time += tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; +#endif + + cudaMemcpy(result, gpuResult[final_ret], sizeof(int) * cols, cudaMemcpyDeviceToHost); + +#ifdef BENCH_PRINT + for (int i = 0; i < cols; i++) + printf("%d ", data[i]); + printf("\n"); + for (int i = 0; i < cols; i++) + printf("%d ", result[i]); + printf("\n"); +#endif + + cudaFree(gpuWall); + cudaFree(gpuResult[0]); + cudaFree(gpuResult[1]); + + endCPU(); + finiTrace(); + + delete[] data; + delete[] wall; + delete[] result; + +#ifdef TIMING + printf("Exec: %f\n", kernel_time); +#endif +} diff --git a/workloads/realworld/async/pathfinder/result.txt b/workloads/realworld/async/pathfinder/result.txt new file mode 100644 index 
0000000000000000000000000000000000000000..fa67c591d071682e1842a455f4477b397825e250 --- /dev/null +++ b/workloads/realworld/async/pathfinder/result.txt @@ -0,0 +1,11 @@ +pyramidHeight: 20 +gridSize: [100000] +border:[20] +blockSize: 256 +blockGrid:[463] +targetBlock:[216] +CUPTI dynproc_kernel iter 0 start: 1679530155077329959 end: 1679530155078946951 +CUPTI dynproc_kernel iter 20 start: 1679530155078953603 end: 1679530155081441509 +CUPTI dynproc_kernel iter 40 start: 1679530155081441880 end: 1679530155083936228 +CUPTI dynproc_kernel iter 60 start: 1679530155083936508 end: 1679530155086433891 +CUPTI dynproc_kernel iter 80 start: 1679530155086434172 end: 1679530155088929332 diff --git a/workloads/realworld/async/pathfinder/run.sh b/workloads/realworld/async/pathfinder/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..11a9e6199ea4b2ff7fd3e0ebf893dc96fa89ff45 --- /dev/null +++ b/workloads/realworld/async/pathfinder/run.sh @@ -0,0 +1,2 @@ +#./pathfinder 100000 100 20 1024 > result.txt +./pathfinder 10000000 100 20 1024 diff --git a/workloads/realworld/async/pathfinder/run_super.sh b/workloads/realworld/async/pathfinder/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..b35cc0b44511def3912323c1e0d58c2daa280722 --- /dev/null +++ b/workloads/realworld/async/pathfinder/run_super.sh @@ -0,0 +1 @@ +./pathfinder 10000000 100 20 1024 diff --git a/workloads/realworld/async/srad/Makefile b/workloads/realworld/async/srad/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..47da520a663446e36c04461d54cfbb3d12cfa328 --- /dev/null +++ b/workloads/realworld/async/srad/Makefile @@ -0,0 +1,15 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -std=c++11 -arch=sm_80 -O3 + +SRC = srad.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = srad + +release: $(SRC) + $(CC) $(KERNEL_DIM) $(SRC) -o $(EXE) 
-I$(INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt diff --git a/workloads/realworld/async/srad/Makefile_nvidia b/workloads/realworld/async/srad/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..e1f345c41c0f838dcf159958f628276455ef4dd7 --- /dev/null +++ b/workloads/realworld/async/srad/Makefile_nvidia @@ -0,0 +1,22 @@ +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := srad +# CUDA source files (compiled with cudacc) +CUFILES := srad.cu +# CUDA dependency files +CU_DEPS := \ + srad_kernel.cu \ + +# C/C++ source files (compiled with gcc / c++) +CCFILES := \ + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/async/srad/README b/workloads/realworld/async/srad/README new file mode 100755 index 0000000000000000000000000000000000000000..91e803b576bdeebe232c00a5112dadd836ffc33f --- /dev/null +++ b/workloads/realworld/async/srad/README @@ -0,0 +1,24 @@ +In srad.h, define either GPU or CPU computation +Currently, the GPU implementation can only support x-, y-dimensions that can be divided by 16. 
+ +Usage: +srad 128 128 0 31 0 31 0.5 2 + +128 //number of rows in the domain +128 //number of cols in the domain +0 //y1 position of the speckle +31 //y2 position of the speckle +0 //x1 position of the speckle +31 //x2 position of the speckle +0.5 //Lambda value +2 //number of iterations + + +******Adjustable work group size***** +The kernel has square shape +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe one dimesion +The total thread number for one block is RD_WG_SIZE_0*RD_WG_SIZE_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" \ No newline at end of file diff --git a/workloads/realworld/async/srad/run.sh b/workloads/realworld/async/srad/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..982fd1345383490dd093950055b359dc475480cc --- /dev/null +++ b/workloads/realworld/async/srad/run.sh @@ -0,0 +1,3 @@ +# ./srad 2048 2048 0 127 0 127 0.5 2 32 + +./srad 16384 16384 0 127 0 127 0.5 2 32 \ No newline at end of file diff --git a/workloads/realworld/async/srad/run_super.sh b/workloads/realworld/async/srad/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2d0f1b8ebd049e33bbb74a15722fc7172167641f --- /dev/null +++ b/workloads/realworld/async/srad/run_super.sh @@ -0,0 +1 @@ +./srad 32768 32768 0 127 0 127 0.5 2 8 \ No newline at end of file diff --git a/workloads/realworld/async/srad/srad.cu b/workloads/realworld/async/srad/srad.cu new file mode 100755 index 0000000000000000000000000000000000000000..5a39e777fbe12fba17fea74f798dc969d559be1a --- /dev/null +++ b/workloads/realworld/async/srad/srad.cu @@ -0,0 +1,298 @@ +// includes, system +#include +#include +#include +#include +#include "srad.h" +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +// includes, project +#include + +// includes, kernels +#include "srad_kernel.cu" + +void random_matrix(float *I, int rows, int cols); +void runTest( int argc, char** argv); +void usage(int argc, char **argv) +{ + fprintf(stderr, 
"Usage: %s \n", argv[0]); + fprintf(stderr, "\t - number of rows\n"); + fprintf(stderr, "\t - number of cols\n"); + fprintf(stderr, "\t - y1 value of the speckle\n"); + fprintf(stderr, "\t - y2 value of the speckle\n"); + fprintf(stderr, "\t - x1 value of the speckle\n"); + fprintf(stderr, "\t - x2 value of the speckle\n"); + fprintf(stderr, "\t - lambda (0,1)\n"); + fprintf(stderr, "\t - number of iterations\n"); + + exit(1); +} +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE); + runTest( argc, argv); + + return EXIT_SUCCESS; +} + +void +runTest( int argc, char** argv) +{ + int rows, cols, size_I, size_R, niter = 10, iter, nblocks; + float *I, *J, lambda, q0sqr, sum, sum2, tmp, meanROI,varROI ; + +#ifdef CPU + float Jc, G2, L, num, den, qsqr; + int *iN,*iS,*jE,*jW, k; + float *dN,*dS,*dW,*dE; + float cN,cS,cW,cE,D; +#endif + +#ifdef GPU + + float *J_cuda; + float *C_cuda; + float *E_C, *W_C, *N_C, *S_C; + +#endif + + unsigned int r1, r2, c1, c2; + float *c; + + + + if (argc == 10) + { + rows = atoi(argv[1]); //number of rows in the domain + cols = atoi(argv[2]); //number of cols in the domain + if ((rows%16!=0) || (cols%16!=0)){ + fprintf(stderr, "rows and cols must be multiples of 16\n"); + exit(1); + } + r1 = atoi(argv[3]); //y1 position of the speckle + r2 = atoi(argv[4]); //y2 position of the speckle + c1 = atoi(argv[5]); //x1 position of the speckle + c2 = atoi(argv[6]); //x2 position of the speckle + lambda = atof(argv[7]); //Lambda value + niter = atoi(argv[8]); //number of iterations + nblocks = atoi(argv[9]); // number of blocks + } + else{ + usage(argc, argv); + } + + size_I = cols * rows; + 
size_R = (r2-r1+1)*(c2-c1+1); + + I = (float *)malloc( size_I * sizeof(float) ); + J = (float *)malloc( size_I * sizeof(float) ); + c = (float *)malloc(sizeof(float)* size_I) ; + + +#ifdef CPU + + iN = (int *)malloc(sizeof(unsigned int*) * rows) ; + iS = (int *)malloc(sizeof(unsigned int*) * rows) ; + jW = (int *)malloc(sizeof(unsigned int*) * cols) ; + jE = (int *)malloc(sizeof(unsigned int*) * cols) ; + + + dN = (float *)malloc(sizeof(float)* size_I) ; + dS = (float *)malloc(sizeof(float)* size_I) ; + dW = (float *)malloc(sizeof(float)* size_I) ; + dE = (float *)malloc(sizeof(float)* size_I) ; + + + for (int i=0; i< rows; i++) { + iN[i] = i-1; + iS[i] = i+1; + } + for (int j=0; j< cols; j++) { + jW[j] = j-1; + jE[j] = j+1; + } + iN[0] = 0; + iS[rows-1] = rows-1; + jW[0] = 0; + jE[cols-1] = cols-1; + +#endif + GPU_argv_init(); + initTrace(); + startCPU(); + +#ifdef GPU + + //Allocate device memory + cudaMalloc((void**)& J_cuda, sizeof(float)* size_I); + cudaMalloc((void**)& C_cuda, sizeof(float)* size_I); + cudaMalloc((void**)& E_C, sizeof(float)* size_I); + cudaMalloc((void**)& W_C, sizeof(float)* size_I); + cudaMalloc((void**)& S_C, sizeof(float)* size_I); + cudaMalloc((void**)& N_C, sizeof(float)* size_I); + + +#endif + + printf("Randomizing the input matrix\n"); + //Generate a random matrix + random_matrix(I, rows, cols); + + for (int k = 0; k < size_I; k++ ) { + J[k] = (float)exp(I[k]) ; + } + printf("Start the SRAD main loop\n"); + for (iter=0; iter< niter; iter++){ + sum=0; sum2=0; + for (int i=r1; i<=r2; i++) { + for (int j=c1; j<=c2; j++) { + tmp = J[i * cols + j]; + sum += tmp ; + sum2 += tmp*tmp; + } + } + meanROI = sum / size_R; + varROI = (sum2 / size_R) - meanROI*meanROI; + q0sqr = varROI / (meanROI*meanROI); + +#ifdef CPU + + for (int i = 0 ; i < rows ; i++) { + for (int j = 0; j < cols; j++) { + + k = i * cols + j; + Jc = J[k]; + + // directional derivates + dN[k] = J[iN[i] * cols + j] - Jc; + dS[k] = J[iS[i] * cols + j] - Jc; + dW[k] = J[i * cols 
+ jW[j]] - Jc; + dE[k] = J[i * cols + jE[j]] - Jc; + + G2 = (dN[k]*dN[k] + dS[k]*dS[k] + + dW[k]*dW[k] + dE[k]*dE[k]) / (Jc*Jc); + + L = (dN[k] + dS[k] + dW[k] + dE[k]) / Jc; + + num = (0.5*G2) - ((1.0/16.0)*(L*L)) ; + den = 1 + (.25*L); + qsqr = num/(den*den); + + // diffusion coefficent (equ 33) + den = (qsqr-q0sqr) / (q0sqr * (1+q0sqr)) ; + c[k] = 1.0 / (1.0+den) ; + + // saturate diffusion coefficent + if (c[k] < 0) {c[k] = 0;} + else if (c[k] > 1) {c[k] = 1;} + } + } + + for (int i = 0; i < rows; i++) { + for (int j = 0; j < cols; j++) { + + // current index + k = i * cols + j; + + // diffusion coefficent + cN = c[k]; + cS = c[iS[i] * cols + j]; + cW = c[k]; + cE = c[i * cols + jE[j]]; + + // divergence (equ 58) + D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k]; + + // image update (equ 61) + J[k] = J[k] + 0.25*lambda*D; + } + } + +#endif // CPU + + +#ifdef GPU + + //Currently the input size must be divided by 16 - the block size + + // ruihao + int block_x = cols/BLOCK_SIZE ; + int block_y = rows/BLOCK_SIZE ; + + if (nblocks > block_x) nblocks = block_x; + + dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE); + // dim3 dimGrid(block_x, block_y); + dim3 dimGrid(nblocks, nblocks); + // ruihao + + //Copy data from main memory to device memory + cudaMemcpy(J_cuda, J, sizeof(float) * size_I, cudaMemcpyHostToDevice); + + //Run kernels + // srad_cuda_1<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr); + // srad_cuda_2<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, lambda, q0sqr); + srad_cuda_1<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr, cols / nblocks); + srad_cuda_2<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, lambda, q0sqr, cols / nblocks); + //Copy data from device memory to main memory + cudaMemcpy(J, J_cuda, sizeof(float) * size_I, cudaMemcpyDeviceToHost); + +#endif +} + + cudaThreadSynchronize(); +#ifdef GPU + cudaFree(C_cuda); + cudaFree(J_cuda); + cudaFree(E_C); + cudaFree(W_C); + cudaFree(N_C); + cudaFree(S_C); +#endif + 
endCPU(); + finiTrace(); + +#ifdef OUTPUT + //Printing output + printf("Printing Output:\n"); + for( int i = 0 ; i < rows ; i++){ + for ( int j = 0 ; j < cols ; j++){ + printf("%.5f ", J[i * cols + j]); + } + printf("\n"); + } +#endif + + printf("Computation Done\n"); + + free(I); + free(J); +#ifdef CPU + free(iN); free(iS); free(jW); free(jE); + free(dN); free(dS); free(dW); free(dE); +#endif + free(c); + +} + + +void random_matrix(float *I, int rows, int cols){ + + srand(7); + + for( int i = 0 ; i < rows ; i++){ + for ( int j = 0 ; j < cols ; j++){ + I[i * cols + j] = rand()/(float)RAND_MAX ; + } + } + +} + diff --git a/workloads/realworld/async/srad/srad.h b/workloads/realworld/async/srad/srad.h new file mode 100755 index 0000000000000000000000000000000000000000..2b2adb6d956b697c5b0ace9bccb89162ef98be50 --- /dev/null +++ b/workloads/realworld/async/srad/srad.h @@ -0,0 +1,16 @@ +#define STR_SIZE 256 + +#ifdef RD_WG_SIZE_0_0 + #define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) + #define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) + #define BLOCK_SIZE RD_WG_SIZE +#else + #define BLOCK_SIZE 16 +#endif + +#define GPU +#define TIMER +//#define OUTPUT + diff --git a/workloads/realworld/async/srad/srad_kernel.cu b/workloads/realworld/async/srad/srad_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..8fcdfde4b0cddf9e2fe56409b9a643f684231dc6 --- /dev/null +++ b/workloads/realworld/async/srad/srad_kernel.cu @@ -0,0 +1,367 @@ +#include "srad.h" +#include + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +__global__ void +srad_cuda_1( + float *E_C, + float *W_C, + float *N_C, + float *S_C, + float *J_cuda, + float *C_cuda, + int cols, + int rows, + float q0sqr, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + // shared memory allocation + __shared__ float temp[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + 
__shared__ float temp_result[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + + __shared__ float north[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + __shared__ float south[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + __shared__ float east[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + __shared__ float west[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int fetch = base_tile; + int end_tile = fetch + tiles_this_block; + + for (int compute = fetch; compute < end_tile; compute++) + { + // thread id + int tx = threadIdx.x; + int ty = threadIdx.y; + + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + // block id + int bx = fetch % tile_dim_x; + int by = fetch / tile_dim_x; + + // indices + int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx; + int index_n = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + tx - cols; + int index_s = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * BLOCK_SIZE + tx; + int index_w = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty - 1; + int index_e = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + BLOCK_SIZE; + + if (index_n < 0) index_n = 0; + if (index_s >= (cols * rows)) index_s = cols * rows - 1; + if (index_w < 0) index_w = 0; + if (index_e >= (cols * rows)) index_e = cols * rows - 1; + + // load data to shared memory + memcpy_async(north[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], J_cuda[index_n], pipe); + memcpy_async(south[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], J_cuda[index_s], pipe); + if (by == 0) + { + memcpy_async(north[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], J_cuda[BLOCK_SIZE * bx + tx], pipe); + } + else if (by == tile_dim_x - 1) + { + 
memcpy_async(south[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], + J_cuda[cols * BLOCK_SIZE * (tile_dim_x - 1) + BLOCK_SIZE * bx + cols * (BLOCK_SIZE - 1) + tx], pipe); + } + block.sync(); + + memcpy_async(west[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], J_cuda[index_w], pipe); + memcpy_async(east[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], J_cuda[index_e], pipe); + + if (bx == 0) + { + memcpy_async(west[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], J_cuda[cols * BLOCK_SIZE * by + cols * ty], pipe); + } + else if (bx == tile_dim_x - 1) + { + memcpy_async(east[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], + J_cuda[cols * BLOCK_SIZE * by + BLOCK_SIZE * (tile_dim_x - 1) + cols * ty + BLOCK_SIZE - 1], pipe); + } + block.sync(); + memcpy_async(temp[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], J_cuda[index], pipe); + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + // block id + int bx = compute % tile_dim_x; + int by = compute / tile_dim_x; + + // indices + int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx; + int index_n = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + tx - cols; + int index_s = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * BLOCK_SIZE + tx; + int index_w = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty - 1; + int index_e = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + BLOCK_SIZE; + + if (index_n < 0) index_n = 0; + if (index_s >= (cols * rows)) index_s = cols * rows - 1; + if (index_w < 0) index_w = 0; + if (index_e >= (cols * rows)) index_e = cols * rows - 1; + + float n, w, e, s, jc, g2, l, num, den, qsqr, c; + jc = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx]; + + if (ty == 
0 && tx == 0) + { // nw + n = north[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + s = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty + 1) * BLOCK_SIZE + tx] - jc; + w = west[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + e = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx + 1] - jc; + } + else if (ty == 0 && tx == BLOCK_SIZE - 1) + { // ne + n = north[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + s = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx - 1] - jc; + e = east[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + } + else if (ty == BLOCK_SIZE - 1 && tx == BLOCK_SIZE - 1) + { // se + n = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty - 1) * BLOCK_SIZE + tx] - jc; + s = south[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + w = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx - 1] - jc; + e = east[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + } + else if (ty == BLOCK_SIZE - 1 && tx == 0) + { // sw + n = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty - 1) * BLOCK_SIZE + tx] - jc; + s = south[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + w = west[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + e = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx + 1] - jc; + } + + else if (ty == 0) + { // n + n = north[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + s = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty + 1) * BLOCK_SIZE + tx] - jc; + w = 
temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx - 1] - jc; + e = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx + 1] - jc; + } + else if (tx == BLOCK_SIZE - 1) + { // e + n = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty - 1) * BLOCK_SIZE + tx] - jc; + s = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx - 1] - jc; + e = east[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + } + else if (ty == BLOCK_SIZE - 1) + { // s + n = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty - 1) * BLOCK_SIZE + tx] - jc; + s = south[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + w = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx - 1] - jc; + e = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx + 1] - jc; + } + else if (tx == 0) + { // w + n = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty - 1) * BLOCK_SIZE + tx] - jc; + s = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty + 1) * BLOCK_SIZE + tx] - jc; + w = west[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + e = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx + 1] - jc; + } + else + { // the data elements which are not on the borders + n = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty - 1) * BLOCK_SIZE + tx] - jc; + s = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx - 1] - jc; + e = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx + 1] - jc; + } + + g2 = (n * n + s * s + w * w + e * e) / 
(jc * jc); + + l = (n + s + w + e) / jc; + + num = (0.5 * g2) - ((1.0 / 16.0) * (l * l)); + den = 1 + (.25 * l); + qsqr = num / (den * den); + + // diffusion coefficent (equ 33) + den = (qsqr - q0sqr) / (q0sqr * (1 + q0sqr)); + c = 1.0 / (1.0 + den); + + // saturate diffusion coefficent + if (c < 0) + { + temp_result[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = 0; + } + else if (c > 1) + { + temp_result[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = 1; + } + else + { + temp_result[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = c; + } + block.sync(); + + C_cuda[index] = temp_result[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx]; + E_C[index] = e; + W_C[index] = w; + S_C[index] = s; + N_C[index] = n; + } +} + + + +__global__ void +srad_cuda_2( + float *E_C, + float *W_C, + float *N_C, + float *S_C, + float *J_cuda, + float *C_cuda, + int cols, + int rows, + float lambda, + float q0sqr, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + // shared memory allocation + __shared__ float south_c[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + __shared__ float east_c[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + + __shared__ float c_cuda_temp[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + __shared__ float c_cuda_result[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + __shared__ float temp[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int fetch = base_tile; + int end_tile = fetch + tiles_this_block; + + for (int compute = fetch; compute < end_tile; compute++) + { + // thread id + int tx = 
threadIdx.x; + int ty = threadIdx.y; + + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + // block id + int offset = fetch - base_tile; + int block_id = fetch / tiles_this_block; + int bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + // indices + int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx; + int index_s = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * BLOCK_SIZE + tx; + int index_e = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + BLOCK_SIZE; + + if (index_s >= (cols * rows)) index_s = cols * rows - 1; + if (index_e >= (cols * rows)) index_e = cols * rows - 1; + + // load data to shared memory + temp[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = J_cuda[index]; + block.sync(); + + south_c[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = C_cuda[index_s]; + if (by == tile_dim_x - 1) + { + south_c[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = C_cuda[cols * BLOCK_SIZE * (tile_dim_x - 1) + BLOCK_SIZE * bx + cols * (BLOCK_SIZE - 1) + tx]; + } + block.sync(); + + east_c[ty * BLOCK_SIZE + tx] = C_cuda[index_e]; + if (bx == tile_dim_x - 1) + { + east_c[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = C_cuda[cols * BLOCK_SIZE * by + BLOCK_SIZE * (tile_dim_x - 1) + cols * ty + BLOCK_SIZE - 1]; + } + block.sync(); + + c_cuda_temp[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = C_cuda[index]; + block.sync(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + // block id + int bx = compute % tile_dim_x; + int by = compute / tile_dim_x; + + // indices + int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx; + + float cc, cn, cs, ce, 
cw, d_sum; + cc = c_cuda_temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx]; + + if (ty == BLOCK_SIZE - 1 && tx == BLOCK_SIZE - 1) + { // se + cn = cc; + cs = south_c[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx]; + cw = cc; + ce = east_c[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx]; + } + else if (tx == BLOCK_SIZE - 1) + { // e + cn = cc; + cs = c_cuda_temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty + 1) * BLOCK_SIZE + tx]; + cw = cc; + ce = east_c[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx]; + } + else if (ty == BLOCK_SIZE - 1) + { // s + cn = cc; + cs = south_c[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx]; + cw = cc; + ce = c_cuda_temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx + 1]; + } + else + { // the data elements which are not on the borders + cn = cc; + cs = c_cuda_temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty + 1) * BLOCK_SIZE + tx]; + cw = cc; + ce = c_cuda_temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx + 1]; + } + + // divergence (equ 58) + d_sum = cn * N_C[index] + cs * S_C[index] + cw * W_C[index] + ce * E_C[index]; + + // image update (equ 61) + c_cuda_result[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] + 0.25 * lambda * d_sum; + + block.sync(); + + J_cuda[index] = c_cuda_result[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx]; + } +} \ No newline at end of file diff --git a/workloads/realworld/cp_cfg.sh b/workloads/realworld/cp_cfg.sh new file mode 100755 index 0000000000000000000000000000000000000000..5b78ea1879f65c5eb8cdf276ffd7d56091293bd1 --- /dev/null +++ b/workloads/realworld/cp_cfg.sh @@ -0,0 +1,6 @@ +cp -r standard/darknet/cfg/ 
./pinned/darknet/ +cp -r standard/darknet/cfg/ ./pipeline/darknet/ +cp -r standard/darknet/cfg/ ./async/darknet/ +cp -r standard/darknet/cfg/ ./uvm/darknet/ +cp -r standard/darknet/cfg/ ./uvm_prefetch/darknet/ +cp -r standard/darknet/cfg/ ./uvm_prefetch_async/darknet/ diff --git a/workloads/realworld/cp_iiswc.sh b/workloads/realworld/cp_iiswc.sh new file mode 100755 index 0000000000000000000000000000000000000000..a180427cfc95045b5b36e27c9ee6769396d64939 --- /dev/null +++ b/workloads/realworld/cp_iiswc.sh @@ -0,0 +1,13 @@ +cp -r /home/ruihao/work/uvm_async_iiswc_2023/workloads/realworld/standard/darknet/resnet* ./standard/darknet/ +cp -r /home/ruihao/work/uvm_async_iiswc_2023/workloads/realworld/async/darknet/resnet* ./async/darknet/ +cp -r /home/ruihao/work/uvm_async_iiswc_2023/workloads/realworld/uvm/darknet/resnet* ./uvm/darknet/ +cp -r /home/ruihao/work/uvm_async_iiswc_2023/workloads/realworld/uvm_prefetch/darknet/resnet* ./uvm_prefetch/darknet/ +cp -r /home/ruihao/work/uvm_async_iiswc_2023/workloads/realworld/uvm_prefetch_async/darknet/resnet* ./uvm_prefetch_async/darknet/ + +cp -r /home/ruihao/work/uvm_async_iiswc_2023/workloads/realworld/standard/darknet/yolov3* ./standard/darknet/ +cp -r /home/ruihao/work/uvm_async_iiswc_2023/workloads/realworld/async/darknet/yolov3* ./async/darknet/ +cp -r /home/ruihao/work/uvm_async_iiswc_2023/workloads/realworld/uvm/darknet/yolov3* ./uvm/darknet/ +cp -r /home/ruihao/work/uvm_async_iiswc_2023/workloads/realworld/uvm_prefetch/darknet/yolov3* ./uvm_prefetch/darknet/ +cp -r /home/ruihao/work/uvm_async_iiswc_2023/workloads/realworld/uvm_prefetch_async/darknet/yolov3* ./uvm_prefetch_async/darknet/ + +# cp -r standard/darknet/cfg/ ./pipeline/ \ No newline at end of file diff --git a/workloads/realworld/cp_makefile.sh b/workloads/realworld/cp_makefile.sh new file mode 100755 index 0000000000000000000000000000000000000000..df24ede22ff50d30935851bc23b2d84e03a65a53 --- /dev/null +++ b/workloads/realworld/cp_makefile.sh @@ -0,0 +1,6 
@@ +cp standard/darknet/Makefile ./pinned/darknet/ +cp standard/darknet/Makefile ./pipeline/darknet/ +cp standard/darknet/Makefile ./async/darknet/ +cp standard/darknet/Makefile ./uvm/darknet/ +cp standard/darknet/Makefile ./uvm_prefetch/darknet/ +cp standard/darknet/Makefile ./uvm_prefetch_async/darknet/ diff --git a/workloads/realworld/cp_workloads.sh b/workloads/realworld/cp_workloads.sh new file mode 100755 index 0000000000000000000000000000000000000000..67edd4a571d15b7bbe0a482b9a7bff5c45944430 --- /dev/null +++ b/workloads/realworld/cp_workloads.sh @@ -0,0 +1,27 @@ +cp -r standard/darknet/resnet18_b/ ./pinned/darknet/ +cp -r standard/darknet/resnet18_b/ ./pipeline/darknet/ +cp -r standard/darknet/resnet18_b/ ./async/darknet/ +cp -r standard/darknet/resnet18_b/ ./uvm/darknet/ +cp -r standard/darknet/resnet18_b/ ./uvm_prefetch/darknet/ +cp -r standard/darknet/resnet18_b/ ./uvm_prefetch_async/darknet/ + +cp -r standard/darknet/resnet50_b/ ./pinned/darknet/ +cp -r standard/darknet/resnet50_b/ ./pipeline/darknet/ +cp -r standard/darknet/resnet50_b/ ./async/darknet/ +cp -r standard/darknet/resnet50_b/ ./uvm/darknet/ +cp -r standard/darknet/resnet50_b/ ./uvm_prefetch/darknet/ +cp -r standard/darknet/resnet50_b/ ./uvm_prefetch_async/darknet/ + +cp -r standard/darknet/yolov3_b/ ./pinned/darknet/ +cp -r standard/darknet/yolov3_b/ ./pipeline/darknet/ +cp -r standard/darknet/yolov3_b/ ./async/darknet/ +cp -r standard/darknet/yolov3_b/ ./uvm/darknet/ +cp -r standard/darknet/yolov3_b/ ./uvm_prefetch/darknet/ +cp -r standard/darknet/yolov3_b/ ./uvm_prefetch_async/darknet/ + +cp -r standard/darknet/yolov3-tiny_b/ ./pinned/darknet/ +cp -r standard/darknet/yolov3-tiny_b/ ./pipeline/darknet/ +cp -r standard/darknet/yolov3-tiny_b/ ./async/darknet/ +cp -r standard/darknet/yolov3-tiny_b/ ./uvm/darknet/ +cp -r standard/darknet/yolov3-tiny_b/ ./uvm_prefetch/darknet/ +cp -r standard/darknet/yolov3-tiny_b/ ./uvm_prefetch_async/darknet/ diff --git a/workloads/realworld/output.log 
b/workloads/realworld/output.log new file mode 100644 index 0000000000000000000000000000000000000000..03dcfa746e138865044eb53202e2c6d21d794582 --- /dev/null +++ b/workloads/realworld/output.log @@ -0,0 +1,262 @@ +layer filters size input output + 0 conv 64 7 x 7 / 2 256 x 256 x 3 -> 128 x 128 x 64 0.308 BFLOPs + 1 max 2 x 2 / 2 128 x 128 x 64 -> 64 x 64 x 64 + 2 conv 64 1 x 1 / 1 64 x 64 x 64 -> 64 x 64 x 64 0.034 BFLOPs + 3 conv 64 3 x 3 / 1 64 x 64 x 64 -> 64 x 64 x 64 0.302 BFLOPs + 4 conv 256 1 x 1 / 1 64 x 64 x 64 -> 64 x 64 x 256 0.134 BFLOPs + 5 res 1 64 x 64 x 64 -> 64 x 64 x 256 + 6 conv 64 1 x 1 / 1 64 x 64 x 256 -> 64 x 64 x 64 0.134 BFLOPs + 7 conv 64 3 x 3 / 1 64 x 64 x 64 -> 64 x 64 x 64 0.302 BFLOPs + 8 conv 256 1 x 1 / 1 64 x 64 x 64 -> 64 x 64 x 256 0.134 BFLOPs + 9 res 5 64 x 64 x 256 -> 64 x 64 x 256 + 10 conv 64 1 x 1 / 1 64 x 64 x 256 -> 64 x 64 x 64 0.134 BFLOPs + 11 conv 64 3 x 3 / 1 64 x 64 x 64 -> 64 x 64 x 64 0.302 BFLOPs + 12 conv 256 1 x 1 / 1 64 x 64 x 64 -> 64 x 64 x 256 0.134 BFLOPs + 13 res 9 64 x 64 x 256 -> 64 x 64 x 256 + 14 conv 128 1 x 1 / 1 64 x 64 x 256 -> 64 x 64 x 128 0.268 BFLOPs + 15 conv 128 3 x 3 / 2 64 x 64 x 128 -> 32 x 32 x 128 0.302 BFLOPs + 16 conv 512 1 x 1 / 1 32 x 32 x 128 -> 32 x 32 x 512 0.134 BFLOPs + 17 res 13 64 x 64 x 256 -> 32 x 32 x 512 + 18 conv 128 1 x 1 / 1 32 x 32 x 512 -> 32 x 32 x 128 0.134 BFLOPs + 19 conv 128 3 x 3 / 1 32 x 32 x 128 -> 32 x 32 x 128 0.302 BFLOPs + 20 conv 512 1 x 1 / 1 32 x 32 x 128 -> 32 x 32 x 512 0.134 BFLOPs + 21 res 17 32 x 32 x 512 -> 32 x 32 x 512 + 22 conv 128 1 x 1 / 1 32 x 32 x 512 -> 32 x 32 x 128 0.134 BFLOPs + 23 conv 128 3 x 3 / 1 32 x 32 x 128 -> 32 x 32 x 128 0.302 BFLOPs + 24 conv 512 1 x 1 / 1 32 x 32 x 128 -> 32 x 32 x 512 0.134 BFLOPs + 25 res 21 32 x 32 x 512 -> 32 x 32 x 512 + 26 conv 128 1 x 1 / 1 32 x 32 x 512 -> 32 x 32 x 128 0.134 BFLOPs + 27 conv 128 3 x 3 / 1 32 x 32 x 128 -> 32 x 32 x 128 0.302 BFLOPs + 28 conv 512 1 x 1 / 1 32 x 32 x 128 -> 32 x 32 x 
512 0.134 BFLOPs + 29 res 25 32 x 32 x 512 -> 32 x 32 x 512 + 30 conv 256 1 x 1 / 1 32 x 32 x 512 -> 32 x 32 x 256 0.268 BFLOPs + 31 conv 256 3 x 3 / 2 32 x 32 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 32 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 33 res 29 32 x 32 x 512 -> 16 x 16 x1024 + 34 conv 256 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 256 0.134 BFLOPs + 35 conv 256 3 x 3 / 1 16 x 16 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 36 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 37 res 33 16 x 16 x1024 -> 16 x 16 x1024 + 38 conv 256 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 256 0.134 BFLOPs + 39 conv 256 3 x 3 / 1 16 x 16 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 40 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 41 res 37 16 x 16 x1024 -> 16 x 16 x1024 + 42 conv 256 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 256 0.134 BFLOPs + 43 conv 256 3 x 3 / 1 16 x 16 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 44 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 45 res 41 16 x 16 x1024 -> 16 x 16 x1024 + 46 conv 256 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 256 0.134 BFLOPs + 47 conv 256 3 x 3 / 1 16 x 16 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 48 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 49 res 45 16 x 16 x1024 -> 16 x 16 x1024 + 50 conv 256 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 256 0.134 BFLOPs + 51 conv 256 3 x 3 / 1 16 x 16 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 52 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 53 res 49 16 x 16 x1024 -> 16 x 16 x1024 + 54 conv 512 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 512 0.268 BFLOPs + 55 conv 512 3 x 3 / 2 16 x 16 x 512 -> 8 x 8 x 512 0.302 BFLOPs + 56 conv 2048 1 x 1 / 1 8 x 8 x 512 -> 8 x 8 x2048 0.134 BFLOPs + 57 res 53 16 x 16 x1024 -> 8 x 8 x2048 + 58 conv 512 1 x 1 / 1 8 x 8 x2048 -> 8 x 8 x 512 0.134 BFLOPs + 59 conv 512 3 x 3 / 1 8 x 8 x 512 -> 8 x 8 x 512 0.302 BFLOPs + 60 conv 2048 1 x 1 / 1 8 x 8 x 512 -> 8 x 8 x2048 0.134 BFLOPs + 61 res 57 8 x 8 
x2048 -> 8 x 8 x2048 + 62 conv 512 1 x 1 / 1 8 x 8 x2048 -> 8 x 8 x 512 0.134 BFLOPs + 63 conv 512 3 x 3 / 1 8 x 8 x 512 -> 8 x 8 x 512 0.302 BFLOPs + 64 conv 2048 1 x 1 / 1 8 x 8 x 512 -> 8 x 8 x2048 0.134 BFLOPs + 65 res 61 8 x 8 x2048 -> 8 x 8 x2048 + 66 avg 8 x 8 x2048 -> 2048 + 67 conv 1000 1 x 1 / 1 1 x 1 x2048 -> 1 x 1 x1000 0.004 BFLOPs + 68 softmax 1000 +Loading weights from ../../../../../data/darknet/resnet50.weights...Done! +../data/dog.jpg: Predicted in 0.016876 seconds. +layer filters size input output + 0 conv 64 7 x 7 / 2 256 x 256 x 3 -> 128 x 128 x 64 0.308 BFLOPs + 1 max 2 x 2 / 2 128 x 128 x 64 -> 64 x 64 x 64 + 2 conv 64 1 x 1 / 1 64 x 64 x 64 -> 64 x 64 x 64 0.034 BFLOPs + 3 conv 64 3 x 3 / 1 64 x 64 x 64 -> 64 x 64 x 64 0.302 BFLOPs + 4 conv 256 1 x 1 / 1 64 x 64 x 64 -> 64 x 64 x 256 0.134 BFLOPs + 5 res 1 64 x 64 x 64 -> 64 x 64 x 256 + 6 conv 64 1 x 1 / 1 64 x 64 x 256 -> 64 x 64 x 64 0.134 BFLOPs + 7 conv 64 3 x 3 / 1 64 x 64 x 64 -> 64 x 64 x 64 0.302 BFLOPs + 8 conv 256 1 x 1 / 1 64 x 64 x 64 -> 64 x 64 x 256 0.134 BFLOPs + 9 res 5 64 x 64 x 256 -> 64 x 64 x 256 + 10 conv 64 1 x 1 / 1 64 x 64 x 256 -> 64 x 64 x 64 0.134 BFLOPs + 11 conv 64 3 x 3 / 1 64 x 64 x 64 -> 64 x 64 x 64 0.302 BFLOPs + 12 conv 256 1 x 1 / 1 64 x 64 x 64 -> 64 x 64 x 256 0.134 BFLOPs + 13 res 9 64 x 64 x 256 -> 64 x 64 x 256 + 14 conv 128 1 x 1 / 1 64 x 64 x 256 -> 64 x 64 x 128 0.268 BFLOPs + 15 conv 128 3 x 3 / 2 64 x 64 x 128 -> 32 x 32 x 128 0.302 BFLOPs + 16 conv 512 1 x 1 / 1 32 x 32 x 128 -> 32 x 32 x 512 0.134 BFLOPs + 17 res 13 64 x 64 x 256 -> 32 x 32 x 512 + 18 conv 128 1 x 1 / 1 32 x 32 x 512 -> 32 x 32 x 128 0.134 BFLOPs + 19 conv 128 3 x 3 / 1 32 x 32 x 128 -> 32 x 32 x 128 0.302 BFLOPs + 20 conv 512 1 x 1 / 1 32 x 32 x 128 -> 32 x 32 x 512 0.134 BFLOPs + 21 res 17 32 x 32 x 512 -> 32 x 32 x 512 + 22 conv 128 1 x 1 / 1 32 x 32 x 512 -> 32 x 32 x 128 0.134 BFLOPs + 23 conv 128 3 x 3 / 1 32 x 32 x 128 -> 32 x 32 x 128 0.302 BFLOPs + 24 conv 512 1 x 1 / 
1 32 x 32 x 128 -> 32 x 32 x 512 0.134 BFLOPs + 25 res 21 32 x 32 x 512 -> 32 x 32 x 512 + 26 conv 128 1 x 1 / 1 32 x 32 x 512 -> 32 x 32 x 128 0.134 BFLOPs + 27 conv 128 3 x 3 / 1 32 x 32 x 128 -> 32 x 32 x 128 0.302 BFLOPs + 28 conv 512 1 x 1 / 1 32 x 32 x 128 -> 32 x 32 x 512 0.134 BFLOPs + 29 res 25 32 x 32 x 512 -> 32 x 32 x 512 + 30 conv 256 1 x 1 / 1 32 x 32 x 512 -> 32 x 32 x 256 0.268 BFLOPs + 31 conv 256 3 x 3 / 2 32 x 32 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 32 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 33 res 29 32 x 32 x 512 -> 16 x 16 x1024 + 34 conv 256 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 256 0.134 BFLOPs + 35 conv 256 3 x 3 / 1 16 x 16 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 36 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 37 res 33 16 x 16 x1024 -> 16 x 16 x1024 + 38 conv 256 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 256 0.134 BFLOPs + 39 conv 256 3 x 3 / 1 16 x 16 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 40 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 41 res 37 16 x 16 x1024 -> 16 x 16 x1024 + 42 conv 256 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 256 0.134 BFLOPs + 43 conv 256 3 x 3 / 1 16 x 16 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 44 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 45 res 41 16 x 16 x1024 -> 16 x 16 x1024 + 46 conv 256 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 256 0.134 BFLOPs + 47 conv 256 3 x 3 / 1 16 x 16 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 48 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 49 res 45 16 x 16 x1024 -> 16 x 16 x1024 + 50 conv 256 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 256 0.134 BFLOPs + 51 conv 256 3 x 3 / 1 16 x 16 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 52 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 53 res 49 16 x 16 x1024 -> 16 x 16 x1024 + 54 conv 512 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 512 0.268 BFLOPs + 55 conv 512 3 x 3 / 2 16 x 16 x 512 -> 8 x 8 x 512 0.302 BFLOPs + 56 conv 2048 1 x 1 / 1 8 x 8 x 512 -> 8 
x 8 x2048 0.134 BFLOPs + 57 res 53 16 x 16 x1024 -> 8 x 8 x2048 + 58 conv 512 1 x 1 / 1 8 x 8 x2048 -> 8 x 8 x 512 0.134 BFLOPs + 59 conv 512 3 x 3 / 1 8 x 8 x 512 -> 8 x 8 x 512 0.302 BFLOPs + 60 conv 2048 1 x 1 / 1 8 x 8 x 512 -> 8 x 8 x2048 0.134 BFLOPs + 61 res 57 8 x 8 x2048 -> 8 x 8 x2048 + 62 conv 512 1 x 1 / 1 8 x 8 x2048 -> 8 x 8 x 512 0.134 BFLOPs + 63 conv 512 3 x 3 / 1 8 x 8 x 512 -> 8 x 8 x 512 0.302 BFLOPs + 64 conv 2048 1 x 1 / 1 8 x 8 x 512 -> 8 x 8 x2048 0.134 BFLOPs + 65 res 61 8 x 8 x2048 -> 8 x 8 x2048 + 66 avg 8 x 8 x2048 -> 2048 + 67 conv 1000 1 x 1 / 1 1 x 1 x2048 -> 1 x 1 x1000 0.004 BFLOPs + 68 softmax 1000 +Loading weights from ../../../../../data/darknet/resnet50.weights...Done! +../data/dog.jpg: Predicted in 0.017422 seconds. +layer filters size input output + 0 conv 64 7 x 7 / 2 256 x 256 x 3 -> 128 x 128 x 64 0.308 BFLOPs + 1 max 2 x 2 / 2 128 x 128 x 64 -> 64 x 64 x 64 + 2 conv 64 1 x 1 / 1 64 x 64 x 64 -> 64 x 64 x 64 0.034 BFLOPs + 3 conv 64 3 x 3 / 1 64 x 64 x 64 -> 64 x 64 x 64 0.302 BFLOPs + 4 conv 256 1 x 1 / 1 64 x 64 x 64 -> 64 x 64 x 256 0.134 BFLOPs + 5 res 1 64 x 64 x 64 -> 64 x 64 x 256 + 6 conv 64 1 x 1 / 1 64 x 64 x 256 -> 64 x 64 x 64 0.134 BFLOPs + 7 conv 64 3 x 3 / 1 64 x 64 x 64 -> 64 x 64 x 64 0.302 BFLOPs + 8 conv 256 1 x 1 / 1 64 x 64 x 64 -> 64 x 64 x 256 0.134 BFLOPs + 9 res 5 64 x 64 x 256 -> 64 x 64 x 256 + 10 conv 64 1 x 1 / 1 64 x 64 x 256 -> 64 x 64 x 64 0.134 BFLOPs + 11 conv 64 3 x 3 / 1 64 x 64 x 64 -> 64 x 64 x 64 0.302 BFLOPs + 12 conv 256 1 x 1 / 1 64 x 64 x 64 -> 64 x 64 x 256 0.134 BFLOPs + 13 res 9 64 x 64 x 256 -> 64 x 64 x 256 + 14 conv 128 1 x 1 / 1 64 x 64 x 256 -> 64 x 64 x 128 0.268 BFLOPs + 15 conv 128 3 x 3 / 2 64 x 64 x 128 -> 32 x 32 x 128 0.302 BFLOPs + 16 conv 512 1 x 1 / 1 32 x 32 x 128 -> 32 x 32 x 512 0.134 BFLOPs + 17 res 13 64 x 64 x 256 -> 32 x 32 x 512 + 18 conv 128 1 x 1 / 1 32 x 32 x 512 -> 32 x 32 x 128 0.134 BFLOPs + 19 conv 128 3 x 3 / 1 32 x 32 x 128 -> 32 x 32 x 128 0.302 
BFLOPs + 20 conv 512 1 x 1 / 1 32 x 32 x 128 -> 32 x 32 x 512 0.134 BFLOPs + 21 res 17 32 x 32 x 512 -> 32 x 32 x 512 + 22 conv 128 1 x 1 / 1 32 x 32 x 512 -> 32 x 32 x 128 0.134 BFLOPs + 23 conv 128 3 x 3 / 1 32 x 32 x 128 -> 32 x 32 x 128 0.302 BFLOPs + 24 conv 512 1 x 1 / 1 32 x 32 x 128 -> 32 x 32 x 512 0.134 BFLOPs + 25 res 21 32 x 32 x 512 -> 32 x 32 x 512 + 26 conv 128 1 x 1 / 1 32 x 32 x 512 -> 32 x 32 x 128 0.134 BFLOPs + 27 conv 128 3 x 3 / 1 32 x 32 x 128 -> 32 x 32 x 128 0.302 BFLOPs + 28 conv 512 1 x 1 / 1 32 x 32 x 128 -> 32 x 32 x 512 0.134 BFLOPs + 29 res 25 32 x 32 x 512 -> 32 x 32 x 512 + 30 conv 256 1 x 1 / 1 32 x 32 x 512 -> 32 x 32 x 256 0.268 BFLOPs + 31 conv 256 3 x 3 / 2 32 x 32 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 32 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 33 res 29 32 x 32 x 512 -> 16 x 16 x1024 + 34 conv 256 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 256 0.134 BFLOPs + 35 conv 256 3 x 3 / 1 16 x 16 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 36 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 37 res 33 16 x 16 x1024 -> 16 x 16 x1024 + 38 conv 256 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 256 0.134 BFLOPs + 39 conv 256 3 x 3 / 1 16 x 16 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 40 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 41 res 37 16 x 16 x1024 -> 16 x 16 x1024 + 42 conv 256 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 256 0.134 BFLOPs + 43 conv 256 3 x 3 / 1 16 x 16 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 44 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 45 res 41 16 x 16 x1024 -> 16 x 16 x1024 + 46 conv 256 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 256 0.134 BFLOPs + 47 conv 256 3 x 3 / 1 16 x 16 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 48 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 49 res 45 16 x 16 x1024 -> 16 x 16 x1024 + 50 conv 256 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 256 0.134 BFLOPs + 51 conv 256 3 x 3 / 1 16 x 16 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 52 conv 
1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 53 res 49 16 x 16 x1024 -> 16 x 16 x1024 + 54 conv 512 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 512 0.268 BFLOPs + 55 conv 512 3 x 3 / 2 16 x 16 x 512 -> 8 x 8 x 512 0.302 BFLOPs + 56 conv 2048 1 x 1 / 1 8 x 8 x 512 -> 8 x 8 x2048 0.134 BFLOPs + 57 res 53 16 x 16 x1024 -> 8 x 8 x2048 + 58 conv 512 1 x 1 / 1 8 x 8 x2048 -> 8 x 8 x 512 0.134 BFLOPs + 59 conv 512 3 x 3 / 1 8 x 8 x 512 -> 8 x 8 x 512 0.302 BFLOPs + 60 conv 2048 1 x 1 / 1 8 x 8 x 512 -> 8 x 8 x2048 0.134 BFLOPs + 61 res 57 8 x 8 x2048 -> 8 x 8 x2048 + 62 conv 512 1 x 1 / 1 8 x 8 x2048 -> 8 x 8 x 512 0.134 BFLOPs + 63 conv 512 3 x 3 / 1 8 x 8 x 512 -> 8 x 8 x 512 0.302 BFLOPs + 64 conv 2048 1 x 1 / 1 8 x 8 x 512 -> 8 x 8 x2048 0.134 BFLOPs + 65 res 61 8 x 8 x2048 -> 8 x 8 x2048 + 66 avg 8 x 8 x2048 -> 2048 + 67 conv 1000 1 x 1 / 1 1 x 1 x2048 -> 1 x 1 x1000 0.004 BFLOPs + 68 softmax 1000 +Loading weights from ../../../../../data/darknet/resnet50.weights...Done! +layer filters size input output + 0 conv 64 7 x 7 / 2 256 x 256 x 3 -> 128 x 128 x 64 0.308 BFLOPs + 1 max 2 x 2 / 2 128 x 128 x 64 -> 64 x 64 x 64 + 2 conv 64 1 x 1 / 1 64 x 64 x 64 -> 64 x 64 x 64 0.034 BFLOPs + 3 conv 64 3 x 3 / 1 64 x 64 x 64 -> 64 x 64 x 64 0.302 BFLOPs + 4 conv 256 1 x 1 / 1 64 x 64 x 64 -> 64 x 64 x 256 0.134 BFLOPs + 5 res 1 64 x 64 x 64 -> 64 x 64 x 256 + 6 conv 64 1 x 1 / 1 64 x 64 x 256 -> 64 x 64 x 64 0.134 BFLOPs + 7 conv 64 3 x 3 / 1 64 x 64 x 64 -> 64 x 64 x 64 0.302 BFLOPs + 8 conv 256 1 x 1 / 1 64 x 64 x 64 -> 64 x 64 x 256 0.134 BFLOPs + 9 res 5 64 x 64 x 256 -> 64 x 64 x 256 + 10 conv 64 1 x 1 / 1 64 x 64 x 256 -> 64 x 64 x 64 0.134 BFLOPs + 11 conv 64 3 x 3 / 1 64 x 64 x 64 -> 64 x 64 x 64 0.302 BFLOPs + 12 conv 256 1 x 1 / 1 64 x 64 x 64 -> 64 x 64 x 256 0.134 BFLOPs + 13 res 9 64 x 64 x 256 -> 64 x 64 x 256 + 14 conv 128 1 x 1 / 1 64 x 64 x 256 -> 64 x 64 x 128 0.268 BFLOPs + 15 conv 128 3 x 3 / 2 64 x 64 x 128 -> 32 x 32 x 128 0.302 BFLOPs + 16 conv 
512 1 x 1 / 1 32 x 32 x 128 -> 32 x 32 x 512 0.134 BFLOPs + 17 res 13 64 x 64 x 256 -> 32 x 32 x 512 + 18 conv 128 1 x 1 / 1 32 x 32 x 512 -> 32 x 32 x 128 0.134 BFLOPs + 19 conv 128 3 x 3 / 1 32 x 32 x 128 -> 32 x 32 x 128 0.302 BFLOPs + 20 conv 512 1 x 1 / 1 32 x 32 x 128 -> 32 x 32 x 512 0.134 BFLOPs + 21 res 17 32 x 32 x 512 -> 32 x 32 x 512 + 22 conv 128 1 x 1 / 1 32 x 32 x 512 -> 32 x 32 x 128 0.134 BFLOPs + 23 conv 128 3 x 3 / 1 32 x 32 x 128 -> 32 x 32 x 128 0.302 BFLOPs + 24 conv 512 1 x 1 / 1 32 x 32 x 128 -> 32 x 32 x 512 0.134 BFLOPs + 25 res 21 32 x 32 x 512 -> 32 x 32 x 512 + 26 conv 128 1 x 1 / 1 32 x 32 x 512 -> 32 x 32 x 128 0.134 BFLOPs + 27 conv 128 3 x 3 / 1 32 x 32 x 128 -> 32 x 32 x 128 0.302 BFLOPs + 28 conv 512 1 x 1 / 1 32 x 32 x 128 -> 32 x 32 x 512 0.134 BFLOPs + 29 res 25 32 x 32 x 512 -> 32 x 32 x 512 + 30 conv 256 1 x 1 / 1 32 x 32 x 512 -> 32 x 32 x 256 0.268 BFLOPs + 31 conv 256 3 x 3 / 2 32 x 32 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 32 conv 1024 1 x 1 / 1 16 x 16 x 256 -> 16 x 16 x1024 0.134 BFLOPs + 33 res 29 32 x 32 x 512 -> 16 x 16 x1024 + 34 conv 256 1 x 1 / 1 16 x 16 x1024 -> 16 x 16 x 256 0.134 BFLOPs + 35 conv 256 3 x 3 / 1 16 x 16 x 256 -> 16 x 16 x 256 0.302 BFLOPs + 36 Traceback (most recent call last): + File "run_real_all.py", line 646, in + main() + File "run_real_all.py", line 638, in main + result_dict = process_results(workload_dict, iterations) + File "run_real_all.py", line 187, in process_results + result_dict[para][workload][config].append(process_file(log_file, config)) + File "run_real_all.py", line 155, in process_file + result_dict['allocation'] += int(words[3]) +IndexError: list index out of range diff --git a/workloads/realworld/output.xlsx b/workloads/realworld/output.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e59cb694ab0696b75ef856cfd5bbdcdb160432e6 Binary files /dev/null and b/workloads/realworld/output.xlsx differ diff --git a/workloads/realworld/output_all.xlsx 
b/workloads/realworld/output_all.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..2c78d48d9d0f6d6f136564671df15a7c06f611ff Binary files /dev/null and b/workloads/realworld/output_all.xlsx differ diff --git a/workloads/realworld/output_std.csv b/workloads/realworld/output_std.csv new file mode 100644 index 0000000000000000000000000000000000000000..c148771cb3c1493a1d9ad6ba9e9fabe3a08c3a5e --- /dev/null +++ b/workloads/realworld/output_std.csv @@ -0,0 +1,3 @@ +group,super +hotspot,0.0 +Geo-mean,0.0 diff --git a/workloads/realworld/pinned/BN/.clang-format b/workloads/realworld/pinned/BN/.clang-format new file mode 100644 index 0000000000000000000000000000000000000000..3a5940ef65bf1e40df9511da805a7a0440184e84 --- /dev/null +++ b/workloads/realworld/pinned/BN/.clang-format @@ -0,0 +1,90 @@ +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: false +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: false +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' 
+ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IndentCaseLabels: false +IndentWidth: 2 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 8 +UseTab: Never +... + diff --git a/workloads/realworld/pinned/BN/LICENSE b/workloads/realworld/pinned/BN/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/workloads/realworld/pinned/BN/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/workloads/realworld/pinned/BN/Makefile b/workloads/realworld/pinned/BN/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..864b8e45401b0fe12162389c2e9d33fa86f4fc9f --- /dev/null +++ b/workloads/realworld/pinned/BN/Makefile @@ -0,0 +1,169 @@ +################################################################################ +# +# Copyright 1993-2013 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. 
NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. 
+# +################################################################################ +# +# Makefile project only supported on Mac OS X and Linux Platforms) +# +################################################################################ + +include ../../../common/make.config +include ./findcudalib.mk + +# Location of the CUDA Toolkit +CUDA_PATH ?= $(CUDA_DIR) + +# internal flags +NVCCFLAGS := -m${OS_SIZE} +CCFLAGS := -Wno-narrowing +NVCCLDFLAGS := +LDFLAGS := + +# Extra user flags +EXTRA_NVCCFLAGS ?= +EXTRA_NVCCLDFLAGS ?= +EXTRA_LDFLAGS ?= +EXTRA_CCFLAGS ?= + +# OS-specific build flags +ifneq ($(DARWIN),) + LDFLAGS += -rpath $(CUDA_PATH)/lib + CCFLAGS += -arch $(OS_ARCH) $(STDLIB) +else + ifeq ($(OS_ARCH),armv7l) + ifeq ($(abi),gnueabi) + CCFLAGS += -mfloat-abi=softfp + else + # default to gnueabihf + override abi := gnueabihf + LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3 + CCFLAGS += -mfloat-abi=hard + endif + endif +endif + +ifeq ($(ARMv7),1) +NVCCFLAGS += -target-cpu-arch ARM +ifneq ($(TARGET_FS),) +CCFLAGS += --sysroot=$(TARGET_FS) +LDFLAGS += --sysroot=$(TARGET_FS) +LDFLAGS += -rpath-link=$(TARGET_FS)/lib +LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib +LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-$(abi) +endif +endif + +# Debug build flags +ifeq ($(dbg),1) + NVCCFLAGS += -g -G + TARGET := debug +else + TARGET := release +endif + +ALL_CCFLAGS := +ALL_CCFLAGS += $(NVCCFLAGS) +ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS)) +ALL_CCFLAGS += $(EXTRA_NVCCFLAGS) +ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS)) + +ALL_LDFLAGS := +ALL_LDFLAGS += $(ALL_CCFLAGS) +ALL_LDFLAGS += $(NVCCLDFLAGS) +ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS)) +ALL_LDFLAGS += $(EXTRA_NVCCLDFLAGS) +ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS)) + +# Common includes and paths for CUDA +INCLUDES := -I../../common/inc -I$(INCLUDE) -I$(CUPTI_INCLUDE) +LIBRARIES := -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + 
+################################################################################ + +# CUDA code generation flags +ifneq ($(OS_ARCH),armv7l) +GENCODE_SM10 := -gencode arch=compute_10,code=sm_10 +endif +GENCODE_SM20 := -gencode arch=compute_20,code=sm_20 +GENCODE_SM30 := -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=\"sm_35,compute_35\" +GENCODE_SM80 := -gencode arch=compute_80,code=sm_80 +GENCODE_FLAGS := $(GENCODE_SM80) + +################################################################################ + +# Target rules +all: build + +build: ordergraph_30 ordergraph_40 ordergraph_45 ordergraph_50 + +# ordergraph_25.o: ordergraph.cu ordergraph_kernel.cu +# $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_25 -o $@ -c $< + +ordergraph_30.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_30 -o $@ -c $< + +ordergraph_40.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_40 -o $@ -c $< + +ordergraph_45.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_45 -o $@ -c $< + +ordergraph_50.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_50 -o $@ -c $< + +# ordergraph_125.o: ordergraph.cu ordergraph_kernel.cu +# $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_125 -o $@ -c $< + + + +# ordergraph_25: ordergraph_25.o +# $(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES) + +ordergraph_30: ordergraph_30.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +ordergraph_40: ordergraph_40.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +ordergraph_45: ordergraph_45.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp 
$(LIBRARIES) + +ordergraph_50: ordergraph_50.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +# ordergraph_125: ordergraph_125.o +# $(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES) + +run: build + ./ordergraph + +clean: + rm -f ordergraph_30 ordergraph_40 ordergraph_45 ordergraph_50 *.o *.bin *.out + +clobber: clean diff --git a/workloads/realworld/pinned/BN/README.md b/workloads/realworld/pinned/BN/README.md new file mode 100644 index 0000000000000000000000000000000000000000..07158a0bd52af63032c860ff04e243e0d7c76ef1 --- /dev/null +++ b/workloads/realworld/pinned/BN/README.md @@ -0,0 +1,21 @@ +The code works with CUDA 6.0. +If you are using this code for your project, please cite [our paper](https://yuemmawang.github.io/publications/wang-tpds2016.pdf): + +``` +Wang Y, Qian W, Zhang S, et al. A Learning Algorithm for Bayesian Networks and +Its Efficient Implementation on GPUs[J]. Parallel and Distributed Systems, IEEE +Transactions on, 2016, 27(1): 17-30. +``` + +``` +@article{wang2015learning, + title={A learning algorithm for Bayesian networks and its efficient implementation on GPUs}, + author={Wang, Yu and Qian, Weikang and Zhang, Shuchang and Liang, Xiaoyao and Yuan, Bo}, + journal={IEEE Transactions on Parallel and Distributed Systems}, + volume={27}, + number={1}, + pages={17--30}, + year={2015}, + publisher={IEEE} +} +``` diff --git a/workloads/realworld/pinned/BN/data125.cu b/workloads/realworld/pinned/BN/data125.cu new file mode 100644 index 0000000000000000000000000000000000000000..6bb370a636a330992e083f0b52f1f67a9a86040e --- /dev/null +++ b/workloads/realworld/pinned/BN/data125.cu @@ -0,0 +1,610 @@ +// The data are synthesized. 
+ +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=25; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0, 
+1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,1, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,1, 
+0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0, 
+1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,0,1,0, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, 
+0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, 
+1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,0,1,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0, 
+1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0, 
+0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0, 
+1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, 
+0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,0,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0, 
+0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, 
+1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,0,1,0,0,0,1, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0, 
+1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0, 
+0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0, 
+0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,1,0,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0, 
+0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, 
+1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1, 
+1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, 
+1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,1,0, 
+1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1, 
+1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1, 
+0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,0,0,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1, 
+1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,0,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,0, 
+1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0, 
+1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,0, 
+1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0, 
+1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,0, 
+1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,0, 
+1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,0, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,0,0,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1, 
+1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0, 
+1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,1,0, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0, 
+0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,1,1,0,0,0,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0, 
+1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,0,0,1,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,1,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0, 
+0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,1,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,1,1, 
+1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0, 
+1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,1, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,0, 
+1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,1,0,1, 
+0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,0,0, 
+1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,0,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,1,1, 
+1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,1,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,1,1, 
+0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, 
+1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0, 
+1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,1,0, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, 
+1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,0, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,1,0,0,0,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, 
+1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,1,1,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,1,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,1,0, 
+1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,0,1,1,0, 
+1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,1,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0, 
+0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0, 
+1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0, 
+1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,0,1,0,1,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0, 
+1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,1,1,1,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,1, +} +#endif diff --git a/workloads/realworld/pinned/BN/data25.cu b/workloads/realworld/pinned/BN/data25.cu new file mode 100644 index 0000000000000000000000000000000000000000..6af94f79766c6e36ee121f6c537f987f841bf7c0 --- /dev/null +++ b/workloads/realworld/pinned/BN/data25.cu @@ -0,0 +1,612 @@ +// The data are synthesized. 
+ +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=25; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1, 
+0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1, 
+1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1, 
+1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1, 
+1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0, 
+1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0, 
+1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0, 
+1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1, 
+1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1, 
+1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0, 
+0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0, 
+1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0, 
+1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +} + +#endif + diff --git a/workloads/realworld/pinned/BN/data30.cu b/workloads/realworld/pinned/BN/data30.cu new file mode 100644 index 0000000000000000000000000000000000000000..bf89729e319e920533f9d134c8a2dff9aa4bc022 --- /dev/null +++ b/workloads/realworld/pinned/BN/data30.cu @@ 
-0,0 +1,612 @@ +// The data are synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=30; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1, 
+1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0, 
+1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0, 
+1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0, 
+1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1, 
+1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1, 
+1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0, 
+1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1, 
+1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1, 
+1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0, 
+1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1, 
+0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, 
+1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +} + +#endif + diff --git a/workloads/realworld/pinned/BN/data40.cu b/workloads/realworld/pinned/BN/data40.cu new file mode 100644 index 0000000000000000000000000000000000000000..16d34d9dc4860d1dd24d604e501bccb43cae8095 --- /dev/null +++ b/workloads/realworld/pinned/BN/data40.cu @@ -0,0 +1,612 @@ +// The data are 
synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=40; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1, 
+1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0, 
+1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1, 
+1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1, 
+1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0, 
+0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0, 
+1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0, 
+0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1, 
+1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1, 
+1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, 
+1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1, 
+1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1, 
+1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1, 
+1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0, 
+0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0, 
+1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0, 
+1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1, +} + +#endif + diff --git a/workloads/realworld/pinned/BN/data45.cu b/workloads/realworld/pinned/BN/data45.cu new file mode 100644 index 0000000000000000000000000000000000000000..b23e9a35e7c27948c0853710a06be462694df57d --- /dev/null +++ b/workloads/realworld/pinned/BN/data45.cu @@ -0,0 +1,616 @@ +// The data are synthesized. +#include +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=45; +const int STATE_N=2; +const int DATA_N=600; + + + +int data[DATA_N*NODE_N]= { +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0, 
+0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0, 
+1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0, 
+1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1, 
+1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1, 
+0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1, 
+1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0, 
+0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1, 
+1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0, 
+1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0, 
+0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1, 
+1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1, 
+1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0, 
+1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, 
+1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1, 
+0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0, 
+1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0, 
+1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1, 
+1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0, 
+1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, 
+1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1, 
+1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0, 
+0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1, 
+1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0, +} + + +#endif + diff --git a/workloads/realworld/pinned/BN/data50.cu b/workloads/realworld/pinned/BN/data50.cu new file mode 100644 index 0000000000000000000000000000000000000000..936a7aa4a67a1f949f1264477388a7eb5a93a1b4 --- /dev/null +++ 
b/workloads/realworld/pinned/BN/data50.cu @@ -0,0 +1,612 @@ +// The data are synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=50; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,0,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1, 
+1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0, 
+1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1, 
+1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1, 
+1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,0,1,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,1,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0, 
+0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,0,0,1,1,1, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0, 
+1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1, 
+1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1, 
+1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1, 
+0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,1,0,0,0,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0, 
+1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0, 
+0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1, 
+1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,1,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0, 
+1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0, 
+1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1, 
+1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0, 
+1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1, 
+1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,0,1,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0, 
+0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,0,1,0,0,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,1, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0, 
+1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0, +} + +#endif + diff --git a/workloads/realworld/pinned/BN/file_process.py b/workloads/realworld/pinned/BN/file_process.py new file mode 100644 index 0000000000000000000000000000000000000000..eeebbee70e59153ca0f1a960f4e8ffa0437693c3 --- /dev/null +++ b/workloads/realworld/pinned/BN/file_process.py @@ -0,0 +1,15 @@ +import os +filename = "data125.cu" +file_write = "data45.cu" + +f_w = open(file_write,"w") +with open(filename) as f: + content = f.readlines() + + for i, line in enumerate(content): + if i < 8: + f_w.write(line) + elif i >= 8: + f_w.write(line[0:90]) + f_w.write("\n") +f_w.close() \ No newline at end of file diff --git 
a/workloads/realworld/pinned/BN/findcudalib.mk b/workloads/realworld/pinned/BN/findcudalib.mk new file mode 100644 index 0000000000000000000000000000000000000000..f40c2c38e5510fdee2fdf59df00160b547c056c1 --- /dev/null +++ b/workloads/realworld/pinned/BN/findcudalib.mk @@ -0,0 +1,226 @@ +################################################################################ +# +# Copyright 1993-2013 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# findcudalib.mk is used to find the locations for CUDA libraries and other +# Unix Platforms. 
This is supported Mac OS X and Linux. +# +################################################################################ + +# OS Name (Linux or Darwin) +OSUPPER = $(shell uname -s 2>/dev/null | tr "[:lower:]" "[:upper:]") +OSLOWER = $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]") + +# Flags to detect 32-bit or 64-bit OS platform +OS_SIZE = $(shell uname -m | sed -e "s/i.86/32/" -e "s/x86_64/64/" -e "s/armv7l/32/") +OS_ARCH = $(shell uname -m | sed -e "s/i386/i686/") + +# Determine OS platform and unix distribution +ifeq ("$(OSLOWER)","linux") + # first search lsb_release + DISTRO = $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]") + DISTVER = $(shell lsb_release -r -s 2>/dev/null) + ifeq ("$(DISTRO)",'') + # second search and parse /etc/issue + DISTRO = $(shell more /etc/issue | awk '{print $$1}' | sed '1!d' | sed -e "/^$$/d" 2>/dev/null | tr "[:upper:]" "[:lower:]") + DISTVER= $(shell more /etc/issue | awk '{print $$2}' | sed '1!d' 2>/dev/null + endif + ifeq ("$(DISTRO)",'') + # third, we can search in /etc/os-release or /etc/{distro}-release + DISTRO = $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB") + DISTVER= $(shell awk '/DISTRIB_RELEASE/' /etc/*-release | sed 's/DISTRIB_RELEASE=//' | grep -v "DISTRIB_RELEASE") + endif +endif + +# search at Darwin (unix based info) +DARWIN = $(strip $(findstring DARWIN, $(OSUPPER))) +ifneq ($(DARWIN),) + SNOWLEOPARD = $(strip $(findstring 10.6, $(shell egrep "10\.6" /System/Library/CoreServices/SystemVersion.plist))) + LION = $(strip $(findstring 10.7, $(shell egrep "10\.7" /System/Library/CoreServices/SystemVersion.plist))) + MOUNTAIN = $(strip $(findstring 10.8, $(shell egrep "10\.8" /System/Library/CoreServices/SystemVersion.plist))) + MAVERICKS = $(strip $(findstring 10.9, $(shell egrep "10\.9" /System/Library/CoreServices/SystemVersion.plist))) +endif + +# Common binaries +GCC ?= g++ +CLANG ?= /usr/bin/clang + +ifeq 
("$(OSUPPER)","LINUX") + NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(GCC) +else + # for some newer versions of XCode, CLANG is the default compiler, so we need to include this + ifneq ($(MAVERICKS),) + NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(CLANG) + STDLIB ?= -stdlib=libstdc++ + else + NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(GCC) + endif +endif + +# Take command line flags that override any of these settings +ifeq ($(i386),1) + OS_SIZE = 32 + OS_ARCH = i686 +endif +ifeq ($(x86_64),1) + OS_SIZE = 64 + OS_ARCH = x86_64 +endif +ifeq ($(ARMv7),1) + OS_SIZE = 32 + OS_ARCH = armv7l +endif + +ifeq ("$(OSUPPER)","LINUX") + # Each Linux Distribuion has a set of different paths. This applies especially when using the Linux RPM/debian packages + ifeq ("$(DISTRO)","ubuntu") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","kubuntu") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","debian") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","suse") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","suse linux") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","opensuse") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","fedora") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","redhat") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= 
/usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","red") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","redhatenterpriseworkstation") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH ?= /usr/lib + endif + endif + ifeq ("$(DISTRO)","centos") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + + ifeq ($(ARMv7),1) + CUDAPATH := /usr/arm-linux-gnueabihf/lib + CUDALINK := -L/usr/arm-linux-gnueabihf/lib + ifneq ($(TARGET_FS),) + CUDAPATH += $(TARGET_FS)/usr/lib/nvidia-current + CUDALINK += -L$(TARGET_FS)/usr/lib/nvidia-current + endif + endif + + # Search for Linux distribution path for libcuda.so + CUDALIB ?= $(shell find $(CUDAPATH) $(DFLT_PATH) -name libcuda.so -print 2>/dev/null) + + ifeq ("$(CUDALIB)",'') + $(info >>> WARNING - CUDA Driver libcuda.so is not found. Please check and re-install the NVIDIA driver. 
<<<) + EXEC=@echo "[@]" + endif +else + # This would be the Mac OS X path if we had to do anything special +endif + diff --git a/workloads/realworld/pinned/BN/ordergraph.cu b/workloads/realworld/pinned/BN/ordergraph.cu new file mode 100644 index 0000000000000000000000000000000000000000..d96a0b85460d0648a4e632a86b1c6e1aab15633b --- /dev/null +++ b/workloads/realworld/pinned/BN/ordergraph.cu @@ -0,0 +1,733 @@ +#include +#include +#include +#include +#include +//#include +// includes CUDA +#include +// includes, kernels +#include "ordergraph_kernel.cu" +; + +#include "../../../common/cpu_timestamps.h" +#include "../../../common/cupti_add.h" + +const int HIGHEST = 3; +int taskperthr = 1; +int sizepernode; +int ITER = 100; + +// global var +float preScore = -99999999999.0f; +float score = 0.0; +float maxScore[HIGHEST] = {-999999999.0f}; +bool orders[NODE_N][NODE_N]; +bool preOrders[NODE_N][NODE_N]; +bool preGraph[NODE_N][NODE_N]; +bool bestGraph[HIGHEST][NODE_N][NODE_N]; +bool graph[NODE_N][NODE_N]; +// float prior[NODE_N][NODE_N]; +float *localscore, *D_localscore, *D_Score, *scores; +float *LG; +bool *D_parent; +int *D_resP, *parents; + +void initial(); // initial orders and data +int genOrders(); // swap +int ConCore(); // discard new order or not +bool getparent(int *bit, int *pre, int posN, int *parent, int *parN, + int time); // get every possible set of parents for a node +void incr(int *bit, int n); // binary code increases 1 each time +void incrS(int *bit, int n); // STATE_N code increases 1 each time +bool getState( + int parN, int *state, + int time); // get every possible combination of state for a parent set +float logGamma(int N); // log and gamma +float findBestGraph(); +void genScore(); +int convert(int *parent, int parN); +void sortGraph(); +void swap(int a, int b); +void Pre_logGamma(); +int findindex(int *arr, int size); +int C(int n, int a); + +FILE *fpout; + +int main(int argc, char *argv[]) { + /* + for(i=0;i maxScore[HIGHEST - 1]) { + 
maxScore[HIGHEST - 1] = preScore; + for (a = 0; a < NODE_N; a++) { + for (b = 0; b < NODE_N; b++) { + bestGraph[HIGHEST - 1][a][b] = preGraph[a][b]; + } + } + b = HIGHEST - 1; + for (a = HIGHEST - 2; a >= 0; a--) { + if (maxScore[b] > maxScore[a]) { + swap(a, b); + tmpd = maxScore[a]; + maxScore[a] = maxScore[b]; + maxScore[b] = tmpd; + b = a; + } + } + } + } + + } // endwhile + + cudaFreeHost(localscore); + cudaFree(D_localscore); + cudaFree(D_parent); + + cudaFreeHost(scores); + cudaFreeHost(parents); + cudaFree(D_Score); + cudaFree(D_resP); + + /* + for(j=0;j max) { + max = maxScore[i]; + maxi = i; + } + } + + swap(j, maxi); + tmp = maxScore[j]; + maxScore[j] = max; + maxScore[maxi] = tmp; + } +} + +void swap(int a, int b) { + int i, j; + bool tmp; + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + + tmp = bestGraph[a][i][j]; + bestGraph[a][i][j] = bestGraph[b][i][j]; + bestGraph[b][i][j] = tmp; + } + } +} + +void initial() { + int i, j, tmp, a, b, r; + bool tmpd; + tmp = 1; + for (i = 1; i <= 4; i++) { + tmp += C(NODE_N - 1, i); + } + sizepernode = tmp; + tmp *= NODE_N; + + cudaMallocHost((void **)&localscore, tmp * sizeof(float)); + + for (i = 0; i < tmp; i++) + localscore[i] = 0; + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) + orders[i][j] = 0; + } + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < i; j++) + orders[i][j] = 1; + } + r = rand() % 10000; + for (i = 0; i < r; i++) { + a = rand() % NODE_N; + b = rand() % NODE_N; + for (j = 0; j < NODE_N; j++) { + tmpd = orders[j][a]; + orders[j][a] = orders[j][b]; + orders[j][b] = tmpd; + } + + for (j = 0; j < NODE_N; j++) { + tmpd = orders[a][j]; + orders[a][j] = orders[b][j]; + orders[b][j] = tmpd; + } + } + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + preOrders[i][j] = orders[i][j]; + } + } +} + +// generate ramdom order +int genOrders() { + + int a, b, j; + bool tmp; + a = rand() % NODE_N; + b = rand() % NODE_N; + + for (j = 0; j < NODE_N; j++) { + 
tmp = orders[a][j]; + orders[a][j] = orders[b][j]; + orders[b][j] = tmp; + } + for (j = 0; j < NODE_N; j++) { + tmp = orders[j][a]; + orders[j][a] = orders[j][b]; + orders[j][b] = tmp; + } + + return 1; +} + +// decide leave or discard an order +int ConCore() { + int i, j; + float tmp; + tmp = log((rand() % 100000) / 100000.0); + if (tmp < (score - preScore)) { + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + preOrders[i][j] = orders[i][j]; + preGraph[i][j] = graph[i][j]; + } + } + preScore = score; + + return 1; + } + + return 0; +} + +void genScore() { + int *D_data; + float *D_LG; + dim3 grid(sizepernode / 256 + 1, 1, 1); + dim3 threads(256, 1, 1); + + Pre_logGamma(); + // cudaPrintfInit(); + cudaMalloc((void **)&D_data, NODE_N * DATA_N * sizeof(int)); + cudaMalloc((void **)&D_localscore, NODE_N * sizepernode * sizeof(float)); + cudaMalloc((void **)&D_LG, (DATA_N + 2) * sizeof(float)); + cudaMemset(D_localscore, 0.0, NODE_N * sizepernode * sizeof(float)); + cudaMemcpy(D_data, data, NODE_N * DATA_N * sizeof(int), + cudaMemcpyHostToDevice); + cudaMemcpy(D_LG, LG, (DATA_N + 2) * sizeof(float), cudaMemcpyHostToDevice); + genScoreKernel<<>>(sizepernode, D_localscore, D_data, D_LG); + cudaDeviceSynchronize(); + cudaMemcpy(localscore, D_localscore, NODE_N * sizepernode * sizeof(float), + cudaMemcpyDeviceToHost); + + // cudaPrintfDisplay(stdout, true); + // cudaPrintfEnd(); + + cudaFreeHost(LG); + cudaFree(D_LG); + cudaFree(D_data); + + cudaMallocHost((void **)&scores, + (sizepernode / (256 * taskperthr) + 1) * sizeof(float)); + cudaMallocHost((void **)&parents, + (sizepernode / (256 * taskperthr) + 1) * 4 * sizeof(int)); + cudaMalloc((void **)&D_Score, + (sizepernode / (256 * taskperthr) + 1) * sizeof(float)); + cudaMalloc((void **)&D_parent, NODE_N * sizeof(bool)); + cudaMalloc((void **)&D_resP, + (sizepernode / (256 * taskperthr) + 1) * 4 * sizeof(int)); +} + +int convert(int *parent, int parN) { + int i, j, w = 1, tmp = 0; + j = 0; + for (i = 0; 
parN > 0 && i <= parent[parN - 1]; i++) { + if (parent[j] == i) { + j++; + tmp += w; + } + w *= 2; + } + + return tmp; +} + +void Pre_logGamma() { + + cudaMallocHost((void **)&LG, (DATA_N + 2) * sizeof(float)); + + LG[1] = log(1.0); + float i; + for (i = 2; i <= DATA_N + 1; i++) { + LG[(int)i] = LG[(int)i - 1] + log((float)i); + } +} + +void incr(int *bit, int n) { + + bit[n]++; + if (bit[n] >= 2) { + bit[n] = 0; + incr(bit, n + 1); + } + + return; +} + +void incrS(int *bit, int n) { + + bit[n]++; + if (bit[n] >= STATE_N) { + bit[n] = 0; + incr(bit, n + 1); + } + + return; +} + +bool getState(int parN, int *state, int time) { + int j = 1; + + j = pow(STATE_N, (float)parN) - 1; + + if (time > j) + return false; + + if (time >= 1) + incrS(state, 0); + + return true; +} + +bool getparent(int *bit, int *pre, int posN, int *parent, int *parN, int time) { + int i, j = 1; + + *parN = 0; + if (time == 0) + return true; + + for (i = 0; i < posN; i++) { + j = j * 2; + } + j--; + + if (time > j) + return false; + + incr(bit, 0); + + for (i = 0; i < posN; i++) { + if (bit[i] == 1) { + parent[(*parN)++] = pre[i]; + } + } + + return true; +} + +float findBestGraph() { + float bestls = -99999999; + int bestparent[5]; + int bestpN, total; + int node, index; + int pre[NODE_N] = {0}; + int parent[NODE_N] = {0}; + int posN = 0, i, j, parN, tmp, k, l; + float ls = -99999999999, score = 0; + int blocknum; + + for (i = 0; i < NODE_N; i++) + for (j = 0; j < NODE_N; j++) + graph[i][j] = 0; + + for (node = 0; node < NODE_N; node++) { + + bestls = -99999999; + posN = 0; + + for (i = 0; i < NODE_N; i++) { + if (orders[node][i] == 1) { + pre[posN++] = i; + } + } + + if (posN >= 0) { + total = C(posN, 4) + C(posN, 3) + C(posN, 2) + posN + 1; + taskperthr = 1; + blocknum = total / (256 * taskperthr) + 1; + + int nbatches = MIN_NBATCHES; + + int blocknum_max = total / (BLOCK_SIZE * MIN_NBATCHES * taskperthr) + 1; + if (blocknum_max >= MAX_NBLOCKS) { + blocknum = MAX_NBLOCKS; + nbatches = (total 
+ 1) / (BLOCK_SIZE * MAX_NBLOCKS * taskperthr); + } else { + blocknum = blocknum_max; + } + + cudaMemset(D_resP, 0, blocknum * 4 * sizeof(int)); + cudaMemset(D_Score, -999999.0, blocknum * nbatches * sizeof(float)); + cudaMemcpy(D_parent, orders[node], NODE_N * sizeof(bool), + cudaMemcpyHostToDevice); + + computeKernel<<>>( + taskperthr, sizepernode, D_localscore, D_parent, node, total, D_Score, + D_resP, nbatches); + cudaDeviceSynchronize(); + cudaMemcpy(parents, D_resP, blocknum * 4 * sizeof(int), + cudaMemcpyDeviceToHost); + cudaMemcpy(scores, D_Score, blocknum * sizeof(float), + cudaMemcpyDeviceToHost); + + for (i = 0; i < blocknum * nbatches; i++) { + + if (scores[i] > bestls) { + + bestls = scores[i]; + + parN = 0; + for (tmp = 0; tmp < 4; tmp++) { + if (parents[i * 4 + tmp] < 0) + break; + + bestparent[tmp] = parents[i * 4 + tmp]; + + parN++; + } + + bestpN = parN; + } + } + } else { + if (posN >= 4) { + for (i = 0; i < posN; i++) { + for (j = i + 1; j < posN; j++) { + for (k = j + 1; k < posN; k++) { + for (l = k + 1; l < posN; l++) { + parN = 4; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + if (pre[j] > node) + parent[2] = pre[j]; + else + parent[2] = pre[j] + 1; + if (pre[k] > node) + parent[3] = pre[k]; + else + parent[3] = pre[k] + 1; + if (pre[l] > node) + parent[4] = pre[l]; + else + parent[4] = pre[l] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + } + } + } + + if (posN >= 3) { + for (i = 0; i < posN; i++) { + for (j = i + 1; j < posN; j++) { + for (k = j + 1; k < posN; k++) { + + parN = 3; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + if (pre[j] > node) + parent[2] = pre[j]; + else + parent[2] = pre[j] + 1; + if (pre[k] > node) + parent[3] = pre[k]; + else + parent[3] = pre[k] + 1; + + index = 
findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + } + } + + if (posN >= 2) { + for (i = 0; i < posN; i++) { + for (j = i + 1; j < posN; j++) { + + parN = 2; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + if (pre[j] > node) + parent[2] = pre[j]; + else + parent[2] = pre[j] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + } + + if (posN >= 1) { + for (i = 0; i < posN; i++) { + + parN = 1; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + + parN = 0; + index = sizepernode * node; + + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = 0; + } + } + if (bestls > -99999999) { + + for (i = 0; i < bestpN; i++) { + if (bestparent[i] < node) + graph[node][bestparent[i] - 1] = 1; + else + graph[node][bestparent[i]] = 1; + } + score += bestls; + } + } + + return score; +} + +int findindex(int *arr, int size) { // reminder: arr[0] has to be 0 && size == + // array size-1 && index start from 0 + int i, j, index = 0; + + for (i = 1; i < size; i++) { + index += C(NODE_N - 1, i); + } + + for (i = 1; i <= size - 1; i++) { + for (j = arr[i - 1] + 1; j <= arr[i] - 1; j++) { + index += C(NODE_N - 1 - j, size - i); + } + } + + index += arr[size] - arr[size - 1]; + + return index; +} + +int C(int n, int a) { + int i, res = 1, atmp = a; + + for (i = 0; i < atmp; i++) { + res *= n; + n--; + } + + for (i = 0; i < atmp; i++) { 
+ res /= a; + a--; + } + + return res; +} \ No newline at end of file diff --git a/workloads/realworld/pinned/BN/ordergraph_kernel.cu b/workloads/realworld/pinned/BN/ordergraph_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..a331f73e3dec8f82f5a499023a249761c405dcb1 --- /dev/null +++ b/workloads/realworld/pinned/BN/ordergraph_kernel.cu @@ -0,0 +1,350 @@ +#ifndef _ORDERGRAPH_KERNEL_H_ +#define _ORDERGRAPH_KERNEL_H_ + +#include + +#ifdef DATA_25 +#include "data25.cu" +#endif +#ifdef DATA_30 +#include "data30.cu" +#endif +#ifdef DATA_40 +#include "data40.cu" +#endif +#ifdef DATA_45 +#include "data45.cu" +#endif +#ifdef DATA_50 +#include "data50.cu" +#endif +#ifdef DATA_125 +#include "data125.cu" +#endif +; + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define BLOCK_SIZE 256 +#define MAX_NBLOCKS 1024 +#define MIN_NBATCHES 16 + +__device__ void Dincr(int *bit, int n); +__device__ void DincrS(int *bit, int n); +__device__ bool D_getState(int parN, int *sta, int time); +__device__ void D_findComb(int *comb, int l, int n); +__device__ int D_findindex(int *arr, int size); +__device__ int D_C(int n, int a); + +__global__ void genScoreKernel(int sizepernode, float *D_localscore, + int *D_data, float *D_LG) { + int id = blockIdx.x * BLOCK_SIZE + threadIdx.x; + int node, index; + bool flag; + int parent[5] = {0}; + int pre[NODE_N] = {0}; + int state[5] = {0}; + int i, j, parN = 0, tmp, t; + int t1 = 0, t2 = 0; + float ls = 0; + int Nij[STATE_N] = {0}; + + if (id < sizepernode) { + + D_findComb(parent, id, NODE_N - 1); + + for (i = 0; i < 4; i++) { + if (parent[i] > 0) + parN++; + } + + for (node = 0; node < NODE_N; node++) { + + j = 1; + for (i = 0; i < NODE_N; i++) { + if (i != node) + pre[j++] = i; + } + + for (tmp = 0; tmp < parN; tmp++) + state[tmp] = 0; + + index = sizepernode * node + id; + + // priors + /* + for(tmp=1;tmp<=4;tmp++){ + 
localscore[index]+=100*(prior[node][pre[parent[tmp]]]-0.5)*(prior[node][pre[parent[tmp]]]-0.5)*(prior[node][pre[parent[tmp]]]-0.5); + } + */ + t = 0; + while (D_getState(parN, state, t++)) { // for get state + // printf("test %u\n",id); + ls = 0; + for (tmp = 0; tmp < STATE_N; tmp++) + Nij[tmp] = 0; + + for (t1 = 0; t1 < DATA_N; t1++) { + flag = true; + for (t2 = 0; t2 < parN; t2++) { + if (D_data[t1 * NODE_N + pre[parent[t2]]] != state[t2]) { + flag = false; + break; + } + } + if (!flag) + continue; + + Nij[D_data[t1 * NODE_N + node]]++; + } + + tmp = STATE_N - 1; + + for (t1 = 0; t1 < STATE_N; t1++) { + ls += D_LG[Nij[t1]]; + tmp += Nij[t1]; + } + + ls -= D_LG[tmp]; + ls += D_LG[STATE_N - 1]; + + D_localscore[index] += ls; + } + } + } +} + +__global__ void computeKernel(int taskperthr, int sizepernode, + float *D_localscore, bool *D_parent, int node, + int total, float *D_Score, int *D_resP, + int nbatches) { + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + __shared__ float lsinblock[PREFETCH_COUNT][BLOCK_SIZE]; + + int fetch = 0; + int end_tile = fetch + nbatches; + int bestparent[4] = {0}, parent[5] = {-1}; + + for (int compute = fetch; compute < end_tile; compute++) { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) { + unsigned int bid = blockIdx.x * nbatches + fetch; + unsigned int tid = threadIdx.x; + unsigned int id = bid * (BLOCK_SIZE * nbatches) + tid; + + int posN = 1, i, index, tmp; + int pre[NODE_N] = {0}; + int parN = 0; + + float bestls = -999999999999999, ls; + + for (i = 0; i < NODE_N; i++) { + if (D_parent[i] == 1) { + pre[posN++] = i; + } + } + + for (i = 0; i < taskperthr && ((id * taskperthr + i) < total); i++) { + + D_findComb(parent, id * taskperthr + i, posN); + + for (parN = 0; parN < 4; parN++) { + if (parent[parN] < 0) + break; + if (pre[parent[parN]] > node) + parent[parN] = pre[parent[parN]]; + else + parent[parN] = pre[parent[parN]] + 1; + } + + for (tmp = 
parN; tmp > 0; tmp--) { + parent[tmp] = parent[tmp - 1]; + } + parent[0] = 0; + + index = D_findindex(parent, parN); + index += sizepernode * node; + + ls = D_localscore[index]; + + if (ls > bestls) { + bestls = ls; + for (tmp = 0; tmp < 4; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + + memcpy_async(lsinblock[fetch % PREFETCH_COUNT][tid], bestls, pipe); + pipe.commit(); + } + if (fetch == end_tile) { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + int i, t; + unsigned int bid = blockIdx.x * nbatches + compute; + unsigned int tid = threadIdx.x; + // unsigned int id = bid * (BLOCK_SIZE * nbatches) + tid; + + for (i = BLOCK_SIZE / 2; i >= 1; i /= 2) { + if (tid < i) { + if (lsinblock[compute % PREFETCH_COUNT][tid + i] > + lsinblock[compute % PREFETCH_COUNT][tid] && + lsinblock[compute % PREFETCH_COUNT][tid + i] < 0) { + lsinblock[compute % PREFETCH_COUNT][tid] = + lsinblock[compute % PREFETCH_COUNT][tid + i]; + lsinblock[compute % PREFETCH_COUNT][tid + i] = (float)(tid + i); + } else if (lsinblock[compute % PREFETCH_COUNT][tid + i] < + lsinblock[compute % PREFETCH_COUNT][tid] && + lsinblock[compute % PREFETCH_COUNT][tid] < 0) { + lsinblock[compute % PREFETCH_COUNT][tid + i] = (float)tid; + } else if (lsinblock[tid] > 0 && lsinblock[tid + i] < 0) { + lsinblock[compute % PREFETCH_COUNT][tid] = + lsinblock[compute % PREFETCH_COUNT][tid + i]; + lsinblock[compute % PREFETCH_COUNT][tid + i] = (float)(tid + i); + } else if (lsinblock[compute % PREFETCH_COUNT][tid] < 0 && + lsinblock[compute % PREFETCH_COUNT][tid + i] > 0) { + lsinblock[compute % PREFETCH_COUNT][tid + i] = (float)tid; + } + } + block.sync(); + } + block.sync(); + + if (tid == 0) { + D_Score[bid] = lsinblock[compute % PREFETCH_COUNT][0]; + t = 0; + for (i = 0; i < 7 && t < 128 && t >= 0; i++) { + t = (int)lsinblock[compute % PREFETCH_COUNT][(int)powf(2.0, i) + t]; + } + lsinblock[compute % PREFETCH_COUNT][0] = (float)t; + } 
+ block.sync(); + + if (tid == (int)lsinblock[compute % PREFETCH_COUNT][0]) { + for (i = 0; i < 4; i++) { + D_resP[bid * 4 + i] = bestparent[i]; + } + } + } +} + +__device__ void Dincr(int *bit, int n) { + + while (n <= NODE_N) { + bit[n]++; + if (bit[n] >= 2) { + bit[n] = 0; + n++; + } else { + break; + } + } + + return; +} + +__device__ void DincrS(int *bit, int n) { + + bit[n]++; + if (bit[n] >= STATE_N) { + bit[n] = 0; + Dincr(bit, n + 1); + } + + return; +} + +__device__ bool D_getState(int parN, int *sta, int time) { + int i, j = 1; + + for (i = 0; i < parN; i++) { + j *= STATE_N; + } + j--; + if (time > j) + return false; + + if (time >= 1) + DincrS(sta, 0); + + return true; +} + +__device__ void D_findComb(int *comb, int l, int n) { + const int len = 4; + if (l == 0) { + for (int i = 0; i < len; i++) + comb[i] = -1; + return; + } + int sum = 0; + int k = 1; + + while (sum < l) + sum += D_C(n, k++); + l -= sum - D_C(n, --k); + int low = 0; + int pos = 0; + while (k > 1) { + sum = 0; + int s = 1; + while (sum < l) + sum += D_C(n - s++, k - 1); + l -= sum - D_C(n - (--s), --k); + low += s; + comb[pos++] = low; + n -= s; + } + comb[pos] = low + l; + for (int i = pos + 1; i < 4; i++) + comb[i] = -1; +} + +__device__ int D_findindex(int *arr, + int size) { // reminder: arr[0] has to be 0 && size + // == array size-1 && index start from 0 + int i, j, index = 0; + + for (i = 1; i < size; i++) { + index += D_C(NODE_N - 1, i); + } + + for (i = 1; i <= size - 1; i++) { + for (j = arr[i - 1] + 1; j <= arr[i] - 1; j++) { + index += D_C(NODE_N - 1 - j, size - i); + } + } + + index += arr[size] - arr[size - 1]; + + return index; +} + +__device__ int D_C(int n, int a) { + int i, res = 1, atmp = a; + + for (i = 0; i < atmp; i++) { + res *= n; + n--; + } + + for (i = 0; i < atmp; i++) { + res /= a; + a--; + } + + return res; +} + +#endif diff --git a/workloads/realworld/pinned/BN/run.sh b/workloads/realworld/pinned/BN/run.sh new file mode 100755 index 
0000000000000000000000000000000000000000..f87817975334cff247b1bdf91651d41062aa8320 --- /dev/null +++ b/workloads/realworld/pinned/BN/run.sh @@ -0,0 +1,5 @@ +# ./ordergraph_25 +# ./ordergraph_30 +# ./ordergraph_40 +# ./ordergraph_45 +./ordergraph_50 \ No newline at end of file diff --git a/workloads/realworld/pinned/BN/run_super.sh b/workloads/realworld/pinned/BN/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..5c45d88db0716b0b4b0828ba397cbd918d1612c0 --- /dev/null +++ b/workloads/realworld/pinned/BN/run_super.sh @@ -0,0 +1 @@ +./ordergraph_50 diff --git a/workloads/realworld/pinned/backprop/Makefile b/workloads/realworld/pinned/backprop/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..286cd40af79bbb80b6d86aad9bd0d2c0d1a846e0 --- /dev/null +++ b/workloads/realworld/pinned/backprop/Makefile @@ -0,0 +1,47 @@ +include ../../../common/make.config + +# C compiler +CC = gcc +CC_FLAGS = -g -O2 + +# CUDA compiler +NVCC = $(CUDA_DIR)/bin/nvcc +NVCC_FLAGS = -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -arch=sm_80 + +# 'make dbg=1' enables NVCC debugging +ifeq ($(dbg),1) + NVCC_FLAGS += -g -O0 +else + NVCC_FLAGS += -O2 +endif + +# 'make emu=1' compiles the CUDA kernels for emulation +ifeq ($(emu),1) + NVCC_FLAGS += -deviceemu +endif + + +backprop: backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + $(NVCC) $(NVCC_FLAGS) backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -o backprop $(NVCC_FLAGS) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++ + +# backprop: backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp +# $(CC) $(CC_FLAGS) backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -o backprop -L$(CUDA_LIB_DIR) 
-L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++ + +%.o: %.[ch] + $(CC) $(CC_FLAGS) $< -c + +facetrain.o: facetrain.c backprop.h + $(CC) $(CC_FLAGS) facetrain.c -c + +backprop.o: backprop.c backprop.h + $(CC) $(CC_FLAGS) backprop.c -c + +backprop_cuda.o: backprop_cuda.cu backprop.h $(CUPTI_ADD_COMMON)/cupti_add.h $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + $(NVCC) $(NVCC_FLAGS) -c backprop_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +imagenet.o: imagenet.c backprop.h + $(CC) $(CC_FLAGS) imagenet.c -c + + +clean: + rm -f *.o *~ backprop backprop_cuda.linkinfo diff --git a/workloads/realworld/pinned/backprop/backprop b/workloads/realworld/pinned/backprop/backprop new file mode 100755 index 0000000000000000000000000000000000000000..ef7c3bb5e8ea42937ebbb810725d90c505aa5e93 Binary files /dev/null and b/workloads/realworld/pinned/backprop/backprop differ diff --git a/workloads/realworld/pinned/backprop/backprop.c b/workloads/realworld/pinned/backprop/backprop.c new file mode 100644 index 0000000000000000000000000000000000000000..759c6a03e96da88844efb5ca8a9ecd8ec9da9b02 --- /dev/null +++ b/workloads/realworld/pinned/backprop/backprop.c @@ -0,0 +1,516 @@ +/* + ****************************************************************** + * HISTORY + * 15-Oct-94 Jeff Shufelt (js), Carnegie Mellon University + * Prepared for 15-681, Fall 1994. + * Modified by Shuai Che + ****************************************************************** + */ +#include +#include +#include +#include +#include "backprop.h" +#include +//#define OPEN + +#define ABS(x) (((x) > 0.0) ? 
(x) : (-(x))) + +#define fastcopy(to,from,len)\ +{\ + register char *_to,*_from;\ + register int _i,_l;\ + _to = (char *)(to);\ + _from = (char *)(from);\ + _l = (len);\ + for (_i = 0; _i < _l; _i++) *_to++ = *_from++;\ +} + +/*** Return random number between 0.0 and 1.0 ***/ +float drnd() +{ + return ((float) rand() / (float) BIGRND); +} + +/*** Return random number between -1.0 and 1.0 ***/ +float dpn1() +{ + return ((drnd() * 2.0) - 1.0); +} + +/*** The squashing function. Currently, it's a sigmoid. ***/ + +float squash(x) +float x; +{ + float m; + //x = -x; + //m = 1 + x + x*x/2 + x*x*x/6 + x*x*x*x/24 + x*x*x*x*x/120; + //return(1.0 / (1.0 + m)); + return (1.0 / (1.0 + exp(-x))); +} + + +/*** Allocate 1d array of floats ***/ + +float *alloc_1d_dbl(n) +int n; +{ + float *new; + + // Ruihao + // new = (float *) malloc ((unsigned) (n * sizeof (float))); + cudaMallocHost(&new, n*sizeof(float)); + // Ruihao + if (new == NULL) { + printf("ALLOC_1D_DBL: Couldn't allocate array of floats\n"); + return (NULL); + } + return (new); +} + + +/*** Allocate 2d array of floats ***/ + +float **alloc_2d_dbl(m, n) +int m, n; +{ + int i; + float **new; + + // Ruihao + new = (float **) malloc ((unsigned) (m * sizeof (float *))); + // new = (float **) malloc ((unsigned) (m * sizeof (float *))); + // Ruihao + if (new == NULL) { + printf("ALLOC_2D_DBL: Couldn't allocate array of dbl ptrs\n"); + return (NULL); + } + + for (i = 0; i < m; i++) { + new[i] = alloc_1d_dbl(n); + } + + return (new); +} + + +bpnn_randomize_weights(w, m, n) +float **w; +int m, n; +{ + int i, j; + + for (i = 0; i <= m; i++) { + for (j = 0; j <= n; j++) { + w[i][j] = (float) rand()/RAND_MAX; + // w[i][j] = dpn1(); + } + } +} + +bpnn_randomize_row(w, m) +float *w; +int m; +{ + int i; + for (i = 0; i <= m; i++) { + //w[i] = (float) rand()/RAND_MAX; + w[i] = 0.1; + } +} + + +bpnn_zero_weights(w, m, n) +float **w; +int m, n; +{ + int i, j; + + for (i = 0; i <= m; i++) { + for (j = 0; j <= n; j++) { + w[i][j] = 0.0; + 
} + } +} + + +void bpnn_initialize(seed) +{ + printf("Random number generator seed: %d\n", seed); + srand(seed); +} + + +BPNN *bpnn_internal_create(n_in, n_hidden, n_out) +int n_in, n_hidden, n_out; +{ + BPNN *newnet; + + newnet = (BPNN *) malloc (sizeof (BPNN)); + if (newnet == NULL) { + printf("BPNN_CREATE: Couldn't allocate neural network\n"); + return (NULL); + } + + newnet->input_n = n_in; + newnet->hidden_n = n_hidden; + newnet->output_n = n_out; + newnet->input_units = alloc_1d_dbl(n_in + 1); + newnet->hidden_units = alloc_1d_dbl(n_hidden + 1); + newnet->output_units = alloc_1d_dbl(n_out + 1); + + newnet->hidden_delta = alloc_1d_dbl(n_hidden + 1); + newnet->output_delta = alloc_1d_dbl(n_out + 1); + newnet->target = alloc_1d_dbl(n_out + 1); + + newnet->input_weights = alloc_2d_dbl(n_in + 1, n_hidden + 1); + newnet->hidden_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); + + newnet->input_prev_weights = alloc_2d_dbl(n_in + 1, n_hidden + 1); + newnet->hidden_prev_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); + + return (newnet); +} + + +void bpnn_free(net) +BPNN *net; +{ + int n1, n2, i; + + n1 = net->input_n; + n2 = net->hidden_n; + + // free((char *) net->input_units); + // free((char *) net->hidden_units); + // free((char *) net->output_units); + + // free((char *) net->hidden_delta); + // free((char *) net->output_delta); + // free((char *) net->target); + + cudaFreeHost((void *) net->input_units); + cudaFreeHost((void *) net->hidden_units); + cudaFreeHost((void *) net->output_units); + + cudaFreeHost((void *) net->hidden_delta); + cudaFreeHost((void *) net->output_delta); + cudaFreeHost((void *) net->target); + + for (i = 0; i <= n1; i++) { + free((char *) net->input_weights[i]); + free((char *) net->input_prev_weights[i]); + } + free((char *) net->input_weights); + free((char *) net->input_prev_weights); + + for (i = 0; i <= n2; i++) { + free((char *) net->hidden_weights[i]); + free((char *) net->hidden_prev_weights[i]); + } + free((char *) 
net->hidden_weights); + free((char *) net->hidden_prev_weights); + + free((char *) net); +} + + +/*** Creates a new fully-connected network from scratch, + with the given numbers of input, hidden, and output units. + Threshold units are automatically included. All weights are + randomly initialized. + + Space is also allocated for temporary storage (momentum weights, + error computations, etc). +***/ + +BPNN *bpnn_create(n_in, n_hidden, n_out) +int n_in, n_hidden, n_out; +{ + + BPNN *newnet; + + newnet = bpnn_internal_create(n_in, n_hidden, n_out); + +#ifdef INITZERO + bpnn_zero_weights(newnet->input_weights, n_in, n_hidden); +#else + bpnn_randomize_weights(newnet->input_weights, n_in, n_hidden); +#endif + bpnn_randomize_weights(newnet->hidden_weights, n_hidden, n_out); + bpnn_zero_weights(newnet->input_prev_weights, n_in, n_hidden); + bpnn_zero_weights(newnet->hidden_prev_weights, n_hidden, n_out); + bpnn_randomize_row(newnet->target, n_out); + return (newnet); +} + + +void bpnn_layerforward(l1, l2, conn, n1, n2) +float *l1, *l2, **conn; +int n1, n2; +{ + float sum; + int j, k; + + /*** Set up thresholding unit ***/ + l1[0] = 1.0; +#ifdef OPEN + omp_set_num_threads(NUM_THREAD); + #pragma omp parallel for shared(conn, n1, n2, l1) private(k, j) reduction(+: sum) schedule(static) +#endif + /*** For each unit in second layer ***/ + for (j = 1; j <= n2; j++) { + + /*** Compute weighted sum of its inputs ***/ + sum = 0.0; + for (k = 0; k <= n1; k++) { + sum += conn[k][j] * l1[k]; + } + l2[j] = squash(sum); + } +} + +//extern "C" +void bpnn_output_error(delta, target, output, nj, err) +float *delta, *target, *output, *err; +int nj; +{ + int j; + float o, t, errsum; + errsum = 0.0; + for (j = 1; j <= nj; j++) { + o = output[j]; + t = target[j]; + delta[j] = o * (1.0 - o) * (t - o); + errsum += ABS(delta[j]); + } + *err = errsum; +} + + +void bpnn_hidden_error(delta_h, + nh, + delta_o, + no, + who, + hidden, + err) +float *delta_h, *delta_o, *hidden, **who, *err; +int nh, 
no; +{ + int j, k; + float h, sum, errsum; + + errsum = 0.0; + for (j = 1; j <= nh; j++) { + h = hidden[j]; + sum = 0.0; + for (k = 1; k <= no; k++) { + sum += delta_o[k] * who[j][k]; + } + delta_h[j] = h * (1.0 - h) * sum; + errsum += ABS(delta_h[j]); + } + *err = errsum; +} + + +void bpnn_adjust_weights(delta, ndelta, ly, nly, w, oldw) +float *delta, *ly, **w, **oldw; +{ + float new_dw; + int k, j; + ly[0] = 1.0; + //eta = 0.3; + //momentum = 0.3; + +#ifdef OPEN + omp_set_num_threads(NUM_THREAD); + #pragma omp parallel for \ + shared(oldw, w, delta) \ + private(j, k, new_dw) \ + firstprivate(ndelta, nly, momentum) +#endif + for (j = 1; j <= ndelta; j++) { + for (k = 0; k <= nly; k++) { + new_dw = ((ETA * delta[j] * ly[k]) + (MOMENTUM * oldw[k][j])); + w[k][j] += new_dw; + oldw[k][j] = new_dw; + } + } +} + + +void bpnn_feedforward(net) +BPNN *net; +{ + int in, hid, out; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + + /*** Feed forward input activations. ***/ + bpnn_layerforward(net->input_units, net->hidden_units, + net->input_weights, in, hid); + bpnn_layerforward(net->hidden_units, net->output_units, + net->hidden_weights, hid, out); + +} + + +void bpnn_train(net, eo, eh) +BPNN *net; +float *eo, *eh; +{ + int in, hid, out; + float out_err, hid_err; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + + /*** Feed forward input activations. ***/ + bpnn_layerforward(net->input_units, net->hidden_units, + net->input_weights, in, hid); + bpnn_layerforward(net->hidden_units, net->output_units, + net->hidden_weights, hid, out); + + /*** Compute error on output and hidden units. ***/ + bpnn_output_error(net->output_delta, net->target, net->output_units, + out, &out_err); + bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, + net->hidden_weights, net->hidden_units, &hid_err); + *eo = out_err; + *eh = hid_err; + + /*** Adjust input and hidden weights. 
***/ + bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, + net->hidden_weights, net->hidden_prev_weights); + bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in, + net->input_weights, net->input_prev_weights); + +} + + + + +void bpnn_save(net, filename) +BPNN *net; +char *filename; +{ + int n1, n2, n3, i, j, memcnt; + float dvalue, **w; + char *mem; + ///add// + FILE *pFile; + pFile = fopen( filename, "w+" ); + /////// + /* + if ((fd = creat(filename, 0644)) == -1) { + printf("BPNN_SAVE: Cannot create '%s'\n", filename); + return; + } + */ + + n1 = net->input_n; n2 = net->hidden_n; n3 = net->output_n; + printf("Saving %dx%dx%d network to '%s'\n", n1, n2, n3, filename); + //fflush(stdout); + + //write(fd, (char *) &n1, sizeof(int)); + //write(fd, (char *) &n2, sizeof(int)); + //write(fd, (char *) &n3, sizeof(int)); + + fwrite( (char *) &n1 , sizeof(char), sizeof(char), pFile); + fwrite( (char *) &n2 , sizeof(char), sizeof(char), pFile); + fwrite( (char *) &n3 , sizeof(char), sizeof(char), pFile); + + + + memcnt = 0; + w = net->input_weights; + mem = (char *) malloc ((unsigned) ((n1+1) * (n2+1) * sizeof(float))); + for (i = 0; i <= n1; i++) { + for (j = 0; j <= n2; j++) { + dvalue = w[i][j]; + fastcopy(&mem[memcnt], &dvalue, sizeof(float)); + memcnt += sizeof(float); + } + } + //write(fd, mem, (n1+1) * (n2+1) * sizeof(float)); + fwrite( mem , (unsigned)(sizeof(float)), (unsigned) ((n1+1) * (n2+1) * sizeof(float)) , pFile); + free(mem); + + memcnt = 0; + w = net->hidden_weights; + mem = (char *) malloc ((unsigned) ((n2+1) * (n3+1) * sizeof(float))); + for (i = 0; i <= n2; i++) { + for (j = 0; j <= n3; j++) { + dvalue = w[i][j]; + fastcopy(&mem[memcnt], &dvalue, sizeof(float)); + memcnt += sizeof(float); + } + } + //write(fd, mem, (n2+1) * (n3+1) * sizeof(float)); + fwrite( mem , sizeof(float), (unsigned) ((n2+1) * (n3+1) * sizeof(float)) , pFile); + free(mem); + + fclose(pFile); + return; +} + + +BPNN *bpnn_read(filename) +char 
*filename; +{ + char *mem; + BPNN *new; + int fd, n1, n2, n3, i, j, memcnt; + + if ((fd = open(filename, 0, 0644)) == -1) { + return (NULL); + } + + printf("Reading '%s'\n", filename); //fflush(stdout); + + read(fd, (char *) &n1, sizeof(int)); + read(fd, (char *) &n2, sizeof(int)); + read(fd, (char *) &n3, sizeof(int)); + new = bpnn_internal_create(n1, n2, n3); + + printf("'%s' contains a %dx%dx%d network\n", filename, n1, n2, n3); + printf("Reading input weights..."); //fflush(stdout); + + memcnt = 0; + mem = (char *) malloc ((unsigned) ((n1+1) * (n2+1) * sizeof(float))); + read(fd, mem, (n1+1) * (n2+1) * sizeof(float)); + for (i = 0; i <= n1; i++) { + for (j = 0; j <= n2; j++) { + fastcopy(&(new->input_weights[i][j]), &mem[memcnt], sizeof(float)); + memcnt += sizeof(float); + } + } + free(mem); + + printf("Done\nReading hidden weights..."); //fflush(stdout); + + memcnt = 0; + mem = (char *) malloc ((unsigned) ((n2+1) * (n3+1) * sizeof(float))); + read(fd, mem, (n2+1) * (n3+1) * sizeof(float)); + for (i = 0; i <= n2; i++) { + for (j = 0; j <= n3; j++) { + fastcopy(&(new->hidden_weights[i][j]), &mem[memcnt], sizeof(float)); + memcnt += sizeof(float); + } + } + free(mem); + close(fd); + + printf("Done\n"); //fflush(stdout); + + bpnn_zero_weights(new->input_prev_weights, n1, n2); + bpnn_zero_weights(new->hidden_prev_weights, n2, n3); + + return (new); +} diff --git a/workloads/realworld/pinned/backprop/backprop.h b/workloads/realworld/pinned/backprop/backprop.h new file mode 100644 index 0000000000000000000000000000000000000000..dfaafe39b76a1c9c1455e38bbe0a14d5461d5d2b --- /dev/null +++ b/workloads/realworld/pinned/backprop/backprop.h @@ -0,0 +1,53 @@ +#ifndef _BACKPROP_H_ +#define _BACKPROP_H_ + +#define BIGRND 0x7fffffff + +#define GPU +#define THREADS 256 +#define WIDTH 16 // shared memory width +#define HEIGHT 16 // shared memory height + +#define ETA 0.3 //eta value +#define MOMENTUM 0.3 //momentum value +#define NUM_THREAD 4 //OpenMP threads + + +typedef struct 
{ + int input_n; /* number of input units */ + int hidden_n; /* number of hidden units */ + int output_n; /* number of output units */ + + float *input_units; /* the input units */ + float *hidden_units; /* the hidden units */ + float *output_units; /* the output units */ + + float *hidden_delta; /* storage for hidden unit error */ + float *output_delta; /* storage for output unit error */ + + float *target; /* storage for target vector */ + + float **input_weights; /* weights from input to hidden layer */ + float **hidden_weights; /* weights from hidden to output layer */ + + /*** The next two are for momentum ***/ + float **input_prev_weights; /* previous change on input to hidden wgt */ + float **hidden_prev_weights; /* previous change on hidden to output wgt */ +} BPNN; + + +/*** User-level functions ***/ + +void bpnn_initialize(); + +BPNN *bpnn_create(); +void bpnn_free(); + +void bpnn_train(); +void bpnn_feedforward(); + +void bpnn_save(); +BPNN *bpnn_read(); + + +#endif diff --git a/workloads/realworld/pinned/backprop/backprop.o b/workloads/realworld/pinned/backprop/backprop.o new file mode 100644 index 0000000000000000000000000000000000000000..babf13457eebac9785455de7c53940450a4faf6b Binary files /dev/null and b/workloads/realworld/pinned/backprop/backprop.o differ diff --git a/workloads/realworld/pinned/backprop/backprop_cuda.cu b/workloads/realworld/pinned/backprop/backprop_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..77814eb9bb4c8ed7245663efe6c51ec49047d195 --- /dev/null +++ b/workloads/realworld/pinned/backprop/backprop_cuda.cu @@ -0,0 +1,247 @@ + + +// includes, system +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + 
Tp.tv_usec * 1.0e-6); +} + +double t_start, t_end; + +// includes, kernels +#include "backprop_cuda_kernel.cu" +#include "backprop.h" + +//////////////////////////////////////////////////////////////////////////////// + +extern "C" void bpnn_layerforward(float *l1, float *l2, float **conn, int n1, int n2); + +extern "C" void bpnn_output_error(float *delta, float *target, float *output, int nj, float *err); + +extern "C" void bpnn_hidden_error(float *delta_h, int nh, float *delta_o, int no, float **who, float *hidden, float *err); + +extern "C" void bpnn_adjust_weights(float *delta, int ndelta, float *ly, int nly, float **w, float **oldw); + +extern "C" int setup(int argc, char **argv); + +extern "C" float **alloc_2d_dbl(int m, int n); + +extern "C" float squash(float x); + +double gettime() +{ + struct timeval t; + gettimeofday(&t, NULL); + return t.tv_sec + t.tv_usec * 1e-6; +} + +unsigned int num_threads = 0; +unsigned int num_blocks = 0; + +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + num_blocks = atoi(argv[2]); + setup(argc, argv); +} + +extern "C" void bpnn_train_cuda(BPNN *net, float *eo, float *eh) +{ + int in, hid, out; + float out_err, hid_err; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + +#ifdef GPU + int m = 0; + float *input_hidden_cuda; + float *input_cuda; + float *output_hidden_cuda; + float *partial_sum; + float *hidden_partial_sum; + float *hidden_delta_cuda; + float *input_prev_weights_cuda; + float sum; + float *input_weights_one_dim; + float *input_weights_prev_one_dim; + // ruihao + // num_blocks = in / 16; + // dim3 grid(1, num_blocks); + // dim3 threads(16, 16); + + int tile_size = in / num_blocks; + dim3 grid(1, 
num_blocks); + dim3 threads(16, 16); + // ruihao + + input_weights_one_dim = (float *)malloc((in + 1) * (hid + 1) * sizeof(float)); + input_weights_prev_one_dim = (float *)malloc((in + 1) * (hid + 1) * sizeof(float)); + // ruihao + // partial_sum = (float *) malloc(num_blocks * WIDTH * sizeof(float)); + partial_sum = (float *)malloc(in * sizeof(float)); + // ruihao + + // this preprocessing stage is added to correct the bugs of wrong memcopy using two-dimensional net->inputweights + for (int k = 0; k <= in; k++) + { + for (int j = 0; j <= hid; j++) + { + input_weights_one_dim[m] = net->input_weights[k][j]; + input_weights_prev_one_dim[m] = net->input_prev_weights[k][j]; + m++; + } + } + + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaMalloc((void **)&input_cuda, (in + 1) * sizeof(float)); + cudaMalloc((void **)&output_hidden_cuda, (hid + 1) * sizeof(float)); + cudaMalloc((void **)&input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float)); + // ruihao + // cudaMalloc((void**) &hidden_partial_sum, num_blocks * WIDTH * sizeof(float)); + cudaMalloc((void **)&hidden_partial_sum, in * sizeof(float)); + // ruihao + +#endif + +#ifdef CPU + + printf("Performing CPU computation\n"); + bpnn_layerforward(net->input_units, net->hidden_units, net->input_weights, in, hid); + +#endif + +#ifdef GPU + + //printf("Performing GPU computation\n"); + + // printf("in= %d, hid = %d, numblocks = %d\n", in, hid, num_blocks); + + + cudaMemcpy(input_cuda, net->input_units, (in + 1) * sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(input_hidden_cuda, input_weights_one_dim, (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyHostToDevice); + + // ruihao + //t_start = rtclock(); + // ruihao + bpnn_layerforward_CUDA<<>>(input_cuda, + output_hidden_cuda, + input_hidden_cuda, + hidden_partial_sum, + in, + hid, + tile_size); + + cudaDeviceSynchronize(); + + // ruihao + // cudaMemcpy(partial_sum, hidden_partial_sum, num_blocks * WIDTH * sizeof(float), cudaMemcpyDeviceToHost); + //t_end = 
rtclock(); + //fprintf(stdout, "bpnn_layerforward_CUDA GPU Runtime: %0.6lfs\n", t_end - t_start); + cudaMemcpy(partial_sum, hidden_partial_sum, in * sizeof(float), cudaMemcpyDeviceToHost); + // ruihao + + cudaError_t error = cudaGetLastError(); + if (error != cudaSuccess) + { + printf("bpnn kernel error: %s\n", cudaGetErrorString(error)); + exit(EXIT_FAILURE); + } + + for (int j = 1; j <= hid; j++) + { + sum = 0.0; + // ruihao + // for (int k = 0; k < num_blocks; k++) { + // sum += partial_sum[k * hid + j-1] ; + // } + for (int k = 0; k < in / WIDTH; k++) + { + sum += partial_sum[k * hid + j - 1]; + } + // ruihao + sum += net->input_weights[0][j]; + net->hidden_units[j] = float(1.0 / (1.0 + exp(-sum))); + } +#endif + + bpnn_layerforward(net->hidden_units, net->output_units, net->hidden_weights, hid, out); + bpnn_output_error(net->output_delta, net->target, net->output_units, out, &out_err); + bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, net->hidden_weights, net->hidden_units, &hid_err); + bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, net->hidden_weights, net->hidden_prev_weights); + +#ifdef CPU + + bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in, net->input_weights, net->input_prev_weights); + +#endif + +#ifdef GPU + + cudaMalloc((void **)&hidden_delta_cuda, (hid + 1) * sizeof(float)); + cudaMalloc((void **)&input_prev_weights_cuda, (in + 1) * (hid + 1) * sizeof(float)); + + cudaMemcpy(hidden_delta_cuda, net->hidden_delta, (hid + 1) * sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(input_prev_weights_cuda, input_weights_prev_one_dim, (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(input_hidden_cuda, input_weights_one_dim, (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyHostToDevice); + // ruihao + //t_start = rtclock(); + // ruihao + bpnn_adjust_weights_cuda<<>>(hidden_delta_cuda, + hid, + input_cuda, + in, + input_hidden_cuda, + input_prev_weights_cuda, + tile_size); + // 
ruihao + cudaDeviceSynchronize(); + //t_end = rtclock(); + //fprintf(stdout, "bpnn_adjust_weights_cuda GPU Runtime: %0.6lfs\n", t_end - t_start); + // ruihao + cudaMemcpy(net->input_units, input_cuda, (in + 1) * sizeof(float), cudaMemcpyDeviceToHost); + cudaMemcpy(input_weights_one_dim, input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyDeviceToHost); + + + cudaFree(input_cuda); + cudaFree(output_hidden_cuda); + cudaFree(input_hidden_cuda); + cudaFree(hidden_partial_sum); + cudaFree(input_prev_weights_cuda); + cudaFree(hidden_delta_cuda); + + endCPU(); + finiTrace(); + + free(partial_sum); + free(input_weights_one_dim); + free(input_weights_prev_one_dim); + +#endif +} diff --git a/workloads/realworld/pinned/backprop/backprop_cuda.o b/workloads/realworld/pinned/backprop/backprop_cuda.o new file mode 100644 index 0000000000000000000000000000000000000000..20fd2755726d57cf53f3728608218f4f0ba7d4f4 Binary files /dev/null and b/workloads/realworld/pinned/backprop/backprop_cuda.o differ diff --git a/workloads/realworld/pinned/backprop/backprop_cuda_kernel.cu b/workloads/realworld/pinned/backprop/backprop_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..27f07767e27b29a189b99a1a0c6010ad2ee032e6 --- /dev/null +++ b/workloads/realworld/pinned/backprop/backprop_cuda_kernel.cu @@ -0,0 +1,110 @@ + + +#ifndef _BACKPROP_CUDA_KERNEL_H_ +#define _BACKPROP_CUDA_KERNEL_H_ + +#include +#include "backprop.h" +#include "math.h" +#include "cuda.h" + +#include +#include + +__global__ void +bpnn_layerforward_CUDA(float *input_cuda, + float *output_hidden_cuda, + float *input_hidden_cuda, + float *hidden_partial_sum, + int in, + int hid, + int tile_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + int by = blockIdx.y; + int tx = threadIdx.x; + int ty = threadIdx.y; + + int batches = tile_size / WIDTH; + + __shared__ float input_node[HEIGHT]; + __shared__ float weight_matrix[HEIGHT * WIDTH]; + + for 
(int b = 0; b < batches; b++) + { + int index = (hid + 1) * HEIGHT * (batches * by + b) + (hid + 1) * ty + tx + 1 + (hid + 1); + + int index_in = HEIGHT * (batches * by + b) + ty + 1; + + if (tx == 0) + input_node[ty] = input_cuda[index_in]; + + block.sync(); + + weight_matrix[ty * WIDTH + tx] = input_hidden_cuda[index]; + + block.sync(); + + weight_matrix[ty * WIDTH + tx] = weight_matrix[ty * WIDTH + tx] * input_node[ty]; + + block.sync(); + + for (int i = 1; i <= __log2f(HEIGHT); i++) + { + + int power_two = __powf(2, i); + + if (ty % power_two == 0) + weight_matrix[ty * WIDTH + tx] += weight_matrix[(ty + power_two / 2) * WIDTH + tx]; + + block.sync(); + } + + input_hidden_cuda[index] = weight_matrix[ty * WIDTH + tx]; + + block.sync(); + + if (tx == 0) + { + hidden_partial_sum[(batches * by + b) * hid + ty] = weight_matrix[tx * WIDTH + ty]; + } + } +} + +__global__ void bpnn_adjust_weights_cuda(float *delta, + int hid, + float *ly, + int in, + float *w, + float *oldw, + int tile_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + int by = blockIdx.y; + + int tx = threadIdx.x; + int ty = threadIdx.y; + + int batches = tile_size / WIDTH; + + for (int b = 0; b < batches; b++) + { + int index = (hid + 1) * HEIGHT * (batches * by + b) + (hid + 1) * ty + tx + 1 + (hid + 1); + int index_y = HEIGHT * (batches * by + b) + ty + 1; + int index_x = tx + 1; + // eta = 0.3; + // momentum = 0.3; + + w[index] += ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index])); + oldw[index] = ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index])); + + block.sync(); + + if (ty == 0 && by == 0 && b == 0) + { + w[index_x] += ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x])); + oldw[index_x] = ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x])); + } + } +} +#endif diff --git a/workloads/realworld/pinned/backprop/cpu_timestamps.o b/workloads/realworld/pinned/backprop/cpu_timestamps.o new file mode 100644 index 
0000000000000000000000000000000000000000..c2a2035a71f58217d4b226d4471815a71028d591 Binary files /dev/null and b/workloads/realworld/pinned/backprop/cpu_timestamps.o differ diff --git a/workloads/realworld/pinned/backprop/cupti_add.o b/workloads/realworld/pinned/backprop/cupti_add.o new file mode 100644 index 0000000000000000000000000000000000000000..674a9489795889a08e802fee9b1a0a3ce63cb3c0 Binary files /dev/null and b/workloads/realworld/pinned/backprop/cupti_add.o differ diff --git a/workloads/realworld/pinned/backprop/facetrain.c b/workloads/realworld/pinned/backprop/facetrain.c new file mode 100644 index 0000000000000000000000000000000000000000..cbf83810934b68551d7dd4b7b94fda5eb6837276 --- /dev/null +++ b/workloads/realworld/pinned/backprop/facetrain.c @@ -0,0 +1,54 @@ + +#include +#include +#include +#include +#include "backprop.h" +#include "omp.h" + +extern char *strcpy(); +extern void exit(); + +int layer_size = 0; + +backprop_face() +{ + BPNN *net; + int i; + float out_err, hid_err; + net = bpnn_create(layer_size, 16, 1); // (16, 1 can not be changed) + + printf("Input layer size : %d\n", layer_size); + load(net); + // entering the training kernel, only one iteration + printf("Starting training kernel\n"); + bpnn_train_cuda(net, &out_err, &hid_err); + bpnn_free(net); + printf("Training done\n"); +} + +int setup(argc, argv) +int argc; +char *argv[]; +{ + + int seed; + + if (argc != 3) + { + fprintf(stderr, "usage: backprop \n"); + exit(0); + } + layer_size = atoi(argv[1]); + if (layer_size % 16 != 0) + { + fprintf(stderr, "The number of input points must be divided by 16\n"); + exit(0); + } + + seed = 7; + bpnn_initialize(seed); + backprop_face(); + + exit(0); +} diff --git a/workloads/realworld/pinned/backprop/facetrain.o b/workloads/realworld/pinned/backprop/facetrain.o new file mode 100644 index 0000000000000000000000000000000000000000..9c3e12e02799597d4051ded4ac8cd347a2df5cfb Binary files /dev/null and b/workloads/realworld/pinned/backprop/facetrain.o 
differ diff --git a/workloads/realworld/pinned/backprop/imagenet.c b/workloads/realworld/pinned/backprop/imagenet.c new file mode 100644 index 0000000000000000000000000000000000000000..255b0d5d8ca67508f6732e266299e7c58012906f --- /dev/null +++ b/workloads/realworld/pinned/backprop/imagenet.c @@ -0,0 +1,24 @@ + +#include +#include +#include "backprop.h" + +extern layer_size; + +load(net) +BPNN *net; +{ + float *units; + int nr, nc, imgsize, i, j, k; + + nr = layer_size; + + imgsize = nr * nc; + units = net->input_units; + + k = 1; + for (i = 0; i < nr; i++) { + units[k] = (float) rand()/RAND_MAX ; + k++; + } +} diff --git a/workloads/realworld/pinned/backprop/imagenet.o b/workloads/realworld/pinned/backprop/imagenet.o new file mode 100644 index 0000000000000000000000000000000000000000..9b1e42bf011cf38e4d05768c3ab7edf3b53640f5 Binary files /dev/null and b/workloads/realworld/pinned/backprop/imagenet.o differ diff --git a/workloads/realworld/pinned/backprop/run.sh b/workloads/realworld/pinned/backprop/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..3ff765b6ee27aa6718be615c512868bee7530ec7 --- /dev/null +++ b/workloads/realworld/pinned/backprop/run.sh @@ -0,0 +1,5 @@ +# ./backprop 524288 +./backprop 8388608 128 + +# ./backprop 66708864 128 +# ./backprop 66708864 1024 \ No newline at end of file diff --git a/workloads/realworld/pinned/backprop/run_super.sh b/workloads/realworld/pinned/backprop/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..d9e8a3d42354f597bbcd153d180d9b23bed71192 --- /dev/null +++ b/workloads/realworld/pinned/backprop/run_super.sh @@ -0,0 +1,3 @@ +# ./backprop 1073741824 1024 +# ./backprop 134217728 1024 +./backprop 67108864 1024 \ No newline at end of file diff --git a/workloads/realworld/pinned/darknet/LICENSE b/workloads/realworld/pinned/darknet/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..a50f7d700ba02bfacd50f59b315311cf4d0bbda2 --- /dev/null +++ 
b/workloads/realworld/pinned/darknet/LICENSE @@ -0,0 +1,12 @@ + YOLO LICENSE + Version 2, July 29 2016 + +THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER +SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN +TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES +LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY. NOW HERE'S +THE REAL LICENSE: + +0. Darknet is public domain. +1. Do whatever you want with it. +2. Stop emailing me about it! diff --git a/workloads/realworld/pinned/darknet/LICENSE.fuck b/workloads/realworld/pinned/darknet/LICENSE.fuck new file mode 100644 index 0000000000000000000000000000000000000000..8b1a9d8189b3b9f4479221d52882ce36fdc73a62 --- /dev/null +++ b/workloads/realworld/pinned/darknet/LICENSE.fuck @@ -0,0 +1,13 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + +Copyright (C) 2004 Sam Hocevar + +Everyone is permitted to copy and distribute verbatim or modified +copies of this license document, and changing it is allowed as long +as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. diff --git a/workloads/realworld/pinned/darknet/LICENSE.gen b/workloads/realworld/pinned/darknet/LICENSE.gen new file mode 100644 index 0000000000000000000000000000000000000000..c54113271e15057c4def6676693eb96fd6362b28 --- /dev/null +++ b/workloads/realworld/pinned/darknet/LICENSE.gen @@ -0,0 +1,91 @@ +RNN LICENSE Version 3, June 21 2017 + +Copyright (c) 1990, 1989, 1999 Free87337 May 48 THIRD PARTIES OR ANY OTHER THE +COMPLAIN OR CONSEQUENTIAL DAMAGES AND REGARDLESS OF WHETHER IN CONTRACT, TO THE +EXTENT REPAIR OR AGENTS (NOT THE IN ANY EVENT). 
THE SOFTWARE WILL BE +UNINTERRUPTED OR ERROR-FREE OR ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF ALL THE WORK (GOVERNED CODE) HIM RESPONSES, OR OF FINES, +SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR ANY OTHER OR OTHER HARL UNDER NO +CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), +PATENT PERMITTED BY THE INSTAGRAM PARENT STATE OR TORT (INCLUDING NEGLIGENCE), +PRODUCT LIABILITY OR OTHERWISE, ARISING OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR ANYTHING PROVIDED IN THIS PRODUCT, COMMIS AND SERVICES +ARE LICENSED SOFTWARE AND ANY RESULE OR ANY OTHER THE COPYRIGHT HOLDERS BE +LIABLE FOR ANY SPECIAL, INCIDENTAL, CASE, SUCH WARRANTIES, EXPRESS OR IMPLIED, +INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COPYRIGHT HOLDERS AND/OR ANY +PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY +EXPRESS OR DISTRIBUTE THAT ALL CLAIMS ARE SHALL CREATE DERAVE BE LIABLE TO YOU +WILL HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +6\. TERMINATION. TO THE EXTENT PERMITTED BY LAW, NO USE OF THE COVERED CODE IS +WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE +INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY +SERVICING, REPAIR OR COULT OR IN ANY WAY OUT OF THE USE OF THE WEBSITES OR +SERVICE WILL BE CONSEQUENTIAL DAMAGES OF ANY KIND HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + +This paragraph Agreement constitutes the entire agreement between the parties +with respect to the Work licensed here. However, if you place the name of the +fact that the arbitration was the consultation of the parties as a "patent is". 
+Subject to the terms and conditions of this License, Contributor has knowledge +that a license under a third party may also be used to endorse or promote +products derived from the Work, and there is no warranty on the Software and +Science Fees. For the purposes of this Agreement, attach the following +disclaimers (without liabilities of written notice to the Subject Software) in a +manner that a product is under common control with you. The Free Software +Foundation may publish revised and/or new versions of the License for the +Modifications made by the applicable terms. The Recipient shall promptly retain +the covered works for any reason be entered in any federal or state or login +Restricted Laws appearing in the United States or any of its own information +that is not disabled from a derivative work except as expressly permitted in +this License, to the extent that they are in receiving the Software and Source +Code or any exercise of the rights granted to You by this License or a +Contributor made by the Licensor or are authorized to make a reasonable +retirement by the courts of the courts located in Santa Clara County, California +printed and related to the Work or “Company” and Apache Software Foundation. If +the Licensor shall be entitled to reflect your rights to use the Software and +the Software to exercise the rights granted to the recipient without a +requirement to exercise the rights granted by the Agreement to the provision +will begin will appear in such cases, you will use such information without such +corporation shall be an officer with respect to any part of the Software or any +portion thereof. 
Capitalized terms are included in the Initial Contributor and +under no circumstances will license the Service at any time and for any direct, +indirect, special, incidental, or consequential damages of or assist in +connection with any Services or the registration purposes only to the extent +that it includes any or all means including the processing of which you download +any derivative work. Any of the purchases’ transmission purposes are made +available, if any, in other circumstances, we may review the copyright notice. +In the event that this Agreement is required to give us strict content. The +inclusion of the other party hereunder may also notify you Intellectual Property +Rights to any third party. This means that the Source Code exists of the Work +will not charge a program available to you at any time. You must include a +prominent statement that the Software is governed under a particular version of +this Agreement. You must include a provision to the extent that there is no +warranty for the content of others. You agree that the Recipient was appointed +as a Contributor, (c) are effective until terminated by hereunder, then the +registration are not disabled and not limited to, submit any Customer Data +without the updated use of the Software and that no fee is released. You grant +to Use Other Arbitration Rules for Diagnostic or Services may use or modify the +Apple Software and Consolidated Apple Software or Services. The Company may have +full risk as a product of the Compatible Source. A Contribution by the Licensor +or by the updated Software under the following conditions we can redistribute +any General Provision of this Agreement. If the Program is used in accordance +with the terms of this Agreement, Customer may provide advertisements from your +devices that clause you can your employer or a transaction or country that has +been controlled by the arbitrator, that they will be useful of this Agreement. 
+The term "Open Source Software is available in connection with the program, and +you may not protect the combination of the Covered Code. You should like to +select a user's rights to charge a copy of this License. I are Contributor's +confidentiality of the exercise of the rights granted herein. Such a covered +work is released as a consequence, the Licensor shall be eligible for a purpose +or subcontractor of the person or entity to the user of the user, then the word +"Application" means having the original fee for any reason; and that no patent +license to more than fifty stated close of the license term. The terms of this +License will the license terms and conditions set forth in Section 2.2 (OPEC) +and You will not use the Software or any set of responsibility for any resulting +information that the Original Code warrants that you have the right to disclose +these information (or in the notification; or (iii) late use of the software or +any third party to the three (50) days before such belief to the extent that it +includes a court court obtains the rights granted by this License. diff --git a/workloads/realworld/pinned/darknet/LICENSE.gpl b/workloads/realworld/pinned/darknet/LICENSE.gpl new file mode 100644 index 0000000000000000000000000000000000000000..9cecc1d4669ee8af2ca727a5d8cde10cd8b2d7cc --- /dev/null +++ b/workloads/realworld/pinned/darknet/LICENSE.gpl @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. 
+ + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + {one line to give the program's name and a brief idea of what it does.} + Copyright (C) {year} {name of author} + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + {project} Copyright (C) {year} {fullname} + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/workloads/realworld/pinned/darknet/LICENSE.meta b/workloads/realworld/pinned/darknet/LICENSE.meta new file mode 100644 index 0000000000000000000000000000000000000000..6728bd28d319c68ae04944fb034118dcc4c9aa09 --- /dev/null +++ b/workloads/realworld/pinned/darknet/LICENSE.meta @@ -0,0 +1,8 @@ + META-LICENSE + Version 1, June 21 2017 + +Any and all licenses may be applied to the software either individually +or in concert. Any issues, ambiguities, paradoxes, or metaphysical quandries +arising from this combination should be discussed with a local faith leader, +hermit, or guru. The Oxford comma shall be used. + diff --git a/workloads/realworld/pinned/darknet/LICENSE.mit b/workloads/realworld/pinned/darknet/LICENSE.mit new file mode 100644 index 0000000000000000000000000000000000000000..5bd806ce16ea5053c8631793787362439375026e --- /dev/null +++ b/workloads/realworld/pinned/darknet/LICENSE.mit @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2017 Joseph Redmon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/workloads/realworld/pinned/darknet/LICENSE.v1 b/workloads/realworld/pinned/darknet/LICENSE.v1 new file mode 100644 index 0000000000000000000000000000000000000000..5b8709acc43e7b76ed69758a52a9eaffaba775e6 --- /dev/null +++ b/workloads/realworld/pinned/darknet/LICENSE.v1 @@ -0,0 +1,13 @@ + YOLO LICENSE + Version 1, July 10 2015 + +THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER +SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN +TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES +LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY SUBJECT TO +THE FOLLOWING CONDITIONS: + +1. #yolo +2. #swag +3. #blazeit + diff --git a/workloads/realworld/pinned/darknet/Makefile b/workloads/realworld/pinned/darknet/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..5022f68377d7626ab768f2501883d72b013dafba --- /dev/null +++ b/workloads/realworld/pinned/darknet/Makefile @@ -0,0 +1,114 @@ +GPU=1 +CUDNN=0 +OPENCV=0 +OPENMP=0 +DEBUG=0 + +#ARCH= -gencode arch=compute_30,code=sm_30 \ +# -gencode arch=compute_35,code=sm_35 \ +# -gencode arch=compute_50,code=[sm_50,compute_50] \ +# -gencode arch=compute_52,code=[sm_52,compute_52] +# -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated? 
+ +# This is what I use, uncomment if you know your arch and want to specify +ARCH= -gencode arch=compute_80,code=sm_80 \ +#ARCH= -arch=sm_80 + +VPATH=./src/:./examples:$(CUPTI_ADD_COMMON) +SLIB=libdarknet.so +ALIB=libdarknet.a +EXEC=darknet +OBJDIR=./obj/ + +CC=gcc +CPP=g++ +# NVCC=nvcc --default-stream per-thread +NVCC=nvcc +AR=ar +ARFLAGS=rcs +OPTS=-Ofast +LDFLAGS= -lm -pthread +COMMON= -Iinclude/ -Isrc/ +CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC +ifeq ($(PROFILE), 1) +CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -DPROFILE +endif + + +ifeq ($(OPENMP), 1) +CFLAGS+= -fopenmp +endif + +ifeq ($(DEBUG), 1) +OPTS=-O0 -g +endif + +CFLAGS+=$(OPTS) + +ifeq ($(OPENCV), 1) +COMMON+= -DOPENCV +CFLAGS+= -DOPENCV +LDFLAGS+= `pkg-config --libs opencv` -lstdc++ +COMMON+= `pkg-config --cflags opencv` +endif + +ifeq ($(GPU), 1) +include ../../../common/make.config +COMMON+= -DGPU -I$(CUDA_DIR)/include/ -I$(CUDA_DIR)/extras/CUPTI/include/ +CFLAGS+= -DGPU +LDFLAGS+= -L$(CUDA_DIR)/lib64 -L$(CUDA_DIR)/extras/CUPTI/lib64/ -lcuda -lcudart -lcublas -lcurand -lcupti +endif + + +ifeq ($(CUDNN), 1) +COMMON+= -DCUDNN +CFLAGS+= -DCUDNN +LDFLAGS+= -lcudnn +endif + +OBJ=gemm.o utils.o cuda_dark.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o l2norm_layer.o yolo_layer.o iseg_layer.o image_opencv.o +EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o instance-segmenter.o darknet.o +ifeq 
($(GPU), 1) +LDFLAGS+= -lstdc++ +OBJ+=gemm_kernel.o convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o +endif + +# cpu_timestamps.o +# cupti_add.o + +EXECOBJ = $(addprefix $(OBJDIR), $(EXECOBJA)) +OBJS = $(addprefix $(OBJDIR), $(OBJ)) +DEPS = $(wildcard src/*.h) Makefile include/darknet.h + +all: obj backup results $(SLIB) $(ALIB) $(EXEC) + +$(EXEC): $(EXECOBJ) $(ALIB) + $(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS) $(ALIB) + +$(ALIB): $(OBJS) + $(AR) $(ARFLAGS) $@ $^ + +$(SLIB): $(OBJS) + $(CC) $(CFLAGS) -shared $^ -o $@ $(LDFLAGS) + +$(OBJDIR)%.o: %.cpp $(DEPS) + $(CPP) $(COMMON) $(CFLAGS) -c $< -o $@ + +$(OBJDIR)%.o: %.c $(DEPS) + $(CC) $(COMMON) $(CFLAGS) -c $< -o $@ + +$(OBJDIR)%.o: %.cu $(DEPS) + $(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@ + +obj: + mkdir -p obj +backup: + mkdir -p backup +results: + mkdir -p results + +.PHONY: clean + +clean: + rm -rf $(OBJS) $(SLIB) $(ALIB) $(EXEC) $(EXECOBJ) $(OBJDIR)/* + diff --git a/workloads/realworld/pinned/darknet/README.md b/workloads/realworld/pinned/darknet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fb58c2640038a963cd573d121e4fab59399f67dc --- /dev/null +++ b/workloads/realworld/pinned/darknet/README.md @@ -0,0 +1,124 @@ +![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png) + +# Darknet # +Darknet is an open source neural network framework written in C and CUDA. It is fast, easy to install, and supports CPU and GPU computation. 
+ +**Discord** invite link for for communication and questions: https://discord.gg/zSq8rtW + +## YOLOv7: + +* **paper** - YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors: https://arxiv.org/abs/2207.02696 + +* **source code - Pytorch (use to reproduce results):** https://github.com/WongKinYiu/yolov7 + +---- + +Official YOLOv7 is more accurate and faster than YOLOv5 by **120%** FPS, than YOLOX by **180%** FPS, than Dual-Swin-T by **1200%** FPS, than ConvNext by **550%** FPS, than SWIN-L by **500%** FPS. + +YOLOv7 surpasses all known object detectors in both speed and accuracy in the range from 5 FPS to 160 FPS and has the highest accuracy 56.8% AP among all known real-time object detectors with 30 FPS or higher on GPU V100, batch=1. + +* YOLOv7-e6 (55.9% AP, 56 FPS V100 b=1) by `+500%` FPS faster than SWIN-L Cascade-Mask R-CNN (53.9% AP, 9.2 FPS A100 b=1) +* YOLOv7-e6 (55.9% AP, 56 FPS V100 b=1) by `+550%` FPS faster than ConvNeXt-XL C-M-RCNN (55.2% AP, 8.6 FPS A100 b=1) +* YOLOv7-w6 (54.6% AP, 84 FPS V100 b=1) by `+120%` FPS faster than YOLOv5-X6-r6.1 (55.0% AP, 38 FPS V100 b=1) +* YOLOv7-w6 (54.6% AP, 84 FPS V100 b=1) by `+1200%` FPS faster than Dual-Swin-T C-M-RCNN (53.6% AP, 6.5 FPS V100 b=1) +* YOLOv7x (52.9% AP, 114 FPS V100 b=1) by `+150%` FPS faster than PPYOLOE-X (51.9% AP, 45 FPS V100 b=1) +* YOLOv7 (51.2% AP, 161 FPS V100 b=1) by `+180%` FPS faster than YOLOX-X (51.1% AP, 58 FPS V100 b=1) + +---- + +![more5](https://user-images.githubusercontent.com/4096485/179425274-f55a36d4-8450-4471-816b-8c105841effd.jpg) + +---- + +![image](https://user-images.githubusercontent.com/4096485/177675030-a929ee00-0eba-4d93-95c2-225231d0fd61.png) + + +---- + +![yolov7_640_1280](https://user-images.githubusercontent.com/4096485/177688869-d75e0c36-63af-46ec-bdbd-81dbb281f257.png) + +---- + +## Scaled-YOLOv4: + +* **paper (CVPR 2021)**: 
https://openaccess.thecvf.com/content/CVPR2021/html/Wang_Scaled-YOLOv4_Scaling_Cross_Stage_Partial_Network_CVPR_2021_paper.html + +* **source code - Pytorch (use to reproduce results):** https://github.com/WongKinYiu/ScaledYOLOv4 + +* **source code - Darknet:** https://github.com/AlexeyAB/darknet + +* **Medium:** https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982?source=friends_link&sk=c8553bfed861b1a7932f739d26f487c8 + +## YOLOv4: + +* **paper:** https://arxiv.org/abs/2004.10934 + +* **source code:** https://github.com/AlexeyAB/darknet + +* **Wiki:** https://github.com/AlexeyAB/darknet/wiki + +* **useful links:** https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7 + +For more information see the [Darknet project website](http://pjreddie.com/darknet). + + +
Expand + +![yolo_progress](https://user-images.githubusercontent.com/4096485/146988929-1ed0cbec-1e01-4ad0-b42c-808dcef32994.png) https://paperswithcode.com/sota/object-detection-on-coco + +---- + +![scaled_yolov4](https://user-images.githubusercontent.com/4096485/112776361-281d8380-9048-11eb-8083-8728b12dcd55.png) AP50:95 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2011.08036 + +---- + +![YOLOv4Tiny](https://user-images.githubusercontent.com/4096485/101363015-e5c21200-38b1-11eb-986f-b3e516e05977.png) + +---- + +![YOLOv4](https://user-images.githubusercontent.com/4096485/90338826-06114c80-dff5-11ea-9ba2-8eb63a7409b3.png) + +
+ +---- + +![OpenCV_TRT](https://user-images.githubusercontent.com/4096485/90338805-e5e18d80-dff4-11ea-8a68-5710956256ff.png) + + +## Citation + + +``` +@misc{https://doi.org/10.48550/arxiv.2207.02696, + doi = {10.48550/ARXIV.2207.02696}, + url = {https://arxiv.org/abs/2207.02696}, + author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + publisher = {arXiv}, + year = {2022}, + copyright = {arXiv.org perpetual, non-exclusive license} +} +``` + +``` +@misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +``` +@InProceedings{Wang_2021_CVPR, + author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + title = {{Scaled-YOLOv4}: Scaling Cross Stage Partial Network}, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2021}, + pages = {13029-13038} +} +``` diff --git a/workloads/realworld/pinned/darknet/cfg/alexnet.cfg b/workloads/realworld/pinned/darknet/cfg/alexnet.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e2ed4bb8e7b1bad7859aef0d802cb4084753cb7a --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/alexnet.cfg @@ -0,0 +1,96 @@ +[net] +# Training +# batch=128 +# subdivisions=1 +# Testing +batch=1 +subdivisions=1 +height=227 +width=227 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=256 + +learning_rate=0.01 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue = .1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +filters=96 +size=11 +stride=4 
+pad=0 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[convolutional] +filters=256 +size=5 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[convolutional] +filters=384 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=384 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=1000 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/cifar.cfg b/workloads/realworld/pinned/darknet/cfg/cifar.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b2f69f53903e55c24718ed12d9adaaa1557e3647 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/cifar.cfg @@ -0,0 +1,121 @@ +[net] +batch=128 +subdivisions=1 +height=28 +width=28 +channels=3 +max_crop=32 +min_crop=32 + +hue=.1 +saturation=.75 +exposure=.75 + +learning_rate=0.4 +policy=poly +power=4 +max_batches = 5000 +momentum=0.9 +decay=0.0005 + + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[dropout] +probability=.5 + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 diff --git a/workloads/realworld/pinned/darknet/cfg/cifar.data b/workloads/realworld/pinned/darknet/cfg/cifar.data new file mode 100644 index 0000000000000000000000000000000000000000..a52208db1b203b5e1f24d5afaf8c7002adfd71a3 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/cifar.data @@ -0,0 +1,7 @@ +classes=10 +train = data/cifar/train.list +valid = data/cifar/test.list +test = data/cifar/test.list +labels = data/cifar/labels.txt +backup = backup/ +top=2 diff --git a/workloads/realworld/pinned/darknet/cfg/cifar.test.cfg b/workloads/realworld/pinned/darknet/cfg/cifar.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..18b6c54c909152b1201d6320b85fafc5c36ba1ef --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/cifar.test.cfg @@ -0,0 +1,117 @@ +[net] +batch=128 +subdivisions=1 +height=32 +width=32 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.4 +policy=poly +power=4 +max_batches = 50000 + + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=512 
+size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[dropout] +probability=.5 + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 +temperature=3 + diff --git a/workloads/realworld/pinned/darknet/cfg/cifar_small.cfg b/workloads/realworld/pinned/darknet/cfg/cifar_small.cfg new file mode 100644 index 0000000000000000000000000000000000000000..d48b1231f0131faaa187b18df6705411c3d16a76 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/cifar_small.cfg @@ -0,0 +1,86 @@ +[net] +batch=128 +subdivisions=1 +height=28 +width=28 +channels=3 +max_crop=32 +min_crop=32 + +hue=.1 +saturation=.75 +exposure=.75 + +learning_rate=0.1 +policy=poly +power=4 +max_batches = 5000 +momentum=0.9 +decay=0.0005 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] diff --git a/workloads/realworld/pinned/darknet/cfg/coco.data b/workloads/realworld/pinned/darknet/cfg/coco.data new file mode 100644 index 0000000000000000000000000000000000000000..5951d5245a7895e8418bc3155de3b03049e76c42 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/coco.data @@ -0,0 +1,6 @@ +classes= 80 +train = 
/data/darknet/coco/valid.list +valid = /data/darknet/coco/valid.list +backup = /data/darknet/backup/ +names = /data/darknet/coco/coco.names +eval=coco diff --git a/workloads/realworld/pinned/darknet/cfg/coco.names b/workloads/realworld/pinned/darknet/cfg/coco.names new file mode 100644 index 0000000000000000000000000000000000000000..16315f2becec9705017bfaf1b9fb81ca2a83c0b0 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/coco.names @@ -0,0 +1,80 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +sofa +pottedplant +bed +diningtable +toilet +tvmonitor +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush \ No newline at end of file diff --git a/workloads/realworld/pinned/darknet/cfg/combine9k.data b/workloads/realworld/pinned/darknet/cfg/combine9k.data new file mode 100644 index 0000000000000000000000000000000000000000..06a3e76aefac9c1074c3dfe159bc115a92b0791e --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/combine9k.data @@ -0,0 +1,10 @@ +classes= 9418 +#train = /home/pjreddie/data/coco/trainvalno5k.txt +train = data/combine9k.train.list +valid = /home/pjreddie/data/imagenet/det.val.files +labels = data/9k.labels +names = data/9k.names +backup = backup/ +map = data/inet9k.map +eval = imagenet +results = results diff --git a/workloads/realworld/pinned/darknet/cfg/darknet.cfg b/workloads/realworld/pinned/darknet/cfg/darknet.cfg new file mode 100644 index 
0000000000000000000000000000000000000000..375107f7c196baf7adf229a7cfffc84739875828 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/darknet.cfg @@ -0,0 +1,120 @@ +[net] +# Training +# batch=128 +# subdivisions=1 +# Testing +batch=1 +subdivisions=1 +height=256 +width=256 +min_crop=128 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/darknet19.cfg b/workloads/realworld/pinned/darknet/cfg/darknet19.cfg new file mode 100644 index 0000000000000000000000000000000000000000..28ac9669ef686b4d638a5bf462451962fec45a4e --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/darknet19.cfg @@ -0,0 +1,205 @@ +[net] +# Training +#batch=128 +#subdivisions=2 + +# Testing + batch=1 + subdivisions=1 + +height=256 +width=256 +min_crop=128 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly 
+power=4 +max_batches=800000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 
+activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/darknet19_448.cfg b/workloads/realworld/pinned/darknet/cfg/darknet19_448.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c6df7306d3ef0622e0a0e0cbd6a5603699344e56 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/darknet19_448.cfg @@ -0,0 +1,197 @@ +[net] +batch=128 +subdivisions=4 +height=448 +width=448 +max_crop=512 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 + +angle=7 +hue = .1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 
+stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/darknet53.cfg b/workloads/realworld/pinned/darknet/cfg/darknet53.cfg new file mode 100644 index 0000000000000000000000000000000000000000..7b6d5766e9ec48ee19a74321583b44621c1e07b3 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/darknet53.cfg @@ -0,0 +1,566 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/darknet53_448.cfg b/workloads/realworld/pinned/darknet/cfg/darknet53_448.cfg new file mode 100644 index 0000000000000000000000000000000000000000..dedab1b97c7e5d4226f061e6c983046d7a278dd0 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/darknet53_448.cfg @@ -0,0 +1,559 @@ +[net] +# Training - start training with darknet53.weights +# batch=128 +# subdivisions=8 + +# Testing +batch=1 +subdivisions=1 + +height=448 +width=448 +channels=3 +min_crop=448 +max_crop=512 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 +momentum=0.9 +decay=0.0005 + + +[convolutional] +batch_normalize=1 +filters=32 
+size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + 
+[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/darknet9000.cfg b/workloads/realworld/pinned/darknet/cfg/darknet9000.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9dd2dfbbf5a7137faada4e091b8e6d48233f09bf --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/darknet9000.cfg @@ -0,0 +1,205 @@ +[net] +# Training +# batch=128 +# subdivisions=4 +# Testing +batch = 1 +subdivisions = 1 +height=448 +width=448 +max_crop=512 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] 
+size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=9418 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 +tree=data/9k.tree + +[cost] +type=masked + diff --git a/workloads/realworld/pinned/darknet/cfg/densenet201.cfg b/workloads/realworld/pinned/darknet/cfg/densenet201.cfg new file mode 100644 index 0000000000000000000000000000000000000000..65b4aecc52d45075f2913e3d63b9aec0527fa44c --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/densenet201.cfg @@ -0,0 +1,1951 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 
+activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky 
+ +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 
+size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] 
+layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 
+pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 
+filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/extraction.cfg b/workloads/realworld/pinned/darknet/cfg/extraction.cfg new file mode 100644 index 0000000000000000000000000000000000000000..66cb15f80e9a5e811223299594882a3b5d9485dc --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/extraction.cfg @@ -0,0 +1,209 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=224 +width=224 +max_crop=320 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/extraction.conv.cfg b/workloads/realworld/pinned/darknet/cfg/extraction.conv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..2a7d09ec80fa2f47e1ebb4134b7845d5cae2b828 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/extraction.conv.cfg @@ -0,0 +1,179 @@ +[net] +batch=1 +subdivisions=1 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.5 +policy=poly +power=6 +max_batches=500000 + +[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + 
+[convolutional] +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[connected] +output=1000 +activation=leaky + +[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/extraction22k.cfg b/workloads/realworld/pinned/darknet/cfg/extraction22k.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b5f54090d00537fdca72f54bb2eed69dd78f5b00 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/extraction22k.cfg @@ -0,0 +1,206 @@ +[net] +batch=128 +subdivisions=1 +height=224 +width=224 
+max_crop=320 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +max_batches = 0 +policy=steps +steps=444000,590000,970000 +scales=.5,.2,.1 + +#policy=sigmoid +#gamma=.00008 +#step=100000 +#max_batches=200000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 
+size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[connected] +output=21842 +activation=leaky + +[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/go.cfg b/workloads/realworld/pinned/darknet/cfg/go.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c730092ff3ffda0124baeace050bd382c442d88d --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/go.cfg @@ -0,0 +1,132 @@ +[net] +batch=512 +subdivisions=1 +height=19 +width=19 +channels=1 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=10000000 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu 
+batch_normalize=1 + +[convolutional] +filters=1 +size=1 +stride=1 +pad=1 +activation=linear + +[reorg] +extra=1 +stride=1 + +[softmax] + diff --git a/workloads/realworld/pinned/darknet/cfg/go.test.cfg b/workloads/realworld/pinned/darknet/cfg/go.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..1e4e43809bf3ede20a67b5020fcca0f61612e8f7 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/go.test.cfg @@ -0,0 +1,132 @@ +[net] +batch=1 +subdivisions=1 +height=19 +width=19 +channels=1 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +policy=poly +power=4 +max_batches=100000 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=1 +size=1 +stride=1 +pad=1 +activation=linear + +[reorg] +extra=1 +stride=1 + +[softmax] + + diff --git 
a/workloads/realworld/pinned/darknet/cfg/gru.cfg b/workloads/realworld/pinned/darknet/cfg/gru.cfg new file mode 100644 index 0000000000000000000000000000000000000000..6064221289d41dc3ee464a715ae05593a02f34f4 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/gru.cfg @@ -0,0 +1,29 @@ +[net] +inputs=256 +momentum=0.9 +decay=0.0 +subdivisions=1 +batch = 1 +time_steps=1 +learning_rate=.002 +adam=1 + +policy=constant +power=4 +max_batches=1000000 + +[gru] +output = 256 + +[gru] +output = 256 + +[gru] +output = 256 + +[connected] +output=256 +activation=linear + +[softmax] + diff --git a/workloads/realworld/pinned/darknet/cfg/imagenet.labels.list b/workloads/realworld/pinned/darknet/cfg/imagenet.labels.list new file mode 100644 index 0000000000000000000000000000000000000000..e73d41762d311df7f7eefec0f65ab12a7bacc023 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/imagenet.labels.list @@ -0,0 +1,21842 @@ +n02119789 +n02100735 +n02110185 +n02096294 +n02102040 +n02066245 +n02509815 +n02124075 +n02417914 +n02123394 +n02125311 +n02423022 +n02346627 +n02077923 +n02110063 +n02447366 +n02109047 +n02089867 +n02102177 +n02091134 +n02092002 +n02071294 +n02442845 +n02504458 +n02092339 +n02098105 +n02096437 +n02114712 +n02105641 +n02128925 +n02091635 +n02088466 +n02096051 +n02117135 +n02138441 +n02097130 +n02493509 +n02457408 +n02389026 +n02443484 +n02110341 +n02089078 +n02086910 +n02445715 +n02093256 +n02113978 +n02106382 +n02441942 +n02113712 +n02113186 +n02105162 +n02415577 +n02356798 +n02488702 +n02123159 +n02098413 +n02422699 +n02114855 +n02094433 +n02111277 +n02132136 +n02119022 +n02091467 +n02106550 +n02422106 +n02091831 +n02120505 +n02104365 +n02086079 +n02112706 +n02098286 +n02095889 +n02484975 +n02137549 +n02500267 +n02129604 +n02090721 +n02396427 +n02108000 +n02391049 +n02412080 +n02108915 +n02480495 +n02110806 +n02128385 +n02107683 +n02085936 +n02094114 +n02087046 +n02100583 +n02096177 +n02494079 +n02105056 +n02101556 +n02123597 +n02481823 +n02105505 
+n02088094 +n02085782 +n02489166 +n02364673 +n02114548 +n02134084 +n02480855 +n02090622 +n02113624 +n02093859 +n02403003 +n02097298 +n02108551 +n02493793 +n02107142 +n02096585 +n02107574 +n02107908 +n02086240 +n02102973 +n02112018 +n02093647 +n02397096 +n02437312 +n02483708 +n02097047 +n02106030 +n02099601 +n02093991 +n02110627 +n02106166 +n02326432 +n02108089 +n02097658 +n02088364 +n02111129 +n02100236 +n02486261 +n02115913 +n02486410 +n02487347 +n02099849 +n02108422 +n02104029 +n02492035 +n02110958 +n02099429 +n02094258 +n02099267 +n02395406 +n02112350 +n02109961 +n02101388 +n02113799 +n02095570 +n02128757 +n02101006 +n02115641 +n02097209 +n02342885 +n02097474 +n02120079 +n02095314 +n02088238 +n02408429 +n02133161 +n02328150 +n02410509 +n02492660 +n02398521 +n02112137 +n02510455 +n02093428 +n02105855 +n02111500 +n02085620 +n02123045 +n02490219 +n02099712 +n02109525 +n02454379 +n02111889 +n02088632 +n02090379 +n02443114 +n02361337 +n02105412 +n02483362 +n02437616 +n02107312 +n02325366 +n02091032 +n02129165 +n02102318 +n02100877 +n02074367 +n02504013 +n02363005 +n02102480 +n02113023 +n02086646 +n02497673 +n02087394 +n02127052 +n02116738 +n02488291 +n02091244 +n02114367 +n02130308 +n02089973 +n02105251 +n02134418 +n02093754 +n02106662 +n02444819 +n01882714 +n01871265 +n01872401 +n01877812 +n01873310 +n01883070 +n04086273 +n04507155 +n04147183 +n04254680 +n02672831 +n02219486 +n02317335 +n01968897 +n03452741 +n03642806 +n07745940 +n02690373 +n04552348 +n02692877 +n02782093 +n04266014 +n03344393 +n03447447 +n04273569 +n03662601 +n02951358 +n04612504 +n02981792 +n04483307 +n03095699 +n03673027 +n03947888 +n02687172 +n04347754 +n04606251 +n03478589 +n04389033 +n03773504 +n02860847 +n03218198 +n02835271 +n03792782 +n03393912 +n03895866 +n02797295 +n04204347 +n03791053 +n03384352 +n03272562 +n04310018 +n02704792 +n02701002 +n02814533 +n02930766 +n03100240 +n03594945 +n03670208 +n03770679 +n03777568 +n04037443 +n04285008 +n03444034 +n03445924 +n03785016 +n04252225 
+n03345487 +n03417042 +n03930630 +n04461696 +n04467665 +n03796401 +n03977966 +n04065272 +n04335435 +n04252077 +n04465501 +n03776460 +n04482393 +n04509417 +n03538406 +n03599486 +n03868242 +n02804414 +n03125729 +n03131574 +n03388549 +n02870880 +n03018349 +n03742115 +n03016953 +n04380533 +n03337140 +n03891251 +n02791124 +n04429376 +n03376595 +n04099969 +n04344873 +n04447861 +n03179701 +n03982430 +n03201208 +n03290653 +n04550184 +n07742313 +n07747607 +n07749582 +n07753113 +n07753275 +n07753592 +n07754684 +n07760859 +n07768694 +n12267677 +n12620546 +n13133613 +n11879895 +n12144580 +n12768682 +n03854065 +n04515003 +n03017168 +n03249569 +n03447721 +n03720891 +n03721384 +n04311174 +n02787622 +n02992211 +n04536866 +n03495258 +n02676566 +n03272010 +n03110669 +n03394916 +n04487394 +n03494278 +n03840681 +n03884397 +n02804610 +n03838899 +n04141076 +n03372029 +n11939491 +n12057211 +n09246464 +n09468604 +n09193705 +n09472597 +n09399592 +n09421951 +n09256479 +n09332890 +n09428293 +n09288635 +n03498962 +n03041632 +n03658185 +n03954731 +n03995372 +n03649909 +n03481172 +n03109150 +n02951585 +n03970156 +n04154565 +n04208210 +n03967562 +n03000684 +n01514668 +n01514859 +n01518878 +n01530575 +n01531178 +n01532829 +n01534433 +n01537544 +n01558993 +n01560419 +n01580077 +n01582220 +n01592084 +n01601694 +n01608432 +n01614925 +n01616318 +n01622779 +n01795545 +n01796340 +n01797886 +n01798484 +n01806143 +n01806567 +n01807496 +n01817953 +n01818515 +n01819313 +n01820546 +n01824575 +n01828970 +n01829413 +n01833805 +n01843065 +n01843383 +n01847000 +n01855032 +n01855672 +n01860187 +n02002556 +n02002724 +n02006656 +n02007558 +n02009912 +n02009229 +n02011460 +n02012849 +n02013706 +n02018207 +n02018795 +n02025239 +n02027492 +n02028035 +n02033041 +n02037110 +n02017213 +n02051845 +n02056570 +n02058221 +n01484850 +n01491361 +n01494475 +n01496331 +n01498041 +n02514041 +n02536864 +n01440764 +n01443537 +n02526121 +n02606052 +n02607072 +n02643566 +n02655020 +n02640242 +n02641379 +n01664065 +n01665541 
+n01667114 +n01667778 +n01669191 +n01675722 +n01677366 +n01682714 +n01685808 +n01687978 +n01688243 +n01689811 +n01692333 +n01693334 +n01694178 +n01695060 +n01704323 +n01697457 +n01698640 +n01728572 +n01728920 +n01729322 +n01729977 +n01734418 +n01735189 +n01737021 +n01739381 +n01740131 +n01742172 +n01744401 +n01748264 +n01749939 +n01751748 +n01753488 +n01755581 +n01756291 +n01629819 +n01630670 +n01631663 +n01632458 +n01632777 +n01641577 +n01644373 +n01644900 +n04579432 +n04592741 +n03876231 +n03483316 +n03868863 +n04251144 +n03691459 +n03759954 +n04152593 +n03793489 +n03271574 +n03843555 +n04332243 +n04265275 +n04330267 +n03467068 +n02794156 +n04118776 +n03841143 +n04141975 +n02708093 +n03196217 +n04548280 +n03544143 +n04355338 +n03891332 +n04328186 +n03197337 +n04317175 +n04376876 +n03706229 +n02841315 +n04009552 +n04356056 +n03692522 +n04044716 +n02879718 +n02950826 +n02749479 +n04090263 +n04008634 +n03085013 +n04505470 +n03126707 +n03666591 +n02666196 +n02977058 +n04238763 +n03180011 +n03485407 +n03832673 +n06359193 +n03496892 +n04428191 +n04004767 +n04243546 +n04525305 +n04179913 +n03602883 +n04372370 +n03532672 +n02974003 +n03874293 +n03944341 +n03992509 +n03425413 +n02966193 +n04371774 +n04067472 +n04040759 +n04019541 +n03492542 +n04355933 +n03929660 +n02965783 +n04258138 +n04074963 +n03208938 +n02910353 +n03476684 +n03627232 +n03075370 +n03874599 +n03804744 +n04127249 +n04153751 +n03803284 +n04162706 +n04228054 +n02948072 +n03590841 +n04286575 +n04456115 +n03814639 +n03933933 +n04485082 +n03733131 +n03794056 +n04275548 +n01768244 +n01770081 +n01770393 +n01773157 +n01773549 +n01773797 +n01774384 +n01774750 +n01775062 +n01776313 +n01784675 +n01990800 +n01978287 +n01978455 +n01980166 +n01981276 +n01983481 +n01984695 +n01985128 +n01986214 +n02165105 +n02165456 +n02167151 +n02168699 +n02169497 +n02172182 +n02174001 +n02177972 +n02190166 +n02206856 +n02226429 +n02229544 +n02231487 +n02233338 +n02236044 +n02256656 +n02259212 +n02264363 +n02268443 +n02268853 
+n02276258 +n02277742 +n02279972 +n02280649 +n02281406 +n02281787 +n01910747 +n01914609 +n01917289 +n01924916 +n01930112 +n01943899 +n01944390 +n01945685 +n01950731 +n01955084 +n02319095 +n02321529 +n03584829 +n03297495 +n03761084 +n03259280 +n04111531 +n04442312 +n04542943 +n04517823 +n03207941 +n04070727 +n04554684 +n03133878 +n03400231 +n04596742 +n02939185 +n03063689 +n04398044 +n04270147 +n02699494 +n04486054 +n03899768 +n04311004 +n04366367 +n04532670 +n02793495 +n03457902 +n03877845 +n03781244 +n03661043 +n02727426 +n02859443 +n03028079 +n03788195 +n04346328 +n03956157 +n04081281 +n03032252 +n03529860 +n03697007 +n03065424 +n03837869 +n04458633 +n02980441 +n04005630 +n03461385 +n02776631 +n02791270 +n02871525 +n02927161 +n03089624 +n04200800 +n04443257 +n04462240 +n03388043 +n03042490 +n04613696 +n03216828 +n02892201 +n03743016 +n02788148 +n02894605 +n03160309 +n03000134 +n03930313 +n04604644 +n04326547 +n03459775 +n04239074 +n04501370 +n03792972 +n04149813 +n03530642 +n03961711 +n03903868 +n02814860 +n07711569 +n07720875 +n07714571 +n07714990 +n07715103 +n07716358 +n07716906 +n07717410 +n07717556 +n07718472 +n07718747 +n07730033 +n07734744 +n04209239 +n03594734 +n02971356 +n03485794 +n04133789 +n02747177 +n04125021 +n07579787 +n03814906 +n03134739 +n03404251 +n04423845 +n03877472 +n04120489 +n03062245 +n03014705 +n03717622 +n03777754 +n04493381 +n04476259 +n02777292 +n07693725 +n03998194 +n03617480 +n07590611 +n04579145 +n03623198 +n07248320 +n04277352 +n04229816 +n02823428 +n03127747 +n02877765 +n04435653 +n03724870 +n03710637 +n03920288 +n03379051 +n02807133 +n04399382 +n03527444 +n03983396 +n03924679 +n04532106 +n06785654 +n03445777 +n07613480 +n04350905 +n04562935 +n03325584 +n03045698 +n07892512 +n03250847 +n04192698 +n03026506 +n03534580 +n07565083 +n04296562 +n02869837 +n07871810 +n02799071 +n03314780 +n04141327 +n04357314 +n02823750 +n13052670 +n07583066 +n03637318 +n04599235 +n07802026 +n02883205 +n03709823 +n04560804 +n02909870 +n03207743 
+n04263257 +n07932039 +n03786901 +n04479046 +n03873416 +n02999410 +n04367480 +n03775546 +n07875152 +n04591713 +n04201297 +n02916936 +n03240683 +n02840245 +n02963159 +n04370456 +n03991062 +n02843684 +n03482405 +n03942813 +n03908618 +n03902125 +n07584110 +n02730930 +n04023962 +n02769748 +n10148035 +n02817516 +n03908714 +n02906734 +n03788365 +n02667093 +n03787032 +n03980874 +n03141823 +n03976467 +n04264628 +n07930864 +n04039381 +n06874185 +n04033901 +n04041544 +n07860988 +n03146219 +n03763968 +n03676483 +n04209133 +n03782006 +n03857828 +n03775071 +n02892767 +n07684084 +n04522168 +n03764736 +n04118538 +n03887697 +n13044778 +n03291819 +n03770439 +n03124170 +n04487081 +n03916031 +n02808440 +n07697537 +n12985857 +n02917067 +n03938244 +n15075141 +n02978881 +n02966687 +n03633091 +n13040303 +n03690938 +n03476991 +n02669723 +n03220513 +n03127925 +n04584207 +n07880968 +n03937543 +n03000247 +n04418357 +n04590129 +n02795169 +n04553703 +n02783161 +n02802426 +n02808304 +n03124043 +n03450230 +n04589890 +n12998815 +n02992529 +n03825788 +n02790996 +n03710193 +n03630383 +n03347037 +n03769881 +n03871628 +n03733281 +n03976657 +n03535780 +n04259630 +n03929855 +n04049303 +n04548362 +n02979186 +n06596364 +n03935335 +n06794110 +n02825657 +n03388183 +n04591157 +n04540053 +n03866082 +n04136333 +n04026417 +n02865351 +n02834397 +n03888257 +n04235860 +n04404412 +n04371430 +n03733805 +n07920052 +n07873807 +n02895154 +n04204238 +n04597913 +n04131690 +n07836838 +n09835506 +n03443371 +n13037406 +n04336792 +n04557648 +n03187595 +n04254120 +n03595614 +n04146614 +n03598930 +n03958227 +n04069434 +n03188531 +n02786058 +n07615774 +n04525038 +n04409515 +n03424325 +n03223299 +n03680355 +n07614500 +n07695742 +n04033995 +n03710721 +n04392985 +n03047690 +n03584254 +n13054560 +n10565667 +n03950228 +n03729826 +n02837789 +n04254777 +n02988304 +n03657121 +n04417672 +n04523525 +n02815834 +n09229709 +n07697313 +n03888605 +n03355925 +n03063599 +n04116512 +n04325704 +n07831146 +n03255030 +n00483313 +n02432291 
+n02356381 +n02377388 +n04028764 +n04381587 +n02279257 +n04168199 +n00445055 +n02461128 +n03626760 +n04313503 +n00451635 +n02509515 +n04224842 +n09403734 +n02769290 +n13054073 +n03163222 +n00464478 +n03087069 +n04477219 +n03445617 +n00449054 +n00483705 +n04395106 +n03389611 +n04285965 +n04166281 +n04003856 +n03696301 +n00475787 +n04587404 +n09218641 +n02276355 +n03592669 +n04459909 +n04492375 +n09447666 +n00463543 +n04148703 +n04591517 +n03970546 +n04297750 +n02782778 +n02383231 +n03693474 +n02277094 +n03766044 +n02056228 +n03394272 +n03047052 +n00434075 +n04185946 +n02411999 +n03858418 +n12833149 +n02836035 +n03108853 +n04587559 +n04138261 +n02278024 +n03063485 +n02774921 +n09475044 +n02811204 +n03329302 +n04026813 +n03986562 +n03379204 +n03426134 +n02790669 +n03487090 +n03548402 +n08614632 +n04054361 +n03421485 +n03302671 +n03098959 +n02970408 +n03772584 +n03064935 +n09415584 +n11715430 +n12024445 +n02710201 +n03475581 +n13142504 +n03396074 +n03211789 +n03914337 +n03678558 +n03233123 +n00453396 +n00454395 +n00440382 +n04289027 +n00445226 +n11953610 +n04128413 +n00480211 +n00470966 +n12547503 +n03085219 +n02275773 +n02692086 +n04257790 +n00448748 +n02686379 +n12328567 +n03432129 +n03859000 +n12091377 +n02124313 +n00442847 +n04603399 +n03114379 +n02920369 +n03818343 +n02946127 +n02978055 +n12914923 +n02705429 +n00448232 +n12882945 +n04289690 +n07606669 +n02056728 +n11848479 +n03046921 +n12282933 +n02867966 +n12821505 +n02812949 +n04545305 +n02699770 +n04395651 +n02900160 +n04099003 +n02054711 +n12606545 +n03356858 +n01859190 +n03643737 +n02962200 +n03123553 +n09361517 +n02793089 +n00449517 +n02783994 +n10117851 +n12038585 +n04383839 +n10142391 +n07719213 +n03536122 +n02472987 +n03454536 +n11728099 +n02392824 +n03795758 +n04282872 +n00448872 +n02404432 +n03797182 +n03029197 +n03665924 +n12477163 +n02769963 +n03863262 +n01532325 +n04165409 +n04593077 +n04473108 +n03577090 +n09988063 +n00446804 +n03931765 +n00475014 +n02700064 +n03240892 +n12475242 +n11735053 
+n04053508 +n02852173 +n02382750 +n03823111 +n04543772 +n04112147 +n04433585 +n03175189 +n03596543 +n00445685 +n03307792 +n04589593 +n01814217 +n02993368 +n04303497 +n02811350 +n03355768 +n03699591 +n04590553 +n01893825 +n12726670 +n09916348 +n11544015 +n01318894 +n02133704 +n02367492 +n04506289 +n02069974 +n01900150 +n03207835 +n03363549 +n02831595 +n04970470 +n04160847 +n03767203 +n03928814 +n02302969 +n02918595 +n10401331 +n04231272 +n03717447 +n03063968 +n03380724 +n00825773 +n09988493 +n02740300 +n04539794 +n04121511 +n01323599 +n12937130 +n02428508 +n02980036 +n12061380 +n01887787 +n04214046 +n01787835 +n00466630 +n02979290 +n03927091 +n03231368 +n03904657 +n04469003 +n04196502 +n02122948 +n04544325 +n07868340 +n13876561 +n11925898 +n12158443 +n01595450 +n12454705 +n02069412 +n09618957 +n02357111 +n00451563 +n04197110 +n02276902 +n03111296 +n03909020 +n12303083 +n02082791 +n01956764 +n04269822 +n04207343 +n02433318 +n01888181 +n12682668 +n01592387 +n09793141 +n00466273 +n04026180 +n06255081 +n12172364 +n10145590 +n12311579 +n12173912 +n03822171 +n03140292 +n03027625 +n02739427 +n02060133 +n02431785 +n03219010 +n00447957 +n11910271 +n03620967 +n12547215 +n02409508 +n04290079 +n12329260 +n13901858 +n02008497 +n10304914 +n04524142 +n04279462 +n04233124 +n09733793 +n12822115 +n09475179 +n10151760 +n03418618 +n12858397 +n07735510 +n03549473 +n10098245 +n03653583 +n10604380 +n03375575 +n03885293 +n01527347 +n03237340 +n02760658 +n11953038 +n03187268 +n03004275 +n02393161 +n11965218 +n08580944 +n03938725 +n03900979 +n04144241 +n03760310 +n02376679 +n03237992 +n09432283 +n02379908 +n09918554 +n04041747 +n12012111 +n10331167 +n01612122 +n10147935 +n07691539 +n11669786 +n09403427 +n01935395 +n09903501 +n04439585 +n04459018 +n02780704 +n03720163 +n12899752 +n04118635 +n03404149 +n02429456 +n00449168 +n04516354 +n04317833 +n12075299 +n07878647 +n09438940 +n03361550 +n02027357 +n04317976 +n03092883 +n04526964 +n03985069 +n03610682 +n04028581 +n02277268 +n09433839 
+n03846431 +n03919289 +n10146104 +n10260706 +n02686227 +n03321103 +n00444846 +n01558307 +n01595168 +n03919096 +n11844892 +n04260364 +n02750070 +n03034244 +n03002096 +n04273972 +n11814584 +n04605321 +n07745466 +n02922798 +n03361380 +n12651229 +n08521623 +n04498389 +n00453313 +n04967882 +n12024690 +n03934656 +n02685082 +n04501550 +n09972458 +n03055418 +n07763629 +n03902756 +n09938449 +n09712696 +n02387346 +n03133415 +n07711080 +n03129753 +n03524150 +n02275560 +n03993053 +n03438661 +n11939180 +n00466524 +n11753355 +n03456024 +n03421324 +n07890540 +n11720643 +n02057035 +n00453126 +n04453037 +n01540832 +n03546235 +n03370387 +n02041875 +n02871439 +n03262072 +n01786646 +n02430830 +n02799175 +n05262422 +n03854722 +n12817694 +n04449966 +n01564773 +n02034971 +n03490119 +n02822579 +n07879953 +n04110178 +n04963588 +n04252653 +n01565078 +n02389128 +n02779435 +n10645017 +n04582205 +n08573842 +n10146002 +n03892178 +n03119396 +n03813078 +n07866868 +n03160740 +n03371875 +n02417387 +n03904782 +n03098688 +n02902687 +n01828556 +n04401680 +n04590933 +n01575401 +n07693048 +n02901114 +n03047941 +n04355511 +n11849871 +n10738111 +n03122073 +n12052787 +n01594004 +n01549886 +n02824058 +n03506184 +n11487732 +n12574866 +n12948053 +n10091450 +n00470554 +n00326094 +n12093329 +n04438897 +n07818995 +n12828791 +n13901321 +n10613996 +n10159533 +n02669295 +n02843158 +n06415688 +n14858292 +n09813219 +n12485653 +n03200231 +n02089468 +n03935234 +n01539925 +n12428076 +n10439373 +n01536644 +n02694662 +n02123242 +n03002711 +n03363749 +n02669534 +n03451798 +n11927215 +n02679257 +n09475925 +n10015485 +n12422129 +n03946162 +n02377291 +n07871720 +n12622297 +n12782915 +n01579260 +n11838916 +n10267311 +n12824053 +n03340723 +n02276749 +n04439712 +n02126139 +n04188179 +n02386853 +n07942152 +n02499316 +n04324387 +n10635788 +n04234887 +n12237641 +n03713436 +n04960582 +n04076713 +n01646292 +n03947798 +n02840134 +n04476972 +n09822830 +n03551395 +n04533802 +n02918964 +n00474657 +n12932966 +n01615458 +n01806364 
+n12458550 +n11784497 +n03557360 +n10638922 +n09889941 +n10689306 +n03358172 +n04295571 +n06596607 +n11853356 +n00482122 +n11760785 +n03150232 +n11778257 +n03059685 +n10105733 +n04104384 +n07691237 +n04326676 +n07684938 +n12666965 +n04177820 +n13918387 +n03398153 +n03914438 +n09932098 +n02988486 +n02977619 +n03317788 +n03484487 +n02988679 +n04062428 +n02568087 +n12866162 +n04227144 +n07875436 +n04082886 +n11753700 +n00470682 +n02122298 +n10145239 +n12755727 +n04214282 +n01852671 +n02378969 +n04108822 +n10382825 +n12392549 +n03973839 +n12258885 +n11782761 +n12389501 +n02940570 +n03405595 +n02969323 +n03207630 +n10169147 +n03805725 +n09847543 +n02415253 +n07880080 +n04305572 +n02042180 +n07565161 +n02871147 +n04438507 +n04445154 +n07842433 +n12029635 +n09750282 +n09621232 +n01858906 +n02761206 +n03986355 +n12591351 +n13916721 +n02905036 +n11894770 +n02377603 +n12924623 +n03950899 +n09454153 +n10247358 +n05261310 +n11943660 +n10804287 +n03560430 +n01756089 +n10618342 +n04283378 +n13926786 +n04238321 +n04393549 +n04461879 +n03502200 +n00440941 +n03494706 +n04148579 +n13902336 +n02780815 +n10726031 +n04124098 +n12344483 +n04384910 +n07681450 +n02030837 +n04059157 +n09247410 +n02714751 +n08633683 +n04520784 +n10141732 +n12371439 +n04499062 +n02931148 +n07609632 +n04536335 +n02874537 +n03013438 +n11786539 +n11690455 +n07600696 +n00478262 +n00466712 +n03399677 +n12441183 +n07895962 +n11966083 +n02990373 +n04241249 +n02068541 +n12513933 +n02356977 +n04252560 +n04087826 +n03455488 +n07619409 +n09787534 +n03680942 +n00446980 +n12384839 +n03416900 +n07821758 +n11853813 +n01606522 +n11780148 +n04969242 +n12413880 +n04130257 +n01322604 +n03061211 +n01959492 +n02842573 +n04313628 +n03815149 +n02445394 +n08547544 +n03222176 +n04070003 +n03075768 +n09695979 +n02877266 +n08583292 +n02870676 +n03657511 +n01621635 +n04284341 +n04136161 +n02836174 +n10247880 +n01744100 +n02882894 +n03408444 +n03411079 +n02366959 +n04399158 +n04542715 +n02787435 +n04251701 +n13863020 +n07890226 
+n12245319 +n12849952 +n11626826 +n00887544 +n03140431 +n03519387 +n03855604 +n07906111 +n02054036 +n11954161 +n03038281 +n00450998 +n12136392 +n02119477 +n04356925 +n02406647 +n04450133 +n12545635 +n01565599 +n02028900 +n07817024 +n02971167 +n04309049 +n02678897 +n12795555 +n11769803 +n01904886 +n02079851 +n12189987 +n04581829 +n12098403 +n01839330 +n12587803 +n03652932 +n08628141 +n03544238 +n04513827 +n01847806 +n03132076 +n10243137 +n03621377 +n10530959 +n14765422 +n04968139 +n12950314 +n02064816 +n02846511 +n10513823 +n11772408 +n03341297 +n03492922 +n03683606 +n02894337 +n02365480 +n09846755 +n03495039 +n01317813 +n12610328 +n02157206 +n01588002 +n03914831 +n03659686 +n10406765 +n09205509 +n02870526 +n07954211 +n10578471 +n11646694 +n03115762 +n07762913 +n12056758 +n12305986 +n11845913 +n02835915 +n02831237 +n07927512 +n12171098 +n02073831 +n07605040 +n02885462 +n02768114 +n04450994 +n11844371 +n03963645 +n02956699 +n02029378 +n01528396 +n10005934 +n04465666 +n04390977 +n11882074 +n03831382 +n04605163 +n06276501 +n02944075 +n05258051 +n07901457 +n12683571 +n02205219 +n13235503 +n02388735 +n03941231 +n14919819 +n12816508 +n11536673 +n13895262 +n02903204 +n10137825 +n07841345 +n07893253 +n01850192 +n07769731 +n11773987 +n03539678 +n12938193 +n10802507 +n03089879 +n00477392 +n01828096 +n09263912 +n13653902 +n04579667 +n01322983 +n08579352 +n07587023 +n07756951 +n07870167 +n10588357 +n01606809 +n13864035 +n02802544 +n07591961 +n02979399 +n04144539 +n02416820 +n11769176 +n09743792 +n09732170 +n04972451 +n13918274 +n01847089 +n01859689 +n04208065 +n07617051 +n10674713 +n07914271 +n07887461 +n03736064 +n03644858 +n03878963 +n04040247 +n07891433 +n01611969 +n07587618 +n02689144 +n10049363 +n04059516 +n10313239 +n03115400 +n01519563 +n01533893 +n03850245 +n11733548 +n03372549 +n01884834 +n02839110 +n07887192 +n03617312 +n07886463 +n03103396 +n07764847 +n01855476 +n07808587 +n12858871 +n03632729 +n10209731 +n04141712 +n03978686 +n03225988 +n00475273 +n09224725 
+n04966543 +n01322221 +n03649674 +n13154494 +n03948830 +n03320519 +n03723267 +n07869611 +n12342498 +n01827793 +n03145719 +n11821184 +n11956348 +n11857875 +n10339717 +n09450163 +n10756148 +n01591301 +n07915094 +n04422727 +n09719309 +n03349469 +n03389889 +n10718131 +n04298661 +n09747495 +n03676623 +n03547229 +n03062015 +n10734394 +n07817315 +n02852360 +n01850553 +n02952585 +n03587205 +n02009750 +n01540090 +n02947660 +n03656957 +n03378174 +n02508213 +n01572489 +n12008487 +n12185859 +n11691046 +n01323355 +n05262534 +n00448126 +n02432983 +n12038406 +n03883385 +n02411206 +n01643896 +n10159045 +n11675025 +n01803362 +n02009508 +n07920349 +n04098513 +n11617272 +n09913455 +n12390314 +n04171208 +n02995345 +n10634849 +n03173929 +n02749953 +n11845793 +n12796022 +n11955153 +n11816829 +n03032453 +n11984542 +n02992795 +n03712111 +n02873733 +n02759387 +n14915184 +n02381364 +n12686274 +n07857731 +n04518764 +n03010473 +n02418465 +n02359556 +n07894799 +n04104770 +n04335209 +n01848976 +n02006063 +n04454908 +n03002948 +n04220250 +n09923561 +n04102162 +n11958080 +n04598965 +n10173410 +n03067339 +n02003204 +n12686676 +n11986511 +n02311617 +n03367059 +n02761557 +n05578095 +n04041069 +n10575463 +n03325941 +n10082043 +n01806297 +n09691729 +n04593866 +n01813088 +n01625562 +n03906224 +n01652026 +n10236304 +n04102618 +n04321453 +n07820145 +n01575117 +n12788854 +n07823698 +n04206225 +n03216710 +n02421449 +n03343737 +n07560903 +n02872529 +n11989869 +n12071744 +n06278475 +n04492749 +n02920259 +n03798061 +n02420509 +n03316105 +n12052447 +n03974915 +n02904803 +n03430418 +n12291959 +n06892775 +n03875806 +n07903841 +n10282482 +n02683323 +n07862348 +n01849157 +n04469813 +n09944022 +n03342127 +n07592481 +n02936402 +n02405929 +n10002760 +n02537716 +n05259914 +n01560280 +n12694486 +n07879350 +n02377063 +n03637181 +n03409297 +n01607812 +n02808185 +n09239302 +n12055516 +n09712448 +n02859184 +n12772908 +n02735538 +n10333838 +n12336092 +n02386968 +n04613939 +n00452864 +n04535524 +n03174731 +n04189816 
+n07607605 +n12909917 +n02387722 +n02960690 +n07715221 +n02407071 +n10667477 +n09398076 +n04236809 +n01904806 +n01610552 +n12373100 +n12771390 +n04122685 +n07804771 +n15102455 +n03469175 +n03746005 +n02536456 +n03505667 +n11816336 +n09376198 +n10572706 +n03464053 +n02869155 +n07816164 +n04969798 +n02942349 +n14820180 +n01623615 +n12676703 +n03369276 +n03650551 +n02010272 +n02976123 +n01852400 +n02196119 +n04132158 +n03238586 +n07639069 +n03313333 +n10542761 +n12215022 +n00455173 +n10019406 +n12899537 +n04277826 +n09906449 +n04549629 +n11508382 +n15090065 +n10289462 +n04540255 +n02723165 +n04335693 +n01536334 +n03107488 +n12782530 +n14785065 +n02974348 +n09874862 +n04479939 +n03309465 +n09902954 +n12092417 +n03425595 +n12433081 +n07806774 +n12462805 +n01314781 +n10192839 +n01622120 +n07807171 +n03261019 +n02843553 +n04287747 +n02324587 +n09915434 +n01818299 +n01592694 +n03826186 +n03607659 +n01527917 +n03628511 +n02005399 +n04204081 +n02052775 +n04403413 +n03914106 +n12811027 +n01872772 +n04555700 +n02004855 +n04602762 +n02713003 +n04406817 +n11934807 +n03336282 +n09684901 +n03836976 +n11959862 +n03062336 +n03506028 +n04503413 +n07819896 +n03205669 +n11902200 +n07685218 +n03046133 +n10261624 +n10303814 +n03676087 +n04023695 +n07587111 +n07764155 +n01504179 +n03794136 +n03389761 +n13901211 +n02784124 +n04488530 +n02807731 +n07898443 +n04981658 +n04177755 +n03649161 +n04125257 +n10135129 +n03653110 +n10560106 +n07735687 +n03511333 +n11960245 +n03301568 +n03878066 +n10746931 +n04223299 +n04237423 +n07888229 +n01819734 +n12312728 +n09981939 +n03727465 +n13882276 +n02993194 +n11971927 +n09713108 +n03581125 +n09718936 +n14698884 +n03005285 +n03540914 +n03359436 +n03934042 +n07569644 +n04964878 +n07890068 +n07580253 +n01538630 +n03132666 +n03259009 +n02796318 +n12703190 +n01464844 +n11792029 +n04270371 +n13102775 +n02933649 +n02387254 +n02890188 +n04335886 +n04358491 +n02786837 +n03885194 +n04001265 +n03438071 +n10375402 +n02997910 +n03326795 +n00470830 +n02734725 
+n03494537 +n08376250 +n07743544 +n02991847 +n04246271 +n04156140 +n04381073 +n07732168 +n04951071 +n07977870 +n04334599 +n02838728 +n03326948 +n11723227 +n08182379 +n03686924 +n03821518 +n02382204 +n02080415 +n11788727 +n07732636 +n03860404 +n03898395 +n07867324 +n04392113 +n13237188 +n03263076 +n07843636 +n04968056 +n04397027 +n03320421 +n06267564 +n02880842 +n04115456 +n13862407 +n10289039 +n03128248 +n01457852 +n01536035 +n04579056 +n03937931 +n03036022 +n01804163 +n09913593 +n12841007 +n03115897 +n03256032 +n02475669 +n07924443 +n03061505 +n10001481 +n03600722 +n07842308 +n10696508 +n04215402 +n10588074 +n03614782 +n03995535 +n12091953 +n04113194 +n10092978 +n03011741 +n04381860 +n07819769 +n07905474 +n03288500 +n04225987 +n13223710 +n02879087 +n02920083 +n08640739 +n03362890 +n03996849 +n03849814 +n09694664 +n02407390 +n02910864 +n02388917 +n01668665 +n07616046 +n02932891 +n10553235 +n03652729 +n01615703 +n12801781 +n12164656 +n05302499 +n03801760 +n03332271 +n02901793 +n03941417 +n09833441 +n01623110 +n02807523 +n10598181 +n03725600 +n10368528 +n04116098 +n12719944 +n02045864 +n02173373 +n02811059 +n04479823 +n07816398 +n10572889 +n04142731 +n07687381 +n02799323 +n07865484 +n01858845 +n12684379 +n01842235 +n09242389 +n02028727 +n03527565 +n03438863 +n15019030 +n13907272 +n09659039 +n04251791 +n03683995 +n04137217 +n04389430 +n09785659 +n02016816 +n03124590 +n01859325 +n03138669 +n02999936 +n11926365 +n12686077 +n03517760 +n09734450 +n04563413 +n12074867 +n01564217 +n12521394 +n06267893 +n03594148 +n04139395 +n12369309 +n01544389 +n12048056 +n04524941 +n03016868 +n03653740 +n02795528 +n03687137 +n03766935 +n03361297 +n04263502 +n10043491 +n03446268 +n01994910 +n03891538 +n10091564 +n10226413 +n02755140 +n03500389 +n10237196 +n03625646 +n06596474 +n03360300 +n09730824 +n10732010 +n04469514 +n02904927 +n04961331 +n02936570 +n03680858 +n07585758 +n09199101 +n04050933 +n03712337 +n03911513 +n01556182 +n03102371 +n07928887 +n12133462 +n03974070 +n03971218 
+n03292475 +n03425241 +n03440216 +n11995092 +n02894158 +n02918112 +n10568358 +n11524451 +n03169176 +n04100519 +n07588193 +n06883725 +n02860640 +n07762114 +n04082710 +n07896893 +n10167152 +n03287351 +n02788021 +n08494231 +n01560935 +n03249342 +n04564581 +n09349648 +n07704205 +n03510244 +n12127460 +n09945745 +n11719286 +n11613459 +n12656369 +n03824381 +n07655263 +n09894143 +n04964001 +n02161457 +n07654298 +n07930433 +n02979074 +n02026948 +n13914608 +n07611267 +n02843276 +n09827363 +n10259780 +n04432662 +n11715678 +n12388858 +n03057920 +n10465451 +n03855214 +n07728181 +n09835348 +n03549732 +n04589325 +n03491032 +n00452034 +n03948242 +n01456756 +n07921615 +n02809105 +n12889713 +n07586894 +n07734879 +n07905979 +n12847374 +n12129134 +n02122580 +n04028074 +n02911332 +n09251407 +n07697825 +n04597309 +n02800213 +n03480579 +n07621618 +n04170933 +n03743279 +n01916481 +n04037220 +n10748620 +n02708433 +n12007196 +n02561381 +n04103769 +n03030880 +n04413969 +n03911658 +n04590746 +n00476389 +n04331639 +n07725789 +n01792429 +n02949542 +n07686720 +n04064862 +n04447028 +n01713764 +n09854218 +n04032603 +n04405907 +n15093298 +n04385536 +n11954345 +n01560793 +n09249034 +n03784270 +n03436549 +n01324610 +n02379183 +n07616487 +n04119478 +n03309356 +n12865037 +n12850168 +n04250850 +n03024064 +n04412097 +n02982515 +n00450070 +n10175248 +n11847169 +n12276872 +n12870891 +n10229883 +n10505613 +n03482252 +n09300905 +n02919890 +n07617611 +n10283170 +n01607962 +n01671125 +n07894551 +n04561287 +n00005787 +n10025635 +n02850732 +n03732020 +n02036711 +n07907429 +n03797896 +n03004824 +n12011620 +n10300303 +n03105467 +n03767745 +n07868508 +n07868200 +n03788047 +n07886057 +n04559451 +n09845401 +n04373704 +n02676938 +n02565324 +n02667478 +n02122878 +n03244047 +n01747589 +n04320973 +n13205058 +n02379430 +n11959632 +n10183931 +n07683490 +n10055410 +n04370288 +n03273551 +n13900422 +n07899434 +n04053677 +n07740461 +n11879722 +n04282494 +n02981911 +n03449451 +n07581249 +n03965456 +n11808468 +n13881644 
+n11725973 +n12091213 +n13193856 +n02873520 +n02754656 +n02431976 +n01324431 +n02385214 +n01888411 +n12680864 +n07731284 +n04337287 +n07631926 +n02549248 +n04395024 +n07585557 +n02776825 +n09460046 +n12023108 +n00475403 +n10098517 +n07902336 +n03683708 +n02412210 +n04397452 +n04583212 +n13869547 +n03632577 +n01616086 +n02763901 +n08256735 +n03015478 +n02084732 +n12178896 +n11966215 +n07605380 +n13869788 +n01847170 +n07744811 +n01854700 +n00444937 +n10422405 +n07801892 +n09688804 +n11879054 +n02802215 +n07908411 +n07822518 +n01558594 +n07935737 +n10730728 +n04436329 +n04294879 +n04972350 +n12911440 +n13886260 +n07578093 +n02537525 +n03703730 +n09607630 +n13865904 +n02360282 +n11731659 +n04126066 +n04212165 +n11618290 +n07588574 +n09269472 +n11896722 +n02892304 +n03487642 +n02028342 +n03321563 +n03135030 +n03522100 +n03253886 +n04095109 +n06470073 +n12603449 +n10644598 +n10260800 +n01535469 +n09696456 +n03553019 +n03963198 +n11918473 +n10314517 +n03002341 +n07574923 +n10421470 +n05716342 +n03244231 +n01730563 +n11691857 +n12807251 +n12345899 +n03142679 +n01531512 +n12307240 +n07835457 +n04535370 +n00451186 +n12481458 +n03434188 +n09734185 +n04578934 +n04167346 +n02747802 +n03459328 +n03301940 +n01562014 +n07690431 +n10642596 +n03696065 +n12781940 +n02759257 +n04392764 +n04218564 +n03499907 +n01536780 +n09751895 +n03235042 +n04570815 +n12070381 +n09448690 +n07625061 +n10178216 +n04560113 +n09457979 +n03858085 +n02421792 +n02944579 +n10085869 +n09718811 +n04103206 +n04239786 +n04501947 +n01321123 +n02390015 +n03964495 +n01554448 +n02925107 +n03028596 +n12483625 +n03227317 +n10701644 +n11968704 +n03900393 +n01851038 +n02276078 +n03132776 +n07585906 +n04480033 +n07880458 +n12887293 +n07921239 +n03307037 +n04595028 +n04244379 +n13131028 +n10313724 +n09436708 +n02694045 +n09941787 +n00449796 +n01817346 +n07928696 +n03401279 +n12901724 +n11646167 +n07682477 +n09415671 +n07900225 +n03607029 +n02692232 +n11834654 +n07935379 +n12437930 +n03762434 +n07922764 +n03595523 
+n04546340 +n10686885 +n03516844 +n03767112 +n09896685 +n03859608 +n03149686 +n07920872 +n12388143 +n10406391 +n04233715 +n04373089 +n02023992 +n01947396 +n12115180 +n00479616 +n03962852 +n02392434 +n12414035 +n14976871 +n03201776 +n10665587 +n03600285 +n04402449 +n08539072 +n03629231 +n12860365 +n03488438 +n03337383 +n12455950 +n10384392 +n02953455 +n03101796 +n07919572 +n03233744 +n01578180 +n01756508 +n04556533 +n02962843 +n02882190 +n03731483 +n01850873 +n05260240 +n03111177 +n09836519 +n03030557 +n11789066 +n02788572 +n07903101 +n04067818 +n07840804 +n01567678 +n12427184 +n03333610 +n02416964 +n10607291 +n07936548 +n05451384 +n02968074 +n07605597 +n02704949 +n07609215 +n01951274 +n07696977 +n03180384 +n04303357 +n03291741 +n02207805 +n10123844 +n03420345 +n12384227 +n02758863 +n02047975 +n03978966 +n03549199 +n04275175 +n09294877 +n09836343 +n11970586 +n02010728 +n10369317 +n12681893 +n03192543 +n12413165 +n12174521 +n11916696 +n10042845 +n07822197 +n04968749 +n10323634 +n12849416 +n02814774 +n05538625 +n03078802 +n12230794 +n07726095 +n03051249 +n12005656 +n11876432 +n12164881 +n09711435 +n01622483 +n09896170 +n07684289 +n03368352 +n07910048 +n03159535 +n00466377 +n01541386 +n11647703 +n09752023 +n07903731 +n12249542 +n03794798 +n11786131 +n02852043 +n10493685 +n09846894 +n01752585 +n01536186 +n07618432 +n09859152 +n02065026 +n02382635 +n07867616 +n03885788 +n04255586 +n03275681 +n11961100 +n12485981 +n04495698 +n03293741 +n13902048 +n03254862 +n07903962 +n01594787 +n11962272 +n03284886 +n07842202 +n10157128 +n02405302 +n04443766 +n06266633 +n02519862 +n01487506 +n03373943 +n04247876 +n04327204 +n03349771 +n09260907 +n10092794 +n12223764 +n03504723 +n11926833 +n01820052 +n13032381 +n03889871 +n03209359 +n04608923 +n15093137 +n15091304 +n03688405 +n09905185 +n03543112 +n11611356 +n03885028 +n03234164 +n07594066 +n02396014 +n03456186 +n09874725 +n11601333 +n02917521 +n03055857 +n02804123 +n12352844 +n12866002 +n09858165 +n12037691 +n02565072 +n04477387 
+n02008643 +n07867021 +n04119360 +n09893191 +n02944146 +n12435649 +n13197274 +n04974859 +n07751004 +n12003696 +n02762508 +n02680512 +n01743086 +n06998748 +n10607478 +n07613815 +n01559477 +n01859852 +n03239054 +n04466871 +n05263183 +n13173882 +n07897438 +n12427757 +n04400737 +n03291963 +n07682808 +n11692265 +n04130143 +n09445289 +n07696839 +n03835197 +n12821895 +n09734639 +n03365374 +n04305210 +n04962240 +n09871867 +n07897750 +n07616386 +n09443281 +n03641569 +n13882563 +n07680761 +n10498816 +n04034262 +n03533014 +n07928790 +n07690152 +n10060352 +n04124370 +n12453186 +n04509171 +n03013580 +n10604979 +n12515711 +n04971211 +n07693223 +n03786715 +n07894703 +n02761834 +n04232800 +n03437741 +n04045644 +n14976759 +n03042697 +n12557681 +n06275095 +n11678010 +n01586941 +n07684517 +n07822845 +n03483823 +n09951616 +n03180865 +n07861557 +n03644378 +n12848499 +n11962667 +n03886762 +n04238128 +n11979964 +n13915113 +n12791329 +n12457091 +n03341153 +n10267865 +n03484576 +n10186216 +n07612137 +n03843438 +n11807525 +n11931540 +n02027897 +n07614730 +n04116294 +n03469903 +n10017272 +n03688605 +n07860103 +n03981566 +n01888045 +n03345837 +n11998888 +n02071636 +n02726017 +n04310157 +n04607869 +n01622959 +n08524735 +n03119203 +n12031927 +n03610524 +n02807616 +n04056180 +n03233905 +n03374473 +n14810561 +n11944954 +n03121431 +n09750891 +n08505018 +n10727171 +n12357485 +n12571781 +n12067193 +n07586604 +n02086753 +n03548086 +n02560110 +n07804900 +n02880393 +n04208427 +n12931542 +n01594968 +n05218119 +n03520493 +n03727605 +n12687698 +n03612965 +n04135315 +n07730320 +n10540114 +n07599911 +n01323493 +n02115096 +n04590263 +n12043836 +n02861387 +n09836786 +n04966941 +n02816768 +n13131618 +n10701962 +n02919792 +n03442597 +n04325041 +n03333129 +n04091693 +n04950952 +n10631309 +n04177931 +n13234678 +n01970667 +n07748416 +n07893642 +n07691650 +n03660909 +n04145863 +n11945514 +n10334009 +n12336973 +n03954393 +n04558478 +n09899929 +n03487533 +n07816575 +n07877187 +n07863547 +n01603812 +n02098906 
+n04973585 +n03674440 +n04371050 +n12243109 +n07871234 +n02928049 +n07574504 +n07889274 +n12141167 +n04543996 +n03080633 +n03423479 +n07879659 +n04380916 +n10514429 +n07584423 +n04009801 +n12479537 +n07606538 +n07698543 +n12353754 +n10132035 +n03367545 +n04245508 +n09811852 +n02024763 +n04052442 +n10120330 +n12352639 +n12606438 +n07752966 +n09772930 +n02535759 +n11737534 +n10345015 +n12427566 +n09705784 +n04112654 +n02985963 +n03758089 +n12953484 +n07906572 +n02881757 +n12739332 +n03718458 +n03407865 +n07775050 +n03210552 +n09452395 +n09789566 +n10566072 +n10559996 +n07826930 +n12414932 +n01887474 +n03026907 +n07751148 +n10223177 +n03957420 +n03788601 +n12244819 +n12421137 +n04266162 +n10038409 +n02981024 +n03228967 +n11825351 +n12058822 +n11963932 +n03041449 +n03046029 +n07590502 +n02932523 +n02152881 +n04970398 +n07887967 +n12812478 +n12421917 +n02708711 +n11870747 +n04290507 +n07934282 +n01608265 +n12070583 +n03205574 +n02305085 +n07866015 +n02960903 +n10098624 +n00481803 +n07938007 +n02693246 +n03923379 +n04103665 +n11792742 +n12489815 +n04971313 +n01668892 +n01055165 +n03215508 +n12104501 +n07899292 +n12822955 +n07713074 +n03842012 +n02449350 +n07868955 +n02835829 +n12283542 +n04525584 +n07910656 +n11625003 +n03987266 +n02805983 +n15091846 +n09736945 +n04973816 +n02439398 +n01519873 +n07899003 +n03019938 +n07582152 +n01885498 +n12108871 +n02934451 +n04327682 +n07696625 +n09750770 +n12084890 +n03960374 +n07585107 +n01570839 +n11905392 +n06277135 +n07842044 +n03751269 +n04398951 +n12861892 +n12649539 +n07596967 +n07580592 +n12845413 +n07690739 +n07804657 +n04334105 +n03779128 +n03268918 +n03066359 +n02744323 +n12596148 +n04272389 +n07832416 +n10210911 +n01548865 +n03221351 +n15091669 +n07878926 +n07607967 +n12171966 +n02846141 +n07576781 +n02922292 +n10092643 +n01732614 +n02578771 +n02864593 +n03537241 +n09635534 +n03268645 +n07852833 +n13873917 +n12640839 +n03506727 +n10536416 +n09976429 +n10692482 +n07600285 +n04156946 +n07818689 +n02605703 +n02710429 
+n02890351 +n03408054 +n03121298 +n02731629 +n12450840 +n04061681 +n10153414 +n07648913 +n07891309 +n01562265 +n14973585 +n01610226 +n06267991 +n03302938 +n07822323 +n07826091 +n02764398 +n10406266 +n09282208 +n01734104 +n04283096 +n03530910 +n11542137 +n02610664 +n03856012 +n01531811 +n07862611 +n11625632 +n12643313 +n02469248 +n03333711 +n02907082 +n02122430 +n01559804 +n09744161 +n10187990 +n12015525 +n07844867 +n07887304 +n02878425 +n02009380 +n11448153 +n10655594 +n12566954 +n11901977 +n03999160 +n02389779 +n07928488 +n12785889 +n04281375 +n03745146 +n03224603 +n04594828 +n12835331 +n09715427 +n11615026 +n09972010 +n04038231 +n02379329 +n03445326 +n10753442 +n04249882 +n11727738 +n07866723 +n04282992 +n11621281 +n01566645 +n03919430 +n11980682 +n03480719 +n11625804 +n10467395 +n09436444 +n07867751 +n03684611 +n03788498 +n12062626 +n07808904 +n07690585 +n03865557 +n10711766 +n10465831 +n04380255 +n12166128 +n04432203 +n07892418 +n10432441 +n12991184 +n04209613 +n04459773 +n09666883 +n07807472 +n09873899 +n12939874 +n04545748 +n09637339 +n07919441 +n03987376 +n03645577 +n03437430 +n10671613 +n02964843 +n09707289 +n11700058 +n03877351 +n03518445 +n07643200 +n02140049 +n12683791 +n12418221 +n04154152 +n03397947 +n03238131 +n11851839 +n04545858 +n07744682 +n02995871 +n07593199 +n03543394 +n10293332 +n12658481 +n11599324 +n02705201 +n03920867 +n08249459 +n02876084 +n03937835 +n01397871 +n03849679 +n12016567 +n04208936 +n07696728 +n13148208 +n01904029 +n08659861 +n07878785 +n07827130 +n03390983 +n02624807 +n03319745 +n03994614 +n00446493 +n12477583 +n02920658 +n04602956 +n02688273 +n07577538 +n04350581 +n09283405 +n04074185 +n04495843 +n03538179 +n03454885 +n03878211 +n10308168 +n08518171 +n02660208 +n07904760 +n07928367 +n10174445 +n02137015 +n02863426 +n07700003 +n04015908 +n03946076 +n11725821 +n01794344 +n04364160 +n01663782 +n04283255 +n02822064 +n04406239 +n02782681 +n11990313 +n03563460 +n02957008 +n07889814 +n07896060 +n03683079 +n04278447 +n13011595 
+n11810358 +n03836451 +n12827537 +n03545470 +n03213538 +n07929351 +n03471190 +n02882301 +n03625943 +n03397087 +n11955896 +n04097373 +n03145522 +n03034405 +n02889646 +n02928299 +n09652149 +n01641391 +n04593524 +n07651025 +n03719343 +n03884778 +n03452594 +n02174659 +n12345280 +n03039827 +n03309687 +n11635433 +n02057330 +n01664990 +n09779790 +n02011016 +n09689958 +n07770763 +n03010915 +n03443912 +n02946509 +n13050397 +n03031012 +n04217546 +n04124202 +n12766869 +n04177041 +n12050533 +n03251932 +n03086580 +n03918737 +n04386792 +n03176594 +n01577035 +n01669654 +n01818832 +n10441962 +n03885904 +n03724756 +n02925666 +n03549589 +n03062122 +n02828427 +n12604228 +n03624400 +n07725888 +n03873699 +n01503976 +n02887079 +n03610098 +n02940385 +n04610013 +n03652100 +n04496872 +n04008385 +n02583890 +n10476467 +n03395514 +n03306385 +n04228581 +n02389261 +n12576323 +n01579149 +n01623425 +n02593019 +n03995265 +n02124484 +n12745386 +n04355267 +n02643836 +n01614343 +n03810952 +n04058594 +n12278650 +n03474779 +n02823510 +n00442437 +n12039317 +n04574067 +n03762602 +n02153109 +n03518943 +n04289827 +n02288268 +n07749969 +n04132985 +n03213826 +n04307986 +n03567066 +n02049088 +n04408871 +n03522003 +n09305898 +n04266375 +n08571898 +n03039259 +n01587526 +n03261603 +n00464277 +n02627532 +n02992368 +n03640850 +n03037404 +n04525191 +n02106854 +n07772147 +n04173511 +n12761284 +n03257210 +n02813544 +n07740342 +n04066270 +n03070059 +n03616428 +n02904233 +n03209910 +n04389854 +n03078995 +n03193260 +n01488038 +n01754533 +n12629305 +n02055107 +n11664418 +n04228693 +n03353951 +n03440682 +n03025250 +n03300216 +n02042046 +n04226826 +n03342015 +n03090000 +n02050313 +n03492250 +n01535690 +n01572654 +n03465718 +n02879309 +n06278338 +n04113406 +n03695857 +n09720256 +n01860002 +n02851939 +n09828216 +n02564270 +n03528901 +n02542432 +n11978961 +n01670802 +n03956623 +n01612275 +n09376786 +n03222318 +n02813645 +n02213543 +n13898207 +n03616763 +n03616979 +n11904109 +n04212282 +n04608435 +n02042472 +n04198453 
+n03216402 +n02015357 +n12282737 +n02699629 +n12866635 +n02048353 +n02933340 +n01793715 +n12001707 +n02878222 +n03187037 +n03105306 +n04080705 +n04254009 +n01623880 +n02839592 +n03436182 +n01591123 +n01318279 +n03002816 +n13155095 +n03141702 +n03775388 +n12165170 +n03322836 +n03259401 +n04471148 +n03911767 +n12585629 +n04317325 +n04257986 +n03133050 +n02035210 +n12891305 +n11882426 +n04491388 +n12948251 +n03498781 +n04262161 +n03775636 +n09915651 +n07584332 +n07852614 +n11626152 +n03901750 +n09723067 +n04265904 +n09920283 +n02397744 +n03253796 +n07712959 +n03898129 +n01743936 +n02075612 +n04560292 +n03479397 +n04334365 +n04357121 +n10145902 +n03844673 +n09854421 +n12687957 +n12598027 +n03944138 +n01839750 +n07722888 +n04258859 +n03088389 +n03351434 +n03509608 +n01677747 +n03145147 +n12046815 +n03505133 +n01629962 +n03333252 +n03993703 +n02962061 +n04529962 +n03463666 +n07681691 +n12160857 +n04187233 +n09331251 +n11614713 +n04376400 +n12301445 +n12633994 +n03883524 +n11614420 +n13062421 +n03645011 +n03293863 +n11640132 +n02579928 +n02854739 +n04461437 +n07729384 +n02977936 +n02836392 +n03593122 +n01666228 +n07820683 +n07568502 +n11910460 +n09348460 +n09712324 +n02403740 +n03482877 +n04370774 +n07750146 +n12992177 +n03152303 +n04134008 +n09805324 +n01611800 +n04374315 +n07586099 +n02032222 +n01979874 +n04350769 +n02907873 +n03016609 +n02543565 +n03256166 +n03016737 +n02419336 +n03268790 +n03559999 +n07765999 +n04607035 +n02416104 +n02123917 +n12484784 +n03225108 +n10739391 +n03506880 +n02918831 +n03045228 +n12516828 +n01314663 +n04172342 +n02768226 +n12368028 +n01500476 +n01558149 +n03604156 +n04035912 +n02359915 +n12261571 +n03875955 +n01887623 +n03871371 +n03390786 +n12494794 +n03826039 +n04465358 +n03838298 +n03165466 +n04229737 +n01321770 +n04354026 +n02998003 +n04114844 +n10611613 +n03600475 +n01909906 +n00466880 +n04284869 +n07722485 +n04496614 +n03298716 +n02285801 +n04081699 +n07765208 +n12659539 +n11618525 +n11757653 +n07727048 +n03913343 +n12070016 
+n02697675 +n04284572 +n02595702 +n04482297 +n03516996 +n03704549 +n02040266 +n04476116 +n01323261 +n03823216 +n07696403 +n03226880 +n09734535 +n03950537 +n01671479 +n03049924 +n12593994 +n04568841 +n03604400 +n01837072 +n01754370 +n03122202 +n12338454 +n04094720 +n04150980 +n03429682 +n03884926 +n03378005 +n02434954 +n03461288 +n02893692 +n04472563 +n10472129 +n04590021 +n07739344 +n04162433 +n03395859 +n12059314 +n03498662 +n03678729 +n02927764 +n02770211 +n11710393 +n07730207 +n04178190 +n07772935 +n03801880 +n04414675 +n12729521 +n12203529 +n04122578 +n04575824 +n06267655 +n03698360 +n02804515 +n02431337 +n08598568 +n02893608 +n02270623 +n00479440 +n11616662 +n02884994 +n04305323 +n02407625 +n04476831 +n04222307 +n03179910 +n11623967 +n00446311 +n00454983 +n02886434 +n12279458 +n03723781 +n11816121 +n02403231 +n11808299 +n07816296 +n03219483 +n02657694 +n00453478 +n02816656 +n02625851 +n04112752 +n03339529 +n12171316 +n02044517 +n04137773 +n01486838 +n03015149 +n12911673 +n03967270 +n03498441 +n11672269 +n03386870 +n11615967 +n02580679 +n01681653 +n02793199 +n02824319 +n10727458 +n02555863 +n01533000 +n02175916 +n12064389 +n04383015 +n02469472 +n03101664 +n03623338 +n12295796 +n02869249 +n01792042 +n03447075 +n04453390 +n04382438 +n04112252 +n03332393 +n12729729 +n01851207 +n04269270 +n12333771 +n06272612 +n03135532 +n02927887 +n11711537 +n12301180 +n04107743 +n01813948 +n03282295 +n09714694 +n00483409 +n01504344 +n04279353 +n04040373 +n12658308 +n04134523 +n10104064 +n12056601 +n04525417 +n07819166 +n12263038 +n02072798 +n03125057 +n03367410 +n04000592 +n03549897 +n01877606 +n01564914 +n12307076 +n02855925 +n03176763 +n12271933 +n04121728 +n07690511 +n02825442 +n04442441 +n01630901 +n03088580 +n02499808 +n10675010 +n01531971 +n02273392 +n01526521 +n01531344 +n03667664 +n02888270 +n04412416 +n07733394 +n04559910 +n04105704 +n11792341 +n04201064 +n01693175 +n04555291 +n02908773 +n01976868 +n03529175 +n03365231 +n03622839 +n04258333 +n03327133 +n03425769 
+n12477747 +n03718935 +n11727540 +n07933799 +n03030262 +n12043673 +n02619550 +n07937461 +n12198286 +n08560295 +n12402348 +n01733957 +n12344700 +n02763604 +n11925303 +n01557962 +n03927299 +n11611758 +n03035252 +n09454412 +n04004990 +n03456299 +n02175569 +n03668279 +n12352990 +n03507241 +n01534155 +n12278371 +n02499022 +n03822767 +n01318381 +n04024983 +n04277493 +n11934616 +n02027075 +n11611561 +n03454442 +n02236355 +n01732789 +n07722052 +n01489501 +n04409625 +n10563403 +n01817263 +n07757511 +n03770316 +n02977438 +n01840775 +n03607923 +n03322704 +n02375302 +n01614038 +n01646555 +n03952576 +n02946824 +n12847008 +n03016389 +n11809594 +n03165096 +n03839671 +n02687821 +n01689081 +n03822656 +n02597608 +n12336727 +n01579578 +n03631922 +n03904909 +n11658331 +n04224543 +n12621410 +n03870672 +n04252331 +n09720842 +n01396048 +n11988596 +n00483205 +n02871005 +n01597022 +n02382039 +n07743902 +n02358890 +n07877961 +n05263448 +n01862399 +n04136800 +n10624540 +n11990167 +n02731398 +n03366974 +n03490006 +n01561732 +n02626265 +n10627252 +n12402051 +n08517676 +n10488656 +n03099274 +n03718581 +n11806219 +n01830042 +n07728585 +n03732114 +n10755080 +n03359285 +n07720277 +n03354207 +n01596273 +n04416005 +n01847253 +n07733567 +n09725653 +n04274985 +n00449977 +n07772274 +n12063639 +n01530439 +n01322508 +n04397768 +n07273802 +n04261281 +n10524076 +n01678343 +n03410938 +n01797020 +n02388832 +n07719616 +n03639497 +n09787765 +n07721018 +n11818069 +n04185529 +n11644462 +n12074408 +n00483848 +n01583495 +n11891175 +n03347617 +n03308481 +n02535258 +n07750872 +n07748157 +n02855701 +n04584373 +n02461830 +n02912557 +n12277578 +n03604311 +n03643253 +n03031152 +n04039742 +n03435743 +n13908201 +n04150153 +n03250405 +n01410457 +n02357401 +n12588780 +n12729315 +n01690149 +n02538216 +n03171228 +n02424909 +n06274760 +n03775747 +n04211857 +n12429352 +n12272239 +n11759853 +n03401129 +n12649317 +n02625258 +n12651611 +n03603442 +n02803934 +n03861271 +n02605936 +n02018368 +n12711984 +n02811936 +n04612026 
+n01339471 +n02923682 +n09194227 +n04346157 +n03939178 +n12635532 +n01593028 +n01793249 +n02380464 +n12400720 +n07708398 +n12020941 +n12492106 +n12850336 +n12749679 +n02892948 +n12591017 +n03193423 +n01791463 +n11979527 +n12134025 +n12167075 +n09308743 +n13108545 +n01618503 +n07827284 +n07724492 +n02338145 +n04533946 +n01586020 +n07598256 +n01603953 +n12646740 +n03067518 +n04046277 +n01532511 +n07769584 +n11644046 +n12753573 +n02681392 +n08492461 +n07749446 +n04409384 +n01791954 +n12330891 +n04560882 +n10145480 +n04250473 +n02655848 +n02903126 +n11736851 +n11901294 +n12865824 +n03870105 +n00449892 +n04240752 +n11851258 +n04200537 +n12049562 +n01521399 +n03565830 +n07860447 +n03067212 +n01664674 +n07561590 +n02727141 +n02324514 +n02372952 +n01584853 +n07766173 +n11811706 +n03097362 +n04200258 +n02732572 +n01853195 +n12282527 +n09838621 +n02764505 +n04256891 +n12337617 +n12635955 +n07831267 +n11628793 +n12316572 +n07807834 +n02037869 +n01821869 +n02820556 +n04517211 +n01839086 +n03842986 +n07698401 +n02386224 +n07841800 +n01830915 +n11616486 +n11902389 +n03427202 +n12727101 +n01851573 +n02125494 +n07746186 +n11628087 +n07746551 +n03943115 +n11892029 +n02861022 +n11733312 +n01852329 +n09392402 +n12336224 +n07887099 +n03403643 +n04414199 +n07895100 +n02264232 +n02317781 +n07823460 +n07755929 +n02524202 +n04324297 +n11627512 +n01585715 +n02922578 +n00479887 +n02687423 +n02416880 +n11784126 +n12073991 +n01853870 +n01561452 +n04187970 +n10300154 +n02520147 +n12294124 +n07743224 +n12066018 +n11634736 +n02041678 +n11626585 +n02386141 +n03986949 +n07860331 +n12356023 +n12072722 +n03082280 +n12083113 +n12979829 +n01448594 +n03007444 +n07858978 +n01641739 +n02043333 +n12020736 +n02751215 +n04528079 +n01538200 +n07925608 +n12091550 +n03742019 +n03518305 +n01642539 +n03414029 +n04363991 +n03767966 +n02596067 +n01586374 +n02885882 +n04080138 +n11617631 +n02033779 +n09451237 +n02310585 +n12648045 +n03955489 +n01752736 +n07899899 +n02299505 +n01579410 +n02156871 +n02998841 
+n03759661 +n02050809 +n02683454 +n11621950 +n02910145 +n04967801 +n07896661 +n11906917 +n12275675 +n11611233 +n07736692 +n02312640 +n12588320 +n04399537 +n12757303 +n04197781 +n12717224 +n11635152 +n03122295 +n01792955 +n13133932 +n02518324 +n01584695 +n02915904 +n02967294 +n04345201 +n03019434 +n02470238 +n03049782 +n03101517 +n12709688 +n03716887 +n02422391 +n12638753 +n00288384 +n02162561 +n02053584 +n01317294 +n03334291 +n07814634 +n12273768 +n12406715 +n11644226 +n01646802 +n03460147 +n12338796 +n01972541 +n02147947 +n03890093 +n04127395 +n01581984 +n01681328 +n02213239 +n04582869 +n03254189 +n03274265 +n03186285 +n11839823 +n01624833 +n09792969 +n07891189 +n12023726 +n07619208 +n03466600 +n01849676 +n12190869 +n03079136 +n12317296 +n13001930 +n00477639 +n02944459 +n03903733 +n04131208 +n12710295 +n12180885 +n11612349 +n03443149 +n03982331 +n04264765 +n12642090 +n03237416 +n13868944 +n04046400 +n11705171 +n11979715 +n12597134 +n01609956 +n01568294 +n01469103 +n00443692 +n01606672 +n04556408 +n07690019 +n03977592 +n03358726 +n12696492 +n01573240 +n11632619 +n01772664 +n03453231 +n04179712 +n03646020 +n01812662 +n04306592 +n07724654 +n13908580 +n02903852 +n04284438 +n13132656 +n04317063 +n07829248 +n01589718 +n02654745 +n12294331 +n12515925 +n07900825 +n07721195 +n04189282 +n11907689 +n01624537 +n12333530 +n07762244 +n11757851 +n01599159 +n04038338 +n01568892 +n12691661 +n09744834 +n04307767 +n03120778 +n07920540 +n03781683 +n04185804 +n12080820 +n04354182 +n07574426 +n02579303 +n03046802 +n12078172 +n03210245 +n01614556 +n02304432 +n07713267 +n09724656 +n02861147 +n12755387 +n01483830 +n12921868 +n12026018 +n07817871 +n12062781 +n04241573 +n11621727 +n03376159 +n11815721 +n13007034 +n03540090 +n00450866 +n11619455 +n01528845 +n01568720 +n12743352 +n02871314 +n03606251 +n01490670 +n04246060 +n02053425 +n10780284 +n01915700 +n04510706 +n00456465 +n01563945 +n11809094 +n09855433 +n04112579 +n03855333 +n09809925 +n03413684 +n02123478 +n12070712 +n03651843 
+n02032355 +n01591005 +n01646648 +n02752615 +n02415829 +n03283221 +n04368496 +n01573360 +n02321170 +n10348526 +n04446844 +n07763792 +n12077944 +n04431025 +n02895438 +n10082687 +n07714188 +n02262449 +n03090172 +n12491017 +n01558461 +n12754781 +n04070415 +n04297098 +n03424862 +n01970164 +n09833536 +n01793435 +n01670535 +n09894445 +n09676247 +n01548492 +n12501202 +n03250089 +n03358380 +n02578928 +n12020184 +n02301935 +n03393017 +n12340755 +n01849863 +n01748906 +n03075946 +n01810268 +n01984245 +n04555400 +n12286988 +n04097760 +n02050586 +n12104238 +n01679962 +n02709101 +n01569060 +n12790430 +n01757901 +n13199717 +n11815918 +n07827410 +n02970534 +n12942572 +n07924276 +n04103918 +n11704093 +n07908647 +n07601686 +n12172906 +n04084889 +n02381261 +n02299157 +n11978713 +n12460957 +n02963503 +n03272810 +n12469517 +n03443005 +n01797307 +n02952237 +n11908549 +n13912540 +n03428226 +n10276477 +n01757343 +n01443243 +n01607600 +n03580518 +n12709103 +n07579688 +n04329834 +n12710415 +n11808932 +n10583790 +n02213788 +n11622184 +n12596709 +n02216211 +n07721942 +n07765361 +n01848453 +n11724109 +n02028451 +n02935017 +n12046028 +n10629939 +n00441073 +n07900958 +n12451399 +n02823964 +n04210120 +n01848840 +n10485883 +n07767709 +n02432704 +n11622591 +n03210372 +n07848196 +n11992806 +n02953197 +n07620689 +n01521756 +n03571625 +n03158186 +n12647560 +n02065407 +n01572782 +n09890749 +n05581932 +n07754451 +n03350204 +n13044375 +n12294723 +n12482893 +n04434531 +n12989938 +n12196336 +n01701859 +n07746334 +n11941924 +n02047411 +n12650379 +n10486166 +n01599556 +n01567879 +n12675876 +n01682435 +n02043808 +n12362668 +n12306089 +n02999138 +n01679626 +n03557270 +n01546039 +n11901759 +n01549053 +n11883328 +n06596727 +n03193107 +n11612018 +n03300443 +n03612010 +n03668488 +n12648888 +n01448291 +n11632167 +n10262445 +n09742101 +n09717233 +n04299370 +n03094159 +n04536595 +n03514693 +n02029706 +n02886321 +n07816052 +n04045255 +n01851731 +n02627292 +n01841288 +n02739889 +n02932693 +n03784896 +n04569063 
+n07902799 +n03863108 +n02607470 +n13200651 +n07916183 +n01573898 +n04347119 +n10076604 +n13033577 +n01824035 +n03630262 +n04426316 +n03064250 +n12262018 +n12048399 +n12279772 +n04143140 +n07829331 +n12891643 +n01826680 +n12646605 +n13103877 +n02023855 +n03086868 +n04163530 +n03736470 +n04358117 +n13872822 +n03159640 +n01680655 +n11611087 +n03980478 +n02978478 +n01555004 +n12402840 +n07763987 +n04387706 +n04979002 +n03258330 +n09856671 +n11624192 +n01538059 +n02003839 +n12552309 +n10469874 +n01576076 +n03643149 +n04419868 +n04586581 +n00483508 +n03131967 +n01847407 +n07929172 +n09683757 +n03786621 +n04369282 +n12733870 +n11612575 +n11619227 +n03301833 +n02176439 +n01569971 +n07935043 +n02563792 +n02051059 +n04482177 +n11859472 +n11710136 +n04115144 +n07864934 +n07691758 +n02620167 +n07748276 +n03415486 +n07835921 +n00452152 +n01848323 +n12906214 +n12075010 +n01563449 +n01499396 +n01570267 +n12047345 +n07920989 +n07601572 +n02683558 +n04428634 +n04345028 +n12161969 +n03460040 +n02561514 +n02006364 +n03582959 +n11812910 +n13185269 +n04297847 +n07896165 +n01552813 +n12361946 +n02031585 +n12766595 +n11622368 +n11695599 +n11615387 +n02509197 +n12409470 +n01314388 +n11758799 +n09846469 +n02675219 +n04253057 +n04041243 +n12276628 +n04381724 +n01855188 +n02203152 +n04403925 +n11895092 +n11924849 +n04172904 +n11888800 +n01546506 +n07906718 +n01489920 +n03436417 +n03615655 +n07765073 +n02434190 +n02004492 +n12282235 +n12406488 +n11981192 +n10373390 +n13183056 +n04332074 +n12818346 +n07731006 +n02598573 +n02438580 +n01957335 +n03356982 +n10288964 +n02629230 +n02042759 +n12319414 +n01451426 +n03521675 +n02016066 +n01813532 +n13207335 +n11805544 +n04401828 +n02952109 +n03963294 +n10013811 +n12058630 +n01551711 +n01574560 +n01858780 +n10093818 +n03858183 +n01550172 +n03571280 +n02309242 +n10258786 +n01569423 +n10134178 +n08578517 +n04445327 +n03250279 +n02584449 +n03223553 +n04523831 +n04485423 +n02050442 +n04474035 +n04528968 +n02649546 +n01913166 +n09971273 +n04517408 
+n02437482 +n03824713 +n03778817 +n07643026 +n01613177 +n12022054 +n07714448 +n07592768 +n00454493 +n03296328 +n02305929 +n03084834 +n03698815 +n12093600 +n08649711 +n03466493 +n04067658 +n03041114 +n03514451 +n01491006 +n04178329 +n03790953 +n03938401 +n02048115 +n07768858 +n03273740 +n10333601 +n05418717 +n12754003 +n02098806 +n03314608 +n01565930 +n12113195 +n12284821 +n12483427 +n04332580 +n10382710 +n03416094 +n02837887 +n03917198 +n14131950 +n04414476 +n11861641 +n11903671 +n01841441 +n09872066 +n01806467 +n04964799 +n00467320 +n01595974 +n03220692 +n01339083 +n01825278 +n11727358 +n04518343 +n11984144 +n07724269 +n02292692 +n02324850 +n01753032 +n01624115 +n11816649 +n07930062 +n02460451 +n12319204 +n04340521 +n12325234 +n01541102 +n02979836 +n00141669 +n01822300 +n11658544 +n12272883 +n03334382 +n11726707 +n03639077 +n07904934 +n03516367 +n03698723 +n03553248 +n11812094 +n03724417 +n01540566 +n02341974 +n11819912 +n07734555 +n02987379 +n03580845 +n12546962 +n02548247 +n12753245 +n07768423 +n12849279 +n11617090 +n02912894 +n07840027 +n12295033 +n12703383 +n02696165 +n10419785 +n04426427 +n03694639 +n11712282 +n04142999 +n01597737 +n03801533 +n01495493 +n07774719 +n03267113 +n01742821 +n03859170 +n03416640 +n03320959 +n12733218 +n02017725 +n13229543 +n09344324 +n04965451 +n01490112 +n10069296 +n12084555 +n04554406 +n04086446 +n02976249 +n02656032 +n02424486 +n02381609 +n09934337 +n04573937 +n07685399 +n02800497 +n02905152 +n02951703 +n07760153 +n03609397 +n00447463 +n03680512 +n02046939 +n03288886 +n11870418 +n03386544 +n07767171 +n07847453 +n12687044 +n01664492 +n03099147 +n03463381 +n02125081 +n12920204 +n03517647 +n02603540 +n12267411 +n11933546 +n11947802 +n04387095 +n12975804 +n02973904 +n13195341 +n04048441 +n11753143 +n03212114 +n03298858 +n04366116 +n01424420 +n10450161 +n01442972 +n07877299 +n04503593 +n04349306 +n12969425 +n12597466 +n03092656 +n07914995 +n03487886 +n12223569 +n01756733 +n13919919 +n04175147 +n02029087 +n03530511 +n02425887 
+n03572107 +n03927539 +n03383099 +n04130907 +n01632601 +n07823105 +n10378026 +n02382850 +n07613266 +n03235180 +n02810782 +n12708654 +n11636835 +n02823124 +n03402941 +n12121610 +n03715114 +n04052658 +n00480366 +n12493208 +n04255163 +n12145477 +n01489709 +n12402596 +n01598074 +n03837606 +n02628062 +n04103364 +n03247083 +n02032480 +n07736256 +n12578916 +n09218315 +n02218371 +n03730334 +n02080146 +n03836906 +n02868638 +n02198859 +n12744387 +n02942460 +n11754893 +n12274358 +n02725872 +n09218494 +n03942920 +n07574780 +n02921756 +n01757115 +n02763306 +n11758122 +n10508141 +n02303284 +n04083800 +n13879049 +n12765115 +n12075830 +n02666943 +n11980318 +n07907037 +n12794135 +n02333909 +n03870980 +n07586718 +n11923174 +n10782471 +n01493146 +n12294871 +n11726269 +n12932173 +n07825972 +n12732009 +n03572321 +n07682197 +n03423306 +n12495895 +n03545756 +n03557692 +n03785237 +n07902937 +n09899671 +n12061614 +n07902443 +n01449374 +n12632335 +n03474896 +n03539433 +n04310904 +n03902482 +n12006930 +n03285578 +n04200000 +n03912218 +n07821260 +n03548626 +n03223686 +n11826198 +n03165616 +n02104280 +n09981278 +n09382099 +n03732458 +n03987990 +n09946814 +n12270741 +n07737745 +n04172776 +n10189278 +n03543012 +n12629666 +n02180875 +n04087432 +n12961879 +n03321954 +n12528549 +n02424085 +n09843443 +n03846677 +n12304703 +n09873473 +n03410571 +n03041810 +n02425228 +n01562451 +n03615790 +n10081204 +n03985881 +n07842130 +n02890513 +n03649797 +n02381004 +n12560621 +n12523475 +n07687626 +n11905749 +n11759404 +n12905412 +n03542605 +n03983612 +n12573474 +n11972291 +n03767459 +n02698634 +n12713866 +n13084834 +n02202006 +n13108323 +n02631475 +n10737103 +n03637898 +n03069752 +n12400489 +n09692915 +n10242328 +n02794664 +n12465557 +n12085267 +n03348868 +n12754981 +n02745611 +n10504206 +n12073554 +n02835724 +n04605572 +n02825961 +n03528523 +n12116429 +n02973805 +n12708941 +n01544704 +n04180229 +n09403211 +n08242223 +n02146371 +n12127768 +n09770359 +n03295246 +n01757677 +n04385799 +n02584145 +n07909593 
+n12587132 +n13029326 +n04184316 +n07903643 +n01848555 +n10750031 +n02332156 +n12703557 +n03196990 +n12406902 +n02768973 +n12416073 +n02147591 +n09724533 +n09693982 +n12687462 +n01982068 +n03435991 +n03272125 +n07713763 +n03018712 +n03648431 +n03336575 +n07854184 +n12806015 +n07879174 +n03984643 +n03147280 +n02699915 +n07617708 +n01533651 +n12483841 +n01697611 +n02576906 +n03724066 +n03935116 +n09782397 +n01599269 +n10672371 +n12066630 +n03178674 +n15086247 +n03523987 +n02826068 +n12580654 +n02358390 +n01647640 +n10259997 +n03738066 +n13915023 +n02639605 +n03174450 +n12269406 +n09874428 +n03432061 +n04386051 +n03923918 +n04592465 +n12480456 +n10333439 +n04206790 +n01443831 +n02967626 +n07733712 +n03746155 +n12947313 +n11690254 +n12244650 +n12670758 +n08658309 +n12710693 +n11860555 +n03485198 +n03047799 +n04461570 +n07600177 +n02126640 +n12704343 +n02866386 +n03008976 +n04532831 +n03465426 +n12691428 +n01641206 +n04962062 +n03254046 +n04425804 +n02014524 +n03439348 +n02538010 +n11603246 +n12265600 +n12277800 +n04016240 +n12086192 +n09650729 +n01549641 +n03112719 +n04961062 +n02710324 +n12049282 +n12362274 +n11969607 +n12856680 +n02201000 +n07863802 +n03360622 +n07601809 +n04354487 +n12898774 +n12939282 +n03109693 +n12867826 +n12441390 +n12915811 +n12879527 +n04137355 +n04131368 +n03527149 +n10164492 +n09932508 +n12426623 +n12575812 +n02557318 +n10263790 +n04309548 +n00476235 +n04194127 +n11876634 +n10327987 +n03499354 +n02616851 +n04464615 +n03615406 +n02744844 +n11732567 +n10347446 +n09752519 +n04228215 +n10004718 +n07899533 +n12030908 +n15102894 +n12044467 +n11711764 +n02610066 +n03415749 +n04562496 +n02034295 +n02297442 +n03566193 +n12506991 +n07774842 +n12827270 +n14908027 +n12242409 +n04072960 +n02829596 +n12496427 +n02266050 +n13108481 +n12473840 +n08677424 +n12076223 +n15091473 +n02815749 +n04549028 +n12558425 +n12023407 +n04179824 +n02378541 +n03188725 +n12517445 +n07573347 +n02004131 +n11921395 +n12570972 +n10602470 +n12095647 +n03854421 +n02450295 
+n02792409 +n03543735 +n12836337 +n12204175 +n12152722 +n07900734 +n12517642 +n02775039 +n12607456 +n03376938 +n12179122 +n09873348 +n01847978 +n07888816 +n10453184 +n09675922 +n01851895 +n12865562 +n01797601 +n03711044 +n02738859 +n12064591 +n04033425 +n08551296 +n01650690 +n01537895 +n04207151 +n10087434 +n12261808 +n09438844 +n10364198 +n01814755 +n01583209 +n12270946 +n11892817 +n03344642 +n04117464 +n07847917 +n04003241 +n10362319 +n10477713 +n03495570 +n07560542 +n04363777 +n04534359 +n02404906 +n03349892 +n07712267 +n02960352 +n07866277 +n07857170 +n00324978 +n02755823 +n03150511 +n04211528 +n01899894 +n07588299 +n11874081 +n03425325 +n04506506 +n11949402 +n02952374 +n03309110 +n12159388 +n07591049 +n03068998 +n03228254 +n10279018 +n04173046 +n07728053 +n13052931 +n01597906 +n12368451 +n02767665 +n09435739 +n03915900 +n09728285 +n03292603 +n03331077 +n07817160 +n07917392 +n12540250 +n04153025 +n10209082 +n03968581 +n12676534 +n11824146 +n03521899 +n01853666 +n04292921 +n12332030 +n03984759 +n02863014 +n07801091 +n07723177 +n03289660 +n01533481 +n04488202 +n03468821 +n02382338 +n03543254 +n01961985 +n07915918 +n03703862 +n02771004 +n02047045 +n03877674 +n13141415 +n03529629 +n02240517 +n03675235 +n04491638 +n12384037 +n04419642 +n03019685 +n07591586 +n04496726 +n12985420 +n12927013 +n12196694 +n03473227 +n11621547 +n02988066 +n10451450 +n07729828 +n09618760 +n12196527 +n01555305 +n12830222 +n11950877 +n13190747 +n12160303 +n12390099 +n02818135 +n03163381 +n04554211 +n03244919 +n07897975 +n03386726 +n04290615 +n02011281 +n12407890 +n04123448 +n07904865 +n03447358 +n02393940 +n07931870 +n02937958 +n04318787 +n04587327 +n12807409 +n04112430 +n07560193 +n12774299 +n02618827 +n07854982 +n03757604 +n03817191 +n12793494 +n02324431 +n03013850 +n04113641 +n01612476 +n03127408 +n02038466 +n03799876 +n04257684 +n03382292 +n10449664 +n04394630 +n10275395 +n07698250 +n12329473 +n07694659 +n07642742 +n02563648 +n08583455 +n02557182 +n02775178 +n09274152 +n03189083 
+n12570703 +n04211219 +n12486574 +n03073694 +n11969166 +n02475078 +n02976350 +n08584914 +n07899660 +n10116702 +n01613807 +n12461109 +n04025508 +n12451240 +n12596849 +n12079963 +n03541269 +n04561422 +n11699442 +n07725255 +n03460297 +n07616748 +n12757458 +n03103563 +n02813752 +n07698782 +n12840362 +n01543632 +n01602832 +n01875313 +n12472024 +n02926591 +n02872333 +n10728624 +n12532564 +n03882960 +n12333053 +n03684224 +n13146583 +n03436075 +n04154340 +n03868643 +n02598878 +n04139140 +n03266371 +n04083309 +n12506341 +n12200143 +n03503477 +n12807773 +n03123917 +n13029760 +n10173771 +n03659809 +n12047884 +n12759273 +n04193377 +n04258438 +n04597400 +n04579986 +n03719743 +n04299963 +n02864504 +n10510245 +n03417970 +n09719794 +n03138344 +n02085272 +n07694516 +n12665857 +n01642257 +n03229244 +n10581890 +n10318293 +n03635108 +n10652605 +n12189429 +n09934774 +n11709205 +n04207903 +n10296176 +n10603851 +n03450734 +n13223588 +n12754648 +n09886403 +n07751280 +n11950686 +n07814390 +n12799776 +n01646902 +n09796809 +n12819728 +n01938454 +n02410011 +n07607138 +n02119634 +n10332861 +n09230202 +n02757061 +n02849885 +n15092227 +n12151615 +n03111041 +n02413050 +n03506560 +n07744057 +n04030518 +n12544539 +n04089836 +n02038993 +n13882201 +n12099342 +n01946630 +n10095769 +n02982416 +n12957924 +n13215586 +n07726525 +n12452836 +n03801671 +n04598318 +n01449712 +n12428747 +n04119751 +n10509063 +n07694839 +n02782602 +n11626409 +n02573704 +n12399384 +n12388989 +n01601068 +n11971406 +n04367011 +n07930315 +n12925179 +n04967674 +n03497352 +n03653833 +n01819465 +n03688192 +n02802990 +n03393761 +n04430475 +n13107694 +n10384496 +n07867164 +n12449526 +n01515303 +n12574320 +n01444339 +n07919310 +n03453443 +n04173907 +n02887489 +n07772788 +n03629520 +n02580830 +n11705387 +n12069679 +n01956344 +n02406533 +n03973402 +n03938037 +n04969952 +n04103094 +n04393808 +n07715407 +n04172107 +n01917882 +n12085664 +n07608429 +n09835230 +n04135024 +n07842605 +n12568186 +n04339879 +n07691091 +n01801876 +n00474568 
+n01807105 +n12128071 +n01673282 +n11948864 +n03991837 +n09659188 +n02070174 +n02670683 +n12454949 +n10385566 +n11631854 +n12305293 +n12002428 +n12948495 +n12757816 +n11852028 +n10690648 +n09283866 +n03214582 +n03423877 +n04127521 +n03006626 +n09283193 +n07712559 +n01447331 +n02981321 +n02658531 +n11947629 +n02419634 +n02420828 +n11923637 +n12570394 +n11968931 +n12731029 +n09749386 +n07736813 +n03967396 +n11908846 +n03029445 +n02426481 +n01964271 +n13198914 +n04484432 +n12656685 +n10806113 +n11849983 +n03236423 +n10649197 +n07688624 +n03057541 +n12015221 +n02094931 +n02014237 +n07560331 +n02801450 +n04206570 +n07556406 +n11627908 +n11889619 +n07852229 +n04063154 +n02713364 +n02783459 +n12877838 +n02930214 +n02125010 +n02407276 +n07815424 +n12855494 +n12530818 +n07750449 +n01963317 +n10082997 +n03245724 +n03012013 +n03555006 +n02421136 +n03332989 +n04375405 +n03746486 +n12636224 +n03278914 +n07917133 +n12504783 +n09416890 +n03896526 +n02258198 +n12983048 +n03837698 +n12869061 +n04541987 +n01637615 +n04401949 +n02241426 +n13220122 +n07876651 +n03729308 +n02364840 +n01339801 +n03418915 +n09257843 +n11614039 +n09731343 +n03809603 +n05399243 +n01569262 +n11901597 +n03124474 +n01566207 +n03796522 +n12595699 +n04573281 +n09689435 +n11859737 +n03201529 +n12902662 +n03374372 +n03760944 +n09189157 +n01517966 +n10431625 +n02898269 +n03693707 +n04369025 +n07834618 +n04095342 +n02786331 +n03822504 +n02284611 +n09862621 +n03436891 +n07688898 +n12435777 +n03949317 +n12443323 +n12273114 +n12623077 +n04333869 +n07907831 +n07774596 +n05450617 +n03320262 +n04190376 +n12671651 +n11819509 +n07588111 +n09756049 +n07611046 +n04973291 +n11602873 +n00120010 +n03500699 +n03844815 +n03708843 +n04452528 +n04387261 +n09889065 +n10147121 +n03318294 +n12599435 +n04164406 +n01965529 +n11636204 +n11791569 +n12275131 +n02977330 +n07851443 +n04132603 +n07824191 +n09760609 +n12190410 +n07915491 +n12665271 +n10120671 +n02570164 +n10208950 +n02163297 +n02244797 +n09842528 +n08645104 +n01841679 
+n11603835 +n04488857 +n07814487 +n01953762 +n04612373 +n11877193 +n03198500 +n03981924 +n01943087 +n11552806 +n04414909 +n03005033 +n02457945 +n10500217 +n10375314 +n04607242 +n07914777 +n09832456 +n12915568 +n12813189 +n10578021 +n03519081 +n07801779 +n12026476 +n03296081 +n03850492 +n07902121 +n09881265 +n12562785 +n03290195 +n10131151 +n10078719 +n01558765 +n03917814 +n02045596 +n07734183 +n03414676 +n07933154 +n02126787 +n12148757 +n12263987 +n07684164 +n03406966 +n01492569 +n02988963 +n12963628 +n09964202 +n03417749 +n01854838 +n02921029 +n02183096 +n11762433 +n11722466 +n02387093 +n02768655 +n12519089 +n09871229 +n07938313 +n10502329 +n11989393 +n03768916 +n13145040 +n11813077 +n04457910 +n03655720 +n03703945 +n11876803 +n01438581 +n07910379 +n07847827 +n02300797 +n09245515 +n10754189 +n04581102 +n12513172 +n02458135 +n03762332 +n11789589 +n09695620 +n03850053 +n07911249 +n12342852 +n12753007 +n07748574 +n07727458 +n03696568 +n04304680 +n07723039 +n07775197 +n07577144 +n03043693 +n04374735 +n01858281 +n09228055 +n09466678 +n01949085 +n02024479 +n11623815 +n02704645 +n07894451 +n01751472 +n01646388 +n01317916 +n13880994 +n10300500 +n11794024 +n03735963 +n04610274 +n11854479 +n07754894 +n02639087 +n02122510 +n02262803 +n12732966 +n04529108 +n13194036 +n09990777 +n10009276 +n12088223 +n12155009 +n07886176 +n04278247 +n04222723 +n11707229 +n01999186 +n07851641 +n12741792 +n01315213 +n10033412 +n04249582 +n03586631 +n03237839 +n12037499 +n12014085 +n07756325 +n01636352 +n03905947 +n08611339 +n07693590 +n03724538 +n09791816 +n01666585 +n10588965 +n11613219 +n10542608 +n12913791 +n10528023 +n03171635 +n11923397 +n12854600 +n10410246 +n12698598 +n04135118 +n09844457 +n04441790 +n03882611 +n02337001 +n07907342 +n12561169 +n12027658 +n10719132 +n09851165 +n02801823 +n12330587 +n01683558 +n12162181 +n04387932 +n11704620 +n09679170 +n07601290 +n04028221 +n10277027 +n09877750 +n11758483 +n10027246 +n03819336 +n10205231 +n12478768 +n03451711 +n12973443 +n01923025 
+n03262717 +n07807594 +n00475535 +n07744430 +n02341475 +n04614655 +n07924747 +n03388323 +n12680402 +n03202940 +n04534520 +n09661873 +n15092059 +n11832480 +n04198355 +n12529220 +n12389130 +n12304115 +n03234952 +n07610620 +n02868975 +n04442741 +n05282652 +n02820675 +n12795352 +n12675299 +n08547468 +n04189651 +n04141198 +n04513998 +n12273939 +n12482668 +n12858618 +n01958346 +n03172038 +n10280674 +n04301760 +n02631330 +n12433178 +n07763107 +n03068181 +n07565259 +n03605598 +n13177884 +n04005197 +n09751496 +n12737383 +n07648997 +n09839702 +n09442595 +n07925229 +n12150722 +n11898775 +n09904208 +n02207345 +n07642361 +n07685918 +n03205458 +n10574538 +n09742315 +n02599557 +n03585682 +n04273659 +n02200850 +n03410740 +n03391301 +n07726672 +n09782167 +n13155305 +n02067240 +n07561848 +n07728708 +n12463134 +n12228229 +n09743487 +n12225563 +n03421669 +n03226375 +n03973945 +n12498055 +n04483925 +n04564278 +n11890150 +n12519563 +n12754468 +n04353573 +n11615607 +n04430896 +n04585128 +n10395828 +n10773665 +n02772435 +n09881895 +n12663023 +n01615303 +n12803754 +n09445008 +n03955296 +n05245192 +n05486510 +n07899769 +n07575510 +n02307681 +n03814817 +n02670186 +n03598515 +n12797860 +n03518135 +n07587962 +n12630763 +n06273743 +n09843824 +n03226254 +n12407222 +n02961544 +n12951835 +n06417096 +n02016659 +n01441117 +n07735404 +n09411189 +n13896217 +n03262248 +n03451120 +n02525382 +n03375329 +n04155068 +n12916179 +n10297234 +n11907100 +n03423568 +n04360914 +n12027222 +n12199790 +n01744270 +n09896401 +n07925116 +n03693860 +n04414319 +n07767549 +n03555564 +n04043411 +n07872593 +n03774461 +n03129471 +n04497801 +n11756870 +n09776346 +n04530283 +n01520576 +n12828220 +n01583828 +n04120842 +n09676021 +n04344734 +n01916388 +n12513613 +n09861863 +n02310334 +n03318983 +n04533499 +n02427576 +n12727518 +n04502059 +n11725480 +n11987126 +n11876204 +n03504205 +n09720595 +n12315999 +n12935609 +n04452757 +n12201331 +n01603152 +n10772092 +n03156279 +n12723610 +n02003037 +n03244775 +n07802963 +n11954642 
+n07770034 +n09931165 +n10559508 +n01745902 +n07654148 +n10070108 +n01585287 +n13196003 +n04389718 +n10253122 +n03730893 +n02983357 +n02783900 +n01680813 +n03072440 +n03109253 +n03274435 +n11655974 +n10048612 +n07849733 +n07896994 +n03792334 +n03035832 +n03819448 +n03105088 +n11943992 +n01485479 +n01699675 +n11795049 +n12086778 +n01840120 +n07753980 +n10685398 +n04346428 +n04532398 +n07709172 +n02146700 +n09461069 +n03853924 +n01321456 +n12068432 +n09757449 +n03206282 +n03751757 +n13053608 +n11695974 +n12123741 +n03500209 +n04367371 +n02890940 +n01917611 +n07835331 +n02907656 +n04136045 +n12059625 +n03862862 +n12864160 +n00440039 +n03448590 +n12628986 +n04115802 +n03949145 +n12916511 +n12647893 +n09706255 +n13181811 +n07752109 +n04375615 +n01648620 +n04403524 +n09967967 +n12911079 +n03857687 +n02803539 +n01551080 +n10734891 +n13235159 +n04127633 +n07935878 +n12853482 +n10191001 +n03126385 +n10076224 +n01812866 +n12919403 +n03769610 +n09283767 +n03462110 +n11770256 +n12038898 +n09889170 +n11894558 +n10298647 +n02592055 +n02795670 +n11701066 +n12762049 +n02890662 +n07918193 +n02976455 +n03100897 +n13127843 +n12184912 +n00468299 +n12407079 +n12496949 +n03541537 +n05260127 +n01535140 +n01541760 +n11945783 +n07687053 +n07745046 +n12083847 +n02382132 +n12270027 +n10140597 +n03788914 +n01790711 +n02197689 +n03173270 +n10368624 +n04449290 +n01579729 +n07834872 +n07734417 +n02379630 +n01636829 +n12549192 +n12951146 +n13579829 +n03268142 +n11761202 +n02769669 +n09452760 +n04095577 +n12031139 +n02003577 +n12891469 +n03931885 +n01577941 +n04176295 +n12046428 +n03418402 +n13145250 +n11865874 +n12473608 +n11797321 +n01798168 +n09923186 +n02786736 +n01698782 +n09976283 +n03975788 +n14685296 +n01682172 +n07838441 +n02771286 +n03429137 +n03948950 +n02512830 +n02298218 +n10141364 +n02823848 +n02077384 +n12584715 +n11748811 +n02214773 +n03667552 +n04121426 +n04135710 +n07579917 +n12275888 +n07826453 +n12167436 +n04586072 +n09877288 +n04248396 +n02761696 +n03038870 +n01490360 
+n12353203 +n09785891 +n12057660 +n04146343 +n12557556 +n02081798 +n02917964 +n07898617 +n12597798 +n07574176 +n07764630 +n03008177 +n04255899 +n04434207 +n07897600 +n09929577 +n11811921 +n12415595 +n02893941 +n12276110 +n02821202 +n09690621 +n02508742 +n02077787 +n02390640 +n03764822 +n02257985 +n13033134 +n04559166 +n07865196 +n10506915 +n12051103 +n10473917 +n12775919 +n02971579 +n12880462 +n11837970 +n02063662 +n09840520 +n12019827 +n09208496 +n12836508 +n02982232 +n04219185 +n03332005 +n07914128 +n07862461 +n04250692 +n09267854 +n04561734 +n02076402 +n12344837 +n02919148 +n06592281 +n03668803 +n03062985 +n04246731 +n12112609 +n04012482 +n03558633 +n03982642 +n01998741 +n07665438 +n04209509 +n07913882 +n01749244 +n07801342 +n02611561 +n04488742 +n01897536 +n10624437 +n13128976 +n07931612 +n04300643 +n03727067 +n03360431 +n07593471 +n10253296 +n03297226 +n03854506 +n07879450 +n10562283 +n12557438 +n13154388 +n12862512 +n02126028 +n07752514 +n02387887 +n12066261 +n07666176 +n02806530 +n09988703 +n03721252 +n03221540 +n12195533 +n02682569 +n03622058 +n03943266 +n04207596 +n11721337 +n02427032 +n07910152 +n01551300 +n12861345 +n11660300 +n03786313 +n12966945 +n02046171 +n02797535 +n03546112 +n07711232 +n02044908 +n02998563 +n02652132 +n12634986 +n12187247 +n11645590 +n07582892 +n03065243 +n09911226 +n04396902 +n10763075 +n02359047 +n10400108 +n04294614 +n03991646 +n11728945 +n07766891 +n12277150 +n13141564 +n10563314 +n12426749 +n07827750 +n12403994 +n12627119 +n03420801 +n10203949 +n12830568 +n12280060 +n13180875 +n12659064 +n04239436 +n03823312 +n04367746 +n12448700 +n01896844 +n07581931 +n09384106 +n11625223 +n04198722 +n01477875 +n09932336 +n03477512 +n12281974 +n10117739 +n07759194 +n12281788 +n01405007 +n03077616 +n02304036 +n12947544 +n03140126 +n12356960 +n07807002 +n07877849 +n02956795 +n04373795 +n07925500 +n10359546 +n09730077 +n01694955 +n10611267 +n04316498 +n07849912 +n12841354 +n07903543 +n10026976 +n04050313 +n03939844 +n03260849 +n07917507 
+n12228387 +n03199775 +n01569566 +n02403920 +n04261638 +n02986160 +n03724623 +n01960177 +n03783430 +n07877675 +n10401639 +n04215153 +n03077741 +n02589623 +n12934985 +n03233624 +n04506688 +n12194147 +n09975425 +n07818825 +n12641007 +n10036692 +n02771750 +n12285900 +n01472303 +n10033663 +n10707134 +n03219966 +n11772879 +n10146416 +n10435169 +n10304086 +n12385566 +n03126580 +n12904314 +n03619196 +n02299846 +n03574243 +n12368257 +n03690473 +n01748686 +n09834378 +n07750736 +n02930645 +n01679307 +n03721047 +n02710044 +n07563207 +n02930080 +n09309168 +n03127203 +n02863536 +n02536165 +n01559639 +n09654518 +n02961035 +n12007406 +n12773651 +n04351699 +n03114504 +n06273414 +n02017475 +n01733466 +n02175014 +n07920663 +n03953901 +n09670521 +n09400987 +n11791341 +n02284884 +n12919646 +n07880325 +n03801353 +n01982347 +n07828642 +n01570421 +n03998333 +n03449309 +n10482220 +n12850906 +n12805561 +n12926689 +n03232543 +n04248851 +n03195959 +n04082562 +n03846100 +n07682952 +n07695652 +n11809271 +n09895561 +n04287898 +n09740724 +n02859955 +n09830400 +n03674731 +n02825153 +n04571686 +n13107891 +n10318607 +n07848093 +n13226871 +n08555710 +n03137473 +n02776978 +n03141455 +n12514138 +n01809371 +n09405078 +n01753180 +n02184473 +n11610215 +n03539546 +n12731835 +n04485884 +n03590588 +n10221312 +n04049753 +n03441345 +n02302244 +n12262185 +n15092650 +n11877646 +n10377185 +n01684578 +n03796605 +n07897116 +n03164344 +n12135049 +n10757050 +n01692523 +n04566756 +n07697699 +n07575392 +n10262655 +n04064747 +n07914006 +n12433769 +n07873348 +n04457767 +n10019072 +n02921195 +n03856465 +n04041408 +n12639584 +n12920955 +n11781176 +n07864756 +n03941013 +n03646148 +n04401578 +n11692792 +n02757714 +n02286089 +n04253168 +n03890514 +n07855510 +n03507458 +n04123026 +n11661909 +n12435152 +n04330746 +n09481120 +n03731019 +n03717285 +n03271030 +n02772101 +n07740597 +n02847852 +n12825497 +n12263738 +n03342262 +n03603594 +n07804543 +n12932365 +n12695975 +n10297531 +n04054670 +n03175081 +n12703856 +n03832144 
+n03966206 +n02414290 +n03619275 +n09738121 +n03290096 +n10585077 +n07731767 +n12409840 +n12026981 +n02278980 +n02752810 +n01654637 +n02654112 +n10314836 +n13023134 +n01823414 +n07461050 +n11902982 +n04543636 +n02204907 +n04049585 +n12304899 +n03073545 +n04272928 +n10315456 +n03975657 +n09899782 +n12288005 +n07005523 +n03795269 +n09823832 +n02242137 +n02907391 +n03643491 +n03245889 +n12285369 +n03061345 +n03797264 +n07838073 +n09219233 +n02859343 +n07608098 +n03920641 +n12578626 +n10688356 +n04542858 +n07834065 +n00443803 +n04181561 +n04570214 +n02047517 +n03295012 +n01633781 +n10610850 +n04035836 +n03001115 +n04593376 +n02393807 +n13061348 +n10123122 +n11800236 +n13207094 +n10140929 +n12167602 +n01809752 +n10421956 +n02764935 +n03424489 +n12889219 +n04046091 +n07714287 +n07708685 +n07736087 +n04142434 +n11961446 +n04521863 +n02414763 +n02901377 +n00467536 +n13085747 +n03855756 +n11846765 +n02530999 +n03063199 +n04258618 +n12204032 +n04424692 +n11758276 +n02653497 +n03766508 +n02026629 +n02572484 +n12339831 +n01635027 +n01668436 +n07821919 +n01543175 +n02689748 +n12528974 +n04024862 +n04184880 +n11720891 +n13869896 +n01678043 +n01647303 +n11532682 +n03236217 +n04963307 +n03012897 +n11682659 +n03191286 +n07643891 +n12737898 +n10680609 +n07924955 +n03879705 +n10461060 +n02523427 +n02013567 +n09893344 +n04124488 +n09863031 +n12454436 +n12305089 +n07709046 +n03805180 +n11940599 +n01691217 +n04198562 +n03978421 +n02357585 +n07818572 +n12870682 +n03798442 +n04154938 +n10550369 +n11957678 +n01958531 +n09936825 +n02334201 +n07910538 +n11978551 +n10562135 +n12700088 +n12784889 +n04480853 +n03281673 +n07588419 +n02968333 +n11935469 +n13046669 +n11730602 +n09643799 +n11849467 +n01758757 +n09638454 +n03267468 +n07914586 +n12104734 +n02961225 +n09827246 +n09917214 +n13079073 +n12634734 +n04089376 +n13034062 +n11714382 +n12753762 +n07683039 +n11840067 +n07689842 +n12173069 +n12172481 +n04182152 +n07869522 +n10356877 +n02771166 +n03154895 +n07615289 +n12986227 +n12361135 
+n03456447 +n12706410 +n12895811 +n02988156 +n03130761 +n10639359 +n03628215 +n02738741 +n01643507 +n07730708 +n03232309 +n02846733 +n04969540 +n03051041 +n12890928 +n03235327 +n04289576 +n07588817 +n10325774 +n03973285 +n09703485 +n02358584 +n03061674 +n03195332 +n02901259 +n07849619 +n04486934 +n07908812 +n01588725 +n03682877 +n11949015 +n04146504 +n04146862 +n07898247 +n03318865 +n04367950 +n07880213 +n04247011 +n01447658 +n12711817 +n03146687 +n02926426 +n12856091 +n11966896 +n02413593 +n09764900 +n03009794 +n03314227 +n10499232 +n10075693 +n04451318 +n12320806 +n11933728 +n07764315 +n12133682 +n09904837 +n12832538 +n03816530 +n07802863 +n04391445 +n09728137 +n03887330 +n04436012 +n03957991 +n07771731 +n06266973 +n10407310 +n10290919 +n07862244 +n01842504 +n10262561 +n12726159 +n07691954 +n07618119 +n03437829 +n11966617 +n03629100 +n04231905 +n04208760 +n03344305 +n03684143 +n12934174 +n08645212 +n03556679 +n12109365 +n03751458 +n02380875 +n02025389 +n02770721 +n09830629 +n02800675 +n04951186 +n04483073 +n12710577 +n12789054 +n12058192 +n11777080 +n07716203 +n09618880 +n04525821 +n04016846 +n02918330 +n10375052 +n13158512 +n13090871 +n02929582 +n02308735 +n10487182 +n02213663 +n07608339 +n04384593 +n12890490 +n03992436 +n02994573 +n13231078 +n12880244 +n01651059 +n02925009 +n09686401 +n13219976 +n09981540 +n04582771 +n06267758 +n09893502 +n13214340 +n03272940 +n12554911 +n02214341 +n04137089 +n03874487 +n04573513 +n12003167 +n12004547 +n13065089 +n01903346 +n04373428 +n02216365 +n02024185 +n12577895 +n11698042 +n07586318 +n11705776 +n03030353 +n04486213 +n07885705 +n07928163 +n02356612 +n02767038 +n02897097 +n11662371 +n04128710 +n09842395 +n07683360 +n11533212 +n08495908 +n12841193 +n03669886 +n07768068 +n02381831 +n12081215 +n02757337 +n02811618 +n10144338 +n01379389 +n09698644 +n12779851 +n10400618 +n11801891 +n12322099 +n12408077 +n02767956 +n08640962 +n07816839 +n03021228 +n10346015 +n07868830 +n07917272 +n10076957 +n12865708 +n04290259 +n03595264 
+n03986224 +n07825194 +n01610100 +n04417086 +n12995601 +n12734070 +n15091129 +n12428412 +n07587331 +n02405101 +n03108455 +n03594523 +n04489695 +n03892425 +n13032618 +n04409011 +n07590752 +n15092942 +n03914583 +n13066448 +n03532919 +n10639637 +n04566561 +n13223843 +n07904637 +n12347158 +n02720048 +n03901229 +n03936466 +n10574840 +n03782794 +n12397431 +n07908567 +n12580896 +n02697221 +n09791419 +n02577403 +n07870069 +n02136103 +n04318892 +n01462544 +n09747191 +n12287836 +n03067093 +n03934565 +n03543945 +n13126856 +n02240068 +n01585422 +n12413301 +n03246454 +n01876034 +n03635330 +n11680596 +n03228365 +n03082656 +n11609862 +n12859986 +n03934229 +n10233248 +n03166514 +n12166793 +n10115430 +n03327553 +n03373611 +n02967782 +n12338258 +n01604968 +n01323155 +n02590094 +n03044934 +n07866409 +n12291143 +n14900342 +n12094612 +n07845702 +n07926250 +n10750640 +n04359500 +n09797873 +n09953350 +n03561047 +n12122725 +n12725738 +n01453087 +n04977412 +n04575723 +n13219833 +n12161056 +n04273285 +n12482437 +n12863624 +n04953296 +n03390075 +n10188957 +n02874442 +n04236935 +n09990690 +n12866459 +n04075715 +n09725000 +n12794367 +n12461673 +n03050453 +n03677115 +n12427391 +n07736371 +n02973236 +n02406749 +n12322699 +n12815198 +n10680796 +n03268311 +n02405799 +n12302248 +n09791014 +n01545574 +n07740033 +n07862095 +n09901337 +n04390577 +n03597916 +n12110085 +n11802586 +n04205505 +n07696527 +n12076852 +n04344003 +n03326660 +n02823586 +n03042139 +n01565345 +n07905296 +n01454545 +n07650903 +n07905386 +n12530629 +n02841187 +n02943964 +n03329536 +n09681234 +n03479121 +n03770085 +n04147793 +n11552133 +n03774327 +n13197507 +n07901355 +n10400437 +n07837912 +n02310941 +n07845087 +n02239774 +n04976319 +n03960490 +n05239437 +n06275471 +n01633406 +n04257223 +n12009420 +n10483138 +n02775897 +n07866151 +n07922512 +n02666624 +n03944024 +n03842377 +n01832493 +n07855907 +n03968728 +n04492060 +n07879072 +n11635830 +n11802800 +n02357911 +n02431628 +n03730494 +n13099999 +n07768230 +n13147270 +n12331655 
+n10237676 +n11855553 +n09759501 +n10620586 +n13181055 +n12309277 +n13183489 +n04382695 +n07679034 +n10495756 +n02173113 +n12764202 +n03683457 +n10298912 +n07680313 +n10160280 +n02205673 +n12053690 +n11653904 +n02931294 +n04093775 +n12856479 +n02427470 +n07608866 +n09954639 +n11639445 +n03364599 +n09924106 +n09683924 +n10419472 +n03089753 +n12620969 +n07604956 +n12940609 +n12564083 +n03514894 +n10343355 +n13068255 +n03805280 +n12793284 +n03140652 +n02666501 +n11717577 +n04267435 +n04593185 +n12820853 +n03934311 +n02630615 +n07767002 +n07723968 +n01631354 +n07931452 +n12414818 +n03097673 +n09944430 +n04457474 +n11850521 +n12227658 +n10131815 +n12408717 +n03566730 +n12777680 +n06273555 +n04357531 +n03759243 +n09861599 +n03015851 +n04175039 +n03392741 +n07859796 +n07741138 +n04474187 +n02266864 +n04553561 +n02667244 +n12720200 +n12432356 +n07806120 +n10362557 +n11929743 +n07765862 +n02963987 +n02762371 +n02747672 +n04289195 +n04056413 +n03039493 +n03894677 +n12338655 +n04422409 +n12079120 +n10252222 +n10168837 +n12919847 +n10297841 +n01340014 +n11710827 +n10167838 +n12278107 +n01384164 +n10498986 +n02742468 +n02899439 +n11752937 +n12107710 +n12315598 +n03985441 +n07605804 +n07686202 +n12884100 +n13121349 +n11725311 +n10420507 +n11706761 +n01381044 +n03331599 +n12336333 +n10185483 +n07880880 +n01782516 +n12615232 +n03175457 +n12657082 +n01750437 +n07918879 +n13213066 +n12927494 +n02910542 +n06273986 +n02161338 +n10235024 +n12180168 +n03659950 +n02160947 +n11861853 +n09866817 +n09279986 +n12393269 +n01552034 +n05526957 +n02956883 +n12818966 +n09753792 +n03114236 +n12273344 +n12546617 +n13177048 +n02129991 +n01731941 +n01628770 +n12774641 +n07685546 +n03253279 +n10678937 +n12579038 +n08673039 +n01392275 +n02379081 +n10530150 +n12851469 +n12414449 +n11694664 +n11877283 +n09708889 +n03585438 +n00483605 +n12332555 +n03323096 +n07851767 +n02417663 +n10667863 +n02856237 +n09269341 +n01596608 +n09720033 +n13160604 +n04443164 +n02814428 +n11622771 +n10328123 +n04338963 
+n01794651 +n12069217 +n07762740 +n02935387 +n11897116 +n10569179 +n12749852 +n10745006 +n07823280 +n12162425 +n09801533 +n03772269 +n04518643 +n07916319 +n12771597 +n02147173 +n10342992 +n03795123 +n11646344 +n12847927 +n07686021 +n12383894 +n04465050 +n14564779 +n04212467 +n12274863 +n02380052 +n04329958 +n12034384 +n04213353 +n04366033 +n04955160 +n02778294 +n12890685 +n03028785 +n03097535 +n04533594 +n01750167 +n01415626 +n12276477 +n07729926 +n07711371 +n12843970 +n10500419 +n12891093 +n03840823 +n12509665 +n11878101 +n04315342 +n07685031 +n12305819 +n10039271 +n12264512 +n03911866 +n13919547 +n12413419 +n03785721 +n02599347 +n03786194 +n04018155 +n12856287 +n09607903 +n02396088 +n10212501 +n10313000 +n07683617 +n03586219 +n03890233 +n03156767 +n12033709 +n01648139 +n04399846 +n10671736 +n07698672 +n10791115 +n07708124 +n02709908 +n04266968 +n01758141 +n10058962 +n09444783 +n03668067 +n02838345 +n02388143 +n12893993 +n12590499 +n01462042 +n02689434 +n13209808 +n04075291 +n02412629 +n01953594 +n03906463 +n03043423 +n02200509 +n10152763 +n12504570 +n04396808 +n03382413 +n03618101 +n02767147 +n02390101 +n03450974 +n12778398 +n03625539 +n02574271 +n04113316 +n07572616 +n11809437 +n04119230 +n03829954 +n10500603 +n04258732 +n02731900 +n10174330 +n01574801 +n08663703 +n12558230 +n03981760 +n07732904 +n11875523 +n11823436 +n03238286 +n03079494 +n04281260 +n07873057 +n11686912 +n10568608 +n07593004 +n04271531 +n10037922 +n07838551 +n03615300 +n12624568 +n12940226 +n05242928 +n03680734 +n01589893 +n11652376 +n11893640 +n04119091 +n09696763 +n07851554 +n02660640 +n12124818 +n10370955 +n02663211 +n02414209 +n13187367 +n03258577 +n04375241 +n07617932 +n12240477 +n03417202 +n07595649 +n03839424 +n03087245 +n02431441 +n04396335 +n03484809 +n03426285 +n03592931 +n02912319 +n03488887 +n12187891 +n07592400 +n12918609 +n07858114 +n07567980 +n01548694 +n02726210 +n02406859 +n10147262 +n05458576 +n02848921 +n03503233 +n02587618 +n03465151 +n03582508 +n11654293 +n03695452 
+n02197185 +n04223170 +n10243273 +n03149135 +n02842809 +n03669534 +n03857291 +n02147328 +n12278865 +n12733428 +n03264906 +n09924195 +n10432189 +n12203896 +n03892728 +n12360958 +n10418735 +n01650901 +n12420722 +n03341606 +n02557909 +n07751858 +n03483971 +n12019035 +n03991202 +n02072040 +n03129848 +n04505345 +n02405440 +n03901974 +n11656123 +n11552976 +n10291822 +n10108018 +n09902731 +n03325691 +n12646072 +n04134170 +n12097396 +n07564008 +n01624305 +n03421117 +n02776007 +n10792856 +n07818133 +n03227184 +n10198437 +n04157099 +n12743009 +n07820960 +n12749456 +n13035925 +n05262698 +n03422771 +n02878628 +n12140903 +n07820297 +n03524745 +n09901921 +n03170872 +n10039946 +n12638964 +n11989087 +n03461988 +n04287451 +n04298053 +n07882420 +n04002262 +n02734835 +n11707827 +n07756641 +n12808007 +n10069981 +n12637123 +n12947895 +n04363082 +n04292080 +n11858077 +n04535252 +n12646397 +n12283147 +n12321077 +n02746595 +n02895328 +n07624924 +n12537253 +n11952541 +n02181477 +n01440160 +n03878828 +n12861541 +n02869563 +n04242084 +n03197201 +n09396608 +n04291992 +n07845863 +n04314522 +n12843557 +n04029647 +n12146654 +n13147386 +n12954799 +n11920133 +n03038480 +n03213715 +n02971473 +n04149374 +n04230387 +n00444340 +n11859275 +n07564796 +n02948403 +n10186068 +n04315713 +n02366002 +n02670935 +n13208302 +n10225931 +n07826340 +n04102872 +n02259708 +n11855842 +n09941089 +n08896327 +n10237464 +n12084158 +n03764995 +n03627954 +n12384375 +n10341343 +n07876189 +n04573379 +n07904293 +n07840520 +n12038038 +n03005147 +n10483799 +n02978367 +n01484285 +n13094273 +n04539053 +n01748389 +n10146816 +n07815839 +n12991837 +n03294604 +n03588841 +n04055180 +n03209477 +n09917345 +n04393913 +n12337391 +n12126084 +n01882125 +n07688130 +n02814116 +n09640715 +n12679593 +n12596345 +n03029925 +n11761650 +n04457157 +n12683096 +n07709881 +n03841290 +n13157684 +n07927836 +n03523134 +n03690279 +n10187491 +n12451070 +n02682311 +n03978815 +n11806679 +n07808022 +n01386354 +n03622526 +n02369293 +n11885856 +n02289610 
+n12663359 +n02624987 +n13173488 +n03027001 +n07896765 +n11935330 +n07814790 +n04242704 +n09959142 +n07589543 +n03551582 +n07843117 +n03556992 +n02060569 +n04000998 +n03825271 +n11946918 +n02874750 +n03479502 +n09919451 +n02176747 +n02080713 +n03400972 +n10222170 +n07926785 +n07852302 +n03012373 +n10438842 +n12868019 +n03634034 +n04210591 +n07853560 +n12374862 +n09248399 +n04355115 +n12908093 +n12906498 +n12875269 +n02791665 +n03146777 +n02854378 +n12414159 +n07821610 +n07595180 +n12238913 +n12141385 +n10761190 +n12165758 +n01653223 +n12956367 +n03695753 +n12416703 +n12346813 +n03405111 +n04304215 +n01624212 +n12674895 +n09850760 +n12407715 +n04156040 +n11610437 +n03395256 +n09970822 +n04229959 +n02530831 +n07870894 +n12098524 +n12828379 +n04057215 +n10751152 +n10053439 +n03674270 +n07869291 +n12256920 +n02535163 +n04282231 +n02136452 +n02365108 +n10328328 +n02315487 +n03325403 +n09231117 +n03342657 +n09980985 +n10702167 +n11961871 +n02065263 +n12857779 +n03219612 +n07805966 +n10699981 +n07691863 +n12831932 +n04179126 +n10208189 +n09765118 +n07922147 +n01631512 +n01947997 +n01405616 +n01892030 +n07827896 +n12964920 +n07749870 +n03276696 +n10020670 +n11828577 +n07624666 +n10590146 +n02407521 +n10253703 +n03270854 +n11610047 +n12981443 +n12413642 +n12302565 +n03177059 +n04594114 +n10227985 +n07728391 +n10395073 +n02810270 +n03569293 +n07812046 +n03843316 +n12477401 +n03802643 +n07618029 +n10755648 +n12837803 +n12454556 +n01636127 +n02809241 +n03270165 +n12035631 +n02962414 +n09750641 +n01793085 +n04346003 +n07922041 +n04164002 +n12499979 +n03301291 +n07921834 +n09656077 +n07599161 +n13155611 +n10194231 +n10063635 +n03601442 +n10366276 +n00475661 +n03943714 +n10377291 +n02624551 +n02568447 +n07589458 +n09691858 +n02685995 +n11919975 +n01690466 +n13211020 +n04114069 +n10530383 +n04200908 +n12631932 +n07916437 +n03219859 +n07918309 +n10368291 +n10253479 +n03317889 +n13206178 +n02821415 +n10592811 +n12557064 +n12872458 +n10212231 +n07926346 +n09695514 +n09741816 
+n03964611 +n07812913 +n09703708 +n02587479 +n10593521 +n03485309 +n03776877 +n12289433 +n07716504 +n10580030 +n03061893 +n03206158 +n09710041 +n04266849 +n07864065 +n12767648 +n02333190 +n12295429 +n02406432 +n01799679 +n07861983 +n02201626 +n03441582 +n03653975 +n02834506 +n12263204 +n10672662 +n03072682 +n03410423 +n11620389 +n04542095 +n07910970 +n03697913 +n02706806 +n09736798 +n12318965 +n07938594 +n12032429 +n03191776 +n04210288 +n01422335 +n03236093 +n11881189 +n02247216 +n12338146 +n03104512 +n00474881 +n04172230 +n01461315 +n04400109 +n10646140 +n02215621 +n10096126 +n03019806 +n11809754 +n02492948 +n10741367 +n10308504 +n07875560 +n02523110 +n07738224 +n02015797 +n10499631 +n03025165 +n03284308 +n03508881 +n10441037 +n10757492 +n07608721 +n09755241 +n04264361 +n04394421 +n03776997 +n03175843 +n04476526 +n02523877 +n13196369 +n10190122 +n03172738 +n02709763 +n02070624 +n04563560 +n04017807 +n03824589 +n07817758 +n03222722 +n01542433 +n13173259 +n04458201 +n12869668 +n12580786 +n02407763 +n09760913 +n10530571 +n11752798 +n09612700 +n07601175 +n11632376 +n10641223 +n03158668 +n03411208 +n01413457 +n03684740 +n10248008 +n12656528 +n11849271 +n07771891 +n12067433 +n12389727 +n11734698 +n04042204 +n07825399 +n12621945 +n07624757 +n03180732 +n09741331 +n10246317 +n04030414 +n07821107 +n04524716 +n03789603 +n12867449 +n10249869 +n02434415 +n07614103 +n03333349 +n04602840 +n09923996 +n02658811 +n13033879 +n03663433 +n02873623 +n07837545 +n12436907 +n02675077 +n01500854 +n04435552 +n01790304 +n11687789 +n03443543 +n09733459 +n01606177 +n12245885 +n11721642 +n02201497 +n12010815 +n04594742 +n02755984 +n07927716 +n04245218 +n03134118 +n13214485 +n12294542 +n12713521 +n03556173 +n12650038 +n07719058 +n04319774 +n10443830 +n10019187 +n09720702 +n07926442 +n10402709 +n03989777 +n11699751 +n09613118 +n02965122 +n04221076 +n01861330 +n12837052 +n02975589 +n09668437 +n03012499 +n01418498 +n12451566 +n03585778 +n07692517 +n09672590 +n09741999 +n09748648 +n07621264 
+n03482001 +n10185148 +n01542168 +n12536291 +n07846557 +n11840476 +n03130866 +n02631775 +n11730015 +n03715275 +n07680168 +n12175370 +n05427346 +n03665232 +n08611421 +n11730458 +n02413484 +n09783884 +n07888378 +n04611351 +n02247655 +n02136794 +n11649359 +n01382033 +n07889193 +n10405540 +n03510384 +n04420720 +n03585875 +n03812789 +n01835769 +n12139921 +n09762011 +n10103228 +n03477410 +n11930788 +n10064831 +n12311045 +n07681805 +n03136504 +n12887713 +n03886940 +n03130233 +n10197392 +n12333961 +n07672914 +n12723062 +n12599661 +n04268799 +n03696909 +n12809868 +n12452256 +n10710778 +n02571652 +n12117326 +n02450677 +n03041265 +n12544240 +n01966377 +n10252354 +n02378625 +n09814488 +n10569011 +n13067330 +n07928998 +n07890970 +n02187279 +n02592371 +n07846802 +n03475961 +n05448704 +n10410996 +n02851795 +n10093167 +n12468719 +n09876701 +n03057724 +n03469031 +n02344270 +n04248209 +n02687682 +n04467899 +n12897788 +n03436656 +n12539832 +n09906704 +n03190458 +n11843441 +n12130549 +n11823756 +n03153246 +n03684489 +n04160036 +n02908951 +n12855365 +n03518230 +n12225222 +n12933274 +n10432957 +n02921406 +n10156831 +n12239647 +n02826812 +n03411927 +n11602091 +n13200986 +n04244847 +n01330126 +n14938389 +n03001540 +n04387531 +n03423099 +n07608533 +n11723986 +n07600394 +n12529500 +n02403820 +n02587300 +n10333317 +n07935288 +n12680652 +n01449980 +n12153914 +n07803310 +n11741797 +n01881857 +n13081999 +n08644045 +n02061217 +n02173784 +n02660519 +n03104019 +n13137951 +n04538403 +n02621258 +n04515729 +n04165945 +n11919761 +n13078021 +n07861247 +n11959259 +n11801665 +n04070545 +n13210597 +n10218043 +n10717337 +n01365885 +n10718952 +n11979187 +n03880032 +n03798610 +n03477303 +n01876667 +n11860208 +n03401721 +n03360133 +n13230843 +n13194758 +n13190060 +n02564935 +n13894154 +n12754311 +n07697408 +n13171210 +n02035402 +n03736147 +n10396337 +n04554998 +n02793930 +n04126852 +n03654826 +n09411295 +n06255613 +n01680983 +n10261862 +n01581874 +n10378780 +n10646641 +n03539103 +n03351151 +n04349913 
+n03906106 +n02370525 +n03319576 +n04113968 +n09693244 +n02945964 +n03344509 +n04117216 +n03889626 +n03557840 +n09800469 +n04280487 +n07890890 +n12147835 +n12295237 +n03883664 +n04436992 +n02922877 +n10099002 +n01988203 +n10056719 +n11646517 +n03672521 +n04568713 +n10111358 +n03606347 +n04047733 +n12320627 +n10251612 +n10460033 +n01742447 +n11917835 +n10443032 +n13079567 +n04363671 +n10788852 +n10482587 +n03308614 +n12741586 +n12938667 +n04539407 +n01630148 +n02303777 +n13050940 +n04552551 +n02341288 +n04098169 +n04110439 +n11625391 +n12259316 +n02822762 +n10631131 +n04089152 +n03571439 +n04558199 +n12656909 +n03170292 +n02877642 +n12771890 +n03033267 +n12658603 +n13354021 +n12855886 +n11840246 +n03619050 +n07727252 +n12932706 +n13874073 +n01315805 +n02948942 +n12048928 +n03146449 +n10656969 +n09872557 +n03906590 +n04454792 +n12500309 +n04239333 +n01815036 +n09644657 +n10497645 +n02918455 +n07812662 +n04240434 +n10804636 +n11967878 +n04184095 +n11834272 +n05244755 +n02299039 +n12665659 +n12144987 +n07607492 +n11887750 +n13083461 +n04577139 +n09670909 +n07876893 +n02875948 +n04069582 +n10458111 +n10361194 +n09389867 +n01651778 +n11933387 +n13193143 +n12834190 +n03516266 +n02184589 +n10041373 +n02809605 +n04064213 +n04957589 +n12643113 +n02582721 +n07911061 +n07921360 +n10369417 +n10527147 +n04104925 +n03707372 +n01386182 +n10374849 +n09902851 +n08559155 +n02332447 +n11649150 +n11722036 +n01823740 +n04592356 +n10002257 +n10661732 +n07562379 +n07597263 +n04036776 +n13112201 +n09842288 +n07738105 +n04545984 +n09635973 +n02885233 +n02756854 +n07808479 +n03029296 +n01543383 +n02884450 +n09843716 +n04224395 +n10576676 +n10140051 +n07919894 +n07806879 +n10212780 +n09478210 +n12017127 +n03770224 +n07606191 +n03555217 +n09715165 +n12270460 +n12129738 +n11739365 +n02303585 +n07818029 +n05314075 +n03019304 +n09859975 +n09454744 +n13151082 +n12586989 +n00455076 +n07741357 +n04957356 +n08659242 +n04577293 +n04126244 +n03131193 +n12428242 +n03569494 +n03781594 +n07743384 
+n02892392 +n12576695 +n12199982 +n07693439 +n07719756 +n11884384 +n03043798 +n12351091 +n03690168 +n02214499 +n01839949 +n01831360 +n12642964 +n02957862 +n03125588 +n12883628 +n04002371 +n10747965 +n09744462 +n02853745 +n13030337 +n12156679 +n02761034 +n12587487 +n03374570 +n12728322 +n01731764 +n07918706 +n03696445 +n03185868 +n02805283 +n03868763 +n02202124 +n12369665 +n12449934 +n12650229 +n02656301 +n07743723 +n11702713 +n02927053 +n03916385 +n01486010 +n03986071 +n04188064 +n13897528 +n12414329 +n07718068 +n07837755 +n11735570 +n10464542 +n04091466 +n01315581 +n10374943 +n03989898 +n13220525 +n04076052 +n04062179 +n02414442 +n04414101 +n04446162 +n00480885 +n03536568 +n03773835 +n10728998 +n12643877 +n02255391 +n03799610 +n07847585 +n00446411 +n11910666 +n03139998 +n02296276 +n02889996 +n02786611 +n10363445 +n07854348 +n08583682 +n09912681 +n07896422 +n02368821 +n11935953 +n12185254 +n11738547 +n03809211 +n02448318 +n13066979 +n01987076 +n12009047 +n12839574 +n13174823 +n07902520 +n03369866 +n13209129 +n02593191 +n03853291 +n02620578 +n10071332 +n01813658 +n09895480 +n10134760 +n01316734 +n07845166 +n03175983 +n13132156 +n12814960 +n12883265 +n03637787 +n04310507 +n04133114 +n03900194 +n04129688 +n04449550 +n01805321 +n01717467 +n01573627 +n12271451 +n11722621 +n09976917 +n12232280 +n12905135 +n03451253 +n01655344 +n12346986 +n11987511 +n10517283 +n02941845 +n12730370 +n03121190 +n07917874 +n10023656 +n10151133 +n07695187 +n03258456 +n10639238 +n10682713 +n02085019 +n12343753 +n10749928 +n04595611 +n04410565 +n08500819 +n07719980 +n04016479 +n03232417 +n03469832 +n09834885 +n07925327 +n10094782 +n03632100 +n12734215 +n09845849 +n04047139 +n10743124 +n02604954 +n12270278 +n03036244 +n11991777 +n10168012 +n02561803 +n10531109 +n10344319 +n03804211 +n10513938 +n10732967 +n09917481 +n02950482 +n03148808 +n07910245 +n07925423 +n07889990 +n04302988 +n07745357 +n04346511 +n07573563 +n02564403 +n12084400 +n10030277 +n09815455 +n04388473 +n12404729 +n10576316 
+n12072210 +n11811059 +n01824344 +n03556811 +n03175301 +n07586485 +n13137010 +n11986729 +n04967561 +n03881404 +n07692114 +n07874995 +n02770585 +n07853345 +n02775689 +n04328580 +n01323781 +n07773428 +n02414043 +n02794474 +n02352932 +n07569873 +n12374705 +n03606106 +n04267246 +n04369485 +n11934239 +n12705698 +n11841247 +n07868045 +n03525693 +n12358293 +n02937010 +n09658398 +n12711182 +n03516647 +n04591631 +n10228712 +n11930353 +n03471779 +n12594324 +n02251593 +n04455579 +n02542017 +n03381450 +n03320845 +n12364940 +n09657748 +n12412987 +n01840412 +n10570704 +n10117267 +n03251280 +n10195261 +n12178129 +n12285049 +n02177775 +n10117415 +n03707766 +n04475309 +n05604434 +n03999064 +n12127575 +n01972131 +n09793946 +n01635176 +n02791532 +n07564101 +n07876460 +n02813981 +n10764719 +n03638743 +n12761702 +n02125689 +n11657585 +n09923003 +n13069773 +n02683183 +n04324515 +n11936946 +n12862828 +n02659808 +n02619861 +n13175682 +n11648039 +n07768139 +n12512674 +n12108613 +n02947977 +n12899971 +n03845107 +n07689490 +n02081927 +n07619508 +n10248377 +n10300041 +n10761326 +n09655213 +n02675522 +n04963111 +n01995686 +n03256631 +n10684630 +n04471912 +n12728864 +n03870546 +n02829246 +n09725546 +n03409920 +n13194918 +n10055297 +n02513248 +n01462803 +n11782266 +n13094145 +n07839478 +n13916363 +n07932454 +n09722817 +n07774479 +n10386874 +n12832822 +n01599388 +n02964295 +n04349189 +n07689313 +n11653126 +n02309841 +n02064000 +n04410663 +n04562122 +n02358712 +n09901786 +n10441124 +n12882158 +n12815668 +n10159289 +n01641930 +n03315990 +n12271187 +n10277638 +n07815163 +n12903014 +n07915366 +n04412300 +n01324799 +n03408264 +n09452291 +n03019198 +n11890884 +n10355806 +n03186199 +n04013600 +n12541157 +n06259898 +n06273294 +n11946051 +n01671705 +n04415257 +n01905321 +n04050600 +n12604460 +n04051439 +n02929184 +n11765568 +n10025060 +n02396796 +n04033287 +n13027557 +n03127531 +n10308066 +n09729062 +n01593553 +n02476567 +n07609728 +n12970293 +n01419888 +n03215749 +n01684741 +n13067672 +n03870290 
+n07846359 +n12961536 +n03356559 +n07727140 +n09843602 +n02378755 +n12044041 +n01977485 +n07718920 +n12060546 +n04265428 +n12237855 +n04006067 +n10227266 +n04361937 +n12134486 +n10097842 +n02264591 +n03912821 +n07594155 +n03116163 +n11771924 +n04155457 +n12394118 +n10507380 +n01844746 +n11901452 +n03024233 +n03383562 +n11806814 +n10062716 +n04204755 +n08613733 +n12907671 +n03533654 +n09826605 +n03109033 +n07606419 +n03537085 +n11615812 +n07695504 +n11694300 +n04520962 +n09971839 +n02664285 +n03402511 +n02061560 +n13133140 +n03548195 +n12877493 +n02425086 +n12845187 +n12488454 +n02975994 +n02071028 +n01457407 +n03685486 +n07605282 +n07771405 +n07827554 +n10538733 +n03438780 +n04379096 +n12686496 +n10001764 +n11848867 +n12125001 +n09886540 +n03275566 +n01442710 +n12789554 +n07858197 +n12722071 +n12868880 +n10441694 +n12409651 +n07727741 +n12289585 +n04069166 +n12686877 +n03723439 +n07815956 +n12543455 +n10778044 +n02200630 +n10074841 +n12640284 +n12589841 +n07592317 +n07866571 +n12712626 +n04228422 +n11711289 +n03590475 +n13081229 +n03045800 +n03639230 +n02874214 +n07615954 +n03204134 +n12053962 +n12769219 +n15006012 +n09873769 +n11818636 +n01959029 +n03349599 +n12227909 +n07576969 +n03638180 +n07742224 +n03390673 +n02344175 +n03770520 +n00447361 +n13235319 +n01983674 +n10061882 +n04267165 +n12493868 +n12713358 +n02930339 +n10493419 +n12918810 +n02582220 +n12248359 +n02644501 +n04596492 +n04538249 +n07905618 +n13230190 +n07808268 +n15005577 +n09351905 +n12730544 +n11937023 +n04024137 +n02238358 +n11646955 +n11618079 +n09849990 +n04060448 +n04220805 +n12725940 +n12004120 +n01484562 +n02669442 +n12132956 +n01756916 +n03980986 +n02256172 +n07716750 +n12119390 +n04047834 +n11934041 +n12828977 +n03648219 +n11873612 +n12909614 +n04397860 +n03908111 +n03261395 +n03695616 +n11668117 +n12014355 +n02896074 +n03988758 +n04426184 +n10328696 +n02477028 +n04507326 +n04320871 +n03256472 +n01919385 +n03988926 +n13182164 +n07826250 +n03207548 +n01396617 +n04369618 +n07913774 
+n13229951 +n03410022 +n12728508 +n01997119 +n03598783 +n01341090 +n03879456 +n01736796 +n02864122 +n13879816 +n02684962 +n12246037 +n02433729 +n04364397 +n09881358 +n02950120 +n03326371 +n02243878 +n01790812 +n12990597 +n03330947 +n07764486 +n03332173 +n10006177 +n03347472 +n07619301 +n10106509 +n12365285 +n01732989 +n07678586 +n04098795 +n07733847 +n03994297 +n12872914 +n02762909 +n07766530 +n13198482 +n02395855 +n12273515 +n04487894 +n07847047 +n12488709 +n02859557 +n04255768 +n02360933 +n03267696 +n03152951 +n10188715 +n10520544 +n13065514 +n02900594 +n03699754 +n01319187 +n01949499 +n10417424 +n01603000 +n12062105 +n09683180 +n09863339 +n01880716 +n10702615 +n03893935 +n10495555 +n04131499 +n02957252 +n02113892 +n07724078 +n12246941 +n04303095 +n01751215 +n04213530 +n12117695 +n12418507 +n01922948 +n12131405 +n13188767 +n01481498 +n03174079 +n02407172 +n11613867 +n10152616 +n10119609 +n04158250 +n11695085 +n07855105 +n02854630 +n03768683 +n12739966 +n12266984 +n12819141 +n12732605 +n13205249 +n11917407 +n01607429 +n02694279 +n07815294 +n06614901 +n07846471 +n12119717 +n02595339 +n12366186 +n10693235 +n12263410 +n12484244 +n10337488 +n04146976 +n01469723 +n07872748 +n03238879 +n12000191 +n07846938 +n03116008 +n12139196 +n04013176 +n10317963 +n12140511 +n02065726 +n01649556 +n10316862 +n01755952 +n04385079 +n12770529 +n02814338 +n01675352 +n11874423 +n01369484 +n10537708 +n07618281 +n07821404 +n02297819 +n03238762 +n03357081 +n02628600 +n07830986 +n12507823 +n04431925 +n11955532 +n03429771 +n10281896 +n12383737 +n12760875 +n09673091 +n12892013 +n06625062 +n04503269 +n03674842 +n12338979 +n04268275 +n12033139 +n11767877 +n07812790 +n12676134 +n04037873 +n10097477 +n12310638 +n12258101 +n09391386 +n13196738 +n13866626 +n12720354 +n10106995 +n07843220 +n03878294 +n04101375 +n07733217 +n10220080 +n04601938 +n10778148 +n12973937 +n10556825 +n12256708 +n12583855 +n04259202 +n07628181 +n04226962 +n02777402 +n09674412 +n12188635 +n03776167 +n04504038 +n04156591 
+n02270945 +n02264021 +n07826653 +n02980203 +n02059852 +n02102806 +n12921660 +n04477725 +n10107173 +n12837466 +n02697022 +n04350688 +n12110236 +n02177196 +n07899976 +n12639910 +n02368399 +n10009162 +n03950647 +n09248153 +n02425532 +n04044955 +n11933257 +n03460899 +n10147710 +n02379743 +n02413917 +n02890804 +n12915140 +n02146879 +n07915800 +n01787006 +n03646809 +n11677902 +n04065909 +n02088992 +n02887832 +n10115946 +n02306825 +n03719560 +n10456696 +n03758220 +n12625003 +n04021503 +n07563366 +n02531625 +n10304650 +n12855710 +n09735654 +n07853762 +n03512030 +n12898342 +n02297938 +n12618727 +n04082344 +n12953712 +n12617559 +n03035715 +n02532451 +n05399356 +n03602686 +n10082423 +n04607759 +n07581607 +n07594737 +n04030965 +n03464628 +n12103894 +n03039353 +n03522990 +n02964934 +n03169063 +n10153865 +n09653144 +n09941571 +n12907057 +n07768318 +n02600798 +n02187150 +n01811243 +n12252383 +n04495555 +n07678953 +n13181244 +n13069224 +n13184394 +n12765402 +n03471347 +n10208847 +n03697366 +n09840435 +n02506947 +n09709673 +n07928578 +n11935715 +n07848936 +n02757927 +n01999767 +n02245443 +n10260473 +n13898645 +n02701260 +n07840219 +n11785875 +n12385830 +n12017664 +n12145148 +n04530456 +n01929186 +n02384741 +n04113038 +n03296217 +n09723819 +n03766697 +n12143215 +n09929202 +n02684248 +n12119539 +n03566555 +n12941220 +n04124573 +n10750188 +n07733005 +n04230707 +n03829857 +n07756838 +n12244458 +n12543826 +n03514129 +n02762169 +n04435870 +n03342863 +n09745324 +n12369476 +n11652039 +n03915320 +n07746749 +n07608641 +n12642600 +n02389943 +n12137791 +n04111962 +n12493426 +n12454793 +n01455317 +n10728117 +n03281524 +n12195734 +n12353431 +n02477329 +n02678010 +n04557522 +n10162354 +n14942411 +n07806043 +n12274151 +n09835153 +n03983499 +n04086663 +n07851926 +n07868684 +n11926976 +n03972146 +n04310604 +n09675799 +n13880704 +n13173132 +n07577918 +n10720964 +n11937102 +n03349020 +n12340581 +n03725506 +n03477143 +n10578162 +n01731137 +n03382104 +n11616852 +n01493829 +n09327077 +n03856335 
+n03321843 +n02375757 +n02118643 +n08500989 +n03496486 +n04140777 +n12858987 +n02845293 +n04093157 +n07819682 +n10394786 +n12289310 +n02901620 +n01559160 +n07919165 +n12648196 +n11774972 +n11995396 +n10543937 +n10154013 +n03977158 +n01884476 +n12266528 +n11906127 +n12661538 +n04396650 +n12761905 +n04175574 +n10181878 +n12017326 +n12876899 +n09744346 +n07741706 +n04451636 +n07735981 +n03751590 +n03140546 +n03070396 +n03091223 +n12071477 +n07562017 +n09981092 +n09847344 +n12552893 +n12371202 +n02245111 +n01598271 +n04400499 +n02298095 +n15048888 +n02967170 +n04030161 +n10676434 +n01556514 +n13235766 +n02538562 +n12603672 +n03941586 +n02449183 +n07567611 +n12923257 +n02296021 +n11730933 +n12497669 +n02917742 +n07875926 +n02714535 +n13142182 +n02878107 +n07861334 +n02682811 +n03730655 +n03681813 +n12970733 +n02132320 +n12436090 +n07931280 +n04295353 +n12982590 +n01783017 +n13164501 +n02424589 +n01499732 +n12650805 +n04543509 +n10369699 +n03439631 +n13160116 +n07831663 +n05449196 +n13025854 +n10169241 +n02847461 +n10734963 +n13213397 +n03343234 +n12275317 +n02793414 +n04300509 +n01803893 +n11617878 +n02179192 +n03637480 +n04514648 +n03087521 +n10478827 +n11757190 +n12919195 +n04532504 +n01736375 +n04015786 +n04545471 +n12668131 +n04472961 +n14786943 +n07584938 +n02498743 +n07744559 +n10010062 +n10101308 +n07832099 +n02601767 +n10473453 +n02451575 +n02496052 +n03696746 +n12669803 +n07904072 +n04290762 +n11737125 +n07760755 +n12553742 +n12068138 +n12630999 +n02390938 +n02202678 +n02216740 +n02679961 +n13173697 +n11828973 +n02287987 +n04585318 +n10360366 +n07745661 +n03474352 +n07934800 +n12677612 +n03692272 +n13092240 +n04230487 +n11846312 +n12433952 +n11793403 +n03056873 +n05454833 +n12517077 +n12682882 +n02649218 +n09425344 +n07878283 +n02795978 +n10064977 +n12754174 +n02945813 +n01750743 +n03150661 +n13880415 +n12337800 +n04017571 +n09754907 +n04456734 +n02967540 +n10621400 +n11744471 +n01971620 +n04148285 +n10781817 +n11991549 +n12305654 +n03943833 +n10330931 
+n12918991 +n01783706 +n11933099 +n12931231 +n07589967 +n09666349 +n07853445 +n12714949 +n03548533 +n04158672 +n03809802 +n03080309 +n12800049 +n02578454 +n02834027 +n10067600 +n03044671 +n04198233 +n07930205 +n04357930 +n12221522 +n11957317 +n03085781 +n03723885 +n03614383 +n02661618 +n04292221 +n03426574 +n03838024 +n10442093 +n12399534 +n01450950 +n07876550 +n11937446 +n09870096 +n02631628 +n05460759 +n01710177 +n03660562 +n04283784 +n01497738 +n02232223 +n04209811 +n12837259 +n02864987 +n04499810 +n12654857 +n03493792 +n09688233 +n02312912 +n10057271 +n07606058 +n03258192 +n10507565 +n11930038 +n08679269 +n03812263 +n11662128 +n04085574 +n07643577 +n03981094 +n02796412 +n02513939 +n07686634 +n07936979 +n03168774 +n03816394 +n07625324 +n04138131 +n10383094 +n10222716 +n10381981 +n12254168 +n13223090 +n03056583 +n09910556 +n03277004 +n12649866 +n02089725 +n03688707 +n09665367 +n07849506 +n02843909 +n13141797 +n02477516 +n09710886 +n03835941 +n11734493 +n10778711 +n10007809 +n02038141 +n12766043 +n02353172 +n02030224 +n10762212 +n06274921 +n13033396 +n03560860 +n01961234 +n13868515 +n03216199 +n01553527 +n04429038 +n10211036 +n02150885 +n02435517 +n02755675 +n09699020 +n12566331 +n03909516 +n02903727 +n02594942 +n04173172 +n04125692 +n12251001 +n02412787 +n01649412 +n01411450 +n01774097 +n09912907 +n03162556 +n07566231 +n12267534 +n03928589 +n04142327 +n11771147 +n07832592 +n04155177 +n07937621 +n07839864 +n03201895 +n13095013 +n10298271 +n03059103 +n03784793 +n11925450 +n03288742 +n02809364 +n04108999 +n04449449 +n03726233 +n07854455 +n03692136 +n12018447 +n03374282 +n06008896 +n07598928 +n03577312 +n04604806 +n09892513 +n04370600 +n08238463 +n01793159 +n07822687 +n03242390 +n07685303 +n03822361 +n01996280 +n10505942 +n06596845 +n04219580 +n12056990 +n10579062 +n10240082 +n10298202 +n07711907 +n03905730 +n12222900 +n07598622 +n04415815 +n12389932 +n12154114 +n04210012 +n12500751 +n03729402 +n12122918 +n04572121 +n12804352 +n02415130 +n12780325 +n11639084 
+n12768933 +n02253494 +n13217005 +n03567788 +n12304286 +n10703480 +n07766723 +n05455113 +n07741804 +n12186839 +n01687128 +n01350701 +n03260206 +n07876026 +n12528382 +n04125541 +n10457444 +n01606097 +n11717399 +n04598416 +n12899166 +n09748101 +n12160125 +n07608980 +n07843348 +n02409038 +n02571167 +n09980805 +n09706029 +n02495242 +n12765846 +n10373525 +n12321873 +n03047171 +n12365462 +n03752398 +n02662993 +n10316527 +n10728233 +n06273207 +n01733214 +n12297846 +n12755876 +n02428842 +n02289307 +n04536465 +n03253187 +n02297294 +n05584746 +n03117642 +n12189779 +n10338231 +n07599649 +n04559994 +n12710917 +n09966470 +n12470907 +n04499300 +n12403075 +n11837743 +n02269657 +n12599185 +n07618587 +n03996004 +n12851094 +n03392648 +n01319001 +n12826143 +n12369845 +n01814549 +n10056103 +n12854193 +n02267483 +n04019881 +n03490649 +n04268142 +n10801802 +n12315060 +n10149436 +n04563790 +n09865068 +n03000530 +n10657556 +n07840672 +n12118414 +n02856013 +n02900459 +n04094859 +n12079523 +n11827541 +n12236160 +n02904505 +n02846619 +n09842823 +n12926039 +n02146201 +n03195799 +n12815838 +n09899289 +n01483021 +n02519340 +n05453815 +n10329035 +n02494383 +n09742927 +n13220355 +n03212406 +n11759609 +n10061431 +n12095281 +n04262530 +n03799240 +n02426176 +n04608809 +n12230540 +n13880551 +n11741175 +n11858814 +n11723452 +n07590841 +n12604845 +n10342543 +n12760539 +n09270657 +n02563079 +n10643937 +n12843316 +n01651641 +n07838811 +n04359034 +n07758260 +n02762725 +n11726433 +n03114743 +n01952029 +n12321395 +n11930571 +n12337922 +n12427946 +n12001294 +n12551457 +n13235011 +n02290340 +n06419354 +n12408873 +n01741442 +n12308447 +n10243872 +n03658635 +n03694761 +n02570484 +n12912801 +n04158002 +n02417785 +n01332181 +n03703075 +n10283366 +n03142431 +n02779609 +n02300554 +n09868782 +n10323752 +n03166809 +n03394149 +n02827148 +n02186717 +n01350226 +n03344784 +n03555996 +n04498873 +n13157481 +n04519887 +n12028424 +n12349711 +n10471640 +n07741235 +n04032936 +n12357968 +n10228592 +n13178284 +n04168840 
+n13239177 +n03561573 +n02566489 +n11807696 +n07681264 +n02566665 +n10456070 +n10063919 +n10492727 +n01788579 +n11977660 +n02036228 +n02738978 +n03989349 +n10332953 +n12949361 +n09901502 +n07839730 +n13146928 +n10152306 +n04170515 +n11602478 +n02522722 +n01333610 +n13030852 +n02143891 +n12807624 +n04542329 +n12243693 +n12036226 +n13917690 +n02553028 +n02752199 +n10594857 +n11627714 +n04348070 +n13171797 +n04612257 +n07934373 +n04536765 +n02244515 +n04526800 +n04546595 +n02551668 +n12143405 +n07871588 +n07858484 +n03628728 +n13179804 +n03242264 +n12089846 +n07588688 +n07620047 +n01647466 +n09685233 +n03467254 +n12666369 +n05449661 +n10694939 +n12886600 +n12256522 +n04006330 +n03317673 +n04316815 +n12222090 +n04022866 +n04088441 +n07617526 +n10782362 +n04355821 +n13901490 +n12508618 +n03849943 +n04503499 +n13193466 +n09754633 +n07583978 +n13911045 +n07643679 +n12054195 +n10692090 +n04032509 +n10146927 +n02031298 +n04002629 +n04035748 +n10712229 +n02866106 +n07909504 +n04540397 +n06266878 +n10219879 +n12567950 +n07853648 +n03191561 +n07856045 +n12646197 +n03317510 +n10515863 +n13198054 +n02808829 +n12889579 +n02698473 +n09924437 +n03595055 +n12306270 +n07857356 +n09715303 +n03024518 +n04323519 +n09629065 +n04178668 +n12748248 +n02308618 +n07873198 +n10564098 +n03007297 +n04036155 +n02143439 +n10507482 +n12267931 +n03956331 +n12888234 +n04066476 +n07813107 +n02736396 +n10306496 +n12324388 +n01744555 +n01649726 +n06596179 +n03616091 +n07754279 +n02072493 +n12408280 +n04314632 +n02412700 +n04030846 +n09833997 +n03599964 +n05258627 +n12572759 +n12136581 +n02419056 +n12453714 +n11652217 +n03878511 +n03907908 +n12223160 +n10514121 +n04153330 +n12163279 +n12623818 +n03495671 +n13222985 +n10354754 +n04365112 +n12384680 +n12538209 +n03105214 +n12534862 +n13869045 +n03945928 +n11613692 +n11892181 +n13002209 +n02685253 +n07598529 +n02629716 +n13202355 +n07927070 +n02176916 +n04370955 +n11988132 +n03246197 +n01440467 +n07620145 +n03940894 +n01897667 +n03408340 +n12602612 
+n02539424 +n03863657 +n04559620 +n02604480 +n11822300 +n03518829 +n11619845 +n10504090 +n03341035 +n02908123 +n04281998 +n03277602 +n03865288 +n10074578 +n13902793 +n03054605 +n04404200 +n12786836 +n12235051 +n04035231 +n12009792 +n12705458 +n04378489 +n02476870 +n11954798 +n03573848 +n02087314 +n03162460 +n04363412 +n02261063 +n09953615 +n01947139 +n03044801 +n04287351 +n04479287 +n03861596 +n12510343 +n07854066 +n03027505 +n12161577 +n04197878 +n01812187 +n10015792 +n08685188 +n11737009 +n10333044 +n02730568 +n10290813 +n13096779 +n05257476 +n07917951 +n12121187 +n03517509 +n07932762 +n02336275 +n12159942 +n12105981 +n02562971 +n13882961 +n12016777 +n02793684 +n12717644 +n01380754 +n07724173 +n04055861 +n11831297 +n03059934 +n03370646 +n10065758 +n09459979 +n07913644 +n04322531 +n03457451 +n02567633 +n04240867 +n10693334 +n10556704 +n04614844 +n07909362 +n12082131 +n09268007 +n04359217 +n09883807 +n02292085 +n04052346 +n03431570 +n02843465 +n04584056 +n04432043 +n09846142 +n07864317 +n04475749 +n04227050 +n04280845 +n03535284 +n07890617 +n03217889 +n02806762 +n11967315 +n11762927 +n02501923 +n03442487 +n09690083 +n02964634 +n02920164 +n07855317 +n10196725 +n03042829 +n11662937 +n12183816 +n12311224 +n13884261 +n02243209 +n03140771 +n02385002 +n03071288 +n12936826 +n04583022 +n07859142 +n04578112 +n04467506 +n12938081 +n09982152 +n12555255 +n03335333 +n10104888 +n12151170 +n12709349 +n10456138 +n02237868 +n07620327 +n12561309 +n12341931 +n12350032 +n01775730 +n12950796 +n01440242 +n04261767 +n10568915 +n12285195 +n07589872 +n13112035 +n07840395 +n11750508 +n12286197 +n03336168 +n03325288 +n02551134 +n04293258 +n13130014 +n07733124 +n04451139 +n11985903 +n03602365 +n11722342 +n11944751 +n12897999 +n02277422 +n03101302 +n07608245 +n03531982 +n01997825 +n11713370 +n04442582 +n02833403 +n04427857 +n01648356 +n10645223 +n10414865 +n10696101 +n12885045 +n10037080 +n12218274 +n07570530 +n04493259 +n10659042 +n10577710 +n03141612 +n10582604 +n00446632 +n02834642 
+n07568389 +n04583888 +n04096848 +n12879068 +n04495051 +n09837459 +n12216215 +n03702440 +n10174695 +n10559009 +n10577182 +n07686299 +n04269668 +n02404028 +n03720665 +n09885866 +n03082450 +n12492682 +n12780563 +n03703463 +n02644360 +n02307910 +n01374703 +n04402342 +n04264134 +n03158414 +n04443433 +n12522894 +n10803978 +n11706942 +n10751026 +n13143758 +n02972934 +n04174234 +n12718995 +n11994150 +n11545350 +n12526754 +n07753448 +n02870772 +n11942659 +n11744108 +n12735160 +n12229887 +n04970312 +n02874336 +n10721819 +n13193269 +n03330665 +n09865162 +n10306595 +n12161744 +n03303669 +n07846688 +n02168427 +n01961600 +n03559531 +n09826821 +n03413124 +n09695019 +n03783873 +n11863877 +n13874558 +n02283617 +n11895472 +n13182799 +n07854614 +n03283827 +n01397690 +n02650413 +n09809279 +n10290541 +n10383505 +n11724660 +n07689757 +n10181547 +n07620597 +n11979354 +n02771547 +n13061471 +n12631637 +n11966385 +n03969510 +n11735977 +n07621497 +n12956588 +n03217653 +n04546081 +n11696450 +n10300654 +n02032769 +n01654863 +n09779280 +n02390258 +n03887512 +n10489426 +n10745770 +n10713843 +n03602194 +n10710913 +n07864475 +n04486322 +n07915213 +n08663051 +n10236842 +n02390738 +n02388453 +n03598385 +n12228689 +n11771746 +n12803226 +n11242849 +n02378149 +n10427223 +n05448827 +n11870044 +n12477983 +n12311413 +n03500090 +n10280034 +n02685365 +n03652389 +n12728656 +n07695284 +n09961198 +n03780799 +n03935883 +n01612955 +n12475774 +n02701730 +n07833535 +n12584365 +n03902220 +n12727960 +n10619492 +n04450465 +n10646780 +n10110731 +n04142175 +n12296735 +n09337048 +n12681579 +n12819354 +n12541403 +n04305016 +n12798910 +n10321126 +n08618831 +n09721244 +n02225798 +n01637338 +n12218868 +n05545879 +n12022382 +n03972372 +n02505063 +n01694311 +n10695450 +n10081842 +n12297507 +n07592922 +n12118661 +n01952712 +n10517137 +n01340522 +n07719330 +n03729482 +n04168541 +n03090710 +n07873679 +n07828378 +n07728284 +n10343088 +n07869937 +n14585392 +n01453475 +n12095412 +n04973020 +n12810007 +n07564515 +n01599741 
+n11629047 +n09937802 +n12450607 +n12460146 +n02292401 +n03632963 +n09617696 +n12545232 +n02874642 +n09934488 +n10091349 +n01447946 +n05469861 +n11830400 +n03382533 +n02608547 +n12697152 +n03542727 +n10716576 +n03664159 +n07568625 +n02976815 +n13147532 +n02336826 +n12432574 +n07686461 +n04107598 +n02505998 +n09849167 +n03688066 +n02836513 +n01576358 +n01893021 +n12017511 +n12065649 +n01714231 +n11662585 +n12827907 +n12954353 +n11936199 +n01368672 +n03843883 +n12184095 +n10058411 +n11684654 +n08506347 +n10579549 +n01423302 +n11604046 +n07613158 +n03605504 +n02090129 +n02284224 +n01958435 +n12664469 +n04459122 +n09617161 +n09780828 +n11830252 +n12870048 +n04247544 +n09871095 +n02962938 +n09933020 +n13064457 +n10341243 +n07694169 +n13200193 +n07765728 +n01524761 +n07730562 +n07751737 +n07740855 +n04192521 +n12593122 +n07841037 +n02809736 +n10604275 +n12512095 +n01907287 +n04592596 +n09823153 +n03181667 +n12449784 +n07908923 +n12365900 +n03053976 +n15060688 +n04165675 +n02530637 +n09816654 +n12540966 +n07934152 +n09290350 +n03455802 +n10111779 +n01351315 +n10281770 +n13862552 +n12435486 +n12370174 +n12296045 +n03493219 +n12363301 +n11973749 +n03939565 +n02938321 +n13209270 +n12604639 +n12657755 +n03604536 +n10328941 +n04278932 +n10376890 +n01884203 +n02061853 +n04256318 +n07831821 +n10585217 +n07591813 +n10210648 +n07739035 +n01632308 +n10319313 +n02861777 +n03821145 +n13029610 +n04239900 +n10313441 +n04951716 +n10628097 +n02368116 +n08571275 +n04433377 +n10458596 +n12435965 +n12448136 +n12129986 +n04295777 +n07898895 +n07854266 +n12327846 +n12318782 +n07825850 +n10414239 +n11731157 +n04409911 +n10655442 +n11829205 +n01738306 +n02840515 +n04150371 +n03369512 +n02645538 +n12773917 +n07818422 +n03227010 +n10303037 +n12942025 +n12406304 +n06616216 +n02435216 +n12981954 +n03683341 +n09703809 +n07722666 +n11817160 +n10110893 +n10228468 +n03572631 +n01378545 +n02130086 +n04388574 +n11960673 +n12956922 +n11924014 +n09895902 +n03426462 +n07759576 +n02563949 +n03466947 
+n02522637 +n09480959 +n02033882 +n02451415 +n12677120 +n10580437 +n04425977 +n03057841 +n12285512 +n07614348 +n03144873 +n03391613 +n12366870 +n02304657 +n07863935 +n07909714 +n02413717 +n12591702 +n07838659 +n02967407 +n12016914 +n02735268 +n09470027 +n10222259 +n03899100 +n10513509 +n11620016 +n12600267 +n04368840 +n03016209 +n04085017 +n03215076 +n10238272 +n09782855 +n07586179 +n12434483 +n12452480 +n01990516 +n12030092 +n11739978 +n12714254 +n13036804 +n07727377 +n07879560 +n03710421 +n12128490 +n11968519 +n03250588 +n10173579 +n03114041 +n02942015 +n12729164 +n07871065 +n02591330 +n09353815 +n10138472 +n02712545 +n12866333 +n07835823 +n03508485 +n01758895 +n02925385 +n03321419 +n09931418 +n02846874 +n12500518 +n07587819 +n03160186 +n04974340 +n13067532 +n11940349 +n13027879 +n02878534 +n10055566 +n07925708 +n12628356 +n11958499 +n03472672 +n04233295 +n04563020 +n03426871 +n04330109 +n03677682 +n04129766 +n02884859 +n12692521 +n10188856 +n03500971 +n10355306 +n12407545 +n11955040 +n10028541 +n10345659 +n14720833 +n09641578 +n12613706 +n11718296 +n03380301 +n01334217 +n03890358 +n03583419 +n12447121 +n09660010 +n11826569 +n11837351 +n12096089 +n03871860 +n01821554 +n12834938 +n02738449 +n02644665 +n03316873 +n12548564 +n03605417 +n12094401 +n13152339 +n03004531 +n03080904 +n03535647 +n12349315 +n04213264 +n07860208 +n01526766 +n03710937 +n11806521 +n10618234 +n12306938 +n10473562 +n10050880 +n04596116 +n02577164 +n04479694 +n07936093 +n07834286 +n12175181 +n03986857 +n02919648 +n12055073 +n04567593 +n07585015 +n12771085 +n10551576 +n09778783 +n01593282 +n02406952 +n12331263 +n10629329 +n12287195 +n07729225 +n07828041 +n01880473 +n12257725 +n02696246 +n07853232 +n11936864 +n09745229 +n03364156 +n04503155 +n03194297 +n04003359 +n07607361 +n10106387 +n10306890 +n10455619 +n01647180 +n07740115 +n12106323 +n03626272 +n11685621 +n11866706 +n04321121 +n01606978 +n12621619 +n11615259 +n07840304 +n02841847 +n05459769 +n03432360 +n04604276 +n12356395 +n12468545 
+n03645168 +n00477827 +n03459591 +n04202142 +n12959074 +n07881625 +n12382233 +n02405692 +n12299640 +n12247202 +n12628705 +n12534625 +n09264803 +n12176953 +n09835017 +n10390807 +n04975739 +n12474418 +n11931135 +n07917791 +n10636488 +n09690496 +n11993675 +n03703203 +n11794139 +n13015688 +n04168084 +n01948446 +n10169419 +n04455048 +n04973669 +n12840502 +n12120578 +n10448455 +n01386007 +n02288122 +n01441910 +n02278463 +n03108759 +n02753710 +n03143400 +n13080866 +n13917785 +n13124358 +n13220663 +n02475358 +n01925916 +n02684649 +n10451590 +n03869976 +n03881305 +n07928264 +n01422185 +n04035634 +n11996677 +n04261369 +n12925583 +n12764008 +n09972587 +n03708962 +n01791388 +n02892626 +n04098399 +n07823369 +n07752874 +n13225244 +n03376771 +n01771766 +n13146403 +n12157179 +n13897198 +n07770869 +n13240362 +n07610502 +n03688504 +n02896856 +n12543186 +n09967063 +n05453412 +n12590600 +n02378870 +n07568241 +n01687290 +n00474769 +n11694866 +n02338722 +n02637977 +n04567746 +n10586444 +n11907405 +n03421960 +n07605693 +n10384214 +n12877637 +n12018363 +n10056611 +n13882487 +n12140759 +n04114301 +n11762018 +n12678794 +n11817501 +n02116450 +n12018530 +n03324629 +n12726528 +n03155502 +n10493199 +n04181083 +n10609198 +n04328703 +n03045074 +n07769886 +n01892385 +n12828520 +n03165211 +n11800565 +n07567139 +n13877547 +n12829582 +n02949084 +n07589724 +n01746191 +n12395463 +n05459457 +n10565502 +n11981475 +n09310616 +n12327022 +n02313709 +n12957803 +n11865276 +n12955414 +n12939479 +n13225365 +n07936459 +n03139089 +n07577772 +n12057895 +n03620353 +n12152031 +n01885158 +n04096733 +n12626674 +n10464711 +n10675609 +n07752782 +n03709960 +n02540983 +n02285179 +n01903234 +n07835701 +n04421083 +n02352290 +n09421031 +n03349367 +n02539894 +n04052235 +n07922955 +n03941887 +n04234260 +n04423552 +n11975254 +n08501887 +n12489676 +n04574348 +n10602119 +n02163008 +n02748491 +n10024937 +n10033888 +n12605683 +n01790398 +n10128519 +n14977188 +n10293590 +n12077244 +n09741074 +n11694469 +n12692714 +n12159804 
+n12533437 +n03831203 +n03692004 +n09462600 +n04537436 +n06618653 +n07913537 +n12783316 +n10038119 +n10236521 +n01486540 +n07875267 +n04345787 +n07681355 +n13028937 +n03607186 +n07863107 +n12387103 +n09830926 +n03574416 +n04478383 +n11685091 +n03197446 +n03225458 +n09741722 +n07736527 +n02857907 +n10177150 +n12711398 +n10308275 +n02418770 +n02577662 +n09935107 +n03362639 +n12446908 +n04329681 +n04114428 +n09624899 +n12913144 +n12338034 +n02341616 +n12360817 +n12907857 +n02414904 +n05482922 +n11974888 +n04127117 +n12581110 +n04368365 +n01699254 +n12525753 +n04254450 +n11951052 +n12458874 +n12721477 +n07562651 +n02239192 +n10533874 +n12006306 +n09537660 +n10008123 +n02788386 +n03248835 +n04491312 +n11795580 +n04025633 +n10166189 +n07703889 +n11824747 +n07605198 +n12134836 +n03591116 +n02946753 +n13212025 +n11742310 +n02328820 +n02985606 +n09955944 +n12679432 +n10020366 +n12013035 +n02942147 +n04172512 +n11802410 +n10789709 +n03385295 +n02039497 +n01416213 +n11940750 +n12178780 +n01967963 +n12662379 +n12217851 +n02812631 +n12432069 +n09991740 +n03089477 +n12458713 +n03876111 +n10311661 +n12286068 +n02838958 +n11936369 +n03716228 +n13228017 +n06276902 +n12677331 +n04330189 +n10488016 +n12011370 +n04343740 +n07893792 +n02171164 +n03963483 +n12080588 +n07577657 +n12936155 +n03809686 +n04223066 +n04086066 +n12776558 +n07813579 +n01841943 +n12285705 +n02581482 +n11653570 +n10010632 +n04305947 +n12228886 +n12797368 +n01404495 +n09697986 +n11882237 +n10077879 +n07607832 +n09779461 +n13212379 +n10769188 +n10715789 +n01480106 +n02145910 +n04275093 +n01983829 +n01978010 +n09937903 +n11976314 +n11785276 +n12386945 +n04445782 +n10712374 +n10706812 +n10194775 +n12655062 +n10739135 +n02597972 +n02307176 +n04121342 +n02350670 +n12698027 +n02805845 +n02895008 +n13149970 +n03451365 +n04542595 +n07803895 +n07864198 +n09690864 +n03844550 +n12378249 +n10345422 +n13163553 +n10457903 +n10783539 +n10539015 +n11757017 +n10274173 +n08652376 +n10283546 +n04541777 +n02824152 +n12945177 
+n02082056 +n03695957 +n07936015 +n07591162 +n03628071 +n02990758 +n07685118 +n04023422 +n04951875 +n03541393 +n10289176 +n04039209 +n07913180 +n07910799 +n12017853 +n03732543 +n10656120 +n10512859 +n04556664 +n12464649 +n12927758 +n12078451 +n07878145 +n10561320 +n12467592 +n07689217 +n07619881 +n11935187 +n09837720 +n03642144 +n12220019 +n02983507 +n03271260 +n02778588 +n10193650 +n01654083 +n02746978 +n10202763 +n02953552 +n07924366 +n08583554 +n02905886 +n07855603 +n09745834 +n12366053 +n04140539 +n03383211 +n11648268 +n03352961 +n12116734 +n07771539 +n07836077 +n03842754 +n11683838 +n03004409 +n11730750 +n13098962 +n12292463 +n02867592 +n01653026 +n07583865 +n12548804 +n12702124 +n03917048 +n12677841 +n12511488 +n04217387 +n12495670 +n03554375 +n12403513 +n08558770 +n02781764 +n12339526 +n12742290 +n01404365 +n03591798 +n12446737 +n10494195 +n12110352 +n01672611 +n10493922 +n03638623 +n09910840 +n02238594 +n02575325 +n13186546 +n11873182 +n10344774 +n04094060 +n10417682 +n02749169 +n02428089 +n04549721 +n03824284 +n12107002 +n12784371 +n09986904 +n01634227 +n07826544 +n12253487 +n01679005 +n12516165 +n09339810 +n03126090 +n07803408 +n11883945 +n03842276 +n03397412 +n03280216 +n12264786 +n02545841 +n11877860 +n01830479 +n13207923 +n12490490 +n02542958 +n04114719 +n12590715 +n13226320 +n11644872 +n04119630 +n10176913 +n04213105 +n11652966 +n12546420 +n12625823 +n11897466 +n02092173 +n10567613 +n04953678 +n10059067 +n12408466 +n03056288 +n13036116 +n04169597 +n12467197 +n02569905 +n02758490 +n12623211 +n04077889 +n04959061 +n04183957 +n11689815 +n03777126 +n03306869 +n07720084 +n02659478 +n12947756 +n04341288 +n04448185 +n04037076 +n09828988 +n03346289 +n04174705 +n13126050 +n04255346 +n09764732 +n11773628 +n14891255 +n04314107 +n02184720 +n02646892 +n04320598 +n01979526 +n03191451 +n03662452 +n10290422 +n01739094 +n02305636 +n04202282 +n05459101 +n02766168 +n09994808 +n03528100 +n10475940 +n03005619 +n12639168 +n02144936 +n13202125 +n10703221 +n03770834 
+n12324056 +n03474167 +n02609302 +n12166929 +n12852570 +n12920719 +n12508762 +n11983375 +n01422450 +n12616630 +n09681107 +n10486561 +n13038577 +n12266644 +n02478875 +n02547014 +n02249809 +n03336742 +n12038760 +n01672432 +n09861287 +n03678879 +n01949973 +n09928845 +n02310149 +n12648693 +n10533983 +n12812801 +n04550676 +n01800633 +n12128306 +n12744142 +n13140367 +n07803213 +n07688265 +n13068434 +n02030568 +n12955840 +n01625121 +n13215258 +n04270576 +n02680638 +n02817251 +n01539272 +n04066023 +n12969927 +n10280598 +n04001661 +n09774167 +n10358575 +n01836673 +n02290664 +n09940725 +n12447581 +n07803779 +n04561965 +n10151261 +n01538362 +n10170060 +n13160365 +n09823287 +n12554729 +n10620212 +n11935027 +n03465605 +n03227856 +n08519299 +n07785487 +n03522863 +n02861286 +n12200905 +n04269502 +n02104184 +n07612273 +n01390763 +n11872658 +n12981086 +n10244359 +n01738731 +n12117235 +n12846690 +n02861658 +n08782627 +n09832633 +n02531114 +n01394492 +n03269073 +n03077442 +n09794668 +n13884384 +n08659331 +n02556373 +n02587877 +n03523506 +n03723153 +n12024805 +n13061172 +n03978575 +n07914686 +n13134844 +n12183026 +n03573574 +n03765128 +n03319167 +n01920438 +n07852452 +n07680655 +n03017698 +n12959538 +n04261506 +n01793340 +n03292362 +n12817855 +n03593222 +n01962506 +n12453018 +n04027367 +n12518481 +n09223487 +n07871335 +n03779246 +n09668562 +n01889849 +n02492356 +n07830841 +n03277149 +n09968652 +n03092476 +n10400205 +n06263202 +n07595368 +n12767208 +n02196896 +n12580012 +n10265801 +n02103181 +n02922461 +n01731277 +n12422559 +n04278605 +n02250280 +n03283413 +n11829922 +n10191613 +n02493224 +n04427559 +n12181352 +n12742878 +n10683675 +n04503705 +n03785142 +n12816942 +n10723230 +n11936707 +n12360534 +n12909759 +n03766218 +n02696843 +n11935877 +n07828156 +n10617397 +n12921499 +n13158714 +n10166394 +n12370549 +n03505015 +n12769065 +n02636550 +n10781236 +n09869317 +n10275249 +n04234763 +n10735173 +n13137225 +n02070776 +n04232312 +n07575226 +n03471030 +n07909954 +n02633677 +n01662060 
+n07563642 +n04263950 +n11824344 +n13178707 +n02972714 +n10417288 +n12092930 +n11993203 +n10170681 +n03726116 +n03215337 +n12564613 +n14975598 +n07758125 +n03123666 +n07717714 +n01421333 +n02359667 +n09403086 +n03857026 +n12759668 +n02628259 +n02307515 +n12146488 +n09777870 +n07819303 +n12105353 +n10784113 +n11802995 +n12561594 +n02845130 +n12100187 +n03507658 +n02141611 +n01800195 +n03470005 +n12444898 +n02203592 +n09707061 +n00475142 +n12216628 +n01732093 +n02581642 +n03803780 +n12114590 +n04541662 +n12267133 +n11652753 +n07859951 +n04524594 +n12843144 +n04040540 +n10604880 +n12559044 +n03063834 +n12394328 +n12704513 +n10230216 +n10756641 +n02101670 +n12309630 +n03070587 +n11626010 +n04239639 +n01638329 +n01928517 +n13144084 +n10420649 +n03102516 +n12395289 +n09833111 +n01651285 +n11688069 +n12881913 +n12783730 +n07716649 +n03618678 +n10344203 +n03626502 +n10718665 +n03577474 +n01683201 +n03246653 +n12153224 +n02519472 +n02470709 +n15090238 +n03129636 +n07774295 +n04577567 +n09995829 +n09662038 +n10297367 +n03555862 +n12531727 +n09947127 +n12533190 +n04062807 +n00479734 +n12860978 +n01884104 +n09866559 +n12069009 +n04595501 +n12088495 +n02909053 +n12283790 +n02180427 +n10697282 +n07562881 +n13092078 +n11706325 +n01746952 +n01978136 +n07731436 +n02386746 +n12648424 +n12726357 +n10314182 +n07839172 +n11753562 +n12903503 +n12589687 +n02375438 +n03604763 +n11549895 +n13202602 +n12304420 +n10738215 +n12220829 +n10095420 +n12177455 +n11887476 +n04006411 +n09838370 +n02853218 +n12688372 +n03335461 +n02800940 +n03036701 +n09885059 +n10206629 +n11922926 +n01678657 +n12192132 +n12248141 +n03108624 +n01936671 +n02417242 +n03222857 +n03768823 +n04343511 +n03538817 +n12655726 +n12521186 +n01330497 +n12767423 +n12965951 +n09695132 +n04410886 +n12599874 +n07865700 +n07596160 +n10227698 +n03224490 +n11598886 +n02948293 +n09906293 +n12247963 +n03301175 +n03895170 +n04259468 +n07808806 +n13147689 +n09856827 +n13882639 +n02241008 +n03842585 +n02883101 +n12182276 +n13918717 
+n12728164 +n10634464 +n02477187 +n03107716 +n02342250 +n01479213 +n12793695 +n09808080 +n10707707 +n04161010 +n02836607 +n10076483 +n07726386 +n03872273 +n10250712 +n07688412 +n13884930 +n12301766 +n10196404 +n07591330 +n03814727 +n09610255 +n12757115 +n09814381 +n02397987 +n07886317 +n03959123 +n02185167 +n03533845 +n11838413 +n10227393 +n07704305 +n03580615 +n02663485 +n10101981 +n04346855 +n10067011 +n04464125 +n02829510 +n10007995 +n07845775 +n03004713 +n02450561 +n09905530 +n10361060 +n12394638 +n12095934 +n10479135 +n03145277 +n12246773 +n13194212 +n04475900 +n03252787 +n14867545 +n10485298 +n09961739 +n02149653 +n01553762 +n03931980 +n02344408 +n11676850 +n04034367 +n04235646 +n12867184 +n12625670 +n12763529 +n07593107 +n04351550 +n02571810 +n13899735 +n03652826 +n09495962 +n03421768 +n04205062 +n11918808 +n07745197 +n07752264 +n01892744 +n04609811 +n10278456 +n11790936 +n09754152 +n13234519 +n09820044 +n00440643 +n02350357 +n03779884 +n07803992 +n03305953 +n01836087 +n10068234 +n10690421 +n03134394 +n12380761 +n12801966 +n03134232 +n02596720 +n07591236 +n11882821 +n02312175 +n02387983 +n01912152 +n10805501 +n12718074 +n03188290 +n02776505 +n10528148 +n09971385 +n10524223 +n09958292 +n02721813 +n10300829 +n12007766 +n12107191 +n04449700 +n02987950 +n11878633 +n12328801 +n04551833 +n10567722 +n11654984 +n02808968 +n12066451 +n02964075 +n11633284 +n02434712 +n03070854 +n07926540 +n01543936 +n10091861 +n09938080 +n11976511 +n03342432 +n12886831 +n12509993 +n12958261 +n12730776 +n10066206 +n07846014 +n13176714 +n03332591 +n04607640 +n02513727 +n12138248 +n11964848 +n01318053 +n10553140 +n07839055 +n02632039 +n11865429 +n02286654 +n02367812 +n12093885 +n10774329 +n02296912 +n01729672 +n10353928 +n12033504 +n11936113 +n03263338 +n07822053 +n09737050 +n13875884 +n13212559 +n11690088 +n05468739 +n09344724 +n02507148 +n01377694 +n04172607 +n10464870 +n07804152 +n02825872 +n03139640 +n11858703 +n10227490 +n12334153 +n03616225 +n12018188 +n12399656 +n10235269 
+n11840764 +n01995514 +n03326475 +n12704041 +n10684827 +n03006788 +n13906484 +n02868240 +n03614887 +n03491724 +n12124172 +n03675907 +n13170840 +n03983712 +n03254737 +n07836269 +n01784293 +n02095212 +n12470512 +n12219668 +n12920521 +n04492157 +n02950018 +n01922717 +n11797981 +n12601805 +n02744961 +n07814925 +n09798096 +n03939062 +n13891547 +n07564292 +n01590220 +n09295210 +n03997875 +n03479266 +n01491661 +n03781055 +n12528768 +n10657306 +n12014923 +n10094320 +n02532272 +n02224023 +n04541136 +n12067672 +n02661473 +n04233027 +n12399899 +n12889412 +n01736032 +n12551173 +n01337734 +n10104487 +n02921592 +n02148512 +n10216403 +n03276839 +n01781570 +n03999621 +n02505238 +n12537569 +n10433452 +n02351343 +n12365158 +n08539276 +n01897257 +n12221801 +n10557246 +n10437698 +n01803641 +n11836327 +n07813833 +n03468570 +n06277025 +n10040240 +n03692842 +n03017835 +n01881564 +n10487363 +n07937069 +n10597505 +n01638722 +n10160412 +n09825096 +n12611640 +n03098515 +n10654211 +n13196234 +n03436990 +n04058486 +n09814567 +n10758337 +n03515934 +n07688757 +n10269199 +n12627347 +n04521571 +n01636510 +n03220095 +n09982525 +n12768809 +n02340930 +n02473857 +n12336586 +n12125584 +n02833040 +n02498153 +n01467804 +n12120347 +n11650430 +n11953339 +n12592058 +n05102764 +n10575594 +n09722064 +n01966586 +n10619888 +n07852376 +n12650915 +n10321882 +n11974557 +n09847267 +n13201423 +n12337131 +n13185658 +n02150134 +n10538853 +n10471732 +n07836600 +n03526062 +n02512752 +n04232437 +n03367321 +n04308915 +n07600895 +n11539289 +n03539293 +n12699922 +n07817599 +n02781213 +n03594010 +n12035907 +n04075813 +n05233741 +n07863229 +n10735984 +n12095543 +n12272735 +n04229620 +n12240965 +n07768590 +n04420024 +n12111627 +n02861509 +n02595056 +n12183452 +n04607982 +n13213577 +n07741888 +n03750614 +n10043024 +n03372933 +n10051861 +n10199251 +n03249956 +n03984125 +n02956393 +n11619687 +n03356279 +n07833951 +n10715030 +n02340358 +n10768272 +n01494041 +n02592734 +n03323319 +n02136285 +n03995661 +n09945223 +n03547397 
+n10044682 +n12878784 +n02803809 +n13160254 +n12726902 +n12196954 +n03161016 +n03105645 +n04218921 +n09493983 +n10719036 +n12263588 +n12565102 +n10684146 +n03148518 +n04287986 +n02340640 +n04331443 +n10727016 +n03369407 +n07824863 +n07844786 +n12467433 +n07582811 +n02964196 +n02197877 +n10758445 +n03271376 +n13212175 +n03260504 +n12777778 +n11973634 +n05467054 +n11946313 +n02462213 +n13906669 +n10520286 +n02074726 +n01771100 +n13880199 +n09811568 +n13883763 +n02334728 +n11831100 +n12025220 +n12751172 +n03858837 +n10127186 +n12831535 +n07823591 +n02513805 +n03662301 +n09913329 +n02749670 +n10655986 +n01787191 +n03199488 +n12732252 +n12253664 +n07735294 +n03440876 +n09650839 +n03844965 +n10341446 +n12688187 +n12961242 +n03423224 +n13157346 +n09802951 +n11948044 +n03489048 +n12279060 +n03664840 +n03731882 +n07742605 +n07870734 +n03949761 +n10759331 +n07739923 +n02737351 +n01788291 +n11780424 +n03722646 +n12297110 +n12363768 +n04495310 +n10008254 +n03934890 +n01318478 +n03609959 +n10070377 +n04123228 +n13068735 +n02909706 +n10671042 +n10491998 +n07650792 +n12664710 +n10213034 +n03455642 +n10411867 +n09903936 +n10121800 +n02622955 +n03647423 +n07596566 +n09654898 +n12248780 +n02684515 +n04255670 +n06273890 +n03495941 +n12960552 +n09724234 +n03861048 +n03293095 +n11835251 +n12852428 +n04084517 +n01814620 +n13159890 +n03147156 +n02311748 +n10237799 +n07584859 +n01946827 +n09651968 +n12241192 +n03669245 +n07858336 +n11932927 +n04444218 +n10526534 +n03642573 +n09470222 +n10731732 +n12001924 +n03786096 +n01359762 +n03824999 +n13877667 +n10591811 +n10574311 +n03275125 +n11631985 +n10539160 +n10502950 +n12499757 +n12432707 +n12068615 +n07689624 +n02610373 +n03204436 +n13051346 +n13134531 +n07610890 +n04021164 +n03502897 +n02299378 +n10417843 +n10050043 +n07929940 +n02593453 +n10577820 +n12870225 +n03333851 +n09463226 +n11741575 +n09193551 +n12012510 +n11987349 +n09215023 +n07924655 +n10060075 +n11999278 +n03933391 +n02602059 +n11993444 +n02337902 +n10149867 +n04441093 
+n02868429 +n10629647 +n04192361 +n12029039 +n02768433 +n12078747 +n12730143 +n03255167 +n12492900 +n01709876 +n09672725 +n07870620 +n02315821 +n12277334 +n12204730 +n07852712 +n01319685 +n07802246 +n13031193 +n00812526 +n09658815 +n11982939 +n04264485 +n07893425 +n04094438 +n03285730 +n13182338 +n10724570 +n07832741 +n13210350 +n10654015 +n04058721 +n07875086 +n03462747 +n03994417 +n02889856 +n11957514 +n10109443 +n10478462 +n03064562 +n02477782 +n11920998 +n02138169 +n04227787 +n11797508 +n10753339 +n12928307 +n11921792 +n12643688 +n01833112 +n03919808 +n09817386 +n01903498 +n03848033 +n12031547 +n01035504 +n12324906 +n01911063 +n02588794 +n03749634 +n03539754 +n02242455 +n03079616 +n03246312 +n09705671 +n07860629 +n10458356 +n10051761 +n09709531 +n02867401 +n12522678 +n13150378 +n04462576 +n03462315 +n03712981 +n07607027 +n10581648 +n02957427 +n04271793 +n02253913 +n12824735 +n11697802 +n02161588 +n12463975 +n02361090 +n09784564 +n09680908 +n03512452 +n13214217 +n10712690 +n04023119 +n07814007 +n09833751 +n12885265 +n02259987 +n11933903 +n03628831 +n11967142 +n02533545 +n03900301 +n07919787 +n12793886 +n10768148 +n03071552 +n02780315 +n12193665 +n03378442 +n04486616 +n07832307 +n03164192 +n12786273 +n04261868 +n12655351 +n12320414 +n04371979 +n10630093 +n13052014 +n01357328 +n07879821 +n09753348 +n03796974 +n11701302 +n11678299 +n04022434 +n11610823 +n07726009 +n04117639 +n10474343 +n11888061 +n01842788 +n10435251 +n03343047 +n03383378 +n12750767 +n09662661 +n05241485 +n10000459 +n12220496 +n02246941 +n12676370 +n02253264 +n07766409 +n02940289 +n12089320 +n10363573 +n12922119 +n09783537 +n11695285 +n12331066 +n12573647 +n10218164 +n12509821 +n07862946 +n12818601 +n02589316 +n13191620 +n03758992 +n12112337 +n10733820 +n02898093 +n02645953 +n10150794 +n04595762 +n02344918 +n13132756 +n12859153 +n12138444 +n04211001 +n12935166 +n07830493 +n10142166 +n11951820 +n03018848 +n01453742 +n11985321 +n10000294 +n01362336 +n02328009 +n12639376 +n03090437 +n02204249 
+n04312916 +n13127666 +n09684082 +n03432509 +n10274318 +n09704057 +n07593972 +n10074249 +n13157971 +n01638194 +n04036963 +n11708857 +n03418749 +n12589458 +n11899762 +n07683138 +n01601410 +n07854707 +n04279063 +n03239607 +n10302700 +n12520406 +n12576451 +n03881534 +n07565608 +n02349390 +n12569851 +n12249294 +n04059399 +n03530189 +n09357346 +n04325208 +n13159691 +n04045941 +n13898315 +n11992479 +n02353411 +n07825496 +n12922458 +n03115014 +n11761836 +n03323211 +n02793296 +n03492087 +n05241662 +n05491154 +n10419630 +n04506895 +n10546428 +n02907296 +n10769459 +n11647868 +n13188462 +n03825442 +n13209460 +n10742005 +n07599242 +n12361754 +n04570532 +n04131811 +n07756499 +n02598134 +n01910252 +n02910701 +n10129338 +n13871717 +n12673588 +n12565912 +n07562172 +n02711237 +n10775003 +n07695410 +n02637179 +n12930951 +n10261211 +n02906963 +n01366700 +n10642705 +n09846586 +n02779719 +n04978561 +n01369358 +n12114010 +n03521771 +n10667709 +n02296612 +n10722029 +n03500557 +n01365474 +n10472447 +n07585644 +n07609316 +n04013060 +n04505888 +n09726811 +n12692160 +n12378963 +n03585551 +n13139837 +n10167565 +n03799375 +n11990920 +n09640327 +n04502989 +n10108832 +n10561736 +n01897426 +n11766189 +n12462582 +n12913524 +n02684356 +n13200542 +n10466198 +n04331892 +n01478969 +n07837234 +n07692248 +n04552097 +n12382875 +n01484447 +n04120695 +n12681376 +n10293861 +n11965962 +n11788039 +n03959227 +n01832813 +n09918867 +n09942697 +n07587206 +n10459882 +n01347583 +n02267208 +n03951453 +n03006903 +n12126736 +n10286749 +n03395401 +n04605057 +n03467887 +n12755559 +n04020744 +n11629354 +n01647033 +n02780445 +n10205714 +n09439032 +n03138128 +n02763083 +n07835547 +n12251278 +n11949857 +n01635480 +n10675142 +n07845335 +n07751977 +n10332110 +n11871496 +n11764814 +n12229651 +n07760297 +n09865672 +n02919308 +n12218490 +n03782929 +n12231709 +n11909864 +n03144982 +n11799331 +n10433610 +n10483395 +n03206023 +n05442594 +n03626418 +n07870478 +n10171456 +n11964446 +n12796849 +n02126317 +n03797062 +n01412694 
+n07610746 +n03581897 +n04479526 +n12447891 +n11906514 +n09699642 +n12873984 +n10586903 +n13234114 +n02436353 +n11889205 +n01460303 +n04400899 +n11884967 +n02140491 +n12215824 +n03586911 +n01394040 +n10691937 +n12371704 +n09668988 +n04362624 +n01740885 +n01337191 +n09714120 +n02185481 +n08555333 +n10704238 +n12430471 +n12034594 +n10012484 +n12088909 +n03205903 +n04129490 +n13090018 +n10712474 +n12234669 +n13016076 +n00454855 +n13882713 +n02644817 +n03192907 +n03519226 +n01561181 +n04583967 +n11732052 +n10732854 +n04480303 +n07934908 +n03825673 +n10621294 +n04354387 +n03374102 +n02922159 +n13158815 +n04000716 +n09685806 +n04427216 +n12051514 +n09712967 +n12081649 +n09748889 +n03252231 +n10704886 +n12897118 +n12525168 +n11728769 +n02731251 +n02548884 +n12403276 +n09627807 +n08679167 +n09663999 +n04247440 +n07711683 +n09909929 +n03415868 +n05244421 +n07680416 +n12757668 +n11935794 +n03483086 +n01860864 +n10755164 +n03675076 +n12004987 +n07566092 +n04078955 +n03379719 +n01916588 +n10138369 +n09755893 +n03649003 +n03977430 +n02309120 +n10616578 +n12242850 +n12388293 +n03292085 +n09919061 +n10302576 +n01497413 +n01936858 +n01377278 +n04358256 +n02667693 +n12125183 +n07758582 +n07813324 +n09737453 +n12745564 +n03855464 +n03166685 +n01446152 +n09801102 +n10561222 +n10576818 +n13915209 +n10474446 +n03845990 +n04237174 +n12531328 +n07855812 +n10763245 +n04614505 +n07905770 +n12051792 +n12653633 +n03593862 +n10359659 +n10436334 +n07853125 +n12911264 +n12265083 +n03638014 +n04444121 +n02706221 +n10563711 +n07808166 +n11799732 +n04093915 +n10451858 +n04410760 +n10075299 +n12740967 +n12635359 +n09611722 +n12902466 +n13915305 +n05542893 +n04440597 +n03675445 +n12315245 +n10646032 +n10047199 +n12775717 +n10365514 +n10590452 +n11616260 +n02812342 +n07856756 +n04570416 +n03565991 +n12215210 +n04330896 +n02388588 +n02266269 +n10760199 +n14714645 +n02742070 +n03565710 +n12609379 +n03420935 +n03441465 +n00453631 +n01963479 +n04362972 +n09863936 +n03961394 +n03009269 +n12297280 
+n04561010 +n12192877 +n02981565 +n12134695 +n07855413 +n03232815 +n10180791 +n09932788 +n10571907 +n02109256 +n02660091 +n07865788 +n13228536 +n10306279 +n02635580 +n03634899 +n10262343 +n12296929 +n04393301 +n06281175 +n04485586 +n13103660 +n10510974 +n04166436 +n01634522 +n07596362 +n12700357 +n08597579 +n11744011 +n12238756 +n01790171 +n04571800 +n11867311 +n03464467 +n12241880 +n09961605 +n12592544 +n03170459 +n09938991 +n02692680 +n10295371 +n04331765 +n02612167 +n02520810 +n11977887 +n04094608 +n07722390 +n07832202 +n12448361 +n04612159 +n12186352 +n13161151 +n12654227 +n09868899 +n10104756 +n09920106 +n12981301 +n02610980 +n12545865 +n10673296 +n04110841 +n01704626 +n04055700 +n12117912 +n10519126 +n12443736 +n01697978 +n02148088 +n03012644 +n12091697 +n10395390 +n10509810 +n10462751 +n02896949 +n03836602 +n03928994 +n07718195 +n02473983 +n08571642 +n02648916 +n11970298 +n06274292 +n04613158 +n09856401 +n12811713 +n13111340 +n12122442 +n10095265 +n04445610 +n11631619 +n07863644 +n12022821 +n10315217 +n12549799 +n03386343 +n03121040 +n03558007 +n12272432 +n11798496 +n02522866 +n02952935 +n10741493 +n12143065 +n07883156 +n09616573 +n02289988 +n13161904 +n02588945 +n00451768 +n12375769 +n10777299 +n04495183 +n11930994 +n09970088 +n02254246 +n12276314 +n07857598 +n04428382 +n03789794 +n03383821 +n12980080 +n01447139 +n12880799 +n03501520 +n10764465 +n13143285 +n12727729 +n12444095 +n02354621 +n13174354 +n01691652 +n07732525 +n10437014 +n04368235 +n10371052 +n02611898 +n03597147 +n09912431 +n03135788 +n07888058 +n02409202 +n14582716 +n11934463 +n04395332 +n12558680 +n05257967 +n11798978 +n10617024 +n04102760 +n12132092 +n12988572 +n10390698 +n11887310 +n12063211 +n12952717 +n13141972 +n12176453 +n10245863 +n10509161 +n10389976 +n10333165 +n01474864 +n09274305 +n11888424 +n10368711 +n13222877 +n10469611 +n07582970 +n09700125 +n12805762 +n07865575 +n07853852 +n03628421 +n04482975 +n03099622 +n01349735 +n11943133 +n12736603 +n12197601 +n10597745 +n04418644 
+n12689305 +n07755262 +n10598459 +n04312020 +n03195485 +n09776642 +n10596517 +n10223606 +n01923890 +n12703716 +n03465040 +n12372233 +n12528109 +n03571853 +n10802621 +n10204177 +n02320465 +n03976105 +n02214096 +n02148991 +n10377542 +n10697135 +n03538542 +n07582027 +n04517999 +n12180456 +n02838014 +n03977266 +n03818001 +n12191240 +n11648776 +n10773800 +n04475496 +n03945817 +n04682018 +n02994743 +n02787269 +n11650160 +n03834472 +n03389983 +n09797742 +n06209940 +n12525513 +n12672289 +n01893164 +n10710259 +n01892145 +n11773408 +n10554024 +n09864968 +n10699752 +n11631405 +n10414768 +n04430605 +n10742546 +n10738871 +n12857204 +n09309046 +n01724840 +n04123317 +n07881525 +n03868044 +n02140268 +n10708292 +n09838295 +n09797998 +n10710171 +n11814996 +n11938556 +n03543511 +n02151230 +n01515217 +n03533392 +n02039780 +n12810151 +n02335231 +n12152251 +n13225617 +n09801275 +n01978587 +n14821852 +n11742878 +n12679023 +n03521431 +n09679028 +n02021281 +n10784544 +n04421258 +n12492460 +n03720005 +n02541257 +n03889397 +n02888898 +n10659762 +n12045157 +n12712320 +n10369095 +n09721444 +n12769318 +n01703161 +n12697514 +n07836456 +n03905361 +n10660883 +n07769306 +n11893916 +n07846274 +n04110281 +n03655470 +n07740744 +n01363719 +n12540647 +n09896311 +n12842642 +n07755619 +n07754155 +n11548870 +n02868546 +n04215588 +n04288165 +n13201566 +n07721118 +n12018271 +n11903333 +n02909165 +n02662559 +n11658709 +n13063514 +n07725663 +n10179069 +n10776887 +n12637485 +n03814528 +n12542043 +n07833333 +n07820036 +n02746683 +n07925808 +n10349750 +n03154316 +n04155625 +n03232923 +n02116185 +n09998788 +n02821543 +n03410303 +n10656223 +n07916582 +n12880638 +n10408809 +n04612840 +n11805255 +n12044784 +n10497534 +n03458422 +n12873341 +n07808675 +n09476123 +n07611733 +n10598013 +n02214660 +n05469664 +n03952150 +n11855435 +n04375926 +n08523340 +n01642391 +n04007415 +n09756961 +n12891824 +n02894847 +n11698245 +n12906771 +n02894024 +n04131015 +n11882636 +n04386456 +n03291551 +n07837110 +n12462221 +n08540532 
+n10299875 +n12705978 +n10448322 +n10487592 +n12175598 +n02272552 +n03833907 +n10383237 +n12758176 +n12729950 +n10061195 +n07816726 +n03241903 +n12239880 +n10380499 +n07855188 +n10207077 +n02770078 +n12961393 +n03778459 +n10734741 +n03485575 +n09958447 +n12337246 +n11830045 +n09866354 +n03209666 +n01470145 +n10395209 +n03872016 +n04267091 +n12888457 +n12104104 +n04088229 +n01964957 +n12002651 +n02503756 +n00481938 +n01908042 +n03378765 +n04193883 +n09862183 +n11861487 +n02520525 +n02081060 +n10386754 +n12693865 +n04514095 +n01325060 +n02460817 +n07568095 +n03651605 +n02561937 +n12844409 +n12888016 +n02974565 +n12439154 +n13018906 +n12071259 +n03897634 +n02863176 +n10603528 +n03493911 +n12887532 +n12944095 +n12794568 +n09980458 +n03503567 +n11783162 +n13123309 +n11729860 +n03702582 +n04280373 +n10086744 +n01790557 +n12627526 +n10552393 +n12092629 +n03888998 +n12751675 +n01442450 +n02479332 +n07726230 +n03642341 +n03142325 +n06263895 +n12088327 +n09703344 +n10528493 +n02820085 +n07737594 +n04090781 +n09901642 +n02328942 +n02724722 +n09866115 +n12658715 +n10481167 +n13135692 +n11850918 +n10205344 +n12361560 +n03698123 +n03284482 +n12106134 +n04441528 +n02591613 +n02581108 +n07856186 +n12197359 +n12900783 +n01725713 +n12012253 +n03907475 +n02170738 +n03694949 +n13238654 +n04611795 +n02782432 +n13191148 +n02741367 +n04170694 +n12770892 +n01973148 +n10080508 +n10161622 +n09808591 +n07912093 +n02059541 +n02779971 +n03857156 +n12945366 +n03055159 +n12758325 +n10067305 +n02597818 +n07808352 +n13147153 +n10679723 +n02271222 +n04012665 +n12942729 +n10349243 +n01377510 +n07800636 +n10654321 +n10219453 +n09961469 +n10732521 +n04479405 +n11632929 +n03856728 +n08658918 +n10327143 +n10754281 +n02085118 +n09691604 +n09952163 +n10082299 +n03872167 +n03733465 +n04138869 +n01425223 +n12066821 +n02177506 +n09892262 +n02896694 +n12983654 +n13224922 +n09658921 +n12744850 +n03639880 +n02943686 +n10660621 +n11936539 +n03698226 +n04519536 +n12392765 +n09319604 +n07567039 +n04160261 
+n01802159 +n02838178 +n07746910 +n02266421 +n10240417 +n12542240 +n12550408 +n01445857 +n04132465 +n03569014 +n12666050 +n12362514 +n10676569 +n09702673 +n12885510 +n04447156 +n04396226 +n12240150 +n11639306 +n02249134 +n01340785 +n02833140 +n10027590 +n02142407 +n11996251 +n07874531 +n04340019 +n03166120 +n10420277 +n04465203 +n12738259 +n12831141 +n03998673 +n01385017 +n12842519 +n02587051 +n10753061 +n12505253 +n13906936 +n01989516 +n12640435 +n07852532 +n04243142 +n10261511 +n12853287 +n12239240 +n03973003 +n09983889 +n10345302 +n14804958 +n02354162 +n03049326 +n10443659 +n01318660 +n12787364 +n04253304 +n11941094 +n09283514 +n09393524 +n11865574 +n01531639 +n04409279 +n02859729 +n10712835 +n03694196 +n04343630 +n10331098 +n12929600 +n02826259 +n10171219 +n07735179 +n07594840 +n03709644 +n09950728 +n09859285 +n07718329 +n01418620 +n09858299 +n12395068 +n10011360 +n07763290 +n02643316 +n03596099 +n04422566 +n11958888 +n09650989 +n10318686 +n01333082 +n12886402 +n03781467 +n12667582 +n02923535 +n09988311 +n08663860 +n02508346 +n13885011 +n03939281 +n10772937 +n04485750 +n09871952 +n10291942 +n07759324 +n10174971 +n03666238 +n01937579 +n02308033 +n07847706 +n10371330 +n04124887 +n11853079 +n11941478 +n12647231 +n04601041 +n12718483 +n02902816 +n01941340 +n04066767 +n07617839 +n02254901 +n03488784 +n07834774 +n02524659 +n03367969 +n10783734 +n03422484 +n09776807 +n03970363 +n10131590 +n03433247 +n02622712 +n10206506 +n12061104 +n11936287 +n07874674 +n10061043 +n07828275 +n03764606 +n12236768 +n01826844 +n09741904 +n05454978 +n03591592 +n01441272 +n03736372 +n07585474 +n12762405 +n12943912 +n01894522 +n03218446 +n11846425 +n11689678 +n04147916 +n02375862 +n10409459 +n09287415 +n10113583 +n03261263 +n02817386 +n09869578 +n10550252 +n02532786 +n12031388 +n07937344 +n11612235 +n01571410 +n09402944 +n04234670 +n02603862 +n04196925 +n09999135 +n10468750 +n15093049 +n03003633 +n11650307 +n12312110 +n02525703 +n10501635 +n09751622 +n10114550 +n10103155 +n12829975 
+n04004099 +n12419878 +n02082190 +n03328201 +n03093427 +n07845571 +n12655498 +n02558206 +n12563045 +n07573453 +n12324558 +n13016289 +n10601234 +n10310783 +n03531691 +n02135610 +n03168543 +n09985978 +n10615334 +n07839312 +n09985809 +n10142537 +n10417969 +n07869111 +n12514992 +n04327544 +n10326776 +n12583681 +n01476418 +n12840168 +n03852544 +n11713763 +n07824502 +n07858841 +n12256325 +n03036149 +n07883661 +n04500390 +n10170866 +n01835918 +n10760951 +n10720197 +n12330239 +n02135844 +n10210512 +n03217739 +n10802953 +n03136254 +n02161225 +n03961630 +n12927194 +n02251233 +n13891937 +n09945603 +n02695762 +n12181612 +n13234857 +n10175725 +n11346873 +n07934678 +n02318687 +n10251329 +n04112921 +n04001132 +n03042984 +n11704791 +n04246459 +n12193334 +n10718509 +n10371221 +n05278922 +n03265754 +n12186554 +n12481289 +n10521853 +n10748506 +n11729142 +n10143595 +n09422631 +n07562984 +n07850219 +n04193742 +n11997160 +n12002826 +n12820113 +n04132829 +n10272913 +n03358841 +n12610740 +n12384569 +n10725280 +n02746008 +n13148384 +n12635151 +n02337171 +n10350774 +n12308907 +n04542474 +n04339062 +n03549350 +n10240235 +n10556033 +n10214390 +n01791314 +n02801047 +n07817465 +n11610602 +n10315730 +n14592309 +n10249191 +n12453857 +n12579822 +n09833275 +n04051269 +n11552594 +n04088343 +n04565039 +n03930431 +n10679503 +n11899921 +n10295479 +n01357507 +n13036312 +n03404900 +n12523141 +n01816017 +n02020578 +n12661045 +n06262943 +n02775813 +n12921315 +n09751076 +n09834258 +n10585628 +n12885754 +n04411019 +n10342367 +n10368798 +n09672840 +n12729023 +n04578329 +n10325549 +n03680248 +n11920663 +n10416567 +n10011486 +n01643255 +n03193754 +n07823814 +n04055447 +n10660128 +n07765612 +n07612530 +n04205613 +n09677427 +n03989199 +n11100798 +n12721122 +n10000787 +n10382157 +n07724819 +n12928819 +n11631159 +n02608996 +n10516527 +n09703101 +n12290975 +n03470222 +n03810412 +n03729131 +n03356038 +n12692024 +n12614625 +n10789415 +n02333819 +n01722670 +n03885410 +n12038208 +n02294097 +n02608860 +n02500596 
+n07909231 +n03254625 +n09681973 +n12221368 +n01893399 +n10025295 +n03194812 +n13181406 +n12249122 +n03447894 +n09795010 +n02187900 +n10139651 +n10631654 +n01792530 +n02569631 +n07853946 +n09907804 +n03263758 +n04214649 +n02450829 +n02431542 +n11998492 +n02651060 +n04101860 +n01806061 +n13901423 +n12903964 +n03968479 +n04268565 +n12601494 +n02083780 +n04570118 +n12247407 +n03337822 +n09878921 +n02369935 +n10022908 +n09667358 +n13160938 +n11937360 +n07741623 +n03705808 +n12241426 +n10478118 +n03805933 +n10343869 +n09391774 +n03482128 +n10357737 +n10334461 +n09675045 +n09662951 +n10174253 +n01815270 +n13873361 +n04432785 +n09778927 +n10671898 +n05571341 +n10033572 +n09864632 +n10618465 +n03437184 +n12786464 +n01723579 +n11798270 +n07742415 +n02143142 +n10548419 +n03695122 +n02518622 +n04605446 +n10218292 +n11832671 +n12646950 +n03382708 +n09844898 +n09674786 +n01472502 +n07616906 +n09763272 +n03982767 +n10005006 +n03059236 +n01816474 +n03725869 +n01979269 +n04226322 +n13236100 +n03920384 +n11852148 +n04373563 +n04324120 +n11686652 +n03036341 +n02142898 +n09783776 +n13147918 +n03465320 +n07855721 +n10336411 +n10438619 +n07750299 +n12237152 +n03559373 +n10077106 +n10169796 +n09828403 +n09959658 +n12464128 +n12934685 +n04221673 +n02617537 +n11689367 +n10180580 +n07813717 +n12529905 +n02340186 +n01400247 +n11749112 +n04404072 +n03135656 +n12098827 +n12481150 +n10023506 +n03500838 +n01564101 +n04009923 +n10023264 +n03908456 +n03206405 +n07590068 +n09958133 +n10755394 +n01423617 +n11511327 +n10536274 +n01965252 +n11549245 +n11935627 +n09635635 +n03752071 +n07585997 +n03147084 +n12666159 +n09748408 +n03796848 +n01501948 +n02345078 +n12430675 +n03103128 +n11710987 +n03393199 +n09233603 +n10465002 +n04298765 +n01351170 +n02720576 +n03966582 +n10643837 +n12420124 +n10793799 +n01652297 +n09281252 +n11983606 +n10222497 +n11832899 +n02391617 +n12434106 +n03987674 +n02140179 +n07896560 +n04325804 +n10647745 +n01924800 +n10156629 +n03545961 +n03906789 +n01890564 +n10699558 
+n12332218 +n03247495 +n11839460 +n03527675 +n12586725 +n13208965 +n02714315 +n02750320 +n04615149 +n12679876 +n12863234 +n03304323 +n12139793 +n11922755 +n12321669 +n04979307 +n01921059 +n09657206 +n13042134 +n04045787 +n11700279 +n02337598 +n01415920 +n01400391 +n13207572 +n10785480 +n02515713 +n12018100 +n02634545 +n03292736 +n02881546 +n12655605 +n03105810 +n10545792 +n03894933 +n09796974 +n10320484 +n12308112 +n11549009 +n13047862 +n14941787 +n12379531 +n10540252 +n11696935 +n12184468 +n12851860 +n12908854 +n10586265 +n12369066 +n10426630 +n12523850 +n03916289 +n04538878 +n09908769 +n02828115 +n07560422 +n10266016 +n03569174 +n06423496 +n10495167 +n03617834 +n09327538 +n10195056 +n10508379 +n13031323 +n11659248 +n04242315 +n10742111 +n10700963 +n12032686 +n09877587 +n07825597 +n07568991 +n11736362 +n12169099 +n13103750 +n03263640 +n12248941 +n10665302 +n01920051 +n09704283 +n11533999 +n04503073 +n11645163 +n10639817 +n09920901 +n06340977 +n03251100 +n10378113 +n03226090 +n10131268 +n02877513 +n13191884 +n02787120 +n11709045 +n02740061 +n12323665 +n02831998 +n10342180 +n12716594 +n04498275 +n09905050 +n03745487 +n07642833 +n10294020 +n10211666 +n12205460 +n02981198 +n01642943 +n07679140 +n04390483 +n10432875 +n09214269 +n10792506 +n10243483 +n13099833 +n10221520 +n13177768 +n04091584 +n10672540 +n10200246 +n13889331 +n02345340 +n10237556 +n01833415 +n01335218 +n09804230 +n09957523 +n05235879 +n10070449 +n10308653 +n10721708 +n04312654 +n10394434 +n12201938 +n12434775 +n07601025 +n02672152 +n10157271 +n02635154 +n12572858 +n13182937 +n10160188 +n03396997 +n10344656 +n02968210 +n10190516 +n07684422 +n03706939 +n07618871 +n02290870 +n03817331 +n03275311 +n12698774 +n04375080 +n07837630 +n04314216 +n11833373 +n07618684 +n03742238 +n12532886 +n03712444 +n11750989 +n10038620 +n09617577 +n03807334 +n10108089 +n01816140 +n10715347 +n02648035 +n13127303 +n02809491 +n02430748 +n12235479 +n01451863 +n01514926 +n10010864 +n01913440 +n09660240 +n11806369 +n01470479 
+n12655245 +n07655067 +n03436772 +n11778092 +n03951800 +n10277815 +n07931733 +n01479820 +n03576955 +n07609549 +n12568649 +n05263316 +n02636405 +n01384084 +n03298352 +n07617344 +n09987045 +n10573957 +n07801709 +n02589062 +n02534165 +n02748359 +n09607782 +n07590974 +n02199170 +n02696569 +n09678747 +n12795209 +n13176363 +n10663315 +n10588724 +n09772330 +n10174589 +n12366313 +n11883628 +n07617447 +n01334690 +n03168663 +n11764478 +n08599174 +n03942028 +n12153033 +n03448696 +n12096674 +n10037588 +n03548320 +n09760290 +n10374541 +n09653438 +n10294139 +n10276942 +n12279293 +n12764507 +n12803958 +n10764622 +n02140858 +n07599068 +n10245507 +n12351790 +n12818004 +n10118301 +n03945459 +n09912995 +n12176709 +n03873996 +n10339179 +n10614507 +n10114662 +n10784922 +n03821424 +n04959230 +n13015509 +n12573911 +n11948469 +n09775907 +n12758014 +n01780142 +n09956578 +n12165384 +n10088200 +n10382480 +n04131113 +n09930628 +n09784160 +n11750173 +n13064111 +n03817522 +n12662074 +n03176238 +n12310021 +n11679378 +n09961331 +n02385580 +n11904274 +n03113505 +n10244913 +n02836900 +n09986700 +n11963572 +n13158605 +n10321632 +n02179891 +n02189670 +n10097995 +n10774756 +n10783240 +n10605737 +n02530052 +n10386196 +n10184505 +n09788237 +n03589672 +n12509109 +n10658304 +n12966804 +n12559518 +n03189311 +n01451295 +n12179632 +n12301613 +n10496489 +n03402785 +n10244108 +n02385676 +n03552001 +n03092053 +n02313360 +n02547733 +n02109391 +n01327909 +n04574606 +n03060728 +n07840124 +n10567848 +n10062176 +n02703124 +n10804732 +n12699301 +n04515890 +n07919665 +n10457214 +n09663248 +n03165955 +n12988341 +n03987865 +n03031756 +n10277912 +n10172080 +n09325824 +n03198223 +n09605110 +n10113869 +n11603462 +n03352366 +n11930203 +n09769929 +n12979316 +n02579762 +n09953052 +n03105974 +n00476140 +n11598287 +n02830157 +n10512201 +n09746936 +n10668666 +n02919976 +n09993651 +n02149861 +n09705003 +n10389865 +n11655152 +n10010767 +n10070563 +n03688832 +n10590239 +n11936027 +n02939763 +n03163488 +n03171910 +n09955406 
+n03266195 +n10217208 +n09338013 +n07594250 +n03215930 +n09725935 +n10592049 +n03732658 +n12498457 +n09966554 +n10668450 +n10361525 +n04060198 +n11936624 +n02602760 +n03942600 +n03708425 +n10020533 +n12067817 +n07590177 +n01891274 +n11837204 +n01419332 +n03860234 +n12616248 +n07834160 +n09867154 +n09788073 +n12222493 +n03388990 +n04245412 +n10182402 +n11675404 +n10450038 +n13045594 +n13158167 +n13082568 +n12052267 +n12707199 +n07810531 +n07914887 +n13127001 +n02573249 +n08619112 +n10471859 +n09919899 +n03635516 +n12067029 +n03352232 +n07765517 +n10519984 +n02742194 +n03062798 +n13124654 +n09958569 +n02370137 +n10121714 +n04019335 +n07732433 +n02559383 +n12585137 +n09729156 +n10744078 +n09954355 +n03078506 +n10062042 +n10688811 +n02668613 +n03142205 +n10347204 +n10518349 +n09898020 +n12563702 +n05468098 +n10116370 +n07838905 +n03127024 +n03545585 +n12801072 +n09940818 +n04480995 +n10466564 +n02606751 +n10032987 +n10771066 +n01587278 +n11852531 +n01455461 +n10397392 +n02349205 +n10180923 +n09778266 +n04366832 +n10051975 +n10538629 +n09865744 +n12554029 +n13118330 +n12952590 +n04187751 +n09924313 +n10062594 +n01980655 +n10028402 +n02567334 +n10590903 +n10265891 +n10739297 +n01457082 +n03437581 +n03713151 +n03475674 +n05464534 +n11863467 +n06592421 +n12491435 +n14914945 +n10279778 +n03388711 +n10483890 +n10612373 +n03332784 +n02332954 +n02952798 +n13041943 +n01607309 +n04356772 +n07711799 +n12670962 +n12229111 +n07878479 +n12401893 +n07772413 +n12138110 +n09781504 +n07902698 +n02750652 +n13042316 +n12400924 +n02304797 +n03066464 +n12852234 +n10155222 +n05541509 +n10711483 +n04210858 +n02835551 +n12859679 +n02935490 +n03540476 +n05279953 +n09807075 +n09617435 +n03566860 +n10549510 +n10025391 +n10754449 +n11927740 +n03554645 +n01837526 +n02656969 +n08648917 +n07860548 +n01452345 +n04021704 +n07783827 +n10080117 +n02187554 +n03214966 +n10036444 +n04291069 +n12407396 +n02170599 +n09896826 +n12417836 +n07845495 +n02749292 +n03061819 +n03682380 +n10756261 +n10369955 
+n09692125 +n09978442 +n04277669 +n10539278 +n09703932 +n01879837 +n02746225 +n13159357 +n11763874 +n10540656 +n07933530 +n12987535 +n02371344 +n10654827 +n09723944 +n12775393 +n11856573 +n12626878 +n12716400 +n09903639 +n09784043 +n03906894 +n10775128 +n03124313 +n10396727 +n02841641 +n10211830 +n12283395 +n03490784 +n14175579 +n04027935 +n12396091 +n02609823 +n01414216 +n09880741 +n11976933 +n03073384 +n09270160 +n11768816 +n12073217 +n11597657 +n09994878 +n11756329 +n12579404 +n03161893 +n01451115 +n07736971 +n02949356 +n03878418 +n12653436 +n10626630 +n12777892 +n13061704 +n10498699 +n03609786 +n03199358 +n10776339 +n10762480 +n13179056 +n10113249 +n04029913 +n12640081 +n10493835 +n11683216 +n03524287 +n04585626 +n02969527 +n12976554 +n08569482 +n10204833 +n12442548 +n02577952 +n09357447 +n10202225 +n02198129 +n11882972 +n10404426 +n01600341 +n12016434 +n09867069 +n10576223 +n09893600 +n01702479 +n04274686 +n04406552 +n02848118 +n02258629 +n03260733 +n03685640 +n11751974 +n09967555 +n06274546 +n09649067 +n10681557 +n07606933 +n03110202 +n11982545 +n10803031 +n02679142 +n04086937 +n10514255 +n04506402 +n03884554 +n09970192 +n10117017 +n12642435 +n10186686 +n02097967 +n03956531 +n11834890 +n02677436 +n10040698 +n11796188 +n03348142 +n04168472 +n02294407 +n12483282 +n09429630 +n04423687 +n09819477 +n09755555 +n10157016 +n03344935 +n07762373 +n12871859 +n09853541 +n09875979 +n13050705 +n02251067 +n10637483 +n03823673 +n10357012 +n03424204 +n04431648 +n01475940 +n02339282 +n10248198 +n07683265 +n13150592 +n10359117 +n10096508 +n03473078 +n13052248 +n10743356 +n03710079 +n10634990 +n04507689 +n07921090 +n02352002 +n03924407 +n03609147 +n02837567 +n03406759 +n03909658 +n10286282 +n12135576 +n01912809 +n10801561 +n10717055 +n03473465 +n03761588 +n03144156 +n09474412 +n10253611 +n12549420 +n02499568 +n09910222 +n10431122 +n12699031 +n01697749 +n11786843 +n03888808 +n12089496 +n10066314 +n10302905 +n12696830 +n09965787 +n11969806 +n04066388 +n13080306 +n03913930 
+n09968259 +n10490421 +n10714195 +n07570021 +n10343449 +n10401204 +n03472796 +n10779897 +n11787190 +n03503097 +n10439523 +n12123648 +n04279858 +n10511771 +n09755788 +n08253141 +n02616397 +n12248574 +n01645466 +n04334504 +n07729142 +n05451099 +n10503818 +n10354265 +n09707735 +n02633422 +n11999656 +n01324916 +n02088745 +n09354511 +n10705448 +n09756195 +n10136615 +n10427359 +n09702134 +n12600095 +n04122262 +n10791820 +n03330002 +n02713496 +n11710658 +n09664908 +n02550203 +n02349847 +n12835766 +n04098260 +n11536567 +n11686780 +n12875861 +n12758471 +n09806944 +n11810030 +n10400003 +n10098388 +n11663263 +n10559683 +n07833672 +n10753989 +n10643095 +n01988869 +n03112240 +n12911914 +n09979913 +n09785236 +n09790047 +n02676097 +n01653509 +n04601159 +n01938735 +n10748142 +n12978076 +n11990627 +n10437262 +n12972136 +n04077594 +n10148825 +n02269340 +n12886185 +n03608504 +n11677485 +n10612518 +n12267265 +n10649308 +n05458173 +n10650162 +n03213361 +n02747063 +n01611674 +n02322992 +n01554017 +n03512624 +n12773142 +n12747120 +n09902128 +n03162714 +n03924532 +n10299125 +n12378753 +n02778131 +n09976024 +n13093629 +n10778999 +n07721833 +n12232851 +n07876775 +n10097590 +n03194170 +n13029122 +n04573832 +n12859272 +n09639382 +n07688021 +n02878796 +n10751710 +n03633632 +n07762534 +n10779995 +n13914265 +n13093275 +n10729330 +n10433077 +n03663910 +n10499110 +n02272286 +n10371450 +n01967308 +n12633061 +n11659627 +n12982915 +n10344121 +n10268629 +n02697876 +n09879552 +n10167361 +n10719807 +n04042076 +n01632952 +n03243625 +n02125872 +n10105906 +n12194613 +n03149810 +n10721124 +n03947343 +n02020219 +n10122531 +n01315330 +n08647264 +n00452734 +n03607527 +n10010243 +n09863749 +n04473275 +n11782878 +n03585337 +n09655466 +n12989007 +n11711971 +n10716864 +n10475835 +n10704712 +n01894956 +n10568443 +n12881105 +n10387836 +n10403633 +n08645318 +n03500457 +n10377633 +n10108464 +n09933972 +n02618094 +n11798688 +n04155735 +n09780395 +n12822466 +n04302200 +n11899223 +n10633298 +n02760298 +n12142450 
+n10803282 +n10769321 +n10514051 +n10597889 +n11837562 +n02261757 +n01458746 +n09830759 +n10003476 +n09817174 +n10738670 +n10118743 +n12096563 +n03054491 +n12155773 +n10439727 +n04170384 +n03223923 +n12632733 +n07845421 +n10062905 +n11831521 +n04267985 +n12796385 +n04154854 +n00444142 +n09778537 +n03115663 +n04385157 +n10109826 +n02337332 +n09996304 +n09880189 +n12871696 +n11823305 +n02516776 +n12377494 +n08511017 +n04421417 +n10765305 +n09675673 +n03488111 +n03076623 +n11829672 +n10292316 +n10758949 +n13031474 +n02829353 +n10090745 +n09186592 +n12736999 +n12715195 +n11684499 +n03168933 +n09890192 +n10596899 +n12527081 +n10496393 +n10497135 +n02137302 +n03266620 +n12958615 +n12664187 +n02633977 +n04262869 +n04215800 +n13133233 +n02392555 +n09858733 +n10186350 +n01715888 +n03142099 +n08573674 +n11687071 +n02690715 +n03146342 +n12331788 +n08079613 +n10609092 +n12943049 +n12234318 +n02312325 +n12618146 +n10135197 +n11705573 +n02794368 +n02850358 +n09464486 +n01993525 +n03187153 +n10097262 +n02976641 +n12198793 +n12941717 +n10219240 +n12434634 +n03827420 +n10437137 +n10342893 +n04174026 +n10265281 +n07757874 +n10765885 +n01470895 +n02349557 +n11716698 +n03765467 +n10227793 +n07824268 +n12994892 +n10486236 +n02974454 +n10718349 +n11726145 +n09909660 +n03378593 +n07805006 +n09875025 +n02645691 +n10223069 +n03722944 +n04389999 +n02544274 +n10239928 +n04456011 +n10382302 +n01552333 +n10082562 +n12952469 +n09883047 +n10442573 +n01891013 +n10690268 +n13111504 +n02287352 +n03567635 +n10331347 +n09762385 +n09933842 +n02369555 +n12291459 +n09919200 +n01492860 +n02067768 +n10713254 +n10550468 +n12846335 +n03835729 +n12467018 +n11676743 +n03629643 +n12987423 +n10655730 +n08678783 +n10349836 +n10087736 +n10246703 +n10338391 +n04585456 +n04158138 +n10500942 +n09850974 +n10791890 +n10020807 +n03315805 +n02752917 +n04033801 +n10492086 +n04427473 +n02940706 +n12110475 +n09832978 +n12515393 +n07800487 +n09848110 +n02659176 +n09967406 +n10536134 +n10760622 +n09736485 +n07830690 
+n07835173 +n09814252 +n10311506 +n10341955 +n03869838 +n07760673 +n09970402 +n12526178 +n11687964 +n09968741 +n10719267 +n07851054 +n10116478 +n10599215 +n09951524 +n03855908 +n03997274 +n02986348 +n08599292 +n02474282 +n04155889 +n09983314 +n01987727 +n10280130 +n10404998 +n02294577 +n02998696 +n08586978 +n11652578 +n13867005 +n12663254 +n10524869 +n02287622 +n10220924 +n03279918 +n02626089 +n10291110 +n12820669 +n07861681 +n08643267 +n07720185 +n12555859 +n03225616 +n09769525 +n03295140 +n12489046 +n10615179 +n12150969 +n02888429 +n10753182 +n10267166 +n03675558 +n12693352 +n02378299 +n02788462 +n03622401 +n12236977 +n10730542 +n12758099 +n10502046 +n11937195 +n10366145 +n10307114 +n12984595 +n10128748 +n09362316 +n09789898 +n09654079 +n04260192 +n10114476 +n08623676 +n10331841 +n05265736 +n10269289 +n03090856 +n12764978 +n02825240 +n10358032 +n09825750 +n03062651 +n11196627 +n11825749 +n04148464 +n04439505 +n07572858 +n04561857 +n12904562 +n03643907 +n10723597 +n01492708 +n10071557 +n10140683 +n01739871 +n12984267 +n03072056 +n10772580 +n10462588 +n11936448 +n10494373 +n12845908 +n09793352 +n10717196 +n12577362 +n09779124 +n10663549 +n02286425 +n10380126 +n01890144 +n02751490 +n03361109 +n01781875 +n13128278 +n09994400 +n09883452 +n13881512 +n02833275 +n10362003 +n01376543 +n12366675 +n09984960 +n10173665 +n10673776 +n02057898 +n01934844 +n04057673 +n10018747 +n02916065 +n13024653 +n05539947 +n09648911 +n04150273 +n01393486 +n10411356 +n12232114 +n02436224 +n12757930 +n03095965 +n10555059 +n01577458 +n09666476 +n10598904 +n11656549 +n02591911 +n13092385 +n10506336 +n13103023 +n09658254 +n04095938 +n11936782 +n07824383 +n09781650 +n10240821 +n01780426 +n02850060 +n02863340 +n13914141 +n12138578 +n13034555 +n12291671 +n12133151 +n04515444 +n04591359 +n02589196 +n02689819 +n11740414 +n07610295 +n10246395 +n09921034 +n12447346 +n12641180 +n01419573 +n04242587 +n07760395 +n03399579 +n09866661 +n02549376 +n11861238 +n01588996 +n04319545 +n09789150 +n03288643 
+n10312491 +n03353281 +n02345997 +n09711132 +n03043173 +n02558860 +n03703590 +n03188871 +n12589142 +n12113323 +n09987161 +n05242239 +n09686262 +n09780984 +n09668199 +n09716933 +n11675738 +n04459243 +n11833749 +n10646942 +n07760070 +n10286539 +n04469684 +n13030616 +n03939440 +n01725086 +n09967816 +n10500824 +n13026015 +n03983928 +n02936921 +n04115542 +n10245029 +n12105828 +n12452673 +n10498046 +n10737264 +n11766046 +n04079603 +n10072054 +n12569037 +n10153155 +n09867311 +n02806992 +n10258602 +n10164025 +n10520964 +n02258508 +n12199399 +n05266096 +n08496334 +n10351064 +n12441552 +n12878325 +n13102648 +n02980625 +n03462972 +n12395906 +n13022903 +n11895714 +n03324814 +n11318824 +n01728266 +n07883510 +n10731013 +n10181799 +n12142357 +n09671089 +n11531334 +n01718414 +n04573625 +n10390600 +n11553522 +n01314910 +n04227519 +n10514784 +n02944256 +n12103680 +n03081859 +n11655592 +n12569616 +n10700105 +n09755086 +n03865820 +n01456137 +n10442232 +n02900987 +n04491934 +n07849026 +n04519728 +n09986450 +n03305300 +n10186143 +n02879422 +n03018614 +n10747548 +n10562509 +n10068425 +n12593341 +n11937692 +n08679562 +n09613690 +n10646433 +n12251740 +n10994097 +n13048447 +n03848537 +n12153741 +n12614096 +n11654438 +n09985470 +n10562968 +n02923915 +n10740594 +n07802767 +n12514592 +n10335801 +n03878674 +n12586499 +n10255459 +n02413824 +n10312600 +n02616128 +n12644283 +n04238953 +n04526520 +n01898593 +n09737161 +n03372822 +n09781398 +n10339251 +n02502807 +n10198832 +n10679610 +n13136781 +n11974373 +n11680457 +n10083677 +n04037298 +n09945021 +n09987239 +n02708885 +n13107807 +n10130877 +n12507379 +n08651104 +n12116058 +n10135297 +n04269086 +n03858533 +n10477955 +n04394031 +n10442417 +n10074735 +n03618797 +n03460455 +n04374521 +n10756061 +n08517010 +n12923108 +n02362194 +n01704103 +n10062492 +n01394771 +n10473789 +n10330593 +n02748183 +n12562141 +n09745933 +n02505485 +n11922661 +n12018014 +n09866922 +n04067143 +n13161254 +n07813495 +n01374846 +n10213429 +n03253071 +n02546028 +n01642097 
+n01475232 +n03212247 +n10155600 +n11689957 +n11738997 +n10525878 +n03301389 +n10589666 +n01908958 +n10289766 +n03900028 +n03437295 +n02987823 +n02739123 +n10505347 +n02546627 +n10381804 +n10132502 +n10336904 +n10189597 +n09786115 +n12875697 +n10761519 +n01470733 +n02875626 +n12111238 +n07862770 +n07856895 +n09996039 +n03368048 +n07913300 +n10062996 +n10555430 +n04302863 +n12758555 +n10740732 +n02385898 +n02385098 +n12162758 +n03887899 +n03976268 +n04234160 +n03641947 +n07857076 +n10578656 +n12135729 +n12675515 +n09032191 +n12969670 +n02600503 +n12518013 +n10227166 +n10121026 +n01801672 +n10661216 +n03244388 +n04147291 +n09664556 +n02539573 +n04480141 +n10601362 +n02613572 +n10537906 +n02613820 +n11656771 +n03841011 +n02845985 +n12534208 +n10241024 +n03645290 +n12743976 +n11922839 +n07709701 +n03066232 +n03467380 +n09266604 +n09663786 +n12775070 +n02427183 +n04083113 +n12896615 +n10501453 +n02345774 +n09965515 +n09704157 +n10666752 +n03846970 +n04167661 +n03991321 +n09556121 +n10686517 +n02586238 +n03594277 +n03591313 +n10391416 +n10756837 +n13163649 +n03971960 +n10245341 +n02577041 +n04481642 +n12373739 +n10214062 +n10091997 +n10275848 +n02090253 +n03514340 +n04593629 +n11795216 +n03126927 +n11871748 +n10272782 +n12056099 +n04484024 +n03101375 +n12255225 +n10724372 +n10531838 +n02354781 +n02389865 +n02853336 +n01477080 +n01779939 +n10776052 +n10724132 +n10284871 +n10554141 +n03898787 +n02366301 +n10721612 +n04421740 +n04256758 +n01445593 +n10103921 +n02729222 +n02530188 +n02387452 +n02601921 +n01711160 +n02474110 +n09869447 +n12789977 +n10158506 +n10396908 +n07839593 +n02662825 +n02473720 +n13034788 +n07752602 +n03762238 +n10262880 +n07770180 +n04030054 +n10151367 +n03525252 +n10252075 +n10747424 +n10191388 +n04130566 +n03951068 +n13239921 +n03733547 +n10358124 +n11549779 +n09203827 +n04043168 +n10359422 +n04286960 +n04237287 +n10130686 +n02338449 +n12912274 +n10586998 +n02812785 +n10364502 +n03955941 +n12324222 +n09743601 +n03766600 +n01427399 +n12968309 
+n11776234 +n01501777 +n10051026 +n10397001 +n01516212 +n02596252 +n02225081 +n10479328 +n02109687 +n10181445 +n02248062 +n03802973 +n01639187 +n02142734 +n02342534 +n02410141 +n02743426 +n03950359 +n12253835 +n07805478 +n03706415 +n03578981 +n04560619 +n09761753 +n03524425 +n01962788 +n04350235 +n10686694 +n13139321 +n10195155 +n12335937 +n12758399 +n03805374 +n12895298 +n03800371 +n11972959 +n11530008 +n03178538 +n02217839 +n10591072 +n04033557 +n01880813 +n12292877 +n02430643 +n07599383 +n01954516 +n09894909 +n02474605 +n03576443 +n07595051 +n03367875 +n12945549 +n02360480 +n14583400 +n04208582 +n02405577 +n02550655 +n02513355 +n04381450 +n00444490 +n03567912 +n09937688 +n07932323 +n04029416 +n01913346 +n13237508 +n04437276 +n12938445 +n03042384 +n12543639 +n03194992 +n04094250 +n12045514 +n03825913 +n03504293 +n12758250 +n03547861 +n03649288 +n04572235 +n07569423 +n03534695 +n03253714 +n01501641 +n13906767 +n12578255 +n11749603 +n07742513 +n07609083 +n04214413 +n07595751 +n12013701 +n12592839 +n12949160 +n04093223 +n02983072 +n03510072 +n02966068 +n03867854 +n01747285 +n10691318 +n13091982 +n12574470 +n02255023 +n03449217 +n03153585 +n04006227 +n13140049 +n02965024 +n03805503 +n03911406 +n13120958 +n12203699 +n01456454 +n10397142 +n12920043 +n02412977 +n08674344 +n07801007 +n03037590 +n10361296 +n13133316 +n03483637 +n04435759 +n12983873 +n02627037 +n03783304 +n07725158 +n02921292 +n01788864 +n01705010 +n12616996 +n03903290 +n08662427 +n03667060 +n07856992 +n03252422 +n02449699 +n12137954 +n10024025 +n07891095 +n04337157 +n04368109 +n03015631 +n02363996 +n12824289 +n03206602 +n12799269 +n02333733 +n01793565 +n01721898 +n03178173 +n02844056 +n11688378 +n13889066 +n02637475 +n03750437 +n01403457 +n01717229 +n02677136 +n12512294 +n03736269 +n02838577 +n08661878 +n01993830 +n02777638 +n02900857 +n04023021 +n03843092 +n07770439 +n12928491 +n03697812 +n02639922 +n13139482 +n07771082 +n12487058 +n07774182 +n02122810 +n02856362 +n11686195 +n11687432 +n02853870 
+n04239218 +n02665250 +n02938218 +n11746600 +n10183347 +n10681194 +n04164199 +n04407257 +n12549005 +n02331842 +n03862379 +n02863638 +n11962994 +n03091907 +n04177654 +n02252972 +n02403153 +n01376437 +n02848806 +n08579266 +n07616265 +n10331258 +n10765587 +n09433312 +n03412387 +n10178077 +n13123841 +n02532918 +n04144651 +n03296963 +n03450881 +n04348988 +n10425946 +n03257065 +n02354320 +n11689197 +n04084682 +n10140783 +n03637027 +n02346170 +n02559144 +n01705591 +n09400584 +n03840327 +n03918074 +n04053767 +n02406046 +n00288190 +n03160001 +n03366464 +n09249155 +n01324305 +n07556872 +n03381565 +n12705220 +n11874878 +n02632494 +n02502006 +n03146560 +n02179340 +n04312756 +n10162016 +n03800563 +n04140853 +n07933652 +n03075248 +n04421582 +n10652703 +n02218134 +n12233249 +n04578559 +n01781071 +n02615298 +n04436832 +n04054566 +n02608284 +n11674019 +n03505764 +n02662397 +n09422190 +n04382537 +n04355684 +n04383923 +n09888635 +n03783575 +n03228796 +n07772026 +n02381119 +n15060326 +n10586166 +n12647787 +n02458517 +n10281546 +n03498866 +n02485988 +n10121246 +n09391644 +n03103904 +n08676253 +n02203978 +n04092168 +n03213014 +n03138217 +n04135933 +n12612811 +n04478066 +n02157285 +n02543255 +n03863783 +n01502101 +n03930229 +n12439830 +n09425019 +n02618513 +n02910241 +n12261359 +n03648667 +n04365229 +n03461651 +n04388040 +n03295928 +n03581531 +n04203356 +n02622249 +n13142907 +n04497249 +n11678377 +n02366579 +n02931013 +n02837134 +n03132438 +n13092987 +n04196803 +n03056215 +n03255322 +n02130925 +n10291469 +n02971940 +n01718096 +n12510774 +n11766432 +n04271891 +n03366721 +n03154616 +n03694356 +n10478293 +n11763142 +n07763483 +n03037228 +n09201998 +n01517389 +n00443517 +n12693244 +n03580990 +n03519848 +n10238375 +n10783646 +n03564849 +n03975926 +n02473554 +n02450426 +n03464952 +n04411835 +n04573045 +n10505732 +n04337650 +n10621514 +n10334782 +n12434985 +n07769102 +n10594523 +n05475397 +n01875610 +n03299406 +n10507692 +n02593679 +n03317233 +n13239736 +n03550420 +n03247351 +n03819047 
+n03633341 +n03154745 +n04073425 +n04532022 +n02910964 +n04301242 +n04378651 +n13098515 +n11775626 +n14603798 +n10263146 +n01886045 +n03761731 +n02224713 +n04591249 +n02144251 +n03849412 +n11548728 +n04051705 +n12298165 +n03150795 +n03989447 +n02826459 +n07602650 +n03155915 +n09891730 +n02067603 +n01523105 +n03618339 +n03897130 +n02711780 +n05285623 +n03533486 +n04085873 +n01923404 +n10139077 +n01709484 +n02183507 +n03216562 +n01971850 +n03136051 +n02948834 +n03589313 +n03665851 +n02937336 +n02035656 +n07769465 +n07849186 +n12585373 +n12280364 +n02846260 +n02511730 +n02614653 +n04193179 +n11718681 +n09467696 +n01522450 +n03040836 +n03162297 +n11896141 +n04000480 +n10350220 +n07746038 +n02124157 +n10655169 +n03476542 +n03895038 +n00443917 +n07757753 +n01726203 +n02987706 +n12750076 +n03012734 +n02941228 +n04194009 +n04501127 +n09794550 +n03510487 +n08589670 +n03166951 +n03673270 +n09792125 +n08492354 +n02396157 +n01628331 +n03993878 +n07833816 +n04958865 +n13650447 +n04339191 +n02826683 +n02893269 +n02810139 +n02626471 +n02589796 +n08677801 +n04325968 +n03275864 +n02622547 +n04406687 +n04097085 +n02998107 +n07831450 +n03658102 +n02575590 +n03523398 +n02412909 +n02953850 +n04337503 +n03510987 +n12664005 +n03710294 +n13138155 +n10110093 +n07831955 +n03932080 +n12971804 +n03943623 +n03726371 +n10531445 +n12984489 +n07835051 +n12097556 +n02685701 +n03038041 +n02451125 +n04594919 +n02372140 +n02665985 +n03496183 +n03961828 +n03802800 +n01713170 +n03602790 +n04974145 +n02780588 +n04031884 +n03588216 +n02614140 +n04578708 +n04501281 +n03166600 +n03992975 +n04206070 +n03227721 +n02582349 +n02664642 +n07805389 +n09226869 +n02459190 +n12216968 +n03628984 +n02524928 +n09209025 +n04078002 +n03167153 +n03562565 +n07599554 +n10252547 +n03279804 +n07692887 +n14909584 +n02529293 +n04444953 +n04156814 +n07616174 +n03415626 +n03331244 +n03868324 +n03644073 +n02818687 +n10085101 +n02953056 +n03202481 +n02118707 +n03591901 +n12602434 +n02943465 +n02818254 +n07922607 +n02597004 
+n04212810 +n04056073 +n12327528 +n02207647 +n01792808 +n03002555 +n03951213 +n12242123 +n10062275 +n12325787 +n10048117 +n11937278 +n03624767 +n04039041 +n04059298 +n03707171 +n07758407 +n01333483 +n02219015 +n02436645 +n02478239 +n04457638 +n01781698 +n09474765 +n03686363 +n10769084 +n09456207 +n02385776 +n13555775 +n03962685 +n13129078 +n03463185 +n01429172 +n04243251 +n12177129 +n03143754 +n03958338 +n02791795 +n04560502 +n12776774 +n02745816 +n03009111 +n02976552 +n03008817 +n03211413 +n03537550 +n12200504 +n01909788 +n11790089 +n03480973 +n10507070 +n01707294 +n04374907 +n04281571 +n00006024 +n03823906 +n12603273 +n03503358 +n04027820 +n12645530 +n02535080 +n04143365 +n08385989 +n12661227 +n12814857 +n11871059 +n04268418 +n13128582 +n01928865 +n04359124 +n12670334 +n03610836 +n04543924 +n02252799 +n15102359 +n04437380 +n04316924 +n11872324 +n09330378 +n10122300 +n03784139 +n00443375 +n14993378 +n01721174 +n00004475 +n00006484 +n00007846 +n00015388 +n00017222 +n00021265 +n00021939 +n00288000 +n00433458 +n00433661 +n00433802 +n00439826 +n00440218 +n00440509 +n00440747 +n00441824 +n00442115 +n00442981 +n00443231 +n00444651 +n00445351 +n00445802 +n00447073 +n00447221 +n00447540 +n00448466 +n00448640 +n00448958 +n00449295 +n00449695 +n00450335 +n00450700 +n00451370 +n00451866 +n00452293 +n00453935 +n00454237 +n00454624 +n00463246 +n00464651 +n00464894 +n00467719 +n00467995 +n00468480 +n00469651 +n00471437 +n00471613 +n00479076 +n00480508 +n00480993 +n00482298 +n00523513 +n01035667 +n01316422 +n01316579 +n01316949 +n01317089 +n01317391 +n01317541 +n01319467 +n01320872 +n01321230 +n01321579 +n01321854 +n01322343 +n01322685 +n01322898 +n01323068 +n01326291 +n01329186 +n01338685 +n01339336 +n01340935 +n01342269 +n01358572 +n01367772 +n01375204 +n01376237 +n01380610 +n01384687 +n01385330 +n01387065 +n01389507 +n01390123 +n01392380 +n01395254 +n01397114 +n01402600 +n01407798 +n01421807 +n01438208 +n01439121 +n01439514 +n01439808 +n01441425 +n01444783 +n01445429 
+n01446589 +n01446760 +n01448951 +n01450661 +n01454856 +n01455778 +n01458842 +n01459791 +n01461646 +n01466257 +n01467336 +n01468238 +n01468712 +n01471682 +n01473806 +n01474283 +n01477525 +n01478511 +n01480516 +n01480880 +n01481331 +n01482071 +n01482330 +n01483522 +n01484097 +n01488918 +n01491874 +n01492357 +n01493541 +n01494757 +n01494882 +n01495006 +n01495701 +n01497118 +n01498406 +n01498699 +n01498989 +n01500091 +n01501160 +n01503061 +n01514752 +n01515078 +n01517565 +n01524359 +n01525720 +n01527194 +n01527617 +n01528654 +n01529672 +n01533339 +n01534582 +n01534762 +n01537134 +n01538955 +n01539573 +n01540233 +n01541922 +n01542786 +n01544208 +n01546921 +n01547832 +n01548301 +n01549430 +n01550761 +n01553142 +n01555809 +n01557185 +n01560105 +n01560636 +n01563128 +n01563746 +n01564394 +n01567133 +n01568132 +n01569836 +n01570676 +n01571904 +n01572328 +n01573074 +n01574045 +n01574390 +n01575745 +n01576695 +n01577659 +n01578575 +n01579028 +n01580379 +n01580490 +n01580772 +n01580870 +n01581166 +n01581434 +n01581730 +n01582398 +n01582498 +n01582856 +n01584225 +n01585121 +n01587834 +n01588431 +n01589286 +n01591697 +n01592257 +n01592540 +n01594372 +n01595624 +n01597336 +n01598588 +n01598988 +n01600085 +n01600657 +n01602080 +n01602209 +n01602630 +n01603600 +n01604330 +n01605630 +n01608814 +n01609062 +n01609391 +n01609751 +n01610955 +n01611472 +n01612628 +n01613294 +n01613615 +n01615121 +n01616551 +n01616764 +n01617095 +n01617443 +n01617766 +n01618082 +n01618922 +n01619310 +n01619536 +n01619835 +n01620135 +n01620414 +n01620735 +n01621127 +n01622352 +n01623706 +n01627424 +n01629276 +n01630284 +n01631175 +n01632047 +n01637112 +n01637932 +n01639765 +n01640846 +n01645776 +n01649170 +n01650167 +n01651487 +n01653773 +n01661091 +n01661592 +n01661818 +n01662622 +n01662784 +n01663401 +n01664369 +n01665932 +n01667432 +n01668091 +n01669372 +n01670092 +n01672032 +n01674216 +n01674464 +n01674990 +n01676755 +n01680264 +n01680478 +n01681940 +n01684133 +n01685439 +n01686044 +n01686220 
+n01686403 +n01686609 +n01686808 +n01687665 +n01688961 +n01689411 +n01691951 +n01692864 +n01693783 +n01694709 +n01696633 +n01697178 +n01698434 +n01699040 +n01701551 +n01702256 +n01703011 +n01703569 +n01705934 +n01708106 +n01708998 +n01712008 +n01712752 +n01717016 +n01719403 +n01722998 +n01724231 +n01726692 +n01727646 +n01730185 +n01730307 +n01730812 +n01730960 +n01731545 +n01732244 +n01733757 +n01734637 +n01734808 +n01735439 +n01735577 +n01735728 +n01737472 +n01737728 +n01737875 +n01738065 +n01738601 +n01739647 +n01740551 +n01741232 +n01741562 +n01741943 +n01743605 +n01745125 +n01745484 +n01746359 +n01747885 +n01749582 +n01749742 +n01751036 +n01752165 +n01753959 +n01754876 +n01755740 +n01767661 +n01769347 +n01770795 +n01771417 +n01772222 +n01775370 +n01776192 +n01776705 +n01777304 +n01777467 +n01777649 +n01777909 +n01778217 +n01778487 +n01778621 +n01778801 +n01779148 +n01779463 +n01779629 +n01780696 +n01782209 +n01785667 +n01789386 +n01789740 +n01791107 +n01791625 +n01792158 +n01792640 +n01794158 +n01795088 +n01795735 +n01795900 +n01796019 +n01796105 +n01796519 +n01796729 +n01798706 +n01798839 +n01798979 +n01799302 +n01800424 +n01801088 +n01801479 +n01802721 +n01803078 +n01804478 +n01804653 +n01804921 +n01805070 +n01805801 +n01806847 +n01807828 +n01808140 +n01808291 +n01808596 +n01809106 +n01810700 +n01811909 +n01812337 +n01813385 +n01814370 +n01814921 +n01815601 +n01816887 +n01819115 +n01820348 +n01820801 +n01821076 +n01821203 +n01822602 +n01823013 +n01824749 +n01825930 +n01826364 +n01827403 +n01829869 +n01831712 +n01832167 +n01834177 +n01834540 +n01835276 +n01838038 +n01838598 +n01839598 +n01841102 +n01843719 +n01844231 +n01844551 +n01844917 +n01845132 +n01845477 +n01846331 +n01848123 +n01848648 +n01849466 +n01850373 +n01851375 +n01852142 +n01852861 +n01853498 +n01854415 +n01856072 +n01856155 +n01856380 +n01856553 +n01856890 +n01857079 +n01857325 +n01857512 +n01857632 +n01857851 +n01858441 +n01859496 +n01860497 +n01861148 +n01861778 +n01871543 +n01871875 
+n01874434 +n01874928 +n01876326 +n01877134 +n01878061 +n01878335 +n01878639 +n01878929 +n01879217 +n01879509 +n01880152 +n01881171 +n01883513 +n01883920 +n01886756 +n01887896 +n01888264 +n01889074 +n01889520 +n01890860 +n01891633 +n01892551 +n01894207 +n01905661 +n01906749 +n01907738 +n01909422 +n01911403 +n01911839 +n01912454 +n01914163 +n01914830 +n01915811 +n01916187 +n01916925 +n01918744 +n01922303 +n01925270 +n01925695 +n01926379 +n01926689 +n01927159 +n01927456 +n01927928 +n01928215 +n01930852 +n01931140 +n01931520 +n01931714 +n01932151 +n01932936 +n01933151 +n01933478 +n01933988 +n01934440 +n01935176 +n01936391 +n01937909 +n01940736 +n01941223 +n01942177 +n01942869 +n01943541 +n01944118 +n01944812 +n01944955 +n01945143 +n01945340 +n01945845 +n01946277 +n01948573 +n01951613 +n01953361 +n01955933 +n01956481 +n01958038 +n01959985 +n01960459 +n01963571 +n01964049 +n01964441 +n01965889 +n01967094 +n01968315 +n01969726 +n01971094 +n01971280 +n01974773 +n01975687 +n01976146 +n01976957 +n01978930 +n01981702 +n01982650 +n01983048 +n01985493 +n01985797 +n01986806 +n01987545 +n01988701 +n01989869 +n01990007 +n01991028 +n01991520 +n01992262 +n01992423 +n01992773 +n01996585 +n01998183 +n02000954 +n02002075 +n02005790 +n02006985 +n02007284 +n02008041 +n02008796 +n02010453 +n02011805 +n02011943 +n02012185 +n02013177 +n02014941 +n02015554 +n02016358 +n02016956 +n02018027 +n02019190 +n02019438 +n02019929 +n02021050 +n02021795 +n02022684 +n02023341 +n02025043 +n02026059 +n02028175 +n02030035 +n02030287 +n02030996 +n02031934 +n02033208 +n02033324 +n02033561 +n02034129 +n02034661 +n02036053 +n02037464 +n02039171 +n02040505 +n02041085 +n02041246 +n02043063 +n02044178 +n02044778 +n02045369 +n02046759 +n02047260 +n02047614 +n02048698 +n02049532 +n02050004 +n02051474 +n02052204 +n02052365 +n02053083 +n02054502 +n02055658 +n02055803 +n02057731 +n02058594 +n02058747 +n02059162 +n02060411 +n02060889 +n02062017 +n02062430 +n02062744 +n02063224 +n02064338 +n02066707 +n02068206 
+n02068974 +n02069701 +n02070430 +n02073250 +n02075296 +n02075927 +n02076196 +n02076779 +n02077152 +n02077658 +n02078292 +n02078574 +n02078738 +n02079005 +n02079389 +n02081571 +n02083346 +n02083672 +n02084071 +n02084861 +n02085374 +n02086346 +n02086478 +n02087122 +n02087551 +n02088839 +n02089232 +n02089555 +n02090475 +n02090827 +n02092468 +n02093056 +n02094562 +n02094721 +n02095050 +n02095412 +n02095727 +n02096756 +n02097786 +n02098550 +n02099029 +n02099997 +n02100399 +n02101108 +n02101861 +n02102605 +n02103406 +n02103841 +n02104523 +n02104882 +n02106966 +n02107420 +n02108254 +n02108672 +n02109150 +n02109811 +n02110532 +n02111626 +n02112497 +n02112826 +n02113335 +n02114100 +n02115012 +n02115335 +n02117512 +n02117646 +n02117900 +n02118176 +n02118333 +n02119247 +n02119359 +n02120278 +n02120997 +n02121620 +n02121808 +n02122725 +n02123785 +n02124623 +n02127292 +n02127381 +n02127482 +n02127586 +n02127678 +n02127808 +n02128598 +n02128669 +n02129463 +n02129530 +n02129837 +n02129923 +n02130545 +n02131653 +n02132466 +n02132580 +n02132788 +n02133400 +n02134971 +n02135220 +n02137722 +n02137888 +n02138647 +n02138777 +n02139199 +n02139671 +n02141306 +n02141713 +n02144593 +n02145424 +n02148835 +n02149420 +n02150482 +n02152740 +n02152991 +n02153203 +n02153809 +n02156732 +n02159955 +n02164464 +n02165877 +n02166229 +n02166567 +n02166826 +n02167505 +n02167820 +n02167944 +n02168245 +n02169023 +n02169218 +n02169705 +n02169974 +n02170400 +n02170993 +n02171453 +n02171869 +n02172518 +n02172678 +n02172761 +n02172870 +n02174355 +n02176261 +n02178411 +n02178717 +n02179012 +n02180233 +n02181235 +n02181724 +n02182045 +n02182355 +n02182642 +n02182930 +n02183857 +n02186153 +n02188699 +n02189363 +n02190790 +n02191273 +n02191773 +n02191979 +n02192252 +n02192513 +n02192814 +n02193009 +n02193163 +n02194249 +n02194750 +n02195091 +n02195526 +n02195819 +n02196344 +n02198532 +n02199502 +n02200198 +n02202287 +n02204722 +n02206270 +n02207179 +n02207449 +n02208280 +n02208498 +n02208848 +n02208979 
+n02209111 +n02209354 +n02209624 +n02209964 +n02210427 +n02210921 +n02211444 +n02211627 +n02211896 +n02212062 +n02212602 +n02212958 +n02213107 +n02215161 +n02215770 +n02217563 +n02218713 +n02220055 +n02220225 +n02220518 +n02220804 +n02221083 +n02221414 +n02221571 +n02221715 +n02221820 +n02222035 +n02222321 +n02222582 +n02223266 +n02223520 +n02226183 +n02226821 +n02226970 +n02227247 +n02227604 +n02227966 +n02228341 +n02228697 +n02229156 +n02229765 +n02230023 +n02230187 +n02230480 +n02230634 +n02231052 +n02231803 +n02233943 +n02234355 +n02234570 +n02234848 +n02235205 +n02236241 +n02236896 +n02237424 +n02237581 +n02238235 +n02238887 +n02239528 +n02241569 +n02241799 +n02243562 +n02244173 +n02246011 +n02246628 +n02247511 +n02248368 +n02248510 +n02248887 +n02249515 +n02250822 +n02251775 +n02252226 +n02253127 +n02253715 +n02254697 +n02257003 +n02257284 +n02257715 +n02259377 +n02260421 +n02260863 +n02261419 +n02262178 +n02263378 +n02264885 +n02265330 +n02268148 +n02269196 +n02269522 +n02270011 +n02270200 +n02271570 +n02271897 +n02272871 +n02274024 +n02274259 +n02274822 +n02278210 +n02278839 +n02279637 +n02280458 +n02281015 +n02281136 +n02281267 +n02282257 +n02282385 +n02282553 +n02282903 +n02283077 +n02283201 +n02283951 +n02285548 +n02287004 +n02287799 +n02288789 +n02291220 +n02291572 +n02291748 +n02293352 +n02293868 +n02295064 +n02295390 +n02295870 +n02298541 +n02300173 +n02301452 +n02302459 +n02302620 +n02305407 +n02306433 +n02307325 +n02308139 +n02308471 +n02309337 +n02310000 +n02310717 +n02311060 +n02312006 +n02312427 +n02313008 +n02316707 +n02318167 +n02319308 +n02319555 +n02319829 +n02320127 +n02322047 +n02323449 +n02323902 +n02324045 +n02325722 +n02325884 +n02326074 +n02326763 +n02326862 +n02327028 +n02327175 +n02327435 +n02327656 +n02327842 +n02328429 +n02329401 +n02330245 +n02331046 +n02331309 +n02332755 +n02333546 +n02334460 +n02335127 +n02336011 +n02336641 +n02338901 +n02339376 +n02339922 +n02343058 +n02343320 +n02343772 +n02344528 +n02345600 +n02346998 
+n02347274 +n02347573 +n02347744 +n02348173 +n02348788 +n02350105 +n02350989 +n02351870 +n02352591 +n02353861 +n02355227 +n02355477 +n02358091 +n02359324 +n02360781 +n02361587 +n02361706 +n02361850 +n02363245 +n02363351 +n02364520 +n02369680 +n02370806 +n02372584 +n02373336 +n02374149 +n02374451 +n02376542 +n02376791 +n02376918 +n02377181 +n02377480 +n02377703 +n02378415 +n02380335 +n02380583 +n02380745 +n02381460 +n02382437 +n02382948 +n02384858 +n02386014 +n02386310 +n02386496 +n02388276 +n02389346 +n02389559 +n02390454 +n02390834 +n02391234 +n02391373 +n02391508 +n02391994 +n02393580 +n02394477 +n02395003 +n02395694 +n02395931 +n02397529 +n02399000 +n02401031 +n02402010 +n02402175 +n02402425 +n02403325 +n02403454 +n02404186 +n02404573 +n02406174 +n02407959 +n02408660 +n02408817 +n02409870 +n02410702 +n02410900 +n02411705 +n02412440 +n02413131 +n02414578 +n02415435 +n02416519 +n02417070 +n02417534 +n02418064 +n02419796 +n02423218 +n02423362 +n02423589 +n02424305 +n02424695 +n02426813 +n02427724 +n02428349 +n02430045 +n02430559 +n02431122 +n02432511 +n02433546 +n02433925 +n02435853 +n02437136 +n02437971 +n02438173 +n02438272 +n02439033 +n02441326 +n02442172 +n02442336 +n02442446 +n02442572 +n02442668 +n02443015 +n02443346 +n02443808 +n02443959 +n02444251 +n02445004 +n02445171 +n02446206 +n02446352 +n02446645 +n02447021 +n02447762 +n02448060 +n02448633 +n02448885 +n02450034 +n02453108 +n02453611 +n02454794 +n02455135 +n02455428 +n02455720 +n02456008 +n02456275 +n02456962 +n02460009 +n02469914 +n02470325 +n02470899 +n02471300 +n02471762 +n02472293 +n02473307 +n02474777 +n02476219 +n02480153 +n02481103 +n02481235 +n02481366 +n02481500 +n02482060 +n02482286 +n02482474 +n02482650 +n02483092 +n02484322 +n02484473 +n02485225 +n02485371 +n02485536 +n02485688 +n02486657 +n02486908 +n02487079 +n02487547 +n02487675 +n02487847 +n02488003 +n02488415 +n02488894 +n02489589 +n02490597 +n02490811 +n02491107 +n02491329 +n02491474 +n02496913 +n02501583 +n02502514 +n02503127 
+n02503517 +n02504770 +n02507649 +n02508021 +n02512053 +n02512938 +n02513560 +n02515214 +n02516188 +n02517442 +n02517938 +n02519148 +n02519686 +n02521646 +n02522399 +n02524524 +n02526425 +n02526818 +n02527057 +n02527271 +n02527622 +n02528163 +n02529772 +n02530421 +n02532028 +n02532602 +n02533209 +n02533834 +n02534559 +n02534734 +n02535537 +n02537085 +n02537319 +n02538406 +n02538985 +n02540412 +n02541687 +n02546331 +n02548689 +n02549989 +n02550460 +n02552171 +n02554730 +n02556846 +n02557591 +n02557749 +n02559862 +n02561108 +n02561661 +n02562315 +n02562796 +n02563182 +n02564720 +n02565573 +n02566109 +n02568959 +n02569484 +n02570838 +n02572196 +n02574910 +n02576223 +n02576575 +n02578233 +n02579557 +n02580336 +n02581957 +n02583567 +n02585872 +n02586543 +n02588286 +n02590495 +n02590702 +n02590987 +n02594250 +n02596381 +n02597367 +n02599052 +n02599958 +n02600298 +n02601344 +n02602405 +n02603317 +n02604157 +n02605316 +n02606384 +n02607201 +n02607862 +n02613181 +n02614482 +n02614978 +n02619165 +n02621908 +n02623445 +n02624167 +n02625612 +n02626762 +n02627835 +n02630281 +n02630739 +n02631041 +n02636170 +n02636854 +n02638596 +n02640626 +n02640857 +n02642107 +n02642644 +n02643112 +n02644113 +n02646667 +n02648625 +n02650050 +n02650541 +n02652668 +n02653145 +n02653786 +n02654425 +n02655523 +n02656670 +n02657368 +n02658079 +n02661017 +n02662239 +n02663849 +n02667379 +n02667576 +n02668393 +n02670382 +n02671780 +n02672371 +n02676261 +n02676670 +n02677028 +n02677718 +n02678384 +n02680110 +n02680754 +n02682407 +n02682922 +n02683791 +n02686121 +n02686568 +n02687992 +n02688443 +n02689274 +n02691156 +n02692513 +n02693413 +n02693540 +n02694426 +n02694966 +n02695627 +n02697576 +n02698244 +n02700258 +n02700895 +n02702989 +n02703275 +n02705944 +n02708224 +n02708555 +n02709367 +n02709637 +n02710600 +n02712643 +n02713218 +n02715229 +n02715513 +n02715712 +n02716626 +n02726305 +n02726681 +n02727016 +n02727825 +n02728440 +n02729837 +n02729965 +n02730265 +n02732072 +n02732827 +n02733213 
+n02733524 +n02735361 +n02735688 +n02736798 +n02737660 +n02738031 +n02738271 +n02738535 +n02739550 +n02739668 +n02740533 +n02740764 +n02741475 +n02742322 +n02742753 +n02745492 +n02746365 +n02749790 +n02750169 +n02751067 +n02751295 +n02752496 +n02753044 +n02753394 +n02754103 +n02755352 +n02755529 +n02756098 +n02756977 +n02757462 +n02757810 +n02758134 +n02758960 +n02759700 +n02759963 +n02760099 +n02760199 +n02760429 +n02760855 +n02761392 +n02763198 +n02763714 +n02764044 +n02764614 +n02764779 +n02765028 +n02766320 +n02766534 +n02766792 +n02767433 +n02769075 +n02770830 +n02772554 +n02772700 +n02773037 +n02773838 +n02774152 +n02774630 +n02775483 +n02776205 +n02777100 +n02777734 +n02777927 +n02778456 +n02778669 +n02781121 +n02781338 +n02781517 +n02783035 +n02783324 +n02784998 +n02785648 +n02786198 +n02786463 +n02788689 +n02789487 +n02790823 +n02792552 +n02792948 +n02793842 +n02794008 +n02794779 +n02794972 +n02795783 +n02796207 +n02796623 +n02796995 +n02797692 +n02797881 +n02799897 +n02801184 +n02801525 +n02801938 +n02802721 +n02803349 +n02803666 +n02804252 +n02806088 +n02806379 +n02806875 +n02810471 +n02811468 +n02811719 +n02812201 +n02813252 +n02813399 +n02815478 +n02815950 +n02816494 +n02817031 +n02817650 +n02817799 +n02818832 +n02819697 +n02820210 +n02821627 +n02821943 +n02822220 +n02822399 +n02822865 +n02823335 +n02824448 +n02826589 +n02826886 +n02827606 +n02828299 +n02828884 +n02831335 +n02831724 +n02831894 +n02833793 +n02834778 +n02835412 +n02836268 +n02839351 +n02839910 +n02840619 +n02841063 +n02841506 +n02842133 +n02843029 +n02843777 +n02844214 +n02844307 +n02844714 +n02847631 +n02848216 +n02848523 +n02849154 +n02850950 +n02851099 +n02853016 +n02854532 +n02854926 +n02855089 +n02855390 +n02855793 +n02857365 +n02857477 +n02857644 +n02858304 +n02860415 +n02861886 +n02862048 +n02862916 +n02863750 +n02865665 +n02865931 +n02866578 +n02867715 +n02869737 +n02871631 +n02871824 +n02871963 +n02872752 +n02873839 +n02874086 +n02875436 +n02876326 +n02876457 +n02876657 
+n02877962 +n02879517 +n02880189 +n02880546 +n02880940 +n02881193 +n02881906 +n02882483 +n02882647 +n02883004 +n02883344 +n02884225 +n02885108 +n02885338 +n02886599 +n02887209 +n02887970 +n02888569 +n02889425 +n02891188 +n02891788 +n02892499 +n02893418 +n02896294 +n02896442 +n02897389 +n02897820 +n02898173 +n02898369 +n02898585 +n02898711 +n02900705 +n02901481 +n02901901 +n02902079 +n02902916 +n02903006 +n02904109 +n02904640 +n02908217 +n02909285 +n02911485 +n02912065 +n02913152 +n02914991 +n02916179 +n02916350 +n02917377 +n02917607 +n02919414 +n02920503 +n02921884 +n02923129 +n02924116 +n02925519 +n02928413 +n02928608 +n02929289 +n02929462 +n02929923 +n02931417 +n02931836 +n02932019 +n02932400 +n02933112 +n02933462 +n02933750 +n02933990 +n02934168 +n02935658 +n02935891 +n02936176 +n02936281 +n02936714 +n02938886 +n02939866 +n02941095 +n02942699 +n02943241 +n02943871 +n02944826 +n02945161 +n02946270 +n02946348 +n02946921 +n02947212 +n02947818 +n02948557 +n02949202 +n02950186 +n02950256 +n02950632 +n02950943 +n02951843 +n02952485 +n02952674 +n02953673 +n02954163 +n02954340 +n02954938 +n02955065 +n02955247 +n02955540 +n02955767 +n02957135 +n02957755 +n02958343 +n02959942 +n02961451 +n02961947 +n02963302 +n02963692 +n02963821 +n02965216 +n02965300 +n02965529 +n02966545 +n02966786 +n02966942 +n02967081 +n02967991 +n02968473 +n02969010 +n02969163 +n02969634 +n02969886 +n02970685 +n02970849 +n02971691 +n02972397 +n02973017 +n02974697 +n02975212 +n02976939 +n02978205 +n02978753 +n02979516 +n02982599 +n02983189 +n02983904 +n02984061 +n02984203 +n02984469 +n02984699 +n02985137 +n02985828 +n02986066 +n02987047 +n02987492 +n02989099 +n02991048 +n02991302 +n02992032 +n02993546 +n02995998 +n02997391 +n02997607 +n03001282 +n03001627 +n03002210 +n03003091 +n03004620 +n03005515 +n03007130 +n03007591 +n03010656 +n03010795 +n03011018 +n03011355 +n03012159 +n03013006 +n03014440 +n03015254 +n03017070 +n03018209 +n03020034 +n03020416 +n03020692 +n03024333 +n03025070 +n03025886 
+n03027108 +n03027250 +n03029066 +n03031422 +n03032811 +n03033362 +n03033986 +n03034516 +n03034663 +n03035510 +n03036469 +n03036866 +n03037108 +n03037709 +n03038685 +n03039015 +n03039947 +n03040229 +n03040376 +n03043274 +n03043958 +n03045337 +n03046257 +n03048883 +n03049066 +n03049457 +n03050026 +n03050546 +n03050655 +n03050864 +n03051396 +n03051540 +n03052464 +n03052917 +n03053047 +n03054901 +n03055670 +n03056097 +n03056493 +n03057021 +n03057636 +n03058107 +n03058603 +n03058949 +n03059366 +n03061050 +n03063073 +n03063338 +n03064350 +n03064758 +n03065708 +n03066849 +n03070193 +n03071021 +n03071160 +n03072201 +n03073296 +n03073977 +n03074380 +n03074855 +n03075097 +n03075500 +n03075634 +n03076411 +n03076708 +n03078287 +n03078670 +n03079230 +n03079741 +n03080497 +n03080731 +n03081986 +n03082127 +n03082807 +n03082979 +n03084420 +n03085333 +n03085602 +n03085915 +n03086183 +n03086457 +n03086670 +n03087366 +n03087643 +n03087816 +n03088707 +n03091044 +n03091374 +n03092166 +n03092314 +n03093792 +n03094503 +n03096439 +n03096960 +n03098140 +n03098806 +n03099454 +n03099771 +n03099945 +n03100346 +n03100490 +n03101156 +n03101986 +n03102654 +n03102859 +n03106722 +n03106898 +n03107046 +n03109881 +n03111690 +n03112869 +n03113152 +n03113657 +n03113835 +n03114839 +n03115180 +n03116530 +n03116767 +n03117199 +n03118346 +n03118969 +n03119510 +n03120198 +n03120491 +n03121897 +n03122748 +n03123809 +n03125870 +n03128085 +n03128427 +n03128519 +n03129001 +n03130066 +n03130563 +n03131669 +n03132261 +n03134853 +n03135917 +n03136369 +n03137579 +n03139464 +n03140900 +n03141065 +n03141327 +n03143572 +n03145384 +n03145843 +n03146846 +n03147509 +n03148324 +n03148727 +n03149401 +n03151077 +n03153948 +n03154073 +n03154446 +n03155178 +n03156071 +n03156405 +n03157348 +n03158796 +n03158885 +n03161450 +n03162818 +n03163798 +n03163973 +n03164605 +n03164722 +n03164929 +n03165823 +n03167978 +n03168107 +n03168217 +n03170635 +n03171356 +n03172965 +n03173387 +n03175604 +n03176386 +n03177165 +n03177708 
+n03178000 +n03178430 +n03180504 +n03180969 +n03181293 +n03182140 +n03182232 +n03182912 +n03183080 +n03186818 +n03187751 +n03189818 +n03193597 +n03196062 +n03196324 +n03196598 +n03199647 +n03199901 +n03200357 +n03200539 +n03200701 +n03200906 +n03201035 +n03201638 +n03201996 +n03202354 +n03202760 +n03203089 +n03203806 +n03204306 +n03204558 +n03204955 +n03205143 +n03205304 +n03206718 +n03206908 +n03207305 +n03208556 +n03210683 +n03211117 +n03211616 +n03212811 +n03214253 +n03214450 +n03215191 +n03219135 +n03220237 +n03221059 +n03221720 +n03222516 +n03223162 +n03223441 +n03224753 +n03224893 +n03225777 +n03226538 +n03228016 +n03228533 +n03228692 +n03229115 +n03229526 +n03231160 +n03231819 +n03235796 +n03235979 +n03236580 +n03236735 +n03237212 +n03237639 +n03239259 +n03239726 +n03240140 +n03241093 +n03241335 +n03241496 +n03242120 +n03242506 +n03242995 +n03243218 +n03245271 +n03245421 +n03246933 +n03250952 +n03251533 +n03251766 +n03252324 +n03252637 +n03254374 +n03255488 +n03255899 +n03256788 +n03256928 +n03257586 +n03258905 +n03259505 +n03261776 +n03262519 +n03262809 +n03262932 +n03265032 +n03266749 +n03267821 +n03269203 +n03269401 +n03270695 +n03271765 +n03271865 +n03272239 +n03272383 +n03273061 +n03273913 +n03274561 +n03274796 +n03276179 +n03277459 +n03277771 +n03278248 +n03279153 +n03279364 +n03279508 +n03280394 +n03280644 +n03281145 +n03282060 +n03282401 +n03284743 +n03284981 +n03285912 +n03286572 +n03287733 +n03288003 +n03289985 +n03291413 +n03292960 +n03294048 +n03294833 +n03296478 +n03297103 +n03297644 +n03297735 +n03298089 +n03302790 +n03303217 +n03303831 +n03304197 +n03304465 +n03305522 +n03307573 +n03308152 +n03309808 +n03314378 +n03314884 +n03315644 +n03316406 +n03318136 +n03319457 +n03320046 +n03322570 +n03322940 +n03323703 +n03324928 +n03325088 +n03326073 +n03327234 +n03327691 +n03327841 +n03329663 +n03330792 +n03334017 +n03334492 +n03334912 +n03335030 +n03335846 +n03336839 +n03337494 +n03338287 +n03338821 +n03339296 +n03339643 +n03340009 +n03340923 
+n03342961 +n03343354 +n03343560 +n03343853 +n03346135 +n03346455 +n03349296 +n03350352 +n03350456 +n03350602 +n03351262 +n03351979 +n03352628 +n03354903 +n03355468 +n03356446 +n03357267 +n03357716 +n03359137 +n03359566 +n03360731 +n03361683 +n03362771 +n03363363 +n03364008 +n03364937 +n03365592 +n03365991 +n03366823 +n03373237 +n03374649 +n03374838 +n03375171 +n03376279 +n03378342 +n03379343 +n03379828 +n03379989 +n03380647 +n03380867 +n03381126 +n03381231 +n03381776 +n03382856 +n03382969 +n03383468 +n03384167 +n03384891 +n03385557 +n03386011 +n03387323 +n03387653 +n03390327 +n03391770 +n03393324 +n03394480 +n03394649 +n03396580 +n03396654 +n03397266 +n03397532 +n03398228 +n03399761 +n03399971 +n03402188 +n03402369 +n03404012 +n03404360 +n03404449 +n03405265 +n03405725 +n03407369 +n03409393 +n03409591 +n03410147 +n03411339 +n03412058 +n03412220 +n03412511 +n03412906 +n03413264 +n03413428 +n03413828 +n03414162 +n03415252 +n03416489 +n03416775 +n03417345 +n03418158 +n03418242 +n03419014 +n03422072 +n03422589 +n03423719 +n03424630 +n03427296 +n03428090 +n03428349 +n03429003 +n03429288 +n03429914 +n03430091 +n03430313 +n03430551 +n03430959 +n03431243 +n03431745 +n03433637 +n03433877 +n03434285 +n03434830 +n03435593 +n03437941 +n03438257 +n03439814 +n03441112 +n03442288 +n03442756 +n03446070 +n03446832 +n03448031 +n03448956 +n03449564 +n03449858 +n03450516 +n03452267 +n03452449 +n03453320 +n03454110 +n03454211 +n03454707 +n03455355 +n03456548 +n03456665 +n03457008 +n03457686 +n03458271 +n03459914 +n03461882 +n03465500 +n03465818 +n03466162 +n03466839 +n03467517 +n03467796 +n03467984 +n03468696 +n03469493 +n03470387 +n03470629 +n03470948 +n03472232 +n03472535 +n03472937 +n03473817 +n03473966 +n03475823 +n03476083 +n03476313 +n03477773 +n03477902 +n03478756 +n03478907 +n03481521 +n03482523 +n03483230 +n03483531 +n03484083 +n03484931 +n03487331 +n03487444 +n03487774 +n03488188 +n03488603 +n03489162 +n03490324 +n03490449 +n03490884 +n03491988 +n03496296 +n03496612 
+n03497100 +n03497657 +n03498536 +n03499468 +n03500295 +n03501152 +n03501288 +n03501614 +n03502331 +n03502509 +n03502777 +n03503718 +n03503997 +n03505383 +n03505504 +n03506370 +n03507963 +n03508101 +n03509394 +n03509843 +n03510583 +n03510866 +n03511175 +n03512147 +n03512911 +n03513137 +n03513376 +n03515338 +n03517899 +n03517982 +n03518631 +n03519674 +n03521076 +n03521544 +n03522634 +n03524574 +n03524976 +n03525074 +n03525454 +n03525827 +n03528263 +n03529444 +n03531281 +n03531447 +n03531546 +n03532342 +n03534776 +n03535024 +n03536761 +n03537412 +n03538037 +n03538300 +n03538634 +n03538957 +n03540267 +n03540595 +n03541091 +n03541696 +n03541923 +n03542333 +n03542860 +n03543603 +n03544360 +n03545150 +n03546340 +n03547054 +n03547530 +n03548930 +n03550153 +n03550289 +n03551084 +n03551790 +n03552449 +n03552749 +n03553486 +n03554460 +n03555426 +n03555662 +n03557590 +n03558176 +n03558404 +n03558739 +n03561169 +n03563200 +n03563710 +n03563967 +n03565288 +n03565565 +n03566329 +n03568117 +n03568818 +n03571942 +n03572205 +n03574555 +n03574816 +n03575958 +n03576215 +n03577672 +n03577818 +n03578055 +n03578251 +n03578656 +n03579538 +n03579982 +n03583621 +n03584400 +n03585073 +n03588951 +n03589513 +n03589791 +n03590306 +n03590932 +n03592245 +n03592773 +n03593526 +n03595409 +n03595860 +n03596285 +n03597317 +n03598151 +n03598299 +n03598646 +n03600977 +n03601638 +n03601840 +n03602081 +n03603722 +n03604629 +n03604843 +n03605722 +n03605915 +n03606465 +n03609235 +n03609542 +n03610418 +n03610992 +n03612814 +n03613294 +n03613592 +n03614007 +n03614532 +n03615563 +n03617095 +n03617594 +n03618546 +n03618982 +n03619396 +n03619650 +n03619793 +n03619890 +n03620052 +n03621049 +n03621694 +n03622931 +n03623556 +n03624134 +n03625355 +n03626115 +n03631177 +n03631811 +n03632852 +n03633886 +n03635032 +n03635668 +n03635932 +n03636248 +n03636649 +n03638883 +n03639675 +n03640988 +n03642444 +n03646296 +n03646916 +n03647520 +n03651388 +n03653220 +n03653454 +n03654576 +n03655072 +n03656484 +n03657239 
+n03658858 +n03659292 +n03660124 +n03661340 +n03662719 +n03662887 +n03663531 +n03664675 +n03664943 +n03665366 +n03666362 +n03666917 +n03667235 +n03667829 +n03671914 +n03672827 +n03673450 +n03673767 +n03676759 +n03677766 +n03679384 +n03679712 +n03681477 +n03682487 +n03684823 +n03685307 +n03685820 +n03686130 +n03686470 +n03687928 +n03688943 +n03689157 +n03689570 +n03690851 +n03691817 +n03692379 +n03693293 +n03697552 +n03698604 +n03699280 +n03699975 +n03700963 +n03701191 +n03701391 +n03701640 +n03701790 +n03702248 +n03704834 +n03705379 +n03706653 +n03707597 +n03708036 +n03709206 +n03709363 +n03709545 +n03710528 +n03711711 +n03711999 +n03712887 +n03713069 +n03714235 +n03715386 +n03715669 +n03715892 +n03716966 +n03717131 +n03718212 +n03718335 +n03718699 +n03718789 +n03719053 +n03721590 +n03722007 +n03722288 +n03724176 +n03725035 +n03725717 +n03726516 +n03726760 +n03726993 +n03727837 +n03727946 +n03728437 +n03728982 +n03729647 +n03729951 +n03730153 +n03730788 +n03731695 +n03733644 +n03733925 +n03735637 +n03736970 +n03738241 +n03738472 +n03739518 +n03739693 +n03743902 +n03744276 +n03744684 +n03744840 +n03745571 +n03746330 +n03748162 +n03749504 +n03749807 +n03750206 +n03751065 +n03752185 +n03752922 +n03753077 +n03753514 +n03758894 +n03759432 +n03760671 +n03762982 +n03763727 +n03764276 +n03765561 +n03765934 +n03766322 +n03768132 +n03769722 +n03770954 +n03772077 +n03772674 +n03773035 +n03775199 +n03775847 +n03779000 +n03779370 +n03780047 +n03781787 +n03782190 +n03785499 +n03787523 +n03789171 +n03789400 +n03789946 +n03790230 +n03790512 +n03790755 +n03791235 +n03792048 +n03792526 +n03793850 +n03795976 +n03796181 +n03797390 +n03798982 +n03799113 +n03800485 +n03800772 +n03800933 +n03802007 +n03802228 +n03802393 +n03803116 +n03809312 +n03811295 +n03811444 +n03811847 +n03811965 +n03812382 +n03812924 +n03813176 +n03813946 +n03815278 +n03815482 +n03815615 +n03816005 +n03816136 +n03816849 +n03817647 +n03819595 +n03819994 +n03820154 +n03820318 +n03820728 +n03820950 +n03824197 
+n03825080 +n03827536 +n03828020 +n03829340 +n03831757 +n03834040 +n03834604 +n03836062 +n03837422 +n03838748 +n03839172 +n03839276 +n03839795 +n03841666 +n03842156 +n03844045 +n03844233 +n03845190 +n03846234 +n03846772 +n03847471 +n03847823 +n03848168 +n03848348 +n03849275 +n03850613 +n03851341 +n03851787 +n03852280 +n03852688 +n03854815 +n03859280 +n03859495 +n03859958 +n03861430 +n03861842 +n03862676 +n03863923 +n03864139 +n03864356 +n03864692 +n03865371 +n03865949 +n03868406 +n03871083 +n03871524 +n03871724 +n03873848 +n03874138 +n03874823 +n03875218 +n03880129 +n03880323 +n03880531 +n03883054 +n03883773 +n03883944 +n03884639 +n03885535 +n03885669 +n03886053 +n03886641 +n03887185 +n03888022 +n03889503 +n03889726 +n03891051 +n03892557 +n03894051 +n03894379 +n03896103 +n03896233 +n03896419 +n03896628 +n03896984 +n03897943 +n03898271 +n03898633 +n03899612 +n03899933 +n03901338 +n03903133 +n03903424 +n03904060 +n03904183 +n03904433 +n03905540 +n03906997 +n03907654 +n03908204 +n03909160 +n03909406 +n03915118 +n03915437 +n03916470 +n03916720 +n03917327 +n03918480 +n03920737 +n03923564 +n03923692 +n03924069 +n03926148 +n03926412 +n03926876 +n03927792 +n03928116 +n03929091 +n03929202 +n03929443 +n03930515 +n03932670 +n03936269 +n03938522 +n03939677 +n03940256 +n03941684 +n03943920 +n03945615 +n03947111 +n03947466 +n03948459 +n03951971 +n03953020 +n03953416 +n03955809 +n03956785 +n03956922 +n03957315 +n03957762 +n03958630 +n03958752 +n03959014 +n03959701 +n03961939 +n03962525 +n03962932 +n03963028 +n03965907 +n03966325 +n03966751 +n03966976 +n03967942 +n03968293 +n03971321 +n03972524 +n03973520 +n03973628 +n03975035 +n03979377 +n03979492 +n03980026 +n03981340 +n03982232 +n03982895 +n03984234 +n03984381 +n03985232 +n03986704 +n03988170 +n03989665 +n03990474 +n03991443 +n03992325 +n03992703 +n03993180 +n03993403 +n03994008 +n03994757 +n03995018 +n03995856 +n03996145 +n03996416 +n03997484 +n03999992 +n04000311 +n04001397 +n04001499 +n04001845 +n04004210 +n04004475 
+n04005912 +n04007664 +n04010057 +n04010779 +n04010927 +n04011827 +n04012084 +n04013729 +n04014297 +n04015204 +n04016576 +n04016684 +n04018399 +n04018667 +n04019101 +n04019696 +n04020087 +n04020298 +n04020912 +n04021028 +n04021362 +n04021798 +n04022332 +n04022708 +n04023249 +n04024274 +n04026053 +n04026918 +n04027023 +n04027706 +n04028315 +n04029734 +n04030274 +n04036303 +n04037964 +n04038440 +n04038727 +n04039848 +n04042358 +n04042632 +n04042795 +n04042985 +n04043733 +n04044307 +n04044498 +n04045085 +n04045397 +n04046590 +n04046974 +n04047401 +n04049405 +n04050066 +n04051549 +n04051825 +n04052757 +n04056932 +n04057047 +n04057435 +n04057846 +n04057981 +n04058096 +n04058239 +n04059947 +n04060647 +n04060904 +n04061793 +n04061969 +n04062644 +n04063373 +n04063868 +n04064401 +n04065464 +n04065789 +n04067231 +n04067353 +n04067921 +n04068441 +n04068601 +n04069276 +n04069777 +n04070207 +n04070964 +n04071102 +n04071263 +n04071393 +n04072193 +n04072551 +n04073948 +n04075468 +n04075916 +n04076284 +n04077430 +n04077734 +n04078574 +n04079106 +n04079244 +n04079933 +n04080454 +n04080833 +n04081844 +n04083649 +n04086794 +n04087126 +n04087709 +n04088696 +n04088797 +n04089666 +n04089976 +n04090548 +n04091097 +n04093625 +n04095210 +n04096066 +n04097622 +n04097866 +n04099175 +n04099429 +n04100174 +n04101497 +n04101701 +n04102037 +n04102285 +n04102406 +n04102962 +n04104147 +n04104500 +n04105068 +n04105438 +n04105893 +n04107984 +n04108268 +n04110068 +n04110654 +n04110955 +n04111190 +n04111414 +n04111668 +n04113765 +n04114996 +n04115256 +n04115996 +n04116389 +n04118021 +n04121228 +n04122349 +n04122492 +n04122825 +n04123123 +n04123567 +n04123740 +n04125116 +n04125853 +n04126541 +n04126659 +n04126980 +n04127904 +n04128499 +n04128837 +n04131929 +n04134632 +n04136510 +n04137444 +n04137897 +n04138977 +n04139859 +n04140064 +n04140631 +n04141838 +n04143897 +n04146050 +n04147495 +n04148054 +n04149083 +n04151108 +n04151581 +n04151940 +n04152387 +n04154753 +n04156297 +n04156411 +n04157320 
+n04158807 +n04158956 +n04160372 +n04160586 +n04161358 +n04161981 +n04164757 +n04164868 +n04166111 +n04167489 +n04169437 +n04170037 +n04171459 +n04171629 +n04171831 +n04174101 +n04174500 +n04176068 +n04176190 +n04176528 +n04177329 +n04177545 +n04180063 +n04180888 +n04181228 +n04181718 +n04182322 +n04183217 +n04183329 +n04184435 +n04184600 +n04185071 +n04186051 +n04186268 +n04186455 +n04186624 +n04186848 +n04187061 +n04187547 +n04187885 +n04189092 +n04190052 +n04190464 +n04190747 +n04190997 +n04191150 +n04191595 +n04191943 +n04192238 +n04192858 +n04194289 +n04196080 +n04197391 +n04198015 +n04198797 +n04199027 +n04201733 +n04202417 +n04205318 +n04206356 +n04207763 +n04210390 +n04211356 +n04211970 +n04215910 +n04216634 +n04216860 +n04216963 +n04217718 +n04217882 +n04219424 +n04221823 +n04222210 +n04222470 +n04222847 +n04225031 +n04225222 +n04225729 +n04226464 +n04226537 +n04227900 +n04229007 +n04229107 +n04229480 +n04230603 +n04230808 +n04231693 +n04232153 +n04233832 +n04234455 +n04235291 +n04235771 +n04236001 +n04236377 +n04236702 +n04238617 +n04241042 +n04241394 +n04242408 +n04243003 +n04243941 +n04244997 +n04245847 +n04246855 +n04247630 +n04247736 +n04248507 +n04249415 +n04250224 +n04250599 +n04253931 +n04255499 +n04256520 +n04260589 +n04261116 +n04262678 +n04263336 +n04263760 +n04264233 +n04264914 +n04266486 +n04267577 +n04269944 +n04270891 +n04271148 +n04272054 +n04272782 +n04273064 +n04273796 +n04275283 +n04275661 +n04275904 +n04278353 +n04279172 +n04279987 +n04280259 +n04280970 +n04283585 +n04283905 +n04284002 +n04285146 +n04285622 +n04285803 +n04286128 +n04288272 +n04288533 +n04288673 +n04289449 +n04291242 +n04291759 +n04292414 +n04292572 +n04293119 +n04293744 +n04294212 +n04294426 +n04295081 +n04295881 +n04299215 +n04300358 +n04301000 +n04301474 +n04303258 +n04304375 +n04305471 +n04306080 +n04306847 +n04307419 +n04307878 +n04308084 +n04308273 +n04308397 +n04308583 +n04308807 +n04309348 +n04309833 +n04310721 +n04311595 +n04312154 +n04312432 +n04313220 
+n04314914 +n04315828 +n04315948 +n04317420 +n04318131 +n04318982 +n04319937 +n04320405 +n04322026 +n04322692 +n04322801 +n04323819 +n04326799 +n04326896 +n04328054 +n04328329 +n04328946 +n04329477 +n04330340 +n04330669 +n04330998 +n04331277 +n04332987 +n04333129 +n04338517 +n04339638 +n04340750 +n04340935 +n04341133 +n04341414 +n04341686 +n04346679 +n04347519 +n04348184 +n04348359 +n04349401 +n04350104 +n04350458 +n04354589 +n04356595 +n04358707 +n04358874 +n04359335 +n04359589 +n04360501 +n04360798 +n04361095 +n04361260 +n04362821 +n04363210 +n04363874 +n04364545 +n04364827 +n04364994 +n04365328 +n04365484 +n04365751 +n04368695 +n04370048 +n04371563 +n04373894 +n04375775 +n04377057 +n04378956 +n04379243 +n04379964 +n04380346 +n04381994 +n04382334 +n04382880 +n04383130 +n04383301 +n04386664 +n04387201 +n04387400 +n04388162 +n04388743 +n04389521 +n04390873 +n04391838 +n04392526 +n04393095 +n04394261 +n04395875 +n04397168 +n04397261 +n04397645 +n04398497 +n04398688 +n04398834 +n04399046 +n04400289 +n04401088 +n04402057 +n04402580 +n04402746 +n04402984 +n04403638 +n04404817 +n04404997 +n04405540 +n04405762 +n04407435 +n04407686 +n04409128 +n04409806 +n04410086 +n04410365 +n04410485 +n04411264 +n04411966 +n04413151 +n04413419 +n04415663 +n04416901 +n04417180 +n04417361 +n04417809 +n04419073 +n04421872 +n04422875 +n04427715 +n04428008 +n04431436 +n04431745 +n04434932 +n04435180 +n04436185 +n04436401 +n04436542 +n04437670 +n04437953 +n04438304 +n04438643 +n04440963 +n04441662 +n04444749 +n04445040 +n04445952 +n04446276 +n04447276 +n04447443 +n04448070 +n04448361 +n04450243 +n04450640 +n04450749 +n04451818 +n04452615 +n04452848 +n04453156 +n04453666 +n04453910 +n04454654 +n04455250 +n04455652 +n04456472 +n04457326 +n04458843 +n04459362 +n04459610 +n04460130 +n04462011 +n04463679 +n04464852 +n04467099 +n04467307 +n04468005 +n04469251 +n04470741 +n04471315 +n04471632 +n04472243 +n04472726 +n04473884 +n04474466 +n04475411 +n04475631 +n04477548 +n04478512 +n04478657 
+n04480527 +n04481524 +n04487724 +n04488427 +n04489008 +n04489817 +n04490091 +n04491769 +n04493109 +n04494204 +n04495450 +n04497442 +n04497570 +n04498523 +n04499446 +n04499554 +n04500060 +n04501837 +n04502197 +n04502502 +n04502670 +n04502851 +n04504141 +n04504770 +n04505036 +n04506994 +n04507453 +n04508163 +n04508489 +n04508949 +n04509260 +n04509592 +n04511002 +n04514241 +n04516116 +n04516214 +n04516672 +n04518132 +n04519153 +n04520170 +n04520382 +n04521987 +n04524313 +n04527648 +n04529681 +n04530566 +n04531098 +n04531873 +n04533042 +n04533199 +n04533700 +n04534127 +n04534895 +n04536153 +n04538552 +n04539203 +n04540761 +n04541320 +n04543158 +n04544450 +n04546194 +n04546855 +n04547592 +n04549122 +n04549919 +n04551055 +n04552696 +n04553389 +n04554871 +n04555600 +n04555897 +n04556948 +n04557308 +n04557751 +n04558059 +n04558804 +n04559023 +n04559730 +n04562262 +n04563204 +n04565375 +n04566257 +n04567098 +n04568069 +n04568557 +n04569520 +n04569822 +n04570958 +n04571292 +n04571566 +n04571958 +n04572935 +n04574471 +n04574999 +n04576002 +n04576211 +n04576971 +n04577426 +n04577769 +n04578801 +n04579230 +n04580493 +n04581595 +n04582349 +n04583620 +n04585745 +n04585980 +n04586932 +n04587648 +n04588739 +n04589190 +n04589434 +n04591056 +n04591887 +n04592005 +n04592099 +n04594218 +n04594489 +n04595285 +n04595855 +n04596852 +n04597066 +n04597804 +n04598136 +n04598582 +n04599124 +n04600312 +n04600486 +n04600912 +n04603729 +n04603872 +n04605726 +n04606574 +n04608329 +n04608567 +n04609531 +n04609651 +n04610176 +n04610503 +n04610676 +n04611916 +n04613015 +n04615226 +n04615644 +n04950713 +n04951373 +n04958634 +n04959672 +n04960277 +n04961691 +n04963740 +n04965179 +n04965661 +n04967191 +n04968895 +n04970059 +n04970631 +n04970916 +n04972801 +n04973386 +n04976952 +n05238282 +n05241218 +n05242070 +n05244934 +n05266879 +n05399034 +n05447757 +n05449959 +n05453657 +n05467758 +n05586759 +n06254669 +n06262567 +n06263369 +n06263609 +n06263762 +n06266417 +n06266710 +n06267145 +n06271778 
+n06272290 +n06272803 +n06274092 +n06275353 +n06276697 +n06277280 +n06281040 +n06359467 +n06359657 +n06418693 +n06591815 +n06592078 +n06595351 +n06613686 +n06793231 +n07556637 +n07556970 +n07557165 +n07557434 +n07560652 +n07561112 +n07562495 +n07563800 +n07564629 +n07564971 +n07565725 +n07565945 +n07566340 +n07566863 +n07567390 +n07567707 +n07568818 +n07569106 +n07569543 +n07570720 +n07572353 +n07572957 +n07573103 +n07573696 +n07574602 +n07575076 +n07575726 +n07575984 +n07576182 +n07576438 +n07576577 +n07577374 +n07579575 +n07580053 +n07580359 +n07580470 +n07581346 +n07581775 +n07582277 +n07582441 +n07582609 +n07583197 +n07584228 +n07584593 +n07585208 +n07587441 +n07587700 +n07588947 +n07590320 +n07591473 +n07592094 +n07592656 +n07593774 +n07595914 +n07596046 +n07596452 +n07596684 +n07597145 +n07597365 +n07598734 +n07599468 +n07599783 +n07599998 +n07600506 +n07601407 +n07605474 +n07605944 +n07606278 +n07606764 +n07607707 +n07609407 +n07609840 +n07611148 +n07611358 +n07611839 +n07611991 +n07612367 +n07612632 +n07612996 +n07613671 +n07614198 +n07614825 +n07615052 +n07615190 +n07615460 +n07615569 +n07615671 +n07616590 +n07617188 +n07619004 +n07623136 +n07624466 +n07627931 +n07628068 +n07641928 +n07642471 +n07642933 +n07643306 +n07643474 +n07643764 +n07643981 +n07644244 +n07663899 +n07678729 +n07679356 +n07680517 +n07680932 +n07681926 +n07682316 +n07682624 +n07683786 +n07684600 +n07685730 +n07686873 +n07687211 +n07687469 +n07687789 +n07689003 +n07690273 +n07690892 +n07692405 +n07692614 +n07693889 +n07693972 +n07694403 +n07695878 +n07695965 +n07697100 +n07704054 +n07705931 +n07707451 +n07708512 +n07708798 +n07709333 +n07710007 +n07710283 +n07710616 +n07710952 +n07712063 +n07712382 +n07712748 +n07712856 +n07713395 +n07713895 +n07714078 +n07714802 +n07714895 +n07715561 +n07715721 +n07716034 +n07717070 +n07717858 +n07718671 +n07719437 +n07719839 +n07720442 +n07720615 +n07721325 +n07721456 +n07721678 +n07722217 +n07722763 +n07723330 +n07723559 +n07723753 +n07724943 
+n07725376 +n07725531 +n07726796 +n07727578 +n07727868 +n07728804 +n07729000 +n07729485 +n07730406 +n07730855 +n07731122 +n07731587 +n07731952 +n07732302 +n07732747 +n07734017 +n07734292 +n07735052 +n07735803 +n07737081 +n07739125 +n07739506 +n07740220 +n07740954 +n07741461 +n07742012 +n07742704 +n07744246 +n07747055 +n07747811 +n07747951 +n07748753 +n07748912 +n07749095 +n07749192 +n07749312 +n07749731 +n07750586 +n07751451 +n07752377 +n07752664 +n07753743 +n07755089 +n07755411 +n07755707 +n07756096 +n07757132 +n07757312 +n07757602 +n07757990 +n07758680 +n07758950 +n07759424 +n07759691 +n07759816 +n07760501 +n07761141 +n07761309 +n07761611 +n07761777 +n07761954 +n07767344 +n07767847 +n07770571 +n07771212 +n07800091 +n07800740 +n07801508 +n07802152 +n07802417 +n07803093 +n07803545 +n07804323 +n07805254 +n07805594 +n07805731 +n07806221 +n07806633 +n07807317 +n07807710 +n07807922 +n07809096 +n07809368 +n07810907 +n07811416 +n07812184 +n07814203 +n07815588 +n07818277 +n07819480 +n07820497 +n07820814 +n07823951 +n07824702 +n07824988 +n07825717 +n07828987 +n07829412 +n07830593 +n07832902 +n07834507 +n07836731 +n07837002 +n07837362 +n07838233 +n07841495 +n07841639 +n07841907 +n07842753 +n07842972 +n07843464 +n07843775 +n07844042 +n07844604 +n07846143 +n07847198 +n07848338 +n07848771 +n07849336 +n07850083 +n07850329 +n07851298 +n07852045 +n07852919 +n07854813 +n07856270 +n07857959 +n07858595 +n07859284 +n07859583 +n07860805 +n07861158 +n07861813 +n07863374 +n07864638 +n07865105 +n07867421 +n07867883 +n07869391 +n07869775 +n07870313 +n07871436 +n07873464 +n07874063 +n07874159 +n07874259 +n07874343 +n07874441 +n07874780 +n07875693 +n07875835 +n07876281 +n07880751 +n07881117 +n07881205 +n07881404 +n07881800 +n07882497 +n07882886 +n07883031 +n07883251 +n07883384 +n07884567 +n07886572 +n07886849 +n07887634 +n07888465 +n07888909 +n07889510 +n07890352 +n07890750 +n07891726 +n07892813 +n07893528 +n07893891 +n07894102 +n07894298 +n07894965 +n07895237 +n07895435 +n07895595 
+n07895710 +n07895839 +n07896287 +n07897200 +n07897865 +n07898117 +n07898333 +n07898745 +n07899108 +n07900406 +n07900616 +n07901587 +n07903208 +n07904395 +n07905038 +n07906284 +n07906877 +n07907161 +n07907548 +n07907943 +n07909129 +n07909811 +n07911371 +n07911677 +n07912211 +n07913393 +n07914413 +n07915618 +n07916041 +n07917618 +n07918028 +n07920222 +n07921455 +n07921948 +n07923748 +n07924033 +n07924560 +n07924834 +n07925966 +n07926920 +n07927197 +n07927931 +n07929519 +n07930554 +n07931001 +n07931096 +n07932614 +n07932841 +n07933274 +n07933891 +n07934032 +n07934530 +n07935152 +n07935504 +n07936263 +n07936745 +n07938149 +n07951464 +n08554440 +n08558963 +n08596076 +n08598301 +n08616050 +n08640531 +n08659446 +n09191635 +n09206896 +n09206985 +n09210862 +n09213434 +n09213565 +n09214060 +n09214916 +n09215437 +n09217230 +n09230041 +n09233446 +n09238926 +n09255070 +n09259025 +n09259219 +n09262690 +n09265620 +n09269882 +n09270735 +n09287968 +n09289331 +n09289596 +n09290444 +n09295946 +n09300306 +n09302616 +n09303008 +n09303528 +n09304750 +n09305031 +n09308572 +n09309292 +n09315159 +n09326662 +n09335693 +n09335809 +n09336555 +n09337253 +n09344198 +n09352849 +n09359803 +n09362945 +n09366017 +n09366317 +n09375606 +n09376526 +n09381242 +n09393605 +n09396465 +n09398677 +n09405787 +n09406793 +n09409512 +n09409752 +n09410224 +n09416076 +n09421799 +n09428628 +n09432990 +n09433442 +n09437454 +n09439213 +n09443641 +n09453008 +n09458269 +n09472413 +n09474010 +n09505153 +n09606009 +n09606527 +n09608709 +n09610405 +n09613191 +n09615336 +n09616922 +n09619168 +n09619452 +n09620078 +n09620794 +n09622049 +n09622302 +n09624168 +n09624559 +n09625401 +n09626238 +n09627906 +n09629246 +n09629752 +n09631129 +n09632274 +n09632518 +n09633969 +n09636339 +n09638875 +n09639919 +n09641002 +n09644152 +n09648743 +n09651123 +n09665545 +n09669631 +n09670280 +n09676884 +n09679925 +n09690208 +n09694771 +n09696585 +n09697401 +n09700964 +n09701148 +n09701833 +n09705124 +n09708750 +n09710164 +n09716047 
+n09718217 +n09722658 +n09724785 +n09725229 +n09725772 +n09726621 +n09727440 +n09727826 +n09730204 +n09731436 +n09731571 +n09735258 +n09738400 +n09744679 +n09754217 +n09758173 +n09758885 +n09761068 +n09763784 +n09764201 +n09764598 +n09765278 +n09767197 +n09769076 +n09770179 +n09771435 +n09772746 +n09773962 +n09774783 +n09790482 +n09792555 +n09795124 +n09795334 +n09800964 +n09802445 +n09802641 +n09805151 +n09805475 +n09809538 +n09809749 +n09810166 +n09811712 +n09814660 +n09815790 +n09816771 +n09818022 +n09820263 +n09821831 +n09823502 +n09824135 +n09824609 +n09826204 +n09830194 +n09831962 +n09834699 +n09836160 +n09840217 +n09841188 +n09841515 +n09841696 +n09842047 +n09848489 +n09851575 +n09853645 +n09853881 +n09854915 +n09857007 +n09861946 +n09865398 +n09868270 +n09871681 +n09877951 +n09889691 +n09892693 +n09894654 +n09895222 +n09895701 +n09902353 +n09903153 +n09910374 +n09917593 +n09918248 +n09923418 +n09923673 +n09924996 +n09927089 +n09927451 +n09928136 +n09928451 +n09929298 +n09930257 +n09930876 +n09931640 +n09933098 +n09935434 +n09936892 +n09937056 +n09941964 +n09942970 +n09943239 +n09943811 +n09944160 +n09945319 +n09950457 +n09951070 +n09951274 +n09960688 +n09962966 +n09964411 +n09968845 +n09974648 +n09976728 +n09979321 +n09983572 +n09989502 +n09990415 +n09991867 +n09992538 +n09992837 +n09993252 +n09994673 +n09996481 +n09997622 +n10001217 +n10006748 +n10007684 +n10009484 +n10009671 +n10015215 +n10015897 +n10017422 +n10018861 +n10020890 +n10024362 +n10029068 +n10034201 +n10034614 +n10035952 +n10036266 +n10036929 +n10037385 +n10040945 +n10041887 +n10042690 +n10043643 +n10044879 +n10047459 +n10048367 +n10048836 +n10052694 +n10053808 +n10054657 +n10055730 +n10055847 +n10060175 +n10067968 +n10070711 +n10077593 +n10078131 +n10078806 +n10079399 +n10079893 +n10080869 +n10083823 +n10084043 +n10084295 +n10086383 +n10091651 +n10092488 +n10093475 +n10094584 +n10095869 +n10098710 +n10098862 +n10099375 +n10101634 +n10102800 +n10105085 +n10107303 +n10109662 +n10111903 
+n10112129 +n10118844 +n10126177 +n10126424 +n10126708 +n10127689 +n10129825 +n10134396 +n10134982 +n10136959 +n10142747 +n10142946 +n10143172 +n10143725 +n10145340 +n10145774 +n10148305 +n10150071 +n10150940 +n10151570 +n10153594 +n10154186 +n10154601 +n10155849 +n10162194 +n10164233 +n10165448 +n10168183 +n10168584 +n10171567 +n10182190 +n10185793 +n10186774 +n10187130 +n10195593 +n10200781 +n10202624 +n10205457 +n10206173 +n10207169 +n10210137 +n10215623 +n10216106 +n10224578 +n10225219 +n10228278 +n10235385 +n10237069 +n10241300 +n10243664 +n10245639 +n10249270 +n10249459 +n10249950 +n10257221 +n10259348 +n10263411 +n10266328 +n10266848 +n10271677 +n10273064 +n10274815 +n10276045 +n10282672 +n10284064 +n10284965 +n10296444 +n10299250 +n10299700 +n10305635 +n10305802 +n10306004 +n10308732 +n10312287 +n10314054 +n10315561 +n10316360 +n10317007 +n10317500 +n10320863 +n10321340 +n10322238 +n10323999 +n10324560 +n10328437 +n10332385 +n10335246 +n10335931 +n10340312 +n10341573 +n10343554 +n10345100 +n10353016 +n10353355 +n10355142 +n10355449 +n10355688 +n10356450 +n10357613 +n10360747 +n10366966 +n10369528 +n10370381 +n10376523 +n10377021 +n10379376 +n10380672 +n10383816 +n10386984 +n10387196 +n10387324 +n10393909 +n10396106 +n10399130 +n10400998 +n10402824 +n10403876 +n10405694 +n10407954 +n10409752 +n10411551 +n10415037 +n10417551 +n10418101 +n10419047 +n10420031 +n10421016 +n10426454 +n10427764 +n10428004 +n10433737 +n10435716 +n10435988 +n10438172 +n10439851 +n10444194 +n10450303 +n10462860 +n10464052 +n10466918 +n10467179 +n10470779 +n10474064 +n10474645 +n10478960 +n10481268 +n10482054 +n10482921 +n10484858 +n10488309 +n10495421 +n10499355 +n10499857 +n10506544 +n10508710 +n10512372 +n10512708 +n10519494 +n10521100 +n10521662 +n10522035 +n10522324 +n10522759 +n10523341 +n10525134 +n10525436 +n10525617 +n10527334 +n10529231 +n10541833 +n10542888 +n10543161 +n10544232 +n10544748 +n10546633 +n10548537 +n10548681 +n10554846 +n10556518 +n10557854 +n10559288 
+n10560637 +n10568200 +n10570019 +n10575787 +n10576962 +n10577284 +n10580535 +n10582746 +n10583387 +n10594147 +n10595164 +n10595647 +n10599806 +n10602985 +n10604634 +n10605253 +n10610465 +n10612210 +n10614629 +n10617193 +n10618685 +n10618848 +n10619642 +n10620758 +n10622053 +n10624074 +n10624310 +n10625860 +n10628644 +n10630188 +n10632576 +n10633450 +n10648237 +n10648696 +n10654932 +n10657835 +n10661002 +n10661563 +n10665698 +n10669991 +n10674130 +n10676018 +n10679174 +n10682953 +n10686073 +n10692883 +n10693824 +n10694258 +n10698368 +n10700201 +n10700640 +n10701180 +n10703336 +n10703692 +n10705615 +n10707233 +n10708454 +n10709529 +n10713686 +n10720453 +n10721321 +n10722575 +n10722965 +n10726786 +n10735298 +n10740868 +n10741152 +n10742997 +n10744164 +n10747119 +n10751265 +n10752480 +n10759151 +n10759982 +n10763383 +n10763620 +n10765679 +n10766260 +n10768903 +n10779610 +n10780632 +n10782791 +n10782940 +n10787470 +n10791221 +n10792335 +n10793570 +n10794014 +n11531193 +n11537327 +n11542640 +n11545524 +n11545714 +n11547562 +n11547855 +n11552386 +n11553240 +n11596108 +n11598686 +n11600372 +n11601177 +n11601918 +n11608250 +n11609475 +n11609684 +n11612923 +n11614250 +n11618861 +n11620673 +n11621029 +n11623105 +n11624531 +n11627168 +n11628456 +n11630017 +n11630489 +n11643835 +n11645914 +n11647306 +n11649878 +n11650558 +n11650759 +n11661372 +n11665372 +n11666854 +n11669921 +n11672400 +n11674332 +n11676500 +n11684264 +n11689483 +n11693981 +n11697560 +n11700864 +n11703669 +n11708658 +n11709674 +n11713164 +n11720353 +n11722982 +n11723770 +n11725015 +n11725623 +n11727091 +n11729478 +n11733054 +n11736694 +n11741350 +n11745817 +n11747468 +n11748002 +n11751765 +n11752578 +n11756092 +n11756669 +n11759224 +n11763625 +n11767354 +n11769621 +n11771539 +n11774513 +n11775340 +n11779300 +n11782036 +n11783920 +n11785668 +n11789438 +n11789962 +n11790788 +n11793779 +n11794519 +n11796005 +n11801392 +n11805956 +n11807108 +n11807979 +n11808721 +n11811473 +n11815491 +n11817914 +n11820965 
+n11823043 +n11830714 +n11830906 +n11832214 +n11836722 +n11839568 +n11845557 +n11851578 +n11855274 +n11857696 +n11862835 +n11865071 +n11866248 +n11868814 +n11869351 +n11869689 +n11872146 +n11875691 +n11875938 +n11877473 +n11878283 +n11887119 +n11890022 +n11892637 +n11894327 +n11898639 +n11900569 +n11902709 +n11915214 +n11915658 +n11915899 +n11916467 +n11918286 +n11919447 +n11920498 +n11924445 +n11928352 +n11928858 +n11931918 +n11932745 +n11939699 +n11940006 +n11943407 +n11944196 +n11945367 +n11946727 +n11947251 +n11948264 +n11950345 +n11951511 +n11952346 +n11953884 +n11954484 +n11956850 +n11965627 +n11967744 +n11970101 +n11971248 +n11971783 +n11972759 +n11973341 +n11976170 +n11977303 +n11978233 +n11982115 +n11985053 +n11985739 +n11988893 +n11991263 +n11997032 +n11997969 +n12006766 +n12008252 +n12008749 +n12010628 +n12013511 +n12015959 +n12018760 +n12020507 +n12024176 +n12030654 +n12034141 +n12036067 +n12036939 +n12041446 +n12043444 +n12045860 +n12050959 +n12053405 +n12056217 +n12057447 +n12062468 +n12065316 +n12065777 +n12075151 +n12076577 +n12080395 +n12083591 +n12086012 +n12086539 +n12087961 +n12090890 +n12092262 +n12094244 +n12095020 +n12096395 +n12101870 +n12102133 +n12105125 +n12107970 +n12108432 +n12109827 +n12110778 +n12112008 +n12112918 +n12113657 +n12117017 +n12119099 +n12119238 +n12121033 +n12124627 +n12126360 +n12131550 +n12135898 +n12136720 +n12137120 +n12137569 +n12139575 +n12141495 +n12142085 +n12143676 +n12144313 +n12146311 +n12147226 +n12152532 +n12153580 +n12154773 +n12155583 +n12156819 +n12157056 +n12157769 +n12158031 +n12158798 +n12159055 +n12159555 +n12160490 +n12161285 +n12163035 +n12164363 +n12166424 +n12168565 +n12170585 +n12173664 +n12174311 +n12174926 +n12182049 +n12187663 +n12188289 +n12195391 +n12196129 +n12199266 +n12201580 +n12202936 +n12205694 +n12214789 +n12215579 +n12217453 +n12221191 +n12224978 +n12225349 +n12226932 +n12231192 +n12236546 +n12237486 +n12244153 +n12245695 +n12246232 +n12252168 +n12252866 +n12253229 +n12256112 
+n12257570 +n12260799 +n12262553 +n12265394 +n12266217 +n12266796 +n12268246 +n12269241 +n12269652 +n12271643 +n12274630 +n12275489 +n12281241 +n12284262 +n12286826 +n12287642 +n12288823 +n12290748 +n12293723 +n12296432 +n12300840 +n12302071 +n12303462 +n12305475 +n12306717 +n12307756 +n12310349 +n12316444 +n12318378 +n12320010 +n12322501 +n12328398 +n12330469 +n12334293 +n12334891 +n12335483 +n12335664 +n12335800 +n12340383 +n12341542 +n12342299 +n12343480 +n12344283 +n12346578 +n12350758 +n12352287 +n12355760 +n12360108 +n12360684 +n12364604 +n12367611 +n12374418 +n12377198 +n12381511 +n12385429 +n12387633 +n12387839 +n12392070 +n12396924 +n12399132 +n12401335 +n12401684 +n12405714 +n12409231 +n12411461 +n12412355 +n12412606 +n12416423 +n12419037 +n12420535 +n12421467 +n12421683 +n12425281 +n12430198 +n12431434 +n12437513 +n12437769 +n12441958 +n12446200 +n12446519 +n12449296 +n12450344 +n12451915 +n12454159 +n12459629 +n12460697 +n12461466 +n12462032 +n12463743 +n12464476 +n12466727 +n12470092 +n12474167 +n12475035 +n12476510 +n12480895 +n12491826 +n12495146 +n12499163 +n12506181 +n12508309 +n12509476 +n12511856 +n12516584 +n12522188 +n12524188 +n12526516 +n12527738 +n12539074 +n12539306 +n12546183 +n12548280 +n12550210 +n12554526 +n12556656 +n12560282 +n12560775 +n12562577 +n12572546 +n12573256 +n12575322 +n12582231 +n12582665 +n12582846 +n12583126 +n12583401 +n12584191 +n12586298 +n12590232 +n12594989 +n12595964 +n12602262 +n12602980 +n12612170 +n12614477 +n12615710 +n12620196 +n12622875 +n12624381 +n12625383 +n12631331 +n12633638 +n12634211 +n12634429 +n12635744 +n12636885 +n12638218 +n12638556 +n12639736 +n12640607 +n12641413 +n12641931 +n12642200 +n12643473 +n12644902 +n12645174 +n12647376 +n12649065 +n12650556 +n12651821 +n12653218 +n12655869 +n12658118 +n12658846 +n12659356 +n12660601 +n12662772 +n12663804 +n12665048 +n12667406 +n12667964 +n12674120 +n12674685 +n12682411 +n12683407 +n12685431 +n12685831 +n12688716 +n12690653 +n12695144 +n12698435 
+n12705013 +n12707781 +n12708293 +n12709901 +n12711596 +n12713063 +n12714755 +n12715914 +n12717072 +n12719684 +n12724942 +n12725521 +n12727301 +n12731401 +n12732491 +n12732756 +n12733647 +n12741222 +n12742741 +n12743823 +n12746884 +n12749049 +n12752205 +n12755225 +n12756457 +n12762896 +n12768369 +n12771192 +n12772753 +n12777436 +n12778605 +n12779603 +n12785724 +n12791064 +n12793015 +n12794985 +n12798284 +n12800586 +n12801520 +n12805146 +n12806732 +n12810595 +n12812235 +n12814643 +n12817464 +n12822769 +n12823717 +n12823859 +n12832315 +n12833985 +n12834798 +n12836212 +n12836862 +n12839979 +n12840749 +n12842302 +n12842887 +n12844939 +n12849061 +n12853080 +n12854048 +n12858150 +n12866968 +n12869478 +n12870535 +n12871272 +n12877244 +n12878169 +n12879963 +n12882779 +n12884260 +n12890265 +n12893463 +n12903367 +n12904938 +n12908645 +n12909421 +n12912670 +n12917901 +n12922763 +n12926480 +n12928071 +n12929403 +n12930778 +n12931906 +n12934036 +n12934479 +n12939104 +n12941536 +n12942395 +n12943443 +n12946849 +n12950126 +n12952165 +n12953206 +n12956170 +n12957608 +n12960378 +n12960863 +n12965626 +n12968136 +n12969131 +n12970193 +n12971400 +n12973791 +n12974987 +n12976198 +n12980840 +n12982468 +n12983961 +n12985773 +n12987056 +n12988158 +n12992868 +n12997654 +n12997919 +n13000891 +n13001041 +n13001206 +n13001366 +n13001529 +n13002750 +n13002925 +n13003061 +n13003254 +n13003522 +n13003712 +n13004423 +n13004640 +n13004826 +n13004992 +n13005329 +n13005984 +n13006171 +n13006631 +n13006894 +n13007417 +n13007629 +n13008157 +n13008315 +n13008485 +n13008689 +n13008839 +n13009085 +n13009244 +n13009429 +n13009656 +n13010694 +n13010951 +n13011221 +n13012253 +n13012469 +n13012973 +n13013534 +n13013764 +n13013965 +n13014097 +n13014265 +n13014409 +n13014581 +n13014741 +n13014879 +n13017102 +n13017240 +n13017439 +n13017610 +n13017789 +n13017979 +n13018088 +n13018232 +n13018407 +n13019496 +n13019643 +n13019835 +n13020191 +n13020481 +n13020964 +n13021166 +n13021332 +n13021543 +n13021689 
+n13021867 +n13022210 +n13022709 +n13024012 +n13024500 +n13025647 +n13028611 +n13032115 +n13032923 +n13035241 +n13035389 +n13035707 +n13037585 +n13037805 +n13038068 +n13038376 +n13038744 +n13039349 +n13040629 +n13040796 +n13041312 +n13042982 +n13043926 +n13045210 +n13045975 +n13046130 +n13049953 +n13055423 +n13055577 +n13055792 +n13055949 +n13056135 +n13056349 +n13056607 +n13056799 +n13057054 +n13057242 +n13057422 +n13057639 +n13058037 +n13058272 +n13058608 +n13059298 +n13059657 +n13060017 +n13060190 +n13063269 +n13066129 +n13067191 +n13068917 +n13070308 +n13070875 +n13071371 +n13071553 +n13071815 +n13072031 +n13072209 +n13072350 +n13072528 +n13072706 +n13072863 +n13073055 +n13073703 +n13074619 +n13074814 +n13075020 +n13075272 +n13075441 +n13075684 +n13075847 +n13076041 +n13076405 +n13076643 +n13076831 +n13077033 +n13077295 +n13079419 +n13083023 +n13084184 +n13085113 +n13091620 +n13091774 +n13100156 +n13100677 +n13104059 +n13108131 +n13108662 +n13108841 +n13109733 +n13110915 +n13111174 +n13111881 +n13118707 +n13119870 +n13120211 +n13121104 +n13122364 +n13123431 +n13125117 +n13130161 +n13130726 +n13132034 +n13132338 +n13132486 +n13132940 +n13134302 +n13134947 +n13135832 +n13136316 +n13136556 +n13137409 +n13137672 +n13138308 +n13138658 +n13138842 +n13139055 +n13139647 +n13141141 +n13145444 +n13149296 +n13150894 +n13154841 +n13156986 +n13157137 +n13160831 +n13163991 +n13172923 +n13174670 +n13177529 +n13180534 +n13186388 +n13188096 +n13188268 +n13192625 +n13193642 +n13194572 +n13195761 +n13199970 +n13201969 +n13206817 +n13207736 +n13208705 +n13211790 +n13219422 +n13221529 +n13224673 +n13230662 +n13231678 +n13231919 +n13232106 +n13232363 +n13232779 +n13233727 +n13238375 +n13238988 +n13252672 +n13862780 +n13863186 +n13863473 +n13863771 +n13864153 +n13864965 +n13865298 +n13865483 +n13866144 +n13866827 +n13867492 +n13868248 +n13868371 +n13872592 +n13873502 +n13875392 +n13875571 +n13878306 +n13879320 +n13883603 +n13888491 +n13893786 +n13894434 +n13896100 +n13897996 
+n13900287 +n13903079 +n13905121 +n13905275 +n13905792 +n13912260 +n13915999 +n14633206 +n14696793 +n14844693 +n14853210 +n14899328 +n14900184 +n14974264 +n14977504 +n14992287 +n15062057 +n15067877 +n15089258 +n15089472 +n15089645 +n15089803 +n15090742 +n15092409 +n15092751 \ No newline at end of file diff --git a/workloads/realworld/pinned/darknet/cfg/imagenet.shortnames.list b/workloads/realworld/pinned/darknet/cfg/imagenet.shortnames.list new file mode 100644 index 0000000000000000000000000000000000000000..e7a18d44b543086958eaf60e6dc0b7deb0df9400 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/imagenet.shortnames.list @@ -0,0 +1,21842 @@ +kit fox +English setter +Siberian husky +Australian terrier +English springer +grey whale +lesser panda +Egyptian cat +ibex +Persian cat +cougar +gazelle +porcupine +sea lion +malamute +badger +Great Dane +Walker hound +Welsh springer spaniel +whippet +Scottish deerhound +killer whale +mink +African elephant +Weimaraner +soft-coated wheaten terrier +Dandie Dinmont +red wolf +Old English sheepdog +jaguar +otterhound +bloodhound +Airedale +hyena +meerkat +giant schnauzer +titi +three-toed sloth +sorrel +black-footed ferret +dalmatian +black-and-tan coonhound +papillon +skunk +Staffordshire bullterrier +Mexican hairless +Bouvier des Flandres +weasel +miniature poodle +Cardigan +malinois +bighorn +fox squirrel +colobus +tiger cat +Lhasa +impala +coyote +Yorkshire terrier +Newfoundland +brown bear +red fox +Norwegian elkhound +Rottweiler +hartebeest +Saluki +grey fox +schipperke +Pekinese +Brabancon griffon +West Highland white terrier +Sealyham terrier +guenon +mongoose +indri +tiger +Irish wolfhound +wild boar +EntleBucher +zebra +ram +French bulldog +orangutan +basenji +leopard +Bernese mountain dog +Maltese dog +Norfolk terrier +toy terrier +vizsla +cairn +squirrel monkey +groenendael +clumber +Siamese cat +chimpanzee +komondor +Afghan hound +Japanese spaniel +proboscis monkey +guinea pig +white wolf +ice bear 
+gorilla +borzoi +toy poodle +Kerry blue terrier +ox +Scotch terrier +Tibetan mastiff +spider monkey +Doberman +Boston bull +Greater Swiss Mountain dog +Appenzeller +Shih-Tzu +Irish water spaniel +Pomeranian +Bedlington terrier +warthog +Arabian camel +siamang +miniature schnauzer +collie +golden retriever +Irish terrier +affenpinscher +Border collie +hare +boxer +silky terrier +beagle +Leonberg +German short-haired pointer +patas +dhole +baboon +macaque +Chesapeake Bay retriever +bull mastiff +kuvasz +capuchin +pug +curly-coated retriever +Norwich terrier +flat-coated retriever +hog +keeshond +Eskimo dog +Brittany spaniel +standard poodle +Lakeland terrier +snow leopard +Gordon setter +dingo +standard schnauzer +hamster +Tibetan terrier +Arctic fox +wire-haired fox terrier +basset +water buffalo +American black bear +Angora +bison +howler monkey +hippopotamus +chow +giant panda +American Staffordshire terrier +Shetland sheepdog +Great Pyrenees +Chihuahua +tabby +marmoset +Labrador retriever +Saint Bernard +armadillo +Samoyed +bluetick +redbone +polecat +marmot +kelpie +gibbon +llama +miniature pinscher +wood rabbit +Italian greyhound +lion +cocker spaniel +Irish setter +dugong +Indian elephant +beaver +Sussex spaniel +Pembroke +Blenheim spaniel +Madagascar cat +Rhodesian ridgeback +lynx +African hunting dog +langur +Ibizan hound +timber wolf +cheetah +English foxhound +briard +sloth bear +Border terrier +German shepherd +otter +koala +tusker +echidna +wallaby +platypus +wombat +revolver +umbrella +schooner +soccer ball +accordion +ant +starfish +chambered nautilus +grand piano +laptop +strawberry +airliner +warplane +airship +balloon +space shuttle +fireboat +gondola +speedboat +lifeboat +canoe +yawl +catamaran +trimaran +container ship +liner +pirate +aircraft carrier +submarine +wreck +half track +tank +missile +bobsled +dogsled +bicycle-built-for-two +mountain bike +freight car +passenger car +barrow +shopping cart +motor scooter +forklift +electric locomotive 
+steam locomotive +amphibian +ambulance +beach wagon +cab +convertible +jeep +limousine +minivan +Model T +racer +sports car +go-kart +golfcart +moped +snowplow +fire engine +garbage truck +pickup +tow truck +trailer truck +moving van +police van +recreational vehicle +streetcar +snowmobile +tractor +mobile home +tricycle +unicycle +horse cart +jinrikisha +oxcart +bassinet +cradle +crib +four-poster +bookcase +china cabinet +medicine chest +chiffonier +table lamp +file +park bench +barber chair +throne +folding chair +rocking chair +studio couch +toilet seat +desk +pool table +dining table +entertainment center +wardrobe +Granny Smith +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +acorn +hip +ear +rapeseed +corn +buckeye +organ +upright +chime +drum +gong +maraca +marimba +steel drum +banjo +cello +violin +harp +acoustic guitar +electric guitar +cornet +French horn +trombone +harmonica +ocarina +panpipe +bassoon +oboe +sax +flute +daisy +yellow lady's slipper +cliff +valley +alp +volcano +promontory +sandbar +coral reef +lakeside +seashore +geyser +hatchet +cleaver +letter opener +plane +power drill +lawn mower +hammer +corkscrew +can opener +plunger +screwdriver +shovel +plow +chain saw +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +white stork +black stork +spoonbill +flamingo +American egret +little blue heron +bittern +crane +limpkin +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +European gallinule +pelican +king penguin +albatross +great white shark +tiger shark +hammerhead +electric ray +stingray 
+barracouta +coho +tench +goldfish +eel +rock beauty +anemone fish +lionfish +puffer +sturgeon +gar +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +triceratops +African crocodile +American alligator +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +whistle +wing +paintbrush +hand blower +oxygen mask +snorkel +loudspeaker +microphone +screen +mouse +electric fan +oil filter +strainer +space heater +stove +guillotine +barometer +rule +odometer +scale +analog clock +digital clock +wall clock +hourglass +sundial +parking meter +stopwatch +digital watch +stethoscope +syringe +magnetic compass +binoculars +projector +sunglasses +loupe +radio telescope +bow +cannon +assault rifle +rifle +projectile +computer keyboard +typewriter keyboard +crane +lighter +abacus +cash machine +slide rule +desktop computer +hand-held computer +notebook +web site +harvester +thresher +printer +slot +vending machine +sewing machine +joystick +switch +hook +car wheel +paddlewheel +pinwheel +potter's wheel +gas pump +carousel +swing +reel +radiator +puck +hard disc +sunglass +pick +car mirror +solar dish +remote control +disk brake +buckle +hair slide +knot +combination lock +padlock +nail +safety pin +screw +muzzle +seat belt +ski +candle +jack-o'-lantern +spotlight +torch +neck brace +pier +tripod +maypole +mousetrap +spider web +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +isopod +Dungeness crab +rock crab +fiddler crab +king crab 
+American lobster +spiny lobster +crayfish +hermit crab +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +sea urchin +sea cucumber +iron +espresso maker +microwave +Dutch oven +rotisserie +toaster +waffle iron +vacuum +dishwasher +refrigerator +washer +Crock Pot +frying pan +wok +caldron +coffeepot +teapot +spatula +altar +triumphal arch +patio +steel arch bridge +suspension bridge +viaduct +barn +greenhouse +palace +monastery +library +apiary +boathouse +church +mosque +stupa +planetarium +restaurant +cinema +home theater +lumbermill +coil +obelisk +totem pole +castle +prison +grocery store +bakery +barbershop +bookshop +butcher shop +confectionery +shoe shop +tobacco shop +toyshop +fountain +cliff dwelling +yurt +dock +brass +megalith +bannister +breakwater +dam +chainlink fence +picket fence +worm fence +stone wall +grille +sliding door +turnstile +mountain tent +scoreboard +honeycomb +plate rack +pedestal +beacon +mashed potato +bell pepper +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +cardoon +mushroom +shower curtain +jean +carton +handkerchief +sandal +ashcan +safe +plate +necklace +croquet ball +fur coat +thimble +pajama +running shoe +cocktail shaker +chest +manhole cover +modem +tub +tray +balance beam +bagel +prayer rug +kimono +hot pot +whiskey jug +knee pad +book jacket +spindle +ski mask +beer bottle +crash helmet +bottlecap +tile roof +mask +maillot +Petri dish +football helmet +bathing cap +teddy +holster +pop bottle +photocopier +vestment +crossword puzzle +golf ball +trifle +suit +water tower +feather boa +cloak +red wine +drumstick 
+shield +Christmas stocking +hoopskirt +menu +stage +bonnet +meat loaf +baseball +face powder +scabbard +sunscreen +beer glass +hen-of-the-woods +guacamole +lampshade +wool +hay +bow tie +mailbag +water jug +bucket +dishrag +soup bowl +eggnog +mortar +trench coat +paddle +chain +swab +mixing bowl +potpie +wine bottle +shoji +bulletproof vest +drilling platform +binder +cardigan +sweatshirt +pot +birdhouse +hamper +ping-pong ball +pencil box +pay-phone +consomme +apron +punching bag +backpack +groom +bearskin +pencil sharpener +broom +mosquito net +abaya +mortarboard +poncho +crutch +Polaroid camera +space bar +cup +racket +traffic light +quill +radio +dough +cuirass +military uniform +lipstick +shower cap +monitor +oscilloscope +mitten +brassiere +French loaf +vase +milk can +rugby ball +paper towel +earthstar +envelope +miniskirt +cowboy hat +trolleybus +perfume +bathtub +hotdog +coral fungus +bullet train +pillow +toilet tissue +cassette +carpenter's kit +ladle +stinkhorn +lotion +hair spray +academic gown +dome +crate +wig +burrito +pill bottle +chain mail +theater curtain +window shade +barrel +washbasin +ballpoint +basketball +bath towel +cowboy boot +gown +window screen +agaric +cellular telephone +nipple +barbell +mailbox +lab coat +fire screen +minibus +packet +maze +pole +horizontal bar +sombrero +pickelhaube +rain barrel +wallet +cassette player +comic book +piggy bank +street sign +bell cote +fountain pen +Windsor tie +volleyball +overskirt +sarong +purse +bolo tie +bib +parachute +sleeping bag +television +swimming trunks +measuring cup +espresso +pizza +breastplate +shopping basket +wooden spoon +saltshaker +chocolate sauce +ballplayer +goblet +gyromitra +stretcher +water bottle +dial telephone +soap dispenser +jersey +school bus +jigsaw puzzle +plastic bag +reflex camera +diaper +Band Aid +ice lolly +velvet +tennis ball +gasmask +doormat +Loafer +ice cream +pretzel +quilt +maillot +tape player +clog +iPod +bolete +scuba diver +pitcher +matchstick 
+bikini +sock +CD player +lens cap +thatch +vault +beaker +bubble +cheeseburger +parallel bars +flagpole +coffee mug +rubber eraser +stole +carbonara +dumbbell +singles +Virginia deer +eastern grey squirrel +gelding +pylon +table-tennis table +peacock +Segway +surfing +tamandua +knocker +steering wheel +motorcycling +coati +sitar +range +backhoe +agaric +dashboard +water polo +concrete mixer +treadmill +golf bag +skateboarding +royal tennis +tartan +four-wheel drive +sport utility +sedan +print +luggage rack +softball +windmill +ben +red admiral +jalousie +towel rail +truss +strand +ice hockey +sconce +wind turbine +plush +stained-glass window +ballpark +thoroughbred +love seat +red-spotted purple +miller +Adelie +freight liner +clock tower +acrobatics +shaving brush +ewe +ottoman +African violet +bicycle wheel +cork +windmill +satin +comma +coffee mill +baggage +wasp's nest +batting glove +Ferris wheel +push-bike +porthole +football stadium +gas tank +barbecue +handlebar +hula-hoop +fairground +rapier +garter stitch +exercise bike +control tower +carryall +minute hand +cog +riverbank +water nymph +common dandelion +android +hairbrush +redberry +fret +display window +pepper mill +litterbin +drapery +ducking +fly-fishing +broad jump +sprinkler +water-skiing +chicory +sail +volleyball +rugby +Texas bluebonnet +computer monitor +tortoiseshell +airplane propeller +solar array +figure skating +air conditioner +purple loosestrife +gearshift +outboard motor +cowslip +Abyssinian +dip +workstation +cosy +bunker +neon lamp +campanile +casket +verbena +amphora +sumo +common foxglove +sprocket +jelly bean +emperor penguin +night-blooming cereus +clock radio +black birch +bomber jacket +Virginia bluebell +bayonet +walker +altarpiece +tattoo +bridle +rocker arm +water turkey +spiderwort +flange +mute swan +laser printer +carburetor +coverlet +mountainside +baritone +auto racing +baluster +gal +peach bells +taffeta +grandfather +asparagus +horizontal stabilizer +world +grate 
+marsh marigold +white rhinoceros +movement +split rail +rollerblading +longhorn +muffler +church tower +light bulb +American agave +backpacking tent +overall +New World goldfinch +sectional +wing chair +transom +integrated circuit +dad +spar +picture frame +no-hit game +alternator +drill press +strawflower +hepatica +rangefinder +blinker +Welsh pony +nib +wagon wheel +rotor +tie +denim +jetliner +sculling +external drive +window frame +mourning dove +censer +stapler +batting helmet +flagon +machete +windshield +hedgehog +weeping willow +chief executive officer +hepatica +pet +Asiatic black bear +chinchilla +uke +Atlantic bottlenose dolphin +hair +dishtowel +flintlock +Bermuda shorts +lavender +searchlight +millwheel +piano keyboard +luna moth +bumper +parrot +skirt +manhole +coffee table +footstool +judo +Dalai Lama +armored personnel carrier +voile +saber +thoroughbred +wild carrot +gemsbok +caster +butterfly orchid +cow +sideboard +horseshoe crab +match play +cassette recorder +photomicrograph +drafting table +pediment +tramline +shipping +kitten +wainscoting +fried rice +helix +marguerite +pumpkin +white-bellied swallow +Tulipa gesneriana +common dolphin +face +red squirrel +bicycling +shipwreck +banded purple +cornice +pendant earring +forsythia +aardvark +seashell +spat +shoulder bag +fallow deer +yearling +common teasel +tufted titmouse +ancient +professional golf +purl +vehicle +okra +great grandmother +common lilac +rose mallow +newspaper +crucifix +chukka +armlet +fulmar +wapiti +doily +Greco-Roman wrestling +bleeding heart +kitchen table +bluebonnet +Cape buffalo +spun yarn +crape myrtle +dewdrop +great blue heron +medalist +vaulting horse +spinning wheel +skyscraper +Tahitian +forget-me-not +watercourse +guitarist +gargoyle +bee balm +pumpkin +hunting knife +flutist +lectern +skateboarder +foil +pant leg +hedge sparrow +dresser +automatic pistol +chicory +dialog box +chamberpot +black rhinoceros +fireweed +half-mast +pillow sham +pavilion +scarf joint 
+microprocessor +filly +dressing gown +shell +Arabian +child +radio antenna +butterweed +morris dancer +sparrow hawk +groom +brioche +floret +rainbow +earthworm +cellist +tine +toupee +balldress +map +angel's trumpet +ruin +fur +pronghorn +speed skating +used-car +stick +early spider orchid +stuffed peppers +snowdrift +flats +least sandpiper +stick +console table +ventilator +portable +kepi +pylon +viceroy +shoreline +Olympian Zeus +pestle +great-niece +life +air compressor +fanjet +scuba diving +fieldfare +tree swallow +personnel carrier +night-blooming cereus +sonogram +assembly hall +circuit breaker +chair +speed skate +soapwort +worsted +raspberry +burlap +flat panel display +Pyracantha +cemetery +turban +deer hunting +bottle green +dandelion green +pieta +aigrette +turntable +cover girl +clutch bag +kiwi +pea jacket +color guard +Malay +shire +crock +french fries +credenza +hockey stick +mourning cloak +potty seat +glass +balsamroot +medal play +red clover +gravy boat +garter belt +Guinness +meadow buttercup +jackass penguin +coursing +tooth +hawfinch +housetop +fluorescent lamp +black-backed gull +bookshelf +earplug +millipede +fawn +baseball bat +soup-strainer +organ loft +bugloss +tomahawk +blackcap +black-necked stilt +hand truck +bedstead +tempura +rose window +crimson +snow thrower +lesser whitethroat +palomino +ball +staff sergeant +wicker +garbage heap +great-nephew +parquet +coupe +nave +eggs Benedict +damask +flush toilet +Angora +pedometer +control room +bristle brush +kookaburra +telephone booth +Windsor chair +red-winged blackbird +cinnamon roll +briefs +cloister +sundress +mammillaria +unicyclist +covered bridge +coelogyne +fairy bluebird +phoebe +beer mug +headstock +parhelion +gorse +common European dogwood +fire-eater +professional football +rock climbing +cyclamen +tin +marjoram +Japanese morning glory +pipe +smasher +hang glider +abutment +birdbath +jotter +litter +artist's model +butterfly bush +dining area +sausage dog +piggery +English 
sparrow +Turk's-cap +platinum blond +song sparrow +alarm clock +tortoiseshell +chaise longue +flintlock +academic costume +graffito +Arnica montana +adding machine +waterside +director +jonquil +pipefitting +stud +Swedish meatball +musk rose +Venus's flytrap +raven +bougainvillea +little brother +field bindweed +finder +white admiral +tinfoil +serval +sheet +carthorse +people +potto +stockroom +sphinx +slate roof +mountain laurel +majolica +coal black +repository +bufo +pique +binder +tread +attorney general +hydraulic press +videocassette recorder +bumper car +professional baseball +cow parsley +ern +blue peafowl +common hyacinth +jack-in-the-pulpit +ice hockey rink +sport +camper +tailback +flash +stacks +pulp +Christmas cactus +netball +calliandra +curler +large periwinkle +cobweb +forward +Roman arch +cross bun +stoneware +banana bread +cape jasmine +settle +tongue +frock +pepper shaker +pitching coach +CD-R +casing +faience +hand cream +CD-ROM +recliner +striped bass +clary +sketch +risotto +reticle +white clover +touch football +kitty +great-aunt +Japanese maple +sidecar +muscovy duck +hack +rope bridge +organist +stinging nettle +pocket watch +Indian pipe +amorphophallus +bird's-foot violet +caller ID +furnishing +carriageway +dish rack +heiress +nail polish +beldam +Dall sheep +teriyaki +stateroom +laughing gull +chow +bookmark +timer +toga virilis +deviled egg +coltsfoot +Papuan +native +cygnet +automation +portfolio +cabbage palm +cube +broiler +radish +broodmare +castor-oil plant +pith hat +talus +lass +thatch +common marigold +young buck +igloo +prairie rattlesnake +soccer player +spoke +place +slide fastener +tapestry +toy +headboard +cross-country skiing +harness +sconce +rim +ballet skirt +transvestite +saddlebag +common evening primrose +taillight +challah +willet +ready-to-wear +cloud +answering machine +waterfront +vane +granddaughter +Chinese gooseberry +tureen +cab +truffle +viola +bootlace +chemise +taro +petal +candied apple +soccer +miniature 
golf +front porch +asparagus +Sauvignon blanc +daisy fleabane +ceiling +slip-on +bottle-nosed whale +redbud +black squirrel +snowsuit +ribbing +gravestone +creme brulee +ambassador +local +archery +love-in-a-mist +garbage +thyme +night-blooming cereus +goshawk +cuckoopint +azure +German iris +salad bowl +puppy +cockhorse +giant clam +biplane +stele +necklet +sea otter +crest +door +reformer +comforter +Byelorussian +bottle +hemline +book bag +leotard +owlet +spoon +sari +bidet +Latin +reticulated python +bowling shoe +futon +gaiter +coypu +tea urn +waders +bangle +snowbank +pencil +porter +azalea +English lavender +red spruce +team sport +cruet +high-rise +O ring +vodka +cormorant +Canada thistle +clasp +showjumping +rattan +red fox +sun parlor +Charolais +Tommy gun +bird's foot trefoil +sedge warbler +knot +chives +car tire +steam engine +adapter +spirea +common allamanda +oyster shell +harbor seal +baobab +wick +plumbago +downy woodpecker +coconut +leash +kasbah +hour hand +upholstery +mallard +cricket bat +lady +kitchenware +right-hander +leopard +olive green +common valerian +blue whale +blackboard +redhead +periwinkle +fingerboard +hard hat +locker +breakfast table +capybara +beekeeper +harness +feeder +water hyacinth +hexapod +brown thrasher +percale +lever +patriarch +arete +book +book +senator +bunya bunya +couch +durian +common lady's-slipper +mountain ash +golden barrel cactus +bicycle seat +beret +pop +musk mallow +manatee +cotton candy +boxing glove +backboard +tongue +saguaro +playground +capitol +sanderling +wagtail +deputy +tractor +tap +lady's smock +noseband +worsted +radiotelephone +camisole +forelock +muscat +sweet scabious +crane fly +butterfly weed +chestnut +pinata +inositol +borage +aquatic +belly +broadcaster +gondolier +egg yolk +blush wine +bufflehead +rambutan +oleander +horse-trail +sea holly +yard bird +conference room +lacrosse +belted kingfisher +defile +extremum +whistle +bear cub +grainfield +potage +watermelon +lasagna +sheik 
+Cooper's hawk +bulb +basketball court +paella +cassette tape +scatter rug +kid +impala lily +Minnesotan +Sudanese +chocolate +tail +quack-quack +whistling swan +shoulder patch +frozen custard +sumo wrestler +smoothie +bock +meat grinder +latch +palisade +radial +sake +kestrel +corn chowder +airframe +electrician +reamer +metropolitan +cotton flannel +cassowary +crossbill +operating room +winter aconite +flute +Tasmanian devil +billboard +suds +kilt +aperitif +cooling tower +avocado +hooded merganser +coleslaw +bee balm +ladder-back +insurance broker +scaffolding +polo mallet +double bed +two-hitter +bluff +gamboge +baby +lawn chair +frond +pistol grip +fancy dress +marquetry +jambalaya +fireweed +Eurasian kingfisher +cue ball +ice plant +horseweed +rose moss +musher +sun +viscount +white-breasted nuthatch +gin and tonic +thermos +Kenyan +first-aid kit +four-wheeler +tourist +stairwell +Gambian +liqueur glass +hovercraft +cocktail dress +twin +coriander +blister pack +Barrow's goldeneye +canteen +irrigation ditch +great white heron +tree sparrow +canal boat +lens +food processor +common raccoon +Baltimore oriole +black-eyed Susan +bush hibiscus +corolla +sire +mustachio +professional wrestling +elk +clustered bellflower +pannier +musk ox +crapaud +animal trainer +rosebud +ring-necked pheasant +little egret +cappuccino +rocker +bristlecone pine +cheerleader +hedge violet +semaphore +central processing unit +speedskater +delivery truck +assembly +hedgehog cactus +bergenia +bull thistle +bladder campion +cinquefoil +inula +cellulose tape +main rotor +bootee +autogiro +ice +grey +meadow cranesbill +hummus +valise +chassis +mountain goat +blacktail prairie dog +Chardonnay +romper +street +shoveler +wood ibis +topiary +chalice +silo +circus acrobat +Rollerblade +cosmos +woof +heroine +cold cream +marabou +herb robert +garden lettuce +nymph +floor lamp +automobile engine +heel +radiator +seeded player +fedora +father-in-law +peahen +Bahamian +wiper +wood pigeon +barn owl 
+pegboard +chorus frog +kin +roller skate +stob +rosemary +cowbird +hortensia +cranberry sauce +shot glass +Dixie cup +gnu +fire alarm +diet +booster +oxeye daisy +twayblade +high-definition television +truss bridge +bunk bed +mule +blackbuck +facsimile +frog orchid +point-and-shoot camera +brocade +gazebo +prairie gentian +concert +paintball +Cognac +maid +afghan +barbecued spareribs +pintail +tramway +commissioner +finger-painting +beef stew +caftan +Aberdeen Angus +demonstrator +sea trout +pigtail +thrush nightingale +barbados cherry +sashimi +ridgeling +lamppost +gabardine +red-shouldered hawk +bath salts +cavern +cymbid +Haitian +boater +southern buckthorn +arctic +motorcycle cop +red gum +Clydesdale +Zamboni +beagling +villa +demitasse +Sheetrock +lollipop +hybrid petunia +post horse +carabiner +brussels sprouts +Durham +stylist +pothole +sleigh bed +scallop shell +harrier eagle +papaya +Japanese persimmon +sachet +wild rice +chipboard +gun enclosure +menorah +chinook +headset +white campion +ocean +Secretary of State +G-string +bone china +basil +greenish blue +camcorder +concrete +screech owl +trumpet honeysuckle +flugelhorn +layette +cattle egret +case knife +mandarin duck +robber fly +salwar +dressing table +doughnut +facade +runner +honeypot +surf casting +diver +angel's trumpet +spin dryer +chameleon +wand +snow +vitamin A1 +manageress +volleyball net +antiperspirant +street clothes +tree sparrow +cords +sundew +bricks and mortar +caryatid +bridesmaid +trestle bridge +eyepiece +celebrant +scarlet pimpernel +gas range +onion +green salad +squill +creepy-crawly +hunk +little owl +salad nicoise +earflap +bird feeder +spray gun +bunny +Cheops +amazon +blue tit +Nissen hut +Kalashnikov +skylark +kremlin +shoebill +shopping bag +frigate bird +telephoto lens +peplum +moss pink +echidna +wastepaper basket +wood ibis +workroom +ankle brace +telpherage +Michaelmas daisy +figure skate +swami +nylons +cardoon +cocotte +headstall +twin bed +parsley +dirndl +corn 
poppy +nut bread +cloche +light heavyweight +mayor +lip-gloss +punch bowl +pottage +mango +fledgling +mousse +four-wheel drive +barrel +banana boat +trouser +bathroom +Sauterne +ring +settee +lavaliere +safe-deposit +godson +leatherette +schoolmate +radish +hedge trimmer +dahlia +euphonium +palace +vaulter +singlet +slicer +Pilsner +cockateel +kangaroo paw +Cub Scout +master bedroom +hexagon +cenotaph +Barberton daisy +Netherlander +intersection +Korean +gravel +chandelier +hospital bed +flash memory +pier +whole wheat flour +maroon +pale ale +special +snow bunting +crinoline +dustpan +barrette +common wood sorrel +yolk +pothos +speakerphone +tendril +cabinetwork +farm horse +brake disk +streetlight +superhighway +bandsaw +panting +pressure cooker +girdle +old man +cereal bowl +felt +hurling +architecture +harmonium +chain +blueberry +cellar +smocking +scrub brush +tablespoon +sweet corn +graining +library +street +bill +felt-tip pen +monkshood +crowd +log cabin +newel post +hack +elephant seal +golden pothos +popcorn +outhouse +patch pocket +fish and chips +tape +wax plant +eaves +fried egg +emerald +tea cart +fan blade +daily +Bowie knife +rowing boat +leaf shape +man +crayon +trumpetfish +chipping sparrow +whiskey bottle +pillion +city hall +golden pheasant +cheerleader +creeping bugle +couch +Dumpster +Homo sapiens sapiens +cranberry juice +cockpit +demagogue +joinery +scrambled eggs +technician +sidewalk +sheep +keyhole +power line +polyanthus +roulette +first lieutenant +checkout +tabletop +nasturtium +schnapps +engineering +skateboard +ground fir +bouquet +bunk +resort area +fleur-de-lis +power steering +opera +Bolivian +Friesian +buckskins +bay +slider +frozen yogurt +cabin cruiser +saunterer +lean-to +fishing eagle +bog star +cantaloupe +mouth +music stand +fiddlestick +brilliantine +pinball machine +bairn +barred owl +bath oil +signorina +Mason jar +nymph +rubber band +garden nasturtium +razorbill +Japanese beetle +batting cage +trestle +borage +Secretary 
of the Interior +scanner +baguet +baseball cap +chow mein +pen +jewelweed +barbet +chasm +pectoral sandpiper +holster +glasses case +sand +crevice +Kickapoo +snowboard +locket +satchel +tankard +alpinist +moorhen +cow pen +whooper +crown +chain +silversword +wild geranium +hi-fi +Tibetan +waterwheel +bee orchid +ruby-crowned kinglet +common broom +tabloid +javelin +sauna +klammath weed +zebra finch +spider orchid +velour +chiffon +lecture room +barrel +loggia +millstone +flatlet +soupspoon +econometrician +golf-club head +daphnia +parlor +fire-eater +juggler +attache case +hay bale +kisser +knitting needle +news magazine +flatbed +Senegalese +trumpeter +trampoline +brogan +bone +caftan +lobster pot +gazpacho +anthill +ramekin +mainsail +penitentiary +spotted flycatcher +cookstove +root beer +broom beard grass +pogo stick +plywood +epee +gas oven +Global Positioning System +sweet false chamomile +breakfast area +bullring +second cousin +wave +decolletage +rodeo +won ton +swastika +bobby pin +papaw +retaining wall +Muscadet +heavyweight +energizer +banner +amusement park +whinchat +drugstore +waxwork +meander +congee +heat sink +switch grass +commuter +peony +western white pine +wild raspberry +nightgown +saute +cardinal +claret +pollinator +biryani +pina colada +cassette deck +European sandpiper +block +flan +birdcage +baby +lieutenant colonel +ticking +European white lily +dog violet +coat hanger +premature baby +organza +string bean +balloonist +hurricane deck +window box +hang glider +bullfighting +piste +seahorse +hard cider +batik +common mullein +petite marmite +stuffed mushroom +tequila +ground ivy +fountain grass +stray +putter +buffer +comet +bomber +woodcarving +baseball glove +halter +garnish +selvage +megaphone +sea fan +rabbit hutch +very important person +analog watch +long-head coneflower +northern pike +roll-on +cigarette butt +terraced house +penknife +windshield wiper +cricket +straightener +snow pea +cockerel +canister +sour bread +recovery room 
+toilet bowl +tyrannosaur +big sister +quartz battery +television receiver +vitamin C +tailpipe +field thistle +stonechat +col +monstrance +gift wrapping +herbivore +quarter horse +ice-cream sundae +rumpus room +eyepatch +clary sage +French lavender +snorkel +choir +tent-fly +cat box +horse racing +high priest +barrel cactus +pin oak +wild thyme +keyboardist +raiser +hammock +hail +bungee +chocolate mousse +major +buzzard +gopher tortoise +Chablis +water meter +benthos +donna +blender +Mauser +avocet +rye +mulch +chancel +dusty miller +mate +corbel +minaret +frittata +French toast +mosaic +home brew +water faucet +beard +swivel chair +acropolis +largemouth +abbey +tabby +driver +copperhead +stirrup +Boston fern +Tennessee walker +artichoke +honor guard +chapatti +enchantress +sweat pants +electric organ +column +dry vermouth +range hood +Red Delicious +rape +splint +catapult +gourd +antipasto +plaza +carnation +star +wood anemone +English primrose +male fern +boot +atrium +Japanese deer +carnivore +yearling +doe +guelder rose +chicory +stretch pants +ice-cream cake +frogfish +tarpaulin +chicken soup +balaclava +tor +feverfew +three-hitter +flyweight +aqua vitae +locker room +wether +teacup +wide-angle lens +hook +ladder-back +osprey +awning +wedding +chest protector +pooch +rose mallow +orange daisy +fondant +envelope +duckling +blackberry +goosander +snorkeling +philatelist +broad bean +Frank +bok choy +basket +absinth +cayenne +blackbird +bottled water +trooper +timber +stable +chestnut +tomatillo +bell +banquet +rainbow trout +macrame +appointee +heart +chipmunk +purple clematis +safety bicycle +shuttle bus +Japanese black pine +lentil soup +downhill +field mustard +brass +hand-me-down +greater yellowlegs +fanny pack +croquet mallet +hip roof +duffel bag +Ritz +document +pie plant +staff member +lifeguard +white-throated sparrow +Cameroonian +hydrofoil +platter +common ageratum +middleweight +chairlift +brunch +pharmacist +lemon +driveshaft +green snake +lip 
+London plane +mangrove +crystal +siskin +common jasmine +hollandaise +villa +cross-country riding +mother-in-law's tongue +generator +Tanzanian +whisk +seeder +ashtray +griddle +evening bag +bluebird +bran muffin +square dancer +luggage compartment +tropical pitcher plant +autofocus +tape drive +silencer +Hawaiian guitar +swamp sparrow +Zimbabwean +drawing room +weekender +liparis +streambed +samosa +hitter +water heater +tidal basin +ossuary +dik-dik +camouflage +fiance +Jordanian +rolling pin +slingback +turret +hen +jennet +playpen +woodhewer +bushing +church bell +bear grass +double knit +tennis pro +Joe-Pye weed +pave +pochard +painted beauty +crinoline +gumbo +trestle table +schnitzel +balloon flower +Turkish coffee +extension cord +wireless local area network +sluice +umbel +microeconomist +sky +aisle +commander in chief +hydroplane racing +poll +Coca Cola +fuel injection +bird pepper +monkey puzzle +English muffin +riverbed +varietal +kachina +airport +saltwort +oolong +red-hot poker +mihrab +cocoa +jersey +Walkman +syndic +Hessian boot +millstone +carpenter +outfall +curbstone +mocha +field pansy +patriarch +slacks +switchblade +killdeer +whelk +pampas grass +racquetball +platform bed +Indian rhinoceros +Japanese iris +blacktop +dinner jacket +stud +jodhpurs +telephone pole +business district +kurta +basil +handset +file folder +gloriosa +orphan +cantle +cookie sheet +cafe au lait +drawbridge +hill myna +Western diamondback +watch case +cardcase +bowling alley +mattress cover +canvasback +pompadour +cornice +matador +cigar cutter +skunk cabbage +baptismal font +bitters +refectory +egg +parula warbler +tiger lily +field house +nanny +skin-diver +soda water +lymphocyte +carport +chocolate fudge +amphitheater +sugar candy +sea hare +open-face sandwich +dessert spoon +staple gun +envelope +worker bee +general +garment bag +maypop +autobahn +Atlantic puffin +polo shirt +Humvee +spice rack +grotto +banderillero +gaillardia +black-crowned night heron +oboist 
+weigela +Dictaphone +dwarf iris +marsh mallow +yarrow +eccentric +catsup +jade green +mistress +henbit +beachwear +head +commuter +strawberry tree +chickpea +clothespin +fleabane +brussels sprout +winter melon +Laconian +great horned owl +caricaturist +nan +flowerbed +triple sec +dairy +round of golf +cardinal +kauri +Zulu +Armagnac +cowberry +mouthpiece +wild calla +bling +puppeteer +beer drinker +adder +field sparrow +chocolate pudding +blacksmith +finback +Shetland pony +cheese fondue +panty girdle +soda can +electrolytic +florist's chrysanthemum +yellow jasmine +tudung +equalizer +ridge +dulcimer +grappa +barn swallow +coneflower +enamel +poached egg +halfback +yak +toby +Fleet Street +blue catfish +sand tiger +flying buttress +snaffle +stoop +first base +cultivated land +first lady +waratah +headquarters +arnica +lovebird +common morel +parasol +disk clutch +Xerox +vitamin P +vitamin B12 +long sleeve +certified public accountant +hot pants +pitch pine +pantie +drawers +cake mix +boar +grey +bride +false sago +bullion +coach house +bass guitar +Japanese banana +meadow clary +black belt +Canterbury bell +smallmouth +treadmill +great white heron +enchilada +rummer +captain +camisole +wild garlic +oak fern +ultramarine +peach +hawkweed +autostrada +adit +anaconda +artwork +skinhead +jello +hermit thrush +Bewick's swan +dress suit +trail bike +stubble +common polypody +Riesling +Easter lily +telegraph key +envelope +garlic bread +perianth +salad bar +steppe +club sandwich +nude +garden forget-me-not +Tuareg +flood +Statehouse +charcoal +boy scout +Rhone wine +parfait +spoor +lanyard +octagon +brown bread +quarterback +quilted bedspread +hookah +Pepsi +hamburger bun +entrepreneur +saddle oxford +snake's head fritillary +undies +chemise +skidder +chickpea +carnation +honey bun +mortar +Montrachet +automobile horn +skylight +gingham +rafter +pantile +climbing frame +scarlet runner +cable +cornstalk +mockingbird +raisin bread +chili sauce +hand calculator 
+concert-goer +detached house +coq au vin +lasso +hyssop +globe thistle +paper clip +slide +Jerusalem artichoke +tetrahedron +mock orange +lemon lily +finger +little sister +handcuff +horse wrangler +pavlova +oilcloth +snow-in-summer +common mugwort +greenshank +ice-cream cone +rubber boot +gunnysack +disk jockey +long trousers +sorghum +pontoon +calf +fire extinguisher +cotton thistle +pilot whale +ao dai +steamroller +wristwatch +tawny owl +city +country store +ironweed +kennel +bathrobe +rattan +drawer +fly tent +choline +musk thistle +courthouse +Yugoslav +bush +trawler +shellflower +jade vine +ragged orchid +pea soup +King Charles spaniel +hubcap +snook +paddy +bow and arrow +shovel +dill +cliff swallow +cadaver +hijab +masterpiece +fish geranium +kettle +sanitary napkin +carrot stick +Mountie +peanut brittle +dam +jackal +windowsill +butterfly orchid +bodice +picador +pale yellow +beanie +petiole +tenor saxophonist +bungalow +gnomon +stock saddle +field glass +rigging +wood grain +Speaker +settlement house +swamp milkweed +paper nautilus +tangerine +champagne +crescent roll +library +Schmidt telescope +stemless carline thistle +motorcyclist +alpine ash +planchet +water closet +casuist +hand luggage +hyssop +spaghetti and meatballs +cannelloni +cedar waxwing +water dog +brick red +linkage +sweep hand +purple heather +macaroni and cheese +butter knife +refreshment +malt +St. 
Augustine grass +wainscot +compass +gas heater +tamale +table saw +referee +borsch +projector +dracaena +peppermint +Reuben +Abyssinian banana +glassblower +floss +small stores +artilleryman +lapwing +ranch +garbage man +dwarf banana +commelina +currant +adulteress +landlocked salmon +pasqueflower +nan +tiger lily +Eritrean +rotunda +catsup bottle +mezzanine +royal fern +blended whiskey +bowler hat +mistletoe +manor +fusee drive +pistachio +dispensary +swamp +amputee +sculptor +schoolmaster +Chinese anise +dwarf iris +livestock +chronograph +nectarine +jockey +plaster +motel room +swamp azalea +hippeastrum +space station +duchess +catacomb +dovetail +cockscomb +common spotted orchid +brittlebush +cleats +cloche +hotchpotch +cabin car +prey +indigo +light beer +bear's breech +jonquil +analyzer +alyssum +spur gear +ice tea +honey buzzard +twayblade +dirndl +atlas moth +croquette +carafe +flyweight +professional basketball +multivitamin +air terminal +phial +roll-on +skunk cabbage +bird of paradise +rose +cooter +camping +divided highway +herbage +sweet vermouth +common comfrey +eggplant +office building +glutton +gefilte fish +bicycle rack +swamp birch +Venetian blind +Pernod +Norway spruce +portrait camera +bastion +vitamin Bc +Ugandan +Indian red +okapi +emu +vin ordinaire +chintz +shrimp cocktail +numbat +tall oat grass +cable car +stopcock +ham sandwich +Yemeni +stanhopea +plate +chicken broth +common yellowthroat +California poppy +radio +chocolate egg +mess jacket +tea table +physostegia +Japanese flowering cherry +confectionery +chicken cacciatore +painted nettle +popover +white rice +strapless +mohair +electrical cable +coil spring +arterial road +miniature fan palm +spectator pump +pesto +interlocutor +eastern kingbird +dongle +vitamin B6 +stuffed tomato +cough drop +okra +black +barbecue +burial mound +firstborn +corn snake +amberjack +bollard +horn +Black African +elbow pad +Camembert +circle +Japanese apricot +hearing aid +rock star +creature +taster 
+bubble gum +scull +lemon balm +chaetodon +anemometer +brake drum +fuselage +courthouse +aqualung +yellow adder's tongue +reception desk +guy +buffalo wing +ginger beer +robin +pantothenic acid +marsh hawk +yellow journalism +exhaust +cardamom +Tabasco +ax handle +patriarch +floor +pine snake +spoiler +hood +sphagnum +parrotfish +orphanage +redpoll +beef Wellington +white spruce +cherry plum +scapular +field lens +broomstick +mouser +wood thrush +Nebraskan +hotelier +milk thistle +soya milk +Munich beer +boucle +snowy egret +dust storm +steward +kudzu +oriental poppy +presbytery +burro +orange soda +stonecrop +splashboard +menagerie +dormer +wire cutter +yellow bells +Dubliner +shore pine +cousin +racing gig +Morgan +gold plate +villager +snifter +granny's bonnets +egg roll +Spode +amabilis fir +babbler +pestle +heliopsis +halter +black spruce +President of the United States +ski slope +chocolate fondue +lockstitch +motel +Epipactis helleborine +tabbouleh +Yorkshire pudding +overpass +Timorese +presbyter +tablefork +bottle gourd +tiara +vintage +pilgrim +reindeer moss +shower stall +towel rack +kachina +chef's salad +breeder +cow parsnip +walker +Black woman +Irish coffee +portrait lens +lateen +gilt +successor +cargo container +Lithuanian +mayapple +paisley +highchair +strawberry jam +flying fox +field scabious +blue-eyed grass +screw +Frisbee +dressing room +cholla +walkie-talkie +red currant +centrifugal pump +smorgasbord +hot rod +marcher +rowanberry +welwitschia +amphitheater +pew +concert band +bosom +pillbox +seagrass +openwork +meadow goldenrod +shower +chicken sandwich +Boston ivy +plastron +oilfield +stuffed tomato +juniper berries +frame +Spanish mackerel +family room +powder horn +fight +maguey +bunker +work-shirt +air filter +nosh +sugar bowl +foothill +reliquary +tugboat +horsebox +grater +palace +board member +campsite +halibut +geneva +ginger ale +high commissioner +genet +bodywork +spaghetti +protractor +pipe cutter +wood anemone +turkey cock 
+surge suppressor +green turtle +spoiler +bedsitting room +television room +ballot box +shasta daisy +impeller +capote +bitter +California wine +lock +spinnaker +gill fungus +baby's breath +nut and bolt +moonflower +houseboat +distributor cap +coffee bean +gusset +bowling ball +knitwear +frieze +mistflower +roadster +cue +circuitry +brake +butt hinge +Chickasaw +leopard frog +wing tip +puree +mantel +pantheon +grandfather clock +cockchafer +pomegranate +cleaners +eyeshadow +Oregon cedar +rock hopper +hawksbill turtle +agriculturist +yellow-crowned night heron +Albanian +pumpkin seed +chateau +goggles +camper trailer +bracket fungus +cigarette case +signal box +saddle blanket +poison ivy +set gun +cattleya +dry fly +concert hall +personal digital assistant +talcum +deodorant +common starling +painted turtle +kea +plenipotentiary +pantyhose +masjid +buskin +hurdle +cocktail lounge +belting +sour dock +knife blade +sugar snap pea +paddle +dickeybird +brace +keep +call center +yacht +lead pencil +tumbler +production line +tetra +private +French window +express +ski boot +pinto +broad bean +American crow +screech owl +snapper +power cord +Manx +rambutan +sun deck +stonefish +golden eagle +national monument +readout +cork oak +hacksaw +beer can +bathe +tussock bellflower +wet suit +mihrab +big game +highlighter +sprocket +measuring worm +grapefruit +samovar +distributor point +steak knife +incubator +loon +temporary hookup +hippodrome +hot spring +spacesuit +flea market +clay pigeon +catbird +earmuff +tetherball +yellowfin +cellophane +lanolin +clapperboard +velveteen +police dog +cashew +sequencer +mango +duplex house +bazaar +Golden Delicious +red carpet +collet +kickstand +broadloom +diskette +tank engine +compact +diesel-electric locomotive +whale shark +water moccasin +mountain avens +tropic bird +ginkgo +ski cap +fixative +glockenspiel +chopine +ethernet +herring gull +skeleton key +finger paint +conference table +great crested grebe +harbor +white-crowned sparrow 
+Bullock's oriole +guestroom +boutique +cable television +roulette wheel +Luger +Latin American +trumpeter +blindfold +baby +freshwater bass +home plate +bonefish +giant sunflower +giant tortoise +planking +pigeon hawk +oceanfront +door +bazaar +common wasp +conformation +kick starter +kid glove +corydalis +shuttlecock +writing desk +ivory gull +shirttail +diving suit +weka +downy birch +altar +wild sage +tufted puffin +cabinet +Orpington +cineraria +bottom +dial +coracle +resort hotel +soap dish +spotted owl +billiard room +ghetto blaster +red-breasted nuthatch +hatchling +chalet +bracteole +crusher +mixer +net melon +farmhouse +Dutch oven +transept +penlight +palmyra +stewing pan +solar cell +crochet needle +black-winged stilt +germander speedwell +crinkleroot +truncheon +bunchberry +hatchback +sounding board +mixing faucet +chess master +bisque +Brie +Sitka spruce +pawn +Mexican-American +space rocket +choreographer +collared peccary +duffel +nacho +patchcord +carpet snake +omnivore +watering can +hall of residence +streamer fly +sunroof +great grandson +oil refinery +billiard player +ivy geranium +key palm +pinwheel +yellow-shafted flicker +purple onion +soldering iron +condominium +fishing gear +heat pump +marine iguana +cuckoo clock +Bletilla striata +headrest +spotted salamander +field hockey ball +pound +carboy +vertical stabilizer +groundsheet +cinnamon bread +acorn squash +sheathing +lakefront +Jeffrey pine +synthesizer +olive +apple +pannier +ponderosa +Jew's-ear +latch +equatorial +metasequoia +permit +bloomers +town hall +fava bean +casino +bier +jampot +common snapping turtle +clary sage +oatmeal +Dutchman's breeches +massif +Guyanese +heifer +handball +sweat suit +pomelo +Iceland moss +customhouse +sandbag +archer +gyrfalcon +sword cane +marmite +whole snipe +blue crab +sugar spoon +brownstone +chicken wire +lizardfish +dump truck +chicken yard +chamois +electric +idle pulley +jujube +wrestling mat +aoudad +Burmese cat +water shamrock +dormitory 
+Unknown Soldier +hearse +bumper +clipper +desert pea +critter +semitrailer +backboard +common St John's wort +Atlantic manta +song thrush +jukebox +quoin +eastern chipmunk +copper beech +paintball gun +bull +package store +fraise +royal poinciana +niqab +traction engine +objective +day nursery +ski lodge +orphan +summer house +cereal box +router +sleuth +jodhpur +polyp +croquet +sport kite +green onion +tulle +etagere +tussock caterpillar +rest house +elderberry +bridal wreath +Torrey pine +silver wattle +kidney bean +pentode +laelia +Allen wrench +sporran +red drum +tricot +heterodyne receiver +magazine rack +stone curlew +trawler +suckling +niblick +sandwich plate +double door +Togolese +pitching wedge +desert tortoise +cloth cap +date palm +webbing +jumper +frogmouth +copperhead +covered couch +black mallee +riser +scraper +gauntlet +pantheon +food court +muntjac +grocery bag +bread-bin +transmission shaft +primigravida +window seat +crab apple +seat +Fresnel lens +dendrobium +hatchback +little theater +butter dish +back porch +umbrella tree +carrot +seventy-eight +coconut +music stool +Tesla coil +bay willow +American basswood +sabot +wheel and axle +gazette +lute +bassinet +hart +mecca +breadbasket +silverfish +handball +Scotch pine +box camera +stately home +Hereford +tread +single-breasted jacket +desk phone +deodar +professional boxing +fly casting +box wrench +black oak +martello tower +red campion +bullock +sweet William +bay leaf +dollhouse +flounder +fox hunting +beanbag +king mackerel +rouge +film advance +common mallow +parasitic jaeger +satellite receiver +nurse shark +chesterfield +tomatillo +plimsoll +hatbox +bloomer +foul-weather gear +longleaf pine +horse mackerel +tree lizard +bark +belfry +Treasury +perch +purple finch +stag beetle +fragrant orchid +tachymeter +tadpole +cookie jar +knee piece +agueweed +bones +chick +golf glove +toothpick +taboret +rotor blade +field artillery +purple willow +redhead +spark plug +guava +voice mail +cross 
+butterfly valve +star magnolia +olive +room light +Australian turtledove +embassy +Iraqi +singles +nestling +spinning rod +radial engine +rowan +sandbox +boss +moccasin flower +veneer +mint +American chestnut +white whale +CPU board +florist +press box +hurricane lamp +giant kangaroo +greater whitethroat +winter jasmine +blue +department store +southern red oak +saber saw +corn muffin +bellbottom trousers +toaster oven +red eft +condominium +galago +sunbather +redpoll +common European earwig +songbird +linnet +light meter +bracer +tepee +gumbo +water glass +roofing +spathiphyllum +shofar +sand lizard +washroom +Brussels carpet +brachyuran +home room +floatplane +knee brace +solar heater +felucca +gas ring +maguey +manse +blue columbine +cuppa +cigar band +male orchis +mudskipper +couscous +Chinese parasol tree +dude ranch +banyan +gopher snake +sundrops +aviary +African daisy +missel thrush +Photostat +stone pine +circus tent +tangle +printer cable +grease-gun +rose chafer +light pen +plantain +hearth +bullfinch +post oak +slow loris +Newtonian telescope +head +punt +spindle +New England aster +spotted sandpiper +pond pine +grass skirt +bug +black rat snake +tabasco +bull shark +tennis camp +scrambler +popinjay +bing cherry +ministry +cash register +redheaded woodpecker +kameez +farmer's market +roan +harpy +European toad +pizzeria +camshaft +hemp nettle +chicken coop +cottage pink +daybed +observatory +airdock +mountain devil +newsstand +kingfish +snow gum +jackdaw +lacquerware +peeler +miro +sister ship +damask rose +pack +snowshoe +Liberian +paramecium +tidytips +professional tennis +bookend +wood swallow +cayuse +cranberry +rock squirrel +steak au poivre +soul patch +female mammal +sash fastener +songwriter +oxeye daisy +apse +floor joist +hand towel +wheatear +cero +soul mate +golden fig +bus stop +psycholinguist +convenience store +manor hall +mountain sandwort +Euopean hoopoe +haricot vert +mausoleum +violist +flashlight battery +chard +fixer-upper +bank 
martin +testudo +diving duck +kohlrabi +Omani +sphygmomanometer +greyhound racing +chestnut +rattlesnake plantain +chaffinch +wolf pup +teakettle +cairn +souk +resident commissioner +chuckwalla +gaiter +capercaillie +liver chestnut +bean sprout +land line +ambassador +green pepper +common chickweed +Sharpie +Oriental arborvitae +oncidium +pallone +currawong +sweet alyssum +fire tower +eyebrow pencil +redfish +apricot +clementine +blucher +wigwam +pangolin +buggy +common oak +jumbojet +laser +cigarette holder +racquetball +georgette +cleft +scouring pad +drum printer +pond scum +American red squirrel +caranday +swamp willow +blindworm +brook trout +defense system +nyala +three-way calling +mizzen +shuttle +African lily +Oregon white oak +rain tree +fuel gauge +oriental cherry +wahoo +pear +jungle gym +bass fiddle +outrigger +angelfish +Old World coot +lime +battlement +yarmulke +herpes varicella zoster +burp gun +Alpine glacier +stun gun +pilot boat +Southern crab apple +bushtit +pullet +polo pony +jackfruit +raw vegetable +French marigold +golden shower tree +spike lavender +wahoo +brass knucks +cabbage palm +diesel-hydraulic locomotive +red jungle fowl +prairie sunflower +rye +loofa +icecap +shade tree +secretary bird +saffron +cos +muskrat +videodisk +Carolina wren +candy bar +Bohemian waxwing +flowering almond +cold frame +raglan +pine siskin +quince +western red cedar +red maple +adobe +agora +kumquat +tenement +bantam +bayberry +water jump +great granddaughter +snips +porcupinefish +brochette +love-in-a-mist +Iceland poppy +common sage +pace car +camel racing +slipcover +nopal +shoehorn +calypso +rhea +in-basket +maple syrup +cold chisel +Pacific ridley +dietary +aperture +lapin +rock hyrax +house wren +litchi +ragged robin +control center +shoebox +arabesque +eider +silver birch +bantamweight +ax head +softball +blue gum +Bechtel crab +tomato sauce +green douglas fir +sweet gum +macaroni salad +red phalarope +budgerigar +Bedford cord +Uzi +green woodpecker 
+ohmmeter +bacon-lettuce-tomato sandwich +hackney +Easter egg +motmot +red pine +opium poppy +gat +pussy willow +greater scaup +ocelot +persimmon +western hemlock +carambola +pinion +Malcolm stock +bobsled +larkspur +wood drake +pinetum +red gum +draft beer +funnel +terrarium +Pinot blanc +doodlebug +brittle star +salsa +cantaloup +pollack +stockpot +eastern hemlock +rock wren +burqa +squash +aircraft engine +billy +flamingo flower +odontoglossum +old squaw +redstart +sheepskin coat +mate +flathead catfish +gentianella +bilberry +bog rein orchid +incense cedar +mew +Colorado spruce +cob +portmanteau +grenadine +common ginger +masdevallia +compound microscope +sobralia +white fungus +guppy +chapterhouse +honey +green frog +sea swallow +African marigold +astrolabe +verdigris +yellowhammer +carrot juice +oxlip +medicine ball +highboy +grass frog +gamebag +surgery +mincer +mulloway +cactus wren +box office +resonator +table-mountain pine +European curlew +supernova +cabbageworm +peach +plane seat +asp +Yquem +tomato hornworm +rook +quadruped +chador +micrometer +dabchick +Afro-wig +balsam fir +bucket seat +sage green +macon +blue poppy +chinquapin oak +black pine +spinach +chrysalis +carnauba +tee +bearberry +shirt button +tree of heaven +southern white cedar +covered wagon +brood hen +spadix +European catfish +winter wren +bulldog clip +carpetbag +study hall +chino +simian +closeup lens +cookie cutter +grapefruit +mandola +sassaby +Allegheny plum +piaffe +scorpion fly +booby +draft animal +field tent +cumin +laurel oak +smooth-leaved elm +American arborvitae +American toad +grinding wheel +mountain ash +cuttlefish +pipistrelle +parer +safety rail +Clark's nutcracker +side-blotched lizard +giant hornet +wicket +dugout +electric toothbrush +dhow +common four-o'clock +long-eared owl +anchor +near beer +tansy +creme caramel +guided missile frigate +shelduck +durian +compact +iron tree +shiitake +polo +camouflage +pedal pusher +salon +tangerine +lacebark +Swiss mountain 
pine +goalpost +poolroom +space capsule +wild cherry +dress hat +wave +raglan sleeve +cassia +Jerusalem artichoke +cabbage palmetto +marsh harrier +American redstart +sea squirt +cliff diving +sparrow hawk +watch cap +frankfurter bun +police boat +flash camera +neem +eastern meadowlark +Italian cypress +orb-weaving spider +graniteware +sewing basket +latex paint +rock dove +stator +leaf lettuce +roulette +broadcloth +Spork +panicle +sternwheeler +cider vinegar +brown creeper +cowfish +closed gentian +chickpea +port +pimento +sheeting +matilija poppy +hawk owl +guava +papaya +huisache +European shrike +racing skiff +yellow warbler +gumbo-limbo +North Carolinian +staysail +court +iced coffee +money belt +shaver +Psychopsis papilio +sumo ring +refection +kingfish +clock pendulum +greater butterfly orchid +disk harrow +tawny eagle +polyphemus moth +pieplant +Nicaraguan +bocce ball +California box elder +porbeagle +crown of thorns +Mexican sunflower +fennel +stream orchid +slip ring +white fir +fold +moss campion +fairy ring +hose +pony-trekking +western larch +meadow pipit +Cape May warbler +longan +bookmobile +junk shop +lemon shark +smelling bottle +solan +widow +sea pen +universal joint +day game +goldcrest +maiden pink +biographer +rotunda +oriel +arranger +gambrel +Angora +fen orchid +leading rein +Wilson's snipe +European nuthatch +natterjack +athletic supporter +mouflon +emergency room +swallow-tailed coat +western meadowlark +feather star +Navy SEAL +toilet bag +loquat +lesser butterfly orchid +thumbhole +breathalyzer +featherweight +collards +mayfly +confessional +mountain ebony +redwing +Norway maple +refractometer +stagecoach +gasoline gauge +octopus +baker +Rhode Island red +European tortoise +cardiologist +Punjabi +Arkansas kingbird +tamarind +drum brake +flash +yellowtail +stokes' aster +emperor +free house +sour gum +ruddy duck +hamadryad +command module +tinamou +Norway lobster +washstand +European hornbeam +roaster +black-necked grebe +tallgrass 
+leopard lizard +anastigmat +Blackburn +deutzia +ground rattler +Christmas fern +wild pink +sesame seed +carrycot +Italian parsley +nectar +roll-on roll-off +true laurel +anisette +candy corn +flowering maple +revers +dun +tobacco hornworm +common sunflower +common grape hyacinth +cardiograph +electric meter +herb Paris +goalmouth +spruce grouse +canopy +wind poppy +stemma +gateleg table +lumper +speckled rattlesnake +gudgeon +rough-legged hawk +internal drive +pomelo +piece de resistance +storm door +clementine +Japanese pink +settler +yellow jacket +Fraser fir +royal palm +cicada killer +cayenne +guava +bluewing +red baneberry +lesser yellowlegs +cache +bog rose +sparring partner +ski jumping +sherry +glacier lily +beer mat +shredder +American widgeon +protectionist +green olive +black-tailed deer +Alpine fir +dispatch case +whipping cream +African daisy +cantilever bridge +maraschino +rhea +ink bottle +dacha +hagberry tree +lesser rorqual +orchard oriole +candidate +cuticle +breadfruit +fishbowl +giant puffball +closed gentian +Joshua tree +tie rod +beard lichen +flame tree +stegosaur +acerola +Swan River daisy +common murre +flowering almond +protegee +loggerhead shrike +Wilson's warbler +Japanese honeysuckle +basilisk +skimmer +hybrid tuberous begonia +pumpkin ash +chafing dish +collared lizard +iced-tea spoon +scrubbird +Iceland poppy +grey kingbird +wallflower +slick +diesel +Swiss pine +ethernet cable +ketch +lightship +black cherry +swordtail +Monterey cypress +lightweight +Floridian +Sabine +stall +contact +viola da gamba +hemstitch +upland sandpiper +box spring +sassafras +radome +lesser scaup +bluefin +yellow-bellied sapsucker +armored car +cabin class +Moorish arch +webcam +aquavit +overall +sergeant major +soft shield fern +gin and it +bobolink +subcompact +falconer +black morel +roadrunner +lab bench +thong +coffee urn +weeping beech +caladenia +southern live oak +scanner +wine vinegar +common speedwell +European roller +fuji +snag +piping plover 
+concertina +secateurs +meat thermometer +supercomputer +funnel +dais +western fence lizard +spruce pine +pommel horse +Cassegrainian telescope +pitta +India-rubber tree +mangosteen +tamp +aposematic coloration +dustcloth +birth +Atlas cedar +reed bunting +jabiru +sainfoin +press photographer +golden oriole +laryngoscope +thermal printer +winder +doubles +cricket ball +dabbling duck +tonic +Buddhist +Morris chair +swatter +quaking aspen +ancient pine +American larch +evaporative cooler +click beetle +yellow-breasted chat +souchong +bluegill +pied-billed grebe +tricorn +spring beauty +southern magnolia +rowel +chili +hard roll +flathead +satsuma +gangplank +bourguignon +cockfighting +greenwing +plum tomato +fly orchid +gnatcatcher +spotted eagle ray +ovenbird +brassavola +mocha +candy cane +afterburner +thriftshop +study +winter crookneck +grinder +muskellunge +sacred ibis +inverter +sandwort +deer fern +stair-carpet +Cotes de Provence +ovenbird +rex begonia +American woodcock +poison ash +lowland fir +pawpaw +loblolly pine +kinkajou +European hackberry +pest +coralwood +Bedouin +acetate rayon +snuffbox +radiator cap +basket oak +table-tennis racquet +smew +midge +telescopic sight +radish +great burdock +separate +damask violet +broadbill +bourbon +blacktip shark +gift shop +khimar +date +woodland caribou +policeman bird +grey birch +American elm +strawflower +officiant +hart's-tongue +straight razor +Spanish elm +radicchio +white croaker +vicuna +soft-shell clam +flannel +adonis +bonito +kittiwake +English walnut +soldierfish +hipflask +spotted crake +Streptopelia turtur +American maidenhair fern +corn cockle +telephone cord +canopy +playback +diocesan +marsh orchid +manakin +purple grackle +cob +fishmonger +otoscope +vermillion flycatcher +inhaler +instar +licentiate +myrtle warbler +goat herder +benthos +toggle +drumhead +piranha +doorplate +vault +triptych +red-necked grebe +transporter +vernier caliper +flathead +Portuguese man-of-war +countrywoman +vacation 
home +Bactrian camel +night-light +module +lemon curd +carancha +painted daisy +bok choy +ratatouille +troll +escarpment +cinnabar +computerized axial tomography scanner +lychgate +sowbread +bedside +guided missile cruiser +reel +cleat +hemostat +blue shark +Seven Wonders of the Ancient World +motorized wheelchair +pillow block +horned puffin +prickly pear +electric range +mother's daughter +vein +Oregon maple +bird dog +faceplate +wren warbler +feather reed grass +common alder +Adam's needle +straitjacket +organ-grinder +gantry +bikini pants +peristyle +herpes +terry +toad lily +celandine +red-breasted sapsucker +bragger +green peafowl +fuschia +quoits +house martin +dome +herpes simplex 1 +touraco +meeting house +vacuum gauge +cat's-ear +crisphead lettuce +carpet moth +European rabbit +puff adder +Old World scops owl +fire pink +fruit punch +ant bear +black walnut +stroboscope +white mangrove +pine grosbeak +cast +check-in +ring-necked parakeet +matai +shingle oak +fieldwork +rue anemone +landing net +ouzo +herringbone +lyceum +hydrogen bomb +mullein pink +masher +evening grosbeak +water vole +livingstone daisy +tomatillo +cavalier hat +interphone +wild lupine +goosefish +sugar maple +plantain +white dead nettle +Monterey pine +bugle +veloute +marsh gentian +Bermuda buttercup +alehouse +Peter Pan +thong +LP +tulip tree +scanner +scarlet tanager +music hall +angel shark +pecan +eight ball +rosy boa +outboard motorboat +garage +fanlight +black cottonwood +notornis +mountain fern +lunar crater +reddish orange +whitetip shark +executant +European ladies' tresses +washboard +revolving door +case knife +balloonfish +greater kudu +tarpan +cog +wet fly +Irish soda bread +basement +broken arch +canopic jar +muscat +kazoo +bobsledding +loaner +black guillemot +English saddle +garlic mustard +Foucault pendulum +mulberry +clotted cream +dove's foot geranium +Atlantic ridley +convector +ground floor +European wildcat +poinsettia +hideaway +great barracuda +black beech +bushy 
aster +cornflower +tam +true slime mold +carving knife +holly fern +railroad tunnel +crimson clover +disposal +etamine +suspension +plasmodium +political scientist +minnow +Spanish rice +twist bit +subway train +Scleroderma citrinum +saw palmetto +console +gimlet +hand pump +waratah +rock rattlesnake +keel +server +curlew sandpiper +hone +sable antelope +inkle +photostat +foresail +sallet +tiger salamander +chutney +onlooker +Exmoor +tiramisu +drawing room +battery +sour orange +juniper berry +beeper +funeral home +fescue +Maksutov telescope +ranch house +jai alai +carob +socket +popcorn +sandbar shark +pipal +summer tanager +oast +skipjack +rolling stock +dropper +great snipe +turnip greens +cowpea +honeycomb +ichneumon fly +maternity hospital +harp seal +nylon +bomb shelter +horse tick +litchi +camel's hair +mimosa +bur oak +anvil +belay +pinhead +continental breakfast +burglar alarm +Mojave rattlesnake +auxiliary storage +lightwood +ratepayer +cecropia +retractor +quadrate +pepper tree +Venus' slipper +abattoir +strawflower +firewater +purple saxifrage +black rat +pack +pepper pot +mayweed +winger +whitetip shark +great yellow gentian +snowdrop anemone +garden angelica +soy sauce +white poplar +inkwell +crouton +gas gun +honey locust +house of cards +ice maker +moquette +arrack +casualty +butterfly orchid +eau de vie +mosquitofish +prairie smoke +haft +horseshoe +steel +peach orchard +Mexican hat +encaustic +shoe +pennywhistle +sweet woodruff +hull +doorsill +globe amaranth +day school +housedog +crown princess +oxbow +maxi +positron emission tomography scanner +compere +European turkey oak +peanut +sentry box +house physician +hot line +loquat +rove beetle +riband +flowering fern +fan vaulting +ceibo +bongo +bat boy +omelet pan +European ash +breadwinner +gaff topsail +clerestory +bushbuck +bluethroat +khukuri +Father +portcullis +candy egg +brake lining +lawn furniture +buckskin +garden pea +Brazilian rosewood +Italian bread +horn poppy +silk tree 
+Christmasberry +hotel-casino +poplin +false lupine +desert sunflower +mimeograph +alpenstock +cork tree +cultivar +common mosquito +pollard +black marlin +understudy +lancet window +college +breadfruit +Herero +Labourite +bar printer +squaw grass +stelis +firing chamber +sycamore +artificial horizon +radiologist +pansy orchid +bicycle pump +wraparound +bell gable +home computer +orchard grass +carving fork +bergamot +honeycreeper +sewing room +radiator +core +brown bat +goose grass +adjutant general +Erlenmeyer flask +massasauga +tail rotor +cardinal tetra +Drambuie +wine palm +Sarcoscypha coccinea +shantung +Calvados +garganey +vicar +house mouse +creeping oxalis +digital subscriber line +cedar elm +backgammon board +blackberry-lily +pallid bat +New Zealander +Barbadian +rose geranium +European spider crab +gharry +electric hammer +mustard +Chinese lantern +laundry cart +filament +mozzarella +gooseberry +sukiyaki +porkpie +culvert +altazimuth +plum pudding +serin +Spanish dagger +Asian crocodile +crevalle jack +mascara +pig bed +alderman +northern shrike +Sufi +purple-fringed orchid +derringer +linseed +hockey skate +bell jar +Japanese wistaria +mantled ground squirrel +western toad +lieutenant commander +mechanical piano +ovoid +paddlefish +demijohn +coast live oak +brick +gearset +tailstock +phonograph needle +winery +tuberose +mother's boy +shot tower +crucian carp +carpet pad +lamb's-quarter +Menorah +common white dogwood +hypanthium +rosebay +wild medlar +soil horizon +sweet orange +bitterroot +hand glass +cloisonne +towpath +gum ball +margay +carambola +bolt cutter +charger +vibraphone +gueridon +elephant tree +wood-frog +ash grey +duffel coat +third base +chunga +glebe house +lake trout +encephalartos +Japanese oak +northern red oak +pruner +blue orchid +Biloxi +western wood pewee +corselet +alabaster +anechoic chamber +grass pink +wax begonia +blue daisy +pennyroyal +Asian tiger mosquito +cheese souffle +flat bench +caramel +sump pump +bush violet +common 
fennel +corner +skullcap +asparagus fern +white mangrove +calceolaria +sateen +saltbox +hollowware +head nurse +coal miner +mountain lily +tufted vetch +European perch +line officer +steamer +stickball +shin guard +cauliflower +Monegasque +hatpin +wolffish +trackball +khaki +arthrogram +rocket larkspur +naval commander +Gemini +ski binding +department head +Chenin blanc +wingstem +knothole +aerides +sweet bay +tautog +gangway +waterspout +Hudsonian godwit +armyworm +incinerator +kidney vetch +pine nut +cypress vine +hip tile +sorrel tree +relay +bench press +Kentucky coffee tree +dobson +sapling +false lily of the valley +veld +phaius +vitamin B2 +beaker +wall tent +sieva bean +dusty miller +sewing kit +cavalry horse +diaper +butterfly pea +Spam +saddlebill +pearly everlasting +kowhai +Sister +moneywort +organdy +pine marten +bareboat +hot-water bottle +baby blue-eyes +silver lime +common cotton grass +malmsey +blue pea +baggage car +pineapple +folding saw +cotton rose +brawler +black duck +Weizenbock +pool player +Gujarati +wild duck +purple sage +sage grouse +mail train +arm guard +short-spurred fragrant orchid +queen +eparchy +spring peeper +ortolan +shoulder +fighter pilot +American beech +snowcap +novitiate +roller +butcherbird +canyon oak +brompton stock +firebrick +rudder +light cream +Primus stove +nonsmoker +probationer +harp +kosher +surcoat +videotape +zebu +first class +yam +car +rissole +miso +funambulism +attic +curling iron +shutter +encolure +split-pea soup +yellow rocket +gas oven +ultracentrifuge +chamomile +canteen +eyeliner +yellow squash +Irish stew +collar +doublet +machinist +septic tank +snap bean +Polyporus squamosus +western tanager +creeping St John's wort +back +sinkhole +perforation +Romanian +epergne +fez +comfrey +sidecar +beach pea +screen door +instigator +plughole +woodbine +pigweed +hip pocket +common scoter +squeegee +Surinam cherry +porringer +body stocking +eatage +shallot +enlarger +common canary +trophy case +gun case +plow 
horse +hot plate +pearl oyster +margarita +madras +backspace key +pigeon guillemot +pajama +buckthorn berry +homestead +bedbug +Linotype +trundle bed +granadilla +theremin +chin rest +bouillabaisse +tumble-dryer +truffle +cassava +kurrajong +gyroscope +European silver fir +C-clamp +politician +green soybean +exponent +flame tree +scissortail +achimenes +crown daisy +soft tree fern +spaghetti squash +pale violet +beaver +dashiki +washboard +driving wheel +sack +foulard +sputnik +boatbill +English elm +sack coat +grog +golliwog +Malayan tapir +May wine +calash +stile +windjammer +American sycamore +rotor head +fast food +balata +dragonet +Emmenthal +metronome +negative +meadow saxifrage +rabbit ears +chenille +round +hobby +crankshaft +Wilson's phalarope +Murphy bed +soil pipe +forecourt +policyholder +tarmacadam +loyalist +gyro +Queen's crape myrtle +shortcake +apple butter +pumpkinseed +heronry +yellow perch +baggage claim +escarpment +diaphragm +mescal bean +shunter +flax +columbarium +Joe-Pye weed +Neandertal man +casement +hole-in-the-wall +Verdicchio +futurist +eaglet +tassel hyacinth +pup tent +fawn lily +cabbage palm +pogonia +hospital ship +water mill +Oregon grape +lentil +grindstone +banana split +inkberry +coonskin cap +bazooka +wrap +anise hyssop +Java sparrow +red-eyed vireo +common opossum +clintonia +bustle +booster +tribesman +soy +panhandle +jaboticaba +locking pliers +Sauvignon grape +ghat +screw +oximeter +white croaker +saucepot +eggbeater +reticule +cabbage bark +looking-glass plant +head gasket +California sycamore +cowbell +Aleuria aurantia +Herr +lever +spider orchid +cashew +shift key +solar house +wood chisel +white +mantilla +stamp +bolero +rear admiral +garden rake +Lao +crowbar +lapdog +buttermilk biscuit +yellow bedstraw +pickerel frog +dowel +serjeant-at-law +mill-hand +lambrequin +state treasurer +red silk-cotton tree +coiffeur +star anise +shoulder pad +marshal +sitar player +gown +ground cedar +hedge maple +caddie +pitahaya +corn 
marigold +stick cinnamon +woodland star +Eurasian green toad +anti +blueweed +medicinal leech +gaur +chocolate kiss +kit fox +mother +butte +audio CD +blast furnace +vitamin D +nutgrass +cornice +black sheep +hearing aid +lingonberry +quad +lentil +riding crop +pratincole +pentagon +sea lavender +nerita +flatmate +catboat +water clover +angiopteris +mushy peas +crown imperial +music school +woodshed +platy +Turk's-cap +rundle +reading teacher +hardtack +balloon sail +oriental spruce +bluefish +white mulberry +horned violet +satin bowerbird +treasure flower +sustaining pedal +mimosa +spurge nettle +sea green +hasp +lederhosen +pink cockatoo +long johns +basket weave +freewheel +thrust bearing +timber tree +orphan +falafel +common camas +bird of passage +bird's foot trefoil +electric eel +fizz +grape arbor +serape +brace +hazelnut +kylix +horse mackerel +cassia bark +lizard orchid +spat +Brown Swiss +pocket flap +pillory +purplish blue +rolling mill +tappet +broccoli rabe +semi-detached house +mushroom coral +fly orchid +nougat bar +ball hawk +sand wedge +shirred egg +black locust +strip lighting +drop scone +brush turkey +ball +tragopan +dallisgrass +tuatara +great knapweed +potentiometer +Kiliwa +Pacific bottlenose dolphin +accelerator +Darwin tulip +osteopath +Arizona cypress +manna ash +butterbur +cornelian cherry +American holly +nopal +tanker +foreshore +ditty bag +gas lamp +safety razor +chanter +fomite +chip +striped killifish +catalytic converter +plaice +dusty miller +takin +gerenuk +corn chamomile +Japanese pagoda tree +boneset +common osier +Guinean +taro +plotter +celandine poppy +churn +steenbok +edible mussel +sensitive fern +triode +black raspberry +zoo keeper +feather ball +dredger +starlet +cornpone +coat button +rosinweed +toy Manchester +crested cariama +finger food +basilisk +shotgun shell +comfort food +mountain hemlock +candytuft +Stilton +record changer +anklet +ball valve +Mediterranean snapdragon +BVD +sand cat +Galloway +nutmeg +water-mint 
+woodwaxen +citron +ark shell +federalist +drone +cheekpiece +hyperbaric chamber +addax +field-emission microscope +synchronous converter +men's room +medlar +electronic fetal monitor +Sazerac +false indigo +roof +passe-partout +meadow spittlebug +Phytophthora infestans +oast house +hedge nettle +voting booth +slender salamander +telephone jack +true bug +scouring rush +Scotch egg +matchbook +aperea +cytomegalovirus +garlic press +cove +whitebark pine +Slovene +narrow wale +mother's milk +Audubon's warbler +prickly poppy +cowl +tailorbird +mud brick +bamboo palm +welt +Afghan +Virginia spring beauty +dinner bell +night jasmine +fly rod +microtome +aerie +carinate +picker +brick trowel +loving cup +swathe +green mayonnaise +rivet +bandbox +newsroom +tea tortrix +bobby +gig +hush puppy +garlic chive +piston rod +aspidistra +bluejack oak +harvest-lice +strap hinge +sour mash +macadamia nut +histiocyte +fan belt +shelf bracket +abelia +Hottentot fig +fish chowder +abettor +compote +beige +dioon +hop +haymaker +oilskin +magnetometer +tool bag +tambour +call girl +gringo +fairy light +broad-leaved plantain +second base +zebra mussel +Japanese cedar +pistia +swamp chestnut oak +cashmere +double cream +samisen +lamb curry +companion +kapok +julep +sweet woodruff +gardener +jewfish +inspector general +collembolan +wheel bug +bass +scrubland +wryneck +macrozamia +trouser press +clove +tiger cowrie +yawl +collard +dildo +pony cart +ormer +annual +tessera +chancellery +two-toed sloth +queen +old lady +wringer +spritzer +baggage +black mangrove +black-eyed Susan +semifinalist +highlighter +alfalfa +Easter daisy +escapement +operating table +neutral spirits +bursar +roble +entablature +girl wonder +farm boy +ring ouzel +permanent press +auklet +beefsteak tomato +gaming table +tea bag +manul +giant bamboo +Ozark chinkapin +matzo +furrow +smoothhound +CD-ROM drive +powdery mildew +copilot +garden +American merganser +bunsen burner +Asian longhorned beetle +lead tree +creeping 
buttercup +Percheron +back brace +axseed +cub +soul food +rabbi +edelweiss +mineshaft +fox grape +sandwort +torque wrench +leisure wear +Mae West +broccoli +loach +maraschino +heavy cream +silkworm +cirque +vintner +whitewash +butterfly pea +two-toed sloth +midiron +ceriman +Bulgarian +operating microscope +sambuca +California fuchsia +silver maple +tangelo +black bean +lugsail +starting gate +leek +sunflower seed +fish fry +clinker +synagogue +coscoroba +brae +uphill +common limpet +golden plover +cedar of Lebanon +amphibian +Canary wine +taipan +agua +feeder +parallel +mater +pink calla +meat counter +yagi +crab cactus +cacao bean +bowfin +alley cat +stonefly +Eastern cottonwood +vernier scale +marginal wood fern +dancing-master +detective +yam +textile screw pine +hooch +spinet +single prop +sassafras +goose barnacle +triple cream +China tree +peeper +dressmaker +snatch block +ironmongery +dressing case +creeping bellflower +silver sage +honeydew +eastern red-backed salamander +peg +nombril +danish +mashie +anarchist +alligator snapping turtle +shepherd +American white pine +runner +chalice vine +rheumatologist +defibrillator +yellow chamomile +lemon balm +peacekeeper +native beech +sandwich board +Bavarian +titrator +paneling +deer mouse +poteen +sugar snap pea +meadow salsify +town crier +best +basinet +common myrtle +night lizard +cushaw +Tampax +camphor tree +gentile +orange peel +putty knife +pyromaniac +Brummie +fever tree +double +nest +inferior +cabbage tree +graduated cylinder +mucor +woodborer +earthwork +potato salad +four-hitter +gooseberry +water vole +ziggurat +grapefruit juice +four-in-hand +cranberry bush +diode +videotape +Mohican +niacin +beetroot +shirtsleeve +cork tree +two-eyed violet +white ash +drawing chalk +baked Alaska +bone-ash cup +toastrack +diastema +bed jacket +dwarf astilbe +yellow honeysuckle +cow pasture +sheet pile +saxhorn +upholstery material +California white oak +Spanish bayonet +horsemint +littleneck +deflector +magician 
+standard transmission +blue marlin +shallot +feijoa +collar +board +jump suit +common staghorn fern +priory +Xhosa +Loranthaceae +barbecued wing +barmaid +spit +lemon juice +umbrella plant +field pennycress +centenarian +queen bee +fish stick +black bread +dirk +secularist +German American +spotted weakfish +iron foundry +speed bump +yellow-fever mosquito +gag +frame +black-eyed pea +alcoholic +involucre +sperm whale +balanced diet +wax bean +butcher's broom +winter heath +Mainer +Australian pine +gas guzzler +double-breasted jacket +pod +palo verde +trimmer +wattmeter +dyer's woad +crotalaria +vine maple +sulky +jack pine +thumb +Wilton +Panchen Lama +welder +badminton court +business editor +Arabian coffee +Kamchatkan sea eagle +foamflower +steep +plane +freckle +cerebral cortex +Vouvray +tea +forest tent caterpillar +neckerchief +accelerator +jig +bridal wreath +highball glass +New England clam chowder +beach strawberry +call waiting +baton twirler +double boiler +Dutch elm +car bomb +filmy fern +breviary +Florida gallinule +dace +parsnip +riparian forest +crescent +earplug +grab bar +cusk +foglamp +screwtop +black mangrove +mascot +Welsh poppy +gas holder +support hose +salsify +red beech +Indian python +caroler +pineapple juice +lowboy +terra sigillata +black olive +hypodermic needle +radio-phonograph +moussaka +miter joint +creche +tuning fork +black wattle +affiliate +vertical tail +kiwi +red morning-glory +piping crow +runway +Kashmiri +studio apartment +sea feather +Judas tree +boatbuilder +corn earworm +fallboard +Victrola +lechwe +goat willow +turret clock +Canada anemone +leaf lettuce +savoy cabbage +headpiece +Lebanese +fothergilla +hemlock +toolshed +silver tree +blue-headed vireo +weatherman +cylinder +caltrop +adjutant bird +driving iron +millet +European woolly thistle +rose apple +clown +schoolfriend +eastern coral snake +barbecue +executive vice president +long-billed marsh wren +brittle bladder fern +tank destroyer +left-hander +matting 
+catchment +balsa raft +eastern fence lizard +color tube +corncrib +electric typewriter +westland pine +elder statesman +whey +plonk +mound +cittern +nest egg +copyholder +China aster +basking shark +gavial +common duckweed +vanilla orchid +red-shafted flicker +granadilla +sylph +sty +vest pocket +potherb +little brown bat +Trapezium +ordinary +adult +purple-fringed orchid +abseiler +disco +metal detector +beefsteak fungus +ilang-ilang +barley grass +hawser +suture +brake shoe +staghorn coral +barbecue sauce +Browning machine gun +sarcophagus +disa +oven thermometer +rosemary +track +gorget +quince +royal +piston ring +teak +pin cherry +Komi +walking fern +sloe +synchronous motor +fire-bellied toad +Teleprompter +co-star +cape gooseberry +oscillograph +bass clarinet +cock of the rock +Tyke +showy milkweed +safety valve +branch water +sweet marjoram +hugger +crampon +fairy godmother +band-tailed pigeon +snow-on-the-mountain +minibar +foreland +grosgrain +dita +rampion +calligrapher +jointed charlock +master +sheepshead +barrelhouse +Carolina allspice +mastic +brake pad +whiskey sour +casement window +conveyer belt +stolon +pavonia +shinny +witch elm +logwood +hostel +pageboy +vesper sparrow +pyrrhuloxia +common carline thistle +wafer +boysenberry +screw augur +hack +American white oak +governor general +Mother Hubbard +game fowl +drosophila +delft +nymphet +tollbooth +chough +Russian dressing +plum tomato +American saddle horse +dusky salamander +black medick +red valerian +cordage +Elastoplast +conacaste +backlighting +swell +riveting machine +cowpen daisy +openbill +water speedwell +picture hat +crested myna +servo +bletia +garden trowel +muscadine +common caper +false lily of the valley +aralia +sharp-tailed grouse +cigar smoker +bandoneon +Chinese alligator +crazy +point lace +charcoal +Texas horned lizard +marinara +backstay +Gatling gun +piston +game fish +fall armyworm +grammarian +beer hall +guadalupe fur seal +sugar palm +peanut +velvet ant +light machine 
gun +rya +cling film +adobo +myrtle oak +angelica +balsam apple +windbreak +brother-in-law +snap brim +automobile factory +clavichord +dusky shark +edible banana +altar boy +California lady's slipper +schoolbag +wax bean +Atlantic walrus +bullpen +straw wine +thatch palm +potluck +tamarind +charcuterie +sod house +tie rack +liebfraumilch +clinician +scarlet lychnis +Spanish iris +bread knife +water oak +bedpan +Angolan +bassarisk +Alaska fur seal +African wild ass +milk float +froghopper +Verpa bohemica +water cooler +chop suey +ranker +red helleborine +Prince of Wales +marmalade tree +car train +giant red paintbrush +desert sand verbena +right whale +baron +stevia +asterism +five-spot +catapult +Silex +fiberscope +refresher +beef Bourguignonne +snood +divot +waterproof +crabeater seal +Missouri primrose +bumper guard +rock opera +Lilo +coffee can +smokehouse +buffalo grass +propjet +ice tongs +poop deck +acorn barnacle +veal parmesan +shower room +collins +ringhals +silage +jawfish +trouser cuff +contour feather +songstress +rachis +White Russian +stanchion +mastaba +flatbed press +viand +legal representative +espalier +organic light-emitting diode +sushi +scorer +haricot +pinna +plectranthus +jungle cat +dried apricot +coach horse +white fringed orchis +veal cordon bleu +bath +dallier +marching order +donkey jacket +Panama tree +aerator +klaxon +pinnacle +shouldered arch +lesser celandine +common eland +Grand Marnier +cock of the rock +phlomis +Japanese umbrella pine +morning room +dead-man's-fingers +little auk +bascule +house paint +home fries +great skua +cesspool +flying gurnard +wild crab +checkerbloom +Wollemi pine +cheese dip +coif +charwoman +tea ball +waif +Arctic ground squirrel +parishioner +stabilizer bar +potentiometer +black cohosh +medlar +willow oak +cascara buckthorn +scoutmaster +Canada lily +poppy seed +paper mulberry +blackthorn +garrison cap +inductee +aeschynanthus +interior live oak +black spleenwort +wild service tree +sling +nicad +swab 
+sego lily +eiderdown +fruit cocktail +pallasite +weeping spruce +shiv +sea lamprey +coachman +half binding +American white birch +gainer +Concord grape +yellow birch +fucus +common room +io moth +red osier +crucible +galangal +salmagundi +pepper steak +cap opener +swizzle stick +tomato juice +Nobelist +Sarawakian +African monitor +sleeping beauty +stereoscope +curd +pyramid bugle +applejack +dosser +rake handle +pilot light +Eames chair +Scotch and soda +bell heather +dinette +blackpoll +dogie +sound camera +cattle guard +mashie niblick +edible cockle +monocle +steak tartare +partaker +sidesaddle +communications satellite +porkfish +water hemlock +drawbar +ultramicroscope +Jamaican cherry +craftsman +lovage +common apricot +drum majorette +backsword +smooth alder +Amniota +dribbler +theosophist +dolman +ivory tree +Green Beret +pipe smoker +mayoress +mignonette +crampon +henbane +kirtle +death's-head moth +instep +great St John's wort +lorry +black-necked cobra +ball carrier +Jordan almond +byway +earless lizard +marble +andiron +high-protein diet +buzzer +ice floe +crankcase +Bofors gun +sockeye +veery +Delaware +caravansary +prairie coneflower +star apple +suiting +cot +call forwarding +American gallinule +glossy snake +rose chafer +instant coffee +placket +Tarahumara +pulsar +philodendron +orange tortrix +cypress spurge +Welsh rarebit +music box +giant crab +vanilla bean +water thrush +prayer shawl +gouge +promoter +dagga +black currant +bitter cassava +drain basket +snare +digital audiotape +retainer +olive drab +gluten bread +graham cracker +cheddar pink +caregiver +spray paint +Anglo-American +boatyard +backbencher +Link trainer +bell arch +weir +arbor +millionairess +sour cream +earthtongue +crawlspace +crossjack +balalaika +crupper +western redbud +guinea hen +rangeland +gaboon viper +common louse +single-leaf +horseshoe +balsam poplar +triskelion +jack-in-the-box +jester +rain stick +glove compartment +imperial moth +Japanese beech +biotin +turnip 
+oligarch +western skink +mudguard +retsina +data system +green bristlegrass +visiting professor +beaded lizard +weathercock +Sloppy Joe +high tea +lightweight +record sleeve +cooler +nodding onion +pigs in blankets +torque converter +district attorney +bunting +orrery +radiator hose +common plum +wood spurge +calamus +chicken Kiev +pin +lath +telephone bell +thistledown +audiotape +gypsy moth +snuffer +pari-mutuel machine +peanut butter +hearthrug +sack +Old World yew +chives +stovepipe +xenolith +mattock +mangle +electric chair +backup system +Empire +blackwash +dodder +Allegheny chinkapin +finger plate +junk +brown rice +wild angelica +chinaberry +mason +rasp +den +violet wood sorrel +nosewheel +plenum +merino +kirtle +Igbo +ensign +sex symbol +Belgian endive +sugarberry +yellow salsify +purple emperor +atlas +African clawed frog +leatherjacket +midwife +sac fungus +European cuckoo +three-day event +Mexican poppy +wagon tire +armyworm +rain gauge +Oregon ash +columbarium +spectrophotometer +Milanese +pointing trowel +casualty +Eastern hop hornbeam +lobe +mouthpiece +au pair girl +giant water bug +Browning automatic rifle +laser-guided bomb +drone +white alder +cockleshell +mufti +gravy +berm +boat hook +marshmallow +pet shop +cowpea +tactician +wading pool +anchovy dressing +flip +shackle +Wedgwood +thick-billed murre +erecting prism +giant salamander +sleeper +quiver +chain store +wing tip +New World tapir +witches' butter +gendarme +ginseng +common maidenhair +graduate nurse +balsam pear +hoatzin +philanthropist +axle bar +gas meter +moth mullein +ragbag +Chinese cabbage +celery stick +rutabaga +scalpel +cape marigold +variometer +argali +brig +shuffleboard +wort +Orlon +epiphyllum +allice shad +coffee filter +solar telescope +Japanese linden +thinning shears +golden wattle +queen triggerfish +millinery +surfbird +flame fish +clove +dicamptodon +red-bellied terrapin +turmeric +baya +air horn +Indian coral tree +punnet +sharkskin +water crowfoot +bight +desert 
iguana +Texas toad +volva +dredge +Turkey red +chemical plant +gemma +dice cup +orange marmalade +mistletoe +surveyor +frozen orange juice +pallette +poultryman +burbot +courlan +captain +saddlery +bodyguard +dwarf tulip +black ash +pulse +nailbrush +tickseed sunflower +legless lizard +shirtwaist +polling booth +chickeree +garlic chive +common thyme +multichannel recorder +screw thread +sangoma +calliopsis +geoduck +colleen +bandicoot rat +pastis +swamp sunflower +scorekeeper +Honduras mahogany +Australian pitcher plant +triangle +elevator shaft +green pea soup +carrel +prairie aster +bird's-nest fungus +scarlet clematis +gook +mescal button +carcase +mulatto +ejection seat +strawberry daiquiri +goat grass +car battery +babu +chief of staff +monilia +Siberian crab +ridge rope +Morchella semilibera +nutmeg +moosewood +graham bread +California four o'clock +zwieback +velvetleaf +abelmosk +shadow box +corned beef hash +newsreader +backstairs +cutwork +sherbert +tooth fungus +angel-wing begonia +greasepaint +common milkwort +potato vine +CD drive +crepe de Chine +sporting man +koto +armet +barking frog +celeriac +drainage ditch +black box +steel blue +clotheshorse +corn speedwell +drawknife +spritsail +vichyssoise +modeler +pocketcomb +limey +suslik +cockpit +digester +brig +raita +troll +benedictine +rock wren +lock +Barnaby's thistle +school bell +school ship +Soave +falchion +swaddling clothes +terrine +smoke screen +rivulus +sweet lemon +cullis +bustier +peppermint +Philadelphia fleabane +Hampshire +active +charnel house +face guard +Quebecois +facilitator +tongue depressor +bitternut +heath aster +sapodilla +bluestem +centrist +Canterbury bell +needlenose pliers +groats +tapa +Qatari +paper feed +tilt-top table +plastering trowel +brazil nut +rotogravure +patriot +manicurist +bacon and eggs +puffbird +lightweight +golden willow +kaiser roll +duff +girandole +seaside daisy +Kurdistan +Skivvies +showboat +fire bell +lock-gate +greater masterwort +weald +ice ax 
+toetoe +mess kit +bucking bronco +black turnstone +backscratcher +backpacker +basement +marbleization +trigger +satsuma +fall-blooming hydrangea +mountain lady's slipper +yellow oleander +crookneck +ex-president +Venn diagram +psaltery +bulwark +old boy +linear leaf +aril +butt weld +fall webworm +pruner +bald-faced hornet +nougat +tailgate +field speedwell +potsherd +center punch +long beech fern +desert paintbrush +canyon treefrog +bushel basket +Eurasian +swamp horsetail +cryptanalyst +wicket +school newspaper +captive +spider brake +electric mixer +tumbleweed +mason wasp +sash window +paddock +wet bar +oxtongue +stevia +wheat rust +scute +switch engine +mud dauber +dotterel +snailflower +common barberry +mulligatawny +cinnamon bark +cigar box +trivet +proof spirit +cream soda +western grey squirrel +baby powder +Bren +Japanese yew +sailcloth +Basket Maker +bannock +basidiocarp +aphelion +erect bugle +limiter +bosc +Przewalski's horse +helmet orchid +audiometer +battle cruiser +grass widower +staphylococcus +Congolese +common pitcher plant +parliamentary agent +Virginia snakeroot +mockernut +Siberian elm +backbench +rough +chervil +chlamys +nationalist +galantine +screwdriver +falsifier +cancerweed +spur +jerkin +porte-cochere +dill pickle +Montagu's harrier +tetrode +true fungus +American quaking aspen +vitamin B1 +leopard lily +eggdrop soup +aurochs +core bit +Jaws of Life +trousseau +parquetry +Disciotis venosa +tender +beef goulash +vitamin K1 +pepper spray +covered smut +hook +sports announcer +weapons carrier +foxtail grass +sloe gin +mezereon +antifouling paint +pavior +pile driver +security consultant +monkey-wrench +Indian hemp +amaretto +American wistaria +A-line +market strategist +rainbow runner +souvlaki +binturong +stiletto +gastrula +Vietnamese +Old World hop hornbeam +cold cathode +pier table +houndstooth check +prop root +leaf-footed bug +sedge wren +Dutch iris +drop curtain +opossum rat +lame +pollen tube +doubletree +compression bandage 
+pinon pine +catmint +pier arch +kingmaker +deanery +loofah +fullback +fencing mask +flying boat +carpet sweeper +lemon-scented gum +Accipitriformes +kit +pigfish +clipper +dolmas +lesser centaury +blood agar +water violet +raw milk +lemonade +vicar-general +supply closet +Anzac +confectioner +ignition key +velvet grass +white willow +John Dory +ruddiness +wheel +common horsetail +hubbard squash +speculum +Spanish bayonet +mountain mint +glint +foxhole +housemate +bootjack +sleigh bell +clog dancer +Mexican mint +rendering +Hausa +star saxifrage +spring squill +clothesbrush +liquid metal reactor +Columbia tiger lily +sorrel +cartwheel +Jersey +Caucasian walnut +desert willow +surveyor +elbow +Santa Gertrudis +fringe bush +industry analyst +lyrebird +Cortland +arroz con pollo +catechist +tank top +jew's harp +cereal oat +heartleaf +short sleeve +butty +butterfly plant +stud finder +felloe +beer garden +clevis +wood warbler +demerara +cornetfish +mince +Jamaica rum +Spanish broom +binnacle +camise +ferrule +Copt +hall +minicar +scimitar +cryptogam +miter box +limestone fern +Marsala +Parliamentarian +gravy +woolly bear moth +formula +squash bug +pigmentation +plate +skin graft +radiotelegraph +hellbender +soft pedal +lavender cotton +propagator +Bailey bridge +cottage pie +rotgut +A battery +pintle +off-line equipment +European swift +shrimp butter +plumb bob +trunk lid +succotash +yellow cypress +heartleaf +antelope squirrel +sambar +maternity ward +deciduous plant +bartlett +Riesling +sour cherry +Klansman +poke +academician +sociolinguist +bird's nest fern +common privet +scale fern +tachograph +oyster stuffing +pusher +green June beetle +staghorn sumac +lockage +master +bap +harlequin +blackfly +spotted coral root +kahikatea +cabana +riot gun +apple mint +kob +praline +confidant +pahautea +float +city father +Zen Buddhist +pessimist +conference center +banksia rose +comfit +sweet cicely +winged bean +henroost +myope +bunt +nailfile +yellow mountain saxifrage 
+cruise control +abandoned ship +water chinquapin +spanker +wing nut +puccoon +pier glass +Atlantic sailfish +medlar +buttercrunch +rough-skinned newt +planter's punch +Dutch iris +control key +committeewoman +torpedo-boat destroyer +garambulla +tree heath +gladiator +September elm +inclinometer +snowbell +call-in +sunsuit +microfiche +bluestocking +cheval glass +server +franking machine +sugar syrup +Macoun +transport ship +alderfly +wash-and-wear +Abbe condenser +bush nasturtium +wild leek +canary seed +Northern Baptist +sweet wormwood +jaboticaba +cardroom +autoradiograph +ash-pan +sprinkler system +rattrap +claymore +parts bin +forest red gum +thermonuclear reactor +Indian crocus +lector +heir apparent +leafy spurge +masquerader +varicella zoster virus +cucumber tree +hedger +Shumard oak +zooplankton +quartermaster +arrester +bridge +hop clover +meadow foxtail +winter hazel +portable circular saw +penuche +limpa +blue toadflax +mesophyte +Alpine anemone +pet sitter +avocado +streptococcus +fiber optic cable +river red gum +hornist +chicken taco +red spider +tape grass +densitometer +salmonberry +tiger snake +hot toddy +silver fern +candlenut +buckram +local call +defoliator +king +mahoe +lever lock +social insect +winter purslane +bootblack +fireball +ramie +bellbird +prepuce +capote +Chinese forget-me-not +Pisces +costume +California black oak +tree lupine +golden polypody +liger +California whipsnake +urodele +sapodilla +skillet bread +duckpin +supremo +asparagus bean +kampong +endameba +cow pony +rider +motherwort +Persian iris +soursop +kohlrabi +Parisienne +irons +doubles +feijoa +farmplace +cottage cheese +bezoar goat +subcontractor +blunderbuss +down +purple martin +Lapp +crenate leaf +tobacco pouch +beach towel +Santa Lucia fir +monetarist +stringer +ocellated turkey +Texas purple spike +ackee +caddy +hedge mustard +second-rater +strawberry bush +valedictorian +steak sauce +prairie gourd +aspirant +mint +Valenciennes +vodka martini +American persimmon 
+big brown bat +Mycenaen +mouthpiece +norfolk island pine +pennyroyal +Jewish rye bread +granadilla +tract house +wall +shuttle helicopter +blackjack oak +Lippizan +storm window +white zinnia +sickle +sushi bar +polish +baldric +brooklime +church hat +control circuit +vicuna +death adder +eukaryote +durmast +field soybean +jacket potato +wild basil +queen consort +brooklime +octant +blue false indigo +broccoli raab +step-down transformer +date bread +blue ash +duffer +oak chestnut +pennant +wedge +Florentine iris +morion +weakfish +morning dress +public address system +spearmint +Ashkenazi +sow +interpreter +Metis +pita +iron lung +parfait glass +cylinder lock +immortelle +obstetrical toad +tee hinge +successor +western +working girl +julienne +AND circuit +spaghetti junction +fer-de-lance +enlisted woman +star +lightning rod +bilge pump +pacer +horse nettle +African oil palm +blastocyst +air hammer +bamboo fern +remote terminal +lambkin +money cowrie +Pelham +clinical thermometer +wiggler +guru +false indigo +tea bag +foredeck +king +baby shoe +mule +grab bag +silver-bell tree +knitting machine +cobia +roulette ball +larder +button pink +rumble seat +noria +queen mother +solar thermal system +aquaplane +highbrow +rusty blackbird +desktop +lima bean +pontoon bridge +watercress +wild cabbage +tumbleweed +dressing sack +compact-disk burner +spittoon +marrow +sporophyte +second fiddle +pot-au-feu +specialty store +dry +mole +khadi +japonica +lovage +squamous cell +lobe +European creeper +brown pine +bladderpod +rumble +French Canadian +mascarpone +Pacific halibut +perennial ryegrass +wine lover +turbot +longwool +silver tree fern +dust cover +synchromesh +corn pudding +alpine azalea +garboard +cane sugar +observation dome +condensation pump +hind +taximeter +hand drill +gas thermometer +jammer +buffing wheel +handstamp +prairie mallow +turkey stew +sun spurge +duck pate +kibble +Cassin's kingbird +apadana +Devon +grinner +oocyte +blank +header +schoolmaster +guard 
ship +intravenous pyelogram +rimu +luff +Mediterranean fruit fly +singlestick +lady-in-waiting +curb +birch +limekiln +orthoscope +serotine +Spanish oak +swamp cottonwood +edger +city man +picnicker +white basswood +Parsons table +Christmas begonia +perspirer +Pacific tree toad +Cape tulip +finger bowl +blue pike +greengage +handcar +milkweed +potbelly +river dolphin +creel +typewriter carriage +banteng +pawnbroker's shop +huon pine +biennial +man of action +foundress +caveman +featheredge +jordan almond +sandblaster +coralberry +low-calorie diet +hoot owl +garter +bain-marie +wrecker +fenugreek +double-hung window +idol +scullery +balloon vine +summer savory +winged spindle tree +Helvella crispa +walrus mustache +gas engine +boulle +rush grass +rue +hoe handle +cat fancier +deerstalker +dunker +American red plum +fall dandelion +groover +sprag +stair-rod +wish-wash +pricket +architrave +California laurel +net melon +Arizona sycamore +executive secretary +silverweed +silky cornel +surface ship +square sail +common purslane +villa +holly-leaved cherry +sweet birch +pecan +artillery shell +breast pocket +pirogi +scarlet runner +rabbit brush +mealworm +leather carp +palette knife +Jerusalem sage +boneshaker +slit lamp +digital voltmeter +polar glacier +square-rigger +homogenized milk +Sten gun +lesser calamint +pyrograph +Korean lawn grass +Zinfandel +crepe fern +western ragweed +clasp knife +distributor housing +cartouche +scooter +ski parka +jackknife +Carolina spring beauty +soft diet +candlesnuffer +horse trader +step stool +agouti +accelerometer +annual fern +judge advocate +angelica +roll film +treehopper +ombu +comer +sultanate +kitchen help +hooded ladies' tresses +milking machine +knuckle joint +Jamaica honeysuckle +music teacher +sauerkraut +Weston cell +slivovitz +Worcester sauce +tall bellflower +chancery +prophetess +casquet +shortfin mako +sorus +visual display unit +asp +grenadier +black pepper +crottle +erasable programmable read-only memory +jabot 
+ratchet +disk controller +chief petty officer +tap wrench +white mountain ash +cultivated rice +flying phalanger +skillet corn bread +BB gun +Elamite +European red elder +reed rhapis +ciderpress +inga +torpedo +wild teasel +bean curd +oeil de boeuf +acuminate leaf +bitter lemon +hitchrack +Lorraine cross +hostess +European dogtooth +adz +polonaise +rock sandwort +Waldorf salad +myrmecophile +klystron +mole rat +draba +corn borer +robusta coffee +chub mackerel +leatherleaf +chronometer +Moselle +sea aster +fennel +slop basin +constable +Brunswick stew +hydraulic pump +French omelet +icebreaker +Manx shearwater +press of sail +ninepin +blue succory +bootstrap +hallstand +chit +firefly +bearded seal +fuel filter +jezebel +mate +Roquefort +cheesecloth +plasterer +blue pimpernel +lake dwelling +shrink-wrap +goat cheese +common gum cistus +coastland +Sunday best +wild tobacco +mandrake +common unicorn plant +barbican +culotte +blockhouse +German iris +tarragon +caramel +wild rosemary +grain +voyager +squirting cucumber +eastern narrow-mouthed toad +creeping fern +luge +saffron +garland flower +furnace room +starship +Oriental scops owl +Italian honeysuckle +berserker +Chinese elm +scrubber +bishop pine +French polish +compromiser +skimmer +river shad +lobster thermidor +leadwort +man-of-the-earth +razorblade +vicegerent +empress +link +ham and eggs +wild lily of the valley +blackfish +splicer +fossa +mara +moneygrubber +brachiopod +fauteuil +caldera +finish coat +croupier +termer +leopard's-bane +sei whale +molucca balm +dolly +dog food +term infant +soft roll +episcia +sewer +inquiry agent +active citizen +perry +California newt +moon shell +bladderwrack +common shrew +dill +Dutch elm fungus +key lime +electrometer +divorce lawyer +lamb's-quarters +apple turnover +shipmate +Guernsey +legionnaire +electric blanket +Rocky mountain pinon +tobacco mildew +stinking iris +forestiera +departure lounge +wiper motor +jurist +scarlet runner +pallbearer +batter's box +inertial 
guidance system +fines herbes +oilcan +sisal +mustache cup +steamed pudding +Visayan +fiesta flower +lady tulip +lungless salamander +batiste +electrical system +blazing star +car carrier +Walloon +mother hen +stump +mulled cider +secondary coil +Alexandria senna +etui +scrumpy +Havasupai +jawbreaker +glume +ex-husband +Eskimo +Joint Direct Attack Munition +number theorist +five-hitter +pinstripe +Olympian +common mackerel +stone bass +bigos +Bahraini +airbrush +great ragweed +glass lizard +hand fern +roundel +riding master +shoetree +yellow avens +old fashioned +dolman +stinger +nursling +legate +faille +golden fern +bedpost +shop steward +kidney bean +bladderwort +internist +limeade +Bruneian +Coloradan +playsuit +wintergreen oil +Cantabrigian +mutton snapper +shot putter +hand grenade +moccasin +cobnut +marrow +separatist +cockscomb +discharge pipe +Gabonese +spade bit +chicken cordon bleu +varnish tree +European wood mouse +striped gentian +Ayrshire +curassow +moo goo gai pan +malarial mosquito +glow tube +ledger board +bib-and-tucker +European chestnut +suffragette +color wash +gaffsail +golden larch +voting machine +Kahlua +lungi +amusement arcade +Uzbek +butternut +mold +mule's ears +dickey +shrimper +trophozoite +dreadnought +shepherd's purse +greenhouse whitefly +spotted gum +copperware +perfect game +semigloss +spawn +telecom hotel +stakeholder +mason wasp +flibbertigibbet +chin strap +fringed pink +saki +urchin +memorizer +roulade +whiting +cling +corncrake +Queen of England +choo-choo +empty +heating pad +playmate +visualizer +popcorn ball +absconder +sou'wester +target acquisition system +mock-up +dental floss +tray cloth +haddock +bulblet fern +housing commissioner +delayed action +anchor light +harbor porpoise +water wings +PT boat +night latch +fennel +doorframe +green-tailed towhee +grey polypody +torture chamber +American germander +Chinese wistaria +cattalo +accompanist +rifleman +alpine clover +contrarian +lemon peel +Mexican cypress +sprog 
+dado +Galilean telescope +desmid +lockup +Latin +American raspberry +mescal +butternut +prairie orchid +downy yellow violet +green hellebore +radio compass +bread and butter pickle +Cherokee rose +knish +destroyer escort +Arkansan +langlaufer +pyxis +winter savory +velocipede +motley +winter savory +law student +barren ground caribou +apple dumpling +field hospital +works +city editor +European flatfish +Morchella crassipes +life office +boot camp +cream sauce +cape aloe +acetate disk +devil ray +tile cutter +Plymouth Rock +microspore +godown +Syrian +tiercel +American cranberry +lesser spearwort +anopheline +Spanish oyster plant +wire cloth +attic fan +birch beer +small computer system interface +crook +ribbon fern +explorer's gentian +nagami +I-beam +rosebud cherry +Jerusalem artichoke +Stillson wrench +pluralist +district manager +Levantine +orangeade +part-timer +post horn +Oregon grape +contadino +cargo helicopter +silverpoint +chaja +California bluebell +case +Shasta +cheese cutter +Leishmania +avalanche lily +iron horse +bialy +Yana +Delawarean +Prussian +nonpareil +hammer +hoper +chewink +anil +skim milk +desert four o'clock +crescent wrench +white marlin +blue jasmine +malacca +anadama bread +purple poppy mallow +ganglion cell +ligature +no-parking zone +golden clematis +Cotswold +aliterate +shebeen +yardarm +superbug +fanaloka +stinking cedar +spirochete +wort +pater +heaume +thermocouple +ironing +naval tactical data system +European goatsucker +prairie cordgrass +accused +foreign agent +halberd +western mugwort +esthetician +Persian lilac +cracked-wheat bread +crosscut saw +rock penstemon +paper cutter +crematory +ideologist +cattley guava +margarine +creosote bush +hoary plantain +spark gap +lumberjack +Greek valerian +mission bells +tight end +bigeye +large crabgrass +stone marten +cleat +lentil +bay scallop +lector +charger +assemblywoman +second lieutenant +boil smut +sarsaparilla +hydromel +cat flea +pinfish +whole milk +hairnet +myeloblast 
+peasant +blind curve +first offender +dwarf-white trillium +Brother +coatdress +gun emplacement +tamarisk gerbil +snap +air cushion +trailing edge +potato vine +gig +everlasting pea +champion +dibble +rattail cactus +timothy +prince's-feather +cutlas +lockring +sealing wax +Brussels lace +corn mint +highboard +she-oak +wild celery +pillar +Burberry +Hakka +leucothoe +bell tent +gallery +coontie +leather fern +smack +adenovirus +linoleum +chain wrench +tammy +gas fixture +nut bar +baneberry +butterscotch +goat's rue +bullock +grey snapper +mother-in-law +hyson +wayfaring tree +mollie +needle spike rush +buckwheat +bayberry +brush-tailed phalanger +dry rot +harborage +stormy petrel +Oriental beetle +Atlantic halibut +coping saw +simple fruit +viscose rayon +surgeonfish +upstairs +security system +common ragweed +verticillium +pancake batter +hawk's-beard +Dutchman's-pipe +refrigeration system +European parsley fern +Ivy Leaguer +totalitarian +gonococcus +towhead +showy sunflower +pallium +multiengine airplane +hair trigger +rabbit-eared bandicoot +siskiyou lewisia +fuel system +flat arch +broad beech fern +Alpine lady fern +bracken +Kentucky black bass +rut +mountain maple +tunaburger +umbrella fern +white-headed stilt +meat hook +panhandler +washhouse +barnyard +safety lamp +leg +ripple mark +paper +sagebrush lizard +light heavyweight +common nutcracker +operator +stalking-horse +horseless carriage +fishhook +suction cup +peg +Ungulata +false teeth +round-bottom flask +Luba +campaign hat +firebox +rudder +parapet +ice pack +appellant +spirit stove +metheglin +common bamboo +soapwort gentian +pannikin +time capsule +burn bag +folk poet +tropical prawn +end man +new caledonian pine +linen +web +free trader +jury box +railing +pignut +leaker +potboy +rubber boa +white snakeroot +plumber +Candida albicans +surfboat +woman +promulgator +eyecup +wild China tree +rattlesnake master +Viyella +alpine salamander +ailanthus silkworm +Albatrellus ovinus +war room +meadow vole 
+robotics equipment +rotary actuator +Engelmann spruce +pinesap +beefcake +native speaker +ridge +injector +water chute +salmonberry +decoupage +bottlebrush +date plum +circlet +American mountain ash +pocketbook +horsemint +sweet four o'clock +kirpan +pinto bean +chervil +equator +range animal +candy thermometer +calanthe +cul +stipendiary +brahman +pelican crossing +topgallant +wild senna +sliding window +carrier pigeon +Tatar +quadruplet +bumboat +spearmint oil +slip clutch +young Turk +golden yarrow +shank +glasswort +dental plaque +Manduca sexta +Northern bedstraw +dent corn +Life Saver +western wall flower +bedder +wherry +Tuscarora +scrapple +borstal +reflux condenser +problem solver +nondriver +perforation +eastern cricket frog +white wood aster +broad buckler-fern +Cape primrose +herringbone +head louse +earl +baton +recording system +primary color for light +cherry laurel +pomfret +ratafia +chocolate milk +obscurantist +revisionist +rood screen +magnetic needle +commensal +oil tycoon +celebrant +domicile +harvest mouse +California nutmeg +greater spearwort +black-billed cuckoo +winepress +demographer +straw boss +diabetic diet +sweetmeat +rabbet +ming tree +basketweaver +freestone +walk-in +Aryan +box coat +audio amplifier +chicken salad +churidars +whydah +box +batman +siren +selectman +gouger +drip coffee +Caesar salad +interpreter +whinstone +grey goldenrod +minicomputer +honey crisp +hypercoaster +Irishman +swamp white oak +reed canary grass +globeflower +cynthia moth +fennel seed +canthus +chino +blind date +tar pit +watermelon begonia +fishtail palm +overcast +Pearmain +primary color for pigments +coal seam +wherry +safety bolt +cretonne +Michigan lily +inflater +moneybag +huckleberry +brassard +bush vetch +looking glass tree +pinwheel roll +alfalfa sprout +sea kale +clinometer +achira +lorgnette +potter wasp +gilded flicker +tody +capulin +captain's chair +crackle +gerardia +prie-dieu +venture capitalist +New Jerseyan +block and tackle +elf cup +bur 
reed +automatic transmission +wax palm +flytrap +crack willow +coachwhip +swizzle +lugger +Dewar flask +baster +oxyacetylene torch +Culex quinquefasciatus +St Peter's wort +wild hyacinth +Russian almond +burrfish +wintergreen +katsura tree +butcher knife +perfumery +thresher +porte-cochere +sheepwalk +hypotenuse +Dalmatian iris +buttercup squash +demiglace +goldenseal +preceptor +rigger +poikilotherm +old-age pensioner +posthouse +wood horsetail +repeater +reciprocating engine +Rambouillet +terra cotta +togs +battledore +horizontal tail +missile defense system +trier +morello +woolly adelgid +munition +double creme +in-fighting +squirrel corn +crow's nest +antler moth +brake cylinder +bandoleer +noticer +Parmesan +hipline +cheapskate +Dubonnet +mole rat +bog aster +ribbon tree +meadow rue +nard +ratel +loose smut +snapping shrimp +golden glow +basil thyme +Florida strap fern +moonshine +flume +lace fern +black bream +orchestra pit +archerfish +exile +ringdove +career man +godfather +bottom-feeder +pasteurized milk +dental implant +pedicel +Catalpa speciosa +yellow foxglove +lancet arch +steam shovel +sampan +patrol boat +sailor cap +tollgate +monal +velociraptor +cacique +jack oak +cursed crowfoot +creep +Parry manzanita +common matrimony vine +grace cup +caecilian +spurge laurel +prickly lettuce +Regius professor +camail +Sitka willow +Courtelle +gin sling +dogmatist +guest +saltine +dust cover +sport +sweeper +feist +lady's-eardrop +vibist +wire stripper +tenpin +interplanetary space +beet green +pruning knife +drainage system +gunnery +ballet master +lime juice +flak catcher +lacrosse ball +Canadian aspen +beatnik +railhead +utilizer +spadefish +Arizona white oak +city university +dense blazing star +hedger +chain pickerel +right-hand man +namby-pamby +nacelle +redneck +tumbler +Chief Secretary +cannon +cupola +kummel +papaya juice +Burton +Stanley Steamer +loganberry +stylus +square meal +rock bass +western ladies' tresses +dramatist +assignee +tandoor 
+trumpetwood +segregator +green adder's mouth +coral necklace +ani +iceboat +densimeter +oxtail soup +kernel +cos lettuce +greenishness +panchromatic film +Parker House roll +oatmeal +backsaw +double Gloucester +bailey +storage cell +giant +coconut milk +broadtail +barouche +loir +soybean meal +white-leaved rockrose +junction barrier +spandrel +sweat bag +goldilocks +flowering wintergreen +cockspur +beef fondue +holding cell +cardamom +cagoule +Kamia +tangelo +Herschelian telescope +wine bar +kachina +sand sage +guy +ivory palm +citrus mealybug +topper +ladyfish +force pump +fanion +calaba +Iowa +orrisroot +ivorybill +Secretary of Agriculture +gagman +dry cell +hypnotist +kenaf +grey alder +deathwatch beetle +gagman +magnetic stripe +trap door +abdominal wall +prefab +broomcorn millet +architeuthis +angler +Pacific giant salamander +barbette carriage +low-fat diet +veal scallopini +B battery +wallah +landing flap +pistachio +jaguarundi +nagi +cicerone +felt fungus +Aertex +stocks +smooth aster +patchouli +lemon sole +sleeper +basket fern +dundathu pine +anjou +Moreton Bay chestnut +broom sedge +candid camera +red angel's trumpet +oilstone +cinnamon toast +Pacific walrus +fruit custard +Jehovah's Witness +mate +voyeur +Esselen +achromatic lens +sanguine +brine shrimp +dunce cap +swot +transit instrument +grey willow +pack +bench clamp +Nova Scotian +gadgetry +silvery spleenwort +enchantress +rough fish +morula +giant taro +sorus +roux +polyhedral angle +spruce beer +Chicano +cola extract +outfielder +kohleria +white-rumped shrike +car-ferry +subway token +spoon bread +totara +corn borer +bowhead +tensimeter +water scooter +flickertail +Catholicos +pleaser +blue-eyed Mary +calabash +handyman +cascades frog +facing +scarlet oak +lutist +ginger +tree tomato +Harvey Wallbanger +tent peg +insectivore +fusil +swale +chinning bar +bladderpod +New Dealer +dhoti +proscenium arch +common vetchling +channel +collect call +safflower +Texas tortoise +test equipment +theca +RAM 
disk +sheep sorrel +rammer +buttonhook +honey mesquite +dominus +babirusa +queen +Aspergillus fumigatus +crash barrier +nonmember +Muscovite +verdin +Australopithecus afarensis +Turkish Delight +stalked puffball +giardia +divider +mountain skink +head smut +pacemaker +evaporated milk +rattlesnake fern +flamethrower +navy bean +bather +steed +showy orchis +stone crab +artichoke heart +phantom orchid +space helmet +swamp laurel +privateer +junior +surcoat +bristlegrass +flower girl +aphid lion +penthouse +lemonade mix +coude telescope +natal plum +scriber +wood nettle +rape suspect +resplendent quetzel +western poppy +choir loft +fore-topsail +thyme-leaved sandwort +erotic +short circuit +outdoors +flowering tobacco +hookup +aviatrix +corker +horehound +horn +swamp pine +water biscuit +cherimoya +vaporizer +courtier +European sole +full skirt +Mother Carey's chicken +cymule +huck +white snapdragon +mountain nyala +country borage +bonduc +casein paint +grampus +shrimpfish +lodge +dragee +black walnut +caraway seed +roper +glass cutter +tab key +Richardson's geranium +demigod +chichipe +Italian ryegrass +cadet +electrograph +rudd +carpenteria +foie gras +lignum vitae +hedge nettle +pledger +American hackberry +flageolet +beaked hazelnut +reflectometer +sticky geranium +marriage bed +white pepper +japanese clover +whiteface +gnat +extrovert +Canada plum +talipot +chicken stew +egg foo yong +fraxinella +skibob +saucer magnolia +jacket +green smut fungus +cloakroom +landing skid +booth +ice milk +dipole +striped coral root +red buckeye +roughcast +breaststroker +cowherb +razor clam +first-aid station +briarroot +clambake +lander +Bramley's Seedling +frail +jird +minisub +luging +poison milkweed +European lobster +epidemiologist +spandex +paloverde +marumi +bypass condenser +punter +petty spurge +Coryphaena hippurus +bilberry +vermillion rockfish +witness box +viscometer +pulque +Massachusetts fern +herring salad +ridge tile +mesa +dwarf grey willow +southern aster +punch 
pliers +tarnished plant bug +hoop pine +Japanese red pine +benedick +rebozo +silver plate +silver willow +mouse-ear hawkweed +bonito shark +abutment arch +noble cane +tiger rattlesnake +pongee +jumping plant louse +pattypan squash +giant ryegrass +railroad bed +stiff aster +imperial Japanese morning glory +laundry +winter cress +large white petunia +tea maker +pen-and-ink +early warning system +lug +monocot +sea wormwood +breechblock +postage meter +third rail +Mongoloid +Australopithecus boisei +umbrella tent +stirrer +Dumpy level +beroe +post and lintel +green spleenwort +tomato paste +dishpan +stentor +sweatband +cobbler +New York fern +gaff +prairie willow +cyclops +jigsaw +rotavirus +pallet +eastern ground snake +boiling water reactor +acute triangle +agora +European cranberry +roebuck +surgical dressing +busboy +cannikin +feedlot +common pond-skater +cochin +horsehair lichen +fetter +sapote +fichu +dermatologist +fire tongs +creme anglais +foster-mother +laurelwood +chicken snake +mincemeat +rocker +wild spinach +powder and shot +butterwort +auxiliary engine +mamey +hart's-tongue +sucking pig +American turkey oak +troopship +buttermilk +divi-divi +boatswain's chair +soda fountain +southern flying squirrel +elastic +cutaway +housekeeper +renegade +apple rust +bridoon +machicolation +stunt +keyhole limpet +personality +solitary vireo +epidendron +Jihadist +boffin +bettong +terror +partial denture +pusher +saltcellar +capstan +large poodle +Bibb lettuce +low-bush blueberry +staple +banded krait +sickroom +barnyard grass +wandflower +woodworm +bluegrass +squirrel's-foot fern +rabbitfish +delta wing +milking shorthorn +limber pine +guru +gamine +scythe +sweetsop +Gruyere +bloodmobile +mine detector +American mistletoe +silver beech +hound's-tongue +Lombardy poplar +basket fern +pink-and-white everlasting +redtail +Aladdin's lamp +mace +outtake +condensed milk +Canada wild rye +silver perch +waxflower +taxer +Chinese chestnut +Our Lord's candle +mugwump +school 
system +salp +osso buco +dress shirt +butterweed +low-fat milk +couchette +broomcorn +proscenium +mill agent +smut grass +humpback +southern spadefoot +military leader +canebrake rattlesnake +tailor-made +ebony +beach house +flying gecko +hoary alison +typhoid bacillus +Romanov +vanilla pudding +sweet cicely +Spodoptera exigua +dress rack +flannel +skipjack +bolognese pasta sauce +rooibos +thunderer +blessed thistle +gauntlet +mahatma +granadilla +laurel sumac +Yuma +thyme-leaved speedwell +encyclical +twill +linocut +manna gum +spark arrester +cocklebur +Indian hemp +lemon oil +Hall's honeysuckle +raceway +flop +Himalayan lilac +one-flowered wintergreen +photosphere +silvery spleenwort +convex polygon +canarybird flower +foster-sister +fluffy omelet +palanquin +roll +dandelion green +Javanese +workpiece +Carmelite +bread mold +schlemiel +wild lily of the valley +grugru +solenoid +puff batter +skep +balance wheel +Gadaba +portia tree +mobcap +two-man tent +scuffle +firebrat +ant lion +anise +caster +giant petrel +American water spaniel +naboom +treasure ship +foster-son +fiddleneck +alidade +sugar refinery +wild oat +water beetle +generic +damson plum +abrocome +detainee +pitch pipe +coast +nilgai +radiotherapy equipment +heart-leaved aster +gristmill +grocer +Appaloosa +Cheviot +brake pedal +lantana +cave myotis +Rob Roy +sea spider +latrine +carpophore +recycling plant +coondog +brace and bit +funambulist +eggar +mantelet +postdoc +mezzanine +coco plum +pulse generator +high-vitamin diet +menhaden +mechanical engineer +bergamot mint +Chuvash +grated cheese +helicon +belladonna +beet armyworm +eelgrass +resuscitator +interrupted fern +arrow grass +cistern +Pacific herring +colostrum +journal bearing +Fauve +wrist pin +canape +choice morsel +quadraphony +guard boat +shortgrass +claymore mine +hitching post +cargo door +decoder +gym rat +Cocopa +commander +apple of Peru +seckel +yellow goatfish +dog flea +dodo +oconee bells +Tudor arch +turkey stuffing +ebony 
spleenwort +wheat flag smut +scolopendrium +Brazilian pepper tree +gusset +inspector +lunar excursion module +baron +plantigrade mammal +Creole +phosphate +aromatic aster +ghee +audiovisual +onychophoran +cotton stainer +lieutenant junior grade +spheroid +amen corner +caper sauce +Caladium bicolor +dyer's rocket +seaside goldenrod +flint corn +Very pistol +rotifer +steeplechaser +rouleau +escape wheel +Namibian +millivoltmeter +emmer +climatologist +agateware +sea lyme grass +inclinometer +water fennel +saddle seat +vicar +garden cress +ski rack +Norfolk jacket +casaba +coast rhododendron +sericea lespedeza +hematocrit +autopilot +tilter +finish coat +Pennsylvanian +shrubby St John's wort +podocarp +percussion cap +ceriman +peanut bar +gean +jack +durra +rotor +carob +cottage tulip +three-spined stickleback +trencher +elevator +kalumpang +abaca +Australopithecus robustus +active matrix screen +water bed +hatmaker +lodestone +cat food +overcup oak +balletomane +popgun +rheometer +process cheese +frog legs +heartleaf arnica +p-n-p transistor +steam turbine +Tulu +scalene triangle +licorice fern +coffee break +trade unionist +starved aster +firing pin +water gum +Masonite +hairspring +seminarian +blue racer +forecastle +scrub pine +Atlantic spiny dogfish +kopje +orphrey +fan tracery +gee-gee +vixen +interstellar space +Harris Tweed +sawmill +lemon mint +bitewing +ringlet +Chinese mustard +paleontologist +American hazel +brigantine +clay-colored robin +zombie +nectarine +West Indian jasmine +pineapple weed +rusher +gynecologist +pole +thylacine +myrtle beech +golden cup +woodruff +T-bar lift +terebinth +service club +homegirl +Blue Mountain tea +figwort +New Hampshirite +Stayman +tonometer +white turnip +messuage +cruet-stand +colliery +connecting room +lesser twayblade +bland diet +crown prince +beggarwoman +restharrow +bower actinidia +firebug +hepatic tanager +telegraph +Spodoptera frugiperda +spackle +carpenter's square +pyx +supermom +thickhead +whorled milkweed 
+Arctic char +Chinese rhubarb +pince-nez +wolverine +tomato concentrate +cascarilla bark +red underwing +leather flower +Jerusalem thorn +bullpen +Salisbury steak +anode +coffeeberry +bottling plant +fritter batter +aerial torpedo +matrix +local oscillator +stalked puffball +bruin +three-cornered leek +wassail +stabling +damping off fungus +myriapod +osier +lesser kudu +cownose ray +chokecherry +wagon +obstetrician +Glengarry +even-pinnate leaf +wine sauce +osteocyte +baker's yeast +heir presumptive +blackjack +tympanist +golden fern +fipple +Japanese oak +bar mask +stamping machine +argus +knobcone pine +oil beetle +lanai +upper berth +condenser +proctologist +catechu +wild spurge +vestry +ground snake +proton accelerator +walker +scarlet bush +transom +lagging +bouillon +slender loris +black currant +developer +football hero +plum sauce +striped mullet +prince charming +fictional animal +prosimian +lug wrench +lemonwood +kirsch +spy satellite +black caraway +Thompson Seedless +bead tree +purple fringeless orchid +Virginia strawberry +chigetai +punkie +gall wasp +addressing machine +rock polypody +good-king-henry +spring cankerworm +wimple +noncandidate +saskatoon +hacienda +Darjeeling +snowberry +lounging pajama +ascospore +ski-plane +hedgehog cereus +Welsh onion +yautia +coaster brake +sickle cell +parrot's beak +fuller's teasel +painted greenling +scablands +stuffed cabbage +barrel organ +etcher +dwarf maple +camp +Australian blacksnake +currycomb +obtuse triangle +rose gum +psychrometer +abridger +torpedo +carpet loom +sodalist +slender rush +loligo +sclerometer +wimp +dotted gayfeather +green ash +pinstripe +moralist +medusa's head +garden centipede +heath aster +fool's parsley +olla podrida +Potawatomi +Edam +toothache tree +hulk +seabag +narthex +compartment +prairie star +lookdown +B-flat clarinet +event planner +clip lead +shirting +milk punch +supercharger +macadamia nut +giant coreopsis +computer store +martingale +keyboard buffer +summer flounder 
+squash ball +gas turbine +object ball +plier +black mulberry +reef squirrelfish +scampi +willow aster +bowler +striped marlin +smooth muscle cell +diplodocus +Liberty ship +sponge cloth +guitarfish +walking leaf +showroom +California bluebell +bolo +turnbuckle +boysenberry +hardware +Gael +imago +endorser +jujube +dust bag +rapporteur +field wormwood +low-water mark +naval missile +Pacific yew +reversible +crabapple jelly +poniard +barricade +spawner +simnel +seltzer +deckle edge +needle +timbale +satellite transmitter +organization man +job candidate +orderly +native cranberry +fir clubmoss +coaming +chartered accountant +electron accelerator +Sierra plum +American foxhound +long underwear +Penobscot +blueberry yogurt +biretta +cascara +Paranthropus +Dorian +nun's habit +lenten rose +Augustinian +designer +northern phalarope +mombin +hazel mouse +reeve +waffler +telegraphy +Verpa conica +ignition coil +Japanese oyster +S-shape +divining rod +ant thrush +throat protector +interlocutor +Desmodus rotundus +pere david's deer +attenuator +Cypriot +red sandalwood +pendulum watch +broadcloth +striped drum +sequence +safety arch +diapensia +hog +western spadefoot +chlorella +comb-footed spider +Chechen +darning needle +C-ration +hard beech +piano action +scaling ladder +Nepal trumpet flower +ravigote +screw wrench +ramekin +Lyonnaise sauce +dinner napkin +partial veil +masseuse +coatrack +mooring tower +blue-eyed African daisy +English horn +baton +rope tow +toll bridge +massage parlor +quark cheese +lounging jacket +tall goldenrod +flying jib +coordinate axis +barley-sugar +integrator +worm gear +captain +sweatshop +class +layer +chili powder +dripping pan +oatcake +newsroom +tadpole shrimp +rake +trade magazine +silks +ram's-head +senior +knower +masseur +yam +peg +wheel tree +hardbake +test room +long-spurred violet +creeping spike rush +shrapnel +coffee senna +matchbox +creeping soft grass +welder's mask +pickaback plant +urial +hooded pitcher plant +incense cedar 
+Ohio buckeye +ant cow +skeleton fork fern +Indiaman +swamp ash +testatrix +marang +spherocyte +Winesap +Indian mallow +teju +Yersinia pestis +dye-works +sauerbraten +coral bean tree +safe house +postulator +eyas +lotus +wood vise +lady-of-the-night +East German +cymling +rock candy +western omelet +anoa +rainbow seaperch +crossover voter +Finn +tree shrew +hog plum +Federal +shagbark +clockwork +Alexandrian laurel +metal wood +brill +military chaplain +trend-setter +call-back +Indian rat snake +spurred gentian +Japanese maple +forest goat +bee moth +viola da braccio +duckboard +armyworm +hangnail +counterbore +cream-of-tartar tree +Mullah +bonbon +water hazard +temple orange +corporatist +rough bindweed +Turkish bath +mistletoe fig +beach sand verbena +caddisworm +English plantain +brown Betty +power pack +lion's-ear +Francis turbine +stayer +dichondra +marsh St-John's wort +squab +energizer +common horehound +mantispid +pullback +handwheel +spark arrester +yakuza +Virginian witch hazel +grunter +waterworks +bondwoman +chain printer +stockjobber +coconut milk +yardgrass +blue chip +bridle path +riser +pleurothallis +saltwort +salal +broadside +blackboard eraser +bastard +Para rubber tree +red bat +digital-analog converter +calabash +cashier +cow shark +horned pout +microphage +monologist +woolly monkey +Illinoisan +marsh horsetail +distaff +siris +eparch +gooseneck loosestrife +sounding rocket +multiprocessor +saiga +xerographic printer +madrona +right triangle +sweet gale +red maids +wolfsbane +pork-and-veal goulash +French sorrel +mutterer +Venetian sumac +drumlin +white crappie +squire +large-flowered calamint +northern cricket frog +mushroom sauce +supertanker +morello +auxiliary boiler +Virginia thimbleweed +cottage tent +bubble shell +big shellbark +wormwood sage +cider gum +coast lily +American feverfew +Peruvian balsam +purple silkweed +tobacco moth +desk dictionary +rock elm +eastern indigo snake +Japanese privet +lamb +levee +L-plate +soapfish +painted 
tongue +scuttle +markhor +Marburg virus +mackinaw +major +crypt +ball and chain +domestic silkworm moth +bottom feeder +mistress +death house +freight elevator +bellyband +Pulex irritans +Bacillus anthracis +fire control radar +hysterosalpingogram +turbogenerator +decompound leaf +vambrace +scentless camomile +Medinilla magnifica +prima ballerina +Northern Spy +quartz lamp +grains of paradise +justiciar +felt fern +seismograph +Madagascar jasmine +imaret +white perch +Alpine mouse-ear +tea bread +yellow bass +poseuse +espionage agent +punching bag +eurypterid +orange sneezeweed +banded stilt +armhole +postern +mother +kapuka +catechumen +Soubise +Sauvignon blanc +gunnery sergeant +self-starter +ceratozamia +Atlantic cod +Reoviridae +blood cup +horseshoe bat +oriental plane +voussoir +fetterbush +samara +truncated pyramid +lingcod +athenaeum +shyster +Carolina hemlock +submarine torpedo +floating fern +yataghan +sun tea +viola d'amore +conenose +ventilation shaft +walk-up apartment +saury +wild wheat +porcupine ball +tahini +kris +grass fern +drip pan +black bryony +Scotch broth +tapioca pudding +southwestern toad +Hare Krishna +guimpe +wild madder +megalocyte +teaching fellow +shrubby penstemon +lesser wintergreen +privet hedge +Fahrenheit thermometer +stern chaser +prickly ash +pump room +ricer +chicken mousse +wing commander +sun gear +bolus +alpine milk vetch +opera cloak +twinjet +Goldie's fern +abnegator +alphabet soup +node +grape jelly +early coral root +Tarzan +quarterstaff +greeter +Eurasian woodcock +primary coil +quirt +tinkerer +bolt +creme de fraise +voltage regulator +news photography +Jat +bristly locust +Gouda +dickey +lobster butter +dwarf flowering almond +fagot stitch +Reform Jew +ostrich fern +bathyscaphe +purple mullein +alpaca +civic leader +jellaba +Arizona ash +wasabi +Irishwoman +choke +stockinet +religionist +sewage disposal plant +bittersweet +Hyphantria cunea +pheasant under glass +screen actor +chapterhouse +quoit +horseshoe bat +rapper 
+cupule +planetary gear +cascade penstemon +redoubt +salt +areaway +megalomaniac +bush willow +amethystine python +plains spadefoot +colour supplement +kick pleat +bell apple +narwhal +slippery elm +stenograph +baa-lamb +quadrant +balker +jobcentre +spit curl +bastard indigo +malacca +serow +adobe lily +yacca +palestra +penalty box +scrub beefwood +reenactor +screening +white bryony +alderleaf Juneberry +harpoon +alpine clubmoss +neurosurgeon +surrey +sweet calabash +Scotch laburnum +coquille +French honeysuckle +extrados +pipe cleaner +southwestern white pine +Virginian stock +scaly lentinus +aileron +carob bar +swordfish +Alpine woodsia +negus +wireworm +sweep +goldfields +drop arch +European bream +roly-poly +pin +bastard wing +fustian +wild buckwheat +lake whitefish +overcoat +water filter +Bermuda chub +New Zealand spinach +high-hat cymbal +European larch +radiologic technologist +fine-tooth comb +brunch coat +splice +electronic converter +overmantel +extern +taper +cluster bomb +teletypewriter +pinwheel +trailing arbutus +quipu +creeping zinnia +orange milkwort +tabard +Australopithecus africanus +melancholy thistle +insole +courser +darkroom +surface-to-air missile system +bark-louse +Confederate +neritina +clip-on +spouter +trench knife +outside caliper +dhak +Limburger +chuck wagon +buttercup squash +shirtdress +pouter pigeon +dirty old man +zodiac +fennel flower +mother figure +appointment +Manichaean +lignum +bouffant +rum sling +Ravenna grass +hibachi +gin rickey +American harvest mouse +cocozelle +western wheatgrass +black crappie +rhombus +Missouri goldenrod +barndoor +wild mango +pneumococcus +Boston lettuce +ratline +desert holly +cobweb +fluoroscope +ethnologist +tor +bullshot +stockade +greave +rock sea bass +slip-joint pliers +taxi dancer +schizophrenic +zill +creme de menthe +orange-blossom orchid +divot +supplejack +busybody +casemaking clothes moth +ramrod +gearbox +birdcall +Wiffle +thwart +beauty consultant +chicken paprika +trawl +skep 
+spirometer +hopper +kvass +doggie bag +bath chair +showy daisy +wild tamarind +Tarsius syrichta +glyptics +Algerian +cargo area +bunk +Velveeta +iconoclast +clinch +New Caledonian yew +false mallow +Japanese tree lilac +convex polyhedron +water boatman +cruise missile +finisher +colonoscope +cumin +wickiup +saccharin +whipcord +trailer camp +eryngo +cuckold +yam bean +fighting chair +forewoman +galingale +citron +positivist +four-lined plant bug +suet pudding +field pea +Circaea lutetiana +deer grass +trap-door spider +common corn salad +mirror carp +sounder +second-in-command +seaside alder +burgoo +ming tree +curry sauce +courbaril +green alder +figure loom +fauld +halfbeak +squelch circuit +cladode +winter cress +tongue and groove joint +dwarf dandelion +joss house +western buttercup +welted thistle +potato tree +anglewing +cookfire +marzipan +hood latch +seed shrimp +common moonseed +toasting fork +bevel +three-quarter binding +midwife toad +stage director +Pentecostal +technical sergeant +golden-beard penstemon +drunk +silky oak +corn gluten feed +T-square +stoker +selling agent +cruse +server +rope-a-dope +bicorn +matzo meal +wide wale +roadblock +false foxglove +tuck box +bandsman +smoke bush +machinist's vise +Highlander +scholiast +self-starter +Swedish rye bread +spark transmitter +maverick +maquiladora +cabinetmaker +compress +rainbow shower +huntsman's horn +mackinaw +copper rockfish +lappet +nitrate bacterium +telephone plug +soutache +Dacron +toboggan +sissoo +yogi +laurel-tree +vice chancellor +Christ's-thorn +cartridge fuse +serial port +quassia +tarweed +pecopteris +beggarweed +anchovy pear +bookbindery +woodland oxeye +toad rush +sandalwood tree +marsh andromeda +Tyrian purple +boothose +tragedienne +fragrant cliff fern +festoon +bondwoman +melancholic +butternut squash +exhaust valve +semi-skimmed milk +glowworm +Virginia oyster +Identikit +ayah +gallows tree +Carioca +monoplane +jewels-of-opar +scallop +moth miller +marsh cress +lobed 
spleenwort +ricotta +emitter +arame +tub gurnard +army attache +maniac +organizer +pheasant's-eye +Melba toast +homeboy +Bavarian cream +Maximilian's sunflower +backstop +Tremella foliacea +yellow avens +spreading fleabane +plumb level +false rue anemone +zabaglione +climbing maidenhair +doeskin +walking shoe +lancewood +material +jacksnipe +South American poison toad +agonist +hinny +paper mill +psychophysicist +valley girl +toast mistress +jorum +tiler +chicken Tetrazzini +trivet +grasshopper +three-mile limit +kink +kiang +pole horse +jig +Cornish heath +hedge thorn +false alumroot +Popper +remount +photojournalist +sideroblast +stonecress +Agave tequilana +Japanese lilac +hawse +maenad +air bag +leaf spring +dwarf willow +soda cracker +contralto +moleskin +pilaster +Audubon's caracara +pia +American organ +bleu cheese dressing +betel palm +PC board +almond willow +socializer +tone arm +stammerer +free-liver +scaler +Gentianopsis crinita +leak +black haw +hound's-tongue +grass pea +Stassano furnace +coralbells +ministrant +perihelion +Luxemburger +powder-post termite +arboreal salamander +cushion flower +foramen magnum +pyrethrum +poacher +woolly mammoth +horned chameleon +tearaway +father-figure +tufted gentian +salmi +finger millet +physa +registrar +polyoma +bamboo shoot +matchlock +seine +congress boot +bulgur pilaf +monosodium glutamate +Kentucky wonder +mycologist +kedgeree +ragweed pollen +boarfish +yellow pimpernel +tan +northern Jacob's ladder +macrobiotic diet +migrant shrike +big-cone spruce +colonialist +white dogtooth violet +bath asparagus +webbing clothes moth +ladies' room +experimenter +prairie bird's-foot trefoil +bootleg +cognitive neuroscientist +fire chief +flagfish +dendrite +stinking goosefoot +fore edge +hogfish +Spanish cedar +hotel-casino +Tory +life-support system +pea flour +cash bar +Chenin blanc +white-footed mouse +Canada garlic +salt-rising bread +roomette +mastodon +bell founder +long iron +bi-fold door +fig-bird +European water 
shrew +dyer's weed +frog orchid +allosaur +Florida yew +wild potato vine +crape fern +flat-topped white aster +klebsiella +oil heater +waxmallow +enjoyer +mesocarp +semidesert +senior vice president +coccidium +burrawong +syllabub +jump suit +harrier +leaf roller +cherrystone +cinchona tree +touring car +eulogist +air force officer +red goosefoot +cat thyme +smoothbore +slugger +cardiac monitor +cobber +blister rust +musicologist +rolled biscuit +Braun's holly fern +hog plum +nonpasserine bird +pascal celery +damson +Jonathan +Sheraton +cohune palm +egg white +baton +sixth-former +Siberian pea tree +choanocyte +wineskin +auditor +detention home +Leichtlin's camas +Chartreuse +clusia +club car +wattle and daub +security blanket +common American shad +assistant professor +marsh pea +camomile tea +gopher hole +gravure +Freudian +spirillum +maharani +equilateral +crow garlic +mammee apple +felwort +hardtop +dillenia +curlycup gumweed +pilot engine +calcimine +wooly lip fern +bitter dock +wineberry +jumper +monolingual +spinning frame +old-timer +native cat +diving petrel +sodium-vapor lamp +marchand de vin +sexton +matelote +interior designer +windfall +mole salamander +minder +bodkin +neutron bomb +Caloscypha fulgens +slinger ring +mezzo-soprano +aura +Southern Baptist +viscacha +midfield +tie +prosthetist +round-headed leek +yellow mariposa tulip +canary grass +staddle +Tokay +Muenster +brazil nut +California black walnut +applesauce +penologist +virgin's bower +tenon +steward +Jerusalem oak +red-bellied snake +bindery +scow +fluid flywheel +bullhead +satinleaf +clove +double glazing +matron +wild parsnip +winged elm +shoot-'em-up +musk deer +white rust +lock +Cornishman +Vidalia onion +corn spurry +freeloader +justice of the peace +inlay +myxobacteria +tiglon +tangram +German ivy +scented fern +woolly daisy +caretaker +gastroscope +scuppernong +spotted sunfish +guilloche +codling +wormcast +Eskimo curlew +tayra +European fly honeysuckle +septuagenarian +third gear 
+coatee +red alder +water ice +cubitiere +frame buffer +gamboge tree +pernyi moth +chicken Marengo +Galliano +Lincoln +true sago palm +hunter's sauce +carpet beater +alpine goldenrod +arch support +vehicle-borne transmission +jilt +paternoster +redcap +Siberian larch +hoary plantain +swan's down +chicane +reverse +divan +kneeler +alexic +mock turtle soup +daffodil garlic +mission bells +squilla +ursinia +winter's bark +trifoliate orange +discina +frijole +Swiss steak +maildrop +knotgrass +dog fennel +drum sander +heroin addict +costume +camber arch +shining willow +lutefisk +red porgy +microfossil +good old boy +angle bracket +pitcher sage +bordelaise +heat exchanger +carrion +bush jacket +fanjet +coach +blackface +sicklepod +Manhattan clam chowder +daisywheel printer +olive +Sphacelotheca +Spanish needles +brown root rot fungus +boudoir +encyclopedist +V-8 juice +red haw +brass buttons +gym suit +skywalk +water wagon +gas-turbine ship +stoup +lisle +sailor suit +box beam +balm of gilead +housemaster +hayrack +neutralist +water elm +brook thistle +doyenne +nark +alpha-tocopheral +WASP +hydrilla +water-shield +footlocker +variola major +pargeting +ion engine +yellow globe lily +Malecite +bloodleaf +yellow sand verbena +whorled loosestrife +packinghouse +Carolina parakeet +Virginia waterleaf +armband +red rockfish +factory ship +moon trefoil +jump seat +water gillyflower +yerba mansa +chamfer bit +compass saw +hopsacking +Indian rhododendron +sickbed +treacle +honey eater +mailsorter +seabeach sandwort +sob sister +primrose jasmine +prince consort +elocutionist +wishing cap +runner +trestle +sugar water +half-and-half dressing +fringed poppy mallow +portiere +bung +swan orchid +weather satellite +beef broth +marblewood +sapper +agitator +wren-tit +grade +allspice tree +spacewalker +American hornbeam +sieva bean +dill seed +potoroo +love-in-winter +alembic +Cheshire cheese +small white aster +Oregonian +flipper +twill +differential gear +Prince Albert +licorice 
+foster-father +Melkite +portraitist +Yosemite toad +Cox's Orange Pippin +slender wheatgrass +knob +silique +Rocky Mountain bee plant +stirrup pump +chicken hawk +sweetbrier +Sierra lodgepole pine +poulette +biohazard suit +striated muscle cell +Geiger counter +World Wide Web +turmeric +prairie wake-robin +latchet +pushball +grill +shooting lodge +floating-moss +refried beans +boojum tree +red poll +toothbrush tree +rabbiteye blueberry +red haw +sweet vetch +delta +upland cotton +ballet mistress +padrone +complementary color +great Solomon's-seal +bud brush +brandy sling +spinster +Andorran +Mojave aster +mackinaw +golden calla +bottom rot fungus +segmental arch +periwinkle +hellion +topknot +copper +Mexican hyssop +weeping love grass +point woman +pathogen +fall cankerworm +common shiner +silverspot +corer +atomic pile +crystal detector +yellow spot fungus +truncated cone +saprobe +variegated horsetail +Cro-magnon +cercaria +aglet +pollster +oyster bed +pancake turner +egg cream +sporozoite +quirk molding +mutisia +sound bow +physic nut +sugar-bush +cow +magnetron +jungle hen +brassie +rock bit +taco sauce +seeded raisin +desert selaginella +folding door +vinegarroon +Pinot blanc +rye +ellipsoid +betel nut +tree of knowledge +ambrosia +long tom +breechloader +bicolor lespediza +cosmetician +monoblast +American oil palm +prancer +farina +caiman lizard +hardball +bullock's heart +cotton rat +whiting +weather ship +sharecropper +creamcups +gas bracket +divinity +ornithologist +yellow twining snapdragon +showy goldenrod +end man +heptagon +sand dropseed +round file +guama +blue elder +sand spurry +raccoon dog +zigzag goldenrod +fast reactor +arctic willow +cyclopean masonry +punter +sgraffito +slattern +storage ring +clipper +pulasan +short-tailed shrew +scammony +daybook +umbrella tree +coloring +element of a cone +gesneriad +cane +burgoo +western coral snake +friendship plant +Leydig cell +scrutineer +hairy golden aster +inclined fault +water milfoil +bryozoan 
+nardoo +native pomegranate +curly grass +Florence fennel +resurrection plant +ice water +crown +ploughman's lunch +clustered lady's slipper +kitchenette +sand sedge +pouched mouse +roadbed +parsley haw +predecessor +super heavyweight +seedless raisin +mailbag +sparling +codling moth +squama +Bercy +thermoelectric thermometer +Jaculus jaculus +saltpan +firmer chisel +round whitefish +ramrod +criollo +pinch bar +slash pocket +thigh pad +velvet plant +intergalactic space +brazilian ironwood +whaleboat +sirrah +hanging fly +aspirator +Dominican +dribbler +yellow-eyed grass +Cornish +geophysicist +tarmacadam +marchioness +rattlesnake orchid +Alaska Native +ilama +myrrh tree +zucchini +licorice root +nosebag +lounger +troposphere +virginal +spaghetti Western +Virgin Mary +waterwheel plant +dry nurse +enate +carpet shark +rijsttaffel +stuffing nut +caraway seed bread +Leotia lubrica +kaffiyeh +Boston baked beans +halophyte +backscratcher +instillator +trefoil arch +pip +digitizer +dosemeter +Carolinian +French sorrel +boards +historian +rangpur +clansman +goral +leatherjacket +coiner +fleece +white globe lily +storm cellar +roundhouse +mediatrix +butterfly flower +swamp gum +prairie vole +rhizomatous begonia +common tobacco +Marco Polo sheep +subarachnoid space +broomweed +safety net +silky wisteria +swagger stick +spectacled caiman +derris root +soap pad +chop-suey greens +summer hyacinth +palo santo +carbohydrate loading +chinch bug +roadman +sheep plant +messiah +desk officer +banquette +drugget +trumpet arch +great duckweed +purdah +heartbreaker +hasty pudding +alligator weed +dragee +yellow bristlegrass +Jacob's ladder +campstool +coffee fern +sweet fern +little chief hare +cat-o'-nine-tails +rep +American red elder +divorcee +black salsify +cambric +sennit +Canada ginger +wonderer +Formica +cream-colored courser +zooid +European beggar-ticks +sorrel tree +piddock +blolly +red-flowered silky oak +bay +Hooker's onion +dark horse +cone clutch +Roman hyacinth +paintbox 
+mestiza +green alder +bill +panicled aster +mammogram +snuffbox fern +Rediffusion +swamp fly honeysuckle +stoup +psychiatrist +nodding groundsel +student union +cold duck +bee beetle +playbox +Psychopsis krameriana +nosh-up +earthnut +narthex +single-rotor helicopter +revetment +sweetleaf +seasoned salt +piculet +speckled alder +mackerel scad +common yellowwood +devisee +static tube +Spanish heath +umbrella plant +fucoid +Chilean +coral-root bittercress +fanatic +cachou +agony aunt +bird's-foot fern +washwoman +torchbearer +placoderm +frosted bat +spicemill +Cape lobster +hard-shell crab +colonizer +camphor daisy +friar's-cowl +false tamarisk +toggle joint +tinsmith +theorist +hydrologist +loganberry +universal donor +northern whiting +tent-caterpillar moth +russet +kangaroo mouse +African scented mahogany +bastinado +breast implant +betel +grade separation +vox humana +stodge +Maryland chicken +Anguillan +oil pump +governor's plum +narcissist +deadwood +private citizen +winker +ropewalker +gidgee +Lothario +ski resort +major-domo +von Neumann machine +belaying pin +water parsnip +Fissipedia +luggage carrier +spring water +oyster stew +kohl +celesta +date-nut bread +punchboard +sunniness +hospital train +man +rack and pinion +mixer +pousse-cafe +narrow goldenrod +Maxim gun +stiff +recruiting-sergeant +watch glass +white hellebore +tung tree +prairie white-fringed orchid +beef Stroganoff +scoffer +grassy death camas +Shawnee cake +tapioca +Short's aster +banker +laparoscope +honeyflower +Caterpillar +electric clock +baling wire +huntress +Surinam toad +art school +incurable +Canton crepe +apple juice +hipline +bronchoscope +marshmallow fluff +Texan +wild fig +sawed-off shotgun +forestay +red kauri +fish slice +Egyptian grass +English walnut +brown sauce +ogee arch +nectary +chambray +leather flower +phloem +Persian violet +bomb calorimeter +western narrow-mouthed toad +soup du jour +sickle alfalfa +caracolito +periscope +coralberry +sword bean +sigmoidoscope +water 
locust +hygrodeik +sycamore +sheikdom +ballistocardiograph +clove +akee +fucoid +jacquard +cat's-ear +puritan +slender wild oat +smooth softshell +purchasing agent +landing craft +chartist +lace bug +sharksucker +Virginia chain fern +horseradish +namer +ripcord +personage +aspirin powder +puku +Wankel engine +nightcap +velvet bent +roridula +cytogeneticist +olm +almond extract +common heath +fringe-toed lizard +Kentucky yellowwood +lithosphere +cramp +bulgur +scurvy grass +officer's mess +frigate +electroscope +giant chinkapin +opah +rutabaga +wood hoopoe +Farley maidenhair +shingle tree +argentine +router +palm nut +quillwort +hiba arborvitae +runcible spoon +hireling +sickbay +alpine totara +white lupine +Cotoneaster horizontalis +desert plume +staghound +Sea Scout +opalescence +enophile +Jersey elm +coal house +Helvella acetabulum +selenium cell +white camas +creole-fish +auger +fragrant agrimony +research center +achromia +shank +cottonseed +mod con +extension +sugar beet +winter flounder +silky dogwood +strop +tokamak +rabbit ears +baby farmer +fireman's ax +serration +taproot +socket wrench +action officer +Chilean jasmine +Greek fire +stem-winder +body louse +lumpsucker +stink bomb +American lady crab +dicer +lie detector +maneuverer +black-headed snake +tiger moth +shooting stick +spermatid +babushka +deaconess +home +prior +chanfron +chickasaw plum +big-eared bat +rusty woodsia +tertigravida +miniver +combretum +habit +bluehead +angled loofah +gipsywort +fire-on-the-mountain +purple milk vetch +alpine gold +merozoite +loddon pondweed +Uniat +provost marshal +Gyromitra fastigiata +Coigue +proconsul +oarfish +San Jose scale +filature +chimney plant +spiny softshell +bluecoat +live axle +river limpet +clever Dick +pink bollworm +Japanese plum +roarer +caricature plant +wardroom +Texas chachalaca +Bahia grass +Moreton Bay tulipwood +accessory fruit +pearl barley +ashcake +bunt +Polynesian tattler +pine fern +laughing owl +potato fern +speaking trumpet 
+adjoining room +bearing rein +banana quit +redbrick university +Scleroderma bovista +magdalen +pressurized water reactor +advisee +NIMBY +poorwill +almond moth +comedian +star tulip +cracked wheat +water pump +guest of honor +yellow-breasted bunting +hire +pedate leaf +augur +purple locoweed +Socinian +upland white aster +guesthouse +double reed +detention basin +rollmops +hitch +bodega +mayeng +sparkplug wrench +attack dog +peach melba +heliozoan +tower mustard +blue mold fungus +lamplighter +banded sand snake +smooth crabgrass +elsholtzia +bodkin +Aegean island +bag lady +alewife +arcella +electrical contact +common ax +animist +concave polyhedron +coalface +climbing perch +yellowtail +hobble skirt +marquee +Russian dandelion +snow mushroom +polo ball +NADA daiquiri +cormous plant +chaparral mallow +inside caliper +milking stool +fallout shelter +sea gooseberry +Danish blue +grissino +chimney breast +mosquito fern +soundbox +spring chicken +epauliere +cape forget-me-not +japan +saddle oyster +white fritillary +push-button radio +bladder senna +bladder stone +macedoine +moire +Shawnee +starnose mole +douroucouli +horseradish sauce +electron gun +cotter +console +park commissioner +free press +lump sugar +western poison oak +apple maggot +keurboom +lisper +griffon +burin +horseshoe whipsnake +Jacobean lily +spinner +cochineal insect +emesis basin +sowbane +humanitarian +uakari +three-dimensional radar +wild hollyhock +heartseed +swinger +two-by-four +mop handle +common amsinckia +traitress +rush aster +fibrous-rooted begonia +violet-flowered petunia +milliammeter +alidade +azure aster +celery seed +snorer +scarlet plume +obtuse leaf +heathen +rose chestnut +headrace +dwarf buckeye +Pacific tripletail +wiggler +bounty hunter +Lowlander +slate pencil +typist +syconium +vaquita +skybox +business lunch +gusher +curacao +palometa +Diapsida +light diet +sourdine +thorny amaranth +potato fern +cartridge extractor +peshmerga +chaffweed +tahoka daisy +hematologist +massage 
parlor +diverging lens +breadroot +papyrus +amarelle +cover plate +hubbard squash +cryptomonad +whitetail prairie dog +rabbit burrow +orthochromatic film +goncalo alves +Chile bonito +tent-caterpillar moth +Manila grass +buck sergeant +mustard seed +crested wheatgrass +wise guy +asarabacca +field pea +bite plate +barbasco +heart-lung machine +mouse-eared bat +piping guan +gun pendulum +climbing onion +fungus gnat +Livonian +one-hitter +Chilean firebush +Sonoran whipsnake +round scad +myelogram +Rhodes grass +vomitory +roble beech +South-African yellowwood +molasses +Velcro +common calamint +radiation pyrometer +sketcher +chaparral pea +coffee stall +Australian nettle +bilimbi +Khedive +visionary +field spaniel +devilwood +collimator +Siberian spruce +sling +limestone salamander +ribbon worm +hazel +petter +coolant system +artillery plant +bailiff +chameleon tree frog +microsporophyll +maiden blue-eyed Mary +Drosophyllum lusitanicum +cocozelle +king post +nailer +knobkerrie +tovarich +Intelnet +worm lizard +drop forge +wool grass +brown bullhead +anthropoid +vitamin A2 +creche +hickory nut +whiffletree +deipnosophist +Muskhogean +masochist +hypsometer +gliricidia +complexifier +wild licorice +reconnaissance vehicle +fives +beefsteak plant +eastern dasyure +bookworm +crested coral root +wire recorder +cinnamon vine +bubble +Newfoundland dwarf birch +spruce bark beetle +teetotaler +fad diet +ascus +spicebush +African coral snake +soft-shell crab +Postum +packhorse +sand cherry +cricket-bat willow +middlebrow +Hungarian sauce +buffalo clover +jimsonweed +latanier +stablemate +jumper +zoospore +smooth woodsia +flowering ash +unilateralist +lomatia +flapper +wild cotton +Siberian wall flower +probe +bankrupt +blockade +lemon geranium +fig leaf +basic point defense missile system +clack valve +buttinsky +ingenue +mountain everlasting +zebra-tailed lizard +shaving-brush tree +evergreen huckleberry +core drill +lugworm +Cashmere goat +doorjamb +minelayer +student center 
+horsehair +European dewberry +white broom +arenavirus +eastern poison oak +rye ergot +Tupi +tensiometer +fleawort +coquille +icing sugar +junior lightweight +Doppler radar +mahuang +candlepin +chambermaid +evergreen blueberry +Eton jacket +parvis +solleret +molded salad +malvasia +birth-control campaigner +nonagon +backswimmer +ogee +bowstring +salt marsh mallow +trapezohedron +hoary willow +speech therapist +Zinjanthropus +core +red-backed mouse +eptatretus +mossy saxifrage +Aristotelian +Thessalonian +searing iron +bifocals +falangist +field pea +packsaddle +lay reader +hoecake +cuboid +white maire +iceman +lobscouse +neckcloth +color-blind person +Chinese holly +assemblyman +white-lipped peccary +kava +plastron +crab louse +hook wrench +trailing four o'clock +junior +skilly +internet +tonguefish +footman +sub-assembly +evangelist +track +bench lathe +desk clerk +scalded milk +chamois cloth +American marten +chachka +nondescript +pellitory-of-the-wall +swamp candles +procurator +cuddy +farkleberry +mountain male fern +trawl +dual scan display +fish meal +prospector +convener +guano bat +ant shrike +picture rail +sand rat +gynophore +quilting +sleeper +summer savory +Cotoneaster dammeri +smooth sumac +slumgullion +suite +catalufa +spherule +lean-to tent +gryphon +gas shell +short iron +sweet sultan +dewberry +Victoria plum +American water shrew +X-ray tube +macebearer +green arrow arum +abbe +poke milkweed +atheist +Fosbury flop +Ord kangaroo rat +moldboard +wheat germ +explosive trace detection +whippoorwill +examiner +tallyman +Crookes tube +wild peach +fringed grass of Parnassus +Crookes radiometer +Atlantic croaker +lobster stew +spring cress +maggot +pacer +hydra +Zionist +pepper tree +diamante +baize +Rhodesian man +county agent +respecter +Anglican +antimacassar +materialist +Swan River everlasting +cloud grass +toll line +C battery +chinese mustard +grass poly +warming pan +seasonal worker +common sickle pine +bathysphere +elegant Habenaria +card table 
+Chilean cedar +brocket +collimator +malted milk +avadavat +fire marshall +coloratura +yellow spiny daisy +fingerstall +narrow-leaf penstemon +indigo broom +pillwort +bearberry willow +Etonian +certified milk +climbing bird's nest fern +field coil +wrist pad +parr +kaoliang +engelmannia +stocker +satrap +Nantua +spearfish +caper tree +gold-tail moth +mountain chinchilla +sea milkwort +westerner +army cutworm +leaf-nosed snake +neurobiologist +xeranthemum +Eastern silvery aster +ecclesiastical attire +caper +Ukranian +bight +button fern +peach pit +oligodendrocyte +maar +digitigrade mammal +streptobacillus +sensitometer +preemptor +oat +bell foundry +crown lens +rock purslane +Junior +Brazilian guava +kicksorter +Ohio goldenrod +red mulberry +King's Counsel +mountain four o'clock +fairy shrimp +fell +oca +sycophant +chantry +dermatoglyphic +bomblet +keyhole saw +hangman's rope +little barley +lion-jaw forceps +giant scrambling fern +popper +dulcimer +Espagnole +tardigrade +smooth-haired fox terrier +bullbrier +rewa-rewa +Japanese poinsettia +trunk line +cannery +helminth +American spikenard +prince's-feather +arthroscope +ginger +aphakic +pilot bit +angle of refraction +low-sodium diet +wall creeper +growler +praetorium +Hall of Fame +soupfin shark +Molotov cocktail +kaffir boom +stitcher +sawwort +flagellant +Atlantic herring +Reticulitermes lucifugus +voltaic pile +snowy orchid +southern flounder +skysail +osage orange +white mullein +lined snake +tolu tree +poliovirus +foreman +burette +jackass bat +invigilator +electromyograph +acarus +presence chamber +columbian mammoth +hyacinth bean +pilot +meadow jumping mouse +Maria +outskirts +aftershaft +Queensland nut +schlockmeister +plainsman +afropavo +scarlet musk flower +five spice powder +gunboat +multiplex +Dutch uncle +louvered window +chimney corner +cuscus +psalmist +Vichy water +signer +amphiuma +harmonizer +authorizer +naiad +control rod +stentor +mountain bladder fern +gig +read-only memory chip +assenter 
+vixen +hermitage +corn dab +locksmith +cockspur thorn +variable-pitch propeller +western red-backed salamander +dolman sleeve +cultist +sweet buckeye +pine vole +Peking man +mountain swamp gum +nimblewill +bethel +aye-aye +lancelet +teff +Alpine celery pine +endive +nipa palm +center of curvature +seeder +Sabahan +sea scallop +social secretary +gorgonzola +western chokecherry +misanthrope +rabbitweed +beggarman +button fern +white mallee +doodia +mastiff bat +roper +prima donna +blanc +holding pen +fingerling +skyhook +flophouse +steam chest +crystallized ginger +acrocarp +horse pistol +true mahogany +costmary +ballistic galvanometer +jaunting car +bartonia +rep +mandibular notch +bubble and squeak +umpire +fringed loosestrife +bear oak +ski jump +staggerbush +plumcot +thermal reactor +field brome +bodkin +jackknife-fish +malope +writing arm +gold fern +Stayman Winesap +merlon +eclectic +fluxmeter +emeritus +imam +drum +pop tent +capital ship +subalpine larch +flail +Lorenzo dressing +tomboy +eastern woodrat +warrantee +Pacific spiny dogfish +sheepshead porgy +farthingale +Cryptoprocta +power loom +communicant +howdah +ectomorph +false foxglove +basset horn +odd-pinnate leaf +Wisconsin weeping willow +Queensland bottletree +dampener +corbel arch +silent butler +Circe +town clerk +Japanese chestnut +bloodwood tree +switcher +cup hook +spreader +rice rat +straightedge +traverser +fluid drive +Spanish paprika +sour milk +poison camas +bean dip +card table +vinegar fly +vizier +electric-discharge lamp +purple rock brake +dynamo +Japanese snowbell +Grindelia robusta +neuroglia +safflower seed +coronet +frown line +Renaissance man +Steller's sea cow +book scorpion +isosceles triangle +arthritic +spherical triangle +kangaroo mouse +garden orache +stemless hymenoxys +titi +out-basket +gent +columnea +mint sauce +mouthbreeder +Liebig condenser +cheerer +assegai +stickler +Merostomata +dimmer +grey poplar +common heath +scorzonera +glory hole +Blackfoot +oil slick 
+musketeer +apple geranium +daisyleaf grape fern +gas furnace +bijugate leaf +Arabist +star-thistle +hand throttle +huckleberry oak +lift pump +maulstick +Rome Beauty +Newburg sauce +pit +volunteer +Baldwin +ark +Asian horseshoe crab +black calla +marlinespike +Gentianopsid procera +guinea gold vine +tucker-bag +desk sergeant +piezometer +migrator +keelson +executrix +sackcloth +onion smut +buckboard +substitute +pudge +mess +cinchona +intervenor +gravimeter +pederast +censor +gastroenterologist +cutlassfish +launch +demerara +Diegueno +bog bilberry +aglet +soda fountain +crank call +harpoon gun +ribbon fern +Gurkha +output device +epilating wax +greasewood +water horehound +return key +fairy swallow +spatulate leaf +culverin +leptocephalus +kleptomaniac +barley water +bleeding tooth +Cheyenne +maleberry +limber +tapenade +whorled aster +toe +revenant +lap joint +vein +truant +florest's cineraria +morning dress +trichodesmium +nightshirt +element of a cylinder +shopaholic +section hand +electrodynamometer +Guadalupe cypress +rosebud +racist +avaram +keeled garlic +Alaska rein orchid +orange toast +cunner +dipstick +Neolentinus ponderosus +bulbil +charlotte +pull-through +header +Manduca quinquemaculata +persona grata +elegist +cafe royale +scup +semanticist +wood sage +field magnet +tundra +bay myrtle +alluvial flat +arrowleaf groundsel +celtuce +baryon +must +entrant +othonna +pied-a-terre +liza +sticky aster +grasshopper mouse +prison guard +tire iron +bomb rack +Spanish American +sheltered workshop +turfing daisy +backbone +tangle orchid +creeping willow +dumb bomb +horse cassia +barosaur +Yavapai +shrimp Newburg +peanut worm +dwarf chinkapin oak +corchorus +brick cheese +by-catch +stover +Urnula craterium +clasp +Kekchi +alpine coltsfoot +soybean future +altar wine +ripping chisel +encephalogram +mountain spleenwort +transferee +remoulade sauce +American rock brake +stenographer +read/write head +loblolly +ground +powdered mustard +brake band +sea dahlia +freak 
+proconsul +Coffey still +Sivapithecus +pellitory +palm cat +skew arch +American angelica tree +vigilante +candelilla +andryala +amarelle +swiftlet +petcock +associate professor +sclerite +open circuit +Virginia crownbeard +Last Supper +button tree +scyphozoan +margate +mercury cell +horsewhip +water scorpion +companionway +drop cloth +Amhara +miraculous food +pro-lifer +embryologist +Creole +bombazine +Indian blackwood +cubeb +trace detector +gros point +main-topsail +meringue kiss +spree killer +capstone +specimen bottle +woolly apple aphid +silverweed +American barberry +gallfly +European bog asphodel +northern flying squirrel +alliterator +Old Catholic +heliograph +Pteris cretica +tippler +pump well +allspice +balancer +scarlet bugler +lantern fly +white prairie aster +krummhorn +robin's plantain +Pacific sardine +patty-pan +decaffeinated coffee +western saxifrage +warrantee +colorimeter +ball bearing +makomako +foot +troika +apricot sauce +data multiplexer +rose-root +sound film +Northern dewberry +water hickory +swing door +spastic +Oligoporus leucospongia +botulinus +tamale pie +Sagittarius +muff +spicebush +petiolule +pump action +Parry's pinyon +split-pea +rudder blade +princess royal +wormseed mustard +honey guide +pip-squeak +fin keel +foretop +cyrilla +Navaho +melanocyte +deist +silver tree +citrus whitefly +Morrow's honeysuckle +green peach aphid +longanberry +call-board +wild yam +novelist +toothed spurge +alienee +pond apple +allspice +Carolina lupine +Jack of all trades +white false indigo +boiled dinner +princewood +sailor's-choice +false bracken +microbrewery +black grama +tutee +brickkiln +sea raven +guesser +wirework +European lemming +thyrse +plains lemon monarda +milo +shunt +spotted cowbane +anchovy sauce +grande dame +Maryland golden aster +Chinese puzzle +boarfish +burweed marsh elder +defense contractor +nitric bacteria +Belgian hare +beach plum +conformal projection +sand fly +steering linkage +quickset +Mahayanist +Geiger tube +loudmouth 
+Lancastrian +brownie mix +ex-spouse +deltoid leaf +Shasta salamander +rabbet joint +purple anise +garibaldi +gebang palm +bladderpod +Host +great bowerbird +string cheese +spinning jenny +drift net +matriarch +guar +bitter betch +panda car +mess +plains pocket mouse +scarlet wisteria tree +deerberry +reamer +homing torpedo +molehill +stockyard +reniform leaf +rag +symmetry +Texas star +lerot +pickle relish +three-seeded mercury +cotter pin +ice-cream bean +farmyard +bar magnet +hansom +prickle cell +renal cortex +pest +Ultrasuede +sailing master +brougham +wastrel +amboina pine +Canary Island hare's foot fern +ninepin ball +southwestern lip fern +usherette +lemon drop +star begonia +weeds +saltworks +Persian melon +corbina +medusa +bucksaw +Gibson girl +diameter +American twinflower +kino +clear liquid diet +angiocardiogram +wetter +oyster cracker +yellowfin mojarra +wild parsley +life tenant +broom closet +Corynebacterium diphtheriae +square shooter +bedwetter +ball-and-socket joint +nonsolid color +Salmonella typhimurium +buffel grass +hip pad +subaltern +heliothis moth +trail boss +hayloft +Francisella +primordial dwarf +cock-a-leekie +sugarplum +propulsion system +tyrolean +Carib +salai +ketembilla +ironclad +cornhusk +heckler +multistage rocket +north island edelweiss +Chaldean +twenty-two pistol +Francophobe +scofflaw +sickle feather +screw bean +sea squill +Scopolia carniolica +agglomerator +western holly fern +presenter +straight pin +Myxine glutinosa +Colbert +clover-leaf roll +war paint +bird's-eye bush +longfin mako +running suit +arrow wood +margrave +blue fleabane +dracontium +plastron +chimney swift +child prodigy +commissar +turtle soup +postulant +archaebacteria +snakefly +Pitot tube +chap +smilo +Malthusian +French roof +worm wheel +gulag +pointed-leaf maple +pull-off +Cathaya +American green toad +ball cartridge +infiltrator +snowfield +crotchet +auxiliary pump +bearnaise +galax +chaenactis +olympic salamander +sundowner +cows' milk +beach plum 
+moss-trooper +Arabidopsis thaliana +cat's-claw +bog rosemary +ribier +book agent +bumper jack +beefwood +monk's cloth +alpine bearberry +climbing fumitory +cucking stool +puka +Piltdown man +property man +discharge lamp +X chromosome +knobble +lobster Newburg +herbalist +sunray +golden saxifrage +leopard cat +muffle +stonewort +blancmange +intraocular lens +trepan +desert mariposa tulip +plume poppy +Dane +martynia +shaver +white milkweed +napu +tansy-leaved rocket +abortus +telemeter +tansy mustard +harpy +honeysuckle +ironworks +testacean +Tartuffe +silvervine +Sihasapa +surface gauge +western blind snake +paramyxovirus +Icelander +bird louse +stockbroker belt +test-tube baby +ague root +little golden zinnia +dietician +elephant's-foot +dirty bomb +sailing warship +brier +tinter +Connemara heath +potato fungus +bait casting +decagon +rosefish +die +high-pass filter +solitaire +widow's walk +goldthread +Tudor +trews +orange pekoe +ninon +soda jerk +sump +flying carpet +burial garment +oblanceolate leaf +press gallery +Shintoist +three-centered arch +spreading pogonia +Moro +foxtail orchid +Ghanian +dry kiln +thane +naranjilla +bitter pea +American bugbane +apron string +oyster fish +Port Jackson fig +prize winner +high-water mark +Oneida +smoking room +potato skin +charge d'affaires +gantlet +amyloid plaque +barmbrack +mate +arrow leaved aster +handbarrow +horned screamer +virago +linoleum knife +rattlesnake root +K ration +reset +foot brake +red coral +good guy +aberrant +lavalava +poleax +garden webworm +sneezer +mountain heath +American dog violet +eolith +chimneysweeper +matriarch +smalltooth sawfish +sea mouse +tubercle bacillus +superconducting supercollider +Abney level +darnel +gherkin +celery salt +Tungus +pulasan +oriflamme +death camp +redhorse +apprehender +scion +selectwoman +pentahedron +principal +old school tie +slice bar +chanar +pimento butter +wailer +zero +mescal +rosebud orchid +stone bramble +Jarvik heart +NOC +pitchman +rat cheese 
+strawberry tomato +dwarf golden chinkapin +landau +tocsin +ampulla +scratcher +crab Louis +ginseng +ripcord +polluter +tensiometer +eyewitness +aalii +Oregon crab apple +conservator +day jessamine +hexahedron +suture +tippet +linsey-woolsey +vernal witch hazel +stainer +egocentric +canistel +nudger +shipping agent +shortleaf pine +battle sight +cheese spread +weeder +incendiary bomb +honeyflower +stovepipe iron +stepper +hellgrammiate +votary +aflatoxin +arquebus +impulse turbine +pipewort +garrote +glow lamp +pigsticking +blood clam +surface search radar +Bolshevik +platen +chariot +Gentianopsis thermalis +water level +quandong +catalytic cracker +giant foxtail +nut butter +drainplug +holdover +coastguardsman +Secretary of Health and Human Services +Seeing Eye dog +American plaice +coquilles Saint-Jacques +christella +medium +clingfish +lally +light-o'-love +Gentianopsis detonsa +taper file +signal detection +trip wire +lignosae +receiver +sedan +mud puppy +corn sugar +Philippine mahogany +magnetic pole +jointed rush +trapper's tea +Dorking +welcome wagon +clammyweed +guard +false azalea +convalescent +babassu +dedicated file server +colossus +air search radar +marquess +straight flute +sand stargazer +sea catfish +rosilla +ripsaw +Bermuda onion +peach sauce +sagebrush mariposa tulip +yashmak +Virginia mallow +erose leaf +sand blackberry +boulevardier +forester +choragus +onion mildew +threadfin +winged pea +sugar daddy +rotary press +styracosaur +rathskeller +Japanese millet +anchorite +coral drops +false gavial +eastern pipistrel +cheese press +Chinese primrose +pamperer +real estate broker +power worker +breeder reactor +nutcracker +piano wire +cushaw +Sinanthropus +firebreak +kelp greenling +herba impia +toll call +yoke +bird fancier +evening-snow +fever tree +reed meadow grass +flanker back +toggle bolt +Santa Cruz cypress +carbonnade flamande +northern dune tansy +mikado +millettia +forty-five +court +icepick +holm oak +Japanese angelica tree +Pacific cod 
+cant hook +urologist +spelt +lekvar +enologist +Mediterranean flour moth +prickly-edged leaf +Spanish grunt +dune cycling +frostweed +whisperer +tucker +Roman wormwood +counterterrorist +woolly alder aphid +Nuttall oak +snail butter +threshing floor +motley +forge +water mold +mummichog +sulfur paintbrush +head +walking delegate +jujube +peachleaf willow +Christmas bells +valley pocket gopher +bear's-paw fern +Lanthanotus borneensis +pearl hominy +placeman +swage block +offerer +stargazer +jeweler's glass +male chauvinist +crossbar +Oktoberfest +tamarau +micronutrient +large-leaved aster +tasset +tepary bean +sausage curl +ivy +snob +roller towel +wood meadowgrass +archil +padrone +prairie rocket +tongueflower +kidney fern +Carolina buckthorn +sea island cotton +landscape architect +realist +oyabun +mother hen +ostracoderm +esker +heliophila +nympholept +shining clubmoss +press agent +clam dip +Djiboutian +white currant +codfish ball +hand cheese +kraal +trident +conventicle +bacteroid +Indian plantain +quandong +kola nut +signor +theater light +musk clover +canistel +silent partner +steel-wool pad +diggings +affluent +sightreader +John Doe +arrowworm +goatsfoot +guardroom +wild cinnamon +kaffir boom +ink eraser +yardie +industrialist +sea lily +polarimeter +Polistes annularis +western big-eared bat +omnivore +Ted +horsecloth +crab cocktail +vacuum chamber +flower-of-an-hour +bilge +poleax +neolith +Montezuma +plum-yew +welfare case +trave +pipe bomb +shading +Centigrade thermometer +bangalore torpedo +celery top pine +nuclear rocket +fowling piece +anti-Semite +landscape +derris +bush honeysuckle +Mediterranean water shrew +ticket collector +masked shrew +white dipladenia +Savoyard +bondman +tempter +pygmy cypress +pentathlete +thruster +usurper +Arminian +yerba buena +ice field +ichthyosaurus +sackcloth +bean tostada +Oxbridge +Pteropus hypomelanus +thinker +bank robber +ape-man +thurifer +knawel +mule fat +hot spot +hairy-legged vampire bat +night raven +hook 
and eye +crocodile bird +skunkweed +beaver rat +cypress sedge +florida selaginella +April fool +Jonah crab +glass wool +corkwood +dwarf elder +hinging post +gentile +Brazilian trumpeter +witch doctor +thermograph +pink shower +Mao jacket +capelin +parang +bradawl +stooper +jewel orchid +citrange +oarswoman +Macedonian +particolored buckeye +pachycephalosaur +satinwood +Chinese brown sauce +peep sight +straight man +quandong +chamois cress +nonfat dry milk +rosin bag +Leiden jar +Grimes' golden +spirillum +grass vetch +carillonneur +downy wood mint +melon ball +sweet calabash +chlamydospore +bombshell +sidewall +sprig +Indian button fern +globe pepper +rough-stemmed goldenrod +bocconia +bubble chamber +sand dab +plum-fruited yew +aecium +marrowfat pea +hobbyist +whipper-in +salad burnet +neckband +Tangier pea +sauce Louis +salad burnet +artist's loft +koumiss +Nazarene +cutter +scrim +drape +crab-eating dog +deckhand +bedroll +gaff +stifler +pink lady +great plains paintbrush +patternmaker +yoke +caryophyllaceous plant +angrecum +quadriplegic +grid +genlisea +aspic +water table +junket +signore +Mutillidae +proprioceptor +pivoting window +Indian poke +synchroscope +trichion +tarahumara frog +proctoscope +abomination +purslane speedwell +breast drill +Japanese barberry +mandrake root +breakable +salon +American watercress +take-up +entrenchment +cocktail sauce +Scotch asphodel +borough +matchmaker +Seneca snakeroot +pointsman +psephologist +clustered poppy mallow +onion thrips +nuclear-powered ship +organizer +deciduous holly +balsam willow +enzymologist +caraway +drip loop +dog laurel +Orangeman +sapsago +polymath +backplate +leathery grape fern +modillion +two-timer +handhold +consignee +white stringybark +nettle-leaved goosefoot +bookmaker +disk drive +doliolum +palmist +packinghouse +Spandau +Whipple's penstemon +sword grass +ribbon development +pearly-shelled mussel +winter heliotrope +rogue elephant +deck tennis +Venus's flower basket +football +shim +boatswain 
+blinks +armored catfish +hooded seal +outdoorswoman +water starwort +upholstery needle +pleurodont +silky anteater +cornmeal +lead-in +redfin pickerel +horse balm +Rydberg's penstemon +cascade transformer +fly poison +Volvaria bombycina +broad-leaved twayblade +pastry cart +body plethysmograph +waverer +hardware store +Parry's penstemon +European sanicle +strawberry geranium +cross-examiner +head gate +devil's tongue +hemiepiphyte +pine hyacinth +machmeter +spirit lamp +field judge +Rock Cornish +mayhaw +Sassenach +bog pimpernel +parallel interface +crowberry +roach +Aegyptopithecus +cajan pea +lapboard +cryostat +magnetic storage medium +white yam +Lombard +rhymer +bed and breakfast +bunya bunya +rifle grenade +caterer +collared pika +anti-submarine rocket +bookkeeper +Western mountain ash +profit taker +fruitlet +Knowlton's cactus +infernal +beefsteak begonia +lunula +emulsion +intermediate wheatgrass +titfer +European sea bream +bigeye scad +yak butter +kola +cone pepper +plesiosaur +ragwort +penal colony +black carpet beetle +lubber's hole +Stapelias asterias +yard marker +balloon bomb +Scythian lamb +armory +selsyn +marblewood +spirula +fatalist +hash head +armiger +Dom Pedro +white-chinned petrel +ballast +orthopter +greater water parsnip +clutch +largeleaf holly +Evangelist +king whiting +tuna fish salad +Muscadet +surpriser +jumping bristletail +proportional counter tube +Hamburg parsley +obstructionist +pus-forming bacteria +creep feed +stepbrother +janissary +control freak +trusty +trepan +King William pine +orthicon +geological horizon +molecular biologist +violator +pariah dog +Austrian +conciliator +Fauntleroy +packing needle +mazer +Saturday night special +leucocytozoan +coastal rein orchid +whirligig beetle +capitalist +breeches buoy +clubroot fungus +meadow spikemoss +Kichai +Spanish lime +land office +camera obscura +strafer +purple-stemmed aster +lusterware +valve +Roman nettle +isthmus +breadstuff +sealskin +maleo +bilge keel +carissa plum +fish 
fly +kolkhoznik +heath pea +cowage +hog sucker +Sam Browne belt +inductor +wild licorice +Socotra begonia +supernumerary +Angle +red shrubby penstemon +toilet kit +tawse +sweet bells +kawaka +brown soft scale +lyssavirus +betting shop +double-crosser +macrotus +climbing hempweed +poi +strip mall +deadhead +petit juror +tract housing +American mistletoe +lace-flower vine +precipitator +endoparasite +hairy wood mint +red snapper +Victorian +hog peanut +line of heart +opossum shrimp +plumcot +Bavarian blue +slops +light flyweight +oregano +sand myrtle +pocket battleship +curator +narc +hydraulic cement +plains pocket gopher +closed loop +pluralist +molter +Christmas bush +snuffers +slender knapweed +footwall +plage +caper tree +red siskin +tender +boat train +tipster +low-pass filter +student lamp +morosoph +japonica +bellows +herald +oyster plant +savory +mail +computational linguist +blade +winter crookneck squash +zoomastigote +blackmailer +richweed +dialectician +genip +plumed scorpionfish +jet bridge +thermopile +billy buttons +Brule +millwright +Arenaviridae +Jones' penstemon +monastic habit +genipap fruit +burnous +dairyman +top +crab-eating raccoon +quadrangular prism +pilot burner +weeder +trireme +boy wonder +man of letters +Catawba +high-muck-a-muck +light circuit +bloodworm +lappet caterpillar +half-and-half +office boy +saddle stitch +mistletoe cactus +false chamomile +Catalina cherry +workhouse +Jamaica quassia +britches +tooth shell +reduction gear +carrot pudding +balsam woolly aphid +handspike +aioli +silver hake +flour bin +wireman +gas-cooled reactor +aficionado +plus fours +gitano +gene chip +oilfish +ingenue +tulip orchid +late purple aster +pork and beans +envoy +lemon extract +milk bar +black huckleberry +ground roller +Connecticuter +siderocyte +Jacquard loom +chub +meat safe +stock cube +Australian sumac +purple sanicle +tailless tenrec +dog wrench +rainbow cactus +castor bean +scintillation counter +eohippus +pawnbroker +gauge boson +front 
man +early warning radar +bearing wall +Bourbon +sandwichman +sild +gravelweed +perishable +cembra nut +riflebird +quicksand +slate +sweeper +ship-towed long-range acoustic detection system +defamer +president +vitamin K3 +challis +tanekaha +bloodwort +grenadier +quietist +Zairese +fucker +foremother +gesneria +print buffer +salsilla +fissiped mammal +fender +consulate +acidophilus milk +Southern dewberry +snail darter +Panama redwood tree +dehydrated food +bush willow +coffee fungus +Sinologist +Mesoamerican +hood +large civet +deck-house +cyborg +smuggler +pepper sauce +cyberpunk +Grand Inquisitor +persona non grata +haggis +weeping tree broom +stop bath +modifier +coyol +conodont +yellow giant hyssop +optical pyrometer +Carolina moonseed +marinade +aspartame +false wintergreen +cityscape +philter +turnery +hemiplegic +chuck-will's-widow +vower +track star +myrtaceous tree +small civet +intelligence analyst +dogcart +yardman +cross bit +holometabola +platen +sweet cassava +Comstock mealybug +acute angle +Communist +alcohol thermometer +mountain hollyhock +Mead's milkweed +highjacker +Townes +congou +Astrophyton muricatum +lazybones +roughcast +pressure cabin +clinch +cinnamon +smoke bomb +quandong +tout +office-bearer +punctum +efficiency apartment +Queensland hemp +Ceylon bowstring hemp +newswoman +vermin +fetid bugbane +grantee +sanitary landfill +gluten-free diet +clabber +shillelagh +white lettuce +sweet coltsfoot +beggar's lice +samite +loser +flasher +water star grass +banana passion fruit +translator +artificial kidney +Virginia creeper +American crab apple +cactus mouse +nebbish +Ligustrum obtusifolium +vox angelica +stringer +hunter +know-it-all +scene painter +invalidator +jungle cock +basilica +coriander +California single-leaf pinyon +miles gloriosus +pina cloth +law agent +scarlet fritillary +keurboom +bailor +ramjet +seedling +rib joint pliers +ways +picket ship +Surgeon General +wasabi +marquis +clostridium perfringens +Helvella sulcata +furnace 
lining +kingwood +painted sandgrouse +plain wanderer +Indian madder +silver screen +bailey +dwarf spurge +Serbian +ball-buster +shaheed +Platte River penstemon +tensiometer +mute +nymphomaniac +Yokuts +arroyo willow +whipping post +class act +load +winged everlasting +periodontist +diarist +robber frog +diestock +curry powder +ratchet wheel +store detective +hog plum +prune whip +shortwave diathermy machine +Anabaptist +post chaise +Kennan +bean caper +delegate +orderly sergeant +celtuce +jumping bean +gowen cypress +puddingwife +registered nurse +West Saxon +rosita +gun room +nasotracheal tube +matchboard +flagship +Boswellia carteri +Canadian pondweed +wonder boy +sewer rat +dimetrodon +pantograph +marsh bellflower +angoumois moth +slippery dick +woolly indris +creme de cacao +dulciana +Jewess +Macadamia integrifolia +least shrew +don +diffuser +black-stem spleenwort +grouseberry +goniometer +annotator +sticktight +gossip columnist +speechwriter +capon +rock hind +Liederkranz +chandler +echocardiograph +sidelight +fisher +brocket +New Zealand daisybush +northern sea robin +roller bandage +peachick +pellet +pichi +plug fuse +spark coil +buckwheat +brood bitch +wedgie +dwarf bilberry +filigree +bull +queen +dodo +Salish +denticulate leaf +Western silvery aster +Prima +magnetic bottle +fetterbush +process-server +nainsook +mythologist +Piedmont glacier +hammerhead +niggard +Mound Builder +Kui +Nootka +highbinder +passenger pigeon +oblong +tickler coil +agnostic +succorer +esophagogastric junction +dressmaker's model +bombshell +social anthropologist +gildhall +orpine +pterodactyl +bristly sarsaparilla +Lane's Prince Albert +hognose bat +salesgirl +lubricating system +electric catfish +wrap +Jacksonian +chard +cherry laurel +foreground +beadsman +Kolam +amniote +frozen pudding +acid head +poor box +depositor +coattail +pallas's sandgrouse +mason's level +English lady crab +skeg +cruel plant +petrolatum gauze +tuna +swivel +stock-in-trade +perisperm +civies 
+Phyllostomus hastatus +alienor +Verdicchio +guard's van +onion butter +moviegoer +planter +citrange +box huckleberry +iconoscope +familiar +helmsman +baby boomer +constructivist +American bog asphodel +whorled caraway +simple pendulum +viviparous eelpout +Job's tears +holdout +sour salt +poison bush +dusky-footed woodrat +golden algae +granadilla tree +telethermometer +crossbar +thrift +African bowstring hemp +dog in the manger +hayrack +gold-crowned kinglet +prolonge +doge +pencil +discount house +mulligan stew +Nonconformist +virologist +gregarine +facula +rocket scientist +thin-shelled mussel +oospore +annual salt-marsh aster +Afrikaner +metallic +julienne +culverin +cleavers +Berliner +mudhif +thorny skate +brown lemming +yellow colicroot +cooling system +large-leaved magnolia +free-reed +canyonside +preemptor +stake +Brucella +anti-G suit +pleximeter +squire +salsilla +write-in candidate +lowland burrowing treefrog +flare star +dwarf hulsea +jobber +mangel-wurzel +quagga +red-skinned onion +positive pole +Pteropus capestratus +jug wine +stomacher +standee +bladder worm +hakim +house of correction +pelisse +golden mole +temporizer +rose apple +drove +umbrellawort +holy of holies +lawyer cane +smooth lip fern +anode +astatic coils +zip gun +feverroot +self-heal +expansion bit +salt reed grass +field pussytoes +nutmeg hickory +cryptic coloration +Venus's girdle +Hunkpapa +Calostoma cinnabarina +raft foundation +May apple +pygmy mouse +prokaryote +yellow-green algae +Bermuda maidenhair +withdrawer +coelacanth +Elliott's goldenrod +driftfish +epicyclic train +bowl +swamp dewberry +corbel step +sadist +party line +anti-American +mining engineer +Amur privet +conidium +Gastrocybe lateritia +lithia water +chaulmoogra +Rough Rider +Guinea pepper +glade mallow +pitcher sage +whitecup +shanghaier +low St Andrew's cross +phonologist +cocobolo +perfumery +visor +prison chaplain +belt +ingesta +literary critic +industrial watercourse +reckoner +pursuer +Kinetoscope +Kuiper 
belt +hyperope +raw recruit +Galiella rufa +Prince Albert yew +slit trench +usher +tenderfoot +white-rayed mule's ears +browser +piccalilli +bran +giant buttercup +water lobelia +arborescent plant +echinus +dryland blueberry +struggler +platyctenean +Geordie +domatium +twenty-two rifle +keteleeria +sports editor +chorus girl +Hakham +dry-bulb thermometer +onomancer +double-bitted ax +Girondist +bottle bank +thyrsopteris +bandwagon +star anise +armored car +dhawa +Bessemer converter +mutineer +paradise tree +tupik +centurion +mending +chowchow +margrave +International Grandmaster +African hemp +catafalque +leptodactylid frog +forcemeat +tank shell +pill +barbecue pit +worthy +lady's maid +evergreen +Jesuit +South American staghorn +rigger +suffragan +imperialist +spherical angle +grey lemming +kitchen police +tree swift +coliphage +archaist +Conservative +rib +exegete +Mendelian +tragedian +steerage +Paleo-American +obeche +garlic +grapefruit peel +accommodating lens implant +half blood +barrelfish +catgut +lanceolate spleenwort +hardliner +frieze +name dropper +carrack +huckster +onion bread +magnetic head +pease pudding +raisin moth +negative magnetic pole +electroencephalograph +bunji-bunji +synchroflash +Mornay sauce +stencil +winged pigweed +Nesselrode +MEDLINE +licorice +mainspring +melilotus +duke +experimenter +Napier's bones +four-minute man +pin-tailed sandgrouse +toolmaker +pogge +rootstock +baton +pricket +creeping snowberry +anomalops +nester +devourer +apolemia +Maricopa +pine-barren sandwort +larvacean +American dewberry +escalope de veau Orloff +gig +myrtle +pitsaw +Lutheran +fish house punch +gnathostome +intake valve +molasses taffy +clammy locust +vandyke beard +Atlantic tripletail +planktonic algae +estradiol patch +flummery +cytologist +sectarian +oil meal +tomtate +mediterranean anchovy +aspersorium +argonaut +porkholt +sheep ked +algometer +Adventist +false goatsbeard +snake polypody +streetwalker +shelver +adoptee +highflier +pitch apple 
+prairie rocket +fish mousse +viroid +deckle +manila tamarind +observer's meridian +pincurl clip +hardstem bulrush +gossamer +brookweed +Druze +hug-me-tight +accessory before the fact +oilman +Comanche +Marine +bedlamite +Chinese cork oak +squawbush +false miterwort +walk-on +Cynopterus sphinx +brandyball +landlubber +arrowroot +cape forget-me-not +galoot +tabor pipe +checker +Levant cotton +paddle box +murderess +smirker +fuddy-duddy +withdrawer +newel +shade +pink disease fungus +tipu +sweet sultan +aeronautical engineer +tall gallberry holly +acarid +conqueror +cucumber +film director +ordinary +salon +closet queen +allegorizer +tonka bean +flax rust +negative pole +dagame +dentist's drill +mock privet +micropyle +contributor +dark horse +climbing corydalis +cosmotron +land agent +Big Blue +Cynic +tassel flower +lyrate leaf +Minuteman +Dutch-elm beetle +Hessian fly +flower girl +West-sider +window dresser +skinny-dipper +whitebait +out-and-outer +hooker +amicus curiae +jack +camwood +stockist +black root rot fungus +Jamaica dogwood +diaphragm +Holocentrus ascensionis +roselle +black maire +Pygmy +fumigator +lame duck +mudder +hydraulic transmission +conning tower +phoronid +batfish +hearing dog +monohybrid +whaling gun +Cockcroft and Walton accelerator +allemande +seasoner +epileptic +ammonia clock +Young Turk +lanseh tree +urceole +cafe noir +poster girl +Oglala +deadeye +manna lichen +positive pole +cinch +lyricist +hermaphrodite +kidney stone +dilator +number one +frotteur +kaffir bread +fish knife +tarragon +adjuster +potato wart fungus +Florida pompano +conductor +corbie gable +rounders +Catha edulis +bender +recruit +Uruguayan +subject +bunghole +day boarder +pocketed bat +Oxonian +owner-occupier +yellow-leaf sickle pine +devisor +exhibitor +looking glass +shipowner +crooked-stemmed aster +calico +dash-pot +defilade +Confucian +egg-and-dart +irreligionist +lepton +self-rising flour +diving bell +Brahui +shop girl +maximum and minimum thermometer +Dalmatian 
laburnum +correspondent +subduer +nonperson +Reaumur thermometer +rough-leaved aster +jacksmelt +pinfold +magneto +ex-wife +round-leaved rein orchid +purloo +American shrew mole +sweet sand verbena +polymastigote +outfitter +curled leaf pondweed +Italian dressing +borderer +ambusher +geebung +four-stroke engine +small ship +homeopath +gynostegium +political prisoner +Radiigera fuscogleba +ensiform leaf +rhizoctinia +satyr orchid +rue +bouillon cube +flip +prophyll +tilefish +periselene +prima donna +choker +laminar flow clean room +Hooker's orchid +fish joint +mombin +remover +array +coelostat +autophyte +consigner +Damaraland mole rat +gasman +public works +lye hominy +pearlfish +piassava palm +Georgian +uxoricide +confessor +community center +epigone +tagger +abrading stone +cryoscope +nautch girl +reliever +Cartesian +Indian beech +protoplasmic astrocyte +fundamentalist +mustard sauce +crank +houselights +five-point bishop's cap +comedienne +triangle +presentist +beaugregory +dreamer +Wave +blue mockingbird +Barbados gooseberry +ten-spined stickleback +papoose +silky pocket mouse +holdup man +agent-in-place +suspensory +emigrant +ropemaker +bookbinder +jumby bead +undershrub +Killarney fern +sheep bell +city slicker +equerry +pea crab +down-and-out +blackmouth bass +shirtmaker +lister +UNIX guru +snipefish +gimbal +maisonette +haircloth +Ranvier's nodes +pigmy talinum +tribute album +msasa +hydroxide ion +madame +four-pounder +prophet +sloganeer +field-effect transistor +nude mouse +canteen +Calostoma lutescens +buteonine +sunlamp +Uruguay potato +Spanish tamarind +Prince-of-Wales'-heath +kishke +caprifig +chincapin +hegari +alarmist +bathtub gin +astatic galvanometer +Calostoma ravenelii +marang +tussah +coin box +bugleweed +hacker +frontal eminence +timekeeper +shunt +bicycle clip +mustang mint +caesium clock +hospice +glenoid fossa +archpriest +ex-gambler +incrustation +salvager +Donatist +violator +lamb succory +hygroscope +oilbird +sharptail mola +showplace 
+corn syrup +flashlight fish +pulse timing circuit +anchovy paste +fascista +chigoe +divan +Druid +squad room +Huntingdon elm +buffalo carpet beetle +carper +corn lily +goats' milk +assault gun +cockpit +Lochaber ax +Visigoth +occupier +Basotho +criminologist +spindle +Rosicrucian +Cornishwoman +musk kangaroo +artificial skin +pandurate leaf +Parkia javanica +roundhead +tea-like drink +basidiolichen +unguiculate +stepmother +Nauruan +gutta-percha tree +bloodberry +scarlet haw +marupa +censor +algebraist +pelvimeter +whaler +cowhide +paparazzo +biochip +internationalist +Yukon white birch +hangar queen +chlamydia +puttee +Pipturus albidus +pearly razorfish +sea moss +burglar +hoary golden bush +colter +drey +bushman's poison +maxillaria +gnetum +deadeye +shittah +swamp oak +damper block +deepwater squirrelfish +truffle +cangue +paleolith +lawyerbush +sorehead +Texas snowbell +Tremella reticulata +quarter +keelboat +dimity +whiner +Wagnerian +myrmecophyte +frontierswoman +pyrometric cone +big-tree plum +puppy +galbulus +hod +winceyette +carriage wrench +dictostylium +farmland +infanticide +Jacob's rod +threadfish +monocline +inamorato +leaf miner +purple cress +passer +black-fronted bush shrike +silverrod +bootmaker +segregate +captive +Edmontonia +spherometer +television transmitter +bladder +Saratoga spittlebug +dynamometer +lodge +smooth darling pea +Cossack +wake-up call +Olmec +sutler +molasses kiss +corner post +rattlesnake weed +yardmaster +adder +rhinoscope +referral +ulster +pantaloon +counterspy +gadgeteer +heart cherry +hospital chaplain +Clydesdale terrier +plank-bed +Russian thistle +actinometer +dyspeptic +common wolffia +firewall +seidel +potato moth +soapweed +seif dune +thill +cosmographer +absolver +halberdier +fire control system +kai apple +bastard pennyroyal +Big Brother +broadcast journalist +Albatrellus dispansus +citrophilous mealybug +split end +nickel-iron battery +Newtonian +gas maser +thumbstall +anaspid +dusky-footed wood rat 
+latitudinarian +flatbrod +schizocarp +niqaabi +flight surgeon +gyrocompass +Polyporus tenuiculus +Utopian +mailboat +spellbinder +undercoat +cassareep +typical jerboa +photocathode +katharometer +bight +fur-piece +penetration bomb +malik +Siberian millet +nanomia +Wykehamist +tosser +gyrostabilizer +microwave diathermy machine +crystal set +wall +legatee +alfalfa +angwantibo +charioteer +piano maker +African mahogany +Morlett's crocodile +taro +parallel circuit +cush-cush +etymologist +matriculate +neem seed +cornerback +kingfisher daisy +redoubt +blastomycete +peplos +costumier +publican +tobogganist +semolina +myrmidon +parricide +gymslip +whoremaster +cryptocoryne +header +platitudinarian +barleycorn +spiral bandage +reciter +abecedarian +dance +wrymouth +bilberry +Liopelma hamiltoni +streamliner +Fordhooks +fixed phagocyte +radiobiologist +neurologist +Selkup +dollarfish +cascade everlasting +acrodont +boarhound +midstream +theatrical producer +abhorrer +goldsmith +photometrist +Anglo-Saxon +rugel's plantain +sable +workmate +ferule +ankus +earleaved umbrella tree +Passamaquody +timucu +Mexican pocket mouse +yerba santa +Rochon prism +apomict +monocarp +sweet unicorn plant +common winterberry holly +archivist +drypis +paretic +fly-by-night +white-berry yew +Schoolman +blue cheese dressing +vintager +squatter +Euphausia pacifica +corrugated fastener +yellow henbane +Croesus +almoner +analphabet +acoustic delay line +sheep frog +workhouse +horseleech +venturer +pond-scum parasite +Pyrenees daisy +plagiarist +Truncocolumella citrina +rerebrace +group captain +caddis fly +hot-rock penstemon +kanzu +stylopodium +slopseller +rauli beech +starter +ootid +statesman +distributor cam +ascot +falcon-gentle +Duplicidentata +spotted antbird +heliometer +false buckthorn +Allegheny spurge +Cavalier +dart +photocoagulator +master-at-arms +kei apple +baldachin +crapshooter +gametangium +white hope +chipotle +spike heath +Scotch woodcock +Florentine +differential analyzer 
+Mitrula elegans +wet cell +basil balm +Circassian +corn cake +bouncing betty +vice-regent +lagerphone +ketembilla +whoremaster +fork +tetrasporangium +trifler +pill head +life-support system +quartermaster general +tobacco thrips +officeholder +teredo +toyon +Sundacarpus amara +Phytophthora citrophthora +naif +lobbyist +alligator wrench +bully +heavy +toxicologist +radio chassis +waterdog +drive line +kaffir cat +foster-brother +breakax +curette +traditionalist +pipe vise +striped button quail +gawker +homeotherm +schoolyard +battue +kalansuwa +deviationist +Bolshevik +transponder +pungapung +iron +Eyeish +roccella +manglietia +Tory +print seller +Texas Ranger +otter shrew +seconder +shellflower +outlier +party man +wold +hayfork +oncologist +framer +co-beneficiary +ocean pout +Chinese angelica +scrimshaw +air attache +false gromwell +standing press +fringepod +specifier +automatic choke +durum +yenta +wassailer +reeler +signora +beach pancake +common booklouse +pellicle +backroom boy +den mother +associate +Unitarian +gambist +brookweed +clubroom +cat's-tail +playboy +self-registering thermometer +doorstop +bennet +yak's milk +escapee +quail bush +sparge pipe +coast boykinia +screw key +half gainer +aggravator +cotton mill +tailor's chalk +free agent +cotton mouse +deadhead +bunny +turpentine camphor weed +amaranth +ceratodus +red lauan +beam-ends +thermograph +wally +Toda +handrest +commissary +oak-leaved goosefoot +manufacturer +voicer +Jafnea semitosta +bench hook +finder +abyssal zone +rabbitwood +Hercules'-club +epicarp +declinometer +camp follower +signaler +Australian pea +putz +qadi +banded palm civet +egg timer +regnellidium +calisaya +harvestfish +sound spectrograph +side-wheeler +glomerule +woolly rhinoceros +Black Muslim +horticulturist +ornithomimid +cryometer +battlefront +gametophyte +airmailer +cuisse +nakedwood +baseball club +slasher +anise +leatherleaf +leatherjacket +horned pondweed +gofer +Saigon cinnamon +barong +blazer +twinkler +skeleton 
shrimp +dial +floorwalker +case shot +flannelbush +cultivated parsnip +Jane Doe +few-flowered leek +nogging +placer miner +muzzler +serge +lion-hunter +capulin +Wandering Jew +ascidian tadpole +hispid pocket mouse +southern spatterdock +milk wagon +junior middleweight +duck sauce +promycelium +protozoologist +cascade liquefier +tout +longheaded thimbleweed +charcoal burner +footage +slop +bridge agent +miller's-thumb +Job's comforter +marocain +tanker plane +lancetfish +knocker +toque +ordinand +umbrella bird +favorite son +hare's-foot bristle fern +business traveler +plotter +Asiatic shrew mole +tallyman +stump +Paleacrita vernata +index register +mortgagee +accuser +codger +sand rat +seaside centaury +chiropractor +Florida smoothhound +dwarf sperm whale +T-man +sannup +dragonhead +numdah +alkali grass +gynobase +kymograph +ascolichen +steward +waterline +Nazarene +filer +lapidary +muncher +wincey +scyphus +question master +besieger +worldling +docent +facing +atmometer +quern +puerpera +three-decker +calliope +wild red oat +bailee +flame pea +cattle cake +theist +yellowtail flounder +cosmopolitan +rocket engineer +vouchee +Turkoman +hard sauce +Thousand Island dressing +assayer +messmate +mutilator +oyster bar +flame tokay +countess +prairie mimosa +microsporangium +cotter +townsman +paring +fundraiser +simperer +Comrade +orlop deck +power takeoff +cattleship +prime meridian +Javanthropus +scriptorium +curandera +long-clawed prawn +maestro +paster +potato tuberworm +chachka +junkyard +cape yellowwood +reentrant polygon +Liberian coffee +restaurateur +Alsophila pometaria +Jekyll and Hyde +electrophorus +Scomberomorus maculatus +manipulator +gromwell +chicken provencale +ashram +mangel-wurzel +shamrock pea +dossal +adducer +erection +Mysore thorn +smoothie +chufa +brace wrench +victualer +litterer +linstock +Protium guianense +palfrey +banyan +klieg light +dangleberry +trooper +yaupon holly +quitter +tradescant's aster +nullipara +melter +devil's urn +ghostwriter 
+mouth +analogist +Creek +sonic depth finder +fucker +locus of infection +mortician +esophageal smear +locum tenens +conic projection +aroeira blanca +bellarmine +night porter +automobile mechanic +codpiece +Munro +cottonweed +scoinson arch +tinderbox +frozen food +waterproofing +Egyptian henbane +lash +transactor +American smooth dogfish +existentialist +grabber +Sonoran lyre snake +Rufous rubber cup +colors +weekend warrior +power user +perennial salt marsh aster +Puritan +Apalachicola rosemary +anecdotist +tosser +moth bean +agnostic +stretcher-bearer +browntail +optimist +brewer's mole +astronomy satellite +flat file +rust mite +tuberous plant +day laborer +buster +trapezoid +bevatron +nonresident +Streptomyces griseus +mangosteen +customer agent +hero worshiper +suicide bomber +procellariiform seabird +archiannelid +reaction turbine +distortionist +bulldog wrench +grainy club +scalp +Aztec +scow +globigerina +pedant +heartleaf manzanita +kanchil +low gallberry holly +containment +scandalmonger +rose-colored starling +Powhatan +addle-head +Chilean rimu +Atlantic sea bream +arthrospore +ramrod +root climber +Kalapooia +roach clip +Schreiber's aster +horseradish +albino +Kshatriya +trombidiid +blasting cap +body pad +brachium +shallu +Wynnea americana +slender centaury +munj +upset +wind tunnel +cottonwick +airing cupboard +pepper shrub +ambrosia +languisher +chosen +rose globe lily +purple apricot +costia +sloop of war +sultana +frontlet +booster +sargassum fish +broad-leaved montia +rifleman bird +stillroom +amoralist +enginery +meter maid +fitment +southern bog lemming +Athenian +clincher +cusk-eel +mackintosh +diaphone +corozo +Australian reed grass +czar +spongioblast +Eurafrican +airhead +Shahaptian +Roman +pollinium +tourist class +halogeton +stamper +emperor +malingerer +tramp steamer +Peziza domicilina +pilot cloth +stenopterygius +cost accountant +Queen's Counsel +wine-maker's yeast +poppet +cage +rowlock arch +landgrave +bearded wheatgrass +stink bell 
+quaker +undesirable +algarroba +resistance pyrometer +exorcist +carib wood +guvnor +border patrolman +bathhouse +licenser +headman +rentier +pine spittlebug +nut-leaved screw tree +paraduodenal smear +apron +necker +smilax +Alpine besseya +creeper +castle +ground bait +Queensland grass-cloth plant +sclerotium +great yellowcress +fat farm +Stoker +hoop snake +elixir of life +Trotskyite +home buyer +wheat berry +Tutelo +semi-climber +utahraptor +wet-bulb thermometer +packrat +hygrophyte +darter +sketcher +refiner +camlet +midgrass +compound +tarwood +Colorado River hemp +toiler +abstractor +override +dwarf pipefish +plodder +briefcase computer +trunk hose +brown butter +valve-in-head engine +cymbalist +explosive detection system +horsewoman +boutonniere +chinchilla +venerator +scourer +exarch +cohune nut +ayapana +continental divide +cosigner +stalker +pyxie +Genet +Macowanites americanus +open-hearth furnace +water chestnut +American frogbit +tarwood +cutter +scout +burr +upsetter +grist +tagasaste +mouthpiece +palette +rattan +letterman +Exmoor +Methodist +eelblenny +marasca +slide valve +ventilation +saddle hackle +Yakut +flux applicator +air traveler +murder suspect +Cynocephalus variegatus +idolizer +Surgeon General +nutlet +little-head snakeweed +germ tube +fellow traveler +raceabout +commodore +czar +anamorphosis +treelet +girlfriend +groundnut +sideline +giant star grass +goffer +spark lever +oubliette +processor +tare +plodder +extremist +Kipp's apparatus +gripsack +S wrench +viscountess +bridgehead +cascarilla +Asiatic flying squirrel +protoceratops +equerry +difflugia +princeling +moonlighter +aspergill +common flat pea +Utahan +imperial mammoth +plantain-leaved pussytoes +Boott's goldenrod +bootlegger +reed pipe +runcinate leaf +onion salt +nitrite bacterium +introvert +duck +New World opah +goliath frog +heterostracan +disrupting explosive +haggler +candlenut +false bugbane +returning officer +eudiometer +ship-breaker +metazoan +mandarin +patka +gill 
net +cavity wall +armilla +rainmaker +dealfish +orderly +gleaner +muffin man +house sitter +alto +sand devil's claw +vulcanizer +appendicularia +boron chamber +chess +bitok +anchovy butter +dropout +flour mill +bishop +escapist +scapegrace +stanhope +smooth winterberry holly +upstager +stalking-horse +pony +prairie gourd +parabolic mirror +Polaroid +slasher +lap +garlic butter +sendee +German millet +hairy honeysuckle +Swiss canton +Scleroderma flavidium +red goatfish +telegraph plant +Jungian +garment cutter +mallee hen +stranger +driveway +schooner +Paiute +cisco +trestlework +sipper +shanny +romanticist +Molly Miller +mountain rimu +odd-leg caliper +bitumastic +Western Australia coral pea +labor coach +latchkey +harpulla +solitary pussytoes +chop-suey greens +coil +guimpe +diapir +Osage +gutta-percha tree +giant eland +reticulation +garden huckleberry +quick study +Hudson bay collared lemming +coreligionist +Lancastrian +stumblebum +omnirange +seersucker +Potemkin village +Rhea Silvia +symphonist +bolti +jaw +jaconet +page +visiting fireman +haulm +p-n junction +landlubber +yellow jack +triclinium +souari +invader +fire walker +Luddite +Plott hound +hemming-stitch +winker +star-duckweed +craniometer +Arabidopsis lyrata +loser +cypripedia +trimmer arch +cookhouse +pink fivecorner +transfer +ringleader +northern pocket gopher +moke +blockade-runner +cyclostome +web-spinning mite +Whig +transcriber +malahini +sawyer +patent log +paca +tragedian +thermojunction +soffit +black buffalo +foreigner +applecart +brit +pole horse +white mullet +argentinosaur +Homo soloensis +bounty hunter +decumary +hand +paperboy +Smitane +windowpane +Java man +Wynnea sparassoides +prune +middy +lilliputian +sorb +pyrostat +guest worker +hold +leaseholder +vegan +humanist +salinometer +piton +zygospore +means +night rider +tetraspore +archipelago +radiomicrometer +nitpicker +spot weld +slicer +girlfriend +round-tailed muskrat +cock's eggs +Shavian +bay +nuclear chemist +planetarium 
+hiccup nut +Marylander +milling +microsporidian +brown cup +Strophanthus kombe +little skate +emancipator +paperhanger +archaeopteryx +maigre +Mastotermes electrodominicus +procurer +seizure-alert dog +homeboy +cotton strain +mute +siren +spearnose bat +phenacomys +gayal +arsenal +pitchfork +Port Jackson heath +cud +magnetic core memory +interferometer +water jacket +account executive +hodoscope +window oyster +sudatorium +syncopator +loment +hypertensive +smoothbark +Geogia holly +nailhead +African holly +musette +chafeweed +microflora +derrick +strawworm +shogun +queen post +jerboa kangaroo +columbo +royal +sourball +solenogaster +cardsharp +Homo habilis +intaglio +calf's-foot jelly +flotsam +skirret +baronduki +chyme +shovel hat +Welsh +monoplane flying fish +groundfish +tablet-armed chair +swan dive +Indian club +colonial +cassiri +pyramidal tent +praya +silk vine +time clock +button snakeroot +clews +Korean lespedeza +diffuser +ripping bar +puttyroot +nipple shield +headpin +juneberry holly +hub-and-spoke +laver +weldment +plain flour +hoosegow +dudeen +grey skate +line of life +mung +arariba +Newtown Wonder +rock candy +side chapel +castor sugar +narrow-leaved white-topped aster +babassu nut +puka +rings +catchall +heat shield +caroche +oxbow +Australian coral snake +tapper +sporangiophore +fenugreek +spruce gall aphid +gouache +cutoff +private line +pod +cargo hatch +nailhead +penile implant +geophyte +small-leaved linden +deepwater pipefish +paperhanger +hairy spurge +Persian lamb +subtropics +feed grain +clarence +nonparticipant +scorpioid cyme +hand brake +tiller +Geglossaceae +albacore +monochrome +goa bean +bur +tongue worm +psittacosaur +frog's lettuce +pectoral +terreplein +light filter +fishpaste +dry point +grison +feterita +dolichocephalic +oenomel +stretcher +swag +cheval-de-frise +mountain beaver +scammony +discus +leatherleaf saxifrage +wharf rat +Dominique +pelycosaur +depth gauge +bishop +archespore +true anomaly +silver jenny +mercy seat 
+kelp +oviraptorid +acrylic +Chinese pea tree +meat house +bilge well +Temperate Zone +whale louse +balbriggan +briefcase bomb +pump-type pliers +oil +sour gourd +Jewbush +lunette +Chinese paddlefish +pyxidium +beechnut +calabar bean +grugru nut +gib +blunt file +cataphyll +megasporangium +blockbuster +sliding seat +hogchoker +calceus +Connarus guianensis +honest woman +survivor +second balcony +tempera +Calvary clover +murine +outwork +bogy +elephant's-foot +conning tower +set square +blackfly +stirk +Streptomyces erythreus +blade +goldfield +snowball +mortal enemy +waltzer +shoal +galley +hitchhiker +lithophyte +brisling +scauper +esophagoscope +grab +subtracter +philosopher +duplex apartment +southeastern pocket gopher +bonduc nut +reverberatory furnace +grader +lamp house +northern bog lemming +brotula +ornithopod +ptyalith +obturator +perpetual motion machine +range pole +Africander +curvet +daisy print wheel +floor +collector +mutant +tuck +fore-and-after +senega +buckler mustard +louvar +Tarsius glis +culdoscope +Spanish fly +steering gear +hatchet man +museum +saw set +cambric tea +comber +thermohydrometer +stationer +chalcis fly +bryanthus +whipstitch +harvest mite +rock gunnel +time bomb +rariora +pigfish +apetalous flower +head shop +horned whiff +sandpit +tachistoscope +sundries +taffrail +caller +monofocal lens implant +Dover's powder +souari nut +crowbait +render +Shakespearian +hagberry +megatherian +magus +hatchel +mangabey +garroter +piedmont +cope +barrio +psychodid +rigout +distributor +croupier's rake +sarcenet +narrow-leaved water plantain +treenail +biped +lanternfish +overdrive +barndoor skate +picket boat +amber lily +sawpit +sand lance +bucket shop +common beech +laundry truck +surtout +grogram +tampion +escape hatch +interstice +shop bell +snake mackerel +nakedwood +tumbrel +mericarp +mountain paca +cab +big board +cringle +eusporangium +shipping room +coal chute +dumbwaiter +Smiledon californicus +man-at-arms +cartridge +deinonychus 
+pigeon pea +screw bean +spectacle +floorboard +cutting room +low-warp-loom +proconsul +sabicu +genipap +clapper +aquifer +archaeornis +belly flop +Protium heptaphyllum +interrupter +high-warp loom +knight +wiper +impression +poker +Pithecanthropus +sable +guardroom +tenter +wellhead +raja +strickle +sodomite +mountebank +sand leek +Barbados gooseberry +shuffler +sensory fiber +crab-eating opossum +etching +rare bird +scup +fagot +negro vine +hutment +droshky +nephoscope +lady chapel +cutty stool +release +vestiture +buff +standard +Tabernacle +vascular ray +snakewood +chlorobenzylidenemalononitrile +limnologist +pouched mole +microwave linear accelerator +Mastotermes darwiniensis +wind tee +orange bat +open sight +carpospore +rampant arch +sabbatia +cursor +post exchange +bellpull +center +cyclostyle +canonist +pygmy sperm whale +moa +king +pass-through +angioscope +marrow +hookup +revetment +acanthocephalan +good Samaritan +apatosaur +web spinner +dixie +ommastrephes +crossbench +candlewick +jack +light arm +caisson +kaki +quandong nut +Meuniere butter +coquilla nut +mast +black +twitterer +bluethroat pikeblenny +shielding +water-shield +urolith +elephant bird +clearway +dark lantern +schizopetalon +press +Nazi +sugarberry +Maltese +stevedore +hair shirt +party wall +gainer +blackheart +nothosaur +cavetto +evergreen bittersweet +chemical bomb +calpac +shingle +turnpike +animator +heaver +isoclinic line +death knell +liner +anathema +aerie +razorback +Ichyostega +pound net +French dressing +mottle +yard +string tie +bell seat +brattice +battering ram +sierra +pompon +vertex +stomach pump +electrolytic cell +escolar +telpher +roadhouse +cerecloth +tartare sauce +letter case +whale sucker +hob +teg +canvas +strickle +hectograph +Cartagena bark +mail car +acinus +freedom rider +bread sauce +picture window +Rhizopogon idahoensis +pinprick +mass spectrograph +ringer +devil's cigar +salad cream +marlberry +airbrake +Clark cell +yellow-throated marten +wire gauge 
+dinoceras +aba +harpoon log +plate rail +mustard plaster +coelophysis +journal box +puce +ballcock +quartering +izar +clinid +whirler +turnspit +deathbed +pottle +shot +doubler +Coryphaena equisetis +English sole +chicken feed +borrow pit +mylodontid +Chilean nut +Kundt's tube +ling +asthenosphere +reseau +death seat +immovable bandage +peppermint patty +lecturer +electron multiplier +bear claw +hyacinth +beaked salmon +toehold +scull +snowball +gangsaw +fiber +oxeye +lashing +Beckman thermometer +fence +cantilever +dinner theater +Reynard +jag +umbrella plant +camera lucida +beaver +slug +yellowfin croaker +Sibley tent +rat-tail file +anchovy pear +soldier +cackler +chaise +Pitot-static tube +minniebush +Episcopalian +oleaster +ejaculator +wavy-leaved aster +knight +rack +real storage +magnetic mine +cocoa plum +vesiculovirus +birch leaf miner +water chevrotain +rudapithecus +torpedo tube +itch mite +warren +loft +washerman +terrace +nonstarter +shit +platform +caudex +ground control +Ostariophysi +slopshop +Peruvian cotton +crystal oscillator +plastic bomb +bar bit +watering cart +Asiatic sweetleaf +artificial joint +chariot +casern +charge-exchange accelerator +display adapter +hornpipe +honey bell +planula +Nephthytis afzelii +hame +ranter +trachodon +synchrocyclotron +splasher +heterotroph +Nicol prism +Himalayan rhubarb +headfast +put-put +bitter almond +parr +scantling +power breakfast +madder +Catalpa bignioides +rose of Jericho +spark chamber +rhizome +beard worm +supper club +negro peach +keratoscope +wain +apple aphid +planking +time-delay measuring instrument +sternpost +sicklepod +lake bed +gatherer +monotype +dead-man's float +poison gas +dicynodont +organism +cell +person +animal +plant +food +artifact +dressage +contact sport +outdoor sport +gymnastics +track and field +jumping +high jump +skiing +water sport +swimming +dive +floating +skin diving +rowing +boxing +sledding +tobogganing +wrestling +skating +ice skating +roller skating +racing +boat 
racing +riding +equestrian sport +cycling +blood sport +hunt +fishing +angling +casting +athletic game +outdoor game +golf +field game +field hockey +football +American football +ball game +baseball +court game +badminton +basketball +tennis +sport +Seder +scavenger +bottom-feeder +work animal +beast of burden +pack animal +domestic animal +marine animal +female +male +young +young mammal +pup +cub +lion cub +tiger cub +microorganism +arbovirus +herpes +herpes zoster +reovirus +moneran +cyanobacteria +enteric bacteria +actinomycete +streptomyces +diplococcus +parasite +ectoparasite +protoctist +protozoan +sarcodinian +ameba +ciliate +alga +brown algae +green algae +sporozoan +cypriniform fish +cyprinid +carp +domestic carp +shiner +catostomid +buffalo fish +cyprinodont +killifish +topminnow +squirrelfish +stickleback +pipefish +embryo +fetus +blastula +chordate +cephalochordate +tunicate +ascidian +vertebrate +aquatic vertebrate +jawless vertebrate +lamprey +hagfish +cartilaginous fish +holocephalan +chimaera +elasmobranch +shark +mackerel shark +mako +requiem shark +dogfish +smooth dogfish +spiny dogfish +smooth hammerhead +smalleye hammerhead +shovelhead +ray +sawfish +roughtail stingray +butterfly ray +eagle ray +manta +skate +bird +gamecock +night bird +ratite +passerine +oscine +accentor +lark +pipit +finch +canary +dark-eyed junco +New World sparrow +bunting +honeycreeper +sparrow +grosbeak +towhee +weaver +grassfinch +tyrannid +New World flycatcher +kingbird +pewee +cotinga +antbird +Old World flycatcher +thrush +nightingale +Old World chat +warbler +kinglet +Old World warbler +New World warbler +flycatching warbler +New World chat +yellowthroat +New World oriole +northern oriole +meadowlark +New World blackbird +grackle +Old World oriole +starling +myna +corvine bird +crow +Old World jay +common European jay +New World jay +blue jay +Canada jay +Rocky Mountain jay +nutcracker +European magpie +American magpie +Australian magpie +wren +marsh wren +thrasher 
+New Zealand wren +creeper +titmouse +black-capped chickadee +Carolina chickadee +swallow +martin +tanager +shrike +butcherbird +bush shrike +bowerbird +European water ouzel +American water ouzel +vireo +waxwing +bird of prey +hawk +black kite +swallow-tailed kite +white-tailed kite +harrier +falcon +peregrine +caracara +eagle +young bird +sea eagle +Aegypiidae +Old World vulture +griffon vulture +bearded vulture +Egyptian vulture +black vulture +New World vulture +buzzard +condor +Andean condor +California condor +black vulture +king vulture +owl +horned owl +scops owl +amphibian +salamander +newt +Pacific newt +ambystomid +climbing salamander +web-toed salamander +frog +true frog +true toad +spadefoot +tree toad +cricket frog +tongueless frog +reptile +anapsid +diapsid +chelonian +turtle +sea turtle +ridley +snapping turtle +musk turtle +diamondback terrapin +Western box turtle +tortoise +soft-shelled turtle +saurian +lizard +gecko +iguanid +spiny lizard +fence lizard +horned lizard +skink +teiid lizard +racerunner +plateau striped whiptail +Chihuahuan spotted whiptail +western whiptail +checkered whiptail +agamid +moloch +anguid lizard +venomous lizard +lacertid lizard +chameleon +monitor +crocodilian reptile +crocodile +alligator +caiman +armored dinosaur +ankylosaur +bone-headed dinosaur +ceratopsian +hadrosaur +saurischian +sauropod +theropod +ceratosaur +maniraptor +synapsid +pterosaur +ichthyosaur +snake +colubrid snake +smooth green snake +rough green snake +racer +blacksnake +whip-snake +rat snake +bull snake +common kingsnake +milk snake +common garter snake +ribbon snake +Western ribbon snake +common water snake +water moccasin +grass snake +viperine grass snake +sand snake +lyre snake +blind snake +indigo snake +constrictor +boa +python +elapid +coral snake +coral snake +cobra +mamba +black mamba +krait +viper +pit viper +rattlesnake +timber rattlesnake +arthropod +arachnid +false scorpion +whip-scorpion +spider +European wolf spider +acarine +hard 
tick +Ixodes dammini +Ixodes neotomae +Ixodes pacificus +Ixodes scapularis +sheep-tick +Ixodes persulcatus +Ixodes dentatus +Ixodes spinipalpis +wood tick +soft tick +mite +trombiculid +spider mite +house centipede +gallinaceous bird +domestic fowl +jungle fowl +chicken +cock +hen +turkey +grouse +European black grouse +Asian black grouse +blackcock +greyhen +red grouse +moorhen +greater prairie chicken +lesser prairie chicken +heath hen +guan +chachalaca +megapode +mallee fowl +phasianid +pheasant +bobwhite +northern bobwhite +Old World quail +migratory quail +peafowl +California quail +Hungarian partridge +red-legged partridge +Greek partridge +mountain quail +guinea fowl +columbiform bird +pigeon +dove +turtledove +domestic pigeon +homing pigeon +sandgrouse +parrot +cockatoo +lory +varied Lorikeet +rainbow lorikeet +parakeet +cuculiform bird +cuckoo +crow pheasant +coraciiform bird +roller +kingfisher +hoopoe +apodiform bird +swift +Archilochus colubris +thornbill +goatsucker +piciform bird +woodpecker +flicker +sapsucker +toucanet +trogon +quetzal +aquatic bird +waterfowl +anseriform bird +duck +teal +widgeon +sheldrake +goldeneye +scaup +wood duck +sea duck +scoter +merganser +gosling +gander +Chinese goose +greylag +blue goose +snow goose +brant +common brant goose +honker +barnacle goose +swan +tundra swan +screamer +crested screamer +mammal +prototherian +monotreme +marsupial +opossum +bandicoot +kangaroo +common wallaby +hare wallaby +nail-tailed wallaby +rock wallaby +pademelon +tree wallaby +rat kangaroo +phalanger +dasyurid marsupial +dasyure +placental +calf +buck +insectivore +mole +shrew mole +shrew +water shrew +tenrec +invertebrate +sponge +glass sponge +coelenterate +Chrysaora quinquecirrha +hydrozoan +siphonophore +anthozoan +actinia +coral +gorgonian +stony coral +ctenophore +worm +planarian +fluke +liver fluke +Fasciolopsis buski +schistosome +tapeworm +echinococcus +taenia +common roundworm +chicken roundworm +pinworm +eelworm +vinegar eel 
+trichina +hookworm +filaria +Guinea worm +annelid +oligochaete +polychaete +leech +mollusk +scaphopod +gastropod +abalone +scorpion shell +giant conch +edible snail +garden snail +brown snail +Helix hortensis +seasnail +neritid +limpet +Hermissenda crassicornis +cowrie +bivalve +clam +quahog +cockle +oyster +mussel +marine mussel +freshwater mussel +scallop +shipworm +cephalopod +octopod +decapod +squid +crustacean +malacostracan crustacean +decapod crustacean +crab +swimming crab +spider crab +lobster +true lobster +Old World crayfish +American crayfish +shrimp +prawn +krill +stomatopod +mantis shrimp +woodlouse +pill bug +sow bug +sea louse +amphipod +copepod +barnacle +wading bird +stork +ibis +common spoonbill +roseate spoonbill +heron +egret +night heron +American bittern +European bittern +least bittern +whooping crane +rail +crake +gallinule +purple gallinule +coot +great bustard +plain turkey +button quail +trumpeter +seabird +shorebird +plover +turnstone +sandpiper +yellowlegs +ruff +tattler +woodcock +snipe +greyback +red-breasted snipe +curlew +godwit +stilt +stilt +phalarope +courser +coastal diving bird +larid +gull +tern +jaeger +skua +auk +guillemot +murre +puffin +gaviiform seabird +podicipitiform seabird +grebe +pelecaniform seabird +white pelican +Old world white pelican +gannet +snakebird +sphenisciform seabird +penguin +pelagic bird +wandering albatross +black-footed albatross +petrel +shearwater +storm petrel +aquatic mammal +cetacean +whale +baleen whale +rorqual +toothed whale +beaked whale +dolphin +bottlenose dolphin +porpoise +sea cow +carnivore +pinniped mammal +seal +eared seal +fur seal +fur seal +South American sea lion +California sea lion +Australian sea lion +Steller sea lion +earless seal +walrus +canine +bitch +dog +cur +toy dog +toy spaniel +English toy spaniel +hunting dog +hound +coonhound +dachshund +foxhound +wolfhound +greyhound +terrier +bullterrier +rat terrier +Manchester terrier +fox terrier +wirehair +Welsh terrier 
+schnauzer +Skye terrier +sporting dog +retriever +pointer +setter +spaniel +springer spaniel +water spaniel +working dog +watchdog +shepherd dog +Belgian sheepdog +pinscher +Sennenhunde +mastiff +bulldog +guide dog +sled dog +liver-spotted dalmatian +spitz +griffon +corgi +poodle +wolf +coydog +wild dog +striped hyena +brown hyena +spotted hyena +aardwolf +fox +black fox +silver fox +blue fox +feline +cat +domestic cat +tom +blue point Siamese +wildcat +common lynx +Canada lynx +bobcat +spotted lynx +caracal +big cat +leopardess +panther +lioness +lionet +Bengal tiger +tigress +saber-toothed tiger +bear +Syrian bear +grizzly +Alaskan brown bear +cinnamon bear +viverrine +civet +Indian mongoose +ichneumon +slender-tailed meerkat +suricate +bat +fruit bat +carnivorous bat +leafnose bat +false vampire +vespertilian bat +long-eared bat +freetail +vampire bat +predator +game +game bird +fossorial mammal +tetrapod +insect +beetle +two-spotted ladybug +Mexican bean beetle +Hippodamia convergens +vedalia +bombardier beetle +calosoma +searcher +firefly +sawyer +pine sawyer +flea beetle +Colorado potato beetle +carpet beetle +clerid beetle +lamellicorn beetle +scarabaeid beetle +scarab +tumblebug +dorbeetle +June beetle +melolonthid beetle +elaterid beetle +snout beetle +boll weevil +blister beetle +bark beetle +darkling beetle +flour beetle +seed beetle +pea weevil +bean weevil +rice weevil +louse +flea +dipterous insect +gall midge +housefly +tsetse fly +blowfly +bluebottle +greenbottle +flesh fly +tachina fly +gadfly +botfly +human botfly +sheep botfly +warble fly +horsefly +bee fly +fruit fly +louse fly +horn fly +mosquito +gnat +fungus gnat +hymenopterous insect +drone +worker +honeybee +Africanized bee +black bee +Carniolan bee +Italian bee +carpenter bee +bumblebee +cuckoo-bumblebee +andrena +Nomia melanderi +leaf-cutting bee +mason bee +potter bee +wasp +vespid +paper wasp +hornet +sphecoid wasp +digger wasp +chalcid fly +sawfly +pharaoh ant +little black ant +army 
ant +carpenter ant +fire ant +wood ant +slave ant +Formica fusca +slave-making ant +sanguinary ant +bulldog ant +Amazon ant +termite +dry-wood termite +orthopterous insect +short-horned grasshopper +locust +migratory locust +migratory grasshopper +long-horned grasshopper +katydid +mormon cricket +sand cricket +mole cricket +European house cricket +field cricket +tree cricket +snowy tree cricket +phasmid +diapheromera +oriental cockroach +American cockroach +Australian cockroach +German cockroach +giant cockroach +praying mantis +hemipterous insect +leaf bug +mirid bug +lygus bug +lygaeid +coreid bug +heteropterous insect +water bug +water strider +assassin bug +homopterous insect +whitefly +sweet-potato whitefly +coccid insect +scale insect +soft scale +armored scale +mealybug +plant louse +aphid +greenfly +woolly aphid +adelgid +dog-day cicada +seventeen-year locust +spittle insect +plant hopper +psocopterous insect +psocid +booklouse +ephemerid +neuropteron +green lacewing +brown lacewing +odonate +trichopterous insect +caseworm +thysanuran insect +bristletail +thysanopter +thrips +earwig +lepidopterous insect +butterfly +nymphalid +fritillary +emperor butterfly +danaid +pierid +small white +large white +southern cabbage butterfly +blue +copper +American copper +hairstreak +Strymon melinus +moth +tortricid +lymantriid +geometrid +cankerworm +pyralid +tineoid +tineid +clothes moth +gelechiid +grain moth +noctuid moth +cutworm +underwing +hawkmoth +bombycid +saturniid +giant silkworm moth +silkworm +arctiid +lasiocampid +tent caterpillar +webworm +webworm moth +caterpillar +bollworm +woolly bear +larva +grub +pupa +queen +echinoderm +basket star +edible sea urchin +sand dollar +heart urchin +crinoid +trepang +lagomorph +leporid +rabbit +eastern cottontail +swamp rabbit +marsh hare +leveret +European hare +jackrabbit +white-tailed jackrabbit +blacktail jackrabbit +polar hare +snowshoe hare +pika +rodent +mouse +rat +pocket rat +field mouse +brown rat +jerboa rat 
+water rat +New World mouse +wood mouse +wood rat +vole +packrat +Eurasian hamster +golden hamster +gerbil +lemming +pied lemming +Old World porcupine +brush-tailed porcupine +long-tailed porcupine +New World porcupine +Canada porcupine +pocket mouse +kangaroo rat +jumping mouse +jerboa +dormouse +gopher +squirrel +tree squirrel +ground squirrel +prairie dog +American flying squirrel +groundhog +hoary marmot +yellowbelly marmot +Old World beaver +New World beaver +cavy +naked mole rat +ungulate +hyrax +odd-toed ungulate +equine +horse +foal +colt +male horse +stallion +mare +saddle horse +warhorse +pony +mustang +bronco +wild horse +pony +racehorse +racer +harness horse +workhorse +draft horse +trotting horse +ass +domestic ass +wild ass +onager +common zebra +mountain zebra +grevy's zebra +rhinoceros +tapir +even-toed ungulate +swine +piglet +porker +peccary +ruminant +bovid +bovine +ox +cattle +bull +cow +beef +Brahman +dairy cattle +Old World buffalo +Indian buffalo +carabao +Asian wild ox +American bison +wisent +sheep +lamb +domestic sheep +wild sheep +mountain sheep +goat +domestic goat +wild goat +goat antelope +antelope +Thomson's gazelle +Gazella subgutturosa +springbok +kudu +harnessed antelope +eland +waterbuck +oryx +deer +stag +red deer +mule deer +roe deer +caribou +chevrotain +camel +domestic llama +guanaco +alpaca +giraffe +musteline mammal +ermine +stoat +New World least weasel +Old World least weasel +longtail weasel +American mink +ferret +muishond +snake muishond +striped muishond +river otter +Eurasian otter +striped skunk +hooded skunk +hog-nosed skunk +spotted skunk +American badger +Eurasian badger +ferret badger +hog badger +marten +pachyderm +edentate +peba +apar +tatouay +peludo +giant armadillo +pichiciago +sloth +anteater +primate +ape +anthropoid ape +hominoid +hominid +homo +Homo erectus +Homo sapiens +australopithecine +great ape +western lowland gorilla +eastern lowland gorilla +mountain gorilla +silverback +western chimpanzee 
+eastern chimpanzee +central chimpanzee +pygmy chimpanzee +lesser ape +monkey +Old World monkey +talapoin +grivet +vervet +green monkey +chacma +mandrill +drill +rhesus +bonnet macaque +Barbary ape +crab-eating macaque +entellus +guereza +New World monkey +true marmoset +pygmy marmoset +tamarin +silky tamarin +pinche +lemur +tarsier +flying lemur +proboscidean +elephant +mammoth +procyonid +raccoon +fish +food fish +young fish +crossopterygian +lungfish +catfish +silurid +bullhead +channel catfish +gadoid +cod +hake +elver +common eel +tuna +moray +conger +teleost fish +clupeid fish +shad +herring +sardine +pilchard +anchovy +salmonid +salmon +Atlantic salmon +trout +brown trout +char +whitefish +smelt +tarpon +ribbonfish +toadfish +needlefish +flying fish +spiny-finned fish +percoid fish +perch +pike-perch +walleye +robalo +pike +pickerel +sunfish +crappie +freshwater bream +black bass +bass +serranid fish +grouper +hind +surfperch +cardinalfish +remora +carangid fish +jack +moonfish +pompano +scad +dolphinfish +characin +cichlid +snapper +grunt +sparid +sea bream +porgy +sciaenid fish +croaker +whiting +sea trout +mullet +goatfish +mullet +silversides +barracuda +sea chub +butterfly fish +damselfish +clown anemone fish +wrasse +blenny +pikeblenny +gunnel +goby +gempylid +scombroid +mackerel +Spanish mackerel +tuna +bonito +sailfish +billfish +marlin +tripletail +mojarra +ganoid +Pacific sturgeon +beluga +scorpaenoid +scorpaenid +scorpionfish +rockfish +lumpfish +greenling +gurnard +sea robin +plectognath +triggerfish +filefish +boxfish +spiny puffer +ocean sunfish +flatfish +righteye flounder +lefteye flounder +whiff +sole +abbey +abbey +abrader +accelerator +accessory +accommodation +acoustic device +acoustic modem +acrylic +action +actuator +adhesive bandage +adjustable wrench +aeolian harp +aerosol +after-shave +airbus +aircraft +airfield +airfoil +air gun +airplane +air pump +air-to-air missile +air-to-ground missile +alarm +alb +alcazar +Allen screw +alms 
dish +altimeter +Amati +ammeter +ammunition +amplifier +analog computer +analytical balance +anchor +anchor chain +aneroid barometer +angledozer +anklet +antenna +anteroom +antiaircraft +antiballistic missile +apartment +apartment building +aperture +apparatus +apparel +appliance +appliance +applicator +aquarium +arbor +arcade +arch +arc lamp +area +argyle +arm +armament +armature +armchair +armoire +armor +armored vehicle +armor plate +armrest +array +arrow +artificial heart +artillery +assembly +assembly plant +astrodome +astronomical telescope +athletic sock +atom bomb +atomic clock +atomizer +attachment +attack submarine +attire +audiocassette +audio system +audiotape +auditorium +autoclave +autoinjector +autoloader +automat +automat +automatic firearm +automatic rifle +automaton +auxiliary research submarine +awl +ax +axis +axle +axletree +baby bed +baby buggy +baby grand +back +background +backseat +badminton equipment +badminton racket +bag +bag +bag +baggage +bagpipe +bait +balance +balcony +balcony +bale +ball +ball gown +ballistic missile +ballistic pendulum +ball-peen hammer +ballroom +band +bandage +bandanna +banderilla +bar +bar +barbed wire +barge +barge pole +barn door +barograph +barrack +barrage balloon +barrel knot +barrel vault +barrier +barroom +base +base +baseball equipment +basilica +basin +basket +basketball equipment +bass +bass drum +bass horn +bastion +bat +bathhouse +battery +battle-ax +battle dress +battleship +bay rum +bay window +beading plane +beam +beam balance +bearing +beater +beating-reed instrument +bed +bed +bedclothes +bedroom +bedroom furniture +bedspread +bedspring +beehive +beer barrel +bell +bell push +bell tower +belt +belt buckle +bench +berlin +berth +besom +bevel gear +bicycle +bicycle chain +bier +billiard ball +bin +binding +bin liner +binocular microscope +bioscope +birchbark canoe +bird shot +bistro +bit +bit +black tie +blade +blade +blanket +blimp +blind +block +block plane +blouse +blower +blowtorch +bludgeon 
+boarding +boarding house +boardroom +boat +bobbin +body +body armor +body lotion +boiler +bolt +bolt +bomb +bomber +bongo +boom +boom +boomerang +boot +booth +booth +bore bit +Boston rocker +bota +bottle +bottle opener +bow +bow +bowed stringed instrument +bowl +bowl +bowline +bowling equipment +bowling pin +bowsprit +box +box +boxcar +boxing equipment +brace +brace +bracelet +bracket +brake +brake system +brass +brasserie +brazier +breechcloth +breeches +brewpub +brick +bricklayer's hammer +brickwork +bridal gown +bridge +briefcase +brigandine +brilliant pebble +brim +broad arrow +broadax +broad hatchet +broadsword +brush +bubble jet printer +buffer +buffet +building +building complex +bulldozer +bullet +bullhorn +bullnose +bundle +bunker +burial chamber +burner +bus +business suit +butt joint +button +buttress +butt shaft +buzz bomb +cabaret +caber +cabin +cabin +cabinet +cabinet +cabin liner +cable +cable +cafe +cafeteria +cafeteria tray +caff +cage +calculator +caliper +calorimeter +camera +camera lens +camera tripod +camp +camp +camp chair +camper +can +canal +candelabrum +candlestick +cane +cannikin +cannon +cannon +cannonball +canopy +canteen +canteen +canvas +canvas tent +cap +cap +cap +capacitor +caparison +cape +cap screw +capsule +car +car +carbine +carbon arc lamp +card index +cardioid microphone +car door +cargo liner +cargo ship +carillon +carpenter's hammer +carpenter's level +carpenter's mallet +carpenter's rule +carpet tack +carriage +carriage +carriage bolt +carrick bend +carrier +car seat +cart +cartridge +cartridge belt +cartridge holder +case +case +cashbox +casque +casserole +cassock +catch +catcher's mask +cathedra +cathedral +cathedral +catheter +cathode +cathode-ray tube +cat's-paw +cattle car +cautery +cavalry sword +cedar chest +cell +cell +cellblock +center +centrifuge +ceramic +ceramic ware +chain tongs +chair +chair of state +chalk +chamfer plane +chandlery +chapel +character printer +chassis +chasuble +chatelaine +checker 
+cheeseboard +chemical reactor +chessman +chest of drawers +child's room +china +chip +chip +chisel +choke +chokey +chordophone +chronoscope +chuck +church key +cigar lighter +circle +circuit +circuit board +circular plane +circular saw +cistern +civilian clothing +clamp +clamshell +clarinet +classroom +clavier +cleaning implement +cleaning pad +clean room +clinic +clip +cloak +clock +closed circuit +closed-circuit television +closet +cloth covering +clothes closet +clothes dryer +clothes hamper +clothes tree +clothing +clothing store +clout nail +clove hitch +clutch +coach +coal car +coal shovel +coat +coat closet +coating +coating +coat of paint +coaxial cable +cocked hat +coffee cup +coffee maker +coffer +coffin +coil +colander +collider +cologne +colonnade +color television +Colt +column +column +comb +comb +combination plane +combine +commissary +commodity +communication system +commutator +compact disk +compartment +compass +compass card +compound lens +compound lever +compressor +computer +computer circuit +computer network +computer screen +computer system +concentration camp +concert grand +concertina +condenser +condenser +condenser microphone +conductor +connecting rod +connection +conservatory +conservatory +contact +container +contrabassoon +control +control panel +control system +convent +converging lens +converter +convertible +conveyance +cooker +cooking utensil +cooler +cooling system +cord +cord +cordage +corner +correctional institution +corset +cosmetic +costume +costume +cotter +cotton +counter +counter +counter +counter tube +country house +coupling +court +court +coverall +covering +cowbarn +craft +cravat +crazy quilt +cream +cream pitcher +crematory +crepe +crib +cricket equipment +croquet equipment +crossbar +crossbow +crosspiece +crown jewels +cruiser +cruiser +cruise ship +crystal microphone +cudgel +cuff +cultivator +cup +cupboard +cupola +curb roof +curtain +cutout +cutter +cutting implement +cybercafe +cyclotron +cylinder +cymbal +dado 
plane +dagger +damper +dart +data converter +data input device +davenport +davenport +davit +dead axle +deck +deck +deck chair +deep-freeze +defensive structure +delay line +delicatessen +dental appliance +denture +depilatory +depressor +depth finder +derrick +destroyer +detector +detector +detonating fuse +detonator +developer +device +dial +dialyzer +diathermy machine +diesel locomotive +digital camera +digital computer +digital display +diner +dinghy +dining car +dining-hall +dining room +dining-room furniture +dining-room table +dinner dress +dinner pail +dinner table +diode +dip +diplomatic building +dipper +DIP switch +directional antenna +directional microphone +direction finder +disguise +dish +dish +disk +dispenser +display +display panel +distillery +ditch +ditch spade +dive bomber +doll +dolmen +domino +door +doorbell +doorlock +doornail +dormer window +dormitory +dot matrix printer +double-breasted suit +double-reed instrument +douche +dovecote +dovetail plane +downstage +drafting instrument +Dragunov +drawstring bag +dray +dredging bucket +dress +dress blues +dressing +dress uniform +drill +electric drill +drill rig +drinking fountain +drinking vessel +drip mat +drip pot +drive +drive +drogue +drogue parachute +drop-leaf table +dry battery +dry dock +dryer +dry masonry +dry wall +dugout canoe +dumdum +dumpcart +dune buggy +dungeon +duplicator +dustmop +dwelling +earphone +earthenware +easel +easy chair +edge tool +eiderdown +elastic bandage +electrical converter +electrical device +electric bell +electric frying pan +electric furnace +electric heater +electric lamp +electric motor +electric refrigerator +electro-acoustic transducer +electrode +electromagnet +electronic balance +electronic device +electronic equipment +electronic instrument +electronic voltmeter +electron microscope +electrostatic generator +electrostatic printer +elevator +embankment +embellishment +enamel +enamelware +enclosure +endoscope +engine +engine +ensemble +entrenching tool 
+epidiascope +equipment +eraser +escutcheon +espadrille +espresso shop +establishment +estaminet +exercise device +exhaust fan +exhibition hall +Exocet +expansion bolt +explosive device +external-combustion engine +extractor +fabric +face mask +face veil +facing +factory +fairlead +false face +fan +farm building +farm machine +fastener +fatigues +faucet +feedback circuit +fence +fencing sword +fender +ferry +fetoscope +field-sequential color television +fife +fifth wheel +fighter +figure eight +file +file server +filling +film +film +filter +filter +finery +finisher +fipple flute +fire +firearm +fire iron +fireplace +firkin +fisherman's bend +fisherman's knot +fisherman's lure +fishing boat +fishing rod +fishnet +flag +flageolet +flambeau +flannelette +flap +flashlight +flask +flatcar +flat tip screwdriver +fleet ballistic missile submarine +flight simulator +flip-flop +floating dock +floor +floor +floor cover +fly +flywheel +fob +foghorn +folder +food hamper +footbath +footbridge +foothold +foot rule +footwear +footwear +forceps +fore-and-aft sail +foremast +fore plane +fore-topmast +fork +formalwear +fortification +fortress +foundation garment +foundry +fragmentation bomb +framework +free-reed instrument +freight train +French door +friary +friction clutch +frigate +frill +frock coat +front projector +fruit machine +full-dress uniform +full metal jacket +funny wagon +fur hat +furnace +furnishing +furniture +fuse +gable +gable roof +gaff +galleon +gallery +galley +galley +gallows +galvanometer +gambling house +game +game equipment +gamp +garage +Garand rifle +garden +garden spade +garden tool +garment +gas burner +gas-discharge tube +gasket +gasoline engine +gate +gatehouse +gatepost +gathered skirt +gauge +gauze +gauze +gavel +gear +gear +gear +gearing +general-purpose bomb +generator +generator +Geneva gown +geodesic dome +girder +glass +glider +glove +glyptic art +goal +golf club +golf equipment +Gordian knot +Gothic arch +government building +government office 
+gown +gramophone +granary +granny knot +grapnel +grapnel +grate +graver +greasy spoon +greatcoat +great hall +greengrocery +grenade +grillroom +groined vault +Guarnerius +guidance system +guided missile +guildhall +guitar +guitar pick +gun +gun carriage +gunlock +gunsight +gun trigger +gurney +gymnastic apparatus +gym shoe +gypsy cab +habergeon +habit +hairdressing +hairpiece +hairpin +half hatchet +half hitch +hall +hall +hammer +hand +handbell +handbow +handcart +hand glass +handloom +hand lotion +hand mower +handsaw +hand shovel +hand tool +handwear +handwheel +hanger +hank +harpsichord +harrow +hash house +hat +hatch +hauberk +hawser bend +hazard +head +head +head covering +headdress +header +headgear +headlight +headsail +headscarf +health spa +heat engine +heater +heat lamp +heat-seeking missile +heavier-than-air craft +heckelphone +hedge +helicopter +helm +helmet +helmet +heraldry +high altar +high-angle gun +high gear +high table +hinge +hip boot +hitch +hoe +hogshead +hoist +holder +holding device +home appliance +homespun +hood +hood +hood +hook +Hoover +hope chest +horn +horn button +horse +horsecar +horse-drawn vehicle +horsehair wig +hosiery +hospital +hospital room +hostel +hot-air balloon +hotel +hotel room +hot tub +house +house +housing +hovel +huarache +humeral veil +hut +hutch +hydraulic brake +hydraulic system +hydroelectric turbine +hydrofoil +hydrometer +hygrometer +hypermarket +hypodermic syringe +ice machine +ice rink +ice skate +icetray +ignition switch +impact printer +implant +implement +imprint +improvised explosive device +inclined plane +indicator +induction coil +ink-jet printer +inkstand +institution +instrument +instrument of punishment +instrument of torture +interceptor +interchange +intercommunication system +intercontinental ballistic missile +interface +interior door +internal-combustion engine +ionization chamber +video iPod +iron +jack +jack +jacket +jacket +jack plane +jail +jamb +jar +jeroboam +jet +jet engine +jewelled 
headdress +jib +jibboom +jiggermast +joint +jointer +joist +jolly boat +jug +jumper +jumper cable +junction +junction +jury mast +kayak +keel +keg +kerchief +kettle +key +key +keyboard +keyboard instrument +khakis +kiln +kinescope +kingbolt +kirk +kit +kit +kitbag +kitchen +kitchen appliance +kitchen utensil +kite balloon +knee-high +knife +knife +knit +knob +lace +lacquer +ladder truck +lag screw +lamasery +laminate +lamination +lamp +lamp +landing gear +land mine +lantern +lapel +lathe +lattice +launcher +lead-acid battery +leather strip +Leclanche cell +leg +legging +lens +lens implant +level +lever +Levi's +lid +life buoy +life jacket +life preserver +lifting device +ligament +light +light-emitting diode +lighter-than-air craft +lighting +light microscope +linear accelerator +line printer +lingerie +lining +liquid crystal display +lister +living quarters +living room +local area network +lock +locomotive +lodge +lodging house +loft +loft +longbow +lookout +loom +loop knot +lota +lounge +loungewear +love knot +lunchroom +luxury liner +lyre +machine +machine +machine bolt +machine gun +machinery +machine screw +machine tool +magic lantern +magnet +magnetic disk +magnetic recorder +magnetic tape +magnifier +magnum +magnus hitch +mailer +mainframe +mainmast +main-topmast +main yard +makeup +mallet +mallet +mallet +mandolin +manger +man-of-war +manometer +MANPAD +mansard +mansion +marina +marker +marketplace +maser +mask +masonry +mass spectrometer +mast +mast +mat +mat +match +match +match plane +material +materiel +Matthew Walker +maul +measure +measuring instrument +measuring stick +mechanical device +mechanical system +mechanism +medical building +medical instrument +memorial +memory +memory chip +memory device +menhir +man's clothing +mercantile establishment +mercury barometer +mercury thermometer +mercury-vapor lamp +mess +metal screw +meteorological balloon +meter +meterstick +microbalance +microfilm +microscope +military hospital +military quarters 
+military vehicle +mill +milldam +millinery +mine +minibike +mink +minster +Minuteman +mirror +mixer +mizzenmast +module +mold +moldboard plow +monitor +monitor +morgue +mortise joint +motion-picture camera +motion-picture film +motor +motorboat +motorcycle +motor hotel +motor vehicle +mound +mount +mouse button +movie projector +moving-coil galvanometer +mug +multiplex +multiplexer +musette pipe +mushroom anchor +musical instrument +musket +musket ball +muslin +muzzle loader +narrowbody aircraft +nautilus +navigational system +naval equipment +naval gun +naval radar +naval weaponry +navigational instrument +nebuchadnezzar +neckline +neckpiece +necktie +neckwear +needle +needlework +negligee +net +net +net +net +network +network +night bell +nightwear +noisemaker +nonsmoker +non-volatile storage +nose flute +nuclear reactor +nuclear weapon +nursery +oar +oblique bandage +oboe da caccia +oboe d'amore +obstacle +office +office furniture +oil lamp +oil paint +oil tanker +olive drab +omnidirectional antenna +onion dome +open-air market +open-end wrench +opener +openside plane +ophthalmoscope +optical device +optical disk +optical instrument +optical telescope +organ pipe +outbuilding +outerwear +outfit +outrigger canoe +outside mirror +oven +overgarment +overhand knot +overhang +overhead projector +overnighter +overshoe +oxford +package +packaging +packing box +paddle +paddle steamer +page printer +paint +pallium +pan +pan +panic button +panopticon +panopticon +pantechnicon +pantry +pants suit +panzer +paper chain +paper fastener +parabolic reflector +parapet +parasail +parka +parsonage +particle detector +partition +passenger ship +passenger train +passenger van +passive matrix display +passkey +patch +patchouli +patchwork +patina +patisserie +pavis +peavey +pedal +pedestal table +pedestrian crossing +pedicab +peg +pen +penal institution +pencil +pendulum +pendulum clock +percolator +percussion instrument +perfumery +peripheral +periwig +personal computer +petticoat 
+Phillips screw +Phillips screwdriver +phonograph record +photographic equipment +photographic paper +photometer +physical pendulum +piano +piccolo +pick +pick +pickle barrel +piece of cloth +pile +pillow lace +pilothouse +pin +pincer +pinstripe +pipe +pipet +pipe wrench +pistol +pivot +place of business +place of worship +planetarium +planner +plant +planter +plasterboard +plastic laminate +plastic wrap +plastron +plate +platform +platform +platform rocker +plating +pleat +plethysmograph +plexor +pliers +plug +plug +pneumatic drill +pocket +pocket-handkerchief +pocketknife +pointed arch +polyester +polygraph +pomade +pontifical +pool ball +poorhouse +porcelain +porch +portable computer +portico +post +posthole digger +pot +potential divider +potpourri +pottery +pouch +poultice +powder +powder keg +power brake +power mower +power saw +power shovel +power tool +press +press +pressure dome +pressure gauge +pressure suit +printed circuit +printer +prison camp +prod +prolonge knot +prompter +prong +propeller +propeller plane +prosthesis +protective covering +protective garment +pruning saw +pruning shears +public house +public toilet +public transport +pull +pull chain +pulley +Pullman +pullover +pulse counter +pump +pump +pump house +punch +punch press +purifier +push broom +push button +pusher +puzzle +pyrometer +pyx +QWERTY keyboard +racing boat +rack +rack +radar +radiogram +radio interferometer +radio link +radiometer +radio receiver +radiotelegraph +radiotelephone +radio transmitter +raft +rail +rail fence +railing +raincoat +rake +ramp +rampart +random-access memory +rayon +razor +reaction-propulsion engine +reactor +reading lamp +reading room +read-only memory +rearview mirror +receiver +receptacle +reception room +recess +reconnaissance plane +recorder +recording +record player +recreation room +recycling bin +reed stop +reef knot +refectory table +refinery +reflecting telescope +reflector +reformatory +refracting telescope +refrigerator car +refuge +regalia 
+regimentals +regulator +rein +religious residence +removable disk +repair shop +repeating firearm +reproducer +rescue equipment +reservoir +reset button +residence +resistor +resonator +respirator +restraint +retort +rheostat +rib +ribbed vault +riddle +ride +riding boot +riding mower +rifle ball +rig +rink +river boat +road +roadway +robe +rocket +rocket +rod +roller +roller +in-line skate +roller blind +roller coaster +rolling hitch +Rolodex +Roman building +roof +roof +room +roost +rope +rose water +rotary engine +rotating mechanism +rotating shaft +rotisserie +rotor +round arch +router plane +row house +royal mast +rubber bullet +rug +rushlight +sable +sable coat +sack +sackbut +sacking +saddle +safe +safety belt +safety curtain +safety fuse +safety match +sail +sailboat +sailing vessel +salver +sandglass +sash +satellite +satellite television +saucepan +savings bank +saw +sawhorse +scale +scarf +school +scientific instrument +scissors +scoop +scratcher +screen +screen +screen +screw eye +scrub plane +scuffer +sculpture +sea boat +sea chest +seam +seaplane +seat +seat +second hand +secretary +security system +seeker +selector +self-propelled vehicle +semiautomatic firearm +semiautomatic pistol +semiconductor device +serger +serpent +serving cart +serving dish +set +setscrew +setscrew +sewing needle +sextant +shackle +shade +shaft +shag rug +shaker +shaper +shaping tool +sharpener +shaving cream +shaving foam +shawl +shawm +shears +sheath +shed +sheepshank +sheet bend +shelf +shell +shell +shell +shellac +shelter +shelter +shelter +shield +ship +shipboard system +shirt +shirtfront +shock absorber +shoe +shooting brake +shop +short pants +shotgun +shoulder holster +shrine +shutter +shuttle +sidewinder +sieve +sifter +sights +signaling device +signboard +silk +simulator +single bed +single-breasted suit +single-reed instrument +sitz bath +six-pack +skate +skein +skeleton +skewer +skidder +skid lid +skiff +ski pole +skirt +ski tow +skullcap +slack suit +slat +sled 
+sleeper +sleeping car +sleeve +sleeve +slide projector +slipknot +slipper +sloop +slop pail +slot machine +small boat +smart bomb +smoker +smooth plane +snack bar +snap-brim hat +snare drum +sniper rifle +Sno-cat +soapbox +socle +sofa +sonograph +sorter +sound recording +soup ladle +source of illumination +soutane +spacecraft +spade +spar +spatula +spear +spear +spectacles +spectrograph +spectroscope +speedometer +spider +spike +spike +spinet +spinning machine +spiral ratchet screwdriver +spiral spring +spit +spokeshave +sponge mop +spoon +sports equipment +sports implement +sportswear +spot +spring +spring balance +springboard +sprit +square +square knot +squash racket +squawk box +squeezer +squinch +stabilizer +stabilizer +stable gear +stadium +stall +stamp mill +stand +standard cell +staple +starter +state prison +station +statue +stay +steakhouse +stealth aircraft +stealth bomber +stealth fighter +steam bath +steamboat +steamer +steam iron +steam whistle +steel mill +steelyard +steeple +steering system +step +step-up transformer +stereo +stick +stick +still +stilt +Stinger +stock +stockcar +stock car +stocking +stonework +stool +stopper knot +storage battery +storage space +storeroom +stove +stove bolt +Stradavarius +straight chair +strap +strap +stringed instrument +strip +strongbox +stronghold +strongroom +structural member +structure +stylus +submachine gun +submersible +submersible +subwoofer +suction pump +suede cloth +sunbonnet +sunhat +supermarket +superstructure +supply chamber +support +support +support column +supporting structure +supporting tower +surface lift +surface-to-air missile +surgeon's knot +surgical instrument +surgical knife +surplice +surveillance system +surveying instrument +surveyor's level +swamp buggy +sweater +swimsuit +sword +synchrotron +system +tabi +table +table +table knife +tableware +tabor +tachometer +tack +tack hammer +talaria +tambour +tambourine +tampon +tank +tank car +tannoy +tape +tape deck +tape recorder +target 
+tavern +tea chest +teaching aid +tea gown +teashop +teaspoon +tea-strainer +tea tray +telecommunication system +telephone +telephone line +telephone receiver +telephone system +telephone wire +telescope +television antenna +television camera +television equipment +television monitor +temple +temple +tender +tennis racket +tenor drum +tenoroon +tenpenny nail +tent +tenterhook +terminal +terminal +test rocket +tetraskelion +textile machine +textile mill +theater +theodolite +thermometer +thermostat +three-piece suit +three-way switch +thumbscrew +thumbtack +tights +tile +timber +timber hitch +timbrel +time-fuse +timepiece +timer +time-switch +tire chain +tithe barn +toecap +toga +toggle switch +toilet +toilet powder +toiletry +toilet water +token +tomograph +toner +tongs +tool +toolbox +tooth +toothbrush +top +top +topgallant +topmast +topsail +torpedo +torpedo boat +touch screen +towel +toweling +tower +toy box +track +tracked vehicle +trailer +trailer +train +trammel +transdermal patch +transformer +transistor +transmission +transmitter +transporter +trap +trapeze +travel iron +treasure chest +trellis +trench +trial balloon +triclinium +troop carrier +trough +trouser +trowel +truck +trunk +try square +tube +tuck shop +tun +tunic +turbine +Turkish towel +Turk's head +turner +turntable +turtleneck +tweed +tweeter +twenty-two +two-piece +typesetting machine +typewriter +ultraviolet lamp +undercarriage +undergarment +underpants +underwear +uneven parallel bars +uniform +university +uplift +urn +urn +utensil +vacuum flask +valve +van +van +varnish +vehicle +veranda +vertical file +vessel +vessel +vest +vibrator +vibrator +videocassette +video recording +vigil light +viol +vise +vivarium +voltaic cell +voltmeter +wagon +waist pack +walking stick +wall +wall +wall unit +ward +warehouse +warship +wash +washer +washtub +watch +watchtower +water-base paint +water butt +water cart +watercolor +water-cooled reactor +water gauge +water ski +waterwheel +weapon +weaponry 
+weatherglass +weathervane +web +wedge +wedge +weighbridge +weight +weir +weld +well +whaler +wheel +wheelchair +wheeled vehicle +wheelwork +whetstone +whip +whisk +whispering gallery +white goods +whorehouse +wicker basket +widebody aircraft +winch +Winchester +wind instrument +window +window +window blind +window envelope +Windsor knot +wine bucket +wine cask +wineglass +wire +wire +wire matrix printer +wiring +woman's clothing +wood +woodenware +woodscrew +woodwind +woofer +workbasket +workbench +work-clothing +worktable +workwear +wrapping +wrench +writing desk +writing implement +X-ray film +X-ray machine +yacht chair +yard +yard +yardstick +yoke +zither +zoot suit +grain +light +colorlessness +chromatic color +black +gray +dark red +orange +yellow +green +blue +purple +reddish purple +pink +light brown +reddish brown +complexion +skin +epidermal cell +columnar cell +macule +specimen +milk +embryonic cell +leukocyte +neutrophil +astrocyte +exoskeleton +medium +film +press +print media +storage medium +journalism +photojournalism +newspaper +telecommunication +telephone +call +long distance +wireless +broadcasting +television +reception +chat room +portal site +wordbook +album +concept album +magazine +movie +sign +comestible +course +dainty +dish +fare +diet +dietary supplement +liquid diet +reducing diet +vegetarianism +ration +field ration +foodstuff +starches +concentrate +meal +roughage +flour +wheat flour +nutriment +commissariat +canned food +canned meat +meal +breakfast +lunch +dinner +supper +buffet +picnic +cookout +bite +entree +side dish +casserole +chicken casserole +appetizer +cocktail +hors d'oeuvre +relish +dip +soup +madrilene +broth +broth +chowder +clam chowder +stew +goulash +fish stew +fricassee +ragout +ready-mix +powdered sugar +granulated sugar +brown sugar +sweet +confiture +candy +hard candy +patty +brittle +chewing gum +candied fruit +candied citrus peel +fudge +gumdrop +mint +kiss +lozenge +taffy +dessert +dumpling +frozen dessert 
+mousse +mousse +whip +pudding +pudding +tipsy cake +ice +chocolate ice cream +Neapolitan ice cream +peach ice cream +strawberry ice cream +tutti-frutti +vanilla ice cream +split +pudding +custard +pastry +turnover +puff paste +phyllo +fish cake +conserve +jam +jelly +apple jelly +marmalade +gelatin +gelatin dessert +patty +stuffing +bread +breadstick +bun +cracker +dark bread +flatbread +loaf of bread +quick bread +rye bread +toast +white bread +French bread +cornbread +johnnycake +muffin +scone +onion roll +sweet roll +onion bagel +biscuit +baking-powder biscuit +soft pretzel +sandwich +hamburger +gruel +edible fruit +vegetable +crudites +legume +greens +solanaceous vegetable +root vegetable +potato +baked potato +sweet potato +snack food +corn chip +tortilla chip +cruciferous vegetable +cabbage +kale +red cabbage +savoy cabbage +squash +summer squash +yellow squash +winter squash +turban squash +gherkin +sprout +beet +pepper +sweet pepper +hot pepper +chili +jalapeno +onion +Spanish onion +salad green +lettuce +butterhead lettuce +bean +pea +green pea +common bean +fresh bean +green bean +shell bean +lima bean +soy +celery +chicory +coffee substitute +chicory escarole +corn +hominy +cress +tomato +cherry tomato +salsify +turnip +edible nut +apple +eating apple +Delicious +McIntosh +Pippin +cooking apple +berry +currant +citrus +temple orange +mandarin +bitter orange +sweet orange +Jaffa orange +navel orange +Valencia orange +lime +almond +plum +dried fruit +raisin +passion fruit +cocoa +melon +muskmelon +winter melon +cherry +sweet cherry +heart cherry +sour cherry +grape +fox grape +muscadine +slipskin grape +vinifera grape +Tokay +cherimoya +soursop +sweetsop +ilama +pond apple +olive +pear +edible seed +walnut +feed +fodder +oil cake +timothy +grain +barley +wheat +rice +mash +bird feed +petfood +salad +tossed salad +combination salad +pasta salad +fruit salad +ingredient +flavorer +condiment +herb +spice +cinnamon +pepper +garlic +mustard +sage +savory 
+curry +paprika +pickle +sweet pickle +vinegar +sauce +hot sauce +dressing +mayonnaise +cheese sauce +hot-fudge sauce +white sauce +spaghetti sauce +boiled egg +hard-boiled egg +Easter egg +omelet +firm omelet +souffle +dairy product +milk +milk +powdered milk +cream +butter +clarified butter +yogurt +curd +cheese +cream cheese +bleu +cheddar +Swiss cheese +spread +pate +sweetening +sugar +syrup +batter +bread dough +chicken and rice +pasta +Tetrazzini +chili dog +fondue +fondue +hash +kabob +seafood Newburg +meatball +pilaf +sausage pizza +pepperoni pizza +cheese pizza +anchovy pizza +Sicilian pizza +porridge +fish loaf +salmon loaf +scallopine +taco +beef burrito +quesadilla +tostada +beverage +concoction +mix +filling +potion +elixir +alcohol +brew +beer +lager +Weissbier +malt +ale +stout +mead +wine +white wine +sparkling wine +Burgundy +Beaujolais +Medoc +Pinot noir +Bordeaux +claret +Chianti +Cabernet +Merlot +dessert wine +Rhine wine +Rioja +Saint Emilion +zinfandel +table wine +vermouth +fortified wine +Madeira +liquor +brandy +gin +rum +whiskey +corn whiskey +Irish +Scotch +liqueur +coffee liqueur +orange liqueur +mixed drink +cocktail +highball +Bloody Mary +daiquiri +manhattan +martini +sling +sour +caffe latte +cider +sweet cider +juice +fruit juice +grape juice +orange juice +fruit drink +mulled wine +soft drink +cola +coffee +punch +champagne cup +claret cup +rickey +tea +tea +herb tea +tisane +black tea +green tea +water +drinking water +mineral water +vitamin pill +collection +suburb +residence +littoral +grassland +pasture +resort +field +air bubble +arroyo +ascent +atoll +bank +bank +bar +barrier reef +basin +beach +burrow +canyon +cave +continental glacier +crag +crater +dale +descent +draw +dune +geological formation +glacier +glen +gorge +gulch +gully +highland +hill +hillside +hole +hollow +iceberg +ice mass +ion +knoll +landfall +landfill +lather +ledge +lowland +meteorite +mountain +mull +natural depression +natural elevation +nullah +ocean 
floor +outcrop +plain +point +precipice +ravine +reef +ridge +ridge +rift valley +rock +sandbank +seaside +shiner +shore +slope +soapsuds +spume +tableland +tideland +volcanic crater +wadi +spiritual leader +adventurer +anomaly +benefactor +commoner +contestant +discussant +entertainer +female +finisher +inhabitant +native +juvenile +lover +male +mediator +national +peer +recipient +sensualist +traveler +unwelcome person +unskilled person +worker +wrongdoer +Black +White +Semite +white man +Mongol +Nahuatl +Caddo +Penutian +Teton +Taracahitian +Slav +Catholic +Altaic +Bornean +Canadian +Central American +Britisher +English person +Englishwoman +Ethiopian +Parisian +Greek +Italian +Japanese +Mexican +Nigerian +North American +Pakistani +South American Indian +Filipino +Polynesian +Scandinavian +South African +South American +Turki +American +New Yorker +abbess +abstainer +academic administrator +accomplice +acquaintance +acquirer +aerialist +actor +actor +addict +adjutant +admirer +adulterer +advertiser +advocate +analyst +ancestor +announcer +announcer +appointee +appreciator +appropriator +archbishop +architect +army engineer +army officer +arrival +articulator +asserter +assistant +associate +astronaut +athlete +attendant +aunt +authoritarian +authority +aviator +back +bad person +ballet dancer +bullfighter +baron +bartender +baseball coach +base runner +basketball player +believer +betrothed +bigot +big shot +biochemist +bisexual +boatman +bond servant +botanist +Boy Scout +buddy +campaigner +captain +card player +careerist +caretaker +cavalryman +celebrity +charmer +child +child +cipher +citizen +civil rights leader +cleaner +clergyman +cleric +clerk +climber +closer +clown +coach +cobbler +collaborator +college student +collegian +commanding officer +commissioned officer +commissioned military officer +commissioner +committee member +communist +compulsive +computer scientist +computer user +contractor +convict +copycat +counselor +craftsman +creditor +critic 
+curate +dancer +dancer +darling +date +daughter +dawdler +deacon +deaf person +debtor +deliveryman +descender +designated hitter +detective +detractor +director +disbeliever +dispatcher +distributor +doctor +domestic partner +draftsman +drinker +drinker +drug addict +drug user +drummer +drunkard +eager beaver +earner +eavesdropper +economist +editor +egotist +elder +elected official +emissary +employee +employer +endomorph +enemy +entrant +examiner +exhibitionist +fan +fancier +farmer +farmhand +fascist +father +female aristocrat +female offspring +female child +fielder +fireman +first baseman +first sergeant +flag officer +flatterer +foe +folk dancer +follower +football player +forefather +forger +founder +free agent +friar +monk +gambler +generator +geneticist +genitor +geologist +girl +godchild +godparent +golfer +grandma +grandmaster +grandparent +granter +great grandchild +great grandparent +grouch +guard +guest +guide +gymnast +Gypsy +hack +hairdresser +hater +headmaster +hearer +hedonist +heir +herder +homeless +horseman +host +host +hypocrite +important person +incumbent +infielder +informer +in-law +insurgent +investigator +investor +journalist +judge +juror +Counsel to the Crown +kinswoman +laborer +lama +landowner +lawgiver +lawman +lawyer +liberator +lieutenant +lineman +literate +litigant +Lord +failure +lowerclassman +lumberman +maid +maker +malcontent +martinet +master of ceremonies +masturbator +medical officer +medical practitioner +medical scientist +mender +meteorologist +middle-aged man +miler +military attache +military officer +military policeman +minister +minor leaguer +misfit +mixed-blood +model +moneymaker +mother +mourner +mover +musician +Muslimah +mystic +nanny +neonate +nephew +neutral +newcomer +newcomer +newspaper editor +niece +noncommissioned officer +nurse +observer +occultist +oldster +old woman +opportunist +orator +originator +outfielder +right fielder +right-handed pitcher +painter +panelist +pardoner +parodist +party 
+passenger +patient +patron +payer +peddler +percussionist +personal representative +personification +pervert +petitioner +Pharaoh +phonetician +physical therapist +physicist +pimp +pisser +pitcher +planner +player +poet +politician +practitioner +prayer +preserver +president +priest +princess +principal +proctor +programmer +promiser +propagandist +prosecutor +psychic +pusher +queen +queen +ranch hand +reader +recruit +recruiter +religious leader +repairman +reporter +representative +reprobate +rescuer +reservist +restrainer +retailer +retiree +revolutionist +rich person +civil authority +runner +running back +rustic +saboteur +sailor +salesman +salesperson +scalper +schemer +scholar +schoolchild +scientist +second baseman +secretary +seeker +selfish person +seller +serf +serviceman +settler +shrew +sibling +sick person +singer +sister +skeptic +skier +sleeper +slob +smith +snoop +social climber +socialist +social scientist +sociologist +soldier +son +songster +sorcerer +sovereign +speaker +specialist +spectator +stand-in +star +stepparent +stock trader +stranger +strategist +student +subordinate +suitor +superior +surgeon +sweetheart +sympathizer +tax assessor +taxonomist +teacher +television reporter +tenant +tenant +tennis player +testator +testee +theologian +therapist +thinker +thrower +toastmaster +trader +traffic cop +trainer +traitor +traveling salesman +tyrant +upstart +upstart +utility man +vacationer +vegetarian +vice president +victim +volunteer +votary +waiter +waitress +wanderer +wanton +washer +white supremacist +wife +winner +winner +woman +workman +worshiper +wright +writer +wilding +bryophyte +liverwort +pteridophyte +fern +fern ally +spore +spermatophyte +perennial +gymnosperm +ephedra +cycad +sago palm +zamia +pine +pinon +nut pine +white pine +yellow pine +larch +fir +silver fir +cedar +spruce +hemlock +douglas fir +cedar +cypress +arborvitae +araucaria +kauri pine +celery pine +yellowwood +gymnospermous yellowwood +yew +angiosperm +dicot 
+flower +wildflower +inflorescence +pistil +pericarp +oilseed +custard apple +barberry +allspice +laurel +anise tree +magnolia +moonseed +buttercup +aconite +baneberry +anemone +thimbleweed +columbine +clematis +delphinium +nigella +wax myrtle +zebrawood +legume +legume +darling pea +clover +acacia +wattle +albizzia +nitta tree +dogbane +allamanda +carissa +frangipani +rauwolfia +arum +alocasia +anthurium +caladium +monstera +nephthytis +arrow arum +calla lily +duckweed +watermeal +birthwort +sandwort +mouse-ear chickweed +pink +china pink +lychnis +silene +chickweed +fig marigold +amaranth +orach +saltbush +beet +sand verbena +four o'clock +echinocactus +prickly pear +pokeweed +portulaca +flame flower +caper +spiderflower +crucifer +cress +watercress +rock cress +cabbage +head cabbage +turnip plant +mustard +wallflower +woad +stock +radish plant +pennycress +poppy +prickly poppy +composite +compass plant +everlasting +achillea +ageratum +ragweed +ammobium +burdock +artemisia +mugwort +aster +wood aster +common daisy +bur marigold +calendula +thistle +carline thistle +catananche +centaury +knapweed +chrysanthemum +golden aster +goldenbush +plume thistle +woolly thistle +coreopsis +fleabane +woolly sunflower +cotton rose +gazania +African daisy +cudweed +gumweed +goldenbush +sneezeweed +sunflower +hawkweed +marsh elder +krigia +hawkbit +blazing star +rattlesnake root +daisybush +coneflower +coneflower +cutleaved coneflower +golden thistle +white-topped aster +goldenrod +sow thistle +marigold +dandelion +crownbeard +zinnia +achene +campanula +orchid +orchis +arethusa +helleborine +coral root +lady's slipper +large yellow lady's slipper +helleborine +fringed orchis +rein orchid +spider orchid +moth orchid +butterfly orchid +ladies' tresses +vanda +vanilla +yam +primrose +pimpernel +featherfoil +loosestrife +water pimpernel +gramineous plant +grass +wheatgrass +foxtail +broom grass +oat +brome +grama +reed grass +burgrass +crabgrass +lyme grass +wild rye +plume grass 
+rye grass +ricegrass +meadowgrass +millet +reed +sorghum +grain sorghum +cordgrass +cereal +wheat +corn +mealie +zoysia +bamboo +cotton grass +spike rush +pandanus +cattail +grain +kernel +gourd +gourd +squash +summer squash +marrow +winter squash +turban squash +bryony +sweet melon +luffa +lobelia +mallow +hollyhock +althea +poppy mallow +seashore mallow +globe mallow +tulipwood tree +sterculia +bottle-tree +screw tree +cacao +linden +herb +protea +banksia +grevillea +macadamia +casuarina +beefwood +heath +bearberry +huckleberry +kalmia +rhododendron +cranberry +blueberry +shortia +Australian heath +epacris +wintergreen +pipsissewa +beech +chestnut +tanbark oak +southern beech +New Zealand beech +oak +live oak +white oak +red oak +scrub oak +chestnut oak +birch +alder +hornbeam +hop hornbeam +hazelnut +centaury +gentian +fringed gentian +olive tree +fringe tree +ash +red ash +jasmine +privet +lilac +liquidambar +walnut +hickory +wing nut +loosestrife +myrtle +gum tree +eucalyptus +flooded gum +mallee +stringybark +tupelo +enchanter's nightshade +willowherb +fuchsia +evening primrose +daphne +canna +banana +ginger +begonia +tuberous begonia +poon +St John's wort +rockrose +dipterocarp +candlewood +reseda +viola +violet +nettle +cannabis +mulberry +fig tree +fig +elm +hackberry +iridaceous plant +bearded iris +beardless iris +crocus +amaryllis +blood lily +narcissus +daffodil +liliaceous plant +colicroot +alliaceous plant +kniphofia +poker plant +asphodel +mariposa +globe lily +camas +dogtooth violet +fritillary +tulip +star-of-Bethlehem +grape hyacinth +scilla +false asphodel +bog asphodel +hellebore +death camas +sarsaparilla +Solomon's-seal +bellwort +agave +sansevieria +cassia +locust tree +senna +angelim +milk vetch +wild indigo +pea tree +glory pea +rosewood +blackwood +tick trefoil +coral tree +vetchling +wild pea +lupine +medic +mucuna +locoweed +pole bean +pea +edible-pod pea +quira +hoary pea +bush pea +vetch +palm +sago palm +feather palm +fan palm 
+palmetto +areca +calamus +oil palm +raffia palm +lady palm +eriogonum +rhubarb +water plantain +waterweed +pondweed +rose +agrimonia +flowering quince +cotoneaster +avens +apple tree +wild apple +crab apple +Iowa crab +cinquefoil +plum +wild plum +bullace +apricot +cherry +wild cherry +sweet cherry +sour cherry +almond tree +almond +bird cherry +flowering cherry +chokecherry +fruit tree +bramble bush +raspberry +mountain ash +service tree +spirea +madderwort +coffee +cinchona +bedstraw +genipa +hamelia +honeysuckle +American fly honeysuckle +teasel +scabious +geranium +cranesbill +storksbill +incense tree +mahogany +silver ash +milkwort +citrus +orange +mandarin +lemon +kumquat +prickly ash +bitterwood tree +ailanthus +nasturtium +willow +osier +sallow +poplar +black poplar +cottonwood +aspen +soapberry +soapberry vine +harpullia +pachysandra +spindle tree +maple +box elder +holly +sumac +horse chestnut +persimmon +buckthorn +styrax +carnivorous plant +pitcher plant +sedum +philadelphus +saxifrage +astilbe +alumroot +miterwort +parnassia +currant +plane tree +phlox +acanthus +catalpa +anchusa +comfrey +convolvulus +bindweed +gloxinia +streptocarpus +waterleaf +nemophila +scorpionweed +giant hyssop +bugle +wood mint +calamint +coleus +dead nettle +origanum +horehound +monarda +savory +germander +thyme +blue curls +snapdragon +kitten-tails +Indian paintbrush +foxglove +toadflax +veronica +nightshade +thorn apple +matrimony vine +cupflower +petunia +salpiglossis +spurge +croton +cassava +slipper spurge +camellia +umbellifer +angelica +astrantia +caraway +fennel +parsnip +parsley +sanicle +dogwood +valerian +bristle fern +flowering fern +climbing fern +clover fern +adder's tongue +grape fern +ergot +sclerotinia +earthball +Podaxaceae +false truffle +rhizopus +slime mold +cellular slime mold +downy mildew +pythium +Sarcosomataceae +club fungus +lichen +lecanora +fungus +basidiomycete +mushroom +mushroom +mushroom +toadstool +horse mushroom +meadow mushroom +royal 
agaric +false deathcap +fly agaric +death cap +blushing mushroom +destroying angel +chanterelle +floccose chanterelle +pig's ears +cinnabar chanterelle +jack-o-lantern fungus +inky cap +shaggymane +milkcap +fairy-ring mushroom +oyster mushroom +olive-tree agaric +Pholiota astragalina +Pholiota aurea +Pholiota destruens +Pholiota flammans +Pholiota flavida +nameko +Pholiota squarrosa-adiposa +Pholiota squarrosa +Pholiota squarrosoides +Stropharia ambigua +Stropharia hornemannii +Stropharia rugoso-annulata +Entoloma lividum +Entoloma aprile +Chlorophyllum molybdites +lepiota +parasol mushroom +poisonous parasol +Lepiota naucina +Lepiota rhacodes +American parasol +Lepiota rubrotincta +Lepiota clypeolaria +onion stem +blewits +sandy mushroom +Tricholoma pessundatum +Tricholoma sejunctum +man-on-a-horse +Tricholoma venenata +Tricholoma pardinum +Tricholoma vaccinum +Tricholoma aurantium +Pluteus aurantiorugosus +Pluteus magnus +deer mushroom +straw mushroom +Volvariella bombycina +Clitocybe clavipes +Clitocybe dealbata +Clitocybe inornata +Clitocybe robusta +Clitocybe irina +Clitocybe subconnexa +winter mushroom +mycelium +ascomycete +Clavicipitaceae +yeast +discomycete +morel +Verpa +false morel +lorchel +helvella +Gyromitra californica +Gyromitra sphaerospora +Gyromitra esculenta +Gyromitra infula +Gyromitra gigas +gasteromycete +common stinkhorn +Phallus ravenelii +dog stinkhorn +stinky squid +puffball +Geastrum coronatum +Astreus pteridis +Astreus hygrometricus +polypore +Boletus chrysenteron +Boletus edulis +Frost's bolete +Boletus luridus +Boletus mirabilis +Boletus pallidus +Boletus pulcherrimus +Boletus pulverulentus +Boletus roxanae +Boletus subvelutipes +Boletus variipes +Boletus zelleri +Fuscoboletinus paluster +Fuscoboletinus serotinus +Leccinum fibrillosum +Suillus albivelatus +old-man-of-the-woods +Boletellus russellii +jelly fungus +rust +smut +cornsmut +flag smut fungus +waxycap +Hygrocybe acutoconica +Hygrophorus borealis +Hygrophorus caeruleus 
+Hygrophorus inocybiformis +Hygrophorus kauffmanii +Hygrophorus marzuolus +Hygrophorus purpurascens +Hygrophorus russula +Hygrophorus sordidus +Hygrophorus tennesseensis +Hygrophorus turundus +Neohygrophorus angelesianus +Cortinarius armillatus +Cortinarius atkinsonianus +Cortinarius corrugatus +Cortinarius gentilis +Cortinarius mutabilis +Cortinarius semisanguineus +Cortinarius subfoetidus +Cortinarius violaceus +Gymnopilus spectabilis +Gymnopilus validipes +Gymnopilus ventricosus +mold +mildew +candida +houseplant +succulent +weed +sporophyll +sporangium +poisonous plant +vine +tree +bean tree +gymnospermous tree +conifer +angiospermous tree +nut tree +spice tree +bonsai +subshrub +bramble +liana +desert plant +marsh plant +strangler +root +receptacle +scape +peduncle +flower cluster +raceme +cyme +bulbous plant +fruit +seed +bean +nut +berry +aggregate fruit +drupe +drupelet +pome +pod +husk +buckthorn +vinifera +true pepper +peperomia +bract +palmate leaf +pinnate leaf +dentate leaf +branchlet +polypody +strap fern +staghorn fern +spleenwort +chain fern +davallia +hare's-foot fern +shield fern +wood fern +lady fern +bladder fern +holly fern +woodsia +maidenhair +brittle maidenhair +lip fern +cliff brake +horsetail +club moss +spikemoss +beech fern +shoestring fungus +Armillaria caligata +Armillaria ponderosa +Armillaria zelleri +honey mushroom +milkweed +stapelia +stephanotis +orangery +figure +plane figure +solid figure +line +convex shape +concave shape +cylinder +round shape +polygon +concave polygon +amorphous shape +closed curve +simple closed curve +cone +circle +ring +loop +ellipse +triangle +spherical polygon +angular distance +groove +bulge +bow +balance +toroid +boundary +incisure +notch +wrinkle +tree +regular polyhedron +carbon +rock +soil +high explosive +culture medium +agar +paper +paving +plaster +stucco +tear gas +vitamin +fat-soluble vitamin +water-soluble vitamin +vitamin A +B-complex vitamin +vitamin E +vitamin K \ No newline at end of file 
diff --git a/workloads/realworld/pinned/darknet/cfg/imagenet1k.data b/workloads/realworld/pinned/darknet/cfg/imagenet1k.data new file mode 100644 index 0000000000000000000000000000000000000000..daf120a3c020003e8ed08096c51304272ca3ba27 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/imagenet1k.data @@ -0,0 +1,9 @@ +classes=1000 +train = /data/darknet/imagenet_mini/valid.list +valid = /data/darknet/imagenet_mini/valid.list +test = /data/darknet/imagenet_mini/valid.list +backup = /data/darknet/backup/ +labels = /data/darknet/imagenet_mini/imagenet.labels.list +names = /data/darknet/imagenet_mini/imagenet.shortnames.list +top=5 + diff --git a/workloads/realworld/pinned/darknet/cfg/imagenet22k.dataset b/workloads/realworld/pinned/darknet/cfg/imagenet22k.dataset new file mode 100644 index 0000000000000000000000000000000000000000..e25ef007ecceb096e5846ee7cacd1fd54fb8f9e4 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/imagenet22k.dataset @@ -0,0 +1,9 @@ +classes=21842 +train = /data/imagenet/imagenet22k.train.list +valid = /data/imagenet/imagenet22k.valid.list +#valid = /data/imagenet/imagenet1k.valid.list +backup = /home/pjreddie/backup/ +labels = data/imagenet.labels.list +names = data/imagenet.shortnames.list +top = 5 + diff --git a/workloads/realworld/pinned/darknet/cfg/imagenet9k.hierarchy.dataset b/workloads/realworld/pinned/darknet/cfg/imagenet9k.hierarchy.dataset new file mode 100644 index 0000000000000000000000000000000000000000..41fb71b065544b919bc8ed7d723afb5d04ad85ac --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/imagenet9k.hierarchy.dataset @@ -0,0 +1,9 @@ +classes=9418 +train = data/9k.train.list +valid = /data/imagenet/imagenet1k.valid.list +leaves = data/imagenet1k.labels +backup = /home/pjreddie/backup/ +labels = data/9k.labels +names = data/9k.names +top=5 + diff --git a/workloads/realworld/pinned/darknet/cfg/jnet-conv.cfg b/workloads/realworld/pinned/darknet/cfg/jnet-conv.cfg new file mode 100644 index 
0000000000000000000000000000000000000000..056f82aa6e2a0710a664c4740ab763961e4de33d --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/jnet-conv.cfg @@ -0,0 +1,118 @@ +[net] +batch=1 +subdivisions=1 +height=10 +width=10 +channels=3 +learning_rate=0.01 +momentum=0.9 +decay=0.0005 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + diff --git a/workloads/realworld/pinned/darknet/cfg/openimages.data b/workloads/realworld/pinned/darknet/cfg/openimages.data new file mode 100644 index 0000000000000000000000000000000000000000..fa80e5ab7d8576d391c7cac9dfc8367aab704139 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/openimages.data @@ -0,0 +1,8 @@ +classes= 601 +train = /home/pjreddie/data/openimsv4/openimages.train.list +#valid = coco_testdev +valid = data/coco_val_5k.list +names = data/openimages.names +backup = /home/pjreddie/backup/ +eval=coco + diff --git a/workloads/realworld/pinned/darknet/cfg/resnet101.cfg 
b/workloads/realworld/pinned/darknet/cfg/resnet101.cfg new file mode 100644 index 0000000000000000000000000000000000000000..de458820bcd35f5e65d858f9f661e42653ed0184 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/resnet101.cfg @@ -0,0 +1,990 @@ +[net] +# Training +# batch=128 +# subdivisions=2 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + +[cost] +type=sse + diff --git a/workloads/realworld/pinned/darknet/cfg/resnet152.cfg b/workloads/realworld/pinned/darknet/cfg/resnet152.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e8e3297ac2364b95f28fa0a0bdd4ca71f14ac82c --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/resnet152.cfg @@ -0,0 +1,1460 @@ +[net] +# Training +# batch=128 +# subdivisions=8 + +# 
Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 
+stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/resnet18.cfg b/workloads/realworld/pinned/darknet/cfg/resnet18.cfg new file mode 100644 index 0000000000000000000000000000000000000000..275f4bdb5962d77c16f353cd3d2751e189b9344c --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/resnet18.cfg @@ -0,0 +1,228 @@ +[net] +# Training +# batch=128 +# subdivisions=1 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 
+stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/resnet18_b.cfg b/workloads/realworld/pinned/darknet/cfg/resnet18_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c0712e9cdfaf1f28dbe3a358355405d2758e6d2b --- /dev/null +++ 
b/workloads/realworld/pinned/darknet/cfg/resnet18_b.cfg @@ -0,0 +1,228 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] 
+activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/resnet18_t.cfg b/workloads/realworld/pinned/darknet/cfg/resnet18_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c0712e9cdfaf1f28dbe3a358355405d2758e6d2b --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/resnet18_t.cfg @@ -0,0 +1,228 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 
+stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/resnet34.cfg b/workloads/realworld/pinned/darknet/cfg/resnet34.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9f68f096741ae3b4898f40b76af7569d4697729f --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/resnet34.cfg @@ -0,0 +1,392 @@ +[net] +# Training +# batch=128 +# subdivisions=2 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 
+min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 
+activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 
+activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/resnet50.cfg b/workloads/realworld/pinned/darknet/cfg/resnet50.cfg new file mode 100644 index 0000000000000000000000000000000000000000..d0d7c511516e997a392bb5ba77682740c0494972 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/resnet50.cfg @@ -0,0 +1,510 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/pinned/darknet/cfg/resnet50_b.cfg b/workloads/realworld/pinned/darknet/cfg/resnet50_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..462479918fd608c4a0761b84c7299e51081193c6 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/resnet50_b.cfg @@ -0,0 +1,510 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/pinned/darknet/cfg/resnet50_t.cfg b/workloads/realworld/pinned/darknet/cfg/resnet50_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..462479918fd608c4a0761b84c7299e51081193c6 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/resnet50_t.cfg @@ -0,0 +1,510 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/pinned/darknet/cfg/resnext101-32x4d.cfg b/workloads/realworld/pinned/darknet/cfg/resnext101-32x4d.cfg new file mode 100644 index 0000000000000000000000000000000000000000..8538ccc3daee2e3de589eb4e2edf868340d4924b --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/resnext101-32x4d.cfg @@ -0,0 +1,1053 @@ +[net] +# Training +# batch=128 +# subdivisions=8 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 
+size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 
+size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/resnext152-32x4d.cfg b/workloads/realworld/pinned/darknet/cfg/resnext152-32x4d.cfg new file mode 100644 index 0000000000000000000000000000000000000000..48279fd28eb0dbe23c7b0c593f67051cb6a62374 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/resnext152-32x4d.cfg @@ -0,0 +1,1562 @@ +[net] +# Training +# batch=128 +# subdivisions=16 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 
+pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] 
+from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 
+pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/resnext50.cfg b/workloads/realworld/pinned/darknet/cfg/resnext50.cfg new file mode 100644 index 0000000000000000000000000000000000000000..12aebdf6fbd48bde40ee22c4257e06f2e0cf46eb --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/resnext50.cfg @@ -0,0 +1,523 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] 
+from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 
+pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/pinned/darknet/cfg/rnn.cfg b/workloads/realworld/pinned/darknet/cfg/rnn.cfg new file mode 100644 index 0000000000000000000000000000000000000000..61b202f3a441b701f76d9b007c6276467c639e11 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/rnn.cfg @@ -0,0 +1,38 @@ +[net] +subdivisions=1 +inputs=256 +batch = 1 +momentum=0.9 +decay=0.001 +max_batches = 2000 +time_steps=1 +learning_rate=0.1 +policy=steps +steps=1000,1500 +scales=.1,.1 + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[connected] +output=256 +activation=leaky + +[softmax] + + diff --git 
a/workloads/realworld/pinned/darknet/cfg/rnn.train.cfg b/workloads/realworld/pinned/darknet/cfg/rnn.train.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b9748990aceaa85cc2e43358073114606725dcbd --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/rnn.train.cfg @@ -0,0 +1,38 @@ +[net] +subdivisions=1 +inputs=256 +batch = 128 +momentum=0.9 +decay=0.001 +max_batches = 2000 +time_steps=576 +learning_rate=0.1 +policy=steps +steps=1000,1500 +scales=.1,.1 + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[connected] +output=256 +activation=leaky + +[softmax] + + diff --git a/workloads/realworld/pinned/darknet/cfg/strided.cfg b/workloads/realworld/pinned/darknet/cfg/strided.cfg new file mode 100644 index 0000000000000000000000000000000000000000..2f745085adc268a3e99bd7895bd4dda28227bffd --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/strided.cfg @@ -0,0 +1,182 @@ +[net] +batch=128 +subdivisions=4 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +policy=steps +scales=.1,.1,.1 +steps=200000,300000,400000 +max_batches=800000 + + +[crop] +crop_height=224 +crop_width=224 +flip=1 +angle=0 +saturation=1 +exposure=1 +shift=.2 + +[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=192 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] 
+filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=1024 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=ramp + +[maxpool] +size=3 +stride=2 + +[connected] +output=4096 +activation=ramp + +[dropout] +probability=0.5 + +[connected] +output=1000 +activation=ramp + +[softmax] + diff --git a/workloads/realworld/pinned/darknet/cfg/t1.test.cfg b/workloads/realworld/pinned/darknet/cfg/t1.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b3628114e048dc78f4797342afd95a757c81c977 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/t1.test.cfg @@ -0,0 +1,117 @@ +[net] +batch=1 +subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,20000,30000 +scales=2.5,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] 
+filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[connected] +output= 1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/pinned/darknet/cfg/tiny.cfg b/workloads/realworld/pinned/darknet/cfg/tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..f97327cfceebf868998bf2bb16224bd0994ebd82 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/tiny.cfg @@ -0,0 +1,174 @@ +[net] +# Train +batch=128 +subdivisions=1 +# Test +# batch=1 +# subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=320 + +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + + diff --git a/workloads/realworld/pinned/darknet/cfg/vgg-16.cfg b/workloads/realworld/pinned/darknet/cfg/vgg-16.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c73b17b6ccfdcc9cae9b67591b662571463569ab --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/vgg-16.cfg @@ -0,0 +1,157 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +learning_rate=0.00001 +momentum=0.9 +decay=0.0005 + +[crop] +crop_height=224 +crop_width=224 +flip=1 +exposure=1 +saturation=1 +angle=0 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + 
+[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=1000 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/pinned/darknet/cfg/vgg-conv.cfg b/workloads/realworld/pinned/darknet/cfg/vgg-conv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..21e1d724c9418107f9cf82f9bffb9ae64d3e2084 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/vgg-conv.cfg @@ -0,0 +1,121 @@ +[net] +batch=1 +subdivisions=1 +width=224 +height=224 +channels=3 +learning_rate=0.00001 +momentum=0.9 +decay=0.0005 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 
+stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + diff --git a/workloads/realworld/pinned/darknet/cfg/voc.data b/workloads/realworld/pinned/darknet/cfg/voc.data new file mode 100644 index 0000000000000000000000000000000000000000..7807b5d2a8fd0f855a8c68e82c064dc320551da1 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/voc.data @@ -0,0 +1,6 @@ +classes= 20 +train = /home/pjreddie/data/voc/train.txt +valid = /home/pjreddie/data/voc/2007_test.txt +names = data/voc.names +backup = backup + diff --git a/workloads/realworld/pinned/darknet/cfg/writing.cfg b/workloads/realworld/pinned/darknet/cfg/writing.cfg new file mode 100644 index 0000000000000000000000000000000000000000..1ed899bcd63d6354e8320ace7e7f513ba1174886 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/writing.cfg @@ -0,0 +1,41 @@ +[net] +batch=128 +subdivisions=2 +height=256 +width=256 +channels=3 +learning_rate=0.00000001 +momentum=0.9 +decay=0.0005 +seen=0 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1 +size=3 +stride=1 +pad=1 +activation=logistic + +[cost] + diff --git a/workloads/realworld/pinned/darknet/cfg/yolo9000.cfg b/workloads/realworld/pinned/darknet/cfg/yolo9000.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e745f78a6e37611fb0f13c2d848c292cea1a89d3 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/yolo9000.cfg @@ -0,0 +1,218 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +batch=1 +subdivisions=1 +height=544 +width=544 +channels=3 
+momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +hue=.1 +saturation=.75 +exposure=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +filters=28269 +size=1 +stride=1 +pad=1 +activation=linear + +[region] +anchors = 0.77871, 1.14074, 3.00525, 4.31277, 9.22725, 9.61974 +bias_match=1 +classes=9418 +coords=4 +num=3 +softmax=1 +jitter=.2 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +thresh = .6 +absolute=1 +random=1 + +tree=data/9k.tree +map = data/coco9k.map diff --git a/workloads/realworld/pinned/darknet/cfg/yolov1-tiny.cfg b/workloads/realworld/pinned/darknet/cfg/yolov1-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a5e7b4920289ccb507a3a0356a33362bc7633581 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/yolov1-tiny.cfg @@ -0,0 +1,130 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 + +saturation=.75 +exposure=.75 +hue = .1 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,800,20000,30000 +scales=2.5,2,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[connected] +output= 
1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/pinned/darknet/cfg/yolov1.cfg b/workloads/realworld/pinned/darknet/cfg/yolov1.cfg new file mode 100644 index 0000000000000000000000000000000000000000..06cf6e676170e41d24e63ec08d7b27a31c411718 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/yolov1.cfg @@ -0,0 +1,261 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 +saturation=1.5 +exposure=1.5 +hue=.1 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,20000,30000 +scales=2.5,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[local] +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[dropout] +probability=.5 + +[connected] +output= 1715 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=3 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/pinned/darknet/cfg/yolov2-tiny-voc.cfg b/workloads/realworld/pinned/darknet/cfg/yolov2-tiny-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c4c127cdd352bd98b3b7a3336d5c3b2efc6fadcd --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/yolov2-tiny-voc.cfg @@ -0,0 +1,138 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 
+saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +max_batches = 40200 +policy=steps +steps=-1,100,20000,30000 +scales=.1,10,.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=125 +activation=linear + +[region] +anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 +bias_match=1 +classes=20 +coords=4 +num=5 +softmax=1 +jitter=.2 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/pinned/darknet/cfg/yolov2-tiny.cfg b/workloads/realworld/pinned/darknet/cfg/yolov2-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..81d0ac45d6dca10f50875bfe85f7496ded8e0f63 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/yolov2-tiny.cfg @@ -0,0 +1,139 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 
+max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=425 +activation=linear + +[region] +anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 +bias_match=1 +classes=80 +coords=4 +num=5 +softmax=1 +jitter=.2 +rescore=0 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/pinned/darknet/cfg/yolov2-voc.cfg b/workloads/realworld/pinned/darknet/cfg/yolov2-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..dbf2de281c1200cb4889409c616e775080823268 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/yolov2-voc.cfg @@ -0,0 +1,258 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=416 +width=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 80200 +policy=steps 
+steps=40000,60000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + 
+[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[route] +layers=-9 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=64 +activation=leaky + +[reorg] +stride=2 + +[route] +layers=-1,-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=125 +activation=linear + + +[region] +anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 +bias_match=1 +classes=20 +coords=4 +num=5 +softmax=1 +jitter=.3 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/pinned/darknet/cfg/yolov2.cfg b/workloads/realworld/pinned/darknet/cfg/yolov2.cfg new file mode 100644 index 0000000000000000000000000000000000000000..088edf81573e83c59edd7137cbc07b6fe1433591 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/yolov2.cfg @@ -0,0 +1,258 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[route] +layers=-9 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=64 +activation=leaky + +[reorg] +stride=2 + +[route] +layers=-1,-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=425 +activation=linear + + +[region] +anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 +bias_match=1 +classes=80 +coords=4 +num=5 +softmax=1 +jitter=.3 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 
+thresh = .6 +random=1 diff --git a/workloads/realworld/pinned/darknet/cfg/yolov3-openimages.cfg b/workloads/realworld/pinned/darknet/cfg/yolov3-openimages.cfg new file mode 100644 index 0000000000000000000000000000000000000000..65d241a74c4c4995dbd997b1750575a83b0a17d4 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/yolov3-openimages.cfg @@ -0,0 +1,789 @@ +[net] +# Testing + batch=1 + subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=5000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 
+stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/yolov3-spp.cfg b/workloads/realworld/pinned/darknet/cfg/yolov3-spp.cfg new file mode 100644 index 0000000000000000000000000000000000000000..4ad2a052d88328a79cff5686ff4dd1df6993a2fd --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/yolov3-spp.cfg @@ -0,0 +1,822 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 
1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 + +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + 
+[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/yolov3-tiny.cfg b/workloads/realworld/pinned/darknet/cfg/yolov3-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..cfca3cfa6415b7b61eae238aa71107dedbe5d607 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/yolov3-tiny.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/pinned/darknet/cfg/yolov3-tiny_b.cfg b/workloads/realworld/pinned/darknet/cfg/yolov3-tiny_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..0d8ded69bcf909f4cca02ab202cc99b1800a1562 --- /dev/null +++ 
b/workloads/realworld/pinned/darknet/cfg/yolov3-tiny_b.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear 
+ +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/pinned/darknet/cfg/yolov3-tiny_t.cfg b/workloads/realworld/pinned/darknet/cfg/yolov3-tiny_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..0d8ded69bcf909f4cca02ab202cc99b1800a1562 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/yolov3-tiny_t.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 
+classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/pinned/darknet/cfg/yolov3-voc.cfg b/workloads/realworld/pinned/darknet/cfg/yolov3-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..3f3e8dfb31b7103cf7ca00cd0bef83d6d426bb8d --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/yolov3-voc.cfg @@ -0,0 +1,785 @@ +[net] +# Testing + batch=1 + subdivisions=1 +# Training +# batch=64 +# subdivisions=16 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 50200 +policy=steps +steps=40000,45000 +scales=.1,.1 + + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 
+truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 
+truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/yolov3.cfg b/workloads/realworld/pinned/darknet/cfg/yolov3.cfg new file mode 100644 index 0000000000000000000000000000000000000000..938ffff23f106d65290faae217f6a9b0a715c023 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/yolov3.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 
+filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/yolov3_b.cfg b/workloads/realworld/pinned/darknet/cfg/yolov3_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a86d73b42225ef65d8ff8692b0d72827ba3a8484 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/yolov3_b.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + 
+learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 
+activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 
+stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/pinned/darknet/cfg/yolov3_t.cfg b/workloads/realworld/pinned/darknet/cfg/yolov3_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a86d73b42225ef65d8ff8692b0d72827ba3a8484 --- /dev/null +++ b/workloads/realworld/pinned/darknet/cfg/yolov3_t.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + 
+[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky 
+ +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear 
+ + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky 
+ +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/pinned/darknet/examples/art.c b/workloads/realworld/pinned/darknet/examples/art.c new file mode 100644 index 0000000000000000000000000000000000000000..932688e7b9ecbfd1a359a5d373dddf52815da9bb --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/art.c @@ -0,0 +1,59 @@ +#include "darknet.h" + +#include + +void demo_art(char *cfgfile, char *weightfile, int cam_index) +{ +#ifdef OPENCV + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + + void * cap = open_video_stream(0, cam_index, 0,0,0); + + char *window = "ArtJudgementBot9000!!!"; + if(!cap) error("Couldn't connect to webcam.\n"); + int i; + int idx[] = {37, 401, 434}; + int n = sizeof(idx)/sizeof(idx[0]); + + while(1){ + image in = get_image_from_stream(cap); + image in_s = resize_image(in, net->w, net->h); + + float *p = network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + + float score = 0; + for(i = 0; i < n; ++i){ + float s = p[idx[i]]; + if (s > score) score = s; + } + score = score; + printf("I APPRECIATE THIS ARTWORK: %10.7f%%\n", score*100); + printf("["); + int upper = 30; + for(i = 0; i < upper; ++i){ + printf("%c", ((i+.5) < score*upper) ? 
219 : ' '); + } + printf("]\n"); + + show_image(in, window, 1); + free_image(in_s); + free_image(in); + } +#endif +} + + +void run_art(int argc, char **argv) +{ + int cam_index = find_int_arg(argc, argv, "-c", 0); + char *cfg = argv[2]; + char *weights = argv[3]; + demo_art(cfg, weights, cam_index); +} + diff --git a/workloads/realworld/pinned/darknet/examples/attention.c b/workloads/realworld/pinned/darknet/examples/attention.c new file mode 100644 index 0000000000000000000000000000000000000000..cd1e579d375be8ffed5620c70180f0a59a927159 --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/attention.c @@ -0,0 +1,459 @@ +#include "darknet.h" + +#include +#include + +void extend_data_truth(data *d, int n, float val) +{ + int i, j; + for(i = 0; i < d->y.rows; ++i){ + d->y.vals[i] = realloc(d->y.vals[i], (d->y.cols+n)*sizeof(float)); + for(j = 0; j < n; ++j){ + d->y.vals[i][d->y.cols + j] = val; + } + } + d->y.cols += n; +} + +matrix network_loss_data(network *net, data test) +{ + int i,b; + int k = 1; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.cols, sizeof(float)); + float *y = calloc(net->batch*test.y.cols, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + memcpy(y+b*test.y.cols, test.y.vals[i+b], test.y.cols*sizeof(float)); + } + + network orig = *net; + net->input = X; + net->truth = y; + net->train = 0; + net->delta = 0; + forward_network(net); + *net = orig; + + float *delta = net->layers[net->n-1].output; + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + int t = max_index(y + b*test.y.cols, 1000); + float err = sum_array(delta + b*net->outputs, net->outputs); + pred.vals[i+b][0] = -err; + //pred.vals[i+b][0] = 1-delta[b*net->outputs + t]; + } + } + free(X); + free(y); + return pred; +} + +void train_attention(char *datacfg, char *cfgfile, 
char *weightfile, int *gpus, int ngpus, int clear) +{ + int i, j; + + float avg_cls_loss = -1; + float avg_att_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *train_list = option_find_str(options, "train", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + + char **labels = get_labels(label_list); + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + double time; + + int divs=3; + int size=2; + + load_args args = {0}; + args.w = divs*net->w/size; + args.h = divs*net->h/size; + args.size = divs*net->w/size; + args.threads = 32; + args.hierarchy = net->hierarchy; + + args.min = net->min_ratio*args.w; + args.max = net->max_ratio*args.w; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + args.paths = paths; + args.classes = classes; + args.n = imgs; + args.m = N; + args.labels = labels; + args.type = CLASSIFICATION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ 
+ time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + data resized = resize_data(train, net->w, net->h); + extend_data_truth(&resized, divs*divs, 0); + data *tiles = tile_data(train, divs, size); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float aloss = 0; + float closs = 0; + int z; + for (i = 0; i < divs*divs/ngpus; ++i) { +#pragma omp parallel for + for(j = 0; j < ngpus; ++j){ + int index = i*ngpus + j; + extend_data_truth(tiles+index, divs*divs, SECRET_NUM); + matrix deltas = network_loss_data(nets[j], tiles[index]); + for(z = 0; z < resized.y.rows; ++z){ + resized.y.vals[z][train.y.cols + index] = deltas.vals[z][0]; + } + free_matrix(deltas); + } + } + int *inds = calloc(resized.y.rows, sizeof(int)); + for(z = 0; z < resized.y.rows; ++z){ + int index = max_index(resized.y.vals[z] + train.y.cols, divs*divs); + inds[z] = index; + for(i = 0; i < divs*divs; ++i){ + resized.y.vals[z][train.y.cols + i] = (i == index)? 
1 : 0; + } + } + data best = select_data(tiles, inds); + free(inds); + #ifdef GPU + if (ngpus == 1) { + closs = train_network(net, best); + } else { + closs = train_networks(nets, ngpus, best, 4); + } + #endif + for (i = 0; i < divs*divs; ++i) { + printf("%.2f ", resized.y.vals[0][train.y.cols + i]); + if((i+1)%divs == 0) printf("\n"); + free_data(tiles[i]); + } + free_data(best); + printf("\n"); + image im = float_to_image(64,64,3,resized.X.vals[0]); + //show_image(im, "orig"); + //cvWaitKey(100); + /* + image im1 = float_to_image(64,64,3,tiles[i].X.vals[0]); + image im2 = float_to_image(64,64,3,resized.X.vals[0]); + show_image(im1, "tile"); + show_image(im2, "res"); + */ +#ifdef GPU + if (ngpus == 1) { + aloss = train_network(net, resized); + } else { + aloss = train_networks(nets, ngpus, resized, 4); + } +#endif + for(i = 0; i < divs*divs; ++i){ + printf("%f ", nets[0]->output[1000 + i]); + if ((i+1) % divs == 0) printf("\n"); + } + printf("\n"); + + free_data(resized); + free_data(train); + if(avg_cls_loss == -1) avg_cls_loss = closs; + if(avg_att_loss == -1) avg_att_loss = aloss; + avg_cls_loss = avg_cls_loss*.9 + closs*.1; + avg_att_loss = avg_att_loss*.9 + aloss*.1; + + printf("%ld, %.3f: Att: %f, %f avg, Class: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, aloss, avg_att_loss, closs, avg_cls_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + pthread_join(load_thread, 0); + + free_network(net); + free_ptrs((void**)labels, classes); + free_ptrs((void**)paths, plist->size); + 
free_list(plist); + free(base); +} + +void validate_attention_single(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *leaf_list = option_find_str(options, "leaves", 0); + if(leaf_list) change_leaves(net->hierarchy, leaf_list); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + int divs = 4; + int size = 2; + int extra = 0; + float *avgs = calloc(classes, sizeof(float)); + int *inds = calloc(divs*divs, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w*divs/size); + image crop = crop_image(resized, (resized.w - net->w*divs/size)/2, (resized.h - net->h*divs/size)/2, net->w*divs/size, net->h*divs/size); + image rcrop = resize_image(crop, net->w, net->h); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, rcrop.data); + //pred[classes + 56] = 0; + for(j = 0; j < divs*divs; ++j){ + printf("%.2f ", pred[classes + j]); + if((j+1)%divs == 0) printf("\n"); + } + printf("\n"); + copy_cpu(classes, pred, 1, avgs, 1); + top_k(pred + classes, divs*divs, divs*divs, inds); + show_image(crop, "crop"); + for(j = 0; j < extra; ++j){ + int index = inds[j]; + int row = index / 
divs; + int col = index % divs; + int y = row * crop.h / divs - (net->h - crop.h/divs)/2; + int x = col * crop.w / divs - (net->w - crop.w/divs)/2; + printf("%d %d %d %d\n", row, col, y, x); + image tile = crop_image(crop, x, y, net->w, net->h); + float *pred = network_predict(net, tile.data); + axpy_cpu(classes, 1., pred, 1, avgs, 1); + show_image(tile, "tile"); + //cvWaitKey(10); + } + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + if(rcrop.data != resized.data) free_image(rcrop); + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_attention_multi(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + int scales[] = {224, 288, 320, 352, 384}; + int nscales = sizeof(scales)/sizeof(scales[0]); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + float *pred = calloc(classes, sizeof(float)); + image im = load_image_color(paths[i], 0, 0); + for(j = 0; j 
< nscales; ++j){ + image r = resize_min(im, scales[j]); + resize_network(net, r.w, r.h); + float *p = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + flip_image(r); + p = network_predict(net, r.data); + axpy_cpu(classes, 1, p, 1, pred, 1); + if(r.data != im.data) free_image(r); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void predict_attention(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + if(top == 0) top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = letterbox_image(im, net->w, net->h); + //resize_network(&net, r.w, r.h); + //printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + float *predictions = network_predict(net, X); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + 
//if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? names[net->hierarchy->parent[index]] : "Root"); + //else printf("%s: %f\n",names[index], predictions[index]); + printf("%5.2f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void run_attention(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int ngpus; + int *gpus = read_intlist(gpu_list, &ngpus, gpu_index); + + + int top = find_int_arg(argc, argv, "-t", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + char *layer_s = (argc > 7) ? argv[7]: 0; + if(0==strcmp(argv[2], "predict")) predict_attention(data, cfg, weights, filename, top); + else if(0==strcmp(argv[2], "train")) train_attention(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) validate_attention_single(data, cfg, weights); + else if(0==strcmp(argv[2], "validmulti")) validate_attention_multi(data, cfg, weights); +} + + diff --git a/workloads/realworld/pinned/darknet/examples/captcha.c b/workloads/realworld/pinned/darknet/examples/captcha.c new file mode 100644 index 0000000000000000000000000000000000000000..41d6d07c30801b35da34c05984be488e6f6767e9 --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/captcha.c @@ -0,0 +1,353 @@ +#include "darknet.h" + +void fix_data_captcha(data d, int mask) +{ + matrix labels = d.y; + int i, j; + for(i = 0; i < d.y.rows; ++i){ + for(j = 0; j < d.y.cols; j += 2){ + if (mask){ + if(!labels.vals[i][j]){ + labels.vals[i][j] = SECRET_NUM; + labels.vals[i][j+1] = SECRET_NUM; + }else if(labels.vals[i][j+1]){ + 
labels.vals[i][j] = 0; + } + } else{ + if (labels.vals[i][j]) { + labels.vals[i][j+1] = 0; + } else { + labels.vals[i][j+1] = 1; + } + } + } + } +} + +void train_captcha(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + int i = *net->seen/imgs; + int solved = 1; + list *plist; + char **labels = get_labels("/data/captcha/reimgs.labels.list"); + if (solved){ + plist = get_paths("/data/captcha/reimgs.solved.list"); + }else{ + plist = get_paths("/data/captcha/reimgs.raw.list"); + } + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = 26; + args.n = imgs; + args.m = plist->size; + args.labels = labels; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + + load_thread = load_data_in_thread(args); + while(1){ + ++i; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + fix_data_captcha(train, solved); + + /* + image im = float_to_image(256, 256, 3, train.X.vals[114]); + show_image(im, "training"); + cvWaitKey(0); + */ + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net->seen); + free_data(train); + if(i%100==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } + } +} + +void test_captcha(char *cfgfile, char *weightfile, char *filename) +{ + network *net = 
load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + int i = 0; + char **names = get_labels("/data/captcha/reimgs.labels.list"); + char buff[256]; + char *input = buff; + int indexes[26]; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + //printf("Enter Image Path: "); + //fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, net->w, net->h); + float *X = im.data; + float *predictions = network_predict(net, X); + top_predictions(net, 26, indexes); + //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < 26; ++i){ + int index = indexes[i]; + if(i != 0) printf(", "); + printf("%s %f", names[index], predictions[index]); + } + printf("\n"); + fflush(stdout); + free_image(im); + if (filename) break; + } +} + +void valid_captcha(char *cfgfile, char *weightfile, char *filename) +{ + char **labels = get_labels("/data/captcha/reimgs.labels.list"); + network *net = load_network(cfgfile, weightfile, 0); + list *plist = get_paths("/data/captcha/reimgs.fg.list"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + int outputs = net->outputs; + + set_batch_network(net, 1); + srand(2222222); + int i, j; + for(i = 0; i < N; ++i){ + if (i%100 == 0) fprintf(stderr, "%d\n", i); + image im = load_image_color(paths[i], net->w, net->h); + float *X = im.data; + float *predictions = network_predict(net, X); + //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + int truth = -1; + for(j = 0; j < 13; ++j){ + if (strstr(paths[i], labels[j])) truth = j; + } + if (truth == -1){ + fprintf(stderr, "bad: %s\n", paths[i]); + return; + } + printf("%d, ", truth); + for(j = 0; j < outputs; ++j){ + if (j != 0) printf(", "); + printf("%f", predictions[j]); + } + printf("\n"); + fflush(stdout); + free_image(im); + if (filename) break; + } +} + +/* + void train_captcha(char *cfgfile, char 
*weightfile) + { + float avg_loss = -1; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + int i = net->seen/imgs; + list *plist = get_paths("/data/captcha/train.auto5"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + while(1){ + ++i; + time=clock(); + data train = load_data_captcha(paths, imgs, plist->size, 10, 200, 60); + translate_data_rows(train, -128); + scale_data_rows(train, 1./128); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + net->seen += imgs; + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net->seen); + free_data(train); + if(i%10==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } + } + } + + void decode_captcha(char *cfgfile, char *weightfile) + { + setbuf(stdout, NULL); + srand(time(0)); + network net = parse_network_cfg(cfgfile); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + char filename[256]; + while(1){ + printf("Enter filename: "); + fgets(filename, 256, stdin); + strtok(filename, "\n"); + image im = load_image_color(filename, 300, 57); + scale_image(im, 1./255.); + float *X = im.data; + float *predictions = network_predict(net, X); + image out = float_to_image(300, 57, 1, predictions); + show_image(out, "decoded"); +#ifdef OPENCV +cvWaitKey(0); +#endif +free_image(im); +} +} + +void encode_captcha(char *cfgfile, char *weightfile) +{ +float avg_loss = -1; +srand(time(0)); +char *base = basecfg(cfgfile); +printf("%s\n", base); +network net = 
parse_network_cfg(cfgfile); +if(weightfile){ + load_weights(&net, weightfile); +} +printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); +int imgs = 1024; +int i = net->seen/imgs; +list *plist = get_paths("/data/captcha/encode.list"); +char **paths = (char **)list_to_array(plist); +printf("%d\n", plist->size); +clock_t time; +while(1){ + ++i; + time=clock(); + data train = load_data_captcha_encode(paths, imgs, plist->size, 300, 57); + scale_data_rows(train, 1./255); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + net->seen += imgs; + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net->seen); + free_matrix(train.X); + if(i%100==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } +} +} + +void validate_captcha(char *cfgfile, char *weightfile) +{ + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + int numchars = 37; + list *plist = get_paths("/data/captcha/solved.hard"); + char **paths = (char **)list_to_array(plist); + int imgs = plist->size; + data valid = load_data_captcha(paths, imgs, 0, 10, 200, 60); + translate_data_rows(valid, -128); + scale_data_rows(valid, 1./128); + matrix pred = network_predict_data(net, valid); + int i, k; + int correct = 0; + int total = 0; + int accuracy = 0; + for(i = 0; i < imgs; ++i){ + int allcorrect = 1; + for(k = 0; k < 10; ++k){ + char truth = int_to_alphanum(max_index(valid.y.vals[i]+k*numchars, numchars)); + char prediction = int_to_alphanum(max_index(pred.vals[i]+k*numchars, numchars)); + if (truth != prediction) allcorrect=0; + if (truth != '.' && truth == prediction) ++correct; + if (truth != '.' 
|| truth != prediction) ++total; + } + accuracy += allcorrect; + } + printf("Word Accuracy: %f, Char Accuracy %f\n", (float)accuracy/imgs, (float)correct/total); + free_data(valid); +} + +void test_captcha(char *cfgfile, char *weightfile) +{ + setbuf(stdout, NULL); + srand(time(0)); + //char *base = basecfg(cfgfile); + //printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + char filename[256]; + while(1){ + //printf("Enter filename: "); + fgets(filename, 256, stdin); + strtok(filename, "\n"); + image im = load_image_color(filename, 200, 60); + translate_image(im, -128); + scale_image(im, 1/128.); + float *X = im.data; + float *predictions = network_predict(net, X); + print_letters(predictions, 10); + free_image(im); + } +} + */ +void run_captcha(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_captcha(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights, filename); + else if(0==strcmp(argv[2], "valid")) valid_captcha(cfg, weights, filename); + //if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "encode")) encode_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "decode")) decode_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "valid")) validate_captcha(cfg, weights); +} + diff --git a/workloads/realworld/pinned/darknet/examples/cifar.c b/workloads/realworld/pinned/darknet/examples/cifar.c new file mode 100644 index 0000000000000000000000000000000000000000..a5f5f240b9f680acd9b5890042300d3b683e0f82 --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/cifar.c @@ -0,0 +1,251 @@ +#include "darknet.h" + +void train_cifar(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + int classes = 10; + int N = 50000; + + char **labels = get_labels("data/cifar/labels.txt"); + int epoch = (*net->seen)/N; + data train = load_all_cifar10(); + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + clock_t time=clock(); + + float loss = train_network_sgd(net, train, 1); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 
== 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)labels, classes); + free(base); + free_data(train); +} + +void train_cifar_distill(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + int classes = 10; + int N = 50000; + + char **labels = get_labels("data/cifar/labels.txt"); + int epoch = (*net->seen)/N; + + data train = load_all_cifar10(); + matrix soft = csv_to_matrix("results/ensemble.csv"); + + float weight = .9; + scale_matrix(soft, weight); + scale_matrix(train.y, 1. - weight); + matrix_add_matrix(soft, train.y); + + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + clock_t time=clock(); + + float loss = train_network_sgd(net, train, 1); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)labels, classes); + free(base); + free_data(train); +} + +void test_cifar_multi(char 
*filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + float avg_acc = 0; + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + + float pred[10] = {0}; + + float *p = network_predict(net, im.data); + axpy_cpu(10, 1, p, 1, pred, 1); + flip_image(im); + p = network_predict(net, im.data); + axpy_cpu(10, 1, p, 1, pred, 1); + + int index = max_index(pred, 10); + int class = max_index(test.y.vals[i], 10); + if(index == class) avg_acc += 1; + free_image(im); + printf("%4d: %.2f%%\n", i, 100.*avg_acc/(i+1)); + } +} + +void test_cifar(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + clock_t time; + float avg_acc = 0; + float avg_top5 = 0; + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + time=clock(); + + float *acc = network_accuracies(net, test, 2); + avg_acc += acc[0]; + avg_top5 += acc[1]; + printf("top1: %f, %lf seconds, %d images\n", avg_acc, sec(clock()-time), test.X.rows); + free_data(test); +} + +void extract_cifar() +{ +char *labels[] = {"airplane","automobile","bird","cat","deer","dog","frog","horse","ship","truck"}; + int i; + data train = load_all_cifar10(); + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + for(i = 0; i < train.X.rows; ++i){ + image im = float_to_image(32, 32, 3, train.X.vals[i]); + int class = max_index(train.y.vals[i], 10); + char buff[256]; + sprintf(buff, "data/cifar/train/%d_%s",i,labels[class]); + save_image_options(im, buff, PNG, 0); + } + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + int class = max_index(test.y.vals[i], 10); + char buff[256]; + sprintf(buff, "data/cifar/test/%d_%s",i,labels[class]); + save_image_options(im, buff, PNG, 0); + 
} +} + +void test_cifar_csv(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + matrix pred = network_predict_data(net, test); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + flip_image(im); + } + matrix pred2 = network_predict_data(net, test); + scale_matrix(pred, .5); + scale_matrix(pred2, .5); + matrix_add_matrix(pred2, pred); + + matrix_to_csv(pred); + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); +} + +void test_cifar_csvtrain(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + srand(time(0)); + + data test = load_all_cifar10(); + + matrix pred = network_predict_data(net, test); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + flip_image(im); + } + matrix pred2 = network_predict_data(net, test); + scale_matrix(pred, .5); + scale_matrix(pred2, .5); + matrix_add_matrix(pred2, pred); + + matrix_to_csv(pred); + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); +} + +void eval_cifar_csv() +{ + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + matrix pred = csv_to_matrix("results/combined.csv"); + fprintf(stderr, "%d %d\n", pred.rows, pred.cols); + + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); + free_matrix(pred); +} + + +void run_cifar(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? 
argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_cifar(cfg, weights); + else if(0==strcmp(argv[2], "extract")) extract_cifar(); + else if(0==strcmp(argv[2], "distill")) train_cifar_distill(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_cifar(cfg, weights); + else if(0==strcmp(argv[2], "multi")) test_cifar_multi(cfg, weights); + else if(0==strcmp(argv[2], "csv")) test_cifar_csv(cfg, weights); + else if(0==strcmp(argv[2], "csvtrain")) test_cifar_csvtrain(cfg, weights); + else if(0==strcmp(argv[2], "eval")) eval_cifar_csv(); +} + + diff --git a/workloads/realworld/pinned/darknet/examples/classifier.c b/workloads/realworld/pinned/darknet/examples/classifier.c new file mode 100644 index 0000000000000000000000000000000000000000..e8779836dc01a2e476104132acd2dbfdd6ed29aa --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/classifier.c @@ -0,0 +1,1123 @@ +#include "darknet.h" + +#include +#include + +float *get_regression_values(char **labels, int n) +{ + float *v = calloc(n, sizeof(float)); + int i; + for(i = 0; i < n; ++i){ + char *p = strchr(labels[i], ' '); + *p = 0; + v[i] = atof(p+1); + } + return v; +} + +void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + // Ruihao + int tag = option_find_int_quiet(options, "tag", 0); + + char *backup_directory_cfg = 
option_find_str(options, "backup", "/backup/"); + char *label_list_cfg = option_find_str(options, "labels", "data/labels.list"); + char *train_list_cfg = option_find_str(options, "train", "data/train.list"); + + char *env = getenv("UVMAsyncBench_BASE"); + char backup_directory[256]; + char label_list[256]; + char train_list[256]; + sprintf(backup_directory, "%s/%s", env, backup_directory_cfg); + sprintf(label_list, "%s/%s", env, label_list_cfg); + sprintf(train_list, "%s/%s", env, train_list_cfg); + // Ruihao + char *tree = option_find_str(options, "tree", 0); + if (tree) net->hierarchy = read_tree(tree); + int classes = option_find_int(options, "classes", 2); + + char **labels = 0; + if(!tag){ + labels = get_labels(label_list); + } + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + double time; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.hierarchy = net->hierarchy; + + args.min = net->min_ratio*net->w; + args.max = net->max_ratio*net->w; + printf("%d %d\n", args.min, args.max); + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + + args.paths = paths; + args.classes = classes; + args.n = imgs; + args.m = N; + args.labels = labels; + if (tag){ + args.type = TAG_DATA; + } else { + args.type = CLASSIFICATION_DATA; + } + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int count = 0; + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + if(net->random && count++%40 == 0){ + printf("Resizing\n"); + int dim = (rand() % 11 + 4) * 32; + //if (get_current_batch(net)+200 > net->max_batches) dim = 608; + //int dim = (rand() % 4 + 16) * 32; + printf("%d\n", dim); + args.w = dim; + args.h = dim; + args.size 
= dim; + args.min = net->min_ratio*dim; + args.max = net->max_ratio*dim; + printf("%d %d\n", args.min, args.max); + + pthread_join(load_thread, 0); + train = buffer; + free_data(train); + load_thread = load_data(args); + + for(i = 0; i < ngpus; ++i){ + resize_network(nets[i], dim, dim); + } + net = nets[0]; + } + time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + pthread_join(load_thread, 0); + + // free_network(net); + if(labels) free_ptrs((void**)labels, classes); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void validate_classifier_crop(char *datacfg, char *filename, char *weightfile) +{ + int i = 0; + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int 
classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + clock_t time; + float avg_acc = 0; + float avg_topk = 0; + int splits = m/1000; + int num = (i+1)*m/splits - i*m/splits; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + + args.paths = paths; + args.classes = classes; + args.n = num; + args.m = 0; + args.labels = labels; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(i = 1; i <= splits; ++i){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + num = (i+1)*m/splits - i*m/splits; + char **part = paths+(i*m/splits); + if(i != splits){ + args.paths = part; + load_thread = load_data_in_thread(args); + } + printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + float *acc = network_accuracies(net, val, topk); + avg_acc += acc[0]; + avg_topk += acc[1]; + printf("%d: top 1: %f, top %d: %f, %lf seconds, %d images\n", i, avg_acc/i, topk, avg_topk/i, sec(clock()-time), val.X.rows); + free_data(val); + } +} + +void validate_classifier_10(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float 
avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + int w = net->w; + int h = net->h; + int shift = 32; + image im = load_image_color(paths[i], w+shift, h+shift); + image images[10]; + images[0] = crop_image(im, -shift, -shift, w, h); + images[1] = crop_image(im, shift, -shift, w, h); + images[2] = crop_image(im, 0, 0, w, h); + images[3] = crop_image(im, -shift, shift, w, h); + images[4] = crop_image(im, shift, shift, w, h); + flip_image(im); + images[5] = crop_image(im, -shift, -shift, w, h); + images[6] = crop_image(im, shift, -shift, w, h); + images[7] = crop_image(im, 0, 0, w, h); + images[8] = crop_image(im, -shift, shift, w, h); + images[9] = crop_image(im, shift, shift, w, h); + float *pred = calloc(classes, sizeof(float)); + for(j = 0; j < 10; ++j){ + float *p = network_predict(net, images[j].data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1, 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + free_image(images[j]); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_classifier_full(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist 
= get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + int size = net->w; + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, size); + resize_network(net, resized.w, resized.h); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, resized.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + free_image(im); + free_image(resized); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + + +void validate_classifier_single(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *leaf_list = option_find_str(options, "leaves", 0); + if(leaf_list) change_leaves(net->hierarchy, leaf_list); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 
0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image crop = center_crop_image(im, net->w, net->h); + //grayscale_image_3c(crop); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, crop.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + free_image(im); + free_image(crop); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%s, %d, %f, %f, \n", paths[i], class, pred[0], pred[1]); + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_classifier_multi(char *datacfg, char *cfg, char *weights) +{ + int i, j; + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + //int scales[] = {224, 288, 320, 352, 384}; + int scales[] = {224, 256, 288, 320}; + int nscales = sizeof(scales)/sizeof(scales[0]); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + float *pred = calloc(classes, sizeof(float)); + image im = load_image_color(paths[i], 0, 0); + for(j = 0; j < nscales; ++j){ + image r = resize_max(im, scales[j]); + 
resize_network(net, r.w, r.h); + float *p = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + flip_image(r); + p = network_predict(net, r.data); + axpy_cpu(classes, 1, p, 1, pred, 1); + if(r.data != im.data) free_image(r); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + int top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image orig = load_image_color(input, 0, 0); + image r = resize_min(orig, 256); + image im = crop_image(r, (r.w - 224 - 1)/2 + 1, (r.h - 224 - 1)/2 + 1, 224, 224); + float mean[] = {0.48263312050943, 0.45230225481413, 0.40099074308742}; + float std[] = {0.22590347483426, 0.22120921437787, 0.22103996251583}; + float var[3]; + var[0] = std[0]*std[0]; + var[1] = std[1]*std[1]; + var[2] = std[2]*std[2]; + + normalize_cpu(im.data, mean, var, 1, 3, im.w*im.h); + + float *X = im.data; + time=clock(); + float *predictions = network_predict(net, X); + + layer l = net->layers[layer_num]; + 
for(i = 0; i < l.c; ++i){ + if(l.rolling_mean) printf("%f %f %f\n", l.rolling_mean[i], l.rolling_variance[i], l.scales[i]); + } +#ifdef GPU + cuda_pull_array(l.output_gpu, l.output, l.outputs); +#endif + for(i = 0; i < l.outputs; ++i){ + printf("%f\n", l.output[i]); + } + /* + + printf("\n\nWeights\n"); + for(i = 0; i < l.n*l.size*l.size*l.c; ++i){ + printf("%f\n", l.filters[i]); + } + + printf("\n\nBiases\n"); + for(i = 0; i < l.n; ++i){ + printf("%f\n", l.biases[i]); + } + */ + + top_predictions(net, top, indexes); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + printf("%s: %f\n", names[index], predictions[index]); + } + free_image(im); + if (filename) break; + } +} + +void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *name_list_cfg = option_find_str(options, "names", 0); + char *env = getenv("UVMAsyncBench_BASE"); + char name_list[256]; + sprintf(name_list, "%s/%s", env, name_list_cfg); + // if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + // Ruihao + if(top == 0) top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = letterbox_image(im, net->w, net->h); + //image r = resize_min(im, 320); + //printf("%d %d\n", r.w, r.h); + //resize_network(net, r.w, r.h); + //printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + startCPU(); + 
float *predictions = network_predict(net, X); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + endCPU(); + fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + //if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? names[net->hierarchy->parent[index]] : "Root"); + //else printf("%s: %f\n",names[index], predictions[index]); + printf("%5.2f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void label_classifier(char *datacfg, char *filename, char *weightfile) +{ + int i; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "names", "data/labels.list"); + char *test_list = option_find_str(options, "test", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + + char **labels = get_labels(label_list); + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + for(i = 0; i < m; ++i){ + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + float *pred = network_predict(net, crop.data); + + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + int ind = max_index(pred, classes); + + printf("%s\n", labels[ind]); + } +} + +void csv_classifier(char *datacfg, char *cfgfile, char *weightfile) +{ + int i,j; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + 
char *test_list = option_find_str(options, "test", "data/test.list"); + int top = option_find_int(options, "top", 1); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + int *indexes = calloc(top, sizeof(int)); + + for(i = 0; i < m; ++i){ + double time = what_time_is_it_now(); + char *path = paths[i]; + image im = load_image_color(path, 0, 0); + image r = letterbox_image(im, net->w, net->h); + float *predictions = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + + printf("%s", path); + for(j = 0; j < top; ++j){ + printf("\t%d", indexes[j]); + } + printf("\n"); + + free_image(im); + free_image(r); + + fprintf(stderr, "%lf seconds, %d images, %d total\n", what_time_is_it_now() - time, i+1, m); + } +} + +void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_layer) +{ + int curr = 0; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *test_list_cfg = option_find_str(options, "test", "data/test.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char test_list[256]; + sprintf(test_list, "%s/%s", env, test_list_cfg); + // Ruihao + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + m = net->max_batches * net->batch; + free_list(plist); + + clock_t time; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = classes; + args.n = net->batch; + args.m = 0; + args.labels = 0; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(curr = net->batch; curr <= m; curr += net->batch){ + // 
while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + if(curr < m){ + args.paths = paths + curr; + if (curr + net->batch > m) args.n = m - curr; + load_thread = load_data_in_thread(args); + } + fprintf(stderr, "Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + + int i, j; + if (target_layer >= 0){ + //layer l = net->layers[target_layer]; + } + + // for(i = 0; i < pred.rows; ++i){ + // printf("%s", paths[curr-net->batch+i]); + // for(j = 0; j < pred.cols; ++j){ + // printf("\t%g", pred.vals[i][j]); + // } + // printf("\n"); + // } + + fprintf(stderr, "%lf seconds, %d images, %d total\n", sec(clock()-time), val.X.rows, curr); + free_matrix(pred); + free_data(val); + } +} + + +void file_output_classifier(char *datacfg, char *filename, char *weightfile, char *listfile) +{ + int i,j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + //char *label_list = option_find_str(options, "names", "data/labels.list"); + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(listfile); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + for(i = 0; i < m; ++i){ + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + + float *pred = network_predict(net, crop.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 0, 1); + + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + + printf("%s", paths[i]); + for(j = 0; j < classes; ++j){ + printf("\t%g", pred[j]); + } + printf("\n"); + } +} + + +void threat_classifier(char *datacfg, char *cfgfile, 
char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + float threat = 0; + float roll = .2; + + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + int top = option_find_int(options, "top", 1); + + char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + //cvNamedWindow("Threat", CV_WINDOW_NORMAL); + //cvResizeWindow("Threat", 512, 512); + float fps = 0; + int i; + + int count = 0; + + while(1){ + ++count; + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + if(!in.data) break; + image in_s = resize_image(in, net->w, net->h); + + image out = in; + int x1 = out.w / 20; + int y1 = out.h / 20; + int x2 = 2*x1; + int y2 = out.h - out.h/20; + + int border = .01*out.h; + int h = y2 - y1 - 2*border; + int w = x2 - x1 - 2*border; + + float *predictions = network_predict(net, in_s.data); + float curr_threat = 0; + if(1){ + curr_threat = predictions[0] * 0 + + predictions[1] * .6 + + predictions[2]; + } else { + curr_threat = predictions[218] + + predictions[539] + + predictions[540] + + predictions[368] + + predictions[369] + + predictions[370]; + } + threat = roll * curr_threat + (1-roll) * threat; + + draw_box_width(out, x2 + border, y1 + .02*h, x2 + .5 * w, y1 + .02*h + border, border, 0,0,0); + if(threat > .97) { + draw_box_width(out, x2 + .5 * w + border, + y1 + .02*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .02*h + 3*border, 3*border, 1,0,0); + } + draw_box_width(out, x2 + .5 * w + border, + y1 + .02*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .02*h + 3*border, .5*border, 0,0,0); + draw_box_width(out, x2 + border, y1 + .42*h, x2 + .5 * w, 
y1 + .42*h + border, border, 0,0,0); + if(threat > .57) { + draw_box_width(out, x2 + .5 * w + border, + y1 + .42*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .42*h + 3*border, 3*border, 1,1,0); + } + draw_box_width(out, x2 + .5 * w + border, + y1 + .42*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .42*h + 3*border, .5*border, 0,0,0); + + draw_box_width(out, x1, y1, x2, y2, border, 0,0,0); + for(i = 0; i < threat * h ; ++i){ + float ratio = (float) i / h; + float r = (ratio < .5) ? (2*(ratio)) : 1; + float g = (ratio < .5) ? 1 : 1 - 2*(ratio - .5); + draw_box_width(out, x1 + border, y2 - border - i, x2 - border, y2 - border - i, 1, r, g, 0); + } + top_predictions(net, top, indexes); + char buff[256]; + sprintf(buff, "/home/pjreddie/tmp/threat_%06d", count); + //save_image(out, buff); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + for(i = 0; i < top; ++i){ + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + } + + if(1){ + show_image(out, "Threat", 10); + } + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + int bad_cats[] = {218, 539, 540, 1213, 1501, 1742, 1911, 2415, 4348, 19223, 368, 369, 370, 1133, 1200, 1306, 2122, 2301, 2537, 2823, 3179, 3596, 3639, 4489, 5107, 5140, 5289, 6240, 6631, 6762, 7048, 7171, 7969, 7984, 7989, 8824, 8927, 9915, 10270, 10448, 13401, 15205, 18358, 18894, 18895, 19249, 19697}; + + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + int top = option_find_int(options, "top", 1); + 
+ char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + int i; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = resize_image(in, net->w, net->h); + + float *predictions = network_predict(net, in_s.data); + top_predictions(net, top, indexes); + + printf("\033[2J"); + printf("\033[1;1H"); + + int threat = 0; + for(i = 0; i < sizeof(bad_cats)/sizeof(bad_cats[0]); ++i){ + int index = bad_cats[i]; + if(predictions[index] > .01){ + printf("Threat Detected!\n"); + threat = 1; + break; + } + } + if(!threat) printf("Scanning...\n"); + for(i = 0; i < sizeof(bad_cats)/sizeof(bad_cats[0]); ++i){ + int index = bad_cats[i]; + if(predictions[index] > .01){ + printf("%s\n", names[index]); + } + } + + show_image(in, "Threat Detection", 10); + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + +void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + image **alphabet = load_alphabet(); + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + + int w = 1280; + int h = 720; + void * cap = open_video_stream(filename, cam_index, w, h, 0); + + int top = option_find_int(options, "top", 1); + + char *label_list = option_find_str(options, "labels", 0); + char *name_list = option_find_str(options, "names", label_list); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't 
connect to webcam.\n"); + float fps = 0; + int i; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + + float *predictions = network_predict(net, in_s.data); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_predictions(net, top, indexes); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + int lh = in.h*.03; + int toph = 3*lh; + + float rgb[3] = {1,1,1}; + for(i = 0; i < top; ++i){ + printf("%d\n", toph); + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + + char buff[1024]; + sprintf(buff, "%3.1f%%: %s\n", predictions[index]*100, names[index]); + image label = get_label(alphabet, buff, lh); + draw_label(in, toph, lh, label, rgb); + toph += 2*lh; + free_image(label); + } + + show_image(in, base, 10); + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_classifier(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int ngpus; + int *gpus = read_intlist(gpu_list, &ngpus, gpu_index); + + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int top = find_int_arg(argc, argv, "-t", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + char *layer_s = (argc > 7) ? argv[7]: 0; + int layer = layer_s ? 
atoi(layer_s) : -1; + if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top); + else if(0==strcmp(argv[2], "fout")) file_output_classifier(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s)); + else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer); + else if(0==strcmp(argv[2], "csv")) csv_classifier(data, cfg, weights); + else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights); + else if(0==strcmp(argv[2], "validmulti")) validate_classifier_multi(data, cfg, weights); + else if(0==strcmp(argv[2], "valid10")) validate_classifier_10(data, cfg, weights); + else if(0==strcmp(argv[2], "validcrop")) validate_classifier_crop(data, cfg, weights); + else if(0==strcmp(argv[2], "validfull")) validate_classifier_full(data, cfg, weights); +} + + diff --git a/workloads/realworld/pinned/darknet/examples/coco.c b/workloads/realworld/pinned/darknet/examples/coco.c new file mode 100644 index 0000000000000000000000000000000000000000..6a50b89abd2abc7fb217b5118034a746f790f690 --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/coco.c @@ -0,0 +1,357 @@ +#include "darknet.h" + +#include + +char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking 
meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"}; + +int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; + +void train_coco(char *cfgfile, char *weightfile) +{ + //char *train_images = "/home/pjreddie/data/voc/test/train.txt"; + //char *train_images = "/home/pjreddie/data/coco/train.txt"; + char *train_images = "data/coco.trainval.txt"; + //char *train_images = "data/bags.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + layer l = net->layers[net->n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = 
jitter; + args.num_boxes = side; + args.d = &buffer; + args.type = REGION_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + /* + image im = float_to_image(net->w, net->h, 3, train.X.vals[113]); + image copy = copy_image(im); + draw_coco(copy, train.y.vals[113], 7, "truth"); + cvWaitKey(0); + free_image(copy); + */ + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || (i < 1000 && i%100 == 0)){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +static void print_cocos(FILE *fp, int image_id, detection *dets, int num_boxes, int classes, int w, int h) +{ + int i, j; + for(i = 0; i < num_boxes; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + float bx = xmin; + float by = ymin; + float bw = xmax - xmin; + float bh = ymax - ymin; + + for(j = 0; j < classes; 
++j){ + if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]); + } + } +} + +int get_coco_image_id(char *filename) +{ + char *p = strrchr(filename, '_'); + return atoi(p+1); +} + +void validate_coco(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/"; + list *plist = get_paths("data/coco_val_5k.list"); + //list *plist = get_paths("/home/pjreddie/data/people-art/test.txt"); + //list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + snprintf(buff, 1024, "%s/coco_results.json", base); + FILE *fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + + int m = plist->size; + int i=0; + int t; + + float thresh = .01; + int nms = 1; + float iou_thresh = .5; + + int nthreads = 8; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.type = IMAGE_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + time_t start = time(0); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = 
&buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + int image_id = get_coco_image_id(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh); + print_cocos(fp, image_id, dets, l.side*l.side*l.n, classes, w, h); + free_detections(dets, nboxes); + free_image(val[t]); + free_image(val_resized[t]); + } + } + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); +} + +void validate_coco_recall(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + int side = l.side; + + int j, k; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + + float thresh = .001; + int nms = 0; + float iou_thresh = .5; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + + int nboxes = 0; + detection *dets = get_network_boxes(net, orig.w, 
orig.h, thresh, 0, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, side*side*l.n, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < side*side*l.n; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < side*side*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + free_detections(dets, nboxes); + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free(id); + free_image(orig); + free_image(sized); + } +} + +void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) +{ + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + layer l = net->layers[net->n-1]; + set_batch_network(net, 1); + srand(2222222); + float nms = .4; + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = resize_image(im, net->w, net->h); + float *X = sized.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + + int nboxes = 0; + detection *dets = get_network_boxes(net, 1, 1, thresh, 0, 0, 0, &nboxes); + if 
(nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms); + + draw_detections(im, dets, l.side*l.side*l.n, thresh, coco_classes, alphabet, 80); + save_image(im, "prediction"); + show_image(im, "predictions", 0); + free_detections(dets, nboxes); + free_image(im); + free_image(sized); + if (filename) break; + } +} + +void run_coco(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .2); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5]: 0; + int avg = find_int_arg(argc, argv, "-avg", 1); + if(0==strcmp(argv[2], "test")) test_coco(cfg, weights, filename, thresh); + else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights); + else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix, avg, .5, 0,0,0,0); +} diff --git a/workloads/realworld/pinned/darknet/examples/darknet.c b/workloads/realworld/pinned/darknet/examples/darknet.c new file mode 100644 index 0000000000000000000000000000000000000000..f1c5e43b66391a9674c13a193e329cf3dfc26439 --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/darknet.c @@ -0,0 +1,559 @@ +#include "darknet.h" + +// #include "../../../../common/cupti_add.h" +// #include "../../../../common/cpu_timestamps.h" +// #include "cpu_timestamps.h" + +static uint64_t startCPUTime; +static uint64_t endCPUTime; + +void startCPU() +{ + struct timespec tv; + if (clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + startCPUTime = (tv.tv_sec 
* 1.0e9 + tv.tv_nsec); +} + +void endCPU() +{ + struct timespec tv; + if (clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + + endCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); + // endCPUTimestamp1 = std::chrono::system_clock::now(); + printf("CPU_Times,%lu,%lu,%lu\n", startCPUTime, endCPUTime, endCPUTime - startCPUTime); +} + +#ifdef __cplusplus +extern "C" { +#endif +extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top); +extern void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen); +extern void run_yolo(int argc, char **argv); +extern void run_detector(int argc, char **argv); +extern void run_coco(int argc, char **argv); +extern void run_nightmare(int argc, char **argv); +extern void run_classifier(int argc, char **argv); +extern void run_regressor(int argc, char **argv); +extern void run_segmenter(int argc, char **argv); +extern void run_isegmenter(int argc, char **argv); +extern void run_char_rnn(int argc, char **argv); +extern void run_tag(int argc, char **argv); +extern void run_cifar(int argc, char **argv); +extern void run_go(int argc, char **argv); +extern void run_art(int argc, char **argv); +extern void run_super(int argc, char **argv); +extern void run_lsd(int argc, char **argv); +#ifdef __cplusplus +} +#endif + +void average(int argc, char *argv[]) +{ + char *cfgfile = argv[2]; + char *outfile = argv[3]; + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + network *sum = parse_network_cfg(cfgfile); + + char *weightfile = argv[4]; + load_weights(sum, weightfile); + + int i, j; + int n = argc - 5; + for(i = 0; i < n; ++i){ + weightfile = argv[i+5]; + load_weights(net, weightfile); + for(j = 0; j < net->n; ++j){ + layer l = net->layers[j]; + layer out = sum->layers[j]; + if(l.type == CONVOLUTIONAL){ + int num = l.n*l.c*l.size*l.size; + axpy_cpu(l.n, 1, l.biases, 1, out.biases, 1); + 
axpy_cpu(num, 1, l.weights, 1, out.weights, 1); + if(l.batch_normalize){ + axpy_cpu(l.n, 1, l.scales, 1, out.scales, 1); + axpy_cpu(l.n, 1, l.rolling_mean, 1, out.rolling_mean, 1); + axpy_cpu(l.n, 1, l.rolling_variance, 1, out.rolling_variance, 1); + } + } + if(l.type == CONNECTED){ + axpy_cpu(l.outputs, 1, l.biases, 1, out.biases, 1); + axpy_cpu(l.outputs*l.inputs, 1, l.weights, 1, out.weights, 1); + } + } + } + n = n+1; + for(j = 0; j < net->n; ++j){ + layer l = sum->layers[j]; + if(l.type == CONVOLUTIONAL){ + int num = l.n*l.c*l.size*l.size; + scal_cpu(l.n, 1./n, l.biases, 1); + scal_cpu(num, 1./n, l.weights, 1); + if(l.batch_normalize){ + scal_cpu(l.n, 1./n, l.scales, 1); + scal_cpu(l.n, 1./n, l.rolling_mean, 1); + scal_cpu(l.n, 1./n, l.rolling_variance, 1); + } + } + if(l.type == CONNECTED){ + scal_cpu(l.outputs, 1./n, l.biases, 1); + scal_cpu(l.outputs*l.inputs, 1./n, l.weights, 1); + } + } + save_weights(sum, outfile); +} + +long numops(network *net) +{ + int i; + long ops = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + ops += 2l * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w; + } else if(l.type == CONNECTED){ + ops += 2l * l.inputs * l.outputs; + } else if (l.type == RNN){ + ops += 2l * l.input_layer->inputs * l.input_layer->outputs; + ops += 2l * l.self_layer->inputs * l.self_layer->outputs; + ops += 2l * l.output_layer->inputs * l.output_layer->outputs; + } else if (l.type == GRU){ + ops += 2l * l.uz->inputs * l.uz->outputs; + ops += 2l * l.uh->inputs * l.uh->outputs; + ops += 2l * l.ur->inputs * l.ur->outputs; + ops += 2l * l.wz->inputs * l.wz->outputs; + ops += 2l * l.wh->inputs * l.wh->outputs; + ops += 2l * l.wr->inputs * l.wr->outputs; + } else if (l.type == LSTM){ + ops += 2l * l.uf->inputs * l.uf->outputs; + ops += 2l * l.ui->inputs * l.ui->outputs; + ops += 2l * l.ug->inputs * l.ug->outputs; + ops += 2l * l.uo->inputs * l.uo->outputs; + ops += 2l * l.wf->inputs * l.wf->outputs; + ops += 2l * 
l.wi->inputs * l.wi->outputs; + ops += 2l * l.wg->inputs * l.wg->outputs; + ops += 2l * l.wo->inputs * l.wo->outputs; + } + } + return ops; +} + +void speed(char *cfgfile, int tics) +{ + if (tics == 0) tics = 1000; + network *net = parse_network_cfg(cfgfile); + set_batch_network(net, 1); + int i; + double time=what_time_is_it_now(); + image im = make_image(net->w, net->h, net->c*net->batch); + for(i = 0; i < tics; ++i){ + network_predict(net, im.data); + } + double t = what_time_is_it_now() - time; + long ops = numops(net); + printf("\n%d evals, %f Seconds\n", tics, t); + printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); + printf("FLOPS: %.2f Bn\n", (float)ops/1000000000.*tics/t); + printf("Speed: %f sec/eval\n", t/tics); + printf("Speed: %f Hz\n", tics/t); +} + +void operations(char *cfgfile) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + long ops = numops(net); + printf("Floating Point Operations: %ld\n", ops); + printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); +} + +void oneoff(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + int oldn = net->layers[net->n - 2].n; + int c = net->layers[net->n - 2].c; + scal_cpu(oldn*c, .1, net->layers[net->n - 2].weights, 1); + scal_cpu(oldn, 0, net->layers[net->n - 2].biases, 1); + net->layers[net->n - 2].n = 11921; + net->layers[net->n - 2].biases += 5; + net->layers[net->n - 2].weights += 5*c; + if(weightfile){ + load_weights(net, weightfile); + } + net->layers[net->n - 2].biases -= 5; + net->layers[net->n - 2].weights -= 5*c; + net->layers[net->n - 2].n = oldn; + printf("%d\n", oldn); + layer l = net->layers[net->n - 2]; + copy_cpu(l.n/3, l.biases, 1, l.biases + l.n/3, 1); + copy_cpu(l.n/3, l.biases, 1, l.biases + 2*l.n/3, 1); + copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + l.n/3*l.c, 1); + copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + 2*l.n/3*l.c, 1); + *net->seen = 0; + save_weights(net, 
outfile); +} + +void oneoff2(char *cfgfile, char *weightfile, char *outfile, int l) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights_upto(net, weightfile, 0, net->n); + load_weights_upto(net, weightfile, l, net->n); + } + *net->seen = 0; + save_weights_upto(net, outfile, net->n); +} + +void partial(char *cfgfile, char *weightfile, char *outfile, int max) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 1); + save_weights_upto(net, outfile, max); +} + +void print_weights(char *cfgfile, char *weightfile, int n) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 1); + layer l = net->layers[n]; + int i, j; + //printf("["); + for(i = 0; i < l.n; ++i){ + //printf("["); + for(j = 0; j < l.size*l.size*l.c; ++j){ + //if(j > 0) printf(","); + printf("%g ", l.weights[i*l.size*l.size*l.c + j]); + } + printf("\n"); + //printf("]%s\n", (i == l.n-1)?"":","); + } + //printf("]"); +} + +void rescale_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + rescale_weights(l, 2, -.5); + break; + } + } + save_weights(net, outfile); +} + +void rgbgr_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + rgbgr_weights(l); + break; + } + } + save_weights(net, outfile); +} + +void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for (i = 0; i < net->n; ++i) { + layer l = net->layers[i]; + if (l.type == CONVOLUTIONAL && l.batch_normalize) { + denormalize_convolutional_layer(l); + } + if (l.type == CONNECTED && l.batch_normalize) { + 
denormalize_connected_layer(l); + } + if (l.type == GRU && l.batch_normalize) { + denormalize_connected_layer(*l.input_z_layer); + denormalize_connected_layer(*l.input_r_layer); + denormalize_connected_layer(*l.input_h_layer); + denormalize_connected_layer(*l.state_z_layer); + denormalize_connected_layer(*l.state_r_layer); + denormalize_connected_layer(*l.state_h_layer); + } + } + save_weights(net, outfile); +} + +layer normalize_layer(layer l, int n) +{ + int j; + l.batch_normalize=1; + l.scales = (float *) calloc(n, sizeof(float)); + for(j = 0; j < n; ++j){ + l.scales[j] = 1; + } + l.rolling_mean = (float *) calloc(n, sizeof(float)); + l.rolling_variance = (float *) calloc(n, sizeof(float)); + return l; +} + +void normalize_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL && !l.batch_normalize){ + net->layers[i] = normalize_layer(l, l.n); + } + if (l.type == CONNECTED && !l.batch_normalize) { + net->layers[i] = normalize_layer(l, l.outputs); + } + if (l.type == GRU && l.batch_normalize) { + *l.input_z_layer = normalize_layer(*l.input_z_layer, l.input_z_layer->outputs); + *l.input_r_layer = normalize_layer(*l.input_r_layer, l.input_r_layer->outputs); + *l.input_h_layer = normalize_layer(*l.input_h_layer, l.input_h_layer->outputs); + *l.state_z_layer = normalize_layer(*l.state_z_layer, l.state_z_layer->outputs); + *l.state_r_layer = normalize_layer(*l.state_r_layer, l.state_r_layer->outputs); + *l.state_h_layer = normalize_layer(*l.state_h_layer, l.state_h_layer->outputs); + net->layers[i].batch_normalize=1; + } + } + save_weights(net, outfile); +} + +void statistics_net(char *cfgfile, char *weightfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for (i = 0; i < net->n; ++i) { + layer l = net->layers[i]; + if (l.type == CONNECTED && 
l.batch_normalize) { + printf("Connected Layer %d\n", i); + statistics_connected_layer(l); + } + if (l.type == GRU && l.batch_normalize) { + printf("GRU Layer %d\n", i); + printf("Input Z\n"); + statistics_connected_layer(*l.input_z_layer); + printf("Input R\n"); + statistics_connected_layer(*l.input_r_layer); + printf("Input H\n"); + statistics_connected_layer(*l.input_h_layer); + printf("State Z\n"); + statistics_connected_layer(*l.state_z_layer); + printf("State R\n"); + statistics_connected_layer(*l.state_r_layer); + printf("State H\n"); + statistics_connected_layer(*l.state_h_layer); + } + printf("\n"); + } +} + +void denormalize_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for (i = 0; i < net->n; ++i) { + layer l = net->layers[i]; + if ((l.type == DECONVOLUTIONAL || l.type == CONVOLUTIONAL) && l.batch_normalize) { + denormalize_convolutional_layer(l); + net->layers[i].batch_normalize=0; + } + if (l.type == CONNECTED && l.batch_normalize) { + denormalize_connected_layer(l); + net->layers[i].batch_normalize=0; + } + if (l.type == GRU && l.batch_normalize) { + denormalize_connected_layer(*l.input_z_layer); + denormalize_connected_layer(*l.input_r_layer); + denormalize_connected_layer(*l.input_h_layer); + denormalize_connected_layer(*l.state_z_layer); + denormalize_connected_layer(*l.state_r_layer); + denormalize_connected_layer(*l.state_h_layer); + l.input_z_layer->batch_normalize = 0; + l.input_r_layer->batch_normalize = 0; + l.input_h_layer->batch_normalize = 0; + l.state_z_layer->batch_normalize = 0; + l.state_r_layer->batch_normalize = 0; + l.state_h_layer->batch_normalize = 0; + net->layers[i].batch_normalize=0; + } + } + save_weights(net, outfile); +} + +void mkimg(char *cfgfile, char *weightfile, int h, int w, int num, char *prefix) +{ + network *net = load_network(cfgfile, weightfile, 0); + image *ims = get_weights(net->layers[0]); + int n = net->layers[0].n; + 
int z; + for(z = 0; z < num; ++z){ + image im = make_image(h, w, 3); + fill_image(im, .5); + int i; + for(i = 0; i < 100; ++i){ + image r = copy_image(ims[rand()%n]); + rotate_image_cw(r, rand()%4); + random_distort_image(r, 1, 1.5, 1.5); + int dx = rand()%(w-r.w); + int dy = rand()%(h-r.h); + ghost_image(r, im, dx, dy); + free_image(r); + } + char buff[256]; + sprintf(buff, "%s/gen_%d", prefix, z); + save_image(im, buff); + free_image(im); + } +} + +void visualize(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + visualize_network(net); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsc() +{ + unsigned long a, d; + + __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); + + return (a | (d << 32)); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsp() { + struct timespec tms; + if (clock_gettime(CLOCK_REALTIME, &tms)) { + return -1; + } + unsigned long ns = tms.tv_sec * 1000000000; + ns += tms.tv_nsec; + return ns; +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + //test_resize("data/bad.jpg"); + //test_box(); + //test_convolutional_layer(); + if(argc < 2){ + fprintf(stderr, "usage: %s \n", argv[0]); + return 0; + } + gpu_index = find_int_arg(argc, argv, "-i", GPU_DEVICE); + if(find_arg(argc, argv, "-nogpu")) { + gpu_index = -1; + } + +#ifndef GPU + gpu_index = -1; +#else + if(gpu_index >= 0){ + cuda_set_device(gpu_index); + } + initTrace(); +#endif + + if (0 == strcmp(argv[1], "average")){ + average(argc, argv); + } else if (0 == strcmp(argv[1], "yolo")){ + run_yolo(argc, argv); + } else if (0 == strcmp(argv[1], "super")){ + run_super(argc, argv); + } else if (0 == strcmp(argv[1], "lsd")){ + run_lsd(argc, argv); + } else if (0 == strcmp(argv[1], "detector")){ + run_detector(argc, argv); + } else if (0 == strcmp(argv[1], "detect")){ + float thresh = 
find_float_arg(argc, argv, "-thresh", .5); + char *filename = (argc > 4) ? argv[4]: 0; + char *outfile = find_char_arg(argc, argv, "-out", 0); + int fullscreen = find_arg(argc, argv, "-fullscreen"); + char *value = getenv("UVMAsyncBench_BASE"); + char buff[256]; + sprintf(buff, "%s/workloads/realworld/standard/darknet/cfg/coco.data", value); + test_detector(buff, argv[2], argv[3], filename, thresh, .5, outfile, fullscreen); + } else if (0 == strcmp(argv[1], "cifar")){ + run_cifar(argc, argv); + } else if (0 == strcmp(argv[1], "go")){ + run_go(argc, argv); + } else if (0 == strcmp(argv[1], "rnn")){ + run_char_rnn(argc, argv); + } else if (0 == strcmp(argv[1], "coco")){ + run_coco(argc, argv); + } else if (0 == strcmp(argv[1], "classify")){ + predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5); + } else if (0 == strcmp(argv[1], "classifier")){ + run_classifier(argc, argv); + } else if (0 == strcmp(argv[1], "regressor")){ + run_regressor(argc, argv); + } else if (0 == strcmp(argv[1], "isegmenter")){ + run_isegmenter(argc, argv); + } else if (0 == strcmp(argv[1], "segmenter")){ + run_segmenter(argc, argv); + } else if (0 == strcmp(argv[1], "art")){ + run_art(argc, argv); + } else if (0 == strcmp(argv[1], "tag")){ + run_tag(argc, argv); + } else if (0 == strcmp(argv[1], "3d")){ + composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? 
atof(argv[5]) : 0); + } else if (0 == strcmp(argv[1], "test")){ + test_resize(argv[2]); + } else if (0 == strcmp(argv[1], "nightmare")){ + run_nightmare(argc, argv); + } else if (0 == strcmp(argv[1], "rgbgr")){ + rgbgr_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "reset")){ + reset_normalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "denormalize")){ + denormalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "statistics")){ + statistics_net(argv[2], argv[3]); + } else if (0 == strcmp(argv[1], "normalize")){ + normalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "rescale")){ + rescale_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "ops")){ + operations(argv[2]); + } else if (0 == strcmp(argv[1], "speed")){ + speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0); + } else if (0 == strcmp(argv[1], "oneoff")){ + oneoff(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "oneoff2")){ + oneoff2(argv[2], argv[3], argv[4], atoi(argv[5])); + } else if (0 == strcmp(argv[1], "print")){ + print_weights(argv[2], argv[3], atoi(argv[4])); + } else if (0 == strcmp(argv[1], "partial")){ + partial(argv[2], argv[3], argv[4], atoi(argv[5])); + } else if (0 == strcmp(argv[1], "average")){ + average(argc, argv); + } else if (0 == strcmp(argv[1], "visualize")){ + visualize(argv[2], (argc > 3) ? 
argv[3] : 0); + } else if (0 == strcmp(argv[1], "mkimg")){ + mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]); + } else if (0 == strcmp(argv[1], "imtest")){ + test_resize(argv[2]); + } else { + fprintf(stderr, "Not an option: %s\n", argv[1]); + } + finiTrace(); + return 0; +} + diff --git a/workloads/realworld/pinned/darknet/examples/detector-scipy-opencv.py b/workloads/realworld/pinned/darknet/examples/detector-scipy-opencv.py new file mode 100644 index 0000000000000000000000000000000000000000..3bfc591312ad89ff2b026ffac0daecd461c80447 --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/detector-scipy-opencv.py @@ -0,0 +1,56 @@ +# Stupid python path shit. +# Instead just add darknet.py to somewhere in your python path +# OK actually that might not be a great idea, idk, work in progress +# Use at your own risk. or don't, i don't care + +from scipy.misc import imread +import cv2 + +def array_to_image(arr): + arr = arr.transpose(2,0,1) + c = arr.shape[0] + h = arr.shape[1] + w = arr.shape[2] + arr = (arr/255.0).flatten() + data = dn.c_array(dn.c_float, arr) + im = dn.IMAGE(w,h,c,data) + return im + +def detect2(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): + boxes = dn.make_boxes(net) + probs = dn.make_probs(net) + num = dn.num_boxes(net) + dn.network_detect(net, image, thresh, hier_thresh, nms, boxes, probs) + res = [] + for j in range(num): + for i in range(meta.classes): + if probs[j][i] > 0: + res.append((meta.names[i], probs[j][i], (boxes[j].x, boxes[j].y, boxes[j].w, boxes[j].h))) + res = sorted(res, key=lambda x: -x[1]) + dn.free_ptrs(dn.cast(probs, dn.POINTER(dn.c_void_p)), num) + return res + +import sys, os +sys.path.append(os.path.join(os.getcwd(),'python/')) + +import darknet as dn + +# Darknet +net = dn.load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0) +meta = dn.load_meta("cfg/coco.data") +r = dn.detect(net, meta, "data/dog.jpg") +print r + +# scipy +arr= imread('data/dog.jpg') +im = 
array_to_image(arr) +r = detect2(net, meta, im) +print r + +# OpenCV +arr = cv2.imread('data/dog.jpg') +im = array_to_image(arr) +dn.rgbgr_image(im) +r = detect2(net, meta, im) +print r + diff --git a/workloads/realworld/pinned/darknet/examples/detector.c b/workloads/realworld/pinned/darknet/examples/detector.c new file mode 100644 index 0000000000000000000000000000000000000000..6ff1fcdff3d3d81abb458e001091cf2757b8d837 --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/detector.c @@ -0,0 +1,931 @@ +#include "darknet.h" + +static int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; + + +void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + list *options = read_data_cfg(datacfg); + // Ruihao + char *train_images_cfg = option_find_str(options, "train", "data/train.list"); + char *backup_directory_cfg = option_find_str(options, "backup", "/backup/"); + + char *env = getenv("UVMAsyncBench_BASE"); + char train_images[256]; + char backup_directory[256]; + sprintf(train_images, "%s/%s", env, train_images_cfg); + sprintf(backup_directory, "%s/%s", env, backup_directory_cfg); + // Ruihao + + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network **nets = calloc(ngpus, sizeof(network)); + + srand(time(0)); + int seed = rand(); + int i; + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + data train, buffer; + + layer l = 
net->layers[net->n - 1]; + + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = get_base_args(net); + args.coords = l.coords; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes = l.max_boxes; + args.d = &buffer; + args.type = DETECTION_DATA; + //args.type = INSTANCE_DATA; + args.threads = 64; + + pthread_t load_thread = load_data(args); + double time; + int count = 0; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + if(l.random && count++%10 == 0){ + printf("Resizing\n"); + int dim = (rand() % 10 + 10) * 32; + if (get_current_batch(net)+200 > net->max_batches) dim = 608; + //int dim = (rand() % 4 + 16) * 32; + printf("%d\n", dim); + args.w = dim; + args.h = dim; + + pthread_join(load_thread, 0); + train = buffer; + free_data(train); + load_thread = load_data(args); + + #pragma omp parallel for + for(i = 0; i < ngpus; ++i){ + resize_network(nets[i], dim, dim); + } + net = nets[0]; + } + time=what_time_is_it_now(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + /* + int k; + for(k = 0; k < l.max_boxes; ++k){ + box b = float_to_box(train.y.vals[10] + 1 + k*5); + if(!b.x) break; + printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h); + } + */ + /* + int zz; + for(zz = 0; zz < train.X.cols; ++zz){ + image im = float_to_image(net->w, net->h, 3, train.X.vals[zz]); + int k; + for(k = 0; k < l.max_boxes; ++k){ + box b = float_to_box(train.y.vals[zz] + k*5, 1); + printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + draw_bbox(im, b, 1, 1,0,0); + } + show_image(im, "truth11"); + cvWaitKey(0); + save_image(im, "truth11"); + } + */ + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + + time=what_time_is_it_now(); + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, 
train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + i = get_current_batch(net); + printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, i*imgs); + if(i%100==0){ +#ifdef GPU + if(ngpus != 1) sync_nets(nets, ngpus, 0); +#endif + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + if(i%10000==0 || (i < 1000 && i%100 == 0)){ +#ifdef GPU + if(ngpus != 1) sync_nets(nets, ngpus, 0); +#endif + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } +#ifdef GPU + if(ngpus != 1) sync_nets(nets, ngpus, 0); +#endif + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + + +static int get_coco_image_id(char *filename) +{ + char *p = strrchr(filename, '/'); + char *c = strrchr(filename, '_'); + if(c) p = c; + return atoi(p+1); +} + +static void print_cocos(FILE *fp, char *image_path, detection *dets, int num_boxes, int classes, int w, int h) +{ + int i, j; + int image_id = get_coco_image_id(image_path); + for(i = 0; i < num_boxes; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + float bx = xmin; + float by = ymin; + float bw = xmax - xmin; + float bh = ymax - ymin; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]); + } + } +} + 
+void print_detector_detections(FILE **fps, char *id, detection *dets, int total, int classes, int w, int h) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2. + 1; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2. + 1; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2. + 1; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2. + 1; + + if (xmin < 1) xmin = 1; + if (ymin < 1) ymin = 1; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], + xmin, ymin, xmax, ymax); + } + } +} + +void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int classes, int w, int h) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + int class = j; + if (dets[i].prob[class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j+1, dets[i].prob[class], + xmin, ymin, xmax, ymax); + } + } +} + +void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char *outfile) +{ + int j; + list *options = read_data_cfg(datacfg); + char *valid_images = option_find_str(options, "valid", "data/train.list"); + char *name_list = option_find_str(options, "names", "data/names.list"); + char *prefix = option_find_str(options, "results", "results"); + char **names = get_labels(name_list); + char *mapf = option_find_str(options, "map", 0); + int *map = 0; + if (mapf) map = read_map(mapf); + + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 2); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + 
srand(time(0)); + + list *plist = get_paths(valid_images); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + char *type = option_find_str(options, "eval", "voc"); + FILE *fp = 0; + FILE **fps = 0; + int coco = 0; + int imagenet = 0; + if(0==strcmp(type, "coco")){ + if(!outfile) outfile = "coco_results"; + snprintf(buff, 1024, "%s/%s.json", prefix, outfile); + fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + coco = 1; + } else if(0==strcmp(type, "imagenet")){ + if(!outfile) outfile = "imagenet-detection"; + snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); + fp = fopen(buff, "w"); + imagenet = 1; + classes = 200; + } else { + if(!outfile) outfile = "comp4_det_test_"; + fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); + fps[j] = fopen(buff, "w"); + } + } + + int m = plist->size; + int i=0; + int t; + + float thresh = .005; + float nms = .45; + + int nthreads = 4; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + image input = make_image(net->w, net->h, net->c*2); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + //args.type = IMAGE_DATA; + args.type = LETTERBOX_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + double start = what_time_is_it_now(); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + 
args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id = basecfg(path); + copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data, 1); + flip_image(val_resized[t]); + copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data + net->w*net->h*net->c, 1); + + network_predict(net, input.data); + int w = val[t].w; + int h = val[t].h; + int num = 0; + detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &num); + if (nms) do_nms_sort(dets, num, classes, nms); + if (coco){ + print_cocos(fp, path, dets, num, classes, w, h); + } else if (imagenet){ + print_imagenet_detections(fp, i+t-nthreads+1, dets, num, classes, w, h); + } else { + print_detector_detections(fps, id, dets, num, classes, w, h); + } + free_detections(dets, num); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + for(j = 0; j < classes; ++j){ + if(fps) fclose(fps[j]); + } + if(coco){ + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start); +} + + +void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile) +{ + int j; + list *options = read_data_cfg(datacfg); + char *valid_images = option_find_str(options, "valid", "data/train.list"); + char *name_list = option_find_str(options, "names", "data/names.list"); + char *prefix = option_find_str(options, "results", "results"); + char **names = get_labels(name_list); + char *mapf = option_find_str(options, "map", 0); + int *map = 0; + if (mapf) map = read_map(mapf); + + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + list *plist = get_paths(valid_images); + char **paths = (char **)list_to_array(plist); + 
+ layer l = net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + char *type = option_find_str(options, "eval", "voc"); + FILE *fp = 0; + FILE **fps = 0; + int coco = 0; + int imagenet = 0; + if(0==strcmp(type, "coco")){ + if(!outfile) outfile = "coco_results"; + snprintf(buff, 1024, "%s/%s.json", prefix, outfile); + fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + coco = 1; + } else if(0==strcmp(type, "imagenet")){ + if(!outfile) outfile = "imagenet-detection"; + snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); + fp = fopen(buff, "w"); + imagenet = 1; + classes = 200; + } else { + if(!outfile) outfile = "comp4_det_test_"; + fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); + fps[j] = fopen(buff, "w"); + } + } + + + int m = plist->size; + int i=0; + int t; + + float thresh = .005; + float nms = .45; + + int nthreads = 4; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + //args.type = IMAGE_DATA; + args.type = LETTERBOX_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + double start = what_time_is_it_now(); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; 
+ char *id = basecfg(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &nboxes); + if (nms) do_nms_sort(dets, nboxes, classes, nms); + if (coco){ + print_cocos(fp, path, dets, nboxes, classes, w, h); + } else if (imagenet){ + print_imagenet_detections(fp, i+t-nthreads+1, dets, nboxes, classes, w, h); + } else { + print_detector_detections(fps, id, dets, nboxes, classes, w, h); + } + free_detections(dets, nboxes); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + for(j = 0; j < classes; ++j){ + if(fps) fclose(fps[j]); + } + if(coco){ + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start); +} + +void validate_detector_recall(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + list *plist = get_paths("data/coco_val_5k.list"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + + int j, k; + + int m = plist->size; + int i=0; + + float thresh = .001; + float iou_thresh = .5; + float nms = .4; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + int nboxes = 0; + detection *dets = get_network_boxes(net, sized.w, sized.h, thresh, .5, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, nboxes, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + 
find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < nboxes; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < l.w*l.h*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free(id); + free_image(orig); + free_image(sized); + } +} + + +void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen) +{ + list *options = read_data_cfg(datacfg); + // Ruihao + char *name_list_cfg = option_find_str(options, "names", "data/names.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char name_list[256]; + sprintf(name_list, "%s/%s", env, name_list_cfg); + // Ruihao + char **names = get_labels(name_list); + + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + double time; + char buff[256]; + char *input = buff; + float nms=.45; + while(1){ + printf("fine name is %s\n", filename); + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = letterbox_image(im, net->w, net->h); + //image sized = resize_image(im, net->w, net->h); + //image sized2 = resize_max(im, net->w); + //image sized = 
crop_image(sized2, -((net->w - sized2.w)/2), -((net->h - sized2.h)/2), net->w, net->h); + //resize_network(net, sized.w, sized.h); + layer l = net->layers[net->n-1]; + + + float *X = sized.data; + time=what_time_is_it_now(); + startCPU(); + network_predict(net, X); + endCPU(); + printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time); + int nboxes = 0; + detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes); + //printf("%d\n", nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes); + free_detections(dets, nboxes); + if(outfile){ + save_image(im, outfile); + } + else{ + save_image(im, "predictions"); +#ifdef OPENCV + make_window("predictions", 512, 512, 0); + show_image(im, "predictions", 0); +#endif + } + + free_image(im); + free_image(sized); + if (filename) break; + } +} + +/* +void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + CvCapture * cap; + + int w = 1280; + int h = 720; + + if(filename){ + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + } + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + cvNamedWindow(base, CV_WINDOW_NORMAL); + cvResizeWindow(base, 512, 512); + float fps = 0; + int i; + float nms = .45; + + while(1){ + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + layer l = net->layers[net->n-1]; + + float *X = in_s.data; + 
network_predict(net, X); + int nboxes = 0; + detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 0, &nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + + for(i = 0; i < nboxes; ++i){ + if(dets[i].prob[class] > thresh){ + box b = dets[i].bbox; + int left = b.x-b.w/2.; + int top = b.y-b.h/2.; + censor_image(in, left, top, b.w, b.h); + } + } + show_image(in, base); + cvWaitKey(10); + free_detections(dets, nboxes); + + + free_image(in_s); + free_image(in); + + + float curr = 0; + fps = .9*fps + .1*curr; + for(i = 0; i < skip; ++i){ + image in = get_image_from_stream(cap); + free_image(in); + } + } + #endif +} + +void extract_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + CvCapture * cap; + + int w = 1280; + int h = 720; + + if(filename){ + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + } + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + cvNamedWindow(base, CV_WINDOW_NORMAL); + cvResizeWindow(base, 512, 512); + float fps = 0; + int i; + int count = 0; + float nms = .45; + + while(1){ + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + layer l = net->layers[net->n-1]; + + show_image(in, base); + + int nboxes = 0; + float *X = in_s.data; + network_predict(net, X); + detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 1, &nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + + 
for(i = 0; i < nboxes; ++i){ + if(dets[i].prob[class] > thresh){ + box b = dets[i].bbox; + int size = b.w*in.w > b.h*in.h ? b.w*in.w : b.h*in.h; + int dx = b.x*in.w-size/2.; + int dy = b.y*in.h-size/2.; + image bim = crop_image(in, dx, dy, size, size); + char buff[2048]; + sprintf(buff, "results/extract/%07d", count); + ++count; + save_image(bim, buff); + free_image(bim); + } + } + free_detections(dets, nboxes); + + + free_image(in_s); + free_image(in); + + + float curr = 0; + fps = .9*fps + .1*curr; + for(i = 0; i < skip; ++i){ + image in = get_image_from_stream(cap); + free_image(in); + } + } + #endif +} +*/ + +/* +void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets) +{ + network_predict_image(net, im); + layer l = net->layers[net->n-1]; + int nboxes = num_boxes(net); + fill_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 0, dets); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); +} +*/ + +void infer_detector(char *datacfg, char *cfgfile, char *weightfile) +{ + int curr = 0; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *test_list_cfg = option_find_str(options, "valid", "data/valid.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char test_list[256]; + sprintf(test_list, "%s/%s", env, test_list_cfg); + // Ruihao + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + m = net->max_batches * net->batch; + free_list(plist); + + clock_t time; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = classes; + args.n = net->batch; + args.m = 0; + args.labels = 0; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(curr = net->batch; curr <= m; curr += net->batch){ 
+ // while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + if(curr < m){ + args.paths = paths + curr; + if (curr + net->batch > m) args.n = m - curr; + load_thread = load_data_in_thread(args); + } + fprintf(stderr, "Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + + fprintf(stderr, "%lf seconds, %d images, %d total\n", sec(clock()-time), val.X.rows, curr); + free_matrix(pred); + free_data(val); + } +} + +void run_detector(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .5); + float hier_thresh = find_float_arg(argc, argv, "-hier", .5); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + int avg = find_int_arg(argc, argv, "-avg", 3); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + char *outfile = find_char_arg(argc, argv, "-out", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int clear = find_arg(argc, argv, "-clear"); + int fullscreen = find_arg(argc, argv, "-fullscreen"); + int width = find_int_arg(argc, argv, "-w", 0); + int height = find_int_arg(argc, argv, "-h", 0); + int fps = find_int_arg(argc, argv, "-fps", 0); + //int class = find_int_arg(argc, argv, "-class", 0); + + char *datacfg = argv[3]; + char *cfg = argv[4]; + char *weights 
= (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen); + else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile); + // Ruihao + else if(0==strcmp(argv[2], "infer")) infer_detector(datacfg, cfg, weights); + // Ruihao + else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile); + else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) { + list *options = read_data_cfg(datacfg); + int classes = option_find_int(options, "classes", 20); + char *name_list = option_find_str(options, "names", "data/names.list"); + char **names = get_labels(name_list); + demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen); + } + //else if(0==strcmp(argv[2], "extract")) extract_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip); + //else if(0==strcmp(argv[2], "censor")) censor_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip); +} diff --git a/workloads/realworld/pinned/darknet/examples/detector.py b/workloads/realworld/pinned/darknet/examples/detector.py new file mode 100644 index 0000000000000000000000000000000000000000..40bb365e68211c513db9d63847ac95070f5eab98 --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/detector.py @@ -0,0 +1,27 @@ +# Stupid python path shit. +# Instead just add darknet.py to somewhere in your python path +# OK actually that might not be a great idea, idk, work in progress +# Use at your own risk. 
or don't, i don't care + +import sys, os +sys.path.append(os.path.join(os.getcwd(),'python/')) + +import darknet as dn +import pdb + +dn.set_gpu(0) +net = dn.load_net("cfg/yolo-thor.cfg", "/home/pjreddie/backup/yolo-thor_final.weights", 0) +meta = dn.load_meta("cfg/thor.data") +r = dn.detect(net, meta, "data/bedroom.jpg") +print r + +# And then down here you could detect a lot more images like: +r = dn.detect(net, meta, "data/eagle.jpg") +print r +r = dn.detect(net, meta, "data/giraffe.jpg") +print r +r = dn.detect(net, meta, "data/horses.jpg") +print r +r = dn.detect(net, meta, "data/person.jpg") +print r + diff --git a/workloads/realworld/pinned/darknet/examples/dice.c b/workloads/realworld/pinned/darknet/examples/dice.c new file mode 100644 index 0000000000000000000000000000000000000000..f56d76c0bb66c7f630ba1c4d1dc9195398b87cfb --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/dice.c @@ -0,0 +1,116 @@ +#include "darknet.h" + +char *dice_labels[] = {"face1","face2","face3","face4","face5","face6"}; + +void train_dice(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + int i = *net.seen/imgs; + char **labels = dice_labels; + list *plist = get_paths("data/dice/dice.train.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + while(1){ + ++i; + time=clock(); + data train = load_data_old(paths, imgs, plist->size, labels, 6, net.w, net.h); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf 
seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); + free_data(train); + if((i % 100) == 0) net.learning_rate *= .1; + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, i); + save_weights(net, buff); + } + } +} + +void validate_dice(char *filename, char *weightfile) +{ + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + char **labels = dice_labels; + list *plist = get_paths("data/dice/dice.val.list"); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + data val = load_data_old(paths, m, 0, labels, 6, net.w, net.h); + float *acc = network_accuracies(net, val, 2); + printf("Validation Accuracy: %f, %d images\n", acc[0], m); + free_data(val); +} + +void test_dice(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + int i = 0; + char **names = dice_labels; + char buff[256]; + char *input = buff; + int indexes[6]; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, net.w, net.h); + float *X = im.data; + float *predictions = network_predict(net, X); + top_predictions(net, 6, indexes); + for(i = 0; i < 6; ++i){ + int index = indexes[i]; + printf("%s: %f\n", names[index], predictions[index]); + } + free_image(im); + if (filename) break; + } +} + +void run_dice(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "test")) test_dice(cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_dice(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_dice(cfg, weights); +} + diff --git a/workloads/realworld/pinned/darknet/examples/go.c b/workloads/realworld/pinned/darknet/examples/go.c new file mode 100644 index 0000000000000000000000000000000000000000..688579dcb3a3e35e9a79b8fb8aa684f28f44290d --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/go.c @@ -0,0 +1,1370 @@ +#include "darknet.h" + +#include +#include +#include + +int inverted = 1; +int noi = 1; +static const int nind = 10; +int legal_go(float *b, float *ko, int p, int r, int c); +int check_ko(float *x, float *ko); + +typedef struct { + char **data; + int n; +} moves; + +char *fgetgo(FILE *fp) +{ + if(feof(fp)) return 0; + size_t size = 96; + char *line = malloc(size*sizeof(char)); + if(size != fread(line, sizeof(char), size, fp)){ + free(line); + return 0; + } + + return line; +} + +moves load_go_moves(char *filename) +{ + moves m; + m.n = 128; + m.data = calloc(128, sizeof(char*)); + FILE *fp = fopen(filename, "rb"); + int count = 0; + char *line = 0; + while ((line = fgetgo(fp))) { + if (count >= m.n) { + m.n *= 2; + m.data = realloc(m.data, m.n*sizeof(char*)); + } + m.data[count] = line; + ++count; + } + printf("%d\n", count); + m.n = count; + m.data = realloc(m.data, count*sizeof(char*)); + return m; +} + +void string_to_board(char *s, float *board) +{ + int i, j; + memset(board, 0, 2*19*19*sizeof(float)); + int count = 0; + for(i = 0; i < 91; ++i){ + char c = s[i]; + for(j = 0; j < 4; ++j){ + int me = (c >> (2*j)) & 1; + int you = (c >> (2*j + 1)) & 1; + if (me) board[count] = 1; + else if (you) board[count + 19*19] = 1; + ++count; + if(count >= 19*19) break; + } + } +} + +void board_to_string(char *s, float *board) +{ + int i, j; + memset(s, 0, (19*19/4+1)*sizeof(char)); + int count = 0; + for(i = 0; i < 91; ++i){ + for(j = 0; j < 4; ++j){ + 
int me = (board[count] == 1); + int you = (board[count + 19*19] == 1); + if (me) s[i] = s[i] | (1<<(2*j)); + if (you) s[i] = s[i] | (1<<(2*j + 1)); + ++count; + if(count >= 19*19) break; + } + } +} + +static int occupied(float *b, int i) +{ + if (b[i]) return 1; + if (b[i+19*19]) return -1; + return 0; +} + +data random_go_moves(moves m, int n) +{ + data d = {0}; + d.X = make_matrix(n, 19*19*3); + d.y = make_matrix(n, 19*19+2); + int i, j; + for(i = 0; i < n; ++i){ + float *board = d.X.vals[i]; + float *label = d.y.vals[i]; + char *b = m.data[rand()%m.n]; + int player = b[0] - '0'; + int result = b[1] - '0'; + int row = b[2]; + int col = b[3]; + string_to_board(b+4, board); + if(player > 0) for(j = 0; j < 19*19; ++j) board[19*19*2 + j] = 1; + label[19*19+1] = (player==result); + if(row >= 19 || col >= 19){ + label[19*19] = 1; + } else { + label[col + 19*row] = 1; + if(occupied(board, col + 19*row)) printf("hey\n"); + } + + int flip = rand()%2; + int rotate = rand()%4; + image in = float_to_image(19, 19, 3, board); + image out = float_to_image(19, 19, 1, label); + if(flip){ + flip_image(in); + flip_image(out); + } + rotate_image_cw(in, rotate); + rotate_image_cw(out, rotate); + } + return d; +} + + +void train_go(char *cfgfile, char *weightfile, char *filename, int *gpus, int ngpus, int clear) +{ + int i; + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + network *net = nets[0]; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + + char buff[256]; + moves m = load_go_moves(filename); + //moves m = load_go_moves("games.txt"); + + 
int N = m.n; + printf("Moves: %d\n", N); + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time=what_time_is_it_now(); + + data train = random_go_moves(m, net->batch*net->subdivisions*ngpus); + printf("Loaded: %lf seconds\n", what_time_is_it_now() - time); + time=what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 10); + } +#else + loss = train_network(net, train); +#endif + free_data(train); + + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory,base, epoch); + save_weights(net, buff); + + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + if(get_current_batch(net)%10000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_%ld.backup",backup_directory,base,get_current_batch(net)); + save_weights(net, buff); + } + } + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free(base); +} + +static void propagate_liberty(float *board, int *lib, int *visited, int row, int col, int side) +{ + if (row < 0 || row > 18 || col < 0 || col > 18) return; + int index = row*19 + col; + if (occupied(board,index) != side) return; + if (visited[index]) return; + visited[index] = 1; + lib[index] += 1; + propagate_liberty(board, lib, visited, row+1, col, side); + propagate_liberty(board, lib, visited, row-1, col, side); + propagate_liberty(board, lib, visited, row, col+1, side); + propagate_liberty(board, lib, visited, row, 
col-1, side); +} + + +static int *calculate_liberties(float *board) +{ + int *lib = calloc(19*19, sizeof(int)); + int visited[19*19]; + int i, j; + for(j = 0; j < 19; ++j){ + for(i = 0; i < 19; ++i){ + memset(visited, 0, 19*19*sizeof(int)); + int index = j*19 + i; + if(!occupied(board,index)){ + if ((i > 0) && occupied(board,index - 1)) propagate_liberty(board, lib, visited, j, i-1, occupied(board,index-1)); + if ((i < 18) && occupied(board,index + 1)) propagate_liberty(board, lib, visited, j, i+1, occupied(board,index+1)); + if ((j > 0) && occupied(board,index - 19)) propagate_liberty(board, lib, visited, j-1, i, occupied(board,index-19)); + if ((j < 18) && occupied(board,index + 19)) propagate_liberty(board, lib, visited, j+1, i, occupied(board,index+19)); + } + } + } + return lib; +} + +void print_board(FILE *stream, float *board, int player, int *indexes) +{ + int i,j,n; + fprintf(stream, " "); + for(i = 0; i < 19; ++i){ + fprintf(stream, "%c ", 'A' + i + 1*(i > 7 && noi)); + } + fprintf(stream, "\n"); + for(j = 0; j < 19; ++j){ + fprintf(stream, "%2d", (inverted) ? 
19-j : j+1); + for(i = 0; i < 19; ++i){ + int index = j*19 + i; + if(indexes){ + int found = 0; + for(n = 0; n < nind; ++n){ + if(index == indexes[n]){ + found = 1; + /* + if(n == 0) fprintf(stream, "\uff11"); + else if(n == 1) fprintf(stream, "\uff12"); + else if(n == 2) fprintf(stream, "\uff13"); + else if(n == 3) fprintf(stream, "\uff14"); + else if(n == 4) fprintf(stream, "\uff15"); + */ + fprintf(stream, " %d", n+1); + } + } + if(found) continue; + } + //if(board[index]*-swap > 0) fprintf(stream, "\u25C9 "); + //else if(board[index]*-swap < 0) fprintf(stream, "\u25EF "); + if (occupied(board, index) == player) fprintf(stream, " X"); + else if (occupied(board, index) ==-player) fprintf(stream, " O"); + else fprintf(stream, " ."); + } + fprintf(stream, "\n"); + } +} + +void flip_board(float *board) +{ + int i; + for(i = 0; i < 19*19; ++i){ + float swap = board[i]; + board[i] = board[i+19*19]; + board[i+19*19] = swap; + board[i+19*19*2] = 1-board[i+19*19*2]; + } +} + +float predict_move2(network *net, float *board, float *move, int multi) +{ + float *output = network_predict(net, board); + copy_cpu(19*19+1, output, 1, move, 1); + float result = output[19*19 + 1]; + int i; + if(multi){ + image bim = float_to_image(19, 19, 3, board); + for(i = 1; i < 8; ++i){ + rotate_image_cw(bim, i); + if(i >= 4) flip_image(bim); + + float *output = network_predict(net, board); + image oim = float_to_image(19, 19, 1, output); + result += output[19*19 + 1]; + + if(i >= 4) flip_image(oim); + rotate_image_cw(oim, -i); + + axpy_cpu(19*19+1, 1, output, 1, move, 1); + + if(i >= 4) flip_image(bim); + rotate_image_cw(bim, -i); + } + result = result/8; + scal_cpu(19*19+1, 1./8., move, 1); + } + for(i = 0; i < 19*19; ++i){ + if(board[i] || board[i+19*19]) move[i] = 0; + } + return result; +} + +static void remove_connected(float *b, int *lib, int p, int r, int c) +{ + if (r < 0 || r >= 19 || c < 0 || c >= 19) return; + if (occupied(b, r*19 + c) != p) return; + if (lib[r*19 + c] != 1) 
return; + b[r*19 + c] = 0; + b[19*19 + r*19 + c] = 0; + remove_connected(b, lib, p, r+1, c); + remove_connected(b, lib, p, r-1, c); + remove_connected(b, lib, p, r, c+1); + remove_connected(b, lib, p, r, c-1); +} + + +void move_go(float *b, int p, int r, int c) +{ + int *l = calculate_liberties(b); + if(p > 0) b[r*19 + c] = 1; + else b[19*19 + r*19 + c] = 1; + remove_connected(b, l, -p, r+1, c); + remove_connected(b, l, -p, r-1, c); + remove_connected(b, l, -p, r, c+1); + remove_connected(b, l, -p, r, c-1); + free(l); +} + +int compare_board(float *a, float *b) +{ + if(memcmp(a, b, 19*19*3*sizeof(float)) == 0) return 1; + return 0; +} + +typedef struct mcts_tree{ + float *board; + struct mcts_tree **children; + float *prior; + int *visit_count; + float *value; + float *mean; + float *prob; + int total_count; + float result; + int done; + int pass; +} mcts_tree; + +void free_mcts(mcts_tree *root) +{ + if(!root) return; + int i; + free(root->board); + for(i = 0; i < 19*19+1; ++i){ + if(root->children[i]) free_mcts(root->children[i]); + } + free(root->children); + free(root->prior); + free(root->visit_count); + free(root->value); + free(root->mean); + free(root->prob); + free(root); +} + +float *network_predict_rotations(network *net, float *next) +{ + int n = net->batch; + float *in = calloc(19*19*3*n, sizeof(float)); + image im = float_to_image(19, 19, 3, next); + int i,j; + int *inds = random_index_order(0, 8); + for(j = 0; j < n; ++j){ + i = inds[j]; + rotate_image_cw(im, i); + if(i >= 4) flip_image(im); + memcpy(in + 19*19*3*j, im.data, 19*19*3*sizeof(float)); + if(i >= 4) flip_image(im); + rotate_image_cw(im, -i); + } + float *pred = network_predict(net, in); + for(j = 0; j < n; ++j){ + i = inds[j]; + image im = float_to_image(19, 19, 1, pred + j*(19*19 + 2)); + if(i >= 4) flip_image(im); + rotate_image_cw(im, -i); + if(j > 0){ + axpy_cpu(19*19+2, 1, im.data, 1, pred, 1); + } + } + free(in); + free(inds); + scal_cpu(19*19+2, 1./n, pred, 1); + return pred; +} + 
+mcts_tree *expand(float *next, float *ko, network *net) +{ + mcts_tree *root = calloc(1, sizeof(mcts_tree)); + root->board = next; + root->children = calloc(19*19+1, sizeof(mcts_tree*)); + root->prior = calloc(19*19 + 1, sizeof(float)); + root->prob = calloc(19*19 + 1, sizeof(float)); + root->mean = calloc(19*19 + 1, sizeof(float)); + root->value = calloc(19*19 + 1, sizeof(float)); + root->visit_count = calloc(19*19 + 1, sizeof(int)); + root->total_count = 1; + int i; + float *pred = network_predict_rotations(net, next); + copy_cpu(19*19+1, pred, 1, root->prior, 1); + float val = 2*pred[19*19 + 1] - 1; + root->result = val; + for(i = 0; i < 19*19+1; ++i) { + root->visit_count[i] = 0; + root->value[i] = 0; + root->mean[i] = val; + if(i < 19*19 && occupied(next, i)){ + root->value[i] = -1; + root->mean[i] = -1; + root->prior[i] = 0; + } + } + //print_board(stderr, next, flip?-1:1, 0); + return root; +} + +float *copy_board(float *board) +{ + float *next = calloc(19*19*3, sizeof(float)); + copy_cpu(19*19*3, board, 1, next, 1); + return next; +} + +float select_mcts(mcts_tree *root, network *net, float *prev, float cpuct) +{ + if(root->done) return -root->result; + int i; + float max = -1000; + int max_i = 0; + for(i = 0; i < 19*19+1; ++i){ + root->prob[i] = root->mean[i] + cpuct*root->prior[i] * sqrt(root->total_count) / (1. 
+ root->visit_count[i]); + if(root->prob[i] > max){ + max = root->prob[i]; + max_i = i; + } + } + float val; + i = max_i; + root->visit_count[i]++; + root->total_count++; + if (root->children[i]) { + val = select_mcts(root->children[i], net, root->board, cpuct); + } else { + if(max_i < 19*19 && !legal_go(root->board, prev, 1, max_i/19, max_i%19)) { + root->mean[i] = -1; + root->value[i] = -1; + root->prior[i] = 0; + --root->total_count; + return select_mcts(root, net, prev, cpuct); + //printf("Detected ko\n"); + //getchar(); + } else { + float *next = copy_board(root->board); + if (max_i < 19*19) { + move_go(next, 1, max_i / 19, max_i % 19); + } + flip_board(next); + root->children[i] = expand(next, root->board, net); + val = -root->children[i]->result; + if(max_i == 19*19){ + root->children[i]->pass = 1; + if (root->pass){ + root->children[i]->done = 1; + } + } + } + } + root->value[i] += val; + root->mean[i] = root->value[i]/root->visit_count[i]; + return -val; +} + +mcts_tree *run_mcts(mcts_tree *tree, network *net, float *board, float *ko, int player, int n, float cpuct, float secs) +{ + int i; + double t = what_time_is_it_now(); + if(player < 0) flip_board(board); + if(!tree) tree = expand(copy_board(board), ko, net); + assert(compare_board(tree->board, board)); + for(i = 0; i < n; ++i){ + if (secs > 0 && (what_time_is_it_now() - t) > secs) break; + int max_i = max_int_index(tree->visit_count, 19*19+1); + if (tree->visit_count[max_i] >= n) break; + select_mcts(tree, net, ko, cpuct); + } + if(player < 0) flip_board(board); + //fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + return tree; +} + +mcts_tree *move_mcts(mcts_tree *tree, int index) +{ + if(index < 0 || index > 19*19 || !tree || !tree->children[index]) { + free_mcts(tree); + tree = 0; + } else { + mcts_tree *swap = tree; + tree = tree->children[index]; + swap->children[index] = 0; + free_mcts(swap); + } + return tree; +} + +typedef struct { + float value; + float mcts; + int row; + int 
col; +} move; + +move pick_move(mcts_tree *tree, float temp, int player) +{ + int i; + float probs[19*19+1] = {0}; + move m = {0}; + double sum = 0; + /* + for(i = 0; i < 19*19+1; ++i){ + probs[i] = tree->visit_count[i]; + } + */ + //softmax(probs, 19*19+1, temp, 1, probs); + for(i = 0; i < 19*19+1; ++i){ + sum += pow(tree->visit_count[i], 1./temp); + } + for(i = 0; i < 19*19+1; ++i){ + probs[i] = pow(tree->visit_count[i], 1./temp) / sum; + } + + int index = sample_array(probs, 19*19+1); + m.row = index / 19; + m.col = index % 19; + m.value = (tree->result+1.)/2.; + m.mcts = (tree->mean[index]+1.)/2.; + + int indexes[nind]; + top_k(probs, 19*19+1, nind, indexes); + print_board(stderr, tree->board, player, indexes); + + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", index/19, index%19, tree->result, tree->prior[index], probs[index], tree->mean[index], (tree->children[index])?tree->children[index]->result:0, tree->visit_count[index]); + int ind = max_index(probs, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, tree->result, tree->prior[ind], probs[ind], tree->mean[ind], (tree->children[ind])?tree->children[ind]->result:0, tree->visit_count[ind]); + ind = max_index(tree->prior, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, tree->result, tree->prior[ind], probs[ind], tree->mean[ind], (tree->children[ind])?tree->children[ind]->result:0, tree->visit_count[ind]); + return m; +} + +/* + float predict_move(network *net, float *board, float *move, int multi, float *ko, float temp) + { + + int i; + + int max_v = 0; + int max_i = 0; + for(i = 0; i < 19*19+1; ++i){ + if(root->visit_count[i] > max_v){ + max_v = root->visit_count[i]; + max_i = i; + } + } + fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + int ind = max_index(root->mean, 
19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", max_i/19, max_i%19, root->result, root->prior[max_i], root->prob[max_i], root->mean[max_i], (root->children[max_i])?root->children[max_i]->result:0, root->visit_count[max_i]); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, root->result, root->prior[ind], root->prob[ind], root->mean[ind], (root->children[ind])?root->children[ind]->result:0, root->visit_count[ind]); + ind = max_index(root->prior, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, root->result, root->prior[ind], root->prob[ind], root->mean[ind], (root->children[ind])?root->children[ind]->result:0, root->visit_count[ind]); + if(root->result < -.9 && root->mean[max_i] < -.9) return -1000.f; + + float val = root->result; + free_mcts(root); + return val; + } + */ + +static int makes_safe_go(float *b, int *lib, int p, int r, int c){ + if (r < 0 || r >= 19 || c < 0 || c >= 19) return 0; + if (occupied(b,r*19 + c) == -p){ + if (lib[r*19 + c] > 1) return 0; + else return 1; + } + if (!occupied(b,r*19 + c)) return 1; + if (lib[r*19 + c] > 1) return 1; + return 0; +} + +int suicide_go(float *b, int p, int r, int c) +{ + int *l = calculate_liberties(b); + int safe = 0; + safe = safe || makes_safe_go(b, l, p, r+1, c); + safe = safe || makes_safe_go(b, l, p, r-1, c); + safe = safe || makes_safe_go(b, l, p, r, c+1); + safe = safe || makes_safe_go(b, l, p, r, c-1); + free(l); + return !safe; +} + +int check_ko(float *x, float *ko) +{ + if(!ko) return 0; + float curr[19*19*3]; + copy_cpu(19*19*3, x, 1, curr, 1); + if(curr[19*19*2] != ko[19*19*2]) flip_board(curr); + if(compare_board(curr, ko)) return 1; + return 0; +} + +int legal_go(float *b, float *ko, int p, int r, int c) +{ + if (occupied(b, r*19+c)) return 0; + float curr[19*19*3]; + 
copy_cpu(19*19*3, b, 1, curr, 1); + move_go(curr, p, r, c); + if(check_ko(curr, ko)) return 0; + if(suicide_go(b, p, r, c)) return 0; + return 1; +} + +/* + move generate_move(mcts_tree *root, network *net, int player, float *board, int multi, float temp, float *ko, int print) + { + move m = {0}; +//root = run_mcts(tree, network *net, float *board, float *ko, int n, float cpuct) +int i, j; +int empty = 1; +for(i = 0; i < 19*19; ++i){ +if (occupied(board, i)) { +empty = 0; +break; +} +} +if(empty) { +m.value = .5; +m.mcts = .5; +m.row = 3; +m.col = 15; +return m; +} + +float move[362]; +if (player < 0) flip_board(board); +float result = predict_move(net, board, move, multi, ko, temp); +if (player < 0) flip_board(board); +if(result == -1000.f) return -2; + +for(i = 0; i < 19; ++i){ +for(j = 0; j < 19; ++j){ +if (!legal_go(board, ko, player, i, j)) move[i*19 + j] = 0; +} +} + +int indexes[nind]; +top_k(move, 19*19+1, nind, indexes); + + +int max = max_index(move, 19*19+1); +int row = max / 19; +int col = max % 19; +int index = sample_array(move, 19*19+1); + +if(print){ +top_k(move, 19*19+1, nind, indexes); +for(i = 0; i < nind; ++i){ +if (!move[indexes[i]]) indexes[i] = -1; +} +print_board(stderr, board, 1, indexes); +fprintf(stderr, "%s To Move\n", player > 0 ? 
"X" : "O"); +fprintf(stderr, "%.2f%% Win Chance\n", (result+1)/2*100); +for(i = 0; i < nind; ++i){ +int index = indexes[i]; +int row = index / 19; +int col = index % 19; +if(row == 19){ +fprintf(stderr, "%d: Pass, %.2f%%\n", i+1, move[index]*100); +} else { +fprintf(stderr, "%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100); +} +} +} +if (row == 19) return -1; + +if (suicide_go(board, player, row, col)){ +return -1; +} + +if (suicide_go(board, player, index/19, index%19)){ +index = max; +} +if (index == 19*19) return -1; +return index; +} +*/ + +void valid_go(char *cfgfile, char *weightfile, int multi, char *filename) +{ + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + float *board = calloc(19*19*3, sizeof(float)); + float *move = calloc(19*19+2, sizeof(float)); + // moves m = load_go_moves("/home/pjreddie/backup/go.test"); + moves m = load_go_moves(filename); + + int N = m.n; + int i,j; + int correct = 0; + for (i = 0; i 0) for(j = 0; j < 19*19; ++j) board[19*19*2 + j] = 1; + predict_move2(net, board, move, multi); + int index = max_index(move, 19*19+1); + if(index == truth) ++correct; + printf("%d Accuracy %f\n", i, (float) correct/(i+1)); + } +} + +int print_game(float *board, FILE *fp) +{ + int i, j; + int count = 3; + fprintf(fp, "komi 6.5\n"); + fprintf(fp, "boardsize 19\n"); + fprintf(fp, "clear_board\n"); + for(j = 0; j < 19; ++j){ + for(i = 0; i < 19; ++i){ + if(occupied(board,j*19 + i) == 1) fprintf(fp, "play black %c%d\n", 'A'+i+(i>=8), 19-j); + if(occupied(board,j*19 + i) == -1) fprintf(fp, "play white %c%d\n", 'A'+i+(i>=8), 19-j); + if(occupied(board,j*19 + i)) ++count; + } + } + return count; +} + + +int stdin_ready() +{ + fd_set readfds; + FD_ZERO(&readfds); + + struct timeval 
timeout; + timeout.tv_sec = 0; + timeout.tv_usec = 0; + FD_SET(STDIN_FILENO, &readfds); + + if (select(1, &readfds, NULL, NULL, &timeout)){ + return 1; + } + return 0; +} + +mcts_tree *ponder(mcts_tree *tree, network *net, float *b, float *ko, int player, float cpuct) +{ + double t = what_time_is_it_now(); + int count = 0; + if (tree) count = tree->total_count; + while(!stdin_ready()){ + if (what_time_is_it_now() - t > 120) break; + tree = run_mcts(tree, net, b, ko, player, 100000, cpuct, .1); + } + fprintf(stderr, "Pondered %d moves...\n", tree->total_count - count); + return tree; +} + +void engine_go(char *filename, char *weightfile, int mcts_iters, float secs, float temp, float cpuct, int anon, int resign) +{ + mcts_tree *root = 0; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *one = calloc(19*19*3, sizeof(float)); + float *two = calloc(19*19*3, sizeof(float)); + int ponder_player = 0; + int passed = 0; + int move_num = 0; + int main_time = 0; + int byo_yomi_time = 0; + int byo_yomi_stones = 0; + int black_time_left = 0; + int black_stones_left = 0; + int white_time_left = 0; + int white_stones_left = 0; + float orig_time = secs; + int old_ponder = 0; + while(1){ + if(ponder_player){ + root = ponder(root, net, board, two, ponder_player, cpuct); + } + old_ponder = ponder_player; + ponder_player = 0; + char buff[256]; + int id = 0; + int has_id = (scanf("%d", &id) == 1); + scanf("%s", buff); + if (feof(stdin)) break; + fprintf(stderr, "%s\n", buff); + char ids[256]; + sprintf(ids, "%d", id); + //fprintf(stderr, "%s\n", buff); + if (!has_id) ids[0] = 0; + if (!strcmp(buff, "protocol_version")){ + printf("=%s 2\n\n", ids); + } else if (!strcmp(buff, "name")){ + if(anon){ + printf("=%s The Fool!\n\n", ids); + }else{ + printf("=%s DarkGo\n\n", ids); + } + } else if (!strcmp(buff, "time_settings")){ + ponder_player = old_ponder; + 
scanf("%d %d %d", &main_time, &byo_yomi_time, &byo_yomi_stones); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "time_left")){ + ponder_player = old_ponder; + char color[256]; + int time = 0, stones = 0; + scanf("%s %d %d", color, &time, &stones); + if (color[0] == 'b' || color[0] == 'B'){ + black_time_left = time; + black_stones_left = stones; + } else { + white_time_left = time; + white_stones_left = stones; + } + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "version")){ + if(anon){ + printf("=%s :-DDDD\n\n", ids); + }else { + printf("=%s 1.0. Want more DarkGo? You can find me on OGS, unlimited games, no waiting! https://online-go.com/user/view/434218\n\n", ids); + } + } else if (!strcmp(buff, "known_command")){ + char comm[256]; + scanf("%s", comm); + int known = (!strcmp(comm, "protocol_version") || + !strcmp(comm, "name") || + !strcmp(comm, "version") || + !strcmp(comm, "known_command") || + !strcmp(comm, "list_commands") || + !strcmp(comm, "quit") || + !strcmp(comm, "boardsize") || + !strcmp(comm, "clear_board") || + !strcmp(comm, "komi") || + !strcmp(comm, "final_status_list") || + !strcmp(comm, "play") || + !strcmp(comm, "genmove_white") || + !strcmp(comm, "genmove_black") || + !strcmp(comm, "fixed_handicap") || + !strcmp(comm, "genmove")); + if(known) printf("=%s true\n\n", ids); + else printf("=%s false\n\n", ids); + } else if (!strcmp(buff, "list_commands")){ + printf("=%s protocol_version\nshowboard\nname\nversion\nknown_command\nlist_commands\nquit\nboardsize\nclear_board\nkomi\nplay\ngenmove_black\ngenmove_white\ngenmove\nfinal_status_list\nfixed_handicap\n\n", ids); + } else if (!strcmp(buff, "quit")){ + break; + } else if (!strcmp(buff, "boardsize")){ + int boardsize = 0; + scanf("%d", &boardsize); + //fprintf(stderr, "%d\n", boardsize); + if(boardsize != 19){ + printf("?%s unacceptable size\n\n", ids); + } else { + root = move_mcts(root, -1); + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + move_num = 0; + printf("=%s 
\n\n", ids); + } + } else if (!strcmp(buff, "fixed_handicap")){ + int handicap = 0; + scanf("%d", &handicap); + int indexes[] = {72, 288, 300, 60, 180, 174, 186, 66, 294}; + int i; + for(i = 0; i < handicap; ++i){ + board[indexes[i]] = 1; + ++move_num; + } + root = move_mcts(root, -1); + } else if (!strcmp(buff, "clear_board")){ + passed = 0; + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + move_num = 0; + root = move_mcts(root, -1); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "komi")){ + float komi = 0; + scanf("%f", &komi); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "showboard")){ + printf("=%s \n", ids); + print_board(stdout, board, 1, 0); + printf("\n"); + } else if (!strcmp(buff, "play") || !strcmp(buff, "black") || !strcmp(buff, "white")){ + ++move_num; + char color[256]; + if(!strcmp(buff, "play")) + { + scanf("%s ", color); + } else { + scanf(" "); + color[0] = buff[0]; + } + char c; + int r; + int count = scanf("%c%d", &c, &r); + int player = (color[0] == 'b' || color[0] == 'B') ? 
1 : -1; + if((c == 'p' || c == 'P') && count < 2) { + passed = 1; + printf("=%s \n\n", ids); + char *line = fgetl(stdin); + free(line); + fflush(stdout); + fflush(stderr); + root = move_mcts(root, 19*19); + continue; + } else { + passed = 0; + } + if(c >= 'A' && c <= 'Z') c = c - 'A'; + if(c >= 'a' && c <= 'z') c = c - 'a'; + if(c >= 8) --c; + r = 19 - r; + fprintf(stderr, "move: %d %d\n", r, c); + + float *swap = two; + two = one; + one = swap; + move_go(board, player, r, c); + copy_cpu(19*19*3, board, 1, one, 1); + if(root) fprintf(stderr, "Prior: %f\n", root->prior[r*19 + c]); + if(root) fprintf(stderr, "Mean: %f\n", root->mean[r*19 + c]); + if(root) fprintf(stderr, "Result: %f\n", root->result); + root = move_mcts(root, r*19 + c); + if(root) fprintf(stderr, "Visited: %d\n", root->total_count); + else fprintf(stderr, "NOT VISITED\n"); + + printf("=%s \n\n", ids); + //print_board(stderr, board, 1, 0); + } else if (!strcmp(buff, "genmove") || !strcmp(buff, "genmove_black") || !strcmp(buff, "genmove_white")){ + ++move_num; + int player = 0; + if(!strcmp(buff, "genmove")){ + char color[256]; + scanf("%s", color); + player = (color[0] == 'b' || color[0] == 'B') ? 
1 : -1; + } else if (!strcmp(buff, "genmove_black")){ + player = 1; + } else { + player = -1; + } + if(player > 0){ + if(black_time_left <= 30) secs = 2.5; + else secs = orig_time; + } else { + if(white_time_left <= 30) secs = 2.5; + else secs = orig_time; + } + ponder_player = -player; + + //tree = generate_move(net, player, board, multi, .1, two, 1); + double t = what_time_is_it_now(); + root = run_mcts(root, net, board, two, player, mcts_iters, cpuct, secs); + fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + move m = pick_move(root, temp, player); + root = move_mcts(root, m.row*19 + m.col); + + + if(move_num > resign && m.value < .1 && m.mcts < .1){ + printf("=%s resign\n\n", ids); + } else if(m.row == 19){ + printf("=%s pass\n\n", ids); + passed = 0; + } else { + int row = m.row; + int col = m.col; + + float *swap = two; + two = one; + one = swap; + + move_go(board, player, row, col); + copy_cpu(19*19*3, board, 1, one, 1); + row = 19 - row; + if (col >= 8) ++col; + printf("=%s %c%d\n\n", ids, 'A' + col, row); + } + + } else if (!strcmp(buff, "p")){ + //print_board(board, 1, 0); + } else if (!strcmp(buff, "final_status_list")){ + char type[256]; + scanf("%s", type); + fprintf(stderr, "final_status\n"); + char *line = fgetl(stdin); + free(line); + if(type[0] == 'd' || type[0] == 'D'){ + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, "%s final_status_list dead\n", ids); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + while((l = fgetl(p))){ + printf("%s\n", l); + free(l); + } + } else { + printf("?%s unknown command\n\n", ids); + } + } else if (!strcmp(buff, "kgs-genmove_cleanup")){ + char type[256]; + scanf("%s", type); + fprintf(stderr, "kgs-genmove_cleanup\n"); + char *line = fgetl(stdin); + free(line); + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, 
"%s kgs-genmove_cleanup %s\n", ids, type); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + while((l = fgetl(p))){ + printf("%s\n", l); + free(l); + } + } else { + char *line = fgetl(stdin); + free(line); + printf("?%s unknown command\n\n", ids); + } + fflush(stdout); + fflush(stderr); + } + printf("%d %d %d\n",passed, black_stones_left, white_stones_left); +} + +void test_go(char *cfg, char *weights, int multi) +{ + int i; + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(time(0)); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *move = calloc(19*19+1, sizeof(float)); + int color = 1; + while(1){ + float result = predict_move2(net, board, move, multi); + printf("%.2f%% Win Chance\n", (result+1)/2*100); + + int indexes[nind]; + int row, col; + top_k(move, 19*19+1, nind, indexes); + print_board(stderr, board, color, indexes); + for(i = 0; i < nind; ++i){ + int index = indexes[i]; + row = index / 19; + col = index % 19; + if(row == 19){ + printf("%d: Pass, %.2f%%\n", i+1, move[index]*100); + } else { + printf("%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100); + } + } + //if(color == 1) printf("\u25EF Enter move: "); + //else printf("\u25C9 Enter move: "); + if(color == 1) printf("X Enter move: "); + else printf("O Enter move: "); + + char c; + char *line = fgetl(stdin); + int picked = 1; + int dnum = sscanf(line, "%d", &picked); + int cnum = sscanf(line, "%c", &c); + if (strlen(line) == 0 || dnum) { + --picked; + if (picked < nind){ + int index = indexes[picked]; + row = index / 19; + col = index % 19; + if(row < 19){ + move_go(board, 1, row, col); + } + } + } else if (cnum){ + if (c <= 'T' && c >= 'A'){ + int num = sscanf(line, "%c %d", &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num 
== 2) move_go(board, 1, row, col); + } else if (c == 'p') { + // Pass + } else if(c=='b' || c == 'w'){ + char g; + int num = sscanf(line, "%c %c %d", &g, &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num == 3) { + int mc = (g == 'b') ? 1 : -1; + if (mc == color) { + board[row*19 + col] = 1; + } else { + board[19*19 + row*19 + col] = 1; + } + } + } else if(c == 'c'){ + char g; + int num = sscanf(line, "%c %c %d", &g, &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num == 3) { + board[row*19 + col] = 0; + board[19*19 + row*19 + col] = 0; + } + } + } + free(line); + flip_board(board); + color = -color; + } +} + +float score_game(float *board) +{ + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, "final_score\n"); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + float score = 0; + char player = 0; + while((l = fgetl(p))){ + fprintf(stderr, "%s \t", l); + int n = sscanf(l, "= %c+%f", &player, &score); + free(l); + if (n == 2) break; + } + if(player == 'W') score = -score; + pclose(p); + return score; +} + +void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi) +{ + mcts_tree *tree1 = 0; + mcts_tree *tree2 = 0; + network *net = load_network(filename, weightfile, 0); + //set_batch_network(net, 1); + + network *net2; + if (f2) { + net2 = parse_network_cfg(f2); + if(w2){ + load_weights(net2, w2); + } + } else { + net2 = calloc(1, sizeof(network)); + *net2 = *net; + } + srand(time(0)); + char boards[600][93]; + int count = 0; + //set_batch_network(net, 1); + //set_batch_network(net2, 1); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *one = calloc(19*19*3, sizeof(float)); + float *two = calloc(19*19*3, sizeof(float)); + int done = 0; + int player = 1; + int p1 = 
0; + int p2 = 0; + int total = 0; + float temp = .1; + int mcts_iters = 500; + float cpuct = 5; + while(1){ + if (done){ + tree1 = move_mcts(tree1, -1); + tree2 = move_mcts(tree2, -1); + float score = score_game(board); + if((score > 0) == (total%2==0)) ++p1; + else ++p2; + ++total; + fprintf(stderr, "Total: %d, Player 1: %f, Player 2: %f\n", total, (float)p1/total, (float)p2/total); + sleep(1); + /* + int i = (score > 0)? 0 : 1; + int j; + for(; i < count; i += 2){ + for(j = 0; j < 93; ++j){ + printf("%c", boards[i][j]); + } + printf("\n"); + } + */ + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + player = 1; + done = 0; + count = 0; + fflush(stdout); + fflush(stderr); + } + //print_board(stderr, board, 1, 0); + //sleep(1); + + if ((total%2==0) == (player==1)){ + //mcts_iters = 4500; + cpuct = 5; + } else { + //mcts_iters = 500; + cpuct = 1; + } + network *use = ((total%2==0) == (player==1)) ? net : net2; + mcts_tree *t = ((total%2==0) == (player==1)) ? tree1 : tree2; + t = run_mcts(t, use, board, two, player, mcts_iters, cpuct, 0); + move m = pick_move(t, temp, player); + if(((total%2==0) == (player==1))) tree1 = t; + else tree2 = t; + + tree1 = move_mcts(tree1, m.row*19 + m.col); + tree2 = move_mcts(tree2, m.row*19 + m.col); + + if(m.row == 19){ + done = 1; + continue; + } + int row = m.row; + int col = m.col; + + float *swap = two; + two = one; + one = swap; + + if(player < 0) flip_board(board); + boards[count][0] = row; + boards[count][1] = col; + board_to_string(boards[count] + 2, board); + if(player < 0) flip_board(board); + ++count; + + move_go(board, player, row, col); + copy_cpu(19*19*3, board, 1, one, 1); + + player = -player; + } +} + +void run_go(int argc, char **argv) +{ + //boards_go(); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + 
if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + int clear = find_arg(argc, argv, "-clear"); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *c2 = (argc > 5) ? argv[5] : 0; + char *w2 = (argc > 6) ? argv[6] : 0; + int multi = find_arg(argc, argv, "-multi"); + int anon = find_arg(argc, argv, "-anon"); + int iters = find_int_arg(argc, argv, "-iters", 500); + int resign = find_int_arg(argc, argv, "-resign", 175); + float cpuct = find_float_arg(argc, argv, "-cpuct", 5); + float temp = find_float_arg(argc, argv, "-temp", .1); + float time = find_float_arg(argc, argv, "-time", 0); + if(0==strcmp(argv[2], "train")) train_go(cfg, weights, c2, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) valid_go(cfg, weights, multi, c2); + else if(0==strcmp(argv[2], "self")) self_go(cfg, weights, c2, w2, multi); + else if(0==strcmp(argv[2], "test")) test_go(cfg, weights, multi); + else if(0==strcmp(argv[2], "engine")) engine_go(cfg, weights, iters, time, temp, cpuct, anon, resign); +} + + diff --git a/workloads/realworld/pinned/darknet/examples/instance-segmenter.c b/workloads/realworld/pinned/darknet/examples/instance-segmenter.c new file mode 100644 index 0000000000000000000000000000000000000000..664e71426d58e19f758bab198783eac178a3cdc4 --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/instance-segmenter.c @@ -0,0 +1,267 @@ +#include "darknet.h" +#include +#include + +void normalize_image2(image p); +void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", 
ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + image pred = get_network_image(net); + + image embed = pred; + embed.c = 3; + embed.data += embed.w*embed.h*80; + + int div = net->w/pred.w; + assert(pred.w * div == net->w); + assert(pred.h * div == net->h); + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.scale = div; + args.num_boxes = 90; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.classes = 80; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = ISEG_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = 
train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(display){ + image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]); + image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]); + pred.c = 80; + image mask = mask_to_rgb(tr); + image prmask = mask_to_rgb(pred); + image ecopy = copy_image(embed); + normalize_image2(ecopy); + show_image(ecopy, "embed", 1); + free_image(ecopy); + + show_image(im, "input", 1); + show_image(prmask, "pred", 1); + show_image(mask, "truth", 100); + free_image(mask); + free_image(prmask); + } + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_isegmenter(char *datafile, char *cfg, char *weights, char *filename) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 
0); + image sized = letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + show_image(sized, "orig", 1); + show_image(prmask, "pred", 0); + free_image(im); + free_image(sized); + free_image(prmask); + if (filename) break; + } +} + + +void demo_isegmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Classifier Demo\n"); + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = letterbox_image(in, net->w, net->h); + + network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + show_image(prmask, "Segmenter", 10); + + free_image(in_s); + free_image(in); + free_image(prmask); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_isegmenter(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') 
++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_isegmenter(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_isegmenter(data, cfg, weights, gpus, ngpus, clear, display); + else if(0==strcmp(argv[2], "demo")) demo_isegmenter(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/pinned/darknet/examples/lsd.c b/workloads/realworld/pinned/darknet/examples/lsd.c new file mode 100644 index 0000000000000000000000000000000000000000..4ab944c884b9df422cd2b273b1faee128f2ab112 --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/lsd.c @@ -0,0 +1,1378 @@ +#include +#include "darknet.h" + +/* +void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg, char *aweight, int clear) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + //char *style_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *style_images = "/home/pjreddie/zelda.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + network fnet = load_network(fcfg, fweight, clear); + network gnet = load_network(gcfg, gweight, clear); + network anet = load_network(acfg, aweight, clear); + char *gbase = basecfg(gcfg); + char *abase = basecfg(acfg); + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + int i = 
*gnet->seen/imgs; + data train, tbuffer; + data style, sbuffer; + + + list *slist = get_paths(style_images); + char **spaths = (char **)list_to_array(slist); + + list *tlist = get_paths(train_images); + char **tpaths = (char **)list_to_array(tlist); + + load_args targs= get_base_args(gnet); + targs.paths = tpaths; + targs.n = imgs; + targs.m = tlist->size; + targs.d = &tbuffer; + targs.type = CLASSIFICATION_DATA; + targs.classes = 1; + char *ls[1] = {"zelda"}; + targs.labels = ls; + + load_args sargs = get_base_args(gnet); + sargs.paths = spaths; + sargs.n = imgs; + sargs.m = slist->size; + sargs.d = &sbuffer; + sargs.type = CLASSIFICATION_DATA; + sargs.classes = 1; + sargs.labels = ls; + + pthread_t tload_thread = load_data_in_thread(targs); + pthread_t sload_thread = load_data_in_thread(sargs); + clock_t time; + + float aloss_avg = -1; + float floss_avg = -1; + + fnet->train=1; + int x_size = fnet->inputs*fnet->batch; + int y_size = fnet->truths*fnet->batch; + float *X = calloc(x_size, sizeof(float)); + float *y = calloc(y_size, sizeof(float)); + + + int ax_size = anet->inputs*anet->batch; + int ay_size = anet->truths*anet->batch; + fill_gpu(ay_size, .9, anet->truth_gpu, 1); + anet->delta_gpu = cuda_make_array(0, ax_size); + anet->train = 1; + + int gx_size = gnet->inputs*gnet->batch; + int gy_size = gnet->truths*gnet->batch; + gstate.input = cuda_make_array(0, gx_size); + gstate.truth = 0; + gstate.delta = 0; + gstate.train = 1; + + while (get_current_batch(gnet) < gnet->max_batches) { + i += 1; + time=clock(); + pthread_join(tload_thread, 0); + pthread_join(sload_thread, 0); + train = tbuffer; + style = sbuffer; + tload_thread = load_data_in_thread(targs); + sload_thread = load_data_in_thread(sargs); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data generated = copy_data(train); + time=clock(); + + int j, k; + float floss = 0; + for(j = 0; j < fnet->subdivisions; ++j){ + layer imlayer = gnet->layers[gnet->n - 1]; + get_next_batch(train, 
fnet->batch, j*fnet->batch, X, y); + + cuda_push_array(fstate.input, X, x_size); + cuda_push_array(gstate.input, X, gx_size); + *gnet->seen += gnet->batch; + + forward_network_gpu(fnet, fstate); + float *feats = fnet->layers[fnet->n - 2].output_gpu; + copy_gpu(y_size, feats, 1, fstate.truth, 1); + + forward_network_gpu(gnet, gstate); + float *gen = gnet->layers[gnet->n-1].output_gpu; + copy_gpu(x_size, gen, 1, fstate.input, 1); + + fill_gpu(x_size, 0, fstate.delta, 1); + forward_network_gpu(fnet, fstate); + backward_network_gpu(fnet, fstate); + //HERE + + astate.input = gen; + fill_gpu(ax_size, 0, astate.delta, 1); + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + float *delta = imlayer.delta_gpu; + fill_gpu(x_size, 0, delta, 1); + scal_gpu(x_size, 100, astate.delta, 1); + scal_gpu(x_size, .001, fstate.delta, 1); + axpy_gpu(x_size, 1, fstate.delta, 1, delta, 1); + axpy_gpu(x_size, 1, astate.delta, 1, delta, 1); + + //fill_gpu(x_size, 0, delta, 1); + //cuda_push_array(delta, X, x_size); + //axpy_gpu(x_size, -1, imlayer.output_gpu, 1, delta, 1); + //printf("pix error: %f\n", cuda_mag_array(delta, x_size)); + printf("fea error: %f\n", cuda_mag_array(fstate.delta, x_size)); + printf("adv error: %f\n", cuda_mag_array(astate.delta, x_size)); + //axpy_gpu(x_size, 1, astate.delta, 1, delta, 1); + + backward_network_gpu(gnet, gstate); + + floss += get_network_cost(fnet) /(fnet->subdivisions*fnet->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1); + generated.y.vals[index][0] = .1; + style.y.vals[index][0] = .9; + } + } + +*/ +/* + image sim = float_to_image(anet->w, anet->h, anet->c, style.X.vals[j]); + show_image(sim, "style"); + cvWaitKey(0); + */ + /* + + harmless_update_network_gpu(anet); + + data merge = concat_data(style, generated); 
+ randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(generated); + free_data(style); + if (aloss_avg < 0) aloss_avg = aloss; + if (floss_avg < 0) floss_avg = floss; + aloss_avg = aloss_avg*.9 + aloss*.1; + floss_avg = floss_avg*.9 + floss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, floss, aloss, floss_avg, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, gbase, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, gbase); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } +#endif +} +*/ + +/* +void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/train1.txt"; + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network net = load_network(cfg, weight, clear); + network anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = net->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char 
**)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.type = CLASSIFICATION_DATA; + args.classes = 1; + char *ls[1] = {"coco"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + network_state gstate = {0}; + gstate.index = 0; + gstate.net = net; + int x_size = get_network_input_size(net)*net->batch; + int y_size = x_size; + gstate.input = cuda_make_array(0, x_size); + gstate.truth = cuda_make_array(0, y_size); + gstate.delta = 0; + gstate.train = 1; + float *pixs = calloc(x_size, sizeof(float)); + float *graypixs = calloc(x_size, sizeof(float)); + float *y = calloc(y_size, sizeof(float)); + + network_state astate = {0}; + astate.index = 0; + astate.net = anet; + int ay_size = get_network_output_size(anet)*anet->batch; + astate.input = 0; + astate.truth = 0; + astate.delta = 0; + astate.train = 1; + + float *imerror = cuda_make_array(0, imlayer.outputs); + float *ones_gpu = cuda_make_array(0, ay_size); + fill_gpu(ay_size, .9, ones_gpu, 1); + + float aloss_avg = -1; + float gloss_avg = -1; + + //data generated = copy_data(train); + + while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gray = copy_data(train); + for(j = 0; j < imgs; ++j){ + image gim = float_to_image(net->w, net->h, net->c, gray.X.vals[j]); + grayscale_image_3c(gim); + train.y.vals[j][0] = .9; + + image yim = float_to_image(net->w, net->h, net->c, train.X.vals[j]); + //rgb_to_yuv(yim); + } + time=clock(); + float gloss = 0; + + 
for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, pixs, y); + get_next_batch(gray, net->batch, j*net->batch, graypixs, y); + cuda_push_array(gstate.input, graypixs, x_size); + cuda_push_array(gstate.truth, pixs, y_size); + */ + /* + image origi = float_to_image(net->w, net->h, 3, pixs); + image grayi = float_to_image(net->w, net->h, 3, graypixs); + show_image(grayi, "gray"); + show_image(origi, "orig"); + cvWaitKey(0); + */ + /* + *net->seen += net->batch; + forward_network_gpu(net, gstate); + + fill_gpu(imlayer.outputs, 0, imerror, 1); + astate.input = imlayer.output_gpu; + astate.delta = imerror; + astate.truth = ones_gpu; + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + scal_gpu(imlayer.outputs, .1, net->layers[net->n-1].delta_gpu, 1); + + backward_network_gpu(net, gstate); + + scal_gpu(imlayer.outputs, 1000, imerror, 1); + + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs)); + printf("features %f\n", cuda_mag_array(net->layers[net->n-1].delta_gpu, imlayer.outputs)); + + axpy_gpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1); + + gloss += get_network_cost(net) /(net->subdivisions*net->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1); + gray.y.vals[index][0] = .1; + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gray); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + update_network_gpu(anet); + free_data(merge); + free_data(train); + free_data(gray); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, 
aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +#endif +} +*/ + +void slerp(float *start, float *end, float s, int n, float *out) +{ + float omega = acos(dot_cpu(n, start, 1, end, 1)); + float so = sin(omega); + fill_cpu(n, 0, out, 1); + axpy_cpu(n, sin((1-s)*omega)/so, start, 1, out, 1); + axpy_cpu(n, sin(s*omega)/so, end, 1, out, 1); + + float mag = mag_array(out, n); + scale_array(out, n, 1./mag); +} + +image random_unit_vector_image(int w, int h, int c) +{ + image im = make_image(w, h, c); + int i; + for(i = 0; i < im.w*im.h*im.c; ++i){ + im.data[i] = rand_normal(); + } + float mag = mag_array(im.data, im.w*im.h*im.c); + scale_array(im.data, im.w*im.h*im.c, 1./mag); + return im; +} + +void inter_dcgan(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int i, imlayer = 0; + + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = i; + printf("%d\n", i); + break; + } + } + image start = random_unit_vector_image(net->w, net->h, net->c); + image end = random_unit_vector_image(net->w, net->h, net->c); + image im = make_image(net->w, net->h, net->c); + image orig = copy_image(start); + + int c = 0; + int count = 0; + int max_count = 15; + while(1){ + ++c; + + if(count == max_count){ + count = 0; + free_image(start); + start = end; + end = 
random_unit_vector_image(net->w, net->h, net->c); + if(c > 300){ + end = orig; + } + if(c>300 + max_count) return; + } + ++count; + + slerp(start.data, end.data, (float)count / max_count, im.w*im.h*im.c, im.data); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + normalize_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + //char buff[256]; + sprintf(buff, "out%05d", c); + save_image(out, "out"); + save_image(out, buff); + show_image(out, "out", 0); + } +} + +void test_dcgan(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int imlayer = 0; + + imlayer = net->n-1; + + while(1){ + image im = make_image(net->w, net->h, net->c); + int i; + for(i = 0; i < im.w*im.h*im.c; ++i){ + im.data[i] = rand_normal(); + } + //float mag = mag_array(im.data, im.w*im.h*im.c); + //scale_array(im.data, im.w*im.h*im.c, 1./mag); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + normalize_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 0); + + free_image(im); + } +} + +void set_network_alpha_beta(network *net, float alpha, float beta) +{ + int i; + for(i = 0; i < net->n; ++i){ + if(net->layers[i].type == SHORTCUT){ + net->layers[i].alpha = alpha; + net->layers[i].beta = beta; + } + } +} + +void train_prog(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) +{ +#ifdef GPU + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network *gnet = load_network(cfg, weight, clear); + network 
*anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = gnet->layers[gnet->n-1]; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + i = *gnet->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(anet); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + args.threads=16; + args.classes = 1; + char *ls[2] = {"imagenet", "zzzzzzzz"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + gnet->train = 1; + anet->train = 1; + + int x_size = gnet->inputs*gnet->batch; + int y_size = gnet->truths*gnet->batch; + float *imerror = cuda_make_array(0, y_size); + + float aloss_avg = -1; + + if (maxbatch == 0) maxbatch = gnet->max_batches; + while (get_current_batch(gnet) < maxbatch) { + { + int cb = get_current_batch(gnet); + float alpha = (float) cb / (maxbatch/2); + if(alpha > 1) alpha = 1; + float beta = 1 - alpha; + printf("%f %f\n", alpha, beta); + set_network_alpha_beta(gnet, alpha, beta); + set_network_alpha_beta(anet, beta, alpha); + } + + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gen = copy_data(train); + for (j = 0; j < imgs; ++j) { + train.y.vals[j][0] = 1; + gen.y.vals[j][0] = 0; + } + time=clock(); + + for (j = 0; j < gnet->subdivisions; ++j) { + get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0); + int z; + for(z = 0; z < x_size; ++z){ + gnet->input[z] = rand_normal(); + } + /* + for(z = 0; z < gnet->batch; ++z){ + float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs); + scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag); + } + */ + *gnet->seen += 
gnet->batch; + forward_network(gnet); + + fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); + fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1); + copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1); + anet->delta_gpu = imerror; + forward_network(anet); + backward_network(anet); + + //float genaloss = *anet->cost / anet->batch; + + scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); + scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1); + + axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1); + + backward_network(gnet); + + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gen); + float aloss = train_network(anet, merge); + +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + show_image(im, "gen", 1); + show_image(im2, "train", 1); + save_image(im, "gen"); + save_image(im2, "train"); + } +#endif + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(gen); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + + printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + 
sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(gnet, buff); +#endif +} + +void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) +{ +#ifdef GPU + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network *gnet = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + //float orig_rate = anet->learning_rate; + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < gnet->n; ++i) { + if (gnet->layers[i].out_c == 3) { + imlayer = gnet->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + i = *gnet->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(anet); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + args.threads=16; + args.classes = 1; + char *ls[2] = {"imagenet", "zzzzzzzz"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + gnet->train = 1; + anet->train = 1; + + int x_size = gnet->inputs*gnet->batch; + int y_size = gnet->truths*gnet->batch; + float *imerror = cuda_make_array(0, y_size); + + //int ay_size = anet->truths*anet->batch; + + float aloss_avg = -1; + + //data generated = copy_data(train); + + if (maxbatch == 0) maxbatch = gnet->max_batches; + while (get_current_batch(gnet) < maxbatch) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + //translate_data_rows(train, -.5); + //scale_data_rows(train, 2); + + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gen 
= copy_data(train); + for (j = 0; j < imgs; ++j) { + train.y.vals[j][0] = 1; + gen.y.vals[j][0] = 0; + } + time=clock(); + + for(j = 0; j < gnet->subdivisions; ++j){ + get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0); + int z; + for(z = 0; z < x_size; ++z){ + gnet->input[z] = rand_normal(); + } + for(z = 0; z < gnet->batch; ++z){ + float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs); + scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag); + } + /* + for(z = 0; z < 100; ++z){ + printf("%f, ", gnet->input[z]); + } + printf("\n"); + printf("input: %f %f\n", mean_array(gnet->input, x_size), variance_array(gnet->input, x_size)); + */ + + //cuda_push_array(gnet->input_gpu, gnet->input, x_size); + //cuda_push_array(gnet->truth_gpu, gnet->truth, y_size); + *gnet->seen += gnet->batch; + forward_network(gnet); + + fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); + fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1); + copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1); + anet->delta_gpu = imerror; + forward_network(anet); + backward_network(anet); + + //float genaloss = *anet->cost / anet->batch; + //printf("%f\n", genaloss); + + scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); + scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1); + + //printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch)); + //printf("features %f\n", cuda_mag_array(gnet->layers[gnet->n-1].delta_gpu, imlayer.outputs*imlayer.batch)); + + axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1); + + backward_network(gnet); + + /* + for(k = 0; k < gnet->n; ++k){ + layer l = gnet->layers[k]; + cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); + printf("%d: %f %f\n", k, mean_array(l.output, l.outputs*l.batch), variance_array(l.output, l.outputs*l.batch)); + } + */ + + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + 
copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gen); + //randomize_data(merge); + float aloss = train_network(anet, merge); + + //translate_image(im, 1); + //scale_image(im, .5); + //translate_image(im2, 1); + //scale_image(im2, .5); +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + show_image(im, "gen", 1); + show_image(im2, "train", 1); + save_image(im, "gen"); + save_image(im2, "train"); + } +#endif + + /* + if(aloss < .1){ + anet->learning_rate = 0; + } else if (aloss > .3){ + anet->learning_rate = orig_rate; + } + */ + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(gen); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + + printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(gnet, buff); +#endif +} + +void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/train1.txt"; + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + char *backup_directory 
= "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network *net = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = net->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(net); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + + args.type = CLASSIFICATION_DATA; + args.classes = 1; + char *ls[2] = {"imagenet"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + int x_size = net->inputs*net->batch; + //int y_size = x_size; + net->delta = 0; + net->train = 1; + float *pixs = calloc(x_size, sizeof(float)); + float *graypixs = calloc(x_size, sizeof(float)); + //float *y = calloc(y_size, sizeof(float)); + + //int ay_size = anet->outputs*anet->batch; + anet->delta = 0; + anet->train = 1; + + float *imerror = cuda_make_array(0, imlayer.outputs*imlayer.batch); + + float aloss_avg = -1; + float gloss_avg = -1; + + //data generated = copy_data(train); + + while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gray = copy_data(train); + for(j = 0; j < imgs; ++j){ + image gim = float_to_image(net->w, net->h, net->c, gray.X.vals[j]); + grayscale_image_3c(gim); + train.y.vals[j][0] = .95; + gray.y.vals[j][0] = .05; + } + time=clock(); + float gloss = 0; + + for(j = 0; j < 
net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, pixs, 0); + get_next_batch(gray, net->batch, j*net->batch, graypixs, 0); + cuda_push_array(net->input_gpu, graypixs, net->inputs*net->batch); + cuda_push_array(net->truth_gpu, pixs, net->truths*net->batch); + /* + image origi = float_to_image(net->w, net->h, 3, pixs); + image grayi = float_to_image(net->w, net->h, 3, graypixs); + show_image(grayi, "gray"); + show_image(origi, "orig"); + cvWaitKey(0); + */ + *net->seen += net->batch; + forward_network_gpu(net); + + fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); + copy_gpu(anet->inputs*anet->batch, imlayer.output_gpu, 1, anet->input_gpu, 1); + fill_gpu(anet->inputs*anet->batch, .95, anet->truth_gpu, 1); + anet->delta_gpu = imerror; + forward_network_gpu(anet); + backward_network_gpu(anet); + + scal_gpu(imlayer.outputs*imlayer.batch, 1./100., net->layers[net->n-1].delta_gpu, 1); + + scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); + + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch)); + printf("features %f\n", cuda_mag_array(net->layers[net->n-1].delta_gpu, imlayer.outputs*imlayer.batch)); + + axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, net->layers[net->n-1].delta_gpu, 1); + + backward_network_gpu(net); + + + gloss += *net->cost /(net->subdivisions*net->batch); + + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gray); + //randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gray.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + show_image(im, "gen", 1); + show_image(im2, "train", 1); + } +#endif + free_data(merge); + free_data(train); + 
free_data(gray); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +#endif +} + +/* + void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfile, int clear) + { +#ifdef GPU +char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; +char *backup_directory = "/home/pjreddie/backup/"; +srand(time(0)); +char *base = basecfg(cfgfile); +printf("%s\n", base); +network net = parse_network_cfg(cfgfile); +if(weightfile){ +load_weights(&net, weightfile); +} +if(clear) *net->seen = 0; + +char *abase = basecfg(acfgfile); +network anet = parse_network_cfg(acfgfile); +if(aweightfile){ +load_weights(&anet, aweightfile); +} +if(clear) *anet->seen = 0; + +int i, j, k; +layer imlayer = {0}; +for (i = 0; i < net->n; ++i) { +if (net->layers[i].out_c == 3) { +imlayer = net->layers[i]; +break; +} +} + +printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); +int imgs = net->batch*net->subdivisions; +i = *net->seen/imgs; +data train, buffer; + + +list *plist = get_paths(train_images); +//int N = plist->size; +char **paths = (char **)list_to_array(plist); + +load_args args = {0}; +args.w = net->w; +args.h = net->h; +args.paths 
= paths; +args.n = imgs; +args.m = plist->size; +args.d = &buffer; + +args.min = net->min_crop; +args.max = net->max_crop; +args.angle = net->angle; +args.aspect = net->aspect; +args.exposure = net->exposure; +args.saturation = net->saturation; +args.hue = net->hue; +args.size = net->w; +args.type = CLASSIFICATION_DATA; +args.classes = 1; +char *ls[1] = {"coco"}; +args.labels = ls; + +pthread_t load_thread = load_data_in_thread(args); +clock_t time; + +network_state gstate = {0}; +gstate.index = 0; +gstate.net = net; +int x_size = get_network_input_size(net)*net->batch; +int y_size = 1*net->batch; +gstate.input = cuda_make_array(0, x_size); +gstate.truth = 0; +gstate.delta = 0; +gstate.train = 1; +float *X = calloc(x_size, sizeof(float)); +float *y = calloc(y_size, sizeof(float)); + +network_state astate = {0}; +astate.index = 0; +astate.net = anet; +int ay_size = get_network_output_size(anet)*anet->batch; +astate.input = 0; +astate.truth = 0; +astate.delta = 0; +astate.train = 1; + +float *imerror = cuda_make_array(0, imlayer.outputs); +float *ones_gpu = cuda_make_array(0, ay_size); +fill_gpu(ay_size, 1, ones_gpu, 1); + +float aloss_avg = -1; +float gloss_avg = -1; + +//data generated = copy_data(train); + +while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data generated = copy_data(train); + time=clock(); + float gloss = 0; + + for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, X, y); + cuda_push_array(gstate.input, X, x_size); + *net->seen += net->batch; + forward_network_gpu(net, gstate); + + fill_gpu(imlayer.outputs, 0, imerror, 1); + astate.input = imlayer.output_gpu; + astate.delta = imerror; + astate.truth = ones_gpu; + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + scal_gpu(imlayer.outputs, 1, imerror, 1); + 
axpy_gpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1); + + backward_network_gpu(net, gstate); + + printf("features %f\n", cuda_mag_array(imlayer.delta_gpu, imlayer.outputs)); + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs)); + + gloss += get_network_cost(net) /(net->subdivisions*net->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1); + generated.y.vals[index][0] = 0; + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, generated); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + update_network_gpu(anet); + free_data(merge); + free_data(train); + free_data(generated); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } +} +char buff[256]; +sprintf(buff, "%s/%s_final.weights", backup_directory, base); +save_weights(net, buff); +#endif +} +*/ + +/* + void train_lsd(char *cfgfile, char *weightfile, int clear) + { + char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + 
printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + if(clear) *net->seen = 0; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); +//int N = plist->size; +char **paths = (char **)list_to_array(plist); + +load_args args = {0}; +args.w = net->w; +args.h = net->h; +args.paths = paths; +args.n = imgs; +args.m = plist->size; +args.d = &buffer; + +args.min = net->min_crop; +args.max = net->max_crop; +args.angle = net->angle; +args.aspect = net->aspect; +args.exposure = net->exposure; +args.saturation = net->saturation; +args.hue = net->hue; +args.size = net->w; +args.type = CLASSIFICATION_DATA; +args.classes = 1; +char *ls[1] = {"coco"}; +args.labels = ls; + +pthread_t load_thread = load_data_in_thread(args); +clock_t time; +//while(i*imgs < N*120){ +while(get_current_batch(net) < net->max_batches){ +i += 1; +time=clock(); +pthread_join(load_thread, 0); +train = buffer; +load_thread = load_data_in_thread(args); + +printf("Loaded: %lf seconds\n", sec(clock()-time)); + +time=clock(); +float loss = train_network(net, train); +if (avg_loss < 0) avg_loss = loss; +avg_loss = avg_loss*.9 + loss*.1; + +printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); +if(i%1000==0){ +char buff[256]; +sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); +save_weights(net, buff); +} +if(i%100==0){ +char buff[256]; +sprintf(buff, "%s/%s.backup", backup_directory, base); +save_weights(net, buff); +} +free_data(train); +} +char buff[256]; +sprintf(buff, "%s/%s_final.weights", backup_directory, base); +save_weights(net, buff); +} +*/ + +void test_lsd(char *cfg, char *weights, char *filename, int gray) +{ + network *net = 
load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int i, imlayer = 0; + + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = i; + printf("%d\n", i); + break; + } + } + + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + if(gray) grayscale_image_3c(crop); + + float *X = crop.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + constrain_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 1); + show_image(crop, "crop", 0); + + free_image(im); + free_image(resized); + free_image(crop); + if (filename) break; + } +} + + +void run_lsd(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + int batches = find_int_arg(argc, argv, "-b", 0); + char *file = find_char_arg(argc, argv, "-file", "/home/pjreddie/data/imagenet/imagenet1k.train.list"); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + char *acfg = argv[5]; + char *aweights = (argc > 6) ? 
argv[6] : 0; + //if(0==strcmp(argv[2], "train")) train_lsd(cfg, weights, clear); + //else if(0==strcmp(argv[2], "train2")) train_lsd2(cfg, weights, acfg, aweights, clear); + //else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear); + //else if(0==strcmp(argv[2], "train3")) train_lsd3(argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], clear); + if(0==strcmp(argv[2], "traingan")) train_dcgan(cfg, weights, acfg, aweights, clear, display, file, batches); + else if(0==strcmp(argv[2], "trainprog")) train_prog(cfg, weights, acfg, aweights, clear, display, file, batches); + else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear, display); + else if(0==strcmp(argv[2], "gan")) test_dcgan(cfg, weights); + else if(0==strcmp(argv[2], "inter")) inter_dcgan(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_lsd(cfg, weights, filename, 0); + else if(0==strcmp(argv[2], "color")) test_lsd(cfg, weights, filename, 1); + /* + else if(0==strcmp(argv[2], "valid")) validate_lsd(cfg, weights); + */ +} diff --git a/workloads/realworld/pinned/darknet/examples/nightmare.c b/workloads/realworld/pinned/darknet/examples/nightmare.c new file mode 100644 index 0000000000000000000000000000000000000000..2978eb61193e96325441c5b830a786eccb203569 --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/nightmare.c @@ -0,0 +1,414 @@ +#include "darknet.h" + +#include + +// ./darknet nightmare cfg/extractor.recon.cfg ~/trained/yolo-coco.conv frame6.png -reconstruct -iters 500 -i 3 -lambda .1 -rate .01 -smooth 2 + +float abs_mean(float *x, int n) +{ + int i; + float sum = 0; + for (i = 0; i < n; ++i){ + sum += fabs(x[i]); + } + return sum/n; +} + +void calculate_loss(float *output, float *delta, int n, float thresh) +{ + int i; + float mean = mean_array(output, n); + float var = variance_array(output, n); + for(i = 0; i < n; ++i){ + if(delta[i] > mean + thresh*sqrt(var)) delta[i] = output[i]; + else delta[i] = 
0; + } +} + +void optimize_picture(network *net, image orig, int max_layer, float scale, float rate, float thresh, int norm) +{ + //scale_image(orig, 2); + //translate_image(orig, -1); + net->n = max_layer + 1; + + int dx = rand()%16 - 8; + int dy = rand()%16 - 8; + int flip = rand()%2; + + image crop = crop_image(orig, dx, dy, orig.w, orig.h); + image im = resize_image(crop, (int)(orig.w * scale), (int)(orig.h * scale)); + if(flip) flip_image(im); + + resize_network(net, im.w, im.h); + layer last = net->layers[net->n-1]; + //net->layers[net->n - 1].activation = LINEAR; + + image delta = make_image(im.w, im.h, im.c); + +#ifdef GPU + net->delta_gpu = cuda_make_array(delta.data, im.w*im.h*im.c); + copy_cpu(net->inputs, im.data, 1, net->input, 1); + + forward_network_gpu(net); + copy_gpu(last.outputs, last.output_gpu, 1, last.delta_gpu, 1); + + cuda_pull_array(last.delta_gpu, last.delta, last.outputs); + calculate_loss(last.delta, last.delta, last.outputs, thresh); + cuda_push_array(last.delta_gpu, last.delta, last.outputs); + + backward_network_gpu(net); + + cuda_pull_array(net->delta_gpu, delta.data, im.w*im.h*im.c); + cuda_free(net->delta_gpu); + net->delta_gpu = 0; +#else + printf("\nnet: %d %d %d im: %d %d %d\n", net->w, net->h, net->inputs, im.w, im.h, im.c); + copy_cpu(net->inputs, im.data, 1, net->input, 1); + net->delta = delta.data; + forward_network(net); + copy_cpu(last.outputs, last.output, 1, last.delta, 1); + calculate_loss(last.output, last.delta, last.outputs, thresh); + backward_network(net); +#endif + + if(flip) flip_image(delta); + //normalize_array(delta.data, delta.w*delta.h*delta.c); + image resized = resize_image(delta, orig.w, orig.h); + image out = crop_image(resized, -dx, -dy, orig.w, orig.h); + + /* + image g = grayscale_image(out); + free_image(out); + out = g; + */ + + //rate = rate / abs_mean(out.data, out.w*out.h*out.c); + image gray = make_image(out.w, out.h, out.c); + fill_image(gray, .5); + axpy_cpu(orig.w*orig.h*orig.c, -1, 
orig.data, 1, gray.data, 1); + axpy_cpu(orig.w*orig.h*orig.c, .1, gray.data, 1, out.data, 1); + + if(norm) normalize_array(out.data, out.w*out.h*out.c); + axpy_cpu(orig.w*orig.h*orig.c, rate, out.data, 1, orig.data, 1); + + /* + normalize_array(orig.data, orig.w*orig.h*orig.c); + scale_image(orig, sqrt(var)); + translate_image(orig, mean); + */ + + //translate_image(orig, 1); + //scale_image(orig, .5); + //normalize_image(orig); + + constrain_image(orig); + + free_image(crop); + free_image(im); + free_image(delta); + free_image(resized); + free_image(out); + +} + +void smooth(image recon, image update, float lambda, int num) +{ + int i, j, k; + int ii, jj; + for(k = 0; k < recon.c; ++k){ + for(j = 0; j < recon.h; ++j){ + for(i = 0; i < recon.w; ++i){ + int out_index = i + recon.w*(j + recon.h*k); + for(jj = j-num; jj <= j + num && jj < recon.h; ++jj){ + if (jj < 0) continue; + for(ii = i-num; ii <= i + num && ii < recon.w; ++ii){ + if (ii < 0) continue; + int in_index = ii + recon.w*(jj + recon.h*k); + update.data[out_index] += lambda * (recon.data[in_index] - recon.data[out_index]); + } + } + } + } + } +} + +void reconstruct_picture(network *net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters) +{ + int iter = 0; + for (iter = 0; iter < iters; ++iter) { + image delta = make_image(recon.w, recon.h, recon.c); + +#ifdef GPU + layer l = get_network_output_layer(net); + cuda_push_array(net->input_gpu, recon.data, recon.w*recon.h*recon.c); + //cuda_push_array(net->truth_gpu, features, net->truths); + net->delta_gpu = cuda_make_array(delta.data, delta.w*delta.h*delta.c); + + forward_network_gpu(net); + cuda_push_array(l.delta_gpu, features, l.outputs); + axpy_gpu(l.outputs, -1, l.output_gpu, 1, l.delta_gpu, 1); + backward_network_gpu(net); + + cuda_pull_array(net->delta_gpu, delta.data, delta.w*delta.h*delta.c); + + cuda_free(net->delta_gpu); +#else + net->input = recon.data; + net->delta = delta.data; + 
net->truth = features; + + forward_network(net); + backward_network(net); +#endif + + //normalize_array(delta.data, delta.w*delta.h*delta.c); + axpy_cpu(recon.w*recon.h*recon.c, 1, delta.data, 1, update.data, 1); + //smooth(recon, update, lambda, smooth_size); + + axpy_cpu(recon.w*recon.h*recon.c, rate, update.data, 1, recon.data, 1); + scal_cpu(recon.w*recon.h*recon.c, momentum, update.data, 1); + + float mag = mag_array(delta.data, recon.w*recon.h*recon.c); + printf("mag: %f\n", mag); + //scal_cpu(recon.w*recon.h*recon.c, 600/mag, recon.data, 1); + + constrain_image(recon); + free_image(delta); + } +} + +/* +void run_lsd(int argc, char **argv) +{ + srand(0); + if(argc < 3){ + fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [options! (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[2]; + char *weights = argv[3]; + char *input = argv[4]; + + int norm = find_int_arg(argc, argv, "-norm", 1); + int rounds = find_int_arg(argc, argv, "-rounds", 1); + int iters = find_int_arg(argc, argv, "-iters", 10); + float rate = find_float_arg(argc, argv, "-rate", .04); + float momentum = find_float_arg(argc, argv, "-momentum", .9); + float lambda = find_float_arg(argc, argv, "-lambda", .01); + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + int reconstruct = find_arg(argc, argv, "-reconstruct"); + int smooth_size = find_int_arg(argc, argv, "-smooth", 1); + + network net = parse_network_cfg(cfg); + load_weights(&net, weights); + char *cfgbase = basecfg(cfg); + char *imbase = basecfg(input); + + set_batch_network(&net, 1); + image im = load_image_color(input, 0, 0); + + float *features = 0; + image update; + if (reconstruct){ + im = letterbox_image(im, net->w, net->h); + + int zz = 0; + network_predict(net, im.data); + image out_im = get_network_image(net); + image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz); + //flip_image(crop); + image f_im = resize_image(crop, out_im.w, out_im.h); + free_image(crop); + printf("%d 
features\n", out_im.w*out_im.h*out_im.c); + + + im = resize_image(im, im.w, im.h); + f_im = resize_image(f_im, f_im.w, f_im.h); + features = f_im.data; + + int i; + for(i = 0; i < 14*14*512; ++i){ + features[i] += rand_uniform(-.19, .19); + } + + free_image(im); + im = make_random_image(im.w, im.h, im.c); + update = make_image(im.w, im.h, im.c); + + } + + int e; + int n; + for(e = 0; e < rounds; ++e){ + fprintf(stderr, "Iteration: "); + fflush(stderr); + for(n = 0; n < iters; ++n){ + fprintf(stderr, "%d, ", n); + fflush(stderr); + if(reconstruct){ + reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1); + //if ((n+1)%30 == 0) rate *= .5; + show_image(im, "reconstruction"); +#ifdef OPENCV + cvWaitKey(10); +#endif + }else{ + int layer = max_layer + rand()%range - range/2; + int octave = rand()%octaves; + optimize_picture(&net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm); + } + } + fprintf(stderr, "done\n"); + char buff[256]; + if (prefix){ + sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e); + }else{ + sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e); + } + printf("%d %s\n", e, buff); + save_image(im, buff); + //show_image(im, buff); + //cvWaitKey(0); + + if(rotate){ + image rot = rotate_image(im, rotate); + free_image(im); + im = rot; + } + image crop = crop_image(im, im.w * (1. - zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom); + image resized = resize_image(crop, im.w, im.h); + free_image(im); + free_image(crop); + im = resized; + } +} +*/ + +void run_nightmare(int argc, char **argv) +{ + srand(0); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [layer] [options! 
(optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[2]; + char *weights = argv[3]; + char *input = argv[4]; + int max_layer = atoi(argv[5]); + + int range = find_int_arg(argc, argv, "-range", 1); + int norm = find_int_arg(argc, argv, "-norm", 1); + int rounds = find_int_arg(argc, argv, "-rounds", 1); + int iters = find_int_arg(argc, argv, "-iters", 10); + int octaves = find_int_arg(argc, argv, "-octaves", 4); + float zoom = find_float_arg(argc, argv, "-zoom", 1.); + float rate = find_float_arg(argc, argv, "-rate", .04); + float thresh = find_float_arg(argc, argv, "-thresh", 1.); + float rotate = find_float_arg(argc, argv, "-rotate", 0); + float momentum = find_float_arg(argc, argv, "-momentum", .9); + float lambda = find_float_arg(argc, argv, "-lambda", .01); + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + int reconstruct = find_arg(argc, argv, "-reconstruct"); + int smooth_size = find_int_arg(argc, argv, "-smooth", 1); + + network *net = load_network(cfg, weights, 0); + char *cfgbase = basecfg(cfg); + char *imbase = basecfg(input); + + set_batch_network(net, 1); + image im = load_image_color(input, 0, 0); + if(0){ + float scale = 1; + if(im.w > 512 || im.h > 512){ + if(im.w > im.h) scale = 512.0/im.w; + else scale = 512.0/im.h; + } + image resized = resize_image(im, scale*im.w, scale*im.h); + free_image(im); + im = resized; + } + //im = letterbox_image(im, net->w, net->h); + + float *features = 0; + image update; + if (reconstruct){ + net->n = max_layer; + im = letterbox_image(im, net->w, net->h); + //resize_network(&net, im.w, im.h); + + network_predict(net, im.data); + if(net->layers[net->n-1].type == REGION){ + printf("region!\n"); + zero_objectness(net->layers[net->n-1]); + } + image out_im = copy_image(get_network_image(net)); + /* + image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz); + //flip_image(crop); + image f_im = resize_image(crop, out_im.w, out_im.h); + free_image(crop); + */ + printf("%d features\n", 
out_im.w*out_im.h*out_im.c); + + features = out_im.data; + + /* + int i; + for(i = 0; i < 14*14*512; ++i){ + //features[i] += rand_uniform(-.19, .19); + } + free_image(im); + im = make_random_image(im.w, im.h, im.c); + */ + update = make_image(im.w, im.h, im.c); + } + + int e; + int n; + for(e = 0; e < rounds; ++e){ + fprintf(stderr, "Iteration: "); + fflush(stderr); + for(n = 0; n < iters; ++n){ + fprintf(stderr, "%d, ", n); + fflush(stderr); + if(reconstruct){ + reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1); + //if ((n+1)%30 == 0) rate *= .5; + show_image(im, "reconstruction", 10); + }else{ + int layer = max_layer + rand()%range - range/2; + int octave = rand()%octaves; + optimize_picture(net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm); + } + } + fprintf(stderr, "done\n"); + if(0){ + image g = grayscale_image(im); + free_image(im); + im = g; + } + char buff[256]; + if (prefix){ + sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e); + }else{ + sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e); + } + printf("%d %s\n", e, buff); + save_image(im, buff); + //show_image(im, buff, 0); + + if(rotate){ + image rot = rotate_image(im, rotate); + free_image(im); + im = rot; + } + image crop = crop_image(im, im.w * (1. 
- zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom); + image resized = resize_image(crop, im.w, im.h); + free_image(im); + free_image(crop); + im = resized; + } +} + diff --git a/workloads/realworld/pinned/darknet/examples/regressor.c b/workloads/realworld/pinned/darknet/examples/regressor.c new file mode 100644 index 0000000000000000000000000000000000000000..20cec0fad9f0a2ccb2c46a30d0a01793119b43ce --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/regressor.c @@ -0,0 +1,240 @@ +#include "darknet.h" +#include +#include + +void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + int classes = option_find_int(options, "classes", 1); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + clock_t time; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.classes = classes; + + args.min = net->min_ratio*net->w; + args.max = net->max_ratio*net->w; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + 
args.size = net->w; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = REGRESSION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_regressor(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image sized = 
letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + free_image(im); + free_image(sized); + if (filename) break; + } +} + + +void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Regressor Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + list *options = read_data_cfg(datacfg); + int classes = option_find_int(options, "classes", 1); + char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); + + void * cap = open_video_stream(filename, cam_index, 0,0,0); + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image crop = center_crop_image(in, net->w, net->h); + grayscale_image_3c(crop); + + float *predictions = network_predict(net, crop.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + int i; + for(i = 0; i < classes; ++i){ + printf("%s: %f\n", names[i], predictions[i]); + } + + show_image(crop, "Regressor", 10); + free_image(in); + free_image(crop); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_regressor(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus 
= 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_regressor(data, cfg, weights); + else if(0==strcmp(argv[2], "train")) train_regressor(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "demo")) demo_regressor(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/pinned/darknet/examples/rnn.c b/workloads/realworld/pinned/darknet/examples/rnn.c new file mode 100644 index 0000000000000000000000000000000000000000..5d49eaae7070eb1dc9a87b5627b7ec6f7cb09e46 --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/rnn.c @@ -0,0 +1,542 @@ +#include "darknet.h" + +#include + +typedef struct { + float *x; + float *y; +} float_pair; + +unsigned char **load_files(char *filename, int *n) +{ + list *paths = get_paths(filename); + *n = paths->size; + unsigned char **contents = calloc(*n, sizeof(char *)); + int i; + node *x = paths->front; + for(i = 0; i < *n; ++i){ + contents[i] = read_file((char *)x->val); + x = x->next; + } + return contents; +} + +int *read_tokenized_data(char *filename, size_t *read) +{ + size_t size = 512; + size_t count = 0; + FILE *fp = fopen(filename, "r"); + int *d = calloc(size, sizeof(int)); + int n, one; + one = fscanf(fp, "%d", &n); + while(one == 1){ + ++count; + if(count > size){ + size = size*2; + d = realloc(d, size*sizeof(int)); + } + d[count-1] = n; + one = fscanf(fp, "%d", &n); + } + fclose(fp); + d = realloc(d, count*sizeof(int)); + *read = count; + return d; +} + +char **read_tokens(char 
*filename, size_t *read) +{ + size_t size = 512; + size_t count = 0; + FILE *fp = fopen(filename, "r"); + char **d = calloc(size, sizeof(char *)); + char *line; + while((line=fgetl(fp)) != 0){ + ++count; + if(count > size){ + size = size*2; + d = realloc(d, size*sizeof(char *)); + } + if(0==strcmp(line, "")) line = "\n"; + d[count-1] = line; + } + fclose(fp); + d = realloc(d, count*sizeof(char *)); + *read = count; + return d; +} + + +float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size_t len, int batch, int steps) +{ + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + int i,j; + for(i = 0; i < batch; ++i){ + for(j = 0; j < steps; ++j){ + int curr = tokens[(offsets[i])%len]; + int next = tokens[(offsets[i] + 1)%len]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + offsets[i] = (offsets[i] + 1) % len; + + if(curr >= characters || curr < 0 || next >= characters || next < 0){ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} + +float_pair get_seq2seq_data(char **source, char **dest, int n, int characters, size_t len, int batch, int steps) +{ + int i,j; + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + for(i = 0; i < batch; ++i){ + int index = rand()%n; + //int slen = strlen(source[index]); + //int dlen = strlen(dest[index]); + for(j = 0; j < steps; ++j){ + unsigned char curr = source[index][j]; + unsigned char next = dest[index][j]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + if(curr > 255 || curr <= 0 || next > 255 || next <= 0){ + /*text[(index+j+2)%len] = 0; + printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]); + printf("%s", text+index); + */ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} 
+ +float_pair get_rnn_data(unsigned char *text, size_t *offsets, int characters, size_t len, int batch, int steps) +{ + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + int i,j; + for(i = 0; i < batch; ++i){ + for(j = 0; j < steps; ++j){ + unsigned char curr = text[(offsets[i])%len]; + unsigned char next = text[(offsets[i] + 1)%len]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + offsets[i] = (offsets[i] + 1) % len; + + if(curr > 255 || curr <= 0 || next > 255 || next <= 0){ + /*text[(index+j+2)%len] = 0; + printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]); + printf("%s", text+index); + */ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} + +void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear, int tokenized) +{ + srand(time(0)); + unsigned char *text = 0; + int *tokens = 0; + size_t size; + if(tokenized){ + tokens = read_tokenized_data(filename, &size); + } else { + text = read_file(filename); + size = strlen((const char*)text); + } + + char *backup_directory = "/home/pjreddie/backup/"; + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, clear); + + int inputs = net->inputs; + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g, Inputs: %d %d %d\n", net->learning_rate, net->momentum, net->decay, inputs, net->batch, net->time_steps); + int batch = net->batch; + int steps = net->time_steps; + if(clear) *net->seen = 0; + int i = (*net->seen)/net->batch; + + int streams = batch/steps; + size_t *offsets = calloc(streams, sizeof(size_t)); + int j; + for(j = 0; j < streams; ++j){ + offsets[j] = rand_size_t()%size; + } + + clock_t time; + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + float_pair p; + if(tokenized){ + p = 
get_rnn_token_data(tokens, offsets, inputs, size, streams, steps); + }else{ + p = get_rnn_data(text, offsets, inputs, size, streams, steps); + } + + copy_cpu(net->inputs*net->batch, p.x, 1, net->input, 1); + copy_cpu(net->truths*net->batch, p.y, 1, net->truth, 1); + float loss = train_network_datum(net) / (batch); + free(p.x); + free(p.y); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + size_t chars = get_current_batch(net)*batch; + fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds, %f epochs\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), (float) chars/size); + + for(j = 0; j < streams; ++j){ + //printf("%d\n", j); + if(rand()%64 == 0){ + //fprintf(stderr, "Reset\n"); + offsets[j] = rand_size_t()%size; + reset_network_state(net, j); + } + } + + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void print_symbol(int n, char **tokens){ + if(tokens){ + printf("%s ", tokens[n]); + } else { + printf("%c", n); + } +} + +void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float temp, int rseed, char *token_file) +{ + char **tokens = 0; + if(token_file){ + size_t n; + tokens = read_tokens(token_file, &n); + } + + srand(rseed); + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int i, j; + for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp; + int c = 0; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + + /* + fill_cpu(inputs, 0, input, 1); + for(i = 0; i < 10; ++i){ + network_predict(net, input); + } + fill_cpu(inputs, 0, input, 1); + */ + + for(i = 
0; i < len-1; ++i){ + c = seed[i]; + input[c] = 1; + network_predict(net, input); + input[c] = 0; + print_symbol(c, tokens); + } + if(len) c = seed[len-1]; + print_symbol(c, tokens); + for(i = 0; i < num; ++i){ + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + for(j = 32; j < 127; ++j){ + //printf("%d %c %f\n",j, j, out[j]); + } + for(j = 0; j < inputs; ++j){ + if (out[j] < .0001) out[j] = 0; + } + c = sample_array(out, inputs); + print_symbol(c, tokens); + } + printf("\n"); +} + +void test_tactic_rnn_multi(char *cfgfile, char *weightfile, int num, float temp, int rseed, char *token_file) +{ + char **tokens = 0; + if(token_file){ + size_t n; + tokens = read_tokens(token_file, &n); + } + + srand(rseed); + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int i, j; + for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp; + int c = 0; + float *input = calloc(inputs, sizeof(float)); + float *out = 0; + + while(1){ + reset_network_state(net, 0); + while((c = getc(stdin)) != EOF && c != 0){ + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + for(i = 0; i < num; ++i){ + for(j = 0; j < inputs; ++j){ + if (out[j] < .0001) out[j] = 0; + } + int next = sample_array(out, inputs); + if(c == '.' 
&& next == '\n') break; + c = next; + print_symbol(c, tokens); + + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + printf("\n"); + } +} + +void test_tactic_rnn(char *cfgfile, char *weightfile, int num, float temp, int rseed, char *token_file) +{ + char **tokens = 0; + if(token_file){ + size_t n; + tokens = read_tokens(token_file, &n); + } + + srand(rseed); + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int i, j; + for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp; + int c = 0; + float *input = calloc(inputs, sizeof(float)); + float *out = 0; + + while((c = getc(stdin)) != EOF){ + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + for(i = 0; i < num; ++i){ + for(j = 0; j < inputs; ++j){ + if (out[j] < .0001) out[j] = 0; + } + int next = sample_array(out, inputs); + if(c == '.' && next == '\n') break; + c = next; + print_symbol(c, tokens); + + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + printf("\n"); +} + +void valid_tactic_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int count = 0; + int words = 1; + int c; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + for(i = 0; i < len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + float sum = 0; + c = getc(stdin); + float log2 = log(2); + int in = 0; + while(c != EOF){ + int next = getc(stdin); + if(next == EOF) break; + if(next < 0 || next >= 255) error("Out of range character"); + + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + + if(c == '.' 
&& next == '\n') in = 0; + if(!in) { + if(c == '>' && next == '>'){ + in = 1; + ++words; + } + c = next; + continue; + } + ++count; + sum += log(out[next])/log2; + c = next; + printf("%d %d Perplexity: %4.4f Word Perplexity: %4.4f\n", count, words, pow(2, -sum/count), pow(2, -sum/words)); + } +} + +void valid_char_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int count = 0; + int words = 1; + int c; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + for(i = 0; i < len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + float sum = 0; + c = getc(stdin); + float log2 = log(2); + while(c != EOF){ + int next = getc(stdin); + if(next == EOF) break; + if(next < 0 || next >= 255) error("Out of range character"); + ++count; + if(next == ' ' || next == '\n' || next == '\t') ++words; + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + sum += log(out[next])/log2; + c = next; + printf("%d BPC: %4.4f Perplexity: %4.4f Word Perplexity: %4.4f\n", count, -sum/count, pow(2, -sum/count), pow(2, -sum/words)); + } +} + +void vec_char_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int c; + int seed_len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + char *line; + while((line=fgetl(stdin)) != 0){ + reset_network_state(net, 0); + for(i = 0; i < seed_len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + strip(line); + int str_len = strlen(line); + for(i = 0; i < str_len; ++i){ + c = line[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + c = ' '; + 
input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + + layer l = net->layers[0]; + #ifdef GPU + cuda_pull_array(l.output_gpu, l.output, l.outputs); + #endif + printf("%s", line); + for(i = 0; i < l.outputs; ++i){ + printf(",%g", l.output[i]); + } + printf("\n"); + } +} + +void run_char_rnn(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + char *filename = find_char_arg(argc, argv, "-file", "data/shakespeare.txt"); + char *seed = find_char_arg(argc, argv, "-seed", "\n\n"); + int len = find_int_arg(argc, argv, "-len", 1000); + float temp = find_float_arg(argc, argv, "-temp", .7); + int rseed = find_int_arg(argc, argv, "-srand", time(0)); + int clear = find_arg(argc, argv, "-clear"); + int tokenized = find_arg(argc, argv, "-tokenized"); + char *tokens = find_char_arg(argc, argv, "-tokens", 0); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_char_rnn(cfg, weights, filename, clear, tokenized); + else if(0==strcmp(argv[2], "valid")) valid_char_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "validtactic")) valid_tactic_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "vec")) vec_char_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "generate")) test_char_rnn(cfg, weights, len, seed, temp, rseed, tokens); + else if(0==strcmp(argv[2], "generatetactic")) test_tactic_rnn(cfg, weights, len, temp, rseed, tokens); +} diff --git a/workloads/realworld/pinned/darknet/examples/rnn_vid.c b/workloads/realworld/pinned/darknet/examples/rnn_vid.c new file mode 100644 index 0000000000000000000000000000000000000000..e88792352311438d0fcb25bb7befd0677f70bae5 --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/rnn_vid.c @@ -0,0 +1,208 @@ +#include "darknet.h" + +#ifdef OPENCV +image get_image_from_stream(CvCapture *cap); +image ipl_to_image(IplImage* src); + +void 
reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters); + + +typedef struct { + float *x; + float *y; +} float_pair; + +float_pair get_rnn_vid_data(network net, char **files, int n, int batch, int steps) +{ + int b; + assert(net.batch == steps + 1); + image out_im = get_network_image(net); + int output_size = out_im.w*out_im.h*out_im.c; + printf("%d %d %d\n", out_im.w, out_im.h, out_im.c); + float *feats = calloc(net.batch*batch*output_size, sizeof(float)); + for(b = 0; b < batch; ++b){ + int input_size = net.w*net.h*net.c; + float *input = calloc(input_size*net.batch, sizeof(float)); + char *filename = files[rand()%n]; + CvCapture *cap = cvCaptureFromFile(filename); + int frames = cvGetCaptureProperty(cap, CV_CAP_PROP_FRAME_COUNT); + int index = rand() % (frames - steps - 2); + if (frames < (steps + 4)){ + --b; + free(input); + continue; + } + + printf("frames: %d, index: %d\n", frames, index); + cvSetCaptureProperty(cap, CV_CAP_PROP_POS_FRAMES, index); + + int i; + for(i = 0; i < net.batch; ++i){ + IplImage* src = cvQueryFrame(cap); + image im = ipl_to_image(src); + rgbgr_image(im); + image re = resize_image(im, net.w, net.h); + //show_image(re, "loaded"); + //cvWaitKey(10); + memcpy(input + i*input_size, re.data, input_size*sizeof(float)); + free_image(im); + free_image(re); + } + float *output = network_predict(net, input); + + free(input); + + for(i = 0; i < net.batch; ++i){ + memcpy(feats + (b + i*batch)*output_size, output + i*output_size, output_size*sizeof(float)); + } + + cvReleaseCapture(&cap); + } + + //printf("%d %d %d\n", out_im.w, out_im.h, out_im.c); + float_pair p = {0}; + p.x = feats; + p.y = feats + output_size*batch; //+ out_im.w*out_im.h*out_im.c; + + return p; +} + + +void train_vid_rnn(char *cfgfile, char *weightfile) +{ + char *train_videos = "data/vid/train.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = 
basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + + list *plist = get_paths(train_videos); + int N = plist->size; + char **paths = (char **)list_to_array(plist); + clock_t time; + int steps = net.time_steps; + int batch = net.batch / net.time_steps; + + network extractor = parse_network_cfg("cfg/extractor.cfg"); + load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv"); + + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + float_pair p = get_rnn_vid_data(extractor, paths, N, batch, steps); + + copy_cpu(net.inputs*net.batch, p.x, 1, net.input, 1); + copy_cpu(net.truths*net.batch, p.y, 1, net.truth, 1); + float loss = train_network_datum(net) / (net.batch); + + + free(p.x); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time)); + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%10==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + + +image save_reconstruction(network net, image *init, float *feat, char *name, int i) +{ + image recon; + if (init) { + recon = copy_image(*init); + } else { + recon = make_random_image(net.w, net.h, 3); + } + + image update = make_image(net.w, net.h, 3); + reconstruct_picture(net, feat, recon, update, .01, .9, .1, 2, 50); + char buff[256]; + sprintf(buff, "%s%d", name, i); + save_image(recon, buff); + free_image(update); + 
return recon; +} + +void generate_vid_rnn(char *cfgfile, char *weightfile) +{ + network extractor = parse_network_cfg("cfg/extractor.recon.cfg"); + load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv"); + + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&extractor, 1); + set_batch_network(&net, 1); + + int i; + CvCapture *cap = cvCaptureFromFile("/extra/vid/ILSVRC2015/Data/VID/snippets/val/ILSVRC2015_val_00007030.mp4"); + float *feat; + float *next; + image last; + for(i = 0; i < 25; ++i){ + image im = get_image_from_stream(cap); + image re = resize_image(im, extractor.w, extractor.h); + feat = network_predict(extractor, re.data); + if(i > 0){ + printf("%f %f\n", mean_array(feat, 14*14*512), variance_array(feat, 14*14*512)); + printf("%f %f\n", mean_array(next, 14*14*512), variance_array(next, 14*14*512)); + printf("%f\n", mse_array(feat, 14*14*512)); + axpy_cpu(14*14*512, -1, feat, 1, next, 1); + printf("%f\n", mse_array(next, 14*14*512)); + } + next = network_predict(net, feat); + + free_image(im); + + free_image(save_reconstruction(extractor, 0, feat, "feat", i)); + free_image(save_reconstruction(extractor, 0, next, "next", i)); + if (i==24) last = copy_image(re); + free_image(re); + } + for(i = 0; i < 30; ++i){ + next = network_predict(net, next); + image new = save_reconstruction(extractor, &last, next, "new", i); + free_image(last); + last = new; + } +} + +void run_vid_rnn(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + //char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_vid_rnn(cfg, weights); + else if(0==strcmp(argv[2], "generate")) generate_vid_rnn(cfg, weights); +} +#else +void run_vid_rnn(int argc, char **argv){} +#endif + diff --git a/workloads/realworld/pinned/darknet/examples/segmenter.c b/workloads/realworld/pinned/darknet/examples/segmenter.c new file mode 100644 index 0000000000000000000000000000000000000000..2e7cea0b730754b74a125bcd865aa12f0bdd3be0 --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/segmenter.c @@ -0,0 +1,255 @@ +#include "darknet.h" +#include +#include + +void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + image pred = get_network_image(net); + + int div = net->w/pred.w; + assert(pred.w * div == net->w); + assert(pred.h * div == net->h); + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.scale = div; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; 
+ args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.classes = 80; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = SEGMENTATION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(display){ + image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]); + image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]); + image mask = mask_to_rgb(tr); + image prmask = mask_to_rgb(pred); + show_image(im, "input", 1); + show_image(prmask, "pred", 1); + show_image(mask, "truth", 100); + free_image(mask); + free_image(prmask); + } + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, 
buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_segmenter(char *datafile, char *cfg, char *weights, char *filename) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image sized = letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + show_image(sized, "orig", 1); + show_image(prmask, "pred", 0); + free_image(im); + free_image(sized); + free_image(prmask); + if (filename) break; + } +} + + +void demo_segmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Classifier Demo\n"); + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = letterbox_image(in, net->w, net->h); + + network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + show_image(prmask, "Segmenter", 10); + + free_image(in_s); + free_image(in); + free_image(prmask); + + gettimeofday(&tval_after, NULL); + 
timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_segmenter(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? 
argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_segmenter(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_segmenter(data, cfg, weights, gpus, ngpus, clear, display); + else if(0==strcmp(argv[2], "demo")) demo_segmenter(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/pinned/darknet/examples/super.c b/workloads/realworld/pinned/darknet/examples/super.c new file mode 100644 index 0000000000000000000000000000000000000000..d34406b1f2ce70cd36eecb8298bf1ca3e736f01b --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/super.c @@ -0,0 +1,120 @@ +#include "darknet.h" + +void train_super(char *cfgfile, char *weightfile, int clear) +{ + char *train_images = "/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, clear); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.scale = 4; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = SUPER_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d 
images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void test_super(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + resize_network(net, im.w, im.h); + printf("%d %d\n", im.w, im.h); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image(net); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 0); + + free_image(im); + if (filename) break; + } +} + + +void run_super(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5] : 0; + int clear = find_arg(argc, argv, "-clear"); + if(0==strcmp(argv[2], "train")) train_super(cfg, weights, clear); + else if(0==strcmp(argv[2], "test")) test_super(cfg, weights, filename); + /* + else if(0==strcmp(argv[2], "valid")) validate_super(cfg, weights); + */ +} diff --git a/workloads/realworld/pinned/darknet/examples/swag.c b/workloads/realworld/pinned/darknet/examples/swag.c new file mode 100644 index 0000000000000000000000000000000000000000..c22d7855c46a975ecd1e94a60f9b7059bc288fee --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/swag.c @@ -0,0 +1,83 @@ +#include "darknet.h" +#include + +void train_swag(char *cfgfile, char *weightfile) +{ + char *train_images = "data/voc.0712.trainval"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + data train, buffer; + + layer l = net.layers[net.n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes = side; + args.d = &buffer; + args.type = REGION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + 
float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || i == 600){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void run_swag(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_swag(cfg, weights); +} diff --git a/workloads/realworld/pinned/darknet/examples/tag.c b/workloads/realworld/pinned/darknet/examples/tag.c new file mode 100644 index 0000000000000000000000000000000000000000..4caf8cba18f39f62deb54ea913fd40c194b3e33c --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/tag.c @@ -0,0 +1,140 @@ +#include "darknet.h" + +void train_tag(char *cfgfile, char *weightfile, int clear) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, clear); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + list *plist = get_paths("/home/pjreddie/tag/train.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + + args.min = net->w; + args.max = net->max_crop; + args.size = net->w; + + args.paths = paths; + args.classes = net->outputs; + args.n = 
imgs; + args.m = N; + args.d = &buffer; + args.type = TAG_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + fprintf(stderr, "%d classes\n", net->outputs); + + load_thread = load_data_in_thread(args); + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + pthread_join(load_thread, 0); + free_data(buffer); + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void test_tag(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + int i = 0; + char **names = get_labels("data/tags.txt"); + clock_t time; + int indexes[10]; + char buff[256]; + char *input = buff; + int size = net->w; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + 
image im = load_image_color(input, 0, 0); + image r = resize_min(im, size); + resize_network(net, r.w, r.h); + printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + float *predictions = network_predict(net, X); + top_predictions(net, 10, indexes); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < 10; ++i){ + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void run_tag(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int clear = find_arg(argc, argv, "-clear"); + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_tag(cfg, weights, clear); + else if(0==strcmp(argv[2], "test")) test_tag(cfg, weights, filename); +} + diff --git a/workloads/realworld/pinned/darknet/examples/voxel.c b/workloads/realworld/pinned/darknet/examples/voxel.c new file mode 100644 index 0000000000000000000000000000000000000000..01ea9bb98987590227758364bbfff50996cf9a2d --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/voxel.c @@ -0,0 +1,161 @@ +#include "darknet.h" + +void extract_voxel(char *lfile, char *rfile, char *prefix) +{ +#ifdef OPENCV + int w = 1920; + int h = 1080; + int shift = 0; + int count = 0; + CvCapture *lcap = cvCaptureFromFile(lfile); + CvCapture *rcap = cvCaptureFromFile(rfile); + while(1){ + image l = get_image_from_stream(lcap); + image r = get_image_from_stream(rcap); + if(!l.w || !r.w) break; + if(count%100 == 0) { + shift = best_3d_shift_r(l, r, -l.h/100, l.h/100); + printf("%d\n", shift); + } + image ls = crop_image(l, (l.w - w)/2, (l.h - h)/2, w, h); + image rs = crop_image(r, 105 + (r.w - w)/2, (r.h - h)/2 + shift, w, h); + char buff[256]; + sprintf(buff, 
"%s_%05d_l", prefix, count); + save_image(ls, buff); + sprintf(buff, "%s_%05d_r", prefix, count); + save_image(rs, buff); + free_image(l); + free_image(r); + free_image(ls); + free_image(rs); + ++count; + } + +#else + printf("need OpenCV for extraction\n"); +#endif +} + +void train_voxel(char *cfgfile, char *weightfile) +{ + char *train_images = "/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.scale = 4; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = SUPER_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); 
+ } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void test_voxel(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + resize_network(&net, im.w, im.h); + printf("%d %d\n", im.w, im.h); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image(net); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + + free_image(im); + if (filename) break; + } +} + + +void run_voxel(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_voxel(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_voxel(cfg, weights, filename); + else if(0==strcmp(argv[2], "extract")) extract_voxel(argv[3], argv[4], argv[5]); + /* + else if(0==strcmp(argv[2], "valid")) validate_voxel(cfg, weights); + */ +} diff --git a/workloads/realworld/pinned/darknet/examples/writing.c b/workloads/realworld/pinned/darknet/examples/writing.c new file mode 100644 index 0000000000000000000000000000000000000000..1b6ff83b5838b654e0fd1b6664156daf6d7a889b --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/writing.c @@ -0,0 +1,144 @@ +#include "darknet.h" + +void train_writing(char *cfgfile, char *weightfile) +{ + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + list *plist = get_paths("figures.list"); + char **paths = (char **)list_to_array(plist); + clock_t time; + int N = plist->size; + printf("N: %d\n", N); + image out = get_network_image(net); + + data train, buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.out_w = out.w; + args.out_h = out.h; + args.paths = paths; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = WRITING_DATA; + + pthread_t load_thread = load_data_in_thread(args); + int epoch = (*net.seen)/N; + while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + printf("Loaded %lf seconds\n",sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + + /* + image pred = float_to_image(64, 64, 1, out); + print_image(pred); + */ + + 
/* + image im = float_to_image(256, 256, 3, train.X.vals[0]); + image lab = float_to_image(64, 64, 1, train.y.vals[0]); + image pred = float_to_image(64, 64, 1, out); + show_image(im, "image"); + show_image(lab, "label"); + print_image(lab); + show_image(pred, "pred"); + cvWaitKey(0); + */ + + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + free_data(train); + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_batch_%ld.weights", backup_directory, base, get_current_batch(net)); + save_weights(net, buff); + } + if(*net.seen/N > epoch){ + epoch = *net.seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + } +} + +void test_writing(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + + image im = load_image_color(input, 0, 0); + resize_network(&net, im.w, im.h); + printf("%d %d %d\n", im.h, im.w, im.c); + float *X = im.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + image pred = get_network_image(net); + + image upsampled = resize_image(pred, im.w, im.h); + image thresh = threshold_image(upsampled, .5); + pred = thresh; + + show_image(pred, "prediction"); + show_image(im, "orig"); +#ifdef OPENCV + cvWaitKey(0); + cvDestroyAllWindows(); +#endif + + free_image(upsampled); + free_image(thresh); 
+ free_image(im); + if (filename) break; + } +} + +void run_writing(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_writing(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_writing(cfg, weights, filename); +} + diff --git a/workloads/realworld/pinned/darknet/examples/yolo.c b/workloads/realworld/pinned/darknet/examples/yolo.c new file mode 100644 index 0000000000000000000000000000000000000000..4ddb69a3e53b2123ccb89026645a66c044047faa --- /dev/null +++ b/workloads/realworld/pinned/darknet/examples/yolo.c @@ -0,0 +1,327 @@ +#include "darknet.h" + +char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; + +void train_yolo(char *cfgfile, char *weightfile) +{ + char *train_images = "/data/voc/train.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + layer l = net->layers[net->n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes = side; + args.d = 
&buffer; + args.type = REGION_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || (i < 1000 && i%100 == 0)){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void print_yolo_detections(FILE **fps, char *id, int total, int classes, int w, int h, detection *dets) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], + xmin, ymin, xmax, ymax); + } + } +} + +void validate_yolo(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + //list *plist = 
get_paths("data/voc.2007.test"); + list *plist = get_paths("/home/pjreddie/data/voc/2007_test.txt"); + //list *plist = get_paths("data/voc.2012.test"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + int j; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + int t; + + float thresh = .001; + int nms = 1; + float iou_thresh = .5; + + int nthreads = 8; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.type = IMAGE_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + time_t start = time(0); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id = basecfg(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh); + print_yolo_detections(fps, id, l.side*l.side*l.n, classes, w, h, dets); + free_detections(dets, 
nboxes); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); +} + +void validate_yolo_recall(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + list *plist = get_paths("data/voc.2007.test"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + int side = l.side; + + int j, k; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + + float thresh = .001; + float iou_thresh = .5; + float nms = 0; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + + int nboxes = 0; + detection *dets = get_network_boxes(net, orig.w, orig.h, thresh, 0, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, side*side*l.n, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < side*side*l.n; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < 
side*side*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free_detections(dets, nboxes); + free(id); + free_image(orig); + free_image(sized); + } +} + +void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) +{ + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + layer l = net->layers[net->n-1]; + set_batch_network(net, 1); + srand(2222222); + clock_t time; + char buff[256]; + char *input = buff; + float nms=.4; + while(1){ + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = resize_image(im, net->w, net->h); + float *X = sized.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + + int nboxes = 0; + detection *dets = get_network_boxes(net, 1, 1, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms); + + draw_detections(im, dets, l.side*l.side*l.n, thresh, voc_names, alphabet, 20); + save_image(im, "predictions"); + show_image(im, "predictions", 0); + free_detections(dets, nboxes); + free_image(im); + free_image(sized); + if (filename) break; + } +} + +void run_yolo(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .2); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] 
[weights (optional)]\n", argv[0], argv[1]); + return; + } + + int avg = find_int_arg(argc, argv, "-avg", 1); + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5]: 0; + if(0==strcmp(argv[2], "test")) test_yolo(cfg, weights, filename, thresh); + else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights); + else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix, avg, .5, 0,0,0,0); +} diff --git a/workloads/realworld/pinned/darknet/include/darknet.h b/workloads/realworld/pinned/darknet/include/darknet.h new file mode 100644 index 0000000000000000000000000000000000000000..7be8225e2d39f079ca0a15da6980b42f8966af40 --- /dev/null +++ b/workloads/realworld/pinned/darknet/include/darknet.h @@ -0,0 +1,810 @@ +#ifndef DARKNET_API +#define DARKNET_API +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef GPU + #define GPU_DEVICE 7 + #define BLOCK 512 + + #include "cuda_runtime.h" + #include "curand.h" + #include "cublas_v2.h" + + #ifdef CUDNN + #include "cudnn.h" + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define SECRET_NUM -1234 +extern int gpu_index; + +typedef struct{ + int classes; + char **names; +} metadata; + +metadata get_metadata(char *file); + +typedef struct{ + int *leaf; + int n; + int *parent; + int *child; + int *group; + char **name; + + int groups; + int *group_size; + int *group_offset; +} tree; +tree *read_tree(char *filename); + +typedef enum{ + LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU +} ACTIVATION; + +typedef enum{ + PNG, BMP, TGA, JPG +} IMTYPE; + +typedef enum{ + MULT, ADD, SUB, DIV +} BINARY_ACTIVATION; + +typedef enum { + CONVOLUTIONAL, + DECONVOLUTIONAL, + CONNECTED, + MAXPOOL, + 
SOFTMAX, + DETECTION, + DROPOUT, + CROP, + ROUTE, + COST, + NORMALIZATION, + AVGPOOL, + LOCAL, + SHORTCUT, + ACTIVE, + RNN, + GRU, + LSTM, + CRNN, + BATCHNORM, + NETWORK, + XNOR, + REGION, + YOLO, + ISEG, + REORG, + UPSAMPLE, + LOGXENT, + L2NORM, + BLANK +} LAYER_TYPE; + +typedef enum{ + SSE, MASKED, L1, SEG, SMOOTH,WGAN +} COST_TYPE; + +typedef struct{ + int batch; + float learning_rate; + float momentum; + float decay; + int adam; + float B1; + float B2; + float eps; + int t; +} update_args; + +struct network; +typedef struct network network; + +struct layer; +typedef struct layer layer; + +struct layer{ + LAYER_TYPE type; + ACTIVATION activation; + COST_TYPE cost_type; + void (*forward) (struct layer, struct network); + void (*backward) (struct layer, struct network); + void (*update) (struct layer, update_args); + void (*forward_gpu) (struct layer, struct network); + void (*backward_gpu) (struct layer, struct network); + void (*update_gpu) (struct layer, update_args); + int batch_normalize; + int shortcut; + int batch; + int forced; + int flipped; + int inputs; + int outputs; + int nweights; + int nbiases; + int extra; + int truths; + int h,w,c; + int out_h, out_w, out_c; + int n; + int max_boxes; + int groups; + int size; + int side; + int stride; + int reverse; + int flatten; + int spatial; + int pad; + int sqrt; + int flip; + int index; + int binary; + int xnor; + int steps; + int hidden; + int truth; + float smooth; + float dot; + float angle; + float jitter; + float saturation; + float exposure; + float shift; + float ratio; + float learning_rate_scale; + float clip; + int noloss; + int softmax; + int classes; + int coords; + int background; + int rescore; + int objectness; + int joint; + int noadjust; + int reorg; + int log; + int tanh; + int *mask; + int total; + + float alpha; + float beta; + float kappa; + + float coord_scale; + float object_scale; + float noobject_scale; + float mask_scale; + float class_scale; + int bias_match; + int random; + float 
ignore_thresh; + float truth_thresh; + float thresh; + float focus; + int classfix; + int absolute; + + int onlyforward; + int stopbackward; + int dontload; + int dontsave; + int dontloadscales; + int numload; + + float temperature; + float probability; + float scale; + + char * cweights; + int * indexes; + int * input_layers; + int * input_sizes; + int * map; + int * counts; + float ** sums; + float * rand; + float * cost; + float * state; + float * prev_state; + float * forgot_state; + float * forgot_delta; + float * state_delta; + float * combine_cpu; + float * combine_delta_cpu; + + float * concat; + float * concat_delta; + + float * binary_weights; + + float * biases; + float * bias_updates; + + float * scales; + float * scale_updates; + + float * weights; + float * weight_updates; + + float * delta; + float * output; + float * loss; + float * squared; + float * norms; + + float * spatial_mean; + float * mean; + float * variance; + + float * mean_delta; + float * variance_delta; + + float * rolling_mean; + float * rolling_variance; + + float * x; + float * x_norm; + + float * m; + float * v; + + float * bias_m; + float * bias_v; + float * scale_m; + float * scale_v; + + + float *z_cpu; + float *r_cpu; + float *h_cpu; + float * prev_state_cpu; + + float *temp_cpu; + float *temp2_cpu; + float *temp3_cpu; + + float *dh_cpu; + float *hh_cpu; + float *prev_cell_cpu; + float *cell_cpu; + float *f_cpu; + float *i_cpu; + float *g_cpu; + float *o_cpu; + float *c_cpu; + float *dc_cpu; + + float * binary_input; + + struct layer *input_layer; + struct layer *self_layer; + struct layer *output_layer; + + struct layer *reset_layer; + struct layer *update_layer; + struct layer *state_layer; + + struct layer *input_gate_layer; + struct layer *state_gate_layer; + struct layer *input_save_layer; + struct layer *state_save_layer; + struct layer *input_state_layer; + struct layer *state_state_layer; + + struct layer *input_z_layer; + struct layer *state_z_layer; + + struct layer 
*input_r_layer; + struct layer *state_r_layer; + + struct layer *input_h_layer; + struct layer *state_h_layer; + + struct layer *wz; + struct layer *uz; + struct layer *wr; + struct layer *ur; + struct layer *wh; + struct layer *uh; + struct layer *uo; + struct layer *wo; + struct layer *uf; + struct layer *wf; + struct layer *ui; + struct layer *wi; + struct layer *ug; + struct layer *wg; + + tree *softmax_tree; + + size_t workspace_size; + +#ifdef GPU + int *indexes_gpu; + + float *z_gpu; + float *r_gpu; + float *h_gpu; + + float *temp_gpu; + float *temp2_gpu; + float *temp3_gpu; + + float *dh_gpu; + float *hh_gpu; + float *prev_cell_gpu; + float *cell_gpu; + float *f_gpu; + float *i_gpu; + float *g_gpu; + float *o_gpu; + float *c_gpu; + float *dc_gpu; + + float *m_gpu; + float *v_gpu; + float *bias_m_gpu; + float *scale_m_gpu; + float *bias_v_gpu; + float *scale_v_gpu; + + float * combine_gpu; + float * combine_delta_gpu; + + float * prev_state_gpu; + float * forgot_state_gpu; + float * forgot_delta_gpu; + float * state_gpu; + float * state_delta_gpu; + float * gate_gpu; + float * gate_delta_gpu; + float * save_gpu; + float * save_delta_gpu; + float * concat_gpu; + float * concat_delta_gpu; + + float * binary_input_gpu; + float * binary_weights_gpu; + + float * mean_gpu; + float * variance_gpu; + + float * rolling_mean_gpu; + float * rolling_variance_gpu; + + float * variance_delta_gpu; + float * mean_delta_gpu; + + float * x_gpu; + float * x_norm_gpu; + float * weights_gpu; + float * weight_updates_gpu; + float * weight_change_gpu; + + float * biases_gpu; + float * bias_updates_gpu; + float * bias_change_gpu; + + float * scales_gpu; + float * scale_updates_gpu; + float * scale_change_gpu; + + float * output_gpu; + float * loss_gpu; + float * delta_gpu; + float * rand_gpu; + float * squared_gpu; + float * norms_gpu; +#ifdef CUDNN + cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc; + cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc; + 
cudnnTensorDescriptor_t normTensorDesc; + cudnnFilterDescriptor_t weightDesc; + cudnnFilterDescriptor_t dweightDesc; + cudnnConvolutionDescriptor_t convDesc; + cudnnConvolutionFwdAlgo_t fw_algo; + cudnnConvolutionBwdDataAlgo_t bd_algo; + cudnnConvolutionBwdFilterAlgo_t bf_algo; +#endif +#endif +}; + +void free_layer(layer); + +typedef enum { + CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM +} learning_rate_policy; + +typedef struct network{ + int n; + int batch; + size_t *seen; + int *t; + float epoch; + int subdivisions; + layer *layers; + float *output; + learning_rate_policy policy; + + float learning_rate; + float momentum; + float decay; + float gamma; + float scale; + float power; + int time_steps; + int step; + int max_batches; + float *scales; + int *steps; + int num_steps; + int burn_in; + + int adam; + float B1; + float B2; + float eps; + + int inputs; + int outputs; + int truths; + int notruth; + int h, w, c; + int max_crop; + int min_crop; + float max_ratio; + float min_ratio; + int center; + float angle; + float aspect; + float exposure; + float saturation; + float hue; + int random; + + int gpu_index; + tree *hierarchy; + + float *input; + float *truth; + float *delta; + float *workspace; + int train; + int index; + float *cost; + float clip; + +#ifdef GPU + float *input_gpu; + float *truth_gpu; + float *delta_gpu; + float *output_gpu; +#endif + +} network; + +typedef struct { + int w; + int h; + float scale; + float rad; + float dx; + float dy; + float aspect; +} augment_args; + +typedef struct { + int w; + int h; + int c; + float *data; +} image; + +typedef struct{ + float x, y, w, h; +} box; + +typedef struct detection{ + box bbox; + int classes; + float *prob; + float *mask; + float objectness; + int sort_class; +} detection; + +typedef struct matrix{ + int rows, cols; + float **vals; +} matrix; + + +typedef struct{ + int w, h; + matrix X; + matrix y; + int shallow; + int *num_boxes; + box **boxes; +} data; + +typedef enum { + CLASSIFICATION_DATA, 
DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA, ISEG_DATA +} data_type; + +typedef struct load_args{ + int threads; + char **paths; + char *path; + int n; + int m; + char **labels; + int h; + int w; + int out_w; + int out_h; + int nh; + int nw; + int num_boxes; + int min, max, size; + int classes; + int background; + int scale; + int center; + int coords; + float jitter; + float angle; + float aspect; + float saturation; + float exposure; + float hue; + data *d; + image *im; + image *resized; + data_type type; + tree *hierarchy; +} load_args; + +typedef struct{ + int id; + float x,y,w,h; + float left, right, top, bottom; +} box_label; + + +network *load_network(char *cfg, char *weights, int clear); +load_args get_base_args(network *net); + +void free_data(data d); + +typedef struct node{ + void *val; + struct node *next; + struct node *prev; +} node; + +typedef struct list{ + int size; + node *front; + node *back; +} list; + +pthread_t load_data(load_args args); +list *read_data_cfg(char *filename); +list *read_cfg(char *filename); +unsigned char *read_file(char *filename); +data resize_data(data orig, int w, int h); +data *tile_data(data orig, int divs, int size); +data select_data(data *orig, int *inds); + +void forward_network(network *net); +void backward_network(network *net); +void update_network(network *net); + + +float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); +void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); +void scal_cpu(int N, float ALPHA, float *X, int INCX); +void fill_cpu(int N, float ALPHA, float * X, int INCX); +void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); +void softmax(float *input, int n, float temp, int stride, 
float *output); + +int best_3d_shift_r(image a, image b, int min, int max); +#ifdef GPU +void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); +void fill_gpu(int N, float ALPHA, float * X, int INCX); +void scal_gpu(int N, float ALPHA, float * X, int INCX); +void copy_gpu(int N, float * X, int INCX, float * Y, int INCY); + +void cuda_set_device(int n); +void cuda_free(float *x_gpu); +float *cuda_make_array(float *x, size_t n); +void cuda_pull_array(float *x_gpu, float *x, size_t n); +float cuda_mag_array(float *x_gpu, size_t n); +void cuda_push_array(float *x_gpu, float *x, size_t n); + +void forward_network_gpu(network *net); +void backward_network_gpu(network *net); +void update_network_gpu(network *net); + +float train_networks(network **nets, int n, data d, int interval); +void sync_nets(network **nets, int n, int interval); +void harmless_update_network_gpu(network *net); +#endif +image get_label(image **characters, char *string, int size); +void draw_label(image a, int r, int c, image label, const float *rgb); +void save_image(image im, const char *name); +void save_image_options(image im, const char *name, IMTYPE f, int quality); +void get_next_batch(data d, int n, int offset, float *X, float *y); +void grayscale_image_3c(image im); +void normalize_image(image p); +void matrix_to_csv(matrix m); +float train_network_sgd(network *net, data d, int n); +void rgbgr_image(image im); +data copy_data(data d); +data concat_data(data d1, data d2); +data load_cifar10_data(char *filename); +float matrix_topk_accuracy(matrix truth, matrix guess, int k); +void matrix_add_matrix(matrix from, matrix to); +void scale_matrix(matrix m, float scale); +matrix csv_to_matrix(char *filename); +float *network_accuracies(network *net, data d, int n); +float train_network_datum(network *net); +image make_random_image(int w, int h, int c); + +void denormalize_connected_layer(layer l); +void denormalize_convolutional_layer(layer l); +void 
statistics_connected_layer(layer l); +void rescale_weights(layer l, float scale, float trans); +void rgbgr_weights(layer l); +image *get_weights(layer l); + +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, int avg, float hier_thresh, int w, int h, int fps, int fullscreen); +void get_detection_detections(layer l, int w, int h, float thresh, detection *dets); + +char *option_find_str(list *l, char *key, char *def); +int option_find_int(list *l, char *key, int def); +int option_find_int_quiet(list *l, char *key, int def); + +network *parse_network_cfg(char *filename); +void save_weights(network *net, char *filename); +void load_weights(network *net, char *filename); +void save_weights_upto(network *net, char *filename, int cutoff); +void load_weights_upto(network *net, char *filename, int start, int cutoff); + +void zero_objectness(layer l); +void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets); +int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets); +void free_network(network *net); +void set_batch_network(network *net, int b); +void set_temp_network(network *net, float t); +image load_image(char *filename, int w, int h, int c); +image load_image_color(char *filename, int w, int h); +image make_image(int w, int h, int c); +image resize_image(image im, int w, int h); +void censor_image(image im, int dx, int dy, int w, int h); +image letterbox_image(image im, int w, int h); +image crop_image(image im, int dx, int dy, int w, int h); +image center_crop_image(image im, int w, int h); +image resize_min(image im, int min); +image resize_max(image im, int max); +image threshold_image(image im, float thresh); +image mask_to_rgb(image mask); +int resize_network(network *net, int w, int h); +void free_matrix(matrix m); 
+void test_resize(char *filename); +int show_image(image p, const char *name, int ms); +image copy_image(image p); +void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b); +float get_current_rate(network *net); +void composite_3d(char *f1, char *f2, char *out, int delta); +data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h); +size_t get_current_batch(network *net); +void constrain_image(image im); +image get_network_image_layer(network *net, int i); +layer get_network_output_layer(network *net); +void top_predictions(network *net, int n, int *index); +void flip_image(image a); +image float_to_image(int w, int h, int c, float *data); +void ghost_image(image source, image dest, int dx, int dy); +float network_accuracy(network *net, data d); +void random_distort_image(image im, float hue, float saturation, float exposure); +void fill_image(image m, float s); +image grayscale_image(image im); +void rotate_image_cw(image im, int times); +double what_time_is_it_now(); +image rotate_image(image m, float rad); +void visualize_network(network *net); +float box_iou(box a, box b); +data load_all_cifar10(); +box_label *read_boxes(char *filename, int *n); +box float_to_box(float *f, int stride); +void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes); + +matrix network_predict_data(network *net, data test); +image **load_alphabet(); +image get_network_image(network *net); +float *network_predict(network *net, float *input); + +int network_width(network *net); +int network_height(network *net); +float *network_predict_image(network *net, image im); +void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets); +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num); +void free_detections(detection *dets, int n); + +void 
reset_network_state(network *net, int b); + +char **get_labels(char *filename); +void do_nms_obj(detection *dets, int total, int classes, float thresh); +void do_nms_sort(detection *dets, int total, int classes, float thresh); + +matrix make_matrix(int rows, int cols); + +#ifdef OPENCV +void *open_video_stream(const char *f, int c, int w, int h, int fps); +image get_image_from_stream(void *p); +void make_window(char *name, int w, int h, int fullscreen); +#endif + +void free_image(image m); +float train_network(network *net, data d); +pthread_t load_data_in_thread(load_args args); +void load_data_blocking(load_args args); +list *get_paths(char *filename); +void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride); +void change_leaves(tree *t, char *leaf_list); + +int find_int_arg(int argc, char **argv, char *arg, int def); +float find_float_arg(int argc, char **argv, char *arg, float def); +int find_arg(int argc, char* argv[], char *arg); +char *find_char_arg(int argc, char **argv, char *arg, char *def); +char *basecfg(char *cfgfile); +void find_replace(char *str, char *orig, char *rep, char *output); +void free_ptrs(void **ptrs, int n); +char *fgetl(FILE *fp); +void strip(char *s); +float sec(clock_t clocks); +void **list_to_array(list *l); +void top_k(float *a, int n, int k, int *index); +int *read_map(char *filename); +void error(const char *s); +int max_index(float *a, int n); +int max_int_index(int *a, int n); +int sample_array(float *a, int n); +int *random_index_order(int min, int max); +void free_list(list *l); +float mse_array(float *a, int n); +float variance_array(float *a, int n); +float mag_array(float *a, int n); +void scale_array(float *a, int n, float s); +float mean_array(float *a, int n); +float sum_array(float *a, int n); +void normalize_array(float *a, int n); +int *read_intlist(char *s, int *n, int d); +size_t rand_size_t(); +float rand_normal(); +float rand_uniform(float min, float max); + +#ifdef 
__cplusplus +} +#endif +#endif diff --git a/workloads/realworld/pinned/darknet/predictions.jpg b/workloads/realworld/pinned/darknet/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c92d70d77e70e11853e9838ca90b46eb71a18ffa Binary files /dev/null and b/workloads/realworld/pinned/darknet/predictions.jpg differ diff --git a/workloads/realworld/pinned/darknet/python/darknet.py b/workloads/realworld/pinned/darknet/python/darknet.py new file mode 100644 index 0000000000000000000000000000000000000000..b14d24485d86aa69f3991be79ec4f25c2b8e5a59 --- /dev/null +++ b/workloads/realworld/pinned/darknet/python/darknet.py @@ -0,0 +1,156 @@ +from ctypes import * +import math +import random + +def sample(probs): + s = sum(probs) + probs = [a/s for a in probs] + r = random.uniform(0, 1) + for i in range(len(probs)): + r = r - probs[i] + if r <= 0: + return i + return len(probs)-1 + +def c_array(ctype, values): + arr = (ctype*len(values))() + arr[:] = values + return arr + +class BOX(Structure): + _fields_ = [("x", c_float), + ("y", c_float), + ("w", c_float), + ("h", c_float)] + +class DETECTION(Structure): + _fields_ = [("bbox", BOX), + ("classes", c_int), + ("prob", POINTER(c_float)), + ("mask", POINTER(c_float)), + ("objectness", c_float), + ("sort_class", c_int)] + + +class IMAGE(Structure): + _fields_ = [("w", c_int), + ("h", c_int), + ("c", c_int), + ("data", POINTER(c_float))] + +class METADATA(Structure): + _fields_ = [("classes", c_int), + ("names", POINTER(c_char_p))] + + + +#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL) +lib = CDLL("libdarknet.so", RTLD_GLOBAL) +lib.network_width.argtypes = [c_void_p] +lib.network_width.restype = c_int +lib.network_height.argtypes = [c_void_p] +lib.network_height.restype = c_int + +predict = lib.network_predict +predict.argtypes = [c_void_p, POINTER(c_float)] +predict.restype = POINTER(c_float) + +set_gpu = lib.cuda_set_device +set_gpu.argtypes = [c_int] + +make_image = 
lib.make_image +make_image.argtypes = [c_int, c_int, c_int] +make_image.restype = IMAGE + +get_network_boxes = lib.get_network_boxes +get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)] +get_network_boxes.restype = POINTER(DETECTION) + +make_network_boxes = lib.make_network_boxes +make_network_boxes.argtypes = [c_void_p] +make_network_boxes.restype = POINTER(DETECTION) + +free_detections = lib.free_detections +free_detections.argtypes = [POINTER(DETECTION), c_int] + +free_ptrs = lib.free_ptrs +free_ptrs.argtypes = [POINTER(c_void_p), c_int] + +network_predict = lib.network_predict +network_predict.argtypes = [c_void_p, POINTER(c_float)] + +reset_rnn = lib.reset_rnn +reset_rnn.argtypes = [c_void_p] + +load_net = lib.load_network +load_net.argtypes = [c_char_p, c_char_p, c_int] +load_net.restype = c_void_p + +do_nms_obj = lib.do_nms_obj +do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +do_nms_sort = lib.do_nms_sort +do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +free_image = lib.free_image +free_image.argtypes = [IMAGE] + +letterbox_image = lib.letterbox_image +letterbox_image.argtypes = [IMAGE, c_int, c_int] +letterbox_image.restype = IMAGE + +load_meta = lib.get_metadata +lib.get_metadata.argtypes = [c_char_p] +lib.get_metadata.restype = METADATA + +load_image = lib.load_image_color +load_image.argtypes = [c_char_p, c_int, c_int] +load_image.restype = IMAGE + +rgbgr_image = lib.rgbgr_image +rgbgr_image.argtypes = [IMAGE] + +predict_image = lib.network_predict_image +predict_image.argtypes = [c_void_p, IMAGE] +predict_image.restype = POINTER(c_float) + +def classify(net, meta, im): + out = predict_image(net, im) + res = [] + for i in range(meta.classes): + res.append((meta.names[i], out[i])) + res = sorted(res, key=lambda x: -x[1]) + return res + +def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): + im = load_image(image, 0, 0) + num = c_int(0) + pnum 
= pointer(num) + predict_image(net, im) + dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum) + num = pnum[0] + if (nms): do_nms_obj(dets, num, meta.classes, nms); + + res = [] + for j in range(num): + for i in range(meta.classes): + if dets[j].prob[i] > 0: + b = dets[j].bbox + res.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h))) + res = sorted(res, key=lambda x: -x[1]) + free_image(im) + free_detections(dets, num) + return res + +if __name__ == "__main__": + #net = load_net("cfg/densenet201.cfg", "/home/pjreddie/trained/densenet201.weights", 0) + #im = load_image("data/wolf.jpg", 0, 0) + #meta = load_meta("cfg/imagenet1k.data") + #r = classify(net, meta, im) + #print r[:10] + net = load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0) + meta = load_meta("cfg/coco.data") + r = detect(net, meta, "data/dog.jpg") + print(r) + + diff --git a/workloads/realworld/pinned/darknet/python/proverbot.py b/workloads/realworld/pinned/darknet/python/proverbot.py new file mode 100644 index 0000000000000000000000000000000000000000..095aae8f8bf8bbe47ea1768a6e2c948bb0ff8f85 --- /dev/null +++ b/workloads/realworld/pinned/darknet/python/proverbot.py @@ -0,0 +1,37 @@ +from darknet import * + +def predict_tactic(net, s): + prob = 0 + d = c_array(c_float, [0.0]*256) + tac = '' + if not len(s): + s = '\n' + for c in s[:-1]: + d[ord(c)] = 1 + pred = predict(net, d) + d[ord(c)] = 0 + c = s[-1] + while 1: + d[ord(c)] = 1 + pred = predict(net, d) + d[ord(c)] = 0 + pred = [pred[i] for i in range(256)] + ind = sample(pred) + c = chr(ind) + prob += math.log(pred[ind]) + if len(tac) and tac[-1] == '.': + break + tac = tac + c + return (tac, prob) + +def predict_tactics(net, s, n): + tacs = [] + for i in range(n): + reset_rnn(net) + tacs.append(predict_tactic(net, s)) + tacs = sorted(tacs, key=lambda x: -x[1]) + return tacs + +net = load_net("cfg/coq.test.cfg", "/home/pjreddie/backup/coq.backup", 0) +t = predict_tactics(net, "+++++\n", 10) +print t diff --git 
a/workloads/realworld/pinned/darknet/resnet18/run_resnet18.sh b/workloads/realworld/pinned/darknet/resnet18/run_resnet18.sh new file mode 100755 index 0000000000000000000000000000000000000000..10257ad02affc17f8287ea42917813fe320f5f23 --- /dev/null +++ b/workloads/realworld/pinned/darknet/resnet18/run_resnet18.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg diff --git a/workloads/realworld/pinned/darknet/resnet18/run_super.sh b/workloads/realworld/pinned/darknet/resnet18/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..10257ad02affc17f8287ea42917813fe320f5f23 --- /dev/null +++ b/workloads/realworld/pinned/darknet/resnet18/run_super.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg diff --git a/workloads/realworld/pinned/darknet/resnet18_b/run_super.sh b/workloads/realworld/pinned/darknet/resnet18_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..012635a1ce64ecda462e50097be554185989ae7a --- /dev/null +++ b/workloads/realworld/pinned/darknet/resnet18_b/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier test ../cfg/imagenet1k.data ../cfg/resnet18_b.cfg diff --git a/workloads/realworld/pinned/darknet/resnet18_t/run_super.sh b/workloads/realworld/pinned/darknet/resnet18_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..0eb59b3bd65cf0186c5ed5f36eff5ec34d54298c --- /dev/null +++ b/workloads/realworld/pinned/darknet/resnet18_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier train 
../cfg/imagenet1k.data ../cfg/resnet18_t.cfg \ No newline at end of file diff --git a/workloads/realworld/pinned/darknet/resnet50/run_resnet50.sh b/workloads/realworld/pinned/darknet/resnet50/run_resnet50.sh new file mode 100755 index 0000000000000000000000000000000000000000..ce88111af06600203c1ae94421134eb3404f51a4 --- /dev/null +++ b/workloads/realworld/pinned/darknet/resnet50/run_resnet50.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet50.cfg ../../../../../data/darknet/resnet50.weights ../data/dog.jpg diff --git a/workloads/realworld/pinned/darknet/resnet50/run_super.sh b/workloads/realworld/pinned/darknet/resnet50/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ce88111af06600203c1ae94421134eb3404f51a4 --- /dev/null +++ b/workloads/realworld/pinned/darknet/resnet50/run_super.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet50.cfg ../../../../../data/darknet/resnet50.weights ../data/dog.jpg diff --git a/workloads/realworld/pinned/darknet/resnet50_b/run_super.sh b/workloads/realworld/pinned/darknet/resnet50_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..e6f1b1d59b612bef36d04af547bf61808261eb12 --- /dev/null +++ b/workloads/realworld/pinned/darknet/resnet50_b/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier test ../cfg/imagenet1k.data ../cfg/resnet50_b.cfg diff --git a/workloads/realworld/pinned/darknet/resnet50_t/run_super.sh b/workloads/realworld/pinned/darknet/resnet50_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..4d4c7feebd1bd5bdcded72e3d4cf58045949ac90 --- /dev/null +++ b/workloads/realworld/pinned/darknet/resnet50_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg 
../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier train ../cfg/imagenet1k.data ../cfg/resnet50_t.cfg diff --git a/workloads/realworld/pinned/darknet/scripts/dice_label.sh b/workloads/realworld/pinned/darknet/scripts/dice_label.sh new file mode 100644 index 0000000000000000000000000000000000000000..f19f8a49481b46d5a04dd18b1b05af8928b21957 --- /dev/null +++ b/workloads/realworld/pinned/darknet/scripts/dice_label.sh @@ -0,0 +1,20 @@ +mkdir -p images +mkdir -p images/orig +mkdir -p images/train +mkdir -p images/val + +ffmpeg -i Face1.mp4 images/orig/face1_%6d.jpg +ffmpeg -i Face2.mp4 images/orig/face2_%6d.jpg +ffmpeg -i Face3.mp4 images/orig/face3_%6d.jpg +ffmpeg -i Face4.mp4 images/orig/face4_%6d.jpg +ffmpeg -i Face5.mp4 images/orig/face5_%6d.jpg +ffmpeg -i Face6.mp4 images/orig/face6_%6d.jpg + +mogrify -resize 100x100^ -gravity center -crop 100x100+0+0 +repage images/orig/* + +ls images/orig/* | shuf | head -n 1000 | xargs mv -t images/val +mv images/orig/* images/train + +find `pwd`/images/train > dice.train.list -name \*.jpg +find `pwd`/images/val > dice.val.list -name \*.jpg + diff --git a/workloads/realworld/pinned/darknet/scripts/gen_tactic.sh b/workloads/realworld/pinned/darknet/scripts/gen_tactic.sh new file mode 100755 index 0000000000000000000000000000000000000000..ffa30d27754dacdd03bd5996d41cbfab14db0f39 --- /dev/null +++ b/workloads/realworld/pinned/darknet/scripts/gen_tactic.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Usage: +# wget http://pjreddie.com/media/files/peek.weights +# scripts/gen_tactic.sh < data/goal.txt +./darknet rnn generatetactic cfg/gru.cfg peek.weights 2>/dev/null diff --git a/workloads/realworld/pinned/darknet/scripts/get_coco_dataset.sh b/workloads/realworld/pinned/darknet/scripts/get_coco_dataset.sh new file mode 100644 index 0000000000000000000000000000000000000000..28463015d1748fd331e071a0a778c6d4500b29ef --- /dev/null +++ b/workloads/realworld/pinned/darknet/scripts/get_coco_dataset.sh @@ -0,0 +1,31 @@ 
+#!/bin/bash + +# Clone COCO API +git clone https://github.com/pdollar/coco +cd coco + +mkdir images +cd images + +# Download Images +wget -c https://pjreddie.com/media/files/train2014.zip +wget -c https://pjreddie.com/media/files/val2014.zip + +# Unzip +unzip -q train2014.zip +unzip -q val2014.zip + +cd .. + +# Download COCO Metadata +wget -c https://pjreddie.com/media/files/instances_train-val2014.zip +wget -c https://pjreddie.com/media/files/coco/5k.part +wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part +wget -c https://pjreddie.com/media/files/coco/labels.tgz +tar xzf labels.tgz +unzip -q instances_train-val2014.zip + +# Set Up Image Lists +paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt +paste <(awk "{print \"$PWD\"}" trainvalno5k.txt + diff --git a/workloads/realworld/pinned/darknet/scripts/imagenet_label.sh b/workloads/realworld/pinned/darknet/scripts/imagenet_label.sh new file mode 100644 index 0000000000000000000000000000000000000000..01e4306ee3cf7322427374f01c766bcdef970922 --- /dev/null +++ b/workloads/realworld/pinned/darknet/scripts/imagenet_label.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +mkdir -p labelled +wd=`pwd` + +for f in val/*.xml; +do +label=`grep -m1 "" $f | grep -oP '\K[^<]*'` +im=`echo $f | sed 's/val/imgs/; s/xml/JPEG/'` +out=`echo $im | sed 's/JPEG/'${label}'.JPEG/; s/imgs/labelled/'` +ln -s ${wd}/$im ${wd}/$out +done + +find ${wd}/labelled -name \*.JPEG > inet.val.list + diff --git a/workloads/realworld/pinned/darknet/scripts/voc_label.py b/workloads/realworld/pinned/darknet/scripts/voc_label.py new file mode 100644 index 0000000000000000000000000000000000000000..679fc366890d9eccf15124f950a274d8ad24fc83 --- /dev/null +++ b/workloads/realworld/pinned/darknet/scripts/voc_label.py @@ -0,0 +1,59 @@ +import xml.etree.ElementTree as ET +import pickle +import os +from os import listdir, getcwd +from os.path import join + +sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 
'test')] + +classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] + + +def convert(size, box): + dw = 1./(size[0]) + dh = 1./(size[1]) + x = (box[0] + box[1])/2.0 - 1 + y = (box[2] + box[3])/2.0 - 1 + w = box[1] - box[0] + h = box[3] - box[2] + x = x*dw + w = w*dw + y = y*dh + h = h*dh + return (x,y,w,h) + +def convert_annotation(year, image_id): + in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id)) + out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w') + tree=ET.parse(in_file) + root = tree.getroot() + size = root.find('size') + w = int(size.find('width').text) + h = int(size.find('height').text) + + for obj in root.iter('object'): + difficult = obj.find('difficult').text + cls = obj.find('name').text + if cls not in classes or int(difficult)==1: + continue + cls_id = classes.index(cls) + xmlbox = obj.find('bndbox') + b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) + bb = convert((w,h), b) + out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') + +wd = getcwd() + +for year, image_set in sets: + if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)): + os.makedirs('VOCdevkit/VOC%s/labels/'%(year)) + image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split() + list_file = open('%s_%s.txt'%(year, image_set), 'w') + for image_id in image_ids: + list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id)) + convert_annotation(year, image_id) + list_file.close() + +os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt") +os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt") + diff --git 
a/workloads/realworld/pinned/darknet/src/activation_kernels.cu b/workloads/realworld/pinned/darknet/src/activation_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..659b44fb85fba664e37b6e8d6aa1abee39accdd2 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/activation_kernels.cu @@ -0,0 +1,206 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "activations.h" +#include "cuda_dark.h" +} + + +__device__ float lhtan_activate_kernel(float x) +{ + if(x < 0) return .001f*x; + if(x > 1) return .001f*(x-1.f) + 1.f; + return x; +} +__device__ float lhtan_gradient_kernel(float x) +{ + if(x > 0 && x < 1) return 1; + return .001; +} + +__device__ float hardtan_activate_kernel(float x) +{ + if (x < -1) return -1; + if (x > 1) return 1; + return x; +} +__device__ float linear_activate_kernel(float x){return x;} +__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));} +__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;} +__device__ float relu_activate_kernel(float x){return x*(x>0);} +__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);} +__device__ float selu_activate_kernel(float x){return (x >= 0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x)-1);} +__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;} +__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;} +__device__ float leaky_activate_kernel(float x){return (x>0) ? 
x : .1f*x;} +__device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);} +__device__ float plse_activate_kernel(float x) +{ + if(x < -4) return .01f * (x + 4); + if(x > 4) return .01f * (x - 4) + 1; + return .125f*x + .5f; +} +__device__ float stair_activate_kernel(float x) +{ + int n = floorf(x); + if (n%2 == 0) return floorf(x/2); + else return (x - n) + floorf(x/2); +} + + +__device__ float hardtan_gradient_kernel(float x) +{ + if (x > -1 && x < 1) return 1; + return 0; +} +__device__ float linear_gradient_kernel(float x){return 1;} +__device__ float logistic_gradient_kernel(float x){return (1-x)*x;} +__device__ float loggy_gradient_kernel(float x) +{ + float y = (x+1)/2; + return 2*(1-y)*y; +} +__device__ float relu_gradient_kernel(float x){return (x>0);} +__device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);} +__device__ float selu_gradient_kernel(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);} +__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;} +__device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;} +__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;} +__device__ float tanh_gradient_kernel(float x){return 1-x*x;} +__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? 
.01f : .125f;} +__device__ float stair_gradient_kernel(float x) +{ + if (floorf(x) == x) return 0; + return 1; +} + +__device__ float activate_kernel(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_activate_kernel(x); + case LOGISTIC: + return logistic_activate_kernel(x); + case LOGGY: + return loggy_activate_kernel(x); + case RELU: + return relu_activate_kernel(x); + case ELU: + return elu_activate_kernel(x); + case SELU: + return selu_activate_kernel(x); + case RELIE: + return relie_activate_kernel(x); + case RAMP: + return ramp_activate_kernel(x); + case LEAKY: + return leaky_activate_kernel(x); + case TANH: + return tanh_activate_kernel(x); + case PLSE: + return plse_activate_kernel(x); + case STAIR: + return stair_activate_kernel(x); + case HARDTAN: + return hardtan_activate_kernel(x); + case LHTAN: + return lhtan_activate_kernel(x); + } + return 0; +} + +__device__ float gradient_kernel(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_gradient_kernel(x); + case LOGISTIC: + return logistic_gradient_kernel(x); + case LOGGY: + return loggy_gradient_kernel(x); + case RELU: + return relu_gradient_kernel(x); + case ELU: + return elu_gradient_kernel(x); + case SELU: + return selu_gradient_kernel(x); + case RELIE: + return relie_gradient_kernel(x); + case RAMP: + return ramp_gradient_kernel(x); + case LEAKY: + return leaky_gradient_kernel(x); + case TANH: + return tanh_gradient_kernel(x); + case PLSE: + return plse_gradient_kernel(x); + case STAIR: + return stair_gradient_kernel(x); + case HARDTAN: + return hardtan_gradient_kernel(x); + case LHTAN: + return lhtan_gradient_kernel(x); + } + return 0; +} + +__global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int i = id % s; + int b = id / s; + float x1 = x[b*s + i]; + float x2 = x[b*s + s/2 + i]; + if(id < n) { + float de = dy[id]; + dx[b*s 
+ i] = x2*de; + dx[b*s + s/2 + i] = x1*de; + } +} + +extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y) +{ + binary_gradient_array_kernel<<>>(x, dx, n/2, size, a, y); + check_error(cudaPeekAtLastError()); +} +__global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int i = id % s; + int b = id / s; + float x1 = x[b*s + i]; + float x2 = x[b*s + s/2 + i]; + if(id < n) y[id] = x1*x2; +} + +extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y) +{ + binary_activate_array_kernel<<>>(x, n/2, size, a, y); + check_error(cudaPeekAtLastError()); +} + +__global__ void activate_array_kernel(float *x, int n, ACTIVATION a) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n) x[i] = activate_kernel(x[i], a); +} + +__global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n) delta[i] *= gradient_kernel(x[i], a); +} + +extern "C" void activate_array_gpu(float *x, int n, ACTIVATION a) +{ + activate_array_kernel<<>>(x, n, a); + check_error(cudaPeekAtLastError()); +} + +extern "C" void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta) +{ + gradient_array_kernel<<>>(x, n, a, delta); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/pinned/darknet/src/activation_layer.c b/workloads/realworld/pinned/darknet/src/activation_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..0791772336e4d1b001ed1b76bbbf21ee8d6fa24f --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/activation_layer.c @@ -0,0 +1,63 @@ +#include "activation_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include 
+ +layer make_activation_layer(int batch, int inputs, ACTIVATION activation) +{ + layer l = {0}; + l.type = ACTIVE; + + l.inputs = inputs; + l.outputs = inputs; + l.batch=batch; + + l.output = calloc(batch*inputs, sizeof(float*)); + l.delta = calloc(batch*inputs, sizeof(float*)); + + l.forward = forward_activation_layer; + l.backward = backward_activation_layer; +#ifdef GPU + l.forward_gpu = forward_activation_layer_gpu; + l.backward_gpu = backward_activation_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); +#endif + l.activation = activation; + fprintf(stderr, "Activation Layer: %d inputs\n", inputs); + return l; +} + +void forward_activation_layer(layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_activation_layer(layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_activation_layer_gpu(layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_activation_layer_gpu(layer l, network net) +{ + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/pinned/darknet/src/activation_layer.h b/workloads/realworld/pinned/darknet/src/activation_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..42118a84e83f59a8997e354959404d1283a3004c --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/activation_layer.h @@ -0,0 +1,19 @@ +#ifndef ACTIVATION_LAYER_H +#define ACTIVATION_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer 
make_activation_layer(int batch, int inputs, ACTIVATION activation);
+
+void forward_activation_layer(layer l, network net);
+void backward_activation_layer(layer l, network net);
+
+#ifdef GPU
+void forward_activation_layer_gpu(layer l, network net);
+void backward_activation_layer_gpu(layer l, network net);
+#endif
+
+#endif
+
diff --git a/workloads/realworld/pinned/darknet/src/activations.c b/workloads/realworld/pinned/darknet/src/activations.c
new file mode 100644
index 0000000000000000000000000000000000000000..da1a17a89b46b6c41fa80b5dd113e1b30c910712
--- /dev/null
+++ b/workloads/realworld/pinned/darknet/src/activations.c
@@ -0,0 +1,150 @@
+#include "activations.h"
+
+// NOTE(review): the targets of the four #includes below were lost during
+// text extraction (angle-bracket names stripped) — presumably the C
+// stdio/stdlib/string/math headers; confirm against the upstream file.
+#include
+#include
+#include
+#include
+
+// Returns the canonical config-file name for an activation; any enum value
+// not listed falls through to "relu".
+char *get_activation_string(ACTIVATION a)
+{
+    switch(a){
+        case LOGISTIC:
+            return "logistic";
+        case LOGGY:
+            return "loggy";
+        case RELU:
+            return "relu";
+        case ELU:
+            return "elu";
+        case SELU:
+            return "selu";
+        case RELIE:
+            return "relie";
+        case RAMP:
+            return "ramp";
+        case LINEAR:
+            return "linear";
+        case TANH:
+            return "tanh";
+        case PLSE:
+            return "plse";
+        case LEAKY:
+            return "leaky";
+        case STAIR:
+            return "stair";
+        case HARDTAN:
+            return "hardtan";
+        case LHTAN:
+            return "lhtan";
+        default:
+            break;
+    }
+    return "relu";
+}
+
+// Parses an activation name (exact string match) into the ACTIVATION enum;
+// warns on stderr and falls back to RELU when the name is not recognized.
+ACTIVATION get_activation(char *s)
+{
+    if (strcmp(s, "logistic")==0) return LOGISTIC;
+    if (strcmp(s, "loggy")==0) return LOGGY;
+    if (strcmp(s, "relu")==0) return RELU;
+    if (strcmp(s, "elu")==0) return ELU;
+    if (strcmp(s, "selu")==0) return SELU;
+    if (strcmp(s, "relie")==0) return RELIE;
+    if (strcmp(s, "plse")==0) return PLSE;
+    if (strcmp(s, "hardtan")==0) return HARDTAN;
+    if (strcmp(s, "lhtan")==0) return LHTAN;
+    if (strcmp(s, "linear")==0) return LINEAR;
+    if (strcmp(s, "ramp")==0) return RAMP;
+    if (strcmp(s, "leaky")==0) return LEAKY;
+    if (strcmp(s, "tanh")==0) return TANH;
+    if (strcmp(s, "stair")==0) return STAIR;
+    fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s);
+
return RELU; +} + +float activate(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_activate(x); + case LOGISTIC: + return logistic_activate(x); + case LOGGY: + return loggy_activate(x); + case RELU: + return relu_activate(x); + case ELU: + return elu_activate(x); + case SELU: + return selu_activate(x); + case RELIE: + return relie_activate(x); + case RAMP: + return ramp_activate(x); + case LEAKY: + return leaky_activate(x); + case TANH: + return tanh_activate(x); + case PLSE: + return plse_activate(x); + case STAIR: + return stair_activate(x); + case HARDTAN: + return hardtan_activate(x); + case LHTAN: + return lhtan_activate(x); + } + return 0; +} + +void activate_array(float *x, const int n, const ACTIVATION a) +{ + int i; + for(i = 0; i < n; ++i){ + x[i] = activate(x[i], a); + } +} + +float gradient(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_gradient(x); + case LOGISTIC: + return logistic_gradient(x); + case LOGGY: + return loggy_gradient(x); + case RELU: + return relu_gradient(x); + case ELU: + return elu_gradient(x); + case SELU: + return selu_gradient(x); + case RELIE: + return relie_gradient(x); + case RAMP: + return ramp_gradient(x); + case LEAKY: + return leaky_gradient(x); + case TANH: + return tanh_gradient(x); + case PLSE: + return plse_gradient(x); + case STAIR: + return stair_gradient(x); + case HARDTAN: + return hardtan_gradient(x); + case LHTAN: + return lhtan_gradient(x); + } + return 0; +} + +void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta) +{ + int i; + for(i = 0; i < n; ++i){ + delta[i] *= gradient(x[i], a); + } +} + diff --git a/workloads/realworld/pinned/darknet/src/activations.h b/workloads/realworld/pinned/darknet/src/activations.h new file mode 100644 index 0000000000000000000000000000000000000000..eec28d5b692ede3975e01a4d454ace20e8a9fdd8 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/activations.h @@ -0,0 +1,87 @@ +#ifndef ACTIVATIONS_H +#define 
ACTIVATIONS_H +#include "darknet.h" +#include "cuda_dark.h" +#include "math.h" + +ACTIVATION get_activation(char *s); + +char *get_activation_string(ACTIVATION a); +float activate(float x, ACTIVATION a); +float gradient(float x, ACTIVATION a); +void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta); +void activate_array(float *x, const int n, const ACTIVATION a); +#ifdef GPU +void activate_array_gpu(float *x, int n, ACTIVATION a); +void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta); +#endif + +static inline float stair_activate(float x) +{ + int n = floor(x); + if (n%2 == 0) return floor(x/2.); + else return (x - n) + floor(x/2.); +} +static inline float hardtan_activate(float x) +{ + if (x < -1) return -1; + if (x > 1) return 1; + return x; +} +static inline float linear_activate(float x){return x;} +static inline float logistic_activate(float x){return 1./(1. + exp(-x));} +static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;} +static inline float relu_activate(float x){return x*(x>0);} +static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);} +static inline float selu_activate(float x){return (x >= 0)*1.0507*x + (x < 0)*1.0507*1.6732*(exp(x)-1);} +static inline float relie_activate(float x){return (x>0) ? x : .01*x;} +static inline float ramp_activate(float x){return x*(x>0)+.1*x;} +static inline float leaky_activate(float x){return (x>0) ? 
x : .1*x;} +static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);} +static inline float plse_activate(float x) +{ + if(x < -4) return .01 * (x + 4); + if(x > 4) return .01 * (x - 4) + 1; + return .125*x + .5; +} + +static inline float lhtan_activate(float x) +{ + if(x < 0) return .001*x; + if(x > 1) return .001*(x-1) + 1; + return x; +} +static inline float lhtan_gradient(float x) +{ + if(x > 0 && x < 1) return 1; + return .001; +} + +static inline float hardtan_gradient(float x) +{ + if (x > -1 && x < 1) return 1; + return 0; +} +static inline float linear_gradient(float x){return 1;} +static inline float logistic_gradient(float x){return (1-x)*x;} +static inline float loggy_gradient(float x) +{ + float y = (x+1.)/2.; + return 2*(1-y)*y; +} +static inline float stair_gradient(float x) +{ + if (floor(x) == x) return 0; + return 1; +} +static inline float relu_gradient(float x){return (x>0);} +static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);} +static inline float selu_gradient(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);} +static inline float relie_gradient(float x){return (x>0) ? 1 : .01;} +static inline float ramp_gradient(float x){return (x>0)+.1;} +static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;} +static inline float tanh_gradient(float x){return 1-x*x;} +static inline float plse_gradient(float x){return (x < 0 || x > 1) ? 
.01 : .125;} + +#endif + diff --git a/workloads/realworld/pinned/darknet/src/avgpool_layer.c b/workloads/realworld/pinned/darknet/src/avgpool_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..c44ce897a10023bab054c9ba53565bcdee165261 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/avgpool_layer.c @@ -0,0 +1,73 @@ +#include "avgpool_layer.h" +#include "cuda_dark.h" +#include + +avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) +{ + fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c); + avgpool_layer l = {0}; + l.type = AVGPOOL; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.out_w = 1; + l.out_h = 1; + l.out_c = c; + l.outputs = l.out_c; + l.inputs = h*w*c; + int output_size = l.outputs * batch; + // l.output = calloc(output_size, sizeof(float)); + // l.delta = calloc(output_size, sizeof(float)); + cudaMallocHost(&l.output, output_size*sizeof(float)); + cudaMallocHost(&l.delta, output_size*sizeof(float)); + l.forward = forward_avgpool_layer; + l.backward = backward_avgpool_layer; + #ifdef GPU + l.forward_gpu = forward_avgpool_layer_gpu; + l.backward_gpu = backward_avgpool_layer_gpu; + l.output_gpu = cuda_make_array(l.output, output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); + #endif + return l; +} + +void resize_avgpool_layer(avgpool_layer *l, int w, int h) +{ + l->w = w; + l->h = h; + l->inputs = h*w*l->c; +} + +void forward_avgpool_layer(const avgpool_layer l, network net) +{ + int b,i,k; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < l.c; ++k){ + int out_index = k + b*l.c; + l.output[out_index] = 0; + for(i = 0; i < l.h*l.w; ++i){ + int in_index = i + l.h*l.w*(k + b*l.c); + l.output[out_index] += net.input[in_index]; + } + l.output[out_index] /= l.h*l.w; + } + } +} + +void backward_avgpool_layer(const avgpool_layer l, network net) +{ + int b,i,k; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < l.c; ++k){ + int out_index = k + b*l.c; + for(i = 0; i < l.h*l.w; ++i){ + int 
in_index = i + l.h*l.w*(k + b*l.c); + net.delta[in_index] += l.delta[out_index] / (l.h*l.w); + } + } + } +} + diff --git a/workloads/realworld/pinned/darknet/src/avgpool_layer.h b/workloads/realworld/pinned/darknet/src/avgpool_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..576ad1db9e9cb87640b0c3f764e2bbfbaae4b2b3 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/avgpool_layer.h @@ -0,0 +1,23 @@ +#ifndef AVGPOOL_LAYER_H +#define AVGPOOL_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +typedef layer avgpool_layer; + +image get_avgpool_image(avgpool_layer l); +avgpool_layer make_avgpool_layer(int batch, int w, int h, int c); +void resize_avgpool_layer(avgpool_layer *l, int w, int h); +void forward_avgpool_layer(const avgpool_layer l, network net); +void backward_avgpool_layer(const avgpool_layer l, network net); + +#ifdef GPU +void forward_avgpool_layer_gpu(avgpool_layer l, network net); +void backward_avgpool_layer_gpu(avgpool_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/pinned/darknet/src/avgpool_layer_kernels.cu b/workloads/realworld/pinned/darknet/src/avgpool_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..55e5ec372d251e1d4b0c501563f9240437595795 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/avgpool_layer_kernels.cu @@ -0,0 +1,61 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "avgpool_layer.h" +#include "cuda_dark.h" +} + +__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int k = id % c; + id /= c; + int b = id; + + int i; + int out_index = (k + c*b); + output[out_index] = 0; + for(i = 0; i < w*h; ++i){ + int in_index = i + h*w*(k + b*c); + output[out_index] += input[in_index]; + } + 
output[out_index] /= w*h; +} + +__global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int k = id % c; + id /= c; + int b = id; + + int i; + int out_index = (k + c*b); + for(i = 0; i < w*h; ++i){ + int in_index = i + h*w*(k + b*c); + in_delta[in_index] += out_delta[out_index] / (w*h); + } +} + +extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network net) +{ + size_t n = layer.c*layer.batch; + + forward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, net.input_gpu, layer.output_gpu); + check_error(cudaPeekAtLastError()); +} + +extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network net) +{ + size_t n = layer.c*layer.batch; + + backward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, net.delta_gpu, layer.delta_gpu); + check_error(cudaPeekAtLastError()); +} + diff --git a/workloads/realworld/pinned/darknet/src/batchnorm_layer.c b/workloads/realworld/pinned/darknet/src/batchnorm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..ebff387cc4b0365173fb6727efd80ebc80bfbd41 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/batchnorm_layer.c @@ -0,0 +1,279 @@ +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "blas.h" +#include + +layer make_batchnorm_layer(int batch, int w, int h, int c) +{ + fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c); + layer l = {0}; + l.type = BATCHNORM; + l.batch = batch; + l.h = l.out_h = h; + l.w = l.out_w = w; + l.c = l.out_c = c; + l.output = calloc(h * w * c * batch, sizeof(float)); + l.delta = calloc(h * w * c * batch, sizeof(float)); + l.inputs = w*h*c; + l.outputs = l.inputs; + + l.scales = calloc(c, sizeof(float)); + l.scale_updates = calloc(c, sizeof(float)); + l.biases = calloc(c, sizeof(float)); + l.bias_updates = calloc(c, sizeof(float)); + int i; + 
for(i = 0; i < c; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(c, sizeof(float)); + l.variance = calloc(c, sizeof(float)); + + l.rolling_mean = calloc(c, sizeof(float)); + l.rolling_variance = calloc(c, sizeof(float)); + + l.forward = forward_batchnorm_layer; + l.backward = backward_batchnorm_layer; +#ifdef GPU + l.forward_gpu = forward_batchnorm_layer_gpu; + l.backward_gpu = backward_batchnorm_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, h * w * c * batch); + l.delta_gpu = cuda_make_array(l.delta, h * w * c * batch); + + l.biases_gpu = cuda_make_array(l.biases, c); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, c); + + l.scales_gpu = cuda_make_array(l.scales, c); + l.scale_updates_gpu = cuda_make_array(l.scale_updates, c); + + l.mean_gpu = cuda_make_array(l.mean, c); + l.variance_gpu = cuda_make_array(l.variance, c); + + l.rolling_mean_gpu = cuda_make_array(l.mean, c); + l.rolling_variance_gpu = cuda_make_array(l.variance, c); + + l.mean_delta_gpu = cuda_make_array(l.mean, c); + l.variance_delta_gpu = cuda_make_array(l.variance, c); + + l.x_gpu = cuda_make_array(l.output, l.batch*l.outputs); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*l.outputs); + #ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); + + #endif +#endif + return l; +} + +void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + int i,b,f; + for(f = 0; f < n; ++f){ + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int index = i + size*(f + n*b); + sum += delta[index] * x_norm[index]; + } + } + scale_updates[f] += sum; + } +} + +void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, 
float *mean_delta)
+{
+
+    int i,j,k;
+    // Per-filter gradient of the loss w.r.t. the batch mean: sum of incoming
+    // deltas scaled by -1/sqrt(variance + eps). The .00001f epsilon matches
+    // the one used on the forward normalization path.
+    for(i = 0; i < filters; ++i){
+        mean_delta[i] = 0;
+        for (j = 0; j < batch; ++j) {
+            for (k = 0; k < spatial; ++k) {
+                int index = j*filters*spatial + i*spatial + k;
+                mean_delta[i] += delta[index];
+            }
+        }
+        mean_delta[i] *= (-1./sqrt(variance[i] + .00001f));
+    }
+}
+// Per-filter gradient w.r.t. the batch variance:
+// sum of delta*(x - mean), scaled by -1/2 * (variance + eps)^(-3/2).
+void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
+{
+
+    int i,j,k;
+    for(i = 0; i < filters; ++i){
+        variance_delta[i] = 0;
+        for(j = 0; j < batch; ++j){
+            for(k = 0; k < spatial; ++k){
+                int index = j*filters*spatial + i*spatial + k;
+                variance_delta[i] += delta[index]*(x[index] - mean[i]);
+            }
+        }
+        variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.));
+    }
+}
+// Backprop through the normalization step: rewrites delta in place from the
+// precomputed mean_delta / variance_delta chain-rule terms.
+void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
+{
+    int f, j, k;
+    for(j = 0; j < batch; ++j){
+        for(f = 0; f < filters; ++f){
+            for(k = 0; k < spatial; ++k){
+                int index = j*filters*spatial + f*spatial + k;
+                delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f)) + variance_delta[f] * 2.
* (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); + } + } + } +} + +void resize_batchnorm_layer(layer *layer, int w, int h) +{ + fprintf(stderr, "Not implemented\n"); +} + +void forward_batchnorm_layer(layer l, network net) +{ + if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1); + if(net.train){ + mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean); + variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance); + + scal_cpu(l.out_c, .99, l.rolling_mean, 1); + axpy_cpu(l.out_c, .01, l.mean, 1, l.rolling_mean, 1); + scal_cpu(l.out_c, .99, l.rolling_variance, 1); + axpy_cpu(l.out_c, .01, l.variance, 1, l.rolling_variance, 1); + + normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w); + copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1); + } else { + normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h*l.out_w); + } + scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h*l.out_w); + add_bias(l.output, l.biases, l.batch, l.out_c, l.out_h*l.out_w); +} + +void backward_batchnorm_layer(layer l, network net) +{ + if(!net.train){ + l.mean = l.rolling_mean; + l.variance = l.rolling_variance; + } + backward_bias(l.bias_updates, l.delta, l.batch, l.out_c, l.out_w*l.out_h); + backward_scale_cpu(l.x_norm, l.delta, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates); + + scale_bias(l.delta, l.scales, l.batch, l.out_c, l.out_h*l.out_w); + + mean_delta_cpu(l.delta, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta); + variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta); + normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.out_c, l.out_w*l.out_h, l.delta); + if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_batchnorm_layer(layer l) 
+{ + cuda_pull_array(l.scales_gpu, l.scales, l.c); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.c); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.c); +} +void push_batchnorm_layer(layer l) +{ + cuda_push_array(l.scales_gpu, l.scales, l.c); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.c); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.c); +} + +void forward_batchnorm_layer_gpu(layer l, network net) +{ + if(l.type == BATCHNORM) copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); + if (net.train) { +#ifdef CUDNN + float one = 1; + float zero = 0; + cudnnBatchNormalizationForwardTraining(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + l.dstTensorDesc, + l.x_gpu, + l.dstTensorDesc, + l.output_gpu, + l.normTensorDesc, + l.scales_gpu, + l.biases_gpu, + .01, + l.rolling_mean_gpu, + l.rolling_variance_gpu, + .00001, + l.mean_gpu, + l.variance_gpu); +#else + fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu); + fast_variance_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.variance_gpu); + + scal_gpu(l.out_c, .99, l.rolling_mean_gpu, 1); + axpy_gpu(l.out_c, .01, l.mean_gpu, 1, l.rolling_mean_gpu, 1); + scal_gpu(l.out_c, .99, l.rolling_variance_gpu, 1); + axpy_gpu(l.out_c, .01, l.variance_gpu, 1, l.rolling_variance_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); + normalize_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_norm_gpu, 1); + + scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); +#endif + } else { + normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); + scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, 
l.out_h*l.out_w); + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); + } + +} + +void backward_batchnorm_layer_gpu(layer l, network net) +{ + if(!net.train){ + l.mean_gpu = l.rolling_mean_gpu; + l.variance_gpu = l.rolling_variance_gpu; + } +#ifdef CUDNN + float one = 1; + float zero = 0; + cudnnBatchNormalizationBackward(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + &one, + &one, + l.dstTensorDesc, + l.x_gpu, + l.dstTensorDesc, + l.delta_gpu, + l.dstTensorDesc, + l.x_norm_gpu, + l.normTensorDesc, + l.scales_gpu, + l.scale_updates_gpu, + l.bias_updates_gpu, + .00001, + l.mean_gpu, + l.variance_gpu); + copy_gpu(l.outputs*l.batch, l.x_norm_gpu, 1, l.delta_gpu, 1); +#else + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h); + backward_scale_gpu(l.x_norm_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates_gpu); + + scale_bias_gpu(l.delta_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + + fast_mean_delta_gpu(l.delta_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta_gpu); + fast_variance_delta_gpu(l.x_gpu, l.delta_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta_gpu); + normalize_delta_gpu(l.x_gpu, l.mean_gpu, l.variance_gpu, l.mean_delta_gpu, l.variance_delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu); +#endif + if(l.type == BATCHNORM) copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/pinned/darknet/src/batchnorm_layer.h b/workloads/realworld/pinned/darknet/src/batchnorm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..25a18a3c8f2569bab135b088501248159e1cae11 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/batchnorm_layer.h @@ -0,0 +1,19 @@ +#ifndef BATCHNORM_LAYER_H +#define BATCHNORM_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +layer make_batchnorm_layer(int batch, int w, int h, 
int c); +void forward_batchnorm_layer(layer l, network net); +void backward_batchnorm_layer(layer l, network net); + +#ifdef GPU +void forward_batchnorm_layer_gpu(layer l, network net); +void backward_batchnorm_layer_gpu(layer l, network net); +void pull_batchnorm_layer(layer l); +void push_batchnorm_layer(layer l); +#endif + +#endif diff --git a/workloads/realworld/pinned/darknet/src/blas.c b/workloads/realworld/pinned/darknet/src/blas.c new file mode 100644 index 0000000000000000000000000000000000000000..9e1604449ba9aeb9decdc7f0395a38bd3b478671 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/blas.c @@ -0,0 +1,351 @@ +#include "blas.h" + +#include +#include +#include +#include +#include +#include +void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int b,i,j,k; + int out_c = c/(stride*stride); + + for(b = 0; b < batch; ++b){ + for(k = 0; k < c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int in_index = i + w*(j + h*(k + c*b)); + int c2 = k % out_c; + int offset = k / out_c; + int w2 = i*stride + offset % stride; + int h2 = j*stride + offset / stride; + int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); + if(forward) out[out_index] = x[in_index]; + else out[in_index] = x[out_index]; + } + } + } + } +} + +void flatten(float *x, int size, int layers, int batch, int forward) +{ + float *swap = calloc(size*layers*batch, sizeof(float)); + int i,c,b; + for(b = 0; b < batch; ++b){ + for(c = 0; c < layers; ++c){ + for(i = 0; i < size; ++i){ + int i1 = b*layers*size + c*size + i; + int i2 = b*layers*size + i*layers + c; + if (forward) swap[i2] = x[i1]; + else swap[i1] = x[i2]; + } + } + } + memcpy(x, swap, size*layers*batch*sizeof(float)); + free(swap); +} + +void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c) +{ + int i; + for(i = 0; i < n; ++i){ + c[i] = s[i]*a[i] + (1-s[i])*(b ? 
b[i] : 0);
+    }
+}
+
+// Backprop for weighted_sum_cpu: accumulates gradients into da/db (when
+// non-NULL) and into the mixing-weight gradient ds.
+void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc)
+{
+    int i;
+    for(i = 0; i < n; ++i){
+        if(da) da[i] += dc[i] * s[i];
+        if(db) db[i] += dc[i] * (1-s[i]);
+        ds[i] += dc[i] * (a[i] - b[i]);
+    }
+}
+
+// Shortcut/residual add between two feature maps of possibly different
+// spatial size and channel count: `add` (w1 x h1 x c1) is blended into `out`
+// (w2 x h2 x c2) over their overlapping extent as s1*out + s2*add.
+void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out)
+{
+    int stride = w1/w2;   // >1 when the source map is the larger one
+    int sample = w2/w1;   // >1 when the destination map is the larger one
+    assert(stride == h1/h2);
+    assert(sample == h2/h1);
+    if(stride < 1) stride = 1;
+    if(sample < 1) sample = 1;
+    int minw = (w1 < w2) ? w1 : w2;
+    int minh = (h1 < h2) ? h1 : h2;
+    int minc = (c1 < c2) ? c1 : c2;
+
+    int i,j,k,b;
+    for(b = 0; b < batch; ++b){
+        for(k = 0; k < minc; ++k){
+            for(j = 0; j < minh; ++j){
+                for(i = 0; i < minw; ++i){
+                    int out_index = i*sample + w2*(j*sample + h2*(k + c2*b));
+                    int add_index = i*stride + w1*(j*stride + h1*(k + c1*b));
+                    out[out_index] = s1*out[out_index] + s2*add[add_index];
+                }
+            }
+        }
+    }
+}
+
+// Per-filter mean over batch and spatial positions (NCHW-style indexing).
+void mean_cpu(float *x, int batch, int filters, int spatial, float *mean)
+{
+    float scale = 1./(batch * spatial);
+    int i,j,k;
+    for(i = 0; i < filters; ++i){
+        mean[i] = 0;
+        for(j = 0; j < batch; ++j){
+            for(k = 0; k < spatial; ++k){
+                int index = j*filters*spatial + i*spatial + k;
+                mean[i] += x[index];
+            }
+        }
+        mean[i] *= scale;
+    }
+}
+
+// Per-filter variance; note the Bessel-style (batch*spatial - 1) divisor.
+void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
+{
+    float scale = 1./(batch * spatial - 1);
+    int i,j,k;
+    for(i = 0; i < filters; ++i){
+        variance[i] = 0;
+        for(j = 0; j < batch; ++j){
+            for(k = 0; k < spatial; ++k){
+                int index = j*filters*spatial + i*spatial + k;
+                variance[i] += pow((x[index] - mean[i]), 2);
+            }
+        }
+        variance[i] *= scale;
+    }
+}
+
+// In-place L2 normalization across filters at each spatial position; also
+// writes a gradient-like term into dx.
+void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial)
+{
+    int b,f,i;
+    for(b = 0; b < batch; ++b){
+        for(i = 0; i < spatial; ++i){
+            float sum = 0;
+            for(f = 0; f < filters; ++f){
+                int index = b*filters*spatial + f*spatial + i;
+
sum += powf(x[index], 2);
+            }
+            sum = sqrtf(sum);
+            for(f = 0; f < filters; ++f){
+                int index = b*filters*spatial + f*spatial + i;
+                x[index] /= sum;
+                dx[index] = (1 - x[index]) / sum;
+            }
+        }
+    }
+}
+
+
+// Batch-norm style normalization: x = (x - mean) / (sqrt(var) + eps), per filter.
+void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial)
+{
+    int b, f, i;
+    for(b = 0; b < batch; ++b){
+        for(f = 0; f < filters; ++f){
+            for(i = 0; i < spatial; ++i){
+                int index = b*filters*spatial + f*spatial + i;
+                x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + .000001f);
+            }
+        }
+    }
+}
+
+// X[i] = ALPHA (strided).
+void const_cpu(int N, float ALPHA, float *X, int INCX)
+{
+    int i;
+    for(i = 0; i < N; ++i) X[i*INCX] = ALPHA;
+}
+
+// Y[i] *= X[i], element-wise (strided).
+void mul_cpu(int N, float *X, int INCX, float *Y, int INCY)
+{
+    int i;
+    for(i = 0; i < N; ++i) Y[i*INCY] *= X[i*INCX];
+}
+
+// Y[i] = X[i]^ALPHA (strided).
+void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
+{
+    int i;
+    for(i = 0; i < N; ++i) Y[i*INCY] = pow(X[i*INCX], ALPHA);
+}
+
+// BLAS-style axpy: Y += ALPHA * X (strided).
+void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
+{
+    int i;
+    for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX];
+}
+
+// BLAS-style scal: X *= ALPHA (strided).
+void scal_cpu(int N, float ALPHA, float *X, int INCX)
+{
+    int i;
+    for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA;
+}
+
+// X[i] = ALPHA; identical to const_cpu, kept for naming symmetry with the GPU side.
+void fill_cpu(int N, float ALPHA, float *X, int INCX)
+{
+    int i;
+    for(i = 0; i < N; ++i) X[i*INCX] = ALPHA;
+}
+
+// Splits the interleaved buffer OUT back into X and Y (accumulating, inverse
+// of inter_cpu); either destination may be NULL to skip it.
+void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
+{
+    int i, j;
+    int index = 0;
+    for(j = 0; j < B; ++j) {
+        for(i = 0; i < NX; ++i){
+            if(X) X[j*NX + i] += OUT[index];
+            ++index;
+        }
+        for(i = 0; i < NY; ++i){
+            if(Y) Y[j*NY + i] += OUT[index];
+            ++index;
+        }
+    }
+}
+
+// Packs X and Y batch-by-batch into one contiguous buffer OUT.
+void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
+{
+    int i, j;
+    int index = 0;
+    for(j = 0; j < B; ++j) {
+        for(i = 0; i < NX; ++i){
+            OUT[index++] = X[j*NX + i];
+        }
+        for(i = 0; i < NY; ++i){
+            OUT[index++] = Y[j*NY + i];
+        }
+    }
+}
+
+// BLAS-style copy: Y = X (strided).
+void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
+{
+    int i;
+    for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX];
+}
+
+// Z[i] += X[i] * Y[i].
+void mult_add_into_cpu(int N, float *X, float *Y, float *Z)
+{
+    int i;
+    for(i = 0; i < N; ++i) Z[i] += X[i]*Y[i];
+}
+
+// Smooth-L1 (Huber-like) loss: quadratic for |diff| < 1, linear beyond.
+// delta follows this file's truth-minus-pred gradient convention.
+// NOTE(review): in the linear branch delta is (diff < 0) ? 1 : -1 — the
+// opposite sign of l1_cpu's (diff > 0) ? 1 : -1 below; verify against upstream.
+void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
+{
+    int i;
+    for(i = 0; i < n; ++i){
+        float diff = truth[i] - pred[i];
+        float abs_val = fabs(diff);
+        if(abs_val < 1) {
+            error[i] = diff * diff;
+            delta[i] = diff;
+        }
+        else {
+            error[i] = 2*abs_val - 1;
+            delta[i] = (diff < 0) ? 1 : -1;
+        }
+    }
+}
+
+// L1 loss and its sign gradient.
+void l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
+{
+    int i;
+    for(i = 0; i < n; ++i){
+        float diff = truth[i] - pred[i];
+        error[i] = fabs(diff);
+        delta[i] = diff > 0 ? 1 : -1;
+    }
+}
+
+// Cross-entropy against a (typically one-hot) target for softmax outputs.
+// NOTE(review): -log(p) is inf when p == 0 — assumes p > 0 wherever t != 0.
+void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error)
+{
+    int i;
+    for(i = 0; i < n; ++i){
+        float t = truth[i];
+        float p = pred[i];
+        error[i] = (t) ? -log(p) : 0;
+        delta[i] = t-p;
+    }
+}
+
+// Per-element binary cross-entropy for logistic outputs.
+void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error)
+{
+    int i;
+    for(i = 0; i < n; ++i){
+        float t = truth[i];
+        float p = pred[i];
+        error[i] = -t*log(p) - (1-t)*log(1-p);
+        delta[i] = t-p;
+    }
+}
+
+// Squared-error loss; delta is the raw residual truth - pred.
+void l2_cpu(int n, float *pred, float *truth, float *delta, float *error)
+{
+    int i;
+    for(i = 0; i < n; ++i){
+        float diff = truth[i] - pred[i];
+        error[i] = diff * diff;
+        delta[i] = diff;
+    }
+}
+
+// BLAS-style dot product: sum of X[i]*Y[i] (strided).
+float dot_cpu(int N, float *X, int INCX, float *Y, int INCY)
+{
+    int i;
+    float dot = 0;
+    for(i = 0; i < N; ++i) dot += X[i*INCX] * Y[i*INCY];
+    return dot;
+}
+
+// Numerically-stable softmax with temperature over a strided slice of n
+// elements: subtracts the max logit before exponentiating.
+void softmax(float *input, int n, float temp, int stride, float *output)
+{
+    int i;
+    float sum = 0;
+    float largest = -FLT_MAX;
+    for(i = 0; i < n; ++i){
+        if(input[i*stride] > largest) largest = input[i*stride];
+    }
+    for(i = 0; i < n; ++i){
+        float e = exp(input[i*stride]/temp - largest/temp);
+        sum += e;
+        output[i*stride] = e;
+    }
+    for(i = 0; i < n; ++i){
+        output[i*stride] /= sum;
+    }
+}
+
+
+// Applies softmax independently per batch item and per group offset.
+void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int
group_offset, int stride, float temp, float *output) +{ + int g, b; + for(b = 0; b < batch; ++b){ + for(g = 0; g < groups; ++g){ + softmax(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset); + } + } +} + +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + int i, j, k, b; + for(b = 0; b < batch; ++b){ + for(k = 0; k < c; ++k){ + for(j = 0; j < h*stride; ++j){ + for(i = 0; i < w*stride; ++i){ + int in_index = b*w*h*c + k*w*h + (j/stride)*w + i/stride; + int out_index = b*w*h*c*stride*stride + k*w*h*stride*stride + j*w*stride + i; + if(forward) out[out_index] = scale*in[in_index]; + else in[in_index] += scale*out[out_index]; + } + } + } + } +} + + diff --git a/workloads/realworld/pinned/darknet/src/blas.h b/workloads/realworld/pinned/darknet/src/blas.h new file mode 100644 index 0000000000000000000000000000000000000000..5d24a9aea70d8050b05098aa7a5634576444a32c --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/blas.h @@ -0,0 +1,105 @@ +#ifndef BLAS_H +#define BLAS_H +#include "darknet.h" + +void flatten(float *x, int size, int layers, int batch, int forward); +void pm(int M, int N, float *A); +float *random_matrix(int rows, int cols); +void time_random_matrix(int TA, int TB, int m, int k, int n); +void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); + +void test_blas(); + +void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void mult_add_into_cpu(int N, float *X, float *Y, float *Z); + +void const_cpu(int N, float ALPHA, float *X, int INCX); +void constrain_gpu(int N, float ALPHA, float * X, int INCX); +void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void mul_cpu(int N, float *X, int INCX, float *Y, int INCY); + +int test_gpu_blas(); +void shortcut_cpu(int batch, int w1, int h1, int c1, 
float *add, int w2, int h2, int c2, float s1, float s2, float *out); + +void mean_cpu(float *x, int batch, int filters, int spatial, float *mean); +void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); + +void scale_bias(float *output, float *scales, int batch, int n, int size); +void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); +void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); +void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); +void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); +void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial); + +void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error); +void l2_cpu(int n, float *pred, float *truth, float *delta, float *error); +void l1_cpu(int n, float *pred, float *truth, float *delta, float *error); +void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); +void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); +void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c); +void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc); + +void softmax(float *input, int n, float temp, int stride, float *output); +void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); + +#ifdef GPU +#include "cuda_dark.h" +#include "tree.h" + +void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); +void axpy_gpu_offset(int 
N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); +void copy_gpu(int N, float * X, int INCX, float * Y, int INCY); +void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); +void add_gpu(int N, float ALPHA, float * X, int INCX); +void supp_gpu(int N, float ALPHA, float * X, int INCX); +void mask_gpu(int N, float * X, float mask_num, float * mask, float val); +void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale); +void const_gpu(int N, float ALPHA, float *X, int INCX); +void pow_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void mul_gpu(int N, float *X, int INCX, float *Y, int INCY); + +void mean_gpu(float *x, int batch, int filters, int spatial, float *mean); +void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); +void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); +void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial); + +void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); + +void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); +void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); + +void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); +void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean); +void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out); +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); +void scale_bias_gpu(float 
*output, float *biases, int batch, int n, int size); +void add_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); + +void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); +void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); +void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error); +void l2_gpu(int n, float *pred, float *truth, float *delta, float *error); +void l1_gpu(int n, float *pred, float *truth, float *delta, float *error); +void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error); +void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc); +void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c); +void mult_add_into_gpu(int num, float *a, float *b, float *c); +void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT); + +void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); + +void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); +void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t); +void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t); + +void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out); +void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier); +void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); + +#endif +#endif diff --git a/workloads/realworld/pinned/darknet/src/blas_kernels.cu 
b/workloads/realworld/pinned/darknet/src/blas_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..3db34a057b318e87769058c4b7fdc81f02780a9d --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/blas_kernels.cu @@ -0,0 +1,1035 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" +#include + +extern "C" { +#include "blas.h" +#include "cuda_dark.h" +#include "utils.h" +} + +__global__ void scale_bias_kernel(float *output, float *biases, int n, int size) +{ + int offset = blockIdx.x * blockDim.x + threadIdx.x; + int filter = blockIdx.y; + int batch = blockIdx.z; + + if(offset < size) output[(batch*n+filter)*size + offset] *= biases[filter]; +} + +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size) +{ + dim3 dimGrid((size-1)/BLOCK + 1, n, batch); + dim3 dimBlock(BLOCK, 1, 1); + + scale_bias_kernel<<>>(output, biases, n, size); + check_error(cudaPeekAtLastError()); +} + +__global__ void backward_scale_kernel(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + __shared__ float part[BLOCK]; + int i,b; + int filter = blockIdx.x; + int p = threadIdx.x; + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; i += BLOCK){ + int index = p + i + size*(filter + n*b); + sum += (p+i < size) ? 
delta[index]*x_norm[index] : 0; + } + } + part[p] = sum; + __syncthreads(); + if (p == 0) { + for(i = 0; i < BLOCK; ++i) scale_updates[filter] += part[i]; + } +} + +void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + backward_scale_kernel<<>>(x_norm, delta, batch, n, size, scale_updates); + check_error(cudaPeekAtLastError()); +} + +__global__ void add_bias_kernel(float *output, float *biases, int batch, int n, int size) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= n*size*batch) return; + int i = index % size; + index /= size; + int j = index % n; + index /= n; + int k = index; + + output[(k*n+j)*size + i] += biases[j]; +} + +void add_bias_gpu(float *output, float *biases, int batch, int n, int size) +{ + int num = n*size*batch; + + add_bias_kernel<<>>(output, biases, batch, n, size); + check_error(cudaPeekAtLastError()); +} + +__global__ void backward_bias_conn_kernel(float *bias_updates, float *delta, int batch, int n) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= n) return; + int b; + float sum = 0; + for(b = 0; b < batch; ++b){ + int i = b*n + index; + sum += delta[i]; + } + bias_updates[index] += sum; +} + +__global__ void backward_bias_kernel(float *bias_updates, float *delta, int batch, int n, int size) +{ + __shared__ float part[BLOCK]; + int i,b; + int filter = blockIdx.x; + int p = threadIdx.x; + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; i += BLOCK){ + int index = p + i + size*(filter + n*b); + sum += (p+i < size) ? 
delta[index] : 0; + } + } + part[p] = sum; + __syncthreads(); + if (p == 0) { + for(i = 0; i < BLOCK; ++i) bias_updates[filter] += part[i]; + } +} + +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size) +{ + if(size == 1){ + backward_bias_conn_kernel<<>>(bias_updates, delta, batch, n); + }else{ + backward_bias_kernel<<>>(bias_updates, delta, batch, n, size); + } + check_error(cudaPeekAtLastError()); +} + +/* +__global__ void dot_kernel(float *output, float scale, int batch, int n, int size, float *delta) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int f1 = index / n; + int f2 = index % n; + if (f2 <= f1) return; + + float sum = 0; + float norm1 = 0; + float norm2 = 0; + int b, i; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int i1 = b * size * n + f1 * size + i; + int i2 = b * size * n + f2 * size + i; + sum += output[i1] * output[i2]; + norm1 += output[i1] * output[i1]; + norm2 += output[i2] * output[i2]; + } + } + norm1 = sqrt(norm1); + norm2 = sqrt(norm2); + float norm = norm1 * norm2; + sum = sum / norm; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int i1 = b * size * n + f1 * size + i; + int i2 = b * size * n + f2 * size + i; + delta[i1] += - scale * sum * output[i2] / norm; + delta[i2] += - scale * sum * output[i1] / norm; + } + } +} + +void dot_error_gpu(layer l) +{ + dot_kernel<<>>(l.output_gpu, l.dot, l.batch, l.n, l.out_w * l.out_h, l.delta_gpu); + check_error(cudaPeekAtLastError()); +} +*/ + + +__global__ void adam_kernel(int N, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + + float mhat = m[index] / (1.f - powf(B1, t)); + float vhat = v[index] / (1.f - powf(B2, t)); + + x[index] = x[index] + rate * mhat / (sqrtf(vhat) + eps); +} + +extern "C" void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, 
float rate, float eps, int t) +{ + adam_kernel<<>>(n, x, m, v, B1, B2, rate, eps, t); + check_error(cudaPeekAtLastError()); +} + +extern "C" void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t) +{ + scal_gpu(n, B1, m, 1); + scal_gpu(n, B2, v, 1); + axpy_gpu(n, -decay*batch, w, 1, d, 1); + + axpy_gpu(n, (1-B1), d, 1, m, 1); + mul_gpu(n, d, 1, d, 1); + axpy_gpu(n, (1-B2), d, 1, v, 1); + + adam_gpu(n, w, m, v, B1, B2, rate, eps, t); + fill_gpu(n, 0, d, 1); +} + +__global__ void normalize_kernel(int N, float *x, float *mean, float *variance, int batch, int filters, int spatial) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int f = (index/spatial)%filters; + + x[index] = (x[index] - mean[f])/(sqrtf(variance[f] + .00001f)); +} + +__global__ void normalize_delta_kernel(int N, float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int f = (index/spatial)%filters; + + delta[index] = delta[index] * 1.f/(sqrtf(variance[f] + .00001f)) + variance_delta[f] * 2.f * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); +} + +extern "C" void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + size_t N = batch*filters*spatial; + normalize_delta_kernel<<>>(N, x, mean, variance, mean_delta, variance_delta, batch, filters, spatial, delta); + check_error(cudaPeekAtLastError()); +} + +__global__ void variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) 
return; + int j,k; + variance_delta[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance_delta[i] += delta[index]*(x[index] - mean[i]); + } + } + variance_delta[i] *= -.5f * powf(variance[i] + .00001f, (float)(-3.f/2.f)); +} + +__global__ void accumulate_kernel(float *x, int n, int groups, float *sum) +{ + int k; + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= groups) return; + sum[i] = 0; + for(k = 0; k < n; ++k){ + sum[i] += x[k*groups + i]; + } +} + +__global__ void fast_mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + local[id] += (i+id < spatial) ? delta[index] : 0; + } + } + + __syncthreads(); + + if(id == 0){ + mean_delta[filter] = 0; + for(i = 0; i < threads; ++i){ + mean_delta[filter] += local[i]; + } + mean_delta[filter] *= (-1.f/sqrtf(variance[filter] + .00001f)); + } +} + +__global__ void fast_variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + + local[id] += (i+id < spatial) ? 
delta[index]*(x[index] - mean[filter]) : 0; + } + } + + __syncthreads(); + + if(id == 0){ + variance_delta[filter] = 0; + for(i = 0; i < threads; ++i){ + variance_delta[filter] += local[i]; + } + variance_delta[filter] *= -.5f * powf(variance[filter] + .00001f, (float)(-3.f/2.f)); + } +} + + +__global__ void mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + int j,k; + mean_delta[i] = 0; + for (j = 0; j < batch; ++j) { + for (k = 0; k < spatial; ++k) { + int index = j*filters*spatial + i*spatial + k; + mean_delta[i] += delta[index]; + } + } + mean_delta[i] *= (-1.f/sqrtf(variance[i] + .00001f)); +} + +extern "C" void mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + mean_delta_kernel<<>>(delta, variance, batch, filters, spatial, mean_delta); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + fast_mean_delta_kernel<<>>(delta, variance, batch, filters, spatial, mean_delta); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + fast_variance_delta_kernel<<>>(x, delta, mean, variance, batch, filters, spatial, variance_delta); + check_error(cudaPeekAtLastError()); +} + +__global__ void mean_kernel(float *x, int batch, int filters, int spatial, float *mean) +{ + float scale = 1.f/(batch * spatial); + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + int j,k; + mean[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + mean[i] += x[index]; + } + } + mean[i] *= scale; +} + 
+__global__ void variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + float scale = 1.f/(batch * spatial - 1); + int j,k; + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + variance[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance[i] += powf((x[index] - mean[i]), 2); + } + } + variance[i] *= scale; +} + +__global__ void reorg_kernel(int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int in_index = i; + int in_w = i%w; + i = i/w; + int in_h = i%h; + i = i/h; + int in_c = i%c; + i = i/c; + int b = i%batch; + + int out_c = c/(stride*stride); + + int c2 = in_c % out_c; + int offset = in_c / out_c; + int w2 = in_w*stride + offset % stride; + int h2 = in_h*stride + offset / stride; + //printf("%d\n", offset); + int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); + + // printf("%d %d %d\n", w2, h2, c2); + //printf("%d %d\n", in_index, out_index); + //if(out_index >= N || out_index < 0) printf("bad bad bad \n"); + + if(forward) out[out_index] = x[in_index]; + else out[in_index] = x[out_index]; + //if(forward) out[1] = x[1]; + //else out[0] = x[0]; +} + +__global__ void axpy_kernel(int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[OFFY+i*INCY] += ALPHA*X[OFFX+i*INCX]; +} + +__global__ void pow_kernel(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[i*INCY] = pow(X[i*INCX], ALPHA); +} + +__global__ void const_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i 
< N) X[i*INCX] = ALPHA; +} + +__global__ void constrain_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] = fminf(ALPHA, fmaxf(-ALPHA, X[i*INCX])); +} + +__global__ void supp_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) { + if((X[i*INCX] * X[i*INCX]) < (ALPHA * ALPHA)) X[i*INCX] = 0; + } +} + +__global__ void add_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] += ALPHA; +} + +__global__ void scal_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] *= ALPHA; +} + +__global__ void fill_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] = ALPHA; +} + +__global__ void copy_kernel(int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[i*INCY + OFFY] = X[i*INCX + OFFX]; +} + +__global__ void mul_kernel(int N, float *X, int INCX, float *Y, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[i*INCY] *= X[i*INCX]; +} + + +extern "C" void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) +{ + size_t N = batch*filters*spatial; + normalize_kernel<<>>(N, x, mean, variance, batch, filters, spatial); + check_error(cudaPeekAtLastError()); +} + +__global__ void l2norm_kernel(int N, float *x, float *dx, int batch, int filters, int spatial) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int b = index / spatial; + int i = index % spatial; + int f; + float sum = 0; + 
for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + sum += powf(x[index], 2); + } + sum = sqrtf(sum); + if(sum == 0) sum = 1; + //printf("%f\n", sum); + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + x[index] /= sum; + dx[index] = (1 - x[index]) / sum; + } +} + +extern "C" void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial) +{ + size_t N = batch*spatial; + l2norm_kernel<<>>(N, x, dx, batch, filters, spatial); + check_error(cudaPeekAtLastError()); +} + +__global__ void fast_mean_kernel(float *x, int batch, int filters, int spatial, float *mean) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + local[id] += (i+id < spatial) ? x[index] : 0; + } + } + + __syncthreads(); + + if(id == 0){ + mean[filter] = 0; + for(i = 0; i < threads; ++i){ + mean[filter] += local[i]; + } + mean[filter] /= spatial * batch; + } +} + +__global__ void fast_variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + + local[id] += (i+id < spatial) ? 
powf((x[index] - mean[filter]), 2) : 0; + } + } + + __syncthreads(); + + if(id == 0){ + variance[filter] = 0; + for(i = 0; i < threads; ++i){ + variance[filter] += local[i]; + } + variance[filter] /= (spatial * batch - 1); + } +} + +extern "C" void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean) +{ + fast_mean_kernel<<>>(x, batch, filters, spatial, mean); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + fast_variance_kernel<<>>(x, mean, batch, filters, spatial, variance); + check_error(cudaPeekAtLastError()); +} + + +extern "C" void mean_gpu(float *x, int batch, int filters, int spatial, float *mean) +{ + mean_kernel<<>>(x, batch, filters, spatial, mean); + check_error(cudaPeekAtLastError()); +} + +extern "C" void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + variance_kernel<<>>(x, mean, batch, filters, spatial, variance); + check_error(cudaPeekAtLastError()); +} + +extern "C" void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY) +{ + axpy_gpu_offset(N, ALPHA, X, 0, INCX, Y, 0, INCY); +} + +extern "C" void pow_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY) +{ + pow_kernel<<>>(N, ALPHA, X, INCX, Y, INCY); + check_error(cudaPeekAtLastError()); +} + +extern "C" void axpy_gpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY) +{ + axpy_kernel<<>>(N, ALPHA, X, OFFX, INCX, Y, OFFY, INCY); + check_error(cudaPeekAtLastError()); +} + +extern "C" void copy_gpu(int N, float * X, int INCX, float * Y, int INCY) +{ + copy_gpu_offset(N, X, 0, INCX, Y, 0, INCY); +} + +extern "C" void mul_gpu(int N, float * X, int INCX, float * Y, int INCY) +{ + mul_kernel<<>>(N, X, INCX, Y, INCY); + check_error(cudaPeekAtLastError()); +} + +extern "C" void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int 
INCY) +{ + copy_kernel<<>>(N, X, OFFX, INCX, Y, OFFY, INCY); + check_error(cudaPeekAtLastError()); +} + +__global__ void flatten_kernel(int N, float *x, int spatial, int layers, int batch, int forward, float *out) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int in_s = i%spatial; + i = i/spatial; + int in_c = i%layers; + i = i/layers; + int b = i; + + int i1 = b*layers*spatial + in_c*spatial + in_s; + int i2 = b*layers*spatial + in_s*layers + in_c; + + if (forward) out[i2] = x[i1]; + else out[i1] = x[i2]; +} + +extern "C" void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out) +{ + int size = spatial*batch*layers; + flatten_kernel<<>>(size, x, spatial, layers, batch, forward, out); + check_error(cudaPeekAtLastError()); +} + +extern "C" void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int size = w*h*c*batch; + reorg_kernel<<>>(size, x, w, h, c, batch, stride, forward, out); + check_error(cudaPeekAtLastError()); +} + +__global__ void mask_kernel(int n, float *x, float mask_num, float *mask, float val) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n && mask[i] == mask_num) x[i] = val; +} + +extern "C" void mask_gpu(int N, float * X, float mask_num, float * mask, float val) +{ + mask_kernel<<>>(N, X, mask_num, mask, val); + check_error(cudaPeekAtLastError()); +} + +__global__ void scale_mask_kernel(int n, float *x, float mask_num, float *mask, float scale) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n && mask[i] == mask_num) x[i] *= scale; +} + +extern "C" void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale) +{ + scale_mask_kernel<<>>(N, X, mask_num, mask, scale); + check_error(cudaPeekAtLastError()); +} + +extern "C" void const_gpu(int N, float ALPHA, float * X, int INCX) +{ + const_kernel<<>>(N, ALPHA, X, INCX); + 
check_error(cudaPeekAtLastError()); +} + +extern "C" void constrain_gpu(int N, float ALPHA, float * X, int INCX) +{ + constrain_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + + +extern "C" void add_gpu(int N, float ALPHA, float * X, int INCX) +{ + add_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void scal_gpu(int N, float ALPHA, float * X, int INCX) +{ + scal_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void supp_gpu(int N, float ALPHA, float * X, int INCX) +{ + supp_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fill_gpu(int N, float ALPHA, float * X, int INCX) +{ + fill_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +__global__ void shortcut_kernel(int size, int minw, int minh, int minc, int stride, int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= size) return; + int i = id % minw; + id /= minw; + int j = id % minh; + id /= minh; + int k = id % minc; + id /= minc; + int b = id % batch; + + int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); + int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); + out[out_index] = s1*out[out_index] + s2*add[add_index]; + //out[out_index] += add[add_index]; +} + +extern "C" void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) +{ + int minw = (w1 < w2) ? w1 : w2; + int minh = (h1 < h2) ? h1 : h2; + int minc = (c1 < c2) ? 
c1 : c2; + + int stride = w1/w2; + int sample = w2/w1; + assert(stride == h1/h2); + assert(sample == h2/h1); + if(stride < 1) stride = 1; + if(sample < 1) sample = 1; + + int size = batch * minw * minh * minc; + shortcut_kernel<<>>(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, s1, s2, out); + check_error(cudaPeekAtLastError()); +} + +__global__ void smooth_l1_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float diff = truth[i] - pred[i]; + float abs_val = fabsf(diff); + if(abs_val < 1) { + error[i] = diff * diff; + delta[i] = diff; + } + else { + error[i] = 2*abs_val - 1; + delta[i] = (diff > 0) ? 1 : -1; + } + } +} + +extern "C" void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + smooth_l1_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void softmax_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float t = truth[i]; + float p = pred[i]; + error[i] = (t) ? 
-log(p) : 0; + delta[i] = t-p; + } +} + +extern "C" void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + softmax_x_ent_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void logistic_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float t = truth[i]; + float p = pred[i]; + error[i] = -t*log(p+.0000001) - (1-t)*log(1-p+.0000001); + delta[i] = t-p; + } +} + +extern "C" void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + logistic_x_ent_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void l2_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float diff = truth[i] - pred[i]; + error[i] = diff * diff; //I know this is technically wrong, deal with it. + delta[i] = diff; + } +} + +extern "C" void l2_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + l2_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void l1_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float diff = truth[i] - pred[i]; + error[i] = abs(diff); + delta[i] = (diff > 0) ? 1 : -1; + } +} + +extern "C" void l1_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + l1_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void wgan_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + error[i] = truth[i] ? -pred[i] : pred[i]; + delta[i] = (truth[i] > 0) ? 
1 : -1; + } +} + +extern "C" void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + wgan_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + + + + +__global__ void weighted_sum_kernel(int n, float *a, float *b, float *s, float *c) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + c[i] = s[i]*a[i] + (1-s[i])*(b ? b[i] : 0); + } +} + +__global__ void deinter_kernel(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < (NX+NY)*B){ + int b = i / (NX+NY); + int j = i % (NX+NY); + if (j < NX){ + if(X) X[b*NX + j] += OUT[i]; + } else { + if(Y) Y[b*NY + j - NX] += OUT[i]; + } + } +} + +extern "C" void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + deinter_kernel<<>>(NX, X, NY, Y, B, OUT); + check_error(cudaPeekAtLastError()); +} + +__global__ void inter_kernel(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < (NX+NY)*B){ + int b = i / (NX+NY); + int j = i % (NX+NY); + if (j < NX){ + OUT[i] = X[b*NX + j]; + } else { + OUT[i] = Y[b*NY + j - NX]; + } + } +} + +extern "C" void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + inter_kernel<<>>(NX, X, NY, Y, B, OUT); + check_error(cudaPeekAtLastError()); +} + +extern "C" void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c) +{ + weighted_sum_kernel<<>>(num, a, b, s, c); + check_error(cudaPeekAtLastError()); +} + +__global__ void weighted_delta_kernel(int n, float *a, float *b, float *s, float *da, float *db, float *ds, float *dc) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + if(da) da[i] += dc[i] * s[i]; + if(db) db[i] += dc[i] * (1-s[i]); + ds[i] += dc[i] * (a[i] - b[i]); + } +} + +extern "C" void weighted_delta_gpu(float *a, float *b, float *s, 
float *da, float *db, float *ds, int num, float *dc) +{ + weighted_delta_kernel<<>>(num, a, b, s, da, db, ds, dc); + check_error(cudaPeekAtLastError()); +} + +__global__ void mult_add_into_kernel(int n, float *a, float *b, float *c) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + c[i] += a[i]*b[i]; + } +} + +extern "C" void mult_add_into_gpu(int num, float *a, float *b, float *c) +{ + mult_add_into_kernel<<>>(num, a, b, c); + check_error(cudaPeekAtLastError()); +} + + +__device__ void softmax_device(float *input, int n, float temp, int stride, float *output) +{ + int i; + float sum = 0; + float largest = -INFINITY; + for(i = 0; i < n; ++i){ + int val = input[i*stride]; + largest = (val>largest) ? val : largest; + } + for(i = 0; i < n; ++i){ + float e = expf(input[i*stride]/temp - largest/temp); + sum += e; + output[i*stride] = e; + } + for(i = 0; i < n; ++i){ + output[i*stride] /= sum; + } +} + + +__global__ void softmax_tree_kernel(float *input, int spatial, int batch, int stride, float temp, float *output, int groups, int *group_size, int *group_offset) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= spatial*batch*groups) return; + int s = id % spatial; + id = id / spatial; + int g = id % groups; + int b = id / groups; + int goff = group_offset[g]*spatial; + int boff = b*stride; + softmax_device(input + goff + boff + s, group_size[g], temp, spatial, output + goff + boff + s); +} + +extern "C" void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier) +{ + int *tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups); + int *tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups); + /* + static int *tree_groups_size = 0; + static int *tree_groups_offset = 0; + if(!tree_groups_size){ + tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups); + tree_groups_offset = 
cuda_make_int_array(hier.group_offset, hier.groups); + } + */ + int num = spatial*batch*hier.groups; + softmax_tree_kernel<<>>(input, spatial, batch, stride, temp, output, hier.groups, tree_groups_size, tree_groups_offset); + check_error(cudaPeekAtLastError()); + cuda_free((float *)tree_groups_size); + cuda_free((float *)tree_groups_offset); +} + +__global__ void softmax_kernel(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= batch*groups) return; + int b = id / groups; + int g = id % groups; + softmax_device(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset); +} + +extern "C" void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) +{ + softmax_kernel<<>>(input, n, batch, batch_offset, groups, group_offset, stride, temp, output); + check_error(cudaPeekAtLastError()); +} + + +__global__ void upsample_kernel(size_t N, float *x, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + size_t i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int out_index = i; + int out_w = i%(w*stride); + i = i/(w*stride); + int out_h = i%(h*stride); + i = i/(h*stride); + int out_c = i%c; + i = i/c; + int b = i%batch; + + int in_w = out_w / stride; + int in_h = out_h / stride; + int in_c = out_c; + + int in_index = b*w*h*c + in_c*w*h + in_h*w + in_w; + + + if(forward) out[out_index] += scale * x[in_index]; + else atomicAdd(x+in_index, scale * out[out_index]); +} +extern "C" void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + size_t size = w*h*c*batch*stride*stride; + upsample_kernel<<>>(size, in, w, h, c, batch, stride, forward, scale, out); + 
check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/pinned/darknet/src/box.c b/workloads/realworld/pinned/darknet/src/box.c new file mode 100644 index 0000000000000000000000000000000000000000..8a1772c9ae05dede6ddc83d9b6465f64cf974ae8 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/box.c @@ -0,0 +1,357 @@ +#include "box.h" +#include +#include +#include + +int nms_comparator(const void *pa, const void *pb) +{ + detection a = *(detection *)pa; + detection b = *(detection *)pb; + float diff = 0; + if(b.sort_class >= 0){ + diff = a.prob[b.sort_class] - b.prob[b.sort_class]; + } else { + diff = a.objectness - b.objectness; + } + if(diff < 0) return 1; + else if(diff > 0) return -1; + return 0; +} + +void do_nms_obj(detection *dets, int total, int classes, float thresh) +{ + int i, j, k; + k = total-1; + for(i = 0; i <= k; ++i){ + if(dets[i].objectness == 0){ + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k+1; + + for(i = 0; i < total; ++i){ + dets[i].sort_class = -1; + } + + qsort(dets, total, sizeof(detection), nms_comparator); + for(i = 0; i < total; ++i){ + if(dets[i].objectness == 0) continue; + box a = dets[i].bbox; + for(j = i+1; j < total; ++j){ + if(dets[j].objectness == 0) continue; + box b = dets[j].bbox; + if (box_iou(a, b) > thresh){ + dets[j].objectness = 0; + for(k = 0; k < classes; ++k){ + dets[j].prob[k] = 0; + } + } + } + } +} + + +void do_nms_sort(detection *dets, int total, int classes, float thresh) +{ + int i, j, k; + k = total-1; + for(i = 0; i <= k; ++i){ + if(dets[i].objectness == 0){ + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k+1; + + for(k = 0; k < classes; ++k){ + for(i = 0; i < total; ++i){ + dets[i].sort_class = k; + } + qsort(dets, total, sizeof(detection), nms_comparator); + for(i = 0; i < total; ++i){ + if(dets[i].prob[k] == 0) continue; + box a = dets[i].bbox; + for(j = i+1; j < total; ++j){ + box b = 
dets[j].bbox; + if (box_iou(a, b) > thresh){ + dets[j].prob[k] = 0; + } + } + } + } +} + +box float_to_box(float *f, int stride) +{ + box b = {0}; + b.x = f[0]; + b.y = f[1*stride]; + b.w = f[2*stride]; + b.h = f[3*stride]; + return b; +} + +dbox derivative(box a, box b) +{ + dbox d; + d.dx = 0; + d.dw = 0; + float l1 = a.x - a.w/2; + float l2 = b.x - b.w/2; + if (l1 > l2){ + d.dx -= 1; + d.dw += .5; + } + float r1 = a.x + a.w/2; + float r2 = b.x + b.w/2; + if(r1 < r2){ + d.dx += 1; + d.dw += .5; + } + if (l1 > r2) { + d.dx = -1; + d.dw = 0; + } + if (r1 < l2){ + d.dx = 1; + d.dw = 0; + } + + d.dy = 0; + d.dh = 0; + float t1 = a.y - a.h/2; + float t2 = b.y - b.h/2; + if (t1 > t2){ + d.dy -= 1; + d.dh += .5; + } + float b1 = a.y + a.h/2; + float b2 = b.y + b.h/2; + if(b1 < b2){ + d.dy += 1; + d.dh += .5; + } + if (t1 > b2) { + d.dy = -1; + d.dh = 0; + } + if (b1 < t2){ + d.dy = 1; + d.dh = 0; + } + return d; +} + +float overlap(float x1, float w1, float x2, float w2) +{ + float l1 = x1 - w1/2; + float l2 = x2 - w2/2; + float left = l1 > l2 ? l1 : l2; + float r1 = x1 + w1/2; + float r2 = x2 + w2/2; + float right = r1 < r2 ? 
r1 : r2; + return right - left; +} + +float box_intersection(box a, box b) +{ + float w = overlap(a.x, a.w, b.x, b.w); + float h = overlap(a.y, a.h, b.y, b.h); + if(w < 0 || h < 0) return 0; + float area = w*h; + return area; +} + +float box_union(box a, box b) +{ + float i = box_intersection(a, b); + float u = a.w*a.h + b.w*b.h - i; + return u; +} + +float box_iou(box a, box b) +{ + return box_intersection(a, b)/box_union(a, b); +} + +float box_rmse(box a, box b) +{ + return sqrt(pow(a.x-b.x, 2) + + pow(a.y-b.y, 2) + + pow(a.w-b.w, 2) + + pow(a.h-b.h, 2)); +} + +dbox dintersect(box a, box b) +{ + float w = overlap(a.x, a.w, b.x, b.w); + float h = overlap(a.y, a.h, b.y, b.h); + dbox dover = derivative(a, b); + dbox di; + + di.dw = dover.dw*h; + di.dx = dover.dx*h; + di.dh = dover.dh*w; + di.dy = dover.dy*w; + + return di; +} + +dbox dunion(box a, box b) +{ + dbox du; + + dbox di = dintersect(a, b); + du.dw = a.h - di.dw; + du.dh = a.w - di.dh; + du.dx = -di.dx; + du.dy = -di.dy; + + return du; +} + + +void test_dunion() +{ + box a = {0, 0, 1, 1}; + box dxa= {0+.0001, 0, 1, 1}; + box dya= {0, 0+.0001, 1, 1}; + box dwa= {0, 0, 1+.0001, 1}; + box dha= {0, 0, 1, 1+.0001}; + + box b = {.5, .5, .2, .2}; + dbox di = dunion(a,b); + printf("Union: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); + float inter = box_union(a, b); + float xinter = box_union(dxa, b); + float yinter = box_union(dya, b); + float winter = box_union(dwa, b); + float hinter = box_union(dha, b); + xinter = (xinter - inter)/(.0001); + yinter = (yinter - inter)/(.0001); + winter = (winter - inter)/(.0001); + hinter = (hinter - inter)/(.0001); + printf("Union Manual %f %f %f %f\n", xinter, yinter, winter, hinter); +} +void test_dintersect() +{ + box a = {0, 0, 1, 1}; + box dxa= {0+.0001, 0, 1, 1}; + box dya= {0, 0+.0001, 1, 1}; + box dwa= {0, 0, 1+.0001, 1}; + box dha= {0, 0, 1, 1+.0001}; + + box b = {.5, .5, .2, .2}; + dbox di = dintersect(a,b); + printf("Inter: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); 
+ float inter = box_intersection(a, b); + float xinter = box_intersection(dxa, b); + float yinter = box_intersection(dya, b); + float winter = box_intersection(dwa, b); + float hinter = box_intersection(dha, b); + xinter = (xinter - inter)/(.0001); + yinter = (yinter - inter)/(.0001); + winter = (winter - inter)/(.0001); + hinter = (hinter - inter)/(.0001); + printf("Inter Manual %f %f %f %f\n", xinter, yinter, winter, hinter); +} + +void test_box() +{ + test_dintersect(); + test_dunion(); + box a = {0, 0, 1, 1}; + box dxa= {0+.00001, 0, 1, 1}; + box dya= {0, 0+.00001, 1, 1}; + box dwa= {0, 0, 1+.00001, 1}; + box dha= {0, 0, 1, 1+.00001}; + + box b = {.5, 0, .2, .2}; + + float iou = box_iou(a,b); + iou = (1-iou)*(1-iou); + printf("%f\n", iou); + dbox d = diou(a, b); + printf("%f %f %f %f\n", d.dx, d.dy, d.dw, d.dh); + + float xiou = box_iou(dxa, b); + float yiou = box_iou(dya, b); + float wiou = box_iou(dwa, b); + float hiou = box_iou(dha, b); + xiou = ((1-xiou)*(1-xiou) - iou)/(.00001); + yiou = ((1-yiou)*(1-yiou) - iou)/(.00001); + wiou = ((1-wiou)*(1-wiou) - iou)/(.00001); + hiou = ((1-hiou)*(1-hiou) - iou)/(.00001); + printf("manual %f %f %f %f\n", xiou, yiou, wiou, hiou); +} + +dbox diou(box a, box b) +{ + float u = box_union(a,b); + float i = box_intersection(a,b); + dbox di = dintersect(a,b); + dbox du = dunion(a,b); + dbox dd = {0,0,0,0}; + + if(i <= 0 || 1) { + dd.dx = b.x - a.x; + dd.dy = b.y - a.y; + dd.dw = b.w - a.w; + dd.dh = b.h - a.h; + return dd; + } + + dd.dx = 2*pow((1-(i/u)),1)*(di.dx*u - du.dx*i)/(u*u); + dd.dy = 2*pow((1-(i/u)),1)*(di.dy*u - du.dy*i)/(u*u); + dd.dw = 2*pow((1-(i/u)),1)*(di.dw*u - du.dw*i)/(u*u); + dd.dh = 2*pow((1-(i/u)),1)*(di.dh*u - du.dh*i)/(u*u); + return dd; +} + + +void do_nms(box *boxes, float **probs, int total, int classes, float thresh) +{ + int i, j, k; + for(i = 0; i < total; ++i){ + int any = 0; + for(k = 0; k < classes; ++k) any = any || (probs[i][k] > 0); + if(!any) { + continue; + } + for(j = i+1; j < total; 
++j){ + if (box_iou(boxes[i], boxes[j]) > thresh){ + for(k = 0; k < classes; ++k){ + if (probs[i][k] < probs[j][k]) probs[i][k] = 0; + else probs[j][k] = 0; + } + } + } + } +} + +box encode_box(box b, box anchor) +{ + box encode; + encode.x = (b.x - anchor.x) / anchor.w; + encode.y = (b.y - anchor.y) / anchor.h; + encode.w = log2(b.w / anchor.w); + encode.h = log2(b.h / anchor.h); + return encode; +} + +box decode_box(box b, box anchor) +{ + box decode; + decode.x = b.x * anchor.w + anchor.x; + decode.y = b.y * anchor.h + anchor.y; + decode.w = pow(2., b.w) * anchor.w; + decode.h = pow(2., b.h) * anchor.h; + return decode; +} diff --git a/workloads/realworld/pinned/darknet/src/box.h b/workloads/realworld/pinned/darknet/src/box.h new file mode 100644 index 0000000000000000000000000000000000000000..dda3e59100c3d9e0a6bb05a80070155d9fcbc876 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/box.h @@ -0,0 +1,14 @@ +#ifndef BOX_H +#define BOX_H +#include "darknet.h" + +typedef struct{ + float dx, dy, dw, dh; +} dbox; + +float box_rmse(box a, box b); +dbox diou(box a, box b); +box decode_box(box b, box anchor); +box encode_box(box b, box anchor); + +#endif diff --git a/workloads/realworld/pinned/darknet/src/classifier.h b/workloads/realworld/pinned/darknet/src/classifier.h new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/classifier.h @@ -0,0 +1 @@ + diff --git a/workloads/realworld/pinned/darknet/src/col2im.c b/workloads/realworld/pinned/darknet/src/col2im.c new file mode 100644 index 0000000000000000000000000000000000000000..5c4605e197439f79fe05c41337a5f2b8103f63ba --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/col2im.c @@ -0,0 +1,39 @@ +#include +#include +void col2im_add_pixel(float *im, int height, int width, int channels, + int row, int col, int channel, int pad, float val) +{ + row -= pad; + col -= pad; + + if (row < 0 || col < 
0 || + row >= height || col >= width) return; + im[col + width*(row + height*channel)] += val; +} +//This one might be too, can't remember. +void col2im_cpu(float* data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_im) +{ + int c,h,w; + int height_col = (height + 2*pad - ksize) / stride + 1; + int width_col = (width + 2*pad - ksize) / stride + 1; + + int channels_col = channels * ksize * ksize; + for (c = 0; c < channels_col; ++c) { + int w_offset = c % ksize; + int h_offset = (c / ksize) % ksize; + int c_im = c / ksize / ksize; + for (h = 0; h < height_col; ++h) { + for (w = 0; w < width_col; ++w) { + int im_row = h_offset + h * stride; + int im_col = w_offset + w * stride; + int col_index = (c * height_col + h) * width_col + w; + double val = data_col[col_index]; + col2im_add_pixel(data_im, height, width, channels, + im_row, im_col, c_im, pad, val); + } + } + } +} + diff --git a/workloads/realworld/pinned/darknet/src/col2im.h b/workloads/realworld/pinned/darknet/src/col2im.h new file mode 100644 index 0000000000000000000000000000000000000000..3fbe05307db65a1f511f801670a23734e21b7dff --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/col2im.h @@ -0,0 +1,13 @@ +#ifndef COL2IM_H +#define COL2IM_H + +void col2im_cpu(float* data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_im); + +#ifdef GPU +void col2im_gpu(float *data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_im); +#endif +#endif diff --git a/workloads/realworld/pinned/darknet/src/col2im_kernels.cu b/workloads/realworld/pinned/darknet/src/col2im_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..30ea71e2c6ac0bb81235729c37568abbaa987d3d --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/col2im_kernels.cu @@ -0,0 +1,58 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "col2im.h" +#include 
"cuda_dark.h" +} + +// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu +// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE + +__global__ void col2im_gpu_kernel(const int n, const float* data_col, + const int height, const int width, const int ksize, + const int pad, + const int stride, + const int height_col, const int width_col, + float *data_im) { + int index = blockIdx.x*blockDim.x+threadIdx.x; + for(; index < n; index += blockDim.x*gridDim.x){ + float val = 0; + int w = index % width + pad; + int h = (index / width) % height + pad; + int c = index / (width * height); + // compute the start and end of the output + int w_col_start = (w < ksize) ? 0 : (w - ksize) / stride + 1; + int w_col_end = min(w / stride + 1, width_col); + int h_col_start = (h < ksize) ? 0 : (h - ksize) / stride + 1; + int h_col_end = min(h / stride + 1, height_col); + // equivalent implementation + int offset = + (c * ksize * ksize + h * ksize + w) * height_col * width_col; + int coeff_h_col = (1 - stride * ksize * height_col) * width_col; + int coeff_w_col = (1 - stride * height_col * width_col); + for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { + for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { + val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col]; + } + } + data_im[index] += val; + } +} + +void col2im_gpu(float *data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_im){ + // We are going to launch channels * height_col * width_col kernels, each + // kernel responsible for copying a single-channel grid. 
+ int height_col = (height + 2 * pad - ksize) / stride + 1; + int width_col = (width + 2 * pad - ksize) / stride + 1; + int num_kernels = channels * height * width; + col2im_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, + BLOCK>>>( + num_kernels, data_col, height, width, ksize, pad, + stride, height_col, + width_col, data_im); +} + diff --git a/workloads/realworld/pinned/darknet/src/compare.c b/workloads/realworld/pinned/darknet/src/compare.c new file mode 100644 index 0000000000000000000000000000000000000000..d2d2b3bdc675cf808f483d1607550e072e245396 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/compare.c @@ -0,0 +1,352 @@ +#include + +#include "network.h" +#include "detection_layer.h" +#include "cost_layer.h" +#include "utils.h" +#include "parser.h" +#include "box.h" + +void train_compare(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + list *plist = get_paths("data/compare.train.list"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + printf("%d\n", N); + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.classes = 20; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = COMPARE_DATA; + + load_thread = load_data_in_thread(args); + int epoch = *net.seen/N; + int i = 0; + while(1){ + ++i; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = 
avg_loss*.9 + loss*.1; + printf("%.3f: %f, %f avg, %lf seconds, %ld images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen); + free_data(train); + if(i%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_%d_minor_%d.weights",backup_directory,base, epoch, i); + save_weights(net, buff); + } + if(*net.seen/N > epoch){ + epoch = *net.seen/N; + i = 0; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + if(epoch%22 == 0) net.learning_rate *= .1; + } + } + pthread_join(load_thread, 0); + free_data(buffer); + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void validate_compare(char *filename, char *weightfile) +{ + int i = 0; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + list *plist = get_paths("data/compare.val.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size/2; + free_list(plist); + + clock_t time; + int correct = 0; + int total = 0; + int splits = 10; + int num = (i+1)*N/splits - i*N/splits; + + data val, buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.classes = 20; + args.n = num; + args.m = 0; + args.d = &buffer; + args.type = COMPARE_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(i = 1; i <= splits; ++i){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + num = (i+1)*N/splits - i*N/splits; + char **part = paths+(i*N/splits); + if(i != splits){ + args.paths = part; + load_thread = load_data_in_thread(args); + } + printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + int j,k; + for(j = 0; j < val.y.rows; ++j){ + for(k = 0; k < 20; ++k){ + if(val.y.vals[j][k*2] != val.y.vals[j][k*2+1]){ + ++total; + 
if((val.y.vals[j][k*2] < val.y.vals[j][k*2+1]) == (pred.vals[j][k*2] < pred.vals[j][k*2+1])){ + ++correct; + } + } + } + } + free_matrix(pred); + printf("%d: Acc: %f, %lf seconds, %d images\n", i, (float)correct/total, sec(clock()-time), val.X.rows); + free_data(val); + } +} + +typedef struct { + network net; + char *filename; + int class; + int classes; + float elo; + float *elos; +} sortable_bbox; + +int total_compares = 0; +int current_class = 0; + +int elo_comparator(const void*a, const void *b) +{ + sortable_bbox box1 = *(sortable_bbox*)a; + sortable_bbox box2 = *(sortable_bbox*)b; + if(box1.elos[current_class] == box2.elos[current_class]) return 0; + if(box1.elos[current_class] > box2.elos[current_class]) return -1; + return 1; +} + +int bbox_comparator(const void *a, const void *b) +{ + ++total_compares; + sortable_bbox box1 = *(sortable_bbox*)a; + sortable_bbox box2 = *(sortable_bbox*)b; + network net = box1.net; + int class = box1.class; + + image im1 = load_image_color(box1.filename, net.w, net.h); + image im2 = load_image_color(box2.filename, net.w, net.h); + float *X = calloc(net.w*net.h*net.c, sizeof(float)); + memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); + memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); + float *predictions = network_predict(net, X); + + free_image(im1); + free_image(im2); + free(X); + if (predictions[class*2] > predictions[class*2+1]){ + return 1; + } + return -1; +} + +void bbox_update(sortable_bbox *a, sortable_bbox *b, int class, int result) +{ + int k = 32; + float EA = 1./(1+pow(10, (b->elos[class] - a->elos[class])/400.)); + float EB = 1./(1+pow(10, (a->elos[class] - b->elos[class])/400.)); + float SA = result ? 1 : 0; + float SB = result ? 
0 : 1; + a->elos[class] += k*(SA - EA); + b->elos[class] += k*(SB - EB); +} + +void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, int class) +{ + image im1 = load_image_color(a->filename, net.w, net.h); + image im2 = load_image_color(b->filename, net.w, net.h); + float *X = calloc(net.w*net.h*net.c, sizeof(float)); + memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); + memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); + float *predictions = network_predict(net, X); + ++total_compares; + + int i; + for(i = 0; i < classes; ++i){ + if(class < 0 || class == i){ + int result = predictions[i*2] > predictions[i*2+1]; + bbox_update(a, b, i, result); + } + } + + free_image(im1); + free_image(im2); + free(X); +} + +void SortMaster3000(char *filename, char *weightfile) +{ + int i = 0; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + set_batch_network(&net, 1); + + list *plist = get_paths("data/compare.sort.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + free_list(plist); + sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); + printf("Sorting %d boxes...\n", N); + for(i = 0; i < N; ++i){ + boxes[i].filename = paths[i]; + boxes[i].net = net; + boxes[i].class = 7; + boxes[i].elo = 1500; + } + clock_t time=clock(); + qsort(boxes, N, sizeof(sortable_bbox), bbox_comparator); + for(i = 0; i < N; ++i){ + printf("%s\n", boxes[i].filename); + } + printf("Sorted in %d compares, %f secs\n", total_compares, sec(clock()-time)); +} + +void BattleRoyaleWithCheese(char *filename, char *weightfile) +{ + int classes = 20; + int i,j; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + set_batch_network(&net, 1); + + list *plist = get_paths("data/compare.sort.list"); + //list *plist = 
get_paths("data/compare.small.list"); + //list *plist = get_paths("data/compare.cat.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + int total = N; + free_list(plist); + sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); + printf("Battling %d boxes...\n", N); + for(i = 0; i < N; ++i){ + boxes[i].filename = paths[i]; + boxes[i].net = net; + boxes[i].classes = classes; + boxes[i].elos = calloc(classes, sizeof(float));; + for(j = 0; j < classes; ++j){ + boxes[i].elos[j] = 1500; + } + } + int round; + clock_t time=clock(); + for(round = 1; round <= 4; ++round){ + clock_t round_time=clock(); + printf("Round: %d\n", round); + shuffle(boxes, N, sizeof(sortable_bbox)); + for(i = 0; i < N/2; ++i){ + bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, -1); + } + printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); + } + + int class; + + for (class = 0; class < classes; ++class){ + + N = total; + current_class = class; + qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + N /= 2; + + for(round = 1; round <= 100; ++round){ + clock_t round_time=clock(); + printf("Round: %d\n", round); + + sorta_shuffle(boxes, N, sizeof(sortable_bbox), 10); + for(i = 0; i < N/2; ++i){ + bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, class); + } + qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + if(round <= 20) N = (N*9/10)/2*2; + + printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); + } + char buff[256]; + sprintf(buff, "results/battle_%d.log", class); + FILE *outfp = fopen(buff, "w"); + for(i = 0; i < N; ++i){ + fprintf(outfp, "%s %f\n", boxes[i].filename, boxes[i].elos[class]); + } + fclose(outfp); + } + printf("Tournament in %d compares, %f secs\n", total_compares, sec(clock()-time)); +} + +void run_compare(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + 
return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + //char *filename = (argc > 5) ? argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_compare(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_compare(cfg, weights); + else if(0==strcmp(argv[2], "sort")) SortMaster3000(cfg, weights); + else if(0==strcmp(argv[2], "battle")) BattleRoyaleWithCheese(cfg, weights); + /* + else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); + else if(0==strcmp(argv[2], "extract")) extract_boxes(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_recall(cfg, weights); + */ +} diff --git a/workloads/realworld/pinned/darknet/src/connected_layer.c b/workloads/realworld/pinned/darknet/src/connected_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..ec25b44d998661c4735cd9a8a86f2355a0ae0080 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/connected_layer.c @@ -0,0 +1,336 @@ +#include "connected_layer.h" +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam) +{ + int i; + layer l = {0}; + l.learning_rate_scale = 1; + l.type = CONNECTED; + + l.inputs = inputs; + l.outputs = outputs; + l.batch=batch; + l.batch_normalize = batch_normalize; + l.h = 1; + l.w = 1; + l.c = inputs; + l.out_h = 1; + l.out_w = 1; + l.out_c = outputs; + + l.output = calloc(batch*outputs, sizeof(float)); + l.delta = calloc(batch*outputs, sizeof(float)); + + l.weight_updates = calloc(inputs*outputs, sizeof(float)); + l.bias_updates = calloc(outputs, sizeof(float)); + + l.weights = calloc(outputs*inputs, sizeof(float)); + l.biases = calloc(outputs, sizeof(float)); + + l.forward = forward_connected_layer; + l.backward = backward_connected_layer; + l.update = 
update_connected_layer; + + //float scale = 1./sqrt(inputs); + float scale = sqrt(2./inputs); + for(i = 0; i < outputs*inputs; ++i){ + l.weights[i] = scale*rand_uniform(-1, 1); + } + + for(i = 0; i < outputs; ++i){ + l.biases[i] = 0; + } + + if(adam){ + l.m = calloc(l.inputs*l.outputs, sizeof(float)); + l.v = calloc(l.inputs*l.outputs, sizeof(float)); + l.bias_m = calloc(l.outputs, sizeof(float)); + l.scale_m = calloc(l.outputs, sizeof(float)); + l.bias_v = calloc(l.outputs, sizeof(float)); + l.scale_v = calloc(l.outputs, sizeof(float)); + } + if(batch_normalize){ + l.scales = calloc(outputs, sizeof(float)); + l.scale_updates = calloc(outputs, sizeof(float)); + for(i = 0; i < outputs; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(outputs, sizeof(float)); + l.mean_delta = calloc(outputs, sizeof(float)); + l.variance = calloc(outputs, sizeof(float)); + l.variance_delta = calloc(outputs, sizeof(float)); + + l.rolling_mean = calloc(outputs, sizeof(float)); + l.rolling_variance = calloc(outputs, sizeof(float)); + + l.x = calloc(batch*outputs, sizeof(float)); + l.x_norm = calloc(batch*outputs, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_connected_layer_gpu; + l.backward_gpu = backward_connected_layer_gpu; + l.update_gpu = update_connected_layer_gpu; + + l.weights_gpu = cuda_make_array(l.weights, outputs*inputs); + l.biases_gpu = cuda_make_array(l.biases, outputs); + + l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs); + + l.output_gpu = cuda_make_array(l.output, outputs*batch); + l.delta_gpu = cuda_make_array(l.delta, outputs*batch); + if (adam) { + l.m_gpu = cuda_make_array(0, inputs*outputs); + l.v_gpu = cuda_make_array(0, inputs*outputs); + l.bias_m_gpu = cuda_make_array(0, outputs); + l.bias_v_gpu = cuda_make_array(0, outputs); + l.scale_m_gpu = cuda_make_array(0, outputs); + l.scale_v_gpu = cuda_make_array(0, outputs); + } + + if(batch_normalize){ + l.mean_gpu 
= cuda_make_array(l.mean, outputs); + l.variance_gpu = cuda_make_array(l.variance, outputs); + + l.rolling_mean_gpu = cuda_make_array(l.mean, outputs); + l.rolling_variance_gpu = cuda_make_array(l.variance, outputs); + + l.mean_delta_gpu = cuda_make_array(l.mean, outputs); + l.variance_delta_gpu = cuda_make_array(l.variance, outputs); + + l.scales_gpu = cuda_make_array(l.scales, outputs); + l.scale_updates_gpu = cuda_make_array(l.scale_updates, outputs); + + l.x_gpu = cuda_make_array(l.output, l.batch*outputs); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*outputs); +#ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); +#endif + } +#endif + l.activation = activation; + fprintf(stderr, "connected %4d -> %4d\n", inputs, outputs); + return l; +} + +void update_connected_layer(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.outputs, momentum, l.bias_updates, 1); + + if(l.batch_normalize){ + axpy_cpu(l.outputs, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.outputs, momentum, l.scale_updates, 1); + } + + axpy_cpu(l.inputs*l.outputs, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(l.inputs*l.outputs, momentum, l.weight_updates, 1); +} + +void forward_connected_layer(layer l, network net) +{ + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + int m = l.batch; + int k = l.inputs; + int n = l.outputs; + float *a = net.input; + float *b = l.weights; + float *c = 
l.output; + gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); + if(l.batch_normalize){ + forward_batchnorm_layer(l, net); + } else { + add_bias(l.output, l.biases, l.batch, l.outputs, 1); + } + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_connected_layer(layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l.bias_updates, l.delta, l.batch, l.outputs, 1); + } + + int m = l.outputs; + int k = l.batch; + int n = l.inputs; + float *a = l.delta; + float *b = net.input; + float *c = l.weight_updates; + gemm(1,0,m,n,k,1,a,m,b,n,1,c,n); + + m = l.batch; + k = l.outputs; + n = l.inputs; + + a = l.delta; + b = l.weights; + c = net.delta; + + if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); +} + + +void denormalize_connected_layer(layer l) +{ + int i, j; + for(i = 0; i < l.outputs; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .000001); + for(j = 0; j < l.inputs; ++j){ + l.weights[i*l.inputs + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + + +void statistics_connected_layer(layer l) +{ + if(l.batch_normalize){ + printf("Scales "); + print_statistics(l.scales, l.outputs); + /* + printf("Rolling Mean "); + print_statistics(l.rolling_mean, l.outputs); + printf("Rolling Variance "); + print_statistics(l.rolling_variance, l.outputs); + */ + } + printf("Biases "); + print_statistics(l.biases, l.outputs); + printf("Weights "); + print_statistics(l.weights, l.outputs); +} + +#ifdef GPU + +void pull_connected_layer(layer l) +{ + cuda_pull_array(l.weights_gpu, l.weights, l.inputs*l.outputs); + cuda_pull_array(l.biases_gpu, l.biases, l.outputs); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs); + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + if (l.batch_normalize){ + 
cuda_pull_array(l.scales_gpu, l.scales, l.outputs); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs); + } +} + +void push_connected_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.inputs*l.outputs); + cuda_push_array(l.biases_gpu, l.biases, l.outputs); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs); + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + if (l.batch_normalize){ + cuda_push_array(l.scales_gpu, l.scales, l.outputs); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs); + } +} + +void update_connected_layer_gpu(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + if(a.adam){ + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.inputs*l.outputs, batch, a.t); + adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t); + if(l.scales_gpu){ + adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t); + } + }else{ + axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1); + + if(l.batch_normalize){ + axpy_gpu(l.outputs, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.outputs, momentum, l.scale_updates_gpu, 1); + } + + axpy_gpu(l.inputs*l.outputs, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.inputs*l.outputs, momentum, l.weight_updates_gpu, 1); + } +} + 
+void forward_connected_layer_gpu(layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + + int m = l.batch; + int k = l.inputs; + int n = l.outputs; + float * a = net.input_gpu; + float * b = l.weights_gpu; + float * c = l.output_gpu; + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (l.batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.outputs, 1); + } + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_connected_layer_gpu(layer l, network net) +{ + constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + if(l.batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.outputs, 1); + } + + int m = l.outputs; + int k = l.batch; + int n = l.inputs; + float * a = l.delta_gpu; + float * b = net.input_gpu; + float * c = l.weight_updates_gpu; + gemm_gpu(1,0,m,n,k,1,a,m,b,n,1,c,n); + + m = l.batch; + k = l.outputs; + n = l.inputs; + + a = l.delta_gpu; + b = l.weights_gpu; + c = net.delta_gpu; + + if(c) gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); +} +#endif diff --git a/workloads/realworld/pinned/darknet/src/connected_layer.h b/workloads/realworld/pinned/darknet/src/connected_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..6727a964eaa923906b202ff337aa69ad91817117 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/connected_layer.h @@ -0,0 +1,23 @@ +#ifndef CONNECTED_LAYER_H +#define CONNECTED_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam); + +void forward_connected_layer(layer l, network net); +void backward_connected_layer(layer l, network net); +void update_connected_layer(layer l, update_args a); + +#ifdef GPU +void 
forward_connected_layer_gpu(layer l, network net); +void backward_connected_layer_gpu(layer l, network net); +void update_connected_layer_gpu(layer l, update_args a); +void push_connected_layer(layer l); +void pull_connected_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/pinned/darknet/src/convolutional_kernels.cu b/workloads/realworld/pinned/darknet/src/convolutional_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..ed9d76e49548e4636c545d4e5d33ddc9b63e5905 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/convolutional_kernels.cu @@ -0,0 +1,330 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "gemm.h" +#include "blas.h" +#include "im2col.h" +#include "col2im.h" +#include "utils.h" +#include "cuda_dark.h" +} + +__global__ void binarize_kernel(float *x, int n, float *binary) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= n) return; + binary[i] = (x[i] >= 0) ? 1 : -1; +} + +void binarize_gpu(float *x, int n, float *binary) +{ + binarize_kernel<<>>(x, n, binary); + check_error(cudaPeekAtLastError()); +} + +__global__ void binarize_input_kernel(float *input, int n, int size, float *binary) +{ + int s = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (s >= size) return; + int i = 0; + float mean = 0; + for(i = 0; i < n; ++i){ + mean += fabsf(input[i*size + s]); + } + mean = mean / n; + for(i = 0; i < n; ++i){ + binary[i*size + s] = (input[i*size + s] > 0) ? 
mean : -mean; + } +} + +void binarize_input_gpu(float *input, int n, int size, float *binary) +{ + binarize_input_kernel<<>>(input, n, size, binary); + check_error(cudaPeekAtLastError()); +} + + +__global__ void binarize_weights_kernel(float *weights, int n, int size, float *binary) +{ + int f = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (f >= n) return; + int i = 0; + float mean = 0; + for(i = 0; i < size; ++i){ + mean += fabsf(weights[f*size + i]); + } + mean = mean / size; + for(i = 0; i < size; ++i){ + binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean; + //binary[f*size + i] = weights[f*size + i]; + } +} + +void binarize_weights_gpu(float *weights, int n, int size, float *binary) +{ + binarize_weights_kernel<<>>(weights, n, size, binary); + check_error(cudaPeekAtLastError()); +} + +void forward_convolutional_layer_gpu(convolutional_layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + if(l.binary){ + binarize_weights_gpu(l.weights_gpu, l.n, l.c/l.groups*l.size*l.size, l.binary_weights_gpu); + swap_binary(&l); + } + + if(l.xnor){ + binarize_weights_gpu(l.weights_gpu, l.n, l.c/l.groups*l.size*l.size, l.binary_weights_gpu); + swap_binary(&l); + binarize_gpu(net.input_gpu, l.c*l.h*l.w*l.batch, l.binary_input_gpu); + net.input_gpu = l.binary_input_gpu; + } + +#ifdef CUDNN + float one = 1; + cudnnConvolutionForward(cudnn_handle(), + &one, + l.srcTensorDesc, + net.input_gpu, + l.weightDesc, + l.weights_gpu, + l.convDesc, + l.fw_algo, + net.workspace, + l.workspace_size, + &one, + l.dstTensorDesc, + l.output_gpu); + +#else + int i, j; + int m = l.n/l.groups; + int k = l.size*l.size*l.c/l.groups; + int n = l.out_w*l.out_h; + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.weights_gpu + j*l.nweights/l.groups; + float *b = net.workspace; + float *c = l.output_gpu + (i*l.groups + j)*n*m; + float *im = net.input_gpu + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + + if (l.size == 1){ + b = 
im; + } else { + im2col_gpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + } + gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } +#endif + + if (l.batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); + } + + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); + //if(l.dot > 0) dot_error_gpu(l); + if(l.binary || l.xnor) swap_binary(&l); +} + +__global__ void smooth_kernel(float *x, int n, int w, int h, int c, int size, float rate, float *delta) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int j = id % w; + id /= w; + int i = id % h; + id /= h; + int k = id % c; + id /= c; + int b = id; + + int w_offset = -(size/2.f); + int h_offset = -(size/2.f); + + int out_index = j + w*(i + h*(k + c*b)); + int l, m; + for(l = 0; l < size; ++l){ + for(m = 0; m < size; ++m){ + int cur_h = h_offset + i + l; + int cur_w = w_offset + j + m; + int index = cur_w + w*(cur_h + h*(k + b*c)); + int valid = (cur_h >= 0 && cur_h < h && + cur_w >= 0 && cur_w < w); + delta[out_index] += valid ? 
rate*(x[index] - x[out_index]) : 0; + } + } +} + +extern "C" void smooth_layer(layer l, int size, float rate) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.out_c; + + size_t n = h*w*c*l.batch; + + smooth_kernel<<>>(l.output_gpu, n, l.w, l.h, l.c, size, rate, l.delta_gpu); + check_error(cudaPeekAtLastError()); +} + +void backward_convolutional_layer_gpu(convolutional_layer l, network net) +{ + if(l.smooth){ + smooth_layer(l, 5, l.smooth); + } + //constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + + + if(l.batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); + } + float *original_input = net.input_gpu; + + if(l.xnor) net.input_gpu = l.binary_input_gpu; +#ifdef CUDNN + float one = 1; + cudnnConvolutionBackwardFilter(cudnn_handle(), + &one, + l.srcTensorDesc, + net.input_gpu, + l.ddstTensorDesc, + l.delta_gpu, + l.convDesc, + l.bf_algo, + net.workspace, + l.workspace_size, + &one, + l.dweightDesc, + l.weight_updates_gpu); + + if(net.delta_gpu){ + if(l.binary || l.xnor) swap_binary(&l); + cudnnConvolutionBackwardData(cudnn_handle(), + &one, + l.weightDesc, + l.weights_gpu, + l.ddstTensorDesc, + l.delta_gpu, + l.convDesc, + l.bd_algo, + net.workspace, + l.workspace_size, + &one, + l.dsrcTensorDesc, + net.delta_gpu); + if(l.binary || l.xnor) swap_binary(&l); + if(l.xnor) gradient_array_gpu(original_input, l.batch*l.c*l.h*l.w, HARDTAN, net.delta_gpu); + } + +#else + int m = l.n/l.groups; + int n = l.size*l.size*l.c/l.groups; + int k = l.out_w*l.out_h; + + int i, j; + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.delta_gpu + (i*l.groups + j)*m*k; + float *b = net.workspace; + float *c = l.weight_updates_gpu + j*l.nweights/l.groups; + + float *im = net.input_gpu+(i*l.groups + j)*l.c/l.groups*l.h*l.w; + float *imd = net.delta_gpu+(i*l.groups + 
j)*l.c/l.groups*l.h*l.w; + + im2col_gpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (net.delta_gpu) { + if (l.binary || l.xnor) swap_binary(&l); + a = l.weights_gpu + j*l.nweights/l.groups; + b = l.delta_gpu + (i*l.groups + j)*m*k; + c = net.workspace; + if (l.size == 1) { + c = imd; + } + + gemm_gpu(1,0,n,k,m,1,a,n,b,k,0,c,k); + + if (l.size != 1) { + col2im_gpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd); + } + if(l.binary || l.xnor) { + swap_binary(&l); + } + } + if(l.xnor) gradient_array_gpu(original_input + i*l.c*l.h*l.w, l.c*l.h*l.w, HARDTAN, net.delta_gpu + i*l.c*l.h*l.w); + } + } +#endif +} + +void pull_convolutional_layer(layer l) +{ + cuda_pull_array(l.weights_gpu, l.weights, l.nweights); + cuda_pull_array(l.biases_gpu, l.biases, l.n); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_pull_array(l.scales_gpu, l.scales, l.n); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + +void push_convolutional_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.nweights); + cuda_push_array(l.biases_gpu, l.biases, l.n); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_push_array(l.scales_gpu, l.scales, l.n); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + +void update_convolutional_layer_gpu(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + if(a.adam){ + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, 
learning_rate, l.nweights, batch, a.t); + adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + if(l.scales_gpu){ + adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + } + }else{ + axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1); + + axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.n, momentum, l.bias_updates_gpu, 1); + + if(l.scales_gpu){ + axpy_gpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.n, momentum, l.scale_updates_gpu, 1); + } + } + if(l.clip){ + constrain_gpu(l.nweights, l.clip, l.weights_gpu, 1); + } +} + + diff --git a/workloads/realworld/pinned/darknet/src/convolutional_layer.c b/workloads/realworld/pinned/darknet/src/convolutional_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..51418599b0f261802896640354d66fad807c6f72 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/convolutional_layer.c @@ -0,0 +1,646 @@ +#include "convolutional_layer.h" +#include "utils.h" +#include "batchnorm_layer.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" +#include +#include + +#ifdef AI2 +#include "xnor_layer.h" +#endif + +void swap_binary(convolutional_layer *l) +{ + float *swap = l->weights; + l->weights = l->binary_weights; + l->binary_weights = swap; + +#ifdef GPU + swap = l->weights_gpu; + l->weights_gpu = l->binary_weights_gpu; + l->binary_weights_gpu = swap; +#endif +} + +void binarize_weights(float *weights, int n, int size, float *binary) +{ + int i, f; + for(f = 0; f < n; ++f){ + float mean = 0; + for(i = 0; i < size; ++i){ + mean += fabs(weights[f*size + i]); + } + mean 
= mean / size; + for(i = 0; i < size; ++i){ + binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean; + } + } +} + +void binarize_cpu(float *input, int n, float *binary) +{ + int i; + for(i = 0; i < n; ++i){ + binary[i] = (input[i] > 0) ? 1 : -1; + } +} + +void binarize_input(float *input, int n, int size, float *binary) +{ + int i, s; + for(s = 0; s < size; ++s){ + float mean = 0; + for(i = 0; i < n; ++i){ + mean += fabs(input[i*size + s]); + } + mean = mean / n; + for(i = 0; i < n; ++i){ + binary[i*size + s] = (input[i*size + s] > 0) ? mean : -mean; + } + } +} + +int convolutional_out_height(convolutional_layer l) +{ + return (l.h + 2*l.pad - l.size) / l.stride + 1; +} + +int convolutional_out_width(convolutional_layer l) +{ + return (l.w + 2*l.pad - l.size) / l.stride + 1; +} + +image get_convolutional_image(convolutional_layer l) +{ + return float_to_image(l.out_w,l.out_h,l.out_c,l.output); +} + +image get_convolutional_delta(convolutional_layer l) +{ + return float_to_image(l.out_w,l.out_h,l.out_c,l.delta); +} + +static size_t get_workspace_size(layer l){ +#ifdef CUDNN + if(gpu_index >= 0){ + size_t most = 0; + size_t s = 0; + cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(), + l.srcTensorDesc, + l.weightDesc, + l.convDesc, + l.dstTensorDesc, + l.fw_algo, + &s); + if (s > most) most = s; + cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnn_handle(), + l.srcTensorDesc, + l.ddstTensorDesc, + l.convDesc, + l.dweightDesc, + l.bf_algo, + &s); + if (s > most) most = s; + cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(), + l.weightDesc, + l.ddstTensorDesc, + l.convDesc, + l.dsrcTensorDesc, + l.bd_algo, + &s); + if (s > most) most = s; + return most; + } +#endif + return (size_t)l.out_h*l.out_w*l.size*l.size*l.c/l.groups*sizeof(float); +} + +#ifdef GPU +#ifdef CUDNN +void cudnn_convolutional_setup(layer *l) +{ + cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); + 
cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + + cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + + cudnnSetFilter4dDescriptor(l->dweightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size); + cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size); + #if CUDNN_MAJOR >= 6 + cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT); + #else + cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION); + #endif + + #if CUDNN_MAJOR >= 7 + cudnnSetConvolutionGroupCount(l->convDesc, l->groups); + #else + if(l->groups > 1){ + error("CUDNN < 7 doesn't support groups, please upgrade!"); + } + #endif + + cudnnGetConvolutionForwardAlgorithm(cudnn_handle(), + l->srcTensorDesc, + l->weightDesc, + l->convDesc, + l->dstTensorDesc, + CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->fw_algo); + cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(), + l->weightDesc, + l->ddstTensorDesc, + l->convDesc, + l->dsrcTensorDesc, + CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->bd_algo); + cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(), + l->srcTensorDesc, + l->ddstTensorDesc, + l->convDesc, + l->dweightDesc, + CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->bf_algo); +} +#endif +#endif + +convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, 
ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam) +{ + int i; + convolutional_layer l = {0}; + l.type = CONVOLUTIONAL; + + l.groups = groups; + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.binary = binary; + l.xnor = xnor; + l.batch = batch; + l.stride = stride; + l.size = size; + l.pad = padding; + l.batch_normalize = batch_normalize; + + // l.weights = calloc(c/groups*n*size*size, sizeof(float)); + // l.weight_updates = calloc(c/groups*n*size*size, sizeof(float)); + + // l.biases = calloc(n, sizeof(float)); + // l.bias_updates = calloc(n, sizeof(float)); + + cudaMallocHost(&l.weights, c/groups*n*size*size*sizeof(float)); + cudaMallocHost(&l.weight_updates, c/groups*n*size*size* sizeof(float)); + + cudaMallocHost(&l.biases, n*sizeof(float)); + cudaMallocHost(&l.bias_updates, n*sizeof(float)); + + l.nweights = c/groups*n*size*size; + l.nbiases = n; + + // float scale = 1./sqrt(size*size*c); + float scale = sqrt(2./(size*size*c/l.groups)); + //printf("convscale %f\n", scale); + //scale = .02; + //for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1); + for(i = 0; i < l.nweights; ++i) l.weights[i] = scale*rand_normal(); + int out_w = convolutional_out_width(l); + int out_h = convolutional_out_height(l); + l.out_h = out_h; + l.out_w = out_w; + l.out_c = n; + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = l.w * l.h * l.c; + + // l.output = calloc(l.batch*l.outputs, sizeof(float)); + // l.delta = calloc(l.batch*l.outputs, sizeof(float)); + + cudaMallocHost(&l.output, l.batch*l.outputs*sizeof(float)); + cudaMallocHost(&l.delta, l.batch*l.outputs*sizeof(float)); + + l.forward = forward_convolutional_layer; + l.backward = backward_convolutional_layer; + l.update = update_convolutional_layer; + if(binary){ + l.binary_weights = calloc(l.nweights, sizeof(float)); + l.cweights = calloc(l.nweights, sizeof(char)); + l.scales = calloc(n, sizeof(float)); + } + if(xnor){ + l.binary_weights = calloc(l.nweights, sizeof(float)); 
+ l.binary_input = calloc(l.inputs*l.batch, sizeof(float)); + } + + if(batch_normalize){ + // l.scales = calloc(n, sizeof(float)); + // l.scale_updates = calloc(n, sizeof(float)); + + cudaMallocHost(&l.scales, n*sizeof(float)); + cudaMallocHost(&l.scale_updates, n*sizeof(float)); + + for(i = 0; i < n; ++i){ + l.scales[i] = 1; + } + + // l.mean = calloc(n, sizeof(float)); + // l.variance = calloc(n, sizeof(float)); + + cudaMallocHost(&l.mean, n*sizeof(float)); + cudaMallocHost(&l.variance, n*sizeof(float)); + + // l.mean_delta = calloc(n, sizeof(float)); + // l.variance_delta = calloc(n, sizeof(float)); + + cudaMallocHost(&l.mean_delta, n*sizeof(float)); + cudaMallocHost(&l.variance_delta, n*sizeof(float)); + + // l.rolling_mean = calloc(n, sizeof(float)); + // l.rolling_variance = calloc(n, sizeof(float)); + // l.x = calloc(l.batch*l.outputs, sizeof(float)); + // l.x_norm = calloc(l.batch*l.outputs, sizeof(float)); + + cudaMallocHost(&l.rolling_mean, n*sizeof(float)); + cudaMallocHost(&l.rolling_variance, n*sizeof(float)); + cudaMallocHost(&l.x, l.batch*l.outputs*sizeof(float)); + cudaMallocHost(&l.x_norm, l.batch*l.outputs*sizeof(float)); + } + if(adam){ + l.m = calloc(l.nweights, sizeof(float)); + l.v = calloc(l.nweights, sizeof(float)); + l.bias_m = calloc(n, sizeof(float)); + l.scale_m = calloc(n, sizeof(float)); + l.bias_v = calloc(n, sizeof(float)); + l.scale_v = calloc(n, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_convolutional_layer_gpu; + l.backward_gpu = backward_convolutional_layer_gpu; + l.update_gpu = update_convolutional_layer_gpu; + + if(gpu_index >= 0){ + if (adam) { + l.m_gpu = cuda_make_array(l.m, l.nweights); + l.v_gpu = cuda_make_array(l.v, l.nweights); + l.bias_m_gpu = cuda_make_array(l.bias_m, n); + l.bias_v_gpu = cuda_make_array(l.bias_v, n); + l.scale_m_gpu = cuda_make_array(l.scale_m, n); + l.scale_v_gpu = cuda_make_array(l.scale_v, n); + } + + l.weights_gpu = cuda_make_array(l.weights, l.nweights); + l.weight_updates_gpu = 
cuda_make_array(l.weight_updates, l.nweights); + + l.biases_gpu = cuda_make_array(l.biases, n); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + + if(binary){ + l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); + } + if(xnor){ + l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); + l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch); + } + + if(batch_normalize){ + l.mean_gpu = cuda_make_array(l.mean, n); + l.variance_gpu = cuda_make_array(l.variance, n); + + l.rolling_mean_gpu = cuda_make_array(l.mean, n); + l.rolling_variance_gpu = cuda_make_array(l.variance, n); + + l.mean_delta_gpu = cuda_make_array(l.mean, n); + l.variance_delta_gpu = cuda_make_array(l.variance, n); + + l.scales_gpu = cuda_make_array(l.scales, n); + l.scale_updates_gpu = cuda_make_array(l.scale_updates, n); + + l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + } +#ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.srcTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnCreateFilterDescriptor(&l.weightDesc); + cudnnCreateTensorDescriptor(&l.dsrcTensorDesc); + cudnnCreateTensorDescriptor(&l.ddstTensorDesc); + cudnnCreateFilterDescriptor(&l.dweightDesc); + cudnnCreateConvolutionDescriptor(&l.convDesc); + cudnn_convolutional_setup(&l); +#endif + } +#endif + l.workspace_size = get_workspace_size(l); + l.activation = activation; + + fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BFLOPs\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, (2.0 * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w)/1000000000.); + + return l; +} + +void denormalize_convolutional_layer(convolutional_layer l) +{ + int i, j; + for(i = 0; i < l.n; ++i){ + float scale = 
l.scales[i]/sqrt(l.rolling_variance[i] + .00001); + for(j = 0; j < l.c/l.groups*l.size*l.size; ++j){ + l.weights[i*l.c/l.groups*l.size*l.size + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + +/* +void test_convolutional_layer() +{ + convolutional_layer l = make_convolutional_layer(1, 5, 5, 3, 2, 5, 2, 1, LEAKY, 1, 0, 0, 0); + l.batch_normalize = 1; + float data[] = {1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3}; + //net.input = data; + //forward_convolutional_layer(l); +} +*/ + +void resize_convolutional_layer(convolutional_layer *l, int w, int h) +{ + l->w = w; + l->h = h; + int out_w = convolutional_out_width(*l); + int out_h = convolutional_out_height(*l); + + l->out_w = out_w; + l->out_h = out_h; + + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->w * l->h * l->c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + if(l->batch_normalize){ + l->x = realloc(l->x, l->batch*l->outputs*sizeof(float)); + l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float)); + } + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); + + if(l->batch_normalize){ + cuda_free(l->x_gpu); + cuda_free(l->x_norm_gpu); + + l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); + l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); + } +#ifdef CUDNN + cudnn_convolutional_setup(l); +#endif +#endif + l->workspace_size = get_workspace_size(*l); +} + +void add_bias(float *output, float *biases, int batch, int n, int size) +{ + int i,j,b; + for(b = 0; b < batch; ++b){ + for(i = 0; i < 
n; ++i){ + for(j = 0; j < size; ++j){ + output[(b*n + i)*size + j] += biases[i]; + } + } + } +} + +void scale_bias(float *output, float *scales, int batch, int n, int size) +{ + int i,j,b; + for(b = 0; b < batch; ++b){ + for(i = 0; i < n; ++i){ + for(j = 0; j < size; ++j){ + output[(b*n + i)*size + j] *= scales[i]; + } + } + } +} + +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size) +{ + int i,b; + for(b = 0; b < batch; ++b){ + for(i = 0; i < n; ++i){ + bias_updates[i] += sum_array(delta+size*(i+b*n), size); + } + } +} + +void forward_convolutional_layer(convolutional_layer l, network net) +{ + int i, j; + + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + + if(l.xnor){ + binarize_weights(l.weights, l.n, l.c/l.groups*l.size*l.size, l.binary_weights); + swap_binary(&l); + binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input); + net.input = l.binary_input; + } + + int m = l.n/l.groups; + int k = l.size*l.size*l.c/l.groups; + int n = l.out_w*l.out_h; + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.weights + j*l.nweights/l.groups; + float *b = net.workspace; + float *c = l.output + (i*l.groups + j)*n*m; + float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + + if (l.size == 1) { + b = im; + } else { + im2col_cpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + } + gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } + + if(l.batch_normalize){ + forward_batchnorm_layer(l, net); + } else { + add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w); + } + + activate_array(l.output, l.outputs*l.batch, l.activation); + if(l.binary || l.xnor) swap_binary(&l); +} + +void backward_convolutional_layer(convolutional_layer l, network net) +{ + int i, j; + int m = l.n/l.groups; + int n = l.size*l.size*l.c/l.groups; + int k = l.out_w*l.out_h; + + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + 
backward_bias(l.bias_updates, l.delta, l.batch, l.n, k); + } + + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.delta + (i*l.groups + j)*m*k; + float *b = net.workspace; + float *c = l.weight_updates + j*l.nweights/l.groups; + + float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + float *imd = net.delta + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + + if(l.size == 1){ + b = im; + } else { + im2col_cpu(im, l.c/l.groups, l.h, l.w, + l.size, l.stride, l.pad, b); + } + + gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (net.delta) { + a = l.weights + j*l.nweights/l.groups; + b = l.delta + (i*l.groups + j)*m*k; + c = net.workspace; + if (l.size == 1) { + c = imd; + } + + gemm(1,0,n,k,m,1,a,n,b,k,0,c,k); + + if (l.size != 1) { + col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd); + } + } + } + } +} + +void update_convolutional_layer(convolutional_layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.n, momentum, l.bias_updates, 1); + + if(l.scales){ + axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.n, momentum, l.scale_updates, 1); + } + + axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(l.nweights, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(l.nweights, momentum, l.weight_updates, 1); +} + + +image get_convolutional_weight(convolutional_layer l, int i) +{ + int h = l.size; + int w = l.size; + int c = l.c/l.groups; + return float_to_image(w,h,c,l.weights+i*h*w*c); +} + +void rgbgr_weights(convolutional_layer l) +{ + int i; + for(i = 0; i < l.n; ++i){ + image im = get_convolutional_weight(l, i); + if (im.c == 3) { + rgbgr_image(im); + } + } +} + +void rescale_weights(convolutional_layer l, float scale, float trans) +{ + int i; + 
for(i = 0; i < l.n; ++i){ + image im = get_convolutional_weight(l, i); + if (im.c == 3) { + scale_image(im, scale); + float sum = sum_array(im.data, im.w*im.h*im.c); + l.biases[i] += sum*trans; + } + } +} + +image *get_weights(convolutional_layer l) +{ + image *weights = calloc(l.n, sizeof(image)); + int i; + for(i = 0; i < l.n; ++i){ + weights[i] = copy_image(get_convolutional_weight(l, i)); + normalize_image(weights[i]); + /* + char buff[256]; + sprintf(buff, "filter%d", i); + save_image(weights[i], buff); + */ + } + //error("hey"); + return weights; +} + +image *visualize_convolutional_layer(convolutional_layer l, char *window, image *prev_weights) +{ + image *single_weights = get_weights(l); + show_images(single_weights, l.n, window); + + image delta = get_convolutional_image(l); + image dc = collapse_image_layers(delta, 1); + char buff[256]; + sprintf(buff, "%s: Output", window); + //show_image(dc, buff); + //save_image(dc, buff); + free_image(dc); + return single_weights; +} + diff --git a/workloads/realworld/pinned/darknet/src/convolutional_layer.h b/workloads/realworld/pinned/darknet/src/convolutional_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..baacf38f4127a42abe009ef8aa3b59543433a286 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/convolutional_layer.h @@ -0,0 +1,50 @@ +#ifndef CONVOLUTIONAL_LAYER_H +#define CONVOLUTIONAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +typedef layer convolutional_layer; + +#ifdef GPU +void forward_convolutional_layer_gpu(convolutional_layer layer, network net); +void backward_convolutional_layer_gpu(convolutional_layer layer, network net); +void update_convolutional_layer_gpu(convolutional_layer layer, update_args a); + +void push_convolutional_layer(convolutional_layer layer); +void pull_convolutional_layer(convolutional_layer layer); + +void add_bias_gpu(float *output, float *biases, int batch, int n, 
int size); +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); +void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t); +#ifdef CUDNN +void cudnn_convolutional_setup(layer *l); +#endif +#endif + +convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam); +void resize_convolutional_layer(convolutional_layer *layer, int w, int h); +void forward_convolutional_layer(const convolutional_layer layer, network net); +void update_convolutional_layer(convolutional_layer layer, update_args a); +image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_weights); +void binarize_weights(float *weights, int n, int size, float *binary); +void swap_binary(convolutional_layer *l); +void binarize_weights2(float *weights, int n, int size, char *binary, float *scales); + +void backward_convolutional_layer(convolutional_layer layer, network net); + +void add_bias(float *output, float *biases, int batch, int n, int size); +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); + +image get_convolutional_image(convolutional_layer layer); +image get_convolutional_delta(convolutional_layer layer); +image get_convolutional_weight(convolutional_layer layer, int i); + +int convolutional_out_height(convolutional_layer layer); +int convolutional_out_width(convolutional_layer layer); + +#endif + diff --git a/workloads/realworld/pinned/darknet/src/cost_layer.c b/workloads/realworld/pinned/darknet/src/cost_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..85fa85daf306dda03c113a6bbdc2d92b25d0b00d --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/cost_layer.c @@ -0,0 +1,176 @@ +#include "cost_layer.h" +#include "utils.h" +#include 
"cuda_dark.h" +#include "blas.h" +#include +#include +#include +#include + +COST_TYPE get_cost_type(char *s) +{ + if (strcmp(s, "seg")==0) return SEG; + if (strcmp(s, "sse")==0) return SSE; + if (strcmp(s, "masked")==0) return MASKED; + if (strcmp(s, "smooth")==0) return SMOOTH; + if (strcmp(s, "L1")==0) return L1; + if (strcmp(s, "wgan")==0) return WGAN; + fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s); + return SSE; +} + +char *get_cost_string(COST_TYPE a) +{ + switch(a){ + case SEG: + return "seg"; + case SSE: + return "sse"; + case MASKED: + return "masked"; + case SMOOTH: + return "smooth"; + case L1: + return "L1"; + case WGAN: + return "wgan"; + } + return "sse"; +} + +cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) +{ + fprintf(stderr, "cost %4d\n", inputs); + cost_layer l = {0}; + l.type = COST; + + l.scale = scale; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.cost_type = cost_type; + l.delta = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_cost_layer; + l.backward = backward_cost_layer; + #ifdef GPU + l.forward_gpu = forward_cost_layer_gpu; + l.backward_gpu = backward_cost_layer_gpu; + + l.delta_gpu = cuda_make_array(l.output, inputs*batch); + l.output_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void resize_cost_layer(cost_layer *l, int inputs) +{ + l->inputs = inputs; + l->outputs = inputs; + l->delta = realloc(l->delta, inputs*l->batch*sizeof(float)); + l->output = realloc(l->output, inputs*l->batch*sizeof(float)); +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch); + l->output_gpu = cuda_make_array(l->output, inputs*l->batch); +#endif +} + +void forward_cost_layer(cost_layer l, network net) +{ + if (!net.truth) return; + if(l.cost_type == MASKED){ + int i; + for(i = 0; i < 
l.batch*l.inputs; ++i){ + if(net.truth[i] == SECRET_NUM) net.input[i] = SECRET_NUM; + } + } + if(l.cost_type == SMOOTH){ + smooth_l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + }else if(l.cost_type == L1){ + l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + } else { + l2_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + } + l.cost[0] = sum_array(l.output, l.batch*l.inputs); +} + +void backward_cost_layer(const cost_layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_cost_layer(cost_layer l) +{ + cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +void push_cost_layer(cost_layer l) +{ + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +int float_abs_compare (const void * a, const void * b) +{ + float fa = *(const float*) a; + if(fa < 0) fa = -fa; + float fb = *(const float*) b; + if(fb < 0) fb = -fb; + return (fa > fb) - (fa < fb); +} + +void forward_cost_layer_gpu(cost_layer l, network net) +{ + if (!net.truth) return; + if(l.smooth){ + scal_gpu(l.batch*l.inputs, (1-l.smooth), net.truth_gpu, 1); + add_gpu(l.batch*l.inputs, l.smooth * 1./l.inputs, net.truth_gpu, 1); + } + + if(l.cost_type == SMOOTH){ + smooth_l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } else if (l.cost_type == L1){ + l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } else if (l.cost_type == WGAN){ + wgan_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } else { + l2_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } + + if (l.cost_type == SEG && l.noobject_scale != 1) { + scale_mask_gpu(l.batch*l.inputs, l.delta_gpu, 0, net.truth_gpu, l.noobject_scale); + scale_mask_gpu(l.batch*l.inputs, l.output_gpu, 0, net.truth_gpu, l.noobject_scale); + } + if (l.cost_type == MASKED) { + mask_gpu(l.batch*l.inputs, 
net.delta_gpu, SECRET_NUM, net.truth_gpu, 0); + } + + if(l.ratio){ + cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); + qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare); + int n = (1-l.ratio) * l.batch*l.inputs; + float thresh = l.delta[n]; + thresh = 0; + printf("%f\n", thresh); + supp_gpu(l.batch*l.inputs, thresh, l.delta_gpu, 1); + } + + if(l.thresh){ + supp_gpu(l.batch*l.inputs, l.thresh*1./l.inputs, l.delta_gpu, 1); + } + + cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs); + l.cost[0] = sum_array(l.output, l.batch*l.inputs); +} + +void backward_cost_layer_gpu(const cost_layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/pinned/darknet/src/cost_layer.h b/workloads/realworld/pinned/darknet/src/cost_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..ceb64de00bf66839c2f34852a05ea71114608a35 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/cost_layer.h @@ -0,0 +1,20 @@ +#ifndef COST_LAYER_H +#define COST_LAYER_H +#include "layer.h" +#include "network.h" + +typedef layer cost_layer; + +COST_TYPE get_cost_type(char *s); +char *get_cost_string(COST_TYPE a); +cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale); +void forward_cost_layer(const cost_layer l, network net); +void backward_cost_layer(const cost_layer l, network net); +void resize_cost_layer(cost_layer *l, int inputs); + +#ifdef GPU +void forward_cost_layer_gpu(cost_layer l, network net); +void backward_cost_layer_gpu(const cost_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/pinned/darknet/src/cpu_timestamps.c b/workloads/realworld/pinned/darknet/src/cpu_timestamps.c new file mode 100644 index 0000000000000000000000000000000000000000..35114479c7a9cce3debe2204b6886ad5528041d5 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/cpu_timestamps.c @@ -0,0 +1,20 @@ +#include "cpu_timestamps.h" 
+ +void startCPU() { + struct timespec tv; + if(clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + startCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); +} + + + +void endCPU() { + struct timespec tv; + if(clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + + endCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); + //endCPUTimestamp1 = std::chrono::system_clock::now(); + printf("CPU_Times,%lu,%lu,%lu\n", startCPUTime, endCPUTime, endCPUTime-startCPUTime); +} diff --git a/workloads/realworld/pinned/darknet/src/cpu_timestamps.h b/workloads/realworld/pinned/darknet/src/cpu_timestamps.h new file mode 100644 index 0000000000000000000000000000000000000000..e53e995a5603b4610759c02a4a179eb9f0124e48 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/cpu_timestamps.h @@ -0,0 +1,21 @@ +#ifndef CPU_TIMESTAMP_ +#define CPU_TIMESTAMP_ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +static uint64_t startCPUTime; +static uint64_t endCPUTime; + +void startCPU(); +void endCPU(); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/workloads/realworld/pinned/darknet/src/crnn_layer.c b/workloads/realworld/pinned/darknet/src/crnn_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..159e17f92d45693461c92d482bf3aa7354a148d8 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/crnn_layer.c @@ -0,0 +1,283 @@ +#include "crnn_layer.h" +#include "convolutional_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION 
activation, int batch_normalize) +{ + fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = CRNN; + l.steps = steps; + l.h = h; + l.w = w; + l.c = c; + l.out_h = h; + l.out_w = w; + l.out_c = output_filters; + l.inputs = h*w*c; + l.hidden = h * w * hidden_filters; + l.outputs = l.out_h * l.out_w * l.out_c; + + l.state = calloc(l.hidden*batch*(steps+1), sizeof(float)); + + l.input_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.input_layer) = make_convolutional_layer(batch*steps, h, w, c, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.input_layer->batch = batch; + + l.self_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.self_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.self_layer->batch = batch; + + l.output_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.output_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, output_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.output_layer->batch = batch; + + l.output = l.output_layer->output; + l.delta = l.output_layer->delta; + + l.forward = forward_crnn_layer; + l.backward = backward_crnn_layer; + l.update = update_crnn_layer; + +#ifdef GPU + l.forward_gpu = forward_crnn_layer_gpu; + l.backward_gpu = backward_crnn_layer_gpu; + l.update_gpu = update_crnn_layer_gpu; + + l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1)); + l.output_gpu = l.output_layer->output_gpu; + l.delta_gpu = l.output_layer->delta_gpu; +#endif + + return l; +} + +void update_crnn_layer(layer l, update_args a) +{ + update_convolutional_layer(*(l.input_layer), a); + update_convolutional_layer(*(l.self_layer), a); + update_convolutional_layer(*(l.output_layer), a); +} + +void forward_crnn_layer(layer l, network net) +{ + network s = net; + 
s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1); + fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1); + fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1); + if(net.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1); + + for (i = 0; i < l.steps; ++i) { + s.input = net.input; + forward_convolutional_layer(input_layer, s); + + s.input = l.state; + forward_convolutional_layer(self_layer, s); + + float *old_state = l.state; + if(net.train) l.state += l.hidden*l.batch; + if(l.shortcut){ + copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1); + }else{ + fill_cpu(l.hidden * l.batch, 0, l.state, 1); + } + axpy_cpu(l.hidden * l.batch, 1, input_layer.output, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + forward_convolutional_layer(output_layer, s); + + net.input += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_crnn_layer(layer l, network net) +{ + network s = net; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + increment_layer(&input_layer, l.steps-1); + increment_layer(&self_layer, l.steps-1); + increment_layer(&output_layer, l.steps-1); + + l.state += l.hidden*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + s.delta = self_layer.delta; + backward_convolutional_layer(output_layer, s); + + l.state -= l.hidden*l.batch; + /* + if(i > 0){ + copy_cpu(l.hidden * l.batch, input_layer.output - l.hidden*l.batch, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, 
self_layer.output - l.hidden*l.batch, 1, l.state, 1); + }else{ + fill_cpu(l.hidden * l.batch, 0, l.state, 1); + } + */ + + s.input = l.state; + s.delta = self_layer.delta - l.hidden*l.batch; + if (i == 0) s.delta = 0; + backward_convolutional_layer(self_layer, s); + + copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1); + if (i > 0 && l.shortcut) axpy_cpu(l.hidden*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.hidden*l.batch, 1); + s.input = net.input + i*l.inputs*l.batch; + if(net.delta) s.delta = net.delta + i*l.inputs*l.batch; + else s.delta = 0; + backward_convolutional_layer(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} + +#ifdef GPU + +void pull_crnn_layer(layer l) +{ + pull_convolutional_layer(*(l.input_layer)); + pull_convolutional_layer(*(l.self_layer)); + pull_convolutional_layer(*(l.output_layer)); +} + +void push_crnn_layer(layer l) +{ + push_convolutional_layer(*(l.input_layer)); + push_convolutional_layer(*(l.self_layer)); + push_convolutional_layer(*(l.output_layer)); +} + +void update_crnn_layer_gpu(layer l, update_args a) +{ + update_convolutional_layer_gpu(*(l.input_layer), a); + update_convolutional_layer_gpu(*(l.self_layer), a); + update_convolutional_layer_gpu(*(l.output_layer), a); +} + +void forward_crnn_layer_gpu(layer l, network net) +{ + network s = net; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_gpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); + fill_gpu(l.hidden * l.batch * l.steps, 0, self_layer.delta_gpu, 1); + fill_gpu(l.hidden * l.batch * l.steps, 0, input_layer.delta_gpu, 1); + if(net.train) fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1); + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = net.input_gpu; + forward_convolutional_layer_gpu(input_layer, s); + + s.input_gpu = l.state_gpu; + 
forward_convolutional_layer_gpu(self_layer, s); + + float *old_state = l.state_gpu; + if(net.train) l.state_gpu += l.hidden*l.batch; + if(l.shortcut){ + copy_gpu(l.hidden * l.batch, old_state, 1, l.state_gpu, 1); + }else{ + fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1); + } + axpy_gpu(l.hidden * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + forward_convolutional_layer_gpu(output_layer, s); + + net.input_gpu += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_crnn_layer_gpu(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + increment_layer(&input_layer, l.steps - 1); + increment_layer(&self_layer, l.steps - 1); + increment_layer(&output_layer, l.steps - 1); + l.state_gpu += l.hidden*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_gpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu; + backward_convolutional_layer_gpu(output_layer, s); + + l.state_gpu -= l.hidden*l.batch; + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu - l.hidden*l.batch; + if (i == 0) s.delta_gpu = 0; + backward_convolutional_layer_gpu(self_layer, s); + + copy_gpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); + if (i > 0 && l.shortcut) axpy_gpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1); + s.input_gpu = net.input_gpu + i*l.inputs*l.batch; + if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch; + else s.delta_gpu = 0; + backward_convolutional_layer_gpu(input_layer, 
s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} +#endif diff --git a/workloads/realworld/pinned/darknet/src/crnn_layer.h b/workloads/realworld/pinned/darknet/src/crnn_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..515f378354e9cc6149e7a1ac60ffc86ace112991 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/crnn_layer.h @@ -0,0 +1,24 @@ + +#ifndef CRNN_LAYER_H +#define CRNN_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize); + +void forward_crnn_layer(layer l, network net); +void backward_crnn_layer(layer l, network net); +void update_crnn_layer(layer l, update_args a); + +#ifdef GPU +void forward_crnn_layer_gpu(layer l, network net); +void backward_crnn_layer_gpu(layer l, network net); +void update_crnn_layer_gpu(layer l, update_args a); +void push_crnn_layer(layer l); +void pull_crnn_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/pinned/darknet/src/crop_layer.c b/workloads/realworld/pinned/darknet/src/crop_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..3c8c9650bda6dcf4485ce8da8e2fa1984f2b244d --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/crop_layer.c @@ -0,0 +1,103 @@ +#include "crop_layer.h" +#include "cuda_dark.h" +#include + +image get_crop_image(crop_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.out_c; + return float_to_image(w,h,c,l.output); +} + +void backward_crop_layer(const crop_layer l, network net){} +void backward_crop_layer_gpu(const crop_layer l, network net){} + +crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure) +{ + fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", 
h,w,crop_height,crop_width,c); + crop_layer l = {0}; + l.type = CROP; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.scale = (float)crop_height / h; + l.flip = flip; + l.angle = angle; + l.saturation = saturation; + l.exposure = exposure; + l.out_w = crop_width; + l.out_h = crop_height; + l.out_c = c; + l.inputs = l.w * l.h * l.c; + l.outputs = l.out_w * l.out_h * l.out_c; + l.output = calloc(l.outputs*batch, sizeof(float)); + l.forward = forward_crop_layer; + l.backward = backward_crop_layer; + + #ifdef GPU + l.forward_gpu = forward_crop_layer_gpu; + l.backward_gpu = backward_crop_layer_gpu; + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + l.rand_gpu = cuda_make_array(0, l.batch*8); + #endif + return l; +} + +void resize_crop_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->out_w = l->scale*w; + l->out_h = l->scale*h; + + l->inputs = l->w * l->h * l->c; + l->outputs = l->out_h * l->out_w * l->out_c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + #ifdef GPU + cuda_free(l->output_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + #endif +} + + +void forward_crop_layer(const crop_layer l, network net) +{ + int i,j,c,b,row,col; + int index; + int count = 0; + int flip = (l.flip && rand()%2); + int dh = rand()%(l.h - l.out_h + 1); + int dw = rand()%(l.w - l.out_w + 1); + float scale = 2; + float trans = -1; + if(l.noadjust){ + scale = 1; + trans = 0; + } + if(!net.train){ + flip = 0; + dh = (l.h - l.out_h)/2; + dw = (l.w - l.out_w)/2; + } + for(b = 0; b < l.batch; ++b){ + for(c = 0; c < l.c; ++c){ + for(i = 0; i < l.out_h; ++i){ + for(j = 0; j < l.out_w; ++j){ + if(flip){ + col = l.w - dw - j - 1; + }else{ + col = j + dw; + } + row = i + dh; + index = col+l.w*(row+l.h*(c + l.c*b)); + l.output[count++] = net.input[index]*scale + trans; + } + } + } + } +} + diff --git a/workloads/realworld/pinned/darknet/src/crop_layer.h b/workloads/realworld/pinned/darknet/src/crop_layer.h new file 
mode 100644 index 0000000000000000000000000000000000000000..3b5883c47d6df0987700e1b0434010eebd6312af --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/crop_layer.h @@ -0,0 +1,20 @@ +#ifndef CROP_LAYER_H +#define CROP_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +typedef layer crop_layer; + +image get_crop_image(crop_layer l); +crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure); +void forward_crop_layer(const crop_layer l, network net); +void resize_crop_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_crop_layer_gpu(crop_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/pinned/darknet/src/crop_layer_kernels.cu b/workloads/realworld/pinned/darknet/src/crop_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..7e262fce4ff8beb52de23d7c79bd1917410ef136 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/crop_layer_kernels.cu @@ -0,0 +1,225 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "crop_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "image.h" +} + +__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c) +{ + if(x < 0 || x >= w || y < 0 || y >= h) return 0; + return image[x + w*(y + c*h)]; +} + +__device__ float3 rgb_to_hsv_kernel(float3 rgb) +{ + float r = rgb.x; + float g = rgb.y; + float b = rgb.z; + + float h, s, v; + float max = (r > g) ? ( (r > b) ? r : b) : ( (g > b) ? g : b); + float min = (r < g) ? ( (r < b) ? r : b) : ( (g < b) ? 
g : b); + float delta = max - min; + v = max; + if(max == 0){ + s = 0; + h = -1; + }else{ + s = delta/max; + if(r == max){ + h = (g - b) / delta; + } else if (g == max) { + h = 2 + (b - r) / delta; + } else { + h = 4 + (r - g) / delta; + } + if (h < 0) h += 6; + } + return make_float3(h, s, v); +} + +__device__ float3 hsv_to_rgb_kernel(float3 hsv) +{ + float h = hsv.x; + float s = hsv.y; + float v = hsv.z; + + float r, g, b; + float f, p, q, t; + + if (s == 0) { + r = g = b = v; + } else { + int index = (int) floorf(h); + f = h - index; + p = v*(1-s); + q = v*(1-s*f); + t = v*(1-s*(1-f)); + if(index == 0){ + r = v; g = t; b = p; + } else if(index == 1){ + r = q; g = v; b = p; + } else if(index == 2){ + r = p; g = v; b = t; + } else if(index == 3){ + r = p; g = q; b = v; + } else if(index == 4){ + r = t; g = p; b = v; + } else { + r = v; g = p; b = q; + } + } + r = (r < 0) ? 0 : ((r > 1) ? 1 : r); + g = (g < 0) ? 0 : ((g > 1) ? 1 : g); + b = (b < 0) ? 0 : ((b > 1) ? 1 : b); + return make_float3(r, g, b); +} + +__device__ float bilinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c) +{ + int ix = (int) floorf(x); + int iy = (int) floorf(y); + + float dx = x - ix; + float dy = y - iy; + + float val = (1-dy) * (1-dx) * get_pixel_kernel(image, w, h, ix, iy, c) + + dy * (1-dx) * get_pixel_kernel(image, w, h, ix, iy+1, c) + + (1-dy) * dx * get_pixel_kernel(image, w, h, ix+1, iy, c) + + dy * dx * get_pixel_kernel(image, w, h, ix+1, iy+1, c); + return val; +} + +__global__ void levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure, float translate, float scale, float shift) +{ + int size = batch * w * h; + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= size) return; + int x = id % w; + id /= w; + int y = id % h; + id /= h; + float rshift = rand[0]; + float gshift = rand[1]; + float bshift = rand[2]; + float r0 = rand[8*id + 0]; + float r1 = rand[8*id + 
1]; + float r2 = rand[8*id + 2]; + float r3 = rand[8*id + 3]; + + saturation = r0*(saturation - 1) + 1; + saturation = (r1 > .5f) ? 1.f/saturation : saturation; + exposure = r2*(exposure - 1) + 1; + exposure = (r3 > .5f) ? 1.f/exposure : exposure; + + size_t offset = id * h * w * 3; + image += offset; + float r = image[x + w*(y + h*0)]; + float g = image[x + w*(y + h*1)]; + float b = image[x + w*(y + h*2)]; + float3 rgb = make_float3(r,g,b); + if(train){ + float3 hsv = rgb_to_hsv_kernel(rgb); + hsv.y *= saturation; + hsv.z *= exposure; + rgb = hsv_to_rgb_kernel(hsv); + } else { + shift = 0; + } + image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5f)*shift; + image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5f)*shift; + image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5f)*shift; +} + +__global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= size) return; + + float cx = w/2.f; + float cy = h/2.f; + + int count = id; + int j = id % crop_width; + id /= crop_width; + int i = id % crop_height; + id /= crop_height; + int k = id % c; + id /= c; + int b = id; + + float r4 = rand[8*b + 4]; + float r5 = rand[8*b + 5]; + float r6 = rand[8*b + 6]; + float r7 = rand[8*b + 7]; + + float dw = (w - crop_width)*r4; + float dh = (h - crop_height)*r5; + flip = (flip && (r6 > .5f)); + angle = 2*angle*r7 - angle; + if(!train){ + dw = (w - crop_width)/2.f; + dh = (h - crop_height)/2.f; + flip = 0; + angle = 0; + } + + input += w*h*c*b; + + float x = (flip) ? 
w - dw - j - 1 : j + dw; + float y = i + dh; + + float rx = cosf(angle)*(x-cx) - sinf(angle)*(y-cy) + cx; + float ry = sinf(angle)*(x-cx) + cosf(angle)*(y-cy) + cy; + + output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k); +} + +extern "C" void forward_crop_layer_gpu(crop_layer layer, network net) +{ + cuda_random(layer.rand_gpu, layer.batch*8); + + float radians = layer.angle*3.14159265f/180.f; + + float scale = 2; + float translate = -1; + if(layer.noadjust){ + scale = 1; + translate = 0; + } + + int size = layer.batch * layer.w * layer.h; + + levels_image_kernel<<>>(net.input_gpu, layer.rand_gpu, layer.batch, layer.w, layer.h, net.train, layer.saturation, layer.exposure, translate, scale, layer.shift); + check_error(cudaPeekAtLastError()); + + size = layer.batch*layer.c*layer.out_w*layer.out_h; + + forward_crop_layer_kernel<<>>(net.input_gpu, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, net.train, layer.flip, radians, layer.output_gpu); + check_error(cudaPeekAtLastError()); + +/* + cuda_pull_array(layer.output_gpu, layer.output, size); + image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 0*(size/layer.batch)); + image im2 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 1*(size/layer.batch)); + image im3 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 2*(size/layer.batch)); + + translate_image(im, -translate); + scale_image(im, 1/scale); + translate_image(im2, -translate); + scale_image(im2, 1/scale); + translate_image(im3, -translate); + scale_image(im3, 1/scale); + + show_image(im, "cropped"); + show_image(im2, "cropped2"); + show_image(im3, "cropped3"); + cvWaitKey(0); + */ +} + diff --git a/workloads/realworld/pinned/darknet/src/cuda_dark.cu b/workloads/realworld/pinned/darknet/src/cuda_dark.cu new file mode 100644 index 0000000000000000000000000000000000000000..ff5017a7df658e88631909afa1e80978f1577b89 --- /dev/null +++ 
b/workloads/realworld/pinned/darknet/src/cuda_dark.cu @@ -0,0 +1,421 @@ +int gpu_index = 0; + +#ifdef GPU + +#include "cuda.h" +#include "utils.h" +#include "blas.h" +#include +#include +#include + + +#include + +void cuda_set_device(int n) +{ + gpu_index = n; + cudaError_t status = cudaSetDevice(n); + check_error(status); +} + +int cuda_get_device() +{ + int n = 0; + cudaError_t status = cudaGetDevice(&n); + check_error(status); + return n; +} + +void check_error(cudaError_t status) +{ + cudaDeviceSynchronize(); + cudaError_t status2 = cudaGetLastError(); + if (status != cudaSuccess) + { + const char *s = cudaGetErrorString(status); + char buffer[256]; + printf("CUDA Error: %s\n", s); + assert(0); + snprintf(buffer, 256, "CUDA Error: %s", s); + error(buffer); + } + if (status2 != cudaSuccess) + { + const char *s = cudaGetErrorString(status); + char buffer[256]; + printf("CUDA Error Prev: %s\n", s); + assert(0); + snprintf(buffer, 256, "CUDA Error Prev: %s", s); + error(buffer); + } +} + +dim3 cuda_gridsize(size_t n){ + size_t k = (n-1) / BLOCK + 1; + size_t x = k; + size_t y = 1; + if(x > 65535){ + x = ceil(sqrt(k)); + y = (n-1)/(x*BLOCK) + 1; + } + dim3 d = {x, y, 1}; + //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); + return d; +} + +#ifdef CUDNN +cudnnHandle_t cudnn_handle() +{ + static int init[16] = {0}; + static cudnnHandle_t handle[16]; + int i = cuda_get_device(); + if(!init[i]) { + cudnnCreate(&handle[i]); + init[i] = 1; + } + return handle[i]; +} +#endif + +cublasHandle_t blas_handle() +{ + static int init[16] = {0}; + static cublasHandle_t handle[16]; + int i = cuda_get_device(); + if(!init[i]) { + cublasCreate(&handle[i]); + init[i] = 1; + } + return handle[i]; +} + +float *cuda_make_array(float *x, size_t n) +{ + float *x_gpu; + size_t size = sizeof(float)*n; + cudaError_t status = cudaMalloc((void **)&x_gpu, size); + // cudaError_t status = cudaMallocHost((void **)&x_gpu, size); + check_error(status); + if(x){ + status = cudaMemcpy(x_gpu, x, size, 
cudaMemcpyHostToDevice); + check_error(status); + } else { + fill_gpu(n, 0, x_gpu, 1); + } + if(!x_gpu) error("Cuda malloc failed\n"); + return x_gpu; +} + +void cuda_random(float *x_gpu, size_t n) +{ + static curandGenerator_t gen[16]; + static int init[16] = {0}; + int i = cuda_get_device(); + if(!init[i]){ + curandCreateGenerator(&gen[i], CURAND_RNG_PSEUDO_DEFAULT); + curandSetPseudoRandomGeneratorSeed(gen[i], time(0)); + init[i] = 1; + } + curandGenerateUniform(gen[i], x_gpu, n); + check_error(cudaPeekAtLastError()); +} + +float cuda_compare(float *x_gpu, float *x, size_t n, char *s) +{ + float *tmp = (float *) calloc(n, sizeof(float)); + cuda_pull_array(x_gpu, tmp, n); + //int i; + //for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]); + axpy_cpu(n, -1, x, 1, tmp, 1); + float err = dot_cpu(n, tmp, 1, tmp, 1); + printf("Error %s: %f\n", s, sqrt(err/n)); + free(tmp); + return err; +} + +int *cuda_make_int_array(int *x, size_t n) +{ + int *x_gpu; + size_t size = sizeof(int)*n; + cudaError_t status = cudaMalloc((void **)&x_gpu, size); + // cudaError_t status = cudaMallocHost((void **)&x_gpu, size); + check_error(status); + if(x){ + status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + check_error(status); + } + if(!x_gpu) error("Cuda malloc failed\n"); + return x_gpu; +} + +void cuda_free(float *x_gpu) +{ + cudaError_t status = cudaFree(x_gpu); + check_error(status); +} + +void cuda_push_array(float *x_gpu, float *x, size_t n) +{ + size_t size = sizeof(float)*n; + cudaError_t status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + check_error(status); +} + +void cuda_pull_array(float *x_gpu, float *x, size_t n) +{ + size_t size = sizeof(float)*n; + cudaError_t status = cudaMemcpy(x, x_gpu, size, cudaMemcpyDeviceToHost); + check_error(status); +} + +float cuda_mag_array(float *x_gpu, size_t n) +{ + float *temp = (float *) calloc(n, sizeof(float)); + cuda_pull_array(x_gpu, temp, n); + float m = mag_array(temp, n); + free(temp); + return m; +} + 
+static const char * +getMemcpyKindString(CUpti_ActivityMemcpyKind kind) +{ + switch (kind) + { + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOD: + return "HtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOH: + return "DtoH"; + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOA: + return "HtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOH: + return "AtoH"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOA: + return "AtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOD: + return "AtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOA: + return "DtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOD: + return "DtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOH: + return "HtoH"; + default: + break; + } + + return ""; +} + +static const char * +getUvmCounterKindString(CUpti_ActivityUnifiedMemoryCounterKind kind) +{ + switch (kind) + { + case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD: + return "BYTES_TRANSFER_HTOD"; + case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH: + return "BYTES_TRANSFER_DTOH"; + default: + break; + } + return ""; +} + +static void +printActivity(CUpti_Activity *record) +{ + switch (record->kind) + { + case CUPTI_ACTIVITY_KIND_KERNEL: + { + int status; + CUpti_ActivityKernel4 *kernel = (CUpti_ActivityKernel4 *)record; + printf("KERNEL %s, %llu, %llu, %llu\n", + abi::__cxa_demangle(kernel->name, 0, 0, &status), + (unsigned long long)(kernel->start), + (unsigned long long)(kernel->end), + (unsigned long long)(kernel->end) - (kernel->start)); + break; + } + case CUPTI_ACTIVITY_KIND_RUNTIME: + { + CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record; + const char *callback_name; + cuptiGetCallbackName(CUPTI_CB_DOMAIN_RUNTIME_API, api->cbid, &callback_name); + // printf("RUNTIME %s (cbid=%u) [ %llu - %llu ] process %u, thread %u, correlation %u\n", + // callback_name, api->cbid, + // (unsigned long long)(api->start - startTimestamp), + // (unsigned long long)(api->end - startTimestamp), + // api->processId, api->threadId, api->correlationId); + printf("RUNTIME %s (cbid=%u), %llu,%llu,%llu, 
process %u, thread %u, correlation %u\n", + callback_name, api->cbid, + (unsigned long long)(api->start), + (unsigned long long)(api->end), + (unsigned long long)(api->end - api->start), + api->processId, api->threadId, api->correlationId); + break; + } + case CUPTI_ACTIVITY_KIND_MEMCPY: + { + CUpti_ActivityMemcpy4 *memcpy = (CUpti_ActivityMemcpy4 *)record; + printf("MEMCPY %s, size %llu, %llu, %llu, %llu\n", + getMemcpyKindString((CUpti_ActivityMemcpyKind)memcpy->copyKind), + (unsigned long long)memcpy->bytes, + (unsigned long long)(memcpy->start), + (unsigned long long)(memcpy->end), + (unsigned long long)(memcpy->end) - (memcpy->start)); + break; + } + case CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER: + { + CUpti_ActivityUnifiedMemoryCounter2 *uvm = (CUpti_ActivityUnifiedMemoryCounter2 *)record; + printf("UVM MEMCPY %s, size %llu, %llu, %llu, %llu \n", + getUvmCounterKindString(uvm->counterKind), + (unsigned long long)uvm->value, + (unsigned long long)(uvm->start), + (unsigned long long)(uvm->end), + (unsigned long long)(uvm->end - uvm->start)); + break; + } + } +} + +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) +{ + uint8_t *bfr = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE); + if (bfr == NULL) + { + printf("Error: out of memory\n"); + exit(-1); + } + + *size = BUF_SIZE; + *buffer = ALIGN_BUFFER(bfr, ALIGN_SIZE); + *maxNumRecords = 0; +} + +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) +{ + CUptiResult status; + CUpti_Activity *record = NULL; + if (validSize > 0) + { + do + { + status = cuptiActivityGetNextRecord(buffer, validSize, &record); + if (status == CUPTI_SUCCESS) + { + printActivity(record); + } + else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) + break; + else + { + CUPTI_CALL(status); + } + } while (1); + + // report any records dropped from the queue + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if 
(dropped != 0) + { + printf("Dropped %u activity records\n", (unsigned int)dropped); + } + } + + free(buffer); +} + +#ifndef PROFILE +void initTrace() { + printf("not Profile initTrace()\n"); + return; +} + +void finiTrace() { + return; +} + +#else +void initTrace() +{ + printf("Profile initTrace()\n"); + size_t attrValue = 0, attrValueSize = sizeof(size_t); + + CUpti_ActivityUnifiedMemoryCounterConfig config[2]; + + // configure unified memory counters + config[0].scope = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE; + config[0].kind = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD; + config[0].deviceId = 0; + config[0].enable = 1; + + config[1].scope = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE; + config[1].kind = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH; + config[1].deviceId = 0; + config[1].enable = 1; + + CUptiResult res = cuptiActivityConfigureUnifiedMemoryCounter(config, 2); + if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED) + { + printf("Test is waived, unified memory is not supported on the underlying platform.\n"); + } + else if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE) + { + printf("Test is waived, unified memory is not supported on the device.\n"); + } + else if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES) + { + printf("Test is waived, unified memory is not supported on the non-P2P multi-gpu setup.\n"); + } + else + { + CUPTI_CALL(res); + } + + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER)); + + // Register callbacks for buffer requests and for buffers completed by CUPTI. + CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + + // Optionally get and set activity attributes. 
+ // Attributes can be set by the CUPTI client to change behavior of the activity API. + // Some attributes require to be set before any CUDA context is created to be effective, + // e.g. to be applied to all device buffer allocations (see documentation). + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + printf("%s = %llu B\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +} + +void finiTrace() +{ + // Force flush any remaining activity buffers before termination of the application + CUPTI_CALL(cuptiActivityFlushAll(1)); +} +#endif + +void GPU_argv_init() +{ + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, GPU_DEVICE); + printf("setting device %d with name %s\n", GPU_DEVICE, deviceProp.name); + cudaSetDevice(GPU_DEVICE); +} +#else +void cuda_set_device(int n){} + +#endif diff --git a/workloads/realworld/pinned/darknet/src/cuda_dark.h b/workloads/realworld/pinned/darknet/src/cuda_dark.h new file mode 100644 index 0000000000000000000000000000000000000000..ac6b60bc3d27ec1ebc8190463648b946f6c809ef --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/cuda_dark.h @@ -0,0 +1,63 @@ +#ifndef CUDA_H +#define CUDA_H + +#include "darknet.h" + +#ifdef GPU + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) (((uintptr_t)(buffer) & ((align)-1)) ? 
((buffer) + (align) - ((uintptr_t)(buffer) & ((align)-1))) : (buffer)) + +static uint64_t startTimestamp; +// Timestamp at trace initialization time. Used to normalized other +// timestamps + +#define CUPTI_CALL(call) \ + do \ + { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) \ + { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + if (_status == CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED) \ + exit(0); \ + else \ + exit(-1); \ + } \ + } while (0) + +#include + +#ifdef __cplusplus +extern "C" { +#endif +void check_error(cudaError_t status); +cublasHandle_t blas_handle(); +int *cuda_make_int_array(int *x, size_t n); +void cuda_random(float *x_gpu, size_t n); +float cuda_compare(float *x_gpu, float *x, size_t n, char *s); +dim3 cuda_gridsize(size_t n); + +void GPU_argv_init(); +void initTrace(); +void finiTrace(); +void startCPU(); +void endCPU(); +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords); +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); +static void printActivity(CUpti_Activity *record); + +#ifdef __cplusplus +} +#endif + +#ifdef CUDNN +cudnnHandle_t cudnn_handle(); +#endif + +#endif +#endif diff --git a/workloads/realworld/pinned/darknet/src/cupti_add.cpp b/workloads/realworld/pinned/darknet/src/cupti_add.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a0d16eb72f41f8e858a59354d2de9d6b470c0e76 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/cupti_add.cpp @@ -0,0 +1,112 @@ +#include "cupti_add.h" + +static void +printActivity(CUpti_Activity *record) +{ + switch (record->kind) + { + case CUPTI_ACTIVITY_KIND_KERNEL: + { + int status; + CUpti_ActivityKernel4 *kernel = (CUpti_ActivityKernel4 *)record; + printf("CUPTI,%s,%llu,%llu,%llu\n", + abi::__cxa_demangle(kernel->name, 0, 0, 
&status), + (unsigned long long)(kernel->start), + (unsigned long long)(kernel->end), + (unsigned long long)(kernel->end - startTimestamp) - (kernel->start - startTimestamp)); + break; + } + case CUPTI_ACTIVITY_KIND_RUNTIME: + { + CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record; + const char *callback_name; + cuptiGetCallbackName(CUPTI_CB_DOMAIN_RUNTIME_API, api->cbid, &callback_name); + printf("RUNTIME %s (cbid=%u) [ %llu - %llu ] process %u, thread %u, correlation %u\n", + callback_name, api->cbid, + (unsigned long long)(api->start - startTimestamp), + (unsigned long long)(api->end - startTimestamp), + api->processId, api->threadId, api->correlationId); + break; + } + } +} + +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) +{ + uint8_t *bfr = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE); + if (bfr == NULL) + { + printf("Error: out of memory\n"); + exit(-1); + } + + *size = BUF_SIZE; + *buffer = ALIGN_BUFFER(bfr, ALIGN_SIZE); + *maxNumRecords = 0; +} + +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) +{ + CUptiResult status; + CUpti_Activity *record = NULL; + if (validSize > 0) + { + do + { + status = cuptiActivityGetNextRecord(buffer, validSize, &record); + if (status == CUPTI_SUCCESS) + { + printActivity(record); + } + else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) + break; + else + { + CUPTI_CALL(status); + } + } while (1); + + // report any records dropped from the queue + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if (dropped != 0) + { + printf("Dropped %u activity records\n", (unsigned int)dropped); + } + } + + free(buffer); +} + +void initTrace() +{ + size_t attrValue = 0, attrValueSize = sizeof(size_t); + + // CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); + + // Register callbacks for buffer requests and for buffers completed 
by CUPTI. + CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + + // Optionally get and set activity attributes. + // Attributes can be set by the CUPTI client to change behavior of the activity API. + // Some attributes require to be set before any CUDA context is created to be effective, + // e.g. to be applied to all device buffer allocations (see documentation). + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + printf("%s = %llu B\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +} + +void finiTrace() +{ + // Force flush any remaining activity buffers before termination of the application + CUPTI_CALL(cuptiActivityFlushAll(1)); +} \ No newline at end of file diff --git a/workloads/realworld/pinned/darknet/src/cupti_add.h b/workloads/realworld/pinned/darknet/src/cupti_add.h new file mode 100644 index 0000000000000000000000000000000000000000..a30b7b847ad13381032d2f60eac2955d30146485 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/cupti_add.h @@ -0,0 +1,36 @@ +#include +#include +#include + + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) \ + (((uintptr_t) (buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer)) + +static uint64_t startTimestamp; +// Timestamp at trace initialization time. 
Used to normalized other +// timestamps + +#define CUPTI_CALL(call) \ + do { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + if(_status == CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED) \ + exit(0); \ + else \ + exit(-1); \ + } \ + } while (0) + +void initTrace(); +void finiTrace(); +void startCPU(); +void endCPU(); +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords); +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); +static void printActivity(CUpti_Activity *record); diff --git a/workloads/realworld/pinned/darknet/src/data.c b/workloads/realworld/pinned/darknet/src/data.c new file mode 100644 index 0000000000000000000000000000000000000000..d50f1346c5cdcfe1dbeb2d0f70ec408fb4f33960 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/data.c @@ -0,0 +1,1685 @@ +#include "data.h" +#include "utils.h" +#include "image.h" +#include "cuda_dark.h" + +#include +#include +#include + +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + +list *get_paths(char *filename) +{ + char *path; + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + list *lines = make_list(); + while((path=fgetl(file))){ + list_insert(lines, path); + } + fclose(file); + return lines; +} + +/* +char **get_random_paths_indexes(char **paths, int n, int m, int *indexes) +{ + char **random_paths = calloc(n, sizeof(char*)); + int i; + pthread_mutex_lock(&mutex); + for(i = 0; i < n; ++i){ + int index = rand()%m; + indexes[i] = index; + random_paths[i] = paths[index]; + if(i == 0) printf("%s\n", paths[index]); + } + pthread_mutex_unlock(&mutex); + return random_paths; +} +*/ + +char **get_random_paths(char **paths, int n, int m) +{ + char **random_paths = calloc(n, sizeof(char*)); + int i; + 
pthread_mutex_lock(&mutex); + for(i = 0; i < n; ++i){ + int index = rand()%m; + random_paths[i] = paths[index]; + //if(i == 0) printf("%s\n", paths[index]); + } + pthread_mutex_unlock(&mutex); + return random_paths; +} + +char **find_replace_paths(char **paths, int n, char *find, char *replace) +{ + char **replace_paths = calloc(n, sizeof(char*)); + int i; + for(i = 0; i < n; ++i){ + char replaced[4096]; + find_replace(paths[i], find, replace, replaced); + replace_paths[i] = copy_string(replaced); + } + return replace_paths; +} + +matrix load_image_paths_gray(char **paths, int n, int w, int h) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image(paths[i], w, h, 3); + + image gray = grayscale_image(im); + free_image(im); + im = gray; + + X.vals[i] = im.data; + X.cols = im.h*im.w*im.c; + } + return X; +} + +matrix load_image_paths(char **paths, int n, int w, int h) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], w, h); + X.vals[i] = im.data; + X.cols = im.h*im.w*im.c; + } + return X; +} + +matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], 0, 0); + image crop; + if(center){ + crop = center_crop_image(im, size, size); + } else { + crop = random_augment_image(im, angle, aspect, min, max, size, size); + } + int flip = rand()%2; + if (flip) flip_image(crop); + random_distort_image(crop, hue, saturation, exposure); + + /* + show_image(im, "orig"); + show_image(crop, "crop"); + cvWaitKey(0); + */ + //grayscale_image_3c(crop); + free_image(im); + X.vals[i] = crop.data; + X.cols = 
crop.h*crop.w*crop.c; + } + return X; +} + + +box_label *read_boxes(char *filename, int *n) +{ + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + float x, y, h, w; + int id; + int count = 0; + int size = 64; + box_label *boxes = calloc(size, sizeof(box_label)); + while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){ + if(count == size) { + size = size * 2; + boxes = realloc(boxes, size*sizeof(box_label)); + } + boxes[count].id = id; + boxes[count].x = x; + boxes[count].y = y; + boxes[count].h = h; + boxes[count].w = w; + boxes[count].left = x - w/2; + boxes[count].right = x + w/2; + boxes[count].top = y - h/2; + boxes[count].bottom = y + h/2; + ++count; + } + fclose(file); + *n = count; + return boxes; +} + +void randomize_boxes(box_label *b, int n) +{ + int i; + for(i = 0; i < n; ++i){ + box_label swap = b[i]; + int index = rand()%n; + b[i] = b[index]; + b[index] = swap; + } +} + +void correct_boxes(box_label *boxes, int n, float dx, float dy, float sx, float sy, int flip) +{ + int i; + for(i = 0; i < n; ++i){ + if(boxes[i].x == 0 && boxes[i].y == 0) { + boxes[i].x = 999999; + boxes[i].y = 999999; + boxes[i].w = 999999; + boxes[i].h = 999999; + continue; + } + boxes[i].left = boxes[i].left * sx - dx; + boxes[i].right = boxes[i].right * sx - dx; + boxes[i].top = boxes[i].top * sy - dy; + boxes[i].bottom = boxes[i].bottom* sy - dy; + + if(flip){ + float swap = boxes[i].left; + boxes[i].left = 1. - boxes[i].right; + boxes[i].right = 1. 
- swap; + } + + boxes[i].left = constrain(0, 1, boxes[i].left); + boxes[i].right = constrain(0, 1, boxes[i].right); + boxes[i].top = constrain(0, 1, boxes[i].top); + boxes[i].bottom = constrain(0, 1, boxes[i].bottom); + + boxes[i].x = (boxes[i].left+boxes[i].right)/2; + boxes[i].y = (boxes[i].top+boxes[i].bottom)/2; + boxes[i].w = (boxes[i].right - boxes[i].left); + boxes[i].h = (boxes[i].bottom - boxes[i].top); + + boxes[i].w = constrain(0, 1, boxes[i].w); + boxes[i].h = constrain(0, 1, boxes[i].h); + } +} + +void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int count = 0; + box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + float x,y,w,h; + int id; + int i; + + for (i = 0; i < count && i < 90; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if (w < .0 || h < .0) continue; + + int index = (4+classes) * i; + + truth[index++] = x; + truth[index++] = y; + truth[index++] = w; + truth[index++] = h; + + if (id < classes) truth[index+id] = 1; + } + free(boxes); +} + +void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + int count = 0; + 
box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + float x,y,w,h; + int id; + int i; + + for (i = 0; i < count; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if (w < .005 || h < .005) continue; + + int col = (int)(x*num_boxes); + int row = (int)(y*num_boxes); + + x = x*num_boxes - col; + y = y*num_boxes - row; + + int index = (col+row*num_boxes)*(5+classes); + if (truth[index]) continue; + truth[index++] = 1; + + if (id < classes) truth[index+id] = 1; + index += classes; + + truth[index++] = x; + truth[index++] = y; + truth[index++] = w; + truth[index++] = h; + } + free(boxes); +} + +void load_rle(image im, int *rle, int n) +{ + int count = 0; + int curr = 0; + int i,j; + for(i = 0; i < n; ++i){ + for(j = 0; j < rle[i]; ++j){ + im.data[count++] = curr; + } + curr = 1 - curr; + } + for(; count < im.h*im.w*im.c; ++count){ + im.data[count] = curr; + } +} + +void or_image(image src, image dest, int c) +{ + int i; + for(i = 0; i < src.w*src.h; ++i){ + if(src.data[i]) dest.data[dest.w*dest.h*c + i] = 1; + } +} + +void exclusive_image(image src) +{ + int k, j, i; + int s = src.w*src.h; + for(k = 0; k < src.c-1; ++k){ + for(i = 0; i < s; ++i){ + if (src.data[k*s + i]){ + for(j = k+1; j < src.c; ++j){ + src.data[j*s + i] = 0; + } + } + } + } +} + +box bound_image(image im) +{ + int x,y; + int minx = im.w; + int miny = im.h; + int maxx = 0; + int maxy = 0; + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + if(im.data[y*im.w + x]){ + minx = (x < minx) ? x : minx; + miny = (y < miny) ? y : miny; + maxx = (x > maxx) ? x : maxx; + maxy = (y > maxy) ? 
y : maxy; + } + } + } + box b = {minx, miny, maxx-minx + 1, maxy-miny + 1}; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + return b; +} + +void fill_truth_iseg(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + int i = 0; + int j; + image part = make_image(w, h, 1); + while((fscanf(file, "%d %s", &id, buff) == 2) && i < num_boxes){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect); + if(flip) flip_image(sized); + + image mask = resize_image(sized, mw, mh); + truth[i*(mw*mh+1)] = id; + for(j = 0; j < mw*mh; ++j){ + truth[i*(mw*mh + 1) + 1 + j] = mask.data[j]; + } + ++i; + + free_image(mask); + free_image(sized); + free(rle); + } + if(i < num_boxes) truth[i*(mw*mh+1)] = -1; + fclose(file); + free_image(part); +} + +void fill_truth_mask(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + int i = 0; + image part = make_image(w, h, 1); + while((fscanf(file, "%d %s", &id, buff) == 2) && i < num_boxes){ + int n = 0; + int 
*rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect); + if(flip) flip_image(sized); + box b = bound_image(sized); + if(b.w > 0){ + image crop = crop_image(sized, b.x, b.y, b.w, b.h); + image mask = resize_image(crop, mw, mh); + truth[i*(4 + mw*mh + 1) + 0] = (b.x + b.w/2.)/sized.w; + truth[i*(4 + mw*mh + 1) + 1] = (b.y + b.h/2.)/sized.h; + truth[i*(4 + mw*mh + 1) + 2] = b.w/sized.w; + truth[i*(4 + mw*mh + 1) + 3] = b.h/sized.h; + int j; + for(j = 0; j < mw*mh; ++j){ + truth[i*(4 + mw*mh + 1) + 4 + j] = mask.data[j]; + } + truth[i*(4 + mw*mh + 1) + 4 + mw*mh] = id; + free_image(crop); + free_image(mask); + ++i; + } + free_image(sized); + free(rle); + } + fclose(file); + free_image(part); +} + + +void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + + find_replace(labelpath, "raw", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + int count = 0; + box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + if(count > num_boxes) count = num_boxes; + float x,y,w,h; + int id; + int i; + int sub = 0; + + for (i = 0; i < count; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if ((w < .001 || h < .001)) { + ++sub; + continue; + } + + truth[(i-sub)*5+0] = x; + truth[(i-sub)*5+1] = y; + truth[(i-sub)*5+2] = w; + truth[(i-sub)*5+3] = h; + truth[(i-sub)*5+4] = id; + } + free(boxes); +} + +#define NUMCHARS 37 + +void print_letters(float 
*pred, int n) +{ + int i; + for(i = 0; i < n; ++i){ + int index = max_index(pred+i*NUMCHARS, NUMCHARS); + printf("%c", int_to_alphanum(index)); + } + printf("\n"); +} + +void fill_truth_captcha(char *path, int n, float *truth) +{ + char *begin = strrchr(path, '/'); + ++begin; + int i; + for(i = 0; i < strlen(begin) && i < n && begin[i] != '.'; ++i){ + int index = alphanum_to_int(begin[i]); + if(index > 35) printf("Bad %c\n", begin[i]); + truth[i*NUMCHARS+index] = 1; + } + for(;i < n; ++i){ + truth[i*NUMCHARS + NUMCHARS-1] = 1; + } +} + +data load_data_captcha(char **paths, int n, int m, int k, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = make_matrix(n, k*NUMCHARS); + int i; + for(i = 0; i < n; ++i){ + fill_truth_captcha(paths[i], k, d.y.vals[i]); + } + if(m) free(paths); + return d; +} + +data load_data_captcha_encode(char **paths, int n, int m, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.X.cols = 17100; + d.y = d.X; + if(m) free(paths); + return d; +} + +void fill_truth(char *path, char **labels, int k, float *truth) +{ + int i; + memset(truth, 0, k*sizeof(float)); + int count = 0; + for(i = 0; i < k; ++i){ + if(strstr(path, labels[i])){ + truth[i] = 1; + ++count; + //printf("%s %s %d\n", path, labels[i], i); + } + } + if(count != 1 && (k != 1 || count != 0)) printf("Too many or too few labels: %d, %s\n", count, path); +} + +void fill_hierarchy(float *truth, int k, tree *hierarchy) +{ + int j; + for(j = 0; j < k; ++j){ + if(truth[j]){ + int parent = hierarchy->parent[j]; + while(parent >= 0){ + truth[parent] = 1; + parent = hierarchy->parent[parent]; + } + } + } + int i; + int count = 0; + for(j = 0; j < hierarchy->groups; ++j){ + //printf("%d\n", count); + int mask = 1; + for(i = 0; i < hierarchy->group_size[j]; ++i){ + if(truth[count + i]){ + mask = 0; + break; + } 
+ } + if (mask) { + for(i = 0; i < hierarchy->group_size[j]; ++i){ + truth[count + i] = SECRET_NUM; + } + } + count += hierarchy->group_size[j]; + } +} + +matrix load_regression_labels_paths(char **paths, int n, int k) +{ + matrix y = make_matrix(n, k); + int i,j; + for(i = 0; i < n; ++i){ + char labelpath[4096]; + find_replace(paths[i], "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".BMP", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPeG", ".txt", labelpath); + find_replace(labelpath, ".Jpeg", ".txt", labelpath); + find_replace(labelpath, ".PNG", ".txt", labelpath); + find_replace(labelpath, ".TIF", ".txt", labelpath); + find_replace(labelpath, ".bmp", ".txt", labelpath); + find_replace(labelpath, ".jpeg", ".txt", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".tif", ".txt", labelpath); + + FILE *file = fopen(labelpath, "r"); + for(j = 0; j < k; ++j){ + fscanf(file, "%f", &(y.vals[i][j])); + } + fclose(file); + } + return y; +} + +matrix load_labels_paths(char **paths, int n, char **labels, int k, tree *hierarchy) +{ + matrix y = make_matrix(n, k); + int i; + for(i = 0; i < n && labels; ++i){ + fill_truth(paths[i], labels, k, y.vals[i]); + if(hierarchy){ + fill_hierarchy(y.vals[i], k, hierarchy); + } + } + return y; +} + +matrix load_tags_paths(char **paths, int n, int k) +{ + matrix y = make_matrix(n, k); + int i; + //int count = 0; + for(i = 0; i < n; ++i){ + char label[4096]; + find_replace(paths[i], "images", "labels", label); + find_replace(label, ".jpg", ".txt", label); + FILE *file = fopen(label, "r"); + if (!file) continue; + //++count; + int tag; + while(fscanf(file, "%d", &tag) == 1){ + if(tag < k){ + y.vals[i][tag] = 1; + } + } + fclose(file); + } + //printf("%d/%d\n", 
count, n); + return y; +} + +char **get_labels(char *filename) +{ + list *plist = get_paths(filename); + char **labels = (char **)list_to_array(plist); + free_list(plist); + return labels; +} + +void free_data(data d) +{ + if(!d.shallow){ + free_matrix(d.X); + free_matrix(d.y); + }else{ + free(d.X.vals); + free(d.y.vals); + } +} + +image get_segmentation_image(char *path, int w, int h, int classes) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + image mask = make_image(w, h, classes); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + image part = make_image(w, h, 1); + while(fscanf(file, "%d %s", &id, buff) == 2){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + or_image(part, mask, id); + free(rle); + } + //exclusive_image(mask); + fclose(file); + free_image(part); + return mask; +} + +image get_segmentation_image2(char *path, int w, int h, int classes) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + image mask = make_image(w, h, classes+1); + int i; + for(i = 0; i < w*h; ++i){ + mask.data[w*h*classes + i] = 1; + } + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + image part = make_image(w, h, 1); + while(fscanf(file, "%d %s", &id, buff) == 2){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + or_image(part, mask, id); + for(i = 0; i < w*h; ++i){ + if(part.data[i]) mask.data[w*h*classes + i] = 
0; + } + free(rle); + } + //exclusive_image(mask); + fclose(file); + free_image(part); + return mask; +} + +data load_data_seg(int n, char **paths, int m, int w, int h, int classes, int min, int max, float angle, float aspect, float hue, float saturation, float exposure, int div) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + + d.y.rows = n; + d.y.cols = h*w*classes/div/div; + d.y.vals = calloc(d.X.rows, sizeof(float*)); + + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + + image mask = get_segmentation_image(random_paths[i], orig.w, orig.h, classes); + //image mask = make_image(orig.w, orig.h, classes+1); + image sized_m = rotate_crop_image(mask, a.rad, a.scale/div, a.w/div, a.h/div, a.dx/div, a.dy/div, a.aspect); + + if(flip) flip_image(sized_m); + d.y.vals[i] = sized_m.data; + + free_image(orig); + free_image(mask); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_iseg(int n, char **paths, int m, int w, int h, int classes, int boxes, int div, int min, int max, float angle, float aspect, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, (((w/div)*(h/div))+1)*boxes); + + for(i = 0; i < n; ++i){ + image orig = 
load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + //show_image(sized, "image"); + + fill_truth_iseg(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, w/div, h/div); + + free_image(orig); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_mask(int n, char **paths, int m, int w, int h, int classes, int boxes, int coords, int min, int max, float angle, float aspect, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, (coords+1)*boxes); + + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + //show_image(sized, "image"); + + fill_truth_mask(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, 14, 14); + + free_image(orig); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter, float hue, float saturation, float exposure) 
+{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + + int k = size*size*(5+classes); + d.y = make_matrix(n, k); + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + + int oh = orig.h; + int ow = orig.w; + + int dw = (ow*jitter); + int dh = (oh*jitter); + + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); + + int swidth = ow - pleft - pright; + int sheight = oh - ptop - pbot; + + float sx = (float)swidth / ow; + float sy = (float)sheight / oh; + + int flip = rand()%2; + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + + float dx = ((float)pleft/ow)/sx; + float dy = ((float)ptop /oh)/sy; + + image sized = resize_image(cropped, w, h); + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + + fill_truth_region(random_paths[i], d.y.vals[i], classes, size, flip, dx, dy, 1./sx, 1./sy); + + free_image(orig); + free_image(cropped); + } + free(random_paths); + return d; +} + +data load_data_compare(int n, char **paths, int m, int classes, int w, int h) +{ + if(m) paths = get_random_paths(paths, 2*n, m); + int i,j; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*6; + + int k = 2*(classes); + d.y = make_matrix(n, k); + for(i = 0; i < n; ++i){ + image im1 = load_image_color(paths[i*2], w, h); + image im2 = load_image_color(paths[i*2+1], w, h); + + d.X.vals[i] = calloc(d.X.cols, sizeof(float)); + memcpy(d.X.vals[i], im1.data, h*w*3*sizeof(float)); + memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float)); + + int id; + float iou; + + char imlabel1[4096]; + char imlabel2[4096]; + find_replace(paths[i*2], "imgs", "labels", imlabel1); + find_replace(imlabel1, "jpg", "txt", 
imlabel1); + FILE *fp1 = fopen(imlabel1, "r"); + + while(fscanf(fp1, "%d %f", &id, &iou) == 2){ + if (d.y.vals[i][2*id] < iou) d.y.vals[i][2*id] = iou; + } + + find_replace(paths[i*2+1], "imgs", "labels", imlabel2); + find_replace(imlabel2, "jpg", "txt", imlabel2); + FILE *fp2 = fopen(imlabel2, "r"); + + while(fscanf(fp2, "%d %f", &id, &iou) == 2){ + if (d.y.vals[i][2*id + 1] < iou) d.y.vals[i][2*id + 1] = iou; + } + + for (j = 0; j < classes; ++j){ + if (d.y.vals[i][2*j] > .5 && d.y.vals[i][2*j+1] < .5){ + d.y.vals[i][2*j] = 1; + d.y.vals[i][2*j+1] = 0; + } else if (d.y.vals[i][2*j] < .5 && d.y.vals[i][2*j+1] > .5){ + d.y.vals[i][2*j] = 0; + d.y.vals[i][2*j+1] = 1; + } else { + d.y.vals[i][2*j] = SECRET_NUM; + d.y.vals[i][2*j+1] = SECRET_NUM; + } + } + fclose(fp1); + fclose(fp2); + + free_image(im1); + free_image(im2); + } + if(m) free(paths); + return d; +} + +data load_data_swag(char **paths, int n, int classes, float jitter) +{ + int index = rand()%n; + char *random_path = paths[index]; + + image orig = load_image_color(random_path, 0, 0); + int h = orig.h; + int w = orig.w; + + data d = {0}; + d.shallow = 0; + d.w = w; + d.h = h; + + d.X.rows = 1; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + int k = (4+classes)*90; + d.y = make_matrix(1, k); + + int dw = w*jitter; + int dh = h*jitter; + + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); + + int swidth = w - pleft - pright; + int sheight = h - ptop - pbot; + + float sx = (float)swidth / w; + float sy = (float)sheight / h; + + int flip = rand()%2; + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + + float dx = ((float)pleft/w)/sx; + float dy = ((float)ptop /h)/sy; + + image sized = resize_image(cropped, w, h); + if(flip) flip_image(sized); + d.X.vals[0] = sized.data; + + fill_truth_swag(random_path, d.y.vals[0], classes, flip, dx, dy, 1./sx, 1./sy); + + free_image(orig); + 
free_image(cropped); + + return d; +} + +data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, 5*boxes); + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + image sized = make_image(w, h, orig.c); + fill_image(sized, .5); + + float dw = jitter * orig.w; + float dh = jitter * orig.h; + + float new_ar = (orig.w + rand_uniform(-dw, dw)) / (orig.h + rand_uniform(-dh, dh)); + //float scale = rand_uniform(.25, 2); + float scale = 1; + + float nw, nh; + + if(new_ar < 1){ + nh = scale * h; + nw = nh * new_ar; + } else { + nw = scale * w; + nh = nw / new_ar; + } + + float dx = rand_uniform(0, w - nw); + float dy = rand_uniform(0, h - nh); + + place_image(orig, nw, nh, dx, dy, sized); + + random_distort_image(sized, hue, saturation, exposure); + + int flip = rand()%2; + if(flip) flip_image(sized); + d.X.vals[i] = sized.data; + + + fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, -dx/w, -dy/h, nw/w, nh/h); + + free_image(orig); + } + free(random_paths); + return d; +} + +void *load_thread(void *ptr) +{ + //printf("Loading data: %d\n", rand()); + load_args a = *(struct load_args*)ptr; + if(a.exposure == 0) a.exposure = 1; + if(a.saturation == 0) a.saturation = 1; + if(a.aspect == 0) a.aspect = 1; + + if (a.type == OLD_CLASSIFICATION_DATA){ + *a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h); + } else if (a.type == REGRESSION_DATA){ + *a.d = load_data_regression(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == CLASSIFICATION_DATA){ + *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.min, a.max, 
a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.center); + } else if (a.type == SUPER_DATA){ + *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale); + } else if (a.type == WRITING_DATA){ + *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h); + } else if (a.type == ISEG_DATA){ + *a.d = load_data_iseg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.scale, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == INSTANCE_DATA){ + *a.d = load_data_mask(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.coords, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == SEGMENTATION_DATA){ + *a.d = load_data_seg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.scale); + } else if (a.type == REGION_DATA){ + *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); + } else if (a.type == DETECTION_DATA){ + *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); + } else if (a.type == SWAG_DATA){ + *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter); + } else if (a.type == COMPARE_DATA){ + *a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h); + } else if (a.type == IMAGE_DATA){ + *(a.im) = load_image_color(a.path, 0, 0); + *(a.resized) = resize_image(*(a.im), a.w, a.h); + } else if (a.type == LETTERBOX_DATA){ + *(a.im) = load_image_color(a.path, 0, 0); + *(a.resized) = letterbox_image(*(a.im), a.w, a.h); + } else if (a.type == TAG_DATA){ + *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } + free(ptr); + return 0; +} + +pthread_t load_data_in_thread(load_args args) +{ + pthread_t thread; + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + 
if(pthread_create(&thread, 0, load_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void *load_threads(void *ptr) +{ + int i; + load_args args = *(load_args *)ptr; + if (args.threads == 0) args.threads = 1; + data *out = args.d; + int total = args.n; + free(ptr); + data *buffers = calloc(args.threads, sizeof(data)); + pthread_t *threads = calloc(args.threads, sizeof(pthread_t)); + for(i = 0; i < args.threads; ++i){ + args.d = buffers + i; + args.n = (i+1) * total/args.threads - i * total/args.threads; + threads[i] = load_data_in_thread(args); + } + for(i = 0; i < args.threads; ++i){ + pthread_join(threads[i], 0); + } + *out = concat_datas(buffers, args.threads); + out->shallow = 0; + for(i = 0; i < args.threads; ++i){ + buffers[i].shallow = 1; + free_data(buffers[i]); + } + free(buffers); + free(threads); + return 0; +} + +void load_data_blocking(load_args args) +{ + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + load_thread(ptr); +} + +pthread_t load_data(load_args args) +{ + pthread_t thread; + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + if(pthread_create(&thread, 0, load_threads, ptr)) error("Thread creation failed"); + return thread; +} + +data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h) +{ + if(m) paths = get_random_paths(paths, n, m); + char **replace_paths = find_replace_paths(paths, n, ".png", "-label.png"); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = load_image_paths_gray(replace_paths, n, out_w, out_h); + if(m) free(paths); + int i; + for(i = 0; i < n; ++i) free(replace_paths[i]); + free(replace_paths); + return d; +} + +data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = load_labels_paths(paths, n, labels, k, 0); + if(m) free(paths); 
+ return d; +} + +/* + data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) + { + data d = {0}; + d.indexes = calloc(n, sizeof(int)); + if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes); + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure); + d.y = load_labels_paths(paths, n, labels, k); + if(m) free(paths); + return d; + } + */ + +data load_data_super(char **paths, int n, int m, int w, int h, int scale) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + + int i; + d.X.rows = n; + d.X.vals = calloc(n, sizeof(float*)); + d.X.cols = w*h*3; + + d.y.rows = n; + d.y.vals = calloc(n, sizeof(float*)); + d.y.cols = w*scale * h*scale * 3; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], 0, 0); + image crop = random_crop_image(im, w*scale, h*scale); + int flip = rand()%2; + if (flip) flip_image(crop); + image resize = resize_image(crop, w, h); + d.X.vals[i] = resize.data; + d.y.vals[i] = crop.data; + free_image(im); + } + + if(m) free(paths); + return d; +} + +data load_data_regression(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0); + d.y = load_regression_labels_paths(paths, n, k); + if(m) free(paths); + return d; +} + +data select_data(data *orig, int *inds) +{ + data d = {0}; + d.shallow = 1; + d.w = orig[0].w; + d.h = orig[0].h; + + d.X.rows = orig[0].X.rows; + d.y.rows = orig[0].X.rows; + + d.X.cols = orig[0].X.cols; + d.y.cols = orig[0].y.cols; + + d.X.vals = calloc(orig[0].X.rows, sizeof(float *)); + d.y.vals = calloc(orig[0].y.rows, sizeof(float *)); + 
int i; + for(i = 0; i < d.X.rows; ++i){ + d.X.vals[i] = orig[inds[i]].X.vals[i]; + d.y.vals[i] = orig[inds[i]].y.vals[i]; + } + return d; +} + +data *tile_data(data orig, int divs, int size) +{ + data *ds = calloc(divs*divs, sizeof(data)); + int i, j; +#pragma omp parallel for + for(i = 0; i < divs*divs; ++i){ + data d; + d.shallow = 0; + d.w = orig.w/divs * size; + d.h = orig.h/divs * size; + d.X.rows = orig.X.rows; + d.X.cols = d.w*d.h*3; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + + d.y = copy_matrix(orig.y); +#pragma omp parallel for + for(j = 0; j < orig.X.rows; ++j){ + int x = (i%divs) * orig.w / divs - (d.w - orig.w/divs)/2; + int y = (i/divs) * orig.h / divs - (d.h - orig.h/divs)/2; + image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[j]); + d.X.vals[j] = crop_image(im, x, y, d.w, d.h).data; + } + ds[i] = d; + } + return ds; +} + +data resize_data(data orig, int w, int h) +{ + data d = {0}; + d.shallow = 0; + d.w = w; + d.h = h; + int i; + d.X.rows = orig.X.rows; + d.X.cols = w*h*3; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + + d.y = copy_matrix(orig.y); +#pragma omp parallel for + for(i = 0; i < orig.X.rows; ++i){ + image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[i]); + d.X.vals[i] = resize_image(im, w, h).data; + } + return d; +} + +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.w=size; + d.h=size; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, center); + d.y = load_labels_paths(paths, n, labels, k, hierarchy); + if(m) free(paths); + return d; +} + +data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +{ + if(m) paths = 
get_random_paths(paths, n, m); + data d = {0}; + d.w = size; + d.h = size; + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0); + d.y = load_tags_paths(paths, n, k); + if(m) free(paths); + return d; +} + +matrix concat_matrix(matrix m1, matrix m2) +{ + int i, count = 0; + matrix m; + m.cols = m1.cols; + m.rows = m1.rows+m2.rows; + m.vals = calloc(m1.rows + m2.rows, sizeof(float*)); + for(i = 0; i < m1.rows; ++i){ + m.vals[count++] = m1.vals[i]; + } + for(i = 0; i < m2.rows; ++i){ + m.vals[count++] = m2.vals[i]; + } + return m; +} + +data concat_data(data d1, data d2) +{ + data d = {0}; + d.shallow = 1; + d.X = concat_matrix(d1.X, d2.X); + d.y = concat_matrix(d1.y, d2.y); + d.w = d1.w; + d.h = d1.h; + return d; +} + +data concat_datas(data *d, int n) +{ + int i; + data out = {0}; + for(i = 0; i < n; ++i){ + data new = concat_data(d[i], out); + free_data(out); + out = new; + } + return out; +} + +data load_categorical_data_csv(char *filename, int target, int k) +{ + data d = {0}; + d.shallow = 0; + matrix X = csv_to_matrix(filename); + float *truth_1d = pop_column(&X, target); + float **truth = one_hot_encode(truth_1d, X.rows, k); + matrix y; + y.rows = X.rows; + y.cols = k; + y.vals = truth; + d.X = X; + d.y = y; + free(truth_1d); + return d; +} + +data load_cifar10_data(char *filename) +{ + data d = {0}; + d.shallow = 0; + long i,j; + matrix X = make_matrix(10000, 3072); + matrix y = make_matrix(10000, 10); + d.X = X; + d.y = y; + + FILE *fp = fopen(filename, "rb"); + if(!fp) file_error(filename); + for(i = 0; i < 10000; ++i){ + unsigned char bytes[3073]; + fread(bytes, 1, 3073, fp); + int class = bytes[0]; + y.vals[i][class] = 1; + for(j = 0; j < X.cols; ++j){ + X.vals[i][j] = (double)bytes[j+1]; + } + } + scale_data_rows(d, 1./255); + //normalize_data_rows(d); + fclose(fp); + return d; +} + +void get_random_batch(data d, int n, float *X, float *y) +{ + int j; + for(j = 0; j < n; ++j){ + int 
index = rand()%d.X.rows; + memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); + memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); + } +} + +void get_next_batch(data d, int n, int offset, float *X, float *y) +{ + int j; + for(j = 0; j < n; ++j){ + int index = offset + j; + memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); + if(y) memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); + } +} + +void smooth_data(data d) +{ + int i, j; + float scale = 1. / d.y.cols; + float eps = .1; + for(i = 0; i < d.y.rows; ++i){ + for(j = 0; j < d.y.cols; ++j){ + d.y.vals[i][j] = eps * scale + (1-eps) * d.y.vals[i][j]; + } + } +} + +data load_all_cifar10() +{ + data d = {0}; + d.shallow = 0; + int i,j,b; + matrix X = make_matrix(50000, 3072); + matrix y = make_matrix(50000, 10); + d.X = X; + d.y = y; + + + for(b = 0; b < 5; ++b){ + char buff[256]; + sprintf(buff, "data/cifar/cifar-10-batches-bin/data_batch_%d.bin", b+1); + FILE *fp = fopen(buff, "rb"); + if(!fp) file_error(buff); + for(i = 0; i < 10000; ++i){ + unsigned char bytes[3073]; + fread(bytes, 1, 3073, fp); + int class = bytes[0]; + y.vals[i+b*10000][class] = 1; + for(j = 0; j < X.cols; ++j){ + X.vals[i+b*10000][j] = (double)bytes[j+1]; + } + } + fclose(fp); + } + //normalize_data_rows(d); + scale_data_rows(d, 1./255); + smooth_data(d); + return d; +} + +data load_go(char *filename) +{ + FILE *fp = fopen(filename, "rb"); + matrix X = make_matrix(3363059, 361); + matrix y = make_matrix(3363059, 361); + int row, col; + + if(!fp) file_error(filename); + char *label; + int count = 0; + while((label = fgetl(fp))){ + int i; + if(count == X.rows){ + X = resize_matrix(X, count*2); + y = resize_matrix(y, count*2); + } + sscanf(label, "%d %d", &row, &col); + char *board = fgetl(fp); + + int index = row*19 + col; + y.vals[count][index] = 1; + + for(i = 0; i < 19*19; ++i){ + float val = 0; + if(board[i] == '1') val = 1; + else if(board[i] == '2') val = -1; + X.vals[count][i] = val; + } + 
++count; + free(label); + free(board); + } + X = resize_matrix(X, count); + y = resize_matrix(y, count); + + data d = {0}; + d.shallow = 0; + d.X = X; + d.y = y; + + + fclose(fp); + + return d; +} + + +void randomize_data(data d) +{ + int i; + for(i = d.X.rows-1; i > 0; --i){ + int index = rand()%i; + float *swap = d.X.vals[index]; + d.X.vals[index] = d.X.vals[i]; + d.X.vals[i] = swap; + + swap = d.y.vals[index]; + d.y.vals[index] = d.y.vals[i]; + d.y.vals[i] = swap; + } +} + +void scale_data_rows(data d, float s) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + scale_array(d.X.vals[i], d.X.cols, s); + } +} + +void translate_data_rows(data d, float s) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + translate_array(d.X.vals[i], d.X.cols, s); + } +} + +data copy_data(data d) +{ + data c = {0}; + c.w = d.w; + c.h = d.h; + c.shallow = 0; + c.num_boxes = d.num_boxes; + c.boxes = d.boxes; + c.X = copy_matrix(d.X); + c.y = copy_matrix(d.y); + return c; +} + +void normalize_data_rows(data d) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + normalize_array(d.X.vals[i], d.X.cols); + } +} + +data get_data_part(data d, int part, int total) +{ + data p = {0}; + p.shallow = 1; + p.X.rows = d.X.rows * (part + 1) / total - d.X.rows * part / total; + p.y.rows = d.y.rows * (part + 1) / total - d.y.rows * part / total; + p.X.cols = d.X.cols; + p.y.cols = d.y.cols; + p.X.vals = d.X.vals + d.X.rows * part / total; + p.y.vals = d.y.vals + d.y.rows * part / total; + return p; +} + +data get_random_data(data d, int num) +{ + data r = {0}; + r.shallow = 1; + + r.X.rows = num; + r.y.rows = num; + + r.X.cols = d.X.cols; + r.y.cols = d.y.cols; + + r.X.vals = calloc(num, sizeof(float *)); + r.y.vals = calloc(num, sizeof(float *)); + + int i; + for(i = 0; i < num; ++i){ + int index = rand()%d.X.rows; + r.X.vals[i] = d.X.vals[index]; + r.y.vals[i] = d.y.vals[index]; + } + return r; +} + +data *split_data(data d, int part, int total) +{ + data *split = calloc(2, sizeof(data)); + int i; + int start = 
part*d.X.rows/total; + int end = (part+1)*d.X.rows/total; + data train; + data test; + train.shallow = test.shallow = 1; + + test.X.rows = test.y.rows = end-start; + train.X.rows = train.y.rows = d.X.rows - (end-start); + train.X.cols = test.X.cols = d.X.cols; + train.y.cols = test.y.cols = d.y.cols; + + train.X.vals = calloc(train.X.rows, sizeof(float*)); + test.X.vals = calloc(test.X.rows, sizeof(float*)); + train.y.vals = calloc(train.y.rows, sizeof(float*)); + test.y.vals = calloc(test.y.rows, sizeof(float*)); + + for(i = 0; i < start; ++i){ + train.X.vals[i] = d.X.vals[i]; + train.y.vals[i] = d.y.vals[i]; + } + for(i = start; i < end; ++i){ + test.X.vals[i-start] = d.X.vals[i]; + test.y.vals[i-start] = d.y.vals[i]; + } + for(i = end; i < d.X.rows; ++i){ + train.X.vals[i-(end-start)] = d.X.vals[i]; + train.y.vals[i-(end-start)] = d.y.vals[i]; + } + split[0] = train; + split[1] = test; + return split; +} + diff --git a/workloads/realworld/pinned/darknet/src/data.h b/workloads/realworld/pinned/darknet/src/data.h new file mode 100644 index 0000000000000000000000000000000000000000..781906f8743c7d88c0fa134403d0ae020b544053 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/data.h @@ -0,0 +1,50 @@ +#ifndef DATA_H +#define DATA_H +#include + +#include "darknet.h" +#include "matrix.h" +#include "list.h" +#include "image.h" +#include "tree.h" + +static inline float distance_from_edge(int x, int max) +{ + int dx = (max/2) - x; + if (dx < 0) dx = -dx; + dx = (max/2) + 1 - dx; + dx *= 2; + float dist = (float)dx/max; + if (dist > 1) dist = 1; + return dist; +} +void load_data_blocking(load_args args); + + +void print_letters(float *pred, int n); +data load_data_captcha(char **paths, int n, int m, int k, int w, int h); +data load_data_captcha_encode(char **paths, int n, int m, int w, int h); +data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure); +data load_data_tag(char 
**paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); +matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); +data load_data_super(char **paths, int n, int m, int w, int h, int scale); +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); +data load_data_regression(char **paths, int n, int m, int classes, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); +data load_go(char *filename); + + +data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h); + +void get_random_batch(data d, int n, float *X, float *y); +data get_data_part(data d, int part, int total); +data get_random_data(data d, int num); +data load_categorical_data_csv(char *filename, int target, int k); +void normalize_data_rows(data d); +void scale_data_rows(data d, float s); +void translate_data_rows(data d, float s); +void randomize_data(data d); +data *split_data(data d, int part, int total); +data concat_datas(data *d, int n); +void fill_truth(char *path, char **labels, int k, float *truth); + +#endif diff --git a/workloads/realworld/pinned/darknet/src/deconvolutional_kernels.cu b/workloads/realworld/pinned/darknet/src/deconvolutional_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..ed12e7a3dc5148b1cbff746f13901a9653bc0f6d --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/deconvolutional_kernels.cu @@ -0,0 +1,139 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "convolutional_layer.h" +#include "deconvolutional_layer.h" +#include "batchnorm_layer.h" +#include "gemm.h" +#include "blas.h" +#include 
"im2col.h" +#include "col2im.h" +#include "utils.h" +#include "cuda_dark.h" +} + +extern "C" void forward_deconvolutional_layer_gpu(layer l, network net) +{ + int i; + + int m = l.size*l.size*l.n; + int n = l.h*l.w; + int k = l.c; + + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + + for(i = 0; i < l.batch; ++i){ + float *a = l.weights_gpu; + float *b = net.input_gpu + i*l.c*l.h*l.w; + float *c = net.workspace; + + gemm_gpu(1,0,m,n,k,1,a,m,b,n,0,c,n); + + col2im_gpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output_gpu+i*l.outputs); + } + if (l.batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); + } + activate_array_gpu(l.output_gpu, l.batch*l.n*l.out_w*l.out_h, l.activation); +} + +extern "C" void backward_deconvolutional_layer_gpu(layer l, network net) +{ + int i; + + //constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + + if(l.batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); + } + + //if(net.delta_gpu) memset(net.delta_gpu, 0, l.batch*l.h*l.w*l.c*sizeof(float)); + + for(i = 0; i < l.batch; ++i){ + int m = l.c; + int n = l.size*l.size*l.n; + int k = l.h*l.w; + + float *a = net.input_gpu + i*m*k; + float *b = net.workspace; + float *c = l.weight_updates_gpu; + + im2col_gpu(l.delta_gpu + i*l.outputs, l.out_c, l.out_h, l.out_w, + l.size, l.stride, l.pad, b); + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if(net.delta_gpu){ + int m = l.c; + int n = l.h*l.w; + int k = l.size*l.size*l.n; + + float *a = l.weights_gpu; + float *b = net.workspace; + float *c = net.delta_gpu + i*n*m; + + gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } +} + +extern "C" void pull_deconvolutional_layer(layer l) +{ + cuda_pull_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size); + 
cuda_pull_array(l.biases_gpu, l.biases, l.n); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size); + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_pull_array(l.scales_gpu, l.scales, l.n); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + +extern "C" void push_deconvolutional_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size); + cuda_push_array(l.biases_gpu, l.biases, l.n); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size); + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_push_array(l.scales_gpu, l.scales, l.n); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + +void update_deconvolutional_layer_gpu(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + if(a.adam){ + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t); + adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + if(l.scales_gpu){ + adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + } + }else{ + axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1); + + axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.n, momentum, l.bias_updates_gpu, 1); + + if(l.scales_gpu){ + axpy_gpu(l.n, 
learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.n, momentum, l.scale_updates_gpu, 1); + } + } +} + diff --git a/workloads/realworld/pinned/darknet/src/deconvolutional_layer.c b/workloads/realworld/pinned/darknet/src/deconvolutional_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..00c0e85771d42f99de969f9fd03e5f0f359d405c --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/deconvolutional_layer.c @@ -0,0 +1,312 @@ +#include "deconvolutional_layer.h" +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "utils.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" + +#include +#include + + +static size_t get_workspace_size(layer l){ + return (size_t)l.h*l.w*l.size*l.size*l.n*sizeof(float); +} + +void bilinear_init(layer l) +{ + int i,j,f; + float center = (l.size-1) / 2.; + for(f = 0; f < l.n; ++f){ + for(j = 0; j < l.size; ++j){ + for(i = 0; i < l.size; ++i){ + float val = (1 - fabs(i - center)) * (1 - fabs(j - center)); + int c = f%l.c; + int ind = f*l.size*l.size*l.c + c*l.size*l.size + j*l.size + i; + l.weights[ind] = val; + } + } + } +} + + +layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam) +{ + int i; + layer l = {0}; + l.type = DECONVOLUTIONAL; + + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.batch = batch; + l.stride = stride; + l.size = size; + + l.nweights = c*n*size*size; + l.nbiases = n; + + l.weights = calloc(c*n*size*size, sizeof(float)); + l.weight_updates = calloc(c*n*size*size, sizeof(float)); + + l.biases = calloc(n, sizeof(float)); + l.bias_updates = calloc(n, sizeof(float)); + //float scale = n/(size*size*c); + //printf("scale: %f\n", scale); + float scale = .02; + for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal(); + //bilinear_init(l); + for(i = 0; i < n; ++i){ + l.biases[i] = 0; + } + l.pad = padding; + + 
l.out_h = (l.h - 1) * l.stride + l.size - 2*l.pad; + l.out_w = (l.w - 1) * l.stride + l.size - 2*l.pad; + l.out_c = n; + l.outputs = l.out_w * l.out_h * l.out_c; + l.inputs = l.w * l.h * l.c; + + scal_cpu(l.nweights, (float)l.out_w*l.out_h/(l.w*l.h), l.weights, 1); + + l.output = calloc(l.batch*l.outputs, sizeof(float)); + l.delta = calloc(l.batch*l.outputs, sizeof(float)); + + l.forward = forward_deconvolutional_layer; + l.backward = backward_deconvolutional_layer; + l.update = update_deconvolutional_layer; + + l.batch_normalize = batch_normalize; + + if(batch_normalize){ + l.scales = calloc(n, sizeof(float)); + l.scale_updates = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(n, sizeof(float)); + l.variance = calloc(n, sizeof(float)); + + l.mean_delta = calloc(n, sizeof(float)); + l.variance_delta = calloc(n, sizeof(float)); + + l.rolling_mean = calloc(n, sizeof(float)); + l.rolling_variance = calloc(n, sizeof(float)); + l.x = calloc(l.batch*l.outputs, sizeof(float)); + l.x_norm = calloc(l.batch*l.outputs, sizeof(float)); + } + if(adam){ + l.m = calloc(c*n*size*size, sizeof(float)); + l.v = calloc(c*n*size*size, sizeof(float)); + l.bias_m = calloc(n, sizeof(float)); + l.scale_m = calloc(n, sizeof(float)); + l.bias_v = calloc(n, sizeof(float)); + l.scale_v = calloc(n, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_deconvolutional_layer_gpu; + l.backward_gpu = backward_deconvolutional_layer_gpu; + l.update_gpu = update_deconvolutional_layer_gpu; + + if(gpu_index >= 0){ + + if (adam) { + l.m_gpu = cuda_make_array(l.m, c*n*size*size); + l.v_gpu = cuda_make_array(l.v, c*n*size*size); + l.bias_m_gpu = cuda_make_array(l.bias_m, n); + l.bias_v_gpu = cuda_make_array(l.bias_v, n); + l.scale_m_gpu = cuda_make_array(l.scale_m, n); + l.scale_v_gpu = cuda_make_array(l.scale_v, n); + } + l.weights_gpu = cuda_make_array(l.weights, c*n*size*size); + l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size); 
+ + l.biases_gpu = cuda_make_array(l.biases, n); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*l.out_h*l.out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n); + + if(batch_normalize){ + l.mean_gpu = cuda_make_array(0, n); + l.variance_gpu = cuda_make_array(0, n); + + l.rolling_mean_gpu = cuda_make_array(0, n); + l.rolling_variance_gpu = cuda_make_array(0, n); + + l.mean_delta_gpu = cuda_make_array(0, n); + l.variance_delta_gpu = cuda_make_array(0, n); + + l.scales_gpu = cuda_make_array(l.scales, n); + l.scale_updates_gpu = cuda_make_array(0, n); + + l.x_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n); + l.x_norm_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n); + } + } + #ifdef CUDNN + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); + #endif +#endif + + l.activation = activation; + l.workspace_size = get_workspace_size(l); + + fprintf(stderr, "deconv%5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); + + return l; +} + +void denormalize_deconvolutional_layer(layer l) +{ + int i, j; + for(i = 0; i < l.n; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001); + for(j = 0; j < l.c*l.size*l.size; ++j){ + l.weights[i*l.c*l.size*l.size + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + +void resize_deconvolutional_layer(layer *l, int h, int w) +{ + l->h = h; + l->w = w; + l->out_h = (l->h - 1) * l->stride + l->size - 2*l->pad; + l->out_w = (l->w - 1) * l->stride + l->size - 2*l->pad; + + l->outputs = l->out_h * l->out_w * l->out_c; + 
l->inputs = l->w * l->h * l->c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + if(l->batch_normalize){ + l->x = realloc(l->x, l->batch*l->outputs*sizeof(float)); + l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float)); + } + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); + + if(l->batch_normalize){ + cuda_free(l->x_gpu); + cuda_free(l->x_norm_gpu); + + l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); + l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); + } + #ifdef CUDNN + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + #endif +#endif + l->workspace_size = get_workspace_size(*l); +} + +void forward_deconvolutional_layer(const layer l, network net) +{ + int i; + + int m = l.size*l.size*l.n; + int n = l.h*l.w; + int k = l.c; + + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + + for(i = 0; i < l.batch; ++i){ + float *a = l.weights; + float *b = net.input + i*l.c*l.h*l.w; + float *c = net.workspace; + + gemm_cpu(1,0,m,n,k,1,a,m,b,n,0,c,n); + + col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output+i*l.outputs); + } + if (l.batch_normalize) { + forward_batchnorm_layer(l, net); + } else { + add_bias(l.output, l.biases, l.batch, l.n, l.out_w*l.out_h); + } + activate_array(l.output, l.batch*l.n*l.out_w*l.out_h, l.activation); +} + +void backward_deconvolutional_layer(layer l, network net) +{ + int i; + + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l.bias_updates, l.delta, 
l.batch, l.n, l.out_w*l.out_h); + } + + //if(net.delta) memset(net.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float)); + + for(i = 0; i < l.batch; ++i){ + int m = l.c; + int n = l.size*l.size*l.n; + int k = l.h*l.w; + + float *a = net.input + i*m*k; + float *b = net.workspace; + float *c = l.weight_updates; + + im2col_cpu(l.delta + i*l.outputs, l.out_c, l.out_h, l.out_w, + l.size, l.stride, l.pad, b); + gemm_cpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if(net.delta){ + int m = l.c; + int n = l.h*l.w; + int k = l.size*l.size*l.n; + + float *a = l.weights; + float *b = net.workspace; + float *c = net.delta + i*n*m; + + gemm_cpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } +} + +void update_deconvolutional_layer(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int size = l.size*l.size*l.c*l.n; + axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.n, momentum, l.bias_updates, 1); + + if(l.scales){ + axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.n, momentum, l.scale_updates, 1); + } + + axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(size, momentum, l.weight_updates, 1); +} + + + diff --git a/workloads/realworld/pinned/darknet/src/deconvolutional_layer.h b/workloads/realworld/pinned/darknet/src/deconvolutional_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..722a1a58feec4ef13dac2b811df98e3f9960d4ef --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/deconvolutional_layer.h @@ -0,0 +1,25 @@ +#ifndef DECONVOLUTIONAL_LAYER_H +#define DECONVOLUTIONAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +#ifdef GPU +void forward_deconvolutional_layer_gpu(layer l, network net); +void 
backward_deconvolutional_layer_gpu(layer l, network net); +void update_deconvolutional_layer_gpu(layer l, update_args a); +void push_deconvolutional_layer(layer l); +void pull_deconvolutional_layer(layer l); +#endif + +layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam); +void resize_deconvolutional_layer(layer *l, int h, int w); +void forward_deconvolutional_layer(const layer l, network net); +void update_deconvolutional_layer(layer l, update_args a); +void backward_deconvolutional_layer(layer l, network net); + +#endif + diff --git a/workloads/realworld/pinned/darknet/src/demo.c b/workloads/realworld/pinned/darknet/src/demo.c new file mode 100644 index 0000000000000000000000000000000000000000..b89efb8dc4c044c0240b7442e39222405409a676 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/demo.c @@ -0,0 +1,349 @@ +#include "network.h" +#include "detection_layer.h" +#include "region_layer.h" +#include "cost_layer.h" +#include "utils.h" +#include "parser.h" +#include "box.h" +#include "image.h" +#include "demo.h" +#include + +#define DEMO 1 + +#ifdef OPENCV + +static char **demo_names; +static image **demo_alphabet; +static int demo_classes; + +static network *net; +static image buff [3]; +static image buff_letter[3]; +static int buff_index = 0; +static void * cap; +static float fps = 0; +static float demo_thresh = 0; +static float demo_hier = .5; +static int running = 0; + +static int demo_frame = 3; +static int demo_index = 0; +static float **predictions; +static float *avg; +static int demo_done = 0; +static int demo_total = 0; +double demo_time; + +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num); + +int size_network(network *net) +{ + int i; + int count = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + 
count += l.outputs; + } + } + return count; +} + +void remember_network(network *net) +{ + int i; + int count = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + memcpy(predictions[demo_index] + count, net->layers[i].output, sizeof(float) * l.outputs); + count += l.outputs; + } + } +} + +detection *avg_predictions(network *net, int *nboxes) +{ + int i, j; + int count = 0; + fill_cpu(demo_total, 0, avg, 1); + for(j = 0; j < demo_frame; ++j){ + axpy_cpu(demo_total, 1./demo_frame, predictions[j], 1, avg, 1); + } + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + memcpy(l.output, avg + count, sizeof(float) * l.outputs); + count += l.outputs; + } + } + detection *dets = get_network_boxes(net, buff[0].w, buff[0].h, demo_thresh, demo_hier, 0, 1, nboxes); + return dets; +} + +void *detect_in_thread(void *ptr) +{ + running = 1; + float nms = .4; + + layer l = net->layers[net->n-1]; + float *X = buff_letter[(buff_index+2)%3].data; + network_predict(net, X); + + /* + if(l.type == DETECTION){ + get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0); + } else */ + remember_network(net); + detection *dets = 0; + int nboxes = 0; + dets = avg_predictions(net, &nboxes); + + + /* + int i,j; + box zero = {0}; + int classes = l.classes; + for(i = 0; i < demo_detections; ++i){ + avg[i].objectness = 0; + avg[i].bbox = zero; + memset(avg[i].prob, 0, classes*sizeof(float)); + for(j = 0; j < demo_frame; ++j){ + axpy_cpu(classes, 1./demo_frame, dets[j][i].prob, 1, avg[i].prob, 1); + avg[i].objectness += dets[j][i].objectness * 1./demo_frame; + avg[i].bbox.x += dets[j][i].bbox.x * 1./demo_frame; + avg[i].bbox.y += dets[j][i].bbox.y * 1./demo_frame; + avg[i].bbox.w += dets[j][i].bbox.w * 1./demo_frame; + avg[i].bbox.h += dets[j][i].bbox.h * 1./demo_frame; + } + //copy_cpu(classes, dets[0][i].prob, 1, avg[i].prob, 1); + 
//avg[i].objectness = dets[0][i].objectness; + } + */ + + if (nms > 0) do_nms_obj(dets, nboxes, l.classes, nms); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.1f\n",fps); + printf("Objects:\n\n"); + image display = buff[(buff_index+2) % 3]; + draw_detections(display, dets, nboxes, demo_thresh, demo_names, demo_alphabet, demo_classes); + free_detections(dets, nboxes); + + demo_index = (demo_index + 1)%demo_frame; + running = 0; + return 0; +} + +void *fetch_in_thread(void *ptr) +{ + free_image(buff[buff_index]); + buff[buff_index] = get_image_from_stream(cap); + if(buff[buff_index].data == 0) { + demo_done = 1; + return 0; + } + letterbox_image_into(buff[buff_index], net->w, net->h, buff_letter[buff_index]); + return 0; +} + +void *display_in_thread(void *ptr) +{ + int c = show_image(buff[(buff_index + 1)%3], "Demo", 1); + if (c != -1) c = c%256; + if (c == 27) { + demo_done = 1; + return 0; + } else if (c == 82) { + demo_thresh += .02; + } else if (c == 84) { + demo_thresh -= .02; + if(demo_thresh <= .02) demo_thresh = .02; + } else if (c == 83) { + demo_hier += .02; + } else if (c == 81) { + demo_hier -= .02; + if(demo_hier <= .0) demo_hier = .0; + } + return 0; +} + +void *display_loop(void *ptr) +{ + while(1){ + display_in_thread(0); + } +} + +void *detect_loop(void *ptr) +{ + while(1){ + detect_in_thread(0); + } +} + +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) +{ + //demo_frame = avg_frames; + image **alphabet = load_alphabet(); + demo_names = names; + demo_alphabet = alphabet; + demo_classes = classes; + demo_thresh = thresh; + demo_hier = hier; + printf("Demo\n"); + net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + pthread_t detect_thread; + pthread_t fetch_thread; + + srand(2222222); + + int i; + demo_total = size_network(net); + predictions = 
calloc(demo_frame, sizeof(float*)); + for (i = 0; i < demo_frame; ++i){ + predictions[i] = calloc(demo_total, sizeof(float)); + } + avg = calloc(demo_total, sizeof(float)); + + if(filename){ + printf("video file: %s\n", filename); + cap = open_video_stream(filename, 0, 0, 0, 0); + }else{ + cap = open_video_stream(0, cam_index, w, h, frames); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + + buff[0] = get_image_from_stream(cap); + buff[1] = copy_image(buff[0]); + buff[2] = copy_image(buff[0]); + buff_letter[0] = letterbox_image(buff[0], net->w, net->h); + buff_letter[1] = letterbox_image(buff[0], net->w, net->h); + buff_letter[2] = letterbox_image(buff[0], net->w, net->h); + + int count = 0; + if(!prefix){ + make_window("Demo", 1352, 1013, fullscreen); + } + + demo_time = what_time_is_it_now(); + + while(!demo_done){ + buff_index = (buff_index + 1) %3; + if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); + if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); + if(!prefix){ + fps = 1./(what_time_is_it_now() - demo_time); + demo_time = what_time_is_it_now(); + display_in_thread(0); + }else{ + char name[256]; + sprintf(name, "%s_%08d", prefix, count); + save_image(buff[(buff_index + 1)%3], name); + } + pthread_join(fetch_thread, 0); + pthread_join(detect_thread, 0); + ++count; + } +} + +/* + void demo_compare(char *cfg1, char *weight1, char *cfg2, char *weight2, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) + { + demo_frame = avg_frames; + predictions = calloc(demo_frame, sizeof(float*)); + image **alphabet = load_alphabet(); + demo_names = names; + demo_alphabet = alphabet; + demo_classes = classes; + demo_thresh = thresh; + demo_hier = hier; + printf("Demo\n"); + net = load_network(cfg1, weight1, 0); + set_batch_network(net, 1); + pthread_t detect_thread; 
+ pthread_t fetch_thread; + + srand(2222222); + + if(filename){ + printf("video file: %s\n", filename); + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + if(frames){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FPS, frames); + } + } + + if(!cap) error("Couldn't connect to webcam.\n"); + + layer l = net->layers[net->n-1]; + demo_detections = l.n*l.w*l.h; + int j; + + avg = (float *) calloc(l.outputs, sizeof(float)); + for(j = 0; j < demo_frame; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float)); + + boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box)); + probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *)); + for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes+1, sizeof(float)); + + buff[0] = get_image_from_stream(cap); + buff[1] = copy_image(buff[0]); + buff[2] = copy_image(buff[0]); + buff_letter[0] = letterbox_image(buff[0], net->w, net->h); + buff_letter[1] = letterbox_image(buff[0], net->w, net->h); + buff_letter[2] = letterbox_image(buff[0], net->w, net->h); + ipl = cvCreateImage(cvSize(buff[0].w,buff[0].h), IPL_DEPTH_8U, buff[0].c); + + int count = 0; + if(!prefix){ + cvNamedWindow("Demo", CV_WINDOW_NORMAL); + if(fullscreen){ + cvSetWindowProperty("Demo", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); + } else { + cvMoveWindow("Demo", 0, 0); + cvResizeWindow("Demo", 1352, 1013); + } + } + + demo_time = what_time_is_it_now(); + + while(!demo_done){ +buff_index = (buff_index + 1) %3; +if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); +if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); +if(!prefix){ + fps = 1./(what_time_is_it_now() - demo_time); + demo_time = what_time_is_it_now(); + display_in_thread(0); +}else{ + char name[256]; + sprintf(name, "%s_%08d", prefix, count); + 
save_image(buff[(buff_index + 1)%3], name); +} +pthread_join(fetch_thread, 0); +pthread_join(detect_thread, 0); +++count; +} +} +*/ +#else +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg, float hier, int w, int h, int frames, int fullscreen) +{ + fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); +} +#endif + diff --git a/workloads/realworld/pinned/darknet/src/demo.h b/workloads/realworld/pinned/darknet/src/demo.h new file mode 100644 index 0000000000000000000000000000000000000000..86e46541d1a7473b22373b29bc6ff9cc281d4939 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/demo.h @@ -0,0 +1,6 @@ +#ifndef DEMO_H +#define DEMO_H + +#include "image.h" + +#endif diff --git a/workloads/realworld/pinned/darknet/src/detection_layer.c b/workloads/realworld/pinned/darknet/src/detection_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..565fa3c3f7d123736d65661d3be8ea91e26b3d5c --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/detection_layer.c @@ -0,0 +1,275 @@ +#include "detection_layer.h" +#include "activations.h" +#include "softmax_layer.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore) +{ + detection_layer l = {0}; + l.type = DETECTION; + + l.n = n; + l.batch = batch; + l.inputs = inputs; + l.classes = classes; + l.coords = coords; + l.rescore = rescore; + l.side = side; + l.w = side; + l.h = side; + assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs); + l.cost = calloc(1, sizeof(float)); + l.outputs = l.inputs; + l.truths = l.side*l.side*(1+l.coords+l.classes); + l.output = calloc(batch*l.outputs, sizeof(float)); + l.delta = calloc(batch*l.outputs, sizeof(float)); + + l.forward = forward_detection_layer; + l.backward 
= backward_detection_layer; +#ifdef GPU + l.forward_gpu = forward_detection_layer_gpu; + l.backward_gpu = backward_detection_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "Detection Layer\n"); + srand(0); + + return l; +} + +void forward_detection_layer(const detection_layer l, network net) +{ + int locations = l.side*l.side; + int i,j; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + //if(l.reorg) reorg(l.output, l.w*l.h, size*l.n, l.batch, 1); + int b; + if (l.softmax){ + for(b = 0; b < l.batch; ++b){ + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + int offset = i*l.classes; + softmax(l.output + index + offset, l.classes, 1, 1, + l.output + index + offset); + } + } + } + if(net.train){ + float avg_iou = 0; + float avg_cat = 0; + float avg_allcat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + *(l.cost) = 0; + int size = l.inputs * l.batch; + memset(l.delta, 0, size * sizeof(float)); + for (b = 0; b < l.batch; ++b){ + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + int truth_index = (b*locations + i)*(1+l.coords+l.classes); + int is_obj = net.truth[truth_index]; + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + l.delta[p_index] = l.noobject_scale*(0 - l.output[p_index]); + *(l.cost) += l.noobject_scale*pow(l.output[p_index], 2); + avg_anyobj += l.output[p_index]; + } + + int best_index = -1; + float best_iou = 0; + float best_rmse = 20; + + if (!is_obj){ + continue; + } + + int class_index = index + i*l.classes; + for(j = 0; j < l.classes; ++j) { + l.delta[class_index+j] = l.class_scale * (net.truth[truth_index+1+j] - l.output[class_index+j]); + *(l.cost) += l.class_scale * pow(net.truth[truth_index+1+j] - l.output[class_index+j], 2); + if(net.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j]; + avg_allcat += l.output[class_index+j]; + 
} + + box truth = float_to_box(net.truth + truth_index + 1 + l.classes, 1); + truth.x /= l.side; + truth.y /= l.side; + + for(j = 0; j < l.n; ++j){ + int box_index = index + locations*(l.classes + l.n) + (i*l.n + j) * l.coords; + box out = float_to_box(l.output + box_index, 1); + out.x /= l.side; + out.y /= l.side; + + if (l.sqrt){ + out.w = out.w*out.w; + out.h = out.h*out.h; + } + + float iou = box_iou(out, truth); + //iou = 0; + float rmse = box_rmse(out, truth); + if(best_iou > 0 || iou > 0){ + if(iou > best_iou){ + best_iou = iou; + best_index = j; + } + }else{ + if(rmse < best_rmse){ + best_rmse = rmse; + best_index = j; + } + } + } + + if(l.forced){ + if(truth.w*truth.h < .1){ + best_index = 1; + }else{ + best_index = 0; + } + } + if(l.random && *(net.seen) < 64000){ + best_index = rand()%l.n; + } + + int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords; + int tbox_index = truth_index + 1 + l.classes; + + box out = float_to_box(l.output + box_index, 1); + out.x /= l.side; + out.y /= l.side; + if (l.sqrt) { + out.w = out.w*out.w; + out.h = out.h*out.h; + } + float iou = box_iou(out, truth); + + //printf("%d,", best_index); + int p_index = index + locations*l.classes + i*l.n + best_index; + *(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2); + *(l.cost) += l.object_scale * pow(1-l.output[p_index], 2); + avg_obj += l.output[p_index]; + l.delta[p_index] = l.object_scale * (1.-l.output[p_index]); + + if(l.rescore){ + l.delta[p_index] = l.object_scale * (iou - l.output[p_index]); + } + + l.delta[box_index+0] = l.coord_scale*(net.truth[tbox_index + 0] - l.output[box_index + 0]); + l.delta[box_index+1] = l.coord_scale*(net.truth[tbox_index + 1] - l.output[box_index + 1]); + l.delta[box_index+2] = l.coord_scale*(net.truth[tbox_index + 2] - l.output[box_index + 2]); + l.delta[box_index+3] = l.coord_scale*(net.truth[tbox_index + 3] - l.output[box_index + 3]); + if(l.sqrt){ + l.delta[box_index+2] = 
l.coord_scale*(sqrt(net.truth[tbox_index + 2]) - l.output[box_index + 2]); + l.delta[box_index+3] = l.coord_scale*(sqrt(net.truth[tbox_index + 3]) - l.output[box_index + 3]); + } + + *(l.cost) += pow(1-iou, 2); + avg_iou += iou; + ++count; + } + } + + if(0){ + float *costs = calloc(l.batch*locations*l.n, sizeof(float)); + for (b = 0; b < l.batch; ++b) { + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + costs[b*locations*l.n + i*l.n + j] = l.delta[p_index]*l.delta[p_index]; + } + } + } + int indexes[100]; + top_k(costs, l.batch*locations*l.n, 100, indexes); + float cutoff = costs[indexes[99]]; + for (b = 0; b < l.batch; ++b) { + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + if (l.delta[p_index]*l.delta[p_index] < cutoff) l.delta[p_index] = 0; + } + } + } + free(costs); + } + + + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + + + printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count); + //if(l.reorg) reorg(l.delta, l.w*l.h, size*l.n, l.batch, 0); + } +} + +void backward_detection_layer(const detection_layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +void get_detection_detections(layer l, int w, int h, float thresh, detection *dets) +{ + int i,j,n; + float *predictions = l.output; + //int per_cell = 5*num+classes; + for (i = 0; i < l.side*l.side; ++i){ + int row = i / l.side; + int col = i % l.side; + for(n = 0; n < l.n; ++n){ + int index = i*l.n + n; + int p_index = l.side*l.side*l.classes + i*l.n + n; + float scale = predictions[p_index]; + int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n)*4; + box b; + b.x = (predictions[box_index + 0] + 
col) / l.side * w; + b.y = (predictions[box_index + 1] + row) / l.side * h; + b.w = pow(predictions[box_index + 2], (l.sqrt?2:1)) * w; + b.h = pow(predictions[box_index + 3], (l.sqrt?2:1)) * h; + dets[index].bbox = b; + dets[index].objectness = scale; + for(j = 0; j < l.classes; ++j){ + int class_index = i*l.classes; + float prob = scale*predictions[class_index+j]; + dets[index].prob[j] = (prob > thresh) ? prob : 0; + } + } + } +} + +#ifdef GPU + +void forward_detection_layer_gpu(const detection_layer l, network net) +{ + if(!net.train){ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + return; + } + + cuda_pull_array(net.input_gpu, net.input, l.batch*l.inputs); + forward_detection_layer(l, net); + cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +void backward_detection_layer_gpu(detection_layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); + //copy_gpu(l.batch*l.inputs, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/pinned/darknet/src/detection_layer.h b/workloads/realworld/pinned/darknet/src/detection_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1c818535700c770c7a5d9387534b199b58876198 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/detection_layer.h @@ -0,0 +1,18 @@ +#ifndef DETECTION_LAYER_H +#define DETECTION_LAYER_H + +#include "layer.h" +#include "network.h" + +typedef layer detection_layer; + +detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore); +void forward_detection_layer(const detection_layer l, network net); +void backward_detection_layer(const detection_layer l, network net); + +#ifdef GPU +void forward_detection_layer_gpu(const detection_layer l, network net); +void backward_detection_layer_gpu(detection_layer l, network net); +#endif + +#endif diff --git 
a/workloads/realworld/pinned/darknet/src/dropout_layer.c b/workloads/realworld/pinned/darknet/src/dropout_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..8fefa22caeddd174b2a7010274fadae854c742c1 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/dropout_layer.c @@ -0,0 +1,60 @@ +#include "dropout_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include +#include + +dropout_layer make_dropout_layer(int batch, int inputs, float probability) +{ + dropout_layer l = {0}; + l.type = DROPOUT; + l.probability = probability; + l.inputs = inputs; + l.outputs = inputs; + l.batch = batch; + l.rand = calloc(inputs*batch, sizeof(float)); + l.scale = 1./(1.-probability); + l.forward = forward_dropout_layer; + l.backward = backward_dropout_layer; + #ifdef GPU + l.forward_gpu = forward_dropout_layer_gpu; + l.backward_gpu = backward_dropout_layer_gpu; + l.rand_gpu = cuda_make_array(l.rand, inputs*batch); + #endif + fprintf(stderr, "dropout p = %.2f %4d -> %4d\n", probability, inputs, inputs); + return l; +} + +void resize_dropout_layer(dropout_layer *l, int inputs) +{ + l->rand = realloc(l->rand, l->inputs*l->batch*sizeof(float)); + #ifdef GPU + cuda_free(l->rand_gpu); + + l->rand_gpu = cuda_make_array(l->rand, inputs*l->batch); + #endif +} + +void forward_dropout_layer(dropout_layer l, network net) +{ + int i; + if (!net.train) return; + for(i = 0; i < l.batch * l.inputs; ++i){ + float r = rand_uniform(0, 1); + l.rand[i] = r; + if(r < l.probability) net.input[i] = 0; + else net.input[i] *= l.scale; + } +} + +void backward_dropout_layer(dropout_layer l, network net) +{ + int i; + if(!net.delta) return; + for(i = 0; i < l.batch * l.inputs; ++i){ + float r = l.rand[i]; + if(r < l.probability) net.delta[i] = 0; + else net.delta[i] *= l.scale; + } +} + diff --git a/workloads/realworld/pinned/darknet/src/dropout_layer.h b/workloads/realworld/pinned/darknet/src/dropout_layer.h new file mode 100644 index 
0000000000000000000000000000000000000000..01f94d4d7d10b732fb0e558089579e95128a70bd --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/dropout_layer.h @@ -0,0 +1,20 @@ +#ifndef DROPOUT_LAYER_H +#define DROPOUT_LAYER_H + +#include "layer.h" +#include "network.h" + +typedef layer dropout_layer; + +dropout_layer make_dropout_layer(int batch, int inputs, float probability); + +void forward_dropout_layer(dropout_layer l, network net); +void backward_dropout_layer(dropout_layer l, network net); +void resize_dropout_layer(dropout_layer *l, int inputs); + +#ifdef GPU +void forward_dropout_layer_gpu(dropout_layer l, network net); +void backward_dropout_layer_gpu(dropout_layer l, network net); + +#endif +#endif diff --git a/workloads/realworld/pinned/darknet/src/dropout_layer_kernels.cu b/workloads/realworld/pinned/darknet/src/dropout_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..23aef8f12ffc390504e05f0839878f7787a5381f --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/dropout_layer_kernels.cu @@ -0,0 +1,41 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "dropout_layer.h" +#include "cuda_dark.h" +#include "utils.h" +} + +__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id < size) input[id] = (rand[id] < prob) ? 
0 : input[id]*scale; +} + +void forward_dropout_layer_gpu(dropout_layer layer, network net) +{ + if (!net.train) return; + int size = layer.inputs*layer.batch; + cuda_random(layer.rand_gpu, size); + /* + int i; + for(i = 0; i < size; ++i){ + layer.rand[i] = rand_uniform(); + } + cuda_push_array(layer.rand_gpu, layer.rand, size); + */ + + yoloswag420blazeit360noscope<<>>(net.input_gpu, size, layer.rand_gpu, layer.probability, layer.scale); + check_error(cudaPeekAtLastError()); +} + +void backward_dropout_layer_gpu(dropout_layer layer, network net) +{ + if(!net.delta_gpu) return; + int size = layer.inputs*layer.batch; + + yoloswag420blazeit360noscope<<>>(net.delta_gpu, size, layer.rand_gpu, layer.probability, layer.scale); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/pinned/darknet/src/gemm.c b/workloads/realworld/pinned/darknet/src/gemm.c new file mode 100644 index 0000000000000000000000000000000000000000..756ae595d7348fc2d343a48715b05ea882d6aa7c --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/gemm.c @@ -0,0 +1,324 @@ +#include "gemm.h" +#include "utils.h" +#include "cuda_dark.h" +#include +#include +#include + +void gemm_bin(int M, int N, int K, float ALPHA, + char *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + char A_PART = A[i*lda+k]; + if(A_PART){ + for(j = 0; j < N; ++j){ + C[i*ldc+j] += B[k*ldb+j]; + } + } else { + for(j = 0; j < N; ++j){ + C[i*ldc+j] -= B[k*ldb+j]; + } + } + } + } +} + +float *random_matrix(int rows, int cols) +{ + int i; + float *m = calloc(rows*cols, sizeof(float)); + for(i = 0; i < rows*cols; ++i){ + m[i] = (float)rand()/RAND_MAX; + } + return m; +} + +void time_random_matrix(int TA, int TB, int m, int k, int n) +{ + float *a; + if(!TA) a = random_matrix(m,k); + else a = random_matrix(k,m); + int lda = (!TA)?k:m; + float *b; + if(!TB) b = random_matrix(k,n); + else b = random_matrix(n,k); + int ldb = (!TB)?n:k; + + float *c 
= random_matrix(m,n); + int i; + clock_t start = clock(), end; + for(i = 0; i<10; ++i){ + gemm_cpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); + } + end = clock(); + printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf ms\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); + free(a); + free(b); + free(c); +} + + +void gemm(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + gemm_cpu( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); +} + +void gemm_nn(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + register float A_PART = ALPHA*A[i*lda+k]; + for(j = 0; j < N; ++j){ + C[i*ldc+j] += A_PART*B[k*ldb+j]; + } + } + } +} + +void gemm_nt(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + register float sum = 0; + for(k = 0; k < K; ++k){ + sum += ALPHA*A[i*lda+k]*B[j*ldb + k]; + } + C[i*ldc+j] += sum; + } + } +} + +void gemm_tn(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + register float A_PART = ALPHA*A[k*lda+i]; + for(j = 0; j < N; ++j){ + C[i*ldc+j] += A_PART*B[k*ldb+j]; + } + } + } +} + +void gemm_tt(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + register float sum = 0; + for(k = 0; k < K; ++k){ + sum += ALPHA*A[i+k*lda]*B[k+j*ldb]; + } + C[i*ldc+j] += sum; + } + } +} + + +void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ 
+ //printf("cpu: %d %d %d %d %d %f %d %d %f %d\n",TA, TB, M, N, K, ALPHA, lda, ldb, BETA, ldc); + int i, j; + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + C[i*ldc + j] *= BETA; + } + } + if(!TA && !TB) + gemm_nn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else if(TA && !TB) + gemm_tn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else if(!TA && TB) + gemm_nt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else + gemm_tt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); +} + + +// #ifdef GPU + +// #include + +// void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, +// float *A_gpu, int lda, +// float *B_gpu, int ldb, +// float BETA, +// float *C_gpu, int ldc) +// { +// cublasHandle_t handle = blas_handle(); +// cudaError_t status = (cudaError_t) cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), +// (TA ? CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); +// check_error(status); +// } + +// #include +// #include +// #include +// #include + +// void time_gpu_random_matrix(int TA, int TB, int m, int k, int n) +// { +// float *a; +// if(!TA) a = random_matrix(m,k); +// else a = random_matrix(k,m); +// int lda = (!TA)?k:m; +// float *b; +// if(!TB) b = random_matrix(k,n); +// else b = random_matrix(n,k); +// int ldb = (!TB)?n:k; + +// float *c = random_matrix(m,n); +// int i; +// clock_t start = clock(), end; +// for(i = 0; i<32; ++i){ +// gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); +// } +// end = clock(); +// printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); +// free(a); +// free(b); +// free(c); +// } + +// void time_gpu(int TA, int TB, int m, int k, int n) +// { +// int iter = 10; +// float *a = random_matrix(m,k); +// float *b = random_matrix(k,n); + +// int lda = (!TA)?k:m; +// int ldb = (!TB)?n:k; + +// float *c = random_matrix(m,n); + +// float *a_cl = cuda_make_array(a, m*k); +// float *b_cl = cuda_make_array(b, k*n); +// float *c_cl = cuda_make_array(c, m*n); + +// 
int i; +// clock_t start = clock(), end; +// for(i = 0; i +#include +#include +#include +#include + +#include "gemm.h" +#include "utils.h" +#include "cuda_dark.h" + +#include +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK_X 16 +#define DIM_THREAD_BLOCK_Y 16 + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +__global__ void gemm_kernel(float *a, float *b, float *c, int M, int K, int N, float alpha, float beta) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // Compute each thread's global row and column index + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + + + // Statically allocated shared memory + __shared__ float s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + __shared__ float s_b[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + // Accumulate in temporary variable + + float tmp = 0.0f; + if (row < M && col < N) { + + tmp = beta * c[row * N + col]; + + // Sweep tile across matrix + for (int i = 0; i < K; i += blockDim.x) { + int left = K - i; + + if ((i + threadIdx.x) < K) + s_a[threadIdx.y * blockDim.x + threadIdx.x] = a[row * K + i + threadIdx.x]; + + if ((i + threadIdx.y) < K) + s_b[threadIdx.y * blockDim.x + threadIdx.x] = b[(i + threadIdx.y) * N + col]; + + block.sync(); + + for (int k = 0; k < blockDim.x && k < left ; k++) { + tmp += alpha * s_a[threadIdx.y * blockDim.x + k] * s_b[k * blockDim.x + threadIdx.x]; + } + block.sync(); + } + c[row * N + col] = tmp; + } + block.sync(); +} + +void gemmCuda(float *A, float *B, float *C, int M, int N, int K, float alpha, float beta) +{ + // float *A_gpu; + // float *B_gpu; + // float *C_gpu; + + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 
grid((size_t)(ceil(((float)N) / ((float)block.y))), (size_t)(ceil(((float)M) / ((float)block.x)))); + + // cudaMalloc(&A_gpu, sizeof(float) * M * K); + // cudaMalloc(&B_gpu, sizeof(float) * K * N); + // cudaMalloc(&C_gpu, sizeof(float) * M * N); + + // cudaMemcpy(A_gpu, A, sizeof(float) * M * K, cudaMemcpyHostToDevice); + // cudaMemcpy(B_gpu, B, sizeof(float) * K * N, cudaMemcpyHostToDevice); + // cudaMemcpy(C_gpu, C, sizeof(float) * M * N, cudaMemcpyHostToDevice); + gemm_kernel<<>>(A, B, C, M, K, N, alpha, beta); + // cudaDeviceSynchronize(); + // cudaMemcpy(C, C_gpu, sizeof(float) * M * N, cudaMemcpyDeviceToHost); + + // cudaFree(A_gpu); + // cudaFree(B_gpu); + // cudaFree(C_gpu); +} + +// void gemmCuda(float *A, float *B, float *C, int M, int N, int K, float alpha, float beta) +// { +// float *A_gpu; +// float *B_gpu; +// float *C_gpu; + +// dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); +// dim3 grid((size_t)(ceil(((float)N) / ((float)block.y))), (size_t)(ceil(((float)M) / ((float)block.x)))); + +// cudaMalloc(&A_gpu, sizeof(float) * M * K); +// cudaMalloc(&B_gpu, sizeof(float) * K * N); +// cudaMalloc(&C_gpu, sizeof(float) * M * N); + +// cudaMemcpy(A_gpu, A, sizeof(float) * M * K, cudaMemcpyHostToDevice); +// cudaMemcpy(B_gpu, B, sizeof(float) * K * N, cudaMemcpyHostToDevice); +// cudaMemcpy(C_gpu, C, sizeof(float) * M * N, cudaMemcpyHostToDevice); +// gemm_kernel<<>>(A_gpu, B_gpu, C_gpu, M, K, N, alpha, beta); +// // cudaDeviceSynchronize(); +// cudaMemcpy(C, C_gpu, sizeof(float) * M * N, cudaMemcpyDeviceToHost); + +// cudaFree(A_gpu); +// cudaFree(B_gpu); +// cudaFree(C_gpu); +// } + +// void gemmCuda(float *A, float *B, float *C, int M, int N, int K, float alpha, float beta) +// { +// float *A_gpu; +// float *B_gpu; +// float *C_gpu; + +// dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); +// dim3 grid((size_t)(ceil(((float)N) / ((float)block.y))), (size_t)(ceil(((float)M) / ((float)block.x)))); + +// cudaMallocManaged(&A_gpu, sizeof(float) * M * 
K); +// cudaMallocManaged(&B_gpu, sizeof(float) * K * N); +// cudaMallocManaged(&C_gpu, sizeof(float) * M * N); +// printf("allocation succeed!\n"); + +// memcpy(A_gpu, A, sizeof(float) * M * K); +// memcpy(B_gpu, B, sizeof(float) * K * N); +// printf("memcpy succeed!\n"); +// // cudaMemcpy(C_gpu, C, sizeof(float) * M * N, cudaMemcpyHostToDevice); +// gemm_kernel<<>>(A_gpu, B_gpu, C_gpu, M, K, N, alpha, beta); +// cudaDeviceSynchronize(); +// memcpy(C, C_gpu, sizeof(float) * M * N); + +// cudaFree(A_gpu); +// cudaFree(B_gpu); +// cudaFree(C_gpu); +// } + +void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, + float *A_gpu, int lda, + float *B_gpu, int ldb, + float BETA, + float *C_gpu, int ldc) +{ + // printf("TA is %d, TB is %d, M is %d, N is %d, K is %d, lda is %d, ldb is %d, ldc is %d.\n", TA, TB, M, N, K, lda, ldb, ldc); + if (TA == 0 && TB == 0) { + gemmCuda(A_gpu, B_gpu, C_gpu, M, N, K, ALPHA, BETA); + } else { + cublasHandle_t handle = blas_handle(); + cudaError_t status = (cudaError_t) cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), + (TA ? 
CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); + check_error(status); + } +} + + +void time_gpu_random_matrix(int TA, int TB, int m, int k, int n) +{ + float *a; + if(!TA) a = random_matrix(m,k); + else a = random_matrix(k,m); + int lda = (!TA)?k:m; + float *b; + if(!TB) b = random_matrix(k,n); + else b = random_matrix(n,k); + int ldb = (!TB)?n:k; + + float *c = random_matrix(m,n); + int i; + clock_t start = clock(), end; + for(i = 0; i<32; ++i){ + gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); + } + end = clock(); + printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); + free(a); + free(b); + free(c); +} + +void time_gpu(int TA, int TB, int m, int k, int n) +{ + int iter = 10; + float *a = random_matrix(m,k); + float *b = random_matrix(k,n); + + int lda = (!TA)?k:m; + int ldb = (!TB)?n:k; + + float *c = random_matrix(m,n); + + float *a_cl = cuda_make_array(a, m*k); + float *b_cl = cuda_make_array(b, k*n); + float *c_cl = cuda_make_array(c, m*n); + + int i; + clock_t start = clock(), end; + for(i = 0; i +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam) +{ + fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = GRU; + l.steps = steps; + l.inputs = inputs; + + l.uz = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uz) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uz->batch = batch; + + l.wz = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wz) = 
make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wz->batch = batch; + + l.ur = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.ur) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.ur->batch = batch; + + l.wr = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wr) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wr->batch = batch; + + + + l.uh = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uh) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uh->batch = batch; + + l.wh = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wh) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wh->batch = batch; + + l.batch_normalize = batch_normalize; + + + l.outputs = outputs; + l.output = calloc(outputs*batch*steps, sizeof(float)); + l.delta = calloc(outputs*batch*steps, sizeof(float)); + l.state = calloc(outputs*batch, sizeof(float)); + l.prev_state = calloc(outputs*batch, sizeof(float)); + l.forgot_state = calloc(outputs*batch, sizeof(float)); + l.forgot_delta = calloc(outputs*batch, sizeof(float)); + + l.r_cpu = calloc(outputs*batch, sizeof(float)); + l.z_cpu = calloc(outputs*batch, sizeof(float)); + l.h_cpu = calloc(outputs*batch, sizeof(float)); + + l.forward = forward_gru_layer; + l.backward = backward_gru_layer; + l.update = update_gru_layer; + +#ifdef GPU + l.forward_gpu = forward_gru_layer_gpu; + l.backward_gpu = backward_gru_layer_gpu; + l.update_gpu = update_gru_layer_gpu; + + l.forgot_state_gpu = cuda_make_array(0, batch*outputs); + l.forgot_delta_gpu = cuda_make_array(0, batch*outputs); + l.prev_state_gpu = cuda_make_array(0, batch*outputs); + l.state_gpu = cuda_make_array(0, batch*outputs); + l.output_gpu = cuda_make_array(0, batch*outputs*steps); + l.delta_gpu = cuda_make_array(0, batch*outputs*steps); + l.r_gpu = cuda_make_array(0, 
batch*outputs); + l.z_gpu = cuda_make_array(0, batch*outputs); + l.h_gpu = cuda_make_array(0, batch*outputs); + +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l.uz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uz->out_c, l.uz->out_h, l.uz->out_w); + cudnnSetTensor4dDescriptor(l.uh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uh->out_c, l.uh->out_h, l.uh->out_w); + cudnnSetTensor4dDescriptor(l.ur->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ur->out_c, l.ur->out_h, l.ur->out_w); + cudnnSetTensor4dDescriptor(l.wz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wz->out_c, l.wz->out_h, l.wz->out_w); + cudnnSetTensor4dDescriptor(l.wh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wh->out_c, l.wh->out_h, l.wh->out_w); + cudnnSetTensor4dDescriptor(l.wr->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wr->out_c, l.wr->out_h, l.wr->out_w); +#endif +#endif + + return l; +} + +void update_gru_layer(layer l, update_args a) +{ + update_connected_layer(*(l.ur), a); + update_connected_layer(*(l.uz), a); + update_connected_layer(*(l.uh), a); + update_connected_layer(*(l.wr), a); + update_connected_layer(*(l.wz), a); + update_connected_layer(*(l.wh), a); +} + +void forward_gru_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + fill_cpu(l.outputs * l.batch * l.steps, 0, uz.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, ur.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, uh.delta, 1); + + fill_cpu(l.outputs * l.batch * l.steps, 0, wz.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wr.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wh.delta, 1); + if(net.train) { + fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1); + copy_cpu(l.outputs*l.batch, l.state, 1, l.prev_state, 1); + } + + for (i = 0; i < 
l.steps; ++i) { + s.input = l.state; + forward_connected_layer(wz, s); + forward_connected_layer(wr, s); + + s.input = net.input; + forward_connected_layer(uz, s); + forward_connected_layer(ur, s); + forward_connected_layer(uh, s); + + + copy_cpu(l.outputs*l.batch, uz.output, 1, l.z_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wz.output, 1, l.z_cpu, 1); + + copy_cpu(l.outputs*l.batch, ur.output, 1, l.r_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wr.output, 1, l.r_cpu, 1); + + activate_array(l.z_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.r_cpu, l.outputs*l.batch, LOGISTIC); + + copy_cpu(l.outputs*l.batch, l.state, 1, l.forgot_state, 1); + mul_cpu(l.outputs*l.batch, l.r_cpu, 1, l.forgot_state, 1); + + s.input = l.forgot_state; + forward_connected_layer(wh, s); + + copy_cpu(l.outputs*l.batch, uh.output, 1, l.h_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wh.output, 1, l.h_cpu, 1); + + if(l.tanh){ + activate_array(l.h_cpu, l.outputs*l.batch, TANH); + } else { + activate_array(l.h_cpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_sum_cpu(l.state, l.h_cpu, l.z_cpu, l.outputs*l.batch, l.output); + + copy_cpu(l.outputs*l.batch, l.output, 1, l.state, 1); + + net.input += l.inputs*l.batch; + l.output += l.outputs*l.batch; + increment_layer(&uz, 1); + increment_layer(&ur, 1); + increment_layer(&uh, 1); + + increment_layer(&wz, 1); + increment_layer(&wr, 1); + increment_layer(&wh, 1); + } +} + +void backward_gru_layer(layer l, network net) +{ +} + +#ifdef GPU + +void pull_gru_layer(layer l) +{ +} + +void push_gru_layer(layer l) +{ +} + +void update_gru_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.ur), a); + update_connected_layer_gpu(*(l.uz), a); + update_connected_layer_gpu(*(l.uh), a); + update_connected_layer_gpu(*(l.wr), a); + update_connected_layer_gpu(*(l.wz), a); + update_connected_layer_gpu(*(l.wh), a); +} + +void forward_gru_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer 
ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + fill_gpu(l.outputs * l.batch * l.steps, 0, uz.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, ur.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, uh.delta_gpu, 1); + + fill_gpu(l.outputs * l.batch * l.steps, 0, wz.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wr.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wh.delta_gpu, 1); + if(net.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.prev_state_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(wz, s); + forward_connected_layer_gpu(wr, s); + + s.input_gpu = net.input_gpu; + forward_connected_layer_gpu(uz, s); + forward_connected_layer_gpu(ur, s); + forward_connected_layer_gpu(uh, s); + + copy_gpu(l.outputs*l.batch, uz.output_gpu, 1, l.z_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wz.output_gpu, 1, l.z_gpu, 1); + + copy_gpu(l.outputs*l.batch, ur.output_gpu, 1, l.r_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wr.output_gpu, 1, l.r_gpu, 1); + + activate_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.forgot_state_gpu, 1); + mul_gpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1); + + s.input_gpu = l.forgot_state_gpu; + forward_connected_layer_gpu(wh, s); + + copy_gpu(l.outputs*l.batch, uh.output_gpu, 1, l.h_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wh.output_gpu, 1, l.h_gpu, 1); + + if(l.tanh){ + activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH); + } else { + activate_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_sum_gpu(l.state_gpu, l.h_gpu, l.z_gpu, l.outputs*l.batch, l.output_gpu); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.state_gpu, 1); + + net.input_gpu += l.inputs*l.batch; + l.output_gpu += 
l.outputs*l.batch; + increment_layer(&uz, 1); + increment_layer(&ur, 1); + increment_layer(&uh, 1); + + increment_layer(&wz, 1); + increment_layer(&wr, 1); + increment_layer(&wh, 1); + } +} + +void backward_gru_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + increment_layer(&uz, l.steps - 1); + increment_layer(&ur, l.steps - 1); + increment_layer(&uh, l.steps - 1); + + increment_layer(&wz, l.steps - 1); + increment_layer(&wr, l.steps - 1); + increment_layer(&wh, l.steps - 1); + + net.input_gpu += l.inputs*l.batch*(l.steps-1); + if(net.delta_gpu) net.delta_gpu += l.inputs*l.batch*(l.steps-1); + l.output_gpu += l.outputs*l.batch*(l.steps-1); + l.delta_gpu += l.outputs*l.batch*(l.steps-1); + float *end_state = l.output_gpu; + for (i = l.steps-1; i >= 0; --i) { + if(i != 0) copy_gpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + else copy_gpu(l.outputs*l.batch, l.prev_state_gpu, 1, l.state_gpu, 1); + float *prev_delta_gpu = (i == 0) ? 
0 : l.delta_gpu - l.outputs*l.batch; + + copy_gpu(l.outputs*l.batch, uz.output_gpu, 1, l.z_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wz.output_gpu, 1, l.z_gpu, 1); + + copy_gpu(l.outputs*l.batch, ur.output_gpu, 1, l.r_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wr.output_gpu, 1, l.r_gpu, 1); + + activate_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, uh.output_gpu, 1, l.h_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wh.output_gpu, 1, l.h_gpu, 1); + + if(l.tanh){ + activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH); + } else { + activate_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_delta_gpu(l.state_gpu, l.h_gpu, l.z_gpu, prev_delta_gpu, uh.delta_gpu, uz.delta_gpu, l.outputs*l.batch, l.delta_gpu); + + if(l.tanh){ + gradient_array_gpu(l.h_gpu, l.outputs*l.batch, TANH, uh.delta_gpu); + } else { + gradient_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC, uh.delta_gpu); + } + + copy_gpu(l.outputs*l.batch, uh.delta_gpu, 1, wh.delta_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.forgot_state_gpu, 1); + mul_gpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1); + fill_gpu(l.outputs*l.batch, 0, l.forgot_delta_gpu, 1); + + s.input_gpu = l.forgot_state_gpu; + s.delta_gpu = l.forgot_delta_gpu; + + backward_connected_layer_gpu(wh, s); + if(prev_delta_gpu) mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.r_gpu, prev_delta_gpu); + mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.state_gpu, ur.delta_gpu); + + gradient_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC, ur.delta_gpu); + copy_gpu(l.outputs*l.batch, ur.delta_gpu, 1, wr.delta_gpu, 1); + + gradient_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC, uz.delta_gpu); + copy_gpu(l.outputs*l.batch, uz.delta_gpu, 1, wz.delta_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = prev_delta_gpu; + + backward_connected_layer_gpu(wr, s); + backward_connected_layer_gpu(wz, s); + + 
s.input_gpu = net.input_gpu; + s.delta_gpu = net.delta_gpu; + + backward_connected_layer_gpu(uh, s); + backward_connected_layer_gpu(ur, s); + backward_connected_layer_gpu(uz, s); + + + net.input_gpu -= l.inputs*l.batch; + if(net.delta_gpu) net.delta_gpu -= l.inputs*l.batch; + l.output_gpu -= l.outputs*l.batch; + l.delta_gpu -= l.outputs*l.batch; + increment_layer(&uz, -1); + increment_layer(&ur, -1); + increment_layer(&uh, -1); + + increment_layer(&wz, -1); + increment_layer(&wr, -1); + increment_layer(&wh, -1); + } + copy_gpu(l.outputs*l.batch, end_state, 1, l.state_gpu, 1); +} +#endif diff --git a/workloads/realworld/pinned/darknet/src/gru_layer.h b/workloads/realworld/pinned/darknet/src/gru_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9067942e9499d53c8d54f7728d64a5030200f4de --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/gru_layer.h @@ -0,0 +1,24 @@ + +#ifndef GRU_LAYER_H +#define GRU_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam); + +void forward_gru_layer(layer l, network state); +void backward_gru_layer(layer l, network state); +void update_gru_layer(layer l, update_args a); + +#ifdef GPU +void forward_gru_layer_gpu(layer l, network state); +void backward_gru_layer_gpu(layer l, network state); +void update_gru_layer_gpu(layer l, update_args a); +void push_gru_layer(layer l); +void pull_gru_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/pinned/darknet/src/im2col.c b/workloads/realworld/pinned/darknet/src/im2col.c new file mode 100644 index 0000000000000000000000000000000000000000..69ec98a9d12b2e21a3859611ad709d62fc80dcf3 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/im2col.c @@ -0,0 +1,40 @@ +#include "im2col.h" +#include +float im2col_get_pixel(float *im, int height, int width, int channels, + int row, int col, int channel, int pad) +{ + row -= pad; + 
col -= pad; + + if (row < 0 || col < 0 || + row >= height || col >= width) return 0; + return im[col + width*(row + height*channel)]; +} + +//From Berkeley Vision's Caffe! +//https://github.com/BVLC/caffe/blob/master/LICENSE +void im2col_cpu(float* data_im, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_col) +{ + int c,h,w; + int height_col = (height + 2*pad - ksize) / stride + 1; + int width_col = (width + 2*pad - ksize) / stride + 1; + + int channels_col = channels * ksize * ksize; + for (c = 0; c < channels_col; ++c) { + int w_offset = c % ksize; + int h_offset = (c / ksize) % ksize; + int c_im = c / ksize / ksize; + for (h = 0; h < height_col; ++h) { + for (w = 0; w < width_col; ++w) { + int im_row = h_offset + h * stride; + int im_col = w_offset + w * stride; + int col_index = (c * height_col + h) * width_col + w; + data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, + im_row, im_col, c_im, pad); + } + } + } +} + diff --git a/workloads/realworld/pinned/darknet/src/im2col.h b/workloads/realworld/pinned/darknet/src/im2col.h new file mode 100644 index 0000000000000000000000000000000000000000..02c4247fad9b8428a8e89fc8caec0b5b6ba5b36a --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/im2col.h @@ -0,0 +1,15 @@ +#ifndef IM2COL_H +#define IM2COL_H + +void im2col_cpu(float* data_im, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_col); + +#ifdef GPU + +void im2col_gpu(float *im, + int channels, int height, int width, + int ksize, int stride, int pad,float *data_col); + +#endif +#endif diff --git a/workloads/realworld/pinned/darknet/src/im2col_kernels.cu b/workloads/realworld/pinned/darknet/src/im2col_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..decbc1202a9ebd3916312527cc0cdad3fef9b264 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/im2col_kernels.cu @@ -0,0 +1,61 @@ +#include "cuda_runtime.h" +#include "curand.h" 
+#include "cublas_v2.h" + +extern "C" { +#include "im2col.h" +#include "cuda_dark.h" +} + +// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu +// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE + +__global__ void im2col_gpu_kernel(const int n, const float* data_im, + const int height, const int width, const int ksize, + const int pad, + const int stride, + const int height_col, const int width_col, + float *data_col) { + int index = blockIdx.x*blockDim.x+threadIdx.x; + for(; index < n; index += blockDim.x*gridDim.x){ + int w_out = index % width_col; + int h_index = index / width_col; + int h_out = h_index % height_col; + int channel_in = h_index / height_col; + int channel_out = channel_in * ksize * ksize; + int h_in = h_out * stride - pad; + int w_in = w_out * stride - pad; + float* data_col_ptr = data_col; + data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; + const float* data_im_ptr = data_im; + data_im_ptr += (channel_in * height + h_in) * width + w_in; + for (int i = 0; i < ksize; ++i) { + for (int j = 0; j < ksize; ++j) { + int h = h_in + i; + int w = w_in + j; + + *data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ? + data_im_ptr[i * width + j] : 0; + + //*data_col_ptr = data_im_ptr[ii * width + jj]; + + data_col_ptr += height_col * width_col; + } + } + } +} + +void im2col_gpu(float *im, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_col){ + // We are going to launch channels * height_col * width_col kernels, each + // kernel responsible for copying a single-channel grid. 
+ int height_col = (height + 2 * pad - ksize) / stride + 1; + int width_col = (width + 2 * pad - ksize) / stride + 1; + int num_kernels = channels * height_col * width_col; + im2col_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, + BLOCK>>>( + num_kernels, im, height, width, ksize, pad, + stride, height_col, + width_col, data_col); +} diff --git a/workloads/realworld/pinned/darknet/src/image.c b/workloads/realworld/pinned/darknet/src/image.c new file mode 100644 index 0000000000000000000000000000000000000000..3edf6d1045f4637d7bb108440302fd36d5ef9a18 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/image.c @@ -0,0 +1,1467 @@ +#include "image.h" +#include "utils.h" +#include "blas.h" +#include "cuda_dark.h" +#include +#include + +#define STB_IMAGE_IMPLEMENTATION +#include "stb_image.h" +#define STB_IMAGE_WRITE_IMPLEMENTATION +#include "stb_image_write.h" + +int windows = 0; + +float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} }; + +float get_color(int c, int x, int max) +{ + float ratio = ((float)x/max)*5; + int i = floor(ratio); + int j = ceil(ratio); + ratio -= i; + float r = (1-ratio) * colors[i][c] + ratio*colors[j][c]; + //printf("%f\n", r); + return r; +} + +image mask_to_rgb(image mask) +{ + int n = mask.c; + image im = make_image(mask.w, mask.h, 3); + int i, j; + for(j = 0; j < n; ++j){ + int offset = j*123457 % n; + float red = get_color(2,offset,n); + float green = get_color(1,offset,n); + float blue = get_color(0,offset,n); + for(i = 0; i < im.w*im.h; ++i){ + im.data[i + 0*im.w*im.h] += mask.data[j*im.h*im.w + i]*red; + im.data[i + 1*im.w*im.h] += mask.data[j*im.h*im.w + i]*green; + im.data[i + 2*im.w*im.h] += mask.data[j*im.h*im.w + i]*blue; + } + } + return im; +} + +static float get_pixel(image m, int x, int y, int c) +{ + assert(x < m.w && y < m.h && c < m.c); + return m.data[c*m.h*m.w + y*m.w + x]; +} +static float get_pixel_extend(image m, int x, int y, int c) +{ + if(x < 0 || x >= m.w || y < 0 || y >= m.h) return 0; + /* + 
if(x < 0) x = 0; + if(x >= m.w) x = m.w-1; + if(y < 0) y = 0; + if(y >= m.h) y = m.h-1; + */ + if(c < 0 || c >= m.c) return 0; + return get_pixel(m, x, y, c); +} +static void set_pixel(image m, int x, int y, int c, float val) +{ + if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return; + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] = val; +} +static void add_pixel(image m, int x, int y, int c, float val) +{ + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] += val; +} + +static float bilinear_interpolate(image im, float x, float y, int c) +{ + int ix = (int) floorf(x); + int iy = (int) floorf(y); + + float dx = x - ix; + float dy = y - iy; + + float val = (1-dy) * (1-dx) * get_pixel_extend(im, ix, iy, c) + + dy * (1-dx) * get_pixel_extend(im, ix, iy+1, c) + + (1-dy) * dx * get_pixel_extend(im, ix+1, iy, c) + + dy * dx * get_pixel_extend(im, ix+1, iy+1, c); + return val; +} + + +void composite_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float val = get_pixel(source, x, y, k); + float val2 = get_pixel_extend(dest, dx+x, dy+y, k); + set_pixel(dest, dx+x, dy+y, k, val * val2); + } + } + } +} + +image border_image(image a, int border) +{ + image b = make_image(a.w + 2*border, a.h + 2*border, a.c); + int x,y,k; + for(k = 0; k < b.c; ++k){ + for(y = 0; y < b.h; ++y){ + for(x = 0; x < b.w; ++x){ + float val = get_pixel_extend(a, x - border, y - border, k); + if(x - border < 0 || x - border >= a.w || y - border < 0 || y - border >= a.h) val = 1; + set_pixel(b, x, y, k, val); + } + } + } + return b; +} + +image tile_images(image a, image b, int dx) +{ + if(a.w == 0) return copy_image(b); + image c = make_image(a.w + b.w + dx, (a.h > b.h) ? a.h : b.h, (a.c > b.c) ? 
a.c : b.c); + fill_cpu(c.w*c.h*c.c, 1, c.data, 1); + embed_image(a, c, 0, 0); + composite_image(b, c, a.w + dx, 0); + return c; +} + +image get_label(image **characters, char *string, int size) +{ + size = size/10; + if(size > 7) size = 7; + image label = make_empty_image(0,0,0); + while(*string){ + image l = characters[size][(int)*string]; + image n = tile_images(label, l, -size - 1 + (size+1)/2); + free_image(label); + label = n; + ++string; + } + image b = border_image(label, label.h*.25); + free_image(label); + return b; +} + +void draw_label(image a, int r, int c, image label, const float *rgb) +{ + int w = label.w; + int h = label.h; + if (r - h >= 0) r = r - h; + + int i, j, k; + for(j = 0; j < h && j + r < a.h; ++j){ + for(i = 0; i < w && i + c < a.w; ++i){ + for(k = 0; k < label.c; ++k){ + float val = get_pixel(label, i, j, k); + set_pixel(a, i+c, j+r, k, rgb[k] * val); + } + } + } +} + +void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b) +{ + //normalize_image(a); + int i; + if(x1 < 0) x1 = 0; + if(x1 >= a.w) x1 = a.w-1; + if(x2 < 0) x2 = 0; + if(x2 >= a.w) x2 = a.w-1; + + if(y1 < 0) y1 = 0; + if(y1 >= a.h) y1 = a.h-1; + if(y2 < 0) y2 = 0; + if(y2 >= a.h) y2 = a.h-1; + + for(i = x1; i <= x2; ++i){ + a.data[i + y1*a.w + 0*a.w*a.h] = r; + a.data[i + y2*a.w + 0*a.w*a.h] = r; + + a.data[i + y1*a.w + 1*a.w*a.h] = g; + a.data[i + y2*a.w + 1*a.w*a.h] = g; + + a.data[i + y1*a.w + 2*a.w*a.h] = b; + a.data[i + y2*a.w + 2*a.w*a.h] = b; + } + for(i = y1; i <= y2; ++i){ + a.data[x1 + i*a.w + 0*a.w*a.h] = r; + a.data[x2 + i*a.w + 0*a.w*a.h] = r; + + a.data[x1 + i*a.w + 1*a.w*a.h] = g; + a.data[x2 + i*a.w + 1*a.w*a.h] = g; + + a.data[x1 + i*a.w + 2*a.w*a.h] = b; + a.data[x2 + i*a.w + 2*a.w*a.h] = b; + } +} + +void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b) +{ + int i; + for(i = 0; i < w; ++i){ + draw_box(a, x1+i, y1+i, x2-i, y2-i, r, g, b); + } +} + +void draw_bbox(image a, box bbox, int w, 
float r, float g, float b) +{ + int left = (bbox.x-bbox.w/2)*a.w; + int right = (bbox.x+bbox.w/2)*a.w; + int top = (bbox.y-bbox.h/2)*a.h; + int bot = (bbox.y+bbox.h/2)*a.h; + + int i; + for(i = 0; i < w; ++i){ + draw_box(a, left+i, top+i, right-i, bot-i, r, g, b); + } +} + +image **load_alphabet() +{ + char *value = getenv("UVMAsyncBench_BASE"); + int i, j; + const int nsize = 8; + image **alphabets = calloc(nsize, sizeof(image)); + for(j = 0; j < nsize; ++j){ + alphabets[j] = calloc(128, sizeof(image)); + for(i = 32; i < 127; ++i){ + char buff[256]; + sprintf(buff, "%s/workloads/realworld/standard/darknet/data/labels/%d_%d.png", value, i, j); + alphabets[j][i] = load_image_color(buff, 0, 0); + } + } + return alphabets; +} + +void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes) +{ + int i,j; + + for(i = 0; i < num; ++i){ + char labelstr[4096] = {0}; + int class = -1; + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j] > thresh){ + if (class < 0) { + strcat(labelstr, names[j]); + class = j; + } else { + strcat(labelstr, ", "); + strcat(labelstr, names[j]); + } + printf("%s: %.0f%%\n", names[j], dets[i].prob[j]*100); + } + } + if(class >= 0){ + int width = im.h * .006; + + /* + if(0){ + width = pow(prob, 1./2.)*10+1; + alphabet = 0; + } + */ + + //printf("%d %s: %.0f%%\n", i, names[class], prob*100); + int offset = class*123457 % classes; + float red = get_color(2,offset,classes); + float green = get_color(1,offset,classes); + float blue = get_color(0,offset,classes); + float rgb[3]; + + //width = prob*20+2; + + rgb[0] = red; + rgb[1] = green; + rgb[2] = blue; + box b = dets[i].bbox; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + + int left = (b.x-b.w/2.)*im.w; + int right = (b.x+b.w/2.)*im.w; + int top = (b.y-b.h/2.)*im.h; + int bot = (b.y+b.h/2.)*im.h; + + if(left < 0) left = 0; + if(right > im.w-1) right = im.w-1; + if(top < 0) top = 0; + if(bot > im.h-1) bot = im.h-1; + + draw_box_width(im, 
left, top, right, bot, width, red, green, blue); + if (alphabet) { + image label = get_label(alphabet, labelstr, (im.h*.03)); + draw_label(im, top + width, left, label, rgb); + free_image(label); + } + if (dets[i].mask){ + image mask = float_to_image(14, 14, 1, dets[i].mask); + image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h); + image tmask = threshold_image(resized_mask, .5); + embed_image(tmask, im, left, top); + free_image(mask); + free_image(resized_mask); + free_image(tmask); + } + } + } +} + +void transpose_image(image im) +{ + assert(im.w == im.h); + int n, m; + int c; + for(c = 0; c < im.c; ++c){ + for(n = 0; n < im.w-1; ++n){ + for(m = n + 1; m < im.w; ++m){ + float swap = im.data[m + im.w*(n + im.h*c)]; + im.data[m + im.w*(n + im.h*c)] = im.data[n + im.w*(m + im.h*c)]; + im.data[n + im.w*(m + im.h*c)] = swap; + } + } + } +} + +void rotate_image_cw(image im, int times) +{ + assert(im.w == im.h); + times = (times + 400) % 4; + int i, x, y, c; + int n = im.w; + for(i = 0; i < times; ++i){ + for(c = 0; c < im.c; ++c){ + for(x = 0; x < n/2; ++x){ + for(y = 0; y < (n-1)/2 + 1; ++y){ + float temp = im.data[y + im.w*(x + im.h*c)]; + im.data[y + im.w*(x + im.h*c)] = im.data[n-1-x + im.w*(y + im.h*c)]; + im.data[n-1-x + im.w*(y + im.h*c)] = im.data[n-1-y + im.w*(n-1-x + im.h*c)]; + im.data[n-1-y + im.w*(n-1-x + im.h*c)] = im.data[x + im.w*(n-1-y + im.h*c)]; + im.data[x + im.w*(n-1-y + im.h*c)] = temp; + } + } + } + } +} + +void flip_image(image a) +{ + int i,j,k; + for(k = 0; k < a.c; ++k){ + for(i = 0; i < a.h; ++i){ + for(j = 0; j < a.w/2; ++j){ + int index = j + a.w*(i + a.h*(k)); + int flip = (a.w - j - 1) + a.w*(i + a.h*(k)); + float swap = a.data[flip]; + a.data[flip] = a.data[index]; + a.data[index] = swap; + } + } + } +} + +image image_distance(image a, image b) +{ + int i,j; + image dist = make_image(a.w, a.h, 1); + for(i = 0; i < a.c; ++i){ + for(j = 0; j < a.h*a.w; ++j){ + dist.data[j] += pow(a.data[i*a.h*a.w+j]-b.data[i*a.h*a.w+j],2); + } + } 
+ for(j = 0; j < a.h*a.w; ++j){ + dist.data[j] = sqrt(dist.data[j]); + } + return dist; +} + +void ghost_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + float max_dist = sqrt((-source.w/2. + .5)*(-source.w/2. + .5)); + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float dist = sqrt((x - source.w/2. + .5)*(x - source.w/2. + .5) + (y - source.h/2. + .5)*(y - source.h/2. + .5)); + float alpha = (1 - dist/max_dist); + if(alpha < 0) alpha = 0; + float v1 = get_pixel(source, x,y,k); + float v2 = get_pixel(dest, dx+x,dy+y,k); + float val = alpha*v1 + (1-alpha)*v2; + set_pixel(dest, dx+x, dy+y, k, val); + } + } + } +} + +void blocky_image(image im, int s) +{ + int i,j,k; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + im.data[i + im.w*(j + im.h*k)] = im.data[i/s*s + im.w*(j/s*s + im.h*k)]; + } + } + } +} + +void censor_image(image im, int dx, int dy, int w, int h) +{ + int i,j,k; + int s = 32; + if(dx < 0) dx = 0; + if(dy < 0) dy = 0; + + for(k = 0; k < im.c; ++k){ + for(j = dy; j < dy + h && j < im.h; ++j){ + for(i = dx; i < dx + w && i < im.w; ++i){ + im.data[i + im.w*(j + im.h*k)] = im.data[i/s*s + im.w*(j/s*s + im.h*k)]; + //im.data[i + j*im.w + k*im.w*im.h] = 0; + } + } + } +} + +void embed_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float val = get_pixel(source, x,y,k); + set_pixel(dest, dx+x, dy+y, k, val); + } + } + } +} + +image collapse_image_layers(image source, int border) +{ + int h = source.h; + h = (h+border)*source.c - border; + image dest = make_image(source.w, h, 1); + int i; + for(i = 0; i < source.c; ++i){ + image layer = get_image_layer(source, i); + int h_offset = i*(source.h+border); + embed_image(layer, dest, 0, h_offset); + free_image(layer); + } + return dest; +} + +void constrain_image(image im) +{ + int i; + for(i = 
0; i < im.w*im.h*im.c; ++i){ + if(im.data[i] < 0) im.data[i] = 0; + if(im.data[i] > 1) im.data[i] = 1; + } +} + +void normalize_image(image p) +{ + int i; + float min = 9999999; + float max = -999999; + + for(i = 0; i < p.h*p.w*p.c; ++i){ + float v = p.data[i]; + if(v < min) min = v; + if(v > max) max = v; + } + if(max - min < .000000001){ + min = 0; + max = 1; + } + for(i = 0; i < p.c*p.w*p.h; ++i){ + p.data[i] = (p.data[i] - min)/(max-min); + } +} + +void normalize_image2(image p) +{ + float *min = calloc(p.c, sizeof(float)); + float *max = calloc(p.c, sizeof(float)); + int i,j; + for(i = 0; i < p.c; ++i) min[i] = max[i] = p.data[i*p.h*p.w]; + + for(j = 0; j < p.c; ++j){ + for(i = 0; i < p.h*p.w; ++i){ + float v = p.data[i+j*p.h*p.w]; + if(v < min[j]) min[j] = v; + if(v > max[j]) max[j] = v; + } + } + for(i = 0; i < p.c; ++i){ + if(max[i] - min[i] < .000000001){ + min[i] = 0; + max[i] = 1; + } + } + for(j = 0; j < p.c; ++j){ + for(i = 0; i < p.w*p.h; ++i){ + p.data[i+j*p.h*p.w] = (p.data[i+j*p.h*p.w] - min[j])/(max[j]-min[j]); + } + } + free(min); + free(max); +} + +void copy_image_into(image src, image dest) +{ + memcpy(dest.data, src.data, src.h*src.w*src.c*sizeof(float)); +} + +image copy_image(image p) +{ + image copy = p; + copy.data = calloc(p.h*p.w*p.c, sizeof(float)); + memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float)); + return copy; +} + +void rgbgr_image(image im) +{ + int i; + for(i = 0; i < im.w*im.h; ++i){ + float swap = im.data[i]; + im.data[i] = im.data[i+im.w*im.h*2]; + im.data[i+im.w*im.h*2] = swap; + } +} + +int show_image(image p, const char *name, int ms) +{ +#ifdef OPENCV + int c = show_image_cv(p, name, ms); + return c; +#else + fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name); + save_image(p, name); + return -1; +#endif +} + +void save_image_options(image im, const char *name, IMTYPE f, int quality) +{ + char buff[256]; + //sprintf(buff, "%s (%d)", name, windows); + if(f == PNG) sprintf(buff, "%s.png", 
name); + else if (f == BMP) sprintf(buff, "%s.bmp", name); + else if (f == TGA) sprintf(buff, "%s.tga", name); + else if (f == JPG) sprintf(buff, "%s.jpg", name); + else sprintf(buff, "%s.png", name); + unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char)); + int i,k; + for(k = 0; k < im.c; ++k){ + for(i = 0; i < im.w*im.h; ++i){ + data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]); + } + } + int success = 0; + if(f == PNG) success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c); + else if (f == BMP) success = stbi_write_bmp(buff, im.w, im.h, im.c, data); + else if (f == TGA) success = stbi_write_tga(buff, im.w, im.h, im.c, data); + else if (f == JPG) success = stbi_write_jpg(buff, im.w, im.h, im.c, data, quality); + free(data); + if(!success) fprintf(stderr, "Failed to write image %s\n", buff); +} + +void save_image(image im, const char *name) +{ + save_image_options(im, name, JPG, 80); +} + +void show_image_layers(image p, char *name) +{ + int i; + char buff[256]; + for(i = 0; i < p.c; ++i){ + sprintf(buff, "%s - Layer %d", name, i); + image layer = get_image_layer(p, i); + show_image(layer, buff, 1); + free_image(layer); + } +} + +void show_image_collapsed(image p, char *name) +{ + image c = collapse_image_layers(p, 1); + show_image(c, name, 1); + free_image(c); +} + +image make_empty_image(int w, int h, int c) +{ + image out; + out.data = 0; + out.h = h; + out.w = w; + out.c = c; + return out; +} + +image make_image(int w, int h, int c) +{ + image out = make_empty_image(w,h,c); + out.data = calloc(h*w*c, sizeof(float)); + return out; +} + +image make_random_image(int w, int h, int c) +{ + image out = make_empty_image(w,h,c); + out.data = calloc(h*w*c, sizeof(float)); + int i; + for(i = 0; i < w*h*c; ++i){ + out.data[i] = (rand_normal() * .25) + .5; + } + return out; +} + +image float_to_image(int w, int h, int c, float *data) +{ + image out = make_empty_image(w,h,c); + out.data = data; + return out; +} + +void place_image(image im, 
int w, int h, int dx, int dy, image canvas) +{ + int x, y, c; + for(c = 0; c < im.c; ++c){ + for(y = 0; y < h; ++y){ + for(x = 0; x < w; ++x){ + float rx = ((float)x / w) * im.w; + float ry = ((float)y / h) * im.h; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(canvas, x + dx, y + dy, c, val); + } + } + } +} + +image center_crop_image(image im, int w, int h) +{ + int m = (im.w < im.h) ? im.w : im.h; + image c = crop_image(im, (im.w - m) / 2, (im.h - m)/2, m, m); + image r = resize_image(c, w, h); + free_image(c); + return r; +} + +image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect) +{ + int x, y, c; + float cx = im.w/2.; + float cy = im.h/2.; + image rot = make_image(w, h, im.c); + for(c = 0; c < im.c; ++c){ + for(y = 0; y < h; ++y){ + for(x = 0; x < w; ++x){ + float rx = cos(rad)*((x - w/2.)/s*aspect + dx/s*aspect) - sin(rad)*((y - h/2.)/s + dy/s) + cx; + float ry = sin(rad)*((x - w/2.)/s*aspect + dx/s*aspect) + cos(rad)*((y - h/2.)/s + dy/s) + cy; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(rot, x, y, c, val); + } + } + } + return rot; +} + +image rotate_image(image im, float rad) +{ + int x, y, c; + float cx = im.w/2.; + float cy = im.h/2.; + image rot = make_image(im.w, im.h, im.c); + for(c = 0; c < im.c; ++c){ + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx; + float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(rot, x, y, c, val); + } + } + } + return rot; +} + +void fill_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] = s; +} + +void translate_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s; +} + +void scale_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s; +} + +image crop_image(image im, int dx, int dy, int w, int h) +{ + image cropped = 
make_image(w, h, im.c); + int i, j, k; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int r = j + dy; + int c = i + dx; + float val = 0; + r = constrain_int(r, 0, im.h-1); + c = constrain_int(c, 0, im.w-1); + val = get_pixel(im, c, r, k); + set_pixel(cropped, i, j, k, val); + } + } + } + return cropped; +} + +int best_3d_shift_r(image a, image b, int min, int max) +{ + if(min == max) return min; + int mid = floor((min + max) / 2.); + image c1 = crop_image(b, 0, mid, b.w, b.h); + image c2 = crop_image(b, 0, mid+1, b.w, b.h); + float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 10); + float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 10); + free_image(c1); + free_image(c2); + if(d1 < d2) return best_3d_shift_r(a, b, min, mid); + else return best_3d_shift_r(a, b, mid+1, max); +} + +int best_3d_shift(image a, image b, int min, int max) +{ + int i; + int best = 0; + float best_distance = FLT_MAX; + for(i = min; i <= max; i += 2){ + image c = crop_image(b, 0, i, b.w, b.h); + float d = dist_array(c.data, a.data, a.w*a.h*a.c, 100); + if(d < best_distance){ + best_distance = d; + best = i; + } + printf("%d %f\n", i, d); + free_image(c); + } + return best; +} + +void composite_3d(char *f1, char *f2, char *out, int delta) +{ + if(!out) out = "out"; + image a = load_image(f1, 0,0,0); + image b = load_image(f2, 0,0,0); + int shift = best_3d_shift_r(a, b, -a.h/100, a.h/100); + + image c1 = crop_image(b, 10, shift, b.w, b.h); + float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 100); + image c2 = crop_image(b, -10, shift, b.w, b.h); + float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 100); + + if(d2 < d1 && 0){ + image swap = a; + a = b; + b = swap; + shift = -shift; + printf("swapped, %d\n", shift); + } + else{ + printf("%d\n", shift); + } + + image c = crop_image(b, delta, shift, a.w, a.h); + int i; + for(i = 0; i < c.w*c.h; ++i){ + c.data[i] = a.data[i]; + } + save_image(c, out); +} + +void letterbox_image_into(image im, int w, int h, 
image boxed) +{ + int new_w = im.w; + int new_h = im.h; + if (((float)w/im.w) < ((float)h/im.h)) { + new_w = w; + new_h = (im.h * w)/im.w; + } else { + new_h = h; + new_w = (im.w * h)/im.h; + } + image resized = resize_image(im, new_w, new_h); + embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2); + free_image(resized); +} + +image letterbox_image(image im, int w, int h) +{ + int new_w = im.w; + int new_h = im.h; + if (((float)w/im.w) < ((float)h/im.h)) { + new_w = w; + new_h = (im.h * w)/im.w; + } else { + new_h = h; + new_w = (im.w * h)/im.h; + } + image resized = resize_image(im, new_w, new_h); + image boxed = make_image(w, h, im.c); + fill_image(boxed, .5); + //int i; + //for(i = 0; i < boxed.w*boxed.h*boxed.c; ++i) boxed.data[i] = 0; + embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2); + free_image(resized); + return boxed; +} + +image resize_max(image im, int max) +{ + int w = im.w; + int h = im.h; + if(w > h){ + h = (h * max) / w; + w = max; + } else { + w = (w * max) / h; + h = max; + } + if(w == im.w && h == im.h) return im; + image resized = resize_image(im, w, h); + return resized; +} + +image resize_min(image im, int min) +{ + int w = im.w; + int h = im.h; + if(w < h){ + h = (h * min) / w; + w = min; + } else { + w = (w * min) / h; + h = min; + } + if(w == im.w && h == im.h) return im; + image resized = resize_image(im, w, h); + return resized; +} + +image random_crop_image(image im, int w, int h) +{ + int dx = rand_int(0, im.w - w); + int dy = rand_int(0, im.h - h); + image crop = crop_image(im, dx, dy, w, h); + return crop; +} + +augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h) +{ + augment_args a = {0}; + aspect = rand_scale(aspect); + int r = rand_int(low, high); + int min = (im.h < im.w*aspect) ? 
im.h : im.w*aspect; + float scale = (float)r / min; + + float rad = rand_uniform(-angle, angle) * TWO_PI / 360.; + + float dx = (im.w*scale/aspect - w) / 2.; + float dy = (im.h*scale - w) / 2.; + //if(dx < 0) dx = 0; + //if(dy < 0) dy = 0; + dx = rand_uniform(-dx, dx); + dy = rand_uniform(-dy, dy); + + a.rad = rad; + a.scale = scale; + a.w = w; + a.h = h; + a.dx = dx; + a.dy = dy; + a.aspect = aspect; + return a; +} + +image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h) +{ + augment_args a = random_augment_args(im, angle, aspect, low, high, w, h); + image crop = rotate_crop_image(im, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + return crop; +} + +float three_way_max(float a, float b, float c) +{ + return (a > b) ? ( (a > c) ? a : c) : ( (b > c) ? b : c) ; +} + +float three_way_min(float a, float b, float c) +{ + return (a < b) ? ( (a < c) ? a : c) : ( (b < c) ? b : c) ; +} + +void yuv_to_rgb(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float y, u, v; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + y = get_pixel(im, i , j, 0); + u = get_pixel(im, i , j, 1); + v = get_pixel(im, i , j, 2); + + r = y + 1.13983*v; + g = y + -.39465*u + -.58060*v; + b = y + 2.03211*u; + + set_pixel(im, i, j, 0, r); + set_pixel(im, i, j, 1, g); + set_pixel(im, i, j, 2, b); + } + } +} + +void rgb_to_yuv(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float y, u, v; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + r = get_pixel(im, i , j, 0); + g = get_pixel(im, i , j, 1); + b = get_pixel(im, i , j, 2); + + y = .299*r + .587*g + .114*b; + u = -.14713*r + -.28886*g + .436*b; + v = .615*r + -.51499*g + -.10001*b; + + set_pixel(im, i, j, 0, y); + set_pixel(im, i, j, 1, u); + set_pixel(im, i, j, 2, v); + } + } +} + +// http://www.cs.rit.edu/~ncs/color/t_convert.html +void rgb_to_hsv(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float h, s, v; + for(j = 0; j < im.h; 
++j){ + for(i = 0; i < im.w; ++i){ + r = get_pixel(im, i , j, 0); + g = get_pixel(im, i , j, 1); + b = get_pixel(im, i , j, 2); + float max = three_way_max(r,g,b); + float min = three_way_min(r,g,b); + float delta = max - min; + v = max; + if(max == 0){ + s = 0; + h = 0; + }else{ + s = delta/max; + if(r == max){ + h = (g - b) / delta; + } else if (g == max) { + h = 2 + (b - r) / delta; + } else { + h = 4 + (r - g) / delta; + } + if (h < 0) h += 6; + h = h/6.; + } + set_pixel(im, i, j, 0, h); + set_pixel(im, i, j, 1, s); + set_pixel(im, i, j, 2, v); + } + } +} + +void hsv_to_rgb(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float h, s, v; + float f, p, q, t; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + h = 6 * get_pixel(im, i , j, 0); + s = get_pixel(im, i , j, 1); + v = get_pixel(im, i , j, 2); + if (s == 0) { + r = g = b = v; + } else { + int index = floor(h); + f = h - index; + p = v*(1-s); + q = v*(1-s*f); + t = v*(1-s*(1-f)); + if(index == 0){ + r = v; g = t; b = p; + } else if(index == 1){ + r = q; g = v; b = p; + } else if(index == 2){ + r = p; g = v; b = t; + } else if(index == 3){ + r = p; g = q; b = v; + } else if(index == 4){ + r = t; g = p; b = v; + } else { + r = v; g = p; b = q; + } + } + set_pixel(im, i, j, 0, r); + set_pixel(im, i, j, 1, g); + set_pixel(im, i, j, 2, b); + } + } +} + +void grayscale_image_3c(image im) +{ + assert(im.c == 3); + int i, j, k; + float scale[] = {0.299, 0.587, 0.114}; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float val = 0; + for(k = 0; k < 3; ++k){ + val += scale[k]*get_pixel(im, i, j, k); + } + im.data[0*im.h*im.w + im.w*j + i] = val; + im.data[1*im.h*im.w + im.w*j + i] = val; + im.data[2*im.h*im.w + im.w*j + i] = val; + } + } +} + +image grayscale_image(image im) +{ + assert(im.c == 3); + int i, j, k; + image gray = make_image(im.w, im.h, 1); + float scale[] = {0.299, 0.587, 0.114}; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ 
+ gray.data[i+im.w*j] += scale[k]*get_pixel(im, i, j, k); + } + } + } + return gray; +} + +image threshold_image(image im, float thresh) +{ + int i; + image t = make_image(im.w, im.h, im.c); + for(i = 0; i < im.w*im.h*im.c; ++i){ + t.data[i] = im.data[i]>thresh ? 1 : 0; + } + return t; +} + +image blend_image(image fore, image back, float alpha) +{ + assert(fore.w == back.w && fore.h == back.h && fore.c == back.c); + image blend = make_image(fore.w, fore.h, fore.c); + int i, j, k; + for(k = 0; k < fore.c; ++k){ + for(j = 0; j < fore.h; ++j){ + for(i = 0; i < fore.w; ++i){ + float val = alpha * get_pixel(fore, i, j, k) + + (1 - alpha)* get_pixel(back, i, j, k); + set_pixel(blend, i, j, k, val); + } + } + } + return blend; +} + +void scale_image_channel(image im, int c, float v) +{ + int i, j; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float pix = get_pixel(im, i, j, c); + pix = pix*v; + set_pixel(im, i, j, c, pix); + } + } +} + +void translate_image_channel(image im, int c, float v) +{ + int i, j; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float pix = get_pixel(im, i, j, c); + pix = pix+v; + set_pixel(im, i, j, c, pix); + } + } +} + +image binarize_image(image im) +{ + image c = copy_image(im); + int i; + for(i = 0; i < im.w * im.h * im.c; ++i){ + if(c.data[i] > .5) c.data[i] = 1; + else c.data[i] = 0; + } + return c; +} + +void saturate_image(image im, float sat) +{ + rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + hsv_to_rgb(im); + constrain_image(im); +} + +void hue_image(image im, float hue) +{ + rgb_to_hsv(im); + int i; + for(i = 0; i < im.w*im.h; ++i){ + im.data[i] = im.data[i] + hue; + if (im.data[i] > 1) im.data[i] -= 1; + if (im.data[i] < 0) im.data[i] += 1; + } + hsv_to_rgb(im); + constrain_image(im); +} + +void exposure_image(image im, float sat) +{ + rgb_to_hsv(im); + scale_image_channel(im, 2, sat); + hsv_to_rgb(im); + constrain_image(im); +} + +void distort_image(image im, float hue, float sat, float val) +{ + 
rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + scale_image_channel(im, 2, val); + int i; + for(i = 0; i < im.w*im.h; ++i){ + im.data[i] = im.data[i] + hue; + if (im.data[i] > 1) im.data[i] -= 1; + if (im.data[i] < 0) im.data[i] += 1; + } + hsv_to_rgb(im); + constrain_image(im); +} + +void random_distort_image(image im, float hue, float saturation, float exposure) +{ + float dhue = rand_uniform(-hue, hue); + float dsat = rand_scale(saturation); + float dexp = rand_scale(exposure); + distort_image(im, dhue, dsat, dexp); +} + +void saturate_exposure_image(image im, float sat, float exposure) +{ + rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + scale_image_channel(im, 2, exposure); + hsv_to_rgb(im); + constrain_image(im); +} + +image resize_image(image im, int w, int h) +{ + image resized = make_image(w, h, im.c); + image part = make_image(w, im.h, im.c); + int r, c, k; + float w_scale = (float)(im.w - 1) / (w - 1); + float h_scale = (float)(im.h - 1) / (h - 1); + for(k = 0; k < im.c; ++k){ + for(r = 0; r < im.h; ++r){ + for(c = 0; c < w; ++c){ + float val = 0; + if(c == w-1 || im.w == 1){ + val = get_pixel(im, im.w-1, r, k); + } else { + float sx = c*w_scale; + int ix = (int) sx; + float dx = sx - ix; + val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k); + } + set_pixel(part, c, r, k, val); + } + } + } + for(k = 0; k < im.c; ++k){ + for(r = 0; r < h; ++r){ + float sy = r*h_scale; + int iy = (int) sy; + float dy = sy - iy; + for(c = 0; c < w; ++c){ + float val = (1-dy) * get_pixel(part, c, iy, k); + set_pixel(resized, c, r, k, val); + } + if(r == h-1 || im.h == 1) continue; + for(c = 0; c < w; ++c){ + float val = dy * get_pixel(part, c, iy+1, k); + add_pixel(resized, c, r, k, val); + } + } + } + + free_image(part); + return resized; +} + + +void test_resize(char *filename) +{ + image im = load_image(filename, 0,0, 3); + float mag = mag_array(im.data, im.w*im.h*im.c); + printf("L2 Norm: %f\n", mag); + image gray = grayscale_image(im); + 
+ image c1 = copy_image(im); + image c2 = copy_image(im); + image c3 = copy_image(im); + image c4 = copy_image(im); + distort_image(c1, .1, 1.5, 1.5); + distort_image(c2, -.1, .66666, .66666); + distort_image(c3, .1, 1.5, .66666); + distort_image(c4, .1, .66666, 1.5); + + + show_image(im, "Original", 1); + show_image(gray, "Gray", 1); + show_image(c1, "C1", 1); + show_image(c2, "C2", 1); + show_image(c3, "C3", 1); + show_image(c4, "C4", 1); +#ifdef OPENCV + while(1){ + image aug = random_augment_image(im, 0, .75, 320, 448, 320, 320); + show_image(aug, "aug", 1); + free_image(aug); + + + float exposure = 1.15; + float saturation = 1.15; + float hue = .05; + + image c = copy_image(im); + + float dexp = rand_scale(exposure); + float dsat = rand_scale(saturation); + float dhue = rand_uniform(-hue, hue); + + distort_image(c, dhue, dsat, dexp); + show_image(c, "rand", 1); + printf("%f %f %f\n", dhue, dsat, dexp); + free_image(c); + } +#endif +} + + +image load_image_stb(char *filename, int channels) +{ + int w, h, c; + unsigned char *data = stbi_load(filename, &w, &h, &c, channels); + if (!data) { + fprintf(stderr, "Cannot load image \"%s\"\nSTB Reason: %s\n", filename, stbi_failure_reason()); + exit(0); + } + if(channels) c = channels; + int i,j,k; + image im = make_image(w, h, c); + for(k = 0; k < c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int dst_index = i + w*j + w*h*k; + int src_index = k + c*i + c*w*j; + im.data[dst_index] = (float)data[src_index]/255.; + } + } + } + free(data); + return im; +} + +image load_image(char *filename, int w, int h, int c) +{ +#ifdef OPENCV + image out = load_image_cv(filename, c); +#else + image out = load_image_stb(filename, c); +#endif + + if((h && w) && (h != out.h || w != out.w)){ + image resized = resize_image(out, w, h); + free_image(out); + out = resized; + } + return out; +} + +image load_image_color(char *filename, int w, int h) +{ + return load_image(filename, w, h, 3); +} + +image get_image_layer(image m, 
int l) +{ + image out = make_image(m.w, m.h, 1); + int i; + for(i = 0; i < m.h*m.w; ++i){ + out.data[i] = m.data[i+l*m.h*m.w]; + } + return out; +} +void print_image(image m) +{ + int i, j, k; + for(i =0 ; i < m.c; ++i){ + for(j =0 ; j < m.h; ++j){ + for(k = 0; k < m.w; ++k){ + printf("%.2lf, ", m.data[i*m.h*m.w + j*m.w + k]); + if(k > 30) break; + } + printf("\n"); + if(j > 30) break; + } + printf("\n"); + } + printf("\n"); +} + +image collapse_images_vert(image *ims, int n) +{ + int color = 1; + int border = 1; + int h,w,c; + w = ims[0].w; + h = (ims[0].h + border) * n - border; + c = ims[0].c; + if(c != 3 || !color){ + w = (w+border)*c - border; + c = 1; + } + + image filters = make_image(w, h, c); + int i,j; + for(i = 0; i < n; ++i){ + int h_offset = i*(ims[0].h+border); + image copy = copy_image(ims[i]); + //normalize_image(copy); + if(c == 3 && color){ + embed_image(copy, filters, 0, h_offset); + } + else{ + for(j = 0; j < copy.c; ++j){ + int w_offset = j*(ims[0].w+border); + image layer = get_image_layer(copy, j); + embed_image(layer, filters, w_offset, h_offset); + free_image(layer); + } + } + free_image(copy); + } + return filters; +} + +image collapse_images_horz(image *ims, int n) +{ + int color = 1; + int border = 1; + int h,w,c; + int size = ims[0].h; + h = size; + w = (ims[0].w + border) * n - border; + c = ims[0].c; + if(c != 3 || !color){ + h = (h+border)*c - border; + c = 1; + } + + image filters = make_image(w, h, c); + int i,j; + for(i = 0; i < n; ++i){ + int w_offset = i*(size+border); + image copy = copy_image(ims[i]); + //normalize_image(copy); + if(c == 3 && color){ + embed_image(copy, filters, w_offset, 0); + } + else{ + for(j = 0; j < copy.c; ++j){ + int h_offset = j*(size+border); + image layer = get_image_layer(copy, j); + embed_image(layer, filters, w_offset, h_offset); + free_image(layer); + } + } + free_image(copy); + } + return filters; +} + +void show_image_normalized(image im, const char *name) +{ + image c = copy_image(im); + 
normalize_image(c); + show_image(c, name, 1); + free_image(c); +} + +void show_images(image *ims, int n, char *window) +{ + image m = collapse_images_vert(ims, n); + /* + int w = 448; + int h = ((float)m.h/m.w) * 448; + if(h > 896){ + h = 896; + w = ((float)m.w/m.h) * 896; + } + image sized = resize_image(m, w, h); + */ + normalize_image(m); + save_image(m, window); + show_image(m, window, 1); + free_image(m); +} + +void free_image(image m) +{ + if(m.data){ + free(m.data); + } +} diff --git a/workloads/realworld/pinned/darknet/src/image.h b/workloads/realworld/pinned/darknet/src/image.h new file mode 100644 index 0000000000000000000000000000000000000000..3392bb9787fc542929cda064bcefa0f3f893b76c --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/image.h @@ -0,0 +1,69 @@ +#ifndef IMAGE_H +#define IMAGE_H + +#include +#include +#include +#include +#include +#include "box.h" +#include "darknet.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef OPENCV +void *open_video_stream(const char *f, int c, int w, int h, int fps); +image get_image_from_stream(void *p); +image load_image_cv(char *filename, int channels); +int show_image_cv(image im, const char* name, int ms); +#endif + +float get_color(int c, int x, int max); +void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); +void draw_bbox(image a, box bbox, int w, float r, float g, float b); +void write_label(image a, int r, int c, image *characters, char *string, float *rgb); +image image_distance(image a, image b); +void scale_image(image m, float s); +image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect); +image random_crop_image(image im, int w, int h); +image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h); +augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h); +void letterbox_image_into(image im, int w, int h, image boxed); +image 
resize_max(image im, int max); +void translate_image(image m, float s); +void embed_image(image source, image dest, int dx, int dy); +void place_image(image im, int w, int h, int dx, int dy, image canvas); +void saturate_image(image im, float sat); +void exposure_image(image im, float sat); +void distort_image(image im, float hue, float sat, float val); +void saturate_exposure_image(image im, float sat, float exposure); +void rgb_to_hsv(image im); +void hsv_to_rgb(image im); +void yuv_to_rgb(image im); +void rgb_to_yuv(image im); + + +image collapse_image_layers(image source, int border); +image collapse_images_horz(image *ims, int n); +image collapse_images_vert(image *ims, int n); + +void show_image_normalized(image im, const char *name); +void show_images(image *ims, int n, char *window); +void show_image_layers(image p, char *name); +void show_image_collapsed(image p, char *name); + +void print_image(image m); + +image make_empty_image(int w, int h, int c); +void copy_image_into(image src, image dest); + +image get_image_layer(image m, int l); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/workloads/realworld/pinned/darknet/src/image_opencv.cpp b/workloads/realworld/pinned/darknet/src/image_opencv.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7511280be07ca987fd51fa54aea55910cd34a706 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/image_opencv.cpp @@ -0,0 +1,135 @@ +#ifdef OPENCV + +#include "stdio.h" +#include "stdlib.h" +#include "opencv2/opencv.hpp" +#include "image.h" + +using namespace cv; + +extern "C" { + +IplImage *image_to_ipl(image im) +{ + int x,y,c; + IplImage *disp = cvCreateImage(cvSize(im.w,im.h), IPL_DEPTH_8U, im.c); + int step = disp->widthStep; + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + for(c= 0; c < im.c; ++c){ + float val = im.data[c*im.h*im.w + y*im.w + x]; + disp->imageData[y*step + x*im.c + c] = (unsigned char)(val*255); + } + } + } + return disp; +} + +image 
ipl_to_image(IplImage* src) +{ + int h = src->height; + int w = src->width; + int c = src->nChannels; + image im = make_image(w, h, c); + unsigned char *data = (unsigned char *)src->imageData; + int step = src->widthStep; + int i, j, k; + + for(i = 0; i < h; ++i){ + for(k= 0; k < c; ++k){ + for(j = 0; j < w; ++j){ + im.data[k*w*h + i*w + j] = data[i*step + j*c + k]/255.; + } + } + } + return im; +} + +Mat image_to_mat(image im) +{ + image copy = copy_image(im); + constrain_image(copy); + if(im.c == 3) rgbgr_image(copy); + + IplImage *ipl = image_to_ipl(copy); + Mat m = cvarrToMat(ipl, true); + cvReleaseImage(&ipl); + free_image(copy); + return m; +} + +image mat_to_image(Mat m) +{ + IplImage ipl = m; + image im = ipl_to_image(&ipl); + rgbgr_image(im); + return im; +} + +void *open_video_stream(const char *f, int c, int w, int h, int fps) +{ + VideoCapture *cap; + if(f) cap = new VideoCapture(f); + else cap = new VideoCapture(c); + if(!cap->isOpened()) return 0; + if(w) cap->set(CV_CAP_PROP_FRAME_WIDTH, w); + if(h) cap->set(CV_CAP_PROP_FRAME_HEIGHT, w); + if(fps) cap->set(CV_CAP_PROP_FPS, w); + return (void *) cap; +} + +image get_image_from_stream(void *p) +{ + VideoCapture *cap = (VideoCapture *)p; + Mat m; + *cap >> m; + if(m.empty()) return make_empty_image(0,0,0); + return mat_to_image(m); +} + +image load_image_cv(char *filename, int channels) +{ + int flag = -1; + if (channels == 0) flag = -1; + else if (channels == 1) flag = 0; + else if (channels == 3) flag = 1; + else { + fprintf(stderr, "OpenCV can't force load with %d channels\n", channels); + } + Mat m; + m = imread(filename, flag); + if(!m.data){ + fprintf(stderr, "Cannot load image \"%s\"\n", filename); + char buff[256]; + sprintf(buff, "echo %s >> bad.list", filename); + system(buff); + return make_image(10,10,3); + //exit(0); + } + image im = mat_to_image(m); + return im; +} + +int show_image_cv(image im, const char* name, int ms) +{ + Mat m = image_to_mat(im); + imshow(name, m); + int c = 
waitKey(ms); + if (c != -1) c = c%256; + return c; +} + +void make_window(char *name, int w, int h, int fullscreen) +{ + namedWindow(name, WINDOW_NORMAL); + if (fullscreen) { + setWindowProperty(name, CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); + } else { + resizeWindow(name, w, h); + if(strcmp(name, "Demo") == 0) moveWindow(name, 0, 0); + } +} + +} + +#endif diff --git a/workloads/realworld/pinned/darknet/src/iseg_layer.c b/workloads/realworld/pinned/darknet/src/iseg_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..a1b822a5797a6d04b0f3756f106cb2b20ba31a5b --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/iseg_layer.c @@ -0,0 +1,225 @@ +#include "iseg_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_iseg_layer(int batch, int w, int h, int classes, int ids) +{ + layer l = {0}; + l.type = ISEG; + + l.h = h; + l.w = w; + l.c = classes + ids; + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.batch = batch; + l.extra = ids; + l.cost = calloc(1, sizeof(float)); + l.outputs = h*w*l.c; + l.inputs = l.outputs; + l.truths = 90*(l.w*l.h+1); + l.delta = calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + + l.counts = calloc(90, sizeof(int)); + l.sums = calloc(90, sizeof(float*)); + if(ids){ + int i; + for(i = 0; i < 90; ++i){ + l.sums[i] = calloc(ids, sizeof(float)); + } + } + + l.forward = forward_iseg_layer; + l.backward = backward_iseg_layer; +#ifdef GPU + l.forward_gpu = forward_iseg_layer_gpu; + l.backward_gpu = backward_iseg_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "iseg\n"); + srand(0); + + return l; +} + +void resize_iseg_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->c; + l->inputs = 
l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +void forward_iseg_layer(const layer l, network net) +{ + + double time = what_time_is_it_now(); + int i,b,j,k; + int ids = l.extra; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + int index = b*l.outputs; + activate_array(l.output + index, l.classes*l.w*l.h, LOGISTIC); + } +#endif + + for (b = 0; b < l.batch; ++b){ + // a priori, each pixel has no class + for(i = 0; i < l.classes; ++i){ + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + i*l.w*l.h + k; + l.delta[index] = 0 - l.output[index]; + } + } + + // a priori, embedding should be small magnitude + for(i = 0; i < ids; ++i){ + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + (i+l.classes)*l.w*l.h + k; + l.delta[index] = .1 * (0 - l.output[index]); + } + } + + + memset(l.counts, 0, 90*sizeof(int)); + for(i = 0; i < 90; ++i){ + fill_cpu(ids, 0, l.sums[i], 1); + + int c = net.truth[b*l.truths + i*(l.w*l.h+1)]; + if(c < 0) break; + // add up metric embeddings for each instance + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + c*l.w*l.h + k; + float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k]; + if(v){ + l.delta[index] = v - l.output[index]; + axpy_cpu(ids, 1, l.output + b*l.outputs + l.classes*l.w*l.h + k, l.w*l.h, l.sums[i], 1); + ++l.counts[i]; + } + } + } + + float *mse = calloc(90, sizeof(float)); + for(i = 0; i < 90; ++i){ + int c = net.truth[b*l.truths + i*(l.w*l.h+1)]; + if(c < 0) break; + for(k = 0; k < l.w*l.h; ++k){ + float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k]; + if(v){ + 
int z; + float sum = 0; + for(z = 0; z < ids; ++z){ + int index = b*l.outputs + (l.classes + z)*l.w*l.h + k; + sum += pow(l.sums[i][z]/l.counts[i] - l.output[index], 2); + } + mse[i] += sum; + } + } + mse[i] /= l.counts[i]; + } + + // Calculate average embedding + for(i = 0; i < 90; ++i){ + if(!l.counts[i]) continue; + scal_cpu(ids, 1.f/l.counts[i], l.sums[i], 1); + if(b == 0 && net.gpu_index == 0){ + printf("%4d, %6.3f, ", l.counts[i], mse[i]); + for(j = 0; j < ids; ++j){ + printf("%6.3f,", l.sums[i][j]); + } + printf("\n"); + } + } + free(mse); + + // Calculate embedding loss + for(i = 0; i < 90; ++i){ + if(!l.counts[i]) continue; + for(k = 0; k < l.w*l.h; ++k){ + float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k]; + if(v){ + for(j = 0; j < 90; ++j){ + if(!l.counts[j])continue; + int z; + for(z = 0; z < ids; ++z){ + int index = b*l.outputs + (l.classes + z)*l.w*l.h + k; + float diff = l.sums[j][z] - l.output[index]; + if (j == i) l.delta[index] += diff < 0? -.1 : .1; + else l.delta[index] += -(diff < 0? 
-.1 : .1); + } + } + } + } + } + + for(i = 0; i < ids; ++i){ + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + (i+l.classes)*l.w*l.h + k; + l.delta[index] *= .01; + } + } + } + + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("took %lf sec\n", what_time_is_it_now() - time); +} + +void backward_iseg_layer(const layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_iseg_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b; + for (b = 0; b < l.batch; ++b){ + activate_array_gpu(l.output_gpu + b*l.outputs, l.classes*l.w*l.h, LOGISTIC); + //if(l.extra) activate_array_gpu(l.output_gpu + b*l.outputs + l.classes*l.w*l.h, l.extra*l.w*l.h, LOGISTIC); + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_iseg_layer(l, net); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_iseg_layer_gpu(const layer l, network net) +{ + int b; + for (b = 0; b < l.batch; ++b){ + //if(l.extra) gradient_array_gpu(l.output_gpu + b*l.outputs + l.classes*l.w*l.h, l.extra*l.w*l.h, LOGISTIC, l.delta_gpu + b*l.outputs + l.classes*l.w*l.h); + } + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/pinned/darknet/src/iseg_layer.h b/workloads/realworld/pinned/darknet/src/iseg_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..dd8e64e023caf1e1fd0c30af57f9983f24ddd691 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/iseg_layer.h @@ -0,0 +1,19 @@ +#ifndef ISEG_LAYER_H +#define ISEG_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_iseg_layer(int batch, int w, int h, int classes, int ids); +void forward_iseg_layer(const layer l, network net); +void backward_iseg_layer(const layer l, network net); +void resize_iseg_layer(layer *l, int w, int h); +int 
iseg_num_detections(layer l, float thresh); + +#ifdef GPU +void forward_iseg_layer_gpu(const layer l, network net); +void backward_iseg_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/pinned/darknet/src/l2norm_layer.c b/workloads/realworld/pinned/darknet/src/l2norm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..0cf7f844170cb2c3dba15be94d4a435aaa63067c --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/l2norm_layer.c @@ -0,0 +1,63 @@ +#include "l2norm_layer.h" +#include "activations.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +layer make_l2norm_layer(int batch, int inputs) +{ + fprintf(stderr, "l2norm %4d\n", inputs); + layer l = {0}; + l.type = L2NORM; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.output = calloc(inputs*batch, sizeof(float)); + l.scales = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + + l.forward = forward_l2norm_layer; + l.backward = backward_l2norm_layer; + #ifdef GPU + l.forward_gpu = forward_l2norm_layer_gpu; + l.backward_gpu = backward_l2norm_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.scales_gpu = cuda_make_array(l.output, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_l2norm_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + l2normalize_cpu(l.output, l.scales, l.batch, l.out_c, l.out_w*l.out_h); +} + +void backward_l2norm_layer(const layer l, network net) +{ + //axpy_cpu(l.inputs*l.batch, 1, l.scales, 1, l.delta, 1); + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_l2norm_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + l2normalize_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_w*l.out_h); +} + +void 
backward_l2norm_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.scales_gpu, 1, l.delta_gpu, 1); + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/pinned/darknet/src/l2norm_layer.h b/workloads/realworld/pinned/darknet/src/l2norm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1ca6f710f017f2857f566eaed90634698d72b26d --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/l2norm_layer.h @@ -0,0 +1,15 @@ +#ifndef L2NORM_LAYER_H +#define L2NORM_LAYER_H +#include "layer.h" +#include "network.h" + +layer make_l2norm_layer(int batch, int inputs); +void forward_l2norm_layer(const layer l, network net); +void backward_l2norm_layer(const layer l, network net); + +#ifdef GPU +void forward_l2norm_layer_gpu(const layer l, network net); +void backward_l2norm_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/pinned/darknet/src/layer.c b/workloads/realworld/pinned/darknet/src/layer.c new file mode 100644 index 0000000000000000000000000000000000000000..3bffe436f06a455e2d1043158ff6da9b07bbb61f --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/layer.c @@ -0,0 +1,97 @@ +#include "layer.h" +#include "cuda_dark.h" + +#include + +void free_layer(layer l) +{ + if(l.type == DROPOUT){ + if(l.rand) free(l.rand); +#ifdef GPU + if(l.rand_gpu) cuda_free(l.rand_gpu); +#endif + return; + } + if(l.cweights) free(l.cweights); + if(l.indexes) free(l.indexes); + if(l.input_layers) free(l.input_layers); + if(l.input_sizes) free(l.input_sizes); + if(l.map) free(l.map); + if(l.rand) free(l.rand); + if(l.cost) free(l.cost); + if(l.state) free(l.state); + if(l.prev_state) free(l.prev_state); + if(l.forgot_state) free(l.forgot_state); + if(l.forgot_delta) free(l.forgot_delta); + if(l.state_delta) free(l.state_delta); + if(l.concat) free(l.concat); + if(l.concat_delta) free(l.concat_delta); + if(l.binary_weights) free(l.binary_weights); + 
if(l.biases) free(l.biases); + if(l.bias_updates) free(l.bias_updates); + if(l.scales) free(l.scales); + if(l.scale_updates) free(l.scale_updates); + if(l.weights) free(l.weights); + if(l.weight_updates) free(l.weight_updates); + if(l.delta) free(l.delta); + if(l.output) free(l.output); + if(l.squared) free(l.squared); + if(l.norms) free(l.norms); + if(l.spatial_mean) free(l.spatial_mean); + if(l.mean) free(l.mean); + if(l.variance) free(l.variance); + if(l.mean_delta) free(l.mean_delta); + if(l.variance_delta) free(l.variance_delta); + if(l.rolling_mean) free(l.rolling_mean); + if(l.rolling_variance) free(l.rolling_variance); + if(l.x) free(l.x); + if(l.x_norm) free(l.x_norm); + if(l.m) free(l.m); + if(l.v) free(l.v); + if(l.z_cpu) free(l.z_cpu); + if(l.r_cpu) free(l.r_cpu); + if(l.h_cpu) free(l.h_cpu); + if(l.binary_input) free(l.binary_input); + +#ifdef GPU + if(l.indexes_gpu) cuda_free((float *)l.indexes_gpu); + + if(l.z_gpu) cuda_free(l.z_gpu); + if(l.r_gpu) cuda_free(l.r_gpu); + if(l.h_gpu) cuda_free(l.h_gpu); + if(l.m_gpu) cuda_free(l.m_gpu); + if(l.v_gpu) cuda_free(l.v_gpu); + if(l.prev_state_gpu) cuda_free(l.prev_state_gpu); + if(l.forgot_state_gpu) cuda_free(l.forgot_state_gpu); + if(l.forgot_delta_gpu) cuda_free(l.forgot_delta_gpu); + if(l.state_gpu) cuda_free(l.state_gpu); + if(l.state_delta_gpu) cuda_free(l.state_delta_gpu); + if(l.gate_gpu) cuda_free(l.gate_gpu); + if(l.gate_delta_gpu) cuda_free(l.gate_delta_gpu); + if(l.save_gpu) cuda_free(l.save_gpu); + if(l.save_delta_gpu) cuda_free(l.save_delta_gpu); + if(l.concat_gpu) cuda_free(l.concat_gpu); + if(l.concat_delta_gpu) cuda_free(l.concat_delta_gpu); + if(l.binary_input_gpu) cuda_free(l.binary_input_gpu); + if(l.binary_weights_gpu) cuda_free(l.binary_weights_gpu); + if(l.mean_gpu) cuda_free(l.mean_gpu); + if(l.variance_gpu) cuda_free(l.variance_gpu); + if(l.rolling_mean_gpu) cuda_free(l.rolling_mean_gpu); + if(l.rolling_variance_gpu) cuda_free(l.rolling_variance_gpu); + if(l.variance_delta_gpu) 
cuda_free(l.variance_delta_gpu); + if(l.mean_delta_gpu) cuda_free(l.mean_delta_gpu); + if(l.x_gpu) cuda_free(l.x_gpu); + if(l.x_norm_gpu) cuda_free(l.x_norm_gpu); + if(l.weights_gpu) cuda_free(l.weights_gpu); + if(l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); + if(l.biases_gpu) cuda_free(l.biases_gpu); + if(l.bias_updates_gpu) cuda_free(l.bias_updates_gpu); + if(l.scales_gpu) cuda_free(l.scales_gpu); + if(l.scale_updates_gpu) cuda_free(l.scale_updates_gpu); + if(l.output_gpu) cuda_free(l.output_gpu); + if(l.delta_gpu) cuda_free(l.delta_gpu); + if(l.rand_gpu) cuda_free(l.rand_gpu); + if(l.squared_gpu) cuda_free(l.squared_gpu); + if(l.norms_gpu) cuda_free(l.norms_gpu); +#endif +} diff --git a/workloads/realworld/pinned/darknet/src/layer.h b/workloads/realworld/pinned/darknet/src/layer.h new file mode 100644 index 0000000000000000000000000000000000000000..af6cd2ab5054f5ef3bbdfca2da45f08d710a7bd0 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/layer.h @@ -0,0 +1 @@ +#include "darknet.h" diff --git a/workloads/realworld/pinned/darknet/src/list.c b/workloads/realworld/pinned/darknet/src/list.c new file mode 100644 index 0000000000000000000000000000000000000000..0e4165d37800e1b4c7c33992cd64a6634fe4688c --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/list.c @@ -0,0 +1,92 @@ +#include +#include +#include "list.h" + +list *make_list() +{ + list *l = malloc(sizeof(list)); + l->size = 0; + l->front = 0; + l->back = 0; + return l; +} + +/* +void transfer_node(list *s, list *d, node *n) +{ + node *prev, *next; + prev = n->prev; + next = n->next; + if(prev) prev->next = next; + if(next) next->prev = prev; + --s->size; + if(s->front == n) s->front = next; + if(s->back == n) s->back = prev; +} +*/ + +void *list_pop(list *l){ + if(!l->back) return 0; + node *b = l->back; + void *val = b->val; + l->back = b->prev; + if(l->back) l->back->next = 0; + free(b); + --l->size; + + return val; +} + +void list_insert(list *l, void *val) +{ + node *new = 
malloc(sizeof(node)); + new->val = val; + new->next = 0; + + if(!l->back){ + l->front = new; + new->prev = 0; + }else{ + l->back->next = new; + new->prev = l->back; + } + l->back = new; + ++l->size; +} + +void free_node(node *n) +{ + node *next; + while(n) { + next = n->next; + free(n); + n = next; + } +} + +void free_list(list *l) +{ + free_node(l->front); + free(l); +} + +void free_list_contents(list *l) +{ + node *n = l->front; + while(n){ + free(n->val); + n = n->next; + } +} + +void **list_to_array(list *l) +{ + void **a = calloc(l->size, sizeof(void*)); + int count = 0; + node *n = l->front; + while(n){ + a[count++] = n->val; + n = n->next; + } + return a; +} diff --git a/workloads/realworld/pinned/darknet/src/list.h b/workloads/realworld/pinned/darknet/src/list.h new file mode 100644 index 0000000000000000000000000000000000000000..6b445c717c2937b9c90536654ba82a33e14bb4ec --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/list.h @@ -0,0 +1,13 @@ +#ifndef LIST_H +#define LIST_H +#include "darknet.h" + +list *make_list(); +int list_find(list *l, void *val); + +void list_insert(list *, void *); + + +void free_list_contents(list *l); + +#endif diff --git a/workloads/realworld/pinned/darknet/src/local_layer.c b/workloads/realworld/pinned/darknet/src/local_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..74f6910a8fd751ad9f3b41fc67be737399a151d0 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/local_layer.c @@ -0,0 +1,293 @@ +#include "local_layer.h" +#include "utils.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" +#include +#include + +int local_out_height(local_layer l) +{ + int h = l.h; + if (!l.pad) h -= l.size; + else h -= 1; + return h/l.stride + 1; +} + +int local_out_width(local_layer l) +{ + int w = l.w; + if (!l.pad) w -= l.size; + else w -= 1; + return w/l.stride + 1; +} + +local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, 
ACTIVATION activation) +{ + int i; + local_layer l = {0}; + l.type = LOCAL; + + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.batch = batch; + l.stride = stride; + l.size = size; + l.pad = pad; + + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int locations = out_h*out_w; + l.out_h = out_h; + l.out_w = out_w; + l.out_c = n; + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = l.w * l.h * l.c; + + l.weights = calloc(c*n*size*size*locations, sizeof(float)); + l.weight_updates = calloc(c*n*size*size*locations, sizeof(float)); + + l.biases = calloc(l.outputs, sizeof(float)); + l.bias_updates = calloc(l.outputs, sizeof(float)); + + // float scale = 1./sqrt(size*size*c); + float scale = sqrt(2./(size*size*c)); + for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1,1); + + l.output = calloc(l.batch*out_h * out_w * n, sizeof(float)); + l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float)); + + l.workspace_size = out_h*out_w*size*size*c; + + l.forward = forward_local_layer; + l.backward = backward_local_layer; + l.update = update_local_layer; + +#ifdef GPU + l.forward_gpu = forward_local_layer_gpu; + l.backward_gpu = backward_local_layer_gpu; + l.update_gpu = update_local_layer_gpu; + + l.weights_gpu = cuda_make_array(l.weights, c*n*size*size*locations); + l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size*locations); + + l.biases_gpu = cuda_make_array(l.biases, l.outputs); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, l.outputs); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + +#endif + l.activation = activation; + + fprintf(stderr, "Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n); + + return l; +} + +void forward_local_layer(const local_layer l, network net) +{ + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int i, j; + int locations = 
out_h * out_w; + + for(i = 0; i < l.batch; ++i){ + copy_cpu(l.outputs, l.biases, 1, l.output + i*l.outputs, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input + i*l.w*l.h*l.c; + im2col_cpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + float *output = l.output + i*l.outputs; + for(j = 0; j < locations; ++j){ + float *a = l.weights + j*l.size*l.size*l.c*l.n; + float *b = net.workspace + j; + float *c = output + j; + + int m = l.n; + int n = 1; + int k = l.size*l.size*l.c; + + gemm(0,0,m,n,k,1,a,k,b,locations,1,c,locations); + } + } + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_local_layer(local_layer l, network net) +{ + int i, j; + int locations = l.out_w*l.out_h; + + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + for(i = 0; i < l.batch; ++i){ + axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input + i*l.w*l.h*l.c; + im2col_cpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + + for(j = 0; j < locations; ++j){ + float *a = l.delta + i*l.outputs + j; + float *b = net.workspace + j; + float *c = l.weight_updates + j*l.size*l.size*l.c*l.n; + int m = l.n; + int n = l.size*l.size*l.c; + int k = 1; + + gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n); + } + + if(net.delta){ + for(j = 0; j < locations; ++j){ + float *a = l.weights + j*l.size*l.size*l.c*l.n; + float *b = l.delta + i*l.outputs + j; + float *c = net.workspace + j; + + int m = l.size*l.size*l.c; + int n = 1; + int k = l.n; + + gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations); + } + + col2im_cpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta+i*l.c*l.h*l.w); + } + } +} + +void update_local_layer(local_layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int locations = l.out_w*l.out_h; 
+ int size = l.size*l.size*l.c*l.n*locations; + axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.outputs, momentum, l.bias_updates, 1); + + axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(size, momentum, l.weight_updates, 1); +} + +#ifdef GPU + +void forward_local_layer_gpu(const local_layer l, network net) +{ + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int i, j; + int locations = out_h * out_w; + + for(i = 0; i < l.batch; ++i){ + copy_gpu(l.outputs, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input_gpu + i*l.w*l.h*l.c; + im2col_gpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + float *output = l.output_gpu + i*l.outputs; + for(j = 0; j < locations; ++j){ + float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n; + float *b = net.workspace + j; + float *c = output + j; + + int m = l.n; + int n = 1; + int k = l.size*l.size*l.c; + + gemm_gpu(0,0,m,n,k,1,a,k,b,locations,1,c,locations); + } + } + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_local_layer_gpu(local_layer l, network net) +{ + int i, j; + int locations = l.out_w*l.out_h; + + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + for(i = 0; i < l.batch; ++i){ + axpy_gpu(l.outputs, 1, l.delta_gpu + i*l.outputs, 1, l.bias_updates_gpu, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input_gpu + i*l.w*l.h*l.c; + im2col_gpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + + for(j = 0; j < locations; ++j){ + float *a = l.delta_gpu + i*l.outputs + j; + float *b = net.workspace + j; + float *c = l.weight_updates_gpu + j*l.size*l.size*l.c*l.n; + int m = l.n; + int n = l.size*l.size*l.c; + int k = 1; + + gemm_gpu(0,1,m,n,k,1,a,locations,b,locations,1,c,n); + } + + 
if(net.delta_gpu){ + for(j = 0; j < locations; ++j){ + float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n; + float *b = l.delta_gpu + i*l.outputs + j; + float *c = net.workspace + j; + + int m = l.size*l.size*l.c; + int n = 1; + int k = l.n; + + gemm_gpu(1,0,m,n,k,1,a,m,b,locations,0,c,locations); + } + + col2im_gpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta_gpu+i*l.c*l.h*l.w); + } + } +} + +void update_local_layer_gpu(local_layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1); + + axpy_gpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(size, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(size, momentum, l.weight_updates_gpu, 1); +} + +void pull_local_layer(local_layer l) +{ + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + cuda_pull_array(l.weights_gpu, l.weights, size); + cuda_pull_array(l.biases_gpu, l.biases, l.outputs); +} + +void push_local_layer(local_layer l) +{ + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + cuda_push_array(l.weights_gpu, l.weights, size); + cuda_push_array(l.biases_gpu, l.biases, l.outputs); +} +#endif diff --git a/workloads/realworld/pinned/darknet/src/local_layer.h b/workloads/realworld/pinned/darknet/src/local_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..56805c4f1cb51fed9ef0e771d2befb430df60df5 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/local_layer.h @@ -0,0 +1,31 @@ +#ifndef LOCAL_LAYER_H +#define LOCAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include 
"network.h" + +typedef layer local_layer; + +#ifdef GPU +void forward_local_layer_gpu(local_layer layer, network net); +void backward_local_layer_gpu(local_layer layer, network net); +void update_local_layer_gpu(local_layer layer, update_args a); + +void push_local_layer(local_layer layer); +void pull_local_layer(local_layer layer); +#endif + +local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation); + +void forward_local_layer(const local_layer layer, network net); +void backward_local_layer(local_layer layer, network net); +void update_local_layer(local_layer layer, update_args a); + +void bias_output(float *output, float *biases, int batch, int n, int size); +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); + +#endif + diff --git a/workloads/realworld/pinned/darknet/src/logistic_layer.c b/workloads/realworld/pinned/darknet/src/logistic_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..8d98986f67a17da70df75e3d56a46615cfc8eaf1 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/logistic_layer.c @@ -0,0 +1,71 @@ +#include "logistic_layer.h" +#include "activations.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +layer make_logistic_layer(int batch, int inputs) +{ + fprintf(stderr, "logistic x entropy %4d\n", inputs); + layer l = {0}; + l.type = LOGXENT; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.loss = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_logistic_layer; + l.backward = backward_logistic_layer; + #ifdef GPU + l.forward_gpu = forward_logistic_layer_gpu; + l.backward_gpu = backward_logistic_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.loss_gpu = cuda_make_array(l.loss, 
inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_logistic_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + activate_array(l.output, l.outputs*l.batch, LOGISTIC); + if(net.truth){ + logistic_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_logistic_layer(const layer l, network net) +{ + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_logistic_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, LOGISTIC); + if(net.truth){ + logistic_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); + cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_logistic_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/pinned/darknet/src/logistic_layer.h b/workloads/realworld/pinned/darknet/src/logistic_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9c25bee3c2a6eb1013ed43ce0c4aeaa63b7a293f --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/logistic_layer.h @@ -0,0 +1,15 @@ +#ifndef LOGISTIC_LAYER_H +#define LOGISTIC_LAYER_H +#include "layer.h" +#include "network.h" + +layer make_logistic_layer(int batch, int inputs); +void forward_logistic_layer(const layer l, network net); +void backward_logistic_layer(const layer l, network net); + +#ifdef GPU +void forward_logistic_layer_gpu(const layer l, network net); +void backward_logistic_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/pinned/darknet/src/lstm_layer.c 
b/workloads/realworld/pinned/darknet/src/lstm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..16f43914df8f35fb7f7b16bd93ff1d83f513dda0 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/lstm_layer.c @@ -0,0 +1,626 @@
#include "lstm_layer.h"
#include "connected_layer.h"
#include "utils.h"
#include "cuda_dark.h"
#include "blas.h"
#include "gemm.h"

/* NOTE(review): the angle-bracket system header names below were stripped by
 * this dump (likely <stdio.h>, <stdlib.h>, <string.h> etc.) — confirm against
 * upstream darknet before building. */
#include
#include
#include
#include

/* Slide every per-timestep buffer of a sub-layer by `steps` timesteps
 * (negative steps walk backwards). Used to step the eight inner connected
 * layers through the unrolled sequence. */
static void increment_layer(layer *l, int steps)
{
    int num = l->outputs*l->batch*steps;
    l->output += num;
    l->delta += num;
    l->x += num;
    l->x_norm += num;

#ifdef GPU
    l->output_gpu += num;
    l->delta_gpu += num;
    l->x_gpu += num;
    l->x_norm_gpu += num;
#endif
}

/* Build an LSTM layer unrolled over `steps` timesteps.
 * `batch` arrives as (sequences * steps) and is divided back down; the four
 * gates (f, i, g, o) each get an input-projection (u*) and a recurrent
 * projection (w*) connected layer sized for the full unrolled sequence. */
layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam)
{
    fprintf(stderr, "LSTM Layer: %d inputs, %d outputs\n", inputs, outputs);
    batch = batch / steps;
    layer l = { 0 };
    l.batch = batch;
    l.type = LSTM;
    l.steps = steps;
    l.inputs = inputs;

    /* u* layers project the external input (inputs -> outputs). */
    l.uf = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.uf) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam);
    l.uf->batch = batch;

    l.ui = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.ui) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam);
    l.ui->batch = batch;

    l.ug = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.ug) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam);
    l.ug->batch = batch;

    l.uo = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.uo) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam);
    l.uo->batch = batch;

    /* w* layers project the recurrent hidden state (outputs -> outputs). */
    l.wf = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.wf) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam);
    l.wf->batch = batch;

    l.wi = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.wi) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam);
    l.wi->batch = batch;

    l.wg = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.wg) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam);
    l.wg->batch = batch;

    l.wo = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.wo) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam);
    l.wo->batch = batch;

    l.batch_normalize = batch_normalize;
    l.outputs = outputs;

    l.output = calloc(outputs*batch*steps, sizeof(float));
    l.state = calloc(outputs*batch, sizeof(float));

    l.forward = forward_lstm_layer;
    l.update = update_lstm_layer;

    /* Scratch buffers; all sized for a single timestep except cell_cpu. */
    l.prev_state_cpu = calloc(batch*outputs, sizeof(float));
    l.prev_cell_cpu = calloc(batch*outputs, sizeof(float));
    l.cell_cpu = calloc(batch*outputs*steps, sizeof(float));

    l.f_cpu = calloc(batch*outputs, sizeof(float));
    l.i_cpu = calloc(batch*outputs, sizeof(float));
    l.g_cpu = calloc(batch*outputs, sizeof(float));
    l.o_cpu = calloc(batch*outputs, sizeof(float));
    l.c_cpu = calloc(batch*outputs, sizeof(float));
    l.h_cpu = calloc(batch*outputs, sizeof(float));
    l.temp_cpu = calloc(batch*outputs, sizeof(float));
    l.temp2_cpu = calloc(batch*outputs, sizeof(float));
    l.temp3_cpu = calloc(batch*outputs, sizeof(float));
    l.dc_cpu = calloc(batch*outputs, sizeof(float));
    l.dh_cpu = calloc(batch*outputs, sizeof(float));

#ifdef GPU
    l.forward_gpu = forward_lstm_layer_gpu;
    l.backward_gpu = backward_lstm_layer_gpu;
    l.update_gpu = update_lstm_layer_gpu;

    l.output_gpu = cuda_make_array(0, batch*outputs*steps);
    l.delta_gpu = cuda_make_array(0, batch*l.outputs*steps);

    l.prev_state_gpu = cuda_make_array(0, batch*outputs);
    l.prev_cell_gpu = cuda_make_array(0, batch*outputs);
    l.cell_gpu = cuda_make_array(0, batch*outputs*steps);

    l.f_gpu = cuda_make_array(0, batch*outputs);
    l.i_gpu = cuda_make_array(0, batch*outputs);
    l.g_gpu = cuda_make_array(0, batch*outputs);
    l.o_gpu = cuda_make_array(0, batch*outputs);
    l.c_gpu = cuda_make_array(0, batch*outputs);
    l.h_gpu = cuda_make_array(0, batch*outputs);
    l.temp_gpu = cuda_make_array(0, batch*outputs);
    l.temp2_gpu = cuda_make_array(0, batch*outputs);
    l.temp3_gpu = cuda_make_array(0, batch*outputs);
    l.dc_gpu = cuda_make_array(0, batch*outputs);
    l.dh_gpu = cuda_make_array(0, batch*outputs);
#ifdef CUDNN
    cudnnSetTensor4dDescriptor(l.wf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wf->out_c, l.wf->out_h, l.wf->out_w);
    cudnnSetTensor4dDescriptor(l.wi->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wi->out_c, l.wi->out_h, l.wi->out_w);
    cudnnSetTensor4dDescriptor(l.wg->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wg->out_c, l.wg->out_h, l.wg->out_w);
    cudnnSetTensor4dDescriptor(l.wo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wo->out_c, l.wo->out_h, l.wo->out_w);

    cudnnSetTensor4dDescriptor(l.uf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uf->out_c, l.uf->out_h, l.uf->out_w);
    cudnnSetTensor4dDescriptor(l.ui->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ui->out_c, l.ui->out_h, l.ui->out_w);
    cudnnSetTensor4dDescriptor(l.ug->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ug->out_c, l.ug->out_h, l.ug->out_w);
    cudnnSetTensor4dDescriptor(l.uo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uo->out_c, l.uo->out_h, l.uo->out_w);
#endif

#endif

    return l;
}

/* Update all eight inner connected layers' weights. */
void update_lstm_layer(layer l, update_args a)
{
    update_connected_layer(*(l.wf), a);
    update_connected_layer(*(l.wi), a);
    update_connected_layer(*(l.wg), a);
    update_connected_layer(*(l.wo), a);
    update_connected_layer(*(l.uf), a);
    update_connected_layer(*(l.ui), a);
    update_connected_layer(*(l.ug), a);
    update_connected_layer(*(l.uo), a);
}

/* CPU forward pass: steps through the sequence, advancing the input/output
 * pointers and the eight sub-layers by one timestep per iteration.
 * Gate math per step: c = f*c + i*g;  h = o * tanh(c). */
void forward_lstm_layer(layer l, network state)
{
    network s = { 0 };
    s.train = state.train;
    int i;
    layer wf = *(l.wf);
    layer wi = *(l.wi);
    layer wg = *(l.wg);
    layer wo = *(l.wo);

    layer uf = *(l.uf);
    layer ui = *(l.ui);
    layer ug = *(l.ug);
    layer uo = *(l.uo);

    fill_cpu(l.outputs * l.batch * l.steps, 0, wf.delta, 1);
    fill_cpu(l.outputs * l.batch * l.steps, 0, wi.delta, 1);
    fill_cpu(l.outputs * l.batch * l.steps, 0, wg.delta, 1);
    fill_cpu(l.outputs * l.batch * l.steps, 0, wo.delta, 1);

    fill_cpu(l.outputs * l.batch * l.steps, 0, uf.delta, 1);
    fill_cpu(l.outputs * l.batch * l.steps, 0, ui.delta, 1);
    fill_cpu(l.outputs * l.batch * l.steps, 0, ug.delta, 1);
    fill_cpu(l.outputs * l.batch * l.steps, 0, uo.delta, 1);
    if (state.train) {
        fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1);
    }

    for (i = 0; i < l.steps; ++i) {
        /* Recurrent projections read the previous hidden state. */
        s.input = l.h_cpu;
        forward_connected_layer(wf, s);
        forward_connected_layer(wi, s);
        forward_connected_layer(wg, s);
        forward_connected_layer(wo, s);

        /* Input projections read this timestep's external input. */
        s.input = state.input;
        forward_connected_layer(uf, s);
        forward_connected_layer(ui, s);
        forward_connected_layer(ug, s);
        forward_connected_layer(uo, s);

        /* Pre-activation gate values: w*h + u*x. */
        copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1);
        axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1);

        copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1);
        axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1);

        copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1);
        axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1);

        copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1);
        axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1);

        activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC);
        activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC);
        activate_array(l.g_cpu, l.outputs*l.batch, TANH);
        activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC);

        /* c = f*c + i*g */
        copy_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1);
        mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1);
        mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.c_cpu, 1);
        axpy_cpu(l.outputs*l.batch, 1, l.temp_cpu, 1, l.c_cpu, 1);

        /* h = o * tanh(c) */
        copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.h_cpu, 1);
        activate_array(l.h_cpu, l.outputs*l.batch, TANH);
        mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.h_cpu, 1);

        copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.cell_cpu, 1);
        copy_cpu(l.outputs*l.batch, l.h_cpu, 1, l.output, 1);

        /* Advance all rolling pointers one timestep. */
        state.input += l.inputs*l.batch;
        l.output += l.outputs*l.batch;
        l.cell_cpu += l.outputs*l.batch;

        increment_layer(&wf, 1);
        increment_layer(&wi, 1);
        increment_layer(&wg, 1);
        increment_layer(&wo, 1);

        increment_layer(&uf, 1);
        increment_layer(&ui, 1);
        increment_layer(&ug, 1);
        increment_layer(&uo, 1);
    }
}

/* CPU backward pass: walks the sequence in reverse (back-propagation through
 * time), recomputing the gate activations for each step and distributing the
 * gradients to the eight sub-layers. */
void backward_lstm_layer(layer l, network state)
{
    network s = { 0 };
    s.train = state.train;
    int i;
    layer wf = *(l.wf);
    layer wi = *(l.wi);
    layer wg = *(l.wg);
    layer wo = *(l.wo);

    layer uf = *(l.uf);
    layer ui = *(l.ui);
    layer ug = *(l.ug);
    layer uo = *(l.uo);

    /* Seek everything to the last timestep first. */
    increment_layer(&wf, l.steps - 1);
    increment_layer(&wi, l.steps - 1);
    increment_layer(&wg, l.steps - 1);
    increment_layer(&wo, l.steps - 1);

    increment_layer(&uf, l.steps - 1);
    increment_layer(&ui, l.steps - 1);
    increment_layer(&ug, l.steps - 1);
    increment_layer(&uo, l.steps - 1);

    state.input += l.inputs*l.batch*(l.steps - 1);
    if (state.delta) state.delta += l.inputs*l.batch*(l.steps - 1);

    l.output += l.outputs*l.batch*(l.steps - 1);
    l.cell_cpu += l.outputs*l.batch*(l.steps - 1);
    l.delta += l.outputs*l.batch*(l.steps - 1);

    for (i = l.steps - 1; i >= 0; --i) {
        /* State from step i-1 (zero-initialized buffers at step 0). */
        if (i != 0) copy_cpu(l.outputs*l.batch, l.cell_cpu - l.outputs*l.batch, 1, l.prev_cell_cpu, 1);
        copy_cpu(l.outputs*l.batch, l.cell_cpu, 1, l.c_cpu, 1);
        if (i != 0) copy_cpu(l.outputs*l.batch, l.output - l.outputs*l.batch, 1, l.prev_state_cpu, 1);
        copy_cpu(l.outputs*l.batch, l.output, 1, l.h_cpu, 1);

        /* dh points into the previous step's delta slot (NULL at step 0). */
        l.dh_cpu = (i == 0) ? 0 : l.delta - l.outputs*l.batch;

        /* Recompute gate activations for this timestep. */
        copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1);
        axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1);

        copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1);
        axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1);

        copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1);
        axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1);

        copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1);
        axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1);

        activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC);
        activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC);
        activate_array(l.g_cpu, l.outputs*l.batch, TANH);
        activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC);

        copy_cpu(l.outputs*l.batch, l.delta, 1, l.temp3_cpu, 1);

        /* dc = dh * o * (1 - tanh(c)^2) + carried dc */
        copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1);
        activate_array(l.temp_cpu, l.outputs*l.batch, TANH);

        copy_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp2_cpu, 1);
        mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.temp2_cpu, 1);

        gradient_array(l.temp_cpu, l.outputs*l.batch, TANH, l.temp2_cpu);
        axpy_cpu(l.outputs*l.batch, 1, l.dc_cpu, 1, l.temp2_cpu, 1);

        /* Output-gate gradient -> wo/uo. */
        copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1);
        activate_array(l.temp_cpu, l.outputs*l.batch, TANH);
        mul_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp_cpu, 1);
        gradient_array(l.o_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu);
        copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wo.delta, 1);
        s.input = l.prev_state_cpu;
        s.delta = l.dh_cpu;
        backward_connected_layer(wo, s);

        copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uo.delta, 1);
        s.input = state.input;
        s.delta = state.delta;
        backward_connected_layer(uo, s);

        /* Candidate (g) gradient -> wg/ug. */
        copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1);
        mul_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1);
        gradient_array(l.g_cpu, l.outputs*l.batch, TANH, l.temp_cpu);
        copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wg.delta, 1);
        s.input = l.prev_state_cpu;
        s.delta = l.dh_cpu;
        backward_connected_layer(wg, s);

        copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ug.delta, 1);
        s.input = state.input;
        s.delta = state.delta;
        backward_connected_layer(ug, s);

        /* Input-gate gradient -> wi/ui. */
        copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1);
        mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1);
        gradient_array(l.i_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu);
        copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wi.delta, 1);
        s.input = l.prev_state_cpu;
        s.delta = l.dh_cpu;
        backward_connected_layer(wi, s);

        copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ui.delta, 1);
        s.input = state.input;
        s.delta = state.delta;
        backward_connected_layer(ui, s);

        /* Forget-gate gradient -> wf/uf. */
        copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1);
        mul_cpu(l.outputs*l.batch, l.prev_cell_cpu, 1, l.temp_cpu, 1);
        gradient_array(l.f_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu);
        copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wf.delta, 1);
        s.input = l.prev_state_cpu;
        s.delta = l.dh_cpu;
        backward_connected_layer(wf, s);

        copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uf.delta, 1);
        s.input = state.input;
        s.delta = state.delta;
        backward_connected_layer(uf, s);

        /* Carry dc = dc * f into step i-1. */
        copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1);
        mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.temp_cpu, 1);
        copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, l.dc_cpu, 1);

        state.input -= l.inputs*l.batch;
        if (state.delta) state.delta -= l.inputs*l.batch;
        l.output -= l.outputs*l.batch;
        l.cell_cpu -= l.outputs*l.batch;
        l.delta -= l.outputs*l.batch;

        increment_layer(&wf, -1);
        increment_layer(&wi, -1);
        increment_layer(&wg, -1);
        increment_layer(&wo, -1);

        increment_layer(&uf, -1);
        increment_layer(&ui, -1);
        increment_layer(&ug, -1);
        increment_layer(&uo, -1);
    }
}

#ifdef GPU
/* GPU weight update: delegates to the eight inner connected layers.
 * (Function body continues past this chunk of the dump.) */
void update_lstm_layer_gpu(layer l, update_args a)
{
    update_connected_layer_gpu(*(l.wf), a);
    update_connected_layer_gpu(*(l.wi), a);
    update_connected_layer_gpu(*(l.wg), a);
    update_connected_layer_gpu(*(l.wo), a);
    update_connected_layer_gpu(*(l.uf), a);
    update_connected_layer_gpu(*(l.ui), a);
    update_connected_layer_gpu(*(l.ug), a);
    update_connected_layer_gpu(*(l.uo), a);
}

/* GPU forward pass: mirrors forward_lstm_layer, operating on the *_gpu
 * buffers via the blas_kernels wrappers. c = f*c + i*g; h = o*tanh(c). */
void forward_lstm_layer_gpu(layer l, network state)
{
    network s = { 0 };
    s.train = state.train;
    int i;
    layer wf = *(l.wf);
    layer wi = *(l.wi);
    layer wg = *(l.wg);
    layer wo = *(l.wo);

    layer uf = *(l.uf);
    layer ui = *(l.ui);
    layer ug = *(l.ug);
    layer uo = *(l.uo);

    fill_gpu(l.outputs * l.batch * l.steps, 0, wf.delta_gpu, 1);
    fill_gpu(l.outputs * l.batch * l.steps, 0, wi.delta_gpu, 1);
    fill_gpu(l.outputs * l.batch * l.steps, 0, wg.delta_gpu, 1);
    fill_gpu(l.outputs * l.batch * l.steps, 0, wo.delta_gpu, 1);

    fill_gpu(l.outputs * l.batch * l.steps, 0, uf.delta_gpu, 1);
    fill_gpu(l.outputs * l.batch * l.steps, 0, ui.delta_gpu, 1);
    fill_gpu(l.outputs * l.batch * l.steps, 0, ug.delta_gpu, 1);
    fill_gpu(l.outputs * l.batch * l.steps, 0, uo.delta_gpu, 1);
    if (state.train) {
        fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1);
    }

    for (i = 0; i < l.steps; ++i) {
        /* Recurrent projections read the previous hidden state. */
        s.input_gpu = l.h_gpu;
        forward_connected_layer_gpu(wf, s);
        forward_connected_layer_gpu(wi, s);
        forward_connected_layer_gpu(wg, s);
        forward_connected_layer_gpu(wo, s);

        /* Input projections read this timestep's external input. */
        s.input_gpu = state.input_gpu;
        forward_connected_layer_gpu(uf, s);
        forward_connected_layer_gpu(ui, s);
        forward_connected_layer_gpu(ug, s);
        forward_connected_layer_gpu(uo, s);

        /* Pre-activation gate values: w*h + u*x. */
        copy_gpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1);

        copy_gpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1);

        copy_gpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1);

        copy_gpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1);

        activate_array_gpu(l.f_gpu, l.outputs*l.batch, LOGISTIC);
        activate_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC);
        activate_array_gpu(l.g_gpu, l.outputs*l.batch, TANH);
        activate_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC);

        /* c = f*c + i*g */
        copy_gpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1);
        mul_gpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1);
        mul_gpu(l.outputs*l.batch, l.f_gpu, 1, l.c_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, l.temp_gpu, 1, l.c_gpu, 1);

        /* h = o * tanh(c) */
        copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.h_gpu, 1);
        activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH);
        mul_gpu(l.outputs*l.batch, l.o_gpu, 1, l.h_gpu, 1);

        copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.cell_gpu, 1);
        copy_gpu(l.outputs*l.batch, l.h_gpu, 1, l.output_gpu, 1);

        /* Advance all rolling pointers one timestep. */
        state.input_gpu += l.inputs*l.batch;
        l.output_gpu += l.outputs*l.batch;
        l.cell_gpu += l.outputs*l.batch;

        increment_layer(&wf, 1);
        increment_layer(&wi, 1);
        increment_layer(&wg, 1);
        increment_layer(&wo, 1);

        increment_layer(&uf, 1);
        increment_layer(&ui, 1);
        increment_layer(&ug, 1);
        increment_layer(&uo, 1);
    }
}

/* GPU backward pass: back-propagation through time, mirroring
 * backward_lstm_layer on the *_gpu buffers. */
void backward_lstm_layer_gpu(layer l, network state)
{
    network s = { 0 };
    s.train = state.train;
    int i;
    layer wf = *(l.wf);
    layer wi = *(l.wi);
    layer wg = *(l.wg);
    layer wo = *(l.wo);

    layer uf = *(l.uf);
    layer ui = *(l.ui);
    layer ug = *(l.ug);
    layer uo = *(l.uo);

    /* Seek everything to the last timestep first. */
    increment_layer(&wf, l.steps - 1);
    increment_layer(&wi, l.steps - 1);
    increment_layer(&wg, l.steps - 1);
    increment_layer(&wo, l.steps - 1);

    increment_layer(&uf, l.steps - 1);
    increment_layer(&ui, l.steps - 1);
    increment_layer(&ug, l.steps - 1);
    increment_layer(&uo, l.steps - 1);

    state.input_gpu += l.inputs*l.batch*(l.steps - 1);
    if (state.delta_gpu) state.delta_gpu += l.inputs*l.batch*(l.steps - 1);

    l.output_gpu += l.outputs*l.batch*(l.steps - 1);
    l.cell_gpu += l.outputs*l.batch*(l.steps - 1);
    l.delta_gpu += l.outputs*l.batch*(l.steps - 1);

    for (i = l.steps - 1; i >= 0; --i) {
        /* State from step i-1 (zero-initialized buffers at step 0). */
        if (i != 0) copy_gpu(l.outputs*l.batch, l.cell_gpu - l.outputs*l.batch, 1, l.prev_cell_gpu, 1);
        copy_gpu(l.outputs*l.batch, l.cell_gpu, 1, l.c_gpu, 1);
        if (i != 0) copy_gpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, 1, l.prev_state_gpu, 1);
        copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.h_gpu, 1);

        /* dh points into the previous step's delta slot (NULL at step 0). */
        l.dh_gpu = (i == 0) ? 0 : l.delta_gpu - l.outputs*l.batch;

        /* Recompute gate activations for this timestep. */
        copy_gpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1);

        copy_gpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1);

        copy_gpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1);

        copy_gpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1);

        activate_array_gpu(l.f_gpu, l.outputs*l.batch, LOGISTIC);
        activate_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC);
        activate_array_gpu(l.g_gpu, l.outputs*l.batch, TANH);
        activate_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC);

        copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, l.temp3_gpu, 1);

        /* dc = dh * o * (1 - tanh(c)^2) + carried dc */
        copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.temp_gpu, 1);
        activate_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH);

        copy_gpu(l.outputs*l.batch, l.temp3_gpu, 1, l.temp2_gpu, 1);
        mul_gpu(l.outputs*l.batch, l.o_gpu, 1, l.temp2_gpu, 1);

        gradient_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH, l.temp2_gpu);
        axpy_gpu(l.outputs*l.batch, 1, l.dc_gpu, 1, l.temp2_gpu, 1);

        /* Output-gate gradient -> wo/uo. */
        copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.temp_gpu, 1);
        activate_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH);
        mul_gpu(l.outputs*l.batch, l.temp3_gpu, 1, l.temp_gpu, 1);
        gradient_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu);
        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wo.delta_gpu, 1);
        s.input_gpu = l.prev_state_gpu;
        s.delta_gpu = l.dh_gpu;
        backward_connected_layer_gpu(wo, s);

        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, uo.delta_gpu, 1);
        s.input_gpu = state.input_gpu;
        s.delta_gpu = state.delta_gpu;
        backward_connected_layer_gpu(uo, s);

        /* Candidate (g) gradient -> wg/ug. */
        copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1);
        mul_gpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1);
        gradient_array_gpu(l.g_gpu, l.outputs*l.batch, TANH, l.temp_gpu);
        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wg.delta_gpu, 1);
        s.input_gpu = l.prev_state_gpu;
        s.delta_gpu = l.dh_gpu;
        backward_connected_layer_gpu(wg, s);

        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, ug.delta_gpu, 1);
        s.input_gpu = state.input_gpu;
        s.delta_gpu = state.delta_gpu;
        backward_connected_layer_gpu(ug, s);

        /* Input-gate gradient -> wi/ui. */
        copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1);
        mul_gpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1);
        gradient_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu);
        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wi.delta_gpu, 1);
        s.input_gpu = l.prev_state_gpu;
        s.delta_gpu = l.dh_gpu;
        backward_connected_layer_gpu(wi, s);

        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, ui.delta_gpu, 1);
        s.input_gpu = state.input_gpu;
        s.delta_gpu = state.delta_gpu;
        backward_connected_layer_gpu(ui, s);

        /* Forget-gate gradient -> wf/uf. */
        copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1);
        mul_gpu(l.outputs*l.batch, l.prev_cell_gpu, 1, l.temp_gpu, 1);
        gradient_array_gpu(l.f_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu);
        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wf.delta_gpu, 1);
        s.input_gpu = l.prev_state_gpu;
        s.delta_gpu = l.dh_gpu;
        backward_connected_layer_gpu(wf, s);

        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, uf.delta_gpu, 1);
        s.input_gpu = state.input_gpu;
        s.delta_gpu = state.delta_gpu;
        backward_connected_layer_gpu(uf, s);

        /* Carry dc = dc * f into step i-1. */
        copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1);
        mul_gpu(l.outputs*l.batch, l.f_gpu, 1, l.temp_gpu, 1);
        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, l.dc_gpu, 1);

        state.input_gpu -= l.inputs*l.batch;
        if (state.delta_gpu) state.delta_gpu -= l.inputs*l.batch;
        l.output_gpu -= l.outputs*l.batch;
        l.cell_gpu -= l.outputs*l.batch;
        l.delta_gpu -= l.outputs*l.batch;

        increment_layer(&wf, -1);
        increment_layer(&wi, -1);
        increment_layer(&wg, -1);
        increment_layer(&wo, -1);

        increment_layer(&uf, -1);
        increment_layer(&ui, -1);
        increment_layer(&ug, -1);
        increment_layer(&uo, -1);
    }
}
#endif
diff --git a/workloads/realworld/pinned/darknet/src/lstm_layer.h b/workloads/realworld/pinned/darknet/src/lstm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..b9f07e6424b55c336e692aa6f1028d0bc1cae0b3 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/lstm_layer.h @@ -0,0 +1,20 @@
#ifndef LSTM_LAYER_H
#define LSTM_LAYER_H

#include "activations.h"
#include "layer.h"
#include "network.h"
#define USET

layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam);

void forward_lstm_layer(layer l, network net);
void update_lstm_layer(layer l, update_args a);

#ifdef GPU
void forward_lstm_layer_gpu(layer l, network net);
void backward_lstm_layer_gpu(layer l, network net);
void update_lstm_layer_gpu(layer l, update_args a);

#endif
#endif
diff --git a/workloads/realworld/pinned/darknet/src/matrix.c b/workloads/realworld/pinned/darknet/src/matrix.c new file mode 100644 index 0000000000000000000000000000000000000000..799916bff017180e220ae48748f495007793d168 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/matrix.c @@ -0,0 +1,196 @@
#include "matrix.h"
#include "utils.h"
#include "blas.h"
/* NOTE(review): five system header names were stripped by this dump (likely
 * <stdio.h>, <stdlib.h>, <string.h>, <math.h>, <assert.h>) — confirm against
 * upstream darknet. */
#include
#include
#include
#include
#include

/* Free a row-pointer matrix: each row array, then the row-pointer table. */
void free_matrix(matrix m)
{
    int i;
    for(i = 0; i < m.rows; ++i) free(m.vals[i]);
    free(m.vals);
}

/* Fraction of rows whose true class appears among the k highest guesses.
 * (Body continues in the next chunk of the dump.) */
float matrix_topk_accuracy(matrix truth, matrix guess, int k)
{
    int *indexes = calloc(k, sizeof(int));
    int n = truth.cols;
    int i,j;
    int correct = 0;
    for(i = 0; i < truth.rows; ++i){
        top_k(guess.vals[i], n, k, indexes);
        for(j = 0;
            j < k; ++j){
            int class = indexes[j];
            /* Count the row correct if the truth one-hot marks this guess. */
            if(truth.vals[i][class]){
                ++correct;
                break;
            }
        }
    }
    free(indexes);
    return (float)correct/truth.rows;
}

/* Multiply every element by `scale`, in place. */
void scale_matrix(matrix m, float scale)
{
    int i,j;
    for(i = 0; i < m.rows; ++i){
        for(j = 0; j < m.cols; ++j){
            m.vals[i][j] *= scale;
        }
    }
}

/* Grow (with zeroed new rows) or shrink the matrix to `size` rows. */
matrix resize_matrix(matrix m, int size)
{
    int i;
    if (m.rows == size) return m;
    if (m.rows < size) {
        m.vals = realloc(m.vals, size*sizeof(float*));
        for (i = m.rows; i < size; ++i) {
            m.vals[i] = calloc(m.cols, sizeof(float));
        }
    } else if (m.rows > size) {
        for (i = size; i < m.rows; ++i) {
            free(m.vals[i]);
        }
        m.vals = realloc(m.vals, size*sizeof(float*));
    }
    m.rows = size;
    return m;
}

/* Element-wise in-place addition: to += from. Dimensions must match. */
void matrix_add_matrix(matrix from, matrix to)
{
    assert(from.rows == to.rows && from.cols == to.cols);
    int i,j;
    for(i = 0; i < from.rows; ++i){
        for(j = 0; j < from.cols; ++j){
            to.vals[i][j] += from.vals[i][j];
        }
    }
}

/* Deep copy of a matrix (rows allocated fresh, values copied). */
matrix copy_matrix(matrix m)
{
    matrix c = {0};
    c.rows = m.rows;
    c.cols = m.cols;
    c.vals = calloc(c.rows, sizeof(float *));
    int i;
    for(i = 0; i < c.rows; ++i){
        c.vals[i] = calloc(c.cols, sizeof(float));
        copy_cpu(c.cols, m.vals[i], 1, c.vals[i], 1);
    }
    return c;
}

/* Allocate a zero-initialized rows x cols matrix. */
matrix make_matrix(int rows, int cols)
{
    int i;
    matrix m;
    m.rows = rows;
    m.cols = cols;
    m.vals = calloc(m.rows, sizeof(float *));
    for(i = 0; i < m.rows; ++i){
        m.vals[i] = calloc(m.cols, sizeof(float));
    }
    return m;
}

/* Remove n random rows from *m and return them as a hold-out matrix.
 * Row pointers are moved (not copied); *m shrinks by n rows. */
matrix hold_out_matrix(matrix *m, int n)
{
    int i;
    matrix h;
    h.rows = n;
    h.cols = m->cols;
    h.vals = calloc(h.rows, sizeof(float *));
    for(i = 0; i < n; ++i){
        int index = rand()%m->rows;
        h.vals[i] = m->vals[index];
        /* Fill the hole with the current last row, then shrink. */
        m->vals[index] = m->vals[--(m->rows)];
    }
    return h;
}

/* Remove column c from *m and return its values (caller frees). */
float *pop_column(matrix *m, int c)
{
    float *col = calloc(m->rows, sizeof(float));
    int i, j;
    for(i = 0; i < m->rows; ++i){
        col[i] = m->vals[i][c];
        /* Shift the remaining columns left over the removed one. */
        for(j = c; j < m->cols-1; ++j){
            m->vals[i][j] = m->vals[i][j+1];
        }
    }
    --m->cols;
    return col;
}

/* Parse a CSV file into a matrix; column count is taken from the first line. */
matrix csv_to_matrix(char *filename)
{
    FILE *fp = fopen(filename, "r");
    if(!fp) file_error(filename);

    matrix m;
    m.cols = -1;

    char *line;

    int n = 0;
    int size = 1024;
    m.vals = calloc(size, sizeof(float*));
    while((line = fgetl(fp))){
        if(m.cols == -1) m.cols = count_fields(line);
        /* Double the row-pointer table whenever it fills. */
        if(n == size){
            size *= 2;
            m.vals = realloc(m.vals, size*sizeof(float*));
        }
        m.vals[n] = parse_fields(line, m.cols);
        free(line);
        ++n;
    }
    m.vals = realloc(m.vals, n*sizeof(float*));
    m.rows = n;
    return m;
}

/* Print the matrix to stdout as CSV with full float precision (%.17g). */
void matrix_to_csv(matrix m)
{
    int i, j;

    for(i = 0; i < m.rows; ++i){
        for(j = 0; j < m.cols; ++j){
            if(j > 0) printf(",");
            printf("%.17g", m.vals[i][j]);
        }
        printf("\n");
    }
}

/* Pretty-print the matrix with an ASCII-art border. */
void print_matrix(matrix m)
{
    int i, j;
    printf("%d X %d Matrix:\n",m.rows, m.cols);
    printf(" __");
    for(j = 0; j < 16*m.cols-1; ++j) printf(" ");
    printf("__ \n");

    printf("|  ");
    for(j = 0; j < 16*m.cols-1; ++j) printf(" ");
    printf("  |\n");

    for(i = 0; i < m.rows; ++i){
        printf("|  ");
        for(j = 0; j < m.cols; ++j){
            printf("%15.7f ", m.vals[i][j]);
        }
        printf(" |\n");
    }
    printf("|__");
    for(j = 0; j < 16*m.cols-1; ++j) printf(" ");
    printf("__|\n");
}
diff --git a/workloads/realworld/pinned/darknet/src/matrix.h b/workloads/realworld/pinned/darknet/src/matrix.h new file mode 100644 index 0000000000000000000000000000000000000000..879acd70d26c084931b30067ddcc77057068e58c --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/matrix.h @@ -0,0 +1,13 @@
#ifndef MATRIX_H
#define MATRIX_H
#include "darknet.h"

matrix copy_matrix(matrix m);
void print_matrix(matrix m);

matrix hold_out_matrix(matrix *m, int n);
matrix resize_matrix(matrix m, int size);

float *pop_column(matrix *m, int c);

#endif
diff --git a/workloads/realworld/pinned/darknet/src/maxpool_layer.c b/workloads/realworld/pinned/darknet/src/maxpool_layer.c new file mode 100644 index
0000000000000000000000000000000000000000..db0796aa15a7ffcf60b6855aa01d7b9e3e6a5092 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/maxpool_layer.c @@ -0,0 +1,130 @@ +#include "maxpool_layer.h" +#include "cuda_dark.h" +#include + +image get_maxpool_image(maxpool_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.c; + return float_to_image(w,h,c,l.output); +} + +image get_maxpool_delta(maxpool_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.c; + return float_to_image(w,h,c,l.delta); +} + +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding) +{ + maxpool_layer l = {0}; + l.type = MAXPOOL; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.pad = padding; + l.out_w = (w + padding - size)/stride + 1; + l.out_h = (h + padding - size)/stride + 1; + l.out_c = c; + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = h*w*c; + l.size = size; + l.stride = stride; + int output_size = l.out_h * l.out_w * l.out_c * batch; + // l.indexes = calloc(output_size, sizeof(int)); + // l.output = calloc(output_size, sizeof(float)); + // l.delta = calloc(output_size, sizeof(float)); + cudaMallocHost(&l.indexes, output_size*sizeof(float)); + cudaMallocHost(&l.output, output_size*sizeof(float)); + cudaMallocHost(&l.delta, output_size*sizeof(float)); + l.forward = forward_maxpool_layer; + l.backward = backward_maxpool_layer; + #ifdef GPU + l.forward_gpu = forward_maxpool_layer_gpu; + l.backward_gpu = backward_maxpool_layer_gpu; + l.indexes_gpu = cuda_make_int_array(0, output_size); + l.output_gpu = cuda_make_array(l.output, output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); + #endif + fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); + return l; +} + +void resize_maxpool_layer(maxpool_layer *l, int w, int h) +{ + l->h = h; + l->w = w; + l->inputs = h*w*l->c; + + l->out_w = (w + l->pad - l->size)/l->stride + 1; + 
l->out_h = (h + l->pad - l->size)/l->stride + 1; + l->outputs = l->out_w * l->out_h * l->c; + int output_size = l->outputs * l->batch; + + l->indexes = realloc(l->indexes, output_size * sizeof(int)); + l->output = realloc(l->output, output_size * sizeof(float)); + l->delta = realloc(l->delta, output_size * sizeof(float)); + + #ifdef GPU + cuda_free((float *)l->indexes_gpu); + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->indexes_gpu = cuda_make_int_array(0, output_size); + l->output_gpu = cuda_make_array(l->output, output_size); + l->delta_gpu = cuda_make_array(l->delta, output_size); + #endif +} + +void forward_maxpool_layer(const maxpool_layer l, network net) +{ + int b,i,j,k,m,n; + int w_offset = -l.pad/2; + int h_offset = -l.pad/2; + + int h = l.out_h; + int w = l.out_w; + int c = l.c; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < c; ++k){ + for(i = 0; i < h; ++i){ + for(j = 0; j < w; ++j){ + int out_index = j + w*(i + h*(k + c*b)); + float max = -FLT_MAX; + int max_i = -1; + for(n = 0; n < l.size; ++n){ + for(m = 0; m < l.size; ++m){ + int cur_h = h_offset + i*l.stride + n; + int cur_w = w_offset + j*l.stride + m; + int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c)); + int valid = (cur_h >= 0 && cur_h < l.h && + cur_w >= 0 && cur_w < l.w); + float val = (valid != 0) ? net.input[index] : -FLT_MAX; + max_i = (val > max) ? index : max_i; + max = (val > max) ? 
val : max; + } + } + l.output[out_index] = max; + l.indexes[out_index] = max_i; + } + } + } + } +} + +void backward_maxpool_layer(const maxpool_layer l, network net) +{ + int i; + int h = l.out_h; + int w = l.out_w; + int c = l.c; + for(i = 0; i < h*w*c*l.batch; ++i){ + int index = l.indexes[i]; + net.delta[index] += l.delta[i]; + } +} + diff --git a/workloads/realworld/pinned/darknet/src/maxpool_layer.h b/workloads/realworld/pinned/darknet/src/maxpool_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..f01adb957e8bd8ce01a06e5a1ff14a988ae07149 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/maxpool_layer.h @@ -0,0 +1,23 @@ +#ifndef MAXPOOL_LAYER_H +#define MAXPOOL_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +typedef layer maxpool_layer; + +image get_maxpool_image(maxpool_layer l); +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding); +void resize_maxpool_layer(maxpool_layer *l, int w, int h); +void forward_maxpool_layer(const maxpool_layer l, network net); +void backward_maxpool_layer(const maxpool_layer l, network net); + +#ifdef GPU +void forward_maxpool_layer_gpu(maxpool_layer l, network net); +void backward_maxpool_layer_gpu(maxpool_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/pinned/darknet/src/maxpool_layer_kernels.cu b/workloads/realworld/pinned/darknet/src/maxpool_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..23302f8295682c5d9112fb12a7f63cd47a82954b --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/maxpool_layer_kernels.cu @@ -0,0 +1,106 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "maxpool_layer.h" +#include "cuda_dark.h" +} + +__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes) +{ + 
int h = (in_h + pad - size)/stride + 1; + int w = (in_w + pad - size)/stride + 1; + int c = in_c; + + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int j = id % w; + id /= w; + int i = id % h; + id /= h; + int k = id % c; + id /= c; + int b = id; + + int w_offset = -pad/2; + int h_offset = -pad/2; + + int out_index = j + w*(i + h*(k + c*b)); + float max = -INFINITY; + int max_i = -1; + int l, m; + for(l = 0; l < size; ++l){ + for(m = 0; m < size; ++m){ + int cur_h = h_offset + i*stride + l; + int cur_w = w_offset + j*stride + m; + int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c)); + int valid = (cur_h >= 0 && cur_h < in_h && + cur_w >= 0 && cur_w < in_w); + float val = (valid != 0) ? input[index] : -INFINITY; + max_i = (val > max) ? index : max_i; + max = (val > max) ? val : max; + } + } + output[out_index] = max; + indexes[out_index] = max_i; +} + +__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes) +{ + int h = (in_h + pad - size)/stride + 1; + int w = (in_w + pad - size)/stride + 1; + int c = in_c; + int area = (size-1)/stride; + + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int index = id; + int j = id % in_w; + id /= in_w; + int i = id % in_h; + id /= in_h; + int k = id % in_c; + id /= in_c; + int b = id; + + int w_offset = -pad/2; + int h_offset = -pad/2; + + float d = 0; + int l, m; + for(l = -area; l < area+1; ++l){ + for(m = -area; m < area+1; ++m){ + int out_w = (j-w_offset)/stride + m; + int out_h = (i-h_offset)/stride + l; + int out_index = out_w + w*(out_h + h*(k + c*b)); + int valid = (out_w >= 0 && out_w < w && + out_h >= 0 && out_h < h); + d += (valid && indexes[out_index] == index) ? 
delta[out_index] : 0; + } + } + prev_delta[index] += d; +} + +extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network net) +{ + int h = layer.out_h; + int w = layer.out_w; + int c = layer.c; + + size_t n = h*w*c*layer.batch; + + forward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, net.input_gpu, layer.output_gpu, layer.indexes_gpu); + check_error(cudaPeekAtLastError()); +} + +extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network net) +{ + size_t n = layer.h*layer.w*layer.c*layer.batch; + + backward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, net.delta_gpu, layer.indexes_gpu); + check_error(cudaPeekAtLastError()); +} + diff --git a/workloads/realworld/pinned/darknet/src/network.c b/workloads/realworld/pinned/darknet/src/network.c new file mode 100644 index 0000000000000000000000000000000000000000..aaab7997b5ee7da829289fa153f942a066b43d8c --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/network.c @@ -0,0 +1,1129 @@ +#include +#include +#include +#include "network.h" +#include "image.h" +#include "data.h" +#include "utils.h" +#include "blas.h" + +#include "crop_layer.h" +#include "connected_layer.h" +#include "gru_layer.h" +#include "rnn_layer.h" +#include "crnn_layer.h" +#include "local_layer.h" +#include "convolutional_layer.h" +#include "activation_layer.h" +#include "detection_layer.h" +#include "region_layer.h" +#include "yolo_layer.h" +#include "normalization_layer.h" +#include "batchnorm_layer.h" +#include "maxpool_layer.h" +#include "reorg_layer.h" +#include "avgpool_layer.h" +#include "cost_layer.h" +#include "softmax_layer.h" +#include "dropout_layer.h" +#include "route_layer.h" +#include "upsample_layer.h" +#include "shortcut_layer.h" +#include "parser.h" +#include "data.h" + +load_args get_base_args(network *net) +{ + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.size = net->w; + + 
args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.center = net->center; + args.saturation = net->saturation; + args.hue = net->hue; + return args; +} + +network *load_network(char *cfg, char *weights, int clear) +{ + network *net = parse_network_cfg(cfg); + if(weights && weights[0] != 0){ + load_weights(net, weights); + } + if(clear) (*net->seen) = 0; + return net; +} + +size_t get_current_batch(network *net) +{ + size_t batch_num = (*net->seen)/(net->batch*net->subdivisions); + return batch_num; +} + +void reset_network_state(network *net, int b) +{ + int i; + for (i = 0; i < net->n; ++i) { + #ifdef GPU + layer l = net->layers[i]; + if(l.state_gpu){ + fill_gpu(l.outputs, 0, l.state_gpu + l.outputs*b, 1); + } + if(l.h_gpu){ + fill_gpu(l.outputs, 0, l.h_gpu + l.outputs*b, 1); + } + #endif + } +} + +void reset_rnn(network *net) +{ + reset_network_state(net, 0); +} + +float get_current_rate(network *net) +{ + size_t batch_num = get_current_batch(net); + int i; + float rate; + if (batch_num < net->burn_in) return net->learning_rate * pow((float)batch_num / net->burn_in, net->power); + switch (net->policy) { + case CONSTANT: + return net->learning_rate; + case STEP: + return net->learning_rate * pow(net->scale, batch_num/net->step); + case STEPS: + rate = net->learning_rate; + for(i = 0; i < net->num_steps; ++i){ + if(net->steps[i] > batch_num) return rate; + rate *= net->scales[i]; + } + return rate; + case EXP: + return net->learning_rate * pow(net->gamma, batch_num); + case POLY: + return net->learning_rate * pow(1 - (float)batch_num / net->max_batches, net->power); + case RANDOM: + return net->learning_rate * pow(rand_uniform(0,1), net->power); + case SIG: + return net->learning_rate * (1./(1.+exp(net->gamma*(batch_num - net->step)))); + default: + fprintf(stderr, "Policy is weird!\n"); + return net->learning_rate; + } +} + +char *get_layer_string(LAYER_TYPE a) +{ + 
switch(a){ + case CONVOLUTIONAL: + return "convolutional"; + case ACTIVE: + return "activation"; + case LOCAL: + return "local"; + case DECONVOLUTIONAL: + return "deconvolutional"; + case CONNECTED: + return "connected"; + case RNN: + return "rnn"; + case GRU: + return "gru"; + case LSTM: + return "lstm"; + case CRNN: + return "crnn"; + case MAXPOOL: + return "maxpool"; + case REORG: + return "reorg"; + case AVGPOOL: + return "avgpool"; + case SOFTMAX: + return "softmax"; + case DETECTION: + return "detection"; + case REGION: + return "region"; + case YOLO: + return "yolo"; + case DROPOUT: + return "dropout"; + case CROP: + return "crop"; + case COST: + return "cost"; + case ROUTE: + return "route"; + case SHORTCUT: + return "shortcut"; + case NORMALIZATION: + return "normalization"; + case BATCHNORM: + return "batchnorm"; + default: + break; + } + return "none"; +} + +network *make_network(int n) +{ + network *net = calloc(1, sizeof(network)); + net->n = n; + net->layers = calloc(net->n, sizeof(layer)); + net->seen = calloc(1, sizeof(size_t)); + net->t = calloc(1, sizeof(int)); + net->cost = calloc(1, sizeof(float)); + return net; +} + +void forward_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + forward_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + for(i = 0; i < net.n; ++i){ + net.index = i; + layer l = net.layers[i]; + if(l.delta){ + fill_cpu(l.outputs * l.batch, 0, l.delta, 1); + } + l.forward(l, net); + net.input = l.output; + if(l.truth) { + net.truth = l.output; + } + } + calc_network_cost(netp); +} + +void update_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + update_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + update_args a = {0}; + a.batch = net.batch*net.subdivisions; + a.learning_rate = get_current_rate(netp); + a.momentum = net.momentum; + a.decay = net.decay; + a.adam = net.adam; + a.B1 = net.B1; + a.B2 = net.B2; + a.eps = net.eps; + ++*net.t; + a.t = 
*net.t; + + for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.update){ + l.update(l, a); + } + } +} + +void calc_network_cost(network *netp) +{ + network net = *netp; + int i; + float sum = 0; + int count = 0; + for(i = 0; i < net.n; ++i){ + if(net.layers[i].cost){ + sum += net.layers[i].cost[0]; + ++count; + } + } + *net.cost = sum/count; +} + +int get_predicted_class_network(network *net) +{ + return max_index(net->output, net->outputs); +} + +void backward_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + backward_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + network orig = net; + for(i = net.n-1; i >= 0; --i){ + layer l = net.layers[i]; + if(l.stopbackward) break; + if(i == 0){ + net = orig; + }else{ + layer prev = net.layers[i-1]; + net.input = prev.output; + net.delta = prev.delta; + } + net.index = i; + l.backward(l, net); + } +} + +float train_network_datum(network *net) +{ + *net->seen += net->batch; + net->train = 1; + forward_network(net); + backward_network(net); + float error = *net->cost; + if(((*net->seen)/net->batch)%net->subdivisions == 0) update_network(net); + return error; +} + +float train_network_sgd(network *net, data d, int n) +{ + int batch = net->batch; + + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + get_random_batch(d, batch, net->input, net->truth); + float err = train_network_datum(net); + sum += err; + } + return (float)sum/(n*batch); +} + +float train_network(network *net, data d) +{ + assert(d.X.rows % net->batch == 0); + int batch = net->batch; + int n = d.X.rows / batch; + + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + get_next_batch(d, batch, i*batch, net->input, net->truth); + float err = train_network_datum(net); + sum += err; + } + return (float)sum/(n*batch); +} + +void set_temp_network(network *net, float t) +{ + int i; + for(i = 0; i < net->n; ++i){ + net->layers[i].temperature = t; + } +} + + +void set_batch_network(network *net, int b) +{ + 
net->batch = b; + int i; + for(i = 0; i < net->n; ++i){ + net->layers[i].batch = b; +#ifdef CUDNN + if(net->layers[i].type == CONVOLUTIONAL){ + cudnn_convolutional_setup(net->layers + i); + } + if(net->layers[i].type == DECONVOLUTIONAL){ + layer *l = net->layers + i; + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + } +#endif + } +} + +int resize_network(network *net, int w, int h) +{ +#ifdef GPU + cuda_set_device(net->gpu_index); + cuda_free(net->workspace); +#endif + int i; + //if(w == net->w && h == net->h) return 0; + net->w = w; + net->h = h; + int inputs = 0; + size_t workspace_size = 0; + //fprintf(stderr, "Resizing to %d x %d...\n", w, h); + //fflush(stderr); + for (i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + resize_convolutional_layer(&l, w, h); + }else if(l.type == CROP){ + resize_crop_layer(&l, w, h); + }else if(l.type == MAXPOOL){ + resize_maxpool_layer(&l, w, h); + }else if(l.type == REGION){ + resize_region_layer(&l, w, h); + }else if(l.type == YOLO){ + resize_yolo_layer(&l, w, h); + }else if(l.type == ROUTE){ + resize_route_layer(&l, net); + }else if(l.type == SHORTCUT){ + resize_shortcut_layer(&l, w, h); + }else if(l.type == UPSAMPLE){ + resize_upsample_layer(&l, w, h); + }else if(l.type == REORG){ + resize_reorg_layer(&l, w, h); + }else if(l.type == AVGPOOL){ + resize_avgpool_layer(&l, w, h); + }else if(l.type == NORMALIZATION){ + resize_normalization_layer(&l, w, h); + }else if(l.type == COST){ + resize_cost_layer(&l, inputs); + }else{ + error("Cannot resize this type of layer"); + } + if(l.workspace_size > workspace_size) workspace_size = l.workspace_size; + if(l.workspace_size > 2000000000) assert(0); + inputs = l.outputs; + net->layers[i] = l; + w = l.out_w; + h = l.out_h; + if(l.type == AVGPOOL) break; + } + layer out = 
get_network_output_layer(net); + net->inputs = net->layers[0].inputs; + net->outputs = out.outputs; + net->truths = out.outputs; + if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths; + net->output = out.output; + free(net->input); + free(net->truth); + net->input = calloc(net->inputs*net->batch, sizeof(float)); + net->truth = calloc(net->truths*net->batch, sizeof(float)); +#ifdef GPU + if(gpu_index >= 0){ + cuda_free(net->input_gpu); + cuda_free(net->truth_gpu); + net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch); + net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch); + if(workspace_size){ + net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1); + } + }else { + free(net->workspace); + net->workspace = calloc(1, workspace_size); + } +#else + free(net->workspace); + net->workspace = calloc(1, workspace_size); +#endif + //fprintf(stderr, " Done!\n"); + return 0; +} + +layer get_network_detection_layer(network *net) +{ + int i; + for(i = 0; i < net->n; ++i){ + if(net->layers[i].type == DETECTION){ + return net->layers[i]; + } + } + fprintf(stderr, "Detection layer not found!!\n"); + layer l = {0}; + return l; +} + +image get_network_image_layer(network *net, int i) +{ + layer l = net->layers[i]; +#ifdef GPU + //cuda_pull_array(l.output_gpu, l.output, l.outputs); +#endif + if (l.out_w && l.out_h && l.out_c){ + return float_to_image(l.out_w, l.out_h, l.out_c, l.output); + } + image def = {0}; + return def; +} + +image get_network_image(network *net) +{ + int i; + for(i = net->n-1; i >= 0; --i){ + image m = get_network_image_layer(net, i); + if(m.h != 0) return m; + } + image def = {0}; + return def; +} + +void visualize_network(network *net) +{ + image *prev = 0; + int i; + char buff[256]; + for(i = 0; i < net->n; ++i){ + sprintf(buff, "Layer %d", i); + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + prev = visualize_convolutional_layer(l, buff, prev); + } + } +} + +void 
top_predictions(network *net, int k, int *index) +{ + top_k(net->output, net->outputs, k, index); +} + + +float *network_predict(network *net, float *input) +{ + network orig = *net; + net->input = input; + net->truth = 0; + net->train = 0; + net->delta = 0; + forward_network(net); + float *out = net->output; + *net = orig; + return out; +} + +int num_detections(network *net, float thresh) +{ + int i; + int s = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO){ + s += yolo_num_detections(l, thresh); + } + if(l.type == DETECTION || l.type == REGION){ + s += l.w*l.h*l.n; + } + } + return s; +} + +detection *make_network_boxes(network *net, float thresh, int *num) +{ + layer l = net->layers[net->n - 1]; + int i; + int nboxes = num_detections(net, thresh); + if(num) *num = nboxes; + detection *dets = calloc(nboxes, sizeof(detection)); + for(i = 0; i < nboxes; ++i){ + dets[i].prob = calloc(l.classes, sizeof(float)); + if(l.coords > 4){ + dets[i].mask = calloc(l.coords-4, sizeof(float)); + } + } + return dets; +} + +void fill_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets) +{ + int j; + for(j = 0; j < net->n; ++j){ + layer l = net->layers[j]; + if(l.type == YOLO){ + int count = get_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets); + dets += count; + } + if(l.type == REGION){ + get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets); + dets += l.w*l.h*l.n; + } + if(l.type == DETECTION){ + get_detection_detections(l, w, h, thresh, dets); + dets += l.w*l.h*l.n; + } + } +} + +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num) +{ + detection *dets = make_network_boxes(net, thresh, num); + fill_network_boxes(net, w, h, thresh, hier, map, relative, dets); + return dets; +} + +void free_detections(detection *dets, int n) +{ + int i; + for(i = 0; i < n; ++i){ + 
free(dets[i].prob); + if(dets[i].mask) free(dets[i].mask); + } + free(dets); +} + +float *network_predict_image(network *net, image im) +{ + image imr = letterbox_image(im, net->w, net->h); + set_batch_network(net, 1); + float *p = network_predict(net, imr.data); + free_image(imr); + return p; +} + +int network_width(network *net){return net->w;} +int network_height(network *net){return net->h;} + +matrix network_predict_data_multi(network *net, data test, int n) +{ + int i,j,b,m; + int k = net->outputs; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.rows, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + } + for(m = 0; m < n; ++m){ + float *out = network_predict(net, X); + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + for(j = 0; j < k; ++j){ + pred.vals[i+b][j] += out[j+b*k]/n; + } + } + } + } + free(X); + return pred; +} + +matrix network_predict_data(network *net, data test) +{ + int i,j,b; + int k = net->outputs; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.cols, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + } + float *out = network_predict(net, X); + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + for(j = 0; j < k; ++j){ + pred.vals[i+b][j] = out[j+b*k]; + } + } + } + free(X); + return pred; +} + +void print_network(network *net) +{ + int i,j; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + float *output = l.output; + int n = l.outputs; + float mean = mean_array(output, n); + float vari = variance_array(output, n); + fprintf(stderr, "Layer %d - Mean: %f, Variance: %f\n",i,mean, vari); + if(n > 100) n = 100; + for(j = 0; j < n; 
++j) fprintf(stderr, "%f, ", output[j]); + if(n == 100)fprintf(stderr,".....\n"); + fprintf(stderr, "\n"); + } +} + +void compare_networks(network *n1, network *n2, data test) +{ + matrix g1 = network_predict_data(n1, test); + matrix g2 = network_predict_data(n2, test); + int i; + int a,b,c,d; + a = b = c = d = 0; + for(i = 0; i < g1.rows; ++i){ + int truth = max_index(test.y.vals[i], test.y.cols); + int p1 = max_index(g1.vals[i], g1.cols); + int p2 = max_index(g2.vals[i], g2.cols); + if(p1 == truth){ + if(p2 == truth) ++d; + else ++c; + }else{ + if(p2 == truth) ++b; + else ++a; + } + } + printf("%5d %5d\n%5d %5d\n", a, b, c, d); + float num = pow((abs(b - c) - 1.), 2.); + float den = b + c; + printf("%f\n", num/den); +} + +float network_accuracy(network *net, data d) +{ + matrix guess = network_predict_data(net, d); + float acc = matrix_topk_accuracy(d.y, guess,1); + free_matrix(guess); + return acc; +} + +float *network_accuracies(network *net, data d, int n) +{ + static float acc[2]; + matrix guess = network_predict_data(net, d); + acc[0] = matrix_topk_accuracy(d.y, guess, 1); + acc[1] = matrix_topk_accuracy(d.y, guess, n); + free_matrix(guess); + return acc; +} + +layer get_network_output_layer(network *net) +{ + int i; + for(i = net->n - 1; i >= 0; --i){ + if(net->layers[i].type != COST) break; + } + return net->layers[i]; +} + +float network_accuracy_multi(network *net, data d, int n) +{ + matrix guess = network_predict_data_multi(net, d, n); + float acc = matrix_topk_accuracy(d.y, guess,1); + free_matrix(guess); + return acc; +} + +void free_network(network *net) +{ + int i; + for(i = 0; i < net->n; ++i){ + free_layer(net->layers[i]); + } + free(net->layers); + if(net->input) free(net->input); + if(net->truth) free(net->truth); +#ifdef GPU + if(net->input_gpu) cuda_free(net->input_gpu); + if(net->truth_gpu) cuda_free(net->truth_gpu); +#endif + free(net); +} + +// Some day... +// ^ What the hell is this comment for? 
+ + +layer network_output_layer(network *net) +{ + int i; + for(i = net->n - 1; i >= 0; --i){ + if(net->layers[i].type != COST) break; + } + return net->layers[i]; +} + +int network_inputs(network *net) +{ + return net->layers[0].inputs; +} + +int network_outputs(network *net) +{ + return network_output_layer(net).outputs; +} + +float *network_output(network *net) +{ + return network_output_layer(net).output; +} + +#ifdef GPU + +void forward_network_gpu(network *netp) +{ + network net = *netp; + cuda_set_device(net.gpu_index); + cuda_push_array(net.input_gpu, net.input, net.inputs*net.batch); + if(net.truth){ + cuda_push_array(net.truth_gpu, net.truth, net.truths*net.batch); + } + + int i; + for(i = 0; i < net.n; ++i){ + net.index = i; + layer l = net.layers[i]; + if(l.delta_gpu){ + fill_gpu(l.outputs * l.batch, 0, l.delta_gpu, 1); + } + l.forward_gpu(l, net); + net.input_gpu = l.output_gpu; + net.input = l.output; + if(l.truth) { + net.truth_gpu = l.output_gpu; + net.truth = l.output; + } + } + pull_network_output(netp); + calc_network_cost(netp); +} + +void backward_network_gpu(network *netp) +{ + int i; + network net = *netp; + network orig = net; + cuda_set_device(net.gpu_index); + for(i = net.n-1; i >= 0; --i){ + layer l = net.layers[i]; + if(l.stopbackward) break; + if(i == 0){ + net = orig; + }else{ + layer prev = net.layers[i-1]; + net.input = prev.output; + net.delta = prev.delta; + net.input_gpu = prev.output_gpu; + net.delta_gpu = prev.delta_gpu; + } + net.index = i; + l.backward_gpu(l, net); + } +} + +void update_network_gpu(network *netp) +{ + network net = *netp; + cuda_set_device(net.gpu_index); + int i; + update_args a = {0}; + a.batch = net.batch*net.subdivisions; + a.learning_rate = get_current_rate(netp); + a.momentum = net.momentum; + a.decay = net.decay; + a.adam = net.adam; + a.B1 = net.B1; + a.B2 = net.B2; + a.eps = net.eps; + ++*net.t; + a.t = (*net.t); + + for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.update_gpu){ + 
l.update_gpu(l, a); + } + } +} + +void harmless_update_network_gpu(network *netp) +{ + network net = *netp; + cuda_set_device(net.gpu_index); + int i; + for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.weight_updates_gpu) fill_gpu(l.nweights, 0, l.weight_updates_gpu, 1); + if(l.bias_updates_gpu) fill_gpu(l.nbiases, 0, l.bias_updates_gpu, 1); + if(l.scale_updates_gpu) fill_gpu(l.nbiases, 0, l.scale_updates_gpu, 1); + } +} + +typedef struct { + network *net; + data d; + float *err; +} train_args; + +void *train_thread(void *ptr) +{ + train_args args = *(train_args*)ptr; + free(ptr); + cuda_set_device(args.net->gpu_index); + *args.err = train_network(args.net, args.d); + return 0; +} + +pthread_t train_network_in_thread(network *net, data d, float *err) +{ + pthread_t thread; + train_args *ptr = (train_args *)calloc(1, sizeof(train_args)); + ptr->net = net; + ptr->d = d; + ptr->err = err; + if(pthread_create(&thread, 0, train_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void merge_weights(layer l, layer base) +{ + if (l.type == CONVOLUTIONAL) { + axpy_cpu(l.n, 1, l.bias_updates, 1, base.biases, 1); + axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weights, 1); + if (l.scales) { + axpy_cpu(l.n, 1, l.scale_updates, 1, base.scales, 1); + } + } else if(l.type == CONNECTED) { + axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.biases, 1); + axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weights, 1); + } +} + +void scale_weights(layer l, float s) +{ + if (l.type == CONVOLUTIONAL) { + scal_cpu(l.n, s, l.biases, 1); + scal_cpu(l.nweights, s, l.weights, 1); + if (l.scales) { + scal_cpu(l.n, s, l.scales, 1); + } + } else if(l.type == CONNECTED) { + scal_cpu(l.outputs, s, l.biases, 1); + scal_cpu(l.outputs*l.inputs, s, l.weights, 1); + } +} + + +void pull_weights(layer l) +{ + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + cuda_pull_array(l.biases_gpu, l.bias_updates, l.n); + cuda_pull_array(l.weights_gpu, 
l.weight_updates, l.nweights); + if(l.scales) cuda_pull_array(l.scales_gpu, l.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_pull_array(l.biases_gpu, l.bias_updates, l.outputs); + cuda_pull_array(l.weights_gpu, l.weight_updates, l.outputs*l.inputs); + } +} + +void push_weights(layer l) +{ + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + cuda_push_array(l.biases_gpu, l.biases, l.n); + cuda_push_array(l.weights_gpu, l.weights, l.nweights); + if(l.scales) cuda_push_array(l.scales_gpu, l.scales, l.n); + } else if(l.type == CONNECTED){ + cuda_push_array(l.biases_gpu, l.biases, l.outputs); + cuda_push_array(l.weights_gpu, l.weights, l.outputs*l.inputs); + } +} + +void distribute_weights(layer l, layer base) +{ + if (l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL) { + cuda_push_array(l.biases_gpu, base.biases, l.n); + cuda_push_array(l.weights_gpu, base.weights, l.nweights); + if (base.scales) cuda_push_array(l.scales_gpu, base.scales, l.n); + } else if (l.type == CONNECTED) { + cuda_push_array(l.biases_gpu, base.biases, l.outputs); + cuda_push_array(l.weights_gpu, base.weights, l.outputs*l.inputs); + } +} + + +/* + + void pull_updates(layer l) + { + if(l.type == CONVOLUTIONAL){ + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + if(l.scale_updates) cuda_pull_array(l.scale_updates_gpu, l.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs); + } + } + + void push_updates(layer l) + { + if(l.type == CONVOLUTIONAL){ + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + if(l.scale_updates) cuda_push_array(l.scale_updates_gpu, l.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_push_array(l.bias_updates_gpu, l.bias_updates, 
l.outputs); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs); + } + } + + void update_layer(layer l, network net) + { + int update_batch = net.batch*net.subdivisions; + float rate = get_current_rate(net); + l.t = get_current_batch(net); + if(l.update_gpu){ + l.update_gpu(l, update_batch, rate*l.learning_rate_scale, net.momentum, net.decay); + } + } + void merge_updates(layer l, layer base) + { + if (l.type == CONVOLUTIONAL) { + axpy_cpu(l.n, 1, l.bias_updates, 1, base.bias_updates, 1); + axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weight_updates, 1); + if (l.scale_updates) { + axpy_cpu(l.n, 1, l.scale_updates, 1, base.scale_updates, 1); + } + } else if(l.type == CONNECTED) { + axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.bias_updates, 1); + axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weight_updates, 1); + } + } + + void distribute_updates(layer l, layer base) + { + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.n); + cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.nweights); + if(base.scale_updates) cuda_push_array(l.scale_updates_gpu, base.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.outputs); + cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.outputs*l.inputs); + } + } + */ + +/* + void sync_layer(network *nets, int n, int j) + { + int i; + network net = nets[0]; + layer base = net.layers[j]; + scale_weights(base, 0); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i].gpu_index); + layer l = nets[i].layers[j]; + pull_weights(l); + merge_weights(l, base); + } + scale_weights(base, 1./n); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i].gpu_index); + layer l = nets[i].layers[j]; + distribute_weights(l, base); + } + } + */ + +void sync_layer(network **nets, int n, int j) +{ + int i; + network *net = nets[0]; + layer base = net->layers[j]; + 
scale_weights(base, 0); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i]->gpu_index); + layer l = nets[i]->layers[j]; + pull_weights(l); + merge_weights(l, base); + } + scale_weights(base, 1./n); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i]->gpu_index); + layer l = nets[i]->layers[j]; + distribute_weights(l, base); + } +} + +typedef struct{ + network **nets; + int n; + int j; +} sync_args; + +void *sync_layer_thread(void *ptr) +{ + sync_args args = *(sync_args*)ptr; + sync_layer(args.nets, args.n, args.j); + free(ptr); + return 0; +} + +pthread_t sync_layer_in_thread(network **nets, int n, int j) +{ + pthread_t thread; + sync_args *ptr = (sync_args *)calloc(1, sizeof(sync_args)); + ptr->nets = nets; + ptr->n = n; + ptr->j = j; + if(pthread_create(&thread, 0, sync_layer_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void sync_nets(network **nets, int n, int interval) +{ + int j; + int layers = nets[0]->n; + pthread_t *threads = (pthread_t *) calloc(layers, sizeof(pthread_t)); + + *(nets[0]->seen) += interval * (n-1) * nets[0]->batch * nets[0]->subdivisions; + for (j = 0; j < n; ++j){ + *(nets[j]->seen) = *(nets[0]->seen); + } + for (j = 0; j < layers; ++j) { + threads[j] = sync_layer_in_thread(nets, n, j); + } + for (j = 0; j < layers; ++j) { + pthread_join(threads[j], 0); + } + free(threads); +} + +float train_networks(network **nets, int n, data d, int interval) +{ + int i; + int batch = nets[0]->batch; + int subdivisions = nets[0]->subdivisions; + assert(batch * subdivisions * n == d.X.rows); + pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t)); + float *errors = (float *) calloc(n, sizeof(float)); + + float sum = 0; + for(i = 0; i < n; ++i){ + data p = get_data_part(d, i, n); + threads[i] = train_network_in_thread(nets[i], p, errors + i); + } + for(i = 0; i < n; ++i){ + pthread_join(threads[i], 0); + //printf("%f\n", errors[i]); + sum += errors[i]; + } + //cudaDeviceSynchronize(); + if (get_current_batch(nets[0]) 
% interval == 0) { + printf("Syncing... "); + fflush(stdout); + sync_nets(nets, n, interval); + printf("Done!\n"); + } + //cudaDeviceSynchronize(); + free(threads); + free(errors); + return (float)sum/(n); +} + +void pull_network_output(network *net) +{ + layer l = get_network_output_layer(net); + cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); +} + +#endif diff --git a/workloads/realworld/pinned/darknet/src/network.h b/workloads/realworld/pinned/darknet/src/network.h new file mode 100644 index 0000000000000000000000000000000000000000..1b0dfd1aaa3e090c6ce276d26f24d127de2cb66d --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/network.h @@ -0,0 +1,29 @@ +// Oh boy, why am I about to do this.... +#ifndef NETWORK_H +#define NETWORK_H +#include "darknet.h" + +#include "image.h" +#include "layer.h" +#include "data.h" +#include "tree.h" + + +#ifdef GPU +void pull_network_output(network *net); +#endif + +void compare_networks(network *n1, network *n2, data d); +char *get_layer_string(LAYER_TYPE a); + +network *make_network(int n); + + +float network_accuracy_multi(network *net, data d, int n); +int get_predicted_class_network(network *net); +void print_network(network *net); +int resize_network(network *net, int w, int h); +void calc_network_cost(network *net); + +#endif + diff --git a/workloads/realworld/pinned/darknet/src/normalization_layer.c b/workloads/realworld/pinned/darknet/src/normalization_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..424714fe8653f79b57fd4cde625997749d8eff83 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/normalization_layer.c @@ -0,0 +1,151 @@ +#include "normalization_layer.h" +#include "blas.h" + +#include + +layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa) +{ + fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size); + layer layer = {0}; + layer.type = NORMALIZATION; + layer.batch 
= batch; + layer.h = layer.out_h = h; + layer.w = layer.out_w = w; + layer.c = layer.out_c = c; + layer.kappa = kappa; + layer.size = size; + layer.alpha = alpha; + layer.beta = beta; + layer.output = calloc(h * w * c * batch, sizeof(float)); + layer.delta = calloc(h * w * c * batch, sizeof(float)); + layer.squared = calloc(h * w * c * batch, sizeof(float)); + layer.norms = calloc(h * w * c * batch, sizeof(float)); + layer.inputs = w*h*c; + layer.outputs = layer.inputs; + + layer.forward = forward_normalization_layer; + layer.backward = backward_normalization_layer; + #ifdef GPU + layer.forward_gpu = forward_normalization_layer_gpu; + layer.backward_gpu = backward_normalization_layer_gpu; + + layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch); + layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch); + layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch); + layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch); + #endif + return layer; +} + +void resize_normalization_layer(layer *layer, int w, int h) +{ + int c = layer->c; + int batch = layer->batch; + layer->h = h; + layer->w = w; + layer->out_h = h; + layer->out_w = w; + layer->inputs = w*h*c; + layer->outputs = layer->inputs; + layer->output = realloc(layer->output, h * w * c * batch * sizeof(float)); + layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float)); + layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float)); + layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float)); +#ifdef GPU + cuda_free(layer->output_gpu); + cuda_free(layer->delta_gpu); + cuda_free(layer->squared_gpu); + cuda_free(layer->norms_gpu); + layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch); + layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch); + layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch); + layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch); +#endif 
+} + +void forward_normalization_layer(const layer layer, network net) +{ + int k,b; + int w = layer.w; + int h = layer.h; + int c = layer.c; + scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1); + + for(b = 0; b < layer.batch; ++b){ + float *squared = layer.squared + w*h*c*b; + float *norms = layer.norms + w*h*c*b; + float *input = net.input + w*h*c*b; + pow_cpu(w*h*c, 2, input, 1, squared, 1); + + const_cpu(w*h, layer.kappa, norms, 1); + for(k = 0; k < layer.size/2; ++k){ + axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); + } + + for(k = 1; k < layer.c; ++k){ + copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); + int prev = k - ((layer.size-1)/2) - 1; + int next = k + (layer.size/2); + if(prev >= 0) axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); + if(next < layer.c) axpy_cpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); + } + } + pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1); + mul_cpu(w*h*c*layer.batch, net.input, 1, layer.output, 1); +} + +void backward_normalization_layer(const layer layer, network net) +{ + // TODO This is approximate ;-) + // Also this should add in to delta instead of overwritting. 
+ + int w = layer.w; + int h = layer.h; + int c = layer.c; + pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, net.delta, 1); + mul_cpu(w*h*c*layer.batch, layer.delta, 1, net.delta, 1); +} + +#ifdef GPU +void forward_normalization_layer_gpu(const layer layer, network net) +{ + int k,b; + int w = layer.w; + int h = layer.h; + int c = layer.c; + scal_gpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1); + + for(b = 0; b < layer.batch; ++b){ + float *squared = layer.squared_gpu + w*h*c*b; + float *norms = layer.norms_gpu + w*h*c*b; + float *input = net.input_gpu + w*h*c*b; + pow_gpu(w*h*c, 2, input, 1, squared, 1); + + const_gpu(w*h, layer.kappa, norms, 1); + for(k = 0; k < layer.size/2; ++k){ + axpy_gpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); + } + + for(k = 1; k < layer.c; ++k){ + copy_gpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); + int prev = k - ((layer.size-1)/2) - 1; + int next = k + (layer.size/2); + if(prev >= 0) axpy_gpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); + if(next < layer.c) axpy_gpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); + } + } + pow_gpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1); + mul_gpu(w*h*c*layer.batch, net.input_gpu, 1, layer.output_gpu, 1); +} + +void backward_normalization_layer_gpu(const layer layer, network net) +{ + // TODO This is approximate ;-) + + int w = layer.w; + int h = layer.h; + int c = layer.c; + pow_gpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, net.delta_gpu, 1); + mul_gpu(w*h*c*layer.batch, layer.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/pinned/darknet/src/normalization_layer.h b/workloads/realworld/pinned/darknet/src/normalization_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..665baa5066282335b6625618ce07c2fcc833d952 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/normalization_layer.h @@ -0,0 +1,19 @@ +#ifndef NORMALIZATION_LAYER_H +#define 
NORMALIZATION_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa); +void resize_normalization_layer(layer *layer, int h, int w); +void forward_normalization_layer(const layer layer, network net); +void backward_normalization_layer(const layer layer, network net); +void visualize_normalization_layer(layer layer, char *window); + +#ifdef GPU +void forward_normalization_layer_gpu(const layer layer, network net); +void backward_normalization_layer_gpu(const layer layer, network net); +#endif + +#endif diff --git a/workloads/realworld/pinned/darknet/src/option_list.c b/workloads/realworld/pinned/darknet/src/option_list.c new file mode 100644 index 0000000000000000000000000000000000000000..2f52781f8096fecc5e9d1db3cfbfa10685506b93 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/option_list.c @@ -0,0 +1,140 @@ +#include +#include +#include +#include "option_list.h" +#include "utils.h" + +list *read_data_cfg(char *filename) +{ + FILE *file = fopen(filename, "r"); + if(file == 0) file_error(filename); + char *line; + int nu = 0; + list *options = make_list(); + while((line=fgetl(file)) != 0){ + ++ nu; + strip(line); + switch(line[0]){ + case '\0': + case '#': + case ';': + free(line); + break; + default: + if(!read_option(line, options)){ + fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); + free(line); + } + break; + } + } + fclose(file); + return options; +} + +metadata get_metadata(char *file) +{ + metadata m = {0}; + list *options = read_data_cfg(file); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", 0); + if(!name_list) { + fprintf(stderr, "No names or labels found\n"); + } else { + m.names = get_labels(name_list); + } + m.classes = option_find_int(options, "classes", 2); + free_list(options); + return m; +} + +int 
read_option(char *s, list *options) +{ + size_t i; + size_t len = strlen(s); + char *val = 0; + for(i = 0; i < len; ++i){ + if(s[i] == '='){ + s[i] = '\0'; + val = s+i+1; + break; + } + } + if(i == len-1) return 0; + char *key = s; + option_insert(options, key, val); + return 1; +} + +void option_insert(list *l, char *key, char *val) +{ + kvp *p = malloc(sizeof(kvp)); + p->key = key; + p->val = val; + p->used = 0; + list_insert(l, p); +} + +void option_unused(list *l) +{ + node *n = l->front; + while(n){ + kvp *p = (kvp *)n->val; + if(!p->used){ + fprintf(stderr, "Unused field: '%s = %s'\n", p->key, p->val); + } + n = n->next; + } +} + +char *option_find(list *l, char *key) +{ + node *n = l->front; + while(n){ + kvp *p = (kvp *)n->val; + if(strcmp(p->key, key) == 0){ + p->used = 1; + return p->val; + } + n = n->next; + } + return 0; +} +char *option_find_str(list *l, char *key, char *def) +{ + char *v = option_find(l, key); + if(v) return v; + if(def) fprintf(stderr, "%s: Using default '%s'\n", key, def); + return def; +} + +int option_find_int(list *l, char *key, int def) +{ + char *v = option_find(l, key); + if(v) return atoi(v); + fprintf(stderr, "%s: Using default '%d'\n", key, def); + return def; +} + +int option_find_int_quiet(list *l, char *key, int def) +{ + char *v = option_find(l, key); + if(v) return atoi(v); + return def; +} + +float option_find_float_quiet(list *l, char *key, float def) +{ + char *v = option_find(l, key); + if(v) return atof(v); + return def; +} + +float option_find_float(list *l, char *key, float def) +{ + char *v = option_find(l, key); + if(v) return atof(v); + fprintf(stderr, "%s: Using default '%lf'\n", key, def); + return def; +} diff --git a/workloads/realworld/pinned/darknet/src/option_list.h b/workloads/realworld/pinned/darknet/src/option_list.h new file mode 100644 index 0000000000000000000000000000000000000000..844bd8724b77889d9ab6e6e70f62305e3339048c --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/option_list.h @@ 
-0,0 +1,19 @@ +#ifndef OPTION_LIST_H +#define OPTION_LIST_H +#include "list.h" + +typedef struct{ + char *key; + char *val; + int used; +} kvp; + + +int read_option(char *s, list *options); +void option_insert(list *l, char *key, char *val); +char *option_find(list *l, char *key); +float option_find_float(list *l, char *key, float def); +float option_find_float_quiet(list *l, char *key, float def); +void option_unused(list *l); + +#endif diff --git a/workloads/realworld/pinned/darknet/src/parser.c b/workloads/realworld/pinned/darknet/src/parser.c new file mode 100644 index 0000000000000000000000000000000000000000..c8141c9f2ddc95941900d11006ff583fadf22290 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/parser.c @@ -0,0 +1,1312 @@ +#include +#include +#include +#include + +#include "activation_layer.h" +#include "logistic_layer.h" +#include "l2norm_layer.h" +#include "activations.h" +#include "avgpool_layer.h" +#include "batchnorm_layer.h" +#include "blas.h" +#include "connected_layer.h" +#include "deconvolutional_layer.h" +#include "convolutional_layer.h" +#include "cost_layer.h" +#include "crnn_layer.h" +#include "crop_layer.h" +#include "detection_layer.h" +#include "dropout_layer.h" +#include "gru_layer.h" +#include "list.h" +#include "local_layer.h" +#include "maxpool_layer.h" +#include "normalization_layer.h" +#include "option_list.h" +#include "parser.h" +#include "region_layer.h" +#include "yolo_layer.h" +#include "iseg_layer.h" +#include "reorg_layer.h" +#include "rnn_layer.h" +#include "route_layer.h" +#include "upsample_layer.h" +#include "shortcut_layer.h" +#include "softmax_layer.h" +#include "lstm_layer.h" +#include "utils.h" + +typedef struct{ + char *type; + list *options; +}section; + +list *read_cfg(char *filename); + +LAYER_TYPE string_to_layer_type(char * type) +{ + + if (strcmp(type, "[shortcut]")==0) return SHORTCUT; + if (strcmp(type, "[crop]")==0) return CROP; + if (strcmp(type, "[cost]")==0) return COST; + if (strcmp(type, 
"[detection]")==0) return DETECTION; + if (strcmp(type, "[region]")==0) return REGION; + if (strcmp(type, "[yolo]")==0) return YOLO; + if (strcmp(type, "[iseg]")==0) return ISEG; + if (strcmp(type, "[local]")==0) return LOCAL; + if (strcmp(type, "[conv]")==0 + || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL; + if (strcmp(type, "[deconv]")==0 + || strcmp(type, "[deconvolutional]")==0) return DECONVOLUTIONAL; + if (strcmp(type, "[activation]")==0) return ACTIVE; + if (strcmp(type, "[logistic]")==0) return LOGXENT; + if (strcmp(type, "[l2norm]")==0) return L2NORM; + if (strcmp(type, "[net]")==0 + || strcmp(type, "[network]")==0) return NETWORK; + if (strcmp(type, "[crnn]")==0) return CRNN; + if (strcmp(type, "[gru]")==0) return GRU; + if (strcmp(type, "[lstm]") == 0) return LSTM; + if (strcmp(type, "[rnn]")==0) return RNN; + if (strcmp(type, "[conn]")==0 + || strcmp(type, "[connected]")==0) return CONNECTED; + if (strcmp(type, "[max]")==0 + || strcmp(type, "[maxpool]")==0) return MAXPOOL; + if (strcmp(type, "[reorg]")==0) return REORG; + if (strcmp(type, "[avg]")==0 + || strcmp(type, "[avgpool]")==0) return AVGPOOL; + if (strcmp(type, "[dropout]")==0) return DROPOUT; + if (strcmp(type, "[lrn]")==0 + || strcmp(type, "[normalization]")==0) return NORMALIZATION; + if (strcmp(type, "[batchnorm]")==0) return BATCHNORM; + if (strcmp(type, "[soft]")==0 + || strcmp(type, "[softmax]")==0) return SOFTMAX; + if (strcmp(type, "[route]")==0) return ROUTE; + if (strcmp(type, "[upsample]")==0) return UPSAMPLE; + return BLANK; +} + +void free_section(section *s) +{ + free(s->type); + node *n = s->options->front; + while(n){ + kvp *pair = (kvp *)n->val; + free(pair->key); + free(pair); + node *next = n->next; + free(n); + n = next; + } + free(s->options); + free(s); +} + +void parse_data(char *data, float *a, int n) +{ + int i; + if(!data) return; + char *curr = data; + char *next = data; + int done = 0; + for(i = 0; i < n && !done; ++i){ + while(*++next !='\0' && *next != 
','); + if(*next == '\0') done = 1; + *next = '\0'; + sscanf(curr, "%g", &a[i]); + curr = next+1; + } +} + +typedef struct size_params{ + int batch; + int inputs; + int h; + int w; + int c; + int index; + int time_steps; + network *net; +} size_params; + +local_layer parse_local(list *options, size_params params) +{ + int n = option_find_int(options, "filters",1); + int size = option_find_int(options, "size",1); + int stride = option_find_int(options, "stride",1); + int pad = option_find_int(options, "pad",0); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before local layer must output image."); + + local_layer layer = make_local_layer(batch,h,w,c,n,size,stride,pad,activation); + + return layer; +} + +layer parse_deconvolutional(list *options, size_params params) +{ + int n = option_find_int(options, "filters",1); + int size = option_find_int(options, "size",1); + int stride = option_find_int(options, "stride",1); + + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before deconvolutional layer must output image."); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + int pad = option_find_int_quiet(options, "pad",0); + int padding = option_find_int_quiet(options, "padding",0); + if(pad) padding = size/2; + + layer l = make_deconvolutional_layer(batch,h,w,c,n,size,stride,padding, activation, batch_normalize, params.net->adam); + + return l; +} + + +convolutional_layer parse_convolutional(list *options, size_params params) +{ + int n = option_find_int(options, "filters",1); + int size = option_find_int(options, "size",1); + int 
stride = option_find_int(options, "stride",1); + int pad = option_find_int_quiet(options, "pad",0); + int padding = option_find_int_quiet(options, "padding",0); + int groups = option_find_int_quiet(options, "groups", 1); + if(pad) padding = size/2; + + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before convolutional layer must output image."); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + int binary = option_find_int_quiet(options, "binary", 0); + int xnor = option_find_int_quiet(options, "xnor", 0); + + convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,groups,size,stride,padding,activation, batch_normalize, binary, xnor, params.net->adam); + layer.flipped = option_find_int_quiet(options, "flipped", 0); + layer.dot = option_find_float_quiet(options, "dot", 0); + + return layer; +} + +layer parse_crnn(list *options, size_params params) +{ + int output_filters = option_find_int(options, "output_filters",1); + int hidden_filters = option_find_int(options, "hidden_filters",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, activation, batch_normalize); + + l.shortcut = option_find_int_quiet(options, "shortcut", 0); + + return l; +} + +layer parse_rnn(list *options, size_params params) +{ + int output = option_find_int(options, "output",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, 
"batch_normalize", 0); + + layer l = make_rnn_layer(params.batch, params.inputs, output, params.time_steps, activation, batch_normalize, params.net->adam); + + l.shortcut = option_find_int_quiet(options, "shortcut", 0); + + return l; +} + +layer parse_gru(list *options, size_params params) +{ + int output = option_find_int(options, "output",1); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_gru_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net->adam); + l.tanh = option_find_int_quiet(options, "tanh", 0); + + return l; +} + +layer parse_lstm(list *options, size_params params) +{ + int output = option_find_int(options, "output", 1); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_lstm_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net->adam); + + return l; +} + +layer parse_connected(list *options, size_params params) +{ + int output = option_find_int(options, "output",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_connected_layer(params.batch, params.inputs, output, activation, batch_normalize, params.net->adam); + return l; +} + +layer parse_softmax(list *options, size_params params) +{ + int groups = option_find_int_quiet(options, "groups",1); + layer l = make_softmax_layer(params.batch, params.inputs, groups); + l.temperature = option_find_float_quiet(options, "temperature", 1); + char *tree_file = option_find_str(options, "tree", 0); + if (tree_file) l.softmax_tree = read_tree(tree_file); + l.w = params.w; + l.h = params.h; + l.c = params.c; + l.spatial = option_find_float_quiet(options, "spatial", 0); + l.noloss = option_find_int_quiet(options, "noloss", 0); + return l; +} + +int *parse_yolo_mask(char *a, 
int *num) +{ + int *mask = 0; + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + mask = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + int val = atoi(a); + mask[i] = val; + a = strchr(a, ',')+1; + } + *num = n; + } + return mask; +} + +layer parse_yolo(list *options, size_params params) +{ + int classes = option_find_int(options, "classes", 20); + int total = option_find_int(options, "num", 1); + int num = total; + + char *a = option_find_str(options, "mask", 0); + int *mask = parse_yolo_mask(a, &num); + layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes); + assert(l.outputs == params.inputs); + + l.max_boxes = option_find_int_quiet(options, "max",90); + l.jitter = option_find_float(options, "jitter", .2); + + l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); + l.truth_thresh = option_find_float(options, "truth_thresh", 1); + l.random = option_find_int_quiet(options, "random", 0); + + char *map_file = option_find_str(options, "map", 0); + if (map_file) l.map = read_map(map_file); + + a = option_find_str(options, "anchors", 0); + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + for(i = 0; i < n; ++i){ + float bias = atof(a); + l.biases[i] = bias; + a = strchr(a, ',')+1; + } + } + return l; +} + +layer parse_iseg(list *options, size_params params) +{ + int classes = option_find_int(options, "classes", 20); + int ids = option_find_int(options, "ids", 32); + layer l = make_iseg_layer(params.batch, params.w, params.h, classes, ids); + assert(l.outputs == params.inputs); + return l; +} + +layer parse_region(list *options, size_params params) +{ + int coords = option_find_int(options, "coords", 4); + int classes = option_find_int(options, "classes", 20); + int num = option_find_int(options, "num", 1); + + layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords); + 
assert(l.outputs == params.inputs); + + l.log = option_find_int_quiet(options, "log", 0); + l.sqrt = option_find_int_quiet(options, "sqrt", 0); + + l.softmax = option_find_int(options, "softmax", 0); + l.background = option_find_int_quiet(options, "background", 0); + l.max_boxes = option_find_int_quiet(options, "max",30); + l.jitter = option_find_float(options, "jitter", .2); + l.rescore = option_find_int_quiet(options, "rescore",0); + + l.thresh = option_find_float(options, "thresh", .5); + l.classfix = option_find_int_quiet(options, "classfix", 0); + l.absolute = option_find_int_quiet(options, "absolute", 0); + l.random = option_find_int_quiet(options, "random", 0); + + l.coord_scale = option_find_float(options, "coord_scale", 1); + l.object_scale = option_find_float(options, "object_scale", 1); + l.noobject_scale = option_find_float(options, "noobject_scale", 1); + l.mask_scale = option_find_float(options, "mask_scale", 1); + l.class_scale = option_find_float(options, "class_scale", 1); + l.bias_match = option_find_int_quiet(options, "bias_match",0); + + char *tree_file = option_find_str(options, "tree", 0); + if (tree_file) l.softmax_tree = read_tree(tree_file); + char *map_file = option_find_str(options, "map", 0); + if (map_file) l.map = read_map(map_file); + + char *a = option_find_str(options, "anchors", 0); + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + for(i = 0; i < n; ++i){ + float bias = atof(a); + l.biases[i] = bias; + a = strchr(a, ',')+1; + } + } + return l; +} + +detection_layer parse_detection(list *options, size_params params) +{ + int coords = option_find_int(options, "coords", 1); + int classes = option_find_int(options, "classes", 1); + int rescore = option_find_int(options, "rescore", 0); + int num = option_find_int(options, "num", 1); + int side = option_find_int(options, "side", 7); + detection_layer layer = make_detection_layer(params.batch, params.inputs, num, side, classes, 
coords, rescore); + + layer.softmax = option_find_int(options, "softmax", 0); + layer.sqrt = option_find_int(options, "sqrt", 0); + + layer.max_boxes = option_find_int_quiet(options, "max",90); + layer.coord_scale = option_find_float(options, "coord_scale", 1); + layer.forced = option_find_int(options, "forced", 0); + layer.object_scale = option_find_float(options, "object_scale", 1); + layer.noobject_scale = option_find_float(options, "noobject_scale", 1); + layer.class_scale = option_find_float(options, "class_scale", 1); + layer.jitter = option_find_float(options, "jitter", .2); + layer.random = option_find_int_quiet(options, "random", 0); + layer.reorg = option_find_int_quiet(options, "reorg", 0); + return layer; +} + +cost_layer parse_cost(list *options, size_params params) +{ + char *type_s = option_find_str(options, "type", "sse"); + COST_TYPE type = get_cost_type(type_s); + float scale = option_find_float_quiet(options, "scale",1); + cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale); + layer.ratio = option_find_float_quiet(options, "ratio",0); + layer.noobject_scale = option_find_float_quiet(options, "noobj", 1); + layer.thresh = option_find_float_quiet(options, "thresh",0); + return layer; +} + +crop_layer parse_crop(list *options, size_params params) +{ + int crop_height = option_find_int(options, "crop_height",1); + int crop_width = option_find_int(options, "crop_width",1); + int flip = option_find_int(options, "flip",0); + float angle = option_find_float(options, "angle",0); + float saturation = option_find_float(options, "saturation",1); + float exposure = option_find_float(options, "exposure",1); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before crop layer must output image."); + + int noadjust = option_find_int_quiet(options, "noadjust",0); + + crop_layer l = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip, angle, saturation, 
exposure); + l.shift = option_find_float(options, "shift", 0); + l.noadjust = noadjust; + return l; +} + +layer parse_reorg(list *options, size_params params) +{ + int stride = option_find_int(options, "stride",1); + int reverse = option_find_int_quiet(options, "reverse",0); + int flatten = option_find_int_quiet(options, "flatten",0); + int extra = option_find_int_quiet(options, "extra",0); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before reorg layer must output image."); + + layer layer = make_reorg_layer(batch,w,h,c,stride,reverse, flatten, extra); + return layer; +} + +maxpool_layer parse_maxpool(list *options, size_params params) +{ + int stride = option_find_int(options, "stride",1); + int size = option_find_int(options, "size",stride); + int padding = option_find_int_quiet(options, "padding", size-1); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before maxpool layer must output image."); + + maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding); + return layer; +} + +avgpool_layer parse_avgpool(list *options, size_params params) +{ + int batch,w,h,c; + w = params.w; + h = params.h; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before avgpool layer must output image."); + + avgpool_layer layer = make_avgpool_layer(batch,w,h,c); + return layer; +} + +dropout_layer parse_dropout(list *options, size_params params) +{ + float probability = option_find_float(options, "probability", .5); + dropout_layer layer = make_dropout_layer(params.batch, params.inputs, probability); + layer.out_w = params.w; + layer.out_h = params.h; + layer.out_c = params.c; + return layer; +} + +layer parse_normalization(list *options, size_params params) +{ + float alpha = option_find_float(options, "alpha", .0001); + float beta = option_find_float(options, "beta" , .75); + float kappa 
= option_find_float(options, "kappa", 1); + int size = option_find_int(options, "size", 5); + layer l = make_normalization_layer(params.batch, params.w, params.h, params.c, size, alpha, beta, kappa); + return l; +} + +layer parse_batchnorm(list *options, size_params params) +{ + layer l = make_batchnorm_layer(params.batch, params.w, params.h, params.c); + return l; +} + +layer parse_shortcut(list *options, size_params params, network *net) +{ + char *l = option_find(options, "from"); + int index = atoi(l); + if(index < 0) index = params.index + index; + + int batch = params.batch; + layer from = net->layers[index]; + + layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c); + + char *activation_s = option_find_str(options, "activation", "linear"); + ACTIVATION activation = get_activation(activation_s); + s.activation = activation; + s.alpha = option_find_float_quiet(options, "alpha", 1); + s.beta = option_find_float_quiet(options, "beta", 1); + return s; +} + + +layer parse_l2norm(list *options, size_params params) +{ + layer l = make_l2norm_layer(params.batch, params.inputs); + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; + return l; +} + + +layer parse_logistic(list *options, size_params params) +{ + layer l = make_logistic_layer(params.batch, params.inputs); + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; + return l; +} + +layer parse_activation(list *options, size_params params) +{ + char *activation_s = option_find_str(options, "activation", "linear"); + ACTIVATION activation = get_activation(activation_s); + + layer l = make_activation_layer(params.batch, params.inputs, activation); + + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; + + return l; +} + +layer parse_upsample(list *options, size_params params, network *net) +{ + + int stride = option_find_int(options, "stride",2); + layer l = 
make_upsample_layer(params.batch, params.w, params.h, params.c, stride); + l.scale = option_find_float_quiet(options, "scale", 1); + return l; +} + +route_layer parse_route(list *options, size_params params, network *net) +{ + char *l = option_find(options, "layers"); + int len = strlen(l); + if(!l) error("Route Layer must specify input layers"); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (l[i] == ',') ++n; + } + + int *layers = calloc(n, sizeof(int)); + int *sizes = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + int index = atoi(l); + l = strchr(l, ',')+1; + if(index < 0) index = params.index + index; + layers[i] = index; + sizes[i] = net->layers[index].outputs; + } + int batch = params.batch; + + route_layer layer = make_route_layer(batch, n, layers, sizes); + + convolutional_layer first = net->layers[layers[0]]; + layer.out_w = first.out_w; + layer.out_h = first.out_h; + layer.out_c = first.out_c; + for(i = 1; i < n; ++i){ + int index = layers[i]; + convolutional_layer next = net->layers[index]; + if(next.out_w == first.out_w && next.out_h == first.out_h){ + layer.out_c += next.out_c; + }else{ + layer.out_h = layer.out_w = layer.out_c = 0; + } + } + + return layer; +} + +learning_rate_policy get_policy(char *s) +{ + if (strcmp(s, "random")==0) return RANDOM; + if (strcmp(s, "poly")==0) return POLY; + if (strcmp(s, "constant")==0) return CONSTANT; + if (strcmp(s, "step")==0) return STEP; + if (strcmp(s, "exp")==0) return EXP; + if (strcmp(s, "sigmoid")==0) return SIG; + if (strcmp(s, "steps")==0) return STEPS; + fprintf(stderr, "Couldn't find policy %s, going with constant\n", s); + return CONSTANT; +} + +void parse_net_options(list *options, network *net) +{ + net->batch = option_find_int(options, "batch",1); + net->learning_rate = option_find_float(options, "learning_rate", .001); + net->momentum = option_find_float(options, "momentum", .9); + net->decay = option_find_float(options, "decay", .0001); + int subdivs = option_find_int(options, 
"subdivisions",1); + net->time_steps = option_find_int_quiet(options, "time_steps",1); + net->notruth = option_find_int_quiet(options, "notruth",0); + net->batch /= subdivs; + net->batch *= net->time_steps; + net->subdivisions = subdivs; + net->random = option_find_int_quiet(options, "random", 0); + + net->adam = option_find_int_quiet(options, "adam", 0); + if(net->adam){ + net->B1 = option_find_float(options, "B1", .9); + net->B2 = option_find_float(options, "B2", .999); + net->eps = option_find_float(options, "eps", .0000001); + } + + net->h = option_find_int_quiet(options, "height",0); + net->w = option_find_int_quiet(options, "width",0); + net->c = option_find_int_quiet(options, "channels",0); + net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c); + net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2); + net->min_crop = option_find_int_quiet(options, "min_crop",net->w); + net->max_ratio = option_find_float_quiet(options, "max_ratio", (float) net->max_crop / net->w); + net->min_ratio = option_find_float_quiet(options, "min_ratio", (float) net->min_crop / net->w); + net->center = option_find_int_quiet(options, "center",0); + net->clip = option_find_float_quiet(options, "clip", 0); + + net->angle = option_find_float_quiet(options, "angle", 0); + net->aspect = option_find_float_quiet(options, "aspect", 1); + net->saturation = option_find_float_quiet(options, "saturation", 1); + net->exposure = option_find_float_quiet(options, "exposure", 1); + net->hue = option_find_float_quiet(options, "hue", 0); + + if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied"); + + char *policy_s = option_find_str(options, "policy", "constant"); + net->policy = get_policy(policy_s); + net->burn_in = option_find_int_quiet(options, "burn_in", 0); + net->power = option_find_float_quiet(options, "power", 4); + if(net->policy == STEP){ + net->step = option_find_int(options, "step", 1); + net->scale = 
option_find_float(options, "scale", 1); + } else if (net->policy == STEPS){ + char *l = option_find(options, "steps"); + char *p = option_find(options, "scales"); + if(!l || !p) error("STEPS policy must have steps and scales in cfg file"); + + int len = strlen(l); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (l[i] == ',') ++n; + } + int *steps = calloc(n, sizeof(int)); + float *scales = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + int step = atoi(l); + float scale = atof(p); + l = strchr(l, ',')+1; + p = strchr(p, ',')+1; + steps[i] = step; + scales[i] = scale; + } + net->scales = scales; + net->steps = steps; + net->num_steps = n; + } else if (net->policy == EXP){ + net->gamma = option_find_float(options, "gamma", 1); + } else if (net->policy == SIG){ + net->gamma = option_find_float(options, "gamma", 1); + net->step = option_find_int(options, "step", 1); + } else if (net->policy == POLY || net->policy == RANDOM){ + } + net->max_batches = option_find_int(options, "max_batches", 0); +} + +int is_network(section *s) +{ + return (strcmp(s->type, "[net]")==0 + || strcmp(s->type, "[network]")==0); +} + +network *parse_network_cfg(char *filename) +{ + list *sections = read_cfg(filename); + node *n = sections->front; + if(!n) error("Config file has no sections"); + network *net = make_network(sections->size - 1); + net->gpu_index = gpu_index; + size_params params; + + section *s = (section *)n->val; + list *options = s->options; + if(!is_network(s)) error("First section must be [net] or [network]"); + parse_net_options(options, net); + + params.h = net->h; + params.w = net->w; + params.c = net->c; + params.inputs = net->inputs; + params.batch = net->batch; + params.time_steps = net->time_steps; + params.net = net; + + size_t workspace_size = 0; + n = n->next; + int count = 0; + free_section(s); + fprintf(stderr, "layer filters size input output\n"); + while(n){ + params.index = count; + fprintf(stderr, "%5d ", count); + s = (section *)n->val; + options 
= s->options; + layer l = {0}; + LAYER_TYPE lt = string_to_layer_type(s->type); + if(lt == CONVOLUTIONAL){ + l = parse_convolutional(options, params); + }else if(lt == DECONVOLUTIONAL){ + l = parse_deconvolutional(options, params); + }else if(lt == LOCAL){ + l = parse_local(options, params); + }else if(lt == ACTIVE){ + l = parse_activation(options, params); + }else if(lt == LOGXENT){ + l = parse_logistic(options, params); + }else if(lt == L2NORM){ + l = parse_l2norm(options, params); + }else if(lt == RNN){ + l = parse_rnn(options, params); + }else if(lt == GRU){ + l = parse_gru(options, params); + }else if (lt == LSTM) { + l = parse_lstm(options, params); + }else if(lt == CRNN){ + l = parse_crnn(options, params); + }else if(lt == CONNECTED){ + l = parse_connected(options, params); + }else if(lt == CROP){ + l = parse_crop(options, params); + }else if(lt == COST){ + l = parse_cost(options, params); + }else if(lt == REGION){ + l = parse_region(options, params); + }else if(lt == YOLO){ + l = parse_yolo(options, params); + }else if(lt == ISEG){ + l = parse_iseg(options, params); + }else if(lt == DETECTION){ + l = parse_detection(options, params); + }else if(lt == SOFTMAX){ + l = parse_softmax(options, params); + net->hierarchy = l.softmax_tree; + }else if(lt == NORMALIZATION){ + l = parse_normalization(options, params); + }else if(lt == BATCHNORM){ + l = parse_batchnorm(options, params); + }else if(lt == MAXPOOL){ + l = parse_maxpool(options, params); + }else if(lt == REORG){ + l = parse_reorg(options, params); + }else if(lt == AVGPOOL){ + l = parse_avgpool(options, params); + }else if(lt == ROUTE){ + l = parse_route(options, params, net); + }else if(lt == UPSAMPLE){ + l = parse_upsample(options, params, net); + }else if(lt == SHORTCUT){ + l = parse_shortcut(options, params, net); + }else if(lt == DROPOUT){ + l = parse_dropout(options, params); + l.output = net->layers[count-1].output; + l.delta = net->layers[count-1].delta; +#ifdef GPU + l.output_gpu = 
net->layers[count-1].output_gpu; + l.delta_gpu = net->layers[count-1].delta_gpu; +#endif + }else{ + fprintf(stderr, "Type not recognized: %s\n", s->type); + } + l.clip = net->clip; + l.truth = option_find_int_quiet(options, "truth", 0); + l.onlyforward = option_find_int_quiet(options, "onlyforward", 0); + l.stopbackward = option_find_int_quiet(options, "stopbackward", 0); + l.dontsave = option_find_int_quiet(options, "dontsave", 0); + l.dontload = option_find_int_quiet(options, "dontload", 0); + l.numload = option_find_int_quiet(options, "numload", 0); + l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0); + l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1); + l.smooth = option_find_float_quiet(options, "smooth", 0); + option_unused(options); + net->layers[count] = l; + if (l.workspace_size > workspace_size) workspace_size = l.workspace_size; + free_section(s); + n = n->next; + ++count; + if(n){ + params.h = l.out_h; + params.w = l.out_w; + params.c = l.out_c; + params.inputs = l.outputs; + } + } + free_list(sections); + layer out = get_network_output_layer(net); + net->outputs = out.outputs; + net->truths = out.outputs; + if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths; + net->output = out.output; + net->input = calloc(net->inputs*net->batch, sizeof(float)); + net->truth = calloc(net->truths*net->batch, sizeof(float)); +#ifdef GPU + net->output_gpu = out.output_gpu; + net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch); + net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch); +#endif + if(workspace_size){ + //printf("%ld\n", workspace_size); +#ifdef GPU + if(gpu_index >= 0){ + net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1); + }else { + net->workspace = calloc(1, workspace_size); + } +#else + net->workspace = calloc(1, workspace_size); +#endif + } + return net; +} + +list *read_cfg(char *filename) +{ + FILE *file = fopen(filename, "r"); 
+ if(file == 0) file_error(filename); + char *line; + int nu = 0; + list *options = make_list(); + section *current = 0; + while((line=fgetl(file)) != 0){ + ++ nu; + strip(line); + switch(line[0]){ + case '[': + current = malloc(sizeof(section)); + list_insert(options, current); + current->options = make_list(); + current->type = line; + break; + case '\0': + case '#': + case ';': + free(line); + break; + default: + if(!read_option(line, current->options)){ + fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); + free(line); + } + break; + } + } + fclose(file); + return options; +} + +void save_convolutional_weights_binary(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_convolutional_layer(l); + } +#endif + binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.binary_weights); + int size = l.c*l.size*l.size; + int i, j, k; + fwrite(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.n, fp); + fwrite(l.rolling_mean, sizeof(float), l.n, fp); + fwrite(l.rolling_variance, sizeof(float), l.n, fp); + } + for(i = 0; i < l.n; ++i){ + float mean = l.binary_weights[i*size]; + if(mean < 0) mean = -mean; + fwrite(&mean, sizeof(float), 1, fp); + for(j = 0; j < size/8; ++j){ + int index = i*size + j*8; + unsigned char c = 0; + for(k = 0; k < 8; ++k){ + if (j*8 + k >= size) break; + if (l.binary_weights[index + k] > 0) c = (c | 1<= 0){ + pull_convolutional_layer(l); + } +#endif + int num = l.nweights; + fwrite(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.n, fp); + fwrite(l.rolling_mean, sizeof(float), l.n, fp); + fwrite(l.rolling_variance, sizeof(float), l.n, fp); + } + fwrite(l.weights, sizeof(float), num, fp); +} + +void save_batchnorm_weights(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_batchnorm_layer(l); + } +#endif + fwrite(l.scales, sizeof(float), l.c, fp); + fwrite(l.rolling_mean, sizeof(float), l.c, fp); + 
fwrite(l.rolling_variance, sizeof(float), l.c, fp); +} + +void save_connected_weights(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_connected_layer(l); + } +#endif + fwrite(l.biases, sizeof(float), l.outputs, fp); + fwrite(l.weights, sizeof(float), l.outputs*l.inputs, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.outputs, fp); + fwrite(l.rolling_mean, sizeof(float), l.outputs, fp); + fwrite(l.rolling_variance, sizeof(float), l.outputs, fp); + } +} + +void save_weights_upto(network *net, char *filename, int cutoff) +{ +#ifdef GPU + if(net->gpu_index >= 0){ + cuda_set_device(net->gpu_index); + } +#endif + fprintf(stderr, "Saving weights to %s\n", filename); + FILE *fp = fopen(filename, "wb"); + if(!fp) file_error(filename); + + int major = 0; + int minor = 2; + int revision = 0; + fwrite(&major, sizeof(int), 1, fp); + fwrite(&minor, sizeof(int), 1, fp); + fwrite(&revision, sizeof(int), 1, fp); + fwrite(net->seen, sizeof(size_t), 1, fp); + + int i; + for(i = 0; i < net->n && i < cutoff; ++i){ + layer l = net->layers[i]; + if (l.dontsave) continue; + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + save_convolutional_weights(l, fp); + } if(l.type == CONNECTED){ + save_connected_weights(l, fp); + } if(l.type == BATCHNORM){ + save_batchnorm_weights(l, fp); + } if(l.type == RNN){ + save_connected_weights(*(l.input_layer), fp); + save_connected_weights(*(l.self_layer), fp); + save_connected_weights(*(l.output_layer), fp); + } if (l.type == LSTM) { + save_connected_weights(*(l.wi), fp); + save_connected_weights(*(l.wf), fp); + save_connected_weights(*(l.wo), fp); + save_connected_weights(*(l.wg), fp); + save_connected_weights(*(l.ui), fp); + save_connected_weights(*(l.uf), fp); + save_connected_weights(*(l.uo), fp); + save_connected_weights(*(l.ug), fp); + } if (l.type == GRU) { + if(1){ + save_connected_weights(*(l.wz), fp); + save_connected_weights(*(l.wr), fp); + save_connected_weights(*(l.wh), fp); + 
save_connected_weights(*(l.uz), fp); + save_connected_weights(*(l.ur), fp); + save_connected_weights(*(l.uh), fp); + }else{ + save_connected_weights(*(l.reset_layer), fp); + save_connected_weights(*(l.update_layer), fp); + save_connected_weights(*(l.state_layer), fp); + } + } if(l.type == CRNN){ + save_convolutional_weights(*(l.input_layer), fp); + save_convolutional_weights(*(l.self_layer), fp); + save_convolutional_weights(*(l.output_layer), fp); + } if(l.type == LOCAL){ +#ifdef GPU + if(gpu_index >= 0){ + pull_local_layer(l); + } +#endif + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + fwrite(l.biases, sizeof(float), l.outputs, fp); + fwrite(l.weights, sizeof(float), size, fp); + } + } + fclose(fp); +} +void save_weights(network *net, char *filename) +{ + save_weights_upto(net, filename, net->n); +} + +void transpose_matrix(float *a, int rows, int cols) +{ + float *transpose = calloc(rows*cols, sizeof(float)); + int x, y; + for(x = 0; x < rows; ++x){ + for(y = 0; y < cols; ++y){ + transpose[y*rows + x] = a[x*cols + y]; + } + } + memcpy(a, transpose, rows*cols*sizeof(float)); + free(transpose); +} + +void load_connected_weights(layer l, FILE *fp, int transpose) +{ + fread(l.biases, sizeof(float), l.outputs, fp); + fread(l.weights, sizeof(float), l.outputs*l.inputs, fp); + if(transpose){ + transpose_matrix(l.weights, l.inputs, l.outputs); + } + //printf("Biases: %f mean %f variance\n", mean_array(l.biases, l.outputs), variance_array(l.biases, l.outputs)); + //printf("Weights: %f mean %f variance\n", mean_array(l.weights, l.outputs*l.inputs), variance_array(l.weights, l.outputs*l.inputs)); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.outputs, fp); + fread(l.rolling_mean, sizeof(float), l.outputs, fp); + fread(l.rolling_variance, sizeof(float), l.outputs, fp); + //printf("Scales: %f mean %f variance\n", mean_array(l.scales, l.outputs), variance_array(l.scales, l.outputs)); + 
//printf("rolling_mean: %f mean %f variance\n", mean_array(l.rolling_mean, l.outputs), variance_array(l.rolling_mean, l.outputs)); + //printf("rolling_variance: %f mean %f variance\n", mean_array(l.rolling_variance, l.outputs), variance_array(l.rolling_variance, l.outputs)); + } +#ifdef GPU + if(gpu_index >= 0){ + push_connected_layer(l); + } +#endif +} + +void load_batchnorm_weights(layer l, FILE *fp) +{ + fread(l.scales, sizeof(float), l.c, fp); + fread(l.rolling_mean, sizeof(float), l.c, fp); + fread(l.rolling_variance, sizeof(float), l.c, fp); +#ifdef GPU + if(gpu_index >= 0){ + push_batchnorm_layer(l); + } +#endif +} + +void load_convolutional_weights_binary(layer l, FILE *fp) +{ + fread(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.n, fp); + fread(l.rolling_mean, sizeof(float), l.n, fp); + fread(l.rolling_variance, sizeof(float), l.n, fp); + } + int size = l.c*l.size*l.size; + int i, j, k; + for(i = 0; i < l.n; ++i){ + float mean = 0; + fread(&mean, sizeof(float), 1, fp); + for(j = 0; j < size/8; ++j){ + int index = i*size + j*8; + unsigned char c = 0; + fread(&c, sizeof(char), 1, fp); + for(k = 0; k < 8; ++k){ + if (j*8 + k >= size) break; + l.weights[index + k] = (c & 1<= 0){ + push_convolutional_layer(l); + } +#endif +} + +void load_convolutional_weights(layer l, FILE *fp) +{ + if(l.binary){ + //load_convolutional_weights_binary(l, fp); + //return; + } + if(l.numload) l.n = l.numload; + int num = l.c/l.groups*l.n*l.size*l.size; + fread(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.n, fp); + fread(l.rolling_mean, sizeof(float), l.n, fp); + fread(l.rolling_variance, sizeof(float), l.n, fp); + if(0){ + int i; + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_mean[i]); + } + printf("\n"); + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_variance[i]); + } + printf("\n"); + } + if(0){ + fill_cpu(l.n, 0, 
l.rolling_mean, 1); + fill_cpu(l.n, 0, l.rolling_variance, 1); + } + if(0){ + int i; + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_mean[i]); + } + printf("\n"); + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_variance[i]); + } + printf("\n"); + } + } + fread(l.weights, sizeof(float), num, fp); + //if(l.c == 3) scal_cpu(num, 1./256, l.weights, 1); + if (l.flipped) { + transpose_matrix(l.weights, l.c*l.size*l.size, l.n); + } + //if (l.binary) binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.weights); +#ifdef GPU + if(gpu_index >= 0){ + push_convolutional_layer(l); + } +#endif +} + + +void load_weights_upto(network *net, char *filename, int start, int cutoff) +{ +#ifdef GPU + if(net->gpu_index >= 0){ + cuda_set_device(net->gpu_index); + } +#endif + fprintf(stderr, "Loading weights from %s...", filename); + fflush(stdout); + FILE *fp = fopen(filename, "rb"); + if(!fp) file_error(filename); + + int major; + int minor; + int revision; + fread(&major, sizeof(int), 1, fp); + fread(&minor, sizeof(int), 1, fp); + fread(&revision, sizeof(int), 1, fp); + if ((major*10 + minor) >= 2 && major < 1000 && minor < 1000){ + fread(net->seen, sizeof(size_t), 1, fp); + } else { + int iseen = 0; + fread(&iseen, sizeof(int), 1, fp); + *net->seen = iseen; + } + int transpose = (major > 1000) || (minor > 1000); + + int i; + for(i = start; i < net->n && i < cutoff; ++i){ + layer l = net->layers[i]; + if (l.dontload) continue; + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + load_convolutional_weights(l, fp); + } + if(l.type == CONNECTED){ + load_connected_weights(l, fp, transpose); + } + if(l.type == BATCHNORM){ + load_batchnorm_weights(l, fp); + } + if(l.type == CRNN){ + load_convolutional_weights(*(l.input_layer), fp); + load_convolutional_weights(*(l.self_layer), fp); + load_convolutional_weights(*(l.output_layer), fp); + } + if(l.type == RNN){ + load_connected_weights(*(l.input_layer), fp, transpose); + load_connected_weights(*(l.self_layer), fp, 
transpose); + load_connected_weights(*(l.output_layer), fp, transpose); + } + if (l.type == LSTM) { + load_connected_weights(*(l.wi), fp, transpose); + load_connected_weights(*(l.wf), fp, transpose); + load_connected_weights(*(l.wo), fp, transpose); + load_connected_weights(*(l.wg), fp, transpose); + load_connected_weights(*(l.ui), fp, transpose); + load_connected_weights(*(l.uf), fp, transpose); + load_connected_weights(*(l.uo), fp, transpose); + load_connected_weights(*(l.ug), fp, transpose); + } + if (l.type == GRU) { + if(1){ + load_connected_weights(*(l.wz), fp, transpose); + load_connected_weights(*(l.wr), fp, transpose); + load_connected_weights(*(l.wh), fp, transpose); + load_connected_weights(*(l.uz), fp, transpose); + load_connected_weights(*(l.ur), fp, transpose); + load_connected_weights(*(l.uh), fp, transpose); + }else{ + load_connected_weights(*(l.reset_layer), fp, transpose); + load_connected_weights(*(l.update_layer), fp, transpose); + load_connected_weights(*(l.state_layer), fp, transpose); + } + } + if(l.type == LOCAL){ + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + fread(l.biases, sizeof(float), l.outputs, fp); + fread(l.weights, sizeof(float), size, fp); +#ifdef GPU + if(gpu_index >= 0){ + push_local_layer(l); + } +#endif + } + } + fprintf(stderr, "Done!\n"); + fclose(fp); +} + +void load_weights(network *net, char *filename) +{ + load_weights_upto(net, filename, 0, net->n); +} + diff --git a/workloads/realworld/pinned/darknet/src/parser.h b/workloads/realworld/pinned/darknet/src/parser.h new file mode 100644 index 0000000000000000000000000000000000000000..81aef2c86f3e6cb362f8bde9695ce9d5699ca77f --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/parser.h @@ -0,0 +1,9 @@ +#ifndef PARSER_H +#define PARSER_H +#include "darknet.h" +#include "network.h" + +void save_network(network net, char *filename); +void save_weights_double(network net, char *filename); + +#endif diff --git 
a/workloads/realworld/pinned/darknet/src/region_layer.c b/workloads/realworld/pinned/darknet/src/region_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..9df1b8bc252239ca520fa3dabeff55e5eb5959b8 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/region_layer.c @@ -0,0 +1,507 @@ +#include "region_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_region_layer(int batch, int w, int h, int n, int classes, int coords) +{ + layer l = {0}; + l.type = REGION; + + l.n = n; + l.batch = batch; + l.h = h; + l.w = w; + l.c = n*(classes + coords + 1); + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.coords = coords; + l.cost = calloc(1, sizeof(float)); + l.biases = calloc(n*2, sizeof(float)); + l.bias_updates = calloc(n*2, sizeof(float)); + l.outputs = h*w*n*(classes + coords + 1); + l.inputs = l.outputs; + l.truths = 30*(l.coords + 1); + l.delta = calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + int i; + for(i = 0; i < n*2; ++i){ + l.biases[i] = .5; + } + + l.forward = forward_region_layer; + l.backward = backward_region_layer; +#ifdef GPU + l.forward_gpu = forward_region_layer_gpu; + l.backward_gpu = backward_region_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "detection\n"); + srand(0); + + return l; +} + +void resize_region_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->n*(l->classes + l->coords + 1); + l->inputs = l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + 
l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride) +{ + box b; + b.x = (i + x[index + 0*stride]) / w; + b.y = (j + x[index + 1*stride]) / h; + b.w = exp(x[index + 2*stride]) * biases[2*n] / w; + b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h; + return b; +} + +float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale, int stride) +{ + box pred = get_region_box(x, biases, n, index, i, j, w, h, stride); + float iou = box_iou(pred, truth); + + float tx = (truth.x*w - i); + float ty = (truth.y*h - j); + float tw = log(truth.w*w / biases[2*n]); + float th = log(truth.h*h / biases[2*n + 1]); + + delta[index + 0*stride] = scale * (tx - x[index + 0*stride]); + delta[index + 1*stride] = scale * (ty - x[index + 1*stride]); + delta[index + 2*stride] = scale * (tw - x[index + 2*stride]); + delta[index + 3*stride] = scale * (th - x[index + 3*stride]); + return iou; +} + +void delta_region_mask(float *truth, float *x, int n, int index, float *delta, int stride, int scale) +{ + int i; + for(i = 0; i < n; ++i){ + delta[index + i*stride] = scale*(truth[i] - x[index + i*stride]); + } +} + + +void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, int stride, float *avg_cat, int tag) +{ + int i, n; + if(hier){ + float pred = 1; + while(class >= 0){ + pred *= output[index + stride*class]; + int g = hier->group[class]; + int offset = hier->group_offset[g]; + for(i = 0; i < hier->group_size[g]; ++i){ + delta[index + stride*(offset + i)] = scale * (0 - output[index + stride*(offset + i)]); + } + delta[index + stride*class] = scale * (1 - output[index + stride*class]); + + class = hier->parent[class]; + } + *avg_cat += pred; + } else { + if (delta[index] && tag){ + delta[index + stride*class] = scale * (1 - output[index + 
stride*class]); + return; + } + for(n = 0; n < classes; ++n){ + delta[index + stride*n] = scale * (((n == class)?1 : 0) - output[index + stride*n]); + if(n == class) *avg_cat += output[index + stride*n]; + } + } +} + +float logit(float x) +{ + return log(x/(1.-x)); +} + +float tisnan(float x) +{ + return (x != x); +} + +int entry_index(layer l, int batch, int location, int entry) +{ + int n = location / (l.w*l.h); + int loc = location % (l.w*l.h); + return batch*l.outputs + n*l.w*l.h*(l.coords+l.classes+1) + entry*l.w*l.h + loc; +} + +void forward_region_layer(const layer l, network net) +{ + int i,j,b,t,n; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) activate_array(l.output + index, l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords + 1); + if(!l.softmax && !l.softmax_tree) activate_array(l.output + index, l.classes*l.w*l.h, LOGISTIC); + } + } + if (l.softmax_tree){ + int i; + int count = l.coords + 1; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_cpu(net.input + count, group_size, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + count); + count += group_size; + } + } else if (l.softmax){ + int index = entry_index(l, 0, 0, l.coords + !l.background); + softmax_cpu(net.input + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output + index); + } +#endif + + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + if(!net.train) return; + float avg_iou = 0; + float recall = 0; + float avg_cat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + int class_count = 0; + *(l.cost) = 0; + for (b = 0; b < l.batch; ++b) { + if(l.softmax_tree){ + int 
onlyclass = 0; + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + if(!truth.x) break; + int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; + float maxp = 0; + int maxi = 0; + if(truth.x > 100000 && truth.y > 100000){ + for(n = 0; n < l.n*l.w*l.h; ++n){ + int class_index = entry_index(l, b, n, l.coords + 1); + int obj_index = entry_index(l, b, n, l.coords); + float scale = l.output[obj_index]; + l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]); + float p = scale*get_hierarchy_probability(l.output + class_index, l.softmax_tree, class, l.w*l.h); + if(p > maxp){ + maxp = p; + maxi = n; + } + } + int class_index = entry_index(l, b, maxi, l.coords + 1); + int obj_index = entry_index(l, b, maxi, l.coords); + delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax); + if(l.output[obj_index] < .3) l.delta[obj_index] = l.object_scale * (.3 - l.output[obj_index]); + else l.delta[obj_index] = 0; + l.delta[obj_index] = 0; + ++class_count; + onlyclass = 1; + break; + } + } + if(onlyclass) continue; + } + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h); + float best_iou = 0; + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + if(!truth.x) break; + float iou = box_iou(pred, truth); + if (iou > best_iou) { + best_iou = iou; + } + } + int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, l.coords); + avg_anyobj += l.output[obj_index]; + l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]); + if(l.background) l.delta[obj_index] = l.noobject_scale * (1 - l.output[obj_index]); + if (best_iou > l.thresh) { + l.delta[obj_index] = 0; + } + + if(*(net.seen) < 12800){ + box truth = 
{0}; + truth.x = (i + .5)/l.w; + truth.y = (j + .5)/l.h; + truth.w = l.biases[2*n]/l.w; + truth.h = l.biases[2*n+1]/l.h; + delta_region_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, l.delta, .01, l.w*l.h); + } + } + } + } + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + + if(!truth.x) break; + float best_iou = 0; + int best_n = 0; + i = (truth.x * l.w); + j = (truth.y * l.h); + box truth_shift = truth; + truth_shift.x = 0; + truth_shift.y = 0; + for(n = 0; n < l.n; ++n){ + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h); + if(l.bias_match){ + pred.w = l.biases[2*n]/l.w; + pred.h = l.biases[2*n+1]/l.h; + } + pred.x = 0; + pred.y = 0; + float iou = box_iou(pred, truth_shift); + if (iou > best_iou){ + best_iou = iou; + best_n = n; + } + } + + int box_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 0); + float iou = delta_region_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, l.delta, l.coord_scale * (2 - truth.w*truth.h), l.w*l.h); + if(l.coords > 4){ + int mask_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 4); + delta_region_mask(net.truth + t*(l.coords + 1) + b*l.truths + 5, l.output, l.coords - 4, mask_index, l.delta, l.w*l.h, l.mask_scale); + } + if(iou > .5) recall += 1; + avg_iou += iou; + + int obj_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords); + avg_obj += l.output[obj_index]; + l.delta[obj_index] = l.object_scale * (1 - l.output[obj_index]); + if (l.rescore) { + l.delta[obj_index] = l.object_scale * (iou - l.output[obj_index]); + } + if(l.background){ + l.delta[obj_index] = l.object_scale * (0 - l.output[obj_index]); + } + + int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords + 1); + delta_region_class(l.output, l.delta, 
class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax); + ++count; + ++class_count; + } + } + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count); +} + +void backward_region_layer(const layer l, network net) +{ + /* + int b; + int size = l.coords + l.classes + 1; + for (b = 0; b < l.batch*l.n; ++b){ + int index = (b*size + 4)*l.w*l.h; + gradient_array(l.output + index, l.w*l.h, LOGISTIC, l.delta + index); + } + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); + */ +} + +void correct_region_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +{ + int i; + int new_w=0; + int new_h=0; + if (((float)netw/w) < ((float)neth/h)) { + new_w = netw; + new_h = (h * netw)/w; + } else { + new_h = neth; + new_w = (w * neth)/h; + } + for (i = 0; i < n; ++i){ + box b = dets[i].bbox; + b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); + b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); + b.w *= (float)netw/new_w; + b.h *= (float)neth/new_h; + if(!relative){ + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + dets[i].bbox = b; + } +} + +void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets) +{ + int i,j,n,z; + float *predictions = l.output; + if (l.batch == 2) { + float *flip = l.output + l.outputs; + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w/2; ++i) { + for (n = 0; n < l.n; ++n) { + for(z = 0; z < l.classes + l.coords + 1; ++z){ + int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; + int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); + float swap = flip[i1]; + flip[i1] = flip[i2]; + flip[i2] = swap; + if(z == 0){ + flip[i1] = -flip[i1]; + flip[i2] = -flip[i2]; + } + } + } + } + } + 
for(i = 0; i < l.outputs; ++i){ + l.output[i] = (l.output[i] + flip[i])/2.; + } + } + for (i = 0; i < l.w*l.h; ++i){ + int row = i / l.w; + int col = i % l.w; + for(n = 0; n < l.n; ++n){ + int index = n*l.w*l.h + i; + for(j = 0; j < l.classes; ++j){ + dets[index].prob[j] = 0; + } + int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); + int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); + int mask_index = entry_index(l, 0, n*l.w*l.h + i, 4); + float scale = l.background ? 1 : predictions[obj_index]; + dets[index].bbox = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h); + dets[index].objectness = scale > thresh ? scale : 0; + if(dets[index].mask){ + for(j = 0; j < l.coords - 4; ++j){ + dets[index].mask[j] = l.output[mask_index + j*l.w*l.h]; + } + } + + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + !l.background); + if(l.softmax_tree){ + + hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0, l.w*l.h); + if(map){ + for(j = 0; j < 200; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + map[j]); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? prob : 0; + } + } else { + int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh, l.w*l.h); + dets[index].prob[j] = (scale > thresh) ? scale : 0; + } + } else { + if(dets[index].objectness){ + for(j = 0; j < l.classes; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + j); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? 
prob : 0; + } + } + } + } + } + correct_region_boxes(dets, l.w*l.h*l.n, w, h, netw, neth, relative); +} + +#ifdef GPU + +void forward_region_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + if(l.coords > 4){ + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC); + } + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) activate_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords + 1); + if(!l.softmax && !l.softmax_tree) activate_array_gpu(l.output_gpu + index, l.classes*l.w*l.h, LOGISTIC); + } + } + if (l.softmax_tree){ + int index = entry_index(l, 0, 0, l.coords + 1); + softmax_tree(net.input_gpu + index, l.w*l.h, l.batch*l.n, l.inputs/l.n, 1, l.output_gpu + index, *l.softmax_tree); + } else if (l.softmax) { + int index = entry_index(l, 0, 0, l.coords + !l.background); + softmax_gpu(net.input_gpu + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index); + } + if(!net.train || l.onlyforward){ + cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + return; + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_region_layer(l, net); + //cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs); + if(!net.train) return; + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_region_layer_gpu(const layer l, network net) +{ + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + gradient_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC, l.delta_gpu + index); + if(l.coords > 4){ + index = entry_index(l, b, n*l.w*l.h, 4); + 
gradient_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC, l.delta_gpu + index); + } + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) gradient_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC, l.delta_gpu + index); + } + } + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + +void zero_objectness(layer l) +{ + int i, n; + for (i = 0; i < l.w*l.h; ++i){ + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); + l.output[obj_index] = 0; + } + } +} + diff --git a/workloads/realworld/pinned/darknet/src/region_layer.h b/workloads/realworld/pinned/darknet/src/region_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9f12fd187fd490d10cbc21af8251e0e2a870b7cb --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/region_layer.h @@ -0,0 +1,18 @@ +#ifndef REGION_LAYER_H +#define REGION_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_region_layer(int batch, int w, int h, int n, int classes, int coords); +void forward_region_layer(const layer l, network net); +void backward_region_layer(const layer l, network net); +void resize_region_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_region_layer_gpu(const layer l, network net); +void backward_region_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/pinned/darknet/src/reorg_layer.c b/workloads/realworld/pinned/darknet/src/reorg_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..b3634d53a5e01a8bbcf00e62a90f70f40108e1d7 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/reorg_layer.c @@ -0,0 +1,173 @@ +#include "reorg_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + + +layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra) +{ + layer l = {0}; + l.type = REORG; + l.batch = batch; + l.stride = stride; + l.extra = extra; + l.h 
= h; + l.w = w; + l.c = c; + l.flatten = flatten; + if(reverse){ + l.out_w = w*stride; + l.out_h = h*stride; + l.out_c = c/(stride*stride); + }else{ + l.out_w = w/stride; + l.out_h = h/stride; + l.out_c = c*(stride*stride); + } + l.reverse = reverse; + + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = h*w*c; + if(l.extra){ + l.out_w = l.out_h = l.out_c = 0; + l.outputs = l.inputs + l.extra; + } + + if(extra){ + fprintf(stderr, "reorg %4d -> %4d\n", l.inputs, l.outputs); + } else { + fprintf(stderr, "reorg /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); + } + int output_size = l.outputs * batch; + l.output = calloc(output_size, sizeof(float)); + l.delta = calloc(output_size, sizeof(float)); + + l.forward = forward_reorg_layer; + l.backward = backward_reorg_layer; +#ifdef GPU + l.forward_gpu = forward_reorg_layer_gpu; + l.backward_gpu = backward_reorg_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); +#endif + return l; +} + +void resize_reorg_layer(layer *l, int w, int h) +{ + int stride = l->stride; + int c = l->c; + + l->h = h; + l->w = w; + + if(l->reverse){ + l->out_w = w*stride; + l->out_h = h*stride; + l->out_c = c/(stride*stride); + }else{ + l->out_w = w/stride; + l->out_h = h/stride; + l->out_c = c*(stride*stride); + } + + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->outputs; + int output_size = l->outputs * l->batch; + + l->output = realloc(l->output, output_size * sizeof(float)); + l->delta = realloc(l->delta, output_size * sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, output_size); + l->delta_gpu = cuda_make_array(l->delta, output_size); +#endif +} + +void forward_reorg_layer(const layer l, network net) +{ + int i; + if(l.flatten){ + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + if(l.reverse){ + flatten(l.output, l.w*l.h, 
l.c, l.batch, 0); + }else{ + flatten(l.output, l.w*l.h, l.c, l.batch, 1); + } + } else if (l.extra) { + for(i = 0; i < l.batch; ++i){ + copy_cpu(l.inputs, net.input + i*l.inputs, 1, l.output + i*l.outputs, 1); + } + } else if (l.reverse){ + reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output); + } else { + reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output); + } +} + +void backward_reorg_layer(const layer l, network net) +{ + int i; + if(l.flatten){ + memcpy(net.delta, l.delta, l.outputs*l.batch*sizeof(float)); + if(l.reverse){ + flatten(net.delta, l.w*l.h, l.c, l.batch, 1); + }else{ + flatten(net.delta, l.w*l.h, l.c, l.batch, 0); + } + } else if(l.reverse){ + reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta); + } else if (l.extra) { + for(i = 0; i < l.batch; ++i){ + copy_cpu(l.inputs, l.delta + i*l.outputs, 1, net.delta + i*l.inputs, 1); + } + }else{ + reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta); + } +} + +#ifdef GPU +void forward_reorg_layer_gpu(layer l, network net) +{ + int i; + if(l.flatten){ + if(l.reverse){ + flatten_gpu(net.input_gpu, l.w*l.h, l.c, l.batch, 0, l.output_gpu); + }else{ + flatten_gpu(net.input_gpu, l.w*l.h, l.c, l.batch, 1, l.output_gpu); + } + } else if (l.extra) { + for(i = 0; i < l.batch; ++i){ + copy_gpu(l.inputs, net.input_gpu + i*l.inputs, 1, l.output_gpu + i*l.outputs, 1); + } + } else if (l.reverse) { + reorg_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu); + }else { + reorg_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu); + } +} + +void backward_reorg_layer_gpu(layer l, network net) +{ + if(l.flatten){ + if(l.reverse){ + flatten_gpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 1, net.delta_gpu); + }else{ + flatten_gpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 0, net.delta_gpu); + } + } else if (l.extra) { + int i; + for(i = 0; i < l.batch; ++i){ + copy_gpu(l.inputs, l.delta_gpu + i*l.outputs, 1, net.delta_gpu + i*l.inputs, 1); + } + } 
else if(l.reverse){ + reorg_gpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta_gpu); + } else { + reorg_gpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta_gpu); + } +} +#endif diff --git a/workloads/realworld/pinned/darknet/src/reorg_layer.h b/workloads/realworld/pinned/darknet/src/reorg_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1d1445f17d2874835ee19d033b50e09761374de3 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/reorg_layer.h @@ -0,0 +1,20 @@ +#ifndef REORG_LAYER_H +#define REORG_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra); +void resize_reorg_layer(layer *l, int w, int h); +void forward_reorg_layer(const layer l, network net); +void backward_reorg_layer(const layer l, network net); + +#ifdef GPU +void forward_reorg_layer_gpu(layer l, network net); +void backward_reorg_layer_gpu(layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/pinned/darknet/src/rnn_layer.c b/workloads/realworld/pinned/darknet/src/rnn_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..c07e338caee5418657eb1127058419566d9ef787 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/rnn_layer.c @@ -0,0 +1,292 @@ +#include "rnn_layer.h" +#include "connected_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTIVATION activation, int batch_normalize, int adam) +{ + fprintf(stderr, "RNN 
Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = RNN; + l.steps = steps; + l.inputs = inputs; + + l.state = calloc(batch*outputs, sizeof(float)); + l.prev_state = calloc(batch*outputs, sizeof(float)); + + l.input_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.input_layer) = make_connected_layer(batch*steps, inputs, outputs, activation, batch_normalize, adam); + l.input_layer->batch = batch; + + l.self_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.self_layer) = make_connected_layer(batch*steps, outputs, outputs, activation, batch_normalize, adam); + l.self_layer->batch = batch; + + l.output_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.output_layer) = make_connected_layer(batch*steps, outputs, outputs, activation, batch_normalize, adam); + l.output_layer->batch = batch; + + l.outputs = outputs; + l.output = l.output_layer->output; + l.delta = l.output_layer->delta; + + l.forward = forward_rnn_layer; + l.backward = backward_rnn_layer; + l.update = update_rnn_layer; +#ifdef GPU + l.forward_gpu = forward_rnn_layer_gpu; + l.backward_gpu = backward_rnn_layer_gpu; + l.update_gpu = update_rnn_layer_gpu; + l.state_gpu = cuda_make_array(0, batch*outputs); + l.prev_state_gpu = cuda_make_array(0, batch*outputs); + l.output_gpu = l.output_layer->output_gpu; + l.delta_gpu = l.output_layer->delta_gpu; +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l.input_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.input_layer->out_c, l.input_layer->out_h, l.input_layer->out_w); + cudnnSetTensor4dDescriptor(l.self_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.self_layer->out_c, l.self_layer->out_h, l.self_layer->out_w); + cudnnSetTensor4dDescriptor(l.output_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.output_layer->out_c, l.output_layer->out_h, l.output_layer->out_w); +#endif +#endif + + return l; +} + +void 
update_rnn_layer(layer l, update_args a) +{ + update_connected_layer(*(l.input_layer), a); + update_connected_layer(*(l.self_layer), a); + update_connected_layer(*(l.output_layer), a); +} + +void forward_rnn_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, self_layer.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, input_layer.delta, 1); + if(net.train) fill_cpu(l.outputs * l.batch, 0, l.state, 1); + + for (i = 0; i < l.steps; ++i) { + s.input = net.input; + forward_connected_layer(input_layer, s); + + s.input = l.state; + forward_connected_layer(self_layer, s); + + float *old_state = l.state; + if(net.train) l.state += l.outputs*l.batch; + if(l.shortcut){ + copy_cpu(l.outputs * l.batch, old_state, 1, l.state, 1); + }else{ + fill_cpu(l.outputs * l.batch, 0, l.state, 1); + } + axpy_cpu(l.outputs * l.batch, 1, input_layer.output, 1, l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + forward_connected_layer(output_layer, s); + + net.input += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_rnn_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + increment_layer(&input_layer, l.steps-1); + increment_layer(&self_layer, l.steps-1); + increment_layer(&output_layer, l.steps-1); + + l.state += l.outputs*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_cpu(l.outputs * l.batch, input_layer.output, 1, l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, self_layer.output, 1, l.state, 1); + + 
s.input = l.state; + s.delta = self_layer.delta; + backward_connected_layer(output_layer, s); + + l.state -= l.outputs*l.batch; + /* + if(i > 0){ + copy_cpu(l.outputs * l.batch, input_layer.output - l.outputs*l.batch, 1, l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, self_layer.output - l.outputs*l.batch, 1, l.state, 1); + }else{ + fill_cpu(l.outputs * l.batch, 0, l.state, 1); + } + */ + + s.input = l.state; + s.delta = self_layer.delta - l.outputs*l.batch; + if (i == 0) s.delta = 0; + backward_connected_layer(self_layer, s); + + copy_cpu(l.outputs*l.batch, self_layer.delta, 1, input_layer.delta, 1); + if (i > 0 && l.shortcut) axpy_cpu(l.outputs*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.outputs*l.batch, 1); + s.input = net.input + i*l.inputs*l.batch; + if(net.delta) s.delta = net.delta + i*l.inputs*l.batch; + else s.delta = 0; + backward_connected_layer(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} + +#ifdef GPU + +void pull_rnn_layer(layer l) +{ + pull_connected_layer(*(l.input_layer)); + pull_connected_layer(*(l.self_layer)); + pull_connected_layer(*(l.output_layer)); +} + +void push_rnn_layer(layer l) +{ + push_connected_layer(*(l.input_layer)); + push_connected_layer(*(l.self_layer)); + push_connected_layer(*(l.output_layer)); +} + +void update_rnn_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.input_layer), a); + update_connected_layer_gpu(*(l.self_layer), a); + update_connected_layer_gpu(*(l.output_layer), a); +} + +void forward_rnn_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_gpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, self_layer.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, 
input_layer.delta_gpu, 1); + + if(net.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.prev_state_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = net.input_gpu; + forward_connected_layer_gpu(input_layer, s); + + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(self_layer, s); + + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(output_layer, s); + + net.input_gpu += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_rnn_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + increment_layer(&input_layer, l.steps - 1); + increment_layer(&self_layer, l.steps - 1); + increment_layer(&output_layer, l.steps - 1); + float *last_input = input_layer.output_gpu; + float *last_self = self_layer.output_gpu; + for (i = l.steps-1; i >= 0; --i) { + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu; + backward_connected_layer_gpu(output_layer, s); + + if(i != 0) { + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + }else { + copy_gpu(l.outputs*l.batch, l.prev_state_gpu, 1, l.state_gpu, 1); + } + + 
copy_gpu(l.outputs*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = (i > 0) ? self_layer.delta_gpu - l.outputs*l.batch : 0; + if (i == 0) s.delta_gpu = 0; + backward_connected_layer_gpu(self_layer, s); + + s.input_gpu = net.input_gpu + i*l.inputs*l.batch; + if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch; + else s.delta_gpu = 0; + backward_connected_layer_gpu(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, last_input, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, last_self, 1, l.state_gpu, 1); +} +#endif diff --git a/workloads/realworld/pinned/darknet/src/rnn_layer.h b/workloads/realworld/pinned/darknet/src/rnn_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..270a63ffafca9a9adb7b995ed674f93c70bdeb51 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/rnn_layer.h @@ -0,0 +1,25 @@ + +#ifndef RNN_LAYER_H +#define RNN_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" +#define USET + +layer make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTIVATION activation, int batch_normalize, int adam); + +void forward_rnn_layer(layer l, network net); +void backward_rnn_layer(layer l, network net); +void update_rnn_layer(layer l, update_args a); + +#ifdef GPU +void forward_rnn_layer_gpu(layer l, network net); +void backward_rnn_layer_gpu(layer l, network net); +void update_rnn_layer_gpu(layer l, update_args a); +void push_rnn_layer(layer l); +void pull_rnn_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/pinned/darknet/src/route_layer.c b/workloads/realworld/pinned/darknet/src/route_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..9133e5738b7c2d02eac061e51992063de84d3282 --- /dev/null +++ 
b/workloads/realworld/pinned/darknet/src/route_layer.c @@ -0,0 +1,136 @@ +#include "route_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + +route_layer make_route_layer(int batch, int n, int *input_layers, int *input_sizes) +{ + fprintf(stderr,"route "); + route_layer l = {0}; + l.type = ROUTE; + l.batch = batch; + l.n = n; + l.input_layers = input_layers; + l.input_sizes = input_sizes; + int i; + int outputs = 0; + for(i = 0; i < n; ++i){ + fprintf(stderr," %d", input_layers[i]); + outputs += input_sizes[i]; + } + fprintf(stderr, "\n"); + l.outputs = outputs; + l.inputs = outputs; + // l.delta = calloc(outputs*batch, sizeof(float)); + // l.output = calloc(outputs*batch, sizeof(float)); + cudaMallocHost(&l.output, l.outputs*batch*sizeof(float)); + cudaMallocHost(&l.delta, l.outputs*batch*sizeof(float)); + + l.forward = forward_route_layer; + l.backward = backward_route_layer; + #ifdef GPU + l.forward_gpu = forward_route_layer_gpu; + l.backward_gpu = backward_route_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, outputs*batch); + l.output_gpu = cuda_make_array(l.output, outputs*batch); + #endif + return l; +} + +void resize_route_layer(route_layer *l, network *net) +{ + int i; + layer first = net->layers[l->input_layers[0]]; + l->out_w = first.out_w; + l->out_h = first.out_h; + l->out_c = first.out_c; + l->outputs = first.outputs; + l->input_sizes[0] = first.outputs; + for(i = 1; i < l->n; ++i){ + int index = l->input_layers[i]; + layer next = net->layers[index]; + l->outputs += next.outputs; + l->input_sizes[i] = next.outputs; + if(next.out_w == first.out_w && next.out_h == first.out_h){ + l->out_c += next.out_c; + }else{ + printf("%d %d, %d %d\n", next.out_w, next.out_h, first.out_w, first.out_h); + l->out_h = l->out_w = l->out_c = 0; + } + } + l->inputs = l->outputs; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + 
cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + +void forward_route_layer(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *input = net.layers[index].output; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1); + } + offset += input_size; + } +} + +void backward_route_layer(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *delta = net.layers[index].delta; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1); + } + offset += input_size; + } +} + +#ifdef GPU +void forward_route_layer_gpu(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *input = net.layers[index].output_gpu; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + copy_gpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1); + } + offset += input_size; + } +} + +void backward_route_layer_gpu(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *delta = net.layers[index].delta_gpu; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + axpy_gpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1); + } + offset += input_size; + } +} +#endif diff --git a/workloads/realworld/pinned/darknet/src/route_layer.h b/workloads/realworld/pinned/darknet/src/route_layer.h new file mode 100644 index 
0000000000000000000000000000000000000000..1d40330ff30c9c93a2180a696d5f67f628ea481c --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/route_layer.h @@ -0,0 +1,18 @@ +#ifndef ROUTE_LAYER_H +#define ROUTE_LAYER_H +#include "network.h" +#include "layer.h" + +typedef layer route_layer; + +route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size); +void forward_route_layer(const route_layer l, network net); +void backward_route_layer(const route_layer l, network net); +void resize_route_layer(route_layer *l, network *net); + +#ifdef GPU +void forward_route_layer_gpu(const route_layer l, network net); +void backward_route_layer_gpu(const route_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/pinned/darknet/src/shortcut_layer.c b/workloads/realworld/pinned/darknet/src/shortcut_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..01e54a9fadb3a06fdcbd61f6112f24747ce93e60 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/shortcut_layer.c @@ -0,0 +1,92 @@ +#include "shortcut_layer.h" +#include "cuda_dark.h" +#include "blas.h" +#include "activations.h" + +#include +#include + +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) +{ + fprintf(stderr, "res %3d %4d x%4d x%4d -> %4d x%4d x%4d\n",index, w2,h2,c2, w,h,c); + layer l = {0}; + l.type = SHORTCUT; + l.batch = batch; + l.w = w2; + l.h = h2; + l.c = c2; + l.out_w = w; + l.out_h = h; + l.out_c = c; + l.outputs = w*h*c; + l.inputs = l.outputs; + + l.index = index; + + // l.delta = calloc(l.outputs*batch, sizeof(float)); + // l.output = calloc(l.outputs*batch, sizeof(float)); + cudaMallocHost(&l.output, l.outputs*batch*sizeof(float)); + cudaMallocHost(&l.delta, l.outputs*batch*sizeof(float)); + + l.forward = forward_shortcut_layer; + l.backward = backward_shortcut_layer; + #ifdef GPU + l.forward_gpu = forward_shortcut_layer_gpu; + l.backward_gpu = backward_shortcut_layer_gpu; + + l.delta_gpu = 
cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + #endif + return l; +} + +void resize_shortcut_layer(layer *l, int w, int h) +{ + assert(l->w == l->out_w); + assert(l->h == l->out_h); + l->w = l->out_w = w; + l->h = l->out_h = h; + l->outputs = w*h*l->out_c; + l->inputs = l->outputs; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + + +void forward_shortcut_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + shortcut_cpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output); + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_shortcut_layer(const layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + axpy_cpu(l.outputs*l.batch, l.alpha, l.delta, 1, net.delta, 1); + shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta); +} + +#ifdef GPU +void forward_shortcut_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + shortcut_gpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output_gpu); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_shortcut_layer_gpu(const layer l, network net) +{ + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + axpy_gpu(l.outputs*l.batch, l.alpha, l.delta_gpu, 1, net.delta_gpu, 1); + shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, 1, l.beta, 
net.layers[l.index].delta_gpu); +} +#endif diff --git a/workloads/realworld/pinned/darknet/src/shortcut_layer.h b/workloads/realworld/pinned/darknet/src/shortcut_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..5f684fc1eadea2c6902be96bf4a4bf9a3b533da9 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/shortcut_layer.h @@ -0,0 +1,17 @@ +#ifndef SHORTCUT_LAYER_H +#define SHORTCUT_LAYER_H + +#include "layer.h" +#include "network.h" + +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); +void forward_shortcut_layer(const layer l, network net); +void backward_shortcut_layer(const layer l, network net); +void resize_shortcut_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_shortcut_layer_gpu(const layer l, network net); +void backward_shortcut_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/pinned/darknet/src/softmax_layer.c b/workloads/realworld/pinned/darknet/src/softmax_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..fd5843caccf89cd80ea85c111cf2bd052ba76d0d --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/softmax_layer.c @@ -0,0 +1,110 @@ +#include "softmax_layer.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +softmax_layer make_softmax_layer(int batch, int inputs, int groups) +{ + assert(inputs%groups == 0); + fprintf(stderr, "softmax %4d\n", inputs); + softmax_layer l = {0}; + l.type = SOFTMAX; + l.batch = batch; + l.groups = groups; + l.inputs = inputs; + l.outputs = inputs; + // l.loss = calloc(inputs*batch, sizeof(float)); + // l.output = calloc(inputs*batch, sizeof(float)); + // l.delta = calloc(inputs*batch, sizeof(float)); + cudaMallocHost(&l.loss, inputs*batch*sizeof(float)); + cudaMallocHost(&l.output, inputs*batch*sizeof(float)); + cudaMallocHost(&l.delta, inputs*batch*sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = 
forward_softmax_layer; + l.backward = backward_softmax_layer; + #ifdef GPU + l.forward_gpu = forward_softmax_layer_gpu; + l.backward_gpu = backward_softmax_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.loss_gpu = cuda_make_array(l.loss, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_softmax_layer(const softmax_layer l, network net) +{ + if(l.softmax_tree){ + int i; + int count = 0; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_cpu(net.input + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output + count); + count += group_size; + } + } else { + softmax_cpu(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output); + } + + if(net.truth && !l.noloss){ + softmax_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_softmax_layer(const softmax_layer l, network net) +{ + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_softmax_layer_output(const softmax_layer layer) +{ + cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch); +} + +void forward_softmax_layer_gpu(const softmax_layer l, network net) +{ + if(l.softmax_tree){ + softmax_tree(net.input_gpu, 1, l.batch, l.inputs, l.temperature, l.output_gpu, *l.softmax_tree); + /* + int i; + int count = 0; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_gpu(net.input_gpu + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count); + count += group_size; + } + */ + } else { + if(l.spatial){ + softmax_gpu(net.input_gpu, l.c, l.batch*l.c, l.inputs/l.c, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu); + }else{ + softmax_gpu(net.input_gpu, l.inputs/l.groups, l.batch, l.inputs, l.groups, 
l.inputs/l.groups, 1, l.temperature, l.output_gpu); + } + } + if(net.truth && !l.noloss){ + softmax_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); + if(l.softmax_tree){ + mask_gpu(l.batch*l.inputs, l.delta_gpu, SECRET_NUM, net.truth_gpu, 0); + mask_gpu(l.batch*l.inputs, l.loss_gpu, SECRET_NUM, net.truth_gpu, 0); + } + cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_softmax_layer_gpu(const softmax_layer layer, network net) +{ + axpy_gpu(layer.batch*layer.inputs, 1, layer.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/pinned/darknet/src/softmax_layer.h b/workloads/realworld/pinned/darknet/src/softmax_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..2e3ffe01a6c5d273a9f6139bc9f265cd7e2bc860 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/softmax_layer.h @@ -0,0 +1,19 @@ +#ifndef SOFTMAX_LAYER_H +#define SOFTMAX_LAYER_H +#include "layer.h" +#include "network.h" + +typedef layer softmax_layer; + +void softmax_array(float *input, int n, float temp, float *output); +softmax_layer make_softmax_layer(int batch, int inputs, int groups); +void forward_softmax_layer(const softmax_layer l, network net); +void backward_softmax_layer(const softmax_layer l, network net); + +#ifdef GPU +void pull_softmax_layer_output(const softmax_layer l); +void forward_softmax_layer_gpu(const softmax_layer l, network net); +void backward_softmax_layer_gpu(const softmax_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/pinned/darknet/src/stb_image.h b/workloads/realworld/pinned/darknet/src/stb_image.h new file mode 100644 index 0000000000000000000000000000000000000000..d9c21bc813f1f24de2a25ee3cc82bdce9413eaa5 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/stb_image.h @@ -0,0 +1,7462 @@ +/* stb_image - v2.19 - public domain image loader - http://nothings.org/stb + no warranty 
implied; use at your own risk + + Do this: + #define STB_IMAGE_IMPLEMENTATION + before you include this file in *one* C or C++ file to create the implementation. + + // i.e. it should look like this: + #include ... + #include ... + #include ... + #define STB_IMAGE_IMPLEMENTATION + #include "stb_image.h" + + You can #define STBI_ASSERT(x) before the #include to avoid using assert.h. + And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free + + + QUICK NOTES: + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface + + JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) + PNG 1/2/4/8/16-bit-per-channel + + TGA (not sure what subset, if a subset) + BMP non-1bpp, non-RLE + PSD (composited view only, no extra channels, 8/16 bit-per-channel) + + GIF (*comp always reports as 4-channel) + HDR (radiance rgbE format) + PIC (Softimage PIC) + PNM (PPM and PGM binary only) + + Animated GIF still needs a proper API, but here's one way to do it: + http://gist.github.com/urraka/685d9a6340b26b830d49 + + - decode from memory or through FILE (define STBI_NO_STDIO to remove code) + - decode from arbitrary I/O callbacks + - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) + + Full documentation under "DOCUMENTATION" below. + + +LICENSE + + See end of file for license information. 
+ +RECENT REVISION HISTORY: + + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings + 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes + 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 + RGB-format JPEG; remove white matting in PSD; + allocate large structures on the stack; + correct channel count for PNG & BMP + 2.10 (2016-01-22) avoid warning introduced in 2.09 + 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED + + See end of file for full revision history. + + + ============================ Contributors ========================= + + Image formats Extensions, features + Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) + Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) + Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) + Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) + Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) + Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) + Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) + github:urraka (animated gif) Junggon Kim (PNM comments) + Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) + socks-the-fox (16-bit PNG) + Jeremy Sawicki (handle all ImageNet JPGs) + Optimizations & bugfixes Mikhail Morozov (1-bit BMP) + Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query) + Arseny Kapoulkine + John-Mark Allen + + Bug & warning fixes + Marc LeBlanc David Woo Guillaume George Martins Mozeiko + Christpher Lloyd Jerry Jansson Joseph Thomson Phil Jordan + Dave Moore Roy Eltham Hayaki Saito Nathan Reed + Won Chun Luke Graham Johan Duparc Nick 
Verigakis + the Horde3D community Thomas Ruf Ronny Chevalier github:rlyeh + Janez Zemva John Bartholomew Michal Cichon github:romigrou + Jonathan Blow Ken Hamada Tero Hanninen github:svdijk + Laurent Gomila Cort Stratton Sergio Gonzalez github:snagar + Aruelien Pocheville Thibault Reuille Cass Everitt github:Zelex + Ryamond Barbiero Paul Du Bois Engin Manap github:grim210 + Aldo Culquicondor Philipp Wiesemann Dale Weiler github:sammyhw + Oriol Ferrer Mesia Josh Tobin Matthew Gregan github:phprus + Julian Raschke Gregory Mullen Baldur Karlsson github:poppolopoppo + Christian Floisand Kevin Schmidt github:darealshinji + Blazej Dariusz Roszkowski github:Michaelangel007 +*/ + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +// DOCUMENTATION +// +// Limitations: +// - no 12-bit-per-channel JPEG +// - no JPEGs with arithmetic coding +// - GIF always returns *comp=4 +// +// Basic usage (see HDR discussion below for HDR usage): +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// // ... but 'n' will always be the number that it would have been if you said 0 +// stbi_image_free(data) +// +// Standard parameters: +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *channels_in_file -- outputs # of image components in image file +// int desired_channels -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data, or NULL on an allocation failure or if the image is +// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. 
There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'desired_channels' if desired_channels is non-zero, or +// *channels_in_file otherwise. If desired_channels is non-zero, +// *channels_in_file has the number of components that _would_ have been +// output otherwise. E.g. if you set desired_channels to 4, you will always +// get RGBA output, but you can check *channels_in_file to see if it's trivially +// opaque because e.g. there were only 3 channels in the source image. +// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *channels_in_file will be unchanged. The function +// stbi_failure_reason() can be queried for an extremely brief, end-user +// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS +// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. +// +// =========================================================================== +// +// Philosophy +// +// stb libraries are designed with the following priorities: +// +// 1. easy to use +// 2. easy to maintain +// 3. good performance +// +// Sometimes I let "good performance" creep up in priority over "easy to maintain", +// and for best performance I may provide less-easy-to-use APIs that give higher +// performance, in addition to the easy to use ones. Nevertheless, it's important +// to keep in mind that from the standpoint of you, a client of this library, +// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. 
+// +// Some secondary priorities arise directly from the first two, some of which +// make more explicit reasons why performance can't be emphasized. +// +// - Portable ("ease of use") +// - Small source code footprint ("easy to maintain") +// - No dependencies ("ease of use") +// +// =========================================================================== +// +// I/O callbacks +// +// I/O callbacks allow you to read from arbitrary sources, like packaged +// files or some other source. Data read from callbacks are processed +// through a small internal buffer (currently 128 bytes) to try to reduce +// overhead. +// +// The three functions you must define are "read" (reads some bytes of data), +// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end). +// +// =========================================================================== +// +// SIMD support +// +// The JPEG decoder will try to automatically use SIMD kernels on x86 when +// supported by the compiler. For ARM Neon support, you must explicitly +// request it. +// +// (The old do-it-yourself SIMD API is no longer supported in the current +// code.) +// +// On x86, SSE2 will automatically be used when available based on a run-time +// test; if not, the generic C versions are used as a fall-back. On ARM targets, +// the typical path is to have separate builds for NEON and non-NEON devices +// (at least this is true for iOS and Android). Therefore, the NEON support is +// toggled by a build flag: define STBI_NEON to get NEON loops. +// +// If for some reason you do not want to use any of SIMD code, or if +// you have issues compiling it, you can disable it entirely by +// defining STBI_NO_SIMD. 
+// +// =========================================================================== +// +// HDR image support (disable by defining STBI_NO_HDR) +// +// stb_image now supports loading HDR images in general, and currently +// the Radiance .HDR file format, although the support is provided +// generically. You can still load any file through the existing interface; +// if you attempt to load an HDR file, it will be automatically remapped to +// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). +// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +// float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +// stbi_ldr_to_hdr_scale(1.0f); +// stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +// stbi_is_hdr(char *filename); +// +// =========================================================================== +// +// iPhone PNG support: +// +// By default we convert iphone-formatted PNGs back to RGB, even though +// they are internally encoded differently. You can disable this conversion +// by by calling stbi_convert_iphone_png_to_rgb(0), in which case +// you will always just get the native iphone "format" through (which +// is BGR stored in RGB). 
+// +// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per +// pixel to remove any premultiplied alpha *only* if the image file explicitly +// says there's premultiplied data (currently only happens in iPhone images, +// and only if iPhone convert-to-rgb processing is on). +// +// =========================================================================== +// +// ADDITIONAL CONFIGURATION +// +// - You can suppress implementation of any of the decoders to reduce +// your code footprint by #defining one or more of the following +// symbols before creating the implementation. +// +// STBI_NO_JPEG +// STBI_NO_PNG +// STBI_NO_BMP +// STBI_NO_PSD +// STBI_NO_TGA +// STBI_NO_GIF +// STBI_NO_HDR +// STBI_NO_PIC +// STBI_NO_PNM (.ppm and .pgm) +// +// - You can request *only* certain decoders and suppress all other ones +// (this will be more forward-compatible, as addition of new decoders +// doesn't require you to disable them explicitly): +// +// STBI_ONLY_JPEG +// STBI_ONLY_PNG +// STBI_ONLY_BMP +// STBI_ONLY_PSD +// STBI_ONLY_TGA +// STBI_ONLY_GIF +// STBI_ONLY_HDR +// STBI_ONLY_PIC +// STBI_ONLY_PNM (.ppm and .pgm) +// +// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still +// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB +// + + +#ifndef STBI_NO_STDIO +#include +#endif // STBI_NO_STDIO + +#define STBI_VERSION 1 + +enum +{ + STBI_default = 0, // only used for desired_channels + + STBI_grey = 1, + STBI_grey_alpha = 2, + STBI_rgb = 3, + STBI_rgb_alpha = 4 +}; + +typedef unsigned char stbi_uc; +typedef unsigned short stbi_us; + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef STB_IMAGE_STATIC +#define STBIDEF static +#else +#define STBIDEF extern +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// PRIMARY API - works on images of any type +// + +// +// load image by filename, open file, or memory buffer +// + +typedef struct +{ + int (*read) (void *user,char *data,int 
size); // fill 'data' with 'size' bytes. return number of bytes actually read + void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative + int (*eof) (void *user); // returns nonzero if we are at end of file/data +} stbi_io_callbacks; + +//////////////////////////////////// +// +// 8-bits-per-channel interface +// + +STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels); +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +#endif + + +#ifndef STBI_NO_STDIO +STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +// for stbi_load_from_file, file pointer is left pointing immediately after image +#endif + +//////////////////////////////////// +// +// 16-bits-per-channel interface +// + +STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +#endif + +//////////////////////////////////// +// +// float-per-channel interface +// +#ifndef STBI_NO_LINEAR + STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int 
*channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + + #ifndef STBI_NO_STDIO + STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); + #endif +#endif + +#ifndef STBI_NO_HDR + STBIDEF void stbi_hdr_to_ldr_gamma(float gamma); + STBIDEF void stbi_hdr_to_ldr_scale(float scale); +#endif // STBI_NO_HDR + +#ifndef STBI_NO_LINEAR + STBIDEF void stbi_ldr_to_hdr_gamma(float gamma); + STBIDEF void stbi_ldr_to_hdr_scale(float scale); +#endif // STBI_NO_LINEAR + +// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename); +STBIDEF int stbi_is_hdr_from_file(FILE *f); +#endif // STBI_NO_STDIO + + +// get a VERY brief reason for failure +// NOT THREADSAFE +STBIDEF const char *stbi_failure_reason (void); + +// free the loaded image -- this is just free() +STBIDEF void stbi_image_free (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit (char const 
*filename); +STBIDEF int stbi_is_16_bit_from_file(FILE *f); +#endif + + + +// for image formats that explicitly notate that they have premultiplied alpha, +// we just return the colors as stored in the file. set this flag to force +// unpremultiplication. results are undefined if the unpremultiply overflow. +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); + +// indicate whether we should process iphone images back to canonical format, +// or just pass them through "as-is" +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); + +// flip the image vertically, so the first pixel in the output array is the bottom left +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); + +// ZLIB client - used by PNG, available for other purposes + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); +STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + + +#ifdef __cplusplus +} +#endif + +// +// +//// end header file ///////////////////////////////////////////////////// +#endif // STBI_INCLUDE_STB_IMAGE_H + +#ifdef STB_IMAGE_IMPLEMENTATION + +#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \ + || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \ + || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \ + || defined(STBI_ONLY_ZLIB) + #ifndef STBI_ONLY_JPEG + #define STBI_NO_JPEG + #endif + #ifndef STBI_ONLY_PNG + 
#define STBI_NO_PNG + #endif + #ifndef STBI_ONLY_BMP + #define STBI_NO_BMP + #endif + #ifndef STBI_ONLY_PSD + #define STBI_NO_PSD + #endif + #ifndef STBI_ONLY_TGA + #define STBI_NO_TGA + #endif + #ifndef STBI_ONLY_GIF + #define STBI_NO_GIF + #endif + #ifndef STBI_ONLY_HDR + #define STBI_NO_HDR + #endif + #ifndef STBI_ONLY_PIC + #define STBI_NO_PIC + #endif + #ifndef STBI_ONLY_PNM + #define STBI_NO_PNM + #endif +#endif + +#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB) +#define STBI_NO_ZLIB +#endif + + +#include +#include // ptrdiff_t on osx +#include +#include +#include + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +#include // ldexp, pow +#endif + +#ifndef STBI_NO_STDIO +#include +#endif + +#ifndef STBI_ASSERT +#include +#define STBI_ASSERT(x) assert(x) +#endif + + +#ifndef _MSC_VER + #ifdef __cplusplus + #define stbi_inline inline + #else + #define stbi_inline + #endif +#else + #define stbi_inline __forceinline +#endif + + +#ifdef _MSC_VER +typedef unsigned short stbi__uint16; +typedef signed short stbi__int16; +typedef unsigned int stbi__uint32; +typedef signed int stbi__int32; +#else +#include +typedef uint16_t stbi__uint16; +typedef int16_t stbi__int16; +typedef uint32_t stbi__uint32; +typedef int32_t stbi__int32; +#endif + +// should produce compiler error if size is wrong +typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 
1 : -1]; + +#ifdef _MSC_VER +#define STBI_NOTUSED(v) (void)(v) +#else +#define STBI_NOTUSED(v) (void)sizeof(v) +#endif + +#ifdef _MSC_VER +#define STBI_HAS_LROTL +#endif + +#ifdef STBI_HAS_LROTL + #define stbi_lrot(x,y) _lrotl(x,y) +#else + #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y)))) +#endif + +#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) +// ok +#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)." +#endif + +#ifndef STBI_MALLOC +#define STBI_MALLOC(sz) malloc(sz) +#define STBI_REALLOC(p,newsz) realloc(p,newsz) +#define STBI_FREE(p) free(p) +#endif + +#ifndef STBI_REALLOC_SIZED +#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz) +#endif + +// x86/x64 detection +#if defined(__x86_64__) || defined(_M_X64) +#define STBI__X64_TARGET +#elif defined(__i386) || defined(_M_IX86) +#define STBI__X86_TARGET +#endif + +#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) +// gcc doesn't support sse2 intrinsics unless you compile with -msse2, +// which in turn means it gets to use SSE2 everywhere. This is unfortunate, +// but previous attempts to provide the SSE2 functions with runtime +// detection caused numerous issues. The way architecture extensions are +// exposed in GCC/Clang is, sadly, not really suited for one-file libs. +// New behavior: if compiled with -msse2, we use SSE2 without any +// detection; if not, we don't use it at all. 
+#define STBI_NO_SIMD +#endif + +#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD) +// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET +// +// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the +// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant. +// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not +// simultaneously enabling "-mstackrealign". +// +// See https://github.com/nothings/stb/issues/81 for more information. +// +// So default to no SSE2 on 32-bit MinGW. If you've read this far and added +// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2. +#define STBI_NO_SIMD +#endif + +#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) +#define STBI_SSE2 +#include + +#ifdef _MSC_VER + +#if _MSC_VER >= 1400 // not VC6 +#include // __cpuid +static int stbi__cpuid3(void) +{ + int info[4]; + __cpuid(info,1); + return info[3]; +} +#else +static int stbi__cpuid3(void) +{ + int res; + __asm { + mov eax,1 + cpuid + mov res,edx + } + return res; +} +#endif + +#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name + +static int stbi__sse2_available(void) +{ + int info3 = stbi__cpuid3(); + return ((info3 >> 26) & 1) != 0; +} +#else // assume GCC-style if not VC++ +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) + +static int stbi__sse2_available(void) +{ + // If we're even attempting to compile this on GCC/Clang, that means + // -msse2 is on, which means the compiler is allowed to use SSE2 + // instructions at will, and so are we. 
+ return 1; +} +#endif +#endif + +// ARM NEON +#if defined(STBI_NO_SIMD) && defined(STBI_NEON) +#undef STBI_NEON +#endif + +#ifdef STBI_NEON +#include +// assume GCC or Clang on ARM targets +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) +#endif + +#ifndef STBI_SIMD_ALIGN +#define STBI_SIMD_ALIGN(type, name) type name +#endif + +/////////////////////////////////////////////// +// +// stbi__context struct and start_xxx functions + +// stbi__context structure is our basic context used by all images, so it +// contains all the IO context, plus some basic image information +typedef struct +{ + stbi__uint32 img_x, img_y; + int img_n, img_out_n; + + stbi_io_callbacks io; + void *io_user_data; + + int read_from_callbacks; + int buflen; + stbi_uc buffer_start[128]; + + stbi_uc *img_buffer, *img_buffer_end; + stbi_uc *img_buffer_original, *img_buffer_original_end; +} stbi__context; + + +static void stbi__refill_buffer(stbi__context *s); + +// initialize a memory-decode context +static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len) +{ + s->io.read = NULL; + s->read_from_callbacks = 0; + s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer; + s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len; +} + +// initialize a callback-based context +static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user) +{ + s->io = *c; + s->io_user_data = user; + s->buflen = sizeof(s->buffer_start); + s->read_from_callbacks = 1; + s->img_buffer_original = s->buffer_start; + stbi__refill_buffer(s); + s->img_buffer_original_end = s->img_buffer_end; +} + +#ifndef STBI_NO_STDIO + +static int stbi__stdio_read(void *user, char *data, int size) +{ + return (int) fread(data,1,size,(FILE*) user); +} + +static void stbi__stdio_skip(void *user, int n) +{ + fseek((FILE*) user, n, SEEK_CUR); +} + +static int stbi__stdio_eof(void *user) +{ + return feof((FILE*) user); +} + +static stbi_io_callbacks 
stbi__stdio_callbacks = +{ + stbi__stdio_read, + stbi__stdio_skip, + stbi__stdio_eof, +}; + +static void stbi__start_file(stbi__context *s, FILE *f) +{ + stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); +} + +//static void stop_file(stbi__context *s) { } + +#endif // !STBI_NO_STDIO + +static void stbi__rewind(stbi__context *s) +{ + // conceptually rewind SHOULD rewind to the beginning of the stream, + // but we just rewind to the beginning of the initial buffer, because + // we only use it after doing 'test', which only ever looks at at most 92 bytes + s->img_buffer = s->img_buffer_original; + s->img_buffer_end = s->img_buffer_original_end; +} + +enum +{ + STBI_ORDER_RGB, + STBI_ORDER_BGR +}; + +typedef struct +{ + int bits_per_channel; + int num_channels; + int channel_order; +} stbi__result_info; + +#ifndef STBI_NO_JPEG +static int stbi__jpeg_test(stbi__context *s); +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNG +static int stbi__png_test(stbi__context *s); +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__png_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_BMP +static int stbi__bmp_test(stbi__context *s); +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_TGA +static int stbi__tga_test(stbi__context *s); +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s); +static 
void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__psd_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_HDR +static int stbi__hdr_test(stbi__context *s); +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_test(stbi__context *s); +static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_GIF +static int stbi__gif_test(stbi__context *s); +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNM +static int stbi__pnm_test(stbi__context *s); +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +// this is not threadsafe +static const char *stbi__g_failure_reason; + +STBIDEF const char *stbi_failure_reason(void) +{ + return stbi__g_failure_reason; +} + +static int stbi__err(const char *str) +{ + stbi__g_failure_reason = str; + return 0; +} + +static void *stbi__malloc(size_t size) +{ + return STBI_MALLOC(size); +} + +// stb_image uses ints pervasively, including for offset calculations. +// therefore the largest decoded image size we can support with the +// current code, even on 64-bit targets, is INT_MAX. this is not a +// significant limitation for the intended use case. 
+// +// we do, however, need to make sure our size calculations don't +// overflow. hence a few helper functions for size calculations that +// multiply integers together, making sure that they're non-negative +// and no overflow occurs. + +// return 1 if the sum is valid, 0 on overflow. +// negative terms are considered invalid. +static int stbi__addsizes_valid(int a, int b) +{ + if (b < 0) return 0; + // now 0 <= b <= INT_MAX, hence also + // 0 <= INT_MAX - b <= INTMAX. + // And "a + b <= INT_MAX" (which might overflow) is the + // same as a <= INT_MAX - b (no overflow) + return a <= INT_MAX - b; +} + +// returns 1 if the product is valid, 0 on overflow. +// negative factors are considered invalid. +static int stbi__mul2sizes_valid(int a, int b) +{ + if (a < 0 || b < 0) return 0; + if (b == 0) return 1; // mul-by-0 is always safe + // portable way to check for no overflows in a*b + return a <= INT_MAX/b; +} + +// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow +static int stbi__mad2sizes_valid(int a, int b, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add); +} + +// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow +static int stbi__mad3sizes_valid(int a, int b, int c, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__addsizes_valid(a*b*c, add); +} + +// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add); +} +#endif + +// mallocs with size overflow checking +static void *stbi__malloc_mad2(int a, int b, int add) +{ + if (!stbi__mad2sizes_valid(a, b, add)) return NULL; + return stbi__malloc(a*b + add); +} + +static void 
*stbi__malloc_mad3(int a, int b, int c, int add) +{ + if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL; + return stbi__malloc(a*b*c + add); +} + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +static void *stbi__malloc_mad4(int a, int b, int c, int d, int add) +{ + if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; + return stbi__malloc(a*b*c*d + add); +} +#endif + +// stbi__err - error +// stbi__errpf - error returning pointer to float +// stbi__errpuc - error returning pointer to unsigned char + +#ifdef STBI_NO_FAILURE_STRINGS + #define stbi__err(x,y) 0 +#elif defined(STBI_FAILURE_USERMSG) + #define stbi__err(x,y) stbi__err(y) +#else + #define stbi__err(x,y) stbi__err(x) +#endif + +#define stbi__errpf(x,y) ((float *)(size_t) (stbi__err(x,y)?NULL:NULL)) +#define stbi__errpuc(x,y) ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL)) + +STBIDEF void stbi_image_free(void *retval_from_stbi_load) +{ + STBI_FREE(retval_from_stbi_load); +} + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); +#endif + +#ifndef STBI_NO_HDR +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp); +#endif + +static int stbi__vertically_flip_on_load = 0; + +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) +{ + stbi__vertically_flip_on_load = flag_true_if_should_flip; +} + +static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields + ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed + ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order + ri->num_channels = 0; + + #ifndef STBI_NO_JPEG + if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PNG + if (stbi__png_test(s)) return 
stbi__png_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_BMP + if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_GIF + if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PSD + if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc); + #endif + #ifndef STBI_NO_PIC + if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PNM + if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri); + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); + return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + + #ifndef STBI_NO_TGA + // test tga last because it's a crappy test! + if (stbi__tga_test(s)) + return stbi__tga_load(s,x,y,comp,req_comp, ri); + #endif + + return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); +} + +static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi_uc *reduced; + + reduced = (stbi_uc *) stbi__malloc(img_len); + if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling + + STBI_FREE(orig); + return reduced; +} + +static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi__uint16 *enlarged; + + enlarged = (stbi__uint16 *) stbi__malloc(img_len*2); + if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff + + STBI_FREE(orig); + return enlarged; +} + +static void 
stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) +{ + int row; + size_t bytes_per_row = (size_t)w * bytes_per_pixel; + stbi_uc temp[2048]; + stbi_uc *bytes = (stbi_uc *)image; + + for (row = 0; row < (h>>1); row++) { + stbi_uc *row0 = bytes + row*bytes_per_row; + stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; + // swap row0 with row1 + size_t bytes_left = bytes_per_row; + while (bytes_left) { + size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp); + memcpy(temp, row0, bytes_copy); + memcpy(row0, row1, bytes_copy); + memcpy(row1, temp, bytes_copy); + row0 += bytes_copy; + row1 += bytes_copy; + bytes_left -= bytes_copy; + } + } +} + +static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel) +{ + int slice; + int slice_size = w * h * bytes_per_pixel; + + stbi_uc *bytes = (stbi_uc *)image; + for (slice = 0; slice < z; ++slice) { + stbi__vertical_flip(bytes, w, h, bytes_per_pixel); + bytes += slice_size; + } +} + +static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); + + if (result == NULL) + return NULL; + + if (ri.bits_per_channel != 8) { + STBI_ASSERT(ri.bits_per_channel == 16); + result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 8; + } + + // @TODO: move stbi__convert_format to here + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? 
req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); + } + + return (unsigned char *) result; +} + +static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); + + if (result == NULL) + return NULL; + + if (ri.bits_per_channel != 16) { + STBI_ASSERT(ri.bits_per_channel == 8); + result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 16; + } + + // @TODO: move stbi__convert_format16 to here + // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); + } + + return (stbi__uint16 *) result; +} + +#if !defined(STBI_NO_HDR) || !defined(STBI_NO_LINEAR) +static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) +{ + if (stbi__vertically_flip_on_load && result != NULL) { + int channels = req_comp ? 
req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); + } +} +#endif + +#ifndef STBI_NO_STDIO + +static FILE *stbi__fopen(char const *filename, char const *mode) +{ + FILE *f; +#if defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != fopen_s(&f, filename, mode)) + f=0; +#else + f = fopen(filename, mode); +#endif + return f; +} + + +STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + unsigned char *result; + if (!f) return stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__uint16 *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + stbi__uint16 *result; + if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file_16(f,x,y,comp,req_comp); + fclose(f); + return result; +} + + +#endif //!STBI_NO_STDIO + +STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + 
stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_mem(&s,buffer,len); + + result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp); + if (stbi__vertically_flip_on_load) { + stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); + } + + return result; +} +#endif + +#ifndef STBI_NO_LINEAR +static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + stbi__result_info ri; + float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); + if (hdr_data) + stbi__float_postprocess(hdr_data,x,y,comp,req_comp); + return hdr_data; + } + #endif + data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); + if (data) + return stbi__ldr_to_hdr(data, *x, *y, req_comp ? 
req_comp : *comp); + return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); +} + +STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + float *result; + FILE *f = stbi__fopen(filename, "rb"); + if (!f) return stbi__errpf("can't fopen", "Unable to open file"); + result = stbi_loadf_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_file(&s,f); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} +#endif // !STBI_NO_STDIO + +#endif // !STBI_NO_LINEAR + +// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is +// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always +// reports false! 
+ +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(buffer); + STBI_NOTUSED(len); + return 0; + #endif +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result=0; + if (f) { + result = stbi_is_hdr_from_file(f); + fclose(f); + } + return result; +} + +STBIDEF int stbi_is_hdr_from_file(FILE *f) +{ + #ifndef STBI_NO_HDR + long pos = ftell(f); + int res; + stbi__context s; + stbi__start_file(&s,f); + res = stbi__hdr_test(&s); + fseek(f, pos, SEEK_SET); + return res; + #else + STBI_NOTUSED(f); + return 0; + #endif +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(clbk); + STBI_NOTUSED(user); + return 0; + #endif +} + +#ifndef STBI_NO_LINEAR +static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f; + +STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } +STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } +#endif + +static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f; + +STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; } +STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; } + + +////////////////////////////////////////////////////////////////////////////// +// +// Common code used by all image loaders +// + +enum +{ + STBI__SCAN_load=0, + STBI__SCAN_type, + STBI__SCAN_header +}; + +static void stbi__refill_buffer(stbi__context *s) +{ + int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); + if (n == 0) { + // at end of file, treat same as if from memory, but need to handle case + // where 
s->img_buffer isn't pointing to safe memory, e.g. 0-byte file + s->read_from_callbacks = 0; + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start+1; + *s->img_buffer = 0; + } else { + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start + n; + } +} + +stbi_inline static stbi_uc stbi__get8(stbi__context *s) +{ + if (s->img_buffer < s->img_buffer_end) + return *s->img_buffer++; + if (s->read_from_callbacks) { + stbi__refill_buffer(s); + return *s->img_buffer++; + } + return 0; +} + +stbi_inline static int stbi__at_eof(stbi__context *s) +{ + if (s->io.read) { + if (!(s->io.eof)(s->io_user_data)) return 0; + // if feof() is true, check if buffer = end + // special case: we've only got the special 0 character at the end + if (s->read_from_callbacks == 0) return 1; + } + + return s->img_buffer >= s->img_buffer_end; +} + +static void stbi__skip(stbi__context *s, int n) +{ + if (n < 0) { + s->img_buffer = s->img_buffer_end; + return; + } + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + s->img_buffer = s->img_buffer_end; + (s->io.skip)(s->io_user_data, n - blen); + return; + } + } + s->img_buffer += n; +} + +static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) +{ + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + int res, count; + + memcpy(buffer, s->img_buffer, blen); + + count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); + res = (count == (n-blen)); + s->img_buffer = s->img_buffer_end; + return res; + } + } + + if (s->img_buffer+n <= s->img_buffer_end) { + memcpy(buffer, s->img_buffer, n); + s->img_buffer += n; + return 1; + } else + return 0; +} + +static int stbi__get16be(stbi__context *s) +{ + int z = stbi__get8(s); + return (z << 8) + stbi__get8(s); +} + +static stbi__uint32 stbi__get32be(stbi__context *s) +{ + stbi__uint32 z = stbi__get16be(s); + return (z << 16) + stbi__get16be(s); +} + +#if 
defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) +// nothing +#else +static int stbi__get16le(stbi__context *s) +{ + int z = stbi__get8(s); + return z + (stbi__get8(s) << 8); +} +#endif + +#ifndef STBI_NO_BMP +static stbi__uint32 stbi__get32le(stbi__context *s) +{ + stbi__uint32 z = stbi__get16le(s); + return z + (stbi__get16le(s) << 16); +} +#endif + +#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings + + +////////////////////////////////////////////////////////////////////////////// +// +// generic converter from built-in img_n to req_comp +// individual types do this automatically as much as possible (e.g. jpeg +// does all cases internally since it needs to colorspace convert anyway, +// and it never has alpha, so very few cases ). png can automatically +// interleave an alpha=255 channel, but falls back to this for other cases +// +// assume data buffer is malloced, so malloc a new one and free that one +// only failure mode is malloc failing + +static stbi_uc stbi__compute_y(int r, int g, int b) +{ + return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + unsigned char *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); + if (good == NULL) { + STBI_FREE(data); + return stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, 
req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; + default: STBI_ASSERT(0); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} + +static stbi__uint16 stbi__compute_y_16(int r, int g, int b) +{ + return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + stbi__uint16 *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); + if (good == NULL) { + STBI_FREE(data); + return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + stbi__uint16 *src = data + j * x * img_n ; + stbi__uint16 *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + 
STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; + default: STBI_ASSERT(0); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) +{ + int i,k,n; + float *output; + if (!data) return NULL; + output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); + } + if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f; + } + STBI_FREE(data); + return output; +} +#endif + +#ifndef STBI_NO_HDR +#define stbi__float2int(x) ((int) (x)) +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) +{ + int i,k,n; + stbi_uc *output; + if (!data) return NULL; + output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of 
memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + if (k < comp) { + float z = data[i*comp+k] * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + } + STBI_FREE(data); + return output; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// "baseline" JPEG/JFIF decoder +// +// simple implementation +// - doesn't support delayed output of y-dimension +// - simple interface (only one output format: 8-bit interleaved RGB) +// - doesn't try to recover corrupt jpegs +// - doesn't allow partial loading, loading multiple at once +// - still fast on x86 (copying globals into locals doesn't help x86) +// - allocates lots of intermediate memory (full size of all components) +// - non-interleaved case requires this anyway +// - allows good upsampling (see next) +// high-quality +// - upsampled channels are bilinearly interpolated, even across blocks +// - quality integer IDCT derived from IJG's 'slow' +// performance +// - fast huffman; reasonable integer IDCT +// - some SIMD kernels for common paths on targets with SSE2/NEON +// - uses a lot of intermediate memory, could cache poorly + +#ifndef STBI_NO_JPEG + +// huffman decoding acceleration +#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache + +typedef struct +{ + stbi_uc fast[1 << FAST_BITS]; + // weirdly, repacking this into AoS is a 10% speed loss, instead of a win + stbi__uint16 code[256]; + stbi_uc values[256]; + stbi_uc size[257]; + unsigned int maxcode[18]; + int delta[17]; // old 'firstsymbol' - old 'firstcode' +} stbi__huffman; + +typedef struct +{ + stbi__context *s; + stbi__huffman 
huff_dc[4]; + stbi__huffman huff_ac[4]; + stbi__uint16 dequant[4][64]; + stbi__int16 fast_ac[4][1 << FAST_BITS]; + +// sizes for components, interleaved MCUs + int img_h_max, img_v_max; + int img_mcu_x, img_mcu_y; + int img_mcu_w, img_mcu_h; + +// definition of jpeg image component + struct + { + int id; + int h,v; + int tq; + int hd,ha; + int dc_pred; + + int x,y,w2,h2; + stbi_uc *data; + void *raw_data, *raw_coeff; + stbi_uc *linebuf; + short *coeff; // progressive only + int coeff_w, coeff_h; // number of 8x8 coefficient blocks + } img_comp[4]; + + stbi__uint32 code_buffer; // jpeg entropy-coded buffer + int code_bits; // number of valid bits + unsigned char marker; // marker seen while filling entropy buffer + int nomore; // flag if we saw a marker so must stop + + int progressive; + int spec_start; + int spec_end; + int succ_high; + int succ_low; + int eob_run; + int jfif; + int app14_color_transform; // Adobe APP14 tag + int rgb; + + int scan_n, order[4]; + int restart_interval, todo; + +// kernels + void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]); + void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step); + stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs); +} stbi__jpeg; + +static int stbi__build_huffman(stbi__huffman *h, int *count) +{ + int i,j,k=0; + unsigned int code; + // build size list for each symbol (from JPEG spec) + for (i=0; i < 16; ++i) + for (j=0; j < count[i]; ++j) + h->size[k++] = (stbi_uc) (i+1); + h->size[k] = 0; + + // compute actual symbols (from jpeg spec) + code = 0; + k = 0; + for(j=1; j <= 16; ++j) { + // compute delta to add to code to compute symbol id + h->delta[j] = k - code; + if (h->size[k] == j) { + while (h->size[k] == j) + h->code[k++] = (stbi__uint16) (code++); + if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); + } + // compute largest code + 1 for this 
size, preshifted as needed later + h->maxcode[j] = code << (16-j); + code <<= 1; + } + h->maxcode[j] = 0xffffffff; + + // build non-spec acceleration table; 255 is flag for not-accelerated + memset(h->fast, 255, 1 << FAST_BITS); + for (i=0; i < k; ++i) { + int s = h->size[i]; + if (s <= FAST_BITS) { + int c = h->code[i] << (FAST_BITS-s); + int m = 1 << (FAST_BITS-s); + for (j=0; j < m; ++j) { + h->fast[c+j] = (stbi_uc) i; + } + } + } + return 1; +} + +// build a table that decodes both magnitude and value of small ACs in +// one go. +static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) +{ + int i; + for (i=0; i < (1 << FAST_BITS); ++i) { + stbi_uc fast = h->fast[i]; + fast_ac[i] = 0; + if (fast < 255) { + int rs = h->values[fast]; + int run = (rs >> 4) & 15; + int magbits = rs & 15; + int len = h->size[fast]; + + if (magbits && len + magbits <= FAST_BITS) { + // magnitude code followed by receive_extend code + int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); + int m = 1 << (magbits - 1); + if (k < m) k += (~0U << magbits) + 1; + // if the result is small enough, we can fit it in fast_ac table + if (k >= -128 && k <= 127) + fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); + } + } + } +} + +static void stbi__grow_buffer_unsafe(stbi__jpeg *j) +{ + do { + unsigned int b = j->nomore ? 
0 : stbi__get8(j->s); + if (b == 0xff) { + int c = stbi__get8(j->s); + while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes + if (c != 0) { + j->marker = (unsigned char) c; + j->nomore = 1; + return; + } + } + j->code_buffer |= b << (24 - j->code_bits); + j->code_bits += 8; + } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) +{ + unsigned int temp; + int c,k; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + // look at the top FAST_BITS and determine what symbol ID it is, + // if the code is <= FAST_BITS + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + k = h->fast[c]; + if (k < 255) { + int s = h->size[k]; + if (s > j->code_bits) + return -1; + j->code_buffer <<= s; + j->code_bits -= s; + return h->values[k]; + } + + // naive test is to shift the code_buffer down so k bits are + // valid, then test against maxcode. To speed this up, we've + // preshifted maxcode left so that it has (16-k) 0s at the + // end; in other words, regardless of the number of bits, it + // wants to be compared against something shifted to have 16; + // that way we don't need to shift inside the loop. + temp = j->code_buffer >> 16; + for (k=FAST_BITS+1 ; ; ++k) + if (temp < h->maxcode[k]) + break; + if (k == 17) { + // error! 
code not found + j->code_bits -= 16; + return -1; + } + + if (k > j->code_bits) + return -1; + + // convert the huffman code to the symbol id + c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; + STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]); + + // convert the id to a symbol + j->code_bits -= k; + j->code_buffer <<= k; + return h->values[c]; +} + +// bias[n] = (-1<code_bits < n) stbi__grow_buffer_unsafe(j); + + sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB + k = stbi_lrot(j->code_buffer, n); + STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask))); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k + (stbi__jbias[n] & ~sgn); +} + +// get some unsigned bits +stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n) +{ + unsigned int k; + if (j->code_bits < n) stbi__grow_buffer_unsafe(j); + k = stbi_lrot(j->code_buffer, n); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k; +} + +stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) +{ + unsigned int k; + if (j->code_bits < 1) stbi__grow_buffer_unsafe(j); + k = j->code_buffer; + j->code_buffer <<= 1; + --j->code_bits; + return k & 0x80000000; +} + +// given a value that's at position X in the zigzag stream, +// where does it appear in the 8x8 matrix coded as row-major? 
+static const stbi_uc stbi__jpeg_dezigzag[64+15] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // let corrupt input sample past end + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63 +}; + +// decode one 64-entry block-- +static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant) +{ + int diff,dc,k; + int t; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + t = stbi__jpeg_huff_decode(j, hdc); + if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + + // 0 all the ac values now so we can do it 32-bits at a time + memset(data,0,64*sizeof(data[0])); + + diff = t ? stbi__extend_receive(j, t) : 0; + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc * dequant[0]); + + // decode AC components, see JPEG spec + k = 1; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) * dequant[zig]); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (rs != 0xf0) break; // end block + k += 16; + } else { + k += r; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); + } + } + } while (k < 64); + return 1; +} + +static int 
stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) +{ + int diff,dc; + int t; + if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + if (j->succ_high == 0) { + // first scan for DC coefficient, must be first + memset(data,0,64*sizeof(data[0])); // 0 all the ac values now + t = stbi__jpeg_huff_decode(j, hdc); + diff = t ? stbi__extend_receive(j, t) : 0; + + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc << j->succ_low); + } else { + // refinement scan for DC coefficient + if (stbi__jpeg_get_bit(j)) + data[0] += (short) (1 << j->succ_low); + } + return 1; +} + +// @OPTIMIZE: store non-zigzagged during the decode passes, +// and only de-zigzag when dequantizing +static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac) +{ + int k; + if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->succ_high == 0) { + int shift = j->succ_low; + + if (j->eob_run) { + --j->eob_run; + return 1; + } + + k = j->spec_start; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) << shift); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r); + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + --j->eob_run; + break; + } + k += 16; + } else { + k += r; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) << shift); + } + } + } while 
(k <= j->spec_end); + } else { + // refinement scan for these AC coefficients + + short bit = (short) (1 << j->succ_low); + + if (j->eob_run) { + --j->eob_run; + for (k = j->spec_start; k <= j->spec_end; ++k) { + short *p = &data[stbi__jpeg_dezigzag[k]]; + if (*p != 0) + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } + } else { + k = j->spec_start; + do { + int r,s; + int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r) - 1; + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + r = 64; // force end of block + } else { + // r=15 s=0 should write 16 0s, so we just do + // a run of 15 0s and then write s (which is 0), + // so we don't have to do anything special here + } + } else { + if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); + // sign bit + if (stbi__jpeg_get_bit(j)) + s = bit; + else + s = -bit; + } + + // advance by r + while (k <= j->spec_end) { + short *p = &data[stbi__jpeg_dezigzag[k++]]; + if (*p != 0) { + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } else { + if (r == 0) { + *p = (short) s; + break; + } + --r; + } + } + } while (k <= j->spec_end); + } + } + return 1; +} + +// take a -128..127 value and stbi__clamp it and convert to 0..255 +stbi_inline static stbi_uc stbi__clamp(int x) +{ + // trick to use a single test to catch both cases + if ((unsigned int) x > 255) { + if (x < 0) return 0; + if (x > 255) return 255; + } + return (stbi_uc) x; +} + +#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5))) +#define stbi__fsh(x) ((x) * 4096) + +// derived from jidctint -- DCT_ISLOW +#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ + int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = 
(p2+p3) * stbi__f2f(0.5411961f); \ + t2 = p1 + p3*stbi__f2f(-1.847759065f); \ + t3 = p1 + p2*stbi__f2f( 0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = stbi__fsh(p2+p3); \ + t1 = stbi__fsh(p2-p3); \ + x0 = t0+t3; \ + x3 = t0-t3; \ + x1 = t1+t2; \ + x2 = t1-t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0+t2; \ + p4 = t1+t3; \ + p1 = t0+t3; \ + p2 = t1+t2; \ + p5 = (p3+p4)*stbi__f2f( 1.175875602f); \ + t0 = t0*stbi__f2f( 0.298631336f); \ + t1 = t1*stbi__f2f( 2.053119869f); \ + t2 = t2*stbi__f2f( 3.072711026f); \ + t3 = t3*stbi__f2f( 1.501321110f); \ + p1 = p5 + p1*stbi__f2f(-0.899976223f); \ + p2 = p5 + p2*stbi__f2f(-2.562915447f); \ + p3 = p3*stbi__f2f(-1.961570560f); \ + p4 = p4*stbi__f2f(-0.390180644f); \ + t3 += p1+p4; \ + t2 += p2+p3; \ + t1 += p2+p4; \ + t0 += p1+p3; + +static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) +{ + int i,val[64],*v=val; + stbi_uc *o; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0]*4; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + 
STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + x0 += 65536 + (128<<17); + x1 += 65536 + (128<<17); + x2 += 65536 + (128<<17); + x3 += 65536 + (128<<17); + // tried computing the shifts into temps, or'ing the temps to see + // if any were out of range, but that was slower + o[0] = stbi__clamp((x0+t3) >> 17); + o[7] = stbi__clamp((x0-t3) >> 17); + o[1] = stbi__clamp((x1+t2) >> 17); + o[6] = stbi__clamp((x1-t2) >> 17); + o[2] = stbi__clamp((x2+t1) >> 17); + o[5] = stbi__clamp((x2-t1) >> 17); + o[3] = stbi__clamp((x3+t0) >> 17); + o[4] = stbi__clamp((x3-t0) >> 17); + } +} + +#ifdef STBI_SSE2 +// sse2 integer IDCT. not the fastest possible implementation but it +// produces bit-identical results to the generic C version so it's +// fully "transparent". +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + // This is constructed to match our regular (generic) integer IDCT exactly. 
+ __m128i row0, row1, row2, row3, row4, row5, row6, row7; + __m128i tmp; + + // dot product constant: even elems=x, odd elems=y + #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y)) + + // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit) + // out(1) = c1[even]*x + c1[odd]*y + #define dct_rot(out0,out1, x,y,c0,c1) \ + __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \ + __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \ + __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ + __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ + __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ + __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) + + // out = in << 12 (in 16-bit, out 32-bit) + #define dct_widen(out, in) \ + __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ + __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) + + // wide add + #define dct_wadd(out, a, b) \ + __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_add_epi32(a##_h, b##_h) + + // wide sub + #define dct_wsub(out, a, b) \ + __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) + + // butterfly a/b, add bias, then shift by "s" and pack + #define dct_bfly32o(out0, out1, a,b,bias,s) \ + { \ + __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ + __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ + dct_wadd(sum, abiased, b); \ + dct_wsub(dif, abiased, b); \ + out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ + out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ + } + + // 8-bit interleave step (for transposes) + #define dct_interleave8(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi8(a, b); \ + b = _mm_unpackhi_epi8(tmp, b) + + // 16-bit interleave step (for transposes) + #define dct_interleave16(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi16(a, b); \ + b = _mm_unpackhi_epi16(tmp, b) + + #define dct_pass(bias,shift) \ + { \ + 
/* even part */ \ + dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \ + __m128i sum04 = _mm_add_epi16(row0, row4); \ + __m128i dif04 = _mm_sub_epi16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ + dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \ + __m128i sum17 = _mm_add_epi16(row1, row7); \ + __m128i sum35 = _mm_add_epi16(row3, row5); \ + dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \ + dct_wadd(x4, y0o, y4o); \ + dct_wadd(x5, y1o, y5o); \ + dct_wadd(x6, y2o, y5o); \ + dct_wadd(x7, y3o, y4o); \ + dct_bfly32o(row0,row7, x0,x7,bias,shift); \ + dct_bfly32o(row1,row6, x1,x6,bias,shift); \ + dct_bfly32o(row2,row5, x2,x5,bias,shift); \ + dct_bfly32o(row3,row4, x3,x4,bias,shift); \ + } + + __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); + __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f)); + __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f)); + __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f)); + __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f)); + __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f)); + __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f)); + __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f)); + + // rounding biases in column/row passes, see stbi__idct_block for explanation. 
+ __m128i bias_0 = _mm_set1_epi32(512); + __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17)); + + // load + row0 = _mm_load_si128((const __m128i *) (data + 0*8)); + row1 = _mm_load_si128((const __m128i *) (data + 1*8)); + row2 = _mm_load_si128((const __m128i *) (data + 2*8)); + row3 = _mm_load_si128((const __m128i *) (data + 3*8)); + row4 = _mm_load_si128((const __m128i *) (data + 4*8)); + row5 = _mm_load_si128((const __m128i *) (data + 5*8)); + row6 = _mm_load_si128((const __m128i *) (data + 6*8)); + row7 = _mm_load_si128((const __m128i *) (data + 7*8)); + + // column pass + dct_pass(bias_0, 10); + + { + // 16bit 8x8 transpose pass 1 + dct_interleave16(row0, row4); + dct_interleave16(row1, row5); + dct_interleave16(row2, row6); + dct_interleave16(row3, row7); + + // transpose pass 2 + dct_interleave16(row0, row2); + dct_interleave16(row1, row3); + dct_interleave16(row4, row6); + dct_interleave16(row5, row7); + + // transpose pass 3 + dct_interleave16(row0, row1); + dct_interleave16(row2, row3); + dct_interleave16(row4, row5); + dct_interleave16(row6, row7); + } + + // row pass + dct_pass(bias_1, 17); + + { + // pack + __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 + __m128i p1 = _mm_packus_epi16(row2, row3); + __m128i p2 = _mm_packus_epi16(row4, row5); + __m128i p3 = _mm_packus_epi16(row6, row7); + + // 8bit 8x8 transpose pass 1 + dct_interleave8(p0, p2); // a0e0a1e1... + dct_interleave8(p1, p3); // c0g0c1g1... + + // transpose pass 2 + dct_interleave8(p0, p1); // a0c0e0g0... + dct_interleave8(p2, p3); // b0d0f0h0... + + // transpose pass 3 + dct_interleave8(p0, p2); // a0b0c0d0... + dct_interleave8(p1, p3); // a4b4c4d4... 
+ + // store + _mm_storel_epi64((__m128i *) out, p0); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p2); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p1); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p3); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e)); + } + +#undef dct_const +#undef dct_rot +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_interleave8 +#undef dct_interleave16 +#undef dct_pass +} + +#endif // STBI_SSE2 + +#ifdef STBI_NEON + +// NEON integer IDCT. should produce bit-identical +// results to the generic C version. +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + int16x8_t row0, row1, row2, row3, row4, row5, row6, row7; + + int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f)); + int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f)); + int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f)); + int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f)); + int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f)); + int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f)); + int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f)); + int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f)); + int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f)); + int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f)); + int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f)); + int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f)); + +#define dct_long_mul(out, inq, coeff) \ + int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff) + +#define dct_long_mac(out, acc, inq, coeff) \ + int32x4_t out##_l = vmlal_s16(acc##_l, 
vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff) + +#define dct_widen(out, inq) \ + int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \ + int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12) + +// wide add +#define dct_wadd(out, a, b) \ + int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vaddq_s32(a##_h, b##_h) + +// wide sub +#define dct_wsub(out, a, b) \ + int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vsubq_s32(a##_h, b##_h) + +// butterfly a/b, then shift using "shiftop" by "s" and pack +#define dct_bfly32o(out0,out1, a,b,shiftop,s) \ + { \ + dct_wadd(sum, a, b); \ + dct_wsub(dif, a, b); \ + out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ + out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ + } + +#define dct_pass(shiftop, shift) \ + { \ + /* even part */ \ + int16x8_t sum26 = vaddq_s16(row2, row6); \ + dct_long_mul(p1e, sum26, rot0_0); \ + dct_long_mac(t2e, p1e, row6, rot0_1); \ + dct_long_mac(t3e, p1e, row2, rot0_2); \ + int16x8_t sum04 = vaddq_s16(row0, row4); \ + int16x8_t dif04 = vsubq_s16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + int16x8_t sum15 = vaddq_s16(row1, row5); \ + int16x8_t sum17 = vaddq_s16(row1, row7); \ + int16x8_t sum35 = vaddq_s16(row3, row5); \ + int16x8_t sum37 = vaddq_s16(row3, row7); \ + int16x8_t sumodd = vaddq_s16(sum17, sum35); \ + dct_long_mul(p5o, sumodd, rot1_0); \ + dct_long_mac(p1o, p5o, sum17, rot1_1); \ + dct_long_mac(p2o, p5o, sum35, rot1_2); \ + dct_long_mul(p3o, sum37, rot2_0); \ + dct_long_mul(p4o, sum15, rot2_1); \ + dct_wadd(sump13o, p1o, p3o); \ + dct_wadd(sump24o, p2o, p4o); \ + dct_wadd(sump23o, p2o, p3o); \ + dct_wadd(sump14o, p1o, p4o); \ + dct_long_mac(x4, sump13o, row7, rot3_0); \ + dct_long_mac(x5, sump24o, row5, rot3_1); \ + 
dct_long_mac(x6, sump23o, row3, rot3_2); \ + dct_long_mac(x7, sump14o, row1, rot3_3); \ + dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \ + dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \ + dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \ + dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \ + } + + // load + row0 = vld1q_s16(data + 0*8); + row1 = vld1q_s16(data + 1*8); + row2 = vld1q_s16(data + 2*8); + row3 = vld1q_s16(data + 3*8); + row4 = vld1q_s16(data + 4*8); + row5 = vld1q_s16(data + 5*8); + row6 = vld1q_s16(data + 6*8); + row7 = vld1q_s16(data + 7*8); + + // add DC bias + row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0)); + + // column pass + dct_pass(vrshrn_n_s32, 10); + + // 16bit 8x8 transpose + { +// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively. +// whether compilers actually get this is another story, sadly. +#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); } +#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); } + + // pass 1 + dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 + dct_trn16(row2, row3); + dct_trn16(row4, row5); + dct_trn16(row6, row7); + + // pass 2 + dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 + dct_trn32(row1, row3); + dct_trn32(row4, row6); + dct_trn32(row5, row7); + + // pass 3 + dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 + dct_trn64(row1, row5); + dct_trn64(row2, row6); + dct_trn64(row3, row7); + +#undef dct_trn16 +#undef dct_trn32 +#undef dct_trn64 + } + + // row pass + // vrshrn_n_s32 only supports shifts up to 16, we need + // 17. so do a non-rounding shift of 16 first then follow + // up with a rounding shift by 1. 
+ dct_pass(vshrn_n_s32, 16); + + { + // pack and round + uint8x8_t p0 = vqrshrun_n_s16(row0, 1); + uint8x8_t p1 = vqrshrun_n_s16(row1, 1); + uint8x8_t p2 = vqrshrun_n_s16(row2, 1); + uint8x8_t p3 = vqrshrun_n_s16(row3, 1); + uint8x8_t p4 = vqrshrun_n_s16(row4, 1); + uint8x8_t p5 = vqrshrun_n_s16(row5, 1); + uint8x8_t p6 = vqrshrun_n_s16(row6, 1); + uint8x8_t p7 = vqrshrun_n_s16(row7, 1); + + // again, these can translate into one instruction, but often don't. +#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); } +#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); } + + // sadly can't use interleaved stores here since we only write + // 8 bytes to each scan line! 
+ + // 8x8 8-bit transpose pass 1 + dct_trn8_8(p0, p1); + dct_trn8_8(p2, p3); + dct_trn8_8(p4, p5); + dct_trn8_8(p6, p7); + + // pass 2 + dct_trn8_16(p0, p2); + dct_trn8_16(p1, p3); + dct_trn8_16(p4, p6); + dct_trn8_16(p5, p7); + + // pass 3 + dct_trn8_32(p0, p4); + dct_trn8_32(p1, p5); + dct_trn8_32(p2, p6); + dct_trn8_32(p3, p7); + + // store + vst1_u8(out, p0); out += out_stride; + vst1_u8(out, p1); out += out_stride; + vst1_u8(out, p2); out += out_stride; + vst1_u8(out, p3); out += out_stride; + vst1_u8(out, p4); out += out_stride; + vst1_u8(out, p5); out += out_stride; + vst1_u8(out, p6); out += out_stride; + vst1_u8(out, p7); + +#undef dct_trn8_8 +#undef dct_trn8_16 +#undef dct_trn8_32 + } + +#undef dct_long_mul +#undef dct_long_mac +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_pass +} + +#endif // STBI_NEON + +#define STBI__MARKER_none 0xff +// if there's a pending marker from the entropy stream, return that +// otherwise, fetch from the stream and get a marker. if there's no +// marker, return 0xff, which is never a valid marker value +static stbi_uc stbi__get_marker(stbi__jpeg *j) +{ + stbi_uc x; + if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; } + x = stbi__get8(j->s); + if (x != 0xff) return STBI__MARKER_none; + while (x == 0xff) + x = stbi__get8(j->s); // consume repeated 0xff fill bytes + return x; +} + +// in each scan, we'll have scan_n components, and the order +// of the components is specified by order[] +#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) + +// after a restart interval, stbi__jpeg_reset the entropy decoder and +// the dc prediction +static void stbi__jpeg_reset(stbi__jpeg *j) +{ + j->code_bits = 0; + j->code_buffer = 0; + j->nomore = 0; + j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0; + j->marker = STBI__MARKER_none; + j->todo = j->restart_interval ? 
j->restart_interval : 0x7fffffff; + j->eob_run = 0; + // no more than 1<<31 MCUs if no restart_interal? that's plenty safe, + // since we don't even allow 1<<30 pixels +} + +static int stbi__parse_entropy_coded_data(stbi__jpeg *z) +{ + stbi__jpeg_reset(z); + if (!z->progressive) { + if (z->scan_n == 1) { + int i,j; + STBI_SIMD_ALIGN(short, data[64]); + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int i,j,k,x,y; + STBI_SIMD_ALIGN(short, data[64]); + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... 
process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x)*8; + int y2 = (j*z->img_comp[n].v + y)*8; + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data); + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } else { + if (z->scan_n == 1) { + int i,j; + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + if (z->spec_start == 0) { + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } else { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha])) + return 0; + } + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int 
i,j,k,x,y; + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x); + int y2 = (j*z->img_comp[n].v + y); + short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } +} + +static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) +{ + int i; + for (i=0; i < 64; ++i) + data[i] *= dequant[i]; +} + +static void stbi__jpeg_finish(stbi__jpeg *z) +{ + if (z->progressive) { + // dequantize and idct the data + int i,j,n; + for (n=0; n < z->s->img_n; ++n) { + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + } + } + } + } +} + +static int stbi__process_marker(stbi__jpeg *z, int m) +{ + int L; + switch (m) { + case STBI__MARKER_none: // no marker found + return stbi__err("expected marker","Corrupt JPEG"); + + case 0xDD: // DRI - specify restart interval + if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); + z->restart_interval = 
stbi__get16be(z->s); + return 1; + + case 0xDB: // DQT - define quantization table + L = stbi__get16be(z->s)-2; + while (L > 0) { + int q = stbi__get8(z->s); + int p = q >> 4, sixteen = (p != 0); + int t = q & 15,i; + if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); + if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); + + for (i=0; i < 64; ++i) + z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); + L -= (sixteen ? 129 : 65); + } + return L==0; + + case 0xC4: // DHT - define huffman table + L = stbi__get16be(z->s)-2; + while (L > 0) { + stbi_uc *v; + int sizes[16],i,n=0; + int q = stbi__get8(z->s); + int tc = q >> 4; + int th = q & 15; + if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG"); + for (i=0; i < 16; ++i) { + sizes[i] = stbi__get8(z->s); + n += sizes[i]; + } + L -= 17; + if (tc == 0) { + if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; + v = z->huff_dc[th].values; + } else { + if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0; + v = z->huff_ac[th].values; + } + for (i=0; i < n; ++i) + v[i] = stbi__get8(z->s); + if (tc != 0) + stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); + L -= n; + } + return L==0; + } + + // check for comment block or APP blocks + if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { + L = stbi__get16be(z->s); + if (L < 2) { + if (m == 0xFE) + return stbi__err("bad COM len","Corrupt JPEG"); + else + return stbi__err("bad APP len","Corrupt JPEG"); + } + L -= 2; + + if (m == 0xE0 && L >= 5) { // JFIF APP0 segment + static const unsigned char tag[5] = {'J','F','I','F','\0'}; + int ok = 1; + int i; + for (i=0; i < 5; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 5; + if (ok) + z->jfif = 1; + } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment + static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; + int ok = 1; + int i; + for (i=0; i < 6; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 6; 
+ if (ok) { + stbi__get8(z->s); // version + stbi__get16be(z->s); // flags0 + stbi__get16be(z->s); // flags1 + z->app14_color_transform = stbi__get8(z->s); // color transform + L -= 6; + } + } + + stbi__skip(z->s, L); + return 1; + } + + return stbi__err("unknown marker","Corrupt JPEG"); +} + +// after we see SOS +static int stbi__process_scan_header(stbi__jpeg *z) +{ + int i; + int Ls = stbi__get16be(z->s); + z->scan_n = stbi__get8(z->s); + if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG"); + if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG"); + for (i=0; i < z->scan_n; ++i) { + int id = stbi__get8(z->s), which; + int q = stbi__get8(z->s); + for (which = 0; which < z->s->img_n; ++which) + if (z->img_comp[which].id == id) + break; + if (which == z->s->img_n) return 0; // no match + z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG"); + z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG"); + z->order[i] = which; + } + + { + int aa; + z->spec_start = stbi__get8(z->s); + z->spec_end = stbi__get8(z->s); // should be 63, but might be 0 + aa = stbi__get8(z->s); + z->succ_high = (aa >> 4); + z->succ_low = (aa & 15); + if (z->progressive) { + if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13) + return stbi__err("bad SOS", "Corrupt JPEG"); + } else { + if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG"); + if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG"); + z->spec_end = 63; + } + } + + return 1; +} + +static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why) +{ + int i; + for (i=0; i < ncomp; ++i) { + if (z->img_comp[i].raw_data) { + STBI_FREE(z->img_comp[i].raw_data); + z->img_comp[i].raw_data = NULL; + z->img_comp[i].data = NULL; + 
} + if (z->img_comp[i].raw_coeff) { + STBI_FREE(z->img_comp[i].raw_coeff); + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].coeff = 0; + } + if (z->img_comp[i].linebuf) { + STBI_FREE(z->img_comp[i].linebuf); + z->img_comp[i].linebuf = NULL; + } + } + return why; +} + +static int stbi__process_frame_header(stbi__jpeg *z, int scan) +{ + stbi__context *s = z->s; + int Lf,p,i,q, h_max=1,v_max=1,c; + Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG + p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline + s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires + c = stbi__get8(s); + if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); + s->img_n = c; + for (i=0; i < c; ++i) { + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; + } + + if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); + + z->rgb = 0; + for (i=0; i < s->img_n; ++i) { + static const unsigned char rgb[3] = { 'R', 'G', 'B' }; + z->img_comp[i].id = stbi__get8(s); + if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) + ++z->rgb; + q = stbi__get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); + z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); + } + + if (scan != STBI__SCAN_load) return 1; + + if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); + + for (i=0; i < s->img_n; ++i) { + if 
(z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; + if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; + } + + // compute interleaved mcu info + z->img_h_max = h_max; + z->img_v_max = v_max; + z->img_mcu_w = h_max * 8; + z->img_mcu_h = v_max * 8; + // these sizes can't be more than 17 bits + z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; + z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; + + for (i=0; i < s->img_n; ++i) { + // number of effective pixels (e.g. for non-interleaved MCU) + z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; + z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; + // to simplify generation, we'll allocate enough memory to decode + // the bogus oversized data from using interleaved MCUs and their + // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't + // discard the extra data until colorspace conversion + // + // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) + // so these muls can't overflow with 32-bit ints (which we require) + z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; + z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; + z->img_comp[i].coeff = 0; + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].linebuf = NULL; + z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); + if (z->img_comp[i].raw_data == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); + // align blocks for idct using mmx/sse + z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); + if (z->progressive) { + // w2, h2 are multiples of 8 (see above) + z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; + z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; + z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); + if (z->img_comp[i].raw_coeff == NULL) + return stbi__free_jpeg_components(z, i+1, 
stbi__err("outofmem", "Out of memory")); + z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); + } + } + + return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. SOF) +#define stbi__DNL(x) ((x) == 0xdc) +#define stbi__SOI(x) ((x) == 0xd8) +#define stbi__EOI(x) ((x) == 0xd9) +#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) +#define stbi__SOS(x) ((x) == 0xda) + +#define stbi__SOF_progressive(x) ((x) == 0xc2) + +static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) +{ + int m; + z->jfif = 0; + z->app14_color_transform = -1; // valid values are 0,1,2 + z->marker = STBI__MARKER_none; // initialize cached marker to empty + m = stbi__get_marker(z); + if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG"); + if (scan == STBI__SCAN_type) return 1; + m = stbi__get_marker(z); + while (!stbi__SOF(m)) { + if (!stbi__process_marker(z,m)) return 0; + m = stbi__get_marker(z); + while (m == STBI__MARKER_none) { + // some files have extra padding after their blocks, so ok, we'll scan + if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); + m = stbi__get_marker(z); + } + } + z->progressive = stbi__SOF_progressive(m); + if (!stbi__process_frame_header(z, scan)) return 0; + return 1; +} + +// decode image to YCbCr format +static int stbi__decode_jpeg_image(stbi__jpeg *j) +{ + int m; + for (m = 0; m < 4; m++) { + j->img_comp[m].raw_data = NULL; + j->img_comp[m].raw_coeff = NULL; + } + j->restart_interval = 0; + if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0; + m = stbi__get_marker(j); + while (!stbi__EOI(m)) { + if (stbi__SOS(m)) { + if (!stbi__process_scan_header(j)) return 0; + if (!stbi__parse_entropy_coded_data(j)) return 0; + if (j->marker == STBI__MARKER_none ) { + // handle 0s at the end of image data from IP Kamera 9060 + while (!stbi__at_eof(j->s)) { + int x = stbi__get8(j->s); + if (x == 255) { + j->marker = stbi__get8(j->s); + break; + } + } + // if we 
reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 + } + } else if (stbi__DNL(m)) { + int Ld = stbi__get16be(j->s); + stbi__uint32 NL = stbi__get16be(j->s); + if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); + if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); + } else { + if (!stbi__process_marker(j, m)) return 0; + } + m = stbi__get_marker(j); + } + if (j->progressive) + stbi__jpeg_finish(j); + return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1, + int w, int hs); + +#define stbi__div4(x) ((stbi_uc) ((x) >> 2)) + +static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + STBI_NOTUSED(out); + STBI_NOTUSED(in_far); + STBI_NOTUSED(w); + STBI_NOTUSED(hs); + return in_near; +} + +static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples vertically for every one in input + int i; + STBI_NOTUSED(hs); + for (i=0; i < w; ++i) + out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); + return out; +} + +static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples horizontally for every one in input + int i; + stbi_uc *input = in_near; + + if (w == 1) { + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + return out; + } + + out[0] = input[0]; + out[1] = stbi__div4(input[0]*3 + input[1] + 2); + for (i=1; i < w-1; ++i) { + int n = 3*input[i]+2; + out[i*2+0] = stbi__div4(n+input[i-1]); + out[i*2+1] = stbi__div4(n+input[i+1]); + } + out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2); + out[i*2+1] = input[w-1]; + + STBI_NOTUSED(in_far); + STBI_NOTUSED(hs); + + return out; +} + +#define stbi__div16(x) ((stbi_uc) ((x) >> 4)) + +static stbi_uc 
*stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i,t0,t1; + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + out[0] = stbi__div4(t1+2); + for (i=1; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i=0,t0,t1; + + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + // process groups of 8 pixels for as long as we can. + // note we can't handle the last pixel in a row in this loop + // because we need to handle the filter boundary conditions. + for (; i < ((w-1) & ~7); i += 8) { +#if defined(STBI_SSE2) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + __m128i zero = _mm_setzero_si128(); + __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i)); + __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i)); + __m128i farw = _mm_unpacklo_epi8(farb, zero); + __m128i nearw = _mm_unpacklo_epi8(nearb, zero); + __m128i diff = _mm_sub_epi16(farw, nearw); + __m128i nears = _mm_slli_epi16(nearw, 2); + __m128i curr = _mm_add_epi16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. 
+ __m128i prv0 = _mm_slli_si128(curr, 2); + __m128i nxt0 = _mm_srli_si128(curr, 2); + __m128i prev = _mm_insert_epi16(prv0, t1, 0); + __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + __m128i bias = _mm_set1_epi16(8); + __m128i curs = _mm_slli_epi16(curr, 2); + __m128i prvd = _mm_sub_epi16(prev, curr); + __m128i nxtd = _mm_sub_epi16(next, curr); + __m128i curb = _mm_add_epi16(curs, bias); + __m128i even = _mm_add_epi16(prvd, curb); + __m128i odd = _mm_add_epi16(nxtd, curb); + + // interleave even and odd pixels, then undo scaling. + __m128i int0 = _mm_unpacklo_epi16(even, odd); + __m128i int1 = _mm_unpackhi_epi16(even, odd); + __m128i de0 = _mm_srli_epi16(int0, 4); + __m128i de1 = _mm_srli_epi16(int1, 4); + + // pack and write output + __m128i outv = _mm_packus_epi16(de0, de1); + _mm_storeu_si128((__m128i *) (out + i*2), outv); +#elif defined(STBI_NEON) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + uint8x8_t farb = vld1_u8(in_far + i); + uint8x8_t nearb = vld1_u8(in_near + i); + int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); + int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); + int16x8_t curr = vaddq_s16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. 
+ int16x8_t prv0 = vextq_s16(curr, curr, 7); + int16x8_t nxt0 = vextq_s16(curr, curr, 1); + int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); + int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + int16x8_t curs = vshlq_n_s16(curr, 2); + int16x8_t prvd = vsubq_s16(prev, curr); + int16x8_t nxtd = vsubq_s16(next, curr); + int16x8_t even = vaddq_s16(curs, prvd); + int16x8_t odd = vaddq_s16(curs, nxtd); + + // undo scaling and round, then store with even/odd phases interleaved + uint8x8x2_t o; + o.val[0] = vqrshrun_n_s16(even, 4); + o.val[1] = vqrshrun_n_s16(odd, 4); + vst2_u8(out + i*2, o); +#endif + + // "previous" value for next iter + t1 = 3*in_near[i+7] + in_far[i+7]; + } + + t0 = t1; + t1 = 3*in_near[i] + in_far[i]; + out[i*2] = stbi__div16(3*t1 + t0 + 8); + + for (++i; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} +#endif + +static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // resample with nearest-neighbor + int i,j; + STBI_NOTUSED(in_far); + for (i=0; i < w; ++i) + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; + return out; +} + +// this is a reduced-precision calculation of YCbCr-to-RGB introduced +// to make sure the code produces the same results in both SIMD and scalar +#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8) +static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) +{ + int i; + for (i=0; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int 
cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) +{ + int i = 0; + +#ifdef STBI_SSE2 + // step == 3 is pretty ugly on the final interleave, and i'm not convinced + // it's useful in practice (you wouldn't use it for textures, for example). + // so just accelerate step == 4 case. + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. 
+ __m128i signflip = _mm_set1_epi8(-0x80); + __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f)); + __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); + __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); + __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f)); + __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128); + __m128i xw = _mm_set1_epi16(255); // alpha channel + + for (; i+7 < count; i += 8) { + // load + __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i)); + __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i)); + __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i)); + __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 + __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 + + // unpack to short (and left-shift cr, cb by 8) + __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes); + __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); + __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); + + // color transform + __m128i yws = _mm_srli_epi16(yw, 4); + __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); + __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); + __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); + __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); + __m128i rws = _mm_add_epi16(cr0, yws); + __m128i gwt = _mm_add_epi16(cb0, yws); + __m128i bws = _mm_add_epi16(yws, cb1); + __m128i gws = _mm_add_epi16(gwt, cr1); + + // descale + __m128i rw = _mm_srai_epi16(rws, 4); + __m128i bw = _mm_srai_epi16(bws, 4); + __m128i gw = _mm_srai_epi16(gws, 4); + + // back to byte, set up for transpose + __m128i brb = _mm_packus_epi16(rw, bw); + __m128i gxb = _mm_packus_epi16(gw, xw); + + // transpose to interleave channels + __m128i t0 = _mm_unpacklo_epi8(brb, gxb); + __m128i t1 = _mm_unpackhi_epi8(brb, gxb); + __m128i o0 = _mm_unpacklo_epi16(t0, t1); + __m128i o1 = _mm_unpackhi_epi16(t0, t1); + + // store + _mm_storeu_si128((__m128i *) (out + 0), o0); + 
_mm_storeu_si128((__m128i *) (out + 16), o1); + out += 32; + } + } +#endif + +#ifdef STBI_NEON + // in this version, step=3 support would be easy to add. but is there demand? + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. + uint8x8_t signflip = vdup_n_u8(0x80); + int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f)); + int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f)); + int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f)); + int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f)); + + for (; i+7 < count; i += 8) { + // load + uint8x8_t y_bytes = vld1_u8(y + i); + uint8x8_t cr_bytes = vld1_u8(pcr + i); + uint8x8_t cb_bytes = vld1_u8(pcb + i); + int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip)); + int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip)); + + // expand to s16 + int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4)); + int16x8_t crw = vshll_n_s8(cr_biased, 7); + int16x8_t cbw = vshll_n_s8(cb_biased, 7); + + // color transform + int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0); + int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0); + int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1); + int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1); + int16x8_t rws = vaddq_s16(yws, cr0); + int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1); + int16x8_t bws = vaddq_s16(yws, cb1); + + // undo scaling, round, convert to byte + uint8x8x4_t o; + o.val[0] = vqrshrun_n_s16(rws, 4); + o.val[1] = vqrshrun_n_s16(gws, 4); + o.val[2] = vqrshrun_n_s16(bws, 4); + o.val[3] = vdup_n_u8(255); + + // store, interleaving r/g/b/a + vst4_u8(out, o); + out += 8*4; + } + } +#endif + + for (; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 
0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} +#endif + +// set up the kernels +static void stbi__setup_jpeg(stbi__jpeg *j) +{ + j->idct_block_kernel = stbi__idct_block; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2; + +#ifdef STBI_SSE2 + if (stbi__sse2_available()) { + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; + } +#endif + +#ifdef STBI_NEON + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; +#endif +} + +// clean up the temporary component buffers +static void stbi__cleanup_jpeg(stbi__jpeg *j) +{ + stbi__free_jpeg_components(j, j->s->img_n, 0); +} + +typedef struct +{ + resample_row_func resample; + stbi_uc *line0,*line1; + int hs,vs; // expansion factor in each axis + int w_lores; // horizontal pixels pre-expansion + int ystep; // how far through vertical expansion we are + int ypos; // which pre-expansion row we're on +} stbi__resample; + +// fast 0..255 * 0..255 => 0..255 rounded multiplication +static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) +{ + unsigned int t = x*y + 128; + return (stbi_uc) ((t + (t >>8)) >> 8); +} + +static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ + int n, decode_n, is_rgb; + z->s->img_n = 0; // make stbi__cleanup_jpeg safe + + // validate req_comp + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + + // load a jpeg image from 
whichever source, but leave in YCbCr format + if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; } + + // determine actual number of components to generate + n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1; + + is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif)); + + if (z->s->img_n == 3 && n < 3 && !is_rgb) + decode_n = 1; + else + decode_n = z->s->img_n; + + // resample and color-convert + { + int k; + unsigned int i,j; + stbi_uc *output; + stbi_uc *coutput[4]; + + stbi__resample res_comp[4]; + + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + + // allocate line buffer big enough for upsampling off the edges + // with upsample factor of 4 + z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3); + if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + r->hs = z->img_h_max / z->img_comp[k].h; + r->vs = z->img_v_max / z->img_comp[k].v; + r->ystep = r->vs >> 1; + r->w_lores = (z->s->img_x + r->hs-1) / r->hs; + r->ypos = 0; + r->line0 = r->line1 = z->img_comp[k].data; + + if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; + else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2; + else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2; + else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel; + else r->resample = stbi__resample_row_generic; + } + + // can't error after this so, this is safe + output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1); + if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + // now go ahead and resample + for (j=0; j < z->s->img_y; ++j) { + stbi_uc *out = output + n * z->s->img_x * j; + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + int y_bot = r->ystep >= (r->vs >> 1); + coutput[k] = r->resample(z->img_comp[k].linebuf, + y_bot ? 
r->line1 : r->line0, + y_bot ? r->line0 : r->line1, + r->w_lores, r->hs); + if (++r->ystep >= r->vs) { + r->ystep = 0; + r->line0 = r->line1; + if (++r->ypos < z->img_comp[k].y) + r->line1 += z->img_comp[k].w2; + } + } + if (n >= 3) { + stbi_uc *y = coutput[0]; + if (z->s->img_n == 3) { + if (is_rgb) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = y[i]; + out[1] = coutput[1][i]; + out[2] = coutput[2][i]; + out[3] = 255; + out += n; + } + } else { + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else if (z->s->img_n == 4) { + if (z->app14_color_transform == 0) { // CMYK + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(coutput[0][i], m); + out[1] = stbi__blinn_8x8(coutput[1][i], m); + out[2] = stbi__blinn_8x8(coutput[2][i], m); + out[3] = 255; + out += n; + } + } else if (z->app14_color_transform == 2) { // YCCK + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(255 - out[0], m); + out[1] = stbi__blinn_8x8(255 - out[1], m); + out[2] = stbi__blinn_8x8(255 - out[2], m); + out += n; + } + } else { // YCbCr + alpha? 
Ignore the fourth channel for now + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else + for (i=0; i < z->s->img_x; ++i) { + out[0] = out[1] = out[2] = y[i]; + out[3] = 255; // not used if n==3 + out += n; + } + } else { + if (is_rgb) { + if (n == 1) + for (i=0; i < z->s->img_x; ++i) + *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + else { + for (i=0; i < z->s->img_x; ++i, out += 2) { + out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + out[1] = 255; + } + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 0) { + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + stbi_uc r = stbi__blinn_8x8(coutput[0][i], m); + stbi_uc g = stbi__blinn_8x8(coutput[1][i], m); + stbi_uc b = stbi__blinn_8x8(coutput[2][i], m); + out[0] = stbi__compute_y(r, g, b); + out[1] = 255; + out += n; + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 2) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]); + out[1] = 255; + out += n; + } + } else { + stbi_uc *y = coutput[0]; + if (n == 1) + for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; + else + for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255; + } + } + } + stbi__cleanup_jpeg(z); + *out_x = z->s->img_x; + *out_y = z->s->img_y; + if (comp) *comp = z->s->img_n >= 3 ? 
3 : 1; // report original components, not output + return output; + } +} + +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + unsigned char* result; + stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg)); + STBI_NOTUSED(ri); + j->s = s; + stbi__setup_jpeg(j); + result = load_jpeg_image(j, x,y,comp,req_comp); + STBI_FREE(j); + return result; +} + +static int stbi__jpeg_test(stbi__context *s) +{ + int r; + stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg)); + j->s = s; + stbi__setup_jpeg(j); + r = stbi__decode_jpeg_header(j, STBI__SCAN_type); + stbi__rewind(s); + STBI_FREE(j); + return r; +} + +static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) +{ + if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) { + stbi__rewind( j->s ); + return 0; + } + if (x) *x = j->s->img_x; + if (y) *y = j->s->img_y; + if (comp) *comp = j->s->img_n >= 3 ? 3 : 1; + return 1; +} + +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) +{ + int result; + stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg))); + j->s = s; + result = stbi__jpeg_info_raw(j, x, y, comp); + STBI_FREE(j); + return result; +} +#endif + +// public domain zlib decode v0.2 Sean Barrett 2006-11-18 +// simple implementation +// - all input must be provided in an upfront buffer +// - all output is written to a single output buffer (can malloc/realloc) +// performance +// - fast huffman + +#ifndef STBI_NO_ZLIB + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables +#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1) + +// zlib-style huffman encoding +// (jpegs packs from left, zlib from right, so can't share code) +typedef struct +{ + stbi__uint16 fast[1 << STBI__ZFAST_BITS]; + stbi__uint16 firstcode[16]; + int maxcode[17]; + stbi__uint16 firstsymbol[16]; + stbi_uc size[288]; + stbi__uint16 
value[288]; +} stbi__zhuffman; + +stbi_inline static int stbi__bitreverse16(int n) +{ + n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); + n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); + n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); + n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); + return n; +} + +stbi_inline static int stbi__bit_reverse(int v, int bits) +{ + STBI_ASSERT(bits <= 16); + // to bit reverse n bits, reverse 16 and shift + // e.g. 11 bits, bit reverse and shift away 5 + return stbi__bitreverse16(v) >> (16-bits); +} + +static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num) +{ + int i,k=0; + int code, next_code[16], sizes[17]; + + // DEFLATE spec for generating codes + memset(sizes, 0, sizeof(sizes)); + memset(z->fast, 0, sizeof(z->fast)); + for (i=0; i < num; ++i) + ++sizes[sizelist[i]]; + sizes[0] = 0; + for (i=1; i < 16; ++i) + if (sizes[i] > (1 << i)) + return stbi__err("bad sizes", "Corrupt PNG"); + code = 0; + for (i=1; i < 16; ++i) { + next_code[i] = code; + z->firstcode[i] = (stbi__uint16) code; + z->firstsymbol[i] = (stbi__uint16) k; + code = (code + sizes[i]); + if (sizes[i]) + if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG"); + z->maxcode[i] = code << (16-i); // preshift for inner loop + code <<= 1; + k += sizes[i]; + } + z->maxcode[16] = 0x10000; // sentinel + for (i=0; i < num; ++i) { + int s = sizelist[i]; + if (s) { + int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; + stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); + z->size [c] = (stbi_uc ) s; + z->value[c] = (stbi__uint16) i; + if (s <= STBI__ZFAST_BITS) { + int j = stbi__bit_reverse(next_code[s],s); + while (j < (1 << STBI__ZFAST_BITS)) { + z->fast[j] = fastv; + j += (1 << s); + } + } + ++next_code[s]; + } + } + return 1; +} + +// zlib-from-memory implementation for PNG reading +// because PNG allows splitting the zlib stream arbitrarily, +// and it's annoying structurally to have PNG call ZLIB call PNG, +// we require 
PNG read all the IDATs and combine them into a single +// memory buffer + +typedef struct +{ + stbi_uc *zbuffer, *zbuffer_end; + int num_bits; + stbi__uint32 code_buffer; + + char *zout; + char *zout_start; + char *zout_end; + int z_expandable; + + stbi__zhuffman z_length, z_distance; +} stbi__zbuf; + +stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z) +{ + if (z->zbuffer >= z->zbuffer_end) return 0; + return *z->zbuffer++; +} + +static void stbi__fill_bits(stbi__zbuf *z) +{ + do { + STBI_ASSERT(z->code_buffer < (1U << z->num_bits)); + z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; + z->num_bits += 8; + } while (z->num_bits <= 24); +} + +stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n) +{ + unsigned int k; + if (z->num_bits < n) stbi__fill_bits(z); + k = z->code_buffer & ((1 << n) - 1); + z->code_buffer >>= n; + z->num_bits -= n; + return k; +} + +static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s,k; + // not resolved by fast table, so compute it the slow way + // use jpeg approach, which requires MSbits at top + k = stbi__bit_reverse(a->code_buffer, 16); + for (s=STBI__ZFAST_BITS+1; ; ++s) + if (k < z->maxcode[s]) + break; + if (s == 16) return -1; // invalid code! 
+ // code size is s, so: + b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; + STBI_ASSERT(z->size[b] == s); + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; +} + +stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s; + if (a->num_bits < 16) stbi__fill_bits(a); + b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; + if (b) { + s = b >> 9; + a->code_buffer >>= s; + a->num_bits -= s; + return b & 511; + } + return stbi__zhuffman_decode_slowpath(a, z); +} + +static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes +{ + char *q; + int cur, limit, old_limit; + z->zout = zout; + if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); + cur = (int) (z->zout - z->zout_start); + limit = old_limit = (int) (z->zout_end - z->zout_start); + while (cur + n > limit) + limit *= 2; + q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); + STBI_NOTUSED(old_limit); + if (q == NULL) return stbi__err("outofmem", "Out of memory"); + z->zout_start = q; + z->zout = q + cur; + z->zout_end = q + limit; + return 1; +} + +static const int stbi__zlength_base[31] = { + 3,4,5,6,7,8,9,10,11,13, + 15,17,19,23,27,31,35,43,51,59, + 67,83,99,115,131,163,195,227,258,0,0 }; + +static const int stbi__zlength_extra[31]= +{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + +static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + +static const int stbi__zdist_extra[32] = +{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int stbi__parse_huffman_block(stbi__zbuf *a) +{ + char *zout = a->zout; + for(;;) { + int z = stbi__zhuffman_decode(a, &a->z_length); + if (z < 256) { + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes + if (zout >= a->zout_end) { + if (!stbi__zexpand(a, zout, 1)) return 
0; + zout = a->zout; + } + *zout++ = (char) z; + } else { + stbi_uc *p; + int len,dist; + if (z == 256) { + a->zout = zout; + return 1; + } + z -= 257; + len = stbi__zlength_base[z]; + if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); + z = stbi__zhuffman_decode(a, &a->z_distance); + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); + dist = stbi__zdist_base[z]; + if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); + if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); + if (zout + len > a->zout_end) { + if (!stbi__zexpand(a, zout, len)) return 0; + zout = a->zout; + } + p = (stbi_uc *) (zout - dist); + if (dist == 1) { // run of one byte; common in images. + stbi_uc v = *p; + if (len) { do *zout++ = v; while (--len); } + } else { + if (len) { do *zout++ = *p++; while (--len); } + } + } + } +} + +static int stbi__compute_huffman_codes(stbi__zbuf *a) +{ + static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + stbi__zhuffman z_codelength; + stbi_uc lencodes[286+32+137];//padding for maximum single op + stbi_uc codelength_sizes[19]; + int i,n; + + int hlit = stbi__zreceive(a,5) + 257; + int hdist = stbi__zreceive(a,5) + 1; + int hclen = stbi__zreceive(a,4) + 4; + int ntot = hlit + hdist; + + memset(codelength_sizes, 0, sizeof(codelength_sizes)); + for (i=0; i < hclen; ++i) { + int s = stbi__zreceive(a,3); + codelength_sizes[length_dezigzag[i]] = (stbi_uc) s; + } + if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; + + n = 0; + while (n < ntot) { + int c = stbi__zhuffman_decode(a, &z_codelength); + if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG"); + if (c < 16) + lencodes[n++] = (stbi_uc) c; + else { + stbi_uc fill = 0; + if (c == 16) { + c = stbi__zreceive(a,2)+3; + if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); + fill = lencodes[n-1]; + } else if (c == 17) + c = 
stbi__zreceive(a,3)+3; + else { + STBI_ASSERT(c == 18); + c = stbi__zreceive(a,7)+11; + } + if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); + memset(lencodes+n, fill, c); + n += c; + } + } + if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG"); + if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; + return 1; +} + +static int stbi__parse_uncompressed_block(stbi__zbuf *a) +{ + stbi_uc header[4]; + int len,nlen,k; + if (a->num_bits & 7) + stbi__zreceive(a, a->num_bits & 7); // discard + // drain the bit-packed data into header + k = 0; + while (a->num_bits > 0) { + header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check + a->code_buffer >>= 8; + a->num_bits -= 8; + } + STBI_ASSERT(a->num_bits == 0); + // now fill header the normal way + while (k < 4) + header[k++] = stbi__zget8(a); + len = header[1] * 256 + header[0]; + nlen = header[3] * 256 + header[2]; + if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); + if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG"); + if (a->zout + len > a->zout_end) + if (!stbi__zexpand(a, a->zout, len)) return 0; + memcpy(a->zout, a->zbuffer, len); + a->zbuffer += len; + a->zout += len; + return 1; +} + +static int stbi__parse_zlib_header(stbi__zbuf *a) +{ + int cmf = stbi__zget8(a); + int cm = cmf & 15; + /* int cinfo = cmf >> 4; */ + int flg = stbi__zget8(a); + if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec + if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png + if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png + // window = 1 << (8 + cinfo)... 
but who cares, we fully buffer output + return 1; +} + +static const stbi_uc stbi__zdefault_length[288] = +{ + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8 +}; +static const stbi_uc stbi__zdefault_distance[32] = +{ + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 +}; +/* +Init algorithm: +{ + int i; // use <= to match clearly with spec + for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8; + for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9; + for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7; + for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8; + + for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5; +} +*/ + +static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) +{ + int final, type; + if (parse_header) + if (!stbi__parse_zlib_header(a)) return 0; + a->num_bits = 0; + a->code_buffer = 0; + do { + final = stbi__zreceive(a,1); + type = stbi__zreceive(a,2); + if (type == 0) { + if (!stbi__parse_uncompressed_block(a)) return 0; + } else if (type == 3) { + return 0; + } else { + if (type == 1) { + // use fixed code lengths + if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0; + } else { + if (!stbi__compute_huffman_codes(a)) return 0; + } + if (!stbi__parse_huffman_block(a)) return 0; + } + } while (!final); + return 1; +} + +static int stbi__do_zlib(stbi__zbuf *a, char 
*obuf, int olen, int exp, int parse_header) +{ + a->zout_start = obuf; + a->zout = obuf; + a->zout_end = obuf + olen; + a->z_expandable = exp; + + return stbi__parse_zlib(a, parse_header); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, 1)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) +{ + return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 1)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(16384); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer+len; + if (stbi__do_zlib(&a, p, 16384, 1, 0)) { + if (outlen) *outlen = (int) (a.zout - 
a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 0)) + return (int) (a.zout - a.zout_start); + else + return -1; +} +#endif + +// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 +// simple implementation +// - only 8-bit samples +// - no CRC checking +// - allocates lots of intermediate memory +// - avoids problem of streaming data between subsystems +// - avoids explicit window management +// performance +// - uses stb_zlib, a PD zlib implementation with fast huffman decoding + +#ifndef STBI_NO_PNG +typedef struct +{ + stbi__uint32 length; + stbi__uint32 type; +} stbi__pngchunk; + +static stbi__pngchunk stbi__get_chunk_header(stbi__context *s) +{ + stbi__pngchunk c; + c.length = stbi__get32be(s); + c.type = stbi__get32be(s); + return c; +} + +static int stbi__check_png_header(stbi__context *s) +{ + static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; + int i; + for (i=0; i < 8; ++i) + if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); + return 1; +} + +typedef struct +{ + stbi__context *s; + stbi_uc *idata, *expanded, *out; + int depth; +} stbi__png; + + +enum { + STBI__F_none=0, + STBI__F_sub=1, + STBI__F_up=2, + STBI__F_avg=3, + STBI__F_paeth=4, + // synthetic filters used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first, + STBI__F_paeth_first +}; + +static stbi_uc first_row_filter[5] = +{ + STBI__F_none, + STBI__F_sub, + STBI__F_none, + STBI__F_avg_first, + STBI__F_paeth_first +}; + +static int stbi__paeth(int a, int b, int c) +{ + int p = a + b - c; + int pa = abs(p-a); + int pb = abs(p-b); + int pc = abs(p-c); + if (pa <= pb && pa <= pc) return a; + if (pb <= pc) return b; + return c; +} + 
+static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; + +// create the png data from post-deflated data +static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) +{ + int bytes = (depth == 16? 2 : 1); + stbi__context *s = a->s; + stbi__uint32 i,j,stride = x*out_n*bytes; + stbi__uint32 img_len, img_width_bytes; + int k; + int img_n = s->img_n; // copy it into a local for later + + int output_bytes = out_n*bytes; + int filter_bytes = img_n*bytes; + int width = x; + + STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1); + a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into + if (!a->out) return stbi__err("outofmem", "Out of memory"); + + if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); + img_width_bytes = (((img_n * x * depth) + 7) >> 3); + img_len = (img_width_bytes + 1) * y; + + // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, + // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros), + // so just check for raw_len < img_len always. 
+ if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *prior; + int filter = *raw++; + + if (filter > 4) + return stbi__err("invalid filter","Corrupt PNG"); + + if (depth < 8) { + STBI_ASSERT(img_width_bytes <= x); + cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place + filter_bytes = 1; + width = img_width_bytes; + } + prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above + + // if first row, use special filter that doesn't sample previous row + if (j == 0) filter = first_row_filter[filter]; + + // handle first byte explicitly + for (k=0; k < filter_bytes; ++k) { + switch (filter) { + case STBI__F_none : cur[k] = raw[k]; break; + case STBI__F_sub : cur[k] = raw[k]; break; + case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; + case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; + case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; + case STBI__F_avg_first : cur[k] = raw[k]; break; + case STBI__F_paeth_first: cur[k] = raw[k]; break; + } + } + + if (depth == 8) { + if (img_n != out_n) + cur[img_n] = 255; // first pixel + raw += img_n; + cur += out_n; + prior += out_n; + } else if (depth == 16) { + if (img_n != out_n) { + cur[filter_bytes] = 255; // first pixel top byte + cur[filter_bytes+1] = 255; // first pixel bottom byte + } + raw += filter_bytes; + cur += output_bytes; + prior += output_bytes; + } else { + raw += 1; + cur += 1; + prior += 1; + } + + // this is a little gross, so that we don't switch per-pixel or per-component + if (depth < 8 || img_n == out_n) { + int nk = (width - 1)*filter_bytes; + #define STBI__CASE(f) \ + case f: \ + for (k=0; k < nk; ++k) + switch (filter) { + // "none" filter turns into a memcpy here; make that explicit. 
+ case STBI__F_none: memcpy(cur, raw, nk); break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; + } + #undef STBI__CASE + raw += nk; + } else { + STBI_ASSERT(img_n+1 == out_n); + #define STBI__CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ + for (k=0; k < filter_bytes; ++k) + switch (filter) { + STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; + } + #undef STBI__CASE + + // the loop above sets the high byte of the pixels' alpha, but for + // 16 bit png files we also need the low byte set. we'll do that here. 
+ if (depth == 16) { + cur = a->out + stride*j; // start at the beginning of the row again + for (i=0; i < x; ++i,cur+=output_bytes) { + cur[filter_bytes+1] = 255; + } + } + } + } + + // we make a separate pass to expand bits to pixels; for performance, + // this could run two scanlines behind the above code, so it won't + // intefere with filtering but will still be in the cache. + if (depth < 8) { + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; + // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit + // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop + stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range + + // note that the final byte might overshoot and write more data than desired. + // we can allocate enough data that this never writes out of memory, but it + // could also overwrite the next scanline. can it overwrite non-empty data + // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. 
+ // so we need to explicitly clamp the final ones + + if (depth == 4) { + for (k=x*img_n; k >= 2; k-=2, ++in) { + *cur++ = scale * ((*in >> 4) ); + *cur++ = scale * ((*in ) & 0x0f); + } + if (k > 0) *cur++ = scale * ((*in >> 4) ); + } else if (depth == 2) { + for (k=x*img_n; k >= 4; k-=4, ++in) { + *cur++ = scale * ((*in >> 6) ); + *cur++ = scale * ((*in >> 4) & 0x03); + *cur++ = scale * ((*in >> 2) & 0x03); + *cur++ = scale * ((*in ) & 0x03); + } + if (k > 0) *cur++ = scale * ((*in >> 6) ); + if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); + if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); + } else if (depth == 1) { + for (k=x*img_n; k >= 8; k-=8, ++in) { + *cur++ = scale * ((*in >> 7) ); + *cur++ = scale * ((*in >> 6) & 0x01); + *cur++ = scale * ((*in >> 5) & 0x01); + *cur++ = scale * ((*in >> 4) & 0x01); + *cur++ = scale * ((*in >> 3) & 0x01); + *cur++ = scale * ((*in >> 2) & 0x01); + *cur++ = scale * ((*in >> 1) & 0x01); + *cur++ = scale * ((*in ) & 0x01); + } + if (k > 0) *cur++ = scale * ((*in >> 7) ); + if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); + if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); + if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); + if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); + if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); + if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); + } + if (img_n != out_n) { + int q; + // insert alpha = 255 + cur = a->out + stride*j; + if (img_n == 1) { + for (q=x-1; q >= 0; --q) { + cur[q*2+1] = 255; + cur[q*2+0] = cur[q]; + } + } else { + STBI_ASSERT(img_n == 3); + for (q=x-1; q >= 0; --q) { + cur[q*4+3] = 255; + cur[q*4+2] = cur[q*3+2]; + cur[q*4+1] = cur[q*3+1]; + cur[q*4+0] = cur[q*3+0]; + } + } + } + } + } else if (depth == 16) { + // force the image data from big-endian to platform-native. + // this is done in a separate pass due to the decoding relying + // on the data being untouched, but could probably be done + // per-line during decode if care is taken. 
+ stbi_uc *cur = a->out; + stbi__uint16 *cur16 = (stbi__uint16*)cur; + + for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { + *cur16 = (cur[0] << 8) | cur[1]; + } + } + + return 1; +} + +static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) +{ + int bytes = (depth == 16 ? 2 : 1); + int out_bytes = out_n * bytes; + stbi_uc *final; + int p; + if (!interlaced) + return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); + + // de-interlacing + final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); + for (p=0; p < 7; ++p) { + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; + if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { + STBI_FREE(final); + return 0; + } + for (j=0; j < y; ++j) { + for (i=0; i < x; ++i) { + int out_y = j*yspc[p]+yorig[p]; + int out_x = i*xspc[p]+xorig[p]; + memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, + a->out + (j*x+i)*out_bytes, out_bytes); + } + } + STBI_FREE(a->out); + image_data += img_len; + image_data_len -= img_len; + } + } + a->out = final; + + return 1; +} + +static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + // compute color-based transparency, assuming we've + // already got 255 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i=0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] 
? 0 : 255); + p += 2; + } + } else { + for (i=0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi__uint16 *p = (stbi__uint16*) z->out; + + // compute color-based transparency, assuming we've + // already got 65535 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i = 0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 65535); + p += 2; + } + } else { + for (i = 0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n) +{ + stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y; + stbi_uc *p, *temp_out, *orig = a->out; + + p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0); + if (p == NULL) return stbi__err("outofmem", "Out of memory"); + + // between here and free(out) below, exitting would leak + temp_out = p; + + if (pal_img_n == 3) { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p += 3; + } + } else { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p[3] = palette[n+3]; + p += 4; + } + } + STBI_FREE(a->out); + a->out = temp_out; + + STBI_NOTUSED(len); + + return 1; +} + +static int stbi__unpremultiply_on_load = 0; +static int stbi__de_iphone_flag = 0; + +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) +{ + stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply; +} + +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) +{ + stbi__de_iphone_flag = 
flag_true_if_should_convert; +} + +static void stbi__de_iphone(stbi__png *z) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + if (s->img_out_n == 3) { // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 3; + } + } else { + STBI_ASSERT(s->img_out_n == 4); + if (stbi__unpremultiply_on_load) { + // convert bgr to rgb and unpremultiply + for (i=0; i < pixel_count; ++i) { + stbi_uc a = p[3]; + stbi_uc t = p[0]; + if (a) { + stbi_uc half = a / 2; + p[0] = (p[2] * 255 + half) / a; + p[1] = (p[1] * 255 + half) / a; + p[2] = ( t * 255 + half) / a; + } else { + p[0] = p[2]; + p[2] = t; + } + p += 4; + } + } else { + // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 4; + } + } + } +} + +#define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) + +static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) +{ + stbi_uc palette[1024], pal_img_n=0; + stbi_uc has_trans=0, tc[3]; + stbi__uint16 tc16[3]; + stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; + int first=1,k,interlace=0, color=0, is_iphone=0; + stbi__context *s = z->s; + + z->expanded = NULL; + z->idata = NULL; + z->out = NULL; + + if (!stbi__check_png_header(s)) return 0; + + if (scan == STBI__SCAN_type) return 1; + + for (;;) { + stbi__pngchunk c = stbi__get_chunk_header(s); + switch (c.type) { + case STBI__PNG_TYPE('C','g','B','I'): + is_iphone = 1; + stbi__skip(s, c.length); + break; + case STBI__PNG_TYPE('I','H','D','R'): { + int comp,filter; + if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); + first = 0; + if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); + s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)"); + s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) 
return stbi__err("too large","Very large image (corrupt?)"); + z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); + color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); + comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG"); + filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG"); + interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); + if (!pal_img_n) { + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); + if (scan == STBI__SCAN_header) return 1; + } else { + // if paletted, then pal_n is our final components, and + // img_n is # components to decompress/filter. 
+ s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); + // if SCAN_header, have to scan to see if we have a tRNS + } + break; + } + + case STBI__PNG_TYPE('P','L','T','E'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); + pal_len = c.length / 3; + if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); + for (i=0; i < pal_len; ++i) { + palette[i*4+0] = stbi__get8(s); + palette[i*4+1] = stbi__get8(s); + palette[i*4+2] = stbi__get8(s); + palette[i*4+3] = 255; + } + break; + } + + case STBI__PNG_TYPE('t','R','N','S'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); + if (pal_img_n) { + if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } + if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); + if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); + pal_img_n = 4; + for (i=0; i < c.length; ++i) + palette[i*4+3] = stbi__get8(s); + } else { + if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); + if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); + has_trans = 1; + if (z->depth == 16) { + for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is + } else { + for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger + } + } + break; + } + + case STBI__PNG_TYPE('I','D','A','T'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); + if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; } + if ((int)(ioff + c.length) < (int)ioff) return 0; + if (ioff + c.length > idata_limit) { + stbi__uint32 idata_limit_old = 
idata_limit; + stbi_uc *p; + if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096; + while (ioff + c.length > idata_limit) + idata_limit *= 2; + STBI_NOTUSED(idata_limit_old); + p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); + z->idata = p; + } + if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); + ioff += c.length; + break; + } + + case STBI__PNG_TYPE('I','E','N','D'): { + stbi__uint32 raw_len, bpl; + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (scan != STBI__SCAN_load) return 1; + if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); + // initial guess for decoded data size to avoid unnecessary reallocs + bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component + raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; + z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); + if (z->expanded == NULL) return 0; // zlib should set error + STBI_FREE(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; + else + s->img_out_n = s->img_n; + if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; + if (has_trans) { + if (z->depth == 16) { + if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; + } else { + if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; + } + } + if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) + stbi__de_iphone(z); + if (pal_img_n) { + // pal_img_n == 3 or 4 + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) + return 0; + } else if 
(has_trans) { + // non-paletted image with tRNS -> source image has (constant) alpha + ++s->img_n; + } + STBI_FREE(z->expanded); z->expanded = NULL; + return 1; + } + + default: + // if critical, fail + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if ((c.type & (1 << 29)) == 0) { + #ifndef STBI_NO_FAILURE_STRINGS + // not threadsafe + static char invalid_chunk[] = "XXXX PNG chunk not known"; + invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); + invalid_chunk[1] = STBI__BYTECAST(c.type >> 16); + invalid_chunk[2] = STBI__BYTECAST(c.type >> 8); + invalid_chunk[3] = STBI__BYTECAST(c.type >> 0); + #endif + return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type"); + } + stbi__skip(s, c.length); + break; + } + // end of PNG chunk, read and skip CRC + stbi__get32be(s); + } +} + +static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri) +{ + void *result=NULL; + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { + if (p->depth < 8) + ri->bits_per_channel = 8; + else + ri->bits_per_channel = p->depth; + result = p->out; + p->out = NULL; + if (req_comp && req_comp != p->s->img_out_n) { + if (ri->bits_per_channel == 8) + result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + else + result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + p->s->img_out_n = req_comp; + if (result == NULL) return result; + } + *x = p->s->img_x; + *y = p->s->img_y; + if (n) *n = p->s->img_n; + } + STBI_FREE(p->out); p->out = NULL; + STBI_FREE(p->expanded); p->expanded = NULL; + STBI_FREE(p->idata); p->idata = NULL; + + return result; +} + +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi__png p; + p.s = s; + return stbi__do_png(&p, 
x,y,comp,req_comp, ri); +} + +static int stbi__png_test(stbi__context *s) +{ + int r; + r = stbi__check_png_header(s); + stbi__rewind(s); + return r; +} + +static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp) +{ + if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) { + stbi__rewind( p->s ); + return 0; + } + if (x) *x = p->s->img_x; + if (y) *y = p->s->img_y; + if (comp) *comp = p->s->img_n; + return 1; +} + +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__png p; + p.s = s; + return stbi__png_info_raw(&p, x, y, comp); +} + +static int stbi__png_is16(stbi__context *s) +{ + stbi__png p; + p.s = s; + if (!stbi__png_info_raw(&p, NULL, NULL, NULL)) + return 0; + if (p.depth != 16) { + stbi__rewind(p.s); + return 0; + } + return 1; +} +#endif + +// Microsoft/Windows BMP image + +#ifndef STBI_NO_BMP +static int stbi__bmp_test_raw(stbi__context *s) +{ + int r; + int sz; + if (stbi__get8(s) != 'B') return 0; + if (stbi__get8(s) != 'M') return 0; + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + stbi__get32le(s); // discard data offset + sz = stbi__get32le(s); + r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124); + return r; +} + +static int stbi__bmp_test(stbi__context *s) +{ + int r = stbi__bmp_test_raw(s); + stbi__rewind(s); + return r; +} + + +// returns 0..31 for the highest set bit +static int stbi__high_bit(unsigned int z) +{ + int n=0; + if (z == 0) return -1; + if (z >= 0x10000) n += 16, z >>= 16; + if (z >= 0x00100) n += 8, z >>= 8; + if (z >= 0x00010) n += 4, z >>= 4; + if (z >= 0x00004) n += 2, z >>= 2; + if (z >= 0x00002) n += 1, z >>= 1; + return n; +} + +static int stbi__bitcount(unsigned int a) +{ + a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 + a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4 + a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits + a = (a + (a >> 8)); // max 16 per 8 bits + a = 
(a + (a >> 16)); // max 32 per 8 bits + return a & 0xff; +} + +// extract an arbitrarily-aligned N-bit value (N=bits) +// from v, and then make it 8-bits long and fractionally +// extend it to full full range. +static int stbi__shiftsigned(int v, int shift, int bits) +{ + static unsigned int mul_table[9] = { + 0, + 0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/, + 0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/, + }; + static unsigned int shift_table[9] = { + 0, 0,0,1,0,2,4,6,0, + }; + if (shift < 0) + v <<= -shift; + else + v >>= shift; + STBI_ASSERT(v >= 0 && v < 256); + v >>= (8-bits); + STBI_ASSERT(bits >= 0 && bits <= 8); + return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits]; +} + +typedef struct +{ + int bpp, offset, hsz; + unsigned int mr,mg,mb,ma, all_a; +} stbi__bmp_data; + +static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) +{ + int hsz; + if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP"); + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + info->offset = stbi__get32le(s); + info->hsz = hsz = stbi__get32le(s); + info->mr = info->mg = info->mb = info->ma = 0; + + if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown"); + if (hsz == 12) { + s->img_x = stbi__get16le(s); + s->img_y = stbi__get16le(s); + } else { + s->img_x = stbi__get32le(s); + s->img_y = stbi__get32le(s); + } + if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); + info->bpp = stbi__get16le(s); + if (hsz != 12) { + int compress = stbi__get32le(s); + if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); + stbi__get32le(s); // discard sizeof + stbi__get32le(s); // discard hres + stbi__get32le(s); // discard vres + stbi__get32le(s); // 
discard colorsused
   stbi__get32le(s); // discard max important
   if (hsz == 40 || hsz == 56) {
      if (hsz == 56) {
         stbi__get32le(s);
         stbi__get32le(s);
         stbi__get32le(s);
         stbi__get32le(s);
      }
      if (info->bpp == 16 || info->bpp == 32) {
         if (compress == 0) {
            // BI_RGB: implied channel masks
            if (info->bpp == 32) {
               info->mr = 0xffu << 16;
               info->mg = 0xffu <<  8;
               info->mb = 0xffu <<  0;
               info->ma = 0xffu << 24;
               info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
            } else {
               info->mr = 31u << 10;
               info->mg = 31u <<  5;
               info->mb = 31u <<  0;
            }
         } else if (compress == 3) {
            // BI_BITFIELDS: explicit masks follow the header
            info->mr = stbi__get32le(s);
            info->mg = stbi__get32le(s);
            info->mb = stbi__get32le(s);
            // not documented, but generated by photoshop and handled by mspaint
            if (info->mr == info->mg && info->mg == info->mb) {
               // ?!?!?
               return stbi__errpuc("bad BMP", "bad BMP");
            }
         } else
            return stbi__errpuc("bad BMP", "bad BMP");
      }
   } else {
      // V4/V5 headers: masks are always stored explicitly
      int i;
      if (hsz != 108 && hsz != 124)
         return stbi__errpuc("bad BMP", "bad BMP");
      info->mr = stbi__get32le(s);
      info->mg = stbi__get32le(s);
      info->mb = stbi__get32le(s);
      info->ma = stbi__get32le(s);
      stbi__get32le(s); // discard color space
      for (i=0; i < 12; ++i)
         stbi__get32le(s); // discard color space parameters
      if (hsz == 124) {
         stbi__get32le(s); // discard rendering intent
         stbi__get32le(s); // discard offset of profile data
         stbi__get32le(s); // discard size of profile data
         stbi__get32le(s); // discard reserved
      }
   }
   }
   // non-NULL sentinel meaning "header parsed OK"; caller only NULL-checks it
   return (void *) 1;
}


// Decode a BMP into an 8-bit-per-channel interleaved buffer.
// Handles 1/4/8-bit palettized, 16/24/32-bit masked pixels, and bottom-up
// or top-down row order; returns NULL (with the global failure reason set)
// on error. req_comp of 0 means "whatever the file has".
static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *out;
   unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
   stbi_uc pal[256][4];
   int psize=0,i,j,width;
   int flip_vertically, pad, target;
   stbi__bmp_data info;
   STBI_NOTUSED(ri);

   info.all_a = 255;
   if (stbi__bmp_parse_header(s, &info) == NULL)
      return NULL; // error code already set

   // positive height means bottom-up storage, so we must flip at the end
   flip_vertically = ((int) s->img_y) > 0;
   s->img_y = abs((int) s->img_y);

   mr = info.mr;
   mg = info.mg;
   mb = info.mb;
   ma = info.ma;
   all_a = info.all_a;

   // derive palette entry count from the distance between header end and pixel data
   if (info.hsz == 12) {
      if (info.bpp < 24)
         psize = (info.offset - 14 - 24) / 3;
   } else {
      if (info.bpp < 16)
         psize = (info.offset - 14 - info.hsz) >> 2;
   }

   s->img_n = ma ? 4 : 3;
   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
      target = req_comp;
   else
      target = s->img_n; // if they want monochrome, we'll post-convert

   // sanity-check size
   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
      return stbi__errpuc("too large", "Corrupt BMP");

   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
   if (!out) return stbi__errpuc("outofmem", "Out of memory");
   if (info.bpp < 16) {
      // palettized path
      int z=0;
      if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
      for (i=0; i < psize; ++i) {
         // palette is stored BGR(A); we keep RGB
         pal[i][2] = stbi__get8(s);
         pal[i][1] = stbi__get8(s);
         pal[i][0] = stbi__get8(s);
         if (info.hsz != 12) stbi__get8(s); // 4th palette byte only in non-core headers
         pal[i][3] = 255;
      }
      stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
      // bytes per row before padding
      if (info.bpp == 1) width = (s->img_x + 7) >> 3;
      else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
      else if (info.bpp == 8) width = s->img_x;
      else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
      pad = (-width)&3; // rows are padded to 4-byte boundaries
      if (info.bpp == 1) {
         for (j=0; j < (int) s->img_y; ++j) {
            int bit_offset = 7, v = stbi__get8(s);
            for (i=0; i < (int) s->img_x; ++i) {
               int color = (v>>bit_offset)&0x1;
               out[z++] = pal[color][0];
               out[z++] = pal[color][1];
               out[z++] = pal[color][2];
               if((--bit_offset) < 0) {
                  bit_offset = 7;
                  v = stbi__get8(s);
               }
            }
            stbi__skip(s, pad);
         }
      } else {
         // 4- or 8-bit indices, two pixels per iteration
         for (j=0; j < (int) s->img_y; ++j) {
            for (i=0; i < (int) s->img_x; i += 2) {
               int v=stbi__get8(s),v2=0;
               if (info.bpp == 4) {
                  v2 = v & 15;
                  v >>= 4;
               }
               out[z++] = pal[v][0];
               out[z++] = pal[v][1];
               out[z++] = pal[v][2];
               if (target == 4) out[z++] = 255;
               if (i+1 == (int) s->img_x) break;
               v = (info.bpp == 8) ? stbi__get8(s) : v2;
               out[z++] = pal[v][0];
               out[z++] = pal[v][1];
               out[z++] = pal[v][2];
               if (target == 4) out[z++] = 255;
            }
            stbi__skip(s, pad);
         }
      }
   } else {
      // 16/24/32-bit path: decode via channel masks (or fast paths below)
      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
      int z = 0;
      int easy=0;
      stbi__skip(s, info.offset - 14 - info.hsz);
      if (info.bpp == 24) width = 3 * s->img_x;
      else if (info.bpp == 16) width = 2*s->img_x;
      else /* bpp = 32 and pad = 0 */ width=0;
      pad = (-width) & 3;
      // easy==1: plain BGR24, easy==2: BGRA32 with the standard masks
      if (info.bpp == 24) {
         easy = 1;
      } else if (info.bpp == 32) {
         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
            easy = 2;
      }
      if (!easy) {
         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
         // right shift amt to put high bit in position #7
         rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
         gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
         bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
         ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
      }
      for (j=0; j < (int) s->img_y; ++j) {
         if (easy) {
            for (i=0; i < (int) s->img_x; ++i) {
               unsigned char a;
               // stored as BGR(A); write RGB
               out[z+2] = stbi__get8(s);
               out[z+1] = stbi__get8(s);
               out[z+0] = stbi__get8(s);
               z += 3;
               a = (easy == 2 ? stbi__get8(s) : 255);
               all_a |= a;
               if (target == 4) out[z++] = a;
            }
         } else {
            int bpp = info.bpp;
            for (i=0; i < (int) s->img_x; ++i) {
               stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
               unsigned int a;
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
               a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
               all_a |= a;
               if (target == 4) out[z++] = STBI__BYTECAST(a);
            }
         }
         stbi__skip(s, pad);
      }
   }

   // if alpha channel is all 0s, replace with all 255s
   if (target == 4 && all_a == 0)
      for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
         out[i] = 255;

   if (flip_vertically) {
      stbi_uc t;
      for (j=0; j < (int) s->img_y>>1; ++j) {
         stbi_uc *p1 = out +      j     *s->img_x*target;
         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
         for (i=0; i < (int) s->img_x*target; ++i) {
            t = p1[i], p1[i] = p2[i], p2[i] = t;
         }
      }
   }

   if (req_comp && req_comp != target) {
      out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }

   *x = s->img_x;
   *y = s->img_y;
   if (comp) *comp = s->img_n;
   return out;
}
#endif

// Targa Truevision - TGA
// by Jonathan Dummer
#ifndef STBI_NO_TGA
// returns STBI_rgb or whatever, 0 on error
static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
{
   // only RGB or RGBA (incl.
16bit) or grey allowed
   if (is_rgb16) *is_rgb16 = 0;
   switch(bits_per_pixel) {
      case 8:  return STBI_grey;
      case 16: if(is_grey) return STBI_grey_alpha;
               // fallthrough
      case 15: if(is_rgb16) *is_rgb16 = 1;
               return STBI_rgb;
      case 24: // fallthrough
      case 32: return bits_per_pixel/8;
      default: return 0;
   }
}

// Parse just enough of the TGA header to report width/height/components
// without decoding pixels; rewinds the stream and returns 0 on anything
// that doesn't look like a TGA we can decode.
static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
{
    int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
    int sz, tga_colormap_type;
    stbi__get8(s);                   // discard Offset
    tga_colormap_type = stbi__get8(s); // colormap type
    if( tga_colormap_type > 1 ) {
        stbi__rewind(s);
        return 0;      // only RGB or indexed allowed
    }
    tga_image_type = stbi__get8(s); // image type
    if ( tga_colormap_type == 1 ) { // colormapped (paletted) image
        if (tga_image_type != 1 && tga_image_type != 9) {
            stbi__rewind(s);
            return 0;
        }
        stbi__skip(s,4);       // skip index of first colormap entry and number of entries
        sz = stbi__get8(s);    // check bits per palette color entry
        if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) {
            stbi__rewind(s);
            return 0;
        }
        stbi__skip(s,4);       // skip image x and y origin
        tga_colormap_bpp = sz;
    } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
        if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) {
            stbi__rewind(s);
            return 0; // only RGB or grey allowed, +/- RLE
        }
        stbi__skip(s,9); // skip colormap specification and image x/y origin
        tga_colormap_bpp = 0;
    }
    tga_w = stbi__get16le(s);
    if( tga_w < 1 ) {
        stbi__rewind(s);
        return 0;   // test width
    }
    tga_h = stbi__get16le(s);
    if( tga_h < 1 ) {
        stbi__rewind(s);
        return 0;   // test height
    }
    tga_bits_per_pixel = stbi__get8(s); // bits per pixel
    stbi__get8(s); // ignore alpha bits
    if (tga_colormap_bpp != 0) {
        if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
            // when using a colormap, tga_bits_per_pixel is the size of the indexes
            // I don't think anything but 8 or 16bit indexes makes sense
            stbi__rewind(s);
            return 0;
        }
        tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
    } else {
        tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL);
    }
    if(!tga_comp) {
      stbi__rewind(s);
      return 0;
    }
    if (x) *x = tga_w;
    if (y) *y = tga_h;
    if (comp) *comp = tga_comp;
    return 1;                   // seems to have passed everything
}

// Cheap sniff test: validate the TGA header fields, then rewind.
// Returns 1 when the stream plausibly holds a decodable TGA.
static int stbi__tga_test(stbi__context *s)
{
   int res = 0;
   int sz, tga_color_type;
   stbi__get8(s);      // discard Offset
   tga_color_type = stbi__get8(s);   // color type
   if ( tga_color_type > 1 ) goto errorEnd;   // only RGB or indexed allowed
   sz = stbi__get8(s);   // image type
   if ( tga_color_type == 1 ) { // colormapped (paletted) image
      if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9
      stbi__skip(s,4);       // skip index of first colormap entry and number of entries
      sz = stbi__get8(s);    // check bits per palette color entry
      if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
      stbi__skip(s,4);       // skip image x and y origin
   } else { // "normal" image w/o colormap
      if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE
      stbi__skip(s,9); // skip colormap specification and image x/y origin
   }
   if ( stbi__get16le(s) < 1 ) goto errorEnd;      // test width
   if ( stbi__get16le(s) < 1 ) goto errorEnd;      // test height
   sz = stbi__get8(s);   // bits per pixel
   if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index
   if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;

   res = 1; // if we got this far, everything's good and we can return 1 instead of 0

errorEnd:
   stbi__rewind(s);
   return res;
}

// read 16bit value and convert to 24bit RGB
// Expand one little-endian 15/16-bit 5-5-5 pixel from the stream into
// three 8-bit RGB bytes at out[0..2] (already in RGB order, no later swap).
static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
{
   stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
   stbi__uint16 fiveBitMask = 31;
   // we have 3 channels with 5bits each
   int r = (px >> 10) & fiveBitMask;
   int g = (px >> 5) & fiveBitMask;
   int b = px & fiveBitMask;
   // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
   out[0] = (stbi_uc)((r * 255)/31);
   out[1] = (stbi_uc)((g * 255)/31);
   out[2] = (stbi_uc)((b * 255)/31);

   // some people claim that the most significant bit might be used for alpha
   // (possibly if an alpha-bit is set in the "image descriptor byte")
   // but that only made 16bit test images completely translucent..
   // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
}

// Full TGA decoder: handles raw and RLE data, paletted and truecolor,
// 15/16-bit RGB, and bottom-up storage. Returns a malloc'd interleaved
// buffer, or NULL with the failure reason set.
static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   // read in the TGA header stuff
   int tga_offset = stbi__get8(s);
   int tga_indexed = stbi__get8(s);
   int tga_image_type = stbi__get8(s);
   int tga_is_RLE = 0;
   int tga_palette_start = stbi__get16le(s);
   int tga_palette_len = stbi__get16le(s);
   int tga_palette_bits = stbi__get8(s);
   int tga_x_origin = stbi__get16le(s);
   int tga_y_origin = stbi__get16le(s);
   int tga_width = stbi__get16le(s);
   int tga_height = stbi__get16le(s);
   int tga_bits_per_pixel = stbi__get8(s);
   int tga_comp, tga_rgb16=0;
   int tga_inverted = stbi__get8(s);
   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
   // image data
   unsigned char *tga_data;
   unsigned char *tga_palette = NULL;
   int i, j;
   unsigned char raw_data[4] = {0};
   int RLE_count = 0;
   int RLE_repeating = 0;
   int read_next_pixel = 1;
   STBI_NOTUSED(ri);

   // do a tiny bit of precessing
   if ( tga_image_type >= 8 )
   {
      // types 9/10/11 are the RLE variants of 1/2/3
      tga_image_type -= 8;
      tga_is_RLE = 1;
   }
   tga_inverted = 1 - ((tga_inverted >> 5) & 1);

   // If I'm paletted, then I'll use the number of bits from the palette
   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);

   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");

   // tga info
   *x = tga_width;
   *y = tga_height;
   if (comp) *comp = tga_comp;

   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
      return stbi__errpuc("too large", "Corrupt TGA");

   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");

   // skip to the data's starting position (offset usually = 0)
   stbi__skip(s, tga_offset );

   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
      // fast path: raw truecolor/grey rows can be bulk-read
      for (i=0; i < tga_height; ++i) {
         int row = tga_inverted ? tga_height -i - 1 : i;
         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
         stbi__getn(s, tga_row, tga_width * tga_comp);
      }
   } else {
      // do I need to load a palette?
      if ( tga_indexed)
      {
         // any data to skip? (offset usually = 0)
         stbi__skip(s, tga_palette_start );
         // load the palette
         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
         if (!tga_palette) {
            STBI_FREE(tga_data);
            return stbi__errpuc("outofmem", "Out of memory");
         }
         if (tga_rgb16) {
            stbi_uc *pal_entry = tga_palette;
            STBI_ASSERT(tga_comp == STBI_rgb);
            for (i=0; i < tga_palette_len; ++i) {
               stbi__tga_read_rgb16(s, pal_entry);
               pal_entry += tga_comp;
            }
         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
               STBI_FREE(tga_data);
               STBI_FREE(tga_palette);
               return stbi__errpuc("bad palette", "Corrupt TGA");
         }
      }
      // load the data
      for (i=0; i < tga_width * tga_height; ++i)
      {
         // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
         if ( tga_is_RLE )
         {
            if ( RLE_count == 0 )
            {
               // yep, get the next byte as a RLE command
               int RLE_cmd = stbi__get8(s);
               RLE_count = 1 + (RLE_cmd & 127);
               RLE_repeating = RLE_cmd >> 7; // high bit set => run packet
               read_next_pixel = 1;
            } else if ( !RLE_repeating )
            {
               read_next_pixel = 1;
            }
         } else
         {
            read_next_pixel = 1;
         }
         // OK, if I need to read a pixel, do it now
         if ( read_next_pixel )
         {
            // load however much data we did have
            if ( tga_indexed )
            {
               // read in index, then perform the lookup
               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
               if ( pal_idx >= tga_palette_len ) {
                  // invalid index
                  pal_idx = 0;
               }
               pal_idx *= tga_comp;
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = tga_palette[pal_idx+j];
               }
            } else if(tga_rgb16) {
               STBI_ASSERT(tga_comp == STBI_rgb);
               stbi__tga_read_rgb16(s, raw_data);
            } else {
               // read in the data raw
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = stbi__get8(s);
               }
            }
            // clear the reading flag for the next pixel
            read_next_pixel = 0;
         } // end of reading a pixel

         // copy data
         for (j = 0; j < tga_comp; ++j)
           tga_data[i*tga_comp+j] = raw_data[j];

         // in case we're in RLE mode, keep counting down
         --RLE_count;
      }
      // do I need to invert the image?
      if ( tga_inverted )
      {
         // swap rows top<->bottom in place
         for (j = 0; j*2 < tga_height; ++j)
         {
            int index1 = j * tga_width * tga_comp;
            int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
            for (i = tga_width * tga_comp; i > 0; --i)
            {
               unsigned char temp = tga_data[index1];
               tga_data[index1] = tga_data[index2];
               tga_data[index2] = temp;
               ++index1;
               ++index2;
            }
         }
      }
      // clear my palette, if I had one
      if ( tga_palette != NULL )
      {
         STBI_FREE( tga_palette );
      }
   }

   // swap RGB - if the source data was RGB16, it already is in the right order
   if (tga_comp >= 3 && !tga_rgb16)
   {
      unsigned char* tga_pixel = tga_data;
      for (i=0; i < tga_width * tga_height; ++i)
      {
         unsigned char temp = tga_pixel[0];
         tga_pixel[0] = tga_pixel[2];
         tga_pixel[2] = temp;
         tga_pixel += tga_comp;
      }
   }

   // convert to target component count
   if (req_comp && req_comp != tga_comp)
      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);

   // the things I do to get rid of an error message, and yet keep
   // Microsoft's C compilers happy...
[8^(
   tga_palette_start = tga_palette_len = tga_palette_bits =
         tga_x_origin = tga_y_origin = 0;
   // OK, done
   return tga_data;
}
#endif

// *************************************************************************************************
// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB

#ifndef STBI_NO_PSD
// Signature check: a PSD starts with the big-endian magic "8BPS".
static int stbi__psd_test(stbi__context *s)
{
   int r = (stbi__get32be(s) == 0x38425053);
   stbi__rewind(s);
   return r;
}

// Unpack one PackBits-style RLE channel into p; p is advanced by 4 per
// pixel because the destination is an interleaved RGBA buffer.
// Returns 0 on corrupt data (run overflowing pixelCount).
static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
{
   int count, nleft, len;

   count = 0;
   while ((nleft = pixelCount - count) > 0) {
      len = stbi__get8(s);
      if (len == 128) {
         // No-op.
      } else if (len < 128) {
         // Copy next len+1 bytes literally.
         len++;
         if (len > nleft) return 0; // corrupt data
         count += len;
         while (len) {
            *p = stbi__get8(s);
            p += 4;
            len--;
         }
      } else if (len > 128) {
         stbi_uc   val;
         // Next -len+1 bytes in the dest are replicated from next source byte.
         // (Interpret len as a negative 8-bit int.)
         len = 257 - len;
         if (len > nleft) return 0; // corrupt data
         val = stbi__get8(s);
         count += len;
         while (len) {
            *p = val;
            p += 4;
            len--;
         }
      }
   }

   return 1;
}

// Decode an RGB-mode PSD (8- or 16-bit, raw or RLE) into interleaved RGBA.
// bpc selects the caller's preferred output bit depth; 16-bit output only
// happens for uncompressed 16-bit input with bpc==16.
static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   int pixelCount;
   int channelCount, compression;
   int channel, i;
   int bitdepth;
   int w,h;
   stbi_uc *out;
   STBI_NOTUSED(ri);

   // Check identifier
   if (stbi__get32be(s) != 0x38425053)   // "8BPS"
      return stbi__errpuc("not PSD", "Corrupt PSD image");

   // Check file type version.
   if (stbi__get16be(s) != 1)
      return stbi__errpuc("wrong version", "Unsupported version of PSD image");

   // Skip 6 reserved bytes.
   stbi__skip(s, 6 );

   // Read the number of channels (R, G, B, A, etc).
   channelCount = stbi__get16be(s);
   if (channelCount < 0 || channelCount > 16)
      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");

   // Read the rows and columns of the image.
   h = stbi__get32be(s);
   w = stbi__get32be(s);

   // Make sure the depth is 8 bits.
   bitdepth = stbi__get16be(s);
   if (bitdepth != 8 && bitdepth != 16)
      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");

   // Make sure the color mode is RGB.
   // Valid options are:
   //   0: Bitmap
   //   1: Grayscale
   //   2: Indexed color
   //   3: RGB color
   //   4: CMYK color
   //   7: Multichannel
   //   8: Duotone
   //   9: Lab color
   if (stbi__get16be(s) != 3)
      return stbi__errpuc("wrong color format", "PSD is not in RGB color format");

   // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
   stbi__skip(s,stbi__get32be(s) );

   // Skip the image resources.  (resolution, pen tool paths, etc)
   stbi__skip(s, stbi__get32be(s) );

   // Skip the reserved data.
   stbi__skip(s, stbi__get32be(s) );

   // Find out if the data is compressed.
   // Known values:
   //   0: no compression
   //   1: RLE compressed
   compression = stbi__get16be(s);
   if (compression > 1)
      return stbi__errpuc("bad compression", "PSD has an unknown compression format");

   // Check size
   if (!stbi__mad3sizes_valid(4, w, h, 0))
      return stbi__errpuc("too large", "Corrupt PSD");

   // Create the destination image.

   if (!compression && bitdepth == 16 && bpc == 16) {
      // 16-bit passthrough: 8 bytes per pixel (4 channels x 2 bytes)
      out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
      ri->bits_per_channel = 16;
   } else
      out = (stbi_uc *) stbi__malloc(4 * w*h);

   if (!out) return stbi__errpuc("outofmem", "Out of memory");
   pixelCount = w*h;

   // Initialize the data to zero.
   //memset( out, 0, pixelCount * 4 );

   // Finally, the image data.
   if (compression) {
      // RLE as used by .PSD and .TIFF
      // Loop until you get the number of unpacked bytes you are expecting:
      //     Read the next source byte into n.
      //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
      //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
      //     Else if n is 128, noop.
      // Endloop

      // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
      // which we're going to just skip.
      stbi__skip(s, h * channelCount * 2 );

      // Read the RLE data by channel.
      for (channel = 0; channel < 4; channel++) {
         stbi_uc *p;

         p = out+channel;
         if (channel >= channelCount) {
            // Fill this channel with default data (opaque alpha, zero color).
            for (i = 0; i < pixelCount; i++, p += 4)
               *p = (channel == 3 ? 255 : 0);
         } else {
            // Read the RLE data.
            if (!stbi__psd_decode_rle(s, p, pixelCount)) {
               STBI_FREE(out);
               return stbi__errpuc("corrupt", "bad RLE data");
            }
         }
      }

   } else {
      // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
      // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.

      // Read the data by channel.
      for (channel = 0; channel < 4; channel++) {
         if (channel >= channelCount) {
            // Fill this channel with default data.
            if (bitdepth == 16 && bpc == 16) {
               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
               stbi__uint16 val = channel == 3 ? 65535 : 0;
               for (i = 0; i < pixelCount; i++, q += 4)
                  *q = val;
            } else {
               stbi_uc *p = out+channel;
               stbi_uc val = channel == 3 ? 255 : 0;
               for (i = 0; i < pixelCount; i++, p += 4)
                  *p = val;
            }
         } else {
            if (ri->bits_per_channel == 16) {    // output bpc
               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
               for (i = 0; i < pixelCount; i++, q += 4)
                  *q = (stbi__uint16) stbi__get16be(s);
            } else {
               stbi_uc *p = out+channel;
               if (bitdepth == 16) {  // input bpc: 16-bit file, 8-bit output => take high byte
                  for (i = 0; i < pixelCount; i++, p += 4)
                     *p = (stbi_uc) (stbi__get16be(s) >> 8);
               } else {
                  for (i = 0; i < pixelCount; i++, p += 4)
                     *p = stbi__get8(s);
               }
            }
         }
      }
   }

   // remove weird white matte from PSD (un-premultiply against white)
   if (channelCount >= 4) {
      if (ri->bits_per_channel == 16) {
         for (i=0; i < w*h; ++i) {
            stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
            if (pixel[3] != 0 && pixel[3] != 65535) {
               float a = pixel[3] / 65535.0f;
               float ra = 1.0f / a;
               float inv_a = 65535.0f * (1 - ra);
               pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
               pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
               pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
            }
         }
      } else {
         for (i=0; i < w*h; ++i) {
            unsigned char *pixel = out + 4*i;
            if (pixel[3] != 0 && pixel[3] != 255) {
               float a = pixel[3] / 255.0f;
               float ra = 1.0f / a;
               float inv_a = 255.0f * (1 - ra);
               pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
               pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
               pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
            }
         }
      }
   }

   // convert to desired output format
   if (req_comp && req_comp != 4) {
      if (ri->bits_per_channel == 16)
         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
      else
         out = stbi__convert_format(out, 4, req_comp, w, h);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }

   if (comp) *comp = 4;
   *y = h;
   *x = w;

   return out;
}
#endif

// *************************************************************************************************
// Softimage PIC loader
// by Tom Seddon
//
// See
http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/

#ifndef STBI_NO_PIC
// Consume 4 bytes and compare them against str; returns 1 on match.
static int stbi__pic_is4(stbi__context *s,const char *str)
{
   int i;
   for (i=0; i<4; ++i)
      if (stbi__get8(s) != (stbi_uc)str[i])
         return 0;

   return 1;
}

// PIC signature: magic bytes, 84 bytes of header, then the "PICT" id.
static int stbi__pic_test_core(stbi__context *s)
{
   int i;

   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
      return 0;

   for(i=0;i<84;++i)
      stbi__get8(s);

   if (!stbi__pic_is4(s,"PICT"))
      return 0;

   return 1;
}

typedef struct
{
   stbi_uc size,type,channel;
} stbi__pic_packet;

// Read one value from the stream into dest for each channel bit set in
// `channel` (bits test R,G,B,A from 0x80 down); errors out at EOF.
static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
{
   int mask=0x80, i;

   for (i=0; i<4; ++i, mask>>=1) {
      if (channel & mask) {
         if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
         dest[i]=stbi__get8(s);
      }
   }

   return dest;
}

// Copy src to dest for each channel bit set in `channel`.
static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
{
   int mask=0x80,i;

   for (i=0;i<4; ++i, mask>>=1)
      if (channel&mask)
         dest[i]=src[i];
}

// Decode the PIC packet table and scanline data into `result` (RGBA).
static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
{
   int act_comp=0,num_packets=0,y,chained;
   stbi__pic_packet packets[10];

   // this will (should...) cater for even some bizarre stuff like having data
   //  for the same channel in multiple packets.
   do {
      stbi__pic_packet *packet;

      if (num_packets==sizeof(packets)/sizeof(packets[0]))
         return stbi__errpuc("bad format","too many packets");

      packet = &packets[num_packets++];

      chained = stbi__get8(s);
      packet->size    = stbi__get8(s);
      packet->type    = stbi__get8(s);
      packet->channel = stbi__get8(s);

      act_comp |= packet->channel;

      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)");
      if (packet->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
   } while (chained);

   *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
+ + for(y=0; ytype) { + default: + return stbi__errpuc("bad format","packet has bad compression type"); + + case 0: {//uncompressed + int x; + + for(x=0;xchannel,dest)) + return 0; + break; + } + + case 1://Pure RLE + { + int left=width, i; + + while (left>0) { + stbi_uc count,value[4]; + + count=stbi__get8(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)"); + + if (count > left) + count = (stbi_uc) left; + + if (!stbi__readval(s,packet->channel,value)) return 0; + + for(i=0; ichannel,dest,value); + left -= count; + } + } + break; + + case 2: {//Mixed RLE + int left=width; + while (left>0) { + int count = stbi__get8(s), i; + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)"); + + if (count >= 128) { // Repeated + stbi_uc value[4]; + + if (count==128) + count = stbi__get16be(s); + else + count -= 127; + if (count > left) + return stbi__errpuc("bad file","scanline overrun"); + + if (!stbi__readval(s,packet->channel,value)) + return 0; + + for(i=0;ichannel,dest,value); + } else { // Raw + ++count; + if (count>left) return stbi__errpuc("bad file","scanline overrun"); + + for(i=0;ichannel,dest)) + return 0; + } + left-=count; + } + break; + } + } + } + } + + return result; +} + +static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri) +{ + stbi_uc *result; + int i, x,y, internal_comp; + STBI_NOTUSED(ri); + + if (!comp) comp = &internal_comp; + + for (i=0; i<92; ++i) + stbi__get8(s); + + x = stbi__get16be(s); + y = stbi__get16be(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)"); + if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode"); + + stbi__get32be(s); //skip `ratio' + stbi__get16be(s); //skip `fields' + stbi__get16be(s); //skip `pad' + + // intermediate buffer is RGBA + result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0); + memset(result, 0xff, 
x*y*4);

   if (!stbi__pic_load_core(s,x,y,comp, result)) {
      STBI_FREE(result);
      result=0;
   }
   *px = x;
   *py = y;
   if (req_comp == 0) req_comp = *comp;
   result=stbi__convert_format(result,4,req_comp,x,y);

   return result;
}

// Non-destructive PIC sniff test.
static int stbi__pic_test(stbi__context *s)
{
   int r = stbi__pic_test_core(s);
   stbi__rewind(s);
   return r;
}
#endif

// *************************************************************************************************
// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb

#ifndef STBI_NO_GIF
// One LZW dictionary entry: linked list via prefix back-pointers.
typedef struct
{
   stbi__int16 prefix;
   stbi_uc first;
   stbi_uc suffix;
} stbi__gif_lzw;

typedef struct
{
   int w,h;
   stbi_uc *out;                 // output buffer (always 4 components)
   stbi_uc *background;          // The current "background" as far as a gif is concerned
   stbi_uc *history;             // per-pixel flag: touched by the previous frame
   int flags, bgindex, ratio, transparent, eflags;
   stbi_uc  pal[256][4];         // global palette (RGBA)
   stbi_uc lpal[256][4];         // local (per-frame) palette
   stbi__gif_lzw codes[8192];
   stbi_uc *color_table;
   int parse, step;              // interlace state
   int lflags;
   int start_x, start_y;
   int max_x, max_y;
   int cur_x, cur_y;             // byte offsets into out, advanced per decoded pixel
   int line_size;
   int delay;
} stbi__gif;

// Signature check: "GIF87a" or "GIF89a".
static int stbi__gif_test_raw(stbi__context *s)
{
   int sz;
   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0;
   sz = stbi__get8(s);
   if (sz != '9' && sz != '7') return 0;
   if (stbi__get8(s) != 'a') return 0;
   return 1;
}

static int stbi__gif_test(stbi__context *s)
{
   int r = stbi__gif_test_raw(s);
   stbi__rewind(s);
   return r;
}

// Read num_entries palette entries (file order BGR -> stored RGB); the
// entry with index `transp` gets alpha 0, all others 255.
static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
{
   int i;
   for (i=0; i < num_entries; ++i) {
      pal[i][2] = stbi__get8(s);
      pal[i][1] = stbi__get8(s);
      pal[i][0] = stbi__get8(s);
      pal[i][3] = transp == i ? 0 : 255;
   }
}

// Parse the logical screen descriptor; when is_info is set, stop before
// reading the global color table (header-only probe).
static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
{
   stbi_uc version;
   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
      return stbi__err("not GIF", "Corrupt GIF");

   version = stbi__get8(s);
   if (version != '7' && version != '9')    return stbi__err("not GIF", "Corrupt GIF");
   if (stbi__get8(s) != 'a')                return stbi__err("not GIF", "Corrupt GIF");

   stbi__g_failure_reason = "";
   g->w = stbi__get16le(s);
   g->h = stbi__get16le(s);
   g->flags = stbi__get8(s);
   g->bgindex = stbi__get8(s);
   g->ratio = stbi__get8(s);
   g->transparent = -1;

   if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments

   if (is_info) return 1;

   if (g->flags & 0x80)
      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);

   return 1;
}

// Header-only info query; heap-allocates the stbi__gif because it is large.
static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   if (!stbi__gif_header(s, g, comp, 1)) {
      STBI_FREE(g);
      stbi__rewind( s );
      return 0;
   }
   if (x) *x = g->w;
   if (y) *y = g->h;
   STBI_FREE(g);
   return 1;
}

// Emit the pixel string for one LZW code into g->out, advancing the
// cursor and honoring GIF interlacing.
static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
{
   stbi_uc *p, *c;
   int idx;

   // recurse to decode the prefixes, since the linked-list is backwards,
   // and working backwards through an interleaved image would be nasty
   if (g->codes[code].prefix >= 0)
      stbi__out_gif_code(g, g->codes[code].prefix);

   if (g->cur_y >= g->max_y) return;

   idx = g->cur_x + g->cur_y;
   p = &g->out[idx];
   g->history[idx / 4] = 1; // mark this pixel as written by the current frame

   c = &g->color_table[g->codes[code].suffix * 4];
   if (c[3] > 128) { // don't render transparent pixels;
      p[0] = c[2];
      p[1] = c[1];
      p[2] = c[0];
      p[3] = c[3];
   }
   g->cur_x += 4;

   if (g->cur_x >= g->max_x) {
      // wrap to next row; for interlaced images step through the passes
      g->cur_x = g->start_x;
      g->cur_y += g->step;

      while (g->cur_y >= g->max_y && g->parse > 0) {
         g->step = (1 << g->parse) * g->line_size;
         g->cur_y = g->start_y + (g->step >> 1);
         --g->parse;
      }
   }
}

// LZW-decode the image data sub-blocks into g->out; returns g->out on
// success, NULL on corrupt data.
static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
{
   stbi_uc lzw_cs;
   stbi__int32 len, init_code;
   stbi__uint32 first;
   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
   stbi__gif_lzw *p;

   lzw_cs = stbi__get8(s);
   if (lzw_cs > 12) return NULL;
   clear = 1 << lzw_cs;
   first = 1;
   codesize = lzw_cs + 1;
   codemask = (1 << codesize) - 1;
   bits = 0;
   valid_bits = 0;
   for (init_code = 0; init_code < clear; init_code++) {
      g->codes[init_code].prefix = -1;
      g->codes[init_code].first = (stbi_uc) init_code;
      g->codes[init_code].suffix = (stbi_uc) init_code;
   }

   // support no starting clear code
   avail = clear+2;
   oldcode = -1;

   len = 0;
   for(;;) {
      if (valid_bits < codesize) {
         // refill the bit buffer from the current data sub-block
         if (len == 0) {
            len = stbi__get8(s); // start new block
            if (len == 0)
               return g->out;
         }
         --len;
         bits |= (stbi__int32) stbi__get8(s) << valid_bits;
         valid_bits += 8;
      } else {
         stbi__int32 code = bits & codemask;
         bits >>= codesize;
         valid_bits -= codesize;
         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
         if (code == clear) {  // clear code
            codesize = lzw_cs + 1;
            codemask = (1 << codesize) - 1;
            avail = clear + 2;
            oldcode = -1;
            first = 0;
         } else if (code == clear + 1) { // end of stream code
            stbi__skip(s, len);
            while ((len = stbi__get8(s)) > 0)
               stbi__skip(s,len);
            return g->out;
         } else if (code <= avail) {
            if (first) {
               return stbi__errpuc("no clear code", "Corrupt GIF");
            }

            if (oldcode >= 0) {
               p = &g->codes[avail++];
               if (avail > 8192) {
                  return stbi__errpuc("too many codes", "Corrupt GIF");
               }

               p->prefix = (stbi__int16) oldcode;
               p->first = g->codes[oldcode].first;
               p->suffix = (code == avail) ?
p->first : g->codes[code].first;
            } else if (code == avail)
               return stbi__errpuc("illegal code in raster", "Corrupt GIF");

            stbi__out_gif_code(g, (stbi__uint16) code);

            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
               // dictionary full for this code width: grow the code size
               codesize++;
               codemask = (1 << codesize) - 1;
            }

            oldcode = code;
         } else {
            return stbi__errpuc("illegal code in raster", "Corrupt GIF");
         }
      }
   }
}

// this function is designed to support animated gifs, although stb_image doesn't support it
// two back is the image from two frames ago, used for a very specific disposal format
static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back)
{
   int dispose;
   int first_frame;
   int pi;
   int pcount;

   // on first frame, any non-written pixels get the background colour (non-transparent)
   first_frame = 0;
   if (g->out == 0) {
      if (!stbi__gif_header(s, g, comp,0))     return 0; // stbi__g_failure_reason set by stbi__gif_header
      g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
      g->background = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
      g->history = (stbi_uc *) stbi__malloc(g->w * g->h);
      // NOTE(review): only g->out is NULL-checked here; g->background and
      // g->history allocations are not -- confirm against upstream.
      if (g->out == 0)                      return stbi__errpuc("outofmem", "Out of memory");

      // image is treated as "transparent" at the start - ie, nothing overwrites the current background;
      // background colour is only used for pixels that are not rendered first frame, after that "background"
      // color refers to the color that was there the previous frame.
      memset( g->out, 0x00, 4 * g->w * g->h );
      memset( g->background, 0x00, 4 * g->w * g->h ); // state of the background (starts transparent)
      memset( g->history, 0x00, g->w * g->h );        // pixels that were affected previous frame
      first_frame = 1;
   } else {
      // second frame - how do we dispose of the previous one?
      dispose = (g->eflags & 0x1C) >> 2;
      pcount = g->w * g->h;

      if ((dispose == 3) && (two_back == 0)) {
         dispose = 2; // if I don't have an image to revert back to, default to the old background
      }

      if (dispose == 3) { // use previous graphic
         for (pi = 0; pi < pcount; ++pi) {
            if (g->history[pi]) {
               memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 );
            }
         }
      } else if (dispose == 2) {
         // restore what was changed last frame to background before that frame;
         for (pi = 0; pi < pcount; ++pi) {
            if (g->history[pi]) {
               memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 );
            }
         }
      } else {
         // This is a non-disposal case either way, so just
         // leave the pixels as is, and they will become the new background
         // 1: do not dispose
         // 0:  not specified.
      }

      // background is what out is after the undoing of the previous frame;
      memcpy( g->background, g->out, 4 * g->w * g->h );
   }

   // clear my history;
   memset( g->history, 0x00, g->w * g->h );        // pixels that were affected previous frame

   for (;;) {
      int tag = stbi__get8(s);
      switch (tag) {
         case 0x2C: /* Image Descriptor */
         {
            stbi__int32 x, y, w, h;
            stbi_uc *o;

            x = stbi__get16le(s);
            y = stbi__get16le(s);
            w = stbi__get16le(s);
            h = stbi__get16le(s);
            if (((x + w) > (g->w)) || ((y + h) > (g->h)))
               return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");

            // cursor fields are byte offsets into the RGBA out buffer
            g->line_size = g->w * 4;
            g->start_x = x * 4;
            g->start_y = y * g->line_size;
            g->max_x   = g->start_x + w * 4;
            g->max_y   = g->start_y + h * g->line_size;
            g->cur_x   = g->start_x;
            g->cur_y   = g->start_y;

            g->lflags = stbi__get8(s);

            if (g->lflags & 0x40) {
               g->step = 8 * g->line_size; // first interlaced spacing
               g->parse = 3;
            } else {
               g->step = g->line_size;
               g->parse = 0;
            }

            if (g->lflags & 0x80) {
               // local color table overrides the global one for this frame
               stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
               g->color_table = (stbi_uc *) g->lpal;
            } else if (g->flags & 0x80) {
               g->color_table = (stbi_uc *) g->pal;
            } else
               return stbi__errpuc("missing color table", "Corrupt GIF");

            o = stbi__process_gif_raster(s, g);
            if (o == NULL) return NULL;

            // if this was the first frame,
            pcount = g->w * g->h;
            if (first_frame && (g->bgindex > 0)) {
               // if first frame, any pixel not drawn to gets the background color
               for (pi = 0; pi < pcount; ++pi) {
                  if (g->history[pi] == 0) {
                     g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
                     memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 );
                  }
               }
            }

            return o;
         }

         case 0x21: // Comment Extension.
         {
            int len;
            int ext = stbi__get8(s);
            if (ext == 0xF9) { // Graphic Control Extension.
               len = stbi__get8(s);
               if (len == 4) {
                  g->eflags = stbi__get8(s);
                  g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
+ + // unset old transparent + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 255; + } + if (g->eflags & 0x01) { + g->transparent = stbi__get8(s); + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 0; + } + } else { + // don't need transparent + stbi__skip(s, 1); + g->transparent = -1; + } + } else { + stbi__skip(s, len); + break; + } + } + while ((len = stbi__get8(s)) != 0) { + stbi__skip(s, len); + } + break; + } + + case 0x3B: // gif stream termination code + return (stbi_uc *) s; // using '1' causes warning on some compilers + + default: + return stbi__errpuc("unknown code", "Corrupt GIF"); + } + } +} + +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + if (stbi__gif_test(s)) { + int layers = 0; + stbi_uc *u = 0; + stbi_uc *out = 0; + stbi_uc *two_back = 0; + stbi__gif g; + int stride; + memset(&g, 0, sizeof(g)); + if (delays) { + *delays = 0; + } + + do { + u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + + if (u) { + *x = g.w; + *y = g.h; + ++layers; + stride = g.w * g.h * 4; + + if (out) { + out = (stbi_uc*) STBI_REALLOC( out, layers * stride ); + if (delays) { + *delays = (int*) STBI_REALLOC( *delays, sizeof(int) * layers ); + } + } else { + out = (stbi_uc*)stbi__malloc( layers * stride ); + if (delays) { + *delays = (int*) stbi__malloc( layers * sizeof(int) ); + } + } + memcpy( out + ((layers - 1) * stride), u, stride ); + if (layers >= 2) { + two_back = out - 2 * stride; + } + + if (delays) { + (*delays)[layers - 1U] = g.delay; + } + } + } while (u != 0); + + // free temp buffer; + STBI_FREE(g.out); + STBI_FREE(g.history); + STBI_FREE(g.background); + + // do the final conversion after loading everything; + if (req_comp && req_comp != 4) + out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); + + *z = layers; + return out; + } else { + return stbi__errpuc("not GIF", "Image was not as a 
gif type."); + } +} + +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *u = 0; + stbi__gif g; + memset(&g, 0, sizeof(g)); + + u = stbi__gif_load_next(s, &g, comp, req_comp, 0); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + if (u) { + *x = g.w; + *y = g.h; + + // moved conversion to after successful load so that the same + // can be done for multiple frames. + if (req_comp && req_comp != 4) + u = stbi__convert_format(u, 4, req_comp, g.w, g.h); + } + + // free buffers needed for multiple frame loading; + STBI_FREE(g.history); + STBI_FREE(g.background); + + return u; +} + +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp) +{ + return stbi__gif_info_raw(s,x,y,comp); +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int stbi__hdr_test_core(stbi__context *s, const char *signature) +{ + int i; + for (i=0; signature[i]; ++i) + if (stbi__get8(s) != signature[i]) + return 0; + stbi__rewind(s); + return 1; +} + +static int stbi__hdr_test(stbi__context* s) +{ + int r = stbi__hdr_test_core(s, "#?RADIANCE\n"); + stbi__rewind(s); + if(!r) { + r = stbi__hdr_test_core(s, "#?RGBE\n"); + stbi__rewind(s); + } + return r; +} + +#define STBI__HDR_BUFLEN 1024 +static char *stbi__hdr_gettoken(stbi__context *z, char *buffer) +{ + int len=0; + char c = '\0'; + + c = (char) stbi__get8(z); + + while (!stbi__at_eof(z) && c != '\n') { + buffer[len++] = c; + if (len == STBI__HDR_BUFLEN-1) { + // flush to end of line + while (!stbi__at_eof(z) && stbi__get8(z) != '\n') + ; + break; + } + c = (char) stbi__get8(z); + } + + buffer[len] = 0; + return buffer; +} + +static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) +{ + if ( input[3] != 0 ) { + float f1; + // Exponent + f1 = (float) ldexp(1.0f, input[3] - 
(int)(128 + 8)); + if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; + } else { + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } + } +} + +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + float *hdr_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + const char *headerToken; + STBI_NOTUSED(ri); + + // Check identifier + headerToken = stbi__hdr_gettoken(s,buffer); + if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) + return stbi__errpf("not HDR", "Corrupt HDR image"); + + // Parse header + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! 
+ token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + height = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + width = (int) strtol(token, NULL, 10); + + *x = width; + *y = height; + + if (comp) *comp = 3; + if (req_comp == 0) req_comp = 3; + + if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0)) + return stbi__errpf("too large", "HDR image is too large"); + + // Read data + hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0); + if (!hdr_data) + return stbi__errpf("outofmem", "Out of memory"); + + // Load image data + // image data is stored as some number of sca + if ( width < 8 || width >= 32768) { + // Read flat data + for (j=0; j < height; ++j) { + for (i=0; i < width; ++i) { + stbi_uc rgbe[4]; + main_decode_loop: + stbi__getn(s, rgbe, 4); + stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); + } + } + } else { + // Read RLE-encoded data + scanline = NULL; + + for (j = 0; j < height; ++j) { + c1 = stbi__get8(s); + c2 = stbi__get8(s); + len = stbi__get8(s); + if (c1 != 2 || c2 != 2 || (len & 0x80)) { + // not run-length encoded, so we have to actually use THIS data as a decoded + // pixel (note this can't be a valid pixel--one of RGB must be >= 128) + stbi_uc rgbe[4]; + rgbe[0] = (stbi_uc) c1; + rgbe[1] = (stbi_uc) c2; + rgbe[2] = (stbi_uc) len; + rgbe[3] = (stbi_uc) stbi__get8(s); + stbi__hdr_convert(hdr_data, rgbe, req_comp); + i = 1; + j = 0; + STBI_FREE(scanline); + goto main_decode_loop; // yes, this makes no sense + } + len <<= 8; + len |= stbi__get8(s); + if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); } + if (scanline == NULL) { + scanline = (stbi_uc *) 
stbi__malloc_mad2(width, 4, 0); + if (!scanline) { + STBI_FREE(hdr_data); + return stbi__errpf("outofmem", "Out of memory"); + } + } + + for (k = 0; k < 4; ++k) { + int nleft; + i = 0; + while ((nleft = width - i) > 0) { + count = stbi__get8(s); + if (count > 128) { + // Run + value = stbi__get8(s); + count -= 128; + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = value; + } else { + // Dump + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = stbi__get8(s); + } + } + } + for (i=0; i < width; ++i) + stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); + } + if (scanline) + STBI_FREE(scanline); + } + + return hdr_data; +} + +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int dummy; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (stbi__hdr_test(s) == 0) { + stbi__rewind( s ); + return 0; + } + + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) { + stbi__rewind( s ); + return 0; + } + token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *y = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *x = (int) strtol(token, NULL, 10); + *comp = 3; + return 1; +} +#endif // STBI_NO_HDR + +#ifndef STBI_NO_BMP +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) +{ + void *p; + stbi__bmp_data info; + + info.all_a = 255; + p = stbi__bmp_parse_header(s, &info); + stbi__rewind( s ); + 
if (p == NULL) + return 0; + if (x) *x = s->img_x; + if (y) *y = s->img_y; + if (comp) *comp = info.ma ? 4 : 3; + return 1; +} +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) +{ + int channelCount, dummy, depth; + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + *y = stbi__get32be(s); + *x = stbi__get32be(s); + depth = stbi__get16be(s); + if (depth != 8 && depth != 16) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 3) { + stbi__rewind( s ); + return 0; + } + *comp = 4; + return 1; +} + +static int stbi__psd_is16(stbi__context *s) +{ + int channelCount, depth; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + (void) stbi__get32be(s); + (void) stbi__get32be(s); + depth = stbi__get16be(s); + if (depth != 16) { + stbi__rewind( s ); + return 0; + } + return 1; +} +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) +{ + int act_comp=0,num_packets=0,chained,dummy; + stbi__pic_packet packets[10]; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) { + stbi__rewind(s); + return 0; + } + + stbi__skip(s, 88); + + *x = stbi__get16be(s); + *y = stbi__get16be(s); + if (stbi__at_eof(s)) { + stbi__rewind( s); + return 0; + } + if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) { + stbi__rewind( s ); + return 0; + } + + stbi__skip(s, 8); + + do { + stbi__pic_packet 
*packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return 0; + + packet = &packets[num_packets++]; + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + act_comp |= packet->channel; + + if (stbi__at_eof(s)) { + stbi__rewind( s ); + return 0; + } + if (packet->size != 8) { + stbi__rewind( s ); + return 0; + } + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); + + return 1; +} +#endif + +// ************************************************************************************************* +// Portable Gray Map and Portable Pixel Map loader +// by Ken Miller +// +// PGM: http://netpbm.sourceforge.net/doc/pgm.html +// PPM: http://netpbm.sourceforge.net/doc/ppm.html +// +// Known limitations: +// Does not support comments in the header section +// Does not support ASCII image data (formats P2 and P3) +// Does not support 16-bit-per-channel + +#ifndef STBI_NO_PNM + +static int stbi__pnm_test(stbi__context *s) +{ + char p, t; + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind( s ); + return 0; + } + return 1; +} + +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + STBI_NOTUSED(ri); + + if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n)) + return 0; + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + + if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "PNM too large"); + + out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + stbi__getn(s, out, s->img_n * s->img_x * s->img_y); + + if (req_comp && req_comp != s->img_n) { + out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure 
+ } + return out; +} + +static int stbi__pnm_isspace(char c) +{ + return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; +} + +static void stbi__pnm_skip_whitespace(stbi__context *s, char *c) +{ + for (;;) { + while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) + *c = (char) stbi__get8(s); + + if (stbi__at_eof(s) || *c != '#') + break; + + while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) + *c = (char) stbi__get8(s); + } +} + +static int stbi__pnm_isdigit(char c) +{ + return c >= '0' && c <= '9'; +} + +static int stbi__pnm_getinteger(stbi__context *s, char *c) +{ + int value = 0; + + while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { + value = value*10 + (*c - '0'); + *c = (char) stbi__get8(s); + } + + return value; +} + +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) +{ + int maxv, dummy; + char c, p, t; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + stbi__rewind(s); + + // Get identifier + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind(s); + return 0; + } + + *comp = (t == '6') ? 
3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm + + c = (char) stbi__get8(s); + stbi__pnm_skip_whitespace(s, &c); + + *x = stbi__pnm_getinteger(s, &c); // read width + stbi__pnm_skip_whitespace(s, &c); + + *y = stbi__pnm_getinteger(s, &c); // read height + stbi__pnm_skip_whitespace(s, &c); + + maxv = stbi__pnm_getinteger(s, &c); // read max value + + if (maxv > 255) + return stbi__err("max value > 255", "PPM image not 8-bit"); + else + return 1; +} +#endif + +static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) +{ + #ifndef STBI_NO_JPEG + if (stbi__jpeg_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNG + if (stbi__png_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_GIF + if (stbi__gif_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_BMP + if (stbi__bmp_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PIC + if (stbi__pic_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNM + if (stbi__pnm_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_info(s, x, y, comp)) return 1; + #endif + + // test tga last because it's a crappy test! 
+ #ifndef STBI_NO_TGA + if (stbi__tga_info(s, x, y, comp)) + return 1; + #endif + return stbi__err("unknown image type", "Image not of any known type, or corrupt"); +} + +static int stbi__is_16_main(stbi__context *s) +{ + #ifndef STBI_NO_PNG + if (stbi__png_is16(s)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_is16(s)) return 1; + #endif + + return 0; +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_info_from_file(f, x, y, comp); + fclose(f); + return result; +} + +STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__info_main(&s,x,y,comp); + fseek(f,pos,SEEK_SET); + return r; +} + +STBIDEF int stbi_is_16_bit(char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_is_16_bit_from_file(f); + fclose(f); + return result; +} + +STBIDEF int stbi_is_16_bit_from_file(FILE *f) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__is_16_main(&s); + fseek(f,pos,SEEK_SET); + return r; +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__is_16_main(&s); +} + +STBIDEF int 
stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__is_16_main(&s); +} + +#endif // STB_IMAGE_IMPLEMENTATION + +/* + revision history: + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug + 1-bit BMP + *_is_16_bit api + avoid warnings + 2.16 (2017-07-23) all functions have 16-bit variants; + STBI_NO_STDIO works again; + compilation fixes; + fix rounding in unpremultiply; + optimize vertical flip; + disable raw_len validation; + documentation fixes + 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; + warning fixes; disable run-time SSE detection on gcc; + uniform handling of optional "return" values; + thread-safe initialization of zlib tables + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) allocate large structures on the stack + remove white matting for transparent PSD + fix reported channel count for PNG & BMP + re-enable SSE2 in non-gcc 64-bit + support RGB-formatted JPEG + read 16-bit PNGs (only as 8-bit) + 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED + 2.09 (2016-01-16) allow comments in PNM files + 16-bit-per-pixel TGA (not bit-per-component) + info() for TGA could break due to .hdr handling + info() for BMP to shares code instead of sloppy parse + can use STBI_REALLOC_SIZED if allocator doesn't support realloc + code cleanup + 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA + 2.07 (2015-09-13) fix compiler warnings + partial animated GIF support + limited 16-bpc PSD support + #ifdef unused functions + bug with < 92 byte PIC,PNM,HDR,TGA + 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value + 2.05 (2015-04-19) fix bug 
in progressive JPEG handling, fix warning + 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit + 2.03 (2015-04-12) extra corruption checking (mmozeiko) + stbi_set_flip_vertically_on_load (nguillemot) + fix NEON support; fix mingw support + 2.02 (2015-01-19) fix incorrect assert, fix warning + 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 + 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG + 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) + progressive JPEG (stb) + PGM/PPM support (Ken Miller) + STBI_MALLOC,STBI_REALLOC,STBI_FREE + GIF bugfix -- seemingly never worked + STBI_NO_*, STBI_ONLY_* + 1.48 (2014-12-14) fix incorrectly-named assert() + 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) + optimize PNG (ryg) + fix bug in interlaced PNG with user-specified channel count (stb) + 1.46 (2014-08-26) + fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG + 1.45 (2014-08-16) + fix MSVC-ARM internal compiler error by wrapping malloc + 1.44 (2014-08-07) + various warning fixes from Ronny Chevalier + 1.43 (2014-07-15) + fix MSVC-only compiler problem in code changed in 1.42 + 1.42 (2014-07-09) + don't define _CRT_SECURE_NO_WARNINGS (affects user code) + fixes to stbi__cleanup_jpeg path + added STBI_ASSERT to avoid requiring assert.h + 1.41 (2014-06-25) + fix search&replace from 1.36 that messed up comments/error messages + 1.40 (2014-06-22) + fix gcc struct-initialization warning + 1.39 (2014-06-15) + fix to TGA optimization when req_comp != number of components in TGA; + fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite) + add support for BMP version 5 (more ignored fields) + 1.38 (2014-06-06) + suppress MSVC warnings on integer casts truncating values + fix accidental rename of 'skip' field of I/O + 1.37 (2014-06-04) + remove duplicate typedef + 1.36 (2014-06-03) + convert to header file single-file library + if 
de-iphone isn't set, load iphone images color-swapped instead of returning NULL + 1.35 (2014-05-27) + various warnings + fix broken STBI_SIMD path + fix bug where stbi_load_from_file no longer left file pointer in correct place + fix broken non-easy path for 32-bit BMP (possibly never used) + TGA optimization by Arseny Kapoulkine + 1.34 (unknown) + use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case + 1.33 (2011-07-14) + make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements + 1.32 (2011-07-13) + support for "info" function for all supported filetypes (SpartanJ) + 1.31 (2011-06-20) + a few more leak fixes, bug in PNG handling (SpartanJ) + 1.30 (2011-06-11) + added ability to load files via callbacks to accomidate custom input streams (Ben Wenger) + removed deprecated format-specific test/load functions + removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway + error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) + fix inefficiency in decoding 32-bit BMP (David Woo) + 1.29 (2010-08-16) + various warning fixes from Aurelien Pocheville + 1.28 (2010-08-01) + fix bug in GIF palette transparency (SpartanJ) + 1.27 (2010-08-01) + cast-to-stbi_uc to fix warnings + 1.26 (2010-07-24) + fix bug in file buffering for PNG reported by SpartanJ + 1.25 (2010-07-17) + refix trans_data warning (Won Chun) + 1.24 (2010-07-12) + perf improvements reading from files on platforms with lock-heavy fgetc() + minor perf improvements for jpeg + deprecated type-specific functions so we'll get feedback if they're needed + attempt to fix trans_data warning (Won Chun) + 1.23 fixed bug in iPhone support + 1.22 (2010-07-10) + removed image *writing* support + stbi_info support from Jetro Lauha + GIF support from Jean-Marc Lienher + iPhone PNG-extensions from James Brown + warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. 
Janez (U+017D)emva) + 1.21 fix use of 'stbi_uc' in header (reported by jon blow) + 1.20 added support for Softimage PIC, by Tom Seddon + 1.19 bug in interlaced PNG corruption check (found by ryg) + 1.18 (2008-08-02) + fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - stbi__convert_format converted one too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug + header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) + 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... 
not sure + fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available + 0.56 fix bug: zlib uncompressed mode len vs. nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant + 0.50 (2006-11-19) + first released version +*/ + + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/workloads/realworld/pinned/darknet/src/stb_image_write.h b/workloads/realworld/pinned/darknet/src/stb_image_write.h new file mode 100644 index 0000000000000000000000000000000000000000..c05e95812b96232abd3617f98255832cc3fe4716 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/stb_image_write.h @@ -0,0 +1,1568 @@ +/* stb_image_write - v1.09 - public domain - http://nothings.org/stb/stb_image_write.h + writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 + no warranty implied; use at your own risk + + Before #including, + + #define STB_IMAGE_WRITE_IMPLEMENTATION + + in the file that you want to have the implementation. + + Will probably not work correctly with strict-aliasing optimizations. + + If using a modern Microsoft Compiler, non-safe versions of CRT calls may cause + compilation warnings or even errors. To avoid this, also before #including, + + #define STBI_MSC_SECURE_CRT + +ABOUT: + + This header file is a library for writing images to C stdio. It could be + adapted to write to memory or a general streaming interface; let me know. + + The PNG output is not optimal; it is 20-50% larger than the file + written by a decent optimizing implementation; though providing a custom + zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that. 
+ This library is designed for source code compactness and simplicity, + not optimal image file size or run-time performance. + +BUILDING: + + You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h. + You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace + malloc,realloc,free. + You can #define STBIW_MEMMOVE() to replace memmove() + You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function + for PNG compression (instead of the builtin one), it must have the following signature: + unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality); + The returned data will be freed with STBIW_FREE() (free() by default), + so it must be heap allocated with STBIW_MALLOC() (malloc() by default), + +USAGE: + + There are five functions, one for each image file format: + + int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality); + int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); + + void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically + + There are also five equivalent functions that use an arbitrary write function. 
You are + expected to open/close your file-equivalent before and after calling these: + + int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); + int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + + where the callback is: + void stbi_write_func(void *context, void *data, int size); + + You can configure it with these global variables: + int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE + int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression + int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode + + + You can define STBI_WRITE_NO_STDIO to disable the file variant of these + functions, so the library will not use stdio.h at all. However, this will + also disable HDR writing, because it requires stdio for formatted output. + + Each function returns 0 on failure and non-0 on success. + + The functions create an image file defined by the parameters. The image + is a rectangle of pixels stored from left-to-right, top-to-bottom. + Each pixel contains 'comp' channels of data stored interleaved with 8-bits + per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is + monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall. + The *data pointer points to the first byte of the top-left-most pixel. + For PNG, "stride_in_bytes" is the distance in bytes from the first byte of + a row of pixels to the first byte of the next row of pixels. 
+ + PNG creates output files with the same number of components as the input. + The BMP format expands Y to RGB in the file format and does not + output alpha. + + PNG supports writing rectangles of data even when the bytes storing rows of + data are not consecutive in memory (e.g. sub-rectangles of a larger image), + by supplying the stride between the beginning of adjacent rows. The other + formats do not. (Thus you cannot write a native-format BMP through the BMP + writer, both because it is in BGR order and because it may have padding + at the end of the line.) + + PNG allows you to set the deflate compression level by setting the global + variable 'stbi_write_png_compression_level' (it defaults to 8). + + HDR expects linear float data. Since the format is always 32-bit rgb(e) + data, alpha (if provided) is discarded, and for monochrome data it is + replicated across all three channels. + + TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed + data, set the global variable 'stbi_write_tga_with_rle' to 0. + + JPEG does ignore alpha channels in input data; quality is between 1 and 100. + Higher quality looks better but results in a bigger image. + JPEG baseline (no JPEG progressive). + +CREDITS: + + + Sean Barrett - PNG/BMP/TGA + Baldur Karlsson - HDR + Jean-Sebastien Guay - TGA monochrome + Tim Kelsey - misc enhancements + Alan Hickman - TGA RLE + Emmanuel Julien - initial file IO callback implementation + Jon Olick - original jo_jpeg.cpp code + Daniel Gibson - integrate JPEG, allow external zlib + Aarni Koskela - allow choosing PNG filter + + bugfixes: + github:Chribba + Guillaume Chereau + github:jry2 + github:romigrou + Sergio Gonzalez + Jonas Karlsson + Filip Wasil + Thatcher Ulrich + github:poppolopoppo + Patrick Boettcher + github:xeekworx + Cap Petschulat + Simon Rodriguez + Ivan Tikhonov + github:ignotion + Adam Schackart + +LICENSE + + See end of file for license information. 
+ +*/ + +#ifndef INCLUDE_STB_IMAGE_WRITE_H +#define INCLUDE_STB_IMAGE_WRITE_H + +// if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline' +#ifndef STBIWDEF +#ifdef STB_IMAGE_WRITE_STATIC +#define STBIWDEF static +#else +#ifdef __cplusplus +#define STBIWDEF extern "C" +#else +#define STBIWDEF extern +#endif +#endif +#endif + +#ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations +extern int stbi_write_tga_with_rle; +extern int stbi_write_png_compression_level; +extern int stbi_write_force_png_filter; +#endif + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); +#endif + +typedef void stbi_write_func(void *context, void *data, int size); + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + +STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); + +#endif//INCLUDE_STB_IMAGE_WRITE_H + +#ifdef STB_IMAGE_WRITE_IMPLEMENTATION + +#ifdef _WIN32 + #ifndef 
_CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS + #endif + #ifndef _CRT_NONSTDC_NO_DEPRECATE + #define _CRT_NONSTDC_NO_DEPRECATE + #endif +#endif + +#ifndef STBI_WRITE_NO_STDIO +#include +#endif // STBI_WRITE_NO_STDIO + +#include +#include +#include +#include + +#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED)) +// ok +#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)." +#endif + +#ifndef STBIW_MALLOC +#define STBIW_MALLOC(sz) malloc(sz) +#define STBIW_REALLOC(p,newsz) realloc(p,newsz) +#define STBIW_FREE(p) free(p) +#endif + +#ifndef STBIW_REALLOC_SIZED +#define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz) +#endif + + +#ifndef STBIW_MEMMOVE +#define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz) +#endif + + +#ifndef STBIW_ASSERT +#include +#define STBIW_ASSERT(x) assert(x) +#endif + +#define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff) + +#ifdef STB_IMAGE_WRITE_STATIC +static int stbi__flip_vertically_on_write=0; +static int stbi_write_png_compression_level = 8; +static int stbi_write_tga_with_rle = 1; +static int stbi_write_force_png_filter = -1; +#else +int stbi_write_png_compression_level = 8; +int stbi__flip_vertically_on_write=0; +int stbi_write_tga_with_rle = 1; +int stbi_write_force_png_filter = -1; +#endif + +STBIWDEF void stbi_flip_vertically_on_write(int flag) +{ + stbi__flip_vertically_on_write = flag; +} + +typedef struct +{ + stbi_write_func *func; + void *context; +} stbi__write_context; + +// initialize a callback-based context +static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context) +{ + s->func = c; + s->context = context; +} + +#ifndef STBI_WRITE_NO_STDIO + +static void stbi__stdio_write(void *context, void *data, int size) +{ + fwrite(data,1,size,(FILE*) 
context); +} + +static int stbi__start_write_file(stbi__write_context *s, const char *filename) +{ + FILE *f; +#ifdef STBI_MSC_SECURE_CRT + if (fopen_s(&f, filename, "wb")) + f = NULL; +#else + f = fopen(filename, "wb"); +#endif + stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f); + return f != NULL; +} + +static void stbi__end_write_file(stbi__write_context *s) +{ + fclose((FILE *)s->context); +} + +#endif // !STBI_WRITE_NO_STDIO + +typedef unsigned int stbiw_uint32; +typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1]; + +static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) +{ + while (*fmt) { + switch (*fmt++) { + case ' ': break; + case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); + s->func(s->context,&x,1); + break; } + case '2': { int x = va_arg(v,int); + unsigned char b[2]; + b[0] = STBIW_UCHAR(x); + b[1] = STBIW_UCHAR(x>>8); + s->func(s->context,b,2); + break; } + case '4': { stbiw_uint32 x = va_arg(v,int); + unsigned char b[4]; + b[0]=STBIW_UCHAR(x); + b[1]=STBIW_UCHAR(x>>8); + b[2]=STBIW_UCHAR(x>>16); + b[3]=STBIW_UCHAR(x>>24); + s->func(s->context,b,4); + break; } + default: + STBIW_ASSERT(0); + return; + } + } +} + +static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) 
+{ + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); +} + +static void stbiw__putc(stbi__write_context *s, unsigned char c) +{ + s->func(s->context, &c, 1); +} + +static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) +{ + unsigned char arr[3]; + arr[0] = a, arr[1] = b, arr[2] = c; + s->func(s->context, arr, 3); +} + +static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d) +{ + unsigned char bg[3] = { 255, 0, 255}, px[3]; + int k; + + if (write_alpha < 0) + s->func(s->context, &d[comp - 1], 1); + + switch (comp) { + case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case + case 1: + if (expand_mono) + stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp + else + s->func(s->context, d, 1); // monochrome TGA + break; + case 4: + if (!write_alpha) { + // composite against pink background + for (k = 0; k < 3; ++k) + px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255; + stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]); + break; + } + /* FALLTHROUGH */ + case 3: + stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); + break; + } + if (write_alpha > 0) + s->func(s->context, &d[comp - 1], 1); +} + +static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) +{ + stbiw_uint32 zero = 0; + int i,j, j_end; + + if (y <= 0) + return; + + if (stbi__flip_vertically_on_write) + vdir *= -1; + + if (vdir < 0) + j_end = -1, j = y-1; + else + j_end = y, j = 0; + + for (; j != j_end; j += vdir) { + for (i=0; i < x; ++i) { + unsigned char *d = (unsigned char *) data + (j*x+i)*comp; + stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); + } + s->func(s->context, &zero, scanline_pad); + } +} + +static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int 
comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) +{ + if (y < 0 || x < 0) { + return 0; + } else { + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); + stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono); + return 1; + } +} + +static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data) +{ + int pad = (-x*3) & 3; + return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad, + "11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header +} + +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_bmp_core(&s, x, y, comp, data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_bmp_core(&s, x, y, comp, data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif //!STBI_WRITE_NO_STDIO + +static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data) +{ + int has_alpha = (comp == 2 || comp == 4); + int colorbytes = has_alpha ? comp-1 : comp; + int format = colorbytes < 2 ? 
3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3 + + if (y < 0 || x < 0) + return 0; + + if (!stbi_write_tga_with_rle) { + return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0, + "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); + } else { + int i,j,k; + int jend, jdir; + + stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); + + if (stbi__flip_vertically_on_write) { + j = 0; + jend = y; + jdir = 1; + } else { + j = y-1; + jend = -1; + jdir = -1; + } + for (; j != jend; j += jdir) { + unsigned char *row = (unsigned char *) data + j * x * comp; + int len; + + for (i = 0; i < x; i += len) { + unsigned char *begin = row + i * comp; + int diff = 1; + len = 1; + + if (i < x - 1) { + ++len; + diff = memcmp(begin, row + (i + 1) * comp, comp); + if (diff) { + const unsigned char *prev = begin; + for (k = i + 2; k < x && len < 128; ++k) { + if (memcmp(prev, row + k * comp, comp)) { + prev += comp; + ++len; + } else { + --len; + break; + } + } + } else { + for (k = i + 2; k < x && len < 128; ++k) { + if (!memcmp(begin, row + k * comp, comp)) { + ++len; + } else { + break; + } + } + } + } + + if (diff) { + unsigned char header = STBIW_UCHAR(len - 1); + s->func(s->context, &header, 1); + for (k = 0; k < len; ++k) { + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); + } + } else { + unsigned char header = STBIW_UCHAR(len - 129); + s->func(s->context, &header, 1); + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); + } + } + } + } + return 1; +} + +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_tga_core(&s, x, y, comp, (void *) data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, 
const void *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_tga_core(&s, x, y, comp, (void *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR writer +// by Baldur Karlsson + +#define stbiw__max(a, b) ((a) > (b) ? (a) : (b)) + +void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear) +{ + int exponent; + float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2])); + + if (maxcomp < 1e-32f) { + rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0; + } else { + float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp; + + rgbe[0] = (unsigned char)(linear[0] * normalize); + rgbe[1] = (unsigned char)(linear[1] * normalize); + rgbe[2] = (unsigned char)(linear[2] * normalize); + rgbe[3] = (unsigned char)(exponent + 128); + } +} + +void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte) +{ + unsigned char lengthbyte = STBIW_UCHAR(length+128); + STBIW_ASSERT(length+128 <= 255); + s->func(s->context, &lengthbyte, 1); + s->func(s->context, &databyte, 1); +} + +void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data) +{ + unsigned char lengthbyte = STBIW_UCHAR(length); + STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code + s->func(s->context, &lengthbyte, 1); + s->func(s->context, data, length); +} + +void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline) +{ + unsigned char scanlineheader[4] = { 2, 2, 0, 0 }; + unsigned char rgbe[4]; + float linear[3]; + int x; + + scanlineheader[2] = (width&0xff00)>>8; + scanlineheader[3] = (width&0x00ff); + + /* skip RLE for images too small or large */ + if (width < 8 || width >= 32768) { + for (x=0; x < width; x++) { + switch (ncomp) { + case 4: /* 
fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + s->func(s->context, rgbe, 4); + } + } else { + int c,r; + /* encode into scratch buffer */ + for (x=0; x < width; x++) { + switch(ncomp) { + case 4: /* fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + scratch[x + width*0] = rgbe[0]; + scratch[x + width*1] = rgbe[1]; + scratch[x + width*2] = rgbe[2]; + scratch[x + width*3] = rgbe[3]; + } + + s->func(s->context, scanlineheader, 4); + + /* RLE each component separately */ + for (c=0; c < 4; c++) { + unsigned char *comp = &scratch[width*c]; + + x = 0; + while (x < width) { + // find first run + r = x; + while (r+2 < width) { + if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) + break; + ++r; + } + if (r+2 >= width) + r = width; + // dump up to first run + while (x < r) { + int len = r-x; + if (len > 128) len = 128; + stbiw__write_dump_data(s, len, &comp[x]); + x += len; + } + // if there's a run, output it + if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd + // find next byte after run + while (r < width && comp[r] == comp[x]) + ++r; + // output run up to r + while (x < r) { + int len = r-x; + if (len > 127) len = 127; + stbiw__write_run_data(s, len, comp[x]); + x += len; + } + } + } + } + } +} + +static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data) +{ + if (y <= 0 || x <= 0 || data == NULL) + return 0; + else { + // Each component is stored separately. Allocate scratch space for full output scanline. 
+ unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); + int i, len; + char buffer[128]; + char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; + s->func(s->context, header, sizeof(header)-1); + +#ifdef STBI_MSC_SECURE_CRT + len = sprintf_s(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#else + len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#endif + s->func(s->context, buffer, len); + + for(i=0; i < y; i++) + stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i)*x); + STBIW_FREE(scratch); + return 1; + } +} + +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_hdr_core(&s, x, y, comp, (float *) data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif // STBI_WRITE_NO_STDIO + + +////////////////////////////////////////////////////////////////////////////// +// +// PNG writer +// + +#ifndef STBIW_ZLIB_COMPRESS +// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() +#define stbiw__sbraw(a) ((int *) (a) - 2) +#define stbiw__sbm(a) stbiw__sbraw(a)[0] +#define stbiw__sbn(a) stbiw__sbraw(a)[1] + +#define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) +#define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0) +#define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) + +#define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) +#define stbiw__sbcount(a) ((a) ? 
stbiw__sbn(a) : 0) +#define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0) + +static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) +{ + int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1; + void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2); + STBIW_ASSERT(p); + if (p) { + if (!*arr) ((int *) p)[1] = 0; + *arr = (void *) ((int *) p + 2); + stbiw__sbm(*arr) = m; + } + return *arr; +} + +static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount) +{ + while (*bitcount >= 8) { + stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer)); + *bitbuffer >>= 8; + *bitcount -= 8; + } + return data; +} + +static int stbiw__zlib_bitrev(int code, int codebits) +{ + int res=0; + while (codebits--) { + res = (res << 1) | (code & 1); + code >>= 1; + } + return res; +} + +static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit) +{ + int i; + for (i=0; i < limit && i < 258; ++i) + if (a[i] != b[i]) break; + return i; +} + +static unsigned int stbiw__zhash(unsigned char *data) +{ + stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16); + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + return hash; +} + +#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount)) +#define stbiw__zlib_add(code,codebits) \ + (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) +#define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) +// default huffman tables +#define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8) +#define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9) +#define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7) +#define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8) +#define stbiw__zlib_huff(n) ((n) <= 143 ? 
stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) +#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) + +#define stbiw__ZHASH 16384 + +#endif // STBIW_ZLIB_COMPRESS + +unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality) +{ +#ifdef STBIW_ZLIB_COMPRESS + // user provided a zlib compress implementation, use that + return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality); +#else // use builtin + static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; + static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; + static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; + static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; + unsigned int bitbuf=0; + int i,j, bitcount=0; + unsigned char *out = NULL; + unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(char**)); + if (hash_table == NULL) + return NULL; + if (quality < 5) quality = 5; + + stbiw__sbpush(out, 0x78); // DEFLATE 32K window + stbiw__sbpush(out, 0x5e); // FLEVEL = 1 + stbiw__zlib_add(1,1); // BFINAL = 1 + stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman + + for (i=0; i < stbiw__ZHASH; ++i) + hash_table[i] = NULL; + + i=0; + while (i < data_len-3) { + // hash next 3 bytes of data to be compressed + int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3; + unsigned char *bestloc = 0; + unsigned char **hlist = hash_table[h]; + int n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32768) { // if entry lies within window + int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i); + if (d >= best) best=d,bestloc=hlist[j]; + } + 
} + // when hash table entry is too long, delete half the entries + if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) { + STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality); + stbiw__sbn(hash_table[h]) = quality; + } + stbiw__sbpush(hash_table[h],data+i); + + if (bestloc) { + // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal + h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1); + hlist = hash_table[h]; + n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32767) { + int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1); + if (e > best) { // if next match is better, bail on current match + bestloc = NULL; + break; + } + } + } + } + + if (bestloc) { + int d = (int) (data+i - bestloc); // distance back + STBIW_ASSERT(d <= 32767 && best <= 258); + for (j=0; best > lengthc[j+1]-1; ++j); + stbiw__zlib_huff(j+257); + if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); + for (j=0; d > distc[j+1]-1; ++j); + stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); + if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]); + i += best; + } else { + stbiw__zlib_huffb(data[i]); + ++i; + } + } + // write out final bytes + for (;i < data_len; ++i) + stbiw__zlib_huffb(data[i]); + stbiw__zlib_huff(256); // end of block + // pad with 0 bits to byte boundary + while (bitcount) + stbiw__zlib_add(0,1); + + for (i=0; i < stbiw__ZHASH; ++i) + (void) stbiw__sbfree(hash_table[i]); + STBIW_FREE(hash_table); + + { + // compute adler32 on input + unsigned int s1=1, s2=0; + int blocklen = (int) (data_len % 5552); + j=0; + while (j < data_len) { + for (i=0; i < blocklen; ++i) s1 += data[j+i], s2 += s1; + s1 %= 65521, s2 %= 65521; + j += blocklen; + blocklen = 5552; + } + stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s2)); + stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s1)); + } + *out_len = stbiw__sbn(out); + // make 
returned pointer freeable + STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len); + return (unsigned char *) stbiw__sbraw(out); +#endif // STBIW_ZLIB_COMPRESS +} + +static unsigned int stbiw__crc32(unsigned char *buffer, int len) +{ + static unsigned int crc_table[256] = + { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 
0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + }; + + unsigned int crc = ~0u; + int i; + for (i=0; i < len; ++i) + crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; + return ~crc; +} + +#define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4) +#define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v)); +#define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3]) + +static void stbiw__wpcrc(unsigned char **data, int len) +{ + unsigned int crc = stbiw__crc32(*data - len - 4, len+4); + stbiw__wp32(*data, crc); +} + +static unsigned 
char stbiw__paeth(int a, int b, int c) +{ + int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c); + if (pa <= pb && pa <= pc) return STBIW_UCHAR(a); + if (pb <= pc) return STBIW_UCHAR(b); + return STBIW_UCHAR(c); +} + +// @OPTIMIZE: provide an option that always forces left-predict or paeth predict +static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer) +{ + static int mapping[] = { 0,1,2,3,4 }; + static int firstmap[] = { 0,1,0,5,6 }; + int *mymap = (y != 0) ? mapping : firstmap; + int i; + int type = mymap[filter_type]; + unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y); + int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes; + for (i = 0; i < n; ++i) { + switch (type) { + case 0: line_buffer[i] = z[i]; break; + case 1: line_buffer[i] = z[i]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break; + case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break; + case 5: line_buffer[i] = z[i]; break; + case 6: line_buffer[i] = z[i]; break; + } + } + for (i=n; i < width*n; ++i) { + switch (type) { + case 0: line_buffer[i] = z[i]; break; + case 1: line_buffer[i] = z[i] - z[i-n]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break; + case 4: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break; + case 5: line_buffer[i] = z[i] - (z[i-n]>>1); break; + case 6: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; + } + } +} + +unsigned char *stbi_write_png_to_mem(unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len) +{ + int force_filter = stbi_write_force_png_filter; + int ctype[5] = { -1, 0, 4, 2, 6 }; + unsigned char sig[8] 
= { 137,80,78,71,13,10,26,10 }; + unsigned char *out,*o, *filt, *zlib; + signed char *line_buffer; + int j,zlen; + + if (stride_bytes == 0) + stride_bytes = x * n; + + if (force_filter >= 5) { + force_filter = -1; + } + + filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; + line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; } + for (j=0; j < y; ++j) { + int filter_type; + if (force_filter > -1) { + filter_type = force_filter; + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, force_filter, line_buffer); + } else { // Estimate the best filter by running through all of them: + int best_filter = 0, best_filter_val = 0x7fffffff, est, i; + for (filter_type = 0; filter_type < 5; filter_type++) { + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, filter_type, line_buffer); + + // Estimate the entropy of the line using this filter; the less, the better. + est = 0; + for (i = 0; i < x*n; ++i) { + est += abs((signed char) line_buffer[i]); + } + if (est < best_filter_val) { + best_filter_val = est; + best_filter = filter_type; + } + } + if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, best_filter, line_buffer); + filter_type = best_filter; + } + } + // when we get here, filter_type contains the filter type, and line_buffer contains the data + filt[j*(x*n+1)] = (unsigned char) filter_type; + STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); + } + STBIW_FREE(line_buffer); + zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level); + STBIW_FREE(filt); + if (!zlib) return 0; + + // each tag requires 12 bytes of overhead + out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12); + if (!out) return 0; + *out_len = 8 + 12+13 + 12+zlen + 12; + + o=out; + STBIW_MEMMOVE(o,sig,8); o+= 8; + stbiw__wp32(o, 13); // header length + stbiw__wptag(o, "IHDR"); + 
stbiw__wp32(o, x); + stbiw__wp32(o, y); + *o++ = 8; + *o++ = STBIW_UCHAR(ctype[n]); + *o++ = 0; + *o++ = 0; + *o++ = 0; + stbiw__wpcrc(&o,13); + + stbiw__wp32(o, zlen); + stbiw__wptag(o, "IDAT"); + STBIW_MEMMOVE(o, zlib, zlen); + o += zlen; + STBIW_FREE(zlib); + stbiw__wpcrc(&o, zlen); + + stbiw__wp32(o,0); + stbiw__wptag(o, "IEND"); + stbiw__wpcrc(&o,0); + + STBIW_ASSERT(o == out + *out_len); + + return out; +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes) +{ + FILE *f; + int len; + unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; +#ifdef STBI_MSC_SECURE_CRT + if (fopen_s(&f, filename, "wb")) + f = NULL; +#else + f = fopen(filename, "wb"); +#endif + if (!f) { STBIW_FREE(png); return 0; } + fwrite(png, 1, len, f); + fclose(f); + STBIW_FREE(png); + return 1; +} +#endif + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes) +{ + int len; + unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; + func(context, png, len); + STBIW_FREE(png); + return 1; +} + + +/* *************************************************************************** + * + * JPEG writer + * + * This is based on Jon Olick's jo_jpeg.cpp: + * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html + */ + +static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18, + 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; + +static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) { + int bitBuf = *bitBufP, bitCnt = *bitCntP; + bitCnt += bs[1]; + bitBuf |= bs[0] << (24 - 
bitCnt); + while(bitCnt >= 8) { + unsigned char c = (bitBuf >> 16) & 255; + stbiw__putc(s, c); + if(c == 255) { + stbiw__putc(s, 0); + } + bitBuf <<= 8; + bitCnt -= 8; + } + *bitBufP = bitBuf; + *bitCntP = bitCnt; +} + +static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) { + float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p; + float z1, z2, z3, z4, z5, z11, z13; + + float tmp0 = d0 + d7; + float tmp7 = d0 - d7; + float tmp1 = d1 + d6; + float tmp6 = d1 - d6; + float tmp2 = d2 + d5; + float tmp5 = d2 - d5; + float tmp3 = d3 + d4; + float tmp4 = d3 - d4; + + // Even part + float tmp10 = tmp0 + tmp3; // phase 2 + float tmp13 = tmp0 - tmp3; + float tmp11 = tmp1 + tmp2; + float tmp12 = tmp1 - tmp2; + + d0 = tmp10 + tmp11; // phase 3 + d4 = tmp10 - tmp11; + + z1 = (tmp12 + tmp13) * 0.707106781f; // c4 + d2 = tmp13 + z1; // phase 5 + d6 = tmp13 - z1; + + // Odd part + tmp10 = tmp4 + tmp5; // phase 2 + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + // The rotator is modified from fig 4-8 to avoid extra negations. + z5 = (tmp10 - tmp12) * 0.382683433f; // c6 + z2 = tmp10 * 0.541196100f + z5; // c2-c6 + z4 = tmp12 * 1.306562965f + z5; // c2+c6 + z3 = tmp11 * 0.707106781f; // c4 + + z11 = tmp7 + z3; // phase 5 + z13 = tmp7 - z3; + + *d5p = z13 + z2; // phase 6 + *d3p = z13 - z2; + *d1p = z11 + z4; + *d7p = z11 - z4; + + *d0p = d0; *d2p = d2; *d4p = d4; *d6p = d6; +} + +static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) { + int tmp1 = val < 0 ? -val : val; + val = val < 0 ? 
val-1 : val; + bits[1] = 1; + while(tmp1 >>= 1) { + ++bits[1]; + } + bits[0] = val & ((1<0)&&(DU[end0pos]==0); --end0pos) { + } + // end0pos = first element in reverse order !=0 + if(end0pos == 0) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + return DU[0]; + } + for(i = 1; i <= end0pos; ++i) { + int startpos = i; + int nrzeroes; + unsigned short bits[2]; + for (; DU[i]==0 && i<=end0pos; ++i) { + } + nrzeroes = i-startpos; + if ( nrzeroes >= 16 ) { + int lng = nrzeroes>>4; + int nrmarker; + for (nrmarker=1; nrmarker <= lng; ++nrmarker) + stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); + nrzeroes &= 15; + } + stbiw__jpg_calcBits(DU[i], bits); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); + } + if(end0pos != 63) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + } + return DU[0]; +} + +static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) { + // Constants that don't pollute global namespace + static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0}; + static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d}; + static const unsigned char std_ac_luminance_values[] = { + 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, + 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, + 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, + 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, + 
0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, + 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, + 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0}; + static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77}; + static const unsigned char std_ac_chrominance_values[] = { + 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, + 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26, + 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, + 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, + 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, + 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, + 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + // Huffman tables + static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}}; + static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}}; + static const unsigned short YAC_HT[256][2] = { + {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const unsigned short UVAC_HT[256][2] = { + {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22, + 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99}; + static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99, + 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99}; + static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, + 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; + + int row, col, i, k; + float fdtbl_Y[64], fdtbl_UV[64]; + unsigned char YTable[64], UVTable[64]; + + if(!data || !width || !height || comp > 4 || comp < 1) { + return 0; + } + + quality = quality ? quality : 90; + quality = quality < 1 ? 1 : quality > 100 ? 100 : quality; + quality = quality < 50 ? 5000 / quality : 200 - quality * 2; + + for(i = 0; i < 64; ++i) { + int uvti, yti = (YQT[i]*quality+50)/100; + YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 
255 : yti); + uvti = (UVQT[i]*quality+50)/100; + UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti); + } + + for(row = 0, k = 0; row < 8; ++row) { + for(col = 0; col < 8; ++col, ++k) { + fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + } + } + + // Write Headers + { + static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; + static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; + const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width), + 3,1,0x11,0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; + s->func(s->context, (void*)head0, sizeof(head0)); + s->func(s->context, (void*)YTable, sizeof(YTable)); + stbiw__putc(s, 1); + s->func(s->context, UVTable, sizeof(UVTable)); + s->func(s->context, (void*)head1, sizeof(head1)); + s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1); + s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); + stbiw__putc(s, 0x10); // HTYACinfo + s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1); + s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); + stbiw__putc(s, 1); // HTUDCinfo + s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values)); + stbiw__putc(s, 0x11); // HTUACinfo + s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); + s->func(s->context, (void*)head2, sizeof(head2)); + } + + // Encode 8x8 macroblocks 
+ { + static const unsigned short fillBits[] = {0x7F, 7}; + const unsigned char *imageData = (const unsigned char *)data; + int DCY=0, DCU=0, DCV=0; + int bitBuf=0, bitCnt=0; + // comp == 2 is grey+alpha (alpha is ignored) + int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; + int x, y, pos; + for(y = 0; y < height; y += 8) { + for(x = 0; x < width; x += 8) { + float YDU[64], UDU[64], VDU[64]; + for(row = y, pos = 0; row < y+8; ++row) { + for(col = x; col < x+8; ++col, ++pos) { + int p = (stbi__flip_vertically_on_write ? height-1-row : row)*width*comp + col*comp; + float r, g, b; + if(row >= height) { + p -= width*comp*(row+1 - height); + } + if(col >= width) { + p -= comp*(col+1 - width); + } + + r = imageData[p+0]; + g = imageData[p+ofsG]; + b = imageData[p+ofsB]; + YDU[pos]=+0.29900f*r+0.58700f*g+0.11400f*b-128; + UDU[pos]=-0.16874f*r-0.33126f*g+0.50000f*b; + VDU[pos]=+0.50000f*r-0.41869f*g-0.08131f*b; + } + } + + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, YDU, fdtbl_Y, DCY, YDC_HT, YAC_HT); + DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, UDU, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); + DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, VDU, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); + } + } + + // Do the bit alignment of the EOI marker + stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); + } + + // EOI + stbiw__putc(s, 0xFF); + stbiw__putc(s, 0xD9); + + return 1; +} + +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality); +} + + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); + stbi__end_write_file(&s); + return r; + } else + return 
0; +} +#endif + +#endif // STB_IMAGE_WRITE_IMPLEMENTATION + +/* Revision history + 1.09 (2018-02-11) + fix typo in zlib quality API, improve STB_I_W_STATIC in C++ + 1.08 (2018-01-29) + add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter + 1.07 (2017-07-24) + doc fix + 1.06 (2017-07-23) + writing JPEG (using Jon Olick's code) + 1.05 ??? + 1.04 (2017-03-03) + monochrome BMP expansion + 1.03 ??? + 1.02 (2016-04-02) + avoid allocating large structures on the stack + 1.01 (2016-01-16) + STBIW_REALLOC_SIZED: support allocators with no realloc support + avoid race-condition in crc initialization + minor compile issues + 1.00 (2015-09-14) + installable file IO function + 0.99 (2015-09-13) + warning fixes; TGA rle support + 0.98 (2015-04-08) + added STBIW_MALLOC, STBIW_ASSERT etc + 0.97 (2015-01-18) + fixed HDR asserts, rewrote HDR rle logic + 0.96 (2015-01-17) + add HDR output + fix monochrome BMP + 0.95 (2014-08-17) + add monochrome TGA output + 0.94 (2014-05-31) + rename private functions to avoid conflicts with stb_image.h + 0.93 (2014-05-27) + warning fixes + 0.92 (2010-08-01) + casts to unsigned char to fix warnings + 0.91 (2010-07-17) + first public release + 0.90 first internal release +*/ + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/workloads/realworld/pinned/darknet/src/tree.c b/workloads/realworld/pinned/darknet/src/tree.c new file mode 100644 index 0000000000000000000000000000000000000000..67b6d431f6f7e92ede234c71ecae9bd9146dc71f --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/tree.c @@ -0,0 +1,139 @@ +#include +#include +#include "tree.h" +#include "utils.h" +#include "data.h" + +void change_leaves(tree *t, char *leaf_list) +{ + list *llist = get_paths(leaf_list); + char **leaves = (char **)list_to_array(llist); + int n = llist->size; + int i,j; + int found = 0; + for(i = 0; i < t->n; ++i){ + t->leaf[i] = 0; + for(j = 0; j < n; ++j){ + if (0==strcmp(t->name[i], leaves[j])){ + t->leaf[i] = 1; + ++found; + break; + } + } + } + fprintf(stderr, "Found %d leaves.\n", found); +} + +float get_hierarchy_probability(float *x, tree *hier, int c, int stride) +{ + float p = 1; + while(c >= 0){ + p = p * x[c*stride]; + c = hier->parent[c]; + } + return p; +} + +void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride) +{ + int j; + for(j = 0; j < n; ++j){ + int parent = hier->parent[j]; + if(parent >= 0){ + predictions[j*stride] *= predictions[parent*stride]; + } + } + if(only_leaves){ + for(j = 0; j < n; ++j){ + if(!hier->leaf[j]) predictions[j*stride] = 0; + } + } +} + +int 
hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride) +{ + float p = 1; + int group = 0; + int i; + while(1){ + float max = 0; + int max_i = 0; + + for(i = 0; i < hier->group_size[group]; ++i){ + int index = i + hier->group_offset[group]; + float val = predictions[(i + hier->group_offset[group])*stride]; + if(val > max){ + max_i = index; + max = val; + } + } + if(p*max > thresh){ + p = p*max; + group = hier->child[max_i]; + if(hier->child[max_i] < 0) return max_i; + } else if (group == 0){ + return max_i; + } else { + return hier->parent[hier->group_offset[group]]; + } + } + return 0; +} + +tree *read_tree(char *filename) +{ + tree t = {0}; + FILE *fp = fopen(filename, "r"); + + char *line; + int last_parent = -1; + int group_size = 0; + int groups = 0; + int n = 0; + while((line=fgetl(fp)) != 0){ + char *id = calloc(256, sizeof(char)); + int parent = -1; + sscanf(line, "%s %d", id, &parent); + t.parent = realloc(t.parent, (n+1)*sizeof(int)); + t.parent[n] = parent; + + t.child = realloc(t.child, (n+1)*sizeof(int)); + t.child[n] = -1; + + t.name = realloc(t.name, (n+1)*sizeof(char *)); + t.name[n] = id; + if(parent != last_parent){ + ++groups; + t.group_offset = realloc(t.group_offset, groups * sizeof(int)); + t.group_offset[groups - 1] = n - group_size; + t.group_size = realloc(t.group_size, groups * sizeof(int)); + t.group_size[groups - 1] = group_size; + group_size = 0; + last_parent = parent; + } + t.group = realloc(t.group, (n+1)*sizeof(int)); + t.group[n] = groups; + if (parent >= 0) { + t.child[parent] = groups; + } + ++n; + ++group_size; + } + ++groups; + t.group_offset = realloc(t.group_offset, groups * sizeof(int)); + t.group_offset[groups - 1] = n - group_size; + t.group_size = realloc(t.group_size, groups * sizeof(int)); + t.group_size[groups - 1] = group_size; + t.n = n; + t.groups = groups; + t.leaf = calloc(n, sizeof(int)); + int i; + for(i = 0; i < n; ++i) t.leaf[i] = 1; + for(i = 0; i < n; ++i) if(t.parent[i] >= 0) 
t.leaf[t.parent[i]] = 0; + + fclose(fp); + tree *tree_ptr = calloc(1, sizeof(tree)); + *tree_ptr = t; + //error(0); + return tree_ptr; +} diff --git a/workloads/realworld/pinned/darknet/src/tree.h b/workloads/realworld/pinned/darknet/src/tree.h new file mode 100644 index 0000000000000000000000000000000000000000..3802b8ead806266edd291de5407b08c2d7ed5dd1 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/tree.h @@ -0,0 +1,8 @@ +#ifndef TREE_H +#define TREE_H +#include "darknet.h" + +int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride); +float get_hierarchy_probability(float *x, tree *hier, int c, int stride); + +#endif diff --git a/workloads/realworld/pinned/darknet/src/upsample_layer.c b/workloads/realworld/pinned/darknet/src/upsample_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..8c7620686435d2b729e6738aec53adfe137b10df --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/upsample_layer.c @@ -0,0 +1,108 @@ +#include "upsample_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + +layer make_upsample_layer(int batch, int w, int h, int c, int stride) +{ + layer l = {0}; + l.type = UPSAMPLE; + l.batch = batch; + l.w = w; + l.h = h; + l.c = c; + l.out_w = w*stride; + l.out_h = h*stride; + l.out_c = c; + if(stride < 0){ + stride = -stride; + l.reverse=1; + l.out_w = w/stride; + l.out_h = h/stride; + } + l.stride = stride; + l.outputs = l.out_w*l.out_h*l.out_c; + l.inputs = l.w*l.h*l.c; + // l.delta = calloc(l.outputs*batch, sizeof(float)); + // l.output = calloc(l.outputs*batch, sizeof(float)); + cudaMallocHost(&l.output, l.outputs*batch*sizeof(float)); + cudaMallocHost(&l.delta, l.outputs*batch*sizeof(float)); + + l.forward = forward_upsample_layer; + l.backward = backward_upsample_layer; + #ifdef GPU + l.forward_gpu = forward_upsample_layer_gpu; + l.backward_gpu = backward_upsample_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = 
cuda_make_array(l.output, l.outputs*batch); + #endif + if(l.reverse) fprintf(stderr, "downsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); + else fprintf(stderr, "upsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); + return l; +} + +void resize_upsample_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + l->out_w = w*l->stride; + l->out_h = h*l->stride; + if(l->reverse){ + l->out_w = w/l->stride; + l->out_h = h/l->stride; + } + l->outputs = l->out_w*l->out_h*l->out_c; + l->inputs = l->h*l->w*l->c; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + +void forward_upsample_layer(const layer l, network net) +{ + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + if(l.reverse){ + upsample_cpu(l.output, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input); + }else{ + upsample_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output); + } +} + +void backward_upsample_layer(const layer l, network net) +{ + if(l.reverse){ + upsample_cpu(l.delta, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, net.delta); + }else{ + upsample_cpu(net.delta, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta); + } +} + +#ifdef GPU +void forward_upsample_layer_gpu(const layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + if(l.reverse){ + upsample_gpu(l.output_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input_gpu); + }else{ + upsample_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output_gpu); + } +} + +void backward_upsample_layer_gpu(const layer l, network net) +{ + if(l.reverse){ + upsample_gpu(l.delta_gpu, 
l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, net.delta_gpu); + }else{ + upsample_gpu(net.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta_gpu); + } +} +#endif diff --git a/workloads/realworld/pinned/darknet/src/upsample_layer.h b/workloads/realworld/pinned/darknet/src/upsample_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..86790d1088354ea9c46a4b20fbe1dacf36925ca8 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/upsample_layer.h @@ -0,0 +1,15 @@ +#ifndef UPSAMPLE_LAYER_H +#define UPSAMPLE_LAYER_H +#include "darknet.h" + +layer make_upsample_layer(int batch, int w, int h, int c, int stride); +void forward_upsample_layer(const layer l, network net); +void backward_upsample_layer(const layer l, network net); +void resize_upsample_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_upsample_layer_gpu(const layer l, network net); +void backward_upsample_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/pinned/darknet/src/utils.c b/workloads/realworld/pinned/darknet/src/utils.c new file mode 100644 index 0000000000000000000000000000000000000000..626b4678c1e2779552ed9d34f19ce4b0f57d9ded --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/utils.c @@ -0,0 +1,726 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + + +/* +// old timing. is it better? who knows!! 
+double get_wall_time() +{ + struct timeval time; + if (gettimeofday(&time,NULL)){ + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * .000001; +} +*/ + +double what_time_is_it_now() +{ + struct timeval time; + if (gettimeofday(&time,NULL)){ + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * .000001; +} + +int *read_intlist(char *gpu_list, int *ngpus, int d) +{ + int *gpus = 0; + if(gpu_list){ + int len = strlen(gpu_list); + *ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++*ngpus; + } + gpus = calloc(*ngpus, sizeof(int)); + for(i = 0; i < *ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpus = calloc(1, sizeof(float)); + *gpus = d; + *ngpus = 1; + } + return gpus; +} + +int *read_map(char *filename) +{ + int n = 0; + int *map = 0; + char *str; + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + while((str=fgetl(file))){ + ++n; + map = realloc(map, n*sizeof(int)); + map[n-1] = atoi(str); + } + return map; +} + +void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections) +{ + size_t i; + for(i = 0; i < sections; ++i){ + size_t start = n*i/sections; + size_t end = n*(i+1)/sections; + size_t num = end-start; + shuffle(arr+(start*size), num, size); + } +} + +void shuffle(void *arr, size_t n, size_t size) +{ + size_t i; + void *swp = calloc(1, size); + for(i = 0; i < n-1; ++i){ + size_t j = i + rand()/(RAND_MAX / (n-i)+1); + memcpy(swp, arr+(j*size), size); + memcpy(arr+(j*size), arr+(i*size), size); + memcpy(arr+(i*size), swp, size); + } +} + +int *random_index_order(int min, int max) +{ + int *inds = calloc(max-min, sizeof(int)); + int i; + for(i = min; i < max; ++i){ + inds[i] = i; + } + for(i = min; i < max-1; ++i){ + int swap = inds[i]; + int index = i + rand()%(max-i); + inds[i] = inds[index]; + inds[index] = swap; + } + return inds; +} + +void del_arg(int argc, char **argv, int index) +{ + int i; + for(i = index; i < 
argc-1; ++i) argv[i] = argv[i+1]; + argv[i] = 0; +} + +int find_arg(int argc, char* argv[], char *arg) +{ + int i; + for(i = 0; i < argc; ++i) { + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)) { + del_arg(argc, argv, i); + return 1; + } + } + return 0; +} + +int find_int_arg(int argc, char **argv, char *arg, int def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = atoi(argv[i+1]); + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + +float find_float_arg(int argc, char **argv, char *arg, float def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = atof(argv[i+1]); + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + +char *find_char_arg(int argc, char **argv, char *arg, char *def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = argv[i+1]; + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + + +char *basecfg(char *cfgfile) +{ + char *c = cfgfile; + char *next; + while((next = strchr(c, '/'))) + { + c = next+1; + } + c = copy_string(c); + next = strchr(c, '.'); + if (next) *next = 0; + return c; +} + +int alphanum_to_int(char c) +{ + return (c < 58) ? c - 48 : c-87; +} +char int_to_alphanum(int i) +{ + if (i == 36) return '.'; + return (i < 10) ? i + 48 : i + 87; +} + +void pm(int M, int N, float *A) +{ + int i,j; + for(i =0 ; i < M; ++i){ + printf("%d ", i+1); + for(j = 0; j < N; ++j){ + printf("%2.4f, ", A[i*N+j]); + } + printf("\n"); + } + printf("\n"); +} + +void find_replace(char *str, char *orig, char *rep, char *output) +{ + char buffer[4096] = {0}; + char *p; + + sprintf(buffer, "%s", str); + if(!(p = strstr(buffer, orig))){ // Is 'orig' even in 'str'? 
+ sprintf(output, "%s", str); + return; + } + + *p = '\0'; + + sprintf(output, "%s%s%s", buffer, rep, p+strlen(orig)); +} + +float sec(clock_t clocks) +{ + return (float)clocks/CLOCKS_PER_SEC; +} + +void top_k(float *a, int n, int k, int *index) +{ + int i,j; + for(j = 0; j < k; ++j) index[j] = -1; + for(i = 0; i < n; ++i){ + int curr = i; + for(j = 0; j < k; ++j){ + if((index[j] < 0) || a[curr] > a[index[j]]){ + int swap = curr; + curr = index[j]; + index[j] = swap; + } + } + } +} + +void error(const char *s) +{ + perror(s); + assert(0); + exit(-1); +} + +unsigned char *read_file(char *filename) +{ + FILE *fp = fopen(filename, "rb"); + size_t size; + + fseek(fp, 0, SEEK_END); + size = ftell(fp); + fseek(fp, 0, SEEK_SET); + + unsigned char *text = calloc(size+1, sizeof(char)); + fread(text, 1, size, fp); + fclose(fp); + return text; +} + +void malloc_error() +{ + fprintf(stderr, "Malloc error\n"); + exit(-1); +} + +void file_error(char *s) +{ + fprintf(stderr, "Couldn't open file: %s\n", s); + exit(0); +} + +list *split_str(char *s, char delim) +{ + size_t i; + size_t len = strlen(s); + list *l = make_list(); + list_insert(l, s); + for(i = 0; i < len; ++i){ + if(s[i] == delim){ + s[i] = '\0'; + list_insert(l, &(s[i+1])); + } + } + return l; +} + +void strip(char *s) +{ + size_t i; + size_t len = strlen(s); + size_t offset = 0; + for(i = 0; i < len; ++i){ + char c = s[i]; + if(c==' '||c=='\t'||c=='\n') ++offset; + else s[i-offset] = c; + } + s[len-offset] = '\0'; +} + +void strip_char(char *s, char bad) +{ + size_t i; + size_t len = strlen(s); + size_t offset = 0; + for(i = 0; i < len; ++i){ + char c = s[i]; + if(c==bad) ++offset; + else s[i-offset] = c; + } + s[len-offset] = '\0'; +} + +void free_ptrs(void **ptrs, int n) +{ + int i; + for(i = 0; i < n; ++i) free(ptrs[i]); + free(ptrs); +} + +char *fgetl(FILE *fp) +{ + if(feof(fp)) return 0; + size_t size = 512; + char *line = malloc(size*sizeof(char)); + if(!fgets(line, size, fp)){ + free(line); + return 0; + } + + 
size_t curr = strlen(line); + + while((line[curr-1] != '\n') && !feof(fp)){ + if(curr == size-1){ + size *= 2; + line = realloc(line, size*sizeof(char)); + if(!line) { + printf("%ld\n", size); + malloc_error(); + } + } + size_t readsize = size-curr; + if(readsize > INT_MAX) readsize = INT_MAX-1; + fgets(&line[curr], readsize, fp); + curr = strlen(line); + } + if(line[curr-1] == '\n') line[curr-1] = '\0'; + + return line; +} + +int read_int(int fd) +{ + int n = 0; + int next = read(fd, &n, sizeof(int)); + if(next <= 0) return -1; + return n; +} + +void write_int(int fd, int n) +{ + int next = write(fd, &n, sizeof(int)); + if(next <= 0) error("read failed"); +} + +int read_all_fail(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + int next = read(fd, buffer + n, bytes-n); + if(next <= 0) return 1; + n += next; + } + return 0; +} + +int write_all_fail(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + size_t next = write(fd, buffer + n, bytes-n); + if(next <= 0) return 1; + n += next; + } + return 0; +} + +void read_all(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + int next = read(fd, buffer + n, bytes-n); + if(next <= 0) error("read failed"); + n += next; + } +} + +void write_all(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + size_t next = write(fd, buffer + n, bytes-n); + if(next <= 0) error("write failed"); + n += next; + } +} + + +char *copy_string(char *s) +{ + char *copy = malloc(strlen(s)+1); + strncpy(copy, s, strlen(s)+1); + return copy; +} + +list *parse_csv_line(char *line) +{ + list *l = make_list(); + char *c, *p; + int in = 0; + for(c = line, p = line; *c != '\0'; ++c){ + if(*c == '"') in = !in; + else if(*c == ',' && !in){ + *c = '\0'; + list_insert(l, copy_string(p)); + p = c+1; + } + } + list_insert(l, copy_string(p)); + return l; +} + +int count_fields(char *line) +{ + int count = 0; + int done = 0; + char *c; + for(c = line; !done; ++c){ 
+ done = (*c == '\0'); + if(*c == ',' || done) ++count; + } + return count; +} + +float *parse_fields(char *line, int n) +{ + float *field = calloc(n, sizeof(float)); + char *c, *p, *end; + int count = 0; + int done = 0; + for(c = line, p = line; !done; ++c){ + done = (*c == '\0'); + if(*c == ',' || done){ + *c = '\0'; + field[count] = strtod(p, &end); + if(p == c) field[count] = nan(""); + if(end != c && (end != c-1 || *end != '\r')) field[count] = nan(""); //DOS file formats! + p = c+1; + ++count; + } + } + return field; +} + +float sum_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i) sum += a[i]; + return sum; +} + +float mean_array(float *a, int n) +{ + return sum_array(a,n)/n; +} + +void mean_arrays(float **a, int n, int els, float *avg) +{ + int i; + int j; + memset(avg, 0, els*sizeof(float)); + for(j = 0; j < n; ++j){ + for(i = 0; i < els; ++i){ + avg[i] += a[j][i]; + } + } + for(i = 0; i < els; ++i){ + avg[i] /= n; + } +} + +void print_statistics(float *a, int n) +{ + float m = mean_array(a, n); + float v = variance_array(a, n); + printf("MSE: %.6f, Mean: %.6f, Variance: %.6f\n", mse_array(a, n), m, v); +} + +float variance_array(float *a, int n) +{ + int i; + float sum = 0; + float mean = mean_array(a, n); + for(i = 0; i < n; ++i) sum += (a[i] - mean)*(a[i]-mean); + float variance = sum/n; + return variance; +} + +int constrain_int(int a, int min, int max) +{ + if (a < min) return min; + if (a > max) return max; + return a; +} + +float constrain(float min, float max, float a) +{ + if (a < min) return min; + if (a > max) return max; + return a; +} + +float dist_array(float *a, float *b, int n, int sub) +{ + int i; + float sum = 0; + for(i = 0; i < n; i += sub) sum += pow(a[i]-b[i], 2); + return sqrt(sum); +} + +float mse_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i) sum += a[i]*a[i]; + return sqrt(sum/n); +} + +void normalize_array(float *a, int n) +{ + int i; + float mu = mean_array(a,n); + float 
sigma = sqrt(variance_array(a,n)); + for(i = 0; i < n; ++i){ + a[i] = (a[i] - mu)/sigma; + } + mu = mean_array(a,n); + sigma = sqrt(variance_array(a,n)); +} + +void translate_array(float *a, int n, float s) +{ + int i; + for(i = 0; i < n; ++i){ + a[i] += s; + } +} + +float mag_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + sum += a[i]*a[i]; + } + return sqrt(sum); +} + +void scale_array(float *a, int n, float s) +{ + int i; + for(i = 0; i < n; ++i){ + a[i] *= s; + } +} + +int sample_array(float *a, int n) +{ + float sum = sum_array(a, n); + scale_array(a, n, 1./sum); + float r = rand_uniform(0, 1); + int i; + for(i = 0; i < n; ++i){ + r = r - a[i]; + if (r <= 0) return i; + } + return n-1; +} + +int max_int_index(int *a, int n) +{ + if(n <= 0) return -1; + int i, max_i = 0; + int max = a[0]; + for(i = 1; i < n; ++i){ + if(a[i] > max){ + max = a[i]; + max_i = i; + } + } + return max_i; +} + +int max_index(float *a, int n) +{ + if(n <= 0) return -1; + int i, max_i = 0; + float max = a[0]; + for(i = 1; i < n; ++i){ + if(a[i] > max){ + max = a[i]; + max_i = i; + } + } + return max_i; +} + +int int_index(int *a, int val, int n) +{ + int i; + for(i = 0; i < n; ++i){ + if(a[i] == val) return i; + } + return -1; +} + +int rand_int(int min, int max) +{ + if (max < min){ + int s = min; + min = max; + max = s; + } + int r = (rand()%(max - min + 1)) + min; + return r; +} + +// From http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform +float rand_normal() +{ + static int haveSpare = 0; + static double rand1, rand2; + + if(haveSpare) + { + haveSpare = 0; + return sqrt(rand1) * sin(rand2); + } + + haveSpare = 1; + + rand1 = rand() / ((double) RAND_MAX); + if(rand1 < 1e-100) rand1 = 1e-100; + rand1 = -2 * log(rand1); + rand2 = (rand() / ((double) RAND_MAX)) * TWO_PI; + + return sqrt(rand1) * cos(rand2); +} + +/* + float rand_normal() + { + int n = 12; + int i; + float sum= 0; + for(i = 0; i < n; ++i) sum += (float)rand()/RAND_MAX; + return 
sum-n/2.; + } + */ + +size_t rand_size_t() +{ + return ((size_t)(rand()&0xff) << 56) | + ((size_t)(rand()&0xff) << 48) | + ((size_t)(rand()&0xff) << 40) | + ((size_t)(rand()&0xff) << 32) | + ((size_t)(rand()&0xff) << 24) | + ((size_t)(rand()&0xff) << 16) | + ((size_t)(rand()&0xff) << 8) | + ((size_t)(rand()&0xff) << 0); +} + +float rand_uniform(float min, float max) +{ + if(max < min){ + float swap = min; + min = max; + max = swap; + } + return ((float)rand()/RAND_MAX * (max - min)) + min; +} + +float rand_scale(float s) +{ + float scale = rand_uniform(1, s); + if(rand()%2) return scale; + return 1./scale; +} + +float **one_hot_encode(float *a, int n, int k) +{ + int i; + float **t = calloc(n, sizeof(float*)); + for(i = 0; i < n; ++i){ + t[i] = calloc(k, sizeof(float)); + int index = (int)a[i]; + t[i][index] = 1; + } + return t; +} + diff --git a/workloads/realworld/pinned/darknet/src/utils.h b/workloads/realworld/pinned/darknet/src/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..ef24da79888612f5b48fbb4dc233c483590e0c34 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/utils.h @@ -0,0 +1,53 @@ +#ifndef UTILS_H +#define UTILS_H +#include +#include +#include "darknet.h" +#include "list.h" + +#define TIME(a) \ + do { \ + double start = what_time_is_it_now(); \ + a; \ + printf("%s took: %f seconds\n", #a, what_time_is_it_now() - start); \ + } while (0) + +#define TWO_PI 6.2831853071795864769252866f + +double what_time_is_it_now(); +void shuffle(void *arr, size_t n, size_t size); +void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections); +void free_ptrs(void **ptrs, int n); +int alphanum_to_int(char c); +char int_to_alphanum(int i); +int read_int(int fd); +void write_int(int fd, int n); +void read_all(int fd, char *buffer, size_t bytes); +void write_all(int fd, char *buffer, size_t bytes); +int read_all_fail(int fd, char *buffer, size_t bytes); +int write_all_fail(int fd, char *buffer, size_t bytes); +void 
find_replace(char *str, char *orig, char *rep, char *output); +void malloc_error(); +void file_error(char *s); +void strip(char *s); +void strip_char(char *s, char bad); +list *split_str(char *s, char delim); +char *fgetl(FILE *fp); +list *parse_csv_line(char *line); +char *copy_string(char *s); +int count_fields(char *line); +float *parse_fields(char *line, int n); +void translate_array(float *a, int n, float s); +float constrain(float min, float max, float a); +int constrain_int(int a, int min, int max); +float rand_scale(float s); +int rand_int(int min, int max); +void mean_arrays(float **a, int n, int els, float *avg); +float dist_array(float *a, float *b, int n, int sub); +float **one_hot_encode(float *a, int n, int k); +float sec(clock_t clocks); +void print_statistics(float *a, int n); +int int_index(int *a, int val, int n); + +#endif + diff --git a/workloads/realworld/pinned/darknet/src/yolo_layer.c b/workloads/realworld/pinned/darknet/src/yolo_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..9e483a2179c662e03b1b55ae2953a04c8d864e18 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/yolo_layer.c @@ -0,0 +1,376 @@ +#include "yolo_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes) +{ + int i; + layer l = {0}; + l.type = YOLO; + + l.n = n; + l.total = total; + l.batch = batch; + l.h = h; + l.w = w; + l.c = n*(classes + 4 + 1); + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.cost = calloc(1, sizeof(float)); + l.biases = calloc(total*2, sizeof(float)); + if(mask) l.mask = mask; + else{ + l.mask = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + l.mask[i] = i; + } + } + l.bias_updates = calloc(n*2, sizeof(float)); + l.outputs = h*w*n*(classes + 4 + 1); + l.inputs = l.outputs; + l.truths = 90*(4 + 1); 
+ // l.delta = calloc(batch*l.outputs, sizeof(float)); + // l.output = calloc(batch*l.outputs, sizeof(float)); + cudaMallocHost(&l.output, l.outputs*batch*sizeof(float)); + cudaMallocHost(&l.delta, l.outputs*batch*sizeof(float)); + for(i = 0; i < total*2; ++i){ + l.biases[i] = .5; + } + + l.forward = forward_yolo_layer; + l.backward = backward_yolo_layer; +#ifdef GPU + l.forward_gpu = forward_yolo_layer_gpu; + l.backward_gpu = backward_yolo_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "yolo\n"); + srand(0); + + return l; +} + +void resize_yolo_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->n*(l->classes + 4 + 1); + l->inputs = l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride) +{ + box b; + b.x = (i + x[index + 0*stride]) / lw; + b.y = (j + x[index + 1*stride]) / lh; + b.w = exp(x[index + 2*stride]) * biases[2*n] / w; + b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h; + return b; +} + +float delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride) +{ + box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); + float iou = box_iou(pred, truth); + + float tx = (truth.x*lw - i); + float ty = (truth.y*lh - j); + float tw = log(truth.w*w / biases[2*n]); + float th = log(truth.h*h / biases[2*n + 1]); + + delta[index + 0*stride] = scale * (tx - x[index + 0*stride]); + delta[index + 
1*stride] = scale * (ty - x[index + 1*stride]); + delta[index + 2*stride] = scale * (tw - x[index + 2*stride]); + delta[index + 3*stride] = scale * (th - x[index + 3*stride]); + return iou; +} + + +void delta_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat) +{ + int n; + if (delta[index]){ + delta[index + stride*class] = 1 - output[index + stride*class]; + if(avg_cat) *avg_cat += output[index + stride*class]; + return; + } + for(n = 0; n < classes; ++n){ + delta[index + stride*n] = ((n == class)?1 : 0) - output[index + stride*n]; + if(n == class && avg_cat) *avg_cat += output[index + stride*n]; + } +} + +static int entry_index(layer l, int batch, int location, int entry) +{ + int n = location / (l.w*l.h); + int loc = location % (l.w*l.h); + return batch*l.outputs + n*l.w*l.h*(4+l.classes+1) + entry*l.w*l.h + loc; +} + +void forward_yolo_layer(const layer l, network net) +{ + int i,j,b,t,n; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array(l.output + index, (1+l.classes)*l.w*l.h, LOGISTIC); + } + } +#endif + + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + if(!net.train) return; + float avg_iou = 0; + float recall = 0; + float recall75 = 0; + float avg_cat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + int class_count = 0; + *(l.cost) = 0; + for (b = 0; b < l.batch; ++b) { + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.w*l.h); + float best_iou = 0; + int best_t = 0; + for(t = 0; t < l.max_boxes; ++t){ + box truth = 
float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1); + if(!truth.x) break; + float iou = box_iou(pred, truth); + if (iou > best_iou) { + best_iou = iou; + best_t = t; + } + } + int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4); + avg_anyobj += l.output[obj_index]; + l.delta[obj_index] = 0 - l.output[obj_index]; + if (best_iou > l.ignore_thresh) { + l.delta[obj_index] = 0; + } + if (best_iou > l.truth_thresh) { + l.delta[obj_index] = 1 - l.output[obj_index]; + + int class = net.truth[best_t*(4 + 1) + b*l.truths + 4]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); + delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, 0); + box truth = float_to_box(net.truth + best_t*(4 + 1) + b*l.truths, 1); + delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + } + } + } + } + for(t = 0; t < l.max_boxes; ++t){ + box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1); + + if(!truth.x) break; + float best_iou = 0; + int best_n = 0; + i = (truth.x * l.w); + j = (truth.y * l.h); + box truth_shift = truth; + truth_shift.x = truth_shift.y = 0; + for(n = 0; n < l.total; ++n){ + box pred = {0}; + pred.w = l.biases[2*n]/net.w; + pred.h = l.biases[2*n+1]/net.h; + float iou = box_iou(pred, truth_shift); + if (iou > best_iou){ + best_iou = iou; + best_n = n; + } + } + + int mask_n = int_index(l.mask, best_n, l.n); + if(mask_n >= 0){ + int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); + float iou = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + + int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4); + avg_obj += l.output[obj_index]; + l.delta[obj_index] = 1 - l.output[obj_index]; + + int class = net.truth[t*(4 + 1) + b*l.truths + 4]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, 
b, mask_n*l.w*l.h + j*l.w + i, 4 + 1); + delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, &avg_cat); + + ++count; + ++class_count; + if(iou > .5) recall += 1; + if(iou > .75) recall75 += 1; + avg_iou += iou; + } + } + } + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", net.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count); +} + +void backward_yolo_layer(const layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +{ + int i; + int new_w=0; + int new_h=0; + if (((float)netw/w) < ((float)neth/h)) { + new_w = netw; + new_h = (h * netw)/w; + } else { + new_h = neth; + new_w = (w * neth)/h; + } + for (i = 0; i < n; ++i){ + box b = dets[i].bbox; + b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); + b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); + b.w *= (float)netw/new_w; + b.h *= (float)neth/new_h; + if(!relative){ + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + dets[i].bbox = b; + } +} + +int yolo_num_detections(layer l, float thresh) +{ + int i, n; + int count = 0; + for (i = 0; i < l.w*l.h; ++i){ + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4); + if(l.output[obj_index] > thresh){ + ++count; + } + } + } + return count; +} + +void avg_flipped_yolo(layer l) +{ + int i,j,n,z; + float *flip = l.output + l.outputs; + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w/2; ++i) { + for (n = 0; n < l.n; ++n) { + for(z = 0; z < l.classes + 4 + 1; ++z){ + int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; + int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); + float swap = flip[i1]; + flip[i1] = flip[i2]; + flip[i2] = swap; + if(z == 0){ + flip[i1] = -flip[i1]; + flip[i2] 
= -flip[i2]; + } + } + } + } + } + for(i = 0; i < l.outputs; ++i){ + l.output[i] = (l.output[i] + flip[i])/2.; + } +} + +int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets) +{ + int i,j,n; + float *predictions = l.output; + if (l.batch == 2) avg_flipped_yolo(l); + int count = 0; + for (i = 0; i < l.w*l.h; ++i){ + int row = i / l.w; + int col = i % l.w; + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4); + float objectness = predictions[obj_index]; + if(objectness <= thresh) continue; + int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); + dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); + dets[count].objectness = objectness; + dets[count].classes = l.classes; + for(j = 0; j < l.classes; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, 4 + 1 + j); + float prob = objectness*predictions[class_index]; + dets[count].prob[j] = (prob > thresh) ? 
prob : 0; + } + ++count; + } + } + correct_yolo_boxes(dets, count, w, h, netw, neth, relative); + return count; +} + +#ifdef GPU + +void forward_yolo_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array_gpu(l.output_gpu + index, (1+l.classes)*l.w*l.h, LOGISTIC); + } + } + if(!net.train || l.onlyforward){ + cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + return; + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_yolo_layer(l, net); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_yolo_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/pinned/darknet/src/yolo_layer.h b/workloads/realworld/pinned/darknet/src/yolo_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..d2a0243268146e00ebff2b4b11bce23f830689d1 --- /dev/null +++ b/workloads/realworld/pinned/darknet/src/yolo_layer.h @@ -0,0 +1,19 @@ +#ifndef YOLO_LAYER_H +#define YOLO_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes); +void forward_yolo_layer(const layer l, network net); +void backward_yolo_layer(const layer l, network net); +void resize_yolo_layer(layer *l, int w, int h); +int yolo_num_detections(layer l, float thresh); + +#ifdef GPU +void forward_yolo_layer_gpu(const layer l, network net); +void backward_yolo_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/pinned/darknet/yolov3-tiny/predictions.jpg b/workloads/realworld/pinned/darknet/yolov3-tiny/predictions.jpg new 
file mode 100644 index 0000000000000000000000000000000000000000..e76dabc8b31ad049905fe65ca8aeee298ae22f2e Binary files /dev/null and b/workloads/realworld/pinned/darknet/yolov3-tiny/predictions.jpg differ diff --git a/workloads/realworld/pinned/darknet/yolov3-tiny/run_super.sh b/workloads/realworld/pinned/darknet/yolov3-tiny/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..d890a7c581dc59167fab1b93778d143fc7e679a9 --- /dev/null +++ b/workloads/realworld/pinned/darknet/yolov3-tiny/run_super.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg diff --git a/workloads/realworld/pinned/darknet/yolov3-tiny/run_yolov3-tiny.sh b/workloads/realworld/pinned/darknet/yolov3-tiny/run_yolov3-tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..d890a7c581dc59167fab1b93778d143fc7e679a9 --- /dev/null +++ b/workloads/realworld/pinned/darknet/yolov3-tiny/run_yolov3-tiny.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg diff --git a/workloads/realworld/pinned/darknet/yolov3-tiny_b/run_super.sh b/workloads/realworld/pinned/darknet/yolov3-tiny_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..31669e62fb94142e7dc24b3f905c8f1d25950367 --- /dev/null +++ b/workloads/realworld/pinned/darknet/yolov3-tiny_b/run_super.sh @@ -0,0 +1,2 @@ +#../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg +../darknet detector infer ../cfg/coco.data ../cfg/yolov3-tiny_b.cfg diff --git a/workloads/realworld/pinned/darknet/yolov3-tiny_t/run_super.sh b/workloads/realworld/pinned/darknet/yolov3-tiny_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..6cc56bc601476fa212f615b5aec964f12e044473 --- /dev/null +++ b/workloads/realworld/pinned/darknet/yolov3-tiny_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet detect 
../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg +../darknet detector train ../cfg/coco.data ../cfg/yolov3-tiny_t.cfg diff --git a/workloads/realworld/pinned/darknet/yolov3/predictions.jpg b/workloads/realworld/pinned/darknet/yolov3/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..49c1abe30cdcdceadad4353da30ce5660c96be1a Binary files /dev/null and b/workloads/realworld/pinned/darknet/yolov3/predictions.jpg differ diff --git a/workloads/realworld/pinned/darknet/yolov3/run_super.sh b/workloads/realworld/pinned/darknet/yolov3/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..440a4b456a80a434243dffa9614d5a501790990f --- /dev/null +++ b/workloads/realworld/pinned/darknet/yolov3/run_super.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg diff --git a/workloads/realworld/pinned/darknet/yolov3/run_yolov3.sh b/workloads/realworld/pinned/darknet/yolov3/run_yolov3.sh new file mode 100755 index 0000000000000000000000000000000000000000..440a4b456a80a434243dffa9614d5a501790990f --- /dev/null +++ b/workloads/realworld/pinned/darknet/yolov3/run_yolov3.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg diff --git a/workloads/realworld/pinned/darknet/yolov3_b/run_super.sh b/workloads/realworld/pinned/darknet/yolov3_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..913790c1ee53a0d442a89306fbd8bda93faa2581 --- /dev/null +++ b/workloads/realworld/pinned/darknet/yolov3_b/run_super.sh @@ -0,0 +1,2 @@ +#../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg +../darknet detector infer ../cfg/coco.data ../cfg/yolov3_b.cfg diff --git a/workloads/realworld/pinned/darknet/yolov3_t/run_super.sh b/workloads/realworld/pinned/darknet/yolov3_t/run_super.sh new file mode 100755 index 
0000000000000000000000000000000000000000..ee7df07c1a4172f25c1129d8027095d2e3861e28 --- /dev/null +++ b/workloads/realworld/pinned/darknet/yolov3_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg +../darknet detector train ../cfg/coco.data ../cfg/yolov3_t.cfg diff --git a/workloads/realworld/pinned/hotspot/Makefile b/workloads/realworld/pinned/hotspot/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..793c7c13d91b4f492c5df5801f2a9ddadf023470 --- /dev/null +++ b/workloads/realworld/pinned/hotspot/Makefile @@ -0,0 +1,24 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include + +SRC = hotspot.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = hotspot + +release: $(SRC) + $(CC) $(KERNEL_DIM) $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +enum: $(SRC) + $(CC) $(KERNEL_DIM) -deviceemu $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +debug: $(SRC) + $(CC) $(KERNEL_DIM) -g $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +debugenum: $(SRC) + $(CC) $(KERNEL_DIM) -g -deviceemu $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt diff --git a/workloads/realworld/pinned/hotspot/Makefile_nvidia b/workloads/realworld/pinned/hotspot/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..1f7ae25a90c968563e96208c693b927b6765490a --- /dev/null +++ b/workloads/realworld/pinned/hotspot/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. 
+# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. 
+# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := hotspot +# CUDA source files (compiled with cudacc) +CUFILES := hotspot.cu +# CUDA dependency files +# CU_DEPS := needle_kernel.cu +# C/C++ source files (compiled with gcc / c++) +# CCFILES := + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/pinned/hotspot/README b/workloads/realworld/pinned/hotspot/README new file mode 100644 index 0000000000000000000000000000000000000000..f24239abebe938fe3e8d3c1a7a97f915bd09a90b --- /dev/null +++ b/workloads/realworld/pinned/hotspot/README @@ -0,0 +1,8 @@ +******Adjustable work group size***** +The kernel has square shape +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe one dimension +The actually dimension = RD_WG_SIZE_0 * RD_WG_SIZE_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" \ No newline at end of file diff --git a/workloads/realworld/pinned/hotspot/hotspot b/workloads/realworld/pinned/hotspot/hotspot new file mode 100755 index 0000000000000000000000000000000000000000..f84e674f1c3f5ab632a952d7cdc78943b7dd10c4 Binary files /dev/null and b/workloads/realworld/pinned/hotspot/hotspot differ diff --git a/workloads/realworld/pinned/hotspot/hotspot.cu b/workloads/realworld/pinned/hotspot/hotspot.cu new file mode 100644 index 0000000000000000000000000000000000000000..b68b5d0a2aad18cedf5808ec496e10b9ae068fe3 --- /dev/null +++ b/workloads/realworld/pinned/hotspot/hotspot.cu @@ -0,0 +1,386 @@ +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#ifdef RD_WG_SIZE_0_0 +#define BLOCK_SIZE 
RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) +#define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) +#define BLOCK_SIZE RD_WG_SIZE +#else +#define BLOCK_SIZE 16 +#endif + +#define STR_SIZE 256 + +/* maximum power density possible (say 300W for a 10mm x 10mm chip) */ +#define MAX_PD (3.0e6) +/* required precision in degrees */ +#define PRECISION 0.001 +#define SPEC_HEAT_SI 1.75e6 +#define K_SI 100 +/* capacitance fitting factor */ +#define FACTOR_CHIP 0.5 + +/* chip parameters */ +float t_chip = 0.0005; +float chip_height = 0.016; +float chip_width = 0.016; +/* ambient temperature, assuming no package at all */ +float amb_temp = 80.0; + +void run(int argc, char **argv); + +/* define timer macros */ +#define pin_stats_reset() startCycle() +#define pin_stats_pause(cycles) stopCycle(cycles) +#define pin_stats_dump(cycles) printf("timer: %Lu\n", cycles) + +void fatal(char *s) +{ + fprintf(stderr, "error: %s\n", s); +} + +void writeoutput(float *vect, int grid_rows, int grid_cols, char *file) +{ + + int i, j, index = 0; + FILE *fp; + char str[STR_SIZE]; + + if ((fp = fopen(file, "w")) == 0) + printf("The file was not opened\n"); + + for (i = 0; i < grid_rows; i++) + for (j = 0; j < grid_cols; j++) + { + + sprintf(str, "%d\t%g\n", index, vect[i * grid_cols + j]); + fputs(str, fp); + index++; + } + + fclose(fp); +} + +void readinput(float *vect, int grid_rows, int grid_cols, char *file) +{ + + int i, j; + FILE *fp; + char str[STR_SIZE]; + float val; + + if ((fp = fopen(file, "r")) == 0) + printf("The file was not opened\n"); + + for (i = 0; i <= grid_rows - 1; i++) + for (j = 0; j <= grid_cols - 1; j++) + { + fgets(str, STR_SIZE, fp); + if (feof(fp)) + fatal("not enough lines in file"); + // if ((sscanf(str, "%d%f", &index, &val) != 2) || (index != ((i-1)*(grid_cols-2)+j-1))) + if ((sscanf(str, "%f", &val) != 1)) + fatal("invalid file format"); + vect[i * grid_cols + j] = val; + } + + fclose(fp); +} + +#define IN_RANGE(x, min, max) ((x) >= (min) && (x) <= (max)) 
+#define CLAMP_RANGE(x, min, max) x = (x < (min)) ? min : ((x > (max)) ? max : x) +#define MIN(a, b) ((a) <= (b) ? (a) : (b)) + +__global__ void calculate_temp(int iteration, // number of iteration + float *power, // power input + float *temp_src, // temperature input/output + float *temp_dst, // temperature input/output + int grid_cols, // Col of grid + int grid_rows, // Row of grid + int border_cols, // border offset + int border_rows, // border offset + float Cap, // Capacitance + float Rx, + float Ry, + float Rz, + float step, + int batch_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + __shared__ float temp_on_cuda[BLOCK_SIZE][BLOCK_SIZE]; + __shared__ float power_on_cuda[BLOCK_SIZE][BLOCK_SIZE]; + __shared__ float temp_t[BLOCK_SIZE][BLOCK_SIZE]; // saving temparary temperature result + + float amb_temp = 80.0; + float step_div_Cap; + float Rx_1, Ry_1, Rz_1; + + // int bx = blockIdx.x; + // int by = blockIdx.y; + + int tx = threadIdx.x; + int ty = threadIdx.y; + + step_div_Cap = step / Cap; + + Rx_1 = 1 / Rx; + Ry_1 = 1 / Ry; + Rz_1 = 1 / Rz; + + // each block finally computes result for a small block + // after N iterations. 
+ // it is the non-overlapping small blocks that cover + // all the input data + + // calculate the small block size + int small_block_rows = BLOCK_SIZE - iteration * 2; // EXPAND_RATE + int small_block_cols = BLOCK_SIZE - iteration * 2; // EXPAND_RATE + + // if (bx == 0 && by == 0 && tx == 0 && ty == 0) + // printf("iteration is %d, small_block_rows is %d\n", iteration, small_block_rows); + + int tile_dim_x = gridDim.x * batch_size; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = batch_size * batch_size; + int tiles_this_block_x = batch_size; + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + // block id + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + int bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + // calculate the boundary for the block according to + // the boundary of its small block + int blkY = small_block_rows * by - border_rows; + int blkX = small_block_cols * bx - border_cols; + int blkYmax = blkY + BLOCK_SIZE - 1; + int blkXmax = blkX + BLOCK_SIZE - 1; + + // calculate the global thread coordination + int yidx = blkY + ty; + int xidx = blkX + tx; + + // load data if it is within the valid input range + int loadYidx = yidx, loadXidx = xidx; + int index = grid_cols * loadYidx + loadXidx; + + if (IN_RANGE(loadYidx, 0, grid_rows - 1) && IN_RANGE(loadXidx, 0, grid_cols - 1)) + { + temp_on_cuda[ty][tx] = temp_src[index]; // Load the temperature data from global memory to shared memory + power_on_cuda[ty][tx] = power[index]; // Load the power data from global memory to shared memory + } + block.sync(); + + // effective range within this block that falls within + // the valid range of the input data + // used to rule out computation outside the boundary. 
+ int validYmin = (blkY < 0) ? -blkY : 0; + int validYmax = (blkYmax > grid_rows - 1) ? BLOCK_SIZE - 1 - (blkYmax - grid_rows + 1) : BLOCK_SIZE - 1; + int validXmin = (blkX < 0) ? -blkX : 0; + int validXmax = (blkXmax > grid_cols - 1) ? BLOCK_SIZE - 1 - (blkXmax - grid_cols + 1) : BLOCK_SIZE - 1; + + int N = ty - 1; + int S = ty + 1; + int W = tx - 1; + int E = tx + 1; + + N = (N < validYmin) ? validYmin : N; + S = (S > validYmax) ? validYmax : S; + W = (W < validXmin) ? validXmin : W; + E = (E > validXmax) ? validXmax : E; + + bool computed; + for (int i = 0; i < iteration; i++) + { + computed = false; + if (IN_RANGE(tx, i + 1, BLOCK_SIZE - i - 2) && + IN_RANGE(ty, i + 1, BLOCK_SIZE - i - 2) && + IN_RANGE(tx, validXmin, validXmax) && + IN_RANGE(ty, validYmin, validYmax)) + { + computed = true; + temp_t[ty][tx] = temp_on_cuda[ty][tx] + step_div_Cap * (power_on_cuda[ty][tx] + + (temp_on_cuda[S][tx] + temp_on_cuda[N][tx] - 2.0 * temp_on_cuda[ty][tx]) * Ry_1 + + (temp_on_cuda[ty][E] + temp_on_cuda[ty][W] - 2.0 * temp_on_cuda[ty][tx]) * Rx_1 + + (amb_temp - temp_on_cuda[ty][tx]) * Rz_1); + } + block.sync(); + if (i == iteration - 1) + break; + if (computed) // Assign the computation range + temp_on_cuda[ty][tx] = temp_t[ty][tx]; + block.sync(); + } + + // update the global memory + // after the last iteration, only threads coordinated within the + // small block perform the calculation and switch on ``computed'' + if (computed) + { + temp_dst[index] = temp_t[ty][tx]; + } + } +} + +/* + compute N time steps +*/ + +int compute_tran_temp(float *MatrixPower, float *MatrixTemp[2], int col, int row, + int total_iterations, int num_iterations, int blockCols, int blockRows, int borderCols, int borderRows, int batch_size) +{ + dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE); + dim3 dimGrid(blockCols, blockRows); + + float grid_height = chip_height / row; + float grid_width = chip_width / col; + + float Cap = FACTOR_CHIP * SPEC_HEAT_SI * t_chip * grid_width * grid_height; + float Rx = 
grid_width / (2.0 * K_SI * t_chip * grid_height); + float Ry = grid_height / (2.0 * K_SI * t_chip * grid_width); + float Rz = t_chip / (K_SI * grid_height * grid_width); + + float max_slope = MAX_PD / (FACTOR_CHIP * t_chip * SPEC_HEAT_SI); + float step = PRECISION / max_slope; + float t; + + int src = 1, dst = 0; + + for (t = 0; t < total_iterations; t += num_iterations) + { + int temp = src; + src = dst; + dst = temp; + calculate_temp<<>>(MIN(num_iterations, total_iterations - t), MatrixPower, MatrixTemp[src], MatrixTemp[dst], + col, row, borderCols, borderRows, Cap, Rx, Ry, Rz, step, batch_size); + } + return dst; +} + +void usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "\t - number of rows/cols in the grid (positive integer)\n"); + fprintf(stderr, "\t - pyramid heigh(positive integer)\n"); + fprintf(stderr, "\t - number of iterations\n"); + fprintf(stderr, "\t - name of the file containing the initial temperature values of each cell\n"); + fprintf(stderr, "\t - name of the file containing the dissipated power values of each cell\n"); + fprintf(stderr, "\t - name of the output file\n"); + fprintf(stderr, "\t - batch_size * batch_size per block\n"); + exit(1); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE); + + run(argc, argv); + + return EXIT_SUCCESS; +} + +void run(int argc, char **argv) +{ + int size; + int grid_rows, grid_cols; + float *FilesavingTemp, *FilesavingPower, *MatrixOut; + char *tfile, *pfile, *ofile; + + int total_iterations = 60; + int pyramid_height = 1; // number of iterations + + if (argc != 8) + usage(argc, argv); + if ((grid_rows = atoi(argv[1])) <= 0 || + (grid_cols = atoi(argv[1])) <= 0 || + (pyramid_height = atoi(argv[2])) <= 0 || + (total_iterations = atoi(argv[3])) <= 0) + usage(argc, argv); + + tfile = 
argv[4]; + pfile = argv[5]; + ofile = argv[6]; + + int batch_size = atoi(argv[7]); + + size = grid_rows * grid_cols; + +/* --------------- pyramid parameters --------------- */ +#define EXPAND_RATE 2 // add one iteration will extend the pyramid base by 2 per each borderline + int borderCols = (pyramid_height)*EXPAND_RATE / 2; + int borderRows = (pyramid_height)*EXPAND_RATE / 2; + int smallBlockCol = BLOCK_SIZE - (pyramid_height)*EXPAND_RATE; + int smallBlockRow = BLOCK_SIZE - (pyramid_height)*EXPAND_RATE; + // int blockCols = grid_cols / smallBlockCol + ((grid_cols % smallBlockCol == 0) ? 0 : 1); + // int blockRows = grid_rows / smallBlockRow + ((grid_rows % smallBlockRow == 0) ? 0 : 1); + + int blockCols = (grid_cols + smallBlockCol * batch_size - 1) / (smallBlockCol * batch_size); + int blockRows = (grid_rows + smallBlockRow * batch_size - 1) / (smallBlockRow * batch_size); + + // printf("borderCols is %d, smallBlockCol is %d, blockCols is %d, grid_cols is %d \n", borderCols, smallBlockCol, blockCols, grid_cols); + + FilesavingTemp = (float *)malloc(size * sizeof(float)); + FilesavingPower = (float *)malloc(size * sizeof(float)); + MatrixOut = (float *)calloc(size, sizeof(float)); + + if (!FilesavingPower || !FilesavingTemp || !MatrixOut) + fatal("unable to allocate memory"); + + printf("pyramidHeight: %d\ngridSize: [%d, %d]\nborder:[%d, %d]\nblockGrid:[%d, %d]\ntargetBlock:[%d, %d]\n", + pyramid_height, grid_cols, grid_rows, borderCols, borderRows, blockCols, blockRows, smallBlockCol, smallBlockRow); + + readinput(FilesavingTemp, grid_rows, grid_cols, tfile); + readinput(FilesavingPower, grid_rows, grid_cols, pfile); + + GPU_argv_init(); + + initTrace(); + startCPU(); + float *MatrixTemp[2], *MatrixPower; + cudaMalloc((void **)&MatrixTemp[0], sizeof(float) * size); + cudaMalloc((void **)&MatrixTemp[1], sizeof(float) * size); + cudaMemcpy(MatrixTemp[0], FilesavingTemp, sizeof(float) * size, cudaMemcpyHostToDevice); + + cudaMalloc((void **)&MatrixPower, 
sizeof(float) * size); + cudaMemcpy(MatrixPower, FilesavingPower, sizeof(float) * size, cudaMemcpyHostToDevice); + // printf("Start computing the transient temperature\n"); + int ret = compute_tran_temp(MatrixPower, MatrixTemp, grid_cols, grid_rows, + total_iterations, pyramid_height, blockCols, blockRows, borderCols, borderRows, batch_size); + // printf("Ending simulation\n"); + cudaMemcpy(MatrixOut, MatrixTemp[ret], sizeof(float) * size, cudaMemcpyDeviceToHost); + + cudaFree(MatrixPower); + cudaFree(MatrixTemp[0]); + cudaFree(MatrixTemp[1]); + + endCPU(); + finiTrace(); + + writeoutput(MatrixOut, grid_rows, grid_cols, ofile); + free(MatrixOut); +} diff --git a/workloads/realworld/pinned/hotspot/run.sh b/workloads/realworld/pinned/hotspot/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..66b814725286fb6699d2b893740518ad43dc307a --- /dev/null +++ b/workloads/realworld/pinned/hotspot/run.sh @@ -0,0 +1 @@ +./hotspot 512 2 2 ../../../../data/hotspot/temp_512 ../../../../data/hotspot/power_512 output.out 4 diff --git a/workloads/realworld/pinned/hotspot/run_super.sh b/workloads/realworld/pinned/hotspot/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ad31b9dcd0ce3e1d53df5aaf445082ab86cf2366 --- /dev/null +++ b/workloads/realworld/pinned/hotspot/run_super.sh @@ -0,0 +1 @@ +./hotspot 8192 2 2 ../../../../data/hotspot/temp_8192.txt ../../../../data/hotspot/power_8192.txt output.out 8 diff --git a/workloads/realworld/pinned/kmeans/Makefile b/workloads/realworld/pinned/kmeans/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..e473b45be17e5805399c15b04dda57742451d913 --- /dev/null +++ b/workloads/realworld/pinned/kmeans/Makefile @@ -0,0 +1,33 @@ +include ../../../common/make.config + +# C compiler +CC = gcc +CC_FLAGS = -g -fopenmp -O2 + +# CUDA compiler +NVCC = $(CUDA_DIR)/bin/nvcc +NVCC_FLAGS = -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) + +# 'make dbg=1' enables NVCC debugging + + +# 'make 
emu=1' compiles the CUDA kernels for emulation +ifeq ($(emu),1) + NVCC_FLAGS += -deviceemu +endif + + +kmeans: cluster.o getopt.o kmeans.o kmeans_clustering.o kmeans_cuda.o rmse.o $(CUPTI_ADD_COMMON)/cpu_timestamps.o + $(CC) $(CC_FLAGS) cluster.o getopt.o kmeans.o kmeans_clustering.o kmeans_cuda.o rmse.o $(CUPTI_ADD_COMMON)/cpu_timestamps.o $(CUPTI_ADD_COMMON)/cupti_add.cpp -o kmeans $(NVCC_FLAGS) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++ + +kmeans.o: kmeans.c + $(CC) $(CC_FLAGS) $< -c $(NVCC_FLAGS) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +%.o: %.[ch] + $(CC) $(CC_FLAGS) $< -c -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lstdc++ + +kmeans_cuda.o: kmeans_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp + $(NVCC) -O2 -c kmeans_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(NVCC_FLAGS) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +clean: + rm -f *.o *~ kmeans kmeans_cuda.linkinfo diff --git a/workloads/realworld/pinned/kmeans/Makefile_nvidia b/workloads/realworld/pinned/kmeans/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..5d612b9dc8b5c19310162e1f721e1d2d4e33fb72 --- /dev/null +++ b/workloads/realworld/pinned/kmeans/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
+# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := kmeans +# CUDA source files (compiled with cudacc) +CUFILES := kmeans_cuda.cu +# CUDA dependency files +CU_DEPS := kmeans_cuda_kernel.cu +# C/C++ source files (compiled with gcc / c++) +CFILES := cluster.c getopt.c kmeans.c kmeans_clustering.c rmse.c + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/pinned/kmeans/README b/workloads/realworld/pinned/kmeans/README new file mode 100755 index 0000000000000000000000000000000000000000..bebae52d716986889b30a435214e095346424190 --- /dev/null +++ b/workloads/realworld/pinned/kmeans/README @@ -0,0 +1,10 @@ +Usage: ./kmeans [switches] -i filename + + -i filename :file containing data to be clustered + -m max_nclusters :maximum number of clusters allowed [default=5] + -n min_nclusters 
:minimum number of clusters allowed [default=5] + -t threshold :threshold value [default=0.001] + -l nloops :iteration for each number of clusters [default=1] + -b :input file is in binary format + -r :calculate RMSE [default=off] + -o :output cluster center coordinates [default=off] \ No newline at end of file diff --git a/workloads/realworld/pinned/kmeans/cluster.c b/workloads/realworld/pinned/kmeans/cluster.c new file mode 100755 index 0000000000000000000000000000000000000000..1dfba11084300cdc4355f08e35722ae725cd6ce5 --- /dev/null +++ b/workloads/realworld/pinned/kmeans/cluster.c @@ -0,0 +1,160 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. 
*/ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: cluster.c **/ +/** Description: Takes as input a file, containing 1 data point per **/ +/** per line, and performs a fuzzy c-means clustering **/ +/** on the data. Fuzzy clustering is performed using **/ +/** min to max clusters and the clustering that gets **/ +/** the best score according to a compactness and **/ +/** separation criterion are returned. **/ +/** Author: Brendan McCane **/ +/** James Cook University of North Queensland. **/ +/** Australia. email: mccane@cs.jcu.edu.au **/ +/** **/ +/** Edited by: Jay Pisharath, Wei-keng Liao **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. 
**/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "kmeans.h" + +// Timestamp at trace initialization time. Used to normalized other +// timestamps + +extern double wtime(void); +float min_rmse_ref = FLT_MAX; /* reference min_rmse value */ + +/*---< cluster() >-----------------------------------------------------------*/ +int cluster(int npoints, /* number of data points */ + int nfeatures, /* number of attributes for each point */ + float **features, /* array: [npoints][nfeatures] */ + int min_nclusters, /* range of min to max number of clusters */ + int max_nclusters, + float threshold, /* loop terminating factor */ + int *best_nclusters, /* out: number between min and max with lowest RMSE */ + float ***cluster_centres, /* out: [best_nclusters][nfeatures] */ + float *min_rmse, /* out: minimum RMSE */ + int isRMSE, /* calculate RMSE */ + int nloops /* number of iteration for each number of clusters */ + ) +{ + + //initTrace(); + + int nclusters; /* number of clusters k */ + int index =0; /* number of iteration to reach the best RMSE */ + int rmse; /* RMSE for each clustering */ + int *membership; /* which cluster a data point belongs to */ + float **tmp_cluster_centres; /* hold coordinates of cluster centers */ + int i; + + /* allocate memory for membership */ + membership = (int*) malloc(npoints * sizeof(int)); + + /* sweep k from min to max_nclusters to find the best number of clusters */ + for(nclusters = min_nclusters; nclusters <= max_nclusters; nclusters++) + { + if (nclusters > npoints) break; /* cannot have more clusters than points */ + + /* allocate device memory, invert data array (@ kmeans_cuda.cu) */ + allocateMemory(npoints, nfeatures, nclusters, features); + + /* 
iterate nloops times for each number of clusters */ + for(i = 0; i < nloops; i++) + { + /* initialize initial cluster centers, CUDA calls (@ kmeans_cuda.cu) */ + tmp_cluster_centres = kmeans_clustering(features, + nfeatures, + npoints, + nclusters, + threshold, + membership); + + if (*cluster_centres) { + free((*cluster_centres)[0]); + free(*cluster_centres); + } + *cluster_centres = tmp_cluster_centres; + + /* find the number of clusters with the best RMSE */ + if(isRMSE) + { + rmse = rms_err(features, + nfeatures, + npoints, + tmp_cluster_centres, + nclusters); + + if(rmse < min_rmse_ref){ + min_rmse_ref = rmse; //update reference min RMSE + *min_rmse = min_rmse_ref; //update return min RMSE + *best_nclusters = nclusters; //update optimum number of clusters + index = i; //update number of iteration to reach best RMSE + } + } + } + deallocateMemory(); /* free device memory (@ kmeans_cuda.cu) */ + } + + free(membership); + + return index; +} + diff --git a/workloads/realworld/pinned/kmeans/cluster.o b/workloads/realworld/pinned/kmeans/cluster.o new file mode 100644 index 0000000000000000000000000000000000000000..59e3a24a3f6c8a2f7c19b5875d4dba1178842f72 Binary files /dev/null and b/workloads/realworld/pinned/kmeans/cluster.o differ diff --git a/workloads/realworld/pinned/kmeans/cp.sh b/workloads/realworld/pinned/kmeans/cp.sh new file mode 100755 index 0000000000000000000000000000000000000000..243885047459789eb8eae5d6c5b2b4822eb3aabf --- /dev/null +++ b/workloads/realworld/pinned/kmeans/cp.sh @@ -0,0 +1,27 @@ +cp super_0.log super_3.log +cp super_0.log super_4.log +cp super_0.log super_5.log +cp super_0.log super_6.log +cp super_0.log super_7.log +cp super_0.log super_8.log +cp super_0.log super_9.log +cp super_0.log super_10.log +cp super_0.log super_11.log +cp super_0.log super_12.log +cp super_0.log super_13.log +cp super_0.log super_14.log +cp super_0.log super_15.log +cp super_0.log super_17.log +cp super_0.log super_17.log +cp super_0.log super_18.log +cp 
super_0.log super_19.log +cp super_0.log super_20.log +cp super_0.log super_21.log +cp super_0.log super_22.log +cp super_0.log super_23.log +cp super_0.log super_24.log +cp super_0.log super_25.log +cp super_0.log super_26.log +cp super_0.log super_27.log +cp super_0.log super_28.log +cp super_0.log super_29.log diff --git a/workloads/realworld/pinned/kmeans/cupti_add.o b/workloads/realworld/pinned/kmeans/cupti_add.o new file mode 100644 index 0000000000000000000000000000000000000000..01810db3e980c2777587804007d4bd611a74f500 Binary files /dev/null and b/workloads/realworld/pinned/kmeans/cupti_add.o differ diff --git a/workloads/realworld/pinned/kmeans/getopt.c b/workloads/realworld/pinned/kmeans/getopt.c new file mode 100755 index 0000000000000000000000000000000000000000..fa2f31378fb2978f65267ba2e810aae3ff1ee016 --- /dev/null +++ b/workloads/realworld/pinned/kmeans/getopt.c @@ -0,0 +1,1184 @@ +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to drepper@gnu.org + before changing it! + Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . */ +#ifndef _NO_PROTO +# define _NO_PROTO +#endif + +#ifdef HAVE_CONFIG_H +# include +#endif + +#if !defined __STDC__ || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +# ifndef const +# define const +# endif +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +# include +# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +# define ELIDE_CODE +# endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +# include +# include +#endif /* GNU C library. */ + +#ifdef VMS +# include +# if HAVE_STRING_H - 0 +# include +# endif +#endif + +#ifndef _ +/* This is for other GNU distributions with internationalized messages. 
*/ +# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include +# ifndef _ +# define _(msgid) gettext (msgid) +# endif +# else +# define _(msgid) (msgid) +# endif +# if defined _LIBC && defined USE_IN_LIBIO +# include +# endif +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. */ +int optind = 1; + +/* Formerly, initialization of getopt depended on optind==0, which + causes problems with re-calling getopt as programs generally don't + know that. 
*/ + +int __getopt_initialized; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. 
In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +/* Value of POSIXLY_CORRECT environment variable. */ +static char *posixly_correct; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +# include +# define my_index strchr +#else + +//# if HAVE_STRING_H || WIN32 /* Pete Wilson mod 7/28/02 */ +# include +//# else +//# include +//# endif + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +#ifndef getenv +extern char *getenv (); +#endif + +static char * +my_index (str, chr) + const char *str; + int chr; +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. */ +#ifdef __GNUC__ +/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. + That was relevant to code that was here before. */ +# if (!defined __STDC__ || !__STDC__) && !defined strlen +/* gcc with -traditional declares the built-in strlen to return int, + and has done so at least since version 2.4.5. -- rms. */ +extern int strlen (const char *); +# endif /* not __STDC__ */ +#endif /* __GNUC__ */ + +#endif /* not __GNU_LIBRARY__ */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +#ifdef _LIBC +/* Stored original parameters. + XXX This is no good solution. We should rather copy the args so + that we can compare them later. But we must not use malloc(3). 
*/ +extern int __libc_argc; +extern char **__libc_argv; + +/* Bash 2.0 gives us an environment variable containing flags + indicating ARGV elements that should not be considered arguments. */ + +# ifdef USE_NONOPTION_FLAGS +/* Defined in getopt_init.c */ +extern char *__getopt_nonoption_flags; + +static int nonoption_flags_max_len; +static int nonoption_flags_len; +# endif + +# ifdef USE_NONOPTION_FLAGS +# define SWAP_FLAGS(ch1, ch2) \ + if (nonoption_flags_len > 0) \ + { \ + char __tmp = __getopt_nonoption_flags[ch1]; \ + __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ + __getopt_nonoption_flags[ch2] = __tmp; \ + } +# else +# define SWAP_FLAGS(ch1, ch2) +# endif +#else /* !_LIBC */ +# define SWAP_FLAGS(ch1, ch2) +#endif /* _LIBC */ + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +#if defined __STDC__ && __STDC__ +static void exchange (char **); +#endif + +static void +exchange (argv) + char **argv; +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + /* First make sure the handling of the `__getopt_nonoption_flags' + string can work normally. Our top argument must be in the range + of the string. */ + if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) + { + /* We must extend the array. 
The user plays games with us and + presents new arguments. */ + char *new_str = malloc (top + 1); + if (new_str == NULL) + nonoption_flags_len = nonoption_flags_max_len = 0; + else + { + memset (__mempcpy (new_str, __getopt_nonoption_flags, + nonoption_flags_max_len), + '\0', top + 1 - nonoption_flags_max_len); + nonoption_flags_max_len = top + 1; + __getopt_nonoption_flags = new_str; + } + } +#endif + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + SWAP_FLAGS (bottom + i, middle + i); + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Initialize the internal data when the first call is made. */ + +#if defined __STDC__ && __STDC__ +static const char *_getopt_initialize (int, char *const *, const char *); +#endif +static const char * +_getopt_initialize (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. 
*/ + + first_nonopt = last_nonopt = optind; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + if (posixly_correct == NULL + && argc == __libc_argc && argv == __libc_argv) + { + if (nonoption_flags_max_len == 0) + { + if (__getopt_nonoption_flags == NULL + || __getopt_nonoption_flags[0] == '\0') + nonoption_flags_max_len = -1; + else + { + const char *orig_str = __getopt_nonoption_flags; + int len = nonoption_flags_max_len = strlen (orig_str); + if (nonoption_flags_max_len < argc) + nonoption_flags_max_len = argc; + __getopt_nonoption_flags = + (char *) malloc (nonoption_flags_max_len); + if (__getopt_nonoption_flags == NULL) + nonoption_flags_max_len = -1; + else + memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), + '\0', nonoption_flags_max_len - len); + } + } + nonoption_flags_len = nonoption_flags_max_len; + } + else + nonoption_flags_len = 0; +#endif + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. 
+ + If there are no more option characters, `getopt' returns -1. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. 
+ + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + int print_errors = opterr; + if (optstring[0] == ':') + print_errors = 0; + + if (argc < 1) + return -1; + + optarg = NULL; + + if (optind == 0 || !__getopt_initialized) + { + if (optind == 0) + optind = 1; /* Don't scan ARGV[0], the program name. */ + optstring = _getopt_initialize (argc, argv, optstring); + __getopt_initialized = 1; + } + + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The later information + is only used when the used in the GNU libc. */ +#if defined _LIBC && defined USE_NONOPTION_FLAGS +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ + || (optind < nonoption_flags_len \ + && __getopt_nonoption_flags[optind] == '1')) +#else +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') +#endif + + if (nextchar == NULL || *nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT and LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (last_nonopt > optind) + last_nonopt = optind; + if (first_nonopt > optind) + first_nonopt = optind; + + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. 
*/ + + while (optind < argc && NONOPTION_P) + optind++; + last_nonopt = optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (ordering == REQUIRE_ORDER) + return -1; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. 
*/ + + if (longopts != NULL + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) + == (unsigned int) strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else if (long_only + || pfound->has_arg != p->has_arg + || pfound->flag != p->flag + || pfound->val != p->val) + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + optopt = 0; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind - 1][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#else + fprintf (stderr, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], + pfound->name); +#else + fprintf (stderr, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + + nextchar += strlen (nextchar); + + optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. 
If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' + || my_index (optstring, *nextchar) == NULL) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + nextchar = (char *) ""; + optind++; + optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (posixly_correct) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: illegal option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); +#endif + } + else + { +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: invalid option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + optopt = c; + return '?'; + } + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';') + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) + /* Do nothing. 
*/ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = NULL; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#endif /* Not ELIDE_CODE. */ + + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ \ No newline at end of file diff --git a/workloads/realworld/pinned/kmeans/getopt.h b/workloads/realworld/pinned/kmeans/getopt.h new file mode 100755 index 0000000000000000000000000000000000000000..bae04bf7d418206d73892a94ff94923a36549362 --- /dev/null +++ b/workloads/realworld/pinned/kmeans/getopt.h @@ -0,0 +1,191 @@ + + +/* getopt.h */ +/* Declarations for getopt. + Copyright (C) 1989-1994, 1996-1999, 2001 Free Software + Foundation, Inc. This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute + it and/or modify it under the terms of the GNU Lesser + General Public License as published by the Free Software + Foundation; either version 2.1 of the License, or + (at your option) any later version. + + The GNU C Library is distributed in the hope that it will + be useful, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A + PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. 
+ + You should have received a copy of the GNU Lesser General + Public License along with the GNU C Library; if not, write + to the Free Software Foundation, Inc., 59 Temple Place, + Suite 330, Boston, MA 02111-1307 USA. */ + + + + + +#ifndef _GETOPT_H + +#ifndef __need_getopt +# define _GETOPT_H 1 +#endif + +/* If __GNU_LIBRARY__ is not already defined, either we are being used + standalone, or this is the first header included in the source file. + If we are being used with glibc, we need to include , but + that does not exist if we are standalone. So: if __GNU_LIBRARY__ is + not defined, include , which will pull in for us + if it's from glibc. (Why ctype.h? It's guaranteed to exist and it + doesn't flood the namespace with stuff the way some other headers do.) */ +#if !defined __GNU_LIBRARY__ +# include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +#ifndef __need_getopt +/* Describe the long-named options requested by the application. 
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +# if (defined __STDC__ && __STDC__) || defined __cplusplus + const char *name; +# else + char *name; +# endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +# define no_argument 0 +# define required_argument 1 +# define optional_argument 2 +#endif /* need getopt */ + + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, `optopt' is set to the option letter, and '?' is + returned. 
+ + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in `optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU `getopt'. + + The argument `--' causes premature termination of argument + scanning, explicitly telling `getopt' that there are no more + options. + + If OPTS begins with `--', then non-option arguments are treated as + arguments to the option '\0'. This behavior is specific to the GNU + `getopt'. */ + +#if (defined __STDC__ && __STDC__) || defined __cplusplus +# ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int ___argc, char *const *___argv, const char *__shortopts); +# else /* not __GNU_LIBRARY__ */ +extern int getopt (); +# endif /* __GNU_LIBRARY__ */ + +# ifndef __need_getopt +extern int getopt_long (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); +extern int getopt_long_only (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); + +/* Internal only. Users should not call this directly. */ +extern int _getopt_internal (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only); +# endif +#else /* not __STDC__ */ +extern int getopt (); +# ifndef __need_getopt +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +# endif +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +/* Make sure we later can get all the definitions and declarations. 
*/ +#undef __need_getopt + +#endif /* getopt.h */ + diff --git a/workloads/realworld/pinned/kmeans/getopt.o b/workloads/realworld/pinned/kmeans/getopt.o new file mode 100644 index 0000000000000000000000000000000000000000..4bdff5a2b83a3c479dc824743dae146514a465f0 Binary files /dev/null and b/workloads/realworld/pinned/kmeans/getopt.o differ diff --git a/workloads/realworld/pinned/kmeans/kmeans b/workloads/realworld/pinned/kmeans/kmeans new file mode 100755 index 0000000000000000000000000000000000000000..70fe59695a2a8c6ce504ea243caa75adcc4562ea Binary files /dev/null and b/workloads/realworld/pinned/kmeans/kmeans differ diff --git a/workloads/realworld/pinned/kmeans/kmeans.c b/workloads/realworld/pinned/kmeans/kmeans.c new file mode 100755 index 0000000000000000000000000000000000000000..e0725dfaab140d5cbbf60aa80cae1e50fe613b7d --- /dev/null +++ b/workloads/realworld/pinned/kmeans/kmeans.c @@ -0,0 +1,308 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. 
*/ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: example.c **/ +/** Description: Takes as input a file: **/ +/** ascii file: containing 1 data point per line **/ +/** binary file: first int is the number of objects **/ +/** 2nd int is the no. of features of each **/ +/** object **/ +/** This example performs a fuzzy c-means clustering **/ +/** on the data. Fuzzy clustering is performed using **/ +/** min to max clusters and the clustering that gets **/ +/** the best score according to a compactness and **/ +/** separation criterion are returned. 
**/ +/** Author: Wei-keng Liao **/ +/** ECE Department Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ +#define _CRT_SECURE_NO_DEPRECATE 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#define _POSIX_C_SOURCE 200809L +#include +#include +#include "kmeans.h" + +extern double wtime(void); + + + +/*---< usage() >------------------------------------------------------------*/ +void usage(char *argv0) { + char *help = + "\nUsage: %s [switches] -i filename\n\n" + " -i filename :file containing data to be clustered\n" + " -m max_nclusters :maximum number of clusters allowed [default=5]\n" + " -n min_nclusters :minimum number of clusters allowed [default=5]\n" + " -t threshold :threshold value [default=0.001]\n" + " -l nloops :iteration for each number of clusters [default=1]\n" + " -b :input file is in binary format\n" + " -r :calculate RMSE [default=off]\n" + " -o :output cluster center coordinates [default=off]\n"; + fprintf(stderr, help, argv0); + exit(-1); +} + +/*---< main() >-------------------------------------------------------------*/ +int setup(int argc, char **argv) { + int opt; + extern char *optarg; + char *filename = 0; + float *buf; + char line[1024]; + int isBinaryFile = 0; + + float threshold = 0.001; /* default value */ + int max_nclusters=5; /* default value */ + int min_nclusters=5; /* 
default value */ + int best_nclusters = 0; + int nfeatures = 0; + int npoints = 0; + float len; + + float **features; + float **cluster_centres=NULL; + int i, j, index; + int nloops = 1; /* default value */ + + int isRMSE = 0; + float rmse; + + int isOutput = 0; + //float cluster_timing, io_timing; + + /* obtain command line arguments and change appropriate options */ + while ( (opt=getopt(argc,argv,"i:t:m:n:l:bro"))!= EOF) { + switch (opt) { + case 'i': filename=optarg; + break; + case 'b': isBinaryFile = 1; + break; + case 't': threshold=atof(optarg); + break; + case 'm': max_nclusters = atoi(optarg); + break; + case 'n': min_nclusters = atoi(optarg); + break; + case 'r': isRMSE = 1; + break; + case 'o': isOutput = 1; + break; + case 'l': nloops = atoi(optarg); + break; + case '?': usage(argv[0]); + break; + default: usage(argv[0]); + break; + } + } + + if (filename == 0) usage(argv[0]); + + /* ============== I/O begin ==============*/ + /* get nfeatures and npoints */ + //io_timing = omp_get_wtime(); + if (isBinaryFile) { //Binary file input + int infile; + if ((infile = open(filename, O_RDONLY, "0600")) == -1) { + fprintf(stderr, "Error: no such file (%s)\n", filename); + exit(1); + } + read(infile, &npoints, sizeof(int)); + read(infile, &nfeatures, sizeof(int)); + + /* allocate space for features[][] and read attributes of all objects */ + buf = (float*) malloc(npoints*nfeatures*sizeof(float)); + features = (float**)malloc(npoints* sizeof(float*)); + features[0] = (float*) malloc(npoints*nfeatures*sizeof(float)); + for (i=1; i npoints(%d) -- cannot proceed\n", min_nclusters, npoints); + exit(0); + } + + srand(7); /* seed for future random number generator */ + memcpy(features[0], buf, npoints*nfeatures*sizeof(float)); /* now features holds 2-dimensional array of features */ + free(buf); + + /* ======================= core of the clustering ===================*/ + + //cluster_timing = omp_get_wtime(); /* Total clustering time */ + cluster_centres = NULL; + 
index = cluster(npoints, /* number of data points */ + nfeatures, /* number of features for each point */ + features, /* array: [npoints][nfeatures] */ + min_nclusters, /* range of min to max number of clusters */ + max_nclusters, + threshold, /* loop termination factor */ + &best_nclusters, /* return: number between min and max */ + &cluster_centres, /* return: [best_nclusters][nfeatures] */ + &rmse, /* Root Mean Squared Error */ + isRMSE, /* calculate RMSE */ + nloops); /* number of iteration for each number of clusters */ + //cluster_timing = omp_get_wtime() - cluster_timing; + + + /* =============== Command Line Output =============== */ + + /* cluster center coordinates + :displayed only for when k=1*/ + if((min_nclusters == max_nclusters) && (isOutput == 1)) { + printf("\n================= Centroid Coordinates =================\n"); + for(i = 0; i < max_nclusters; i++){ + printf("%d:", i); + for(j = 0; j < nfeatures; j++){ + printf(" %.2f", cluster_centres[i][j]); + } + printf("\n\n"); + } + } + + len = (float) ((max_nclusters - min_nclusters + 1)*nloops); + + printf("Number of Iteration: %d\n", nloops); + //printf("Time for I/O: %.5fsec\n", io_timing); + //printf("Time for Entire Clustering: %.5fsec\n", cluster_timing); + + if(min_nclusters != max_nclusters){ + if(nloops != 1){ //range of k, multiple iteration + //printf("Average Clustering Time: %fsec\n", + // cluster_timing / len); + printf("Best number of clusters is %d\n", best_nclusters); + } + else{ //range of k, single iteration + //printf("Average Clustering Time: %fsec\n", + // cluster_timing / len); + printf("Best number of clusters is %d\n", best_nclusters); + } + } + else{ + if(nloops != 1){ // single k, multiple iteration + //printf("Average Clustering Time: %.5fsec\n", + // cluster_timing / nloops); + if(isRMSE) // if calculated RMSE + printf("Number of trials to approach the best RMSE of %.3f is %d\n", rmse, index + 1); + } + else{ // single k, single iteration + if(isRMSE) // if calculated 
RMSE + printf("Root Mean Squared Error: %.3f\n", rmse); + } + } + + + /* free up memory */ + free(features[0]); + free(features); + return(0); +} + diff --git a/workloads/realworld/pinned/kmeans/kmeans.h b/workloads/realworld/pinned/kmeans/kmeans.h new file mode 100755 index 0000000000000000000000000000000000000000..28b6c34732313f04c02b59b095361bc8142d4b05 --- /dev/null +++ b/workloads/realworld/pinned/kmeans/kmeans.h @@ -0,0 +1,60 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +#ifndef _H_FUZZY_KMEANS +#define _H_FUZZY_KMEANS + +#ifndef FLT_MAX +#define FLT_MAX 3.40282347e+38 +#endif + +#include + +/* rmse.c */ +float euclid_dist_2 (float*, float*, int); +int find_nearest_point (float* , int, float**, int); +float rms_err(float**, int, int, float**, int); + +/* cluster.c */ +int cluster(int, int, float**, int, int, float, int*, float***, float*, int, int); + +/* kmeans_clustering.c */ +float **kmeans_clustering(float**, int, int, int, float, int*); + +#endif diff --git a/workloads/realworld/pinned/kmeans/kmeans.o b/workloads/realworld/pinned/kmeans/kmeans.o new file mode 100644 index 0000000000000000000000000000000000000000..005f050f5a2db686d6ccb29d7f899e812ff1c6c7 Binary files /dev/null and b/workloads/realworld/pinned/kmeans/kmeans.o differ diff --git a/workloads/realworld/pinned/kmeans/kmeans_clustering.c b/workloads/realworld/pinned/kmeans/kmeans_clustering.c new file mode 100755 index 0000000000000000000000000000000000000000..54ddcd6d8ff6f0d075ff16e6b6aef84fda4716d2 --- /dev/null +++ b/workloads/realworld/pinned/kmeans/kmeans_clustering.c @@ -0,0 +1,178 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
*/ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. 
*/ +/******************************************************************************/ + +/*************************************************************************/ +/** File: kmeans_clustering.c **/ +/** Description: Implementation of regular k-means clustering **/ +/** algorithm **/ +/** Author: Wei-keng Liao **/ +/** ECE Department, Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include + +#include "kmeans.h" + +#define RANDOM_MAX 2147483647 + +extern double wtime(void); + +/*----< kmeans_clustering() >---------------------------------------------*/ +float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */ + int nfeatures, + int npoints, + int nclusters, + float threshold, + int *membership) /* out: [npoints] */ +{ + int i, j, n = 0; /* counters */ + int loop=0, temp; + int *new_centers_len; /* [nclusters]: no. of points in each cluster */ + float delta; /* if the point moved */ + float **clusters; /* out: [nclusters][nfeatures] */ + float **new_centers; /* [nclusters][nfeatures] */ + + int *initial; /* used to hold the index of points not yet selected + prevents the "birthday problem" of dual selection (?) 
+ considered holding initial cluster indices, but changed due to + possible, though unlikely, infinite loops */ + int initial_points; + int c = 0; + + /* nclusters should never be > npoints + that would guarantee a cluster without points */ + if (nclusters > npoints) + nclusters = npoints; + + /* allocate space for and initialize returning variable clusters[] */ + clusters = (float**) malloc(nclusters * sizeof(float*)); + clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float)); + for (i=1; i= 0; i++) { + //n = (int)rand() % initial_points; + + for (j=0; j 0) + clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */ + new_centers[i][j] = 0.0; /* set back to 0 */ + } + new_centers_len[i] = 0; /* set back to 0 */ + } + c++; + } while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */ + printf("iterated %d times\n", c); + free(new_centers[0]); + free(new_centers); + free(new_centers_len); + + return clusters; +} + diff --git a/workloads/realworld/pinned/kmeans/kmeans_clustering.o b/workloads/realworld/pinned/kmeans/kmeans_clustering.o new file mode 100644 index 0000000000000000000000000000000000000000..f489cb915420db92f7efa96d45fa9552ead688c4 Binary files /dev/null and b/workloads/realworld/pinned/kmeans/kmeans_clustering.o differ diff --git a/workloads/realworld/pinned/kmeans/kmeans_cuda.cu b/workloads/realworld/pinned/kmeans/kmeans_cuda.cu new file mode 100755 index 0000000000000000000000000000000000000000..81e3b6dda591e8198764aa505d6527b1a31a6feb --- /dev/null +++ b/workloads/realworld/pinned/kmeans/kmeans_cuda.cu @@ -0,0 +1,220 @@ +#include +#include +#include +#include +#include +#include + +#include + +#define THREADS_PER_DIM 16 +#define BLOCKS_PER_DIM 64 +#define THREADS_PER_BLOCK THREADS_PER_DIM*THREADS_PER_DIM +#include +#include "kmeans_cuda_kernel.cu" +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + + +//#define BLOCK_DELTA_REDUCE +//#define 
BLOCK_CENTER_REDUCE + +#define CPU_DELTA_REDUCE +#define CPU_CENTER_REDUCE + +extern "C" +int setup(int argc, char** argv); /* function prototype */ + +// GLOBAL!!!!! +unsigned int num_threads_perdim = THREADS_PER_DIM; /* sqrt(256) -- see references for this choice */ +unsigned int num_blocks_perdim = BLOCKS_PER_DIM; /* temporary */ +unsigned int num_threads = num_threads_perdim*num_threads_perdim; /* number of threads */ +unsigned int num_blocks = num_blocks_perdim*num_blocks_perdim; /* number of blocks */ + +/* _d denotes it resides on the device */ +int *membership_new; /* newly assignment membership */ +float *feature_d; /* inverted data array */ +float *feature_flipped_d; /* original (not inverted) data array */ +int *membership_d; /* membership on the device */ +float *block_new_centers; /* sum of points in a cluster (per block) */ +float *clusters_d; /* cluster centers on the device */ +float *block_clusters_d; /* per block calculation of cluster centers */ +int *block_deltas_d; /* per block calculation of deltas */ + + +/* -------------- allocateMemory() ------------------- */ +/* allocate device memory, calculate number of blocks and threads, and invert the data array */ +extern "C" +void allocateMemory(int npoints, int nfeatures, int nclusters, float **features) +{ + // printf("npoints is %d, num_threads is %d\n", npoints, num_threads); + // num_blocks = npoints / num_threads; + // if (npoints % num_threads > 0) /* defeat truncation */ + // num_blocks++; + + // num_blocks_perdim = sqrt((double) num_blocks); + // while (num_blocks_perdim * num_blocks_perdim < num_blocks) // defeat truncation (should run once) + // num_blocks_perdim++; + + num_blocks = num_blocks_perdim*num_blocks_perdim; + + /* allocate memory for memory_new[] and initialize to -1 (host) */ + membership_new = (int*) malloc(npoints * sizeof(int)); + for(int i=0;i>>(feature_flipped_d, feature_d, npoints, (num_blocks_perdim * num_blocks_perdim * num_threads_perdim * num_threads_perdim), 
nfeatures); + + /* allocate memory for membership_d[] and clusters_d[][] (device) */ + cudaMalloc((void**) &membership_d, npoints*sizeof(int)); + cudaMalloc((void**) &clusters_d, nclusters*nfeatures*sizeof(float)); +} +/* -------------- allocateMemory() end ------------------- */ + +/* -------------- deallocateMemory() ------------------- */ +/* free host and device memory */ +extern "C" +void deallocateMemory() +{ + free(membership_new); + free(block_new_centers); + cudaFree(feature_d); + cudaFree(feature_flipped_d); + cudaFree(membership_d); + + cudaFree(clusters_d); + + endCPU(); +} +/* -------------- deallocateMemory() end ------------------- */ + +//////////////////////////////////////////////////////////////////////////////// +// Program main // + +int +main( int argc, char** argv) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + // make sure we're running on the big card + GPU_argv_init(); + // as done in the CUDA start/help document provided + initTrace(); + setup(argc, argv); + finiTrace(); +} + +// // +//////////////////////////////////////////////////////////////////////////////// + +/* ------------------- kmeansCuda() ------------------------ */ +extern "C" +int // delta -- had problems when return value was of float type +kmeansCuda(float **feature, /* in: [npoints][nfeatures] */ + int nfeatures, /* number of attributes for each point */ + int npoints, /* number of data points */ + int nclusters, /* number of clusters */ + int *membership, /* which cluster the point belongs to */ + float **clusters, /* coordinates of cluster centers */ + int *new_centers_len, /* number of elements in each cluster */ + float **new_centers /* sum of elements in each cluster */ + ) +{ + int delta = 0; /* if point has moved */ + int i,j; /* counters */ + + // cudaSetDevice(1); + + + /* copy membership (host to device) */ + cudaMemcpy(membership_d, membership_new, npoints*sizeof(int), 
cudaMemcpyHostToDevice); + + // /* copy clusters (host to device) */ + // cudaMemcpy(clusters_d, clusters[0], nclusters*nfeatures*sizeof(float), cudaMemcpyHostToDevice); + + // /* set up texture */ + // cudaChannelFormatDesc chDesc0 = cudaCreateChannelDesc(); + // t_features.filterMode = cudaFilterModePoint; + // t_features.normalized = false; + // t_features.channelDesc = chDesc0; + + // if(cudaBindTexture(NULL, &t_features, feature_d, &chDesc0, npoints*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind features array to texture!\n"); + + // cudaChannelFormatDesc chDesc1 = cudaCreateChannelDesc(); + // t_features_flipped.filterMode = cudaFilterModePoint; + // t_features_flipped.normalized = false; + // t_features_flipped.channelDesc = chDesc1; + + // if(cudaBindTexture(NULL, &t_features_flipped, feature_flipped_d, &chDesc1, npoints*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind features_flipped array to texture!\n"); + + // cudaChannelFormatDesc chDesc2 = cudaCreateChannelDesc(); + // t_clusters.filterMode = cudaFilterModePoint; + // t_clusters.normalized = false; + // t_clusters.channelDesc = chDesc2; + + // if(cudaBindTexture(NULL, &t_clusters, clusters_d, &chDesc2, nclusters*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind clusters array to texture!\n"); + + // /* copy clusters to constant memory */ + // cudaMemcpyToSymbol("c_clusters",clusters[0],nclusters*nfeatures*sizeof(float),0,cudaMemcpyHostToDevice); + + // cudaMemcpy(feature_d, feature, npoints * nfeatures * sizeof(float), cudaMemcpyHostToDevice); + + /* setup execution parameters. 
+ changed to 2d (source code on NVIDIA CUDA Programming Guide) */ + dim3 grid( num_blocks_perdim, num_blocks_perdim ); + dim3 threads( num_threads_perdim*num_threads_perdim ); + /* execute the kernel */ + kmeansPoint<<>>(feature_d, + nfeatures, + npoints, + (num_blocks_perdim * num_blocks_perdim * num_threads_perdim * num_threads_perdim), + nclusters, + membership_d); + cudaDeviceSynchronize(); + + /* copy back membership (device to host) */ + cudaMemcpy(membership_new, membership_d, npoints * sizeof(int), cudaMemcpyDeviceToHost); + + /* for each point, sum data points in each cluster + and see if membership has changed: + if so, increase delta and change old membership, and update new_centers; + otherwise, update new_centers */ + delta = 0; + for (i = 0; i < npoints; i++) + { + int cluster_id = membership_new[i]; + new_centers_len[cluster_id]++; + if (membership_new[i] != membership[i]) + { +#ifdef CPU_DELTA_REDUCE + delta++; +#endif + membership[i] = membership_new[i]; + } +#ifdef CPU_CENTER_REDUCE + for (j = 0; j < nfeatures; j++) + { + new_centers[cluster_id][j] += feature[i][j]; + } +#endif + } + return delta; + +} +/* ------------------- kmeansCuda() end ------------------------ */ + diff --git a/workloads/realworld/pinned/kmeans/kmeans_cuda.o b/workloads/realworld/pinned/kmeans/kmeans_cuda.o new file mode 100644 index 0000000000000000000000000000000000000000..f64784b2eedf73f39d42b5f0c8f9a49699704567 Binary files /dev/null and b/workloads/realworld/pinned/kmeans/kmeans_cuda.o differ diff --git a/workloads/realworld/pinned/kmeans/kmeans_cuda_kernel.cu b/workloads/realworld/pinned/kmeans/kmeans_cuda_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..d5d94f15588c4097081c1fa9f2dab392ffe57dc9 --- /dev/null +++ b/workloads/realworld/pinned/kmeans/kmeans_cuda_kernel.cu @@ -0,0 +1,136 @@ +#ifndef _KMEANS_CUDA_KERNEL_H_ +#define _KMEANS_CUDA_KERNEL_H_ + +#include +#include + +#include "kmeans.h" + +#include +#include + +using namespace 
nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +// FIXME: Make this a runtime selectable variable! +#define ASSUMED_NR_CLUSTERS 32 + +#define SDATA(index) CUT_BANK_CHECKER(sdata, index) + +// t_features has the layout dim0[points 0-m-1]dim1[ points 0-m-1]... +texture t_features; +// t_features_flipped has the layout point0[dim 0-n-1]point1[dim 0-n-1] +texture t_features_flipped; +texture t_clusters; + +__constant__ float c_clusters[ASSUMED_NR_CLUSTERS * 34]; /* constant memory for cluster centers */ + +/* ----------------- invert_mapping() --------------------- */ +/* inverts data array from row-major to column-major. + + [p0,dim0][p0,dim1][p0,dim2] ... + [p1,dim0][p1,dim1][p1,dim2] ... + [p2,dim0][p2,dim1][p2,dim2] ... + to + [dim0,p0][dim0,p1][dim0,p2] ... + [dim1,p0][dim1,p1][dim1,p2] ... + [dim2,p0][dim2,p1][dim2,p2] ... +*/ +__global__ void invert_mapping(float *input, /* original */ + float *output, /* inverted */ + int npoints, /* npoints */ + int batch_size, + int nfeatures) /* nfeatures */ +{ + int point_id = threadIdx.x + blockDim.x * blockIdx.x; /* id of thread */ + + int batches = npoints / batch_size; + + for (int b = 0; b < batches; b++) + { + for (int i = 0; i < nfeatures; i++) + { + output[b * batch_size + point_id + npoints * i] = input[(b * batch_size + point_id) * nfeatures + i]; + } + } + + + + return; +} +/* ----------------- invert_mapping() end --------------------- */ + +/* to turn on the GPU delta and center reduction */ +// #define GPU_DELTA_REDUCTION +// #define GPU_NEW_CENTER_REDUCTION + +/* ----------------- kmeansPoint() --------------------- */ +/* find the index of nearest cluster centers and change membership*/ +__global__ void +kmeansPoint(float *features, /* in: [npoints*nfeatures] */ + int nfeatures, + int npoints, + int batch_size, + int nclusters, + int *membership) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // block ID + const unsigned int block_id = gridDim.x * blockIdx.y + 
blockIdx.x; + // point/thread ID + const unsigned int point_id = block_id * blockDim.x * blockDim.y + threadIdx.x; + + __shared__ float tmp_features[THREADS_PER_DIM][THREADS_PER_DIM][16]; + + int batches = npoints / batch_size; + int tile = 0; + int end_tile = tile + batches; + + for (; tile < end_tile; tile += 1) + { + for (int i = 0; i < 16; i++) + { + int addr = tile * batch_size + point_id + i * npoints; + tmp_features[threadIdx.y][threadIdx.x][i] = features[addr]; + } + block.sync(); + + int index = -1; + + float min_dist = FLT_MAX; + float dist; /* distance square between a point to cluster center */ + + /* find the cluster center id with min distance to pt */ + for (int i = 0; i < nclusters; i++) + { + int cluster_base_index = i * nfeatures; /* base index of cluster centers for inverted array */ + float ans = 0.0; /* Euclidean distance sqaure */ + + for (int j = 0; j < nfeatures; j++) + { + // int addr = point_id + j * npoints; /* appropriate index of data point */ + // float diff = (tex1Dfetch(t_features,addr) - c_clusters[cluster_base_index + j]); /* distance between a data point to cluster centers */ + + // int addr = point_id + j * npoints; /* appropriate index of data point */ + // float diff = features[addr] - c_clusters[cluster_base_index + j]; /* distance between a data point to cluster centers */ + float diff = tmp_features[threadIdx.y][threadIdx.x][j] - c_clusters[cluster_base_index + j]; /* distance between a data point to cluster centers */ + ans += diff * diff; /* sum of squares */ + } + dist = ans; + block.sync(); + + /* see if distance is smaller than previous ones: + if so, change minimum distance and save index of cluster center */ + if (dist < min_dist) + { + min_dist = dist; + index = i; + } + } + membership[tile * batch_size + point_id] = index; + block.sync(); + } + +} +#endif // #ifndef _KMEANS_CUDA_KERNEL_H_ diff --git a/workloads/realworld/pinned/kmeans/kmeans_cuda_kernel.cu.old 
b/workloads/realworld/pinned/kmeans/kmeans_cuda_kernel.cu.old new file mode 100755 index 0000000000000000000000000000000000000000..dd0dec27eebf197d811a58063b13a86c8f72e1ed --- /dev/null +++ b/workloads/realworld/pinned/kmeans/kmeans_cuda_kernel.cu.old @@ -0,0 +1,185 @@ +#ifndef _KMEANS_CUDA_KERNEL_H_ +#define _KMEANS_CUDA_KERNEL_H_ + +#include +#include + +#include "kmeans.h" + +// FIXME: Make this a runtime selectable variable! +#define ASSUMED_NR_CLUSTERS 32 + +#define SDATA( index) CUT_BANK_CHECKER(sdata, index) + +// t_features has the layout dim0[points 0-m-1]dim1[ points 0-m-1]... +texture t_features; +// t_features_flipped has the layout point0[dim 0-n-1]point1[dim 0-n-1] +texture t_features_flipped; +texture t_clusters; + + +__constant__ float c_clusters[ASSUMED_NR_CLUSTERS*34]; /* constant memory for cluster centers */ + +/* ----------------- invert_mapping() --------------------- */ +/* inverts data array from row-major to column-major. + + [p0,dim0][p0,dim1][p0,dim2] ... + [p1,dim0][p1,dim1][p1,dim2] ... + [p2,dim0][p2,dim1][p2,dim2] ... + to + [dim0,p0][dim0,p1][dim0,p2] ... + [dim1,p0][dim1,p1][dim1,p2] ... + [dim2,p0][dim2,p1][dim2,p2] ... 
+*/ +__global__ void invert_mapping(float *input, /* original */ + float *output, /* inverted */ + int npoints, /* npoints */ + int nfeatures) /* nfeatures */ +{ + int point_id = threadIdx.x + blockDim.x*blockIdx.x; /* id of thread */ + int i; + + if(point_id < npoints){ + for(i=0;i 1; threadids_participating /= 2) { + if(threadIdx.x < threadids_participating) { + deltas[threadIdx.x] += deltas[threadIdx.x + threadids_participating]; + } + __syncthreads(); + } + if(threadIdx.x < 1) {deltas[threadIdx.x] += deltas[threadIdx.x + 1];} + __syncthreads(); + // propagate number of changes to global counter + if(threadIdx.x == 0) { + block_deltas[blockIdx.y * gridDim.x + blockIdx.x] = deltas[0]; + //printf("original id: %d, modified: %d\n", blockIdx.y*gridDim.x+blockIdx.x, blockIdx.x); + + } + +#endif + + +#ifdef GPU_NEW_CENTER_REDUCTION + int center_id = threadIdx.x / nfeatures; + int dim_id = threadIdx.x - nfeatures*center_id; + + __shared__ int new_center_ids[THREADS_PER_BLOCK]; + + new_center_ids[threadIdx.x] = index; + __syncthreads(); + + /*** + determine which dimension calculte the sum for + mapping of threads is + center0[dim0,dim1,dim2,...]center1[dim0,dim1,dim2,...]... + ***/ + + int new_base_index = (point_id - threadIdx.x)*nfeatures + dim_id; + float accumulator = 0.f; + + if(threadIdx.x < nfeatures * nclusters) { + // accumulate over all the elements of this threadblock + for(int i = 0; i< (THREADS_PER_BLOCK); i++) { + float val = tex1Dfetch(t_features_flipped,new_base_index+i*nfeatures); + if(new_center_ids[i] == center_id) + accumulator += val; + } + + // now store the sum for this threadblock + /*** + mapping to global array is + block0[center0[dim0,dim1,dim2,...]center1[dim0,dim1,dim2,...]...]block1[...]... 
+ ***/ + block_clusters[(blockIdx.y*gridDim.x + blockIdx.x) * nclusters * nfeatures + threadIdx.x] = accumulator; + } +#endif + +} +#endif // #ifndef _KMEANS_CUDA_KERNEL_H_ diff --git a/workloads/realworld/pinned/kmeans/rmse.c b/workloads/realworld/pinned/kmeans/rmse.c new file mode 100755 index 0000000000000000000000000000000000000000..fe7786342bf77cab12958e630cb8d99834312f0d --- /dev/null +++ b/workloads/realworld/pinned/kmeans/rmse.c @@ -0,0 +1,95 @@ +/*************************************************************************/ +/** File: rmse.c **/ +/** Description: calculate root mean squared error of particular **/ +/** clustering. **/ +/** Author: Sang-Ha Lee **/ +/** University of Virginia. **/ +/** **/ +/** Note: euclid_dist_2() and find_nearest_point() adopted from **/ +/** Minebench code. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include + +#include "kmeans.h" + +extern double wtime(void); + +/*----< euclid_dist_2() >----------------------------------------------------*/ +/* multi-dimensional spatial Euclid distance square */ +__inline +float euclid_dist_2(float *pt1, + float *pt2, + int numdims) +{ + int i; + float ans=0.0; + + for (i=0; i-----------------------------------------------*/ +__inline +int find_nearest_point(float *pt, /* [nfeatures] */ + int nfeatures, + float **pts, /* [npts][nfeatures] */ + int npts) +{ + int index, i; + float max_dist=FLT_MAX; + + /* find the cluster center id with min distance to pt */ + for (i=0; i-------------------------------------*/ +float rms_err (float **feature, /* [npoints][nfeatures] */ + int nfeatures, + int npoints, + float **cluster_centres, /* [nclusters][nfeatures] */ + int nclusters) +{ + int i; + int nearest_cluster_index; /* cluster center id with min distance to pt */ + float sum_euclid = 0.0; /* sum of Euclidean distance squares */ + float ret; /* return value */ + + /* calculate and sum the sqaure of euclidean 
distance*/ + #pragma omp parallel for \ + shared(feature,cluster_centres) \ + firstprivate(npoints,nfeatures,nclusters) \ + private(i, nearest_cluster_index) \ + schedule (static) + for (i=0; i +#endif + +#include + +#ifndef _H_TYPES +#include +#endif + +#include + +#ifndef _H_ACCESS +#include /* for the "access" function */ +#endif + +/* + * POSIX requires that certain values be included in unistd.h. It also + * requires that when _POSIX_SOURCE is defined only those standard + * specific values are present. This header includes all the POSIX + * required entries. + */ + +#ifdef _POSIX_SOURCE +#ifdef _LARGE_FILES +#define lseek lseek64 +#endif + + +/* Symbolic constants for the "lseek" function: */ +#ifndef SEEK_SET +#define SEEK_SET 0 /* Set file pointer to "offset" */ +#define SEEK_CUR 1 /* Set file pointer to current plus "offset" */ +#define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif /* SEEK_SET */ + +#ifdef _NO_PROTO + +#ifndef _KERNEL +extern int access(); +extern unsigned int alarm(); +extern int chdir(); +extern int chown(); +extern int close(); +extern char *ctermid(); +extern int dup(); +extern int dup2(); +extern int execl(); +extern int execv(); +extern int execle(); +extern int execve(); +extern int execlp(); +extern int execvp(); +extern void _exit(); +extern pid_t fork(); +extern long fpathconf(); +extern char *getcwd(); +extern gid_t getegid(); +extern uid_t geteuid(); +extern gid_t getgid(); +extern int getgroups(); +extern char *getlogin(); +extern pid_t getpgrp(); +extern pid_t getpid(); +extern pid_t getppid(); +extern uid_t getuid(); +extern int isatty(); +extern int link(); +extern off_t lseek(); +extern long pathconf(); +extern int pause(); +extern int pipe(); +#if defined(_XOPEN_SOURCE) && ( _XOPEN_SOURCE >= 500 ) +extern int pthread_atfork(); +#endif +extern int read(); +extern int rmdir(); +extern int setgid(); +extern int setpgid(); +extern int setsid(); +extern int setuid(); +extern unsigned int sleep(); +extern long 
sysconf(); +extern pid_t tcgetpgrp(); +extern int tcsetpgrp(); +extern char *ttyname(); +extern int unlink(); +extern int write(); +#endif /* !_KERNEL */ + +#else /* POSIX required prototypes */ + +#ifndef _KERNEL +extern int access(const char *, int); +extern unsigned int alarm(unsigned int); +extern int chdir(const char *); +extern int chown(const char *, uid_t, gid_t); +extern int close(int); +extern char *ctermid(char *); +extern int dup(int); +extern int dup2(int, int); +extern int execl(const char *, const char *, ...); +extern int execv(const char *, char *const []); +extern int execle(const char *, const char *, ...); +extern int execve(const char *, char *const [], char *const []); +extern int execlp(const char *, const char *, ...); +extern int execvp(const char *, char *const []); +extern void _exit(int); +extern pid_t fork(void); +extern long fpathconf(int, int); +extern char *getcwd(char *, size_t); +extern gid_t getegid(void); +extern uid_t geteuid(void); +extern gid_t getgid(void); +extern int getgroups(int, gid_t []); +extern char *getlogin(void); +#ifndef _BSD +extern pid_t getpgrp(void); +#endif /* _BSD */ +extern pid_t getpid(void); +extern pid_t getppid(void); +extern uid_t getuid(void); +extern int isatty(int); +extern int link(const char *, const char *); +extern off_t lseek(int, off_t, int); +#ifdef _LARGE_FILE_API +extern off64_t lseek64(int, off64_t, int); +#endif +extern long pathconf(const char *, int); +extern int pause(void); +extern int pipe(int []); +#if defined(_XOPEN_SOURCE) && ( _XOPEN_SOURCE >= 500 ) +extern int pthread_atfork(void (*)(void), void (*)(void), void (*)(void)); +#endif +extern ssize_t read(int, void *, size_t); +extern int rmdir(const char *); +extern int setgid(gid_t); +extern int setpgid(pid_t, pid_t); +extern pid_t setsid(void); +extern int setuid(uid_t); +extern unsigned int sleep(unsigned int); +extern long sysconf(int); +extern pid_t tcgetpgrp(int); +extern int tcsetpgrp(int, pid_t); +extern char *ttyname(int); 
+extern int unlink(const char *); +extern ssize_t write(int, const void *, size_t); +#endif /* !_KERNEL */ +#endif /* !_NO_PROTO */ + +#define STDIN_FILENO 0 +#define STDOUT_FILENO 1 +#define STDERR_FILENO 2 + +#define _POSIX_JOB_CONTROL 1 +#define _POSIX_SAVED_IDS 1 + +#define _POSIX_VERSION 200112L +#define _POSIX2_VERSION 200112L +#define _POSIX2_C_VERSION 200112L + + +#ifdef _XOPEN_SOURCE + +#define _XOPEN_VERSION 600 +#define _XOPEN_XCU_VERSION 4 +#define _XOPEN_XPG3 1 +#define _XOPEN_XPG4 1 +#define _XOPEN_UNIX 1 + +#define _XOPEN_REALTIME (-1) +#define _XOPEN_REALTIME_THREADS (-1) + +#if (_XOPEN_SOURCE >= 600) +#define _XOPEN_STREAMS 1 +#endif + +#define _XBS5_ILP32_OFF32 1 +#define _XBS5_ILP32_OFFBIG 1 +#define _XBS5_LP64_OFF64 1 +#define _XBS5_LPBIG_OFFBIG 1 + +#define _POSIX2_C_BIND 200112L +#define _POSIX2_C_DEV 200112L +#define _POSIX2_CHAR_TERM 1 +#define _POSIX2_LOCALEDEF 200112L +#define _POSIX2_UPE 200112L +#define _POSIX2_FORT_DEV (-1) +#define _POSIX2_FORT_RUN (-1) +#define _POSIX2_SW_DEV (-1) + +#if (_POSIX_C_SOURCE >= 200112L) +#define _POSIX_REGEXP 1 +#define _POSIX_SHELL 1 +#define _POSIX2_PBS (-1) +#define _POSIX2_PBS_ACCOUNTING (-1) +#define _POSIX2_PBS_CHECKPOINT (-1) +#define _POSIX2_PBS_LOCATE (-1) +#define _POSIX2_PBS_MESSAGE (-1) +#define _POSIX2_PBS_TRACK (-1) +#define _V6_ILP32_OFF32 1 +#define _V6_ILP32_OFFBIG 1 +#define _V6_LP64_OFF64 1 +#define _V6_LPBIG_OFFBIG 1 + +#define _POSIX_ADVISORY_INFO 200112L +#define _POSIX_BARRIERS 200112L +#define _POSIX_CLOCK_SELECTION 200112L +#define _POSIX_CPUTIME 200112L +#define _POSIX_MONOTONIC_CLOCK 200112L + +#ifdef _POSIX_RAW_SOCKETS +#undef _POSIX_RAW_SOCKETS +#endif + +#define _POSIX_SPAWN 200112L +#define _POSIX_SPIN_LOCKS 200112L +#define _POSIX_SPORADIC_SERVER (-1) +#define _POSIX_THREAD_CPUTIME 200112L +#define _POSIX_THREAD_SPORADIC_SERVER (-1) +#define _POSIX_TIMEOUTS 200112L +#define _POSIX_TRACE (-1) +#define _POSIX_TRACE_EVENT_FILTER (-1) +#define _POSIX_TRACE_INHERIT (-1) +#define 
_POSIX_TRACE_LOG (-1) +#define _POSIX_TYPED_MEMORY_OBJECTS (-1) + +#endif /* _POSIX_C_SOURCE >= 200112L */ + +#define _XOPEN_CRYPT 1 +#define _XOPEN_SHM 1 +#define _XOPEN_ENH_I18N 1 +#define _XOPEN_LEGACY (-1) +#ifndef __64BIT__ +#define _UNIX_ABI (-1) +#define _UNIX_ABI_IA64 (-1) +#define _UNIX_ABI_BIG_ENDIAN (-1) +#define _UNIX_ABI_LITTLE_ENDIAN (-1) +#endif /* __64BIT__ */ + +extern char *optarg; +extern int optind, opterr, optopt; + +#ifdef _NO_PROTO + extern size_t confstr(); + extern char *crypt(); + extern void encrypt(); + extern int fsync(); + extern int getopt(); + extern int nice(); + extern void swab(); +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern char *getpass(); + extern int chroot(); +#endif +#else + extern size_t confstr(int, char*, size_t); + extern char *crypt(const char *, const char *); + extern void encrypt(char *, int); + extern int fsync(int); + extern int getopt(int, char* const*, const char*); + extern int nice(int); + extern void swab(const void *, void *, ssize_t); + extern int fdatasync(int); +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern char *getpass(const char *); + extern int chroot(const char *); +#endif +#endif + +#endif /* _XOPEN _SOURCE */ + +/* Threads options for 1003.1c and XPG UNIX98 */ +#define _POSIX_THREADS 200112L +#define _POSIX_THREAD_ATTR_STACKADDR 200112L +#define _POSIX_THREAD_ATTR_STACKSIZE 200112L +#define _POSIX_THREAD_PROCESS_SHARED 200112L +#define _POSIX_THREAD_SAFE_FUNCTIONS 200112L +#ifdef _ALL_SOURCE +#define _POSIX_REENTRANT_FUNCTIONS _POSIX_THREAD_SAFE_FUNCTIONS +#endif + +/* Realtime threads options for 1003.1c and XPG UNIX98 */ +#define _POSIX_THREAD_PRIORITY_SCHEDULING (-1) +#define _POSIX_THREAD_PRIO_INHERIT (-1) +#define _POSIX_THREAD_PRIO_PROTECT (-1) + +#undef _POSIX_THREAD_FORKALL + +/* Realtime options for 1003.1c and XPG UNIX98 */ +#define _POSIX_ASYNCHRONOUS_IO 200112L +#define _POSIX_FSYNC 200112L 
+#define _POSIX_MAPPED_FILES 200112L +#define _POSIX_MEMLOCK 200112L +#define _POSIX_MEMLOCK_RANGE 200112L +#define _POSIX_MEMORY_PROTECTION 200112L +#define _POSIX_MESSAGE_PASSING 200112L +#define _POSIX_PRIORITIZED_IO 200112L +#define _POSIX_PRIORITY_SCHEDULING 200112L +#define _POSIX_REALTIME_SIGNALS 200112L +#define _POSIX_SEMAPHORES 200112L +#define _POSIX_SHARED_MEMORY_OBJECTS 200112L +#define _POSIX_SYNCHRONIZED_IO 200112L +#define _POSIX_TIMERS 200112L + +#define _POSIX_ASYNC_IO (-1) +#undef _POSIX_SYNC_IO +#define _POSIX_PRIO_IO (-1) + +#define _POSIX_CHOWN_RESTRICTED 0 +#define _POSIX_VDISABLE 0xFF +#define _POSIX_NO_TRUNC 0 + + /* UNIX03 and POSIX01 */ + /* Always enabled */ +#define _POSIX_IPV6 200112L +#define _POSIX_RAW_SOCKETS 200112L + + +#ifndef NULL +#define NULL 0 +#endif + +#if (_POSIX_C_SOURCE >= 200112L) +#define _POSIX_READER_WRITER_LOCKS 200112L +#endif + +/* arguments for the confstr() function */ + +#define _CS_PATH 1 + + /* compile,link,lib,lint flags for 32bit, no_LARGE_FILES system */ +#define _CS_XBS5_ILP32_OFF32_CFLAGS 2 +#define _CS_XBS5_ILP32_OFF32_LDFLAGS 3 +#define _CS_XBS5_ILP32_OFF32_LIBS 4 +#define _CS_XBS5_ILP32_OFF32_LINTFLAGS 5 + + /* compile,link,lib,lint flags for 32bit, _LARGE_FILES system */ +#define _CS_XBS5_ILP32_OFFBIG_CFLAGS 6 +#define _CS_XBS5_ILP32_OFFBIG_LDFLAGS 7 +#define _CS_XBS5_ILP32_OFFBIG_LIBS 8 +#define _CS_XBS5_ILP32_OFFBIG_LINTFLAGS 9 + + /* compile,link,lib,lint flags for LP64 64bit system */ +#define _CS_XBS5_LP64_OFF64_CFLAGS 10 +#define _CS_XBS5_LP64_OFF64_LDFLAGS 11 +#define _CS_XBS5_LP64_OFF64_LIBS 12 +#define _CS_XBS5_LP64_OFF64_LINTFLAGS 13 + + /* compile,link,lib,lint flags for ILP64 64bit system */ + /* AIX does not currently support this */ +#define _CS_XBS5_LPBIG_OFFBIG_CFLAGS 14 +#define _CS_XBS5_LPBIG_OFFBIG_LDFLAGS 15 +#define _CS_XBS5_LPBIG_OFFBIG_LIBS 16 +#define _CS_XBS5_LPBIG_OFFBIG_LINTFLAGS 17 + +#define _CS_AIX_BOOTDEV 24 +#define _CS_AIX_MODEL_CODE 25 +#define _CS_AIX_ARCHITECTURE 
26 +#define _CS_AIX_MODEL_CLASS 40 + +#if (_POSIX_C_SOURCE >= 200112L) +#define _CS_POSIX_V6_ILP32_OFF32_CFLAGS 27 +#define _CS_POSIX_V6_ILP32_OFF32_LDFLAGS 28 +#define _CS_POSIX_V6_ILP32_OFF32_LIBS 29 +#define _CS_POSIX_V6_ILP32_OFFBIG_CFLAGS 30 +#define _CS_POSIX_V6_ILP32_OFFBIG_LDFLAGS 31 +#define _CS_POSIX_V6_ILP32_OFFBIG_LIBS 32 +#define _CS_POSIX_V6_LP64_OFF64_CFLAGS 33 +#define _CS_POSIX_V6_LP64_OFF64_LDFLAGS 34 +#define _CS_POSIX_V6_LP64_OFF64_LIBS 35 +#define _CS_POSIX_V6_LPBIG_OFFBIG_CFLAGS 36 +#define _CS_POSIX_V6_LPBIG_OFFBIG_LDFLAGS 37 +#define _CS_POSIX_V6_LPBIG_OFFBIG_LIBS 38 +#define _CS_POSIX_V6_WIDTH_RESTRICTED_ENVS 39 +#endif + + /* Values for the above */ +#define _CSPATH "/usr/bin:/usr/vac/bin" + + /* ILP32_OFF32 */ +#define _CSPOSIX_V6_ILP32_OFF32_CFLAGS "-q32" +#define _CSXBS5_ILP32_OFF32_CFLAGS _CSPOSIX_V6_ILP32_OFF32_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_ILP32_OFF32_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_ILP32_OFF32_LDFLAGS "-b32" +#define _CSXBS5_ILP32_OFF32_LDFLAGS _CSPOSIX_V6_ILP32_OFF32_LDFLAGS +#endif + +#define _CSPOSIX_V6_ILP32_OFF32_LIBS "-lc -lpthread -lm" +#define _CSXBS5_ILP32_OFF32_LIBS _CSPOSIX_V6_ILP32_OFF32_LIBS + +#define _CSXBS5_ILP32_OFF32_LINTFLAGS "" + + /* ILP32_OFFOFFBIG */ +#define _CSPOSIX_V6_ILP32_OFFBIG_CFLAGS "-q32 -D_LARGE_FILES -qlonglong" +#define _CSXBS5_ILP32_OFFBIG_CFLAGS _CSPOSIX_V6_ILP32_OFFBIG_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_ILP32_OFFBIG_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_ILP32_OFFBIG_LDFLAGS "-b32" +#define _CSXBS5_ILP32_OFFBIG_LDFLAGS _CSPOSIX_V6_ILP32_OFFBIG_LDFLAGS +#endif + +#define _CSPOSIX_V6_ILP32_OFFBIG_LIBS "-lc -lpthread -lm" +#define _CSXBS5_ILP32_OFFBIG_LIBS _CSPOSIX_V6_ILP32_OFFBIG_LIBS + +#define _CSXBS5_ILP32_OFFBIG_LINTFLAGS "-D_LARGE_FILES -qlonglong" + + /* LP64_OFF64 */ +#define _CSPOSIX_V6_LP64_OFF64_CFLAGS "-q64" +#define _CSXBS5_LP64_OFF64_CFLAGS _CSPOSIX_V6_LP64_OFF64_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_LP64_OFF64_LDFLAGS "" +#else /* 
POWER */ +#define _CSPOSIX_V6_LP64_OFF64_LDFLAGS "-b64" +#define _CSXBS5_LP64_OFF64_LDFLAGS _CSPOSIX_V6_LP64_OFF64_LDFLAGS +#endif + +#define _CSPOSIX_V6_LP64_OFF64_LIBS "-lc -lpthread -lm" +#define _CSXBS5_LP64_OFF64_LIBS _CSPOSIX_V6_LP64_OFF64_LIBS + +#define _CSXBS5_LP64_OFF64_LINTFLAGS "-D__64BIT__" + + /* LPBIG_OFFBIG */ +#define _CSPOSIX_V6_LPBIG_OFFBIG_CFLAGS "-q64" +#define _CSXBS5_LPBIG_OFFBIG_CFLAGS _CSPOSIX_V6_LPBIG_OFFBIG_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_LPBIG_OFFBIG_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_LPBIG_OFFBIG_LDFLAGS "-b64" +#define _CSXBS5_LPBIG_OFFBIG_LDFLAGS _CSPOSIX_V6_LPBIG_OFFBIG_LDFLAGS +#endif + +#define _CSPOSIX_V6_LPBIG_OFFBIG_LIBS "-lc -lpthread -lm" +#define _CSXBS5_LPBIG_OFFBIG_LIBS _CSPOSIX_V6_LPBIG_OFFBIG_LIBS + +#define _CSXBS5_LPBIG_OFFBIG_LINTFLAGS "-D__64BIT__" + +#if (_POSIX_C_SOURCE >= 200112L) +#define _CSPOSIX_V6_WIDTH_RESTRICTED_ENVS \ + "POSIX_V6_ILP32_OFF32\n" \ + "POSIX_V6_ILP32_OFFBIG\n" \ + "POSIX_V6_LP64_OFF64\n" \ + "POSIX_V6_LPBIG_OFFBIG" +#endif + +/* arguments for the pathconf() function */ + +#define _PC_CHOWN_RESTRICTED 10 +#define _PC_LINK_MAX 11 +#define _PC_MAX_CANON 12 +#define _PC_MAX_INPUT 13 +#define _PC_NAME_MAX 14 +#define _PC_NO_TRUNC 15 +#define _PC_PATH_MAX 16 +#define _PC_PIPE_BUF 17 +#define _PC_VDISABLE 18 +#define _PC_ASYNC_IO 19 +#define _PC_SYNC_IO 20 +#define _PC_PRIO_IO 21 +#define _PC_FILESIZEBITS 22 /* # bits needed to hold offset */ +#define _PC_AIX_DISK_PARTITION 23 +#define _PC_AIX_DISK_SIZE 24 +#if (_POSIX_C_SOURCE >= 200112L) +#define _PC_SYMLINK_MAX 25 +#define _PC_ALLOC_SIZE_MIN 26 +#define _PC_REC_INCR_XFER_SIZE 27 +#define _PC_REC_MAX_XFER_SIZE 28 +#define _PC_REC_MIN_XFER_SIZE 29 +#define _PC_REC_XFER_ALIGN 30 +#define _PC_2_SYMLINKS 31 +#endif + +/* arguments for the sysconf() function, the defined numbers are used as + * array index in sysconf(). 
+ * + * POSIX.1(1990), Table 4-2 + */ +#define _SC_ARG_MAX 0 +#define _SC_CHILD_MAX 1 +#define _SC_CLK_TCK 2 +#define _SC_NGROUPS_MAX 3 +#define _SC_OPEN_MAX 4 +#define _SC_STREAM_MAX 5 +#define _SC_TZNAME_MAX 6 +#define _SC_JOB_CONTROL 7 +#define _SC_SAVED_IDS 8 +#define _SC_VERSION 9 + +/* POSIX.1(1990), Table 2-3, required by command getconf */ + +#define _SC_POSIX_ARG_MAX 10 +#define _SC_POSIX_CHILD_MAX 11 +#define _SC_POSIX_LINK_MAX 12 +#define _SC_POSIX_MAX_CANON 13 +#define _SC_POSIX_MAX_INPUT 14 +#define _SC_POSIX_NAME_MAX 15 +#define _SC_POSIX_NGROUPS_MAX 16 +#define _SC_POSIX_OPEN_MAX 17 +#define _SC_POSIX_PATH_MAX 18 +#define _SC_POSIX_PIPE_BUF 19 +#define _SC_POSIX_SSIZE_MAX 20 +#define _SC_POSIX_STREAM_MAX 21 +#define _SC_POSIX_TZNAME_MAX 22 + +/* POSIX.2 (Draft 10), Table 41) */ + +#define _SC_BC_BASE_MAX 23 +#define _SC_BC_DIM_MAX 24 +#define _SC_BC_SCALE_MAX 25 +#define _SC_BC_STRING_MAX 26 +#define _SC_EQUIV_CLASS_MAX 27 +#define _SC_EXPR_NEST_MAX 28 +#define _SC_LINE_MAX 29 +#define _SC_RE_DUP_MAX 30 +#define _SC_2_VERSION 31 +#define _SC_2_C_DEV 32 +#define _SC_2_FORT_DEV 33 +#define _SC_2_FORT_RUN 34 +#define _SC_2_LOCALEDEF 35 +#define _SC_2_SW_DEV 36 + +/* POSIX.2 (Draft 10), Table 13) */ + +#define _SC_POSIX2_BC_BASE_MAX 37 +#define _SC_POSIX2_BC_DIM_MAX 38 +#define _SC_POSIX2_BC_SCALE_MAX 39 +#define _SC_POSIX2_BC_STRING_MAX 40 +#define _SC_POSIX2_EQUIV_CLASS_MAX 41 +#define _SC_POSIX2_EXPR_NEST_MAX 42 +#define _SC_POSIX2_LINE_MAX 43 +#define _SC_POSIX2_RE_DUP_MAX 44 +#define _SC_PASS_MAX 45 +#define _SC_XOPEN_VERSION 46 +#define _SC_ATEXIT_MAX 47 +#if _XOPEN_SOURCE_EXTENDED==1 +#define _SC_PAGE_SIZE 48 +#endif /* _XOPEN_SOURCE_EXTENDED */ +#define _SC_AES_OS_VERSION 49 +#define _SC_COLL_WEIGHTS_MAX 50 +#define _SC_2_C_BIND 51 +#define _SC_2_C_VERSION 52 +#define _SC_2_UPE 53 +#define _SC_2_CHAR_TERM 54 +#define _SC_XOPEN_SHM 55 +#define _SC_XOPEN_CRYPT 56 +#define _SC_XOPEN_ENH_I18N 57 +#if _XOPEN_SOURCE_EXTENDED==1 +#define _SC_PAGESIZE 
_SC_PAGE_SIZE +#define _SC_IOV_MAX 58 +#endif /* _XOPEN_SOURCE_EXTENDED */ +#define _SC_THREAD_SAFE_FUNCTIONS 59 +#define _SC_THREADS 60 +#define _SC_THREAD_ATTR_STACKADDR 61 +#define _SC_THREAD_ATTR_STACKSIZE 62 +#define _SC_THREAD_FORKALL 63 +#define _SC_THREAD_PRIORITY_SCHEDULING 64 +#define _SC_THREAD_PRIO_INHERIT 65 +#define _SC_THREAD_PRIO_PROTECT 66 +#define _SC_THREAD_PROCESS_SHARED 67 +#define _SC_THREAD_KEYS_MAX 68 +#define _SC_THREAD_DATAKEYS_MAX _SC_THREAD_KEYS_MAX +#define _SC_THREAD_STACK_MIN 69 +#define _SC_THREAD_THREADS_MAX 70 +#ifdef _ALL_SOURCE +#define _SC_NPROCESSORS_CONF 71 +#define _SC_NPROCESSORS_ONLN 72 +#endif /* _ALL_SOURCE */ +#define _SC_XOPEN_UNIX 73 + +#if (_XOPEN_SOURCE >= 500) + +/* POSIX 1003.1c and XPG UNIX98 */ +/* look to defines above for meanings */ +#define _SC_AIO_LISTIO_MAX 75 +#define _SC_AIO_MAX 76 +#define _SC_AIO_PRIO_DELTA_MAX 77 +#define _SC_ASYNCHRONOUS_IO 78 +#define _SC_DELAYTIMER_MAX 79 +#define _SC_FSYNC 80 +#define _SC_GETGR_R_SIZE_MAX 81 +#define _SC_GETPW_R_SIZE_MAX 82 +#define _SC_LOGIN_NAME_MAX 83 +#define _SC_MAPPED_FILES 84 +#define _SC_MEMLOCK 85 +#define _SC_MEMLOCK_RANGE 86 +#define _SC_MEMORY_PROTECTION 87 +#define _SC_MESSAGE_PASSING 88 +#define _SC_MQ_OPEN_MAX 89 +#define _SC_MQ_PRIO_MAX 90 +#define _SC_PRIORITIZED_IO 91 +#define _SC_PRIORITY_SCHEDULING 92 +#define _SC_REALTIME_SIGNALS 93 +#define _SC_RTSIG_MAX 94 +#define _SC_SEMAPHORES 95 +#define _SC_SEM_NSEMS_MAX 96 +#define _SC_SEM_VALUE_MAX 97 +#define _SC_SHARED_MEMORY_OBJECTS 98 +#define _SC_SIGQUEUE_MAX 99 +#define _SC_SYNCHRONIZED_IO 100 +#define _SC_THREAD_DESTRUCTOR_ITERATIONS 101 +#define _SC_TIMERS 102 +#define _SC_TIMER_MAX 103 +#define _SC_TTY_NAME_MAX 104 +#define _SC_XBS5_ILP32_OFF32 105 +#define _SC_XBS5_ILP32_OFFBIG 106 +#define _SC_XBS5_LP64_OFF64 107 +#define _SC_XBS5_LPBIG_OFFBIG 108 +#define _SC_XOPEN_XCU_VERSION 109 +#define _SC_XOPEN_REALTIME 110 +#define _SC_XOPEN_REALTIME_THREADS 111 +#define _SC_XOPEN_LEGACY 112 +#endif 
/* _XOPEN_SOURCE >= 500 */ + +#ifdef _ALL_SOURCE +#define _SC_REENTRANT_FUNCTIONS _SC_THREAD_SAFE_FUNCTIONS +#define _SC_PHYS_PAGES 113 +#define _SC_AVPHYS_PAGES 114 +#define _SC_LPAR_ENABLED 115 +#define _SC_LARGE_PAGESIZE 116 +#endif /* _ALL_SOURCE */ + +#define _SC_AIX_KERNEL_BITMODE 117 +#define _SC_AIX_REALMEM 118 +#define _SC_AIX_HARDWARE_BITMODE 119 +#define _SC_AIX_MP_CAPABLE 120 + +#define _SC_V6_ILP32_OFF32 121 +#define _SC_V6_ILP32_OFFBIG 122 +#define _SC_V6_LP64_OFF64 123 +#define _SC_V6_LPBIG_OFFBIG 124 + +#define _SC_XOPEN_STREAMS 125 + +#if (_POSIX_C_SOURCE >= 200112L) +#define _SC_HOST_NAME_MAX 126 +#define _SC_REGEXP 127 +#define _SC_SHELL 128 +#define _SC_SYMLOOP_MAX 129 +#define _SC_ADVISORY_INFO 130 +#define _SC_FILE_LOCKING 131 +#define _SC_2_PBS 132 +#define _SC_2_PBS_ACCOUNTING 133 +#define _SC_2_PBS_CHECKPOINT 134 +#define _SC_2_PBS_LOCATE 135 +#define _SC_2_PBS_MESSAGE 136 +#define _SC_2_PBS_TRACK 137 +#define _SC_BARRIERS 138 +#define _SC_CLOCK_SELECTION 139 +#define _SC_CPUTIME 140 +#define _SC_MONOTONIC_CLOCK 141 +#define _SC_READER_WRITER_LOCKS 142 +#define _SC_SPAWN 143 +#define _SC_SPIN_LOCKS 144 +#define _SC_SPORADIC_SERVER 145 +#define _SC_THREAD_CPUTIME 146 +#define _SC_THREAD_SPORADIC_SERVER 147 +#define _SC_TIMEOUTS 148 +#define _SC_TRACE 149 +#define _SC_TRACE_EVENT_FILTER 150 +#define _SC_TRACE_INHERIT 151 +#define _SC_TRACE_LOG 152 +#define _SC_TYPED_MEMORY_OBJECTS 153 +#define _SC_IPV6 154 +#define _SC_RAW_SOCKETS 155 +#define _SC_SS_REPL_MAX 156 +#define _SC_TRACE_EVENT_NAME_MAX 157 +#define _SC_TRACE_NAME_MAX 158 +#define _SC_TRACE_SYS_MAX 159 +#define _SC_TRACE_USER_EVENT_MAX 160 +#endif /* _POSIX_C_SOURCE >= 200112L */ + +#ifdef _ALL_SOURCE +#define _SC_AIX_UKEYS 161 +#endif /* _ALL_SOURCE */ + +#endif /* _POSIX_SOURCE */ + + +#if _XOPEN_SOURCE_EXTENDED==1 +#ifdef _LARGE_FILES +#define ftruncate ftruncate64 +#define truncate truncate64 +#endif + +#ifndef _H_LOCKF +#include /* lockf definitions for portability */ +#endif + 
+#ifdef _NO_PROTO +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern int brk(); + extern int getpagesize(); +#ifndef _MSGQSUPPORT + extern int __fd_getdtablesize(); + static int getdtablesize() + { + return __fd_getdtablesize(); + } +#else + extern int getdtablesize(); +#endif /* _MSGQSUPPORT */ + + extern void *sbrk(); +#endif /* _POSIX_C_SOURCE<200112L */ + extern int fchdir(); + extern int fchown(); + extern int ftruncate(); + extern long gethostid(); + extern int gethostname(); + extern pid_t getpgid(); + extern pid_t getsid(); + extern char *getwd(); + extern int lchown(); + extern int readlink(); + extern pid_t setpgrp(); + extern int setregid(); + extern int setreuid(); + extern int symlink(); + extern void sync(); + extern int truncate(); + extern useconds_t ualarm(); + extern int usleep(); + extern pid_t vfork(); +#else /* _NO_PROTO */ +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern int brk(void *); + extern int getpagesize(void); +#ifndef _MSGQSUPPORT + extern int __fd_getdtablesize(void); + static int getdtablesize() + { + return __fd_getdtablesize(); + } +#else + extern int getdtablesize(void); +#endif /* _MSGQSUPPORT */ +#ifdef _LINUX_SOURCE_COMPAT + extern void *sbrk(ptrdiff_t); +#elif (_XOPEN_SOURCE >= 500) || defined(__64BIT__) + extern void *sbrk(intptr_t); +#else + extern void *sbrk(int); +#endif +#endif /* _POSIX_C_SOURCE<200112L */ + extern int fchdir(int); + extern int fchown(int, uid_t, gid_t); + extern int ftruncate(int, off_t); +#ifdef _LARGE_FILE_API + extern int ftruncate64(int, off64_t); +#endif + extern int gethostname(char *, size_t); + extern long gethostid(void); + extern pid_t getpgid(pid_t); + extern pid_t getsid(pid_t); + extern char *getwd(char *); + extern int lchown(const char *, uid_t, gid_t); + +#if (defined(_SUSV3_READLINK) || \ + (!defined(_ALL_SOURCE) && (_POSIX_C_SOURCE >= 200112L))) + /* If SUSV3 readlink specifically requested or if 
strict SUSv3 + * environment requested */ +#ifdef __64BIT__ +static ssize_t readlink(const char *__restrict__ __path, + char *__restrict__ __buf, size_t __bufsize) +{ + extern ssize_t __readlink64(const char *__restrict__, char *__restrict__, size_t); + return __readlink64(__path, __buf, __bufsize); +} +#else + extern ssize_t readlink(const char *__restrict__, char *__restrict__, size_t); +#endif /* __64BIT__ */ +#else + extern int readlink(const char *, char *, size_t); +#endif /* _SUSV3_READLINK || !_ALL_SOURCE && _POSIX_C_SOURCE >= 200112L */ + +#ifndef _BSD + extern pid_t setpgrp(void); +#endif /* _BSD */ + extern int setregid(gid_t, gid_t); + extern int setreuid(uid_t, uid_t); + extern int symlink(const char *, const char *); + extern void sync(void); + extern int truncate(const char *, off_t); +#ifdef _LARGE_FILE_API + extern int truncate64(const char *, off64_t); +#endif + extern useconds_t ualarm(useconds_t, useconds_t); + extern int usleep(useconds_t); + extern pid_t vfork(void); +#if _XOPEN_SOURCE>=500 + extern int getlogin_r(char *, size_t); + extern int ttyname_r(int, char *, size_t); + +#ifdef _LARGE_FILES +#define pread pread64 +#define pwrite pwrite64 +#endif /* _LARGE_FILES */ + + extern ssize_t pread(int, void *, size_t, off_t); + extern ssize_t pwrite(int, const void *, size_t, off_t); +#ifdef _LARGE_FILE_API + extern ssize_t pread64(int, void *, size_t, off64_t); + extern ssize_t pwrite64(int, const void *, size_t, off64_t); +#endif /* _LARGE_FILE_API */ +#endif /* _XOPEN_SOURCE>=500 */ + +#endif /* _NO_PROTO */ + +#endif /* _XOPEN_SOURCE_EXTENDED */ + +#ifdef _ALL_SOURCE + +extern char **environ; + +#ifndef _KERNEL +#ifdef _NO_PROTO + extern pid_t f_fork(); +#else /* _NO_PROTO */ + extern pid_t f_fork(void); +#endif /* _NO_PROTO */ +#endif /* _KERNEL */ + +#ifdef _NO_PROTO + extern char * cuserid(); + extern int ioctl(); +#ifdef __64BIT__ + extern int ioctlx(); + extern int ioctl32(); + extern int ioctl32x(); +#endif /* __64BIT__ */ + extern int 
readx(); + extern int setgroups(); + extern int writex(); + extern int setegid(); + extern int seteuid(); + extern int setrgid(); + extern int setruid(); + extern offset_t llseek(); + extern char * getusershell(); + extern void setusershell(); + extern void endusershell(); + extern char * get_current_dir_name(); + extern int sysfs(); +#else + extern char * cuserid(char *); + extern int setegid(gid_t); + extern int seteuid(uid_t); + extern int setrgid(gid_t); + extern int setruid(uid_t); +#ifndef _BSD + extern int ioctl(int, int, ...); +#endif /* _BSD */ +#ifdef __64BIT__ + extern int ioctlx(int, int, void *, long); + extern int ioctl32(int, int, ...); + extern int ioctl32x(int, int, unsigned int, unsigned int); +#endif /* __64BIT__ */ + extern int setgroups(int, gid_t []); +#ifndef _KERNEL + extern int readx(int, char*, unsigned, long); + extern int writex(int, char*, unsigned, long); + +#ifdef _LARGE_FILES +#define fclear fclear64 +#define fsync_range fsync_range64 +#endif + extern off_t fclear(int, off_t); + extern int fsync_range(int, int, off_t, off_t); +#ifdef _LARGE_FILE_API + extern off64_t fclear64(int, off64_t); + extern int fsync_range64(int, int, off64_t, off64_t); +#endif + extern offset_t llseek(int, offset_t, int); + extern char * getusershell(void); + extern void setusershell(void); + extern void endusershell(void); + extern char * get_current_dir_name(void); + extern int sysfs(int, ...); + extern int finfo(const char *, int, void *, int32long64_t); + extern int ffinfo(int, int, void *, int32long64_t); + +#endif /* ndef _KERNEL */ + +#endif /* _NO_PROTO */ + +#define _AES_OS_VERSION 1 /* OSF, AES version */ + +#endif /* _ALL_SOURCE */ + +#ifdef __cplusplus +} +#endif + +#endif /* _H_UNISTD */ diff --git a/workloads/realworld/pinned/knn/Makefile b/workloads/realworld/pinned/knn/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..6ebd397ea3d2a2082eb070de41b7f1eb452dbf53 --- /dev/null +++ 
b/workloads/realworld/pinned/knn/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := knn +CUFILES := knn_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o knn + diff --git a/workloads/realworld/pinned/knn/knn b/workloads/realworld/pinned/knn/knn new file mode 100755 index 0000000000000000000000000000000000000000..5343d68d55c9db620cf8318318757a4e91ff42b0 Binary files /dev/null and b/workloads/realworld/pinned/knn/knn differ diff --git a/workloads/realworld/pinned/knn/knn_cuda.cu b/workloads/realworld/pinned/knn/knn_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..d67babcdd7236c2568843c634353bd0087928397 --- /dev/null +++ b/workloads/realworld/pinned/knn/knn_cuda.cu @@ -0,0 +1,588 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. 
Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +int ref_nb = 4096; // Reference point number, max=65535 +int query_nb = 4096; // Query point number, max=65535 +int dim = 128; // Dimension of points +int k = 20; // Nearest neighbors to consider +int iterations = 100; + +//-----------------------------------------------------------------------------------------------// +// KERNELS // +//-----------------------------------------------------------------------------------------------// +__global__ void extract_with_interpolation(int nthreads, float *data, + float *n_xy_coords, + float *extracted_data, + int n_max_coord, int channels, + int height, int width) { + + int x0, x1, y0, y1, nc; + float wx0, wx1, wy0, wy1; + int n, nd; + float x, y; + + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + n = (index / n_max_coord); + nd = n * n_max_coord * channels; + x = n_xy_coords[index * 2]; + y = n_xy_coords[index * 2 + 1]; + + x0 = static_cast(floor(x)); + x1 = x0 + 1; + y0 = static_cast(floor(y)); + y1 = y0 + 1; + + x0 = x0 <= 0 ? 0 : (x0 >= (width - 1) ? (width - 1) : x0); + y0 = y0 <= 0 ? 0 : (y0 >= (height - 1) ? (height - 1) : y0); + x1 = x1 <= 0 ? 0 : (x1 >= (width - 1) ? (width - 1) : x1); + y1 = y1 <= 0 ? 0 : (y1 >= (height - 1) ? 
(height - 1) : y1); + + wx0 = static_cast(x1) - x; + wx1 = x - x0; + wy0 = static_cast(y1) - y; + wy1 = y - y0; + + if (x0 == x1) { + wx0 = 1; + wx1 = 0; + } + if (y0 == y1) { + wy0 = 1; + wy1 = 0; + } + for (int c = 0; c < channels; c++) { + nc = (n * channels + c) * height; + // extracted_data[index * channels + c] = wy0 * wx0 * data[(nc + y0) * + // width + x0] + // extracted_data[nd + index % n_max_coord + n_max_coord * c] = index; + extracted_data[nd + index % n_max_coord + n_max_coord * c] = + wy0 * wx0 * data[(nc + y0) * width + x0] + + wy1 * wx0 * data[(nc + y1) * width + x0] + + wy0 * wx1 * data[(nc + y0) * width + x1] + + wy1 * wx1 * data[(nc + y1) * width + x1]; + } + } +} + +/** + * Computes the distance between two matrix A (reference points) and + * B (query points) containing respectively wA and wB points. + * + * @param A pointer on the matrix A + * @param wA width of the matrix A = number of points in A + * @param B pointer on the matrix B + * @param wB width of the matrix B = number of points in B + * @param dim dimension of points = height of matrices A and B + * @param AB pointer on the matrix containing the wA*wB distances computed + */ +__global__ void cuComputeDistanceGlobal(float *A, int wA, float *B, int wB, + int dim, float *AB) { + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // Declaration of the shared memory arrays As and Bs used to store the + // sub-matrix of A and B + __shared__ float shared_A[BLOCK_DIM][BLOCK_DIM]; + __shared__ float shared_B[BLOCK_DIM][BLOCK_DIM]; + + // Sub-matrix of A (begin, step, end) and Sub-matrix of B (begin, step) + __shared__ int begin_A; + __shared__ int begin_B; + __shared__ int step_A; + __shared__ int step_B; + __shared__ int end_A; + + // Thread index + int tx = threadIdx.x; + int ty = threadIdx.y; + + // Other variables + float tmp; + float ssd = 0; + + // Loop parameters + begin_A = BLOCK_DIM * blockIdx.y; + begin_B = BLOCK_DIM * blockIdx.x; + step_A = 
BLOCK_DIM * wA; + step_B = BLOCK_DIM * wB; + end_A = begin_A + (dim - 1) * wA; + + // if (blockIdx.x == 0 && blockIdx.y == 0 && tx == 0 && ty == 0) + // printf("begin_A is %d, end_A is %d, step_A is %d, begin_B is %d, step_B is %d\n", begin_A, end_A, step_A, begin_B, step_B); + + // Conditions + int cond0 = (begin_A + tx < wA); // used to write in shared memory + int cond1 = (begin_B + tx < wB); // used to write in shared memory & to + // computations and to write in output matrix + int cond2 = + (begin_A + ty < wA); // used to computations and to write in output matrix + // Loop over all the sub-matrices of A and B required to compute the block + // sub-matrix + for (int a = begin_A, b = begin_B; a <= end_A; a += step_A, b += step_B) { + // if (blockIdx.x == 0 && blockIdx.y == 0 && tx == 0 && ty == 0) + // printf("a is %d, end_A is %d, step_A is %d, b is %d, step_B is %d\n", begin_A, end_A, step_A, begin_B, step_B); + // Load the matrices from device memory to shared memory; each thread loads + // one element of each matrix + if (a / wA + ty < dim) { + shared_A[ty][tx] = (cond0) ? A[a + wA * ty + tx] : 0; + shared_B[ty][tx] = (cond1) ? B[b + wB * ty + tx] : 0; + } else { + shared_A[ty][tx] = 0; + shared_B[ty][tx] = 0; + } + + // Synchronize to make sure the matrices are loaded + block.sync(); + // Compute the difference between the two matrixes; each thread computes one + // element of the block sub-matrix + if (cond2 && cond1) { + for (int k = 0; k < BLOCK_DIM; ++k) { + tmp = shared_A[k][ty] - shared_B[k][tx]; + ssd += tmp * tmp; + } + } + + // Synchronize to make sure that the preceding computation is done before + // loading two new sub-matrices of A and B in the next iteration + block.sync(); + } + + // Write the block sub-matrix to device memory; each thread writes one element + if (cond2 && cond1) { + AB[(begin_A + ty) * wB + begin_B + tx] = ssd; + } +} + +/** + * Gathers k-th smallest distances for each column of the distance matrix in + * the top. 
+ * + * @param dist distance matrix + * @param ind index matrix + * @param width width of the distance matrix and of the index matrix + * @param height height of the distance matrix and of the index matrix + * @param k number of neighbors to consider + */ +__global__ void cuInsertionSort(float *dist, int *ind, int width, int height, + int k) { + // printf("test2\n"); + // Variables + int l, i, j; + float *p_dist; + int *p_ind; + float curr_dist, max_dist; + int curr_row, max_row; + unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x; + + if (xIndex < width) { + // Pointer shift, initialization, and max value + p_dist = dist + xIndex; + p_ind = ind + xIndex; + max_dist = p_dist[0]; + p_ind[0] = 0; + + // Part 1 : sort kth firt elementZ + for (l = 1; l < k; l++) { + curr_row = l * width; + curr_dist = p_dist[curr_row]; + if (curr_dist < max_dist) { + i = l - 1; + for (int a = 0; a < l - 1; a++) { + if (p_dist[a * width] > curr_dist) { + i = a; + break; + } + } + for (j = l; j > i; j--) { + p_dist[j * width] = p_dist[(j - 1) * width]; + p_ind[j * width] = p_ind[(j - 1) * width]; + } + p_dist[i * width] = curr_dist; + p_ind[i * width] = l; + } else { + p_ind[l * width] = l; + } + max_dist = p_dist[curr_row]; + } + + // Part 2 : insert element in the k-th first lines + max_row = (k - 1) * width; + for (l = k; l < height; l++) { + curr_dist = p_dist[l * width]; + if (curr_dist < max_dist) { + i = k - 1; + for (int a = 0; a < k - 1; a++) { + if (p_dist[a * width] > curr_dist) { + i = a; + break; + } + } + for (j = k - 1; j > i; j--) { + p_dist[j * width] = p_dist[(j - 1) * width]; + p_ind[j * width] = p_ind[(j - 1) * width]; + } + p_dist[i * width] = curr_dist; + p_ind[i * width] = l; + max_dist = p_dist[max_row]; + } + } + } +} + +/** + * Computes the square root of the first line (width-th first element) + * of the distance matrix. 
+ * + * @param dist distance matrix + * @param width width of the distance matrix + * @param k number of neighbors to consider + */ +__global__ void cuParallelSqrt(float *dist, int width, int k) { + unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int yIndex = blockIdx.y * blockDim.y + threadIdx.y; + // printf("test3\n"); + if (xIndex < width && yIndex < k) + dist[yIndex * width + xIndex] = sqrt(dist[yIndex * width + xIndex]); +} + +//-----------------------------------------------------------------------------------------------// +// K-th NEAREST NEIGHBORS // +//-----------------------------------------------------------------------------------------------// + +/** + * Prints the error message return during the memory allocation. + * + * @param error error value return by the memory allocation function + * @param memorySize size of memory tried to be allocated + */ +void printErrorMessage(cudaError_t error, int memorySize) { + printf("==================================================\n"); + printf("MEMORY ALLOCATION ERROR : %s\n", cudaGetErrorString(error)); + printf("Whished allocated memory : %d\n", memorySize); + printf("==================================================\n"); +} + +/** + * K nearest neighbor algorithm + * - Initialize CUDA + * - Allocate device memory + * - Copy point sets (reference and query points) from host to device memory + * - Compute the distances + indexes to the k nearest neighbors for each query + * point + * - Copy distances from device to host memory + * + * @param ref_host reference points ; pointer to linear matrix + * @param ref_width number of reference points ; width of the matrix + * @param query_host query points ; pointer to linear matrix + * @param query_width number of query points ; width of the matrix + * @param height dimension of points ; height of the matrices + * @param k number of neighbor to consider + * @param dist_host distances to k nearest neighbors ; pointer to linear + * matrix + * 
@param dist_host indexes of the k nearest neighbors ; pointer to linear + * matrix + * + */ +// void knn_cuda(float *ref_host, int ref_width, float *query_host, +// int query_width, int height, int k, float *dist_host, +// int *ind_host) { +void knn_cuda(float *ref_device, int ref_width, float *query_device, + int query_width, int height, int k, float *dist_device, + int *ind_device) { + // Grids ans threads + dim3 g_16x16(query_width / 16, ref_width / 16, 1); + dim3 t_16x16(16, 16, 1); + if (query_width % 16 != 0) + g_16x16.x += 1; + if (ref_width % 16 != 0) + g_16x16.y += 1; + // + dim3 g_256x1(query_width / 256, 1, 1); + dim3 t_256x1(256, 1, 1); + if (query_width % 256 != 0) + g_256x1.x += 1; + + dim3 g_k_16x16(query_width / 16, k / 16, 1); + dim3 t_k_16x16(16, 16, 1); + if (query_width % 16 != 0) + g_k_16x16.x += 1; + if (k % 16 != 0) + g_k_16x16.y += 1; + + // printf("ref_width is %d, query_width is %d, height is %d\n", ref_width, query_width, height); + + // Kernel 1: Compute all the distances + cuComputeDistanceGlobal<<>>(ref_device, ref_width, query_device, + query_width, height, dist_device); + // Kernel 2: Sort each column + cuInsertionSort<<>>(dist_device, ind_device, query_width, + ref_width, k); + // Kernel 3: Compute square root of k first elements + cuParallelSqrt<<>>(dist_device, query_width, k); + cudaDeviceSynchronize(); +} + +float compute_distance(const float *ref, int ref_nb, const float *query, + int query_nb, int dim, int ref_index, int query_index) { + float sum = 0.f; + for (int d = 0; d < dim; ++d) { + const float diff = + ref[d * ref_nb + ref_index] - query[d * query_nb + query_index]; + sum += diff * diff; + } + return sqrtf(sum); +} + +void modified_insertion_sort(float *dist, int *index, int length, int k) { + + // Initialise the first index + index[0] = 0; + + // Go through all points + for (int i = 1; i < length; ++i) { + + // Store current distance and associated index + float curr_dist = dist[i]; + int curr_index = i; + + // Skip 
the current value if its index is >= k and if it's higher the k-th + // slready sorted mallest value + if (i >= k && curr_dist >= dist[k - 1]) { + continue; + } + + // Shift values (and indexes) higher that the current distance to the right + int j = min(i, k - 1); + while (j > 0 && dist[j - 1] > curr_dist) { + dist[j] = dist[j - 1]; + index[j] = index[j - 1]; + --j; + } + + // Write the current distance and index at their position + dist[j] = curr_dist; + index[j] = curr_index; + } +} + +bool knn_c(const float *ref, int ref_nb, const float *query, int query_nb, + int dim, int k, float *knn_dist, int *knn_index) { + // Allocate local array to store all the distances / indexes for a given query + // point + float *dist = (float *)malloc(ref_nb * sizeof(float)); + int *index = (int *)malloc(ref_nb * sizeof(int)); + + // Allocation checks + if (!dist || !index) { + printf("Memory allocation error\n"); + free(dist); + free(index); + return false; + } + + // Process one query point at the time + for (int i = 0; i < query_nb; ++i) { + + // Compute all distances / indexes + for (int j = 0; j < ref_nb; ++j) { + dist[j] = compute_distance(ref, ref_nb, query, query_nb, dim, j, i); + index[j] = j; + } + + // Sort distances / indexes + modified_insertion_sort(dist, index, ref_nb, k); + + // Copy k smallest distances and their associated index + for (int j = 0; j < k; ++j) { + knn_dist[j * query_nb + i] = dist[j]; + knn_index[j * query_nb + i] = index[j]; + } + } + + // Memory clean-up + free(dist); + free(index); + return true; +} + +/** + * Example of use of kNN search CUDA. 
+ */ +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + // Variables and parameters + float *ref; // Pointer to reference point array + float *query; // Pointer to query point array + float *dist, *dist_c; // Pointer to distance array + int *ind, *ind_c; // Pointer to index array + + + if (argc >= 4) { + ref_nb = atoi(argv[1]); + query_nb = atoi(argv[2]); + dim = atoi(argv[3]); + } + + int c_iterations = 10; + int i; + const float precision = 0.001f; // distance error max + int nb_correct_precisions = 0; + int nb_correct_indexes = 0; + float *knn_dist = (float *)malloc(query_nb * k * sizeof(float)); + int *knn_index = (int *)malloc(query_nb * k * sizeof(int)); + + // Memory allocation + ref = (float *)malloc(ref_nb * dim * sizeof(float)); + query = (float *)malloc(query_nb * dim * sizeof(float)); + dist = (float *)malloc(query_nb * ref_nb * sizeof(float)); + ind = (int *)malloc(query_nb * k * sizeof(int)); + // cudaMallocManaged(&ref, ref_nb * dim * sizeof(float)); + // cudaMallocManaged(&query, query_nb * dim * sizeof(float)); + // cudaMallocManaged(&dist, query_nb * ref_nb * sizeof(float)); + // cudaMallocManaged(&ind, query_nb * k * sizeof(int)); + dist_c = (float *)malloc(query_nb * k * sizeof(float)); + ind_c = (int *)malloc(query_nb * k * sizeof(float)); + + // Init + srand(time(NULL)); + for (i = 0; i < ref_nb * dim; i++) + ref[i] = (float)rand() / (float)RAND_MAX; + for (i = 0; i < query_nb * dim; i++) + query[i] = (float)rand() / (float)RAND_MAX; + + // printf("Ground truth computation in progress...\n\n"); + // if (!knn_c(ref, ref_nb, query, query_nb, dim, k, knn_dist, knn_index)) { + // free(knn_dist); + // free(knn_index); + // return EXIT_FAILURE; + // } + + // Variables for duration evaluation + float elapsed_time; + + // Display informations + printf("Number of reference points : %6d\n", ref_nb); + printf("Number of query points : 
%6d\n", query_nb); + printf("Dimension of points : %4d\n", dim); + printf("Number of neighbors to consider : %4d\n", k); + printf("Processing kNN search :\n"); + + float precision_accuracy = 0.0f; + float index_accuracy = 0.0f; + /* + printf("On CPU: \n"); + struct timeval tic; + gettimeofday(&tic, NULL); + for (i = 0; i < c_iterations; i++) { + knn_c(ref, ref_nb, query, query_nb, dim, k, dist_c, ind_c); + } + + for (int i = 0; i < query_nb * k; ++i) { + if (fabs(dist_c[i] - knn_dist[i]) <= precision) { + nb_correct_precisions++; + } + if (ind_c[i] == knn_index[i]) { + nb_correct_indexes++; + } + } + + struct timeval toc; + gettimeofday(&toc, NULL); + elapsed_time = toc.tv_sec - tic.tv_sec; + elapsed_time += (toc.tv_usec - tic.tv_usec) / 1000000.; + precision_accuracy = nb_correct_precisions / ((float)query_nb * k); + index_accuracy = nb_correct_indexes / ((float)query_nb * k); + printf("%f, %f\n", precision_accuracy, index_accuracy); + printf(" done in %f s for %d iterations (%f s by iteration)\n", elapsed_time, + c_iterations, elapsed_time / (c_iterations)); + */ + printf("on GPU: \n"); + + // Call kNN search CUDA + GPU_argv_init(); + + initTrace(); + startCPU(); + + float *ref_device; + float *query_device; + float *dist_device; + int *ind_device; + + cudaMalloc(&ref_device, ref_nb * dim * sizeof(float)); + cudaMalloc(&query_device, query_nb * dim * sizeof(float)); + cudaMalloc(&dist_device, query_nb * ref_nb * sizeof(float)); + cudaMalloc(&ind_device, query_nb * k * sizeof(int)); + + cudaMemcpy(ref_device, ref, ref_nb * dim * sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(query_device, query, query_nb * dim * sizeof(float), cudaMemcpyHostToDevice); + + for (i = 0; i < iterations; i++) { + // knn_cuda(ref, ref_nb, query, query_nb, dim, k, dist, ind); + knn_cuda(ref_device, ref_nb, query_device, query_nb, dim, k, dist_device, ind_device); + } + + cudaMemcpy(dist, dist_device, query_nb * ref_nb * sizeof(float), cudaMemcpyDeviceToHost); + cudaMemcpy(ind, 
ind_device, query_nb * k * sizeof(int), cudaMemcpyDeviceToHost); + + cudaFree(ind_device); + cudaFree(dist_device); + cudaFree(query_device); + cudaFree(ref_device); + + endCPU(); + finiTrace(); + + nb_correct_precisions = 0; + nb_correct_indexes = 0; + for (int i = 0; i < query_nb * k; ++i) { + if (fabs(dist[i] - knn_dist[i]) <= precision) { + nb_correct_precisions++; + } + if (ind[i] == knn_index[i]) { + nb_correct_indexes++; + } + } + + precision_accuracy = nb_correct_precisions / ((float)query_nb * k); + index_accuracy = nb_correct_indexes / ((float)query_nb * k); + printf("%f, %f\n", precision_accuracy, index_accuracy); + + // Destroy cuda event object and free memory + // cudaFree(ind); + // cudaFree(dist); + // cudaFree(query); + // cudaFree(ref); + free(ind); + free(dist); + free(query); + free(ref); + free(dist_c); + free(ind_c); +} \ No newline at end of file diff --git a/workloads/realworld/pinned/knn/run.sh b/workloads/realworld/pinned/knn/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..541db1387ce3ebe87b1338f079609b8b4a2736c6 --- /dev/null +++ b/workloads/realworld/pinned/knn/run.sh @@ -0,0 +1 @@ +./knn 4096 4096 128 \ No newline at end of file diff --git a/workloads/realworld/pinned/knn/run_super.sh b/workloads/realworld/pinned/knn/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..86ad9321b470072e5e84e706e1619ee200cf2b31 --- /dev/null +++ b/workloads/realworld/pinned/knn/run_super.sh @@ -0,0 +1 @@ +./knn 32768 32768 128 \ No newline at end of file diff --git a/workloads/realworld/pinned/lavaMD/README b/workloads/realworld/pinned/lavaMD/README new file mode 100755 index 0000000000000000000000000000000000000000..27b526ff669e9632b11193634307bfe778a2dfff --- /dev/null +++ b/workloads/realworld/pinned/lavaMD/README @@ -0,0 +1,50 @@ +//======================================================================================================================================================150 +// 
DESCRIPTION +//======================================================================================================================================================150 + +This is the CUDA version of the code. + +The code calculates particle potential and relocation due to mutual forces between particles within a large 3D space. This space is +divided into cubes, or large boxes, that are allocated to individual cluster nodes. The large box at each node is further divided into +cubes, called boxes. 26 neighbor boxes surround each box (the home box). Home boxes at the boundaries of the particle space have fewer neighbors. +Particles only interact with those other particles that are within a cutoff radius since ones at larger distances exert negligible forces. Thus the +box size is chosen so that cutoff radius does not span beyond any neighbor box for any particle in a home box, thus limiting the reference space to +a finite number of boxes. + +This code [1] was derived from the ddcMD application [2] by rewriting the front end and structuring it for parallelization. This code represents MPI +task that runs on a single cluster node. While the details of the code are somewhat different than the original, the code retains the structure of the +MPI task in the original code. Since the rest of MPI code is not included here, the application first emulates MPI partitioning of the particle space +into boxes. Then, for every particle in the home box, the nested loop processes interactions first with other particles in the home box and then with +particles in all neighbor boxes. The processing of each particle consists of a single stage of calculation that is enclosed in the innermost loop. The +nested loops in the application were parallelized in such a way that at any point of time GPU warp/wavefront accesses adjacent memory locations. The +speedup depends on the number of boxes, particles (fixed) and the actual calculation for each particle (fixed).
The application is memory bound, and +GPU speedup seems to saturate at about 16x when compared to single-core CPU. + +More information about the parallel version of this code can be found in: +[1] L. G. Szafaryn, T. Gamblin, B. deSupinski and K. Skadron. "Experiences with Achieving Portability across Heterogeneous Architectures." Submitted to +WOLFHPC workshop at 25th International Conference on Supercomputing (ICS). Tucson, AZ. 2010. +More about the original ddcMD application can be found in: +[2] F. H. Streitz, J. N. Glosli, M. V. Patel, B. Chan, R. K. Yates, B. R. de Supinski, J. Sexton, J and A. Gunnels. "100+ TFlop Solidification Simulations +on BlueGene/L." In Proceedings of the 2005 Supercomputing Conference (SC 05). Seattle, WA. 2005. + +//======================================================================================================================================================150 +// USE +//======================================================================================================================================================150 + +The code takes the following parameters: +-boxes1d (number of boxes in one dimension, the total number of boxes will be that^3) + +The code can be run as follows: +./lavaMD -boxes1d 10 + +******Adjustable work group size***** +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=128" + +######OUTPUT FOR VALIDATION######## +USAGE: +make clean +make OUTPUT=Y \ No newline at end of file diff --git a/workloads/realworld/pinned/lavaMD/kernel/kernel_gpu_cuda.cu b/workloads/realworld/pinned/lavaMD/kernel/kernel_gpu_cuda.cu new file mode 100755 index 0000000000000000000000000000000000000000..15164488f40349d583134da5d2a03a1ffc854c52 --- /dev/null +++ b/workloads/realworld/pinned/lavaMD/kernel/kernel_gpu_cuda.cu @@ -0,0 +1,199 @@
+//----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------200 +// plasmaKernel_gpu_2 +//----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------200 + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +__global__ void kernel_gpu_cuda(par_str d_par_gpu, + dim_str d_dim_gpu, + box_str *d_box_gpu, + FOUR_VECTOR *d_rv_gpu, + fp *d_qv_gpu, + FOUR_VECTOR *d_fv_gpu, + int boxes_per_block) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + // THREAD PARAMETERS + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + int bx = blockIdx.x; // get current horizontal block index (0-n) + int tx = threadIdx.x; // get current horizontal thread index (0-n) + int wtx = tx; + + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + // Extract input parameters + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + + // parameters + fp a2 = 2.0 * d_par_gpu.alpha * d_par_gpu.alpha; + + // home box + int first_i; + FOUR_VECTOR *rA; + FOUR_VECTOR *fA; + __shared__ FOUR_VECTOR rA_shared[100]; + + // nei box + int pointer; + int k = 0; + int first_j; + FOUR_VECTOR *rB; + fp *qB; + int j = 0; + __shared__ FOUR_VECTOR rB_shared[100]; + __shared__ double 
qB_shared[100]; + + // common + fp r2; + fp u2; + fp vij; + fp fs; + fp fxij; + fp fyij; + fp fzij; + THREE_VECTOR d; + + + int box = bx * boxes_per_block; + int end_box = box + boxes_per_block; + + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + // DO FOR THE NUMBER OF BOXES + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + for (; box < end_box; box++) + { + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + // Home box + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Setup parameters + //----------------------------------------------------------------------------------------------------------------------------------140 + + // home box - box parameters + first_i = d_box_gpu[box].offset; + + // home box - distance, force, charge and type parameters + rA = &d_rv_gpu[first_i]; + fA = &d_fv_gpu[first_i]; + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Copy to shared memory + //----------------------------------------------------------------------------------------------------------------------------------140 + + // home box - shared memory + while (wtx < NUMBER_PAR_PER_BOX) + { + rA_shared[wtx] = rA[wtx]; + wtx = wtx + NUMBER_THREADS; + } + wtx = tx; + + // synchronize threads - not needed, but just to be safe + block.sync(); + + 
//------------------------------------------------------------------------------------------------------------------------------------------------------160 + // nei box loop + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + + // if (wtx == 0) + // printf("d_box_gpu[%d].nn is %d\n", bx, d_box_gpu[bx].nn); + + int tile = 0; + int end_tile = 1 + d_box_gpu[box].nn; + + // loop over neiing boxes of home box + for (; tile < end_tile; tile++) + { + + //----------------------------------------50 + // nei box - get pointer to the right box + //----------------------------------------50 + + if (tile == 0) + { + pointer = box; // set first box to be processed to home box + } + else + { + pointer = d_box_gpu[box].nei[tile - 1].number; // remaining boxes are nei boxes + } + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Setup parameters + //----------------------------------------------------------------------------------------------------------------------------------140 + + // nei box - box parameters + first_j = d_box_gpu[pointer].offset; + + // nei box - distance, (force), charge and (type) parameters + rB = &d_rv_gpu[first_j]; + qB = &d_qv_gpu[first_j]; + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Setup parameters + //----------------------------------------------------------------------------------------------------------------------------------140 + + // nei box - shared memory + while (wtx < NUMBER_PAR_PER_BOX) + { + rB_shared[wtx] = rB[wtx]; + qB_shared[wtx] = qB[wtx]; + wtx = wtx + NUMBER_THREADS; + } + wtx = tx; + + // synchronize threads because in next section each thread accesses data brought in by different threads here + block.sync(); + + 
//----------------------------------------------------------------------------------------------------------------------------------140 + // Calculation + //----------------------------------------------------------------------------------------------------------------------------------140 + + // loop for the number of particles in the home box + // for (int i=0; i +#include "../../../../common/cupti_add.h" +#include "../../../../common/cpu_timestamps.h" + +void +kernel_gpu_cuda_wrapper(par_str par_cpu, + dim_str dim_cpu, + box_str* box_cpu, + FOUR_VECTOR* rv_cpu, + fp* qv_cpu, + FOUR_VECTOR* fv_cpu, + int nblocks) +{ + + //======================================================================================================================================================150 + // CPU VARIABLES + //======================================================================================================================================================150 + + // timer + long long time0; + long long time1; + long long time2; + long long time3; + long long time4; + long long time5; + long long time6; + + time0 = get_time(); + + //======================================================================================================================================================150 + // GPU SETUP + //======================================================================================================================================================150 + + //====================================================================================================100 + // INITIAL DRIVER OVERHEAD + //====================================================================================================100 + + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaThreadSynchronize(); + + //====================================================================================================100 + // VARIABLES + 
//====================================================================================================100 + + box_str* d_box_gpu; + FOUR_VECTOR* d_rv_gpu; + fp* d_qv_gpu; + FOUR_VECTOR* d_fv_gpu; + + dim3 threads; + dim3 blocks; + + //====================================================================================================100 + // EXECUTION PARAMETERS + //====================================================================================================100 + + // blocks.x = dim_cpu.number_boxes; + blocks.x = nblocks * nblocks * nblocks; + blocks.y = 1; + threads.x = NUMBER_THREADS; // define the number of threads in the block + threads.y = 1; + + int boxes_per_block = 1; + if (dim_cpu.number_boxes >= blocks.x) + { + boxes_per_block = (dim_cpu.number_boxes + blocks.x - 1) / blocks.x; + } + + time1 = get_time(); + + //======================================================================================================================================================150 + // GPU MEMORY (MALLOC) + //======================================================================================================================================================150 + + //====================================================================================================100 + // GPU MEMORY (MALLOC) COPY IN + //====================================================================================================100 + + //==================================================50 + // boxes + //==================================================50 + + cudaMalloc( (void **)&d_box_gpu, + dim_cpu.box_mem); + + //==================================================50 + // rv + //==================================================50 + + cudaMalloc( (void **)&d_rv_gpu, + dim_cpu.space_mem); + + //==================================================50 + // qv + //==================================================50 + + cudaMalloc( (void **)&d_qv_gpu, + dim_cpu.space_mem2); + + 
//====================================================================================================100 + // GPU MEMORY (MALLOC) COPY + //====================================================================================================100 + + //==================================================50 + // fv + //==================================================50 + + cudaMalloc( (void **)&d_fv_gpu, + dim_cpu.space_mem); + + time2 = get_time(); + + //======================================================================================================================================================150 + // GPU MEMORY COPY + //======================================================================================================================================================150 + + //====================================================================================================100 + // GPU MEMORY (MALLOC) COPY IN + //====================================================================================================100 + + //==================================================50 + // boxes + //==================================================50 + + cudaMemcpy( d_box_gpu, + box_cpu, + dim_cpu.box_mem, + cudaMemcpyHostToDevice); + + //==================================================50 + // rv + //==================================================50 + + cudaMemcpy( d_rv_gpu, + rv_cpu, + dim_cpu.space_mem, + cudaMemcpyHostToDevice); + + //==================================================50 + // qv + //==================================================50 + + cudaMemcpy( d_qv_gpu, + qv_cpu, + dim_cpu.space_mem2, + cudaMemcpyHostToDevice); + + //====================================================================================================100 + // GPU MEMORY (MALLOC) COPY + //====================================================================================================100 + + //==================================================50 + // fv + 
//==================================================50 + + cudaMemcpy( d_fv_gpu, + fv_cpu, + dim_cpu.space_mem, + cudaMemcpyHostToDevice); + + time3 = get_time(); + + //======================================================================================================================================================150 + // KERNEL + //======================================================================================================================================================150 + // launch kernel - all boxes + kernel_gpu_cuda<<>>(par_cpu, + dim_cpu, + d_box_gpu, + d_rv_gpu, + d_qv_gpu, + d_fv_gpu, + boxes_per_block); + + checkCUDAError("Start"); + cudaDeviceSynchronize(); + + + time4 = get_time(); + + //======================================================================================================================================================150 + // GPU MEMORY COPY (CONTD.) + //======================================================================================================================================================150 + + cudaMemcpy( fv_cpu, + d_fv_gpu, + dim_cpu.space_mem, + cudaMemcpyDeviceToHost); + + time5 = get_time(); + + //======================================================================================================================================================150 + // GPU MEMORY DEALLOCATION + //======================================================================================================================================================150 + + cudaFree(d_rv_gpu); + cudaFree(d_qv_gpu); + cudaFree(d_fv_gpu); + cudaFree(d_box_gpu); + + endCPU(); + finiTrace(); + + time6 = get_time(); + + //======================================================================================================================================================150 + // DISPLAY TIMING + 
//======================================================================================================================================================150 + + printf("Time spent in different stages of GPU_CUDA KERNEL:\n"); + + printf("%15.12f s, %15.12f % : GPU: SET DEVICE / DRIVER INIT\n", (float) (time1-time0) / 1000000, (float) (time1-time0) / (float) (time6-time0) * 100); + printf("%15.12f s, %15.12f % : GPU MEM: ALO\n", (float) (time2-time1) / 1000000, (float) (time2-time1) / (float) (time6-time0) * 100); + printf("%15.12f s, %15.12f % : GPU MEM: COPY IN\n", (float) (time3-time2) / 1000000, (float) (time3-time2) / (float) (time6-time0) * 100); + + printf("%15.12f s, %15.12f % : GPU: KERNEL\n", (float) (time4-time3) / 1000000, (float) (time4-time3) / (float) (time6-time0) * 100); + + printf("%15.12f s, %15.12f % : GPU MEM: COPY OUT\n", (float) (time5-time4) / 1000000, (float) (time5-time4) / (float) (time6-time0) * 100); + printf("%15.12f s, %15.12f % : GPU MEM: FRE\n", (float) (time6-time5) / 1000000, (float) (time6-time5) / (float) (time6-time0) * 100); + + printf("Total time:\n"); + printf("%.12f s\n", (float) (time6-time0) / 1000000); + +} diff --git a/workloads/realworld/pinned/lavaMD/kernel/kernel_gpu_cuda_wrapper.h b/workloads/realworld/pinned/lavaMD/kernel/kernel_gpu_cuda_wrapper.h new file mode 100755 index 0000000000000000000000000000000000000000..cf499f1480469569c649eccf174cc8ba0655ddbd --- /dev/null +++ b/workloads/realworld/pinned/lavaMD/kernel/kernel_gpu_cuda_wrapper.h @@ -0,0 +1,19 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//========================================================================================================================================================================================================200 +// KERNEL_GPU_CUDA_WRAPPER HEADER 
+//========================================================================================================================================================================================================200 + +void kernel_gpu_cuda_wrapper( par_str parms_cpu, + dim_str dim_cpu, + box_str* box_cpu, + FOUR_VECTOR* rv_cpu, + fp* qv_cpu, + FOUR_VECTOR* fv_cpu, + int nblocks); + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/pinned/lavaMD/kernel/kernel_gpu_cuda_wrapper.o b/workloads/realworld/pinned/lavaMD/kernel/kernel_gpu_cuda_wrapper.o new file mode 100644 index 0000000000000000000000000000000000000000..1f19fc5bc32bae93adf685df901728f59b0cc51e Binary files /dev/null and b/workloads/realworld/pinned/lavaMD/kernel/kernel_gpu_cuda_wrapper.o differ diff --git a/workloads/realworld/pinned/lavaMD/lavaMD b/workloads/realworld/pinned/lavaMD/lavaMD new file mode 100755 index 0000000000000000000000000000000000000000..334dcce46881f888e370616c9ed36c80b315711c Binary files /dev/null and b/workloads/realworld/pinned/lavaMD/lavaMD differ diff --git a/workloads/realworld/pinned/lavaMD/main.c b/workloads/realworld/pinned/lavaMD/main.c new file mode 100755 index 0000000000000000000000000000000000000000..a7c88472e3939414bbdc314e2bfb1c46bc345bea --- /dev/null +++ b/workloads/realworld/pinned/lavaMD/main.c @@ -0,0 +1,318 @@ +//========================================================================================================================================================================================================200 +//======================================================================================================================================================150 +//====================================================================================================100 +//==================================================50 + 
+//========================================================================================================================================================================================================200 +// UPDATE +//========================================================================================================================================================================================================200 + +// 14 APR 2011 Lukasz G. Szafaryn + +//========================================================================================================================================================================================================200 +// DEFINE/INCLUDE +//========================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// LIBRARIES +//======================================================================================================================================================150 + +#include // (in path known to compiler) needed by printf +#include // (in path known to compiler) needed by malloc +#include // (in path known to compiler) needed by true/false + +//======================================================================================================================================================150 +// UTILITIES +//======================================================================================================================================================150 + +#include "./util/timer/timer.h" // (in path specified here) +#include "./util/num/num.h" // (in path specified here) + 
+//======================================================================================================================================================150 +// MAIN FUNCTION HEADER +//======================================================================================================================================================150 + +#include "./main.h" // (in the current directory) + +//======================================================================================================================================================150 +// KERNEL +//======================================================================================================================================================150 + +#include "./kernel/kernel_gpu_cuda_wrapper.h" // (in library path specified here) + +//========================================================================================================================================================================================================200 +// MAIN FUNCTION +//========================================================================================================================================================================================================200 +#define _POSIX_C_SOURCE 200809L +#include +#include +#include +extern inline __attribute__((always_inline)) unsigned long rdtsc() +{ + unsigned long a, d; + + __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); + + return (a | (d << 32)); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsp() { + struct timespec tms; + if (clock_gettime(CLOCK_REALTIME, &tms)) { + return -1; + } + unsigned long ns = tms.tv_sec * 1000000000; + ns += tms.tv_nsec; + return ns; +} + + +int +main( int argc, + char *argv []) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + + printf("thread block size of kernel = %d \n", NUMBER_THREADS); + 
//======================================================================================================================================================150 + // CPU/MCPU VARIABLES + //======================================================================================================================================================150 + + // timer + long long time0; + + time0 = get_time(); + + // timer + long long time1; + long long time2; + long long time3; + long long time4; + long long time5; + long long time6; + long long time7; + + // counters + int i, j, k, l, m, n; + + // system memory + par_str par_cpu; + dim_str dim_cpu; + box_str* box_cpu; + FOUR_VECTOR* rv_cpu; + fp* qv_cpu; + FOUR_VECTOR* fv_cpu; + int nh; + + time1 = get_time(); + + //======================================================================================================================================================150 + // CHECK INPUT ARGUMENTS + //======================================================================================================================================================150 + + // assing default values + dim_cpu.boxes1d_arg = 1; + + // go through arguments + dim_cpu.boxes1d_arg = atoi(argv[1]); + int nblocks = atoi(argv[2]); + + // Print configuration + printf("Configuration used: boxes1d = %d\n", dim_cpu.boxes1d_arg); + + time2 = get_time(); + + //======================================================================================================================================================150 + // INPUTS + //======================================================================================================================================================150 + + par_cpu.alpha = 0.5; + + time3 = get_time(); + + //======================================================================================================================================================150 + // DIMENSIONS + 
//======================================================================================================================================================150 + + // total number of boxes + dim_cpu.number_boxes = dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg; + + // how many particles space has in each direction + dim_cpu.space_elem = dim_cpu.number_boxes * NUMBER_PAR_PER_BOX; + dim_cpu.space_mem = dim_cpu.space_elem * sizeof(FOUR_VECTOR); + dim_cpu.space_mem2 = dim_cpu.space_elem * sizeof(fp); + + // box array + dim_cpu.box_mem = dim_cpu.number_boxes * sizeof(box_str); + + time4 = get_time(); + + //======================================================================================================================================================150 + // SYSTEM MEMORY + //======================================================================================================================================================150 + + //====================================================================================================100 + // BOX + //====================================================================================================100 + + // allocate boxes + box_cpu = (box_str*)malloc(dim_cpu.box_mem); + + // initialize number of home boxes + nh = 0; + + // home boxes in z direction + for(i=0; i=0 && (j+m)>=0 && (k+n)>=0)==true && ((i+l) 1) { + + // variables + int max_multiprocessors; + int max_device; + cudaDeviceProp properties; + + // initialize variables + max_multiprocessors = 0; + max_device = 0; + + for (device = 0; device < num_devices; device++) { + cudaGetDeviceProperties(&properties, device); + if (max_multiprocessors < properties.multiProcessorCount) { + max_multiprocessors = properties.multiProcessorCount; + max_device = device; + } + } + cudaSetDevice(max_device); + } + +} + +//====================================================================================================100 +// GET LAST ERROR 
+//====================================================================================================100 + +void checkCUDAError(const char *msg) +{ + cudaError_t err = cudaGetLastError(); + if( cudaSuccess != err) { + // fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + printf("Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + fflush(NULL); + exit(EXIT_FAILURE); + } +} + +//===============================================================================================================================================================================================================200 +// END SET_DEVICE CODE +//===============================================================================================================================================================================================================200 diff --git a/workloads/realworld/pinned/lavaMD/util/device/device.h b/workloads/realworld/pinned/lavaMD/util/device/device.h new file mode 100755 index 0000000000000000000000000000000000000000..23bb31d26c1bc0e607c9b2faf7bddaa5a5c06d98 --- /dev/null +++ b/workloads/realworld/pinned/lavaMD/util/device/device.h @@ -0,0 +1,29 @@ +//===============================================================================================================================================================================================================200 +// SET_DEVICE HEADER +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// INCLUDE/DEFINE +//======================================================================================================================================================150 + +#include // (in 
library path known to compiler) needed by printf + +//======================================================================================================================================================150 +// FUNCTION PROTOTYPES +//======================================================================================================================================================150 + +//====================================================================================================100 +// SET DEVICE +//====================================================================================================100 + +void setdevice(void); + +//====================================================================================================100 +// GET LAST ERROR +//====================================================================================================100 + +void checkCUDAError(const char *msg); + +//===============================================================================================================================================================================================================200 +// END SET_DEVICE HEADER +//===============================================================================================================================================================================================================200 diff --git a/workloads/realworld/pinned/lavaMD/util/device/device.o b/workloads/realworld/pinned/lavaMD/util/device/device.o new file mode 100644 index 0000000000000000000000000000000000000000..0e7eae5dceba4b212ceaadb36f8e8677cedc3c61 Binary files /dev/null and b/workloads/realworld/pinned/lavaMD/util/device/device.o differ diff --git a/workloads/realworld/pinned/lavaMD/util/num/num.c b/workloads/realworld/pinned/lavaMD/util/num/num.c new file mode 100755 index 0000000000000000000000000000000000000000..980ff7498832c784eab718a8b886e82891047599 --- /dev/null +++ b/workloads/realworld/pinned/lavaMD/util/num/num.c @@ 
-0,0 +1,53 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// DESCRIPTION +//===============================================================================================================================================================================================================200 + +// Returns: 0 if string does not represent integer +// 1 if string represents integer + +//===============================================================================================================================================================================================================200 +// NUM CODE +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// ISINTEGER FUNCTION +//======================================================================================================================================================150 + +int isInteger(char *str){ + + //====================================================================================================100 + // make sure it's not empty + //====================================================================================================100 + + if (*str == '\0'){ + return 0; + } + + //====================================================================================================100 + // if any digit is not a number, return false + //====================================================================================================100 + + for(; *str != '\0'; str++){ + if (*str < 
48 || *str > 57){ // digit characters (need to include . if checking for float) + return 0; + } + } + + //====================================================================================================100 + // it got past all my checks so I think it's a number + //====================================================================================================100 + + return 1; +} + +//===============================================================================================================================================================================================================200 +// END NUM CODE +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/pinned/lavaMD/util/num/num.h b/workloads/realworld/pinned/lavaMD/util/num/num.h new file mode 100755 index 0000000000000000000000000000000000000000..27a5e42fe2819d9ecc2f569b5979fb451985976f --- /dev/null +++ b/workloads/realworld/pinned/lavaMD/util/num/num.h @@ -0,0 +1,21 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// FILE HEADER +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// ISINTEGER FUNCTION PROTOTYPE 
+//======================================================================================================================================================150 + +int isInteger(char *str); + +//===============================================================================================================================================================================================================200 +// END FILE HEADER +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/pinned/lavaMD/util/num/num.o b/workloads/realworld/pinned/lavaMD/util/num/num.o new file mode 100644 index 0000000000000000000000000000000000000000..b47a2f8a7301ec705aea96bd58bca1b9756c0f5e Binary files /dev/null and b/workloads/realworld/pinned/lavaMD/util/num/num.o differ diff --git a/workloads/realworld/pinned/lavaMD/util/timer/timer.c b/workloads/realworld/pinned/lavaMD/util/timer/timer.c new file mode 100755 index 0000000000000000000000000000000000000000..c7cc252b4e67b3a868722b7b2c58f5b863ae0cfc --- /dev/null +++ b/workloads/realworld/pinned/lavaMD/util/timer/timer.c @@ -0,0 +1,36 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// TIMER CODE +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// INCLUDE/DEFINE 
+//======================================================================================================================================================150 + +#include + +//======================================================================================================================================================150 +// FUNCTIONS +//======================================================================================================================================================150 + +//====================================================================================================100 +// DISPLAY TIME +//====================================================================================================100 + + // Returns the current system time in microseconds +long long get_time() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000000) + tv.tv_usec; +} + +//===============================================================================================================================================================================================================200 +// END TIMER CODE +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/pinned/lavaMD/util/timer/timer.h b/workloads/realworld/pinned/lavaMD/util/timer/timer.h new file mode 100755 index 0000000000000000000000000000000000000000..1744df4b8607f95c057ac4db6e9ced5ff84c4ab7 --- /dev/null +++ b/workloads/realworld/pinned/lavaMD/util/timer/timer.h @@ -0,0 +1,21 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// TIMER HEADER 
+//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// FUNCTION PROTOTYPES +//======================================================================================================================================================150 + +long long get_time(); + +//===============================================================================================================================================================================================================200 +// END TIMER HEADER +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/pinned/lavaMD/util/timer/timer.o b/workloads/realworld/pinned/lavaMD/util/timer/timer.o new file mode 100644 index 0000000000000000000000000000000000000000..9f425a0a985a154eea9ab5345aa430de01bea353 Binary files /dev/null and b/workloads/realworld/pinned/lavaMD/util/timer/timer.o differ diff --git a/workloads/realworld/pinned/lud/Makefile b/workloads/realworld/pinned/lud/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1dfc37ac7fa0db46535d0583970dba1cb5cfb80e --- /dev/null +++ b/workloads/realworld/pinned/lud/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := lud +CUFILES := lud_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o 
${EXECUTABLE} +clean: + rm -f *.o lud + diff --git a/workloads/realworld/pinned/lud/lud b/workloads/realworld/pinned/lud/lud new file mode 100755 index 0000000000000000000000000000000000000000..f141c62dd25ff0610b68c06f0287b29c42995fdb Binary files /dev/null and b/workloads/realworld/pinned/lud/lud differ diff --git a/workloads/realworld/pinned/lud/lud_cuda.cu b/workloads/realworld/pinned/lud/lud_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..989ee0c96e91f78ba9976a5376e78c93bffdeaec --- /dev/null +++ b/workloads/realworld/pinned/lud/lud_cuda.cu @@ -0,0 +1,278 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK 256 + +#ifndef SIZE +#define SIZE 4096 +#endif + +// __global__ void add(float *a, float *b, float *c) +// { +// int tid = blockIdx.x; // Handle the data at the index + +// c[tid] = a[tid] + b[tid]; +// } + +// __global__ void scale(float *a, int size, int index) +// { +// int i; +// int start = (index * size + index); +// int end = (index * size + size); + +// for (i = start + 1; i < end; i++) +// { +// a[i] = (a[i] / a[start]); +// } +// } + +// __global__ void reduce(float *a, int size, int index, int b_size) +// { +// extern __shared__ float pivot[]; +// int i; + +// int tid = threadIdx.x; +// int bid = blockIdx.x; +// int block_size = b_size; + +// int pivot_start = (index * size + index); +// int pivot_end = (index * size + size); + +// int start; +// int end; 
+// int pivot_row; +// int my_row; + +// if (tid == 0) +// { +// for (i = index; i < size; i++) +// pivot[i] = a[(index * size) + i]; +// } + +// __syncthreads(); + +// pivot_row = (index * size); +// my_row = (((block_size * bid) + tid) * size); +// start = my_row + index; +// end = my_row + size; + +// if (my_row > pivot_row) +// { +// for (i = start + 1; i < end; i++) +// { +// a[i] = a[i] - (a[start] * pivot[(i - my_row)]); +// } +// } +// } + +void initCPU(float *a, int N) +{ + srand((unsigned)2); + // fill the arrays 'a' on the CPU + for (int i = 0; i < (N * N); i++) + { + a[i] = ((rand() % 10) + 1); + } +} + +void initGPU(float *a_dev, int N) +{ + srand((unsigned)2); + for (int i = 0; i < (N * N); i++) + { + a_dev[i] = ((rand() % 10) + 1); + } +} + +__global__ void lud_kernel(float *a, int N) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // extern __shared__ float pivot[]; + __shared__ float pivot[SIZE]; + + for (int tile = 0; tile < N; tile += 1) { + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = blockDim.x; + + if (tid == 0 && bid == 0) { + int start = (tile * N + tile); + int end = (tile * N + N); + + for (int i = start + 1; i < end; i++) + a[i] = (a[i] / a[start]); + } + block.sync(); + + if (tid == 0) + { + for (int i = tile; i < N; i++) + pivot[i] = a[(tile * N) + i]; + } + block.sync(); + + int pivot_row = (tile * N); + int my_row = (((block_size * bid) + tid) * N); + int start = my_row + tile; + int end = my_row + N; + + if (my_row > pivot_row) + { + for (int i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(i - my_row)]); + } + } + block.sync(); + } +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + float *a; + float *c; + float error; + int N; + int flag = 0; + + float **result; + float **a_ref; + int blocks; + + float *dev_a; + int i; + int j; + int 
k; + float l1; + float u1; + + N = SIZE; + // allocate memory on CPU + a = (float *)malloc(sizeof(float) * N * N); + c = (float *)malloc(sizeof(float) * N * N); + + result = (float **)malloc(sizeof(float *) * N); + a_ref = (float **)malloc(sizeof(float *) * N); + + for (i = 0; i < N; i++) + { + result[i] = (float *)malloc(sizeof(float) * N); + a_ref[i] = (float *)malloc(sizeof(float) * N); + } + + GPU_argv_init(); + initCPU(a, N); + + initTrace(); + startCPU(); + // allocate the memory on the GPU + cudaMalloc((void **)&dev_a, N * N * sizeof(float)); + + cudaMemcpy(dev_a, a, N * N * sizeof(float), cudaMemcpyHostToDevice); // copy array to device memory + + /*Perform LU Decomposition*/ + // for (i = 0; i < N; i++) + // { + // scale<<<1, 1>>>(dev_a, N, i); + // // blocks= ((N-i-1)/512)+1; + // blocks = ((N / 512)); + // // printf("Number of blocks rxd : %d \n",blocks); + // reduce<<>>(dev_a, N, i, 512); + // } + blocks = ((N / DIM_THREAD_BLOCK)); + // lud_kernel<<>>(dev_a, N); + lud_kernel<<>>(dev_a, N); + /*LU decomposition ends here*/ + + cudaMemcpy(c, dev_a, N * N * sizeof(float), cudaMemcpyDeviceToHost); // copy array back to host + + // free the memory allocated on the GPU + cudaFree(dev_a); + endCPU(); + finiTrace(); + + /*copy the result matrix into explicit 2D matrix for verification*/ + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + result[i][j] = c[i * N + j]; + } + } + + printf("======================================================="); + printf("\n Performing inplace verification \n"); + + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + a_ref[i][j] = 0; + for (k = 0; k < N; k++) + { + if (i >= k) + l1 = result[i][k]; + else + l1 = 0; + + if (k == j) + u1 = 1; + else if (k < j) + u1 = result[k][j]; // figured it out + else + u1 = 0.0; + + a_ref[i][j] = a_ref[i][j] + (l1 * u1); + } + } + } + + // for (i = 0; i < N; i++) + // { + // for (j = 0; j < N; j++) + // { + // error = abs(a[(i * N + j)] - a_ref[i][j]); + // if (error > 1) + // 
{ + // // printf("No match occured at %d %d Error is %lf \n ", i, j, abs(a[(i*N+j)]-b[i][j])); + // flag = flag + 1; + // } + // } + // } + + // if (flag == 0) + // printf("Match \n"); + // else + // printf("No Matchs %d \n", flag); + + + + return 0; +} diff --git a/workloads/realworld/pinned/lud/run.sh b/workloads/realworld/pinned/lud/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..ea7db937489e328f5e923d2b18774e4256eef123 --- /dev/null +++ b/workloads/realworld/pinned/lud/run.sh @@ -0,0 +1 @@ +./lud 1024 diff --git a/workloads/realworld/pinned/lud/run_super.sh b/workloads/realworld/pinned/lud/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2791fe07c43d75b40894206ba79ed441f207ee26 --- /dev/null +++ b/workloads/realworld/pinned/lud/run_super.sh @@ -0,0 +1 @@ +./lud 4096 diff --git a/workloads/realworld/pinned/lud_perf/Makefile b/workloads/realworld/pinned/lud_perf/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1dfc37ac7fa0db46535d0583970dba1cb5cfb80e --- /dev/null +++ b/workloads/realworld/pinned/lud_perf/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := lud +CUFILES := lud_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o lud + diff --git a/workloads/realworld/pinned/lud_perf/lud b/workloads/realworld/pinned/lud_perf/lud new file mode 100755 index 0000000000000000000000000000000000000000..9ac25127a55eb2d03b73565edaddd232dd6d5069 Binary files /dev/null and b/workloads/realworld/pinned/lud_perf/lud differ diff --git a/workloads/realworld/pinned/lud_perf/lud_cuda.cu b/workloads/realworld/pinned/lud_perf/lud_cuda.cu new file mode 100644 index 
0000000000000000000000000000000000000000..bfe586362809ae646019c2828bc038f757610e97 --- /dev/null +++ b/workloads/realworld/pinned/lud_perf/lud_cuda.cu @@ -0,0 +1,278 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK 256 + +#ifndef SIZE +#define SIZE 4096 +#endif + +// __global__ void add(float *a, float *b, float *c) +// { +// int tid = blockIdx.x; // Handle the data at the index + +// c[tid] = a[tid] + b[tid]; +// } + +// __global__ void scale(float *a, int size, int index) +// { +// int i; +// int start = (index * size + index); +// int end = (index * size + size); + +// for (i = start + 1; i < end; i++) +// { +// a[i] = (a[i] / a[start]); +// } +// } + +// __global__ void reduce(float *a, int size, int index, int b_size) +// { +// extern __shared__ float pivot[]; +// int i; + +// int tid = threadIdx.x; +// int bid = blockIdx.x; +// int block_size = b_size; + +// int pivot_start = (index * size + index); +// int pivot_end = (index * size + size); + +// int start; +// int end; +// int pivot_row; +// int my_row; + +// if (tid == 0) +// { +// for (i = index; i < size; i++) +// pivot[i] = a[(index * size) + i]; +// } + +// __syncthreads(); + +// pivot_row = (index * size); +// my_row = (((block_size * bid) + tid) * size); +// start = my_row + index; +// end = my_row + size; + +// if (my_row > pivot_row) +// { +// for (i = start + 1; i < end; i++) +// { +// a[i] = a[i] - (a[start] * pivot[(i - 
my_row)]); +// } +// } +// } + +void initCPU(float *a, int N) +{ + srand((unsigned)2); + // fill the arrays 'a' on the CPU + for (int i = 0; i < (N * N); i++) + { + a[i] = ((rand() % 10) + 1); + } +} + +void initGPU(float *a_dev, int N) +{ + srand((unsigned)2); + for (int i = 0; i < (N * N); i++) + { + a_dev[i] = ((rand() % 10) + 1); + } +} + +__global__ void lud_kernel(float *a, int N) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // extern __shared__ float pivot[]; + __shared__ float pivot[SIZE]; + + for (int tile = 0; tile < N; tile += 1) { + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = blockDim.x; + + if (tid == 0 && bid == 0) { + int start = (tile * N + tile); + int end = (tile * N + N); + + for (int i = start + 1; i < end; i++) + a[i] = (a[i] / a[start]); + } + block.sync(); + + if (tid == 0) + { + for (int i = tile; i < N; i++) + pivot[i] = a[(tile * N) + i]; + } + block.sync(); + + int pivot_row = (tile * N); + int my_row = (((block_size * bid) + tid) * N); + int start = my_row + tile; + int end = my_row + N; + + if (my_row > pivot_row) + { + for (int i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(i - my_row)]); + } + } + block.sync(); + } +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + float *a; + float *c; + float error; + int N; + int flag = 0; + + float **result; + float **a_ref; + int blocks; + + float *dev_a; + int i; + int j; + int k; + float l1; + float u1; + + N = SIZE; + // allocate memory on CPU + a = (float *)malloc(sizeof(float) * N * N); + c = (float *)malloc(sizeof(float) * N * N); + + result = (float **)malloc(sizeof(float *) * N); + a_ref = (float **)malloc(sizeof(float *) * N); + + for (i = 0; i < N; i++) + { + result[i] = (float *)malloc(sizeof(float) * N); + a_ref[i] = (float *)malloc(sizeof(float) * N); + } + + GPU_argv_init(); + 
initCPU(a, N); + + // initTrace(); + startCPU(); + // allocate the memory on the GPU + cudaMalloc((void **)&dev_a, N * N * sizeof(float)); + + cudaMemcpy(dev_a, a, N * N * sizeof(float), cudaMemcpyHostToDevice); // copy array to device memory + + /*Perform LU Decomposition*/ + // for (i = 0; i < N; i++) + // { + // scale<<<1, 1>>>(dev_a, N, i); + // // blocks= ((N-i-1)/512)+1; + // blocks = ((N / 512)); + // // printf("Number of blocks rxd : %d \n",blocks); + // reduce<<>>(dev_a, N, i, 512); + // } + blocks = ((N / DIM_THREAD_BLOCK)); + // lud_kernel<<>>(dev_a, N); + lud_kernel<<>>(dev_a, N); + /*LU decomposition ends here*/ + + cudaMemcpy(c, dev_a, N * N * sizeof(float), cudaMemcpyDeviceToHost); // copy array back to host + + // free the memory allocated on the GPU + cudaFree(dev_a); + endCPU(); + // finiTrace(); + + /*copy the result matrix into explicit 2D matrix for verification*/ + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + result[i][j] = c[i * N + j]; + } + } + + printf("======================================================="); + printf("\n Performing inplace verification \n"); + + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + a_ref[i][j] = 0; + for (k = 0; k < N; k++) + { + if (i >= k) + l1 = result[i][k]; + else + l1 = 0; + + if (k == j) + u1 = 1; + else if (k < j) + u1 = result[k][j]; // figured it out + else + u1 = 0.0; + + a_ref[i][j] = a_ref[i][j] + (l1 * u1); + } + } + } + + // for (i = 0; i < N; i++) + // { + // for (j = 0; j < N; j++) + // { + // error = abs(a[(i * N + j)] - a_ref[i][j]); + // if (error > 1) + // { + // // printf("No match occured at %d %d Error is %lf \n ", i, j, abs(a[(i*N+j)]-b[i][j])); + // flag = flag + 1; + // } + // } + // } + + // if (flag == 0) + // printf("Match \n"); + // else + // printf("No Matchs %d \n", flag); + + + + return 0; +} diff --git a/workloads/realworld/pinned/lud_perf/run.sh b/workloads/realworld/pinned/lud_perf/run.sh new file mode 100755 index 
0000000000000000000000000000000000000000..ea7db937489e328f5e923d2b18774e4256eef123 --- /dev/null +++ b/workloads/realworld/pinned/lud_perf/run.sh @@ -0,0 +1 @@ +./lud 1024 diff --git a/workloads/realworld/pinned/lud_perf/run_super.sh b/workloads/realworld/pinned/lud_perf/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2791fe07c43d75b40894206ba79ed441f207ee26 --- /dev/null +++ b/workloads/realworld/pinned/lud_perf/run_super.sh @@ -0,0 +1 @@ +./lud 4096 diff --git a/workloads/realworld/pinned/nw/Makefile b/workloads/realworld/pinned/nw/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..b33ae6462826357f9665bfd7fc9929ed176f9b35 --- /dev/null +++ b/workloads/realworld/pinned/nw/Makefile @@ -0,0 +1,15 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include + +SRC = needle.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = needle + +release: $(SRC) + $(CC) ${KERNEL_DIM} $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt diff --git a/workloads/realworld/pinned/nw/Makefile_nvidia b/workloads/realworld/pinned/nw/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..2fd0b98d07beea56ae69a96a0c8cb3af87d602f6 --- /dev/null +++ b/workloads/realworld/pinned/nw/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. 
NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := needle +# CUDA source files (compiled with cudacc) +CUFILES := needle.cu +# CUDA dependency files +CU_DEPS := needle_kernel.cu +# C/C++ source files (compiled with gcc / c++) +# CCFILES := BlackScholes_gold.cpp + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/pinned/nw/README b/workloads/realworld/pinned/nw/README new file mode 100755 index 0000000000000000000000000000000000000000..683cbd53db81f0ece4f926fa01316582ea0d5fc9 --- /dev/null +++ b/workloads/realworld/pinned/nw/README @@ -0,0 +1,12 @@ +Note: This program generate two sequences randomly. 
Please specify your own sequences for different uses. + At the current stage, the program only supports two sequences with the same lengh, which can be divided by 16. +Usage: needle 32 10 + 32 //the length of both sequences + 10 //penalty value + +******Adjustable work group size***** +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" diff --git a/workloads/realworld/pinned/nw/needle b/workloads/realworld/pinned/nw/needle new file mode 100755 index 0000000000000000000000000000000000000000..9f78555182009ec8564571ae3a42494b999aad2b Binary files /dev/null and b/workloads/realworld/pinned/nw/needle differ diff --git a/workloads/realworld/pinned/nw/needle.cu b/workloads/realworld/pinned/nw/needle.cu new file mode 100755 index 0000000000000000000000000000000000000000..7246ec604cf38867be161b8df3ebb0fd9318e832 --- /dev/null +++ b/workloads/realworld/pinned/nw/needle.cu @@ -0,0 +1,285 @@ +#define LIMIT -999 +#include +#include +#include +#include +#include "needle.h" +#include +#include +#include + +// includes, kernels +#include "needle_kernel.cu" +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#ifdef TIMING +#include "timing.h" + +struct timeval tv; +struct timeval tv_total_start, tv_total_end; +struct timeval tv_h2d_start, tv_h2d_end; +struct timeval tv_d2h_start, tv_d2h_end; +struct timeval tv_kernel_start, tv_kernel_end; +struct timeval tv_mem_alloc_start, tv_mem_alloc_end; +struct timeval tv_close_start, tv_close_end; +float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, + d2h_time = 0, close_time = 0, total_time = 0; +#endif + +//////////////////////////////////////////////////////////////////////////////// +// declaration, forward +void runTest( int argc, char** argv); + + +int blosum62[24][24] = { +{ 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0, -4}, +{-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 
0, -1, -4}, +{-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1, -4}, +{-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1, -4}, +{ 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4}, +{-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1, -4}, +{-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4}, +{ 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -4}, +{-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1, -4}, +{-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1, -4}, +{-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1, -4}, +{-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1, -4}, +{-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1, -4}, +{-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1, -4}, +{-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2, -4}, +{ 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0, -4}, +{ 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0, -4}, +{-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2, -4}, +{-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1, -4}, +{ 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1, -4}, +{-2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1, -4}, +{-1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4}, +{ 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1, -4}, +{-4, -4, -4, -4, -4, -4, -4, -4, -4, 
-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1} +}; + +double gettime() { + struct timeval t; + gettimeofday(&t,NULL); + return t.tv_sec+t.tv_usec*1e-6; +} + +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int +main( int argc, char** argv) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + + printf("WG size of kernel = %d \n", BLOCK_SIZE); + + runTest( argc, argv); + + return EXIT_SUCCESS; +} + +void usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "\t - x and y dimensions\n"); + fprintf(stderr, "\t - penalty(positive integer)\n"); + exit(1); +} + +void runTest( int argc, char** argv) +{ + int max_rows, max_cols, penalty, nblocks; + int *input_itemsets, *output_itemsets, *referrence; + int *matrix_cuda, *referrence_cuda; + int size; + + + // the lengths of the two sequences should be able to divided by 16. 
+ // And at current stage max_rows needs to equal max_cols + if (argc == 4) + { + max_rows = atoi(argv[1]); + max_cols = atoi(argv[1]); + penalty = atoi(argv[2]); + nblocks = atoi(argv[3]); + } + else{ + usage(argc, argv); + } + + if(atoi(argv[1])%16!=0){ + fprintf(stderr,"The dimension values must be a multiple of 16\n"); + exit(1); + } + + + max_rows = max_rows + 1; + max_cols = max_cols + 1; + referrence = (int *)malloc( max_rows * max_cols * sizeof(int) ); + input_itemsets = (int *)malloc( max_rows * max_cols * sizeof(int) ); + output_itemsets = (int *)malloc( max_rows * max_cols * sizeof(int) ); + + + if (!input_itemsets) + fprintf(stderr, "error: can not allocate memory"); + + srand ( 7 ); + + + for (int i = 0 ; i < max_cols; i++){ + for (int j = 0 ; j < max_rows; j++){ + input_itemsets[i*max_cols+j] = 0; + } + } + + printf("Start Needleman-Wunsch\n"); + + for( int i=1; i< max_rows ; i++){ //please define your own sequence. + input_itemsets[i*max_cols] = rand() % 10 + 1; + } + for( int j=1; j< max_cols ; j++){ //please define your own sequence. 
+ input_itemsets[j] = rand() % 10 + 1; + } + + + for (int i = 1 ; i < max_cols; i++){ + for (int j = 1 ; j < max_rows; j++){ + referrence[i*max_cols+j] = blosum62[input_itemsets[i*max_cols]][input_itemsets[j]]; + } + } + + for( int i = 1; i< max_rows ; i++) + input_itemsets[i*max_cols] = -i * penalty; + for( int j = 1; j< max_cols ; j++) + input_itemsets[j] = -j * penalty; + + + size = max_cols * max_rows; + + GPU_argv_init(); + initTrace(); + startCPU(); + + cudaMalloc((void**)& referrence_cuda, sizeof(int)*size); + cudaMalloc((void**)& matrix_cuda, sizeof(int)*size); + + cudaMemcpy(referrence_cuda, referrence, sizeof(int) * size, cudaMemcpyHostToDevice); + cudaMemcpy(matrix_cuda, input_itemsets, sizeof(int) * size, cudaMemcpyHostToDevice); + + dim3 dimGrid; + dim3 dimBlock(BLOCK_SIZE, 1); + // int block_width = ( max_cols - 1 )/BLOCK_SIZE; + int block_width = nblocks - 1; + int block_size = (max_cols - 1) / (nblocks * BLOCK_SIZE); + +#ifdef TIMING + gettimeofday(&tv_kernel_start, NULL); +#endif + + //printf("Processing top-left matrix\n"); + //process top-left matrix + for( int i = 1 ; i <= block_width ; i++) { + dimGrid.x = i; + dimGrid.y = 1; + needle_cuda_shared_1<<>>(referrence_cuda, matrix_cuda + ,max_cols, penalty, i, block_width, block_size); + } + //printf("Processing bottom-right matrix\n"); + //process bottom-right matrix + for( int i = block_width - 1 ; i >= 1 ; i--){ + dimGrid.x = i; + dimGrid.y = 1; + needle_cuda_shared_2<<>>(referrence_cuda, matrix_cuda + ,max_cols, penalty, i, block_width, block_size); + } + cudaDeviceSynchronize(); + +#ifdef TIMING + gettimeofday(&tv_kernel_end, NULL); + tvsub(&tv_kernel_end, &tv_kernel_start, &tv); + kernel_time += tv.tv_sec * 1000.0 + (float) tv.tv_usec / 1000.0; +#endif + + cudaMemcpy(output_itemsets, matrix_cuda, sizeof(int) * size, cudaMemcpyDeviceToHost); + + //sanjana - moved cudaFree up bc it's unused + cudaFree(referrence_cuda); + cudaFree(matrix_cuda); + + endCPU(); + finiTrace(); + +//#define TRACEBACK 
+#ifdef TRACEBACK + + FILE *fpo = fopen("result.txt","w"); + fprintf(fpo, "print traceback value GPU:\n"); + + for (int i = max_rows - 2, j = max_rows - 2; i>=0, j>=0;){ + int nw, n, w, traceback; + if ( i == max_rows - 2 && j == max_rows - 2 ) + fprintf(fpo, "%d ", output_itemsets[ i * max_cols + j]); //print the first element + if ( i == 0 && j == 0 ) + break; + if ( i > 0 && j > 0 ){ + nw = output_itemsets[(i - 1) * max_cols + j - 1]; + w = output_itemsets[ i * max_cols + j - 1 ]; + n = output_itemsets[(i - 1) * max_cols + j]; + } + else if ( i == 0 ){ + nw = n = LIMIT; + w = output_itemsets[ i * max_cols + j - 1 ]; + } + else if ( j == 0 ){ + nw = w = LIMIT; + n = output_itemsets[(i - 1) * max_cols + j]; + } + else{ + } + + //traceback = maximum(nw, w, n); + int new_nw, new_w, new_n; + new_nw = nw + referrence[i * max_cols + j]; + new_w = w - penalty; + new_n = n - penalty; + + traceback = maximum(new_nw, new_w, new_n); + if(traceback == new_nw) + traceback = nw; + if(traceback == new_w) + traceback = w; + if(traceback == new_n) + traceback = n; + + fprintf(fpo, "%d ", traceback); + + if(traceback == nw ) + {i--; j--; continue;} + + else if(traceback == w ) + {j--; continue;} + + else if(traceback == n ) + {i--; continue;} + + else + ; + } + + fclose(fpo); + +#endif + + free(referrence); + free(input_itemsets); + free(output_itemsets); + +#ifdef TIMING + printf("Exec: %f\n", kernel_time); +#endif +} + diff --git a/workloads/realworld/pinned/nw/needle.h b/workloads/realworld/pinned/nw/needle.h new file mode 100755 index 0000000000000000000000000000000000000000..e73320d6496262665592117d242e9bc383298b5b --- /dev/null +++ b/workloads/realworld/pinned/nw/needle.h @@ -0,0 +1,11 @@ +#ifdef RD_WG_SIZE_0_0 + #define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) + #define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) + #define BLOCK_SIZE RD_WG_SIZE +#else + #define BLOCK_SIZE 16 +#endif +//#define TRACE + diff --git 
a/workloads/realworld/pinned/nw/needle_kernel.cu b/workloads/realworld/pinned/nw/needle_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..d7b4a0a1984521fa4a3d0dab3e1a3b3645ad5a4c --- /dev/null +++ b/workloads/realworld/pinned/nw/needle_kernel.cu @@ -0,0 +1,197 @@ + +#include "needle.h" +#include + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SDATA( index) CUT_BANK_CHECKER(sdata, index) + +__device__ __host__ int +maximum( int a, + int b, + int c){ + +int k; +if( a <= b ) +k = b; +else +k = a; + +if( k <=c ) +return(c); +else +return(k); + +} + +__global__ void +needle_cuda_shared_1( int* referrence, + int* matrix_cuda, + int cols, + int penalty, + int i, + int block_width, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + int bx = blockIdx.x; + int tx = threadIdx.x; + + int b_index_x = bx; + int b_index_y = i - 1 - bx; + + __shared__ int temp[BLOCK_SIZE+1][BLOCK_SIZE+1]; + __shared__ int ref[BLOCK_SIZE][BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (b_index_y * gridDim.x + b_index_x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + int b_index_x = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int b_index_y = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + int index = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( cols + 1 ); + int index_n = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( 1 ); + int index_w = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + ( cols ); + int 
index_nw = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x; + + if (tx == 0) + temp[tx][0] = matrix_cuda[index_nw]; + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + ref[ty][tx] = referrence[index + cols * ty]; + block.sync(); + + temp[tx + 1][0] = matrix_cuda[index_w + cols * tx]; + block.sync(); + + temp[0][tx + 1] = matrix_cuda[index_n]; + block.sync(); + + for( int m = 0 ; m < BLOCK_SIZE ; m++){ + if ( tx <= m ){ + int t_index_x = tx + 1; + int t_index_y = m - tx + 1; + + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[t_index_y-1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + } + block.sync(); + } + + for( int m = BLOCK_SIZE - 2 ; m >=0 ; m--){ + if ( tx <= m){ + int t_index_x = tx + BLOCK_SIZE - m ; + int t_index_y = BLOCK_SIZE - tx; + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[t_index_y-1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + + } + block.sync(); + } + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + matrix_cuda[index + ty * cols] = temp[ty+1][tx+1]; + } +} + + +__global__ void +needle_cuda_shared_2( int* referrence, + int* matrix_cuda, + int cols, + int penalty, + int i, + int block_width, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + int bx = blockIdx.x; + int tx = threadIdx.x; + + int b_index_x = bx + block_width - i; + int b_index_y = block_width - bx -1; + + __shared__ int temp[BLOCK_SIZE+1][BLOCK_SIZE+1]; + __shared__ int ref[BLOCK_SIZE][BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (b_index_y * gridDim.x + b_index_x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for 
(; tile < end_tile; tile += 1) + { + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + int b_index_x = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int b_index_y = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + int index = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( cols + 1 ); + int index_n = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( 1 ); + int index_w = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + ( cols ); + int index_nw = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x; + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + ref[ty][tx] = referrence[index + cols * ty]; + block.sync(); + + if (tx == 0) + temp[tx][0] = matrix_cuda[index_nw]; + temp[tx + 1][0] = matrix_cuda[index_w + cols * tx]; + block.sync(); + + temp[0][tx + 1] = matrix_cuda[index_n]; + block.sync(); + + for( int m = 0 ; m < BLOCK_SIZE ; m++){ + if ( tx <= m ){ + int t_index_x = tx + 1; + int t_index_y = m - tx + 1; + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[t_index_y-1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + + } + block.sync(); + } + + for( int m = BLOCK_SIZE - 2 ; m >=0 ; m--){ + if ( tx <= m){ + int t_index_x = tx + BLOCK_SIZE - m ; + int t_index_y = BLOCK_SIZE - tx; + + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[t_index_y-1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + } + block.sync(); + } + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + matrix_cuda[index + ty * cols] = temp[ty+1][tx+1]; + } +} + diff --git a/workloads/realworld/pinned/nw/run.sh b/workloads/realworld/pinned/nw/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..e3d20f9f4402de57c2a49c4db5c04d917907d741 --- /dev/null +++ b/workloads/realworld/pinned/nw/run.sh @@ -0,0 +1 @@ +./needle 
32768 10 256 diff --git a/workloads/realworld/pinned/nw/run_super.sh b/workloads/realworld/pinned/nw/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..23b570be1ac96cce67094a9469de1c6d24c03b08 --- /dev/null +++ b/workloads/realworld/pinned/nw/run_super.sh @@ -0,0 +1 @@ +./needle 32768 10 64 diff --git a/workloads/realworld/pinned/pathfinder/Makefile b/workloads/realworld/pinned/pathfinder/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d740e79027f3651c458e229179bbbd46fb4fcbec --- /dev/null +++ b/workloads/realworld/pinned/pathfinder/Makefile @@ -0,0 +1,14 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc +INCLUDE := $(CUDA_DIR)/include + +SRC = pathfinder.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = pathfinder + +release: + $(CC) $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +clean: + rm -f pathfinder diff --git a/workloads/realworld/pinned/pathfinder/README b/workloads/realworld/pinned/pathfinder/README new file mode 100644 index 0000000000000000000000000000000000000000..9af75abe201eb95c5c89a038c6d79f54b276f94e --- /dev/null +++ b/workloads/realworld/pinned/pathfinder/README @@ -0,0 +1,6 @@ +To compile the program: + +nvcc -cuda dynproc.cu +nvcc -o dynproc dynproc.cu.cpp + +Usage: dynproc row_len col_len pyramid_height diff --git a/workloads/realworld/pinned/pathfinder/pathfinder.cu b/workloads/realworld/pinned/pathfinder/pathfinder.cu new file mode 100644 index 0000000000000000000000000000000000000000..a3c6069ab53f8197d1f61d5fdb417ffdba9be5cc --- /dev/null +++ b/workloads/realworld/pinned/pathfinder/pathfinder.cu @@ -0,0 +1,299 @@ +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#ifdef TIMING +#include "timing.h" + 
+struct timeval tv; +struct timeval tv_total_start, tv_total_end; +struct timeval tv_h2d_start, tv_h2d_end; +struct timeval tv_d2h_start, tv_d2h_end; +struct timeval tv_kernel_start, tv_kernel_end; +struct timeval tv_mem_alloc_start, tv_mem_alloc_end; +struct timeval tv_close_start, tv_close_end; +float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, + d2h_time = 0, close_time = 0, total_time = 0; +#endif + +#define BLOCK_SIZE 256 +#define STR_SIZE 256 +#define DEVICE 0 +#define HALO 1 // halo width along one direction when advancing to the next iteration + +// #define BENCH_PRINT + +void run(int argc, char **argv); + +int rows, cols; +int *data; +int **wall; +int *result; +#define M_SEED 9 +int pyramid_height; +int nblocks; + +void init(int argc, char **argv) +{ + if (argc == 5) + { + cols = atoi(argv[1]); + rows = atoi(argv[2]); + pyramid_height = atoi(argv[3]); + nblocks = atoi(argv[4]); + } + else + { + printf("Usage: dynproc row_len col_len pyramid_height\n"); + exit(0); + } + data = new int[rows * cols]; + wall = new int *[rows]; + for (int n = 0; n < rows; n++) + wall[n] = data + cols * n; + result = new int[cols]; + + int seed = M_SEED; + srand(seed); + + for (int i = 0; i < rows; i++) + { + for (int j = 0; j < cols; j++) + { + wall[i][j] = rand() % 10; + } + } +#ifdef BENCH_PRINT + for (int i = 0; i < rows; i++) + { + for (int j = 0; j < cols; j++) + { + printf("%d ", wall[i][j]); + } + printf("\n"); + } +#endif +} + +void fatal(char *s) +{ + fprintf(stderr, "error: %s\n", s); +} + +#define IN_RANGE(x, min, max) ((x) >= (min) && (x) <= (max)) +#define CLAMP_RANGE(x, min, max) x = (x < (min)) ? min : ((x > (max)) ? max : x) +#define MIN(a, b) ((a) <= (b) ? 
(a) : (b)) + +__global__ void dynproc_kernel( + int iteration, + int *gpuWall, + int *gpuSrc, + int *gpuResults, + int cols, + int rows, + int startStep, + int border, + int small_block_cols, + int tile_size, + int batches) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + __shared__ int prev[BLOCK_SIZE]; + __shared__ int result[BLOCK_SIZE]; + + int bx = blockIdx.x; + int tx = threadIdx.x; + + for (int b = 0; b < batches; b++) + { + // each block finally computes result for a small block + // after N iterations. + // it is the non-overlapping small blocks that cover + // all the input data + + // calculate the boundary for the block according to + // the boundary of its small block + int blkX = bx * tile_size + small_block_cols * b - border; + int blkXmax = blkX + BLOCK_SIZE - 1; + + // calculate the global thread coordination + int xidx = blkX + tx; + + // effective range within this block that falls within + // the valid range of the input data + // used to rule out computation outside the boundary. + int validXmin = (blkX < 0) ? -blkX : 0; + int validXmax = (blkXmax > cols - 1) ? BLOCK_SIZE - 1 - (blkXmax - cols + 1) : BLOCK_SIZE - 1; + + int W = tx - 1; + int E = tx + 1; + + W = (W < validXmin) ? validXmin : W; + E = (E > validXmax) ? validXmax : E; + + bool isValid = IN_RANGE(tx, validXmin, validXmax); + + if (IN_RANGE(xidx, 0, cols - 1)) + { + prev[tx] = gpuSrc[xidx]; + } + block.sync(); // [Ronny] Added sync to avoid race on prev Aug. 
14 2012 + bool computed; + for (int i = 0; i < iteration; i++) + { + computed = false; + if (IN_RANGE(tx, i + 1, BLOCK_SIZE - i - 2) && + isValid) + { + computed = true; + int left = prev[W]; + int up = prev[tx]; + int right = prev[E]; + int shortest = MIN(left, up); + shortest = MIN(shortest, right); + int index = cols * (startStep + i) + xidx; + result[tx] = shortest + gpuWall[index]; + } + block.sync(); + if (i == iteration - 1) + break; + if (computed) // Assign the computation range + prev[tx] = result[tx]; + block.sync(); // [Ronny] Added sync to avoid race on prev Aug. 14 2012 + } + + // update the global memory + // after the last iteration, only threads coordinated within the + // small block perform the calculation and switch on ``computed'' + if (computed) + { + gpuResults[xidx] = result[tx]; + } + } + + +} + +/* + compute N time steps +*/ +int calc_path(int *gpuWall, int *gpuResult[2], int rows, int cols, + int pyramid_height, int blockCols, int borderCols, int tile_size, int batches) +{ + dim3 dimBlock(BLOCK_SIZE); + dim3 dimGrid(nblocks); + + int src = 1, dst = 0; + for (int t = 0; t < rows - 1; t += pyramid_height) + { + int temp = src; + src = dst; + dst = temp; + + int iteration = MIN(pyramid_height, rows - t - 1); + int small_block_cols = BLOCK_SIZE - iteration * HALO * 2; + dynproc_kernel<<>>( + iteration, gpuWall, gpuResult[src], gpuResult[dst], + cols, rows, t, borderCols, small_block_cols, tile_size, batches); + + // for the measurement fairness + cudaDeviceSynchronize(); + } + return dst; +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + GPU_argv_init(); + + run(argc, argv); + + return EXIT_SUCCESS; +} + +void run(int argc, char **argv) +{ + init(argc, argv); + + /* --------------- pyramid parameters --------------- */ + int borderCols = (pyramid_height)*HALO; + int smallBlockCol = BLOCK_SIZE - (pyramid_height)*HALO * 2; 
+ int blockCols = cols / smallBlockCol + ((cols % smallBlockCol == 0) ? 0 : 1); + + //ruihao + int cols_per_block = cols / nblocks; + if (cols_per_block < BLOCK_SIZE) cols_per_block = BLOCK_SIZE; + int batches = cols_per_block / smallBlockCol + ((cols_per_block % smallBlockCol == 0) ? 0 : 1); + + // printf("pyramidHeight: %d\ngridSize: [%d]\nborder:[%d]\nblockSize: %d\nblockGrid:[%d]\ntargetBlock:[%d]\n", + // pyramid_height, cols, borderCols, BLOCK_SIZE, blockCols, smallBlockCol); + printf("pyramidHeight: %d\ngridSize: [%d]\nborder:[%d]\nblockSize: %d\nblockGrid:[%d]\ntargetBlock:[%d]\n", + pyramid_height, cols, borderCols, BLOCK_SIZE, nblocks, smallBlockCol); + + int *gpuWall, *gpuResult[2]; + int size = rows * cols; + + initTrace(); + startCPU(); + + cudaMalloc((void **)&gpuResult[0], sizeof(int) * cols); + cudaMalloc((void **)&gpuResult[1], sizeof(int) * cols); + cudaMemcpy(gpuResult[0], data, sizeof(int) * cols, cudaMemcpyHostToDevice); + cudaMalloc((void **)&gpuWall, sizeof(int) * (size - cols)); + cudaMemcpy(gpuWall, data + cols, sizeof(int) * (size - cols), cudaMemcpyHostToDevice); + +#ifdef TIMING + gettimeofday(&tv_kernel_start, NULL); +#endif + + // int final_ret = calc_path(gpuWall, gpuResult, rows, cols, + // pyramid_height, blockCols, borderCols); + int final_ret = calc_path(gpuWall, gpuResult, rows, cols, + pyramid_height, blockCols, borderCols, cols_per_block, batches); + +#ifdef TIMING + gettimeofday(&tv_kernel_end, NULL); + tvsub(&tv_kernel_end, &tv_kernel_start, &tv); + kernel_time += tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; +#endif + + cudaMemcpy(result, gpuResult[final_ret], sizeof(int) * cols, cudaMemcpyDeviceToHost); + +#ifdef BENCH_PRINT + for (int i = 0; i < cols; i++) + printf("%d ", data[i]); + printf("\n"); + for (int i = 0; i < cols; i++) + printf("%d ", result[i]); + printf("\n"); +#endif + + cudaFree(gpuWall); + cudaFree(gpuResult[0]); + cudaFree(gpuResult[1]); + + endCPU(); + finiTrace(); + + delete[] data; + delete[] wall; + 
delete[] result; + +#ifdef TIMING + printf("Exec: %f\n", kernel_time); +#endif +} diff --git a/workloads/realworld/pinned/pathfinder/result.txt b/workloads/realworld/pinned/pathfinder/result.txt new file mode 100644 index 0000000000000000000000000000000000000000..fa67c591d071682e1842a455f4477b397825e250 --- /dev/null +++ b/workloads/realworld/pinned/pathfinder/result.txt @@ -0,0 +1,11 @@ +pyramidHeight: 20 +gridSize: [100000] +border:[20] +blockSize: 256 +blockGrid:[463] +targetBlock:[216] +CUPTI dynproc_kernel iter 0 start: 1679530155077329959 end: 1679530155078946951 +CUPTI dynproc_kernel iter 20 start: 1679530155078953603 end: 1679530155081441509 +CUPTI dynproc_kernel iter 40 start: 1679530155081441880 end: 1679530155083936228 +CUPTI dynproc_kernel iter 60 start: 1679530155083936508 end: 1679530155086433891 +CUPTI dynproc_kernel iter 80 start: 1679530155086434172 end: 1679530155088929332 diff --git a/workloads/realworld/pinned/pathfinder/run.sh b/workloads/realworld/pinned/pathfinder/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..11a9e6199ea4b2ff7fd3e0ebf893dc96fa89ff45 --- /dev/null +++ b/workloads/realworld/pinned/pathfinder/run.sh @@ -0,0 +1,2 @@ +#./pathfinder 100000 100 20 1024 > result.txt +./pathfinder 10000000 100 20 1024 diff --git a/workloads/realworld/pinned/pathfinder/run_super.sh b/workloads/realworld/pinned/pathfinder/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..b35cc0b44511def3912323c1e0d58c2daa280722 --- /dev/null +++ b/workloads/realworld/pinned/pathfinder/run_super.sh @@ -0,0 +1 @@ +./pathfinder 10000000 100 20 1024 diff --git a/workloads/realworld/pinned/srad/Makefile b/workloads/realworld/pinned/srad/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..47da520a663446e36c04461d54cfbb3d12cfa328 --- /dev/null +++ b/workloads/realworld/pinned/srad/Makefile @@ -0,0 +1,15 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := 
$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -std=c++11 -arch=sm_80 -O3 + +SRC = srad.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = srad + +release: $(SRC) + $(CC) $(KERNEL_DIM) $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt diff --git a/workloads/realworld/pinned/srad/Makefile_nvidia b/workloads/realworld/pinned/srad/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..e1f345c41c0f838dcf159958f628276455ef4dd7 --- /dev/null +++ b/workloads/realworld/pinned/srad/Makefile_nvidia @@ -0,0 +1,22 @@ +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := srad +# CUDA source files (compiled with cudacc) +CUFILES := srad.cu +# CUDA dependency files +CU_DEPS := \ + srad_kernel.cu \ + +# C/C++ source files (compiled with gcc / c++) +CCFILES := \ + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/pinned/srad/README b/workloads/realworld/pinned/srad/README new file mode 100755 index 0000000000000000000000000000000000000000..91e803b576bdeebe232c00a5112dadd836ffc33f --- /dev/null +++ b/workloads/realworld/pinned/srad/README @@ -0,0 +1,24 @@ +In srad.h, define either GPU or CPU computation +Currently, the GPU implementation can only support x-, y-dimensions that can be divided by 16. 
+ +Usage: +srad 128 128 0 31 0 31 0.5 2 + +128 //number of rows in the domain +128 //number of cols in the domain +0 //y1 position of the speckle +31 //y2 position of the speckle +0 //x1 position of the speckle +31 //x2 position of the speckle +0.5 //Lambda value +2 //number of iterations + + +******Adjustable work group size***** +The kernel has square shape +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe one dimesion +The total thread number for one block is RD_WG_SIZE_0*RD_WG_SIZE_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" \ No newline at end of file diff --git a/workloads/realworld/pinned/srad/run.sh b/workloads/realworld/pinned/srad/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..46b08d0b82af4817b9d19fe704601ee98d058357 --- /dev/null +++ b/workloads/realworld/pinned/srad/run.sh @@ -0,0 +1,5 @@ +./srad 8192 8192 0 127 0 127 0.5 2 8 + +# ./srad 128 128 0 127 0 127 0.5 2 8 + +# ./srad 16384 16384 0 16383 0 16383 0.5 2 8 diff --git a/workloads/realworld/pinned/srad/run_super.sh b/workloads/realworld/pinned/srad/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2d0f1b8ebd049e33bbb74a15722fc7172167641f --- /dev/null +++ b/workloads/realworld/pinned/srad/run_super.sh @@ -0,0 +1 @@ +./srad 32768 32768 0 127 0 127 0.5 2 8 \ No newline at end of file diff --git a/workloads/realworld/pinned/srad/srad b/workloads/realworld/pinned/srad/srad new file mode 100755 index 0000000000000000000000000000000000000000..19a684385466c9a5e886b2883fe277c135c1a414 Binary files /dev/null and b/workloads/realworld/pinned/srad/srad differ diff --git a/workloads/realworld/pinned/srad/srad.cu b/workloads/realworld/pinned/srad/srad.cu new file mode 100755 index 0000000000000000000000000000000000000000..2dd1c82c5b3ee97949a20969ddf1d16ca4cf1909 --- /dev/null +++ b/workloads/realworld/pinned/srad/srad.cu @@ -0,0 +1,299 @@ +// includes, system +#include +#include +#include +#include +#include "srad.h" +#include 
"../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +// includes, project +#include +#define GPU + +// includes, kernels +#include "srad_kernel.cu" + +void random_matrix(float *I, int rows, int cols); +void runTest( int argc, char** argv); +void usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "\t - number of rows\n"); + fprintf(stderr, "\t - number of cols\n"); + fprintf(stderr, "\t - y1 value of the speckle\n"); + fprintf(stderr, "\t - y2 value of the speckle\n"); + fprintf(stderr, "\t - x1 value of the speckle\n"); + fprintf(stderr, "\t - x2 value of the speckle\n"); + fprintf(stderr, "\t - lambda (0,1)\n"); + fprintf(stderr, "\t - number of iterations\n"); + + exit(1); +} +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE); + runTest( argc, argv); + + return EXIT_SUCCESS; +} + +void +runTest( int argc, char** argv) +{ + int rows, cols, size_I, size_R, niter = 10, iter, nblocks; + float *I, *J, lambda, q0sqr, sum, sum2, tmp, meanROI,varROI ; + +#ifdef CPU + float Jc, G2, L, num, den, qsqr; + int *iN,*iS,*jE,*jW, k; + float *dN,*dS,*dW,*dE; + float cN,cS,cW,cE,D; +#endif + +#ifdef GPU + + float *J_cuda; + float *C_cuda; + float *E_C, *W_C, *N_C, *S_C; + +#endif + + unsigned int r1, r2, c1, c2; + float *c; + + + + if (argc == 10) + { + rows = atoi(argv[1]); //number of rows in the domain + cols = atoi(argv[2]); //number of cols in the domain + if ((rows%16!=0) || (cols%16!=0)){ + fprintf(stderr, "rows and cols must be multiples of 16\n"); + exit(1); + } + r1 = atoi(argv[3]); //y1 position of the speckle + r2 = atoi(argv[4]); //y2 position of 
the speckle + c1 = atoi(argv[5]); //x1 position of the speckle + c2 = atoi(argv[6]); //x2 position of the speckle + lambda = atof(argv[7]); //Lambda value + niter = atoi(argv[8]); //number of iterations + nblocks = atoi(argv[9]); // number of blocks + } + else{ + usage(argc, argv); + } + + size_I = cols * rows; + size_R = (r2-r1+1)*(c2-c1+1); + + I = (float *)malloc( size_I * sizeof(float) ); + J = (float *)malloc( size_I * sizeof(float) ); + c = (float *)malloc(sizeof(float)* size_I) ; + + +#ifdef CPU + + iN = (int *)malloc(sizeof(unsigned int*) * rows) ; + iS = (int *)malloc(sizeof(unsigned int*) * rows) ; + jW = (int *)malloc(sizeof(unsigned int*) * cols) ; + jE = (int *)malloc(sizeof(unsigned int*) * cols) ; + + + dN = (float *)malloc(sizeof(float)* size_I) ; + dS = (float *)malloc(sizeof(float)* size_I) ; + dW = (float *)malloc(sizeof(float)* size_I) ; + dE = (float *)malloc(sizeof(float)* size_I) ; + + + for (int i=0; i< rows; i++) { + iN[i] = i-1; + iS[i] = i+1; + } + for (int j=0; j< cols; j++) { + jW[j] = j-1; + jE[j] = j+1; + } + iN[0] = 0; + iS[rows-1] = rows-1; + jW[0] = 0; + jE[cols-1] = cols-1; + +#endif +GPU_argv_init(); +initTrace(); +startCPU(); + +#ifdef GPU + //Allocate device memory + cudaMalloc((void**)& J_cuda, sizeof(float)* size_I); + cudaMalloc((void**)& C_cuda, sizeof(float)* size_I); + cudaMalloc((void**)& E_C, sizeof(float)* size_I); + cudaMalloc((void**)& W_C, sizeof(float)* size_I); + cudaMalloc((void**)& S_C, sizeof(float)* size_I); + cudaMalloc((void**)& N_C, sizeof(float)* size_I); + + (E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr, cols / nblocks); +#endif + + printf("Randomizing the input matrix\n"); + //Generate a random matrix + random_matrix(I, rows, cols); + + for (int k = 0; k < size_I; k++ ) { + J[k] = (float)exp(I[k]) ; + } + printf("Start the SRAD main loop\n"); + for (iter=0; iter< niter; iter++){ + sum=0; sum2=0; + for (int i=r1; i<=r2; i++) { + for (int j=c1; j<=c2; j++) { + tmp = J[i * cols + j]; + sum += tmp ; 
+ sum2 += tmp*tmp; + } + } + meanROI = sum / size_R; + varROI = (sum2 / size_R) - meanROI*meanROI; + q0sqr = varROI / (meanROI*meanROI); + +#ifdef CPU + + for (int i = 0 ; i < rows ; i++) { + for (int j = 0; j < cols; j++) { + + k = i * cols + j; + Jc = J[k]; + + // directional derivates + dN[k] = J[iN[i] * cols + j] - Jc; + dS[k] = J[iS[i] * cols + j] - Jc; + dW[k] = J[i * cols + jW[j]] - Jc; + dE[k] = J[i * cols + jE[j]] - Jc; + + G2 = (dN[k]*dN[k] + dS[k]*dS[k] + + dW[k]*dW[k] + dE[k]*dE[k]) / (Jc*Jc); + + L = (dN[k] + dS[k] + dW[k] + dE[k]) / Jc; + + num = (0.5*G2) - ((1.0/16.0)*(L*L)) ; + den = 1 + (.25*L); + qsqr = num/(den*den); + + // diffusion coefficent (equ 33) + den = (qsqr-q0sqr) / (q0sqr * (1+q0sqr)) ; + c[k] = 1.0 / (1.0+den) ; + + // saturate diffusion coefficent + if (c[k] < 0) {c[k] = 0;} + else if (c[k] > 1) {c[k] = 1;} + } + } + + for (int i = 0; i < rows; i++) { + for (int j = 0; j < cols; j++) { + + // current index + k = i * cols + j; + + // diffusion coefficent + cN = c[k]; + cS = c[iS[i] * cols + j]; + cW = c[k]; + cE = c[i * cols + jE[j]]; + + // divergence (equ 58) + D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k]; + + // image update (equ 61) + J[k] = J[k] + 0.25*lambda*D; + } + } + +#endif // CPU + + +#ifdef GPU + + //Currently the input size must be divided by 16 - the block size + + // ruihao + int block_x = cols/BLOCK_SIZE ; + int block_y = rows/BLOCK_SIZE ; + + if (nblocks > block_x) nblocks = block_x; + + dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE); + // dim3 dimGrid(block_x, block_y); + dim3 dimGrid(nblocks, nblocks); + // ruihao + + //Copy data from main memory to device memory + cudaMemcpy(J_cuda, J, sizeof(float) * size_I, cudaMemcpyHostToDevice); + + //Run kernels + // srad_cuda_1<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr); + // srad_cuda_2<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, lambda, q0sqr); + srad_cuda_1<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr, cols / nblocks); + 
srad_cuda_2<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, lambda, q0sqr, cols / nblocks); + //Copy data from device memory to main memory + cudaMemcpy(J, J_cuda, sizeof(float) * size_I, cudaMemcpyDeviceToHost); + +#endif +} + + cudaThreadSynchronize(); + + #ifdef GPU + cudaFree(C_cuda); + cudaFree(J_cuda); + cudaFree(E_C); + cudaFree(W_C); + cudaFree(N_C); + cudaFree(S_C); +#endif +endCPU(); +finiTrace(); + +#ifdef OUTPUT + //Printing output + printf("Printing Output:\n"); + for( int i = 0 ; i < rows ; i++){ + for ( int j = 0 ; j < cols ; j++){ + printf("%.5f ", J[i * cols + j]); + } + printf("\n"); + } +#endif + + printf("Computation Done\n"); + + free(I); + free(J); +#ifdef CPU + free(iN); free(iS); free(jW); free(jE); + free(dN); free(dS); free(dW); free(dE); +#endif + free(c); + +} + + +void random_matrix(float *I, int rows, int cols){ + + srand(7); + + for( int i = 0 ; i < rows ; i++){ + for ( int j = 0 ; j < cols ; j++){ + I[i * cols + j] = rand()/(float)RAND_MAX ; + } + } + +} + diff --git a/workloads/realworld/pinned/srad/srad.h b/workloads/realworld/pinned/srad/srad.h new file mode 100755 index 0000000000000000000000000000000000000000..2b2adb6d956b697c5b0ace9bccb89162ef98be50 --- /dev/null +++ b/workloads/realworld/pinned/srad/srad.h @@ -0,0 +1,16 @@ +#define STR_SIZE 256 + +#ifdef RD_WG_SIZE_0_0 + #define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) + #define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) + #define BLOCK_SIZE RD_WG_SIZE +#else + #define BLOCK_SIZE 16 +#endif + +#define GPU +#define TIMER +//#define OUTPUT + diff --git a/workloads/realworld/pinned/srad/srad_kernel.cu b/workloads/realworld/pinned/srad/srad_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..a81386576462e82375b7826f9b203005cca803ab --- /dev/null +++ b/workloads/realworld/pinned/srad/srad_kernel.cu @@ -0,0 +1,316 @@ +#include "srad.h" +#include + +#include +#include + +using namespace nvcuda::experimental; + +#define 
PREFETCH_COUNT 2 + +__global__ void +srad_cuda_1( + float *E_C, + float *W_C, + float *N_C, + float *S_C, + float *J_cuda, + float *C_cuda, + int cols, + int rows, + float q0sqr, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // shared memory allocation + __shared__ float temp[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float temp_result[BLOCK_SIZE * BLOCK_SIZE]; + + __shared__ float north[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float south[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float east[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float west[BLOCK_SIZE * BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + // block id + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + int bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + // thread id + int tx = threadIdx.x; + int ty = threadIdx.y; + + // indices + int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx; + int index_n = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + tx - cols; + int index_s = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * BLOCK_SIZE + tx; + int index_w = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty - 1; + int index_e = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + BLOCK_SIZE; + + if (index_n < 0) index_n = 0; + if (index_s >= (cols * rows)) index_s = cols * rows - 1; + if (index_w < 0) index_w = 0; + if (index_e >= (cols * rows)) index_e = cols * rows - 1; + + float n, w, e, s, jc, g2, l, num, den, qsqr, c; + + // 
load data to shared memory + north[ty * BLOCK_SIZE + tx] = J_cuda[index_n]; + south[ty * BLOCK_SIZE + tx] = J_cuda[index_s]; + if (by == 0) + { + north[ty * BLOCK_SIZE + tx] = J_cuda[BLOCK_SIZE * bx + tx]; + } + else if (by == tile_dim_x - 1) + { + south[ty * BLOCK_SIZE + tx] = J_cuda[cols * BLOCK_SIZE * (tile_dim_x - 1) + BLOCK_SIZE * bx + cols * (BLOCK_SIZE - 1) + tx]; + } + block.sync(); + + west[ty * BLOCK_SIZE + tx] = J_cuda[index_w]; + east[ty * BLOCK_SIZE + tx] = J_cuda[index_e]; + + if (bx == 0) + { + west[ty * BLOCK_SIZE + tx] = J_cuda[cols * BLOCK_SIZE * by + cols * ty]; + } + else if (bx == tile_dim_x - 1) + { + east[ty * BLOCK_SIZE + tx] = J_cuda[cols * BLOCK_SIZE * by + BLOCK_SIZE * (tile_dim_x - 1) + cols * ty + BLOCK_SIZE - 1]; + } + + block.sync(); + temp[ty * BLOCK_SIZE + tx] = J_cuda[index]; + + block.sync(); + + jc = temp[ty * BLOCK_SIZE + tx]; + + if (ty == 0 && tx == 0) + { // nw + n = north[ty * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = west[ty * BLOCK_SIZE + tx] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + else if (ty == 0 && tx == BLOCK_SIZE - 1) + { // ne + n = north[ty * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = east[ty * BLOCK_SIZE + tx] - jc; + } + else if (ty == BLOCK_SIZE - 1 && tx == BLOCK_SIZE - 1) + { // se + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = south[ty * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = east[ty * BLOCK_SIZE + tx] - jc; + } + else if (ty == BLOCK_SIZE - 1 && tx == 0) + { // sw + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = south[ty * BLOCK_SIZE + tx] - jc; + w = west[ty * BLOCK_SIZE + tx] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + + else if (ty == 0) + { // n + n = north[ty * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + else if (tx == 
BLOCK_SIZE - 1) + { // e + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = east[ty * BLOCK_SIZE + tx] - jc; + } + else if (ty == BLOCK_SIZE - 1) + { // s + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = south[ty * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + else if (tx == 0) + { // w + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = west[ty * BLOCK_SIZE + tx] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + else + { // the data elements which are not on the borders + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + + g2 = (n * n + s * s + w * w + e * e) / (jc * jc); + + l = (n + s + w + e) / jc; + + num = (0.5 * g2) - ((1.0 / 16.0) * (l * l)); + den = 1 + (.25 * l); + qsqr = num / (den * den); + + // diffusion coefficent (equ 33) + den = (qsqr - q0sqr) / (q0sqr * (1 + q0sqr)); + c = 1.0 / (1.0 + den); + + // saturate diffusion coefficent + if (c < 0) + { + temp_result[ty * BLOCK_SIZE + tx] = 0; + } + else if (c > 1) + { + temp_result[ty * BLOCK_SIZE + tx] = 1; + } + else + { + temp_result[ty * BLOCK_SIZE + tx] = c; + } + + block.sync(); + + C_cuda[index] = temp_result[ty * BLOCK_SIZE + tx]; + E_C[index] = e; + W_C[index] = w; + S_C[index] = s; + N_C[index] = n; + } +} + +__global__ void +srad_cuda_2( + float *E_C, + float *W_C, + float *N_C, + float *S_C, + float *J_cuda, + float *C_cuda, + int cols, + int rows, + float lambda, + float q0sqr, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // shared memory allocation + __shared__ float south_c[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float east_c[BLOCK_SIZE * BLOCK_SIZE]; + + __shared__ float c_cuda_temp[BLOCK_SIZE * 
BLOCK_SIZE]; + __shared__ float c_cuda_result[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float temp[BLOCK_SIZE * BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + //block id + int bx = tile % tile_dim_x; + int by = tile / tile_dim_x; + + //thread id + int tx = threadIdx.x; + int ty = threadIdx.y; + + // indices + int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx; + int index_s = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * BLOCK_SIZE + tx; + int index_e = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + BLOCK_SIZE; + + if (index_s >= (cols * rows)) index_s = cols * rows - 1; + if (index_e >= (cols * rows)) index_e = cols * rows - 1; + + float cc, cn, cs, ce, cw, d_sum; + + // load data to shared memory + temp[ty * BLOCK_SIZE + tx] = J_cuda[index]; + block.sync(); + + south_c[ty * BLOCK_SIZE + tx] = C_cuda[index_s]; + if (by == tile_dim_x - 1) + { + south_c[ty * BLOCK_SIZE + tx] = C_cuda[cols * BLOCK_SIZE * (tile_dim_x - 1) + BLOCK_SIZE * bx + cols * (BLOCK_SIZE - 1) + tx]; + } + block.sync(); + + east_c[ty * BLOCK_SIZE + tx] = C_cuda[index_e]; + if (bx == tile_dim_x - 1) + { + east_c[ty * BLOCK_SIZE + tx] = C_cuda[cols * BLOCK_SIZE * by + BLOCK_SIZE * (tile_dim_x - 1) + cols * ty + BLOCK_SIZE - 1]; + } + block.sync(); + + c_cuda_temp[ty * BLOCK_SIZE + tx] = C_cuda[index]; + block.sync(); + cc = c_cuda_temp[ty * BLOCK_SIZE + tx]; + + if (ty == BLOCK_SIZE - 1 && tx == BLOCK_SIZE - 1) + { // se + cn = cc; + cs = south_c[ty * BLOCK_SIZE + tx]; + cw = cc; + ce = east_c[ty * BLOCK_SIZE + tx]; + } + else if (tx == BLOCK_SIZE - 1) + { // e + cn = cc; + cs = c_cuda_temp[(ty + 1) * BLOCK_SIZE + tx]; + cw = cc; + ce = east_c[ty * 
BLOCK_SIZE + tx]; + } + else if (ty == BLOCK_SIZE - 1) + { // s + cn = cc; + cs = south_c[ty * BLOCK_SIZE + tx]; + cw = cc; + ce = c_cuda_temp[ty * BLOCK_SIZE + tx + 1]; + } + else + { // the data elements which are not on the borders + cn = cc; + cs = c_cuda_temp[(ty + 1) * BLOCK_SIZE + tx]; + cw = cc; + ce = c_cuda_temp[ty * BLOCK_SIZE + tx + 1]; + } + + // divergence (equ 58) + d_sum = cn * N_C[index] + cs * S_C[index] + cw * W_C[index] + ce * E_C[index]; + + // image update (equ 61) + c_cuda_result[ty * BLOCK_SIZE + tx] = temp[ty * BLOCK_SIZE + tx] + 0.25 * lambda * d_sum; + + block.sync(); + + J_cuda[index] = c_cuda_result[ty * BLOCK_SIZE + tx]; + } +} diff --git a/workloads/realworld/pipeline/darknet/LICENSE b/workloads/realworld/pipeline/darknet/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..a50f7d700ba02bfacd50f59b315311cf4d0bbda2 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/LICENSE @@ -0,0 +1,12 @@ + YOLO LICENSE + Version 2, July 29 2016 + +THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER +SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN +TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES +LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY. NOW HERE'S +THE REAL LICENSE: + +0. Darknet is public domain. +1. Do whatever you want with it. +2. Stop emailing me about it! 
diff --git a/workloads/realworld/pipeline/darknet/LICENSE.fuck b/workloads/realworld/pipeline/darknet/LICENSE.fuck new file mode 100644 index 0000000000000000000000000000000000000000..8b1a9d8189b3b9f4479221d52882ce36fdc73a62 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/LICENSE.fuck @@ -0,0 +1,13 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + +Copyright (C) 2004 Sam Hocevar + +Everyone is permitted to copy and distribute verbatim or modified +copies of this license document, and changing it is allowed as long +as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. diff --git a/workloads/realworld/pipeline/darknet/LICENSE.gen b/workloads/realworld/pipeline/darknet/LICENSE.gen new file mode 100644 index 0000000000000000000000000000000000000000..c54113271e15057c4def6676693eb96fd6362b28 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/LICENSE.gen @@ -0,0 +1,91 @@ +RNN LICENSE Version 3, June 21 2017 + +Copyright (c) 1990, 1989, 1999 Free87337 May 48 THIRD PARTIES OR ANY OTHER THE +COMPLAIN OR CONSEQUENTIAL DAMAGES AND REGARDLESS OF WHETHER IN CONTRACT, TO THE +EXTENT REPAIR OR AGENTS (NOT THE IN ANY EVENT). 
THE SOFTWARE WILL BE +UNINTERRUPTED OR ERROR-FREE OR ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF ALL THE WORK (GOVERNED CODE) HIM RESPONSES, OR OF FINES, +SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR ANY OTHER OR OTHER HARL UNDER NO +CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), +PATENT PERMITTED BY THE INSTAGRAM PARENT STATE OR TORT (INCLUDING NEGLIGENCE), +PRODUCT LIABILITY OR OTHERWISE, ARISING OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR ANYTHING PROVIDED IN THIS PRODUCT, COMMIS AND SERVICES +ARE LICENSED SOFTWARE AND ANY RESULE OR ANY OTHER THE COPYRIGHT HOLDERS BE +LIABLE FOR ANY SPECIAL, INCIDENTAL, CASE, SUCH WARRANTIES, EXPRESS OR IMPLIED, +INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COPYRIGHT HOLDERS AND/OR ANY +PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY +EXPRESS OR DISTRIBUTE THAT ALL CLAIMS ARE SHALL CREATE DERAVE BE LIABLE TO YOU +WILL HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +6\. TERMINATION. TO THE EXTENT PERMITTED BY LAW, NO USE OF THE COVERED CODE IS +WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE +INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY +SERVICING, REPAIR OR COULT OR IN ANY WAY OUT OF THE USE OF THE WEBSITES OR +SERVICE WILL BE CONSEQUENTIAL DAMAGES OF ANY KIND HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + +This paragraph Agreement constitutes the entire agreement between the parties +with respect to the Work licensed here. However, if you place the name of the +fact that the arbitration was the consultation of the parties as a "patent is". 
+Subject to the terms and conditions of this License, Contributor has knowledge +that a license under a third party may also be used to endorse or promote +products derived from the Work, and there is no warranty on the Software and +Science Fees. For the purposes of this Agreement, attach the following +disclaimers (without liabilities of written notice to the Subject Software) in a +manner that a product is under common control with you. The Free Software +Foundation may publish revised and/or new versions of the License for the +Modifications made by the applicable terms. The Recipient shall promptly retain +the covered works for any reason be entered in any federal or state or login +Restricted Laws appearing in the United States or any of its own information +that is not disabled from a derivative work except as expressly permitted in +this License, to the extent that they are in receiving the Software and Source +Code or any exercise of the rights granted to You by this License or a +Contributor made by the Licensor or are authorized to make a reasonable +retirement by the courts of the courts located in Santa Clara County, California +printed and related to the Work or “Company” and Apache Software Foundation. If +the Licensor shall be entitled to reflect your rights to use the Software and +the Software to exercise the rights granted to the recipient without a +requirement to exercise the rights granted by the Agreement to the provision +will begin will appear in such cases, you will use such information without such +corporation shall be an officer with respect to any part of the Software or any +portion thereof. 
Capitalized terms are included in the Initial Contributor and +under no circumstances will license the Service at any time and for any direct, +indirect, special, incidental, or consequential damages of or assist in +connection with any Services or the registration purposes only to the extent +that it includes any or all means including the processing of which you download +any derivative work. Any of the purchases’ transmission purposes are made +available, if any, in other circumstances, we may review the copyright notice. +In the event that this Agreement is required to give us strict content. The +inclusion of the other party hereunder may also notify you Intellectual Property +Rights to any third party. This means that the Source Code exists of the Work +will not charge a program available to you at any time. You must include a +prominent statement that the Software is governed under a particular version of +this Agreement. You must include a provision to the extent that there is no +warranty for the content of others. You agree that the Recipient was appointed +as a Contributor, (c) are effective until terminated by hereunder, then the +registration are not disabled and not limited to, submit any Customer Data +without the updated use of the Software and that no fee is released. You grant +to Use Other Arbitration Rules for Diagnostic or Services may use or modify the +Apple Software and Consolidated Apple Software or Services. The Company may have +full risk as a product of the Compatible Source. A Contribution by the Licensor +or by the updated Software under the following conditions we can redistribute +any General Provision of this Agreement. If the Program is used in accordance +with the terms of this Agreement, Customer may provide advertisements from your +devices that clause you can your employer or a transaction or country that has +been controlled by the arbitrator, that they will be useful of this Agreement. 
+The term "Open Source Software is available in connection with the program, and +you may not protect the combination of the Covered Code. You should like to +select a user's rights to charge a copy of this License. I are Contributor's +confidentiality of the exercise of the rights granted herein. Such a covered +work is released as a consequence, the Licensor shall be eligible for a purpose +or subcontractor of the person or entity to the user of the user, then the word +"Application" means having the original fee for any reason; and that no patent +license to more than fifty stated close of the license term. The terms of this +License will the license terms and conditions set forth in Section 2.2 (OPEC) +and You will not use the Software or any set of responsibility for any resulting +information that the Original Code warrants that you have the right to disclose +these information (or in the notification; or (iii) late use of the software or +any third party to the three (50) days before such belief to the extent that it +includes a court court obtains the rights granted by this License. diff --git a/workloads/realworld/pipeline/darknet/LICENSE.gpl b/workloads/realworld/pipeline/darknet/LICENSE.gpl new file mode 100644 index 0000000000000000000000000000000000000000..9cecc1d4669ee8af2ca727a5d8cde10cd8b2d7cc --- /dev/null +++ b/workloads/realworld/pipeline/darknet/LICENSE.gpl @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. 
+ + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + {one line to give the program's name and a brief idea of what it does.} + Copyright (C) {year} {name of author} + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + {project} Copyright (C) {year} {fullname} + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/workloads/realworld/pipeline/darknet/LICENSE.meta b/workloads/realworld/pipeline/darknet/LICENSE.meta new file mode 100644 index 0000000000000000000000000000000000000000..6728bd28d319c68ae04944fb034118dcc4c9aa09 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/LICENSE.meta @@ -0,0 +1,8 @@ + META-LICENSE + Version 1, June 21 2017 + +Any and all licenses may be applied to the software either individually +or in concert. Any issues, ambiguities, paradoxes, or metaphysical quandries +arising from this combination should be discussed with a local faith leader, +hermit, or guru. The Oxford comma shall be used. + diff --git a/workloads/realworld/pipeline/darknet/LICENSE.mit b/workloads/realworld/pipeline/darknet/LICENSE.mit new file mode 100644 index 0000000000000000000000000000000000000000..5bd806ce16ea5053c8631793787362439375026e --- /dev/null +++ b/workloads/realworld/pipeline/darknet/LICENSE.mit @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2017 Joseph Redmon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/workloads/realworld/pipeline/darknet/LICENSE.v1 b/workloads/realworld/pipeline/darknet/LICENSE.v1 new file mode 100644 index 0000000000000000000000000000000000000000..5b8709acc43e7b76ed69758a52a9eaffaba775e6 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/LICENSE.v1 @@ -0,0 +1,13 @@ + YOLO LICENSE + Version 1, July 10 2015 + +THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER +SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN +TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES +LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY SUBJECT TO +THE FOLLOWING CONDITIONS: + +1. #yolo +2. #swag +3. #blazeit + diff --git a/workloads/realworld/pipeline/darknet/Makefile b/workloads/realworld/pipeline/darknet/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..5022f68377d7626ab768f2501883d72b013dafba --- /dev/null +++ b/workloads/realworld/pipeline/darknet/Makefile @@ -0,0 +1,114 @@ +GPU=1 +CUDNN=0 +OPENCV=0 +OPENMP=0 +DEBUG=0 + +#ARCH= -gencode arch=compute_30,code=sm_30 \ +# -gencode arch=compute_35,code=sm_35 \ +# -gencode arch=compute_50,code=[sm_50,compute_50] \ +# -gencode arch=compute_52,code=[sm_52,compute_52] +# -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated? 
+ +# This is what I use, uncomment if you know your arch and want to specify +ARCH= -gencode arch=compute_80,code=sm_80 \ +#ARCH= -arch=sm_80 + +VPATH=./src/:./examples:$(CUPTI_ADD_COMMON) +SLIB=libdarknet.so +ALIB=libdarknet.a +EXEC=darknet +OBJDIR=./obj/ + +CC=gcc +CPP=g++ +# NVCC=nvcc --default-stream per-thread +NVCC=nvcc +AR=ar +ARFLAGS=rcs +OPTS=-Ofast +LDFLAGS= -lm -pthread +COMMON= -Iinclude/ -Isrc/ +CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC +ifeq ($(PROFILE), 1) +CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -DPROFILE +endif + + +ifeq ($(OPENMP), 1) +CFLAGS+= -fopenmp +endif + +ifeq ($(DEBUG), 1) +OPTS=-O0 -g +endif + +CFLAGS+=$(OPTS) + +ifeq ($(OPENCV), 1) +COMMON+= -DOPENCV +CFLAGS+= -DOPENCV +LDFLAGS+= `pkg-config --libs opencv` -lstdc++ +COMMON+= `pkg-config --cflags opencv` +endif + +ifeq ($(GPU), 1) +include ../../../common/make.config +COMMON+= -DGPU -I$(CUDA_DIR)/include/ -I$(CUDA_DIR)/extras/CUPTI/include/ +CFLAGS+= -DGPU +LDFLAGS+= -L$(CUDA_DIR)/lib64 -L$(CUDA_DIR)/extras/CUPTI/lib64/ -lcuda -lcudart -lcublas -lcurand -lcupti +endif + + +ifeq ($(CUDNN), 1) +COMMON+= -DCUDNN +CFLAGS+= -DCUDNN +LDFLAGS+= -lcudnn +endif + +OBJ=gemm.o utils.o cuda_dark.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o l2norm_layer.o yolo_layer.o iseg_layer.o image_opencv.o +EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o instance-segmenter.o darknet.o +ifeq 
($(GPU), 1) +LDFLAGS+= -lstdc++ +OBJ+=gemm_kernel.o convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o +endif + +# cpu_timestamps.o +# cupti_add.o + +EXECOBJ = $(addprefix $(OBJDIR), $(EXECOBJA)) +OBJS = $(addprefix $(OBJDIR), $(OBJ)) +DEPS = $(wildcard src/*.h) Makefile include/darknet.h + +all: obj backup results $(SLIB) $(ALIB) $(EXEC) + +$(EXEC): $(EXECOBJ) $(ALIB) + $(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS) $(ALIB) + +$(ALIB): $(OBJS) + $(AR) $(ARFLAGS) $@ $^ + +$(SLIB): $(OBJS) + $(CC) $(CFLAGS) -shared $^ -o $@ $(LDFLAGS) + +$(OBJDIR)%.o: %.cpp $(DEPS) + $(CPP) $(COMMON) $(CFLAGS) -c $< -o $@ + +$(OBJDIR)%.o: %.c $(DEPS) + $(CC) $(COMMON) $(CFLAGS) -c $< -o $@ + +$(OBJDIR)%.o: %.cu $(DEPS) + $(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@ + +obj: + mkdir -p obj +backup: + mkdir -p backup +results: + mkdir -p results + +.PHONY: clean + +clean: + rm -rf $(OBJS) $(SLIB) $(ALIB) $(EXEC) $(EXECOBJ) $(OBJDIR)/* + diff --git a/workloads/realworld/pipeline/darknet/README.md b/workloads/realworld/pipeline/darknet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fb58c2640038a963cd573d121e4fab59399f67dc --- /dev/null +++ b/workloads/realworld/pipeline/darknet/README.md @@ -0,0 +1,124 @@ +![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png) + +# Darknet # +Darknet is an open source neural network framework written in C and CUDA. It is fast, easy to install, and supports CPU and GPU computation. 
+ +**Discord** invite link for communication and questions: https://discord.gg/zSq8rtW + +## YOLOv7: + +* **paper** - YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors: https://arxiv.org/abs/2207.02696 + +* **source code - Pytorch (use to reproduce results):** https://github.com/WongKinYiu/yolov7 + +---- + +Official YOLOv7 is more accurate and faster than YOLOv5 by **120%** FPS, than YOLOX by **180%** FPS, than Dual-Swin-T by **1200%** FPS, than ConvNext by **550%** FPS, than SWIN-L by **500%** FPS. + +YOLOv7 surpasses all known object detectors in both speed and accuracy in the range from 5 FPS to 160 FPS and has the highest accuracy 56.8% AP among all known real-time object detectors with 30 FPS or higher on GPU V100, batch=1. + +* YOLOv7-e6 (55.9% AP, 56 FPS V100 b=1) by `+500%` FPS faster than SWIN-L Cascade-Mask R-CNN (53.9% AP, 9.2 FPS A100 b=1) +* YOLOv7-e6 (55.9% AP, 56 FPS V100 b=1) by `+550%` FPS faster than ConvNeXt-XL C-M-RCNN (55.2% AP, 8.6 FPS A100 b=1) +* YOLOv7-w6 (54.6% AP, 84 FPS V100 b=1) by `+120%` FPS faster than YOLOv5-X6-r6.1 (55.0% AP, 38 FPS V100 b=1) +* YOLOv7-w6 (54.6% AP, 84 FPS V100 b=1) by `+1200%` FPS faster than Dual-Swin-T C-M-RCNN (53.6% AP, 6.5 FPS V100 b=1) +* YOLOv7x (52.9% AP, 114 FPS V100 b=1) by `+150%` FPS faster than PPYOLOE-X (51.9% AP, 45 FPS V100 b=1) +* YOLOv7 (51.2% AP, 161 FPS V100 b=1) by `+180%` FPS faster than YOLOX-X (51.1% AP, 58 FPS V100 b=1) + +---- + +![more5](https://user-images.githubusercontent.com/4096485/179425274-f55a36d4-8450-4471-816b-8c105841effd.jpg) + +---- + +![image](https://user-images.githubusercontent.com/4096485/177675030-a929ee00-0eba-4d93-95c2-225231d0fd61.png) + + +---- + +![yolov7_640_1280](https://user-images.githubusercontent.com/4096485/177688869-d75e0c36-63af-46ec-bdbd-81dbb281f257.png) + +---- + +## Scaled-YOLOv4: + +* **paper (CVPR 2021)**: 
https://openaccess.thecvf.com/content/CVPR2021/html/Wang_Scaled-YOLOv4_Scaling_Cross_Stage_Partial_Network_CVPR_2021_paper.html + +* **source code - Pytorch (use to reproduce results):** https://github.com/WongKinYiu/ScaledYOLOv4 + +* **source code - Darknet:** https://github.com/AlexeyAB/darknet + +* **Medium:** https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982?source=friends_link&sk=c8553bfed861b1a7932f739d26f487c8 + +## YOLOv4: + +* **paper:** https://arxiv.org/abs/2004.10934 + +* **source code:** https://github.com/AlexeyAB/darknet + +* **Wiki:** https://github.com/AlexeyAB/darknet/wiki + +* **useful links:** https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7 + +For more information see the [Darknet project website](http://pjreddie.com/darknet). + + +
Expand + +![yolo_progress](https://user-images.githubusercontent.com/4096485/146988929-1ed0cbec-1e01-4ad0-b42c-808dcef32994.png) https://paperswithcode.com/sota/object-detection-on-coco + +---- + +![scaled_yolov4](https://user-images.githubusercontent.com/4096485/112776361-281d8380-9048-11eb-8083-8728b12dcd55.png) AP50:95 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2011.08036 + +---- + +![YOLOv4Tiny](https://user-images.githubusercontent.com/4096485/101363015-e5c21200-38b1-11eb-986f-b3e516e05977.png) + +---- + +![YOLOv4](https://user-images.githubusercontent.com/4096485/90338826-06114c80-dff5-11ea-9ba2-8eb63a7409b3.png) + +
+ +---- + +![OpenCV_TRT](https://user-images.githubusercontent.com/4096485/90338805-e5e18d80-dff4-11ea-8a68-5710956256ff.png) + + +## Citation + + +``` +@misc{https://doi.org/10.48550/arxiv.2207.02696, + doi = {10.48550/ARXIV.2207.02696}, + url = {https://arxiv.org/abs/2207.02696}, + author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + publisher = {arXiv}, + year = {2022}, + copyright = {arXiv.org perpetual, non-exclusive license} +} +``` + +``` +@misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +``` +@InProceedings{Wang_2021_CVPR, + author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + title = {{Scaled-YOLOv4}: Scaling Cross Stage Partial Network}, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2021}, + pages = {13029-13038} +} +``` diff --git a/workloads/realworld/pipeline/darknet/cfg/alexnet.cfg b/workloads/realworld/pipeline/darknet/cfg/alexnet.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e2ed4bb8e7b1bad7859aef0d802cb4084753cb7a --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/alexnet.cfg @@ -0,0 +1,96 @@ +[net] +# Training +# batch=128 +# subdivisions=1 +# Testing +batch=1 +subdivisions=1 +height=227 +width=227 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=256 + +learning_rate=0.01 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue = .1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +filters=96 +size=11 
+stride=4 +pad=0 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[convolutional] +filters=256 +size=5 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[convolutional] +filters=384 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=384 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=1000 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/cifar.cfg b/workloads/realworld/pipeline/darknet/cfg/cifar.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b2f69f53903e55c24718ed12d9adaaa1557e3647 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/cifar.cfg @@ -0,0 +1,121 @@ +[net] +batch=128 +subdivisions=1 +height=28 +width=28 +channels=3 +max_crop=32 +min_crop=32 + +hue=.1 +saturation=.75 +exposure=.75 + +learning_rate=0.4 +policy=poly +power=4 +max_batches = 5000 +momentum=0.9 +decay=0.0005 + + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[dropout] +probability=.5 + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 diff --git a/workloads/realworld/pipeline/darknet/cfg/cifar.data b/workloads/realworld/pipeline/darknet/cfg/cifar.data new file mode 100644 index 0000000000000000000000000000000000000000..a52208db1b203b5e1f24d5afaf8c7002adfd71a3 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/cifar.data @@ -0,0 +1,7 @@ +classes=10 +train = data/cifar/train.list +valid = data/cifar/test.list +test = data/cifar/test.list +labels = data/cifar/labels.txt +backup = backup/ +top=2 diff --git a/workloads/realworld/pipeline/darknet/cfg/cifar.test.cfg b/workloads/realworld/pipeline/darknet/cfg/cifar.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..18b6c54c909152b1201d6320b85fafc5c36ba1ef --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/cifar.test.cfg @@ -0,0 +1,117 @@ +[net] +batch=128 +subdivisions=1 +height=32 +width=32 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.4 +policy=poly +power=4 +max_batches = 50000 + + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[dropout] +probability=.5 + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 +temperature=3 + diff --git a/workloads/realworld/pipeline/darknet/cfg/cifar_small.cfg b/workloads/realworld/pipeline/darknet/cfg/cifar_small.cfg new file mode 100644 index 0000000000000000000000000000000000000000..d48b1231f0131faaa187b18df6705411c3d16a76 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/cifar_small.cfg @@ -0,0 +1,86 @@ +[net] +batch=128 +subdivisions=1 +height=28 +width=28 +channels=3 +max_crop=32 +min_crop=32 + +hue=.1 +saturation=.75 +exposure=.75 + +learning_rate=0.1 +policy=poly +power=4 +max_batches = 5000 +momentum=0.9 +decay=0.0005 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] diff --git a/workloads/realworld/pipeline/darknet/cfg/coco.data b/workloads/realworld/pipeline/darknet/cfg/coco.data new file mode 100644 index 0000000000000000000000000000000000000000..5951d5245a7895e8418bc3155de3b03049e76c42 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/coco.data @@ 
-0,0 +1,6 @@ +classes= 80 +train = /data/darknet/coco/valid.list +valid = /data/darknet/coco/valid.list +backup = /data/darknet/backup/ +names = /data/darknet/coco/coco.names +eval=coco diff --git a/workloads/realworld/pipeline/darknet/cfg/coco.names b/workloads/realworld/pipeline/darknet/cfg/coco.names new file mode 100644 index 0000000000000000000000000000000000000000..16315f2becec9705017bfaf1b9fb81ca2a83c0b0 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/coco.names @@ -0,0 +1,80 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +sofa +pottedplant +bed +diningtable +toilet +tvmonitor +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush \ No newline at end of file diff --git a/workloads/realworld/pipeline/darknet/cfg/combine9k.data b/workloads/realworld/pipeline/darknet/cfg/combine9k.data new file mode 100644 index 0000000000000000000000000000000000000000..06a3e76aefac9c1074c3dfe159bc115a92b0791e --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/combine9k.data @@ -0,0 +1,10 @@ +classes= 9418 +#train = /home/pjreddie/data/coco/trainvalno5k.txt +train = data/combine9k.train.list +valid = /home/pjreddie/data/imagenet/det.val.files +labels = data/9k.labels +names = data/9k.names +backup = backup/ +map = data/inet9k.map +eval = imagenet +results = results diff --git a/workloads/realworld/pipeline/darknet/cfg/darknet.cfg b/workloads/realworld/pipeline/darknet/cfg/darknet.cfg new file mode 100644 index 
0000000000000000000000000000000000000000..375107f7c196baf7adf229a7cfffc84739875828 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/darknet.cfg @@ -0,0 +1,120 @@ +[net] +# Training +# batch=128 +# subdivisions=1 +# Testing +batch=1 +subdivisions=1 +height=256 +width=256 +min_crop=128 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/darknet19.cfg b/workloads/realworld/pipeline/darknet/cfg/darknet19.cfg new file mode 100644 index 0000000000000000000000000000000000000000..28ac9669ef686b4d638a5bf462451962fec45a4e --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/darknet19.cfg @@ -0,0 +1,205 @@ +[net] +# Training +#batch=128 +#subdivisions=2 + +# Testing + batch=1 + subdivisions=1 + +height=256 +width=256 +min_crop=128 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 
+policy=poly +power=4 +max_batches=800000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 
+stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/darknet19_448.cfg b/workloads/realworld/pipeline/darknet/cfg/darknet19_448.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c6df7306d3ef0622e0a0e0cbd6a5603699344e56 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/darknet19_448.cfg @@ -0,0 +1,197 @@ +[net] +batch=128 +subdivisions=4 +height=448 +width=448 +max_crop=512 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 + +angle=7 +hue = .1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/darknet53.cfg b/workloads/realworld/pipeline/darknet/cfg/darknet53.cfg new file mode 100644 index 0000000000000000000000000000000000000000..7b6d5766e9ec48ee19a74321583b44621c1e07b3 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/darknet53.cfg @@ -0,0 +1,566 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/darknet53_448.cfg b/workloads/realworld/pipeline/darknet/cfg/darknet53_448.cfg new file mode 100644 index 0000000000000000000000000000000000000000..dedab1b97c7e5d4226f061e6c983046d7a278dd0 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/darknet53_448.cfg @@ -0,0 +1,559 @@ +[net] +# Training - start training with darknet53.weights +# batch=128 +# subdivisions=8 + +# Testing +batch=1 +subdivisions=1 + +height=448 +width=448 +channels=3 +min_crop=448 +max_crop=512 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 +momentum=0.9 +decay=0.0005 + + +[convolutional] 
+batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 
+activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[avgpool] + +[convolutional] +filters=1000 +size=1 
+stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/darknet9000.cfg b/workloads/realworld/pipeline/darknet/cfg/darknet9000.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9dd2dfbbf5a7137faada4e091b8e6d48233f09bf --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/darknet9000.cfg @@ -0,0 +1,205 @@ +[net] +# Training +# batch=128 +# subdivisions=4 +# Testing +batch = 1 +subdivisions = 1 +height=448 +width=448 +max_crop=512 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=9418 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 +tree=data/9k.tree + +[cost] +type=masked + diff --git a/workloads/realworld/pipeline/darknet/cfg/densenet201.cfg b/workloads/realworld/pipeline/darknet/cfg/densenet201.cfg new file mode 100644 index 0000000000000000000000000000000000000000..65b4aecc52d45075f2913e3d63b9aec0527fa44c --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/densenet201.cfg @@ -0,0 +1,1951 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 
+filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] 
+layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 
+pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] 
+layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 
+pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/extraction.cfg b/workloads/realworld/pipeline/darknet/cfg/extraction.cfg new file mode 100644 index 0000000000000000000000000000000000000000..66cb15f80e9a5e811223299594882a3b5d9485dc --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/extraction.cfg @@ -0,0 +1,209 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=224 +width=224 +max_crop=320 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/extraction.conv.cfg b/workloads/realworld/pipeline/darknet/cfg/extraction.conv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..2a7d09ec80fa2f47e1ebb4134b7845d5cae2b828 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/extraction.conv.cfg @@ -0,0 +1,179 @@ +[net] +batch=1 +subdivisions=1 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.5 +policy=poly +power=6 +max_batches=500000 + 
+[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[connected] +output=1000 +activation=leaky + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/extraction22k.cfg b/workloads/realworld/pipeline/darknet/cfg/extraction22k.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b5f54090d00537fdca72f54bb2eed69dd78f5b00 --- /dev/null +++ 
b/workloads/realworld/pipeline/darknet/cfg/extraction22k.cfg @@ -0,0 +1,206 @@ +[net] +batch=128 +subdivisions=1 +height=224 +width=224 +max_crop=320 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +max_batches = 0 +policy=steps +steps=444000,590000,970000 +scales=.5,.2,.1 + +#policy=sigmoid +#gamma=.00008 +#step=100000 +#max_batches=200000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + 
+[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[connected] +output=21842 +activation=leaky + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/go.cfg b/workloads/realworld/pipeline/darknet/cfg/go.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c730092ff3ffda0124baeace050bd382c442d88d --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/go.cfg @@ -0,0 +1,132 @@ +[net] +batch=512 +subdivisions=1 +height=19 +width=19 +channels=1 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=10000000 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 
+size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=1 +size=1 +stride=1 +pad=1 +activation=linear + +[reorg] +extra=1 +stride=1 + +[softmax] + diff --git a/workloads/realworld/pipeline/darknet/cfg/go.test.cfg b/workloads/realworld/pipeline/darknet/cfg/go.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..1e4e43809bf3ede20a67b5020fcca0f61612e8f7 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/go.test.cfg @@ -0,0 +1,132 @@ +[net] +batch=1 +subdivisions=1 +height=19 +width=19 +channels=1 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +policy=poly +power=4 +max_batches=100000 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=1 +size=1 
+stride=1 +pad=1 +activation=linear + +[reorg] +extra=1 +stride=1 + +[softmax] + + diff --git a/workloads/realworld/pipeline/darknet/cfg/gru.cfg b/workloads/realworld/pipeline/darknet/cfg/gru.cfg new file mode 100644 index 0000000000000000000000000000000000000000..6064221289d41dc3ee464a715ae05593a02f34f4 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/gru.cfg @@ -0,0 +1,29 @@ +[net] +inputs=256 +momentum=0.9 +decay=0.0 +subdivisions=1 +batch = 1 +time_steps=1 +learning_rate=.002 +adam=1 + +policy=constant +power=4 +max_batches=1000000 + +[gru] +output = 256 + +[gru] +output = 256 + +[gru] +output = 256 + +[connected] +output=256 +activation=linear + +[softmax] + diff --git a/workloads/realworld/pipeline/darknet/cfg/imagenet.labels.list b/workloads/realworld/pipeline/darknet/cfg/imagenet.labels.list new file mode 100644 index 0000000000000000000000000000000000000000..e73d41762d311df7f7eefec0f65ab12a7bacc023 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/imagenet.labels.list @@ -0,0 +1,21842 @@ +n02119789 +n02100735 +n02110185 +n02096294 +n02102040 +n02066245 +n02509815 +n02124075 +n02417914 +n02123394 +n02125311 +n02423022 +n02346627 +n02077923 +n02110063 +n02447366 +n02109047 +n02089867 +n02102177 +n02091134 +n02092002 +n02071294 +n02442845 +n02504458 +n02092339 +n02098105 +n02096437 +n02114712 +n02105641 +n02128925 +n02091635 +n02088466 +n02096051 +n02117135 +n02138441 +n02097130 +n02493509 +n02457408 +n02389026 +n02443484 +n02110341 +n02089078 +n02086910 +n02445715 +n02093256 +n02113978 +n02106382 +n02441942 +n02113712 +n02113186 +n02105162 +n02415577 +n02356798 +n02488702 +n02123159 +n02098413 +n02422699 +n02114855 +n02094433 +n02111277 +n02132136 +n02119022 +n02091467 +n02106550 +n02422106 +n02091831 +n02120505 +n02104365 +n02086079 +n02112706 +n02098286 +n02095889 +n02484975 +n02137549 +n02500267 +n02129604 +n02090721 +n02396427 +n02108000 +n02391049 +n02412080 +n02108915 +n02480495 +n02110806 +n02128385 +n02107683 +n02085936 
+n02094114 +n02087046 +n02100583 +n02096177 +n02494079 +n02105056 +n02101556 +n02123597 +n02481823 +n02105505 +n02088094 +n02085782 +n02489166 +n02364673 +n02114548 +n02134084 +n02480855 +n02090622 +n02113624 +n02093859 +n02403003 +n02097298 +n02108551 +n02493793 +n02107142 +n02096585 +n02107574 +n02107908 +n02086240 +n02102973 +n02112018 +n02093647 +n02397096 +n02437312 +n02483708 +n02097047 +n02106030 +n02099601 +n02093991 +n02110627 +n02106166 +n02326432 +n02108089 +n02097658 +n02088364 +n02111129 +n02100236 +n02486261 +n02115913 +n02486410 +n02487347 +n02099849 +n02108422 +n02104029 +n02492035 +n02110958 +n02099429 +n02094258 +n02099267 +n02395406 +n02112350 +n02109961 +n02101388 +n02113799 +n02095570 +n02128757 +n02101006 +n02115641 +n02097209 +n02342885 +n02097474 +n02120079 +n02095314 +n02088238 +n02408429 +n02133161 +n02328150 +n02410509 +n02492660 +n02398521 +n02112137 +n02510455 +n02093428 +n02105855 +n02111500 +n02085620 +n02123045 +n02490219 +n02099712 +n02109525 +n02454379 +n02111889 +n02088632 +n02090379 +n02443114 +n02361337 +n02105412 +n02483362 +n02437616 +n02107312 +n02325366 +n02091032 +n02129165 +n02102318 +n02100877 +n02074367 +n02504013 +n02363005 +n02102480 +n02113023 +n02086646 +n02497673 +n02087394 +n02127052 +n02116738 +n02488291 +n02091244 +n02114367 +n02130308 +n02089973 +n02105251 +n02134418 +n02093754 +n02106662 +n02444819 +n01882714 +n01871265 +n01872401 +n01877812 +n01873310 +n01883070 +n04086273 +n04507155 +n04147183 +n04254680 +n02672831 +n02219486 +n02317335 +n01968897 +n03452741 +n03642806 +n07745940 +n02690373 +n04552348 +n02692877 +n02782093 +n04266014 +n03344393 +n03447447 +n04273569 +n03662601 +n02951358 +n04612504 +n02981792 +n04483307 +n03095699 +n03673027 +n03947888 +n02687172 +n04347754 +n04606251 +n03478589 +n04389033 +n03773504 +n02860847 +n03218198 +n02835271 +n03792782 +n03393912 +n03895866 +n02797295 +n04204347 +n03791053 +n03384352 +n03272562 +n04310018 +n02704792 +n02701002 +n02814533 +n02930766 +n03100240 
+n03594945 +n03670208 +n03770679 +n03777568 +n04037443 +n04285008 +n03444034 +n03445924 +n03785016 +n04252225 +n03345487 +n03417042 +n03930630 +n04461696 +n04467665 +n03796401 +n03977966 +n04065272 +n04335435 +n04252077 +n04465501 +n03776460 +n04482393 +n04509417 +n03538406 +n03599486 +n03868242 +n02804414 +n03125729 +n03131574 +n03388549 +n02870880 +n03018349 +n03742115 +n03016953 +n04380533 +n03337140 +n03891251 +n02791124 +n04429376 +n03376595 +n04099969 +n04344873 +n04447861 +n03179701 +n03982430 +n03201208 +n03290653 +n04550184 +n07742313 +n07747607 +n07749582 +n07753113 +n07753275 +n07753592 +n07754684 +n07760859 +n07768694 +n12267677 +n12620546 +n13133613 +n11879895 +n12144580 +n12768682 +n03854065 +n04515003 +n03017168 +n03249569 +n03447721 +n03720891 +n03721384 +n04311174 +n02787622 +n02992211 +n04536866 +n03495258 +n02676566 +n03272010 +n03110669 +n03394916 +n04487394 +n03494278 +n03840681 +n03884397 +n02804610 +n03838899 +n04141076 +n03372029 +n11939491 +n12057211 +n09246464 +n09468604 +n09193705 +n09472597 +n09399592 +n09421951 +n09256479 +n09332890 +n09428293 +n09288635 +n03498962 +n03041632 +n03658185 +n03954731 +n03995372 +n03649909 +n03481172 +n03109150 +n02951585 +n03970156 +n04154565 +n04208210 +n03967562 +n03000684 +n01514668 +n01514859 +n01518878 +n01530575 +n01531178 +n01532829 +n01534433 +n01537544 +n01558993 +n01560419 +n01580077 +n01582220 +n01592084 +n01601694 +n01608432 +n01614925 +n01616318 +n01622779 +n01795545 +n01796340 +n01797886 +n01798484 +n01806143 +n01806567 +n01807496 +n01817953 +n01818515 +n01819313 +n01820546 +n01824575 +n01828970 +n01829413 +n01833805 +n01843065 +n01843383 +n01847000 +n01855032 +n01855672 +n01860187 +n02002556 +n02002724 +n02006656 +n02007558 +n02009912 +n02009229 +n02011460 +n02012849 +n02013706 +n02018207 +n02018795 +n02025239 +n02027492 +n02028035 +n02033041 +n02037110 +n02017213 +n02051845 +n02056570 +n02058221 +n01484850 +n01491361 +n01494475 +n01496331 +n01498041 +n02514041 +n02536864 +n01440764 
+n01443537 +n02526121 +n02606052 +n02607072 +n02643566 +n02655020 +n02640242 +n02641379 +n01664065 +n01665541 +n01667114 +n01667778 +n01669191 +n01675722 +n01677366 +n01682714 +n01685808 +n01687978 +n01688243 +n01689811 +n01692333 +n01693334 +n01694178 +n01695060 +n01704323 +n01697457 +n01698640 +n01728572 +n01728920 +n01729322 +n01729977 +n01734418 +n01735189 +n01737021 +n01739381 +n01740131 +n01742172 +n01744401 +n01748264 +n01749939 +n01751748 +n01753488 +n01755581 +n01756291 +n01629819 +n01630670 +n01631663 +n01632458 +n01632777 +n01641577 +n01644373 +n01644900 +n04579432 +n04592741 +n03876231 +n03483316 +n03868863 +n04251144 +n03691459 +n03759954 +n04152593 +n03793489 +n03271574 +n03843555 +n04332243 +n04265275 +n04330267 +n03467068 +n02794156 +n04118776 +n03841143 +n04141975 +n02708093 +n03196217 +n04548280 +n03544143 +n04355338 +n03891332 +n04328186 +n03197337 +n04317175 +n04376876 +n03706229 +n02841315 +n04009552 +n04356056 +n03692522 +n04044716 +n02879718 +n02950826 +n02749479 +n04090263 +n04008634 +n03085013 +n04505470 +n03126707 +n03666591 +n02666196 +n02977058 +n04238763 +n03180011 +n03485407 +n03832673 +n06359193 +n03496892 +n04428191 +n04004767 +n04243546 +n04525305 +n04179913 +n03602883 +n04372370 +n03532672 +n02974003 +n03874293 +n03944341 +n03992509 +n03425413 +n02966193 +n04371774 +n04067472 +n04040759 +n04019541 +n03492542 +n04355933 +n03929660 +n02965783 +n04258138 +n04074963 +n03208938 +n02910353 +n03476684 +n03627232 +n03075370 +n03874599 +n03804744 +n04127249 +n04153751 +n03803284 +n04162706 +n04228054 +n02948072 +n03590841 +n04286575 +n04456115 +n03814639 +n03933933 +n04485082 +n03733131 +n03794056 +n04275548 +n01768244 +n01770081 +n01770393 +n01773157 +n01773549 +n01773797 +n01774384 +n01774750 +n01775062 +n01776313 +n01784675 +n01990800 +n01978287 +n01978455 +n01980166 +n01981276 +n01983481 +n01984695 +n01985128 +n01986214 +n02165105 +n02165456 +n02167151 +n02168699 +n02169497 +n02172182 +n02174001 +n02177972 +n02190166 +n02206856 
+n02226429 +n02229544 +n02231487 +n02233338 +n02236044 +n02256656 +n02259212 +n02264363 +n02268443 +n02268853 +n02276258 +n02277742 +n02279972 +n02280649 +n02281406 +n02281787 +n01910747 +n01914609 +n01917289 +n01924916 +n01930112 +n01943899 +n01944390 +n01945685 +n01950731 +n01955084 +n02319095 +n02321529 +n03584829 +n03297495 +n03761084 +n03259280 +n04111531 +n04442312 +n04542943 +n04517823 +n03207941 +n04070727 +n04554684 +n03133878 +n03400231 +n04596742 +n02939185 +n03063689 +n04398044 +n04270147 +n02699494 +n04486054 +n03899768 +n04311004 +n04366367 +n04532670 +n02793495 +n03457902 +n03877845 +n03781244 +n03661043 +n02727426 +n02859443 +n03028079 +n03788195 +n04346328 +n03956157 +n04081281 +n03032252 +n03529860 +n03697007 +n03065424 +n03837869 +n04458633 +n02980441 +n04005630 +n03461385 +n02776631 +n02791270 +n02871525 +n02927161 +n03089624 +n04200800 +n04443257 +n04462240 +n03388043 +n03042490 +n04613696 +n03216828 +n02892201 +n03743016 +n02788148 +n02894605 +n03160309 +n03000134 +n03930313 +n04604644 +n04326547 +n03459775 +n04239074 +n04501370 +n03792972 +n04149813 +n03530642 +n03961711 +n03903868 +n02814860 +n07711569 +n07720875 +n07714571 +n07714990 +n07715103 +n07716358 +n07716906 +n07717410 +n07717556 +n07718472 +n07718747 +n07730033 +n07734744 +n04209239 +n03594734 +n02971356 +n03485794 +n04133789 +n02747177 +n04125021 +n07579787 +n03814906 +n03134739 +n03404251 +n04423845 +n03877472 +n04120489 +n03062245 +n03014705 +n03717622 +n03777754 +n04493381 +n04476259 +n02777292 +n07693725 +n03998194 +n03617480 +n07590611 +n04579145 +n03623198 +n07248320 +n04277352 +n04229816 +n02823428 +n03127747 +n02877765 +n04435653 +n03724870 +n03710637 +n03920288 +n03379051 +n02807133 +n04399382 +n03527444 +n03983396 +n03924679 +n04532106 +n06785654 +n03445777 +n07613480 +n04350905 +n04562935 +n03325584 +n03045698 +n07892512 +n03250847 +n04192698 +n03026506 +n03534580 +n07565083 +n04296562 +n02869837 +n07871810 +n02799071 +n03314780 +n04141327 +n04357314 +n02823750 
+n13052670 +n07583066 +n03637318 +n04599235 +n07802026 +n02883205 +n03709823 +n04560804 +n02909870 +n03207743 +n04263257 +n07932039 +n03786901 +n04479046 +n03873416 +n02999410 +n04367480 +n03775546 +n07875152 +n04591713 +n04201297 +n02916936 +n03240683 +n02840245 +n02963159 +n04370456 +n03991062 +n02843684 +n03482405 +n03942813 +n03908618 +n03902125 +n07584110 +n02730930 +n04023962 +n02769748 +n10148035 +n02817516 +n03908714 +n02906734 +n03788365 +n02667093 +n03787032 +n03980874 +n03141823 +n03976467 +n04264628 +n07930864 +n04039381 +n06874185 +n04033901 +n04041544 +n07860988 +n03146219 +n03763968 +n03676483 +n04209133 +n03782006 +n03857828 +n03775071 +n02892767 +n07684084 +n04522168 +n03764736 +n04118538 +n03887697 +n13044778 +n03291819 +n03770439 +n03124170 +n04487081 +n03916031 +n02808440 +n07697537 +n12985857 +n02917067 +n03938244 +n15075141 +n02978881 +n02966687 +n03633091 +n13040303 +n03690938 +n03476991 +n02669723 +n03220513 +n03127925 +n04584207 +n07880968 +n03937543 +n03000247 +n04418357 +n04590129 +n02795169 +n04553703 +n02783161 +n02802426 +n02808304 +n03124043 +n03450230 +n04589890 +n12998815 +n02992529 +n03825788 +n02790996 +n03710193 +n03630383 +n03347037 +n03769881 +n03871628 +n03733281 +n03976657 +n03535780 +n04259630 +n03929855 +n04049303 +n04548362 +n02979186 +n06596364 +n03935335 +n06794110 +n02825657 +n03388183 +n04591157 +n04540053 +n03866082 +n04136333 +n04026417 +n02865351 +n02834397 +n03888257 +n04235860 +n04404412 +n04371430 +n03733805 +n07920052 +n07873807 +n02895154 +n04204238 +n04597913 +n04131690 +n07836838 +n09835506 +n03443371 +n13037406 +n04336792 +n04557648 +n03187595 +n04254120 +n03595614 +n04146614 +n03598930 +n03958227 +n04069434 +n03188531 +n02786058 +n07615774 +n04525038 +n04409515 +n03424325 +n03223299 +n03680355 +n07614500 +n07695742 +n04033995 +n03710721 +n04392985 +n03047690 +n03584254 +n13054560 +n10565667 +n03950228 +n03729826 +n02837789 +n04254777 +n02988304 +n03657121 +n04417672 +n04523525 +n02815834 +n09229709 
+n07697313 +n03888605 +n03355925 +n03063599 +n04116512 +n04325704 +n07831146 +n03255030 +n00483313 +n02432291 +n02356381 +n02377388 +n04028764 +n04381587 +n02279257 +n04168199 +n00445055 +n02461128 +n03626760 +n04313503 +n00451635 +n02509515 +n04224842 +n09403734 +n02769290 +n13054073 +n03163222 +n00464478 +n03087069 +n04477219 +n03445617 +n00449054 +n00483705 +n04395106 +n03389611 +n04285965 +n04166281 +n04003856 +n03696301 +n00475787 +n04587404 +n09218641 +n02276355 +n03592669 +n04459909 +n04492375 +n09447666 +n00463543 +n04148703 +n04591517 +n03970546 +n04297750 +n02782778 +n02383231 +n03693474 +n02277094 +n03766044 +n02056228 +n03394272 +n03047052 +n00434075 +n04185946 +n02411999 +n03858418 +n12833149 +n02836035 +n03108853 +n04587559 +n04138261 +n02278024 +n03063485 +n02774921 +n09475044 +n02811204 +n03329302 +n04026813 +n03986562 +n03379204 +n03426134 +n02790669 +n03487090 +n03548402 +n08614632 +n04054361 +n03421485 +n03302671 +n03098959 +n02970408 +n03772584 +n03064935 +n09415584 +n11715430 +n12024445 +n02710201 +n03475581 +n13142504 +n03396074 +n03211789 +n03914337 +n03678558 +n03233123 +n00453396 +n00454395 +n00440382 +n04289027 +n00445226 +n11953610 +n04128413 +n00480211 +n00470966 +n12547503 +n03085219 +n02275773 +n02692086 +n04257790 +n00448748 +n02686379 +n12328567 +n03432129 +n03859000 +n12091377 +n02124313 +n00442847 +n04603399 +n03114379 +n02920369 +n03818343 +n02946127 +n02978055 +n12914923 +n02705429 +n00448232 +n12882945 +n04289690 +n07606669 +n02056728 +n11848479 +n03046921 +n12282933 +n02867966 +n12821505 +n02812949 +n04545305 +n02699770 +n04395651 +n02900160 +n04099003 +n02054711 +n12606545 +n03356858 +n01859190 +n03643737 +n02962200 +n03123553 +n09361517 +n02793089 +n00449517 +n02783994 +n10117851 +n12038585 +n04383839 +n10142391 +n07719213 +n03536122 +n02472987 +n03454536 +n11728099 +n02392824 +n03795758 +n04282872 +n00448872 +n02404432 +n03797182 +n03029197 +n03665924 +n12477163 +n02769963 +n03863262 +n01532325 +n04165409 +n04593077 
+n04473108 +n03577090 +n09988063 +n00446804 +n03931765 +n00475014 +n02700064 +n03240892 +n12475242 +n11735053 +n04053508 +n02852173 +n02382750 +n03823111 +n04543772 +n04112147 +n04433585 +n03175189 +n03596543 +n00445685 +n03307792 +n04589593 +n01814217 +n02993368 +n04303497 +n02811350 +n03355768 +n03699591 +n04590553 +n01893825 +n12726670 +n09916348 +n11544015 +n01318894 +n02133704 +n02367492 +n04506289 +n02069974 +n01900150 +n03207835 +n03363549 +n02831595 +n04970470 +n04160847 +n03767203 +n03928814 +n02302969 +n02918595 +n10401331 +n04231272 +n03717447 +n03063968 +n03380724 +n00825773 +n09988493 +n02740300 +n04539794 +n04121511 +n01323599 +n12937130 +n02428508 +n02980036 +n12061380 +n01887787 +n04214046 +n01787835 +n00466630 +n02979290 +n03927091 +n03231368 +n03904657 +n04469003 +n04196502 +n02122948 +n04544325 +n07868340 +n13876561 +n11925898 +n12158443 +n01595450 +n12454705 +n02069412 +n09618957 +n02357111 +n00451563 +n04197110 +n02276902 +n03111296 +n03909020 +n12303083 +n02082791 +n01956764 +n04269822 +n04207343 +n02433318 +n01888181 +n12682668 +n01592387 +n09793141 +n00466273 +n04026180 +n06255081 +n12172364 +n10145590 +n12311579 +n12173912 +n03822171 +n03140292 +n03027625 +n02739427 +n02060133 +n02431785 +n03219010 +n00447957 +n11910271 +n03620967 +n12547215 +n02409508 +n04290079 +n12329260 +n13901858 +n02008497 +n10304914 +n04524142 +n04279462 +n04233124 +n09733793 +n12822115 +n09475179 +n10151760 +n03418618 +n12858397 +n07735510 +n03549473 +n10098245 +n03653583 +n10604380 +n03375575 +n03885293 +n01527347 +n03237340 +n02760658 +n11953038 +n03187268 +n03004275 +n02393161 +n11965218 +n08580944 +n03938725 +n03900979 +n04144241 +n03760310 +n02376679 +n03237992 +n09432283 +n02379908 +n09918554 +n04041747 +n12012111 +n10331167 +n01612122 +n10147935 +n07691539 +n11669786 +n09403427 +n01935395 +n09903501 +n04439585 +n04459018 +n02780704 +n03720163 +n12899752 +n04118635 +n03404149 +n02429456 +n00449168 +n04516354 +n04317833 +n12075299 +n07878647 +n09438940 
+n03361550 +n02027357 +n04317976 +n03092883 +n04526964 +n03985069 +n03610682 +n04028581 +n02277268 +n09433839 +n03846431 +n03919289 +n10146104 +n10260706 +n02686227 +n03321103 +n00444846 +n01558307 +n01595168 +n03919096 +n11844892 +n04260364 +n02750070 +n03034244 +n03002096 +n04273972 +n11814584 +n04605321 +n07745466 +n02922798 +n03361380 +n12651229 +n08521623 +n04498389 +n00453313 +n04967882 +n12024690 +n03934656 +n02685082 +n04501550 +n09972458 +n03055418 +n07763629 +n03902756 +n09938449 +n09712696 +n02387346 +n03133415 +n07711080 +n03129753 +n03524150 +n02275560 +n03993053 +n03438661 +n11939180 +n00466524 +n11753355 +n03456024 +n03421324 +n07890540 +n11720643 +n02057035 +n00453126 +n04453037 +n01540832 +n03546235 +n03370387 +n02041875 +n02871439 +n03262072 +n01786646 +n02430830 +n02799175 +n05262422 +n03854722 +n12817694 +n04449966 +n01564773 +n02034971 +n03490119 +n02822579 +n07879953 +n04110178 +n04963588 +n04252653 +n01565078 +n02389128 +n02779435 +n10645017 +n04582205 +n08573842 +n10146002 +n03892178 +n03119396 +n03813078 +n07866868 +n03160740 +n03371875 +n02417387 +n03904782 +n03098688 +n02902687 +n01828556 +n04401680 +n04590933 +n01575401 +n07693048 +n02901114 +n03047941 +n04355511 +n11849871 +n10738111 +n03122073 +n12052787 +n01594004 +n01549886 +n02824058 +n03506184 +n11487732 +n12574866 +n12948053 +n10091450 +n00470554 +n00326094 +n12093329 +n04438897 +n07818995 +n12828791 +n13901321 +n10613996 +n10159533 +n02669295 +n02843158 +n06415688 +n14858292 +n09813219 +n12485653 +n03200231 +n02089468 +n03935234 +n01539925 +n12428076 +n10439373 +n01536644 +n02694662 +n02123242 +n03002711 +n03363749 +n02669534 +n03451798 +n11927215 +n02679257 +n09475925 +n10015485 +n12422129 +n03946162 +n02377291 +n07871720 +n12622297 +n12782915 +n01579260 +n11838916 +n10267311 +n12824053 +n03340723 +n02276749 +n04439712 +n02126139 +n04188179 +n02386853 +n07942152 +n02499316 +n04324387 +n10635788 +n04234887 +n12237641 +n03713436 +n04960582 +n04076713 +n01646292 +n03947798 
+n02840134 +n04476972 +n09822830 +n03551395 +n04533802 +n02918964 +n00474657 +n12932966 +n01615458 +n01806364 +n12458550 +n11784497 +n03557360 +n10638922 +n09889941 +n10689306 +n03358172 +n04295571 +n06596607 +n11853356 +n00482122 +n11760785 +n03150232 +n11778257 +n03059685 +n10105733 +n04104384 +n07691237 +n04326676 +n07684938 +n12666965 +n04177820 +n13918387 +n03398153 +n03914438 +n09932098 +n02988486 +n02977619 +n03317788 +n03484487 +n02988679 +n04062428 +n02568087 +n12866162 +n04227144 +n07875436 +n04082886 +n11753700 +n00470682 +n02122298 +n10145239 +n12755727 +n04214282 +n01852671 +n02378969 +n04108822 +n10382825 +n12392549 +n03973839 +n12258885 +n11782761 +n12389501 +n02940570 +n03405595 +n02969323 +n03207630 +n10169147 +n03805725 +n09847543 +n02415253 +n07880080 +n04305572 +n02042180 +n07565161 +n02871147 +n04438507 +n04445154 +n07842433 +n12029635 +n09750282 +n09621232 +n01858906 +n02761206 +n03986355 +n12591351 +n13916721 +n02905036 +n11894770 +n02377603 +n12924623 +n03950899 +n09454153 +n10247358 +n05261310 +n11943660 +n10804287 +n03560430 +n01756089 +n10618342 +n04283378 +n13926786 +n04238321 +n04393549 +n04461879 +n03502200 +n00440941 +n03494706 +n04148579 +n13902336 +n02780815 +n10726031 +n04124098 +n12344483 +n04384910 +n07681450 +n02030837 +n04059157 +n09247410 +n02714751 +n08633683 +n04520784 +n10141732 +n12371439 +n04499062 +n02931148 +n07609632 +n04536335 +n02874537 +n03013438 +n11786539 +n11690455 +n07600696 +n00478262 +n00466712 +n03399677 +n12441183 +n07895962 +n11966083 +n02990373 +n04241249 +n02068541 +n12513933 +n02356977 +n04252560 +n04087826 +n03455488 +n07619409 +n09787534 +n03680942 +n00446980 +n12384839 +n03416900 +n07821758 +n11853813 +n01606522 +n11780148 +n04969242 +n12413880 +n04130257 +n01322604 +n03061211 +n01959492 +n02842573 +n04313628 +n03815149 +n02445394 +n08547544 +n03222176 +n04070003 +n03075768 +n09695979 +n02877266 +n08583292 +n02870676 +n03657511 +n01621635 +n04284341 +n04136161 +n02836174 +n10247880 +n01744100 
+n02882894 +n03408444 +n03411079 +n02366959 +n04399158 +n04542715 +n02787435 +n04251701 +n13863020 +n07890226 +n12245319 +n12849952 +n11626826 +n00887544 +n03140431 +n03519387 +n03855604 +n07906111 +n02054036 +n11954161 +n03038281 +n00450998 +n12136392 +n02119477 +n04356925 +n02406647 +n04450133 +n12545635 +n01565599 +n02028900 +n07817024 +n02971167 +n04309049 +n02678897 +n12795555 +n11769803 +n01904886 +n02079851 +n12189987 +n04581829 +n12098403 +n01839330 +n12587803 +n03652932 +n08628141 +n03544238 +n04513827 +n01847806 +n03132076 +n10243137 +n03621377 +n10530959 +n14765422 +n04968139 +n12950314 +n02064816 +n02846511 +n10513823 +n11772408 +n03341297 +n03492922 +n03683606 +n02894337 +n02365480 +n09846755 +n03495039 +n01317813 +n12610328 +n02157206 +n01588002 +n03914831 +n03659686 +n10406765 +n09205509 +n02870526 +n07954211 +n10578471 +n11646694 +n03115762 +n07762913 +n12056758 +n12305986 +n11845913 +n02835915 +n02831237 +n07927512 +n12171098 +n02073831 +n07605040 +n02885462 +n02768114 +n04450994 +n11844371 +n03963645 +n02956699 +n02029378 +n01528396 +n10005934 +n04465666 +n04390977 +n11882074 +n03831382 +n04605163 +n06276501 +n02944075 +n05258051 +n07901457 +n12683571 +n02205219 +n13235503 +n02388735 +n03941231 +n14919819 +n12816508 +n11536673 +n13895262 +n02903204 +n10137825 +n07841345 +n07893253 +n01850192 +n07769731 +n11773987 +n03539678 +n12938193 +n10802507 +n03089879 +n00477392 +n01828096 +n09263912 +n13653902 +n04579667 +n01322983 +n08579352 +n07587023 +n07756951 +n07870167 +n10588357 +n01606809 +n13864035 +n02802544 +n07591961 +n02979399 +n04144539 +n02416820 +n11769176 +n09743792 +n09732170 +n04972451 +n13918274 +n01847089 +n01859689 +n04208065 +n07617051 +n10674713 +n07914271 +n07887461 +n03736064 +n03644858 +n03878963 +n04040247 +n07891433 +n01611969 +n07587618 +n02689144 +n10049363 +n04059516 +n10313239 +n03115400 +n01519563 +n01533893 +n03850245 +n11733548 +n03372549 +n01884834 +n02839110 +n07887192 +n03617312 +n07886463 +n03103396 +n07764847 
+n01855476 +n07808587 +n12858871 +n03632729 +n10209731 +n04141712 +n03978686 +n03225988 +n00475273 +n09224725 +n04966543 +n01322221 +n03649674 +n13154494 +n03948830 +n03320519 +n03723267 +n07869611 +n12342498 +n01827793 +n03145719 +n11821184 +n11956348 +n11857875 +n10339717 +n09450163 +n10756148 +n01591301 +n07915094 +n04422727 +n09719309 +n03349469 +n03389889 +n10718131 +n04298661 +n09747495 +n03676623 +n03547229 +n03062015 +n10734394 +n07817315 +n02852360 +n01850553 +n02952585 +n03587205 +n02009750 +n01540090 +n02947660 +n03656957 +n03378174 +n02508213 +n01572489 +n12008487 +n12185859 +n11691046 +n01323355 +n05262534 +n00448126 +n02432983 +n12038406 +n03883385 +n02411206 +n01643896 +n10159045 +n11675025 +n01803362 +n02009508 +n07920349 +n04098513 +n11617272 +n09913455 +n12390314 +n04171208 +n02995345 +n10634849 +n03173929 +n02749953 +n11845793 +n12796022 +n11955153 +n11816829 +n03032453 +n11984542 +n02992795 +n03712111 +n02873733 +n02759387 +n14915184 +n02381364 +n12686274 +n07857731 +n04518764 +n03010473 +n02418465 +n02359556 +n07894799 +n04104770 +n04335209 +n01848976 +n02006063 +n04454908 +n03002948 +n04220250 +n09923561 +n04102162 +n11958080 +n04598965 +n10173410 +n03067339 +n02003204 +n12686676 +n11986511 +n02311617 +n03367059 +n02761557 +n05578095 +n04041069 +n10575463 +n03325941 +n10082043 +n01806297 +n09691729 +n04593866 +n01813088 +n01625562 +n03906224 +n01652026 +n10236304 +n04102618 +n04321453 +n07820145 +n01575117 +n12788854 +n07823698 +n04206225 +n03216710 +n02421449 +n03343737 +n07560903 +n02872529 +n11989869 +n12071744 +n06278475 +n04492749 +n02920259 +n03798061 +n02420509 +n03316105 +n12052447 +n03974915 +n02904803 +n03430418 +n12291959 +n06892775 +n03875806 +n07903841 +n10282482 +n02683323 +n07862348 +n01849157 +n04469813 +n09944022 +n03342127 +n07592481 +n02936402 +n02405929 +n10002760 +n02537716 +n05259914 +n01560280 +n12694486 +n07879350 +n02377063 +n03637181 +n03409297 +n01607812 +n02808185 +n09239302 +n12055516 +n09712448 +n02859184 
+n12772908 +n02735538 +n10333838 +n12336092 +n02386968 +n04613939 +n00452864 +n04535524 +n03174731 +n04189816 +n07607605 +n12909917 +n02387722 +n02960690 +n07715221 +n02407071 +n10667477 +n09398076 +n04236809 +n01904806 +n01610552 +n12373100 +n12771390 +n04122685 +n07804771 +n15102455 +n03469175 +n03746005 +n02536456 +n03505667 +n11816336 +n09376198 +n10572706 +n03464053 +n02869155 +n07816164 +n04969798 +n02942349 +n14820180 +n01623615 +n12676703 +n03369276 +n03650551 +n02010272 +n02976123 +n01852400 +n02196119 +n04132158 +n03238586 +n07639069 +n03313333 +n10542761 +n12215022 +n00455173 +n10019406 +n12899537 +n04277826 +n09906449 +n04549629 +n11508382 +n15090065 +n10289462 +n04540255 +n02723165 +n04335693 +n01536334 +n03107488 +n12782530 +n14785065 +n02974348 +n09874862 +n04479939 +n03309465 +n09902954 +n12092417 +n03425595 +n12433081 +n07806774 +n12462805 +n01314781 +n10192839 +n01622120 +n07807171 +n03261019 +n02843553 +n04287747 +n02324587 +n09915434 +n01818299 +n01592694 +n03826186 +n03607659 +n01527917 +n03628511 +n02005399 +n04204081 +n02052775 +n04403413 +n03914106 +n12811027 +n01872772 +n04555700 +n02004855 +n04602762 +n02713003 +n04406817 +n11934807 +n03336282 +n09684901 +n03836976 +n11959862 +n03062336 +n03506028 +n04503413 +n07819896 +n03205669 +n11902200 +n07685218 +n03046133 +n10261624 +n10303814 +n03676087 +n04023695 +n07587111 +n07764155 +n01504179 +n03794136 +n03389761 +n13901211 +n02784124 +n04488530 +n02807731 +n07898443 +n04981658 +n04177755 +n03649161 +n04125257 +n10135129 +n03653110 +n10560106 +n07735687 +n03511333 +n11960245 +n03301568 +n03878066 +n10746931 +n04223299 +n04237423 +n07888229 +n01819734 +n12312728 +n09981939 +n03727465 +n13882276 +n02993194 +n11971927 +n09713108 +n03581125 +n09718936 +n14698884 +n03005285 +n03540914 +n03359436 +n03934042 +n07569644 +n04964878 +n07890068 +n07580253 +n01538630 +n03132666 +n03259009 +n02796318 +n12703190 +n01464844 +n11792029 +n04270371 +n13102775 +n02933649 +n02387254 +n02890188 +n04335886 
+n04358491 +n02786837 +n03885194 +n04001265 +n03438071 +n10375402 +n02997910 +n03326795 +n00470830 +n02734725 +n03494537 +n08376250 +n07743544 +n02991847 +n04246271 +n04156140 +n04381073 +n07732168 +n04951071 +n07977870 +n04334599 +n02838728 +n03326948 +n11723227 +n08182379 +n03686924 +n03821518 +n02382204 +n02080415 +n11788727 +n07732636 +n03860404 +n03898395 +n07867324 +n04392113 +n13237188 +n03263076 +n07843636 +n04968056 +n04397027 +n03320421 +n06267564 +n02880842 +n04115456 +n13862407 +n10289039 +n03128248 +n01457852 +n01536035 +n04579056 +n03937931 +n03036022 +n01804163 +n09913593 +n12841007 +n03115897 +n03256032 +n02475669 +n07924443 +n03061505 +n10001481 +n03600722 +n07842308 +n10696508 +n04215402 +n10588074 +n03614782 +n03995535 +n12091953 +n04113194 +n10092978 +n03011741 +n04381860 +n07819769 +n07905474 +n03288500 +n04225987 +n13223710 +n02879087 +n02920083 +n08640739 +n03362890 +n03996849 +n03849814 +n09694664 +n02407390 +n02910864 +n02388917 +n01668665 +n07616046 +n02932891 +n10553235 +n03652729 +n01615703 +n12801781 +n12164656 +n05302499 +n03801760 +n03332271 +n02901793 +n03941417 +n09833441 +n01623110 +n02807523 +n10598181 +n03725600 +n10368528 +n04116098 +n12719944 +n02045864 +n02173373 +n02811059 +n04479823 +n07816398 +n10572889 +n04142731 +n07687381 +n02799323 +n07865484 +n01858845 +n12684379 +n01842235 +n09242389 +n02028727 +n03527565 +n03438863 +n15019030 +n13907272 +n09659039 +n04251791 +n03683995 +n04137217 +n04389430 +n09785659 +n02016816 +n03124590 +n01859325 +n03138669 +n02999936 +n11926365 +n12686077 +n03517760 +n09734450 +n04563413 +n12074867 +n01564217 +n12521394 +n06267893 +n03594148 +n04139395 +n12369309 +n01544389 +n12048056 +n04524941 +n03016868 +n03653740 +n02795528 +n03687137 +n03766935 +n03361297 +n04263502 +n10043491 +n03446268 +n01994910 +n03891538 +n10091564 +n10226413 +n02755140 +n03500389 +n10237196 +n03625646 +n06596474 +n03360300 +n09730824 +n10732010 +n04469514 +n02904927 +n04961331 +n02936570 +n03680858 +n07585758 
+n09199101 +n04050933 +n03712337 +n03911513 +n01556182 +n03102371 +n07928887 +n12133462 +n03974070 +n03971218 +n03292475 +n03425241 +n03440216 +n11995092 +n02894158 +n02918112 +n10568358 +n11524451 +n03169176 +n04100519 +n07588193 +n06883725 +n02860640 +n07762114 +n04082710 +n07896893 +n10167152 +n03287351 +n02788021 +n08494231 +n01560935 +n03249342 +n04564581 +n09349648 +n07704205 +n03510244 +n12127460 +n09945745 +n11719286 +n11613459 +n12656369 +n03824381 +n07655263 +n09894143 +n04964001 +n02161457 +n07654298 +n07930433 +n02979074 +n02026948 +n13914608 +n07611267 +n02843276 +n09827363 +n10259780 +n04432662 +n11715678 +n12388858 +n03057920 +n10465451 +n03855214 +n07728181 +n09835348 +n03549732 +n04589325 +n03491032 +n00452034 +n03948242 +n01456756 +n07921615 +n02809105 +n12889713 +n07586894 +n07734879 +n07905979 +n12847374 +n12129134 +n02122580 +n04028074 +n02911332 +n09251407 +n07697825 +n04597309 +n02800213 +n03480579 +n07621618 +n04170933 +n03743279 +n01916481 +n04037220 +n10748620 +n02708433 +n12007196 +n02561381 +n04103769 +n03030880 +n04413969 +n03911658 +n04590746 +n00476389 +n04331639 +n07725789 +n01792429 +n02949542 +n07686720 +n04064862 +n04447028 +n01713764 +n09854218 +n04032603 +n04405907 +n15093298 +n04385536 +n11954345 +n01560793 +n09249034 +n03784270 +n03436549 +n01324610 +n02379183 +n07616487 +n04119478 +n03309356 +n12865037 +n12850168 +n04250850 +n03024064 +n04412097 +n02982515 +n00450070 +n10175248 +n11847169 +n12276872 +n12870891 +n10229883 +n10505613 +n03482252 +n09300905 +n02919890 +n07617611 +n10283170 +n01607962 +n01671125 +n07894551 +n04561287 +n00005787 +n10025635 +n02850732 +n03732020 +n02036711 +n07907429 +n03797896 +n03004824 +n12011620 +n10300303 +n03105467 +n03767745 +n07868508 +n07868200 +n03788047 +n07886057 +n04559451 +n09845401 +n04373704 +n02676938 +n02565324 +n02667478 +n02122878 +n03244047 +n01747589 +n04320973 +n13205058 +n02379430 +n11959632 +n10183931 +n07683490 +n10055410 +n04370288 +n03273551 +n13900422 +n07899434 
+n04053677 +n07740461 +n11879722 +n04282494 +n02981911 +n03449451 +n07581249 +n03965456 +n11808468 +n13881644 +n11725973 +n12091213 +n13193856 +n02873520 +n02754656 +n02431976 +n01324431 +n02385214 +n01888411 +n12680864 +n07731284 +n04337287 +n07631926 +n02549248 +n04395024 +n07585557 +n02776825 +n09460046 +n12023108 +n00475403 +n10098517 +n07902336 +n03683708 +n02412210 +n04397452 +n04583212 +n13869547 +n03632577 +n01616086 +n02763901 +n08256735 +n03015478 +n02084732 +n12178896 +n11966215 +n07605380 +n13869788 +n01847170 +n07744811 +n01854700 +n00444937 +n10422405 +n07801892 +n09688804 +n11879054 +n02802215 +n07908411 +n07822518 +n01558594 +n07935737 +n10730728 +n04436329 +n04294879 +n04972350 +n12911440 +n13886260 +n07578093 +n02537525 +n03703730 +n09607630 +n13865904 +n02360282 +n11731659 +n04126066 +n04212165 +n11618290 +n07588574 +n09269472 +n11896722 +n02892304 +n03487642 +n02028342 +n03321563 +n03135030 +n03522100 +n03253886 +n04095109 +n06470073 +n12603449 +n10644598 +n10260800 +n01535469 +n09696456 +n03553019 +n03963198 +n11918473 +n10314517 +n03002341 +n07574923 +n10421470 +n05716342 +n03244231 +n01730563 +n11691857 +n12807251 +n12345899 +n03142679 +n01531512 +n12307240 +n07835457 +n04535370 +n00451186 +n12481458 +n03434188 +n09734185 +n04578934 +n04167346 +n02747802 +n03459328 +n03301940 +n01562014 +n07690431 +n10642596 +n03696065 +n12781940 +n02759257 +n04392764 +n04218564 +n03499907 +n01536780 +n09751895 +n03235042 +n04570815 +n12070381 +n09448690 +n07625061 +n10178216 +n04560113 +n09457979 +n03858085 +n02421792 +n02944579 +n10085869 +n09718811 +n04103206 +n04239786 +n04501947 +n01321123 +n02390015 +n03964495 +n01554448 +n02925107 +n03028596 +n12483625 +n03227317 +n10701644 +n11968704 +n03900393 +n01851038 +n02276078 +n03132776 +n07585906 +n04480033 +n07880458 +n12887293 +n07921239 +n03307037 +n04595028 +n04244379 +n13131028 +n10313724 +n09436708 +n02694045 +n09941787 +n00449796 +n01817346 +n07928696 +n03401279 +n12901724 +n11646167 +n07682477 
+n09415671 +n07900225 +n03607029 +n02692232 +n11834654 +n07935379 +n12437930 +n03762434 +n07922764 +n03595523 +n04546340 +n10686885 +n03516844 +n03767112 +n09896685 +n03859608 +n03149686 +n07920872 +n12388143 +n10406391 +n04233715 +n04373089 +n02023992 +n01947396 +n12115180 +n00479616 +n03962852 +n02392434 +n12414035 +n14976871 +n03201776 +n10665587 +n03600285 +n04402449 +n08539072 +n03629231 +n12860365 +n03488438 +n03337383 +n12455950 +n10384392 +n02953455 +n03101796 +n07919572 +n03233744 +n01578180 +n01756508 +n04556533 +n02962843 +n02882190 +n03731483 +n01850873 +n05260240 +n03111177 +n09836519 +n03030557 +n11789066 +n02788572 +n07903101 +n04067818 +n07840804 +n01567678 +n12427184 +n03333610 +n02416964 +n10607291 +n07936548 +n05451384 +n02968074 +n07605597 +n02704949 +n07609215 +n01951274 +n07696977 +n03180384 +n04303357 +n03291741 +n02207805 +n10123844 +n03420345 +n12384227 +n02758863 +n02047975 +n03978966 +n03549199 +n04275175 +n09294877 +n09836343 +n11970586 +n02010728 +n10369317 +n12681893 +n03192543 +n12413165 +n12174521 +n11916696 +n10042845 +n07822197 +n04968749 +n10323634 +n12849416 +n02814774 +n05538625 +n03078802 +n12230794 +n07726095 +n03051249 +n12005656 +n11876432 +n12164881 +n09711435 +n01622483 +n09896170 +n07684289 +n03368352 +n07910048 +n03159535 +n00466377 +n01541386 +n11647703 +n09752023 +n07903731 +n12249542 +n03794798 +n11786131 +n02852043 +n10493685 +n09846894 +n01752585 +n01536186 +n07618432 +n09859152 +n02065026 +n02382635 +n07867616 +n03885788 +n04255586 +n03275681 +n11961100 +n12485981 +n04495698 +n03293741 +n13902048 +n03254862 +n07903962 +n01594787 +n11962272 +n03284886 +n07842202 +n10157128 +n02405302 +n04443766 +n06266633 +n02519862 +n01487506 +n03373943 +n04247876 +n04327204 +n03349771 +n09260907 +n10092794 +n12223764 +n03504723 +n11926833 +n01820052 +n13032381 +n03889871 +n03209359 +n04608923 +n15093137 +n15091304 +n03688405 +n09905185 +n03543112 +n11611356 +n03885028 +n03234164 +n07594066 +n02396014 +n03456186 +n09874725 
+n11601333 +n02917521 +n03055857 +n02804123 +n12352844 +n12866002 +n09858165 +n12037691 +n02565072 +n04477387 +n02008643 +n07867021 +n04119360 +n09893191 +n02944146 +n12435649 +n13197274 +n04974859 +n07751004 +n12003696 +n02762508 +n02680512 +n01743086 +n06998748 +n10607478 +n07613815 +n01559477 +n01859852 +n03239054 +n04466871 +n05263183 +n13173882 +n07897438 +n12427757 +n04400737 +n03291963 +n07682808 +n11692265 +n04130143 +n09445289 +n07696839 +n03835197 +n12821895 +n09734639 +n03365374 +n04305210 +n04962240 +n09871867 +n07897750 +n07616386 +n09443281 +n03641569 +n13882563 +n07680761 +n10498816 +n04034262 +n03533014 +n07928790 +n07690152 +n10060352 +n04124370 +n12453186 +n04509171 +n03013580 +n10604979 +n12515711 +n04971211 +n07693223 +n03786715 +n07894703 +n02761834 +n04232800 +n03437741 +n04045644 +n14976759 +n03042697 +n12557681 +n06275095 +n11678010 +n01586941 +n07684517 +n07822845 +n03483823 +n09951616 +n03180865 +n07861557 +n03644378 +n12848499 +n11962667 +n03886762 +n04238128 +n11979964 +n13915113 +n12791329 +n12457091 +n03341153 +n10267865 +n03484576 +n10186216 +n07612137 +n03843438 +n11807525 +n11931540 +n02027897 +n07614730 +n04116294 +n03469903 +n10017272 +n03688605 +n07860103 +n03981566 +n01888045 +n03345837 +n11998888 +n02071636 +n02726017 +n04310157 +n04607869 +n01622959 +n08524735 +n03119203 +n12031927 +n03610524 +n02807616 +n04056180 +n03233905 +n03374473 +n14810561 +n11944954 +n03121431 +n09750891 +n08505018 +n10727171 +n12357485 +n12571781 +n12067193 +n07586604 +n02086753 +n03548086 +n02560110 +n07804900 +n02880393 +n04208427 +n12931542 +n01594968 +n05218119 +n03520493 +n03727605 +n12687698 +n03612965 +n04135315 +n07730320 +n10540114 +n07599911 +n01323493 +n02115096 +n04590263 +n12043836 +n02861387 +n09836786 +n04966941 +n02816768 +n13131618 +n10701962 +n02919792 +n03442597 +n04325041 +n03333129 +n04091693 +n04950952 +n10631309 +n04177931 +n13234678 +n01970667 +n07748416 +n07893642 +n07691650 +n03660909 +n04145863 +n11945514 +n10334009 
+n12336973 +n03954393 +n04558478 +n09899929 +n03487533 +n07816575 +n07877187 +n07863547 +n01603812 +n02098906 +n04973585 +n03674440 +n04371050 +n12243109 +n07871234 +n02928049 +n07574504 +n07889274 +n12141167 +n04543996 +n03080633 +n03423479 +n07879659 +n04380916 +n10514429 +n07584423 +n04009801 +n12479537 +n07606538 +n07698543 +n12353754 +n10132035 +n03367545 +n04245508 +n09811852 +n02024763 +n04052442 +n10120330 +n12352639 +n12606438 +n07752966 +n09772930 +n02535759 +n11737534 +n10345015 +n12427566 +n09705784 +n04112654 +n02985963 +n03758089 +n12953484 +n07906572 +n02881757 +n12739332 +n03718458 +n03407865 +n07775050 +n03210552 +n09452395 +n09789566 +n10566072 +n10559996 +n07826930 +n12414932 +n01887474 +n03026907 +n07751148 +n10223177 +n03957420 +n03788601 +n12244819 +n12421137 +n04266162 +n10038409 +n02981024 +n03228967 +n11825351 +n12058822 +n11963932 +n03041449 +n03046029 +n07590502 +n02932523 +n02152881 +n04970398 +n07887967 +n12812478 +n12421917 +n02708711 +n11870747 +n04290507 +n07934282 +n01608265 +n12070583 +n03205574 +n02305085 +n07866015 +n02960903 +n10098624 +n00481803 +n07938007 +n02693246 +n03923379 +n04103665 +n11792742 +n12489815 +n04971313 +n01668892 +n01055165 +n03215508 +n12104501 +n07899292 +n12822955 +n07713074 +n03842012 +n02449350 +n07868955 +n02835829 +n12283542 +n04525584 +n07910656 +n11625003 +n03987266 +n02805983 +n15091846 +n09736945 +n04973816 +n02439398 +n01519873 +n07899003 +n03019938 +n07582152 +n01885498 +n12108871 +n02934451 +n04327682 +n07696625 +n09750770 +n12084890 +n03960374 +n07585107 +n01570839 +n11905392 +n06277135 +n07842044 +n03751269 +n04398951 +n12861892 +n12649539 +n07596967 +n07580592 +n12845413 +n07690739 +n07804657 +n04334105 +n03779128 +n03268918 +n03066359 +n02744323 +n12596148 +n04272389 +n07832416 +n10210911 +n01548865 +n03221351 +n15091669 +n07878926 +n07607967 +n12171966 +n02846141 +n07576781 +n02922292 +n10092643 +n01732614 +n02578771 +n02864593 +n03537241 +n09635534 +n03268645 +n07852833 +n13873917 
+n12640839 +n03506727 +n10536416 +n09976429 +n10692482 +n07600285 +n04156946 +n07818689 +n02605703 +n02710429 +n02890351 +n03408054 +n03121298 +n02731629 +n12450840 +n04061681 +n10153414 +n07648913 +n07891309 +n01562265 +n14973585 +n01610226 +n06267991 +n03302938 +n07822323 +n07826091 +n02764398 +n10406266 +n09282208 +n01734104 +n04283096 +n03530910 +n11542137 +n02610664 +n03856012 +n01531811 +n07862611 +n11625632 +n12643313 +n02469248 +n03333711 +n02907082 +n02122430 +n01559804 +n09744161 +n10187990 +n12015525 +n07844867 +n07887304 +n02878425 +n02009380 +n11448153 +n10655594 +n12566954 +n11901977 +n03999160 +n02389779 +n07928488 +n12785889 +n04281375 +n03745146 +n03224603 +n04594828 +n12835331 +n09715427 +n11615026 +n09972010 +n04038231 +n02379329 +n03445326 +n10753442 +n04249882 +n11727738 +n07866723 +n04282992 +n11621281 +n01566645 +n03919430 +n11980682 +n03480719 +n11625804 +n10467395 +n09436444 +n07867751 +n03684611 +n03788498 +n12062626 +n07808904 +n07690585 +n03865557 +n10711766 +n10465831 +n04380255 +n12166128 +n04432203 +n07892418 +n10432441 +n12991184 +n04209613 +n04459773 +n09666883 +n07807472 +n09873899 +n12939874 +n04545748 +n09637339 +n07919441 +n03987376 +n03645577 +n03437430 +n10671613 +n02964843 +n09707289 +n11700058 +n03877351 +n03518445 +n07643200 +n02140049 +n12683791 +n12418221 +n04154152 +n03397947 +n03238131 +n11851839 +n04545858 +n07744682 +n02995871 +n07593199 +n03543394 +n10293332 +n12658481 +n11599324 +n02705201 +n03920867 +n08249459 +n02876084 +n03937835 +n01397871 +n03849679 +n12016567 +n04208936 +n07696728 +n13148208 +n01904029 +n08659861 +n07878785 +n07827130 +n03390983 +n02624807 +n03319745 +n03994614 +n00446493 +n12477583 +n02920658 +n04602956 +n02688273 +n07577538 +n04350581 +n09283405 +n04074185 +n04495843 +n03538179 +n03454885 +n03878211 +n10308168 +n08518171 +n02660208 +n07904760 +n07928367 +n10174445 +n02137015 +n02863426 +n07700003 +n04015908 +n03946076 +n11725821 +n01794344 +n04364160 +n01663782 +n04283255 +n02822064 
+n04406239 +n02782681 +n11990313 +n03563460 +n02957008 +n07889814 +n07896060 +n03683079 +n04278447 +n13011595 +n11810358 +n03836451 +n12827537 +n03545470 +n03213538 +n07929351 +n03471190 +n02882301 +n03625943 +n03397087 +n11955896 +n04097373 +n03145522 +n03034405 +n02889646 +n02928299 +n09652149 +n01641391 +n04593524 +n07651025 +n03719343 +n03884778 +n03452594 +n02174659 +n12345280 +n03039827 +n03309687 +n11635433 +n02057330 +n01664990 +n09779790 +n02011016 +n09689958 +n07770763 +n03010915 +n03443912 +n02946509 +n13050397 +n03031012 +n04217546 +n04124202 +n12766869 +n04177041 +n12050533 +n03251932 +n03086580 +n03918737 +n04386792 +n03176594 +n01577035 +n01669654 +n01818832 +n10441962 +n03885904 +n03724756 +n02925666 +n03549589 +n03062122 +n02828427 +n12604228 +n03624400 +n07725888 +n03873699 +n01503976 +n02887079 +n03610098 +n02940385 +n04610013 +n03652100 +n04496872 +n04008385 +n02583890 +n10476467 +n03395514 +n03306385 +n04228581 +n02389261 +n12576323 +n01579149 +n01623425 +n02593019 +n03995265 +n02124484 +n12745386 +n04355267 +n02643836 +n01614343 +n03810952 +n04058594 +n12278650 +n03474779 +n02823510 +n00442437 +n12039317 +n04574067 +n03762602 +n02153109 +n03518943 +n04289827 +n02288268 +n07749969 +n04132985 +n03213826 +n04307986 +n03567066 +n02049088 +n04408871 +n03522003 +n09305898 +n04266375 +n08571898 +n03039259 +n01587526 +n03261603 +n00464277 +n02627532 +n02992368 +n03640850 +n03037404 +n04525191 +n02106854 +n07772147 +n04173511 +n12761284 +n03257210 +n02813544 +n07740342 +n04066270 +n03070059 +n03616428 +n02904233 +n03209910 +n04389854 +n03078995 +n03193260 +n01488038 +n01754533 +n12629305 +n02055107 +n11664418 +n04228693 +n03353951 +n03440682 +n03025250 +n03300216 +n02042046 +n04226826 +n03342015 +n03090000 +n02050313 +n03492250 +n01535690 +n01572654 +n03465718 +n02879309 +n06278338 +n04113406 +n03695857 +n09720256 +n01860002 +n02851939 +n09828216 +n02564270 +n03528901 +n02542432 +n11978961 +n01670802 +n03956623 +n01612275 +n09376786 +n03222318 
+n02813645 +n02213543 +n13898207 +n03616763 +n03616979 +n11904109 +n04212282 +n04608435 +n02042472 +n04198453 +n03216402 +n02015357 +n12282737 +n02699629 +n12866635 +n02048353 +n02933340 +n01793715 +n12001707 +n02878222 +n03187037 +n03105306 +n04080705 +n04254009 +n01623880 +n02839592 +n03436182 +n01591123 +n01318279 +n03002816 +n13155095 +n03141702 +n03775388 +n12165170 +n03322836 +n03259401 +n04471148 +n03911767 +n12585629 +n04317325 +n04257986 +n03133050 +n02035210 +n12891305 +n11882426 +n04491388 +n12948251 +n03498781 +n04262161 +n03775636 +n09915651 +n07584332 +n07852614 +n11626152 +n03901750 +n09723067 +n04265904 +n09920283 +n02397744 +n03253796 +n07712959 +n03898129 +n01743936 +n02075612 +n04560292 +n03479397 +n04334365 +n04357121 +n10145902 +n03844673 +n09854421 +n12687957 +n12598027 +n03944138 +n01839750 +n07722888 +n04258859 +n03088389 +n03351434 +n03509608 +n01677747 +n03145147 +n12046815 +n03505133 +n01629962 +n03333252 +n03993703 +n02962061 +n04529962 +n03463666 +n07681691 +n12160857 +n04187233 +n09331251 +n11614713 +n04376400 +n12301445 +n12633994 +n03883524 +n11614420 +n13062421 +n03645011 +n03293863 +n11640132 +n02579928 +n02854739 +n04461437 +n07729384 +n02977936 +n02836392 +n03593122 +n01666228 +n07820683 +n07568502 +n11910460 +n09348460 +n09712324 +n02403740 +n03482877 +n04370774 +n07750146 +n12992177 +n03152303 +n04134008 +n09805324 +n01611800 +n04374315 +n07586099 +n02032222 +n01979874 +n04350769 +n02907873 +n03016609 +n02543565 +n03256166 +n03016737 +n02419336 +n03268790 +n03559999 +n07765999 +n04607035 +n02416104 +n02123917 +n12484784 +n03225108 +n10739391 +n03506880 +n02918831 +n03045228 +n12516828 +n01314663 +n04172342 +n02768226 +n12368028 +n01500476 +n01558149 +n03604156 +n04035912 +n02359915 +n12261571 +n03875955 +n01887623 +n03871371 +n03390786 +n12494794 +n03826039 +n04465358 +n03838298 +n03165466 +n04229737 +n01321770 +n04354026 +n02998003 +n04114844 +n10611613 +n03600475 +n01909906 +n00466880 +n04284869 +n07722485 +n04496614 
+n03298716 +n02285801 +n04081699 +n07765208 +n12659539 +n11618525 +n11757653 +n07727048 +n03913343 +n12070016 +n02697675 +n04284572 +n02595702 +n04482297 +n03516996 +n03704549 +n02040266 +n04476116 +n01323261 +n03823216 +n07696403 +n03226880 +n09734535 +n03950537 +n01671479 +n03049924 +n12593994 +n04568841 +n03604400 +n01837072 +n01754370 +n03122202 +n12338454 +n04094720 +n04150980 +n03429682 +n03884926 +n03378005 +n02434954 +n03461288 +n02893692 +n04472563 +n10472129 +n04590021 +n07739344 +n04162433 +n03395859 +n12059314 +n03498662 +n03678729 +n02927764 +n02770211 +n11710393 +n07730207 +n04178190 +n07772935 +n03801880 +n04414675 +n12729521 +n12203529 +n04122578 +n04575824 +n06267655 +n03698360 +n02804515 +n02431337 +n08598568 +n02893608 +n02270623 +n00479440 +n11616662 +n02884994 +n04305323 +n02407625 +n04476831 +n04222307 +n03179910 +n11623967 +n00446311 +n00454983 +n02886434 +n12279458 +n03723781 +n11816121 +n02403231 +n11808299 +n07816296 +n03219483 +n02657694 +n00453478 +n02816656 +n02625851 +n04112752 +n03339529 +n12171316 +n02044517 +n04137773 +n01486838 +n03015149 +n12911673 +n03967270 +n03498441 +n11672269 +n03386870 +n11615967 +n02580679 +n01681653 +n02793199 +n02824319 +n10727458 +n02555863 +n01533000 +n02175916 +n12064389 +n04383015 +n02469472 +n03101664 +n03623338 +n12295796 +n02869249 +n01792042 +n03447075 +n04453390 +n04382438 +n04112252 +n03332393 +n12729729 +n01851207 +n04269270 +n12333771 +n06272612 +n03135532 +n02927887 +n11711537 +n12301180 +n04107743 +n01813948 +n03282295 +n09714694 +n00483409 +n01504344 +n04279353 +n04040373 +n12658308 +n04134523 +n10104064 +n12056601 +n04525417 +n07819166 +n12263038 +n02072798 +n03125057 +n03367410 +n04000592 +n03549897 +n01877606 +n01564914 +n12307076 +n02855925 +n03176763 +n12271933 +n04121728 +n07690511 +n02825442 +n04442441 +n01630901 +n03088580 +n02499808 +n10675010 +n01531971 +n02273392 +n01526521 +n01531344 +n03667664 +n02888270 +n04412416 +n07733394 +n04559910 +n04105704 +n11792341 +n04201064 
+n01693175 +n04555291 +n02908773 +n01976868 +n03529175 +n03365231 +n03622839 +n04258333 +n03327133 +n03425769 +n12477747 +n03718935 +n11727540 +n07933799 +n03030262 +n12043673 +n02619550 +n07937461 +n12198286 +n08560295 +n12402348 +n01733957 +n12344700 +n02763604 +n11925303 +n01557962 +n03927299 +n11611758 +n03035252 +n09454412 +n04004990 +n03456299 +n02175569 +n03668279 +n12352990 +n03507241 +n01534155 +n12278371 +n02499022 +n03822767 +n01318381 +n04024983 +n04277493 +n11934616 +n02027075 +n11611561 +n03454442 +n02236355 +n01732789 +n07722052 +n01489501 +n04409625 +n10563403 +n01817263 +n07757511 +n03770316 +n02977438 +n01840775 +n03607923 +n03322704 +n02375302 +n01614038 +n01646555 +n03952576 +n02946824 +n12847008 +n03016389 +n11809594 +n03165096 +n03839671 +n02687821 +n01689081 +n03822656 +n02597608 +n12336727 +n01579578 +n03631922 +n03904909 +n11658331 +n04224543 +n12621410 +n03870672 +n04252331 +n09720842 +n01396048 +n11988596 +n00483205 +n02871005 +n01597022 +n02382039 +n07743902 +n02358890 +n07877961 +n05263448 +n01862399 +n04136800 +n10624540 +n11990167 +n02731398 +n03366974 +n03490006 +n01561732 +n02626265 +n10627252 +n12402051 +n08517676 +n10488656 +n03099274 +n03718581 +n11806219 +n01830042 +n07728585 +n03732114 +n10755080 +n03359285 +n07720277 +n03354207 +n01596273 +n04416005 +n01847253 +n07733567 +n09725653 +n04274985 +n00449977 +n07772274 +n12063639 +n01530439 +n01322508 +n04397768 +n07273802 +n04261281 +n10524076 +n01678343 +n03410938 +n01797020 +n02388832 +n07719616 +n03639497 +n09787765 +n07721018 +n11818069 +n04185529 +n11644462 +n12074408 +n00483848 +n01583495 +n11891175 +n03347617 +n03308481 +n02535258 +n07750872 +n07748157 +n02855701 +n04584373 +n02461830 +n02912557 +n12277578 +n03604311 +n03643253 +n03031152 +n04039742 +n03435743 +n13908201 +n04150153 +n03250405 +n01410457 +n02357401 +n12588780 +n12729315 +n01690149 +n02538216 +n03171228 +n02424909 +n06274760 +n03775747 +n04211857 +n12429352 +n12272239 +n11759853 +n03401129 +n12649317 
+n02625258 +n12651611 +n03603442 +n02803934 +n03861271 +n02605936 +n02018368 +n12711984 +n02811936 +n04612026 +n01339471 +n02923682 +n09194227 +n04346157 +n03939178 +n12635532 +n01593028 +n01793249 +n02380464 +n12400720 +n07708398 +n12020941 +n12492106 +n12850336 +n12749679 +n02892948 +n12591017 +n03193423 +n01791463 +n11979527 +n12134025 +n12167075 +n09308743 +n13108545 +n01618503 +n07827284 +n07724492 +n02338145 +n04533946 +n01586020 +n07598256 +n01603953 +n12646740 +n03067518 +n04046277 +n01532511 +n07769584 +n11644046 +n12753573 +n02681392 +n08492461 +n07749446 +n04409384 +n01791954 +n12330891 +n04560882 +n10145480 +n04250473 +n02655848 +n02903126 +n11736851 +n11901294 +n12865824 +n03870105 +n00449892 +n04240752 +n11851258 +n04200537 +n12049562 +n01521399 +n03565830 +n07860447 +n03067212 +n01664674 +n07561590 +n02727141 +n02324514 +n02372952 +n01584853 +n07766173 +n11811706 +n03097362 +n04200258 +n02732572 +n01853195 +n12282527 +n09838621 +n02764505 +n04256891 +n12337617 +n12635955 +n07831267 +n11628793 +n12316572 +n07807834 +n02037869 +n01821869 +n02820556 +n04517211 +n01839086 +n03842986 +n07698401 +n02386224 +n07841800 +n01830915 +n11616486 +n11902389 +n03427202 +n12727101 +n01851573 +n02125494 +n07746186 +n11628087 +n07746551 +n03943115 +n11892029 +n02861022 +n11733312 +n01852329 +n09392402 +n12336224 +n07887099 +n03403643 +n04414199 +n07895100 +n02264232 +n02317781 +n07823460 +n07755929 +n02524202 +n04324297 +n11627512 +n01585715 +n02922578 +n00479887 +n02687423 +n02416880 +n11784126 +n12073991 +n01853870 +n01561452 +n04187970 +n10300154 +n02520147 +n12294124 +n07743224 +n12066018 +n11634736 +n02041678 +n11626585 +n02386141 +n03986949 +n07860331 +n12356023 +n12072722 +n03082280 +n12083113 +n12979829 +n01448594 +n03007444 +n07858978 +n01641739 +n02043333 +n12020736 +n02751215 +n04528079 +n01538200 +n07925608 +n12091550 +n03742019 +n03518305 +n01642539 +n03414029 +n04363991 +n03767966 +n02596067 +n01586374 +n02885882 +n04080138 +n11617631 +n02033779 
+n09451237 +n02310585 +n12648045 +n03955489 +n01752736 +n07899899 +n02299505 +n01579410 +n02156871 +n02998841 +n03759661 +n02050809 +n02683454 +n11621950 +n02910145 +n04967801 +n07896661 +n11906917 +n12275675 +n11611233 +n07736692 +n02312640 +n12588320 +n04399537 +n12757303 +n04197781 +n12717224 +n11635152 +n03122295 +n01792955 +n13133932 +n02518324 +n01584695 +n02915904 +n02967294 +n04345201 +n03019434 +n02470238 +n03049782 +n03101517 +n12709688 +n03716887 +n02422391 +n12638753 +n00288384 +n02162561 +n02053584 +n01317294 +n03334291 +n07814634 +n12273768 +n12406715 +n11644226 +n01646802 +n03460147 +n12338796 +n01972541 +n02147947 +n03890093 +n04127395 +n01581984 +n01681328 +n02213239 +n04582869 +n03254189 +n03274265 +n03186285 +n11839823 +n01624833 +n09792969 +n07891189 +n12023726 +n07619208 +n03466600 +n01849676 +n12190869 +n03079136 +n12317296 +n13001930 +n00477639 +n02944459 +n03903733 +n04131208 +n12710295 +n12180885 +n11612349 +n03443149 +n03982331 +n04264765 +n12642090 +n03237416 +n13868944 +n04046400 +n11705171 +n11979715 +n12597134 +n01609956 +n01568294 +n01469103 +n00443692 +n01606672 +n04556408 +n07690019 +n03977592 +n03358726 +n12696492 +n01573240 +n11632619 +n01772664 +n03453231 +n04179712 +n03646020 +n01812662 +n04306592 +n07724654 +n13908580 +n02903852 +n04284438 +n13132656 +n04317063 +n07829248 +n01589718 +n02654745 +n12294331 +n12515925 +n07900825 +n07721195 +n04189282 +n11907689 +n01624537 +n12333530 +n07762244 +n11757851 +n01599159 +n04038338 +n01568892 +n12691661 +n09744834 +n04307767 +n03120778 +n07920540 +n03781683 +n04185804 +n12080820 +n04354182 +n07574426 +n02579303 +n03046802 +n12078172 +n03210245 +n01614556 +n02304432 +n07713267 +n09724656 +n02861147 +n12755387 +n01483830 +n12921868 +n12026018 +n07817871 +n12062781 +n04241573 +n11621727 +n03376159 +n11815721 +n13007034 +n03540090 +n00450866 +n11619455 +n01528845 +n01568720 +n12743352 +n02871314 +n03606251 +n01490670 +n04246060 +n02053425 +n10780284 +n01915700 +n04510706 +n00456465 
+n01563945 +n11809094 +n09855433 +n04112579 +n03855333 +n09809925 +n03413684 +n02123478 +n12070712 +n03651843 +n02032355 +n01591005 +n01646648 +n02752615 +n02415829 +n03283221 +n04368496 +n01573360 +n02321170 +n10348526 +n04446844 +n07763792 +n12077944 +n04431025 +n02895438 +n10082687 +n07714188 +n02262449 +n03090172 +n12491017 +n01558461 +n12754781 +n04070415 +n04297098 +n03424862 +n01970164 +n09833536 +n01793435 +n01670535 +n09894445 +n09676247 +n01548492 +n12501202 +n03250089 +n03358380 +n02578928 +n12020184 +n02301935 +n03393017 +n12340755 +n01849863 +n01748906 +n03075946 +n01810268 +n01984245 +n04555400 +n12286988 +n04097760 +n02050586 +n12104238 +n01679962 +n02709101 +n01569060 +n12790430 +n01757901 +n13199717 +n11815918 +n07827410 +n02970534 +n12942572 +n07924276 +n04103918 +n11704093 +n07908647 +n07601686 +n12172906 +n04084889 +n02381261 +n02299157 +n11978713 +n12460957 +n02963503 +n03272810 +n12469517 +n03443005 +n01797307 +n02952237 +n11908549 +n13912540 +n03428226 +n10276477 +n01757343 +n01443243 +n01607600 +n03580518 +n12709103 +n07579688 +n04329834 +n12710415 +n11808932 +n10583790 +n02213788 +n11622184 +n12596709 +n02216211 +n07721942 +n07765361 +n01848453 +n11724109 +n02028451 +n02935017 +n12046028 +n10629939 +n00441073 +n07900958 +n12451399 +n02823964 +n04210120 +n01848840 +n10485883 +n07767709 +n02432704 +n11622591 +n03210372 +n07848196 +n11992806 +n02953197 +n07620689 +n01521756 +n03571625 +n03158186 +n12647560 +n02065407 +n01572782 +n09890749 +n05581932 +n07754451 +n03350204 +n13044375 +n12294723 +n12482893 +n04434531 +n12989938 +n12196336 +n01701859 +n07746334 +n11941924 +n02047411 +n12650379 +n10486166 +n01599556 +n01567879 +n12675876 +n01682435 +n02043808 +n12362668 +n12306089 +n02999138 +n01679626 +n03557270 +n01546039 +n11901759 +n01549053 +n11883328 +n06596727 +n03193107 +n11612018 +n03300443 +n03612010 +n03668488 +n12648888 +n01448291 +n11632167 +n10262445 +n09742101 +n09717233 +n04299370 +n03094159 +n04536595 +n03514693 +n02029706 
+n02886321 +n07816052 +n04045255 +n01851731 +n02627292 +n01841288 +n02739889 +n02932693 +n03784896 +n04569063 +n07902799 +n03863108 +n02607470 +n13200651 +n07916183 +n01573898 +n04347119 +n10076604 +n13033577 +n01824035 +n03630262 +n04426316 +n03064250 +n12262018 +n12048399 +n12279772 +n04143140 +n07829331 +n12891643 +n01826680 +n12646605 +n13103877 +n02023855 +n03086868 +n04163530 +n03736470 +n04358117 +n13872822 +n03159640 +n01680655 +n11611087 +n03980478 +n02978478 +n01555004 +n12402840 +n07763987 +n04387706 +n04979002 +n03258330 +n09856671 +n11624192 +n01538059 +n02003839 +n12552309 +n10469874 +n01576076 +n03643149 +n04419868 +n04586581 +n00483508 +n03131967 +n01847407 +n07929172 +n09683757 +n03786621 +n04369282 +n12733870 +n11612575 +n11619227 +n03301833 +n02176439 +n01569971 +n07935043 +n02563792 +n02051059 +n04482177 +n11859472 +n11710136 +n04115144 +n07864934 +n07691758 +n02620167 +n07748276 +n03415486 +n07835921 +n00452152 +n01848323 +n12906214 +n12075010 +n01563449 +n01499396 +n01570267 +n12047345 +n07920989 +n07601572 +n02683558 +n04428634 +n04345028 +n12161969 +n03460040 +n02561514 +n02006364 +n03582959 +n11812910 +n13185269 +n04297847 +n07896165 +n01552813 +n12361946 +n02031585 +n12766595 +n11622368 +n11695599 +n11615387 +n02509197 +n12409470 +n01314388 +n11758799 +n09846469 +n02675219 +n04253057 +n04041243 +n12276628 +n04381724 +n01855188 +n02203152 +n04403925 +n11895092 +n11924849 +n04172904 +n11888800 +n01546506 +n07906718 +n01489920 +n03436417 +n03615655 +n07765073 +n02434190 +n02004492 +n12282235 +n12406488 +n11981192 +n10373390 +n13183056 +n04332074 +n12818346 +n07731006 +n02598573 +n02438580 +n01957335 +n03356982 +n10288964 +n02629230 +n02042759 +n12319414 +n01451426 +n03521675 +n02016066 +n01813532 +n13207335 +n11805544 +n04401828 +n02952109 +n03963294 +n10013811 +n12058630 +n01551711 +n01574560 +n01858780 +n10093818 +n03858183 +n01550172 +n03571280 +n02309242 +n10258786 +n01569423 +n10134178 +n08578517 +n04445327 +n03250279 +n02584449 
+n03223553 +n04523831 +n04485423 +n02050442 +n04474035 +n04528968 +n02649546 +n01913166 +n09971273 +n04517408 +n02437482 +n03824713 +n03778817 +n07643026 +n01613177 +n12022054 +n07714448 +n07592768 +n00454493 +n03296328 +n02305929 +n03084834 +n03698815 +n12093600 +n08649711 +n03466493 +n04067658 +n03041114 +n03514451 +n01491006 +n04178329 +n03790953 +n03938401 +n02048115 +n07768858 +n03273740 +n10333601 +n05418717 +n12754003 +n02098806 +n03314608 +n01565930 +n12113195 +n12284821 +n12483427 +n04332580 +n10382710 +n03416094 +n02837887 +n03917198 +n14131950 +n04414476 +n11861641 +n11903671 +n01841441 +n09872066 +n01806467 +n04964799 +n00467320 +n01595974 +n03220692 +n01339083 +n01825278 +n11727358 +n04518343 +n11984144 +n07724269 +n02292692 +n02324850 +n01753032 +n01624115 +n11816649 +n07930062 +n02460451 +n12319204 +n04340521 +n12325234 +n01541102 +n02979836 +n00141669 +n01822300 +n11658544 +n12272883 +n03334382 +n11726707 +n03639077 +n07904934 +n03516367 +n03698723 +n03553248 +n11812094 +n03724417 +n01540566 +n02341974 +n11819912 +n07734555 +n02987379 +n03580845 +n12546962 +n02548247 +n12753245 +n07768423 +n12849279 +n11617090 +n02912894 +n07840027 +n12295033 +n12703383 +n02696165 +n10419785 +n04426427 +n03694639 +n11712282 +n04142999 +n01597737 +n03801533 +n01495493 +n07774719 +n03267113 +n01742821 +n03859170 +n03416640 +n03320959 +n12733218 +n02017725 +n13229543 +n09344324 +n04965451 +n01490112 +n10069296 +n12084555 +n04554406 +n04086446 +n02976249 +n02656032 +n02424486 +n02381609 +n09934337 +n04573937 +n07685399 +n02800497 +n02905152 +n02951703 +n07760153 +n03609397 +n00447463 +n03680512 +n02046939 +n03288886 +n11870418 +n03386544 +n07767171 +n07847453 +n12687044 +n01664492 +n03099147 +n03463381 +n02125081 +n12920204 +n03517647 +n02603540 +n12267411 +n11933546 +n11947802 +n04387095 +n12975804 +n02973904 +n13195341 +n04048441 +n11753143 +n03212114 +n03298858 +n04366116 +n01424420 +n10450161 +n01442972 +n07877299 +n04503593 +n04349306 +n12969425 +n12597466 
+n03092656 +n07914995 +n03487886 +n12223569 +n01756733 +n13919919 +n04175147 +n02029087 +n03530511 +n02425887 +n03572107 +n03927539 +n03383099 +n04130907 +n01632601 +n07823105 +n10378026 +n02382850 +n07613266 +n03235180 +n02810782 +n12708654 +n11636835 +n02823124 +n03402941 +n12121610 +n03715114 +n04052658 +n00480366 +n12493208 +n04255163 +n12145477 +n01489709 +n12402596 +n01598074 +n03837606 +n02628062 +n04103364 +n03247083 +n02032480 +n07736256 +n12578916 +n09218315 +n02218371 +n03730334 +n02080146 +n03836906 +n02868638 +n02198859 +n12744387 +n02942460 +n11754893 +n12274358 +n02725872 +n09218494 +n03942920 +n07574780 +n02921756 +n01757115 +n02763306 +n11758122 +n10508141 +n02303284 +n04083800 +n13879049 +n12765115 +n12075830 +n02666943 +n11980318 +n07907037 +n12794135 +n02333909 +n03870980 +n07586718 +n11923174 +n10782471 +n01493146 +n12294871 +n11726269 +n12932173 +n07825972 +n12732009 +n03572321 +n07682197 +n03423306 +n12495895 +n03545756 +n03557692 +n03785237 +n07902937 +n09899671 +n12061614 +n07902443 +n01449374 +n12632335 +n03474896 +n03539433 +n04310904 +n03902482 +n12006930 +n03285578 +n04200000 +n03912218 +n07821260 +n03548626 +n03223686 +n11826198 +n03165616 +n02104280 +n09981278 +n09382099 +n03732458 +n03987990 +n09946814 +n12270741 +n07737745 +n04172776 +n10189278 +n03543012 +n12629666 +n02180875 +n04087432 +n12961879 +n03321954 +n12528549 +n02424085 +n09843443 +n03846677 +n12304703 +n09873473 +n03410571 +n03041810 +n02425228 +n01562451 +n03615790 +n10081204 +n03985881 +n07842130 +n02890513 +n03649797 +n02381004 +n12560621 +n12523475 +n07687626 +n11905749 +n11759404 +n12905412 +n03542605 +n03983612 +n12573474 +n11972291 +n03767459 +n02698634 +n12713866 +n13084834 +n02202006 +n13108323 +n02631475 +n10737103 +n03637898 +n03069752 +n12400489 +n09692915 +n10242328 +n02794664 +n12465557 +n12085267 +n03348868 +n12754981 +n02745611 +n10504206 +n12073554 +n02835724 +n04605572 +n02825961 +n03528523 +n12116429 +n02973805 +n12708941 +n01544704 +n04180229 
+n09403211 +n08242223 +n02146371 +n12127768 +n09770359 +n03295246 +n01757677 +n04385799 +n02584145 +n07909593 +n12587132 +n13029326 +n04184316 +n07903643 +n01848555 +n10750031 +n02332156 +n12703557 +n03196990 +n12406902 +n02768973 +n12416073 +n02147591 +n09724533 +n09693982 +n12687462 +n01982068 +n03435991 +n03272125 +n07713763 +n03018712 +n03648431 +n03336575 +n07854184 +n12806015 +n07879174 +n03984643 +n03147280 +n02699915 +n07617708 +n01533651 +n12483841 +n01697611 +n02576906 +n03724066 +n03935116 +n09782397 +n01599269 +n10672371 +n12066630 +n03178674 +n15086247 +n03523987 +n02826068 +n12580654 +n02358390 +n01647640 +n10259997 +n03738066 +n13915023 +n02639605 +n03174450 +n12269406 +n09874428 +n03432061 +n04386051 +n03923918 +n04592465 +n12480456 +n10333439 +n04206790 +n01443831 +n02967626 +n07733712 +n03746155 +n12947313 +n11690254 +n12244650 +n12670758 +n08658309 +n12710693 +n11860555 +n03485198 +n03047799 +n04461570 +n07600177 +n02126640 +n12704343 +n02866386 +n03008976 +n04532831 +n03465426 +n12691428 +n01641206 +n04962062 +n03254046 +n04425804 +n02014524 +n03439348 +n02538010 +n11603246 +n12265600 +n12277800 +n04016240 +n12086192 +n09650729 +n01549641 +n03112719 +n04961062 +n02710324 +n12049282 +n12362274 +n11969607 +n12856680 +n02201000 +n07863802 +n03360622 +n07601809 +n04354487 +n12898774 +n12939282 +n03109693 +n12867826 +n12441390 +n12915811 +n12879527 +n04137355 +n04131368 +n03527149 +n10164492 +n09932508 +n12426623 +n12575812 +n02557318 +n10263790 +n04309548 +n00476235 +n04194127 +n11876634 +n10327987 +n03499354 +n02616851 +n04464615 +n03615406 +n02744844 +n11732567 +n10347446 +n09752519 +n04228215 +n10004718 +n07899533 +n12030908 +n15102894 +n12044467 +n11711764 +n02610066 +n03415749 +n04562496 +n02034295 +n02297442 +n03566193 +n12506991 +n07774842 +n12827270 +n14908027 +n12242409 +n04072960 +n02829596 +n12496427 +n02266050 +n13108481 +n12473840 +n08677424 +n12076223 +n15091473 +n02815749 +n04549028 +n12558425 +n12023407 +n04179824 +n02378541 
+n03188725 +n12517445 +n07573347 +n02004131 +n11921395 +n12570972 +n10602470 +n12095647 +n03854421 +n02450295 +n02792409 +n03543735 +n12836337 +n12204175 +n12152722 +n07900734 +n12517642 +n02775039 +n12607456 +n03376938 +n12179122 +n09873348 +n01847978 +n07888816 +n10453184 +n09675922 +n01851895 +n12865562 +n01797601 +n03711044 +n02738859 +n12064591 +n04033425 +n08551296 +n01650690 +n01537895 +n04207151 +n10087434 +n12261808 +n09438844 +n10364198 +n01814755 +n01583209 +n12270946 +n11892817 +n03344642 +n04117464 +n07847917 +n04003241 +n10362319 +n10477713 +n03495570 +n07560542 +n04363777 +n04534359 +n02404906 +n03349892 +n07712267 +n02960352 +n07866277 +n07857170 +n00324978 +n02755823 +n03150511 +n04211528 +n01899894 +n07588299 +n11874081 +n03425325 +n04506506 +n11949402 +n02952374 +n03309110 +n12159388 +n07591049 +n03068998 +n03228254 +n10279018 +n04173046 +n07728053 +n13052931 +n01597906 +n12368451 +n02767665 +n09435739 +n03915900 +n09728285 +n03292603 +n03331077 +n07817160 +n07917392 +n12540250 +n04153025 +n10209082 +n03968581 +n12676534 +n11824146 +n03521899 +n01853666 +n04292921 +n12332030 +n03984759 +n02863014 +n07801091 +n07723177 +n03289660 +n01533481 +n04488202 +n03468821 +n02382338 +n03543254 +n01961985 +n07915918 +n03703862 +n02771004 +n02047045 +n03877674 +n13141415 +n03529629 +n02240517 +n03675235 +n04491638 +n12384037 +n04419642 +n03019685 +n07591586 +n04496726 +n12985420 +n12927013 +n12196694 +n03473227 +n11621547 +n02988066 +n10451450 +n07729828 +n09618760 +n12196527 +n01555305 +n12830222 +n11950877 +n13190747 +n12160303 +n12390099 +n02818135 +n03163381 +n04554211 +n03244919 +n07897975 +n03386726 +n04290615 +n02011281 +n12407890 +n04123448 +n07904865 +n03447358 +n02393940 +n07931870 +n02937958 +n04318787 +n04587327 +n12807409 +n04112430 +n07560193 +n12774299 +n02618827 +n07854982 +n03757604 +n03817191 +n12793494 +n02324431 +n03013850 +n04113641 +n01612476 +n03127408 +n02038466 +n03799876 +n04257684 +n03382292 +n10449664 +n04394630 +n10275395 
+n07698250 +n12329473 +n07694659 +n07642742 +n02563648 +n08583455 +n02557182 +n02775178 +n09274152 +n03189083 +n12570703 +n04211219 +n12486574 +n03073694 +n11969166 +n02475078 +n02976350 +n08584914 +n07899660 +n10116702 +n01613807 +n12461109 +n04025508 +n12451240 +n12596849 +n12079963 +n03541269 +n04561422 +n11699442 +n07725255 +n03460297 +n07616748 +n12757458 +n03103563 +n02813752 +n07698782 +n12840362 +n01543632 +n01602832 +n01875313 +n12472024 +n02926591 +n02872333 +n10728624 +n12532564 +n03882960 +n12333053 +n03684224 +n13146583 +n03436075 +n04154340 +n03868643 +n02598878 +n04139140 +n03266371 +n04083309 +n12506341 +n12200143 +n03503477 +n12807773 +n03123917 +n13029760 +n10173771 +n03659809 +n12047884 +n12759273 +n04193377 +n04258438 +n04597400 +n04579986 +n03719743 +n04299963 +n02864504 +n10510245 +n03417970 +n09719794 +n03138344 +n02085272 +n07694516 +n12665857 +n01642257 +n03229244 +n10581890 +n10318293 +n03635108 +n10652605 +n12189429 +n09934774 +n11709205 +n04207903 +n10296176 +n10603851 +n03450734 +n13223588 +n12754648 +n09886403 +n07751280 +n11950686 +n07814390 +n12799776 +n01646902 +n09796809 +n12819728 +n01938454 +n02410011 +n07607138 +n02119634 +n10332861 +n09230202 +n02757061 +n02849885 +n15092227 +n12151615 +n03111041 +n02413050 +n03506560 +n07744057 +n04030518 +n12544539 +n04089836 +n02038993 +n13882201 +n12099342 +n01946630 +n10095769 +n02982416 +n12957924 +n13215586 +n07726525 +n12452836 +n03801671 +n04598318 +n01449712 +n12428747 +n04119751 +n10509063 +n07694839 +n02782602 +n11626409 +n02573704 +n12399384 +n12388989 +n01601068 +n11971406 +n04367011 +n07930315 +n12925179 +n04967674 +n03497352 +n03653833 +n01819465 +n03688192 +n02802990 +n03393761 +n04430475 +n13107694 +n10384496 +n07867164 +n12449526 +n01515303 +n12574320 +n01444339 +n07919310 +n03453443 +n04173907 +n02887489 +n07772788 +n03629520 +n02580830 +n11705387 +n12069679 +n01956344 +n02406533 +n03973402 +n03938037 +n04969952 +n04103094 +n04393808 +n07715407 +n04172107 +n01917882 
+n12085664 +n07608429 +n09835230 +n04135024 +n07842605 +n12568186 +n04339879 +n07691091 +n01801876 +n00474568 +n01807105 +n12128071 +n01673282 +n11948864 +n03991837 +n09659188 +n02070174 +n02670683 +n12454949 +n10385566 +n11631854 +n12305293 +n12002428 +n12948495 +n12757816 +n11852028 +n10690648 +n09283866 +n03214582 +n03423877 +n04127521 +n03006626 +n09283193 +n07712559 +n01447331 +n02981321 +n02658531 +n11947629 +n02419634 +n02420828 +n11923637 +n12570394 +n11968931 +n12731029 +n09749386 +n07736813 +n03967396 +n11908846 +n03029445 +n02426481 +n01964271 +n13198914 +n04484432 +n12656685 +n10806113 +n11849983 +n03236423 +n10649197 +n07688624 +n03057541 +n12015221 +n02094931 +n02014237 +n07560331 +n02801450 +n04206570 +n07556406 +n11627908 +n11889619 +n07852229 +n04063154 +n02713364 +n02783459 +n12877838 +n02930214 +n02125010 +n02407276 +n07815424 +n12855494 +n12530818 +n07750449 +n01963317 +n10082997 +n03245724 +n03012013 +n03555006 +n02421136 +n03332989 +n04375405 +n03746486 +n12636224 +n03278914 +n07917133 +n12504783 +n09416890 +n03896526 +n02258198 +n12983048 +n03837698 +n12869061 +n04541987 +n01637615 +n04401949 +n02241426 +n13220122 +n07876651 +n03729308 +n02364840 +n01339801 +n03418915 +n09257843 +n11614039 +n09731343 +n03809603 +n05399243 +n01569262 +n11901597 +n03124474 +n01566207 +n03796522 +n12595699 +n04573281 +n09689435 +n11859737 +n03201529 +n12902662 +n03374372 +n03760944 +n09189157 +n01517966 +n10431625 +n02898269 +n03693707 +n04369025 +n07834618 +n04095342 +n02786331 +n03822504 +n02284611 +n09862621 +n03436891 +n07688898 +n12435777 +n03949317 +n12443323 +n12273114 +n12623077 +n04333869 +n07907831 +n07774596 +n05450617 +n03320262 +n04190376 +n12671651 +n11819509 +n07588111 +n09756049 +n07611046 +n04973291 +n11602873 +n00120010 +n03500699 +n03844815 +n03708843 +n04452528 +n04387261 +n09889065 +n10147121 +n03318294 +n12599435 +n04164406 +n01965529 +n11636204 +n11791569 +n12275131 +n02977330 +n07851443 +n04132603 +n07824191 +n09760609 +n12190410 
+n07915491 +n12665271 +n10120671 +n02570164 +n10208950 +n02163297 +n02244797 +n09842528 +n08645104 +n01841679 +n11603835 +n04488857 +n07814487 +n01953762 +n04612373 +n11877193 +n03198500 +n03981924 +n01943087 +n11552806 +n04414909 +n03005033 +n02457945 +n10500217 +n10375314 +n04607242 +n07914777 +n09832456 +n12915568 +n12813189 +n10578021 +n03519081 +n07801779 +n12026476 +n03296081 +n03850492 +n07902121 +n09881265 +n12562785 +n03290195 +n10131151 +n10078719 +n01558765 +n03917814 +n02045596 +n07734183 +n03414676 +n07933154 +n02126787 +n12148757 +n12263987 +n07684164 +n03406966 +n01492569 +n02988963 +n12963628 +n09964202 +n03417749 +n01854838 +n02921029 +n02183096 +n11762433 +n11722466 +n02387093 +n02768655 +n12519089 +n09871229 +n07938313 +n10502329 +n11989393 +n03768916 +n13145040 +n11813077 +n04457910 +n03655720 +n03703945 +n11876803 +n01438581 +n07910379 +n07847827 +n02300797 +n09245515 +n10754189 +n04581102 +n12513172 +n02458135 +n03762332 +n11789589 +n09695620 +n03850053 +n07911249 +n12342852 +n12753007 +n07748574 +n07727458 +n03696568 +n04304680 +n07723039 +n07775197 +n07577144 +n03043693 +n04374735 +n01858281 +n09228055 +n09466678 +n01949085 +n02024479 +n11623815 +n02704645 +n07894451 +n01751472 +n01646388 +n01317916 +n13880994 +n10300500 +n11794024 +n03735963 +n04610274 +n11854479 +n07754894 +n02639087 +n02122510 +n02262803 +n12732966 +n04529108 +n13194036 +n09990777 +n10009276 +n12088223 +n12155009 +n07886176 +n04278247 +n04222723 +n11707229 +n01999186 +n07851641 +n12741792 +n01315213 +n10033412 +n04249582 +n03586631 +n03237839 +n12037499 +n12014085 +n07756325 +n01636352 +n03905947 +n08611339 +n07693590 +n03724538 +n09791816 +n01666585 +n10588965 +n11613219 +n10542608 +n12913791 +n10528023 +n03171635 +n11923397 +n12854600 +n10410246 +n12698598 +n04135118 +n09844457 +n04441790 +n03882611 +n02337001 +n07907342 +n12561169 +n12027658 +n10719132 +n09851165 +n02801823 +n12330587 +n01683558 +n12162181 +n04387932 +n11704620 +n09679170 +n07601290 +n04028221 
+n10277027 +n09877750 +n11758483 +n10027246 +n03819336 +n10205231 +n12478768 +n03451711 +n12973443 +n01923025 +n03262717 +n07807594 +n00475535 +n07744430 +n02341475 +n04614655 +n07924747 +n03388323 +n12680402 +n03202940 +n04534520 +n09661873 +n15092059 +n11832480 +n04198355 +n12529220 +n12389130 +n12304115 +n03234952 +n07610620 +n02868975 +n04442741 +n05282652 +n02820675 +n12795352 +n12675299 +n08547468 +n04189651 +n04141198 +n04513998 +n12273939 +n12482668 +n12858618 +n01958346 +n03172038 +n10280674 +n04301760 +n02631330 +n12433178 +n07763107 +n03068181 +n07565259 +n03605598 +n13177884 +n04005197 +n09751496 +n12737383 +n07648997 +n09839702 +n09442595 +n07925229 +n12150722 +n11898775 +n09904208 +n02207345 +n07642361 +n07685918 +n03205458 +n10574538 +n09742315 +n02599557 +n03585682 +n04273659 +n02200850 +n03410740 +n03391301 +n07726672 +n09782167 +n13155305 +n02067240 +n07561848 +n07728708 +n12463134 +n12228229 +n09743487 +n12225563 +n03421669 +n03226375 +n03973945 +n12498055 +n04483925 +n04564278 +n11890150 +n12519563 +n12754468 +n04353573 +n11615607 +n04430896 +n04585128 +n10395828 +n10773665 +n02772435 +n09881895 +n12663023 +n01615303 +n12803754 +n09445008 +n03955296 +n05245192 +n05486510 +n07899769 +n07575510 +n02307681 +n03814817 +n02670186 +n03598515 +n12797860 +n03518135 +n07587962 +n12630763 +n06273743 +n09843824 +n03226254 +n12407222 +n02961544 +n12951835 +n06417096 +n02016659 +n01441117 +n07735404 +n09411189 +n13896217 +n03262248 +n03451120 +n02525382 +n03375329 +n04155068 +n12916179 +n10297234 +n11907100 +n03423568 +n04360914 +n12027222 +n12199790 +n01744270 +n09896401 +n07925116 +n03693860 +n04414319 +n07767549 +n03555564 +n04043411 +n07872593 +n03774461 +n03129471 +n04497801 +n11756870 +n09776346 +n04530283 +n01520576 +n12828220 +n01583828 +n04120842 +n09676021 +n04344734 +n01916388 +n12513613 +n09861863 +n02310334 +n03318983 +n04533499 +n02427576 +n12727518 +n04502059 +n11725480 +n11987126 +n11876204 +n03504205 +n09720595 +n12315999 +n12935609 
+n04452757 +n12201331 +n01603152 +n10772092 +n03156279 +n12723610 +n02003037 +n03244775 +n07802963 +n11954642 +n07770034 +n09931165 +n10559508 +n01745902 +n07654148 +n10070108 +n01585287 +n13196003 +n04389718 +n10253122 +n03730893 +n02983357 +n02783900 +n01680813 +n03072440 +n03109253 +n03274435 +n11655974 +n10048612 +n07849733 +n07896994 +n03792334 +n03035832 +n03819448 +n03105088 +n11943992 +n01485479 +n01699675 +n11795049 +n12086778 +n01840120 +n07753980 +n10685398 +n04346428 +n04532398 +n07709172 +n02146700 +n09461069 +n03853924 +n01321456 +n12068432 +n09757449 +n03206282 +n03751757 +n13053608 +n11695974 +n12123741 +n03500209 +n04367371 +n02890940 +n01917611 +n07835331 +n02907656 +n04136045 +n12059625 +n03862862 +n12864160 +n00440039 +n03448590 +n12628986 +n04115802 +n03949145 +n12916511 +n12647893 +n09706255 +n13181811 +n07752109 +n04375615 +n01648620 +n04403524 +n09967967 +n12911079 +n03857687 +n02803539 +n01551080 +n10734891 +n13235159 +n04127633 +n07935878 +n12853482 +n10191001 +n03126385 +n10076224 +n01812866 +n12919403 +n03769610 +n09283767 +n03462110 +n11770256 +n12038898 +n09889170 +n11894558 +n10298647 +n02592055 +n02795670 +n11701066 +n12762049 +n02890662 +n07918193 +n02976455 +n03100897 +n13127843 +n12184912 +n00468299 +n12407079 +n12496949 +n03541537 +n05260127 +n01535140 +n01541760 +n11945783 +n07687053 +n07745046 +n12083847 +n02382132 +n12270027 +n10140597 +n03788914 +n01790711 +n02197689 +n03173270 +n10368624 +n04449290 +n01579729 +n07834872 +n07734417 +n02379630 +n01636829 +n12549192 +n12951146 +n13579829 +n03268142 +n11761202 +n02769669 +n09452760 +n04095577 +n12031139 +n02003577 +n12891469 +n03931885 +n01577941 +n04176295 +n12046428 +n03418402 +n13145250 +n11865874 +n12473608 +n11797321 +n01798168 +n09923186 +n02786736 +n01698782 +n09976283 +n03975788 +n14685296 +n01682172 +n07838441 +n02771286 +n03429137 +n03948950 +n02512830 +n02298218 +n10141364 +n02823848 +n02077384 +n12584715 +n11748811 +n02214773 +n03667552 +n04121426 +n04135710 
+n07579917 +n12275888 +n07826453 +n12167436 +n04586072 +n09877288 +n04248396 +n02761696 +n03038870 +n01490360 +n12353203 +n09785891 +n12057660 +n04146343 +n12557556 +n02081798 +n02917964 +n07898617 +n12597798 +n07574176 +n07764630 +n03008177 +n04255899 +n04434207 +n07897600 +n09929577 +n11811921 +n12415595 +n02893941 +n12276110 +n02821202 +n09690621 +n02508742 +n02077787 +n02390640 +n03764822 +n02257985 +n13033134 +n04559166 +n07865196 +n10506915 +n12051103 +n10473917 +n12775919 +n02971579 +n12880462 +n11837970 +n02063662 +n09840520 +n12019827 +n09208496 +n12836508 +n02982232 +n04219185 +n03332005 +n07914128 +n07862461 +n04250692 +n09267854 +n04561734 +n02076402 +n12344837 +n02919148 +n06592281 +n03668803 +n03062985 +n04246731 +n12112609 +n04012482 +n03558633 +n03982642 +n01998741 +n07665438 +n04209509 +n07913882 +n01749244 +n07801342 +n02611561 +n04488742 +n01897536 +n10624437 +n13128976 +n07931612 +n04300643 +n03727067 +n03360431 +n07593471 +n10253296 +n03297226 +n03854506 +n07879450 +n10562283 +n12557438 +n13154388 +n12862512 +n02126028 +n07752514 +n02387887 +n12066261 +n07666176 +n02806530 +n09988703 +n03721252 +n03221540 +n12195533 +n02682569 +n03622058 +n03943266 +n04207596 +n11721337 +n02427032 +n07910152 +n01551300 +n12861345 +n11660300 +n03786313 +n12966945 +n02046171 +n02797535 +n03546112 +n07711232 +n02044908 +n02998563 +n02652132 +n12634986 +n12187247 +n11645590 +n07582892 +n03065243 +n09911226 +n04396902 +n10763075 +n02359047 +n10400108 +n04294614 +n03991646 +n11728945 +n07766891 +n12277150 +n13141564 +n10563314 +n12426749 +n07827750 +n12403994 +n12627119 +n03420801 +n10203949 +n12830568 +n12280060 +n13180875 +n12659064 +n04239436 +n03823312 +n04367746 +n12448700 +n01896844 +n07581931 +n09384106 +n11625223 +n04198722 +n01477875 +n09932336 +n03477512 +n12281974 +n10117739 +n07759194 +n12281788 +n01405007 +n03077616 +n02304036 +n12947544 +n03140126 +n12356960 +n07807002 +n07877849 +n02956795 +n04373795 +n07925500 +n10359546 +n09730077 +n01694955 
+n10611267 +n04316498 +n07849912 +n12841354 +n07903543 +n10026976 +n04050313 +n03939844 +n03260849 +n07917507 +n12228387 +n03199775 +n01569566 +n02403920 +n04261638 +n02986160 +n03724623 +n01960177 +n03783430 +n07877675 +n10401639 +n04215153 +n03077741 +n02589623 +n12934985 +n03233624 +n04506688 +n12194147 +n09975425 +n07818825 +n12641007 +n10036692 +n02771750 +n12285900 +n01472303 +n10033663 +n10707134 +n03219966 +n11772879 +n10146416 +n10435169 +n10304086 +n12385566 +n03126580 +n12904314 +n03619196 +n02299846 +n03574243 +n12368257 +n03690473 +n01748686 +n09834378 +n07750736 +n02930645 +n01679307 +n03721047 +n02710044 +n07563207 +n02930080 +n09309168 +n03127203 +n02863536 +n02536165 +n01559639 +n09654518 +n02961035 +n12007406 +n12773651 +n04351699 +n03114504 +n06273414 +n02017475 +n01733466 +n02175014 +n07920663 +n03953901 +n09670521 +n09400987 +n11791341 +n02284884 +n12919646 +n07880325 +n03801353 +n01982347 +n07828642 +n01570421 +n03998333 +n03449309 +n10482220 +n12850906 +n12805561 +n12926689 +n03232543 +n04248851 +n03195959 +n04082562 +n03846100 +n07682952 +n07695652 +n11809271 +n09895561 +n04287898 +n09740724 +n02859955 +n09830400 +n03674731 +n02825153 +n04571686 +n13107891 +n10318607 +n07848093 +n13226871 +n08555710 +n03137473 +n02776978 +n03141455 +n12514138 +n01809371 +n09405078 +n01753180 +n02184473 +n11610215 +n03539546 +n12731835 +n04485884 +n03590588 +n10221312 +n04049753 +n03441345 +n02302244 +n12262185 +n15092650 +n11877646 +n10377185 +n01684578 +n03796605 +n07897116 +n03164344 +n12135049 +n10757050 +n01692523 +n04566756 +n07697699 +n07575392 +n10262655 +n04064747 +n07914006 +n12433769 +n07873348 +n04457767 +n10019072 +n02921195 +n03856465 +n04041408 +n12639584 +n12920955 +n11781176 +n07864756 +n03941013 +n03646148 +n04401578 +n11692792 +n02757714 +n02286089 +n04253168 +n03890514 +n07855510 +n03507458 +n04123026 +n11661909 +n12435152 +n04330746 +n09481120 +n03731019 +n03717285 +n03271030 +n02772101 +n07740597 +n02847852 +n12825497 +n12263738 
+n03342262 +n03603594 +n07804543 +n12932365 +n12695975 +n10297531 +n04054670 +n03175081 +n12703856 +n03832144 +n03966206 +n02414290 +n03619275 +n09738121 +n03290096 +n10585077 +n07731767 +n12409840 +n12026981 +n02278980 +n02752810 +n01654637 +n02654112 +n10314836 +n13023134 +n01823414 +n07461050 +n11902982 +n04543636 +n02204907 +n04049585 +n12304899 +n03073545 +n04272928 +n10315456 +n03975657 +n09899782 +n12288005 +n07005523 +n03795269 +n09823832 +n02242137 +n02907391 +n03643491 +n03245889 +n12285369 +n03061345 +n03797264 +n07838073 +n09219233 +n02859343 +n07608098 +n03920641 +n12578626 +n10688356 +n04542858 +n07834065 +n00443803 +n04181561 +n04570214 +n02047517 +n03295012 +n01633781 +n10610850 +n04035836 +n03001115 +n04593376 +n02393807 +n13061348 +n10123122 +n11800236 +n13207094 +n10140929 +n12167602 +n01809752 +n10421956 +n02764935 +n03424489 +n12889219 +n04046091 +n07714287 +n07708685 +n07736087 +n04142434 +n11961446 +n04521863 +n02414763 +n02901377 +n00467536 +n13085747 +n03855756 +n11846765 +n02530999 +n03063199 +n04258618 +n12204032 +n04424692 +n11758276 +n02653497 +n03766508 +n02026629 +n02572484 +n12339831 +n01635027 +n01668436 +n07821919 +n01543175 +n02689748 +n12528974 +n04024862 +n04184880 +n11720891 +n13869896 +n01678043 +n01647303 +n11532682 +n03236217 +n04963307 +n03012897 +n11682659 +n03191286 +n07643891 +n12737898 +n10680609 +n07924955 +n03879705 +n10461060 +n02523427 +n02013567 +n09893344 +n04124488 +n09863031 +n12454436 +n12305089 +n07709046 +n03805180 +n11940599 +n01691217 +n04198562 +n03978421 +n02357585 +n07818572 +n12870682 +n03798442 +n04154938 +n10550369 +n11957678 +n01958531 +n09936825 +n02334201 +n07910538 +n11978551 +n10562135 +n12700088 +n12784889 +n04480853 +n03281673 +n07588419 +n02968333 +n11935469 +n13046669 +n11730602 +n09643799 +n11849467 +n01758757 +n09638454 +n03267468 +n07914586 +n12104734 +n02961225 +n09827246 +n09917214 +n13079073 +n12634734 +n04089376 +n13034062 +n11714382 +n12753762 +n07683039 +n11840067 +n07689842 
+n12173069 +n12172481 +n04182152 +n07869522 +n10356877 +n02771166 +n03154895 +n07615289 +n12986227 +n12361135 +n03456447 +n12706410 +n12895811 +n02988156 +n03130761 +n10639359 +n03628215 +n02738741 +n01643507 +n07730708 +n03232309 +n02846733 +n04969540 +n03051041 +n12890928 +n03235327 +n04289576 +n07588817 +n10325774 +n03973285 +n09703485 +n02358584 +n03061674 +n03195332 +n02901259 +n07849619 +n04486934 +n07908812 +n01588725 +n03682877 +n11949015 +n04146504 +n04146862 +n07898247 +n03318865 +n04367950 +n07880213 +n04247011 +n01447658 +n12711817 +n03146687 +n02926426 +n12856091 +n11966896 +n02413593 +n09764900 +n03009794 +n03314227 +n10499232 +n10075693 +n04451318 +n12320806 +n11933728 +n07764315 +n12133682 +n09904837 +n12832538 +n03816530 +n07802863 +n04391445 +n09728137 +n03887330 +n04436012 +n03957991 +n07771731 +n06266973 +n10407310 +n10290919 +n07862244 +n01842504 +n10262561 +n12726159 +n07691954 +n07618119 +n03437829 +n11966617 +n03629100 +n04231905 +n04208760 +n03344305 +n03684143 +n12934174 +n08645212 +n03556679 +n12109365 +n03751458 +n02380875 +n02025389 +n02770721 +n09830629 +n02800675 +n04951186 +n04483073 +n12710577 +n12789054 +n12058192 +n11777080 +n07716203 +n09618880 +n04525821 +n04016846 +n02918330 +n10375052 +n13158512 +n13090871 +n02929582 +n02308735 +n10487182 +n02213663 +n07608339 +n04384593 +n12890490 +n03992436 +n02994573 +n13231078 +n12880244 +n01651059 +n02925009 +n09686401 +n13219976 +n09981540 +n04582771 +n06267758 +n09893502 +n13214340 +n03272940 +n12554911 +n02214341 +n04137089 +n03874487 +n04573513 +n12003167 +n12004547 +n13065089 +n01903346 +n04373428 +n02216365 +n02024185 +n12577895 +n11698042 +n07586318 +n11705776 +n03030353 +n04486213 +n07885705 +n07928163 +n02356612 +n02767038 +n02897097 +n11662371 +n04128710 +n09842395 +n07683360 +n11533212 +n08495908 +n12841193 +n03669886 +n07768068 +n02381831 +n12081215 +n02757337 +n02811618 +n10144338 +n01379389 +n09698644 +n12779851 +n10400618 +n11801891 +n12322099 +n12408077 +n02767956 
+n08640962 +n07816839 +n03021228 +n10346015 +n07868830 +n07917272 +n10076957 +n12865708 +n04290259 +n03595264 +n03986224 +n07825194 +n01610100 +n04417086 +n12995601 +n12734070 +n15091129 +n12428412 +n07587331 +n02405101 +n03108455 +n03594523 +n04489695 +n03892425 +n13032618 +n04409011 +n07590752 +n15092942 +n03914583 +n13066448 +n03532919 +n10639637 +n04566561 +n13223843 +n07904637 +n12347158 +n02720048 +n03901229 +n03936466 +n10574840 +n03782794 +n12397431 +n07908567 +n12580896 +n02697221 +n09791419 +n02577403 +n07870069 +n02136103 +n04318892 +n01462544 +n09747191 +n12287836 +n03067093 +n03934565 +n03543945 +n13126856 +n02240068 +n01585422 +n12413301 +n03246454 +n01876034 +n03635330 +n11680596 +n03228365 +n03082656 +n11609862 +n12859986 +n03934229 +n10233248 +n03166514 +n12166793 +n10115430 +n03327553 +n03373611 +n02967782 +n12338258 +n01604968 +n01323155 +n02590094 +n03044934 +n07866409 +n12291143 +n14900342 +n12094612 +n07845702 +n07926250 +n10750640 +n04359500 +n09797873 +n09953350 +n03561047 +n12122725 +n12725738 +n01453087 +n04977412 +n04575723 +n13219833 +n12161056 +n04273285 +n12482437 +n12863624 +n04953296 +n03390075 +n10188957 +n02874442 +n04236935 +n09990690 +n12866459 +n04075715 +n09725000 +n12794367 +n12461673 +n03050453 +n03677115 +n12427391 +n07736371 +n02973236 +n02406749 +n12322699 +n12815198 +n10680796 +n03268311 +n02405799 +n12302248 +n09791014 +n01545574 +n07740033 +n07862095 +n09901337 +n04390577 +n03597916 +n12110085 +n11802586 +n04205505 +n07696527 +n12076852 +n04344003 +n03326660 +n02823586 +n03042139 +n01565345 +n07905296 +n01454545 +n07650903 +n07905386 +n12530629 +n02841187 +n02943964 +n03329536 +n09681234 +n03479121 +n03770085 +n04147793 +n11552133 +n03774327 +n13197507 +n07901355 +n10400437 +n07837912 +n02310941 +n07845087 +n02239774 +n04976319 +n03960490 +n05239437 +n06275471 +n01633406 +n04257223 +n12009420 +n10483138 +n02775897 +n07866151 +n07922512 +n02666624 +n03944024 +n03842377 +n01832493 +n07855907 +n03968728 +n04492060 
+n07879072 +n11635830 +n11802800 +n02357911 +n02431628 +n03730494 +n13099999 +n07768230 +n13147270 +n12331655 +n10237676 +n11855553 +n09759501 +n10620586 +n13181055 +n12309277 +n13183489 +n04382695 +n07679034 +n10495756 +n02173113 +n12764202 +n03683457 +n10298912 +n07680313 +n10160280 +n02205673 +n12053690 +n11653904 +n02931294 +n04093775 +n12856479 +n02427470 +n07608866 +n09954639 +n11639445 +n03364599 +n09924106 +n09683924 +n10419472 +n03089753 +n12620969 +n07604956 +n12940609 +n12564083 +n03514894 +n10343355 +n13068255 +n03805280 +n12793284 +n03140652 +n02666501 +n11717577 +n04267435 +n04593185 +n12820853 +n03934311 +n02630615 +n07767002 +n07723968 +n01631354 +n07931452 +n12414818 +n03097673 +n09944430 +n04457474 +n11850521 +n12227658 +n10131815 +n12408717 +n03566730 +n12777680 +n06273555 +n04357531 +n03759243 +n09861599 +n03015851 +n04175039 +n03392741 +n07859796 +n07741138 +n04474187 +n02266864 +n04553561 +n02667244 +n12720200 +n12432356 +n07806120 +n10362557 +n11929743 +n07765862 +n02963987 +n02762371 +n02747672 +n04289195 +n04056413 +n03039493 +n03894677 +n12338655 +n04422409 +n12079120 +n10252222 +n10168837 +n12919847 +n10297841 +n01340014 +n11710827 +n10167838 +n12278107 +n01384164 +n10498986 +n02742468 +n02899439 +n11752937 +n12107710 +n12315598 +n03985441 +n07605804 +n07686202 +n12884100 +n13121349 +n11725311 +n10420507 +n11706761 +n01381044 +n03331599 +n12336333 +n10185483 +n07880880 +n01782516 +n12615232 +n03175457 +n12657082 +n01750437 +n07918879 +n13213066 +n12927494 +n02910542 +n06273986 +n02161338 +n10235024 +n12180168 +n03659950 +n02160947 +n11861853 +n09866817 +n09279986 +n12393269 +n01552034 +n05526957 +n02956883 +n12818966 +n09753792 +n03114236 +n12273344 +n12546617 +n13177048 +n02129991 +n01731941 +n01628770 +n12774641 +n07685546 +n03253279 +n10678937 +n12579038 +n08673039 +n01392275 +n02379081 +n10530150 +n12851469 +n12414449 +n11694664 +n11877283 +n09708889 +n03585438 +n00483605 +n12332555 +n03323096 +n07851767 +n02417663 +n10667863 
+n02856237 +n09269341 +n01596608 +n09720033 +n13160604 +n04443164 +n02814428 +n11622771 +n10328123 +n04338963 +n01794651 +n12069217 +n07762740 +n02935387 +n11897116 +n10569179 +n12749852 +n10745006 +n07823280 +n12162425 +n09801533 +n03772269 +n04518643 +n07916319 +n12771597 +n02147173 +n10342992 +n03795123 +n11646344 +n12847927 +n07686021 +n12383894 +n04465050 +n14564779 +n04212467 +n12274863 +n02380052 +n04329958 +n12034384 +n04213353 +n04366033 +n04955160 +n02778294 +n12890685 +n03028785 +n03097535 +n04533594 +n01750167 +n01415626 +n12276477 +n07729926 +n07711371 +n12843970 +n10500419 +n12891093 +n03840823 +n12509665 +n11878101 +n04315342 +n07685031 +n12305819 +n10039271 +n12264512 +n03911866 +n13919547 +n12413419 +n03785721 +n02599347 +n03786194 +n04018155 +n12856287 +n09607903 +n02396088 +n10212501 +n10313000 +n07683617 +n03586219 +n03890233 +n03156767 +n12033709 +n01648139 +n04399846 +n10671736 +n07698672 +n10791115 +n07708124 +n02709908 +n04266968 +n01758141 +n10058962 +n09444783 +n03668067 +n02838345 +n02388143 +n12893993 +n12590499 +n01462042 +n02689434 +n13209808 +n04075291 +n02412629 +n01953594 +n03906463 +n03043423 +n02200509 +n10152763 +n12504570 +n04396808 +n03382413 +n03618101 +n02767147 +n02390101 +n03450974 +n12778398 +n03625539 +n02574271 +n04113316 +n07572616 +n11809437 +n04119230 +n03829954 +n10500603 +n04258732 +n02731900 +n10174330 +n01574801 +n08663703 +n12558230 +n03981760 +n07732904 +n11875523 +n11823436 +n03238286 +n03079494 +n04281260 +n07873057 +n11686912 +n10568608 +n07593004 +n04271531 +n10037922 +n07838551 +n03615300 +n12624568 +n12940226 +n05242928 +n03680734 +n01589893 +n11652376 +n11893640 +n04119091 +n09696763 +n07851554 +n02660640 +n12124818 +n10370955 +n02663211 +n02414209 +n13187367 +n03258577 +n04375241 +n07617932 +n12240477 +n03417202 +n07595649 +n03839424 +n03087245 +n02431441 +n04396335 +n03484809 +n03426285 +n03592931 +n02912319 +n03488887 +n12187891 +n07592400 +n12918609 +n07858114 +n07567980 +n01548694 +n02726210 
+n02406859 +n10147262 +n05458576 +n02848921 +n03503233 +n02587618 +n03465151 +n03582508 +n11654293 +n03695452 +n02197185 +n04223170 +n10243273 +n03149135 +n02842809 +n03669534 +n03857291 +n02147328 +n12278865 +n12733428 +n03264906 +n09924195 +n10432189 +n12203896 +n03892728 +n12360958 +n10418735 +n01650901 +n12420722 +n03341606 +n02557909 +n07751858 +n03483971 +n12019035 +n03991202 +n02072040 +n03129848 +n04505345 +n02405440 +n03901974 +n11656123 +n11552976 +n10291822 +n10108018 +n09902731 +n03325691 +n12646072 +n04134170 +n12097396 +n07564008 +n01624305 +n03421117 +n02776007 +n10792856 +n07818133 +n03227184 +n10198437 +n04157099 +n12743009 +n07820960 +n12749456 +n13035925 +n05262698 +n03422771 +n02878628 +n12140903 +n07820297 +n03524745 +n09901921 +n03170872 +n10039946 +n12638964 +n11989087 +n03461988 +n04287451 +n04298053 +n07882420 +n04002262 +n02734835 +n11707827 +n07756641 +n12808007 +n10069981 +n12637123 +n12947895 +n04363082 +n04292080 +n11858077 +n04535252 +n12646397 +n12283147 +n12321077 +n02746595 +n02895328 +n07624924 +n12537253 +n11952541 +n02181477 +n01440160 +n03878828 +n12861541 +n02869563 +n04242084 +n03197201 +n09396608 +n04291992 +n07845863 +n04314522 +n12843557 +n04029647 +n12146654 +n13147386 +n12954799 +n11920133 +n03038480 +n03213715 +n02971473 +n04149374 +n04230387 +n00444340 +n11859275 +n07564796 +n02948403 +n10186068 +n04315713 +n02366002 +n02670935 +n13208302 +n10225931 +n07826340 +n04102872 +n02259708 +n11855842 +n09941089 +n08896327 +n10237464 +n12084158 +n03764995 +n03627954 +n12384375 +n10341343 +n07876189 +n04573379 +n07904293 +n07840520 +n12038038 +n03005147 +n10483799 +n02978367 +n01484285 +n13094273 +n04539053 +n01748389 +n10146816 +n07815839 +n12991837 +n03294604 +n03588841 +n04055180 +n03209477 +n09917345 +n04393913 +n12337391 +n12126084 +n01882125 +n07688130 +n02814116 +n09640715 +n12679593 +n12596345 +n03029925 +n11761650 +n04457157 +n12683096 +n07709881 +n03841290 +n13157684 +n07927836 +n03523134 +n03690279 +n10187491 
+n12451070 +n02682311 +n03978815 +n11806679 +n07808022 +n01386354 +n03622526 +n02369293 +n11885856 +n02289610 +n12663359 +n02624987 +n13173488 +n03027001 +n07896765 +n11935330 +n07814790 +n04242704 +n09959142 +n07589543 +n03551582 +n07843117 +n03556992 +n02060569 +n04000998 +n03825271 +n11946918 +n02874750 +n03479502 +n09919451 +n02176747 +n02080713 +n03400972 +n10222170 +n07926785 +n07852302 +n03012373 +n10438842 +n12868019 +n03634034 +n04210591 +n07853560 +n12374862 +n09248399 +n04355115 +n12908093 +n12906498 +n12875269 +n02791665 +n03146777 +n02854378 +n12414159 +n07821610 +n07595180 +n12238913 +n12141385 +n10761190 +n12165758 +n01653223 +n12956367 +n03695753 +n12416703 +n12346813 +n03405111 +n04304215 +n01624212 +n12674895 +n09850760 +n12407715 +n04156040 +n11610437 +n03395256 +n09970822 +n04229959 +n02530831 +n07870894 +n12098524 +n12828379 +n04057215 +n10751152 +n10053439 +n03674270 +n07869291 +n12256920 +n02535163 +n04282231 +n02136452 +n02365108 +n10328328 +n02315487 +n03325403 +n09231117 +n03342657 +n09980985 +n10702167 +n11961871 +n02065263 +n12857779 +n03219612 +n07805966 +n10699981 +n07691863 +n12831932 +n04179126 +n10208189 +n09765118 +n07922147 +n01631512 +n01947997 +n01405616 +n01892030 +n07827896 +n12964920 +n07749870 +n03276696 +n10020670 +n11828577 +n07624666 +n10590146 +n02407521 +n10253703 +n03270854 +n11610047 +n12981443 +n12413642 +n12302565 +n03177059 +n04594114 +n10227985 +n07728391 +n10395073 +n02810270 +n03569293 +n07812046 +n03843316 +n12477401 +n03802643 +n07618029 +n10755648 +n12837803 +n12454556 +n01636127 +n02809241 +n03270165 +n12035631 +n02962414 +n09750641 +n01793085 +n04346003 +n07922041 +n04164002 +n12499979 +n03301291 +n07921834 +n09656077 +n07599161 +n13155611 +n10194231 +n10063635 +n03601442 +n10366276 +n00475661 +n03943714 +n10377291 +n02624551 +n02568447 +n07589458 +n09691858 +n02685995 +n11919975 +n01690466 +n13211020 +n04114069 +n10530383 +n04200908 +n12631932 +n07916437 +n03219859 +n07918309 +n10368291 +n10253479 
+n03317889 +n13206178 +n02821415 +n10592811 +n12557064 +n12872458 +n10212231 +n07926346 +n09695514 +n09741816 +n03964611 +n07812913 +n09703708 +n02587479 +n10593521 +n03485309 +n03776877 +n12289433 +n07716504 +n10580030 +n03061893 +n03206158 +n09710041 +n04266849 +n07864065 +n12767648 +n02333190 +n12295429 +n02406432 +n01799679 +n07861983 +n02201626 +n03441582 +n03653975 +n02834506 +n12263204 +n10672662 +n03072682 +n03410423 +n11620389 +n04542095 +n07910970 +n03697913 +n02706806 +n09736798 +n12318965 +n07938594 +n12032429 +n03191776 +n04210288 +n01422335 +n03236093 +n11881189 +n02247216 +n12338146 +n03104512 +n00474881 +n04172230 +n01461315 +n04400109 +n10646140 +n02215621 +n10096126 +n03019806 +n11809754 +n02492948 +n10741367 +n10308504 +n07875560 +n02523110 +n07738224 +n02015797 +n10499631 +n03025165 +n03284308 +n03508881 +n10441037 +n10757492 +n07608721 +n09755241 +n04264361 +n04394421 +n03776997 +n03175843 +n04476526 +n02523877 +n13196369 +n10190122 +n03172738 +n02709763 +n02070624 +n04563560 +n04017807 +n03824589 +n07817758 +n03222722 +n01542433 +n13173259 +n04458201 +n12869668 +n12580786 +n02407763 +n09760913 +n10530571 +n11752798 +n09612700 +n07601175 +n11632376 +n10641223 +n03158668 +n03411208 +n01413457 +n03684740 +n10248008 +n12656528 +n11849271 +n07771891 +n12067433 +n12389727 +n11734698 +n04042204 +n07825399 +n12621945 +n07624757 +n03180732 +n09741331 +n10246317 +n04030414 +n07821107 +n04524716 +n03789603 +n12867449 +n10249869 +n02434415 +n07614103 +n03333349 +n04602840 +n09923996 +n02658811 +n13033879 +n03663433 +n02873623 +n07837545 +n12436907 +n02675077 +n01500854 +n04435552 +n01790304 +n11687789 +n03443543 +n09733459 +n01606177 +n12245885 +n11721642 +n02201497 +n12010815 +n04594742 +n02755984 +n07927716 +n04245218 +n03134118 +n13214485 +n12294542 +n12713521 +n03556173 +n12650038 +n07719058 +n04319774 +n10443830 +n10019187 +n09720702 +n07926442 +n10402709 +n03989777 +n11699751 +n09613118 +n02965122 +n04221076 +n01861330 +n12837052 +n02975589 
+n09668437 +n03012499 +n01418498 +n12451566 +n03585778 +n07692517 +n09672590 +n09741999 +n09748648 +n07621264 +n03482001 +n10185148 +n01542168 +n12536291 +n07846557 +n11840476 +n03130866 +n02631775 +n11730015 +n03715275 +n07680168 +n12175370 +n05427346 +n03665232 +n08611421 +n11730458 +n02413484 +n09783884 +n07888378 +n04611351 +n02247655 +n02136794 +n11649359 +n01382033 +n07889193 +n10405540 +n03510384 +n04420720 +n03585875 +n03812789 +n01835769 +n12139921 +n09762011 +n10103228 +n03477410 +n11930788 +n10064831 +n12311045 +n07681805 +n03136504 +n12887713 +n03886940 +n03130233 +n10197392 +n12333961 +n07672914 +n12723062 +n12599661 +n04268799 +n03696909 +n12809868 +n12452256 +n10710778 +n02571652 +n12117326 +n02450677 +n03041265 +n12544240 +n01966377 +n10252354 +n02378625 +n09814488 +n10569011 +n13067330 +n07928998 +n07890970 +n02187279 +n02592371 +n07846802 +n03475961 +n05448704 +n10410996 +n02851795 +n10093167 +n12468719 +n09876701 +n03057724 +n03469031 +n02344270 +n04248209 +n02687682 +n04467899 +n12897788 +n03436656 +n12539832 +n09906704 +n03190458 +n11843441 +n12130549 +n11823756 +n03153246 +n03684489 +n04160036 +n02908951 +n12855365 +n03518230 +n12225222 +n12933274 +n10432957 +n02921406 +n10156831 +n12239647 +n02826812 +n03411927 +n11602091 +n13200986 +n04244847 +n01330126 +n14938389 +n03001540 +n04387531 +n03423099 +n07608533 +n11723986 +n07600394 +n12529500 +n02403820 +n02587300 +n10333317 +n07935288 +n12680652 +n01449980 +n12153914 +n07803310 +n11741797 +n01881857 +n13081999 +n08644045 +n02061217 +n02173784 +n02660519 +n03104019 +n13137951 +n04538403 +n02621258 +n04515729 +n04165945 +n11919761 +n13078021 +n07861247 +n11959259 +n11801665 +n04070545 +n13210597 +n10218043 +n10717337 +n01365885 +n10718952 +n11979187 +n03880032 +n03798610 +n03477303 +n01876667 +n11860208 +n03401721 +n03360133 +n13230843 +n13194758 +n13190060 +n02564935 +n13894154 +n12754311 +n07697408 +n13171210 +n02035402 +n03736147 +n10396337 +n04554998 +n02793930 +n04126852 +n03654826 
+n09411295 +n06255613 +n01680983 +n10261862 +n01581874 +n10378780 +n10646641 +n03539103 +n03351151 +n04349913 +n03906106 +n02370525 +n03319576 +n04113968 +n09693244 +n02945964 +n03344509 +n04117216 +n03889626 +n03557840 +n09800469 +n04280487 +n07890890 +n12147835 +n12295237 +n03883664 +n04436992 +n02922877 +n10099002 +n01988203 +n10056719 +n11646517 +n03672521 +n04568713 +n10111358 +n03606347 +n04047733 +n12320627 +n10251612 +n10460033 +n01742447 +n11917835 +n10443032 +n13079567 +n04363671 +n10788852 +n10482587 +n03308614 +n12741586 +n12938667 +n04539407 +n01630148 +n02303777 +n13050940 +n04552551 +n02341288 +n04098169 +n04110439 +n11625391 +n12259316 +n02822762 +n10631131 +n04089152 +n03571439 +n04558199 +n12656909 +n03170292 +n02877642 +n12771890 +n03033267 +n12658603 +n13354021 +n12855886 +n11840246 +n03619050 +n07727252 +n12932706 +n13874073 +n01315805 +n02948942 +n12048928 +n03146449 +n10656969 +n09872557 +n03906590 +n04454792 +n12500309 +n04239333 +n01815036 +n09644657 +n10497645 +n02918455 +n07812662 +n04240434 +n10804636 +n11967878 +n04184095 +n11834272 +n05244755 +n02299039 +n12665659 +n12144987 +n07607492 +n11887750 +n13083461 +n04577139 +n09670909 +n07876893 +n02875948 +n04069582 +n10458111 +n10361194 +n09389867 +n01651778 +n11933387 +n13193143 +n12834190 +n03516266 +n02184589 +n10041373 +n02809605 +n04064213 +n04957589 +n12643113 +n02582721 +n07911061 +n07921360 +n10369417 +n10527147 +n04104925 +n03707372 +n01386182 +n10374849 +n09902851 +n08559155 +n02332447 +n11649150 +n11722036 +n01823740 +n04592356 +n10002257 +n10661732 +n07562379 +n07597263 +n04036776 +n13112201 +n09842288 +n07738105 +n04545984 +n09635973 +n02885233 +n02756854 +n07808479 +n03029296 +n01543383 +n02884450 +n09843716 +n04224395 +n10576676 +n10140051 +n07919894 +n07806879 +n10212780 +n09478210 +n12017127 +n03770224 +n07606191 +n03555217 +n09715165 +n12270460 +n12129738 +n11739365 +n02303585 +n07818029 +n05314075 +n03019304 +n09859975 +n09454744 +n13151082 +n12586989 +n00455076 
+n07741357 +n04957356 +n08659242 +n04577293 +n04126244 +n03131193 +n12428242 +n03569494 +n03781594 +n07743384 +n02892392 +n12576695 +n12199982 +n07693439 +n07719756 +n11884384 +n03043798 +n12351091 +n03690168 +n02214499 +n01839949 +n01831360 +n12642964 +n02957862 +n03125588 +n12883628 +n04002371 +n10747965 +n09744462 +n02853745 +n13030337 +n12156679 +n02761034 +n12587487 +n03374570 +n12728322 +n01731764 +n07918706 +n03696445 +n03185868 +n02805283 +n03868763 +n02202124 +n12369665 +n12449934 +n12650229 +n02656301 +n07743723 +n11702713 +n02927053 +n03916385 +n01486010 +n03986071 +n04188064 +n13897528 +n12414329 +n07718068 +n07837755 +n11735570 +n10464542 +n04091466 +n01315581 +n10374943 +n03989898 +n13220525 +n04076052 +n04062179 +n02414442 +n04414101 +n04446162 +n00480885 +n03536568 +n03773835 +n10728998 +n12643877 +n02255391 +n03799610 +n07847585 +n00446411 +n11910666 +n03139998 +n02296276 +n02889996 +n02786611 +n10363445 +n07854348 +n08583682 +n09912681 +n07896422 +n02368821 +n11935953 +n12185254 +n11738547 +n03809211 +n02448318 +n13066979 +n01987076 +n12009047 +n12839574 +n13174823 +n07902520 +n03369866 +n13209129 +n02593191 +n03853291 +n02620578 +n10071332 +n01813658 +n09895480 +n10134760 +n01316734 +n07845166 +n03175983 +n13132156 +n12814960 +n12883265 +n03637787 +n04310507 +n04133114 +n03900194 +n04129688 +n04449550 +n01805321 +n01717467 +n01573627 +n12271451 +n11722621 +n09976917 +n12232280 +n12905135 +n03451253 +n01655344 +n12346986 +n11987511 +n10517283 +n02941845 +n12730370 +n03121190 +n07917874 +n10023656 +n10151133 +n07695187 +n03258456 +n10639238 +n10682713 +n02085019 +n12343753 +n10749928 +n04595611 +n04410565 +n08500819 +n07719980 +n04016479 +n03232417 +n03469832 +n09834885 +n07925327 +n10094782 +n03632100 +n12734215 +n09845849 +n04047139 +n10743124 +n02604954 +n12270278 +n03036244 +n11991777 +n10168012 +n02561803 +n10531109 +n10344319 +n03804211 +n10513938 +n10732967 +n09917481 +n02950482 +n03148808 +n07910245 +n07925423 +n07889990 +n04302988 
+n07745357 +n04346511 +n07573563 +n02564403 +n12084400 +n10030277 +n09815455 +n04388473 +n12404729 +n10576316 +n12072210 +n11811059 +n01824344 +n03556811 +n03175301 +n07586485 +n13137010 +n11986729 +n04967561 +n03881404 +n07692114 +n07874995 +n02770585 +n07853345 +n02775689 +n04328580 +n01323781 +n07773428 +n02414043 +n02794474 +n02352932 +n07569873 +n12374705 +n03606106 +n04267246 +n04369485 +n11934239 +n12705698 +n11841247 +n07868045 +n03525693 +n12358293 +n02937010 +n09658398 +n12711182 +n03516647 +n04591631 +n10228712 +n11930353 +n03471779 +n12594324 +n02251593 +n04455579 +n02542017 +n03381450 +n03320845 +n12364940 +n09657748 +n12412987 +n01840412 +n10570704 +n10117267 +n03251280 +n10195261 +n12178129 +n12285049 +n02177775 +n10117415 +n03707766 +n04475309 +n05604434 +n03999064 +n12127575 +n01972131 +n09793946 +n01635176 +n02791532 +n07564101 +n07876460 +n02813981 +n10764719 +n03638743 +n12761702 +n02125689 +n11657585 +n09923003 +n13069773 +n02683183 +n04324515 +n11936946 +n12862828 +n02659808 +n02619861 +n13175682 +n11648039 +n07768139 +n12512674 +n12108613 +n02947977 +n12899971 +n03845107 +n07689490 +n02081927 +n07619508 +n10248377 +n10300041 +n10761326 +n09655213 +n02675522 +n04963111 +n01995686 +n03256631 +n10684630 +n04471912 +n12728864 +n03870546 +n02829246 +n09725546 +n03409920 +n13194918 +n10055297 +n02513248 +n01462803 +n11782266 +n13094145 +n07839478 +n13916363 +n07932454 +n09722817 +n07774479 +n10386874 +n12832822 +n01599388 +n02964295 +n04349189 +n07689313 +n11653126 +n02309841 +n02064000 +n04410663 +n04562122 +n02358712 +n09901786 +n10441124 +n12882158 +n12815668 +n10159289 +n01641930 +n03315990 +n12271187 +n10277638 +n07815163 +n12903014 +n07915366 +n04412300 +n01324799 +n03408264 +n09452291 +n03019198 +n11890884 +n10355806 +n03186199 +n04013600 +n12541157 +n06259898 +n06273294 +n11946051 +n01671705 +n04415257 +n01905321 +n04050600 +n12604460 +n04051439 +n02929184 +n11765568 +n10025060 +n02396796 +n04033287 +n13027557 +n03127531 +n10308066 
+n09729062 +n01593553 +n02476567 +n07609728 +n12970293 +n01419888 +n03215749 +n01684741 +n13067672 +n03870290 +n07846359 +n12961536 +n03356559 +n07727140 +n09843602 +n02378755 +n12044041 +n01977485 +n07718920 +n12060546 +n04265428 +n12237855 +n04006067 +n10227266 +n04361937 +n12134486 +n10097842 +n02264591 +n03912821 +n07594155 +n03116163 +n11771924 +n04155457 +n12394118 +n10507380 +n01844746 +n11901452 +n03024233 +n03383562 +n11806814 +n10062716 +n04204755 +n08613733 +n12907671 +n03533654 +n09826605 +n03109033 +n07606419 +n03537085 +n11615812 +n07695504 +n11694300 +n04520962 +n09971839 +n02664285 +n03402511 +n02061560 +n13133140 +n03548195 +n12877493 +n02425086 +n12845187 +n12488454 +n02975994 +n02071028 +n01457407 +n03685486 +n07605282 +n07771405 +n07827554 +n10538733 +n03438780 +n04379096 +n12686496 +n10001764 +n11848867 +n12125001 +n09886540 +n03275566 +n01442710 +n12789554 +n07858197 +n12722071 +n12868880 +n10441694 +n12409651 +n07727741 +n12289585 +n04069166 +n12686877 +n03723439 +n07815956 +n12543455 +n10778044 +n02200630 +n10074841 +n12640284 +n12589841 +n07592317 +n07866571 +n12712626 +n04228422 +n11711289 +n03590475 +n13081229 +n03045800 +n03639230 +n02874214 +n07615954 +n03204134 +n12053962 +n12769219 +n15006012 +n09873769 +n11818636 +n01959029 +n03349599 +n12227909 +n07576969 +n03638180 +n07742224 +n03390673 +n02344175 +n03770520 +n00447361 +n13235319 +n01983674 +n10061882 +n04267165 +n12493868 +n12713358 +n02930339 +n10493419 +n12918810 +n02582220 +n12248359 +n02644501 +n04596492 +n04538249 +n07905618 +n13230190 +n07808268 +n15005577 +n09351905 +n12730544 +n11937023 +n04024137 +n02238358 +n11646955 +n11618079 +n09849990 +n04060448 +n04220805 +n12725940 +n12004120 +n01484562 +n02669442 +n12132956 +n01756916 +n03980986 +n02256172 +n07716750 +n12119390 +n04047834 +n11934041 +n12828977 +n03648219 +n11873612 +n12909614 +n04397860 +n03908111 +n03261395 +n03695616 +n11668117 +n12014355 +n02896074 +n03988758 +n04426184 +n10328696 +n02477028 +n04507326 
+n04320871 +n03256472 +n01919385 +n03988926 +n13182164 +n07826250 +n03207548 +n01396617 +n04369618 +n07913774 +n13229951 +n03410022 +n12728508 +n01997119 +n03598783 +n01341090 +n03879456 +n01736796 +n02864122 +n13879816 +n02684962 +n12246037 +n02433729 +n04364397 +n09881358 +n02950120 +n03326371 +n02243878 +n01790812 +n12990597 +n03330947 +n07764486 +n03332173 +n10006177 +n03347472 +n07619301 +n10106509 +n12365285 +n01732989 +n07678586 +n04098795 +n07733847 +n03994297 +n12872914 +n02762909 +n07766530 +n13198482 +n02395855 +n12273515 +n04487894 +n07847047 +n12488709 +n02859557 +n04255768 +n02360933 +n03267696 +n03152951 +n10188715 +n10520544 +n13065514 +n02900594 +n03699754 +n01319187 +n01949499 +n10417424 +n01603000 +n12062105 +n09683180 +n09863339 +n01880716 +n10702615 +n03893935 +n10495555 +n04131499 +n02957252 +n02113892 +n07724078 +n12246941 +n04303095 +n01751215 +n04213530 +n12117695 +n12418507 +n01922948 +n12131405 +n13188767 +n01481498 +n03174079 +n02407172 +n11613867 +n10152616 +n10119609 +n04158250 +n11695085 +n07855105 +n02854630 +n03768683 +n12739966 +n12266984 +n12819141 +n12732605 +n13205249 +n11917407 +n01607429 +n02694279 +n07815294 +n06614901 +n07846471 +n12119717 +n02595339 +n12366186 +n10693235 +n12263410 +n12484244 +n10337488 +n04146976 +n01469723 +n07872748 +n03238879 +n12000191 +n07846938 +n03116008 +n12139196 +n04013176 +n10317963 +n12140511 +n02065726 +n01649556 +n10316862 +n01755952 +n04385079 +n12770529 +n02814338 +n01675352 +n11874423 +n01369484 +n10537708 +n07618281 +n07821404 +n02297819 +n03238762 +n03357081 +n02628600 +n07830986 +n12507823 +n04431925 +n11955532 +n03429771 +n10281896 +n12383737 +n12760875 +n09673091 +n12892013 +n06625062 +n04503269 +n03674842 +n12338979 +n04268275 +n12033139 +n11767877 +n07812790 +n12676134 +n04037873 +n10097477 +n12310638 +n12258101 +n09391386 +n13196738 +n13866626 +n12720354 +n10106995 +n07843220 +n03878294 +n04101375 +n07733217 +n10220080 +n04601938 +n10778148 +n12973937 +n10556825 +n12256708 
+n12583855 +n04259202 +n07628181 +n04226962 +n02777402 +n09674412 +n12188635 +n03776167 +n04504038 +n04156591 +n02270945 +n02264021 +n07826653 +n02980203 +n02059852 +n02102806 +n12921660 +n04477725 +n10107173 +n12837466 +n02697022 +n04350688 +n12110236 +n02177196 +n07899976 +n12639910 +n02368399 +n10009162 +n03950647 +n09248153 +n02425532 +n04044955 +n11933257 +n03460899 +n10147710 +n02379743 +n02413917 +n02890804 +n12915140 +n02146879 +n07915800 +n01787006 +n03646809 +n11677902 +n04065909 +n02088992 +n02887832 +n10115946 +n02306825 +n03719560 +n10456696 +n03758220 +n12625003 +n04021503 +n07563366 +n02531625 +n10304650 +n12855710 +n09735654 +n07853762 +n03512030 +n12898342 +n02297938 +n12618727 +n04082344 +n12953712 +n12617559 +n03035715 +n02532451 +n05399356 +n03602686 +n10082423 +n04607759 +n07581607 +n07594737 +n04030965 +n03464628 +n12103894 +n03039353 +n03522990 +n02964934 +n03169063 +n10153865 +n09653144 +n09941571 +n12907057 +n07768318 +n02600798 +n02187150 +n01811243 +n12252383 +n04495555 +n07678953 +n13181244 +n13069224 +n13184394 +n12765402 +n03471347 +n10208847 +n03697366 +n09840435 +n02506947 +n09709673 +n07928578 +n11935715 +n07848936 +n02757927 +n01999767 +n02245443 +n10260473 +n13898645 +n02701260 +n07840219 +n11785875 +n12385830 +n12017664 +n12145148 +n04530456 +n01929186 +n02384741 +n04113038 +n03296217 +n09723819 +n03766697 +n12143215 +n09929202 +n02684248 +n12119539 +n03566555 +n12941220 +n04124573 +n10750188 +n07733005 +n04230707 +n03829857 +n07756838 +n12244458 +n12543826 +n03514129 +n02762169 +n04435870 +n03342863 +n09745324 +n12369476 +n11652039 +n03915320 +n07746749 +n07608641 +n12642600 +n02389943 +n12137791 +n04111962 +n12493426 +n12454793 +n01455317 +n10728117 +n03281524 +n12195734 +n12353431 +n02477329 +n02678010 +n04557522 +n10162354 +n14942411 +n07806043 +n12274151 +n09835153 +n03983499 +n04086663 +n07851926 +n07868684 +n11926976 +n03972146 +n04310604 +n09675799 +n13880704 +n13173132 +n07577918 +n10720964 +n11937102 +n03349020 
+n12340581 +n03725506 +n03477143 +n10578162 +n01731137 +n03382104 +n11616852 +n01493829 +n09327077 +n03856335 +n03321843 +n02375757 +n02118643 +n08500989 +n03496486 +n04140777 +n12858987 +n02845293 +n04093157 +n07819682 +n10394786 +n12289310 +n02901620 +n01559160 +n07919165 +n12648196 +n11774972 +n11995396 +n10543937 +n10154013 +n03977158 +n01884476 +n12266528 +n11906127 +n12661538 +n04396650 +n12761905 +n04175574 +n10181878 +n12017326 +n12876899 +n09744346 +n07741706 +n04451636 +n07735981 +n03751590 +n03140546 +n03070396 +n03091223 +n12071477 +n07562017 +n09981092 +n09847344 +n12552893 +n12371202 +n02245111 +n01598271 +n04400499 +n02298095 +n15048888 +n02967170 +n04030161 +n10676434 +n01556514 +n13235766 +n02538562 +n12603672 +n03941586 +n02449183 +n07567611 +n12923257 +n02296021 +n11730933 +n12497669 +n02917742 +n07875926 +n02714535 +n13142182 +n02878107 +n07861334 +n02682811 +n03730655 +n03681813 +n12970733 +n02132320 +n12436090 +n07931280 +n04295353 +n12982590 +n01783017 +n13164501 +n02424589 +n01499732 +n12650805 +n04543509 +n10369699 +n03439631 +n13160116 +n07831663 +n05449196 +n13025854 +n10169241 +n02847461 +n10734963 +n13213397 +n03343234 +n12275317 +n02793414 +n04300509 +n01803893 +n11617878 +n02179192 +n03637480 +n04514648 +n03087521 +n10478827 +n11757190 +n12919195 +n04532504 +n01736375 +n04015786 +n04545471 +n12668131 +n04472961 +n14786943 +n07584938 +n02498743 +n07744559 +n10010062 +n10101308 +n07832099 +n02601767 +n10473453 +n02451575 +n02496052 +n03696746 +n12669803 +n07904072 +n04290762 +n11737125 +n07760755 +n12553742 +n12068138 +n12630999 +n02390938 +n02202678 +n02216740 +n02679961 +n13173697 +n11828973 +n02287987 +n04585318 +n10360366 +n07745661 +n03474352 +n07934800 +n12677612 +n03692272 +n13092240 +n04230487 +n11846312 +n12433952 +n11793403 +n03056873 +n05454833 +n12517077 +n12682882 +n02649218 +n09425344 +n07878283 +n02795978 +n10064977 +n12754174 +n02945813 +n01750743 +n03150661 +n13880415 +n12337800 +n04017571 +n09754907 +n04456734 
+n02967540 +n10621400 +n11744471 +n01971620 +n04148285 +n10781817 +n11991549 +n12305654 +n03943833 +n10330931 +n12918991 +n01783706 +n11933099 +n12931231 +n07589967 +n09666349 +n07853445 +n12714949 +n03548533 +n04158672 +n03809802 +n03080309 +n12800049 +n02578454 +n02834027 +n10067600 +n03044671 +n04198233 +n07930205 +n04357930 +n12221522 +n11957317 +n03085781 +n03723885 +n03614383 +n02661618 +n04292221 +n03426574 +n03838024 +n10442093 +n12399534 +n01450950 +n07876550 +n11937446 +n09870096 +n02631628 +n05460759 +n01710177 +n03660562 +n04283784 +n01497738 +n02232223 +n04209811 +n12837259 +n02864987 +n04499810 +n12654857 +n03493792 +n09688233 +n02312912 +n10057271 +n07606058 +n03258192 +n10507565 +n11930038 +n08679269 +n03812263 +n11662128 +n04085574 +n07643577 +n03981094 +n02796412 +n02513939 +n07686634 +n07936979 +n03168774 +n03816394 +n07625324 +n04138131 +n10383094 +n10222716 +n10381981 +n12254168 +n13223090 +n03056583 +n09910556 +n03277004 +n12649866 +n02089725 +n03688707 +n09665367 +n07849506 +n02843909 +n13141797 +n02477516 +n09710886 +n03835941 +n11734493 +n10778711 +n10007809 +n02038141 +n12766043 +n02353172 +n02030224 +n10762212 +n06274921 +n13033396 +n03560860 +n01961234 +n13868515 +n03216199 +n01553527 +n04429038 +n10211036 +n02150885 +n02435517 +n02755675 +n09699020 +n12566331 +n03909516 +n02903727 +n02594942 +n04173172 +n04125692 +n12251001 +n02412787 +n01649412 +n01411450 +n01774097 +n09912907 +n03162556 +n07566231 +n12267534 +n03928589 +n04142327 +n11771147 +n07832592 +n04155177 +n07937621 +n07839864 +n03201895 +n13095013 +n10298271 +n03059103 +n03784793 +n11925450 +n03288742 +n02809364 +n04108999 +n04449449 +n03726233 +n07854455 +n03692136 +n12018447 +n03374282 +n06008896 +n07598928 +n03577312 +n04604806 +n09892513 +n04370600 +n08238463 +n01793159 +n07822687 +n03242390 +n07685303 +n03822361 +n01996280 +n10505942 +n06596845 +n04219580 +n12056990 +n10579062 +n10240082 +n10298202 +n07711907 +n03905730 +n12222900 +n07598622 +n04415815 +n12389932 
+n12154114 +n04210012 +n12500751 +n03729402 +n12122918 +n04572121 +n12804352 +n02415130 +n12780325 +n11639084 +n12768933 +n02253494 +n13217005 +n03567788 +n12304286 +n10703480 +n07766723 +n05455113 +n07741804 +n12186839 +n01687128 +n01350701 +n03260206 +n07876026 +n12528382 +n04125541 +n10457444 +n01606097 +n11717399 +n04598416 +n12899166 +n09748101 +n12160125 +n07608980 +n07843348 +n02409038 +n02571167 +n09980805 +n09706029 +n02495242 +n12765846 +n10373525 +n12321873 +n03047171 +n12365462 +n03752398 +n02662993 +n10316527 +n10728233 +n06273207 +n01733214 +n12297846 +n12755876 +n02428842 +n02289307 +n04536465 +n03253187 +n02297294 +n05584746 +n03117642 +n12189779 +n10338231 +n07599649 +n04559994 +n12710917 +n09966470 +n12470907 +n04499300 +n12403075 +n11837743 +n02269657 +n12599185 +n07618587 +n03996004 +n12851094 +n03392648 +n01319001 +n12826143 +n12369845 +n01814549 +n10056103 +n12854193 +n02267483 +n04019881 +n03490649 +n04268142 +n10801802 +n12315060 +n10149436 +n04563790 +n09865068 +n03000530 +n10657556 +n07840672 +n12118414 +n02856013 +n02900459 +n04094859 +n12079523 +n11827541 +n12236160 +n02904505 +n02846619 +n09842823 +n12926039 +n02146201 +n03195799 +n12815838 +n09899289 +n01483021 +n02519340 +n05453815 +n10329035 +n02494383 +n09742927 +n13220355 +n03212406 +n11759609 +n10061431 +n12095281 +n04262530 +n03799240 +n02426176 +n04608809 +n12230540 +n13880551 +n11741175 +n11858814 +n11723452 +n07590841 +n12604845 +n10342543 +n12760539 +n09270657 +n02563079 +n10643937 +n12843316 +n01651641 +n07838811 +n04359034 +n07758260 +n02762725 +n11726433 +n03114743 +n01952029 +n12321395 +n11930571 +n12337922 +n12427946 +n12001294 +n12551457 +n13235011 +n02290340 +n06419354 +n12408873 +n01741442 +n12308447 +n10243872 +n03658635 +n03694761 +n02570484 +n12912801 +n04158002 +n02417785 +n01332181 +n03703075 +n10283366 +n03142431 +n02779609 +n02300554 +n09868782 +n10323752 +n03166809 +n03394149 +n02827148 +n02186717 +n01350226 +n03344784 +n03555996 +n04498873 +n13157481 
+n04519887 +n12028424 +n12349711 +n10471640 +n07741235 +n04032936 +n12357968 +n10228592 +n13178284 +n04168840 +n13239177 +n03561573 +n02566489 +n11807696 +n07681264 +n02566665 +n10456070 +n10063919 +n10492727 +n01788579 +n11977660 +n02036228 +n02738978 +n03989349 +n10332953 +n12949361 +n09901502 +n07839730 +n13146928 +n10152306 +n04170515 +n11602478 +n02522722 +n01333610 +n13030852 +n02143891 +n12807624 +n04542329 +n12243693 +n12036226 +n13917690 +n02553028 +n02752199 +n10594857 +n11627714 +n04348070 +n13171797 +n04612257 +n07934373 +n04536765 +n02244515 +n04526800 +n04546595 +n02551668 +n12143405 +n07871588 +n07858484 +n03628728 +n13179804 +n03242264 +n12089846 +n07588688 +n07620047 +n01647466 +n09685233 +n03467254 +n12666369 +n05449661 +n10694939 +n12886600 +n12256522 +n04006330 +n03317673 +n04316815 +n12222090 +n04022866 +n04088441 +n07617526 +n10782362 +n04355821 +n13901490 +n12508618 +n03849943 +n04503499 +n13193466 +n09754633 +n07583978 +n13911045 +n07643679 +n12054195 +n10692090 +n04032509 +n10146927 +n02031298 +n04002629 +n04035748 +n10712229 +n02866106 +n07909504 +n04540397 +n06266878 +n10219879 +n12567950 +n07853648 +n03191561 +n07856045 +n12646197 +n03317510 +n10515863 +n13198054 +n02808829 +n12889579 +n02698473 +n09924437 +n03595055 +n12306270 +n07857356 +n09715303 +n03024518 +n04323519 +n09629065 +n04178668 +n12748248 +n02308618 +n07873198 +n10564098 +n03007297 +n04036155 +n02143439 +n10507482 +n12267931 +n03956331 +n12888234 +n04066476 +n07813107 +n02736396 +n10306496 +n12324388 +n01744555 +n01649726 +n06596179 +n03616091 +n07754279 +n02072493 +n12408280 +n04314632 +n02412700 +n04030846 +n09833997 +n03599964 +n05258627 +n12572759 +n12136581 +n02419056 +n12453714 +n11652217 +n03878511 +n03907908 +n12223160 +n10514121 +n04153330 +n12163279 +n12623818 +n03495671 +n13222985 +n10354754 +n04365112 +n12384680 +n12538209 +n03105214 +n12534862 +n13869045 +n03945928 +n11613692 +n11892181 +n13002209 +n02685253 +n07598529 +n02629716 +n13202355 +n07927070 
+n02176916 +n04370955 +n11988132 +n03246197 +n01440467 +n07620145 +n03940894 +n01897667 +n03408340 +n12602612 +n02539424 +n03863657 +n04559620 +n02604480 +n11822300 +n03518829 +n11619845 +n10504090 +n03341035 +n02908123 +n04281998 +n03277602 +n03865288 +n10074578 +n13902793 +n03054605 +n04404200 +n12786836 +n12235051 +n04035231 +n12009792 +n12705458 +n04378489 +n02476870 +n11954798 +n03573848 +n02087314 +n03162460 +n04363412 +n02261063 +n09953615 +n01947139 +n03044801 +n04287351 +n04479287 +n03861596 +n12510343 +n07854066 +n03027505 +n12161577 +n04197878 +n01812187 +n10015792 +n08685188 +n11737009 +n10333044 +n02730568 +n10290813 +n13096779 +n05257476 +n07917951 +n12121187 +n03517509 +n07932762 +n02336275 +n12159942 +n12105981 +n02562971 +n13882961 +n12016777 +n02793684 +n12717644 +n01380754 +n07724173 +n04055861 +n11831297 +n03059934 +n03370646 +n10065758 +n09459979 +n07913644 +n04322531 +n03457451 +n02567633 +n04240867 +n10693334 +n10556704 +n04614844 +n07909362 +n12082131 +n09268007 +n04359217 +n09883807 +n02292085 +n04052346 +n03431570 +n02843465 +n04584056 +n04432043 +n09846142 +n07864317 +n04475749 +n04227050 +n04280845 +n03535284 +n07890617 +n03217889 +n02806762 +n11967315 +n11762927 +n02501923 +n03442487 +n09690083 +n02964634 +n02920164 +n07855317 +n10196725 +n03042829 +n11662937 +n12183816 +n12311224 +n13884261 +n02243209 +n03140771 +n02385002 +n03071288 +n12936826 +n04583022 +n07859142 +n04578112 +n04467506 +n12938081 +n09982152 +n12555255 +n03335333 +n10104888 +n12151170 +n12709349 +n10456138 +n02237868 +n07620327 +n12561309 +n12341931 +n12350032 +n01775730 +n12950796 +n01440242 +n04261767 +n10568915 +n12285195 +n07589872 +n13112035 +n07840395 +n11750508 +n12286197 +n03336168 +n03325288 +n02551134 +n04293258 +n13130014 +n07733124 +n04451139 +n11985903 +n03602365 +n11722342 +n11944751 +n12897999 +n02277422 +n03101302 +n07608245 +n03531982 +n01997825 +n11713370 +n04442582 +n02833403 +n04427857 +n01648356 +n10645223 +n10414865 +n10696101 +n12885045 
+n10037080 +n12218274 +n07570530 +n04493259 +n10659042 +n10577710 +n03141612 +n10582604 +n00446632 +n02834642 +n07568389 +n04583888 +n04096848 +n12879068 +n04495051 +n09837459 +n12216215 +n03702440 +n10174695 +n10559009 +n10577182 +n07686299 +n04269668 +n02404028 +n03720665 +n09885866 +n03082450 +n12492682 +n12780563 +n03703463 +n02644360 +n02307910 +n01374703 +n04402342 +n04264134 +n03158414 +n04443433 +n12522894 +n10803978 +n11706942 +n10751026 +n13143758 +n02972934 +n04174234 +n12718995 +n11994150 +n11545350 +n12526754 +n07753448 +n02870772 +n11942659 +n11744108 +n12735160 +n12229887 +n04970312 +n02874336 +n10721819 +n13193269 +n03330665 +n09865162 +n10306595 +n12161744 +n03303669 +n07846688 +n02168427 +n01961600 +n03559531 +n09826821 +n03413124 +n09695019 +n03783873 +n11863877 +n13874558 +n02283617 +n11895472 +n13182799 +n07854614 +n03283827 +n01397690 +n02650413 +n09809279 +n10290541 +n10383505 +n11724660 +n07689757 +n10181547 +n07620597 +n11979354 +n02771547 +n13061471 +n12631637 +n11966385 +n03969510 +n11735977 +n07621497 +n12956588 +n03217653 +n04546081 +n11696450 +n10300654 +n02032769 +n01654863 +n09779280 +n02390258 +n03887512 +n10489426 +n10745770 +n10713843 +n03602194 +n10710913 +n07864475 +n04486322 +n07915213 +n08663051 +n10236842 +n02390738 +n02388453 +n03598385 +n12228689 +n11771746 +n12803226 +n11242849 +n02378149 +n10427223 +n05448827 +n11870044 +n12477983 +n12311413 +n03500090 +n10280034 +n02685365 +n03652389 +n12728656 +n07695284 +n09961198 +n03780799 +n03935883 +n01612955 +n12475774 +n02701730 +n07833535 +n12584365 +n03902220 +n12727960 +n10619492 +n04450465 +n10646780 +n10110731 +n04142175 +n12296735 +n09337048 +n12681579 +n12819354 +n12541403 +n04305016 +n12798910 +n10321126 +n08618831 +n09721244 +n02225798 +n01637338 +n12218868 +n05545879 +n12022382 +n03972372 +n02505063 +n01694311 +n10695450 +n10081842 +n12297507 +n07592922 +n12118661 +n01952712 +n10517137 +n01340522 +n07719330 +n03729482 +n04168541 +n03090710 +n07873679 +n07828378 
+n07728284 +n10343088 +n07869937 +n14585392 +n01453475 +n12095412 +n04973020 +n12810007 +n07564515 +n01599741 +n11629047 +n09937802 +n12450607 +n12460146 +n02292401 +n03632963 +n09617696 +n12545232 +n02874642 +n09934488 +n10091349 +n01447946 +n05469861 +n11830400 +n03382533 +n02608547 +n12697152 +n03542727 +n10716576 +n03664159 +n07568625 +n02976815 +n13147532 +n02336826 +n12432574 +n07686461 +n04107598 +n02505998 +n09849167 +n03688066 +n02836513 +n01576358 +n01893021 +n12017511 +n12065649 +n01714231 +n11662585 +n12827907 +n12954353 +n11936199 +n01368672 +n03843883 +n12184095 +n10058411 +n11684654 +n08506347 +n10579549 +n01423302 +n11604046 +n07613158 +n03605504 +n02090129 +n02284224 +n01958435 +n12664469 +n04459122 +n09617161 +n09780828 +n11830252 +n12870048 +n04247544 +n09871095 +n02962938 +n09933020 +n13064457 +n10341243 +n07694169 +n13200193 +n07765728 +n01524761 +n07730562 +n07751737 +n07740855 +n04192521 +n12593122 +n07841037 +n02809736 +n10604275 +n12512095 +n01907287 +n04592596 +n09823153 +n03181667 +n12449784 +n07908923 +n12365900 +n03053976 +n15060688 +n04165675 +n02530637 +n09816654 +n12540966 +n07934152 +n09290350 +n03455802 +n10111779 +n01351315 +n10281770 +n13862552 +n12435486 +n12370174 +n12296045 +n03493219 +n12363301 +n11973749 +n03939565 +n02938321 +n13209270 +n12604639 +n12657755 +n03604536 +n10328941 +n04278932 +n10376890 +n01884203 +n02061853 +n04256318 +n07831821 +n10585217 +n07591813 +n10210648 +n07739035 +n01632308 +n10319313 +n02861777 +n03821145 +n13029610 +n04239900 +n10313441 +n04951716 +n10628097 +n02368116 +n08571275 +n04433377 +n10458596 +n12435965 +n12448136 +n12129986 +n04295777 +n07898895 +n07854266 +n12327846 +n12318782 +n07825850 +n10414239 +n11731157 +n04409911 +n10655442 +n11829205 +n01738306 +n02840515 +n04150371 +n03369512 +n02645538 +n12773917 +n07818422 +n03227010 +n10303037 +n12942025 +n12406304 +n06616216 +n02435216 +n12981954 +n03683341 +n09703809 +n07722666 +n11817160 +n10110893 +n10228468 +n03572631 +n01378545 
+n02130086 +n04388574 +n11960673 +n12956922 +n11924014 +n09895902 +n03426462 +n07759576 +n02563949 +n03466947 +n02522637 +n09480959 +n02033882 +n02451415 +n12677120 +n10580437 +n04425977 +n03057841 +n12285512 +n07614348 +n03144873 +n03391613 +n12366870 +n02304657 +n07863935 +n07909714 +n02413717 +n12591702 +n07838659 +n02967407 +n12016914 +n02735268 +n09470027 +n10222259 +n03899100 +n10513509 +n11620016 +n12600267 +n04368840 +n03016209 +n04085017 +n03215076 +n10238272 +n09782855 +n07586179 +n12434483 +n12452480 +n01990516 +n12030092 +n11739978 +n12714254 +n13036804 +n07727377 +n07879560 +n03710421 +n12128490 +n11968519 +n03250588 +n10173579 +n03114041 +n02942015 +n12729164 +n07871065 +n02591330 +n09353815 +n10138472 +n02712545 +n12866333 +n07835823 +n03508485 +n01758895 +n02925385 +n03321419 +n09931418 +n02846874 +n12500518 +n07587819 +n03160186 +n04974340 +n13067532 +n11940349 +n13027879 +n02878534 +n10055566 +n07925708 +n12628356 +n11958499 +n03472672 +n04233295 +n04563020 +n03426871 +n04330109 +n03677682 +n04129766 +n02884859 +n12692521 +n10188856 +n03500971 +n10355306 +n12407545 +n11955040 +n10028541 +n10345659 +n14720833 +n09641578 +n12613706 +n11718296 +n03380301 +n01334217 +n03890358 +n03583419 +n12447121 +n09660010 +n11826569 +n11837351 +n12096089 +n03871860 +n01821554 +n12834938 +n02738449 +n02644665 +n03316873 +n12548564 +n03605417 +n12094401 +n13152339 +n03004531 +n03080904 +n03535647 +n12349315 +n04213264 +n07860208 +n01526766 +n03710937 +n11806521 +n10618234 +n12306938 +n10473562 +n10050880 +n04596116 +n02577164 +n04479694 +n07936093 +n07834286 +n12175181 +n03986857 +n02919648 +n12055073 +n04567593 +n07585015 +n12771085 +n10551576 +n09778783 +n01593282 +n02406952 +n12331263 +n10629329 +n12287195 +n07729225 +n07828041 +n01880473 +n12257725 +n02696246 +n07853232 +n11936864 +n09745229 +n03364156 +n04503155 +n03194297 +n04003359 +n07607361 +n10106387 +n10306890 +n10455619 +n01647180 +n07740115 +n12106323 +n03626272 +n11685621 +n11866706 +n04321121 
+n01606978 +n12621619 +n11615259 +n07840304 +n02841847 +n05459769 +n03432360 +n04604276 +n12356395 +n12468545 +n03645168 +n00477827 +n03459591 +n04202142 +n12959074 +n07881625 +n12382233 +n02405692 +n12299640 +n12247202 +n12628705 +n12534625 +n09264803 +n12176953 +n09835017 +n10390807 +n04975739 +n12474418 +n11931135 +n07917791 +n10636488 +n09690496 +n11993675 +n03703203 +n11794139 +n13015688 +n04168084 +n01948446 +n10169419 +n04455048 +n04973669 +n12840502 +n12120578 +n10448455 +n01386007 +n02288122 +n01441910 +n02278463 +n03108759 +n02753710 +n03143400 +n13080866 +n13917785 +n13124358 +n13220663 +n02475358 +n01925916 +n02684649 +n10451590 +n03869976 +n03881305 +n07928264 +n01422185 +n04035634 +n11996677 +n04261369 +n12925583 +n12764008 +n09972587 +n03708962 +n01791388 +n02892626 +n04098399 +n07823369 +n07752874 +n13225244 +n03376771 +n01771766 +n13146403 +n12157179 +n13897198 +n07770869 +n13240362 +n07610502 +n03688504 +n02896856 +n12543186 +n09967063 +n05453412 +n12590600 +n02378870 +n07568241 +n01687290 +n00474769 +n11694866 +n02338722 +n02637977 +n04567746 +n10586444 +n11907405 +n03421960 +n07605693 +n10384214 +n12877637 +n12018363 +n10056611 +n13882487 +n12140759 +n04114301 +n11762018 +n12678794 +n11817501 +n02116450 +n12018530 +n03324629 +n12726528 +n03155502 +n10493199 +n04181083 +n10609198 +n04328703 +n03045074 +n07769886 +n01892385 +n12828520 +n03165211 +n11800565 +n07567139 +n13877547 +n12829582 +n02949084 +n07589724 +n01746191 +n12395463 +n05459457 +n10565502 +n11981475 +n09310616 +n12327022 +n02313709 +n12957803 +n11865276 +n12955414 +n12939479 +n13225365 +n07936459 +n03139089 +n07577772 +n12057895 +n03620353 +n12152031 +n01885158 +n04096733 +n12626674 +n10464711 +n10675609 +n07752782 +n03709960 +n02540983 +n02285179 +n01903234 +n07835701 +n04421083 +n02352290 +n09421031 +n03349367 +n02539894 +n04052235 +n07922955 +n03941887 +n04234260 +n04423552 +n11975254 +n08501887 +n12489676 +n04574348 +n10602119 +n02163008 +n02748491 +n10024937 +n10033888 
+n12605683 +n01790398 +n10128519 +n14977188 +n10293590 +n12077244 +n09741074 +n11694469 +n12692714 +n12159804 +n12533437 +n03831203 +n03692004 +n09462600 +n04537436 +n06618653 +n07913537 +n12783316 +n10038119 +n10236521 +n01486540 +n07875267 +n04345787 +n07681355 +n13028937 +n03607186 +n07863107 +n12387103 +n09830926 +n03574416 +n04478383 +n11685091 +n03197446 +n03225458 +n09741722 +n07736527 +n02857907 +n10177150 +n12711398 +n10308275 +n02418770 +n02577662 +n09935107 +n03362639 +n12446908 +n04329681 +n04114428 +n09624899 +n12913144 +n12338034 +n02341616 +n12360817 +n12907857 +n02414904 +n05482922 +n11974888 +n04127117 +n12581110 +n04368365 +n01699254 +n12525753 +n04254450 +n11951052 +n12458874 +n12721477 +n07562651 +n02239192 +n10533874 +n12006306 +n09537660 +n10008123 +n02788386 +n03248835 +n04491312 +n11795580 +n04025633 +n10166189 +n07703889 +n11824747 +n07605198 +n12134836 +n03591116 +n02946753 +n13212025 +n11742310 +n02328820 +n02985606 +n09955944 +n12679432 +n10020366 +n12013035 +n02942147 +n04172512 +n11802410 +n10789709 +n03385295 +n02039497 +n01416213 +n11940750 +n12178780 +n01967963 +n12662379 +n12217851 +n02812631 +n12432069 +n09991740 +n03089477 +n12458713 +n03876111 +n10311661 +n12286068 +n02838958 +n11936369 +n03716228 +n13228017 +n06276902 +n12677331 +n04330189 +n10488016 +n12011370 +n04343740 +n07893792 +n02171164 +n03963483 +n12080588 +n07577657 +n12936155 +n03809686 +n04223066 +n04086066 +n12776558 +n07813579 +n01841943 +n12285705 +n02581482 +n11653570 +n10010632 +n04305947 +n12228886 +n12797368 +n01404495 +n09697986 +n11882237 +n10077879 +n07607832 +n09779461 +n13212379 +n10769188 +n10715789 +n01480106 +n02145910 +n04275093 +n01983829 +n01978010 +n09937903 +n11976314 +n11785276 +n12386945 +n04445782 +n10712374 +n10706812 +n10194775 +n12655062 +n10739135 +n02597972 +n02307176 +n04121342 +n02350670 +n12698027 +n02805845 +n02895008 +n13149970 +n03451365 +n04542595 +n07803895 +n07864198 +n09690864 +n03844550 +n12378249 +n10345422 +n13163553 
+n10457903 +n10783539 +n10539015 +n11757017 +n10274173 +n08652376 +n10283546 +n04541777 +n02824152 +n12945177 +n02082056 +n03695957 +n07936015 +n07591162 +n03628071 +n02990758 +n07685118 +n04023422 +n04951875 +n03541393 +n10289176 +n04039209 +n07913180 +n07910799 +n12017853 +n03732543 +n10656120 +n10512859 +n04556664 +n12464649 +n12927758 +n12078451 +n07878145 +n10561320 +n12467592 +n07689217 +n07619881 +n11935187 +n09837720 +n03642144 +n12220019 +n02983507 +n03271260 +n02778588 +n10193650 +n01654083 +n02746978 +n10202763 +n02953552 +n07924366 +n08583554 +n02905886 +n07855603 +n09745834 +n12366053 +n04140539 +n03383211 +n11648268 +n03352961 +n12116734 +n07771539 +n07836077 +n03842754 +n11683838 +n03004409 +n11730750 +n13098962 +n12292463 +n02867592 +n01653026 +n07583865 +n12548804 +n12702124 +n03917048 +n12677841 +n12511488 +n04217387 +n12495670 +n03554375 +n12403513 +n08558770 +n02781764 +n12339526 +n12742290 +n01404365 +n03591798 +n12446737 +n10494195 +n12110352 +n01672611 +n10493922 +n03638623 +n09910840 +n02238594 +n02575325 +n13186546 +n11873182 +n10344774 +n04094060 +n10417682 +n02749169 +n02428089 +n04549721 +n03824284 +n12107002 +n12784371 +n09986904 +n01634227 +n07826544 +n12253487 +n01679005 +n12516165 +n09339810 +n03126090 +n07803408 +n11883945 +n03842276 +n03397412 +n03280216 +n12264786 +n02545841 +n11877860 +n01830479 +n13207923 +n12490490 +n02542958 +n04114719 +n12590715 +n13226320 +n11644872 +n04119630 +n10176913 +n04213105 +n11652966 +n12546420 +n12625823 +n11897466 +n02092173 +n10567613 +n04953678 +n10059067 +n12408466 +n03056288 +n13036116 +n04169597 +n12467197 +n02569905 +n02758490 +n12623211 +n04077889 +n04959061 +n04183957 +n11689815 +n03777126 +n03306869 +n07720084 +n02659478 +n12947756 +n04341288 +n04448185 +n04037076 +n09828988 +n03346289 +n04174705 +n13126050 +n04255346 +n09764732 +n11773628 +n14891255 +n04314107 +n02184720 +n02646892 +n04320598 +n01979526 +n03191451 +n03662452 +n10290422 +n01739094 +n02305636 +n04202282 +n05459101 
+n02766168 +n09994808 +n03528100 +n10475940 +n03005619 +n12639168 +n02144936 +n13202125 +n10703221 +n03770834 +n12324056 +n03474167 +n02609302 +n12166929 +n12852570 +n12920719 +n12508762 +n11983375 +n01422450 +n12616630 +n09681107 +n10486561 +n13038577 +n12266644 +n02478875 +n02547014 +n02249809 +n03336742 +n12038760 +n01672432 +n09861287 +n03678879 +n01949973 +n09928845 +n02310149 +n12648693 +n10533983 +n12812801 +n04550676 +n01800633 +n12128306 +n12744142 +n13140367 +n07803213 +n07688265 +n13068434 +n02030568 +n12955840 +n01625121 +n13215258 +n04270576 +n02680638 +n02817251 +n01539272 +n04066023 +n12969927 +n10280598 +n04001661 +n09774167 +n10358575 +n01836673 +n02290664 +n09940725 +n12447581 +n07803779 +n04561965 +n10151261 +n01538362 +n10170060 +n13160365 +n09823287 +n12554729 +n10620212 +n11935027 +n03465605 +n03227856 +n08519299 +n07785487 +n03522863 +n02861286 +n12200905 +n04269502 +n02104184 +n07612273 +n01390763 +n11872658 +n12981086 +n10244359 +n01738731 +n12117235 +n12846690 +n02861658 +n08782627 +n09832633 +n02531114 +n01394492 +n03269073 +n03077442 +n09794668 +n13884384 +n08659331 +n02556373 +n02587877 +n03523506 +n03723153 +n12024805 +n13061172 +n03978575 +n07914686 +n13134844 +n12183026 +n03573574 +n03765128 +n03319167 +n01920438 +n07852452 +n07680655 +n03017698 +n12959538 +n04261506 +n01793340 +n03292362 +n12817855 +n03593222 +n01962506 +n12453018 +n04027367 +n12518481 +n09223487 +n07871335 +n03779246 +n09668562 +n01889849 +n02492356 +n07830841 +n03277149 +n09968652 +n03092476 +n10400205 +n06263202 +n07595368 +n12767208 +n02196896 +n12580012 +n10265801 +n02103181 +n02922461 +n01731277 +n12422559 +n04278605 +n02250280 +n03283413 +n11829922 +n10191613 +n02493224 +n04427559 +n12181352 +n12742878 +n10683675 +n04503705 +n03785142 +n12816942 +n10723230 +n11936707 +n12360534 +n12909759 +n03766218 +n02696843 +n11935877 +n07828156 +n10617397 +n12921499 +n13158714 +n10166394 +n12370549 +n03505015 +n12769065 +n02636550 +n10781236 +n09869317 +n10275249 
+n04234763 +n10735173 +n13137225 +n02070776 +n04232312 +n07575226 +n03471030 +n07909954 +n02633677 +n01662060 +n07563642 +n04263950 +n11824344 +n13178707 +n02972714 +n10417288 +n12092930 +n11993203 +n10170681 +n03726116 +n03215337 +n12564613 +n14975598 +n07758125 +n03123666 +n07717714 +n01421333 +n02359667 +n09403086 +n03857026 +n12759668 +n02628259 +n02307515 +n12146488 +n09777870 +n07819303 +n12105353 +n10784113 +n11802995 +n12561594 +n02845130 +n12100187 +n03507658 +n02141611 +n01800195 +n03470005 +n12444898 +n02203592 +n09707061 +n00475142 +n12216628 +n01732093 +n02581642 +n03803780 +n12114590 +n04541662 +n12267133 +n11652753 +n07859951 +n04524594 +n12843144 +n04040540 +n10604880 +n12559044 +n03063834 +n12394328 +n12704513 +n10230216 +n10756641 +n02101670 +n12309630 +n03070587 +n11626010 +n04239639 +n01638329 +n01928517 +n13144084 +n10420649 +n03102516 +n12395289 +n09833111 +n01651285 +n11688069 +n12881913 +n12783730 +n07716649 +n03618678 +n10344203 +n03626502 +n10718665 +n03577474 +n01683201 +n03246653 +n12153224 +n02519472 +n02470709 +n15090238 +n03129636 +n07774295 +n04577567 +n09995829 +n09662038 +n10297367 +n03555862 +n12531727 +n09947127 +n12533190 +n04062807 +n00479734 +n12860978 +n01884104 +n09866559 +n12069009 +n04595501 +n12088495 +n02909053 +n12283790 +n02180427 +n10697282 +n07562881 +n13092078 +n11706325 +n01746952 +n01978136 +n07731436 +n02386746 +n12648424 +n12726357 +n10314182 +n07839172 +n11753562 +n12903503 +n12589687 +n02375438 +n03604763 +n11549895 +n13202602 +n12304420 +n10738215 +n12220829 +n10095420 +n12177455 +n11887476 +n04006411 +n09838370 +n02853218 +n12688372 +n03335461 +n02800940 +n03036701 +n09885059 +n10206629 +n11922926 +n01678657 +n12192132 +n12248141 +n03108624 +n01936671 +n02417242 +n03222857 +n03768823 +n04343511 +n03538817 +n12655726 +n12521186 +n01330497 +n12767423 +n12965951 +n09695132 +n04410886 +n12599874 +n07865700 +n07596160 +n10227698 +n03224490 +n11598886 +n02948293 +n09906293 +n12247963 +n03301175 +n03895170 
+n04259468 +n07808806 +n13147689 +n09856827 +n13882639 +n02241008 +n03842585 +n02883101 +n12182276 +n13918717 +n12728164 +n10634464 +n02477187 +n03107716 +n02342250 +n01479213 +n12793695 +n09808080 +n10707707 +n04161010 +n02836607 +n10076483 +n07726386 +n03872273 +n10250712 +n07688412 +n13884930 +n12301766 +n10196404 +n07591330 +n03814727 +n09610255 +n12757115 +n09814381 +n02397987 +n07886317 +n03959123 +n02185167 +n03533845 +n11838413 +n10227393 +n07704305 +n03580615 +n02663485 +n10101981 +n04346855 +n10067011 +n04464125 +n02829510 +n10007995 +n07845775 +n03004713 +n02450561 +n09905530 +n10361060 +n12394638 +n12095934 +n10479135 +n03145277 +n12246773 +n13194212 +n04475900 +n03252787 +n14867545 +n10485298 +n09961739 +n02149653 +n01553762 +n03931980 +n02344408 +n11676850 +n04034367 +n04235646 +n12867184 +n12625670 +n12763529 +n07593107 +n04351550 +n02571810 +n13899735 +n03652826 +n09495962 +n03421768 +n04205062 +n11918808 +n07745197 +n07752264 +n01892744 +n04609811 +n10278456 +n11790936 +n09754152 +n13234519 +n09820044 +n00440643 +n02350357 +n03779884 +n07803992 +n03305953 +n01836087 +n10068234 +n10690421 +n03134394 +n12380761 +n12801966 +n03134232 +n02596720 +n07591236 +n11882821 +n02312175 +n02387983 +n01912152 +n10805501 +n12718074 +n03188290 +n02776505 +n10528148 +n09971385 +n10524223 +n09958292 +n02721813 +n10300829 +n12007766 +n12107191 +n04449700 +n02987950 +n11878633 +n12328801 +n04551833 +n10567722 +n11654984 +n02808968 +n12066451 +n02964075 +n11633284 +n02434712 +n03070854 +n07926540 +n01543936 +n10091861 +n09938080 +n11976511 +n03342432 +n12886831 +n12509993 +n12958261 +n12730776 +n10066206 +n07846014 +n13176714 +n03332591 +n04607640 +n02513727 +n12138248 +n11964848 +n01318053 +n10553140 +n07839055 +n02632039 +n11865429 +n02286654 +n02367812 +n12093885 +n10774329 +n02296912 +n01729672 +n10353928 +n12033504 +n11936113 +n03263338 +n07822053 +n09737050 +n13875884 +n13212559 +n11690088 +n05468739 +n09344724 +n02507148 +n01377694 +n04172607 +n10464870 
+n07804152 +n02825872 +n03139640 +n11858703 +n10227490 +n12334153 +n03616225 +n12018188 +n12399656 +n10235269 +n11840764 +n01995514 +n03326475 +n12704041 +n10684827 +n03006788 +n13906484 +n02868240 +n03614887 +n03491724 +n12124172 +n03675907 +n13170840 +n03983712 +n03254737 +n07836269 +n01784293 +n02095212 +n12470512 +n12219668 +n12920521 +n04492157 +n02950018 +n01922717 +n11797981 +n12601805 +n02744961 +n07814925 +n09798096 +n03939062 +n13891547 +n07564292 +n01590220 +n09295210 +n03997875 +n03479266 +n01491661 +n03781055 +n12528768 +n10657306 +n12014923 +n10094320 +n02532272 +n02224023 +n04541136 +n12067672 +n02661473 +n04233027 +n12399899 +n12889412 +n01736032 +n12551173 +n01337734 +n10104487 +n02921592 +n02148512 +n10216403 +n03276839 +n01781570 +n03999621 +n02505238 +n12537569 +n10433452 +n02351343 +n12365158 +n08539276 +n01897257 +n12221801 +n10557246 +n10437698 +n01803641 +n11836327 +n07813833 +n03468570 +n06277025 +n10040240 +n03692842 +n03017835 +n01881564 +n10487363 +n07937069 +n10597505 +n01638722 +n10160412 +n09825096 +n12611640 +n03098515 +n10654211 +n13196234 +n03436990 +n04058486 +n09814567 +n10758337 +n03515934 +n07688757 +n10269199 +n12627347 +n04521571 +n01636510 +n03220095 +n09982525 +n12768809 +n02340930 +n02473857 +n12336586 +n12125584 +n02833040 +n02498153 +n01467804 +n12120347 +n11650430 +n11953339 +n12592058 +n05102764 +n10575594 +n09722064 +n01966586 +n10619888 +n07852376 +n12650915 +n10321882 +n11974557 +n09847267 +n13201423 +n12337131 +n13185658 +n02150134 +n10538853 +n10471732 +n07836600 +n03526062 +n02512752 +n04232437 +n03367321 +n04308915 +n07600895 +n11539289 +n03539293 +n12699922 +n07817599 +n02781213 +n03594010 +n12035907 +n04075813 +n05233741 +n07863229 +n10735984 +n12095543 +n12272735 +n04229620 +n12240965 +n07768590 +n04420024 +n12111627 +n02861509 +n02595056 +n12183452 +n04607982 +n13213577 +n07741888 +n03750614 +n10043024 +n03372933 +n10051861 +n10199251 +n03249956 +n03984125 +n02956393 +n11619687 +n03356279 +n07833951 
+n10715030 +n02340358 +n10768272 +n01494041 +n02592734 +n03323319 +n02136285 +n03995661 +n09945223 +n03547397 +n10044682 +n12878784 +n02803809 +n13160254 +n12726902 +n12196954 +n03161016 +n03105645 +n04218921 +n09493983 +n10719036 +n12263588 +n12565102 +n10684146 +n03148518 +n04287986 +n02340640 +n04331443 +n10727016 +n03369407 +n07824863 +n07844786 +n12467433 +n07582811 +n02964196 +n02197877 +n10758445 +n03271376 +n13212175 +n03260504 +n12777778 +n11973634 +n05467054 +n11946313 +n02462213 +n13906669 +n10520286 +n02074726 +n01771100 +n13880199 +n09811568 +n13883763 +n02334728 +n11831100 +n12025220 +n12751172 +n03858837 +n10127186 +n12831535 +n07823591 +n02513805 +n03662301 +n09913329 +n02749670 +n10655986 +n01787191 +n03199488 +n12732252 +n12253664 +n07735294 +n03440876 +n09650839 +n03844965 +n10341446 +n12688187 +n12961242 +n03423224 +n13157346 +n09802951 +n11948044 +n03489048 +n12279060 +n03664840 +n03731882 +n07742605 +n07870734 +n03949761 +n10759331 +n07739923 +n02737351 +n01788291 +n11780424 +n03722646 +n12297110 +n12363768 +n04495310 +n10008254 +n03934890 +n01318478 +n03609959 +n10070377 +n04123228 +n13068735 +n02909706 +n10671042 +n10491998 +n07650792 +n12664710 +n10213034 +n03455642 +n10411867 +n09903936 +n10121800 +n02622955 +n03647423 +n07596566 +n09654898 +n12248780 +n02684515 +n04255670 +n06273890 +n03495941 +n12960552 +n09724234 +n03861048 +n03293095 +n11835251 +n12852428 +n04084517 +n01814620 +n13159890 +n03147156 +n02311748 +n10237799 +n07584859 +n01946827 +n09651968 +n12241192 +n03669245 +n07858336 +n11932927 +n04444218 +n10526534 +n03642573 +n09470222 +n10731732 +n12001924 +n03786096 +n01359762 +n03824999 +n13877667 +n10591811 +n10574311 +n03275125 +n11631985 +n10539160 +n10502950 +n12499757 +n12432707 +n12068615 +n07689624 +n02610373 +n03204436 +n13051346 +n13134531 +n07610890 +n04021164 +n03502897 +n02299378 +n10417843 +n10050043 +n07929940 +n02593453 +n10577820 +n12870225 +n03333851 +n09463226 +n11741575 +n09193551 +n12012510 +n11987349 
+n09215023 +n07924655 +n10060075 +n11999278 +n03933391 +n02602059 +n11993444 +n02337902 +n10149867 +n04441093 +n02868429 +n10629647 +n04192361 +n12029039 +n02768433 +n12078747 +n12730143 +n03255167 +n12492900 +n01709876 +n09672725 +n07870620 +n02315821 +n12277334 +n12204730 +n07852712 +n01319685 +n07802246 +n13031193 +n00812526 +n09658815 +n11982939 +n04264485 +n07893425 +n04094438 +n03285730 +n13182338 +n10724570 +n07832741 +n13210350 +n10654015 +n04058721 +n07875086 +n03462747 +n03994417 +n02889856 +n11957514 +n10109443 +n10478462 +n03064562 +n02477782 +n11920998 +n02138169 +n04227787 +n11797508 +n10753339 +n12928307 +n11921792 +n12643688 +n01833112 +n03919808 +n09817386 +n01903498 +n03848033 +n12031547 +n01035504 +n12324906 +n01911063 +n02588794 +n03749634 +n03539754 +n02242455 +n03079616 +n03246312 +n09705671 +n07860629 +n10458356 +n10051761 +n09709531 +n02867401 +n12522678 +n13150378 +n04462576 +n03462315 +n03712981 +n07607027 +n10581648 +n02957427 +n04271793 +n02253913 +n12824735 +n11697802 +n02161588 +n12463975 +n02361090 +n09784564 +n09680908 +n03512452 +n13214217 +n10712690 +n04023119 +n07814007 +n09833751 +n12885265 +n02259987 +n11933903 +n03628831 +n11967142 +n02533545 +n03900301 +n07919787 +n12793886 +n10768148 +n03071552 +n02780315 +n12193665 +n03378442 +n04486616 +n07832307 +n03164192 +n12786273 +n04261868 +n12655351 +n12320414 +n04371979 +n10630093 +n13052014 +n01357328 +n07879821 +n09753348 +n03796974 +n11701302 +n11678299 +n04022434 +n11610823 +n07726009 +n04117639 +n10474343 +n11888061 +n01842788 +n10435251 +n03343047 +n03383378 +n12750767 +n09662661 +n05241485 +n10000459 +n12220496 +n02246941 +n12676370 +n02253264 +n07766409 +n02940289 +n12089320 +n10363573 +n12922119 +n09783537 +n11695285 +n12331066 +n12573647 +n10218164 +n12509821 +n07862946 +n12818601 +n02589316 +n13191620 +n03758992 +n12112337 +n10733820 +n02898093 +n02645953 +n10150794 +n04595762 +n02344918 +n13132756 +n12859153 +n12138444 +n04211001 +n12935166 +n07830493 +n10142166 
+n11951820 +n03018848 +n01453742 +n11985321 +n10000294 +n01362336 +n02328009 +n12639376 +n03090437 +n02204249 +n04312916 +n13127666 +n09684082 +n03432509 +n10274318 +n09704057 +n07593972 +n10074249 +n13157971 +n01638194 +n04036963 +n11708857 +n03418749 +n12589458 +n11899762 +n07683138 +n01601410 +n07854707 +n04279063 +n03239607 +n10302700 +n12520406 +n12576451 +n03881534 +n07565608 +n02349390 +n12569851 +n12249294 +n04059399 +n03530189 +n09357346 +n04325208 +n13159691 +n04045941 +n13898315 +n11992479 +n02353411 +n07825496 +n12922458 +n03115014 +n11761836 +n03323211 +n02793296 +n03492087 +n05241662 +n05491154 +n10419630 +n04506895 +n10546428 +n02907296 +n10769459 +n11647868 +n13188462 +n03825442 +n13209460 +n10742005 +n07599242 +n12361754 +n04570532 +n04131811 +n07756499 +n02598134 +n01910252 +n02910701 +n10129338 +n13871717 +n12673588 +n12565912 +n07562172 +n02711237 +n10775003 +n07695410 +n02637179 +n12930951 +n10261211 +n02906963 +n01366700 +n10642705 +n09846586 +n02779719 +n04978561 +n01369358 +n12114010 +n03521771 +n10667709 +n02296612 +n10722029 +n03500557 +n01365474 +n10472447 +n07585644 +n07609316 +n04013060 +n04505888 +n09726811 +n12692160 +n12378963 +n03585551 +n13139837 +n10167565 +n03799375 +n11990920 +n09640327 +n04502989 +n10108832 +n10561736 +n01897426 +n11766189 +n12462582 +n12913524 +n02684356 +n13200542 +n10466198 +n04331892 +n01478969 +n07837234 +n07692248 +n04552097 +n12382875 +n01484447 +n04120695 +n12681376 +n10293861 +n11965962 +n11788039 +n03959227 +n01832813 +n09918867 +n09942697 +n07587206 +n10459882 +n01347583 +n02267208 +n03951453 +n03006903 +n12126736 +n10286749 +n03395401 +n04605057 +n03467887 +n12755559 +n04020744 +n11629354 +n01647033 +n02780445 +n10205714 +n09439032 +n03138128 +n02763083 +n07835547 +n12251278 +n11949857 +n01635480 +n10675142 +n07845335 +n07751977 +n10332110 +n11871496 +n11764814 +n12229651 +n07760297 +n09865672 +n02919308 +n12218490 +n03782929 +n12231709 +n11909864 +n03144982 +n11799331 +n10433610 +n10483395 
+n03206023 +n05442594 +n03626418 +n07870478 +n10171456 +n11964446 +n12796849 +n02126317 +n03797062 +n01412694 +n07610746 +n03581897 +n04479526 +n12447891 +n11906514 +n09699642 +n12873984 +n10586903 +n13234114 +n02436353 +n11889205 +n01460303 +n04400899 +n11884967 +n02140491 +n12215824 +n03586911 +n01394040 +n10691937 +n12371704 +n09668988 +n04362624 +n01740885 +n01337191 +n09714120 +n02185481 +n08555333 +n10704238 +n12430471 +n12034594 +n10012484 +n12088909 +n03205903 +n04129490 +n13090018 +n10712474 +n12234669 +n13016076 +n00454855 +n13882713 +n02644817 +n03192907 +n03519226 +n01561181 +n04583967 +n11732052 +n10732854 +n04480303 +n07934908 +n03825673 +n10621294 +n04354387 +n03374102 +n02922159 +n13158815 +n04000716 +n09685806 +n04427216 +n12051514 +n09712967 +n12081649 +n09748889 +n03252231 +n10704886 +n12897118 +n12525168 +n11728769 +n02731251 +n02548884 +n12403276 +n09627807 +n08679167 +n09663999 +n04247440 +n07711683 +n09909929 +n03415868 +n05244421 +n07680416 +n12757668 +n11935794 +n03483086 +n01860864 +n10755164 +n03675076 +n12004987 +n07566092 +n04078955 +n03379719 +n01916588 +n10138369 +n09755893 +n03649003 +n03977430 +n02309120 +n10616578 +n12242850 +n12388293 +n03292085 +n09919061 +n10302576 +n01497413 +n01936858 +n01377278 +n04358256 +n02667693 +n12125183 +n07758582 +n07813324 +n09737453 +n12745564 +n03855464 +n03166685 +n01446152 +n09801102 +n10561222 +n10576818 +n13915209 +n10474446 +n03845990 +n04237174 +n12531328 +n07855812 +n10763245 +n04614505 +n07905770 +n12051792 +n12653633 +n03593862 +n10359659 +n10436334 +n07853125 +n12911264 +n12265083 +n03638014 +n04444121 +n02706221 +n10563711 +n07808166 +n11799732 +n04093915 +n10451858 +n04410760 +n10075299 +n12740967 +n12635359 +n09611722 +n12902466 +n13915305 +n05542893 +n04440597 +n03675445 +n12315245 +n10646032 +n10047199 +n12775717 +n10365514 +n10590452 +n11616260 +n02812342 +n07856756 +n04570416 +n03565991 +n12215210 +n04330896 +n02388588 +n02266269 +n10760199 +n14714645 +n02742070 +n03565710 
+n12609379 +n03420935 +n03441465 +n00453631 +n01963479 +n04362972 +n09863936 +n03961394 +n03009269 +n12297280 +n04561010 +n12192877 +n02981565 +n12134695 +n07855413 +n03232815 +n10180791 +n09932788 +n10571907 +n02109256 +n02660091 +n07865788 +n13228536 +n10306279 +n02635580 +n03634899 +n10262343 +n12296929 +n04393301 +n06281175 +n04485586 +n13103660 +n10510974 +n04166436 +n01634522 +n07596362 +n12700357 +n08597579 +n11744011 +n12238756 +n01790171 +n04571800 +n11867311 +n03464467 +n12241880 +n09961605 +n12592544 +n03170459 +n09938991 +n02692680 +n10295371 +n04331765 +n02612167 +n02520810 +n11977887 +n04094608 +n07722390 +n07832202 +n12448361 +n04612159 +n12186352 +n13161151 +n12654227 +n09868899 +n10104756 +n09920106 +n12981301 +n02610980 +n12545865 +n10673296 +n04110841 +n01704626 +n04055700 +n12117912 +n10519126 +n12443736 +n01697978 +n02148088 +n03012644 +n12091697 +n10395390 +n10509810 +n10462751 +n02896949 +n03836602 +n03928994 +n07718195 +n02473983 +n08571642 +n02648916 +n11970298 +n06274292 +n04613158 +n09856401 +n12811713 +n13111340 +n12122442 +n10095265 +n04445610 +n11631619 +n07863644 +n12022821 +n10315217 +n12549799 +n03386343 +n03121040 +n03558007 +n12272432 +n11798496 +n02522866 +n02952935 +n10741493 +n12143065 +n07883156 +n09616573 +n02289988 +n13161904 +n02588945 +n00451768 +n12375769 +n10777299 +n04495183 +n11930994 +n09970088 +n02254246 +n12276314 +n07857598 +n04428382 +n03789794 +n03383821 +n12980080 +n01447139 +n12880799 +n03501520 +n10764465 +n13143285 +n12727729 +n12444095 +n02354621 +n13174354 +n01691652 +n07732525 +n10437014 +n04368235 +n10371052 +n02611898 +n03597147 +n09912431 +n03135788 +n07888058 +n02409202 +n14582716 +n11934463 +n04395332 +n12558680 +n05257967 +n11798978 +n10617024 +n04102760 +n12132092 +n12988572 +n10390698 +n11887310 +n12063211 +n12952717 +n13141972 +n12176453 +n10245863 +n10509161 +n10389976 +n10333165 +n01474864 +n09274305 +n11888424 +n10368711 +n13222877 +n10469611 +n07582970 +n09700125 +n12805762 +n07865575 
+n07853852 +n03628421 +n04482975 +n03099622 +n01349735 +n11943133 +n12736603 +n12197601 +n10597745 +n04418644 +n12689305 +n07755262 +n10598459 +n04312020 +n03195485 +n09776642 +n10596517 +n10223606 +n01923890 +n12703716 +n03465040 +n12372233 +n12528109 +n03571853 +n10802621 +n10204177 +n02320465 +n03976105 +n02214096 +n02148991 +n10377542 +n10697135 +n03538542 +n07582027 +n04517999 +n12180456 +n02838014 +n03977266 +n03818001 +n12191240 +n11648776 +n10773800 +n04475496 +n03945817 +n04682018 +n02994743 +n02787269 +n11650160 +n03834472 +n03389983 +n09797742 +n06209940 +n12525513 +n12672289 +n01893164 +n10710259 +n01892145 +n11773408 +n10554024 +n09864968 +n10699752 +n11631405 +n10414768 +n04430605 +n10742546 +n10738871 +n12857204 +n09309046 +n01724840 +n04123317 +n07881525 +n03868044 +n02140268 +n10708292 +n09838295 +n09797998 +n10710171 +n11814996 +n11938556 +n03543511 +n02151230 +n01515217 +n03533392 +n02039780 +n12810151 +n02335231 +n12152251 +n13225617 +n09801275 +n01978587 +n14821852 +n11742878 +n12679023 +n03521431 +n09679028 +n02021281 +n10784544 +n04421258 +n12492460 +n03720005 +n02541257 +n03889397 +n02888898 +n10659762 +n12045157 +n12712320 +n10369095 +n09721444 +n12769318 +n01703161 +n12697514 +n07836456 +n03905361 +n10660883 +n07769306 +n11893916 +n07846274 +n04110281 +n03655470 +n07740744 +n01363719 +n12540647 +n09896311 +n12842642 +n07755619 +n07754155 +n11548870 +n02868546 +n04215588 +n04288165 +n13201566 +n07721118 +n12018271 +n11903333 +n02909165 +n02662559 +n11658709 +n13063514 +n07725663 +n10179069 +n10776887 +n12637485 +n03814528 +n12542043 +n07833333 +n07820036 +n02746683 +n07925808 +n10349750 +n03154316 +n04155625 +n03232923 +n02116185 +n09998788 +n02821543 +n03410303 +n10656223 +n07916582 +n12880638 +n10408809 +n04612840 +n11805255 +n12044784 +n10497534 +n03458422 +n12873341 +n07808675 +n09476123 +n07611733 +n10598013 +n02214660 +n05469664 +n03952150 +n11855435 +n04375926 +n08523340 +n01642391 +n04007415 +n09756961 +n12891824 +n02894847 
+n11698245 +n12906771 +n02894024 +n04131015 +n11882636 +n04386456 +n03291551 +n07837110 +n12462221 +n08540532 +n10299875 +n12705978 +n10448322 +n10487592 +n12175598 +n02272552 +n03833907 +n10383237 +n12758176 +n12729950 +n10061195 +n07816726 +n03241903 +n12239880 +n10380499 +n07855188 +n10207077 +n02770078 +n12961393 +n03778459 +n10734741 +n03485575 +n09958447 +n12337246 +n11830045 +n09866354 +n03209666 +n01470145 +n10395209 +n03872016 +n04267091 +n12888457 +n12104104 +n04088229 +n01964957 +n12002651 +n02503756 +n00481938 +n01908042 +n03378765 +n04193883 +n09862183 +n11861487 +n02520525 +n02081060 +n10386754 +n12693865 +n04514095 +n01325060 +n02460817 +n07568095 +n03651605 +n02561937 +n12844409 +n12888016 +n02974565 +n12439154 +n13018906 +n12071259 +n03897634 +n02863176 +n10603528 +n03493911 +n12887532 +n12944095 +n12794568 +n09980458 +n03503567 +n11783162 +n13123309 +n11729860 +n03702582 +n04280373 +n10086744 +n01790557 +n12627526 +n10552393 +n12092629 +n03888998 +n12751675 +n01442450 +n02479332 +n07726230 +n03642341 +n03142325 +n06263895 +n12088327 +n09703344 +n10528493 +n02820085 +n07737594 +n04090781 +n09901642 +n02328942 +n02724722 +n09866115 +n12658715 +n10481167 +n13135692 +n11850918 +n10205344 +n12361560 +n03698123 +n03284482 +n12106134 +n04441528 +n02591613 +n02581108 +n07856186 +n12197359 +n12900783 +n01725713 +n12012253 +n03907475 +n02170738 +n03694949 +n13238654 +n04611795 +n02782432 +n13191148 +n02741367 +n04170694 +n12770892 +n01973148 +n10080508 +n10161622 +n09808591 +n07912093 +n02059541 +n02779971 +n03857156 +n12945366 +n03055159 +n12758325 +n10067305 +n02597818 +n07808352 +n13147153 +n10679723 +n02271222 +n04012665 +n12942729 +n10349243 +n01377510 +n07800636 +n10654321 +n10219453 +n09961469 +n10732521 +n04479405 +n11632929 +n03856728 +n08658918 +n10327143 +n10754281 +n02085118 +n09691604 +n09952163 +n10082299 +n03872167 +n03733465 +n04138869 +n01425223 +n12066821 +n02177506 +n09892262 +n02896694 +n12983654 +n13224922 +n09658921 +n12744850 
+n03639880 +n02943686 +n10660621 +n11936539 +n03698226 +n04519536 +n12392765 +n09319604 +n07567039 +n04160261 +n01802159 +n02838178 +n07746910 +n02266421 +n10240417 +n12542240 +n12550408 +n01445857 +n04132465 +n03569014 +n12666050 +n12362514 +n10676569 +n09702673 +n12885510 +n04447156 +n04396226 +n12240150 +n11639306 +n02249134 +n01340785 +n02833140 +n10027590 +n02142407 +n11996251 +n07874531 +n04340019 +n03166120 +n10420277 +n04465203 +n12738259 +n12831141 +n03998673 +n01385017 +n12842519 +n02587051 +n10753061 +n12505253 +n13906936 +n01989516 +n12640435 +n07852532 +n04243142 +n10261511 +n12853287 +n12239240 +n03973003 +n09983889 +n10345302 +n14804958 +n02354162 +n03049326 +n10443659 +n01318660 +n12787364 +n04253304 +n11941094 +n09283514 +n09393524 +n11865574 +n01531639 +n04409279 +n02859729 +n10712835 +n03694196 +n04343630 +n10331098 +n12929600 +n02826259 +n10171219 +n07735179 +n07594840 +n03709644 +n09950728 +n09859285 +n07718329 +n01418620 +n09858299 +n12395068 +n10011360 +n07763290 +n02643316 +n03596099 +n04422566 +n11958888 +n09650989 +n10318686 +n01333082 +n12886402 +n03781467 +n12667582 +n02923535 +n09988311 +n08663860 +n02508346 +n13885011 +n03939281 +n10772937 +n04485750 +n09871952 +n10291942 +n07759324 +n10174971 +n03666238 +n01937579 +n02308033 +n07847706 +n10371330 +n04124887 +n11853079 +n11941478 +n12647231 +n04601041 +n12718483 +n02902816 +n01941340 +n04066767 +n07617839 +n02254901 +n03488784 +n07834774 +n02524659 +n03367969 +n10783734 +n03422484 +n09776807 +n03970363 +n10131590 +n03433247 +n02622712 +n10206506 +n12061104 +n11936287 +n07874674 +n10061043 +n07828275 +n03764606 +n12236768 +n01826844 +n09741904 +n05454978 +n03591592 +n01441272 +n03736372 +n07585474 +n12762405 +n12943912 +n01894522 +n03218446 +n11846425 +n11689678 +n04147916 +n02375862 +n10409459 +n09287415 +n10113583 +n03261263 +n02817386 +n09869578 +n10550252 +n02532786 +n12031388 +n07937344 +n11612235 +n01571410 +n09402944 +n04234670 +n02603862 +n04196925 +n09999135 +n10468750 
+n15093049 +n03003633 +n11650307 +n12312110 +n02525703 +n10501635 +n09751622 +n10114550 +n10103155 +n12829975 +n04004099 +n12419878 +n02082190 +n03328201 +n03093427 +n07845571 +n12655498 +n02558206 +n12563045 +n07573453 +n12324558 +n13016289 +n10601234 +n10310783 +n03531691 +n02135610 +n03168543 +n09985978 +n10615334 +n07839312 +n09985809 +n10142537 +n10417969 +n07869111 +n12514992 +n04327544 +n10326776 +n12583681 +n01476418 +n12840168 +n03852544 +n11713763 +n07824502 +n07858841 +n12256325 +n03036149 +n07883661 +n04500390 +n10170866 +n01835918 +n10760951 +n10720197 +n12330239 +n02135844 +n10210512 +n03217739 +n10802953 +n03136254 +n02161225 +n03961630 +n12927194 +n02251233 +n13891937 +n09945603 +n02695762 +n12181612 +n13234857 +n10175725 +n11346873 +n07934678 +n02318687 +n10251329 +n04112921 +n04001132 +n03042984 +n11704791 +n04246459 +n12193334 +n10718509 +n10371221 +n05278922 +n03265754 +n12186554 +n12481289 +n10521853 +n10748506 +n11729142 +n10143595 +n09422631 +n07562984 +n07850219 +n04193742 +n11997160 +n12002826 +n12820113 +n04132829 +n10272913 +n03358841 +n12610740 +n12384569 +n10725280 +n02746008 +n13148384 +n12635151 +n02337171 +n10350774 +n12308907 +n04542474 +n04339062 +n03549350 +n10240235 +n10556033 +n10214390 +n01791314 +n02801047 +n07817465 +n11610602 +n10315730 +n14592309 +n10249191 +n12453857 +n12579822 +n09833275 +n04051269 +n11552594 +n04088343 +n04565039 +n03930431 +n10679503 +n11899921 +n10295479 +n01357507 +n13036312 +n03404900 +n12523141 +n01816017 +n02020578 +n12661045 +n06262943 +n02775813 +n12921315 +n09751076 +n09834258 +n10585628 +n12885754 +n04411019 +n10342367 +n10368798 +n09672840 +n12729023 +n04578329 +n10325549 +n03680248 +n11920663 +n10416567 +n10011486 +n01643255 +n03193754 +n07823814 +n04055447 +n10660128 +n07765612 +n07612530 +n04205613 +n09677427 +n03989199 +n11100798 +n12721122 +n10000787 +n10382157 +n07724819 +n12928819 +n11631159 +n02608996 +n10516527 +n09703101 +n12290975 +n03470222 +n03810412 +n03729131 +n03356038 
+n12692024 +n12614625 +n10789415 +n02333819 +n01722670 +n03885410 +n12038208 +n02294097 +n02608860 +n02500596 +n07909231 +n03254625 +n09681973 +n12221368 +n01893399 +n10025295 +n03194812 +n13181406 +n12249122 +n03447894 +n09795010 +n02187900 +n10139651 +n10631654 +n01792530 +n02569631 +n07853946 +n09907804 +n03263758 +n04214649 +n02450829 +n02431542 +n11998492 +n02651060 +n04101860 +n01806061 +n13901423 +n12903964 +n03968479 +n04268565 +n12601494 +n02083780 +n04570118 +n12247407 +n03337822 +n09878921 +n02369935 +n10022908 +n09667358 +n13160938 +n11937360 +n07741623 +n03705808 +n12241426 +n10478118 +n03805933 +n10343869 +n09391774 +n03482128 +n10357737 +n10334461 +n09675045 +n09662951 +n10174253 +n01815270 +n13873361 +n04432785 +n09778927 +n10671898 +n05571341 +n10033572 +n09864632 +n10618465 +n03437184 +n12786464 +n01723579 +n11798270 +n07742415 +n02143142 +n10548419 +n03695122 +n02518622 +n04605446 +n10218292 +n11832671 +n12646950 +n03382708 +n09844898 +n09674786 +n01472502 +n07616906 +n09763272 +n03982767 +n10005006 +n03059236 +n01816474 +n03725869 +n01979269 +n04226322 +n13236100 +n03920384 +n11852148 +n04373563 +n04324120 +n11686652 +n03036341 +n02142898 +n09783776 +n13147918 +n03465320 +n07855721 +n10336411 +n10438619 +n07750299 +n12237152 +n03559373 +n10077106 +n10169796 +n09828403 +n09959658 +n12464128 +n12934685 +n04221673 +n02617537 +n11689367 +n10180580 +n07813717 +n12529905 +n02340186 +n01400247 +n11749112 +n04404072 +n03135656 +n12098827 +n12481150 +n10023506 +n03500838 +n01564101 +n04009923 +n10023264 +n03908456 +n03206405 +n07590068 +n09958133 +n10755394 +n01423617 +n11511327 +n10536274 +n01965252 +n11549245 +n11935627 +n09635635 +n03752071 +n07585997 +n03147084 +n12666159 +n09748408 +n03796848 +n01501948 +n02345078 +n12430675 +n03103128 +n11710987 +n03393199 +n09233603 +n10465002 +n04298765 +n01351170 +n02720576 +n03966582 +n10643837 +n12420124 +n10793799 +n01652297 +n09281252 +n11983606 +n10222497 +n11832899 +n02391617 +n12434106 +n03987674 
+n02140179 +n07896560 +n04325804 +n10647745 +n01924800 +n10156629 +n03545961 +n03906789 +n01890564 +n10699558 +n12332218 +n03247495 +n11839460 +n03527675 +n12586725 +n13208965 +n02714315 +n02750320 +n04615149 +n12679876 +n12863234 +n03304323 +n12139793 +n11922755 +n12321669 +n04979307 +n01921059 +n09657206 +n13042134 +n04045787 +n11700279 +n02337598 +n01415920 +n01400391 +n13207572 +n10785480 +n02515713 +n12018100 +n02634545 +n03292736 +n02881546 +n12655605 +n03105810 +n10545792 +n03894933 +n09796974 +n10320484 +n12308112 +n11549009 +n13047862 +n14941787 +n12379531 +n10540252 +n11696935 +n12184468 +n12851860 +n12908854 +n10586265 +n12369066 +n10426630 +n12523850 +n03916289 +n04538878 +n09908769 +n02828115 +n07560422 +n10266016 +n03569174 +n06423496 +n10495167 +n03617834 +n09327538 +n10195056 +n10508379 +n13031323 +n11659248 +n04242315 +n10742111 +n10700963 +n12032686 +n09877587 +n07825597 +n07568991 +n11736362 +n12169099 +n13103750 +n03263640 +n12248941 +n10665302 +n01920051 +n09704283 +n11533999 +n04503073 +n11645163 +n10639817 +n09920901 +n06340977 +n03251100 +n10378113 +n03226090 +n10131268 +n02877513 +n13191884 +n02787120 +n11709045 +n02740061 +n12323665 +n02831998 +n10342180 +n12716594 +n04498275 +n09905050 +n03745487 +n07642833 +n10294020 +n10211666 +n12205460 +n02981198 +n01642943 +n07679140 +n04390483 +n10432875 +n09214269 +n10792506 +n10243483 +n13099833 +n10221520 +n13177768 +n04091584 +n10672540 +n10200246 +n13889331 +n02345340 +n10237556 +n01833415 +n01335218 +n09804230 +n09957523 +n05235879 +n10070449 +n10308653 +n10721708 +n04312654 +n10394434 +n12201938 +n12434775 +n07601025 +n02672152 +n10157271 +n02635154 +n12572858 +n13182937 +n10160188 +n03396997 +n10344656 +n02968210 +n10190516 +n07684422 +n03706939 +n07618871 +n02290870 +n03817331 +n03275311 +n12698774 +n04375080 +n07837630 +n04314216 +n11833373 +n07618684 +n03742238 +n12532886 +n03712444 +n11750989 +n10038620 +n09617577 +n03807334 +n10108089 +n01816140 +n10715347 +n02648035 +n13127303 
+n02809491 +n02430748 +n12235479 +n01451863 +n01514926 +n10010864 +n01913440 +n09660240 +n11806369 +n01470479 +n12655245 +n07655067 +n03436772 +n11778092 +n03951800 +n10277815 +n07931733 +n01479820 +n03576955 +n07609549 +n12568649 +n05263316 +n02636405 +n01384084 +n03298352 +n07617344 +n09987045 +n10573957 +n07801709 +n02589062 +n02534165 +n02748359 +n09607782 +n07590974 +n02199170 +n02696569 +n09678747 +n12795209 +n13176363 +n10663315 +n10588724 +n09772330 +n10174589 +n12366313 +n11883628 +n07617447 +n01334690 +n03168663 +n11764478 +n08599174 +n03942028 +n12153033 +n03448696 +n12096674 +n10037588 +n03548320 +n09760290 +n10374541 +n09653438 +n10294139 +n10276942 +n12279293 +n12764507 +n12803958 +n10764622 +n02140858 +n07599068 +n10245507 +n12351790 +n12818004 +n10118301 +n03945459 +n09912995 +n12176709 +n03873996 +n10339179 +n10614507 +n10114662 +n10784922 +n03821424 +n04959230 +n13015509 +n12573911 +n11948469 +n09775907 +n12758014 +n01780142 +n09956578 +n12165384 +n10088200 +n10382480 +n04131113 +n09930628 +n09784160 +n11750173 +n13064111 +n03817522 +n12662074 +n03176238 +n12310021 +n11679378 +n09961331 +n02385580 +n11904274 +n03113505 +n10244913 +n02836900 +n09986700 +n11963572 +n13158605 +n10321632 +n02179891 +n02189670 +n10097995 +n10774756 +n10783240 +n10605737 +n02530052 +n10386196 +n10184505 +n09788237 +n03589672 +n12509109 +n10658304 +n12966804 +n12559518 +n03189311 +n01451295 +n12179632 +n12301613 +n10496489 +n03402785 +n10244108 +n02385676 +n03552001 +n03092053 +n02313360 +n02547733 +n02109391 +n01327909 +n04574606 +n03060728 +n07840124 +n10567848 +n10062176 +n02703124 +n10804732 +n12699301 +n04515890 +n07919665 +n10457214 +n09663248 +n03165955 +n12988341 +n03987865 +n03031756 +n10277912 +n10172080 +n09325824 +n03198223 +n09605110 +n10113869 +n11603462 +n03352366 +n11930203 +n09769929 +n12979316 +n02579762 +n09953052 +n03105974 +n00476140 +n11598287 +n02830157 +n10512201 +n09746936 +n10668666 +n02919976 +n09993651 +n02149861 +n09705003 +n10389865 
+n11655152 +n10010767 +n10070563 +n03688832 +n10590239 +n11936027 +n02939763 +n03163488 +n03171910 +n09955406 +n03266195 +n10217208 +n09338013 +n07594250 +n03215930 +n09725935 +n10592049 +n03732658 +n12498457 +n09966554 +n10668450 +n10361525 +n04060198 +n11936624 +n02602760 +n03942600 +n03708425 +n10020533 +n12067817 +n07590177 +n01891274 +n11837204 +n01419332 +n03860234 +n12616248 +n07834160 +n09867154 +n09788073 +n12222493 +n03388990 +n04245412 +n10182402 +n11675404 +n10450038 +n13045594 +n13158167 +n13082568 +n12052267 +n12707199 +n07810531 +n07914887 +n13127001 +n02573249 +n08619112 +n10471859 +n09919899 +n03635516 +n12067029 +n03352232 +n07765517 +n10519984 +n02742194 +n03062798 +n13124654 +n09958569 +n02370137 +n10121714 +n04019335 +n07732433 +n02559383 +n12585137 +n09729156 +n10744078 +n09954355 +n03078506 +n10062042 +n10688811 +n02668613 +n03142205 +n10347204 +n10518349 +n09898020 +n12563702 +n05468098 +n10116370 +n07838905 +n03127024 +n03545585 +n12801072 +n09940818 +n04480995 +n10466564 +n02606751 +n10032987 +n10771066 +n01587278 +n11852531 +n01455461 +n10397392 +n02349205 +n10180923 +n09778266 +n04366832 +n10051975 +n10538629 +n09865744 +n12554029 +n13118330 +n12952590 +n04187751 +n09924313 +n10062594 +n01980655 +n10028402 +n02567334 +n10590903 +n10265891 +n10739297 +n01457082 +n03437581 +n03713151 +n03475674 +n05464534 +n11863467 +n06592421 +n12491435 +n14914945 +n10279778 +n03388711 +n10483890 +n10612373 +n03332784 +n02332954 +n02952798 +n13041943 +n01607309 +n04356772 +n07711799 +n12670962 +n12229111 +n07878479 +n12401893 +n07772413 +n12138110 +n09781504 +n07902698 +n02750652 +n13042316 +n12400924 +n02304797 +n03066464 +n12852234 +n10155222 +n05541509 +n10711483 +n04210858 +n02835551 +n12859679 +n02935490 +n03540476 +n05279953 +n09807075 +n09617435 +n03566860 +n10549510 +n10025391 +n10754449 +n11927740 +n03554645 +n01837526 +n02656969 +n08648917 +n07860548 +n01452345 +n04021704 +n07783827 +n10080117 +n02187554 +n03214966 +n10036444 +n04291069 
+n12407396 +n02170599 +n09896826 +n12417836 +n07845495 +n02749292 +n03061819 +n03682380 +n10756261 +n10369955 +n09692125 +n09978442 +n04277669 +n10539278 +n09703932 +n01879837 +n02746225 +n13159357 +n11763874 +n10540656 +n07933530 +n12987535 +n02371344 +n10654827 +n09723944 +n12775393 +n11856573 +n12626878 +n12716400 +n09903639 +n09784043 +n03906894 +n10775128 +n03124313 +n10396727 +n02841641 +n10211830 +n12283395 +n03490784 +n14175579 +n04027935 +n12396091 +n02609823 +n01414216 +n09880741 +n11976933 +n03073384 +n09270160 +n11768816 +n12073217 +n11597657 +n09994878 +n11756329 +n12579404 +n03161893 +n01451115 +n07736971 +n02949356 +n03878418 +n12653436 +n10626630 +n12777892 +n13061704 +n10498699 +n03609786 +n03199358 +n10776339 +n10762480 +n13179056 +n10113249 +n04029913 +n12640081 +n10493835 +n11683216 +n03524287 +n04585626 +n02969527 +n12976554 +n08569482 +n10204833 +n12442548 +n02577952 +n09357447 +n10202225 +n02198129 +n11882972 +n10404426 +n01600341 +n12016434 +n09867069 +n10576223 +n09893600 +n01702479 +n04274686 +n04406552 +n02848118 +n02258629 +n03260733 +n03685640 +n11751974 +n09967555 +n06274546 +n09649067 +n10681557 +n07606933 +n03110202 +n11982545 +n10803031 +n02679142 +n04086937 +n10514255 +n04506402 +n03884554 +n09970192 +n10117017 +n12642435 +n10186686 +n02097967 +n03956531 +n11834890 +n02677436 +n10040698 +n11796188 +n03348142 +n04168472 +n02294407 +n12483282 +n09429630 +n04423687 +n09819477 +n09755555 +n10157016 +n03344935 +n07762373 +n12871859 +n09853541 +n09875979 +n13050705 +n02251067 +n10637483 +n03823673 +n10357012 +n03424204 +n04431648 +n01475940 +n02339282 +n10248198 +n07683265 +n13150592 +n10359117 +n10096508 +n03473078 +n13052248 +n10743356 +n03710079 +n10634990 +n04507689 +n07921090 +n02352002 +n03924407 +n03609147 +n02837567 +n03406759 +n03909658 +n10286282 +n12135576 +n01912809 +n10801561 +n10717055 +n03473465 +n03761588 +n03144156 +n09474412 +n10253611 +n12549420 +n02499568 +n09910222 +n10431122 +n12699031 +n01697749 +n11786843 
+n03888808 +n12089496 +n10066314 +n10302905 +n12696830 +n09965787 +n11969806 +n04066388 +n13080306 +n03913930 +n09968259 +n10490421 +n10714195 +n07570021 +n10343449 +n10401204 +n03472796 +n10779897 +n11787190 +n03503097 +n10439523 +n12123648 +n04279858 +n10511771 +n09755788 +n08253141 +n02616397 +n12248574 +n01645466 +n04334504 +n07729142 +n05451099 +n10503818 +n10354265 +n09707735 +n02633422 +n11999656 +n01324916 +n02088745 +n09354511 +n10705448 +n09756195 +n10136615 +n10427359 +n09702134 +n12600095 +n04122262 +n10791820 +n03330002 +n02713496 +n11710658 +n09664908 +n02550203 +n02349847 +n12835766 +n04098260 +n11536567 +n11686780 +n12875861 +n12758471 +n09806944 +n11810030 +n10400003 +n10098388 +n11663263 +n10559683 +n07833672 +n10753989 +n10643095 +n01988869 +n03112240 +n12911914 +n09979913 +n09785236 +n09790047 +n02676097 +n01653509 +n04601159 +n01938735 +n10748142 +n12978076 +n11990627 +n10437262 +n12972136 +n04077594 +n10148825 +n02269340 +n12886185 +n03608504 +n11677485 +n10612518 +n12267265 +n10649308 +n05458173 +n10650162 +n03213361 +n02747063 +n01611674 +n02322992 +n01554017 +n03512624 +n12773142 +n12747120 +n09902128 +n03162714 +n03924532 +n10299125 +n12378753 +n02778131 +n09976024 +n13093629 +n10778999 +n07721833 +n12232851 +n07876775 +n10097590 +n03194170 +n13029122 +n04573832 +n12859272 +n09639382 +n07688021 +n02878796 +n10751710 +n03633632 +n07762534 +n10779995 +n13914265 +n13093275 +n10729330 +n10433077 +n03663910 +n10499110 +n02272286 +n10371450 +n01967308 +n12633061 +n11659627 +n12982915 +n10344121 +n10268629 +n02697876 +n09879552 +n10167361 +n10719807 +n04042076 +n01632952 +n03243625 +n02125872 +n10105906 +n12194613 +n03149810 +n10721124 +n03947343 +n02020219 +n10122531 +n01315330 +n08647264 +n00452734 +n03607527 +n10010243 +n09863749 +n04473275 +n11782878 +n03585337 +n09655466 +n12989007 +n11711971 +n10716864 +n10475835 +n10704712 +n01894956 +n10568443 +n12881105 +n10387836 +n10403633 +n08645318 +n03500457 +n10377633 +n10108464 +n09933972 
+n02618094 +n11798688 +n04155735 +n09780395 +n12822466 +n04302200 +n11899223 +n10633298 +n02760298 +n12142450 +n10803282 +n10769321 +n10514051 +n10597889 +n11837562 +n02261757 +n01458746 +n09830759 +n10003476 +n09817174 +n10738670 +n10118743 +n12096563 +n03054491 +n12155773 +n10439727 +n04170384 +n03223923 +n12632733 +n07845421 +n10062905 +n11831521 +n04267985 +n12796385 +n04154854 +n00444142 +n09778537 +n03115663 +n04385157 +n10109826 +n02337332 +n09996304 +n09880189 +n12871696 +n11823305 +n02516776 +n12377494 +n08511017 +n04421417 +n10765305 +n09675673 +n03488111 +n03076623 +n11829672 +n10292316 +n10758949 +n13031474 +n02829353 +n10090745 +n09186592 +n12736999 +n12715195 +n11684499 +n03168933 +n09890192 +n10596899 +n12527081 +n10496393 +n10497135 +n02137302 +n03266620 +n12958615 +n12664187 +n02633977 +n04262869 +n04215800 +n13133233 +n02392555 +n09858733 +n10186350 +n01715888 +n03142099 +n08573674 +n11687071 +n02690715 +n03146342 +n12331788 +n08079613 +n10609092 +n12943049 +n12234318 +n02312325 +n12618146 +n10135197 +n11705573 +n02794368 +n02850358 +n09464486 +n01993525 +n03187153 +n10097262 +n02976641 +n12198793 +n12941717 +n10219240 +n12434634 +n03827420 +n10437137 +n10342893 +n04174026 +n10265281 +n07757874 +n10765885 +n01470895 +n02349557 +n11716698 +n03765467 +n10227793 +n07824268 +n12994892 +n10486236 +n02974454 +n10718349 +n11726145 +n09909660 +n03378593 +n07805006 +n09875025 +n02645691 +n10223069 +n03722944 +n04389999 +n02544274 +n10239928 +n04456011 +n10382302 +n01552333 +n10082562 +n12952469 +n09883047 +n10442573 +n01891013 +n10690268 +n13111504 +n02287352 +n03567635 +n10331347 +n09762385 +n09933842 +n02369555 +n12291459 +n09919200 +n01492860 +n02067768 +n10713254 +n10550468 +n12846335 +n03835729 +n12467018 +n11676743 +n03629643 +n12987423 +n10655730 +n08678783 +n10349836 +n10087736 +n10246703 +n10338391 +n04585456 +n04158138 +n10500942 +n09850974 +n10791890 +n10020807 +n03315805 +n02752917 +n04033801 +n10492086 +n04427473 +n02940706 +n12110475 
+n09832978 +n12515393 +n07800487 +n09848110 +n02659176 +n09967406 +n10536134 +n10760622 +n09736485 +n07830690 +n07835173 +n09814252 +n10311506 +n10341955 +n03869838 +n07760673 +n09970402 +n12526178 +n11687964 +n09968741 +n10719267 +n07851054 +n10116478 +n10599215 +n09951524 +n03855908 +n03997274 +n02986348 +n08599292 +n02474282 +n04155889 +n09983314 +n01987727 +n10280130 +n10404998 +n02294577 +n02998696 +n08586978 +n11652578 +n13867005 +n12663254 +n10524869 +n02287622 +n10220924 +n03279918 +n02626089 +n10291110 +n12820669 +n07861681 +n08643267 +n07720185 +n12555859 +n03225616 +n09769525 +n03295140 +n12489046 +n10615179 +n12150969 +n02888429 +n10753182 +n10267166 +n03675558 +n12693352 +n02378299 +n02788462 +n03622401 +n12236977 +n10730542 +n12758099 +n10502046 +n11937195 +n10366145 +n10307114 +n12984595 +n10128748 +n09362316 +n09789898 +n09654079 +n04260192 +n10114476 +n08623676 +n10331841 +n05265736 +n10269289 +n03090856 +n12764978 +n02825240 +n10358032 +n09825750 +n03062651 +n11196627 +n11825749 +n04148464 +n04439505 +n07572858 +n04561857 +n12904562 +n03643907 +n10723597 +n01492708 +n10071557 +n10140683 +n01739871 +n12984267 +n03072056 +n10772580 +n10462588 +n11936448 +n10494373 +n12845908 +n09793352 +n10717196 +n12577362 +n09779124 +n10663549 +n02286425 +n10380126 +n01890144 +n02751490 +n03361109 +n01781875 +n13128278 +n09994400 +n09883452 +n13881512 +n02833275 +n10362003 +n01376543 +n12366675 +n09984960 +n10173665 +n10673776 +n02057898 +n01934844 +n04057673 +n10018747 +n02916065 +n13024653 +n05539947 +n09648911 +n04150273 +n01393486 +n10411356 +n12232114 +n02436224 +n12757930 +n03095965 +n10555059 +n01577458 +n09666476 +n10598904 +n11656549 +n02591911 +n13092385 +n10506336 +n13103023 +n09658254 +n04095938 +n11936782 +n07824383 +n09781650 +n10240821 +n01780426 +n02850060 +n02863340 +n13914141 +n12138578 +n13034555 +n12291671 +n12133151 +n04515444 +n04591359 +n02589196 +n02689819 +n11740414 +n07610295 +n10246395 +n09921034 +n12447346 +n12641180 +n01419573 
+n04242587 +n07760395 +n03399579 +n09866661 +n02549376 +n11861238 +n01588996 +n04319545 +n09789150 +n03288643 +n10312491 +n03353281 +n02345997 +n09711132 +n03043173 +n02558860 +n03703590 +n03188871 +n12589142 +n12113323 +n09987161 +n05242239 +n09686262 +n09780984 +n09668199 +n09716933 +n11675738 +n04459243 +n11833749 +n10646942 +n07760070 +n10286539 +n04469684 +n13030616 +n03939440 +n01725086 +n09967816 +n10500824 +n13026015 +n03983928 +n02936921 +n04115542 +n10245029 +n12105828 +n12452673 +n10498046 +n10737264 +n11766046 +n04079603 +n10072054 +n12569037 +n10153155 +n09867311 +n02806992 +n10258602 +n10164025 +n10520964 +n02258508 +n12199399 +n05266096 +n08496334 +n10351064 +n12441552 +n12878325 +n13102648 +n02980625 +n03462972 +n12395906 +n13022903 +n11895714 +n03324814 +n11318824 +n01728266 +n07883510 +n10731013 +n10181799 +n12142357 +n09671089 +n11531334 +n01718414 +n04573625 +n10390600 +n11553522 +n01314910 +n04227519 +n10514784 +n02944256 +n12103680 +n03081859 +n11655592 +n12569616 +n10700105 +n09755086 +n03865820 +n01456137 +n10442232 +n02900987 +n04491934 +n07849026 +n04519728 +n09986450 +n03305300 +n10186143 +n02879422 +n03018614 +n10747548 +n10562509 +n10068425 +n12593341 +n11937692 +n08679562 +n09613690 +n10646433 +n12251740 +n10994097 +n13048447 +n03848537 +n12153741 +n12614096 +n11654438 +n09985470 +n10562968 +n02923915 +n10740594 +n07802767 +n12514592 +n10335801 +n03878674 +n12586499 +n10255459 +n02413824 +n10312600 +n02616128 +n12644283 +n04238953 +n04526520 +n01898593 +n09737161 +n03372822 +n09781398 +n10339251 +n02502807 +n10198832 +n10679610 +n13136781 +n11974373 +n11680457 +n10083677 +n04037298 +n09945021 +n09987239 +n02708885 +n13107807 +n10130877 +n12507379 +n08651104 +n12116058 +n10135297 +n04269086 +n03858533 +n10477955 +n04394031 +n10442417 +n10074735 +n03618797 +n03460455 +n04374521 +n10756061 +n08517010 +n12923108 +n02362194 +n01704103 +n10062492 +n01394771 +n10473789 +n10330593 +n02748183 +n12562141 +n09745933 +n02505485 +n11922661 
+n12018014 +n09866922 +n04067143 +n13161254 +n07813495 +n01374846 +n10213429 +n03253071 +n02546028 +n01642097 +n01475232 +n03212247 +n10155600 +n11689957 +n11738997 +n10525878 +n03301389 +n10589666 +n01908958 +n10289766 +n03900028 +n03437295 +n02987823 +n02739123 +n10505347 +n02546627 +n10381804 +n10132502 +n10336904 +n10189597 +n09786115 +n12875697 +n10761519 +n01470733 +n02875626 +n12111238 +n07862770 +n07856895 +n09996039 +n03368048 +n07913300 +n10062996 +n10555430 +n04302863 +n12758555 +n10740732 +n02385898 +n02385098 +n12162758 +n03887899 +n03976268 +n04234160 +n03641947 +n07857076 +n10578656 +n12135729 +n12675515 +n09032191 +n12969670 +n02600503 +n12518013 +n10227166 +n10121026 +n01801672 +n10661216 +n03244388 +n04147291 +n09664556 +n02539573 +n04480141 +n10601362 +n02613572 +n10537906 +n02613820 +n11656771 +n03841011 +n02845985 +n12534208 +n10241024 +n03645290 +n12743976 +n11922839 +n07709701 +n03066232 +n03467380 +n09266604 +n09663786 +n12775070 +n02427183 +n04083113 +n12896615 +n10501453 +n02345774 +n09965515 +n09704157 +n10666752 +n03846970 +n04167661 +n03991321 +n09556121 +n10686517 +n02586238 +n03594277 +n03591313 +n10391416 +n10756837 +n13163649 +n03971960 +n10245341 +n02577041 +n04481642 +n12373739 +n10214062 +n10091997 +n10275848 +n02090253 +n03514340 +n04593629 +n11795216 +n03126927 +n11871748 +n10272782 +n12056099 +n04484024 +n03101375 +n12255225 +n10724372 +n10531838 +n02354781 +n02389865 +n02853336 +n01477080 +n01779939 +n10776052 +n10724132 +n10284871 +n10554141 +n03898787 +n02366301 +n10721612 +n04421740 +n04256758 +n01445593 +n10103921 +n02729222 +n02530188 +n02387452 +n02601921 +n01711160 +n02474110 +n09869447 +n12789977 +n10158506 +n10396908 +n07839593 +n02662825 +n02473720 +n13034788 +n07752602 +n03762238 +n10262880 +n07770180 +n04030054 +n10151367 +n03525252 +n10252075 +n10747424 +n10191388 +n04130566 +n03951068 +n13239921 +n03733547 +n10358124 +n11549779 +n09203827 +n04043168 +n10359422 +n04286960 +n04237287 +n10130686 +n02338449 
+n12912274 +n10586998 +n02812785 +n10364502 +n03955941 +n12324222 +n09743601 +n03766600 +n01427399 +n12968309 +n11776234 +n01501777 +n10051026 +n10397001 +n01516212 +n02596252 +n02225081 +n10479328 +n02109687 +n10181445 +n02248062 +n03802973 +n01639187 +n02142734 +n02342534 +n02410141 +n02743426 +n03950359 +n12253835 +n07805478 +n03706415 +n03578981 +n04560619 +n09761753 +n03524425 +n01962788 +n04350235 +n10686694 +n13139321 +n10195155 +n12335937 +n12758399 +n03805374 +n12895298 +n03800371 +n11972959 +n11530008 +n03178538 +n02217839 +n10591072 +n04033557 +n01880813 +n12292877 +n02430643 +n07599383 +n01954516 +n09894909 +n02474605 +n03576443 +n07595051 +n03367875 +n12945549 +n02360480 +n14583400 +n04208582 +n02405577 +n02550655 +n02513355 +n04381450 +n00444490 +n03567912 +n09937688 +n07932323 +n04029416 +n01913346 +n13237508 +n04437276 +n12938445 +n03042384 +n12543639 +n03194992 +n04094250 +n12045514 +n03825913 +n03504293 +n12758250 +n03547861 +n03649288 +n04572235 +n07569423 +n03534695 +n03253714 +n01501641 +n13906767 +n12578255 +n11749603 +n07742513 +n07609083 +n04214413 +n07595751 +n12013701 +n12592839 +n12949160 +n04093223 +n02983072 +n03510072 +n02966068 +n03867854 +n01747285 +n10691318 +n13091982 +n12574470 +n02255023 +n03449217 +n03153585 +n04006227 +n13140049 +n02965024 +n03805503 +n03911406 +n13120958 +n12203699 +n01456454 +n10397142 +n12920043 +n02412977 +n08674344 +n07801007 +n03037590 +n10361296 +n13133316 +n03483637 +n04435759 +n12983873 +n02627037 +n03783304 +n07725158 +n02921292 +n01788864 +n01705010 +n12616996 +n03903290 +n08662427 +n03667060 +n07856992 +n03252422 +n02449699 +n12137954 +n10024025 +n07891095 +n04337157 +n04368109 +n03015631 +n02363996 +n12824289 +n03206602 +n12799269 +n02333733 +n01793565 +n01721898 +n03178173 +n02844056 +n11688378 +n13889066 +n02637475 +n03750437 +n01403457 +n01717229 +n02677136 +n12512294 +n03736269 +n02838577 +n08661878 +n01993830 +n02777638 +n02900857 +n04023021 +n03843092 +n07770439 +n12928491 +n03697812 
+n02639922 +n13139482 +n07771082 +n12487058 +n07774182 +n02122810 +n02856362 +n11686195 +n11687432 +n02853870 +n04239218 +n02665250 +n02938218 +n11746600 +n10183347 +n10681194 +n04164199 +n04407257 +n12549005 +n02331842 +n03862379 +n02863638 +n11962994 +n03091907 +n04177654 +n02252972 +n02403153 +n01376437 +n02848806 +n08579266 +n07616265 +n10331258 +n10765587 +n09433312 +n03412387 +n10178077 +n13123841 +n02532918 +n04144651 +n03296963 +n03450881 +n04348988 +n10425946 +n03257065 +n02354320 +n11689197 +n04084682 +n10140783 +n03637027 +n02346170 +n02559144 +n01705591 +n09400584 +n03840327 +n03918074 +n04053767 +n02406046 +n00288190 +n03160001 +n03366464 +n09249155 +n01324305 +n07556872 +n03381565 +n12705220 +n11874878 +n02632494 +n02502006 +n03146560 +n02179340 +n04312756 +n10162016 +n03800563 +n04140853 +n07933652 +n03075248 +n04421582 +n10652703 +n02218134 +n12233249 +n04578559 +n01781071 +n02615298 +n04436832 +n04054566 +n02608284 +n11674019 +n03505764 +n02662397 +n09422190 +n04382537 +n04355684 +n04383923 +n09888635 +n03783575 +n03228796 +n07772026 +n02381119 +n15060326 +n10586166 +n12647787 +n02458517 +n10281546 +n03498866 +n02485988 +n10121246 +n09391644 +n03103904 +n08676253 +n02203978 +n04092168 +n03213014 +n03138217 +n04135933 +n12612811 +n04478066 +n02157285 +n02543255 +n03863783 +n01502101 +n03930229 +n12439830 +n09425019 +n02618513 +n02910241 +n12261359 +n03648667 +n04365229 +n03461651 +n04388040 +n03295928 +n03581531 +n04203356 +n02622249 +n13142907 +n04497249 +n11678377 +n02366579 +n02931013 +n02837134 +n03132438 +n13092987 +n04196803 +n03056215 +n03255322 +n02130925 +n10291469 +n02971940 +n01718096 +n12510774 +n11766432 +n04271891 +n03366721 +n03154616 +n03694356 +n10478293 +n11763142 +n07763483 +n03037228 +n09201998 +n01517389 +n00443517 +n12693244 +n03580990 +n03519848 +n10238375 +n10783646 +n03564849 +n03975926 +n02473554 +n02450426 +n03464952 +n04411835 +n04573045 +n10505732 +n04337650 +n10621514 +n10334782 +n12434985 +n07769102 +n10594523 
+n05475397 +n01875610 +n03299406 +n10507692 +n02593679 +n03317233 +n13239736 +n03550420 +n03247351 +n03819047 +n03633341 +n03154745 +n04073425 +n04532022 +n02910964 +n04301242 +n04378651 +n13098515 +n11775626 +n14603798 +n10263146 +n01886045 +n03761731 +n02224713 +n04591249 +n02144251 +n03849412 +n11548728 +n04051705 +n12298165 +n03150795 +n03989447 +n02826459 +n07602650 +n03155915 +n09891730 +n02067603 +n01523105 +n03618339 +n03897130 +n02711780 +n05285623 +n03533486 +n04085873 +n01923404 +n10139077 +n01709484 +n02183507 +n03216562 +n01971850 +n03136051 +n02948834 +n03589313 +n03665851 +n02937336 +n02035656 +n07769465 +n07849186 +n12585373 +n12280364 +n02846260 +n02511730 +n02614653 +n04193179 +n11718681 +n09467696 +n01522450 +n03040836 +n03162297 +n11896141 +n04000480 +n10350220 +n07746038 +n02124157 +n10655169 +n03476542 +n03895038 +n00443917 +n07757753 +n01726203 +n02987706 +n12750076 +n03012734 +n02941228 +n04194009 +n04501127 +n09794550 +n03510487 +n08589670 +n03166951 +n03673270 +n09792125 +n08492354 +n02396157 +n01628331 +n03993878 +n07833816 +n04958865 +n13650447 +n04339191 +n02826683 +n02893269 +n02810139 +n02626471 +n02589796 +n08677801 +n04325968 +n03275864 +n02622547 +n04406687 +n04097085 +n02998107 +n07831450 +n03658102 +n02575590 +n03523398 +n02412909 +n02953850 +n04337503 +n03510987 +n12664005 +n03710294 +n13138155 +n10110093 +n07831955 +n03932080 +n12971804 +n03943623 +n03726371 +n10531445 +n12984489 +n07835051 +n12097556 +n02685701 +n03038041 +n02451125 +n04594919 +n02372140 +n02665985 +n03496183 +n03961828 +n03802800 +n01713170 +n03602790 +n04974145 +n02780588 +n04031884 +n03588216 +n02614140 +n04578708 +n04501281 +n03166600 +n03992975 +n04206070 +n03227721 +n02582349 +n02664642 +n07805389 +n09226869 +n02459190 +n12216968 +n03628984 +n02524928 +n09209025 +n04078002 +n03167153 +n03562565 +n07599554 +n10252547 +n03279804 +n07692887 +n14909584 +n02529293 +n04444953 +n04156814 +n07616174 +n03415626 +n03331244 +n03868324 +n03644073 +n02818687 
+n10085101 +n02953056 +n03202481 +n02118707 +n03591901 +n12602434 +n02943465 +n02818254 +n07922607 +n02597004 +n04212810 +n04056073 +n12327528 +n02207647 +n01792808 +n03002555 +n03951213 +n12242123 +n10062275 +n12325787 +n10048117 +n11937278 +n03624767 +n04039041 +n04059298 +n03707171 +n07758407 +n01333483 +n02219015 +n02436645 +n02478239 +n04457638 +n01781698 +n09474765 +n03686363 +n10769084 +n09456207 +n02385776 +n13555775 +n03962685 +n13129078 +n03463185 +n01429172 +n04243251 +n12177129 +n03143754 +n03958338 +n02791795 +n04560502 +n12776774 +n02745816 +n03009111 +n02976552 +n03008817 +n03211413 +n03537550 +n12200504 +n01909788 +n11790089 +n03480973 +n10507070 +n01707294 +n04374907 +n04281571 +n00006024 +n03823906 +n12603273 +n03503358 +n04027820 +n12645530 +n02535080 +n04143365 +n08385989 +n12661227 +n12814857 +n11871059 +n04268418 +n13128582 +n01928865 +n04359124 +n12670334 +n03610836 +n04543924 +n02252799 +n15102359 +n04437380 +n04316924 +n11872324 +n09330378 +n10122300 +n03784139 +n00443375 +n14993378 +n01721174 +n00004475 +n00006484 +n00007846 +n00015388 +n00017222 +n00021265 +n00021939 +n00288000 +n00433458 +n00433661 +n00433802 +n00439826 +n00440218 +n00440509 +n00440747 +n00441824 +n00442115 +n00442981 +n00443231 +n00444651 +n00445351 +n00445802 +n00447073 +n00447221 +n00447540 +n00448466 +n00448640 +n00448958 +n00449295 +n00449695 +n00450335 +n00450700 +n00451370 +n00451866 +n00452293 +n00453935 +n00454237 +n00454624 +n00463246 +n00464651 +n00464894 +n00467719 +n00467995 +n00468480 +n00469651 +n00471437 +n00471613 +n00479076 +n00480508 +n00480993 +n00482298 +n00523513 +n01035667 +n01316422 +n01316579 +n01316949 +n01317089 +n01317391 +n01317541 +n01319467 +n01320872 +n01321230 +n01321579 +n01321854 +n01322343 +n01322685 +n01322898 +n01323068 +n01326291 +n01329186 +n01338685 +n01339336 +n01340935 +n01342269 +n01358572 +n01367772 +n01375204 +n01376237 +n01380610 +n01384687 +n01385330 +n01387065 +n01389507 +n01390123 +n01392380 +n01395254 +n01397114 
+n01402600 +n01407798 +n01421807 +n01438208 +n01439121 +n01439514 +n01439808 +n01441425 +n01444783 +n01445429 +n01446589 +n01446760 +n01448951 +n01450661 +n01454856 +n01455778 +n01458842 +n01459791 +n01461646 +n01466257 +n01467336 +n01468238 +n01468712 +n01471682 +n01473806 +n01474283 +n01477525 +n01478511 +n01480516 +n01480880 +n01481331 +n01482071 +n01482330 +n01483522 +n01484097 +n01488918 +n01491874 +n01492357 +n01493541 +n01494757 +n01494882 +n01495006 +n01495701 +n01497118 +n01498406 +n01498699 +n01498989 +n01500091 +n01501160 +n01503061 +n01514752 +n01515078 +n01517565 +n01524359 +n01525720 +n01527194 +n01527617 +n01528654 +n01529672 +n01533339 +n01534582 +n01534762 +n01537134 +n01538955 +n01539573 +n01540233 +n01541922 +n01542786 +n01544208 +n01546921 +n01547832 +n01548301 +n01549430 +n01550761 +n01553142 +n01555809 +n01557185 +n01560105 +n01560636 +n01563128 +n01563746 +n01564394 +n01567133 +n01568132 +n01569836 +n01570676 +n01571904 +n01572328 +n01573074 +n01574045 +n01574390 +n01575745 +n01576695 +n01577659 +n01578575 +n01579028 +n01580379 +n01580490 +n01580772 +n01580870 +n01581166 +n01581434 +n01581730 +n01582398 +n01582498 +n01582856 +n01584225 +n01585121 +n01587834 +n01588431 +n01589286 +n01591697 +n01592257 +n01592540 +n01594372 +n01595624 +n01597336 +n01598588 +n01598988 +n01600085 +n01600657 +n01602080 +n01602209 +n01602630 +n01603600 +n01604330 +n01605630 +n01608814 +n01609062 +n01609391 +n01609751 +n01610955 +n01611472 +n01612628 +n01613294 +n01613615 +n01615121 +n01616551 +n01616764 +n01617095 +n01617443 +n01617766 +n01618082 +n01618922 +n01619310 +n01619536 +n01619835 +n01620135 +n01620414 +n01620735 +n01621127 +n01622352 +n01623706 +n01627424 +n01629276 +n01630284 +n01631175 +n01632047 +n01637112 +n01637932 +n01639765 +n01640846 +n01645776 +n01649170 +n01650167 +n01651487 +n01653773 +n01661091 +n01661592 +n01661818 +n01662622 +n01662784 +n01663401 +n01664369 +n01665932 +n01667432 +n01668091 +n01669372 +n01670092 +n01672032 +n01674216 
+n01674464 +n01674990 +n01676755 +n01680264 +n01680478 +n01681940 +n01684133 +n01685439 +n01686044 +n01686220 +n01686403 +n01686609 +n01686808 +n01687665 +n01688961 +n01689411 +n01691951 +n01692864 +n01693783 +n01694709 +n01696633 +n01697178 +n01698434 +n01699040 +n01701551 +n01702256 +n01703011 +n01703569 +n01705934 +n01708106 +n01708998 +n01712008 +n01712752 +n01717016 +n01719403 +n01722998 +n01724231 +n01726692 +n01727646 +n01730185 +n01730307 +n01730812 +n01730960 +n01731545 +n01732244 +n01733757 +n01734637 +n01734808 +n01735439 +n01735577 +n01735728 +n01737472 +n01737728 +n01737875 +n01738065 +n01738601 +n01739647 +n01740551 +n01741232 +n01741562 +n01741943 +n01743605 +n01745125 +n01745484 +n01746359 +n01747885 +n01749582 +n01749742 +n01751036 +n01752165 +n01753959 +n01754876 +n01755740 +n01767661 +n01769347 +n01770795 +n01771417 +n01772222 +n01775370 +n01776192 +n01776705 +n01777304 +n01777467 +n01777649 +n01777909 +n01778217 +n01778487 +n01778621 +n01778801 +n01779148 +n01779463 +n01779629 +n01780696 +n01782209 +n01785667 +n01789386 +n01789740 +n01791107 +n01791625 +n01792158 +n01792640 +n01794158 +n01795088 +n01795735 +n01795900 +n01796019 +n01796105 +n01796519 +n01796729 +n01798706 +n01798839 +n01798979 +n01799302 +n01800424 +n01801088 +n01801479 +n01802721 +n01803078 +n01804478 +n01804653 +n01804921 +n01805070 +n01805801 +n01806847 +n01807828 +n01808140 +n01808291 +n01808596 +n01809106 +n01810700 +n01811909 +n01812337 +n01813385 +n01814370 +n01814921 +n01815601 +n01816887 +n01819115 +n01820348 +n01820801 +n01821076 +n01821203 +n01822602 +n01823013 +n01824749 +n01825930 +n01826364 +n01827403 +n01829869 +n01831712 +n01832167 +n01834177 +n01834540 +n01835276 +n01838038 +n01838598 +n01839598 +n01841102 +n01843719 +n01844231 +n01844551 +n01844917 +n01845132 +n01845477 +n01846331 +n01848123 +n01848648 +n01849466 +n01850373 +n01851375 +n01852142 +n01852861 +n01853498 +n01854415 +n01856072 +n01856155 +n01856380 +n01856553 +n01856890 +n01857079 +n01857325 
+n01857512 +n01857632 +n01857851 +n01858441 +n01859496 +n01860497 +n01861148 +n01861778 +n01871543 +n01871875 +n01874434 +n01874928 +n01876326 +n01877134 +n01878061 +n01878335 +n01878639 +n01878929 +n01879217 +n01879509 +n01880152 +n01881171 +n01883513 +n01883920 +n01886756 +n01887896 +n01888264 +n01889074 +n01889520 +n01890860 +n01891633 +n01892551 +n01894207 +n01905661 +n01906749 +n01907738 +n01909422 +n01911403 +n01911839 +n01912454 +n01914163 +n01914830 +n01915811 +n01916187 +n01916925 +n01918744 +n01922303 +n01925270 +n01925695 +n01926379 +n01926689 +n01927159 +n01927456 +n01927928 +n01928215 +n01930852 +n01931140 +n01931520 +n01931714 +n01932151 +n01932936 +n01933151 +n01933478 +n01933988 +n01934440 +n01935176 +n01936391 +n01937909 +n01940736 +n01941223 +n01942177 +n01942869 +n01943541 +n01944118 +n01944812 +n01944955 +n01945143 +n01945340 +n01945845 +n01946277 +n01948573 +n01951613 +n01953361 +n01955933 +n01956481 +n01958038 +n01959985 +n01960459 +n01963571 +n01964049 +n01964441 +n01965889 +n01967094 +n01968315 +n01969726 +n01971094 +n01971280 +n01974773 +n01975687 +n01976146 +n01976957 +n01978930 +n01981702 +n01982650 +n01983048 +n01985493 +n01985797 +n01986806 +n01987545 +n01988701 +n01989869 +n01990007 +n01991028 +n01991520 +n01992262 +n01992423 +n01992773 +n01996585 +n01998183 +n02000954 +n02002075 +n02005790 +n02006985 +n02007284 +n02008041 +n02008796 +n02010453 +n02011805 +n02011943 +n02012185 +n02013177 +n02014941 +n02015554 +n02016358 +n02016956 +n02018027 +n02019190 +n02019438 +n02019929 +n02021050 +n02021795 +n02022684 +n02023341 +n02025043 +n02026059 +n02028175 +n02030035 +n02030287 +n02030996 +n02031934 +n02033208 +n02033324 +n02033561 +n02034129 +n02034661 +n02036053 +n02037464 +n02039171 +n02040505 +n02041085 +n02041246 +n02043063 +n02044178 +n02044778 +n02045369 +n02046759 +n02047260 +n02047614 +n02048698 +n02049532 +n02050004 +n02051474 +n02052204 +n02052365 +n02053083 +n02054502 +n02055658 +n02055803 +n02057731 +n02058594 +n02058747 
+n02059162 +n02060411 +n02060889 +n02062017 +n02062430 +n02062744 +n02063224 +n02064338 +n02066707 +n02068206 +n02068974 +n02069701 +n02070430 +n02073250 +n02075296 +n02075927 +n02076196 +n02076779 +n02077152 +n02077658 +n02078292 +n02078574 +n02078738 +n02079005 +n02079389 +n02081571 +n02083346 +n02083672 +n02084071 +n02084861 +n02085374 +n02086346 +n02086478 +n02087122 +n02087551 +n02088839 +n02089232 +n02089555 +n02090475 +n02090827 +n02092468 +n02093056 +n02094562 +n02094721 +n02095050 +n02095412 +n02095727 +n02096756 +n02097786 +n02098550 +n02099029 +n02099997 +n02100399 +n02101108 +n02101861 +n02102605 +n02103406 +n02103841 +n02104523 +n02104882 +n02106966 +n02107420 +n02108254 +n02108672 +n02109150 +n02109811 +n02110532 +n02111626 +n02112497 +n02112826 +n02113335 +n02114100 +n02115012 +n02115335 +n02117512 +n02117646 +n02117900 +n02118176 +n02118333 +n02119247 +n02119359 +n02120278 +n02120997 +n02121620 +n02121808 +n02122725 +n02123785 +n02124623 +n02127292 +n02127381 +n02127482 +n02127586 +n02127678 +n02127808 +n02128598 +n02128669 +n02129463 +n02129530 +n02129837 +n02129923 +n02130545 +n02131653 +n02132466 +n02132580 +n02132788 +n02133400 +n02134971 +n02135220 +n02137722 +n02137888 +n02138647 +n02138777 +n02139199 +n02139671 +n02141306 +n02141713 +n02144593 +n02145424 +n02148835 +n02149420 +n02150482 +n02152740 +n02152991 +n02153203 +n02153809 +n02156732 +n02159955 +n02164464 +n02165877 +n02166229 +n02166567 +n02166826 +n02167505 +n02167820 +n02167944 +n02168245 +n02169023 +n02169218 +n02169705 +n02169974 +n02170400 +n02170993 +n02171453 +n02171869 +n02172518 +n02172678 +n02172761 +n02172870 +n02174355 +n02176261 +n02178411 +n02178717 +n02179012 +n02180233 +n02181235 +n02181724 +n02182045 +n02182355 +n02182642 +n02182930 +n02183857 +n02186153 +n02188699 +n02189363 +n02190790 +n02191273 +n02191773 +n02191979 +n02192252 +n02192513 +n02192814 +n02193009 +n02193163 +n02194249 +n02194750 +n02195091 +n02195526 +n02195819 +n02196344 +n02198532 +n02199502 
+n02200198 +n02202287 +n02204722 +n02206270 +n02207179 +n02207449 +n02208280 +n02208498 +n02208848 +n02208979 +n02209111 +n02209354 +n02209624 +n02209964 +n02210427 +n02210921 +n02211444 +n02211627 +n02211896 +n02212062 +n02212602 +n02212958 +n02213107 +n02215161 +n02215770 +n02217563 +n02218713 +n02220055 +n02220225 +n02220518 +n02220804 +n02221083 +n02221414 +n02221571 +n02221715 +n02221820 +n02222035 +n02222321 +n02222582 +n02223266 +n02223520 +n02226183 +n02226821 +n02226970 +n02227247 +n02227604 +n02227966 +n02228341 +n02228697 +n02229156 +n02229765 +n02230023 +n02230187 +n02230480 +n02230634 +n02231052 +n02231803 +n02233943 +n02234355 +n02234570 +n02234848 +n02235205 +n02236241 +n02236896 +n02237424 +n02237581 +n02238235 +n02238887 +n02239528 +n02241569 +n02241799 +n02243562 +n02244173 +n02246011 +n02246628 +n02247511 +n02248368 +n02248510 +n02248887 +n02249515 +n02250822 +n02251775 +n02252226 +n02253127 +n02253715 +n02254697 +n02257003 +n02257284 +n02257715 +n02259377 +n02260421 +n02260863 +n02261419 +n02262178 +n02263378 +n02264885 +n02265330 +n02268148 +n02269196 +n02269522 +n02270011 +n02270200 +n02271570 +n02271897 +n02272871 +n02274024 +n02274259 +n02274822 +n02278210 +n02278839 +n02279637 +n02280458 +n02281015 +n02281136 +n02281267 +n02282257 +n02282385 +n02282553 +n02282903 +n02283077 +n02283201 +n02283951 +n02285548 +n02287004 +n02287799 +n02288789 +n02291220 +n02291572 +n02291748 +n02293352 +n02293868 +n02295064 +n02295390 +n02295870 +n02298541 +n02300173 +n02301452 +n02302459 +n02302620 +n02305407 +n02306433 +n02307325 +n02308139 +n02308471 +n02309337 +n02310000 +n02310717 +n02311060 +n02312006 +n02312427 +n02313008 +n02316707 +n02318167 +n02319308 +n02319555 +n02319829 +n02320127 +n02322047 +n02323449 +n02323902 +n02324045 +n02325722 +n02325884 +n02326074 +n02326763 +n02326862 +n02327028 +n02327175 +n02327435 +n02327656 +n02327842 +n02328429 +n02329401 +n02330245 +n02331046 +n02331309 +n02332755 +n02333546 +n02334460 +n02335127 +n02336011 
+n02336641 +n02338901 +n02339376 +n02339922 +n02343058 +n02343320 +n02343772 +n02344528 +n02345600 +n02346998 +n02347274 +n02347573 +n02347744 +n02348173 +n02348788 +n02350105 +n02350989 +n02351870 +n02352591 +n02353861 +n02355227 +n02355477 +n02358091 +n02359324 +n02360781 +n02361587 +n02361706 +n02361850 +n02363245 +n02363351 +n02364520 +n02369680 +n02370806 +n02372584 +n02373336 +n02374149 +n02374451 +n02376542 +n02376791 +n02376918 +n02377181 +n02377480 +n02377703 +n02378415 +n02380335 +n02380583 +n02380745 +n02381460 +n02382437 +n02382948 +n02384858 +n02386014 +n02386310 +n02386496 +n02388276 +n02389346 +n02389559 +n02390454 +n02390834 +n02391234 +n02391373 +n02391508 +n02391994 +n02393580 +n02394477 +n02395003 +n02395694 +n02395931 +n02397529 +n02399000 +n02401031 +n02402010 +n02402175 +n02402425 +n02403325 +n02403454 +n02404186 +n02404573 +n02406174 +n02407959 +n02408660 +n02408817 +n02409870 +n02410702 +n02410900 +n02411705 +n02412440 +n02413131 +n02414578 +n02415435 +n02416519 +n02417070 +n02417534 +n02418064 +n02419796 +n02423218 +n02423362 +n02423589 +n02424305 +n02424695 +n02426813 +n02427724 +n02428349 +n02430045 +n02430559 +n02431122 +n02432511 +n02433546 +n02433925 +n02435853 +n02437136 +n02437971 +n02438173 +n02438272 +n02439033 +n02441326 +n02442172 +n02442336 +n02442446 +n02442572 +n02442668 +n02443015 +n02443346 +n02443808 +n02443959 +n02444251 +n02445004 +n02445171 +n02446206 +n02446352 +n02446645 +n02447021 +n02447762 +n02448060 +n02448633 +n02448885 +n02450034 +n02453108 +n02453611 +n02454794 +n02455135 +n02455428 +n02455720 +n02456008 +n02456275 +n02456962 +n02460009 +n02469914 +n02470325 +n02470899 +n02471300 +n02471762 +n02472293 +n02473307 +n02474777 +n02476219 +n02480153 +n02481103 +n02481235 +n02481366 +n02481500 +n02482060 +n02482286 +n02482474 +n02482650 +n02483092 +n02484322 +n02484473 +n02485225 +n02485371 +n02485536 +n02485688 +n02486657 +n02486908 +n02487079 +n02487547 +n02487675 +n02487847 +n02488003 +n02488415 +n02488894 
+n02489589 +n02490597 +n02490811 +n02491107 +n02491329 +n02491474 +n02496913 +n02501583 +n02502514 +n02503127 +n02503517 +n02504770 +n02507649 +n02508021 +n02512053 +n02512938 +n02513560 +n02515214 +n02516188 +n02517442 +n02517938 +n02519148 +n02519686 +n02521646 +n02522399 +n02524524 +n02526425 +n02526818 +n02527057 +n02527271 +n02527622 +n02528163 +n02529772 +n02530421 +n02532028 +n02532602 +n02533209 +n02533834 +n02534559 +n02534734 +n02535537 +n02537085 +n02537319 +n02538406 +n02538985 +n02540412 +n02541687 +n02546331 +n02548689 +n02549989 +n02550460 +n02552171 +n02554730 +n02556846 +n02557591 +n02557749 +n02559862 +n02561108 +n02561661 +n02562315 +n02562796 +n02563182 +n02564720 +n02565573 +n02566109 +n02568959 +n02569484 +n02570838 +n02572196 +n02574910 +n02576223 +n02576575 +n02578233 +n02579557 +n02580336 +n02581957 +n02583567 +n02585872 +n02586543 +n02588286 +n02590495 +n02590702 +n02590987 +n02594250 +n02596381 +n02597367 +n02599052 +n02599958 +n02600298 +n02601344 +n02602405 +n02603317 +n02604157 +n02605316 +n02606384 +n02607201 +n02607862 +n02613181 +n02614482 +n02614978 +n02619165 +n02621908 +n02623445 +n02624167 +n02625612 +n02626762 +n02627835 +n02630281 +n02630739 +n02631041 +n02636170 +n02636854 +n02638596 +n02640626 +n02640857 +n02642107 +n02642644 +n02643112 +n02644113 +n02646667 +n02648625 +n02650050 +n02650541 +n02652668 +n02653145 +n02653786 +n02654425 +n02655523 +n02656670 +n02657368 +n02658079 +n02661017 +n02662239 +n02663849 +n02667379 +n02667576 +n02668393 +n02670382 +n02671780 +n02672371 +n02676261 +n02676670 +n02677028 +n02677718 +n02678384 +n02680110 +n02680754 +n02682407 +n02682922 +n02683791 +n02686121 +n02686568 +n02687992 +n02688443 +n02689274 +n02691156 +n02692513 +n02693413 +n02693540 +n02694426 +n02694966 +n02695627 +n02697576 +n02698244 +n02700258 +n02700895 +n02702989 +n02703275 +n02705944 +n02708224 +n02708555 +n02709367 +n02709637 +n02710600 +n02712643 +n02713218 +n02715229 +n02715513 +n02715712 +n02716626 +n02726305 
+n02726681 +n02727016 +n02727825 +n02728440 +n02729837 +n02729965 +n02730265 +n02732072 +n02732827 +n02733213 +n02733524 +n02735361 +n02735688 +n02736798 +n02737660 +n02738031 +n02738271 +n02738535 +n02739550 +n02739668 +n02740533 +n02740764 +n02741475 +n02742322 +n02742753 +n02745492 +n02746365 +n02749790 +n02750169 +n02751067 +n02751295 +n02752496 +n02753044 +n02753394 +n02754103 +n02755352 +n02755529 +n02756098 +n02756977 +n02757462 +n02757810 +n02758134 +n02758960 +n02759700 +n02759963 +n02760099 +n02760199 +n02760429 +n02760855 +n02761392 +n02763198 +n02763714 +n02764044 +n02764614 +n02764779 +n02765028 +n02766320 +n02766534 +n02766792 +n02767433 +n02769075 +n02770830 +n02772554 +n02772700 +n02773037 +n02773838 +n02774152 +n02774630 +n02775483 +n02776205 +n02777100 +n02777734 +n02777927 +n02778456 +n02778669 +n02781121 +n02781338 +n02781517 +n02783035 +n02783324 +n02784998 +n02785648 +n02786198 +n02786463 +n02788689 +n02789487 +n02790823 +n02792552 +n02792948 +n02793842 +n02794008 +n02794779 +n02794972 +n02795783 +n02796207 +n02796623 +n02796995 +n02797692 +n02797881 +n02799897 +n02801184 +n02801525 +n02801938 +n02802721 +n02803349 +n02803666 +n02804252 +n02806088 +n02806379 +n02806875 +n02810471 +n02811468 +n02811719 +n02812201 +n02813252 +n02813399 +n02815478 +n02815950 +n02816494 +n02817031 +n02817650 +n02817799 +n02818832 +n02819697 +n02820210 +n02821627 +n02821943 +n02822220 +n02822399 +n02822865 +n02823335 +n02824448 +n02826589 +n02826886 +n02827606 +n02828299 +n02828884 +n02831335 +n02831724 +n02831894 +n02833793 +n02834778 +n02835412 +n02836268 +n02839351 +n02839910 +n02840619 +n02841063 +n02841506 +n02842133 +n02843029 +n02843777 +n02844214 +n02844307 +n02844714 +n02847631 +n02848216 +n02848523 +n02849154 +n02850950 +n02851099 +n02853016 +n02854532 +n02854926 +n02855089 +n02855390 +n02855793 +n02857365 +n02857477 +n02857644 +n02858304 +n02860415 +n02861886 +n02862048 +n02862916 +n02863750 +n02865665 +n02865931 +n02866578 +n02867715 +n02869737 
+n02871631 +n02871824 +n02871963 +n02872752 +n02873839 +n02874086 +n02875436 +n02876326 +n02876457 +n02876657 +n02877962 +n02879517 +n02880189 +n02880546 +n02880940 +n02881193 +n02881906 +n02882483 +n02882647 +n02883004 +n02883344 +n02884225 +n02885108 +n02885338 +n02886599 +n02887209 +n02887970 +n02888569 +n02889425 +n02891188 +n02891788 +n02892499 +n02893418 +n02896294 +n02896442 +n02897389 +n02897820 +n02898173 +n02898369 +n02898585 +n02898711 +n02900705 +n02901481 +n02901901 +n02902079 +n02902916 +n02903006 +n02904109 +n02904640 +n02908217 +n02909285 +n02911485 +n02912065 +n02913152 +n02914991 +n02916179 +n02916350 +n02917377 +n02917607 +n02919414 +n02920503 +n02921884 +n02923129 +n02924116 +n02925519 +n02928413 +n02928608 +n02929289 +n02929462 +n02929923 +n02931417 +n02931836 +n02932019 +n02932400 +n02933112 +n02933462 +n02933750 +n02933990 +n02934168 +n02935658 +n02935891 +n02936176 +n02936281 +n02936714 +n02938886 +n02939866 +n02941095 +n02942699 +n02943241 +n02943871 +n02944826 +n02945161 +n02946270 +n02946348 +n02946921 +n02947212 +n02947818 +n02948557 +n02949202 +n02950186 +n02950256 +n02950632 +n02950943 +n02951843 +n02952485 +n02952674 +n02953673 +n02954163 +n02954340 +n02954938 +n02955065 +n02955247 +n02955540 +n02955767 +n02957135 +n02957755 +n02958343 +n02959942 +n02961451 +n02961947 +n02963302 +n02963692 +n02963821 +n02965216 +n02965300 +n02965529 +n02966545 +n02966786 +n02966942 +n02967081 +n02967991 +n02968473 +n02969010 +n02969163 +n02969634 +n02969886 +n02970685 +n02970849 +n02971691 +n02972397 +n02973017 +n02974697 +n02975212 +n02976939 +n02978205 +n02978753 +n02979516 +n02982599 +n02983189 +n02983904 +n02984061 +n02984203 +n02984469 +n02984699 +n02985137 +n02985828 +n02986066 +n02987047 +n02987492 +n02989099 +n02991048 +n02991302 +n02992032 +n02993546 +n02995998 +n02997391 +n02997607 +n03001282 +n03001627 +n03002210 +n03003091 +n03004620 +n03005515 +n03007130 +n03007591 +n03010656 +n03010795 +n03011018 +n03011355 +n03012159 +n03013006 
+n03014440 +n03015254 +n03017070 +n03018209 +n03020034 +n03020416 +n03020692 +n03024333 +n03025070 +n03025886 +n03027108 +n03027250 +n03029066 +n03031422 +n03032811 +n03033362 +n03033986 +n03034516 +n03034663 +n03035510 +n03036469 +n03036866 +n03037108 +n03037709 +n03038685 +n03039015 +n03039947 +n03040229 +n03040376 +n03043274 +n03043958 +n03045337 +n03046257 +n03048883 +n03049066 +n03049457 +n03050026 +n03050546 +n03050655 +n03050864 +n03051396 +n03051540 +n03052464 +n03052917 +n03053047 +n03054901 +n03055670 +n03056097 +n03056493 +n03057021 +n03057636 +n03058107 +n03058603 +n03058949 +n03059366 +n03061050 +n03063073 +n03063338 +n03064350 +n03064758 +n03065708 +n03066849 +n03070193 +n03071021 +n03071160 +n03072201 +n03073296 +n03073977 +n03074380 +n03074855 +n03075097 +n03075500 +n03075634 +n03076411 +n03076708 +n03078287 +n03078670 +n03079230 +n03079741 +n03080497 +n03080731 +n03081986 +n03082127 +n03082807 +n03082979 +n03084420 +n03085333 +n03085602 +n03085915 +n03086183 +n03086457 +n03086670 +n03087366 +n03087643 +n03087816 +n03088707 +n03091044 +n03091374 +n03092166 +n03092314 +n03093792 +n03094503 +n03096439 +n03096960 +n03098140 +n03098806 +n03099454 +n03099771 +n03099945 +n03100346 +n03100490 +n03101156 +n03101986 +n03102654 +n03102859 +n03106722 +n03106898 +n03107046 +n03109881 +n03111690 +n03112869 +n03113152 +n03113657 +n03113835 +n03114839 +n03115180 +n03116530 +n03116767 +n03117199 +n03118346 +n03118969 +n03119510 +n03120198 +n03120491 +n03121897 +n03122748 +n03123809 +n03125870 +n03128085 +n03128427 +n03128519 +n03129001 +n03130066 +n03130563 +n03131669 +n03132261 +n03134853 +n03135917 +n03136369 +n03137579 +n03139464 +n03140900 +n03141065 +n03141327 +n03143572 +n03145384 +n03145843 +n03146846 +n03147509 +n03148324 +n03148727 +n03149401 +n03151077 +n03153948 +n03154073 +n03154446 +n03155178 +n03156071 +n03156405 +n03157348 +n03158796 +n03158885 +n03161450 +n03162818 +n03163798 +n03163973 +n03164605 +n03164722 +n03164929 +n03165823 +n03167978 
+n03168107 +n03168217 +n03170635 +n03171356 +n03172965 +n03173387 +n03175604 +n03176386 +n03177165 +n03177708 +n03178000 +n03178430 +n03180504 +n03180969 +n03181293 +n03182140 +n03182232 +n03182912 +n03183080 +n03186818 +n03187751 +n03189818 +n03193597 +n03196062 +n03196324 +n03196598 +n03199647 +n03199901 +n03200357 +n03200539 +n03200701 +n03200906 +n03201035 +n03201638 +n03201996 +n03202354 +n03202760 +n03203089 +n03203806 +n03204306 +n03204558 +n03204955 +n03205143 +n03205304 +n03206718 +n03206908 +n03207305 +n03208556 +n03210683 +n03211117 +n03211616 +n03212811 +n03214253 +n03214450 +n03215191 +n03219135 +n03220237 +n03221059 +n03221720 +n03222516 +n03223162 +n03223441 +n03224753 +n03224893 +n03225777 +n03226538 +n03228016 +n03228533 +n03228692 +n03229115 +n03229526 +n03231160 +n03231819 +n03235796 +n03235979 +n03236580 +n03236735 +n03237212 +n03237639 +n03239259 +n03239726 +n03240140 +n03241093 +n03241335 +n03241496 +n03242120 +n03242506 +n03242995 +n03243218 +n03245271 +n03245421 +n03246933 +n03250952 +n03251533 +n03251766 +n03252324 +n03252637 +n03254374 +n03255488 +n03255899 +n03256788 +n03256928 +n03257586 +n03258905 +n03259505 +n03261776 +n03262519 +n03262809 +n03262932 +n03265032 +n03266749 +n03267821 +n03269203 +n03269401 +n03270695 +n03271765 +n03271865 +n03272239 +n03272383 +n03273061 +n03273913 +n03274561 +n03274796 +n03276179 +n03277459 +n03277771 +n03278248 +n03279153 +n03279364 +n03279508 +n03280394 +n03280644 +n03281145 +n03282060 +n03282401 +n03284743 +n03284981 +n03285912 +n03286572 +n03287733 +n03288003 +n03289985 +n03291413 +n03292960 +n03294048 +n03294833 +n03296478 +n03297103 +n03297644 +n03297735 +n03298089 +n03302790 +n03303217 +n03303831 +n03304197 +n03304465 +n03305522 +n03307573 +n03308152 +n03309808 +n03314378 +n03314884 +n03315644 +n03316406 +n03318136 +n03319457 +n03320046 +n03322570 +n03322940 +n03323703 +n03324928 +n03325088 +n03326073 +n03327234 +n03327691 +n03327841 +n03329663 +n03330792 +n03334017 +n03334492 +n03334912 
+n03335030 +n03335846 +n03336839 +n03337494 +n03338287 +n03338821 +n03339296 +n03339643 +n03340009 +n03340923 +n03342961 +n03343354 +n03343560 +n03343853 +n03346135 +n03346455 +n03349296 +n03350352 +n03350456 +n03350602 +n03351262 +n03351979 +n03352628 +n03354903 +n03355468 +n03356446 +n03357267 +n03357716 +n03359137 +n03359566 +n03360731 +n03361683 +n03362771 +n03363363 +n03364008 +n03364937 +n03365592 +n03365991 +n03366823 +n03373237 +n03374649 +n03374838 +n03375171 +n03376279 +n03378342 +n03379343 +n03379828 +n03379989 +n03380647 +n03380867 +n03381126 +n03381231 +n03381776 +n03382856 +n03382969 +n03383468 +n03384167 +n03384891 +n03385557 +n03386011 +n03387323 +n03387653 +n03390327 +n03391770 +n03393324 +n03394480 +n03394649 +n03396580 +n03396654 +n03397266 +n03397532 +n03398228 +n03399761 +n03399971 +n03402188 +n03402369 +n03404012 +n03404360 +n03404449 +n03405265 +n03405725 +n03407369 +n03409393 +n03409591 +n03410147 +n03411339 +n03412058 +n03412220 +n03412511 +n03412906 +n03413264 +n03413428 +n03413828 +n03414162 +n03415252 +n03416489 +n03416775 +n03417345 +n03418158 +n03418242 +n03419014 +n03422072 +n03422589 +n03423719 +n03424630 +n03427296 +n03428090 +n03428349 +n03429003 +n03429288 +n03429914 +n03430091 +n03430313 +n03430551 +n03430959 +n03431243 +n03431745 +n03433637 +n03433877 +n03434285 +n03434830 +n03435593 +n03437941 +n03438257 +n03439814 +n03441112 +n03442288 +n03442756 +n03446070 +n03446832 +n03448031 +n03448956 +n03449564 +n03449858 +n03450516 +n03452267 +n03452449 +n03453320 +n03454110 +n03454211 +n03454707 +n03455355 +n03456548 +n03456665 +n03457008 +n03457686 +n03458271 +n03459914 +n03461882 +n03465500 +n03465818 +n03466162 +n03466839 +n03467517 +n03467796 +n03467984 +n03468696 +n03469493 +n03470387 +n03470629 +n03470948 +n03472232 +n03472535 +n03472937 +n03473817 +n03473966 +n03475823 +n03476083 +n03476313 +n03477773 +n03477902 +n03478756 +n03478907 +n03481521 +n03482523 +n03483230 +n03483531 +n03484083 +n03484931 +n03487331 +n03487444 
+n03487774 +n03488188 +n03488603 +n03489162 +n03490324 +n03490449 +n03490884 +n03491988 +n03496296 +n03496612 +n03497100 +n03497657 +n03498536 +n03499468 +n03500295 +n03501152 +n03501288 +n03501614 +n03502331 +n03502509 +n03502777 +n03503718 +n03503997 +n03505383 +n03505504 +n03506370 +n03507963 +n03508101 +n03509394 +n03509843 +n03510583 +n03510866 +n03511175 +n03512147 +n03512911 +n03513137 +n03513376 +n03515338 +n03517899 +n03517982 +n03518631 +n03519674 +n03521076 +n03521544 +n03522634 +n03524574 +n03524976 +n03525074 +n03525454 +n03525827 +n03528263 +n03529444 +n03531281 +n03531447 +n03531546 +n03532342 +n03534776 +n03535024 +n03536761 +n03537412 +n03538037 +n03538300 +n03538634 +n03538957 +n03540267 +n03540595 +n03541091 +n03541696 +n03541923 +n03542333 +n03542860 +n03543603 +n03544360 +n03545150 +n03546340 +n03547054 +n03547530 +n03548930 +n03550153 +n03550289 +n03551084 +n03551790 +n03552449 +n03552749 +n03553486 +n03554460 +n03555426 +n03555662 +n03557590 +n03558176 +n03558404 +n03558739 +n03561169 +n03563200 +n03563710 +n03563967 +n03565288 +n03565565 +n03566329 +n03568117 +n03568818 +n03571942 +n03572205 +n03574555 +n03574816 +n03575958 +n03576215 +n03577672 +n03577818 +n03578055 +n03578251 +n03578656 +n03579538 +n03579982 +n03583621 +n03584400 +n03585073 +n03588951 +n03589513 +n03589791 +n03590306 +n03590932 +n03592245 +n03592773 +n03593526 +n03595409 +n03595860 +n03596285 +n03597317 +n03598151 +n03598299 +n03598646 +n03600977 +n03601638 +n03601840 +n03602081 +n03603722 +n03604629 +n03604843 +n03605722 +n03605915 +n03606465 +n03609235 +n03609542 +n03610418 +n03610992 +n03612814 +n03613294 +n03613592 +n03614007 +n03614532 +n03615563 +n03617095 +n03617594 +n03618546 +n03618982 +n03619396 +n03619650 +n03619793 +n03619890 +n03620052 +n03621049 +n03621694 +n03622931 +n03623556 +n03624134 +n03625355 +n03626115 +n03631177 +n03631811 +n03632852 +n03633886 +n03635032 +n03635668 +n03635932 +n03636248 +n03636649 +n03638883 +n03639675 +n03640988 +n03642444 
+n03646296 +n03646916 +n03647520 +n03651388 +n03653220 +n03653454 +n03654576 +n03655072 +n03656484 +n03657239 +n03658858 +n03659292 +n03660124 +n03661340 +n03662719 +n03662887 +n03663531 +n03664675 +n03664943 +n03665366 +n03666362 +n03666917 +n03667235 +n03667829 +n03671914 +n03672827 +n03673450 +n03673767 +n03676759 +n03677766 +n03679384 +n03679712 +n03681477 +n03682487 +n03684823 +n03685307 +n03685820 +n03686130 +n03686470 +n03687928 +n03688943 +n03689157 +n03689570 +n03690851 +n03691817 +n03692379 +n03693293 +n03697552 +n03698604 +n03699280 +n03699975 +n03700963 +n03701191 +n03701391 +n03701640 +n03701790 +n03702248 +n03704834 +n03705379 +n03706653 +n03707597 +n03708036 +n03709206 +n03709363 +n03709545 +n03710528 +n03711711 +n03711999 +n03712887 +n03713069 +n03714235 +n03715386 +n03715669 +n03715892 +n03716966 +n03717131 +n03718212 +n03718335 +n03718699 +n03718789 +n03719053 +n03721590 +n03722007 +n03722288 +n03724176 +n03725035 +n03725717 +n03726516 +n03726760 +n03726993 +n03727837 +n03727946 +n03728437 +n03728982 +n03729647 +n03729951 +n03730153 +n03730788 +n03731695 +n03733644 +n03733925 +n03735637 +n03736970 +n03738241 +n03738472 +n03739518 +n03739693 +n03743902 +n03744276 +n03744684 +n03744840 +n03745571 +n03746330 +n03748162 +n03749504 +n03749807 +n03750206 +n03751065 +n03752185 +n03752922 +n03753077 +n03753514 +n03758894 +n03759432 +n03760671 +n03762982 +n03763727 +n03764276 +n03765561 +n03765934 +n03766322 +n03768132 +n03769722 +n03770954 +n03772077 +n03772674 +n03773035 +n03775199 +n03775847 +n03779000 +n03779370 +n03780047 +n03781787 +n03782190 +n03785499 +n03787523 +n03789171 +n03789400 +n03789946 +n03790230 +n03790512 +n03790755 +n03791235 +n03792048 +n03792526 +n03793850 +n03795976 +n03796181 +n03797390 +n03798982 +n03799113 +n03800485 +n03800772 +n03800933 +n03802007 +n03802228 +n03802393 +n03803116 +n03809312 +n03811295 +n03811444 +n03811847 +n03811965 +n03812382 +n03812924 +n03813176 +n03813946 +n03815278 +n03815482 +n03815615 +n03816005 
+n03816136 +n03816849 +n03817647 +n03819595 +n03819994 +n03820154 +n03820318 +n03820728 +n03820950 +n03824197 +n03825080 +n03827536 +n03828020 +n03829340 +n03831757 +n03834040 +n03834604 +n03836062 +n03837422 +n03838748 +n03839172 +n03839276 +n03839795 +n03841666 +n03842156 +n03844045 +n03844233 +n03845190 +n03846234 +n03846772 +n03847471 +n03847823 +n03848168 +n03848348 +n03849275 +n03850613 +n03851341 +n03851787 +n03852280 +n03852688 +n03854815 +n03859280 +n03859495 +n03859958 +n03861430 +n03861842 +n03862676 +n03863923 +n03864139 +n03864356 +n03864692 +n03865371 +n03865949 +n03868406 +n03871083 +n03871524 +n03871724 +n03873848 +n03874138 +n03874823 +n03875218 +n03880129 +n03880323 +n03880531 +n03883054 +n03883773 +n03883944 +n03884639 +n03885535 +n03885669 +n03886053 +n03886641 +n03887185 +n03888022 +n03889503 +n03889726 +n03891051 +n03892557 +n03894051 +n03894379 +n03896103 +n03896233 +n03896419 +n03896628 +n03896984 +n03897943 +n03898271 +n03898633 +n03899612 +n03899933 +n03901338 +n03903133 +n03903424 +n03904060 +n03904183 +n03904433 +n03905540 +n03906997 +n03907654 +n03908204 +n03909160 +n03909406 +n03915118 +n03915437 +n03916470 +n03916720 +n03917327 +n03918480 +n03920737 +n03923564 +n03923692 +n03924069 +n03926148 +n03926412 +n03926876 +n03927792 +n03928116 +n03929091 +n03929202 +n03929443 +n03930515 +n03932670 +n03936269 +n03938522 +n03939677 +n03940256 +n03941684 +n03943920 +n03945615 +n03947111 +n03947466 +n03948459 +n03951971 +n03953020 +n03953416 +n03955809 +n03956785 +n03956922 +n03957315 +n03957762 +n03958630 +n03958752 +n03959014 +n03959701 +n03961939 +n03962525 +n03962932 +n03963028 +n03965907 +n03966325 +n03966751 +n03966976 +n03967942 +n03968293 +n03971321 +n03972524 +n03973520 +n03973628 +n03975035 +n03979377 +n03979492 +n03980026 +n03981340 +n03982232 +n03982895 +n03984234 +n03984381 +n03985232 +n03986704 +n03988170 +n03989665 +n03990474 +n03991443 +n03992325 +n03992703 +n03993180 +n03993403 +n03994008 +n03994757 +n03995018 +n03995856 
+n03996145 +n03996416 +n03997484 +n03999992 +n04000311 +n04001397 +n04001499 +n04001845 +n04004210 +n04004475 +n04005912 +n04007664 +n04010057 +n04010779 +n04010927 +n04011827 +n04012084 +n04013729 +n04014297 +n04015204 +n04016576 +n04016684 +n04018399 +n04018667 +n04019101 +n04019696 +n04020087 +n04020298 +n04020912 +n04021028 +n04021362 +n04021798 +n04022332 +n04022708 +n04023249 +n04024274 +n04026053 +n04026918 +n04027023 +n04027706 +n04028315 +n04029734 +n04030274 +n04036303 +n04037964 +n04038440 +n04038727 +n04039848 +n04042358 +n04042632 +n04042795 +n04042985 +n04043733 +n04044307 +n04044498 +n04045085 +n04045397 +n04046590 +n04046974 +n04047401 +n04049405 +n04050066 +n04051549 +n04051825 +n04052757 +n04056932 +n04057047 +n04057435 +n04057846 +n04057981 +n04058096 +n04058239 +n04059947 +n04060647 +n04060904 +n04061793 +n04061969 +n04062644 +n04063373 +n04063868 +n04064401 +n04065464 +n04065789 +n04067231 +n04067353 +n04067921 +n04068441 +n04068601 +n04069276 +n04069777 +n04070207 +n04070964 +n04071102 +n04071263 +n04071393 +n04072193 +n04072551 +n04073948 +n04075468 +n04075916 +n04076284 +n04077430 +n04077734 +n04078574 +n04079106 +n04079244 +n04079933 +n04080454 +n04080833 +n04081844 +n04083649 +n04086794 +n04087126 +n04087709 +n04088696 +n04088797 +n04089666 +n04089976 +n04090548 +n04091097 +n04093625 +n04095210 +n04096066 +n04097622 +n04097866 +n04099175 +n04099429 +n04100174 +n04101497 +n04101701 +n04102037 +n04102285 +n04102406 +n04102962 +n04104147 +n04104500 +n04105068 +n04105438 +n04105893 +n04107984 +n04108268 +n04110068 +n04110654 +n04110955 +n04111190 +n04111414 +n04111668 +n04113765 +n04114996 +n04115256 +n04115996 +n04116389 +n04118021 +n04121228 +n04122349 +n04122492 +n04122825 +n04123123 +n04123567 +n04123740 +n04125116 +n04125853 +n04126541 +n04126659 +n04126980 +n04127904 +n04128499 +n04128837 +n04131929 +n04134632 +n04136510 +n04137444 +n04137897 +n04138977 +n04139859 +n04140064 +n04140631 +n04141838 +n04143897 +n04146050 +n04147495 
+n04148054 +n04149083 +n04151108 +n04151581 +n04151940 +n04152387 +n04154753 +n04156297 +n04156411 +n04157320 +n04158807 +n04158956 +n04160372 +n04160586 +n04161358 +n04161981 +n04164757 +n04164868 +n04166111 +n04167489 +n04169437 +n04170037 +n04171459 +n04171629 +n04171831 +n04174101 +n04174500 +n04176068 +n04176190 +n04176528 +n04177329 +n04177545 +n04180063 +n04180888 +n04181228 +n04181718 +n04182322 +n04183217 +n04183329 +n04184435 +n04184600 +n04185071 +n04186051 +n04186268 +n04186455 +n04186624 +n04186848 +n04187061 +n04187547 +n04187885 +n04189092 +n04190052 +n04190464 +n04190747 +n04190997 +n04191150 +n04191595 +n04191943 +n04192238 +n04192858 +n04194289 +n04196080 +n04197391 +n04198015 +n04198797 +n04199027 +n04201733 +n04202417 +n04205318 +n04206356 +n04207763 +n04210390 +n04211356 +n04211970 +n04215910 +n04216634 +n04216860 +n04216963 +n04217718 +n04217882 +n04219424 +n04221823 +n04222210 +n04222470 +n04222847 +n04225031 +n04225222 +n04225729 +n04226464 +n04226537 +n04227900 +n04229007 +n04229107 +n04229480 +n04230603 +n04230808 +n04231693 +n04232153 +n04233832 +n04234455 +n04235291 +n04235771 +n04236001 +n04236377 +n04236702 +n04238617 +n04241042 +n04241394 +n04242408 +n04243003 +n04243941 +n04244997 +n04245847 +n04246855 +n04247630 +n04247736 +n04248507 +n04249415 +n04250224 +n04250599 +n04253931 +n04255499 +n04256520 +n04260589 +n04261116 +n04262678 +n04263336 +n04263760 +n04264233 +n04264914 +n04266486 +n04267577 +n04269944 +n04270891 +n04271148 +n04272054 +n04272782 +n04273064 +n04273796 +n04275283 +n04275661 +n04275904 +n04278353 +n04279172 +n04279987 +n04280259 +n04280970 +n04283585 +n04283905 +n04284002 +n04285146 +n04285622 +n04285803 +n04286128 +n04288272 +n04288533 +n04288673 +n04289449 +n04291242 +n04291759 +n04292414 +n04292572 +n04293119 +n04293744 +n04294212 +n04294426 +n04295081 +n04295881 +n04299215 +n04300358 +n04301000 +n04301474 +n04303258 +n04304375 +n04305471 +n04306080 +n04306847 +n04307419 +n04307878 +n04308084 +n04308273 
+n04308397 +n04308583 +n04308807 +n04309348 +n04309833 +n04310721 +n04311595 +n04312154 +n04312432 +n04313220 +n04314914 +n04315828 +n04315948 +n04317420 +n04318131 +n04318982 +n04319937 +n04320405 +n04322026 +n04322692 +n04322801 +n04323819 +n04326799 +n04326896 +n04328054 +n04328329 +n04328946 +n04329477 +n04330340 +n04330669 +n04330998 +n04331277 +n04332987 +n04333129 +n04338517 +n04339638 +n04340750 +n04340935 +n04341133 +n04341414 +n04341686 +n04346679 +n04347519 +n04348184 +n04348359 +n04349401 +n04350104 +n04350458 +n04354589 +n04356595 +n04358707 +n04358874 +n04359335 +n04359589 +n04360501 +n04360798 +n04361095 +n04361260 +n04362821 +n04363210 +n04363874 +n04364545 +n04364827 +n04364994 +n04365328 +n04365484 +n04365751 +n04368695 +n04370048 +n04371563 +n04373894 +n04375775 +n04377057 +n04378956 +n04379243 +n04379964 +n04380346 +n04381994 +n04382334 +n04382880 +n04383130 +n04383301 +n04386664 +n04387201 +n04387400 +n04388162 +n04388743 +n04389521 +n04390873 +n04391838 +n04392526 +n04393095 +n04394261 +n04395875 +n04397168 +n04397261 +n04397645 +n04398497 +n04398688 +n04398834 +n04399046 +n04400289 +n04401088 +n04402057 +n04402580 +n04402746 +n04402984 +n04403638 +n04404817 +n04404997 +n04405540 +n04405762 +n04407435 +n04407686 +n04409128 +n04409806 +n04410086 +n04410365 +n04410485 +n04411264 +n04411966 +n04413151 +n04413419 +n04415663 +n04416901 +n04417180 +n04417361 +n04417809 +n04419073 +n04421872 +n04422875 +n04427715 +n04428008 +n04431436 +n04431745 +n04434932 +n04435180 +n04436185 +n04436401 +n04436542 +n04437670 +n04437953 +n04438304 +n04438643 +n04440963 +n04441662 +n04444749 +n04445040 +n04445952 +n04446276 +n04447276 +n04447443 +n04448070 +n04448361 +n04450243 +n04450640 +n04450749 +n04451818 +n04452615 +n04452848 +n04453156 +n04453666 +n04453910 +n04454654 +n04455250 +n04455652 +n04456472 +n04457326 +n04458843 +n04459362 +n04459610 +n04460130 +n04462011 +n04463679 +n04464852 +n04467099 +n04467307 +n04468005 +n04469251 +n04470741 +n04471315 
+n04471632 +n04472243 +n04472726 +n04473884 +n04474466 +n04475411 +n04475631 +n04477548 +n04478512 +n04478657 +n04480527 +n04481524 +n04487724 +n04488427 +n04489008 +n04489817 +n04490091 +n04491769 +n04493109 +n04494204 +n04495450 +n04497442 +n04497570 +n04498523 +n04499446 +n04499554 +n04500060 +n04501837 +n04502197 +n04502502 +n04502670 +n04502851 +n04504141 +n04504770 +n04505036 +n04506994 +n04507453 +n04508163 +n04508489 +n04508949 +n04509260 +n04509592 +n04511002 +n04514241 +n04516116 +n04516214 +n04516672 +n04518132 +n04519153 +n04520170 +n04520382 +n04521987 +n04524313 +n04527648 +n04529681 +n04530566 +n04531098 +n04531873 +n04533042 +n04533199 +n04533700 +n04534127 +n04534895 +n04536153 +n04538552 +n04539203 +n04540761 +n04541320 +n04543158 +n04544450 +n04546194 +n04546855 +n04547592 +n04549122 +n04549919 +n04551055 +n04552696 +n04553389 +n04554871 +n04555600 +n04555897 +n04556948 +n04557308 +n04557751 +n04558059 +n04558804 +n04559023 +n04559730 +n04562262 +n04563204 +n04565375 +n04566257 +n04567098 +n04568069 +n04568557 +n04569520 +n04569822 +n04570958 +n04571292 +n04571566 +n04571958 +n04572935 +n04574471 +n04574999 +n04576002 +n04576211 +n04576971 +n04577426 +n04577769 +n04578801 +n04579230 +n04580493 +n04581595 +n04582349 +n04583620 +n04585745 +n04585980 +n04586932 +n04587648 +n04588739 +n04589190 +n04589434 +n04591056 +n04591887 +n04592005 +n04592099 +n04594218 +n04594489 +n04595285 +n04595855 +n04596852 +n04597066 +n04597804 +n04598136 +n04598582 +n04599124 +n04600312 +n04600486 +n04600912 +n04603729 +n04603872 +n04605726 +n04606574 +n04608329 +n04608567 +n04609531 +n04609651 +n04610176 +n04610503 +n04610676 +n04611916 +n04613015 +n04615226 +n04615644 +n04950713 +n04951373 +n04958634 +n04959672 +n04960277 +n04961691 +n04963740 +n04965179 +n04965661 +n04967191 +n04968895 +n04970059 +n04970631 +n04970916 +n04972801 +n04973386 +n04976952 +n05238282 +n05241218 +n05242070 +n05244934 +n05266879 +n05399034 +n05447757 +n05449959 +n05453657 +n05467758 
+n05586759 +n06254669 +n06262567 +n06263369 +n06263609 +n06263762 +n06266417 +n06266710 +n06267145 +n06271778 +n06272290 +n06272803 +n06274092 +n06275353 +n06276697 +n06277280 +n06281040 +n06359467 +n06359657 +n06418693 +n06591815 +n06592078 +n06595351 +n06613686 +n06793231 +n07556637 +n07556970 +n07557165 +n07557434 +n07560652 +n07561112 +n07562495 +n07563800 +n07564629 +n07564971 +n07565725 +n07565945 +n07566340 +n07566863 +n07567390 +n07567707 +n07568818 +n07569106 +n07569543 +n07570720 +n07572353 +n07572957 +n07573103 +n07573696 +n07574602 +n07575076 +n07575726 +n07575984 +n07576182 +n07576438 +n07576577 +n07577374 +n07579575 +n07580053 +n07580359 +n07580470 +n07581346 +n07581775 +n07582277 +n07582441 +n07582609 +n07583197 +n07584228 +n07584593 +n07585208 +n07587441 +n07587700 +n07588947 +n07590320 +n07591473 +n07592094 +n07592656 +n07593774 +n07595914 +n07596046 +n07596452 +n07596684 +n07597145 +n07597365 +n07598734 +n07599468 +n07599783 +n07599998 +n07600506 +n07601407 +n07605474 +n07605944 +n07606278 +n07606764 +n07607707 +n07609407 +n07609840 +n07611148 +n07611358 +n07611839 +n07611991 +n07612367 +n07612632 +n07612996 +n07613671 +n07614198 +n07614825 +n07615052 +n07615190 +n07615460 +n07615569 +n07615671 +n07616590 +n07617188 +n07619004 +n07623136 +n07624466 +n07627931 +n07628068 +n07641928 +n07642471 +n07642933 +n07643306 +n07643474 +n07643764 +n07643981 +n07644244 +n07663899 +n07678729 +n07679356 +n07680517 +n07680932 +n07681926 +n07682316 +n07682624 +n07683786 +n07684600 +n07685730 +n07686873 +n07687211 +n07687469 +n07687789 +n07689003 +n07690273 +n07690892 +n07692405 +n07692614 +n07693889 +n07693972 +n07694403 +n07695878 +n07695965 +n07697100 +n07704054 +n07705931 +n07707451 +n07708512 +n07708798 +n07709333 +n07710007 +n07710283 +n07710616 +n07710952 +n07712063 +n07712382 +n07712748 +n07712856 +n07713395 +n07713895 +n07714078 +n07714802 +n07714895 +n07715561 +n07715721 +n07716034 +n07717070 +n07717858 +n07718671 +n07719437 +n07719839 +n07720442 
+n07720615 +n07721325 +n07721456 +n07721678 +n07722217 +n07722763 +n07723330 +n07723559 +n07723753 +n07724943 +n07725376 +n07725531 +n07726796 +n07727578 +n07727868 +n07728804 +n07729000 +n07729485 +n07730406 +n07730855 +n07731122 +n07731587 +n07731952 +n07732302 +n07732747 +n07734017 +n07734292 +n07735052 +n07735803 +n07737081 +n07739125 +n07739506 +n07740220 +n07740954 +n07741461 +n07742012 +n07742704 +n07744246 +n07747055 +n07747811 +n07747951 +n07748753 +n07748912 +n07749095 +n07749192 +n07749312 +n07749731 +n07750586 +n07751451 +n07752377 +n07752664 +n07753743 +n07755089 +n07755411 +n07755707 +n07756096 +n07757132 +n07757312 +n07757602 +n07757990 +n07758680 +n07758950 +n07759424 +n07759691 +n07759816 +n07760501 +n07761141 +n07761309 +n07761611 +n07761777 +n07761954 +n07767344 +n07767847 +n07770571 +n07771212 +n07800091 +n07800740 +n07801508 +n07802152 +n07802417 +n07803093 +n07803545 +n07804323 +n07805254 +n07805594 +n07805731 +n07806221 +n07806633 +n07807317 +n07807710 +n07807922 +n07809096 +n07809368 +n07810907 +n07811416 +n07812184 +n07814203 +n07815588 +n07818277 +n07819480 +n07820497 +n07820814 +n07823951 +n07824702 +n07824988 +n07825717 +n07828987 +n07829412 +n07830593 +n07832902 +n07834507 +n07836731 +n07837002 +n07837362 +n07838233 +n07841495 +n07841639 +n07841907 +n07842753 +n07842972 +n07843464 +n07843775 +n07844042 +n07844604 +n07846143 +n07847198 +n07848338 +n07848771 +n07849336 +n07850083 +n07850329 +n07851298 +n07852045 +n07852919 +n07854813 +n07856270 +n07857959 +n07858595 +n07859284 +n07859583 +n07860805 +n07861158 +n07861813 +n07863374 +n07864638 +n07865105 +n07867421 +n07867883 +n07869391 +n07869775 +n07870313 +n07871436 +n07873464 +n07874063 +n07874159 +n07874259 +n07874343 +n07874441 +n07874780 +n07875693 +n07875835 +n07876281 +n07880751 +n07881117 +n07881205 +n07881404 +n07881800 +n07882497 +n07882886 +n07883031 +n07883251 +n07883384 +n07884567 +n07886572 +n07886849 +n07887634 +n07888465 +n07888909 +n07889510 +n07890352 +n07890750 
+n07891726 +n07892813 +n07893528 +n07893891 +n07894102 +n07894298 +n07894965 +n07895237 +n07895435 +n07895595 +n07895710 +n07895839 +n07896287 +n07897200 +n07897865 +n07898117 +n07898333 +n07898745 +n07899108 +n07900406 +n07900616 +n07901587 +n07903208 +n07904395 +n07905038 +n07906284 +n07906877 +n07907161 +n07907548 +n07907943 +n07909129 +n07909811 +n07911371 +n07911677 +n07912211 +n07913393 +n07914413 +n07915618 +n07916041 +n07917618 +n07918028 +n07920222 +n07921455 +n07921948 +n07923748 +n07924033 +n07924560 +n07924834 +n07925966 +n07926920 +n07927197 +n07927931 +n07929519 +n07930554 +n07931001 +n07931096 +n07932614 +n07932841 +n07933274 +n07933891 +n07934032 +n07934530 +n07935152 +n07935504 +n07936263 +n07936745 +n07938149 +n07951464 +n08554440 +n08558963 +n08596076 +n08598301 +n08616050 +n08640531 +n08659446 +n09191635 +n09206896 +n09206985 +n09210862 +n09213434 +n09213565 +n09214060 +n09214916 +n09215437 +n09217230 +n09230041 +n09233446 +n09238926 +n09255070 +n09259025 +n09259219 +n09262690 +n09265620 +n09269882 +n09270735 +n09287968 +n09289331 +n09289596 +n09290444 +n09295946 +n09300306 +n09302616 +n09303008 +n09303528 +n09304750 +n09305031 +n09308572 +n09309292 +n09315159 +n09326662 +n09335693 +n09335809 +n09336555 +n09337253 +n09344198 +n09352849 +n09359803 +n09362945 +n09366017 +n09366317 +n09375606 +n09376526 +n09381242 +n09393605 +n09396465 +n09398677 +n09405787 +n09406793 +n09409512 +n09409752 +n09410224 +n09416076 +n09421799 +n09428628 +n09432990 +n09433442 +n09437454 +n09439213 +n09443641 +n09453008 +n09458269 +n09472413 +n09474010 +n09505153 +n09606009 +n09606527 +n09608709 +n09610405 +n09613191 +n09615336 +n09616922 +n09619168 +n09619452 +n09620078 +n09620794 +n09622049 +n09622302 +n09624168 +n09624559 +n09625401 +n09626238 +n09627906 +n09629246 +n09629752 +n09631129 +n09632274 +n09632518 +n09633969 +n09636339 +n09638875 +n09639919 +n09641002 +n09644152 +n09648743 +n09651123 +n09665545 +n09669631 +n09670280 +n09676884 +n09679925 +n09690208 
+n09694771 +n09696585 +n09697401 +n09700964 +n09701148 +n09701833 +n09705124 +n09708750 +n09710164 +n09716047 +n09718217 +n09722658 +n09724785 +n09725229 +n09725772 +n09726621 +n09727440 +n09727826 +n09730204 +n09731436 +n09731571 +n09735258 +n09738400 +n09744679 +n09754217 +n09758173 +n09758885 +n09761068 +n09763784 +n09764201 +n09764598 +n09765278 +n09767197 +n09769076 +n09770179 +n09771435 +n09772746 +n09773962 +n09774783 +n09790482 +n09792555 +n09795124 +n09795334 +n09800964 +n09802445 +n09802641 +n09805151 +n09805475 +n09809538 +n09809749 +n09810166 +n09811712 +n09814660 +n09815790 +n09816771 +n09818022 +n09820263 +n09821831 +n09823502 +n09824135 +n09824609 +n09826204 +n09830194 +n09831962 +n09834699 +n09836160 +n09840217 +n09841188 +n09841515 +n09841696 +n09842047 +n09848489 +n09851575 +n09853645 +n09853881 +n09854915 +n09857007 +n09861946 +n09865398 +n09868270 +n09871681 +n09877951 +n09889691 +n09892693 +n09894654 +n09895222 +n09895701 +n09902353 +n09903153 +n09910374 +n09917593 +n09918248 +n09923418 +n09923673 +n09924996 +n09927089 +n09927451 +n09928136 +n09928451 +n09929298 +n09930257 +n09930876 +n09931640 +n09933098 +n09935434 +n09936892 +n09937056 +n09941964 +n09942970 +n09943239 +n09943811 +n09944160 +n09945319 +n09950457 +n09951070 +n09951274 +n09960688 +n09962966 +n09964411 +n09968845 +n09974648 +n09976728 +n09979321 +n09983572 +n09989502 +n09990415 +n09991867 +n09992538 +n09992837 +n09993252 +n09994673 +n09996481 +n09997622 +n10001217 +n10006748 +n10007684 +n10009484 +n10009671 +n10015215 +n10015897 +n10017422 +n10018861 +n10020890 +n10024362 +n10029068 +n10034201 +n10034614 +n10035952 +n10036266 +n10036929 +n10037385 +n10040945 +n10041887 +n10042690 +n10043643 +n10044879 +n10047459 +n10048367 +n10048836 +n10052694 +n10053808 +n10054657 +n10055730 +n10055847 +n10060175 +n10067968 +n10070711 +n10077593 +n10078131 +n10078806 +n10079399 +n10079893 +n10080869 +n10083823 +n10084043 +n10084295 +n10086383 +n10091651 +n10092488 +n10093475 +n10094584 
+n10095869 +n10098710 +n10098862 +n10099375 +n10101634 +n10102800 +n10105085 +n10107303 +n10109662 +n10111903 +n10112129 +n10118844 +n10126177 +n10126424 +n10126708 +n10127689 +n10129825 +n10134396 +n10134982 +n10136959 +n10142747 +n10142946 +n10143172 +n10143725 +n10145340 +n10145774 +n10148305 +n10150071 +n10150940 +n10151570 +n10153594 +n10154186 +n10154601 +n10155849 +n10162194 +n10164233 +n10165448 +n10168183 +n10168584 +n10171567 +n10182190 +n10185793 +n10186774 +n10187130 +n10195593 +n10200781 +n10202624 +n10205457 +n10206173 +n10207169 +n10210137 +n10215623 +n10216106 +n10224578 +n10225219 +n10228278 +n10235385 +n10237069 +n10241300 +n10243664 +n10245639 +n10249270 +n10249459 +n10249950 +n10257221 +n10259348 +n10263411 +n10266328 +n10266848 +n10271677 +n10273064 +n10274815 +n10276045 +n10282672 +n10284064 +n10284965 +n10296444 +n10299250 +n10299700 +n10305635 +n10305802 +n10306004 +n10308732 +n10312287 +n10314054 +n10315561 +n10316360 +n10317007 +n10317500 +n10320863 +n10321340 +n10322238 +n10323999 +n10324560 +n10328437 +n10332385 +n10335246 +n10335931 +n10340312 +n10341573 +n10343554 +n10345100 +n10353016 +n10353355 +n10355142 +n10355449 +n10355688 +n10356450 +n10357613 +n10360747 +n10366966 +n10369528 +n10370381 +n10376523 +n10377021 +n10379376 +n10380672 +n10383816 +n10386984 +n10387196 +n10387324 +n10393909 +n10396106 +n10399130 +n10400998 +n10402824 +n10403876 +n10405694 +n10407954 +n10409752 +n10411551 +n10415037 +n10417551 +n10418101 +n10419047 +n10420031 +n10421016 +n10426454 +n10427764 +n10428004 +n10433737 +n10435716 +n10435988 +n10438172 +n10439851 +n10444194 +n10450303 +n10462860 +n10464052 +n10466918 +n10467179 +n10470779 +n10474064 +n10474645 +n10478960 +n10481268 +n10482054 +n10482921 +n10484858 +n10488309 +n10495421 +n10499355 +n10499857 +n10506544 +n10508710 +n10512372 +n10512708 +n10519494 +n10521100 +n10521662 +n10522035 +n10522324 +n10522759 +n10523341 +n10525134 +n10525436 +n10525617 +n10527334 +n10529231 +n10541833 +n10542888 
+n10543161 +n10544232 +n10544748 +n10546633 +n10548537 +n10548681 +n10554846 +n10556518 +n10557854 +n10559288 +n10560637 +n10568200 +n10570019 +n10575787 +n10576962 +n10577284 +n10580535 +n10582746 +n10583387 +n10594147 +n10595164 +n10595647 +n10599806 +n10602985 +n10604634 +n10605253 +n10610465 +n10612210 +n10614629 +n10617193 +n10618685 +n10618848 +n10619642 +n10620758 +n10622053 +n10624074 +n10624310 +n10625860 +n10628644 +n10630188 +n10632576 +n10633450 +n10648237 +n10648696 +n10654932 +n10657835 +n10661002 +n10661563 +n10665698 +n10669991 +n10674130 +n10676018 +n10679174 +n10682953 +n10686073 +n10692883 +n10693824 +n10694258 +n10698368 +n10700201 +n10700640 +n10701180 +n10703336 +n10703692 +n10705615 +n10707233 +n10708454 +n10709529 +n10713686 +n10720453 +n10721321 +n10722575 +n10722965 +n10726786 +n10735298 +n10740868 +n10741152 +n10742997 +n10744164 +n10747119 +n10751265 +n10752480 +n10759151 +n10759982 +n10763383 +n10763620 +n10765679 +n10766260 +n10768903 +n10779610 +n10780632 +n10782791 +n10782940 +n10787470 +n10791221 +n10792335 +n10793570 +n10794014 +n11531193 +n11537327 +n11542640 +n11545524 +n11545714 +n11547562 +n11547855 +n11552386 +n11553240 +n11596108 +n11598686 +n11600372 +n11601177 +n11601918 +n11608250 +n11609475 +n11609684 +n11612923 +n11614250 +n11618861 +n11620673 +n11621029 +n11623105 +n11624531 +n11627168 +n11628456 +n11630017 +n11630489 +n11643835 +n11645914 +n11647306 +n11649878 +n11650558 +n11650759 +n11661372 +n11665372 +n11666854 +n11669921 +n11672400 +n11674332 +n11676500 +n11684264 +n11689483 +n11693981 +n11697560 +n11700864 +n11703669 +n11708658 +n11709674 +n11713164 +n11720353 +n11722982 +n11723770 +n11725015 +n11725623 +n11727091 +n11729478 +n11733054 +n11736694 +n11741350 +n11745817 +n11747468 +n11748002 +n11751765 +n11752578 +n11756092 +n11756669 +n11759224 +n11763625 +n11767354 +n11769621 +n11771539 +n11774513 +n11775340 +n11779300 +n11782036 +n11783920 +n11785668 +n11789438 +n11789962 +n11790788 +n11793779 +n11794519 
+n11796005 +n11801392 +n11805956 +n11807108 +n11807979 +n11808721 +n11811473 +n11815491 +n11817914 +n11820965 +n11823043 +n11830714 +n11830906 +n11832214 +n11836722 +n11839568 +n11845557 +n11851578 +n11855274 +n11857696 +n11862835 +n11865071 +n11866248 +n11868814 +n11869351 +n11869689 +n11872146 +n11875691 +n11875938 +n11877473 +n11878283 +n11887119 +n11890022 +n11892637 +n11894327 +n11898639 +n11900569 +n11902709 +n11915214 +n11915658 +n11915899 +n11916467 +n11918286 +n11919447 +n11920498 +n11924445 +n11928352 +n11928858 +n11931918 +n11932745 +n11939699 +n11940006 +n11943407 +n11944196 +n11945367 +n11946727 +n11947251 +n11948264 +n11950345 +n11951511 +n11952346 +n11953884 +n11954484 +n11956850 +n11965627 +n11967744 +n11970101 +n11971248 +n11971783 +n11972759 +n11973341 +n11976170 +n11977303 +n11978233 +n11982115 +n11985053 +n11985739 +n11988893 +n11991263 +n11997032 +n11997969 +n12006766 +n12008252 +n12008749 +n12010628 +n12013511 +n12015959 +n12018760 +n12020507 +n12024176 +n12030654 +n12034141 +n12036067 +n12036939 +n12041446 +n12043444 +n12045860 +n12050959 +n12053405 +n12056217 +n12057447 +n12062468 +n12065316 +n12065777 +n12075151 +n12076577 +n12080395 +n12083591 +n12086012 +n12086539 +n12087961 +n12090890 +n12092262 +n12094244 +n12095020 +n12096395 +n12101870 +n12102133 +n12105125 +n12107970 +n12108432 +n12109827 +n12110778 +n12112008 +n12112918 +n12113657 +n12117017 +n12119099 +n12119238 +n12121033 +n12124627 +n12126360 +n12131550 +n12135898 +n12136720 +n12137120 +n12137569 +n12139575 +n12141495 +n12142085 +n12143676 +n12144313 +n12146311 +n12147226 +n12152532 +n12153580 +n12154773 +n12155583 +n12156819 +n12157056 +n12157769 +n12158031 +n12158798 +n12159055 +n12159555 +n12160490 +n12161285 +n12163035 +n12164363 +n12166424 +n12168565 +n12170585 +n12173664 +n12174311 +n12174926 +n12182049 +n12187663 +n12188289 +n12195391 +n12196129 +n12199266 +n12201580 +n12202936 +n12205694 +n12214789 +n12215579 +n12217453 +n12221191 +n12224978 +n12225349 +n12226932 
+n12231192 +n12236546 +n12237486 +n12244153 +n12245695 +n12246232 +n12252168 +n12252866 +n12253229 +n12256112 +n12257570 +n12260799 +n12262553 +n12265394 +n12266217 +n12266796 +n12268246 +n12269241 +n12269652 +n12271643 +n12274630 +n12275489 +n12281241 +n12284262 +n12286826 +n12287642 +n12288823 +n12290748 +n12293723 +n12296432 +n12300840 +n12302071 +n12303462 +n12305475 +n12306717 +n12307756 +n12310349 +n12316444 +n12318378 +n12320010 +n12322501 +n12328398 +n12330469 +n12334293 +n12334891 +n12335483 +n12335664 +n12335800 +n12340383 +n12341542 +n12342299 +n12343480 +n12344283 +n12346578 +n12350758 +n12352287 +n12355760 +n12360108 +n12360684 +n12364604 +n12367611 +n12374418 +n12377198 +n12381511 +n12385429 +n12387633 +n12387839 +n12392070 +n12396924 +n12399132 +n12401335 +n12401684 +n12405714 +n12409231 +n12411461 +n12412355 +n12412606 +n12416423 +n12419037 +n12420535 +n12421467 +n12421683 +n12425281 +n12430198 +n12431434 +n12437513 +n12437769 +n12441958 +n12446200 +n12446519 +n12449296 +n12450344 +n12451915 +n12454159 +n12459629 +n12460697 +n12461466 +n12462032 +n12463743 +n12464476 +n12466727 +n12470092 +n12474167 +n12475035 +n12476510 +n12480895 +n12491826 +n12495146 +n12499163 +n12506181 +n12508309 +n12509476 +n12511856 +n12516584 +n12522188 +n12524188 +n12526516 +n12527738 +n12539074 +n12539306 +n12546183 +n12548280 +n12550210 +n12554526 +n12556656 +n12560282 +n12560775 +n12562577 +n12572546 +n12573256 +n12575322 +n12582231 +n12582665 +n12582846 +n12583126 +n12583401 +n12584191 +n12586298 +n12590232 +n12594989 +n12595964 +n12602262 +n12602980 +n12612170 +n12614477 +n12615710 +n12620196 +n12622875 +n12624381 +n12625383 +n12631331 +n12633638 +n12634211 +n12634429 +n12635744 +n12636885 +n12638218 +n12638556 +n12639736 +n12640607 +n12641413 +n12641931 +n12642200 +n12643473 +n12644902 +n12645174 +n12647376 +n12649065 +n12650556 +n12651821 +n12653218 +n12655869 +n12658118 +n12658846 +n12659356 +n12660601 +n12662772 +n12663804 +n12665048 +n12667406 +n12667964 
+n12674120 +n12674685 +n12682411 +n12683407 +n12685431 +n12685831 +n12688716 +n12690653 +n12695144 +n12698435 +n12705013 +n12707781 +n12708293 +n12709901 +n12711596 +n12713063 +n12714755 +n12715914 +n12717072 +n12719684 +n12724942 +n12725521 +n12727301 +n12731401 +n12732491 +n12732756 +n12733647 +n12741222 +n12742741 +n12743823 +n12746884 +n12749049 +n12752205 +n12755225 +n12756457 +n12762896 +n12768369 +n12771192 +n12772753 +n12777436 +n12778605 +n12779603 +n12785724 +n12791064 +n12793015 +n12794985 +n12798284 +n12800586 +n12801520 +n12805146 +n12806732 +n12810595 +n12812235 +n12814643 +n12817464 +n12822769 +n12823717 +n12823859 +n12832315 +n12833985 +n12834798 +n12836212 +n12836862 +n12839979 +n12840749 +n12842302 +n12842887 +n12844939 +n12849061 +n12853080 +n12854048 +n12858150 +n12866968 +n12869478 +n12870535 +n12871272 +n12877244 +n12878169 +n12879963 +n12882779 +n12884260 +n12890265 +n12893463 +n12903367 +n12904938 +n12908645 +n12909421 +n12912670 +n12917901 +n12922763 +n12926480 +n12928071 +n12929403 +n12930778 +n12931906 +n12934036 +n12934479 +n12939104 +n12941536 +n12942395 +n12943443 +n12946849 +n12950126 +n12952165 +n12953206 +n12956170 +n12957608 +n12960378 +n12960863 +n12965626 +n12968136 +n12969131 +n12970193 +n12971400 +n12973791 +n12974987 +n12976198 +n12980840 +n12982468 +n12983961 +n12985773 +n12987056 +n12988158 +n12992868 +n12997654 +n12997919 +n13000891 +n13001041 +n13001206 +n13001366 +n13001529 +n13002750 +n13002925 +n13003061 +n13003254 +n13003522 +n13003712 +n13004423 +n13004640 +n13004826 +n13004992 +n13005329 +n13005984 +n13006171 +n13006631 +n13006894 +n13007417 +n13007629 +n13008157 +n13008315 +n13008485 +n13008689 +n13008839 +n13009085 +n13009244 +n13009429 +n13009656 +n13010694 +n13010951 +n13011221 +n13012253 +n13012469 +n13012973 +n13013534 +n13013764 +n13013965 +n13014097 +n13014265 +n13014409 +n13014581 +n13014741 +n13014879 +n13017102 +n13017240 +n13017439 +n13017610 +n13017789 +n13017979 +n13018088 +n13018232 +n13018407 
+n13019496 +n13019643 +n13019835 +n13020191 +n13020481 +n13020964 +n13021166 +n13021332 +n13021543 +n13021689 +n13021867 +n13022210 +n13022709 +n13024012 +n13024500 +n13025647 +n13028611 +n13032115 +n13032923 +n13035241 +n13035389 +n13035707 +n13037585 +n13037805 +n13038068 +n13038376 +n13038744 +n13039349 +n13040629 +n13040796 +n13041312 +n13042982 +n13043926 +n13045210 +n13045975 +n13046130 +n13049953 +n13055423 +n13055577 +n13055792 +n13055949 +n13056135 +n13056349 +n13056607 +n13056799 +n13057054 +n13057242 +n13057422 +n13057639 +n13058037 +n13058272 +n13058608 +n13059298 +n13059657 +n13060017 +n13060190 +n13063269 +n13066129 +n13067191 +n13068917 +n13070308 +n13070875 +n13071371 +n13071553 +n13071815 +n13072031 +n13072209 +n13072350 +n13072528 +n13072706 +n13072863 +n13073055 +n13073703 +n13074619 +n13074814 +n13075020 +n13075272 +n13075441 +n13075684 +n13075847 +n13076041 +n13076405 +n13076643 +n13076831 +n13077033 +n13077295 +n13079419 +n13083023 +n13084184 +n13085113 +n13091620 +n13091774 +n13100156 +n13100677 +n13104059 +n13108131 +n13108662 +n13108841 +n13109733 +n13110915 +n13111174 +n13111881 +n13118707 +n13119870 +n13120211 +n13121104 +n13122364 +n13123431 +n13125117 +n13130161 +n13130726 +n13132034 +n13132338 +n13132486 +n13132940 +n13134302 +n13134947 +n13135832 +n13136316 +n13136556 +n13137409 +n13137672 +n13138308 +n13138658 +n13138842 +n13139055 +n13139647 +n13141141 +n13145444 +n13149296 +n13150894 +n13154841 +n13156986 +n13157137 +n13160831 +n13163991 +n13172923 +n13174670 +n13177529 +n13180534 +n13186388 +n13188096 +n13188268 +n13192625 +n13193642 +n13194572 +n13195761 +n13199970 +n13201969 +n13206817 +n13207736 +n13208705 +n13211790 +n13219422 +n13221529 +n13224673 +n13230662 +n13231678 +n13231919 +n13232106 +n13232363 +n13232779 +n13233727 +n13238375 +n13238988 +n13252672 +n13862780 +n13863186 +n13863473 +n13863771 +n13864153 +n13864965 +n13865298 +n13865483 +n13866144 +n13866827 +n13867492 +n13868248 +n13868371 +n13872592 +n13873502 
+n13875392 +n13875571 +n13878306 +n13879320 +n13883603 +n13888491 +n13893786 +n13894434 +n13896100 +n13897996 +n13900287 +n13903079 +n13905121 +n13905275 +n13905792 +n13912260 +n13915999 +n14633206 +n14696793 +n14844693 +n14853210 +n14899328 +n14900184 +n14974264 +n14977504 +n14992287 +n15062057 +n15067877 +n15089258 +n15089472 +n15089645 +n15089803 +n15090742 +n15092409 +n15092751 \ No newline at end of file diff --git a/workloads/realworld/pipeline/darknet/cfg/imagenet.shortnames.list b/workloads/realworld/pipeline/darknet/cfg/imagenet.shortnames.list new file mode 100644 index 0000000000000000000000000000000000000000..e7a18d44b543086958eaf60e6dc0b7deb0df9400 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/imagenet.shortnames.list @@ -0,0 +1,21842 @@ +kit fox +English setter +Siberian husky +Australian terrier +English springer +grey whale +lesser panda +Egyptian cat +ibex +Persian cat +cougar +gazelle +porcupine +sea lion +malamute +badger +Great Dane +Walker hound +Welsh springer spaniel +whippet +Scottish deerhound +killer whale +mink +African elephant +Weimaraner +soft-coated wheaten terrier +Dandie Dinmont +red wolf +Old English sheepdog +jaguar +otterhound +bloodhound +Airedale +hyena +meerkat +giant schnauzer +titi +three-toed sloth +sorrel +black-footed ferret +dalmatian +black-and-tan coonhound +papillon +skunk +Staffordshire bullterrier +Mexican hairless +Bouvier des Flandres +weasel +miniature poodle +Cardigan +malinois +bighorn +fox squirrel +colobus +tiger cat +Lhasa +impala +coyote +Yorkshire terrier +Newfoundland +brown bear +red fox +Norwegian elkhound +Rottweiler +hartebeest +Saluki +grey fox +schipperke +Pekinese +Brabancon griffon +West Highland white terrier +Sealyham terrier +guenon +mongoose +indri +tiger +Irish wolfhound +wild boar +EntleBucher +zebra +ram +French bulldog +orangutan +basenji +leopard +Bernese mountain dog +Maltese dog +Norfolk terrier +toy terrier +vizsla +cairn +squirrel monkey +groenendael +clumber +Siamese 
cat +chimpanzee +komondor +Afghan hound +Japanese spaniel +proboscis monkey +guinea pig +white wolf +ice bear +gorilla +borzoi +toy poodle +Kerry blue terrier +ox +Scotch terrier +Tibetan mastiff +spider monkey +Doberman +Boston bull +Greater Swiss Mountain dog +Appenzeller +Shih-Tzu +Irish water spaniel +Pomeranian +Bedlington terrier +warthog +Arabian camel +siamang +miniature schnauzer +collie +golden retriever +Irish terrier +affenpinscher +Border collie +hare +boxer +silky terrier +beagle +Leonberg +German short-haired pointer +patas +dhole +baboon +macaque +Chesapeake Bay retriever +bull mastiff +kuvasz +capuchin +pug +curly-coated retriever +Norwich terrier +flat-coated retriever +hog +keeshond +Eskimo dog +Brittany spaniel +standard poodle +Lakeland terrier +snow leopard +Gordon setter +dingo +standard schnauzer +hamster +Tibetan terrier +Arctic fox +wire-haired fox terrier +basset +water buffalo +American black bear +Angora +bison +howler monkey +hippopotamus +chow +giant panda +American Staffordshire terrier +Shetland sheepdog +Great Pyrenees +Chihuahua +tabby +marmoset +Labrador retriever +Saint Bernard +armadillo +Samoyed +bluetick +redbone +polecat +marmot +kelpie +gibbon +llama +miniature pinscher +wood rabbit +Italian greyhound +lion +cocker spaniel +Irish setter +dugong +Indian elephant +beaver +Sussex spaniel +Pembroke +Blenheim spaniel +Madagascar cat +Rhodesian ridgeback +lynx +African hunting dog +langur +Ibizan hound +timber wolf +cheetah +English foxhound +briard +sloth bear +Border terrier +German shepherd +otter +koala +tusker +echidna +wallaby +platypus +wombat +revolver +umbrella +schooner +soccer ball +accordion +ant +starfish +chambered nautilus +grand piano +laptop +strawberry +airliner +warplane +airship +balloon +space shuttle +fireboat +gondola +speedboat +lifeboat +canoe +yawl +catamaran +trimaran +container ship +liner +pirate +aircraft carrier +submarine +wreck +half track +tank +missile +bobsled +dogsled +bicycle-built-for-two 
+mountain bike +freight car +passenger car +barrow +shopping cart +motor scooter +forklift +electric locomotive +steam locomotive +amphibian +ambulance +beach wagon +cab +convertible +jeep +limousine +minivan +Model T +racer +sports car +go-kart +golfcart +moped +snowplow +fire engine +garbage truck +pickup +tow truck +trailer truck +moving van +police van +recreational vehicle +streetcar +snowmobile +tractor +mobile home +tricycle +unicycle +horse cart +jinrikisha +oxcart +bassinet +cradle +crib +four-poster +bookcase +china cabinet +medicine chest +chiffonier +table lamp +file +park bench +barber chair +throne +folding chair +rocking chair +studio couch +toilet seat +desk +pool table +dining table +entertainment center +wardrobe +Granny Smith +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +acorn +hip +ear +rapeseed +corn +buckeye +organ +upright +chime +drum +gong +maraca +marimba +steel drum +banjo +cello +violin +harp +acoustic guitar +electric guitar +cornet +French horn +trombone +harmonica +ocarina +panpipe +bassoon +oboe +sax +flute +daisy +yellow lady's slipper +cliff +valley +alp +volcano +promontory +sandbar +coral reef +lakeside +seashore +geyser +hatchet +cleaver +letter opener +plane +power drill +lawn mower +hammer +corkscrew +can opener +plunger +screwdriver +shovel +plow +chain saw +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +white stork +black stork +spoonbill +flamingo +American egret +little blue heron +bittern +crane +limpkin +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +European gallinule 
+pelican +king penguin +albatross +great white shark +tiger shark +hammerhead +electric ray +stingray +barracouta +coho +tench +goldfish +eel +rock beauty +anemone fish +lionfish +puffer +sturgeon +gar +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +triceratops +African crocodile +American alligator +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +whistle +wing +paintbrush +hand blower +oxygen mask +snorkel +loudspeaker +microphone +screen +mouse +electric fan +oil filter +strainer +space heater +stove +guillotine +barometer +rule +odometer +scale +analog clock +digital clock +wall clock +hourglass +sundial +parking meter +stopwatch +digital watch +stethoscope +syringe +magnetic compass +binoculars +projector +sunglasses +loupe +radio telescope +bow +cannon +assault rifle +rifle +projectile +computer keyboard +typewriter keyboard +crane +lighter +abacus +cash machine +slide rule +desktop computer +hand-held computer +notebook +web site +harvester +thresher +printer +slot +vending machine +sewing machine +joystick +switch +hook +car wheel +paddlewheel +pinwheel +potter's wheel +gas pump +carousel +swing +reel +radiator +puck +hard disc +sunglass +pick +car mirror +solar dish +remote control +disk brake +buckle +hair slide +knot +combination lock +padlock +nail +safety pin +screw +muzzle +seat belt +ski +candle +jack-o'-lantern +spotlight +torch +neck brace +pier +tripod +maypole +mousetrap +spider web +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow 
+tarantula +wolf spider +tick +centipede +isopod +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +sea urchin +sea cucumber +iron +espresso maker +microwave +Dutch oven +rotisserie +toaster +waffle iron +vacuum +dishwasher +refrigerator +washer +Crock Pot +frying pan +wok +caldron +coffeepot +teapot +spatula +altar +triumphal arch +patio +steel arch bridge +suspension bridge +viaduct +barn +greenhouse +palace +monastery +library +apiary +boathouse +church +mosque +stupa +planetarium +restaurant +cinema +home theater +lumbermill +coil +obelisk +totem pole +castle +prison +grocery store +bakery +barbershop +bookshop +butcher shop +confectionery +shoe shop +tobacco shop +toyshop +fountain +cliff dwelling +yurt +dock +brass +megalith +bannister +breakwater +dam +chainlink fence +picket fence +worm fence +stone wall +grille +sliding door +turnstile +mountain tent +scoreboard +honeycomb +plate rack +pedestal +beacon +mashed potato +bell pepper +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +cardoon +mushroom +shower curtain +jean +carton +handkerchief +sandal +ashcan +safe +plate +necklace +croquet ball +fur coat +thimble +pajama +running shoe +cocktail shaker +chest +manhole cover +modem +tub +tray +balance beam +bagel +prayer rug +kimono +hot pot +whiskey jug +knee pad +book jacket +spindle +ski mask +beer bottle +crash helmet +bottlecap +tile roof +mask +maillot +Petri dish +football helmet +bathing cap +teddy +holster +pop bottle +photocopier 
+vestment +crossword puzzle +golf ball +trifle +suit +water tower +feather boa +cloak +red wine +drumstick +shield +Christmas stocking +hoopskirt +menu +stage +bonnet +meat loaf +baseball +face powder +scabbard +sunscreen +beer glass +hen-of-the-woods +guacamole +lampshade +wool +hay +bow tie +mailbag +water jug +bucket +dishrag +soup bowl +eggnog +mortar +trench coat +paddle +chain +swab +mixing bowl +potpie +wine bottle +shoji +bulletproof vest +drilling platform +binder +cardigan +sweatshirt +pot +birdhouse +hamper +ping-pong ball +pencil box +pay-phone +consomme +apron +punching bag +backpack +groom +bearskin +pencil sharpener +broom +mosquito net +abaya +mortarboard +poncho +crutch +Polaroid camera +space bar +cup +racket +traffic light +quill +radio +dough +cuirass +military uniform +lipstick +shower cap +monitor +oscilloscope +mitten +brassiere +French loaf +vase +milk can +rugby ball +paper towel +earthstar +envelope +miniskirt +cowboy hat +trolleybus +perfume +bathtub +hotdog +coral fungus +bullet train +pillow +toilet tissue +cassette +carpenter's kit +ladle +stinkhorn +lotion +hair spray +academic gown +dome +crate +wig +burrito +pill bottle +chain mail +theater curtain +window shade +barrel +washbasin +ballpoint +basketball +bath towel +cowboy boot +gown +window screen +agaric +cellular telephone +nipple +barbell +mailbox +lab coat +fire screen +minibus +packet +maze +pole +horizontal bar +sombrero +pickelhaube +rain barrel +wallet +cassette player +comic book +piggy bank +street sign +bell cote +fountain pen +Windsor tie +volleyball +overskirt +sarong +purse +bolo tie +bib +parachute +sleeping bag +television +swimming trunks +measuring cup +espresso +pizza +breastplate +shopping basket +wooden spoon +saltshaker +chocolate sauce +ballplayer +goblet +gyromitra +stretcher +water bottle +dial telephone +soap dispenser +jersey +school bus +jigsaw puzzle +plastic bag +reflex camera +diaper +Band Aid +ice lolly +velvet +tennis ball +gasmask +doormat +Loafer 
+ice cream +pretzel +quilt +maillot +tape player +clog +iPod +bolete +scuba diver +pitcher +matchstick +bikini +sock +CD player +lens cap +thatch +vault +beaker +bubble +cheeseburger +parallel bars +flagpole +coffee mug +rubber eraser +stole +carbonara +dumbbell +singles +Virginia deer +eastern grey squirrel +gelding +pylon +table-tennis table +peacock +Segway +surfing +tamandua +knocker +steering wheel +motorcycling +coati +sitar +range +backhoe +agaric +dashboard +water polo +concrete mixer +treadmill +golf bag +skateboarding +royal tennis +tartan +four-wheel drive +sport utility +sedan +print +luggage rack +softball +windmill +ben +red admiral +jalousie +towel rail +truss +strand +ice hockey +sconce +wind turbine +plush +stained-glass window +ballpark +thoroughbred +love seat +red-spotted purple +miller +Adelie +freight liner +clock tower +acrobatics +shaving brush +ewe +ottoman +African violet +bicycle wheel +cork +windmill +satin +comma +coffee mill +baggage +wasp's nest +batting glove +Ferris wheel +push-bike +porthole +football stadium +gas tank +barbecue +handlebar +hula-hoop +fairground +rapier +garter stitch +exercise bike +control tower +carryall +minute hand +cog +riverbank +water nymph +common dandelion +android +hairbrush +redberry +fret +display window +pepper mill +litterbin +drapery +ducking +fly-fishing +broad jump +sprinkler +water-skiing +chicory +sail +volleyball +rugby +Texas bluebonnet +computer monitor +tortoiseshell +airplane propeller +solar array +figure skating +air conditioner +purple loosestrife +gearshift +outboard motor +cowslip +Abyssinian +dip +workstation +cosy +bunker +neon lamp +campanile +casket +verbena +amphora +sumo +common foxglove +sprocket +jelly bean +emperor penguin +night-blooming cereus +clock radio +black birch +bomber jacket +Virginia bluebell +bayonet +walker +altarpiece +tattoo +bridle +rocker arm +water turkey +spiderwort +flange +mute swan +laser printer +carburetor +coverlet +mountainside +baritone +auto racing 
+baluster +gal +peach bells +taffeta +grandfather +asparagus +horizontal stabilizer +world +grate +marsh marigold +white rhinoceros +movement +split rail +rollerblading +longhorn +muffler +church tower +light bulb +American agave +backpacking tent +overall +New World goldfinch +sectional +wing chair +transom +integrated circuit +dad +spar +picture frame +no-hit game +alternator +drill press +strawflower +hepatica +rangefinder +blinker +Welsh pony +nib +wagon wheel +rotor +tie +denim +jetliner +sculling +external drive +window frame +mourning dove +censer +stapler +batting helmet +flagon +machete +windshield +hedgehog +weeping willow +chief executive officer +hepatica +pet +Asiatic black bear +chinchilla +uke +Atlantic bottlenose dolphin +hair +dishtowel +flintlock +Bermuda shorts +lavender +searchlight +millwheel +piano keyboard +luna moth +bumper +parrot +skirt +manhole +coffee table +footstool +judo +Dalai Lama +armored personnel carrier +voile +saber +thoroughbred +wild carrot +gemsbok +caster +butterfly orchid +cow +sideboard +horseshoe crab +match play +cassette recorder +photomicrograph +drafting table +pediment +tramline +shipping +kitten +wainscoting +fried rice +helix +marguerite +pumpkin +white-bellied swallow +Tulipa gesneriana +common dolphin +face +red squirrel +bicycling +shipwreck +banded purple +cornice +pendant earring +forsythia +aardvark +seashell +spat +shoulder bag +fallow deer +yearling +common teasel +tufted titmouse +ancient +professional golf +purl +vehicle +okra +great grandmother +common lilac +rose mallow +newspaper +crucifix +chukka +armlet +fulmar +wapiti +doily +Greco-Roman wrestling +bleeding heart +kitchen table +bluebonnet +Cape buffalo +spun yarn +crape myrtle +dewdrop +great blue heron +medalist +vaulting horse +spinning wheel +skyscraper +Tahitian +forget-me-not +watercourse +guitarist +gargoyle +bee balm +pumpkin +hunting knife +flutist +lectern +skateboarder +foil +pant leg +hedge sparrow +dresser +automatic pistol +chicory 
+dialog box +chamberpot +black rhinoceros +fireweed +half-mast +pillow sham +pavilion +scarf joint +microprocessor +filly +dressing gown +shell +Arabian +child +radio antenna +butterweed +morris dancer +sparrow hawk +groom +brioche +floret +rainbow +earthworm +cellist +tine +toupee +balldress +map +angel's trumpet +ruin +fur +pronghorn +speed skating +used-car +stick +early spider orchid +stuffed peppers +snowdrift +flats +least sandpiper +stick +console table +ventilator +portable +kepi +pylon +viceroy +shoreline +Olympian Zeus +pestle +great-niece +life +air compressor +fanjet +scuba diving +fieldfare +tree swallow +personnel carrier +night-blooming cereus +sonogram +assembly hall +circuit breaker +chair +speed skate +soapwort +worsted +raspberry +burlap +flat panel display +Pyracantha +cemetery +turban +deer hunting +bottle green +dandelion green +pieta +aigrette +turntable +cover girl +clutch bag +kiwi +pea jacket +color guard +Malay +shire +crock +french fries +credenza +hockey stick +mourning cloak +potty seat +glass +balsamroot +medal play +red clover +gravy boat +garter belt +Guinness +meadow buttercup +jackass penguin +coursing +tooth +hawfinch +housetop +fluorescent lamp +black-backed gull +bookshelf +earplug +millipede +fawn +baseball bat +soup-strainer +organ loft +bugloss +tomahawk +blackcap +black-necked stilt +hand truck +bedstead +tempura +rose window +crimson +snow thrower +lesser whitethroat +palomino +ball +staff sergeant +wicker +garbage heap +great-nephew +parquet +coupe +nave +eggs Benedict +damask +flush toilet +Angora +pedometer +control room +bristle brush +kookaburra +telephone booth +Windsor chair +red-winged blackbird +cinnamon roll +briefs +cloister +sundress +mammillaria +unicyclist +covered bridge +coelogyne +fairy bluebird +phoebe +beer mug +headstock +parhelion +gorse +common European dogwood +fire-eater +professional football +rock climbing +cyclamen +tin +marjoram +Japanese morning glory +pipe +smasher +hang glider +abutment 
+birdbath +jotter +litter +artist's model +butterfly bush +dining area +sausage dog +piggery +English sparrow +Turk's-cap +platinum blond +song sparrow +alarm clock +tortoiseshell +chaise longue +flintlock +academic costume +graffito +Arnica montana +adding machine +waterside +director +jonquil +pipefitting +stud +Swedish meatball +musk rose +Venus's flytrap +raven +bougainvillea +little brother +field bindweed +finder +white admiral +tinfoil +serval +sheet +carthorse +people +potto +stockroom +sphinx +slate roof +mountain laurel +majolica +coal black +repository +bufo +pique +binder +tread +attorney general +hydraulic press +videocassette recorder +bumper car +professional baseball +cow parsley +ern +blue peafowl +common hyacinth +jack-in-the-pulpit +ice hockey rink +sport +camper +tailback +flash +stacks +pulp +Christmas cactus +netball +calliandra +curler +large periwinkle +cobweb +forward +Roman arch +cross bun +stoneware +banana bread +cape jasmine +settle +tongue +frock +pepper shaker +pitching coach +CD-R +casing +faience +hand cream +CD-ROM +recliner +striped bass +clary +sketch +risotto +reticle +white clover +touch football +kitty +great-aunt +Japanese maple +sidecar +muscovy duck +hack +rope bridge +organist +stinging nettle +pocket watch +Indian pipe +amorphophallus +bird's-foot violet +caller ID +furnishing +carriageway +dish rack +heiress +nail polish +beldam +Dall sheep +teriyaki +stateroom +laughing gull +chow +bookmark +timer +toga virilis +deviled egg +coltsfoot +Papuan +native +cygnet +automation +portfolio +cabbage palm +cube +broiler +radish +broodmare +castor-oil plant +pith hat +talus +lass +thatch +common marigold +young buck +igloo +prairie rattlesnake +soccer player +spoke +place +slide fastener +tapestry +toy +headboard +cross-country skiing +harness +sconce +rim +ballet skirt +transvestite +saddlebag +common evening primrose +taillight +challah +willet +ready-to-wear +cloud +answering machine +waterfront +vane +granddaughter +Chinese 
gooseberry +tureen +cab +truffle +viola +bootlace +chemise +taro +petal +candied apple +soccer +miniature golf +front porch +asparagus +Sauvignon blanc +daisy fleabane +ceiling +slip-on +bottle-nosed whale +redbud +black squirrel +snowsuit +ribbing +gravestone +creme brulee +ambassador +local +archery +love-in-a-mist +garbage +thyme +night-blooming cereus +goshawk +cuckoopint +azure +German iris +salad bowl +puppy +cockhorse +giant clam +biplane +stele +necklet +sea otter +crest +door +reformer +comforter +Byelorussian +bottle +hemline +book bag +leotard +owlet +spoon +sari +bidet +Latin +reticulated python +bowling shoe +futon +gaiter +coypu +tea urn +waders +bangle +snowbank +pencil +porter +azalea +English lavender +red spruce +team sport +cruet +high-rise +O ring +vodka +cormorant +Canada thistle +clasp +showjumping +rattan +red fox +sun parlor +Charolais +Tommy gun +bird's foot trefoil +sedge warbler +knot +chives +car tire +steam engine +adapter +spirea +common allamanda +oyster shell +harbor seal +baobab +wick +plumbago +downy woodpecker +coconut +leash +kasbah +hour hand +upholstery +mallard +cricket bat +lady +kitchenware +right-hander +leopard +olive green +common valerian +blue whale +blackboard +redhead +periwinkle +fingerboard +hard hat +locker +breakfast table +capybara +beekeeper +harness +feeder +water hyacinth +hexapod +brown thrasher +percale +lever +patriarch +arete +book +book +senator +bunya bunya +couch +durian +common lady's-slipper +mountain ash +golden barrel cactus +bicycle seat +beret +pop +musk mallow +manatee +cotton candy +boxing glove +backboard +tongue +saguaro +playground +capitol +sanderling +wagtail +deputy +tractor +tap +lady's smock +noseband +worsted +radiotelephone +camisole +forelock +muscat +sweet scabious +crane fly +butterfly weed +chestnut +pinata +inositol +borage +aquatic +belly +broadcaster +gondolier +egg yolk +blush wine +bufflehead +rambutan +oleander +horse-trail +sea holly +yard bird +conference room +lacrosse 
+belted kingfisher +defile +extremum +whistle +bear cub +grainfield +potage +watermelon +lasagna +sheik +Cooper's hawk +bulb +basketball court +paella +cassette tape +scatter rug +kid +impala lily +Minnesotan +Sudanese +chocolate +tail +quack-quack +whistling swan +shoulder patch +frozen custard +sumo wrestler +smoothie +bock +meat grinder +latch +palisade +radial +sake +kestrel +corn chowder +airframe +electrician +reamer +metropolitan +cotton flannel +cassowary +crossbill +operating room +winter aconite +flute +Tasmanian devil +billboard +suds +kilt +aperitif +cooling tower +avocado +hooded merganser +coleslaw +bee balm +ladder-back +insurance broker +scaffolding +polo mallet +double bed +two-hitter +bluff +gamboge +baby +lawn chair +frond +pistol grip +fancy dress +marquetry +jambalaya +fireweed +Eurasian kingfisher +cue ball +ice plant +horseweed +rose moss +musher +sun +viscount +white-breasted nuthatch +gin and tonic +thermos +Kenyan +first-aid kit +four-wheeler +tourist +stairwell +Gambian +liqueur glass +hovercraft +cocktail dress +twin +coriander +blister pack +Barrow's goldeneye +canteen +irrigation ditch +great white heron +tree sparrow +canal boat +lens +food processor +common raccoon +Baltimore oriole +black-eyed Susan +bush hibiscus +corolla +sire +mustachio +professional wrestling +elk +clustered bellflower +pannier +musk ox +crapaud +animal trainer +rosebud +ring-necked pheasant +little egret +cappuccino +rocker +bristlecone pine +cheerleader +hedge violet +semaphore +central processing unit +speedskater +delivery truck +assembly +hedgehog cactus +bergenia +bull thistle +bladder campion +cinquefoil +inula +cellulose tape +main rotor +bootee +autogiro +ice +grey +meadow cranesbill +hummus +valise +chassis +mountain goat +blacktail prairie dog +Chardonnay +romper +street +shoveler +wood ibis +topiary +chalice +silo +circus acrobat +Rollerblade +cosmos +woof +heroine +cold cream +marabou +herb robert +garden lettuce +nymph +floor lamp +automobile 
engine +heel +radiator +seeded player +fedora +father-in-law +peahen +Bahamian +wiper +wood pigeon +barn owl +pegboard +chorus frog +kin +roller skate +stob +rosemary +cowbird +hortensia +cranberry sauce +shot glass +Dixie cup +gnu +fire alarm +diet +booster +oxeye daisy +twayblade +high-definition television +truss bridge +bunk bed +mule +blackbuck +facsimile +frog orchid +point-and-shoot camera +brocade +gazebo +prairie gentian +concert +paintball +Cognac +maid +afghan +barbecued spareribs +pintail +tramway +commissioner +finger-painting +beef stew +caftan +Aberdeen Angus +demonstrator +sea trout +pigtail +thrush nightingale +barbados cherry +sashimi +ridgeling +lamppost +gabardine +red-shouldered hawk +bath salts +cavern +cymbid +Haitian +boater +southern buckthorn +arctic +motorcycle cop +red gum +Clydesdale +Zamboni +beagling +villa +demitasse +Sheetrock +lollipop +hybrid petunia +post horse +carabiner +brussels sprouts +Durham +stylist +pothole +sleigh bed +scallop shell +harrier eagle +papaya +Japanese persimmon +sachet +wild rice +chipboard +gun enclosure +menorah +chinook +headset +white campion +ocean +Secretary of State +G-string +bone china +basil +greenish blue +camcorder +concrete +screech owl +trumpet honeysuckle +flugelhorn +layette +cattle egret +case knife +mandarin duck +robber fly +salwar +dressing table +doughnut +facade +runner +honeypot +surf casting +diver +angel's trumpet +spin dryer +chameleon +wand +snow +vitamin A1 +manageress +volleyball net +antiperspirant +street clothes +tree sparrow +cords +sundew +bricks and mortar +caryatid +bridesmaid +trestle bridge +eyepiece +celebrant +scarlet pimpernel +gas range +onion +green salad +squill +creepy-crawly +hunk +little owl +salad nicoise +earflap +bird feeder +spray gun +bunny +Cheops +amazon +blue tit +Nissen hut +Kalashnikov +skylark +kremlin +shoebill +shopping bag +frigate bird +telephoto lens +peplum +moss pink +echidna +wastepaper basket +wood ibis +workroom +ankle brace +telpherage 
+Michaelmas daisy +figure skate +swami +nylons +cardoon +cocotte +headstall +twin bed +parsley +dirndl +corn poppy +nut bread +cloche +light heavyweight +mayor +lip-gloss +punch bowl +pottage +mango +fledgling +mousse +four-wheel drive +barrel +banana boat +trouser +bathroom +Sauterne +ring +settee +lavaliere +safe-deposit +godson +leatherette +schoolmate +radish +hedge trimmer +dahlia +euphonium +palace +vaulter +singlet +slicer +Pilsner +cockateel +kangaroo paw +Cub Scout +master bedroom +hexagon +cenotaph +Barberton daisy +Netherlander +intersection +Korean +gravel +chandelier +hospital bed +flash memory +pier +whole wheat flour +maroon +pale ale +special +snow bunting +crinoline +dustpan +barrette +common wood sorrel +yolk +pothos +speakerphone +tendril +cabinetwork +farm horse +brake disk +streetlight +superhighway +bandsaw +panting +pressure cooker +girdle +old man +cereal bowl +felt +hurling +architecture +harmonium +chain +blueberry +cellar +smocking +scrub brush +tablespoon +sweet corn +graining +library +street +bill +felt-tip pen +monkshood +crowd +log cabin +newel post +hack +elephant seal +golden pothos +popcorn +outhouse +patch pocket +fish and chips +tape +wax plant +eaves +fried egg +emerald +tea cart +fan blade +daily +Bowie knife +rowing boat +leaf shape +man +crayon +trumpetfish +chipping sparrow +whiskey bottle +pillion +city hall +golden pheasant +cheerleader +creeping bugle +couch +Dumpster +Homo sapiens sapiens +cranberry juice +cockpit +demagogue +joinery +scrambled eggs +technician +sidewalk +sheep +keyhole +power line +polyanthus +roulette +first lieutenant +checkout +tabletop +nasturtium +schnapps +engineering +skateboard +ground fir +bouquet +bunk +resort area +fleur-de-lis +power steering +opera +Bolivian +Friesian +buckskins +bay +slider +frozen yogurt +cabin cruiser +saunterer +lean-to +fishing eagle +bog star +cantaloupe +mouth +music stand +fiddlestick +brilliantine +pinball machine +bairn +barred owl +bath oil +signorina +Mason jar 
+nymph +rubber band +garden nasturtium +razorbill +Japanese beetle +batting cage +trestle +borage +Secretary of the Interior +scanner +baguet +baseball cap +chow mein +pen +jewelweed +barbet +chasm +pectoral sandpiper +holster +glasses case +sand +crevice +Kickapoo +snowboard +locket +satchel +tankard +alpinist +moorhen +cow pen +whooper +crown +chain +silversword +wild geranium +hi-fi +Tibetan +waterwheel +bee orchid +ruby-crowned kinglet +common broom +tabloid +javelin +sauna +klammath weed +zebra finch +spider orchid +velour +chiffon +lecture room +barrel +loggia +millstone +flatlet +soupspoon +econometrician +golf-club head +daphnia +parlor +fire-eater +juggler +attache case +hay bale +kisser +knitting needle +news magazine +flatbed +Senegalese +trumpeter +trampoline +brogan +bone +caftan +lobster pot +gazpacho +anthill +ramekin +mainsail +penitentiary +spotted flycatcher +cookstove +root beer +broom beard grass +pogo stick +plywood +epee +gas oven +Global Positioning System +sweet false chamomile +breakfast area +bullring +second cousin +wave +decolletage +rodeo +won ton +swastika +bobby pin +papaw +retaining wall +Muscadet +heavyweight +energizer +banner +amusement park +whinchat +drugstore +waxwork +meander +congee +heat sink +switch grass +commuter +peony +western white pine +wild raspberry +nightgown +saute +cardinal +claret +pollinator +biryani +pina colada +cassette deck +European sandpiper +block +flan +birdcage +baby +lieutenant colonel +ticking +European white lily +dog violet +coat hanger +premature baby +organza +string bean +balloonist +hurricane deck +window box +hang glider +bullfighting +piste +seahorse +hard cider +batik +common mullein +petite marmite +stuffed mushroom +tequila +ground ivy +fountain grass +stray +putter +buffer +comet +bomber +woodcarving +baseball glove +halter +garnish +selvage +megaphone +sea fan +rabbit hutch +very important person +analog watch +long-head coneflower +northern pike +roll-on +cigarette butt +terraced house 
+penknife +windshield wiper +cricket +straightener +snow pea +cockerel +canister +sour bread +recovery room +toilet bowl +tyrannosaur +big sister +quartz battery +television receiver +vitamin C +tailpipe +field thistle +stonechat +col +monstrance +gift wrapping +herbivore +quarter horse +ice-cream sundae +rumpus room +eyepatch +clary sage +French lavender +snorkel +choir +tent-fly +cat box +horse racing +high priest +barrel cactus +pin oak +wild thyme +keyboardist +raiser +hammock +hail +bungee +chocolate mousse +major +buzzard +gopher tortoise +Chablis +water meter +benthos +donna +blender +Mauser +avocet +rye +mulch +chancel +dusty miller +mate +corbel +minaret +frittata +French toast +mosaic +home brew +water faucet +beard +swivel chair +acropolis +largemouth +abbey +tabby +driver +copperhead +stirrup +Boston fern +Tennessee walker +artichoke +honor guard +chapatti +enchantress +sweat pants +electric organ +column +dry vermouth +range hood +Red Delicious +rape +splint +catapult +gourd +antipasto +plaza +carnation +star +wood anemone +English primrose +male fern +boot +atrium +Japanese deer +carnivore +yearling +doe +guelder rose +chicory +stretch pants +ice-cream cake +frogfish +tarpaulin +chicken soup +balaclava +tor +feverfew +three-hitter +flyweight +aqua vitae +locker room +wether +teacup +wide-angle lens +hook +ladder-back +osprey +awning +wedding +chest protector +pooch +rose mallow +orange daisy +fondant +envelope +duckling +blackberry +goosander +snorkeling +philatelist +broad bean +Frank +bok choy +basket +absinth +cayenne +blackbird +bottled water +trooper +timber +stable +chestnut +tomatillo +bell +banquet +rainbow trout +macrame +appointee +heart +chipmunk +purple clematis +safety bicycle +shuttle bus +Japanese black pine +lentil soup +downhill +field mustard +brass +hand-me-down +greater yellowlegs +fanny pack +croquet mallet +hip roof +duffel bag +Ritz +document +pie plant +staff member +lifeguard +white-throated sparrow +Cameroonian +hydrofoil 
+platter +common ageratum +middleweight +chairlift +brunch +pharmacist +lemon +driveshaft +green snake +lip +London plane +mangrove +crystal +siskin +common jasmine +hollandaise +villa +cross-country riding +mother-in-law's tongue +generator +Tanzanian +whisk +seeder +ashtray +griddle +evening bag +bluebird +bran muffin +square dancer +luggage compartment +tropical pitcher plant +autofocus +tape drive +silencer +Hawaiian guitar +swamp sparrow +Zimbabwean +drawing room +weekender +liparis +streambed +samosa +hitter +water heater +tidal basin +ossuary +dik-dik +camouflage +fiance +Jordanian +rolling pin +slingback +turret +hen +jennet +playpen +woodhewer +bushing +church bell +bear grass +double knit +tennis pro +Joe-Pye weed +pave +pochard +painted beauty +crinoline +gumbo +trestle table +schnitzel +balloon flower +Turkish coffee +extension cord +wireless local area network +sluice +umbel +microeconomist +sky +aisle +commander in chief +hydroplane racing +poll +Coca Cola +fuel injection +bird pepper +monkey puzzle +English muffin +riverbed +varietal +kachina +airport +saltwort +oolong +red-hot poker +mihrab +cocoa +jersey +Walkman +syndic +Hessian boot +millstone +carpenter +outfall +curbstone +mocha +field pansy +patriarch +slacks +switchblade +killdeer +whelk +pampas grass +racquetball +platform bed +Indian rhinoceros +Japanese iris +blacktop +dinner jacket +stud +jodhpurs +telephone pole +business district +kurta +basil +handset +file folder +gloriosa +orphan +cantle +cookie sheet +cafe au lait +drawbridge +hill myna +Western diamondback +watch case +cardcase +bowling alley +mattress cover +canvasback +pompadour +cornice +matador +cigar cutter +skunk cabbage +baptismal font +bitters +refectory +egg +parula warbler +tiger lily +field house +nanny +skin-diver +soda water +lymphocyte +carport +chocolate fudge +amphitheater +sugar candy +sea hare +open-face sandwich +dessert spoon +staple gun +envelope +worker bee +general +garment bag +maypop +autobahn +Atlantic 
puffin +polo shirt +Humvee +spice rack +grotto +banderillero +gaillardia +black-crowned night heron +oboist +weigela +Dictaphone +dwarf iris +marsh mallow +yarrow +eccentric +catsup +jade green +mistress +henbit +beachwear +head +commuter +strawberry tree +chickpea +clothespin +fleabane +brussels sprout +winter melon +Laconian +great horned owl +caricaturist +nan +flowerbed +triple sec +dairy +round of golf +cardinal +kauri +Zulu +Armagnac +cowberry +mouthpiece +wild calla +bling +puppeteer +beer drinker +adder +field sparrow +chocolate pudding +blacksmith +finback +Shetland pony +cheese fondue +panty girdle +soda can +electrolytic +florist's chrysanthemum +yellow jasmine +tudung +equalizer +ridge +dulcimer +grappa +barn swallow +coneflower +enamel +poached egg +halfback +yak +toby +Fleet Street +blue catfish +sand tiger +flying buttress +snaffle +stoop +first base +cultivated land +first lady +waratah +headquarters +arnica +lovebird +common morel +parasol +disk clutch +Xerox +vitamin P +vitamin B12 +long sleeve +certified public accountant +hot pants +pitch pine +pantie +drawers +cake mix +boar +grey +bride +false sago +bullion +coach house +bass guitar +Japanese banana +meadow clary +black belt +Canterbury bell +smallmouth +treadmill +great white heron +enchilada +rummer +captain +camisole +wild garlic +oak fern +ultramarine +peach +hawkweed +autostrada +adit +anaconda +artwork +skinhead +jello +hermit thrush +Bewick's swan +dress suit +trail bike +stubble +common polypody +Riesling +Easter lily +telegraph key +envelope +garlic bread +perianth +salad bar +steppe +club sandwich +nude +garden forget-me-not +Tuareg +flood +Statehouse +charcoal +boy scout +Rhone wine +parfait +spoor +lanyard +octagon +brown bread +quarterback +quilted bedspread +hookah +Pepsi +hamburger bun +entrepreneur +saddle oxford +snake's head fritillary +undies +chemise +skidder +chickpea +carnation +honey bun +mortar +Montrachet +automobile horn +skylight +gingham +rafter +pantile +climbing 
frame +scarlet runner +cable +cornstalk +mockingbird +raisin bread +chili sauce +hand calculator +concert-goer +detached house +coq au vin +lasso +hyssop +globe thistle +paper clip +slide +Jerusalem artichoke +tetrahedron +mock orange +lemon lily +finger +little sister +handcuff +horse wrangler +pavlova +oilcloth +snow-in-summer +common mugwort +greenshank +ice-cream cone +rubber boot +gunnysack +disk jockey +long trousers +sorghum +pontoon +calf +fire extinguisher +cotton thistle +pilot whale +ao dai +steamroller +wristwatch +tawny owl +city +country store +ironweed +kennel +bathrobe +rattan +drawer +fly tent +choline +musk thistle +courthouse +Yugoslav +bush +trawler +shellflower +jade vine +ragged orchid +pea soup +King Charles spaniel +hubcap +snook +paddy +bow and arrow +shovel +dill +cliff swallow +cadaver +hijab +masterpiece +fish geranium +kettle +sanitary napkin +carrot stick +Mountie +peanut brittle +dam +jackal +windowsill +butterfly orchid +bodice +picador +pale yellow +beanie +petiole +tenor saxophonist +bungalow +gnomon +stock saddle +field glass +rigging +wood grain +Speaker +settlement house +swamp milkweed +paper nautilus +tangerine +champagne +crescent roll +library +Schmidt telescope +stemless carline thistle +motorcyclist +alpine ash +planchet +water closet +casuist +hand luggage +hyssop +spaghetti and meatballs +cannelloni +cedar waxwing +water dog +brick red +linkage +sweep hand +purple heather +macaroni and cheese +butter knife +refreshment +malt +St. 
Augustine grass +wainscot +compass +gas heater +tamale +table saw +referee +borsch +projector +dracaena +peppermint +Reuben +Abyssinian banana +glassblower +floss +small stores +artilleryman +lapwing +ranch +garbage man +dwarf banana +commelina +currant +adulteress +landlocked salmon +pasqueflower +nan +tiger lily +Eritrean +rotunda +catsup bottle +mezzanine +royal fern +blended whiskey +bowler hat +mistletoe +manor +fusee drive +pistachio +dispensary +swamp +amputee +sculptor +schoolmaster +Chinese anise +dwarf iris +livestock +chronograph +nectarine +jockey +plaster +motel room +swamp azalea +hippeastrum +space station +duchess +catacomb +dovetail +cockscomb +common spotted orchid +brittlebush +cleats +cloche +hotchpotch +cabin car +prey +indigo +light beer +bear's breech +jonquil +analyzer +alyssum +spur gear +ice tea +honey buzzard +twayblade +dirndl +atlas moth +croquette +carafe +flyweight +professional basketball +multivitamin +air terminal +phial +roll-on +skunk cabbage +bird of paradise +rose +cooter +camping +divided highway +herbage +sweet vermouth +common comfrey +eggplant +office building +glutton +gefilte fish +bicycle rack +swamp birch +Venetian blind +Pernod +Norway spruce +portrait camera +bastion +vitamin Bc +Ugandan +Indian red +okapi +emu +vin ordinaire +chintz +shrimp cocktail +numbat +tall oat grass +cable car +stopcock +ham sandwich +Yemeni +stanhopea +plate +chicken broth +common yellowthroat +California poppy +radio +chocolate egg +mess jacket +tea table +physostegia +Japanese flowering cherry +confectionery +chicken cacciatore +painted nettle +popover +white rice +strapless +mohair +electrical cable +coil spring +arterial road +miniature fan palm +spectator pump +pesto +interlocutor +eastern kingbird +dongle +vitamin B6 +stuffed tomato +cough drop +okra +black +barbecue +burial mound +firstborn +corn snake +amberjack +bollard +horn +Black African +elbow pad +Camembert +circle +Japanese apricot +hearing aid +rock star +creature +taster 
+bubble gum +scull +lemon balm +chaetodon +anemometer +brake drum +fuselage +courthouse +aqualung +yellow adder's tongue +reception desk +guy +buffalo wing +ginger beer +robin +pantothenic acid +marsh hawk +yellow journalism +exhaust +cardamom +Tabasco +ax handle +patriarch +floor +pine snake +spoiler +hood +sphagnum +parrotfish +orphanage +redpoll +beef Wellington +white spruce +cherry plum +scapular +field lens +broomstick +mouser +wood thrush +Nebraskan +hotelier +milk thistle +soya milk +Munich beer +boucle +snowy egret +dust storm +steward +kudzu +oriental poppy +presbytery +burro +orange soda +stonecrop +splashboard +menagerie +dormer +wire cutter +yellow bells +Dubliner +shore pine +cousin +racing gig +Morgan +gold plate +villager +snifter +granny's bonnets +egg roll +Spode +amabilis fir +babbler +pestle +heliopsis +halter +black spruce +President of the United States +ski slope +chocolate fondue +lockstitch +motel +Epipactis helleborine +tabbouleh +Yorkshire pudding +overpass +Timorese +presbyter +tablefork +bottle gourd +tiara +vintage +pilgrim +reindeer moss +shower stall +towel rack +kachina +chef's salad +breeder +cow parsnip +walker +Black woman +Irish coffee +portrait lens +lateen +gilt +successor +cargo container +Lithuanian +mayapple +paisley +highchair +strawberry jam +flying fox +field scabious +blue-eyed grass +screw +Frisbee +dressing room +cholla +walkie-talkie +red currant +centrifugal pump +smorgasbord +hot rod +marcher +rowanberry +welwitschia +amphitheater +pew +concert band +bosom +pillbox +seagrass +openwork +meadow goldenrod +shower +chicken sandwich +Boston ivy +plastron +oilfield +stuffed tomato +juniper berries +frame +Spanish mackerel +family room +powder horn +fight +maguey +bunker +work-shirt +air filter +nosh +sugar bowl +foothill +reliquary +tugboat +horsebox +grater +palace +board member +campsite +halibut +geneva +ginger ale +high commissioner +genet +bodywork +spaghetti +protractor +pipe cutter +wood anemone +turkey cock 
+surge suppressor +green turtle +spoiler +bedsitting room +television room +ballot box +shasta daisy +impeller +capote +bitter +California wine +lock +spinnaker +gill fungus +baby's breath +nut and bolt +moonflower +houseboat +distributor cap +coffee bean +gusset +bowling ball +knitwear +frieze +mistflower +roadster +cue +circuitry +brake +butt hinge +Chickasaw +leopard frog +wing tip +puree +mantel +pantheon +grandfather clock +cockchafer +pomegranate +cleaners +eyeshadow +Oregon cedar +rock hopper +hawksbill turtle +agriculturist +yellow-crowned night heron +Albanian +pumpkin seed +chateau +goggles +camper trailer +bracket fungus +cigarette case +signal box +saddle blanket +poison ivy +set gun +cattleya +dry fly +concert hall +personal digital assistant +talcum +deodorant +common starling +painted turtle +kea +plenipotentiary +pantyhose +masjid +buskin +hurdle +cocktail lounge +belting +sour dock +knife blade +sugar snap pea +paddle +dickeybird +brace +keep +call center +yacht +lead pencil +tumbler +production line +tetra +private +French window +express +ski boot +pinto +broad bean +American crow +screech owl +snapper +power cord +Manx +rambutan +sun deck +stonefish +golden eagle +national monument +readout +cork oak +hacksaw +beer can +bathe +tussock bellflower +wet suit +mihrab +big game +highlighter +sprocket +measuring worm +grapefruit +samovar +distributor point +steak knife +incubator +loon +temporary hookup +hippodrome +hot spring +spacesuit +flea market +clay pigeon +catbird +earmuff +tetherball +yellowfin +cellophane +lanolin +clapperboard +velveteen +police dog +cashew +sequencer +mango +duplex house +bazaar +Golden Delicious +red carpet +collet +kickstand +broadloom +diskette +tank engine +compact +diesel-electric locomotive +whale shark +water moccasin +mountain avens +tropic bird +ginkgo +ski cap +fixative +glockenspiel +chopine +ethernet +herring gull +skeleton key +finger paint +conference table +great crested grebe +harbor +white-crowned sparrow 
+Bullock's oriole +guestroom +boutique +cable television +roulette wheel +Luger +Latin American +trumpeter +blindfold +baby +freshwater bass +home plate +bonefish +giant sunflower +giant tortoise +planking +pigeon hawk +oceanfront +door +bazaar +common wasp +conformation +kick starter +kid glove +corydalis +shuttlecock +writing desk +ivory gull +shirttail +diving suit +weka +downy birch +altar +wild sage +tufted puffin +cabinet +Orpington +cineraria +bottom +dial +coracle +resort hotel +soap dish +spotted owl +billiard room +ghetto blaster +red-breasted nuthatch +hatchling +chalet +bracteole +crusher +mixer +net melon +farmhouse +Dutch oven +transept +penlight +palmyra +stewing pan +solar cell +crochet needle +black-winged stilt +germander speedwell +crinkleroot +truncheon +bunchberry +hatchback +sounding board +mixing faucet +chess master +bisque +Brie +Sitka spruce +pawn +Mexican-American +space rocket +choreographer +collared peccary +duffel +nacho +patchcord +carpet snake +omnivore +watering can +hall of residence +streamer fly +sunroof +great grandson +oil refinery +billiard player +ivy geranium +key palm +pinwheel +yellow-shafted flicker +purple onion +soldering iron +condominium +fishing gear +heat pump +marine iguana +cuckoo clock +Bletilla striata +headrest +spotted salamander +field hockey ball +pound +carboy +vertical stabilizer +groundsheet +cinnamon bread +acorn squash +sheathing +lakefront +Jeffrey pine +synthesizer +olive +apple +pannier +ponderosa +Jew's-ear +latch +equatorial +metasequoia +permit +bloomers +town hall +fava bean +casino +bier +jampot +common snapping turtle +clary sage +oatmeal +Dutchman's breeches +massif +Guyanese +heifer +handball +sweat suit +pomelo +Iceland moss +customhouse +sandbag +archer +gyrfalcon +sword cane +marmite +whole snipe +blue crab +sugar spoon +brownstone +chicken wire +lizardfish +dump truck +chicken yard +chamois +electric +idle pulley +jujube +wrestling mat +aoudad +Burmese cat +water shamrock +dormitory 
+Unknown Soldier +hearse +bumper +clipper +desert pea +critter +semitrailer +backboard +common St John's wort +Atlantic manta +song thrush +jukebox +quoin +eastern chipmunk +copper beech +paintball gun +bull +package store +fraise +royal poinciana +niqab +traction engine +objective +day nursery +ski lodge +orphan +summer house +cereal box +router +sleuth +jodhpur +polyp +croquet +sport kite +green onion +tulle +etagere +tussock caterpillar +rest house +elderberry +bridal wreath +Torrey pine +silver wattle +kidney bean +pentode +laelia +Allen wrench +sporran +red drum +tricot +heterodyne receiver +magazine rack +stone curlew +trawler +suckling +niblick +sandwich plate +double door +Togolese +pitching wedge +desert tortoise +cloth cap +date palm +webbing +jumper +frogmouth +copperhead +covered couch +black mallee +riser +scraper +gauntlet +pantheon +food court +muntjac +grocery bag +bread-bin +transmission shaft +primigravida +window seat +crab apple +seat +Fresnel lens +dendrobium +hatchback +little theater +butter dish +back porch +umbrella tree +carrot +seventy-eight +coconut +music stool +Tesla coil +bay willow +American basswood +sabot +wheel and axle +gazette +lute +bassinet +hart +mecca +breadbasket +silverfish +handball +Scotch pine +box camera +stately home +Hereford +tread +single-breasted jacket +desk phone +deodar +professional boxing +fly casting +box wrench +black oak +martello tower +red campion +bullock +sweet William +bay leaf +dollhouse +flounder +fox hunting +beanbag +king mackerel +rouge +film advance +common mallow +parasitic jaeger +satellite receiver +nurse shark +chesterfield +tomatillo +plimsoll +hatbox +bloomer +foul-weather gear +longleaf pine +horse mackerel +tree lizard +bark +belfry +Treasury +perch +purple finch +stag beetle +fragrant orchid +tachymeter +tadpole +cookie jar +knee piece +agueweed +bones +chick +golf glove +toothpick +taboret +rotor blade +field artillery +purple willow +redhead +spark plug +guava +voice mail +cross 
+butterfly valve +star magnolia +olive +room light +Australian turtledove +embassy +Iraqi +singles +nestling +spinning rod +radial engine +rowan +sandbox +boss +moccasin flower +veneer +mint +American chestnut +white whale +CPU board +florist +press box +hurricane lamp +giant kangaroo +greater whitethroat +winter jasmine +blue +department store +southern red oak +saber saw +corn muffin +bellbottom trousers +toaster oven +red eft +condominium +galago +sunbather +redpoll +common European earwig +songbird +linnet +light meter +bracer +tepee +gumbo +water glass +roofing +spathiphyllum +shofar +sand lizard +washroom +Brussels carpet +brachyuran +home room +floatplane +knee brace +solar heater +felucca +gas ring +maguey +manse +blue columbine +cuppa +cigar band +male orchis +mudskipper +couscous +Chinese parasol tree +dude ranch +banyan +gopher snake +sundrops +aviary +African daisy +missel thrush +Photostat +stone pine +circus tent +tangle +printer cable +grease-gun +rose chafer +light pen +plantain +hearth +bullfinch +post oak +slow loris +Newtonian telescope +head +punt +spindle +New England aster +spotted sandpiper +pond pine +grass skirt +bug +black rat snake +tabasco +bull shark +tennis camp +scrambler +popinjay +bing cherry +ministry +cash register +redheaded woodpecker +kameez +farmer's market +roan +harpy +European toad +pizzeria +camshaft +hemp nettle +chicken coop +cottage pink +daybed +observatory +airdock +mountain devil +newsstand +kingfish +snow gum +jackdaw +lacquerware +peeler +miro +sister ship +damask rose +pack +snowshoe +Liberian +paramecium +tidytips +professional tennis +bookend +wood swallow +cayuse +cranberry +rock squirrel +steak au poivre +soul patch +female mammal +sash fastener +songwriter +oxeye daisy +apse +floor joist +hand towel +wheatear +cero +soul mate +golden fig +bus stop +psycholinguist +convenience store +manor hall +mountain sandwort +Euopean hoopoe +haricot vert +mausoleum +violist +flashlight battery +chard +fixer-upper +bank 
martin +testudo +diving duck +kohlrabi +Omani +sphygmomanometer +greyhound racing +chestnut +rattlesnake plantain +chaffinch +wolf pup +teakettle +cairn +souk +resident commissioner +chuckwalla +gaiter +capercaillie +liver chestnut +bean sprout +land line +ambassador +green pepper +common chickweed +Sharpie +Oriental arborvitae +oncidium +pallone +currawong +sweet alyssum +fire tower +eyebrow pencil +redfish +apricot +clementine +blucher +wigwam +pangolin +buggy +common oak +jumbojet +laser +cigarette holder +racquetball +georgette +cleft +scouring pad +drum printer +pond scum +American red squirrel +caranday +swamp willow +blindworm +brook trout +defense system +nyala +three-way calling +mizzen +shuttle +African lily +Oregon white oak +rain tree +fuel gauge +oriental cherry +wahoo +pear +jungle gym +bass fiddle +outrigger +angelfish +Old World coot +lime +battlement +yarmulke +herpes varicella zoster +burp gun +Alpine glacier +stun gun +pilot boat +Southern crab apple +bushtit +pullet +polo pony +jackfruit +raw vegetable +French marigold +golden shower tree +spike lavender +wahoo +brass knucks +cabbage palm +diesel-hydraulic locomotive +red jungle fowl +prairie sunflower +rye +loofa +icecap +shade tree +secretary bird +saffron +cos +muskrat +videodisk +Carolina wren +candy bar +Bohemian waxwing +flowering almond +cold frame +raglan +pine siskin +quince +western red cedar +red maple +adobe +agora +kumquat +tenement +bantam +bayberry +water jump +great granddaughter +snips +porcupinefish +brochette +love-in-a-mist +Iceland poppy +common sage +pace car +camel racing +slipcover +nopal +shoehorn +calypso +rhea +in-basket +maple syrup +cold chisel +Pacific ridley +dietary +aperture +lapin +rock hyrax +house wren +litchi +ragged robin +control center +shoebox +arabesque +eider +silver birch +bantamweight +ax head +softball +blue gum +Bechtel crab +tomato sauce +green douglas fir +sweet gum +macaroni salad +red phalarope +budgerigar +Bedford cord +Uzi +green woodpecker 
+ohmmeter +bacon-lettuce-tomato sandwich +hackney +Easter egg +motmot +red pine +opium poppy +gat +pussy willow +greater scaup +ocelot +persimmon +western hemlock +carambola +pinion +Malcolm stock +bobsled +larkspur +wood drake +pinetum +red gum +draft beer +funnel +terrarium +Pinot blanc +doodlebug +brittle star +salsa +cantaloup +pollack +stockpot +eastern hemlock +rock wren +burqa +squash +aircraft engine +billy +flamingo flower +odontoglossum +old squaw +redstart +sheepskin coat +mate +flathead catfish +gentianella +bilberry +bog rein orchid +incense cedar +mew +Colorado spruce +cob +portmanteau +grenadine +common ginger +masdevallia +compound microscope +sobralia +white fungus +guppy +chapterhouse +honey +green frog +sea swallow +African marigold +astrolabe +verdigris +yellowhammer +carrot juice +oxlip +medicine ball +highboy +grass frog +gamebag +surgery +mincer +mulloway +cactus wren +box office +resonator +table-mountain pine +European curlew +supernova +cabbageworm +peach +plane seat +asp +Yquem +tomato hornworm +rook +quadruped +chador +micrometer +dabchick +Afro-wig +balsam fir +bucket seat +sage green +macon +blue poppy +chinquapin oak +black pine +spinach +chrysalis +carnauba +tee +bearberry +shirt button +tree of heaven +southern white cedar +covered wagon +brood hen +spadix +European catfish +winter wren +bulldog clip +carpetbag +study hall +chino +simian +closeup lens +cookie cutter +grapefruit +mandola +sassaby +Allegheny plum +piaffe +scorpion fly +booby +draft animal +field tent +cumin +laurel oak +smooth-leaved elm +American arborvitae +American toad +grinding wheel +mountain ash +cuttlefish +pipistrelle +parer +safety rail +Clark's nutcracker +side-blotched lizard +giant hornet +wicket +dugout +electric toothbrush +dhow +common four-o'clock +long-eared owl +anchor +near beer +tansy +creme caramel +guided missile frigate +shelduck +durian +compact +iron tree +shiitake +polo +camouflage +pedal pusher +salon +tangerine +lacebark +Swiss mountain 
pine +goalpost +poolroom +space capsule +wild cherry +dress hat +wave +raglan sleeve +cassia +Jerusalem artichoke +cabbage palmetto +marsh harrier +American redstart +sea squirt +cliff diving +sparrow hawk +watch cap +frankfurter bun +police boat +flash camera +neem +eastern meadowlark +Italian cypress +orb-weaving spider +graniteware +sewing basket +latex paint +rock dove +stator +leaf lettuce +roulette +broadcloth +Spork +panicle +sternwheeler +cider vinegar +brown creeper +cowfish +closed gentian +chickpea +port +pimento +sheeting +matilija poppy +hawk owl +guava +papaya +huisache +European shrike +racing skiff +yellow warbler +gumbo-limbo +North Carolinian +staysail +court +iced coffee +money belt +shaver +Psychopsis papilio +sumo ring +refection +kingfish +clock pendulum +greater butterfly orchid +disk harrow +tawny eagle +polyphemus moth +pieplant +Nicaraguan +bocce ball +California box elder +porbeagle +crown of thorns +Mexican sunflower +fennel +stream orchid +slip ring +white fir +fold +moss campion +fairy ring +hose +pony-trekking +western larch +meadow pipit +Cape May warbler +longan +bookmobile +junk shop +lemon shark +smelling bottle +solan +widow +sea pen +universal joint +day game +goldcrest +maiden pink +biographer +rotunda +oriel +arranger +gambrel +Angora +fen orchid +leading rein +Wilson's snipe +European nuthatch +natterjack +athletic supporter +mouflon +emergency room +swallow-tailed coat +western meadowlark +feather star +Navy SEAL +toilet bag +loquat +lesser butterfly orchid +thumbhole +breathalyzer +featherweight +collards +mayfly +confessional +mountain ebony +redwing +Norway maple +refractometer +stagecoach +gasoline gauge +octopus +baker +Rhode Island red +European tortoise +cardiologist +Punjabi +Arkansas kingbird +tamarind +drum brake +flash +yellowtail +stokes' aster +emperor +free house +sour gum +ruddy duck +hamadryad +command module +tinamou +Norway lobster +washstand +European hornbeam +roaster +black-necked grebe +tallgrass 
+leopard lizard +anastigmat +Blackburn +deutzia +ground rattler +Christmas fern +wild pink +sesame seed +carrycot +Italian parsley +nectar +roll-on roll-off +true laurel +anisette +candy corn +flowering maple +revers +dun +tobacco hornworm +common sunflower +common grape hyacinth +cardiograph +electric meter +herb Paris +goalmouth +spruce grouse +canopy +wind poppy +stemma +gateleg table +lumper +speckled rattlesnake +gudgeon +rough-legged hawk +internal drive +pomelo +piece de resistance +storm door +clementine +Japanese pink +settler +yellow jacket +Fraser fir +royal palm +cicada killer +cayenne +guava +bluewing +red baneberry +lesser yellowlegs +cache +bog rose +sparring partner +ski jumping +sherry +glacier lily +beer mat +shredder +American widgeon +protectionist +green olive +black-tailed deer +Alpine fir +dispatch case +whipping cream +African daisy +cantilever bridge +maraschino +rhea +ink bottle +dacha +hagberry tree +lesser rorqual +orchard oriole +candidate +cuticle +breadfruit +fishbowl +giant puffball +closed gentian +Joshua tree +tie rod +beard lichen +flame tree +stegosaur +acerola +Swan River daisy +common murre +flowering almond +protegee +loggerhead shrike +Wilson's warbler +Japanese honeysuckle +basilisk +skimmer +hybrid tuberous begonia +pumpkin ash +chafing dish +collared lizard +iced-tea spoon +scrubbird +Iceland poppy +grey kingbird +wallflower +slick +diesel +Swiss pine +ethernet cable +ketch +lightship +black cherry +swordtail +Monterey cypress +lightweight +Floridian +Sabine +stall +contact +viola da gamba +hemstitch +upland sandpiper +box spring +sassafras +radome +lesser scaup +bluefin +yellow-bellied sapsucker +armored car +cabin class +Moorish arch +webcam +aquavit +overall +sergeant major +soft shield fern +gin and it +bobolink +subcompact +falconer +black morel +roadrunner +lab bench +thong +coffee urn +weeping beech +caladenia +southern live oak +scanner +wine vinegar +common speedwell +European roller +fuji +snag +piping plover 
+concertina +secateurs +meat thermometer +supercomputer +funnel +dais +western fence lizard +spruce pine +pommel horse +Cassegrainian telescope +pitta +India-rubber tree +mangosteen +tamp +aposematic coloration +dustcloth +birth +Atlas cedar +reed bunting +jabiru +sainfoin +press photographer +golden oriole +laryngoscope +thermal printer +winder +doubles +cricket ball +dabbling duck +tonic +Buddhist +Morris chair +swatter +quaking aspen +ancient pine +American larch +evaporative cooler +click beetle +yellow-breasted chat +souchong +bluegill +pied-billed grebe +tricorn +spring beauty +southern magnolia +rowel +chili +hard roll +flathead +satsuma +gangplank +bourguignon +cockfighting +greenwing +plum tomato +fly orchid +gnatcatcher +spotted eagle ray +ovenbird +brassavola +mocha +candy cane +afterburner +thriftshop +study +winter crookneck +grinder +muskellunge +sacred ibis +inverter +sandwort +deer fern +stair-carpet +Cotes de Provence +ovenbird +rex begonia +American woodcock +poison ash +lowland fir +pawpaw +loblolly pine +kinkajou +European hackberry +pest +coralwood +Bedouin +acetate rayon +snuffbox +radiator cap +basket oak +table-tennis racquet +smew +midge +telescopic sight +radish +great burdock +separate +damask violet +broadbill +bourbon +blacktip shark +gift shop +khimar +date +woodland caribou +policeman bird +grey birch +American elm +strawflower +officiant +hart's-tongue +straight razor +Spanish elm +radicchio +white croaker +vicuna +soft-shell clam +flannel +adonis +bonito +kittiwake +English walnut +soldierfish +hipflask +spotted crake +Streptopelia turtur +American maidenhair fern +corn cockle +telephone cord +canopy +playback +diocesan +marsh orchid +manakin +purple grackle +cob +fishmonger +otoscope +vermillion flycatcher +inhaler +instar +licentiate +myrtle warbler +goat herder +benthos +toggle +drumhead +piranha +doorplate +vault +triptych +red-necked grebe +transporter +vernier caliper +flathead +Portuguese man-of-war +countrywoman +vacation 
home +Bactrian camel +night-light +module +lemon curd +carancha +painted daisy +bok choy +ratatouille +troll +escarpment +cinnabar +computerized axial tomography scanner +lychgate +sowbread +bedside +guided missile cruiser +reel +cleat +hemostat +blue shark +Seven Wonders of the Ancient World +motorized wheelchair +pillow block +horned puffin +prickly pear +electric range +mother's daughter +vein +Oregon maple +bird dog +faceplate +wren warbler +feather reed grass +common alder +Adam's needle +straitjacket +organ-grinder +gantry +bikini pants +peristyle +herpes +terry +toad lily +celandine +red-breasted sapsucker +bragger +green peafowl +fuschia +quoits +house martin +dome +herpes simplex 1 +touraco +meeting house +vacuum gauge +cat's-ear +crisphead lettuce +carpet moth +European rabbit +puff adder +Old World scops owl +fire pink +fruit punch +ant bear +black walnut +stroboscope +white mangrove +pine grosbeak +cast +check-in +ring-necked parakeet +matai +shingle oak +fieldwork +rue anemone +landing net +ouzo +herringbone +lyceum +hydrogen bomb +mullein pink +masher +evening grosbeak +water vole +livingstone daisy +tomatillo +cavalier hat +interphone +wild lupine +goosefish +sugar maple +plantain +white dead nettle +Monterey pine +bugle +veloute +marsh gentian +Bermuda buttercup +alehouse +Peter Pan +thong +LP +tulip tree +scanner +scarlet tanager +music hall +angel shark +pecan +eight ball +rosy boa +outboard motorboat +garage +fanlight +black cottonwood +notornis +mountain fern +lunar crater +reddish orange +whitetip shark +executant +European ladies' tresses +washboard +revolving door +case knife +balloonfish +greater kudu +tarpan +cog +wet fly +Irish soda bread +basement +broken arch +canopic jar +muscat +kazoo +bobsledding +loaner +black guillemot +English saddle +garlic mustard +Foucault pendulum +mulberry +clotted cream +dove's foot geranium +Atlantic ridley +convector +ground floor +European wildcat +poinsettia +hideaway +great barracuda +black beech +bushy 
aster +cornflower +tam +true slime mold +carving knife +holly fern +railroad tunnel +crimson clover +disposal +etamine +suspension +plasmodium +political scientist +minnow +Spanish rice +twist bit +subway train +Scleroderma citrinum +saw palmetto +console +gimlet +hand pump +waratah +rock rattlesnake +keel +server +curlew sandpiper +hone +sable antelope +inkle +photostat +foresail +sallet +tiger salamander +chutney +onlooker +Exmoor +tiramisu +drawing room +battery +sour orange +juniper berry +beeper +funeral home +fescue +Maksutov telescope +ranch house +jai alai +carob +socket +popcorn +sandbar shark +pipal +summer tanager +oast +skipjack +rolling stock +dropper +great snipe +turnip greens +cowpea +honeycomb +ichneumon fly +maternity hospital +harp seal +nylon +bomb shelter +horse tick +litchi +camel's hair +mimosa +bur oak +anvil +belay +pinhead +continental breakfast +burglar alarm +Mojave rattlesnake +auxiliary storage +lightwood +ratepayer +cecropia +retractor +quadrate +pepper tree +Venus' slipper +abattoir +strawflower +firewater +purple saxifrage +black rat +pack +pepper pot +mayweed +winger +whitetip shark +great yellow gentian +snowdrop anemone +garden angelica +soy sauce +white poplar +inkwell +crouton +gas gun +honey locust +house of cards +ice maker +moquette +arrack +casualty +butterfly orchid +eau de vie +mosquitofish +prairie smoke +haft +horseshoe +steel +peach orchard +Mexican hat +encaustic +shoe +pennywhistle +sweet woodruff +hull +doorsill +globe amaranth +day school +housedog +crown princess +oxbow +maxi +positron emission tomography scanner +compere +European turkey oak +peanut +sentry box +house physician +hot line +loquat +rove beetle +riband +flowering fern +fan vaulting +ceibo +bongo +bat boy +omelet pan +European ash +breadwinner +gaff topsail +clerestory +bushbuck +bluethroat +khukuri +Father +portcullis +candy egg +brake lining +lawn furniture +buckskin +garden pea +Brazilian rosewood +Italian bread +horn poppy +silk tree 
+Christmasberry +hotel-casino +poplin +false lupine +desert sunflower +mimeograph +alpenstock +cork tree +cultivar +common mosquito +pollard +black marlin +understudy +lancet window +college +breadfruit +Herero +Labourite +bar printer +squaw grass +stelis +firing chamber +sycamore +artificial horizon +radiologist +pansy orchid +bicycle pump +wraparound +bell gable +home computer +orchard grass +carving fork +bergamot +honeycreeper +sewing room +radiator +core +brown bat +goose grass +adjutant general +Erlenmeyer flask +massasauga +tail rotor +cardinal tetra +Drambuie +wine palm +Sarcoscypha coccinea +shantung +Calvados +garganey +vicar +house mouse +creeping oxalis +digital subscriber line +cedar elm +backgammon board +blackberry-lily +pallid bat +New Zealander +Barbadian +rose geranium +European spider crab +gharry +electric hammer +mustard +Chinese lantern +laundry cart +filament +mozzarella +gooseberry +sukiyaki +porkpie +culvert +altazimuth +plum pudding +serin +Spanish dagger +Asian crocodile +crevalle jack +mascara +pig bed +alderman +northern shrike +Sufi +purple-fringed orchid +derringer +linseed +hockey skate +bell jar +Japanese wistaria +mantled ground squirrel +western toad +lieutenant commander +mechanical piano +ovoid +paddlefish +demijohn +coast live oak +brick +gearset +tailstock +phonograph needle +winery +tuberose +mother's boy +shot tower +crucian carp +carpet pad +lamb's-quarter +Menorah +common white dogwood +hypanthium +rosebay +wild medlar +soil horizon +sweet orange +bitterroot +hand glass +cloisonne +towpath +gum ball +margay +carambola +bolt cutter +charger +vibraphone +gueridon +elephant tree +wood-frog +ash grey +duffel coat +third base +chunga +glebe house +lake trout +encephalartos +Japanese oak +northern red oak +pruner +blue orchid +Biloxi +western wood pewee +corselet +alabaster +anechoic chamber +grass pink +wax begonia +blue daisy +pennyroyal +Asian tiger mosquito +cheese souffle +flat bench +caramel +sump pump +bush violet +common 
fennel +corner +skullcap +asparagus fern +white mangrove +calceolaria +sateen +saltbox +hollowware +head nurse +coal miner +mountain lily +tufted vetch +European perch +line officer +steamer +stickball +shin guard +cauliflower +Monegasque +hatpin +wolffish +trackball +khaki +arthrogram +rocket larkspur +naval commander +Gemini +ski binding +department head +Chenin blanc +wingstem +knothole +aerides +sweet bay +tautog +gangway +waterspout +Hudsonian godwit +armyworm +incinerator +kidney vetch +pine nut +cypress vine +hip tile +sorrel tree +relay +bench press +Kentucky coffee tree +dobson +sapling +false lily of the valley +veld +phaius +vitamin B2 +beaker +wall tent +sieva bean +dusty miller +sewing kit +cavalry horse +diaper +butterfly pea +Spam +saddlebill +pearly everlasting +kowhai +Sister +moneywort +organdy +pine marten +bareboat +hot-water bottle +baby blue-eyes +silver lime +common cotton grass +malmsey +blue pea +baggage car +pineapple +folding saw +cotton rose +brawler +black duck +Weizenbock +pool player +Gujarati +wild duck +purple sage +sage grouse +mail train +arm guard +short-spurred fragrant orchid +queen +eparchy +spring peeper +ortolan +shoulder +fighter pilot +American beech +snowcap +novitiate +roller +butcherbird +canyon oak +brompton stock +firebrick +rudder +light cream +Primus stove +nonsmoker +probationer +harp +kosher +surcoat +videotape +zebu +first class +yam +car +rissole +miso +funambulism +attic +curling iron +shutter +encolure +split-pea soup +yellow rocket +gas oven +ultracentrifuge +chamomile +canteen +eyeliner +yellow squash +Irish stew +collar +doublet +machinist +septic tank +snap bean +Polyporus squamosus +western tanager +creeping St John's wort +back +sinkhole +perforation +Romanian +epergne +fez +comfrey +sidecar +beach pea +screen door +instigator +plughole +woodbine +pigweed +hip pocket +common scoter +squeegee +Surinam cherry +porringer +body stocking +eatage +shallot +enlarger +common canary +trophy case +gun case +plow 
horse +hot plate +pearl oyster +margarita +madras +backspace key +pigeon guillemot +pajama +buckthorn berry +homestead +bedbug +Linotype +trundle bed +granadilla +theremin +chin rest +bouillabaisse +tumble-dryer +truffle +cassava +kurrajong +gyroscope +European silver fir +C-clamp +politician +green soybean +exponent +flame tree +scissortail +achimenes +crown daisy +soft tree fern +spaghetti squash +pale violet +beaver +dashiki +washboard +driving wheel +sack +foulard +sputnik +boatbill +English elm +sack coat +grog +golliwog +Malayan tapir +May wine +calash +stile +windjammer +American sycamore +rotor head +fast food +balata +dragonet +Emmenthal +metronome +negative +meadow saxifrage +rabbit ears +chenille +round +hobby +crankshaft +Wilson's phalarope +Murphy bed +soil pipe +forecourt +policyholder +tarmacadam +loyalist +gyro +Queen's crape myrtle +shortcake +apple butter +pumpkinseed +heronry +yellow perch +baggage claim +escarpment +diaphragm +mescal bean +shunter +flax +columbarium +Joe-Pye weed +Neandertal man +casement +hole-in-the-wall +Verdicchio +futurist +eaglet +tassel hyacinth +pup tent +fawn lily +cabbage palm +pogonia +hospital ship +water mill +Oregon grape +lentil +grindstone +banana split +inkberry +coonskin cap +bazooka +wrap +anise hyssop +Java sparrow +red-eyed vireo +common opossum +clintonia +bustle +booster +tribesman +soy +panhandle +jaboticaba +locking pliers +Sauvignon grape +ghat +screw +oximeter +white croaker +saucepot +eggbeater +reticule +cabbage bark +looking-glass plant +head gasket +California sycamore +cowbell +Aleuria aurantia +Herr +lever +spider orchid +cashew +shift key +solar house +wood chisel +white +mantilla +stamp +bolero +rear admiral +garden rake +Lao +crowbar +lapdog +buttermilk biscuit +yellow bedstraw +pickerel frog +dowel +serjeant-at-law +mill-hand +lambrequin +state treasurer +red silk-cotton tree +coiffeur +star anise +shoulder pad +marshal +sitar player +gown +ground cedar +hedge maple +caddie +pitahaya +corn 
marigold +stick cinnamon +woodland star +Eurasian green toad +anti +blueweed +medicinal leech +gaur +chocolate kiss +kit fox +mother +butte +audio CD +blast furnace +vitamin D +nutgrass +cornice +black sheep +hearing aid +lingonberry +quad +lentil +riding crop +pratincole +pentagon +sea lavender +nerita +flatmate +catboat +water clover +angiopteris +mushy peas +crown imperial +music school +woodshed +platy +Turk's-cap +rundle +reading teacher +hardtack +balloon sail +oriental spruce +bluefish +white mulberry +horned violet +satin bowerbird +treasure flower +sustaining pedal +mimosa +spurge nettle +sea green +hasp +lederhosen +pink cockatoo +long johns +basket weave +freewheel +thrust bearing +timber tree +orphan +falafel +common camas +bird of passage +bird's foot trefoil +electric eel +fizz +grape arbor +serape +brace +hazelnut +kylix +horse mackerel +cassia bark +lizard orchid +spat +Brown Swiss +pocket flap +pillory +purplish blue +rolling mill +tappet +broccoli rabe +semi-detached house +mushroom coral +fly orchid +nougat bar +ball hawk +sand wedge +shirred egg +black locust +strip lighting +drop scone +brush turkey +ball +tragopan +dallisgrass +tuatara +great knapweed +potentiometer +Kiliwa +Pacific bottlenose dolphin +accelerator +Darwin tulip +osteopath +Arizona cypress +manna ash +butterbur +cornelian cherry +American holly +nopal +tanker +foreshore +ditty bag +gas lamp +safety razor +chanter +fomite +chip +striped killifish +catalytic converter +plaice +dusty miller +takin +gerenuk +corn chamomile +Japanese pagoda tree +boneset +common osier +Guinean +taro +plotter +celandine poppy +churn +steenbok +edible mussel +sensitive fern +triode +black raspberry +zoo keeper +feather ball +dredger +starlet +cornpone +coat button +rosinweed +toy Manchester +crested cariama +finger food +basilisk +shotgun shell +comfort food +mountain hemlock +candytuft +Stilton +record changer +anklet +ball valve +Mediterranean snapdragon +BVD +sand cat +Galloway +nutmeg +water-mint 
+woodwaxen +citron +ark shell +federalist +drone +cheekpiece +hyperbaric chamber +addax +field-emission microscope +synchronous converter +men's room +medlar +electronic fetal monitor +Sazerac +false indigo +roof +passe-partout +meadow spittlebug +Phytophthora infestans +oast house +hedge nettle +voting booth +slender salamander +telephone jack +true bug +scouring rush +Scotch egg +matchbook +aperea +cytomegalovirus +garlic press +cove +whitebark pine +Slovene +narrow wale +mother's milk +Audubon's warbler +prickly poppy +cowl +tailorbird +mud brick +bamboo palm +welt +Afghan +Virginia spring beauty +dinner bell +night jasmine +fly rod +microtome +aerie +carinate +picker +brick trowel +loving cup +swathe +green mayonnaise +rivet +bandbox +newsroom +tea tortrix +bobby +gig +hush puppy +garlic chive +piston rod +aspidistra +bluejack oak +harvest-lice +strap hinge +sour mash +macadamia nut +histiocyte +fan belt +shelf bracket +abelia +Hottentot fig +fish chowder +abettor +compote +beige +dioon +hop +haymaker +oilskin +magnetometer +tool bag +tambour +call girl +gringo +fairy light +broad-leaved plantain +second base +zebra mussel +Japanese cedar +pistia +swamp chestnut oak +cashmere +double cream +samisen +lamb curry +companion +kapok +julep +sweet woodruff +gardener +jewfish +inspector general +collembolan +wheel bug +bass +scrubland +wryneck +macrozamia +trouser press +clove +tiger cowrie +yawl +collard +dildo +pony cart +ormer +annual +tessera +chancellery +two-toed sloth +queen +old lady +wringer +spritzer +baggage +black mangrove +black-eyed Susan +semifinalist +highlighter +alfalfa +Easter daisy +escapement +operating table +neutral spirits +bursar +roble +entablature +girl wonder +farm boy +ring ouzel +permanent press +auklet +beefsteak tomato +gaming table +tea bag +manul +giant bamboo +Ozark chinkapin +matzo +furrow +smoothhound +CD-ROM drive +powdery mildew +copilot +garden +American merganser +bunsen burner +Asian longhorned beetle +lead tree +creeping 
buttercup +Percheron +back brace +axseed +cub +soul food +rabbi +edelweiss +mineshaft +fox grape +sandwort +torque wrench +leisure wear +Mae West +broccoli +loach +maraschino +heavy cream +silkworm +cirque +vintner +whitewash +butterfly pea +two-toed sloth +midiron +ceriman +Bulgarian +operating microscope +sambuca +California fuchsia +silver maple +tangelo +black bean +lugsail +starting gate +leek +sunflower seed +fish fry +clinker +synagogue +coscoroba +brae +uphill +common limpet +golden plover +cedar of Lebanon +amphibian +Canary wine +taipan +agua +feeder +parallel +mater +pink calla +meat counter +yagi +crab cactus +cacao bean +bowfin +alley cat +stonefly +Eastern cottonwood +vernier scale +marginal wood fern +dancing-master +detective +yam +textile screw pine +hooch +spinet +single prop +sassafras +goose barnacle +triple cream +China tree +peeper +dressmaker +snatch block +ironmongery +dressing case +creeping bellflower +silver sage +honeydew +eastern red-backed salamander +peg +nombril +danish +mashie +anarchist +alligator snapping turtle +shepherd +American white pine +runner +chalice vine +rheumatologist +defibrillator +yellow chamomile +lemon balm +peacekeeper +native beech +sandwich board +Bavarian +titrator +paneling +deer mouse +poteen +sugar snap pea +meadow salsify +town crier +best +basinet +common myrtle +night lizard +cushaw +Tampax +camphor tree +gentile +orange peel +putty knife +pyromaniac +Brummie +fever tree +double +nest +inferior +cabbage tree +graduated cylinder +mucor +woodborer +earthwork +potato salad +four-hitter +gooseberry +water vole +ziggurat +grapefruit juice +four-in-hand +cranberry bush +diode +videotape +Mohican +niacin +beetroot +shirtsleeve +cork tree +two-eyed violet +white ash +drawing chalk +baked Alaska +bone-ash cup +toastrack +diastema +bed jacket +dwarf astilbe +yellow honeysuckle +cow pasture +sheet pile +saxhorn +upholstery material +California white oak +Spanish bayonet +horsemint +littleneck +deflector +magician 
+standard transmission +blue marlin +shallot +feijoa +collar +board +jump suit +common staghorn fern +priory +Xhosa +Loranthaceae +barbecued wing +barmaid +spit +lemon juice +umbrella plant +field pennycress +centenarian +queen bee +fish stick +black bread +dirk +secularist +German American +spotted weakfish +iron foundry +speed bump +yellow-fever mosquito +gag +frame +black-eyed pea +alcoholic +involucre +sperm whale +balanced diet +wax bean +butcher's broom +winter heath +Mainer +Australian pine +gas guzzler +double-breasted jacket +pod +palo verde +trimmer +wattmeter +dyer's woad +crotalaria +vine maple +sulky +jack pine +thumb +Wilton +Panchen Lama +welder +badminton court +business editor +Arabian coffee +Kamchatkan sea eagle +foamflower +steep +plane +freckle +cerebral cortex +Vouvray +tea +forest tent caterpillar +neckerchief +accelerator +jig +bridal wreath +highball glass +New England clam chowder +beach strawberry +call waiting +baton twirler +double boiler +Dutch elm +car bomb +filmy fern +breviary +Florida gallinule +dace +parsnip +riparian forest +crescent +earplug +grab bar +cusk +foglamp +screwtop +black mangrove +mascot +Welsh poppy +gas holder +support hose +salsify +red beech +Indian python +caroler +pineapple juice +lowboy +terra sigillata +black olive +hypodermic needle +radio-phonograph +moussaka +miter joint +creche +tuning fork +black wattle +affiliate +vertical tail +kiwi +red morning-glory +piping crow +runway +Kashmiri +studio apartment +sea feather +Judas tree +boatbuilder +corn earworm +fallboard +Victrola +lechwe +goat willow +turret clock +Canada anemone +leaf lettuce +savoy cabbage +headpiece +Lebanese +fothergilla +hemlock +toolshed +silver tree +blue-headed vireo +weatherman +cylinder +caltrop +adjutant bird +driving iron +millet +European woolly thistle +rose apple +clown +schoolfriend +eastern coral snake +barbecue +executive vice president +long-billed marsh wren +brittle bladder fern +tank destroyer +left-hander +matting 
+catchment +balsa raft +eastern fence lizard +color tube +corncrib +electric typewriter +westland pine +elder statesman +whey +plonk +mound +cittern +nest egg +copyholder +China aster +basking shark +gavial +common duckweed +vanilla orchid +red-shafted flicker +granadilla +sylph +sty +vest pocket +potherb +little brown bat +Trapezium +ordinary +adult +purple-fringed orchid +abseiler +disco +metal detector +beefsteak fungus +ilang-ilang +barley grass +hawser +suture +brake shoe +staghorn coral +barbecue sauce +Browning machine gun +sarcophagus +disa +oven thermometer +rosemary +track +gorget +quince +royal +piston ring +teak +pin cherry +Komi +walking fern +sloe +synchronous motor +fire-bellied toad +Teleprompter +co-star +cape gooseberry +oscillograph +bass clarinet +cock of the rock +Tyke +showy milkweed +safety valve +branch water +sweet marjoram +hugger +crampon +fairy godmother +band-tailed pigeon +snow-on-the-mountain +minibar +foreland +grosgrain +dita +rampion +calligrapher +jointed charlock +master +sheepshead +barrelhouse +Carolina allspice +mastic +brake pad +whiskey sour +casement window +conveyer belt +stolon +pavonia +shinny +witch elm +logwood +hostel +pageboy +vesper sparrow +pyrrhuloxia +common carline thistle +wafer +boysenberry +screw augur +hack +American white oak +governor general +Mother Hubbard +game fowl +drosophila +delft +nymphet +tollbooth +chough +Russian dressing +plum tomato +American saddle horse +dusky salamander +black medick +red valerian +cordage +Elastoplast +conacaste +backlighting +swell +riveting machine +cowpen daisy +openbill +water speedwell +picture hat +crested myna +servo +bletia +garden trowel +muscadine +common caper +false lily of the valley +aralia +sharp-tailed grouse +cigar smoker +bandoneon +Chinese alligator +crazy +point lace +charcoal +Texas horned lizard +marinara +backstay +Gatling gun +piston +game fish +fall armyworm +grammarian +beer hall +guadalupe fur seal +sugar palm +peanut +velvet ant +light machine 
gun +rya +cling film +adobo +myrtle oak +angelica +balsam apple +windbreak +brother-in-law +snap brim +automobile factory +clavichord +dusky shark +edible banana +altar boy +California lady's slipper +schoolbag +wax bean +Atlantic walrus +bullpen +straw wine +thatch palm +potluck +tamarind +charcuterie +sod house +tie rack +liebfraumilch +clinician +scarlet lychnis +Spanish iris +bread knife +water oak +bedpan +Angolan +bassarisk +Alaska fur seal +African wild ass +milk float +froghopper +Verpa bohemica +water cooler +chop suey +ranker +red helleborine +Prince of Wales +marmalade tree +car train +giant red paintbrush +desert sand verbena +right whale +baron +stevia +asterism +five-spot +catapult +Silex +fiberscope +refresher +beef Bourguignonne +snood +divot +waterproof +crabeater seal +Missouri primrose +bumper guard +rock opera +Lilo +coffee can +smokehouse +buffalo grass +propjet +ice tongs +poop deck +acorn barnacle +veal parmesan +shower room +collins +ringhals +silage +jawfish +trouser cuff +contour feather +songstress +rachis +White Russian +stanchion +mastaba +flatbed press +viand +legal representative +espalier +organic light-emitting diode +sushi +scorer +haricot +pinna +plectranthus +jungle cat +dried apricot +coach horse +white fringed orchis +veal cordon bleu +bath +dallier +marching order +donkey jacket +Panama tree +aerator +klaxon +pinnacle +shouldered arch +lesser celandine +common eland +Grand Marnier +cock of the rock +phlomis +Japanese umbrella pine +morning room +dead-man's-fingers +little auk +bascule +house paint +home fries +great skua +cesspool +flying gurnard +wild crab +checkerbloom +Wollemi pine +cheese dip +coif +charwoman +tea ball +waif +Arctic ground squirrel +parishioner +stabilizer bar +potentiometer +black cohosh +medlar +willow oak +cascara buckthorn +scoutmaster +Canada lily +poppy seed +paper mulberry +blackthorn +garrison cap +inductee +aeschynanthus +interior live oak +black spleenwort +wild service tree +sling +nicad +swab 
+sego lily +eiderdown +fruit cocktail +pallasite +weeping spruce +shiv +sea lamprey +coachman +half binding +American white birch +gainer +Concord grape +yellow birch +fucus +common room +io moth +red osier +crucible +galangal +salmagundi +pepper steak +cap opener +swizzle stick +tomato juice +Nobelist +Sarawakian +African monitor +sleeping beauty +stereoscope +curd +pyramid bugle +applejack +dosser +rake handle +pilot light +Eames chair +Scotch and soda +bell heather +dinette +blackpoll +dogie +sound camera +cattle guard +mashie niblick +edible cockle +monocle +steak tartare +partaker +sidesaddle +communications satellite +porkfish +water hemlock +drawbar +ultramicroscope +Jamaican cherry +craftsman +lovage +common apricot +drum majorette +backsword +smooth alder +Amniota +dribbler +theosophist +dolman +ivory tree +Green Beret +pipe smoker +mayoress +mignonette +crampon +henbane +kirtle +death's-head moth +instep +great St John's wort +lorry +black-necked cobra +ball carrier +Jordan almond +byway +earless lizard +marble +andiron +high-protein diet +buzzer +ice floe +crankcase +Bofors gun +sockeye +veery +Delaware +caravansary +prairie coneflower +star apple +suiting +cot +call forwarding +American gallinule +glossy snake +rose chafer +instant coffee +placket +Tarahumara +pulsar +philodendron +orange tortrix +cypress spurge +Welsh rarebit +music box +giant crab +vanilla bean +water thrush +prayer shawl +gouge +promoter +dagga +black currant +bitter cassava +drain basket +snare +digital audiotape +retainer +olive drab +gluten bread +graham cracker +cheddar pink +caregiver +spray paint +Anglo-American +boatyard +backbencher +Link trainer +bell arch +weir +arbor +millionairess +sour cream +earthtongue +crawlspace +crossjack +balalaika +crupper +western redbud +guinea hen +rangeland +gaboon viper +common louse +single-leaf +horseshoe +balsam poplar +triskelion +jack-in-the-box +jester +rain stick +glove compartment +imperial moth +Japanese beech +biotin +turnip 
+oligarch +western skink +mudguard +retsina +data system +green bristlegrass +visiting professor +beaded lizard +weathercock +Sloppy Joe +high tea +lightweight +record sleeve +cooler +nodding onion +pigs in blankets +torque converter +district attorney +bunting +orrery +radiator hose +common plum +wood spurge +calamus +chicken Kiev +pin +lath +telephone bell +thistledown +audiotape +gypsy moth +snuffer +pari-mutuel machine +peanut butter +hearthrug +sack +Old World yew +chives +stovepipe +xenolith +mattock +mangle +electric chair +backup system +Empire +blackwash +dodder +Allegheny chinkapin +finger plate +junk +brown rice +wild angelica +chinaberry +mason +rasp +den +violet wood sorrel +nosewheel +plenum +merino +kirtle +Igbo +ensign +sex symbol +Belgian endive +sugarberry +yellow salsify +purple emperor +atlas +African clawed frog +leatherjacket +midwife +sac fungus +European cuckoo +three-day event +Mexican poppy +wagon tire +armyworm +rain gauge +Oregon ash +columbarium +spectrophotometer +Milanese +pointing trowel +casualty +Eastern hop hornbeam +lobe +mouthpiece +au pair girl +giant water bug +Browning automatic rifle +laser-guided bomb +drone +white alder +cockleshell +mufti +gravy +berm +boat hook +marshmallow +pet shop +cowpea +tactician +wading pool +anchovy dressing +flip +shackle +Wedgwood +thick-billed murre +erecting prism +giant salamander +sleeper +quiver +chain store +wing tip +New World tapir +witches' butter +gendarme +ginseng +common maidenhair +graduate nurse +balsam pear +hoatzin +philanthropist +axle bar +gas meter +moth mullein +ragbag +Chinese cabbage +celery stick +rutabaga +scalpel +cape marigold +variometer +argali +brig +shuffleboard +wort +Orlon +epiphyllum +allice shad +coffee filter +solar telescope +Japanese linden +thinning shears +golden wattle +queen triggerfish +millinery +surfbird +flame fish +clove +dicamptodon +red-bellied terrapin +turmeric +baya +air horn +Indian coral tree +punnet +sharkskin +water crowfoot +bight +desert 
iguana +Texas toad +volva +dredge +Turkey red +chemical plant +gemma +dice cup +orange marmalade +mistletoe +surveyor +frozen orange juice +pallette +poultryman +burbot +courlan +captain +saddlery +bodyguard +dwarf tulip +black ash +pulse +nailbrush +tickseed sunflower +legless lizard +shirtwaist +polling booth +chickeree +garlic chive +common thyme +multichannel recorder +screw thread +sangoma +calliopsis +geoduck +colleen +bandicoot rat +pastis +swamp sunflower +scorekeeper +Honduras mahogany +Australian pitcher plant +triangle +elevator shaft +green pea soup +carrel +prairie aster +bird's-nest fungus +scarlet clematis +gook +mescal button +carcase +mulatto +ejection seat +strawberry daiquiri +goat grass +car battery +babu +chief of staff +monilia +Siberian crab +ridge rope +Morchella semilibera +nutmeg +moosewood +graham bread +California four o'clock +zwieback +velvetleaf +abelmosk +shadow box +corned beef hash +newsreader +backstairs +cutwork +sherbert +tooth fungus +angel-wing begonia +greasepaint +common milkwort +potato vine +CD drive +crepe de Chine +sporting man +koto +armet +barking frog +celeriac +drainage ditch +black box +steel blue +clotheshorse +corn speedwell +drawknife +spritsail +vichyssoise +modeler +pocketcomb +limey +suslik +cockpit +digester +brig +raita +troll +benedictine +rock wren +lock +Barnaby's thistle +school bell +school ship +Soave +falchion +swaddling clothes +terrine +smoke screen +rivulus +sweet lemon +cullis +bustier +peppermint +Philadelphia fleabane +Hampshire +active +charnel house +face guard +Quebecois +facilitator +tongue depressor +bitternut +heath aster +sapodilla +bluestem +centrist +Canterbury bell +needlenose pliers +groats +tapa +Qatari +paper feed +tilt-top table +plastering trowel +brazil nut +rotogravure +patriot +manicurist +bacon and eggs +puffbird +lightweight +golden willow +kaiser roll +duff +girandole +seaside daisy +Kurdistan +Skivvies +showboat +fire bell +lock-gate +greater masterwort +weald +ice ax 
+toetoe +mess kit +bucking bronco +black turnstone +backscratcher +backpacker +basement +marbleization +trigger +satsuma +fall-blooming hydrangea +mountain lady's slipper +yellow oleander +crookneck +ex-president +Venn diagram +psaltery +bulwark +old boy +linear leaf +aril +butt weld +fall webworm +pruner +bald-faced hornet +nougat +tailgate +field speedwell +potsherd +center punch +long beech fern +desert paintbrush +canyon treefrog +bushel basket +Eurasian +swamp horsetail +cryptanalyst +wicket +school newspaper +captive +spider brake +electric mixer +tumbleweed +mason wasp +sash window +paddock +wet bar +oxtongue +stevia +wheat rust +scute +switch engine +mud dauber +dotterel +snailflower +common barberry +mulligatawny +cinnamon bark +cigar box +trivet +proof spirit +cream soda +western grey squirrel +baby powder +Bren +Japanese yew +sailcloth +Basket Maker +bannock +basidiocarp +aphelion +erect bugle +limiter +bosc +Przewalski's horse +helmet orchid +audiometer +battle cruiser +grass widower +staphylococcus +Congolese +common pitcher plant +parliamentary agent +Virginia snakeroot +mockernut +Siberian elm +backbench +rough +chervil +chlamys +nationalist +galantine +screwdriver +falsifier +cancerweed +spur +jerkin +porte-cochere +dill pickle +Montagu's harrier +tetrode +true fungus +American quaking aspen +vitamin B1 +leopard lily +eggdrop soup +aurochs +core bit +Jaws of Life +trousseau +parquetry +Disciotis venosa +tender +beef goulash +vitamin K1 +pepper spray +covered smut +hook +sports announcer +weapons carrier +foxtail grass +sloe gin +mezereon +antifouling paint +pavior +pile driver +security consultant +monkey-wrench +Indian hemp +amaretto +American wistaria +A-line +market strategist +rainbow runner +souvlaki +binturong +stiletto +gastrula +Vietnamese +Old World hop hornbeam +cold cathode +pier table +houndstooth check +prop root +leaf-footed bug +sedge wren +Dutch iris +drop curtain +opossum rat +lame +pollen tube +doubletree +compression bandage 
+pinon pine +catmint +pier arch +kingmaker +deanery +loofah +fullback +fencing mask +flying boat +carpet sweeper +lemon-scented gum +Accipitriformes +kit +pigfish +clipper +dolmas +lesser centaury +blood agar +water violet +raw milk +lemonade +vicar-general +supply closet +Anzac +confectioner +ignition key +velvet grass +white willow +John Dory +ruddiness +wheel +common horsetail +hubbard squash +speculum +Spanish bayonet +mountain mint +glint +foxhole +housemate +bootjack +sleigh bell +clog dancer +Mexican mint +rendering +Hausa +star saxifrage +spring squill +clothesbrush +liquid metal reactor +Columbia tiger lily +sorrel +cartwheel +Jersey +Caucasian walnut +desert willow +surveyor +elbow +Santa Gertrudis +fringe bush +industry analyst +lyrebird +Cortland +arroz con pollo +catechist +tank top +jew's harp +cereal oat +heartleaf +short sleeve +butty +butterfly plant +stud finder +felloe +beer garden +clevis +wood warbler +demerara +cornetfish +mince +Jamaica rum +Spanish broom +binnacle +camise +ferrule +Copt +hall +minicar +scimitar +cryptogam +miter box +limestone fern +Marsala +Parliamentarian +gravy +woolly bear moth +formula +squash bug +pigmentation +plate +skin graft +radiotelegraph +hellbender +soft pedal +lavender cotton +propagator +Bailey bridge +cottage pie +rotgut +A battery +pintle +off-line equipment +European swift +shrimp butter +plumb bob +trunk lid +succotash +yellow cypress +heartleaf +antelope squirrel +sambar +maternity ward +deciduous plant +bartlett +Riesling +sour cherry +Klansman +poke +academician +sociolinguist +bird's nest fern +common privet +scale fern +tachograph +oyster stuffing +pusher +green June beetle +staghorn sumac +lockage +master +bap +harlequin +blackfly +spotted coral root +kahikatea +cabana +riot gun +apple mint +kob +praline +confidant +pahautea +float +city father +Zen Buddhist +pessimist +conference center +banksia rose +comfit +sweet cicely +winged bean +henroost +myope +bunt +nailfile +yellow mountain saxifrage 
+cruise control +abandoned ship +water chinquapin +spanker +wing nut +puccoon +pier glass +Atlantic sailfish +medlar +buttercrunch +rough-skinned newt +planter's punch +Dutch iris +control key +committeewoman +torpedo-boat destroyer +garambulla +tree heath +gladiator +September elm +inclinometer +snowbell +call-in +sunsuit +microfiche +bluestocking +cheval glass +server +franking machine +sugar syrup +Macoun +transport ship +alderfly +wash-and-wear +Abbe condenser +bush nasturtium +wild leek +canary seed +Northern Baptist +sweet wormwood +jaboticaba +cardroom +autoradiograph +ash-pan +sprinkler system +rattrap +claymore +parts bin +forest red gum +thermonuclear reactor +Indian crocus +lector +heir apparent +leafy spurge +masquerader +varicella zoster virus +cucumber tree +hedger +Shumard oak +zooplankton +quartermaster +arrester +bridge +hop clover +meadow foxtail +winter hazel +portable circular saw +penuche +limpa +blue toadflax +mesophyte +Alpine anemone +pet sitter +avocado +streptococcus +fiber optic cable +river red gum +hornist +chicken taco +red spider +tape grass +densitometer +salmonberry +tiger snake +hot toddy +silver fern +candlenut +buckram +local call +defoliator +king +mahoe +lever lock +social insect +winter purslane +bootblack +fireball +ramie +bellbird +prepuce +capote +Chinese forget-me-not +Pisces +costume +California black oak +tree lupine +golden polypody +liger +California whipsnake +urodele +sapodilla +skillet bread +duckpin +supremo +asparagus bean +kampong +endameba +cow pony +rider +motherwort +Persian iris +soursop +kohlrabi +Parisienne +irons +doubles +feijoa +farmplace +cottage cheese +bezoar goat +subcontractor +blunderbuss +down +purple martin +Lapp +crenate leaf +tobacco pouch +beach towel +Santa Lucia fir +monetarist +stringer +ocellated turkey +Texas purple spike +ackee +caddy +hedge mustard +second-rater +strawberry bush +valedictorian +steak sauce +prairie gourd +aspirant +mint +Valenciennes +vodka martini +American persimmon 
+big brown bat +Mycenaen +mouthpiece +norfolk island pine +pennyroyal +Jewish rye bread +granadilla +tract house +wall +shuttle helicopter +blackjack oak +Lippizan +storm window +white zinnia +sickle +sushi bar +polish +baldric +brooklime +church hat +control circuit +vicuna +death adder +eukaryote +durmast +field soybean +jacket potato +wild basil +queen consort +brooklime +octant +blue false indigo +broccoli raab +step-down transformer +date bread +blue ash +duffer +oak chestnut +pennant +wedge +Florentine iris +morion +weakfish +morning dress +public address system +spearmint +Ashkenazi +sow +interpreter +Metis +pita +iron lung +parfait glass +cylinder lock +immortelle +obstetrical toad +tee hinge +successor +western +working girl +julienne +AND circuit +spaghetti junction +fer-de-lance +enlisted woman +star +lightning rod +bilge pump +pacer +horse nettle +African oil palm +blastocyst +air hammer +bamboo fern +remote terminal +lambkin +money cowrie +Pelham +clinical thermometer +wiggler +guru +false indigo +tea bag +foredeck +king +baby shoe +mule +grab bag +silver-bell tree +knitting machine +cobia +roulette ball +larder +button pink +rumble seat +noria +queen mother +solar thermal system +aquaplane +highbrow +rusty blackbird +desktop +lima bean +pontoon bridge +watercress +wild cabbage +tumbleweed +dressing sack +compact-disk burner +spittoon +marrow +sporophyte +second fiddle +pot-au-feu +specialty store +dry +mole +khadi +japonica +lovage +squamous cell +lobe +European creeper +brown pine +bladderpod +rumble +French Canadian +mascarpone +Pacific halibut +perennial ryegrass +wine lover +turbot +longwool +silver tree fern +dust cover +synchromesh +corn pudding +alpine azalea +garboard +cane sugar +observation dome +condensation pump +hind +taximeter +hand drill +gas thermometer +jammer +buffing wheel +handstamp +prairie mallow +turkey stew +sun spurge +duck pate +kibble +Cassin's kingbird +apadana +Devon +grinner +oocyte +blank +header +schoolmaster +guard 
ship +intravenous pyelogram +rimu +luff +Mediterranean fruit fly +singlestick +lady-in-waiting +curb +birch +limekiln +orthoscope +serotine +Spanish oak +swamp cottonwood +edger +city man +picnicker +white basswood +Parsons table +Christmas begonia +perspirer +Pacific tree toad +Cape tulip +finger bowl +blue pike +greengage +handcar +milkweed +potbelly +river dolphin +creel +typewriter carriage +banteng +pawnbroker's shop +huon pine +biennial +man of action +foundress +caveman +featheredge +jordan almond +sandblaster +coralberry +low-calorie diet +hoot owl +garter +bain-marie +wrecker +fenugreek +double-hung window +idol +scullery +balloon vine +summer savory +winged spindle tree +Helvella crispa +walrus mustache +gas engine +boulle +rush grass +rue +hoe handle +cat fancier +deerstalker +dunker +American red plum +fall dandelion +groover +sprag +stair-rod +wish-wash +pricket +architrave +California laurel +net melon +Arizona sycamore +executive secretary +silverweed +silky cornel +surface ship +square sail +common purslane +villa +holly-leaved cherry +sweet birch +pecan +artillery shell +breast pocket +pirogi +scarlet runner +rabbit brush +mealworm +leather carp +palette knife +Jerusalem sage +boneshaker +slit lamp +digital voltmeter +polar glacier +square-rigger +homogenized milk +Sten gun +lesser calamint +pyrograph +Korean lawn grass +Zinfandel +crepe fern +western ragweed +clasp knife +distributor housing +cartouche +scooter +ski parka +jackknife +Carolina spring beauty +soft diet +candlesnuffer +horse trader +step stool +agouti +accelerometer +annual fern +judge advocate +angelica +roll film +treehopper +ombu +comer +sultanate +kitchen help +hooded ladies' tresses +milking machine +knuckle joint +Jamaica honeysuckle +music teacher +sauerkraut +Weston cell +slivovitz +Worcester sauce +tall bellflower +chancery +prophetess +casquet +shortfin mako +sorus +visual display unit +asp +grenadier +black pepper +crottle +erasable programmable read-only memory +jabot 
+ratchet +disk controller +chief petty officer +tap wrench +white mountain ash +cultivated rice +flying phalanger +skillet corn bread +BB gun +Elamite +European red elder +reed rhapis +ciderpress +inga +torpedo +wild teasel +bean curd +oeil de boeuf +acuminate leaf +bitter lemon +hitchrack +Lorraine cross +hostess +European dogtooth +adz +polonaise +rock sandwort +Waldorf salad +myrmecophile +klystron +mole rat +draba +corn borer +robusta coffee +chub mackerel +leatherleaf +chronometer +Moselle +sea aster +fennel +slop basin +constable +Brunswick stew +hydraulic pump +French omelet +icebreaker +Manx shearwater +press of sail +ninepin +blue succory +bootstrap +hallstand +chit +firefly +bearded seal +fuel filter +jezebel +mate +Roquefort +cheesecloth +plasterer +blue pimpernel +lake dwelling +shrink-wrap +goat cheese +common gum cistus +coastland +Sunday best +wild tobacco +mandrake +common unicorn plant +barbican +culotte +blockhouse +German iris +tarragon +caramel +wild rosemary +grain +voyager +squirting cucumber +eastern narrow-mouthed toad +creeping fern +luge +saffron +garland flower +furnace room +starship +Oriental scops owl +Italian honeysuckle +berserker +Chinese elm +scrubber +bishop pine +French polish +compromiser +skimmer +river shad +lobster thermidor +leadwort +man-of-the-earth +razorblade +vicegerent +empress +link +ham and eggs +wild lily of the valley +blackfish +splicer +fossa +mara +moneygrubber +brachiopod +fauteuil +caldera +finish coat +croupier +termer +leopard's-bane +sei whale +molucca balm +dolly +dog food +term infant +soft roll +episcia +sewer +inquiry agent +active citizen +perry +California newt +moon shell +bladderwrack +common shrew +dill +Dutch elm fungus +key lime +electrometer +divorce lawyer +lamb's-quarters +apple turnover +shipmate +Guernsey +legionnaire +electric blanket +Rocky mountain pinon +tobacco mildew +stinking iris +forestiera +departure lounge +wiper motor +jurist +scarlet runner +pallbearer +batter's box +inertial 
guidance system +fines herbes +oilcan +sisal +mustache cup +steamed pudding +Visayan +fiesta flower +lady tulip +lungless salamander +batiste +electrical system +blazing star +car carrier +Walloon +mother hen +stump +mulled cider +secondary coil +Alexandria senna +etui +scrumpy +Havasupai +jawbreaker +glume +ex-husband +Eskimo +Joint Direct Attack Munition +number theorist +five-hitter +pinstripe +Olympian +common mackerel +stone bass +bigos +Bahraini +airbrush +great ragweed +glass lizard +hand fern +roundel +riding master +shoetree +yellow avens +old fashioned +dolman +stinger +nursling +legate +faille +golden fern +bedpost +shop steward +kidney bean +bladderwort +internist +limeade +Bruneian +Coloradan +playsuit +wintergreen oil +Cantabrigian +mutton snapper +shot putter +hand grenade +moccasin +cobnut +marrow +separatist +cockscomb +discharge pipe +Gabonese +spade bit +chicken cordon bleu +varnish tree +European wood mouse +striped gentian +Ayrshire +curassow +moo goo gai pan +malarial mosquito +glow tube +ledger board +bib-and-tucker +European chestnut +suffragette +color wash +gaffsail +golden larch +voting machine +Kahlua +lungi +amusement arcade +Uzbek +butternut +mold +mule's ears +dickey +shrimper +trophozoite +dreadnought +shepherd's purse +greenhouse whitefly +spotted gum +copperware +perfect game +semigloss +spawn +telecom hotel +stakeholder +mason wasp +flibbertigibbet +chin strap +fringed pink +saki +urchin +memorizer +roulade +whiting +cling +corncrake +Queen of England +choo-choo +empty +heating pad +playmate +visualizer +popcorn ball +absconder +sou'wester +target acquisition system +mock-up +dental floss +tray cloth +haddock +bulblet fern +housing commissioner +delayed action +anchor light +harbor porpoise +water wings +PT boat +night latch +fennel +doorframe +green-tailed towhee +grey polypody +torture chamber +American germander +Chinese wistaria +cattalo +accompanist +rifleman +alpine clover +contrarian +lemon peel +Mexican cypress +sprog 
+dado +Galilean telescope +desmid +lockup +Latin +American raspberry +mescal +butternut +prairie orchid +downy yellow violet +green hellebore +radio compass +bread and butter pickle +Cherokee rose +knish +destroyer escort +Arkansan +langlaufer +pyxis +winter savory +velocipede +motley +winter savory +law student +barren ground caribou +apple dumpling +field hospital +works +city editor +European flatfish +Morchella crassipes +life office +boot camp +cream sauce +cape aloe +acetate disk +devil ray +tile cutter +Plymouth Rock +microspore +godown +Syrian +tiercel +American cranberry +lesser spearwort +anopheline +Spanish oyster plant +wire cloth +attic fan +birch beer +small computer system interface +crook +ribbon fern +explorer's gentian +nagami +I-beam +rosebud cherry +Jerusalem artichoke +Stillson wrench +pluralist +district manager +Levantine +orangeade +part-timer +post horn +Oregon grape +contadino +cargo helicopter +silverpoint +chaja +California bluebell +case +Shasta +cheese cutter +Leishmania +avalanche lily +iron horse +bialy +Yana +Delawarean +Prussian +nonpareil +hammer +hoper +chewink +anil +skim milk +desert four o'clock +crescent wrench +white marlin +blue jasmine +malacca +anadama bread +purple poppy mallow +ganglion cell +ligature +no-parking zone +golden clematis +Cotswold +aliterate +shebeen +yardarm +superbug +fanaloka +stinking cedar +spirochete +wort +pater +heaume +thermocouple +ironing +naval tactical data system +European goatsucker +prairie cordgrass +accused +foreign agent +halberd +western mugwort +esthetician +Persian lilac +cracked-wheat bread +crosscut saw +rock penstemon +paper cutter +crematory +ideologist +cattley guava +margarine +creosote bush +hoary plantain +spark gap +lumberjack +Greek valerian +mission bells +tight end +bigeye +large crabgrass +stone marten +cleat +lentil +bay scallop +lector +charger +assemblywoman +second lieutenant +boil smut +sarsaparilla +hydromel +cat flea +pinfish +whole milk +hairnet +myeloblast 
+peasant +blind curve +first offender +dwarf-white trillium +Brother +coatdress +gun emplacement +tamarisk gerbil +snap +air cushion +trailing edge +potato vine +gig +everlasting pea +champion +dibble +rattail cactus +timothy +prince's-feather +cutlas +lockring +sealing wax +Brussels lace +corn mint +highboard +she-oak +wild celery +pillar +Burberry +Hakka +leucothoe +bell tent +gallery +coontie +leather fern +smack +adenovirus +linoleum +chain wrench +tammy +gas fixture +nut bar +baneberry +butterscotch +goat's rue +bullock +grey snapper +mother-in-law +hyson +wayfaring tree +mollie +needle spike rush +buckwheat +bayberry +brush-tailed phalanger +dry rot +harborage +stormy petrel +Oriental beetle +Atlantic halibut +coping saw +simple fruit +viscose rayon +surgeonfish +upstairs +security system +common ragweed +verticillium +pancake batter +hawk's-beard +Dutchman's-pipe +refrigeration system +European parsley fern +Ivy Leaguer +totalitarian +gonococcus +towhead +showy sunflower +pallium +multiengine airplane +hair trigger +rabbit-eared bandicoot +siskiyou lewisia +fuel system +flat arch +broad beech fern +Alpine lady fern +bracken +Kentucky black bass +rut +mountain maple +tunaburger +umbrella fern +white-headed stilt +meat hook +panhandler +washhouse +barnyard +safety lamp +leg +ripple mark +paper +sagebrush lizard +light heavyweight +common nutcracker +operator +stalking-horse +horseless carriage +fishhook +suction cup +peg +Ungulata +false teeth +round-bottom flask +Luba +campaign hat +firebox +rudder +parapet +ice pack +appellant +spirit stove +metheglin +common bamboo +soapwort gentian +pannikin +time capsule +burn bag +folk poet +tropical prawn +end man +new caledonian pine +linen +web +free trader +jury box +railing +pignut +leaker +potboy +rubber boa +white snakeroot +plumber +Candida albicans +surfboat +woman +promulgator +eyecup +wild China tree +rattlesnake master +Viyella +alpine salamander +ailanthus silkworm +Albatrellus ovinus +war room +meadow vole 
+robotics equipment +rotary actuator +Engelmann spruce +pinesap +beefcake +native speaker +ridge +injector +water chute +salmonberry +decoupage +bottlebrush +date plum +circlet +American mountain ash +pocketbook +horsemint +sweet four o'clock +kirpan +pinto bean +chervil +equator +range animal +candy thermometer +calanthe +cul +stipendiary +brahman +pelican crossing +topgallant +wild senna +sliding window +carrier pigeon +Tatar +quadruplet +bumboat +spearmint oil +slip clutch +young Turk +golden yarrow +shank +glasswort +dental plaque +Manduca sexta +Northern bedstraw +dent corn +Life Saver +western wall flower +bedder +wherry +Tuscarora +scrapple +borstal +reflux condenser +problem solver +nondriver +perforation +eastern cricket frog +white wood aster +broad buckler-fern +Cape primrose +herringbone +head louse +earl +baton +recording system +primary color for light +cherry laurel +pomfret +ratafia +chocolate milk +obscurantist +revisionist +rood screen +magnetic needle +commensal +oil tycoon +celebrant +domicile +harvest mouse +California nutmeg +greater spearwort +black-billed cuckoo +winepress +demographer +straw boss +diabetic diet +sweetmeat +rabbet +ming tree +basketweaver +freestone +walk-in +Aryan +box coat +audio amplifier +chicken salad +churidars +whydah +box +batman +siren +selectman +gouger +drip coffee +Caesar salad +interpreter +whinstone +grey goldenrod +minicomputer +honey crisp +hypercoaster +Irishman +swamp white oak +reed canary grass +globeflower +cynthia moth +fennel seed +canthus +chino +blind date +tar pit +watermelon begonia +fishtail palm +overcast +Pearmain +primary color for pigments +coal seam +wherry +safety bolt +cretonne +Michigan lily +inflater +moneybag +huckleberry +brassard +bush vetch +looking glass tree +pinwheel roll +alfalfa sprout +sea kale +clinometer +achira +lorgnette +potter wasp +gilded flicker +tody +capulin +captain's chair +crackle +gerardia +prie-dieu +venture capitalist +New Jerseyan +block and tackle +elf cup +bur 
reed +automatic transmission +wax palm +flytrap +crack willow +coachwhip +swizzle +lugger +Dewar flask +baster +oxyacetylene torch +Culex quinquefasciatus +St Peter's wort +wild hyacinth +Russian almond +burrfish +wintergreen +katsura tree +butcher knife +perfumery +thresher +porte-cochere +sheepwalk +hypotenuse +Dalmatian iris +buttercup squash +demiglace +goldenseal +preceptor +rigger +poikilotherm +old-age pensioner +posthouse +wood horsetail +repeater +reciprocating engine +Rambouillet +terra cotta +togs +battledore +horizontal tail +missile defense system +trier +morello +woolly adelgid +munition +double creme +in-fighting +squirrel corn +crow's nest +antler moth +brake cylinder +bandoleer +noticer +Parmesan +hipline +cheapskate +Dubonnet +mole rat +bog aster +ribbon tree +meadow rue +nard +ratel +loose smut +snapping shrimp +golden glow +basil thyme +Florida strap fern +moonshine +flume +lace fern +black bream +orchestra pit +archerfish +exile +ringdove +career man +godfather +bottom-feeder +pasteurized milk +dental implant +pedicel +Catalpa speciosa +yellow foxglove +lancet arch +steam shovel +sampan +patrol boat +sailor cap +tollgate +monal +velociraptor +cacique +jack oak +cursed crowfoot +creep +Parry manzanita +common matrimony vine +grace cup +caecilian +spurge laurel +prickly lettuce +Regius professor +camail +Sitka willow +Courtelle +gin sling +dogmatist +guest +saltine +dust cover +sport +sweeper +feist +lady's-eardrop +vibist +wire stripper +tenpin +interplanetary space +beet green +pruning knife +drainage system +gunnery +ballet master +lime juice +flak catcher +lacrosse ball +Canadian aspen +beatnik +railhead +utilizer +spadefish +Arizona white oak +city university +dense blazing star +hedger +chain pickerel +right-hand man +namby-pamby +nacelle +redneck +tumbler +Chief Secretary +cannon +cupola +kummel +papaya juice +Burton +Stanley Steamer +loganberry +stylus +square meal +rock bass +western ladies' tresses +dramatist +assignee +tandoor 
+trumpetwood +segregator +green adder's mouth +coral necklace +ani +iceboat +densimeter +oxtail soup +kernel +cos lettuce +greenishness +panchromatic film +Parker House roll +oatmeal +backsaw +double Gloucester +bailey +storage cell +giant +coconut milk +broadtail +barouche +loir +soybean meal +white-leaved rockrose +junction barrier +spandrel +sweat bag +goldilocks +flowering wintergreen +cockspur +beef fondue +holding cell +cardamom +cagoule +Kamia +tangelo +Herschelian telescope +wine bar +kachina +sand sage +guy +ivory palm +citrus mealybug +topper +ladyfish +force pump +fanion +calaba +Iowa +orrisroot +ivorybill +Secretary of Agriculture +gagman +dry cell +hypnotist +kenaf +grey alder +deathwatch beetle +gagman +magnetic stripe +trap door +abdominal wall +prefab +broomcorn millet +architeuthis +angler +Pacific giant salamander +barbette carriage +low-fat diet +veal scallopini +B battery +wallah +landing flap +pistachio +jaguarundi +nagi +cicerone +felt fungus +Aertex +stocks +smooth aster +patchouli +lemon sole +sleeper +basket fern +dundathu pine +anjou +Moreton Bay chestnut +broom sedge +candid camera +red angel's trumpet +oilstone +cinnamon toast +Pacific walrus +fruit custard +Jehovah's Witness +mate +voyeur +Esselen +achromatic lens +sanguine +brine shrimp +dunce cap +swot +transit instrument +grey willow +pack +bench clamp +Nova Scotian +gadgetry +silvery spleenwort +enchantress +rough fish +morula +giant taro +sorus +roux +polyhedral angle +spruce beer +Chicano +cola extract +outfielder +kohleria +white-rumped shrike +car-ferry +subway token +spoon bread +totara +corn borer +bowhead +tensimeter +water scooter +flickertail +Catholicos +pleaser +blue-eyed Mary +calabash +handyman +cascades frog +facing +scarlet oak +lutist +ginger +tree tomato +Harvey Wallbanger +tent peg +insectivore +fusil +swale +chinning bar +bladderpod +New Dealer +dhoti +proscenium arch +common vetchling +channel +collect call +safflower +Texas tortoise +test equipment +theca +RAM 
disk +sheep sorrel +rammer +buttonhook +honey mesquite +dominus +babirusa +queen +Aspergillus fumigatus +crash barrier +nonmember +Muscovite +verdin +Australopithecus afarensis +Turkish Delight +stalked puffball +giardia +divider +mountain skink +head smut +pacemaker +evaporated milk +rattlesnake fern +flamethrower +navy bean +bather +steed +showy orchis +stone crab +artichoke heart +phantom orchid +space helmet +swamp laurel +privateer +junior +surcoat +bristlegrass +flower girl +aphid lion +penthouse +lemonade mix +coude telescope +natal plum +scriber +wood nettle +rape suspect +resplendent quetzel +western poppy +choir loft +fore-topsail +thyme-leaved sandwort +erotic +short circuit +outdoors +flowering tobacco +hookup +aviatrix +corker +horehound +horn +swamp pine +water biscuit +cherimoya +vaporizer +courtier +European sole +full skirt +Mother Carey's chicken +cymule +huck +white snapdragon +mountain nyala +country borage +bonduc +casein paint +grampus +shrimpfish +lodge +dragee +black walnut +caraway seed +roper +glass cutter +tab key +Richardson's geranium +demigod +chichipe +Italian ryegrass +cadet +electrograph +rudd +carpenteria +foie gras +lignum vitae +hedge nettle +pledger +American hackberry +flageolet +beaked hazelnut +reflectometer +sticky geranium +marriage bed +white pepper +japanese clover +whiteface +gnat +extrovert +Canada plum +talipot +chicken stew +egg foo yong +fraxinella +skibob +saucer magnolia +jacket +green smut fungus +cloakroom +landing skid +booth +ice milk +dipole +striped coral root +red buckeye +roughcast +breaststroker +cowherb +razor clam +first-aid station +briarroot +clambake +lander +Bramley's Seedling +frail +jird +minisub +luging +poison milkweed +European lobster +epidemiologist +spandex +paloverde +marumi +bypass condenser +punter +petty spurge +Coryphaena hippurus +bilberry +vermillion rockfish +witness box +viscometer +pulque +Massachusetts fern +herring salad +ridge tile +mesa +dwarf grey willow +southern aster +punch 
pliers +tarnished plant bug +hoop pine +Japanese red pine +benedick +rebozo +silver plate +silver willow +mouse-ear hawkweed +bonito shark +abutment arch +noble cane +tiger rattlesnake +pongee +jumping plant louse +pattypan squash +giant ryegrass +railroad bed +stiff aster +imperial Japanese morning glory +laundry +winter cress +large white petunia +tea maker +pen-and-ink +early warning system +lug +monocot +sea wormwood +breechblock +postage meter +third rail +Mongoloid +Australopithecus boisei +umbrella tent +stirrer +Dumpy level +beroe +post and lintel +green spleenwort +tomato paste +dishpan +stentor +sweatband +cobbler +New York fern +gaff +prairie willow +cyclops +jigsaw +rotavirus +pallet +eastern ground snake +boiling water reactor +acute triangle +agora +European cranberry +roebuck +surgical dressing +busboy +cannikin +feedlot +common pond-skater +cochin +horsehair lichen +fetter +sapote +fichu +dermatologist +fire tongs +creme anglais +foster-mother +laurelwood +chicken snake +mincemeat +rocker +wild spinach +powder and shot +butterwort +auxiliary engine +mamey +hart's-tongue +sucking pig +American turkey oak +troopship +buttermilk +divi-divi +boatswain's chair +soda fountain +southern flying squirrel +elastic +cutaway +housekeeper +renegade +apple rust +bridoon +machicolation +stunt +keyhole limpet +personality +solitary vireo +epidendron +Jihadist +boffin +bettong +terror +partial denture +pusher +saltcellar +capstan +large poodle +Bibb lettuce +low-bush blueberry +staple +banded krait +sickroom +barnyard grass +wandflower +woodworm +bluegrass +squirrel's-foot fern +rabbitfish +delta wing +milking shorthorn +limber pine +guru +gamine +scythe +sweetsop +Gruyere +bloodmobile +mine detector +American mistletoe +silver beech +hound's-tongue +Lombardy poplar +basket fern +pink-and-white everlasting +redtail +Aladdin's lamp +mace +outtake +condensed milk +Canada wild rye +silver perch +waxflower +taxer +Chinese chestnut +Our Lord's candle +mugwump +school 
system +salp +osso buco +dress shirt +butterweed +low-fat milk +couchette +broomcorn +proscenium +mill agent +smut grass +humpback +southern spadefoot +military leader +canebrake rattlesnake +tailor-made +ebony +beach house +flying gecko +hoary alison +typhoid bacillus +Romanov +vanilla pudding +sweet cicely +Spodoptera exigua +dress rack +flannel +skipjack +bolognese pasta sauce +rooibos +thunderer +blessed thistle +gauntlet +mahatma +granadilla +laurel sumac +Yuma +thyme-leaved speedwell +encyclical +twill +linocut +manna gum +spark arrester +cocklebur +Indian hemp +lemon oil +Hall's honeysuckle +raceway +flop +Himalayan lilac +one-flowered wintergreen +photosphere +silvery spleenwort +convex polygon +canarybird flower +foster-sister +fluffy omelet +palanquin +roll +dandelion green +Javanese +workpiece +Carmelite +bread mold +schlemiel +wild lily of the valley +grugru +solenoid +puff batter +skep +balance wheel +Gadaba +portia tree +mobcap +two-man tent +scuffle +firebrat +ant lion +anise +caster +giant petrel +American water spaniel +naboom +treasure ship +foster-son +fiddleneck +alidade +sugar refinery +wild oat +water beetle +generic +damson plum +abrocome +detainee +pitch pipe +coast +nilgai +radiotherapy equipment +heart-leaved aster +gristmill +grocer +Appaloosa +Cheviot +brake pedal +lantana +cave myotis +Rob Roy +sea spider +latrine +carpophore +recycling plant +coondog +brace and bit +funambulist +eggar +mantelet +postdoc +mezzanine +coco plum +pulse generator +high-vitamin diet +menhaden +mechanical engineer +bergamot mint +Chuvash +grated cheese +helicon +belladonna +beet armyworm +eelgrass +resuscitator +interrupted fern +arrow grass +cistern +Pacific herring +colostrum +journal bearing +Fauve +wrist pin +canape +choice morsel +quadraphony +guard boat +shortgrass +claymore mine +hitching post +cargo door +decoder +gym rat +Cocopa +commander +apple of Peru +seckel +yellow goatfish +dog flea +dodo +oconee bells +Tudor arch +turkey stuffing +ebony 
spleenwort +wheat flag smut +scolopendrium +Brazilian pepper tree +gusset +inspector +lunar excursion module +baron +plantigrade mammal +Creole +phosphate +aromatic aster +ghee +audiovisual +onychophoran +cotton stainer +lieutenant junior grade +spheroid +amen corner +caper sauce +Caladium bicolor +dyer's rocket +seaside goldenrod +flint corn +Very pistol +rotifer +steeplechaser +rouleau +escape wheel +Namibian +millivoltmeter +emmer +climatologist +agateware +sea lyme grass +inclinometer +water fennel +saddle seat +vicar +garden cress +ski rack +Norfolk jacket +casaba +coast rhododendron +sericea lespedeza +hematocrit +autopilot +tilter +finish coat +Pennsylvanian +shrubby St John's wort +podocarp +percussion cap +ceriman +peanut bar +gean +jack +durra +rotor +carob +cottage tulip +three-spined stickleback +trencher +elevator +kalumpang +abaca +Australopithecus robustus +active matrix screen +water bed +hatmaker +lodestone +cat food +overcup oak +balletomane +popgun +rheometer +process cheese +frog legs +heartleaf arnica +p-n-p transistor +steam turbine +Tulu +scalene triangle +licorice fern +coffee break +trade unionist +starved aster +firing pin +water gum +Masonite +hairspring +seminarian +blue racer +forecastle +scrub pine +Atlantic spiny dogfish +kopje +orphrey +fan tracery +gee-gee +vixen +interstellar space +Harris Tweed +sawmill +lemon mint +bitewing +ringlet +Chinese mustard +paleontologist +American hazel +brigantine +clay-colored robin +zombie +nectarine +West Indian jasmine +pineapple weed +rusher +gynecologist +pole +thylacine +myrtle beech +golden cup +woodruff +T-bar lift +terebinth +service club +homegirl +Blue Mountain tea +figwort +New Hampshirite +Stayman +tonometer +white turnip +messuage +cruet-stand +colliery +connecting room +lesser twayblade +bland diet +crown prince +beggarwoman +restharrow +bower actinidia +firebug +hepatic tanager +telegraph +Spodoptera frugiperda +spackle +carpenter's square +pyx +supermom +thickhead +whorled milkweed 
+Arctic char +Chinese rhubarb +pince-nez +wolverine +tomato concentrate +cascarilla bark +red underwing +leather flower +Jerusalem thorn +bullpen +Salisbury steak +anode +coffeeberry +bottling plant +fritter batter +aerial torpedo +matrix +local oscillator +stalked puffball +bruin +three-cornered leek +wassail +stabling +damping off fungus +myriapod +osier +lesser kudu +cownose ray +chokecherry +wagon +obstetrician +Glengarry +even-pinnate leaf +wine sauce +osteocyte +baker's yeast +heir presumptive +blackjack +tympanist +golden fern +fipple +Japanese oak +bar mask +stamping machine +argus +knobcone pine +oil beetle +lanai +upper berth +condenser +proctologist +catechu +wild spurge +vestry +ground snake +proton accelerator +walker +scarlet bush +transom +lagging +bouillon +slender loris +black currant +developer +football hero +plum sauce +striped mullet +prince charming +fictional animal +prosimian +lug wrench +lemonwood +kirsch +spy satellite +black caraway +Thompson Seedless +bead tree +purple fringeless orchid +Virginia strawberry +chigetai +punkie +gall wasp +addressing machine +rock polypody +good-king-henry +spring cankerworm +wimple +noncandidate +saskatoon +hacienda +Darjeeling +snowberry +lounging pajama +ascospore +ski-plane +hedgehog cereus +Welsh onion +yautia +coaster brake +sickle cell +parrot's beak +fuller's teasel +painted greenling +scablands +stuffed cabbage +barrel organ +etcher +dwarf maple +camp +Australian blacksnake +currycomb +obtuse triangle +rose gum +psychrometer +abridger +torpedo +carpet loom +sodalist +slender rush +loligo +sclerometer +wimp +dotted gayfeather +green ash +pinstripe +moralist +medusa's head +garden centipede +heath aster +fool's parsley +olla podrida +Potawatomi +Edam +toothache tree +hulk +seabag +narthex +compartment +prairie star +lookdown +B-flat clarinet +event planner +clip lead +shirting +milk punch +supercharger +macadamia nut +giant coreopsis +computer store +martingale +keyboard buffer +summer flounder 
+squash ball +gas turbine +object ball +plier +black mulberry +reef squirrelfish +scampi +willow aster +bowler +striped marlin +smooth muscle cell +diplodocus +Liberty ship +sponge cloth +guitarfish +walking leaf +showroom +California bluebell +bolo +turnbuckle +boysenberry +hardware +Gael +imago +endorser +jujube +dust bag +rapporteur +field wormwood +low-water mark +naval missile +Pacific yew +reversible +crabapple jelly +poniard +barricade +spawner +simnel +seltzer +deckle edge +needle +timbale +satellite transmitter +organization man +job candidate +orderly +native cranberry +fir clubmoss +coaming +chartered accountant +electron accelerator +Sierra plum +American foxhound +long underwear +Penobscot +blueberry yogurt +biretta +cascara +Paranthropus +Dorian +nun's habit +lenten rose +Augustinian +designer +northern phalarope +mombin +hazel mouse +reeve +waffler +telegraphy +Verpa conica +ignition coil +Japanese oyster +S-shape +divining rod +ant thrush +throat protector +interlocutor +Desmodus rotundus +pere david's deer +attenuator +Cypriot +red sandalwood +pendulum watch +broadcloth +striped drum +sequence +safety arch +diapensia +hog +western spadefoot +chlorella +comb-footed spider +Chechen +darning needle +C-ration +hard beech +piano action +scaling ladder +Nepal trumpet flower +ravigote +screw wrench +ramekin +Lyonnaise sauce +dinner napkin +partial veil +masseuse +coatrack +mooring tower +blue-eyed African daisy +English horn +baton +rope tow +toll bridge +massage parlor +quark cheese +lounging jacket +tall goldenrod +flying jib +coordinate axis +barley-sugar +integrator +worm gear +captain +sweatshop +class +layer +chili powder +dripping pan +oatcake +newsroom +tadpole shrimp +rake +trade magazine +silks +ram's-head +senior +knower +masseur +yam +peg +wheel tree +hardbake +test room +long-spurred violet +creeping spike rush +shrapnel +coffee senna +matchbox +creeping soft grass +welder's mask +pickaback plant +urial +hooded pitcher plant +incense cedar 
+Ohio buckeye +ant cow +skeleton fork fern +Indiaman +swamp ash +testatrix +marang +spherocyte +Winesap +Indian mallow +teju +Yersinia pestis +dye-works +sauerbraten +coral bean tree +safe house +postulator +eyas +lotus +wood vise +lady-of-the-night +East German +cymling +rock candy +western omelet +anoa +rainbow seaperch +crossover voter +Finn +tree shrew +hog plum +Federal +shagbark +clockwork +Alexandrian laurel +metal wood +brill +military chaplain +trend-setter +call-back +Indian rat snake +spurred gentian +Japanese maple +forest goat +bee moth +viola da braccio +duckboard +armyworm +hangnail +counterbore +cream-of-tartar tree +Mullah +bonbon +water hazard +temple orange +corporatist +rough bindweed +Turkish bath +mistletoe fig +beach sand verbena +caddisworm +English plantain +brown Betty +power pack +lion's-ear +Francis turbine +stayer +dichondra +marsh St-John's wort +squab +energizer +common horehound +mantispid +pullback +handwheel +spark arrester +yakuza +Virginian witch hazel +grunter +waterworks +bondwoman +chain printer +stockjobber +coconut milk +yardgrass +blue chip +bridle path +riser +pleurothallis +saltwort +salal +broadside +blackboard eraser +bastard +Para rubber tree +red bat +digital-analog converter +calabash +cashier +cow shark +horned pout +microphage +monologist +woolly monkey +Illinoisan +marsh horsetail +distaff +siris +eparch +gooseneck loosestrife +sounding rocket +multiprocessor +saiga +xerographic printer +madrona +right triangle +sweet gale +red maids +wolfsbane +pork-and-veal goulash +French sorrel +mutterer +Venetian sumac +drumlin +white crappie +squire +large-flowered calamint +northern cricket frog +mushroom sauce +supertanker +morello +auxiliary boiler +Virginia thimbleweed +cottage tent +bubble shell +big shellbark +wormwood sage +cider gum +coast lily +American feverfew +Peruvian balsam +purple silkweed +tobacco moth +desk dictionary +rock elm +eastern indigo snake +Japanese privet +lamb +levee +L-plate +soapfish +painted 
tongue +scuttle +markhor +Marburg virus +mackinaw +major +crypt +ball and chain +domestic silkworm moth +bottom feeder +mistress +death house +freight elevator +bellyband +Pulex irritans +Bacillus anthracis +fire control radar +hysterosalpingogram +turbogenerator +decompound leaf +vambrace +scentless camomile +Medinilla magnifica +prima ballerina +Northern Spy +quartz lamp +grains of paradise +justiciar +felt fern +seismograph +Madagascar jasmine +imaret +white perch +Alpine mouse-ear +tea bread +yellow bass +poseuse +espionage agent +punching bag +eurypterid +orange sneezeweed +banded stilt +armhole +postern +mother +kapuka +catechumen +Soubise +Sauvignon blanc +gunnery sergeant +self-starter +ceratozamia +Atlantic cod +Reoviridae +blood cup +horseshoe bat +oriental plane +voussoir +fetterbush +samara +truncated pyramid +lingcod +athenaeum +shyster +Carolina hemlock +submarine torpedo +floating fern +yataghan +sun tea +viola d'amore +conenose +ventilation shaft +walk-up apartment +saury +wild wheat +porcupine ball +tahini +kris +grass fern +drip pan +black bryony +Scotch broth +tapioca pudding +southwestern toad +Hare Krishna +guimpe +wild madder +megalocyte +teaching fellow +shrubby penstemon +lesser wintergreen +privet hedge +Fahrenheit thermometer +stern chaser +prickly ash +pump room +ricer +chicken mousse +wing commander +sun gear +bolus +alpine milk vetch +opera cloak +twinjet +Goldie's fern +abnegator +alphabet soup +node +grape jelly +early coral root +Tarzan +quarterstaff +greeter +Eurasian woodcock +primary coil +quirt +tinkerer +bolt +creme de fraise +voltage regulator +news photography +Jat +bristly locust +Gouda +dickey +lobster butter +dwarf flowering almond +fagot stitch +Reform Jew +ostrich fern +bathyscaphe +purple mullein +alpaca +civic leader +jellaba +Arizona ash +wasabi +Irishwoman +choke +stockinet +religionist +sewage disposal plant +bittersweet +Hyphantria cunea +pheasant under glass +screen actor +chapterhouse +quoit +horseshoe bat +rapper 
+cupule +planetary gear +cascade penstemon +redoubt +salt +areaway +megalomaniac +bush willow +amethystine python +plains spadefoot +colour supplement +kick pleat +bell apple +narwhal +slippery elm +stenograph +baa-lamb +quadrant +balker +jobcentre +spit curl +bastard indigo +malacca +serow +adobe lily +yacca +palestra +penalty box +scrub beefwood +reenactor +screening +white bryony +alderleaf Juneberry +harpoon +alpine clubmoss +neurosurgeon +surrey +sweet calabash +Scotch laburnum +coquille +French honeysuckle +extrados +pipe cleaner +southwestern white pine +Virginian stock +scaly lentinus +aileron +carob bar +swordfish +Alpine woodsia +negus +wireworm +sweep +goldfields +drop arch +European bream +roly-poly +pin +bastard wing +fustian +wild buckwheat +lake whitefish +overcoat +water filter +Bermuda chub +New Zealand spinach +high-hat cymbal +European larch +radiologic technologist +fine-tooth comb +brunch coat +splice +electronic converter +overmantel +extern +taper +cluster bomb +teletypewriter +pinwheel +trailing arbutus +quipu +creeping zinnia +orange milkwort +tabard +Australopithecus africanus +melancholy thistle +insole +courser +darkroom +surface-to-air missile system +bark-louse +Confederate +neritina +clip-on +spouter +trench knife +outside caliper +dhak +Limburger +chuck wagon +buttercup squash +shirtdress +pouter pigeon +dirty old man +zodiac +fennel flower +mother figure +appointment +Manichaean +lignum +bouffant +rum sling +Ravenna grass +hibachi +gin rickey +American harvest mouse +cocozelle +western wheatgrass +black crappie +rhombus +Missouri goldenrod +barndoor +wild mango +pneumococcus +Boston lettuce +ratline +desert holly +cobweb +fluoroscope +ethnologist +tor +bullshot +stockade +greave +rock sea bass +slip-joint pliers +taxi dancer +schizophrenic +zill +creme de menthe +orange-blossom orchid +divot +supplejack +busybody +casemaking clothes moth +ramrod +gearbox +birdcall +Wiffle +thwart +beauty consultant +chicken paprika +trawl +skep 
+spirometer +hopper +kvass +doggie bag +bath chair +showy daisy +wild tamarind +Tarsius syrichta +glyptics +Algerian +cargo area +bunk +Velveeta +iconoclast +clinch +New Caledonian yew +false mallow +Japanese tree lilac +convex polyhedron +water boatman +cruise missile +finisher +colonoscope +cumin +wickiup +saccharin +whipcord +trailer camp +eryngo +cuckold +yam bean +fighting chair +forewoman +galingale +citron +positivist +four-lined plant bug +suet pudding +field pea +Circaea lutetiana +deer grass +trap-door spider +common corn salad +mirror carp +sounder +second-in-command +seaside alder +burgoo +ming tree +curry sauce +courbaril +green alder +figure loom +fauld +halfbeak +squelch circuit +cladode +winter cress +tongue and groove joint +dwarf dandelion +joss house +western buttercup +welted thistle +potato tree +anglewing +cookfire +marzipan +hood latch +seed shrimp +common moonseed +toasting fork +bevel +three-quarter binding +midwife toad +stage director +Pentecostal +technical sergeant +golden-beard penstemon +drunk +silky oak +corn gluten feed +T-square +stoker +selling agent +cruse +server +rope-a-dope +bicorn +matzo meal +wide wale +roadblock +false foxglove +tuck box +bandsman +smoke bush +machinist's vise +Highlander +scholiast +self-starter +Swedish rye bread +spark transmitter +maverick +maquiladora +cabinetmaker +compress +rainbow shower +huntsman's horn +mackinaw +copper rockfish +lappet +nitrate bacterium +telephone plug +soutache +Dacron +toboggan +sissoo +yogi +laurel-tree +vice chancellor +Christ's-thorn +cartridge fuse +serial port +quassia +tarweed +pecopteris +beggarweed +anchovy pear +bookbindery +woodland oxeye +toad rush +sandalwood tree +marsh andromeda +Tyrian purple +boothose +tragedienne +fragrant cliff fern +festoon +bondwoman +melancholic +butternut squash +exhaust valve +semi-skimmed milk +glowworm +Virginia oyster +Identikit +ayah +gallows tree +Carioca +monoplane +jewels-of-opar +scallop +moth miller +marsh cress +lobed 
spleenwort +ricotta +emitter +arame +tub gurnard +army attache +maniac +organizer +pheasant's-eye +Melba toast +homeboy +Bavarian cream +Maximilian's sunflower +backstop +Tremella foliacea +yellow avens +spreading fleabane +plumb level +false rue anemone +zabaglione +climbing maidenhair +doeskin +walking shoe +lancewood +material +jacksnipe +South American poison toad +agonist +hinny +paper mill +psychophysicist +valley girl +toast mistress +jorum +tiler +chicken Tetrazzini +trivet +grasshopper +three-mile limit +kink +kiang +pole horse +jig +Cornish heath +hedge thorn +false alumroot +Popper +remount +photojournalist +sideroblast +stonecress +Agave tequilana +Japanese lilac +hawse +maenad +air bag +leaf spring +dwarf willow +soda cracker +contralto +moleskin +pilaster +Audubon's caracara +pia +American organ +bleu cheese dressing +betel palm +PC board +almond willow +socializer +tone arm +stammerer +free-liver +scaler +Gentianopsis crinita +leak +black haw +hound's-tongue +grass pea +Stassano furnace +coralbells +ministrant +perihelion +Luxemburger +powder-post termite +arboreal salamander +cushion flower +foramen magnum +pyrethrum +poacher +woolly mammoth +horned chameleon +tearaway +father-figure +tufted gentian +salmi +finger millet +physa +registrar +polyoma +bamboo shoot +matchlock +seine +congress boot +bulgur pilaf +monosodium glutamate +Kentucky wonder +mycologist +kedgeree +ragweed pollen +boarfish +yellow pimpernel +tan +northern Jacob's ladder +macrobiotic diet +migrant shrike +big-cone spruce +colonialist +white dogtooth violet +bath asparagus +webbing clothes moth +ladies' room +experimenter +prairie bird's-foot trefoil +bootleg +cognitive neuroscientist +fire chief +flagfish +dendrite +stinking goosefoot +fore edge +hogfish +Spanish cedar +hotel-casino +Tory +life-support system +pea flour +cash bar +Chenin blanc +white-footed mouse +Canada garlic +salt-rising bread +roomette +mastodon +bell founder +long iron +bi-fold door +fig-bird +European water 
shrew +dyer's weed +frog orchid +allosaur +Florida yew +wild potato vine +crape fern +flat-topped white aster +klebsiella +oil heater +waxmallow +enjoyer +mesocarp +semidesert +senior vice president +coccidium +burrawong +syllabub +jump suit +harrier +leaf roller +cherrystone +cinchona tree +touring car +eulogist +air force officer +red goosefoot +cat thyme +smoothbore +slugger +cardiac monitor +cobber +blister rust +musicologist +rolled biscuit +Braun's holly fern +hog plum +nonpasserine bird +pascal celery +damson +Jonathan +Sheraton +cohune palm +egg white +baton +sixth-former +Siberian pea tree +choanocyte +wineskin +auditor +detention home +Leichtlin's camas +Chartreuse +clusia +club car +wattle and daub +security blanket +common American shad +assistant professor +marsh pea +camomile tea +gopher hole +gravure +Freudian +spirillum +maharani +equilateral +crow garlic +mammee apple +felwort +hardtop +dillenia +curlycup gumweed +pilot engine +calcimine +wooly lip fern +bitter dock +wineberry +jumper +monolingual +spinning frame +old-timer +native cat +diving petrel +sodium-vapor lamp +marchand de vin +sexton +matelote +interior designer +windfall +mole salamander +minder +bodkin +neutron bomb +Caloscypha fulgens +slinger ring +mezzo-soprano +aura +Southern Baptist +viscacha +midfield +tie +prosthetist +round-headed leek +yellow mariposa tulip +canary grass +staddle +Tokay +Muenster +brazil nut +California black walnut +applesauce +penologist +virgin's bower +tenon +steward +Jerusalem oak +red-bellied snake +bindery +scow +fluid flywheel +bullhead +satinleaf +clove +double glazing +matron +wild parsnip +winged elm +shoot-'em-up +musk deer +white rust +lock +Cornishman +Vidalia onion +corn spurry +freeloader +justice of the peace +inlay +myxobacteria +tiglon +tangram +German ivy +scented fern +woolly daisy +caretaker +gastroscope +scuppernong +spotted sunfish +guilloche +codling +wormcast +Eskimo curlew +tayra +European fly honeysuckle +septuagenarian +third gear 
+coatee +red alder +water ice +cubitiere +frame buffer +gamboge tree +pernyi moth +chicken Marengo +Galliano +Lincoln +true sago palm +hunter's sauce +carpet beater +alpine goldenrod +arch support +vehicle-borne transmission +jilt +paternoster +redcap +Siberian larch +hoary plantain +swan's down +chicane +reverse +divan +kneeler +alexic +mock turtle soup +daffodil garlic +mission bells +squilla +ursinia +winter's bark +trifoliate orange +discina +frijole +Swiss steak +maildrop +knotgrass +dog fennel +drum sander +heroin addict +costume +camber arch +shining willow +lutefisk +red porgy +microfossil +good old boy +angle bracket +pitcher sage +bordelaise +heat exchanger +carrion +bush jacket +fanjet +coach +blackface +sicklepod +Manhattan clam chowder +daisywheel printer +olive +Sphacelotheca +Spanish needles +brown root rot fungus +boudoir +encyclopedist +V-8 juice +red haw +brass buttons +gym suit +skywalk +water wagon +gas-turbine ship +stoup +lisle +sailor suit +box beam +balm of gilead +housemaster +hayrack +neutralist +water elm +brook thistle +doyenne +nark +alpha-tocopheral +WASP +hydrilla +water-shield +footlocker +variola major +pargeting +ion engine +yellow globe lily +Malecite +bloodleaf +yellow sand verbena +whorled loosestrife +packinghouse +Carolina parakeet +Virginia waterleaf +armband +red rockfish +factory ship +moon trefoil +jump seat +water gillyflower +yerba mansa +chamfer bit +compass saw +hopsacking +Indian rhododendron +sickbed +treacle +honey eater +mailsorter +seabeach sandwort +sob sister +primrose jasmine +prince consort +elocutionist +wishing cap +runner +trestle +sugar water +half-and-half dressing +fringed poppy mallow +portiere +bung +swan orchid +weather satellite +beef broth +marblewood +sapper +agitator +wren-tit +grade +allspice tree +spacewalker +American hornbeam +sieva bean +dill seed +potoroo +love-in-winter +alembic +Cheshire cheese +small white aster +Oregonian +flipper +twill +differential gear +Prince Albert +licorice 
+foster-father +Melkite +portraitist +Yosemite toad +Cox's Orange Pippin +slender wheatgrass +knob +silique +Rocky Mountain bee plant +stirrup pump +chicken hawk +sweetbrier +Sierra lodgepole pine +poulette +biohazard suit +striated muscle cell +Geiger counter +World Wide Web +turmeric +prairie wake-robin +latchet +pushball +grill +shooting lodge +floating-moss +refried beans +boojum tree +red poll +toothbrush tree +rabbiteye blueberry +red haw +sweet vetch +delta +upland cotton +ballet mistress +padrone +complementary color +great Solomon's-seal +bud brush +brandy sling +spinster +Andorran +Mojave aster +mackinaw +golden calla +bottom rot fungus +segmental arch +periwinkle +hellion +topknot +copper +Mexican hyssop +weeping love grass +point woman +pathogen +fall cankerworm +common shiner +silverspot +corer +atomic pile +crystal detector +yellow spot fungus +truncated cone +saprobe +variegated horsetail +Cro-magnon +cercaria +aglet +pollster +oyster bed +pancake turner +egg cream +sporozoite +quirk molding +mutisia +sound bow +physic nut +sugar-bush +cow +magnetron +jungle hen +brassie +rock bit +taco sauce +seeded raisin +desert selaginella +folding door +vinegarroon +Pinot blanc +rye +ellipsoid +betel nut +tree of knowledge +ambrosia +long tom +breechloader +bicolor lespediza +cosmetician +monoblast +American oil palm +prancer +farina +caiman lizard +hardball +bullock's heart +cotton rat +whiting +weather ship +sharecropper +creamcups +gas bracket +divinity +ornithologist +yellow twining snapdragon +showy goldenrod +end man +heptagon +sand dropseed +round file +guama +blue elder +sand spurry +raccoon dog +zigzag goldenrod +fast reactor +arctic willow +cyclopean masonry +punter +sgraffito +slattern +storage ring +clipper +pulasan +short-tailed shrew +scammony +daybook +umbrella tree +coloring +element of a cone +gesneriad +cane +burgoo +western coral snake +friendship plant +Leydig cell +scrutineer +hairy golden aster +inclined fault +water milfoil +bryozoan 
+nardoo +native pomegranate +curly grass +Florence fennel +resurrection plant +ice water +crown +ploughman's lunch +clustered lady's slipper +kitchenette +sand sedge +pouched mouse +roadbed +parsley haw +predecessor +super heavyweight +seedless raisin +mailbag +sparling +codling moth +squama +Bercy +thermoelectric thermometer +Jaculus jaculus +saltpan +firmer chisel +round whitefish +ramrod +criollo +pinch bar +slash pocket +thigh pad +velvet plant +intergalactic space +brazilian ironwood +whaleboat +sirrah +hanging fly +aspirator +Dominican +dribbler +yellow-eyed grass +Cornish +geophysicist +tarmacadam +marchioness +rattlesnake orchid +Alaska Native +ilama +myrrh tree +zucchini +licorice root +nosebag +lounger +troposphere +virginal +spaghetti Western +Virgin Mary +waterwheel plant +dry nurse +enate +carpet shark +rijsttaffel +stuffing nut +caraway seed bread +Leotia lubrica +kaffiyeh +Boston baked beans +halophyte +backscratcher +instillator +trefoil arch +pip +digitizer +dosemeter +Carolinian +French sorrel +boards +historian +rangpur +clansman +goral +leatherjacket +coiner +fleece +white globe lily +storm cellar +roundhouse +mediatrix +butterfly flower +swamp gum +prairie vole +rhizomatous begonia +common tobacco +Marco Polo sheep +subarachnoid space +broomweed +safety net +silky wisteria +swagger stick +spectacled caiman +derris root +soap pad +chop-suey greens +summer hyacinth +palo santo +carbohydrate loading +chinch bug +roadman +sheep plant +messiah +desk officer +banquette +drugget +trumpet arch +great duckweed +purdah +heartbreaker +hasty pudding +alligator weed +dragee +yellow bristlegrass +Jacob's ladder +campstool +coffee fern +sweet fern +little chief hare +cat-o'-nine-tails +rep +American red elder +divorcee +black salsify +cambric +sennit +Canada ginger +wonderer +Formica +cream-colored courser +zooid +European beggar-ticks +sorrel tree +piddock +blolly +red-flowered silky oak +bay +Hooker's onion +dark horse +cone clutch +Roman hyacinth +paintbox 
+mestiza +green alder +bill +panicled aster +mammogram +snuffbox fern +Rediffusion +swamp fly honeysuckle +stoup +psychiatrist +nodding groundsel +student union +cold duck +bee beetle +playbox +Psychopsis krameriana +nosh-up +earthnut +narthex +single-rotor helicopter +revetment +sweetleaf +seasoned salt +piculet +speckled alder +mackerel scad +common yellowwood +devisee +static tube +Spanish heath +umbrella plant +fucoid +Chilean +coral-root bittercress +fanatic +cachou +agony aunt +bird's-foot fern +washwoman +torchbearer +placoderm +frosted bat +spicemill +Cape lobster +hard-shell crab +colonizer +camphor daisy +friar's-cowl +false tamarisk +toggle joint +tinsmith +theorist +hydrologist +loganberry +universal donor +northern whiting +tent-caterpillar moth +russet +kangaroo mouse +African scented mahogany +bastinado +breast implant +betel +grade separation +vox humana +stodge +Maryland chicken +Anguillan +oil pump +governor's plum +narcissist +deadwood +private citizen +winker +ropewalker +gidgee +Lothario +ski resort +major-domo +von Neumann machine +belaying pin +water parsnip +Fissipedia +luggage carrier +spring water +oyster stew +kohl +celesta +date-nut bread +punchboard +sunniness +hospital train +man +rack and pinion +mixer +pousse-cafe +narrow goldenrod +Maxim gun +stiff +recruiting-sergeant +watch glass +white hellebore +tung tree +prairie white-fringed orchid +beef Stroganoff +scoffer +grassy death camas +Shawnee cake +tapioca +Short's aster +banker +laparoscope +honeyflower +Caterpillar +electric clock +baling wire +huntress +Surinam toad +art school +incurable +Canton crepe +apple juice +hipline +bronchoscope +marshmallow fluff +Texan +wild fig +sawed-off shotgun +forestay +red kauri +fish slice +Egyptian grass +English walnut +brown sauce +ogee arch +nectary +chambray +leather flower +phloem +Persian violet +bomb calorimeter +western narrow-mouthed toad +soup du jour +sickle alfalfa +caracolito +periscope +coralberry +sword bean +sigmoidoscope +water 
locust +hygrodeik +sycamore +sheikdom +ballistocardiograph +clove +akee +fucoid +jacquard +cat's-ear +puritan +slender wild oat +smooth softshell +purchasing agent +landing craft +chartist +lace bug +sharksucker +Virginia chain fern +horseradish +namer +ripcord +personage +aspirin powder +puku +Wankel engine +nightcap +velvet bent +roridula +cytogeneticist +olm +almond extract +common heath +fringe-toed lizard +Kentucky yellowwood +lithosphere +cramp +bulgur +scurvy grass +officer's mess +frigate +electroscope +giant chinkapin +opah +rutabaga +wood hoopoe +Farley maidenhair +shingle tree +argentine +router +palm nut +quillwort +hiba arborvitae +runcible spoon +hireling +sickbay +alpine totara +white lupine +Cotoneaster horizontalis +desert plume +staghound +Sea Scout +opalescence +enophile +Jersey elm +coal house +Helvella acetabulum +selenium cell +white camas +creole-fish +auger +fragrant agrimony +research center +achromia +shank +cottonseed +mod con +extension +sugar beet +winter flounder +silky dogwood +strop +tokamak +rabbit ears +baby farmer +fireman's ax +serration +taproot +socket wrench +action officer +Chilean jasmine +Greek fire +stem-winder +body louse +lumpsucker +stink bomb +American lady crab +dicer +lie detector +maneuverer +black-headed snake +tiger moth +shooting stick +spermatid +babushka +deaconess +home +prior +chanfron +chickasaw plum +big-eared bat +rusty woodsia +tertigravida +miniver +combretum +habit +bluehead +angled loofah +gipsywort +fire-on-the-mountain +purple milk vetch +alpine gold +merozoite +loddon pondweed +Uniat +provost marshal +Gyromitra fastigiata +Coigue +proconsul +oarfish +San Jose scale +filature +chimney plant +spiny softshell +bluecoat +live axle +river limpet +clever Dick +pink bollworm +Japanese plum +roarer +caricature plant +wardroom +Texas chachalaca +Bahia grass +Moreton Bay tulipwood +accessory fruit +pearl barley +ashcake +bunt +Polynesian tattler +pine fern +laughing owl +potato fern +speaking trumpet 
+adjoining room +bearing rein +banana quit +redbrick university +Scleroderma bovista +magdalen +pressurized water reactor +advisee +NIMBY +poorwill +almond moth +comedian +star tulip +cracked wheat +water pump +guest of honor +yellow-breasted bunting +hire +pedate leaf +augur +purple locoweed +Socinian +upland white aster +guesthouse +double reed +detention basin +rollmops +hitch +bodega +mayeng +sparkplug wrench +attack dog +peach melba +heliozoan +tower mustard +blue mold fungus +lamplighter +banded sand snake +smooth crabgrass +elsholtzia +bodkin +Aegean island +bag lady +alewife +arcella +electrical contact +common ax +animist +concave polyhedron +coalface +climbing perch +yellowtail +hobble skirt +marquee +Russian dandelion +snow mushroom +polo ball +NADA daiquiri +cormous plant +chaparral mallow +inside caliper +milking stool +fallout shelter +sea gooseberry +Danish blue +grissino +chimney breast +mosquito fern +soundbox +spring chicken +epauliere +cape forget-me-not +japan +saddle oyster +white fritillary +push-button radio +bladder senna +bladder stone +macedoine +moire +Shawnee +starnose mole +douroucouli +horseradish sauce +electron gun +cotter +console +park commissioner +free press +lump sugar +western poison oak +apple maggot +keurboom +lisper +griffon +burin +horseshoe whipsnake +Jacobean lily +spinner +cochineal insect +emesis basin +sowbane +humanitarian +uakari +three-dimensional radar +wild hollyhock +heartseed +swinger +two-by-four +mop handle +common amsinckia +traitress +rush aster +fibrous-rooted begonia +violet-flowered petunia +milliammeter +alidade +azure aster +celery seed +snorer +scarlet plume +obtuse leaf +heathen +rose chestnut +headrace +dwarf buckeye +Pacific tripletail +wiggler +bounty hunter +Lowlander +slate pencil +typist +syconium +vaquita +skybox +business lunch +gusher +curacao +palometa +Diapsida +light diet +sourdine +thorny amaranth +potato fern +cartridge extractor +peshmerga +chaffweed +tahoka daisy +hematologist +massage 
parlor +diverging lens +breadroot +papyrus +amarelle +cover plate +hubbard squash +cryptomonad +whitetail prairie dog +rabbit burrow +orthochromatic film +goncalo alves +Chile bonito +tent-caterpillar moth +Manila grass +buck sergeant +mustard seed +crested wheatgrass +wise guy +asarabacca +field pea +bite plate +barbasco +heart-lung machine +mouse-eared bat +piping guan +gun pendulum +climbing onion +fungus gnat +Livonian +one-hitter +Chilean firebush +Sonoran whipsnake +round scad +myelogram +Rhodes grass +vomitory +roble beech +South-African yellowwood +molasses +Velcro +common calamint +radiation pyrometer +sketcher +chaparral pea +coffee stall +Australian nettle +bilimbi +Khedive +visionary +field spaniel +devilwood +collimator +Siberian spruce +sling +limestone salamander +ribbon worm +hazel +petter +coolant system +artillery plant +bailiff +chameleon tree frog +microsporophyll +maiden blue-eyed Mary +Drosophyllum lusitanicum +cocozelle +king post +nailer +knobkerrie +tovarich +Intelnet +worm lizard +drop forge +wool grass +brown bullhead +anthropoid +vitamin A2 +creche +hickory nut +whiffletree +deipnosophist +Muskhogean +masochist +hypsometer +gliricidia +complexifier +wild licorice +reconnaissance vehicle +fives +beefsteak plant +eastern dasyure +bookworm +crested coral root +wire recorder +cinnamon vine +bubble +Newfoundland dwarf birch +spruce bark beetle +teetotaler +fad diet +ascus +spicebush +African coral snake +soft-shell crab +Postum +packhorse +sand cherry +cricket-bat willow +middlebrow +Hungarian sauce +buffalo clover +jimsonweed +latanier +stablemate +jumper +zoospore +smooth woodsia +flowering ash +unilateralist +lomatia +flapper +wild cotton +Siberian wall flower +probe +bankrupt +blockade +lemon geranium +fig leaf +basic point defense missile system +clack valve +buttinsky +ingenue +mountain everlasting +zebra-tailed lizard +shaving-brush tree +evergreen huckleberry +core drill +lugworm +Cashmere goat +doorjamb +minelayer +student center 
+horsehair +European dewberry +white broom +arenavirus +eastern poison oak +rye ergot +Tupi +tensiometer +fleawort +coquille +icing sugar +junior lightweight +Doppler radar +mahuang +candlepin +chambermaid +evergreen blueberry +Eton jacket +parvis +solleret +molded salad +malvasia +birth-control campaigner +nonagon +backswimmer +ogee +bowstring +salt marsh mallow +trapezohedron +hoary willow +speech therapist +Zinjanthropus +core +red-backed mouse +eptatretus +mossy saxifrage +Aristotelian +Thessalonian +searing iron +bifocals +falangist +field pea +packsaddle +lay reader +hoecake +cuboid +white maire +iceman +lobscouse +neckcloth +color-blind person +Chinese holly +assemblyman +white-lipped peccary +kava +plastron +crab louse +hook wrench +trailing four o'clock +junior +skilly +internet +tonguefish +footman +sub-assembly +evangelist +track +bench lathe +desk clerk +scalded milk +chamois cloth +American marten +chachka +nondescript +pellitory-of-the-wall +swamp candles +procurator +cuddy +farkleberry +mountain male fern +trawl +dual scan display +fish meal +prospector +convener +guano bat +ant shrike +picture rail +sand rat +gynophore +quilting +sleeper +summer savory +Cotoneaster dammeri +smooth sumac +slumgullion +suite +catalufa +spherule +lean-to tent +gryphon +gas shell +short iron +sweet sultan +dewberry +Victoria plum +American water shrew +X-ray tube +macebearer +green arrow arum +abbe +poke milkweed +atheist +Fosbury flop +Ord kangaroo rat +moldboard +wheat germ +explosive trace detection +whippoorwill +examiner +tallyman +Crookes tube +wild peach +fringed grass of Parnassus +Crookes radiometer +Atlantic croaker +lobster stew +spring cress +maggot +pacer +hydra +Zionist +pepper tree +diamante +baize +Rhodesian man +county agent +respecter +Anglican +antimacassar +materialist +Swan River everlasting +cloud grass +toll line +C battery +chinese mustard +grass poly +warming pan +seasonal worker +common sickle pine +bathysphere +elegant Habenaria +card table 
+Chilean cedar +brocket +collimator +malted milk +avadavat +fire marshall +coloratura +yellow spiny daisy +fingerstall +narrow-leaf penstemon +indigo broom +pillwort +bearberry willow +Etonian +certified milk +climbing bird's nest fern +field coil +wrist pad +parr +kaoliang +engelmannia +stocker +satrap +Nantua +spearfish +caper tree +gold-tail moth +mountain chinchilla +sea milkwort +westerner +army cutworm +leaf-nosed snake +neurobiologist +xeranthemum +Eastern silvery aster +ecclesiastical attire +caper +Ukranian +bight +button fern +peach pit +oligodendrocyte +maar +digitigrade mammal +streptobacillus +sensitometer +preemptor +oat +bell foundry +crown lens +rock purslane +Junior +Brazilian guava +kicksorter +Ohio goldenrod +red mulberry +King's Counsel +mountain four o'clock +fairy shrimp +fell +oca +sycophant +chantry +dermatoglyphic +bomblet +keyhole saw +hangman's rope +little barley +lion-jaw forceps +giant scrambling fern +popper +dulcimer +Espagnole +tardigrade +smooth-haired fox terrier +bullbrier +rewa-rewa +Japanese poinsettia +trunk line +cannery +helminth +American spikenard +prince's-feather +arthroscope +ginger +aphakic +pilot bit +angle of refraction +low-sodium diet +wall creeper +growler +praetorium +Hall of Fame +soupfin shark +Molotov cocktail +kaffir boom +stitcher +sawwort +flagellant +Atlantic herring +Reticulitermes lucifugus +voltaic pile +snowy orchid +southern flounder +skysail +osage orange +white mullein +lined snake +tolu tree +poliovirus +foreman +burette +jackass bat +invigilator +electromyograph +acarus +presence chamber +columbian mammoth +hyacinth bean +pilot +meadow jumping mouse +Maria +outskirts +aftershaft +Queensland nut +schlockmeister +plainsman +afropavo +scarlet musk flower +five spice powder +gunboat +multiplex +Dutch uncle +louvered window +chimney corner +cuscus +psalmist +Vichy water +signer +amphiuma +harmonizer +authorizer +naiad +control rod +stentor +mountain bladder fern +gig +read-only memory chip +assenter 
+vixen +hermitage +corn dab +locksmith +cockspur thorn +variable-pitch propeller +western red-backed salamander +dolman sleeve +cultist +sweet buckeye +pine vole +Peking man +mountain swamp gum +nimblewill +bethel +aye-aye +lancelet +teff +Alpine celery pine +endive +nipa palm +center of curvature +seeder +Sabahan +sea scallop +social secretary +gorgonzola +western chokecherry +misanthrope +rabbitweed +beggarman +button fern +white mallee +doodia +mastiff bat +roper +prima donna +blanc +holding pen +fingerling +skyhook +flophouse +steam chest +crystallized ginger +acrocarp +horse pistol +true mahogany +costmary +ballistic galvanometer +jaunting car +bartonia +rep +mandibular notch +bubble and squeak +umpire +fringed loosestrife +bear oak +ski jump +staggerbush +plumcot +thermal reactor +field brome +bodkin +jackknife-fish +malope +writing arm +gold fern +Stayman Winesap +merlon +eclectic +fluxmeter +emeritus +imam +drum +pop tent +capital ship +subalpine larch +flail +Lorenzo dressing +tomboy +eastern woodrat +warrantee +Pacific spiny dogfish +sheepshead porgy +farthingale +Cryptoprocta +power loom +communicant +howdah +ectomorph +false foxglove +basset horn +odd-pinnate leaf +Wisconsin weeping willow +Queensland bottletree +dampener +corbel arch +silent butler +Circe +town clerk +Japanese chestnut +bloodwood tree +switcher +cup hook +spreader +rice rat +straightedge +traverser +fluid drive +Spanish paprika +sour milk +poison camas +bean dip +card table +vinegar fly +vizier +electric-discharge lamp +purple rock brake +dynamo +Japanese snowbell +Grindelia robusta +neuroglia +safflower seed +coronet +frown line +Renaissance man +Steller's sea cow +book scorpion +isosceles triangle +arthritic +spherical triangle +kangaroo mouse +garden orache +stemless hymenoxys +titi +out-basket +gent +columnea +mint sauce +mouthbreeder +Liebig condenser +cheerer +assegai +stickler +Merostomata +dimmer +grey poplar +common heath +scorzonera +glory hole +Blackfoot +oil slick 
+musketeer +apple geranium +daisyleaf grape fern +gas furnace +bijugate leaf +Arabist +star-thistle +hand throttle +huckleberry oak +lift pump +maulstick +Rome Beauty +Newburg sauce +pit +volunteer +Baldwin +ark +Asian horseshoe crab +black calla +marlinespike +Gentianopsid procera +guinea gold vine +tucker-bag +desk sergeant +piezometer +migrator +keelson +executrix +sackcloth +onion smut +buckboard +substitute +pudge +mess +cinchona +intervenor +gravimeter +pederast +censor +gastroenterologist +cutlassfish +launch +demerara +Diegueno +bog bilberry +aglet +soda fountain +crank call +harpoon gun +ribbon fern +Gurkha +output device +epilating wax +greasewood +water horehound +return key +fairy swallow +spatulate leaf +culverin +leptocephalus +kleptomaniac +barley water +bleeding tooth +Cheyenne +maleberry +limber +tapenade +whorled aster +toe +revenant +lap joint +vein +truant +florest's cineraria +morning dress +trichodesmium +nightshirt +element of a cylinder +shopaholic +section hand +electrodynamometer +Guadalupe cypress +rosebud +racist +avaram +keeled garlic +Alaska rein orchid +orange toast +cunner +dipstick +Neolentinus ponderosus +bulbil +charlotte +pull-through +header +Manduca quinquemaculata +persona grata +elegist +cafe royale +scup +semanticist +wood sage +field magnet +tundra +bay myrtle +alluvial flat +arrowleaf groundsel +celtuce +baryon +must +entrant +othonna +pied-a-terre +liza +sticky aster +grasshopper mouse +prison guard +tire iron +bomb rack +Spanish American +sheltered workshop +turfing daisy +backbone +tangle orchid +creeping willow +dumb bomb +horse cassia +barosaur +Yavapai +shrimp Newburg +peanut worm +dwarf chinkapin oak +corchorus +brick cheese +by-catch +stover +Urnula craterium +clasp +Kekchi +alpine coltsfoot +soybean future +altar wine +ripping chisel +encephalogram +mountain spleenwort +transferee +remoulade sauce +American rock brake +stenographer +read/write head +loblolly +ground +powdered mustard +brake band +sea dahlia +freak 
+proconsul +Coffey still +Sivapithecus +pellitory +palm cat +skew arch +American angelica tree +vigilante +candelilla +andryala +amarelle +swiftlet +petcock +associate professor +sclerite +open circuit +Virginia crownbeard +Last Supper +button tree +scyphozoan +margate +mercury cell +horsewhip +water scorpion +companionway +drop cloth +Amhara +miraculous food +pro-lifer +embryologist +Creole +bombazine +Indian blackwood +cubeb +trace detector +gros point +main-topsail +meringue kiss +spree killer +capstone +specimen bottle +woolly apple aphid +silverweed +American barberry +gallfly +European bog asphodel +northern flying squirrel +alliterator +Old Catholic +heliograph +Pteris cretica +tippler +pump well +allspice +balancer +scarlet bugler +lantern fly +white prairie aster +krummhorn +robin's plantain +Pacific sardine +patty-pan +decaffeinated coffee +western saxifrage +warrantee +colorimeter +ball bearing +makomako +foot +troika +apricot sauce +data multiplexer +rose-root +sound film +Northern dewberry +water hickory +swing door +spastic +Oligoporus leucospongia +botulinus +tamale pie +Sagittarius +muff +spicebush +petiolule +pump action +Parry's pinyon +split-pea +rudder blade +princess royal +wormseed mustard +honey guide +pip-squeak +fin keel +foretop +cyrilla +Navaho +melanocyte +deist +silver tree +citrus whitefly +Morrow's honeysuckle +green peach aphid +longanberry +call-board +wild yam +novelist +toothed spurge +alienee +pond apple +allspice +Carolina lupine +Jack of all trades +white false indigo +boiled dinner +princewood +sailor's-choice +false bracken +microbrewery +black grama +tutee +brickkiln +sea raven +guesser +wirework +European lemming +thyrse +plains lemon monarda +milo +shunt +spotted cowbane +anchovy sauce +grande dame +Maryland golden aster +Chinese puzzle +boarfish +burweed marsh elder +defense contractor +nitric bacteria +Belgian hare +beach plum +conformal projection +sand fly +steering linkage +quickset +Mahayanist +Geiger tube +loudmouth 
+Lancastrian +brownie mix +ex-spouse +deltoid leaf +Shasta salamander +rabbet joint +purple anise +garibaldi +gebang palm +bladderpod +Host +great bowerbird +string cheese +spinning jenny +drift net +matriarch +guar +bitter betch +panda car +mess +plains pocket mouse +scarlet wisteria tree +deerberry +reamer +homing torpedo +molehill +stockyard +reniform leaf +rag +symmetry +Texas star +lerot +pickle relish +three-seeded mercury +cotter pin +ice-cream bean +farmyard +bar magnet +hansom +prickle cell +renal cortex +pest +Ultrasuede +sailing master +brougham +wastrel +amboina pine +Canary Island hare's foot fern +ninepin ball +southwestern lip fern +usherette +lemon drop +star begonia +weeds +saltworks +Persian melon +corbina +medusa +bucksaw +Gibson girl +diameter +American twinflower +kino +clear liquid diet +angiocardiogram +wetter +oyster cracker +yellowfin mojarra +wild parsley +life tenant +broom closet +Corynebacterium diphtheriae +square shooter +bedwetter +ball-and-socket joint +nonsolid color +Salmonella typhimurium +buffel grass +hip pad +subaltern +heliothis moth +trail boss +hayloft +Francisella +primordial dwarf +cock-a-leekie +sugarplum +propulsion system +tyrolean +Carib +salai +ketembilla +ironclad +cornhusk +heckler +multistage rocket +north island edelweiss +Chaldean +twenty-two pistol +Francophobe +scofflaw +sickle feather +screw bean +sea squill +Scopolia carniolica +agglomerator +western holly fern +presenter +straight pin +Myxine glutinosa +Colbert +clover-leaf roll +war paint +bird's-eye bush +longfin mako +running suit +arrow wood +margrave +blue fleabane +dracontium +plastron +chimney swift +child prodigy +commissar +turtle soup +postulant +archaebacteria +snakefly +Pitot tube +chap +smilo +Malthusian +French roof +worm wheel +gulag +pointed-leaf maple +pull-off +Cathaya +American green toad +ball cartridge +infiltrator +snowfield +crotchet +auxiliary pump +bearnaise +galax +chaenactis +olympic salamander +sundowner +cows' milk +beach plum 
+moss-trooper +Arabidopsis thaliana +cat's-claw +bog rosemary +ribier +book agent +bumper jack +beefwood +monk's cloth +alpine bearberry +climbing fumitory +cucking stool +puka +Piltdown man +property man +discharge lamp +X chromosome +knobble +lobster Newburg +herbalist +sunray +golden saxifrage +leopard cat +muffle +stonewort +blancmange +intraocular lens +trepan +desert mariposa tulip +plume poppy +Dane +martynia +shaver +white milkweed +napu +tansy-leaved rocket +abortus +telemeter +tansy mustard +harpy +honeysuckle +ironworks +testacean +Tartuffe +silvervine +Sihasapa +surface gauge +western blind snake +paramyxovirus +Icelander +bird louse +stockbroker belt +test-tube baby +ague root +little golden zinnia +dietician +elephant's-foot +dirty bomb +sailing warship +brier +tinter +Connemara heath +potato fungus +bait casting +decagon +rosefish +die +high-pass filter +solitaire +widow's walk +goldthread +Tudor +trews +orange pekoe +ninon +soda jerk +sump +flying carpet +burial garment +oblanceolate leaf +press gallery +Shintoist +three-centered arch +spreading pogonia +Moro +foxtail orchid +Ghanian +dry kiln +thane +naranjilla +bitter pea +American bugbane +apron string +oyster fish +Port Jackson fig +prize winner +high-water mark +Oneida +smoking room +potato skin +charge d'affaires +gantlet +amyloid plaque +barmbrack +mate +arrow leaved aster +handbarrow +horned screamer +virago +linoleum knife +rattlesnake root +K ration +reset +foot brake +red coral +good guy +aberrant +lavalava +poleax +garden webworm +sneezer +mountain heath +American dog violet +eolith +chimneysweeper +matriarch +smalltooth sawfish +sea mouse +tubercle bacillus +superconducting supercollider +Abney level +darnel +gherkin +celery salt +Tungus +pulasan +oriflamme +death camp +redhorse +apprehender +scion +selectwoman +pentahedron +principal +old school tie +slice bar +chanar +pimento butter +wailer +zero +mescal +rosebud orchid +stone bramble +Jarvik heart +NOC +pitchman +rat cheese 
+strawberry tomato +dwarf golden chinkapin +landau +tocsin +ampulla +scratcher +crab Louis +ginseng +ripcord +polluter +tensiometer +eyewitness +aalii +Oregon crab apple +conservator +day jessamine +hexahedron +suture +tippet +linsey-woolsey +vernal witch hazel +stainer +egocentric +canistel +nudger +shipping agent +shortleaf pine +battle sight +cheese spread +weeder +incendiary bomb +honeyflower +stovepipe iron +stepper +hellgrammiate +votary +aflatoxin +arquebus +impulse turbine +pipewort +garrote +glow lamp +pigsticking +blood clam +surface search radar +Bolshevik +platen +chariot +Gentianopsis thermalis +water level +quandong +catalytic cracker +giant foxtail +nut butter +drainplug +holdover +coastguardsman +Secretary of Health and Human Services +Seeing Eye dog +American plaice +coquilles Saint-Jacques +christella +medium +clingfish +lally +light-o'-love +Gentianopsis detonsa +taper file +signal detection +trip wire +lignosae +receiver +sedan +mud puppy +corn sugar +Philippine mahogany +magnetic pole +jointed rush +trapper's tea +Dorking +welcome wagon +clammyweed +guard +false azalea +convalescent +babassu +dedicated file server +colossus +air search radar +marquess +straight flute +sand stargazer +sea catfish +rosilla +ripsaw +Bermuda onion +peach sauce +sagebrush mariposa tulip +yashmak +Virginia mallow +erose leaf +sand blackberry +boulevardier +forester +choragus +onion mildew +threadfin +winged pea +sugar daddy +rotary press +styracosaur +rathskeller +Japanese millet +anchorite +coral drops +false gavial +eastern pipistrel +cheese press +Chinese primrose +pamperer +real estate broker +power worker +breeder reactor +nutcracker +piano wire +cushaw +Sinanthropus +firebreak +kelp greenling +herba impia +toll call +yoke +bird fancier +evening-snow +fever tree +reed meadow grass +flanker back +toggle bolt +Santa Cruz cypress +carbonnade flamande +northern dune tansy +mikado +millettia +forty-five +court +icepick +holm oak +Japanese angelica tree +Pacific cod 
+cant hook +urologist +spelt +lekvar +enologist +Mediterranean flour moth +prickly-edged leaf +Spanish grunt +dune cycling +frostweed +whisperer +tucker +Roman wormwood +counterterrorist +woolly alder aphid +Nuttall oak +snail butter +threshing floor +motley +forge +water mold +mummichog +sulfur paintbrush +head +walking delegate +jujube +peachleaf willow +Christmas bells +valley pocket gopher +bear's-paw fern +Lanthanotus borneensis +pearl hominy +placeman +swage block +offerer +stargazer +jeweler's glass +male chauvinist +crossbar +Oktoberfest +tamarau +micronutrient +large-leaved aster +tasset +tepary bean +sausage curl +ivy +snob +roller towel +wood meadowgrass +archil +padrone +prairie rocket +tongueflower +kidney fern +Carolina buckthorn +sea island cotton +landscape architect +realist +oyabun +mother hen +ostracoderm +esker +heliophila +nympholept +shining clubmoss +press agent +clam dip +Djiboutian +white currant +codfish ball +hand cheese +kraal +trident +conventicle +bacteroid +Indian plantain +quandong +kola nut +signor +theater light +musk clover +canistel +silent partner +steel-wool pad +diggings +affluent +sightreader +John Doe +arrowworm +goatsfoot +guardroom +wild cinnamon +kaffir boom +ink eraser +yardie +industrialist +sea lily +polarimeter +Polistes annularis +western big-eared bat +omnivore +Ted +horsecloth +crab cocktail +vacuum chamber +flower-of-an-hour +bilge +poleax +neolith +Montezuma +plum-yew +welfare case +trave +pipe bomb +shading +Centigrade thermometer +bangalore torpedo +celery top pine +nuclear rocket +fowling piece +anti-Semite +landscape +derris +bush honeysuckle +Mediterranean water shrew +ticket collector +masked shrew +white dipladenia +Savoyard +bondman +tempter +pygmy cypress +pentathlete +thruster +usurper +Arminian +yerba buena +ice field +ichthyosaurus +sackcloth +bean tostada +Oxbridge +Pteropus hypomelanus +thinker +bank robber +ape-man +thurifer +knawel +mule fat +hot spot +hairy-legged vampire bat +night raven +hook 
and eye +crocodile bird +skunkweed +beaver rat +cypress sedge +florida selaginella +April fool +Jonah crab +glass wool +corkwood +dwarf elder +hinging post +gentile +Brazilian trumpeter +witch doctor +thermograph +pink shower +Mao jacket +capelin +parang +bradawl +stooper +jewel orchid +citrange +oarswoman +Macedonian +particolored buckeye +pachycephalosaur +satinwood +Chinese brown sauce +peep sight +straight man +quandong +chamois cress +nonfat dry milk +rosin bag +Leiden jar +Grimes' golden +spirillum +grass vetch +carillonneur +downy wood mint +melon ball +sweet calabash +chlamydospore +bombshell +sidewall +sprig +Indian button fern +globe pepper +rough-stemmed goldenrod +bocconia +bubble chamber +sand dab +plum-fruited yew +aecium +marrowfat pea +hobbyist +whipper-in +salad burnet +neckband +Tangier pea +sauce Louis +salad burnet +artist's loft +koumiss +Nazarene +cutter +scrim +drape +crab-eating dog +deckhand +bedroll +gaff +stifler +pink lady +great plains paintbrush +patternmaker +yoke +caryophyllaceous plant +angrecum +quadriplegic +grid +genlisea +aspic +water table +junket +signore +Mutillidae +proprioceptor +pivoting window +Indian poke +synchroscope +trichion +tarahumara frog +proctoscope +abomination +purslane speedwell +breast drill +Japanese barberry +mandrake root +breakable +salon +American watercress +take-up +entrenchment +cocktail sauce +Scotch asphodel +borough +matchmaker +Seneca snakeroot +pointsman +psephologist +clustered poppy mallow +onion thrips +nuclear-powered ship +organizer +deciduous holly +balsam willow +enzymologist +caraway +drip loop +dog laurel +Orangeman +sapsago +polymath +backplate +leathery grape fern +modillion +two-timer +handhold +consignee +white stringybark +nettle-leaved goosefoot +bookmaker +disk drive +doliolum +palmist +packinghouse +Spandau +Whipple's penstemon +sword grass +ribbon development +pearly-shelled mussel +winter heliotrope +rogue elephant +deck tennis +Venus's flower basket +football +shim +boatswain 
+blinks +armored catfish +hooded seal +outdoorswoman +water starwort +upholstery needle +pleurodont +silky anteater +cornmeal +lead-in +redfin pickerel +horse balm +Rydberg's penstemon +cascade transformer +fly poison +Volvaria bombycina +broad-leaved twayblade +pastry cart +body plethysmograph +waverer +hardware store +Parry's penstemon +European sanicle +strawberry geranium +cross-examiner +head gate +devil's tongue +hemiepiphyte +pine hyacinth +machmeter +spirit lamp +field judge +Rock Cornish +mayhaw +Sassenach +bog pimpernel +parallel interface +crowberry +roach +Aegyptopithecus +cajan pea +lapboard +cryostat +magnetic storage medium +white yam +Lombard +rhymer +bed and breakfast +bunya bunya +rifle grenade +caterer +collared pika +anti-submarine rocket +bookkeeper +Western mountain ash +profit taker +fruitlet +Knowlton's cactus +infernal +beefsteak begonia +lunula +emulsion +intermediate wheatgrass +titfer +European sea bream +bigeye scad +yak butter +kola +cone pepper +plesiosaur +ragwort +penal colony +black carpet beetle +lubber's hole +Stapelias asterias +yard marker +balloon bomb +Scythian lamb +armory +selsyn +marblewood +spirula +fatalist +hash head +armiger +Dom Pedro +white-chinned petrel +ballast +orthopter +greater water parsnip +clutch +largeleaf holly +Evangelist +king whiting +tuna fish salad +Muscadet +surpriser +jumping bristletail +proportional counter tube +Hamburg parsley +obstructionist +pus-forming bacteria +creep feed +stepbrother +janissary +control freak +trusty +trepan +King William pine +orthicon +geological horizon +molecular biologist +violator +pariah dog +Austrian +conciliator +Fauntleroy +packing needle +mazer +Saturday night special +leucocytozoan +coastal rein orchid +whirligig beetle +capitalist +breeches buoy +clubroot fungus +meadow spikemoss +Kichai +Spanish lime +land office +camera obscura +strafer +purple-stemmed aster +lusterware +valve +Roman nettle +isthmus +breadstuff +sealskin +maleo +bilge keel +carissa plum +fish 
fly +kolkhoznik +heath pea +cowage +hog sucker +Sam Browne belt +inductor +wild licorice +Socotra begonia +supernumerary +Angle +red shrubby penstemon +toilet kit +tawse +sweet bells +kawaka +brown soft scale +lyssavirus +betting shop +double-crosser +macrotus +climbing hempweed +poi +strip mall +deadhead +petit juror +tract housing +American mistletoe +lace-flower vine +precipitator +endoparasite +hairy wood mint +red snapper +Victorian +hog peanut +line of heart +opossum shrimp +plumcot +Bavarian blue +slops +light flyweight +oregano +sand myrtle +pocket battleship +curator +narc +hydraulic cement +plains pocket gopher +closed loop +pluralist +molter +Christmas bush +snuffers +slender knapweed +footwall +plage +caper tree +red siskin +tender +boat train +tipster +low-pass filter +student lamp +morosoph +japonica +bellows +herald +oyster plant +savory +mail +computational linguist +blade +winter crookneck squash +zoomastigote +blackmailer +richweed +dialectician +genip +plumed scorpionfish +jet bridge +thermopile +billy buttons +Brule +millwright +Arenaviridae +Jones' penstemon +monastic habit +genipap fruit +burnous +dairyman +top +crab-eating raccoon +quadrangular prism +pilot burner +weeder +trireme +boy wonder +man of letters +Catawba +high-muck-a-muck +light circuit +bloodworm +lappet caterpillar +half-and-half +office boy +saddle stitch +mistletoe cactus +false chamomile +Catalina cherry +workhouse +Jamaica quassia +britches +tooth shell +reduction gear +carrot pudding +balsam woolly aphid +handspike +aioli +silver hake +flour bin +wireman +gas-cooled reactor +aficionado +plus fours +gitano +gene chip +oilfish +ingenue +tulip orchid +late purple aster +pork and beans +envoy +lemon extract +milk bar +black huckleberry +ground roller +Connecticuter +siderocyte +Jacquard loom +chub +meat safe +stock cube +Australian sumac +purple sanicle +tailless tenrec +dog wrench +rainbow cactus +castor bean +scintillation counter +eohippus +pawnbroker +gauge boson +front 
man +early warning radar +bearing wall +Bourbon +sandwichman +sild +gravelweed +perishable +cembra nut +riflebird +quicksand +slate +sweeper +ship-towed long-range acoustic detection system +defamer +president +vitamin K3 +challis +tanekaha +bloodwort +grenadier +quietist +Zairese +fucker +foremother +gesneria +print buffer +salsilla +fissiped mammal +fender +consulate +acidophilus milk +Southern dewberry +snail darter +Panama redwood tree +dehydrated food +bush willow +coffee fungus +Sinologist +Mesoamerican +hood +large civet +deck-house +cyborg +smuggler +pepper sauce +cyberpunk +Grand Inquisitor +persona non grata +haggis +weeping tree broom +stop bath +modifier +coyol +conodont +yellow giant hyssop +optical pyrometer +Carolina moonseed +marinade +aspartame +false wintergreen +cityscape +philter +turnery +hemiplegic +chuck-will's-widow +vower +track star +myrtaceous tree +small civet +intelligence analyst +dogcart +yardman +cross bit +holometabola +platen +sweet cassava +Comstock mealybug +acute angle +Communist +alcohol thermometer +mountain hollyhock +Mead's milkweed +highjacker +Townes +congou +Astrophyton muricatum +lazybones +roughcast +pressure cabin +clinch +cinnamon +smoke bomb +quandong +tout +office-bearer +punctum +efficiency apartment +Queensland hemp +Ceylon bowstring hemp +newswoman +vermin +fetid bugbane +grantee +sanitary landfill +gluten-free diet +clabber +shillelagh +white lettuce +sweet coltsfoot +beggar's lice +samite +loser +flasher +water star grass +banana passion fruit +translator +artificial kidney +Virginia creeper +American crab apple +cactus mouse +nebbish +Ligustrum obtusifolium +vox angelica +stringer +hunter +know-it-all +scene painter +invalidator +jungle cock +basilica +coriander +California single-leaf pinyon +miles gloriosus +pina cloth +law agent +scarlet fritillary +keurboom +bailor +ramjet +seedling +rib joint pliers +ways +picket ship +Surgeon General +wasabi +marquis +clostridium perfringens +Helvella sulcata +furnace 
lining +kingwood +painted sandgrouse +plain wanderer +Indian madder +silver screen +bailey +dwarf spurge +Serbian +ball-buster +shaheed +Platte River penstemon +tensiometer +mute +nymphomaniac +Yokuts +arroyo willow +whipping post +class act +load +winged everlasting +periodontist +diarist +robber frog +diestock +curry powder +ratchet wheel +store detective +hog plum +prune whip +shortwave diathermy machine +Anabaptist +post chaise +Kennan +bean caper +delegate +orderly sergeant +celtuce +jumping bean +gowen cypress +puddingwife +registered nurse +West Saxon +rosita +gun room +nasotracheal tube +matchboard +flagship +Boswellia carteri +Canadian pondweed +wonder boy +sewer rat +dimetrodon +pantograph +marsh bellflower +angoumois moth +slippery dick +woolly indris +creme de cacao +dulciana +Jewess +Macadamia integrifolia +least shrew +don +diffuser +black-stem spleenwort +grouseberry +goniometer +annotator +sticktight +gossip columnist +speechwriter +capon +rock hind +Liederkranz +chandler +echocardiograph +sidelight +fisher +brocket +New Zealand daisybush +northern sea robin +roller bandage +peachick +pellet +pichi +plug fuse +spark coil +buckwheat +brood bitch +wedgie +dwarf bilberry +filigree +bull +queen +dodo +Salish +denticulate leaf +Western silvery aster +Prima +magnetic bottle +fetterbush +process-server +nainsook +mythologist +Piedmont glacier +hammerhead +niggard +Mound Builder +Kui +Nootka +highbinder +passenger pigeon +oblong +tickler coil +agnostic +succorer +esophagogastric junction +dressmaker's model +bombshell +social anthropologist +gildhall +orpine +pterodactyl +bristly sarsaparilla +Lane's Prince Albert +hognose bat +salesgirl +lubricating system +electric catfish +wrap +Jacksonian +chard +cherry laurel +foreground +beadsman +Kolam +amniote +frozen pudding +acid head +poor box +depositor +coattail +pallas's sandgrouse +mason's level +English lady crab +skeg +cruel plant +petrolatum gauze +tuna +swivel +stock-in-trade +perisperm +civies 
+Phyllostomus hastatus +alienor +Verdicchio +guard's van +onion butter +moviegoer +planter +citrange +box huckleberry +iconoscope +familiar +helmsman +baby boomer +constructivist +American bog asphodel +whorled caraway +simple pendulum +viviparous eelpout +Job's tears +holdout +sour salt +poison bush +dusky-footed woodrat +golden algae +granadilla tree +telethermometer +crossbar +thrift +African bowstring hemp +dog in the manger +hayrack +gold-crowned kinglet +prolonge +doge +pencil +discount house +mulligan stew +Nonconformist +virologist +gregarine +facula +rocket scientist +thin-shelled mussel +oospore +annual salt-marsh aster +Afrikaner +metallic +julienne +culverin +cleavers +Berliner +mudhif +thorny skate +brown lemming +yellow colicroot +cooling system +large-leaved magnolia +free-reed +canyonside +preemptor +stake +Brucella +anti-G suit +pleximeter +squire +salsilla +write-in candidate +lowland burrowing treefrog +flare star +dwarf hulsea +jobber +mangel-wurzel +quagga +red-skinned onion +positive pole +Pteropus capestratus +jug wine +stomacher +standee +bladder worm +hakim +house of correction +pelisse +golden mole +temporizer +rose apple +drove +umbrellawort +holy of holies +lawyer cane +smooth lip fern +anode +astatic coils +zip gun +feverroot +self-heal +expansion bit +salt reed grass +field pussytoes +nutmeg hickory +cryptic coloration +Venus's girdle +Hunkpapa +Calostoma cinnabarina +raft foundation +May apple +pygmy mouse +prokaryote +yellow-green algae +Bermuda maidenhair +withdrawer +coelacanth +Elliott's goldenrod +driftfish +epicyclic train +bowl +swamp dewberry +corbel step +sadist +party line +anti-American +mining engineer +Amur privet +conidium +Gastrocybe lateritia +lithia water +chaulmoogra +Rough Rider +Guinea pepper +glade mallow +pitcher sage +whitecup +shanghaier +low St Andrew's cross +phonologist +cocobolo +perfumery +visor +prison chaplain +belt +ingesta +literary critic +industrial watercourse +reckoner +pursuer +Kinetoscope +Kuiper 
belt +hyperope +raw recruit +Galiella rufa +Prince Albert yew +slit trench +usher +tenderfoot +white-rayed mule's ears +browser +piccalilli +bran +giant buttercup +water lobelia +arborescent plant +echinus +dryland blueberry +struggler +platyctenean +Geordie +domatium +twenty-two rifle +keteleeria +sports editor +chorus girl +Hakham +dry-bulb thermometer +onomancer +double-bitted ax +Girondist +bottle bank +thyrsopteris +bandwagon +star anise +armored car +dhawa +Bessemer converter +mutineer +paradise tree +tupik +centurion +mending +chowchow +margrave +International Grandmaster +African hemp +catafalque +leptodactylid frog +forcemeat +tank shell +pill +barbecue pit +worthy +lady's maid +evergreen +Jesuit +South American staghorn +rigger +suffragan +imperialist +spherical angle +grey lemming +kitchen police +tree swift +coliphage +archaist +Conservative +rib +exegete +Mendelian +tragedian +steerage +Paleo-American +obeche +garlic +grapefruit peel +accommodating lens implant +half blood +barrelfish +catgut +lanceolate spleenwort +hardliner +frieze +name dropper +carrack +huckster +onion bread +magnetic head +pease pudding +raisin moth +negative magnetic pole +electroencephalograph +bunji-bunji +synchroflash +Mornay sauce +stencil +winged pigweed +Nesselrode +MEDLINE +licorice +mainspring +melilotus +duke +experimenter +Napier's bones +four-minute man +pin-tailed sandgrouse +toolmaker +pogge +rootstock +baton +pricket +creeping snowberry +anomalops +nester +devourer +apolemia +Maricopa +pine-barren sandwort +larvacean +American dewberry +escalope de veau Orloff +gig +myrtle +pitsaw +Lutheran +fish house punch +gnathostome +intake valve +molasses taffy +clammy locust +vandyke beard +Atlantic tripletail +planktonic algae +estradiol patch +flummery +cytologist +sectarian +oil meal +tomtate +mediterranean anchovy +aspersorium +argonaut +porkholt +sheep ked +algometer +Adventist +false goatsbeard +snake polypody +streetwalker +shelver +adoptee +highflier +pitch apple 
+prairie rocket +fish mousse +viroid +deckle +manila tamarind +observer's meridian +pincurl clip +hardstem bulrush +gossamer +brookweed +Druze +hug-me-tight +accessory before the fact +oilman +Comanche +Marine +bedlamite +Chinese cork oak +squawbush +false miterwort +walk-on +Cynopterus sphinx +brandyball +landlubber +arrowroot +cape forget-me-not +galoot +tabor pipe +checker +Levant cotton +paddle box +murderess +smirker +fuddy-duddy +withdrawer +newel +shade +pink disease fungus +tipu +sweet sultan +aeronautical engineer +tall gallberry holly +acarid +conqueror +cucumber +film director +ordinary +salon +closet queen +allegorizer +tonka bean +flax rust +negative pole +dagame +dentist's drill +mock privet +micropyle +contributor +dark horse +climbing corydalis +cosmotron +land agent +Big Blue +Cynic +tassel flower +lyrate leaf +Minuteman +Dutch-elm beetle +Hessian fly +flower girl +West-sider +window dresser +skinny-dipper +whitebait +out-and-outer +hooker +amicus curiae +jack +camwood +stockist +black root rot fungus +Jamaica dogwood +diaphragm +Holocentrus ascensionis +roselle +black maire +Pygmy +fumigator +lame duck +mudder +hydraulic transmission +conning tower +phoronid +batfish +hearing dog +monohybrid +whaling gun +Cockcroft and Walton accelerator +allemande +seasoner +epileptic +ammonia clock +Young Turk +lanseh tree +urceole +cafe noir +poster girl +Oglala +deadeye +manna lichen +positive pole +cinch +lyricist +hermaphrodite +kidney stone +dilator +number one +frotteur +kaffir bread +fish knife +tarragon +adjuster +potato wart fungus +Florida pompano +conductor +corbie gable +rounders +Catha edulis +bender +recruit +Uruguayan +subject +bunghole +day boarder +pocketed bat +Oxonian +owner-occupier +yellow-leaf sickle pine +devisor +exhibitor +looking glass +shipowner +crooked-stemmed aster +calico +dash-pot +defilade +Confucian +egg-and-dart +irreligionist +lepton +self-rising flour +diving bell +Brahui +shop girl +maximum and minimum thermometer +Dalmatian 
laburnum +correspondent +subduer +nonperson +Reaumur thermometer +rough-leaved aster +jacksmelt +pinfold +magneto +ex-wife +round-leaved rein orchid +purloo +American shrew mole +sweet sand verbena +polymastigote +outfitter +curled leaf pondweed +Italian dressing +borderer +ambusher +geebung +four-stroke engine +small ship +homeopath +gynostegium +political prisoner +Radiigera fuscogleba +ensiform leaf +rhizoctinia +satyr orchid +rue +bouillon cube +flip +prophyll +tilefish +periselene +prima donna +choker +laminar flow clean room +Hooker's orchid +fish joint +mombin +remover +array +coelostat +autophyte +consigner +Damaraland mole rat +gasman +public works +lye hominy +pearlfish +piassava palm +Georgian +uxoricide +confessor +community center +epigone +tagger +abrading stone +cryoscope +nautch girl +reliever +Cartesian +Indian beech +protoplasmic astrocyte +fundamentalist +mustard sauce +crank +houselights +five-point bishop's cap +comedienne +triangle +presentist +beaugregory +dreamer +Wave +blue mockingbird +Barbados gooseberry +ten-spined stickleback +papoose +silky pocket mouse +holdup man +agent-in-place +suspensory +emigrant +ropemaker +bookbinder +jumby bead +undershrub +Killarney fern +sheep bell +city slicker +equerry +pea crab +down-and-out +blackmouth bass +shirtmaker +lister +UNIX guru +snipefish +gimbal +maisonette +haircloth +Ranvier's nodes +pigmy talinum +tribute album +msasa +hydroxide ion +madame +four-pounder +prophet +sloganeer +field-effect transistor +nude mouse +canteen +Calostoma lutescens +buteonine +sunlamp +Uruguay potato +Spanish tamarind +Prince-of-Wales'-heath +kishke +caprifig +chincapin +hegari +alarmist +bathtub gin +astatic galvanometer +Calostoma ravenelii +marang +tussah +coin box +bugleweed +hacker +frontal eminence +timekeeper +shunt +bicycle clip +mustang mint +caesium clock +hospice +glenoid fossa +archpriest +ex-gambler +incrustation +salvager +Donatist +violator +lamb succory +hygroscope +oilbird +sharptail mola +showplace 
+corn syrup +flashlight fish +pulse timing circuit +anchovy paste +fascista +chigoe +divan +Druid +squad room +Huntingdon elm +buffalo carpet beetle +carper +corn lily +goats' milk +assault gun +cockpit +Lochaber ax +Visigoth +occupier +Basotho +criminologist +spindle +Rosicrucian +Cornishwoman +musk kangaroo +artificial skin +pandurate leaf +Parkia javanica +roundhead +tea-like drink +basidiolichen +unguiculate +stepmother +Nauruan +gutta-percha tree +bloodberry +scarlet haw +marupa +censor +algebraist +pelvimeter +whaler +cowhide +paparazzo +biochip +internationalist +Yukon white birch +hangar queen +chlamydia +puttee +Pipturus albidus +pearly razorfish +sea moss +burglar +hoary golden bush +colter +drey +bushman's poison +maxillaria +gnetum +deadeye +shittah +swamp oak +damper block +deepwater squirrelfish +truffle +cangue +paleolith +lawyerbush +sorehead +Texas snowbell +Tremella reticulata +quarter +keelboat +dimity +whiner +Wagnerian +myrmecophyte +frontierswoman +pyrometric cone +big-tree plum +puppy +galbulus +hod +winceyette +carriage wrench +dictostylium +farmland +infanticide +Jacob's rod +threadfish +monocline +inamorato +leaf miner +purple cress +passer +black-fronted bush shrike +silverrod +bootmaker +segregate +captive +Edmontonia +spherometer +television transmitter +bladder +Saratoga spittlebug +dynamometer +lodge +smooth darling pea +Cossack +wake-up call +Olmec +sutler +molasses kiss +corner post +rattlesnake weed +yardmaster +adder +rhinoscope +referral +ulster +pantaloon +counterspy +gadgeteer +heart cherry +hospital chaplain +Clydesdale terrier +plank-bed +Russian thistle +actinometer +dyspeptic +common wolffia +firewall +seidel +potato moth +soapweed +seif dune +thill +cosmographer +absolver +halberdier +fire control system +kai apple +bastard pennyroyal +Big Brother +broadcast journalist +Albatrellus dispansus +citrophilous mealybug +split end +nickel-iron battery +Newtonian +gas maser +thumbstall +anaspid +dusky-footed wood rat 
+latitudinarian +flatbrod +schizocarp +niqaabi +flight surgeon +gyrocompass +Polyporus tenuiculus +Utopian +mailboat +spellbinder +undercoat +cassareep +typical jerboa +photocathode +katharometer +bight +fur-piece +penetration bomb +malik +Siberian millet +nanomia +Wykehamist +tosser +gyrostabilizer +microwave diathermy machine +crystal set +wall +legatee +alfalfa +angwantibo +charioteer +piano maker +African mahogany +Morlett's crocodile +taro +parallel circuit +cush-cush +etymologist +matriculate +neem seed +cornerback +kingfisher daisy +redoubt +blastomycete +peplos +costumier +publican +tobogganist +semolina +myrmidon +parricide +gymslip +whoremaster +cryptocoryne +header +platitudinarian +barleycorn +spiral bandage +reciter +abecedarian +dance +wrymouth +bilberry +Liopelma hamiltoni +streamliner +Fordhooks +fixed phagocyte +radiobiologist +neurologist +Selkup +dollarfish +cascade everlasting +acrodont +boarhound +midstream +theatrical producer +abhorrer +goldsmith +photometrist +Anglo-Saxon +rugel's plantain +sable +workmate +ferule +ankus +earleaved umbrella tree +Passamaquody +timucu +Mexican pocket mouse +yerba santa +Rochon prism +apomict +monocarp +sweet unicorn plant +common winterberry holly +archivist +drypis +paretic +fly-by-night +white-berry yew +Schoolman +blue cheese dressing +vintager +squatter +Euphausia pacifica +corrugated fastener +yellow henbane +Croesus +almoner +analphabet +acoustic delay line +sheep frog +workhouse +horseleech +venturer +pond-scum parasite +Pyrenees daisy +plagiarist +Truncocolumella citrina +rerebrace +group captain +caddis fly +hot-rock penstemon +kanzu +stylopodium +slopseller +rauli beech +starter +ootid +statesman +distributor cam +ascot +falcon-gentle +Duplicidentata +spotted antbird +heliometer +false buckthorn +Allegheny spurge +Cavalier +dart +photocoagulator +master-at-arms +kei apple +baldachin +crapshooter +gametangium +white hope +chipotle +spike heath +Scotch woodcock +Florentine +differential analyzer 
+Mitrula elegans +wet cell +basil balm +Circassian +corn cake +bouncing betty +vice-regent +lagerphone +ketembilla +whoremaster +fork +tetrasporangium +trifler +pill head +life-support system +quartermaster general +tobacco thrips +officeholder +teredo +toyon +Sundacarpus amara +Phytophthora citrophthora +naif +lobbyist +alligator wrench +bully +heavy +toxicologist +radio chassis +waterdog +drive line +kaffir cat +foster-brother +breakax +curette +traditionalist +pipe vise +striped button quail +gawker +homeotherm +schoolyard +battue +kalansuwa +deviationist +Bolshevik +transponder +pungapung +iron +Eyeish +roccella +manglietia +Tory +print seller +Texas Ranger +otter shrew +seconder +shellflower +outlier +party man +wold +hayfork +oncologist +framer +co-beneficiary +ocean pout +Chinese angelica +scrimshaw +air attache +false gromwell +standing press +fringepod +specifier +automatic choke +durum +yenta +wassailer +reeler +signora +beach pancake +common booklouse +pellicle +backroom boy +den mother +associate +Unitarian +gambist +brookweed +clubroom +cat's-tail +playboy +self-registering thermometer +doorstop +bennet +yak's milk +escapee +quail bush +sparge pipe +coast boykinia +screw key +half gainer +aggravator +cotton mill +tailor's chalk +free agent +cotton mouse +deadhead +bunny +turpentine camphor weed +amaranth +ceratodus +red lauan +beam-ends +thermograph +wally +Toda +handrest +commissary +oak-leaved goosefoot +manufacturer +voicer +Jafnea semitosta +bench hook +finder +abyssal zone +rabbitwood +Hercules'-club +epicarp +declinometer +camp follower +signaler +Australian pea +putz +qadi +banded palm civet +egg timer +regnellidium +calisaya +harvestfish +sound spectrograph +side-wheeler +glomerule +woolly rhinoceros +Black Muslim +horticulturist +ornithomimid +cryometer +battlefront +gametophyte +airmailer +cuisse +nakedwood +baseball club +slasher +anise +leatherleaf +leatherjacket +horned pondweed +gofer +Saigon cinnamon +barong +blazer +twinkler +skeleton 
shrimp +dial +floorwalker +case shot +flannelbush +cultivated parsnip +Jane Doe +few-flowered leek +nogging +placer miner +muzzler +serge +lion-hunter +capulin +Wandering Jew +ascidian tadpole +hispid pocket mouse +southern spatterdock +milk wagon +junior middleweight +duck sauce +promycelium +protozoologist +cascade liquefier +tout +longheaded thimbleweed +charcoal burner +footage +slop +bridge agent +miller's-thumb +Job's comforter +marocain +tanker plane +lancetfish +knocker +toque +ordinand +umbrella bird +favorite son +hare's-foot bristle fern +business traveler +plotter +Asiatic shrew mole +tallyman +stump +Paleacrita vernata +index register +mortgagee +accuser +codger +sand rat +seaside centaury +chiropractor +Florida smoothhound +dwarf sperm whale +T-man +sannup +dragonhead +numdah +alkali grass +gynobase +kymograph +ascolichen +steward +waterline +Nazarene +filer +lapidary +muncher +wincey +scyphus +question master +besieger +worldling +docent +facing +atmometer +quern +puerpera +three-decker +calliope +wild red oat +bailee +flame pea +cattle cake +theist +yellowtail flounder +cosmopolitan +rocket engineer +vouchee +Turkoman +hard sauce +Thousand Island dressing +assayer +messmate +mutilator +oyster bar +flame tokay +countess +prairie mimosa +microsporangium +cotter +townsman +paring +fundraiser +simperer +Comrade +orlop deck +power takeoff +cattleship +prime meridian +Javanthropus +scriptorium +curandera +long-clawed prawn +maestro +paster +potato tuberworm +chachka +junkyard +cape yellowwood +reentrant polygon +Liberian coffee +restaurateur +Alsophila pometaria +Jekyll and Hyde +electrophorus +Scomberomorus maculatus +manipulator +gromwell +chicken provencale +ashram +mangel-wurzel +shamrock pea +dossal +adducer +erection +Mysore thorn +smoothie +chufa +brace wrench +victualer +litterer +linstock +Protium guianense +palfrey +banyan +klieg light +dangleberry +trooper +yaupon holly +quitter +tradescant's aster +nullipara +melter +devil's urn +ghostwriter 
+mouth +analogist +Creek +sonic depth finder +fucker +locus of infection +mortician +esophageal smear +locum tenens +conic projection +aroeira blanca +bellarmine +night porter +automobile mechanic +codpiece +Munro +cottonweed +scoinson arch +tinderbox +frozen food +waterproofing +Egyptian henbane +lash +transactor +American smooth dogfish +existentialist +grabber +Sonoran lyre snake +Rufous rubber cup +colors +weekend warrior +power user +perennial salt marsh aster +Puritan +Apalachicola rosemary +anecdotist +tosser +moth bean +agnostic +stretcher-bearer +browntail +optimist +brewer's mole +astronomy satellite +flat file +rust mite +tuberous plant +day laborer +buster +trapezoid +bevatron +nonresident +Streptomyces griseus +mangosteen +customer agent +hero worshiper +suicide bomber +procellariiform seabird +archiannelid +reaction turbine +distortionist +bulldog wrench +grainy club +scalp +Aztec +scow +globigerina +pedant +heartleaf manzanita +kanchil +low gallberry holly +containment +scandalmonger +rose-colored starling +Powhatan +addle-head +Chilean rimu +Atlantic sea bream +arthrospore +ramrod +root climber +Kalapooia +roach clip +Schreiber's aster +horseradish +albino +Kshatriya +trombidiid +blasting cap +body pad +brachium +shallu +Wynnea americana +slender centaury +munj +upset +wind tunnel +cottonwick +airing cupboard +pepper shrub +ambrosia +languisher +chosen +rose globe lily +purple apricot +costia +sloop of war +sultana +frontlet +booster +sargassum fish +broad-leaved montia +rifleman bird +stillroom +amoralist +enginery +meter maid +fitment +southern bog lemming +Athenian +clincher +cusk-eel +mackintosh +diaphone +corozo +Australian reed grass +czar +spongioblast +Eurafrican +airhead +Shahaptian +Roman +pollinium +tourist class +halogeton +stamper +emperor +malingerer +tramp steamer +Peziza domicilina +pilot cloth +stenopterygius +cost accountant +Queen's Counsel +wine-maker's yeast +poppet +cage +rowlock arch +landgrave +bearded wheatgrass +stink bell 
+quaker +undesirable +algarroba +resistance pyrometer +exorcist +carib wood +guvnor +border patrolman +bathhouse +licenser +headman +rentier +pine spittlebug +nut-leaved screw tree +paraduodenal smear +apron +necker +smilax +Alpine besseya +creeper +castle +ground bait +Queensland grass-cloth plant +sclerotium +great yellowcress +fat farm +Stoker +hoop snake +elixir of life +Trotskyite +home buyer +wheat berry +Tutelo +semi-climber +utahraptor +wet-bulb thermometer +packrat +hygrophyte +darter +sketcher +refiner +camlet +midgrass +compound +tarwood +Colorado River hemp +toiler +abstractor +override +dwarf pipefish +plodder +briefcase computer +trunk hose +brown butter +valve-in-head engine +cymbalist +explosive detection system +horsewoman +boutonniere +chinchilla +venerator +scourer +exarch +cohune nut +ayapana +continental divide +cosigner +stalker +pyxie +Genet +Macowanites americanus +open-hearth furnace +water chestnut +American frogbit +tarwood +cutter +scout +burr +upsetter +grist +tagasaste +mouthpiece +palette +rattan +letterman +Exmoor +Methodist +eelblenny +marasca +slide valve +ventilation +saddle hackle +Yakut +flux applicator +air traveler +murder suspect +Cynocephalus variegatus +idolizer +Surgeon General +nutlet +little-head snakeweed +germ tube +fellow traveler +raceabout +commodore +czar +anamorphosis +treelet +girlfriend +groundnut +sideline +giant star grass +goffer +spark lever +oubliette +processor +tare +plodder +extremist +Kipp's apparatus +gripsack +S wrench +viscountess +bridgehead +cascarilla +Asiatic flying squirrel +protoceratops +equerry +difflugia +princeling +moonlighter +aspergill +common flat pea +Utahan +imperial mammoth +plantain-leaved pussytoes +Boott's goldenrod +bootlegger +reed pipe +runcinate leaf +onion salt +nitrite bacterium +introvert +duck +New World opah +goliath frog +heterostracan +disrupting explosive +haggler +candlenut +false bugbane +returning officer +eudiometer +ship-breaker +metazoan +mandarin +patka +gill 
net +cavity wall +armilla +rainmaker +dealfish +orderly +gleaner +muffin man +house sitter +alto +sand devil's claw +vulcanizer +appendicularia +boron chamber +chess +bitok +anchovy butter +dropout +flour mill +bishop +escapist +scapegrace +stanhope +smooth winterberry holly +upstager +stalking-horse +pony +prairie gourd +parabolic mirror +Polaroid +slasher +lap +garlic butter +sendee +German millet +hairy honeysuckle +Swiss canton +Scleroderma flavidium +red goatfish +telegraph plant +Jungian +garment cutter +mallee hen +stranger +driveway +schooner +Paiute +cisco +trestlework +sipper +shanny +romanticist +Molly Miller +mountain rimu +odd-leg caliper +bitumastic +Western Australia coral pea +labor coach +latchkey +harpulla +solitary pussytoes +chop-suey greens +coil +guimpe +diapir +Osage +gutta-percha tree +giant eland +reticulation +garden huckleberry +quick study +Hudson bay collared lemming +coreligionist +Lancastrian +stumblebum +omnirange +seersucker +Potemkin village +Rhea Silvia +symphonist +bolti +jaw +jaconet +page +visiting fireman +haulm +p-n junction +landlubber +yellow jack +triclinium +souari +invader +fire walker +Luddite +Plott hound +hemming-stitch +winker +star-duckweed +craniometer +Arabidopsis lyrata +loser +cypripedia +trimmer arch +cookhouse +pink fivecorner +transfer +ringleader +northern pocket gopher +moke +blockade-runner +cyclostome +web-spinning mite +Whig +transcriber +malahini +sawyer +patent log +paca +tragedian +thermojunction +soffit +black buffalo +foreigner +applecart +brit +pole horse +white mullet +argentinosaur +Homo soloensis +bounty hunter +decumary +hand +paperboy +Smitane +windowpane +Java man +Wynnea sparassoides +prune +middy +lilliputian +sorb +pyrostat +guest worker +hold +leaseholder +vegan +humanist +salinometer +piton +zygospore +means +night rider +tetraspore +archipelago +radiomicrometer +nitpicker +spot weld +slicer +girlfriend +round-tailed muskrat +cock's eggs +Shavian +bay +nuclear chemist +planetarium 
+hiccup nut +Marylander +milling +microsporidian +brown cup +Strophanthus kombe +little skate +emancipator +paperhanger +archaeopteryx +maigre +Mastotermes electrodominicus +procurer +seizure-alert dog +homeboy +cotton strain +mute +siren +spearnose bat +phenacomys +gayal +arsenal +pitchfork +Port Jackson heath +cud +magnetic core memory +interferometer +water jacket +account executive +hodoscope +window oyster +sudatorium +syncopator +loment +hypertensive +smoothbark +Geogia holly +nailhead +African holly +musette +chafeweed +microflora +derrick +strawworm +shogun +queen post +jerboa kangaroo +columbo +royal +sourball +solenogaster +cardsharp +Homo habilis +intaglio +calf's-foot jelly +flotsam +skirret +baronduki +chyme +shovel hat +Welsh +monoplane flying fish +groundfish +tablet-armed chair +swan dive +Indian club +colonial +cassiri +pyramidal tent +praya +silk vine +time clock +button snakeroot +clews +Korean lespedeza +diffuser +ripping bar +puttyroot +nipple shield +headpin +juneberry holly +hub-and-spoke +laver +weldment +plain flour +hoosegow +dudeen +grey skate +line of life +mung +arariba +Newtown Wonder +rock candy +side chapel +castor sugar +narrow-leaved white-topped aster +babassu nut +puka +rings +catchall +heat shield +caroche +oxbow +Australian coral snake +tapper +sporangiophore +fenugreek +spruce gall aphid +gouache +cutoff +private line +pod +cargo hatch +nailhead +penile implant +geophyte +small-leaved linden +deepwater pipefish +paperhanger +hairy spurge +Persian lamb +subtropics +feed grain +clarence +nonparticipant +scorpioid cyme +hand brake +tiller +Geglossaceae +albacore +monochrome +goa bean +bur +tongue worm +psittacosaur +frog's lettuce +pectoral +terreplein +light filter +fishpaste +dry point +grison +feterita +dolichocephalic +oenomel +stretcher +swag +cheval-de-frise +mountain beaver +scammony +discus +leatherleaf saxifrage +wharf rat +Dominique +pelycosaur +depth gauge +bishop +archespore +true anomaly +silver jenny +mercy seat 
+kelp +oviraptorid +acrylic +Chinese pea tree +meat house +bilge well +Temperate Zone +whale louse +balbriggan +briefcase bomb +pump-type pliers +oil +sour gourd +Jewbush +lunette +Chinese paddlefish +pyxidium +beechnut +calabar bean +grugru nut +gib +blunt file +cataphyll +megasporangium +blockbuster +sliding seat +hogchoker +calceus +Connarus guianensis +honest woman +survivor +second balcony +tempera +Calvary clover +murine +outwork +bogy +elephant's-foot +conning tower +set square +blackfly +stirk +Streptomyces erythreus +blade +goldfield +snowball +mortal enemy +waltzer +shoal +galley +hitchhiker +lithophyte +brisling +scauper +esophagoscope +grab +subtracter +philosopher +duplex apartment +southeastern pocket gopher +bonduc nut +reverberatory furnace +grader +lamp house +northern bog lemming +brotula +ornithopod +ptyalith +obturator +perpetual motion machine +range pole +Africander +curvet +daisy print wheel +floor +collector +mutant +tuck +fore-and-after +senega +buckler mustard +louvar +Tarsius glis +culdoscope +Spanish fly +steering gear +hatchet man +museum +saw set +cambric tea +comber +thermohydrometer +stationer +chalcis fly +bryanthus +whipstitch +harvest mite +rock gunnel +time bomb +rariora +pigfish +apetalous flower +head shop +horned whiff +sandpit +tachistoscope +sundries +taffrail +caller +monofocal lens implant +Dover's powder +souari nut +crowbait +render +Shakespearian +hagberry +megatherian +magus +hatchel +mangabey +garroter +piedmont +cope +barrio +psychodid +rigout +distributor +croupier's rake +sarcenet +narrow-leaved water plantain +treenail +biped +lanternfish +overdrive +barndoor skate +picket boat +amber lily +sawpit +sand lance +bucket shop +common beech +laundry truck +surtout +grogram +tampion +escape hatch +interstice +shop bell +snake mackerel +nakedwood +tumbrel +mericarp +mountain paca +cab +big board +cringle +eusporangium +shipping room +coal chute +dumbwaiter +Smiledon californicus +man-at-arms +cartridge +deinonychus 
+pigeon pea +screw bean +spectacle +floorboard +cutting room +low-warp-loom +proconsul +sabicu +genipap +clapper +aquifer +archaeornis +belly flop +Protium heptaphyllum +interrupter +high-warp loom +knight +wiper +impression +poker +Pithecanthropus +sable +guardroom +tenter +wellhead +raja +strickle +sodomite +mountebank +sand leek +Barbados gooseberry +shuffler +sensory fiber +crab-eating opossum +etching +rare bird +scup +fagot +negro vine +hutment +droshky +nephoscope +lady chapel +cutty stool +release +vestiture +buff +standard +Tabernacle +vascular ray +snakewood +chlorobenzylidenemalononitrile +limnologist +pouched mole +microwave linear accelerator +Mastotermes darwiniensis +wind tee +orange bat +open sight +carpospore +rampant arch +sabbatia +cursor +post exchange +bellpull +center +cyclostyle +canonist +pygmy sperm whale +moa +king +pass-through +angioscope +marrow +hookup +revetment +acanthocephalan +good Samaritan +apatosaur +web spinner +dixie +ommastrephes +crossbench +candlewick +jack +light arm +caisson +kaki +quandong nut +Meuniere butter +coquilla nut +mast +black +twitterer +bluethroat pikeblenny +shielding +water-shield +urolith +elephant bird +clearway +dark lantern +schizopetalon +press +Nazi +sugarberry +Maltese +stevedore +hair shirt +party wall +gainer +blackheart +nothosaur +cavetto +evergreen bittersweet +chemical bomb +calpac +shingle +turnpike +animator +heaver +isoclinic line +death knell +liner +anathema +aerie +razorback +Ichyostega +pound net +French dressing +mottle +yard +string tie +bell seat +brattice +battering ram +sierra +pompon +vertex +stomach pump +electrolytic cell +escolar +telpher +roadhouse +cerecloth +tartare sauce +letter case +whale sucker +hob +teg +canvas +strickle +hectograph +Cartagena bark +mail car +acinus +freedom rider +bread sauce +picture window +Rhizopogon idahoensis +pinprick +mass spectrograph +ringer +devil's cigar +salad cream +marlberry +airbrake +Clark cell +yellow-throated marten +wire gauge 
+dinoceras +aba +harpoon log +plate rail +mustard plaster +coelophysis +journal box +puce +ballcock +quartering +izar +clinid +whirler +turnspit +deathbed +pottle +shot +doubler +Coryphaena equisetis +English sole +chicken feed +borrow pit +mylodontid +Chilean nut +Kundt's tube +ling +asthenosphere +reseau +death seat +immovable bandage +peppermint patty +lecturer +electron multiplier +bear claw +hyacinth +beaked salmon +toehold +scull +snowball +gangsaw +fiber +oxeye +lashing +Beckman thermometer +fence +cantilever +dinner theater +Reynard +jag +umbrella plant +camera lucida +beaver +slug +yellowfin croaker +Sibley tent +rat-tail file +anchovy pear +soldier +cackler +chaise +Pitot-static tube +minniebush +Episcopalian +oleaster +ejaculator +wavy-leaved aster +knight +rack +real storage +magnetic mine +cocoa plum +vesiculovirus +birch leaf miner +water chevrotain +rudapithecus +torpedo tube +itch mite +warren +loft +washerman +terrace +nonstarter +shit +platform +caudex +ground control +Ostariophysi +slopshop +Peruvian cotton +crystal oscillator +plastic bomb +bar bit +watering cart +Asiatic sweetleaf +artificial joint +chariot +casern +charge-exchange accelerator +display adapter +hornpipe +honey bell +planula +Nephthytis afzelii +hame +ranter +trachodon +synchrocyclotron +splasher +heterotroph +Nicol prism +Himalayan rhubarb +headfast +put-put +bitter almond +parr +scantling +power breakfast +madder +Catalpa bignioides +rose of Jericho +spark chamber +rhizome +beard worm +supper club +negro peach +keratoscope +wain +apple aphid +planking +time-delay measuring instrument +sternpost +sicklepod +lake bed +gatherer +monotype +dead-man's float +poison gas +dicynodont +organism +cell +person +animal +plant +food +artifact +dressage +contact sport +outdoor sport +gymnastics +track and field +jumping +high jump +skiing +water sport +swimming +dive +floating +skin diving +rowing +boxing +sledding +tobogganing +wrestling +skating +ice skating +roller skating +racing +boat 
racing +riding +equestrian sport +cycling +blood sport +hunt +fishing +angling +casting +athletic game +outdoor game +golf +field game +field hockey +football +American football +ball game +baseball +court game +badminton +basketball +tennis +sport +Seder +scavenger +bottom-feeder +work animal +beast of burden +pack animal +domestic animal +marine animal +female +male +young +young mammal +pup +cub +lion cub +tiger cub +microorganism +arbovirus +herpes +herpes zoster +reovirus +moneran +cyanobacteria +enteric bacteria +actinomycete +streptomyces +diplococcus +parasite +ectoparasite +protoctist +protozoan +sarcodinian +ameba +ciliate +alga +brown algae +green algae +sporozoan +cypriniform fish +cyprinid +carp +domestic carp +shiner +catostomid +buffalo fish +cyprinodont +killifish +topminnow +squirrelfish +stickleback +pipefish +embryo +fetus +blastula +chordate +cephalochordate +tunicate +ascidian +vertebrate +aquatic vertebrate +jawless vertebrate +lamprey +hagfish +cartilaginous fish +holocephalan +chimaera +elasmobranch +shark +mackerel shark +mako +requiem shark +dogfish +smooth dogfish +spiny dogfish +smooth hammerhead +smalleye hammerhead +shovelhead +ray +sawfish +roughtail stingray +butterfly ray +eagle ray +manta +skate +bird +gamecock +night bird +ratite +passerine +oscine +accentor +lark +pipit +finch +canary +dark-eyed junco +New World sparrow +bunting +honeycreeper +sparrow +grosbeak +towhee +weaver +grassfinch +tyrannid +New World flycatcher +kingbird +pewee +cotinga +antbird +Old World flycatcher +thrush +nightingale +Old World chat +warbler +kinglet +Old World warbler +New World warbler +flycatching warbler +New World chat +yellowthroat +New World oriole +northern oriole +meadowlark +New World blackbird +grackle +Old World oriole +starling +myna +corvine bird +crow +Old World jay +common European jay +New World jay +blue jay +Canada jay +Rocky Mountain jay +nutcracker +European magpie +American magpie +Australian magpie +wren +marsh wren +thrasher 
+New Zealand wren +creeper +titmouse +black-capped chickadee +Carolina chickadee +swallow +martin +tanager +shrike +butcherbird +bush shrike +bowerbird +European water ouzel +American water ouzel +vireo +waxwing +bird of prey +hawk +black kite +swallow-tailed kite +white-tailed kite +harrier +falcon +peregrine +caracara +eagle +young bird +sea eagle +Aegypiidae +Old World vulture +griffon vulture +bearded vulture +Egyptian vulture +black vulture +New World vulture +buzzard +condor +Andean condor +California condor +black vulture +king vulture +owl +horned owl +scops owl +amphibian +salamander +newt +Pacific newt +ambystomid +climbing salamander +web-toed salamander +frog +true frog +true toad +spadefoot +tree toad +cricket frog +tongueless frog +reptile +anapsid +diapsid +chelonian +turtle +sea turtle +ridley +snapping turtle +musk turtle +diamondback terrapin +Western box turtle +tortoise +soft-shelled turtle +saurian +lizard +gecko +iguanid +spiny lizard +fence lizard +horned lizard +skink +teiid lizard +racerunner +plateau striped whiptail +Chihuahuan spotted whiptail +western whiptail +checkered whiptail +agamid +moloch +anguid lizard +venomous lizard +lacertid lizard +chameleon +monitor +crocodilian reptile +crocodile +alligator +caiman +armored dinosaur +ankylosaur +bone-headed dinosaur +ceratopsian +hadrosaur +saurischian +sauropod +theropod +ceratosaur +maniraptor +synapsid +pterosaur +ichthyosaur +snake +colubrid snake +smooth green snake +rough green snake +racer +blacksnake +whip-snake +rat snake +bull snake +common kingsnake +milk snake +common garter snake +ribbon snake +Western ribbon snake +common water snake +water moccasin +grass snake +viperine grass snake +sand snake +lyre snake +blind snake +indigo snake +constrictor +boa +python +elapid +coral snake +coral snake +cobra +mamba +black mamba +krait +viper +pit viper +rattlesnake +timber rattlesnake +arthropod +arachnid +false scorpion +whip-scorpion +spider +European wolf spider +acarine +hard 
tick +Ixodes dammini +Ixodes neotomae +Ixodes pacificus +Ixodes scapularis +sheep-tick +Ixodes persulcatus +Ixodes dentatus +Ixodes spinipalpis +wood tick +soft tick +mite +trombiculid +spider mite +house centipede +gallinaceous bird +domestic fowl +jungle fowl +chicken +cock +hen +turkey +grouse +European black grouse +Asian black grouse +blackcock +greyhen +red grouse +moorhen +greater prairie chicken +lesser prairie chicken +heath hen +guan +chachalaca +megapode +mallee fowl +phasianid +pheasant +bobwhite +northern bobwhite +Old World quail +migratory quail +peafowl +California quail +Hungarian partridge +red-legged partridge +Greek partridge +mountain quail +guinea fowl +columbiform bird +pigeon +dove +turtledove +domestic pigeon +homing pigeon +sandgrouse +parrot +cockatoo +lory +varied Lorikeet +rainbow lorikeet +parakeet +cuculiform bird +cuckoo +crow pheasant +coraciiform bird +roller +kingfisher +hoopoe +apodiform bird +swift +Archilochus colubris +thornbill +goatsucker +piciform bird +woodpecker +flicker +sapsucker +toucanet +trogon +quetzal +aquatic bird +waterfowl +anseriform bird +duck +teal +widgeon +sheldrake +goldeneye +scaup +wood duck +sea duck +scoter +merganser +gosling +gander +Chinese goose +greylag +blue goose +snow goose +brant +common brant goose +honker +barnacle goose +swan +tundra swan +screamer +crested screamer +mammal +prototherian +monotreme +marsupial +opossum +bandicoot +kangaroo +common wallaby +hare wallaby +nail-tailed wallaby +rock wallaby +pademelon +tree wallaby +rat kangaroo +phalanger +dasyurid marsupial +dasyure +placental +calf +buck +insectivore +mole +shrew mole +shrew +water shrew +tenrec +invertebrate +sponge +glass sponge +coelenterate +Chrysaora quinquecirrha +hydrozoan +siphonophore +anthozoan +actinia +coral +gorgonian +stony coral +ctenophore +worm +planarian +fluke +liver fluke +Fasciolopsis buski +schistosome +tapeworm +echinococcus +taenia +common roundworm +chicken roundworm +pinworm +eelworm +vinegar eel 
+trichina +hookworm +filaria +Guinea worm +annelid +oligochaete +polychaete +leech +mollusk +scaphopod +gastropod +abalone +scorpion shell +giant conch +edible snail +garden snail +brown snail +Helix hortensis +seasnail +neritid +limpet +Hermissenda crassicornis +cowrie +bivalve +clam +quahog +cockle +oyster +mussel +marine mussel +freshwater mussel +scallop +shipworm +cephalopod +octopod +decapod +squid +crustacean +malacostracan crustacean +decapod crustacean +crab +swimming crab +spider crab +lobster +true lobster +Old World crayfish +American crayfish +shrimp +prawn +krill +stomatopod +mantis shrimp +woodlouse +pill bug +sow bug +sea louse +amphipod +copepod +barnacle +wading bird +stork +ibis +common spoonbill +roseate spoonbill +heron +egret +night heron +American bittern +European bittern +least bittern +whooping crane +rail +crake +gallinule +purple gallinule +coot +great bustard +plain turkey +button quail +trumpeter +seabird +shorebird +plover +turnstone +sandpiper +yellowlegs +ruff +tattler +woodcock +snipe +greyback +red-breasted snipe +curlew +godwit +stilt +stilt +phalarope +courser +coastal diving bird +larid +gull +tern +jaeger +skua +auk +guillemot +murre +puffin +gaviiform seabird +podicipitiform seabird +grebe +pelecaniform seabird +white pelican +Old world white pelican +gannet +snakebird +sphenisciform seabird +penguin +pelagic bird +wandering albatross +black-footed albatross +petrel +shearwater +storm petrel +aquatic mammal +cetacean +whale +baleen whale +rorqual +toothed whale +beaked whale +dolphin +bottlenose dolphin +porpoise +sea cow +carnivore +pinniped mammal +seal +eared seal +fur seal +fur seal +South American sea lion +California sea lion +Australian sea lion +Steller sea lion +earless seal +walrus +canine +bitch +dog +cur +toy dog +toy spaniel +English toy spaniel +hunting dog +hound +coonhound +dachshund +foxhound +wolfhound +greyhound +terrier +bullterrier +rat terrier +Manchester terrier +fox terrier +wirehair +Welsh terrier 
+schnauzer +Skye terrier +sporting dog +retriever +pointer +setter +spaniel +springer spaniel +water spaniel +working dog +watchdog +shepherd dog +Belgian sheepdog +pinscher +Sennenhunde +mastiff +bulldog +guide dog +sled dog +liver-spotted dalmatian +spitz +griffon +corgi +poodle +wolf +coydog +wild dog +striped hyena +brown hyena +spotted hyena +aardwolf +fox +black fox +silver fox +blue fox +feline +cat +domestic cat +tom +blue point Siamese +wildcat +common lynx +Canada lynx +bobcat +spotted lynx +caracal +big cat +leopardess +panther +lioness +lionet +Bengal tiger +tigress +saber-toothed tiger +bear +Syrian bear +grizzly +Alaskan brown bear +cinnamon bear +viverrine +civet +Indian mongoose +ichneumon +slender-tailed meerkat +suricate +bat +fruit bat +carnivorous bat +leafnose bat +false vampire +vespertilian bat +long-eared bat +freetail +vampire bat +predator +game +game bird +fossorial mammal +tetrapod +insect +beetle +two-spotted ladybug +Mexican bean beetle +Hippodamia convergens +vedalia +bombardier beetle +calosoma +searcher +firefly +sawyer +pine sawyer +flea beetle +Colorado potato beetle +carpet beetle +clerid beetle +lamellicorn beetle +scarabaeid beetle +scarab +tumblebug +dorbeetle +June beetle +melolonthid beetle +elaterid beetle +snout beetle +boll weevil +blister beetle +bark beetle +darkling beetle +flour beetle +seed beetle +pea weevil +bean weevil +rice weevil +louse +flea +dipterous insect +gall midge +housefly +tsetse fly +blowfly +bluebottle +greenbottle +flesh fly +tachina fly +gadfly +botfly +human botfly +sheep botfly +warble fly +horsefly +bee fly +fruit fly +louse fly +horn fly +mosquito +gnat +fungus gnat +hymenopterous insect +drone +worker +honeybee +Africanized bee +black bee +Carniolan bee +Italian bee +carpenter bee +bumblebee +cuckoo-bumblebee +andrena +Nomia melanderi +leaf-cutting bee +mason bee +potter bee +wasp +vespid +paper wasp +hornet +sphecoid wasp +digger wasp +chalcid fly +sawfly +pharaoh ant +little black ant +army 
ant +carpenter ant +fire ant +wood ant +slave ant +Formica fusca +slave-making ant +sanguinary ant +bulldog ant +Amazon ant +termite +dry-wood termite +orthopterous insect +short-horned grasshopper +locust +migratory locust +migratory grasshopper +long-horned grasshopper +katydid +mormon cricket +sand cricket +mole cricket +European house cricket +field cricket +tree cricket +snowy tree cricket +phasmid +diapheromera +oriental cockroach +American cockroach +Australian cockroach +German cockroach +giant cockroach +praying mantis +hemipterous insect +leaf bug +mirid bug +lygus bug +lygaeid +coreid bug +heteropterous insect +water bug +water strider +assassin bug +homopterous insect +whitefly +sweet-potato whitefly +coccid insect +scale insect +soft scale +armored scale +mealybug +plant louse +aphid +greenfly +woolly aphid +adelgid +dog-day cicada +seventeen-year locust +spittle insect +plant hopper +psocopterous insect +psocid +booklouse +ephemerid +neuropteron +green lacewing +brown lacewing +odonate +trichopterous insect +caseworm +thysanuran insect +bristletail +thysanopter +thrips +earwig +lepidopterous insect +butterfly +nymphalid +fritillary +emperor butterfly +danaid +pierid +small white +large white +southern cabbage butterfly +blue +copper +American copper +hairstreak +Strymon melinus +moth +tortricid +lymantriid +geometrid +cankerworm +pyralid +tineoid +tineid +clothes moth +gelechiid +grain moth +noctuid moth +cutworm +underwing +hawkmoth +bombycid +saturniid +giant silkworm moth +silkworm +arctiid +lasiocampid +tent caterpillar +webworm +webworm moth +caterpillar +bollworm +woolly bear +larva +grub +pupa +queen +echinoderm +basket star +edible sea urchin +sand dollar +heart urchin +crinoid +trepang +lagomorph +leporid +rabbit +eastern cottontail +swamp rabbit +marsh hare +leveret +European hare +jackrabbit +white-tailed jackrabbit +blacktail jackrabbit +polar hare +snowshoe hare +pika +rodent +mouse +rat +pocket rat +field mouse +brown rat +jerboa rat 
+water rat +New World mouse +wood mouse +wood rat +vole +packrat +Eurasian hamster +golden hamster +gerbil +lemming +pied lemming +Old World porcupine +brush-tailed porcupine +long-tailed porcupine +New World porcupine +Canada porcupine +pocket mouse +kangaroo rat +jumping mouse +jerboa +dormouse +gopher +squirrel +tree squirrel +ground squirrel +prairie dog +American flying squirrel +groundhog +hoary marmot +yellowbelly marmot +Old World beaver +New World beaver +cavy +naked mole rat +ungulate +hyrax +odd-toed ungulate +equine +horse +foal +colt +male horse +stallion +mare +saddle horse +warhorse +pony +mustang +bronco +wild horse +pony +racehorse +racer +harness horse +workhorse +draft horse +trotting horse +ass +domestic ass +wild ass +onager +common zebra +mountain zebra +grevy's zebra +rhinoceros +tapir +even-toed ungulate +swine +piglet +porker +peccary +ruminant +bovid +bovine +ox +cattle +bull +cow +beef +Brahman +dairy cattle +Old World buffalo +Indian buffalo +carabao +Asian wild ox +American bison +wisent +sheep +lamb +domestic sheep +wild sheep +mountain sheep +goat +domestic goat +wild goat +goat antelope +antelope +Thomson's gazelle +Gazella subgutturosa +springbok +kudu +harnessed antelope +eland +waterbuck +oryx +deer +stag +red deer +mule deer +roe deer +caribou +chevrotain +camel +domestic llama +guanaco +alpaca +giraffe +musteline mammal +ermine +stoat +New World least weasel +Old World least weasel +longtail weasel +American mink +ferret +muishond +snake muishond +striped muishond +river otter +Eurasian otter +striped skunk +hooded skunk +hog-nosed skunk +spotted skunk +American badger +Eurasian badger +ferret badger +hog badger +marten +pachyderm +edentate +peba +apar +tatouay +peludo +giant armadillo +pichiciago +sloth +anteater +primate +ape +anthropoid ape +hominoid +hominid +homo +Homo erectus +Homo sapiens +australopithecine +great ape +western lowland gorilla +eastern lowland gorilla +mountain gorilla +silverback +western chimpanzee 
+eastern chimpanzee +central chimpanzee +pygmy chimpanzee +lesser ape +monkey +Old World monkey +talapoin +grivet +vervet +green monkey +chacma +mandrill +drill +rhesus +bonnet macaque +Barbary ape +crab-eating macaque +entellus +guereza +New World monkey +true marmoset +pygmy marmoset +tamarin +silky tamarin +pinche +lemur +tarsier +flying lemur +proboscidean +elephant +mammoth +procyonid +raccoon +fish +food fish +young fish +crossopterygian +lungfish +catfish +silurid +bullhead +channel catfish +gadoid +cod +hake +elver +common eel +tuna +moray +conger +teleost fish +clupeid fish +shad +herring +sardine +pilchard +anchovy +salmonid +salmon +Atlantic salmon +trout +brown trout +char +whitefish +smelt +tarpon +ribbonfish +toadfish +needlefish +flying fish +spiny-finned fish +percoid fish +perch +pike-perch +walleye +robalo +pike +pickerel +sunfish +crappie +freshwater bream +black bass +bass +serranid fish +grouper +hind +surfperch +cardinalfish +remora +carangid fish +jack +moonfish +pompano +scad +dolphinfish +characin +cichlid +snapper +grunt +sparid +sea bream +porgy +sciaenid fish +croaker +whiting +sea trout +mullet +goatfish +mullet +silversides +barracuda +sea chub +butterfly fish +damselfish +clown anemone fish +wrasse +blenny +pikeblenny +gunnel +goby +gempylid +scombroid +mackerel +Spanish mackerel +tuna +bonito +sailfish +billfish +marlin +tripletail +mojarra +ganoid +Pacific sturgeon +beluga +scorpaenoid +scorpaenid +scorpionfish +rockfish +lumpfish +greenling +gurnard +sea robin +plectognath +triggerfish +filefish +boxfish +spiny puffer +ocean sunfish +flatfish +righteye flounder +lefteye flounder +whiff +sole +abbey +abbey +abrader +accelerator +accessory +accommodation +acoustic device +acoustic modem +acrylic +action +actuator +adhesive bandage +adjustable wrench +aeolian harp +aerosol +after-shave +airbus +aircraft +airfield +airfoil +air gun +airplane +air pump +air-to-air missile +air-to-ground missile +alarm +alb +alcazar +Allen screw +alms 
dish +altimeter +Amati +ammeter +ammunition +amplifier +analog computer +analytical balance +anchor +anchor chain +aneroid barometer +angledozer +anklet +antenna +anteroom +antiaircraft +antiballistic missile +apartment +apartment building +aperture +apparatus +apparel +appliance +appliance +applicator +aquarium +arbor +arcade +arch +arc lamp +area +argyle +arm +armament +armature +armchair +armoire +armor +armored vehicle +armor plate +armrest +array +arrow +artificial heart +artillery +assembly +assembly plant +astrodome +astronomical telescope +athletic sock +atom bomb +atomic clock +atomizer +attachment +attack submarine +attire +audiocassette +audio system +audiotape +auditorium +autoclave +autoinjector +autoloader +automat +automat +automatic firearm +automatic rifle +automaton +auxiliary research submarine +awl +ax +axis +axle +axletree +baby bed +baby buggy +baby grand +back +background +backseat +badminton equipment +badminton racket +bag +bag +bag +baggage +bagpipe +bait +balance +balcony +balcony +bale +ball +ball gown +ballistic missile +ballistic pendulum +ball-peen hammer +ballroom +band +bandage +bandanna +banderilla +bar +bar +barbed wire +barge +barge pole +barn door +barograph +barrack +barrage balloon +barrel knot +barrel vault +barrier +barroom +base +base +baseball equipment +basilica +basin +basket +basketball equipment +bass +bass drum +bass horn +bastion +bat +bathhouse +battery +battle-ax +battle dress +battleship +bay rum +bay window +beading plane +beam +beam balance +bearing +beater +beating-reed instrument +bed +bed +bedclothes +bedroom +bedroom furniture +bedspread +bedspring +beehive +beer barrel +bell +bell push +bell tower +belt +belt buckle +bench +berlin +berth +besom +bevel gear +bicycle +bicycle chain +bier +billiard ball +bin +binding +bin liner +binocular microscope +bioscope +birchbark canoe +bird shot +bistro +bit +bit +black tie +blade +blade +blanket +blimp +blind +block +block plane +blouse +blower +blowtorch +bludgeon 
+boarding +boarding house +boardroom +boat +bobbin +body +body armor +body lotion +boiler +bolt +bolt +bomb +bomber +bongo +boom +boom +boomerang +boot +booth +booth +bore bit +Boston rocker +bota +bottle +bottle opener +bow +bow +bowed stringed instrument +bowl +bowl +bowline +bowling equipment +bowling pin +bowsprit +box +box +boxcar +boxing equipment +brace +brace +bracelet +bracket +brake +brake system +brass +brasserie +brazier +breechcloth +breeches +brewpub +brick +bricklayer's hammer +brickwork +bridal gown +bridge +briefcase +brigandine +brilliant pebble +brim +broad arrow +broadax +broad hatchet +broadsword +brush +bubble jet printer +buffer +buffet +building +building complex +bulldozer +bullet +bullhorn +bullnose +bundle +bunker +burial chamber +burner +bus +business suit +butt joint +button +buttress +butt shaft +buzz bomb +cabaret +caber +cabin +cabin +cabinet +cabinet +cabin liner +cable +cable +cafe +cafeteria +cafeteria tray +caff +cage +calculator +caliper +calorimeter +camera +camera lens +camera tripod +camp +camp +camp chair +camper +can +canal +candelabrum +candlestick +cane +cannikin +cannon +cannon +cannonball +canopy +canteen +canteen +canvas +canvas tent +cap +cap +cap +capacitor +caparison +cape +cap screw +capsule +car +car +carbine +carbon arc lamp +card index +cardioid microphone +car door +cargo liner +cargo ship +carillon +carpenter's hammer +carpenter's level +carpenter's mallet +carpenter's rule +carpet tack +carriage +carriage +carriage bolt +carrick bend +carrier +car seat +cart +cartridge +cartridge belt +cartridge holder +case +case +cashbox +casque +casserole +cassock +catch +catcher's mask +cathedra +cathedral +cathedral +catheter +cathode +cathode-ray tube +cat's-paw +cattle car +cautery +cavalry sword +cedar chest +cell +cell +cellblock +center +centrifuge +ceramic +ceramic ware +chain tongs +chair +chair of state +chalk +chamfer plane +chandlery +chapel +character printer +chassis +chasuble +chatelaine +checker 
+cheeseboard +chemical reactor +chessman +chest of drawers +child's room +china +chip +chip +chisel +choke +chokey +chordophone +chronoscope +chuck +church key +cigar lighter +circle +circuit +circuit board +circular plane +circular saw +cistern +civilian clothing +clamp +clamshell +clarinet +classroom +clavier +cleaning implement +cleaning pad +clean room +clinic +clip +cloak +clock +closed circuit +closed-circuit television +closet +cloth covering +clothes closet +clothes dryer +clothes hamper +clothes tree +clothing +clothing store +clout nail +clove hitch +clutch +coach +coal car +coal shovel +coat +coat closet +coating +coating +coat of paint +coaxial cable +cocked hat +coffee cup +coffee maker +coffer +coffin +coil +colander +collider +cologne +colonnade +color television +Colt +column +column +comb +comb +combination plane +combine +commissary +commodity +communication system +commutator +compact disk +compartment +compass +compass card +compound lens +compound lever +compressor +computer +computer circuit +computer network +computer screen +computer system +concentration camp +concert grand +concertina +condenser +condenser +condenser microphone +conductor +connecting rod +connection +conservatory +conservatory +contact +container +contrabassoon +control +control panel +control system +convent +converging lens +converter +convertible +conveyance +cooker +cooking utensil +cooler +cooling system +cord +cord +cordage +corner +correctional institution +corset +cosmetic +costume +costume +cotter +cotton +counter +counter +counter +counter tube +country house +coupling +court +court +coverall +covering +cowbarn +craft +cravat +crazy quilt +cream +cream pitcher +crematory +crepe +crib +cricket equipment +croquet equipment +crossbar +crossbow +crosspiece +crown jewels +cruiser +cruiser +cruise ship +crystal microphone +cudgel +cuff +cultivator +cup +cupboard +cupola +curb roof +curtain +cutout +cutter +cutting implement +cybercafe +cyclotron +cylinder +cymbal +dado 
plane +dagger +damper +dart +data converter +data input device +davenport +davenport +davit +dead axle +deck +deck +deck chair +deep-freeze +defensive structure +delay line +delicatessen +dental appliance +denture +depilatory +depressor +depth finder +derrick +destroyer +detector +detector +detonating fuse +detonator +developer +device +dial +dialyzer +diathermy machine +diesel locomotive +digital camera +digital computer +digital display +diner +dinghy +dining car +dining-hall +dining room +dining-room furniture +dining-room table +dinner dress +dinner pail +dinner table +diode +dip +diplomatic building +dipper +DIP switch +directional antenna +directional microphone +direction finder +disguise +dish +dish +disk +dispenser +display +display panel +distillery +ditch +ditch spade +dive bomber +doll +dolmen +domino +door +doorbell +doorlock +doornail +dormer window +dormitory +dot matrix printer +double-breasted suit +double-reed instrument +douche +dovecote +dovetail plane +downstage +drafting instrument +Dragunov +drawstring bag +dray +dredging bucket +dress +dress blues +dressing +dress uniform +drill +electric drill +drill rig +drinking fountain +drinking vessel +drip mat +drip pot +drive +drive +drogue +drogue parachute +drop-leaf table +dry battery +dry dock +dryer +dry masonry +dry wall +dugout canoe +dumdum +dumpcart +dune buggy +dungeon +duplicator +dustmop +dwelling +earphone +earthenware +easel +easy chair +edge tool +eiderdown +elastic bandage +electrical converter +electrical device +electric bell +electric frying pan +electric furnace +electric heater +electric lamp +electric motor +electric refrigerator +electro-acoustic transducer +electrode +electromagnet +electronic balance +electronic device +electronic equipment +electronic instrument +electronic voltmeter +electron microscope +electrostatic generator +electrostatic printer +elevator +embankment +embellishment +enamel +enamelware +enclosure +endoscope +engine +engine +ensemble +entrenching tool 
+epidiascope +equipment +eraser +escutcheon +espadrille +espresso shop +establishment +estaminet +exercise device +exhaust fan +exhibition hall +Exocet +expansion bolt +explosive device +external-combustion engine +extractor +fabric +face mask +face veil +facing +factory +fairlead +false face +fan +farm building +farm machine +fastener +fatigues +faucet +feedback circuit +fence +fencing sword +fender +ferry +fetoscope +field-sequential color television +fife +fifth wheel +fighter +figure eight +file +file server +filling +film +film +filter +filter +finery +finisher +fipple flute +fire +firearm +fire iron +fireplace +firkin +fisherman's bend +fisherman's knot +fisherman's lure +fishing boat +fishing rod +fishnet +flag +flageolet +flambeau +flannelette +flap +flashlight +flask +flatcar +flat tip screwdriver +fleet ballistic missile submarine +flight simulator +flip-flop +floating dock +floor +floor +floor cover +fly +flywheel +fob +foghorn +folder +food hamper +footbath +footbridge +foothold +foot rule +footwear +footwear +forceps +fore-and-aft sail +foremast +fore plane +fore-topmast +fork +formalwear +fortification +fortress +foundation garment +foundry +fragmentation bomb +framework +free-reed instrument +freight train +French door +friary +friction clutch +frigate +frill +frock coat +front projector +fruit machine +full-dress uniform +full metal jacket +funny wagon +fur hat +furnace +furnishing +furniture +fuse +gable +gable roof +gaff +galleon +gallery +galley +galley +gallows +galvanometer +gambling house +game +game equipment +gamp +garage +Garand rifle +garden +garden spade +garden tool +garment +gas burner +gas-discharge tube +gasket +gasoline engine +gate +gatehouse +gatepost +gathered skirt +gauge +gauze +gauze +gavel +gear +gear +gear +gearing +general-purpose bomb +generator +generator +Geneva gown +geodesic dome +girder +glass +glider +glove +glyptic art +goal +golf club +golf equipment +Gordian knot +Gothic arch +government building +government office 
+gown +gramophone +granary +granny knot +grapnel +grapnel +grate +graver +greasy spoon +greatcoat +great hall +greengrocery +grenade +grillroom +groined vault +Guarnerius +guidance system +guided missile +guildhall +guitar +guitar pick +gun +gun carriage +gunlock +gunsight +gun trigger +gurney +gymnastic apparatus +gym shoe +gypsy cab +habergeon +habit +hairdressing +hairpiece +hairpin +half hatchet +half hitch +hall +hall +hammer +hand +handbell +handbow +handcart +hand glass +handloom +hand lotion +hand mower +handsaw +hand shovel +hand tool +handwear +handwheel +hanger +hank +harpsichord +harrow +hash house +hat +hatch +hauberk +hawser bend +hazard +head +head +head covering +headdress +header +headgear +headlight +headsail +headscarf +health spa +heat engine +heater +heat lamp +heat-seeking missile +heavier-than-air craft +heckelphone +hedge +helicopter +helm +helmet +helmet +heraldry +high altar +high-angle gun +high gear +high table +hinge +hip boot +hitch +hoe +hogshead +hoist +holder +holding device +home appliance +homespun +hood +hood +hood +hook +Hoover +hope chest +horn +horn button +horse +horsecar +horse-drawn vehicle +horsehair wig +hosiery +hospital +hospital room +hostel +hot-air balloon +hotel +hotel room +hot tub +house +house +housing +hovel +huarache +humeral veil +hut +hutch +hydraulic brake +hydraulic system +hydroelectric turbine +hydrofoil +hydrometer +hygrometer +hypermarket +hypodermic syringe +ice machine +ice rink +ice skate +icetray +ignition switch +impact printer +implant +implement +imprint +improvised explosive device +inclined plane +indicator +induction coil +ink-jet printer +inkstand +institution +instrument +instrument of punishment +instrument of torture +interceptor +interchange +intercommunication system +intercontinental ballistic missile +interface +interior door +internal-combustion engine +ionization chamber +video iPod +iron +jack +jack +jacket +jacket +jack plane +jail +jamb +jar +jeroboam +jet +jet engine +jewelled 
headdress +jib +jibboom +jiggermast +joint +jointer +joist +jolly boat +jug +jumper +jumper cable +junction +junction +jury mast +kayak +keel +keg +kerchief +kettle +key +key +keyboard +keyboard instrument +khakis +kiln +kinescope +kingbolt +kirk +kit +kit +kitbag +kitchen +kitchen appliance +kitchen utensil +kite balloon +knee-high +knife +knife +knit +knob +lace +lacquer +ladder truck +lag screw +lamasery +laminate +lamination +lamp +lamp +landing gear +land mine +lantern +lapel +lathe +lattice +launcher +lead-acid battery +leather strip +Leclanche cell +leg +legging +lens +lens implant +level +lever +Levi's +lid +life buoy +life jacket +life preserver +lifting device +ligament +light +light-emitting diode +lighter-than-air craft +lighting +light microscope +linear accelerator +line printer +lingerie +lining +liquid crystal display +lister +living quarters +living room +local area network +lock +locomotive +lodge +lodging house +loft +loft +longbow +lookout +loom +loop knot +lota +lounge +loungewear +love knot +lunchroom +luxury liner +lyre +machine +machine +machine bolt +machine gun +machinery +machine screw +machine tool +magic lantern +magnet +magnetic disk +magnetic recorder +magnetic tape +magnifier +magnum +magnus hitch +mailer +mainframe +mainmast +main-topmast +main yard +makeup +mallet +mallet +mallet +mandolin +manger +man-of-war +manometer +MANPAD +mansard +mansion +marina +marker +marketplace +maser +mask +masonry +mass spectrometer +mast +mast +mat +mat +match +match +match plane +material +materiel +Matthew Walker +maul +measure +measuring instrument +measuring stick +mechanical device +mechanical system +mechanism +medical building +medical instrument +memorial +memory +memory chip +memory device +menhir +man's clothing +mercantile establishment +mercury barometer +mercury thermometer +mercury-vapor lamp +mess +metal screw +meteorological balloon +meter +meterstick +microbalance +microfilm +microscope +military hospital +military quarters 
+military vehicle +mill +milldam +millinery +mine +minibike +mink +minster +Minuteman +mirror +mixer +mizzenmast +module +mold +moldboard plow +monitor +monitor +morgue +mortise joint +motion-picture camera +motion-picture film +motor +motorboat +motorcycle +motor hotel +motor vehicle +mound +mount +mouse button +movie projector +moving-coil galvanometer +mug +multiplex +multiplexer +musette pipe +mushroom anchor +musical instrument +musket +musket ball +muslin +muzzle loader +narrowbody aircraft +nautilus +navigational system +naval equipment +naval gun +naval radar +naval weaponry +navigational instrument +nebuchadnezzar +neckline +neckpiece +necktie +neckwear +needle +needlework +negligee +net +net +net +net +network +network +night bell +nightwear +noisemaker +nonsmoker +non-volatile storage +nose flute +nuclear reactor +nuclear weapon +nursery +oar +oblique bandage +oboe da caccia +oboe d'amore +obstacle +office +office furniture +oil lamp +oil paint +oil tanker +olive drab +omnidirectional antenna +onion dome +open-air market +open-end wrench +opener +openside plane +ophthalmoscope +optical device +optical disk +optical instrument +optical telescope +organ pipe +outbuilding +outerwear +outfit +outrigger canoe +outside mirror +oven +overgarment +overhand knot +overhang +overhead projector +overnighter +overshoe +oxford +package +packaging +packing box +paddle +paddle steamer +page printer +paint +pallium +pan +pan +panic button +panopticon +panopticon +pantechnicon +pantry +pants suit +panzer +paper chain +paper fastener +parabolic reflector +parapet +parasail +parka +parsonage +particle detector +partition +passenger ship +passenger train +passenger van +passive matrix display +passkey +patch +patchouli +patchwork +patina +patisserie +pavis +peavey +pedal +pedestal table +pedestrian crossing +pedicab +peg +pen +penal institution +pencil +pendulum +pendulum clock +percolator +percussion instrument +perfumery +peripheral +periwig +personal computer +petticoat 
+Phillips screw +Phillips screwdriver +phonograph record +photographic equipment +photographic paper +photometer +physical pendulum +piano +piccolo +pick +pick +pickle barrel +piece of cloth +pile +pillow lace +pilothouse +pin +pincer +pinstripe +pipe +pipet +pipe wrench +pistol +pivot +place of business +place of worship +planetarium +planner +plant +planter +plasterboard +plastic laminate +plastic wrap +plastron +plate +platform +platform +platform rocker +plating +pleat +plethysmograph +plexor +pliers +plug +plug +pneumatic drill +pocket +pocket-handkerchief +pocketknife +pointed arch +polyester +polygraph +pomade +pontifical +pool ball +poorhouse +porcelain +porch +portable computer +portico +post +posthole digger +pot +potential divider +potpourri +pottery +pouch +poultice +powder +powder keg +power brake +power mower +power saw +power shovel +power tool +press +press +pressure dome +pressure gauge +pressure suit +printed circuit +printer +prison camp +prod +prolonge knot +prompter +prong +propeller +propeller plane +prosthesis +protective covering +protective garment +pruning saw +pruning shears +public house +public toilet +public transport +pull +pull chain +pulley +Pullman +pullover +pulse counter +pump +pump +pump house +punch +punch press +purifier +push broom +push button +pusher +puzzle +pyrometer +pyx +QWERTY keyboard +racing boat +rack +rack +radar +radiogram +radio interferometer +radio link +radiometer +radio receiver +radiotelegraph +radiotelephone +radio transmitter +raft +rail +rail fence +railing +raincoat +rake +ramp +rampart +random-access memory +rayon +razor +reaction-propulsion engine +reactor +reading lamp +reading room +read-only memory +rearview mirror +receiver +receptacle +reception room +recess +reconnaissance plane +recorder +recording +record player +recreation room +recycling bin +reed stop +reef knot +refectory table +refinery +reflecting telescope +reflector +reformatory +refracting telescope +refrigerator car +refuge +regalia 
+regimentals +regulator +rein +religious residence +removable disk +repair shop +repeating firearm +reproducer +rescue equipment +reservoir +reset button +residence +resistor +resonator +respirator +restraint +retort +rheostat +rib +ribbed vault +riddle +ride +riding boot +riding mower +rifle ball +rig +rink +river boat +road +roadway +robe +rocket +rocket +rod +roller +roller +in-line skate +roller blind +roller coaster +rolling hitch +Rolodex +Roman building +roof +roof +room +roost +rope +rose water +rotary engine +rotating mechanism +rotating shaft +rotisserie +rotor +round arch +router plane +row house +royal mast +rubber bullet +rug +rushlight +sable +sable coat +sack +sackbut +sacking +saddle +safe +safety belt +safety curtain +safety fuse +safety match +sail +sailboat +sailing vessel +salver +sandglass +sash +satellite +satellite television +saucepan +savings bank +saw +sawhorse +scale +scarf +school +scientific instrument +scissors +scoop +scratcher +screen +screen +screen +screw eye +scrub plane +scuffer +sculpture +sea boat +sea chest +seam +seaplane +seat +seat +second hand +secretary +security system +seeker +selector +self-propelled vehicle +semiautomatic firearm +semiautomatic pistol +semiconductor device +serger +serpent +serving cart +serving dish +set +setscrew +setscrew +sewing needle +sextant +shackle +shade +shaft +shag rug +shaker +shaper +shaping tool +sharpener +shaving cream +shaving foam +shawl +shawm +shears +sheath +shed +sheepshank +sheet bend +shelf +shell +shell +shell +shellac +shelter +shelter +shelter +shield +ship +shipboard system +shirt +shirtfront +shock absorber +shoe +shooting brake +shop +short pants +shotgun +shoulder holster +shrine +shutter +shuttle +sidewinder +sieve +sifter +sights +signaling device +signboard +silk +simulator +single bed +single-breasted suit +single-reed instrument +sitz bath +six-pack +skate +skein +skeleton +skewer +skidder +skid lid +skiff +ski pole +skirt +ski tow +skullcap +slack suit +slat +sled 
+sleeper +sleeping car +sleeve +sleeve +slide projector +slipknot +slipper +sloop +slop pail +slot machine +small boat +smart bomb +smoker +smooth plane +snack bar +snap-brim hat +snare drum +sniper rifle +Sno-cat +soapbox +socle +sofa +sonograph +sorter +sound recording +soup ladle +source of illumination +soutane +spacecraft +spade +spar +spatula +spear +spear +spectacles +spectrograph +spectroscope +speedometer +spider +spike +spike +spinet +spinning machine +spiral ratchet screwdriver +spiral spring +spit +spokeshave +sponge mop +spoon +sports equipment +sports implement +sportswear +spot +spring +spring balance +springboard +sprit +square +square knot +squash racket +squawk box +squeezer +squinch +stabilizer +stabilizer +stable gear +stadium +stall +stamp mill +stand +standard cell +staple +starter +state prison +station +statue +stay +steakhouse +stealth aircraft +stealth bomber +stealth fighter +steam bath +steamboat +steamer +steam iron +steam whistle +steel mill +steelyard +steeple +steering system +step +step-up transformer +stereo +stick +stick +still +stilt +Stinger +stock +stockcar +stock car +stocking +stonework +stool +stopper knot +storage battery +storage space +storeroom +stove +stove bolt +Stradavarius +straight chair +strap +strap +stringed instrument +strip +strongbox +stronghold +strongroom +structural member +structure +stylus +submachine gun +submersible +submersible +subwoofer +suction pump +suede cloth +sunbonnet +sunhat +supermarket +superstructure +supply chamber +support +support +support column +supporting structure +supporting tower +surface lift +surface-to-air missile +surgeon's knot +surgical instrument +surgical knife +surplice +surveillance system +surveying instrument +surveyor's level +swamp buggy +sweater +swimsuit +sword +synchrotron +system +tabi +table +table +table knife +tableware +tabor +tachometer +tack +tack hammer +talaria +tambour +tambourine +tampon +tank +tank car +tannoy +tape +tape deck +tape recorder +target 
+tavern +tea chest +teaching aid +tea gown +teashop +teaspoon +tea-strainer +tea tray +telecommunication system +telephone +telephone line +telephone receiver +telephone system +telephone wire +telescope +television antenna +television camera +television equipment +television monitor +temple +temple +tender +tennis racket +tenor drum +tenoroon +tenpenny nail +tent +tenterhook +terminal +terminal +test rocket +tetraskelion +textile machine +textile mill +theater +theodolite +thermometer +thermostat +three-piece suit +three-way switch +thumbscrew +thumbtack +tights +tile +timber +timber hitch +timbrel +time-fuse +timepiece +timer +time-switch +tire chain +tithe barn +toecap +toga +toggle switch +toilet +toilet powder +toiletry +toilet water +token +tomograph +toner +tongs +tool +toolbox +tooth +toothbrush +top +top +topgallant +topmast +topsail +torpedo +torpedo boat +touch screen +towel +toweling +tower +toy box +track +tracked vehicle +trailer +trailer +train +trammel +transdermal patch +transformer +transistor +transmission +transmitter +transporter +trap +trapeze +travel iron +treasure chest +trellis +trench +trial balloon +triclinium +troop carrier +trough +trouser +trowel +truck +trunk +try square +tube +tuck shop +tun +tunic +turbine +Turkish towel +Turk's head +turner +turntable +turtleneck +tweed +tweeter +twenty-two +two-piece +typesetting machine +typewriter +ultraviolet lamp +undercarriage +undergarment +underpants +underwear +uneven parallel bars +uniform +university +uplift +urn +urn +utensil +vacuum flask +valve +van +van +varnish +vehicle +veranda +vertical file +vessel +vessel +vest +vibrator +vibrator +videocassette +video recording +vigil light +viol +vise +vivarium +voltaic cell +voltmeter +wagon +waist pack +walking stick +wall +wall +wall unit +ward +warehouse +warship +wash +washer +washtub +watch +watchtower +water-base paint +water butt +water cart +watercolor +water-cooled reactor +water gauge +water ski +waterwheel +weapon +weaponry 
+weatherglass +weathervane +web +wedge +wedge +weighbridge +weight +weir +weld +well +whaler +wheel +wheelchair +wheeled vehicle +wheelwork +whetstone +whip +whisk +whispering gallery +white goods +whorehouse +wicker basket +widebody aircraft +winch +Winchester +wind instrument +window +window +window blind +window envelope +Windsor knot +wine bucket +wine cask +wineglass +wire +wire +wire matrix printer +wiring +woman's clothing +wood +woodenware +woodscrew +woodwind +woofer +workbasket +workbench +work-clothing +worktable +workwear +wrapping +wrench +writing desk +writing implement +X-ray film +X-ray machine +yacht chair +yard +yard +yardstick +yoke +zither +zoot suit +grain +light +colorlessness +chromatic color +black +gray +dark red +orange +yellow +green +blue +purple +reddish purple +pink +light brown +reddish brown +complexion +skin +epidermal cell +columnar cell +macule +specimen +milk +embryonic cell +leukocyte +neutrophil +astrocyte +exoskeleton +medium +film +press +print media +storage medium +journalism +photojournalism +newspaper +telecommunication +telephone +call +long distance +wireless +broadcasting +television +reception +chat room +portal site +wordbook +album +concept album +magazine +movie +sign +comestible +course +dainty +dish +fare +diet +dietary supplement +liquid diet +reducing diet +vegetarianism +ration +field ration +foodstuff +starches +concentrate +meal +roughage +flour +wheat flour +nutriment +commissariat +canned food +canned meat +meal +breakfast +lunch +dinner +supper +buffet +picnic +cookout +bite +entree +side dish +casserole +chicken casserole +appetizer +cocktail +hors d'oeuvre +relish +dip +soup +madrilene +broth +broth +chowder +clam chowder +stew +goulash +fish stew +fricassee +ragout +ready-mix +powdered sugar +granulated sugar +brown sugar +sweet +confiture +candy +hard candy +patty +brittle +chewing gum +candied fruit +candied citrus peel +fudge +gumdrop +mint +kiss +lozenge +taffy +dessert +dumpling +frozen dessert 
+mousse +mousse +whip +pudding +pudding +tipsy cake +ice +chocolate ice cream +Neapolitan ice cream +peach ice cream +strawberry ice cream +tutti-frutti +vanilla ice cream +split +pudding +custard +pastry +turnover +puff paste +phyllo +fish cake +conserve +jam +jelly +apple jelly +marmalade +gelatin +gelatin dessert +patty +stuffing +bread +breadstick +bun +cracker +dark bread +flatbread +loaf of bread +quick bread +rye bread +toast +white bread +French bread +cornbread +johnnycake +muffin +scone +onion roll +sweet roll +onion bagel +biscuit +baking-powder biscuit +soft pretzel +sandwich +hamburger +gruel +edible fruit +vegetable +crudites +legume +greens +solanaceous vegetable +root vegetable +potato +baked potato +sweet potato +snack food +corn chip +tortilla chip +cruciferous vegetable +cabbage +kale +red cabbage +savoy cabbage +squash +summer squash +yellow squash +winter squash +turban squash +gherkin +sprout +beet +pepper +sweet pepper +hot pepper +chili +jalapeno +onion +Spanish onion +salad green +lettuce +butterhead lettuce +bean +pea +green pea +common bean +fresh bean +green bean +shell bean +lima bean +soy +celery +chicory +coffee substitute +chicory escarole +corn +hominy +cress +tomato +cherry tomato +salsify +turnip +edible nut +apple +eating apple +Delicious +McIntosh +Pippin +cooking apple +berry +currant +citrus +temple orange +mandarin +bitter orange +sweet orange +Jaffa orange +navel orange +Valencia orange +lime +almond +plum +dried fruit +raisin +passion fruit +cocoa +melon +muskmelon +winter melon +cherry +sweet cherry +heart cherry +sour cherry +grape +fox grape +muscadine +slipskin grape +vinifera grape +Tokay +cherimoya +soursop +sweetsop +ilama +pond apple +olive +pear +edible seed +walnut +feed +fodder +oil cake +timothy +grain +barley +wheat +rice +mash +bird feed +petfood +salad +tossed salad +combination salad +pasta salad +fruit salad +ingredient +flavorer +condiment +herb +spice +cinnamon +pepper +garlic +mustard +sage +savory 
+curry +paprika +pickle +sweet pickle +vinegar +sauce +hot sauce +dressing +mayonnaise +cheese sauce +hot-fudge sauce +white sauce +spaghetti sauce +boiled egg +hard-boiled egg +Easter egg +omelet +firm omelet +souffle +dairy product +milk +milk +powdered milk +cream +butter +clarified butter +yogurt +curd +cheese +cream cheese +bleu +cheddar +Swiss cheese +spread +pate +sweetening +sugar +syrup +batter +bread dough +chicken and rice +pasta +Tetrazzini +chili dog +fondue +fondue +hash +kabob +seafood Newburg +meatball +pilaf +sausage pizza +pepperoni pizza +cheese pizza +anchovy pizza +Sicilian pizza +porridge +fish loaf +salmon loaf +scallopine +taco +beef burrito +quesadilla +tostada +beverage +concoction +mix +filling +potion +elixir +alcohol +brew +beer +lager +Weissbier +malt +ale +stout +mead +wine +white wine +sparkling wine +Burgundy +Beaujolais +Medoc +Pinot noir +Bordeaux +claret +Chianti +Cabernet +Merlot +dessert wine +Rhine wine +Rioja +Saint Emilion +zinfandel +table wine +vermouth +fortified wine +Madeira +liquor +brandy +gin +rum +whiskey +corn whiskey +Irish +Scotch +liqueur +coffee liqueur +orange liqueur +mixed drink +cocktail +highball +Bloody Mary +daiquiri +manhattan +martini +sling +sour +caffe latte +cider +sweet cider +juice +fruit juice +grape juice +orange juice +fruit drink +mulled wine +soft drink +cola +coffee +punch +champagne cup +claret cup +rickey +tea +tea +herb tea +tisane +black tea +green tea +water +drinking water +mineral water +vitamin pill +collection +suburb +residence +littoral +grassland +pasture +resort +field +air bubble +arroyo +ascent +atoll +bank +bank +bar +barrier reef +basin +beach +burrow +canyon +cave +continental glacier +crag +crater +dale +descent +draw +dune +geological formation +glacier +glen +gorge +gulch +gully +highland +hill +hillside +hole +hollow +iceberg +ice mass +ion +knoll +landfall +landfill +lather +ledge +lowland +meteorite +mountain +mull +natural depression +natural elevation +nullah +ocean 
floor +outcrop +plain +point +precipice +ravine +reef +ridge +ridge +rift valley +rock +sandbank +seaside +shiner +shore +slope +soapsuds +spume +tableland +tideland +volcanic crater +wadi +spiritual leader +adventurer +anomaly +benefactor +commoner +contestant +discussant +entertainer +female +finisher +inhabitant +native +juvenile +lover +male +mediator +national +peer +recipient +sensualist +traveler +unwelcome person +unskilled person +worker +wrongdoer +Black +White +Semite +white man +Mongol +Nahuatl +Caddo +Penutian +Teton +Taracahitian +Slav +Catholic +Altaic +Bornean +Canadian +Central American +Britisher +English person +Englishwoman +Ethiopian +Parisian +Greek +Italian +Japanese +Mexican +Nigerian +North American +Pakistani +South American Indian +Filipino +Polynesian +Scandinavian +South African +South American +Turki +American +New Yorker +abbess +abstainer +academic administrator +accomplice +acquaintance +acquirer +aerialist +actor +actor +addict +adjutant +admirer +adulterer +advertiser +advocate +analyst +ancestor +announcer +announcer +appointee +appreciator +appropriator +archbishop +architect +army engineer +army officer +arrival +articulator +asserter +assistant +associate +astronaut +athlete +attendant +aunt +authoritarian +authority +aviator +back +bad person +ballet dancer +bullfighter +baron +bartender +baseball coach +base runner +basketball player +believer +betrothed +bigot +big shot +biochemist +bisexual +boatman +bond servant +botanist +Boy Scout +buddy +campaigner +captain +card player +careerist +caretaker +cavalryman +celebrity +charmer +child +child +cipher +citizen +civil rights leader +cleaner +clergyman +cleric +clerk +climber +closer +clown +coach +cobbler +collaborator +college student +collegian +commanding officer +commissioned officer +commissioned military officer +commissioner +committee member +communist +compulsive +computer scientist +computer user +contractor +convict +copycat +counselor +craftsman +creditor +critic 
+curate +dancer +dancer +darling +date +daughter +dawdler +deacon +deaf person +debtor +deliveryman +descender +designated hitter +detective +detractor +director +disbeliever +dispatcher +distributor +doctor +domestic partner +draftsman +drinker +drinker +drug addict +drug user +drummer +drunkard +eager beaver +earner +eavesdropper +economist +editor +egotist +elder +elected official +emissary +employee +employer +endomorph +enemy +entrant +examiner +exhibitionist +fan +fancier +farmer +farmhand +fascist +father +female aristocrat +female offspring +female child +fielder +fireman +first baseman +first sergeant +flag officer +flatterer +foe +folk dancer +follower +football player +forefather +forger +founder +free agent +friar +monk +gambler +generator +geneticist +genitor +geologist +girl +godchild +godparent +golfer +grandma +grandmaster +grandparent +granter +great grandchild +great grandparent +grouch +guard +guest +guide +gymnast +Gypsy +hack +hairdresser +hater +headmaster +hearer +hedonist +heir +herder +homeless +horseman +host +host +hypocrite +important person +incumbent +infielder +informer +in-law +insurgent +investigator +investor +journalist +judge +juror +Counsel to the Crown +kinswoman +laborer +lama +landowner +lawgiver +lawman +lawyer +liberator +lieutenant +lineman +literate +litigant +Lord +failure +lowerclassman +lumberman +maid +maker +malcontent +martinet +master of ceremonies +masturbator +medical officer +medical practitioner +medical scientist +mender +meteorologist +middle-aged man +miler +military attache +military officer +military policeman +minister +minor leaguer +misfit +mixed-blood +model +moneymaker +mother +mourner +mover +musician +Muslimah +mystic +nanny +neonate +nephew +neutral +newcomer +newcomer +newspaper editor +niece +noncommissioned officer +nurse +observer +occultist +oldster +old woman +opportunist +orator +originator +outfielder +right fielder +right-handed pitcher +painter +panelist +pardoner +parodist +party 
+passenger +patient +patron +payer +peddler +percussionist +personal representative +personification +pervert +petitioner +Pharaoh +phonetician +physical therapist +physicist +pimp +pisser +pitcher +planner +player +poet +politician +practitioner +prayer +preserver +president +priest +princess +principal +proctor +programmer +promiser +propagandist +prosecutor +psychic +pusher +queen +queen +ranch hand +reader +recruit +recruiter +religious leader +repairman +reporter +representative +reprobate +rescuer +reservist +restrainer +retailer +retiree +revolutionist +rich person +civil authority +runner +running back +rustic +saboteur +sailor +salesman +salesperson +scalper +schemer +scholar +schoolchild +scientist +second baseman +secretary +seeker +selfish person +seller +serf +serviceman +settler +shrew +sibling +sick person +singer +sister +skeptic +skier +sleeper +slob +smith +snoop +social climber +socialist +social scientist +sociologist +soldier +son +songster +sorcerer +sovereign +speaker +specialist +spectator +stand-in +star +stepparent +stock trader +stranger +strategist +student +subordinate +suitor +superior +surgeon +sweetheart +sympathizer +tax assessor +taxonomist +teacher +television reporter +tenant +tenant +tennis player +testator +testee +theologian +therapist +thinker +thrower +toastmaster +trader +traffic cop +trainer +traitor +traveling salesman +tyrant +upstart +upstart +utility man +vacationer +vegetarian +vice president +victim +volunteer +votary +waiter +waitress +wanderer +wanton +washer +white supremacist +wife +winner +winner +woman +workman +worshiper +wright +writer +wilding +bryophyte +liverwort +pteridophyte +fern +fern ally +spore +spermatophyte +perennial +gymnosperm +ephedra +cycad +sago palm +zamia +pine +pinon +nut pine +white pine +yellow pine +larch +fir +silver fir +cedar +spruce +hemlock +douglas fir +cedar +cypress +arborvitae +araucaria +kauri pine +celery pine +yellowwood +gymnospermous yellowwood +yew +angiosperm +dicot 
+flower +wildflower +inflorescence +pistil +pericarp +oilseed +custard apple +barberry +allspice +laurel +anise tree +magnolia +moonseed +buttercup +aconite +baneberry +anemone +thimbleweed +columbine +clematis +delphinium +nigella +wax myrtle +zebrawood +legume +legume +darling pea +clover +acacia +wattle +albizzia +nitta tree +dogbane +allamanda +carissa +frangipani +rauwolfia +arum +alocasia +anthurium +caladium +monstera +nephthytis +arrow arum +calla lily +duckweed +watermeal +birthwort +sandwort +mouse-ear chickweed +pink +china pink +lychnis +silene +chickweed +fig marigold +amaranth +orach +saltbush +beet +sand verbena +four o'clock +echinocactus +prickly pear +pokeweed +portulaca +flame flower +caper +spiderflower +crucifer +cress +watercress +rock cress +cabbage +head cabbage +turnip plant +mustard +wallflower +woad +stock +radish plant +pennycress +poppy +prickly poppy +composite +compass plant +everlasting +achillea +ageratum +ragweed +ammobium +burdock +artemisia +mugwort +aster +wood aster +common daisy +bur marigold +calendula +thistle +carline thistle +catananche +centaury +knapweed +chrysanthemum +golden aster +goldenbush +plume thistle +woolly thistle +coreopsis +fleabane +woolly sunflower +cotton rose +gazania +African daisy +cudweed +gumweed +goldenbush +sneezeweed +sunflower +hawkweed +marsh elder +krigia +hawkbit +blazing star +rattlesnake root +daisybush +coneflower +coneflower +cutleaved coneflower +golden thistle +white-topped aster +goldenrod +sow thistle +marigold +dandelion +crownbeard +zinnia +achene +campanula +orchid +orchis +arethusa +helleborine +coral root +lady's slipper +large yellow lady's slipper +helleborine +fringed orchis +rein orchid +spider orchid +moth orchid +butterfly orchid +ladies' tresses +vanda +vanilla +yam +primrose +pimpernel +featherfoil +loosestrife +water pimpernel +gramineous plant +grass +wheatgrass +foxtail +broom grass +oat +brome +grama +reed grass +burgrass +crabgrass +lyme grass +wild rye +plume grass 
+rye grass +ricegrass +meadowgrass +millet +reed +sorghum +grain sorghum +cordgrass +cereal +wheat +corn +mealie +zoysia +bamboo +cotton grass +spike rush +pandanus +cattail +grain +kernel +gourd +gourd +squash +summer squash +marrow +winter squash +turban squash +bryony +sweet melon +luffa +lobelia +mallow +hollyhock +althea +poppy mallow +seashore mallow +globe mallow +tulipwood tree +sterculia +bottle-tree +screw tree +cacao +linden +herb +protea +banksia +grevillea +macadamia +casuarina +beefwood +heath +bearberry +huckleberry +kalmia +rhododendron +cranberry +blueberry +shortia +Australian heath +epacris +wintergreen +pipsissewa +beech +chestnut +tanbark oak +southern beech +New Zealand beech +oak +live oak +white oak +red oak +scrub oak +chestnut oak +birch +alder +hornbeam +hop hornbeam +hazelnut +centaury +gentian +fringed gentian +olive tree +fringe tree +ash +red ash +jasmine +privet +lilac +liquidambar +walnut +hickory +wing nut +loosestrife +myrtle +gum tree +eucalyptus +flooded gum +mallee +stringybark +tupelo +enchanter's nightshade +willowherb +fuchsia +evening primrose +daphne +canna +banana +ginger +begonia +tuberous begonia +poon +St John's wort +rockrose +dipterocarp +candlewood +reseda +viola +violet +nettle +cannabis +mulberry +fig tree +fig +elm +hackberry +iridaceous plant +bearded iris +beardless iris +crocus +amaryllis +blood lily +narcissus +daffodil +liliaceous plant +colicroot +alliaceous plant +kniphofia +poker plant +asphodel +mariposa +globe lily +camas +dogtooth violet +fritillary +tulip +star-of-Bethlehem +grape hyacinth +scilla +false asphodel +bog asphodel +hellebore +death camas +sarsaparilla +Solomon's-seal +bellwort +agave +sansevieria +cassia +locust tree +senna +angelim +milk vetch +wild indigo +pea tree +glory pea +rosewood +blackwood +tick trefoil +coral tree +vetchling +wild pea +lupine +medic +mucuna +locoweed +pole bean +pea +edible-pod pea +quira +hoary pea +bush pea +vetch +palm +sago palm +feather palm +fan palm 
+palmetto +areca +calamus +oil palm +raffia palm +lady palm +eriogonum +rhubarb +water plantain +waterweed +pondweed +rose +agrimonia +flowering quince +cotoneaster +avens +apple tree +wild apple +crab apple +Iowa crab +cinquefoil +plum +wild plum +bullace +apricot +cherry +wild cherry +sweet cherry +sour cherry +almond tree +almond +bird cherry +flowering cherry +chokecherry +fruit tree +bramble bush +raspberry +mountain ash +service tree +spirea +madderwort +coffee +cinchona +bedstraw +genipa +hamelia +honeysuckle +American fly honeysuckle +teasel +scabious +geranium +cranesbill +storksbill +incense tree +mahogany +silver ash +milkwort +citrus +orange +mandarin +lemon +kumquat +prickly ash +bitterwood tree +ailanthus +nasturtium +willow +osier +sallow +poplar +black poplar +cottonwood +aspen +soapberry +soapberry vine +harpullia +pachysandra +spindle tree +maple +box elder +holly +sumac +horse chestnut +persimmon +buckthorn +styrax +carnivorous plant +pitcher plant +sedum +philadelphus +saxifrage +astilbe +alumroot +miterwort +parnassia +currant +plane tree +phlox +acanthus +catalpa +anchusa +comfrey +convolvulus +bindweed +gloxinia +streptocarpus +waterleaf +nemophila +scorpionweed +giant hyssop +bugle +wood mint +calamint +coleus +dead nettle +origanum +horehound +monarda +savory +germander +thyme +blue curls +snapdragon +kitten-tails +Indian paintbrush +foxglove +toadflax +veronica +nightshade +thorn apple +matrimony vine +cupflower +petunia +salpiglossis +spurge +croton +cassava +slipper spurge +camellia +umbellifer +angelica +astrantia +caraway +fennel +parsnip +parsley +sanicle +dogwood +valerian +bristle fern +flowering fern +climbing fern +clover fern +adder's tongue +grape fern +ergot +sclerotinia +earthball +Podaxaceae +false truffle +rhizopus +slime mold +cellular slime mold +downy mildew +pythium +Sarcosomataceae +club fungus +lichen +lecanora +fungus +basidiomycete +mushroom +mushroom +mushroom +toadstool +horse mushroom +meadow mushroom +royal 
agaric +false deathcap +fly agaric +death cap +blushing mushroom +destroying angel +chanterelle +floccose chanterelle +pig's ears +cinnabar chanterelle +jack-o-lantern fungus +inky cap +shaggymane +milkcap +fairy-ring mushroom +oyster mushroom +olive-tree agaric +Pholiota astragalina +Pholiota aurea +Pholiota destruens +Pholiota flammans +Pholiota flavida +nameko +Pholiota squarrosa-adiposa +Pholiota squarrosa +Pholiota squarrosoides +Stropharia ambigua +Stropharia hornemannii +Stropharia rugoso-annulata +Entoloma lividum +Entoloma aprile +Chlorophyllum molybdites +lepiota +parasol mushroom +poisonous parasol +Lepiota naucina +Lepiota rhacodes +American parasol +Lepiota rubrotincta +Lepiota clypeolaria +onion stem +blewits +sandy mushroom +Tricholoma pessundatum +Tricholoma sejunctum +man-on-a-horse +Tricholoma venenata +Tricholoma pardinum +Tricholoma vaccinum +Tricholoma aurantium +Pluteus aurantiorugosus +Pluteus magnus +deer mushroom +straw mushroom +Volvariella bombycina +Clitocybe clavipes +Clitocybe dealbata +Clitocybe inornata +Clitocybe robusta +Clitocybe irina +Clitocybe subconnexa +winter mushroom +mycelium +ascomycete +Clavicipitaceae +yeast +discomycete +morel +Verpa +false morel +lorchel +helvella +Gyromitra californica +Gyromitra sphaerospora +Gyromitra esculenta +Gyromitra infula +Gyromitra gigas +gasteromycete +common stinkhorn +Phallus ravenelii +dog stinkhorn +stinky squid +puffball +Geastrum coronatum +Astreus pteridis +Astreus hygrometricus +polypore +Boletus chrysenteron +Boletus edulis +Frost's bolete +Boletus luridus +Boletus mirabilis +Boletus pallidus +Boletus pulcherrimus +Boletus pulverulentus +Boletus roxanae +Boletus subvelutipes +Boletus variipes +Boletus zelleri +Fuscoboletinus paluster +Fuscoboletinus serotinus +Leccinum fibrillosum +Suillus albivelatus +old-man-of-the-woods +Boletellus russellii +jelly fungus +rust +smut +cornsmut +flag smut fungus +waxycap +Hygrocybe acutoconica +Hygrophorus borealis +Hygrophorus caeruleus 
+Hygrophorus inocybiformis +Hygrophorus kauffmanii +Hygrophorus marzuolus +Hygrophorus purpurascens +Hygrophorus russula +Hygrophorus sordidus +Hygrophorus tennesseensis +Hygrophorus turundus +Neohygrophorus angelesianus +Cortinarius armillatus +Cortinarius atkinsonianus +Cortinarius corrugatus +Cortinarius gentilis +Cortinarius mutabilis +Cortinarius semisanguineus +Cortinarius subfoetidus +Cortinarius violaceus +Gymnopilus spectabilis +Gymnopilus validipes +Gymnopilus ventricosus +mold +mildew +candida +houseplant +succulent +weed +sporophyll +sporangium +poisonous plant +vine +tree +bean tree +gymnospermous tree +conifer +angiospermous tree +nut tree +spice tree +bonsai +subshrub +bramble +liana +desert plant +marsh plant +strangler +root +receptacle +scape +peduncle +flower cluster +raceme +cyme +bulbous plant +fruit +seed +bean +nut +berry +aggregate fruit +drupe +drupelet +pome +pod +husk +buckthorn +vinifera +true pepper +peperomia +bract +palmate leaf +pinnate leaf +dentate leaf +branchlet +polypody +strap fern +staghorn fern +spleenwort +chain fern +davallia +hare's-foot fern +shield fern +wood fern +lady fern +bladder fern +holly fern +woodsia +maidenhair +brittle maidenhair +lip fern +cliff brake +horsetail +club moss +spikemoss +beech fern +shoestring fungus +Armillaria caligata +Armillaria ponderosa +Armillaria zelleri +honey mushroom +milkweed +stapelia +stephanotis +orangery +figure +plane figure +solid figure +line +convex shape +concave shape +cylinder +round shape +polygon +concave polygon +amorphous shape +closed curve +simple closed curve +cone +circle +ring +loop +ellipse +triangle +spherical polygon +angular distance +groove +bulge +bow +balance +toroid +boundary +incisure +notch +wrinkle +tree +regular polyhedron +carbon +rock +soil +high explosive +culture medium +agar +paper +paving +plaster +stucco +tear gas +vitamin +fat-soluble vitamin +water-soluble vitamin +vitamin A +B-complex vitamin +vitamin E +vitamin K \ No newline at end of file 
diff --git a/workloads/realworld/pipeline/darknet/cfg/imagenet1k.data b/workloads/realworld/pipeline/darknet/cfg/imagenet1k.data new file mode 100644 index 0000000000000000000000000000000000000000..daf120a3c020003e8ed08096c51304272ca3ba27 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/imagenet1k.data @@ -0,0 +1,9 @@ +classes=1000 +train = /data/darknet/imagenet_mini/valid.list +valid = /data/darknet/imagenet_mini/valid.list +test = /data/darknet/imagenet_mini/valid.list +backup = /data/darknet/backup/ +labels = /data/darknet/imagenet_mini/imagenet.labels.list +names = /data/darknet/imagenet_mini/imagenet.shortnames.list +top=5 + diff --git a/workloads/realworld/pipeline/darknet/cfg/imagenet22k.dataset b/workloads/realworld/pipeline/darknet/cfg/imagenet22k.dataset new file mode 100644 index 0000000000000000000000000000000000000000..e25ef007ecceb096e5846ee7cacd1fd54fb8f9e4 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/imagenet22k.dataset @@ -0,0 +1,9 @@ +classes=21842 +train = /data/imagenet/imagenet22k.train.list +valid = /data/imagenet/imagenet22k.valid.list +#valid = /data/imagenet/imagenet1k.valid.list +backup = /home/pjreddie/backup/ +labels = data/imagenet.labels.list +names = data/imagenet.shortnames.list +top = 5 + diff --git a/workloads/realworld/pipeline/darknet/cfg/imagenet9k.hierarchy.dataset b/workloads/realworld/pipeline/darknet/cfg/imagenet9k.hierarchy.dataset new file mode 100644 index 0000000000000000000000000000000000000000..41fb71b065544b919bc8ed7d723afb5d04ad85ac --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/imagenet9k.hierarchy.dataset @@ -0,0 +1,9 @@ +classes=9418 +train = data/9k.train.list +valid = /data/imagenet/imagenet1k.valid.list +leaves = data/imagenet1k.labels +backup = /home/pjreddie/backup/ +labels = data/9k.labels +names = data/9k.names +top=5 + diff --git a/workloads/realworld/pipeline/darknet/cfg/jnet-conv.cfg b/workloads/realworld/pipeline/darknet/cfg/jnet-conv.cfg new file mode 100644 
index 0000000000000000000000000000000000000000..056f82aa6e2a0710a664c4740ab763961e4de33d --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/jnet-conv.cfg @@ -0,0 +1,118 @@ +[net] +batch=1 +subdivisions=1 +height=10 +width=10 +channels=3 +learning_rate=0.01 +momentum=0.9 +decay=0.0005 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + diff --git a/workloads/realworld/pipeline/darknet/cfg/openimages.data b/workloads/realworld/pipeline/darknet/cfg/openimages.data new file mode 100644 index 0000000000000000000000000000000000000000..fa80e5ab7d8576d391c7cac9dfc8367aab704139 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/openimages.data @@ -0,0 +1,8 @@ +classes= 601 +train = /home/pjreddie/data/openimsv4/openimages.train.list +#valid = coco_testdev +valid = data/coco_val_5k.list +names = data/openimages.names +backup = /home/pjreddie/backup/ +eval=coco + diff --git a/workloads/realworld/pipeline/darknet/cfg/resnet101.cfg 
b/workloads/realworld/pipeline/darknet/cfg/resnet101.cfg new file mode 100644 index 0000000000000000000000000000000000000000..de458820bcd35f5e65d858f9f661e42653ed0184 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/resnet101.cfg @@ -0,0 +1,990 @@ +[net] +# Training +# batch=128 +# subdivisions=2 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + +[cost] +type=sse + diff --git a/workloads/realworld/pipeline/darknet/cfg/resnet152.cfg b/workloads/realworld/pipeline/darknet/cfg/resnet152.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e8e3297ac2364b95f28fa0a0bdd4ca71f14ac82c --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/resnet152.cfg @@ -0,0 +1,1460 @@ +[net] +# Training +# batch=128 +# subdivisions=8 + +# 
Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 
+stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/resnet18.cfg b/workloads/realworld/pipeline/darknet/cfg/resnet18.cfg new file mode 100644 index 0000000000000000000000000000000000000000..275f4bdb5962d77c16f353cd3d2751e189b9344c --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/resnet18.cfg @@ -0,0 +1,228 @@ +[net] +# Training +# batch=128 +# subdivisions=1 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 
+size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/resnet18_b.cfg b/workloads/realworld/pipeline/darknet/cfg/resnet18_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c0712e9cdfaf1f28dbe3a358355405d2758e6d2b --- /dev/null +++ 
b/workloads/realworld/pipeline/darknet/cfg/resnet18_b.cfg @@ -0,0 +1,228 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] 
+activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/resnet18_t.cfg b/workloads/realworld/pipeline/darknet/cfg/resnet18_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c0712e9cdfaf1f28dbe3a358355405d2758e6d2b --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/resnet18_t.cfg @@ -0,0 +1,228 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 
+size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/resnet34.cfg b/workloads/realworld/pipeline/darknet/cfg/resnet34.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9f68f096741ae3b4898f40b76af7569d4697729f --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/resnet34.cfg @@ -0,0 +1,392 @@ +[net] +# Training +# batch=128 +# subdivisions=2 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 
+channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 
+stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/resnet50.cfg b/workloads/realworld/pipeline/darknet/cfg/resnet50.cfg new file mode 100644 index 0000000000000000000000000000000000000000..d0d7c511516e997a392bb5ba77682740c0494972 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/resnet50.cfg @@ -0,0 +1,510 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/pipeline/darknet/cfg/resnet50_b.cfg b/workloads/realworld/pipeline/darknet/cfg/resnet50_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..462479918fd608c4a0761b84c7299e51081193c6 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/resnet50_b.cfg @@ -0,0 +1,510 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/pipeline/darknet/cfg/resnet50_t.cfg b/workloads/realworld/pipeline/darknet/cfg/resnet50_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..462479918fd608c4a0761b84c7299e51081193c6 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/resnet50_t.cfg @@ -0,0 +1,510 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 
+stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/pipeline/darknet/cfg/resnext101-32x4d.cfg b/workloads/realworld/pipeline/darknet/cfg/resnext101-32x4d.cfg new file mode 100644 index 0000000000000000000000000000000000000000..8538ccc3daee2e3de589eb4e2edf868340d4924b --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/resnext101-32x4d.cfg @@ -0,0 +1,1053 @@ +[net] +# Training +# batch=128 +# subdivisions=8 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 
+size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 
+size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 
+size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/resnext152-32x4d.cfg b/workloads/realworld/pipeline/darknet/cfg/resnext152-32x4d.cfg new file mode 100644 index 0000000000000000000000000000000000000000..48279fd28eb0dbe23c7b0c593f67051cb6a62374 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/resnext152-32x4d.cfg @@ -0,0 +1,1562 @@ +[net] +# Training +# batch=128 +# subdivisions=16 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] 
+from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 
+pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=2 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/resnext50.cfg b/workloads/realworld/pipeline/darknet/cfg/resnext50.cfg new file mode 100644 index 0000000000000000000000000000000000000000..12aebdf6fbd48bde40ee22c4257e06f2e0cf46eb --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/resnext50.cfg @@ -0,0 +1,523 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/pipeline/darknet/cfg/rnn.cfg b/workloads/realworld/pipeline/darknet/cfg/rnn.cfg new file mode 100644 index 0000000000000000000000000000000000000000..61b202f3a441b701f76d9b007c6276467c639e11 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/rnn.cfg @@ -0,0 +1,38 @@ +[net] +subdivisions=1 +inputs=256 +batch = 1 +momentum=0.9 +decay=0.001 +max_batches = 2000 +time_steps=1 +learning_rate=0.1 +policy=steps +steps=1000,1500 +scales=.1,.1 + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[connected] 
+output=256 +activation=leaky + +[softmax] + + diff --git a/workloads/realworld/pipeline/darknet/cfg/rnn.train.cfg b/workloads/realworld/pipeline/darknet/cfg/rnn.train.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b9748990aceaa85cc2e43358073114606725dcbd --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/rnn.train.cfg @@ -0,0 +1,38 @@ +[net] +subdivisions=1 +inputs=256 +batch = 128 +momentum=0.9 +decay=0.001 +max_batches = 2000 +time_steps=576 +learning_rate=0.1 +policy=steps +steps=1000,1500 +scales=.1,.1 + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[connected] +output=256 +activation=leaky + +[softmax] + + diff --git a/workloads/realworld/pipeline/darknet/cfg/strided.cfg b/workloads/realworld/pipeline/darknet/cfg/strided.cfg new file mode 100644 index 0000000000000000000000000000000000000000..2f745085adc268a3e99bd7895bd4dda28227bffd --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/strided.cfg @@ -0,0 +1,182 @@ +[net] +batch=128 +subdivisions=4 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +policy=steps +scales=.1,.1,.1 +steps=200000,300000,400000 +max_batches=800000 + + +[crop] +crop_height=224 +crop_width=224 +flip=1 +angle=0 +saturation=1 +exposure=1 +shift=.2 + +[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=192 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 
+size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=1024 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=ramp + +[maxpool] +size=3 +stride=2 + +[connected] +output=4096 +activation=ramp + +[dropout] +probability=0.5 + +[connected] +output=1000 +activation=ramp + +[softmax] + diff --git a/workloads/realworld/pipeline/darknet/cfg/t1.test.cfg b/workloads/realworld/pipeline/darknet/cfg/t1.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b3628114e048dc78f4797342afd95a757c81c977 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/t1.test.cfg @@ -0,0 +1,117 @@ +[net] +batch=1 +subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,20000,30000 +scales=2.5,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 
+activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[connected] +output= 1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/pipeline/darknet/cfg/tiny.cfg b/workloads/realworld/pipeline/darknet/cfg/tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..f97327cfceebf868998bf2bb16224bd0994ebd82 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/tiny.cfg @@ -0,0 +1,174 @@ +[net] +# Train +batch=128 +subdivisions=1 +# Test +# batch=1 +# subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=320 + +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + + diff --git a/workloads/realworld/pipeline/darknet/cfg/vgg-16.cfg b/workloads/realworld/pipeline/darknet/cfg/vgg-16.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c73b17b6ccfdcc9cae9b67591b662571463569ab --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/vgg-16.cfg @@ -0,0 +1,157 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +learning_rate=0.00001 +momentum=0.9 +decay=0.0005 + +[crop] +crop_height=224 +crop_width=224 +flip=1 +exposure=1 +saturation=1 +angle=0 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu 
+ +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=1000 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/pipeline/darknet/cfg/vgg-conv.cfg b/workloads/realworld/pipeline/darknet/cfg/vgg-conv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..21e1d724c9418107f9cf82f9bffb9ae64d3e2084 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/vgg-conv.cfg @@ -0,0 +1,121 @@ +[net] +batch=1 +subdivisions=1 +width=224 +height=224 +channels=3 +learning_rate=0.00001 +momentum=0.9 +decay=0.0005 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 
+stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + diff --git a/workloads/realworld/pipeline/darknet/cfg/voc.data b/workloads/realworld/pipeline/darknet/cfg/voc.data new file mode 100644 index 0000000000000000000000000000000000000000..7807b5d2a8fd0f855a8c68e82c064dc320551da1 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/voc.data @@ -0,0 +1,6 @@ +classes= 20 +train = /home/pjreddie/data/voc/train.txt +valid = /home/pjreddie/data/voc/2007_test.txt +names = data/voc.names +backup = backup + diff --git a/workloads/realworld/pipeline/darknet/cfg/writing.cfg b/workloads/realworld/pipeline/darknet/cfg/writing.cfg new file mode 100644 index 0000000000000000000000000000000000000000..1ed899bcd63d6354e8320ace7e7f513ba1174886 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/writing.cfg @@ -0,0 +1,41 @@ +[net] +batch=128 +subdivisions=2 +height=256 +width=256 +channels=3 +learning_rate=0.00000001 +momentum=0.9 +decay=0.0005 +seen=0 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1 +size=3 +stride=1 +pad=1 +activation=logistic + +[cost] + diff --git a/workloads/realworld/pipeline/darknet/cfg/yolo9000.cfg b/workloads/realworld/pipeline/darknet/cfg/yolo9000.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e745f78a6e37611fb0f13c2d848c292cea1a89d3 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/yolo9000.cfg @@ -0,0 +1,218 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# 
batch=64 +# subdivisions=8 +batch=1 +subdivisions=1 +height=544 +width=544 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +hue=.1 +saturation=.75 +exposure=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=28269 +size=1 +stride=1 +pad=1 +activation=linear + +[region] +anchors = 0.77871, 1.14074, 3.00525, 4.31277, 9.22725, 9.61974 +bias_match=1 +classes=9418 +coords=4 +num=3 +softmax=1 +jitter=.2 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +thresh = .6 +absolute=1 +random=1 + +tree=data/9k.tree +map = data/coco9k.map diff --git a/workloads/realworld/pipeline/darknet/cfg/yolov1-tiny.cfg b/workloads/realworld/pipeline/darknet/cfg/yolov1-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a5e7b4920289ccb507a3a0356a33362bc7633581 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/yolov1-tiny.cfg @@ -0,0 +1,130 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 + +saturation=.75 +exposure=.75 +hue = .1 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,800,20000,30000 +scales=2.5,2,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] 
+batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[connected] +output= 1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/pipeline/darknet/cfg/yolov1.cfg b/workloads/realworld/pipeline/darknet/cfg/yolov1.cfg new file mode 100644 index 0000000000000000000000000000000000000000..06cf6e676170e41d24e63ec08d7b27a31c411718 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/yolov1.cfg @@ -0,0 +1,261 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 +saturation=1.5 +exposure=1.5 +hue=.1 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,20000,30000 +scales=2.5,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[local] +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[dropout] +probability=.5 + +[connected] +output= 1715 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=3 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/pipeline/darknet/cfg/yolov2-tiny-voc.cfg b/workloads/realworld/pipeline/darknet/cfg/yolov2-tiny-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c4c127cdd352bd98b3b7a3336d5c3b2efc6fadcd --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/yolov2-tiny-voc.cfg @@ -0,0 +1,138 @@ +[net] +# Testing +batch=1 
+subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +max_batches = 40200 +policy=steps +steps=-1,100,20000,30000 +scales=.1,10,.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=125 +activation=linear + +[region] +anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 +bias_match=1 +classes=20 +coords=4 +num=5 +softmax=1 +jitter=.2 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/pipeline/darknet/cfg/yolov2-tiny.cfg b/workloads/realworld/pipeline/darknet/cfg/yolov2-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..81d0ac45d6dca10f50875bfe85f7496ded8e0f63 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/yolov2-tiny.cfg @@ -0,0 +1,139 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 
+height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=425 +activation=linear + +[region] +anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 +bias_match=1 +classes=80 +coords=4 +num=5 +softmax=1 +jitter=.2 +rescore=0 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/pipeline/darknet/cfg/yolov2-voc.cfg b/workloads/realworld/pipeline/darknet/cfg/yolov2-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..dbf2de281c1200cb4889409c616e775080823268 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/yolov2-voc.cfg @@ -0,0 +1,258 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=416 +width=416 +channels=3 +momentum=0.9 
+decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 80200 +policy=steps +steps=40000,60000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 
+activation=leaky + + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[route] +layers=-9 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=64 +activation=leaky + +[reorg] +stride=2 + +[route] +layers=-1,-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=125 +activation=linear + + +[region] +anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 +bias_match=1 +classes=20 +coords=4 +num=5 +softmax=1 +jitter=.3 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/pipeline/darknet/cfg/yolov2.cfg b/workloads/realworld/pipeline/darknet/cfg/yolov2.cfg new file mode 100644 index 0000000000000000000000000000000000000000..088edf81573e83c59edd7137cbc07b6fe1433591 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/yolov2.cfg @@ -0,0 +1,258 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky 
+ +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[route] +layers=-9 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=64 +activation=leaky + +[reorg] +stride=2 + +[route] +layers=-1,-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=425 +activation=linear + + +[region] +anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 +bias_match=1 +classes=80 
+coords=4 +num=5 +softmax=1 +jitter=.3 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/pipeline/darknet/cfg/yolov3-openimages.cfg b/workloads/realworld/pipeline/darknet/cfg/yolov3-openimages.cfg new file mode 100644 index 0000000000000000000000000000000000000000..65d241a74c4c4995dbd997b1750575a83b0a17d4 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/yolov3-openimages.cfg @@ -0,0 +1,789 @@ +[net] +# Testing + batch=1 + subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=5000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 
+size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/yolov3-spp.cfg b/workloads/realworld/pipeline/darknet/cfg/yolov3-spp.cfg new file mode 100644 index 0000000000000000000000000000000000000000..4ad2a052d88328a79cff5686ff4dd1df6993a2fd --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/yolov3-spp.cfg @@ -0,0 +1,822 @@ +[net] +# Testing +batch=1 
+subdivisions=1 +# Training +# batch=64 +# subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 
+size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky 
+ +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 + +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/yolov3-tiny.cfg b/workloads/realworld/pipeline/darknet/cfg/yolov3-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..cfca3cfa6415b7b61eae238aa71107dedbe5d607 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/yolov3-tiny.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 
500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/pipeline/darknet/cfg/yolov3-tiny_b.cfg b/workloads/realworld/pipeline/darknet/cfg/yolov3-tiny_b.cfg new file mode 100644 index 
0000000000000000000000000000000000000000..0d8ded69bcf909f4cca02ab202cc99b1800a1562 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/yolov3-tiny_b.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/pipeline/darknet/cfg/yolov3-tiny_t.cfg b/workloads/realworld/pipeline/darknet/cfg/yolov3-tiny_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..0d8ded69bcf909f4cca02ab202cc99b1800a1562 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/yolov3-tiny_t.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 
+filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/pipeline/darknet/cfg/yolov3-voc.cfg b/workloads/realworld/pipeline/darknet/cfg/yolov3-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..3f3e8dfb31b7103cf7ca00cd0bef83d6d426bb8d --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/yolov3-voc.cfg @@ -0,0 +1,785 @@ +[net] +# Testing + batch=1 + subdivisions=1 +# Training +# batch=64 +# subdivisions=16 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 50200 +policy=steps +steps=40000,45000 +scales=.1,.1 + + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 
+stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 6,7,8 
+anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 
10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/yolov3.cfg b/workloads/realworld/pipeline/darknet/cfg/yolov3.cfg new file mode 100644 index 0000000000000000000000000000000000000000..938ffff23f106d65290faae217f6a9b0a715c023 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/yolov3.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 
+size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/yolov3_b.cfg b/workloads/realworld/pipeline/darknet/cfg/yolov3_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a86d73b42225ef65d8ff8692b0d72827ba3a8484 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/yolov3_b.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 
+width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 
+truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/pipeline/darknet/cfg/yolov3_t.cfg b/workloads/realworld/pipeline/darknet/cfg/yolov3_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a86d73b42225ef65d8ff8692b0d72827ba3a8484 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/cfg/yolov3_t.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 
+pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 
+stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/pipeline/darknet/examples/art.c b/workloads/realworld/pipeline/darknet/examples/art.c new file mode 100644 index 0000000000000000000000000000000000000000..932688e7b9ecbfd1a359a5d373dddf52815da9bb --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/art.c @@ -0,0 +1,59 @@ +#include "darknet.h" + +#include + +void demo_art(char *cfgfile, char *weightfile, int cam_index) +{ +#ifdef OPENCV + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + + void * cap = open_video_stream(0, cam_index, 0,0,0); + + char *window = "ArtJudgementBot9000!!!"; + if(!cap) error("Couldn't connect to webcam.\n"); + int i; + int idx[] = {37, 401, 434}; + int n = sizeof(idx)/sizeof(idx[0]); + + while(1){ + image in = get_image_from_stream(cap); + image in_s = resize_image(in, net->w, net->h); + + float *p = network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + + float score = 0; + for(i = 0; i < n; ++i){ + float s = p[idx[i]]; + if (s > score) score = s; + } + score = score; + printf("I APPRECIATE THIS ARTWORK: %10.7f%%\n", score*100); + printf("["); + int upper = 30; + for(i = 0; i < upper; ++i){ + printf("%c", ((i+.5) < score*upper) ? 
219 : ' '); + } + printf("]\n"); + + show_image(in, window, 1); + free_image(in_s); + free_image(in); + } +#endif +} + + +void run_art(int argc, char **argv) +{ + int cam_index = find_int_arg(argc, argv, "-c", 0); + char *cfg = argv[2]; + char *weights = argv[3]; + demo_art(cfg, weights, cam_index); +} + diff --git a/workloads/realworld/pipeline/darknet/examples/attention.c b/workloads/realworld/pipeline/darknet/examples/attention.c new file mode 100644 index 0000000000000000000000000000000000000000..cd1e579d375be8ffed5620c70180f0a59a927159 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/attention.c @@ -0,0 +1,459 @@ +#include "darknet.h" + +#include +#include + +void extend_data_truth(data *d, int n, float val) +{ + int i, j; + for(i = 0; i < d->y.rows; ++i){ + d->y.vals[i] = realloc(d->y.vals[i], (d->y.cols+n)*sizeof(float)); + for(j = 0; j < n; ++j){ + d->y.vals[i][d->y.cols + j] = val; + } + } + d->y.cols += n; +} + +matrix network_loss_data(network *net, data test) +{ + int i,b; + int k = 1; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.cols, sizeof(float)); + float *y = calloc(net->batch*test.y.cols, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + memcpy(y+b*test.y.cols, test.y.vals[i+b], test.y.cols*sizeof(float)); + } + + network orig = *net; + net->input = X; + net->truth = y; + net->train = 0; + net->delta = 0; + forward_network(net); + *net = orig; + + float *delta = net->layers[net->n-1].output; + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + int t = max_index(y + b*test.y.cols, 1000); + float err = sum_array(delta + b*net->outputs, net->outputs); + pred.vals[i+b][0] = -err; + //pred.vals[i+b][0] = 1-delta[b*net->outputs + t]; + } + } + free(X); + free(y); + return pred; +} + +void train_attention(char *datacfg, char 
*cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i, j; + + float avg_cls_loss = -1; + float avg_att_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *train_list = option_find_str(options, "train", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + + char **labels = get_labels(label_list); + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + double time; + + int divs=3; + int size=2; + + load_args args = {0}; + args.w = divs*net->w/size; + args.h = divs*net->h/size; + args.size = divs*net->w/size; + args.threads = 32; + args.hierarchy = net->hierarchy; + + args.min = net->min_ratio*args.w; + args.max = net->max_ratio*args.w; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + args.paths = paths; + args.classes = classes; + args.n = imgs; + args.m = N; + args.labels = labels; + args.type = CLASSIFICATION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || 
net->max_batches == 0){ + time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + data resized = resize_data(train, net->w, net->h); + extend_data_truth(&resized, divs*divs, 0); + data *tiles = tile_data(train, divs, size); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float aloss = 0; + float closs = 0; + int z; + for (i = 0; i < divs*divs/ngpus; ++i) { +#pragma omp parallel for + for(j = 0; j < ngpus; ++j){ + int index = i*ngpus + j; + extend_data_truth(tiles+index, divs*divs, SECRET_NUM); + matrix deltas = network_loss_data(nets[j], tiles[index]); + for(z = 0; z < resized.y.rows; ++z){ + resized.y.vals[z][train.y.cols + index] = deltas.vals[z][0]; + } + free_matrix(deltas); + } + } + int *inds = calloc(resized.y.rows, sizeof(int)); + for(z = 0; z < resized.y.rows; ++z){ + int index = max_index(resized.y.vals[z] + train.y.cols, divs*divs); + inds[z] = index; + for(i = 0; i < divs*divs; ++i){ + resized.y.vals[z][train.y.cols + i] = (i == index)? 
1 : 0; + } + } + data best = select_data(tiles, inds); + free(inds); + #ifdef GPU + if (ngpus == 1) { + closs = train_network(net, best); + } else { + closs = train_networks(nets, ngpus, best, 4); + } + #endif + for (i = 0; i < divs*divs; ++i) { + printf("%.2f ", resized.y.vals[0][train.y.cols + i]); + if((i+1)%divs == 0) printf("\n"); + free_data(tiles[i]); + } + free_data(best); + printf("\n"); + image im = float_to_image(64,64,3,resized.X.vals[0]); + //show_image(im, "orig"); + //cvWaitKey(100); + /* + image im1 = float_to_image(64,64,3,tiles[i].X.vals[0]); + image im2 = float_to_image(64,64,3,resized.X.vals[0]); + show_image(im1, "tile"); + show_image(im2, "res"); + */ +#ifdef GPU + if (ngpus == 1) { + aloss = train_network(net, resized); + } else { + aloss = train_networks(nets, ngpus, resized, 4); + } +#endif + for(i = 0; i < divs*divs; ++i){ + printf("%f ", nets[0]->output[1000 + i]); + if ((i+1) % divs == 0) printf("\n"); + } + printf("\n"); + + free_data(resized); + free_data(train); + if(avg_cls_loss == -1) avg_cls_loss = closs; + if(avg_att_loss == -1) avg_att_loss = aloss; + avg_cls_loss = avg_cls_loss*.9 + closs*.1; + avg_att_loss = avg_att_loss*.9 + aloss*.1; + + printf("%ld, %.3f: Att: %f, %f avg, Class: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, aloss, avg_att_loss, closs, avg_cls_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + pthread_join(load_thread, 0); + + free_network(net); + free_ptrs((void**)labels, classes); + free_ptrs((void**)paths, plist->size); + 
free_list(plist); + free(base); +} + +void validate_attention_single(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *leaf_list = option_find_str(options, "leaves", 0); + if(leaf_list) change_leaves(net->hierarchy, leaf_list); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + int divs = 4; + int size = 2; + int extra = 0; + float *avgs = calloc(classes, sizeof(float)); + int *inds = calloc(divs*divs, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w*divs/size); + image crop = crop_image(resized, (resized.w - net->w*divs/size)/2, (resized.h - net->h*divs/size)/2, net->w*divs/size, net->h*divs/size); + image rcrop = resize_image(crop, net->w, net->h); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, rcrop.data); + //pred[classes + 56] = 0; + for(j = 0; j < divs*divs; ++j){ + printf("%.2f ", pred[classes + j]); + if((j+1)%divs == 0) printf("\n"); + } + printf("\n"); + copy_cpu(classes, pred, 1, avgs, 1); + top_k(pred + classes, divs*divs, divs*divs, inds); + show_image(crop, "crop"); + for(j = 0; j < extra; ++j){ + int index = inds[j]; + int row = index / 
divs; + int col = index % divs; + int y = row * crop.h / divs - (net->h - crop.h/divs)/2; + int x = col * crop.w / divs - (net->w - crop.w/divs)/2; + printf("%d %d %d %d\n", row, col, y, x); + image tile = crop_image(crop, x, y, net->w, net->h); + float *pred = network_predict(net, tile.data); + axpy_cpu(classes, 1., pred, 1, avgs, 1); + show_image(tile, "tile"); + //cvWaitKey(10); + } + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + if(rcrop.data != resized.data) free_image(rcrop); + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_attention_multi(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + int scales[] = {224, 288, 320, 352, 384}; + int nscales = sizeof(scales)/sizeof(scales[0]); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + float *pred = calloc(classes, sizeof(float)); + image im = load_image_color(paths[i], 0, 0); + for(j = 0; j 
< nscales; ++j){ + image r = resize_min(im, scales[j]); + resize_network(net, r.w, r.h); + float *p = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + flip_image(r); + p = network_predict(net, r.data); + axpy_cpu(classes, 1, p, 1, pred, 1); + if(r.data != im.data) free_image(r); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void predict_attention(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + if(top == 0) top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = letterbox_image(im, net->w, net->h); + //resize_network(&net, r.w, r.h); + //printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + float *predictions = network_predict(net, X); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + 
//if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? names[net->hierarchy->parent[index]] : "Root"); + //else printf("%s: %f\n",names[index], predictions[index]); + printf("%5.2f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void run_attention(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int ngpus; + int *gpus = read_intlist(gpu_list, &ngpus, gpu_index); + + + int top = find_int_arg(argc, argv, "-t", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + char *layer_s = (argc > 7) ? argv[7]: 0; + if(0==strcmp(argv[2], "predict")) predict_attention(data, cfg, weights, filename, top); + else if(0==strcmp(argv[2], "train")) train_attention(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) validate_attention_single(data, cfg, weights); + else if(0==strcmp(argv[2], "validmulti")) validate_attention_multi(data, cfg, weights); +} + + diff --git a/workloads/realworld/pipeline/darknet/examples/captcha.c b/workloads/realworld/pipeline/darknet/examples/captcha.c new file mode 100644 index 0000000000000000000000000000000000000000..41d6d07c30801b35da34c05984be488e6f6767e9 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/captcha.c @@ -0,0 +1,353 @@ +#include "darknet.h" + +void fix_data_captcha(data d, int mask) +{ + matrix labels = d.y; + int i, j; + for(i = 0; i < d.y.rows; ++i){ + for(j = 0; j < d.y.cols; j += 2){ + if (mask){ + if(!labels.vals[i][j]){ + labels.vals[i][j] = SECRET_NUM; + labels.vals[i][j+1] = SECRET_NUM; + }else 
if(labels.vals[i][j+1]){ + labels.vals[i][j] = 0; + } + } else{ + if (labels.vals[i][j]) { + labels.vals[i][j+1] = 0; + } else { + labels.vals[i][j+1] = 1; + } + } + } + } +} + +void train_captcha(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + int i = *net->seen/imgs; + int solved = 1; + list *plist; + char **labels = get_labels("/data/captcha/reimgs.labels.list"); + if (solved){ + plist = get_paths("/data/captcha/reimgs.solved.list"); + }else{ + plist = get_paths("/data/captcha/reimgs.raw.list"); + } + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = 26; + args.n = imgs; + args.m = plist->size; + args.labels = labels; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + + load_thread = load_data_in_thread(args); + while(1){ + ++i; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + fix_data_captcha(train, solved); + + /* + image im = float_to_image(256, 256, 3, train.X.vals[114]); + show_image(im, "training"); + cvWaitKey(0); + */ + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net->seen); + free_data(train); + if(i%100==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } + } +} + +void test_captcha(char *cfgfile, char *weightfile, char 
*filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + int i = 0; + char **names = get_labels("/data/captcha/reimgs.labels.list"); + char buff[256]; + char *input = buff; + int indexes[26]; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + //printf("Enter Image Path: "); + //fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, net->w, net->h); + float *X = im.data; + float *predictions = network_predict(net, X); + top_predictions(net, 26, indexes); + //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < 26; ++i){ + int index = indexes[i]; + if(i != 0) printf(", "); + printf("%s %f", names[index], predictions[index]); + } + printf("\n"); + fflush(stdout); + free_image(im); + if (filename) break; + } +} + +void valid_captcha(char *cfgfile, char *weightfile, char *filename) +{ + char **labels = get_labels("/data/captcha/reimgs.labels.list"); + network *net = load_network(cfgfile, weightfile, 0); + list *plist = get_paths("/data/captcha/reimgs.fg.list"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + int outputs = net->outputs; + + set_batch_network(net, 1); + srand(2222222); + int i, j; + for(i = 0; i < N; ++i){ + if (i%100 == 0) fprintf(stderr, "%d\n", i); + image im = load_image_color(paths[i], net->w, net->h); + float *X = im.data; + float *predictions = network_predict(net, X); + //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + int truth = -1; + for(j = 0; j < 13; ++j){ + if (strstr(paths[i], labels[j])) truth = j; + } + if (truth == -1){ + fprintf(stderr, "bad: %s\n", paths[i]); + return; + } + printf("%d, ", truth); + for(j = 0; j < outputs; ++j){ + if (j != 0) printf(", "); + printf("%f", predictions[j]); + } + printf("\n"); + fflush(stdout); + free_image(im); + if (filename) break; + } +} + +/* + void 
train_captcha(char *cfgfile, char *weightfile) + { + float avg_loss = -1; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + int i = net->seen/imgs; + list *plist = get_paths("/data/captcha/train.auto5"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + while(1){ + ++i; + time=clock(); + data train = load_data_captcha(paths, imgs, plist->size, 10, 200, 60); + translate_data_rows(train, -128); + scale_data_rows(train, 1./128); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + net->seen += imgs; + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net->seen); + free_data(train); + if(i%10==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } + } + } + + void decode_captcha(char *cfgfile, char *weightfile) + { + setbuf(stdout, NULL); + srand(time(0)); + network net = parse_network_cfg(cfgfile); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + char filename[256]; + while(1){ + printf("Enter filename: "); + fgets(filename, 256, stdin); + strtok(filename, "\n"); + image im = load_image_color(filename, 300, 57); + scale_image(im, 1./255.); + float *X = im.data; + float *predictions = network_predict(net, X); + image out = float_to_image(300, 57, 1, predictions); + show_image(out, "decoded"); +#ifdef OPENCV +cvWaitKey(0); +#endif +free_image(im); +} +} + +void encode_captcha(char *cfgfile, char *weightfile) +{ +float avg_loss = -1; +srand(time(0)); +char *base = basecfg(cfgfile); +printf("%s\n", 
base); +network net = parse_network_cfg(cfgfile); +if(weightfile){ + load_weights(&net, weightfile); +} +printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); +int imgs = 1024; +int i = net->seen/imgs; +list *plist = get_paths("/data/captcha/encode.list"); +char **paths = (char **)list_to_array(plist); +printf("%d\n", plist->size); +clock_t time; +while(1){ + ++i; + time=clock(); + data train = load_data_captcha_encode(paths, imgs, plist->size, 300, 57); + scale_data_rows(train, 1./255); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + net->seen += imgs; + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net->seen); + free_matrix(train.X); + if(i%100==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } +} +} + +void validate_captcha(char *cfgfile, char *weightfile) +{ + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + int numchars = 37; + list *plist = get_paths("/data/captcha/solved.hard"); + char **paths = (char **)list_to_array(plist); + int imgs = plist->size; + data valid = load_data_captcha(paths, imgs, 0, 10, 200, 60); + translate_data_rows(valid, -128); + scale_data_rows(valid, 1./128); + matrix pred = network_predict_data(net, valid); + int i, k; + int correct = 0; + int total = 0; + int accuracy = 0; + for(i = 0; i < imgs; ++i){ + int allcorrect = 1; + for(k = 0; k < 10; ++k){ + char truth = int_to_alphanum(max_index(valid.y.vals[i]+k*numchars, numchars)); + char prediction = int_to_alphanum(max_index(pred.vals[i]+k*numchars, numchars)); + if (truth != prediction) allcorrect=0; + if (truth != '.' 
&& truth == prediction) ++correct; + if (truth != '.' || truth != prediction) ++total; + } + accuracy += allcorrect; + } + printf("Word Accuracy: %f, Char Accuracy %f\n", (float)accuracy/imgs, (float)correct/total); + free_data(valid); +} + +void test_captcha(char *cfgfile, char *weightfile) +{ + setbuf(stdout, NULL); + srand(time(0)); + //char *base = basecfg(cfgfile); + //printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + char filename[256]; + while(1){ + //printf("Enter filename: "); + fgets(filename, 256, stdin); + strtok(filename, "\n"); + image im = load_image_color(filename, 200, 60); + translate_image(im, -128); + scale_image(im, 1/128.); + float *X = im.data; + float *predictions = network_predict(net, X); + print_letters(predictions, 10); + free_image(im); + } +} + */ +void run_captcha(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_captcha(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights, filename); + else if(0==strcmp(argv[2], "valid")) valid_captcha(cfg, weights, filename); + //if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "encode")) encode_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "decode")) decode_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "valid")) validate_captcha(cfg, weights); +} + diff --git a/workloads/realworld/pipeline/darknet/examples/cifar.c b/workloads/realworld/pipeline/darknet/examples/cifar.c new file mode 100644 index 0000000000000000000000000000000000000000..d4f14d16ae64b0d5bb7a82688a3833ee9fd9597d --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/cifar.c @@ -0,0 +1,251 @@ +#include "darknet.h" + +void train_cifar(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + int classes = 10; + int N = 50000; + + char **labels = get_labels("data/cifar/labels.txt"); + int epoch = (*net->seen)/N; + data train = load_all_cifar10(); + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + clock_t time=clock(); + + float loss = train_network_sgd(net, train, 1); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + 
if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)labels, classes); + free(base); + free_data(train); +} + +void train_cifar_distill(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + int classes = 10; + int N = 50000; + + char **labels = get_labels("data/cifar/labels.txt"); + int epoch = (*net->seen)/N; + + data train = load_all_cifar10(); + matrix soft = csv_to_matrix("results/ensemble.csv"); + + float weight = .9; + scale_matrix(soft, weight); + scale_matrix(train.y, 1. 
- weight); + matrix_add_matrix(soft, train.y); + + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + clock_t time=clock(); + + float loss = train_network_sgd(net, train, 1); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)labels, classes); + free(base); + free_data(train); +} + +void test_cifar_multi(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + float avg_acc = 0; + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + + float pred[10] = {0}; + + float *p = network_predict(net, im.data); + axpy_cpu(10, 1, p, 1, pred, 1); + flip_image(im); + p = network_predict(net, im.data); + axpy_cpu(10, 1, p, 1, pred, 1); + + int index = max_index(pred, 10); + int class_ = max_index(test.y.vals[i], 10); + if(index == class_) avg_acc += 1; + free_image(im); + printf("%4d: %.2f%%\n", i, 100.*avg_acc/(i+1)); + } +} + +void test_cifar(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + clock_t time; + float avg_acc = 0; + float avg_top5 = 0; + data test = 
load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + time=clock(); + + float *acc = network_accuracies(net, test, 2); + avg_acc += acc[0]; + avg_top5 += acc[1]; + printf("top1: %f, %lf seconds, %d images\n", avg_acc, sec(clock()-time), test.X.rows); + free_data(test); +} + +void extract_cifar() +{ +char *labels[] = {"airplane","automobile","bird","cat","deer","dog","frog","horse","ship","truck"}; + int i; + data train = load_all_cifar10(); + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + for(i = 0; i < train.X.rows; ++i){ + image im = float_to_image(32, 32, 3, train.X.vals[i]); + int class_ = max_index(train.y.vals[i], 10); + char buff[256]; + sprintf(buff, "data/cifar/train/%d_%s",i,labels[class_]); + save_image_options(im, buff, PNG, 0); + } + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + int class_ = max_index(test.y.vals[i], 10); + char buff[256]; + sprintf(buff, "data/cifar/test/%d_%s",i,labels[class_]); + save_image_options(im, buff, PNG, 0); + } +} + +void test_cifar_csv(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + matrix pred = network_predict_data(net, test); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + flip_image(im); + } + matrix pred2 = network_predict_data(net, test); + scale_matrix(pred, .5); + scale_matrix(pred2, .5); + matrix_add_matrix(pred2, pred); + + matrix_to_csv(pred); + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); +} + +void test_cifar_csvtrain(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + srand(time(0)); + + data test = load_all_cifar10(); + + matrix pred = network_predict_data(net, test); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = 
float_to_image(32, 32, 3, test.X.vals[i]); + flip_image(im); + } + matrix pred2 = network_predict_data(net, test); + scale_matrix(pred, .5); + scale_matrix(pred2, .5); + matrix_add_matrix(pred2, pred); + + matrix_to_csv(pred); + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); +} + +void eval_cifar_csv() +{ + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + matrix pred = csv_to_matrix("results/combined.csv"); + fprintf(stderr, "%d %d\n", pred.rows, pred.cols); + + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); + free_matrix(pred); +} + + +void run_cifar(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_cifar(cfg, weights); + else if(0==strcmp(argv[2], "extract")) extract_cifar(); + else if(0==strcmp(argv[2], "distill")) train_cifar_distill(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_cifar(cfg, weights); + else if(0==strcmp(argv[2], "multi")) test_cifar_multi(cfg, weights); + else if(0==strcmp(argv[2], "csv")) test_cifar_csv(cfg, weights); + else if(0==strcmp(argv[2], "csvtrain")) test_cifar_csvtrain(cfg, weights); + else if(0==strcmp(argv[2], "eval")) eval_cifar_csv(); +} + + diff --git a/workloads/realworld/pipeline/darknet/examples/classifier.c b/workloads/realworld/pipeline/darknet/examples/classifier.c new file mode 100644 index 0000000000000000000000000000000000000000..1801d7ffb45c233a833a48af24cc4c32b66c4ed6 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/classifier.c @@ -0,0 +1,1123 @@ +#include "darknet.h" + +#include +#include + +float *get_regression_values(char **labels, int n) +{ + float *v = calloc(n, sizeof(float)); + int i; + for(i = 0; i < n; ++i){ + char *p = strchr(labels[i], ' 
'); + *p = 0; + v[i] = atof(p+1); + } + return v; +} + +void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + // Ruihao + int tag = option_find_int_quiet(options, "tag", 0); + + char *backup_directory_cfg = option_find_str(options, "backup", "/backup/"); + char *label_list_cfg = option_find_str(options, "labels", "data/labels.list"); + char *train_list_cfg = option_find_str(options, "train", "data/train.list"); + + char *env = getenv("UVMAsyncBench_BASE"); + char backup_directory[256]; + char label_list[256]; + char train_list[256]; + sprintf(backup_directory, "%s/%s", env, backup_directory_cfg); + sprintf(label_list, "%s/%s", env, label_list_cfg); + sprintf(train_list, "%s/%s", env, train_list_cfg); + // Ruihao + char *tree = option_find_str(options, "tree", 0); + if (tree) net->hierarchy = read_tree(tree); + int classes = option_find_int(options, "classes", 2); + + char **labels = 0; + if(!tag){ + labels = get_labels(label_list); + } + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + double time; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.hierarchy = net->hierarchy; + + args.min = net->min_ratio*net->w; + args.max = net->max_ratio*net->w; 
+ printf("%d %d\n", args.min, args.max); + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + + args.paths = paths; + args.classes = classes; + args.n = imgs; + args.m = N; + args.labels = labels; + if (tag){ + args.type = TAG_DATA; + } else { + args.type = CLASSIFICATION_DATA; + } + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int count = 0; + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + if(net->random && count++%40 == 0){ + printf("Resizing\n"); + int dim = (rand() % 11 + 4) * 32; + //if (get_current_batch(net)+200 > net->max_batches) dim = 608; + //int dim = (rand() % 4 + 16) * 32; + printf("%d\n", dim); + args.w = dim; + args.h = dim; + args.size = dim; + args.min = net->min_ratio*dim; + args.max = net->max_ratio*dim; + printf("%d %d\n", args.min, args.max); + + pthread_join(load_thread, 0); + train = buffer; + free_data(train); + load_thread = load_data(args); + + for(i = 0; i < ngpus; ++i){ + resize_network(nets[i], dim, dim); + } + net = nets[0]; + } + time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + 
sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + pthread_join(load_thread, 0); + + // free_network(net); + if(labels) free_ptrs((void**)labels, classes); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void validate_classifier_crop(char *datacfg, char *filename, char *weightfile) +{ + int i = 0; + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + clock_t time; + float avg_acc = 0; + float avg_topk = 0; + int splits = m/1000; + int num = (i+1)*m/splits - i*m/splits; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + + args.paths = paths; + args.classes = classes; + args.n = num; + args.m = 0; + args.labels = labels; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(i = 1; i <= splits; ++i){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + num = (i+1)*m/splits - i*m/splits; + char **part = paths+(i*m/splits); + if(i != splits){ + args.paths = part; + load_thread = load_data_in_thread(args); + } + printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + float *acc = 
network_accuracies(net, val, topk); + avg_acc += acc[0]; + avg_topk += acc[1]; + printf("%d: top 1: %f, top %d: %f, %lf seconds, %d images\n", i, avg_acc/i, topk, avg_topk/i, sec(clock()-time), val.X.rows); + free_data(val); + } +} + +void validate_classifier_10(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class_ = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class_ = j; + break; + } + } + int w = net->w; + int h = net->h; + int shift = 32; + image im = load_image_color(paths[i], w+shift, h+shift); + image images[10]; + images[0] = crop_image(im, -shift, -shift, w, h); + images[1] = crop_image(im, shift, -shift, w, h); + images[2] = crop_image(im, 0, 0, w, h); + images[3] = crop_image(im, -shift, shift, w, h); + images[4] = crop_image(im, shift, shift, w, h); + flip_image(im); + images[5] = crop_image(im, -shift, -shift, w, h); + images[6] = crop_image(im, shift, -shift, w, h); + images[7] = crop_image(im, 0, 0, w, h); + images[8] = crop_image(im, -shift, shift, w, h); + images[9] = crop_image(im, shift, shift, w, h); + float *pred = calloc(classes, sizeof(float)); + for(j = 0; j < 10; ++j){ + float *p = network_predict(net, images[j].data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, 
net->hierarchy, 1, 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + free_image(images[j]); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class_) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class_) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_classifier_full(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + int size = net->w; + for(i = 0; i < m; ++i){ + int class_ = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class_ = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, size); + resize_network(net, resized.w, resized.h); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, resized.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + free_image(im); + free_image(resized); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class_) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class_) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + + +void 
validate_classifier_single(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *leaf_list = option_find_str(options, "leaves", 0); + if(leaf_list) change_leaves(net->hierarchy, leaf_list); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class_ = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class_ = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image crop = center_crop_image(im, net->w, net->h); + //grayscale_image_3c(crop); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, crop.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + free_image(im); + free_image(crop); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class_) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class_) avg_topk += 1; + } + + printf("%s, %d, %f, %f, \n", paths[i], class_, pred[0], pred[1]); + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_classifier_multi(char *datacfg, char *cfg, char *weights) +{ + int i, j; + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = 
option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + //int scales[] = {224, 288, 320, 352, 384}; + int scales[] = {224, 256, 288, 320}; + int nscales = sizeof(scales)/sizeof(scales[0]); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class_ = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class_ = j; + break; + } + } + float *pred = calloc(classes, sizeof(float)); + image im = load_image_color(paths[i], 0, 0); + for(j = 0; j < nscales; ++j){ + image r = resize_max(im, scales[j]); + resize_network(net, r.w, r.h); + float *p = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + flip_image(r); + p = network_predict(net, r.data); + axpy_cpu(classes, 1, p, 1, pred, 1); + if(r.data != im.data) free_image(r); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class_) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class_) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + int top = 
option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image orig = load_image_color(input, 0, 0); + image r = resize_min(orig, 256); + image im = crop_image(r, (r.w - 224 - 1)/2 + 1, (r.h - 224 - 1)/2 + 1, 224, 224); + float mean[] = {0.48263312050943, 0.45230225481413, 0.40099074308742}; + float std[] = {0.22590347483426, 0.22120921437787, 0.22103996251583}; + float var[3]; + var[0] = std[0]*std[0]; + var[1] = std[1]*std[1]; + var[2] = std[2]*std[2]; + + normalize_cpu(im.data, mean, var, 1, 3, im.w*im.h); + + float *X = im.data; + time=clock(); + float *predictions = network_predict(net, X); + + layer *l = net->layers[layer_num]; + for(i = 0; i < l->c; ++i){ + if(l->rolling_mean) printf("%f %f %f\n", l->rolling_mean[i], l->rolling_variance[i], l->scales[i]); + } +#ifdef GPU + cuda_pull_array(l->output_gpu, l->output, l->outputs); +#endif + for(i = 0; i < l->outputs; ++i){ + printf("%f\n", l->output[i]); + } + /* + + printf("\n\nWeights\n"); + for(i = 0; i < l.n*l.size*l.size*l.c; ++i){ + printf("%f\n", l.filters[i]); + } + + printf("\n\nBiases\n"); + for(i = 0; i < l.n; ++i){ + printf("%f\n", l.biases[i]); + } + */ + + top_predictions(net, top, indexes); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + printf("%s: %f\n", names[index], predictions[index]); + } + free_image(im); + if (filename) break; + } +} + +void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top) +{ + network *net = load_network(cfgfile, weightfile, 0); + fprintf(stderr, "load_network done! 
\n"); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *name_list_cfg = option_find_str(options, "names", 0); + char *env = getenv("UVMAsyncBench_BASE"); + char name_list[256]; + sprintf(name_list, "%s/%s", env, name_list_cfg); + // if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + // Ruihao + if(top == 0) top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = letterbox_image(im, net->w, net->h); + //image r = resize_min(im, 320); + //printf("%d %d\n", r.w, r.h); + //resize_network(net, r.w, r.h); + //printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + startCPU(); + float *predictions = network_predict(net, X); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + endCPU(); + fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + //if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? 
names[net->hierarchy->parent[index]] : "Root"); + //else printf("%s: %f\n",names[index], predictions[index]); + printf("%5.2f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void label_classifier(char *datacfg, char *filename, char *weightfile) +{ + int i; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "names", "data/labels.list"); + char *test_list = option_find_str(options, "test", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + + char **labels = get_labels(label_list); + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + for(i = 0; i < m; ++i){ + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + float *pred = network_predict(net, crop.data); + + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + int ind = max_index(pred, classes); + + printf("%s\n", labels[ind]); + } +} + +void csv_classifier(char *datacfg, char *cfgfile, char *weightfile) +{ + int i,j; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *test_list = option_find_str(options, "test", "data/test.list"); + int top = option_find_int(options, "top", 1); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + int *indexes = calloc(top, sizeof(int)); + + for(i = 0; i < m; ++i){ + double time = what_time_is_it_now(); + char *path = paths[i]; + image im = load_image_color(path, 0, 0); + image r = letterbox_image(im, net->w, 
net->h); + float *predictions = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + + printf("%s", path); + for(j = 0; j < top; ++j){ + printf("\t%d", indexes[j]); + } + printf("\n"); + + free_image(im); + free_image(r); + + fprintf(stderr, "%lf seconds, %d images, %d total\n", what_time_is_it_now() - time, i+1, m); + } +} + +void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_layer) +{ + int curr = 0; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *test_list_cfg = option_find_str(options, "test", "data/test.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char test_list[256]; + sprintf(test_list, "%s/%s", env, test_list_cfg); + // Ruihao + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + m = net->max_batches * net->batch; + free_list(plist); + + clock_t time; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = classes; + args.n = net->batch; + args.m = 0; + args.labels = 0; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(curr = net->batch; curr <= m; curr += net->batch){ + // while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + if(curr < m){ + args.paths = paths + curr; + if (curr + net->batch > m) args.n = m - curr; + load_thread = load_data_in_thread(args); + } + fprintf(stderr, "Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + + int i, j; + if (target_layer >= 0){ + //layer l = 
net->layers[target_layer]; + } + + // for(i = 0; i < pred.rows; ++i){ + // printf("%s", paths[curr-net->batch+i]); + // for(j = 0; j < pred.cols; ++j){ + // printf("\t%g", pred.vals[i][j]); + // } + // printf("\n"); + // } + + fprintf(stderr, "%lf seconds, %d images, %d total\n", sec(clock()-time), val.X.rows, curr); + free_matrix(pred); + free_data(val); + } +} + +void file_output_classifier(char *datacfg, char *filename, char *weightfile, char *listfile) +{ + int i,j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + //char *label_list = option_find_str(options, "names", "data/labels.list"); + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(listfile); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + for(i = 0; i < m; ++i){ + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + + float *pred = network_predict(net, crop.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 0, 1); + + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + + printf("%s", paths[i]); + for(j = 0; j < classes; ++j){ + printf("\t%g", pred[j]); + } + printf("\n"); + } +} + + +void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + float threat = 0; + float roll = .2; + + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + int top = option_find_int(options, "top", 1); + + char *name_list = option_find_str(options, "names", 0); + char **names = 
get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + //cvNamedWindow("Threat", CV_WINDOW_NORMAL); + //cvResizeWindow("Threat", 512, 512); + float fps = 0; + int i; + + int count = 0; + + while(1){ + ++count; + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + if(!in.data) break; + image in_s = resize_image(in, net->w, net->h); + + image out = in; + int x1 = out.w / 20; + int y1 = out.h / 20; + int x2 = 2*x1; + int y2 = out.h - out.h/20; + + int border = .01*out.h; + int h = y2 - y1 - 2*border; + int w = x2 - x1 - 2*border; + + float *predictions = network_predict(net, in_s.data); + float curr_threat = 0; + if(1){ + curr_threat = predictions[0] * 0 + + predictions[1] * .6 + + predictions[2]; + } else { + curr_threat = predictions[218] + + predictions[539] + + predictions[540] + + predictions[368] + + predictions[369] + + predictions[370]; + } + threat = roll * curr_threat + (1-roll) * threat; + + draw_box_width(out, x2 + border, y1 + .02*h, x2 + .5 * w, y1 + .02*h + border, border, 0,0,0); + if(threat > .97) { + draw_box_width(out, x2 + .5 * w + border, + y1 + .02*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .02*h + 3*border, 3*border, 1,0,0); + } + draw_box_width(out, x2 + .5 * w + border, + y1 + .02*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .02*h + 3*border, .5*border, 0,0,0); + draw_box_width(out, x2 + border, y1 + .42*h, x2 + .5 * w, y1 + .42*h + border, border, 0,0,0); + if(threat > .57) { + draw_box_width(out, x2 + .5 * w + border, + y1 + .42*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .42*h + 3*border, 3*border, 1,1,0); + } + draw_box_width(out, x2 + .5 * w + border, + y1 + .42*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .42*h + 3*border, .5*border, 0,0,0); + + draw_box_width(out, x1, y1, x2, y2, border, 0,0,0); + for(i = 0; i < threat * h ; ++i){ + float ratio = (float) i / h; + float r = 
(ratio < .5) ? (2*(ratio)) : 1; + float g = (ratio < .5) ? 1 : 1 - 2*(ratio - .5); + draw_box_width(out, x1 + border, y2 - border - i, x2 - border, y2 - border - i, 1, r, g, 0); + } + top_predictions(net, top, indexes); + char buff[256]; + sprintf(buff, "/home/pjreddie/tmp/threat_%06d", count); + //save_image(out, buff); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + for(i = 0; i < top; ++i){ + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + } + + if(1){ + show_image(out, "Threat", 10); + } + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + int bad_cats[] = {218, 539, 540, 1213, 1501, 1742, 1911, 2415, 4348, 19223, 368, 369, 370, 1133, 1200, 1306, 2122, 2301, 2537, 2823, 3179, 3596, 3639, 4489, 5107, 5140, 5289, 6240, 6631, 6762, 7048, 7171, 7969, 7984, 7989, 8824, 8927, 9915, 10270, 10448, 13401, 15205, 18358, 18894, 18895, 19249, 19697}; + + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + int top = option_find_int(options, "top", 1); + + char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + int i; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = resize_image(in, net->w, net->h); + + float *predictions = network_predict(net, 
in_s.data); + top_predictions(net, top, indexes); + + printf("\033[2J"); + printf("\033[1;1H"); + + int threat = 0; + for(i = 0; i < sizeof(bad_cats)/sizeof(bad_cats[0]); ++i){ + int index = bad_cats[i]; + if(predictions[index] > .01){ + printf("Threat Detected!\n"); + threat = 1; + break; + } + } + if(!threat) printf("Scanning...\n"); + for(i = 0; i < sizeof(bad_cats)/sizeof(bad_cats[0]); ++i){ + int index = bad_cats[i]; + if(predictions[index] > .01){ + printf("%s\n", names[index]); + } + } + + show_image(in, "Threat Detection", 10); + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + +void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + image **alphabet = load_alphabet(); + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + + int w = 1280; + int h = 720; + void * cap = open_video_stream(filename, cam_index, w, h, 0); + + int top = option_find_int(options, "top", 1); + + char *label_list = option_find_str(options, "labels", 0); + char *name_list = option_find_str(options, "names", label_list); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + int i; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + + float *predictions = network_predict(net, in_s.data); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + 
top_predictions(net, top, indexes); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + int lh = in.h*.03; + int toph = 3*lh; + + float rgb[3] = {1,1,1}; + for(i = 0; i < top; ++i){ + printf("%d\n", toph); + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + + char buff[1024]; + sprintf(buff, "%3.1f%%: %s\n", predictions[index]*100, names[index]); + image label = get_label(alphabet, buff, lh); + draw_label(in, toph, lh, label, rgb); + toph += 2*lh; + free_image(label); + } + + show_image(in, base, 10); + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_classifier(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int ngpus; + int *gpus = read_intlist(gpu_list, &ngpus, gpu_index); + + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int top = find_int_arg(argc, argv, "-t", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + char *layer_s = (argc > 7) ? argv[7]: 0; + int layer = layer_s ? 
atoi(layer_s) : -1; + if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top); + else if(0==strcmp(argv[2], "fout")) file_output_classifier(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s)); + else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer); + else if(0==strcmp(argv[2], "csv")) csv_classifier(data, cfg, weights); + else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights); + else if(0==strcmp(argv[2], "validmulti")) validate_classifier_multi(data, cfg, weights); + else if(0==strcmp(argv[2], "valid10")) validate_classifier_10(data, cfg, weights); + else if(0==strcmp(argv[2], "validcrop")) validate_classifier_crop(data, cfg, weights); + else if(0==strcmp(argv[2], "validfull")) validate_classifier_full(data, cfg, weights); +} + + diff --git a/workloads/realworld/pipeline/darknet/examples/coco.c b/workloads/realworld/pipeline/darknet/examples/coco.c new file mode 100644 index 0000000000000000000000000000000000000000..0369283f3c93433a34d61283cc5b077b1f0b57c1 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/coco.c @@ -0,0 +1,357 @@ +#include "darknet.h" + +#include + +char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking 
meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"}; + +int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; + +void train_coco(char *cfgfile, char *weightfile) +{ + //char *train_images = "/home/pjreddie/data/voc/test/train.txt"; + //char *train_images = "/home/pjreddie/data/coco/train.txt"; + char *train_images = "data/coco.trainval.txt"; + //char *train_images = "data/bags.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + layer *l = net->layers[net->n - 1]; + + int side = l->side; + int classes = l->classes; + float jitter = l->jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = 
jitter; + args.num_boxes = side; + args.d = &buffer; + args.type = REGION_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + /* + image im = float_to_image(net->w, net->h, 3, train.X.vals[113]); + image copy = copy_image(im); + draw_coco(copy, train.y.vals[113], 7, "truth"); + cvWaitKey(0); + free_image(copy); + */ + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || (i < 1000 && i%100 == 0)){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +static void print_cocos(FILE *fp, int image_id, detection *dets, int num_boxes, int classes, int w, int h) +{ + int i, j; + for(i = 0; i < num_boxes; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + float bx = xmin; + float by = ymin; + float bw = xmax - xmin; + float bh = ymax - ymin; + + for(j = 0; j < classes; 
++j){ + if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]); + } + } +} + +int get_coco_image_id(char *filename) +{ + char *p = strrchr(filename, '_'); + return atoi(p+1); +} + +void validate_coco(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/"; + list *plist = get_paths("data/coco_val_5k.list"); + //list *plist = get_paths("/home/pjreddie/data/people-art/test.txt"); + //list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt"); + char **paths = (char **)list_to_array(plist); + + layer *l = net->layers[net->n-1]; + int classes = l->classes; + + char buff[1024]; + snprintf(buff, 1024, "%s/coco_results.json", base); + FILE *fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + + int m = plist->size; + int i=0; + int t; + + float thresh = .01; + int nms = 1; + float iou_thresh = .5; + + int nthreads = 8; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.type = IMAGE_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + time_t start = time(0); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = 
&buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + int image_id = get_coco_image_id(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l->side*l->side*l->n, classes, iou_thresh); + print_cocos(fp, image_id, dets, l->side*l->side*l->n, classes, w, h); + free_detections(dets, nboxes); + free_image(val[t]); + free_image(val_resized[t]); + } + } + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); +} + +void validate_coco_recall(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt"); + char **paths = (char **)list_to_array(plist); + + layer *l = net->layers[net->n-1]; + int classes = l->classes; + int side = l->side; + + int j, k; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + + float thresh = .001; + int nms = 0; + float iou_thresh = .5; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + + int nboxes = 0; + detection *dets = get_network_boxes(net, 
orig.w, orig.h, thresh, 0, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, side*side*l->n, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < side*side*l->n; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < side*side*l->n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + free_detections(dets, nboxes); + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free(id); + free_image(orig); + free_image(sized); + } +} + +void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) +{ + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + layer *l = net->layers[net->n-1]; + set_batch_network(net, 1); + srand(2222222); + float nms = .4; + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = resize_image(im, net->w, net->h); + float *X = sized.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + + int nboxes = 0; + detection *dets = get_network_boxes(net, 1, 1, thresh, 0, 0, 0, 
&nboxes); + if (nms) do_nms_sort(dets, l->side*l->side*l->n, l->classes, nms); + + draw_detections(im, dets, l->side*l->side*l->n, thresh, coco_classes, alphabet, 80); + save_image(im, "prediction"); + show_image(im, "predictions", 0); + free_detections(dets, nboxes); + free_image(im); + free_image(sized); + if (filename) break; + } +} + +void run_coco(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .2); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5]: 0; + int avg = find_int_arg(argc, argv, "-avg", 1); + if(0==strcmp(argv[2], "test")) test_coco(cfg, weights, filename, thresh); + else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights); + else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix, avg, .5, 0,0,0,0); +} diff --git a/workloads/realworld/pipeline/darknet/examples/darknet.c b/workloads/realworld/pipeline/darknet/examples/darknet.c new file mode 100644 index 0000000000000000000000000000000000000000..b906357dbc8b85475cf57801b276c2858d6a6b38 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/darknet.c @@ -0,0 +1,559 @@ +#include "darknet.h" + +// #include "../../../../common/cupti_add.h" +// #include "../../../../common/cpu_timestamps.h" +// #include "cpu_timestamps.h" + +static uint64_t startCPUTime; +static uint64_t endCPUTime; + +void startCPU() +{ + struct timespec tv; + if (clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); 
+ startCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); +} + +void endCPU() +{ + struct timespec tv; + if (clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + + endCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); + // endCPUTimestamp1 = std::chrono::system_clock::now(); + printf("CPU_Times,%lu,%lu,%lu\n", startCPUTime, endCPUTime, endCPUTime - startCPUTime); +} + +#ifdef __cplusplus +extern "C" { +#endif +extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top); +extern void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen); +extern void run_yolo(int argc, char **argv); +extern void run_detector(int argc, char **argv); +extern void run_coco(int argc, char **argv); +extern void run_nightmare(int argc, char **argv); +extern void run_classifier(int argc, char **argv); +extern void run_regressor(int argc, char **argv); +extern void run_segmenter(int argc, char **argv); +extern void run_isegmenter(int argc, char **argv); +extern void run_char_rnn(int argc, char **argv); +extern void run_tag(int argc, char **argv); +extern void run_cifar(int argc, char **argv); +extern void run_go(int argc, char **argv); +extern void run_art(int argc, char **argv); +extern void run_super(int argc, char **argv); +extern void run_lsd(int argc, char **argv); +#ifdef __cplusplus +} +#endif + +void average(int argc, char *argv[]) +{ + char *cfgfile = argv[2]; + char *outfile = argv[3]; + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + network *sum = parse_network_cfg(cfgfile); + + char *weightfile = argv[4]; + load_weights(sum, weightfile); + + int i, j; + int n = argc - 5; + for(i = 0; i < n; ++i){ + weightfile = argv[i+5]; + load_weights(net, weightfile); + for(j = 0; j < net->n; ++j){ + layer *l = net->layers[j]; + layer *out = sum->layers[j]; + if(l->type == CONVOLUTIONAL){ + int num = l->n*l->c*l->size*l->size; + 
axpy_cpu(l->n, 1, l->biases, 1, out->biases, 1); + axpy_cpu(num, 1, l->weights, 1, out->weights, 1); + if(l->batch_normalize){ + axpy_cpu(l->n, 1, l->scales, 1, out->scales, 1); + axpy_cpu(l->n, 1, l->rolling_mean, 1, out->rolling_mean, 1); + axpy_cpu(l->n, 1, l->rolling_variance, 1, out->rolling_variance, 1); + } + } + if(l->type == CONNECTED){ + axpy_cpu(l->outputs, 1, l->biases, 1, out->biases, 1); + axpy_cpu(l->outputs*l->inputs, 1, l->weights, 1, out->weights, 1); + } + } + } + n = n+1; + for(j = 0; j < net->n; ++j){ + layer *l = sum->layers[j]; + if(l->type == CONVOLUTIONAL){ + int num = l->n*l->c*l->size*l->size; + scal_cpu(l->n, 1./n, l->biases, 1); + scal_cpu(num, 1./n, l->weights, 1); + if(l->batch_normalize){ + scal_cpu(l->n, 1./n, l->scales, 1); + scal_cpu(l->n, 1./n, l->rolling_mean, 1); + scal_cpu(l->n, 1./n, l->rolling_variance, 1); + } + } + if(l->type == CONNECTED){ + scal_cpu(l->outputs, 1./n, l->biases, 1); + scal_cpu(l->outputs*l->inputs, 1./n, l->weights, 1); + } + } + save_weights(sum, outfile); +} + +long numops(network *net) +{ + int i; + long ops = 0; + for(i = 0; i < net->n; ++i){ + layer *l = net->layers[i]; + if(l->type == CONVOLUTIONAL){ + ops += 2l * l->n * l->size*l->size*l->c/l->groups * l->out_h*l->out_w; + } else if(l->type == CONNECTED){ + ops += 2l * l->inputs * l->outputs; + } else if (l->type == RNN){ + ops += 2l * l->input_layer->inputs * l->input_layer->outputs; + ops += 2l * l->self_layer->inputs * l->self_layer->outputs; + ops += 2l * l->output_layer->inputs * l->output_layer->outputs; + } else if (l->type == GRU){ + ops += 2l * l->uz->inputs * l->uz->outputs; + ops += 2l * l->uh->inputs * l->uh->outputs; + ops += 2l * l->ur->inputs * l->ur->outputs; + ops += 2l * l->wz->inputs * l->wz->outputs; + ops += 2l * l->wh->inputs * l->wh->outputs; + ops += 2l * l->wr->inputs * l->wr->outputs; + } else if (l->type == LSTM){ + ops += 2l * l->uf->inputs * l->uf->outputs; + ops += 2l * l->ui->inputs * l->ui->outputs; + ops += 2l * 
l->ug->inputs * l->ug->outputs; + ops += 2l * l->uo->inputs * l->uo->outputs; + ops += 2l * l->wf->inputs * l->wf->outputs; + ops += 2l * l->wi->inputs * l->wi->outputs; + ops += 2l * l->wg->inputs * l->wg->outputs; + ops += 2l * l->wo->inputs * l->wo->outputs; + } + } + return ops; +} + +void speed(char *cfgfile, int tics) +{ + if (tics == 0) tics = 1000; + network *net = parse_network_cfg(cfgfile); + set_batch_network(net, 1); + int i; + double time=what_time_is_it_now(); + image im = make_image(net->w, net->h, net->c*net->batch); + for(i = 0; i < tics; ++i){ + network_predict(net, im.data); + } + double t = what_time_is_it_now() - time; + long ops = numops(net); + printf("\n%d evals, %f Seconds\n", tics, t); + printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); + printf("FLOPS: %.2f Bn\n", (float)ops/1000000000.*tics/t); + printf("Speed: %f sec/eval\n", t/tics); + printf("Speed: %f Hz\n", tics/t); +} + +void operations(char *cfgfile) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + long ops = numops(net); + printf("Floating Point Operations: %ld\n", ops); + printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); +} + +void oneoff(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + int oldn = net->layers[net->n - 2]->n; + int c = net->layers[net->n - 2]->c; + scal_cpu(oldn*c, .1, net->layers[net->n - 2]->weights, 1); + scal_cpu(oldn, 0, net->layers[net->n - 2]->biases, 1); + net->layers[net->n - 2]->n = 11921; + net->layers[net->n - 2]->biases += 5; + net->layers[net->n - 2]->weights += 5*c; + if(weightfile){ + load_weights(net, weightfile); + } + net->layers[net->n - 2]->biases -= 5; + net->layers[net->n - 2]->weights -= 5*c; + net->layers[net->n - 2]->n = oldn; + printf("%d\n", oldn); + layer *l = net->layers[net->n - 2]; + copy_cpu(l->n/3, l->biases, 1, l->biases + l->n/3, 1); + copy_cpu(l->n/3, l->biases, 1, l->biases + 2*l->n/3, 1); + 
/* NOTE(review): continuation of oneoff() — the function's opening lines are
 * outside this chunk.  The visible tail replicates the first n/3*c weights of
 * layer l into the second and third thirds, resets the seen-images counter,
 * and writes the weights out. */
    copy_cpu(l->n/3*l->c, l->weights, 1, l->weights + l->n/3*l->c, 1);
    copy_cpu(l->n/3*l->c, l->weights, 1, l->weights + 2*l->n/3*l->c, 1);
    *net->seen = 0;
    save_weights(net, outfile);
}

/* Parse a network, load all weights, then re-load weights from layer l onward
 * on top of the first load, zero the seen counter, and save everything.
 * CPU only (gpu_index = -1). */
void oneoff2(char *cfgfile, char *weightfile, char *outfile, int l)
{
    gpu_index = -1;
    network *net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights_upto(net, weightfile, 0, net->n);
        load_weights_upto(net, weightfile, l, net->n);
    }
    *net->seen = 0;
    save_weights_upto(net, outfile, net->n);
}

/* Load a network and save only the weights of the first `max` layers. */
void partial(char *cfgfile, char *weightfile, char *outfile, int max)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 1);
    save_weights_upto(net, outfile, max);
}

/* Dump the raw weights of layer n to stdout, one filter per output line.
 * The commented printf()s are a leftover bracketed/JSON-ish format. */
void print_weights(char *cfgfile, char *weightfile, int n)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 1);
    layer *l = net->layers[n];
    int i, j;
    //printf("[");
    for(i = 0; i < l->n; ++i){
        //printf("[");
        for(j = 0; j < l->size*l->size*l->c; ++j){
            //if(j > 0) printf(",");
            printf("%g ", l->weights[i*l->size*l->size*l->c + j]);
        }
        printf("\n");
        //printf("]%s\n", (i == l->n-1)?"":",");
    }
    //printf("]");
}

/* Rescale the FIRST convolutional layer only (scale 2, translate -.5) and
 * save; the break exits after the first CONVOLUTIONAL layer is found. */
void rescale_net(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 0);
    int i;
    for(i = 0; i < net->n; ++i){
        layer *l = net->layers[i];
        if(l->type == CONVOLUTIONAL){
            rescale_weights(l, 2, -.5);
            break;
        }
    }
    save_weights(net, outfile);
}

/* Swap the R/B weight channels of the FIRST convolutional layer and save
 * (same first-layer-only pattern as rescale_net). */
void rgbgr_net(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 0);
    int i;
    for(i = 0; i < net->n; ++i){
        layer *l = net->layers[i];
        if(l->type == CONVOLUTIONAL){
            rgbgr_weights(l);
            break;
        }
    }
    save_weights(net, outfile);
}

/* Fold batch-norm statistics back into the raw weights (denormalize) for
 * every CONVOLUTIONAL/CONNECTED/GRU layer that has batch_normalize set,
 * then save.  Unlike denormalize_net below, batch_normalize flags are NOT
 * cleared here. */
void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 0);
    int i;
    for (i = 0; i < net->n; ++i) {
        layer *l = net->layers[i];
        if (l->type == CONVOLUTIONAL && l->batch_normalize) {
            denormalize_convolutional_layer(l);
        }
        if (l->type == CONNECTED && l->batch_normalize) {
            denormalize_connected_layer(l);
        }
        if (l->type == GRU && l->batch_normalize) {
            denormalize_connected_layer(l->input_z_layer);
            denormalize_connected_layer(l->input_r_layer);
            denormalize_connected_layer(l->input_h_layer);
            denormalize_connected_layer(l->state_z_layer);
            denormalize_connected_layer(l->state_r_layer);
            denormalize_connected_layer(l->state_h_layer);
        }
    }
    save_weights(net, outfile);
}

/* Give layer l identity batch-norm parameters over n outputs: scales all 1,
 * rolling mean/variance zeroed (calloc).  Returns the same layer pointer. */
layer* normalize_layer(layer *l, int n)
{
    int j;
    l->batch_normalize=1;
    l->scales = (float *) calloc(n, sizeof(float));
    for(j = 0; j < n; ++j){
        l->scales[j] = 1;
    }
    l->rolling_mean = (float *) calloc(n, sizeof(float));
    l->rolling_variance = (float *) calloc(n, sizeof(float));
    return l;
}

/* Add identity batch-norm to layers that lack it, then save.
 * NOTE(review): the GRU branch tests `l->batch_normalize` (set) while the
 * CONVOLUTIONAL/CONNECTED branches test `!l->batch_normalize` (unset) —
 * looks inconsistent, preserved as-is in this doc-only pass. */
void normalize_net(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 0);
    int i;
    for(i = 0; i < net->n; ++i){
        layer *l = net->layers[i];
        if(l->type == CONVOLUTIONAL && !l->batch_normalize){
            net->layers[i] = normalize_layer(l, l->n);
        }
        if (l->type == CONNECTED && !l->batch_normalize) {
            net->layers[i] = normalize_layer(l, l->outputs);
        }
        if (l->type == GRU && l->batch_normalize) {
            l->input_z_layer = normalize_layer(l->input_z_layer, l->input_z_layer->outputs);
            l->input_r_layer = normalize_layer(l->input_r_layer, l->input_r_layer->outputs);
            l->input_h_layer = normalize_layer(l->input_h_layer, l->input_h_layer->outputs);
            l->state_z_layer = normalize_layer(l->state_z_layer, l->state_z_layer->outputs);
            l->state_r_layer = normalize_layer(l->state_r_layer, l->state_r_layer->outputs);
            l->state_h_layer = normalize_layer(l->state_h_layer, l->state_h_layer->outputs);
            net->layers[i]->batch_normalize=1;
        }
    }
    save_weights(net, outfile);
}

/* Print batch-norm statistics for every batch-normalized CONNECTED layer
 * and every gate sub-layer of batch-normalized GRU layers. */
void statistics_net(char *cfgfile, char *weightfile)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 0);
    int i;
    for (i = 0; i < net->n; ++i) {
        layer *l = net->layers[i];
        if (l->type == CONNECTED && l->batch_normalize) {
            printf("Connected Layer %d\n", i);
            statistics_connected_layer(l);
        }
        if (l->type == GRU && l->batch_normalize) {
            printf("GRU Layer %d\n", i);
            printf("Input Z\n");
            statistics_connected_layer(l->input_z_layer);
            printf("Input R\n");
            statistics_connected_layer(l->input_r_layer);
            printf("Input H\n");
            statistics_connected_layer(l->input_h_layer);
            printf("State Z\n");
            statistics_connected_layer(l->state_z_layer);
            printf("State R\n");
            statistics_connected_layer(l->state_r_layer);
            printf("State H\n");
            statistics_connected_layer(l->state_h_layer);
        }
        printf("\n");
    }
}

/* Fold batch-norm into the weights AND clear the batch_normalize flags for
 * (de)convolutional, connected, and GRU layers, then save. */
void denormalize_net(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 0);
    int i;
    for (i = 0; i < net->n; ++i) {
        layer *l = net->layers[i];
        if ((l->type == DECONVOLUTIONAL || l->type == CONVOLUTIONAL) && l->batch_normalize) {
            denormalize_convolutional_layer(l);
            net->layers[i]->batch_normalize=0;
        }
        if (l->type == CONNECTED && l->batch_normalize) {
            denormalize_connected_layer(l);
            net->layers[i]->batch_normalize=0;
        }
        if (l->type == GRU && l->batch_normalize) {
            denormalize_connected_layer(l->input_z_layer);
            denormalize_connected_layer(l->input_r_layer);
            denormalize_connected_layer(l->input_h_layer);
            denormalize_connected_layer(l->state_z_layer);
            denormalize_connected_layer(l->state_r_layer);
            denormalize_connected_layer(l->state_h_layer);
            l->input_z_layer->batch_normalize = 0;
            l->input_r_layer->batch_normalize = 0;
            l->input_h_layer->batch_normalize = 0;
            l->state_z_layer->batch_normalize = 0;
            l->state_r_layer->batch_normalize = 0;
            l->state_h_layer->batch_normalize = 0;
            net->layers[i]->batch_normalize=0;
        }
    }
    save_weights(net, outfile);
}

/* Generate `num` synthetic h x w images: each starts grey (.5 fill) and has
 * 100 randomly rotated/distorted copies of first-layer filter images ghosted
 * onto it at random offsets; saved as <prefix>/gen_<z>. */
void mkimg(char *cfgfile, char *weightfile, int h, int w, int num, char *prefix)
{
    network *net = load_network(cfgfile, weightfile, 0);
    image *ims = get_weights(net->layers[0]);
    int n = net->layers[0]->n;
    int z;
    for(z = 0; z < num; ++z){
        image im = make_image(h, w, 3);
        fill_image(im, .5);
        int i;
        for(i = 0; i < 100; ++i){
            image r = copy_image(ims[rand()%n]);
            rotate_image_cw(r, rand()%4);
            random_distort_image(r, 1, 1.5, 1.5);
            int dx = rand()%(w-r.w);
            int dy = rand()%(h-r.h);
            ghost_image(r, im, dx, dy);
            free_image(r);
        }
        char buff[256];
        sprintf(buff, "%s/gen_%d", prefix, z);
        save_image(im, buff);
        free_image(im);
    }
}

/* Load a network and hand it to the interactive visualizer. */
void visualize(char *cfgfile, char *weightfile)
{
    network *net = load_network(cfgfile, weightfile, 0);
    visualize_network(net);
}

/* Read the x86 time-stamp counter; combines EDX:EAX into one 64-bit value. */
extern inline __attribute__((always_inline)) unsigned long rdtsc()
{
    unsigned long a, d;

    __asm__ volatile("rdtsc" : "=a" (a), "=d" (d));

    return (a | (d << 32));
}

/* Wall-clock time in nanoseconds via CLOCK_REALTIME; returns (unsigned)-1 on
 * clock_gettime failure. */
extern inline __attribute__((always_inline)) unsigned long rdtsp() {
    struct timespec tms;
    if (clock_gettime(CLOCK_REALTIME, &tms)) {
        return -1;
    }
    unsigned long ns = tms.tv_sec * 1000000000;
    ns += tms.tv_nsec;
    return ns;
}

/* Darknet command-line entry point: logs start timestamps, parses the GPU
 * flags, then dispatches argv[1] to the matching sub-command. */
int main(int argc, char *argv[])
{
    uint64_t start_tsc = rdtsc();
    uint64_t start_tsp = rdtsp();
    printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp);

    //test_resize("data/bad.jpg");
    //test_box();
    //test_convolutional_layer();
    if(argc < 2){
        fprintf(stderr, "usage: %s \n", argv[0]);
        return 0;
    }
    gpu_index = find_int_arg(argc, argv, "-i", GPU_DEVICE);
    if(find_arg(argc, argv, "-nogpu")) {
        gpu_index = -1;
    }

#ifndef GPU
    gpu_index = -1;
#else
    if(gpu_index >= 0){
        cuda_set_device(gpu_index);
    }
    initTrace();
#endif

    if (0 == strcmp(argv[1], "average")){
        average(argc, argv);
    } else if (0 == strcmp(argv[1], "yolo")){
        run_yolo(argc, argv);
    } else if (0 == strcmp(argv[1], "super")){
        run_super(argc, argv);
    } else
    if (0 == strcmp(argv[1], "lsd")){
        run_lsd(argc, argv);
    } else if (0 == strcmp(argv[1], "detector")){
        run_detector(argc, argv);
    } else if (0 == strcmp(argv[1], "detect")){
        float thresh = find_float_arg(argc, argv, "-thresh", .5);
        char *filename = (argc > 4) ? argv[4]: 0;
        char *outfile = find_char_arg(argc, argv, "-out", 0);
        int fullscreen = find_arg(argc, argv, "-fullscreen");
        /* Resolve cfg/coco.data relative to the benchmark root env var. */
        char *value = getenv("UVMAsyncBench_BASE");
        char buff[256];
        sprintf(buff, "%s/workloads/realworld/standard/darknet/cfg/coco.data", value);
        test_detector(buff, argv[2], argv[3], filename, thresh, .5, outfile, fullscreen);
    } else if (0 == strcmp(argv[1], "cifar")){
        run_cifar(argc, argv);
    } else if (0 == strcmp(argv[1], "go")){
        run_go(argc, argv);
    } else if (0 == strcmp(argv[1], "rnn")){
        run_char_rnn(argc, argv);
    } else if (0 == strcmp(argv[1], "coco")){
        run_coco(argc, argv);
    } else if (0 == strcmp(argv[1], "classify")){
        predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5);
    } else if (0 == strcmp(argv[1], "classifier")){
        run_classifier(argc, argv);
    } else if (0 == strcmp(argv[1], "regressor")){
        run_regressor(argc, argv);
    } else if (0 == strcmp(argv[1], "isegmenter")){
        run_isegmenter(argc, argv);
    } else if (0 == strcmp(argv[1], "segmenter")){
        run_segmenter(argc, argv);
    } else if (0 == strcmp(argv[1], "art")){
        run_art(argc, argv);
    } else if (0 == strcmp(argv[1], "tag")){
        run_tag(argc, argv);
    } else if (0 == strcmp(argv[1], "3d")){
        composite_3d(argv[2], argv[3], argv[4], (argc > 5) ?
            atof(argv[5]) : 0);
    } else if (0 == strcmp(argv[1], "test")){
        test_resize(argv[2]);
    } else if (0 == strcmp(argv[1], "nightmare")){
        run_nightmare(argc, argv);
    } else if (0 == strcmp(argv[1], "rgbgr")){
        rgbgr_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "reset")){
        reset_normalize_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "denormalize")){
        denormalize_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "statistics")){
        statistics_net(argv[2], argv[3]);
    } else if (0 == strcmp(argv[1], "normalize")){
        normalize_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "rescale")){
        rescale_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "ops")){
        operations(argv[2]);
    } else if (0 == strcmp(argv[1], "speed")){
        speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0);
    } else if (0 == strcmp(argv[1], "oneoff")){
        oneoff(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "oneoff2")){
        oneoff2(argv[2], argv[3], argv[4], atoi(argv[5]));
    } else if (0 == strcmp(argv[1], "print")){
        print_weights(argv[2], argv[3], atoi(argv[4]));
    } else if (0 == strcmp(argv[1], "partial")){
        partial(argv[2], argv[3], argv[4], atoi(argv[5]));
    /* NOTE(review): duplicate "average" branch — unreachable, the identical
     * comparison at the top of the chain always matches first. */
    } else if (0 == strcmp(argv[1], "average")){
        average(argc, argv);
    } else if (0 == strcmp(argv[1], "visualize")){
        visualize(argv[2], (argc > 3) ?
argv[3] : 0);
    /* (continuation of the main() dispatch chain begun above) */
    } else if (0 == strcmp(argv[1], "mkimg")){
        mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]);
    } else if (0 == strcmp(argv[1], "imtest")){
        test_resize(argv[2]);
    } else {
        fprintf(stderr, "Not an option: %s\n", argv[1]);
    }
    finiTrace();
    return 0;
}

diff --git a/workloads/realworld/pipeline/darknet/examples/detector-scipy-opencv.py b/workloads/realworld/pipeline/darknet/examples/detector-scipy-opencv.py
new file mode 100644
index 0000000000000000000000000000000000000000..3bfc591312ad89ff2b026ffac0daecd461c80447
--- /dev/null
+++ b/workloads/realworld/pipeline/darknet/examples/detector-scipy-opencv.py
@@ -0,0 +1,56 @@
# Stupid python path shit.
# Instead just add darknet.py to somewhere in your python path
# OK actually that might not be a great idea, idk, work in progress
# Use at your own risk. or don't, i don't care
# NOTE(review): Python 2 script (bare `print r` statements below).

from scipy.misc import imread
import cv2

# Convert an HWC numpy array to a darknet IMAGE (CHW, floats scaled to [0,1]).
# `dn` is bound by the module-level `import darknet as dn` further down, which
# runs before this function is first called.
def array_to_image(arr):
    arr = arr.transpose(2,0,1)
    c = arr.shape[0]
    h = arr.shape[1]
    w = arr.shape[2]
    arr = (arr/255.0).flatten()
    data = dn.c_array(dn.c_float, arr)
    im = dn.IMAGE(w,h,c,data)
    return im

# Run detection on an already-converted darknet IMAGE; returns a list of
# (name, prob, (x, y, w, h)) tuples sorted by descending probability.
def detect2(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45):
    boxes = dn.make_boxes(net)
    probs = dn.make_probs(net)
    num = dn.num_boxes(net)
    dn.network_detect(net, image, thresh, hier_thresh, nms, boxes, probs)
    res = []
    for j in range(num):
        for i in range(meta.classes):
            if probs[j][i] > 0:
                res.append((meta.names[i], probs[j][i], (boxes[j].x, boxes[j].y, boxes[j].w, boxes[j].h)))
    res = sorted(res, key=lambda x: -x[1])
    dn.free_ptrs(dn.cast(probs, dn.POINTER(dn.c_void_p)), num)
    return res

import sys, os
sys.path.append(os.path.join(os.getcwd(),'python/'))

import darknet as dn

# Darknet
net = dn.load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0)
meta = dn.load_meta("cfg/coco.data")
r = dn.detect(net, meta, "data/dog.jpg")
print r

# scipy
arr= imread('data/dog.jpg')
im = array_to_image(arr)
r = detect2(net, meta, im)
print r

# OpenCV
arr = cv2.imread('data/dog.jpg')
im = array_to_image(arr)
dn.rgbgr_image(im)
r = detect2(net, meta, im)
print r

diff --git a/workloads/realworld/pipeline/darknet/examples/detector.c b/workloads/realworld/pipeline/darknet/examples/detector.c
new file mode 100644
index 0000000000000000000000000000000000000000..fb0bae61f5ccb019ee4fdd37a7135991fef94b69
--- /dev/null
+++ b/workloads/realworld/pipeline/darknet/examples/detector.c
@@ -0,0 +1,985 @@
#include "darknet.h"

/* COCO category ids indexed by contiguous class index (the official COCO id
 * space has gaps, e.g. no 12, 26, 29, 30). */
static int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90};


/* Train a detector across `ngpus` GPUs.  Paths from the data cfg are made
 * absolute by prefixing $UVMAsyncBench_BASE ("Ruihao" local modification). */
void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
{
    list *options = read_data_cfg(datacfg);
    // Ruihao
    char *train_images_cfg = option_find_str(options, "train", "data/train.list");
    char *backup_directory_cfg = option_find_str(options, "backup", "/backup/");

    char *env = getenv("UVMAsyncBench_BASE");
    char train_images[256];
    char backup_directory[256];
    sprintf(train_images, "%s/%s", env, train_images_cfg);
    sprintf(backup_directory, "%s/%s", env, backup_directory_cfg);
    // Ruihao

    srand(time(0));
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    float avg_loss = -1;
    /* NOTE(review): element type is network*, so sizeof(network *) would be
     * the precise size here; sizeof(network) presumably over-allocates
     * (harmless) — TODO confirm against the struct definition. */
    network **nets = calloc(ngpus, sizeof(network));

    srand(time(0));
    int seed = rand();
    int i;
    for(i = 0; i < ngpus; ++i){
        srand(seed);
#ifdef GPU
        cuda_set_device(gpus[i]);
#endif
        nets[i] = load_network(cfgfile, weightfile, clear);
        nets[i]->learning_rate *= ngpus;
    }
    srand(time(0));
    network *net = nets[0];

    int imgs = net->batch * net->subdivisions * ngpus;
    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    data train, buffer;

    layer *l =
net->layers[net->n - 1];
    /* (continuation of train_detector(); `l` is the final detection layer) */

    int classes = l->classes;
    float jitter = l->jitter;

    list *plist = get_paths(train_images);
    //int N = plist->size;
    char **paths = (char **)list_to_array(plist);

    load_args args = get_base_args(net);
    args.coords = l->coords;
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.classes = classes;
    args.jitter = jitter;
    args.num_boxes = l->max_boxes;
    args.d = &buffer;
    args.type = DETECTION_DATA;
    //args.type = INSTANCE_DATA;
    args.threads = 64;

    pthread_t load_thread = load_data(args);
    double time;
    int count = 0;
    //while(i*imgs < N*120){
    while(get_current_batch(net) < net->max_batches){
        /* Multi-scale training: every 10th batch pick a new input dim
         * (multiple of 32) and resize every replica; pin to 608 near the
         * end of training. */
        if(l->random && count++%10 == 0){
            printf("Resizing\n");
            int dim = (rand() % 10 + 10) * 32;
            if (get_current_batch(net)+200 > net->max_batches) dim = 608;
            //int dim = (rand() % 4 + 16) * 32;
            printf("%d\n", dim);
            args.w = dim;
            args.h = dim;

            /* Drain the in-flight load (its data is for the old size). */
            pthread_join(load_thread, 0);
            train = buffer;
            free_data(train);
            load_thread = load_data(args);

            #pragma omp parallel for
            for(i = 0; i < ngpus; ++i){
                resize_network(nets[i], dim, dim);
            }
            net = nets[0];
        }
        time=what_time_is_it_now();
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data(args);

        /*
           int k;
           for(k = 0; k < l.max_boxes; ++k){
           box b = float_to_box(train.y.vals[10] + 1 + k*5);
           if(!b.x) break;
           printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h);
           }
         */
        /*
           int zz;
           for(zz = 0; zz < train.X.cols; ++zz){
           image im = float_to_image(net->w, net->h, 3, train.X.vals[zz]);
           int k;
           for(k = 0; k < l.max_boxes; ++k){
           box b = float_to_box(train.y.vals[zz] + k*5, 1);
           printf("%f %f %f %f\n", b.x, b.y, b.w, b.h);
           draw_bbox(im, b, 1, 1,0,0);
           }
           show_image(im, "truth11");
           cvWaitKey(0);
           save_image(im, "truth11");
           }
         */

        printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);

        time=what_time_is_it_now();
        float loss = 0;
#ifdef GPU
        if(ngpus == 1){
            loss = train_network(net, train);
        } else {
            loss = train_networks(nets, ngpus, train, 4);
        }
#else
        loss = train_network(net, train);
#endif
        if (avg_loss < 0) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;   /* exponential moving average */

        i = get_current_batch(net);
        printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, i*imgs);
        /* Periodic checkpoints: rolling .backup every 100 batches, numbered
         * snapshots every 10000 (every 100 during the first 1000). */
        if(i%100==0){
#ifdef GPU
            if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(net, buff);
        }
        if(i%10000==0 || (i < 1000 && i%100 == 0)){
#ifdef GPU
            if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(net, buff);
        }
        free_data(train);
    }
#ifdef GPU
    if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
    char buff[256];
    sprintf(buff, "%s/%s_final.weights", backup_directory, base);
    save_weights(net, buff);
}

/* Inference-only counterpart of train_detector: same loading pipeline, but
 * runs val_network() over the "test" list from the data cfg (paths prefixed
 * with $UVMAsyncBench_BASE) and never saves weights. */
void infer_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
{
    list *options = read_data_cfg(datacfg);
    // Ruihao
    char *test_images_cfg = option_find_str(options, "test", "data/test.list");

    char *env = getenv("UVMAsyncBench_BASE");
    char test_images[256];
    sprintf(test_images, "%s/%s", env, test_images_cfg);
    // Ruihao

    srand(time(0));
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    float avg_loss = -1;
    network **nets = calloc(ngpus, sizeof(network));

    srand(time(0));
    int seed = rand();
    int i;
    for(i = 0; i < ngpus; ++i){
        srand(seed);
#ifdef GPU
        cuda_set_device(gpus[i]);
#endif
        nets[i] = load_network(cfgfile, weightfile, clear);
    }
    srand(time(0));
    network *net = nets[0];

    int imgs = net->batch * net->subdivisions * ngpus;
    data val, buffer;

    layer *l = net->layers[net->n - 1];

    int classes = l->classes;
    float jitter = l->jitter;

    list *plist =
get_paths(test_images);
    /* (continuation of infer_detector()) */
    //int N = plist->size;
    char **paths = (char **)list_to_array(plist);

    load_args args = get_base_args(net);
    args.coords = l->coords;
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.classes = classes;
    args.jitter = jitter;
    args.num_boxes = l->max_boxes;
    args.d = &buffer;
    args.type = DETECTION_DATA;
    //args.type = INSTANCE_DATA;
    args.threads = 64;

    pthread_t load_thread = load_data(args);
    double time;
    int count = 0;
    //while(i*imgs < N*120){
    while(get_current_batch(net) < net->max_batches){
        if(l->random && count++%10 == 0){
            printf("Resizing\n");
            int dim = (rand() % 10 + 10) * 32;
            if (get_current_batch(net)+200 > net->max_batches) dim = 608;
            //int dim = (rand() % 4 + 16) * 32;
            printf("%d\n", dim);
            args.w = dim;
            args.h = dim;

            pthread_join(load_thread, 0);
            val = buffer;
            free_data(val);
            load_thread = load_data(args);

            #pragma omp parallel for
            for(i = 0; i < ngpus; ++i){
                resize_network(nets[i], dim, dim);
            }
            net = nets[0];
        }
        time=what_time_is_it_now();
        pthread_join(load_thread, 0);
        val = buffer;
        load_thread = load_data(args);

        printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);

        time=what_time_is_it_now();
        float loss = 0;
#ifdef GPU
        if(ngpus == 1){
            loss = val_network(net, val);
        } else {
            loss = train_networks(nets, ngpus, val, 4);
        }
#else
        loss = train_network(net, val);
#endif
        if (avg_loss < 0) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;

        i = get_current_batch(net);
        printf("%ld: %lf seconds, %d images\n", get_current_batch(net), what_time_is_it_now()-time, i*imgs);
        free_data(val);
    }
}


/* Extract the numeric COCO image id from a filename: the digits after the
 * last '_' if present, else after the last '/'. */
static int get_coco_image_id(char *filename)
{
    char *p = strrchr(filename, '/');
    char *c = strrchr(filename, '_');
    if(c) p = c;
    return atoi(p+1);
}

/* Append COCO-format JSON result lines (one object per class with nonzero
 * probability) for the detections of a single image.  Boxes are converted
 * from center/size to clamped [x, y, w, h]. */
static void print_cocos(FILE *fp, char *image_path, detection *dets, int num_boxes, int classes, int w, int h)
{
    int i, j;
    int image_id = get_coco_image_id(image_path);
    for(i = 0; i < num_boxes; ++i){
        float xmin = dets[i].bbox.x - dets[i].bbox.w/2.;
        float xmax = dets[i].bbox.x + dets[i].bbox.w/2.;
        float ymin = dets[i].bbox.y - dets[i].bbox.h/2.;
        float ymax = dets[i].bbox.y + dets[i].bbox.h/2.;

        if (xmin < 0) xmin = 0;
        if (ymin < 0) ymin = 0;
        if (xmax > w) xmax = w;
        if (ymax > h) ymax = h;

        float bx = xmin;
        float by = ymin;
        float bw = xmax - xmin;
        float bh = ymax - ymin;

        for(j = 0; j < classes; ++j){
            if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]);
        }
    }
}

/* Write VOC-style per-class result lines ("id prob xmin ymin xmax ymax"),
 * one output file per class; coordinates are 1-based and clamped. */
void print_detector_detections(FILE **fps, char *id, detection *dets, int total, int classes, int w, int h)
{
    int i, j;
    for(i = 0; i < total; ++i){
        float xmin = dets[i].bbox.x - dets[i].bbox.w/2. + 1;
        float xmax = dets[i].bbox.x + dets[i].bbox.w/2. + 1;
        float ymin = dets[i].bbox.y - dets[i].bbox.h/2. + 1;
        float ymax = dets[i].bbox.y + dets[i].bbox.h/2.
            + 1;

        if (xmin < 1) xmin = 1;
        if (ymin < 1) ymin = 1;
        if (xmax > w) xmax = w;
        if (ymax > h) ymax = h;

        for(j = 0; j < classes; ++j){
            if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j],
                    xmin, ymin, xmax, ymax);
        }
    }
}

/* Write ImageNet-detection result lines ("image_id class prob box") to a
 * single file; class ids are 1-based (j+1), boxes clamped to the image. */
void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int classes, int w, int h)
{
    int i, j;
    for(i = 0; i < total; ++i){
        float xmin = dets[i].bbox.x - dets[i].bbox.w/2.;
        float xmax = dets[i].bbox.x + dets[i].bbox.w/2.;
        float ymin = dets[i].bbox.y - dets[i].bbox.h/2.;
        float ymax = dets[i].bbox.y + dets[i].bbox.h/2.;

        if (xmin < 0) xmin = 0;
        if (ymin < 0) ymin = 0;
        if (xmax > w) xmax = w;
        if (ymax > h) ymax = h;

        for(j = 0; j < classes; ++j){
            int class_ = j;
            if (dets[i].prob[class_]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j+1, dets[i].prob[class_],
                    xmin, ymin, xmax, ymax);
        }
    }
}

/* Validation with horizontal-flip augmentation: batch of 2 holds the image
 * and its mirror stacked channel-wise (net->c*2); four loader threads feed
 * images and results go to coco/imagenet/voc-format files per the data cfg. */
void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char *outfile)
{
    int j;
    list *options = read_data_cfg(datacfg);
    char *valid_images = option_find_str(options, "valid", "data/train.list");
    char *name_list = option_find_str(options, "names", "data/names.list");
    char *prefix = option_find_str(options, "results", "results");
    char **names = get_labels(name_list);
    char *mapf = option_find_str(options, "map", 0);
    int *map = 0;
    if (mapf) map = read_map(mapf);

    network *net = load_network(cfgfile, weightfile, 0);
    set_batch_network(net, 2);
    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    srand(time(0));

    list *plist = get_paths(valid_images);
    char **paths = (char **)list_to_array(plist);

    layer *l = net->layers[net->n-1];
    int classes = l->classes;

    char buff[1024];
    char *type = option_find_str(options, "eval", "voc");
    FILE *fp = 0;
    FILE **fps = 0;
    int coco = 0;
    int imagenet = 0;
    if(0==strcmp(type, "coco")){
        if(!outfile) outfile =
"coco_results"; + snprintf(buff, 1024, "%s/%s.json", prefix, outfile); + fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + coco = 1; + } else if(0==strcmp(type, "imagenet")){ + if(!outfile) outfile = "imagenet-detection"; + snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); + fp = fopen(buff, "w"); + imagenet = 1; + classes = 200; + } else { + if(!outfile) outfile = "comp4_det_test_"; + fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); + fps[j] = fopen(buff, "w"); + } + } + + int m = plist->size; + int i=0; + int t; + + float thresh = .005; + float nms = .45; + + int nthreads = 4; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + image input = make_image(net->w, net->h, net->c*2); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + //args.type = IMAGE_DATA; + args.type = LETTERBOX_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + double start = what_time_is_it_now(); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id = basecfg(path); + copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data, 1); + flip_image(val_resized[t]); + copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data + 
net->w*net->h*net->c, 1); + + network_predict(net, input.data); + int w = val[t].w; + int h = val[t].h; + int num = 0; + detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &num); + if (nms) do_nms_sort(dets, num, classes, nms); + if (coco){ + print_cocos(fp, path, dets, num, classes, w, h); + } else if (imagenet){ + print_imagenet_detections(fp, i+t-nthreads+1, dets, num, classes, w, h); + } else { + print_detector_detections(fps, id, dets, num, classes, w, h); + } + free_detections(dets, num); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + for(j = 0; j < classes; ++j){ + if(fps) fclose(fps[j]); + } + if(coco){ + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start); +} + + +void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile) +{ + int j; + list *options = read_data_cfg(datacfg); + // char *valid_images = option_find_str(options, "valid", "data/train.list"); + // char *name_list = option_find_str(options, "names", "data/names.list"); + + // Ruihao + char *valid_images_cfg = option_find_str(options, "train", "data/train.list"); + char *name_list_cfg = option_find_str(options, "names", "data/names.list"); + + char *env = getenv("UVMAsyncBench_BASE"); + char valid_images[256]; + char name_list[256]; + sprintf(valid_images, "%s/%s", env, valid_images_cfg); + sprintf(name_list, "%s/%s", env, name_list_cfg); + // Ruihao + + char *prefix = option_find_str(options, "results", "results"); + char **names = get_labels(name_list); + char *mapf = option_find_str(options, "map", 0); + int *map = 0; + if (mapf) map = read_map(mapf); + + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + list *plist = get_paths(valid_images); + char **paths = 
(char **)list_to_array(plist); + + layer *l = net->layers[net->n-1]; + int classes = l->classes; + + char buff[1024]; + char *type = option_find_str(options, "eval", "voc"); + FILE *fp = 0; + FILE **fps = 0; + int coco = 0; + int imagenet = 0; + if(0==strcmp(type, "coco")){ + if(!outfile) outfile = "coco_results"; + snprintf(buff, 1024, "%s/%s.json", prefix, outfile); + fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + coco = 1; + } else if(0==strcmp(type, "imagenet")){ + if(!outfile) outfile = "imagenet-detection"; + snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); + fp = fopen(buff, "w"); + imagenet = 1; + classes = 200; + } else { + if(!outfile) outfile = "comp4_det_test_"; + fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); + fps[j] = fopen(buff, "w"); + } + } + + + int m = plist->size; + int i=0; + int t; + + float thresh = .005; + float nms = .45; + + int nthreads = 4; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + //args.type = IMAGE_DATA; + args.type = LETTERBOX_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + double start = what_time_is_it_now(); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ 
char *path = paths[i+t-nthreads];
            /* (continuation of validate_detector()'s inner prediction loop) */
            char *id = basecfg(path);
            float *X = val_resized[t].data;
            network_predict(net, X);
            int w = val[t].w;
            int h = val[t].h;
            int nboxes = 0;
            detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &nboxes);
            if (nms) do_nms_sort(dets, nboxes, classes, nms);
            if (coco){
                print_cocos(fp, path, dets, nboxes, classes, w, h);
            } else if (imagenet){
                print_imagenet_detections(fp, i+t-nthreads+1, dets, nboxes, classes, w, h);
            } else {
                print_detector_detections(fps, id, dets, nboxes, classes, w, h);
            }
            free_detections(dets, nboxes);
            free(id);
            free_image(val[t]);
            free_image(val_resized[t]);
        }
    }
    for(j = 0; j < classes; ++j){
        if(fps) fclose(fps[j]);
    }
    if(coco){
        fseek(fp, -2, SEEK_CUR);
        fprintf(fp, "\n]\n");
        fclose(fp);
    }
    fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start);
}

/* Compute proposal count, average IOU, and recall against the ground-truth
 * label files of data/coco_val_5k.list, printing a running summary line per
 * image. */
void validate_detector_recall(char *cfgfile, char *weightfile)
{
    network *net = load_network(cfgfile, weightfile, 0);
    set_batch_network(net, 1);
    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    srand(time(0));

    list *plist = get_paths("data/coco_val_5k.list");
    char **paths = (char **)list_to_array(plist);

    layer *l = net->layers[net->n-1];

    int j, k;

    int m = plist->size;
    int i=0;

    float thresh = .001;
    float iou_thresh = .5;
    float nms = .4;

    int total = 0;
    int correct = 0;
    int proposals = 0;
    float avg_iou = 0;

    for(i = 0; i < m; ++i){
        char *path = paths[i];
        image orig = load_image_color(path, 0, 0);
        image sized = resize_image(orig, net->w, net->h);
        char *id = basecfg(path);
        network_predict(net, sized.data);
        int nboxes = 0;
        detection *dets = get_network_boxes(net, sized.w, sized.h, thresh, .5, 0, 1, &nboxes);
        if (nms) do_nms_obj(dets, nboxes, 1, nms);

        /* Derive the label-file path from the image path by substitution. */
        char labelpath[4096];
        find_replace(path, "images", "labels", labelpath);
        find_replace(labelpath,
                "JPEGImages", "labels", labelpath);
        find_replace(labelpath, ".jpg", ".txt", labelpath);
        find_replace(labelpath, ".JPEG", ".txt", labelpath);

        int num_labels = 0;
        box_label *truth = read_boxes(labelpath, &num_labels);
        for(k = 0; k < nboxes; ++k){
            if(dets[k].objectness > thresh){
                ++proposals;
            }
        }
        /* For each ground-truth box, find the best-IOU proposal; it counts
         * as correct if that IOU clears iou_thresh. */
        for (j = 0; j < num_labels; ++j) {
            ++total;
            box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h};
            float best_iou = 0;
            for(k = 0; k < l->w*l->h*l->n; ++k){
                float iou = box_iou(dets[k].bbox, t);
                if(dets[k].objectness > thresh && iou > best_iou){
                    best_iou = iou;
                }
            }
            avg_iou += best_iou;
            if(best_iou > iou_thresh){
                ++correct;
            }
        }

        fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total);
        free(id);
        free_image(orig);
        free_image(sized);
    }
}


/* Run detection on a single image (or interactively on stdin paths), draw
 * the boxes, and save to `outfile` or "predictions".  network_predict() is
 * bracketed by startCPU()/endCPU() for the benchmark's CPU timestamps.
 * The names list is prefixed with $UVMAsyncBench_BASE ("Ruihao" change). */
void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen)
{
    list *options = read_data_cfg(datacfg);
    // Ruihao
    char *name_list_cfg = option_find_str(options, "names", "data/names.list");
    char *env = getenv("UVMAsyncBench_BASE");
    char name_list[256];
    sprintf(name_list, "%s/%s", env, name_list_cfg);
    // Ruihao
    char **names = get_labels(name_list);

    image **alphabet = load_alphabet();
    network *net = load_network(cfgfile, weightfile, 0);
    set_batch_network(net, 1);
    srand(2222222);
    double time;
    char buff[256];
    char *input = buff;
    float nms=.45;
    while(1){
        /* NOTE(review): "fine" looks like a typo for "file"; runtime string
         * left untouched in this doc-only pass. */
        printf("fine name is %s\n", filename);
        if(filename){
            strncpy(input, filename, 256);
        } else {
            printf("Enter Image Path: ");
            fflush(stdout);
            input = fgets(input, 256, stdin);
            if(!input) return;
            strtok(input, "\n");
        }
        image im = load_image_color(input,0,0);
        image sized = letterbox_image(im, net->w, net->h);
        //image sized = resize_image(im, net->w, net->h);
        //image sized2 = resize_max(im,
        net->w);
        //image sized = crop_image(sized2, -((net->w - sized2.w)/2), -((net->h - sized2.h)/2), net->w, net->h);
        //resize_network(net, sized.w, sized.h);
        layer *l = net->layers[net->n-1];


        float *X = sized.data;
        time=what_time_is_it_now();
        startCPU();
        network_predict(net, X);
        endCPU();
        printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time);
        int nboxes = 0;
        detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes);
        //printf("%d\n", nboxes);
        //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
        if (nms) do_nms_sort(dets, nboxes, l->classes, nms);
        draw_detections(im, dets, nboxes, thresh, names, alphabet, l->classes);
        free_detections(dets, nboxes);
        if(outfile){
            save_image(im, outfile);
        }
        else{
            save_image(im, "predictions");
#ifdef OPENCV
            make_window("predictions", 512, 512, 0);
            show_image(im, "predictions", 0);
#endif
        }

        free_image(im);
        free_image(sized);
        if (filename) break;
    }
}

/*
void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip)
{
#ifdef OPENCV
    char *base = basecfg(cfgfile);
    network *net = load_network(cfgfile, weightfile, 0);
    set_batch_network(net, 1);

    srand(2222222);
    CvCapture * cap;

    int w = 1280;
    int h = 720;

    if(filename){
        cap = cvCaptureFromFile(filename);
    }else{
        cap = cvCaptureFromCAM(cam_index);
    }

    if(w){
        cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w);
    }
    if(h){
        cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h);
    }

    if(!cap) error("Couldn't connect to webcam.\n");
    cvNamedWindow(base, CV_WINDOW_NORMAL);
    cvResizeWindow(base, 512, 512);
    float fps = 0;
    int i;
    float nms = .45;

    while(1){
        image in = get_image_from_stream(cap);
        //image in_s = resize_image(in, net->w, net->h);
        image in_s = letterbox_image(in, net->w, net->h);
        layer l = net->layers[net->n-1];

        float *X
= in_s.data;
        network_predict(net, X);
        int nboxes = 0;
        detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 0, &nboxes);
        //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
        if (nms) do_nms_sort(dets, nboxes, l.classes, nms);

        for(i = 0; i < nboxes; ++i){
            if(dets[i].prob[class] > thresh){
                box b = dets[i].bbox;
                int left = b.x-b.w/2.;
                int top = b.y-b.h/2.;
                censor_image(in, left, top, b.w, b.h);
            }
        }
        show_image(in, base);
        cvWaitKey(10);
        free_detections(dets, nboxes);


        free_image(in_s);
        free_image(in);


        float curr = 0;
        fps = .9*fps + .1*curr;
        for(i = 0; i < skip; ++i){
            image in = get_image_from_stream(cap);
            free_image(in);
        }
    }
    #endif
}

void extract_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip)
{
#ifdef OPENCV
    char *base = basecfg(cfgfile);
    network *net = load_network(cfgfile, weightfile, 0);
    set_batch_network(net, 1);

    srand(2222222);
    CvCapture * cap;

    int w = 1280;
    int h = 720;

    if(filename){
        cap = cvCaptureFromFile(filename);
    }else{
        cap = cvCaptureFromCAM(cam_index);
    }

    if(w){
        cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w);
    }
    if(h){
        cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h);
    }

    if(!cap) error("Couldn't connect to webcam.\n");
    cvNamedWindow(base, CV_WINDOW_NORMAL);
    cvResizeWindow(base, 512, 512);
    float fps = 0;
    int i;
    int count = 0;
    float nms = .45;

    while(1){
        image in = get_image_from_stream(cap);
        //image in_s = resize_image(in, net->w, net->h);
        image in_s = letterbox_image(in, net->w, net->h);
        layer l = net->layers[net->n-1];

        show_image(in, base);

        int nboxes = 0;
        float *X = in_s.data;
        network_predict(net, X);
        detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 1, &nboxes);
        //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
        if (nms) do_nms_sort(dets, nboxes, l.classes, nms);

        for(i = 0; i < nboxes; ++i){
            if(dets[i].prob[class] > thresh){
                box b = dets[i].bbox;
                int size = b.w*in.w > b.h*in.h ? b.w*in.w : b.h*in.h;
                int dx = b.x*in.w-size/2.;
                int dy = b.y*in.h-size/2.;
                image bim = crop_image(in, dx, dy, size, size);
                char buff[2048];
                sprintf(buff, "results/extract/%07d", count);
                ++count;
                save_image(bim, buff);
                free_image(bim);
            }
        }
        free_detections(dets, nboxes);


        free_image(in_s);
        free_image(in);


        float curr = 0;
        fps = .9*fps + .1*curr;
        for(i = 0; i < skip; ++i){
            image in = get_image_from_stream(cap);
            free_image(in);
        }
    }
    #endif
}
*/

/*
void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets)
{
    network_predict_image(net, im);
    layer l = net->layers[net->n-1];
    int nboxes = num_boxes(net);
    fill_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 0, dets);
    if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
}
*/

/* Sub-command dispatcher for "darknet detector ...": parses the shared
 * option flags, builds the GPU list (comma-separated -gpus, or the current
 * gpu_index), then routes argv[2] to the matching routine above. */
void run_detector(int argc, char **argv)
{
    char *prefix = find_char_arg(argc, argv, "-prefix", 0);
    float thresh = find_float_arg(argc, argv, "-thresh", .5);
    float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
    int cam_index = find_int_arg(argc, argv, "-c", 0);
    int frame_skip = find_int_arg(argc, argv, "-s", 0);
    int avg = find_int_arg(argc, argv, "-avg", 3);
    if(argc < 4){
        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }
    char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
    char *outfile = find_char_arg(argc, argv, "-out", 0);
    int *gpus = 0;
    int gpu = 0;
    int ngpus = 0;
    if(gpu_list){
        printf("%s\n", gpu_list);
        int len = strlen(gpu_list);
        ngpus = 1;
        int i;
        for(i = 0; i < len; ++i){
            if (gpu_list[i] == ',') ++ngpus;
        }
        gpus = calloc(ngpus, sizeof(int));
        for(i = 0; i < ngpus; ++i){
            gpus[i] = atoi(gpu_list);
            gpu_list = strchr(gpu_list, ',')+1;
        }
    } else {
        gpu = gpu_index;
        gpus = &gpu;
        ngpus = 1;
    }

    int clear = find_arg(argc, argv, "-clear");
    int fullscreen = find_arg(argc, argv, "-fullscreen");
    int width = find_int_arg(argc, argv, "-w", 0);
    int height = find_int_arg(argc, argv, "-h", 0);
    int fps = find_int_arg(argc, argv, "-fps", 0);
    //int class = find_int_arg(argc, argv, "-class", 0);

    char *datacfg = argv[3];
    char *cfg = argv[4];
    char *weights = (argc > 5) ? argv[5] : 0;
    char *filename = (argc > 6) ? argv[6]: 0;
    if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen);
    else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
    else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
    else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile);
    else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
    else if(0==strcmp(argv[2], "demo")) {
        list *options = read_data_cfg(datacfg);
        int classes = option_find_int(options, "classes", 20);
        char *name_list = option_find_str(options, "names", "data/names.list");
        char **names = get_labels(name_list);
        demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen);
    }
    //else if(0==strcmp(argv[2], "extract")) extract_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip);
    //else if(0==strcmp(argv[2], "censor")) censor_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip);
}

diff --git a/workloads/realworld/pipeline/darknet/examples/detector.py b/workloads/realworld/pipeline/darknet/examples/detector.py
new file mode 100644
index 0000000000000000000000000000000000000000..40bb365e68211c513db9d63847ac95070f5eab98
--- /dev/null
+++ b/workloads/realworld/pipeline/darknet/examples/detector.py
@@ -0,0 +1,27 @@
# Stupid python path shit.
+# Instead just add darknet.py to somewhere in your python path +# OK actually that might not be a great idea, idk, work in progress +# Use at your own risk. or don't, i don't care + +import sys, os +sys.path.append(os.path.join(os.getcwd(),'python/')) + +import darknet as dn +import pdb + +dn.set_gpu(0) +net = dn.load_net("cfg/yolo-thor.cfg", "/home/pjreddie/backup/yolo-thor_final.weights", 0) +meta = dn.load_meta("cfg/thor.data") +r = dn.detect(net, meta, "data/bedroom.jpg") +print r + +# And then down here you could detect a lot more images like: +r = dn.detect(net, meta, "data/eagle.jpg") +print r +r = dn.detect(net, meta, "data/giraffe.jpg") +print r +r = dn.detect(net, meta, "data/horses.jpg") +print r +r = dn.detect(net, meta, "data/person.jpg") +print r + diff --git a/workloads/realworld/pipeline/darknet/examples/dice.c b/workloads/realworld/pipeline/darknet/examples/dice.c new file mode 100644 index 0000000000000000000000000000000000000000..f56d76c0bb66c7f630ba1c4d1dc9195398b87cfb --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/dice.c @@ -0,0 +1,116 @@ +#include "darknet.h" + +char *dice_labels[] = {"face1","face2","face3","face4","face5","face6"}; + +void train_dice(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + int i = *net.seen/imgs; + char **labels = dice_labels; + list *plist = get_paths("data/dice/dice.train.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + while(1){ + ++i; + time=clock(); + data train = load_data_old(paths, imgs, plist->size, labels, 6, net.w, net.h); + printf("Loaded: %lf seconds\n", sec(clock()-time)); 
+ + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); + free_data(train); + if((i % 100) == 0) net.learning_rate *= .1; + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, i); + save_weights(net, buff); + } + } +} + +void validate_dice(char *filename, char *weightfile) +{ + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + char **labels = dice_labels; + list *plist = get_paths("data/dice/dice.val.list"); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + data val = load_data_old(paths, m, 0, labels, 6, net.w, net.h); + float *acc = network_accuracies(net, val, 2); + printf("Validation Accuracy: %f, %d images\n", acc[0], m); + free_data(val); +} + +void test_dice(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + int i = 0; + char **names = dice_labels; + char buff[256]; + char *input = buff; + int indexes[6]; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, net.w, net.h); + float *X = im.data; + float *predictions = network_predict(net, X); + top_predictions(net, 6, indexes); + for(i = 0; i < 6; ++i){ + int index = indexes[i]; + printf("%s: %f\n", names[index], predictions[index]); + } + free_image(im); + if (filename) break; + } +} + +void run_dice(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + 
return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5]: 0; + if(0==strcmp(argv[2], "test")) test_dice(cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_dice(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_dice(cfg, weights); +} + diff --git a/workloads/realworld/pipeline/darknet/examples/go.c b/workloads/realworld/pipeline/darknet/examples/go.c new file mode 100644 index 0000000000000000000000000000000000000000..688579dcb3a3e35e9a79b8fb8aa684f28f44290d --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/go.c @@ -0,0 +1,1370 @@ +#include "darknet.h" + +#include +#include +#include + +int inverted = 1; +int noi = 1; +static const int nind = 10; +int legal_go(float *b, float *ko, int p, int r, int c); +int check_ko(float *x, float *ko); + +typedef struct { + char **data; + int n; +} moves; + +char *fgetgo(FILE *fp) +{ + if(feof(fp)) return 0; + size_t size = 96; + char *line = malloc(size*sizeof(char)); + if(size != fread(line, sizeof(char), size, fp)){ + free(line); + return 0; + } + + return line; +} + +moves load_go_moves(char *filename) +{ + moves m; + m.n = 128; + m.data = calloc(128, sizeof(char*)); + FILE *fp = fopen(filename, "rb"); + int count = 0; + char *line = 0; + while ((line = fgetgo(fp))) { + if (count >= m.n) { + m.n *= 2; + m.data = realloc(m.data, m.n*sizeof(char*)); + } + m.data[count] = line; + ++count; + } + printf("%d\n", count); + m.n = count; + m.data = realloc(m.data, count*sizeof(char*)); + return m; +} + +void string_to_board(char *s, float *board) +{ + int i, j; + memset(board, 0, 2*19*19*sizeof(float)); + int count = 0; + for(i = 0; i < 91; ++i){ + char c = s[i]; + for(j = 0; j < 4; ++j){ + int me = (c >> (2*j)) & 1; + int you = (c >> (2*j + 1)) & 1; + if (me) board[count] = 1; + else if (you) board[count + 19*19] = 1; + ++count; + if(count >= 19*19) break; + } + } +} + +void board_to_string(char *s, float *board) +{ + int i, 
j; + memset(s, 0, (19*19/4+1)*sizeof(char)); + int count = 0; + for(i = 0; i < 91; ++i){ + for(j = 0; j < 4; ++j){ + int me = (board[count] == 1); + int you = (board[count + 19*19] == 1); + if (me) s[i] = s[i] | (1<<(2*j)); + if (you) s[i] = s[i] | (1<<(2*j + 1)); + ++count; + if(count >= 19*19) break; + } + } +} + +static int occupied(float *b, int i) +{ + if (b[i]) return 1; + if (b[i+19*19]) return -1; + return 0; +} + +data random_go_moves(moves m, int n) +{ + data d = {0}; + d.X = make_matrix(n, 19*19*3); + d.y = make_matrix(n, 19*19+2); + int i, j; + for(i = 0; i < n; ++i){ + float *board = d.X.vals[i]; + float *label = d.y.vals[i]; + char *b = m.data[rand()%m.n]; + int player = b[0] - '0'; + int result = b[1] - '0'; + int row = b[2]; + int col = b[3]; + string_to_board(b+4, board); + if(player > 0) for(j = 0; j < 19*19; ++j) board[19*19*2 + j] = 1; + label[19*19+1] = (player==result); + if(row >= 19 || col >= 19){ + label[19*19] = 1; + } else { + label[col + 19*row] = 1; + if(occupied(board, col + 19*row)) printf("hey\n"); + } + + int flip = rand()%2; + int rotate = rand()%4; + image in = float_to_image(19, 19, 3, board); + image out = float_to_image(19, 19, 1, label); + if(flip){ + flip_image(in); + flip_image(out); + } + rotate_image_cw(in, rotate); + rotate_image_cw(out, rotate); + } + return d; +} + + +void train_go(char *cfgfile, char *weightfile, char *filename, int *gpus, int ngpus, int clear) +{ + int i; + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + network *net = nets[0]; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = 
"/home/pjreddie/backup/"; + + char buff[256]; + moves m = load_go_moves(filename); + //moves m = load_go_moves("games.txt"); + + int N = m.n; + printf("Moves: %d\n", N); + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time=what_time_is_it_now(); + + data train = random_go_moves(m, net->batch*net->subdivisions*ngpus); + printf("Loaded: %lf seconds\n", what_time_is_it_now() - time); + time=what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 10); + } +#else + loss = train_network(net, train); +#endif + free_data(train); + + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory,base, epoch); + save_weights(net, buff); + + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + if(get_current_batch(net)%10000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_%ld.backup",backup_directory,base,get_current_batch(net)); + save_weights(net, buff); + } + } + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free(base); +} + +static void propagate_liberty(float *board, int *lib, int *visited, int row, int col, int side) +{ + if (row < 0 || row > 18 || col < 0 || col > 18) return; + int index = row*19 + col; + if (occupied(board,index) != side) return; + if (visited[index]) return; + visited[index] = 1; + lib[index] += 1; + propagate_liberty(board, lib, visited, row+1, col, side); + propagate_liberty(board, lib, visited, 
row-1, col, side); + propagate_liberty(board, lib, visited, row, col+1, side); + propagate_liberty(board, lib, visited, row, col-1, side); +} + + +static int *calculate_liberties(float *board) +{ + int *lib = calloc(19*19, sizeof(int)); + int visited[19*19]; + int i, j; + for(j = 0; j < 19; ++j){ + for(i = 0; i < 19; ++i){ + memset(visited, 0, 19*19*sizeof(int)); + int index = j*19 + i; + if(!occupied(board,index)){ + if ((i > 0) && occupied(board,index - 1)) propagate_liberty(board, lib, visited, j, i-1, occupied(board,index-1)); + if ((i < 18) && occupied(board,index + 1)) propagate_liberty(board, lib, visited, j, i+1, occupied(board,index+1)); + if ((j > 0) && occupied(board,index - 19)) propagate_liberty(board, lib, visited, j-1, i, occupied(board,index-19)); + if ((j < 18) && occupied(board,index + 19)) propagate_liberty(board, lib, visited, j+1, i, occupied(board,index+19)); + } + } + } + return lib; +} + +void print_board(FILE *stream, float *board, int player, int *indexes) +{ + int i,j,n; + fprintf(stream, " "); + for(i = 0; i < 19; ++i){ + fprintf(stream, "%c ", 'A' + i + 1*(i > 7 && noi)); + } + fprintf(stream, "\n"); + for(j = 0; j < 19; ++j){ + fprintf(stream, "%2d", (inverted) ? 
19-j : j+1); + for(i = 0; i < 19; ++i){ + int index = j*19 + i; + if(indexes){ + int found = 0; + for(n = 0; n < nind; ++n){ + if(index == indexes[n]){ + found = 1; + /* + if(n == 0) fprintf(stream, "\uff11"); + else if(n == 1) fprintf(stream, "\uff12"); + else if(n == 2) fprintf(stream, "\uff13"); + else if(n == 3) fprintf(stream, "\uff14"); + else if(n == 4) fprintf(stream, "\uff15"); + */ + fprintf(stream, " %d", n+1); + } + } + if(found) continue; + } + //if(board[index]*-swap > 0) fprintf(stream, "\u25C9 "); + //else if(board[index]*-swap < 0) fprintf(stream, "\u25EF "); + if (occupied(board, index) == player) fprintf(stream, " X"); + else if (occupied(board, index) ==-player) fprintf(stream, " O"); + else fprintf(stream, " ."); + } + fprintf(stream, "\n"); + } +} + +void flip_board(float *board) +{ + int i; + for(i = 0; i < 19*19; ++i){ + float swap = board[i]; + board[i] = board[i+19*19]; + board[i+19*19] = swap; + board[i+19*19*2] = 1-board[i+19*19*2]; + } +} + +float predict_move2(network *net, float *board, float *move, int multi) +{ + float *output = network_predict(net, board); + copy_cpu(19*19+1, output, 1, move, 1); + float result = output[19*19 + 1]; + int i; + if(multi){ + image bim = float_to_image(19, 19, 3, board); + for(i = 1; i < 8; ++i){ + rotate_image_cw(bim, i); + if(i >= 4) flip_image(bim); + + float *output = network_predict(net, board); + image oim = float_to_image(19, 19, 1, output); + result += output[19*19 + 1]; + + if(i >= 4) flip_image(oim); + rotate_image_cw(oim, -i); + + axpy_cpu(19*19+1, 1, output, 1, move, 1); + + if(i >= 4) flip_image(bim); + rotate_image_cw(bim, -i); + } + result = result/8; + scal_cpu(19*19+1, 1./8., move, 1); + } + for(i = 0; i < 19*19; ++i){ + if(board[i] || board[i+19*19]) move[i] = 0; + } + return result; +} + +static void remove_connected(float *b, int *lib, int p, int r, int c) +{ + if (r < 0 || r >= 19 || c < 0 || c >= 19) return; + if (occupied(b, r*19 + c) != p) return; + if (lib[r*19 + c] != 1) 
return; + b[r*19 + c] = 0; + b[19*19 + r*19 + c] = 0; + remove_connected(b, lib, p, r+1, c); + remove_connected(b, lib, p, r-1, c); + remove_connected(b, lib, p, r, c+1); + remove_connected(b, lib, p, r, c-1); +} + + +void move_go(float *b, int p, int r, int c) +{ + int *l = calculate_liberties(b); + if(p > 0) b[r*19 + c] = 1; + else b[19*19 + r*19 + c] = 1; + remove_connected(b, l, -p, r+1, c); + remove_connected(b, l, -p, r-1, c); + remove_connected(b, l, -p, r, c+1); + remove_connected(b, l, -p, r, c-1); + free(l); +} + +int compare_board(float *a, float *b) +{ + if(memcmp(a, b, 19*19*3*sizeof(float)) == 0) return 1; + return 0; +} + +typedef struct mcts_tree{ + float *board; + struct mcts_tree **children; + float *prior; + int *visit_count; + float *value; + float *mean; + float *prob; + int total_count; + float result; + int done; + int pass; +} mcts_tree; + +void free_mcts(mcts_tree *root) +{ + if(!root) return; + int i; + free(root->board); + for(i = 0; i < 19*19+1; ++i){ + if(root->children[i]) free_mcts(root->children[i]); + } + free(root->children); + free(root->prior); + free(root->visit_count); + free(root->value); + free(root->mean); + free(root->prob); + free(root); +} + +float *network_predict_rotations(network *net, float *next) +{ + int n = net->batch; + float *in = calloc(19*19*3*n, sizeof(float)); + image im = float_to_image(19, 19, 3, next); + int i,j; + int *inds = random_index_order(0, 8); + for(j = 0; j < n; ++j){ + i = inds[j]; + rotate_image_cw(im, i); + if(i >= 4) flip_image(im); + memcpy(in + 19*19*3*j, im.data, 19*19*3*sizeof(float)); + if(i >= 4) flip_image(im); + rotate_image_cw(im, -i); + } + float *pred = network_predict(net, in); + for(j = 0; j < n; ++j){ + i = inds[j]; + image im = float_to_image(19, 19, 1, pred + j*(19*19 + 2)); + if(i >= 4) flip_image(im); + rotate_image_cw(im, -i); + if(j > 0){ + axpy_cpu(19*19+2, 1, im.data, 1, pred, 1); + } + } + free(in); + free(inds); + scal_cpu(19*19+2, 1./n, pred, 1); + return pred; +} + 
+mcts_tree *expand(float *next, float *ko, network *net) +{ + mcts_tree *root = calloc(1, sizeof(mcts_tree)); + root->board = next; + root->children = calloc(19*19+1, sizeof(mcts_tree*)); + root->prior = calloc(19*19 + 1, sizeof(float)); + root->prob = calloc(19*19 + 1, sizeof(float)); + root->mean = calloc(19*19 + 1, sizeof(float)); + root->value = calloc(19*19 + 1, sizeof(float)); + root->visit_count = calloc(19*19 + 1, sizeof(int)); + root->total_count = 1; + int i; + float *pred = network_predict_rotations(net, next); + copy_cpu(19*19+1, pred, 1, root->prior, 1); + float val = 2*pred[19*19 + 1] - 1; + root->result = val; + for(i = 0; i < 19*19+1; ++i) { + root->visit_count[i] = 0; + root->value[i] = 0; + root->mean[i] = val; + if(i < 19*19 && occupied(next, i)){ + root->value[i] = -1; + root->mean[i] = -1; + root->prior[i] = 0; + } + } + //print_board(stderr, next, flip?-1:1, 0); + return root; +} + +float *copy_board(float *board) +{ + float *next = calloc(19*19*3, sizeof(float)); + copy_cpu(19*19*3, board, 1, next, 1); + return next; +} + +float select_mcts(mcts_tree *root, network *net, float *prev, float cpuct) +{ + if(root->done) return -root->result; + int i; + float max = -1000; + int max_i = 0; + for(i = 0; i < 19*19+1; ++i){ + root->prob[i] = root->mean[i] + cpuct*root->prior[i] * sqrt(root->total_count) / (1. 
+ root->visit_count[i]); + if(root->prob[i] > max){ + max = root->prob[i]; + max_i = i; + } + } + float val; + i = max_i; + root->visit_count[i]++; + root->total_count++; + if (root->children[i]) { + val = select_mcts(root->children[i], net, root->board, cpuct); + } else { + if(max_i < 19*19 && !legal_go(root->board, prev, 1, max_i/19, max_i%19)) { + root->mean[i] = -1; + root->value[i] = -1; + root->prior[i] = 0; + --root->total_count; + return select_mcts(root, net, prev, cpuct); + //printf("Detected ko\n"); + //getchar(); + } else { + float *next = copy_board(root->board); + if (max_i < 19*19) { + move_go(next, 1, max_i / 19, max_i % 19); + } + flip_board(next); + root->children[i] = expand(next, root->board, net); + val = -root->children[i]->result; + if(max_i == 19*19){ + root->children[i]->pass = 1; + if (root->pass){ + root->children[i]->done = 1; + } + } + } + } + root->value[i] += val; + root->mean[i] = root->value[i]/root->visit_count[i]; + return -val; +} + +mcts_tree *run_mcts(mcts_tree *tree, network *net, float *board, float *ko, int player, int n, float cpuct, float secs) +{ + int i; + double t = what_time_is_it_now(); + if(player < 0) flip_board(board); + if(!tree) tree = expand(copy_board(board), ko, net); + assert(compare_board(tree->board, board)); + for(i = 0; i < n; ++i){ + if (secs > 0 && (what_time_is_it_now() - t) > secs) break; + int max_i = max_int_index(tree->visit_count, 19*19+1); + if (tree->visit_count[max_i] >= n) break; + select_mcts(tree, net, ko, cpuct); + } + if(player < 0) flip_board(board); + //fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + return tree; +} + +mcts_tree *move_mcts(mcts_tree *tree, int index) +{ + if(index < 0 || index > 19*19 || !tree || !tree->children[index]) { + free_mcts(tree); + tree = 0; + } else { + mcts_tree *swap = tree; + tree = tree->children[index]; + swap->children[index] = 0; + free_mcts(swap); + } + return tree; +} + +typedef struct { + float value; + float mcts; + int row; + int 
col; +} move; + +move pick_move(mcts_tree *tree, float temp, int player) +{ + int i; + float probs[19*19+1] = {0}; + move m = {0}; + double sum = 0; + /* + for(i = 0; i < 19*19+1; ++i){ + probs[i] = tree->visit_count[i]; + } + */ + //softmax(probs, 19*19+1, temp, 1, probs); + for(i = 0; i < 19*19+1; ++i){ + sum += pow(tree->visit_count[i], 1./temp); + } + for(i = 0; i < 19*19+1; ++i){ + probs[i] = pow(tree->visit_count[i], 1./temp) / sum; + } + + int index = sample_array(probs, 19*19+1); + m.row = index / 19; + m.col = index % 19; + m.value = (tree->result+1.)/2.; + m.mcts = (tree->mean[index]+1.)/2.; + + int indexes[nind]; + top_k(probs, 19*19+1, nind, indexes); + print_board(stderr, tree->board, player, indexes); + + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", index/19, index%19, tree->result, tree->prior[index], probs[index], tree->mean[index], (tree->children[index])?tree->children[index]->result:0, tree->visit_count[index]); + int ind = max_index(probs, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, tree->result, tree->prior[ind], probs[ind], tree->mean[ind], (tree->children[ind])?tree->children[ind]->result:0, tree->visit_count[ind]); + ind = max_index(tree->prior, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, tree->result, tree->prior[ind], probs[ind], tree->mean[ind], (tree->children[ind])?tree->children[ind]->result:0, tree->visit_count[ind]); + return m; +} + +/* + float predict_move(network *net, float *board, float *move, int multi, float *ko, float temp) + { + + int i; + + int max_v = 0; + int max_i = 0; + for(i = 0; i < 19*19+1; ++i){ + if(root->visit_count[i] > max_v){ + max_v = root->visit_count[i]; + max_i = i; + } + } + fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + int ind = max_index(root->mean, 
19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", max_i/19, max_i%19, root->result, root->prior[max_i], root->prob[max_i], root->mean[max_i], (root->children[max_i])?root->children[max_i]->result:0, root->visit_count[max_i]); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, root->result, root->prior[ind], root->prob[ind], root->mean[ind], (root->children[ind])?root->children[ind]->result:0, root->visit_count[ind]); + ind = max_index(root->prior, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, root->result, root->prior[ind], root->prob[ind], root->mean[ind], (root->children[ind])?root->children[ind]->result:0, root->visit_count[ind]); + if(root->result < -.9 && root->mean[max_i] < -.9) return -1000.f; + + float val = root->result; + free_mcts(root); + return val; + } + */ + +static int makes_safe_go(float *b, int *lib, int p, int r, int c){ + if (r < 0 || r >= 19 || c < 0 || c >= 19) return 0; + if (occupied(b,r*19 + c) == -p){ + if (lib[r*19 + c] > 1) return 0; + else return 1; + } + if (!occupied(b,r*19 + c)) return 1; + if (lib[r*19 + c] > 1) return 1; + return 0; +} + +int suicide_go(float *b, int p, int r, int c) +{ + int *l = calculate_liberties(b); + int safe = 0; + safe = safe || makes_safe_go(b, l, p, r+1, c); + safe = safe || makes_safe_go(b, l, p, r-1, c); + safe = safe || makes_safe_go(b, l, p, r, c+1); + safe = safe || makes_safe_go(b, l, p, r, c-1); + free(l); + return !safe; +} + +int check_ko(float *x, float *ko) +{ + if(!ko) return 0; + float curr[19*19*3]; + copy_cpu(19*19*3, x, 1, curr, 1); + if(curr[19*19*2] != ko[19*19*2]) flip_board(curr); + if(compare_board(curr, ko)) return 1; + return 0; +} + +int legal_go(float *b, float *ko, int p, int r, int c) +{ + if (occupied(b, r*19+c)) return 0; + float curr[19*19*3]; + 
copy_cpu(19*19*3, b, 1, curr, 1); + move_go(curr, p, r, c); + if(check_ko(curr, ko)) return 0; + if(suicide_go(b, p, r, c)) return 0; + return 1; +} + +/* + move generate_move(mcts_tree *root, network *net, int player, float *board, int multi, float temp, float *ko, int print) + { + move m = {0}; +//root = run_mcts(tree, network *net, float *board, float *ko, int n, float cpuct) +int i, j; +int empty = 1; +for(i = 0; i < 19*19; ++i){ +if (occupied(board, i)) { +empty = 0; +break; +} +} +if(empty) { +m.value = .5; +m.mcts = .5; +m.row = 3; +m.col = 15; +return m; +} + +float move[362]; +if (player < 0) flip_board(board); +float result = predict_move(net, board, move, multi, ko, temp); +if (player < 0) flip_board(board); +if(result == -1000.f) return -2; + +for(i = 0; i < 19; ++i){ +for(j = 0; j < 19; ++j){ +if (!legal_go(board, ko, player, i, j)) move[i*19 + j] = 0; +} +} + +int indexes[nind]; +top_k(move, 19*19+1, nind, indexes); + + +int max = max_index(move, 19*19+1); +int row = max / 19; +int col = max % 19; +int index = sample_array(move, 19*19+1); + +if(print){ +top_k(move, 19*19+1, nind, indexes); +for(i = 0; i < nind; ++i){ +if (!move[indexes[i]]) indexes[i] = -1; +} +print_board(stderr, board, 1, indexes); +fprintf(stderr, "%s To Move\n", player > 0 ? 
"X" : "O"); +fprintf(stderr, "%.2f%% Win Chance\n", (result+1)/2*100); +for(i = 0; i < nind; ++i){ +int index = indexes[i]; +int row = index / 19; +int col = index % 19; +if(row == 19){ +fprintf(stderr, "%d: Pass, %.2f%%\n", i+1, move[index]*100); +} else { +fprintf(stderr, "%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100); +} +} +} +if (row == 19) return -1; + +if (suicide_go(board, player, row, col)){ +return -1; +} + +if (suicide_go(board, player, index/19, index%19)){ +index = max; +} +if (index == 19*19) return -1; +return index; +} +*/ + +void valid_go(char *cfgfile, char *weightfile, int multi, char *filename) +{ + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + float *board = calloc(19*19*3, sizeof(float)); + float *move = calloc(19*19+2, sizeof(float)); + // moves m = load_go_moves("/home/pjreddie/backup/go.test"); + moves m = load_go_moves(filename); + + int N = m.n; + int i,j; + int correct = 0; + for (i = 0; i 0) for(j = 0; j < 19*19; ++j) board[19*19*2 + j] = 1; + predict_move2(net, board, move, multi); + int index = max_index(move, 19*19+1); + if(index == truth) ++correct; + printf("%d Accuracy %f\n", i, (float) correct/(i+1)); + } +} + +int print_game(float *board, FILE *fp) +{ + int i, j; + int count = 3; + fprintf(fp, "komi 6.5\n"); + fprintf(fp, "boardsize 19\n"); + fprintf(fp, "clear_board\n"); + for(j = 0; j < 19; ++j){ + for(i = 0; i < 19; ++i){ + if(occupied(board,j*19 + i) == 1) fprintf(fp, "play black %c%d\n", 'A'+i+(i>=8), 19-j); + if(occupied(board,j*19 + i) == -1) fprintf(fp, "play white %c%d\n", 'A'+i+(i>=8), 19-j); + if(occupied(board,j*19 + i)) ++count; + } + } + return count; +} + + +int stdin_ready() +{ + fd_set readfds; + FD_ZERO(&readfds); + + struct timeval 
timeout; + timeout.tv_sec = 0; + timeout.tv_usec = 0; + FD_SET(STDIN_FILENO, &readfds); + + if (select(1, &readfds, NULL, NULL, &timeout)){ + return 1; + } + return 0; +} + +mcts_tree *ponder(mcts_tree *tree, network *net, float *b, float *ko, int player, float cpuct) +{ + double t = what_time_is_it_now(); + int count = 0; + if (tree) count = tree->total_count; + while(!stdin_ready()){ + if (what_time_is_it_now() - t > 120) break; + tree = run_mcts(tree, net, b, ko, player, 100000, cpuct, .1); + } + fprintf(stderr, "Pondered %d moves...\n", tree->total_count - count); + return tree; +} + +void engine_go(char *filename, char *weightfile, int mcts_iters, float secs, float temp, float cpuct, int anon, int resign) +{ + mcts_tree *root = 0; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *one = calloc(19*19*3, sizeof(float)); + float *two = calloc(19*19*3, sizeof(float)); + int ponder_player = 0; + int passed = 0; + int move_num = 0; + int main_time = 0; + int byo_yomi_time = 0; + int byo_yomi_stones = 0; + int black_time_left = 0; + int black_stones_left = 0; + int white_time_left = 0; + int white_stones_left = 0; + float orig_time = secs; + int old_ponder = 0; + while(1){ + if(ponder_player){ + root = ponder(root, net, board, two, ponder_player, cpuct); + } + old_ponder = ponder_player; + ponder_player = 0; + char buff[256]; + int id = 0; + int has_id = (scanf("%d", &id) == 1); + scanf("%s", buff); + if (feof(stdin)) break; + fprintf(stderr, "%s\n", buff); + char ids[256]; + sprintf(ids, "%d", id); + //fprintf(stderr, "%s\n", buff); + if (!has_id) ids[0] = 0; + if (!strcmp(buff, "protocol_version")){ + printf("=%s 2\n\n", ids); + } else if (!strcmp(buff, "name")){ + if(anon){ + printf("=%s The Fool!\n\n", ids); + }else{ + printf("=%s DarkGo\n\n", ids); + } + } else if (!strcmp(buff, "time_settings")){ + ponder_player = old_ponder; + 
scanf("%d %d %d", &main_time, &byo_yomi_time, &byo_yomi_stones); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "time_left")){ + ponder_player = old_ponder; + char color[256]; + int time = 0, stones = 0; + scanf("%s %d %d", color, &time, &stones); + if (color[0] == 'b' || color[0] == 'B'){ + black_time_left = time; + black_stones_left = stones; + } else { + white_time_left = time; + white_stones_left = stones; + } + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "version")){ + if(anon){ + printf("=%s :-DDDD\n\n", ids); + }else { + printf("=%s 1.0. Want more DarkGo? You can find me on OGS, unlimited games, no waiting! https://online-go.com/user/view/434218\n\n", ids); + } + } else if (!strcmp(buff, "known_command")){ + char comm[256]; + scanf("%s", comm); + int known = (!strcmp(comm, "protocol_version") || + !strcmp(comm, "name") || + !strcmp(comm, "version") || + !strcmp(comm, "known_command") || + !strcmp(comm, "list_commands") || + !strcmp(comm, "quit") || + !strcmp(comm, "boardsize") || + !strcmp(comm, "clear_board") || + !strcmp(comm, "komi") || + !strcmp(comm, "final_status_list") || + !strcmp(comm, "play") || + !strcmp(comm, "genmove_white") || + !strcmp(comm, "genmove_black") || + !strcmp(comm, "fixed_handicap") || + !strcmp(comm, "genmove")); + if(known) printf("=%s true\n\n", ids); + else printf("=%s false\n\n", ids); + } else if (!strcmp(buff, "list_commands")){ + printf("=%s protocol_version\nshowboard\nname\nversion\nknown_command\nlist_commands\nquit\nboardsize\nclear_board\nkomi\nplay\ngenmove_black\ngenmove_white\ngenmove\nfinal_status_list\nfixed_handicap\n\n", ids); + } else if (!strcmp(buff, "quit")){ + break; + } else if (!strcmp(buff, "boardsize")){ + int boardsize = 0; + scanf("%d", &boardsize); + //fprintf(stderr, "%d\n", boardsize); + if(boardsize != 19){ + printf("?%s unacceptable size\n\n", ids); + } else { + root = move_mcts(root, -1); + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + move_num = 0; + printf("=%s 
\n\n", ids); + } + } else if (!strcmp(buff, "fixed_handicap")){ + int handicap = 0; + scanf("%d", &handicap); + int indexes[] = {72, 288, 300, 60, 180, 174, 186, 66, 294}; + int i; + for(i = 0; i < handicap; ++i){ + board[indexes[i]] = 1; + ++move_num; + } + root = move_mcts(root, -1); + } else if (!strcmp(buff, "clear_board")){ + passed = 0; + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + move_num = 0; + root = move_mcts(root, -1); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "komi")){ + float komi = 0; + scanf("%f", &komi); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "showboard")){ + printf("=%s \n", ids); + print_board(stdout, board, 1, 0); + printf("\n"); + } else if (!strcmp(buff, "play") || !strcmp(buff, "black") || !strcmp(buff, "white")){ + ++move_num; + char color[256]; + if(!strcmp(buff, "play")) + { + scanf("%s ", color); + } else { + scanf(" "); + color[0] = buff[0]; + } + char c; + int r; + int count = scanf("%c%d", &c, &r); + int player = (color[0] == 'b' || color[0] == 'B') ? 
1 : -1; + if((c == 'p' || c == 'P') && count < 2) { + passed = 1; + printf("=%s \n\n", ids); + char *line = fgetl(stdin); + free(line); + fflush(stdout); + fflush(stderr); + root = move_mcts(root, 19*19); + continue; + } else { + passed = 0; + } + if(c >= 'A' && c <= 'Z') c = c - 'A'; + if(c >= 'a' && c <= 'z') c = c - 'a'; + if(c >= 8) --c; + r = 19 - r; + fprintf(stderr, "move: %d %d\n", r, c); + + float *swap = two; + two = one; + one = swap; + move_go(board, player, r, c); + copy_cpu(19*19*3, board, 1, one, 1); + if(root) fprintf(stderr, "Prior: %f\n", root->prior[r*19 + c]); + if(root) fprintf(stderr, "Mean: %f\n", root->mean[r*19 + c]); + if(root) fprintf(stderr, "Result: %f\n", root->result); + root = move_mcts(root, r*19 + c); + if(root) fprintf(stderr, "Visited: %d\n", root->total_count); + else fprintf(stderr, "NOT VISITED\n"); + + printf("=%s \n\n", ids); + //print_board(stderr, board, 1, 0); + } else if (!strcmp(buff, "genmove") || !strcmp(buff, "genmove_black") || !strcmp(buff, "genmove_white")){ + ++move_num; + int player = 0; + if(!strcmp(buff, "genmove")){ + char color[256]; + scanf("%s", color); + player = (color[0] == 'b' || color[0] == 'B') ? 
1 : -1; + } else if (!strcmp(buff, "genmove_black")){ + player = 1; + } else { + player = -1; + } + if(player > 0){ + if(black_time_left <= 30) secs = 2.5; + else secs = orig_time; + } else { + if(white_time_left <= 30) secs = 2.5; + else secs = orig_time; + } + ponder_player = -player; + + //tree = generate_move(net, player, board, multi, .1, two, 1); + double t = what_time_is_it_now(); + root = run_mcts(root, net, board, two, player, mcts_iters, cpuct, secs); + fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + move m = pick_move(root, temp, player); + root = move_mcts(root, m.row*19 + m.col); + + + if(move_num > resign && m.value < .1 && m.mcts < .1){ + printf("=%s resign\n\n", ids); + } else if(m.row == 19){ + printf("=%s pass\n\n", ids); + passed = 0; + } else { + int row = m.row; + int col = m.col; + + float *swap = two; + two = one; + one = swap; + + move_go(board, player, row, col); + copy_cpu(19*19*3, board, 1, one, 1); + row = 19 - row; + if (col >= 8) ++col; + printf("=%s %c%d\n\n", ids, 'A' + col, row); + } + + } else if (!strcmp(buff, "p")){ + //print_board(board, 1, 0); + } else if (!strcmp(buff, "final_status_list")){ + char type[256]; + scanf("%s", type); + fprintf(stderr, "final_status\n"); + char *line = fgetl(stdin); + free(line); + if(type[0] == 'd' || type[0] == 'D'){ + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, "%s final_status_list dead\n", ids); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + while((l = fgetl(p))){ + printf("%s\n", l); + free(l); + } + } else { + printf("?%s unknown command\n\n", ids); + } + } else if (!strcmp(buff, "kgs-genmove_cleanup")){ + char type[256]; + scanf("%s", type); + fprintf(stderr, "kgs-genmove_cleanup\n"); + char *line = fgetl(stdin); + free(line); + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, 
"%s kgs-genmove_cleanup %s\n", ids, type); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + while((l = fgetl(p))){ + printf("%s\n", l); + free(l); + } + } else { + char *line = fgetl(stdin); + free(line); + printf("?%s unknown command\n\n", ids); + } + fflush(stdout); + fflush(stderr); + } + printf("%d %d %d\n",passed, black_stones_left, white_stones_left); +} + +void test_go(char *cfg, char *weights, int multi) +{ + int i; + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(time(0)); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *move = calloc(19*19+1, sizeof(float)); + int color = 1; + while(1){ + float result = predict_move2(net, board, move, multi); + printf("%.2f%% Win Chance\n", (result+1)/2*100); + + int indexes[nind]; + int row, col; + top_k(move, 19*19+1, nind, indexes); + print_board(stderr, board, color, indexes); + for(i = 0; i < nind; ++i){ + int index = indexes[i]; + row = index / 19; + col = index % 19; + if(row == 19){ + printf("%d: Pass, %.2f%%\n", i+1, move[index]*100); + } else { + printf("%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100); + } + } + //if(color == 1) printf("\u25EF Enter move: "); + //else printf("\u25C9 Enter move: "); + if(color == 1) printf("X Enter move: "); + else printf("O Enter move: "); + + char c; + char *line = fgetl(stdin); + int picked = 1; + int dnum = sscanf(line, "%d", &picked); + int cnum = sscanf(line, "%c", &c); + if (strlen(line) == 0 || dnum) { + --picked; + if (picked < nind){ + int index = indexes[picked]; + row = index / 19; + col = index % 19; + if(row < 19){ + move_go(board, 1, row, col); + } + } + } else if (cnum){ + if (c <= 'T' && c >= 'A'){ + int num = sscanf(line, "%c %d", &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num 
== 2) move_go(board, 1, row, col); + } else if (c == 'p') { + // Pass + } else if(c=='b' || c == 'w'){ + char g; + int num = sscanf(line, "%c %c %d", &g, &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num == 3) { + int mc = (g == 'b') ? 1 : -1; + if (mc == color) { + board[row*19 + col] = 1; + } else { + board[19*19 + row*19 + col] = 1; + } + } + } else if(c == 'c'){ + char g; + int num = sscanf(line, "%c %c %d", &g, &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num == 3) { + board[row*19 + col] = 0; + board[19*19 + row*19 + col] = 0; + } + } + } + free(line); + flip_board(board); + color = -color; + } +} + +float score_game(float *board) +{ + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, "final_score\n"); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + float score = 0; + char player = 0; + while((l = fgetl(p))){ + fprintf(stderr, "%s \t", l); + int n = sscanf(l, "= %c+%f", &player, &score); + free(l); + if (n == 2) break; + } + if(player == 'W') score = -score; + pclose(p); + return score; +} + +void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi) +{ + mcts_tree *tree1 = 0; + mcts_tree *tree2 = 0; + network *net = load_network(filename, weightfile, 0); + //set_batch_network(net, 1); + + network *net2; + if (f2) { + net2 = parse_network_cfg(f2); + if(w2){ + load_weights(net2, w2); + } + } else { + net2 = calloc(1, sizeof(network)); + *net2 = *net; + } + srand(time(0)); + char boards[600][93]; + int count = 0; + //set_batch_network(net, 1); + //set_batch_network(net2, 1); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *one = calloc(19*19*3, sizeof(float)); + float *two = calloc(19*19*3, sizeof(float)); + int done = 0; + int player = 1; + int p1 = 
0; + int p2 = 0; + int total = 0; + float temp = .1; + int mcts_iters = 500; + float cpuct = 5; + while(1){ + if (done){ + tree1 = move_mcts(tree1, -1); + tree2 = move_mcts(tree2, -1); + float score = score_game(board); + if((score > 0) == (total%2==0)) ++p1; + else ++p2; + ++total; + fprintf(stderr, "Total: %d, Player 1: %f, Player 2: %f\n", total, (float)p1/total, (float)p2/total); + sleep(1); + /* + int i = (score > 0)? 0 : 1; + int j; + for(; i < count; i += 2){ + for(j = 0; j < 93; ++j){ + printf("%c", boards[i][j]); + } + printf("\n"); + } + */ + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + player = 1; + done = 0; + count = 0; + fflush(stdout); + fflush(stderr); + } + //print_board(stderr, board, 1, 0); + //sleep(1); + + if ((total%2==0) == (player==1)){ + //mcts_iters = 4500; + cpuct = 5; + } else { + //mcts_iters = 500; + cpuct = 1; + } + network *use = ((total%2==0) == (player==1)) ? net : net2; + mcts_tree *t = ((total%2==0) == (player==1)) ? tree1 : tree2; + t = run_mcts(t, use, board, two, player, mcts_iters, cpuct, 0); + move m = pick_move(t, temp, player); + if(((total%2==0) == (player==1))) tree1 = t; + else tree2 = t; + + tree1 = move_mcts(tree1, m.row*19 + m.col); + tree2 = move_mcts(tree2, m.row*19 + m.col); + + if(m.row == 19){ + done = 1; + continue; + } + int row = m.row; + int col = m.col; + + float *swap = two; + two = one; + one = swap; + + if(player < 0) flip_board(board); + boards[count][0] = row; + boards[count][1] = col; + board_to_string(boards[count] + 2, board); + if(player < 0) flip_board(board); + ++count; + + move_go(board, player, row, col); + copy_cpu(19*19*3, board, 1, one, 1); + + player = -player; + } +} + +void run_go(int argc, char **argv) +{ + //boards_go(); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + 
if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + int clear = find_arg(argc, argv, "-clear"); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *c2 = (argc > 5) ? argv[5] : 0; + char *w2 = (argc > 6) ? argv[6] : 0; + int multi = find_arg(argc, argv, "-multi"); + int anon = find_arg(argc, argv, "-anon"); + int iters = find_int_arg(argc, argv, "-iters", 500); + int resign = find_int_arg(argc, argv, "-resign", 175); + float cpuct = find_float_arg(argc, argv, "-cpuct", 5); + float temp = find_float_arg(argc, argv, "-temp", .1); + float time = find_float_arg(argc, argv, "-time", 0); + if(0==strcmp(argv[2], "train")) train_go(cfg, weights, c2, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) valid_go(cfg, weights, multi, c2); + else if(0==strcmp(argv[2], "self")) self_go(cfg, weights, c2, w2, multi); + else if(0==strcmp(argv[2], "test")) test_go(cfg, weights, multi); + else if(0==strcmp(argv[2], "engine")) engine_go(cfg, weights, iters, time, temp, cpuct, anon, resign); +} + + diff --git a/workloads/realworld/pipeline/darknet/examples/instance-segmenter.c b/workloads/realworld/pipeline/darknet/examples/instance-segmenter.c new file mode 100644 index 0000000000000000000000000000000000000000..664e71426d58e19f758bab198783eac178a3cdc4 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/instance-segmenter.c @@ -0,0 +1,267 @@ +#include "darknet.h" +#include +#include + +void normalize_image2(image p); +void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + 
printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + image pred = get_network_image(net); + + image embed = pred; + embed.c = 3; + embed.data += embed.w*embed.h*80; + + int div = net->w/pred.w; + assert(pred.w * div == net->w); + assert(pred.h * div == net->h); + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.scale = div; + args.num_boxes = 90; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.classes = 80; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = ISEG_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 
1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(display){ + image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]); + image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]); + pred.c = 80; + image mask = mask_to_rgb(tr); + image prmask = mask_to_rgb(pred); + image ecopy = copy_image(embed); + normalize_image2(ecopy); + show_image(ecopy, "embed", 1); + free_image(ecopy); + + show_image(im, "input", 1); + show_image(prmask, "pred", 1); + show_image(mask, "truth", 100); + free_image(mask); + free_image(prmask); + } + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_isegmenter(char *datafile, char *cfg, char *weights, char *filename) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = 
load_image_color(input, 0, 0); + image sized = letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + show_image(sized, "orig", 1); + show_image(prmask, "pred", 0); + free_image(im); + free_image(sized); + free_image(prmask); + if (filename) break; + } +} + + +void demo_isegmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Classifier Demo\n"); + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = letterbox_image(in, net->w, net->h); + + network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + show_image(prmask, "Segmenter", 10); + + free_image(in_s); + free_image(in); + free_image(prmask); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_isegmenter(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + 
if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_isegmenter(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_isegmenter(data, cfg, weights, gpus, ngpus, clear, display); + else if(0==strcmp(argv[2], "demo")) demo_isegmenter(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/pipeline/darknet/examples/lsd.c b/workloads/realworld/pipeline/darknet/examples/lsd.c new file mode 100644 index 0000000000000000000000000000000000000000..d20a6efff42c2096a8393b8d07f7157dc5733c55 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/lsd.c @@ -0,0 +1,1385 @@ +#include +#include "darknet.h" + +/* +void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg, char *aweight, int clear) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + //char *style_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *style_images = "/home/pjreddie/zelda.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + network fnet = load_network(fcfg, fweight, clear); + network gnet = load_network(gcfg, gweight, clear); + network anet = load_network(acfg, aweight, clear); + char *gbase = basecfg(gcfg); + char *abase = basecfg(acfg); + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = 
gnet->batch*gnet->subdivisions; + int i = *gnet->seen/imgs; + data train, tbuffer; + data style, sbuffer; + + + list *slist = get_paths(style_images); + char **spaths = (char **)list_to_array(slist); + + list *tlist = get_paths(train_images); + char **tpaths = (char **)list_to_array(tlist); + + load_args targs= get_base_args(gnet); + targs.paths = tpaths; + targs.n = imgs; + targs.m = tlist->size; + targs.d = &tbuffer; + targs.type = CLASSIFICATION_DATA; + targs.classes = 1; + char *ls[1] = {"zelda"}; + targs.labels = ls; + + load_args sargs = get_base_args(gnet); + sargs.paths = spaths; + sargs.n = imgs; + sargs.m = slist->size; + sargs.d = &sbuffer; + sargs.type = CLASSIFICATION_DATA; + sargs.classes = 1; + sargs.labels = ls; + + pthread_t tload_thread = load_data_in_thread(targs); + pthread_t sload_thread = load_data_in_thread(sargs); + clock_t time; + + float aloss_avg = -1; + float floss_avg = -1; + + fnet->train=1; + int x_size = fnet->inputs*fnet->batch; + int y_size = fnet->truths*fnet->batch; + float *X = calloc(x_size, sizeof(float)); + float *y = calloc(y_size, sizeof(float)); + + + int ax_size = anet->inputs*anet->batch; + int ay_size = anet->truths*anet->batch; + fill_gpu(ay_size, .9, anet->truth_gpu, 1); + anet->delta_gpu = cuda_make_array(0, ax_size); + anet->train = 1; + + int gx_size = gnet->inputs*gnet->batch; + int gy_size = gnet->truths*gnet->batch; + gstate.input = cuda_make_array(0, gx_size); + gstate.truth = 0; + gstate.delta = 0; + gstate.train = 1; + + while (get_current_batch(gnet) < gnet->max_batches) { + i += 1; + time=clock(); + pthread_join(tload_thread, 0); + pthread_join(sload_thread, 0); + train = tbuffer; + style = sbuffer; + tload_thread = load_data_in_thread(targs); + sload_thread = load_data_in_thread(sargs); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data generated = copy_data(train); + time=clock(); + + int j, k; + float floss = 0; + for(j = 0; j < fnet->subdivisions; ++j){ + layer imlayer = 
gnet->layers[gnet->n - 1]; + get_next_batch(train, fnet->batch, j*fnet->batch, X, y); + + cuda_push_array(fstate.input, X, x_size); + cuda_push_array(gstate.input, X, gx_size); + *gnet->seen += gnet->batch; + + forward_network_gpu(fnet, fstate); + float *feats = fnet->layers[fnet->n - 2].output_gpu; + copy_gpu(y_size, feats, 1, fstate.truth, 1); + + forward_network_gpu(gnet, gstate); + float *gen = gnet->layers[gnet->n-1].output_gpu; + copy_gpu(x_size, gen, 1, fstate.input, 1); + + fill_gpu(x_size, 0, fstate.delta, 1); + forward_network_gpu(fnet, fstate); + backward_network_gpu(fnet, fstate); + //HERE + + astate.input = gen; + fill_gpu(ax_size, 0, astate.delta, 1); + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + float *delta = imlayer->delta_gpu; + fill_gpu(x_size, 0, delta, 1); + scal_gpu(x_size, 100, astate.delta, 1); + scal_gpu(x_size, .001, fstate.delta, 1); + axpy_gpu(x_size, 1, fstate.delta, 1, delta, 1); + axpy_gpu(x_size, 1, astate.delta, 1, delta, 1); + + //fill_gpu(x_size, 0, delta, 1); + //cuda_push_array(delta, X, x_size); + //axpy_gpu(x_size, -1, imlayer->output_gpu, 1, delta, 1); + //printf("pix error: %f\n", cuda_mag_array(delta, x_size)); + printf("fea error: %f\n", cuda_mag_array(fstate.delta, x_size)); + printf("adv error: %f\n", cuda_mag_array(astate.delta, x_size)); + //axpy_gpu(x_size, 1, astate.delta, 1, delta, 1); + + backward_network_gpu(gnet, gstate); + + floss += get_network_cost(fnet) /(fnet->subdivisions*fnet->batch); + + cuda_pull_array(imlayer->output_gpu, imlayer->output, imlayer->outputs*imlayer->batch); + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(imlayer->outputs, imlayer->output + k*imlayer->outputs, 1, generated.X.vals[index], 1); + generated.y.vals[index][0] = .1; + style.y.vals[index][0] = .9; + } + } + +*/ +/* + image sim = float_to_image(anet->w, anet->h, anet->c, style.X.vals[j]); + show_image(sim, "style"); + cvWaitKey(0); + */ + /* + + 
harmless_update_network_gpu(anet); + + data merge = concat_data(style, generated); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(generated); + free_data(style); + if (aloss_avg < 0) aloss_avg = aloss; + if (floss_avg < 0) floss_avg = floss; + aloss_avg = aloss_avg*.9 + aloss*.1; + floss_avg = floss_avg*.9 + floss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, floss, aloss, floss_avg, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, gbase, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, gbase); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } +#endif +} +*/ + +/* +void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/train1.txt"; + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network net = load_network(cfg, weight, clear); + network anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = net->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + i = *net->seen/imgs; + data train, buffer; + + + list 
*plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.type = CLASSIFICATION_DATA; + args.classes = 1; + char *ls[1] = {"coco"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + network_state gstate = {0}; + gstate.index = 0; + gstate.net = net; + int x_size = get_network_input_size(net)*net->batch; + int y_size = x_size; + gstate.input = cuda_make_array(0, x_size); + gstate.truth = cuda_make_array(0, y_size); + gstate.delta = 0; + gstate.train = 1; + float *pixs = calloc(x_size, sizeof(float)); + float *graypixs = calloc(x_size, sizeof(float)); + float *y = calloc(y_size, sizeof(float)); + + network_state astate = {0}; + astate.index = 0; + astate.net = anet; + int ay_size = get_network_output_size(anet)*anet->batch; + astate.input = 0; + astate.truth = 0; + astate.delta = 0; + astate.train = 1; + + float *imerror = cuda_make_array(0, imlayer->outputs); + float *ones_gpu = cuda_make_array(0, ay_size); + fill_gpu(ay_size, .9, ones_gpu, 1); + + float aloss_avg = -1; + float gloss_avg = -1; + + //data generated = copy_data(train); + + while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gray = copy_data(train); + for(j = 0; j < imgs; ++j){ + image gim = float_to_image(net->w, net->h, net->c, gray.X.vals[j]); + grayscale_image_3c(gim); + train.y.vals[j][0] = .9; + + image yim = float_to_image(net->w, net->h, net->c, 
train.X.vals[j]); + //rgb_to_yuv(yim); + } + time=clock(); + float gloss = 0; + + for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, pixs, y); + get_next_batch(gray, net->batch, j*net->batch, graypixs, y); + cuda_push_array(gstate.input, graypixs, x_size); + cuda_push_array(gstate.truth, pixs, y_size); + */ + /* + image origi = float_to_image(net->w, net->h, 3, pixs); + image grayi = float_to_image(net->w, net->h, 3, graypixs); + show_image(grayi, "gray"); + show_image(origi, "orig"); + cvWaitKey(0); + */ + /* + *net->seen += net->batch; + forward_network_gpu(net, gstate); + + fill_gpu(imlayer->outputs, 0, imerror, 1); + astate.input = imlayer->output_gpu; + astate.delta = imerror; + astate.truth = ones_gpu; + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + scal_gpu(imlayer->outputs, .1, net->layers[net->n-1].delta_gpu, 1); + + backward_network_gpu(net, gstate); + + scal_gpu(imlayer->outputs, 1000, imerror, 1); + + printf("realness %f\n", cuda_mag_array(imerror, imlayer->outputs)); + printf("features %f\n", cuda_mag_array(net->layers[net->n-1].delta_gpu, imlayer->outputs)); + + axpy_gpu(imlayer->outputs, 1, imerror, 1, imlayer->delta_gpu, 1); + + gloss += get_network_cost(net) /(net->subdivisions*net->batch); + + cuda_pull_array(imlayer->output_gpu, imlayer->output, imlayer->outputs*imlayer->batch); + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer->outputs, imlayer->output + k*imlayer->outputs, 1, gray.X.vals[index], 1); + gray.y.vals[index][0] = .1; + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gray); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + update_network_gpu(anet); + free_data(merge); + free_data(train); + free_data(gray); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, 
adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +#endif +} +*/ + +void slerp(float *start, float *end, float s, int n, float *out) +{ + float omega = acos(dot_cpu(n, start, 1, end, 1)); + float so = sin(omega); + fill_cpu(n, 0, out, 1); + axpy_cpu(n, sin((1-s)*omega)/so, start, 1, out, 1); + axpy_cpu(n, sin(s*omega)/so, end, 1, out, 1); + + float mag = mag_array(out, n); + scale_array(out, n, 1./mag); +} + +image random_unit_vector_image(int w, int h, int c) +{ + image im = make_image(w, h, c); + int i; + for(i = 0; i < im.w*im.h*im.c; ++i){ + im.data[i] = rand_normal(); + } + float mag = mag_array(im.data, im.w*im.h*im.c); + scale_array(im.data, im.w*im.h*im.c, 1./mag); + return im; +} + +void inter_dcgan(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int i, imlayer = 0; + + for (i = 0; i < net->n; ++i) { + if (net->layers[i]->out_c == 3) { + imlayer = i; + printf("%d\n", i); + break; + } + } + image start = random_unit_vector_image(net->w, net->h, net->c); + image end = random_unit_vector_image(net->w, net->h, net->c); + image im = make_image(net->w, net->h, net->c); + image orig = copy_image(start); + + int c = 0; + int count = 0; + int max_count = 15; + while(1){ + 
++c; + + if(count == max_count){ + count = 0; + free_image(start); + start = end; + end = random_unit_vector_image(net->w, net->h, net->c); + if(c > 300){ + end = orig; + } + if(c>300 + max_count) return; + } + ++count; + + slerp(start.data, end.data, (float)count / max_count, im.w*im.h*im.c, im.data); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + normalize_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + //char buff[256]; + sprintf(buff, "out%05d", c); + save_image(out, "out"); + save_image(out, buff); + show_image(out, "out", 0); + } +} + +void test_dcgan(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int imlayer = 0; + + imlayer = net->n-1; + + while(1){ + image im = make_image(net->w, net->h, net->c); + int i; + for(i = 0; i < im.w*im.h*im.c; ++i){ + im.data[i] = rand_normal(); + } + //float mag = mag_array(im.data, im.w*im.h*im.c); + //scale_array(im.data, im.w*im.h*im.c, 1./mag); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + normalize_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 0); + + free_image(im); + } +} + +void set_network_alpha_beta(network *net, float alpha, float beta) +{ + int i; + for(i = 0; i < net->n; ++i){ + if(net->layers[i]->type == SHORTCUT){ + net->layers[i]->alpha = alpha; + net->layers[i]->beta = beta; + } + } +} + +void train_prog(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) +{ +#ifdef GPU + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = 
basecfg(acfg); + printf("%s\n", base); + network *gnet = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer *imlayer = gnet->layers[gnet->n-1]; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + i = *gnet->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(anet); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + args.threads=16; + args.classes = 1; + char *ls[2] = {"imagenet", "zzzzzzzz"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + gnet->train = 1; + anet->train = 1; + + int x_size = gnet->inputs*gnet->batch; + int y_size = gnet->truths*gnet->batch; + float *imerror = cuda_make_array(0, y_size); + + float aloss_avg = -1; + + if (maxbatch == 0) maxbatch = gnet->max_batches; + while (get_current_batch(gnet) < maxbatch) { + { + int cb = get_current_batch(gnet); + float alpha = (float) cb / (maxbatch/2); + if(alpha > 1) alpha = 1; + float beta = 1 - alpha; + printf("%f %f\n", alpha, beta); + set_network_alpha_beta(gnet, alpha, beta); + set_network_alpha_beta(anet, beta, alpha); + } + + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gen = copy_data(train); + for (j = 0; j < imgs; ++j) { + train.y.vals[j][0] = 1; + gen.y.vals[j][0] = 0; + } + time=clock(); + + for (j = 0; j < gnet->subdivisions; ++j) { + get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0); + int z; + for(z = 0; z < x_size; ++z){ + gnet->input[z] = rand_normal(); + } + /* + for(z = 0; z < gnet->batch; ++z){ + float mag = mag_array(gnet->input + z*gnet->inputs, 
gnet->inputs); + scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag); + } + */ + *gnet->seen += gnet->batch; + forward_network(gnet); + + fill_gpu(imlayer->outputs*imlayer->batch, 0, imerror, 1); + fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1); + copy_cpu(anet->inputs*anet->batch, imlayer->output, 1, anet->input, 1); + anet->delta_gpu = imerror; + forward_network(anet); + backward_network(anet); + + //float genaloss = *anet->cost / anet->batch; + + // Ruihao + // scal_gpu(imlayer->outputs*imlayer->batch, 1, imerror, 1); + // scal_gpu(imlayer->outputs*imlayer->batch, 0, gnet->layers[gnet->n-1]->delta_gpu, 1); + + // axpy_gpu(imlayer->outputs*imlayer->batch, 1, imerror, 1, gnet->layers[gnet->n-1]->delta_gpu, 1); + // Ruihao + + backward_network(gnet); + + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gen); + float aloss = train_network(anet, merge); + +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + show_image(im, "gen", 1); + show_image(im2, "train", 1); + save_image(im, "gen"); + save_image(im2, "train"); + } +#endif + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(gen); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + + printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, 
base); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(gnet, buff); +#endif +} + +void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) +{ +#ifdef GPU + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network *gnet = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + //float orig_rate = anet->learning_rate; + + int i, j, k; + layer* imlayer = malloc(sizeof(layer)); + for (i = 0; i < gnet->n; ++i) { + if (gnet->layers[i]->out_c == 3) { + imlayer = gnet->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + i = *gnet->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(anet); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + args.threads=16; + args.classes = 1; + char *ls[2] = {"imagenet", "zzzzzzzz"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + gnet->train = 1; + anet->train = 1; + + int x_size = gnet->inputs*gnet->batch; + int y_size = gnet->truths*gnet->batch; + float *imerror = cuda_make_array(0, y_size); + + //int ay_size = anet->truths*anet->batch; + + float aloss_avg = -1; + + //data generated = copy_data(train); + + if (maxbatch == 0) maxbatch = gnet->max_batches; + while (get_current_batch(gnet) < maxbatch) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + 
//translate_data_rows(train, -.5); + //scale_data_rows(train, 2); + + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gen = copy_data(train); + for (j = 0; j < imgs; ++j) { + train.y.vals[j][0] = 1; + gen.y.vals[j][0] = 0; + } + time=clock(); + + for(j = 0; j < gnet->subdivisions; ++j){ + get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0); + int z; + for(z = 0; z < x_size; ++z){ + gnet->input[z] = rand_normal(); + } + for(z = 0; z < gnet->batch; ++z){ + float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs); + scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag); + } + /* + for(z = 0; z < 100; ++z){ + printf("%f, ", gnet->input[z]); + } + printf("\n"); + printf("input: %f %f\n", mean_array(gnet->input, x_size), variance_array(gnet->input, x_size)); + */ + + //cuda_push_array(gnet->input_gpu, gnet->input, x_size); + //cuda_push_array(gnet->truth_gpu, gnet->truth, y_size); + *gnet->seen += gnet->batch; + forward_network(gnet); + + fill_gpu(imlayer->outputs*imlayer->batch, 0, imerror, 1); + fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1); + copy_cpu(anet->inputs*anet->batch, imlayer->output, 1, anet->input, 1); + anet->delta_gpu = imerror; + forward_network(anet); + backward_network(anet); + + //float genaloss = *anet->cost / anet->batch; + //printf("%f\n", genaloss); + + // Ruihao + // scal_gpu(imlayer->outputs*imlayer->batch, 1, imerror, 1); + // scal_gpu(imlayer->outputs*imlayer->batch, 0, gnet->layers[gnet->n-1]->delta_gpu, 1); + + //printf("realness %f\n", cuda_mag_array(imerror, imlayer->outputs*imlayer->batch)); + //printf("features %f\n", cuda_mag_array(gnet->layers[gnet->n-1].delta_gpu, imlayer->outputs*imlayer->batch)); + + // axpy_gpu(imlayer->outputs*imlayer->batch, 1, imerror, 1, gnet->layers[gnet->n-1]->delta_gpu, 1); + // Ruihao + + + backward_network(gnet); + + /* + for(k = 0; k < gnet->n; ++k){ + layer l = gnet->layers[k]; + 
cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); + printf("%d: %f %f\n", k, mean_array(l.output, l.outputs*l.batch), variance_array(l.output, l.outputs*l.batch)); + } + */ + + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gen); + //randomize_data(merge); + float aloss = train_network(anet, merge); + + //translate_image(im, 1); + //scale_image(im, .5); + //translate_image(im2, 1); + //scale_image(im2, .5); +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + show_image(im, "gen", 1); + show_image(im2, "train", 1); + save_image(im, "gen"); + save_image(im2, "train"); + } +#endif + + /* + if(aloss < .1){ + anet->learning_rate = 0; + } else if (aloss > .3){ + anet->learning_rate = orig_rate; + } + */ + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(gen); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + + printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(gnet, buff); +#endif +} + +void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int clear, int 
display) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/train1.txt"; + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network *net = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer* imlayer = malloc(sizeof(layer)); + for (i = 0; i < net->n; ++i) { + if (net->layers[i]->out_c == 3) { + imlayer = net->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(net); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + + args.type = CLASSIFICATION_DATA; + args.classes = 1; + char *ls[2] = {"imagenet"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + int x_size = net->inputs*net->batch; + //int y_size = x_size; + net->delta = 0; + net->train = 1; + float *pixs = calloc(x_size, sizeof(float)); + float *graypixs = calloc(x_size, sizeof(float)); + //float *y = calloc(y_size, sizeof(float)); + + //int ay_size = anet->outputs*anet->batch; + anet->delta = 0; + anet->train = 1; + + float *imerror = cuda_make_array(0, imlayer->outputs*imlayer->batch); + + float aloss_avg = -1; + float gloss_avg = -1; + + //data generated = copy_data(train); + + while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", 
sec(clock()-time)); + + data gray = copy_data(train); + for(j = 0; j < imgs; ++j){ + image gim = float_to_image(net->w, net->h, net->c, gray.X.vals[j]); + grayscale_image_3c(gim); + train.y.vals[j][0] = .95; + gray.y.vals[j][0] = .05; + } + time=clock(); + float gloss = 0; + + for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, pixs, 0); + get_next_batch(gray, net->batch, j*net->batch, graypixs, 0); + cuda_push_array(net->input_gpu, graypixs, net->inputs*net->batch); + cuda_push_array(net->truth_gpu, pixs, net->truths*net->batch); + /* + image origi = float_to_image(net->w, net->h, 3, pixs); + image grayi = float_to_image(net->w, net->h, 3, graypixs); + show_image(grayi, "gray"); + show_image(origi, "orig"); + cvWaitKey(0); + */ + *net->seen += net->batch; + forward_network_gpu(net); + + // Ruihao + fill_gpu(imlayer->outputs*imlayer->batch, 0, imerror, 1); + // copy_gpu(anet->inputs*anet->batch, imlayer->output_gpu, 1, anet->input_gpu, 1); + fill_gpu(anet->inputs*anet->batch, .95, anet->truth_gpu, 1); + anet->delta_gpu = imerror; + forward_network_gpu(anet); + backward_network_gpu(anet); + + // scal_gpu(imlayer->outputs*imlayer->batch, 1./100., net->layers[net->n-1]->delta_gpu, 1); + + // scal_gpu(imlayer->outputs*imlayer->batch, 1, imerror, 1); + + printf("realness %f\n", cuda_mag_array(imerror, imlayer->outputs*imlayer->batch)); + printf("features %f\n", cuda_mag_array(net->layers[net->n-1]->delta_gpu, imlayer->outputs*imlayer->batch)); + + // axpy_gpu(imlayer->outputs*imlayer->batch, 1, imerror, 1, net->layers[net->n-1]->delta_gpu, 1); + // Ruihao + + backward_network_gpu(net); + + + gloss += *net->cost /(net->subdivisions*net->batch); + + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer->outputs, imlayer->output + k*imlayer->outputs, 1, gray.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gray); + //randomize_data(merge); + float aloss = 
train_network(anet, merge); + + update_network_gpu(net); + +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gray.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + show_image(im, "gen", 1); + show_image(im2, "train", 1); + } +#endif + free_data(merge); + free_data(train); + free_data(gray); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +#endif +} + +/* + void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfile, int clear) + { +#ifdef GPU +char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; +char *backup_directory = "/home/pjreddie/backup/"; +srand(time(0)); +char *base = basecfg(cfgfile); +printf("%s\n", base); +network net = parse_network_cfg(cfgfile); +if(weightfile){ +load_weights(&net, weightfile); +} +if(clear) *net->seen = 0; + +char *abase = basecfg(acfgfile); +network anet = parse_network_cfg(acfgfile); +if(aweightfile){ +load_weights(&anet, aweightfile); +} +if(clear) *anet->seen = 0; + +int i, j, k; +layer imlayer = {0}; +for (i = 0; i < net->n; ++i) { +if (net->layers[i].out_c == 3) { +imlayer = net->layers[i]; +break; +} +} + +printf("Learning Rate: %g, 
Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); +int imgs = net->batch*net->subdivisions; +i = *net->seen/imgs; +data train, buffer; + + +list *plist = get_paths(train_images); +//int N = plist->size; +char **paths = (char **)list_to_array(plist); + +load_args args = {0}; +args.w = net->w; +args.h = net->h; +args.paths = paths; +args.n = imgs; +args.m = plist->size; +args.d = &buffer; + +args.min = net->min_crop; +args.max = net->max_crop; +args.angle = net->angle; +args.aspect = net->aspect; +args.exposure = net->exposure; +args.saturation = net->saturation; +args.hue = net->hue; +args.size = net->w; +args.type = CLASSIFICATION_DATA; +args.classes = 1; +char *ls[1] = {"coco"}; +args.labels = ls; + +pthread_t load_thread = load_data_in_thread(args); +clock_t time; + +network_state gstate = {0}; +gstate.index = 0; +gstate.net = net; +int x_size = get_network_input_size(net)*net->batch; +int y_size = 1*net->batch; +gstate.input = cuda_make_array(0, x_size); +gstate.truth = 0; +gstate.delta = 0; +gstate.train = 1; +float *X = calloc(x_size, sizeof(float)); +float *y = calloc(y_size, sizeof(float)); + +network_state astate = {0}; +astate.index = 0; +astate.net = anet; +int ay_size = get_network_output_size(anet)*anet->batch; +astate.input = 0; +astate.truth = 0; +astate.delta = 0; +astate.train = 1; + +float *imerror = cuda_make_array(0, imlayer->outputs); +float *ones_gpu = cuda_make_array(0, ay_size); +fill_gpu(ay_size, 1, ones_gpu, 1); + +float aloss_avg = -1; +float gloss_avg = -1; + +//data generated = copy_data(train); + +while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data generated = copy_data(train); + time=clock(); + float gloss = 0; + + for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, X, y); + 
cuda_push_array(gstate.input, X, x_size); + *net->seen += net->batch; + forward_network_gpu(net, gstate); + + fill_gpu(imlayer->outputs, 0, imerror, 1); + astate.input = imlayer->output_gpu; + astate.delta = imerror; + astate.truth = ones_gpu; + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + scal_gpu(imlayer->outputs, 1, imerror, 1); + axpy_gpu(imlayer->outputs, 1, imerror, 1, imlayer->delta_gpu, 1); + + backward_network_gpu(net, gstate); + + printf("features %f\n", cuda_mag_array(imlayer->delta_gpu, imlayer->outputs)); + printf("realness %f\n", cuda_mag_array(imerror, imlayer->outputs)); + + gloss += get_network_cost(net) /(net->subdivisions*net->batch); + + cuda_pull_array(imlayer->output_gpu, imlayer->output, imlayer->outputs*imlayer->batch); + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer->outputs, imlayer->output + k*imlayer->outputs, 1, generated.X.vals[index], 1); + generated.y.vals[index][0] = 0; + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, generated); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + update_network_gpu(anet); + free_data(merge); + free_data(train); + free_data(generated); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, 
buff); + } +} +char buff[256]; +sprintf(buff, "%s/%s_final.weights", backup_directory, base); +save_weights(net, buff); +#endif +} +*/ + +/* + void train_lsd(char *cfgfile, char *weightfile, int clear) + { + char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + if(clear) *net->seen = 0; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); +//int N = plist->size; +char **paths = (char **)list_to_array(plist); + +load_args args = {0}; +args.w = net->w; +args.h = net->h; +args.paths = paths; +args.n = imgs; +args.m = plist->size; +args.d = &buffer; + +args.min = net->min_crop; +args.max = net->max_crop; +args.angle = net->angle; +args.aspect = net->aspect; +args.exposure = net->exposure; +args.saturation = net->saturation; +args.hue = net->hue; +args.size = net->w; +args.type = CLASSIFICATION_DATA; +args.classes = 1; +char *ls[1] = {"coco"}; +args.labels = ls; + +pthread_t load_thread = load_data_in_thread(args); +clock_t time; +//while(i*imgs < N*120){ +while(get_current_batch(net) < net->max_batches){ +i += 1; +time=clock(); +pthread_join(load_thread, 0); +train = buffer; +load_thread = load_data_in_thread(args); + +printf("Loaded: %lf seconds\n", sec(clock()-time)); + +time=clock(); +float loss = train_network(net, train); +if (avg_loss < 0) avg_loss = loss; +avg_loss = avg_loss*.9 + loss*.1; + +printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); +if(i%1000==0){ +char buff[256]; +sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); 
+save_weights(net, buff); +} +if(i%100==0){ +char buff[256]; +sprintf(buff, "%s/%s.backup", backup_directory, base); +save_weights(net, buff); +} +free_data(train); +} +char buff[256]; +sprintf(buff, "%s/%s_final.weights", backup_directory, base); +save_weights(net, buff); +} +*/ + +void test_lsd(char *cfg, char *weights, char *filename, int gray) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int i, imlayer = 0; + + for (i = 0; i < net->n; ++i) { + if (net->layers[i]->out_c == 3) { + imlayer = i; + printf("%d\n", i); + break; + } + } + + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + if(gray) grayscale_image_3c(crop); + + float *X = crop.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + constrain_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 1); + show_image(crop, "crop", 0); + + free_image(im); + free_image(resized); + free_image(crop); + if (filename) break; + } +} + + +void run_lsd(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + int batches = find_int_arg(argc, argv, "-b", 0); + char *file = find_char_arg(argc, argv, "-file", "/home/pjreddie/data/imagenet/imagenet1k.train.list"); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? 
argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + char *acfg = argv[5]; + char *aweights = (argc > 6) ? argv[6] : 0; + //if(0==strcmp(argv[2], "train")) train_lsd(cfg, weights, clear); + //else if(0==strcmp(argv[2], "train2")) train_lsd2(cfg, weights, acfg, aweights, clear); + //else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear); + //else if(0==strcmp(argv[2], "train3")) train_lsd3(argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], clear); + if(0==strcmp(argv[2], "traingan")) train_dcgan(cfg, weights, acfg, aweights, clear, display, file, batches); + else if(0==strcmp(argv[2], "trainprog")) train_prog(cfg, weights, acfg, aweights, clear, display, file, batches); + else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear, display); + else if(0==strcmp(argv[2], "gan")) test_dcgan(cfg, weights); + else if(0==strcmp(argv[2], "inter")) inter_dcgan(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_lsd(cfg, weights, filename, 0); + else if(0==strcmp(argv[2], "color")) test_lsd(cfg, weights, filename, 1); + /* + else if(0==strcmp(argv[2], "valid")) validate_lsd(cfg, weights); + */ +} diff --git a/workloads/realworld/pipeline/darknet/examples/nightmare.c b/workloads/realworld/pipeline/darknet/examples/nightmare.c new file mode 100644 index 0000000000000000000000000000000000000000..fccf90a04db64ba896d639f53694c6387ecf0488 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/nightmare.c @@ -0,0 +1,418 @@ +#include "darknet.h" + +#include + +// ./darknet nightmare cfg/extractor.recon.cfg ~/trained/yolo-coco.conv frame6.png -reconstruct -iters 500 -i 3 -lambda .1 -rate .01 -smooth 2 + +float abs_mean(float *x, int n) +{ + int i; + float sum = 0; + for (i = 0; i < n; ++i){ + sum += fabs(x[i]); + } + return sum/n; +} + +void calculate_loss(float *output, float *delta, int n, float thresh) +{ + int i; + float mean = mean_array(output, n); + float var = 
variance_array(output, n); + for(i = 0; i < n; ++i){ + if(delta[i] > mean + thresh*sqrt(var)) delta[i] = output[i]; + else delta[i] = 0; + } +} + +void optimize_picture(network *net, image orig, int max_layer, float scale, float rate, float thresh, int norm) +{ + //scale_image(orig, 2); + //translate_image(orig, -1); + net->n = max_layer + 1; + + int dx = rand()%16 - 8; + int dy = rand()%16 - 8; + int flip = rand()%2; + + image crop = crop_image(orig, dx, dy, orig.w, orig.h); + image im = resize_image(crop, (int)(orig.w * scale), (int)(orig.h * scale)); + if(flip) flip_image(im); + + resize_network(net, im.w, im.h); + layer *last = net->layers[net->n-1]; + //net->layers[net->n - 1].activation = LINEAR; + + image delta = make_image(im.w, im.h, im.c); + +#ifdef GPU + net->delta_gpu = cuda_make_array(delta.data, im.w*im.h*im.c); + copy_cpu(net->inputs, im.data, 1, net->input, 1); + + forward_network_gpu(net); + // Ruihao + // copy_gpu(last->outputs, last->output_gpu, 1, last->delta_gpu, 1); + // Ruihao + + cuda_pull_array(last->delta_gpu, last->delta, last->outputs); + calculate_loss(last->delta, last->delta, last->outputs, thresh); + cuda_push_array(last->delta_gpu, last->delta, last->outputs); + + backward_network_gpu(net); + + cuda_pull_array(net->delta_gpu, delta.data, im.w*im.h*im.c); + cuda_free(net->delta_gpu); + net->delta_gpu = 0; +#else + printf("\nnet: %d %d %d im: %d %d %d\n", net->w, net->h, net->inputs, im.w, im.h, im.c); + copy_cpu(net->inputs, im.data, 1, net->input, 1); + net->delta = delta.data; + forward_network(net); + copy_cpu(last->outputs, last->output, 1, last->delta, 1); + calculate_loss(last->output, last->delta, last->outputs, thresh); + backward_network(net); +#endif + + if(flip) flip_image(delta); + //normalize_array(delta.data, delta.w*delta.h*delta.c); + image resized = resize_image(delta, orig.w, orig.h); + image out = crop_image(resized, -dx, -dy, orig.w, orig.h); + + /* + image g = grayscale_image(out); + free_image(out); + out = g; + 
*/ + + //rate = rate / abs_mean(out.data, out.w*out.h*out.c); + image gray = make_image(out.w, out.h, out.c); + fill_image(gray, .5); + axpy_cpu(orig.w*orig.h*orig.c, -1, orig.data, 1, gray.data, 1); + axpy_cpu(orig.w*orig.h*orig.c, .1, gray.data, 1, out.data, 1); + + if(norm) normalize_array(out.data, out.w*out.h*out.c); + axpy_cpu(orig.w*orig.h*orig.c, rate, out.data, 1, orig.data, 1); + + /* + normalize_array(orig.data, orig.w*orig.h*orig.c); + scale_image(orig, sqrt(var)); + translate_image(orig, mean); + */ + + //translate_image(orig, 1); + //scale_image(orig, .5); + //normalize_image(orig); + + constrain_image(orig); + + free_image(crop); + free_image(im); + free_image(delta); + free_image(resized); + free_image(out); + +} + +void smooth(image recon, image update, float lambda, int num) +{ + int i, j, k; + int ii, jj; + for(k = 0; k < recon.c; ++k){ + for(j = 0; j < recon.h; ++j){ + for(i = 0; i < recon.w; ++i){ + int out_index = i + recon.w*(j + recon.h*k); + for(jj = j-num; jj <= j + num && jj < recon.h; ++jj){ + if (jj < 0) continue; + for(ii = i-num; ii <= i + num && ii < recon.w; ++ii){ + if (ii < 0) continue; + int in_index = ii + recon.w*(jj + recon.h*k); + update.data[out_index] += lambda * (recon.data[in_index] - recon.data[out_index]); + } + } + } + } + } +} + +void reconstruct_picture(network *net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters) +{ + int iter = 0; + for (iter = 0; iter < iters; ++iter) { + image delta = make_image(recon.w, recon.h, recon.c); + +#ifdef GPU + layer *l = get_network_output_layer(net); + cuda_push_array(net->input_gpu, recon.data, recon.w*recon.h*recon.c); + //cuda_push_array(net->truth_gpu, features, net->truths); + net->delta_gpu = cuda_make_array(delta.data, delta.w*delta.h*delta.c); + + forward_network_gpu(net); + cuda_push_array(l->delta_gpu, features, l->outputs); + // Ruihao + // axpy_gpu(l->outputs, -1, l->output_gpu, 1, l->delta_gpu, 1); + // 
Ruihao + backward_network_gpu(net); + + cuda_pull_array(net->delta_gpu, delta.data, delta.w*delta.h*delta.c); + + cuda_free(net->delta_gpu); +#else + net->input = recon.data; + net->delta = delta.data; + net->truth = features; + + forward_network(net); + backward_network(net); +#endif + + //normalize_array(delta.data, delta.w*delta.h*delta.c); + axpy_cpu(recon.w*recon.h*recon.c, 1, delta.data, 1, update.data, 1); + //smooth(recon, update, lambda, smooth_size); + + axpy_cpu(recon.w*recon.h*recon.c, rate, update.data, 1, recon.data, 1); + scal_cpu(recon.w*recon.h*recon.c, momentum, update.data, 1); + + float mag = mag_array(delta.data, recon.w*recon.h*recon.c); + printf("mag: %f\n", mag); + //scal_cpu(recon.w*recon.h*recon.c, 600/mag, recon.data, 1); + + constrain_image(recon); + free_image(delta); + } +} + +/* +void run_lsd(int argc, char **argv) +{ + srand(0); + if(argc < 3){ + fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [options! (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[2]; + char *weights = argv[3]; + char *input = argv[4]; + + int norm = find_int_arg(argc, argv, "-norm", 1); + int rounds = find_int_arg(argc, argv, "-rounds", 1); + int iters = find_int_arg(argc, argv, "-iters", 10); + float rate = find_float_arg(argc, argv, "-rate", .04); + float momentum = find_float_arg(argc, argv, "-momentum", .9); + float lambda = find_float_arg(argc, argv, "-lambda", .01); + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + int reconstruct = find_arg(argc, argv, "-reconstruct"); + int smooth_size = find_int_arg(argc, argv, "-smooth", 1); + + network net = parse_network_cfg(cfg); + load_weights(&net, weights); + char *cfgbase = basecfg(cfg); + char *imbase = basecfg(input); + + set_batch_network(&net, 1); + image im = load_image_color(input, 0, 0); + + float *features = 0; + image update; + if (reconstruct){ + im = letterbox_image(im, net->w, net->h); + + int zz = 0; + network_predict(net, im.data); + image out_im = 
get_network_image(net); + image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz); + //flip_image(crop); + image f_im = resize_image(crop, out_im.w, out_im.h); + free_image(crop); + printf("%d features\n", out_im.w*out_im.h*out_im.c); + + + im = resize_image(im, im.w, im.h); + f_im = resize_image(f_im, f_im.w, f_im.h); + features = f_im.data; + + int i; + for(i = 0; i < 14*14*512; ++i){ + features[i] += rand_uniform(-.19, .19); + } + + free_image(im); + im = make_random_image(im.w, im.h, im.c); + update = make_image(im.w, im.h, im.c); + + } + + int e; + int n; + for(e = 0; e < rounds; ++e){ + fprintf(stderr, "Iteration: "); + fflush(stderr); + for(n = 0; n < iters; ++n){ + fprintf(stderr, "%d, ", n); + fflush(stderr); + if(reconstruct){ + reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1); + //if ((n+1)%30 == 0) rate *= .5; + show_image(im, "reconstruction"); +#ifdef OPENCV + cvWaitKey(10); +#endif + }else{ + int layer = max_layer + rand()%range - range/2; + int octave = rand()%octaves; + optimize_picture(&net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm); + } + } + fprintf(stderr, "done\n"); + char buff[256]; + if (prefix){ + sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e); + }else{ + sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e); + } + printf("%d %s\n", e, buff); + save_image(im, buff); + //show_image(im, buff); + //cvWaitKey(0); + + if(rotate){ + image rot = rotate_image(im, rotate); + free_image(im); + im = rot; + } + image crop = crop_image(im, im.w * (1. - zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom); + image resized = resize_image(crop, im.w, im.h); + free_image(im); + free_image(crop); + im = resized; + } +} +*/ + +void run_nightmare(int argc, char **argv) +{ + srand(0); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [layer] [options! 
(optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[2]; + char *weights = argv[3]; + char *input = argv[4]; + int max_layer = atoi(argv[5]); + + int range = find_int_arg(argc, argv, "-range", 1); + int norm = find_int_arg(argc, argv, "-norm", 1); + int rounds = find_int_arg(argc, argv, "-rounds", 1); + int iters = find_int_arg(argc, argv, "-iters", 10); + int octaves = find_int_arg(argc, argv, "-octaves", 4); + float zoom = find_float_arg(argc, argv, "-zoom", 1.); + float rate = find_float_arg(argc, argv, "-rate", .04); + float thresh = find_float_arg(argc, argv, "-thresh", 1.); + float rotate = find_float_arg(argc, argv, "-rotate", 0); + float momentum = find_float_arg(argc, argv, "-momentum", .9); + float lambda = find_float_arg(argc, argv, "-lambda", .01); + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + int reconstruct = find_arg(argc, argv, "-reconstruct"); + int smooth_size = find_int_arg(argc, argv, "-smooth", 1); + + network *net = load_network(cfg, weights, 0); + char *cfgbase = basecfg(cfg); + char *imbase = basecfg(input); + + set_batch_network(net, 1); + image im = load_image_color(input, 0, 0); + if(0){ + float scale = 1; + if(im.w > 512 || im.h > 512){ + if(im.w > im.h) scale = 512.0/im.w; + else scale = 512.0/im.h; + } + image resized = resize_image(im, scale*im.w, scale*im.h); + free_image(im); + im = resized; + } + //im = letterbox_image(im, net->w, net->h); + + float *features = 0; + image update; + if (reconstruct){ + net->n = max_layer; + im = letterbox_image(im, net->w, net->h); + //resize_network(&net, im.w, im.h); + + network_predict(net, im.data); + if(net->layers[net->n-1]->type == REGION){ + printf("region!\n"); + zero_objectness(net->layers[net->n-1]); + } + image out_im = copy_image(get_network_image(net)); + /* + image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz); + //flip_image(crop); + image f_im = resize_image(crop, out_im.w, out_im.h); + free_image(crop); + */ + printf("%d features\n", 
out_im.w*out_im.h*out_im.c); + + features = out_im.data; + + /* + int i; + for(i = 0; i < 14*14*512; ++i){ + //features[i] += rand_uniform(-.19, .19); + } + free_image(im); + im = make_random_image(im.w, im.h, im.c); + */ + update = make_image(im.w, im.h, im.c); + } + + int e; + int n; + for(e = 0; e < rounds; ++e){ + fprintf(stderr, "Iteration: "); + fflush(stderr); + for(n = 0; n < iters; ++n){ + fprintf(stderr, "%d, ", n); + fflush(stderr); + if(reconstruct){ + reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1); + //if ((n+1)%30 == 0) rate *= .5; + show_image(im, "reconstruction", 10); + }else{ + int layer = max_layer + rand()%range - range/2; + int octave = rand()%octaves; + optimize_picture(net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm); + } + } + fprintf(stderr, "done\n"); + if(0){ + image g = grayscale_image(im); + free_image(im); + im = g; + } + char buff[256]; + if (prefix){ + sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e); + }else{ + sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e); + } + printf("%d %s\n", e, buff); + save_image(im, buff); + //show_image(im, buff, 0); + + if(rotate){ + image rot = rotate_image(im, rotate); + free_image(im); + im = rot; + } + image crop = crop_image(im, im.w * (1. 
- zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom); + image resized = resize_image(crop, im.w, im.h); + free_image(im); + free_image(crop); + im = resized; + } +} + diff --git a/workloads/realworld/pipeline/darknet/examples/regressor.c b/workloads/realworld/pipeline/darknet/examples/regressor.c new file mode 100644 index 0000000000000000000000000000000000000000..20cec0fad9f0a2ccb2c46a30d0a01793119b43ce --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/regressor.c @@ -0,0 +1,240 @@ +#include "darknet.h" +#include +#include + +void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + int classes = option_find_int(options, "classes", 1); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + clock_t time; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.classes = classes; + + args.min = net->min_ratio*net->w; + args.max = net->max_ratio*net->w; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; 
+ args.size = net->w; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = REGRESSION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_regressor(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image sized 
= letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + free_image(im); + free_image(sized); + if (filename) break; + } +} + + +void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Regressor Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + list *options = read_data_cfg(datacfg); + int classes = option_find_int(options, "classes", 1); + char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); + + void * cap = open_video_stream(filename, cam_index, 0,0,0); + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image crop = center_crop_image(in, net->w, net->h); + grayscale_image_3c(crop); + + float *predictions = network_predict(net, crop.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + int i; + for(i = 0; i < classes; ++i){ + printf("%s: %f\n", names[i], predictions[i]); + } + + show_image(crop, "Regressor", 10); + free_image(in); + free_image(crop); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_regressor(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + 
ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_regressor(data, cfg, weights); + else if(0==strcmp(argv[2], "train")) train_regressor(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "demo")) demo_regressor(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/pipeline/darknet/examples/rnn.c b/workloads/realworld/pipeline/darknet/examples/rnn.c new file mode 100644 index 0000000000000000000000000000000000000000..ec23b2cb1a89177317957e9fddf0860a18e97e05 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/rnn.c @@ -0,0 +1,542 @@ +#include "darknet.h" + +#include + +typedef struct { + float *x; + float *y; +} float_pair; + +unsigned char **load_files(char *filename, int *n) +{ + list *paths = get_paths(filename); + *n = paths->size; + unsigned char **contents = calloc(*n, sizeof(char *)); + int i; + node *x = paths->front; + for(i = 0; i < *n; ++i){ + contents[i] = read_file((char *)x->val); + x = x->next; + } + return contents; +} + +int *read_tokenized_data(char *filename, size_t *read) +{ + size_t size = 512; + size_t count = 0; + FILE *fp = fopen(filename, "r"); + int *d = calloc(size, sizeof(int)); + int n, one; + one = fscanf(fp, "%d", &n); + while(one == 1){ + ++count; + if(count > size){ + size = size*2; + d = realloc(d, size*sizeof(int)); + } + d[count-1] = n; + one = fscanf(fp, "%d", &n); + } + fclose(fp); + d = realloc(d, count*sizeof(int)); + *read = count; + return d; +} + +char 
**read_tokens(char *filename, size_t *read) +{ + size_t size = 512; + size_t count = 0; + FILE *fp = fopen(filename, "r"); + char **d = calloc(size, sizeof(char *)); + char *line; + while((line=fgetl(fp)) != 0){ + ++count; + if(count > size){ + size = size*2; + d = realloc(d, size*sizeof(char *)); + } + if(0==strcmp(line, "")) line = "\n"; + d[count-1] = line; + } + fclose(fp); + d = realloc(d, count*sizeof(char *)); + *read = count; + return d; +} + + +float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size_t len, int batch, int steps) +{ + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + int i,j; + for(i = 0; i < batch; ++i){ + for(j = 0; j < steps; ++j){ + int curr = tokens[(offsets[i])%len]; + int next = tokens[(offsets[i] + 1)%len]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + offsets[i] = (offsets[i] + 1) % len; + + if(curr >= characters || curr < 0 || next >= characters || next < 0){ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} + +float_pair get_seq2seq_data(char **source, char **dest, int n, int characters, size_t len, int batch, int steps) +{ + int i,j; + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + for(i = 0; i < batch; ++i){ + int index = rand()%n; + //int slen = strlen(source[index]); + //int dlen = strlen(dest[index]); + for(j = 0; j < steps; ++j){ + unsigned char curr = source[index][j]; + unsigned char next = dest[index][j]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + if(curr > 255 || curr <= 0 || next > 255 || next <= 0){ + /*text[(index+j+2)%len] = 0; + printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]); + printf("%s", text+index); + */ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y 
= y; + return p; +} + +float_pair get_rnn_data(unsigned char *text, size_t *offsets, int characters, size_t len, int batch, int steps) +{ + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + int i,j; + for(i = 0; i < batch; ++i){ + for(j = 0; j < steps; ++j){ + unsigned char curr = text[(offsets[i])%len]; + unsigned char next = text[(offsets[i] + 1)%len]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + offsets[i] = (offsets[i] + 1) % len; + + if(curr > 255 || curr <= 0 || next > 255 || next <= 0){ + /*text[(index+j+2)%len] = 0; + printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]); + printf("%s", text+index); + */ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} + +void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear, int tokenized) +{ + srand(time(0)); + unsigned char *text = 0; + int *tokens = 0; + size_t size; + if(tokenized){ + tokens = read_tokenized_data(filename, &size); + } else { + text = read_file(filename); + size = strlen((const char*)text); + } + + char *backup_directory = "/home/pjreddie/backup/"; + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, clear); + + int inputs = net->inputs; + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g, Inputs: %d %d %d\n", net->learning_rate, net->momentum, net->decay, inputs, net->batch, net->time_steps); + int batch = net->batch; + int steps = net->time_steps; + if(clear) *net->seen = 0; + int i = (*net->seen)/net->batch; + + int streams = batch/steps; + size_t *offsets = calloc(streams, sizeof(size_t)); + int j; + for(j = 0; j < streams; ++j){ + offsets[j] = rand_size_t()%size; + } + + clock_t time; + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + float_pair p; + 
if(tokenized){ + p = get_rnn_token_data(tokens, offsets, inputs, size, streams, steps); + }else{ + p = get_rnn_data(text, offsets, inputs, size, streams, steps); + } + + copy_cpu(net->inputs*net->batch, p.x, 1, net->input, 1); + copy_cpu(net->truths*net->batch, p.y, 1, net->truth, 1); + float loss = train_network_datum(net) / (batch); + free(p.x); + free(p.y); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + size_t chars = get_current_batch(net)*batch; + fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds, %f epochs\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), (float) chars/size); + + for(j = 0; j < streams; ++j){ + //printf("%d\n", j); + if(rand()%64 == 0){ + //fprintf(stderr, "Reset\n"); + offsets[j] = rand_size_t()%size; + reset_network_state(net, j); + } + } + + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void print_symbol(int n, char **tokens){ + if(tokens){ + printf("%s ", tokens[n]); + } else { + printf("%c", n); + } +} + +void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float temp, int rseed, char *token_file) +{ + char **tokens = 0; + if(token_file){ + size_t n; + tokens = read_tokens(token_file, &n); + } + + srand(rseed); + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int i, j; + for(i = 0; i < net->n; ++i) net->layers[i]->temperature = temp; + int c = 0; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + + /* + fill_cpu(inputs, 0, input, 1); + for(i = 0; i < 10; ++i){ + network_predict(net, input); + } + fill_cpu(inputs, 0, input, 
1); + */ + + for(i = 0; i < len-1; ++i){ + c = seed[i]; + input[c] = 1; + network_predict(net, input); + input[c] = 0; + print_symbol(c, tokens); + } + if(len) c = seed[len-1]; + print_symbol(c, tokens); + for(i = 0; i < num; ++i){ + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + for(j = 32; j < 127; ++j){ + //printf("%d %c %f\n",j, j, out[j]); + } + for(j = 0; j < inputs; ++j){ + if (out[j] < .0001) out[j] = 0; + } + c = sample_array(out, inputs); + print_symbol(c, tokens); + } + printf("\n"); +} + +void test_tactic_rnn_multi(char *cfgfile, char *weightfile, int num, float temp, int rseed, char *token_file) +{ + char **tokens = 0; + if(token_file){ + size_t n; + tokens = read_tokens(token_file, &n); + } + + srand(rseed); + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int i, j; + for(i = 0; i < net->n; ++i) net->layers[i]->temperature = temp; + int c = 0; + float *input = calloc(inputs, sizeof(float)); + float *out = 0; + + while(1){ + reset_network_state(net, 0); + while((c = getc(stdin)) != EOF && c != 0){ + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + for(i = 0; i < num; ++i){ + for(j = 0; j < inputs; ++j){ + if (out[j] < .0001) out[j] = 0; + } + int next = sample_array(out, inputs); + if(c == '.' 
&& next == '\n') break; + c = next; + print_symbol(c, tokens); + + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + printf("\n"); + } +} + +void test_tactic_rnn(char *cfgfile, char *weightfile, int num, float temp, int rseed, char *token_file) +{ + char **tokens = 0; + if(token_file){ + size_t n; + tokens = read_tokens(token_file, &n); + } + + srand(rseed); + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int i, j; + for(i = 0; i < net->n; ++i) net->layers[i]->temperature = temp; + int c = 0; + float *input = calloc(inputs, sizeof(float)); + float *out = 0; + + while((c = getc(stdin)) != EOF){ + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + for(i = 0; i < num; ++i){ + for(j = 0; j < inputs; ++j){ + if (out[j] < .0001) out[j] = 0; + } + int next = sample_array(out, inputs); + if(c == '.' && next == '\n') break; + c = next; + print_symbol(c, tokens); + + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + printf("\n"); +} + +void valid_tactic_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int count = 0; + int words = 1; + int c; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + for(i = 0; i < len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + float sum = 0; + c = getc(stdin); + float log2 = log(2); + int in = 0; + while(c != EOF){ + int next = getc(stdin); + if(next == EOF) break; + if(next < 0 || next >= 255) error("Out of range character"); + + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + + if(c == '.' 
&& next == '\n') in = 0; + if(!in) { + if(c == '>' && next == '>'){ + in = 1; + ++words; + } + c = next; + continue; + } + ++count; + sum += log(out[next])/log2; + c = next; + printf("%d %d Perplexity: %4.4f Word Perplexity: %4.4f\n", count, words, pow(2, -sum/count), pow(2, -sum/words)); + } +} + +void valid_char_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int count = 0; + int words = 1; + int c; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + for(i = 0; i < len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + float sum = 0; + c = getc(stdin); + float log2 = log(2); + while(c != EOF){ + int next = getc(stdin); + if(next == EOF) break; + if(next < 0 || next >= 255) error("Out of range character"); + ++count; + if(next == ' ' || next == '\n' || next == '\t') ++words; + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + sum += log(out[next])/log2; + c = next; + printf("%d BPC: %4.4f Perplexity: %4.4f Word Perplexity: %4.4f\n", count, -sum/count, pow(2, -sum/count), pow(2, -sum/words)); + } +} + +void vec_char_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int c; + int seed_len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + char *line; + while((line=fgetl(stdin)) != 0){ + reset_network_state(net, 0); + for(i = 0; i < seed_len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + strip(line); + int str_len = strlen(line); + for(i = 0; i < str_len; ++i){ + c = line[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + c = ' '; + 
input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + + layer *l = net->layers[0]; + #ifdef GPU + cuda_pull_array(l->output_gpu, l->output, l->outputs); + #endif + printf("%s", line); + for(i = 0; i < l->outputs; ++i){ + printf(",%g", l->output[i]); + } + printf("\n"); + } +} + +void run_char_rnn(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + char *filename = find_char_arg(argc, argv, "-file", "data/shakespeare.txt"); + char *seed = find_char_arg(argc, argv, "-seed", "\n\n"); + int len = find_int_arg(argc, argv, "-len", 1000); + float temp = find_float_arg(argc, argv, "-temp", .7); + int rseed = find_int_arg(argc, argv, "-srand", time(0)); + int clear = find_arg(argc, argv, "-clear"); + int tokenized = find_arg(argc, argv, "-tokenized"); + char *tokens = find_char_arg(argc, argv, "-tokens", 0); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_char_rnn(cfg, weights, filename, clear, tokenized); + else if(0==strcmp(argv[2], "valid")) valid_char_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "validtactic")) valid_tactic_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "vec")) vec_char_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "generate")) test_char_rnn(cfg, weights, len, seed, temp, rseed, tokens); + else if(0==strcmp(argv[2], "generatetactic")) test_tactic_rnn(cfg, weights, len, temp, rseed, tokens); +} diff --git a/workloads/realworld/pipeline/darknet/examples/rnn_vid.c b/workloads/realworld/pipeline/darknet/examples/rnn_vid.c new file mode 100644 index 0000000000000000000000000000000000000000..e88792352311438d0fcb25bb7befd0677f70bae5 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/rnn_vid.c @@ -0,0 +1,208 @@ +#include "darknet.h" + +#ifdef OPENCV +image get_image_from_stream(CvCapture *cap); +image ipl_to_image(IplImage* src); + +void 
reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters); + + +typedef struct { + float *x; + float *y; +} float_pair; + +float_pair get_rnn_vid_data(network net, char **files, int n, int batch, int steps) +{ + int b; + assert(net.batch == steps + 1); + image out_im = get_network_image(net); + int output_size = out_im.w*out_im.h*out_im.c; + printf("%d %d %d\n", out_im.w, out_im.h, out_im.c); + float *feats = calloc(net.batch*batch*output_size, sizeof(float)); + for(b = 0; b < batch; ++b){ + int input_size = net.w*net.h*net.c; + float *input = calloc(input_size*net.batch, sizeof(float)); + char *filename = files[rand()%n]; + CvCapture *cap = cvCaptureFromFile(filename); + int frames = cvGetCaptureProperty(cap, CV_CAP_PROP_FRAME_COUNT); + int index = rand() % (frames - steps - 2); + if (frames < (steps + 4)){ + --b; + free(input); + continue; + } + + printf("frames: %d, index: %d\n", frames, index); + cvSetCaptureProperty(cap, CV_CAP_PROP_POS_FRAMES, index); + + int i; + for(i = 0; i < net.batch; ++i){ + IplImage* src = cvQueryFrame(cap); + image im = ipl_to_image(src); + rgbgr_image(im); + image re = resize_image(im, net.w, net.h); + //show_image(re, "loaded"); + //cvWaitKey(10); + memcpy(input + i*input_size, re.data, input_size*sizeof(float)); + free_image(im); + free_image(re); + } + float *output = network_predict(net, input); + + free(input); + + for(i = 0; i < net.batch; ++i){ + memcpy(feats + (b + i*batch)*output_size, output + i*output_size, output_size*sizeof(float)); + } + + cvReleaseCapture(&cap); + } + + //printf("%d %d %d\n", out_im.w, out_im.h, out_im.c); + float_pair p = {0}; + p.x = feats; + p.y = feats + output_size*batch; //+ out_im.w*out_im.h*out_im.c; + + return p; +} + + +void train_vid_rnn(char *cfgfile, char *weightfile) +{ + char *train_videos = "data/vid/train.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = 
basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + + list *plist = get_paths(train_videos); + int N = plist->size; + char **paths = (char **)list_to_array(plist); + clock_t time; + int steps = net.time_steps; + int batch = net.batch / net.time_steps; + + network extractor = parse_network_cfg("cfg/extractor.cfg"); + load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv"); + + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + float_pair p = get_rnn_vid_data(extractor, paths, N, batch, steps); + + copy_cpu(net.inputs*net.batch, p.x, 1, net.input, 1); + copy_cpu(net.truths*net.batch, p.y, 1, net.truth, 1); + float loss = train_network_datum(net) / (net.batch); + + + free(p.x); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time)); + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%10==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + + +image save_reconstruction(network net, image *init, float *feat, char *name, int i) +{ + image recon; + if (init) { + recon = copy_image(*init); + } else { + recon = make_random_image(net.w, net.h, 3); + } + + image update = make_image(net.w, net.h, 3); + reconstruct_picture(net, feat, recon, update, .01, .9, .1, 2, 50); + char buff[256]; + sprintf(buff, "%s%d", name, i); + save_image(recon, buff); + free_image(update); + 
return recon; +} + +void generate_vid_rnn(char *cfgfile, char *weightfile) +{ + network extractor = parse_network_cfg("cfg/extractor.recon.cfg"); + load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv"); + + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&extractor, 1); + set_batch_network(&net, 1); + + int i; + CvCapture *cap = cvCaptureFromFile("/extra/vid/ILSVRC2015/Data/VID/snippets/val/ILSVRC2015_val_00007030.mp4"); + float *feat; + float *next; + image last; + for(i = 0; i < 25; ++i){ + image im = get_image_from_stream(cap); + image re = resize_image(im, extractor.w, extractor.h); + feat = network_predict(extractor, re.data); + if(i > 0){ + printf("%f %f\n", mean_array(feat, 14*14*512), variance_array(feat, 14*14*512)); + printf("%f %f\n", mean_array(next, 14*14*512), variance_array(next, 14*14*512)); + printf("%f\n", mse_array(feat, 14*14*512)); + axpy_cpu(14*14*512, -1, feat, 1, next, 1); + printf("%f\n", mse_array(next, 14*14*512)); + } + next = network_predict(net, feat); + + free_image(im); + + free_image(save_reconstruction(extractor, 0, feat, "feat", i)); + free_image(save_reconstruction(extractor, 0, next, "next", i)); + if (i==24) last = copy_image(re); + free_image(re); + } + for(i = 0; i < 30; ++i){ + next = network_predict(net, next); + image new = save_reconstruction(extractor, &last, next, "new", i); + free_image(last); + last = new; + } +} + +void run_vid_rnn(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + //char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_vid_rnn(cfg, weights); + else if(0==strcmp(argv[2], "generate")) generate_vid_rnn(cfg, weights); +} +#else +void run_vid_rnn(int argc, char **argv){} +#endif + diff --git a/workloads/realworld/pipeline/darknet/examples/segmenter.c b/workloads/realworld/pipeline/darknet/examples/segmenter.c new file mode 100644 index 0000000000000000000000000000000000000000..2e7cea0b730754b74a125bcd865aa12f0bdd3be0 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/segmenter.c @@ -0,0 +1,255 @@ +#include "darknet.h" +#include +#include + +void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + image pred = get_network_image(net); + + int div = net->w/pred.w; + assert(pred.w * div == net->w); + assert(pred.h * div == net->h); + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.scale = div; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = 
net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.classes = 80; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = SEGMENTATION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(display){ + image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]); + image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]); + image mask = mask_to_rgb(tr); + image prmask = mask_to_rgb(pred); + show_image(im, "input", 1); + show_image(prmask, "pred", 1); + show_image(mask, "truth", 100); + free_image(mask); + free_image(prmask); + } + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + 
save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_segmenter(char *datafile, char *cfg, char *weights, char *filename) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image sized = letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + show_image(sized, "orig", 1); + show_image(prmask, "pred", 0); + free_image(im); + free_image(sized); + free_image(prmask); + if (filename) break; + } +} + + +void demo_segmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Classifier Demo\n"); + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = letterbox_image(in, net->w, net->h); + + network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + show_image(prmask, "Segmenter", 10); + + free_image(in_s); + free_image(in); + free_image(prmask); + + 
gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_segmenter(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? 
argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_segmenter(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_segmenter(data, cfg, weights, gpus, ngpus, clear, display); + else if(0==strcmp(argv[2], "demo")) demo_segmenter(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/pipeline/darknet/examples/super.c b/workloads/realworld/pipeline/darknet/examples/super.c new file mode 100644 index 0000000000000000000000000000000000000000..d34406b1f2ce70cd36eecb8298bf1ca3e736f01b --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/super.c @@ -0,0 +1,120 @@ +#include "darknet.h" + +void train_super(char *cfgfile, char *weightfile, int clear) +{ + char *train_images = "/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, clear); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.scale = 4; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = SUPER_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf 
seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void test_super(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + resize_network(net, im.w, im.h); + printf("%d %d\n", im.w, im.h); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image(net); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 0); + + free_image(im); + if (filename) break; + } +} + + +void run_super(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5] : 0; + int clear = find_arg(argc, argv, "-clear"); + if(0==strcmp(argv[2], "train")) train_super(cfg, weights, clear); + else if(0==strcmp(argv[2], "test")) test_super(cfg, weights, filename); + /* + else if(0==strcmp(argv[2], "valid")) validate_super(cfg, weights); + */ +} diff --git a/workloads/realworld/pipeline/darknet/examples/swag.c b/workloads/realworld/pipeline/darknet/examples/swag.c new file mode 100644 index 0000000000000000000000000000000000000000..c22d7855c46a975ecd1e94a60f9b7059bc288fee --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/swag.c @@ -0,0 +1,83 @@ +#include "darknet.h" +#include + +void train_swag(char *cfgfile, char *weightfile) +{ + char *train_images = "data/voc.0712.trainval"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + data train, buffer; + + layer l = net.layers[net.n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes = side; + args.d = &buffer; + args.type = REGION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + 
time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || i == 600){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void run_swag(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_swag(cfg, weights); +} diff --git a/workloads/realworld/pipeline/darknet/examples/tag.c b/workloads/realworld/pipeline/darknet/examples/tag.c new file mode 100644 index 0000000000000000000000000000000000000000..4caf8cba18f39f62deb54ea913fd40c194b3e33c --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/tag.c @@ -0,0 +1,140 @@ +#include "darknet.h" + +void train_tag(char *cfgfile, char *weightfile, int clear) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, clear); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + list *plist = get_paths("/home/pjreddie/tag/train.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + + args.min = net->w; + args.max = net->max_crop; + args.size = net->w; + + args.paths = paths; + args.classes = 
net->outputs; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = TAG_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + fprintf(stderr, "%d classes\n", net->outputs); + + load_thread = load_data_in_thread(args); + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + pthread_join(load_thread, 0); + free_data(buffer); + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void test_tag(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + int i = 0; + char **names = get_labels("data/tags.txt"); + clock_t time; + int indexes[10]; + char buff[256]; + char *input = buff; + int size = net->w; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + 
strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = resize_min(im, size); + resize_network(net, r.w, r.h); + printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + float *predictions = network_predict(net, X); + top_predictions(net, 10, indexes); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < 10; ++i){ + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void run_tag(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int clear = find_arg(argc, argv, "-clear"); + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_tag(cfg, weights, clear); + else if(0==strcmp(argv[2], "test")) test_tag(cfg, weights, filename); +} + diff --git a/workloads/realworld/pipeline/darknet/examples/voxel.c b/workloads/realworld/pipeline/darknet/examples/voxel.c new file mode 100644 index 0000000000000000000000000000000000000000..01ea9bb98987590227758364bbfff50996cf9a2d --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/voxel.c @@ -0,0 +1,161 @@ +#include "darknet.h" + +void extract_voxel(char *lfile, char *rfile, char *prefix) +{ +#ifdef OPENCV + int w = 1920; + int h = 1080; + int shift = 0; + int count = 0; + CvCapture *lcap = cvCaptureFromFile(lfile); + CvCapture *rcap = cvCaptureFromFile(rfile); + while(1){ + image l = get_image_from_stream(lcap); + image r = get_image_from_stream(rcap); + if(!l.w || !r.w) break; + if(count%100 == 0) { + shift = best_3d_shift_r(l, r, -l.h/100, l.h/100); + printf("%d\n", shift); + } + image ls = crop_image(l, (l.w - w)/2, (l.h - h)/2, w, h); + image rs = crop_image(r, 105 + (r.w - w)/2, (r.h - h)/2 + shift, w, h); + char 
buff[256]; + sprintf(buff, "%s_%05d_l", prefix, count); + save_image(ls, buff); + sprintf(buff, "%s_%05d_r", prefix, count); + save_image(rs, buff); + free_image(l); + free_image(r); + free_image(ls); + free_image(rs); + ++count; + } + +#else + printf("need OpenCV for extraction\n"); +#endif +} + +void train_voxel(char *cfgfile, char *weightfile) +{ + char *train_images = "/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.scale = 4; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = SUPER_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, 
buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void test_voxel(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + resize_network(&net, im.w, im.h); + printf("%d %d\n", im.w, im.h); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image(net); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + + free_image(im); + if (filename) break; + } +} + + +void run_voxel(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_voxel(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_voxel(cfg, weights, filename); + else if(0==strcmp(argv[2], "extract")) extract_voxel(argv[3], argv[4], argv[5]); + /* + else if(0==strcmp(argv[2], "valid")) validate_voxel(cfg, weights); + */ +} diff --git a/workloads/realworld/pipeline/darknet/examples/writing.c b/workloads/realworld/pipeline/darknet/examples/writing.c new file mode 100644 index 0000000000000000000000000000000000000000..1b6ff83b5838b654e0fd1b6664156daf6d7a889b --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/writing.c @@ -0,0 +1,144 @@ +#include "darknet.h" + +void train_writing(char *cfgfile, char *weightfile) +{ + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + list *plist = get_paths("figures.list"); + char **paths = (char **)list_to_array(plist); + clock_t time; + int N = plist->size; + printf("N: %d\n", N); + image out = get_network_image(net); + + data train, buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.out_w = out.w; + args.out_h = out.h; + args.paths = paths; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = WRITING_DATA; + + pthread_t load_thread = load_data_in_thread(args); + int epoch = (*net.seen)/N; + while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + printf("Loaded %lf seconds\n",sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + + /* + image pred = float_to_image(64, 64, 1, out); + print_image(pred); + 
*/ + + /* + image im = float_to_image(256, 256, 3, train.X.vals[0]); + image lab = float_to_image(64, 64, 1, train.y.vals[0]); + image pred = float_to_image(64, 64, 1, out); + show_image(im, "image"); + show_image(lab, "label"); + print_image(lab); + show_image(pred, "pred"); + cvWaitKey(0); + */ + + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + free_data(train); + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_batch_%ld.weights", backup_directory, base, get_current_batch(net)); + save_weights(net, buff); + } + if(*net.seen/N > epoch){ + epoch = *net.seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + } +} + +void test_writing(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + + image im = load_image_color(input, 0, 0); + resize_network(&net, im.w, im.h); + printf("%d %d %d\n", im.h, im.w, im.c); + float *X = im.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + image pred = get_network_image(net); + + image upsampled = resize_image(pred, im.w, im.h); + image thresh = threshold_image(upsampled, .5); + pred = thresh; + + show_image(pred, "prediction"); + show_image(im, "orig"); +#ifdef OPENCV + cvWaitKey(0); + cvDestroyAllWindows(); +#endif + + free_image(upsampled); + 
free_image(thresh); + free_image(im); + if (filename) break; + } +} + +void run_writing(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_writing(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_writing(cfg, weights, filename); +} + diff --git a/workloads/realworld/pipeline/darknet/examples/yolo.c b/workloads/realworld/pipeline/darknet/examples/yolo.c new file mode 100644 index 0000000000000000000000000000000000000000..a0e336fc6fbea1ae5e20ef15661a35e18a907821 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/examples/yolo.c @@ -0,0 +1,327 @@ +#include "darknet.h" + +char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; + +void train_yolo(char *cfgfile, char *weightfile) +{ + char *train_images = "/data/voc/train.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + layer *l = net->layers[net->n - 1]; + + int side = l->side; + int classes = l->classes; + float jitter = l->jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + 
args.num_boxes = side; + args.d = &buffer; + args.type = REGION_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || (i < 1000 && i%100 == 0)){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void print_yolo_detections(FILE **fps, char *id, int total, int classes, int w, int h, detection *dets) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], + xmin, ymin, xmax, ymax); + } + } +} + +void validate_yolo(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; 
+ //list *plist = get_paths("data/voc.2007.test"); + list *plist = get_paths("/home/pjreddie/data/voc/2007_test.txt"); + //list *plist = get_paths("data/voc.2012.test"); + char **paths = (char **)list_to_array(plist); + + layer *l = net->layers[net->n-1]; + int classes = l->classes; + + int j; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + int t; + + float thresh = .001; + int nms = 1; + float iou_thresh = .5; + + int nthreads = 8; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.type = IMAGE_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + time_t start = time(0); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id = basecfg(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l->side*l->side*l->n, classes, iou_thresh); + print_yolo_detections(fps, id, l->side*l->side*l->n, classes, w, h, 
dets); + free_detections(dets, nboxes); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); +} + +void validate_yolo_recall(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + list *plist = get_paths("data/voc.2007.test"); + char **paths = (char **)list_to_array(plist); + + layer *l = net->layers[net->n-1]; + int classes = l->classes; + int side = l->side; + + int j, k; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + + float thresh = .001; + float iou_thresh = .5; + float nms = 0; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + + int nboxes = 0; + detection *dets = get_network_boxes(net, orig.w, orig.h, thresh, 0, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, side*side*l->n, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < side*side*l->n; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float 
best_iou = 0; + for(k = 0; k < side*side*l->n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free_detections(dets, nboxes); + free(id); + free_image(orig); + free_image(sized); + } +} + +void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) +{ + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + layer *l = net->layers[net->n-1]; + set_batch_network(net, 1); + srand(2222222); + clock_t time; + char buff[256]; + char *input = buff; + float nms=.4; + while(1){ + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = resize_image(im, net->w, net->h); + float *X = sized.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + + int nboxes = 0; + detection *dets = get_network_boxes(net, 1, 1, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l->side*l->side*l->n, l->classes, nms); + + draw_detections(im, dets, l->side*l->side*l->n, thresh, voc_names, alphabet, 20); + save_image(im, "predictions"); + show_image(im, "predictions", 0); + free_detections(dets, nboxes); + free_image(im); + free_image(sized); + if (filename) break; + } +} + +void run_yolo(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .2); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + if(argc < 4){ + fprintf(stderr, 
"usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int avg = find_int_arg(argc, argv, "-avg", 1); + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5]: 0; + if(0==strcmp(argv[2], "test")) test_yolo(cfg, weights, filename, thresh); + else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights); + else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix, avg, .5, 0,0,0,0); +} diff --git a/workloads/realworld/pipeline/darknet/include/darknet.h b/workloads/realworld/pipeline/darknet/include/darknet.h new file mode 100644 index 0000000000000000000000000000000000000000..78aaa44b3942e856bd42920584e688d2c03e32de --- /dev/null +++ b/workloads/realworld/pipeline/darknet/include/darknet.h @@ -0,0 +1,830 @@ +#ifndef DARKNET_API +#define DARKNET_API +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef GPU + #define GPU_DEVICE 7 + #define BLOCK 512 + + #include "cuda_runtime.h" + #include "curand.h" + #include "cublas_v2.h" + + #ifdef CUDNN + #include "cudnn.h" + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define SECRET_NUM -1234 +extern int gpu_index; + +void startCPU(); +void endCPU(); + +void initTrace(); +void finiTrace(); + + +typedef struct{ + int classes; + char **names; +} metadata; + +metadata get_metadata(char *file); + +typedef struct{ + int *leaf; + int n; + int *parent; + int *child; + int *group; + char **name; + + int groups; + int *group_size; + int *group_offset; +} tree; +tree *read_tree(char *filename); + +typedef enum{ + LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU +} ACTIVATION; + +typedef enum{ + PNG, BMP, TGA, JPG +} IMTYPE; + +typedef enum{ + 
MULT, ADD, SUB, DIV +} BINARY_ACTIVATION; + +typedef enum { + CONVOLUTIONAL, + DECONVOLUTIONAL, + CONNECTED, + MAXPOOL, + SOFTMAX, + DETECTION, + DROPOUT, + CROP, + ROUTE, + COST, + NORMALIZATION, + AVGPOOL, + LOCAL, + SHORTCUT, + ACTIVE, + RNN, + GRU, + LSTM, + CRNN, + BATCHNORM, + NETWORK, + XNOR, + REGION, + YOLO, + ISEG, + REORG, + UPSAMPLE, + LOGXENT, + L2NORM, + BLANK +} LAYER_TYPE; + +typedef enum{ + SSE, MASKED, L1, SEG, SMOOTH,WGAN +} COST_TYPE; + +typedef struct{ + int batch; + float learning_rate; + float momentum; + float decay; + int adam; + float B1; + float B2; + float eps; + int t; +} update_args; + +struct network; +typedef struct network network; + +struct layer; +typedef struct layer layer; + +struct layer{ + LAYER_TYPE type; + ACTIVATION activation; + COST_TYPE cost_type; + void (*forward) (layer* l, struct network); + void (*backward) (layer* l, struct network); + void (*update) (layer* l, update_args, struct network); + void (*forward_gpu) (layer* l, struct network); + void (*backward_gpu) (layer* l, struct network); + void (*update_gpu) (layer* l, update_args, struct network); + int batch_normalize; + int shortcut; + int batch; + int forced; + int flipped; + int inputs; + int outputs; + int nweights; + int nbiases; + int extra; + int truths; + int h,w,c; + int out_h, out_w, out_c; + int n; + int max_boxes; + int groups; + int size; + int side; + int stride; + int reverse; + int flatten; + int spatial; + int pad; + int sqrt; + int flip; + int index; + int binary; + int xnor; + int steps; + int hidden; + int truth; + float smooth; + float dot; + float angle; + float jitter; + float saturation; + float exposure; + float shift; + float ratio; + float learning_rate_scale; + float clip; + int noloss; + int softmax; + int classes; + int coords; + int background; + int rescore; + int objectness; + int joint; + int noadjust; + int reorg; + int log; + int tanh; + int *mask; + int total; + + float alpha; + float beta; + float kappa; + + float 
coord_scale; + float object_scale; + float noobject_scale; + float mask_scale; + float class_scale; + int bias_match; + int random; + float ignore_thresh; + float truth_thresh; + float thresh; + float focus; + int classfix; + int absolute; + + int onlyforward; + int stopbackward; + int dontload; + int dontsave; + int dontloadscales; + int numload; + + float temperature; + float probability; + float scale; + + char * cweights; + int * indexes; + int * input_layers; + int * input_sizes; + int * map; + int * counts; + float ** sums; + float * rand; + float * cost; + float * state; + float * prev_state; + float * forgot_state; + float * forgot_delta; + float * state_delta; + float * combine_cpu; + float * combine_delta_cpu; + + float * concat; + float * concat_delta; + + float * binary_weights; + + float * biases; + float * bias_updates; + + float * scales; + float * scale_updates; + + float * weights; + float * weight_updates; + + float * delta; + float * output; + float * loss; + float * squared; + float * norms; + + float * spatial_mean; + float * mean; + float * variance; + + float * mean_delta; + float * variance_delta; + + float * rolling_mean; + float * rolling_variance; + + float * x; + float * x_norm; + + float * m; + float * v; + + float * bias_m; + float * bias_v; + float * scale_m; + float * scale_v; + + + float *z_cpu; + float *r_cpu; + float *h_cpu; + float * prev_state_cpu; + + float *temp_cpu; + float *temp2_cpu; + float *temp3_cpu; + + float *dh_cpu; + float *hh_cpu; + float *prev_cell_cpu; + float *cell_cpu; + float *f_cpu; + float *i_cpu; + float *g_cpu; + float *o_cpu; + float *c_cpu; + float *dc_cpu; + + float * binary_input; + + struct layer *input_layer; + struct layer *self_layer; + struct layer *output_layer; + + struct layer *reset_layer; + struct layer *update_layer; + struct layer *state_layer; + + struct layer *input_gate_layer; + struct layer *state_gate_layer; + struct layer *input_save_layer; + struct layer *state_save_layer; + struct 
layer *input_state_layer; + struct layer *state_state_layer; + + struct layer *input_z_layer; + struct layer *state_z_layer; + + struct layer *input_r_layer; + struct layer *state_r_layer; + + struct layer *input_h_layer; + struct layer *state_h_layer; + + struct layer *wz; + struct layer *uz; + struct layer *wr; + struct layer *ur; + struct layer *wh; + struct layer *uh; + struct layer *uo; + struct layer *wo; + struct layer *uf; + struct layer *wf; + struct layer *ui; + struct layer *wi; + struct layer *ug; + struct layer *wg; + + tree *softmax_tree; + + size_t workspace_size; + + int has_delta; + +#ifdef GPU + int *indexes_gpu; + + float *z_gpu; + float *r_gpu; + float *h_gpu; + + float *temp_gpu; + float *temp2_gpu; + float *temp3_gpu; + + float *dh_gpu; + float *hh_gpu; + float *prev_cell_gpu; + float *cell_gpu; + float *f_gpu; + float *i_gpu; + float *g_gpu; + float *o_gpu; + float *c_gpu; + float *dc_gpu; + + float *m_gpu; + float *v_gpu; + float *bias_m_gpu; + float *scale_m_gpu; + float *bias_v_gpu; + float *scale_v_gpu; + + float * combine_gpu; + float * combine_delta_gpu; + + float * prev_state_gpu; + float * forgot_state_gpu; + float * forgot_delta_gpu; + float * state_gpu; + float * state_delta_gpu; + float * gate_gpu; + float * gate_delta_gpu; + float * save_gpu; + float * save_delta_gpu; + float * concat_gpu; + float * concat_delta_gpu; + + float * binary_input_gpu; + float * binary_weights_gpu; + + float * mean_gpu; + float * variance_gpu; + + float * rolling_mean_gpu; + float * rolling_variance_gpu; + + float * variance_delta_gpu; + float * mean_delta_gpu; + + float * x_gpu; + float * x_norm_gpu; + float * weights_gpu; + float * weight_updates_gpu; + float * weight_change_gpu; + + float * biases_gpu; + float * bias_updates_gpu; + float * bias_change_gpu; + + float * scales_gpu; + float * scale_updates_gpu; + float * scale_change_gpu; + + float * output_gpu; + float * loss_gpu; + float * delta_gpu; + float * rand_gpu; + float * squared_gpu; + float 
* norms_gpu; + + pthread_t thread; + int malloc_async; + int stream_index; +#ifdef CUDNN + cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc; + cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc; + cudnnTensorDescriptor_t normTensorDesc; + cudnnFilterDescriptor_t weightDesc; + cudnnFilterDescriptor_t dweightDesc; + cudnnConvolutionDescriptor_t convDesc; + cudnnConvolutionFwdAlgo_t fw_algo; + cudnnConvolutionBwdDataAlgo_t bd_algo; + cudnnConvolutionBwdFilterAlgo_t bf_algo; +#endif +#endif +}; + +void free_layer(layer *l); + +typedef enum { + CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM +} learning_rate_policy; + +typedef struct network{ + int n; + int batch; + size_t *seen; + int *t; + float epoch; + int subdivisions; + layer **layers; + float *output; + learning_rate_policy policy; + + float learning_rate; + float momentum; + float decay; + float gamma; + float scale; + float power; + int time_steps; + int step; + int max_batches; + float *scales; + int *steps; + int num_steps; + int burn_in; + + int adam; + float B1; + float B2; + float eps; + + int inputs; + int outputs; + int truths; + int notruth; + int h, w, c; + int max_crop; + int min_crop; + float max_ratio; + float min_ratio; + int center; + float angle; + float aspect; + float exposure; + float saturation; + float hue; + int random; + + int gpu_index; + tree *hierarchy; + + float *input; + float *truth; + float *delta; + float *workspace; + int train; + int index; + int next_index; + int is_first; + float *cost; + float clip; + +#ifdef GPU + float *input_gpu; + float *truth_gpu; + float *delta_gpu; + float *output_gpu; + + cudaStream_t streams[2]; + // int stream_index; +#endif + +} network; + +typedef struct { + int w; + int h; + float scale; + float rad; + float dx; + float dy; + float aspect; +} augment_args; + +typedef struct { + int w; + int h; + int c; + float *data; +} image; + +typedef struct{ + float x, y, w, h; +} box; + +typedef struct detection{ + box bbox; + int classes; + float 
*prob; + float *mask; + float objectness; + int sort_class; +} detection; + +typedef struct matrix{ + int rows, cols; + float **vals; +} matrix; + + +typedef struct{ + int w, h; + matrix X; + matrix y; + int shallow; + int *num_boxes; + box **boxes; +} data; + +typedef enum { + CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA, ISEG_DATA +} data_type; + +typedef struct load_args{ + int threads; + char **paths; + char *path; + int n; + int m; + char **labels; + int h; + int w; + int out_w; + int out_h; + int nh; + int nw; + int num_boxes; + int min, max, size; + int classes; + int background; + int scale; + int center; + int coords; + float jitter; + float angle; + float aspect; + float saturation; + float exposure; + float hue; + data *d; + image *im; + image *resized; + data_type type; + tree *hierarchy; +} load_args; + +typedef struct{ + int id; + float x,y,w,h; + float left, right, top, bottom; +} box_label; + + +network *load_network(char *cfg, char *weights, int clear); +load_args get_base_args(network *net); + +void free_data(data d); + +typedef struct node{ + void *val; + struct node *next; + struct node *prev; +} node; + +typedef struct list{ + int size; + node *front; + node *back; +} list; + +pthread_t load_data(load_args args); +list *read_data_cfg(char *filename); +list *read_cfg(char *filename); +unsigned char *read_file(char *filename); +data resize_data(data orig, int w, int h); +data *tile_data(data orig, int divs, int size); +data select_data(data *orig, int *inds); + +void forward_network(network *net); +void backward_network(network *net); +void update_network(network *net); + + +float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); +void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void copy_cpu(int N, float *X, int 
INCX, float *Y, int INCY); +void scal_cpu(int N, float ALPHA, float *X, int INCX); +void fill_cpu(int N, float ALPHA, float * X, int INCX); +void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); +void softmax(float *input, int n, float temp, int stride, float *output); + +int best_3d_shift_r(image a, image b, int min, int max); +#ifdef GPU +void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY, cudaStream_t stream); +void fill_gpu(int N, float ALPHA, float * X, int INCX); +void fill_gpu_1(int N, float ALPHA, float * X, int INCX); +void scal_gpu(int N, float ALPHA, float * X, int INCX, cudaStream_t stream); +void copy_gpu(int N, float * X, int INCX, float * Y, int INCY, cudaStream_t stream); + +void cuda_set_device(int n); +void cuda_free(float *x_gpu); +float *cuda_make_array(float *x, size_t n); +float *cuda_make_array_pipe(float *x, size_t n, cudaStream_t stream); +void cuda_pull_array(float *x_gpu, float *x, size_t n); +float cuda_mag_array(float *x_gpu, size_t n); +void cuda_push_array(float *x_gpu, float *x, size_t n); + +void forward_network_gpu(network *net); +void backward_network_gpu(network *net); +void update_network_gpu(network *net); + +float train_networks(network **nets, int n, data d, int interval); +void sync_nets(network **nets, int n, int interval); +void harmless_update_network_gpu(network *net); +#endif +image get_label(image **characters, char *string, int size); +void draw_label(image a, int r, int c, image label, const float *rgb); +void save_image(image im, const char *name); +void save_image_options(image im, const char *name, IMTYPE f, int quality); +void get_next_batch(data d, int n, int offset, float *X, float *y); +void grayscale_image_3c(image im); +void normalize_image(image p); +void matrix_to_csv(matrix m); +float train_network_sgd(network *net, data d, int n); +void rgbgr_image(image im); +data copy_data(data d); +data concat_data(data d1, data d2); +data 
load_cifar10_data(char *filename); +float matrix_topk_accuracy(matrix truth, matrix guess, int k); +void matrix_add_matrix(matrix from, matrix to); +void scale_matrix(matrix m, float scale); +matrix csv_to_matrix(char *filename); +float *network_accuracies(network *net, data d, int n); +float train_network_datum(network *net); +image make_random_image(int w, int h, int c); + +void denormalize_connected_layer(layer *l); +void denormalize_convolutional_layer(layer *l); +void statistics_connected_layer(layer *l); +void rescale_weights(layer *l, float scale, float trans); +void rgbgr_weights(layer *l); +image *get_weights(layer *l); + +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, int avg, float hier_thresh, int w, int h, int fps, int fullscreen); +void get_detection_detections(layer *l, int w, int h, float thresh, detection *dets); + +char *option_find_str(list *l, char *key, char *def); +int option_find_int(list *l, char *key, int def); +int option_find_int_quiet(list *l, char *key, int def); + +network *parse_network_cfg(char *filename); +void save_weights(network *net, char *filename); +void load_weights(network *net, char *filename); +void save_weights_upto(network *net, char *filename, int cutoff); +void load_weights_upto(network *net, char *filename, int start, int cutoff); + +void zero_objectness(layer *l); +void get_region_detections(layer *l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets); +int get_yolo_detections(layer *l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets); +void free_network(network *net); +void set_batch_network(network *net, int b); +void set_temp_network(network *net, float t); +image load_image(char *filename, int w, int h, int c); +image load_image_color(char *filename, int w, int h); +image make_image(int w, int h, int c); +image 
resize_image(image im, int w, int h); +void censor_image(image im, int dx, int dy, int w, int h); +image letterbox_image(image im, int w, int h); +image crop_image(image im, int dx, int dy, int w, int h); +image center_crop_image(image im, int w, int h); +image resize_min(image im, int min); +image resize_max(image im, int max); +image threshold_image(image im, float thresh); +image mask_to_rgb(image mask); +int resize_network(network *net, int w, int h); +void free_matrix(matrix m); +void test_resize(char *filename); +int show_image(image p, const char *name, int ms); +image copy_image(image p); +void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b); +float get_current_rate(network *net); +void composite_3d(char *f1, char *f2, char *out, int delta); +data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h); +size_t get_current_batch(network *net); +void constrain_image(image im); +image get_network_image_layer(network *net, int i); +layer* get_network_output_layer(network *net); +void top_predictions(network *net, int n, int *index); +void flip_image(image a); +image float_to_image(int w, int h, int c, float *data); +void ghost_image(image source, image dest, int dx, int dy); +float network_accuracy(network *net, data d); +void random_distort_image(image im, float hue, float saturation, float exposure); +void fill_image(image m, float s); +image grayscale_image(image im); +void rotate_image_cw(image im, int times); +double what_time_is_it_now(); +image rotate_image(image m, float rad); +void visualize_network(network *net); +float box_iou(box a, box b); +data load_all_cifar10(); +box_label *read_boxes(char *filename, int *n); +box float_to_box(float *f, int stride); +void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes); + +matrix network_predict_data(network *net, data test); +image **load_alphabet(); +image get_network_image(network 
*net); +float *network_predict(network *net, float *input); + +int network_width(network *net); +int network_height(network *net); +float *network_predict_image(network *net, image im); +void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets); +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num); +void free_detections(detection *dets, int n); + +void reset_network_state(network *net, int b); + +char **get_labels(char *filename); +void do_nms_obj(detection *dets, int total, int classes, float thresh); +void do_nms_sort(detection *dets, int total, int classes, float thresh); + +matrix make_matrix(int rows, int cols); + +#ifdef OPENCV +void *open_video_stream(const char *f, int c, int w, int h, int fps); +image get_image_from_stream(void *p); +void make_window(char *name, int w, int h, int fullscreen); +#endif + +void free_image(image m); +float train_network(network *net, data d); +pthread_t load_data_in_thread(load_args args); +void load_data_blocking(load_args args); +list *get_paths(char *filename); +void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride); +void change_leaves(tree *t, char *leaf_list); + +int find_int_arg(int argc, char **argv, char *arg, int def); +float find_float_arg(int argc, char **argv, char *arg, float def); +int find_arg(int argc, char* argv[], char *arg); +char *find_char_arg(int argc, char **argv, char *arg, char *def); +char *basecfg(char *cfgfile); +void find_replace(char *str, char *orig, char *rep, char *output); +void free_ptrs(void **ptrs, int n); +char *fgetl(FILE *fp); +void strip(char *s); +float sec(clock_t clocks); +void **list_to_array(list *l); +void top_k(float *a, int n, int k, int *index); +int *read_map(char *filename); +void error(const char *s); +int max_index(float *a, int n); +int max_int_index(int *a, int n); +int sample_array(float *a, int n); +int 
*random_index_order(int min, int max); +void free_list(list *l); +float mse_array(float *a, int n); +float variance_array(float *a, int n); +float mag_array(float *a, int n); +void scale_array(float *a, int n, float s); +float mean_array(float *a, int n); +float sum_array(float *a, int n); +void normalize_array(float *a, int n); +int *read_intlist(char *s, int *n, int d); +size_t rand_size_t(); +float rand_normal(); +float rand_uniform(float min, float max); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/workloads/realworld/pipeline/darknet/predictions.jpg b/workloads/realworld/pipeline/darknet/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..49c1abe30cdcdceadad4353da30ce5660c96be1a Binary files /dev/null and b/workloads/realworld/pipeline/darknet/predictions.jpg differ diff --git a/workloads/realworld/pipeline/darknet/python/darknet.py b/workloads/realworld/pipeline/darknet/python/darknet.py new file mode 100644 index 0000000000000000000000000000000000000000..b14d24485d86aa69f3991be79ec4f25c2b8e5a59 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/python/darknet.py @@ -0,0 +1,156 @@ +from ctypes import * +import math +import random + +def sample(probs): + s = sum(probs) + probs = [a/s for a in probs] + r = random.uniform(0, 1) + for i in range(len(probs)): + r = r - probs[i] + if r <= 0: + return i + return len(probs)-1 + +def c_array(ctype, values): + arr = (ctype*len(values))() + arr[:] = values + return arr + +class BOX(Structure): + _fields_ = [("x", c_float), + ("y", c_float), + ("w", c_float), + ("h", c_float)] + +class DETECTION(Structure): + _fields_ = [("bbox", BOX), + ("classes", c_int), + ("prob", POINTER(c_float)), + ("mask", POINTER(c_float)), + ("objectness", c_float), + ("sort_class", c_int)] + + +class IMAGE(Structure): + _fields_ = [("w", c_int), + ("h", c_int), + ("c", c_int), + ("data", POINTER(c_float))] + +class METADATA(Structure): + _fields_ = [("classes", c_int), + ("names", 
POINTER(c_char_p))] + + + +#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL) +lib = CDLL("libdarknet.so", RTLD_GLOBAL) +lib.network_width.argtypes = [c_void_p] +lib.network_width.restype = c_int +lib.network_height.argtypes = [c_void_p] +lib.network_height.restype = c_int + +predict = lib.network_predict +predict.argtypes = [c_void_p, POINTER(c_float)] +predict.restype = POINTER(c_float) + +set_gpu = lib.cuda_set_device +set_gpu.argtypes = [c_int] + +make_image = lib.make_image +make_image.argtypes = [c_int, c_int, c_int] +make_image.restype = IMAGE + +get_network_boxes = lib.get_network_boxes +get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)] +get_network_boxes.restype = POINTER(DETECTION) + +make_network_boxes = lib.make_network_boxes +make_network_boxes.argtypes = [c_void_p] +make_network_boxes.restype = POINTER(DETECTION) + +free_detections = lib.free_detections +free_detections.argtypes = [POINTER(DETECTION), c_int] + +free_ptrs = lib.free_ptrs +free_ptrs.argtypes = [POINTER(c_void_p), c_int] + +network_predict = lib.network_predict +network_predict.argtypes = [c_void_p, POINTER(c_float)] + +reset_rnn = lib.reset_rnn +reset_rnn.argtypes = [c_void_p] + +load_net = lib.load_network +load_net.argtypes = [c_char_p, c_char_p, c_int] +load_net.restype = c_void_p + +do_nms_obj = lib.do_nms_obj +do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +do_nms_sort = lib.do_nms_sort +do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +free_image = lib.free_image +free_image.argtypes = [IMAGE] + +letterbox_image = lib.letterbox_image +letterbox_image.argtypes = [IMAGE, c_int, c_int] +letterbox_image.restype = IMAGE + +load_meta = lib.get_metadata +lib.get_metadata.argtypes = [c_char_p] +lib.get_metadata.restype = METADATA + +load_image = lib.load_image_color +load_image.argtypes = [c_char_p, c_int, c_int] +load_image.restype = IMAGE + +rgbgr_image = 
lib.rgbgr_image +rgbgr_image.argtypes = [IMAGE] + +predict_image = lib.network_predict_image +predict_image.argtypes = [c_void_p, IMAGE] +predict_image.restype = POINTER(c_float) + +def classify(net, meta, im): + out = predict_image(net, im) + res = [] + for i in range(meta.classes): + res.append((meta.names[i], out[i])) + res = sorted(res, key=lambda x: -x[1]) + return res + +def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): + im = load_image(image, 0, 0) + num = c_int(0) + pnum = pointer(num) + predict_image(net, im) + dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum) + num = pnum[0] + if (nms): do_nms_obj(dets, num, meta.classes, nms); + + res = [] + for j in range(num): + for i in range(meta.classes): + if dets[j].prob[i] > 0: + b = dets[j].bbox + res.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h))) + res = sorted(res, key=lambda x: -x[1]) + free_image(im) + free_detections(dets, num) + return res + +if __name__ == "__main__": + #net = load_net("cfg/densenet201.cfg", "/home/pjreddie/trained/densenet201.weights", 0) + #im = load_image("data/wolf.jpg", 0, 0) + #meta = load_meta("cfg/imagenet1k.data") + #r = classify(net, meta, im) + #print r[:10] + net = load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0) + meta = load_meta("cfg/coco.data") + r = detect(net, meta, "data/dog.jpg") + print(r) + + diff --git a/workloads/realworld/pipeline/darknet/python/proverbot.py b/workloads/realworld/pipeline/darknet/python/proverbot.py new file mode 100644 index 0000000000000000000000000000000000000000..095aae8f8bf8bbe47ea1768a6e2c948bb0ff8f85 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/python/proverbot.py @@ -0,0 +1,37 @@ +from darknet import * + +def predict_tactic(net, s): + prob = 0 + d = c_array(c_float, [0.0]*256) + tac = '' + if not len(s): + s = '\n' + for c in s[:-1]: + d[ord(c)] = 1 + pred = predict(net, d) + d[ord(c)] = 0 + c = s[-1] + while 1: + d[ord(c)] = 1 + pred = predict(net, d) + 
d[ord(c)] = 0 + pred = [pred[i] for i in range(256)] + ind = sample(pred) + c = chr(ind) + prob += math.log(pred[ind]) + if len(tac) and tac[-1] == '.': + break + tac = tac + c + return (tac, prob) + +def predict_tactics(net, s, n): + tacs = [] + for i in range(n): + reset_rnn(net) + tacs.append(predict_tactic(net, s)) + tacs = sorted(tacs, key=lambda x: -x[1]) + return tacs + +net = load_net("cfg/coq.test.cfg", "/home/pjreddie/backup/coq.backup", 0) +t = predict_tactics(net, "+++++\n", 10) +print t diff --git a/workloads/realworld/pipeline/darknet/resnet18/run_resnet18.sh b/workloads/realworld/pipeline/darknet/resnet18/run_resnet18.sh new file mode 100755 index 0000000000000000000000000000000000000000..10257ad02affc17f8287ea42917813fe320f5f23 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/resnet18/run_resnet18.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg diff --git a/workloads/realworld/pipeline/darknet/resnet18/run_super.sh b/workloads/realworld/pipeline/darknet/resnet18/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..10257ad02affc17f8287ea42917813fe320f5f23 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/resnet18/run_super.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg diff --git a/workloads/realworld/pipeline/darknet/resnet18_b/run_super.sh b/workloads/realworld/pipeline/darknet/resnet18_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..012635a1ce64ecda462e50097be554185989ae7a --- /dev/null +++ b/workloads/realworld/pipeline/darknet/resnet18_b/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier test ../cfg/imagenet1k.data 
../cfg/resnet18_b.cfg diff --git a/workloads/realworld/pipeline/darknet/resnet18_t/run_super.sh b/workloads/realworld/pipeline/darknet/resnet18_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..0eb59b3bd65cf0186c5ed5f36eff5ec34d54298c --- /dev/null +++ b/workloads/realworld/pipeline/darknet/resnet18_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier train ../cfg/imagenet1k.data ../cfg/resnet18_t.cfg \ No newline at end of file diff --git a/workloads/realworld/pipeline/darknet/resnet50/run_resnet50.sh b/workloads/realworld/pipeline/darknet/resnet50/run_resnet50.sh new file mode 100755 index 0000000000000000000000000000000000000000..ce88111af06600203c1ae94421134eb3404f51a4 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/resnet50/run_resnet50.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet50.cfg ../../../../../data/darknet/resnet50.weights ../data/dog.jpg diff --git a/workloads/realworld/pipeline/darknet/resnet50/run_super.sh b/workloads/realworld/pipeline/darknet/resnet50/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..b11c0dd7d233f7bb654100eb5882112ca183821e --- /dev/null +++ b/workloads/realworld/pipeline/darknet/resnet50/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet50.cfg ../../../../../data/darknet/resnet50.weights ../data/dog.jpg +../darknet classifier test ../cfg/imagenet1k.data ../cfg/resnet50_b.cfg \ No newline at end of file diff --git a/workloads/realworld/pipeline/darknet/resnet50_b/run_super.sh b/workloads/realworld/pipeline/darknet/resnet50_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..e6f1b1d59b612bef36d04af547bf61808261eb12 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/resnet50_b/run_super.sh @@ -0,0 
+1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier test ../cfg/imagenet1k.data ../cfg/resnet50_b.cfg diff --git a/workloads/realworld/pipeline/darknet/resnet50_t/run_super.sh b/workloads/realworld/pipeline/darknet/resnet50_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..4d4c7feebd1bd5bdcded72e3d4cf58045949ac90 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/resnet50_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier train ../cfg/imagenet1k.data ../cfg/resnet50_t.cfg diff --git a/workloads/realworld/pipeline/darknet/scripts/dice_label.sh b/workloads/realworld/pipeline/darknet/scripts/dice_label.sh new file mode 100644 index 0000000000000000000000000000000000000000..f19f8a49481b46d5a04dd18b1b05af8928b21957 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/scripts/dice_label.sh @@ -0,0 +1,20 @@ +mkdir -p images +mkdir -p images/orig +mkdir -p images/train +mkdir -p images/val + +ffmpeg -i Face1.mp4 images/orig/face1_%6d.jpg +ffmpeg -i Face2.mp4 images/orig/face2_%6d.jpg +ffmpeg -i Face3.mp4 images/orig/face3_%6d.jpg +ffmpeg -i Face4.mp4 images/orig/face4_%6d.jpg +ffmpeg -i Face5.mp4 images/orig/face5_%6d.jpg +ffmpeg -i Face6.mp4 images/orig/face6_%6d.jpg + +mogrify -resize 100x100^ -gravity center -crop 100x100+0+0 +repage images/orig/* + +ls images/orig/* | shuf | head -n 1000 | xargs mv -t images/val +mv images/orig/* images/train + +find `pwd`/images/train > dice.train.list -name \*.jpg +find `pwd`/images/val > dice.val.list -name \*.jpg + diff --git a/workloads/realworld/pipeline/darknet/scripts/gen_tactic.sh b/workloads/realworld/pipeline/darknet/scripts/gen_tactic.sh new file mode 100755 index 
0000000000000000000000000000000000000000..ffa30d27754dacdd03bd5996d41cbfab14db0f39 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/scripts/gen_tactic.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Usage: +# wget http://pjreddie.com/media/files/peek.weights +# scripts/gen_tactic.sh < data/goal.txt +./darknet rnn generatetactic cfg/gru.cfg peek.weights 2>/dev/null diff --git a/workloads/realworld/pipeline/darknet/scripts/get_coco_dataset.sh b/workloads/realworld/pipeline/darknet/scripts/get_coco_dataset.sh new file mode 100644 index 0000000000000000000000000000000000000000..28463015d1748fd331e071a0a778c6d4500b29ef --- /dev/null +++ b/workloads/realworld/pipeline/darknet/scripts/get_coco_dataset.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Clone COCO API +git clone https://github.com/pdollar/coco +cd coco + +mkdir images +cd images + +# Download Images +wget -c https://pjreddie.com/media/files/train2014.zip +wget -c https://pjreddie.com/media/files/val2014.zip + +# Unzip +unzip -q train2014.zip +unzip -q val2014.zip + +cd .. 
+ +# Download COCO Metadata +wget -c https://pjreddie.com/media/files/instances_train-val2014.zip +wget -c https://pjreddie.com/media/files/coco/5k.part +wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part +wget -c https://pjreddie.com/media/files/coco/labels.tgz +tar xzf labels.tgz +unzip -q instances_train-val2014.zip + +# Set Up Image Lists +paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt +paste <(awk "{print \"$PWD\"}" trainvalno5k.txt + diff --git a/workloads/realworld/pipeline/darknet/scripts/imagenet_label.sh b/workloads/realworld/pipeline/darknet/scripts/imagenet_label.sh new file mode 100644 index 0000000000000000000000000000000000000000..01e4306ee3cf7322427374f01c766bcdef970922 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/scripts/imagenet_label.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +mkdir -p labelled +wd=`pwd` + +for f in val/*.xml; +do +label=`grep -m1 "" $f | grep -oP '\K[^<]*'` +im=`echo $f | sed 's/val/imgs/; s/xml/JPEG/'` +out=`echo $im | sed 's/JPEG/'${label}'.JPEG/; s/imgs/labelled/'` +ln -s ${wd}/$im ${wd}/$out +done + +find ${wd}/labelled -name \*.JPEG > inet.val.list + diff --git a/workloads/realworld/pipeline/darknet/scripts/voc_label.py b/workloads/realworld/pipeline/darknet/scripts/voc_label.py new file mode 100644 index 0000000000000000000000000000000000000000..679fc366890d9eccf15124f950a274d8ad24fc83 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/scripts/voc_label.py @@ -0,0 +1,59 @@ +import xml.etree.ElementTree as ET +import pickle +import os +from os import listdir, getcwd +from os.path import join + +sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')] + +classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] + + +def convert(size, box): + dw = 1./(size[0]) + dh = 1./(size[1]) + x = (box[0] + 
box[1])/2.0 - 1 + y = (box[2] + box[3])/2.0 - 1 + w = box[1] - box[0] + h = box[3] - box[2] + x = x*dw + w = w*dw + y = y*dh + h = h*dh + return (x,y,w,h) + +def convert_annotation(year, image_id): + in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id)) + out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w') + tree=ET.parse(in_file) + root = tree.getroot() + size = root.find('size') + w = int(size.find('width').text) + h = int(size.find('height').text) + + for obj in root.iter('object'): + difficult = obj.find('difficult').text + cls = obj.find('name').text + if cls not in classes or int(difficult)==1: + continue + cls_id = classes.index(cls) + xmlbox = obj.find('bndbox') + b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) + bb = convert((w,h), b) + out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') + +wd = getcwd() + +for year, image_set in sets: + if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)): + os.makedirs('VOCdevkit/VOC%s/labels/'%(year)) + image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split() + list_file = open('%s_%s.txt'%(year, image_set), 'w') + for image_id in image_ids: + list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id)) + convert_annotation(year, image_id) + list_file.close() + +os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt") +os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt") + diff --git a/workloads/realworld/pipeline/darknet/src/activation_kernels.cu b/workloads/realworld/pipeline/darknet/src/activation_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..71ca939c1bd7559a94c4be3344c57a59ea3640e6 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/activation_kernels.cu @@ -0,0 +1,206 @@ +#include 
"cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "activations.h" +#include "cuda_dark.h" +} + + +__device__ float lhtan_activate_kernel(float x) +{ + if(x < 0) return .001f*x; + if(x > 1) return .001f*(x-1.f) + 1.f; + return x; +} +__device__ float lhtan_gradient_kernel(float x) +{ + if(x > 0 && x < 1) return 1; + return .001; +} + +__device__ float hardtan_activate_kernel(float x) +{ + if (x < -1) return -1; + if (x > 1) return 1; + return x; +} +__device__ float linear_activate_kernel(float x){return x;} +__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));} +__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;} +__device__ float relu_activate_kernel(float x){return x*(x>0);} +__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);} +__device__ float selu_activate_kernel(float x){return (x >= 0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x)-1);} +__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;} +__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;} +__device__ float leaky_activate_kernel(float x){return (x>0) ? 
x : .1f*x;} +__device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);} +__device__ float plse_activate_kernel(float x) +{ + if(x < -4) return .01f * (x + 4); + if(x > 4) return .01f * (x - 4) + 1; + return .125f*x + .5f; +} +__device__ float stair_activate_kernel(float x) +{ + int n = floorf(x); + if (n%2 == 0) return floorf(x/2); + else return (x - n) + floorf(x/2); +} + + +__device__ float hardtan_gradient_kernel(float x) +{ + if (x > -1 && x < 1) return 1; + return 0; +} +__device__ float linear_gradient_kernel(float x){return 1;} +__device__ float logistic_gradient_kernel(float x){return (1-x)*x;} +__device__ float loggy_gradient_kernel(float x) +{ + float y = (x+1)/2; + return 2*(1-y)*y; +} +__device__ float relu_gradient_kernel(float x){return (x>0);} +__device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);} +__device__ float selu_gradient_kernel(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);} +__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;} +__device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;} +__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;} +__device__ float tanh_gradient_kernel(float x){return 1-x*x;} +__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? 
.01f : .125f;} +__device__ float stair_gradient_kernel(float x) +{ + if (floorf(x) == x) return 0; + return 1; +} + +__device__ float activate_kernel(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_activate_kernel(x); + case LOGISTIC: + return logistic_activate_kernel(x); + case LOGGY: + return loggy_activate_kernel(x); + case RELU: + return relu_activate_kernel(x); + case ELU: + return elu_activate_kernel(x); + case SELU: + return selu_activate_kernel(x); + case RELIE: + return relie_activate_kernel(x); + case RAMP: + return ramp_activate_kernel(x); + case LEAKY: + return leaky_activate_kernel(x); + case TANH: + return tanh_activate_kernel(x); + case PLSE: + return plse_activate_kernel(x); + case STAIR: + return stair_activate_kernel(x); + case HARDTAN: + return hardtan_activate_kernel(x); + case LHTAN: + return lhtan_activate_kernel(x); + } + return 0; +} + +__device__ float gradient_kernel(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_gradient_kernel(x); + case LOGISTIC: + return logistic_gradient_kernel(x); + case LOGGY: + return loggy_gradient_kernel(x); + case RELU: + return relu_gradient_kernel(x); + case ELU: + return elu_gradient_kernel(x); + case SELU: + return selu_gradient_kernel(x); + case RELIE: + return relie_gradient_kernel(x); + case RAMP: + return ramp_gradient_kernel(x); + case LEAKY: + return leaky_gradient_kernel(x); + case TANH: + return tanh_gradient_kernel(x); + case PLSE: + return plse_gradient_kernel(x); + case STAIR: + return stair_gradient_kernel(x); + case HARDTAN: + return hardtan_gradient_kernel(x); + case LHTAN: + return lhtan_gradient_kernel(x); + } + return 0; +} + +__global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int i = id % s; + int b = id / s; + float x1 = x[b*s + i]; + float x2 = x[b*s + s/2 + i]; + if(id < n) { + float de = dy[id]; + dx[b*s 
+ i] = x2*de; + dx[b*s + s/2 + i] = x1*de; + } +} + +extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y) +{ + binary_gradient_array_kernel<<>>(x, dx, n/2, size, a, y); + check_error(cudaPeekAtLastError()); +} +__global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int i = id % s; + int b = id / s; + float x1 = x[b*s + i]; + float x2 = x[b*s + s/2 + i]; + if(id < n) y[id] = x1*x2; +} + +extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y) +{ + binary_activate_array_kernel<<>>(x, n/2, size, a, y); + check_error(cudaPeekAtLastError()); +} + +__global__ void activate_array_kernel(float *x, int n, ACTIVATION a) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n) x[i] = activate_kernel(x[i], a); +} + +__global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n) delta[i] *= gradient_kernel(x[i], a); +} + +extern "C" void activate_array_gpu(float *x, int n, ACTIVATION a, cudaStream_t stream) +{ + activate_array_kernel<<>>(x, n, a); + check_error_stream(cudaPeekAtLastError(), stream); +} + +extern "C" void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta) +{ + gradient_array_kernel<<>>(x, n, a, delta); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/pipeline/darknet/src/activation_layer.c b/workloads/realworld/pipeline/darknet/src/activation_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..7afe08f4b709b23fa5ff7190fda476b0de32eb5d --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/activation_layer.c @@ -0,0 +1,64 @@ +#include "activation_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" 
+ +#include +#include +#include +#include + +layer* make_activation_layer(int batch, int inputs, ACTIVATION activation) +{ + // layer l = {0}; + layer *l = calloc(1, sizeof(layer)); + l->type = ACTIVE; + + l->inputs = inputs; + l->outputs = inputs; + l->batch=batch; + + l->output = calloc(batch*inputs, sizeof(float*)); + l->delta = calloc(batch*inputs, sizeof(float*)); + + l->forward = forward_activation_layer; + l->backward = backward_activation_layer; +#ifdef GPU + l->forward_gpu = forward_activation_layer_gpu; + l->backward_gpu = backward_activation_layer_gpu; + + l->output_gpu = cuda_make_array(l->output, inputs*batch); + l->delta_gpu = cuda_make_array(l->delta, inputs*batch); +#endif + l->activation = activation; + fprintf(stderr, "Activation Layer: %d inputs\n", inputs); + return l; +} + +void forward_activation_layer(layer *l, network net) +{ + copy_cpu(l->outputs*l->batch, net.input, 1, l->output, 1); + activate_array(l->output, l->outputs*l->batch, l->activation); +} + +void backward_activation_layer(layer *l, network net) +{ + gradient_array(l->output, l->outputs*l->batch, l->activation, l->delta); + copy_cpu(l->outputs*l->batch, l->delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_activation_layer_gpu(layer *l, network net) +{ + copy_gpu(l->outputs*l->batch, net.input_gpu, 1, l->output_gpu, 1, net.streams[l->stream_index]); + activate_array_gpu(l->output_gpu, l->outputs*l->batch, l->activation, net.streams[l->stream_index]); +} + +void backward_activation_layer_gpu(layer *l, network net) +{ + gradient_array_gpu(l->output_gpu, l->outputs*l->batch, l->activation, l->delta_gpu); + copy_gpu(l->outputs*l->batch, l->delta_gpu, 1, net.delta_gpu, 1, net.streams[l->stream_index]); +} +#endif diff --git a/workloads/realworld/pipeline/darknet/src/activation_layer.h b/workloads/realworld/pipeline/darknet/src/activation_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1b83edf0d70637706c9a02a983a3e1ae43e994b6 --- /dev/null +++ 
b/workloads/realworld/pipeline/darknet/src/activation_layer.h @@ -0,0 +1,19 @@ +#ifndef ACTIVATION_LAYER_H +#define ACTIVATION_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer* make_activation_layer(int batch, int inputs, ACTIVATION activation); + +void forward_activation_layer(layer* l, network net); +void backward_activation_layer(layer* l, network net); + +#ifdef GPU +void forward_activation_layer_gpu(layer* l, network net); +void backward_activation_layer_gpu(layer* l, network net); +#endif + +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/activations.c b/workloads/realworld/pipeline/darknet/src/activations.c new file mode 100644 index 0000000000000000000000000000000000000000..da1a17a89b46b6c41fa80b5dd113e1b30c910712 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/activations.c @@ -0,0 +1,150 @@ +#include "activations.h" + +#include +#include +#include +#include + +char *get_activation_string(ACTIVATION a) +{ + switch(a){ + case LOGISTIC: + return "logistic"; + case LOGGY: + return "loggy"; + case RELU: + return "relu"; + case ELU: + return "elu"; + case SELU: + return "selu"; + case RELIE: + return "relie"; + case RAMP: + return "ramp"; + case LINEAR: + return "linear"; + case TANH: + return "tanh"; + case PLSE: + return "plse"; + case LEAKY: + return "leaky"; + case STAIR: + return "stair"; + case HARDTAN: + return "hardtan"; + case LHTAN: + return "lhtan"; + default: + break; + } + return "relu"; +} + +ACTIVATION get_activation(char *s) +{ + if (strcmp(s, "logistic")==0) return LOGISTIC; + if (strcmp(s, "loggy")==0) return LOGGY; + if (strcmp(s, "relu")==0) return RELU; + if (strcmp(s, "elu")==0) return ELU; + if (strcmp(s, "selu")==0) return SELU; + if (strcmp(s, "relie")==0) return RELIE; + if (strcmp(s, "plse")==0) return PLSE; + if (strcmp(s, "hardtan")==0) return HARDTAN; + if (strcmp(s, "lhtan")==0) return LHTAN; + if (strcmp(s, "linear")==0) return LINEAR; + if (strcmp(s, "ramp")==0) 
return RAMP; + if (strcmp(s, "leaky")==0) return LEAKY; + if (strcmp(s, "tanh")==0) return TANH; + if (strcmp(s, "stair")==0) return STAIR; + fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s); + return RELU; +} + +float activate(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_activate(x); + case LOGISTIC: + return logistic_activate(x); + case LOGGY: + return loggy_activate(x); + case RELU: + return relu_activate(x); + case ELU: + return elu_activate(x); + case SELU: + return selu_activate(x); + case RELIE: + return relie_activate(x); + case RAMP: + return ramp_activate(x); + case LEAKY: + return leaky_activate(x); + case TANH: + return tanh_activate(x); + case PLSE: + return plse_activate(x); + case STAIR: + return stair_activate(x); + case HARDTAN: + return hardtan_activate(x); + case LHTAN: + return lhtan_activate(x); + } + return 0; +} + +void activate_array(float *x, const int n, const ACTIVATION a) +{ + int i; + for(i = 0; i < n; ++i){ + x[i] = activate(x[i], a); + } +} + +float gradient(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_gradient(x); + case LOGISTIC: + return logistic_gradient(x); + case LOGGY: + return loggy_gradient(x); + case RELU: + return relu_gradient(x); + case ELU: + return elu_gradient(x); + case SELU: + return selu_gradient(x); + case RELIE: + return relie_gradient(x); + case RAMP: + return ramp_gradient(x); + case LEAKY: + return leaky_gradient(x); + case TANH: + return tanh_gradient(x); + case PLSE: + return plse_gradient(x); + case STAIR: + return stair_gradient(x); + case HARDTAN: + return hardtan_gradient(x); + case LHTAN: + return lhtan_gradient(x); + } + return 0; +} + +void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta) +{ + int i; + for(i = 0; i < n; ++i){ + delta[i] *= gradient(x[i], a); + } +} + diff --git a/workloads/realworld/pipeline/darknet/src/activations.h b/workloads/realworld/pipeline/darknet/src/activations.h new 
file mode 100644 index 0000000000000000000000000000000000000000..dc8476533ab849089af1994d811cd43b6ea7dd40 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/activations.h @@ -0,0 +1,87 @@ +#ifndef ACTIVATIONS_H +#define ACTIVATIONS_H +#include "darknet.h" +#include "cuda_dark.h" +#include "math.h" + +ACTIVATION get_activation(char *s); + +char *get_activation_string(ACTIVATION a); +float activate(float x, ACTIVATION a); +float gradient(float x, ACTIVATION a); +void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta); +void activate_array(float *x, const int n, const ACTIVATION a); +#ifdef GPU +void activate_array_gpu(float *x, int n, ACTIVATION a, cudaStream_t stream); +void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta); +#endif + +static inline float stair_activate(float x) +{ + int n = floor(x); + if (n%2 == 0) return floor(x/2.); + else return (x - n) + floor(x/2.); +} +static inline float hardtan_activate(float x) +{ + if (x < -1) return -1; + if (x > 1) return 1; + return x; +} +static inline float linear_activate(float x){return x;} +static inline float logistic_activate(float x){return 1./(1. + exp(-x));} +static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;} +static inline float relu_activate(float x){return x*(x>0);} +static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);} +static inline float selu_activate(float x){return (x >= 0)*1.0507*x + (x < 0)*1.0507*1.6732*(exp(x)-1);} +static inline float relie_activate(float x){return (x>0) ? x : .01*x;} +static inline float ramp_activate(float x){return x*(x>0)+.1*x;} +static inline float leaky_activate(float x){return (x>0) ? 
x : .1*x;} +static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);} +static inline float plse_activate(float x) +{ + if(x < -4) return .01 * (x + 4); + if(x > 4) return .01 * (x - 4) + 1; + return .125*x + .5; +} + +static inline float lhtan_activate(float x) +{ + if(x < 0) return .001*x; + if(x > 1) return .001*(x-1) + 1; + return x; +} +static inline float lhtan_gradient(float x) +{ + if(x > 0 && x < 1) return 1; + return .001; +} + +static inline float hardtan_gradient(float x) +{ + if (x > -1 && x < 1) return 1; + return 0; +} +static inline float linear_gradient(float x){return 1;} +static inline float logistic_gradient(float x){return (1-x)*x;} +static inline float loggy_gradient(float x) +{ + float y = (x+1.)/2.; + return 2*(1-y)*y; +} +static inline float stair_gradient(float x) +{ + if (floor(x) == x) return 0; + return 1; +} +static inline float relu_gradient(float x){return (x>0);} +static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);} +static inline float selu_gradient(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);} +static inline float relie_gradient(float x){return (x>0) ? 1 : .01;} +static inline float ramp_gradient(float x){return (x>0)+.1;} +static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;} +static inline float tanh_gradient(float x){return 1-x*x;} +static inline float plse_gradient(float x){return (x < 0 || x > 1) ? 
.01 : .125;} + +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/avgpool_layer.c b/workloads/realworld/pipeline/darknet/src/avgpool_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..a9725622b2232fddb981e987b31d57ec8e3cf0de --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/avgpool_layer.c @@ -0,0 +1,72 @@ +#include "avgpool_layer.h" +#include "cuda_dark.h" +#include + +avgpool_layer* make_avgpool_layer(int batch, int w, int h, int c) +{ + fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c); + // avgpool_layer *l = {0}; + avgpool_layer *l = calloc(1, sizeof(avgpool_layer)); + l->type = AVGPOOL; + l->batch = batch; + l->h = h; + l->w = w; + l->c = c; + l->out_w = 1; + l->out_h = 1; + l->out_c = c; + l->outputs = l->out_c; + l->inputs = h*w*c; + int output_size = l->outputs * batch; + l->output = calloc(output_size, sizeof(float)); + l->delta = calloc(output_size, sizeof(float)); + l->forward = forward_avgpool_layer; + l->backward = backward_avgpool_layer; + #ifdef GPU + l->forward_gpu = forward_avgpool_layer_gpu; + l->backward_gpu = backward_avgpool_layer_gpu; + // l->output_gpu = cuda_make_array(l->output, output_size); + // l->delta_gpu = cuda_make_array(l->delta, output_size); + #endif + return l; +} + +void resize_avgpool_layer(avgpool_layer *l, int w, int h) +{ + l->w = w; + l->h = h; + l->inputs = h*w*l->c; +} + +void forward_avgpool_layer(avgpool_layer *l, network net) +{ + int b,i,k; + + for(b = 0; b < l->batch; ++b){ + for(k = 0; k < l->c; ++k){ + int out_index = k + b*l->c; + l->output[out_index] = 0; + for(i = 0; i < l->h*l->w; ++i){ + int in_index = i + l->h*l->w*(k + b*l->c); + l->output[out_index] += net.input[in_index]; + } + l->output[out_index] /= l->h*l->w; + } + } +} + +void backward_avgpool_layer(avgpool_layer *l, network net) +{ + int b,i,k; + + for(b = 0; b < l->batch; ++b){ + for(k = 0; k < l->c; ++k){ + int out_index = k + b*l->c; + for(i = 0; i < l->h*l->w; ++i){ + int in_index = i + 
l->h*l->w*(k + b*l->c); + net.delta[in_index] += l->delta[out_index] / (l->h*l->w); + } + } + } +} + diff --git a/workloads/realworld/pipeline/darknet/src/avgpool_layer.h b/workloads/realworld/pipeline/darknet/src/avgpool_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..980849a325595661e39d0f40e856c50053bf6f6a --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/avgpool_layer.h @@ -0,0 +1,23 @@ +#ifndef AVGPOOL_LAYER_H +#define AVGPOOL_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +typedef layer avgpool_layer; + +image get_avgpool_image(avgpool_layer* l); +avgpool_layer* make_avgpool_layer(int batch, int w, int h, int c); +void resize_avgpool_layer(avgpool_layer *l, int w, int h); +void forward_avgpool_layer(avgpool_layer *l, network net); +void backward_avgpool_layer(avgpool_layer *l, network net); + +#ifdef GPU +void forward_avgpool_layer_gpu(avgpool_layer *l, network net); +void backward_avgpool_layer_gpu(avgpool_layer *l, network net); +#endif + +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/avgpool_layer_kernels.cu b/workloads/realworld/pipeline/darknet/src/avgpool_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..af02af051a62daab35be0070258841c9172a98e5 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/avgpool_layer_kernels.cu @@ -0,0 +1,69 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "avgpool_layer.h" +#include "cuda_dark.h" +} + +__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int k = id % c; + id /= c; + int b = id; + + int i; + int out_index = (k + c*b); + output[out_index] = 0; + for(i = 0; i < w*h; ++i){ + int in_index = i + h*w*(k + b*c); + output[out_index] += input[in_index]; + } + 
output[out_index] /= w*h; +} + +__global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int k = id % c; + id /= c; + int b = id; + + int i; + int out_index = (k + c*b); + for(i = 0; i < w*h; ++i){ + int in_index = i + h*w*(k + b*c); + in_delta[in_index] += out_delta[out_index] / (w*h); + } +} + +extern "C" void forward_avgpool_layer_gpu(avgpool_layer* layer, network net) +{ + // Ruihao + // forward_layer_start(layer, net); + // Ruihao + size_t n = layer->c*layer->batch; + + forward_avgpool_layer_kernel<<>>(n, layer->w, layer->h, layer->c, net.input_gpu, layer->output_gpu); + check_error(cudaPeekAtLastError()); + // Ruihao + // forward_layer_end(layer, net); + // Ruihao +} + +extern "C" void backward_avgpool_layer_gpu(avgpool_layer* layer, network net) +{ + // fprintf(stderr, "backward_avgpool_layer_gpu \n"); + size_t n = layer->c*layer->batch; + + backward_avgpool_layer_kernel<<>>(n, layer->w, layer->h, layer->c, net.delta_gpu, layer->delta_gpu); + check_error(cudaPeekAtLastError()); + // fprintf(stderr, "backward_avgpool_layer_gpu succeed \n"); +} + diff --git a/workloads/realworld/pipeline/darknet/src/batchnorm_layer.c b/workloads/realworld/pipeline/darknet/src/batchnorm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..d5a63adf51678d2257f04c878c4935ee03c4133d --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/batchnorm_layer.c @@ -0,0 +1,281 @@ +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "blas.h" +#include + +layer* make_batchnorm_layer(int batch, int w, int h, int c) +{ + fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c); + layer *l = calloc(1, sizeof(layer)); + l->type = BATCHNORM; + l->batch = batch; + l->h = l->out_h = h; + l->w = l->out_w = w; + l->c = l->out_c = c; + l->output = calloc(h * w * c * batch, 
sizeof(float)); + l->delta = calloc(h * w * c * batch, sizeof(float)); + l->inputs = w*h*c; + l->outputs = l->inputs; + + l->scales = calloc(c, sizeof(float)); + l->scale_updates = calloc(c, sizeof(float)); + l->biases = calloc(c, sizeof(float)); + l->bias_updates = calloc(c, sizeof(float)); + int i; + for(i = 0; i < c; ++i){ + l->scales[i] = 1; + } + + l->mean = calloc(c, sizeof(float)); + l->variance = calloc(c, sizeof(float)); + + l->rolling_mean = calloc(c, sizeof(float)); + l->rolling_variance = calloc(c, sizeof(float)); + + l->forward = forward_batchnorm_layer; + l->backward = backward_batchnorm_layer; +#ifdef GPU + l->forward_gpu = forward_batchnorm_layer_gpu; + l->backward_gpu = backward_batchnorm_layer_gpu; + + l->output_gpu = cuda_make_array(l->output, h * w * c * batch); + l->delta_gpu = cuda_make_array(l->delta, h * w * c * batch); + + l->biases_gpu = cuda_make_array(l->biases, c); + l->bias_updates_gpu = cuda_make_array(l->bias_updates, c); + + l->scales_gpu = cuda_make_array(l->scales, c); + l->scale_updates_gpu = cuda_make_array(l->scale_updates, c); + + l->mean_gpu = cuda_make_array(l->mean, c); + l->variance_gpu = cuda_make_array(l->variance, c); + + l->rolling_mean_gpu = cuda_make_array(l->mean, c); + l->rolling_variance_gpu = cuda_make_array(l->variance, c); + + l->mean_delta_gpu = cuda_make_array(l->mean, c); + l->variance_delta_gpu = cuda_make_array(l->variance, c); + + l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); + l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); + #ifdef CUDNN + cudnnCreateTensorDescriptor(&l->normTensorDesc); + cudnnCreateTensorDescriptor(&l->dstTensorDesc); + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + + #endif +#endif + return l; +} + +void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int 
size, float *scale_updates) +{ + int i,b,f; + for(f = 0; f < n; ++f){ + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int index = i + size*(f + n*b); + sum += delta[index] * x_norm[index]; + } + } + scale_updates[f] += sum; + } +} + +void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + + int i,j,k; + for(i = 0; i < filters; ++i){ + mean_delta[i] = 0; + for (j = 0; j < batch; ++j) { + for (k = 0; k < spatial; ++k) { + int index = j*filters*spatial + i*spatial + k; + mean_delta[i] += delta[index]; + } + } + mean_delta[i] *= (-1./sqrt(variance[i] + .00001f)); + } +} +void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + + int i,j,k; + for(i = 0; i < filters; ++i){ + variance_delta[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance_delta[i] += delta[index]*(x[index] - mean[i]); + } + } + variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.)); + } +} +void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + int f, j, k; + for(j = 0; j < batch; ++j){ + for(f = 0; f < filters; ++f){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + f*spatial + k; + delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f)) + variance_delta[f] * 2. 
* (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); + } + } + } +} + +void resize_batchnorm_layer(layer *layer, int w, int h) +{ + fprintf(stderr, "Not implemented\n"); +} + +void forward_batchnorm_layer(layer *l, network net) +{ + if(l->type == BATCHNORM) copy_cpu(l->outputs*l->batch, net.input, 1, l->output, 1); + copy_cpu(l->outputs*l->batch, l->output, 1, l->x, 1); + if(net.train){ + mean_cpu(l->output, l->batch, l->out_c, l->out_h*l->out_w, l->mean); + variance_cpu(l->output, l->mean, l->batch, l->out_c, l->out_h*l->out_w, l->variance); + + scal_cpu(l->out_c, .99, l->rolling_mean, 1); + axpy_cpu(l->out_c, .01, l->mean, 1, l->rolling_mean, 1); + scal_cpu(l->out_c, .99, l->rolling_variance, 1); + axpy_cpu(l->out_c, .01, l->variance, 1, l->rolling_variance, 1); + + normalize_cpu(l->output, l->mean, l->variance, l->batch, l->out_c, l->out_h*l->out_w); + copy_cpu(l->outputs*l->batch, l->output, 1, l->x_norm, 1); + } else { + normalize_cpu(l->output, l->rolling_mean, l->rolling_variance, l->batch, l->out_c, l->out_h*l->out_w); + } + scale_bias(l->output, l->scales, l->batch, l->out_c, l->out_h*l->out_w); + add_bias(l->output, l->biases, l->batch, l->out_c, l->out_h*l->out_w); +} + +void backward_batchnorm_layer(layer *l, network net) +{ + if(!net.train){ + l->mean = l->rolling_mean; + l->variance = l->rolling_variance; + } + backward_bias(l->bias_updates, l->delta, l->batch, l->out_c, l->out_w*l->out_h); + backward_scale_cpu(l->x_norm, l->delta, l->batch, l->out_c, l->out_w*l->out_h, l->scale_updates); + + scale_bias(l->delta, l->scales, l->batch, l->out_c, l->out_h*l->out_w); + + mean_delta_cpu(l->delta, l->variance, l->batch, l->out_c, l->out_w*l->out_h, l->mean_delta); + variance_delta_cpu(l->x, l->delta, l->mean, l->variance, l->batch, l->out_c, l->out_w*l->out_h, l->variance_delta); + normalize_delta_cpu(l->x, l->mean, l->variance, l->mean_delta, l->variance_delta, l->batch, l->out_c, l->out_w*l->out_h, l->delta); + if(l->type == 
BATCHNORM) copy_cpu(l->outputs*l->batch, l->delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_batchnorm_layer(layer *l) +{ + cuda_pull_array(l->scales_gpu, l->scales, l->c); + cuda_pull_array(l->rolling_mean_gpu, l->rolling_mean, l->c); + cuda_pull_array(l->rolling_variance_gpu, l->rolling_variance, l->c); +} +void push_batchnorm_layer(layer *l) +{ + cuda_push_array(l->scales_gpu, l->scales, l->c); + cuda_push_array(l->rolling_mean_gpu, l->rolling_mean, l->c); + cuda_push_array(l->rolling_variance_gpu, l->rolling_variance, l->c); +} + +void forward_batchnorm_layer_gpu(layer *l, network net) +{ + if(l->type == BATCHNORM) { + copy_gpu(l->outputs*l->batch, net.input_gpu, 1, l->output_gpu, 1, net.streams[l->stream_index]); + } + copy_gpu(l->outputs*l->batch, l->output_gpu, 1, l->x_gpu, 1, net.streams[l->stream_index]); + if (net.train) { +#ifdef CUDNN + float one = 1; + float zero = 0; + cudnnBatchNormalizationForwardTraining(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + l->dstTensorDesc, + l->x_gpu, + l->dstTensorDesc, + l->output_gpu, + l->normTensorDesc, + l->scales_gpu, + l->biases_gpu, + .01, + l->rolling_mean_gpu, + l->rolling_variance_gpu, + .00001, + l->mean_gpu, + l->variance_gpu); +#else + fast_mean_gpu(l->output_gpu, l->batch, l->out_c, l->out_h*l->out_w, l->mean_gpu, net.streams[l->stream_index]); + fast_variance_gpu(l->output_gpu, l->mean_gpu, l->batch, l->out_c, l->out_h*l->out_w, l->variance_gpu, net.streams[l->stream_index]); + + scal_gpu(l->out_c, .99, l->rolling_mean_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->out_c, .01, l->mean_gpu, 1, l->rolling_mean_gpu, 1, net.streams[l->stream_index]); + scal_gpu(l->out_c, .99, l->rolling_variance_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->out_c, .01, l->variance_gpu, 1, l->rolling_variance_gpu, 1, net.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, l->output_gpu, 1, l->x_gpu, 1, net.streams[l->stream_index]); + normalize_gpu(l->output_gpu, l->mean_gpu, 
l->variance_gpu, l->batch, l->out_c, l->out_h*l->out_w, net.streams[l->stream_index]); + copy_gpu(l->outputs*l->batch, l->output_gpu, 1, l->x_norm_gpu, 1, net.streams[l->stream_index]); + + scale_bias_gpu(l->output_gpu, l->scales_gpu, l->batch, l->out_c, l->out_h*l->out_w, net.streams[l->stream_index]); + add_bias_gpu(l->output_gpu, l->biases_gpu, l->batch, l->out_c, l->out_w*l->out_h, net.streams[l->stream_index]); +#endif + } else { + normalize_gpu(l->output_gpu, l->rolling_mean_gpu, l->rolling_variance_gpu, l->batch, l->out_c, l->out_h*l->out_w, net.streams[l->stream_index]); + scale_bias_gpu(l->output_gpu, l->scales_gpu, l->batch, l->out_c, l->out_h*l->out_w, net.streams[l->stream_index]); + add_bias_gpu(l->output_gpu, l->biases_gpu, l->batch, l->out_c, l->out_w*l->out_h, net.streams[l->stream_index]); + } + +} + +void backward_batchnorm_layer_gpu(layer *l, network net) +{ + if(!net.train){ + l->mean_gpu = l->rolling_mean_gpu; + l->variance_gpu = l->rolling_variance_gpu; + } +#ifdef CUDNN + float one = 1; + float zero = 0; + cudnnBatchNormalizationBackward(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + &one, + &one, + l->dstTensorDesc, + l->x_gpu, + l->dstTensorDesc, + l->delta_gpu, + l->dstTensorDesc, + l->x_norm_gpu, + l->normTensorDesc, + l->scales_gpu, + l->scale_updates_gpu, + l->bias_updates_gpu, + .00001, + l->mean_gpu, + l->variance_gpu); + copy_gpu(l->outputs*l->batch, l->x_norm_gpu, 1, l->delta_gpu, 1, net.streams[l->stream_index]); +#else + backward_bias_gpu(l->bias_updates_gpu, l->delta_gpu, l->batch, l->out_c, l->out_w*l->out_h); + backward_scale_gpu(l->x_norm_gpu, l->delta_gpu, l->batch, l->out_c, l->out_w*l->out_h, l->scale_updates_gpu); + + scale_bias_gpu(l->delta_gpu, l->scales_gpu, l->batch, l->out_c, l->out_h*l->out_w, net.streams[l->stream_index]); + + fast_mean_delta_gpu(l->delta_gpu, l->variance_gpu, l->batch, l->out_c, l->out_w*l->out_h, l->mean_delta_gpu); + fast_variance_delta_gpu(l->x_gpu, l->delta_gpu, l->mean_gpu, 
l->variance_gpu, l->batch, l->out_c, l->out_w*l->out_h, l->variance_delta_gpu); + normalize_delta_gpu(l->x_gpu, l->mean_gpu, l->variance_gpu, l->mean_delta_gpu, l->variance_delta_gpu, l->batch, l->out_c, l->out_w*l->out_h, l->delta_gpu); +#endif + if(l->type == BATCHNORM) copy_gpu(l->outputs*l->batch, l->delta_gpu, 1, net.delta_gpu, 1, net.streams[l->stream_index]); +} +#endif diff --git a/workloads/realworld/pipeline/darknet/src/batchnorm_layer.h b/workloads/realworld/pipeline/darknet/src/batchnorm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..a5fcd00854f973e0cfd1cbff5377b0274daeb591 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/batchnorm_layer.h @@ -0,0 +1,19 @@ +#ifndef BATCHNORM_LAYER_H +#define BATCHNORM_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +layer* make_batchnorm_layer(int batch, int w, int h, int c); +void forward_batchnorm_layer(layer *l, network net); +void backward_batchnorm_layer(layer *l, network net); + +#ifdef GPU +void forward_batchnorm_layer_gpu(layer *l, network net); +void backward_batchnorm_layer_gpu(layer *l, network net); +void pull_batchnorm_layer(layer *l); +void push_batchnorm_layer(layer *l); +#endif + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/blas.c b/workloads/realworld/pipeline/darknet/src/blas.c new file mode 100644 index 0000000000000000000000000000000000000000..9e1604449ba9aeb9decdc7f0395a38bd3b478671 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/blas.c @@ -0,0 +1,351 @@ +#include "blas.h" + +#include +#include +#include +#include +#include +#include +void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int b,i,j,k; + int out_c = c/(stride*stride); + + for(b = 0; b < batch; ++b){ + for(k = 0; k < c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int in_index = i + w*(j + h*(k + c*b)); + int c2 = k % out_c; + int offset = k / out_c; + int w2 = i*stride + offset % 
stride; + int h2 = j*stride + offset / stride; + int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); + if(forward) out[out_index] = x[in_index]; + else out[in_index] = x[out_index]; + } + } + } + } +} + +void flatten(float *x, int size, int layers, int batch, int forward) +{ + float *swap = calloc(size*layers*batch, sizeof(float)); + int i,c,b; + for(b = 0; b < batch; ++b){ + for(c = 0; c < layers; ++c){ + for(i = 0; i < size; ++i){ + int i1 = b*layers*size + c*size + i; + int i2 = b*layers*size + i*layers + c; + if (forward) swap[i2] = x[i1]; + else swap[i1] = x[i2]; + } + } + } + memcpy(x, swap, size*layers*batch*sizeof(float)); + free(swap); +} + +void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c) +{ + int i; + for(i = 0; i < n; ++i){ + c[i] = s[i]*a[i] + (1-s[i])*(b ? b[i] : 0); + } +} + +void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc) +{ + int i; + for(i = 0; i < n; ++i){ + if(da) da[i] += dc[i] * s[i]; + if(db) db[i] += dc[i] * (1-s[i]); + ds[i] += dc[i] * (a[i] - b[i]); + } +} + +void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) +{ + int stride = w1/w2; + int sample = w2/w1; + assert(stride == h1/h2); + assert(sample == h2/h1); + if(stride < 1) stride = 1; + if(sample < 1) sample = 1; + int minw = (w1 < w2) ? w1 : w2; + int minh = (h1 < h2) ? h1 : h2; + int minc = (c1 < c2) ? 
c1 : c2; + + int i,j,k,b; + for(b = 0; b < batch; ++b){ + for(k = 0; k < minc; ++k){ + for(j = 0; j < minh; ++j){ + for(i = 0; i < minw; ++i){ + int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); + int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); + out[out_index] = s1*out[out_index] + s2*add[add_index]; + } + } + } + } +} + +void mean_cpu(float *x, int batch, int filters, int spatial, float *mean) +{ + float scale = 1./(batch * spatial); + int i,j,k; + for(i = 0; i < filters; ++i){ + mean[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + mean[i] += x[index]; + } + } + mean[i] *= scale; + } +} + +void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + float scale = 1./(batch * spatial - 1); + int i,j,k; + for(i = 0; i < filters; ++i){ + variance[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance[i] += pow((x[index] - mean[i]), 2); + } + } + variance[i] *= scale; + } +} + +void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial) +{ + int b,f,i; + for(b = 0; b < batch; ++b){ + for(i = 0; i < spatial; ++i){ + float sum = 0; + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + sum += powf(x[index], 2); + } + sum = sqrtf(sum); + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + x[index] /= sum; + dx[index] = (1 - x[index]) / sum; + } + } + } +} + + +void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) +{ + int b, f, i; + for(b = 0; b < batch; ++b){ + for(f = 0; f < filters; ++f){ + for(i = 0; i < spatial; ++i){ + int index = b*filters*spatial + f*spatial + i; + x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + .000001f); + } + } + } +} + +void const_cpu(int N, float ALPHA, float *X, int INCX) +{ + int i; + for(i = 0; i < N; ++i) X[i*INCX] 
= ALPHA; +} + +void mul_cpu(int N, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] *= X[i*INCX]; +} + +void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] = pow(X[i*INCX], ALPHA); +} + +void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX]; +} + +void scal_cpu(int N, float ALPHA, float *X, int INCX) +{ + int i; + for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA; +} + +void fill_cpu(int N, float ALPHA, float *X, int INCX) +{ + int i; + for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; +} + +void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i, j; + int index = 0; + for(j = 0; j < B; ++j) { + for(i = 0; i < NX; ++i){ + if(X) X[j*NX + i] += OUT[index]; + ++index; + } + for(i = 0; i < NY; ++i){ + if(Y) Y[j*NY + i] += OUT[index]; + ++index; + } + } +} + +void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i, j; + int index = 0; + for(j = 0; j < B; ++j) { + for(i = 0; i < NX; ++i){ + OUT[index++] = X[j*NX + i]; + } + for(i = 0; i < NY; ++i){ + OUT[index++] = Y[j*NY + i]; + } + } +} + +void copy_cpu(int N, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX]; +} + +void mult_add_into_cpu(int N, float *X, float *Y, float *Z) +{ + int i; + for(i = 0; i < N; ++i) Z[i] += X[i]*Y[i]; +} + +void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float diff = truth[i] - pred[i]; + float abs_val = fabs(diff); + if(abs_val < 1) { + error[i] = diff * diff; + delta[i] = diff; + } + else { + error[i] = 2*abs_val - 1; + delta[i] = (diff < 0) ? 
1 : -1; + } + } +} + +void l1_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float diff = truth[i] - pred[i]; + error[i] = fabs(diff); + delta[i] = diff > 0 ? 1 : -1; + } +} + +void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float t = truth[i]; + float p = pred[i]; + error[i] = (t) ? -log(p) : 0; + delta[i] = t-p; + } +} + +void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float t = truth[i]; + float p = pred[i]; + error[i] = -t*log(p) - (1-t)*log(1-p); + delta[i] = t-p; + } +} + +void l2_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float diff = truth[i] - pred[i]; + error[i] = diff * diff; + delta[i] = diff; + } +} + +float dot_cpu(int N, float *X, int INCX, float *Y, int INCY) +{ + int i; + float dot = 0; + for(i = 0; i < N; ++i) dot += X[i*INCX] * Y[i*INCY]; + return dot; +} + +void softmax(float *input, int n, float temp, int stride, float *output) +{ + int i; + float sum = 0; + float largest = -FLT_MAX; + for(i = 0; i < n; ++i){ + if(input[i*stride] > largest) largest = input[i*stride]; + } + for(i = 0; i < n; ++i){ + float e = exp(input[i*stride]/temp - largest/temp); + sum += e; + output[i*stride] = e; + } + for(i = 0; i < n; ++i){ + output[i*stride] /= sum; + } +} + + +void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) +{ + int g, b; + for(b = 0; b < batch; ++b){ + for(g = 0; g < groups; ++g){ + softmax(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset); + } + } +} + +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + int i, j, k, b; + for(b = 0; b < batch; ++b){ + for(k = 0; k < c; ++k){ + 
for(j = 0; j < h*stride; ++j){ + for(i = 0; i < w*stride; ++i){ + int in_index = b*w*h*c + k*w*h + (j/stride)*w + i/stride; + int out_index = b*w*h*c*stride*stride + k*w*h*stride*stride + j*w*stride + i; + if(forward) out[out_index] = scale*in[in_index]; + else in[in_index] += scale*out[out_index]; + } + } + } + } +} + + diff --git a/workloads/realworld/pipeline/darknet/src/blas.h b/workloads/realworld/pipeline/darknet/src/blas.h new file mode 100644 index 0000000000000000000000000000000000000000..bcd249d33374947d52cba4f6777f5e2393ff0fd2 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/blas.h @@ -0,0 +1,105 @@ +#ifndef BLAS_H +#define BLAS_H +#include "darknet.h" + +void flatten(float *x, int size, int layers, int batch, int forward); +void pm(int M, int N, float *A); +float *random_matrix(int rows, int cols); +void time_random_matrix(int TA, int TB, int m, int k, int n); +void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); + +void test_blas(); + +void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void mult_add_into_cpu(int N, float *X, float *Y, float *Z); + +void const_cpu(int N, float ALPHA, float *X, int INCX); +void constrain_gpu(int N, float ALPHA, float * X, int INCX); +void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void mul_cpu(int N, float *X, int INCX, float *Y, int INCY); + +int test_gpu_blas(); +void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out); + +void mean_cpu(float *x, int batch, int filters, int spatial, float *mean); +void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); + +void scale_bias(float *output, float *scales, int batch, int n, int size); +void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); +void 
mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); +void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); +void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); +void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial); + +void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error); +void l2_cpu(int n, float *pred, float *truth, float *delta, float *error); +void l1_cpu(int n, float *pred, float *truth, float *delta, float *error); +void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); +void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); +void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c); +void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc); + +void softmax(float *input, int n, float temp, int stride, float *output); +void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); + +#ifdef GPU +#include "cuda_dark.h" +#include "tree.h" + +void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY, cudaStream_t stream); +void axpy_gpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY, cudaStream_t stream); +void copy_gpu(int N, float * X, int INCX, float * Y, int INCY, cudaStream_t stream); +void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY, cudaStream_t stream); +void add_gpu(int N, float ALPHA, float * X, int INCX); +void supp_gpu(int N, float ALPHA, float * X, int INCX); 
+void mask_gpu(int N, float * X, float mask_num, float * mask, float val); +void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale); +void const_gpu(int N, float ALPHA, float *X, int INCX); +void pow_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void mul_gpu(int N, float *X, int INCX, float *Y, int INCY); + +void mean_gpu(float *x, int batch, int filters, int spatial, float *mean); +void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); +void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial, cudaStream_t stream); +void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial); + +void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); + +void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); +void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); + +void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance, cudaStream_t stream); +void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean, cudaStream_t stream); +void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out); +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size, cudaStream_t stream); +void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size, cudaStream_t stream); +void add_bias_gpu(float *output, float *biases, int batch, int n, int size, cudaStream_t stream); +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); + 
+void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); +void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); +void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error); +void l2_gpu(int n, float *pred, float *truth, float *delta, float *error); +void l1_gpu(int n, float *pred, float *truth, float *delta, float *error); +void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error); +void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc); +void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c); +void mult_add_into_gpu(int num, float *a, float *b, float *c); +void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT); + +void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); + +void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); +void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t, cudaStream_t stream); +void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t); + +void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out); +void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier); +void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); + +#endif +#endif diff --git a/workloads/realworld/pipeline/darknet/src/blas_kernels.cu b/workloads/realworld/pipeline/darknet/src/blas_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..a746982b2eb2bbc1279bd9addb27247267c023e0 --- /dev/null +++ 
b/workloads/realworld/pipeline/darknet/src/blas_kernels.cu @@ -0,0 +1,1043 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" +#include + +extern "C" { +#include "blas.h" +#include "cuda_dark.h" +#include "utils.h" +} + +__global__ void scale_bias_kernel(float *output, float *biases, int n, int size) +{ + int offset = blockIdx.x * blockDim.x + threadIdx.x; + int filter = blockIdx.y; + int batch = blockIdx.z; + + if(offset < size) output[(batch*n+filter)*size + offset] *= biases[filter]; +} + +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size, cudaStream_t stream) +{ + dim3 dimGrid((size-1)/BLOCK + 1, n, batch); + dim3 dimBlock(BLOCK, 1, 1); + + scale_bias_kernel<<>>(output, biases, n, size); + check_error_stream(cudaPeekAtLastError(), stream); +} + +__global__ void backward_scale_kernel(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + __shared__ float part[BLOCK]; + int i,b; + int filter = blockIdx.x; + int p = threadIdx.x; + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; i += BLOCK){ + int index = p + i + size*(filter + n*b); + sum += (p+i < size) ? 
delta[index]*x_norm[index] : 0; + } + } + part[p] = sum; + __syncthreads(); + if (p == 0) { + for(i = 0; i < BLOCK; ++i) scale_updates[filter] += part[i]; + } +} + +void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + backward_scale_kernel<<>>(x_norm, delta, batch, n, size, scale_updates); + check_error(cudaPeekAtLastError()); +} + +__global__ void add_bias_kernel(float *output, float *biases, int batch, int n, int size) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= n*size*batch) return; + int i = index % size; + index /= size; + int j = index % n; + index /= n; + int k = index; + + output[(k*n+j)*size + i] += biases[j]; +} + +void add_bias_gpu(float *output, float *biases, int batch, int n, int size, cudaStream_t stream) +{ + int num = n*size*batch; + + add_bias_kernel<<>>(output, biases, batch, n, size); + check_error_stream(cudaPeekAtLastError(), stream); +} + +__global__ void backward_bias_conn_kernel(float *bias_updates, float *delta, int batch, int n) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= n) return; + int b; + float sum = 0; + for(b = 0; b < batch; ++b){ + int i = b*n + index; + sum += delta[i]; + } + bias_updates[index] += sum; +} + +__global__ void backward_bias_kernel(float *bias_updates, float *delta, int batch, int n, int size) +{ + __shared__ float part[BLOCK]; + int i,b; + int filter = blockIdx.x; + int p = threadIdx.x; + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; i += BLOCK){ + int index = p + i + size*(filter + n*b); + sum += (p+i < size) ? 
delta[index] : 0; + } + } + part[p] = sum; + __syncthreads(); + if (p == 0) { + for(i = 0; i < BLOCK; ++i) bias_updates[filter] += part[i]; + } +} + +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size) +{ + if(size == 1){ + backward_bias_conn_kernel<<>>(bias_updates, delta, batch, n); + }else{ + backward_bias_kernel<<>>(bias_updates, delta, batch, n, size); + } + check_error(cudaPeekAtLastError()); +} + +/* +__global__ void dot_kernel(float *output, float scale, int batch, int n, int size, float *delta) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int f1 = index / n; + int f2 = index % n; + if (f2 <= f1) return; + + float sum = 0; + float norm1 = 0; + float norm2 = 0; + int b, i; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int i1 = b * size * n + f1 * size + i; + int i2 = b * size * n + f2 * size + i; + sum += output[i1] * output[i2]; + norm1 += output[i1] * output[i1]; + norm2 += output[i2] * output[i2]; + } + } + norm1 = sqrt(norm1); + norm2 = sqrt(norm2); + float norm = norm1 * norm2; + sum = sum / norm; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int i1 = b * size * n + f1 * size + i; + int i2 = b * size * n + f2 * size + i; + delta[i1] += - scale * sum * output[i2] / norm; + delta[i2] += - scale * sum * output[i1] / norm; + } + } +} + +void dot_error_gpu(layer l) +{ + dot_kernel<<>>(l.output_gpu, l.dot, l.batch, l.n, l.out_w * l.out_h, l.delta_gpu); + check_error(cudaPeekAtLastError()); +} +*/ + + +__global__ void adam_kernel(int N, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + + float mhat = m[index] / (1.f - powf(B1, t)); + float vhat = v[index] / (1.f - powf(B2, t)); + + x[index] = x[index] + rate * mhat / (sqrtf(vhat) + eps); +} + +extern "C" void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, 
float rate, float eps, int t) +{ + adam_kernel<<>>(n, x, m, v, B1, B2, rate, eps, t); + check_error(cudaPeekAtLastError()); +} + +extern "C" void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t, cudaStream_t stream) +{ + scal_gpu(n, B1, m, 1, stream); + scal_gpu(n, B2, v, 1, stream); + axpy_gpu(n, -decay*batch, w, 1, d, 1, stream); + + axpy_gpu(n, (1-B1), d, 1, m, 1, stream); + mul_gpu(n, d, 1, d, 1); + axpy_gpu(n, (1-B2), d, 1, v, 1, stream); + + adam_gpu(n, w, m, v, B1, B2, rate, eps, t); + fill_gpu(n, 0, d, 1); +} + +__global__ void normalize_kernel(int N, float *x, float *mean, float *variance, int batch, int filters, int spatial) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int f = (index/spatial)%filters; + + x[index] = (x[index] - mean[f])/(sqrtf(variance[f] + .00001f)); +} + +__global__ void normalize_delta_kernel(int N, float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int f = (index/spatial)%filters; + + delta[index] = delta[index] * 1.f/(sqrtf(variance[f] + .00001f)) + variance_delta[f] * 2.f * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); +} + +extern "C" void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + size_t N = batch*filters*spatial; + normalize_delta_kernel<<>>(N, x, mean, variance, mean_delta, variance_delta, batch, filters, spatial, delta); + check_error(cudaPeekAtLastError()); +} + +__global__ void variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + int i = (blockIdx.x + 
blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + int j,k; + variance_delta[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance_delta[i] += delta[index]*(x[index] - mean[i]); + } + } + variance_delta[i] *= -.5f * powf(variance[i] + .00001f, (float)(-3.f/2.f)); +} + +__global__ void accumulate_kernel(float *x, int n, int groups, float *sum) +{ + int k; + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= groups) return; + sum[i] = 0; + for(k = 0; k < n; ++k){ + sum[i] += x[k*groups + i]; + } +} + +__global__ void fast_mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + local[id] += (i+id < spatial) ? delta[index] : 0; + } + } + + __syncthreads(); + + if(id == 0){ + mean_delta[filter] = 0; + for(i = 0; i < threads; ++i){ + mean_delta[filter] += local[i]; + } + mean_delta[filter] *= (-1.f/sqrtf(variance[filter] + .00001f)); + } +} + +__global__ void fast_variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + + local[id] += (i+id < spatial) ? 
delta[index]*(x[index] - mean[filter]) : 0; + } + } + + __syncthreads(); + + if(id == 0){ + variance_delta[filter] = 0; + for(i = 0; i < threads; ++i){ + variance_delta[filter] += local[i]; + } + variance_delta[filter] *= -.5f * powf(variance[filter] + .00001f, (float)(-3.f/2.f)); + } +} + + +__global__ void mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + int j,k; + mean_delta[i] = 0; + for (j = 0; j < batch; ++j) { + for (k = 0; k < spatial; ++k) { + int index = j*filters*spatial + i*spatial + k; + mean_delta[i] += delta[index]; + } + } + mean_delta[i] *= (-1.f/sqrtf(variance[i] + .00001f)); +} + +extern "C" void mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + mean_delta_kernel<<>>(delta, variance, batch, filters, spatial, mean_delta); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + fast_mean_delta_kernel<<>>(delta, variance, batch, filters, spatial, mean_delta); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + fast_variance_delta_kernel<<>>(x, delta, mean, variance, batch, filters, spatial, variance_delta); + check_error(cudaPeekAtLastError()); +} + +__global__ void mean_kernel(float *x, int batch, int filters, int spatial, float *mean) +{ + float scale = 1.f/(batch * spatial); + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + int j,k; + mean[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + mean[i] += x[index]; + } + } + mean[i] *= scale; +} + 
+__global__ void variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + float scale = 1.f/(batch * spatial - 1); + int j,k; + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + variance[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance[i] += powf((x[index] - mean[i]), 2); + } + } + variance[i] *= scale; +} + +__global__ void reorg_kernel(int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int in_index = i; + int in_w = i%w; + i = i/w; + int in_h = i%h; + i = i/h; + int in_c = i%c; + i = i/c; + int b = i%batch; + + int out_c = c/(stride*stride); + + int c2 = in_c % out_c; + int offset = in_c / out_c; + int w2 = in_w*stride + offset % stride; + int h2 = in_h*stride + offset / stride; + //printf("%d\n", offset); + int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); + + // printf("%d %d %d\n", w2, h2, c2); + //printf("%d %d\n", in_index, out_index); + //if(out_index >= N || out_index < 0) printf("bad bad bad \n"); + + if(forward) out[out_index] = x[in_index]; + else out[in_index] = x[out_index]; + //if(forward) out[1] = x[1]; + //else out[0] = x[0]; +} + +__global__ void axpy_kernel(int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[OFFY+i*INCY] += ALPHA*X[OFFX+i*INCX]; +} + +__global__ void pow_kernel(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[i*INCY] = pow(X[i*INCX], ALPHA); +} + +__global__ void const_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i 
< N) X[i*INCX] = ALPHA; +} + +__global__ void constrain_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] = fminf(ALPHA, fmaxf(-ALPHA, X[i*INCX])); +} + +__global__ void supp_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) { + if((X[i*INCX] * X[i*INCX]) < (ALPHA * ALPHA)) X[i*INCX] = 0; + } +} + +__global__ void add_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] += ALPHA; +} + +__global__ void scal_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] *= ALPHA; +} + +__global__ void fill_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] = ALPHA; +} + +__global__ void copy_kernel(int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[i*INCY + OFFY] = X[i*INCX + OFFX]; +} + +__global__ void mul_kernel(int N, float *X, int INCX, float *Y, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[i*INCY] *= X[i*INCX]; +} + + +extern "C" void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial, cudaStream_t stream) +{ + size_t N = batch*filters*spatial; + normalize_kernel<<>>(N, x, mean, variance, batch, filters, spatial); + check_error_stream(cudaPeekAtLastError(), stream); +} + +__global__ void l2norm_kernel(int N, float *x, float *dx, int batch, int filters, int spatial) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int b = index / spatial; + int i = index % 
spatial; + int f; + float sum = 0; + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + sum += powf(x[index], 2); + } + sum = sqrtf(sum); + if(sum == 0) sum = 1; + //printf("%f\n", sum); + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + x[index] /= sum; + dx[index] = (1 - x[index]) / sum; + } +} + +extern "C" void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial) +{ + size_t N = batch*spatial; + l2norm_kernel<<>>(N, x, dx, batch, filters, spatial); + check_error(cudaPeekAtLastError()); +} + +__global__ void fast_mean_kernel(float *x, int batch, int filters, int spatial, float *mean) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + local[id] += (i+id < spatial) ? x[index] : 0; + } + } + + __syncthreads(); + + if(id == 0){ + mean[filter] = 0; + for(i = 0; i < threads; ++i){ + mean[filter] += local[i]; + } + mean[filter] /= spatial * batch; + } +} + +__global__ void fast_variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + + local[id] += (i+id < spatial) ? 
powf((x[index] - mean[filter]), 2) : 0; + } + } + + __syncthreads(); + + if(id == 0){ + variance[filter] = 0; + for(i = 0; i < threads; ++i){ + variance[filter] += local[i]; + } + variance[filter] /= (spatial * batch - 1); + } +} + +extern "C" void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean, cudaStream_t stream) +{ + fast_mean_kernel<<>>(x, batch, filters, spatial, mean); + check_error_stream(cudaPeekAtLastError(), stream); +} + +extern "C" void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance, cudaStream_t stream) +{ + fast_variance_kernel<<>>(x, mean, batch, filters, spatial, variance); + check_error_stream(cudaPeekAtLastError(), stream); +} + + +extern "C" void mean_gpu(float *x, int batch, int filters, int spatial, float *mean) +{ + mean_kernel<<>>(x, batch, filters, spatial, mean); + check_error(cudaPeekAtLastError()); +} + +extern "C" void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + variance_kernel<<>>(x, mean, batch, filters, spatial, variance); + check_error(cudaPeekAtLastError()); +} + +extern "C" void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY, cudaStream_t stream) +{ + axpy_gpu_offset(N, ALPHA, X, 0, INCX, Y, 0, INCY, stream); +} + +extern "C" void pow_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY) +{ + pow_kernel<<>>(N, ALPHA, X, INCX, Y, INCY); + check_error(cudaPeekAtLastError()); +} + +extern "C" void axpy_gpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY, cudaStream_t stream) +{ + axpy_kernel<<>>(N, ALPHA, X, OFFX, INCX, Y, OFFY, INCY); + check_error_stream(cudaPeekAtLastError(), stream); +} + +extern "C" void copy_gpu(int N, float * X, int INCX, float * Y, int INCY, cudaStream_t stream) +{ + copy_gpu_offset(N, X, 0, INCX, Y, 0, INCY, stream); +} + +extern "C" void mul_gpu(int N, float * X, int INCX, float * Y, int INCY) +{ + 
mul_kernel<<>>(N, X, INCX, Y, INCY); + check_error(cudaPeekAtLastError()); +} + +extern "C" void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY, cudaStream_t stream) +{ + copy_kernel<<>>(N, X, OFFX, INCX, Y, OFFY, INCY); + check_error_stream(cudaPeekAtLastError(), stream); +} + +__global__ void flatten_kernel(int N, float *x, int spatial, int layers, int batch, int forward, float *out) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int in_s = i%spatial; + i = i/spatial; + int in_c = i%layers; + i = i/layers; + int b = i; + + int i1 = b*layers*spatial + in_c*spatial + in_s; + int i2 = b*layers*spatial + in_s*layers + in_c; + + if (forward) out[i2] = x[i1]; + else out[i1] = x[i2]; +} + +extern "C" void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out) +{ + int size = spatial*batch*layers; + flatten_kernel<<>>(size, x, spatial, layers, batch, forward, out); + check_error(cudaPeekAtLastError()); +} + +extern "C" void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int size = w*h*c*batch; + reorg_kernel<<>>(size, x, w, h, c, batch, stride, forward, out); + check_error(cudaPeekAtLastError()); +} + +__global__ void mask_kernel(int n, float *x, float mask_num, float *mask, float val) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n && mask[i] == mask_num) x[i] = val; +} + +extern "C" void mask_gpu(int N, float * X, float mask_num, float * mask, float val) +{ + mask_kernel<<>>(N, X, mask_num, mask, val); + check_error(cudaPeekAtLastError()); +} + +__global__ void scale_mask_kernel(int n, float *x, float mask_num, float *mask, float scale) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n && mask[i] == mask_num) x[i] *= scale; +} + +extern "C" void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale) +{ + 
scale_mask_kernel<<>>(N, X, mask_num, mask, scale); + check_error(cudaPeekAtLastError()); +} + +extern "C" void const_gpu(int N, float ALPHA, float * X, int INCX) +{ + const_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void constrain_gpu(int N, float ALPHA, float * X, int INCX) +{ + constrain_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + + +extern "C" void add_gpu(int N, float ALPHA, float * X, int INCX) +{ + add_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void scal_gpu(int N, float ALPHA, float * X, int INCX, cudaStream_t stream) +{ + scal_kernel<<>>(N, ALPHA, X, INCX); + check_error_stream(cudaPeekAtLastError(), stream); +} + +extern "C" void supp_gpu(int N, float ALPHA, float * X, int INCX) +{ + supp_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fill_gpu(int N, float ALPHA, float * X, int INCX) +{ + fill_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fill_gpu_1(int N, float ALPHA, float * X, int INCX) +{ + cudaStream_t stream; + cudaStreamCreate(&stream); + fill_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +__global__ void shortcut_kernel(int size, int minw, int minh, int minc, int stride, int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= size) return; + int i = id % minw; + id /= minw; + int j = id % minh; + id /= minh; + int k = id % minc; + id /= minc; + int b = id % batch; + + int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); + int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); + out[out_index] = s1*out[out_index] + s2*add[add_index]; + //out[out_index] += add[add_index]; +} + +extern "C" void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int 
h2, int c2, float s1, float s2, float *out) +{ + int minw = (w1 < w2) ? w1 : w2; + int minh = (h1 < h2) ? h1 : h2; + int minc = (c1 < c2) ? c1 : c2; + + int stride = w1/w2; + int sample = w2/w1; + assert(stride == h1/h2); + assert(sample == h2/h1); + if(stride < 1) stride = 1; + if(sample < 1) sample = 1; + + int size = batch * minw * minh * minc; + shortcut_kernel<<>>(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, s1, s2, out); + check_error(cudaPeekAtLastError()); +} + +__global__ void smooth_l1_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float diff = truth[i] - pred[i]; + float abs_val = fabsf(diff); + if(abs_val < 1) { + error[i] = diff * diff; + delta[i] = diff; + } + else { + error[i] = 2*abs_val - 1; + delta[i] = (diff > 0) ? 1 : -1; + } + } +} + +extern "C" void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + smooth_l1_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void softmax_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float t = truth[i]; + float p = pred[i]; + error[i] = (t) ? 
-log(p) : 0; + delta[i] = t-p; + } +} + +extern "C" void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + softmax_x_ent_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void logistic_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float t = truth[i]; + float p = pred[i]; + error[i] = -t*log(p+.0000001) - (1-t)*log(1-p+.0000001); + delta[i] = t-p; + } +} + +extern "C" void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + logistic_x_ent_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void l2_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float diff = truth[i] - pred[i]; + error[i] = diff * diff; //I know this is technically wrong, deal with it. + delta[i] = diff; + } +} + +extern "C" void l2_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + l2_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void l1_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float diff = truth[i] - pred[i]; + error[i] = abs(diff); + delta[i] = (diff > 0) ? 1 : -1; + } +} + +extern "C" void l1_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + l1_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void wgan_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + error[i] = truth[i] ? -pred[i] : pred[i]; + delta[i] = (truth[i] > 0) ? 
1 : -1; + } +} + +extern "C" void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + wgan_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + + + + +__global__ void weighted_sum_kernel(int n, float *a, float *b, float *s, float *c) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + c[i] = s[i]*a[i] + (1-s[i])*(b ? b[i] : 0); + } +} + +__global__ void deinter_kernel(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < (NX+NY)*B){ + int b = i / (NX+NY); + int j = i % (NX+NY); + if (j < NX){ + if(X) X[b*NX + j] += OUT[i]; + } else { + if(Y) Y[b*NY + j - NX] += OUT[i]; + } + } +} + +extern "C" void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + deinter_kernel<<>>(NX, X, NY, Y, B, OUT); + check_error(cudaPeekAtLastError()); +} + +__global__ void inter_kernel(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < (NX+NY)*B){ + int b = i / (NX+NY); + int j = i % (NX+NY); + if (j < NX){ + OUT[i] = X[b*NX + j]; + } else { + OUT[i] = Y[b*NY + j - NX]; + } + } +} + +extern "C" void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + inter_kernel<<>>(NX, X, NY, Y, B, OUT); + check_error(cudaPeekAtLastError()); +} + +extern "C" void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c) +{ + weighted_sum_kernel<<>>(num, a, b, s, c); + check_error(cudaPeekAtLastError()); +} + +__global__ void weighted_delta_kernel(int n, float *a, float *b, float *s, float *da, float *db, float *ds, float *dc) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + if(da) da[i] += dc[i] * s[i]; + if(db) db[i] += dc[i] * (1-s[i]); + ds[i] += dc[i] * (a[i] - b[i]); + } +} + +extern "C" void weighted_delta_gpu(float *a, float *b, float *s, 
float *da, float *db, float *ds, int num, float *dc) +{ + weighted_delta_kernel<<>>(num, a, b, s, da, db, ds, dc); + check_error(cudaPeekAtLastError()); +} + +__global__ void mult_add_into_kernel(int n, float *a, float *b, float *c) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + c[i] += a[i]*b[i]; + } +} + +extern "C" void mult_add_into_gpu(int num, float *a, float *b, float *c) +{ + mult_add_into_kernel<<>>(num, a, b, c); + check_error(cudaPeekAtLastError()); +} + + +__device__ void softmax_device(float *input, int n, float temp, int stride, float *output) +{ + int i; + float sum = 0; + float largest = -INFINITY; + for(i = 0; i < n; ++i){ + int val = input[i*stride]; + largest = (val>largest) ? val : largest; + } + for(i = 0; i < n; ++i){ + float e = expf(input[i*stride]/temp - largest/temp); + sum += e; + output[i*stride] = e; + } + for(i = 0; i < n; ++i){ + output[i*stride] /= sum; + } +} + + +__global__ void softmax_tree_kernel(float *input, int spatial, int batch, int stride, float temp, float *output, int groups, int *group_size, int *group_offset) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= spatial*batch*groups) return; + int s = id % spatial; + id = id / spatial; + int g = id % groups; + int b = id / groups; + int goff = group_offset[g]*spatial; + int boff = b*stride; + softmax_device(input + goff + boff + s, group_size[g], temp, spatial, output + goff + boff + s); +} + +extern "C" void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier) +{ + int *tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups); + int *tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups); + /* + static int *tree_groups_size = 0; + static int *tree_groups_offset = 0; + if(!tree_groups_size){ + tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups); + tree_groups_offset = 
cuda_make_int_array(hier.group_offset, hier.groups); + } + */ + int num = spatial*batch*hier.groups; + softmax_tree_kernel<<>>(input, spatial, batch, stride, temp, output, hier.groups, tree_groups_size, tree_groups_offset); + check_error(cudaPeekAtLastError()); + cuda_free((float *)tree_groups_size); + cuda_free((float *)tree_groups_offset); +} + +__global__ void softmax_kernel(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= batch*groups) return; + int b = id / groups; + int g = id % groups; + softmax_device(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset); +} + +extern "C" void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) +{ + softmax_kernel<<>>(input, n, batch, batch_offset, groups, group_offset, stride, temp, output); + check_error(cudaPeekAtLastError()); +} + + +__global__ void upsample_kernel(size_t N, float *x, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + size_t i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int out_index = i; + int out_w = i%(w*stride); + i = i/(w*stride); + int out_h = i%(h*stride); + i = i/(h*stride); + int out_c = i%c; + i = i/c; + int b = i%batch; + + int in_w = out_w / stride; + int in_h = out_h / stride; + int in_c = out_c; + + int in_index = b*w*h*c + in_c*w*h + in_h*w + in_w; + + + if(forward) out[out_index] += scale * x[in_index]; + else atomicAdd(x+in_index, scale * out[out_index]); +} +extern "C" void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + size_t size = w*h*c*batch*stride*stride; + upsample_kernel<<>>(size, in, w, h, c, batch, stride, forward, scale, out); + 
check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/pipeline/darknet/src/box.c b/workloads/realworld/pipeline/darknet/src/box.c new file mode 100644 index 0000000000000000000000000000000000000000..8a1772c9ae05dede6ddc83d9b6465f64cf974ae8 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/box.c @@ -0,0 +1,357 @@ +#include "box.h" +#include +#include +#include + +int nms_comparator(const void *pa, const void *pb) +{ + detection a = *(detection *)pa; + detection b = *(detection *)pb; + float diff = 0; + if(b.sort_class >= 0){ + diff = a.prob[b.sort_class] - b.prob[b.sort_class]; + } else { + diff = a.objectness - b.objectness; + } + if(diff < 0) return 1; + else if(diff > 0) return -1; + return 0; +} + +void do_nms_obj(detection *dets, int total, int classes, float thresh) +{ + int i, j, k; + k = total-1; + for(i = 0; i <= k; ++i){ + if(dets[i].objectness == 0){ + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k+1; + + for(i = 0; i < total; ++i){ + dets[i].sort_class = -1; + } + + qsort(dets, total, sizeof(detection), nms_comparator); + for(i = 0; i < total; ++i){ + if(dets[i].objectness == 0) continue; + box a = dets[i].bbox; + for(j = i+1; j < total; ++j){ + if(dets[j].objectness == 0) continue; + box b = dets[j].bbox; + if (box_iou(a, b) > thresh){ + dets[j].objectness = 0; + for(k = 0; k < classes; ++k){ + dets[j].prob[k] = 0; + } + } + } + } +} + + +void do_nms_sort(detection *dets, int total, int classes, float thresh) +{ + int i, j, k; + k = total-1; + for(i = 0; i <= k; ++i){ + if(dets[i].objectness == 0){ + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k+1; + + for(k = 0; k < classes; ++k){ + for(i = 0; i < total; ++i){ + dets[i].sort_class = k; + } + qsort(dets, total, sizeof(detection), nms_comparator); + for(i = 0; i < total; ++i){ + if(dets[i].prob[k] == 0) continue; + box a = dets[i].bbox; + for(j = i+1; j < total; ++j){ + 
box b = dets[j].bbox; + if (box_iou(a, b) > thresh){ + dets[j].prob[k] = 0; + } + } + } + } +} + +box float_to_box(float *f, int stride) +{ + box b = {0}; + b.x = f[0]; + b.y = f[1*stride]; + b.w = f[2*stride]; + b.h = f[3*stride]; + return b; +} + +dbox derivative(box a, box b) +{ + dbox d; + d.dx = 0; + d.dw = 0; + float l1 = a.x - a.w/2; + float l2 = b.x - b.w/2; + if (l1 > l2){ + d.dx -= 1; + d.dw += .5; + } + float r1 = a.x + a.w/2; + float r2 = b.x + b.w/2; + if(r1 < r2){ + d.dx += 1; + d.dw += .5; + } + if (l1 > r2) { + d.dx = -1; + d.dw = 0; + } + if (r1 < l2){ + d.dx = 1; + d.dw = 0; + } + + d.dy = 0; + d.dh = 0; + float t1 = a.y - a.h/2; + float t2 = b.y - b.h/2; + if (t1 > t2){ + d.dy -= 1; + d.dh += .5; + } + float b1 = a.y + a.h/2; + float b2 = b.y + b.h/2; + if(b1 < b2){ + d.dy += 1; + d.dh += .5; + } + if (t1 > b2) { + d.dy = -1; + d.dh = 0; + } + if (b1 < t2){ + d.dy = 1; + d.dh = 0; + } + return d; +} + +float overlap(float x1, float w1, float x2, float w2) +{ + float l1 = x1 - w1/2; + float l2 = x2 - w2/2; + float left = l1 > l2 ? l1 : l2; + float r1 = x1 + w1/2; + float r2 = x2 + w2/2; + float right = r1 < r2 ? 
r1 : r2; + return right - left; +} + +float box_intersection(box a, box b) +{ + float w = overlap(a.x, a.w, b.x, b.w); + float h = overlap(a.y, a.h, b.y, b.h); + if(w < 0 || h < 0) return 0; + float area = w*h; + return area; +} + +float box_union(box a, box b) +{ + float i = box_intersection(a, b); + float u = a.w*a.h + b.w*b.h - i; + return u; +} + +float box_iou(box a, box b) +{ + return box_intersection(a, b)/box_union(a, b); +} + +float box_rmse(box a, box b) +{ + return sqrt(pow(a.x-b.x, 2) + + pow(a.y-b.y, 2) + + pow(a.w-b.w, 2) + + pow(a.h-b.h, 2)); +} + +dbox dintersect(box a, box b) +{ + float w = overlap(a.x, a.w, b.x, b.w); + float h = overlap(a.y, a.h, b.y, b.h); + dbox dover = derivative(a, b); + dbox di; + + di.dw = dover.dw*h; + di.dx = dover.dx*h; + di.dh = dover.dh*w; + di.dy = dover.dy*w; + + return di; +} + +dbox dunion(box a, box b) +{ + dbox du; + + dbox di = dintersect(a, b); + du.dw = a.h - di.dw; + du.dh = a.w - di.dh; + du.dx = -di.dx; + du.dy = -di.dy; + + return du; +} + + +void test_dunion() +{ + box a = {0, 0, 1, 1}; + box dxa= {0+.0001, 0, 1, 1}; + box dya= {0, 0+.0001, 1, 1}; + box dwa= {0, 0, 1+.0001, 1}; + box dha= {0, 0, 1, 1+.0001}; + + box b = {.5, .5, .2, .2}; + dbox di = dunion(a,b); + printf("Union: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); + float inter = box_union(a, b); + float xinter = box_union(dxa, b); + float yinter = box_union(dya, b); + float winter = box_union(dwa, b); + float hinter = box_union(dha, b); + xinter = (xinter - inter)/(.0001); + yinter = (yinter - inter)/(.0001); + winter = (winter - inter)/(.0001); + hinter = (hinter - inter)/(.0001); + printf("Union Manual %f %f %f %f\n", xinter, yinter, winter, hinter); +} +void test_dintersect() +{ + box a = {0, 0, 1, 1}; + box dxa= {0+.0001, 0, 1, 1}; + box dya= {0, 0+.0001, 1, 1}; + box dwa= {0, 0, 1+.0001, 1}; + box dha= {0, 0, 1, 1+.0001}; + + box b = {.5, .5, .2, .2}; + dbox di = dintersect(a,b); + printf("Inter: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); 
+ float inter = box_intersection(a, b); + float xinter = box_intersection(dxa, b); + float yinter = box_intersection(dya, b); + float winter = box_intersection(dwa, b); + float hinter = box_intersection(dha, b); + xinter = (xinter - inter)/(.0001); + yinter = (yinter - inter)/(.0001); + winter = (winter - inter)/(.0001); + hinter = (hinter - inter)/(.0001); + printf("Inter Manual %f %f %f %f\n", xinter, yinter, winter, hinter); +} + +void test_box() +{ + test_dintersect(); + test_dunion(); + box a = {0, 0, 1, 1}; + box dxa= {0+.00001, 0, 1, 1}; + box dya= {0, 0+.00001, 1, 1}; + box dwa= {0, 0, 1+.00001, 1}; + box dha= {0, 0, 1, 1+.00001}; + + box b = {.5, 0, .2, .2}; + + float iou = box_iou(a,b); + iou = (1-iou)*(1-iou); + printf("%f\n", iou); + dbox d = diou(a, b); + printf("%f %f %f %f\n", d.dx, d.dy, d.dw, d.dh); + + float xiou = box_iou(dxa, b); + float yiou = box_iou(dya, b); + float wiou = box_iou(dwa, b); + float hiou = box_iou(dha, b); + xiou = ((1-xiou)*(1-xiou) - iou)/(.00001); + yiou = ((1-yiou)*(1-yiou) - iou)/(.00001); + wiou = ((1-wiou)*(1-wiou) - iou)/(.00001); + hiou = ((1-hiou)*(1-hiou) - iou)/(.00001); + printf("manual %f %f %f %f\n", xiou, yiou, wiou, hiou); +} + +dbox diou(box a, box b) +{ + float u = box_union(a,b); + float i = box_intersection(a,b); + dbox di = dintersect(a,b); + dbox du = dunion(a,b); + dbox dd = {0,0,0,0}; + + if(i <= 0 || 1) { + dd.dx = b.x - a.x; + dd.dy = b.y - a.y; + dd.dw = b.w - a.w; + dd.dh = b.h - a.h; + return dd; + } + + dd.dx = 2*pow((1-(i/u)),1)*(di.dx*u - du.dx*i)/(u*u); + dd.dy = 2*pow((1-(i/u)),1)*(di.dy*u - du.dy*i)/(u*u); + dd.dw = 2*pow((1-(i/u)),1)*(di.dw*u - du.dw*i)/(u*u); + dd.dh = 2*pow((1-(i/u)),1)*(di.dh*u - du.dh*i)/(u*u); + return dd; +} + + +void do_nms(box *boxes, float **probs, int total, int classes, float thresh) +{ + int i, j, k; + for(i = 0; i < total; ++i){ + int any = 0; + for(k = 0; k < classes; ++k) any = any || (probs[i][k] > 0); + if(!any) { + continue; + } + for(j = i+1; j < total; 
++j){ + if (box_iou(boxes[i], boxes[j]) > thresh){ + for(k = 0; k < classes; ++k){ + if (probs[i][k] < probs[j][k]) probs[i][k] = 0; + else probs[j][k] = 0; + } + } + } + } +} + +box encode_box(box b, box anchor) +{ + box encode; + encode.x = (b.x - anchor.x) / anchor.w; + encode.y = (b.y - anchor.y) / anchor.h; + encode.w = log2(b.w / anchor.w); + encode.h = log2(b.h / anchor.h); + return encode; +} + +box decode_box(box b, box anchor) +{ + box decode; + decode.x = b.x * anchor.w + anchor.x; + decode.y = b.y * anchor.h + anchor.y; + decode.w = pow(2., b.w) * anchor.w; + decode.h = pow(2., b.h) * anchor.h; + return decode; +} diff --git a/workloads/realworld/pipeline/darknet/src/box.h b/workloads/realworld/pipeline/darknet/src/box.h new file mode 100644 index 0000000000000000000000000000000000000000..dda3e59100c3d9e0a6bb05a80070155d9fcbc876 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/box.h @@ -0,0 +1,14 @@ +#ifndef BOX_H +#define BOX_H +#include "darknet.h" + +typedef struct{ + float dx, dy, dw, dh; +} dbox; + +float box_rmse(box a, box b); +dbox diou(box a, box b); +box decode_box(box b, box anchor); +box encode_box(box b, box anchor); + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/classifier.h b/workloads/realworld/pipeline/darknet/src/classifier.h new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/classifier.h @@ -0,0 +1 @@ + diff --git a/workloads/realworld/pipeline/darknet/src/col2im.c b/workloads/realworld/pipeline/darknet/src/col2im.c new file mode 100644 index 0000000000000000000000000000000000000000..5c4605e197439f79fe05c41337a5f2b8103f63ba --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/col2im.c @@ -0,0 +1,39 @@ +#include +#include +void col2im_add_pixel(float *im, int height, int width, int channels, + int row, int col, int channel, int pad, float val) +{ + row -= pad; + col -= pad; + + if 
(row < 0 || col < 0 || + row >= height || col >= width) return; + im[col + width*(row + height*channel)] += val; +} +//This one might be too, can't remember. +void col2im_cpu(float* data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_im) +{ + int c,h,w; + int height_col = (height + 2*pad - ksize) / stride + 1; + int width_col = (width + 2*pad - ksize) / stride + 1; + + int channels_col = channels * ksize * ksize; + for (c = 0; c < channels_col; ++c) { + int w_offset = c % ksize; + int h_offset = (c / ksize) % ksize; + int c_im = c / ksize / ksize; + for (h = 0; h < height_col; ++h) { + for (w = 0; w < width_col; ++w) { + int im_row = h_offset + h * stride; + int im_col = w_offset + w * stride; + int col_index = (c * height_col + h) * width_col + w; + double val = data_col[col_index]; + col2im_add_pixel(data_im, height, width, channels, + im_row, im_col, c_im, pad, val); + } + } + } +} + diff --git a/workloads/realworld/pipeline/darknet/src/col2im.h b/workloads/realworld/pipeline/darknet/src/col2im.h new file mode 100644 index 0000000000000000000000000000000000000000..3fbe05307db65a1f511f801670a23734e21b7dff --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/col2im.h @@ -0,0 +1,13 @@ +#ifndef COL2IM_H +#define COL2IM_H + +void col2im_cpu(float* data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_im); + +#ifdef GPU +void col2im_gpu(float *data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_im); +#endif +#endif diff --git a/workloads/realworld/pipeline/darknet/src/col2im_kernels.cu b/workloads/realworld/pipeline/darknet/src/col2im_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..30ea71e2c6ac0bb81235729c37568abbaa987d3d --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/col2im_kernels.cu @@ -0,0 +1,58 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { 
+#include "col2im.h" +#include "cuda_dark.h" +} + +// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu +// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE + +__global__ void col2im_gpu_kernel(const int n, const float* data_col, + const int height, const int width, const int ksize, + const int pad, + const int stride, + const int height_col, const int width_col, + float *data_im) { + int index = blockIdx.x*blockDim.x+threadIdx.x; + for(; index < n; index += blockDim.x*gridDim.x){ + float val = 0; + int w = index % width + pad; + int h = (index / width) % height + pad; + int c = index / (width * height); + // compute the start and end of the output + int w_col_start = (w < ksize) ? 0 : (w - ksize) / stride + 1; + int w_col_end = min(w / stride + 1, width_col); + int h_col_start = (h < ksize) ? 0 : (h - ksize) / stride + 1; + int h_col_end = min(h / stride + 1, height_col); + // equivalent implementation + int offset = + (c * ksize * ksize + h * ksize + w) * height_col * width_col; + int coeff_h_col = (1 - stride * ksize * height_col) * width_col; + int coeff_w_col = (1 - stride * height_col * width_col); + for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { + for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { + val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col]; + } + } + data_im[index] += val; + } +} + +void col2im_gpu(float *data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_im){ + // We are going to launch channels * height_col * width_col kernels, each + // kernel responsible for copying a single-channel grid. 
+ int height_col = (height + 2 * pad - ksize) / stride + 1; + int width_col = (width + 2 * pad - ksize) / stride + 1; + int num_kernels = channels * height * width; + col2im_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, + BLOCK>>>( + num_kernels, data_col, height, width, ksize, pad, + stride, height_col, + width_col, data_im); +} + diff --git a/workloads/realworld/pipeline/darknet/src/compare.c b/workloads/realworld/pipeline/darknet/src/compare.c new file mode 100644 index 0000000000000000000000000000000000000000..d2d2b3bdc675cf808f483d1607550e072e245396 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/compare.c @@ -0,0 +1,352 @@ +#include + +#include "network.h" +#include "detection_layer.h" +#include "cost_layer.h" +#include "utils.h" +#include "parser.h" +#include "box.h" + +void train_compare(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + list *plist = get_paths("data/compare.train.list"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + printf("%d\n", N); + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.classes = 20; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = COMPARE_DATA; + + load_thread = load_data_in_thread(args); + int epoch = *net.seen/N; + int i = 0; + while(1){ + ++i; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss 
= avg_loss*.9 + loss*.1; + printf("%.3f: %f, %f avg, %lf seconds, %ld images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen); + free_data(train); + if(i%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_%d_minor_%d.weights",backup_directory,base, epoch, i); + save_weights(net, buff); + } + if(*net.seen/N > epoch){ + epoch = *net.seen/N; + i = 0; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + if(epoch%22 == 0) net.learning_rate *= .1; + } + } + pthread_join(load_thread, 0); + free_data(buffer); + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void validate_compare(char *filename, char *weightfile) +{ + int i = 0; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + list *plist = get_paths("data/compare.val.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size/2; + free_list(plist); + + clock_t time; + int correct = 0; + int total = 0; + int splits = 10; + int num = (i+1)*N/splits - i*N/splits; + + data val, buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.classes = 20; + args.n = num; + args.m = 0; + args.d = &buffer; + args.type = COMPARE_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(i = 1; i <= splits; ++i){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + num = (i+1)*N/splits - i*N/splits; + char **part = paths+(i*N/splits); + if(i != splits){ + args.paths = part; + load_thread = load_data_in_thread(args); + } + printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + int j,k; + for(j = 0; j < val.y.rows; ++j){ + for(k = 0; k < 20; ++k){ + if(val.y.vals[j][k*2] != val.y.vals[j][k*2+1]){ + ++total; + 
if((val.y.vals[j][k*2] < val.y.vals[j][k*2+1]) == (pred.vals[j][k*2] < pred.vals[j][k*2+1])){ + ++correct; + } + } + } + } + free_matrix(pred); + printf("%d: Acc: %f, %lf seconds, %d images\n", i, (float)correct/total, sec(clock()-time), val.X.rows); + free_data(val); + } +} + +typedef struct { + network net; + char *filename; + int class; + int classes; + float elo; + float *elos; +} sortable_bbox; + +int total_compares = 0; +int current_class = 0; + +int elo_comparator(const void*a, const void *b) +{ + sortable_bbox box1 = *(sortable_bbox*)a; + sortable_bbox box2 = *(sortable_bbox*)b; + if(box1.elos[current_class] == box2.elos[current_class]) return 0; + if(box1.elos[current_class] > box2.elos[current_class]) return -1; + return 1; +} + +int bbox_comparator(const void *a, const void *b) +{ + ++total_compares; + sortable_bbox box1 = *(sortable_bbox*)a; + sortable_bbox box2 = *(sortable_bbox*)b; + network net = box1.net; + int class = box1.class; + + image im1 = load_image_color(box1.filename, net.w, net.h); + image im2 = load_image_color(box2.filename, net.w, net.h); + float *X = calloc(net.w*net.h*net.c, sizeof(float)); + memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); + memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); + float *predictions = network_predict(net, X); + + free_image(im1); + free_image(im2); + free(X); + if (predictions[class*2] > predictions[class*2+1]){ + return 1; + } + return -1; +} + +void bbox_update(sortable_bbox *a, sortable_bbox *b, int class, int result) +{ + int k = 32; + float EA = 1./(1+pow(10, (b->elos[class] - a->elos[class])/400.)); + float EB = 1./(1+pow(10, (a->elos[class] - b->elos[class])/400.)); + float SA = result ? 1 : 0; + float SB = result ? 
0 : 1; + a->elos[class] += k*(SA - EA); + b->elos[class] += k*(SB - EB); +} + +void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, int class) +{ + image im1 = load_image_color(a->filename, net.w, net.h); + image im2 = load_image_color(b->filename, net.w, net.h); + float *X = calloc(net.w*net.h*net.c, sizeof(float)); + memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); + memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); + float *predictions = network_predict(net, X); + ++total_compares; + + int i; + for(i = 0; i < classes; ++i){ + if(class < 0 || class == i){ + int result = predictions[i*2] > predictions[i*2+1]; + bbox_update(a, b, i, result); + } + } + + free_image(im1); + free_image(im2); + free(X); +} + +void SortMaster3000(char *filename, char *weightfile) +{ + int i = 0; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + set_batch_network(&net, 1); + + list *plist = get_paths("data/compare.sort.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + free_list(plist); + sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); + printf("Sorting %d boxes...\n", N); + for(i = 0; i < N; ++i){ + boxes[i].filename = paths[i]; + boxes[i].net = net; + boxes[i].class = 7; + boxes[i].elo = 1500; + } + clock_t time=clock(); + qsort(boxes, N, sizeof(sortable_bbox), bbox_comparator); + for(i = 0; i < N; ++i){ + printf("%s\n", boxes[i].filename); + } + printf("Sorted in %d compares, %f secs\n", total_compares, sec(clock()-time)); +} + +void BattleRoyaleWithCheese(char *filename, char *weightfile) +{ + int classes = 20; + int i,j; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + set_batch_network(&net, 1); + + list *plist = get_paths("data/compare.sort.list"); + //list *plist = 
get_paths("data/compare.small.list"); + //list *plist = get_paths("data/compare.cat.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + int total = N; + free_list(plist); + sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); + printf("Battling %d boxes...\n", N); + for(i = 0; i < N; ++i){ + boxes[i].filename = paths[i]; + boxes[i].net = net; + boxes[i].classes = classes; + boxes[i].elos = calloc(classes, sizeof(float));; + for(j = 0; j < classes; ++j){ + boxes[i].elos[j] = 1500; + } + } + int round; + clock_t time=clock(); + for(round = 1; round <= 4; ++round){ + clock_t round_time=clock(); + printf("Round: %d\n", round); + shuffle(boxes, N, sizeof(sortable_bbox)); + for(i = 0; i < N/2; ++i){ + bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, -1); + } + printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); + } + + int class; + + for (class = 0; class < classes; ++class){ + + N = total; + current_class = class; + qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + N /= 2; + + for(round = 1; round <= 100; ++round){ + clock_t round_time=clock(); + printf("Round: %d\n", round); + + sorta_shuffle(boxes, N, sizeof(sortable_bbox), 10); + for(i = 0; i < N/2; ++i){ + bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, class); + } + qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + if(round <= 20) N = (N*9/10)/2*2; + + printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); + } + char buff[256]; + sprintf(buff, "results/battle_%d.log", class); + FILE *outfp = fopen(buff, "w"); + for(i = 0; i < N; ++i){ + fprintf(outfp, "%s %f\n", boxes[i].filename, boxes[i].elos[class]); + } + fclose(outfp); + } + printf("Tournament in %d compares, %f secs\n", total_compares, sec(clock()-time)); +} + +void run_compare(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + 
return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + //char *filename = (argc > 5) ? argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_compare(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_compare(cfg, weights); + else if(0==strcmp(argv[2], "sort")) SortMaster3000(cfg, weights); + else if(0==strcmp(argv[2], "battle")) BattleRoyaleWithCheese(cfg, weights); + /* + else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); + else if(0==strcmp(argv[2], "extract")) extract_boxes(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_recall(cfg, weights); + */ +} diff --git a/workloads/realworld/pipeline/darknet/src/connected_layer.c b/workloads/realworld/pipeline/darknet/src/connected_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..c7fdae3c8633f8fa24ec121207e509209d712956 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/connected_layer.c @@ -0,0 +1,336 @@ +#include "connected_layer.h" +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +layer* make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam) +{ + int i; + layer *l = calloc(1, sizeof(layer)); + l->learning_rate_scale = 1; + l->type = CONNECTED; + + l->inputs = inputs; + l->outputs = outputs; + l->batch=batch; + l->batch_normalize = batch_normalize; + l->h = 1; + l->w = 1; + l->c = inputs; + l->out_h = 1; + l->out_w = 1; + l->out_c = outputs; + + l->output = calloc(batch*outputs, sizeof(float)); + l->delta = calloc(batch*outputs, sizeof(float)); + + l->weight_updates = calloc(inputs*outputs, sizeof(float)); + l->bias_updates = calloc(outputs, sizeof(float)); + + l->weights = calloc(outputs*inputs, sizeof(float)); + l->biases = calloc(outputs, sizeof(float)); + + l->forward = forward_connected_layer; + l->backward = 
backward_connected_layer; + l->update = update_connected_layer; + + //float scale = 1./sqrt(inputs); + float scale = sqrt(2./inputs); + for(i = 0; i < outputs*inputs; ++i){ + l->weights[i] = scale*rand_uniform(-1, 1); + } + + for(i = 0; i < outputs; ++i){ + l->biases[i] = 0; + } + + if(adam){ + l->m = calloc(l->inputs*l->outputs, sizeof(float)); + l->v = calloc(l->inputs*l->outputs, sizeof(float)); + l->bias_m = calloc(l->outputs, sizeof(float)); + l->scale_m = calloc(l->outputs, sizeof(float)); + l->bias_v = calloc(l->outputs, sizeof(float)); + l->scale_v = calloc(l->outputs, sizeof(float)); + } + if(batch_normalize){ + l->scales = calloc(outputs, sizeof(float)); + l->scale_updates = calloc(outputs, sizeof(float)); + for(i = 0; i < outputs; ++i){ + l->scales[i] = 1; + } + + l->mean = calloc(outputs, sizeof(float)); + l->mean_delta = calloc(outputs, sizeof(float)); + l->variance = calloc(outputs, sizeof(float)); + l->variance_delta = calloc(outputs, sizeof(float)); + + l->rolling_mean = calloc(outputs, sizeof(float)); + l->rolling_variance = calloc(outputs, sizeof(float)); + + l->x = calloc(batch*outputs, sizeof(float)); + l->x_norm = calloc(batch*outputs, sizeof(float)); + } + +#ifdef GPU + l->forward_gpu = forward_connected_layer_gpu; + l->backward_gpu = backward_connected_layer_gpu; + l->update_gpu = update_connected_layer_gpu; + + l->weights_gpu = cuda_make_array(l->weights, outputs*inputs); + l->biases_gpu = cuda_make_array(l->biases, outputs); + + l->weight_updates_gpu = cuda_make_array(l->weight_updates, outputs*inputs); + l->bias_updates_gpu = cuda_make_array(l->bias_updates, outputs); + + l->output_gpu = cuda_make_array(l->output, outputs*batch); + l->delta_gpu = cuda_make_array(l->delta, outputs*batch); + if (adam) { + l->m_gpu = cuda_make_array(0, inputs*outputs); + l->v_gpu = cuda_make_array(0, inputs*outputs); + l->bias_m_gpu = cuda_make_array(0, outputs); + l->bias_v_gpu = cuda_make_array(0, outputs); + l->scale_m_gpu = cuda_make_array(0, outputs); + 
l->scale_v_gpu = cuda_make_array(0, outputs); + } + + if(batch_normalize){ + l->mean_gpu = cuda_make_array(l->mean, outputs); + l->variance_gpu = cuda_make_array(l->variance, outputs); + + l->rolling_mean_gpu = cuda_make_array(l->mean, outputs); + l->rolling_variance_gpu = cuda_make_array(l->variance, outputs); + + l->mean_delta_gpu = cuda_make_array(l->mean, outputs); + l->variance_delta_gpu = cuda_make_array(l->variance, outputs); + + l->scales_gpu = cuda_make_array(l->scales, outputs); + l->scale_updates_gpu = cuda_make_array(l->scale_updates, outputs); + + l->x_gpu = cuda_make_array(l->output, l->batch*outputs); + l->x_norm_gpu = cuda_make_array(l->output, l->batch*outputs); +#ifdef CUDNN + cudnnCreateTensorDescriptor(&l->normTensorDesc); + cudnnCreateTensorDescriptor(&l->dstTensorDesc); + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); +#endif + } +#endif + l->activation = activation; + fprintf(stderr, "connected %4d -> %4d\n", inputs, outputs); + return l; +} + +void update_connected_layer(layer *l, update_args a, network net) +{ + float learning_rate = a.learning_rate*l->learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + axpy_cpu(l->outputs, learning_rate/batch, l->bias_updates, 1, l->biases, 1); + scal_cpu(l->outputs, momentum, l->bias_updates, 1); + + if(l->batch_normalize){ + axpy_cpu(l->outputs, learning_rate/batch, l->scale_updates, 1, l->scales, 1); + scal_cpu(l->outputs, momentum, l->scale_updates, 1); + } + + axpy_cpu(l->inputs*l->outputs, -decay*batch, l->weights, 1, l->weight_updates, 1); + axpy_cpu(l->inputs*l->outputs, learning_rate/batch, l->weight_updates, 1, l->weights, 1); + scal_cpu(l->inputs*l->outputs, momentum, l->weight_updates, 1); +} + +void forward_connected_layer(layer *l, network net) +{ + 
fill_cpu(l->outputs*l->batch, 0, l->output, 1); + int m = l->batch; + int k = l->inputs; + int n = l->outputs; + float *a = net.input; + float *b = l->weights; + float *c = l->output; + gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); + if(l->batch_normalize){ + forward_batchnorm_layer(l, net); + } else { + add_bias(l->output, l->biases, l->batch, l->outputs, 1); + } + activate_array(l->output, l->outputs*l->batch, l->activation); +} + +void backward_connected_layer(layer *l, network net) +{ + gradient_array(l->output, l->outputs*l->batch, l->activation, l->delta); + + if(l->batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l->bias_updates, l->delta, l->batch, l->outputs, 1); + } + + int m = l->outputs; + int k = l->batch; + int n = l->inputs; + float *a = l->delta; + float *b = net.input; + float *c = l->weight_updates; + gemm(1,0,m,n,k,1,a,m,b,n,1,c,n); + + m = l->batch; + k = l->outputs; + n = l->inputs; + + a = l->delta; + b = l->weights; + c = net.delta; + + if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); +} + + +void denormalize_connected_layer(layer *l) +{ + int i, j; + for(i = 0; i < l->outputs; ++i){ + float scale = l->scales[i]/sqrt(l->rolling_variance[i] + .000001); + for(j = 0; j < l->inputs; ++j){ + l->weights[i*l->inputs + j] *= scale; + } + l->biases[i] -= l->rolling_mean[i] * scale; + l->scales[i] = 1; + l->rolling_mean[i] = 0; + l->rolling_variance[i] = 1; + } +} + + +void statistics_connected_layer(layer *l) +{ + if(l->batch_normalize){ + printf("Scales "); + print_statistics(l->scales, l->outputs); + /* + printf("Rolling Mean "); + print_statistics(l->rolling_mean, l->outputs); + printf("Rolling Variance "); + print_statistics(l->rolling_variance, l->outputs); + */ + } + printf("Biases "); + print_statistics(l->biases, l->outputs); + printf("Weights "); + print_statistics(l->weights, l->outputs); +} + +#ifdef GPU + +void pull_connected_layer(layer *l) +{ + cuda_pull_array(l->weights_gpu, l->weights, l->inputs*l->outputs); + 
cuda_pull_array(l->biases_gpu, l->biases, l->outputs); + cuda_pull_array(l->weight_updates_gpu, l->weight_updates, l->inputs*l->outputs); + cuda_pull_array(l->bias_updates_gpu, l->bias_updates, l->outputs); + if (l->batch_normalize){ + cuda_pull_array(l->scales_gpu, l->scales, l->outputs); + cuda_pull_array(l->rolling_mean_gpu, l->rolling_mean, l->outputs); + cuda_pull_array(l->rolling_variance_gpu, l->rolling_variance, l->outputs); + } +} + +void push_connected_layer(layer *l) +{ + cuda_push_array(l->weights_gpu, l->weights, l->inputs*l->outputs); + cuda_push_array(l->biases_gpu, l->biases, l->outputs); + cuda_push_array(l->weight_updates_gpu, l->weight_updates, l->inputs*l->outputs); + cuda_push_array(l->bias_updates_gpu, l->bias_updates, l->outputs); + if (l->batch_normalize){ + cuda_push_array(l->scales_gpu, l->scales, l->outputs); + cuda_push_array(l->rolling_mean_gpu, l->rolling_mean, l->outputs); + cuda_push_array(l->rolling_variance_gpu, l->rolling_variance, l->outputs); + } +} + +void update_connected_layer_gpu(layer *l, update_args a, network net) +{ + float learning_rate = a.learning_rate*l->learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + if(a.adam){ + adam_update_gpu(l->weights_gpu, l->weight_updates_gpu, l->m_gpu, l->v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l->inputs*l->outputs, batch, a.t, net.streams[l->stream_index]); + adam_update_gpu(l->biases_gpu, l->bias_updates_gpu, l->bias_m_gpu, l->bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l->outputs, batch, a.t, net.streams[l->stream_index]); + if(l->scales_gpu){ + adam_update_gpu(l->scales_gpu, l->scale_updates_gpu, l->scale_m_gpu, l->scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l->outputs, batch, a.t, net.streams[l->stream_index]); + } + }else{ + axpy_gpu(l->outputs, learning_rate/batch, l->bias_updates_gpu, 1, l->biases_gpu, 1, net.streams[l->stream_index]); + scal_gpu(l->outputs, momentum, l->bias_updates_gpu, 1, 
net.streams[l->stream_index]); + + if(l->batch_normalize){ + axpy_gpu(l->outputs, learning_rate/batch, l->scale_updates_gpu, 1, l->scales_gpu, 1, net.streams[l->stream_index]); + scal_gpu(l->outputs, momentum, l->scale_updates_gpu, 1, net.streams[l->stream_index]); + } + + axpy_gpu(l->inputs*l->outputs, -decay*batch, l->weights_gpu, 1, l->weight_updates_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->inputs*l->outputs, learning_rate/batch, l->weight_updates_gpu, 1, l->weights_gpu, 1, net.streams[l->stream_index]); + scal_gpu(l->inputs*l->outputs, momentum, l->weight_updates_gpu, 1, net.streams[l->stream_index]); + } +} + +void forward_connected_layer_gpu(layer *l, network net) +{ + fill_gpu(l->outputs*l->batch, 0, l->output_gpu, 1); + + int m = l->batch; + int k = l->inputs; + int n = l->outputs; + float * a = net.input_gpu; + float * b = l->weights_gpu; + float * c = l->output_gpu; + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n,net.streams[l->stream_index]); + + if (l->batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l->output_gpu, l->biases_gpu, l->batch, l->outputs, 1, net.streams[l->stream_index]); + } + activate_array_gpu(l->output_gpu, l->outputs*l->batch, l->activation, net.streams[l->stream_index]); +} + +void backward_connected_layer_gpu(layer *l, network net) +{ + constrain_gpu(l->outputs*l->batch, 1, l->delta_gpu, 1); + gradient_array_gpu(l->output_gpu, l->outputs*l->batch, l->activation, l->delta_gpu); + if(l->batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l->bias_updates_gpu, l->delta_gpu, l->batch, l->outputs, 1); + } + + int m = l->outputs; + int k = l->batch; + int n = l->inputs; + float * a = l->delta_gpu; + float * b = net.input_gpu; + float * c = l->weight_updates_gpu; + gemm_gpu(1,0,m,n,k,1,a,m,b,n,1,c,n,net.streams[l->stream_index]); + + m = l->batch; + k = l->outputs; + n = l->inputs; + + a = l->delta_gpu; + b = l->weights_gpu; + c = net.delta_gpu; + + if(c) 
gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n,net.streams[l->stream_index]); +} +#endif diff --git a/workloads/realworld/pipeline/darknet/src/connected_layer.h b/workloads/realworld/pipeline/darknet/src/connected_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..3a354723f247c242ad9745c473081e60e5e3a610 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/connected_layer.h @@ -0,0 +1,23 @@ +#ifndef CONNECTED_LAYER_H +#define CONNECTED_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer* make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam); + +void forward_connected_layer(layer *l, network net); +void backward_connected_layer(layer *l, network net); +void update_connected_layer(layer *l, update_args a, network net); + +#ifdef GPU +void forward_connected_layer_gpu(layer *l, network net); +void backward_connected_layer_gpu(layer *l, network net); +void update_connected_layer_gpu(layer *l, update_args a, network net); +void push_connected_layer(layer *l); +void pull_connected_layer(layer *l); +#endif + +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/convolutional_kernels.cu b/workloads/realworld/pipeline/darknet/src/convolutional_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..455663a0c70c076f5786b7be351e39241ffd3083 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/convolutional_kernels.cu @@ -0,0 +1,350 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "gemm.h" +#include "blas.h" +#include "im2col.h" +#include "col2im.h" +#include "utils.h" +#include "cuda_dark.h" +} + +__global__ void binarize_kernel(float *x, int n, float *binary) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= n) return; + binary[i] = (x[i] >= 0) ? 
1 : -1; +} + +void binarize_gpu(float *x, int n, float *binary) +{ + binarize_kernel<<>>(x, n, binary); + check_error(cudaPeekAtLastError()); +} + +__global__ void binarize_input_kernel(float *input, int n, int size, float *binary) +{ + int s = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (s >= size) return; + int i = 0; + float mean = 0; + for(i = 0; i < n; ++i){ + mean += fabsf(input[i*size + s]); + } + mean = mean / n; + for(i = 0; i < n; ++i){ + binary[i*size + s] = (input[i*size + s] > 0) ? mean : -mean; + } +} + +void binarize_input_gpu(float *input, int n, int size, float *binary) +{ + binarize_input_kernel<<>>(input, n, size, binary); + check_error(cudaPeekAtLastError()); +} + + +__global__ void binarize_weights_kernel(float *weights, int n, int size, float *binary) +{ + int f = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (f >= n) return; + int i = 0; + float mean = 0; + for(i = 0; i < size; ++i){ + mean += fabsf(weights[f*size + i]); + } + mean = mean / size; + for(i = 0; i < size; ++i){ + binary[f*size + i] = (weights[f*size + i] > 0) ? 
mean : -mean; + //binary[f*size + i] = weights[f*size + i]; + } +} + +void binarize_weights_gpu(float *weights, int n, int size, float *binary) +{ + binarize_weights_kernel<<>>(weights, n, size, binary); + check_error(cudaPeekAtLastError()); +} + +void forward_convolutional_layer_gpu(convolutional_layer *l, network net) +{ + // Ruihao + // forward_layer_start(l, net); + // Ruihao + + fill_gpu(l->outputs*l->batch, 0, l->output_gpu, 1); + if(l->binary){ + binarize_weights_gpu(l->weights_gpu, l->n, l->c/l->groups*l->size*l->size, l->binary_weights_gpu); + swap_binary(l); + } + + if(l->xnor){ + binarize_weights_gpu(l->weights_gpu, l->n, l->c/l->groups*l->size*l->size, l->binary_weights_gpu); + swap_binary(l); + binarize_gpu(net.input_gpu, l->c*l->h*l->w*l->batch, l->binary_input_gpu); + net.input_gpu = l->binary_input_gpu; + } + +#ifdef CUDNN + float one = 1; + cudnnConvolutionForward(cudnn_handle(), + &one, + l->srcTensorDesc, + net.input_gpu, + l->weightDesc, + l->weights_gpu, + l->convDesc, + l->fw_algo, + net.workspace, + l->workspace_size, + &one, + l->dstTensorDesc, + l->output_gpu); + +#else + int i, j; + int m = l->n/l->groups; + int k = l->size*l->size*l->c/l->groups; + int n = l->out_w*l->out_h; + for(i = 0; i < l->batch; ++i){ + for(j = 0; j < l->groups; ++j){ + float *a = l->weights_gpu + j*l->nweights/l->groups; + float *b = net.workspace; + float *c = l->output_gpu + (i*l->groups + j)*n*m; + float *im = net.input_gpu + (i*l->groups + j)*l->c/l->groups*l->h*l->w; + + if (l->size == 1){ + b = im; + } else { + im2col_gpu(im, l->c/l->groups, l->h, l->w, l->size, l->stride, l->pad, b, net.streams[l->stream_index]); + } + gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n, net.streams[l->stream_index]); + } + } +#endif + if (l->batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l->output_gpu, l->biases_gpu, l->batch, l->n, l->out_w*l->out_h, net.streams[l->stream_index]); + } + + activate_array_gpu(l->output_gpu, l->outputs*l->batch, 
l->activation, net.streams[l->stream_index]); + //if(l->dot > 0) dot_error_gpu(l); + if(l->binary || l->xnor) swap_binary(l); + + // Ruihao + // forward_layer_end(l, net); + // Ruihao +} + +__global__ void smooth_kernel(float *x, int n, int w, int h, int c, int size, float rate, float *delta) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int j = id % w; + id /= w; + int i = id % h; + id /= h; + int k = id % c; + id /= c; + int b = id; + + int w_offset = -(size/2.f); + int h_offset = -(size/2.f); + + int out_index = j + w*(i + h*(k + c*b)); + int l, m; + for(l = 0; l < size; ++l){ + for(m = 0; m < size; ++m){ + int cur_h = h_offset + i + l; + int cur_w = w_offset + j + m; + int index = cur_w + w*(cur_h + h*(k + b*c)); + int valid = (cur_h >= 0 && cur_h < h && + cur_w >= 0 && cur_w < w); + delta[out_index] += valid ? rate*(x[index] - x[out_index]) : 0; + } + } +} + +extern "C" void smooth_layer(layer *l, int size, float rate) +{ + int h = l->out_h; + int w = l->out_w; + int c = l->out_c; + + size_t n = h*w*c*l->batch; + + smooth_kernel<<>>(l->output_gpu, n, l->w, l->h, l->c, size, rate, l->delta_gpu); + check_error(cudaPeekAtLastError()); +} + +void backward_convolutional_layer_gpu(convolutional_layer *l, network net) +{ + // Ruihao + // fprintf(stderr, "backward_convolutional_layer_gpu layer %d\n", l->index); + // Ruihao + if(l->smooth){ + smooth_layer(l, 5, l->smooth); + } + //constrain_gpu(l->outputs*l->batch, 1, l->delta_gpu, 1); + gradient_array_gpu(l->output_gpu, l->outputs*l->batch, l->activation, l->delta_gpu); + + + if(l->batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l->bias_updates_gpu, l->delta_gpu, l->batch, l->n, l->out_w*l->out_h); + } + float *original_input = net.input_gpu; + + if(l->xnor) net.input_gpu = l->binary_input_gpu; +#ifdef CUDNN + float one = 1; + cudnnConvolutionBackwardFilter(cudnn_handle(), + &one, + l->srcTensorDesc, + net.input_gpu, + 
l->ddstTensorDesc, + l->delta_gpu, + l->convDesc, + l->bf_algo, + net.workspace, + l->workspace_size, + &one, + l->dweightDesc, + l->weight_updates_gpu); + + if(net.delta_gpu){ + if(l->binary || l->xnor) swap_binary(&l); + cudnnConvolutionBackwardData(cudnn_handle(), + &one, + l->weightDesc, + l->weights_gpu, + l->ddstTensorDesc, + l->delta_gpu, + l->convDesc, + l->bd_algo, + net.workspace, + l->workspace_size, + &one, + l->dsrcTensorDesc, + net.delta_gpu); + if(l->binary || l->xnor) swap_binary(&l); + if(l->xnor) gradient_array_gpu(original_input, l->batch*l->c*l->h*l->w, HARDTAN, net.delta_gpu); + } + +#else + int m = l->n/l->groups; + int n = l->size*l->size*l->c/l->groups; + int k = l->out_w*l->out_h; + + int i, j; + for(i = 0; i < l->batch; ++i){ + for(j = 0; j < l->groups; ++j){ + float *a = l->delta_gpu + (i*l->groups + j)*m*k; + float *b = net.workspace; + float *c = l->weight_updates_gpu + j*l->nweights/l->groups; + + float *im = net.input_gpu+(i*l->groups + j)*l->c/l->groups*l->h*l->w; + float *imd = net.delta_gpu+(i*l->groups + j)*l->c/l->groups*l->h*l->w; + + im2col_gpu(im, l->c/l->groups, l->h, l->w, l->size, l->stride, l->pad, b, net.streams[l->stream_index]); + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n,net.streams[l->stream_index]); + + if (net.delta_gpu) { + if (l->binary || l->xnor) swap_binary(l); + a = l->weights_gpu + j*l->nweights/l->groups; + b = l->delta_gpu + (i*l->groups + j)*m*k; + c = net.workspace; + if (l->size == 1) { + c = imd; + } + + gemm_gpu(1,0,n,k,m,1,a,n,b,k,0,c,k,net.streams[l->stream_index]); + + if (l->size != 1) { + col2im_gpu(net.workspace, l->c/l->groups, l->h, l->w, l->size, l->stride, l->pad, imd); + } + if(l->binary || l->xnor) { + swap_binary(l); + } + } + if(l->xnor) gradient_array_gpu(original_input + i*l->c*l->h*l->w, l->c*l->h*l->w, HARDTAN, net.delta_gpu + i*l->c*l->h*l->w); + } + } +#endif +} + +void pull_convolutional_layer(layer *l) +{ + // Ruihao + // fprintf(stderr, "pull_convolutional_layer %d, l->nweights %d \n", 
l->index, l->nweights); + // Ruihao + cuda_pull_array(l->weights_gpu, l->weights, l->nweights); + cuda_pull_array(l->biases_gpu, l->biases, l->n); + cuda_pull_array(l->weight_updates_gpu, l->weight_updates, l->nweights); + cuda_pull_array(l->bias_updates_gpu, l->bias_updates, l->n); + if (l->batch_normalize){ + cuda_pull_array(l->scales_gpu, l->scales, l->n); + cuda_pull_array(l->rolling_mean_gpu, l->rolling_mean, l->n); + cuda_pull_array(l->rolling_variance_gpu, l->rolling_variance, l->n); + } +} + +void push_convolutional_layer(layer *l) +{ + // Ruihao + // l->weights_gpu = cuda_make_array(l->weights, l->nweights); + // fprintf(stderr, "push_convolutional_layer %d, l->nweights %d \n", l->index, l->nweights); + // Ruihao + cuda_push_array(l->weights_gpu, l->weights, l->nweights); + cuda_push_array(l->biases_gpu, l->biases, l->n); + cuda_push_array(l->weight_updates_gpu, l->weight_updates, l->nweights); + cuda_push_array(l->bias_updates_gpu, l->bias_updates, l->n); + if (l->batch_normalize){ + cuda_push_array(l->scales_gpu, l->scales, l->n); + cuda_push_array(l->rolling_mean_gpu, l->rolling_mean, l->n); + cuda_push_array(l->rolling_variance_gpu, l->rolling_variance, l->n); + } +} + +void update_convolutional_layer_gpu(layer *l, update_args a, network net) +{ + // Ruihao + // fprintf(stderr, "update_convolutional_layer_gpu layer %d\n", l->index); + // Ruihao + float learning_rate = a.learning_rate*l->learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + if(a.adam){ + adam_update_gpu(l->weights_gpu, l->weight_updates_gpu, l->m_gpu, l->v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l->nweights, batch, a.t, net.streams[l->stream_index]); + adam_update_gpu(l->biases_gpu, l->bias_updates_gpu, l->bias_m_gpu, l->bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l->n, batch, a.t, net.streams[l->stream_index]); + if(l->scales_gpu){ + adam_update_gpu(l->scales_gpu, l->scale_updates_gpu, l->scale_m_gpu, l->scale_v_gpu, 
a.B1, a.B2, a.eps, decay, learning_rate, l->n, batch, a.t, net.streams[l->stream_index]); + } + }else{ + axpy_gpu(l->nweights, -decay*batch, l->weights_gpu, 1, l->weight_updates_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->nweights, learning_rate/batch, l->weight_updates_gpu, 1, l->weights_gpu, 1, net.streams[l->stream_index]); + scal_gpu(l->nweights, momentum, l->weight_updates_gpu, 1, net.streams[l->stream_index]); + + axpy_gpu(l->n, learning_rate/batch, l->bias_updates_gpu, 1, l->biases_gpu, 1, net.streams[l->stream_index]); + scal_gpu(l->n, momentum, l->bias_updates_gpu, 1, net.streams[l->stream_index]); + + if(l->scales_gpu){ + axpy_gpu(l->n, learning_rate/batch, l->scale_updates_gpu, 1, l->scales_gpu, 1, net.streams[l->stream_index]); + scal_gpu(l->n, momentum, l->scale_updates_gpu, 1, net.streams[l->stream_index]); + } + } + if(l->clip){ + constrain_gpu(l->nweights, l->clip, l->weights_gpu, 1); + } +} + + diff --git a/workloads/realworld/pipeline/darknet/src/convolutional_layer.c b/workloads/realworld/pipeline/darknet/src/convolutional_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..651ce8719efb40ae462b92ff0b00934ab62a0994 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/convolutional_layer.c @@ -0,0 +1,635 @@ +#include "convolutional_layer.h" +#include "utils.h" +#include "batchnorm_layer.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" +#include +#include + +#ifdef AI2 +#include "xnor_layer.h" +#endif + +void swap_binary(convolutional_layer *l) +{ + float *swap = l->weights; + l->weights = l->binary_weights; + l->binary_weights = swap; + +#ifdef GPU + swap = l->weights_gpu; + l->weights_gpu = l->binary_weights_gpu; + l->binary_weights_gpu = swap; +#endif +} + +void binarize_weights(float *weights, int n, int size, float *binary) +{ + int i, f; + for(f = 0; f < n; ++f){ + float mean = 0; + for(i = 0; i < size; ++i){ + mean += fabs(weights[f*size + i]); + } + mean = mean / 
size; + for(i = 0; i < size; ++i){ + binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean; + } + } +} + +void binarize_cpu(float *input, int n, float *binary) +{ + int i; + for(i = 0; i < n; ++i){ + binary[i] = (input[i] > 0) ? 1 : -1; + } +} + +void binarize_input(float *input, int n, int size, float *binary) +{ + int i, s; + for(s = 0; s < size; ++s){ + float mean = 0; + for(i = 0; i < n; ++i){ + mean += fabs(input[i*size + s]); + } + mean = mean / n; + for(i = 0; i < n; ++i){ + binary[i*size + s] = (input[i*size + s] > 0) ? mean : -mean; + } + } +} + +int convolutional_out_height(convolutional_layer *l) +{ + return (l->h + 2*l->pad - l->size) / l->stride + 1; +} + +int convolutional_out_width(convolutional_layer *l) +{ + return (l->w + 2*l->pad - l->size) / l->stride + 1; +} + +image get_convolutional_image(convolutional_layer *l) +{ + return float_to_image(l->out_w,l->out_h,l->out_c,l->output); +} + +image get_convolutional_delta(convolutional_layer *l) +{ + return float_to_image(l->out_w,l->out_h,l->out_c,l->delta); +} + +static size_t get_workspace_size(layer *l){ +#ifdef CUDNN + if(gpu_index >= 0){ + size_t most = 0; + size_t s = 0; + cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(), + l->srcTensorDesc, + l->weightDesc, + l->convDesc, + l->dstTensorDesc, + l->fw_algo, + &s); + if (s > most) most = s; + cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnn_handle(), + l->srcTensorDesc, + l->ddstTensorDesc, + l->convDesc, + l->dweightDesc, + l->bf_algo, + &s); + if (s > most) most = s; + cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(), + l->weightDesc, + l->ddstTensorDesc, + l->convDesc, + l->dsrcTensorDesc, + l->bd_algo, + &s); + if (s > most) most = s; + return most; + } +#endif + return (size_t)l->out_h*l->out_w*l->size*l->size*l->c/l->groups*sizeof(float); +} + +#ifdef GPU +#ifdef CUDNN +void cudnn_convolutional_setup(layer *l) +{ + cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, 
l->h, l->w); + cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + + cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + + cudnnSetFilter4dDescriptor(l->dweightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size); + cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size); + #if CUDNN_MAJOR >= 6 + cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT); + #else + cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION); + #endif + + #if CUDNN_MAJOR >= 7 + cudnnSetConvolutionGroupCount(l->convDesc, l->groups); + #else + if(l->groups > 1){ + error("CUDNN < 7 doesn't support groups, please upgrade!"); + } + #endif + + cudnnGetConvolutionForwardAlgorithm(cudnn_handle(), + l->srcTensorDesc, + l->weightDesc, + l->convDesc, + l->dstTensorDesc, + CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->fw_algo); + cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(), + l->weightDesc, + l->ddstTensorDesc, + l->convDesc, + l->dsrcTensorDesc, + CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->bd_algo); + cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(), + l->srcTensorDesc, + l->ddstTensorDesc, + l->convDesc, + l->dweightDesc, + CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->bf_algo); +} +#endif +#endif + +convolutional_layer* make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int 
padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam) +{ + int i; + // convolutional_layer *l = {0}; + convolutional_layer *l = calloc(1, sizeof(convolutional_layer)); + l->type = CONVOLUTIONAL; + + l->groups = groups; + l->h = h; + l->w = w; + l->c = c; + l->n = n; + l->binary = binary; + l->xnor = xnor; + l->batch = batch; + l->stride = stride; + l->size = size; + l->pad = padding; + l->batch_normalize = batch_normalize; + + l->weights = calloc(c/groups*n*size*size, sizeof(float)); + l->weight_updates = calloc(c/groups*n*size*size, sizeof(float)); + + l->biases = calloc(n, sizeof(float)); + l->bias_updates = calloc(n, sizeof(float)); + + l->nweights = c/groups*n*size*size; + l->nbiases = n; + + // float scale = 1./sqrt(size*size*c); + float scale = sqrt(2./(size*size*c/l->groups)); + //printf("convscale %f\n", scale); + //scale = .02; + //for(i = 0; i < c*n*size*size; ++i) l->weights[i] = scale*rand_uniform(-1, 1); + for(i = 0; i < l->nweights; ++i) l->weights[i] = scale*rand_normal(); + int out_w = convolutional_out_width(l); + int out_h = convolutional_out_height(l); + l->out_h = out_h; + l->out_w = out_w; + l->out_c = n; + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->w * l->h * l->c; + + // l->output = calloc(l->batch*l->outputs, sizeof(float)); + // l->delta = calloc(l->batch*l->outputs, sizeof(float)); + // cudaHostAlloc(&l->output, l->batch*l->outputs *sizeof(float), cudaHostAllocDefault); + // cudaHostAlloc(&l->delta, l->batch*l->outputs * sizeof(float), cudaHostAllocDefault); + + cudaMallocHost(&l->output, l->batch*l->outputs *sizeof(float)); + cudaMallocHost(&l->delta, l->batch*l->outputs * sizeof(float)); + + l->forward = forward_convolutional_layer; + l->backward = backward_convolutional_layer; + l->update = update_convolutional_layer; + if(binary){ + l->binary_weights = calloc(l->nweights, sizeof(float)); + l->cweights = calloc(l->nweights, sizeof(char)); + l->scales = calloc(n, sizeof(float)); + } + 
if(xnor){ + l->binary_weights = calloc(l->nweights, sizeof(float)); + l->binary_input = calloc(l->inputs*l->batch, sizeof(float)); + } + + if(batch_normalize){ + l->scales = calloc(n, sizeof(float)); + l->scale_updates = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + l->scales[i] = 1; + } + + l->mean = calloc(n, sizeof(float)); + l->variance = calloc(n, sizeof(float)); + + l->mean_delta = calloc(n, sizeof(float)); + l->variance_delta = calloc(n, sizeof(float)); + + l->rolling_mean = calloc(n, sizeof(float)); + l->rolling_variance = calloc(n, sizeof(float)); + l->x = calloc(l->batch*l->outputs, sizeof(float)); + l->x_norm = calloc(l->batch*l->outputs, sizeof(float)); + } + if(adam){ + l->m = calloc(l->nweights, sizeof(float)); + l->v = calloc(l->nweights, sizeof(float)); + l->bias_m = calloc(n, sizeof(float)); + l->scale_m = calloc(n, sizeof(float)); + l->bias_v = calloc(n, sizeof(float)); + l->scale_v = calloc(n, sizeof(float)); + } + +#ifdef GPU + l->forward_gpu = forward_convolutional_layer_gpu; + l->backward_gpu = backward_convolutional_layer_gpu; + l->update_gpu = update_convolutional_layer_gpu; + + if(gpu_index >= 0) { + l->malloc_async = 0; + + if (adam) { + l->m_gpu = cuda_make_array(l->m, l->nweights); + l->v_gpu = cuda_make_array(l->v, l->nweights); + l->bias_m_gpu = cuda_make_array(l->bias_m, l->n); + l->bias_v_gpu = cuda_make_array(l->bias_v, l->n); + l->scale_m_gpu = cuda_make_array(l->scale_m, l->n); + l->scale_v_gpu = cuda_make_array(l->scale_v, l->n); + } + + l->weights_gpu = cuda_make_array(l->weights, l->nweights); + l->weight_updates_gpu = cuda_make_array(l->weight_updates, l->nweights); + + l->biases_gpu = cuda_make_array(l->biases, l->n); + l->bias_updates_gpu = cuda_make_array(l->bias_updates, l->n); + + // l->delta_gpu = cuda_make_array(l->delta, l->batch*l->out_h*l->out_w*l->n); + // l->output_gpu = cuda_make_array(l->output, l->batch*l->out_h*l->out_w*l->n); + + if(binary){ + l->binary_weights_gpu = cuda_make_array(l->weights, 
l->nweights); + } + if(xnor){ + l->binary_weights_gpu = cuda_make_array(l->weights, l->nweights); + l->binary_input_gpu = cuda_make_array(0, l->inputs*l->batch); + } + + if(batch_normalize){ + l->mean_gpu = cuda_make_array(l->mean, l->n); + l->variance_gpu = cuda_make_array(l->variance, l->n); + + l->rolling_mean_gpu = cuda_make_array(l->mean, l->n); + l->rolling_variance_gpu = cuda_make_array(l->variance, l->n); + + l->mean_delta_gpu = cuda_make_array(l->mean, l->n); + l->variance_delta_gpu = cuda_make_array(l->variance, l->n); + + l->scales_gpu = cuda_make_array(l->scales, l->n); + l->scale_updates_gpu = cuda_make_array(l->scale_updates, l->n); + + l->x_gpu = cuda_make_array(l->output, l->batch*l->out_h*l->out_w*l->n); + l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->out_h*l->out_w*l->n); + } +#ifdef CUDNN + cudnnCreateTensorDescriptor(&l->normTensorDesc); + cudnnCreateTensorDescriptor(&l->srcTensorDesc); + cudnnCreateTensorDescriptor(&l->dstTensorDesc); + cudnnCreateFilterDescriptor(&l->weightDesc); + cudnnCreateTensorDescriptor(&l->dsrcTensorDesc); + cudnnCreateTensorDescriptor(&l->ddstTensorDesc); + cudnnCreateFilterDescriptor(&l->dweightDesc); + cudnnCreateConvolutionDescriptor(&l->convDesc); + cudnn_convolutional_setup(&l); +#endif + } +#endif + l->workspace_size = get_workspace_size(l); + l->activation = activation; + fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BFLOPs\n", n, size, size, stride, w, h, c, l->out_w, l->out_h, l->out_c, (2.0 * l->n * l->size*l->size*l->c/l->groups * l->out_h*l->out_w)/1000000000.); + // fprintf(stderr, "workspace_size %ld \n", l->workspace_size); + return l; +} + +void denormalize_convolutional_layer(convolutional_layer *l) +{ + int i, j; + for(i = 0; i < l->n; ++i){ + float scale = l->scales[i]/sqrt(l->rolling_variance[i] + .00001); + for(j = 0; j < l->c/l->groups*l->size*l->size; ++j){ + l->weights[i*l->c/l->groups*l->size*l->size + j] *= scale; + } + l->biases[i] -= l->rolling_mean[i] 
* scale; + l->scales[i] = 1; + l->rolling_mean[i] = 0; + l->rolling_variance[i] = 1; + } +} + +/* +void test_convolutional_layer() +{ + convolutional_layer *l = make_convolutional_layer(1, 5, 5, 3, 2, 5, 2, 1, LEAKY, 1, 0, 0, 0); + l->batch_normalize = 1; + float data[] = {1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3}; + //net.input = data; + //forward_convolutional_layer(l); +} +*/ + +void resize_convolutional_layer(convolutional_layer *l, int w, int h) +{ + // Ruihao + fprintf(stderr, "resize_convolutional_layer *layer %d\n", l->index); + // Ruihao + l->w = w; + l->h = h; + int out_w = convolutional_out_width(l); + int out_h = convolutional_out_height(l); + + l->out_w = out_w; + l->out_h = out_h; + + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->w * l->h * l->c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + if(l->batch_normalize){ + l->x = realloc(l->x, l->batch*l->outputs*sizeof(float)); + l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float)); + } + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); + + if(l->batch_normalize){ + cuda_free(l->x_gpu); + cuda_free(l->x_norm_gpu); + + l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); + l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); + } +#ifdef CUDNN + cudnn_convolutional_setup(l); +#endif +#endif + l->workspace_size = get_workspace_size(l); +} + +void add_bias(float *output, float *biases, int batch, int n, int size) +{ + int i,j,b; + for(b = 0; b < batch; ++b){ + for(i = 0; i < n; ++i){ + for(j = 0; j < size; ++j){ + output[(b*n + i)*size + j] += biases[i]; + } + } + } +} + 
+void scale_bias(float *output, float *scales, int batch, int n, int size) +{ + int i,j,b; + for(b = 0; b < batch; ++b){ + for(i = 0; i < n; ++i){ + for(j = 0; j < size; ++j){ + output[(b*n + i)*size + j] *= scales[i]; + } + } + } +} + +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size) +{ + int i,b; + for(b = 0; b < batch; ++b){ + for(i = 0; i < n; ++i){ + bias_updates[i] += sum_array(delta+size*(i+b*n), size); + } + } +} + +void forward_convolutional_layer(convolutional_layer *l, network net) +{ + int i, j; + + fill_cpu(l->outputs*l->batch, 0, l->output, 1); + + if(l->xnor){ + binarize_weights(l->weights, l->n, l->c/l->groups*l->size*l->size, l->binary_weights); + swap_binary(l); + binarize_cpu(net.input, l->c*l->h*l->w*l->batch, l->binary_input); + net.input = l->binary_input; + } + + int m = l->n/l->groups; + int k = l->size*l->size*l->c/l->groups; + int n = l->out_w*l->out_h; + for(i = 0; i < l->batch; ++i){ + for(j = 0; j < l->groups; ++j){ + float *a = l->weights + j*l->nweights/l->groups; + float *b = net.workspace; + float *c = l->output + (i*l->groups + j)*n*m; + float *im = net.input + (i*l->groups + j)*l->c/l->groups*l->h*l->w; + + if (l->size == 1) { + b = im; + } else { + im2col_cpu(im, l->c/l->groups, l->h, l->w, l->size, l->stride, l->pad, b); + } + gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } + + if(l->batch_normalize){ + forward_batchnorm_layer(l, net); + } else { + add_bias(l->output, l->biases, l->batch, l->n, l->out_h*l->out_w); + } + + activate_array(l->output, l->outputs*l->batch, l->activation); + if(l->binary || l->xnor) swap_binary(l); +} + +void backward_convolutional_layer(convolutional_layer *l, network net) +{ + int i, j; + int m = l->n/l->groups; + int n = l->size*l->size*l->c/l->groups; + int k = l->out_w*l->out_h; + + gradient_array(l->output, l->outputs*l->batch, l->activation, l->delta); + + if(l->batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l->bias_updates, l->delta, 
l->batch, l->n, k); + } + + for(i = 0; i < l->batch; ++i){ + for(j = 0; j < l->groups; ++j){ + float *a = l->delta + (i*l->groups + j)*m*k; + float *b = net.workspace; + float *c = l->weight_updates + j*l->nweights/l->groups; + + float *im = net.input + (i*l->groups + j)*l->c/l->groups*l->h*l->w; + float *imd = net.delta + (i*l->groups + j)*l->c/l->groups*l->h*l->w; + + if(l->size == 1){ + b = im; + } else { + im2col_cpu(im, l->c/l->groups, l->h, l->w, + l->size, l->stride, l->pad, b); + } + + gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (net.delta) { + a = l->weights + j*l->nweights/l->groups; + b = l->delta + (i*l->groups + j)*m*k; + c = net.workspace; + if (l->size == 1) { + c = imd; + } + + gemm(1,0,n,k,m,1,a,n,b,k,0,c,k); + + if (l->size != 1) { + col2im_cpu(net.workspace, l->c/l->groups, l->h, l->w, l->size, l->stride, l->pad, imd); + } + } + } + } +} + +void update_convolutional_layer(convolutional_layer *l, update_args a, network net) +{ + // Ruihao + fprintf(stderr, "update_convolutional_layer *layer %d\n", l->index); + // Ruihao + float learning_rate = a.learning_rate*l->learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + axpy_cpu(l->n, learning_rate/batch, l->bias_updates, 1, l->biases, 1); + scal_cpu(l->n, momentum, l->bias_updates, 1); + + if(l->scales){ + axpy_cpu(l->n, learning_rate/batch, l->scale_updates, 1, l->scales, 1); + scal_cpu(l->n, momentum, l->scale_updates, 1); + } + + axpy_cpu(l->nweights, -decay*batch, l->weights, 1, l->weight_updates, 1); + axpy_cpu(l->nweights, learning_rate/batch, l->weight_updates, 1, l->weights, 1); + scal_cpu(l->nweights, momentum, l->weight_updates, 1); +} + + +image get_convolutional_weight(convolutional_layer *l, int i) +{ + int h = l->size; + int w = l->size; + int c = l->c/l->groups; + return float_to_image(w,h,c,l->weights+i*h*w*c); +} + +void rgbgr_weights(convolutional_layer *l) +{ + int i; + for(i = 0; i < l->n; ++i){ + image im = get_convolutional_weight(l, 
i); + if (im.c == 3) { + rgbgr_image(im); + } + } +} + +void rescale_weights(convolutional_layer *l, float scale, float trans) +{ + int i; + for(i = 0; i < l->n; ++i){ + image im = get_convolutional_weight(l, i); + if (im.c == 3) { + scale_image(im, scale); + float sum = sum_array(im.data, im.w*im.h*im.c); + l->biases[i] += sum*trans; + } + } +} + +image *get_weights(convolutional_layer *l) +{ + image *weights = calloc(l->n, sizeof(image)); + int i; + for(i = 0; i < l->n; ++i){ + weights[i] = copy_image(get_convolutional_weight(l, i)); + normalize_image(weights[i]); + /* + char buff[256]; + sprintf(buff, "filter%d", i); + save_image(weights[i], buff); + */ + } + //error("hey"); + return weights; +} + +image *visualize_convolutional_layer(convolutional_layer *l, char *window, image *prev_weights) +{ + image *single_weights = get_weights(l); + show_images(single_weights, l->n, window); + + image delta = get_convolutional_image(l); + image dc = collapse_image_layers(delta, 1); + char buff[256]; + sprintf(buff, "%s: Output", window); + //show_image(dc, buff); + //save_image(dc, buff); + free_image(dc); + return single_weights; +} + diff --git a/workloads/realworld/pipeline/darknet/src/convolutional_layer.h b/workloads/realworld/pipeline/darknet/src/convolutional_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..73bca068d6a1da4a3d132d7872882e8069b4652d --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/convolutional_layer.h @@ -0,0 +1,59 @@ +#ifndef CONVOLUTIONAL_LAYER_H +#define CONVOLUTIONAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +typedef layer convolutional_layer; + +#ifdef GPU +void forward_convolutional_layer_gpu(convolutional_layer *layer, network net); +void backward_convolutional_layer_gpu(convolutional_layer *layer, network net); +void update_convolutional_layer_gpu(convolutional_layer *layer, update_args a, network net); + +void 
push_convolutional_layer(convolutional_layer *layer); +void pull_convolutional_layer(convolutional_layer *layer); + +void add_bias_gpu(float *output, float *biases, int batch, int n, int size, cudaStream_t stream); +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); +void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t, cudaStream_t stream); +#ifdef CUDNN +void cudnn_convolutional_setup(layer *l); +#endif + +// typedef struct thr_data { +// convolutional_layer *l; + +// int batch_normalize; +// int binary; +// int xnor; +// int adam; +// } thread_data; +#endif + +convolutional_layer* make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam); +void resize_convolutional_layer(convolutional_layer *layer, int w, int h); +void forward_convolutional_layer(convolutional_layer *layer, network net); +void update_convolutional_layer(convolutional_layer *layer, update_args a, network net); +image *visualize_convolutional_layer(convolutional_layer *layer, char *window, image *prev_weights); +void binarize_weights(float *weights, int n, int size, float *binary); +void swap_binary(convolutional_layer *l); +void binarize_weights2(float *weights, int n, int size, char *binary, float *scales); + +void backward_convolutional_layer(convolutional_layer *layer, network net); + +void add_bias(float *output, float *biases, int batch, int n, int size); +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); + +image get_convolutional_image(convolutional_layer *layer); +image get_convolutional_delta(convolutional_layer *layer); +image get_convolutional_weight(convolutional_layer *layer, int i); + +int convolutional_out_height(convolutional_layer *layer); +int convolutional_out_width(convolutional_layer *layer); + 
+#endif + diff --git a/workloads/realworld/pipeline/darknet/src/cost_layer.c b/workloads/realworld/pipeline/darknet/src/cost_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..3e852483505730d1ad1dd52e937a14ebeab39d2e --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/cost_layer.c @@ -0,0 +1,176 @@ +#include "cost_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include +#include +#include +#include + +COST_TYPE get_cost_type(char *s) +{ + if (strcmp(s, "seg")==0) return SEG; + if (strcmp(s, "sse")==0) return SSE; + if (strcmp(s, "masked")==0) return MASKED; + if (strcmp(s, "smooth")==0) return SMOOTH; + if (strcmp(s, "L1")==0) return L1; + if (strcmp(s, "wgan")==0) return WGAN; + fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s); + return SSE; +} + +char *get_cost_string(COST_TYPE a) +{ + switch(a){ + case SEG: + return "seg"; + case SSE: + return "sse"; + case MASKED: + return "masked"; + case SMOOTH: + return "smooth"; + case L1: + return "L1"; + case WGAN: + return "wgan"; + } + return "sse"; +} + +cost_layer* make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) +{ + fprintf(stderr, "cost %4d\n", inputs); + cost_layer *l = calloc(1, sizeof(cost_layer)); + l->type = COST; + + l->scale = scale; + l->batch = batch; + l->inputs = inputs; + l->outputs = inputs; + l->cost_type = cost_type; + l->delta = calloc(inputs*batch, sizeof(float)); + l->output = calloc(inputs*batch, sizeof(float)); + l->cost = calloc(1, sizeof(float)); + + l->forward = forward_cost_layer; + l->backward = backward_cost_layer; + #ifdef GPU + l->forward_gpu = forward_cost_layer_gpu; + l->backward_gpu = backward_cost_layer_gpu; + + l->delta_gpu = cuda_make_array(l->output, inputs*batch); + l->output_gpu = cuda_make_array(l->delta, inputs*batch); + #endif + return l; +} + +void resize_cost_layer(cost_layer *l, int inputs) +{ + l->inputs = inputs; + l->outputs = inputs; + l->delta = realloc(l->delta, 
inputs*l->batch*sizeof(float)); + l->output = realloc(l->output, inputs*l->batch*sizeof(float)); +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch); + l->output_gpu = cuda_make_array(l->output, inputs*l->batch); +#endif +} + +void forward_cost_layer(cost_layer *l, network net) +{ + if (!net.truth) return; + if(l->cost_type == MASKED){ + int i; + for(i = 0; i < l->batch*l->inputs; ++i){ + if(net.truth[i] == SECRET_NUM) net.input[i] = SECRET_NUM; + } + } + if(l->cost_type == SMOOTH){ + smooth_l1_cpu(l->batch*l->inputs, net.input, net.truth, l->delta, l->output); + }else if(l->cost_type == L1){ + l1_cpu(l->batch*l->inputs, net.input, net.truth, l->delta, l->output); + } else { + l2_cpu(l->batch*l->inputs, net.input, net.truth, l->delta, l->output); + } + l->cost[0] = sum_array(l->output, l->batch*l->inputs); +} + +void backward_cost_layer(cost_layer *l, network net) +{ + axpy_cpu(l->batch*l->inputs, l->scale, l->delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_cost_layer(cost_layer *l) +{ + cuda_pull_array(l->delta_gpu, l->delta, l->batch*l->inputs); +} + +void push_cost_layer(cost_layer *l) +{ + cuda_push_array(l->delta_gpu, l->delta, l->batch*l->inputs); +} + +int float_abs_compare (void * a, void * b) +{ + float fa = *(float*) a; + if(fa < 0) fa = -fa; + float fb = *(float*) b; + if(fb < 0) fb = -fb; + return (fa > fb) - (fa < fb); +} + +void forward_cost_layer_gpu(cost_layer *l, network net) +{ + if (!net.truth) return; + if(l->smooth){ + scal_gpu(l->batch*l->inputs, (1-l->smooth), net.truth_gpu, 1, net.streams[l->stream_index]); + add_gpu(l->batch*l->inputs, l->smooth * 1./l->inputs, net.truth_gpu, 1); + } + + if(l->cost_type == SMOOTH){ + smooth_l1_gpu(l->batch*l->inputs, net.input_gpu, net.truth_gpu, l->delta_gpu, l->output_gpu); + } else if (l->cost_type == L1){ + l1_gpu(l->batch*l->inputs, net.input_gpu, net.truth_gpu, l->delta_gpu, l->output_gpu); + } else if (l->cost_type == 
WGAN){ + wgan_gpu(l->batch*l->inputs, net.input_gpu, net.truth_gpu, l->delta_gpu, l->output_gpu); + } else { + l2_gpu(l->batch*l->inputs, net.input_gpu, net.truth_gpu, l->delta_gpu, l->output_gpu); + } + + if (l->cost_type == SEG && l->noobject_scale != 1) { + scale_mask_gpu(l->batch*l->inputs, l->delta_gpu, 0, net.truth_gpu, l->noobject_scale); + scale_mask_gpu(l->batch*l->inputs, l->output_gpu, 0, net.truth_gpu, l->noobject_scale); + } + if (l->cost_type == MASKED) { + mask_gpu(l->batch*l->inputs, net.delta_gpu, SECRET_NUM, net.truth_gpu, 0); + } + + if(l->ratio){ + cuda_pull_array(l->delta_gpu, l->delta, l->batch*l->inputs); + qsort(l->delta, l->batch*l->inputs, sizeof(float), float_abs_compare); + int n = (1-l->ratio) * l->batch*l->inputs; + float thresh = l->delta[n]; + thresh = 0; + printf("%f\n", thresh); + supp_gpu(l->batch*l->inputs, thresh, l->delta_gpu, 1); + } + + if(l->thresh){ + supp_gpu(l->batch*l->inputs, l->thresh*1./l->inputs, l->delta_gpu, 1); + } + + cuda_pull_array(l->output_gpu, l->output, l->batch*l->inputs); + l->cost[0] = sum_array(l->output, l->batch*l->inputs); +} + +void backward_cost_layer_gpu(cost_layer *l, network net) +{ + axpy_gpu(l->batch*l->inputs, l->scale, l->delta_gpu, 1, net.delta_gpu, 1, net.streams[l->stream_index]); +} +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/cost_layer.h b/workloads/realworld/pipeline/darknet/src/cost_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..417c81f2203407f6036f6e2e7e42425387670e08 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/cost_layer.h @@ -0,0 +1,20 @@ +#ifndef COST_LAYER_H +#define COST_LAYER_H +#include "layer.h" +#include "network.h" + +typedef layer cost_layer; + +COST_TYPE get_cost_type(char *s); +char *get_cost_string(COST_TYPE a); +cost_layer* make_cost_layer(int batch, int inputs, COST_TYPE type, float scale); +void forward_cost_layer(cost_layer *l, network net); +void backward_cost_layer(cost_layer *l, network net); 
+void resize_cost_layer(cost_layer *l, int inputs); + +#ifdef GPU +void forward_cost_layer_gpu(cost_layer *l, network net); +void backward_cost_layer_gpu(cost_layer *l, network net); +#endif + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/cpu_timestamps.c b/workloads/realworld/pipeline/darknet/src/cpu_timestamps.c new file mode 100644 index 0000000000000000000000000000000000000000..35114479c7a9cce3debe2204b6886ad5528041d5 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/cpu_timestamps.c @@ -0,0 +1,20 @@ +#include "cpu_timestamps.h" + +void startCPU() { + struct timespec tv; + if(clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + startCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); +} + + + +void endCPU() { + struct timespec tv; + if(clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + + endCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); + //endCPUTimestamp1 = std::chrono::system_clock::now(); + printf("CPU_Times,%lu,%lu,%lu\n", startCPUTime, endCPUTime, endCPUTime-startCPUTime); +} diff --git a/workloads/realworld/pipeline/darknet/src/cpu_timestamps.h b/workloads/realworld/pipeline/darknet/src/cpu_timestamps.h new file mode 100644 index 0000000000000000000000000000000000000000..e53e995a5603b4610759c02a4a179eb9f0124e48 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/cpu_timestamps.h @@ -0,0 +1,21 @@ +#ifndef CPU_TIMESTAMP_ +#define CPU_TIMESTAMP_ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +static uint64_t startCPUTime; +static uint64_t endCPUTime; + +void startCPU(); +void endCPU(); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/crnn_layer.c b/workloads/realworld/pipeline/darknet/src/crnn_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..76d3bc1e65538782e061c4c763ad076947e8f27c --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/crnn_layer.c @@ -0,0 +1,283 @@ 
+#include "crnn_layer.h" +#include "convolutional_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer* make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize) +{ + fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters); + batch = batch / steps; + layer *l = calloc(1, sizeof(layer)); + l->batch = batch; + l->type = CRNN; + l->steps = steps; + l->h = h; + l->w = w; + l->c = c; + l->out_h = h; + l->out_w = w; + l->out_c = output_filters; + l->inputs = h*w*c; + l->hidden = h * w * hidden_filters; + l->outputs = l->out_h * l->out_w * l->out_c; + + l->state = calloc(l->hidden*batch*(steps+1), sizeof(float)); + + l->input_layer = calloc(1, sizeof(layer)); + fprintf(stderr, "\t\t"); + (l->input_layer) = make_convolutional_layer(batch*steps, h, w, c, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l->input_layer->batch = batch; + + l->self_layer = calloc(1, sizeof(layer)); + fprintf(stderr, "\t\t"); + (l->self_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l->self_layer->batch = batch; + + l->output_layer = calloc(1, sizeof(layer)); + fprintf(stderr, "\t\t"); + (l->output_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, output_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l->output_layer->batch = batch; + + l->output = l->output_layer->output; + l->delta = l->output_layer->delta; + + l->forward = forward_crnn_layer; + l->backward = 
backward_crnn_layer; + l->update = update_crnn_layer; + +#ifdef GPU + l->forward_gpu = forward_crnn_layer_gpu; + l->backward_gpu = backward_crnn_layer_gpu; + l->update_gpu = update_crnn_layer_gpu; + + l->state_gpu = cuda_make_array(l->state, l->hidden*batch*(steps+1)); + l->output_gpu = l->output_layer->output_gpu; + l->delta_gpu = l->output_layer->delta_gpu; +#endif + + return l; +} + +void update_crnn_layer(layer *l, update_args a, network net) +{ + update_convolutional_layer((l->input_layer), a, net); + update_convolutional_layer((l->self_layer), a, net); + update_convolutional_layer((l->output_layer), a, net); +} + +void forward_crnn_layer(layer *l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer *input_layer = (l->input_layer); + layer *self_layer = (l->self_layer); + layer *output_layer = (l->output_layer); + + fill_cpu(l->outputs * l->batch * l->steps, 0, output_layer->delta, 1); + fill_cpu(l->hidden * l->batch * l->steps, 0, self_layer->delta, 1); + fill_cpu(l->hidden * l->batch * l->steps, 0, input_layer->delta, 1); + if(net.train) fill_cpu(l->hidden * l->batch, 0, l->state, 1); + + for (i = 0; i < l->steps; ++i) { + s.input = net.input; + forward_convolutional_layer(input_layer, s); + + s.input = l->state; + forward_convolutional_layer(self_layer, s); + + float *old_state = l->state; + if(net.train) l->state += l->hidden*l->batch; + if(l->shortcut){ + copy_cpu(l->hidden * l->batch, old_state, 1, l->state, 1); + }else{ + fill_cpu(l->hidden * l->batch, 0, l->state, 1); + } + axpy_cpu(l->hidden * l->batch, 1, input_layer->output, 1, l->state, 1); + axpy_cpu(l->hidden * l->batch, 1, self_layer->output, 1, l->state, 1); + + s.input = l->state; + forward_convolutional_layer(output_layer, s); + + net.input += l->inputs*l->batch; + increment_layer(input_layer, 1); + increment_layer(self_layer, 1); + increment_layer(output_layer, 1); + } +} + +void backward_crnn_layer(layer *l, network net) +{ + network s = net; + int i; + layer 
*input_layer = (l->input_layer); + layer *self_layer = (l->self_layer); + layer *output_layer = (l->output_layer); + + increment_layer(input_layer, l->steps-1); + increment_layer(self_layer, l->steps-1); + increment_layer(output_layer, l->steps-1); + + l->state += l->hidden*l->batch*l->steps; + for (i = l->steps-1; i >= 0; --i) { + copy_cpu(l->hidden * l->batch, input_layer->output, 1, l->state, 1); + axpy_cpu(l->hidden * l->batch, 1, self_layer->output, 1, l->state, 1); + + s.input = l->state; + s.delta = self_layer->delta; + backward_convolutional_layer(output_layer, s); + + l->state -= l->hidden*l->batch; + /* + if(i > 0){ + copy_cpu(l->hidden * l->batch, input_layer->output - l->hidden*l->batch, 1, l->state, 1); + axpy_cpu(l->hidden * l->batch, 1, self_layer->output - l->hidden*l->batch, 1, l->state, 1); + }else{ + fill_cpu(l->hidden * l->batch, 0, l->state, 1); + } + */ + + s.input = l->state; + s.delta = self_layer->delta - l->hidden*l->batch; + if (i == 0) s.delta = 0; + backward_convolutional_layer(self_layer, s); + + copy_cpu(l->hidden*l->batch, self_layer->delta, 1, input_layer->delta, 1); + if (i > 0 && l->shortcut) axpy_cpu(l->hidden*l->batch, 1, self_layer->delta, 1, self_layer->delta - l->hidden*l->batch, 1); + s.input = net.input + i*l->inputs*l->batch; + if(net.delta) s.delta = net.delta + i*l->inputs*l->batch; + else s.delta = 0; + backward_convolutional_layer(input_layer, s); + + increment_layer(input_layer, -1); + increment_layer(self_layer, -1); + increment_layer(output_layer, -1); + } +} + +#ifdef GPU + +void pull_crnn_layer(layer *l) +{ + pull_convolutional_layer((l->input_layer)); + pull_convolutional_layer((l->self_layer)); + pull_convolutional_layer((l->output_layer)); +} + +void push_crnn_layer(layer *l) +{ + push_convolutional_layer((l->input_layer)); + push_convolutional_layer((l->self_layer)); + push_convolutional_layer((l->output_layer)); +} + +void update_crnn_layer_gpu(layer *l, update_args a, network net) +{ + 
update_convolutional_layer_gpu((l->input_layer), a, net); + update_convolutional_layer_gpu((l->self_layer), a, net); + update_convolutional_layer_gpu((l->output_layer), a, net); +} + +void forward_crnn_layer_gpu(layer *l, network net) +{ + network s = net; + int i; + layer *input_layer = (l->input_layer); + layer *self_layer = (l->self_layer); + layer *output_layer = (l->output_layer); + + fill_gpu(l->outputs * l->batch * l->steps, 0, output_layer->delta_gpu, 1); + fill_gpu(l->hidden * l->batch * l->steps, 0, self_layer->delta_gpu, 1); + fill_gpu(l->hidden * l->batch * l->steps, 0, input_layer->delta_gpu, 1); + if(net.train) fill_gpu(l->hidden * l->batch, 0, l->state_gpu, 1); + + for (i = 0; i < l->steps; ++i) { + s.input_gpu = net.input_gpu; + forward_convolutional_layer_gpu(input_layer, s); + + s.input_gpu = l->state_gpu; + forward_convolutional_layer_gpu(self_layer, s); + + float *old_state = l->state_gpu; + if(net.train) l->state_gpu += l->hidden*l->batch; + if(l->shortcut){ + copy_gpu(l->hidden * l->batch, old_state, 1, l->state_gpu, 1, net.streams[l->stream_index]); + }else{ + fill_gpu(l->hidden * l->batch, 0, l->state_gpu, 1); + } + axpy_gpu(l->hidden * l->batch, 1, input_layer->output_gpu, 1, l->state_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->hidden * l->batch, 1, self_layer->output_gpu, 1, l->state_gpu, 1, net.streams[l->stream_index]); + + s.input_gpu = l->state_gpu; + forward_convolutional_layer_gpu(output_layer, s); + + net.input_gpu += l->inputs*l->batch; + increment_layer(input_layer, 1); + increment_layer(self_layer, 1); + increment_layer(output_layer, 1); + } +} + +void backward_crnn_layer_gpu(layer *l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer *input_layer = (l->input_layer); + layer *self_layer = (l->self_layer); + layer *output_layer = (l->output_layer); + increment_layer(input_layer, l->steps - 1); + increment_layer(self_layer, l->steps - 1); + increment_layer(output_layer, l->steps - 1); + 
l->state_gpu += l->hidden*l->batch*l->steps; + for (i = l->steps-1; i >= 0; --i) { + copy_gpu(l->hidden * l->batch, input_layer->output_gpu, 1, l->state_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->hidden * l->batch, 1, self_layer->output_gpu, 1, l->state_gpu, 1, net.streams[l->stream_index]); + + s.input_gpu = l->state_gpu; + s.delta_gpu = self_layer->delta_gpu; + backward_convolutional_layer_gpu(output_layer, s); + + l->state_gpu -= l->hidden*l->batch; + + s.input_gpu = l->state_gpu; + s.delta_gpu = self_layer->delta_gpu - l->hidden*l->batch; + if (i == 0) s.delta_gpu = 0; + backward_convolutional_layer_gpu(self_layer, s); + + copy_gpu(l->hidden*l->batch, self_layer->delta_gpu, 1, input_layer->delta_gpu, 1, net.streams[l->stream_index]); + if (i > 0 && l->shortcut) axpy_gpu(l->hidden*l->batch, 1, self_layer->delta_gpu, 1, self_layer->delta_gpu - l->hidden*l->batch, 1, net.streams[l->stream_index]); + s.input_gpu = net.input_gpu + i*l->inputs*l->batch; + if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l->inputs*l->batch; + else s.delta_gpu = 0; + backward_convolutional_layer_gpu(input_layer, s); + + increment_layer(input_layer, -1); + increment_layer(self_layer, -1); + increment_layer(output_layer, -1); + } +} +#endif diff --git a/workloads/realworld/pipeline/darknet/src/crnn_layer.h b/workloads/realworld/pipeline/darknet/src/crnn_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1b1849d0fdb33fb72270ccf791286a087f123585 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/crnn_layer.h @@ -0,0 +1,24 @@ + +#ifndef CRNN_LAYER_H +#define CRNN_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer* make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize); + +void forward_crnn_layer(layer *l, network net); +void backward_crnn_layer(layer *l, network net); +void update_crnn_layer(layer *l, update_args a, 
network net); + +#ifdef GPU +void forward_crnn_layer_gpu(layer *l, network net); +void backward_crnn_layer_gpu(layer *l, network net); +void update_crnn_layer_gpu(layer *l, update_args a, network net); +void push_crnn_layer(layer *l); +void pull_crnn_layer(layer *l); +#endif + +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/crop_layer.c b/workloads/realworld/pipeline/darknet/src/crop_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..12e8d3721d38c4fde5c0dd2b37615cfd6dde93c3 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/crop_layer.c @@ -0,0 +1,103 @@ +#include "crop_layer.h" +#include "cuda_dark.h" +#include + +image get_crop_image(crop_layer *l) +{ + int h = l->out_h; + int w = l->out_w; + int c = l->out_c; + return float_to_image(w,h,c,l->output); +} + +void backward_crop_layer(crop_layer *l, network net){} +void backward_crop_layer_gpu(crop_layer *l, network net){} + +crop_layer* make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure) +{ + fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c); + crop_layer *l = calloc(1, sizeof(crop_layer)); + l->type = CROP; + l->batch = batch; + l->h = h; + l->w = w; + l->c = c; + l->scale = (float)crop_height / h; + l->flip = flip; + l->angle = angle; + l->saturation = saturation; + l->exposure = exposure; + l->out_w = crop_width; + l->out_h = crop_height; + l->out_c = c; + l->inputs = l->w * l->h * l->c; + l->outputs = l->out_w * l->out_h * l->out_c; + l->output = calloc(l->outputs*batch, sizeof(float)); + l->forward = forward_crop_layer; + l->backward = backward_crop_layer; + + #ifdef GPU + l->forward_gpu = forward_crop_layer_gpu; + l->backward_gpu = backward_crop_layer_gpu; + l->output_gpu = cuda_make_array(l->output, l->outputs*batch); + l->rand_gpu = cuda_make_array(0, l->batch*8); + #endif + return l; +} + +void resize_crop_layer(layer *l, int w, 
int h) +{ + l->w = w; + l->h = h; + + l->out_w = l->scale*w; + l->out_h = l->scale*h; + + l->inputs = l->w * l->h * l->c; + l->outputs = l->out_h * l->out_w * l->out_c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + #ifdef GPU + cuda_free(l->output_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + #endif +} + + +void forward_crop_layer(crop_layer *l, network net) +{ + int i,j,c,b,row,col; + int index; + int count = 0; + int flip = (l->flip && rand()%2); + int dh = rand()%(l->h - l->out_h + 1); + int dw = rand()%(l->w - l->out_w + 1); + float scale = 2; + float trans = -1; + if(l->noadjust){ + scale = 1; + trans = 0; + } + if(!net.train){ + flip = 0; + dh = (l->h - l->out_h)/2; + dw = (l->w - l->out_w)/2; + } + for(b = 0; b < l->batch; ++b){ + for(c = 0; c < l->c; ++c){ + for(i = 0; i < l->out_h; ++i){ + for(j = 0; j < l->out_w; ++j){ + if(flip){ + col = l->w - dw - j - 1; + }else{ + col = j + dw; + } + row = i + dh; + index = col+l->w*(row+l->h*(c + l->c*b)); + l->output[count++] = net.input[index]*scale + trans; + } + } + } + } +} + diff --git a/workloads/realworld/pipeline/darknet/src/crop_layer.h b/workloads/realworld/pipeline/darknet/src/crop_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..b20cef4e3ac9eb4bfcbc738775a58de93a9497e9 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/crop_layer.h @@ -0,0 +1,20 @@ +#ifndef CROP_LAYER_H +#define CROP_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +typedef layer crop_layer; + +image get_crop_image(crop_layer *l); +crop_layer* make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure); +void forward_crop_layer(crop_layer *l, network net); +void resize_crop_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_crop_layer_gpu(crop_layer *l, network net); +#endif + +#endif + diff --git 
a/workloads/realworld/pipeline/darknet/src/crop_layer_kernels.cu b/workloads/realworld/pipeline/darknet/src/crop_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..35e3cdc843914bcea98853973bf8688325f77549 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/crop_layer_kernels.cu @@ -0,0 +1,225 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "crop_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "image.h" +} + +__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c) +{ + if(x < 0 || x >= w || y < 0 || y >= h) return 0; + return image[x + w*(y + c*h)]; +} + +__device__ float3 rgb_to_hsv_kernel(float3 rgb) +{ + float r = rgb.x; + float g = rgb.y; + float b = rgb.z; + + float h, s, v; + float max = (r > g) ? ( (r > b) ? r : b) : ( (g > b) ? g : b); + float min = (r < g) ? ( (r < b) ? r : b) : ( (g < b) ? g : b); + float delta = max - min; + v = max; + if(max == 0){ + s = 0; + h = -1; + }else{ + s = delta/max; + if(r == max){ + h = (g - b) / delta; + } else if (g == max) { + h = 2 + (b - r) / delta; + } else { + h = 4 + (r - g) / delta; + } + if (h < 0) h += 6; + } + return make_float3(h, s, v); +} + +__device__ float3 hsv_to_rgb_kernel(float3 hsv) +{ + float h = hsv.x; + float s = hsv.y; + float v = hsv.z; + + float r, g, b; + float f, p, q, t; + + if (s == 0) { + r = g = b = v; + } else { + int index = (int) floorf(h); + f = h - index; + p = v*(1-s); + q = v*(1-s*f); + t = v*(1-s*(1-f)); + if(index == 0){ + r = v; g = t; b = p; + } else if(index == 1){ + r = q; g = v; b = p; + } else if(index == 2){ + r = p; g = v; b = t; + } else if(index == 3){ + r = p; g = q; b = v; + } else if(index == 4){ + r = t; g = p; b = v; + } else { + r = v; g = p; b = q; + } + } + r = (r < 0) ? 0 : ((r > 1) ? 1 : r); + g = (g < 0) ? 0 : ((g > 1) ? 1 : g); + b = (b < 0) ? 0 : ((b > 1) ? 
1 : b); + return make_float3(r, g, b); +} + +__device__ float bilinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c) +{ + int ix = (int) floorf(x); + int iy = (int) floorf(y); + + float dx = x - ix; + float dy = y - iy; + + float val = (1-dy) * (1-dx) * get_pixel_kernel(image, w, h, ix, iy, c) + + dy * (1-dx) * get_pixel_kernel(image, w, h, ix, iy+1, c) + + (1-dy) * dx * get_pixel_kernel(image, w, h, ix+1, iy, c) + + dy * dx * get_pixel_kernel(image, w, h, ix+1, iy+1, c); + return val; +} + +__global__ void levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure, float translate, float scale, float shift) +{ + int size = batch * w * h; + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= size) return; + int x = id % w; + id /= w; + int y = id % h; + id /= h; + float rshift = rand[0]; + float gshift = rand[1]; + float bshift = rand[2]; + float r0 = rand[8*id + 0]; + float r1 = rand[8*id + 1]; + float r2 = rand[8*id + 2]; + float r3 = rand[8*id + 3]; + + saturation = r0*(saturation - 1) + 1; + saturation = (r1 > .5f) ? 1.f/saturation : saturation; + exposure = r2*(exposure - 1) + 1; + exposure = (r3 > .5f) ? 
1.f/exposure : exposure; + + size_t offset = id * h * w * 3; + image += offset; + float r = image[x + w*(y + h*0)]; + float g = image[x + w*(y + h*1)]; + float b = image[x + w*(y + h*2)]; + float3 rgb = make_float3(r,g,b); + if(train){ + float3 hsv = rgb_to_hsv_kernel(rgb); + hsv.y *= saturation; + hsv.z *= exposure; + rgb = hsv_to_rgb_kernel(hsv); + } else { + shift = 0; + } + image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5f)*shift; + image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5f)*shift; + image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5f)*shift; +} + +__global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= size) return; + + float cx = w/2.f; + float cy = h/2.f; + + int count = id; + int j = id % crop_width; + id /= crop_width; + int i = id % crop_height; + id /= crop_height; + int k = id % c; + id /= c; + int b = id; + + float r4 = rand[8*b + 4]; + float r5 = rand[8*b + 5]; + float r6 = rand[8*b + 6]; + float r7 = rand[8*b + 7]; + + float dw = (w - crop_width)*r4; + float dh = (h - crop_height)*r5; + flip = (flip && (r6 > .5f)); + angle = 2*angle*r7 - angle; + if(!train){ + dw = (w - crop_width)/2.f; + dh = (h - crop_height)/2.f; + flip = 0; + angle = 0; + } + + input += w*h*c*b; + + float x = (flip) ? 
w - dw - j - 1 : j + dw; + float y = i + dh; + + float rx = cosf(angle)*(x-cx) - sinf(angle)*(y-cy) + cx; + float ry = sinf(angle)*(x-cx) + cosf(angle)*(y-cy) + cy; + + output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k); +} + +extern "C" void forward_crop_layer_gpu(crop_layer *layer, network net) +{ + cuda_random(layer->rand_gpu, layer->batch*8); + + float radians = layer->angle*3.14159265f/180.f; + + float scale = 2; + float translate = -1; + if(layer->noadjust){ + scale = 1; + translate = 0; + } + + int size = layer->batch * layer->w * layer->h; + + levels_image_kernel<<>>(net.input_gpu, layer->rand_gpu, layer->batch, layer->w, layer->h, net.train, layer->saturation, layer->exposure, translate, scale, layer->shift); + check_error(cudaPeekAtLastError()); + + size = layer->batch*layer->c*layer->out_w*layer->out_h; + + forward_crop_layer_kernel<<>>(net.input_gpu, layer->rand_gpu, size, layer->c, layer->h, layer->w, layer->out_h, layer->out_w, net.train, layer->flip, radians, layer->output_gpu); + check_error(cudaPeekAtLastError()); + +/* + cuda_pull_array(layer->output_gpu, layer->output, size); + image im = float_to_image(layer->crop_width, layer->crop_height, layer->c, layer->output + 0*(size/layer->batch)); + image im2 = float_to_image(layer->crop_width, layer->crop_height, layer->c, layer->output + 1*(size/layer->batch)); + image im3 = float_to_image(layer->crop_width, layer->crop_height, layer->c, layer->output + 2*(size/layer->batch)); + + translate_image(im, -translate); + scale_image(im, 1/scale); + translate_image(im2, -translate); + scale_image(im2, 1/scale); + translate_image(im3, -translate); + scale_image(im3, 1/scale); + + show_image(im, "cropped"); + show_image(im2, "cropped2"); + show_image(im3, "cropped3"); + cvWaitKey(0); + */ +} + diff --git a/workloads/realworld/pipeline/darknet/src/cuda_dark.cu b/workloads/realworld/pipeline/darknet/src/cuda_dark.cu new file mode 100644 index 
0000000000000000000000000000000000000000..68ec847b4b313eacfbcbe210a13e8f585cb5fcff --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/cuda_dark.cu @@ -0,0 +1,554 @@ +int gpu_index = 7; + +#ifdef GPU + +#include "cuda.h" +#include "utils.h" +#include "blas.h" +#include +#include +#include + + +#include + +void cuda_set_device(int n) +{ + gpu_index = n; + cudaError_t status = cudaSetDevice(n); + check_error(status); +} + +int cuda_get_device() +{ + int n = 0; + cudaError_t status = cudaGetDevice(&n); + check_error(status); + return n; +} + +void check_error(cudaError_t status) +{ + cudaDeviceSynchronize(); + cudaError_t status2 = cudaGetLastError(); + if (status != cudaSuccess) + { + const char *s = cudaGetErrorString(status); + char buffer[256]; + // printf("CUDA Error: %s\n", s); + fprintf(stderr, "CUDA Error: %s\n", s); + assert(0); + snprintf(buffer, 256, "CUDA Error: %s", s); + error(buffer); + } + if (status2 != cudaSuccess) + { + const char *s = cudaGetErrorString(status2); + char buffer[256]; + // printf("CUDA Error Prev: %s\n", s); + fprintf(stderr, "CUDA Error Prev: %s\n", s); + assert(0); + snprintf(buffer, 256, "CUDA Error Prev: %s", s); + error(buffer); + } +} + +void check_error_stream(cudaError_t status, cudaStream_t stream) +{ + cudaStreamSynchronize(stream); + cudaError_t status2 = cudaGetLastError(); + if (status != cudaSuccess) + { + const char *s = cudaGetErrorString(status); + char buffer[256]; + // printf("CUDA Error: %s\n", s); + fprintf(stderr, "CUDA Error: %s\n", s); + assert(0); + snprintf(buffer, 256, "CUDA Error: %s", s); + error(buffer); + } + if (status2 != cudaSuccess) + { + const char *s = cudaGetErrorString(status2); + char buffer[256]; + // printf("CUDA Error Prev: %s\n", s); + fprintf(stderr, "CUDA Error Prev: %s\n", s); + assert(0); + snprintf(buffer, 256, "CUDA Error Prev: %s", s); + error(buffer); + } +} + + +dim3 cuda_gridsize(size_t n){ + size_t k = (n-1) / BLOCK + 1; + size_t x = k; + size_t y = 1; + if(x > 65535){ + 
x = ceil(sqrt(k)); + y = (n-1)/(x*BLOCK) + 1; + } + dim3 d = {x, y, 1}; + //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); + return d; +} + +#ifdef CUDNN +cudnnHandle_t cudnn_handle() +{ + static int init[16] = {0}; + static cudnnHandle_t handle[16]; + int i = cuda_get_device(); + if(!init[i]) { + cudnnCreate(&handle[i]); + init[i] = 1; + } + return handle[i]; +} +#endif + +cublasHandle_t blas_handle() +{ + static int init[16] = {0}; + static cublasHandle_t handle[16]; + int i = cuda_get_device(); + if(!init[i]) { + cublasCreate(&handle[i]); + init[i] = 1; + } + return handle[i]; +} + +void forward_layer_gpu_malloc(thread_data *my_data) { + // cudaSetDevice(GPU_DEVICE); + + layer *l = my_data->l; + network *net = my_data->net; + + if (l->has_delta) { + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); + // fprintf(stderr, "forward_network_gpu has delta_gpu! \n"); + // cudaMalloc((void **) (&(l->delta_gpu)), l->outputs*l->batch*sizeof(float)); + // fill_gpu(l->outputs*l->batch, 0, l->delta_gpu, 1); + } + + // fprintf(stderr, "l->outputs is %d! 
\n", l->outputs); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + // cudaMalloc((void **) (&(l->output_gpu)), l->outputs*l->batch*sizeof(float)); + // fill_gpu(l->outputs*l->batch, 0, l->output_gpu, 1); + // l->forward_gpu(l, *net); +} + +void forward_layer_gpu_malloc_pipeline(thread_data *my_data) { + cudaSetDevice(GPU_DEVICE); + + layer *l = my_data->l; + network *net = my_data->net; + + if (l->has_delta) { + l->delta_gpu = cuda_make_array_pipe(l->delta, l->outputs*l->batch, net->streams[l->stream_index]); + // fill_gpu_1(l->outputs*l->batch, 0, l->delta_gpu, 1); + } + l->output_gpu = cuda_make_array_pipe(l->output, l->outputs*l->batch, net->streams[l->stream_index]); + // l->forward_gpu(l, *net); + // net->input_gpu = l->output_gpu; + // net->input = l->output; + // if(l->truth) { + // net->truth_gpu = l->output_gpu; + // net->truth = l->output; + // } +} + +void forward_layer_start(layer* l, network *net) { + if (net->is_first) { + thread_data *data = (thread_data *) malloc(sizeof(thread_data)); + data->l = l; + data->net = net; + // fprintf(stderr, "forward_layer_gpu %d, malloc not ready \n", net.index); + // forward_layer_gpu_malloc(data); + forward_layer_gpu_malloc_pipeline(data); + } + + if (l->malloc_async) { + pthread_join(l->thread, NULL); + } +} + +void forward_layer_end(layer* l, network *net) { + // l->forward_gpu(l, *net); + if (net->next_index < net->n && net->next_index != -1) { + fprintf(stderr, "forward_layer_gpu %d, next is %d, stream_index is %d \n", net->index, net->next_index, l->stream_index); + + layer *next = net->layers[net->next_index]; + + thread_data *data = (thread_data *) malloc(sizeof(thread_data)); + data->l = next; + data->net = net; + + // forward_layer_gpu_malloc(data); + forward_layer_gpu_malloc_pipeline(data); + + // next->thread = _beginthread((VoidFunction *)forward_layer_gpu_malloc_pipeline, data); + // next->malloc_async = 1; + } + + // net->input_gpu = l->output_gpu; + // net->input = l->output; + // 
if(l->truth) { + // net->truth_gpu = l->output_gpu; + // net->truth = l->output; + // } +} + +pthread_t _beginthread(VoidFunction x, void *z) +{ + pthread_t pt; + int v = pthread_create(&pt, NULL, x, z); + return pt; +} + +float *cuda_make_array(float *x, size_t n) +{ + float *x_gpu; + size_t size = sizeof(float)*n; + cudaError_t status = cudaMalloc((void **)&x_gpu, size); + check_error(status); + if(x){ + status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + check_error(status); + } else { + fill_gpu(n, 0, x_gpu, 1); + } + if(!x_gpu) error("Cuda malloc failed\n"); + return x_gpu; +} + +float *cuda_make_array_pipe(float *x, size_t n, cudaStream_t stream) +{ + float *x_gpu; + size_t size = sizeof(float)*n; + cudaError_t status = cudaMalloc((void **)&x_gpu, size); + // check_error(status); + if(x){ + // fprintf(stderr, "cuda_make_array has X \n"); + cudaMemcpyAsync(x_gpu, x, size, cudaMemcpyHostToDevice, stream); + // check_error_stream(status, stream); + } else { + // fprintf(stderr, "cuda_make_array not has X \n"); + fill_gpu(n, 0, x_gpu, 1); + } + if(!x_gpu) error("Cuda malloc failed\n"); + return x_gpu; +} + +void cuda_random(float *x_gpu, size_t n) +{ + static curandGenerator_t gen[16]; + static int init[16] = {0}; + int i = cuda_get_device(); + if(!init[i]){ + curandCreateGenerator(&gen[i], CURAND_RNG_PSEUDO_DEFAULT); + curandSetPseudoRandomGeneratorSeed(gen[i], time(0)); + init[i] = 1; + } + curandGenerateUniform(gen[i], x_gpu, n); + check_error(cudaPeekAtLastError()); +} + +float cuda_compare(float *x_gpu, float *x, size_t n, char *s) +{ + float *tmp = (float *) calloc(n, sizeof(float)); + cuda_pull_array(x_gpu, tmp, n); + //int i; + //for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]); + axpy_cpu(n, -1, x, 1, tmp, 1); + float err = dot_cpu(n, tmp, 1, tmp, 1); + printf("Error %s: %f\n", s, sqrt(err/n)); + free(tmp); + return err; +} +int *cuda_make_int_array(int *x, size_t n) +{ + int *x_gpu; + size_t size = sizeof(int)*n; + cudaError_t status = 
cudaMalloc((void **)&x_gpu, size); + check_error(status); + if(x){ + status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + check_error(status); + } + if(!x_gpu) error("Cuda malloc failed\n"); + return x_gpu; +} + +void cuda_free(float *x_gpu) +{ + cudaError_t status = cudaFree(x_gpu); + check_error(status); +} + +void cuda_push_array(float *x_gpu, float *x, size_t n) +{ + size_t size = sizeof(float)*n; + cudaError_t status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + check_error(status); +} + +void cuda_pull_array(float *x_gpu, float *x, size_t n) +{ + size_t size = sizeof(float)*n; + cudaError_t status = cudaMemcpy(x, x_gpu, size, cudaMemcpyDeviceToHost); + check_error(status); +} + +float cuda_mag_array(float *x_gpu, size_t n) +{ + float *temp = (float *) calloc(n, sizeof(float)); + cuda_pull_array(x_gpu, temp, n); + float m = mag_array(temp, n); + free(temp); + return m; +} + +static const char * +getMemcpyKindString(CUpti_ActivityMemcpyKind kind) +{ + switch (kind) + { + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOD: + return "HtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOH: + return "DtoH"; + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOA: + return "HtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOH: + return "AtoH"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOA: + return "AtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOD: + return "AtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOA: + return "DtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOD: + return "DtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOH: + return "HtoH"; + default: + break; + } + + return ""; +} + +static const char * +getUvmCounterKindString(CUpti_ActivityUnifiedMemoryCounterKind kind) +{ + switch (kind) + { + case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD: + return "BYTES_TRANSFER_HTOD"; + case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH: + return "BYTES_TRANSFER_DTOH"; + default: + break; + } + return ""; +} + +static void +printActivity(CUpti_Activity *record) +{ + switch 
(record->kind) + { + case CUPTI_ACTIVITY_KIND_KERNEL: + { + int status; + CUpti_ActivityKernel4 *kernel = (CUpti_ActivityKernel4 *)record; + printf("KERNEL %s, %llu, %llu, %llu\n", + abi::__cxa_demangle(kernel->name, 0, 0, &status), + (unsigned long long)(kernel->start), + (unsigned long long)(kernel->end), + (unsigned long long)(kernel->end) - (kernel->start)); + break; + } + case CUPTI_ACTIVITY_KIND_RUNTIME: + { + CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record; + const char *callback_name; + cuptiGetCallbackName(CUPTI_CB_DOMAIN_RUNTIME_API, api->cbid, &callback_name); + // printf("RUNTIME %s (cbid=%u) [ %llu - %llu ] process %u, thread %u, correlation %u\n", + // callback_name, api->cbid, + // (unsigned long long)(api->start - startTimestamp), + // (unsigned long long)(api->end - startTimestamp), + // api->processId, api->threadId, api->correlationId); + printf("RUNTIME %s (cbid=%u), %llu,%llu,%llu, process %u, thread %u, correlation %u\n", + callback_name, api->cbid, + (unsigned long long)(api->start), + (unsigned long long)(api->end), + (unsigned long long)(api->end - api->start), + api->processId, api->threadId, api->correlationId); + break; + } + case CUPTI_ACTIVITY_KIND_MEMCPY: + { + CUpti_ActivityMemcpy4 *memcpy = (CUpti_ActivityMemcpy4 *)record; + printf("MEMCPY %s, size %llu, %llu, %llu, %llu\n", + getMemcpyKindString((CUpti_ActivityMemcpyKind)memcpy->copyKind), + (unsigned long long)memcpy->bytes, + (unsigned long long)(memcpy->start), + (unsigned long long)(memcpy->end), + (unsigned long long)(memcpy->end) - (memcpy->start)); + break; + } + case CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER: + { + CUpti_ActivityUnifiedMemoryCounter2 *uvm = (CUpti_ActivityUnifiedMemoryCounter2 *)record; + printf("UVM MEMCPY %s, size %llu, %llu, %llu, %llu \n", + getUvmCounterKindString(uvm->counterKind), + (unsigned long long)uvm->value, + (unsigned long long)(uvm->start), + (unsigned long long)(uvm->end), + (unsigned long long)(uvm->end - uvm->start)); + break; + } + 
} +} + +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) +{ + uint8_t *bfr = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE); + if (bfr == NULL) + { + printf("Error: out of memory\n"); + exit(-1); + } + + *size = BUF_SIZE; + *buffer = ALIGN_BUFFER(bfr, ALIGN_SIZE); + *maxNumRecords = 0; +} + +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) +{ + CUptiResult status; + CUpti_Activity *record = NULL; + if (validSize > 0) + { + do + { + status = cuptiActivityGetNextRecord(buffer, validSize, &record); + if (status == CUPTI_SUCCESS) + { + printActivity(record); + } + else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) + break; + else + { + CUPTI_CALL(status); + } + } while (1); + + // report any records dropped from the queue + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if (dropped != 0) + { + printf("Dropped %u activity records\n", (unsigned int)dropped); + } + } + + free(buffer); +} + +#ifndef PROFILE +void initTrace() { + printf("not Profile initTrace()\n"); + return; +} + +void finiTrace() { + return; +} + +#else +void initTrace() +{ + printf("Profile initTrace()\n"); + size_t attrValue = 0, attrValueSize = sizeof(size_t); + + CUpti_ActivityUnifiedMemoryCounterConfig config[2]; + + // configure unified memory counters + config[0].scope = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE; + config[0].kind = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD; + config[0].deviceId = 0; + config[0].enable = 1; + + config[1].scope = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE; + config[1].kind = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH; + config[1].deviceId = 0; + config[1].enable = 1; + + CUptiResult res = cuptiActivityConfigureUnifiedMemoryCounter(config, 2); + if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED) + { + printf("Test is waived, unified memory is not 
supported on the underlying platform.\n"); + } + else if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE) + { + printf("Test is waived, unified memory is not supported on the device.\n"); + } + else if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES) + { + printf("Test is waived, unified memory is not supported on the non-P2P multi-gpu setup.\n"); + } + else + { + CUPTI_CALL(res); + } + + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER)); + + // Register callbacks for buffer requests and for buffers completed by CUPTI. + CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + + // Optionally get and set activity attributes. + // Attributes can be set by the CUPTI client to change behavior of the activity API. + // Some attributes require to be set before any CUDA context is created to be effective, + // e.g. to be applied to all device buffer allocations (see documentation). 
+ CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + printf("%s = %llu B\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +} + +void finiTrace() +{ + // Force flush any remaining activity buffers before termination of the application + CUPTI_CALL(cuptiActivityFlushAll(1)); +} +#endif + + +void GPU_argv_init() +{ + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, GPU_DEVICE); + printf("setting device %d with name %s\n", GPU_DEVICE, deviceProp.name); + cudaSetDevice(GPU_DEVICE); +} +#else +void cuda_set_device(int n){} + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/cuda_dark.h b/workloads/realworld/pipeline/darknet/src/cuda_dark.h new file mode 100644 index 0000000000000000000000000000000000000000..d010819c397bfdf1f7945a79ecb733fe33db4fa3 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/cuda_dark.h @@ -0,0 +1,74 @@ +#ifndef CUDA_H +#define CUDA_H + +#include "darknet.h" + +#ifdef GPU + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) (((uintptr_t)(buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t)(buffer) & ((align)-1))) : (buffer)) + +static uint64_t startTimestamp; +// Timestamp at trace initialization time. 
Used to normalized other +// timestamps + +#define CUPTI_CALL(call) \ + do \ + { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) \ + { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + if (_status == CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED) \ + exit(0); \ + else \ + exit(-1); \ + } \ + } while (0) + +#include + +#ifdef __cplusplus +extern "C" { +#endif +void check_error(cudaError_t status); +void check_error_stream(cudaError_t status, cudaStream_t stream); +cublasHandle_t blas_handle(); +int *cuda_make_int_array(int *x, size_t n); +void cuda_random(float *x_gpu, size_t n); +float cuda_compare(float *x_gpu, float *x, size_t n, char *s); +dim3 cuda_gridsize(size_t n); + +struct thread_data { + layer* l; + network *net; +}; + +void GPU_argv_init(); +void initTrace(); +void finiTrace(); +void startCPU(); +void endCPU(); +void forward_layer_start(layer* l, network *net); +void forward_layer_end(layer* l, network *net); +typedef void *VoidFunction(void *); +pthread_t _beginthread(VoidFunction x, void *z); + +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords); +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); +static void printActivity(CUpti_Activity *record); + +#ifdef __cplusplus +} +#endif + +#ifdef CUDNN +cudnnHandle_t cudnn_handle(); +#endif + +#endif +#endif diff --git a/workloads/realworld/pipeline/darknet/src/cupti_add.cpp b/workloads/realworld/pipeline/darknet/src/cupti_add.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a0d16eb72f41f8e858a59354d2de9d6b470c0e76 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/cupti_add.cpp @@ -0,0 +1,112 @@ +#include "cupti_add.h" + +static void +printActivity(CUpti_Activity *record) +{ + switch (record->kind) + { + case 
CUPTI_ACTIVITY_KIND_KERNEL: + { + int status; + CUpti_ActivityKernel4 *kernel = (CUpti_ActivityKernel4 *)record; + printf("CUPTI,%s,%llu,%llu,%llu\n", + abi::__cxa_demangle(kernel->name, 0, 0, &status), + (unsigned long long)(kernel->start), + (unsigned long long)(kernel->end), + (unsigned long long)(kernel->end - startTimestamp) - (kernel->start - startTimestamp)); + break; + } + case CUPTI_ACTIVITY_KIND_RUNTIME: + { + CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record; + const char *callback_name; + cuptiGetCallbackName(CUPTI_CB_DOMAIN_RUNTIME_API, api->cbid, &callback_name); + printf("RUNTIME %s (cbid=%u) [ %llu - %llu ] process %u, thread %u, correlation %u\n", + callback_name, api->cbid, + (unsigned long long)(api->start - startTimestamp), + (unsigned long long)(api->end - startTimestamp), + api->processId, api->threadId, api->correlationId); + break; + } + } +} + +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) +{ + uint8_t *bfr = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE); + if (bfr == NULL) + { + printf("Error: out of memory\n"); + exit(-1); + } + + *size = BUF_SIZE; + *buffer = ALIGN_BUFFER(bfr, ALIGN_SIZE); + *maxNumRecords = 0; +} + +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) +{ + CUptiResult status; + CUpti_Activity *record = NULL; + if (validSize > 0) + { + do + { + status = cuptiActivityGetNextRecord(buffer, validSize, &record); + if (status == CUPTI_SUCCESS) + { + printActivity(record); + } + else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) + break; + else + { + CUPTI_CALL(status); + } + } while (1); + + // report any records dropped from the queue + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if (dropped != 0) + { + printf("Dropped %u activity records\n", (unsigned int)dropped); + } + } + + free(buffer); +} + +void initTrace() +{ + size_t attrValue = 0, attrValueSize = sizeof(size_t); + + // 
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); + + // Register callbacks for buffer requests and for buffers completed by CUPTI. + CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + + // Optionally get and set activity attributes. + // Attributes can be set by the CUPTI client to change behavior of the activity API. + // Some attributes require to be set before any CUDA context is created to be effective, + // e.g. to be applied to all device buffer allocations (see documentation). + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + printf("%s = %llu B\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +} + +void finiTrace() +{ + // Force flush any remaining activity buffers before termination of the application + CUPTI_CALL(cuptiActivityFlushAll(1)); +} \ No newline at end of file diff --git a/workloads/realworld/pipeline/darknet/src/cupti_add.h b/workloads/realworld/pipeline/darknet/src/cupti_add.h new file mode 100644 index 0000000000000000000000000000000000000000..a30b7b847ad13381032d2f60eac2955d30146485 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/cupti_add.h @@ -0,0 +1,36 @@ +#include +#include +#include + + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) \ + (((uintptr_t) (buffer) & 
((align)-1)) ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer)) + +static uint64_t startTimestamp; +// Timestamp at trace initialization time. Used to normalized other +// timestamps + +#define CUPTI_CALL(call) \ + do { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + if(_status == CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED) \ + exit(0); \ + else \ + exit(-1); \ + } \ + } while (0) + +void initTrace(); +void finiTrace(); +void startCPU(); +void endCPU(); +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords); +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); +static void printActivity(CUpti_Activity *record); diff --git a/workloads/realworld/pipeline/darknet/src/data.c b/workloads/realworld/pipeline/darknet/src/data.c new file mode 100644 index 0000000000000000000000000000000000000000..d2ddc2b130ab1eaff3cb48d8eca63b5b74510a74 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/data.c @@ -0,0 +1,1685 @@ +#include "data.h" +#include "utils.h" +#include "image.h" +#include "cuda_dark.h" + +#include +#include +#include + +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + +list *get_paths(char *filename) +{ + char *path; + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + list *lines = make_list(); + while((path=fgetl(file))){ + list_insert(lines, path); + } + fclose(file); + return lines; +} + +/* +char **get_random_paths_indexes(char **paths, int n, int m, int *indexes) +{ + char **random_paths = calloc(n, sizeof(char*)); + int i; + pthread_mutex_lock(&mutex); + for(i = 0; i < n; ++i){ + int index = rand()%m; + indexes[i] = index; + random_paths[i] = paths[index]; + if(i == 0) printf("%s\n", paths[index]); + } + 
pthread_mutex_unlock(&mutex); + return random_paths; +} +*/ + +char **get_random_paths(char **paths, int n, int m) +{ + char **random_paths = calloc(n, sizeof(char*)); + int i; + pthread_mutex_lock(&mutex); + for(i = 0; i < n; ++i){ + int index = rand()%m; + random_paths[i] = paths[index]; + //if(i == 0) printf("%s\n", paths[index]); + } + pthread_mutex_unlock(&mutex); + return random_paths; +} + +char **find_replace_paths(char **paths, int n, char *find, char *replace) +{ + char **replace_paths = calloc(n, sizeof(char*)); + int i; + for(i = 0; i < n; ++i){ + char replaced[4096]; + find_replace(paths[i], find, replace, replaced); + replace_paths[i] = copy_string(replaced); + } + return replace_paths; +} + +matrix load_image_paths_gray(char **paths, int n, int w, int h) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image(paths[i], w, h, 3); + + image gray = grayscale_image(im); + free_image(im); + im = gray; + + X.vals[i] = im.data; + X.cols = im.h*im.w*im.c; + } + return X; +} + +matrix load_image_paths(char **paths, int n, int w, int h) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], w, h); + X.vals[i] = im.data; + X.cols = im.h*im.w*im.c; + } + return X; +} + +matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], 0, 0); + image crop; + if(center){ + crop = center_crop_image(im, size, size); + } else { + crop = random_augment_image(im, angle, aspect, min, max, size, size); + } + int flip = rand()%2; + if (flip) flip_image(crop); + random_distort_image(crop, hue, saturation, exposure); + + 
/* + show_image(im, "orig"); + show_image(crop, "crop"); + cvWaitKey(0); + */ + //grayscale_image_3c(crop); + free_image(im); + X.vals[i] = crop.data; + X.cols = crop.h*crop.w*crop.c; + } + return X; +} + + +box_label *read_boxes(char *filename, int *n) +{ + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + float x, y, h, w; + int id; + int count = 0; + int size = 64; + box_label *boxes = calloc(size, sizeof(box_label)); + while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){ + if(count == size) { + size = size * 2; + boxes = realloc(boxes, size*sizeof(box_label)); + } + boxes[count].id = id; + boxes[count].x = x; + boxes[count].y = y; + boxes[count].h = h; + boxes[count].w = w; + boxes[count].left = x - w/2; + boxes[count].right = x + w/2; + boxes[count].top = y - h/2; + boxes[count].bottom = y + h/2; + ++count; + } + fclose(file); + *n = count; + return boxes; +} + +void randomize_boxes(box_label *b, int n) +{ + int i; + for(i = 0; i < n; ++i){ + box_label swap = b[i]; + int index = rand()%n; + b[i] = b[index]; + b[index] = swap; + } +} + +void correct_boxes(box_label *boxes, int n, float dx, float dy, float sx, float sy, int flip) +{ + int i; + for(i = 0; i < n; ++i){ + if(boxes[i].x == 0 && boxes[i].y == 0) { + boxes[i].x = 999999; + boxes[i].y = 999999; + boxes[i].w = 999999; + boxes[i].h = 999999; + continue; + } + boxes[i].left = boxes[i].left * sx - dx; + boxes[i].right = boxes[i].right * sx - dx; + boxes[i].top = boxes[i].top * sy - dy; + boxes[i].bottom = boxes[i].bottom* sy - dy; + + if(flip){ + float swap = boxes[i].left; + boxes[i].left = 1. - boxes[i].right; + boxes[i].right = 1. 
- swap; + } + + boxes[i].left = constrain(0, 1, boxes[i].left); + boxes[i].right = constrain(0, 1, boxes[i].right); + boxes[i].top = constrain(0, 1, boxes[i].top); + boxes[i].bottom = constrain(0, 1, boxes[i].bottom); + + boxes[i].x = (boxes[i].left+boxes[i].right)/2; + boxes[i].y = (boxes[i].top+boxes[i].bottom)/2; + boxes[i].w = (boxes[i].right - boxes[i].left); + boxes[i].h = (boxes[i].bottom - boxes[i].top); + + boxes[i].w = constrain(0, 1, boxes[i].w); + boxes[i].h = constrain(0, 1, boxes[i].h); + } +} + +void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int count = 0; + box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + float x,y,w,h; + int id; + int i; + + for (i = 0; i < count && i < 90; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if (w < .0 || h < .0) continue; + + int index = (4+classes) * i; + + truth[index++] = x; + truth[index++] = y; + truth[index++] = w; + truth[index++] = h; + + if (id < classes) truth[index+id] = 1; + } + free(boxes); +} + +void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + int count = 0; + 
box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + float x,y,w,h; + int id; + int i; + + for (i = 0; i < count; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if (w < .005 || h < .005) continue; + + int col = (int)(x*num_boxes); + int row = (int)(y*num_boxes); + + x = x*num_boxes - col; + y = y*num_boxes - row; + + int index = (col+row*num_boxes)*(5+classes); + if (truth[index]) continue; + truth[index++] = 1; + + if (id < classes) truth[index+id] = 1; + index += classes; + + truth[index++] = x; + truth[index++] = y; + truth[index++] = w; + truth[index++] = h; + } + free(boxes); +} + +void load_rle(image im, int *rle, int n) +{ + int count = 0; + int curr = 0; + int i,j; + for(i = 0; i < n; ++i){ + for(j = 0; j < rle[i]; ++j){ + im.data[count++] = curr; + } + curr = 1 - curr; + } + for(; count < im.h*im.w*im.c; ++count){ + im.data[count] = curr; + } +} + +void or_image(image src, image dest, int c) +{ + int i; + for(i = 0; i < src.w*src.h; ++i){ + if(src.data[i]) dest.data[dest.w*dest.h*c + i] = 1; + } +} + +void exclusive_image(image src) +{ + int k, j, i; + int s = src.w*src.h; + for(k = 0; k < src.c-1; ++k){ + for(i = 0; i < s; ++i){ + if (src.data[k*s + i]){ + for(j = k+1; j < src.c; ++j){ + src.data[j*s + i] = 0; + } + } + } + } +} + +box bound_image(image im) +{ + int x,y; + int minx = im.w; + int miny = im.h; + int maxx = 0; + int maxy = 0; + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + if(im.data[y*im.w + x]){ + minx = (x < minx) ? x : minx; + miny = (y < miny) ? y : miny; + maxx = (x > maxx) ? x : maxx; + maxy = (y > maxy) ? 
y : maxy; + } + } + } + box b = {minx, miny, maxx-minx + 1, maxy-miny + 1}; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + return b; +} + +void fill_truth_iseg(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + int i = 0; + int j; + image part = make_image(w, h, 1); + while((fscanf(file, "%d %s", &id, buff) == 2) && i < num_boxes){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect); + if(flip) flip_image(sized); + + image mask = resize_image(sized, mw, mh); + truth[i*(mw*mh+1)] = id; + for(j = 0; j < mw*mh; ++j){ + truth[i*(mw*mh + 1) + 1 + j] = mask.data[j]; + } + ++i; + + free_image(mask); + free_image(sized); + free(rle); + } + if(i < num_boxes) truth[i*(mw*mh+1)] = -1; + fclose(file); + free_image(part); +} + +void fill_truth_mask(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + int i = 0; + image part = make_image(w, h, 1); + while((fscanf(file, "%d %s", &id, buff) == 2) && i < num_boxes){ + int n = 0; + int 
*rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect); + if(flip) flip_image(sized); + box b = bound_image(sized); + if(b.w > 0){ + image crop = crop_image(sized, b.x, b.y, b.w, b.h); + image mask = resize_image(crop, mw, mh); + truth[i*(4 + mw*mh + 1) + 0] = (b.x + b.w/2.)/sized.w; + truth[i*(4 + mw*mh + 1) + 1] = (b.y + b.h/2.)/sized.h; + truth[i*(4 + mw*mh + 1) + 2] = b.w/sized.w; + truth[i*(4 + mw*mh + 1) + 3] = b.h/sized.h; + int j; + for(j = 0; j < mw*mh; ++j){ + truth[i*(4 + mw*mh + 1) + 4 + j] = mask.data[j]; + } + truth[i*(4 + mw*mh + 1) + 4 + mw*mh] = id; + free_image(crop); + free_image(mask); + ++i; + } + free_image(sized); + free(rle); + } + fclose(file); + free_image(part); +} + + +void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + + find_replace(labelpath, "raw", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + int count = 0; + box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + if(count > num_boxes) count = num_boxes; + float x,y,w,h; + int id; + int i; + int sub = 0; + + for (i = 0; i < count; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if ((w < .001 || h < .001)) { + ++sub; + continue; + } + + truth[(i-sub)*5+0] = x; + truth[(i-sub)*5+1] = y; + truth[(i-sub)*5+2] = w; + truth[(i-sub)*5+3] = h; + truth[(i-sub)*5+4] = id; + } + free(boxes); +} + +#define NUMCHARS 37 + +void print_letters(float 
*pred, int n) +{ + int i; + for(i = 0; i < n; ++i){ + int index = max_index(pred+i*NUMCHARS, NUMCHARS); + printf("%c", int_to_alphanum(index)); + } + printf("\n"); +} + +void fill_truth_captcha(char *path, int n, float *truth) +{ + char *begin = strrchr(path, '/'); + ++begin; + int i; + for(i = 0; i < strlen(begin) && i < n && begin[i] != '.'; ++i){ + int index = alphanum_to_int(begin[i]); + if(index > 35) printf("Bad %c\n", begin[i]); + truth[i*NUMCHARS+index] = 1; + } + for(;i < n; ++i){ + truth[i*NUMCHARS + NUMCHARS-1] = 1; + } +} + +data load_data_captcha(char **paths, int n, int m, int k, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = make_matrix(n, k*NUMCHARS); + int i; + for(i = 0; i < n; ++i){ + fill_truth_captcha(paths[i], k, d.y.vals[i]); + } + if(m) free(paths); + return d; +} + +data load_data_captcha_encode(char **paths, int n, int m, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.X.cols = 17100; + d.y = d.X; + if(m) free(paths); + return d; +} + +void fill_truth(char *path, char **labels, int k, float *truth) +{ + int i; + memset(truth, 0, k*sizeof(float)); + int count = 0; + for(i = 0; i < k; ++i){ + if(strstr(path, labels[i])){ + truth[i] = 1; + ++count; + //printf("%s %s %d\n", path, labels[i], i); + } + } + if(count != 1 && (k != 1 || count != 0)) printf("Too many or too few labels: %d, %s\n", count, path); +} + +void fill_hierarchy(float *truth, int k, tree *hierarchy) +{ + int j; + for(j = 0; j < k; ++j){ + if(truth[j]){ + int parent = hierarchy->parent[j]; + while(parent >= 0){ + truth[parent] = 1; + parent = hierarchy->parent[parent]; + } + } + } + int i; + int count = 0; + for(j = 0; j < hierarchy->groups; ++j){ + //printf("%d\n", count); + int mask = 1; + for(i = 0; i < hierarchy->group_size[j]; ++i){ + if(truth[count + i]){ + mask = 0; + break; + } 
+ } + if (mask) { + for(i = 0; i < hierarchy->group_size[j]; ++i){ + truth[count + i] = SECRET_NUM; + } + } + count += hierarchy->group_size[j]; + } +} + +matrix load_regression_labels_paths(char **paths, int n, int k) +{ + matrix y = make_matrix(n, k); + int i,j; + for(i = 0; i < n; ++i){ + char labelpath[4096]; + find_replace(paths[i], "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".BMP", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPeG", ".txt", labelpath); + find_replace(labelpath, ".Jpeg", ".txt", labelpath); + find_replace(labelpath, ".PNG", ".txt", labelpath); + find_replace(labelpath, ".TIF", ".txt", labelpath); + find_replace(labelpath, ".bmp", ".txt", labelpath); + find_replace(labelpath, ".jpeg", ".txt", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".tif", ".txt", labelpath); + + FILE *file = fopen(labelpath, "r"); + for(j = 0; j < k; ++j){ + fscanf(file, "%f", &(y.vals[i][j])); + } + fclose(file); + } + return y; +} + +matrix load_labels_paths(char **paths, int n, char **labels, int k, tree *hierarchy) +{ + matrix y = make_matrix(n, k); + int i; + for(i = 0; i < n && labels; ++i){ + fill_truth(paths[i], labels, k, y.vals[i]); + if(hierarchy){ + fill_hierarchy(y.vals[i], k, hierarchy); + } + } + return y; +} + +matrix load_tags_paths(char **paths, int n, int k) +{ + matrix y = make_matrix(n, k); + int i; + //int count = 0; + for(i = 0; i < n; ++i){ + char label[4096]; + find_replace(paths[i], "images", "labels", label); + find_replace(label, ".jpg", ".txt", label); + FILE *file = fopen(label, "r"); + if (!file) continue; + //++count; + int tag; + while(fscanf(file, "%d", &tag) == 1){ + if(tag < k){ + y.vals[i][tag] = 1; + } + } + fclose(file); + } + //printf("%d/%d\n", 
count, n); + return y; +} + +char **get_labels(char *filename) +{ + list *plist = get_paths(filename); + char **labels = (char **)list_to_array(plist); + free_list(plist); + return labels; +} + +void free_data(data d) +{ + if(!d.shallow){ + free_matrix(d.X); + free_matrix(d.y); + }else{ + free(d.X.vals); + free(d.y.vals); + } +} + +image get_segmentation_image(char *path, int w, int h, int classes) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + image mask = make_image(w, h, classes); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + image part = make_image(w, h, 1); + while(fscanf(file, "%d %s", &id, buff) == 2){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + or_image(part, mask, id); + free(rle); + } + //exclusive_image(mask); + fclose(file); + free_image(part); + return mask; +} + +image get_segmentation_image2(char *path, int w, int h, int classes) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + image mask = make_image(w, h, classes+1); + int i; + for(i = 0; i < w*h; ++i){ + mask.data[w*h*classes + i] = 1; + } + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + image part = make_image(w, h, 1); + while(fscanf(file, "%d %s", &id, buff) == 2){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + or_image(part, mask, id); + for(i = 0; i < w*h; ++i){ + if(part.data[i]) mask.data[w*h*classes + i] = 
0; + } + free(rle); + } + //exclusive_image(mask); + fclose(file); + free_image(part); + return mask; +} + +data load_data_seg(int n, char **paths, int m, int w, int h, int classes, int min, int max, float angle, float aspect, float hue, float saturation, float exposure, int div) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + + d.y.rows = n; + d.y.cols = h*w*classes/div/div; + d.y.vals = calloc(d.X.rows, sizeof(float*)); + + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + + image mask = get_segmentation_image(random_paths[i], orig.w, orig.h, classes); + //image mask = make_image(orig.w, orig.h, classes+1); + image sized_m = rotate_crop_image(mask, a.rad, a.scale/div, a.w/div, a.h/div, a.dx/div, a.dy/div, a.aspect); + + if(flip) flip_image(sized_m); + d.y.vals[i] = sized_m.data; + + free_image(orig); + free_image(mask); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_iseg(int n, char **paths, int m, int w, int h, int classes, int boxes, int div, int min, int max, float angle, float aspect, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, (((w/div)*(h/div))+1)*boxes); + + for(i = 0; i < n; ++i){ + image orig = 
load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + //show_image(sized, "image"); + + fill_truth_iseg(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, w/div, h/div); + + free_image(orig); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_mask(int n, char **paths, int m, int w, int h, int classes, int boxes, int coords, int min, int max, float angle, float aspect, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, (coords+1)*boxes); + + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + //show_image(sized, "image"); + + fill_truth_mask(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, 14, 14); + + free_image(orig); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter, float hue, float saturation, float exposure) 
+{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + + int k = size*size*(5+classes); + d.y = make_matrix(n, k); + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + + int oh = orig.h; + int ow = orig.w; + + int dw = (ow*jitter); + int dh = (oh*jitter); + + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); + + int swidth = ow - pleft - pright; + int sheight = oh - ptop - pbot; + + float sx = (float)swidth / ow; + float sy = (float)sheight / oh; + + int flip = rand()%2; + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + + float dx = ((float)pleft/ow)/sx; + float dy = ((float)ptop /oh)/sy; + + image sized = resize_image(cropped, w, h); + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + + fill_truth_region(random_paths[i], d.y.vals[i], classes, size, flip, dx, dy, 1./sx, 1./sy); + + free_image(orig); + free_image(cropped); + } + free(random_paths); + return d; +} + +data load_data_compare(int n, char **paths, int m, int classes, int w, int h) +{ + if(m) paths = get_random_paths(paths, 2*n, m); + int i,j; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*6; + + int k = 2*(classes); + d.y = make_matrix(n, k); + for(i = 0; i < n; ++i){ + image im1 = load_image_color(paths[i*2], w, h); + image im2 = load_image_color(paths[i*2+1], w, h); + + d.X.vals[i] = calloc(d.X.cols, sizeof(float)); + memcpy(d.X.vals[i], im1.data, h*w*3*sizeof(float)); + memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float)); + + int id; + float iou; + + char imlabel1[4096]; + char imlabel2[4096]; + find_replace(paths[i*2], "imgs", "labels", imlabel1); + find_replace(imlabel1, "jpg", "txt", 
imlabel1); + FILE *fp1 = fopen(imlabel1, "r"); + + while(fscanf(fp1, "%d %f", &id, &iou) == 2){ + if (d.y.vals[i][2*id] < iou) d.y.vals[i][2*id] = iou; + } + + find_replace(paths[i*2+1], "imgs", "labels", imlabel2); + find_replace(imlabel2, "jpg", "txt", imlabel2); + FILE *fp2 = fopen(imlabel2, "r"); + + while(fscanf(fp2, "%d %f", &id, &iou) == 2){ + if (d.y.vals[i][2*id + 1] < iou) d.y.vals[i][2*id + 1] = iou; + } + + for (j = 0; j < classes; ++j){ + if (d.y.vals[i][2*j] > .5 && d.y.vals[i][2*j+1] < .5){ + d.y.vals[i][2*j] = 1; + d.y.vals[i][2*j+1] = 0; + } else if (d.y.vals[i][2*j] < .5 && d.y.vals[i][2*j+1] > .5){ + d.y.vals[i][2*j] = 0; + d.y.vals[i][2*j+1] = 1; + } else { + d.y.vals[i][2*j] = SECRET_NUM; + d.y.vals[i][2*j+1] = SECRET_NUM; + } + } + fclose(fp1); + fclose(fp2); + + free_image(im1); + free_image(im2); + } + if(m) free(paths); + return d; +} + +data load_data_swag(char **paths, int n, int classes, float jitter) +{ + int index = rand()%n; + char *random_path = paths[index]; + + image orig = load_image_color(random_path, 0, 0); + int h = orig.h; + int w = orig.w; + + data d = {0}; + d.shallow = 0; + d.w = w; + d.h = h; + + d.X.rows = 1; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + int k = (4+classes)*90; + d.y = make_matrix(1, k); + + int dw = w*jitter; + int dh = h*jitter; + + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); + + int swidth = w - pleft - pright; + int sheight = h - ptop - pbot; + + float sx = (float)swidth / w; + float sy = (float)sheight / h; + + int flip = rand()%2; + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + + float dx = ((float)pleft/w)/sx; + float dy = ((float)ptop /h)/sy; + + image sized = resize_image(cropped, w, h); + if(flip) flip_image(sized); + d.X.vals[0] = sized.data; + + fill_truth_swag(random_path, d.y.vals[0], classes, flip, dx, dy, 1./sx, 1./sy); + + free_image(orig); + 
free_image(cropped); + + return d; +} + +data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, 5*boxes); + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + image sized = make_image(w, h, orig.c); + fill_image(sized, .5); + + float dw = jitter * orig.w; + float dh = jitter * orig.h; + + float new_ar = (orig.w + rand_uniform(-dw, dw)) / (orig.h + rand_uniform(-dh, dh)); + //float scale = rand_uniform(.25, 2); + float scale = 1; + + float nw, nh; + + if(new_ar < 1){ + nh = scale * h; + nw = nh * new_ar; + } else { + nw = scale * w; + nh = nw / new_ar; + } + + float dx = rand_uniform(0, w - nw); + float dy = rand_uniform(0, h - nh); + + place_image(orig, nw, nh, dx, dy, sized); + + random_distort_image(sized, hue, saturation, exposure); + + int flip = rand()%2; + if(flip) flip_image(sized); + d.X.vals[i] = sized.data; + + + fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, -dx/w, -dy/h, nw/w, nh/h); + + free_image(orig); + } + free(random_paths); + return d; +} + +void *load_thread(void *ptr) +{ + //printf("Loading data: %d\n", rand()); + load_args a = *(struct load_args*)ptr; + if(a.exposure == 0) a.exposure = 1; + if(a.saturation == 0) a.saturation = 1; + if(a.aspect == 0) a.aspect = 1; + + if (a.type == OLD_CLASSIFICATION_DATA){ + *a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h); + } else if (a.type == REGRESSION_DATA){ + *a.d = load_data_regression(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == CLASSIFICATION_DATA){ + *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.min, a.max, 
a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.center); + } else if (a.type == SUPER_DATA){ + *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale); + } else if (a.type == WRITING_DATA){ + *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h); + } else if (a.type == ISEG_DATA){ + *a.d = load_data_iseg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.scale, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == INSTANCE_DATA){ + *a.d = load_data_mask(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.coords, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == SEGMENTATION_DATA){ + *a.d = load_data_seg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.scale); + } else if (a.type == REGION_DATA){ + *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); + } else if (a.type == DETECTION_DATA){ + *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); + } else if (a.type == SWAG_DATA){ + *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter); + } else if (a.type == COMPARE_DATA){ + *a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h); + } else if (a.type == IMAGE_DATA){ + *(a.im) = load_image_color(a.path, 0, 0); + *(a.resized) = resize_image(*(a.im), a.w, a.h); + } else if (a.type == LETTERBOX_DATA){ + *(a.im) = load_image_color(a.path, 0, 0); + *(a.resized) = letterbox_image(*(a.im), a.w, a.h); + } else if (a.type == TAG_DATA){ + *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } + free(ptr); + return 0; +} + +pthread_t load_data_in_thread(load_args args) +{ + pthread_t thread; + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + 
if(pthread_create(&thread, 0, load_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void *load_threads(void *ptr) +{ + int i; + load_args args = *(load_args *)ptr; + if (args.threads == 0) args.threads = 1; + data *out = args.d; + int total = args.n; + free(ptr); + data *buffers = calloc(args.threads, sizeof(data)); + pthread_t *threads = calloc(args.threads, sizeof(pthread_t)); + for(i = 0; i < args.threads; ++i){ + args.d = buffers + i; + args.n = (i+1) * total/args.threads - i * total/args.threads; + threads[i] = load_data_in_thread(args); + } + for(i = 0; i < args.threads; ++i){ + pthread_join(threads[i], 0); + } + *out = concat_datas(buffers, args.threads); + out->shallow = 0; + for(i = 0; i < args.threads; ++i){ + buffers[i].shallow = 1; + free_data(buffers[i]); + } + free(buffers); + free(threads); + return 0; +} + +void load_data_blocking(load_args args) +{ + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + load_thread(ptr); +} + +pthread_t load_data(load_args args) +{ + pthread_t thread; + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + if(pthread_create(&thread, 0, load_threads, ptr)) error("Thread creation failed"); + return thread; +} + +data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h) +{ + if(m) paths = get_random_paths(paths, n, m); + char **replace_paths = find_replace_paths(paths, n, ".png", "-label.png"); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = load_image_paths_gray(replace_paths, n, out_w, out_h); + if(m) free(paths); + int i; + for(i = 0; i < n; ++i) free(replace_paths[i]); + free(replace_paths); + return d; +} + +data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = load_labels_paths(paths, n, labels, k, 0); + if(m) free(paths); 
+ return d; +} + +/* + data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) + { + data d = {0}; + d.indexes = calloc(n, sizeof(int)); + if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes); + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure); + d.y = load_labels_paths(paths, n, labels, k); + if(m) free(paths); + return d; + } + */ + +data load_data_super(char **paths, int n, int m, int w, int h, int scale) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + + int i; + d.X.rows = n; + d.X.vals = calloc(n, sizeof(float*)); + d.X.cols = w*h*3; + + d.y.rows = n; + d.y.vals = calloc(n, sizeof(float*)); + d.y.cols = w*scale * h*scale * 3; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], 0, 0); + image crop = random_crop_image(im, w*scale, h*scale); + int flip = rand()%2; + if (flip) flip_image(crop); + image resize = resize_image(crop, w, h); + d.X.vals[i] = resize.data; + d.y.vals[i] = crop.data; + free_image(im); + } + + if(m) free(paths); + return d; +} + +data load_data_regression(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0); + d.y = load_regression_labels_paths(paths, n, k); + if(m) free(paths); + return d; +} + +data select_data(data *orig, int *inds) +{ + data d = {0}; + d.shallow = 1; + d.w = orig[0].w; + d.h = orig[0].h; + + d.X.rows = orig[0].X.rows; + d.y.rows = orig[0].X.rows; + + d.X.cols = orig[0].X.cols; + d.y.cols = orig[0].y.cols; + + d.X.vals = calloc(orig[0].X.rows, sizeof(float *)); + d.y.vals = calloc(orig[0].y.rows, sizeof(float *)); + 
int i; + for(i = 0; i < d.X.rows; ++i){ + d.X.vals[i] = orig[inds[i]].X.vals[i]; + d.y.vals[i] = orig[inds[i]].y.vals[i]; + } + return d; +} + +data *tile_data(data orig, int divs, int size) +{ + data *ds = calloc(divs*divs, sizeof(data)); + int i, j; +#pragma omp parallel for + for(i = 0; i < divs*divs; ++i){ + data d; + d.shallow = 0; + d.w = orig.w/divs * size; + d.h = orig.h/divs * size; + d.X.rows = orig.X.rows; + d.X.cols = d.w*d.h*3; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + + d.y = copy_matrix(orig.y); +#pragma omp parallel for + for(j = 0; j < orig.X.rows; ++j){ + int x = (i%divs) * orig.w / divs - (d.w - orig.w/divs)/2; + int y = (i/divs) * orig.h / divs - (d.h - orig.h/divs)/2; + image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[j]); + d.X.vals[j] = crop_image(im, x, y, d.w, d.h).data; + } + ds[i] = d; + } + return ds; +} + +data resize_data(data orig, int w, int h) +{ + data d = {0}; + d.shallow = 0; + d.w = w; + d.h = h; + int i; + d.X.rows = orig.X.rows; + d.X.cols = w*h*3; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + + d.y = copy_matrix(orig.y); +#pragma omp parallel for + for(i = 0; i < orig.X.rows; ++i){ + image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[i]); + d.X.vals[i] = resize_image(im, w, h).data; + } + return d; +} + +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.w=size; + d.h=size; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, center); + d.y = load_labels_paths(paths, n, labels, k, hierarchy); + if(m) free(paths); + return d; +} + +data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +{ + if(m) paths = 
get_random_paths(paths, n, m); + data d = {0}; + d.w = size; + d.h = size; + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0); + d.y = load_tags_paths(paths, n, k); + if(m) free(paths); + return d; +} + +matrix concat_matrix(matrix m1, matrix m2) +{ + int i, count = 0; + matrix m; + m.cols = m1.cols; + m.rows = m1.rows+m2.rows; + m.vals = calloc(m1.rows + m2.rows, sizeof(float*)); + for(i = 0; i < m1.rows; ++i){ + m.vals[count++] = m1.vals[i]; + } + for(i = 0; i < m2.rows; ++i){ + m.vals[count++] = m2.vals[i]; + } + return m; +} + +data concat_data(data d1, data d2) +{ + data d = {0}; + d.shallow = 1; + d.X = concat_matrix(d1.X, d2.X); + d.y = concat_matrix(d1.y, d2.y); + d.w = d1.w; + d.h = d1.h; + return d; +} + +data concat_datas(data *d, int n) +{ + int i; + data out = {0}; + for(i = 0; i < n; ++i){ + data new_data = concat_data(d[i], out); + free_data(out); + out = new_data; + } + return out; +} + +data load_categorical_data_csv(char *filename, int target, int k) +{ + data d = {0}; + d.shallow = 0; + matrix X = csv_to_matrix(filename); + float *truth_1d = pop_column(&X, target); + float **truth = one_hot_encode(truth_1d, X.rows, k); + matrix y; + y.rows = X.rows; + y.cols = k; + y.vals = truth; + d.X = X; + d.y = y; + free(truth_1d); + return d; +} + +data load_cifar10_data(char *filename) +{ + data d = {0}; + d.shallow = 0; + long i,j; + matrix X = make_matrix(10000, 3072); + matrix y = make_matrix(10000, 10); + d.X = X; + d.y = y; + + FILE *fp = fopen(filename, "rb"); + if(!fp) file_error(filename); + for(i = 0; i < 10000; ++i){ + unsigned char bytes[3073]; + fread(bytes, 1, 3073, fp); + int class_ = bytes[0]; + y.vals[i][class_] = 1; + for(j = 0; j < X.cols; ++j){ + X.vals[i][j] = (double)bytes[j+1]; + } + } + scale_data_rows(d, 1./255); + //normalize_data_rows(d); + fclose(fp); + return d; +} + +void get_random_batch(data d, int n, float *X, float *y) +{ + int j; + for(j = 0; j < n; 
++j){ + int index = rand()%d.X.rows; + memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); + memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); + } +} + +void get_next_batch(data d, int n, int offset, float *X, float *y) +{ + int j; + for(j = 0; j < n; ++j){ + int index = offset + j; + memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); + if(y) memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); + } +} + +void smooth_data(data d) +{ + int i, j; + float scale = 1. / d.y.cols; + float eps = .1; + for(i = 0; i < d.y.rows; ++i){ + for(j = 0; j < d.y.cols; ++j){ + d.y.vals[i][j] = eps * scale + (1-eps) * d.y.vals[i][j]; + } + } +} + +data load_all_cifar10() +{ + data d = {0}; + d.shallow = 0; + int i,j,b; + matrix X = make_matrix(50000, 3072); + matrix y = make_matrix(50000, 10); + d.X = X; + d.y = y; + + + for(b = 0; b < 5; ++b){ + char buff[256]; + sprintf(buff, "data/cifar/cifar-10-batches-bin/data_batch_%d.bin", b+1); + FILE *fp = fopen(buff, "rb"); + if(!fp) file_error(buff); + for(i = 0; i < 10000; ++i){ + unsigned char bytes[3073]; + fread(bytes, 1, 3073, fp); + int class_ = bytes[0]; + y.vals[i+b*10000][class_] = 1; + for(j = 0; j < X.cols; ++j){ + X.vals[i+b*10000][j] = (double)bytes[j+1]; + } + } + fclose(fp); + } + //normalize_data_rows(d); + scale_data_rows(d, 1./255); + smooth_data(d); + return d; +} + +data load_go(char *filename) +{ + FILE *fp = fopen(filename, "rb"); + matrix X = make_matrix(3363059, 361); + matrix y = make_matrix(3363059, 361); + int row, col; + + if(!fp) file_error(filename); + char *label; + int count = 0; + while((label = fgetl(fp))){ + int i; + if(count == X.rows){ + X = resize_matrix(X, count*2); + y = resize_matrix(y, count*2); + } + sscanf(label, "%d %d", &row, &col); + char *board = fgetl(fp); + + int index = row*19 + col; + y.vals[count][index] = 1; + + for(i = 0; i < 19*19; ++i){ + float val = 0; + if(board[i] == '1') val = 1; + else if(board[i] == '2') val = -1; + X.vals[count][i] 
= val; + } + ++count; + free(label); + free(board); + } + X = resize_matrix(X, count); + y = resize_matrix(y, count); + + data d = {0}; + d.shallow = 0; + d.X = X; + d.y = y; + + + fclose(fp); + + return d; +} + + +void randomize_data(data d) +{ + int i; + for(i = d.X.rows-1; i > 0; --i){ + int index = rand()%i; + float *swap = d.X.vals[index]; + d.X.vals[index] = d.X.vals[i]; + d.X.vals[i] = swap; + + swap = d.y.vals[index]; + d.y.vals[index] = d.y.vals[i]; + d.y.vals[i] = swap; + } +} + +void scale_data_rows(data d, float s) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + scale_array(d.X.vals[i], d.X.cols, s); + } +} + +void translate_data_rows(data d, float s) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + translate_array(d.X.vals[i], d.X.cols, s); + } +} + +data copy_data(data d) +{ + data c = {0}; + c.w = d.w; + c.h = d.h; + c.shallow = 0; + c.num_boxes = d.num_boxes; + c.boxes = d.boxes; + c.X = copy_matrix(d.X); + c.y = copy_matrix(d.y); + return c; +} + +void normalize_data_rows(data d) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + normalize_array(d.X.vals[i], d.X.cols); + } +} + +data get_data_part(data d, int part, int total) +{ + data p = {0}; + p.shallow = 1; + p.X.rows = d.X.rows * (part + 1) / total - d.X.rows * part / total; + p.y.rows = d.y.rows * (part + 1) / total - d.y.rows * part / total; + p.X.cols = d.X.cols; + p.y.cols = d.y.cols; + p.X.vals = d.X.vals + d.X.rows * part / total; + p.y.vals = d.y.vals + d.y.rows * part / total; + return p; +} + +data get_random_data(data d, int num) +{ + data r = {0}; + r.shallow = 1; + + r.X.rows = num; + r.y.rows = num; + + r.X.cols = d.X.cols; + r.y.cols = d.y.cols; + + r.X.vals = calloc(num, sizeof(float *)); + r.y.vals = calloc(num, sizeof(float *)); + + int i; + for(i = 0; i < num; ++i){ + int index = rand()%d.X.rows; + r.X.vals[i] = d.X.vals[index]; + r.y.vals[i] = d.y.vals[index]; + } + return r; +} + +data *split_data(data d, int part, int total) +{ + data *split = calloc(2, sizeof(data)); + int i; + 
int start = part*d.X.rows/total; + int end = (part+1)*d.X.rows/total; + data train; + data test; + train.shallow = test.shallow = 1; + + test.X.rows = test.y.rows = end-start; + train.X.rows = train.y.rows = d.X.rows - (end-start); + train.X.cols = test.X.cols = d.X.cols; + train.y.cols = test.y.cols = d.y.cols; + + train.X.vals = calloc(train.X.rows, sizeof(float*)); + test.X.vals = calloc(test.X.rows, sizeof(float*)); + train.y.vals = calloc(train.y.rows, sizeof(float*)); + test.y.vals = calloc(test.y.rows, sizeof(float*)); + + for(i = 0; i < start; ++i){ + train.X.vals[i] = d.X.vals[i]; + train.y.vals[i] = d.y.vals[i]; + } + for(i = start; i < end; ++i){ + test.X.vals[i-start] = d.X.vals[i]; + test.y.vals[i-start] = d.y.vals[i]; + } + for(i = end; i < d.X.rows; ++i){ + train.X.vals[i-(end-start)] = d.X.vals[i]; + train.y.vals[i-(end-start)] = d.y.vals[i]; + } + split[0] = train; + split[1] = test; + return split; +} + diff --git a/workloads/realworld/pipeline/darknet/src/data.h b/workloads/realworld/pipeline/darknet/src/data.h new file mode 100644 index 0000000000000000000000000000000000000000..781906f8743c7d88c0fa134403d0ae020b544053 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/data.h @@ -0,0 +1,50 @@ +#ifndef DATA_H +#define DATA_H +#include + +#include "darknet.h" +#include "matrix.h" +#include "list.h" +#include "image.h" +#include "tree.h" + +static inline float distance_from_edge(int x, int max) +{ + int dx = (max/2) - x; + if (dx < 0) dx = -dx; + dx = (max/2) + 1 - dx; + dx *= 2; + float dist = (float)dx/max; + if (dist > 1) dist = 1; + return dist; +} +void load_data_blocking(load_args args); + + +void print_letters(float *pred, int n); +data load_data_captcha(char **paths, int n, int m, int k, int w, int h); +data load_data_captcha_encode(char **paths, int n, int m, int w, int h); +data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure); +data 
load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); +matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); +data load_data_super(char **paths, int n, int m, int w, int h, int scale); +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); +data load_data_regression(char **paths, int n, int m, int classes, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); +data load_go(char *filename); + + +data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h); + +void get_random_batch(data d, int n, float *X, float *y); +data get_data_part(data d, int part, int total); +data get_random_data(data d, int num); +data load_categorical_data_csv(char *filename, int target, int k); +void normalize_data_rows(data d); +void scale_data_rows(data d, float s); +void translate_data_rows(data d, float s); +void randomize_data(data d); +data *split_data(data d, int part, int total); +data concat_datas(data *d, int n); +void fill_truth(char *path, char **labels, int k, float *truth); + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/deconvolutional_kernels.cu b/workloads/realworld/pipeline/darknet/src/deconvolutional_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..ed274624f995248eff746480df0b6c044287e3da --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/deconvolutional_kernels.cu @@ -0,0 +1,139 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "convolutional_layer.h" +#include "deconvolutional_layer.h" +#include "batchnorm_layer.h" +#include "gemm.h" 
+#include "blas.h" +#include "im2col.h" +#include "col2im.h" +#include "utils.h" +#include "cuda_dark.h" +} + +extern "C" void forward_deconvolutional_layer_gpu(layer *l, network net) +{ + int i; + + int m = l->size*l->size*l->n; + int n = l->h*l->w; + int k = l->c; + + fill_gpu(l->outputs*l->batch, 0, l->output_gpu, 1); + + for(i = 0; i < l->batch; ++i){ + float *a = l->weights_gpu; + float *b = net.input_gpu + i*l->c*l->h*l->w; + float *c = net.workspace; + + gemm_gpu(1,0,m,n,k,1,a,m,b,n,0,c,n,net.streams[l->stream_index]); + + col2im_gpu(net.workspace, l->out_c, l->out_h, l->out_w, l->size, l->stride, l->pad, l->output_gpu+i*l->outputs); + } + if (l->batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l->output_gpu, l->biases_gpu, l->batch, l->n, l->out_w*l->out_h, net.streams[l->stream_index]); + } + activate_array_gpu(l->output_gpu, l->batch*l->n*l->out_w*l->out_h, l->activation, net.streams[l->stream_index]); +} + +extern "C" void backward_deconvolutional_layer_gpu(layer *l, network net) +{ + int i; + + //constrain_gpu(l->outputs*l->batch, 1, l->delta_gpu, 1); + gradient_array_gpu(l->output_gpu, l->outputs*l->batch, l->activation, l->delta_gpu); + + if(l->batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l->bias_updates_gpu, l->delta_gpu, l->batch, l->n, l->out_w*l->out_h); + } + + //if(net.delta_gpu) memset(net.delta_gpu, 0, l->batch*l->h*l->w*l->c*sizeof(float)); + + for(i = 0; i < l->batch; ++i){ + int m = l->c; + int n = l->size*l->size*l->n; + int k = l->h*l->w; + + float *a = net.input_gpu + i*m*k; + float *b = net.workspace; + float *c = l->weight_updates_gpu; + + im2col_gpu(l->delta_gpu + i*l->outputs, l->out_c, l->out_h, l->out_w, + l->size, l->stride, l->pad, b, net.streams[l->stream_index]); + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n,net.streams[l->stream_index]); + + if(net.delta_gpu){ + int m = l->c; + int n = l->h*l->w; + int k = l->size*l->size*l->n; + + float *a = l->weights_gpu; 
+ float *b = net.workspace; + float *c = net.delta_gpu + i*n*m; + + gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n,net.streams[l->stream_index]); + } + } +} + +extern "C" void pull_deconvolutional_layer(layer *l) +{ + cuda_pull_array(l->weights_gpu, l->weights, l->c*l->n*l->size*l->size); + cuda_pull_array(l->biases_gpu, l->biases, l->n); + cuda_pull_array(l->weight_updates_gpu, l->weight_updates, l->c*l->n*l->size*l->size); + cuda_pull_array(l->bias_updates_gpu, l->bias_updates, l->n); + if (l->batch_normalize){ + cuda_pull_array(l->scales_gpu, l->scales, l->n); + cuda_pull_array(l->rolling_mean_gpu, l->rolling_mean, l->n); + cuda_pull_array(l->rolling_variance_gpu, l->rolling_variance, l->n); + } +} + +extern "C" void push_deconvolutional_layer(layer *l) +{ + cuda_push_array(l->weights_gpu, l->weights, l->c*l->n*l->size*l->size); + cuda_push_array(l->biases_gpu, l->biases, l->n); + cuda_push_array(l->weight_updates_gpu, l->weight_updates, l->c*l->n*l->size*l->size); + cuda_push_array(l->bias_updates_gpu, l->bias_updates, l->n); + if (l->batch_normalize){ + cuda_push_array(l->scales_gpu, l->scales, l->n); + cuda_push_array(l->rolling_mean_gpu, l->rolling_mean, l->n); + cuda_push_array(l->rolling_variance_gpu, l->rolling_variance, l->n); + } +} + +void update_deconvolutional_layer_gpu(layer *l, update_args a, network net) +{ + float learning_rate = a.learning_rate*l->learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + if(a.adam){ + adam_update_gpu(l->weights_gpu, l->weight_updates_gpu, l->m_gpu, l->v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l->nweights, batch, a.t, net.streams[l->stream_index]); + adam_update_gpu(l->biases_gpu, l->bias_updates_gpu, l->bias_m_gpu, l->bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l->n, batch, a.t, net.streams[l->stream_index]); + if(l->scales_gpu){ + adam_update_gpu(l->scales_gpu, l->scale_updates_gpu, l->scale_m_gpu, l->scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l->n, 
batch, a.t, net.streams[l->stream_index]); + } + }else{ + axpy_gpu(l->nweights, -decay*batch, l->weights_gpu, 1, l->weight_updates_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->nweights, learning_rate/batch, l->weight_updates_gpu, 1, l->weights_gpu, 1, net.streams[l->stream_index]); + scal_gpu(l->nweights, momentum, l->weight_updates_gpu, 1, net.streams[l->stream_index]); + + axpy_gpu(l->n, learning_rate/batch, l->bias_updates_gpu, 1, l->biases_gpu, 1, net.streams[l->stream_index]); + scal_gpu(l->n, momentum, l->bias_updates_gpu, 1, net.streams[l->stream_index]); + + if(l->scales_gpu){ + axpy_gpu(l->n, learning_rate/batch, l->scale_updates_gpu, 1, l->scales_gpu, 1, net.streams[l->stream_index]); + scal_gpu(l->n, momentum, l->scale_updates_gpu, 1, net.streams[l->stream_index]); + } + } +} + diff --git a/workloads/realworld/pipeline/darknet/src/deconvolutional_layer.c b/workloads/realworld/pipeline/darknet/src/deconvolutional_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..c1c7e65110db9d79299f2fe063971e236aee1329 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/deconvolutional_layer.c @@ -0,0 +1,312 @@ +#include "deconvolutional_layer.h" +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "utils.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" + +#include +#include + + +static size_t get_workspace_size(layer *l){ + return (size_t)l->h*l->w*l->size*l->size*l->n*sizeof(float); +} + +void bilinear_init(layer *l) +{ + int i,j,f; + float center = (l->size-1) / 2.; + for(f = 0; f < l->n; ++f){ + for(j = 0; j < l->size; ++j){ + for(i = 0; i < l->size; ++i){ + float val = (1 - fabs(i - center)) * (1 - fabs(j - center)); + int c = f%l->c; + int ind = f*l->size*l->size*l->c + c*l->size*l->size + j*l->size + i; + l->weights[ind] = val; + } + } + } +} + + +layer* make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION 
activation, int batch_normalize, int adam) +{ + int i; + layer *l = calloc(1, sizeof(layer)); + l->type = DECONVOLUTIONAL; + + l->h = h; + l->w = w; + l->c = c; + l->n = n; + l->batch = batch; + l->stride = stride; + l->size = size; + + l->nweights = c*n*size*size; + l->nbiases = n; + + l->weights = calloc(c*n*size*size, sizeof(float)); + l->weight_updates = calloc(c*n*size*size, sizeof(float)); + + l->biases = calloc(n, sizeof(float)); + l->bias_updates = calloc(n, sizeof(float)); + //float scale = n/(size*size*c); + //printf("scale: %f\n", scale); + float scale = .02; + for(i = 0; i < c*n*size*size; ++i) l->weights[i] = scale*rand_normal(); + //bilinear_init(l); + for(i = 0; i < n; ++i){ + l->biases[i] = 0; + } + l->pad = padding; + + l->out_h = (l->h - 1) * l->stride + l->size - 2*l->pad; + l->out_w = (l->w - 1) * l->stride + l->size - 2*l->pad; + l->out_c = n; + l->outputs = l->out_w * l->out_h * l->out_c; + l->inputs = l->w * l->h * l->c; + + scal_cpu(l->nweights, (float)l->out_w*l->out_h/(l->w*l->h), l->weights, 1); + + l->output = calloc(l->batch*l->outputs, sizeof(float)); + l->delta = calloc(l->batch*l->outputs, sizeof(float)); + + l->forward = forward_deconvolutional_layer; + l->backward = backward_deconvolutional_layer; + l->update = update_deconvolutional_layer; + + l->batch_normalize = batch_normalize; + + if(batch_normalize){ + l->scales = calloc(n, sizeof(float)); + l->scale_updates = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + l->scales[i] = 1; + } + + l->mean = calloc(n, sizeof(float)); + l->variance = calloc(n, sizeof(float)); + + l->mean_delta = calloc(n, sizeof(float)); + l->variance_delta = calloc(n, sizeof(float)); + + l->rolling_mean = calloc(n, sizeof(float)); + l->rolling_variance = calloc(n, sizeof(float)); + l->x = calloc(l->batch*l->outputs, sizeof(float)); + l->x_norm = calloc(l->batch*l->outputs, sizeof(float)); + } + if(adam){ + l->m = calloc(c*n*size*size, sizeof(float)); + l->v = calloc(c*n*size*size, sizeof(float)); + 
l->bias_m = calloc(n, sizeof(float)); + l->scale_m = calloc(n, sizeof(float)); + l->bias_v = calloc(n, sizeof(float)); + l->scale_v = calloc(n, sizeof(float)); + } + +#ifdef GPU + l->forward_gpu = forward_deconvolutional_layer_gpu; + l->backward_gpu = backward_deconvolutional_layer_gpu; + l->update_gpu = update_deconvolutional_layer_gpu; + + if(gpu_index >= 0){ + + if (adam) { + l->m_gpu = cuda_make_array(l->m, c*n*size*size); + l->v_gpu = cuda_make_array(l->v, c*n*size*size); + l->bias_m_gpu = cuda_make_array(l->bias_m, n); + l->bias_v_gpu = cuda_make_array(l->bias_v, n); + l->scale_m_gpu = cuda_make_array(l->scale_m, n); + l->scale_v_gpu = cuda_make_array(l->scale_v, n); + } + l->weights_gpu = cuda_make_array(l->weights, c*n*size*size); + l->weight_updates_gpu = cuda_make_array(l->weight_updates, c*n*size*size); + + l->biases_gpu = cuda_make_array(l->biases, n); + l->bias_updates_gpu = cuda_make_array(l->bias_updates, n); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->out_h*l->out_w*n); + l->output_gpu = cuda_make_array(l->output, l->batch*l->out_h*l->out_w*n); + + if(batch_normalize){ + l->mean_gpu = cuda_make_array(0, n); + l->variance_gpu = cuda_make_array(0, n); + + l->rolling_mean_gpu = cuda_make_array(0, n); + l->rolling_variance_gpu = cuda_make_array(0, n); + + l->mean_delta_gpu = cuda_make_array(0, n); + l->variance_delta_gpu = cuda_make_array(0, n); + + l->scales_gpu = cuda_make_array(l->scales, n); + l->scale_updates_gpu = cuda_make_array(0, n); + + l->x_gpu = cuda_make_array(0, l->batch*l->out_h*l->out_w*n); + l->x_norm_gpu = cuda_make_array(0, l->batch*l->out_h*l->out_w*n); + } + } + #ifdef CUDNN + cudnnCreateTensorDescriptor(&l->dstTensorDesc); + cudnnCreateTensorDescriptor(&l->normTensorDesc); + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + #endif +#endif + + 
l->activation = activation; + l->workspace_size = get_workspace_size(l); + + fprintf(stderr, "deconv%5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l->out_w, l->out_h, l->out_c); + + return l; +} + +void denormalize_deconvolutional_layer(layer *l) +{ + int i, j; + for(i = 0; i < l->n; ++i){ + float scale = l->scales[i]/sqrt(l->rolling_variance[i] + .00001); + for(j = 0; j < l->c*l->size*l->size; ++j){ + l->weights[i*l->c*l->size*l->size + j] *= scale; + } + l->biases[i] -= l->rolling_mean[i] * scale; + l->scales[i] = 1; + l->rolling_mean[i] = 0; + l->rolling_variance[i] = 1; + } +} + +void resize_deconvolutional_layer(layer *l, int h, int w) +{ + l->h = h; + l->w = w; + l->out_h = (l->h - 1) * l->stride + l->size - 2*l->pad; + l->out_w = (l->w - 1) * l->stride + l->size - 2*l->pad; + + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->w * l->h * l->c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + if(l->batch_normalize){ + l->x = realloc(l->x, l->batch*l->outputs*sizeof(float)); + l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float)); + } + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); + + if(l->batch_normalize){ + cuda_free(l->x_gpu); + cuda_free(l->x_norm_gpu); + + l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); + l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); + } + #ifdef CUDNN + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + #endif +#endif + l->workspace_size = get_workspace_size(l); +} + +void forward_deconvolutional_layer(layer *l, network net) +{ 
+ int i; + + int m = l->size*l->size*l->n; + int n = l->h*l->w; + int k = l->c; + + fill_cpu(l->outputs*l->batch, 0, l->output, 1); + + for(i = 0; i < l->batch; ++i){ + float *a = l->weights; + float *b = net.input + i*l->c*l->h*l->w; + float *c = net.workspace; + + gemm_cpu(1,0,m,n,k,1,a,m,b,n,0,c,n); + + col2im_cpu(net.workspace, l->out_c, l->out_h, l->out_w, l->size, l->stride, l->pad, l->output+i*l->outputs); + } + if (l->batch_normalize) { + forward_batchnorm_layer(l, net); + } else { + add_bias(l->output, l->biases, l->batch, l->n, l->out_w*l->out_h); + } + activate_array(l->output, l->batch*l->n*l->out_w*l->out_h, l->activation); +} + +void backward_deconvolutional_layer(layer *l, network net) +{ + int i; + + gradient_array(l->output, l->outputs*l->batch, l->activation, l->delta); + + if(l->batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l->bias_updates, l->delta, l->batch, l->n, l->out_w*l->out_h); + } + + //if(net.delta) memset(net.delta, 0, l->batch*l->h*l->w*l->c*sizeof(float)); + + for(i = 0; i < l->batch; ++i){ + int m = l->c; + int n = l->size*l->size*l->n; + int k = l->h*l->w; + + float *a = net.input + i*m*k; + float *b = net.workspace; + float *c = l->weight_updates; + + im2col_cpu(l->delta + i*l->outputs, l->out_c, l->out_h, l->out_w, + l->size, l->stride, l->pad, b); + gemm_cpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if(net.delta){ + int m = l->c; + int n = l->h*l->w; + int k = l->size*l->size*l->n; + + float *a = l->weights; + float *b = net.workspace; + float *c = net.delta + i*n*m; + + gemm_cpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } +} + +void update_deconvolutional_layer(layer *l, update_args a, network net) +{ + float learning_rate = a.learning_rate*l->learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int size = l->size*l->size*l->c*l->n; + axpy_cpu(l->n, learning_rate/batch, l->bias_updates, 1, l->biases, 1); + scal_cpu(l->n, momentum, l->bias_updates, 1); + + 
if(l->scales){ + axpy_cpu(l->n, learning_rate/batch, l->scale_updates, 1, l->scales, 1); + scal_cpu(l->n, momentum, l->scale_updates, 1); + } + + axpy_cpu(size, -decay*batch, l->weights, 1, l->weight_updates, 1); + axpy_cpu(size, learning_rate/batch, l->weight_updates, 1, l->weights, 1); + scal_cpu(size, momentum, l->weight_updates, 1); +} + + + diff --git a/workloads/realworld/pipeline/darknet/src/deconvolutional_layer.h b/workloads/realworld/pipeline/darknet/src/deconvolutional_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..b8a9c54ce3be21063d7d3a514437da7b43a09569 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/deconvolutional_layer.h @@ -0,0 +1,25 @@ +#ifndef DECONVOLUTIONAL_LAYER_H +#define DECONVOLUTIONAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +#ifdef GPU +void forward_deconvolutional_layer_gpu(layer *l, network net); +void backward_deconvolutional_layer_gpu(layer *l, network net); +void update_deconvolutional_layer_gpu(layer *l, update_args a, network net); +void push_deconvolutional_layer(layer *l); +void pull_deconvolutional_layer(layer *l); +#endif + +layer* make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam); +void resize_deconvolutional_layer(layer *l, int h, int w); +void forward_deconvolutional_layer(layer *l, network net); +void update_deconvolutional_layer(layer *l, update_args a, network net); +void backward_deconvolutional_layer(layer *l, network net); + +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/demo.c b/workloads/realworld/pipeline/darknet/src/demo.c new file mode 100644 index 0000000000000000000000000000000000000000..b89efb8dc4c044c0240b7442e39222405409a676 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/demo.c @@ -0,0 +1,349 @@ +#include "network.h" +#include "detection_layer.h" 
+#include "region_layer.h" +#include "cost_layer.h" +#include "utils.h" +#include "parser.h" +#include "box.h" +#include "image.h" +#include "demo.h" +#include + +#define DEMO 1 + +#ifdef OPENCV + +static char **demo_names; +static image **demo_alphabet; +static int demo_classes; + +static network *net; +static image buff [3]; +static image buff_letter[3]; +static int buff_index = 0; +static void * cap; +static float fps = 0; +static float demo_thresh = 0; +static float demo_hier = .5; +static int running = 0; + +static int demo_frame = 3; +static int demo_index = 0; +static float **predictions; +static float *avg; +static int demo_done = 0; +static int demo_total = 0; +double demo_time; + +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num); + +int size_network(network *net) +{ + int i; + int count = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + count += l.outputs; + } + } + return count; +} + +void remember_network(network *net) +{ + int i; + int count = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + memcpy(predictions[demo_index] + count, net->layers[i].output, sizeof(float) * l.outputs); + count += l.outputs; + } + } +} + +detection *avg_predictions(network *net, int *nboxes) +{ + int i, j; + int count = 0; + fill_cpu(demo_total, 0, avg, 1); + for(j = 0; j < demo_frame; ++j){ + axpy_cpu(demo_total, 1./demo_frame, predictions[j], 1, avg, 1); + } + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + memcpy(l.output, avg + count, sizeof(float) * l.outputs); + count += l.outputs; + } + } + detection *dets = get_network_boxes(net, buff[0].w, buff[0].h, demo_thresh, demo_hier, 0, 1, nboxes); + return dets; +} + +void *detect_in_thread(void *ptr) +{ + running = 1; + float 
nms = .4; + + layer l = net->layers[net->n-1]; + float *X = buff_letter[(buff_index+2)%3].data; + network_predict(net, X); + + /* + if(l.type == DETECTION){ + get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0); + } else */ + remember_network(net); + detection *dets = 0; + int nboxes = 0; + dets = avg_predictions(net, &nboxes); + + + /* + int i,j; + box zero = {0}; + int classes = l.classes; + for(i = 0; i < demo_detections; ++i){ + avg[i].objectness = 0; + avg[i].bbox = zero; + memset(avg[i].prob, 0, classes*sizeof(float)); + for(j = 0; j < demo_frame; ++j){ + axpy_cpu(classes, 1./demo_frame, dets[j][i].prob, 1, avg[i].prob, 1); + avg[i].objectness += dets[j][i].objectness * 1./demo_frame; + avg[i].bbox.x += dets[j][i].bbox.x * 1./demo_frame; + avg[i].bbox.y += dets[j][i].bbox.y * 1./demo_frame; + avg[i].bbox.w += dets[j][i].bbox.w * 1./demo_frame; + avg[i].bbox.h += dets[j][i].bbox.h * 1./demo_frame; + } + //copy_cpu(classes, dets[0][i].prob, 1, avg[i].prob, 1); + //avg[i].objectness = dets[0][i].objectness; + } + */ + + if (nms > 0) do_nms_obj(dets, nboxes, l.classes, nms); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.1f\n",fps); + printf("Objects:\n\n"); + image display = buff[(buff_index+2) % 3]; + draw_detections(display, dets, nboxes, demo_thresh, demo_names, demo_alphabet, demo_classes); + free_detections(dets, nboxes); + + demo_index = (demo_index + 1)%demo_frame; + running = 0; + return 0; +} + +void *fetch_in_thread(void *ptr) +{ + free_image(buff[buff_index]); + buff[buff_index] = get_image_from_stream(cap); + if(buff[buff_index].data == 0) { + demo_done = 1; + return 0; + } + letterbox_image_into(buff[buff_index], net->w, net->h, buff_letter[buff_index]); + return 0; +} + +void *display_in_thread(void *ptr) +{ + int c = show_image(buff[(buff_index + 1)%3], "Demo", 1); + if (c != -1) c = c%256; + if (c == 27) { + demo_done = 1; + return 0; + } else if (c == 82) { + demo_thresh += .02; + } else if (c == 84) { + demo_thresh -= 
.02; + if(demo_thresh <= .02) demo_thresh = .02; + } else if (c == 83) { + demo_hier += .02; + } else if (c == 81) { + demo_hier -= .02; + if(demo_hier <= .0) demo_hier = .0; + } + return 0; +} + +void *display_loop(void *ptr) +{ + while(1){ + display_in_thread(0); + } +} + +void *detect_loop(void *ptr) +{ + while(1){ + detect_in_thread(0); + } +} + +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) +{ + //demo_frame = avg_frames; + image **alphabet = load_alphabet(); + demo_names = names; + demo_alphabet = alphabet; + demo_classes = classes; + demo_thresh = thresh; + demo_hier = hier; + printf("Demo\n"); + net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + pthread_t detect_thread; + pthread_t fetch_thread; + + srand(2222222); + + int i; + demo_total = size_network(net); + predictions = calloc(demo_frame, sizeof(float*)); + for (i = 0; i < demo_frame; ++i){ + predictions[i] = calloc(demo_total, sizeof(float)); + } + avg = calloc(demo_total, sizeof(float)); + + if(filename){ + printf("video file: %s\n", filename); + cap = open_video_stream(filename, 0, 0, 0, 0); + }else{ + cap = open_video_stream(0, cam_index, w, h, frames); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + + buff[0] = get_image_from_stream(cap); + buff[1] = copy_image(buff[0]); + buff[2] = copy_image(buff[0]); + buff_letter[0] = letterbox_image(buff[0], net->w, net->h); + buff_letter[1] = letterbox_image(buff[0], net->w, net->h); + buff_letter[2] = letterbox_image(buff[0], net->w, net->h); + + int count = 0; + if(!prefix){ + make_window("Demo", 1352, 1013, fullscreen); + } + + demo_time = what_time_is_it_now(); + + while(!demo_done){ + buff_index = (buff_index + 1) %3; + if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); + if(pthread_create(&detect_thread, 0, 
detect_in_thread, 0)) error("Thread creation failed"); + if(!prefix){ + fps = 1./(what_time_is_it_now() - demo_time); + demo_time = what_time_is_it_now(); + display_in_thread(0); + }else{ + char name[256]; + sprintf(name, "%s_%08d", prefix, count); + save_image(buff[(buff_index + 1)%3], name); + } + pthread_join(fetch_thread, 0); + pthread_join(detect_thread, 0); + ++count; + } +} + +/* + void demo_compare(char *cfg1, char *weight1, char *cfg2, char *weight2, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) + { + demo_frame = avg_frames; + predictions = calloc(demo_frame, sizeof(float*)); + image **alphabet = load_alphabet(); + demo_names = names; + demo_alphabet = alphabet; + demo_classes = classes; + demo_thresh = thresh; + demo_hier = hier; + printf("Demo\n"); + net = load_network(cfg1, weight1, 0); + set_batch_network(net, 1); + pthread_t detect_thread; + pthread_t fetch_thread; + + srand(2222222); + + if(filename){ + printf("video file: %s\n", filename); + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + if(frames){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FPS, frames); + } + } + + if(!cap) error("Couldn't connect to webcam.\n"); + + layer l = net->layers[net->n-1]; + demo_detections = l.n*l.w*l.h; + int j; + + avg = (float *) calloc(l.outputs, sizeof(float)); + for(j = 0; j < demo_frame; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float)); + + boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box)); + probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *)); + for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes+1, sizeof(float)); + + buff[0] = get_image_from_stream(cap); + buff[1] = copy_image(buff[0]); + buff[2] = copy_image(buff[0]); + 
buff_letter[0] = letterbox_image(buff[0], net->w, net->h); + buff_letter[1] = letterbox_image(buff[0], net->w, net->h); + buff_letter[2] = letterbox_image(buff[0], net->w, net->h); + ipl = cvCreateImage(cvSize(buff[0].w,buff[0].h), IPL_DEPTH_8U, buff[0].c); + + int count = 0; + if(!prefix){ + cvNamedWindow("Demo", CV_WINDOW_NORMAL); + if(fullscreen){ + cvSetWindowProperty("Demo", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); + } else { + cvMoveWindow("Demo", 0, 0); + cvResizeWindow("Demo", 1352, 1013); + } + } + + demo_time = what_time_is_it_now(); + + while(!demo_done){ +buff_index = (buff_index + 1) %3; +if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); +if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); +if(!prefix){ + fps = 1./(what_time_is_it_now() - demo_time); + demo_time = what_time_is_it_now(); + display_in_thread(0); +}else{ + char name[256]; + sprintf(name, "%s_%08d", prefix, count); + save_image(buff[(buff_index + 1)%3], name); +} +pthread_join(fetch_thread, 0); +pthread_join(detect_thread, 0); +++count; +} +} +*/ +#else +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg, float hier, int w, int h, int frames, int fullscreen) +{ + fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); +} +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/demo.h b/workloads/realworld/pipeline/darknet/src/demo.h new file mode 100644 index 0000000000000000000000000000000000000000..86e46541d1a7473b22373b29bc6ff9cc281d4939 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/demo.h @@ -0,0 +1,6 @@ +#ifndef DEMO_H +#define DEMO_H + +#include "image.h" + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/detection_layer.c b/workloads/realworld/pipeline/darknet/src/detection_layer.c new file mode 100644 index 
0000000000000000000000000000000000000000..da8a517ece68da7f211c59ac6e9e2c3419e2493f --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/detection_layer.c @@ -0,0 +1,275 @@ +#include "detection_layer.h" +#include "activations.h" +#include "softmax_layer.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +detection_layer* make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore) +{ + detection_layer *l = calloc(1, sizeof(detection_layer)); + l->type = DETECTION; + + l->n = n; + l->batch = batch; + l->inputs = inputs; + l->classes = classes; + l->coords = coords; + l->rescore = rescore; + l->side = side; + l->w = side; + l->h = side; + assert(side*side*((1 + l->coords)*l->n + l->classes) == inputs); + l->cost = calloc(1, sizeof(float)); + l->outputs = l->inputs; + l->truths = l->side*l->side*(1+l->coords+l->classes); + l->output = calloc(batch*l->outputs, sizeof(float)); + l->delta = calloc(batch*l->outputs, sizeof(float)); + + l->forward = forward_detection_layer; + l->backward = backward_detection_layer; +#ifdef GPU + l->forward_gpu = forward_detection_layer_gpu; + l->backward_gpu = backward_detection_layer_gpu; + l->output_gpu = cuda_make_array(l->output, batch*l->outputs); + l->delta_gpu = cuda_make_array(l->delta, batch*l->outputs); +#endif + + fprintf(stderr, "Detection Layer\n"); + srand(0); + + return l; +} + +void forward_detection_layer(detection_layer *l, network net) +{ + int locations = l->side*l->side; + int i,j; + memcpy(l->output, net.input, l->outputs*l->batch*sizeof(float)); + //if(l->reorg) reorg(l->output, l->w*l->h, size*l->n, l->batch, 1); + int b; + if (l->softmax){ + for(b = 0; b < l->batch; ++b){ + int index = b*l->inputs; + for (i = 0; i < locations; ++i) { + int offset = i*l->classes; + softmax(l->output + index + offset, l->classes, 1, 1, + l->output + index + offset); + } + } + } + if(net.train){ + float avg_iou = 
0; + float avg_cat = 0; + float avg_allcat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + *(l->cost) = 0; + int size = l->inputs * l->batch; + memset(l->delta, 0, size * sizeof(float)); + for (b = 0; b < l->batch; ++b){ + int index = b*l->inputs; + for (i = 0; i < locations; ++i) { + int truth_index = (b*locations + i)*(1+l->coords+l->classes); + int is_obj = net.truth[truth_index]; + for (j = 0; j < l->n; ++j) { + int p_index = index + locations*l->classes + i*l->n + j; + l->delta[p_index] = l->noobject_scale*(0 - l->output[p_index]); + *(l->cost) += l->noobject_scale*pow(l->output[p_index], 2); + avg_anyobj += l->output[p_index]; + } + + int best_index = -1; + float best_iou = 0; + float best_rmse = 20; + + if (!is_obj){ + continue; + } + + int class_index = index + i*l->classes; + for(j = 0; j < l->classes; ++j) { + l->delta[class_index+j] = l->class_scale * (net.truth[truth_index+1+j] - l->output[class_index+j]); + *(l->cost) += l->class_scale * pow(net.truth[truth_index+1+j] - l->output[class_index+j], 2); + if(net.truth[truth_index + 1 + j]) avg_cat += l->output[class_index+j]; + avg_allcat += l->output[class_index+j]; + } + + box truth = float_to_box(net.truth + truth_index + 1 + l->classes, 1); + truth.x /= l->side; + truth.y /= l->side; + + for(j = 0; j < l->n; ++j){ + int box_index = index + locations*(l->classes + l->n) + (i*l->n + j) * l->coords; + box out = float_to_box(l->output + box_index, 1); + out.x /= l->side; + out.y /= l->side; + + if (l->sqrt){ + out.w = out.w*out.w; + out.h = out.h*out.h; + } + + float iou = box_iou(out, truth); + //iou = 0; + float rmse = box_rmse(out, truth); + if(best_iou > 0 || iou > 0){ + if(iou > best_iou){ + best_iou = iou; + best_index = j; + } + }else{ + if(rmse < best_rmse){ + best_rmse = rmse; + best_index = j; + } + } + } + + if(l->forced){ + if(truth.w*truth.h < .1){ + best_index = 1; + }else{ + best_index = 0; + } + } + if(l->random && *(net.seen) < 64000){ + best_index = rand()%l->n; + } + 
+ int box_index = index + locations*(l->classes + l->n) + (i*l->n + best_index) * l->coords; + int tbox_index = truth_index + 1 + l->classes; + + box out = float_to_box(l->output + box_index, 1); + out.x /= l->side; + out.y /= l->side; + if (l->sqrt) { + out.w = out.w*out.w; + out.h = out.h*out.h; + } + float iou = box_iou(out, truth); + + //printf("%d,", best_index); + int p_index = index + locations*l->classes + i*l->n + best_index; + *(l->cost) -= l->noobject_scale * pow(l->output[p_index], 2); + *(l->cost) += l->object_scale * pow(1-l->output[p_index], 2); + avg_obj += l->output[p_index]; + l->delta[p_index] = l->object_scale * (1.-l->output[p_index]); + + if(l->rescore){ + l->delta[p_index] = l->object_scale * (iou - l->output[p_index]); + } + + l->delta[box_index+0] = l->coord_scale*(net.truth[tbox_index + 0] - l->output[box_index + 0]); + l->delta[box_index+1] = l->coord_scale*(net.truth[tbox_index + 1] - l->output[box_index + 1]); + l->delta[box_index+2] = l->coord_scale*(net.truth[tbox_index + 2] - l->output[box_index + 2]); + l->delta[box_index+3] = l->coord_scale*(net.truth[tbox_index + 3] - l->output[box_index + 3]); + if(l->sqrt){ + l->delta[box_index+2] = l->coord_scale*(sqrt(net.truth[tbox_index + 2]) - l->output[box_index + 2]); + l->delta[box_index+3] = l->coord_scale*(sqrt(net.truth[tbox_index + 3]) - l->output[box_index + 3]); + } + + *(l->cost) += pow(1-iou, 2); + avg_iou += iou; + ++count; + } + } + + if(0){ + float *costs = calloc(l->batch*locations*l->n, sizeof(float)); + for (b = 0; b < l->batch; ++b) { + int index = b*l->inputs; + for (i = 0; i < locations; ++i) { + for (j = 0; j < l->n; ++j) { + int p_index = index + locations*l->classes + i*l->n + j; + costs[b*locations*l->n + i*l->n + j] = l->delta[p_index]*l->delta[p_index]; + } + } + } + int indexes[100]; + top_k(costs, l->batch*locations*l->n, 100, indexes); + float cutoff = costs[indexes[99]]; + for (b = 0; b < l->batch; ++b) { + int index = b*l->inputs; + for (i = 0; i < locations; 
++i) { + for (j = 0; j < l->n; ++j) { + int p_index = index + locations*l->classes + i*l->n + j; + if (l->delta[p_index]*l->delta[p_index] < cutoff) l->delta[p_index] = 0; + } + } + } + free(costs); + } + + + *(l->cost) = pow(mag_array(l->delta, l->outputs * l->batch), 2); + + + printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l->classes), avg_obj/count, avg_anyobj/(l->batch*locations*l->n), count); + //if(l->reorg) reorg(l->delta, l->w*l->h, size*l->n, l->batch, 0); + } +} + +void backward_detection_layer(detection_layer *l, network net) +{ + axpy_cpu(l->batch*l->inputs, 1, l->delta, 1, net.delta, 1); +} + +void get_detection_detections(layer *l, int w, int h, float thresh, detection *dets) +{ + int i,j,n; + float *predictions = l->output; + //int per_cell = 5*num+classes; + for (i = 0; i < l->side*l->side; ++i){ + int row = i / l->side; + int col = i % l->side; + for(n = 0; n < l->n; ++n){ + int index = i*l->n + n; + int p_index = l->side*l->side*l->classes + i*l->n + n; + float scale = predictions[p_index]; + int box_index = l->side*l->side*(l->classes + l->n) + (i*l->n + n)*4; + box b; + b.x = (predictions[box_index + 0] + col) / l->side * w; + b.y = (predictions[box_index + 1] + row) / l->side * h; + b.w = pow(predictions[box_index + 2], (l->sqrt?2:1)) * w; + b.h = pow(predictions[box_index + 3], (l->sqrt?2:1)) * h; + dets[index].bbox = b; + dets[index].objectness = scale; + for(j = 0; j < l->classes; ++j){ + int class_index = i*l->classes; + float prob = scale*predictions[class_index+j]; + dets[index].prob[j] = (prob > thresh) ? 
prob : 0; + } + } + } +} + +#ifdef GPU + +void forward_detection_layer_gpu(detection_layer *l, network net) +{ + if(!net.train){ + copy_gpu(l->batch*l->inputs, net.input_gpu, 1, l->output_gpu, 1, net.streams[l->stream_index]); + return; + } + + cuda_pull_array(net.input_gpu, net.input, l->batch*l->inputs); + forward_detection_layer(l, net); + cuda_push_array(l->output_gpu, l->output, l->batch*l->outputs); + cuda_push_array(l->delta_gpu, l->delta, l->batch*l->inputs); +} + +void backward_detection_layer_gpu(detection_layer *l, network net) +{ + axpy_gpu(l->batch*l->inputs, 1, l->delta_gpu, 1, net.delta_gpu, 1, net.streams[l->stream_index]); + //copy_gpu(l->batch*l->inputs, l->delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/detection_layer.h b/workloads/realworld/pipeline/darknet/src/detection_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..cca1e9b6ca342860c79eb0b22ed0f7f7fcf02896 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/detection_layer.h @@ -0,0 +1,18 @@ +#ifndef DETECTION_LAYER_H +#define DETECTION_LAYER_H + +#include "layer.h" +#include "network.h" + +typedef layer detection_layer; + +detection_layer* make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore); +void forward_detection_layer(detection_layer *l, network net); +void backward_detection_layer(detection_layer *l, network net); + +#ifdef GPU +void forward_detection_layer_gpu(detection_layer *l, network net); +void backward_detection_layer_gpu(detection_layer *l, network net); +#endif + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/dropout_layer.c b/workloads/realworld/pipeline/darknet/src/dropout_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..996ef335e5a8a0742d74d031d380ecb776dcbf39 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/dropout_layer.c @@ -0,0 +1,60 @@ +#include "dropout_layer.h" +#include "utils.h" 
+#include "cuda_dark.h" +#include +#include + +dropout_layer* make_dropout_layer(int batch, int inputs, float probability) +{ + dropout_layer *l = calloc(1, sizeof(dropout_layer)); + l->type = DROPOUT; + l->probability = probability; + l->inputs = inputs; + l->outputs = inputs; + l->batch = batch; + l->rand = calloc(inputs*batch, sizeof(float)); + l->scale = 1./(1.-probability); + l->forward = forward_dropout_layer; + l->backward = backward_dropout_layer; + #ifdef GPU + l->forward_gpu = forward_dropout_layer_gpu; + l->backward_gpu = backward_dropout_layer_gpu; + l->rand_gpu = cuda_make_array(l->rand, inputs*batch); + #endif + fprintf(stderr, "dropout p = %.2f %4d -> %4d\n", probability, inputs, inputs); + return l; +} + +void resize_dropout_layer(dropout_layer *l, int inputs) +{ + l->rand = realloc(l->rand, l->inputs*l->batch*sizeof(float)); + #ifdef GPU + cuda_free(l->rand_gpu); + + l->rand_gpu = cuda_make_array(l->rand, inputs*l->batch); + #endif +} + +void forward_dropout_layer(dropout_layer *l, network net) +{ + int i; + if (!net.train) return; + for(i = 0; i < l->batch * l->inputs; ++i){ + float r = rand_uniform(0, 1); + l->rand[i] = r; + if(r < l->probability) net.input[i] = 0; + else net.input[i] *= l->scale; + } +} + +void backward_dropout_layer(dropout_layer *l, network net) +{ + int i; + if(!net.delta) return; + for(i = 0; i < l->batch * l->inputs; ++i){ + float r = l->rand[i]; + if(r < l->probability) net.delta[i] = 0; + else net.delta[i] *= l->scale; + } +} + diff --git a/workloads/realworld/pipeline/darknet/src/dropout_layer.h b/workloads/realworld/pipeline/darknet/src/dropout_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..d2c92f49d5e223ea60b4ae5fbd4bc2f11d678332 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/dropout_layer.h @@ -0,0 +1,20 @@ +#ifndef DROPOUT_LAYER_H +#define DROPOUT_LAYER_H + +#include "layer.h" +#include "network.h" + +typedef layer dropout_layer; + +dropout_layer* make_dropout_layer(int 
batch, int inputs, float probability); + +void forward_dropout_layer(dropout_layer *l, network net); +void backward_dropout_layer(dropout_layer *l, network net); +void resize_dropout_layer(dropout_layer *l, int inputs); + +#ifdef GPU +void forward_dropout_layer_gpu(dropout_layer *l, network net); +void backward_dropout_layer_gpu(dropout_layer *l, network net); + +#endif +#endif diff --git a/workloads/realworld/pipeline/darknet/src/dropout_layer_kernels.cu b/workloads/realworld/pipeline/darknet/src/dropout_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..51faab01e57a61fcc801735fdc3430c4c4775adb --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/dropout_layer_kernels.cu @@ -0,0 +1,41 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "dropout_layer.h" +#include "cuda_dark.h" +#include "utils.h" +} + +__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id < size) input[id] = (rand[id] < prob) ? 
0 : input[id]*scale; +} + +void forward_dropout_layer_gpu(dropout_layer *layer, network net) +{ + if (!net.train) return; + int size = layer->inputs*layer->batch; + cuda_random(layer->rand_gpu, size); + /* + int i; + for(i = 0; i < size; ++i){ + layer->rand[i] = rand_uniform(); + } + cuda_push_array(layer->rand_gpu, layer->rand, size); + */ + + yoloswag420blazeit360noscope<<>>(net.input_gpu, size, layer->rand_gpu, layer->probability, layer->scale); + check_error(cudaPeekAtLastError()); +} + +void backward_dropout_layer_gpu(dropout_layer *layer, network net) +{ + if(!net.delta_gpu) return; + int size = layer->inputs*layer->batch; + + yoloswag420blazeit360noscope<<>>(net.delta_gpu, size, layer->rand_gpu, layer->probability, layer->scale); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/pipeline/darknet/src/gemm.c b/workloads/realworld/pipeline/darknet/src/gemm.c new file mode 100644 index 0000000000000000000000000000000000000000..39937f79be5a89b2b595a0b4000bf1a8f979ea12 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/gemm.c @@ -0,0 +1,324 @@ +#include "gemm.h" +#include "utils.h" +#include "cuda_dark.h" +#include +#include +#include + +void gemm_bin(int M, int N, int K, float ALPHA, + char *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + char A_PART = A[i*lda+k]; + if(A_PART){ + for(j = 0; j < N; ++j){ + C[i*ldc+j] += B[k*ldb+j]; + } + } else { + for(j = 0; j < N; ++j){ + C[i*ldc+j] -= B[k*ldb+j]; + } + } + } + } +} + +float *random_matrix_gemm(int rows, int cols) +{ + int i; + float *m = calloc(rows*cols, sizeof(float)); + for(i = 0; i < rows*cols; ++i){ + m[i] = (float)rand()/RAND_MAX; + } + return m; +} + +void time_random_matrix(int TA, int TB, int m, int k, int n) +{ + float *a; + if(!TA) a = random_matrix_gemm(m,k); + else a = random_matrix_gemm(k,m); + int lda = (!TA)?k:m; + float *b; + if(!TB) b = random_matrix_gemm(k,n); + else b = 
random_matrix_gemm(n,k); + int ldb = (!TB)?n:k; + + float *c = random_matrix_gemm(m,n); + int i; + clock_t start = clock(), end; + for(i = 0; i<10; ++i){ + gemm_cpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); + } + end = clock(); + printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf ms\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); + free(a); + free(b); + free(c); +} + + +void gemm(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + gemm_cpu( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); +} + +void gemm_nn(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + register float A_PART = ALPHA*A[i*lda+k]; + for(j = 0; j < N; ++j){ + C[i*ldc+j] += A_PART*B[k*ldb+j]; + } + } + } +} + +void gemm_nt(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + register float sum = 0; + for(k = 0; k < K; ++k){ + sum += ALPHA*A[i*lda+k]*B[j*ldb + k]; + } + C[i*ldc+j] += sum; + } + } +} + +void gemm_tn(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + register float A_PART = ALPHA*A[k*lda+i]; + for(j = 0; j < N; ++j){ + C[i*ldc+j] += A_PART*B[k*ldb+j]; + } + } + } +} + +void gemm_tt(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + register float sum = 0; + for(k = 0; k < K; ++k){ + sum += ALPHA*A[i+k*lda]*B[k+j*ldb]; + } + C[i*ldc+j] += sum; + } + } +} + + +void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, 
int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + //printf("cpu: %d %d %d %d %d %f %d %d %f %d\n",TA, TB, M, N, K, ALPHA, lda, ldb, BETA, ldc); + int i, j; + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + C[i*ldc + j] *= BETA; + } + } + if(!TA && !TB) + gemm_nn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else if(TA && !TB) + gemm_tn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else if(!TA && TB) + gemm_nt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else + gemm_tt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); +} + + +// #ifdef GPU + +// #include + +// void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, +// float *A_gpu, int lda, +// float *B_gpu, int ldb, +// float BETA, +// float *C_gpu, int ldc) +// { +// cublasHandle_t handle = blas_handle(); +// cudaError_t status = (cudaError_t) cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), +// (TA ? CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); +// check_error(status); +// } + +// #include +// #include +// #include +// #include + +// void time_gpu_random_matrix(int TA, int TB, int m, int k, int n) +// { +// float *a; +// if(!TA) a = random_matrix_gemm(m,k); +// else a = random_matrix_gemm(k,m); +// int lda = (!TA)?k:m; +// float *b; +// if(!TB) b = random_matrix_gemm(k,n); +// else b = random_matrix_gemm(n,k); +// int ldb = (!TB)?n:k; + +// float *c = random_matrix_gemm(m,n); +// int i; +// clock_t start = clock(), end; +// for(i = 0; i<32; ++i){ +// gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); +// } +// end = clock(); +// printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); +// free(a); +// free(b); +// free(c); +// } + +// void time_gpu(int TA, int TB, int m, int k, int n) +// { +// int iter = 10; +// float *a = random_matrix_gemm(m,k); +// float *b = random_matrix_gemm(k,n); + +// int lda = (!TA)?k:m; +// int ldb = (!TB)?n:k; + +// float *c = random_matrix_gemm(m,n); + +// float *a_cl = 
cuda_make_array(a, m*k); +// float *b_cl = cuda_make_array(b, k*n); +// float *c_cl = cuda_make_array(c, m*n); + +// int i; +// clock_t start = clock(), end; +// for(i = 0; i +#include +#include +#include +#include + +#include "gemm.h" +#include "utils.h" +#include "cuda_dark.h" + +#include +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK_X 16 +#define DIM_THREAD_BLOCK_Y 16 + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +__global__ void gemm_kernel(float *a, float *b, float *c, int M, int K, int N, float alpha, float beta) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // Compute each thread's global row and column index + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + + + // Statically allocated shared memory + __shared__ float s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + __shared__ float s_b[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + // Accumulate in temporary variable + + float tmp = 0.0f; + if (row < M && col < N) { + + tmp = beta * c[row * N + col]; + + // Sweep tile across matrix + for (int i = 0; i < K; i += blockDim.x) { + int left = K - i; + + if ((i + threadIdx.x) < K) + s_a[threadIdx.y * blockDim.x + threadIdx.x] = a[row * K + i + threadIdx.x]; + + if ((i + threadIdx.y) < K) + s_b[threadIdx.y * blockDim.x + threadIdx.x] = b[(i + threadIdx.y) * N + col]; + + block.sync(); + + for (int k = 0; k < blockDim.x && k < left ; k++) { + tmp += alpha * s_a[threadIdx.y * blockDim.x + k] * s_b[k * blockDim.x + threadIdx.x]; + } + block.sync(); + } + c[row * N + col] = tmp; + } + block.sync(); +} + +void gemmCuda(float *A, float *B, float *C, int M, int N, int K, float alpha, float beta, cudaStream_t stream) +{ + // 
float *A_gpu; + // float *B_gpu; + // float *C_gpu; + + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 grid((size_t)(ceil(((float)N) / ((float)block.y))), (size_t)(ceil(((float)M) / ((float)block.x)))); + + // cudaMalloc(&A_gpu, sizeof(float) * M * K); + // cudaMalloc(&B_gpu, sizeof(float) * K * N); + // cudaMalloc(&C_gpu, sizeof(float) * M * N); + + // cudaMemcpy(A_gpu, A, sizeof(float) * M * K, cudaMemcpyHostToDevice); + // cudaMemcpy(B_gpu, B, sizeof(float) * K * N, cudaMemcpyHostToDevice); + // cudaMemcpy(C_gpu, C, sizeof(float) * M * N, cudaMemcpyHostToDevice); + gemm_kernel<<>>(A, B, C, M, K, N, alpha, beta); + // cudaDeviceSynchronize(); + // cudaMemcpy(C, C_gpu, sizeof(float) * M * N, cudaMemcpyDeviceToHost); + + // cudaFree(A_gpu); + // cudaFree(B_gpu); + // cudaFree(C_gpu); +} + +// void gemmCuda(float *A, float *B, float *C, int M, int N, int K, float alpha, float beta) +// { +// float *A_gpu; +// float *B_gpu; +// float *C_gpu; + +// dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); +// dim3 grid((size_t)(ceil(((float)N) / ((float)block.y))), (size_t)(ceil(((float)M) / ((float)block.x)))); + +// cudaMalloc(&A_gpu, sizeof(float) * M * K); +// cudaMalloc(&B_gpu, sizeof(float) * K * N); +// cudaMalloc(&C_gpu, sizeof(float) * M * N); + +// cudaMemcpy(A_gpu, A, sizeof(float) * M * K, cudaMemcpyHostToDevice); +// cudaMemcpy(B_gpu, B, sizeof(float) * K * N, cudaMemcpyHostToDevice); +// cudaMemcpy(C_gpu, C, sizeof(float) * M * N, cudaMemcpyHostToDevice); +// gemm_kernel<<>>(A_gpu, B_gpu, C_gpu, M, K, N, alpha, beta); +// // cudaDeviceSynchronize(); +// cudaMemcpy(C, C_gpu, sizeof(float) * M * N, cudaMemcpyDeviceToHost); + +// cudaFree(A_gpu); +// cudaFree(B_gpu); +// cudaFree(C_gpu); +// } + +// void gemmCuda(float *A, float *B, float *C, int M, int N, int K, float alpha, float beta) +// { +// float *A_gpu; +// float *B_gpu; +// float *C_gpu; + +// dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); +// dim3 grid((size_t)(ceil(((float)N) / 
((float)block.y))), (size_t)(ceil(((float)M) / ((float)block.x)))); + +// cudaMallocManaged(&A_gpu, sizeof(float) * M * K); +// cudaMallocManaged(&B_gpu, sizeof(float) * K * N); +// cudaMallocManaged(&C_gpu, sizeof(float) * M * N); +// printf("allocation succeed!\n"); + +// memcpy(A_gpu, A, sizeof(float) * M * K); +// memcpy(B_gpu, B, sizeof(float) * K * N); +// printf("memcpy succeed!\n"); +// // cudaMemcpy(C_gpu, C, sizeof(float) * M * N, cudaMemcpyHostToDevice); +// gemm_kernel<<>>(A_gpu, B_gpu, C_gpu, M, K, N, alpha, beta); +// cudaDeviceSynchronize(); +// memcpy(C, C_gpu, sizeof(float) * M * N); + +// cudaFree(A_gpu); +// cudaFree(B_gpu); +// cudaFree(C_gpu); +// } + +void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, + float *A_gpu, int lda, + float *B_gpu, int ldb, + float BETA, + float *C_gpu, int ldc, cudaStream_t stream) +{ + // printf("TA is %d, TB is %d, M is %d, N is %d, K is %d, lda is %d, ldb is %d, ldc is %d.\n", TA, TB, M, N, K, lda, ldb, ldc); + if (TA == 0 && TB == 0) { + gemmCuda(A_gpu, B_gpu, C_gpu, M, N, K, ALPHA, BETA, stream); + check_error_stream(cudaPeekAtLastError(), stream); + } else { + cublasHandle_t handle = blas_handle(); + cudaError_t status = (cudaError_t) cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), + (TA ? 
CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); + check_error(status); + } +} + + +// void time_gpu_random_matrix(int TA, int TB, int m, int k, int n) +// { +// float *a; +// if(!TA) a = random_matrix_gemm(m,k); +// else a = random_matrix_gemm(k,m); +// int lda = (!TA)?k:m; +// float *b; +// if(!TB) b = random_matrix_gemm(k,n); +// else b = random_matrix_gemm(n,k); +// int ldb = (!TB)?n:k; + +// float *c = random_matrix_gemm(m,n); +// int i; +// clock_t start = clock(), end; +// for(i = 0; i<32; ++i){ +// gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); +// } +// end = clock(); +// printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); +// free(a); +// free(b); +// free(c); +// } + +// void time_gpu(int TA, int TB, int m, int k, int n) +// { +// int iter = 10; +// float *a = random_matrix_gemm(m,k); +// float *b = random_matrix_gemm(k,n); + +// int lda = (!TA)?k:m; +// int ldb = (!TB)?n:k; + +// float *c = random_matrix_gemm(m,n); + +// float *a_cl = cuda_make_array(a, m*k); +// float *b_cl = cuda_make_array(b, k*n); +// float *c_cl = cuda_make_array(c, m*n); + +// int i; +// clock_t start = clock(), end; +// for(i = 0; i +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer* make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam) +{ + fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer *l = malloc(sizeof(layer)); + l->batch = batch; + l->type = GRU; + l->steps = steps; + l->inputs = inputs; + + l->uz = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + l->uz = make_connected_layer(batch*steps, inputs, outputs, 
LINEAR, batch_normalize, adam); + l->uz->batch = batch; + + l->wz = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + l->wz = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l->wz->batch = batch; + + l->ur = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + l->ur = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l->ur->batch = batch; + + l->wr = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + l->wr = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l->wr->batch = batch; + + + + l->uh = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + l->uh = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l->uh->batch = batch; + + l->wh = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + l->wh = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l->wh->batch = batch; + + l->batch_normalize = batch_normalize; + + + l->outputs = outputs; + l->output = calloc(outputs*batch*steps, sizeof(float)); + l->delta = calloc(outputs*batch*steps, sizeof(float)); + l->state = calloc(outputs*batch, sizeof(float)); + l->prev_state = calloc(outputs*batch, sizeof(float)); + l->forgot_state = calloc(outputs*batch, sizeof(float)); + l->forgot_delta = calloc(outputs*batch, sizeof(float)); + + l->r_cpu = calloc(outputs*batch, sizeof(float)); + l->z_cpu = calloc(outputs*batch, sizeof(float)); + l->h_cpu = calloc(outputs*batch, sizeof(float)); + + l->forward = forward_gru_layer; + l->backward = backward_gru_layer; + l->update = update_gru_layer; + +#ifdef GPU + l->forward_gpu = forward_gru_layer_gpu; + l->backward_gpu = backward_gru_layer_gpu; + l->update_gpu = update_gru_layer_gpu; + + l->forgot_state_gpu = cuda_make_array(0, batch*outputs); + l->forgot_delta_gpu = cuda_make_array(0, batch*outputs); + l->prev_state_gpu = cuda_make_array(0, batch*outputs); + l->state_gpu = cuda_make_array(0, batch*outputs); + 
l->output_gpu = cuda_make_array(0, batch*outputs*steps); + l->delta_gpu = cuda_make_array(0, batch*outputs*steps); + l->r_gpu = cuda_make_array(0, batch*outputs); + l->z_gpu = cuda_make_array(0, batch*outputs); + l->h_gpu = cuda_make_array(0, batch*outputs); + +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l->uz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->uz->out_c, l->uz->out_h, l->uz->out_w); + cudnnSetTensor4dDescriptor(l->uh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->uh->out_c, l->uh->out_h, l->uh->out_w); + cudnnSetTensor4dDescriptor(l->ur->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->ur->out_c, l->ur->out_h, l->ur->out_w); + cudnnSetTensor4dDescriptor(l->wz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->wz->out_c, l->wz->out_h, l->wz->out_w); + cudnnSetTensor4dDescriptor(l->wh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->wh->out_c, l->wh->out_h, l->wh->out_w); + cudnnSetTensor4dDescriptor(l->wr->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->wr->out_c, l->wr->out_h, l->wr->out_w); +#endif +#endif + + return l; +} + +void update_gru_layer(layer *l, update_args a, network net) +{ + update_connected_layer((l->ur), a, net); + update_connected_layer((l->uz), a, net); + update_connected_layer((l->uh), a, net); + update_connected_layer((l->wr), a, net); + update_connected_layer((l->wz), a, net); + update_connected_layer((l->wh), a, net); +} + +void forward_gru_layer(layer *l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer *uz = (l->uz); + layer *ur = (l->ur); + layer *uh = (l->uh); + + layer *wz = (l->wz); + layer *wr = (l->wr); + layer *wh = (l->wh); + + fill_cpu(l->outputs * l->batch * l->steps, 0, uz->delta, 1); + fill_cpu(l->outputs * l->batch * l->steps, 0, ur->delta, 1); + fill_cpu(l->outputs * l->batch * l->steps, 0, uh->delta, 1); + + fill_cpu(l->outputs * l->batch * l->steps, 0, wz->delta, 1); + fill_cpu(l->outputs * l->batch * 
l->steps, 0, wr->delta, 1); + fill_cpu(l->outputs * l->batch * l->steps, 0, wh->delta, 1); + if(net.train) { + fill_cpu(l->outputs * l->batch * l->steps, 0, l->delta, 1); + copy_cpu(l->outputs*l->batch, l->state, 1, l->prev_state, 1); + } + + for (i = 0; i < l->steps; ++i) { + s.input = l->state; + forward_connected_layer(wz, s); + forward_connected_layer(wr, s); + + s.input = net.input; + forward_connected_layer(uz, s); + forward_connected_layer(ur, s); + forward_connected_layer(uh, s); + + + copy_cpu(l->outputs*l->batch, uz->output, 1, l->z_cpu, 1); + axpy_cpu(l->outputs*l->batch, 1, wz->output, 1, l->z_cpu, 1); + + copy_cpu(l->outputs*l->batch, ur->output, 1, l->r_cpu, 1); + axpy_cpu(l->outputs*l->batch, 1, wr->output, 1, l->r_cpu, 1); + + activate_array(l->z_cpu, l->outputs*l->batch, LOGISTIC); + activate_array(l->r_cpu, l->outputs*l->batch, LOGISTIC); + + copy_cpu(l->outputs*l->batch, l->state, 1, l->forgot_state, 1); + mul_cpu(l->outputs*l->batch, l->r_cpu, 1, l->forgot_state, 1); + + s.input = l->forgot_state; + forward_connected_layer(wh, s); + + copy_cpu(l->outputs*l->batch, uh->output, 1, l->h_cpu, 1); + axpy_cpu(l->outputs*l->batch, 1, wh->output, 1, l->h_cpu, 1); + + if(l->tanh){ + activate_array(l->h_cpu, l->outputs*l->batch, TANH); + } else { + activate_array(l->h_cpu, l->outputs*l->batch, LOGISTIC); + } + + weighted_sum_cpu(l->state, l->h_cpu, l->z_cpu, l->outputs*l->batch, l->output); + + copy_cpu(l->outputs*l->batch, l->output, 1, l->state, 1); + + net.input += l->inputs*l->batch; + l->output += l->outputs*l->batch; + increment_layer(uz, 1); + increment_layer(ur, 1); + increment_layer(uh, 1); + + increment_layer(wz, 1); + increment_layer(wr, 1); + increment_layer(wh, 1); + } +} + +void backward_gru_layer(layer *l, network net) +{ +} + +#ifdef GPU + +void pull_gru_layer(layer *l) +{ +} + +void push_gru_layer(layer *l) +{ +} + +void update_gru_layer_gpu(layer *l, update_args a, network net) +{ + update_connected_layer_gpu((l->ur), a, net); + 
update_connected_layer_gpu((l->uz), a, net); + update_connected_layer_gpu((l->uh), a, net); + update_connected_layer_gpu((l->wr), a, net); + update_connected_layer_gpu((l->wz), a, net); + update_connected_layer_gpu((l->wh), a, net); +} + +void forward_gru_layer_gpu(layer *l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer *uz = (l->uz); + layer *ur = (l->ur); + layer *uh = (l->uh); + + layer *wz = (l->wz); + layer *wr = (l->wr); + layer *wh = (l->wh); + + fill_gpu(l->outputs * l->batch * l->steps, 0, uz->delta_gpu, 1); + fill_gpu(l->outputs * l->batch * l->steps, 0, ur->delta_gpu, 1); + fill_gpu(l->outputs * l->batch * l->steps, 0, uh->delta_gpu, 1); + + fill_gpu(l->outputs * l->batch * l->steps, 0, wz->delta_gpu, 1); + fill_gpu(l->outputs * l->batch * l->steps, 0, wr->delta_gpu, 1); + fill_gpu(l->outputs * l->batch * l->steps, 0, wh->delta_gpu, 1); + if(net.train) { + fill_gpu(l->outputs * l->batch * l->steps, 0, l->delta_gpu, 1); + copy_gpu(l->outputs*l->batch, l->state_gpu, 1, l->prev_state_gpu, 1, net.streams[l->stream_index]); + } + + for (i = 0; i < l->steps; ++i) { + s.input_gpu = l->state_gpu; + forward_connected_layer_gpu(wz, s); + forward_connected_layer_gpu(wr, s); + + s.input_gpu = net.input_gpu; + forward_connected_layer_gpu(uz, s); + forward_connected_layer_gpu(ur, s); + forward_connected_layer_gpu(uh, s); + + copy_gpu(l->outputs*l->batch, uz->output_gpu, 1, l->z_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->outputs*l->batch, 1, wz->output_gpu, 1, l->z_gpu, 1, net.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, ur->output_gpu, 1, l->r_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->outputs*l->batch, 1, wr->output_gpu, 1, l->r_gpu, 1, net.streams[l->stream_index]); + + activate_array_gpu(l->z_gpu, l->outputs*l->batch, LOGISTIC, net.streams[l->stream_index]); + activate_array_gpu(l->r_gpu, l->outputs*l->batch, LOGISTIC, net.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, l->state_gpu, 1, 
l->forgot_state_gpu, 1, net.streams[l->stream_index]); + mul_gpu(l->outputs*l->batch, l->r_gpu, 1, l->forgot_state_gpu, 1); + + s.input_gpu = l->forgot_state_gpu; + forward_connected_layer_gpu(wh, s); + + copy_gpu(l->outputs*l->batch, uh->output_gpu, 1, l->h_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->outputs*l->batch, 1, wh->output_gpu, 1, l->h_gpu, 1, net.streams[l->stream_index]); + + if(l->tanh){ + activate_array_gpu(l->h_gpu, l->outputs*l->batch, TANH, net.streams[l->stream_index]); + } else { + activate_array_gpu(l->h_gpu, l->outputs*l->batch, LOGISTIC, net.streams[l->stream_index]); + } + + weighted_sum_gpu(l->state_gpu, l->h_gpu, l->z_gpu, l->outputs*l->batch, l->output_gpu); + copy_gpu(l->outputs*l->batch, l->output_gpu, 1, l->state_gpu, 1, net.streams[l->stream_index]); + + net.input_gpu += l->inputs*l->batch; + l->output_gpu += l->outputs*l->batch; + increment_layer(uz, 1); + increment_layer(ur, 1); + increment_layer(uh, 1); + + increment_layer(wz, 1); + increment_layer(wr, 1); + increment_layer(wh, 1); + } +} + +void backward_gru_layer_gpu(layer *l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer *uz = (l->uz); + layer *ur = (l->ur); + layer *uh = (l->uh); + + layer *wz = (l->wz); + layer *wr = (l->wr); + layer *wh = (l->wh); + + increment_layer(uz, l->steps - 1); + increment_layer(ur, l->steps - 1); + increment_layer(uh, l->steps - 1); + + increment_layer(wz, l->steps - 1); + increment_layer(wr, l->steps - 1); + increment_layer(wh, l->steps - 1); + + net.input_gpu += l->inputs*l->batch*(l->steps-1); + if(net.delta_gpu) net.delta_gpu += l->inputs*l->batch*(l->steps-1); + l->output_gpu += l->outputs*l->batch*(l->steps-1); + l->delta_gpu += l->outputs*l->batch*(l->steps-1); + float *end_state = l->output_gpu; + for (i = l->steps-1; i >= 0; --i) { + if(i != 0) copy_gpu(l->outputs*l->batch, l->output_gpu - l->outputs*l->batch, 1, l->state_gpu, 1, net.streams[l->stream_index]); + else copy_gpu(l->outputs*l->batch, 
l->prev_state_gpu, 1, l->state_gpu, 1, net.streams[l->stream_index]); + float *prev_delta_gpu = (i == 0) ? 0 : l->delta_gpu - l->outputs*l->batch; + + copy_gpu(l->outputs*l->batch, uz->output_gpu, 1, l->z_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->outputs*l->batch, 1, wz->output_gpu, 1, l->z_gpu, 1, net.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, ur->output_gpu, 1, l->r_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->outputs*l->batch, 1, wr->output_gpu, 1, l->r_gpu, 1, net.streams[l->stream_index]); + + activate_array_gpu(l->z_gpu, l->outputs*l->batch, LOGISTIC, net.streams[l->stream_index]); + activate_array_gpu(l->r_gpu, l->outputs*l->batch, LOGISTIC, net.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, uh->output_gpu, 1, l->h_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->outputs*l->batch, 1, wh->output_gpu, 1, l->h_gpu, 1, net.streams[l->stream_index]); + + if(l->tanh){ + activate_array_gpu(l->h_gpu, l->outputs*l->batch, TANH, net.streams[l->stream_index]); + } else { + activate_array_gpu(l->h_gpu, l->outputs*l->batch, LOGISTIC, net.streams[l->stream_index]); + } + + weighted_delta_gpu(l->state_gpu, l->h_gpu, l->z_gpu, prev_delta_gpu, uh->delta_gpu, uz->delta_gpu, l->outputs*l->batch, l->delta_gpu); + + if(l->tanh){ + gradient_array_gpu(l->h_gpu, l->outputs*l->batch, TANH, uh->delta_gpu); + } else { + gradient_array_gpu(l->h_gpu, l->outputs*l->batch, LOGISTIC, uh->delta_gpu); + } + + copy_gpu(l->outputs*l->batch, uh->delta_gpu, 1, wh->delta_gpu, 1, net.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, l->state_gpu, 1, l->forgot_state_gpu, 1, net.streams[l->stream_index]); + mul_gpu(l->outputs*l->batch, l->r_gpu, 1, l->forgot_state_gpu, 1); + fill_gpu(l->outputs*l->batch, 0, l->forgot_delta_gpu, 1); + + s.input_gpu = l->forgot_state_gpu; + s.delta_gpu = l->forgot_delta_gpu; + + backward_connected_layer_gpu(wh, s); + if(prev_delta_gpu) mult_add_into_gpu(l->outputs*l->batch, l->forgot_delta_gpu, 
l->r_gpu, prev_delta_gpu); + mult_add_into_gpu(l->outputs*l->batch, l->forgot_delta_gpu, l->state_gpu, ur->delta_gpu); + + gradient_array_gpu(l->r_gpu, l->outputs*l->batch, LOGISTIC, ur->delta_gpu); + copy_gpu(l->outputs*l->batch, ur->delta_gpu, 1, wr->delta_gpu, 1, net.streams[l->stream_index]); + + gradient_array_gpu(l->z_gpu, l->outputs*l->batch, LOGISTIC, uz->delta_gpu); + copy_gpu(l->outputs*l->batch, uz->delta_gpu, 1, wz->delta_gpu, 1, net.streams[l->stream_index]); + + s.input_gpu = l->state_gpu; + s.delta_gpu = prev_delta_gpu; + + backward_connected_layer_gpu(wr, s); + backward_connected_layer_gpu(wz, s); + + s.input_gpu = net.input_gpu; + s.delta_gpu = net.delta_gpu; + + backward_connected_layer_gpu(uh, s); + backward_connected_layer_gpu(ur, s); + backward_connected_layer_gpu(uz, s); + + + net.input_gpu -= l->inputs*l->batch; + if(net.delta_gpu) net.delta_gpu -= l->inputs*l->batch; + l->output_gpu -= l->outputs*l->batch; + l->delta_gpu -= l->outputs*l->batch; + increment_layer(uz, -1); + increment_layer(ur, -1); + increment_layer(uh, -1); + + increment_layer(wz, -1); + increment_layer(wr, -1); + increment_layer(wh, -1); + } + copy_gpu(l->outputs*l->batch, end_state, 1, l->state_gpu, 1, net.streams[l->stream_index]); +} +#endif diff --git a/workloads/realworld/pipeline/darknet/src/gru_layer.h b/workloads/realworld/pipeline/darknet/src/gru_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..475f986f1e6610fe95be364810ad987f40efba60 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/gru_layer.h @@ -0,0 +1,24 @@ + +#ifndef GRU_LAYER_H +#define GRU_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer* make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam); + +void forward_gru_layer(layer *l, network state); +void backward_gru_layer(layer *l, network state); +void update_gru_layer(layer *l, update_args a, network net); + +#ifdef GPU +void 
forward_gru_layer_gpu(layer *l, network state); +void backward_gru_layer_gpu(layer *l, network state); +void update_gru_layer_gpu(layer *l, update_args a, network net); +void push_gru_layer(layer *l); +void pull_gru_layer(layer *l); +#endif + +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/im2col.c b/workloads/realworld/pipeline/darknet/src/im2col.c new file mode 100644 index 0000000000000000000000000000000000000000..69ec98a9d12b2e21a3859611ad709d62fc80dcf3 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/im2col.c @@ -0,0 +1,40 @@ +#include "im2col.h" +#include +float im2col_get_pixel(float *im, int height, int width, int channels, + int row, int col, int channel, int pad) +{ + row -= pad; + col -= pad; + + if (row < 0 || col < 0 || + row >= height || col >= width) return 0; + return im[col + width*(row + height*channel)]; +} + +//From Berkeley Vision's Caffe! +//https://github.com/BVLC/caffe/blob/master/LICENSE +void im2col_cpu(float* data_im, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_col) +{ + int c,h,w; + int height_col = (height + 2*pad - ksize) / stride + 1; + int width_col = (width + 2*pad - ksize) / stride + 1; + + int channels_col = channels * ksize * ksize; + for (c = 0; c < channels_col; ++c) { + int w_offset = c % ksize; + int h_offset = (c / ksize) % ksize; + int c_im = c / ksize / ksize; + for (h = 0; h < height_col; ++h) { + for (w = 0; w < width_col; ++w) { + int im_row = h_offset + h * stride; + int im_col = w_offset + w * stride; + int col_index = (c * height_col + h) * width_col + w; + data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, + im_row, im_col, c_im, pad); + } + } + } +} + diff --git a/workloads/realworld/pipeline/darknet/src/im2col.h b/workloads/realworld/pipeline/darknet/src/im2col.h new file mode 100644 index 0000000000000000000000000000000000000000..d85011125f1c6148c5b66d0e49e9d9b6119f9b6e --- /dev/null +++ 
b/workloads/realworld/pipeline/darknet/src/im2col.h @@ -0,0 +1,15 @@ +#ifndef IM2COL_H +#define IM2COL_H + +void im2col_cpu(float* data_im, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_col); + +#ifdef GPU +#include "cuda_dark.h" +void im2col_gpu(float *im, + int channels, int height, int width, + int ksize, int stride, int pad,float *data_col, cudaStream_t stream); + +#endif +#endif diff --git a/workloads/realworld/pipeline/darknet/src/im2col_kernels.cu b/workloads/realworld/pipeline/darknet/src/im2col_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..24c3cc10384a3f14c8167e223b502ccf670c1182 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/im2col_kernels.cu @@ -0,0 +1,61 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "im2col.h" +#include "cuda_dark.h" +} + +// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu +// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE + +__global__ void im2col_gpu_kernel(const int n, const float* data_im, + const int height, const int width, const int ksize, + const int pad, + const int stride, + const int height_col, const int width_col, + float *data_col) { + int index = blockIdx.x*blockDim.x+threadIdx.x; + for(; index < n; index += blockDim.x*gridDim.x){ + int w_out = index % width_col; + int h_index = index / width_col; + int h_out = h_index % height_col; + int channel_in = h_index / height_col; + int channel_out = channel_in * ksize * ksize; + int h_in = h_out * stride - pad; + int w_in = w_out * stride - pad; + float* data_col_ptr = data_col; + data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; + const float* data_im_ptr = data_im; + data_im_ptr += (channel_in * height + h_in) * width + w_in; + for (int i = 0; i < ksize; ++i) { + for (int j = 0; j < ksize; ++j) { + int h = h_in + i; + int w = w_in + j; + + *data_col_ptr = (h >= 0 
&& w >= 0 && h < height && w < width) ? + data_im_ptr[i * width + j] : 0; + + //*data_col_ptr = data_im_ptr[ii * width + jj]; + + data_col_ptr += height_col * width_col; + } + } + } +} + +void im2col_gpu(float *im, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_col, cudaStream_t stream){ + // We are going to launch channels * height_col * width_col kernels, each + // kernel responsible for copying a single-channel grid. + int height_col = (height + 2 * pad - ksize) / stride + 1; + int width_col = (width + 2 * pad - ksize) / stride + 1; + int num_kernels = channels * height_col * width_col; + im2col_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, + BLOCK, 0, stream>>>( + num_kernels, im, height, width, ksize, pad, + stride, height_col, + width_col, data_col); +} diff --git a/workloads/realworld/pipeline/darknet/src/image.c b/workloads/realworld/pipeline/darknet/src/image.c new file mode 100644 index 0000000000000000000000000000000000000000..e8dce0686548d23fe572de414ec49c2ba79acac9 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/image.c @@ -0,0 +1,1467 @@ +#include "image.h" +#include "utils.h" +#include "blas.h" +#include "cuda_dark.h" +#include +#include + +#define STB_IMAGE_IMPLEMENTATION +#include "stb_image.h" +#define STB_IMAGE_WRITE_IMPLEMENTATION +#include "stb_image_write.h" + +int windows = 0; + +float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} }; + +float get_color(int c, int x, int max) +{ + float ratio = ((float)x/max)*5; + int i = floor(ratio); + int j = ceil(ratio); + ratio -= i; + float r = (1-ratio) * colors[i][c] + ratio*colors[j][c]; + //printf("%f\n", r); + return r; +} + +image mask_to_rgb(image mask) +{ + int n = mask.c; + image im = make_image(mask.w, mask.h, 3); + int i, j; + for(j = 0; j < n; ++j){ + int offset = j*123457 % n; + float red = get_color(2,offset,n); + float green = get_color(1,offset,n); + float blue = get_color(0,offset,n); + for(i = 0; i < im.w*im.h; ++i){ + 
im.data[i + 0*im.w*im.h] += mask.data[j*im.h*im.w + i]*red; + im.data[i + 1*im.w*im.h] += mask.data[j*im.h*im.w + i]*green; + im.data[i + 2*im.w*im.h] += mask.data[j*im.h*im.w + i]*blue; + } + } + return im; +} + +static float get_pixel(image m, int x, int y, int c) +{ + assert(x < m.w && y < m.h && c < m.c); + return m.data[c*m.h*m.w + y*m.w + x]; +} +static float get_pixel_extend(image m, int x, int y, int c) +{ + if(x < 0 || x >= m.w || y < 0 || y >= m.h) return 0; + /* + if(x < 0) x = 0; + if(x >= m.w) x = m.w-1; + if(y < 0) y = 0; + if(y >= m.h) y = m.h-1; + */ + if(c < 0 || c >= m.c) return 0; + return get_pixel(m, x, y, c); +} +static void set_pixel(image m, int x, int y, int c, float val) +{ + if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return; + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] = val; +} +static void add_pixel(image m, int x, int y, int c, float val) +{ + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] += val; +} + +static float bilinear_interpolate(image im, float x, float y, int c) +{ + int ix = (int) floorf(x); + int iy = (int) floorf(y); + + float dx = x - ix; + float dy = y - iy; + + float val = (1-dy) * (1-dx) * get_pixel_extend(im, ix, iy, c) + + dy * (1-dx) * get_pixel_extend(im, ix, iy+1, c) + + (1-dy) * dx * get_pixel_extend(im, ix+1, iy, c) + + dy * dx * get_pixel_extend(im, ix+1, iy+1, c); + return val; +} + + +void composite_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float val = get_pixel(source, x, y, k); + float val2 = get_pixel_extend(dest, dx+x, dy+y, k); + set_pixel(dest, dx+x, dy+y, k, val * val2); + } + } + } +} + +image border_image(image a, int border) +{ + image b = make_image(a.w + 2*border, a.h + 2*border, a.c); + int x,y,k; + for(k = 0; k < b.c; ++k){ + for(y = 0; y < b.h; ++y){ + for(x = 0; x < b.w; ++x){ + float val = 
get_pixel_extend(a, x - border, y - border, k); + if(x - border < 0 || x - border >= a.w || y - border < 0 || y - border >= a.h) val = 1; + set_pixel(b, x, y, k, val); + } + } + } + return b; +} + +image tile_images(image a, image b, int dx) +{ + if(a.w == 0) return copy_image(b); + image c = make_image(a.w + b.w + dx, (a.h > b.h) ? a.h : b.h, (a.c > b.c) ? a.c : b.c); + fill_cpu(c.w*c.h*c.c, 1, c.data, 1); + embed_image(a, c, 0, 0); + composite_image(b, c, a.w + dx, 0); + return c; +} + +image get_label(image **characters, char *string, int size) +{ + size = size/10; + if(size > 7) size = 7; + image label = make_empty_image(0,0,0); + while(*string){ + image l = characters[size][(int)*string]; + image n = tile_images(label, l, -size - 1 + (size+1)/2); + free_image(label); + label = n; + ++string; + } + image b = border_image(label, label.h*.25); + free_image(label); + return b; +} + +void draw_label(image a, int r, int c, image label, const float *rgb) +{ + int w = label.w; + int h = label.h; + if (r - h >= 0) r = r - h; + + int i, j, k; + for(j = 0; j < h && j + r < a.h; ++j){ + for(i = 0; i < w && i + c < a.w; ++i){ + for(k = 0; k < label.c; ++k){ + float val = get_pixel(label, i, j, k); + set_pixel(a, i+c, j+r, k, rgb[k] * val); + } + } + } +} + +void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b) +{ + //normalize_image(a); + int i; + if(x1 < 0) x1 = 0; + if(x1 >= a.w) x1 = a.w-1; + if(x2 < 0) x2 = 0; + if(x2 >= a.w) x2 = a.w-1; + + if(y1 < 0) y1 = 0; + if(y1 >= a.h) y1 = a.h-1; + if(y2 < 0) y2 = 0; + if(y2 >= a.h) y2 = a.h-1; + + for(i = x1; i <= x2; ++i){ + a.data[i + y1*a.w + 0*a.w*a.h] = r; + a.data[i + y2*a.w + 0*a.w*a.h] = r; + + a.data[i + y1*a.w + 1*a.w*a.h] = g; + a.data[i + y2*a.w + 1*a.w*a.h] = g; + + a.data[i + y1*a.w + 2*a.w*a.h] = b; + a.data[i + y2*a.w + 2*a.w*a.h] = b; + } + for(i = y1; i <= y2; ++i){ + a.data[x1 + i*a.w + 0*a.w*a.h] = r; + a.data[x2 + i*a.w + 0*a.w*a.h] = r; + + a.data[x1 + i*a.w + 1*a.w*a.h] = g; + 
a.data[x2 + i*a.w + 1*a.w*a.h] = g; + + a.data[x1 + i*a.w + 2*a.w*a.h] = b; + a.data[x2 + i*a.w + 2*a.w*a.h] = b; + } +} + +void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b) +{ + int i; + for(i = 0; i < w; ++i){ + draw_box(a, x1+i, y1+i, x2-i, y2-i, r, g, b); + } +} + +void draw_bbox(image a, box bbox, int w, float r, float g, float b) +{ + int left = (bbox.x-bbox.w/2)*a.w; + int right = (bbox.x+bbox.w/2)*a.w; + int top = (bbox.y-bbox.h/2)*a.h; + int bot = (bbox.y+bbox.h/2)*a.h; + + int i; + for(i = 0; i < w; ++i){ + draw_box(a, left+i, top+i, right-i, bot-i, r, g, b); + } +} + +image **load_alphabet() +{ + char *value = getenv("UVMAsyncBench_BASE"); + int i, j; + const int nsize = 8; + image **alphabets = calloc(nsize, sizeof(image)); + for(j = 0; j < nsize; ++j){ + alphabets[j] = calloc(128, sizeof(image)); + for(i = 32; i < 127; ++i){ + char buff[256]; + sprintf(buff, "%s/workloads/realworld/standard/darknet/data/labels/%d_%d.png", value, i, j); + alphabets[j][i] = load_image_color(buff, 0, 0); + } + } + return alphabets; +} + +void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes) +{ + int i,j; + + for(i = 0; i < num; ++i){ + char labelstr[4096] = {0}; + int class_ = -1; + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j] > thresh){ + if (class_ < 0) { + strcat(labelstr, names[j]); + class_ = j; + } else { + strcat(labelstr, ", "); + strcat(labelstr, names[j]); + } + printf("%s: %.0f%%\n", names[j], dets[i].prob[j]*100); + } + } + if(class_ >= 0){ + int width = im.h * .006; + + /* + if(0){ + width = pow(prob, 1./2.)*10+1; + alphabet = 0; + } + */ + + //printf("%d %s: %.0f%%\n", i, names[class], prob*100); + int offset = class_*123457 % classes; + float red = get_color(2,offset,classes); + float green = get_color(1,offset,classes); + float blue = get_color(0,offset,classes); + float rgb[3]; + + //width = prob*20+2; + + rgb[0] = red; + rgb[1] = green; + 
rgb[2] = blue; + box b = dets[i].bbox; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + + int left = (b.x-b.w/2.)*im.w; + int right = (b.x+b.w/2.)*im.w; + int top = (b.y-b.h/2.)*im.h; + int bot = (b.y+b.h/2.)*im.h; + + if(left < 0) left = 0; + if(right > im.w-1) right = im.w-1; + if(top < 0) top = 0; + if(bot > im.h-1) bot = im.h-1; + + draw_box_width(im, left, top, right, bot, width, red, green, blue); + if (alphabet) { + image label = get_label(alphabet, labelstr, (im.h*.03)); + draw_label(im, top + width, left, label, rgb); + free_image(label); + } + if (dets[i].mask){ + image mask = float_to_image(14, 14, 1, dets[i].mask); + image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h); + image tmask = threshold_image(resized_mask, .5); + embed_image(tmask, im, left, top); + free_image(mask); + free_image(resized_mask); + free_image(tmask); + } + } + } +} + +void transpose_image(image im) +{ + assert(im.w == im.h); + int n, m; + int c; + for(c = 0; c < im.c; ++c){ + for(n = 0; n < im.w-1; ++n){ + for(m = n + 1; m < im.w; ++m){ + float swap = im.data[m + im.w*(n + im.h*c)]; + im.data[m + im.w*(n + im.h*c)] = im.data[n + im.w*(m + im.h*c)]; + im.data[n + im.w*(m + im.h*c)] = swap; + } + } + } +} + +void rotate_image_cw(image im, int times) +{ + assert(im.w == im.h); + times = (times + 400) % 4; + int i, x, y, c; + int n = im.w; + for(i = 0; i < times; ++i){ + for(c = 0; c < im.c; ++c){ + for(x = 0; x < n/2; ++x){ + for(y = 0; y < (n-1)/2 + 1; ++y){ + float temp = im.data[y + im.w*(x + im.h*c)]; + im.data[y + im.w*(x + im.h*c)] = im.data[n-1-x + im.w*(y + im.h*c)]; + im.data[n-1-x + im.w*(y + im.h*c)] = im.data[n-1-y + im.w*(n-1-x + im.h*c)]; + im.data[n-1-y + im.w*(n-1-x + im.h*c)] = im.data[x + im.w*(n-1-y + im.h*c)]; + im.data[x + im.w*(n-1-y + im.h*c)] = temp; + } + } + } + } +} + +void flip_image(image a) +{ + int i,j,k; + for(k = 0; k < a.c; ++k){ + for(i = 0; i < a.h; ++i){ + for(j = 0; j < a.w/2; ++j){ + int index = j + a.w*(i + a.h*(k)); + int flip = (a.w 
- j - 1) + a.w*(i + a.h*(k)); + float swap = a.data[flip]; + a.data[flip] = a.data[index]; + a.data[index] = swap; + } + } + } +} + +image image_distance(image a, image b) +{ + int i,j; + image dist = make_image(a.w, a.h, 1); + for(i = 0; i < a.c; ++i){ + for(j = 0; j < a.h*a.w; ++j){ + dist.data[j] += pow(a.data[i*a.h*a.w+j]-b.data[i*a.h*a.w+j],2); + } + } + for(j = 0; j < a.h*a.w; ++j){ + dist.data[j] = sqrt(dist.data[j]); + } + return dist; +} + +void ghost_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + float max_dist = sqrt((-source.w/2. + .5)*(-source.w/2. + .5)); + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float dist = sqrt((x - source.w/2. + .5)*(x - source.w/2. + .5) + (y - source.h/2. + .5)*(y - source.h/2. + .5)); + float alpha = (1 - dist/max_dist); + if(alpha < 0) alpha = 0; + float v1 = get_pixel(source, x,y,k); + float v2 = get_pixel(dest, dx+x,dy+y,k); + float val = alpha*v1 + (1-alpha)*v2; + set_pixel(dest, dx+x, dy+y, k, val); + } + } + } +} + +void blocky_image(image im, int s) +{ + int i,j,k; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + im.data[i + im.w*(j + im.h*k)] = im.data[i/s*s + im.w*(j/s*s + im.h*k)]; + } + } + } +} + +void censor_image(image im, int dx, int dy, int w, int h) +{ + int i,j,k; + int s = 32; + if(dx < 0) dx = 0; + if(dy < 0) dy = 0; + + for(k = 0; k < im.c; ++k){ + for(j = dy; j < dy + h && j < im.h; ++j){ + for(i = dx; i < dx + w && i < im.w; ++i){ + im.data[i + im.w*(j + im.h*k)] = im.data[i/s*s + im.w*(j/s*s + im.h*k)]; + //im.data[i + j*im.w + k*im.w*im.h] = 0; + } + } + } +} + +void embed_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float val = get_pixel(source, x,y,k); + set_pixel(dest, dx+x, dy+y, k, val); + } + } + } +} + +image collapse_image_layers(image source, int border) +{ + 
int h = source.h; + h = (h+border)*source.c - border; + image dest = make_image(source.w, h, 1); + int i; + for(i = 0; i < source.c; ++i){ + image layer = get_image_layer(source, i); + int h_offset = i*(source.h+border); + embed_image(layer, dest, 0, h_offset); + free_image(layer); + } + return dest; +} + +void constrain_image(image im) +{ + int i; + for(i = 0; i < im.w*im.h*im.c; ++i){ + if(im.data[i] < 0) im.data[i] = 0; + if(im.data[i] > 1) im.data[i] = 1; + } +} + +void normalize_image(image p) +{ + int i; + float min = 9999999; + float max = -999999; + + for(i = 0; i < p.h*p.w*p.c; ++i){ + float v = p.data[i]; + if(v < min) min = v; + if(v > max) max = v; + } + if(max - min < .000000001){ + min = 0; + max = 1; + } + for(i = 0; i < p.c*p.w*p.h; ++i){ + p.data[i] = (p.data[i] - min)/(max-min); + } +} + +void normalize_image2(image p) +{ + float *min = calloc(p.c, sizeof(float)); + float *max = calloc(p.c, sizeof(float)); + int i,j; + for(i = 0; i < p.c; ++i) min[i] = max[i] = p.data[i*p.h*p.w]; + + for(j = 0; j < p.c; ++j){ + for(i = 0; i < p.h*p.w; ++i){ + float v = p.data[i+j*p.h*p.w]; + if(v < min[j]) min[j] = v; + if(v > max[j]) max[j] = v; + } + } + for(i = 0; i < p.c; ++i){ + if(max[i] - min[i] < .000000001){ + min[i] = 0; + max[i] = 1; + } + } + for(j = 0; j < p.c; ++j){ + for(i = 0; i < p.w*p.h; ++i){ + p.data[i+j*p.h*p.w] = (p.data[i+j*p.h*p.w] - min[j])/(max[j]-min[j]); + } + } + free(min); + free(max); +} + +void copy_image_into(image src, image dest) +{ + memcpy(dest.data, src.data, src.h*src.w*src.c*sizeof(float)); +} + +image copy_image(image p) +{ + image copy = p; + copy.data = calloc(p.h*p.w*p.c, sizeof(float)); + memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float)); + return copy; +} + +void rgbgr_image(image im) +{ + int i; + for(i = 0; i < im.w*im.h; ++i){ + float swap = im.data[i]; + im.data[i] = im.data[i+im.w*im.h*2]; + im.data[i+im.w*im.h*2] = swap; + } +} + +int show_image(image p, const char *name, int ms) +{ +#ifdef OPENCV + int c = 
show_image_cv(p, name, ms); + return c; +#else + fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name); + save_image(p, name); + return -1; +#endif +} + +void save_image_options(image im, const char *name, IMTYPE f, int quality) +{ + char buff[256]; + //sprintf(buff, "%s (%d)", name, windows); + if(f == PNG) sprintf(buff, "%s.png", name); + else if (f == BMP) sprintf(buff, "%s.bmp", name); + else if (f == TGA) sprintf(buff, "%s.tga", name); + else if (f == JPG) sprintf(buff, "%s.jpg", name); + else sprintf(buff, "%s.png", name); + unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char)); + int i,k; + for(k = 0; k < im.c; ++k){ + for(i = 0; i < im.w*im.h; ++i){ + data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]); + } + } + int success = 0; + if(f == PNG) success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c); + else if (f == BMP) success = stbi_write_bmp(buff, im.w, im.h, im.c, data); + else if (f == TGA) success = stbi_write_tga(buff, im.w, im.h, im.c, data); + else if (f == JPG) success = stbi_write_jpg(buff, im.w, im.h, im.c, data, quality); + free(data); + if(!success) fprintf(stderr, "Failed to write image %s\n", buff); +} + +void save_image(image im, const char *name) +{ + save_image_options(im, name, JPG, 80); +} + +void show_image_layers(image p, char *name) +{ + int i; + char buff[256]; + for(i = 0; i < p.c; ++i){ + sprintf(buff, "%s - Layer %d", name, i); + image layer = get_image_layer(p, i); + show_image(layer, buff, 1); + free_image(layer); + } +} + +void show_image_collapsed(image p, char *name) +{ + image c = collapse_image_layers(p, 1); + show_image(c, name, 1); + free_image(c); +} + +image make_empty_image(int w, int h, int c) +{ + image out; + out.data = 0; + out.h = h; + out.w = w; + out.c = c; + return out; +} + +image make_image(int w, int h, int c) +{ + image out = make_empty_image(w,h,c); + out.data = calloc(h*w*c, sizeof(float)); + return out; +} + +image make_random_image(int w, int h, int 
c) +{ + image out = make_empty_image(w,h,c); + out.data = calloc(h*w*c, sizeof(float)); + int i; + for(i = 0; i < w*h*c; ++i){ + out.data[i] = (rand_normal() * .25) + .5; + } + return out; +} + +image float_to_image(int w, int h, int c, float *data) +{ + image out = make_empty_image(w,h,c); + out.data = data; + return out; +} + +void place_image(image im, int w, int h, int dx, int dy, image canvas) +{ + int x, y, c; + for(c = 0; c < im.c; ++c){ + for(y = 0; y < h; ++y){ + for(x = 0; x < w; ++x){ + float rx = ((float)x / w) * im.w; + float ry = ((float)y / h) * im.h; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(canvas, x + dx, y + dy, c, val); + } + } + } +} + +image center_crop_image(image im, int w, int h) +{ + int m = (im.w < im.h) ? im.w : im.h; + image c = crop_image(im, (im.w - m) / 2, (im.h - m)/2, m, m); + image r = resize_image(c, w, h); + free_image(c); + return r; +} + +image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect) +{ + int x, y, c; + float cx = im.w/2.; + float cy = im.h/2.; + image rot = make_image(w, h, im.c); + for(c = 0; c < im.c; ++c){ + for(y = 0; y < h; ++y){ + for(x = 0; x < w; ++x){ + float rx = cos(rad)*((x - w/2.)/s*aspect + dx/s*aspect) - sin(rad)*((y - h/2.)/s + dy/s) + cx; + float ry = sin(rad)*((x - w/2.)/s*aspect + dx/s*aspect) + cos(rad)*((y - h/2.)/s + dy/s) + cy; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(rot, x, y, c, val); + } + } + } + return rot; +} + +image rotate_image(image im, float rad) +{ + int x, y, c; + float cx = im.w/2.; + float cy = im.h/2.; + image rot = make_image(im.w, im.h, im.c); + for(c = 0; c < im.c; ++c){ + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx; + float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(rot, x, y, c, val); + } + } + } + return rot; +} + +void fill_image(image m, float s) +{ + int 
i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] = s; +} + +void translate_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s; +} + +void scale_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s; +} + +image crop_image(image im, int dx, int dy, int w, int h) +{ + image cropped = make_image(w, h, im.c); + int i, j, k; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int r = j + dy; + int c = i + dx; + float val = 0; + r = constrain_int(r, 0, im.h-1); + c = constrain_int(c, 0, im.w-1); + val = get_pixel(im, c, r, k); + set_pixel(cropped, i, j, k, val); + } + } + } + return cropped; +} + +int best_3d_shift_r(image a, image b, int min, int max) +{ + if(min == max) return min; + int mid = floor((min + max) / 2.); + image c1 = crop_image(b, 0, mid, b.w, b.h); + image c2 = crop_image(b, 0, mid+1, b.w, b.h); + float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 10); + float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 10); + free_image(c1); + free_image(c2); + if(d1 < d2) return best_3d_shift_r(a, b, min, mid); + else return best_3d_shift_r(a, b, mid+1, max); +} + +int best_3d_shift(image a, image b, int min, int max) +{ + int i; + int best = 0; + float best_distance = FLT_MAX; + for(i = min; i <= max; i += 2){ + image c = crop_image(b, 0, i, b.w, b.h); + float d = dist_array(c.data, a.data, a.w*a.h*a.c, 100); + if(d < best_distance){ + best_distance = d; + best = i; + } + printf("%d %f\n", i, d); + free_image(c); + } + return best; +} + +void composite_3d(char *f1, char *f2, char *out, int delta) +{ + if(!out) out = "out"; + image a = load_image(f1, 0,0,0); + image b = load_image(f2, 0,0,0); + int shift = best_3d_shift_r(a, b, -a.h/100, a.h/100); + + image c1 = crop_image(b, 10, shift, b.w, b.h); + float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 100); + image c2 = crop_image(b, -10, shift, b.w, b.h); + float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 100); + + 
if(d2 < d1 && 0){ + image swap = a; + a = b; + b = swap; + shift = -shift; + printf("swapped, %d\n", shift); + } + else{ + printf("%d\n", shift); + } + + image c = crop_image(b, delta, shift, a.w, a.h); + int i; + for(i = 0; i < c.w*c.h; ++i){ + c.data[i] = a.data[i]; + } + save_image(c, out); +} + +void letterbox_image_into(image im, int w, int h, image boxed) +{ + int new_w = im.w; + int new_h = im.h; + if (((float)w/im.w) < ((float)h/im.h)) { + new_w = w; + new_h = (im.h * w)/im.w; + } else { + new_h = h; + new_w = (im.w * h)/im.h; + } + image resized = resize_image(im, new_w, new_h); + embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2); + free_image(resized); +} + +image letterbox_image(image im, int w, int h) +{ + int new_w = im.w; + int new_h = im.h; + if (((float)w/im.w) < ((float)h/im.h)) { + new_w = w; + new_h = (im.h * w)/im.w; + } else { + new_h = h; + new_w = (im.w * h)/im.h; + } + image resized = resize_image(im, new_w, new_h); + image boxed = make_image(w, h, im.c); + fill_image(boxed, .5); + //int i; + //for(i = 0; i < boxed.w*boxed.h*boxed.c; ++i) boxed.data[i] = 0; + embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2); + free_image(resized); + return boxed; +} + +image resize_max(image im, int max) +{ + int w = im.w; + int h = im.h; + if(w > h){ + h = (h * max) / w; + w = max; + } else { + w = (w * max) / h; + h = max; + } + if(w == im.w && h == im.h) return im; + image resized = resize_image(im, w, h); + return resized; +} + +image resize_min(image im, int min) +{ + int w = im.w; + int h = im.h; + if(w < h){ + h = (h * min) / w; + w = min; + } else { + w = (w * min) / h; + h = min; + } + if(w == im.w && h == im.h) return im; + image resized = resize_image(im, w, h); + return resized; +} + +image random_crop_image(image im, int w, int h) +{ + int dx = rand_int(0, im.w - w); + int dy = rand_int(0, im.h - h); + image crop = crop_image(im, dx, dy, w, h); + return crop; +} + +augment_args random_augment_args(image im, float angle, float aspect, 
int low, int high, int w, int h) +{ + augment_args a = {0}; + aspect = rand_scale(aspect); + int r = rand_int(low, high); + int min = (im.h < im.w*aspect) ? im.h : im.w*aspect; + float scale = (float)r / min; + + float rad = rand_uniform(-angle, angle) * TWO_PI / 360.; + + float dx = (im.w*scale/aspect - w) / 2.; + float dy = (im.h*scale - w) / 2.; + //if(dx < 0) dx = 0; + //if(dy < 0) dy = 0; + dx = rand_uniform(-dx, dx); + dy = rand_uniform(-dy, dy); + + a.rad = rad; + a.scale = scale; + a.w = w; + a.h = h; + a.dx = dx; + a.dy = dy; + a.aspect = aspect; + return a; +} + +image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h) +{ + augment_args a = random_augment_args(im, angle, aspect, low, high, w, h); + image crop = rotate_crop_image(im, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + return crop; +} + +float three_way_max(float a, float b, float c) +{ + return (a > b) ? ( (a > c) ? a : c) : ( (b > c) ? b : c) ; +} + +float three_way_min(float a, float b, float c) +{ + return (a < b) ? ( (a < c) ? a : c) : ( (b < c) ? 
b : c) ; +} + +void yuv_to_rgb(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float y, u, v; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + y = get_pixel(im, i , j, 0); + u = get_pixel(im, i , j, 1); + v = get_pixel(im, i , j, 2); + + r = y + 1.13983*v; + g = y + -.39465*u + -.58060*v; + b = y + 2.03211*u; + + set_pixel(im, i, j, 0, r); + set_pixel(im, i, j, 1, g); + set_pixel(im, i, j, 2, b); + } + } +} + +void rgb_to_yuv(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float y, u, v; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + r = get_pixel(im, i , j, 0); + g = get_pixel(im, i , j, 1); + b = get_pixel(im, i , j, 2); + + y = .299*r + .587*g + .114*b; + u = -.14713*r + -.28886*g + .436*b; + v = .615*r + -.51499*g + -.10001*b; + + set_pixel(im, i, j, 0, y); + set_pixel(im, i, j, 1, u); + set_pixel(im, i, j, 2, v); + } + } +} + +// http://www.cs.rit.edu/~ncs/color/t_convert.html +void rgb_to_hsv(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float h, s, v; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + r = get_pixel(im, i , j, 0); + g = get_pixel(im, i , j, 1); + b = get_pixel(im, i , j, 2); + float max = three_way_max(r,g,b); + float min = three_way_min(r,g,b); + float delta = max - min; + v = max; + if(max == 0){ + s = 0; + h = 0; + }else{ + s = delta/max; + if(r == max){ + h = (g - b) / delta; + } else if (g == max) { + h = 2 + (b - r) / delta; + } else { + h = 4 + (r - g) / delta; + } + if (h < 0) h += 6; + h = h/6.; + } + set_pixel(im, i, j, 0, h); + set_pixel(im, i, j, 1, s); + set_pixel(im, i, j, 2, v); + } + } +} + +void hsv_to_rgb(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float h, s, v; + float f, p, q, t; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + h = 6 * get_pixel(im, i , j, 0); + s = get_pixel(im, i , j, 1); + v = get_pixel(im, i , j, 2); + if (s == 0) { + r = g = b = v; + } else { + int index = floor(h); + f = h - index; + 
p = v*(1-s); + q = v*(1-s*f); + t = v*(1-s*(1-f)); + if(index == 0){ + r = v; g = t; b = p; + } else if(index == 1){ + r = q; g = v; b = p; + } else if(index == 2){ + r = p; g = v; b = t; + } else if(index == 3){ + r = p; g = q; b = v; + } else if(index == 4){ + r = t; g = p; b = v; + } else { + r = v; g = p; b = q; + } + } + set_pixel(im, i, j, 0, r); + set_pixel(im, i, j, 1, g); + set_pixel(im, i, j, 2, b); + } + } +} + +void grayscale_image_3c(image im) +{ + assert(im.c == 3); + int i, j, k; + float scale[] = {0.299, 0.587, 0.114}; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float val = 0; + for(k = 0; k < 3; ++k){ + val += scale[k]*get_pixel(im, i, j, k); + } + im.data[0*im.h*im.w + im.w*j + i] = val; + im.data[1*im.h*im.w + im.w*j + i] = val; + im.data[2*im.h*im.w + im.w*j + i] = val; + } + } +} + +image grayscale_image(image im) +{ + assert(im.c == 3); + int i, j, k; + image gray = make_image(im.w, im.h, 1); + float scale[] = {0.299, 0.587, 0.114}; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + gray.data[i+im.w*j] += scale[k]*get_pixel(im, i, j, k); + } + } + } + return gray; +} + +image threshold_image(image im, float thresh) +{ + int i; + image t = make_image(im.w, im.h, im.c); + for(i = 0; i < im.w*im.h*im.c; ++i){ + t.data[i] = im.data[i]>thresh ? 
1 : 0; + } + return t; +} + +image blend_image(image fore, image back, float alpha) +{ + assert(fore.w == back.w && fore.h == back.h && fore.c == back.c); + image blend = make_image(fore.w, fore.h, fore.c); + int i, j, k; + for(k = 0; k < fore.c; ++k){ + for(j = 0; j < fore.h; ++j){ + for(i = 0; i < fore.w; ++i){ + float val = alpha * get_pixel(fore, i, j, k) + + (1 - alpha)* get_pixel(back, i, j, k); + set_pixel(blend, i, j, k, val); + } + } + } + return blend; +} + +void scale_image_channel(image im, int c, float v) +{ + int i, j; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float pix = get_pixel(im, i, j, c); + pix = pix*v; + set_pixel(im, i, j, c, pix); + } + } +} + +void translate_image_channel(image im, int c, float v) +{ + int i, j; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float pix = get_pixel(im, i, j, c); + pix = pix+v; + set_pixel(im, i, j, c, pix); + } + } +} + +image binarize_image(image im) +{ + image c = copy_image(im); + int i; + for(i = 0; i < im.w * im.h * im.c; ++i){ + if(c.data[i] > .5) c.data[i] = 1; + else c.data[i] = 0; + } + return c; +} + +void saturate_image(image im, float sat) +{ + rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + hsv_to_rgb(im); + constrain_image(im); +} + +void hue_image(image im, float hue) +{ + rgb_to_hsv(im); + int i; + for(i = 0; i < im.w*im.h; ++i){ + im.data[i] = im.data[i] + hue; + if (im.data[i] > 1) im.data[i] -= 1; + if (im.data[i] < 0) im.data[i] += 1; + } + hsv_to_rgb(im); + constrain_image(im); +} + +void exposure_image(image im, float sat) +{ + rgb_to_hsv(im); + scale_image_channel(im, 2, sat); + hsv_to_rgb(im); + constrain_image(im); +} + +void distort_image(image im, float hue, float sat, float val) +{ + rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + scale_image_channel(im, 2, val); + int i; + for(i = 0; i < im.w*im.h; ++i){ + im.data[i] = im.data[i] + hue; + if (im.data[i] > 1) im.data[i] -= 1; + if (im.data[i] < 0) im.data[i] += 1; + } + hsv_to_rgb(im); + 
constrain_image(im); +} + +void random_distort_image(image im, float hue, float saturation, float exposure) +{ + float dhue = rand_uniform(-hue, hue); + float dsat = rand_scale(saturation); + float dexp = rand_scale(exposure); + distort_image(im, dhue, dsat, dexp); +} + +void saturate_exposure_image(image im, float sat, float exposure) +{ + rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + scale_image_channel(im, 2, exposure); + hsv_to_rgb(im); + constrain_image(im); +} + +image resize_image(image im, int w, int h) +{ + image resized = make_image(w, h, im.c); + image part = make_image(w, im.h, im.c); + int r, c, k; + float w_scale = (float)(im.w - 1) / (w - 1); + float h_scale = (float)(im.h - 1) / (h - 1); + for(k = 0; k < im.c; ++k){ + for(r = 0; r < im.h; ++r){ + for(c = 0; c < w; ++c){ + float val = 0; + if(c == w-1 || im.w == 1){ + val = get_pixel(im, im.w-1, r, k); + } else { + float sx = c*w_scale; + int ix = (int) sx; + float dx = sx - ix; + val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k); + } + set_pixel(part, c, r, k, val); + } + } + } + for(k = 0; k < im.c; ++k){ + for(r = 0; r < h; ++r){ + float sy = r*h_scale; + int iy = (int) sy; + float dy = sy - iy; + for(c = 0; c < w; ++c){ + float val = (1-dy) * get_pixel(part, c, iy, k); + set_pixel(resized, c, r, k, val); + } + if(r == h-1 || im.h == 1) continue; + for(c = 0; c < w; ++c){ + float val = dy * get_pixel(part, c, iy+1, k); + add_pixel(resized, c, r, k, val); + } + } + } + + free_image(part); + return resized; +} + + +void test_resize(char *filename) +{ + image im = load_image(filename, 0,0, 3); + float mag = mag_array(im.data, im.w*im.h*im.c); + printf("L2 Norm: %f\n", mag); + image gray = grayscale_image(im); + + image c1 = copy_image(im); + image c2 = copy_image(im); + image c3 = copy_image(im); + image c4 = copy_image(im); + distort_image(c1, .1, 1.5, 1.5); + distort_image(c2, -.1, .66666, .66666); + distort_image(c3, .1, 1.5, .66666); + distort_image(c4, .1, .66666, 
1.5); + + + show_image(im, "Original", 1); + show_image(gray, "Gray", 1); + show_image(c1, "C1", 1); + show_image(c2, "C2", 1); + show_image(c3, "C3", 1); + show_image(c4, "C4", 1); +#ifdef OPENCV + while(1){ + image aug = random_augment_image(im, 0, .75, 320, 448, 320, 320); + show_image(aug, "aug", 1); + free_image(aug); + + + float exposure = 1.15; + float saturation = 1.15; + float hue = .05; + + image c = copy_image(im); + + float dexp = rand_scale(exposure); + float dsat = rand_scale(saturation); + float dhue = rand_uniform(-hue, hue); + + distort_image(c, dhue, dsat, dexp); + show_image(c, "rand", 1); + printf("%f %f %f\n", dhue, dsat, dexp); + free_image(c); + } +#endif +} + + +image load_image_stb(char *filename, int channels) +{ + int w, h, c; + unsigned char *data = stbi_load(filename, &w, &h, &c, channels); + if (!data) { + fprintf(stderr, "Cannot load image \"%s\"\nSTB Reason: %s\n", filename, stbi_failure_reason()); + exit(0); + } + if(channels) c = channels; + int i,j,k; + image im = make_image(w, h, c); + for(k = 0; k < c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int dst_index = i + w*j + w*h*k; + int src_index = k + c*i + c*w*j; + im.data[dst_index] = (float)data[src_index]/255.; + } + } + } + free(data); + return im; +} + +image load_image(char *filename, int w, int h, int c) +{ +#ifdef OPENCV + image out = load_image_cv(filename, c); +#else + image out = load_image_stb(filename, c); +#endif + + if((h && w) && (h != out.h || w != out.w)){ + image resized = resize_image(out, w, h); + free_image(out); + out = resized; + } + return out; +} + +image load_image_color(char *filename, int w, int h) +{ + return load_image(filename, w, h, 3); +} + +image get_image_layer(image m, int l) +{ + image out = make_image(m.w, m.h, 1); + int i; + for(i = 0; i < m.h*m.w; ++i){ + out.data[i] = m.data[i+l*m.h*m.w]; + } + return out; +} +void print_image(image m) +{ + int i, j, k; + for(i =0 ; i < m.c; ++i){ + for(j =0 ; j < m.h; ++j){ + for(k = 0; k 
< m.w; ++k){ + printf("%.2lf, ", m.data[i*m.h*m.w + j*m.w + k]); + if(k > 30) break; + } + printf("\n"); + if(j > 30) break; + } + printf("\n"); + } + printf("\n"); +} + +image collapse_images_vert(image *ims, int n) +{ + int color = 1; + int border = 1; + int h,w,c; + w = ims[0].w; + h = (ims[0].h + border) * n - border; + c = ims[0].c; + if(c != 3 || !color){ + w = (w+border)*c - border; + c = 1; + } + + image filters = make_image(w, h, c); + int i,j; + for(i = 0; i < n; ++i){ + int h_offset = i*(ims[0].h+border); + image copy = copy_image(ims[i]); + //normalize_image(copy); + if(c == 3 && color){ + embed_image(copy, filters, 0, h_offset); + } + else{ + for(j = 0; j < copy.c; ++j){ + int w_offset = j*(ims[0].w+border); + image layer = get_image_layer(copy, j); + embed_image(layer, filters, w_offset, h_offset); + free_image(layer); + } + } + free_image(copy); + } + return filters; +} + +image collapse_images_horz(image *ims, int n) +{ + int color = 1; + int border = 1; + int h,w,c; + int size = ims[0].h; + h = size; + w = (ims[0].w + border) * n - border; + c = ims[0].c; + if(c != 3 || !color){ + h = (h+border)*c - border; + c = 1; + } + + image filters = make_image(w, h, c); + int i,j; + for(i = 0; i < n; ++i){ + int w_offset = i*(size+border); + image copy = copy_image(ims[i]); + //normalize_image(copy); + if(c == 3 && color){ + embed_image(copy, filters, w_offset, 0); + } + else{ + for(j = 0; j < copy.c; ++j){ + int h_offset = j*(size+border); + image layer = get_image_layer(copy, j); + embed_image(layer, filters, w_offset, h_offset); + free_image(layer); + } + } + free_image(copy); + } + return filters; +} + +void show_image_normalized(image im, const char *name) +{ + image c = copy_image(im); + normalize_image(c); + show_image(c, name, 1); + free_image(c); +} + +void show_images(image *ims, int n, char *window) +{ + image m = collapse_images_vert(ims, n); + /* + int w = 448; + int h = ((float)m.h/m.w) * 448; + if(h > 896){ + h = 896; + w = ((float)m.w/m.h) * 
896; + } + image sized = resize_image(m, w, h); + */ + normalize_image(m); + save_image(m, window); + show_image(m, window, 1); + free_image(m); +} + +void free_image(image m) +{ + if(m.data){ + free(m.data); + } +} diff --git a/workloads/realworld/pipeline/darknet/src/image.h b/workloads/realworld/pipeline/darknet/src/image.h new file mode 100644 index 0000000000000000000000000000000000000000..3392bb9787fc542929cda064bcefa0f3f893b76c --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/image.h @@ -0,0 +1,69 @@ +#ifndef IMAGE_H +#define IMAGE_H + +#include +#include +#include +#include +#include +#include "box.h" +#include "darknet.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef OPENCV +void *open_video_stream(const char *f, int c, int w, int h, int fps); +image get_image_from_stream(void *p); +image load_image_cv(char *filename, int channels); +int show_image_cv(image im, const char* name, int ms); +#endif + +float get_color(int c, int x, int max); +void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); +void draw_bbox(image a, box bbox, int w, float r, float g, float b); +void write_label(image a, int r, int c, image *characters, char *string, float *rgb); +image image_distance(image a, image b); +void scale_image(image m, float s); +image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect); +image random_crop_image(image im, int w, int h); +image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h); +augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h); +void letterbox_image_into(image im, int w, int h, image boxed); +image resize_max(image im, int max); +void translate_image(image m, float s); +void embed_image(image source, image dest, int dx, int dy); +void place_image(image im, int w, int h, int dx, int dy, image canvas); +void saturate_image(image im, float sat); +void 
exposure_image(image im, float sat); +void distort_image(image im, float hue, float sat, float val); +void saturate_exposure_image(image im, float sat, float exposure); +void rgb_to_hsv(image im); +void hsv_to_rgb(image im); +void yuv_to_rgb(image im); +void rgb_to_yuv(image im); + + +image collapse_image_layers(image source, int border); +image collapse_images_horz(image *ims, int n); +image collapse_images_vert(image *ims, int n); + +void show_image_normalized(image im, const char *name); +void show_images(image *ims, int n, char *window); +void show_image_layers(image p, char *name); +void show_image_collapsed(image p, char *name); + +void print_image(image m); + +image make_empty_image(int w, int h, int c); +void copy_image_into(image src, image dest); + +image get_image_layer(image m, int l); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/image_opencv.cpp b/workloads/realworld/pipeline/darknet/src/image_opencv.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7511280be07ca987fd51fa54aea55910cd34a706 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/image_opencv.cpp @@ -0,0 +1,135 @@ +#ifdef OPENCV + +#include "stdio.h" +#include "stdlib.h" +#include "opencv2/opencv.hpp" +#include "image.h" + +using namespace cv; + +extern "C" { + +IplImage *image_to_ipl(image im) +{ + int x,y,c; + IplImage *disp = cvCreateImage(cvSize(im.w,im.h), IPL_DEPTH_8U, im.c); + int step = disp->widthStep; + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + for(c= 0; c < im.c; ++c){ + float val = im.data[c*im.h*im.w + y*im.w + x]; + disp->imageData[y*step + x*im.c + c] = (unsigned char)(val*255); + } + } + } + return disp; +} + +image ipl_to_image(IplImage* src) +{ + int h = src->height; + int w = src->width; + int c = src->nChannels; + image im = make_image(w, h, c); + unsigned char *data = (unsigned char *)src->imageData; + int step = src->widthStep; + int i, j, k; + + for(i = 0; i < h; ++i){ 
+ for(k= 0; k < c; ++k){ + for(j = 0; j < w; ++j){ + im.data[k*w*h + i*w + j] = data[i*step + j*c + k]/255.; + } + } + } + return im; +} + +Mat image_to_mat(image im) +{ + image copy = copy_image(im); + constrain_image(copy); + if(im.c == 3) rgbgr_image(copy); + + IplImage *ipl = image_to_ipl(copy); + Mat m = cvarrToMat(ipl, true); + cvReleaseImage(&ipl); + free_image(copy); + return m; +} + +image mat_to_image(Mat m) +{ + IplImage ipl = m; + image im = ipl_to_image(&ipl); + rgbgr_image(im); + return im; +} + +void *open_video_stream(const char *f, int c, int w, int h, int fps) +{ + VideoCapture *cap; + if(f) cap = new VideoCapture(f); + else cap = new VideoCapture(c); + if(!cap->isOpened()) return 0; + if(w) cap->set(CV_CAP_PROP_FRAME_WIDTH, w); + if(h) cap->set(CV_CAP_PROP_FRAME_HEIGHT, w); + if(fps) cap->set(CV_CAP_PROP_FPS, w); + return (void *) cap; +} + +image get_image_from_stream(void *p) +{ + VideoCapture *cap = (VideoCapture *)p; + Mat m; + *cap >> m; + if(m.empty()) return make_empty_image(0,0,0); + return mat_to_image(m); +} + +image load_image_cv(char *filename, int channels) +{ + int flag = -1; + if (channels == 0) flag = -1; + else if (channels == 1) flag = 0; + else if (channels == 3) flag = 1; + else { + fprintf(stderr, "OpenCV can't force load with %d channels\n", channels); + } + Mat m; + m = imread(filename, flag); + if(!m.data){ + fprintf(stderr, "Cannot load image \"%s\"\n", filename); + char buff[256]; + sprintf(buff, "echo %s >> bad.list", filename); + system(buff); + return make_image(10,10,3); + //exit(0); + } + image im = mat_to_image(m); + return im; +} + +int show_image_cv(image im, const char* name, int ms) +{ + Mat m = image_to_mat(im); + imshow(name, m); + int c = waitKey(ms); + if (c != -1) c = c%256; + return c; +} + +void make_window(char *name, int w, int h, int fullscreen) +{ + namedWindow(name, WINDOW_NORMAL); + if (fullscreen) { + setWindowProperty(name, CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); + } else { + 
resizeWindow(name, w, h); + if(strcmp(name, "Demo") == 0) moveWindow(name, 0, 0); + } +} + +} + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/iseg_layer.c b/workloads/realworld/pipeline/darknet/src/iseg_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..f4cbd81d92544b0651bace682ecb6c6f629a4b3c --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/iseg_layer.c @@ -0,0 +1,225 @@ +#include "iseg_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer* make_iseg_layer(int batch, int w, int h, int classes, int ids) +{ + layer *l = calloc(1, sizeof(layer)); + l->type = ISEG; + + l->h = h; + l->w = w; + l->c = classes + ids; + l->out_w = l->w; + l->out_h = l->h; + l->out_c = l->c; + l->classes = classes; + l->batch = batch; + l->extra = ids; + l->cost = calloc(1, sizeof(float)); + l->outputs = h*w*l->c; + l->inputs = l->outputs; + l->truths = 90*(l->w*l->h+1); + l->delta = calloc(batch*l->outputs, sizeof(float)); + l->output = calloc(batch*l->outputs, sizeof(float)); + + l->counts = calloc(90, sizeof(int)); + l->sums = calloc(90, sizeof(float*)); + if(ids){ + int i; + for(i = 0; i < 90; ++i){ + l->sums[i] = calloc(ids, sizeof(float)); + } + } + + l->forward = forward_iseg_layer; + l->backward = backward_iseg_layer; +#ifdef GPU + l->forward_gpu = forward_iseg_layer_gpu; + l->backward_gpu = backward_iseg_layer_gpu; + l->output_gpu = cuda_make_array(l->output, batch*l->outputs); + l->delta_gpu = cuda_make_array(l->delta, batch*l->outputs); +#endif + + fprintf(stderr, "iseg\n"); + srand(0); + + return l; +} + +void resize_iseg_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->c; + l->inputs = l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + 
cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +void forward_iseg_layer(layer *l, network net) +{ + + double time = what_time_is_it_now(); + int i,b,j,k; + int ids = l->extra; + memcpy(l->output, net.input, l->outputs*l->batch*sizeof(float)); + memset(l->delta, 0, l->outputs * l->batch * sizeof(float)); + +#ifndef GPU + for (b = 0; b < l->batch; ++b){ + int index = b*l->outputs; + activate_array(l->output + index, l->classes*l->w*l->h, LOGISTIC); + } +#endif + + for (b = 0; b < l->batch; ++b){ + // a priori, each pixel has no class + for(i = 0; i < l->classes; ++i){ + for(k = 0; k < l->w*l->h; ++k){ + int index = b*l->outputs + i*l->w*l->h + k; + l->delta[index] = 0 - l->output[index]; + } + } + + // a priori, embedding should be small magnitude + for(i = 0; i < ids; ++i){ + for(k = 0; k < l->w*l->h; ++k){ + int index = b*l->outputs + (i+l->classes)*l->w*l->h + k; + l->delta[index] = .1 * (0 - l->output[index]); + } + } + + + memset(l->counts, 0, 90*sizeof(int)); + for(i = 0; i < 90; ++i){ + fill_cpu(ids, 0, l->sums[i], 1); + + int c = net.truth[b*l->truths + i*(l->w*l->h+1)]; + if(c < 0) break; + // add up metric embeddings for each instance + for(k = 0; k < l->w*l->h; ++k){ + int index = b*l->outputs + c*l->w*l->h + k; + float v = net.truth[b*l->truths + i*(l->w*l->h + 1) + 1 + k]; + if(v){ + l->delta[index] = v - l->output[index]; + axpy_cpu(ids, 1, l->output + b*l->outputs + l->classes*l->w*l->h + k, l->w*l->h, l->sums[i], 1); + ++l->counts[i]; + } + } + } + + float *mse = calloc(90, sizeof(float)); + for(i = 0; i < 90; ++i){ + int c = net.truth[b*l->truths + i*(l->w*l->h+1)]; + if(c < 0) break; + for(k = 0; k < l->w*l->h; ++k){ + float v = net.truth[b*l->truths + i*(l->w*l->h + 1) + 1 + k]; + if(v){ + int z; + float sum = 0; + for(z = 0; z < ids; ++z){ + int index = b*l->outputs + (l->classes + z)*l->w*l->h + k; + sum += 
pow(l->sums[i][z]/l->counts[i] - l->output[index], 2); + } + mse[i] += sum; + } + } + mse[i] /= l->counts[i]; + } + + // Calculate average embedding + for(i = 0; i < 90; ++i){ + if(!l->counts[i]) continue; + scal_cpu(ids, 1.f/l->counts[i], l->sums[i], 1); + if(b == 0 && net.gpu_index == 0){ + printf("%4d, %6.3f, ", l->counts[i], mse[i]); + for(j = 0; j < ids; ++j){ + printf("%6.3f,", l->sums[i][j]); + } + printf("\n"); + } + } + free(mse); + + // Calculate embedding loss + for(i = 0; i < 90; ++i){ + if(!l->counts[i]) continue; + for(k = 0; k < l->w*l->h; ++k){ + float v = net.truth[b*l->truths + i*(l->w*l->h + 1) + 1 + k]; + if(v){ + for(j = 0; j < 90; ++j){ + if(!l->counts[j])continue; + int z; + for(z = 0; z < ids; ++z){ + int index = b*l->outputs + (l->classes + z)*l->w*l->h + k; + float diff = l->sums[j][z] - l->output[index]; + if (j == i) l->delta[index] += diff < 0? -.1 : .1; + else l->delta[index] += -(diff < 0? -.1 : .1); + } + } + } + } + } + + for(i = 0; i < ids; ++i){ + for(k = 0; k < l->w*l->h; ++k){ + int index = b*l->outputs + (i+l->classes)*l->w*l->h + k; + l->delta[index] *= .01; + } + } + } + + *(l->cost) = pow(mag_array(l->delta, l->outputs * l->batch), 2); + printf("took %lf sec\n", what_time_is_it_now() - time); +} + +void backward_iseg_layer(layer *l, network net) +{ + axpy_cpu(l->batch*l->inputs, 1, l->delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_iseg_layer_gpu(layer *l, network net) +{ + copy_gpu(l->batch*l->inputs, net.input_gpu, 1, l->output_gpu, 1, net.streams[l->stream_index]); + int b; + for (b = 0; b < l->batch; ++b){ + activate_array_gpu(l->output_gpu + b*l->outputs, l->classes*l->w*l->h, LOGISTIC, net.streams[l->stream_index]); + //if(l->extra) activate_array_gpu(l->output_gpu + b*l->outputs + l->classes*l->w*l->h, l->extra*l->w*l->h, LOGISTIC); + } + + cuda_pull_array(l->output_gpu, net.input, l->batch*l->inputs); + forward_iseg_layer(l, net); + cuda_push_array(l->delta_gpu, l->delta, l->batch*l->outputs); +} + +void 
backward_iseg_layer_gpu(layer *l, network net) +{ + int b; + for (b = 0; b < l->batch; ++b){ + //if(l->extra) gradient_array_gpu(l->output_gpu + b*l->outputs + l->classes*l->w*l->h, l->extra*l->w*l->h, LOGISTIC, l->delta_gpu + b*l->outputs + l->classes*l->w*l->h); + } + axpy_gpu(l->batch*l->inputs, 1, l->delta_gpu, 1, net.delta_gpu, 1, net.streams[l->stream_index]); +} +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/iseg_layer.h b/workloads/realworld/pipeline/darknet/src/iseg_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..e4ab138b5452c72c740b420cc0851201e9bbec1a --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/iseg_layer.h @@ -0,0 +1,19 @@ +#ifndef ISEG_LAYER_H +#define ISEG_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer* make_iseg_layer(int batch, int w, int h, int classes, int ids); +void forward_iseg_layer(layer *l, network net); +void backward_iseg_layer(layer *l, network net); +void resize_iseg_layer(layer *l, int w, int h); +int iseg_num_detections(layer *l, float thresh); + +#ifdef GPU +void forward_iseg_layer_gpu(layer *l, network net); +void backward_iseg_layer_gpu(layer *l, network net); +#endif + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/l2norm_layer.c b/workloads/realworld/pipeline/darknet/src/l2norm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..137ffb7cf3de56ca0739584a8759bbf70f370873 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/l2norm_layer.c @@ -0,0 +1,63 @@ +#include "l2norm_layer.h" +#include "activations.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +layer* make_l2norm_layer(int batch, int inputs) +{ + fprintf(stderr, "l2norm %4d\n", inputs); + layer *l = calloc(1, sizeof(layer)); + l->type = L2NORM; + l->batch = batch; + l->inputs = inputs; + l->outputs = inputs; + l->output = calloc(inputs*batch, sizeof(float)); + l->scales = 
calloc(inputs*batch, sizeof(float)); + l->delta = calloc(inputs*batch, sizeof(float)); + + l->forward = forward_l2norm_layer; + l->backward = backward_l2norm_layer; + #ifdef GPU + l->forward_gpu = forward_l2norm_layer_gpu; + l->backward_gpu = backward_l2norm_layer_gpu; + + l->output_gpu = cuda_make_array(l->output, inputs*batch); + l->scales_gpu = cuda_make_array(l->output, inputs*batch); + l->delta_gpu = cuda_make_array(l->delta, inputs*batch); + #endif + return l; +} + +void forward_l2norm_layer(layer *l, network net) +{ + copy_cpu(l->outputs*l->batch, net.input, 1, l->output, 1); + l2normalize_cpu(l->output, l->scales, l->batch, l->out_c, l->out_w*l->out_h); +} + +void backward_l2norm_layer(layer *l, network net) +{ + //axpy_cpu(l->inputs*l->batch, 1, l->scales, 1, l->delta, 1); + axpy_cpu(l->inputs*l->batch, 1, l->delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_l2norm_layer_gpu(layer *l, network net) +{ + copy_gpu(l->outputs*l->batch, net.input_gpu, 1, l->output_gpu, 1, net.streams[l->stream_index]); + l2normalize_gpu(l->output_gpu, l->scales_gpu, l->batch, l->out_c, l->out_w*l->out_h); +} + +void backward_l2norm_layer_gpu(layer *l, network net) +{ + axpy_gpu(l->batch*l->inputs, 1, l->scales_gpu, 1, l->delta_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->batch*l->inputs, 1, l->delta_gpu, 1, net.delta_gpu, 1, net.streams[l->stream_index]); +} + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/l2norm_layer.h b/workloads/realworld/pipeline/darknet/src/l2norm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..6ddb0349c56959bc92982bedcf338588e71f64a5 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/l2norm_layer.h @@ -0,0 +1,15 @@ +#ifndef L2NORM_LAYER_H +#define L2NORM_LAYER_H +#include "layer.h" +#include "network.h" + +layer* make_l2norm_layer(int batch, int inputs); +void forward_l2norm_layer(layer *l, network net); +void backward_l2norm_layer(layer *l, network net); + +#ifdef GPU +void 
forward_l2norm_layer_gpu(layer *l, network net); +void backward_l2norm_layer_gpu(layer *l, network net); +#endif + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/layer.c b/workloads/realworld/pipeline/darknet/src/layer.c new file mode 100644 index 0000000000000000000000000000000000000000..0733fdd8654ec34a7c7cc06a7c898d98e2bb5495 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/layer.c @@ -0,0 +1,97 @@ +#include "layer.h" +#include "cuda_dark.h" + +#include + +void free_layer(layer *l) +{ + if(l->type == DROPOUT){ + if(l->rand) free(l->rand); +#ifdef GPU + if(l->rand_gpu) cuda_free(l->rand_gpu); +#endif + return; + } + if(l->cweights) free(l->cweights); + if(l->indexes) free(l->indexes); + if(l->input_layers) free(l->input_layers); + if(l->input_sizes) free(l->input_sizes); + if(l->map) free(l->map); + if(l->rand) free(l->rand); + if(l->cost) free(l->cost); + if(l->state) free(l->state); + if(l->prev_state) free(l->prev_state); + if(l->forgot_state) free(l->forgot_state); + if(l->forgot_delta) free(l->forgot_delta); + if(l->state_delta) free(l->state_delta); + if(l->concat) free(l->concat); + if(l->concat_delta) free(l->concat_delta); + if(l->binary_weights) free(l->binary_weights); + if(l->biases) free(l->biases); + if(l->bias_updates) free(l->bias_updates); + if(l->scales) free(l->scales); + if(l->scale_updates) free(l->scale_updates); + if(l->weights) free(l->weights); + if(l->weight_updates) free(l->weight_updates); + if(l->delta) free(l->delta); + if(l->output) free(l->output); + if(l->squared) free(l->squared); + if(l->norms) free(l->norms); + if(l->spatial_mean) free(l->spatial_mean); + if(l->mean) free(l->mean); + if(l->variance) free(l->variance); + if(l->mean_delta) free(l->mean_delta); + if(l->variance_delta) free(l->variance_delta); + if(l->rolling_mean) free(l->rolling_mean); + if(l->rolling_variance) free(l->rolling_variance); + if(l->x) free(l->x); + if(l->x_norm) free(l->x_norm); + if(l->m) free(l->m); + if(l->v) 
free(l->v); + if(l->z_cpu) free(l->z_cpu); + if(l->r_cpu) free(l->r_cpu); + if(l->h_cpu) free(l->h_cpu); + if(l->binary_input) free(l->binary_input); + +#ifdef GPU + if(l->indexes_gpu) cuda_free((float *)l->indexes_gpu); + + if(l->z_gpu) cuda_free(l->z_gpu); + if(l->r_gpu) cuda_free(l->r_gpu); + if(l->h_gpu) cuda_free(l->h_gpu); + if(l->m_gpu) cuda_free(l->m_gpu); + if(l->v_gpu) cuda_free(l->v_gpu); + if(l->prev_state_gpu) cuda_free(l->prev_state_gpu); + if(l->forgot_state_gpu) cuda_free(l->forgot_state_gpu); + if(l->forgot_delta_gpu) cuda_free(l->forgot_delta_gpu); + if(l->state_gpu) cuda_free(l->state_gpu); + if(l->state_delta_gpu) cuda_free(l->state_delta_gpu); + if(l->gate_gpu) cuda_free(l->gate_gpu); + if(l->gate_delta_gpu) cuda_free(l->gate_delta_gpu); + if(l->save_gpu) cuda_free(l->save_gpu); + if(l->save_delta_gpu) cuda_free(l->save_delta_gpu); + if(l->concat_gpu) cuda_free(l->concat_gpu); + if(l->concat_delta_gpu) cuda_free(l->concat_delta_gpu); + if(l->binary_input_gpu) cuda_free(l->binary_input_gpu); + if(l->binary_weights_gpu) cuda_free(l->binary_weights_gpu); + if(l->mean_gpu) cuda_free(l->mean_gpu); + if(l->variance_gpu) cuda_free(l->variance_gpu); + if(l->rolling_mean_gpu) cuda_free(l->rolling_mean_gpu); + if(l->rolling_variance_gpu) cuda_free(l->rolling_variance_gpu); + if(l->variance_delta_gpu) cuda_free(l->variance_delta_gpu); + if(l->mean_delta_gpu) cuda_free(l->mean_delta_gpu); + if(l->x_gpu) cuda_free(l->x_gpu); + if(l->x_norm_gpu) cuda_free(l->x_norm_gpu); + if(l->weights_gpu) cuda_free(l->weights_gpu); + if(l->weight_updates_gpu) cuda_free(l->weight_updates_gpu); + if(l->biases_gpu) cuda_free(l->biases_gpu); + if(l->bias_updates_gpu) cuda_free(l->bias_updates_gpu); + if(l->scales_gpu) cuda_free(l->scales_gpu); + if(l->scale_updates_gpu) cuda_free(l->scale_updates_gpu); + if(l->output_gpu) cuda_free(l->output_gpu); + if(l->delta_gpu) cuda_free(l->delta_gpu); + if(l->rand_gpu) cuda_free(l->rand_gpu); + if(l->squared_gpu) 
cuda_free(l->squared_gpu); + if(l->norms_gpu) cuda_free(l->norms_gpu); +#endif +} diff --git a/workloads/realworld/pipeline/darknet/src/layer.h b/workloads/realworld/pipeline/darknet/src/layer.h new file mode 100644 index 0000000000000000000000000000000000000000..af6cd2ab5054f5ef3bbdfca2da45f08d710a7bd0 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/layer.h @@ -0,0 +1 @@ +#include "darknet.h" diff --git a/workloads/realworld/pipeline/darknet/src/list.c b/workloads/realworld/pipeline/darknet/src/list.c new file mode 100644 index 0000000000000000000000000000000000000000..f87be5d7c60bb9dc82726ba7b0fd7b45e55de6c9 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/list.c @@ -0,0 +1,92 @@ +#include +#include +#include "list.h" + +list *make_list() +{ + list *l = malloc(sizeof(list)); + l->size = 0; + l->front = 0; + l->back = 0; + return l; +} + +/* +void transfer_node(list *s, list *d, node *n) +{ + node *prev, *next; + prev = n->prev; + next = n->next; + if(prev) prev->next = next; + if(next) next->prev = prev; + --s->size; + if(s->front == n) s->front = next; + if(s->back == n) s->back = prev; +} +*/ + +void *list_pop(list *l){ + if(!l->back) return 0; + node *b = l->back; + void *val = b->val; + l->back = b->prev; + if(l->back) l->back->next = 0; + free(b); + --l->size; + + return val; +} + +void list_insert(list *l, void *val) +{ + node *new_node = malloc(sizeof(node)); + new_node->val = val; + new_node->next = 0; + + if(!l->back){ + l->front = new_node; + new_node->prev = 0; + }else{ + l->back->next = new_node; + new_node->prev = l->back; + } + l->back = new_node; + ++l->size; +} + +void free_node(node *n) +{ + node *next; + while(n) { + next = n->next; + free(n); + n = next; + } +} + +void free_list(list *l) +{ + free_node(l->front); + free(l); +} + +void free_list_contents(list *l) +{ + node *n = l->front; + while(n){ + free(n->val); + n = n->next; + } +} + +void **list_to_array(list *l) +{ + void **a = calloc(l->size, sizeof(void*)); + 
int count = 0; + node *n = l->front; + while(n){ + a[count++] = n->val; + n = n->next; + } + return a; +} diff --git a/workloads/realworld/pipeline/darknet/src/list.h b/workloads/realworld/pipeline/darknet/src/list.h new file mode 100644 index 0000000000000000000000000000000000000000..6b445c717c2937b9c90536654ba82a33e14bb4ec --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/list.h @@ -0,0 +1,13 @@ +#ifndef LIST_H +#define LIST_H +#include "darknet.h" + +list *make_list(); +int list_find(list *l, void *val); + +void list_insert(list *, void *); + + +void free_list_contents(list *l); + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/local_layer.c b/workloads/realworld/pipeline/darknet/src/local_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..87fab791c5a6be15cd0cd3f5c69dcc6f93722c54 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/local_layer.c @@ -0,0 +1,293 @@ +#include "local_layer.h" +#include "utils.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" +#include +#include + +int local_out_height(local_layer *l) +{ + int h = l->h; + if (!l->pad) h -= l->size; + else h -= 1; + return h/l->stride + 1; +} + +int local_out_width(local_layer *l) +{ + int w = l->w; + if (!l->pad) w -= l->size; + else w -= 1; + return w/l->stride + 1; +} + +local_layer* make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation) +{ + int i; + local_layer *l = calloc(1, sizeof(local_layer)); + l->type = LOCAL; + + l->h = h; + l->w = w; + l->c = c; + l->n = n; + l->batch = batch; + l->stride = stride; + l->size = size; + l->pad = pad; + + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int locations = out_h*out_w; + l->out_h = out_h; + l->out_w = out_w; + l->out_c = n; + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->w * l->h * l->c; + + l->weights = calloc(c*n*size*size*locations, sizeof(float)); + 
l->weight_updates = calloc(c*n*size*size*locations, sizeof(float)); + + l->biases = calloc(l->outputs, sizeof(float)); + l->bias_updates = calloc(l->outputs, sizeof(float)); + + // float scale = 1./sqrt(size*size*c); + float scale = sqrt(2./(size*size*c)); + for(i = 0; i < c*n*size*size; ++i) l->weights[i] = scale*rand_uniform(-1,1); + + l->output = calloc(l->batch*out_h * out_w * n, sizeof(float)); + l->delta = calloc(l->batch*out_h * out_w * n, sizeof(float)); + + l->workspace_size = out_h*out_w*size*size*c; + + l->forward = forward_local_layer; + l->backward = backward_local_layer; + l->update = update_local_layer; + +#ifdef GPU + l->forward_gpu = forward_local_layer_gpu; + l->backward_gpu = backward_local_layer_gpu; + l->update_gpu = update_local_layer_gpu; + + l->weights_gpu = cuda_make_array(l->weights, c*n*size*size*locations); + l->weight_updates_gpu = cuda_make_array(l->weight_updates, c*n*size*size*locations); + + l->biases_gpu = cuda_make_array(l->biases, l->outputs); + l->bias_updates_gpu = cuda_make_array(l->bias_updates, l->outputs); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*out_h*out_w*n); + l->output_gpu = cuda_make_array(l->output, l->batch*out_h*out_w*n); + +#endif + l->activation = activation; + + fprintf(stderr, "Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n); + + return l; +} + +void forward_local_layer(local_layer *l, network net) +{ + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int i, j; + int locations = out_h * out_w; + + for(i = 0; i < l->batch; ++i){ + copy_cpu(l->outputs, l->biases, 1, l->output + i*l->outputs, 1); + } + + for(i = 0; i < l->batch; ++i){ + float *input = net.input + i*l->w*l->h*l->c; + im2col_cpu(input, l->c, l->h, l->w, + l->size, l->stride, l->pad, net.workspace); + float *output = l->output + i*l->outputs; + for(j = 0; j < locations; ++j){ + float *a = l->weights + j*l->size*l->size*l->c*l->n; + float *b = net.workspace + j; + float 
*c = output + j; + + int m = l->n; + int n = 1; + int k = l->size*l->size*l->c; + + gemm(0,0,m,n,k,1,a,k,b,locations,1,c,locations); + } + } + activate_array(l->output, l->outputs*l->batch, l->activation); +} + +void backward_local_layer(local_layer *l, network net) +{ + int i, j; + int locations = l->out_w*l->out_h; + + gradient_array(l->output, l->outputs*l->batch, l->activation, l->delta); + + for(i = 0; i < l->batch; ++i){ + axpy_cpu(l->outputs, 1, l->delta + i*l->outputs, 1, l->bias_updates, 1); + } + + for(i = 0; i < l->batch; ++i){ + float *input = net.input + i*l->w*l->h*l->c; + im2col_cpu(input, l->c, l->h, l->w, + l->size, l->stride, l->pad, net.workspace); + + for(j = 0; j < locations; ++j){ + float *a = l->delta + i*l->outputs + j; + float *b = net.workspace + j; + float *c = l->weight_updates + j*l->size*l->size*l->c*l->n; + int m = l->n; + int n = l->size*l->size*l->c; + int k = 1; + + gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n); + } + + if(net.delta){ + for(j = 0; j < locations; ++j){ + float *a = l->weights + j*l->size*l->size*l->c*l->n; + float *b = l->delta + i*l->outputs + j; + float *c = net.workspace + j; + + int m = l->size*l->size*l->c; + int n = 1; + int k = l->n; + + gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations); + } + + col2im_cpu(net.workspace, l->c, l->h, l->w, l->size, l->stride, l->pad, net.delta+i*l->c*l->h*l->w); + } + } +} + +void update_local_layer(local_layer *l, update_args a, network net) +{ + float learning_rate = a.learning_rate*l->learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int locations = l->out_w*l->out_h; + int size = l->size*l->size*l->c*l->n*locations; + axpy_cpu(l->outputs, learning_rate/batch, l->bias_updates, 1, l->biases, 1); + scal_cpu(l->outputs, momentum, l->bias_updates, 1); + + axpy_cpu(size, -decay*batch, l->weights, 1, l->weight_updates, 1); + axpy_cpu(size, learning_rate/batch, l->weight_updates, 1, l->weights, 1); + scal_cpu(size, momentum, 
l->weight_updates, 1); +} + +#ifdef GPU + +void forward_local_layer_gpu(local_layer *l, network net) +{ + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int i, j; + int locations = out_h * out_w; + + for(i = 0; i < l->batch; ++i){ + copy_gpu(l->outputs, l->biases_gpu, 1, l->output_gpu + i*l->outputs, 1, net.streams[l->stream_index]); + } + + for(i = 0; i < l->batch; ++i){ + float *input = net.input_gpu + i*l->w*l->h*l->c; + im2col_gpu(input, l->c, l->h, l->w, + l->size, l->stride, l->pad, net.workspace, net.streams[l->stream_index]); + float *output = l->output_gpu + i*l->outputs; + for(j = 0; j < locations; ++j){ + float *a = l->weights_gpu + j*l->size*l->size*l->c*l->n; + float *b = net.workspace + j; + float *c = output + j; + + int m = l->n; + int n = 1; + int k = l->size*l->size*l->c; + + gemm_gpu(0,0,m,n,k,1,a,k,b,locations,1,c,locations,net.streams[l->stream_index]); + } + } + activate_array_gpu(l->output_gpu, l->outputs*l->batch, l->activation, net.streams[l->stream_index]); +} + +void backward_local_layer_gpu(local_layer *l, network net) +{ + int i, j; + int locations = l->out_w*l->out_h; + + gradient_array_gpu(l->output_gpu, l->outputs*l->batch, l->activation, l->delta_gpu); + for(i = 0; i < l->batch; ++i){ + axpy_gpu(l->outputs, 1, l->delta_gpu + i*l->outputs, 1, l->bias_updates_gpu, 1, net.streams[l->stream_index]); + } + + for(i = 0; i < l->batch; ++i){ + float *input = net.input_gpu + i*l->w*l->h*l->c; + im2col_gpu(input, l->c, l->h, l->w, + l->size, l->stride, l->pad, net.workspace, net.streams[l->stream_index]); + + for(j = 0; j < locations; ++j){ + float *a = l->delta_gpu + i*l->outputs + j; + float *b = net.workspace + j; + float *c = l->weight_updates_gpu + j*l->size*l->size*l->c*l->n; + int m = l->n; + int n = l->size*l->size*l->c; + int k = 1; + + gemm_gpu(0,1,m,n,k,1,a,locations,b,locations,1,c,n,net.streams[l->stream_index]); + } + + if(net.delta_gpu){ + for(j = 0; j < locations; ++j){ + float *a = l->weights_gpu + 
j*l->size*l->size*l->c*l->n; + float *b = l->delta_gpu + i*l->outputs + j; + float *c = net.workspace + j; + + int m = l->size*l->size*l->c; + int n = 1; + int k = l->n; + + gemm_gpu(1,0,m,n,k,1,a,m,b,locations,0,c,locations,net.streams[l->stream_index]); + } + + col2im_gpu(net.workspace, l->c, l->h, l->w, l->size, l->stride, l->pad, net.delta_gpu+i*l->c*l->h*l->w); + } + } +} + +void update_local_layer_gpu(local_layer *l, update_args a, network net) +{ + float learning_rate = a.learning_rate*l->learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int locations = l->out_w*l->out_h; + int size = l->size*l->size*l->c*l->n*locations; + axpy_gpu(l->outputs, learning_rate/batch, l->bias_updates_gpu, 1, l->biases_gpu, 1, net.streams[l->stream_index]); + scal_gpu(l->outputs, momentum, l->bias_updates_gpu, 1, net.streams[l->stream_index]); + + axpy_gpu(size, -decay*batch, l->weights_gpu, 1, l->weight_updates_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(size, learning_rate/batch, l->weight_updates_gpu, 1, l->weights_gpu, 1, net.streams[l->stream_index]); + scal_gpu(size, momentum, l->weight_updates_gpu, 1, net.streams[l->stream_index]); +} + +void pull_local_layer(local_layer *l) +{ + int locations = l->out_w*l->out_h; + int size = l->size*l->size*l->c*l->n*locations; + cuda_pull_array(l->weights_gpu, l->weights, size); + cuda_pull_array(l->biases_gpu, l->biases, l->outputs); +} + +void push_local_layer(local_layer *l) +{ + int locations = l->out_w*l->out_h; + int size = l->size*l->size*l->c*l->n*locations; + cuda_push_array(l->weights_gpu, l->weights, size); + cuda_push_array(l->biases_gpu, l->biases, l->outputs); +} +#endif diff --git a/workloads/realworld/pipeline/darknet/src/local_layer.h b/workloads/realworld/pipeline/darknet/src/local_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1420991d44f82804e88d3c81a6e784cadf9d9dd4 --- /dev/null +++ 
b/workloads/realworld/pipeline/darknet/src/local_layer.h @@ -0,0 +1,31 @@ +#ifndef LOCAL_LAYER_H +#define LOCAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +typedef layer local_layer; + +#ifdef GPU +void forward_local_layer_gpu(local_layer *layer, network net); +void backward_local_layer_gpu(local_layer *layer, network net); +void update_local_layer_gpu(local_layer *layer, update_args a, network net); + +void push_local_layer(local_layer *layer); +void pull_local_layer(local_layer *layer); +#endif + +local_layer* make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation); + +void forward_local_layer(local_layer *layer, network net); +void backward_local_layer(local_layer *layer, network net); +void update_local_layer(local_layer *layer, update_args a, network net); + +void bias_output(float *output, float *biases, int batch, int n, int size); +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); + +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/logistic_layer.c b/workloads/realworld/pipeline/darknet/src/logistic_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..0af91b90f2e467e7335e3cbfd1054e346d09b3e6 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/logistic_layer.c @@ -0,0 +1,71 @@ +#include "logistic_layer.h" +#include "activations.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +layer* make_logistic_layer(int batch, int inputs) +{ + fprintf(stderr, "logistic x entropy %4d\n", inputs); + layer *l = calloc(1, sizeof(layer)); + l->type = LOGXENT; + l->batch = batch; + l->inputs = inputs; + l->outputs = inputs; + l->loss = calloc(inputs*batch, sizeof(float)); + l->output = calloc(inputs*batch, sizeof(float)); + l->delta = calloc(inputs*batch, sizeof(float)); + l->cost = calloc(1, sizeof(float)); + + 
l->forward = forward_logistic_layer; + l->backward = backward_logistic_layer; + #ifdef GPU + l->forward_gpu = forward_logistic_layer_gpu; + l->backward_gpu = backward_logistic_layer_gpu; + + l->output_gpu = cuda_make_array(l->output, inputs*batch); + l->loss_gpu = cuda_make_array(l->loss, inputs*batch); + l->delta_gpu = cuda_make_array(l->delta, inputs*batch); + #endif + return l; +} + +void forward_logistic_layer(layer *l, network net) +{ + copy_cpu(l->outputs*l->batch, net.input, 1, l->output, 1); + activate_array(l->output, l->outputs*l->batch, LOGISTIC); + if(net.truth){ + logistic_x_ent_cpu(l->batch*l->inputs, l->output, net.truth, l->delta, l->loss); + l->cost[0] = sum_array(l->loss, l->batch*l->inputs); + } +} + +void backward_logistic_layer(layer *l, network net) +{ + axpy_cpu(l->inputs*l->batch, 1, l->delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_logistic_layer_gpu(layer *l, network net) +{ + copy_gpu(l->outputs*l->batch, net.input_gpu, 1, l->output_gpu, 1, net.streams[l->stream_index]); + activate_array_gpu(l->output_gpu, l->outputs*l->batch, LOGISTIC, net.streams[l->stream_index]); + if(net.truth){ + logistic_x_ent_gpu(l->batch*l->inputs, l->output_gpu, net.truth_gpu, l->delta_gpu, l->loss_gpu); + cuda_pull_array(l->loss_gpu, l->loss, l->batch*l->inputs); + l->cost[0] = sum_array(l->loss, l->batch*l->inputs); + } +} + +void backward_logistic_layer_gpu(layer *l, network net) +{ + axpy_gpu(l->batch*l->inputs, 1, l->delta_gpu, 1, net.delta_gpu, 1, net.streams[l->stream_index]); +} + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/logistic_layer.h b/workloads/realworld/pipeline/darknet/src/logistic_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..66eef3f70b6d17469119d23fb433e3ccfea686a1 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/logistic_layer.h @@ -0,0 +1,15 @@ +#ifndef LOGISTIC_LAYER_H +#define LOGISTIC_LAYER_H +#include "layer.h" +#include "network.h" + +layer* make_logistic_layer(int 
batch, int inputs); +void forward_logistic_layer(layer *l, network net); +void backward_logistic_layer(layer *l, network net); + +#ifdef GPU +void forward_logistic_layer_gpu(layer *l, network net); +void backward_logistic_layer_gpu(layer *l, network net); +#endif + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/lstm_layer.c b/workloads/realworld/pipeline/darknet/src/lstm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..48818bc6b11a39301b2c9bb34e1742efdf9300fd --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/lstm_layer.c @@ -0,0 +1,626 @@ +#include "lstm_layer.h" +#include "connected_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer* make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam) +{ + fprintf(stderr, "LSTM Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer* l = calloc(1, sizeof(layer)); + l->batch = batch; + l->type = LSTM; + l->steps = steps; + l->inputs = inputs; + + l->uf = calloc(1, sizeof(layer)); + fprintf(stderr, "\t\t"); + (l->uf) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l->uf->batch = batch; + + l->ui = calloc(1, sizeof(layer)); + fprintf(stderr, "\t\t"); + (l->ui) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l->ui->batch = batch; + + l->ug = calloc(1, sizeof(layer)); + fprintf(stderr, "\t\t"); + (l->ug) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l->ug->batch = batch; + + l->uo = calloc(1, sizeof(layer)); + fprintf(stderr, 
"\t\t"); + (l->uo) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l->uo->batch = batch; + + l->wf = calloc(1, sizeof(layer)); + fprintf(stderr, "\t\t"); + (l->wf) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l->wf->batch = batch; + + l->wi = calloc(1, sizeof(layer)); + fprintf(stderr, "\t\t"); + (l->wi) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l->wi->batch = batch; + + l->wg = calloc(1, sizeof(layer)); + fprintf(stderr, "\t\t"); + (l->wg) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l->wg->batch = batch; + + l->wo = calloc(1, sizeof(layer)); + fprintf(stderr, "\t\t"); + (l->wo) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l->wo->batch = batch; + + l->batch_normalize = batch_normalize; + l->outputs = outputs; + + l->output = calloc(outputs*batch*steps, sizeof(float)); + l->state = calloc(outputs*batch, sizeof(float)); + + l->forward = forward_lstm_layer; + l->update = update_lstm_layer; + + l->prev_state_cpu = calloc(batch*outputs, sizeof(float)); + l->prev_cell_cpu = calloc(batch*outputs, sizeof(float)); + l->cell_cpu = calloc(batch*outputs*steps, sizeof(float)); + + l->f_cpu = calloc(batch*outputs, sizeof(float)); + l->i_cpu = calloc(batch*outputs, sizeof(float)); + l->g_cpu = calloc(batch*outputs, sizeof(float)); + l->o_cpu = calloc(batch*outputs, sizeof(float)); + l->c_cpu = calloc(batch*outputs, sizeof(float)); + l->h_cpu = calloc(batch*outputs, sizeof(float)); + l->temp_cpu = calloc(batch*outputs, sizeof(float)); + l->temp2_cpu = calloc(batch*outputs, sizeof(float)); + l->temp3_cpu = calloc(batch*outputs, sizeof(float)); + l->dc_cpu = calloc(batch*outputs, sizeof(float)); + l->dh_cpu = calloc(batch*outputs, sizeof(float)); + +#ifdef GPU + l->forward_gpu = forward_lstm_layer_gpu; + l->backward_gpu = backward_lstm_layer_gpu; + l->update_gpu = 
update_lstm_layer_gpu; + + l->output_gpu = cuda_make_array(0, batch*outputs*steps); + l->delta_gpu = cuda_make_array(0, batch*l->outputs*steps); + + l->prev_state_gpu = cuda_make_array(0, batch*outputs); + l->prev_cell_gpu = cuda_make_array(0, batch*outputs); + l->cell_gpu = cuda_make_array(0, batch*outputs*steps); + + l->f_gpu = cuda_make_array(0, batch*outputs); + l->i_gpu = cuda_make_array(0, batch*outputs); + l->g_gpu = cuda_make_array(0, batch*outputs); + l->o_gpu = cuda_make_array(0, batch*outputs); + l->c_gpu = cuda_make_array(0, batch*outputs); + l->h_gpu = cuda_make_array(0, batch*outputs); + l->temp_gpu = cuda_make_array(0, batch*outputs); + l->temp2_gpu = cuda_make_array(0, batch*outputs); + l->temp3_gpu = cuda_make_array(0, batch*outputs); + l->dc_gpu = cuda_make_array(0, batch*outputs); + l->dh_gpu = cuda_make_array(0, batch*outputs); +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l->wf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->wf->out_c, l->wf->out_h, l->wf->out_w); + cudnnSetTensor4dDescriptor(l->wi->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->wi->out_c, l->wi->out_h, l->wi->out_w); + cudnnSetTensor4dDescriptor(l->wg->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->wg->out_c, l->wg->out_h, l->wg->out_w); + cudnnSetTensor4dDescriptor(l->wo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->wo->out_c, l->wo->out_h, l->wo->out_w); + + cudnnSetTensor4dDescriptor(l->uf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->uf->out_c, l->uf->out_h, l->uf->out_w); + cudnnSetTensor4dDescriptor(l->ui->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->ui->out_c, l->ui->out_h, l->ui->out_w); + cudnnSetTensor4dDescriptor(l->ug->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->ug->out_c, l->ug->out_h, l->ug->out_w); + cudnnSetTensor4dDescriptor(l->uo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->uo->out_c, l->uo->out_h, l->uo->out_w); +#endif + +#endif + 
+ return l; +} + +void update_lstm_layer(layer* l, update_args a, network net) +{ + update_connected_layer((l->wf), a, net); + update_connected_layer((l->wi), a, net); + update_connected_layer((l->wg), a, net); + update_connected_layer((l->wo), a, net); + update_connected_layer((l->uf), a, net); + update_connected_layer((l->ui), a, net); + update_connected_layer((l->ug), a, net); + update_connected_layer((l->uo), a, net); +} + +void forward_lstm_layer(layer* l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer *wf = (l->wf); + layer *wi = (l->wi); + layer *wg = (l->wg); + layer *wo = (l->wo); + + layer *uf = (l->uf); + layer *ui = (l->ui); + layer *ug = (l->ug); + layer *uo = (l->uo); + + fill_cpu(l->outputs * l->batch * l->steps, 0, wf->delta, 1); + fill_cpu(l->outputs * l->batch * l->steps, 0, wi->delta, 1); + fill_cpu(l->outputs * l->batch * l->steps, 0, wg->delta, 1); + fill_cpu(l->outputs * l->batch * l->steps, 0, wo->delta, 1); + + fill_cpu(l->outputs * l->batch * l->steps, 0, uf->delta, 1); + fill_cpu(l->outputs * l->batch * l->steps, 0, ui->delta, 1); + fill_cpu(l->outputs * l->batch * l->steps, 0, ug->delta, 1); + fill_cpu(l->outputs * l->batch * l->steps, 0, uo->delta, 1); + if (state.train) { + fill_cpu(l->outputs * l->batch * l->steps, 0, l->delta, 1); + } + + for (i = 0; i < l->steps; ++i) { + s.input = l->h_cpu; + forward_connected_layer(wf, s); + forward_connected_layer(wi, s); + forward_connected_layer(wg, s); + forward_connected_layer(wo, s); + + s.input = state.input; + forward_connected_layer(uf, s); + forward_connected_layer(ui, s); + forward_connected_layer(ug, s); + forward_connected_layer(uo, s); + + copy_cpu(l->outputs*l->batch, wf->output, 1, l->f_cpu, 1); + axpy_cpu(l->outputs*l->batch, 1, uf->output, 1, l->f_cpu, 1); + + copy_cpu(l->outputs*l->batch, wi->output, 1, l->i_cpu, 1); + axpy_cpu(l->outputs*l->batch, 1, ui->output, 1, l->i_cpu, 1); + + copy_cpu(l->outputs*l->batch, wg->output, 1, l->g_cpu, 1); + 
axpy_cpu(l->outputs*l->batch, 1, ug->output, 1, l->g_cpu, 1); + + copy_cpu(l->outputs*l->batch, wo->output, 1, l->o_cpu, 1); + axpy_cpu(l->outputs*l->batch, 1, uo->output, 1, l->o_cpu, 1); + + activate_array(l->f_cpu, l->outputs*l->batch, LOGISTIC); + activate_array(l->i_cpu, l->outputs*l->batch, LOGISTIC); + activate_array(l->g_cpu, l->outputs*l->batch, TANH); + activate_array(l->o_cpu, l->outputs*l->batch, LOGISTIC); + + copy_cpu(l->outputs*l->batch, l->i_cpu, 1, l->temp_cpu, 1); + mul_cpu(l->outputs*l->batch, l->g_cpu, 1, l->temp_cpu, 1); + mul_cpu(l->outputs*l->batch, l->f_cpu, 1, l->c_cpu, 1); + axpy_cpu(l->outputs*l->batch, 1, l->temp_cpu, 1, l->c_cpu, 1); + + copy_cpu(l->outputs*l->batch, l->c_cpu, 1, l->h_cpu, 1); + activate_array(l->h_cpu, l->outputs*l->batch, TANH); + mul_cpu(l->outputs*l->batch, l->o_cpu, 1, l->h_cpu, 1); + + copy_cpu(l->outputs*l->batch, l->c_cpu, 1, l->cell_cpu, 1); + copy_cpu(l->outputs*l->batch, l->h_cpu, 1, l->output, 1); + + state.input += l->inputs*l->batch; + l->output += l->outputs*l->batch; + l->cell_cpu += l->outputs*l->batch; + + increment_layer(wf, 1); + increment_layer(wi, 1); + increment_layer(wg, 1); + increment_layer(wo, 1); + + increment_layer(uf, 1); + increment_layer(ui, 1); + increment_layer(ug, 1); + increment_layer(uo, 1); + } +} + +void backward_lstm_layer(layer* l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer *wf = (l->wf); + layer *wi = (l->wi); + layer *wg = (l->wg); + layer *wo = (l->wo); + + layer *uf = (l->uf); + layer *ui = (l->ui); + layer *ug = (l->ug); + layer *uo = (l->uo); + + increment_layer(wf, l->steps - 1); + increment_layer(wi, l->steps - 1); + increment_layer(wg, l->steps - 1); + increment_layer(wo, l->steps - 1); + + increment_layer(uf, l->steps - 1); + increment_layer(ui, l->steps - 1); + increment_layer(ug, l->steps - 1); + increment_layer(uo, l->steps - 1); + + state.input += l->inputs*l->batch*(l->steps - 1); + if (state.delta) state.delta += 
l->inputs*l->batch*(l->steps - 1); + + l->output += l->outputs*l->batch*(l->steps - 1); + l->cell_cpu += l->outputs*l->batch*(l->steps - 1); + l->delta += l->outputs*l->batch*(l->steps - 1); + + for (i = l->steps - 1; i >= 0; --i) { + if (i != 0) copy_cpu(l->outputs*l->batch, l->cell_cpu - l->outputs*l->batch, 1, l->prev_cell_cpu, 1); + copy_cpu(l->outputs*l->batch, l->cell_cpu, 1, l->c_cpu, 1); + if (i != 0) copy_cpu(l->outputs*l->batch, l->output - l->outputs*l->batch, 1, l->prev_state_cpu, 1); + copy_cpu(l->outputs*l->batch, l->output, 1, l->h_cpu, 1); + + l->dh_cpu = (i == 0) ? 0 : l->delta - l->outputs*l->batch; + + copy_cpu(l->outputs*l->batch, wf->output, 1, l->f_cpu, 1); + axpy_cpu(l->outputs*l->batch, 1, uf->output, 1, l->f_cpu, 1); + + copy_cpu(l->outputs*l->batch, wi->output, 1, l->i_cpu, 1); + axpy_cpu(l->outputs*l->batch, 1, ui->output, 1, l->i_cpu, 1); + + copy_cpu(l->outputs*l->batch, wg->output, 1, l->g_cpu, 1); + axpy_cpu(l->outputs*l->batch, 1, ug->output, 1, l->g_cpu, 1); + + copy_cpu(l->outputs*l->batch, wo->output, 1, l->o_cpu, 1); + axpy_cpu(l->outputs*l->batch, 1, uo->output, 1, l->o_cpu, 1); + + activate_array(l->f_cpu, l->outputs*l->batch, LOGISTIC); + activate_array(l->i_cpu, l->outputs*l->batch, LOGISTIC); + activate_array(l->g_cpu, l->outputs*l->batch, TANH); + activate_array(l->o_cpu, l->outputs*l->batch, LOGISTIC); + + copy_cpu(l->outputs*l->batch, l->delta, 1, l->temp3_cpu, 1); + + copy_cpu(l->outputs*l->batch, l->c_cpu, 1, l->temp_cpu, 1); + activate_array(l->temp_cpu, l->outputs*l->batch, TANH); + + copy_cpu(l->outputs*l->batch, l->temp3_cpu, 1, l->temp2_cpu, 1); + mul_cpu(l->outputs*l->batch, l->o_cpu, 1, l->temp2_cpu, 1); + + gradient_array(l->temp_cpu, l->outputs*l->batch, TANH, l->temp2_cpu); + axpy_cpu(l->outputs*l->batch, 1, l->dc_cpu, 1, l->temp2_cpu, 1); + + copy_cpu(l->outputs*l->batch, l->c_cpu, 1, l->temp_cpu, 1); + activate_array(l->temp_cpu, l->outputs*l->batch, TANH); + mul_cpu(l->outputs*l->batch, l->temp3_cpu, 1, 
l->temp_cpu, 1); + gradient_array(l->o_cpu, l->outputs*l->batch, LOGISTIC, l->temp_cpu); + copy_cpu(l->outputs*l->batch, l->temp_cpu, 1, wo->delta, 1); + s.input = l->prev_state_cpu; + s.delta = l->dh_cpu; + backward_connected_layer(wo, s); + + copy_cpu(l->outputs*l->batch, l->temp_cpu, 1, uo->delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(uo, s); + + copy_cpu(l->outputs*l->batch, l->temp2_cpu, 1, l->temp_cpu, 1); + mul_cpu(l->outputs*l->batch, l->i_cpu, 1, l->temp_cpu, 1); + gradient_array(l->g_cpu, l->outputs*l->batch, TANH, l->temp_cpu); + copy_cpu(l->outputs*l->batch, l->temp_cpu, 1, wg->delta, 1); + s.input = l->prev_state_cpu; + s.delta = l->dh_cpu; + backward_connected_layer(wg, s); + + copy_cpu(l->outputs*l->batch, l->temp_cpu, 1, ug->delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(ug, s); + + copy_cpu(l->outputs*l->batch, l->temp2_cpu, 1, l->temp_cpu, 1); + mul_cpu(l->outputs*l->batch, l->g_cpu, 1, l->temp_cpu, 1); + gradient_array(l->i_cpu, l->outputs*l->batch, LOGISTIC, l->temp_cpu); + copy_cpu(l->outputs*l->batch, l->temp_cpu, 1, wi->delta, 1); + s.input = l->prev_state_cpu; + s.delta = l->dh_cpu; + backward_connected_layer(wi, s); + + copy_cpu(l->outputs*l->batch, l->temp_cpu, 1, ui->delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(ui, s); + + copy_cpu(l->outputs*l->batch, l->temp2_cpu, 1, l->temp_cpu, 1); + mul_cpu(l->outputs*l->batch, l->prev_cell_cpu, 1, l->temp_cpu, 1); + gradient_array(l->f_cpu, l->outputs*l->batch, LOGISTIC, l->temp_cpu); + copy_cpu(l->outputs*l->batch, l->temp_cpu, 1, wf->delta, 1); + s.input = l->prev_state_cpu; + s.delta = l->dh_cpu; + backward_connected_layer(wf, s); + + copy_cpu(l->outputs*l->batch, l->temp_cpu, 1, uf->delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(uf, s); + + copy_cpu(l->outputs*l->batch, l->temp2_cpu, 1, l->temp_cpu, 1); + 
mul_cpu(l->outputs*l->batch, l->f_cpu, 1, l->temp_cpu, 1); + copy_cpu(l->outputs*l->batch, l->temp_cpu, 1, l->dc_cpu, 1); + + state.input -= l->inputs*l->batch; + if (state.delta) state.delta -= l->inputs*l->batch; + l->output -= l->outputs*l->batch; + l->cell_cpu -= l->outputs*l->batch; + l->delta -= l->outputs*l->batch; + + increment_layer(wf, -1); + increment_layer(wi, -1); + increment_layer(wg, -1); + increment_layer(wo, -1); + + increment_layer(uf, -1); + increment_layer(ui, -1); + increment_layer(ug, -1); + increment_layer(uo, -1); + } +} + +#ifdef GPU +void update_lstm_layer_gpu(layer* l, update_args a, network net) +{ + update_connected_layer_gpu((l->wf), a, net); + update_connected_layer_gpu((l->wi), a, net); + update_connected_layer_gpu((l->wg), a, net); + update_connected_layer_gpu((l->wo), a, net); + update_connected_layer_gpu((l->uf), a, net); + update_connected_layer_gpu((l->ui), a, net); + update_connected_layer_gpu((l->ug), a, net); + update_connected_layer_gpu((l->uo), a, net); +} + +void forward_lstm_layer_gpu(layer* l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer *wf = (l->wf); + layer *wi = (l->wi); + layer *wg = (l->wg); + layer *wo = (l->wo); + + layer *uf = (l->uf); + layer *ui = (l->ui); + layer *ug = (l->ug); + layer *uo = (l->uo); + + fill_gpu(l->outputs * l->batch * l->steps, 0, wf->delta_gpu, 1); + fill_gpu(l->outputs * l->batch * l->steps, 0, wi->delta_gpu, 1); + fill_gpu(l->outputs * l->batch * l->steps, 0, wg->delta_gpu, 1); + fill_gpu(l->outputs * l->batch * l->steps, 0, wo->delta_gpu, 1); + + fill_gpu(l->outputs * l->batch * l->steps, 0, uf->delta_gpu, 1); + fill_gpu(l->outputs * l->batch * l->steps, 0, ui->delta_gpu, 1); + fill_gpu(l->outputs * l->batch * l->steps, 0, ug->delta_gpu, 1); + fill_gpu(l->outputs * l->batch * l->steps, 0, uo->delta_gpu, 1); + if (state.train) { + fill_gpu(l->outputs * l->batch * l->steps, 0, l->delta_gpu, 1); + } + + for (i = 0; i < l->steps; ++i) { + s.input_gpu = 
l->h_gpu; + forward_connected_layer_gpu(wf, s); + forward_connected_layer_gpu(wi, s); + forward_connected_layer_gpu(wg, s); + forward_connected_layer_gpu(wo, s); + + s.input_gpu = state.input_gpu; + forward_connected_layer_gpu(uf, s); + forward_connected_layer_gpu(ui, s); + forward_connected_layer_gpu(ug, s); + forward_connected_layer_gpu(uo, s); + + copy_gpu(l->outputs*l->batch, wf->output_gpu, 1, l->f_gpu, 1, state.streams[l->stream_index]); + axpy_gpu(l->outputs*l->batch, 1, uf->output_gpu, 1, l->f_gpu, 1, state.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, wi->output_gpu, 1, l->i_gpu, 1, state.streams[l->stream_index]); + axpy_gpu(l->outputs*l->batch, 1, ui->output_gpu, 1, l->i_gpu, 1, state.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, wg->output_gpu, 1, l->g_gpu, 1, state.streams[l->stream_index]); + axpy_gpu(l->outputs*l->batch, 1, ug->output_gpu, 1, l->g_gpu, 1, state.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, wo->output_gpu, 1, l->o_gpu, 1, state.streams[l->stream_index]); + axpy_gpu(l->outputs*l->batch, 1, uo->output_gpu, 1, l->o_gpu, 1, state.streams[l->stream_index]); + + activate_array_gpu(l->f_gpu, l->outputs*l->batch, LOGISTIC, state.streams[l->stream_index]); + activate_array_gpu(l->i_gpu, l->outputs*l->batch, LOGISTIC, state.streams[l->stream_index]); + activate_array_gpu(l->g_gpu, l->outputs*l->batch, TANH, state.streams[l->stream_index]); + activate_array_gpu(l->o_gpu, l->outputs*l->batch, LOGISTIC, state.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, l->i_gpu, 1, l->temp_gpu, 1, state.streams[l->stream_index]); + mul_gpu(l->outputs*l->batch, l->g_gpu, 1, l->temp_gpu, 1); + mul_gpu(l->outputs*l->batch, l->f_gpu, 1, l->c_gpu, 1); + axpy_gpu(l->outputs*l->batch, 1, l->temp_gpu, 1, l->c_gpu, 1, state.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, l->c_gpu, 1, l->h_gpu, 1, state.streams[l->stream_index]); + activate_array_gpu(l->h_gpu, l->outputs*l->batch, TANH, 
state.streams[l->stream_index]); + mul_gpu(l->outputs*l->batch, l->o_gpu, 1, l->h_gpu, 1); + + copy_gpu(l->outputs*l->batch, l->c_gpu, 1, l->cell_gpu, 1, state.streams[l->stream_index]); + copy_gpu(l->outputs*l->batch, l->h_gpu, 1, l->output_gpu, 1, state.streams[l->stream_index]); + + state.input_gpu += l->inputs*l->batch; + l->output_gpu += l->outputs*l->batch; + l->cell_gpu += l->outputs*l->batch; + + increment_layer(wf, 1); + increment_layer(wi, 1); + increment_layer(wg, 1); + increment_layer(wo, 1); + + increment_layer(uf, 1); + increment_layer(ui, 1); + increment_layer(ug, 1); + increment_layer(uo, 1); + } +} + +void backward_lstm_layer_gpu(layer* l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer *wf = (l->wf); + layer *wi = (l->wi); + layer *wg = (l->wg); + layer *wo = (l->wo); + + layer *uf = (l->uf); + layer *ui = (l->ui); + layer *ug = (l->ug); + layer *uo = (l->uo); + + increment_layer(wf, l->steps - 1); + increment_layer(wi, l->steps - 1); + increment_layer(wg, l->steps - 1); + increment_layer(wo, l->steps - 1); + + increment_layer(uf, l->steps - 1); + increment_layer(ui, l->steps - 1); + increment_layer(ug, l->steps - 1); + increment_layer(uo, l->steps - 1); + + state.input_gpu += l->inputs*l->batch*(l->steps - 1); + if (state.delta_gpu) state.delta_gpu += l->inputs*l->batch*(l->steps - 1); + + l->output_gpu += l->outputs*l->batch*(l->steps - 1); + l->cell_gpu += l->outputs*l->batch*(l->steps - 1); + l->delta_gpu += l->outputs*l->batch*(l->steps - 1); + + for (i = l->steps - 1; i >= 0; --i) { + if (i != 0) copy_gpu(l->outputs*l->batch, l->cell_gpu - l->outputs*l->batch, 1, l->prev_cell_gpu, 1, state.streams[l->stream_index]); + copy_gpu(l->outputs*l->batch, l->cell_gpu, 1, l->c_gpu, 1, state.streams[l->stream_index]); + if (i != 0) copy_gpu(l->outputs*l->batch, l->output_gpu - l->outputs*l->batch, 1, l->prev_state_gpu, 1, state.streams[l->stream_index]); + copy_gpu(l->outputs*l->batch, l->output_gpu, 1, l->h_gpu, 1, 
state.streams[l->stream_index]); + + l->dh_gpu = (i == 0) ? 0 : l->delta_gpu - l->outputs*l->batch; + + copy_gpu(l->outputs*l->batch, wf->output_gpu, 1, l->f_gpu, 1, state.streams[l->stream_index]); + axpy_gpu(l->outputs*l->batch, 1, uf->output_gpu, 1, l->f_gpu, 1, state.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, wi->output_gpu, 1, l->i_gpu, 1, state.streams[l->stream_index]); + axpy_gpu(l->outputs*l->batch, 1, ui->output_gpu, 1, l->i_gpu, 1, state.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, wg->output_gpu, 1, l->g_gpu, 1, state.streams[l->stream_index]); + axpy_gpu(l->outputs*l->batch, 1, ug->output_gpu, 1, l->g_gpu, 1, state.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, wo->output_gpu, 1, l->o_gpu, 1, state.streams[l->stream_index]); + axpy_gpu(l->outputs*l->batch, 1, uo->output_gpu, 1, l->o_gpu, 1, state.streams[l->stream_index]); + + activate_array_gpu(l->f_gpu, l->outputs*l->batch, LOGISTIC, state.streams[l->stream_index]); + activate_array_gpu(l->i_gpu, l->outputs*l->batch, LOGISTIC, state.streams[l->stream_index]); + activate_array_gpu(l->g_gpu, l->outputs*l->batch, TANH, state.streams[l->stream_index]); + activate_array_gpu(l->o_gpu, l->outputs*l->batch, LOGISTIC, state.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, l->delta_gpu, 1, l->temp3_gpu, 1, state.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, l->c_gpu, 1, l->temp_gpu, 1, state.streams[l->stream_index]); + activate_array_gpu(l->temp_gpu, l->outputs*l->batch, TANH, state.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, l->temp3_gpu, 1, l->temp2_gpu, 1, state.streams[l->stream_index]); + mul_gpu(l->outputs*l->batch, l->o_gpu, 1, l->temp2_gpu, 1); + + gradient_array_gpu(l->temp_gpu, l->outputs*l->batch, TANH, l->temp2_gpu); + axpy_gpu(l->outputs*l->batch, 1, l->dc_gpu, 1, l->temp2_gpu, 1, state.streams[l->stream_index]); + + copy_gpu(l->outputs*l->batch, l->c_gpu, 1, l->temp_gpu, 1, 
state.streams[l->stream_index]); + activate_array_gpu(l->temp_gpu, l->outputs*l->batch, TANH, state.streams[l->stream_index]); + mul_gpu(l->outputs*l->batch, l->temp3_gpu, 1, l->temp_gpu, 1); + gradient_array_gpu(l->o_gpu, l->outputs*l->batch, LOGISTIC, l->temp_gpu); + copy_gpu(l->outputs*l->batch, l->temp_gpu, 1, wo->delta_gpu, 1, state.streams[l->stream_index]); + s.input_gpu = l->prev_state_gpu; + s.delta_gpu = l->dh_gpu; + backward_connected_layer_gpu(wo, s); + + copy_gpu(l->outputs*l->batch, l->temp_gpu, 1, uo->delta_gpu, 1, state.streams[l->stream_index]); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(uo, s); + + copy_gpu(l->outputs*l->batch, l->temp2_gpu, 1, l->temp_gpu, 1, state.streams[l->stream_index]); + mul_gpu(l->outputs*l->batch, l->i_gpu, 1, l->temp_gpu, 1); + gradient_array_gpu(l->g_gpu, l->outputs*l->batch, TANH, l->temp_gpu); + copy_gpu(l->outputs*l->batch, l->temp_gpu, 1, wg->delta_gpu, 1, state.streams[l->stream_index]); + s.input_gpu = l->prev_state_gpu; + s.delta_gpu = l->dh_gpu; + backward_connected_layer_gpu(wg, s); + + copy_gpu(l->outputs*l->batch, l->temp_gpu, 1, ug->delta_gpu, 1, state.streams[l->stream_index]); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(ug, s); + + copy_gpu(l->outputs*l->batch, l->temp2_gpu, 1, l->temp_gpu, 1, state.streams[l->stream_index]); + mul_gpu(l->outputs*l->batch, l->g_gpu, 1, l->temp_gpu, 1); + gradient_array_gpu(l->i_gpu, l->outputs*l->batch, LOGISTIC, l->temp_gpu); + copy_gpu(l->outputs*l->batch, l->temp_gpu, 1, wi->delta_gpu, 1, state.streams[l->stream_index]); + s.input_gpu = l->prev_state_gpu; + s.delta_gpu = l->dh_gpu; + backward_connected_layer_gpu(wi, s); + + copy_gpu(l->outputs*l->batch, l->temp_gpu, 1, ui->delta_gpu, 1, state.streams[l->stream_index]); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(ui, s); + + copy_gpu(l->outputs*l->batch, 
l->temp2_gpu, 1, l->temp_gpu, 1, state.streams[l->stream_index]); + mul_gpu(l->outputs*l->batch, l->prev_cell_gpu, 1, l->temp_gpu, 1); + gradient_array_gpu(l->f_gpu, l->outputs*l->batch, LOGISTIC, l->temp_gpu); + copy_gpu(l->outputs*l->batch, l->temp_gpu, 1, wf->delta_gpu, 1, state.streams[l->stream_index]); + s.input_gpu = l->prev_state_gpu; + s.delta_gpu = l->dh_gpu; + backward_connected_layer_gpu(wf, s); + + copy_gpu(l->outputs*l->batch, l->temp_gpu, 1, uf->delta_gpu, 1, state.streams[l->stream_index]); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(uf, s); + + copy_gpu(l->outputs*l->batch, l->temp2_gpu, 1, l->temp_gpu, 1, state.streams[l->stream_index]); + mul_gpu(l->outputs*l->batch, l->f_gpu, 1, l->temp_gpu, 1); + copy_gpu(l->outputs*l->batch, l->temp_gpu, 1, l->dc_gpu, 1, state.streams[l->stream_index]); + + state.input_gpu -= l->inputs*l->batch; + if (state.delta_gpu) state.delta_gpu -= l->inputs*l->batch; + l->output_gpu -= l->outputs*l->batch; + l->cell_gpu -= l->outputs*l->batch; + l->delta_gpu -= l->outputs*l->batch; + + increment_layer(wf, -1); + increment_layer(wi, -1); + increment_layer(wg, -1); + increment_layer(wo, -1); + + increment_layer(uf, -1); + increment_layer(ui, -1); + increment_layer(ug, -1); + increment_layer(uo, -1); + } +} +#endif diff --git a/workloads/realworld/pipeline/darknet/src/lstm_layer.h b/workloads/realworld/pipeline/darknet/src/lstm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..df47d7f2ef1651d5ae32fc86b95b278273503194 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/lstm_layer.h @@ -0,0 +1,20 @@ +#ifndef LSTM_LAYER_H +#define LSTM_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" +#define USET + +layer* make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam); + +void forward_lstm_layer(layer* l, network net); +void update_lstm_layer(layer* l, update_args a, network 
net); + +#ifdef GPU +void forward_lstm_layer_gpu(layer* l, network net); +void backward_lstm_layer_gpu(layer* l, network net); +void update_lstm_layer_gpu(layer* l, update_args a, network net); + +#endif +#endif diff --git a/workloads/realworld/pipeline/darknet/src/matrix.c b/workloads/realworld/pipeline/darknet/src/matrix.c new file mode 100644 index 0000000000000000000000000000000000000000..580d5b60ef808482f5afe7640b502310e5253fe6 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/matrix.c @@ -0,0 +1,196 @@ +#include "matrix.h" +#include "utils.h" +#include "blas.h" +#include +#include +#include +#include +#include + +void free_matrix(matrix m) +{ + int i; + for(i = 0; i < m.rows; ++i) free(m.vals[i]); + free(m.vals); +} + +float matrix_topk_accuracy(matrix truth, matrix guess, int k) +{ + int *indexes = calloc(k, sizeof(int)); + int n = truth.cols; + int i,j; + int correct = 0; + for(i = 0; i < truth.rows; ++i){ + top_k(guess.vals[i], n, k, indexes); + for(j = 0; j < k; ++j){ + int class_ = indexes[j]; + if(truth.vals[i][class_]){ + ++correct; + break; + } + } + } + free(indexes); + return (float)correct/truth.rows; +} + +void scale_matrix(matrix m, float scale) +{ + int i,j; + for(i = 0; i < m.rows; ++i){ + for(j = 0; j < m.cols; ++j){ + m.vals[i][j] *= scale; + } + } +} + +matrix resize_matrix(matrix m, int size) +{ + int i; + if (m.rows == size) return m; + if (m.rows < size) { + m.vals = realloc(m.vals, size*sizeof(float*)); + for (i = m.rows; i < size; ++i) { + m.vals[i] = calloc(m.cols, sizeof(float)); + } + } else if (m.rows > size) { + for (i = size; i < m.rows; ++i) { + free(m.vals[i]); + } + m.vals = realloc(m.vals, size*sizeof(float*)); + } + m.rows = size; + return m; +} + +void matrix_add_matrix(matrix from, matrix to) +{ + assert(from.rows == to.rows && from.cols == to.cols); + int i,j; + for(i = 0; i < from.rows; ++i){ + for(j = 0; j < from.cols; ++j){ + to.vals[i][j] += from.vals[i][j]; + } + } +} + +matrix copy_matrix(matrix m) +{ + 
matrix c = {0}; + c.rows = m.rows; + c.cols = m.cols; + c.vals = calloc(c.rows, sizeof(float *)); + int i; + for(i = 0; i < c.rows; ++i){ + c.vals[i] = calloc(c.cols, sizeof(float)); + copy_cpu(c.cols, m.vals[i], 1, c.vals[i], 1); + } + return c; +} + +matrix make_matrix(int rows, int cols) +{ + int i; + matrix m; + m.rows = rows; + m.cols = cols; + m.vals = calloc(m.rows, sizeof(float *)); + for(i = 0; i < m.rows; ++i){ + m.vals[i] = calloc(m.cols, sizeof(float)); + } + return m; +} + +matrix hold_out_matrix(matrix *m, int n) +{ + int i; + matrix h; + h.rows = n; + h.cols = m->cols; + h.vals = calloc(h.rows, sizeof(float *)); + for(i = 0; i < n; ++i){ + int index = rand()%m->rows; + h.vals[i] = m->vals[index]; + m->vals[index] = m->vals[--(m->rows)]; + } + return h; +} + +float *pop_column(matrix *m, int c) +{ + float *col = calloc(m->rows, sizeof(float)); + int i, j; + for(i = 0; i < m->rows; ++i){ + col[i] = m->vals[i][c]; + for(j = c; j < m->cols-1; ++j){ + m->vals[i][j] = m->vals[i][j+1]; + } + } + --m->cols; + return col; +} + +matrix csv_to_matrix(char *filename) +{ + FILE *fp = fopen(filename, "r"); + if(!fp) file_error(filename); + + matrix m; + m.cols = -1; + + char *line; + + int n = 0; + int size = 1024; + m.vals = calloc(size, sizeof(float*)); + while((line = fgetl(fp))){ + if(m.cols == -1) m.cols = count_fields(line); + if(n == size){ + size *= 2; + m.vals = realloc(m.vals, size*sizeof(float*)); + } + m.vals[n] = parse_fields(line, m.cols); + free(line); + ++n; + } + m.vals = realloc(m.vals, n*sizeof(float*)); + m.rows = n; + return m; +} + +void matrix_to_csv(matrix m) +{ + int i, j; + + for(i = 0; i < m.rows; ++i){ + for(j = 0; j < m.cols; ++j){ + if(j > 0) printf(","); + printf("%.17g", m.vals[i][j]); + } + printf("\n"); + } +} + +void print_matrix(matrix m) +{ + int i, j; + printf("%d X %d Matrix:\n",m.rows, m.cols); + printf(" __"); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf("__ \n"); + + printf("| "); + for(j = 0; j < 16*m.cols-1; 
++j) printf(" "); + printf(" |\n"); + + for(i = 0; i < m.rows; ++i){ + printf("| "); + for(j = 0; j < m.cols; ++j){ + printf("%15.7f ", m.vals[i][j]); + } + printf(" |\n"); + } + printf("|__"); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf("__|\n"); +} diff --git a/workloads/realworld/pipeline/darknet/src/matrix.h b/workloads/realworld/pipeline/darknet/src/matrix.h new file mode 100644 index 0000000000000000000000000000000000000000..879acd70d26c084931b30067ddcc77057068e58c --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/matrix.h @@ -0,0 +1,13 @@ +#ifndef MATRIX_H +#define MATRIX_H +#include "darknet.h" + +matrix copy_matrix(matrix m); +void print_matrix(matrix m); + +matrix hold_out_matrix(matrix *m, int n); +matrix resize_matrix(matrix m, int size); + +float *pop_column(matrix *m, int c); + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/maxpool_layer.c b/workloads/realworld/pipeline/darknet/src/maxpool_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..93c24d1dde53e90cd6a04f17c31506c7d35ce6fa --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/maxpool_layer.c @@ -0,0 +1,127 @@ +#include "maxpool_layer.h" +#include "cuda_dark.h" +#include + +image get_maxpool_image(maxpool_layer* l) +{ + int h = l->out_h; + int w = l->out_w; + int c = l->c; + return float_to_image(w,h,c,l->output); +} + +image get_maxpool_delta(maxpool_layer* l) +{ + int h = l->out_h; + int w = l->out_w; + int c = l->c; + return float_to_image(w,h,c,l->delta); +} + +maxpool_layer* make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding) +{ + maxpool_layer* l = calloc(1, sizeof(maxpool_layer)); + l->type = MAXPOOL; + l->batch = batch; + l->h = h; + l->w = w; + l->c = c; + l->pad = padding; + l->out_w = (w + padding - size)/stride + 1; + l->out_h = (h + padding - size)/stride + 1; + l->out_c = c; + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = h*w*c; + l->size = size; + l->stride = 
stride; + int output_size = l->out_h * l->out_w * l->out_c * batch; + l->indexes = calloc(output_size, sizeof(int)); + l->output = calloc(output_size, sizeof(float)); + l->delta = calloc(output_size, sizeof(float)); + l->forward = forward_maxpool_layer; + l->backward = backward_maxpool_layer; + #ifdef GPU + l->forward_gpu = forward_maxpool_layer_gpu; + l->backward_gpu = backward_maxpool_layer_gpu; + l->indexes_gpu = cuda_make_int_array(0, output_size); + // l->output_gpu = cuda_make_array(l->output, output_size); + // l->delta_gpu = cuda_make_array(l->delta, output_size); + #endif + fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d\n", size, size, stride, w, h, c, l->out_w, l->out_h, l->out_c); + return l; +} + +void resize_maxpool_layer(maxpool_layer *l, int w, int h) +{ + l->h = h; + l->w = w; + l->inputs = h*w*l->c; + + l->out_w = (w + l->pad - l->size)/l->stride + 1; + l->out_h = (h + l->pad - l->size)/l->stride + 1; + l->outputs = l->out_w * l->out_h * l->c; + int output_size = l->outputs * l->batch; + + l->indexes = realloc(l->indexes, output_size * sizeof(int)); + l->output = realloc(l->output, output_size * sizeof(float)); + l->delta = realloc(l->delta, output_size * sizeof(float)); + + #ifdef GPU + cuda_free((float *)l->indexes_gpu); + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->indexes_gpu = cuda_make_int_array(0, output_size); + l->output_gpu = cuda_make_array(l->output, output_size); + l->delta_gpu = cuda_make_array(l->delta, output_size); + #endif +} + +void forward_maxpool_layer(maxpool_layer* l, network net) +{ + int b,i,j,k,m,n; + int w_offset = -l->pad/2; + int h_offset = -l->pad/2; + + int h = l->out_h; + int w = l->out_w; + int c = l->c; + + for(b = 0; b < l->batch; ++b){ + for(k = 0; k < c; ++k){ + for(i = 0; i < h; ++i){ + for(j = 0; j < w; ++j){ + int out_index = j + w*(i + h*(k + c*b)); + float max = -FLT_MAX; + int max_i = -1; + for(n = 0; n < l->size; ++n){ + for(m = 0; m < l->size; ++m){ + int cur_h = h_offset 
+ i*l->stride + n; + int cur_w = w_offset + j*l->stride + m; + int index = cur_w + l->w*(cur_h + l->h*(k + b*l->c)); + int valid = (cur_h >= 0 && cur_h < l->h && + cur_w >= 0 && cur_w < l->w); + float val = (valid != 0) ? net.input[index] : -FLT_MAX; + max_i = (val > max) ? index : max_i; + max = (val > max) ? val : max; + } + } + l->output[out_index] = max; + l->indexes[out_index] = max_i; + } + } + } + } +} + +void backward_maxpool_layer(maxpool_layer* l, network net) +{ + int i; + int h = l->out_h; + int w = l->out_w; + int c = l->c; + for(i = 0; i < h*w*c*l->batch; ++i){ + int index = l->indexes[i]; + net.delta[index] += l->delta[i]; + } +} + diff --git a/workloads/realworld/pipeline/darknet/src/maxpool_layer.h b/workloads/realworld/pipeline/darknet/src/maxpool_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..a597c647695b5ea21df5765f1959821a369ffb6d --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/maxpool_layer.h @@ -0,0 +1,23 @@ +#ifndef MAXPOOL_LAYER_H +#define MAXPOOL_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +typedef layer maxpool_layer; + +image get_maxpool_image(maxpool_layer* l); +maxpool_layer* make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding); +void resize_maxpool_layer(maxpool_layer *l, int w, int h); +void forward_maxpool_layer(maxpool_layer* l, network net); +void backward_maxpool_layer(maxpool_layer* l, network net); + +#ifdef GPU +void forward_maxpool_layer_gpu(maxpool_layer* l, network net); +void backward_maxpool_layer_gpu(maxpool_layer* l, network net); +#endif + +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/maxpool_layer_kernels.cu b/workloads/realworld/pipeline/darknet/src/maxpool_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..1d1300ac4327e43c95a2e29e01a554483d02c617 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/maxpool_layer_kernels.cu @@ 
-0,0 +1,112 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "maxpool_layer.h" +#include "cuda_dark.h" +} + +__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes) +{ + int h = (in_h + pad - size)/stride + 1; + int w = (in_w + pad - size)/stride + 1; + int c = in_c; + + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int j = id % w; + id /= w; + int i = id % h; + id /= h; + int k = id % c; + id /= c; + int b = id; + + int w_offset = -pad/2; + int h_offset = -pad/2; + + int out_index = j + w*(i + h*(k + c*b)); + float max = -INFINITY; + int max_i = -1; + int l, m; + for(l = 0; l < size; ++l){ + for(m = 0; m < size; ++m){ + int cur_h = h_offset + i*stride + l; + int cur_w = w_offset + j*stride + m; + int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c)); + int valid = (cur_h >= 0 && cur_h < in_h && + cur_w >= 0 && cur_w < in_w); + float val = (valid != 0) ? input[index] : -INFINITY; + max_i = (val > max) ? index : max_i; + max = (val > max) ? 
val : max; + } + } + output[out_index] = max; + indexes[out_index] = max_i; +} + +__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes) +{ + int h = (in_h + pad - size)/stride + 1; + int w = (in_w + pad - size)/stride + 1; + int c = in_c; + int area = (size-1)/stride; + + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int index = id; + int j = id % in_w; + id /= in_w; + int i = id % in_h; + id /= in_h; + int k = id % in_c; + id /= in_c; + int b = id; + + int w_offset = -pad/2; + int h_offset = -pad/2; + + float d = 0; + int l, m; + for(l = -area; l < area+1; ++l){ + for(m = -area; m < area+1; ++m){ + int out_w = (j-w_offset)/stride + m; + int out_h = (i-h_offset)/stride + l; + int out_index = out_w + w*(out_h + h*(k + c*b)); + int valid = (out_w >= 0 && out_w < w && + out_h >= 0 && out_h < h); + d += (valid && indexes[out_index] == index) ? 
delta[out_index] : 0; + } + } + prev_delta[index] += d; +} + +extern "C" void forward_maxpool_layer_gpu(maxpool_layer* l, network net) +{ + // Ruihao + // forward_layer_start(l, net); + // Ruihao + int h = l->out_h; + int w = l->out_w; + int c = l->c; + + size_t n = h*w*c*l->batch; + + forward_maxpool_layer_kernel<<>>(n, l->h, l->w, l->c, l->stride, l->size, l->pad, net.input_gpu, l->output_gpu, l->indexes_gpu); + check_error(cudaPeekAtLastError()); + // Ruihao + // forward_layer_end(l, net); + // Ruihao +} + +extern "C" void backward_maxpool_layer_gpu(maxpool_layer* layer, network net) +{ + size_t n = layer->h*layer->w*layer->c*layer->batch; + + backward_maxpool_layer_kernel<<>>(n, layer->h, layer->w, layer->c, layer->stride, layer->size, layer->pad, layer->delta_gpu, net.delta_gpu, layer->indexes_gpu); + check_error(cudaPeekAtLastError()); +} + diff --git a/workloads/realworld/pipeline/darknet/src/network.c b/workloads/realworld/pipeline/darknet/src/network.c new file mode 100644 index 0000000000000000000000000000000000000000..8d2fd7b73f4e766dc6487382f30c81beefad9781 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/network.c @@ -0,0 +1,1205 @@ +#include +#include +#include +#include "network.h" +#include "image.h" +#include "data.h" +#include "utils.h" +#include "blas.h" + +#include "crop_layer.h" +#include "connected_layer.h" +#include "gru_layer.h" +#include "rnn_layer.h" +#include "crnn_layer.h" +#include "local_layer.h" +#include "convolutional_layer.h" +#include "activation_layer.h" +#include "detection_layer.h" +#include "region_layer.h" +#include "yolo_layer.h" +#include "normalization_layer.h" +#include "batchnorm_layer.h" +#include "maxpool_layer.h" +#include "reorg_layer.h" +#include "avgpool_layer.h" +#include "cost_layer.h" +#include "softmax_layer.h" +#include "dropout_layer.h" +#include "route_layer.h" +#include "upsample_layer.h" +#include "shortcut_layer.h" +#include "parser.h" +#include "data.h" + +load_args get_base_args(network 
*net) +{ + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.size = net->w; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.center = net->center; + args.saturation = net->saturation; + args.hue = net->hue; + return args; +} + +network *load_network(char *cfg, char *weights, int clear) +{ + network *net = parse_network_cfg(cfg); + if(weights && weights[0] != 0){ + load_weights(net, weights); + } + if(clear) (*net->seen) = 0; + return net; +} + +size_t get_current_batch(network *net) +{ + size_t batch_num = (*net->seen)/(net->batch*net->subdivisions); + return batch_num; +} + +void reset_network_state(network *net, int b) +{ + int i; + for (i = 0; i < net->n; ++i) { + #ifdef GPU + layer *l = net->layers[i]; + if(l->state_gpu){ + fill_gpu(l->outputs, 0, l->state_gpu + l->outputs*b, 1); + } + if(l->h_gpu){ + fill_gpu(l->outputs, 0, l->h_gpu + l->outputs*b, 1); + } + #endif + } +} + +void reset_rnn(network *net) +{ + reset_network_state(net, 0); +} + +float get_current_rate(network *net) +{ + size_t batch_num = get_current_batch(net); + int i; + float rate; + if (batch_num < net->burn_in) return net->learning_rate * pow((float)batch_num / net->burn_in, net->power); + switch (net->policy) { + case CONSTANT: + return net->learning_rate; + case STEP: + return net->learning_rate * pow(net->scale, batch_num/net->step); + case STEPS: + rate = net->learning_rate; + for(i = 0; i < net->num_steps; ++i){ + if(net->steps[i] > batch_num) return rate; + rate *= net->scales[i]; + } + return rate; + case EXP: + return net->learning_rate * pow(net->gamma, batch_num); + case POLY: + return net->learning_rate * pow(1 - (float)batch_num / net->max_batches, net->power); + case RANDOM: + return net->learning_rate * pow(rand_uniform(0,1), net->power); + case SIG: + return net->learning_rate * (1./(1.+exp(net->gamma*(batch_num - net->step)))); + default: + fprintf(stderr, 
"Policy is weird!\n"); + return net->learning_rate; + } +} + +char *get_layer_string(LAYER_TYPE a) +{ + switch(a){ + case CONVOLUTIONAL: + return "convolutional"; + case ACTIVE: + return "activation"; + case LOCAL: + return "local"; + case DECONVOLUTIONAL: + return "deconvolutional"; + case CONNECTED: + return "connected"; + case RNN: + return "rnn"; + case GRU: + return "gru"; + case LSTM: + return "lstm"; + case CRNN: + return "crnn"; + case MAXPOOL: + return "maxpool"; + case REORG: + return "reorg"; + case AVGPOOL: + return "avgpool"; + case SOFTMAX: + return "softmax"; + case DETECTION: + return "detection"; + case REGION: + return "region"; + case YOLO: + return "yolo"; + case DROPOUT: + return "dropout"; + case CROP: + return "crop"; + case COST: + return "cost"; + case ROUTE: + return "route"; + case SHORTCUT: + return "shortcut"; + case NORMALIZATION: + return "normalization"; + case BATCHNORM: + return "batchnorm"; + default: + break; + } + return "none"; +} + +network *make_network(int n) +{ + network *net = calloc(1, sizeof(network)); + net->n = n; + net->layers = calloc(net->n, sizeof(layer)); + net->seen = calloc(1, sizeof(size_t)); + net->t = calloc(1, sizeof(int)); + net->cost = calloc(1, sizeof(float)); + return net; +} + +void forward_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + forward_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + for(i = 0; i < net.n; ++i){ + net.index = i; + layer *l = net.layers[i]; + if(l->delta){ + fill_cpu(l->outputs * l->batch, 0, l->delta, 1); + } + l->forward(l, net); + net.input = l->output; + if(l->truth) { + net.truth = l->output; + } + } + calc_network_cost(netp); +} + +void update_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + update_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + update_args a = {0}; + a.batch = net.batch*net.subdivisions; + a.learning_rate = get_current_rate(netp); + a.momentum = net.momentum; + 
a.decay = net.decay; + a.adam = net.adam; + a.B1 = net.B1; + a.B2 = net.B2; + a.eps = net.eps; + ++*net.t; + a.t = *net.t; + + for(i = 0; i < net.n; ++i){ + layer *l = net.layers[i]; + if(l->update){ + l->update(l, a, net); + } + } +} + +void calc_network_cost(network *netp) +{ + network net = *netp; + int i; + float sum = 0; + int count = 0; + for(i = 0; i < net.n; ++i){ + if(net.layers[i]->cost){ + sum += net.layers[i]->cost[0]; + ++count; + } + } + *net.cost = sum/count; +} + +int get_predicted_class_network(network *net) +{ + return max_index(net->output, net->outputs); +} + +void backward_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + backward_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + network orig = net; + for(i = net.n-1; i >= 0; --i){ + layer *l = net.layers[i]; + if(l->stopbackward) break; + if(i == 0){ + net = orig; + }else{ + layer *prev = net.layers[i-1]; + net.input = prev->output; + net.delta = prev->delta; + } + net.index = i; + l->backward(l, net); + } +} + +float train_network_datum(network *net) +{ + fprintf(stderr, "train_network_datum \n"); + *net->seen += net->batch; + net->train = 1; + forward_network(net); + backward_network(net); + float error = *net->cost; + if(((*net->seen)/net->batch)%net->subdivisions == 0) update_network(net); + return error; +} + +float train_network_sgd(network *net, data d, int n) +{ + int batch = net->batch; + + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + get_random_batch(d, batch, net->input, net->truth); + float err = train_network_datum(net); + sum += err; + } + return (float)sum/(n*batch); +} + +float train_network(network *net, data d) +{ + fprintf(stderr, "train_network \n"); + assert(d.X.rows % net->batch == 0); + int batch = net->batch; + int n = d.X.rows / batch; + + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + get_next_batch(d, batch, i*batch, net->input, net->truth); + float err = train_network_datum(net); + sum += err; + } + return 
(float)sum/(n*batch); +} + +void set_temp_network(network *net, float t) +{ + int i; + for(i = 0; i < net->n; ++i){ + net->layers[i]->temperature = t; + } +} + + +void set_batch_network(network *net, int b) +{ + net->batch = b; + int i; + for(i = 0; i < net->n; ++i){ + net->layers[i]->batch = b; +#ifdef CUDNN + if(net->layers[i].type == CONVOLUTIONAL){ + cudnn_convolutional_setup(net->layers + i); + } + if(net->layers[i].type == DECONVOLUTIONAL){ + layer *l = net->layers + i; + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + } +#endif + } +} + +int resize_network(network *net, int w, int h) +{ + fprintf(stderr, "resize_network \n"); +#ifdef GPU + cuda_set_device(net->gpu_index); + cuda_free(net->workspace); +#endif + int i; + //if(w == net->w && h == net->h) return 0; + net->w = w; + net->h = h; + int inputs = 0; + size_t workspace_size = 0; + // fprintf(stderr, "Resizing to %d x %d...\n", w, h); + //fflush(stderr); + for (i = 0; i < net->n; ++i){ + layer *l = net->layers[i]; + if(l->type == CONVOLUTIONAL){ + resize_convolutional_layer(l, w, h); + }else if(l->type == CROP){ + resize_crop_layer(l, w, h); + }else if(l->type == MAXPOOL){ + resize_maxpool_layer(l, w, h); + }else if(l->type == REGION){ + resize_region_layer(l, w, h); + }else if(l->type == YOLO){ + resize_yolo_layer(l, w, h); + }else if(l->type == ROUTE){ + resize_route_layer(l, net); + }else if(l->type == SHORTCUT){ + resize_shortcut_layer(l, w, h); + }else if(l->type == UPSAMPLE){ + resize_upsample_layer(l, w, h); + }else if(l->type == REORG){ + resize_reorg_layer(l, w, h); + }else if(l->type == AVGPOOL){ + resize_avgpool_layer(l, w, h); + }else if(l->type == NORMALIZATION){ + resize_normalization_layer(l, w, h); + }else if(l->type == COST){ + resize_cost_layer(l, inputs); + }else{ + error("Cannot resize this type of layer"); + } 
+ if(l->workspace_size > workspace_size) workspace_size = l->workspace_size; + if(l->workspace_size > 2000000000) assert(0); + inputs = l->outputs; + net->layers[i] = l; + w = l->out_w; + h = l->out_h; + if(l->type == AVGPOOL) break; + } + layer *out = get_network_output_layer(net); + net->inputs = net->layers[0]->inputs; + net->outputs = out->outputs; + net->truths = out->outputs; + if(net->layers[net->n-1]->truths) net->truths = net->layers[net->n-1]->truths; + net->output = out->output; + free(net->input); + free(net->truth); + net->input = calloc(net->inputs*net->batch, sizeof(float)); + net->truth = calloc(net->truths*net->batch, sizeof(float)); +#ifdef GPU + if(gpu_index >= 0){ + cuda_free(net->input_gpu); + cuda_free(net->truth_gpu); + net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch); + net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch); + if(workspace_size){ + net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1); + } + }else { + free(net->workspace); + net->workspace = calloc(1, workspace_size); + } +#else + free(net->workspace); + net->workspace = calloc(1, workspace_size); +#endif + //fprintf(stderr, " Done!\n"); + return 0; +} + +layer* get_network_detection_layer(network *net) +{ + int i; + for(i = 0; i < net->n; ++i){ + if(net->layers[i]->type == DETECTION){ + return net->layers[i]; + } + } + fprintf(stderr, "Detection layer not found!!\n"); + layer *l = {0}; + return l; +} + +image get_network_image_layer(network *net, int i) +{ + layer *l = net->layers[i]; +#ifdef GPU + //cuda_pull_array(l->output_gpu, l->output, l->outputs); +#endif + if (l->out_w && l->out_h && l->out_c){ + return float_to_image(l->out_w, l->out_h, l->out_c, l->output); + } + image def = {0}; + return def; +} + +image get_network_image(network *net) +{ + int i; + for(i = net->n-1; i >= 0; --i){ + image m = get_network_image_layer(net, i); + if(m.h != 0) return m; + } + image def = {0}; + return def; +} + +void 
visualize_network(network *net) +{ + image *prev = 0; + int i; + char buff[256]; + for(i = 0; i < net->n; ++i){ + sprintf(buff, "Layer %d", i); + layer *l = net->layers[i]; + if(l->type == CONVOLUTIONAL){ + prev = visualize_convolutional_layer(l, buff, prev); + } + } +} + +void top_predictions(network *net, int k, int *index) +{ + top_k(net->output, net->outputs, k, index); +} + + +float *network_predict(network *net, float *input) +{ + network orig = *net; + net->input = input; + net->truth = 0; + net->train = 0; + net->delta = 0; + forward_network(net); + float *out = net->output; + *net = orig; + return out; +} + +int num_detections(network *net, float thresh) +{ + int i; + int s = 0; + for(i = 0; i < net->n; ++i){ + layer *l = net->layers[i]; + if(l->type == YOLO){ + s += yolo_num_detections(l, thresh); + } + if(l->type == DETECTION || l->type == REGION){ + s += l->w*l->h*l->n; + } + } + return s; +} + +detection *make_network_boxes(network *net, float thresh, int *num) +{ + layer *l = net->layers[net->n - 1]; + int i; + int nboxes = num_detections(net, thresh); + if(num) *num = nboxes; + detection *dets = calloc(nboxes, sizeof(detection)); + for(i = 0; i < nboxes; ++i){ + dets[i].prob = calloc(l->classes, sizeof(float)); + if(l->coords > 4){ + dets[i].mask = calloc(l->coords-4, sizeof(float)); + } + } + return dets; +} + +void fill_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets) +{ + int j; + for(j = 0; j < net->n; ++j){ + layer *l = net->layers[j]; + if(l->type == YOLO){ + int count = get_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets); + dets += count; + } + if(l->type == REGION){ + get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets); + dets += l->w*l->h*l->n; + } + if(l->type == DETECTION){ + get_detection_detections(l, w, h, thresh, dets); + dets += l->w*l->h*l->n; + } + } +} + +detection *get_network_boxes(network *net, int w, int h, float 
thresh, float hier, int *map, int relative, int *num) +{ + detection *dets = make_network_boxes(net, thresh, num); + fill_network_boxes(net, w, h, thresh, hier, map, relative, dets); + return dets; +} + +void free_detections(detection *dets, int n) +{ + int i; + for(i = 0; i < n; ++i){ + free(dets[i].prob); + if(dets[i].mask) free(dets[i].mask); + } + free(dets); +} + +float *network_predict_image(network *net, image im) +{ + image imr = letterbox_image(im, net->w, net->h); + set_batch_network(net, 1); + float *p = network_predict(net, imr.data); + free_image(imr); + return p; +} + +int network_width(network *net){return net->w;} +int network_height(network *net){return net->h;} + +matrix network_predict_data_multi(network *net, data test, int n) +{ + int i,j,b,m; + int k = net->outputs; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.rows, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + } + for(m = 0; m < n; ++m){ + float *out = network_predict(net, X); + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + for(j = 0; j < k; ++j){ + pred.vals[i+b][j] += out[j+b*k]/n; + } + } + } + } + free(X); + return pred; +} + +matrix network_predict_data(network *net, data test) +{ + int i,j,b; + int k = net->outputs; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.cols, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + } + // Ruihao + clock_t time = clock(); + float *out = network_predict(net, X); + fprintf(stderr, "%lf seconds\n", sec(clock()-time)); + // Ruihao + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + for(j = 0; j < k; ++j){ + pred.vals[i+b][j] = out[j+b*k]; 
+ } + } + } + free(X); + return pred; +} + +void print_network(network *net) +{ + int i,j; + for(i = 0; i < net->n; ++i){ + layer *l = net->layers[i]; + float *output = l->output; + int n = l->outputs; + float mean = mean_array(output, n); + float vari = variance_array(output, n); + fprintf(stderr, "Layer %d - Mean: %f, Variance: %f\n",i,mean, vari); + if(n > 100) n = 100; + for(j = 0; j < n; ++j) fprintf(stderr, "%f, ", output[j]); + if(n == 100)fprintf(stderr,".....\n"); + fprintf(stderr, "\n"); + } +} + +void compare_networks(network *n1, network *n2, data test) +{ + matrix g1 = network_predict_data(n1, test); + matrix g2 = network_predict_data(n2, test); + int i; + int a,b,c,d; + a = b = c = d = 0; + for(i = 0; i < g1.rows; ++i){ + int truth = max_index(test.y.vals[i], test.y.cols); + int p1 = max_index(g1.vals[i], g1.cols); + int p2 = max_index(g2.vals[i], g2.cols); + if(p1 == truth){ + if(p2 == truth) ++d; + else ++c; + }else{ + if(p2 == truth) ++b; + else ++a; + } + } + printf("%5d %5d\n%5d %5d\n", a, b, c, d); + float num = pow((abs(b - c) - 1.), 2.); + float den = b + c; + printf("%f\n", num/den); +} + +float network_accuracy(network *net, data d) +{ + matrix guess = network_predict_data(net, d); + float acc = matrix_topk_accuracy(d.y, guess,1); + free_matrix(guess); + return acc; +} + +float *network_accuracies(network *net, data d, int n) +{ + static float acc[2]; + matrix guess = network_predict_data(net, d); + acc[0] = matrix_topk_accuracy(d.y, guess, 1); + acc[1] = matrix_topk_accuracy(d.y, guess, n); + free_matrix(guess); + return acc; +} + +layer* get_network_output_layer(network *net) +{ + int i; + for(i = net->n - 1; i >= 0; --i){ + if(net->layers[i]->type != COST) break; + } + return net->layers[i]; +} + +float network_accuracy_multi(network *net, data d, int n) +{ + matrix guess = network_predict_data_multi(net, d, n); + float acc = matrix_topk_accuracy(d.y, guess,1); + free_matrix(guess); + return acc; +} + +void free_network(network *net) +{ + 
fprintf(stderr, "free_network \n"); + int i; + for(i = 0; i < net->n; ++i){ + free_layer(net->layers[i]); + } + free(net->layers); + if(net->input) free(net->input); + if(net->truth) free(net->truth); +#ifdef GPU + if(net->input_gpu) cuda_free(net->input_gpu); + if(net->truth_gpu) cuda_free(net->truth_gpu); +#endif + free(net); +} + +// Some day... +// ^ What the hell is this comment for? + + +layer* network_output_layer(network *net) +{ + int i; + for(i = net->n - 1; i >= 0; --i){ + if(net->layers[i]->type != COST) break; + } + return net->layers[i]; +} + +int network_inputs(network *net) +{ + return net->layers[0]->inputs; +} + +int network_outputs(network *net) +{ + return network_output_layer(net)->outputs; +} + +float *network_output(network *net) +{ + return network_output_layer(net)->output; +} + +#ifdef GPU + +void forward_network_gpu(network *net) +{ + fprintf(stderr, "forward_network_gpu!\n"); + cuda_set_device(net->gpu_index); + cuda_push_array(net->input_gpu, net->input, net->inputs*net->batch); + if(net->truth){ + cuda_push_array(net->truth_gpu, net->truth, net->truths*net->batch); + } + + // cudaStreamCreate(&net->streams[0]); + // cudaStreamCreate(&net->streams[1]); + cudaStreamCreateWithFlags(&net->streams[0],cudaStreamNonBlocking); + cudaStreamCreateWithFlags(&net->streams[1],cudaStreamNonBlocking); + net->is_first = 0; + int i; + for(i = 0; i < net->n; ++i) { + net->layers[i]->stream_index = i % 2; + // net->layers[i]->stream_index = 0; + } + for(i = 0; i < net->n; ++i){ + // Ruihao + // fprintf(stderr, "forward_network_gpu %d, type is %d! 
\n", i, net->layers[i]->type); + net->index = i; + net->next_index = -1; + if (i == 0) { + net->is_first = 1; + } else { + net->is_first = 0; + } + for (int j = i + 1; j < net->n; j++) { + if (net->layers[j]->type == CONVOLUTIONAL + || net->layers[j]->type == SHORTCUT + || net->layers[j]->type == MAXPOOL + || net->layers[j]->type == AVGPOOL + || net->layers[j]->type == SOFTMAX + || net->layers[j]->type == UPSAMPLE + || net->layers[j]->type == ROUTE + || net->layers[j]->type == YOLO + ) { + net->next_index = j; + break; + } + } + // Ruihao + layer *l = net->layers[i]; + + // if(l->delta_gpu){ + // fprintf(stderr, "forward_network_gpu %d delta_gpu! \n", i); + // fill_gpu(l->outputs * l->batch, 0, l->delta_gpu, 1); + // } + forward_layer_start(l, net); + // fprintf(stderr, "forward_network_gpu %d fill_gpu! \n", i); + forward_layer_end(l, net); + l->forward_gpu(l, *net); + + net->input_gpu = l->output_gpu; + net->input = l->output; + if(l->truth) { + net->truth_gpu = l->output_gpu; + net->truth = l->output; + } + } + pull_network_output(net); + calc_network_cost(net); +} + +void backward_network_gpu(network *netp) +{ + fprintf(stderr, "backward_network_gpu!\n"); + int i; + network net = *netp; + net.is_first = 0; + network orig = net; + cuda_set_device(net.gpu_index); + for(i = net.n-1; i >= 0; --i){ + layer *l = net.layers[i]; + if(l->stopbackward) break; + + // Ruihao + // fprintf(stderr, "backward_network_gpu %d, type is %d! 
\n", i, net.layers[i]->type); + net.index = i; + net.next_index = -1; + if (i == net.n-1) { + net.is_first = 1; + } else { + net.is_first = 0; + } + for (int j = i - 1; j >= 0; j--) { + if (net.layers[j]->type == CONVOLUTIONAL + || net.layers[j]->type == SHORTCUT + || net.layers[j]->type == MAXPOOL + || net.layers[j]->type == AVGPOOL + || net.layers[j]->type == SOFTMAX + ) { + net.next_index = j; + break; + } + } + // Ruihao + + if(i == 0){ + net = orig; + net.next_index = -1; + }else{ + layer *prev = net.layers[i-1]; + net.input = prev->output; + net.delta = prev->delta; + net.input_gpu = prev->output_gpu; + net.delta_gpu = prev->delta_gpu; + } + net.index = i; + + // forward_layer_start(l, net); + l->backward_gpu(l, net); + // forward_layer_end(l, net); + } +} + +void update_network_gpu(network *netp) +{ + fprintf(stderr, "update_network_gpu!\n"); + network net = *netp; + cuda_set_device(net.gpu_index); + int i; + update_args a = {0}; + a.batch = net.batch*net.subdivisions; + a.learning_rate = get_current_rate(netp); + a.momentum = net.momentum; + a.decay = net.decay; + a.adam = net.adam; + a.B1 = net.B1; + a.B2 = net.B2; + a.eps = net.eps; + ++*net.t; + a.t = (*net.t); + + for(i = 0; i < net.n; ++i){ + layer *l = net.layers[i]; + if(l->update_gpu){ + l->update_gpu(l, a, net); + } + } +} + +void harmless_update_network_gpu(network *netp) +{ + network net = *netp; + cuda_set_device(net.gpu_index); + int i; + for(i = 0; i < net.n; ++i){ + layer *l = net.layers[i]; + if(l->weight_updates_gpu) fill_gpu(l->nweights, 0, l->weight_updates_gpu, 1); + if(l->bias_updates_gpu) fill_gpu(l->nbiases, 0, l->bias_updates_gpu, 1); + if(l->scale_updates_gpu) fill_gpu(l->nbiases, 0, l->scale_updates_gpu, 1); + } +} + +typedef struct { + network *net; + data d; + float *err; +} train_args; + +void *train_thread(void *ptr) +{ + train_args args = *(train_args*)ptr; + free(ptr); + cuda_set_device(args.net->gpu_index); + *args.err = train_network(args.net, args.d); + return 0; +} + 
+pthread_t train_network_in_thread(network *net, data d, float *err) +{ + pthread_t thread; + train_args *ptr = (train_args *)calloc(1, sizeof(train_args)); + ptr->net = net; + ptr->d = d; + ptr->err = err; + if(pthread_create(&thread, 0, train_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void merge_weights(layer *l, layer *base) +{ + if (l->type == CONVOLUTIONAL) { + axpy_cpu(l->n, 1, l->bias_updates, 1, base->biases, 1); + axpy_cpu(l->nweights, 1, l->weight_updates, 1, base->weights, 1); + if (l->scales) { + axpy_cpu(l->n, 1, l->scale_updates, 1, base->scales, 1); + } + } else if(l->type == CONNECTED) { + axpy_cpu(l->outputs, 1, l->bias_updates, 1, base->biases, 1); + axpy_cpu(l->outputs*l->inputs, 1, l->weight_updates, 1, base->weights, 1); + } +} + +void scale_weights(layer *l, float s) +{ + if (l->type == CONVOLUTIONAL) { + scal_cpu(l->n, s, l->biases, 1); + scal_cpu(l->nweights, s, l->weights, 1); + if (l->scales) { + scal_cpu(l->n, s, l->scales, 1); + } + } else if(l->type == CONNECTED) { + scal_cpu(l->outputs, s, l->biases, 1); + scal_cpu(l->outputs*l->inputs, s, l->weights, 1); + } +} + + +void pull_weights(layer *l) +{ + if(l->type == CONVOLUTIONAL || l->type == DECONVOLUTIONAL){ + cuda_pull_array(l->biases_gpu, l->bias_updates, l->n); + cuda_pull_array(l->weights_gpu, l->weight_updates, l->nweights); + if(l->scales) cuda_pull_array(l->scales_gpu, l->scale_updates, l->n); + } else if(l->type == CONNECTED){ + cuda_pull_array(l->biases_gpu, l->bias_updates, l->outputs); + cuda_pull_array(l->weights_gpu, l->weight_updates, l->outputs*l->inputs); + } +} + +void push_weights(layer *l) +{ + if(l->type == CONVOLUTIONAL || l->type == DECONVOLUTIONAL){ + cuda_push_array(l->biases_gpu, l->biases, l->n); + cuda_push_array(l->weights_gpu, l->weights, l->nweights); + if(l->scales) cuda_push_array(l->scales_gpu, l->scales, l->n); + } else if(l->type == CONNECTED){ + cuda_push_array(l->biases_gpu, l->biases, l->outputs); + 
cuda_push_array(l->weights_gpu, l->weights, l->outputs*l->inputs); + } +} + +void distribute_weights(layer *l, layer *base) +{ + if (l->type == CONVOLUTIONAL || l->type == DECONVOLUTIONAL) { + cuda_push_array(l->biases_gpu, base->biases, l->n); + cuda_push_array(l->weights_gpu, base->weights, l->nweights); + if (base->scales) cuda_push_array(l->scales_gpu, base->scales, l->n); + } else if (l->type == CONNECTED) { + cuda_push_array(l->biases_gpu, base->biases, l->outputs); + cuda_push_array(l->weights_gpu, base->weights, l->outputs*l->inputs); + } +} + + +/* + + void pull_updates(layer *l) + { + if(l->type == CONVOLUTIONAL){ + cuda_pull_array(l->bias_updates_gpu, l->bias_updates, l->n); + cuda_pull_array(l->weight_updates_gpu, l->weight_updates, l->nweights); + if(l->scale_updates) cuda_pull_array(l->scale_updates_gpu, l->scale_updates, l->n); + } else if(l->type == CONNECTED){ + cuda_pull_array(l->bias_updates_gpu, l->bias_updates, l->outputs); + cuda_pull_array(l->weight_updates_gpu, l->weight_updates, l->outputs*l->inputs); + } + } + + void push_updates(layer *l) + { + if(l->type == CONVOLUTIONAL){ + cuda_push_array(l->bias_updates_gpu, l->bias_updates, l->n); + cuda_push_array(l->weight_updates_gpu, l->weight_updates, l->nweights); + if(l->scale_updates) cuda_push_array(l->scale_updates_gpu, l->scale_updates, l->n); + } else if(l->type == CONNECTED){ + cuda_push_array(l->bias_updates_gpu, l->bias_updates, l->outputs); + cuda_push_array(l->weight_updates_gpu, l->weight_updates, l->outputs*l->inputs); + } + } + + void update_layer(layer *l, network net) + { + int update_batch = net.batch*net.subdivisions; + float rate = get_current_rate(net); + l->t = get_current_batch(net); + if(l->update_gpu){ + l->update_gpu(l, update_batch, rate*l->learning_rate_scale, net.momentum, net.decay); + } + } + void merge_updates(layer *l, layer base) + { + if (l->type == CONVOLUTIONAL) { + axpy_cpu(l->n, 1, l->bias_updates, 1, base.bias_updates, 1); + axpy_cpu(l->nweights, 1, 
l->weight_updates, 1, base.weight_updates, 1); + if (l->scale_updates) { + axpy_cpu(l->n, 1, l->scale_updates, 1, base.scale_updates, 1); + } + } else if(l->type == CONNECTED) { + axpy_cpu(l->outputs, 1, l->bias_updates, 1, base.bias_updates, 1); + axpy_cpu(l->outputs*l->inputs, 1, l->weight_updates, 1, base.weight_updates, 1); + } + } + + void distribute_updates(layer *l, layer base) + { + if(l->type == CONVOLUTIONAL || l->type == DECONVOLUTIONAL){ + cuda_push_array(l->bias_updates_gpu, base.bias_updates, l->n); + cuda_push_array(l->weight_updates_gpu, base.weight_updates, l->nweights); + if(base.scale_updates) cuda_push_array(l->scale_updates_gpu, base.scale_updates, l->n); + } else if(l->type == CONNECTED){ + cuda_push_array(l->bias_updates_gpu, base.bias_updates, l->outputs); + cuda_push_array(l->weight_updates_gpu, base.weight_updates, l->outputs*l->inputs); + } + } + */ + +/* + void sync_layer(network *nets, int n, int j) + { + int i; + network net = nets[0]; + layer base = net.layers[j]; + scale_weights(base, 0); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i].gpu_index); + layer *l = nets[i].layers[j]; + pull_weights(l); + merge_weights(l, base); + } + scale_weights(base, 1./n); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i].gpu_index); + layer *l = nets[i].layers[j]; + distribute_weights(l, base); + } + } + */ + +void sync_layer(network **nets, int n, int j) +{ + int i; + network *net = nets[0]; + layer *base = net->layers[j]; + scale_weights(base, 0); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i]->gpu_index); + layer *l = nets[i]->layers[j]; + pull_weights(l); + merge_weights(l, base); + } + scale_weights(base, 1./n); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i]->gpu_index); + layer *l = nets[i]->layers[j]; + distribute_weights(l, base); + } +} + +typedef struct{ + network **nets; + int n; + int j; +} sync_args; + +void *sync_layer_thread(void *ptr) +{ + sync_args args = *(sync_args*)ptr; + sync_layer(args.nets, args.n, 
args.j); + free(ptr); + return 0; +} + +pthread_t sync_layer_in_thread(network **nets, int n, int j) +{ + pthread_t thread; + sync_args *ptr = (sync_args *)calloc(1, sizeof(sync_args)); + ptr->nets = nets; + ptr->n = n; + ptr->j = j; + if(pthread_create(&thread, 0, sync_layer_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void sync_nets(network **nets, int n, int interval) +{ + int j; + int layers = nets[0]->n; + pthread_t *threads = (pthread_t *) calloc(layers, sizeof(pthread_t)); + + *(nets[0]->seen) += interval * (n-1) * nets[0]->batch * nets[0]->subdivisions; + for (j = 0; j < n; ++j){ + *(nets[j]->seen) = *(nets[0]->seen); + } + for (j = 0; j < layers; ++j) { + threads[j] = sync_layer_in_thread(nets, n, j); + } + for (j = 0; j < layers; ++j) { + pthread_join(threads[j], 0); + } + free(threads); +} + +float train_networks(network **nets, int n, data d, int interval) +{ + int i; + int batch = nets[0]->batch; + int subdivisions = nets[0]->subdivisions; + assert(batch * subdivisions * n == d.X.rows); + pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t)); + float *errors = (float *) calloc(n, sizeof(float)); + + float sum = 0; + for(i = 0; i < n; ++i){ + data p = get_data_part(d, i, n); + threads[i] = train_network_in_thread(nets[i], p, errors + i); + } + for(i = 0; i < n; ++i){ + pthread_join(threads[i], 0); + //printf("%f\n", errors[i]); + sum += errors[i]; + } + //cudaDeviceSynchronize(); + if (get_current_batch(nets[0]) % interval == 0) { + printf("Syncing... 
"); + fflush(stdout); + sync_nets(nets, n, interval); + printf("Done!\n"); + } + //cudaDeviceSynchronize(); + free(threads); + free(errors); + return (float)sum/(n); +} + +void pull_network_output(network *net) +{ + layer *l = get_network_output_layer(net); + cuda_pull_array(l->output_gpu, l->output, l->outputs*l->batch); +} + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/network.h b/workloads/realworld/pipeline/darknet/src/network.h new file mode 100644 index 0000000000000000000000000000000000000000..1b0dfd1aaa3e090c6ce276d26f24d127de2cb66d --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/network.h @@ -0,0 +1,29 @@ +// Oh boy, why am I about to do this.... +#ifndef NETWORK_H +#define NETWORK_H +#include "darknet.h" + +#include "image.h" +#include "layer.h" +#include "data.h" +#include "tree.h" + + +#ifdef GPU +void pull_network_output(network *net); +#endif + +void compare_networks(network *n1, network *n2, data d); +char *get_layer_string(LAYER_TYPE a); + +network *make_network(int n); + + +float network_accuracy_multi(network *net, data d, int n); +int get_predicted_class_network(network *net); +void print_network(network *net); +int resize_network(network *net, int w, int h); +void calc_network_cost(network *net); + +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/normalization_layer.c b/workloads/realworld/pipeline/darknet/src/normalization_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..9dd1070b2ea29c2d7dac7562f5b29d02884ba2d0 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/normalization_layer.c @@ -0,0 +1,151 @@ +#include "normalization_layer.h" +#include "blas.h" + +#include + +layer* make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa) +{ + fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size); + layer* layer = calloc(1, sizeof(layer)); + layer->type = NORMALIZATION; + 
layer->batch = batch; + layer->h = layer->out_h = h; + layer->w = layer->out_w = w; + layer->c = layer->out_c = c; + layer->kappa = kappa; + layer->size = size; + layer->alpha = alpha; + layer->beta = beta; + layer->output = calloc(h * w * c * batch, sizeof(float)); + layer->delta = calloc(h * w * c * batch, sizeof(float)); + layer->squared = calloc(h * w * c * batch, sizeof(float)); + layer->norms = calloc(h * w * c * batch, sizeof(float)); + layer->inputs = w*h*c; + layer->outputs = layer->inputs; + + layer->forward = forward_normalization_layer; + layer->backward = backward_normalization_layer; + #ifdef GPU + layer->forward_gpu = forward_normalization_layer_gpu; + layer->backward_gpu = backward_normalization_layer_gpu; + + layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch); + layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch); + layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch); + layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch); + #endif + return layer; +} + +void resize_normalization_layer(layer *layer, int w, int h) +{ + int c = layer->c; + int batch = layer->batch; + layer->h = h; + layer->w = w; + layer->out_h = h; + layer->out_w = w; + layer->inputs = w*h*c; + layer->outputs = layer->inputs; + layer->output = realloc(layer->output, h * w * c * batch * sizeof(float)); + layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float)); + layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float)); + layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float)); +#ifdef GPU + cuda_free(layer->output_gpu); + cuda_free(layer->delta_gpu); + cuda_free(layer->squared_gpu); + cuda_free(layer->norms_gpu); + layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch); + layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch); + layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch); + layer->norms_gpu = 
cuda_make_array(layer->norms, h * w * c * batch); +#endif +} + +void forward_normalization_layer(layer* layer, network net) +{ + int k,b; + int w = layer->w; + int h = layer->h; + int c = layer->c; + scal_cpu(w*h*c*layer->batch, 0, layer->squared, 1); + + for(b = 0; b < layer->batch; ++b){ + float *squared = layer->squared + w*h*c*b; + float *norms = layer->norms + w*h*c*b; + float *input = net.input + w*h*c*b; + pow_cpu(w*h*c, 2, input, 1, squared, 1); + + const_cpu(w*h, layer->kappa, norms, 1); + for(k = 0; k < layer->size/2; ++k){ + axpy_cpu(w*h, layer->alpha, squared + w*h*k, 1, norms, 1); + } + + for(k = 1; k < layer->c; ++k){ + copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); + int prev = k - ((layer->size-1)/2) - 1; + int next = k + (layer->size/2); + if(prev >= 0) axpy_cpu(w*h, -layer->alpha, squared + w*h*prev, 1, norms + w*h*k, 1); + if(next < layer->c) axpy_cpu(w*h, layer->alpha, squared + w*h*next, 1, norms + w*h*k, 1); + } + } + pow_cpu(w*h*c*layer->batch, -layer->beta, layer->norms, 1, layer->output, 1); + mul_cpu(w*h*c*layer->batch, net.input, 1, layer->output, 1); +} + +void backward_normalization_layer(layer* layer, network net) +{ + // TODO This is approximate ;-) + // Also this should add in to delta instead of overwritting. 
+ + int w = layer->w; + int h = layer->h; + int c = layer->c; + pow_cpu(w*h*c*layer->batch, -layer->beta, layer->norms, 1, net.delta, 1); + mul_cpu(w*h*c*layer->batch, layer->delta, 1, net.delta, 1); +} + +#ifdef GPU +void forward_normalization_layer_gpu(layer* layer, network net) +{ + int k,b; + int w = layer->w; + int h = layer->h; + int c = layer->c; + scal_gpu(w*h*c*layer->batch, 0, layer->squared_gpu, 1, net.streams[layer->stream_index]); + + for(b = 0; b < layer->batch; ++b){ + float *squared = layer->squared_gpu + w*h*c*b; + float *norms = layer->norms_gpu + w*h*c*b; + float *input = net.input_gpu + w*h*c*b; + pow_gpu(w*h*c, 2, input, 1, squared, 1); + + const_gpu(w*h, layer->kappa, norms, 1); + for(k = 0; k < layer->size/2; ++k){ + axpy_gpu(w*h, layer->alpha, squared + w*h*k, 1, norms, 1, net.streams[layer->stream_index]); + } + + for(k = 1; k < layer->c; ++k){ + copy_gpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1, net.streams[layer->stream_index]); + int prev = k - ((layer->size-1)/2) - 1; + int next = k + (layer->size/2); + if(prev >= 0) axpy_gpu(w*h, -layer->alpha, squared + w*h*prev, 1, norms + w*h*k, 1, net.streams[layer->stream_index]); + if(next < layer->c) axpy_gpu(w*h, layer->alpha, squared + w*h*next, 1, norms + w*h*k, 1, net.streams[layer->stream_index]); + } + } + pow_gpu(w*h*c*layer->batch, -layer->beta, layer->norms_gpu, 1, layer->output_gpu, 1); + mul_gpu(w*h*c*layer->batch, net.input_gpu, 1, layer->output_gpu, 1); +} + +void backward_normalization_layer_gpu(layer* layer, network net) +{ + // TODO This is approximate ;-) + + int w = layer->w; + int h = layer->h; + int c = layer->c; + pow_gpu(w*h*c*layer->batch, -layer->beta, layer->norms_gpu, 1, net.delta_gpu, 1); + mul_gpu(w*h*c*layer->batch, layer->delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/pipeline/darknet/src/normalization_layer.h b/workloads/realworld/pipeline/darknet/src/normalization_layer.h new file mode 100644 index 
0000000000000000000000000000000000000000..ebc6e718df55b937989345289c42823b85eb2904 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/normalization_layer.h @@ -0,0 +1,19 @@ +#ifndef NORMALIZATION_LAYER_H +#define NORMALIZATION_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +layer* make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa); +void resize_normalization_layer(layer *layer, int h, int w); +void forward_normalization_layer(layer* layer, network net); +void backward_normalization_layer(layer* layer, network net); +void visualize_normalization_layer(layer* layer, char *window); + +#ifdef GPU +void forward_normalization_layer_gpu(layer* layer, network net); +void backward_normalization_layer_gpu(layer* layer, network net); +#endif + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/option_list.c b/workloads/realworld/pipeline/darknet/src/option_list.c new file mode 100644 index 0000000000000000000000000000000000000000..2f52781f8096fecc5e9d1db3cfbfa10685506b93 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/option_list.c @@ -0,0 +1,140 @@ +#include +#include +#include +#include "option_list.h" +#include "utils.h" + +list *read_data_cfg(char *filename) +{ + FILE *file = fopen(filename, "r"); + if(file == 0) file_error(filename); + char *line; + int nu = 0; + list *options = make_list(); + while((line=fgetl(file)) != 0){ + ++ nu; + strip(line); + switch(line[0]){ + case '\0': + case '#': + case ';': + free(line); + break; + default: + if(!read_option(line, options)){ + fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); + free(line); + } + break; + } + } + fclose(file); + return options; +} + +metadata get_metadata(char *file) +{ + metadata m = {0}; + list *options = read_data_cfg(file); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", 0); + if(!name_list) { 
+ fprintf(stderr, "No names or labels found\n"); + } else { + m.names = get_labels(name_list); + } + m.classes = option_find_int(options, "classes", 2); + free_list(options); + return m; +} + +int read_option(char *s, list *options) +{ + size_t i; + size_t len = strlen(s); + char *val = 0; + for(i = 0; i < len; ++i){ + if(s[i] == '='){ + s[i] = '\0'; + val = s+i+1; + break; + } + } + if(i == len-1) return 0; + char *key = s; + option_insert(options, key, val); + return 1; +} + +void option_insert(list *l, char *key, char *val) +{ + kvp *p = malloc(sizeof(kvp)); + p->key = key; + p->val = val; + p->used = 0; + list_insert(l, p); +} + +void option_unused(list *l) +{ + node *n = l->front; + while(n){ + kvp *p = (kvp *)n->val; + if(!p->used){ + fprintf(stderr, "Unused field: '%s = %s'\n", p->key, p->val); + } + n = n->next; + } +} + +char *option_find(list *l, char *key) +{ + node *n = l->front; + while(n){ + kvp *p = (kvp *)n->val; + if(strcmp(p->key, key) == 0){ + p->used = 1; + return p->val; + } + n = n->next; + } + return 0; +} +char *option_find_str(list *l, char *key, char *def) +{ + char *v = option_find(l, key); + if(v) return v; + if(def) fprintf(stderr, "%s: Using default '%s'\n", key, def); + return def; +} + +int option_find_int(list *l, char *key, int def) +{ + char *v = option_find(l, key); + if(v) return atoi(v); + fprintf(stderr, "%s: Using default '%d'\n", key, def); + return def; +} + +int option_find_int_quiet(list *l, char *key, int def) +{ + char *v = option_find(l, key); + if(v) return atoi(v); + return def; +} + +float option_find_float_quiet(list *l, char *key, float def) +{ + char *v = option_find(l, key); + if(v) return atof(v); + return def; +} + +float option_find_float(list *l, char *key, float def) +{ + char *v = option_find(l, key); + if(v) return atof(v); + fprintf(stderr, "%s: Using default '%lf'\n", key, def); + return def; +} diff --git a/workloads/realworld/pipeline/darknet/src/option_list.h 
b/workloads/realworld/pipeline/darknet/src/option_list.h new file mode 100644 index 0000000000000000000000000000000000000000..844bd8724b77889d9ab6e6e70f62305e3339048c --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/option_list.h @@ -0,0 +1,19 @@ +#ifndef OPTION_LIST_H +#define OPTION_LIST_H +#include "list.h" + +typedef struct{ + char *key; + char *val; + int used; +} kvp; + + +int read_option(char *s, list *options); +void option_insert(list *l, char *key, char *val); +char *option_find(list *l, char *key); +float option_find_float(list *l, char *key, float def); +float option_find_float_quiet(list *l, char *key, float def); +void option_unused(list *l); + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/parser.c b/workloads/realworld/pipeline/darknet/src/parser.c new file mode 100644 index 0000000000000000000000000000000000000000..942b046fb92b2a239e9252bd0e8565d825317379 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/parser.c @@ -0,0 +1,1347 @@ +#include +#include +#include +#include + +#include "activation_layer.h" +#include "logistic_layer.h" +#include "l2norm_layer.h" +#include "activations.h" +#include "avgpool_layer.h" +#include "batchnorm_layer.h" +#include "blas.h" +#include "connected_layer.h" +#include "deconvolutional_layer.h" +#include "convolutional_layer.h" +#include "cost_layer.h" +#include "crnn_layer.h" +#include "crop_layer.h" +#include "detection_layer.h" +#include "dropout_layer.h" +#include "gru_layer.h" +#include "list.h" +#include "local_layer.h" +#include "maxpool_layer.h" +#include "normalization_layer.h" +#include "option_list.h" +#include "parser.h" +#include "region_layer.h" +#include "yolo_layer.h" +#include "iseg_layer.h" +#include "reorg_layer.h" +#include "rnn_layer.h" +#include "route_layer.h" +#include "upsample_layer.h" +#include "shortcut_layer.h" +#include "softmax_layer.h" +#include "lstm_layer.h" +#include "utils.h" + +typedef struct{ + char *type; + list *options; +}section; + +list 
*read_cfg(char *filename); + +LAYER_TYPE string_to_layer_type(char * type) +{ + + if (strcmp(type, "[shortcut]")==0) return SHORTCUT; + if (strcmp(type, "[crop]")==0) return CROP; + if (strcmp(type, "[cost]")==0) return COST; + if (strcmp(type, "[detection]")==0) return DETECTION; + if (strcmp(type, "[region]")==0) return REGION; + if (strcmp(type, "[yolo]")==0) return YOLO; + if (strcmp(type, "[iseg]")==0) return ISEG; + if (strcmp(type, "[local]")==0) return LOCAL; + if (strcmp(type, "[conv]")==0 + || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL; + if (strcmp(type, "[deconv]")==0 + || strcmp(type, "[deconvolutional]")==0) return DECONVOLUTIONAL; + if (strcmp(type, "[activation]")==0) return ACTIVE; + if (strcmp(type, "[logistic]")==0) return LOGXENT; + if (strcmp(type, "[l2norm]")==0) return L2NORM; + if (strcmp(type, "[net]")==0 + || strcmp(type, "[network]")==0) return NETWORK; + if (strcmp(type, "[crnn]")==0) return CRNN; + if (strcmp(type, "[gru]")==0) return GRU; + if (strcmp(type, "[lstm]") == 0) return LSTM; + if (strcmp(type, "[rnn]")==0) return RNN; + if (strcmp(type, "[conn]")==0 + || strcmp(type, "[connected]")==0) return CONNECTED; + if (strcmp(type, "[max]")==0 + || strcmp(type, "[maxpool]")==0) return MAXPOOL; + if (strcmp(type, "[reorg]")==0) return REORG; + if (strcmp(type, "[avg]")==0 + || strcmp(type, "[avgpool]")==0) return AVGPOOL; + if (strcmp(type, "[dropout]")==0) return DROPOUT; + if (strcmp(type, "[lrn]")==0 + || strcmp(type, "[normalization]")==0) return NORMALIZATION; + if (strcmp(type, "[batchnorm]")==0) return BATCHNORM; + if (strcmp(type, "[soft]")==0 + || strcmp(type, "[softmax]")==0) return SOFTMAX; + if (strcmp(type, "[route]")==0) return ROUTE; + if (strcmp(type, "[upsample]")==0) return UPSAMPLE; + return BLANK; +} + +void free_section(section *s) +{ + free(s->type); + node *n = s->options->front; + while(n){ + kvp *pair = (kvp *)n->val; + free(pair->key); + free(pair); + node *next = n->next; + free(n); + n = next; 
+ } + free(s->options); + free(s); +} + +void parse_data(char *data, float *a, int n) +{ + int i; + if(!data) return; + char *curr = data; + char *next = data; + int done = 0; + for(i = 0; i < n && !done; ++i){ + while(*++next !='\0' && *next != ','); + if(*next == '\0') done = 1; + *next = '\0'; + sscanf(curr, "%g", &a[i]); + curr = next+1; + } +} + +typedef struct size_params{ + int batch; + int inputs; + int h; + int w; + int c; + int index; + int time_steps; + network *net; +} size_params; + +local_layer* parse_local(list *options, size_params params) +{ + int n = option_find_int(options, "filters",1); + int size = option_find_int(options, "size",1); + int stride = option_find_int(options, "stride",1); + int pad = option_find_int(options, "pad",0); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before local layer must output image."); + + local_layer *layer = make_local_layer(batch,h,w,c,n,size,stride,pad,activation); + + return layer; +} + +layer* parse_deconvolutional(list *options, size_params params) +{ + int n = option_find_int(options, "filters",1); + int size = option_find_int(options, "size",1); + int stride = option_find_int(options, "stride",1); + + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before deconvolutional layer must output image."); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + int pad = option_find_int_quiet(options, "pad",0); + int padding = option_find_int_quiet(options, "padding",0); + if(pad) padding = size/2; + + layer *l = make_deconvolutional_layer(batch,h,w,c,n,size,stride,padding, 
activation, batch_normalize, params.net->adam); + + return l; +} + + +convolutional_layer* parse_convolutional(list *options, size_params params) +{ + int n = option_find_int(options, "filters",1); + int size = option_find_int(options, "size",1); + int stride = option_find_int(options, "stride",1); + int pad = option_find_int_quiet(options, "pad",0); + int padding = option_find_int_quiet(options, "padding",0); + int groups = option_find_int_quiet(options, "groups", 1); + if(pad) padding = size/2; + + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before convolutional layer must output image."); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + int binary = option_find_int_quiet(options, "binary", 0); + int xnor = option_find_int_quiet(options, "xnor", 0); + + convolutional_layer *layer = make_convolutional_layer(batch,h,w,c,n,groups,size,stride,padding,activation, batch_normalize, binary, xnor, params.net->adam); + layer->flipped = option_find_int_quiet(options, "flipped", 0); + layer->dot = option_find_float_quiet(options, "dot", 0); + + return layer; +} + +layer* parse_crnn(list *options, size_params params) +{ + int output_filters = option_find_int(options, "output_filters",1); + int hidden_filters = option_find_int(options, "hidden_filters",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer *l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, activation, batch_normalize); + + l->shortcut = option_find_int_quiet(options, "shortcut", 0); + + return l; +} + +layer* parse_rnn(list *options, size_params 
params) +{ + int output = option_find_int(options, "output",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer *l = make_rnn_layer(params.batch, params.inputs, output, params.time_steps, activation, batch_normalize, params.net->adam); + + l->shortcut = option_find_int_quiet(options, "shortcut", 0); + + return l; +} + +layer* parse_gru(list *options, size_params params) +{ + int output = option_find_int(options, "output",1); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer *l = make_gru_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net->adam); + l->tanh = option_find_int_quiet(options, "tanh", 0); + + return l; +} + +layer* parse_lstm(list *options, size_params params) +{ + int output = option_find_int(options, "output", 1); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer *l = make_lstm_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net->adam); + + return l; +} + +layer* parse_connected(list *options, size_params params) +{ + int output = option_find_int(options, "output",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer *l = make_connected_layer(params.batch, params.inputs, output, activation, batch_normalize, params.net->adam); + return l; +} + +layer* parse_softmax(list *options, size_params params) +{ + int groups = option_find_int_quiet(options, "groups",1); + layer *l = make_softmax_layer(params.batch, params.inputs, groups); + l->temperature = option_find_float_quiet(options, "temperature", 1); + char *tree_file = option_find_str(options, "tree", 0); + if 
(tree_file) l->softmax_tree = read_tree(tree_file); + l->w = params.w; + l->h = params.h; + l->c = params.c; + l->spatial = option_find_float_quiet(options, "spatial", 0); + l->noloss = option_find_int_quiet(options, "noloss", 0); + return l; +} + +int *parse_yolo_mask(char *a, int *num) +{ + int *mask = 0; + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + mask = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + int val = atoi(a); + mask[i] = val; + a = strchr(a, ',')+1; + } + *num = n; + } + return mask; +} + +layer* parse_yolo(list *options, size_params params) +{ + int classes = option_find_int(options, "classes", 20); + int total = option_find_int(options, "num", 1); + int num = total; + + char *a = option_find_str(options, "mask", 0); + int *mask = parse_yolo_mask(a, &num); + layer *l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes); + assert(l->outputs == params.inputs); + + l->max_boxes = option_find_int_quiet(options, "max",90); + l->jitter = option_find_float(options, "jitter", .2); + + l->ignore_thresh = option_find_float(options, "ignore_thresh", .5); + l->truth_thresh = option_find_float(options, "truth_thresh", 1); + l->random = option_find_int_quiet(options, "random", 0); + + char *map_file = option_find_str(options, "map", 0); + if (map_file) l->map = read_map(map_file); + + a = option_find_str(options, "anchors", 0); + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + for(i = 0; i < n; ++i){ + float bias = atof(a); + l->biases[i] = bias; + a = strchr(a, ',')+1; + } + } + return l; +} + +layer* parse_iseg(list *options, size_params params) +{ + int classes = option_find_int(options, "classes", 20); + int ids = option_find_int(options, "ids", 32); + layer *l = make_iseg_layer(params.batch, params.w, params.h, classes, ids); + assert(l->outputs == params.inputs); + return l; +} + +layer* 
parse_region(list *options, size_params params) +{ + int coords = option_find_int(options, "coords", 4); + int classes = option_find_int(options, "classes", 20); + int num = option_find_int(options, "num", 1); + + layer *l = make_region_layer(params.batch, params.w, params.h, num, classes, coords); + assert(l->outputs == params.inputs); + + l->log = option_find_int_quiet(options, "log", 0); + l->sqrt = option_find_int_quiet(options, "sqrt", 0); + + l->softmax = option_find_int(options, "softmax", 0); + l->background = option_find_int_quiet(options, "background", 0); + l->max_boxes = option_find_int_quiet(options, "max",30); + l->jitter = option_find_float(options, "jitter", .2); + l->rescore = option_find_int_quiet(options, "rescore",0); + + l->thresh = option_find_float(options, "thresh", .5); + l->classfix = option_find_int_quiet(options, "classfix", 0); + l->absolute = option_find_int_quiet(options, "absolute", 0); + l->random = option_find_int_quiet(options, "random", 0); + + l->coord_scale = option_find_float(options, "coord_scale", 1); + l->object_scale = option_find_float(options, "object_scale", 1); + l->noobject_scale = option_find_float(options, "noobject_scale", 1); + l->mask_scale = option_find_float(options, "mask_scale", 1); + l->class_scale = option_find_float(options, "class_scale", 1); + l->bias_match = option_find_int_quiet(options, "bias_match",0); + + char *tree_file = option_find_str(options, "tree", 0); + if (tree_file) l->softmax_tree = read_tree(tree_file); + char *map_file = option_find_str(options, "map", 0); + if (map_file) l->map = read_map(map_file); + + char *a = option_find_str(options, "anchors", 0); + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + for(i = 0; i < n; ++i){ + float bias = atof(a); + l->biases[i] = bias; + a = strchr(a, ',')+1; + } + } + return l; +} + +detection_layer* parse_detection(list *options, size_params params) +{ + int coords = 
option_find_int(options, "coords", 1); + int classes = option_find_int(options, "classes", 1); + int rescore = option_find_int(options, "rescore", 0); + int num = option_find_int(options, "num", 1); + int side = option_find_int(options, "side", 7); + detection_layer *layer = make_detection_layer(params.batch, params.inputs, num, side, classes, coords, rescore); + + layer->softmax = option_find_int(options, "softmax", 0); + layer->sqrt = option_find_int(options, "sqrt", 0); + + layer->max_boxes = option_find_int_quiet(options, "max",90); + layer->coord_scale = option_find_float(options, "coord_scale", 1); + layer->forced = option_find_int(options, "forced", 0); + layer->object_scale = option_find_float(options, "object_scale", 1); + layer->noobject_scale = option_find_float(options, "noobject_scale", 1); + layer->class_scale = option_find_float(options, "class_scale", 1); + layer->jitter = option_find_float(options, "jitter", .2); + layer->random = option_find_int_quiet(options, "random", 0); + layer->reorg = option_find_int_quiet(options, "reorg", 0); + return layer; +} + +cost_layer* parse_cost(list *options, size_params params) +{ + char *type_s = option_find_str(options, "type", "sse"); + COST_TYPE type = get_cost_type(type_s); + float scale = option_find_float_quiet(options, "scale",1); + cost_layer *layer = make_cost_layer(params.batch, params.inputs, type, scale); + layer->ratio = option_find_float_quiet(options, "ratio",0); + layer->noobject_scale = option_find_float_quiet(options, "noobj", 1); + layer->thresh = option_find_float_quiet(options, "thresh",0); + return layer; +} + +crop_layer* parse_crop(list *options, size_params params) +{ + int crop_height = option_find_int(options, "crop_height",1); + int crop_width = option_find_int(options, "crop_width",1); + int flip = option_find_int(options, "flip",0); + float angle = option_find_float(options, "angle",0); + float saturation = option_find_float(options, "saturation",1); + float exposure = 
option_find_float(options, "exposure",1); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before crop layer must output image."); + + int noadjust = option_find_int_quiet(options, "noadjust",0); + + crop_layer *l = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip, angle, saturation, exposure); + l->shift = option_find_float(options, "shift", 0); + l->noadjust = noadjust; + return l; +} + +layer* parse_reorg(list *options, size_params params) +{ + int stride = option_find_int(options, "stride",1); + int reverse = option_find_int_quiet(options, "reverse",0); + int flatten = option_find_int_quiet(options, "flatten",0); + int extra = option_find_int_quiet(options, "extra",0); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before reorg layer must output image."); + + layer *layer = make_reorg_layer(batch,w,h,c,stride,reverse, flatten, extra); + return layer; +} + +maxpool_layer* parse_maxpool(list *options, size_params params) +{ + int stride = option_find_int(options, "stride",1); + int size = option_find_int(options, "size",stride); + int padding = option_find_int_quiet(options, "padding", size-1); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before maxpool layer must output image."); + + maxpool_layer *layer = make_maxpool_layer(batch,h,w,c,size,stride,padding); + return layer; +} + +avgpool_layer* parse_avgpool(list *options, size_params params) +{ + int batch,w,h,c; + w = params.w; + h = params.h; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before avgpool layer must output image."); + + avgpool_layer *layer = make_avgpool_layer(batch,w,h,c); + return layer; +} + +dropout_layer* parse_dropout(list *options, size_params params) +{ + float probability = option_find_float(options, "probability", .5); + 
dropout_layer *layer = make_dropout_layer(params.batch, params.inputs, probability); + layer->out_w = params.w; + layer->out_h = params.h; + layer->out_c = params.c; + return layer; +} + +layer* parse_normalization(list *options, size_params params) +{ + float alpha = option_find_float(options, "alpha", .0001); + float beta = option_find_float(options, "beta" , .75); + float kappa = option_find_float(options, "kappa", 1); + int size = option_find_int(options, "size", 5); + layer *l = make_normalization_layer(params.batch, params.w, params.h, params.c, size, alpha, beta, kappa); + return l; +} + +layer* parse_batchnorm(list *options, size_params params) +{ + layer *l = make_batchnorm_layer(params.batch, params.w, params.h, params.c); + return l; +} + +layer* parse_shortcut(list *options, size_params params, network *net) +{ + char *l = option_find(options, "from"); + int index = atoi(l); + if(index < 0) index = params.index + index; + + int batch = params.batch; + layer *from = net->layers[index]; + + layer *s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from->out_w, from->out_h, from->out_c); + + char *activation_s = option_find_str(options, "activation", "linear"); + ACTIVATION activation = get_activation(activation_s); + s->activation = activation; + s->alpha = option_find_float_quiet(options, "alpha", 1); + s->beta = option_find_float_quiet(options, "beta", 1); + return s; +} + + +layer* parse_l2norm(list *options, size_params params) +{ + layer *l = make_l2norm_layer(params.batch, params.inputs); + l->h = l->out_h = params.h; + l->w = l->out_w = params.w; + l->c = l->out_c = params.c; + return l; +} + + +layer* parse_logistic(list *options, size_params params) +{ + layer *l = make_logistic_layer(params.batch, params.inputs); + l->h = l->out_h = params.h; + l->w = l->out_w = params.w; + l->c = l->out_c = params.c; + return l; +} + +layer* parse_activation(list *options, size_params params) +{ + char *activation_s = option_find_str(options, 
"activation", "linear"); + ACTIVATION activation = get_activation(activation_s); + + layer *l = make_activation_layer(params.batch, params.inputs, activation); + + l->h = l->out_h = params.h; + l->w = l->out_w = params.w; + l->c = l->out_c = params.c; + + return l; +} + +layer* parse_upsample(list *options, size_params params, network *net) +{ + + int stride = option_find_int(options, "stride",2); + layer *l = make_upsample_layer(params.batch, params.w, params.h, params.c, stride); + l->scale = option_find_float_quiet(options, "scale", 1); + return l; +} + +route_layer* parse_route(list *options, size_params params, network *net) +{ + char *l = option_find(options, "layers"); + int len = strlen(l); + if(!l) error("Route Layer must specify input layers"); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (l[i] == ',') ++n; + } + + int *layers = calloc(n, sizeof(int)); + int *sizes = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + int index = atoi(l); + l = strchr(l, ',')+1; + if(index < 0) index = params.index + index; + layers[i] = index; + sizes[i] = net->layers[index]->outputs; + } + int batch = params.batch; + + route_layer *layer = make_route_layer(batch, n, layers, sizes); + + convolutional_layer *first = net->layers[layers[0]]; + layer->out_w = first->out_w; + layer->out_h = first->out_h; + layer->out_c = first->out_c; + for(i = 1; i < n; ++i){ + int index = layers[i]; + convolutional_layer *next = net->layers[index]; + if(next->out_w == first->out_w && next->out_h == first->out_h){ + layer->out_c += next->out_c; + }else{ + layer->out_h = layer->out_w = layer->out_c = 0; + } + } + + return layer; +} + +learning_rate_policy get_policy(char *s) +{ + if (strcmp(s, "random")==0) return RANDOM; + if (strcmp(s, "poly")==0) return POLY; + if (strcmp(s, "constant")==0) return CONSTANT; + if (strcmp(s, "step")==0) return STEP; + if (strcmp(s, "exp")==0) return EXP; + if (strcmp(s, "sigmoid")==0) return SIG; + if (strcmp(s, "steps")==0) return STEPS; + 
fprintf(stderr, "Couldn't find policy %s, going with constant\n", s); + return CONSTANT; +} + +void parse_net_options(list *options, network *net) +{ + net->batch = option_find_int(options, "batch",1); + net->learning_rate = option_find_float(options, "learning_rate", .001); + net->momentum = option_find_float(options, "momentum", .9); + net->decay = option_find_float(options, "decay", .0001); + int subdivs = option_find_int(options, "subdivisions",1); + net->time_steps = option_find_int_quiet(options, "time_steps",1); + net->notruth = option_find_int_quiet(options, "notruth",0); + net->batch /= subdivs; + net->batch *= net->time_steps; + net->subdivisions = subdivs; + net->random = option_find_int_quiet(options, "random", 0); + + net->adam = option_find_int_quiet(options, "adam", 0); + if(net->adam){ + net->B1 = option_find_float(options, "B1", .9); + net->B2 = option_find_float(options, "B2", .999); + net->eps = option_find_float(options, "eps", .0000001); + } + + net->h = option_find_int_quiet(options, "height",0); + net->w = option_find_int_quiet(options, "width",0); + net->c = option_find_int_quiet(options, "channels",0); + net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c); + net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2); + net->min_crop = option_find_int_quiet(options, "min_crop",net->w); + net->max_ratio = option_find_float_quiet(options, "max_ratio", (float) net->max_crop / net->w); + net->min_ratio = option_find_float_quiet(options, "min_ratio", (float) net->min_crop / net->w); + net->center = option_find_int_quiet(options, "center",0); + net->clip = option_find_float_quiet(options, "clip", 0); + + net->angle = option_find_float_quiet(options, "angle", 0); + net->aspect = option_find_float_quiet(options, "aspect", 1); + net->saturation = option_find_float_quiet(options, "saturation", 1); + net->exposure = option_find_float_quiet(options, "exposure", 1); + net->hue = option_find_float_quiet(options, 
"hue", 0); + + if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied"); + + char *policy_s = option_find_str(options, "policy", "constant"); + net->policy = get_policy(policy_s); + net->burn_in = option_find_int_quiet(options, "burn_in", 0); + net->power = option_find_float_quiet(options, "power", 4); + if(net->policy == STEP){ + net->step = option_find_int(options, "step", 1); + net->scale = option_find_float(options, "scale", 1); + } else if (net->policy == STEPS){ + char *l = option_find(options, "steps"); + char *p = option_find(options, "scales"); + if(!l || !p) error("STEPS policy must have steps and scales in cfg file"); + + int len = strlen(l); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (l[i] == ',') ++n; + } + int *steps = calloc(n, sizeof(int)); + float *scales = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + int step = atoi(l); + float scale = atof(p); + l = strchr(l, ',')+1; + p = strchr(p, ',')+1; + steps[i] = step; + scales[i] = scale; + } + net->scales = scales; + net->steps = steps; + net->num_steps = n; + } else if (net->policy == EXP){ + net->gamma = option_find_float(options, "gamma", 1); + } else if (net->policy == SIG){ + net->gamma = option_find_float(options, "gamma", 1); + net->step = option_find_int(options, "step", 1); + } else if (net->policy == POLY || net->policy == RANDOM){ + } + net->max_batches = option_find_int(options, "max_batches", 0); +} + +int is_network(section *s) +{ + return (strcmp(s->type, "[net]")==0 + || strcmp(s->type, "[network]")==0); +} + +network *parse_network_cfg(char *filename) +{ + list *sections = read_cfg(filename); + node *n = sections->front; + if(!n) error("Config file has no sections"); + network *net = make_network(sections->size - 1); + net->gpu_index = gpu_index; + size_params params; + + section *s = (section *)n->val; + list *options = s->options; + if(!is_network(s)) error("First section must be [net] or [network]"); + parse_net_options(options, 
net); + + params.h = net->h; + params.w = net->w; + params.c = net->c; + params.inputs = net->inputs; + params.batch = net->batch; + params.time_steps = net->time_steps; + params.net = net; + + size_t workspace_size = 0; + n = n->next; + int count = 0; + free_section(s); + fprintf(stderr, "layer filters size input output\n"); + while(n){ + params.index = count; + fprintf(stderr, "%5d ", count); + s = (section *)n->val; + options = s->options; + layer *l = malloc(sizeof(layer)); + LAYER_TYPE lt = string_to_layer_type(s->type); + if(lt == CONVOLUTIONAL){ + l = parse_convolutional(options, params); + }else if(lt == DECONVOLUTIONAL){ + l = parse_deconvolutional(options, params); + }else if(lt == LOCAL){ + l = parse_local(options, params); + }else if(lt == ACTIVE){ + l = parse_activation(options, params); + }else if(lt == LOGXENT){ + l = parse_logistic(options, params); + }else if(lt == L2NORM){ + l = parse_l2norm(options, params); + }else if(lt == RNN){ + l = parse_rnn(options, params); + }else if(lt == GRU){ + l = parse_gru(options, params); + }else if (lt == LSTM) { + l = parse_lstm(options, params); + }else if(lt == CRNN){ + l = parse_crnn(options, params); + }else if(lt == CONNECTED){ + l = parse_connected(options, params); + }else if(lt == CROP){ + l = parse_crop(options, params); + }else if(lt == COST){ + l = parse_cost(options, params); + }else if(lt == REGION){ + l = parse_region(options, params); + }else if(lt == YOLO){ + l = parse_yolo(options, params); + }else if(lt == ISEG){ + l = parse_iseg(options, params); + }else if(lt == DETECTION){ + l = parse_detection(options, params); + }else if(lt == SOFTMAX){ + l = parse_softmax(options, params); + net->hierarchy = l->softmax_tree; + }else if(lt == NORMALIZATION){ + l = parse_normalization(options, params); + }else if(lt == BATCHNORM){ + l = parse_batchnorm(options, params); + }else if(lt == MAXPOOL){ + l = parse_maxpool(options, params); + }else if(lt == REORG){ + l = parse_reorg(options, params); + }else if(lt 
== AVGPOOL){ + l = parse_avgpool(options, params); + }else if(lt == ROUTE){ + l = parse_route(options, params, net); + }else if(lt == UPSAMPLE){ + l = parse_upsample(options, params, net); + }else if(lt == SHORTCUT){ + l = parse_shortcut(options, params, net); + }else if(lt == DROPOUT){ + l = parse_dropout(options, params); + l->output = net->layers[count-1]->output; + l->delta = net->layers[count-1]->delta; +#ifdef GPU + l->output_gpu = net->layers[count-1]->output_gpu; + l->delta_gpu = net->layers[count-1]->delta_gpu; +#endif + }else{ + fprintf(stderr, "Type not recognized: %s\n", s->type); + } + // Ruihao + if (lt == LOGXENT + || lt == LSTM + || lt == ACTIVE + || lt == DETECTION + || lt == L2NORM + || lt == GRU + || lt == YOLO + || lt == CONVOLUTIONAL + || lt == SHORTCUT + || lt == AVGPOOL + || lt == MAXPOOL + || lt == COST + || lt == REGION + || lt == DECONVOLUTIONAL + || lt == LOCAL + || lt == CONNECTED + || lt == SOFTMAX + || lt == UPSAMPLE + || lt == ROUTE + || lt == ISEG) { + l->has_delta = 1; + } else { + l->has_delta = 0; + } + // Ruihao + + l->clip = net->clip; + l->truth = option_find_int_quiet(options, "truth", 0); + l->onlyforward = option_find_int_quiet(options, "onlyforward", 0); + l->stopbackward = option_find_int_quiet(options, "stopbackward", 0); + l->dontsave = option_find_int_quiet(options, "dontsave", 0); + l->dontload = option_find_int_quiet(options, "dontload", 0); + l->numload = option_find_int_quiet(options, "numload", 0); + l->dontloadscales = option_find_int_quiet(options, "dontloadscales", 0); + l->learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1); + l->smooth = option_find_float_quiet(options, "smooth", 0); + option_unused(options); + net->layers[count] = l; + if (l->workspace_size > workspace_size) workspace_size = l->workspace_size; + free_section(s); + n = n->next; + ++count; + if(n){ + params.h = l->out_h; + params.w = l->out_w; + params.c = l->out_c; + params.inputs = l->outputs; + } + } + // Ruihao + // 
fprintf(stderr, "parser finished \n"); + // Ruihao + free_list(sections); + layer *out = get_network_output_layer(net); + net->outputs = out->outputs; + net->truths = out->outputs; + if(net->layers[net->n-1]->truths) net->truths = net->layers[net->n-1]->truths; + net->output = out->output; + net->input = calloc(net->inputs*net->batch, sizeof(float)); + net->truth = calloc(net->truths*net->batch, sizeof(float)); +#ifdef GPU + net->output_gpu = out->output_gpu; + net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch); + net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch); +#endif + if(workspace_size){ + //printf("%ld\n", workspace_size); +#ifdef GPU + if(gpu_index >= 0){ + net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1); + }else { + net->workspace = calloc(1, workspace_size); + } +#else + net->workspace = calloc(1, workspace_size); +#endif + } + return net; +} + +list *read_cfg(char *filename) +{ + FILE *file = fopen(filename, "r"); + if(file == 0) file_error(filename); + char *line; + int nu = 0; + list *options = make_list(); + section *current = 0; + while((line=fgetl(file)) != 0){ + ++ nu; + strip(line); + switch(line[0]){ + case '[': + current = malloc(sizeof(section)); + list_insert(options, current); + current->options = make_list(); + current->type = line; + break; + case '\0': + case '#': + case ';': + free(line); + break; + default: + if(!read_option(line, current->options)){ + fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); + free(line); + } + break; + } + } + fclose(file); + return options; +} + +void save_convolutional_weights_binary(layer *l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_convolutional_layer(l); + } +#endif + binarize_weights(l->weights, l->n, l->c*l->size*l->size, l->binary_weights); + int size = l->c*l->size*l->size; + int i, j, k; + fwrite(l->biases, sizeof(float), l->n, fp); + if (l->batch_normalize){ + fwrite(l->scales, sizeof(float), l->n, 
fp); + fwrite(l->rolling_mean, sizeof(float), l->n, fp); + fwrite(l->rolling_variance, sizeof(float), l->n, fp); + } + for(i = 0; i < l->n; ++i){ + float mean = l->binary_weights[i*size]; + if(mean < 0) mean = -mean; + fwrite(&mean, sizeof(float), 1, fp); + for(j = 0; j < size/8; ++j){ + int index = i*size + j*8; + unsigned char c = 0; + for(k = 0; k < 8; ++k){ + if (j*8 + k >= size) break; + if (l->binary_weights[index + k] > 0) c = (c | 1<binary){ + //save_convolutional_weights_binary(l, fp); + //return; + } +#ifdef GPU + if(gpu_index >= 0){ + pull_convolutional_layer(l); + } +#endif + int num = l->nweights; + fwrite(l->biases, sizeof(float), l->n, fp); + if (l->batch_normalize){ + fwrite(l->scales, sizeof(float), l->n, fp); + fwrite(l->rolling_mean, sizeof(float), l->n, fp); + fwrite(l->rolling_variance, sizeof(float), l->n, fp); + } + fwrite(l->weights, sizeof(float), num, fp); +} + +void save_batchnorm_weights(layer *l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_batchnorm_layer(l); + } +#endif + fwrite(l->scales, sizeof(float), l->c, fp); + fwrite(l->rolling_mean, sizeof(float), l->c, fp); + fwrite(l->rolling_variance, sizeof(float), l->c, fp); +} + +void save_connected_weights(layer *l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_connected_layer(l); + } +#endif + fwrite(l->biases, sizeof(float), l->outputs, fp); + fwrite(l->weights, sizeof(float), l->outputs*l->inputs, fp); + if (l->batch_normalize){ + fwrite(l->scales, sizeof(float), l->outputs, fp); + fwrite(l->rolling_mean, sizeof(float), l->outputs, fp); + fwrite(l->rolling_variance, sizeof(float), l->outputs, fp); + } +} + +void save_weights_upto(network *net, char *filename, int cutoff) +{ +#ifdef GPU + if(net->gpu_index >= 0){ + cuda_set_device(net->gpu_index); + } +#endif + fprintf(stderr, "Saving weights to %s\n", filename); + FILE *fp = fopen(filename, "wb"); + if(!fp) file_error(filename); + + int major = 0; + int minor = 2; + int revision = 0; + fwrite(&major, sizeof(int), 1, 
fp); + fwrite(&minor, sizeof(int), 1, fp); + fwrite(&revision, sizeof(int), 1, fp); + fwrite(net->seen, sizeof(size_t), 1, fp); + + int i; + for(i = 0; i < net->n && i < cutoff; ++i){ + layer *l = net->layers[i]; + if (l->dontsave) continue; + if(l->type == CONVOLUTIONAL || l->type == DECONVOLUTIONAL){ + save_convolutional_weights(l, fp); + } if(l->type == CONNECTED){ + save_connected_weights(l, fp); + } if(l->type == BATCHNORM){ + save_batchnorm_weights(l, fp); + } if(l->type == RNN){ + save_connected_weights((l->input_layer), fp); + save_connected_weights((l->self_layer), fp); + save_connected_weights((l->output_layer), fp); + } if (l->type == LSTM) { + save_connected_weights((l->wi), fp); + save_connected_weights((l->wf), fp); + save_connected_weights((l->wo), fp); + save_connected_weights((l->wg), fp); + save_connected_weights((l->ui), fp); + save_connected_weights((l->uf), fp); + save_connected_weights((l->uo), fp); + save_connected_weights((l->ug), fp); + } if (l->type == GRU) { + if(1){ + save_connected_weights((l->wz), fp); + save_connected_weights((l->wr), fp); + save_connected_weights((l->wh), fp); + save_connected_weights((l->uz), fp); + save_connected_weights((l->ur), fp); + save_connected_weights((l->uh), fp); + }else{ + save_connected_weights((l->reset_layer), fp); + save_connected_weights((l->update_layer), fp); + save_connected_weights((l->state_layer), fp); + } + } if(l->type == CRNN){ + save_convolutional_weights((l->input_layer), fp); + save_convolutional_weights((l->self_layer), fp); + save_convolutional_weights((l->output_layer), fp); + } if(l->type == LOCAL){ +#ifdef GPU + if(gpu_index >= 0){ + pull_local_layer(l); + } +#endif + int locations = l->out_w*l->out_h; + int size = l->size*l->size*l->c*l->n*locations; + fwrite(l->biases, sizeof(float), l->outputs, fp); + fwrite(l->weights, sizeof(float), size, fp); + } + } + fclose(fp); +} +void save_weights(network *net, char *filename) +{ + save_weights_upto(net, filename, net->n); +} + +void 
transpose_matrix(float *a, int rows, int cols) +{ + float *transpose = calloc(rows*cols, sizeof(float)); + int x, y; + for(x = 0; x < rows; ++x){ + for(y = 0; y < cols; ++y){ + transpose[y*rows + x] = a[x*cols + y]; + } + } + memcpy(a, transpose, rows*cols*sizeof(float)); + free(transpose); +} + +void load_connected_weights(layer *l, FILE *fp, int transpose) +{ + fread(l->biases, sizeof(float), l->outputs, fp); + fread(l->weights, sizeof(float), l->outputs*l->inputs, fp); + if(transpose){ + transpose_matrix(l->weights, l->inputs, l->outputs); + } + //printf("Biases: %f mean %f variance\n", mean_array(l->biases, l->outputs), variance_array(l->biases, l->outputs)); + //printf("Weights: %f mean %f variance\n", mean_array(l->weights, l->outputs*l->inputs), variance_array(l->weights, l->outputs*l->inputs)); + if (l->batch_normalize && (!l->dontloadscales)){ + fread(l->scales, sizeof(float), l->outputs, fp); + fread(l->rolling_mean, sizeof(float), l->outputs, fp); + fread(l->rolling_variance, sizeof(float), l->outputs, fp); + //printf("Scales: %f mean %f variance\n", mean_array(l->scales, l->outputs), variance_array(l->scales, l->outputs)); + //printf("rolling_mean: %f mean %f variance\n", mean_array(l->rolling_mean, l->outputs), variance_array(l->rolling_mean, l->outputs)); + //printf("rolling_variance: %f mean %f variance\n", mean_array(l->rolling_variance, l->outputs), variance_array(l->rolling_variance, l->outputs)); + } +#ifdef GPU + if(gpu_index >= 0){ + push_connected_layer(l); + } +#endif +} + +void load_batchnorm_weights(layer *l, FILE *fp) +{ + fread(l->scales, sizeof(float), l->c, fp); + fread(l->rolling_mean, sizeof(float), l->c, fp); + fread(l->rolling_variance, sizeof(float), l->c, fp); +#ifdef GPU + if(gpu_index >= 0){ + push_batchnorm_layer(l); + } +#endif +} + +void load_convolutional_weights_binary(layer *l, FILE *fp) +{ + fread(l->biases, sizeof(float), l->n, fp); + if (l->batch_normalize && (!l->dontloadscales)){ + fread(l->scales, sizeof(float), 
l->n, fp); + fread(l->rolling_mean, sizeof(float), l->n, fp); + fread(l->rolling_variance, sizeof(float), l->n, fp); + } + int size = l->c*l->size*l->size; + int i, j, k; + for(i = 0; i < l->n; ++i){ + float mean = 0; + fread(&mean, sizeof(float), 1, fp); + for(j = 0; j < size/8; ++j){ + int index = i*size + j*8; + unsigned char c = 0; + fread(&c, sizeof(char), 1, fp); + for(k = 0; k < 8; ++k){ + if (j*8 + k >= size) break; + l->weights[index + k] = (c & 1<= 0){ + push_convolutional_layer(l); + } +#endif +} + +void load_convolutional_weights(layer *l, FILE *fp) +{ + if(l->binary){ + //load_convolutional_weights_binary(l, fp); + //return; + } + if(l->numload) l->n = l->numload; + int num = l->c/l->groups*l->n*l->size*l->size; + fread(l->biases, sizeof(float), l->n, fp); + if (l->batch_normalize && (!l->dontloadscales)){ + fread(l->scales, sizeof(float), l->n, fp); + fread(l->rolling_mean, sizeof(float), l->n, fp); + fread(l->rolling_variance, sizeof(float), l->n, fp); + if(0){ + int i; + for(i = 0; i < l->n; ++i){ + printf("%g, ", l->rolling_mean[i]); + } + printf("\n"); + for(i = 0; i < l->n; ++i){ + printf("%g, ", l->rolling_variance[i]); + } + printf("\n"); + } + if(0){ + fill_cpu(l->n, 0, l->rolling_mean, 1); + fill_cpu(l->n, 0, l->rolling_variance, 1); + } + if(0){ + int i; + for(i = 0; i < l->n; ++i){ + printf("%g, ", l->rolling_mean[i]); + } + printf("\n"); + for(i = 0; i < l->n; ++i){ + printf("%g, ", l->rolling_variance[i]); + } + printf("\n"); + } + } + fread(l->weights, sizeof(float), num, fp); + //if(l->c == 3) scal_cpu(num, 1./256, l->weights, 1); + if (l->flipped) { + transpose_matrix(l->weights, l->c*l->size*l->size, l->n); + } + //if (l->binary) binarize_weights(l->weights, l->n, l->c*l->size*l->size, l->weights); +#ifdef GPU + if(gpu_index >= 0){ + push_convolutional_layer(l); + } +#endif +} + + +void load_weights_upto(network *net, char *filename, int start, int cutoff) +{ +#ifdef GPU + if(net->gpu_index >= 0){ + cuda_set_device(net->gpu_index); + 
} +#endif + fprintf(stderr, "Loading weights from %s...", filename); + fflush(stdout); + FILE *fp = fopen(filename, "rb"); + if(!fp) file_error(filename); + + int major; + int minor; + int revision; + fread(&major, sizeof(int), 1, fp); + fread(&minor, sizeof(int), 1, fp); + fread(&revision, sizeof(int), 1, fp); + if ((major*10 + minor) >= 2 && major < 1000 && minor < 1000){ + fread(net->seen, sizeof(size_t), 1, fp); + } else { + int iseen = 0; + fread(&iseen, sizeof(int), 1, fp); + *net->seen = iseen; + } + int transpose = (major > 1000) || (minor > 1000); + + int i; + for(i = start; i < net->n && i < cutoff; ++i){ + layer *l = net->layers[i]; + if (l->dontload) { + continue; + } + if(l->type == CONVOLUTIONAL || l->type == DECONVOLUTIONAL){ + // Ruihao + // fprintf(stderr, "layer %d is Conv Layer\n", i); + // Ruihao + load_convolutional_weights(l, fp); + } + if(l->type == CONNECTED){ + load_connected_weights(l, fp, transpose); + } + if(l->type == BATCHNORM){ + load_batchnorm_weights(l, fp); + } + if(l->type == CRNN){ + load_convolutional_weights((l->input_layer), fp); + load_convolutional_weights((l->self_layer), fp); + load_convolutional_weights((l->output_layer), fp); + } + if(l->type == RNN){ + load_connected_weights((l->input_layer), fp, transpose); + load_connected_weights((l->self_layer), fp, transpose); + load_connected_weights((l->output_layer), fp, transpose); + } + if (l->type == LSTM) { + load_connected_weights((l->wi), fp, transpose); + load_connected_weights((l->wf), fp, transpose); + load_connected_weights((l->wo), fp, transpose); + load_connected_weights((l->wg), fp, transpose); + load_connected_weights((l->ui), fp, transpose); + load_connected_weights((l->uf), fp, transpose); + load_connected_weights((l->uo), fp, transpose); + load_connected_weights((l->ug), fp, transpose); + } + if (l->type == GRU) { + if(1){ + load_connected_weights((l->wz), fp, transpose); + load_connected_weights((l->wr), fp, transpose); + load_connected_weights((l->wh), fp, 
transpose); + load_connected_weights((l->uz), fp, transpose); + load_connected_weights((l->ur), fp, transpose); + load_connected_weights((l->uh), fp, transpose); + }else{ + load_connected_weights((l->reset_layer), fp, transpose); + load_connected_weights((l->update_layer), fp, transpose); + load_connected_weights((l->state_layer), fp, transpose); + } + } + if(l->type == LOCAL){ + int locations = l->out_w*l->out_h; + int size = l->size*l->size*l->c*l->n*locations; + fread(l->biases, sizeof(float), l->outputs, fp); + fread(l->weights, sizeof(float), size, fp); +#ifdef GPU + if(gpu_index >= 0){ + push_local_layer(l); + } +#endif + } + } + fprintf(stderr, "Done!\n"); + fclose(fp); +} + +void load_weights(network *net, char *filename) +{ + load_weights_upto(net, filename, 0, net->n); +} + diff --git a/workloads/realworld/pipeline/darknet/src/parser.h b/workloads/realworld/pipeline/darknet/src/parser.h new file mode 100644 index 0000000000000000000000000000000000000000..81aef2c86f3e6cb362f8bde9695ce9d5699ca77f --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/parser.h @@ -0,0 +1,9 @@ +#ifndef PARSER_H +#define PARSER_H +#include "darknet.h" +#include "network.h" + +void save_network(network net, char *filename); +void save_weights_double(network net, char *filename); + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/region_layer.c b/workloads/realworld/pipeline/darknet/src/region_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..7bd4137bc6472bc123fc0729548c99e85fc57c49 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/region_layer.c @@ -0,0 +1,513 @@ +#include "region_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer* make_region_layer(int batch, int w, int h, int n, int classes, int coords) +{ + layer* l = calloc(1, sizeof(layer)); + l->type = REGION; + + l->n = n; + l->batch = batch; + l->h = 
h; + l->w = w; + l->c = n*(classes + coords + 1); + l->out_w = l->w; + l->out_h = l->h; + l->out_c = l->c; + l->classes = classes; + l->coords = coords; + l->cost = calloc(1, sizeof(float)); + l->biases = calloc(n*2, sizeof(float)); + l->bias_updates = calloc(n*2, sizeof(float)); + l->outputs = h*w*n*(classes + coords + 1); + l->inputs = l->outputs; + l->truths = 30*(l->coords + 1); + l->delta = calloc(batch*l->outputs, sizeof(float)); + l->output = calloc(batch*l->outputs, sizeof(float)); + int i; + for(i = 0; i < n*2; ++i){ + l->biases[i] = .5; + } + + l->forward = forward_region_layer; + l->backward = backward_region_layer; +#ifdef GPU + l->forward_gpu = forward_region_layer_gpu; + l->backward_gpu = backward_region_layer_gpu; + // l->output_gpu = cuda_make_array(l->output, batch*l->outputs); + // l->delta_gpu = cuda_make_array(l->delta, batch*l->outputs); +#endif + + fprintf(stderr, "detection\n"); + srand(0); + + return l; +} + +void resize_region_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->n*(l->classes + l->coords + 1); + l->inputs = l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride) +{ + box b; + b.x = (i + x[index + 0*stride]) / w; + b.y = (j + x[index + 1*stride]) / h; + b.w = exp(x[index + 2*stride]) * biases[2*n] / w; + b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h; + return b; +} + +float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale, int stride) +{ + box pred = get_region_box(x, biases, n, index, i, j, w, h, stride); + 
float iou = box_iou(pred, truth); + + float tx = (truth.x*w - i); + float ty = (truth.y*h - j); + float tw = log(truth.w*w / biases[2*n]); + float th = log(truth.h*h / biases[2*n + 1]); + + delta[index + 0*stride] = scale * (tx - x[index + 0*stride]); + delta[index + 1*stride] = scale * (ty - x[index + 1*stride]); + delta[index + 2*stride] = scale * (tw - x[index + 2*stride]); + delta[index + 3*stride] = scale * (th - x[index + 3*stride]); + return iou; +} + +void delta_region_mask(float *truth, float *x, int n, int index, float *delta, int stride, int scale) +{ + int i; + for(i = 0; i < n; ++i){ + delta[index + i*stride] = scale*(truth[i] - x[index + i*stride]); + } +} + + +void delta_region_class(float *output, float *delta, int index, int class_, int classes, tree *hier, float scale, int stride, float *avg_cat, int tag) +{ + int i, n; + if(hier){ + float pred = 1; + while(class_ >= 0){ + pred *= output[index + stride*class_]; + int g = hier->group[class_]; + int offset = hier->group_offset[g]; + for(i = 0; i < hier->group_size[g]; ++i){ + delta[index + stride*(offset + i)] = scale * (0 - output[index + stride*(offset + i)]); + } + delta[index + stride*class_] = scale * (1 - output[index + stride*class_]); + + class_ = hier->parent[class_]; + } + *avg_cat += pred; + } else { + if (delta[index] && tag){ + delta[index + stride*class_] = scale * (1 - output[index + stride*class_]); + return; + } + for(n = 0; n < classes; ++n){ + delta[index + stride*n] = scale * (((n == class_)?1 : 0) - output[index + stride*n]); + if(n == class_) *avg_cat += output[index + stride*n]; + } + } +} + +float logit(float x) +{ + return log(x/(1.-x)); +} + +float tisnan(float x) +{ + return (x != x); +} + +int entry_index(layer* l, int batch, int location, int entry) +{ + int n = location / (l->w*l->h); + int loc = location % (l->w*l->h); + return batch*l->outputs + n*l->w*l->h*(l->coords+l->classes+1) + entry*l->w*l->h + loc; +} + +void forward_region_layer(layer* l, network net) +{ + 
int i,j,b,t,n; + memcpy(l->output, net.input, l->outputs*l->batch*sizeof(float)); + +#ifndef GPU + for (b = 0; b < l->batch; ++b){ + for(n = 0; n < l->n; ++n){ + int index = entry_index(l, b, n*l->w*l->h, 0); + activate_array(l->output + index, 2*l->w*l->h, LOGISTIC); + index = entry_index(l, b, n*l->w*l->h, l->coords); + if(!l->background) activate_array(l->output + index, l->w*l->h, LOGISTIC); + index = entry_index(l, b, n*l->w*l->h, l->coords + 1); + if(!l->softmax && !l->softmax_tree) activate_array(l->output + index, l->classes*l->w*l->h, LOGISTIC); + } + } + if (l->softmax_tree){ + int i; + int count = l->coords + 1; + for (i = 0; i < l->softmax_tree->groups; ++i) { + int group_size = l->softmax_tree->group_size[i]; + softmax_cpu(net.input + count, group_size, l->batch, l->inputs, l->n*l->w*l->h, 1, l->n*l->w*l->h, l->temperature, l->output + count); + count += group_size; + } + } else if (l->softmax){ + int index = entry_index(l, 0, 0, l->coords + !l->background); + softmax_cpu(net.input + index, l->classes + l->background, l->batch*l->n, l->inputs/l->n, l->w*l->h, 1, l->w*l->h, 1, l->output + index); + } +#endif + + memset(l->delta, 0, l->outputs * l->batch * sizeof(float)); + if(!net.train) return; + float avg_iou = 0; + float recall = 0; + float avg_cat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + int class_count = 0; + *(l->cost) = 0; + for (b = 0; b < l->batch; ++b) { + if(l->softmax_tree){ + int onlyclass = 0; + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l->coords + 1) + b*l->truths, 1); + if(!truth.x) break; + int class_ = net.truth[t*(l->coords + 1) + b*l->truths + l->coords]; + float maxp = 0; + int maxi = 0; + if(truth.x > 100000 && truth.y > 100000){ + for(n = 0; n < l->n*l->w*l->h; ++n){ + int class_index = entry_index(l, b, n, l->coords + 1); + int obj_index = entry_index(l, b, n, l->coords); + float scale = l->output[obj_index]; + l->delta[obj_index] = l->noobject_scale * (0 - 
l->output[obj_index]); + float p = scale*get_hierarchy_probability(l->output + class_index, l->softmax_tree, class_, l->w*l->h); + if(p > maxp){ + maxp = p; + maxi = n; + } + } + int class_index = entry_index(l, b, maxi, l->coords + 1); + int obj_index = entry_index(l, b, maxi, l->coords); + delta_region_class(l->output, l->delta, class_index, class_, l->classes, l->softmax_tree, l->class_scale, l->w*l->h, &avg_cat, !l->softmax); + if(l->output[obj_index] < .3) l->delta[obj_index] = l->object_scale * (.3 - l->output[obj_index]); + else l->delta[obj_index] = 0; + l->delta[obj_index] = 0; + ++class_count; + onlyclass = 1; + break; + } + } + if(onlyclass) continue; + } + for (j = 0; j < l->h; ++j) { + for (i = 0; i < l->w; ++i) { + for (n = 0; n < l->n; ++n) { + int box_index = entry_index(l, b, n*l->w*l->h + j*l->w + i, 0); + box pred = get_region_box(l->output, l->biases, n, box_index, i, j, l->w, l->h, l->w*l->h); + float best_iou = 0; + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l->coords + 1) + b*l->truths, 1); + if(!truth.x) break; + float iou = box_iou(pred, truth); + if (iou > best_iou) { + best_iou = iou; + } + } + int obj_index = entry_index(l, b, n*l->w*l->h + j*l->w + i, l->coords); + avg_anyobj += l->output[obj_index]; + l->delta[obj_index] = l->noobject_scale * (0 - l->output[obj_index]); + if(l->background) l->delta[obj_index] = l->noobject_scale * (1 - l->output[obj_index]); + if (best_iou > l->thresh) { + l->delta[obj_index] = 0; + } + + if(*(net.seen) < 12800){ + box truth = {0}; + truth.x = (i + .5)/l->w; + truth.y = (j + .5)/l->h; + truth.w = l->biases[2*n]/l->w; + truth.h = l->biases[2*n+1]/l->h; + delta_region_box(truth, l->output, l->biases, n, box_index, i, j, l->w, l->h, l->delta, .01, l->w*l->h); + } + } + } + } + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l->coords + 1) + b*l->truths, 1); + + if(!truth.x) break; + float best_iou = 0; + int best_n = 0; + i = (truth.x * l->w); + j = (truth.y * 
l->h); + box truth_shift = truth; + truth_shift.x = 0; + truth_shift.y = 0; + for(n = 0; n < l->n; ++n){ + int box_index = entry_index(l, b, n*l->w*l->h + j*l->w + i, 0); + box pred = get_region_box(l->output, l->biases, n, box_index, i, j, l->w, l->h, l->w*l->h); + if(l->bias_match){ + pred.w = l->biases[2*n]/l->w; + pred.h = l->biases[2*n+1]/l->h; + } + pred.x = 0; + pred.y = 0; + float iou = box_iou(pred, truth_shift); + if (iou > best_iou){ + best_iou = iou; + best_n = n; + } + } + + int box_index = entry_index(l, b, best_n*l->w*l->h + j*l->w + i, 0); + float iou = delta_region_box(truth, l->output, l->biases, best_n, box_index, i, j, l->w, l->h, l->delta, l->coord_scale * (2 - truth.w*truth.h), l->w*l->h); + if(l->coords > 4){ + int mask_index = entry_index(l, b, best_n*l->w*l->h + j*l->w + i, 4); + delta_region_mask(net.truth + t*(l->coords + 1) + b*l->truths + 5, l->output, l->coords - 4, mask_index, l->delta, l->w*l->h, l->mask_scale); + } + if(iou > .5) recall += 1; + avg_iou += iou; + + int obj_index = entry_index(l, b, best_n*l->w*l->h + j*l->w + i, l->coords); + avg_obj += l->output[obj_index]; + l->delta[obj_index] = l->object_scale * (1 - l->output[obj_index]); + if (l->rescore) { + l->delta[obj_index] = l->object_scale * (iou - l->output[obj_index]); + } + if(l->background){ + l->delta[obj_index] = l->object_scale * (0 - l->output[obj_index]); + } + + int class_ = net.truth[t*(l->coords + 1) + b*l->truths + l->coords]; + if (l->map) class_ = l->map[class_]; + int class_index = entry_index(l, b, best_n*l->w*l->h + j*l->w + i, l->coords + 1); + delta_region_class(l->output, l->delta, class_index, class_, l->classes, l->softmax_tree, l->class_scale, l->w*l->h, &avg_cat, !l->softmax); + ++count; + ++class_count; + } + } + *(l->cost) = pow(mag_array(l->delta, l->outputs * l->batch), 2); + printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, 
avg_anyobj/(l->w*l->h*l->n*l->batch), recall/count, count); +} + +void backward_region_layer(layer* l, network net) +{ + /* + int b; + int size = l->coords + l->classes + 1; + for (b = 0; b < l->batch*l->n; ++b){ + int index = (b*size + 4)*l->w*l->h; + gradient_array(l->output + index, l->w*l->h, LOGISTIC, l->delta + index); + } + axpy_cpu(l->batch*l->inputs, 1, l->delta, 1, net.delta, 1); + */ +} + +void correct_region_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +{ + int i; + int new_w=0; + int new_h=0; + if (((float)netw/w) < ((float)neth/h)) { + new_w = netw; + new_h = (h * netw)/w; + } else { + new_h = neth; + new_w = (w * neth)/h; + } + for (i = 0; i < n; ++i){ + box b = dets[i].bbox; + b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); + b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); + b.w *= (float)netw/new_w; + b.h *= (float)neth/new_h; + if(!relative){ + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + dets[i].bbox = b; + } +} + +void get_region_detections(layer* l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets) +{ + int i,j,n,z; + float *predictions = l->output; + if (l->batch == 2) { + float *flip = l->output + l->outputs; + for (j = 0; j < l->h; ++j) { + for (i = 0; i < l->w/2; ++i) { + for (n = 0; n < l->n; ++n) { + for(z = 0; z < l->classes + l->coords + 1; ++z){ + int i1 = z*l->w*l->h*l->n + n*l->w*l->h + j*l->w + i; + int i2 = z*l->w*l->h*l->n + n*l->w*l->h + j*l->w + (l->w - i - 1); + float swap = flip[i1]; + flip[i1] = flip[i2]; + flip[i2] = swap; + if(z == 0){ + flip[i1] = -flip[i1]; + flip[i2] = -flip[i2]; + } + } + } + } + } + for(i = 0; i < l->outputs; ++i){ + l->output[i] = (l->output[i] + flip[i])/2.; + } + } + for (i = 0; i < l->w*l->h; ++i){ + int row = i / l->w; + int col = i % l->w; + for(n = 0; n < l->n; ++n){ + int index = n*l->w*l->h + i; + for(j = 0; j < l->classes; ++j){ + dets[index].prob[j] = 0; + } + int 
obj_index = entry_index(l, 0, n*l->w*l->h + i, l->coords); + int box_index = entry_index(l, 0, n*l->w*l->h + i, 0); + int mask_index = entry_index(l, 0, n*l->w*l->h + i, 4); + float scale = l->background ? 1 : predictions[obj_index]; + dets[index].bbox = get_region_box(predictions, l->biases, n, box_index, col, row, l->w, l->h, l->w*l->h); + dets[index].objectness = scale > thresh ? scale : 0; + if(dets[index].mask){ + for(j = 0; j < l->coords - 4; ++j){ + dets[index].mask[j] = l->output[mask_index + j*l->w*l->h]; + } + } + + int class_index = entry_index(l, 0, n*l->w*l->h + i, l->coords + !l->background); + if(l->softmax_tree){ + + hierarchy_predictions(predictions + class_index, l->classes, l->softmax_tree, 0, l->w*l->h); + if(map){ + for(j = 0; j < 200; ++j){ + int class_index = entry_index(l, 0, n*l->w*l->h + i, l->coords + 1 + map[j]); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? prob : 0; + } + } else { + int j = hierarchy_top_prediction(predictions + class_index, l->softmax_tree, tree_thresh, l->w*l->h); + dets[index].prob[j] = (scale > thresh) ? scale : 0; + } + } else { + if(dets[index].objectness){ + for(j = 0; j < l->classes; ++j){ + int class_index = entry_index(l, 0, n*l->w*l->h + i, l->coords + 1 + j); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? 
prob : 0; + } + } + } + } + } + correct_region_boxes(dets, l->w*l->h*l->n, w, h, netw, neth, relative); +} + +#ifdef GPU + +void forward_region_layer_gpu(layer* l, network net) +{ + // Ruihao + // forward_layer_start(l, net); + // Ruihao + copy_gpu(l->batch*l->inputs, net.input_gpu, 1, l->output_gpu, 1, net.streams[l->stream_index]); + int b, n; + for (b = 0; b < l->batch; ++b){ + for(n = 0; n < l->n; ++n){ + int index = entry_index(l, b, n*l->w*l->h, 0); + activate_array_gpu(l->output_gpu + index, 2*l->w*l->h, LOGISTIC, net.streams[l->stream_index]); + if(l->coords > 4){ + index = entry_index(l, b, n*l->w*l->h, 4); + activate_array_gpu(l->output_gpu + index, (l->coords - 4)*l->w*l->h, LOGISTIC, net.streams[l->stream_index]); + } + index = entry_index(l, b, n*l->w*l->h, l->coords); + if(!l->background) activate_array_gpu(l->output_gpu + index, l->w*l->h, LOGISTIC, net.streams[l->stream_index]); + index = entry_index(l, b, n*l->w*l->h, l->coords + 1); + if(!l->softmax && !l->softmax_tree) activate_array_gpu(l->output_gpu + index, l->classes*l->w*l->h, LOGISTIC, net.streams[l->stream_index]); + } + } + if (l->softmax_tree){ + int index = entry_index(l, 0, 0, l->coords + 1); + softmax_tree(net.input_gpu + index, l->w*l->h, l->batch*l->n, l->inputs/l->n, 1, l->output_gpu + index, *l->softmax_tree); + } else if (l->softmax) { + int index = entry_index(l, 0, 0, l->coords + !l->background); + softmax_gpu(net.input_gpu + index, l->classes + l->background, l->batch*l->n, l->inputs/l->n, l->w*l->h, 1, l->w*l->h, 1, l->output_gpu + index); + } + if(!net.train || l->onlyforward){ + cuda_pull_array(l->output_gpu, l->output, l->batch*l->outputs); + return; + } + + cuda_pull_array(l->output_gpu, net.input, l->batch*l->inputs); + forward_region_layer(l, net); + //cuda_push_array(l->output_gpu, l->output, l->batch*l->outputs); + if(!net.train) return; + cuda_push_array(l->delta_gpu, l->delta, l->batch*l->outputs); + // Ruihao + // forward_layer_end(l, net); + // Ruihao +} + +void 
backward_region_layer_gpu(layer* l, network net) +{ + int b, n; + for (b = 0; b < l->batch; ++b){ + for(n = 0; n < l->n; ++n){ + int index = entry_index(l, b, n*l->w*l->h, 0); + gradient_array_gpu(l->output_gpu + index, 2*l->w*l->h, LOGISTIC, l->delta_gpu + index); + if(l->coords > 4){ + index = entry_index(l, b, n*l->w*l->h, 4); + gradient_array_gpu(l->output_gpu + index, (l->coords - 4)*l->w*l->h, LOGISTIC, l->delta_gpu + index); + } + index = entry_index(l, b, n*l->w*l->h, l->coords); + if(!l->background) gradient_array_gpu(l->output_gpu + index, l->w*l->h, LOGISTIC, l->delta_gpu + index); + } + } + axpy_gpu(l->batch*l->inputs, 1, l->delta_gpu, 1, net.delta_gpu, 1, net.streams[l->stream_index]); +} +#endif + +void zero_objectness(layer* l) +{ + int i, n; + for (i = 0; i < l->w*l->h; ++i){ + for(n = 0; n < l->n; ++n){ + int obj_index = entry_index(l, 0, n*l->w*l->h + i, l->coords); + l->output[obj_index] = 0; + } + } +} + diff --git a/workloads/realworld/pipeline/darknet/src/region_layer.h b/workloads/realworld/pipeline/darknet/src/region_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..d7edb49d07abea838e0fb0f6eb1daf03626d4c2e --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/region_layer.h @@ -0,0 +1,18 @@ +#ifndef REGION_LAYER_H +#define REGION_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer* make_region_layer(int batch, int w, int h, int n, int classes, int coords); +void forward_region_layer(layer* l, network net); +void backward_region_layer(layer* l, network net); +void resize_region_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_region_layer_gpu(layer* l, network net); +void backward_region_layer_gpu(layer* l, network net); +#endif + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/reorg_layer.c b/workloads/realworld/pipeline/darknet/src/reorg_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..e5a6eaef80e22b99ea1c7394f4989375b8f9115b 
--- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/reorg_layer.c @@ -0,0 +1,173 @@ +#include "reorg_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + + +layer* make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra) +{ + layer* l = calloc(1, sizeof(layer)); + l->type = REORG; + l->batch = batch; + l->stride = stride; + l->extra = extra; + l->h = h; + l->w = w; + l->c = c; + l->flatten = flatten; + if(reverse){ + l->out_w = w*stride; + l->out_h = h*stride; + l->out_c = c/(stride*stride); + }else{ + l->out_w = w/stride; + l->out_h = h/stride; + l->out_c = c*(stride*stride); + } + l->reverse = reverse; + + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = h*w*c; + if(l->extra){ + l->out_w = l->out_h = l->out_c = 0; + l->outputs = l->inputs + l->extra; + } + + if(extra){ + fprintf(stderr, "reorg %4d -> %4d\n", l->inputs, l->outputs); + } else { + fprintf(stderr, "reorg /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l->out_w, l->out_h, l->out_c); + } + int output_size = l->outputs * batch; + l->output = calloc(output_size, sizeof(float)); + l->delta = calloc(output_size, sizeof(float)); + + l->forward = forward_reorg_layer; + l->backward = backward_reorg_layer; +#ifdef GPU + l->forward_gpu = forward_reorg_layer_gpu; + l->backward_gpu = backward_reorg_layer_gpu; + + l->output_gpu = cuda_make_array(l->output, output_size); + l->delta_gpu = cuda_make_array(l->delta, output_size); +#endif + return l; +} + +void resize_reorg_layer(layer *l, int w, int h) +{ + int stride = l->stride; + int c = l->c; + + l->h = h; + l->w = w; + + if(l->reverse){ + l->out_w = w*stride; + l->out_h = h*stride; + l->out_c = c/(stride*stride); + }else{ + l->out_w = w/stride; + l->out_h = h/stride; + l->out_c = c*(stride*stride); + } + + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->outputs; + int output_size = l->outputs * l->batch; + + l->output = realloc(l->output, output_size * sizeof(float)); 
+ l->delta = realloc(l->delta, output_size * sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, output_size); + l->delta_gpu = cuda_make_array(l->delta, output_size); +#endif +} + +void forward_reorg_layer(layer* l, network net) +{ + int i; + if(l->flatten){ + memcpy(l->output, net.input, l->outputs*l->batch*sizeof(float)); + if(l->reverse){ + flatten(l->output, l->w*l->h, l->c, l->batch, 0); + }else{ + flatten(l->output, l->w*l->h, l->c, l->batch, 1); + } + } else if (l->extra) { + for(i = 0; i < l->batch; ++i){ + copy_cpu(l->inputs, net.input + i*l->inputs, 1, l->output + i*l->outputs, 1); + } + } else if (l->reverse){ + reorg_cpu(net.input, l->w, l->h, l->c, l->batch, l->stride, 1, l->output); + } else { + reorg_cpu(net.input, l->w, l->h, l->c, l->batch, l->stride, 0, l->output); + } +} + +void backward_reorg_layer(layer* l, network net) +{ + int i; + if(l->flatten){ + memcpy(net.delta, l->delta, l->outputs*l->batch*sizeof(float)); + if(l->reverse){ + flatten(net.delta, l->w*l->h, l->c, l->batch, 1); + }else{ + flatten(net.delta, l->w*l->h, l->c, l->batch, 0); + } + } else if(l->reverse){ + reorg_cpu(l->delta, l->w, l->h, l->c, l->batch, l->stride, 0, net.delta); + } else if (l->extra) { + for(i = 0; i < l->batch; ++i){ + copy_cpu(l->inputs, l->delta + i*l->outputs, 1, net.delta + i*l->inputs, 1); + } + }else{ + reorg_cpu(l->delta, l->w, l->h, l->c, l->batch, l->stride, 1, net.delta); + } +} + +#ifdef GPU +void forward_reorg_layer_gpu(layer* l, network net) +{ + int i; + if(l->flatten){ + if(l->reverse){ + flatten_gpu(net.input_gpu, l->w*l->h, l->c, l->batch, 0, l->output_gpu); + }else{ + flatten_gpu(net.input_gpu, l->w*l->h, l->c, l->batch, 1, l->output_gpu); + } + } else if (l->extra) { + for(i = 0; i < l->batch; ++i){ + copy_gpu(l->inputs, net.input_gpu + i*l->inputs, 1, l->output_gpu + i*l->outputs, 1, net.streams[l->stream_index]); + } + } else if (l->reverse) { + 
reorg_gpu(net.input_gpu, l->w, l->h, l->c, l->batch, l->stride, 1, l->output_gpu); + }else { + reorg_gpu(net.input_gpu, l->w, l->h, l->c, l->batch, l->stride, 0, l->output_gpu); + } +} + +void backward_reorg_layer_gpu(layer* l, network net) +{ + if(l->flatten){ + if(l->reverse){ + flatten_gpu(l->delta_gpu, l->w*l->h, l->c, l->batch, 1, net.delta_gpu); + }else{ + flatten_gpu(l->delta_gpu, l->w*l->h, l->c, l->batch, 0, net.delta_gpu); + } + } else if (l->extra) { + int i; + for(i = 0; i < l->batch; ++i){ + copy_gpu(l->inputs, l->delta_gpu + i*l->outputs, 1, net.delta_gpu + i*l->inputs, 1, net.streams[l->stream_index]); + } + } else if(l->reverse){ + reorg_gpu(l->delta_gpu, l->w, l->h, l->c, l->batch, l->stride, 0, net.delta_gpu); + } else { + reorg_gpu(l->delta_gpu, l->w, l->h, l->c, l->batch, l->stride, 1, net.delta_gpu); + } +} +#endif diff --git a/workloads/realworld/pipeline/darknet/src/reorg_layer.h b/workloads/realworld/pipeline/darknet/src/reorg_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..866fadc16e57c861f41df415026c850b0fa6fe79 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/reorg_layer.h @@ -0,0 +1,20 @@ +#ifndef REORG_LAYER_H +#define REORG_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +layer* make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra); +void resize_reorg_layer(layer *l, int w, int h); +void forward_reorg_layer(layer* l, network net); +void backward_reorg_layer(layer* l, network net); + +#ifdef GPU +void forward_reorg_layer_gpu(layer* l, network net); +void backward_reorg_layer_gpu(layer* l, network net); +#endif + +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/rnn_layer.c b/workloads/realworld/pipeline/darknet/src/rnn_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..85a6412615e501981019e200143712571333a1f1 --- /dev/null +++ 
b/workloads/realworld/pipeline/darknet/src/rnn_layer.c @@ -0,0 +1,292 @@ +#include "rnn_layer.h" +#include "connected_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer* make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTIVATION activation, int batch_normalize, int adam) +{ + fprintf(stderr, "RNN Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer* l = calloc(1, sizeof(layer)); + l->batch = batch; + l->type = RNN; + l->steps = steps; + l->inputs = inputs; + + l->state = calloc(batch*outputs, sizeof(float)); + l->prev_state = calloc(batch*outputs, sizeof(float)); + + l->input_layer = calloc(1, sizeof(layer)); + fprintf(stderr, "\t\t"); + l->input_layer = make_connected_layer(batch*steps, inputs, outputs, activation, batch_normalize, adam); + l->input_layer->batch = batch; + + l->self_layer = calloc(1, sizeof(layer)); + fprintf(stderr, "\t\t"); + l->self_layer = make_connected_layer(batch*steps, outputs, outputs, activation, batch_normalize, adam); + l->self_layer->batch = batch; + + l->output_layer = calloc(1, sizeof(layer)); + fprintf(stderr, "\t\t"); + l->output_layer = make_connected_layer(batch*steps, outputs, outputs, activation, batch_normalize, adam); + l->output_layer->batch = batch; + + l->outputs = outputs; + l->output = l->output_layer->output; + l->delta = l->output_layer->delta; + + l->forward = forward_rnn_layer; + l->backward = backward_rnn_layer; + l->update = update_rnn_layer; +#ifdef GPU + l->forward_gpu = forward_rnn_layer_gpu; + l->backward_gpu = backward_rnn_layer_gpu; + l->update_gpu = update_rnn_layer_gpu; + l->state_gpu = 
cuda_make_array(0, batch*outputs); + l->prev_state_gpu = cuda_make_array(0, batch*outputs); + l->output_gpu = l->output_layer->output_gpu; + l->delta_gpu = l->output_layer->delta_gpu; +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l->input_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->input_layer->out_c, l->input_layer->out_h, l->input_layer->out_w); + cudnnSetTensor4dDescriptor(l->self_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->self_layer->out_c, l->self_layer->out_h, l->self_layer->out_w); + cudnnSetTensor4dDescriptor(l->output_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l->output_layer->out_c, l->output_layer->out_h, l->output_layer->out_w); +#endif +#endif + + return l; +} + +void update_rnn_layer(layer* l, update_args a, network net) +{ + update_connected_layer((l->input_layer), a, net); + update_connected_layer((l->self_layer), a, net); + update_connected_layer((l->output_layer), a, net); +} + +void forward_rnn_layer(layer* l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer *input_layer = l->input_layer; + layer *self_layer = l->self_layer; + layer *output_layer = l->output_layer; + + fill_cpu(l->outputs * l->batch * l->steps, 0, output_layer->delta, 1); + fill_cpu(l->outputs * l->batch * l->steps, 0, self_layer->delta, 1); + fill_cpu(l->outputs * l->batch * l->steps, 0, input_layer->delta, 1); + if(net.train) fill_cpu(l->outputs * l->batch, 0, l->state, 1); + + for (i = 0; i < l->steps; ++i) { + s.input = net.input; + forward_connected_layer(input_layer, s); + + s.input = l->state; + forward_connected_layer(self_layer, s); + + float *old_state = l->state; + if(net.train) l->state += l->outputs*l->batch; + if(l->shortcut){ + copy_cpu(l->outputs * l->batch, old_state, 1, l->state, 1); + }else{ + fill_cpu(l->outputs * l->batch, 0, l->state, 1); + } + axpy_cpu(l->outputs * l->batch, 1, input_layer->output, 1, l->state, 1); + axpy_cpu(l->outputs * l->batch, 1, 
self_layer->output, 1, l->state, 1); + + s.input = l->state; + forward_connected_layer(output_layer, s); + + net.input += l->inputs*l->batch; + increment_layer(input_layer, 1); + increment_layer(self_layer, 1); + increment_layer(output_layer, 1); + } +} + +void backward_rnn_layer(layer* l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer *input_layer = l->input_layer; + layer *self_layer = l->self_layer; + layer *output_layer = l->output_layer; + + increment_layer(input_layer, l->steps-1); + increment_layer(self_layer, l->steps-1); + increment_layer(output_layer, l->steps-1); + + l->state += l->outputs*l->batch*l->steps; + for (i = l->steps-1; i >= 0; --i) { + copy_cpu(l->outputs * l->batch, input_layer->output, 1, l->state, 1); + axpy_cpu(l->outputs * l->batch, 1, self_layer->output, 1, l->state, 1); + + s.input = l->state; + s.delta = self_layer->delta; + backward_connected_layer(output_layer, s); + + l->state -= l->outputs*l->batch; + /* + if(i > 0){ + copy_cpu(l->outputs * l->batch, input_layer.output - l->outputs*l->batch, 1, l->state, 1); + axpy_cpu(l->outputs * l->batch, 1, self_layer.output - l->outputs*l->batch, 1, l->state, 1); + }else{ + fill_cpu(l->outputs * l->batch, 0, l->state, 1); + } + */ + + s.input = l->state; + s.delta = self_layer->delta - l->outputs*l->batch; + if (i == 0) s.delta = 0; + backward_connected_layer(self_layer, s); + + copy_cpu(l->outputs*l->batch, self_layer->delta, 1, input_layer->delta, 1); + if (i > 0 && l->shortcut) axpy_cpu(l->outputs*l->batch, 1, self_layer->delta, 1, self_layer->delta - l->outputs*l->batch, 1); + s.input = net.input + i*l->inputs*l->batch; + if(net.delta) s.delta = net.delta + i*l->inputs*l->batch; + else s.delta = 0; + backward_connected_layer(input_layer, s); + + increment_layer(input_layer, -1); + increment_layer(self_layer, -1); + increment_layer(output_layer, -1); + } +} + +#ifdef GPU + +void pull_rnn_layer(layer* l) +{ + pull_connected_layer((l->input_layer)); + 
pull_connected_layer((l->self_layer)); + pull_connected_layer((l->output_layer)); +} + +void push_rnn_layer(layer* l) +{ + push_connected_layer((l->input_layer)); + push_connected_layer((l->self_layer)); + push_connected_layer((l->output_layer)); +} + +void update_rnn_layer_gpu(layer* l, update_args a, network net) +{ + update_connected_layer_gpu((l->input_layer), a, net); + update_connected_layer_gpu((l->self_layer), a, net); + update_connected_layer_gpu((l->output_layer), a, net); +} + +void forward_rnn_layer_gpu(layer* l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer *input_layer = (l->input_layer); + layer *self_layer = (l->self_layer); + layer *output_layer = (l->output_layer); + + fill_gpu(l->outputs * l->batch * l->steps, 0, output_layer->delta_gpu, 1); + fill_gpu(l->outputs * l->batch * l->steps, 0, self_layer->delta_gpu, 1); + fill_gpu(l->outputs * l->batch * l->steps, 0, input_layer->delta_gpu, 1); + + if(net.train) { + fill_gpu(l->outputs * l->batch * l->steps, 0, l->delta_gpu, 1); + copy_gpu(l->outputs*l->batch, l->state_gpu, 1, l->prev_state_gpu, 1, net.streams[l->stream_index]); + } + + for (i = 0; i < l->steps; ++i) { + s.input_gpu = net.input_gpu; + forward_connected_layer_gpu(input_layer, s); + + s.input_gpu = l->state_gpu; + forward_connected_layer_gpu(self_layer, s); + + fill_gpu(l->outputs * l->batch, 0, l->state_gpu, 1); + axpy_gpu(l->outputs * l->batch, 1, input_layer->output_gpu, 1, l->state_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->outputs * l->batch, 1, self_layer->output_gpu, 1, l->state_gpu, 1, net.streams[l->stream_index]); + + s.input_gpu = l->state_gpu; + forward_connected_layer_gpu(output_layer, s); + + net.input_gpu += l->inputs*l->batch; + increment_layer(input_layer, 1); + increment_layer(self_layer, 1); + increment_layer(output_layer, 1); + } +} + +void backward_rnn_layer_gpu(layer* l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer *input_layer = (l->input_layer); 
+ layer *self_layer = (l->self_layer); + layer *output_layer = (l->output_layer); + increment_layer(input_layer, l->steps - 1); + increment_layer(self_layer, l->steps - 1); + increment_layer(output_layer, l->steps - 1); + float *last_input = input_layer->output_gpu; + float *last_self = self_layer->output_gpu; + for (i = l->steps-1; i >= 0; --i) { + fill_gpu(l->outputs * l->batch, 0, l->state_gpu, 1); + axpy_gpu(l->outputs * l->batch, 1, input_layer->output_gpu, 1, l->state_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->outputs * l->batch, 1, self_layer->output_gpu, 1, l->state_gpu, 1, net.streams[l->stream_index]); + + s.input_gpu = l->state_gpu; + s.delta_gpu = self_layer->delta_gpu; + backward_connected_layer_gpu(output_layer, s); + + if(i != 0) { + fill_gpu(l->outputs * l->batch, 0, l->state_gpu, 1); + axpy_gpu(l->outputs * l->batch, 1, input_layer->output_gpu - l->outputs*l->batch, 1, l->state_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->outputs * l->batch, 1, self_layer->output_gpu - l->outputs*l->batch, 1, l->state_gpu, 1, net.streams[l->stream_index]); + }else { + copy_gpu(l->outputs*l->batch, l->prev_state_gpu, 1, l->state_gpu, 1, net.streams[l->stream_index]); + } + + copy_gpu(l->outputs*l->batch, self_layer->delta_gpu, 1, input_layer->delta_gpu, 1, net.streams[l->stream_index]); + + s.input_gpu = l->state_gpu; + s.delta_gpu = (i > 0) ? 
self_layer->delta_gpu - l->outputs*l->batch : 0; + if (i == 0) s.delta_gpu = 0; + backward_connected_layer_gpu(self_layer, s); + + s.input_gpu = net.input_gpu + i*l->inputs*l->batch; + if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l->inputs*l->batch; + else s.delta_gpu = 0; + backward_connected_layer_gpu(input_layer, s); + + increment_layer(input_layer, -1); + increment_layer(self_layer, -1); + increment_layer(output_layer, -1); + } + fill_gpu(l->outputs * l->batch, 0, l->state_gpu, 1); + axpy_gpu(l->outputs * l->batch, 1, last_input, 1, l->state_gpu, 1, net.streams[l->stream_index]); + axpy_gpu(l->outputs * l->batch, 1, last_self, 1, l->state_gpu, 1, net.streams[l->stream_index]); +} +#endif diff --git a/workloads/realworld/pipeline/darknet/src/rnn_layer.h b/workloads/realworld/pipeline/darknet/src/rnn_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..943facc32e04c0000b8eb0a4062085215e383b41 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/rnn_layer.h @@ -0,0 +1,25 @@ + +#ifndef RNN_LAYER_H +#define RNN_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" +#define USET + +layer* make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTIVATION activation, int batch_normalize, int adam); + +void forward_rnn_layer(layer* l, network net); +void backward_rnn_layer(layer* l, network net); +void update_rnn_layer(layer* l, update_args a, network net); + +#ifdef GPU +void forward_rnn_layer_gpu(layer* l, network net); +void backward_rnn_layer_gpu(layer* l, network net); +void update_rnn_layer_gpu(layer* l, update_args a, network net); +void push_rnn_layer(layer* l); +void pull_rnn_layer(layer* l); +#endif + +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/route_layer.c b/workloads/realworld/pipeline/darknet/src/route_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..9a8332165b5065a98799d3ea408014fb836f510d --- /dev/null +++ 
b/workloads/realworld/pipeline/darknet/src/route_layer.c @@ -0,0 +1,134 @@ +#include "route_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + +route_layer* make_route_layer(int batch, int n, int *input_layers, int *input_sizes) +{ + fprintf(stderr,"route "); + route_layer* l = calloc(1, sizeof(route_layer)); + l->type = ROUTE; + l->batch = batch; + l->n = n; + l->input_layers = input_layers; + l->input_sizes = input_sizes; + int i; + int outputs = 0; + for(i = 0; i < n; ++i){ + fprintf(stderr," %d", input_layers[i]); + outputs += input_sizes[i]; + } + fprintf(stderr, "\n"); + l->outputs = outputs; + l->inputs = outputs; + l->delta = calloc(outputs*batch, sizeof(float)); + l->output = calloc(outputs*batch, sizeof(float));; + + l->forward = forward_route_layer; + l->backward = backward_route_layer; + #ifdef GPU + l->forward_gpu = forward_route_layer_gpu; + l->backward_gpu = backward_route_layer_gpu; + + // l->delta_gpu = cuda_make_array(l->delta, outputs*batch); + // l->output_gpu = cuda_make_array(l->output, outputs*batch); + #endif + return l; +} + +void resize_route_layer(route_layer *l, network *net) +{ + int i; + layer *first = net->layers[l->input_layers[0]]; + l->out_w = first->out_w; + l->out_h = first->out_h; + l->out_c = first->out_c; + l->outputs = first->outputs; + l->input_sizes[0] = first->outputs; + for(i = 1; i < l->n; ++i){ + int index = l->input_layers[i]; + layer *next = net->layers[index]; + l->outputs += next->outputs; + l->input_sizes[i] = next->outputs; + if(next->out_w == first->out_w && next->out_h == first->out_h){ + l->out_c += next->out_c; + }else{ + printf("%d %d, %d %d\n", next->out_w, next->out_h, first->out_w, first->out_h); + l->out_h = l->out_w = l->out_c = 0; + } + } + l->inputs = l->outputs; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = 
cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + +void forward_route_layer(route_layer* l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l->n; ++i){ + int index = l->input_layers[i]; + float *input = net.layers[index]->output; + int input_size = l->input_sizes[i]; + for(j = 0; j < l->batch; ++j){ + copy_cpu(input_size, input + j*input_size, 1, l->output + offset + j*l->outputs, 1); + } + offset += input_size; + } +} + +void backward_route_layer(route_layer* l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l->n; ++i){ + int index = l->input_layers[i]; + float *delta = net.layers[index]->delta; + int input_size = l->input_sizes[i]; + for(j = 0; j < l->batch; ++j){ + axpy_cpu(input_size, 1, l->delta + offset + j*l->outputs, 1, delta + j*input_size, 1); + } + offset += input_size; + } +} + +#ifdef GPU +void forward_route_layer_gpu(route_layer* l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l->n; ++i){ + int index = l->input_layers[i]; + float *input = net.layers[index]->output_gpu; + int input_size = l->input_sizes[i]; + for(j = 0; j < l->batch; ++j){ + copy_gpu(input_size, input + j*input_size, 1, l->output_gpu + offset + j*l->outputs, 1, net.streams[l->stream_index]); + } + offset += input_size; + } +} + +void backward_route_layer_gpu(route_layer* l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l->n; ++i){ + int index = l->input_layers[i]; + float *delta = net.layers[index]->delta_gpu; + int input_size = l->input_sizes[i]; + for(j = 0; j < l->batch; ++j){ + axpy_gpu(input_size, 1, l->delta_gpu + offset + j*l->outputs, 1, delta + j*input_size, 1, net.streams[l->stream_index]); + } + offset += input_size; + } +} +#endif diff --git a/workloads/realworld/pipeline/darknet/src/route_layer.h b/workloads/realworld/pipeline/darknet/src/route_layer.h new file mode 100644 index 
0000000000000000000000000000000000000000..578929e7d6a037a8643fc2d53989b95cf0534fe9 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/route_layer.h @@ -0,0 +1,18 @@ +#ifndef ROUTE_LAYER_H +#define ROUTE_LAYER_H +#include "network.h" +#include "layer.h" + +typedef layer route_layer; + +route_layer* make_route_layer(int batch, int n, int *input_layers, int *input_size); +void forward_route_layer(route_layer* l, network net); +void backward_route_layer(route_layer* l, network net); +void resize_route_layer(route_layer *l, network *net); + +#ifdef GPU +void forward_route_layer_gpu(route_layer* l, network net); +void backward_route_layer_gpu(route_layer* l, network net); +#endif + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/shortcut_layer.c b/workloads/realworld/pipeline/darknet/src/shortcut_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..2744e2d63f74d5b061744251563fccb27d37c08b --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/shortcut_layer.c @@ -0,0 +1,97 @@ +#include "shortcut_layer.h" +#include "cuda_dark.h" +#include "blas.h" +#include "activations.h" + +#include +#include + +layer* make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) +{ + fprintf(stderr, "res %3d %4d x%4d x%4d -> %4d x%4d x%4d\n",index, w2,h2,c2, w,h,c); + layer* l = calloc(1, sizeof(layer)); + l->type = SHORTCUT; + l->batch = batch; + l->w = w2; + l->h = h2; + l->c = c2; + l->out_w = w; + l->out_h = h; + l->out_c = c; + l->outputs = w*h*c; + l->inputs = l->outputs; + + l->index = index; + + l->delta = calloc(l->outputs*batch, sizeof(float)); + l->output = calloc(l->outputs*batch, sizeof(float));; + + l->forward = forward_shortcut_layer; + l->backward = backward_shortcut_layer; + #ifdef GPU + l->forward_gpu = forward_shortcut_layer_gpu; + l->backward_gpu = backward_shortcut_layer_gpu; + + // l->delta_gpu = cuda_make_array(l->delta, l->outputs*batch); + // l->output_gpu = 
cuda_make_array(l->output, l->outputs*batch); + #endif + return l; +} + +void resize_shortcut_layer(layer *l, int w, int h) +{ + fprintf(stderr, "resize_shortcut_layer \n"); + assert(l->w == l->out_w); + assert(l->h == l->out_h); + l->w = l->out_w = w; + l->h = l->out_h = h; + l->outputs = w*h*l->out_c; + l->inputs = l->outputs; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + + +void forward_shortcut_layer(layer* l, network net) +{ + copy_cpu(l->outputs*l->batch, net.input, 1, l->output, 1); + shortcut_cpu(l->batch, l->w, l->h, l->c, net.layers[l->index]->output, l->out_w, l->out_h, l->out_c, l->alpha, l->beta, l->output); + activate_array(l->output, l->outputs*l->batch, l->activation); +} + +void backward_shortcut_layer(layer* l, network net) +{ + gradient_array(l->output, l->outputs*l->batch, l->activation, l->delta); + axpy_cpu(l->outputs*l->batch, l->alpha, l->delta, 1, net.delta, 1); + shortcut_cpu(l->batch, l->out_w, l->out_h, l->out_c, l->delta, l->w, l->h, l->c, 1, l->beta, net.layers[l->index]->delta); +} + +#ifdef GPU +void forward_shortcut_layer_gpu(layer* l, network net) +{ + // Ruihao + // forward_layer_start(l, net); + // Ruihao + copy_gpu(l->outputs*l->batch, net.input_gpu, 1, l->output_gpu, 1, net.streams[l->stream_index]); + shortcut_gpu(l->batch, l->w, l->h, l->c, net.layers[l->index]->output_gpu, l->out_w, l->out_h, l->out_c, l->alpha, l->beta, l->output_gpu); + activate_array_gpu(l->output_gpu, l->outputs*l->batch, l->activation, net.streams[l->stream_index]); + // Ruihao + // forward_layer_end(l, net); + // Ruihao +} + +void backward_shortcut_layer_gpu(layer* l, network net) +{ + gradient_array_gpu(l->output_gpu, l->outputs*l->batch, 
l->activation, l->delta_gpu); + axpy_gpu(l->outputs*l->batch, l->alpha, l->delta_gpu, 1, net.delta_gpu, 1, net.streams[l->stream_index]); + shortcut_gpu(l->batch, l->out_w, l->out_h, l->out_c, l->delta_gpu, l->w, l->h, l->c, 1, l->beta, net.layers[l->index]->delta_gpu); +} +#endif diff --git a/workloads/realworld/pipeline/darknet/src/shortcut_layer.h b/workloads/realworld/pipeline/darknet/src/shortcut_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..d5efe38f5c5cd1e1dbf3cf6ae1af893e7f15c20f --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/shortcut_layer.h @@ -0,0 +1,17 @@ +#ifndef SHORTCUT_LAYER_H +#define SHORTCUT_LAYER_H + +#include "layer.h" +#include "network.h" + +layer* make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); +void forward_shortcut_layer(layer* l, network net); +void backward_shortcut_layer(layer* l, network net); +void resize_shortcut_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_shortcut_layer_gpu(layer* l, network net); +void backward_shortcut_layer_gpu(layer* l, network net); +#endif + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/softmax_layer.c b/workloads/realworld/pipeline/darknet/src/softmax_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..ca8243ccbee81806ae62a2cd0a833f0151c1561c --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/softmax_layer.c @@ -0,0 +1,113 @@ +#include "softmax_layer.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +softmax_layer* make_softmax_layer(int batch, int inputs, int groups) +{ + assert(inputs%groups == 0); + fprintf(stderr, "softmax %4d\n", inputs); + softmax_layer* l = calloc(1, sizeof(softmax_layer)); + l->type = SOFTMAX; + l->batch = batch; + l->groups = groups; + l->inputs = inputs; + l->outputs = inputs; + l->loss = calloc(inputs*batch, sizeof(float)); + l->output = calloc(inputs*batch, sizeof(float)); + l->delta 
= calloc(inputs*batch, sizeof(float)); + l->cost = calloc(1, sizeof(float)); + + l->forward = forward_softmax_layer; + l->backward = backward_softmax_layer; + #ifdef GPU + l->forward_gpu = forward_softmax_layer_gpu; + l->backward_gpu = backward_softmax_layer_gpu; + + // l->output_gpu = cuda_make_array(l->output, inputs*batch); + l->loss_gpu = cuda_make_array(l->loss, inputs*batch); + // l->delta_gpu = cuda_make_array(l->delta, inputs*batch); + #endif + return l; +} + +void forward_softmax_layer(softmax_layer* l, network net) +{ + if(l->softmax_tree){ + int i; + int count = 0; + for (i = 0; i < l->softmax_tree->groups; ++i) { + int group_size = l->softmax_tree->group_size[i]; + softmax_cpu(net.input + count, group_size, l->batch, l->inputs, 1, 0, 1, l->temperature, l->output + count); + count += group_size; + } + } else { + softmax_cpu(net.input, l->inputs/l->groups, l->batch, l->inputs, l->groups, l->inputs/l->groups, 1, l->temperature, l->output); + } + + if(net.truth && !l->noloss){ + softmax_x_ent_cpu(l->batch*l->inputs, l->output, net.truth, l->delta, l->loss); + l->cost[0] = sum_array(l->loss, l->batch*l->inputs); + } +} + +void backward_softmax_layer(softmax_layer* l, network net) +{ + axpy_cpu(l->inputs*l->batch, 1, l->delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_softmax_layer_output(softmax_layer* layer) +{ + cuda_pull_array(layer->output_gpu, layer->output, layer->inputs*layer->batch); +} + +void forward_softmax_layer_gpu(softmax_layer* l, network net) +{ + // Ruihao + // forward_layer_start(l, net); + // Ruihao + if(l->softmax_tree){ + softmax_tree(net.input_gpu, 1, l->batch, l->inputs, l->temperature, l->output_gpu, *l->softmax_tree); + /* + int i; + int count = 0; + for (i = 0; i < l->softmax_tree->groups; ++i) { + int group_size = l->softmax_tree->group_size[i]; + softmax_gpu(net.input_gpu + count, group_size, l->batch, l->inputs, 1, 0, 1, l->temperature, l->output_gpu + count); + count += group_size; + } + */ + } else { + if(l->spatial){ + 
softmax_gpu(net.input_gpu, l->c, l->batch*l->c, l->inputs/l->c, l->w*l->h, 1, l->w*l->h, 1, l->output_gpu); + }else{ + softmax_gpu(net.input_gpu, l->inputs/l->groups, l->batch, l->inputs, l->groups, l->inputs/l->groups, 1, l->temperature, l->output_gpu); + } + } + if(net.truth && !l->noloss){ + softmax_x_ent_gpu(l->batch*l->inputs, l->output_gpu, net.truth_gpu, l->delta_gpu, l->loss_gpu); + if(l->softmax_tree){ + mask_gpu(l->batch*l->inputs, l->delta_gpu, SECRET_NUM, net.truth_gpu, 0); + mask_gpu(l->batch*l->inputs, l->loss_gpu, SECRET_NUM, net.truth_gpu, 0); + } + cuda_pull_array(l->loss_gpu, l->loss, l->batch*l->inputs); + l->cost[0] = sum_array(l->loss, l->batch*l->inputs); + } + // Ruihao + // forward_layer_end(l, net); + // Ruihao +} + +void backward_softmax_layer_gpu(softmax_layer* layer, network net) +{ + axpy_gpu(layer->batch*layer->inputs, 1, layer->delta_gpu, 1, net.delta_gpu, 1, net.streams[layer->stream_index]); +} + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/softmax_layer.h b/workloads/realworld/pipeline/darknet/src/softmax_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..e0e102d2eff8267d89c882a3eaf5daea98f8e199 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/softmax_layer.h @@ -0,0 +1,19 @@ +#ifndef SOFTMAX_LAYER_H +#define SOFTMAX_LAYER_H +#include "layer.h" +#include "network.h" + +typedef layer softmax_layer; + +void softmax_array(float *input, int n, float temp, float *output); +softmax_layer* make_softmax_layer(int batch, int inputs, int groups); +void forward_softmax_layer(softmax_layer* l, network net); +void backward_softmax_layer(softmax_layer* l, network net); + +#ifdef GPU +void pull_softmax_layer_output(softmax_layer* l); +void forward_softmax_layer_gpu(softmax_layer* l, network net); +void backward_softmax_layer_gpu(softmax_layer* l, network net); +#endif + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/stb_image.h 
b/workloads/realworld/pipeline/darknet/src/stb_image.h new file mode 100644 index 0000000000000000000000000000000000000000..d9c21bc813f1f24de2a25ee3cc82bdce9413eaa5 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/stb_image.h @@ -0,0 +1,7462 @@ +/* stb_image - v2.19 - public domain image loader - http://nothings.org/stb + no warranty implied; use at your own risk + + Do this: + #define STB_IMAGE_IMPLEMENTATION + before you include this file in *one* C or C++ file to create the implementation. + + // i.e. it should look like this: + #include ... + #include ... + #include ... + #define STB_IMAGE_IMPLEMENTATION + #include "stb_image.h" + + You can #define STBI_ASSERT(x) before the #include to avoid using assert.h. + And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free + + + QUICK NOTES: + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface + + JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) + PNG 1/2/4/8/16-bit-per-channel + + TGA (not sure what subset, if a subset) + BMP non-1bpp, non-RLE + PSD (composited view only, no extra channels, 8/16 bit-per-channel) + + GIF (*comp always reports as 4-channel) + HDR (radiance rgbE format) + PIC (Softimage PIC) + PNM (PPM and PGM binary only) + + Animated GIF still needs a proper API, but here's one way to do it: + http://gist.github.com/urraka/685d9a6340b26b830d49 + + - decode from memory or through FILE (define STBI_NO_STDIO to remove code) + - decode from arbitrary I/O callbacks + - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) + + Full documentation under "DOCUMENTATION" below. + + +LICENSE + + See end of file for license information. 
+ +RECENT REVISION HISTORY: + + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings + 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes + 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 + RGB-format JPEG; remove white matting in PSD; + allocate large structures on the stack; + correct channel count for PNG & BMP + 2.10 (2016-01-22) avoid warning introduced in 2.09 + 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED + + See end of file for full revision history. + + + ============================ Contributors ========================= + + Image formats Extensions, features + Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) + Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) + Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) + Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) + Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) + Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) + Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) + github:urraka (animated gif) Junggon Kim (PNM comments) + Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) + socks-the-fox (16-bit PNG) + Jeremy Sawicki (handle all ImageNet JPGs) + Optimizations & bugfixes Mikhail Morozov (1-bit BMP) + Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query) + Arseny Kapoulkine + John-Mark Allen + + Bug & warning fixes + Marc LeBlanc David Woo Guillaume George Martins Mozeiko + Christpher Lloyd Jerry Jansson Joseph Thomson Phil Jordan + Dave Moore Roy Eltham Hayaki Saito Nathan Reed + Won Chun Luke Graham Johan Duparc Nick 
Verigakis + the Horde3D community Thomas Ruf Ronny Chevalier github:rlyeh + Janez Zemva John Bartholomew Michal Cichon github:romigrou + Jonathan Blow Ken Hamada Tero Hanninen github:svdijk + Laurent Gomila Cort Stratton Sergio Gonzalez github:snagar + Aruelien Pocheville Thibault Reuille Cass Everitt github:Zelex + Ryamond Barbiero Paul Du Bois Engin Manap github:grim210 + Aldo Culquicondor Philipp Wiesemann Dale Weiler github:sammyhw + Oriol Ferrer Mesia Josh Tobin Matthew Gregan github:phprus + Julian Raschke Gregory Mullen Baldur Karlsson github:poppolopoppo + Christian Floisand Kevin Schmidt github:darealshinji + Blazej Dariusz Roszkowski github:Michaelangel007 +*/ + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +// DOCUMENTATION +// +// Limitations: +// - no 12-bit-per-channel JPEG +// - no JPEGs with arithmetic coding +// - GIF always returns *comp=4 +// +// Basic usage (see HDR discussion below for HDR usage): +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// // ... but 'n' will always be the number that it would have been if you said 0 +// stbi_image_free(data) +// +// Standard parameters: +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *channels_in_file -- outputs # of image components in image file +// int desired_channels -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data, or NULL on an allocation failure or if the image is +// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. 
There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'desired_channels' if desired_channels is non-zero, or +// *channels_in_file otherwise. If desired_channels is non-zero, +// *channels_in_file has the number of components that _would_ have been +// output otherwise. E.g. if you set desired_channels to 4, you will always +// get RGBA output, but you can check *channels_in_file to see if it's trivially +// opaque because e.g. there were only 3 channels in the source image. +// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *channels_in_file will be unchanged. The function +// stbi_failure_reason() can be queried for an extremely brief, end-user +// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS +// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. +// +// =========================================================================== +// +// Philosophy +// +// stb libraries are designed with the following priorities: +// +// 1. easy to use +// 2. easy to maintain +// 3. good performance +// +// Sometimes I let "good performance" creep up in priority over "easy to maintain", +// and for best performance I may provide less-easy-to-use APIs that give higher +// performance, in addition to the easy to use ones. Nevertheless, it's important +// to keep in mind that from the standpoint of you, a client of this library, +// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. 
+// +// Some secondary priorities arise directly from the first two, some of which +// make more explicit reasons why performance can't be emphasized. +// +// - Portable ("ease of use") +// - Small source code footprint ("easy to maintain") +// - No dependencies ("ease of use") +// +// =========================================================================== +// +// I/O callbacks +// +// I/O callbacks allow you to read from arbitrary sources, like packaged +// files or some other source. Data read from callbacks are processed +// through a small internal buffer (currently 128 bytes) to try to reduce +// overhead. +// +// The three functions you must define are "read" (reads some bytes of data), +// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end). +// +// =========================================================================== +// +// SIMD support +// +// The JPEG decoder will try to automatically use SIMD kernels on x86 when +// supported by the compiler. For ARM Neon support, you must explicitly +// request it. +// +// (The old do-it-yourself SIMD API is no longer supported in the current +// code.) +// +// On x86, SSE2 will automatically be used when available based on a run-time +// test; if not, the generic C versions are used as a fall-back. On ARM targets, +// the typical path is to have separate builds for NEON and non-NEON devices +// (at least this is true for iOS and Android). Therefore, the NEON support is +// toggled by a build flag: define STBI_NEON to get NEON loops. +// +// If for some reason you do not want to use any of SIMD code, or if +// you have issues compiling it, you can disable it entirely by +// defining STBI_NO_SIMD. 
+// +// =========================================================================== +// +// HDR image support (disable by defining STBI_NO_HDR) +// +// stb_image now supports loading HDR images in general, and currently +// the Radiance .HDR file format, although the support is provided +// generically. You can still load any file through the existing interface; +// if you attempt to load an HDR file, it will be automatically remapped to +// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). +// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +// float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +// stbi_ldr_to_hdr_scale(1.0f); +// stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +// stbi_is_hdr(char *filename); +// +// =========================================================================== +// +// iPhone PNG support: +// +// By default we convert iphone-formatted PNGs back to RGB, even though +// they are internally encoded differently. You can disable this conversion +// by by calling stbi_convert_iphone_png_to_rgb(0), in which case +// you will always just get the native iphone "format" through (which +// is BGR stored in RGB). 
+// +// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per +// pixel to remove any premultiplied alpha *only* if the image file explicitly +// says there's premultiplied data (currently only happens in iPhone images, +// and only if iPhone convert-to-rgb processing is on). +// +// =========================================================================== +// +// ADDITIONAL CONFIGURATION +// +// - You can suppress implementation of any of the decoders to reduce +// your code footprint by #defining one or more of the following +// symbols before creating the implementation. +// +// STBI_NO_JPEG +// STBI_NO_PNG +// STBI_NO_BMP +// STBI_NO_PSD +// STBI_NO_TGA +// STBI_NO_GIF +// STBI_NO_HDR +// STBI_NO_PIC +// STBI_NO_PNM (.ppm and .pgm) +// +// - You can request *only* certain decoders and suppress all other ones +// (this will be more forward-compatible, as addition of new decoders +// doesn't require you to disable them explicitly): +// +// STBI_ONLY_JPEG +// STBI_ONLY_PNG +// STBI_ONLY_BMP +// STBI_ONLY_PSD +// STBI_ONLY_TGA +// STBI_ONLY_GIF +// STBI_ONLY_HDR +// STBI_ONLY_PIC +// STBI_ONLY_PNM (.ppm and .pgm) +// +// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still +// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB +// + + +#ifndef STBI_NO_STDIO +#include +#endif // STBI_NO_STDIO + +#define STBI_VERSION 1 + +enum +{ + STBI_default = 0, // only used for desired_channels + + STBI_grey = 1, + STBI_grey_alpha = 2, + STBI_rgb = 3, + STBI_rgb_alpha = 4 +}; + +typedef unsigned char stbi_uc; +typedef unsigned short stbi_us; + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef STB_IMAGE_STATIC +#define STBIDEF static +#else +#define STBIDEF extern +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// PRIMARY API - works on images of any type +// + +// +// load image by filename, open file, or memory buffer +// + +typedef struct +{ + int (*read) (void *user,char *data,int 
size); // fill 'data' with 'size' bytes. return number of bytes actually read + void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative + int (*eof) (void *user); // returns nonzero if we are at end of file/data +} stbi_io_callbacks; + +//////////////////////////////////// +// +// 8-bits-per-channel interface +// + +STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels); +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +#endif + + +#ifndef STBI_NO_STDIO +STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +// for stbi_load_from_file, file pointer is left pointing immediately after image +#endif + +//////////////////////////////////// +// +// 16-bits-per-channel interface +// + +STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +#endif + +//////////////////////////////////// +// +// float-per-channel interface +// +#ifndef STBI_NO_LINEAR + STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int 
*channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + + #ifndef STBI_NO_STDIO + STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); + #endif +#endif + +#ifndef STBI_NO_HDR + STBIDEF void stbi_hdr_to_ldr_gamma(float gamma); + STBIDEF void stbi_hdr_to_ldr_scale(float scale); +#endif // STBI_NO_HDR + +#ifndef STBI_NO_LINEAR + STBIDEF void stbi_ldr_to_hdr_gamma(float gamma); + STBIDEF void stbi_ldr_to_hdr_scale(float scale); +#endif // STBI_NO_LINEAR + +// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename); +STBIDEF int stbi_is_hdr_from_file(FILE *f); +#endif // STBI_NO_STDIO + + +// get a VERY brief reason for failure +// NOT THREADSAFE +STBIDEF const char *stbi_failure_reason (void); + +// free the loaded image -- this is just free() +STBIDEF void stbi_image_free (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit (char const 
*filename); +STBIDEF int stbi_is_16_bit_from_file(FILE *f); +#endif + + + +// for image formats that explicitly notate that they have premultiplied alpha, +// we just return the colors as stored in the file. set this flag to force +// unpremultiplication. results are undefined if the unpremultiply overflow. +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); + +// indicate whether we should process iphone images back to canonical format, +// or just pass them through "as-is" +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); + +// flip the image vertically, so the first pixel in the output array is the bottom left +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); + +// ZLIB client - used by PNG, available for other purposes + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); +STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + + +#ifdef __cplusplus +} +#endif + +// +// +//// end header file ///////////////////////////////////////////////////// +#endif // STBI_INCLUDE_STB_IMAGE_H + +#ifdef STB_IMAGE_IMPLEMENTATION + +#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \ + || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \ + || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \ + || defined(STBI_ONLY_ZLIB) + #ifndef STBI_ONLY_JPEG + #define STBI_NO_JPEG + #endif + #ifndef STBI_ONLY_PNG + 
#define STBI_NO_PNG + #endif + #ifndef STBI_ONLY_BMP + #define STBI_NO_BMP + #endif + #ifndef STBI_ONLY_PSD + #define STBI_NO_PSD + #endif + #ifndef STBI_ONLY_TGA + #define STBI_NO_TGA + #endif + #ifndef STBI_ONLY_GIF + #define STBI_NO_GIF + #endif + #ifndef STBI_ONLY_HDR + #define STBI_NO_HDR + #endif + #ifndef STBI_ONLY_PIC + #define STBI_NO_PIC + #endif + #ifndef STBI_ONLY_PNM + #define STBI_NO_PNM + #endif +#endif + +#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB) +#define STBI_NO_ZLIB +#endif + + +#include +#include // ptrdiff_t on osx +#include +#include +#include + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +#include // ldexp, pow +#endif + +#ifndef STBI_NO_STDIO +#include +#endif + +#ifndef STBI_ASSERT +#include +#define STBI_ASSERT(x) assert(x) +#endif + + +#ifndef _MSC_VER + #ifdef __cplusplus + #define stbi_inline inline + #else + #define stbi_inline + #endif +#else + #define stbi_inline __forceinline +#endif + + +#ifdef _MSC_VER +typedef unsigned short stbi__uint16; +typedef signed short stbi__int16; +typedef unsigned int stbi__uint32; +typedef signed int stbi__int32; +#else +#include +typedef uint16_t stbi__uint16; +typedef int16_t stbi__int16; +typedef uint32_t stbi__uint32; +typedef int32_t stbi__int32; +#endif + +// should produce compiler error if size is wrong +typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 
1 : -1]; + +#ifdef _MSC_VER +#define STBI_NOTUSED(v) (void)(v) +#else +#define STBI_NOTUSED(v) (void)sizeof(v) +#endif + +#ifdef _MSC_VER +#define STBI_HAS_LROTL +#endif + +#ifdef STBI_HAS_LROTL + #define stbi_lrot(x,y) _lrotl(x,y) +#else + #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y)))) +#endif + +#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) +// ok +#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)." +#endif + +#ifndef STBI_MALLOC +#define STBI_MALLOC(sz) malloc(sz) +#define STBI_REALLOC(p,newsz) realloc(p,newsz) +#define STBI_FREE(p) free(p) +#endif + +#ifndef STBI_REALLOC_SIZED +#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz) +#endif + +// x86/x64 detection +#if defined(__x86_64__) || defined(_M_X64) +#define STBI__X64_TARGET +#elif defined(__i386) || defined(_M_IX86) +#define STBI__X86_TARGET +#endif + +#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) +// gcc doesn't support sse2 intrinsics unless you compile with -msse2, +// which in turn means it gets to use SSE2 everywhere. This is unfortunate, +// but previous attempts to provide the SSE2 functions with runtime +// detection caused numerous issues. The way architecture extensions are +// exposed in GCC/Clang is, sadly, not really suited for one-file libs. +// New behavior: if compiled with -msse2, we use SSE2 without any +// detection; if not, we don't use it at all. 
+#define STBI_NO_SIMD +#endif + +#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD) +// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET +// +// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the +// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant. +// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not +// simultaneously enabling "-mstackrealign". +// +// See https://github.com/nothings/stb/issues/81 for more information. +// +// So default to no SSE2 on 32-bit MinGW. If you've read this far and added +// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2. +#define STBI_NO_SIMD +#endif + +#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) +#define STBI_SSE2 +#include + +#ifdef _MSC_VER + +#if _MSC_VER >= 1400 // not VC6 +#include // __cpuid +static int stbi__cpuid3(void) +{ + int info[4]; + __cpuid(info,1); + return info[3]; +} +#else +static int stbi__cpuid3(void) +{ + int res; + __asm { + mov eax,1 + cpuid + mov res,edx + } + return res; +} +#endif + +#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name + +static int stbi__sse2_available(void) +{ + int info3 = stbi__cpuid3(); + return ((info3 >> 26) & 1) != 0; +} +#else // assume GCC-style if not VC++ +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) + +static int stbi__sse2_available(void) +{ + // If we're even attempting to compile this on GCC/Clang, that means + // -msse2 is on, which means the compiler is allowed to use SSE2 + // instructions at will, and so are we. 
+ return 1; +} +#endif +#endif + +// ARM NEON +#if defined(STBI_NO_SIMD) && defined(STBI_NEON) +#undef STBI_NEON +#endif + +#ifdef STBI_NEON +#include +// assume GCC or Clang on ARM targets +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) +#endif + +#ifndef STBI_SIMD_ALIGN +#define STBI_SIMD_ALIGN(type, name) type name +#endif + +/////////////////////////////////////////////// +// +// stbi__context struct and start_xxx functions + +// stbi__context structure is our basic context used by all images, so it +// contains all the IO context, plus some basic image information +typedef struct +{ + stbi__uint32 img_x, img_y; + int img_n, img_out_n; + + stbi_io_callbacks io; + void *io_user_data; + + int read_from_callbacks; + int buflen; + stbi_uc buffer_start[128]; + + stbi_uc *img_buffer, *img_buffer_end; + stbi_uc *img_buffer_original, *img_buffer_original_end; +} stbi__context; + + +static void stbi__refill_buffer(stbi__context *s); + +// initialize a memory-decode context +static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len) +{ + s->io.read = NULL; + s->read_from_callbacks = 0; + s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer; + s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len; +} + +// initialize a callback-based context +static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user) +{ + s->io = *c; + s->io_user_data = user; + s->buflen = sizeof(s->buffer_start); + s->read_from_callbacks = 1; + s->img_buffer_original = s->buffer_start; + stbi__refill_buffer(s); + s->img_buffer_original_end = s->img_buffer_end; +} + +#ifndef STBI_NO_STDIO + +static int stbi__stdio_read(void *user, char *data, int size) +{ + return (int) fread(data,1,size,(FILE*) user); +} + +static void stbi__stdio_skip(void *user, int n) +{ + fseek((FILE*) user, n, SEEK_CUR); +} + +static int stbi__stdio_eof(void *user) +{ + return feof((FILE*) user); +} + +static stbi_io_callbacks 
stbi__stdio_callbacks = +{ + stbi__stdio_read, + stbi__stdio_skip, + stbi__stdio_eof, +}; + +static void stbi__start_file(stbi__context *s, FILE *f) +{ + stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); +} + +//static void stop_file(stbi__context *s) { } + +#endif // !STBI_NO_STDIO + +static void stbi__rewind(stbi__context *s) +{ + // conceptually rewind SHOULD rewind to the beginning of the stream, + // but we just rewind to the beginning of the initial buffer, because + // we only use it after doing 'test', which only ever looks at at most 92 bytes + s->img_buffer = s->img_buffer_original; + s->img_buffer_end = s->img_buffer_original_end; +} + +enum +{ + STBI_ORDER_RGB, + STBI_ORDER_BGR +}; + +typedef struct +{ + int bits_per_channel; + int num_channels; + int channel_order; +} stbi__result_info; + +#ifndef STBI_NO_JPEG +static int stbi__jpeg_test(stbi__context *s); +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNG +static int stbi__png_test(stbi__context *s); +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__png_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_BMP +static int stbi__bmp_test(stbi__context *s); +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_TGA +static int stbi__tga_test(stbi__context *s); +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s); +static 
void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__psd_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_HDR +static int stbi__hdr_test(stbi__context *s); +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_test(stbi__context *s); +static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_GIF +static int stbi__gif_test(stbi__context *s); +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNM +static int stbi__pnm_test(stbi__context *s); +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +// this is not threadsafe +static const char *stbi__g_failure_reason; + +STBIDEF const char *stbi_failure_reason(void) +{ + return stbi__g_failure_reason; +} + +static int stbi__err(const char *str) +{ + stbi__g_failure_reason = str; + return 0; +} + +static void *stbi__malloc(size_t size) +{ + return STBI_MALLOC(size); +} + +// stb_image uses ints pervasively, including for offset calculations. +// therefore the largest decoded image size we can support with the +// current code, even on 64-bit targets, is INT_MAX. this is not a +// significant limitation for the intended use case. 
+// +// we do, however, need to make sure our size calculations don't +// overflow. hence a few helper functions for size calculations that +// multiply integers together, making sure that they're non-negative +// and no overflow occurs. + +// return 1 if the sum is valid, 0 on overflow. +// negative terms are considered invalid. +static int stbi__addsizes_valid(int a, int b) +{ + if (b < 0) return 0; + // now 0 <= b <= INT_MAX, hence also + // 0 <= INT_MAX - b <= INTMAX. + // And "a + b <= INT_MAX" (which might overflow) is the + // same as a <= INT_MAX - b (no overflow) + return a <= INT_MAX - b; +} + +// returns 1 if the product is valid, 0 on overflow. +// negative factors are considered invalid. +static int stbi__mul2sizes_valid(int a, int b) +{ + if (a < 0 || b < 0) return 0; + if (b == 0) return 1; // mul-by-0 is always safe + // portable way to check for no overflows in a*b + return a <= INT_MAX/b; +} + +// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow +static int stbi__mad2sizes_valid(int a, int b, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add); +} + +// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow +static int stbi__mad3sizes_valid(int a, int b, int c, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__addsizes_valid(a*b*c, add); +} + +// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add); +} +#endif + +// mallocs with size overflow checking +static void *stbi__malloc_mad2(int a, int b, int add) +{ + if (!stbi__mad2sizes_valid(a, b, add)) return NULL; + return stbi__malloc(a*b + add); +} + +static void 
*stbi__malloc_mad3(int a, int b, int c, int add) +{ + if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL; + return stbi__malloc(a*b*c + add); +} + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +static void *stbi__malloc_mad4(int a, int b, int c, int d, int add) +{ + if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; + return stbi__malloc(a*b*c*d + add); +} +#endif + +// stbi__err - error +// stbi__errpf - error returning pointer to float +// stbi__errpuc - error returning pointer to unsigned char + +#ifdef STBI_NO_FAILURE_STRINGS + #define stbi__err(x,y) 0 +#elif defined(STBI_FAILURE_USERMSG) + #define stbi__err(x,y) stbi__err(y) +#else + #define stbi__err(x,y) stbi__err(x) +#endif + +#define stbi__errpf(x,y) ((float *)(size_t) (stbi__err(x,y)?NULL:NULL)) +#define stbi__errpuc(x,y) ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL)) + +STBIDEF void stbi_image_free(void *retval_from_stbi_load) +{ + STBI_FREE(retval_from_stbi_load); +} + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); +#endif + +#ifndef STBI_NO_HDR +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp); +#endif + +static int stbi__vertically_flip_on_load = 0; + +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) +{ + stbi__vertically_flip_on_load = flag_true_if_should_flip; +} + +static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields + ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed + ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order + ri->num_channels = 0; + + #ifndef STBI_NO_JPEG + if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PNG + if (stbi__png_test(s)) return 
stbi__png_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_BMP + if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_GIF + if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PSD + if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc); + #endif + #ifndef STBI_NO_PIC + if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PNM + if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri); + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); + return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + + #ifndef STBI_NO_TGA + // test tga last because it's a crappy test! + if (stbi__tga_test(s)) + return stbi__tga_load(s,x,y,comp,req_comp, ri); + #endif + + return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); +} + +static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi_uc *reduced; + + reduced = (stbi_uc *) stbi__malloc(img_len); + if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling + + STBI_FREE(orig); + return reduced; +} + +static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi__uint16 *enlarged; + + enlarged = (stbi__uint16 *) stbi__malloc(img_len*2); + if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff + + STBI_FREE(orig); + return enlarged; +} + +static void 
stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) +{ + int row; + size_t bytes_per_row = (size_t)w * bytes_per_pixel; + stbi_uc temp[2048]; + stbi_uc *bytes = (stbi_uc *)image; + + for (row = 0; row < (h>>1); row++) { + stbi_uc *row0 = bytes + row*bytes_per_row; + stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; + // swap row0 with row1 + size_t bytes_left = bytes_per_row; + while (bytes_left) { + size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp); + memcpy(temp, row0, bytes_copy); + memcpy(row0, row1, bytes_copy); + memcpy(row1, temp, bytes_copy); + row0 += bytes_copy; + row1 += bytes_copy; + bytes_left -= bytes_copy; + } + } +} + +static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel) +{ + int slice; + int slice_size = w * h * bytes_per_pixel; + + stbi_uc *bytes = (stbi_uc *)image; + for (slice = 0; slice < z; ++slice) { + stbi__vertical_flip(bytes, w, h, bytes_per_pixel); + bytes += slice_size; + } +} + +static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); + + if (result == NULL) + return NULL; + + if (ri.bits_per_channel != 8) { + STBI_ASSERT(ri.bits_per_channel == 16); + result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 8; + } + + // @TODO: move stbi__convert_format to here + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? 
req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); + } + + return (unsigned char *) result; +} + +static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); + + if (result == NULL) + return NULL; + + if (ri.bits_per_channel != 16) { + STBI_ASSERT(ri.bits_per_channel == 8); + result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 16; + } + + // @TODO: move stbi__convert_format16 to here + // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); + } + + return (stbi__uint16 *) result; +} + +#if !defined(STBI_NO_HDR) || !defined(STBI_NO_LINEAR) +static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) +{ + if (stbi__vertically_flip_on_load && result != NULL) { + int channels = req_comp ? 
req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); + } +} +#endif + +#ifndef STBI_NO_STDIO + +static FILE *stbi__fopen(char const *filename, char const *mode) +{ + FILE *f; +#if defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != fopen_s(&f, filename, mode)) + f=0; +#else + f = fopen(filename, mode); +#endif + return f; +} + + +STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + unsigned char *result; + if (!f) return stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__uint16 *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + stbi__uint16 *result; + if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file_16(f,x,y,comp,req_comp); + fclose(f); + return result; +} + + +#endif //!STBI_NO_STDIO + +STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + 
stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_mem(&s,buffer,len); + + result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp); + if (stbi__vertically_flip_on_load) { + stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); + } + + return result; +} +#endif + +#ifndef STBI_NO_LINEAR +static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + stbi__result_info ri; + float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); + if (hdr_data) + stbi__float_postprocess(hdr_data,x,y,comp,req_comp); + return hdr_data; + } + #endif + data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); + if (data) + return stbi__ldr_to_hdr(data, *x, *y, req_comp ? 
req_comp : *comp); + return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); +} + +STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + float *result; + FILE *f = stbi__fopen(filename, "rb"); + if (!f) return stbi__errpf("can't fopen", "Unable to open file"); + result = stbi_loadf_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_file(&s,f); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} +#endif // !STBI_NO_STDIO + +#endif // !STBI_NO_LINEAR + +// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is +// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always +// reports false! 
+ +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(buffer); + STBI_NOTUSED(len); + return 0; + #endif +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result=0; + if (f) { + result = stbi_is_hdr_from_file(f); + fclose(f); + } + return result; +} + +STBIDEF int stbi_is_hdr_from_file(FILE *f) +{ + #ifndef STBI_NO_HDR + long pos = ftell(f); + int res; + stbi__context s; + stbi__start_file(&s,f); + res = stbi__hdr_test(&s); + fseek(f, pos, SEEK_SET); + return res; + #else + STBI_NOTUSED(f); + return 0; + #endif +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(clbk); + STBI_NOTUSED(user); + return 0; + #endif +} + +#ifndef STBI_NO_LINEAR +static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f; + +STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } +STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } +#endif + +static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f; + +STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; } +STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; } + + +////////////////////////////////////////////////////////////////////////////// +// +// Common code used by all image loaders +// + +enum +{ + STBI__SCAN_load=0, + STBI__SCAN_type, + STBI__SCAN_header +}; + +static void stbi__refill_buffer(stbi__context *s) +{ + int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); + if (n == 0) { + // at end of file, treat same as if from memory, but need to handle case + // where 
s->img_buffer isn't pointing to safe memory, e.g. 0-byte file + s->read_from_callbacks = 0; + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start+1; + *s->img_buffer = 0; + } else { + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start + n; + } +} + +stbi_inline static stbi_uc stbi__get8(stbi__context *s) +{ + if (s->img_buffer < s->img_buffer_end) + return *s->img_buffer++; + if (s->read_from_callbacks) { + stbi__refill_buffer(s); + return *s->img_buffer++; + } + return 0; +} + +stbi_inline static int stbi__at_eof(stbi__context *s) +{ + if (s->io.read) { + if (!(s->io.eof)(s->io_user_data)) return 0; + // if feof() is true, check if buffer = end + // special case: we've only got the special 0 character at the end + if (s->read_from_callbacks == 0) return 1; + } + + return s->img_buffer >= s->img_buffer_end; +} + +static void stbi__skip(stbi__context *s, int n) +{ + if (n < 0) { + s->img_buffer = s->img_buffer_end; + return; + } + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + s->img_buffer = s->img_buffer_end; + (s->io.skip)(s->io_user_data, n - blen); + return; + } + } + s->img_buffer += n; +} + +static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) +{ + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + int res, count; + + memcpy(buffer, s->img_buffer, blen); + + count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); + res = (count == (n-blen)); + s->img_buffer = s->img_buffer_end; + return res; + } + } + + if (s->img_buffer+n <= s->img_buffer_end) { + memcpy(buffer, s->img_buffer, n); + s->img_buffer += n; + return 1; + } else + return 0; +} + +static int stbi__get16be(stbi__context *s) +{ + int z = stbi__get8(s); + return (z << 8) + stbi__get8(s); +} + +static stbi__uint32 stbi__get32be(stbi__context *s) +{ + stbi__uint32 z = stbi__get16be(s); + return (z << 16) + stbi__get16be(s); +} + +#if 
defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) +// nothing +#else +static int stbi__get16le(stbi__context *s) +{ + int z = stbi__get8(s); + return z + (stbi__get8(s) << 8); +} +#endif + +#ifndef STBI_NO_BMP +static stbi__uint32 stbi__get32le(stbi__context *s) +{ + stbi__uint32 z = stbi__get16le(s); + return z + (stbi__get16le(s) << 16); +} +#endif + +#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings + + +////////////////////////////////////////////////////////////////////////////// +// +// generic converter from built-in img_n to req_comp +// individual types do this automatically as much as possible (e.g. jpeg +// does all cases internally since it needs to colorspace convert anyway, +// and it never has alpha, so very few cases ). png can automatically +// interleave an alpha=255 channel, but falls back to this for other cases +// +// assume data buffer is malloced, so malloc a new one and free that one +// only failure mode is malloc failing + +static stbi_uc stbi__compute_y(int r, int g, int b) +{ + return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + unsigned char *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); + if (good == NULL) { + STBI_FREE(data); + return stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, 
req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; + default: STBI_ASSERT(0); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} + +static stbi__uint16 stbi__compute_y_16(int r, int g, int b) +{ + return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + stbi__uint16 *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); + if (good == NULL) { + STBI_FREE(data); + return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + stbi__uint16 *src = data + j * x * img_n ; + stbi__uint16 *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + 
STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; + default: STBI_ASSERT(0); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) +{ + int i,k,n; + float *output; + if (!data) return NULL; + output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); + } + if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f; + } + STBI_FREE(data); + return output; +} +#endif + +#ifndef STBI_NO_HDR +#define stbi__float2int(x) ((int) (x)) +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) +{ + int i,k,n; + stbi_uc *output; + if (!data) return NULL; + output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of 
memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + if (k < comp) { + float z = data[i*comp+k] * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + } + STBI_FREE(data); + return output; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// "baseline" JPEG/JFIF decoder +// +// simple implementation +// - doesn't support delayed output of y-dimension +// - simple interface (only one output format: 8-bit interleaved RGB) +// - doesn't try to recover corrupt jpegs +// - doesn't allow partial loading, loading multiple at once +// - still fast on x86 (copying globals into locals doesn't help x86) +// - allocates lots of intermediate memory (full size of all components) +// - non-interleaved case requires this anyway +// - allows good upsampling (see next) +// high-quality +// - upsampled channels are bilinearly interpolated, even across blocks +// - quality integer IDCT derived from IJG's 'slow' +// performance +// - fast huffman; reasonable integer IDCT +// - some SIMD kernels for common paths on targets with SSE2/NEON +// - uses a lot of intermediate memory, could cache poorly + +#ifndef STBI_NO_JPEG + +// huffman decoding acceleration +#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache + +typedef struct +{ + stbi_uc fast[1 << FAST_BITS]; + // weirdly, repacking this into AoS is a 10% speed loss, instead of a win + stbi__uint16 code[256]; + stbi_uc values[256]; + stbi_uc size[257]; + unsigned int maxcode[18]; + int delta[17]; // old 'firstsymbol' - old 'firstcode' +} stbi__huffman; + +typedef struct +{ + stbi__context *s; + stbi__huffman 
huff_dc[4]; + stbi__huffman huff_ac[4]; + stbi__uint16 dequant[4][64]; + stbi__int16 fast_ac[4][1 << FAST_BITS]; + +// sizes for components, interleaved MCUs + int img_h_max, img_v_max; + int img_mcu_x, img_mcu_y; + int img_mcu_w, img_mcu_h; + +// definition of jpeg image component + struct + { + int id; + int h,v; + int tq; + int hd,ha; + int dc_pred; + + int x,y,w2,h2; + stbi_uc *data; + void *raw_data, *raw_coeff; + stbi_uc *linebuf; + short *coeff; // progressive only + int coeff_w, coeff_h; // number of 8x8 coefficient blocks + } img_comp[4]; + + stbi__uint32 code_buffer; // jpeg entropy-coded buffer + int code_bits; // number of valid bits + unsigned char marker; // marker seen while filling entropy buffer + int nomore; // flag if we saw a marker so must stop + + int progressive; + int spec_start; + int spec_end; + int succ_high; + int succ_low; + int eob_run; + int jfif; + int app14_color_transform; // Adobe APP14 tag + int rgb; + + int scan_n, order[4]; + int restart_interval, todo; + +// kernels + void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]); + void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step); + stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs); +} stbi__jpeg; + +static int stbi__build_huffman(stbi__huffman *h, int *count) +{ + int i,j,k=0; + unsigned int code; + // build size list for each symbol (from JPEG spec) + for (i=0; i < 16; ++i) + for (j=0; j < count[i]; ++j) + h->size[k++] = (stbi_uc) (i+1); + h->size[k] = 0; + + // compute actual symbols (from jpeg spec) + code = 0; + k = 0; + for(j=1; j <= 16; ++j) { + // compute delta to add to code to compute symbol id + h->delta[j] = k - code; + if (h->size[k] == j) { + while (h->size[k] == j) + h->code[k++] = (stbi__uint16) (code++); + if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); + } + // compute largest code + 1 for this 
size, preshifted as needed later + h->maxcode[j] = code << (16-j); + code <<= 1; + } + h->maxcode[j] = 0xffffffff; + + // build non-spec acceleration table; 255 is flag for not-accelerated + memset(h->fast, 255, 1 << FAST_BITS); + for (i=0; i < k; ++i) { + int s = h->size[i]; + if (s <= FAST_BITS) { + int c = h->code[i] << (FAST_BITS-s); + int m = 1 << (FAST_BITS-s); + for (j=0; j < m; ++j) { + h->fast[c+j] = (stbi_uc) i; + } + } + } + return 1; +} + +// build a table that decodes both magnitude and value of small ACs in +// one go. +static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) +{ + int i; + for (i=0; i < (1 << FAST_BITS); ++i) { + stbi_uc fast = h->fast[i]; + fast_ac[i] = 0; + if (fast < 255) { + int rs = h->values[fast]; + int run = (rs >> 4) & 15; + int magbits = rs & 15; + int len = h->size[fast]; + + if (magbits && len + magbits <= FAST_BITS) { + // magnitude code followed by receive_extend code + int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); + int m = 1 << (magbits - 1); + if (k < m) k += (~0U << magbits) + 1; + // if the result is small enough, we can fit it in fast_ac table + if (k >= -128 && k <= 127) + fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); + } + } + } +} + +static void stbi__grow_buffer_unsafe(stbi__jpeg *j) +{ + do { + unsigned int b = j->nomore ? 
0 : stbi__get8(j->s); + if (b == 0xff) { + int c = stbi__get8(j->s); + while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes + if (c != 0) { + j->marker = (unsigned char) c; + j->nomore = 1; + return; + } + } + j->code_buffer |= b << (24 - j->code_bits); + j->code_bits += 8; + } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) +{ + unsigned int temp; + int c,k; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + // look at the top FAST_BITS and determine what symbol ID it is, + // if the code is <= FAST_BITS + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + k = h->fast[c]; + if (k < 255) { + int s = h->size[k]; + if (s > j->code_bits) + return -1; + j->code_buffer <<= s; + j->code_bits -= s; + return h->values[k]; + } + + // naive test is to shift the code_buffer down so k bits are + // valid, then test against maxcode. To speed this up, we've + // preshifted maxcode left so that it has (16-k) 0s at the + // end; in other words, regardless of the number of bits, it + // wants to be compared against something shifted to have 16; + // that way we don't need to shift inside the loop. + temp = j->code_buffer >> 16; + for (k=FAST_BITS+1 ; ; ++k) + if (temp < h->maxcode[k]) + break; + if (k == 17) { + // error! 
code not found + j->code_bits -= 16; + return -1; + } + + if (k > j->code_bits) + return -1; + + // convert the huffman code to the symbol id + c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; + STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]); + + // convert the id to a symbol + j->code_bits -= k; + j->code_buffer <<= k; + return h->values[c]; +} + +// bias[n] = (-1<code_bits < n) stbi__grow_buffer_unsafe(j); + + sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB + k = stbi_lrot(j->code_buffer, n); + STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask))); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k + (stbi__jbias[n] & ~sgn); +} + +// get some unsigned bits +stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n) +{ + unsigned int k; + if (j->code_bits < n) stbi__grow_buffer_unsafe(j); + k = stbi_lrot(j->code_buffer, n); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k; +} + +stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) +{ + unsigned int k; + if (j->code_bits < 1) stbi__grow_buffer_unsafe(j); + k = j->code_buffer; + j->code_buffer <<= 1; + --j->code_bits; + return k & 0x80000000; +} + +// given a value that's at position X in the zigzag stream, +// where does it appear in the 8x8 matrix coded as row-major? 
+static const stbi_uc stbi__jpeg_dezigzag[64+15] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // let corrupt input sample past end + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63 +}; + +// decode one 64-entry block-- +static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant) +{ + int diff,dc,k; + int t; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + t = stbi__jpeg_huff_decode(j, hdc); + if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + + // 0 all the ac values now so we can do it 32-bits at a time + memset(data,0,64*sizeof(data[0])); + + diff = t ? stbi__extend_receive(j, t) : 0; + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc * dequant[0]); + + // decode AC components, see JPEG spec + k = 1; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) * dequant[zig]); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (rs != 0xf0) break; // end block + k += 16; + } else { + k += r; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); + } + } + } while (k < 64); + return 1; +} + +static int 
stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) +{ + int diff,dc; + int t; + if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + if (j->succ_high == 0) { + // first scan for DC coefficient, must be first + memset(data,0,64*sizeof(data[0])); // 0 all the ac values now + t = stbi__jpeg_huff_decode(j, hdc); + diff = t ? stbi__extend_receive(j, t) : 0; + + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc << j->succ_low); + } else { + // refinement scan for DC coefficient + if (stbi__jpeg_get_bit(j)) + data[0] += (short) (1 << j->succ_low); + } + return 1; +} + +// @OPTIMIZE: store non-zigzagged during the decode passes, +// and only de-zigzag when dequantizing +static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac) +{ + int k; + if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->succ_high == 0) { + int shift = j->succ_low; + + if (j->eob_run) { + --j->eob_run; + return 1; + } + + k = j->spec_start; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) << shift); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r); + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + --j->eob_run; + break; + } + k += 16; + } else { + k += r; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) << shift); + } + } + } while 
(k <= j->spec_end); + } else { + // refinement scan for these AC coefficients + + short bit = (short) (1 << j->succ_low); + + if (j->eob_run) { + --j->eob_run; + for (k = j->spec_start; k <= j->spec_end; ++k) { + short *p = &data[stbi__jpeg_dezigzag[k]]; + if (*p != 0) + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } + } else { + k = j->spec_start; + do { + int r,s; + int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r) - 1; + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + r = 64; // force end of block + } else { + // r=15 s=0 should write 16 0s, so we just do + // a run of 15 0s and then write s (which is 0), + // so we don't have to do anything special here + } + } else { + if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); + // sign bit + if (stbi__jpeg_get_bit(j)) + s = bit; + else + s = -bit; + } + + // advance by r + while (k <= j->spec_end) { + short *p = &data[stbi__jpeg_dezigzag[k++]]; + if (*p != 0) { + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } else { + if (r == 0) { + *p = (short) s; + break; + } + --r; + } + } + } while (k <= j->spec_end); + } + } + return 1; +} + +// take a -128..127 value and stbi__clamp it and convert to 0..255 +stbi_inline static stbi_uc stbi__clamp(int x) +{ + // trick to use a single test to catch both cases + if ((unsigned int) x > 255) { + if (x < 0) return 0; + if (x > 255) return 255; + } + return (stbi_uc) x; +} + +#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5))) +#define stbi__fsh(x) ((x) * 4096) + +// derived from jidctint -- DCT_ISLOW +#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ + int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = 
       (p2+p3) * stbi__f2f(0.5411961f); \
   t2 = p1 + p3*stbi__f2f(-1.847759065f); \
   t3 = p1 + p2*stbi__f2f( 0.765366865f); \
   p2 = s0; \
   p3 = s4; \
   t0 = stbi__fsh(p2+p3); \
   t1 = stbi__fsh(p2-p3); \
   x0 = t0+t3; \
   x3 = t0-t3; \
   x1 = t1+t2; \
   x2 = t1-t2; \
   t0 = s7; \
   t1 = s5; \
   t2 = s3; \
   t3 = s1; \
   p3 = t0+t2; \
   p4 = t1+t3; \
   p1 = t0+t3; \
   p2 = t1+t2; \
   p5 = (p3+p4)*stbi__f2f( 1.175875602f); \
   t0 = t0*stbi__f2f( 0.298631336f); \
   t1 = t1*stbi__f2f( 2.053119869f); \
   t2 = t2*stbi__f2f( 3.072711026f); \
   t3 = t3*stbi__f2f( 1.501321110f); \
   p1 = p5 + p1*stbi__f2f(-0.899976223f); \
   p2 = p5 + p2*stbi__f2f(-2.562915447f); \
   p3 = p3*stbi__f2f(-1.961570560f); \
   p4 = p4*stbi__f2f(-0.390180644f); \
   t3 += p1+p4; \
   t2 += p2+p3; \
   t1 += p2+p4; \
   t0 += p1+p3;

// reference integer 8x8 inverse DCT: a 1D pass down each column into val[],
// then a 1D pass across each row, clamping the result into out[]
static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
{
   int i,val[64],*v=val;
   stbi_uc *o;
   short *d = data;

   // columns
   for (i=0; i < 8; ++i,++d, ++v) {
      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
           && d[40]==0 && d[48]==0 && d[56]==0) {
         // no shortcut                 0     seconds
         // (1|2|3|4|5|6|7)==0          0     seconds
         // all separate               -0.047 seconds
         // 1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
         int dcterm = d[0]*4;
         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
      } else {
         STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
         // constants scaled things up by 1<<12; let's bring them back
         // down, but keep 2 extra bits of precision
         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
         v[ 0] = (x0+t3) >> 10;
         v[56] = (x0-t3) >> 10;
         v[ 8] = (x1+t2) >> 10;
         v[48] = (x1-t2) >> 10;
         v[16] = (x2+t1) >> 10;
         v[40] = (x2-t1) >> 10;
         v[24] = (x3+t0) >> 10;
         v[32] = (x3-t0) >> 10;
      }
   }

   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
      // no fast case since the first 1D IDCT spread components out
      STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
      // constants scaled things up by 1<<12, plus we had 1<<2 from first
      // loop, plus horizontal and vertical each scale by sqrt(8) so together
      // we've got an extra 1<<3, so 1<<17 total we need to remove.
      // so we want to round that, which means adding 0.5 * 1<<17,
      // aka 65536. Also, we'll end up with -128 to 127 that we want
      // to encode as 0..255 by adding 128, so we'll add that before the shift
      x0 += 65536 + (128<<17);
      x1 += 65536 + (128<<17);
      x2 += 65536 + (128<<17);
      x3 += 65536 + (128<<17);
      // tried computing the shifts into temps, or'ing the temps to see
      // if any were out of range, but that was slower
      o[0] = stbi__clamp((x0+t3) >> 17);
      o[7] = stbi__clamp((x0-t3) >> 17);
      o[1] = stbi__clamp((x1+t2) >> 17);
      o[6] = stbi__clamp((x1-t2) >> 17);
      o[2] = stbi__clamp((x2+t1) >> 17);
      o[5] = stbi__clamp((x2-t1) >> 17);
      o[3] = stbi__clamp((x3+t0) >> 17);
      o[4] = stbi__clamp((x3-t0) >> 17);
   }
}

#ifdef STBI_SSE2
// sse2 integer IDCT. not the fastest possible implementation but it
// produces bit-identical results to the generic C version so it's
// fully "transparent".
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   // This is constructed to match our regular (generic) integer IDCT exactly.
   __m128i row0, row1, row2, row3, row4, row5, row6, row7;
   __m128i tmp;

   // dot product constant: even elems=x, odd elems=y
   #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))

   // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
   // out(1) = c1[even]*x + c1[odd]*y
   #define dct_rot(out0,out1, x,y,c0,c1) \
      __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
      __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
      __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
      __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
      __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
      __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)

   // out = in << 12  (in 16-bit, out 32-bit)
   #define dct_widen(out, in) \
      __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
      __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)

   // wide add
   #define dct_wadd(out, a, b) \
      __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_add_epi32(a##_h, b##_h)

   // wide sub
   #define dct_wsub(out, a, b) \
      __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)

   // butterfly a/b, add bias, then shift by "s" and pack
   #define dct_bfly32o(out0, out1, a,b,bias,s) \
      { \
         __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
         __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
         dct_wadd(sum, abiased, b); \
         dct_wsub(dif, abiased, b); \
         out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
         out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
      }

   // 8-bit interleave step (for transposes)
   #define dct_interleave8(a, b) \
      tmp = a; \
      a = _mm_unpacklo_epi8(a, b); \
      b = _mm_unpackhi_epi8(tmp, b)

   // 16-bit interleave step (for transposes)
   #define dct_interleave16(a, b) \
      tmp = a; \
      a = _mm_unpacklo_epi16(a, b); \
      b = _mm_unpackhi_epi16(tmp, b)

   // one full 1D IDCT pass over rows 0..7; mirrors STBI__IDCT_1D
   #define dct_pass(bias,shift) \
      { \
         /* even part */ \
         dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
         __m128i sum04 = _mm_add_epi16(row0, row4); \
         __m128i dif04 = _mm_sub_epi16(row0, row4); \
         dct_widen(t0e, sum04); \
         dct_widen(t1e, dif04); \
         dct_wadd(x0, t0e, t3e); \
         dct_wsub(x3, t0e, t3e); \
         dct_wadd(x1, t1e, t2e); \
         dct_wsub(x2, t1e, t2e); \
         /* odd part */ \
         dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
         dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
         __m128i sum17 = _mm_add_epi16(row1, row7); \
         __m128i sum35 = _mm_add_epi16(row3, row5); \
         dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
         dct_wadd(x4, y0o, y4o); \
         dct_wadd(x5, y1o, y5o); \
         dct_wadd(x6, y2o, y5o); \
         dct_wadd(x7, y3o, y4o); \
         dct_bfly32o(row0,row7, x0,x7,bias,shift); \
         dct_bfly32o(row1,row6, x1,x6,bias,shift); \
         dct_bfly32o(row2,row5, x2,x5,bias,shift); \
         dct_bfly32o(row3,row4, x3,x4,bias,shift); \
      }

   __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
   __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
   __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
   __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
   __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
   __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
   __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
   __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));

   // rounding biases in column/row passes, see stbi__idct_block for explanation.
   __m128i bias_0 = _mm_set1_epi32(512);
   __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));

   // load
   row0 = _mm_load_si128((const __m128i *) (data + 0*8));
   row1 = _mm_load_si128((const __m128i *) (data + 1*8));
   row2 = _mm_load_si128((const __m128i *) (data + 2*8));
   row3 = _mm_load_si128((const __m128i *) (data + 3*8));
   row4 = _mm_load_si128((const __m128i *) (data + 4*8));
   row5 = _mm_load_si128((const __m128i *) (data + 5*8));
   row6 = _mm_load_si128((const __m128i *) (data + 6*8));
   row7 = _mm_load_si128((const __m128i *) (data + 7*8));

   // column pass
   dct_pass(bias_0, 10);

   {
      // 16bit 8x8 transpose pass 1
      dct_interleave16(row0, row4);
      dct_interleave16(row1, row5);
      dct_interleave16(row2, row6);
      dct_interleave16(row3, row7);

      // transpose pass 2
      dct_interleave16(row0, row2);
      dct_interleave16(row1, row3);
      dct_interleave16(row4, row6);
      dct_interleave16(row5, row7);

      // transpose pass 3
      dct_interleave16(row0, row1);
      dct_interleave16(row2, row3);
      dct_interleave16(row4, row5);
      dct_interleave16(row6, row7);
   }

   // row pass
   dct_pass(bias_1, 17);

   {
      // pack
      __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
      __m128i p1 = _mm_packus_epi16(row2, row3);
      __m128i p2 = _mm_packus_epi16(row4, row5);
      __m128i p3 = _mm_packus_epi16(row6, row7);

      // 8bit 8x8 transpose pass 1
      dct_interleave8(p0, p2); // a0e0a1e1...
      dct_interleave8(p1, p3); // c0g0c1g1...

      // transpose pass 2
      dct_interleave8(p0, p1); // a0c0e0g0...
      dct_interleave8(p2, p3); // b0d0f0h0...

      // transpose pass 3
      dct_interleave8(p0, p2); // a0b0c0d0...
      dct_interleave8(p1, p3); // a4b4c4d4...

      // store
      _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
   }

#undef dct_const
#undef dct_rot
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_interleave8
#undef dct_interleave16
#undef dct_pass
}

#endif // STBI_SSE2

#ifdef STBI_NEON

// NEON integer IDCT. should produce bit-identical
// results to the generic C version.
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;

   int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
   int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
   int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
   int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
   int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
   int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
   int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
   int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
   int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
   int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
   int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
   int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));

#define dct_long_mul(out, inq, coeff) \
   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)

#define dct_long_mac(out, acc, inq, coeff) \
   int32x4_t out##_l = vmlal_s16(acc##_l, \
                                 vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)

#define dct_widen(out, inq) \
   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)

// wide add
#define dct_wadd(out, a, b) \
   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vaddq_s32(a##_h, b##_h)

// wide sub
#define dct_wsub(out, a, b) \
   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vsubq_s32(a##_h, b##_h)

// butterfly a/b, then shift using "shiftop" by "s" and pack
#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
   { \
      dct_wadd(sum, a, b); \
      dct_wsub(dif, a, b); \
      out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
      out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
   }

// one full 1D IDCT pass over rows 0..7; mirrors STBI__IDCT_1D
#define dct_pass(shiftop, shift) \
   { \
      /* even part */ \
      int16x8_t sum26 = vaddq_s16(row2, row6); \
      dct_long_mul(p1e, sum26, rot0_0); \
      dct_long_mac(t2e, p1e, row6, rot0_1); \
      dct_long_mac(t3e, p1e, row2, rot0_2); \
      int16x8_t sum04 = vaddq_s16(row0, row4); \
      int16x8_t dif04 = vsubq_s16(row0, row4); \
      dct_widen(t0e, sum04); \
      dct_widen(t1e, dif04); \
      dct_wadd(x0, t0e, t3e); \
      dct_wsub(x3, t0e, t3e); \
      dct_wadd(x1, t1e, t2e); \
      dct_wsub(x2, t1e, t2e); \
      /* odd part */ \
      int16x8_t sum15 = vaddq_s16(row1, row5); \
      int16x8_t sum17 = vaddq_s16(row1, row7); \
      int16x8_t sum35 = vaddq_s16(row3, row5); \
      int16x8_t sum37 = vaddq_s16(row3, row7); \
      int16x8_t sumodd = vaddq_s16(sum17, sum35); \
      dct_long_mul(p5o, sumodd, rot1_0); \
      dct_long_mac(p1o, p5o, sum17, rot1_1); \
      dct_long_mac(p2o, p5o, sum35, rot1_2); \
      dct_long_mul(p3o, sum37, rot2_0); \
      dct_long_mul(p4o, sum15, rot2_1); \
      dct_wadd(sump13o, p1o, p3o); \
      dct_wadd(sump24o, p2o, p4o); \
      dct_wadd(sump23o, p2o, p3o); \
      dct_wadd(sump14o, p1o, p4o); \
      dct_long_mac(x4, sump13o, row7, rot3_0); \
      dct_long_mac(x5, sump24o, row5, rot3_1); \
      dct_long_mac(x6, sump23o, row3, rot3_2); \
      dct_long_mac(x7, sump14o, row1, rot3_3); \
      dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
      dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
      dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
      dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
   }

   // load
   row0 = vld1q_s16(data + 0*8);
   row1 = vld1q_s16(data + 1*8);
   row2 = vld1q_s16(data + 2*8);
   row3 = vld1q_s16(data + 3*8);
   row4 = vld1q_s16(data + 4*8);
   row5 = vld1q_s16(data + 5*8);
   row6 = vld1q_s16(data + 6*8);
   row7 = vld1q_s16(data + 7*8);

   // add DC bias
   row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));

   // column pass
   dct_pass(vrshrn_n_s32, 10);

   // 16bit 8x8 transpose
   {
// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
// whether compilers actually get this is another story, sadly.
#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }

      // pass 1
      dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
      dct_trn16(row2, row3);
      dct_trn16(row4, row5);
      dct_trn16(row6, row7);

      // pass 2
      dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
      dct_trn32(row1, row3);
      dct_trn32(row4, row6);
      dct_trn32(row5, row7);

      // pass 3
      dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
      dct_trn64(row1, row5);
      dct_trn64(row2, row6);
      dct_trn64(row3, row7);

#undef dct_trn16
#undef dct_trn32
#undef dct_trn64
   }

   // row pass
   // vrshrn_n_s32 only supports shifts up to 16, we need
   // 17. so do a non-rounding shift of 16 first then follow
   // up with a rounding shift by 1.
   dct_pass(vshrn_n_s32, 16);

   {
      // pack and round
      uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
      uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
      uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
      uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
      uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
      uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
      uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
      uint8x8_t p7 = vqrshrun_n_s16(row7, 1);

      // again, these can translate into one instruction, but often don't.
#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }

      // sadly can't use interleaved stores here since we only write
      // 8 bytes to each scan line!

      // 8x8 8-bit transpose pass 1
      dct_trn8_8(p0, p1);
      dct_trn8_8(p2, p3);
      dct_trn8_8(p4, p5);
      dct_trn8_8(p6, p7);

      // pass 2
      dct_trn8_16(p0, p2);
      dct_trn8_16(p1, p3);
      dct_trn8_16(p4, p6);
      dct_trn8_16(p5, p7);

      // pass 3
      dct_trn8_32(p0, p4);
      dct_trn8_32(p1, p5);
      dct_trn8_32(p2, p6);
      dct_trn8_32(p3, p7);

      // store
      vst1_u8(out, p0); out += out_stride;
      vst1_u8(out, p1); out += out_stride;
      vst1_u8(out, p2); out += out_stride;
      vst1_u8(out, p3); out += out_stride;
      vst1_u8(out, p4); out += out_stride;
      vst1_u8(out, p5); out += out_stride;
      vst1_u8(out, p6); out += out_stride;
      vst1_u8(out, p7);

#undef dct_trn8_8
#undef dct_trn8_16
#undef dct_trn8_32
   }

#undef dct_long_mul
#undef dct_long_mac
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_pass
}

#endif // STBI_NEON

#define STBI__MARKER_none  0xff
// if there's a pending marker from the entropy stream, return that
// otherwise, fetch from the stream and get a marker. if there's no
// marker, return 0xff, which is never a valid marker value
static stbi_uc stbi__get_marker(stbi__jpeg *j)
{
   stbi_uc x;
   if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
   x = stbi__get8(j->s);
   if (x != 0xff) return STBI__MARKER_none;
   while (x == 0xff)
      x = stbi__get8(j->s); // consume repeated 0xff fill bytes
   return x;
}

// in each scan, we'll have scan_n components, and the order
// of the components is specified by order[]
#define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)

// after a restart interval, stbi__jpeg_reset the entropy decoder and
// the dc prediction
static void stbi__jpeg_reset(stbi__jpeg *j)
{
   j->code_bits = 0;
   j->code_buffer = 0;
   j->nomore = 0;
   j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
   j->marker = STBI__MARKER_none;
   j->todo = j->restart_interval ?
             j->restart_interval : 0x7fffffff;
   j->eob_run = 0;
   // no more than 1<<31 MCUs if no restart_interval? that's plenty safe,
   // since we don't even allow 1<<30 pixels
}

// decode one entropy-coded scan: four cases, (baseline|progressive) x
// (non-interleaved single component | interleaved MCUs)
static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
{
   stbi__jpeg_reset(z);
   if (!z->progressive) {
      if (z->scan_n == 1) {
         int i,j;
         STBI_SIMD_ALIGN(short, data[64]);
         int n = z->order[0];
         // non-interleaved data, we just need to process one block at a time,
         // in trivial scanline order
         // number of blocks to do just depends on how many actual "pixels" this
         // component has, independent of interleaved MCU blocking and such
         int w = (z->img_comp[n].x+7) >> 3;
         int h = (z->img_comp[n].y+7) >> 3;
         for (j=0; j < h; ++j) {
            for (i=0; i < w; ++i) {
               int ha = z->img_comp[n].ha;
               if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
               // every data block is an MCU, so countdown the restart interval
               if (--z->todo <= 0) {
                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
                  // if it's NOT a restart, then just bail, so we get corrupt data
                  // rather than no data
                  if (!STBI__RESTART(z->marker)) return 1;
                  stbi__jpeg_reset(z);
               }
            }
         }
         return 1;
      } else { // interleaved
         int i,j,k,x,y;
         STBI_SIMD_ALIGN(short, data[64]);
         for (j=0; j < z->img_mcu_y; ++j) {
            for (i=0; i < z->img_mcu_x; ++i) {
               // scan an interleaved mcu... process scan_n components in order
               for (k=0; k < z->scan_n; ++k) {
                  int n = z->order[k];
                  // scan out an mcu's worth of this component; that's just determined
                  // by the basic H and V specified for the component
                  for (y=0; y < z->img_comp[n].v; ++y) {
                     for (x=0; x < z->img_comp[n].h; ++x) {
                        int x2 = (i*z->img_comp[n].h + x)*8;
                        int y2 = (j*z->img_comp[n].v + y)*8;
                        int ha = z->img_comp[n].ha;
                        if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
                        z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
                     }
                  }
               }
               // after all interleaved components, that's an interleaved MCU,
               // so now count down the restart interval
               if (--z->todo <= 0) {
                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
                  if (!STBI__RESTART(z->marker)) return 1;
                  stbi__jpeg_reset(z);
               }
            }
         }
         return 1;
      }
   } else {
      if (z->scan_n == 1) {
         int i,j;
         int n = z->order[0];
         // non-interleaved data, we just need to process one block at a time,
         // in trivial scanline order
         // number of blocks to do just depends on how many actual "pixels" this
         // component has, independent of interleaved MCU blocking and such
         int w = (z->img_comp[n].x+7) >> 3;
         int h = (z->img_comp[n].y+7) >> 3;
         for (j=0; j < h; ++j) {
            for (i=0; i < w; ++i) {
               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
               if (z->spec_start == 0) {
                  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
                     return 0;
               } else {
                  int ha = z->img_comp[n].ha;
                  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
                     return 0;
               }
               // every data block is an MCU, so countdown the restart interval
               if (--z->todo <= 0) {
                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
                  if (!STBI__RESTART(z->marker)) return 1;
                  stbi__jpeg_reset(z);
               }
            }
         }
         return 1;
      } else { // interleaved
         // interleaved progressive scans carry DC coefficients only (T.81),
         // hence only the prog_dc decoder is invoked here
         int i,j,k,x,y;
         for (j=0; j < z->img_mcu_y; ++j) {
            for (i=0; i < z->img_mcu_x; ++i) {
               // scan an interleaved mcu... process scan_n components in order
               for (k=0; k < z->scan_n; ++k) {
                  int n = z->order[k];
                  // scan out an mcu's worth of this component; that's just determined
                  // by the basic H and V specified for the component
                  for (y=0; y < z->img_comp[n].v; ++y) {
                     for (x=0; x < z->img_comp[n].h; ++x) {
                        int x2 = (i*z->img_comp[n].h + x);
                        int y2 = (j*z->img_comp[n].v + y);
                        short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
                        if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
                           return 0;
                     }
                  }
               }
               // after all interleaved components, that's an interleaved MCU,
               // so now count down the restart interval
               if (--z->todo <= 0) {
                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
                  if (!STBI__RESTART(z->marker)) return 1;
                  stbi__jpeg_reset(z);
               }
            }
         }
         return 1;
      }
   }
}

// multiply each coefficient by its quantization-table entry, in place
static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
{
   int i;
   for (i=0; i < 64; ++i)
      data[i] *= dequant[i];
}

// progressive images accumulate coefficients across scans; once all scans
// are read, dequantize and IDCT every block into the output planes
static void stbi__jpeg_finish(stbi__jpeg *z)
{
   if (z->progressive) {
      // dequantize and idct the data
      int i,j,n;
      for (n=0; n < z->s->img_n; ++n) {
         int w = (z->img_comp[n].x+7) >> 3;
         int h = (z->img_comp[n].y+7) >> 3;
         for (j=0; j < h; ++j) {
            for (i=0; i < w; ++i) {
               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
               stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
            }
         }
      }
   }
}

// handle a non-SOF/SOS marker segment (DRI, DQT, DHT, APPn, COM)
static int stbi__process_marker(stbi__jpeg *z, int m)
{
   int L;
   switch (m) {
      case STBI__MARKER_none: // no marker found
         return stbi__err("expected marker","Corrupt JPEG");

      case 0xDD: // DRI - specify restart interval
         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
         z->restart_interval =
                               stbi__get16be(z->s);
         return 1;

      case 0xDB: // DQT - define quantization table
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            int q = stbi__get8(z->s);
            int p = q >> 4, sixteen = (p != 0); // precision: 0 = 8-bit, 1 = 16-bit entries
            int t = q & 15,i;
            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");

            // table is stored in zigzag order; de-zigzag while reading
            for (i=0; i < 64; ++i)
               z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
            L -= (sixteen ? 129 : 65);
         }
         return L==0;

      case 0xC4: // DHT - define huffman table
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            stbi_uc *v;
            int sizes[16],i,n=0;
            int q = stbi__get8(z->s);
            int tc = q >> 4;   // table class: 0 = DC, 1 = AC
            int th = q & 15;   // table slot
            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
            for (i=0; i < 16; ++i) {
               sizes[i] = stbi__get8(z->s);
               n += sizes[i];
            }
            L -= 17;
            if (tc == 0) {
               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
               v = z->huff_dc[th].values;
            } else {
               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
               v = z->huff_ac[th].values;
            }
            for (i=0; i < n; ++i)
               v[i] = stbi__get8(z->s);
            if (tc != 0)
               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
            L -= n;
         }
         return L==0;
   }

   // check for comment block or APP blocks
   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
      L = stbi__get16be(z->s);
      if (L < 2) {
         if (m == 0xFE)
            return stbi__err("bad COM len","Corrupt JPEG");
         else
            return stbi__err("bad APP len","Corrupt JPEG");
      }
      L -= 2;

      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
         static const unsigned char tag[5] = {'J','F','I','F','\0'};
         int ok = 1;
         int i;
         for (i=0; i < 5; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 5;
         if (ok)
            z->jfif = 1;
      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
         static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
         int ok = 1;
         int i;
         for (i=0; i < 6; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 6;
         if (ok) {
            stbi__get8(z->s); // version
            stbi__get16be(z->s); // flags0
            stbi__get16be(z->s); // flags1
            z->app14_color_transform = stbi__get8(z->s); // color transform
            L -= 6;
         }
      }

      stbi__skip(z->s, L);
      return 1;
   }

   return stbi__err("unknown marker","Corrupt JPEG");
}

// after we see SOS
static int stbi__process_scan_header(stbi__jpeg *z)
{
   int i;
   int Ls = stbi__get16be(z->s);
   z->scan_n = stbi__get8(z->s);
   if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
   if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
   for (i=0; i < z->scan_n; ++i) {
      int id = stbi__get8(z->s), which;
      int q = stbi__get8(z->s);
      // match the scan's component id against those declared in the SOF
      for (which = 0; which < z->s->img_n; ++which)
         if (z->img_comp[which].id == id)
            break;
      if (which == z->s->img_n) return 0; // no match
      z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
      z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
      z->order[i] = which;
   }

   {
      int aa;
      z->spec_start = stbi__get8(z->s);
      z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
      aa = stbi__get8(z->s);
      z->succ_high = (aa >> 4);
      z->succ_low  = (aa & 15);
      if (z->progressive) {
         if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
            return stbi__err("bad SOS", "Corrupt JPEG");
      } else {
         if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
         if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
         z->spec_end = 63;
      }
   }

   return 1;
}

// free per-component buffers; "why" is the pass-through return value
static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
{
   int i;
   for (i=0; i < ncomp; ++i) {
      if (z->img_comp[i].raw_data) {
         STBI_FREE(z->img_comp[i].raw_data);
         z->img_comp[i].raw_data = NULL;
         z->img_comp[i].data = NULL;
} + if (z->img_comp[i].raw_coeff) { + STBI_FREE(z->img_comp[i].raw_coeff); + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].coeff = 0; + } + if (z->img_comp[i].linebuf) { + STBI_FREE(z->img_comp[i].linebuf); + z->img_comp[i].linebuf = NULL; + } + } + return why; +} + +static int stbi__process_frame_header(stbi__jpeg *z, int scan) +{ + stbi__context *s = z->s; + int Lf,p,i,q, h_max=1,v_max=1,c; + Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG + p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline + s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires + c = stbi__get8(s); + if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); + s->img_n = c; + for (i=0; i < c; ++i) { + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; + } + + if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); + + z->rgb = 0; + for (i=0; i < s->img_n; ++i) { + static const unsigned char rgb[3] = { 'R', 'G', 'B' }; + z->img_comp[i].id = stbi__get8(s); + if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) + ++z->rgb; + q = stbi__get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); + z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); + } + + if (scan != STBI__SCAN_load) return 1; + + if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); + + for (i=0; i < s->img_n; ++i) { + if 
(z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; + if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; + } + + // compute interleaved mcu info + z->img_h_max = h_max; + z->img_v_max = v_max; + z->img_mcu_w = h_max * 8; + z->img_mcu_h = v_max * 8; + // these sizes can't be more than 17 bits + z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; + z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; + + for (i=0; i < s->img_n; ++i) { + // number of effective pixels (e.g. for non-interleaved MCU) + z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; + z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; + // to simplify generation, we'll allocate enough memory to decode + // the bogus oversized data from using interleaved MCUs and their + // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't + // discard the extra data until colorspace conversion + // + // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) + // so these muls can't overflow with 32-bit ints (which we require) + z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; + z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; + z->img_comp[i].coeff = 0; + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].linebuf = NULL; + z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); + if (z->img_comp[i].raw_data == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); + // align blocks for idct using mmx/sse + z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); + if (z->progressive) { + // w2, h2 are multiples of 8 (see above) + z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; + z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; + z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); + if (z->img_comp[i].raw_coeff == NULL) + return stbi__free_jpeg_components(z, i+1, 
stbi__err("outofmem", "Out of memory")); + z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); + } + } + + return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. SOF) +#define stbi__DNL(x) ((x) == 0xdc) +#define stbi__SOI(x) ((x) == 0xd8) +#define stbi__EOI(x) ((x) == 0xd9) +#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) +#define stbi__SOS(x) ((x) == 0xda) + +#define stbi__SOF_progressive(x) ((x) == 0xc2) + +static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) +{ + int m; + z->jfif = 0; + z->app14_color_transform = -1; // valid values are 0,1,2 + z->marker = STBI__MARKER_none; // initialize cached marker to empty + m = stbi__get_marker(z); + if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG"); + if (scan == STBI__SCAN_type) return 1; + m = stbi__get_marker(z); + while (!stbi__SOF(m)) { + if (!stbi__process_marker(z,m)) return 0; + m = stbi__get_marker(z); + while (m == STBI__MARKER_none) { + // some files have extra padding after their blocks, so ok, we'll scan + if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); + m = stbi__get_marker(z); + } + } + z->progressive = stbi__SOF_progressive(m); + if (!stbi__process_frame_header(z, scan)) return 0; + return 1; +} + +// decode image to YCbCr format +static int stbi__decode_jpeg_image(stbi__jpeg *j) +{ + int m; + for (m = 0; m < 4; m++) { + j->img_comp[m].raw_data = NULL; + j->img_comp[m].raw_coeff = NULL; + } + j->restart_interval = 0; + if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0; + m = stbi__get_marker(j); + while (!stbi__EOI(m)) { + if (stbi__SOS(m)) { + if (!stbi__process_scan_header(j)) return 0; + if (!stbi__parse_entropy_coded_data(j)) return 0; + if (j->marker == STBI__MARKER_none ) { + // handle 0s at the end of image data from IP Kamera 9060 + while (!stbi__at_eof(j->s)) { + int x = stbi__get8(j->s); + if (x == 255) { + j->marker = stbi__get8(j->s); + break; + } + } + // if we 
reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 + } + } else if (stbi__DNL(m)) { + int Ld = stbi__get16be(j->s); + stbi__uint32 NL = stbi__get16be(j->s); + if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); + if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); + } else { + if (!stbi__process_marker(j, m)) return 0; + } + m = stbi__get_marker(j); + } + if (j->progressive) + stbi__jpeg_finish(j); + return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1, + int w, int hs); + +#define stbi__div4(x) ((stbi_uc) ((x) >> 2)) + +static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + STBI_NOTUSED(out); + STBI_NOTUSED(in_far); + STBI_NOTUSED(w); + STBI_NOTUSED(hs); + return in_near; +} + +static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples vertically for every one in input + int i; + STBI_NOTUSED(hs); + for (i=0; i < w; ++i) + out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); + return out; +} + +static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples horizontally for every one in input + int i; + stbi_uc *input = in_near; + + if (w == 1) { + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + return out; + } + + out[0] = input[0]; + out[1] = stbi__div4(input[0]*3 + input[1] + 2); + for (i=1; i < w-1; ++i) { + int n = 3*input[i]+2; + out[i*2+0] = stbi__div4(n+input[i-1]); + out[i*2+1] = stbi__div4(n+input[i+1]); + } + out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2); + out[i*2+1] = input[w-1]; + + STBI_NOTUSED(in_far); + STBI_NOTUSED(hs); + + return out; +} + +#define stbi__div16(x) ((stbi_uc) ((x) >> 4)) + +static stbi_uc 
*stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i,t0,t1; + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + out[0] = stbi__div4(t1+2); + for (i=1; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i=0,t0,t1; + + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + // process groups of 8 pixels for as long as we can. + // note we can't handle the last pixel in a row in this loop + // because we need to handle the filter boundary conditions. + for (; i < ((w-1) & ~7); i += 8) { +#if defined(STBI_SSE2) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + __m128i zero = _mm_setzero_si128(); + __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i)); + __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i)); + __m128i farw = _mm_unpacklo_epi8(farb, zero); + __m128i nearw = _mm_unpacklo_epi8(nearb, zero); + __m128i diff = _mm_sub_epi16(farw, nearw); + __m128i nears = _mm_slli_epi16(nearw, 2); + __m128i curr = _mm_add_epi16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. 
+ __m128i prv0 = _mm_slli_si128(curr, 2); + __m128i nxt0 = _mm_srli_si128(curr, 2); + __m128i prev = _mm_insert_epi16(prv0, t1, 0); + __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + __m128i bias = _mm_set1_epi16(8); + __m128i curs = _mm_slli_epi16(curr, 2); + __m128i prvd = _mm_sub_epi16(prev, curr); + __m128i nxtd = _mm_sub_epi16(next, curr); + __m128i curb = _mm_add_epi16(curs, bias); + __m128i even = _mm_add_epi16(prvd, curb); + __m128i odd = _mm_add_epi16(nxtd, curb); + + // interleave even and odd pixels, then undo scaling. + __m128i int0 = _mm_unpacklo_epi16(even, odd); + __m128i int1 = _mm_unpackhi_epi16(even, odd); + __m128i de0 = _mm_srli_epi16(int0, 4); + __m128i de1 = _mm_srli_epi16(int1, 4); + + // pack and write output + __m128i outv = _mm_packus_epi16(de0, de1); + _mm_storeu_si128((__m128i *) (out + i*2), outv); +#elif defined(STBI_NEON) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + uint8x8_t farb = vld1_u8(in_far + i); + uint8x8_t nearb = vld1_u8(in_near + i); + int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); + int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); + int16x8_t curr = vaddq_s16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. 
+ int16x8_t prv0 = vextq_s16(curr, curr, 7); + int16x8_t nxt0 = vextq_s16(curr, curr, 1); + int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); + int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + int16x8_t curs = vshlq_n_s16(curr, 2); + int16x8_t prvd = vsubq_s16(prev, curr); + int16x8_t nxtd = vsubq_s16(next, curr); + int16x8_t even = vaddq_s16(curs, prvd); + int16x8_t odd = vaddq_s16(curs, nxtd); + + // undo scaling and round, then store with even/odd phases interleaved + uint8x8x2_t o; + o.val[0] = vqrshrun_n_s16(even, 4); + o.val[1] = vqrshrun_n_s16(odd, 4); + vst2_u8(out + i*2, o); +#endif + + // "previous" value for next iter + t1 = 3*in_near[i+7] + in_far[i+7]; + } + + t0 = t1; + t1 = 3*in_near[i] + in_far[i]; + out[i*2] = stbi__div16(3*t1 + t0 + 8); + + for (++i; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} +#endif + +static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // resample with nearest-neighbor + int i,j; + STBI_NOTUSED(in_far); + for (i=0; i < w; ++i) + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; + return out; +} + +// this is a reduced-precision calculation of YCbCr-to-RGB introduced +// to make sure the code produces the same results in both SIMD and scalar +#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8) +static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) +{ + int i; + for (i=0; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int 
cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) +{ + int i = 0; + +#ifdef STBI_SSE2 + // step == 3 is pretty ugly on the final interleave, and i'm not convinced + // it's useful in practice (you wouldn't use it for textures, for example). + // so just accelerate step == 4 case. + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. 
+ __m128i signflip = _mm_set1_epi8(-0x80); + __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f)); + __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); + __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); + __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f)); + __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128); + __m128i xw = _mm_set1_epi16(255); // alpha channel + + for (; i+7 < count; i += 8) { + // load + __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i)); + __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i)); + __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i)); + __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 + __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 + + // unpack to short (and left-shift cr, cb by 8) + __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes); + __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); + __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); + + // color transform + __m128i yws = _mm_srli_epi16(yw, 4); + __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); + __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); + __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); + __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); + __m128i rws = _mm_add_epi16(cr0, yws); + __m128i gwt = _mm_add_epi16(cb0, yws); + __m128i bws = _mm_add_epi16(yws, cb1); + __m128i gws = _mm_add_epi16(gwt, cr1); + + // descale + __m128i rw = _mm_srai_epi16(rws, 4); + __m128i bw = _mm_srai_epi16(bws, 4); + __m128i gw = _mm_srai_epi16(gws, 4); + + // back to byte, set up for transpose + __m128i brb = _mm_packus_epi16(rw, bw); + __m128i gxb = _mm_packus_epi16(gw, xw); + + // transpose to interleave channels + __m128i t0 = _mm_unpacklo_epi8(brb, gxb); + __m128i t1 = _mm_unpackhi_epi8(brb, gxb); + __m128i o0 = _mm_unpacklo_epi16(t0, t1); + __m128i o1 = _mm_unpackhi_epi16(t0, t1); + + // store + _mm_storeu_si128((__m128i *) (out + 0), o0); + 
_mm_storeu_si128((__m128i *) (out + 16), o1); + out += 32; + } + } +#endif + +#ifdef STBI_NEON + // in this version, step=3 support would be easy to add. but is there demand? + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. + uint8x8_t signflip = vdup_n_u8(0x80); + int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f)); + int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f)); + int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f)); + int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f)); + + for (; i+7 < count; i += 8) { + // load + uint8x8_t y_bytes = vld1_u8(y + i); + uint8x8_t cr_bytes = vld1_u8(pcr + i); + uint8x8_t cb_bytes = vld1_u8(pcb + i); + int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip)); + int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip)); + + // expand to s16 + int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4)); + int16x8_t crw = vshll_n_s8(cr_biased, 7); + int16x8_t cbw = vshll_n_s8(cb_biased, 7); + + // color transform + int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0); + int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0); + int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1); + int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1); + int16x8_t rws = vaddq_s16(yws, cr0); + int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1); + int16x8_t bws = vaddq_s16(yws, cb1); + + // undo scaling, round, convert to byte + uint8x8x4_t o; + o.val[0] = vqrshrun_n_s16(rws, 4); + o.val[1] = vqrshrun_n_s16(gws, 4); + o.val[2] = vqrshrun_n_s16(bws, 4); + o.val[3] = vdup_n_u8(255); + + // store, interleaving r/g/b/a + vst4_u8(out, o); + out += 8*4; + } + } +#endif + + for (; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 
0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} +#endif + +// set up the kernels +static void stbi__setup_jpeg(stbi__jpeg *j) +{ + j->idct_block_kernel = stbi__idct_block; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2; + +#ifdef STBI_SSE2 + if (stbi__sse2_available()) { + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; + } +#endif + +#ifdef STBI_NEON + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; +#endif +} + +// clean up the temporary component buffers +static void stbi__cleanup_jpeg(stbi__jpeg *j) +{ + stbi__free_jpeg_components(j, j->s->img_n, 0); +} + +typedef struct +{ + resample_row_func resample; + stbi_uc *line0,*line1; + int hs,vs; // expansion factor in each axis + int w_lores; // horizontal pixels pre-expansion + int ystep; // how far through vertical expansion we are + int ypos; // which pre-expansion row we're on +} stbi__resample; + +// fast 0..255 * 0..255 => 0..255 rounded multiplication +static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) +{ + unsigned int t = x*y + 128; + return (stbi_uc) ((t + (t >>8)) >> 8); +} + +static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ + int n, decode_n, is_rgb; + z->s->img_n = 0; // make stbi__cleanup_jpeg safe + + // validate req_comp + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + + // load a jpeg image from 
whichever source, but leave in YCbCr format + if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; } + + // determine actual number of components to generate + n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1; + + is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif)); + + if (z->s->img_n == 3 && n < 3 && !is_rgb) + decode_n = 1; + else + decode_n = z->s->img_n; + + // resample and color-convert + { + int k; + unsigned int i,j; + stbi_uc *output; + stbi_uc *coutput[4]; + + stbi__resample res_comp[4]; + + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + + // allocate line buffer big enough for upsampling off the edges + // with upsample factor of 4 + z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3); + if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + r->hs = z->img_h_max / z->img_comp[k].h; + r->vs = z->img_v_max / z->img_comp[k].v; + r->ystep = r->vs >> 1; + r->w_lores = (z->s->img_x + r->hs-1) / r->hs; + r->ypos = 0; + r->line0 = r->line1 = z->img_comp[k].data; + + if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; + else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2; + else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2; + else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel; + else r->resample = stbi__resample_row_generic; + } + + // can't error after this so, this is safe + output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1); + if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + // now go ahead and resample + for (j=0; j < z->s->img_y; ++j) { + stbi_uc *out = output + n * z->s->img_x * j; + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + int y_bot = r->ystep >= (r->vs >> 1); + coutput[k] = r->resample(z->img_comp[k].linebuf, + y_bot ? 
r->line1 : r->line0, + y_bot ? r->line0 : r->line1, + r->w_lores, r->hs); + if (++r->ystep >= r->vs) { + r->ystep = 0; + r->line0 = r->line1; + if (++r->ypos < z->img_comp[k].y) + r->line1 += z->img_comp[k].w2; + } + } + if (n >= 3) { + stbi_uc *y = coutput[0]; + if (z->s->img_n == 3) { + if (is_rgb) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = y[i]; + out[1] = coutput[1][i]; + out[2] = coutput[2][i]; + out[3] = 255; + out += n; + } + } else { + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else if (z->s->img_n == 4) { + if (z->app14_color_transform == 0) { // CMYK + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(coutput[0][i], m); + out[1] = stbi__blinn_8x8(coutput[1][i], m); + out[2] = stbi__blinn_8x8(coutput[2][i], m); + out[3] = 255; + out += n; + } + } else if (z->app14_color_transform == 2) { // YCCK + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(255 - out[0], m); + out[1] = stbi__blinn_8x8(255 - out[1], m); + out[2] = stbi__blinn_8x8(255 - out[2], m); + out += n; + } + } else { // YCbCr + alpha? 
Ignore the fourth channel for now + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else + for (i=0; i < z->s->img_x; ++i) { + out[0] = out[1] = out[2] = y[i]; + out[3] = 255; // not used if n==3 + out += n; + } + } else { + if (is_rgb) { + if (n == 1) + for (i=0; i < z->s->img_x; ++i) + *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + else { + for (i=0; i < z->s->img_x; ++i, out += 2) { + out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + out[1] = 255; + } + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 0) { + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + stbi_uc r = stbi__blinn_8x8(coutput[0][i], m); + stbi_uc g = stbi__blinn_8x8(coutput[1][i], m); + stbi_uc b = stbi__blinn_8x8(coutput[2][i], m); + out[0] = stbi__compute_y(r, g, b); + out[1] = 255; + out += n; + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 2) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]); + out[1] = 255; + out += n; + } + } else { + stbi_uc *y = coutput[0]; + if (n == 1) + for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; + else + for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255; + } + } + } + stbi__cleanup_jpeg(z); + *out_x = z->s->img_x; + *out_y = z->s->img_y; + if (comp) *comp = z->s->img_n >= 3 ? 
3 : 1; // report original components, not output + return output; + } +} + +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + unsigned char* result; + stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg)); + STBI_NOTUSED(ri); + j->s = s; + stbi__setup_jpeg(j); + result = load_jpeg_image(j, x,y,comp,req_comp); + STBI_FREE(j); + return result; +} + +static int stbi__jpeg_test(stbi__context *s) +{ + int r; + stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg)); + j->s = s; + stbi__setup_jpeg(j); + r = stbi__decode_jpeg_header(j, STBI__SCAN_type); + stbi__rewind(s); + STBI_FREE(j); + return r; +} + +static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) +{ + if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) { + stbi__rewind( j->s ); + return 0; + } + if (x) *x = j->s->img_x; + if (y) *y = j->s->img_y; + if (comp) *comp = j->s->img_n >= 3 ? 3 : 1; + return 1; +} + +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) +{ + int result; + stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg))); + j->s = s; + result = stbi__jpeg_info_raw(j, x, y, comp); + STBI_FREE(j); + return result; +} +#endif + +// public domain zlib decode v0.2 Sean Barrett 2006-11-18 +// simple implementation +// - all input must be provided in an upfront buffer +// - all output is written to a single output buffer (can malloc/realloc) +// performance +// - fast huffman + +#ifndef STBI_NO_ZLIB + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables +#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1) + +// zlib-style huffman encoding +// (jpegs packs from left, zlib from right, so can't share code) +typedef struct +{ + stbi__uint16 fast[1 << STBI__ZFAST_BITS]; + stbi__uint16 firstcode[16]; + int maxcode[17]; + stbi__uint16 firstsymbol[16]; + stbi_uc size[288]; + stbi__uint16 
value[288]; +} stbi__zhuffman; + +stbi_inline static int stbi__bitreverse16(int n) +{ + n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); + n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); + n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); + n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); + return n; +} + +stbi_inline static int stbi__bit_reverse(int v, int bits) +{ + STBI_ASSERT(bits <= 16); + // to bit reverse n bits, reverse 16 and shift + // e.g. 11 bits, bit reverse and shift away 5 + return stbi__bitreverse16(v) >> (16-bits); +} + +static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num) +{ + int i,k=0; + int code, next_code[16], sizes[17]; + + // DEFLATE spec for generating codes + memset(sizes, 0, sizeof(sizes)); + memset(z->fast, 0, sizeof(z->fast)); + for (i=0; i < num; ++i) + ++sizes[sizelist[i]]; + sizes[0] = 0; + for (i=1; i < 16; ++i) + if (sizes[i] > (1 << i)) + return stbi__err("bad sizes", "Corrupt PNG"); + code = 0; + for (i=1; i < 16; ++i) { + next_code[i] = code; + z->firstcode[i] = (stbi__uint16) code; + z->firstsymbol[i] = (stbi__uint16) k; + code = (code + sizes[i]); + if (sizes[i]) + if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG"); + z->maxcode[i] = code << (16-i); // preshift for inner loop + code <<= 1; + k += sizes[i]; + } + z->maxcode[16] = 0x10000; // sentinel + for (i=0; i < num; ++i) { + int s = sizelist[i]; + if (s) { + int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; + stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); + z->size [c] = (stbi_uc ) s; + z->value[c] = (stbi__uint16) i; + if (s <= STBI__ZFAST_BITS) { + int j = stbi__bit_reverse(next_code[s],s); + while (j < (1 << STBI__ZFAST_BITS)) { + z->fast[j] = fastv; + j += (1 << s); + } + } + ++next_code[s]; + } + } + return 1; +} + +// zlib-from-memory implementation for PNG reading +// because PNG allows splitting the zlib stream arbitrarily, +// and it's annoying structurally to have PNG call ZLIB call PNG, +// we require 
PNG read all the IDATs and combine them into a single +// memory buffer + +typedef struct +{ + stbi_uc *zbuffer, *zbuffer_end; + int num_bits; + stbi__uint32 code_buffer; + + char *zout; + char *zout_start; + char *zout_end; + int z_expandable; + + stbi__zhuffman z_length, z_distance; +} stbi__zbuf; + +stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z) +{ + if (z->zbuffer >= z->zbuffer_end) return 0; + return *z->zbuffer++; +} + +static void stbi__fill_bits(stbi__zbuf *z) +{ + do { + STBI_ASSERT(z->code_buffer < (1U << z->num_bits)); + z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; + z->num_bits += 8; + } while (z->num_bits <= 24); +} + +stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n) +{ + unsigned int k; + if (z->num_bits < n) stbi__fill_bits(z); + k = z->code_buffer & ((1 << n) - 1); + z->code_buffer >>= n; + z->num_bits -= n; + return k; +} + +static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s,k; + // not resolved by fast table, so compute it the slow way + // use jpeg approach, which requires MSbits at top + k = stbi__bit_reverse(a->code_buffer, 16); + for (s=STBI__ZFAST_BITS+1; ; ++s) + if (k < z->maxcode[s]) + break; + if (s == 16) return -1; // invalid code! 
+ // code size is s, so: + b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; + STBI_ASSERT(z->size[b] == s); + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; +} + +stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s; + if (a->num_bits < 16) stbi__fill_bits(a); + b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; + if (b) { + s = b >> 9; + a->code_buffer >>= s; + a->num_bits -= s; + return b & 511; + } + return stbi__zhuffman_decode_slowpath(a, z); +} + +static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes +{ + char *q; + int cur, limit, old_limit; + z->zout = zout; + if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); + cur = (int) (z->zout - z->zout_start); + limit = old_limit = (int) (z->zout_end - z->zout_start); + while (cur + n > limit) + limit *= 2; + q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); + STBI_NOTUSED(old_limit); + if (q == NULL) return stbi__err("outofmem", "Out of memory"); + z->zout_start = q; + z->zout = q + cur; + z->zout_end = q + limit; + return 1; +} + +static const int stbi__zlength_base[31] = { + 3,4,5,6,7,8,9,10,11,13, + 15,17,19,23,27,31,35,43,51,59, + 67,83,99,115,131,163,195,227,258,0,0 }; + +static const int stbi__zlength_extra[31]= +{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + +static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + +static const int stbi__zdist_extra[32] = +{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int stbi__parse_huffman_block(stbi__zbuf *a) +{ + char *zout = a->zout; + for(;;) { + int z = stbi__zhuffman_decode(a, &a->z_length); + if (z < 256) { + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes + if (zout >= a->zout_end) { + if (!stbi__zexpand(a, zout, 1)) return 
0; + zout = a->zout; + } + *zout++ = (char) z; + } else { + stbi_uc *p; + int len,dist; + if (z == 256) { + a->zout = zout; + return 1; + } + z -= 257; + len = stbi__zlength_base[z]; + if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); + z = stbi__zhuffman_decode(a, &a->z_distance); + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); + dist = stbi__zdist_base[z]; + if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); + if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); + if (zout + len > a->zout_end) { + if (!stbi__zexpand(a, zout, len)) return 0; + zout = a->zout; + } + p = (stbi_uc *) (zout - dist); + if (dist == 1) { // run of one byte; common in images. + stbi_uc v = *p; + if (len) { do *zout++ = v; while (--len); } + } else { + if (len) { do *zout++ = *p++; while (--len); } + } + } + } +} + +static int stbi__compute_huffman_codes(stbi__zbuf *a) +{ + static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + stbi__zhuffman z_codelength; + stbi_uc lencodes[286+32+137];//padding for maximum single op + stbi_uc codelength_sizes[19]; + int i,n; + + int hlit = stbi__zreceive(a,5) + 257; + int hdist = stbi__zreceive(a,5) + 1; + int hclen = stbi__zreceive(a,4) + 4; + int ntot = hlit + hdist; + + memset(codelength_sizes, 0, sizeof(codelength_sizes)); + for (i=0; i < hclen; ++i) { + int s = stbi__zreceive(a,3); + codelength_sizes[length_dezigzag[i]] = (stbi_uc) s; + } + if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; + + n = 0; + while (n < ntot) { + int c = stbi__zhuffman_decode(a, &z_codelength); + if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG"); + if (c < 16) + lencodes[n++] = (stbi_uc) c; + else { + stbi_uc fill = 0; + if (c == 16) { + c = stbi__zreceive(a,2)+3; + if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); + fill = lencodes[n-1]; + } else if (c == 17) + c = 
stbi__zreceive(a,3)+3; + else { + STBI_ASSERT(c == 18); + c = stbi__zreceive(a,7)+11; + } + if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); + memset(lencodes+n, fill, c); + n += c; + } + } + if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG"); + if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; + return 1; +} + +static int stbi__parse_uncompressed_block(stbi__zbuf *a) +{ + stbi_uc header[4]; + int len,nlen,k; + if (a->num_bits & 7) + stbi__zreceive(a, a->num_bits & 7); // discard + // drain the bit-packed data into header + k = 0; + while (a->num_bits > 0) { + header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check + a->code_buffer >>= 8; + a->num_bits -= 8; + } + STBI_ASSERT(a->num_bits == 0); + // now fill header the normal way + while (k < 4) + header[k++] = stbi__zget8(a); + len = header[1] * 256 + header[0]; + nlen = header[3] * 256 + header[2]; + if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); + if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG"); + if (a->zout + len > a->zout_end) + if (!stbi__zexpand(a, a->zout, len)) return 0; + memcpy(a->zout, a->zbuffer, len); + a->zbuffer += len; + a->zout += len; + return 1; +} + +static int stbi__parse_zlib_header(stbi__zbuf *a) +{ + int cmf = stbi__zget8(a); + int cm = cmf & 15; + /* int cinfo = cmf >> 4; */ + int flg = stbi__zget8(a); + if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec + if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png + if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png + // window = 1 << (8 + cinfo)... 
but who cares, we fully buffer output + return 1; +} + +static const stbi_uc stbi__zdefault_length[288] = +{ + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8 +}; +static const stbi_uc stbi__zdefault_distance[32] = +{ + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 +}; +/* +Init algorithm: +{ + int i; // use <= to match clearly with spec + for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8; + for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9; + for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7; + for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8; + + for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5; +} +*/ + +static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) +{ + int final, type; + if (parse_header) + if (!stbi__parse_zlib_header(a)) return 0; + a->num_bits = 0; + a->code_buffer = 0; + do { + final = stbi__zreceive(a,1); + type = stbi__zreceive(a,2); + if (type == 0) { + if (!stbi__parse_uncompressed_block(a)) return 0; + } else if (type == 3) { + return 0; + } else { + if (type == 1) { + // use fixed code lengths + if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0; + } else { + if (!stbi__compute_huffman_codes(a)) return 0; + } + if (!stbi__parse_huffman_block(a)) return 0; + } + } while (!final); + return 1; +} + +static int stbi__do_zlib(stbi__zbuf *a, char 
*obuf, int olen, int exp, int parse_header) +{ + a->zout_start = obuf; + a->zout = obuf; + a->zout_end = obuf + olen; + a->z_expandable = exp; + + return stbi__parse_zlib(a, parse_header); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, 1)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) +{ + return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 1)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(16384); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer+len; + if (stbi__do_zlib(&a, p, 16384, 1, 0)) { + if (outlen) *outlen = (int) (a.zout - 
a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 0)) + return (int) (a.zout - a.zout_start); + else + return -1; +} +#endif + +// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 +// simple implementation +// - only 8-bit samples +// - no CRC checking +// - allocates lots of intermediate memory +// - avoids problem of streaming data between subsystems +// - avoids explicit window management +// performance +// - uses stb_zlib, a PD zlib implementation with fast huffman decoding + +#ifndef STBI_NO_PNG +typedef struct +{ + stbi__uint32 length; + stbi__uint32 type; +} stbi__pngchunk; + +static stbi__pngchunk stbi__get_chunk_header(stbi__context *s) +{ + stbi__pngchunk c; + c.length = stbi__get32be(s); + c.type = stbi__get32be(s); + return c; +} + +static int stbi__check_png_header(stbi__context *s) +{ + static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; + int i; + for (i=0; i < 8; ++i) + if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); + return 1; +} + +typedef struct +{ + stbi__context *s; + stbi_uc *idata, *expanded, *out; + int depth; +} stbi__png; + + +enum { + STBI__F_none=0, + STBI__F_sub=1, + STBI__F_up=2, + STBI__F_avg=3, + STBI__F_paeth=4, + // synthetic filters used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first, + STBI__F_paeth_first +}; + +static stbi_uc first_row_filter[5] = +{ + STBI__F_none, + STBI__F_sub, + STBI__F_none, + STBI__F_avg_first, + STBI__F_paeth_first +}; + +static int stbi__paeth(int a, int b, int c) +{ + int p = a + b - c; + int pa = abs(p-a); + int pb = abs(p-b); + int pc = abs(p-c); + if (pa <= pb && pa <= pc) return a; + if (pb <= pc) return b; + return c; +} + 
+static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; + +// create the png data from post-deflated data +static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) +{ + int bytes = (depth == 16? 2 : 1); + stbi__context *s = a->s; + stbi__uint32 i,j,stride = x*out_n*bytes; + stbi__uint32 img_len, img_width_bytes; + int k; + int img_n = s->img_n; // copy it into a local for later + + int output_bytes = out_n*bytes; + int filter_bytes = img_n*bytes; + int width = x; + + STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1); + a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into + if (!a->out) return stbi__err("outofmem", "Out of memory"); + + if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); + img_width_bytes = (((img_n * x * depth) + 7) >> 3); + img_len = (img_width_bytes + 1) * y; + + // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, + // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros), + // so just check for raw_len < img_len always. 
+ if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *prior; + int filter = *raw++; + + if (filter > 4) + return stbi__err("invalid filter","Corrupt PNG"); + + if (depth < 8) { + STBI_ASSERT(img_width_bytes <= x); + cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place + filter_bytes = 1; + width = img_width_bytes; + } + prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above + + // if first row, use special filter that doesn't sample previous row + if (j == 0) filter = first_row_filter[filter]; + + // handle first byte explicitly + for (k=0; k < filter_bytes; ++k) { + switch (filter) { + case STBI__F_none : cur[k] = raw[k]; break; + case STBI__F_sub : cur[k] = raw[k]; break; + case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; + case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; + case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; + case STBI__F_avg_first : cur[k] = raw[k]; break; + case STBI__F_paeth_first: cur[k] = raw[k]; break; + } + } + + if (depth == 8) { + if (img_n != out_n) + cur[img_n] = 255; // first pixel + raw += img_n; + cur += out_n; + prior += out_n; + } else if (depth == 16) { + if (img_n != out_n) { + cur[filter_bytes] = 255; // first pixel top byte + cur[filter_bytes+1] = 255; // first pixel bottom byte + } + raw += filter_bytes; + cur += output_bytes; + prior += output_bytes; + } else { + raw += 1; + cur += 1; + prior += 1; + } + + // this is a little gross, so that we don't switch per-pixel or per-component + if (depth < 8 || img_n == out_n) { + int nk = (width - 1)*filter_bytes; + #define STBI__CASE(f) \ + case f: \ + for (k=0; k < nk; ++k) + switch (filter) { + // "none" filter turns into a memcpy here; make that explicit. 
+ case STBI__F_none: memcpy(cur, raw, nk); break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; + } + #undef STBI__CASE + raw += nk; + } else { + STBI_ASSERT(img_n+1 == out_n); + #define STBI__CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ + for (k=0; k < filter_bytes; ++k) + switch (filter) { + STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; + } + #undef STBI__CASE + + // the loop above sets the high byte of the pixels' alpha, but for + // 16 bit png files we also need the low byte set. we'll do that here. 
+ if (depth == 16) { + cur = a->out + stride*j; // start at the beginning of the row again + for (i=0; i < x; ++i,cur+=output_bytes) { + cur[filter_bytes+1] = 255; + } + } + } + } + + // we make a separate pass to expand bits to pixels; for performance, + // this could run two scanlines behind the above code, so it won't + // intefere with filtering but will still be in the cache. + if (depth < 8) { + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; + // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit + // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop + stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range + + // note that the final byte might overshoot and write more data than desired. + // we can allocate enough data that this never writes out of memory, but it + // could also overwrite the next scanline. can it overwrite non-empty data + // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. 
+ // so we need to explicitly clamp the final ones + + if (depth == 4) { + for (k=x*img_n; k >= 2; k-=2, ++in) { + *cur++ = scale * ((*in >> 4) ); + *cur++ = scale * ((*in ) & 0x0f); + } + if (k > 0) *cur++ = scale * ((*in >> 4) ); + } else if (depth == 2) { + for (k=x*img_n; k >= 4; k-=4, ++in) { + *cur++ = scale * ((*in >> 6) ); + *cur++ = scale * ((*in >> 4) & 0x03); + *cur++ = scale * ((*in >> 2) & 0x03); + *cur++ = scale * ((*in ) & 0x03); + } + if (k > 0) *cur++ = scale * ((*in >> 6) ); + if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); + if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); + } else if (depth == 1) { + for (k=x*img_n; k >= 8; k-=8, ++in) { + *cur++ = scale * ((*in >> 7) ); + *cur++ = scale * ((*in >> 6) & 0x01); + *cur++ = scale * ((*in >> 5) & 0x01); + *cur++ = scale * ((*in >> 4) & 0x01); + *cur++ = scale * ((*in >> 3) & 0x01); + *cur++ = scale * ((*in >> 2) & 0x01); + *cur++ = scale * ((*in >> 1) & 0x01); + *cur++ = scale * ((*in ) & 0x01); + } + if (k > 0) *cur++ = scale * ((*in >> 7) ); + if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); + if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); + if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); + if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); + if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); + if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); + } + if (img_n != out_n) { + int q; + // insert alpha = 255 + cur = a->out + stride*j; + if (img_n == 1) { + for (q=x-1; q >= 0; --q) { + cur[q*2+1] = 255; + cur[q*2+0] = cur[q]; + } + } else { + STBI_ASSERT(img_n == 3); + for (q=x-1; q >= 0; --q) { + cur[q*4+3] = 255; + cur[q*4+2] = cur[q*3+2]; + cur[q*4+1] = cur[q*3+1]; + cur[q*4+0] = cur[q*3+0]; + } + } + } + } + } else if (depth == 16) { + // force the image data from big-endian to platform-native. + // this is done in a separate pass due to the decoding relying + // on the data being untouched, but could probably be done + // per-line during decode if care is taken. 
+ stbi_uc *cur = a->out; + stbi__uint16 *cur16 = (stbi__uint16*)cur; + + for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { + *cur16 = (cur[0] << 8) | cur[1]; + } + } + + return 1; +} + +static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) +{ + int bytes = (depth == 16 ? 2 : 1); + int out_bytes = out_n * bytes; + stbi_uc *final; + int p; + if (!interlaced) + return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); + + // de-interlacing + final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); + for (p=0; p < 7; ++p) { + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; + if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { + STBI_FREE(final); + return 0; + } + for (j=0; j < y; ++j) { + for (i=0; i < x; ++i) { + int out_y = j*yspc[p]+yorig[p]; + int out_x = i*xspc[p]+xorig[p]; + memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, + a->out + (j*x+i)*out_bytes, out_bytes); + } + } + STBI_FREE(a->out); + image_data += img_len; + image_data_len -= img_len; + } + } + a->out = final; + + return 1; +} + +static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + // compute color-based transparency, assuming we've + // already got 255 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i=0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] 
? 0 : 255); + p += 2; + } + } else { + for (i=0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi__uint16 *p = (stbi__uint16*) z->out; + + // compute color-based transparency, assuming we've + // already got 65535 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i = 0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 65535); + p += 2; + } + } else { + for (i = 0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n) +{ + stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y; + stbi_uc *p, *temp_out, *orig = a->out; + + p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0); + if (p == NULL) return stbi__err("outofmem", "Out of memory"); + + // between here and free(out) below, exitting would leak + temp_out = p; + + if (pal_img_n == 3) { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p += 3; + } + } else { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p[3] = palette[n+3]; + p += 4; + } + } + STBI_FREE(a->out); + a->out = temp_out; + + STBI_NOTUSED(len); + + return 1; +} + +static int stbi__unpremultiply_on_load = 0; +static int stbi__de_iphone_flag = 0; + +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) +{ + stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply; +} + +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) +{ + stbi__de_iphone_flag = 
flag_true_if_should_convert; +} + +static void stbi__de_iphone(stbi__png *z) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + if (s->img_out_n == 3) { // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 3; + } + } else { + STBI_ASSERT(s->img_out_n == 4); + if (stbi__unpremultiply_on_load) { + // convert bgr to rgb and unpremultiply + for (i=0; i < pixel_count; ++i) { + stbi_uc a = p[3]; + stbi_uc t = p[0]; + if (a) { + stbi_uc half = a / 2; + p[0] = (p[2] * 255 + half) / a; + p[1] = (p[1] * 255 + half) / a; + p[2] = ( t * 255 + half) / a; + } else { + p[0] = p[2]; + p[2] = t; + } + p += 4; + } + } else { + // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 4; + } + } + } +} + +#define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) + +static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) +{ + stbi_uc palette[1024], pal_img_n=0; + stbi_uc has_trans=0, tc[3]; + stbi__uint16 tc16[3]; + stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; + int first=1,k,interlace=0, color=0, is_iphone=0; + stbi__context *s = z->s; + + z->expanded = NULL; + z->idata = NULL; + z->out = NULL; + + if (!stbi__check_png_header(s)) return 0; + + if (scan == STBI__SCAN_type) return 1; + + for (;;) { + stbi__pngchunk c = stbi__get_chunk_header(s); + switch (c.type) { + case STBI__PNG_TYPE('C','g','B','I'): + is_iphone = 1; + stbi__skip(s, c.length); + break; + case STBI__PNG_TYPE('I','H','D','R'): { + int comp,filter; + if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); + first = 0; + if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); + s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)"); + s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) 
return stbi__err("too large","Very large image (corrupt?)"); + z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); + color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); + comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG"); + filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG"); + interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); + if (!pal_img_n) { + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); + if (scan == STBI__SCAN_header) return 1; + } else { + // if paletted, then pal_n is our final components, and + // img_n is # components to decompress/filter. 
+ s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); + // if SCAN_header, have to scan to see if we have a tRNS + } + break; + } + + case STBI__PNG_TYPE('P','L','T','E'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); + pal_len = c.length / 3; + if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); + for (i=0; i < pal_len; ++i) { + palette[i*4+0] = stbi__get8(s); + palette[i*4+1] = stbi__get8(s); + palette[i*4+2] = stbi__get8(s); + palette[i*4+3] = 255; + } + break; + } + + case STBI__PNG_TYPE('t','R','N','S'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); + if (pal_img_n) { + if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } + if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); + if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); + pal_img_n = 4; + for (i=0; i < c.length; ++i) + palette[i*4+3] = stbi__get8(s); + } else { + if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); + if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); + has_trans = 1; + if (z->depth == 16) { + for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is + } else { + for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger + } + } + break; + } + + case STBI__PNG_TYPE('I','D','A','T'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); + if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; } + if ((int)(ioff + c.length) < (int)ioff) return 0; + if (ioff + c.length > idata_limit) { + stbi__uint32 idata_limit_old = 
idata_limit; + stbi_uc *p; + if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096; + while (ioff + c.length > idata_limit) + idata_limit *= 2; + STBI_NOTUSED(idata_limit_old); + p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); + z->idata = p; + } + if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); + ioff += c.length; + break; + } + + case STBI__PNG_TYPE('I','E','N','D'): { + stbi__uint32 raw_len, bpl; + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (scan != STBI__SCAN_load) return 1; + if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); + // initial guess for decoded data size to avoid unnecessary reallocs + bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component + raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; + z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); + if (z->expanded == NULL) return 0; // zlib should set error + STBI_FREE(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; + else + s->img_out_n = s->img_n; + if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; + if (has_trans) { + if (z->depth == 16) { + if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; + } else { + if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; + } + } + if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) + stbi__de_iphone(z); + if (pal_img_n) { + // pal_img_n == 3 or 4 + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) + return 0; + } else if 
(has_trans) { + // non-paletted image with tRNS -> source image has (constant) alpha + ++s->img_n; + } + STBI_FREE(z->expanded); z->expanded = NULL; + return 1; + } + + default: + // if critical, fail + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if ((c.type & (1 << 29)) == 0) { + #ifndef STBI_NO_FAILURE_STRINGS + // not threadsafe + static char invalid_chunk[] = "XXXX PNG chunk not known"; + invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); + invalid_chunk[1] = STBI__BYTECAST(c.type >> 16); + invalid_chunk[2] = STBI__BYTECAST(c.type >> 8); + invalid_chunk[3] = STBI__BYTECAST(c.type >> 0); + #endif + return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type"); + } + stbi__skip(s, c.length); + break; + } + // end of PNG chunk, read and skip CRC + stbi__get32be(s); + } +} + +static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri) +{ + void *result=NULL; + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { + if (p->depth < 8) + ri->bits_per_channel = 8; + else + ri->bits_per_channel = p->depth; + result = p->out; + p->out = NULL; + if (req_comp && req_comp != p->s->img_out_n) { + if (ri->bits_per_channel == 8) + result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + else + result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + p->s->img_out_n = req_comp; + if (result == NULL) return result; + } + *x = p->s->img_x; + *y = p->s->img_y; + if (n) *n = p->s->img_n; + } + STBI_FREE(p->out); p->out = NULL; + STBI_FREE(p->expanded); p->expanded = NULL; + STBI_FREE(p->idata); p->idata = NULL; + + return result; +} + +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi__png p; + p.s = s; + return stbi__do_png(&p, 
x,y,comp,req_comp, ri); +} + +static int stbi__png_test(stbi__context *s) +{ + int r; + r = stbi__check_png_header(s); + stbi__rewind(s); + return r; +} + +static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp) +{ + if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) { + stbi__rewind( p->s ); + return 0; + } + if (x) *x = p->s->img_x; + if (y) *y = p->s->img_y; + if (comp) *comp = p->s->img_n; + return 1; +} + +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__png p; + p.s = s; + return stbi__png_info_raw(&p, x, y, comp); +} + +static int stbi__png_is16(stbi__context *s) +{ + stbi__png p; + p.s = s; + if (!stbi__png_info_raw(&p, NULL, NULL, NULL)) + return 0; + if (p.depth != 16) { + stbi__rewind(p.s); + return 0; + } + return 1; +} +#endif + +// Microsoft/Windows BMP image + +#ifndef STBI_NO_BMP +static int stbi__bmp_test_raw(stbi__context *s) +{ + int r; + int sz; + if (stbi__get8(s) != 'B') return 0; + if (stbi__get8(s) != 'M') return 0; + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + stbi__get32le(s); // discard data offset + sz = stbi__get32le(s); + r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124); + return r; +} + +static int stbi__bmp_test(stbi__context *s) +{ + int r = stbi__bmp_test_raw(s); + stbi__rewind(s); + return r; +} + + +// returns 0..31 for the highest set bit +static int stbi__high_bit(unsigned int z) +{ + int n=0; + if (z == 0) return -1; + if (z >= 0x10000) n += 16, z >>= 16; + if (z >= 0x00100) n += 8, z >>= 8; + if (z >= 0x00010) n += 4, z >>= 4; + if (z >= 0x00004) n += 2, z >>= 2; + if (z >= 0x00002) n += 1, z >>= 1; + return n; +} + +static int stbi__bitcount(unsigned int a) +{ + a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 + a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4 + a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits + a = (a + (a >> 8)); // max 16 per 8 bits + a = 
(a + (a >> 16)); // max 32 per 8 bits + return a & 0xff; +} + +// extract an arbitrarily-aligned N-bit value (N=bits) +// from v, and then make it 8-bits long and fractionally +// extend it to full full range. +static int stbi__shiftsigned(int v, int shift, int bits) +{ + static unsigned int mul_table[9] = { + 0, + 0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/, + 0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/, + }; + static unsigned int shift_table[9] = { + 0, 0,0,1,0,2,4,6,0, + }; + if (shift < 0) + v <<= -shift; + else + v >>= shift; + STBI_ASSERT(v >= 0 && v < 256); + v >>= (8-bits); + STBI_ASSERT(bits >= 0 && bits <= 8); + return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits]; +} + +typedef struct +{ + int bpp, offset, hsz; + unsigned int mr,mg,mb,ma, all_a; +} stbi__bmp_data; + +static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) +{ + int hsz; + if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP"); + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + info->offset = stbi__get32le(s); + info->hsz = hsz = stbi__get32le(s); + info->mr = info->mg = info->mb = info->ma = 0; + + if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown"); + if (hsz == 12) { + s->img_x = stbi__get16le(s); + s->img_y = stbi__get16le(s); + } else { + s->img_x = stbi__get32le(s); + s->img_y = stbi__get32le(s); + } + if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); + info->bpp = stbi__get16le(s); + if (hsz != 12) { + int compress = stbi__get32le(s); + if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); + stbi__get32le(s); // discard sizeof + stbi__get32le(s); // discard hres + stbi__get32le(s); // discard vres + stbi__get32le(s); // 
discard colorsused + stbi__get32le(s); // discard max important + if (hsz == 40 || hsz == 56) { + if (hsz == 56) { + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + } + if (info->bpp == 16 || info->bpp == 32) { + if (compress == 0) { + if (info->bpp == 32) { + info->mr = 0xffu << 16; + info->mg = 0xffu << 8; + info->mb = 0xffu << 0; + info->ma = 0xffu << 24; + info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 + } else { + info->mr = 31u << 10; + info->mg = 31u << 5; + info->mb = 31u << 0; + } + } else if (compress == 3) { + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + // not documented, but generated by photoshop and handled by mspaint + if (info->mr == info->mg && info->mg == info->mb) { + // ?!?!? + return stbi__errpuc("bad BMP", "bad BMP"); + } + } else + return stbi__errpuc("bad BMP", "bad BMP"); + } + } else { + int i; + if (hsz != 108 && hsz != 124) + return stbi__errpuc("bad BMP", "bad BMP"); + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + info->ma = stbi__get32le(s); + stbi__get32le(s); // discard color space + for (i=0; i < 12; ++i) + stbi__get32le(s); // discard color space parameters + if (hsz == 124) { + stbi__get32le(s); // discard rendering intent + stbi__get32le(s); // discard offset of profile data + stbi__get32le(s); // discard size of profile data + stbi__get32le(s); // discard reserved + } + } + } + return (void *) 1; +} + + +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + unsigned int mr=0,mg=0,mb=0,ma=0, all_a; + stbi_uc pal[256][4]; + int psize=0,i,j,width; + int flip_vertically, pad, target; + stbi__bmp_data info; + STBI_NOTUSED(ri); + + info.all_a = 255; + if (stbi__bmp_parse_header(s, &info) == NULL) + return NULL; // error code already set + + flip_vertically = ((int) s->img_y) > 0; + s->img_y = 
abs((int) s->img_y); + + mr = info.mr; + mg = info.mg; + mb = info.mb; + ma = info.ma; + all_a = info.all_a; + + if (info.hsz == 12) { + if (info.bpp < 24) + psize = (info.offset - 14 - 24) / 3; + } else { + if (info.bpp < 16) + psize = (info.offset - 14 - info.hsz) >> 2; + } + + s->img_n = ma ? 4 : 3; + if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 + target = req_comp; + else + target = s->img_n; // if they want monochrome, we'll post-convert + + // sanity-check size + if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "Corrupt BMP"); + + out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + if (info.bpp < 16) { + int z=0; + if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); } + for (i=0; i < psize; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + if (info.hsz != 12) stbi__get8(s); + pal[i][3] = 255; + } + stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 
3 : 4)); + if (info.bpp == 1) width = (s->img_x + 7) >> 3; + else if (info.bpp == 4) width = (s->img_x + 1) >> 1; + else if (info.bpp == 8) width = s->img_x; + else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); } + pad = (-width)&3; + if (info.bpp == 1) { + for (j=0; j < (int) s->img_y; ++j) { + int bit_offset = 7, v = stbi__get8(s); + for (i=0; i < (int) s->img_x; ++i) { + int color = (v>>bit_offset)&0x1; + out[z++] = pal[color][0]; + out[z++] = pal[color][1]; + out[z++] = pal[color][2]; + if((--bit_offset) < 0) { + bit_offset = 7; + v = stbi__get8(s); + } + } + stbi__skip(s, pad); + } + } else { + for (j=0; j < (int) s->img_y; ++j) { + for (i=0; i < (int) s->img_x; i += 2) { + int v=stbi__get8(s),v2=0; + if (info.bpp == 4) { + v2 = v & 15; + v >>= 4; + } + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + v = (info.bpp == 8) ? stbi__get8(s) : v2; + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + } + stbi__skip(s, pad); + } + } + } else { + int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; + int z = 0; + int easy=0; + stbi__skip(s, info.offset - 14 - info.hsz); + if (info.bpp == 24) width = 3 * s->img_x; + else if (info.bpp == 16) width = 2*s->img_x; + else /* bpp = 32 and pad = 0 */ width=0; + pad = (-width) & 3; + if (info.bpp == 24) { + easy = 1; + } else if (info.bpp == 32) { + if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000) + easy = 2; + } + if (!easy) { + if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } + // right shift amt to put high bit in position #7 + rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr); + gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg); + bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb); + ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma); 
+ } + for (j=0; j < (int) s->img_y; ++j) { + if (easy) { + for (i=0; i < (int) s->img_x; ++i) { + unsigned char a; + out[z+2] = stbi__get8(s); + out[z+1] = stbi__get8(s); + out[z+0] = stbi__get8(s); + z += 3; + a = (easy == 2 ? stbi__get8(s) : 255); + all_a |= a; + if (target == 4) out[z++] = a; + } + } else { + int bpp = info.bpp; + for (i=0; i < (int) s->img_x; ++i) { + stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s)); + unsigned int a; + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount)); + a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255); + all_a |= a; + if (target == 4) out[z++] = STBI__BYTECAST(a); + } + } + stbi__skip(s, pad); + } + } + + // if alpha channel is all 0s, replace with all 255s + if (target == 4 && all_a == 0) + for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) + out[i] = 255; + + if (flip_vertically) { + stbi_uc t; + for (j=0; j < (int) s->img_y>>1; ++j) { + stbi_uc *p1 = out + j *s->img_x*target; + stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; + for (i=0; i < (int) s->img_x*target; ++i) { + t = p1[i], p1[i] = p2[i], p2[i] = t; + } + } + } + + if (req_comp && req_comp != target) { + out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + return out; +} +#endif + +// Targa Truevision - TGA +// by Jonathan Dummer +#ifndef STBI_NO_TGA +// returns STBI_rgb or whatever, 0 on error +static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) +{ + // only RGB or RGBA (incl. 
16bit) or grey allowed + if (is_rgb16) *is_rgb16 = 0; + switch(bits_per_pixel) { + case 8: return STBI_grey; + case 16: if(is_grey) return STBI_grey_alpha; + // fallthrough + case 15: if(is_rgb16) *is_rgb16 = 1; + return STBI_rgb; + case 24: // fallthrough + case 32: return bits_per_pixel/8; + default: return 0; + } +} + +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp) +{ + int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp; + int sz, tga_colormap_type; + stbi__get8(s); // discard Offset + tga_colormap_type = stbi__get8(s); // colormap type + if( tga_colormap_type > 1 ) { + stbi__rewind(s); + return 0; // only RGB or indexed allowed + } + tga_image_type = stbi__get8(s); // image type + if ( tga_colormap_type == 1 ) { // colormapped (paletted) image + if (tga_image_type != 1 && tga_image_type != 9) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip image x and y origin + tga_colormap_bpp = sz; + } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE + if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) { + stbi__rewind(s); + return 0; // only RGB or grey allowed, +/- RLE + } + stbi__skip(s,9); // skip colormap specification and image x/y origin + tga_colormap_bpp = 0; + } + tga_w = stbi__get16le(s); + if( tga_w < 1 ) { + stbi__rewind(s); + return 0; // test width + } + tga_h = stbi__get16le(s); + if( tga_h < 1 ) { + stbi__rewind(s); + return 0; // test height + } + tga_bits_per_pixel = stbi__get8(s); // bits per pixel + stbi__get8(s); // ignore alpha bits + if (tga_colormap_bpp != 0) { + if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) { + // when using a colormap, 
tga_bits_per_pixel is the size of the indexes + // I don't think anything but 8 or 16bit indexes makes sense + stbi__rewind(s); + return 0; + } + tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); + } else { + tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); + } + if(!tga_comp) { + stbi__rewind(s); + return 0; + } + if (x) *x = tga_w; + if (y) *y = tga_h; + if (comp) *comp = tga_comp; + return 1; // seems to have passed everything +} + +static int stbi__tga_test(stbi__context *s) +{ + int res = 0; + int sz, tga_color_type; + stbi__get8(s); // discard Offset + tga_color_type = stbi__get8(s); // color type + if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed + sz = stbi__get8(s); // image type + if ( tga_color_type == 1 ) { // colormapped (paletted) image + if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9 + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + stbi__skip(s,4); // skip image x and y origin + } else { // "normal" image w/o colormap + if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE + stbi__skip(s,9); // skip colormap specification and image x/y origin + } + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height + sz = stbi__get8(s); // bits per pixel + if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + + res = 1; // if we got this far, everything's good and we can return 1 instead of 0 + +errorEnd: + stbi__rewind(s); + return res; +} + +// read 16bit value and convert to 24bit RGB 
+static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) +{ + stbi__uint16 px = (stbi__uint16)stbi__get16le(s); + stbi__uint16 fiveBitMask = 31; + // we have 3 channels with 5bits each + int r = (px >> 10) & fiveBitMask; + int g = (px >> 5) & fiveBitMask; + int b = px & fiveBitMask; + // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later + out[0] = (stbi_uc)((r * 255)/31); + out[1] = (stbi_uc)((g * 255)/31); + out[2] = (stbi_uc)((b * 255)/31); + + // some people claim that the most significant bit might be used for alpha + // (possibly if an alpha-bit is set in the "image descriptor byte") + // but that only made 16bit test images completely translucent.. + // so let's treat all 15 and 16bit TGAs as RGB with no alpha. +} + +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + // read in the TGA header stuff + int tga_offset = stbi__get8(s); + int tga_indexed = stbi__get8(s); + int tga_image_type = stbi__get8(s); + int tga_is_RLE = 0; + int tga_palette_start = stbi__get16le(s); + int tga_palette_len = stbi__get16le(s); + int tga_palette_bits = stbi__get8(s); + int tga_x_origin = stbi__get16le(s); + int tga_y_origin = stbi__get16le(s); + int tga_width = stbi__get16le(s); + int tga_height = stbi__get16le(s); + int tga_bits_per_pixel = stbi__get8(s); + int tga_comp, tga_rgb16=0; + int tga_inverted = stbi__get8(s); + // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?) 
+ // image data + unsigned char *tga_data; + unsigned char *tga_palette = NULL; + int i, j; + unsigned char raw_data[4] = {0}; + int RLE_count = 0; + int RLE_repeating = 0; + int read_next_pixel = 1; + STBI_NOTUSED(ri); + + // do a tiny bit of precessing + if ( tga_image_type >= 8 ) + { + tga_image_type -= 8; + tga_is_RLE = 1; + } + tga_inverted = 1 - ((tga_inverted >> 5) & 1); + + // If I'm paletted, then I'll use the number of bits from the palette + if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16); + else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16); + + if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency + return stbi__errpuc("bad format", "Can't find out TGA pixelformat"); + + // tga info + *x = tga_width; + *y = tga_height; + if (comp) *comp = tga_comp; + + if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0)) + return stbi__errpuc("too large", "Corrupt TGA"); + + tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0); + if (!tga_data) return stbi__errpuc("outofmem", "Out of memory"); + + // skip to the data's starting position (offset usually = 0) + stbi__skip(s, tga_offset ); + + if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) { + for (i=0; i < tga_height; ++i) { + int row = tga_inverted ? tga_height -i - 1 : i; + stbi_uc *tga_row = tga_data + row*tga_width*tga_comp; + stbi__getn(s, tga_row, tga_width * tga_comp); + } + } else { + // do I need to load a palette? + if ( tga_indexed) + { + // any data to skip? 
(offset usually = 0) + stbi__skip(s, tga_palette_start ); + // load the palette + tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0); + if (!tga_palette) { + STBI_FREE(tga_data); + return stbi__errpuc("outofmem", "Out of memory"); + } + if (tga_rgb16) { + stbi_uc *pal_entry = tga_palette; + STBI_ASSERT(tga_comp == STBI_rgb); + for (i=0; i < tga_palette_len; ++i) { + stbi__tga_read_rgb16(s, pal_entry); + pal_entry += tga_comp; + } + } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) { + STBI_FREE(tga_data); + STBI_FREE(tga_palette); + return stbi__errpuc("bad palette", "Corrupt TGA"); + } + } + // load the data + for (i=0; i < tga_width * tga_height; ++i) + { + // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk? + if ( tga_is_RLE ) + { + if ( RLE_count == 0 ) + { + // yep, get the next byte as a RLE command + int RLE_cmd = stbi__get8(s); + RLE_count = 1 + (RLE_cmd & 127); + RLE_repeating = RLE_cmd >> 7; + read_next_pixel = 1; + } else if ( !RLE_repeating ) + { + read_next_pixel = 1; + } + } else + { + read_next_pixel = 1; + } + // OK, if I need to read a pixel, do it now + if ( read_next_pixel ) + { + // load however much data we did have + if ( tga_indexed ) + { + // read in index, then perform the lookup + int pal_idx = (tga_bits_per_pixel == 8) ? 
stbi__get8(s) : stbi__get16le(s); + if ( pal_idx >= tga_palette_len ) { + // invalid index + pal_idx = 0; + } + pal_idx *= tga_comp; + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = tga_palette[pal_idx+j]; + } + } else if(tga_rgb16) { + STBI_ASSERT(tga_comp == STBI_rgb); + stbi__tga_read_rgb16(s, raw_data); + } else { + // read in the data raw + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = stbi__get8(s); + } + } + // clear the reading flag for the next pixel + read_next_pixel = 0; + } // end of reading a pixel + + // copy data + for (j = 0; j < tga_comp; ++j) + tga_data[i*tga_comp+j] = raw_data[j]; + + // in case we're in RLE mode, keep counting down + --RLE_count; + } + // do I need to invert the image? + if ( tga_inverted ) + { + for (j = 0; j*2 < tga_height; ++j) + { + int index1 = j * tga_width * tga_comp; + int index2 = (tga_height - 1 - j) * tga_width * tga_comp; + for (i = tga_width * tga_comp; i > 0; --i) + { + unsigned char temp = tga_data[index1]; + tga_data[index1] = tga_data[index2]; + tga_data[index2] = temp; + ++index1; + ++index2; + } + } + } + // clear my palette, if I had one + if ( tga_palette != NULL ) + { + STBI_FREE( tga_palette ); + } + } + + // swap RGB - if the source data was RGB16, it already is in the right order + if (tga_comp >= 3 && !tga_rgb16) + { + unsigned char* tga_pixel = tga_data; + for (i=0; i < tga_width * tga_height; ++i) + { + unsigned char temp = tga_pixel[0]; + tga_pixel[0] = tga_pixel[2]; + tga_pixel[2] = temp; + tga_pixel += tga_comp; + } + } + + // convert to target component count + if (req_comp && req_comp != tga_comp) + tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); + + // the things I do to get rid of an error message, and yet keep + // Microsoft's C compilers happy... 
[8^( + tga_palette_start = tga_palette_len = tga_palette_bits = + tga_x_origin = tga_y_origin = 0; + // OK, done + return tga_data; +} +#endif + +// ************************************************************************************************* +// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s) +{ + int r = (stbi__get32be(s) == 0x38425053); + stbi__rewind(s); + return r; +} + +static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount) +{ + int count, nleft, len; + + count = 0; + while ((nleft = pixelCount - count) > 0) { + len = stbi__get8(s); + if (len == 128) { + // No-op. + } else if (len < 128) { + // Copy next len+1 bytes literally. + len++; + if (len > nleft) return 0; // corrupt data + count += len; + while (len) { + *p = stbi__get8(s); + p += 4; + len--; + } + } else if (len > 128) { + stbi_uc val; + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) + len = 257 - len; + if (len > nleft) return 0; // corrupt data + val = stbi__get8(s); + count += len; + while (len) { + *p = val; + p += 4; + len--; + } + } + } + + return 1; +} + +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + int pixelCount; + int channelCount, compression; + int channel, i; + int bitdepth; + int w,h; + stbi_uc *out; + STBI_NOTUSED(ri); + + // Check identifier + if (stbi__get32be(s) != 0x38425053) // "8BPS" + return stbi__errpuc("not PSD", "Corrupt PSD image"); + + // Check file type version. + if (stbi__get16be(s) != 1) + return stbi__errpuc("wrong version", "Unsupported version of PSD image"); + + // Skip 6 reserved bytes. + stbi__skip(s, 6 ); + + // Read the number of channels (R, G, B, A, etc). 
+ channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) + return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image"); + + // Read the rows and columns of the image. + h = stbi__get32be(s); + w = stbi__get32be(s); + + // Make sure the depth is 8 bits. + bitdepth = stbi__get16be(s); + if (bitdepth != 8 && bitdepth != 16) + return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit"); + + // Make sure the color mode is RGB. + // Valid options are: + // 0: Bitmap + // 1: Grayscale + // 2: Indexed color + // 3: RGB color + // 4: CMYK color + // 7: Multichannel + // 8: Duotone + // 9: Lab color + if (stbi__get16be(s) != 3) + return stbi__errpuc("wrong color format", "PSD is not in RGB color format"); + + // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) + stbi__skip(s,stbi__get32be(s) ); + + // Skip the image resources. (resolution, pen tool paths, etc) + stbi__skip(s, stbi__get32be(s) ); + + // Skip the reserved data. + stbi__skip(s, stbi__get32be(s) ); + + // Find out if the data is compressed. + // Known values: + // 0: no compression + // 1: RLE compressed + compression = stbi__get16be(s); + if (compression > 1) + return stbi__errpuc("bad compression", "PSD has an unknown compression format"); + + // Check size + if (!stbi__mad3sizes_valid(4, w, h, 0)) + return stbi__errpuc("too large", "Corrupt PSD"); + + // Create the destination image. + + if (!compression && bitdepth == 16 && bpc == 16) { + out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); + ri->bits_per_channel = 16; + } else + out = (stbi_uc *) stbi__malloc(4 * w*h); + + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + pixelCount = w*h; + + // Initialize the data to zero. + //memset( out, 0, pixelCount * 4 ); + + // Finally, the image data. 
+ if (compression) { + // RLE as used by .PSD and .TIFF + // Loop until you get the number of unpacked bytes you are expecting: + // Read the next source byte into n. + // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. + // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. + // Else if n is 128, noop. + // Endloop + + // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data, + // which we're going to just skip. + stbi__skip(s, h * channelCount * 2 ); + + // Read the RLE data by channel. + for (channel = 0; channel < 4; channel++) { + stbi_uc *p; + + p = out+channel; + if (channel >= channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++, p += 4) + *p = (channel == 3 ? 255 : 0); + } else { + // Read the RLE data. + if (!stbi__psd_decode_rle(s, p, pixelCount)) { + STBI_FREE(out); + return stbi__errpuc("corrupt", "bad RLE data"); + } + } + } + + } else { + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. + + // Read the data by channel. + for (channel = 0; channel < 4; channel++) { + if (channel >= channelCount) { + // Fill this channel with default data. + if (bitdepth == 16 && bpc == 16) { + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + stbi__uint16 val = channel == 3 ? 65535 : 0; + for (i = 0; i < pixelCount; i++, q += 4) + *q = val; + } else { + stbi_uc *p = out+channel; + stbi_uc val = channel == 3 ? 
255 : 0; + for (i = 0; i < pixelCount; i++, p += 4) + *p = val; + } + } else { + if (ri->bits_per_channel == 16) { // output bpc + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + for (i = 0; i < pixelCount; i++, q += 4) + *q = (stbi__uint16) stbi__get16be(s); + } else { + stbi_uc *p = out+channel; + if (bitdepth == 16) { // input bpc + for (i = 0; i < pixelCount; i++, p += 4) + *p = (stbi_uc) (stbi__get16be(s) >> 8); + } else { + for (i = 0; i < pixelCount; i++, p += 4) + *p = stbi__get8(s); + } + } + } + } + } + + // remove weird white matte from PSD + if (channelCount >= 4) { + if (ri->bits_per_channel == 16) { + for (i=0; i < w*h; ++i) { + stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; + if (pixel[3] != 0 && pixel[3] != 65535) { + float a = pixel[3] / 65535.0f; + float ra = 1.0f / a; + float inv_a = 65535.0f * (1 - ra); + pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); + pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); + pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); + } + } + } else { + for (i=0; i < w*h; ++i) { + unsigned char *pixel = out + 4*i; + if (pixel[3] != 0 && pixel[3] != 255) { + float a = pixel[3] / 255.0f; + float ra = 1.0f / a; + float inv_a = 255.0f * (1 - ra); + pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); + pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); + pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); + } + } + } + } + + // convert to desired output format + if (req_comp && req_comp != 4) { + if (ri->bits_per_channel == 16) + out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h); + else + out = stbi__convert_format(out, 4, req_comp, w, h); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + if (comp) *comp = 4; + *y = h; + *x = w; + + return out; +} +#endif + +// ************************************************************************************************* +// Softimage PIC loader +// by Tom Seddon +// +// See 
http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format +// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/ + +#ifndef STBI_NO_PIC +static int stbi__pic_is4(stbi__context *s,const char *str) +{ + int i; + for (i=0; i<4; ++i) + if (stbi__get8(s) != (stbi_uc)str[i]) + return 0; + + return 1; +} + +static int stbi__pic_test_core(stbi__context *s) +{ + int i; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) + return 0; + + for(i=0;i<84;++i) + stbi__get8(s); + + if (!stbi__pic_is4(s,"PICT")) + return 0; + + return 1; +} + +typedef struct +{ + stbi_uc size,type,channel; +} stbi__pic_packet; + +static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest) +{ + int mask=0x80, i; + + for (i=0; i<4; ++i, mask>>=1) { + if (channel & mask) { + if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short"); + dest[i]=stbi__get8(s); + } + } + + return dest; +} + +static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src) +{ + int mask=0x80,i; + + for (i=0;i<4; ++i, mask>>=1) + if (channel&mask) + dest[i]=src[i]; +} + +static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result) +{ + int act_comp=0,num_packets=0,y,chained; + stbi__pic_packet packets[10]; + + // this will (should...) cater for even some bizarre stuff like having data + // for the same channel in multiple packets. + do { + stbi__pic_packet *packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return stbi__errpuc("bad format","too many packets"); + + packet = &packets[num_packets++]; + + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + + act_comp |= packet->channel; + + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)"); + if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp"); + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel? 
+ + for(y=0; ytype) { + default: + return stbi__errpuc("bad format","packet has bad compression type"); + + case 0: {//uncompressed + int x; + + for(x=0;xchannel,dest)) + return 0; + break; + } + + case 1://Pure RLE + { + int left=width, i; + + while (left>0) { + stbi_uc count,value[4]; + + count=stbi__get8(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)"); + + if (count > left) + count = (stbi_uc) left; + + if (!stbi__readval(s,packet->channel,value)) return 0; + + for(i=0; ichannel,dest,value); + left -= count; + } + } + break; + + case 2: {//Mixed RLE + int left=width; + while (left>0) { + int count = stbi__get8(s), i; + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)"); + + if (count >= 128) { // Repeated + stbi_uc value[4]; + + if (count==128) + count = stbi__get16be(s); + else + count -= 127; + if (count > left) + return stbi__errpuc("bad file","scanline overrun"); + + if (!stbi__readval(s,packet->channel,value)) + return 0; + + for(i=0;ichannel,dest,value); + } else { // Raw + ++count; + if (count>left) return stbi__errpuc("bad file","scanline overrun"); + + for(i=0;ichannel,dest)) + return 0; + } + left-=count; + } + break; + } + } + } + } + + return result; +} + +static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri) +{ + stbi_uc *result; + int i, x,y, internal_comp; + STBI_NOTUSED(ri); + + if (!comp) comp = &internal_comp; + + for (i=0; i<92; ++i) + stbi__get8(s); + + x = stbi__get16be(s); + y = stbi__get16be(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)"); + if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode"); + + stbi__get32be(s); //skip `ratio' + stbi__get16be(s); //skip `fields' + stbi__get16be(s); //skip `pad' + + // intermediate buffer is RGBA + result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0); + memset(result, 0xff, 
x*y*4); + + if (!stbi__pic_load_core(s,x,y,comp, result)) { + STBI_FREE(result); + result=0; + } + *px = x; + *py = y; + if (req_comp == 0) req_comp = *comp; + result=stbi__convert_format(result,4,req_comp,x,y); + + return result; +} + +static int stbi__pic_test(stbi__context *s) +{ + int r = stbi__pic_test_core(s); + stbi__rewind(s); + return r; +} +#endif + +// ************************************************************************************************* +// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb + +#ifndef STBI_NO_GIF +typedef struct +{ + stbi__int16 prefix; + stbi_uc first; + stbi_uc suffix; +} stbi__gif_lzw; + +typedef struct +{ + int w,h; + stbi_uc *out; // output buffer (always 4 components) + stbi_uc *background; // The current "background" as far as a gif is concerned + stbi_uc *history; + int flags, bgindex, ratio, transparent, eflags; + stbi_uc pal[256][4]; + stbi_uc lpal[256][4]; + stbi__gif_lzw codes[8192]; + stbi_uc *color_table; + int parse, step; + int lflags; + int start_x, start_y; + int max_x, max_y; + int cur_x, cur_y; + int line_size; + int delay; +} stbi__gif; + +static int stbi__gif_test_raw(stbi__context *s) +{ + int sz; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0; + sz = stbi__get8(s); + if (sz != '9' && sz != '7') return 0; + if (stbi__get8(s) != 'a') return 0; + return 1; +} + +static int stbi__gif_test(stbi__context *s) +{ + int r = stbi__gif_test_raw(s); + stbi__rewind(s); + return r; +} + +static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp) +{ + int i; + for (i=0; i < num_entries; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + pal[i][3] = transp == i ? 
0 : 255; + } +} + +static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info) +{ + stbi_uc version; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') + return stbi__err("not GIF", "Corrupt GIF"); + + version = stbi__get8(s); + if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF"); + if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF"); + + stbi__g_failure_reason = ""; + g->w = stbi__get16le(s); + g->h = stbi__get16le(s); + g->flags = stbi__get8(s); + g->bgindex = stbi__get8(s); + g->ratio = stbi__get8(s); + g->transparent = -1; + + if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments + + if (is_info) return 1; + + if (g->flags & 0x80) + stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); + + return 1; +} + +static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); + if (!stbi__gif_header(s, g, comp, 1)) { + STBI_FREE(g); + stbi__rewind( s ); + return 0; + } + if (x) *x = g->w; + if (y) *y = g->h; + STBI_FREE(g); + return 1; +} + +static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) +{ + stbi_uc *p, *c; + int idx; + + // recurse to decode the prefixes, since the linked-list is backwards, + // and working backwards through an interleaved image would be nasty + if (g->codes[code].prefix >= 0) + stbi__out_gif_code(g, g->codes[code].prefix); + + if (g->cur_y >= g->max_y) return; + + idx = g->cur_x + g->cur_y; + p = &g->out[idx]; + g->history[idx / 4] = 1; + + c = &g->color_table[g->codes[code].suffix * 4]; + if (c[3] > 128) { // don't render transparent pixels; + p[0] = c[2]; + p[1] = c[1]; + p[2] = c[0]; + p[3] = c[3]; + } + g->cur_x += 4; + + if (g->cur_x >= g->max_x) { + g->cur_x = g->start_x; + g->cur_y += g->step; + + while (g->cur_y >= g->max_y && g->parse > 0) { + g->step = (1 << g->parse) * 
g->line_size; + g->cur_y = g->start_y + (g->step >> 1); + --g->parse; + } + } +} + +static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) +{ + stbi_uc lzw_cs; + stbi__int32 len, init_code; + stbi__uint32 first; + stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear; + stbi__gif_lzw *p; + + lzw_cs = stbi__get8(s); + if (lzw_cs > 12) return NULL; + clear = 1 << lzw_cs; + first = 1; + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + bits = 0; + valid_bits = 0; + for (init_code = 0; init_code < clear; init_code++) { + g->codes[init_code].prefix = -1; + g->codes[init_code].first = (stbi_uc) init_code; + g->codes[init_code].suffix = (stbi_uc) init_code; + } + + // support no starting clear code + avail = clear+2; + oldcode = -1; + + len = 0; + for(;;) { + if (valid_bits < codesize) { + if (len == 0) { + len = stbi__get8(s); // start new block + if (len == 0) + return g->out; + } + --len; + bits |= (stbi__int32) stbi__get8(s) << valid_bits; + valid_bits += 8; + } else { + stbi__int32 code = bits & codemask; + bits >>= codesize; + valid_bits -= codesize; + // @OPTIMIZE: is there some way we can accelerate the non-clear path? + if (code == clear) { // clear code + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + avail = clear + 2; + oldcode = -1; + first = 0; + } else if (code == clear + 1) { // end of stream code + stbi__skip(s, len); + while ((len = stbi__get8(s)) > 0) + stbi__skip(s,len); + return g->out; + } else if (code <= avail) { + if (first) { + return stbi__errpuc("no clear code", "Corrupt GIF"); + } + + if (oldcode >= 0) { + p = &g->codes[avail++]; + if (avail > 8192) { + return stbi__errpuc("too many codes", "Corrupt GIF"); + } + + p->prefix = (stbi__int16) oldcode; + p->first = g->codes[oldcode].first; + p->suffix = (code == avail) ? 
p->first : g->codes[code].first; + } else if (code == avail) + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + + stbi__out_gif_code(g, (stbi__uint16) code); + + if ((avail & codemask) == 0 && avail <= 0x0FFF) { + codesize++; + codemask = (1 << codesize) - 1; + } + + oldcode = code; + } else { + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + } + } + } +} + +// this function is designed to support animated gifs, although stb_image doesn't support it +// two back is the image from two frames ago, used for a very specific disposal format +static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back) +{ + int dispose; + int first_frame; + int pi; + int pcount; + + // on first frame, any non-written pixels get the background colour (non-transparent) + first_frame = 0; + if (g->out == 0) { + if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header + g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h); + g->background = (stbi_uc *) stbi__malloc(4 * g->w * g->h); + g->history = (stbi_uc *) stbi__malloc(g->w * g->h); + if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory"); + + // image is treated as "tranparent" at the start - ie, nothing overwrites the current background; + // background colour is only used for pixels that are not rendered first frame, after that "background" + // color refers to teh color that was there the previous frame. + memset( g->out, 0x00, 4 * g->w * g->h ); + memset( g->background, 0x00, 4 * g->w * g->h ); // state of the background (starts transparent) + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + first_frame = 1; + } else { + // second frame - how do we dispoase of the previous one? 
+ dispose = (g->eflags & 0x1C) >> 2; + pcount = g->w * g->h; + + if ((dispose == 3) && (two_back == 0)) { + dispose = 2; // if I don't have an image to revert back to, default to the old background + } + + if (dispose == 3) { // use previous graphic + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); + } + } + } else if (dispose == 2) { + // restore what was changed last frame to background before that frame; + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); + } + } + } else { + // This is a non-disposal case eithe way, so just + // leave the pixels as is, and they will become the new background + // 1: do not dispose + // 0: not specified. + } + + // background is what out is after the undoing of the previou frame; + memcpy( g->background, g->out, 4 * g->w * g->h ); + } + + // clear my history; + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + + for (;;) { + int tag = stbi__get8(s); + switch (tag) { + case 0x2C: /* Image Descriptor */ + { + stbi__int32 x, y, w, h; + stbi_uc *o; + + x = stbi__get16le(s); + y = stbi__get16le(s); + w = stbi__get16le(s); + h = stbi__get16le(s); + if (((x + w) > (g->w)) || ((y + h) > (g->h))) + return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); + + g->line_size = g->w * 4; + g->start_x = x * 4; + g->start_y = y * g->line_size; + g->max_x = g->start_x + w * 4; + g->max_y = g->start_y + h * g->line_size; + g->cur_x = g->start_x; + g->cur_y = g->start_y; + + g->lflags = stbi__get8(s); + + if (g->lflags & 0x40) { + g->step = 8 * g->line_size; // first interlaced spacing + g->parse = 3; + } else { + g->step = g->line_size; + g->parse = 0; + } + + if (g->lflags & 0x80) { + stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? 
g->transparent : -1); + g->color_table = (stbi_uc *) g->lpal; + } else if (g->flags & 0x80) { + g->color_table = (stbi_uc *) g->pal; + } else + return stbi__errpuc("missing color table", "Corrupt GIF"); + + o = stbi__process_gif_raster(s, g); + if (o == NULL) return NULL; + + // if this was the first frame, + pcount = g->w * g->h; + if (first_frame && (g->bgindex > 0)) { + // if first frame, any pixel not drawn to gets the background color + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi] == 0) { + g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; + memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); + } + } + } + + return o; + } + + case 0x21: // Comment Extension. + { + int len; + int ext = stbi__get8(s); + if (ext == 0xF9) { // Graphic Control Extension. + len = stbi__get8(s); + if (len == 4) { + g->eflags = stbi__get8(s); + g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths. 
+ + // unset old transparent + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 255; + } + if (g->eflags & 0x01) { + g->transparent = stbi__get8(s); + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 0; + } + } else { + // don't need transparent + stbi__skip(s, 1); + g->transparent = -1; + } + } else { + stbi__skip(s, len); + break; + } + } + while ((len = stbi__get8(s)) != 0) { + stbi__skip(s, len); + } + break; + } + + case 0x3B: // gif stream termination code + return (stbi_uc *) s; // using '1' causes warning on some compilers + + default: + return stbi__errpuc("unknown code", "Corrupt GIF"); + } + } +} + +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + if (stbi__gif_test(s)) { + int layers = 0; + stbi_uc *u = 0; + stbi_uc *out = 0; + stbi_uc *two_back = 0; + stbi__gif g; + int stride; + memset(&g, 0, sizeof(g)); + if (delays) { + *delays = 0; + } + + do { + u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + + if (u) { + *x = g.w; + *y = g.h; + ++layers; + stride = g.w * g.h * 4; + + if (out) { + out = (stbi_uc*) STBI_REALLOC( out, layers * stride ); + if (delays) { + *delays = (int*) STBI_REALLOC( *delays, sizeof(int) * layers ); + } + } else { + out = (stbi_uc*)stbi__malloc( layers * stride ); + if (delays) { + *delays = (int*) stbi__malloc( layers * sizeof(int) ); + } + } + memcpy( out + ((layers - 1) * stride), u, stride ); + if (layers >= 2) { + two_back = out - 2 * stride; + } + + if (delays) { + (*delays)[layers - 1U] = g.delay; + } + } + } while (u != 0); + + // free temp buffer; + STBI_FREE(g.out); + STBI_FREE(g.history); + STBI_FREE(g.background); + + // do the final conversion after loading everything; + if (req_comp && req_comp != 4) + out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); + + *z = layers; + return out; + } else { + return stbi__errpuc("not GIF", "Image was not as a 
gif type."); + } +} + +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *u = 0; + stbi__gif g; + memset(&g, 0, sizeof(g)); + + u = stbi__gif_load_next(s, &g, comp, req_comp, 0); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + if (u) { + *x = g.w; + *y = g.h; + + // moved conversion to after successful load so that the same + // can be done for multiple frames. + if (req_comp && req_comp != 4) + u = stbi__convert_format(u, 4, req_comp, g.w, g.h); + } + + // free buffers needed for multiple frame loading; + STBI_FREE(g.history); + STBI_FREE(g.background); + + return u; +} + +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp) +{ + return stbi__gif_info_raw(s,x,y,comp); +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int stbi__hdr_test_core(stbi__context *s, const char *signature) +{ + int i; + for (i=0; signature[i]; ++i) + if (stbi__get8(s) != signature[i]) + return 0; + stbi__rewind(s); + return 1; +} + +static int stbi__hdr_test(stbi__context* s) +{ + int r = stbi__hdr_test_core(s, "#?RADIANCE\n"); + stbi__rewind(s); + if(!r) { + r = stbi__hdr_test_core(s, "#?RGBE\n"); + stbi__rewind(s); + } + return r; +} + +#define STBI__HDR_BUFLEN 1024 +static char *stbi__hdr_gettoken(stbi__context *z, char *buffer) +{ + int len=0; + char c = '\0'; + + c = (char) stbi__get8(z); + + while (!stbi__at_eof(z) && c != '\n') { + buffer[len++] = c; + if (len == STBI__HDR_BUFLEN-1) { + // flush to end of line + while (!stbi__at_eof(z) && stbi__get8(z) != '\n') + ; + break; + } + c = (char) stbi__get8(z); + } + + buffer[len] = 0; + return buffer; +} + +static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) +{ + if ( input[3] != 0 ) { + float f1; + // Exponent + f1 = (float) ldexp(1.0f, input[3] - 
(int)(128 + 8)); + if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; + } else { + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } + } +} + +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + float *hdr_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + const char *headerToken; + STBI_NOTUSED(ri); + + // Check identifier + headerToken = stbi__hdr_gettoken(s,buffer); + if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) + return stbi__errpf("not HDR", "Corrupt HDR image"); + + // Parse header + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! 
+ token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + height = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + width = (int) strtol(token, NULL, 10); + + *x = width; + *y = height; + + if (comp) *comp = 3; + if (req_comp == 0) req_comp = 3; + + if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0)) + return stbi__errpf("too large", "HDR image is too large"); + + // Read data + hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0); + if (!hdr_data) + return stbi__errpf("outofmem", "Out of memory"); + + // Load image data + // image data is stored as some number of sca + if ( width < 8 || width >= 32768) { + // Read flat data + for (j=0; j < height; ++j) { + for (i=0; i < width; ++i) { + stbi_uc rgbe[4]; + main_decode_loop: + stbi__getn(s, rgbe, 4); + stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); + } + } + } else { + // Read RLE-encoded data + scanline = NULL; + + for (j = 0; j < height; ++j) { + c1 = stbi__get8(s); + c2 = stbi__get8(s); + len = stbi__get8(s); + if (c1 != 2 || c2 != 2 || (len & 0x80)) { + // not run-length encoded, so we have to actually use THIS data as a decoded + // pixel (note this can't be a valid pixel--one of RGB must be >= 128) + stbi_uc rgbe[4]; + rgbe[0] = (stbi_uc) c1; + rgbe[1] = (stbi_uc) c2; + rgbe[2] = (stbi_uc) len; + rgbe[3] = (stbi_uc) stbi__get8(s); + stbi__hdr_convert(hdr_data, rgbe, req_comp); + i = 1; + j = 0; + STBI_FREE(scanline); + goto main_decode_loop; // yes, this makes no sense + } + len <<= 8; + len |= stbi__get8(s); + if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); } + if (scanline == NULL) { + scanline = (stbi_uc *) 
stbi__malloc_mad2(width, 4, 0); + if (!scanline) { + STBI_FREE(hdr_data); + return stbi__errpf("outofmem", "Out of memory"); + } + } + + for (k = 0; k < 4; ++k) { + int nleft; + i = 0; + while ((nleft = width - i) > 0) { + count = stbi__get8(s); + if (count > 128) { + // Run + value = stbi__get8(s); + count -= 128; + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = value; + } else { + // Dump + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = stbi__get8(s); + } + } + } + for (i=0; i < width; ++i) + stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); + } + if (scanline) + STBI_FREE(scanline); + } + + return hdr_data; +} + +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int dummy; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (stbi__hdr_test(s) == 0) { + stbi__rewind( s ); + return 0; + } + + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) { + stbi__rewind( s ); + return 0; + } + token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *y = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *x = (int) strtol(token, NULL, 10); + *comp = 3; + return 1; +} +#endif // STBI_NO_HDR + +#ifndef STBI_NO_BMP +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) +{ + void *p; + stbi__bmp_data info; + + info.all_a = 255; + p = stbi__bmp_parse_header(s, &info); + stbi__rewind( s ); + 
if (p == NULL) + return 0; + if (x) *x = s->img_x; + if (y) *y = s->img_y; + if (comp) *comp = info.ma ? 4 : 3; + return 1; +} +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) +{ + int channelCount, dummy, depth; + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + *y = stbi__get32be(s); + *x = stbi__get32be(s); + depth = stbi__get16be(s); + if (depth != 8 && depth != 16) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 3) { + stbi__rewind( s ); + return 0; + } + *comp = 4; + return 1; +} + +static int stbi__psd_is16(stbi__context *s) +{ + int channelCount, depth; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + (void) stbi__get32be(s); + (void) stbi__get32be(s); + depth = stbi__get16be(s); + if (depth != 16) { + stbi__rewind( s ); + return 0; + } + return 1; +} +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) +{ + int act_comp=0,num_packets=0,chained,dummy; + stbi__pic_packet packets[10]; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) { + stbi__rewind(s); + return 0; + } + + stbi__skip(s, 88); + + *x = stbi__get16be(s); + *y = stbi__get16be(s); + if (stbi__at_eof(s)) { + stbi__rewind( s); + return 0; + } + if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) { + stbi__rewind( s ); + return 0; + } + + stbi__skip(s, 8); + + do { + stbi__pic_packet 
*packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return 0; + + packet = &packets[num_packets++]; + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + act_comp |= packet->channel; + + if (stbi__at_eof(s)) { + stbi__rewind( s ); + return 0; + } + if (packet->size != 8) { + stbi__rewind( s ); + return 0; + } + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); + + return 1; +} +#endif + +// ************************************************************************************************* +// Portable Gray Map and Portable Pixel Map loader +// by Ken Miller +// +// PGM: http://netpbm.sourceforge.net/doc/pgm.html +// PPM: http://netpbm.sourceforge.net/doc/ppm.html +// +// Known limitations: +// Does not support comments in the header section +// Does not support ASCII image data (formats P2 and P3) +// Does not support 16-bit-per-channel + +#ifndef STBI_NO_PNM + +static int stbi__pnm_test(stbi__context *s) +{ + char p, t; + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind( s ); + return 0; + } + return 1; +} + +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + STBI_NOTUSED(ri); + + if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n)) + return 0; + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + + if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "PNM too large"); + + out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + stbi__getn(s, out, s->img_n * s->img_x * s->img_y); + + if (req_comp && req_comp != s->img_n) { + out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure 
+ } + return out; +} + +static int stbi__pnm_isspace(char c) +{ + return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; +} + +static void stbi__pnm_skip_whitespace(stbi__context *s, char *c) +{ + for (;;) { + while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) + *c = (char) stbi__get8(s); + + if (stbi__at_eof(s) || *c != '#') + break; + + while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) + *c = (char) stbi__get8(s); + } +} + +static int stbi__pnm_isdigit(char c) +{ + return c >= '0' && c <= '9'; +} + +static int stbi__pnm_getinteger(stbi__context *s, char *c) +{ + int value = 0; + + while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { + value = value*10 + (*c - '0'); + *c = (char) stbi__get8(s); + } + + return value; +} + +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) +{ + int maxv, dummy; + char c, p, t; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + stbi__rewind(s); + + // Get identifier + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind(s); + return 0; + } + + *comp = (t == '6') ? 
3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm + + c = (char) stbi__get8(s); + stbi__pnm_skip_whitespace(s, &c); + + *x = stbi__pnm_getinteger(s, &c); // read width + stbi__pnm_skip_whitespace(s, &c); + + *y = stbi__pnm_getinteger(s, &c); // read height + stbi__pnm_skip_whitespace(s, &c); + + maxv = stbi__pnm_getinteger(s, &c); // read max value + + if (maxv > 255) + return stbi__err("max value > 255", "PPM image not 8-bit"); + else + return 1; +} +#endif + +static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) +{ + #ifndef STBI_NO_JPEG + if (stbi__jpeg_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNG + if (stbi__png_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_GIF + if (stbi__gif_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_BMP + if (stbi__bmp_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PIC + if (stbi__pic_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNM + if (stbi__pnm_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_info(s, x, y, comp)) return 1; + #endif + + // test tga last because it's a crappy test! 
+ #ifndef STBI_NO_TGA + if (stbi__tga_info(s, x, y, comp)) + return 1; + #endif + return stbi__err("unknown image type", "Image not of any known type, or corrupt"); +} + +static int stbi__is_16_main(stbi__context *s) +{ + #ifndef STBI_NO_PNG + if (stbi__png_is16(s)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_is16(s)) return 1; + #endif + + return 0; +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_info_from_file(f, x, y, comp); + fclose(f); + return result; +} + +STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__info_main(&s,x,y,comp); + fseek(f,pos,SEEK_SET); + return r; +} + +STBIDEF int stbi_is_16_bit(char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_is_16_bit_from_file(f); + fclose(f); + return result; +} + +STBIDEF int stbi_is_16_bit_from_file(FILE *f) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__is_16_main(&s); + fseek(f,pos,SEEK_SET); + return r; +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__is_16_main(&s); +} + +STBIDEF int 
stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__is_16_main(&s); +} + +#endif // STB_IMAGE_IMPLEMENTATION + +/* + revision history: + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug + 1-bit BMP + *_is_16_bit api + avoid warnings + 2.16 (2017-07-23) all functions have 16-bit variants; + STBI_NO_STDIO works again; + compilation fixes; + fix rounding in unpremultiply; + optimize vertical flip; + disable raw_len validation; + documentation fixes + 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; + warning fixes; disable run-time SSE detection on gcc; + uniform handling of optional "return" values; + thread-safe initialization of zlib tables + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) allocate large structures on the stack + remove white matting for transparent PSD + fix reported channel count for PNG & BMP + re-enable SSE2 in non-gcc 64-bit + support RGB-formatted JPEG + read 16-bit PNGs (only as 8-bit) + 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED + 2.09 (2016-01-16) allow comments in PNM files + 16-bit-per-pixel TGA (not bit-per-component) + info() for TGA could break due to .hdr handling + info() for BMP to shares code instead of sloppy parse + can use STBI_REALLOC_SIZED if allocator doesn't support realloc + code cleanup + 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA + 2.07 (2015-09-13) fix compiler warnings + partial animated GIF support + limited 16-bpc PSD support + #ifdef unused functions + bug with < 92 byte PIC,PNM,HDR,TGA + 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value + 2.05 (2015-04-19) fix bug 
in progressive JPEG handling, fix warning + 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit + 2.03 (2015-04-12) extra corruption checking (mmozeiko) + stbi_set_flip_vertically_on_load (nguillemot) + fix NEON support; fix mingw support + 2.02 (2015-01-19) fix incorrect assert, fix warning + 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 + 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG + 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) + progressive JPEG (stb) + PGM/PPM support (Ken Miller) + STBI_MALLOC,STBI_REALLOC,STBI_FREE + GIF bugfix -- seemingly never worked + STBI_NO_*, STBI_ONLY_* + 1.48 (2014-12-14) fix incorrectly-named assert() + 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) + optimize PNG (ryg) + fix bug in interlaced PNG with user-specified channel count (stb) + 1.46 (2014-08-26) + fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG + 1.45 (2014-08-16) + fix MSVC-ARM internal compiler error by wrapping malloc + 1.44 (2014-08-07) + various warning fixes from Ronny Chevalier + 1.43 (2014-07-15) + fix MSVC-only compiler problem in code changed in 1.42 + 1.42 (2014-07-09) + don't define _CRT_SECURE_NO_WARNINGS (affects user code) + fixes to stbi__cleanup_jpeg path + added STBI_ASSERT to avoid requiring assert.h + 1.41 (2014-06-25) + fix search&replace from 1.36 that messed up comments/error messages + 1.40 (2014-06-22) + fix gcc struct-initialization warning + 1.39 (2014-06-15) + fix to TGA optimization when req_comp != number of components in TGA; + fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite) + add support for BMP version 5 (more ignored fields) + 1.38 (2014-06-06) + suppress MSVC warnings on integer casts truncating values + fix accidental rename of 'skip' field of I/O + 1.37 (2014-06-04) + remove duplicate typedef + 1.36 (2014-06-03) + convert to header file single-file library + if 
de-iphone isn't set, load iphone images color-swapped instead of returning NULL + 1.35 (2014-05-27) + various warnings + fix broken STBI_SIMD path + fix bug where stbi_load_from_file no longer left file pointer in correct place + fix broken non-easy path for 32-bit BMP (possibly never used) + TGA optimization by Arseny Kapoulkine + 1.34 (unknown) + use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case + 1.33 (2011-07-14) + make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements + 1.32 (2011-07-13) + support for "info" function for all supported filetypes (SpartanJ) + 1.31 (2011-06-20) + a few more leak fixes, bug in PNG handling (SpartanJ) + 1.30 (2011-06-11) + added ability to load files via callbacks to accomidate custom input streams (Ben Wenger) + removed deprecated format-specific test/load functions + removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway + error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) + fix inefficiency in decoding 32-bit BMP (David Woo) + 1.29 (2010-08-16) + various warning fixes from Aurelien Pocheville + 1.28 (2010-08-01) + fix bug in GIF palette transparency (SpartanJ) + 1.27 (2010-08-01) + cast-to-stbi_uc to fix warnings + 1.26 (2010-07-24) + fix bug in file buffering for PNG reported by SpartanJ + 1.25 (2010-07-17) + refix trans_data warning (Won Chun) + 1.24 (2010-07-12) + perf improvements reading from files on platforms with lock-heavy fgetc() + minor perf improvements for jpeg + deprecated type-specific functions so we'll get feedback if they're needed + attempt to fix trans_data warning (Won Chun) + 1.23 fixed bug in iPhone support + 1.22 (2010-07-10) + removed image *writing* support + stbi_info support from Jetro Lauha + GIF support from Jean-Marc Lienher + iPhone PNG-extensions from James Brown + warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. 
Janez (U+017D)emva) + 1.21 fix use of 'stbi_uc' in header (reported by jon blow) + 1.20 added support for Softimage PIC, by Tom Seddon + 1.19 bug in interlaced PNG corruption check (found by ryg) + 1.18 (2008-08-02) + fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - stbi__convert_format converted one too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug + header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) + 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... 
not sure + fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available + 0.56 fix bug: zlib uncompressed mode len vs. nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant + 0.50 (2006-11-19) + first released version +*/ + + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/workloads/realworld/pipeline/darknet/src/stb_image_write.h b/workloads/realworld/pipeline/darknet/src/stb_image_write.h new file mode 100644 index 0000000000000000000000000000000000000000..c05e95812b96232abd3617f98255832cc3fe4716 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/stb_image_write.h @@ -0,0 +1,1568 @@ +/* stb_image_write - v1.09 - public domain - http://nothings.org/stb/stb_image_write.h + writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 + no warranty implied; use at your own risk + + Before #including, + + #define STB_IMAGE_WRITE_IMPLEMENTATION + + in the file that you want to have the implementation. + + Will probably not work correctly with strict-aliasing optimizations. + + If using a modern Microsoft Compiler, non-safe versions of CRT calls may cause + compilation warnings or even errors. To avoid this, also before #including, + + #define STBI_MSC_SECURE_CRT + +ABOUT: + + This header file is a library for writing images to C stdio. It could be + adapted to write to memory or a general streaming interface; let me know. + + The PNG output is not optimal; it is 20-50% larger than the file + written by a decent optimizing implementation; though providing a custom + zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that. 
+ This library is designed for source code compactness and simplicity, + not optimal image file size or run-time performance. + +BUILDING: + + You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h. + You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace + malloc,realloc,free. + You can #define STBIW_MEMMOVE() to replace memmove() + You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function + for PNG compression (instead of the builtin one), it must have the following signature: + unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality); + The returned data will be freed with STBIW_FREE() (free() by default), + so it must be heap allocated with STBIW_MALLOC() (malloc() by default), + +USAGE: + + There are five functions, one for each image file format: + + int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality); + int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); + + void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically + + There are also five equivalent functions that use an arbitrary write function. 
You are + expected to open/close your file-equivalent before and after calling these: + + int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); + int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + + where the callback is: + void stbi_write_func(void *context, void *data, int size); + + You can configure it with these global variables: + int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE + int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression + int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode + + + You can define STBI_WRITE_NO_STDIO to disable the file variant of these + functions, so the library will not use stdio.h at all. However, this will + also disable HDR writing, because it requires stdio for formatted output. + + Each function returns 0 on failure and non-0 on success. + + The functions create an image file defined by the parameters. The image + is a rectangle of pixels stored from left-to-right, top-to-bottom. + Each pixel contains 'comp' channels of data stored interleaved with 8-bits + per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is + monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall. + The *data pointer points to the first byte of the top-left-most pixel. + For PNG, "stride_in_bytes" is the distance in bytes from the first byte of + a row of pixels to the first byte of the next row of pixels. 
+ + PNG creates output files with the same number of components as the input. + The BMP format expands Y to RGB in the file format and does not + output alpha. + + PNG supports writing rectangles of data even when the bytes storing rows of + data are not consecutive in memory (e.g. sub-rectangles of a larger image), + by supplying the stride between the beginning of adjacent rows. The other + formats do not. (Thus you cannot write a native-format BMP through the BMP + writer, both because it is in BGR order and because it may have padding + at the end of the line.) + + PNG allows you to set the deflate compression level by setting the global + variable 'stbi_write_png_compression_level' (it defaults to 8). + + HDR expects linear float data. Since the format is always 32-bit rgb(e) + data, alpha (if provided) is discarded, and for monochrome data it is + replicated across all three channels. + + TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed + data, set the global variable 'stbi_write_tga_with_rle' to 0. + + JPEG does ignore alpha channels in input data; quality is between 1 and 100. + Higher quality looks better but results in a bigger image. + JPEG baseline (no JPEG progressive). + +CREDITS: + + + Sean Barrett - PNG/BMP/TGA + Baldur Karlsson - HDR + Jean-Sebastien Guay - TGA monochrome + Tim Kelsey - misc enhancements + Alan Hickman - TGA RLE + Emmanuel Julien - initial file IO callback implementation + Jon Olick - original jo_jpeg.cpp code + Daniel Gibson - integrate JPEG, allow external zlib + Aarni Koskela - allow choosing PNG filter + + bugfixes: + github:Chribba + Guillaume Chereau + github:jry2 + github:romigrou + Sergio Gonzalez + Jonas Karlsson + Filip Wasil + Thatcher Ulrich + github:poppolopoppo + Patrick Boettcher + github:xeekworx + Cap Petschulat + Simon Rodriguez + Ivan Tikhonov + github:ignotion + Adam Schackart + +LICENSE + + See end of file for license information. 
+ +*/ + +#ifndef INCLUDE_STB_IMAGE_WRITE_H +#define INCLUDE_STB_IMAGE_WRITE_H + +// if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline' +#ifndef STBIWDEF +#ifdef STB_IMAGE_WRITE_STATIC +#define STBIWDEF static +#else +#ifdef __cplusplus +#define STBIWDEF extern "C" +#else +#define STBIWDEF extern +#endif +#endif +#endif + +#ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations +extern int stbi_write_tga_with_rle; +extern int stbi_write_png_compression_level; +extern int stbi_write_force_png_filter; +#endif + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); +#endif + +typedef void stbi_write_func(void *context, void *data, int size); + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + +STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); + +#endif//INCLUDE_STB_IMAGE_WRITE_H + +#ifdef STB_IMAGE_WRITE_IMPLEMENTATION + +#ifdef _WIN32 + #ifndef 
_CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS + #endif + #ifndef _CRT_NONSTDC_NO_DEPRECATE + #define _CRT_NONSTDC_NO_DEPRECATE + #endif +#endif + +#ifndef STBI_WRITE_NO_STDIO +#include +#endif // STBI_WRITE_NO_STDIO + +#include +#include +#include +#include + +#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED)) +// ok +#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)." +#endif + +#ifndef STBIW_MALLOC +#define STBIW_MALLOC(sz) malloc(sz) +#define STBIW_REALLOC(p,newsz) realloc(p,newsz) +#define STBIW_FREE(p) free(p) +#endif + +#ifndef STBIW_REALLOC_SIZED +#define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz) +#endif + + +#ifndef STBIW_MEMMOVE +#define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz) +#endif + + +#ifndef STBIW_ASSERT +#include +#define STBIW_ASSERT(x) assert(x) +#endif + +#define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff) + +#ifdef STB_IMAGE_WRITE_STATIC +static int stbi__flip_vertically_on_write=0; +static int stbi_write_png_compression_level = 8; +static int stbi_write_tga_with_rle = 1; +static int stbi_write_force_png_filter = -1; +#else +int stbi_write_png_compression_level = 8; +int stbi__flip_vertically_on_write=0; +int stbi_write_tga_with_rle = 1; +int stbi_write_force_png_filter = -1; +#endif + +STBIWDEF void stbi_flip_vertically_on_write(int flag) +{ + stbi__flip_vertically_on_write = flag; +} + +typedef struct +{ + stbi_write_func *func; + void *context; +} stbi__write_context; + +// initialize a callback-based context +static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context) +{ + s->func = c; + s->context = context; +} + +#ifndef STBI_WRITE_NO_STDIO + +static void stbi__stdio_write(void *context, void *data, int size) +{ + fwrite(data,1,size,(FILE*) 
context); +} + +static int stbi__start_write_file(stbi__write_context *s, const char *filename) +{ + FILE *f; +#ifdef STBI_MSC_SECURE_CRT + if (fopen_s(&f, filename, "wb")) + f = NULL; +#else + f = fopen(filename, "wb"); +#endif + stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f); + return f != NULL; +} + +static void stbi__end_write_file(stbi__write_context *s) +{ + fclose((FILE *)s->context); +} + +#endif // !STBI_WRITE_NO_STDIO + +typedef unsigned int stbiw_uint32; +typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1]; + +static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) +{ + while (*fmt) { + switch (*fmt++) { + case ' ': break; + case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); + s->func(s->context,&x,1); + break; } + case '2': { int x = va_arg(v,int); + unsigned char b[2]; + b[0] = STBIW_UCHAR(x); + b[1] = STBIW_UCHAR(x>>8); + s->func(s->context,b,2); + break; } + case '4': { stbiw_uint32 x = va_arg(v,int); + unsigned char b[4]; + b[0]=STBIW_UCHAR(x); + b[1]=STBIW_UCHAR(x>>8); + b[2]=STBIW_UCHAR(x>>16); + b[3]=STBIW_UCHAR(x>>24); + s->func(s->context,b,4); + break; } + default: + STBIW_ASSERT(0); + return; + } + } +} + +static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) 
+{ + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); +} + +static void stbiw__putc(stbi__write_context *s, unsigned char c) +{ + s->func(s->context, &c, 1); +} + +static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) +{ + unsigned char arr[3]; + arr[0] = a, arr[1] = b, arr[2] = c; + s->func(s->context, arr, 3); +} + +static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d) +{ + unsigned char bg[3] = { 255, 0, 255}, px[3]; + int k; + + if (write_alpha < 0) + s->func(s->context, &d[comp - 1], 1); + + switch (comp) { + case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case + case 1: + if (expand_mono) + stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp + else + s->func(s->context, d, 1); // monochrome TGA + break; + case 4: + if (!write_alpha) { + // composite against pink background + for (k = 0; k < 3; ++k) + px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255; + stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]); + break; + } + /* FALLTHROUGH */ + case 3: + stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); + break; + } + if (write_alpha > 0) + s->func(s->context, &d[comp - 1], 1); +} + +static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) +{ + stbiw_uint32 zero = 0; + int i,j, j_end; + + if (y <= 0) + return; + + if (stbi__flip_vertically_on_write) + vdir *= -1; + + if (vdir < 0) + j_end = -1, j = y-1; + else + j_end = y, j = 0; + + for (; j != j_end; j += vdir) { + for (i=0; i < x; ++i) { + unsigned char *d = (unsigned char *) data + (j*x+i)*comp; + stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); + } + s->func(s->context, &zero, scanline_pad); + } +} + +static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int 
comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) +{ + if (y < 0 || x < 0) { + return 0; + } else { + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); + stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono); + return 1; + } +} + +static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data) +{ + int pad = (-x*3) & 3; + return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad, + "11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header +} + +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_bmp_core(&s, x, y, comp, data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_bmp_core(&s, x, y, comp, data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif //!STBI_WRITE_NO_STDIO + +static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data) +{ + int has_alpha = (comp == 2 || comp == 4); + int colorbytes = has_alpha ? comp-1 : comp; + int format = colorbytes < 2 ? 
3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3 + + if (y < 0 || x < 0) + return 0; + + if (!stbi_write_tga_with_rle) { + return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0, + "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); + } else { + int i,j,k; + int jend, jdir; + + stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); + + if (stbi__flip_vertically_on_write) { + j = 0; + jend = y; + jdir = 1; + } else { + j = y-1; + jend = -1; + jdir = -1; + } + for (; j != jend; j += jdir) { + unsigned char *row = (unsigned char *) data + j * x * comp; + int len; + + for (i = 0; i < x; i += len) { + unsigned char *begin = row + i * comp; + int diff = 1; + len = 1; + + if (i < x - 1) { + ++len; + diff = memcmp(begin, row + (i + 1) * comp, comp); + if (diff) { + const unsigned char *prev = begin; + for (k = i + 2; k < x && len < 128; ++k) { + if (memcmp(prev, row + k * comp, comp)) { + prev += comp; + ++len; + } else { + --len; + break; + } + } + } else { + for (k = i + 2; k < x && len < 128; ++k) { + if (!memcmp(begin, row + k * comp, comp)) { + ++len; + } else { + break; + } + } + } + } + + if (diff) { + unsigned char header = STBIW_UCHAR(len - 1); + s->func(s->context, &header, 1); + for (k = 0; k < len; ++k) { + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); + } + } else { + unsigned char header = STBIW_UCHAR(len - 129); + s->func(s->context, &header, 1); + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); + } + } + } + } + return 1; +} + +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_tga_core(&s, x, y, comp, (void *) data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, 
const void *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_tga_core(&s, x, y, comp, (void *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR writer +// by Baldur Karlsson + +#define stbiw__max(a, b) ((a) > (b) ? (a) : (b)) + +void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear) +{ + int exponent; + float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2])); + + if (maxcomp < 1e-32f) { + rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0; + } else { + float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp; + + rgbe[0] = (unsigned char)(linear[0] * normalize); + rgbe[1] = (unsigned char)(linear[1] * normalize); + rgbe[2] = (unsigned char)(linear[2] * normalize); + rgbe[3] = (unsigned char)(exponent + 128); + } +} + +void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte) +{ + unsigned char lengthbyte = STBIW_UCHAR(length+128); + STBIW_ASSERT(length+128 <= 255); + s->func(s->context, &lengthbyte, 1); + s->func(s->context, &databyte, 1); +} + +void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data) +{ + unsigned char lengthbyte = STBIW_UCHAR(length); + STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code + s->func(s->context, &lengthbyte, 1); + s->func(s->context, data, length); +} + +void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline) +{ + unsigned char scanlineheader[4] = { 2, 2, 0, 0 }; + unsigned char rgbe[4]; + float linear[3]; + int x; + + scanlineheader[2] = (width&0xff00)>>8; + scanlineheader[3] = (width&0x00ff); + + /* skip RLE for images too small or large */ + if (width < 8 || width >= 32768) { + for (x=0; x < width; x++) { + switch (ncomp) { + case 4: /* 
fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + s->func(s->context, rgbe, 4); + } + } else { + int c,r; + /* encode into scratch buffer */ + for (x=0; x < width; x++) { + switch(ncomp) { + case 4: /* fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + scratch[x + width*0] = rgbe[0]; + scratch[x + width*1] = rgbe[1]; + scratch[x + width*2] = rgbe[2]; + scratch[x + width*3] = rgbe[3]; + } + + s->func(s->context, scanlineheader, 4); + + /* RLE each component separately */ + for (c=0; c < 4; c++) { + unsigned char *comp = &scratch[width*c]; + + x = 0; + while (x < width) { + // find first run + r = x; + while (r+2 < width) { + if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) + break; + ++r; + } + if (r+2 >= width) + r = width; + // dump up to first run + while (x < r) { + int len = r-x; + if (len > 128) len = 128; + stbiw__write_dump_data(s, len, &comp[x]); + x += len; + } + // if there's a run, output it + if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd + // find next byte after run + while (r < width && comp[r] == comp[x]) + ++r; + // output run up to r + while (x < r) { + int len = r-x; + if (len > 127) len = 127; + stbiw__write_run_data(s, len, comp[x]); + x += len; + } + } + } + } + } +} + +static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data) +{ + if (y <= 0 || x <= 0 || data == NULL) + return 0; + else { + // Each component is stored separately. Allocate scratch space for full output scanline. 
+ unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); + int i, len; + char buffer[128]; + char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; + s->func(s->context, header, sizeof(header)-1); + +#ifdef STBI_MSC_SECURE_CRT + len = sprintf_s(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#else + len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#endif + s->func(s->context, buffer, len); + + for(i=0; i < y; i++) + stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i)*x); + STBIW_FREE(scratch); + return 1; + } +} + +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_hdr_core(&s, x, y, comp, (float *) data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif // STBI_WRITE_NO_STDIO + + +////////////////////////////////////////////////////////////////////////////// +// +// PNG writer +// + +#ifndef STBIW_ZLIB_COMPRESS +// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() +#define stbiw__sbraw(a) ((int *) (a) - 2) +#define stbiw__sbm(a) stbiw__sbraw(a)[0] +#define stbiw__sbn(a) stbiw__sbraw(a)[1] + +#define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) +#define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0) +#define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) + +#define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) +#define stbiw__sbcount(a) ((a) ? 
stbiw__sbn(a) : 0) +#define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0) + +static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) +{ + int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1; + void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2); + STBIW_ASSERT(p); + if (p) { + if (!*arr) ((int *) p)[1] = 0; + *arr = (void *) ((int *) p + 2); + stbiw__sbm(*arr) = m; + } + return *arr; +} + +static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount) +{ + while (*bitcount >= 8) { + stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer)); + *bitbuffer >>= 8; + *bitcount -= 8; + } + return data; +} + +static int stbiw__zlib_bitrev(int code, int codebits) +{ + int res=0; + while (codebits--) { + res = (res << 1) | (code & 1); + code >>= 1; + } + return res; +} + +static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit) +{ + int i; + for (i=0; i < limit && i < 258; ++i) + if (a[i] != b[i]) break; + return i; +} + +static unsigned int stbiw__zhash(unsigned char *data) +{ + stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16); + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + return hash; +} + +#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount)) +#define stbiw__zlib_add(code,codebits) \ + (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) +#define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) +// default huffman tables +#define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8) +#define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9) +#define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7) +#define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8) +#define stbiw__zlib_huff(n) ((n) <= 143 ? 
stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) +#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) + +#define stbiw__ZHASH 16384 + +#endif // STBIW_ZLIB_COMPRESS + +unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality) +{ +#ifdef STBIW_ZLIB_COMPRESS + // user provided a zlib compress implementation, use that + return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality); +#else // use builtin + static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; + static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; + static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; + static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; + unsigned int bitbuf=0; + int i,j, bitcount=0; + unsigned char *out = NULL; + unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(char**)); + if (hash_table == NULL) + return NULL; + if (quality < 5) quality = 5; + + stbiw__sbpush(out, 0x78); // DEFLATE 32K window + stbiw__sbpush(out, 0x5e); // FLEVEL = 1 + stbiw__zlib_add(1,1); // BFINAL = 1 + stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman + + for (i=0; i < stbiw__ZHASH; ++i) + hash_table[i] = NULL; + + i=0; + while (i < data_len-3) { + // hash next 3 bytes of data to be compressed + int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3; + unsigned char *bestloc = 0; + unsigned char **hlist = hash_table[h]; + int n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32768) { // if entry lies within window + int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i); + if (d >= best) best=d,bestloc=hlist[j]; + } + 
} + // when hash table entry is too long, delete half the entries + if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) { + STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality); + stbiw__sbn(hash_table[h]) = quality; + } + stbiw__sbpush(hash_table[h],data+i); + + if (bestloc) { + // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal + h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1); + hlist = hash_table[h]; + n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32767) { + int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1); + if (e > best) { // if next match is better, bail on current match + bestloc = NULL; + break; + } + } + } + } + + if (bestloc) { + int d = (int) (data+i - bestloc); // distance back + STBIW_ASSERT(d <= 32767 && best <= 258); + for (j=0; best > lengthc[j+1]-1; ++j); + stbiw__zlib_huff(j+257); + if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); + for (j=0; d > distc[j+1]-1; ++j); + stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); + if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]); + i += best; + } else { + stbiw__zlib_huffb(data[i]); + ++i; + } + } + // write out final bytes + for (;i < data_len; ++i) + stbiw__zlib_huffb(data[i]); + stbiw__zlib_huff(256); // end of block + // pad with 0 bits to byte boundary + while (bitcount) + stbiw__zlib_add(0,1); + + for (i=0; i < stbiw__ZHASH; ++i) + (void) stbiw__sbfree(hash_table[i]); + STBIW_FREE(hash_table); + + { + // compute adler32 on input + unsigned int s1=1, s2=0; + int blocklen = (int) (data_len % 5552); + j=0; + while (j < data_len) { + for (i=0; i < blocklen; ++i) s1 += data[j+i], s2 += s1; + s1 %= 65521, s2 %= 65521; + j += blocklen; + blocklen = 5552; + } + stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s2)); + stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s1)); + } + *out_len = stbiw__sbn(out); + // make 
returned pointer freeable + STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len); + return (unsigned char *) stbiw__sbraw(out); +#endif // STBIW_ZLIB_COMPRESS +} + +static unsigned int stbiw__crc32(unsigned char *buffer, int len) +{ + static unsigned int crc_table[256] = + { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 
0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + }; + + unsigned int crc = ~0u; + int i; + for (i=0; i < len; ++i) + crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; + return ~crc; +} + +#define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4) +#define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v)); +#define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3]) + +static void stbiw__wpcrc(unsigned char **data, int len) +{ + unsigned int crc = stbiw__crc32(*data - len - 4, len+4); + stbiw__wp32(*data, crc); +} + +static unsigned 
char stbiw__paeth(int a, int b, int c) +{ + int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c); + if (pa <= pb && pa <= pc) return STBIW_UCHAR(a); + if (pb <= pc) return STBIW_UCHAR(b); + return STBIW_UCHAR(c); +} + +// @OPTIMIZE: provide an option that always forces left-predict or paeth predict +static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer) +{ + static int mapping[] = { 0,1,2,3,4 }; + static int firstmap[] = { 0,1,0,5,6 }; + int *mymap = (y != 0) ? mapping : firstmap; + int i; + int type = mymap[filter_type]; + unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y); + int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes; + for (i = 0; i < n; ++i) { + switch (type) { + case 0: line_buffer[i] = z[i]; break; + case 1: line_buffer[i] = z[i]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break; + case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break; + case 5: line_buffer[i] = z[i]; break; + case 6: line_buffer[i] = z[i]; break; + } + } + for (i=n; i < width*n; ++i) { + switch (type) { + case 0: line_buffer[i] = z[i]; break; + case 1: line_buffer[i] = z[i] - z[i-n]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break; + case 4: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break; + case 5: line_buffer[i] = z[i] - (z[i-n]>>1); break; + case 6: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; + } + } +} + +unsigned char *stbi_write_png_to_mem(unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len) +{ + int force_filter = stbi_write_force_png_filter; + int ctype[5] = { -1, 0, 4, 2, 6 }; + unsigned char sig[8] 
= { 137,80,78,71,13,10,26,10 }; + unsigned char *out,*o, *filt, *zlib; + signed char *line_buffer; + int j,zlen; + + if (stride_bytes == 0) + stride_bytes = x * n; + + if (force_filter >= 5) { + force_filter = -1; + } + + filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; + line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; } + for (j=0; j < y; ++j) { + int filter_type; + if (force_filter > -1) { + filter_type = force_filter; + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, force_filter, line_buffer); + } else { // Estimate the best filter by running through all of them: + int best_filter = 0, best_filter_val = 0x7fffffff, est, i; + for (filter_type = 0; filter_type < 5; filter_type++) { + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, filter_type, line_buffer); + + // Estimate the entropy of the line using this filter; the less, the better. + est = 0; + for (i = 0; i < x*n; ++i) { + est += abs((signed char) line_buffer[i]); + } + if (est < best_filter_val) { + best_filter_val = est; + best_filter = filter_type; + } + } + if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, best_filter, line_buffer); + filter_type = best_filter; + } + } + // when we get here, filter_type contains the filter type, and line_buffer contains the data + filt[j*(x*n+1)] = (unsigned char) filter_type; + STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); + } + STBIW_FREE(line_buffer); + zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level); + STBIW_FREE(filt); + if (!zlib) return 0; + + // each tag requires 12 bytes of overhead + out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12); + if (!out) return 0; + *out_len = 8 + 12+13 + 12+zlen + 12; + + o=out; + STBIW_MEMMOVE(o,sig,8); o+= 8; + stbiw__wp32(o, 13); // header length + stbiw__wptag(o, "IHDR"); + 
stbiw__wp32(o, x); + stbiw__wp32(o, y); + *o++ = 8; + *o++ = STBIW_UCHAR(ctype[n]); + *o++ = 0; + *o++ = 0; + *o++ = 0; + stbiw__wpcrc(&o,13); + + stbiw__wp32(o, zlen); + stbiw__wptag(o, "IDAT"); + STBIW_MEMMOVE(o, zlib, zlen); + o += zlen; + STBIW_FREE(zlib); + stbiw__wpcrc(&o, zlen); + + stbiw__wp32(o,0); + stbiw__wptag(o, "IEND"); + stbiw__wpcrc(&o,0); + + STBIW_ASSERT(o == out + *out_len); + + return out; +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes) +{ + FILE *f; + int len; + unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; +#ifdef STBI_MSC_SECURE_CRT + if (fopen_s(&f, filename, "wb")) + f = NULL; +#else + f = fopen(filename, "wb"); +#endif + if (!f) { STBIW_FREE(png); return 0; } + fwrite(png, 1, len, f); + fclose(f); + STBIW_FREE(png); + return 1; +} +#endif + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes) +{ + int len; + unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; + func(context, png, len); + STBIW_FREE(png); + return 1; +} + + +/* *************************************************************************** + * + * JPEG writer + * + * This is based on Jon Olick's jo_jpeg.cpp: + * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html + */ + +static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18, + 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; + +static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) { + int bitBuf = *bitBufP, bitCnt = *bitCntP; + bitCnt += bs[1]; + bitBuf |= bs[0] << (24 - 
bitCnt); + while(bitCnt >= 8) { + unsigned char c = (bitBuf >> 16) & 255; + stbiw__putc(s, c); + if(c == 255) { + stbiw__putc(s, 0); + } + bitBuf <<= 8; + bitCnt -= 8; + } + *bitBufP = bitBuf; + *bitCntP = bitCnt; +} + +static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) { + float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p; + float z1, z2, z3, z4, z5, z11, z13; + + float tmp0 = d0 + d7; + float tmp7 = d0 - d7; + float tmp1 = d1 + d6; + float tmp6 = d1 - d6; + float tmp2 = d2 + d5; + float tmp5 = d2 - d5; + float tmp3 = d3 + d4; + float tmp4 = d3 - d4; + + // Even part + float tmp10 = tmp0 + tmp3; // phase 2 + float tmp13 = tmp0 - tmp3; + float tmp11 = tmp1 + tmp2; + float tmp12 = tmp1 - tmp2; + + d0 = tmp10 + tmp11; // phase 3 + d4 = tmp10 - tmp11; + + z1 = (tmp12 + tmp13) * 0.707106781f; // c4 + d2 = tmp13 + z1; // phase 5 + d6 = tmp13 - z1; + + // Odd part + tmp10 = tmp4 + tmp5; // phase 2 + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + // The rotator is modified from fig 4-8 to avoid extra negations. + z5 = (tmp10 - tmp12) * 0.382683433f; // c6 + z2 = tmp10 * 0.541196100f + z5; // c2-c6 + z4 = tmp12 * 1.306562965f + z5; // c2+c6 + z3 = tmp11 * 0.707106781f; // c4 + + z11 = tmp7 + z3; // phase 5 + z13 = tmp7 - z3; + + *d5p = z13 + z2; // phase 6 + *d3p = z13 - z2; + *d1p = z11 + z4; + *d7p = z11 - z4; + + *d0p = d0; *d2p = d2; *d4p = d4; *d6p = d6; +} + +static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) { + int tmp1 = val < 0 ? -val : val; + val = val < 0 ? 
val-1 : val; + bits[1] = 1; + while(tmp1 >>= 1) { + ++bits[1]; + } + bits[0] = val & ((1<0)&&(DU[end0pos]==0); --end0pos) { + } + // end0pos = first element in reverse order !=0 + if(end0pos == 0) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + return DU[0]; + } + for(i = 1; i <= end0pos; ++i) { + int startpos = i; + int nrzeroes; + unsigned short bits[2]; + for (; DU[i]==0 && i<=end0pos; ++i) { + } + nrzeroes = i-startpos; + if ( nrzeroes >= 16 ) { + int lng = nrzeroes>>4; + int nrmarker; + for (nrmarker=1; nrmarker <= lng; ++nrmarker) + stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); + nrzeroes &= 15; + } + stbiw__jpg_calcBits(DU[i], bits); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); + } + if(end0pos != 63) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + } + return DU[0]; +} + +static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) { + // Constants that don't pollute global namespace + static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0}; + static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d}; + static const unsigned char std_ac_luminance_values[] = { + 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, + 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, + 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, + 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, + 
0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, + 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, + 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0}; + static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77}; + static const unsigned char std_ac_chrominance_values[] = { + 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, + 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26, + 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, + 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, + 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, + 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, + 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + // Huffman tables + static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}}; + static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}}; + static const unsigned short YAC_HT[256][2] = { + {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const unsigned short UVAC_HT[256][2] = { + {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22, + 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99}; + static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99, + 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99}; + static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, + 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; + + int row, col, i, k; + float fdtbl_Y[64], fdtbl_UV[64]; + unsigned char YTable[64], UVTable[64]; + + if(!data || !width || !height || comp > 4 || comp < 1) { + return 0; + } + + quality = quality ? quality : 90; + quality = quality < 1 ? 1 : quality > 100 ? 100 : quality; + quality = quality < 50 ? 5000 / quality : 200 - quality * 2; + + for(i = 0; i < 64; ++i) { + int uvti, yti = (YQT[i]*quality+50)/100; + YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 
255 : yti); + uvti = (UVQT[i]*quality+50)/100; + UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti); + } + + for(row = 0, k = 0; row < 8; ++row) { + for(col = 0; col < 8; ++col, ++k) { + fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + } + } + + // Write Headers + { + static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; + static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; + const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width), + 3,1,0x11,0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; + s->func(s->context, (void*)head0, sizeof(head0)); + s->func(s->context, (void*)YTable, sizeof(YTable)); + stbiw__putc(s, 1); + s->func(s->context, UVTable, sizeof(UVTable)); + s->func(s->context, (void*)head1, sizeof(head1)); + s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1); + s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); + stbiw__putc(s, 0x10); // HTYACinfo + s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1); + s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); + stbiw__putc(s, 1); // HTUDCinfo + s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values)); + stbiw__putc(s, 0x11); // HTUACinfo + s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); + s->func(s->context, (void*)head2, sizeof(head2)); + } + + // Encode 8x8 macroblocks 
+ { + static const unsigned short fillBits[] = {0x7F, 7}; + const unsigned char *imageData = (const unsigned char *)data; + int DCY=0, DCU=0, DCV=0; + int bitBuf=0, bitCnt=0; + // comp == 2 is grey+alpha (alpha is ignored) + int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; + int x, y, pos; + for(y = 0; y < height; y += 8) { + for(x = 0; x < width; x += 8) { + float YDU[64], UDU[64], VDU[64]; + for(row = y, pos = 0; row < y+8; ++row) { + for(col = x; col < x+8; ++col, ++pos) { + int p = (stbi__flip_vertically_on_write ? height-1-row : row)*width*comp + col*comp; + float r, g, b; + if(row >= height) { + p -= width*comp*(row+1 - height); + } + if(col >= width) { + p -= comp*(col+1 - width); + } + + r = imageData[p+0]; + g = imageData[p+ofsG]; + b = imageData[p+ofsB]; + YDU[pos]=+0.29900f*r+0.58700f*g+0.11400f*b-128; + UDU[pos]=-0.16874f*r-0.33126f*g+0.50000f*b; + VDU[pos]=+0.50000f*r-0.41869f*g-0.08131f*b; + } + } + + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, YDU, fdtbl_Y, DCY, YDC_HT, YAC_HT); + DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, UDU, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); + DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, VDU, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); + } + } + + // Do the bit alignment of the EOI marker + stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); + } + + // EOI + stbiw__putc(s, 0xFF); + stbiw__putc(s, 0xD9); + + return 1; +} + +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality); +} + + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); + stbi__end_write_file(&s); + return r; + } else + return 
0; +} +#endif + +#endif // STB_IMAGE_WRITE_IMPLEMENTATION + +/* Revision history + 1.09 (2018-02-11) + fix typo in zlib quality API, improve STB_I_W_STATIC in C++ + 1.08 (2018-01-29) + add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter + 1.07 (2017-07-24) + doc fix + 1.06 (2017-07-23) + writing JPEG (using Jon Olick's code) + 1.05 ??? + 1.04 (2017-03-03) + monochrome BMP expansion + 1.03 ??? + 1.02 (2016-04-02) + avoid allocating large structures on the stack + 1.01 (2016-01-16) + STBIW_REALLOC_SIZED: support allocators with no realloc support + avoid race-condition in crc initialization + minor compile issues + 1.00 (2015-09-14) + installable file IO function + 0.99 (2015-09-13) + warning fixes; TGA rle support + 0.98 (2015-04-08) + added STBIW_MALLOC, STBIW_ASSERT etc + 0.97 (2015-01-18) + fixed HDR asserts, rewrote HDR rle logic + 0.96 (2015-01-17) + add HDR output + fix monochrome BMP + 0.95 (2014-08-17) + add monochrome TGA output + 0.94 (2014-05-31) + rename private functions to avoid conflicts with stb_image.h + 0.93 (2014-05-27) + warning fixes + 0.92 (2010-08-01) + casts to unsigned char to fix warnings + 0.91 (2010-07-17) + first public release + 0.90 first internal release +*/ + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/workloads/realworld/pipeline/darknet/src/tree.c b/workloads/realworld/pipeline/darknet/src/tree.c new file mode 100644 index 0000000000000000000000000000000000000000..67b6d431f6f7e92ede234c71ecae9bd9146dc71f --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/tree.c @@ -0,0 +1,139 @@ +#include +#include +#include "tree.h" +#include "utils.h" +#include "data.h" + +void change_leaves(tree *t, char *leaf_list) +{ + list *llist = get_paths(leaf_list); + char **leaves = (char **)list_to_array(llist); + int n = llist->size; + int i,j; + int found = 0; + for(i = 0; i < t->n; ++i){ + t->leaf[i] = 0; + for(j = 0; j < n; ++j){ + if (0==strcmp(t->name[i], leaves[j])){ + t->leaf[i] = 1; + ++found; + break; + } + } + } + fprintf(stderr, "Found %d leaves.\n", found); +} + +float get_hierarchy_probability(float *x, tree *hier, int c, int stride) +{ + float p = 1; + while(c >= 0){ + p = p * x[c*stride]; + c = hier->parent[c]; + } + return p; +} + +void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride) +{ + int j; + for(j = 0; j < n; ++j){ + int parent = hier->parent[j]; + if(parent >= 0){ + predictions[j*stride] *= predictions[parent*stride]; + } + } + if(only_leaves){ + for(j = 0; j < n; ++j){ + if(!hier->leaf[j]) predictions[j*stride] = 0; + } + } +} 
+ +int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride) +{ + float p = 1; + int group = 0; + int i; + while(1){ + float max = 0; + int max_i = 0; + + for(i = 0; i < hier->group_size[group]; ++i){ + int index = i + hier->group_offset[group]; + float val = predictions[(i + hier->group_offset[group])*stride]; + if(val > max){ + max_i = index; + max = val; + } + } + if(p*max > thresh){ + p = p*max; + group = hier->child[max_i]; + if(hier->child[max_i] < 0) return max_i; + } else if (group == 0){ + return max_i; + } else { + return hier->parent[hier->group_offset[group]]; + } + } + return 0; +} + +tree *read_tree(char *filename) +{ + tree t = {0}; + FILE *fp = fopen(filename, "r"); + + char *line; + int last_parent = -1; + int group_size = 0; + int groups = 0; + int n = 0; + while((line=fgetl(fp)) != 0){ + char *id = calloc(256, sizeof(char)); + int parent = -1; + sscanf(line, "%s %d", id, &parent); + t.parent = realloc(t.parent, (n+1)*sizeof(int)); + t.parent[n] = parent; + + t.child = realloc(t.child, (n+1)*sizeof(int)); + t.child[n] = -1; + + t.name = realloc(t.name, (n+1)*sizeof(char *)); + t.name[n] = id; + if(parent != last_parent){ + ++groups; + t.group_offset = realloc(t.group_offset, groups * sizeof(int)); + t.group_offset[groups - 1] = n - group_size; + t.group_size = realloc(t.group_size, groups * sizeof(int)); + t.group_size[groups - 1] = group_size; + group_size = 0; + last_parent = parent; + } + t.group = realloc(t.group, (n+1)*sizeof(int)); + t.group[n] = groups; + if (parent >= 0) { + t.child[parent] = groups; + } + ++n; + ++group_size; + } + ++groups; + t.group_offset = realloc(t.group_offset, groups * sizeof(int)); + t.group_offset[groups - 1] = n - group_size; + t.group_size = realloc(t.group_size, groups * sizeof(int)); + t.group_size[groups - 1] = group_size; + t.n = n; + t.groups = groups; + t.leaf = calloc(n, sizeof(int)); + int i; + for(i = 0; i < n; ++i) t.leaf[i] = 1; + for(i = 0; i < n; ++i) if(t.parent[i] >= 
0) t.leaf[t.parent[i]] = 0; + + fclose(fp); + tree *tree_ptr = calloc(1, sizeof(tree)); + *tree_ptr = t; + //error(0); + return tree_ptr; +} diff --git a/workloads/realworld/pipeline/darknet/src/tree.h b/workloads/realworld/pipeline/darknet/src/tree.h new file mode 100644 index 0000000000000000000000000000000000000000..3802b8ead806266edd291de5407b08c2d7ed5dd1 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/tree.h @@ -0,0 +1,8 @@ +#ifndef TREE_H +#define TREE_H +#include "darknet.h" + +int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride); +float get_hierarchy_probability(float *x, tree *hier, int c, int stride); + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/upsample_layer.c b/workloads/realworld/pipeline/darknet/src/upsample_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..08b44c8973a0a4a7842503369cec0414eff6032a --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/upsample_layer.c @@ -0,0 +1,106 @@ +#include "upsample_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + +layer* make_upsample_layer(int batch, int w, int h, int c, int stride) +{ + layer* l = calloc(1, sizeof(layer)); + l->type = UPSAMPLE; + l->batch = batch; + l->w = w; + l->h = h; + l->c = c; + l->out_w = w*stride; + l->out_h = h*stride; + l->out_c = c; + if(stride < 0){ + stride = -stride; + l->reverse=1; + l->out_w = w/stride; + l->out_h = h/stride; + } + l->stride = stride; + l->outputs = l->out_w*l->out_h*l->out_c; + l->inputs = l->w*l->h*l->c; + l->delta = calloc(l->outputs*batch, sizeof(float)); + l->output = calloc(l->outputs*batch, sizeof(float));; + + l->forward = forward_upsample_layer; + l->backward = backward_upsample_layer; + #ifdef GPU + l->forward_gpu = forward_upsample_layer_gpu; + l->backward_gpu = backward_upsample_layer_gpu; + + // l->delta_gpu = cuda_make_array(l->delta, l->outputs*batch); + // l->output_gpu = cuda_make_array(l->output, l->outputs*batch); + #endif + 
if(l->reverse) fprintf(stderr, "downsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l->out_w, l->out_h, l->out_c); + else fprintf(stderr, "upsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l->out_w, l->out_h, l->out_c); + return l; +} + +void resize_upsample_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + l->out_w = w*l->stride; + l->out_h = h*l->stride; + if(l->reverse){ + l->out_w = w/l->stride; + l->out_h = h/l->stride; + } + l->outputs = l->out_w*l->out_h*l->out_c; + l->inputs = l->h*l->w*l->c; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + +void forward_upsample_layer(layer* l, network net) +{ + fill_cpu(l->outputs*l->batch, 0, l->output, 1); + if(l->reverse){ + upsample_cpu(l->output, l->out_w, l->out_h, l->c, l->batch, l->stride, 0, l->scale, net.input); + }else{ + upsample_cpu(net.input, l->w, l->h, l->c, l->batch, l->stride, 1, l->scale, l->output); + } +} + +void backward_upsample_layer(layer* l, network net) +{ + if(l->reverse){ + upsample_cpu(l->delta, l->out_w, l->out_h, l->c, l->batch, l->stride, 1, l->scale, net.delta); + }else{ + upsample_cpu(net.delta, l->w, l->h, l->c, l->batch, l->stride, 0, l->scale, l->delta); + } +} + +#ifdef GPU +void forward_upsample_layer_gpu(layer* l, network net) +{ + fill_gpu(l->outputs*l->batch, 0, l->output_gpu, 1); + if(l->reverse){ + upsample_gpu(l->output_gpu, l->out_w, l->out_h, l->c, l->batch, l->stride, 0, l->scale, net.input_gpu); + }else{ + upsample_gpu(net.input_gpu, l->w, l->h, l->c, l->batch, l->stride, 1, l->scale, l->output_gpu); + } +} + +void backward_upsample_layer_gpu(layer* l, network net) +{ + if(l->reverse){ + upsample_gpu(l->delta_gpu, l->out_w, l->out_h, 
l->c, l->batch, l->stride, 1, l->scale, net.delta_gpu); + }else{ + upsample_gpu(net.delta_gpu, l->w, l->h, l->c, l->batch, l->stride, 0, l->scale, l->delta_gpu); + } +} +#endif diff --git a/workloads/realworld/pipeline/darknet/src/upsample_layer.h b/workloads/realworld/pipeline/darknet/src/upsample_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..b3f701095a5d5758fd43fed1045fe147c7579d6c --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/upsample_layer.h @@ -0,0 +1,15 @@ +#ifndef UPSAMPLE_LAYER_H +#define UPSAMPLE_LAYER_H +#include "darknet.h" + +layer* make_upsample_layer(int batch, int w, int h, int c, int stride); +void forward_upsample_layer(layer* l, network net); +void backward_upsample_layer(layer* l, network net); +void resize_upsample_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_upsample_layer_gpu(layer* l, network net); +void backward_upsample_layer_gpu(layer* l, network net); +#endif + +#endif diff --git a/workloads/realworld/pipeline/darknet/src/utils.c b/workloads/realworld/pipeline/darknet/src/utils.c new file mode 100644 index 0000000000000000000000000000000000000000..626b4678c1e2779552ed9d34f19ce4b0f57d9ded --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/utils.c @@ -0,0 +1,726 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + + +/* +// old timing. is it better? who knows!! 
+double get_wall_time() +{ + struct timeval time; + if (gettimeofday(&time,NULL)){ + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * .000001; +} +*/ + +double what_time_is_it_now() +{ + struct timeval time; + if (gettimeofday(&time,NULL)){ + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * .000001; +} + +int *read_intlist(char *gpu_list, int *ngpus, int d) +{ + int *gpus = 0; + if(gpu_list){ + int len = strlen(gpu_list); + *ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++*ngpus; + } + gpus = calloc(*ngpus, sizeof(int)); + for(i = 0; i < *ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpus = calloc(1, sizeof(float)); + *gpus = d; + *ngpus = 1; + } + return gpus; +} + +int *read_map(char *filename) +{ + int n = 0; + int *map = 0; + char *str; + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + while((str=fgetl(file))){ + ++n; + map = realloc(map, n*sizeof(int)); + map[n-1] = atoi(str); + } + return map; +} + +void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections) +{ + size_t i; + for(i = 0; i < sections; ++i){ + size_t start = n*i/sections; + size_t end = n*(i+1)/sections; + size_t num = end-start; + shuffle(arr+(start*size), num, size); + } +} + +void shuffle(void *arr, size_t n, size_t size) +{ + size_t i; + void *swp = calloc(1, size); + for(i = 0; i < n-1; ++i){ + size_t j = i + rand()/(RAND_MAX / (n-i)+1); + memcpy(swp, arr+(j*size), size); + memcpy(arr+(j*size), arr+(i*size), size); + memcpy(arr+(i*size), swp, size); + } +} + +int *random_index_order(int min, int max) +{ + int *inds = calloc(max-min, sizeof(int)); + int i; + for(i = min; i < max; ++i){ + inds[i] = i; + } + for(i = min; i < max-1; ++i){ + int swap = inds[i]; + int index = i + rand()%(max-i); + inds[i] = inds[index]; + inds[index] = swap; + } + return inds; +} + +void del_arg(int argc, char **argv, int index) +{ + int i; + for(i = index; i < 
argc-1; ++i) argv[i] = argv[i+1]; + argv[i] = 0; +} + +int find_arg(int argc, char* argv[], char *arg) +{ + int i; + for(i = 0; i < argc; ++i) { + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)) { + del_arg(argc, argv, i); + return 1; + } + } + return 0; +} + +int find_int_arg(int argc, char **argv, char *arg, int def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = atoi(argv[i+1]); + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + +float find_float_arg(int argc, char **argv, char *arg, float def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = atof(argv[i+1]); + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + +char *find_char_arg(int argc, char **argv, char *arg, char *def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = argv[i+1]; + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + + +char *basecfg(char *cfgfile) +{ + char *c = cfgfile; + char *next; + while((next = strchr(c, '/'))) + { + c = next+1; + } + c = copy_string(c); + next = strchr(c, '.'); + if (next) *next = 0; + return c; +} + +int alphanum_to_int(char c) +{ + return (c < 58) ? c - 48 : c-87; +} +char int_to_alphanum(int i) +{ + if (i == 36) return '.'; + return (i < 10) ? i + 48 : i + 87; +} + +void pm(int M, int N, float *A) +{ + int i,j; + for(i =0 ; i < M; ++i){ + printf("%d ", i+1); + for(j = 0; j < N; ++j){ + printf("%2.4f, ", A[i*N+j]); + } + printf("\n"); + } + printf("\n"); +} + +void find_replace(char *str, char *orig, char *rep, char *output) +{ + char buffer[4096] = {0}; + char *p; + + sprintf(buffer, "%s", str); + if(!(p = strstr(buffer, orig))){ // Is 'orig' even in 'str'? 
+ sprintf(output, "%s", str); + return; + } + + *p = '\0'; + + sprintf(output, "%s%s%s", buffer, rep, p+strlen(orig)); +} + +float sec(clock_t clocks) +{ + return (float)clocks/CLOCKS_PER_SEC; +} + +void top_k(float *a, int n, int k, int *index) +{ + int i,j; + for(j = 0; j < k; ++j) index[j] = -1; + for(i = 0; i < n; ++i){ + int curr = i; + for(j = 0; j < k; ++j){ + if((index[j] < 0) || a[curr] > a[index[j]]){ + int swap = curr; + curr = index[j]; + index[j] = swap; + } + } + } +} + +void error(const char *s) +{ + perror(s); + assert(0); + exit(-1); +} + +unsigned char *read_file(char *filename) +{ + FILE *fp = fopen(filename, "rb"); + size_t size; + + fseek(fp, 0, SEEK_END); + size = ftell(fp); + fseek(fp, 0, SEEK_SET); + + unsigned char *text = calloc(size+1, sizeof(char)); + fread(text, 1, size, fp); + fclose(fp); + return text; +} + +void malloc_error() +{ + fprintf(stderr, "Malloc error\n"); + exit(-1); +} + +void file_error(char *s) +{ + fprintf(stderr, "Couldn't open file: %s\n", s); + exit(0); +} + +list *split_str(char *s, char delim) +{ + size_t i; + size_t len = strlen(s); + list *l = make_list(); + list_insert(l, s); + for(i = 0; i < len; ++i){ + if(s[i] == delim){ + s[i] = '\0'; + list_insert(l, &(s[i+1])); + } + } + return l; +} + +void strip(char *s) +{ + size_t i; + size_t len = strlen(s); + size_t offset = 0; + for(i = 0; i < len; ++i){ + char c = s[i]; + if(c==' '||c=='\t'||c=='\n') ++offset; + else s[i-offset] = c; + } + s[len-offset] = '\0'; +} + +void strip_char(char *s, char bad) +{ + size_t i; + size_t len = strlen(s); + size_t offset = 0; + for(i = 0; i < len; ++i){ + char c = s[i]; + if(c==bad) ++offset; + else s[i-offset] = c; + } + s[len-offset] = '\0'; +} + +void free_ptrs(void **ptrs, int n) +{ + int i; + for(i = 0; i < n; ++i) free(ptrs[i]); + free(ptrs); +} + +char *fgetl(FILE *fp) +{ + if(feof(fp)) return 0; + size_t size = 512; + char *line = malloc(size*sizeof(char)); + if(!fgets(line, size, fp)){ + free(line); + return 0; + } + + 
size_t curr = strlen(line); + + while((line[curr-1] != '\n') && !feof(fp)){ + if(curr == size-1){ + size *= 2; + line = realloc(line, size*sizeof(char)); + if(!line) { + printf("%ld\n", size); + malloc_error(); + } + } + size_t readsize = size-curr; + if(readsize > INT_MAX) readsize = INT_MAX-1; + fgets(&line[curr], readsize, fp); + curr = strlen(line); + } + if(line[curr-1] == '\n') line[curr-1] = '\0'; + + return line; +} + +int read_int(int fd) +{ + int n = 0; + int next = read(fd, &n, sizeof(int)); + if(next <= 0) return -1; + return n; +} + +void write_int(int fd, int n) +{ + int next = write(fd, &n, sizeof(int)); + if(next <= 0) error("read failed"); +} + +int read_all_fail(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + int next = read(fd, buffer + n, bytes-n); + if(next <= 0) return 1; + n += next; + } + return 0; +} + +int write_all_fail(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + size_t next = write(fd, buffer + n, bytes-n); + if(next <= 0) return 1; + n += next; + } + return 0; +} + +void read_all(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + int next = read(fd, buffer + n, bytes-n); + if(next <= 0) error("read failed"); + n += next; + } +} + +void write_all(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + size_t next = write(fd, buffer + n, bytes-n); + if(next <= 0) error("write failed"); + n += next; + } +} + + +char *copy_string(char *s) +{ + char *copy = malloc(strlen(s)+1); + strncpy(copy, s, strlen(s)+1); + return copy; +} + +list *parse_csv_line(char *line) +{ + list *l = make_list(); + char *c, *p; + int in = 0; + for(c = line, p = line; *c != '\0'; ++c){ + if(*c == '"') in = !in; + else if(*c == ',' && !in){ + *c = '\0'; + list_insert(l, copy_string(p)); + p = c+1; + } + } + list_insert(l, copy_string(p)); + return l; +} + +int count_fields(char *line) +{ + int count = 0; + int done = 0; + char *c; + for(c = line; !done; ++c){ 
+ done = (*c == '\0'); + if(*c == ',' || done) ++count; + } + return count; +} + +float *parse_fields(char *line, int n) +{ + float *field = calloc(n, sizeof(float)); + char *c, *p, *end; + int count = 0; + int done = 0; + for(c = line, p = line; !done; ++c){ + done = (*c == '\0'); + if(*c == ',' || done){ + *c = '\0'; + field[count] = strtod(p, &end); + if(p == c) field[count] = nan(""); + if(end != c && (end != c-1 || *end != '\r')) field[count] = nan(""); //DOS file formats! + p = c+1; + ++count; + } + } + return field; +} + +float sum_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i) sum += a[i]; + return sum; +} + +float mean_array(float *a, int n) +{ + return sum_array(a,n)/n; +} + +void mean_arrays(float **a, int n, int els, float *avg) +{ + int i; + int j; + memset(avg, 0, els*sizeof(float)); + for(j = 0; j < n; ++j){ + for(i = 0; i < els; ++i){ + avg[i] += a[j][i]; + } + } + for(i = 0; i < els; ++i){ + avg[i] /= n; + } +} + +void print_statistics(float *a, int n) +{ + float m = mean_array(a, n); + float v = variance_array(a, n); + printf("MSE: %.6f, Mean: %.6f, Variance: %.6f\n", mse_array(a, n), m, v); +} + +float variance_array(float *a, int n) +{ + int i; + float sum = 0; + float mean = mean_array(a, n); + for(i = 0; i < n; ++i) sum += (a[i] - mean)*(a[i]-mean); + float variance = sum/n; + return variance; +} + +int constrain_int(int a, int min, int max) +{ + if (a < min) return min; + if (a > max) return max; + return a; +} + +float constrain(float min, float max, float a) +{ + if (a < min) return min; + if (a > max) return max; + return a; +} + +float dist_array(float *a, float *b, int n, int sub) +{ + int i; + float sum = 0; + for(i = 0; i < n; i += sub) sum += pow(a[i]-b[i], 2); + return sqrt(sum); +} + +float mse_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i) sum += a[i]*a[i]; + return sqrt(sum/n); +} + +void normalize_array(float *a, int n) +{ + int i; + float mu = mean_array(a,n); + float 
sigma = sqrt(variance_array(a,n)); + for(i = 0; i < n; ++i){ + a[i] = (a[i] - mu)/sigma; + } + mu = mean_array(a,n); + sigma = sqrt(variance_array(a,n)); +} + +void translate_array(float *a, int n, float s) +{ + int i; + for(i = 0; i < n; ++i){ + a[i] += s; + } +} + +float mag_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + sum += a[i]*a[i]; + } + return sqrt(sum); +} + +void scale_array(float *a, int n, float s) +{ + int i; + for(i = 0; i < n; ++i){ + a[i] *= s; + } +} + +int sample_array(float *a, int n) +{ + float sum = sum_array(a, n); + scale_array(a, n, 1./sum); + float r = rand_uniform(0, 1); + int i; + for(i = 0; i < n; ++i){ + r = r - a[i]; + if (r <= 0) return i; + } + return n-1; +} + +int max_int_index(int *a, int n) +{ + if(n <= 0) return -1; + int i, max_i = 0; + int max = a[0]; + for(i = 1; i < n; ++i){ + if(a[i] > max){ + max = a[i]; + max_i = i; + } + } + return max_i; +} + +int max_index(float *a, int n) +{ + if(n <= 0) return -1; + int i, max_i = 0; + float max = a[0]; + for(i = 1; i < n; ++i){ + if(a[i] > max){ + max = a[i]; + max_i = i; + } + } + return max_i; +} + +int int_index(int *a, int val, int n) +{ + int i; + for(i = 0; i < n; ++i){ + if(a[i] == val) return i; + } + return -1; +} + +int rand_int(int min, int max) +{ + if (max < min){ + int s = min; + min = max; + max = s; + } + int r = (rand()%(max - min + 1)) + min; + return r; +} + +// From http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform +float rand_normal() +{ + static int haveSpare = 0; + static double rand1, rand2; + + if(haveSpare) + { + haveSpare = 0; + return sqrt(rand1) * sin(rand2); + } + + haveSpare = 1; + + rand1 = rand() / ((double) RAND_MAX); + if(rand1 < 1e-100) rand1 = 1e-100; + rand1 = -2 * log(rand1); + rand2 = (rand() / ((double) RAND_MAX)) * TWO_PI; + + return sqrt(rand1) * cos(rand2); +} + +/* + float rand_normal() + { + int n = 12; + int i; + float sum= 0; + for(i = 0; i < n; ++i) sum += (float)rand()/RAND_MAX; + return 
sum-n/2.; + } + */ + +size_t rand_size_t() +{ + return ((size_t)(rand()&0xff) << 56) | + ((size_t)(rand()&0xff) << 48) | + ((size_t)(rand()&0xff) << 40) | + ((size_t)(rand()&0xff) << 32) | + ((size_t)(rand()&0xff) << 24) | + ((size_t)(rand()&0xff) << 16) | + ((size_t)(rand()&0xff) << 8) | + ((size_t)(rand()&0xff) << 0); +} + +float rand_uniform(float min, float max) +{ + if(max < min){ + float swap = min; + min = max; + max = swap; + } + return ((float)rand()/RAND_MAX * (max - min)) + min; +} + +float rand_scale(float s) +{ + float scale = rand_uniform(1, s); + if(rand()%2) return scale; + return 1./scale; +} + +float **one_hot_encode(float *a, int n, int k) +{ + int i; + float **t = calloc(n, sizeof(float*)); + for(i = 0; i < n; ++i){ + t[i] = calloc(k, sizeof(float)); + int index = (int)a[i]; + t[i][index] = 1; + } + return t; +} + diff --git a/workloads/realworld/pipeline/darknet/src/utils.h b/workloads/realworld/pipeline/darknet/src/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..ef24da79888612f5b48fbb4dc233c483590e0c34 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/utils.h @@ -0,0 +1,53 @@ +#ifndef UTILS_H +#define UTILS_H +#include +#include +#include "darknet.h" +#include "list.h" + +#define TIME(a) \ + do { \ + double start = what_time_is_it_now(); \ + a; \ + printf("%s took: %f seconds\n", #a, what_time_is_it_now() - start); \ + } while (0) + +#define TWO_PI 6.2831853071795864769252866f + +double what_time_is_it_now(); +void shuffle(void *arr, size_t n, size_t size); +void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections); +void free_ptrs(void **ptrs, int n); +int alphanum_to_int(char c); +char int_to_alphanum(int i); +int read_int(int fd); +void write_int(int fd, int n); +void read_all(int fd, char *buffer, size_t bytes); +void write_all(int fd, char *buffer, size_t bytes); +int read_all_fail(int fd, char *buffer, size_t bytes); +int write_all_fail(int fd, char *buffer, size_t bytes); +void 
find_replace(char *str, char *orig, char *rep, char *output); +void malloc_error(); +void file_error(char *s); +void strip(char *s); +void strip_char(char *s, char bad); +list *split_str(char *s, char delim); +char *fgetl(FILE *fp); +list *parse_csv_line(char *line); +char *copy_string(char *s); +int count_fields(char *line); +float *parse_fields(char *line, int n); +void translate_array(float *a, int n, float s); +float constrain(float min, float max, float a); +int constrain_int(int a, int min, int max); +float rand_scale(float s); +int rand_int(int min, int max); +void mean_arrays(float **a, int n, int els, float *avg); +float dist_array(float *a, float *b, int n, int sub); +float **one_hot_encode(float *a, int n, int k); +float sec(clock_t clocks); +void print_statistics(float *a, int n); +int int_index(int *a, int val, int n); + +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/yolo_layer.c b/workloads/realworld/pipeline/darknet/src/yolo_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..07ff4310ee9feed59b7d218dc56b5014b5f3596f --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/yolo_layer.c @@ -0,0 +1,374 @@ +#include "yolo_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer* make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes) +{ + int i; + layer* l = calloc(1, sizeof(layer)); + l->type = YOLO; + + l->n = n; + l->total = total; + l->batch = batch; + l->h = h; + l->w = w; + l->c = n*(classes + 4 + 1); + l->out_w = l->w; + l->out_h = l->h; + l->out_c = l->c; + l->classes = classes; + l->cost = calloc(1, sizeof(float)); + l->biases = calloc(total*2, sizeof(float)); + if(mask) l->mask = mask; + else{ + l->mask = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + l->mask[i] = i; + } + } + l->bias_updates = calloc(n*2, sizeof(float)); + l->outputs = h*w*n*(classes + 4 + 
1); + l->inputs = l->outputs; + l->truths = 90*(4 + 1); + l->delta = calloc(batch*l->outputs, sizeof(float)); + l->output = calloc(batch*l->outputs, sizeof(float)); + for(i = 0; i < total*2; ++i){ + l->biases[i] = .5; + } + + l->forward = forward_yolo_layer; + l->backward = backward_yolo_layer; +#ifdef GPU + l->forward_gpu = forward_yolo_layer_gpu; + l->backward_gpu = backward_yolo_layer_gpu; + // l->output_gpu = cuda_make_array(l->output, batch*l->outputs); + // l->delta_gpu = cuda_make_array(l->delta, batch*l->outputs); +#endif + + fprintf(stderr, "yolo\n"); + srand(0); + + return l; +} + +void resize_yolo_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->n*(l->classes + 4 + 1); + l->inputs = l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride) +{ + box b; + b.x = (i + x[index + 0*stride]) / lw; + b.y = (j + x[index + 1*stride]) / lh; + b.w = exp(x[index + 2*stride]) * biases[2*n] / w; + b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h; + return b; +} + +float delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride) +{ + box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); + float iou = box_iou(pred, truth); + + float tx = (truth.x*lw - i); + float ty = (truth.y*lh - j); + float tw = log(truth.w*w / biases[2*n]); + float th = log(truth.h*h / biases[2*n + 1]); + + delta[index + 0*stride] = scale * (tx - x[index + 0*stride]); + delta[index + 1*stride] = scale * (ty - x[index + 1*stride]); + 
delta[index + 2*stride] = scale * (tw - x[index + 2*stride]); + delta[index + 3*stride] = scale * (th - x[index + 3*stride]); + return iou; +} + + +void delta_yolo_class(float *output, float *delta, int index, int class_, int classes, int stride, float *avg_cat) +{ + int n; + if (delta[index]){ + delta[index + stride*class_] = 1 - output[index + stride*class_]; + if(avg_cat) *avg_cat += output[index + stride*class_]; + return; + } + for(n = 0; n < classes; ++n){ + delta[index + stride*n] = ((n == class_)?1 : 0) - output[index + stride*n]; + if(n == class_ && avg_cat) *avg_cat += output[index + stride*n]; + } +} + +static int entry_index(layer* l, int batch, int location, int entry) +{ + int n = location / (l->w*l->h); + int loc = location % (l->w*l->h); + return batch*l->outputs + n*l->w*l->h*(4+l->classes+1) + entry*l->w*l->h + loc; +} + +void forward_yolo_layer(layer* l, network net) +{ + int i,j,b,t,n; + memcpy(l->output, net.input, l->outputs*l->batch*sizeof(float)); + +#ifndef GPU + for (b = 0; b < l->batch; ++b){ + for(n = 0; n < l->n; ++n){ + int index = entry_index(l, b, n*l->w*l->h, 0); + activate_array(l->output + index, 2*l->w*l->h, LOGISTIC); + index = entry_index(l, b, n*l->w*l->h, 4); + activate_array(l->output + index, (1+l->classes)*l->w*l->h, LOGISTIC); + } + } +#endif + + memset(l->delta, 0, l->outputs * l->batch * sizeof(float)); + if(!net.train) return; + float avg_iou = 0; + float recall = 0; + float recall75 = 0; + float avg_cat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + int class_count = 0; + *(l->cost) = 0; + for (b = 0; b < l->batch; ++b) { + for (j = 0; j < l->h; ++j) { + for (i = 0; i < l->w; ++i) { + for (n = 0; n < l->n; ++n) { + int box_index = entry_index(l, b, n*l->w*l->h + j*l->w + i, 0); + box pred = get_yolo_box(l->output, l->biases, l->mask[n], box_index, i, j, l->w, l->h, net.w, net.h, l->w*l->h); + float best_iou = 0; + int best_t = 0; + for(t = 0; t < l->max_boxes; ++t){ + box truth = 
float_to_box(net.truth + t*(4 + 1) + b*l->truths, 1); + if(!truth.x) break; + float iou = box_iou(pred, truth); + if (iou > best_iou) { + best_iou = iou; + best_t = t; + } + } + int obj_index = entry_index(l, b, n*l->w*l->h + j*l->w + i, 4); + avg_anyobj += l->output[obj_index]; + l->delta[obj_index] = 0 - l->output[obj_index]; + if (best_iou > l->ignore_thresh) { + l->delta[obj_index] = 0; + } + if (best_iou > l->truth_thresh) { + l->delta[obj_index] = 1 - l->output[obj_index]; + + int class_ = net.truth[best_t*(4 + 1) + b*l->truths + 4]; + if (l->map) class_ = l->map[class_]; + int class_index = entry_index(l, b, n*l->w*l->h + j*l->w + i, 4 + 1); + delta_yolo_class(l->output, l->delta, class_index, class_, l->classes, l->w*l->h, 0); + box truth = float_to_box(net.truth + best_t*(4 + 1) + b*l->truths, 1); + delta_yolo_box(truth, l->output, l->biases, l->mask[n], box_index, i, j, l->w, l->h, net.w, net.h, l->delta, (2-truth.w*truth.h), l->w*l->h); + } + } + } + } + for(t = 0; t < l->max_boxes; ++t){ + box truth = float_to_box(net.truth + t*(4 + 1) + b*l->truths, 1); + + if(!truth.x) break; + float best_iou = 0; + int best_n = 0; + i = (truth.x * l->w); + j = (truth.y * l->h); + box truth_shift = truth; + truth_shift.x = truth_shift.y = 0; + for(n = 0; n < l->total; ++n){ + box pred = {0}; + pred.w = l->biases[2*n]/net.w; + pred.h = l->biases[2*n+1]/net.h; + float iou = box_iou(pred, truth_shift); + if (iou > best_iou){ + best_iou = iou; + best_n = n; + } + } + + int mask_n = int_index(l->mask, best_n, l->n); + if(mask_n >= 0){ + int box_index = entry_index(l, b, mask_n*l->w*l->h + j*l->w + i, 0); + float iou = delta_yolo_box(truth, l->output, l->biases, best_n, box_index, i, j, l->w, l->h, net.w, net.h, l->delta, (2-truth.w*truth.h), l->w*l->h); + + int obj_index = entry_index(l, b, mask_n*l->w*l->h + j*l->w + i, 4); + avg_obj += l->output[obj_index]; + l->delta[obj_index] = 1 - l->output[obj_index]; + + int class_ = net.truth[t*(4 + 1) + b*l->truths + 4]; + if 
(l->map) class_ = l->map[class_]; + int class_index = entry_index(l, b, mask_n*l->w*l->h + j*l->w + i, 4 + 1); + delta_yolo_class(l->output, l->delta, class_index, class_, l->classes, l->w*l->h, &avg_cat); + + ++count; + ++class_count; + if(iou > .5) recall += 1; + if(iou > .75) recall75 += 1; + avg_iou += iou; + } + } + } + *(l->cost) = pow(mag_array(l->delta, l->outputs * l->batch), 2); + printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", net.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l->w*l->h*l->n*l->batch), recall/count, recall75/count, count); +} + +void backward_yolo_layer(layer* l, network net) +{ + axpy_cpu(l->batch*l->inputs, 1, l->delta, 1, net.delta, 1); +} + +void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +{ + int i; + int new_w=0; + int new_h=0; + if (((float)netw/w) < ((float)neth/h)) { + new_w = netw; + new_h = (h * netw)/w; + } else { + new_h = neth; + new_w = (w * neth)/h; + } + for (i = 0; i < n; ++i){ + box b = dets[i].bbox; + b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); + b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); + b.w *= (float)netw/new_w; + b.h *= (float)neth/new_h; + if(!relative){ + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + dets[i].bbox = b; + } +} + +int yolo_num_detections(layer* l, float thresh) +{ + int i, n; + int count = 0; + for (i = 0; i < l->w*l->h; ++i){ + for(n = 0; n < l->n; ++n){ + int obj_index = entry_index(l, 0, n*l->w*l->h + i, 4); + if(l->output[obj_index] > thresh){ + ++count; + } + } + } + return count; +} + +void avg_flipped_yolo(layer* l) +{ + int i,j,n,z; + float *flip = l->output + l->outputs; + for (j = 0; j < l->h; ++j) { + for (i = 0; i < l->w/2; ++i) { + for (n = 0; n < l->n; ++n) { + for(z = 0; z < l->classes + 4 + 1; ++z){ + int i1 = z*l->w*l->h*l->n + n*l->w*l->h + j*l->w + i; + int i2 = z*l->w*l->h*l->n + n*l->w*l->h + j*l->w + (l->w - i - 1); + 
float swap = flip[i1]; + flip[i1] = flip[i2]; + flip[i2] = swap; + if(z == 0){ + flip[i1] = -flip[i1]; + flip[i2] = -flip[i2]; + } + } + } + } + } + for(i = 0; i < l->outputs; ++i){ + l->output[i] = (l->output[i] + flip[i])/2.; + } +} + +int get_yolo_detections(layer* l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets) +{ + int i,j,n; + float *predictions = l->output; + if (l->batch == 2) avg_flipped_yolo(l); + int count = 0; + for (i = 0; i < l->w*l->h; ++i){ + int row = i / l->w; + int col = i % l->w; + for(n = 0; n < l->n; ++n){ + int obj_index = entry_index(l, 0, n*l->w*l->h + i, 4); + float objectness = predictions[obj_index]; + if(objectness <= thresh) continue; + int box_index = entry_index(l, 0, n*l->w*l->h + i, 0); + dets[count].bbox = get_yolo_box(predictions, l->biases, l->mask[n], box_index, col, row, l->w, l->h, netw, neth, l->w*l->h); + dets[count].objectness = objectness; + dets[count].classes = l->classes; + for(j = 0; j < l->classes; ++j){ + int class_index = entry_index(l, 0, n*l->w*l->h + i, 4 + 1 + j); + float prob = objectness*predictions[class_index]; + dets[count].prob[j] = (prob > thresh) ? 
prob : 0; + } + ++count; + } + } + correct_yolo_boxes(dets, count, w, h, netw, neth, relative); + return count; +} + +#ifdef GPU + +void forward_yolo_layer_gpu(layer* l, network net) +{ + copy_gpu(l->batch*l->inputs, net.input_gpu, 1, l->output_gpu, 1, net.streams[l->stream_index]); + int b, n; + for (b = 0; b < l->batch; ++b){ + for(n = 0; n < l->n; ++n){ + int index = entry_index(l, b, n*l->w*l->h, 0); + activate_array_gpu(l->output_gpu + index, 2*l->w*l->h, LOGISTIC, net.streams[l->stream_index]); + index = entry_index(l, b, n*l->w*l->h, 4); + activate_array_gpu(l->output_gpu + index, (1+l->classes)*l->w*l->h, LOGISTIC, net.streams[l->stream_index]); + } + } + if(!net.train || l->onlyforward){ + cuda_pull_array(l->output_gpu, l->output, l->batch*l->outputs); + return; + } + + cuda_pull_array(l->output_gpu, net.input, l->batch*l->inputs); + forward_yolo_layer(l, net); + cuda_push_array(l->delta_gpu, l->delta, l->batch*l->outputs); +} + +void backward_yolo_layer_gpu(layer* l, network net) +{ + axpy_gpu(l->batch*l->inputs, 1, l->delta_gpu, 1, net.delta_gpu, 1, net.streams[l->stream_index]); +} +#endif + diff --git a/workloads/realworld/pipeline/darknet/src/yolo_layer.h b/workloads/realworld/pipeline/darknet/src/yolo_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..174c2bba3de4932e991753733029bfeefa698227 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/src/yolo_layer.h @@ -0,0 +1,19 @@ +#ifndef YOLO_LAYER_H +#define YOLO_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer* make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes); +void forward_yolo_layer(layer* l, network net); +void backward_yolo_layer(layer* l, network net); +void resize_yolo_layer(layer *l, int w, int h); +int yolo_num_detections(layer* l, float thresh); + +#ifdef GPU +void forward_yolo_layer_gpu(layer* l, network net); +void backward_yolo_layer_gpu(layer* l, network net); +#endif + +#endif diff --git 
a/workloads/realworld/pipeline/darknet/yolov3-tiny/predictions.jpg b/workloads/realworld/pipeline/darknet/yolov3-tiny/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0ee103d8066df7ffec67b555431c058709c9530d Binary files /dev/null and b/workloads/realworld/pipeline/darknet/yolov3-tiny/predictions.jpg differ diff --git a/workloads/realworld/pipeline/darknet/yolov3-tiny/run_super.sh b/workloads/realworld/pipeline/darknet/yolov3-tiny/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..d890a7c581dc59167fab1b93778d143fc7e679a9 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/yolov3-tiny/run_super.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg diff --git a/workloads/realworld/pipeline/darknet/yolov3-tiny/run_yolov3-tiny.sh b/workloads/realworld/pipeline/darknet/yolov3-tiny/run_yolov3-tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..d890a7c581dc59167fab1b93778d143fc7e679a9 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/yolov3-tiny/run_yolov3-tiny.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg diff --git a/workloads/realworld/pipeline/darknet/yolov3-tiny_b/run_super.sh b/workloads/realworld/pipeline/darknet/yolov3-tiny_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..31669e62fb94142e7dc24b3f905c8f1d25950367 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/yolov3-tiny_b/run_super.sh @@ -0,0 +1,2 @@ +#../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg +../darknet detector infer ../cfg/coco.data ../cfg/yolov3-tiny_b.cfg diff --git a/workloads/realworld/pipeline/darknet/yolov3-tiny_t/run_super.sh b/workloads/realworld/pipeline/darknet/yolov3-tiny_t/run_super.sh new file mode 100755 index 
0000000000000000000000000000000000000000..6cc56bc601476fa212f615b5aec964f12e044473 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/yolov3-tiny_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg +../darknet detector train ../cfg/coco.data ../cfg/yolov3-tiny_t.cfg diff --git a/workloads/realworld/pipeline/darknet/yolov3/predictions.jpg b/workloads/realworld/pipeline/darknet/yolov3/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..49c1abe30cdcdceadad4353da30ce5660c96be1a Binary files /dev/null and b/workloads/realworld/pipeline/darknet/yolov3/predictions.jpg differ diff --git a/workloads/realworld/pipeline/darknet/yolov3/run_super.sh b/workloads/realworld/pipeline/darknet/yolov3/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..440a4b456a80a434243dffa9614d5a501790990f --- /dev/null +++ b/workloads/realworld/pipeline/darknet/yolov3/run_super.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg diff --git a/workloads/realworld/pipeline/darknet/yolov3/run_yolov3.sh b/workloads/realworld/pipeline/darknet/yolov3/run_yolov3.sh new file mode 100755 index 0000000000000000000000000000000000000000..440a4b456a80a434243dffa9614d5a501790990f --- /dev/null +++ b/workloads/realworld/pipeline/darknet/yolov3/run_yolov3.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg diff --git a/workloads/realworld/pipeline/darknet/yolov3_b/run_super.sh b/workloads/realworld/pipeline/darknet/yolov3_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..913790c1ee53a0d442a89306fbd8bda93faa2581 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/yolov3_b/run_super.sh @@ -0,0 +1,2 @@ +#../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg 
+../darknet detector infer ../cfg/coco.data ../cfg/yolov3_b.cfg diff --git a/workloads/realworld/pipeline/darknet/yolov3_t/run_super.sh b/workloads/realworld/pipeline/darknet/yolov3_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ee7df07c1a4172f25c1129d8027095d2e3861e28 --- /dev/null +++ b/workloads/realworld/pipeline/darknet/yolov3_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg +../darknet detector train ../cfg/coco.data ../cfg/yolov3_t.cfg diff --git a/workloads/realworld/real_super_avg_std.pdf b/workloads/realworld/real_super_avg_std.pdf new file mode 100644 index 0000000000000000000000000000000000000000..20b7590e88112ceba041c0b3a620e7e4f037341a Binary files /dev/null and b/workloads/realworld/real_super_avg_std.pdf differ diff --git a/workloads/realworld/real_super_avg_std.png b/workloads/realworld/real_super_avg_std.png new file mode 100644 index 0000000000000000000000000000000000000000..8ab41f2e4f1cc4d1096ad6f92ea57dfd10e2d9e0 Binary files /dev/null and b/workloads/realworld/real_super_avg_std.png differ diff --git a/workloads/realworld/realworld_super.pdf b/workloads/realworld/realworld_super.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f4851dfa442d69f7eeb7563f8ee889881b633892 Binary files /dev/null and b/workloads/realworld/realworld_super.pdf differ diff --git a/workloads/realworld/realworld_super.png b/workloads/realworld/realworld_super.png new file mode 100644 index 0000000000000000000000000000000000000000..0b72f6b40c82582ecf96a9256be70bd55ff95015 Binary files /dev/null and b/workloads/realworld/realworld_super.png differ diff --git a/workloads/realworld/run_real_all.py b/workloads/realworld/run_real_all.py new file mode 100644 index 0000000000000000000000000000000000000000..6970334f535faa347928ee64cab8584b72b1a197 --- /dev/null +++ b/workloads/realworld/run_real_all.py @@ -0,0 +1,681 @@ +import os 
+import argparse +import sys +import subprocess +import psutil + +import os +import collections +import csv + +import numpy as np +import pandas as pd +import matplotlib +from matplotlib import pyplot as plt +import matplotlib.ticker as mticker +import xlsxwriter +import seaborn as sns + +from matplotlib.ticker import FormatStrFormatter +from matplotlib.legend_handler import HandlerTuple + +from subprocess import Popen, PIPE + +from scipy.stats import gmean + + +prefix = 'run_' +parameter_super_list = ['super'] + +config_super_list = ['standard', 'async', 'uvm', 'uvm_prefetch', 'uvm_prefetch_async', 'pinned', 'pipeline'] +# config_super_list = ['standard', 'async', 'uvm', 'uvm_prefetch', 'uvm_prefetch_async'] +# config_super_list = ['standard', 'pinned'] +# config_super_list = ['pinned'] + +# config_super_list = ['standard', 'pipeline', 'uvm', 'uvm_prefetch', 'uvm_prefetch_async'] + +workload_super_list = ['lavaMD', 'nw', 'kmeans', 'srad', 'backprop', 'pathfinder', 'hotspot', 'lud', 'BN', 'knn'] +workload_super_list = [] +darknet_super_list = ['resnet18', 'resnet50', 'yolov3-tiny', 'yolov3', \ + 'resnet18_t', 'resnet50_t', 'yolov3-tiny_t', 'yolov3_t', \ + 'resnet18_b', 'resnet50_b', 'yolov3-tiny_b', 'yolov3_b'] +# darknet_super_list = ['resnet18_t', 'resnet50_t', 'yolov3-tiny_t', 'yolov3_t'] +# darknet_super_list = ['resnet18', 'resnet50', 'yolov3-tiny', 'yolov3'] +# darknet_super_list = [] + +def dict_to_list(input_dict): + return_list = [] + for elemement in input_dict: + return_list.append(elemement) + return return_list + +def addOptions(parser): + parser.add_argument("-i", "--iterations", type=int, default=1, + help="Number of iterations") + parser.add_argument("-o", "--output", type=str, default='output.xlsx', + help="output trace log file") + parser.add_argument("-f", "--figure", type=str, default='realworld', + help="output pdf file") + parser.add_argument("-p", "--profiling", action='store_true', + help="whether to run profiling or just parse results") + 
parser.add_argument("-c", "--clean", action='store_true', + help="whether to clean all results") + + +def get_config_list(root_directory): + config_list = [] + for dict in os.listdir(root_directory): + if os.path.isdir(dict) and dict in config_super_list: + config_list.append(dict) + return config_list + + +def get_workload_dict(root_directory, config_list): + workload_list = [] + workload_dict = dict() + for config in config_list: + config_dir = root_directory + '/' + config + + for root, directories, files in os.walk(config_dir, topdown=False): + for dir in directories: + if dir in workload_super_list: + if dir not in workload_dict: + workload_dict[dir] = dict() + workload_dict[dir][config] = os.path.join(root, dir) + if dir not in workload_list: + workload_list.append(dir) + if dir == 'darknet': + for root_darnet, directories_darknet, files_darknet in os.walk(config_dir + '/darknet', topdown=False): + for dir in directories_darknet: + if dir in darknet_super_list: + if dir not in workload_dict: + workload_dict[dir] = dict() + workload_dict[dir][config] = os.path.join(root_darnet, dir) + if dir not in workload_list: + workload_list.append(dir) + + return workload_list, workload_dict + +def execute_bashes(workload_dict, iterations): + for workload in workload_dict: + if workload in workload_super_list: + for config in config_super_list: + cur_dir = workload_dict[workload][config] + pwd = os.getcwd() + os.chdir(cur_dir) + os.system('make') + for para in parameter_super_list: + for i in range(0, iterations): + sh_file = './' + prefix + para + '.sh' + exe_cmd = sh_file + ' > ' + para + '_' + str(i) + '.log' + os.system(exe_cmd) + os.chdir(pwd) + elif workload in darknet_super_list: + for config in config_super_list: + print(workload, config) + cur_dir = workload_dict[workload][config] + darknet_dir = cur_dir + '/../' + pwd = os.getcwd() + os.chdir(darknet_dir) + os.system('make clean') + os.system('PROFILE=1 make -j16') + # os.system('make -j16') + os.chdir(pwd) + 
os.chdir(cur_dir) + for para in parameter_super_list: + for i in range(0, iterations): + sh_file = './' + prefix + para + '.sh' + # exe_cmd = sh_file + ' > ' + para + '_' + str(i) + '.log' + ' 2> ' + para + '_error_' + str(i) + '.log' + exe_cmd = sh_file + ' > ' + para + '_' + str(i) + '.log' + print(exe_cmd) + os.system(exe_cmd) + os.chdir(pwd) + +def execute_clean_bashes(workload_dict): + for workload in workload_dict: + if workload in workload_super_list: + for config in config_super_list: + cur_dir = workload_dict[workload][config] + pwd = os.getcwd() + os.chdir(cur_dir) + os.system('make clean') + os.system('rm *.log') + os.system('rm *.out') + os.chdir(pwd) + elif workload in darknet_super_list: + for config in config_super_list: + print(workload, config) + cur_dir = workload_dict[workload][config] + darknet_dir = cur_dir + '/../' + pwd = os.getcwd() + os.chdir(darknet_dir) + os.system('make clean') + os.chdir(pwd) + os.chdir(cur_dir) + os.system('rm *.log') + os.chdir(pwd) + + +def process_file(log_file, config): + result_dict = dict() + text = open(log_file, "r") + + overlap = 0 + + result_dict['gpu_kernel'] = 0 + result_dict['memcpy'] = 0 + result_dict['memcpy_HtoD'] = 0 + result_dict['memcpy_DtoH'] = 0 + result_dict['allocation'] = 0 + + for line in text: + line = line.replace(':', '') + line = line.strip() + words = line.split(',') + + if 'KERNEL' in words[0] and len(words) >= 4: + result_dict['gpu_kernel'] += int(words[-1]) + elif 'MEMCPY' in words[0]: + if 'HTOD' in words[0] or 'HtoD' in words[0]: + result_dict['memcpy_HtoD'] += int(words[-1]) + else: + result_dict['memcpy_DtoH'] += int(words[-1]) + elif 'cudaMalloc' in words[0]: + result_dict['allocation'] += int(words[3]) + elif 'cudaFree' in words[0]: + result_dict['allocation'] += int(words[3]) + + return_dict = dict() + + if config == 'uvm': + return_dict['gpu_kernel'] = result_dict['gpu_kernel'] - result_dict['memcpy_HtoD'] + else: + return_dict['gpu_kernel'] = result_dict['gpu_kernel'] + 
return_dict['memcpy'] = result_dict['memcpy_HtoD'] + result_dict['memcpy_DtoH'] + return_dict['allocation'] = result_dict['allocation'] + + return return_dict + + +def process_results(workload_dict, iterations): + result_dict = dict() + for workload in workload_dict: + if workload in workload_super_list or workload in darknet_super_list: + for config in config_super_list: + cur_dir = workload_dict[workload][config] + for para in parameter_super_list: + if para not in result_dict: + result_dict[para] = dict() + if workload not in result_dict[para]: + result_dict[para][workload] = dict() + + # if config not in result_dict[para][workload]: + result_dict[para][workload][config] = [] + for i in range(0, iterations): + log_file = cur_dir + '/' + para + '_' + str(i) + '.log' + result_dict[para][workload][config].append(process_file(log_file, config)) + sorted(result_dict[para][workload]) + sorted(result_dict[para]) + return result_dict + + +def export_xlsx(result_dict, config_list, iterations, output_file): + workbook = xlsxwriter.Workbook(output_file) + + for para in result_dict: + worksheet = workbook.add_worksheet(para) + first_col = 'B' + first_row = 3 + + col_index = 0 + row_index = first_row + for workload in result_dict[para]: + if col_index + (ord(first_col) - ord('A')) < 26: + current_col = chr(ord(first_col) + col_index) + else: + all = (ord(first_col) - ord('A')) + col_index + dig_1 = all // 26 - 1 + dig_2 = all % 26 + current_col = chr(ord('A') + dig_1) + chr(ord('A') + dig_2) + worksheet.write(current_col + '1', workload) + for config in config_list: + if col_index + (ord(first_col) - ord('A')) < 26: + current_col = chr(ord(first_col) + col_index) + else: + all = (ord(first_col) - ord('A')) + col_index + dig_1 = all // 26 - 1 + dig_2 = all % 26 + current_col = chr(ord('A') + dig_1) + chr(ord('A') + dig_2) + worksheet.write(current_col + '2', config) + row_index = first_row + result_list = dict_to_list(result_dict[para][workload][config][0]) + + for i in 
range(0, iterations): + tmp_result_dict = dict() + for result in result_dict[para][workload][config][i]: + tmp_result_dict[result] = result_dict[para][workload][config][i][result] + + if col_index == 0: + for j in range(0, len(result_list)): + worksheet.write('A' + str(j + row_index), result_list[j]) + + for j in range(0, len(result_list)): + worksheet.write(current_col + str(j + row_index), tmp_result_dict[result_list[j]]) + + row_index += len(result_list) + col_index += 1 + workbook.close() + + +def plot_results(result_dict, config_list, workload_list, iterations, output_file): + + config_ordered_list = [] + for config in config_super_list: + if config in config_list: + config_ordered_list.append(config) + + workload_ordered_list = [] + for workload in workload_super_list: + if workload in workload_list: + workload_ordered_list.append(workload) + + for workload in darknet_super_list: + if workload in workload_list: + workload_ordered_list.append(workload) + + for para in result_dict: + pandas_list = [] + pandas_list.append('workload') + for config in config_list: + pandas_list.append(config) + + pandas_dict = dict() + for ele in pandas_list: + pandas_dict[ele] = [] + + + for workload in result_dict[para]: + for i in range(0, iterations): + pandas_dict['workload'].append(workload) + for config in config_list: + overall_time = 0 + overall_time += result_dict[para][workload][config][i]['gpu_kernel'] + overall_time += result_dict[para][workload][config][i]['memcpy'] + overall_time += result_dict[para][workload][config][i]['allocation'] + pandas_dict[config].append(overall_time) + + df = pd.DataFrame(pandas_dict) + dd=pd.melt(df,id_vars='workload',value_vars=config_list,var_name='configs') + + sns.boxplot(data=dd, x='workload', y='value', hue='configs', order=workload_ordered_list, hue_order=config_ordered_list) + # sns.tight_layout() + + + plt.xticks(fontsize=12, rotation=15) + plt.yticks(fontsize=12) + plt.grid(axis='y') + plt.xlabel("") + plt.ylabel("Execution time 
(ns)") + plt.tight_layout() + + plt.savefig(output_file + '_' + para + '.pdf', bbox_inches='tight') + plt.savefig(output_file + '_' + para + '.png', bbox_inches='tight') + plt.close() + +def export_xlsx_all(result_dict, config_list, iterations, output_file): + std_dict = dict() + + workbook = xlsxwriter.Workbook(output_file.replace(".xlsx", '') + '_all.xlsx') + + for para in result_dict: + worksheet = workbook.add_worksheet(para) + first_col = 'B' + first_row = 3 + + col_index = 0 + row_index = first_row + for workload in result_dict[para]: + if col_index + (ord(first_col) - ord('A')) < 26: + current_col = chr(ord(first_col) + col_index) + else: + all = (ord(first_col) - ord('A')) + col_index + dig_1 = all // 26 - 1 + dig_2 = all % 26 + current_col = chr(ord('A') + dig_1) + chr(ord('A') + dig_2) + worksheet.write(current_col + '1', workload) + for config in config_list: + if col_index + (ord(first_col) - ord('A')) < 26: + current_col = chr(ord(first_col) + col_index) + else: + all = (ord(first_col) - ord('A')) + col_index + dig_1 = all // 26 - 1 + dig_2 = all % 26 + current_col = chr(ord('A') + dig_1) + chr(ord('A') + dig_2) + worksheet.write(current_col + '2', config) + row_index = first_row + + all_time_list = [] + + for i in range(0, iterations): + tmp_result_dict = dict() + for result in result_dict[para][workload][config][i]: + tmp_result_dict[result] = result_dict[para][workload][config][i][result] + + overall_time = 0 + for result in tmp_result_dict: + overall_time += tmp_result_dict[result] + + if col_index == 0: + worksheet.write('A' + str(row_index), 'Time') + + worksheet.write(current_col + str(row_index), overall_time) + all_time_list.append(overall_time) + + row_index += 1 + + if para not in std_dict: + std_dict[para] = dict() + if workload not in std_dict[para]: + std_dict[para][workload] = dict() + + std_dict[para][workload][config] = np.std(all_time_list) / np.mean(all_time_list) + + col_index += 1 + + workbook.close() + + avg_std_dict = dict() + 
mean_avg_std_dict = dict() + workload_list = [] + parameter_list = [] + for para in std_dict: + avg_std_dict[para] = dict() + overall_std_list = [] + for workload in std_dict[para]: + overall_std = 0 + for config in std_dict[para][workload]: + overall_std += std_dict[para][workload][config] / len(config_list) + avg_std_dict[para][workload] = overall_std + overall_std_list.append(overall_std) + sorted(avg_std_dict[para]) + workload_list = dict_to_list(avg_std_dict[para]) + + mean_avg_std_dict[para] = gmean(overall_std_list) + sorted(avg_std_dict) + parameter_list = dict_to_list(avg_std_dict) + + + + avg_std_csv_file = output_file.replace(".xlsx", '') + '_std.csv' + out = open(avg_std_csv_file, "w") + + out.write('group,') + for i in range(0, len(parameter_list)): + out.write(parameter_list[i]) + if i != len(parameter_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + for i in range(0, len(workload_list)): + out.write(workload_list[i]+',') + for j in range(0, len(parameter_list)): + out.write(str(avg_std_dict[parameter_list[j]][workload_list[i]])) + if j != len(parameter_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + out.write('Geo-mean,') + for j in range(0, len(parameter_list)): + out.write(str(mean_avg_std_dict[parameter_list[j]])) + if j != len(parameter_list) - 1: + out.write(',') + else: + out.write(os.linesep) + out.close() + + + super_avg_dict = dict() + for workload in workload_list: + super_avg_dict[workload] = dict() + for c in range(0, len(config_list)): + super_avg_dict[workload][config_list[c]] = dict() + super_avg_dict[workload][config_list[c]]['gpu_kernel'] = 0 + super_avg_dict[workload][config_list[c]]['memcpy'] = 0 + super_avg_dict[workload][config_list[c]]['allocation'] = 0 + super_avg_dict[workload][config_list[c]]['all'] = 0 + for i in range(0, iterations): + super_avg_dict[workload][config_list[c]]['gpu_kernel'] += result_dict['super'][workload][config_list[c]][i]['gpu_kernel'] + 
super_avg_dict[workload][config_list[c]]['memcpy'] += result_dict['super'][workload][config_list[c]][i]['memcpy'] + super_avg_dict[workload][config_list[c]]['allocation'] += result_dict['super'][workload][config_list[c]][i]['allocation'] + + super_avg_dict[workload][config_list[c]]['all'] += result_dict['super'][workload][config_list[c]][i]['gpu_kernel'] + super_avg_dict[workload][config_list[c]]['all'] += result_dict['super'][workload][config_list[c]][i]['memcpy'] + super_avg_dict[workload][config_list[c]]['all'] += result_dict['super'][workload][config_list[c]][i]['allocation'] + + for c in range(0, len(config_list)): + normarlized_all = super_avg_dict[workload][config_list[c]]['all'] / super_avg_dict[workload]['standard']['all'] + print(super_avg_dict[workload][config_list[c]]) + super_avg_dict[workload][config_list[c]]['gpu_kernel'] = (super_avg_dict[workload][config_list[c]]['gpu_kernel'] / super_avg_dict[workload][config_list[c]]['all']) * normarlized_all + super_avg_dict[workload][config_list[c]]['memcpy'] = (super_avg_dict[workload][config_list[c]]['memcpy'] / super_avg_dict[workload][config_list[c]]['all']) * normarlized_all + super_avg_dict[workload][config_list[c]]['allocation'] = (super_avg_dict[workload][config_list[c]]['allocation'] / super_avg_dict[workload][config_list[c]]['all']) * normarlized_all + print(super_avg_dict[workload][config_list[c]]) + sorted(super_avg_dict) + + + super_avg_csv_file = 'super_avg.csv' + out = open(super_avg_csv_file, "w") + + out.write('group,,') + for i in range(0, len(config_list)): + out.write(config_list[i]) + if i != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + for i in range(0, len(workload_list)): + + out.write(workload_list[i]+',gpu_kernel,') + for j in range(0, len(config_list)): + out.write(str(super_avg_dict[workload_list[i]][config_list[j]]['gpu_kernel'])) + if j != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + out.write(workload_list[i]+',memcpy,') 
+ for j in range(0, len(config_list)): + out.write(str(super_avg_dict[workload_list[i]][config_list[j]]['memcpy'])) + if j != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + out.write(workload_list[i]+',allocation,') + for j in range(0, len(config_list)): + out.write(str(super_avg_dict[workload_list[i]][config_list[j]]['allocation'])) + if j != len(config_list) - 1: + out.write(',') + else: + out.write(os.linesep) + + out.close() + + return avg_std_csv_file, super_avg_csv_file + + +def plot_std_results(csv_file, output_file): + df = pd.read_csv(csv_file, index_col=0) + + group_list = [] + for index in df.index: + if index not in group_list: + group_list.append(index) + col_list = df.columns + + ngroups = len(group_list) + x = np.arange(ngroups) + nbars = len(col_list) + width = (1 - 0.4) / (1.5 * nbars) # the width of the bars + + matplotlib.rcParams["hatch.linewidth"] = 2 + + # patterns = [ "|" , "/", "-", "", "x", "-", "\\", "+", "o", "O" ] + # patterns = [ "|" , "/", "x", "*", ".", "-", "\\", "+", "o", "O" ] + # patterns = ["//", "//", "//", "//", "//", "//", "//"] + patterns = ["", "", "", "", "", ""] + # color_tab = ['#D9D9D9', '#BFBFBF', '#A6A6A6', '#7F7F7F', '#7F7F7F', '#7F7F7F'] + color_tab = ['#ff0000', '#ff6d01','#46bdc6', '#4285f4', '#ea4335', '#34a853'] + edge_color_tab = ['#000000', '#000000', '#000000', '#000000', '#000000', '#000000'] + + fig, ax = plt.subplots(figsize=[10, 3]) + + rects = [] + + for i in range(0, nbars): + # height_cum = np.array([0.0] * ngroups) + height_total = np.array([1 for g in group_list]) # y coo + height_curr = np.array([float(df[col_list[i]][g]) for g in group_list]) # y coo + rect_base = ax.bar(x - 0.3 + (3 * i + 1.5) * width / 2, # x coo + height_curr / height_total, # y coo + width, label=col_list[i], + color=color_tab[i], + edgecolor=edge_color_tab[i], + linewidth=0.5 + ) + rects.append(rect_base) + + hdl_pair = [(rects[i]) for i in range(nbars)] + + ax.set_xticks(x) + 
ax.set_xticklabels(group_list, rotation=0) + ax.legend() + + plt.xticks(fontsize=12, rotation=15) + plt.yticks(fontsize=12) + plt.grid(axis='y') + plt.xlabel("") + plt.ylabel("std / mean") + plt.tight_layout() + + plt.savefig(output_file + '_std.pdf', bbox_inches='tight') + plt.savefig(output_file + '_std.png', bbox_inches='tight') + plt.close() + +def plot_breakdown_avg_results(csv_file, output_file): + df = pd.read_csv(csv_file, index_col=[0, 1]) + group_list = [] + subgrp_list = [] + for index in df.index: + if index[0] not in group_list: + group_list.append(index[0]) + if index[1] not in subgrp_list: + subgrp_list.append(index[1]) + col_list = df.columns + + ngroups = len(group_list) + nsubgrps = len(subgrp_list) + x = np.arange(ngroups) + nbars = len(col_list) + width = (1 - 0.4) / (1.5 * nbars) # the width of the bars + + matplotlib.rcParams["hatch.linewidth"] = 2 + + patterns = ["", "-", "/", "|", "/", "-", "x", "-", "\\", "+", "o", "O"] + color_tab = ['#000000', '#0000ff', '#ff0000', '#ff6666', '#00ff00', '#00ffcc'] + + fig, ax = plt.subplots(figsize=[12, 2.8]) + hdl_pair = [] + + rects = [] + + for i in range(0, nbars): + height_cum = np.array([0.0] * ngroups) + height_total = np.array([df[col_list[i]][g][0] + df[col_list[i]] + [g][1] + df[col_list[i]][g][2] for g in group_list]) # y coo + height_curr = np.array([df[col_list[i]][g][0] + for g in group_list]) # y coo + rect_base = ax.bar(x - 0.3 + (3 * i + 1.5) * width / 2, # x coo + height_curr, # y coo + width, + label=col_list[i]+" "+subgrp_list[0], + color=color_tab[i], + edgecolor=color_tab[0], + linewidth=0.25 + ) + # hatch=patterns[i] + rects.append(rect_base) + height_cum += height_curr + for j in range(1, 3): + height_curr = np.array([df[col_list[i]][g][j] + for g in group_list]) + rect = ax.bar(x - 0.3 + (3 * i + 1.5) * width / 2, # x coo + height_curr, # y coo + width, + label=col_list[i]+" "+subgrp_list[j], + bottom=height_cum, + color=color_tab[i], + edgecolor=color_tab[0], + linewidth=0.25, + 
alpha = 0.25 * (4 - j) + # alpha = 0.5 * (j - 0.8) + ) + rects.append(rect) + height_cum += height_curr + + hdl_pair = [(rects[i*nsubgrps], rects[i*nsubgrps+1], rects[i*nsubgrps+2]) for i in range(nbars)] + ax.set_xticks(x) + ax.set_xticklabels(group_list, rotation=0) + + ax.legend(hdl_pair, col_list, loc='upper center', ncol=6, bbox_to_anchor=(0.5, 1.08), fontsize=11, handler_map={tuple: HandlerTuple(ndivide=None)}) + + plt.xticks(fontsize=11, rotation=15) + plt.yticks(fontsize=11) + plt.grid(axis='y') + plt.xlabel("") + plt.ylabel("Time (normalized to standard)", fontsize=11) + plt.tight_layout() + + plt.savefig(output_file + '_std.pdf', bbox_inches='tight') + plt.savefig(output_file + '_std.png', bbox_inches='tight') + plt.close() + +def main(): + parser = argparse.ArgumentParser() + addOptions(parser) + + options = parser.parse_args() + + iterations = options.iterations + output_csv_file = options.output + output_figure_file = options.figure + profiling = options.profiling + clean = options.clean + + root_directory = './' + + # config_list = get_config_list(root_directory) + config_list = config_super_list + print(config_list) + workload_list, workload_dict = get_workload_dict(root_directory, config_list) + print(workload_dict) + + if clean: + execute_clean_bashes(workload_dict) + else: + if profiling: + execute_bashes(workload_dict, iterations) + + result_dict = process_results(workload_dict, iterations) + avg_std_csv_file, super_avg_csv_file = export_xlsx_all(result_dict, config_list, iterations, output_csv_file) + plot_results(result_dict, config_list, workload_list, iterations, output_figure_file) + + plot_breakdown_avg_results(super_avg_csv_file, 'real_super_avg') + + +if __name__ == '__main__': + main() + + diff --git a/workloads/realworld/run_real_perf.py b/workloads/realworld/run_real_perf.py new file mode 100644 index 0000000000000000000000000000000000000000..c2af724069302bb58b3b2b4671ebbd82d3b4064e --- /dev/null +++ 
b/workloads/realworld/run_real_perf.py @@ -0,0 +1,185 @@ +import os +import argparse +import sys +import subprocess +import psutil + +import os +import collections +import csv + +import numpy as np +import pandas as pd +import matplotlib +from matplotlib import pyplot as plt +import matplotlib.ticker as mticker +import xlsxwriter +import seaborn as sns + +from matplotlib.ticker import FormatStrFormatter +from matplotlib.legend_handler import HandlerTuple + +from subprocess import Popen, PIPE + +from scipy.stats import gmean + + +prefix = 'run_' +parameter_super_list = ['super'] + +config_super_list = ['standard', 'async', 'uvm', 'uvm_prefetch', 'uvm_prefetch_async'] + +workload_super_list = ['lud'] +darknet_super_list = ['yolov3'] + + +def dict_to_list(input_dict): + return_list = [] + for elemement in input_dict: + return_list.append(elemement) + return return_list + +def addOptions(parser): + parser.add_argument("-i", "--iterations", type=int, default=1, + help="Number of iterations") + parser.add_argument("-c", "--csv", type=str, default='output.xlsx', + help="output trace log file") + parser.add_argument("-f", "--figure", type=str, default='micro', + help="output pdf file") + parser.add_argument("-p", "--profiling", action='store_true', + help="whether to run profiling or just parse results") + + +def get_config_list(root_directory): + config_list = [] + for dict in os.listdir(root_directory): + if os.path.isdir(dict) and dict in config_super_list: + config_list.append(dict) + return config_list + + +def get_workload_dict(root_directory, config_list): + workload_list = [] + workload_dict = dict() + for config in config_list: + config_dir = root_directory + '/' + config + + for root, directories, files in os.walk(config_dir, topdown=False): + for dir in directories: + if dir in workload_super_list: + if dir not in workload_dict: + workload_dict[dir] = dict() + workload_dict[dir][config] = os.path.join(root, dir + '_perf') + if dir not in workload_list: + 
workload_list.append(dir) + if dir == 'darknet': + for root_darnet, directories_darknet, files_darknet in os.walk(config_dir + '/darknet_perf', topdown=False): + for dir in directories_darknet: + if dir in darknet_super_list: + if dir not in workload_dict: + workload_dict[dir] = dict() + workload_dict[dir][config] = os.path.join(root_darnet, dir) + if dir not in workload_list: + workload_list.append(dir) + + return workload_list, workload_dict + +def get_run_cmd(bash_file): + return_txt = '' + text = open(bash_file, "r") + for line in text: + return_txt += line.rstrip() + return return_txt + + +def execute_bashes(workload_dict, iterations, perf_list): + for workload in workload_dict: + if workload in workload_super_list: + for config in workload_dict[workload]: + if config in config_super_list: + cur_dir = workload_dict[workload][config] + pwd = os.getcwd() + os.chdir(cur_dir) + os.system('make') + for para in parameter_super_list: + for iter in range(0, iterations): + sh_file = './' + prefix + para + '.sh' + + perf_cmd = '' + for i in range(0, len(perf_list)): + perf_cmd += perf_list[i] + if i != len(perf_list) - 1: + perf_cmd += ',' + profile_cmd = 'ncu --metrics ' + profile_cmd += perf_cmd + profile_cmd += ' --csv --log-file ' + para + '_' + str(iter) + '.profile.csv ' + profile_cmd += get_run_cmd(sh_file) + os.system(profile_cmd) + os.chdir(pwd) + elif workload in darknet_super_list: + for config in workload_dict[workload]: + if config in config_super_list: + cur_dir = workload_dict[workload][config] + darknet_dir = cur_dir + '/../' + pwd = os.getcwd() + os.chdir(darknet_dir) + os.system('make') + os.chdir(pwd) + os.chdir(cur_dir) + for para in parameter_super_list: + for iter in range(0, iterations): + sh_file = './' + prefix + para + '.sh' + + perf_cmd = '' + for i in range(0, len(perf_list)): + perf_cmd += perf_list[i] + if i != len(perf_list) - 1: + perf_cmd += ',' + profile_cmd = 'ncu --metrics ' + profile_cmd += perf_cmd + profile_cmd += ' --csv 
--log-file ' + para + '_' + str(iter) + '.profile.csv ' + profile_cmd += get_run_cmd(sh_file) + os.system(profile_cmd) + os.chdir(pwd) + + +def main(): + parser = argparse.ArgumentParser() + addOptions(parser) + + options = parser.parse_args() + + iterations = options.iterations + output_csv_file = options.csv + output_figure_file = options.figure + profiling = options.profiling + + perf_list = [] + + perf_list.append('smsp__inst_executed.sum') + + perf_list.append('smsp__sass_thread_inst_executed_op_memory_pred_on.sum') + perf_list.append('smsp__sass_thread_inst_executed_op_control_pred_on.sum') + perf_list.append('smsp__sass_thread_inst_executed_op_fp16_pred_on.sum') + perf_list.append('smsp__sass_thread_inst_executed_op_fp32_pred_on.sum') + perf_list.append('smsp__sass_thread_inst_executed_op_fp64_pred_on.sum') + perf_list.append('smsp__sass_thread_inst_executed_op_integer_pred_on.sum') + + perf_list.append('l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum') + perf_list.append('l1tex__t_sectors_pipe_lsu_mem_global_op_ld_lookup_hit.sum') + perf_list.append('l1tex__t_sectors_pipe_lsu_mem_global_op_st.sum') + perf_list.append('l1tex__t_sectors_pipe_lsu_mem_global_op_st_lookup_hit.sum') + + root_directory = './' + + config_list = get_config_list(root_directory) + print(config_list) + workload_list, workload_dict = get_workload_dict(root_directory, config_list) + print(workload_dict) + + if profiling: + execute_bashes(workload_dict, iterations, perf_list) + +if __name__ == '__main__': + main() + diff --git a/workloads/realworld/standard/BN/.clang-format b/workloads/realworld/standard/BN/.clang-format new file mode 100644 index 0000000000000000000000000000000000000000..3a5940ef65bf1e40df9511da805a7a0440184e84 --- /dev/null +++ b/workloads/realworld/standard/BN/.clang-format @@ -0,0 +1,90 @@ +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false 
+AlignEscapedNewlinesLeft: false +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: false +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IndentCaseLabels: false +IndentWidth: 2 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +ReflowComments: true 
+SortIncludes: true +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 8 +UseTab: Never +... + diff --git a/workloads/realworld/standard/BN/LICENSE b/workloads/realworld/standard/BN/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/workloads/realworld/standard/BN/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/workloads/realworld/standard/BN/Makefile b/workloads/realworld/standard/BN/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..864b8e45401b0fe12162389c2e9d33fa86f4fc9f --- /dev/null +++ b/workloads/realworld/standard/BN/Makefile @@ -0,0 +1,169 @@ +################################################################################ +# +# Copyright 1993-2013 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 
2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# Makefile project only supported on Mac OS X and Linux Platforms) +# +################################################################################ + +include ../../../common/make.config +include ./findcudalib.mk + +# Location of the CUDA Toolkit +CUDA_PATH ?= $(CUDA_DIR) + +# internal flags +NVCCFLAGS := -m${OS_SIZE} +CCFLAGS := -Wno-narrowing +NVCCLDFLAGS := +LDFLAGS := + +# Extra user flags +EXTRA_NVCCFLAGS ?= +EXTRA_NVCCLDFLAGS ?= +EXTRA_LDFLAGS ?= +EXTRA_CCFLAGS ?= + +# OS-specific build flags +ifneq ($(DARWIN),) + LDFLAGS += -rpath $(CUDA_PATH)/lib + CCFLAGS += -arch $(OS_ARCH) $(STDLIB) +else + ifeq ($(OS_ARCH),armv7l) + ifeq ($(abi),gnueabi) + CCFLAGS += -mfloat-abi=softfp + else + # default to gnueabihf + override abi := gnueabihf + LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3 + CCFLAGS += -mfloat-abi=hard + endif + endif +endif + +ifeq ($(ARMv7),1) +NVCCFLAGS += -target-cpu-arch ARM +ifneq ($(TARGET_FS),) +CCFLAGS += --sysroot=$(TARGET_FS) +LDFLAGS += --sysroot=$(TARGET_FS) +LDFLAGS += -rpath-link=$(TARGET_FS)/lib +LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib +LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-$(abi) +endif +endif + +# Debug build flags +ifeq ($(dbg),1) + NVCCFLAGS += -g -G + TARGET := debug +else + TARGET := release +endif + +ALL_CCFLAGS := +ALL_CCFLAGS += $(NVCCFLAGS) +ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS)) +ALL_CCFLAGS += $(EXTRA_NVCCFLAGS) +ALL_CCFLAGS += $(addprefix -Xcompiler 
,$(EXTRA_CCFLAGS)) + +ALL_LDFLAGS := +ALL_LDFLAGS += $(ALL_CCFLAGS) +ALL_LDFLAGS += $(NVCCLDFLAGS) +ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS)) +ALL_LDFLAGS += $(EXTRA_NVCCLDFLAGS) +ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS)) + +# Common includes and paths for CUDA +INCLUDES := -I../../common/inc -I$(INCLUDE) -I$(CUPTI_INCLUDE) +LIBRARIES := -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +################################################################################ + +# CUDA code generation flags +ifneq ($(OS_ARCH),armv7l) +GENCODE_SM10 := -gencode arch=compute_10,code=sm_10 +endif +GENCODE_SM20 := -gencode arch=compute_20,code=sm_20 +GENCODE_SM30 := -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=\"sm_35,compute_35\" +GENCODE_SM80 := -gencode arch=compute_80,code=sm_80 +GENCODE_FLAGS := $(GENCODE_SM80) + +################################################################################ + +# Target rules +all: build + +build: ordergraph_30 ordergraph_40 ordergraph_45 ordergraph_50 + +# ordergraph_25.o: ordergraph.cu ordergraph_kernel.cu +# $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_25 -o $@ -c $< + +ordergraph_30.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_30 -o $@ -c $< + +ordergraph_40.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_40 -o $@ -c $< + +ordergraph_45.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_45 -o $@ -c $< + +ordergraph_50.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_50 -o $@ -c $< + +# ordergraph_125.o: ordergraph.cu ordergraph_kernel.cu +# $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_125 -o $@ -c $< + + + +# ordergraph_25: ordergraph_25.o +# $(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES) + +ordergraph_30: ordergraph_30.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ 
$(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +ordergraph_40: ordergraph_40.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +ordergraph_45: ordergraph_45.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +ordergraph_50: ordergraph_50.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +# ordergraph_125: ordergraph_125.o +# $(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES) + +run: build + ./ordergraph + +clean: + rm -f ordergraph_30 ordergraph_40 ordergraph_45 ordergraph_50 *.o *.bin *.out + +clobber: clean diff --git a/workloads/realworld/standard/BN/README.md b/workloads/realworld/standard/BN/README.md new file mode 100644 index 0000000000000000000000000000000000000000..07158a0bd52af63032c860ff04e243e0d7c76ef1 --- /dev/null +++ b/workloads/realworld/standard/BN/README.md @@ -0,0 +1,21 @@ +The code works with CUDA 6.0. +If you are using this code for your project, please cite [our paper](https://yuemmawang.github.io/publications/wang-tpds2016.pdf): + +``` +Wang Y, Qian W, Zhang S, et al. A Learning Algorithm for Bayesian Networks and +Its Efficient Implementation on GPUs[J]. Parallel and Distributed Systems, IEEE +Transactions on, 2016, 27(1): 17-30. 
+``` + +``` +@article{wang2015learning, + title={A learning algorithm for Bayesian networks and its efficient implementation on GPUs}, + author={Wang, Yu and Qian, Weikang and Zhang, Shuchang and Liang, Xiaoyao and Yuan, Bo}, + journal={IEEE Transactions on Parallel and Distributed Systems}, + volume={27}, + number={1}, + pages={17--30}, + year={2015}, + publisher={IEEE} +} +``` diff --git a/workloads/realworld/standard/BN/data125.cu b/workloads/realworld/standard/BN/data125.cu new file mode 100644 index 0000000000000000000000000000000000000000..6bb370a636a330992e083f0b52f1f67a9a86040e --- /dev/null +++ b/workloads/realworld/standard/BN/data125.cu @@ -0,0 +1,610 @@ +// The data are synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=25; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0, 
+1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1, 
+1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, 
+1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,0,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,1, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,1, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1, 
+1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1, 
+1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,0,1,0, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,0, 
+1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1, 
+1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,0,1,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0, 
+0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,1, 
+0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1, 
+1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0, 
+1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0, 
+1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,0,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1, 
+0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, 
+1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,0,1,0,0,0,1, 
+1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, 
+1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0, 
+1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0, 
+1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,1,0,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, 
+1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1, 
+1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0, 
+1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,1,1,1,0,0,1,0, 
+1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1, 
+0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,1, 
+1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, 
+1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0, 
+1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0, 
+1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,0,0,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1, 
+1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,0,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, 
+1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0, 
+1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1, 
+1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0, 
+1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0, 
+0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1, 
+0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,1, 
+1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,0, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,0, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,0,0,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,1, 
+0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0, 
+1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,0, 
+1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,1,0, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1, 
+1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,1,1,0,0,0,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1, 
+1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,0,0,1,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, 
+1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,1,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, 
+1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,1,1,0, 
+1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1, 
+1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,1,1, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, 
+1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, 
+1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0, 
+1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, 
+1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,1,0,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1, 
+0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,0, 
+1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,0,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,1, 
+1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,0,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1, 
+1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,1,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, 
+1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,1,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0, 
+0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,1,0, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0, 
+1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,1,0,0,0,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,1,1,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,1,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0, 
+1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,1,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0, 
+1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0, 
+0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,0,1,0,1,0,1,0, 
+1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0, 
+1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, 
+0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, 
+1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,1,1,1,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, 
+1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,1, +} +#endif diff --git a/workloads/realworld/standard/BN/data25.cu b/workloads/realworld/standard/BN/data25.cu new file mode 100644 index 0000000000000000000000000000000000000000..6af94f79766c6e36ee121f6c537f987f841bf7c0 --- /dev/null +++ b/workloads/realworld/standard/BN/data25.cu @@ -0,0 +1,612 @@ +// The data are synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=25; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1, 
+1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0, 
+1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0, 
+1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0, 
+1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0, 
+0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1, 
+0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, 
+1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, 
+1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0, 
+1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1, 
+1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1, 
+1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0, 
+1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1, 
+1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0, 
+1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0, 
+1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0, 
+0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +} + +#endif + diff --git a/workloads/realworld/standard/BN/data30.cu b/workloads/realworld/standard/BN/data30.cu new file mode 100644 index 0000000000000000000000000000000000000000..bf89729e319e920533f9d134c8a2dff9aa4bc022 --- /dev/null +++ b/workloads/realworld/standard/BN/data30.cu @@ -0,0 +1,612 @@ +// The data are synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=30; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0, 
+1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1, 
+1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1, 
+1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0, 
+1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1, 
+1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0, 
+1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1, 
+1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1, 
+1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0, 
+0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0, 
+1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0, 
+1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1, 
+1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0, 
+1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0, 
+1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0, 
+0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +} + +#endif + diff --git a/workloads/realworld/standard/BN/data40.cu b/workloads/realworld/standard/BN/data40.cu new file mode 100644 index 0000000000000000000000000000000000000000..16d34d9dc4860d1dd24d604e501bccb43cae8095 --- /dev/null +++ b/workloads/realworld/standard/BN/data40.cu @@ -0,0 +1,612 @@ +// The data are synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=40; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1, 
+1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1, 
+1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0, 
+1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1, 
+1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0, 
+1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1, 
+1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1, 
+1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1, 
+1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0, 
+1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0, 
+1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0, 
+1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0, 
+1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0, 
+1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0, 
+1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1, 
+1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1, 
+0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0, 
+1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1, 
+0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1, 
+1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0, 
+1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1, +} + +#endif + diff --git a/workloads/realworld/standard/BN/data45.cu b/workloads/realworld/standard/BN/data45.cu new file mode 100644 index 0000000000000000000000000000000000000000..b23e9a35e7c27948c0853710a06be462694df57d --- /dev/null +++ b/workloads/realworld/standard/BN/data45.cu @@ -0,0 +1,616 @@ +// The data are synthesized. 
+#include +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=45; +const int STATE_N=2; +const int DATA_N=600; + + + +int data[DATA_N*NODE_N]= { +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, 
+0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0, 
+1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1, 
+1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0, 
+1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0, 
+1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1, 
+1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1, 
+1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1, 
+1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1, 
+1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1, 
+1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1, 
+1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1, 
+1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1, 
+1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0, 
+0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0, 
+1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1, 
+0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0, 
+1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1, 
+0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1, 
+0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0, 
+1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0, 
+1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1, 
+1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0, 
+1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0, +} + + +#endif + diff --git a/workloads/realworld/standard/BN/data50.cu b/workloads/realworld/standard/BN/data50.cu new file mode 100644 index 0000000000000000000000000000000000000000..936a7aa4a67a1f949f1264477388a7eb5a93a1b4 --- /dev/null +++ b/workloads/realworld/standard/BN/data50.cu @@ -0,0 +1,612 @@ +// The data are synthesized. 
+ +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=50; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,0,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1, 
+1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1, 
+1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0, 
+1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0, 
+0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1, 
+1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,0,1,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1, 
+1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,1,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,1,0, 
+1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,0,0,1,1,1, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0, 
+1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1, 
+1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1, 
+1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,1,0,0,0,1, 
+1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1, 
+1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1, 
+1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0, 
+1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1, 
+1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,1,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0, 
+1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1, 
+1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0, 
+1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0, 
+1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0, 
+1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,0,1,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0, 
+1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0, 
+0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0, 
+1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1, 
+1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,0,1,0,0,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,0,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1, 
+1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,0,1, 
+1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0, +} + +#endif + diff --git a/workloads/realworld/standard/BN/file_process.py b/workloads/realworld/standard/BN/file_process.py new file mode 100644 index 0000000000000000000000000000000000000000..eeebbee70e59153ca0f1a960f4e8ffa0437693c3 --- /dev/null +++ b/workloads/realworld/standard/BN/file_process.py @@ -0,0 +1,15 @@ +import os +filename = "data125.cu" +file_write = "data45.cu" + +f_w = open(file_write,"w") +with open(filename) as f: + content = f.readlines() + + for i, line in enumerate(content): + if i < 8: + f_w.write(line) + elif i >= 8: + f_w.write(line[0:90]) + f_w.write("\n") +f_w.close() \ No newline at end of file diff --git a/workloads/realworld/standard/BN/findcudalib.mk b/workloads/realworld/standard/BN/findcudalib.mk new file mode 100644 index 
0000000000000000000000000000000000000000..f40c2c38e5510fdee2fdf59df00160b547c056c1 --- /dev/null +++ b/workloads/realworld/standard/BN/findcudalib.mk @@ -0,0 +1,226 @@ +################################################################################ +# +# Copyright 1993-2013 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# findcudalib.mk is used to find the locations for CUDA libraries and other +# Unix Platforms. This is supported Mac OS X and Linux. 
+# +################################################################################ + +# OS Name (Linux or Darwin) +OSUPPER = $(shell uname -s 2>/dev/null | tr "[:lower:]" "[:upper:]") +OSLOWER = $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]") + +# Flags to detect 32-bit or 64-bit OS platform +OS_SIZE = $(shell uname -m | sed -e "s/i.86/32/" -e "s/x86_64/64/" -e "s/armv7l/32/") +OS_ARCH = $(shell uname -m | sed -e "s/i386/i686/") + +# Determine OS platform and unix distribution +ifeq ("$(OSLOWER)","linux") + # first search lsb_release + DISTRO = $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]") + DISTVER = $(shell lsb_release -r -s 2>/dev/null) + ifeq ("$(DISTRO)",'') + # second search and parse /etc/issue + DISTRO = $(shell more /etc/issue | awk '{print $$1}' | sed '1!d' | sed -e "/^$$/d" 2>/dev/null | tr "[:upper:]" "[:lower:]") + DISTVER= $(shell more /etc/issue | awk '{print $$2}' | sed '1!d' 2>/dev/null + endif + ifeq ("$(DISTRO)",'') + # third, we can search in /etc/os-release or /etc/{distro}-release + DISTRO = $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB") + DISTVER= $(shell awk '/DISTRIB_RELEASE/' /etc/*-release | sed 's/DISTRIB_RELEASE=//' | grep -v "DISTRIB_RELEASE") + endif +endif + +# search at Darwin (unix based info) +DARWIN = $(strip $(findstring DARWIN, $(OSUPPER))) +ifneq ($(DARWIN),) + SNOWLEOPARD = $(strip $(findstring 10.6, $(shell egrep "10\.6" /System/Library/CoreServices/SystemVersion.plist))) + LION = $(strip $(findstring 10.7, $(shell egrep "10\.7" /System/Library/CoreServices/SystemVersion.plist))) + MOUNTAIN = $(strip $(findstring 10.8, $(shell egrep "10\.8" /System/Library/CoreServices/SystemVersion.plist))) + MAVERICKS = $(strip $(findstring 10.9, $(shell egrep "10\.9" /System/Library/CoreServices/SystemVersion.plist))) +endif + +# Common binaries +GCC ?= g++ +CLANG ?= /usr/bin/clang + +ifeq ("$(OSUPPER)","LINUX") + NVCC ?= $(CUDA_PATH)/bin/nvcc 
-ccbin $(GCC) +else + # for some newer versions of XCode, CLANG is the default compiler, so we need to include this + ifneq ($(MAVERICKS),) + NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(CLANG) + STDLIB ?= -stdlib=libstdc++ + else + NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(GCC) + endif +endif + +# Take command line flags that override any of these settings +ifeq ($(i386),1) + OS_SIZE = 32 + OS_ARCH = i686 +endif +ifeq ($(x86_64),1) + OS_SIZE = 64 + OS_ARCH = x86_64 +endif +ifeq ($(ARMv7),1) + OS_SIZE = 32 + OS_ARCH = armv7l +endif + +ifeq ("$(OSUPPER)","LINUX") + # Each Linux Distribuion has a set of different paths. This applies especially when using the Linux RPM/debian packages + ifeq ("$(DISTRO)","ubuntu") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","kubuntu") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","debian") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","suse") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","suse linux") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","opensuse") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","fedora") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","redhat") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = 
/usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","red") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","redhatenterpriseworkstation") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH ?= /usr/lib + endif + endif + ifeq ("$(DISTRO)","centos") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + + ifeq ($(ARMv7),1) + CUDAPATH := /usr/arm-linux-gnueabihf/lib + CUDALINK := -L/usr/arm-linux-gnueabihf/lib + ifneq ($(TARGET_FS),) + CUDAPATH += $(TARGET_FS)/usr/lib/nvidia-current + CUDALINK += -L$(TARGET_FS)/usr/lib/nvidia-current + endif + endif + + # Search for Linux distribution path for libcuda.so + CUDALIB ?= $(shell find $(CUDAPATH) $(DFLT_PATH) -name libcuda.so -print 2>/dev/null) + + ifeq ("$(CUDALIB)",'') + $(info >>> WARNING - CUDA Driver libcuda.so is not found. Please check and re-install the NVIDIA driver. 
<<<) + EXEC=@echo "[@]" + endif +else + # This would be the Mac OS X path if we had to do anything special +endif + diff --git a/workloads/realworld/standard/BN/ordergraph.cu b/workloads/realworld/standard/BN/ordergraph.cu new file mode 100644 index 0000000000000000000000000000000000000000..d96a0b85460d0648a4e632a86b1c6e1aab15633b --- /dev/null +++ b/workloads/realworld/standard/BN/ordergraph.cu @@ -0,0 +1,733 @@ +#include +#include +#include +#include +#include +//#include +// includes CUDA +#include +// includes, kernels +#include "ordergraph_kernel.cu" +; + +#include "../../../common/cpu_timestamps.h" +#include "../../../common/cupti_add.h" + +const int HIGHEST = 3; +int taskperthr = 1; +int sizepernode; +int ITER = 100; + +// global var +float preScore = -99999999999.0f; +float score = 0.0; +float maxScore[HIGHEST] = {-999999999.0f}; +bool orders[NODE_N][NODE_N]; +bool preOrders[NODE_N][NODE_N]; +bool preGraph[NODE_N][NODE_N]; +bool bestGraph[HIGHEST][NODE_N][NODE_N]; +bool graph[NODE_N][NODE_N]; +// float prior[NODE_N][NODE_N]; +float *localscore, *D_localscore, *D_Score, *scores; +float *LG; +bool *D_parent; +int *D_resP, *parents; + +void initial(); // initial orders and data +int genOrders(); // swap +int ConCore(); // discard new order or not +bool getparent(int *bit, int *pre, int posN, int *parent, int *parN, + int time); // get every possible set of parents for a node +void incr(int *bit, int n); // binary code increases 1 each time +void incrS(int *bit, int n); // STATE_N code increases 1 each time +bool getState( + int parN, int *state, + int time); // get every possible combination of state for a parent set +float logGamma(int N); // log and gamma +float findBestGraph(); +void genScore(); +int convert(int *parent, int parN); +void sortGraph(); +void swap(int a, int b); +void Pre_logGamma(); +int findindex(int *arr, int size); +int C(int n, int a); + +FILE *fpout; + +int main(int argc, char *argv[]) { + /* + for(i=0;i maxScore[HIGHEST - 1]) { + 
maxScore[HIGHEST - 1] = preScore; + for (a = 0; a < NODE_N; a++) { + for (b = 0; b < NODE_N; b++) { + bestGraph[HIGHEST - 1][a][b] = preGraph[a][b]; + } + } + b = HIGHEST - 1; + for (a = HIGHEST - 2; a >= 0; a--) { + if (maxScore[b] > maxScore[a]) { + swap(a, b); + tmpd = maxScore[a]; + maxScore[a] = maxScore[b]; + maxScore[b] = tmpd; + b = a; + } + } + } + } + + } // endwhile + + cudaFreeHost(localscore); + cudaFree(D_localscore); + cudaFree(D_parent); + + cudaFreeHost(scores); + cudaFreeHost(parents); + cudaFree(D_Score); + cudaFree(D_resP); + + /* + for(j=0;j max) { + max = maxScore[i]; + maxi = i; + } + } + + swap(j, maxi); + tmp = maxScore[j]; + maxScore[j] = max; + maxScore[maxi] = tmp; + } +} + +void swap(int a, int b) { + int i, j; + bool tmp; + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + + tmp = bestGraph[a][i][j]; + bestGraph[a][i][j] = bestGraph[b][i][j]; + bestGraph[b][i][j] = tmp; + } + } +} + +void initial() { + int i, j, tmp, a, b, r; + bool tmpd; + tmp = 1; + for (i = 1; i <= 4; i++) { + tmp += C(NODE_N - 1, i); + } + sizepernode = tmp; + tmp *= NODE_N; + + cudaMallocHost((void **)&localscore, tmp * sizeof(float)); + + for (i = 0; i < tmp; i++) + localscore[i] = 0; + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) + orders[i][j] = 0; + } + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < i; j++) + orders[i][j] = 1; + } + r = rand() % 10000; + for (i = 0; i < r; i++) { + a = rand() % NODE_N; + b = rand() % NODE_N; + for (j = 0; j < NODE_N; j++) { + tmpd = orders[j][a]; + orders[j][a] = orders[j][b]; + orders[j][b] = tmpd; + } + + for (j = 0; j < NODE_N; j++) { + tmpd = orders[a][j]; + orders[a][j] = orders[b][j]; + orders[b][j] = tmpd; + } + } + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + preOrders[i][j] = orders[i][j]; + } + } +} + +// generate ramdom order +int genOrders() { + + int a, b, j; + bool tmp; + a = rand() % NODE_N; + b = rand() % NODE_N; + + for (j = 0; j < NODE_N; j++) { + 
tmp = orders[a][j]; + orders[a][j] = orders[b][j]; + orders[b][j] = tmp; + } + for (j = 0; j < NODE_N; j++) { + tmp = orders[j][a]; + orders[j][a] = orders[j][b]; + orders[j][b] = tmp; + } + + return 1; +} + +// decide leave or discard an order +int ConCore() { + int i, j; + float tmp; + tmp = log((rand() % 100000) / 100000.0); + if (tmp < (score - preScore)) { + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + preOrders[i][j] = orders[i][j]; + preGraph[i][j] = graph[i][j]; + } + } + preScore = score; + + return 1; + } + + return 0; +} + +void genScore() { + int *D_data; + float *D_LG; + dim3 grid(sizepernode / 256 + 1, 1, 1); + dim3 threads(256, 1, 1); + + Pre_logGamma(); + // cudaPrintfInit(); + cudaMalloc((void **)&D_data, NODE_N * DATA_N * sizeof(int)); + cudaMalloc((void **)&D_localscore, NODE_N * sizepernode * sizeof(float)); + cudaMalloc((void **)&D_LG, (DATA_N + 2) * sizeof(float)); + cudaMemset(D_localscore, 0.0, NODE_N * sizepernode * sizeof(float)); + cudaMemcpy(D_data, data, NODE_N * DATA_N * sizeof(int), + cudaMemcpyHostToDevice); + cudaMemcpy(D_LG, LG, (DATA_N + 2) * sizeof(float), cudaMemcpyHostToDevice); + genScoreKernel<<>>(sizepernode, D_localscore, D_data, D_LG); + cudaDeviceSynchronize(); + cudaMemcpy(localscore, D_localscore, NODE_N * sizepernode * sizeof(float), + cudaMemcpyDeviceToHost); + + // cudaPrintfDisplay(stdout, true); + // cudaPrintfEnd(); + + cudaFreeHost(LG); + cudaFree(D_LG); + cudaFree(D_data); + + cudaMallocHost((void **)&scores, + (sizepernode / (256 * taskperthr) + 1) * sizeof(float)); + cudaMallocHost((void **)&parents, + (sizepernode / (256 * taskperthr) + 1) * 4 * sizeof(int)); + cudaMalloc((void **)&D_Score, + (sizepernode / (256 * taskperthr) + 1) * sizeof(float)); + cudaMalloc((void **)&D_parent, NODE_N * sizeof(bool)); + cudaMalloc((void **)&D_resP, + (sizepernode / (256 * taskperthr) + 1) * 4 * sizeof(int)); +} + +int convert(int *parent, int parN) { + int i, j, w = 1, tmp = 0; + j = 0; + for (i = 0; 
/* NOTE(review): this region is a whitespace-mangled paste of a git diff; every
   <...> span (include names, kernel launch configs, template args) was
   stripped by the mangling.  Restorations below are best-effort and marked
   TODO confirm. */

/* --- tail of a bit-weight accumulation helper; its opening lines precede
   this chunk, so the fragment is reproduced as-is to keep the splice --- */
parN > 0 && i <= parent[parN - 1]; i++) {
    if (parent[j] == i) {
      j++;
      tmp += w;
    }
    w *= 2;
  }

  return tmp;
}

/* Precompute cumulative log table in pinned host memory:
   LG[i] = LG[i-1] + log(i), for i = 2 .. DATA_N+1. */
void Pre_logGamma() {

  cudaMallocHost((void **)&LG, (DATA_N + 2) * sizeof(float));

  LG[1] = log(1.0);
  float i;
  for (i = 2; i <= DATA_N + 1; i++) {
    LG[(int)i] = LG[(int)i - 1] + log((float)i);
  }
}

/* Increment a little-endian binary counter stored one bit per int,
   carrying into the next digit. */
void incr(int *bit, int n) {

  bit[n]++;
  if (bit[n] >= 2) {
    bit[n] = 0;
    incr(bit, n + 1);
  }

  return;
}

/* Increment a base-STATE_N counter.  NOTE(review): the carry re-uses the
   binary incr(), mirroring the device-side DincrS which does the same. */
void incrS(int *bit, int n) {

  bit[n]++;
  if (bit[n] >= STATE_N) {
    bit[n] = 0;
    incr(bit, n + 1);
  }

  return;
}

/* Advance `state` to the next of the STATE_N^parN parent-state combinations;
   returns false once `time` has walked past the last combination. */
bool getState(int parN, int *state, int time) {
  int j = 1;

  j = pow(STATE_N, (float)parN) - 1;

  if (time > j)
    return false;

  if (time >= 1)
    incrS(state, 0);

  return true;
}

/* Enumerate the 2^posN subsets of candidate parents `pre` with a binary
   counter in `bit`; emits the chosen parents and their count in parN. */
bool getparent(int *bit, int *pre, int posN, int *parent, int *parN, int time) {
  int i, j = 1;

  *parN = 0;
  if (time == 0)
    return true;

  for (i = 0; i < posN; i++) {
    j = j * 2;
  }
  j--;

  if (time > j)
    return false;

  incr(bit, 0);

  for (i = 0; i < posN; i++) {
    if (bit[i] == 1) {
      parent[(*parN)++] = pre[i];
    }
  }

  return true;
}

/* For each node, score every candidate parent set (GPU path for the common
   case) and keep the best one; returns the accumulated network score.
   FIX: the device->host result copies below are sized blocknum * nbatches
   to match what computeKernel writes (the original copied only `blocknum`
   score entries and `blocknum * 4` parents, then scanned
   blocknum * nbatches entries — reading past the copied region).
   TODO confirm D_Score/D_resP allocations cover the nbatches-scaled size. */
float findBestGraph() {
  float bestls = -99999999;
  int bestparent[5];
  int bestpN, total;
  int node, index;
  int pre[NODE_N] = {0};
  int parent[NODE_N] = {0};
  int posN = 0, i, j, parN, tmp, k, l;
  float ls = -99999999999, score = 0;
  int blocknum;

  for (i = 0; i < NODE_N; i++)
    for (j = 0; j < NODE_N; j++)
      graph[i][j] = 0;

  for (node = 0; node < NODE_N; node++) {

    bestls = -99999999;
    posN = 0;

    /* candidate parents = nodes marked 1 in this node's order row */
    for (i = 0; i < NODE_N; i++) {
      if (orders[node][i] == 1) {
        pre[posN++] = i;
      }
    }

    /* NOTE(review): posN >= 0 is always true, so the CPU fallback in the
       else branch is dead code; preserved as in the original. */
    if (posN >= 0) {
      total = C(posN, 4) + C(posN, 3) + C(posN, 2) + posN + 1;
      taskperthr = 1;
      blocknum = total / (256 * taskperthr) + 1;

      int nbatches = MIN_NBATCHES;

      int blocknum_max = total / (BLOCK_SIZE * MIN_NBATCHES * taskperthr) + 1;
      if (blocknum_max >= MAX_NBLOCKS) {
        blocknum = MAX_NBLOCKS;
        nbatches = (total + 1) / (BLOCK_SIZE * MAX_NBLOCKS * taskperthr);
      } else {
        blocknum = blocknum_max;
      }

      cudaMemset(D_resP, 0, blocknum * nbatches * 4 * sizeof(int));
      /* NOTE(review): cudaMemset fills *bytes*; passing -999999.0 only
         replicates the low byte of the converted int, it does not store a
         float sentinel.  Kept because the kernel overwrites every slot it
         scores. */
      cudaMemset(D_Score, -999999.0, blocknum * nbatches * sizeof(float));
      cudaMemcpy(D_parent, orders[node], NODE_N * sizeof(bool),
                 cudaMemcpyHostToDevice);

      /* launch config restored (stripped by the paste) — TODO confirm */
      computeKernel<<<blocknum, BLOCK_SIZE>>>(
          taskperthr, sizepernode, D_localscore, D_parent, node, total,
          D_Score, D_resP, nbatches);
      cudaDeviceSynchronize();
      cudaMemcpy(parents, D_resP, blocknum * nbatches * 4 * sizeof(int),
                 cudaMemcpyDeviceToHost);
      cudaMemcpy(scores, D_Score, blocknum * nbatches * sizeof(float),
                 cudaMemcpyDeviceToHost);

      for (i = 0; i < blocknum * nbatches; i++) {

        if (scores[i] > bestls) {

          bestls = scores[i];

          parN = 0;
          for (tmp = 0; tmp < 4; tmp++) {
            if (parents[i * 4 + tmp] < 0)
              break;

            bestparent[tmp] = parents[i * 4 + tmp];

            parN++;
          }

          bestpN = parN;
        }
      }
    } else {
      /* CPU fallback: exhaustive parent sets of size 4 down to 0. */
      if (posN >= 4) {
        for (i = 0; i < posN; i++) {
          for (j = i + 1; j < posN; j++) {
            for (k = j + 1; k < posN; k++) {
              for (l = k + 1; l < posN; l++) {
                parN = 4;
                if (pre[i] > node)
                  parent[1] = pre[i];
                else
                  parent[1] = pre[i] + 1;
                if (pre[j] > node)
                  parent[2] = pre[j];
                else
                  parent[2] = pre[j] + 1;
                if (pre[k] > node)
                  parent[3] = pre[k];
                else
                  parent[3] = pre[k] + 1;
                if (pre[l] > node)
                  parent[4] = pre[l];
                else
                  parent[4] = pre[l] + 1;

                index = findindex(parent, parN);
                index += sizepernode * node;
                ls = localscore[index];

                if (ls > bestls) {
                  bestls = ls;
                  bestpN = parN;
                  for (tmp = 0; tmp < parN; tmp++)
                    bestparent[tmp] = parent[tmp + 1];
                }
              }
            }
          }
        }
      }

      if (posN >= 3) {
        for (i = 0; i < posN; i++) {
          for (j = i + 1; j < posN; j++) {
            for (k = j + 1; k < posN; k++) {

              parN = 3;
              if (pre[i] > node)
                parent[1] = pre[i];
              else
                parent[1] = pre[i] + 1;
              if (pre[j] > node)
                parent[2] = pre[j];
              else
                parent[2] = pre[j] + 1;
              if (pre[k] > node)
                parent[3] = pre[k];
              else
                parent[3] = pre[k] + 1;

              index = findindex(parent, parN);
              index += sizepernode * node;
              ls = localscore[index];

              if (ls > bestls) {
                bestls = ls;
                bestpN = parN;
                for (tmp = 0; tmp < parN; tmp++)
                  bestparent[tmp] = parent[tmp + 1];
              }
            }
          }
        }
      }

      if (posN >= 2) {
        for (i = 0; i < posN; i++) {
          for (j = i + 1; j < posN; j++) {

            parN = 2;
            if (pre[i] > node)
              parent[1] = pre[i];
            else
              parent[1] = pre[i] + 1;
            if (pre[j] > node)
              parent[2] = pre[j];
            else
              parent[2] = pre[j] + 1;

            index = findindex(parent, parN);
            index += sizepernode * node;
            ls = localscore[index];

            if (ls > bestls) {
              bestls = ls;
              bestpN = parN;
              for (tmp = 0; tmp < parN; tmp++)
                bestparent[tmp] = parent[tmp + 1];
            }
          }
        }
      }

      if (posN >= 1) {
        for (i = 0; i < posN; i++) {

          parN = 1;
          if (pre[i] > node)
            parent[1] = pre[i];
          else
            parent[1] = pre[i] + 1;

          index = findindex(parent, parN);
          index += sizepernode * node;
          ls = localscore[index];

          if (ls > bestls) {
            bestls = ls;
            bestpN = parN;
            for (tmp = 0; tmp < parN; tmp++)
              bestparent[tmp] = parent[tmp + 1];
          }
        }
      }

      /* empty parent set */
      parN = 0;
      index = sizepernode * node;

      ls = localscore[index];

      if (ls > bestls) {
        bestls = ls;
        bestpN = 0;
      }
    }

    if (bestls > -99999999) {

      for (i = 0; i < bestpN; i++) {
        if (bestparent[i] < node)
          graph[node][bestparent[i] - 1] = 1;
        else
          graph[node][bestparent[i]] = 1;
      }
      score += bestls;
    }
  }

  return score;
}

int findindex(int *arr, int size) { // reminder: arr[0] has to be 0 && size ==
                                    // array size-1 && index start from 0
  int i, j, index = 0;

  for (i = 1; i < size; i++) {
    index += C(NODE_N - 1, i);
  }

  for (i = 1; i <= size - 1; i++) {
    for (j = arr[i - 1] + 1; j <= arr[i] - 1; j++) {
      index += C(NODE_N - 1 - j, size - i);
    }
  }

  index += arr[size] - arr[size - 1];

  return index;
}

/* n-choose-a via falling factorial divided stepwise (exact for ints). */
int C(int n, int a) {
  int i, res = 1, atmp = a;

  for (i = 0; i < atmp; i++) {
    res *= n;
    n--;
  }

  for (i = 0; i < atmp; i++) {
    res /= a;
    a--;
  }

  return res;
}

// ==== b/workloads/realworld/standard/BN/ordergraph_kernel.cu (new file) ====
#ifndef _ORDERGRAPH_KERNEL_H_
#define _ORDERGRAPH_KERNEL_H_

#include <stdio.h> /* restored — include name stripped by the paste; TODO confirm */

#ifdef DATA_25
#include "data25.cu"
#endif
#ifdef DATA_30
#include "data30.cu"
#endif
#ifdef DATA_40
#include "data40.cu"
#endif
#ifdef DATA_45
#include "data45.cu"
#endif
#ifdef DATA_50
#include "data50.cu"
#endif
#ifdef DATA_125
#include "data125.cu"
#endif
;

#include <cooperative_groups.h> /* restored — TODO confirm */
#include <cuda_pipeline.h>      /* restored — TODO confirm */

using namespace nvcuda::experimental;

#define PREFETCH_COUNT 2

#define BLOCK_SIZE 256
#define MAX_NBLOCKS 1024
#define MIN_NBATCHES 16

__device__ void Dincr(int *bit, int n);
__device__ void DincrS(int *bit, int n);
__device__ bool D_getState(int parN, int *sta, int time);
__device__ void D_findComb(int *comb, int l, int n);
__device__ int D_findindex(int *arr, int size);
__device__ int D_C(int n, int a);

/* One thread scores one parent-combination id for every node: counts the
   data rows matching each parent state and accumulates the log-marginal
   terms from the D_LG table into D_localscore[node, id]. */
__global__ void genScoreKernel(int sizepernode, float *D_localscore,
                               int *D_data, float *D_LG) {
  int id = blockIdx.x * BLOCK_SIZE + threadIdx.x;
  int node, index;
  bool flag;
  int parent[5] = {0};
  int pre[NODE_N] = {0};
  int state[5] = {0};
  int i, j, parN = 0, tmp, t;
  int t1 = 0, t2 = 0;
  float ls = 0;
  int Nij[STATE_N] = {0};

  if (id < sizepernode) {

    D_findComb(parent, id, NODE_N - 1);

    for (i = 0; i < 4; i++) {
      if (parent[i] > 0)
        parN++;
    }

    for (node = 0; node < NODE_N; node++) {

      j = 1;
      for (i = 0; i < NODE_N; i++) {
        if (i != node)
          pre[j++] = i;
      }

      for (tmp = 0; tmp < parN; tmp++)
        state[tmp] = 0;

      index = sizepernode * node + id;

      // priors
      /*
      for(tmp=1;tmp<=4;tmp++){
        localscore[index]+=100*(prior[node][pre[parent[tmp]]]-0.5)*(prior[node][pre[parent[tmp]]]-0.5)*(prior[node][pre[parent[tmp]]]-0.5);
      }
      */
      t = 0;
      while (D_getState(parN, state, t++)) { // for get state
        // printf("test %u\n",id);
        ls = 0;
        for (tmp = 0; tmp < STATE_N; tmp++)
          Nij[tmp] = 0;

        for (t1 = 0; t1 < DATA_N; t1++) {
          flag = true;
          for (t2 = 0; t2 < parN; t2++) {
            if (D_data[t1 * NODE_N + pre[parent[t2]]] != state[t2]) {
              flag = false;
              break;
            }
          }
          if (!flag)
            continue;

          Nij[D_data[t1 * NODE_N + node]]++;
        }

        tmp = STATE_N - 1;

        for (t1 = 0; t1 < STATE_N; t1++) {
          ls += D_LG[Nij[t1]];
          tmp += Nij[t1];
        }

        ls -= D_LG[tmp];
        ls += D_LG[STATE_N - 1];

        D_localscore[index] += ls;
      }
    }
  }
}

/* Software-pipelined best-parent search: each fetch stage scores this
   thread's share of parent sets and stages its best into shared memory via
   memcpy_async; each compute stage then tree-reduces the block's bests.
   FIX: one branch of the reduction indexed lsinblock with a single
   subscript (comparing a row *pointer* against 0 instead of the staged
   score) — the [compute % PREFETCH_COUNT] dimension is restored there. */
__global__ void computeKernel(int taskperthr, int sizepernode,
                              float *D_localscore, bool *D_parent, int node,
                              int total, float *D_Score, int *D_resP,
                              int nbatches) {
  cooperative_groups::thread_block block =
      cooperative_groups::this_thread_block();
  pipeline pipe;
  __shared__ float lsinblock[PREFETCH_COUNT][BLOCK_SIZE];

  int fetch = 0;
  int end_tile = fetch + nbatches;
  int bestparent[4] = {0}, parent[5] = {-1};

  for (int compute = fetch; compute < end_tile; compute++) {
    for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) {
      unsigned int bid = blockIdx.x * nbatches + fetch;
      unsigned int tid = threadIdx.x;
      /* NOTE(review): id spacing reproduced verbatim — verify the
         BLOCK_SIZE * nbatches stride against the host-side `total`. */
      unsigned int id = bid * (BLOCK_SIZE * nbatches) + tid;

      int posN = 1, i, index, tmp;
      int pre[NODE_N] = {0};
      int parN = 0;

      float bestls = -999999999999999, ls;

      for (i = 0; i < NODE_N; i++) {
        if (D_parent[i] == 1) {
          pre[posN++] = i;
        }
      }

      for (i = 0; i < taskperthr && ((id * taskperthr + i) < total); i++) {

        D_findComb(parent, id * taskperthr + i, posN);

        for (parN = 0; parN < 4; parN++) {
          if (parent[parN] < 0)
            break;
          if (pre[parent[parN]] > node)
            parent[parN] = pre[parent[parN]];
          else
            parent[parN] = pre[parent[parN]] + 1;
        }

        /* shift so parent[0] is the sentinel 0 required by D_findindex */
        for (tmp = parN; tmp > 0; tmp--) {
          parent[tmp] = parent[tmp - 1];
        }
        parent[0] = 0;

        index = D_findindex(parent, parN);
        index += sizepernode * node;

        ls = D_localscore[index];

        if (ls > bestls) {
          bestls = ls;
          for (tmp = 0; tmp < 4; tmp++)
            bestparent[tmp] = parent[tmp + 1];
        }
      }

      memcpy_async(lsinblock[fetch % PREFETCH_COUNT][tid], bestls, pipe);
      pipe.commit();
    }
    if (fetch == end_tile) {
      for (int i = 0; i < PREFETCH_COUNT - 1; ++i) {
        pipe.commit();
      }
      ++fetch;
    }
    /* template arg restored (stripped by the paste) — TODO confirm */
    pipe.wait_prior<PREFETCH_COUNT - 1>();
    block.sync();

    int i, t;
    unsigned int bid = blockIdx.x * nbatches + compute;
    unsigned int tid = threadIdx.x;
    // unsigned int id = bid * (BLOCK_SIZE * nbatches) + tid;

    /* tree reduction that keeps the max score and breadcrumbs the winning
       thread index into the vacated slot (decoded in the tid==0 walk) */
    for (i = BLOCK_SIZE / 2; i >= 1; i /= 2) {
      if (tid < i) {
        if (lsinblock[compute % PREFETCH_COUNT][tid + i] >
                lsinblock[compute % PREFETCH_COUNT][tid] &&
            lsinblock[compute % PREFETCH_COUNT][tid + i] < 0) {
          lsinblock[compute % PREFETCH_COUNT][tid] =
              lsinblock[compute % PREFETCH_COUNT][tid + i];
          lsinblock[compute % PREFETCH_COUNT][tid + i] = (float)(tid + i);
        } else if (lsinblock[compute % PREFETCH_COUNT][tid + i] <
                       lsinblock[compute % PREFETCH_COUNT][tid] &&
                   lsinblock[compute % PREFETCH_COUNT][tid] < 0) {
          lsinblock[compute % PREFETCH_COUNT][tid + i] = (float)tid;
        } else if (lsinblock[compute % PREFETCH_COUNT][tid] > 0 &&
                   lsinblock[compute % PREFETCH_COUNT][tid + i] < 0) {
          /* FIX: original read lsinblock[tid] / lsinblock[tid + i] here */
          lsinblock[compute % PREFETCH_COUNT][tid] =
              lsinblock[compute % PREFETCH_COUNT][tid + i];
          lsinblock[compute % PREFETCH_COUNT][tid + i] = (float)(tid + i);
        } else if (lsinblock[compute % PREFETCH_COUNT][tid] < 0 &&
                   lsinblock[compute % PREFETCH_COUNT][tid + i] > 0) {
          lsinblock[compute % PREFETCH_COUNT][tid + i] = (float)tid;
        }
      }
      block.sync();
    }
    block.sync();

    if (tid == 0) {
      D_Score[bid] = lsinblock[compute % PREFETCH_COUNT][0];
      t = 0;
      for (i = 0; i < 7 && t < 128 && t >= 0; i++) {
        t = (int)lsinblock[compute % PREFETCH_COUNT][(int)powf(2.0, i) + t];
      }
      lsinblock[compute % PREFETCH_COUNT][0] = (float)t;
    }
    block.sync();

    if (tid == (int)lsinblock[compute % PREFETCH_COUNT][0]) {
      for (i = 0; i < 4; i++) {
        D_resP[bid * 4 + i] = bestparent[i];
      }
    }
  }
}

/* Iterative binary-counter increment (device twin of host incr()). */
__device__ void Dincr(int *bit, int n) {

  while (n <= NODE_N) {
    bit[n]++;
    if (bit[n] >= 2) {
      bit[n] = 0;
      n++;
    } else {
      break;
    }
  }

  return;
}

__device__ void DincrS(int *bit, int n) {

  bit[n]++;
  if (bit[n] >= STATE_N) {
    bit[n] = 0;
    Dincr(bit, n + 1);
  }

  return;
}

__device__ bool D_getState(int parN, int *sta, int time) {
  int i, j = 1;

  for (i = 0; i < parN; i++) {
    j *= STATE_N;
  }
  j--;
  if (time > j)
    return false;

  if (time >= 1)
    DincrS(sta, 0);

  return true;
}

/* Decode combination number l (of subsets of size <= 4 drawn from n) into
   ascending member indices; unused slots are set to -1. */
__device__ void D_findComb(int *comb, int l, int n) {
  const int len = 4;
  if (l == 0) {
    for (int i = 0; i < len; i++)
      comb[i] = -1;
    return;
  }
  int sum = 0;
  int k = 1;

  while (sum < l)
    sum += D_C(n, k++);
  l -= sum - D_C(n, --k);
  int low = 0;
  int pos = 0;
  while (k > 1) {
    sum = 0;
    int s = 1;
    while (sum < l)
      sum += D_C(n - s++, k - 1);
    l -= sum - D_C(n - (--s), --k);
    low += s;
    comb[pos++] = low;
    n -= s;
  }
  comb[pos] = low + l;
  for (int i = pos + 1; i < 4; i++)
    comb[i] = -1;
}

__device__ int D_findindex(int *arr,
                           int size) { // reminder: arr[0] has to be 0 && size
                                       // == array size-1 && index start from 0
  int i, j, index = 0;

  for (i = 1; i < size; i++) {
    index += D_C(NODE_N - 1, i);
  }

  for (i = 1; i <= size - 1; i++) {
    for (j = arr[i - 1] + 1; j <= arr[i] - 1; j++) {
      index += D_C(NODE_N - 1 - j, size - i);
    }
  }

  index += arr[size] - arr[size - 1];

  return index;
}

__device__ int D_C(int n, int a) {
  int i, res = 1, atmp = a;

  for (i = 0; i < atmp; i++) {
    res *= n;
    n--;
  }

  for (i = 0; i < atmp; i++) {
    res /= a;
    a--;
  }

  return res;
}

#endif
// diff --git a/workloads/realworld/standard/BN/run.sh b/workloads/realworld/standard/BN/run.sh
// new file mode 100755 index
# 0000000000000000000000000000000000000000..f87817975334cff247b1bdf91651d41062aa8320
# --- /dev/null
# +++ b/workloads/realworld/standard/BN/run.sh
# run.sh — select the BN dataset size to run; only the 50-node binary is active.
# ./ordergraph_25
# ./ordergraph_30
# ./ordergraph_40
# ./ordergraph_45
./ordergraph_50
# ==== b/workloads/realworld/standard/BN/run_super.sh (new file, mode 100755) ====
./ordergraph_50
# ==== b/workloads/realworld/standard/backprop/Makefile (new file) ====
include ../../../common/make.config

# C compiler
CC = gcc
CC_FLAGS = -g -O2

# CUDA compiler
NVCC = $(CUDA_DIR)/bin/nvcc
NVCC_FLAGS = -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -arch=sm_80

# 'make dbg=1' enables NVCC debugging
ifeq ($(dbg),1)
  NVCC_FLAGS += -g -O0
else
  NVCC_FLAGS += -O2
endif

# 'make emu=1' compiles the CUDA kernels for emulation
ifeq ($(emu),1)
  NVCC_FLAGS += -deviceemu
endif

# FIX: $(NVCC_FLAGS) appeared twice on this link line; passing it once is
# equivalent and avoids duplicate-flag warnings.
backprop: backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp
	$(NVCC) $(NVCC_FLAGS) backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -o backprop -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++

# backprop: backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp
#	$(CC) $(CC_FLAGS) backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -o backprop -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++

%.o: %.[ch]
	$(CC) $(CC_FLAGS) $< -c

facetrain.o: facetrain.c backprop.h
	$(CC) $(CC_FLAGS) facetrain.c -c

backprop.o: backprop.c backprop.h
	$(CC) $(CC_FLAGS) backprop.c -c

backprop_cuda.o: backprop_cuda.cu backprop.h $(CUPTI_ADD_COMMON)/cupti_add.h $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp
	$(NVCC) $(NVCC_FLAGS) -c backprop_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp

imagenet.o: imagenet.c backprop.h
	$(CC) $(CC_FLAGS) imagenet.c -c

clean:
	rm -f *.o *~ backprop backprop_cuda.linkinfo
# ==== b/workloads/realworld/standard/backprop/backprop.c (new file) ====
/*
 ******************************************************************
 * HISTORY
 * 15-Oct-94 Jeff Shufelt (js), Carnegie Mellon University
 *   Prepared for 15-681, Fall 1994.
 * Modified by Shuai Che
 ******************************************************************
 */
/* NOTE(review): the original #include <...> names were stripped by the
   paste; the standard Rodinia backprop set is restored — TODO confirm. */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include "backprop.h"
#include <omp.h>
//#define OPEN

#define ABS(x) (((x) > 0.0) ? (x) : (-(x)))

#define fastcopy(to,from,len)\
{\
  register char *_to,*_from;\
  register int _i,_l;\
  _to = (char *)(to);\
  _from = (char *)(from);\
  _l = (len);\
  for (_i = 0; _i < _l; _i++) *_to++ = *_from++;\
}

/*** Return random number between 0.0 and 1.0 ***/
float drnd()
{
  return ((float) rand() / (float) BIGRND);
}

/*** Return random number between -1.0 and 1.0 ***/
float dpn1()
{
  return ((drnd() * 2.0) - 1.0);
}

/*** The squashing function.  Currently, it's a sigmoid.
***/ + +float squash(x) +float x; +{ + float m; + //x = -x; + //m = 1 + x + x*x/2 + x*x*x/6 + x*x*x*x/24 + x*x*x*x*x/120; + //return(1.0 / (1.0 + m)); + return (1.0 / (1.0 + exp(-x))); +} + + +/*** Allocate 1d array of floats ***/ + +float *alloc_1d_dbl(n) +int n; +{ + float *new; + + new = (float *) malloc ((unsigned) (n * sizeof (float))); + if (new == NULL) { + printf("ALLOC_1D_DBL: Couldn't allocate array of floats\n"); + return (NULL); + } + return (new); +} + + +/*** Allocate 2d array of floats ***/ + +float **alloc_2d_dbl(m, n) +int m, n; +{ + int i; + float **new; + + new = (float **) malloc ((unsigned) (m * sizeof (float *))); + if (new == NULL) { + printf("ALLOC_2D_DBL: Couldn't allocate array of dbl ptrs\n"); + return (NULL); + } + + for (i = 0; i < m; i++) { + new[i] = alloc_1d_dbl(n); + } + + return (new); +} + + +bpnn_randomize_weights(w, m, n) +float **w; +int m, n; +{ + int i, j; + + for (i = 0; i <= m; i++) { + for (j = 0; j <= n; j++) { + w[i][j] = (float) rand()/RAND_MAX; + // w[i][j] = dpn1(); + } + } +} + +bpnn_randomize_row(w, m) +float *w; +int m; +{ + int i; + for (i = 0; i <= m; i++) { + //w[i] = (float) rand()/RAND_MAX; + w[i] = 0.1; + } +} + + +bpnn_zero_weights(w, m, n) +float **w; +int m, n; +{ + int i, j; + + for (i = 0; i <= m; i++) { + for (j = 0; j <= n; j++) { + w[i][j] = 0.0; + } + } +} + + +void bpnn_initialize(seed) +{ + printf("Random number generator seed: %d\n", seed); + srand(seed); +} + + +BPNN *bpnn_internal_create(n_in, n_hidden, n_out) +int n_in, n_hidden, n_out; +{ + BPNN *newnet; + + newnet = (BPNN *) malloc (sizeof (BPNN)); + if (newnet == NULL) { + printf("BPNN_CREATE: Couldn't allocate neural network\n"); + return (NULL); + } + + newnet->input_n = n_in; + newnet->hidden_n = n_hidden; + newnet->output_n = n_out; + newnet->input_units = alloc_1d_dbl(n_in + 1); + newnet->hidden_units = alloc_1d_dbl(n_hidden + 1); + newnet->output_units = alloc_1d_dbl(n_out + 1); + + newnet->hidden_delta = alloc_1d_dbl(n_hidden + 1); + 
newnet->output_delta = alloc_1d_dbl(n_out + 1); + newnet->target = alloc_1d_dbl(n_out + 1); + + newnet->input_weights = alloc_2d_dbl(n_in + 1, n_hidden + 1); + newnet->hidden_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); + + newnet->input_prev_weights = alloc_2d_dbl(n_in + 1, n_hidden + 1); + newnet->hidden_prev_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); + + return (newnet); +} + + +void bpnn_free(net) +BPNN *net; +{ + int n1, n2, i; + + n1 = net->input_n; + n2 = net->hidden_n; + + free((char *) net->input_units); + free((char *) net->hidden_units); + free((char *) net->output_units); + + free((char *) net->hidden_delta); + free((char *) net->output_delta); + free((char *) net->target); + + for (i = 0; i <= n1; i++) { + free((char *) net->input_weights[i]); + free((char *) net->input_prev_weights[i]); + } + free((char *) net->input_weights); + free((char *) net->input_prev_weights); + + for (i = 0; i <= n2; i++) { + free((char *) net->hidden_weights[i]); + free((char *) net->hidden_prev_weights[i]); + } + free((char *) net->hidden_weights); + free((char *) net->hidden_prev_weights); + + free((char *) net); +} + + +/*** Creates a new fully-connected network from scratch, + with the given numbers of input, hidden, and output units. + Threshold units are automatically included. All weights are + randomly initialized. + + Space is also allocated for temporary storage (momentum weights, + error computations, etc). 
+***/ + +BPNN *bpnn_create(n_in, n_hidden, n_out) +int n_in, n_hidden, n_out; +{ + + BPNN *newnet; + + newnet = bpnn_internal_create(n_in, n_hidden, n_out); + +#ifdef INITZERO + bpnn_zero_weights(newnet->input_weights, n_in, n_hidden); +#else + bpnn_randomize_weights(newnet->input_weights, n_in, n_hidden); +#endif + bpnn_randomize_weights(newnet->hidden_weights, n_hidden, n_out); + bpnn_zero_weights(newnet->input_prev_weights, n_in, n_hidden); + bpnn_zero_weights(newnet->hidden_prev_weights, n_hidden, n_out); + bpnn_randomize_row(newnet->target, n_out); + return (newnet); +} + + +void bpnn_layerforward(l1, l2, conn, n1, n2) +float *l1, *l2, **conn; +int n1, n2; +{ + float sum; + int j, k; + + /*** Set up thresholding unit ***/ + l1[0] = 1.0; +#ifdef OPEN + omp_set_num_threads(NUM_THREAD); + #pragma omp parallel for shared(conn, n1, n2, l1) private(k, j) reduction(+: sum) schedule(static) +#endif + /*** For each unit in second layer ***/ + for (j = 1; j <= n2; j++) { + + /*** Compute weighted sum of its inputs ***/ + sum = 0.0; + for (k = 0; k <= n1; k++) { + sum += conn[k][j] * l1[k]; + } + l2[j] = squash(sum); + } +} + +//extern "C" +void bpnn_output_error(delta, target, output, nj, err) +float *delta, *target, *output, *err; +int nj; +{ + int j; + float o, t, errsum; + errsum = 0.0; + for (j = 1; j <= nj; j++) { + o = output[j]; + t = target[j]; + delta[j] = o * (1.0 - o) * (t - o); + errsum += ABS(delta[j]); + } + *err = errsum; +} + + +void bpnn_hidden_error(delta_h, + nh, + delta_o, + no, + who, + hidden, + err) +float *delta_h, *delta_o, *hidden, **who, *err; +int nh, no; +{ + int j, k; + float h, sum, errsum; + + errsum = 0.0; + for (j = 1; j <= nh; j++) { + h = hidden[j]; + sum = 0.0; + for (k = 1; k <= no; k++) { + sum += delta_o[k] * who[j][k]; + } + delta_h[j] = h * (1.0 - h) * sum; + errsum += ABS(delta_h[j]); + } + *err = errsum; +} + + +void bpnn_adjust_weights(delta, ndelta, ly, nly, w, oldw) +float *delta, *ly, **w, **oldw; +{ + float new_dw; + int 
k, j; + ly[0] = 1.0; + //eta = 0.3; + //momentum = 0.3; + +#ifdef OPEN + omp_set_num_threads(NUM_THREAD); + #pragma omp parallel for \ + shared(oldw, w, delta) \ + private(j, k, new_dw) \ + firstprivate(ndelta, nly, momentum) +#endif + for (j = 1; j <= ndelta; j++) { + for (k = 0; k <= nly; k++) { + new_dw = ((ETA * delta[j] * ly[k]) + (MOMENTUM * oldw[k][j])); + w[k][j] += new_dw; + oldw[k][j] = new_dw; + } + } +} + + +void bpnn_feedforward(net) +BPNN *net; +{ + int in, hid, out; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + + /*** Feed forward input activations. ***/ + bpnn_layerforward(net->input_units, net->hidden_units, + net->input_weights, in, hid); + bpnn_layerforward(net->hidden_units, net->output_units, + net->hidden_weights, hid, out); + +} + + +void bpnn_train(net, eo, eh) +BPNN *net; +float *eo, *eh; +{ + int in, hid, out; + float out_err, hid_err; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + + /*** Feed forward input activations. ***/ + bpnn_layerforward(net->input_units, net->hidden_units, + net->input_weights, in, hid); + bpnn_layerforward(net->hidden_units, net->output_units, + net->hidden_weights, hid, out); + + /*** Compute error on output and hidden units. ***/ + bpnn_output_error(net->output_delta, net->target, net->output_units, + out, &out_err); + bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, + net->hidden_weights, net->hidden_units, &hid_err); + *eo = out_err; + *eh = hid_err; + + /*** Adjust input and hidden weights. 
***/ + bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, + net->hidden_weights, net->hidden_prev_weights); + bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in, + net->input_weights, net->input_prev_weights); + +} + + + + +void bpnn_save(net, filename) +BPNN *net; +char *filename; +{ + int n1, n2, n3, i, j, memcnt; + float dvalue, **w; + char *mem; + ///add// + FILE *pFile; + pFile = fopen( filename, "w+" ); + /////// + /* + if ((fd = creat(filename, 0644)) == -1) { + printf("BPNN_SAVE: Cannot create '%s'\n", filename); + return; + } + */ + + n1 = net->input_n; n2 = net->hidden_n; n3 = net->output_n; + printf("Saving %dx%dx%d network to '%s'\n", n1, n2, n3, filename); + //fflush(stdout); + + //write(fd, (char *) &n1, sizeof(int)); + //write(fd, (char *) &n2, sizeof(int)); + //write(fd, (char *) &n3, sizeof(int)); + + fwrite( (char *) &n1 , sizeof(char), sizeof(char), pFile); + fwrite( (char *) &n2 , sizeof(char), sizeof(char), pFile); + fwrite( (char *) &n3 , sizeof(char), sizeof(char), pFile); + + + + memcnt = 0; + w = net->input_weights; + mem = (char *) malloc ((unsigned) ((n1+1) * (n2+1) * sizeof(float))); + for (i = 0; i <= n1; i++) { + for (j = 0; j <= n2; j++) { + dvalue = w[i][j]; + fastcopy(&mem[memcnt], &dvalue, sizeof(float)); + memcnt += sizeof(float); + } + } + //write(fd, mem, (n1+1) * (n2+1) * sizeof(float)); + fwrite( mem , (unsigned)(sizeof(float)), (unsigned) ((n1+1) * (n2+1) * sizeof(float)) , pFile); + free(mem); + + memcnt = 0; + w = net->hidden_weights; + mem = (char *) malloc ((unsigned) ((n2+1) * (n3+1) * sizeof(float))); + for (i = 0; i <= n2; i++) { + for (j = 0; j <= n3; j++) { + dvalue = w[i][j]; + fastcopy(&mem[memcnt], &dvalue, sizeof(float)); + memcnt += sizeof(float); + } + } + //write(fd, mem, (n2+1) * (n3+1) * sizeof(float)); + fwrite( mem , sizeof(float), (unsigned) ((n2+1) * (n3+1) * sizeof(float)) , pFile); + free(mem); + + fclose(pFile); + return; +} + + +BPNN *bpnn_read(filename) +char 
*filename; +{ + char *mem; + BPNN *new; + int fd, n1, n2, n3, i, j, memcnt; + + if ((fd = open(filename, 0, 0644)) == -1) { + return (NULL); + } + + printf("Reading '%s'\n", filename); //fflush(stdout); + + read(fd, (char *) &n1, sizeof(int)); + read(fd, (char *) &n2, sizeof(int)); + read(fd, (char *) &n3, sizeof(int)); + new = bpnn_internal_create(n1, n2, n3); + + printf("'%s' contains a %dx%dx%d network\n", filename, n1, n2, n3); + printf("Reading input weights..."); //fflush(stdout); + + memcnt = 0; + mem = (char *) malloc ((unsigned) ((n1+1) * (n2+1) * sizeof(float))); + read(fd, mem, (n1+1) * (n2+1) * sizeof(float)); + for (i = 0; i <= n1; i++) { + for (j = 0; j <= n2; j++) { + fastcopy(&(new->input_weights[i][j]), &mem[memcnt], sizeof(float)); + memcnt += sizeof(float); + } + } + free(mem); + + printf("Done\nReading hidden weights..."); //fflush(stdout); + + memcnt = 0; + mem = (char *) malloc ((unsigned) ((n2+1) * (n3+1) * sizeof(float))); + read(fd, mem, (n2+1) * (n3+1) * sizeof(float)); + for (i = 0; i <= n2; i++) { + for (j = 0; j <= n3; j++) { + fastcopy(&(new->hidden_weights[i][j]), &mem[memcnt], sizeof(float)); + memcnt += sizeof(float); + } + } + free(mem); + close(fd); + + printf("Done\n"); //fflush(stdout); + + bpnn_zero_weights(new->input_prev_weights, n1, n2); + bpnn_zero_weights(new->hidden_prev_weights, n2, n3); + + return (new); +} diff --git a/workloads/realworld/standard/backprop/backprop.h b/workloads/realworld/standard/backprop/backprop.h new file mode 100644 index 0000000000000000000000000000000000000000..dfaafe39b76a1c9c1455e38bbe0a14d5461d5d2b --- /dev/null +++ b/workloads/realworld/standard/backprop/backprop.h @@ -0,0 +1,53 @@ +#ifndef _BACKPROP_H_ +#define _BACKPROP_H_ + +#define BIGRND 0x7fffffff + +#define GPU +#define THREADS 256 +#define WIDTH 16 // shared memory width +#define HEIGHT 16 // shared memory height + +#define ETA 0.3 //eta value +#define MOMENTUM 0.3 //momentum value +#define NUM_THREAD 4 //OpenMP threads + + +typedef 
struct {
  int input_n;  /* number of input units  */
  int hidden_n; /* number of hidden units */
  int output_n; /* number of output units */

  float *input_units;  /* the input units  */
  float *hidden_units; /* the hidden units */
  float *output_units; /* the output units */

  float *hidden_delta; /* storage for hidden unit error */
  float *output_delta; /* storage for output unit error */

  float *target; /* storage for target vector */

  float **input_weights;  /* weights from input to hidden layer  */
  float **hidden_weights; /* weights from hidden to output layer */

  /*** The next two are for momentum ***/
  float **input_prev_weights;  /* previous change on input to hidden wgt  */
  float **hidden_prev_weights; /* previous change on hidden to output wgt */
} BPNN;


/*** User-level functions ***/

void bpnn_initialize();

BPNN *bpnn_create();
void bpnn_free();

void bpnn_train();
void bpnn_feedforward();

void bpnn_save();
BPNN *bpnn_read();


#endif
// ==== b/workloads/realworld/standard/backprop/backprop_cuda.cu (new file) ====


// includes, system
/* NOTE(review): the original #include <...> names were stripped by the
   paste; a plausible set is restored — TODO confirm. */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <sys/time.h>
#include <assert.h>
#include <stdint.h>
#include "../../../common/cupti_add.h"
#include "../../../common/cpu_timestamps.h"

/* Wall-clock seconds via gettimeofday. */
double rtclock()
{
  struct timezone Tzp;
  struct timeval Tp;
  int stat;
  stat = gettimeofday(&Tp, &Tzp);
  if (stat != 0)
    printf("Error return from gettimeofday: %d", stat);
  return (Tp.tv_sec + Tp.tv_usec * 1.0e-6);
}

double t_start, t_end;

// includes, kernels
#include "backprop_cuda_kernel.cu"
#include "backprop.h"

////////////////////////////////////////////////////////////////////////////////

extern "C" void bpnn_layerforward(float *l1, float *l2, float **conn, int n1, int n2);

extern "C" void bpnn_output_error(float *delta, float *target, float *output, int nj, float *err);

extern "C" void bpnn_hidden_error(float *delta_h, int nh, float *delta_o, int no, float **who, float *hidden, float *err);

extern "C" void bpnn_adjust_weights(float *delta, int ndelta, float *ly, int nly, float **w, float **oldw);

extern "C" int setup(int argc, char **argv);

extern "C" float **alloc_2d_dbl(int m, int n);

extern "C" float squash(float x);

double gettime()
{
  struct timeval t;
  gettimeofday(&t, NULL);
  return t.tv_sec + t.tv_usec * 1e-6;
}

unsigned int num_threads = 0;
unsigned int num_blocks = 0;

////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char *argv[])
{
  uint64_t start_tsc = rdtsc();
  uint64_t start_tsp = rdtsp();
  printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp);

  /* FIX: the original read argv[2] unconditionally; with fewer arguments
     that is undefined behavior.  Fail fast with a usage hint instead. */
  if (argc < 3) {
    fprintf(stderr, "usage: %s <layer size> <num blocks>\n", argv[0]);
    return 1;
  }

  num_blocks = atoi(argv[2]);
  setup(argc, argv);
}

/* GPU-accelerated training step: forward layer on the device, CPU error
   computation, then device weight adjustment; CUPTI tracing and CPU
   timestamps bracket the GPU section. */
extern "C" void bpnn_train_cuda(BPNN *net, float *eo, float *eh)
{
  int in, hid, out;
  float out_err, hid_err;

  in = net->input_n;
  hid = net->hidden_n;
  out = net->output_n;

#ifdef GPU
  int m = 0;
  float *input_hidden_cuda;
  float *input_cuda;
  float *output_hidden_cuda;
  float *partial_sum;
  float *hidden_partial_sum;
  float *hidden_delta_cuda;
  float *input_prev_weights_cuda;
  float sum;
  float *input_weights_one_dim;
  float *input_weights_prev_one_dim;
  // ruihao
  // num_blocks = in / 16;
  // dim3 grid(1, num_blocks);
  // dim3 threads(16, 16);

  int tile_size = in / num_blocks;
  dim3 grid(1, num_blocks);
  dim3 threads(16, 16);
  // ruihao

  input_weights_one_dim = (float *)malloc((in + 1) * (hid + 1) * sizeof(float));
  input_weights_prev_one_dim = (float *)malloc((in + 1) * (hid + 1) * sizeof(float));
  // ruihao
  // partial_sum = (float *) malloc(num_blocks * WIDTH * sizeof(float));
  partial_sum = (float *)malloc(in * sizeof(float));
  // ruihao

  // this preprocessing stage is added to correct the bugs of wrong memcopy using two-dimensional net->inputweights
  for (int k = 0; k <= in; k++)
  {
    for (int j = 0; j <= hid; j++)
    {
      input_weights_one_dim[m] = net->input_weights[k][j];
      input_weights_prev_one_dim[m] = net->input_prev_weights[k][j];
      m++;
    }
  }

  GPU_argv_init();

  initTrace();
  startCPU();

  cudaMalloc((void **)&input_cuda, (in + 1) * sizeof(float));
  cudaMalloc((void **)&output_hidden_cuda, (hid + 1) * sizeof(float));
  cudaMalloc((void **)&input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float));
  // ruihao
  // cudaMalloc((void**) &hidden_partial_sum, num_blocks * WIDTH * sizeof(float));
  cudaMalloc((void **)&hidden_partial_sum, in * sizeof(float));
  // ruihao

#endif

#ifdef CPU

  printf("Performing CPU computation\n");
  bpnn_layerforward(net->input_units, net->hidden_units, net->input_weights, in, hid);

#endif

#ifdef GPU

  //printf("Performing GPU computation\n");

  // printf("in= %d, hid = %d, numblocks = %d\n", in, hid, num_blocks);


  cudaMemcpy(input_cuda, net->input_units, (in + 1) * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(input_hidden_cuda, input_weights_one_dim, (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyHostToDevice);

  // ruihao
  //t_start = rtclock();
  // ruihao
  bpnn_layerforward_CUDA<<<grid, threads>>>(input_cuda,
                                            output_hidden_cuda,
                                            input_hidden_cuda,
                                            hidden_partial_sum,
                                            in,
                                            hid,
                                            tile_size);

  cudaDeviceSynchronize();

  // ruihao
  // cudaMemcpy(partial_sum, hidden_partial_sum, num_blocks * WIDTH * sizeof(float), cudaMemcpyDeviceToHost);
  //t_end = rtclock();
  //fprintf(stdout, "bpnn_layerforward_CUDA GPU Runtime: %0.6lfs\n", t_end - t_start);
  cudaMemcpy(partial_sum, hidden_partial_sum, in * sizeof(float), cudaMemcpyDeviceToHost);
  // ruihao

  cudaError_t error = cudaGetLastError();
  if (error != cudaSuccess)
  {
    printf("bpnn kernel error: %s\n", cudaGetErrorString(error));
    exit(EXIT_FAILURE);
  }

  /* fold the per-tile partial sums into the hidden activations */
  for (int j = 1; j <= hid; j++)
  {
    sum = 0.0;
    // ruihao
    // for (int k = 0; k < num_blocks; k++) {
    //   sum += partial_sum[k * hid + j-1] ;
    // }
    for (int k = 0; k < in / WIDTH; k++)
    {
      sum += partial_sum[k * hid + j - 1];
    }
    // ruihao
    sum += net->input_weights[0][j];
    net->hidden_units[j] = float(1.0 / (1.0 + exp(-sum)));
  }
#endif

  bpnn_layerforward(net->hidden_units, net->output_units, net->hidden_weights, hid, out);
  bpnn_output_error(net->output_delta, net->target, net->output_units, out, &out_err);
  bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, net->hidden_weights, net->hidden_units, &hid_err);
  bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, net->hidden_weights, net->hidden_prev_weights);

#ifdef CPU

  bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in, net->input_weights, net->input_prev_weights);

#endif

#ifdef GPU

  cudaMalloc((void **)&hidden_delta_cuda, (hid + 1) * sizeof(float));
  cudaMalloc((void **)&input_prev_weights_cuda, (in + 1) * (hid + 1) * sizeof(float));

  cudaMemcpy(hidden_delta_cuda, net->hidden_delta, (hid + 1) * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(input_prev_weights_cuda, input_weights_prev_one_dim, (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(input_hidden_cuda, input_weights_one_dim, (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyHostToDevice);
  // ruihao
  //t_start = rtclock();
  // ruihao
  bpnn_adjust_weights_cuda<<<grid, threads>>>(hidden_delta_cuda,
                                              hid,
                                              input_cuda,
                                              in,
                                              input_hidden_cuda,
                                              input_prev_weights_cuda,
                                              tile_size);
  // ruihao
  cudaDeviceSynchronize();
  //t_end = rtclock();
  //fprintf(stdout, "bpnn_adjust_weights_cuda GPU Runtime: %0.6lfs\n", t_end - t_start);
  // ruihao
  cudaMemcpy(net->input_units, input_cuda, (in + 1) * sizeof(float), cudaMemcpyDeviceToHost);
  cudaMemcpy(input_weights_one_dim, input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyDeviceToHost);


  cudaFree(input_cuda);
  cudaFree(output_hidden_cuda);
  cudaFree(input_hidden_cuda);
  cudaFree(hidden_partial_sum);
  cudaFree(input_prev_weights_cuda);
  cudaFree(hidden_delta_cuda);

  endCPU();
  finiTrace();

  free(partial_sum);
  free(input_weights_one_dim);
  free(input_weights_prev_one_dim);

#endif
}
// ==== b/workloads/realworld/standard/backprop/backprop_cuda_kernel.cu ====


#ifndef _BACKPROP_CUDA_KERNEL_H_
#define _BACKPROP_CUDA_KERNEL_H_

#include <stdio.h> /* restored — include name stripped by the paste; TODO confirm */
#include "backprop.h"
#include "math.h"
#include "cuda.h"

#include <cooperative_groups.h> /* restored — TODO confirm */
#include <cuda_runtime.h>       /* restored — TODO confirm */

/* Tiled forward pass: each 16x16 block multiplies a slice of the input by
   its weight columns in shared memory and tree-reduces along ty, writing
   one partial sum per (tile, hidden unit). */
__global__ void
bpnn_layerforward_CUDA(float *input_cuda,
                       float *output_hidden_cuda,
                       float *input_hidden_cuda,
                       float *hidden_partial_sum,
                       int in,
                       int hid,
                       int tile_size)
{
  cooperative_groups::thread_block block = cooperative_groups::this_thread_block();
  int by = blockIdx.y;
  int tx = threadIdx.x;
  int ty = threadIdx.y;

  int batches = tile_size / WIDTH;

  __shared__ float input_node[HEIGHT];
  __shared__ float weight_matrix[HEIGHT * WIDTH];

  for (int b = 0; b < batches; b++)
  {
    int index = (hid + 1) * HEIGHT * (batches * by + b) + (hid + 1) * ty + tx + 1 + (hid + 1);

    int index_in = HEIGHT * (batches * by + b) + ty + 1;

    if (tx == 0)
      input_node[ty] = input_cuda[index_in];

    block.sync();

    weight_matrix[ty * WIDTH + tx] = input_hidden_cuda[index];

    block.sync();

    weight_matrix[ty * WIDTH + tx] = weight_matrix[ty * WIDTH + tx] * input_node[ty];

    block.sync();

    /* pairwise reduction over the ty dimension */
    for (int i = 1; i <= __log2f(HEIGHT); i++)
    {

      int power_two = __powf(2, i);

      if (ty % power_two == 0)
        weight_matrix[ty * WIDTH + tx] += weight_matrix[(ty + power_two / 2) * WIDTH + tx];

      block.sync();
    }

    input_hidden_cuda[index] = weight_matrix[ty * WIDTH + tx];

    block.sync();

    if (tx == 0)
    {
      hidden_partial_sum[(batches * by + b) * hid + ty] = weight_matrix[tx * WIDTH + ty];
    }
  }
}

/* Momentum weight update on the device (mirrors bpnn_adjust_weights).
   The delta-weight expression was computed twice in the original with the
   same (pre-update) oldw value; it is hoisted into one local — the stored
   values are bit-identical. */
__global__ void bpnn_adjust_weights_cuda(float *delta,
                                         int hid,
                                         float *ly,
                                         int in,
                                         float *w,
                                         float *oldw,
                                         int tile_size)
{
  cooperative_groups::thread_block block = cooperative_groups::this_thread_block();
  int by = blockIdx.y;

  int tx = threadIdx.x;
  int ty = threadIdx.y;

  int batches = tile_size / WIDTH;

  for (int b = 0; b < batches; b++)
  {
    int index = (hid + 1) * HEIGHT * (batches * by + b) + (hid + 1) * ty + tx + 1 + (hid + 1);
    int index_y = HEIGHT * (batches * by + b) + ty + 1;
    int index_x = tx + 1;
    // eta = 0.3;
    // momentum = 0.3;

    float new_dw = ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index]));
    w[index] += new_dw;
    oldw[index] = new_dw;

    block.sync();

    /* bias row handled once by the first tile */
    if (ty == 0 && by == 0 && b == 0)
    {
      float new_dw0 = ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x]));
      w[index_x] += new_dw0;
      oldw[index_x] = new_dw0;
    }
  }
}
#endif
// ==== b/workloads/realworld/standard/backprop/facetrain.c (new file) ====

/* NOTE(review): the original #include <...> names were stripped by the
   paste; restored best-effort — TODO confirm. */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include "backprop.h"
#include "omp.h"

extern char *strcpy();
extern void exit();

int layer_size = 0;

/* (backprop_face continues past the end of this chunk) */
backprop_face()
{
  BPNN *net;
  int i;
  float out_err, hid_err;
  net = bpnn_create(layer_size, 16, 1); // (16, 1 can not be changed)

  printf("Input layer size : %d\n", layer_size);
  load(net);
  // entering the training kernel, only one iteration
+ printf("Starting training kernel\n"); + bpnn_train_cuda(net, &out_err, &hid_err); + bpnn_free(net); + printf("Training done\n"); +} + +int setup(argc, argv) +int argc; +char *argv[]; +{ + + int seed; + + if (argc != 3) + { + fprintf(stderr, "usage: backprop \n"); + exit(0); + } + layer_size = atoi(argv[1]); + if (layer_size % 16 != 0) + { + fprintf(stderr, "The number of input points must be divided by 16\n"); + exit(0); + } + + seed = 7; + bpnn_initialize(seed); + backprop_face(); + + exit(0); +} diff --git a/workloads/realworld/standard/backprop/imagenet.c b/workloads/realworld/standard/backprop/imagenet.c new file mode 100644 index 0000000000000000000000000000000000000000..255b0d5d8ca67508f6732e266299e7c58012906f --- /dev/null +++ b/workloads/realworld/standard/backprop/imagenet.c @@ -0,0 +1,24 @@ + +#include +#include +#include "backprop.h" + +extern layer_size; + +load(net) +BPNN *net; +{ + float *units; + int nr, nc, imgsize, i, j, k; + + nr = layer_size; + + imgsize = nr * nc; + units = net->input_units; + + k = 1; + for (i = 0; i < nr; i++) { + units[k] = (float) rand()/RAND_MAX ; + k++; + } +} diff --git a/workloads/realworld/standard/backprop/run.sh b/workloads/realworld/standard/backprop/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..3ff765b6ee27aa6718be615c512868bee7530ec7 --- /dev/null +++ b/workloads/realworld/standard/backprop/run.sh @@ -0,0 +1,5 @@ +# ./backprop 524288 +./backprop 8388608 128 + +# ./backprop 66708864 128 +# ./backprop 66708864 1024 \ No newline at end of file diff --git a/workloads/realworld/standard/backprop/run_super.sh b/workloads/realworld/standard/backprop/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..d9e8a3d42354f597bbcd153d180d9b23bed71192 --- /dev/null +++ b/workloads/realworld/standard/backprop/run_super.sh @@ -0,0 +1,3 @@ +# ./backprop 1073741824 1024 +# ./backprop 134217728 1024 +./backprop 67108864 1024 \ No newline at end of file diff --git 
a/workloads/realworld/standard/darknet/LICENSE b/workloads/realworld/standard/darknet/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..a50f7d700ba02bfacd50f59b315311cf4d0bbda2 --- /dev/null +++ b/workloads/realworld/standard/darknet/LICENSE @@ -0,0 +1,12 @@ + YOLO LICENSE + Version 2, July 29 2016 + +THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER +SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN +TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES +LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY. NOW HERE'S +THE REAL LICENSE: + +0. Darknet is public domain. +1. Do whatever you want with it. +2. Stop emailing me about it! diff --git a/workloads/realworld/standard/darknet/LICENSE.fuck b/workloads/realworld/standard/darknet/LICENSE.fuck new file mode 100644 index 0000000000000000000000000000000000000000..8b1a9d8189b3b9f4479221d52882ce36fdc73a62 --- /dev/null +++ b/workloads/realworld/standard/darknet/LICENSE.fuck @@ -0,0 +1,13 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + +Copyright (C) 2004 Sam Hocevar + +Everyone is permitted to copy and distribute verbatim or modified +copies of this license document, and changing it is allowed as long +as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. 
diff --git a/workloads/realworld/standard/darknet/LICENSE.gen b/workloads/realworld/standard/darknet/LICENSE.gen new file mode 100644 index 0000000000000000000000000000000000000000..c54113271e15057c4def6676693eb96fd6362b28 --- /dev/null +++ b/workloads/realworld/standard/darknet/LICENSE.gen @@ -0,0 +1,91 @@ +RNN LICENSE Version 3, June 21 2017 + +Copyright (c) 1990, 1989, 1999 Free87337 May 48 THIRD PARTIES OR ANY OTHER THE +COMPLAIN OR CONSEQUENTIAL DAMAGES AND REGARDLESS OF WHETHER IN CONTRACT, TO THE +EXTENT REPAIR OR AGENTS (NOT THE IN ANY EVENT). THE SOFTWARE WILL BE +UNINTERRUPTED OR ERROR-FREE OR ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF ALL THE WORK (GOVERNED CODE) HIM RESPONSES, OR OF FINES, +SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR ANY OTHER OR OTHER HARL UNDER NO +CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), +PATENT PERMITTED BY THE INSTAGRAM PARENT STATE OR TORT (INCLUDING NEGLIGENCE), +PRODUCT LIABILITY OR OTHERWISE, ARISING OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR ANYTHING PROVIDED IN THIS PRODUCT, COMMIS AND SERVICES +ARE LICENSED SOFTWARE AND ANY RESULE OR ANY OTHER THE COPYRIGHT HOLDERS BE +LIABLE FOR ANY SPECIAL, INCIDENTAL, CASE, SUCH WARRANTIES, EXPRESS OR IMPLIED, +INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COPYRIGHT HOLDERS AND/OR ANY +PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY +EXPRESS OR DISTRIBUTE THAT ALL CLAIMS ARE SHALL CREATE DERAVE BE LIABLE TO YOU +WILL HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +6\. TERMINATION. TO THE EXTENT PERMITTED BY LAW, NO USE OF THE COVERED CODE IS +WITH YOU. 
SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE +INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY +SERVICING, REPAIR OR COULT OR IN ANY WAY OUT OF THE USE OF THE WEBSITES OR +SERVICE WILL BE CONSEQUENTIAL DAMAGES OF ANY KIND HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + +This paragraph Agreement constitutes the entire agreement between the parties +with respect to the Work licensed here. However, if you place the name of the +fact that the arbitration was the consultation of the parties as a "patent is". +Subject to the terms and conditions of this License, Contributor has knowledge +that a license under a third party may also be used to endorse or promote +products derived from the Work, and there is no warranty on the Software and +Science Fees. For the purposes of this Agreement, attach the following +disclaimers (without liabilities of written notice to the Subject Software) in a +manner that a product is under common control with you. The Free Software +Foundation may publish revised and/or new versions of the License for the +Modifications made by the applicable terms. The Recipient shall promptly retain +the covered works for any reason be entered in any federal or state or login +Restricted Laws appearing in the United States or any of its own information +that is not disabled from a derivative work except as expressly permitted in +this License, to the extent that they are in receiving the Software and Source +Code or any exercise of the rights granted to You by this License or a +Contributor made by the Licensor or are authorized to make a reasonable +retirement by the courts of the courts located in Santa Clara County, California +printed and related to the Work or “Company” and Apache Software Foundation. 
If +the Licensor shall be entitled to reflect your rights to use the Software and +the Software to exercise the rights granted to the recipient without a +requirement to exercise the rights granted by the Agreement to the provision +will begin will appear in such cases, you will use such information without such +corporation shall be an officer with respect to any part of the Software or any +portion thereof. Capitalized terms are included in the Initial Contributor and +under no circumstances will license the Service at any time and for any direct, +indirect, special, incidental, or consequential damages of or assist in +connection with any Services or the registration purposes only to the extent +that it includes any or all means including the processing of which you download +any derivative work. Any of the purchases’ transmission purposes are made +available, if any, in other circumstances, we may review the copyright notice. +In the event that this Agreement is required to give us strict content. The +inclusion of the other party hereunder may also notify you Intellectual Property +Rights to any third party. This means that the Source Code exists of the Work +will not charge a program available to you at any time. You must include a +prominent statement that the Software is governed under a particular version of +this Agreement. You must include a provision to the extent that there is no +warranty for the content of others. You agree that the Recipient was appointed +as a Contributor, (c) are effective until terminated by hereunder, then the +registration are not disabled and not limited to, submit any Customer Data +without the updated use of the Software and that no fee is released. You grant +to Use Other Arbitration Rules for Diagnostic or Services may use or modify the +Apple Software and Consolidated Apple Software or Services. The Company may have +full risk as a product of the Compatible Source. 
A Contribution by the Licensor +or by the updated Software under the following conditions we can redistribute +any General Provision of this Agreement. If the Program is used in accordance +with the terms of this Agreement, Customer may provide advertisements from your +devices that clause you can your employer or a transaction or country that has +been controlled by the arbitrator, that they will be useful of this Agreement. +The term "Open Source Software is available in connection with the program, and +you may not protect the combination of the Covered Code. You should like to +select a user's rights to charge a copy of this License. I are Contributor's +confidentiality of the exercise of the rights granted herein. Such a covered +work is released as a consequence, the Licensor shall be eligible for a purpose +or subcontractor of the person or entity to the user of the user, then the word +"Application" means having the original fee for any reason; and that no patent +license to more than fifty stated close of the license term. The terms of this +License will the license terms and conditions set forth in Section 2.2 (OPEC) +and You will not use the Software or any set of responsibility for any resulting +information that the Original Code warrants that you have the right to disclose +these information (or in the notification; or (iii) late use of the software or +any third party to the three (50) days before such belief to the extent that it +includes a court court obtains the rights granted by this License. diff --git a/workloads/realworld/standard/darknet/LICENSE.gpl b/workloads/realworld/standard/darknet/LICENSE.gpl new file mode 100644 index 0000000000000000000000000000000000000000..9cecc1d4669ee8af2ca727a5d8cde10cd8b2d7cc --- /dev/null +++ b/workloads/realworld/standard/darknet/LICENSE.gpl @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. 
+ Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. 
+ + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. 
+ + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + {one line to give the program's name and a brief idea of what it does.} + Copyright (C) {year} {name of author} + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + {project} Copyright (C) {year} {fullname} + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/workloads/realworld/standard/darknet/LICENSE.meta b/workloads/realworld/standard/darknet/LICENSE.meta new file mode 100644 index 0000000000000000000000000000000000000000..6728bd28d319c68ae04944fb034118dcc4c9aa09 --- /dev/null +++ b/workloads/realworld/standard/darknet/LICENSE.meta @@ -0,0 +1,8 @@ + META-LICENSE + Version 1, June 21 2017 + +Any and all licenses may be applied to the software either individually +or in concert. Any issues, ambiguities, paradoxes, or metaphysical quandries +arising from this combination should be discussed with a local faith leader, +hermit, or guru. The Oxford comma shall be used. + diff --git a/workloads/realworld/standard/darknet/LICENSE.mit b/workloads/realworld/standard/darknet/LICENSE.mit new file mode 100644 index 0000000000000000000000000000000000000000..5bd806ce16ea5053c8631793787362439375026e --- /dev/null +++ b/workloads/realworld/standard/darknet/LICENSE.mit @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2017 Joseph Redmon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/workloads/realworld/standard/darknet/LICENSE.v1 b/workloads/realworld/standard/darknet/LICENSE.v1 new file mode 100644 index 0000000000000000000000000000000000000000..5b8709acc43e7b76ed69758a52a9eaffaba775e6 --- /dev/null +++ b/workloads/realworld/standard/darknet/LICENSE.v1 @@ -0,0 +1,13 @@ + YOLO LICENSE + Version 1, July 10 2015 + +THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER +SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN +TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES +LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY SUBJECT TO +THE FOLLOWING CONDITIONS: + +1. #yolo +2. #swag +3. #blazeit + diff --git a/workloads/realworld/standard/darknet/Makefile b/workloads/realworld/standard/darknet/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..5022f68377d7626ab768f2501883d72b013dafba --- /dev/null +++ b/workloads/realworld/standard/darknet/Makefile @@ -0,0 +1,114 @@ +GPU=1 +CUDNN=0 +OPENCV=0 +OPENMP=0 +DEBUG=0 + +#ARCH= -gencode arch=compute_30,code=sm_30 \ +# -gencode arch=compute_35,code=sm_35 \ +# -gencode arch=compute_50,code=[sm_50,compute_50] \ +# -gencode arch=compute_52,code=[sm_52,compute_52] +# -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated? 
+ +# This is what I use, uncomment if you know your arch and want to specify +ARCH= -gencode arch=compute_80,code=sm_80 \ +#ARCH= -arch=sm_80 + +VPATH=./src/:./examples:$(CUPTI_ADD_COMMON) +SLIB=libdarknet.so +ALIB=libdarknet.a +EXEC=darknet +OBJDIR=./obj/ + +CC=gcc +CPP=g++ +# NVCC=nvcc --default-stream per-thread +NVCC=nvcc +AR=ar +ARFLAGS=rcs +OPTS=-Ofast +LDFLAGS= -lm -pthread +COMMON= -Iinclude/ -Isrc/ +CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC +ifeq ($(PROFILE), 1) +CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -DPROFILE +endif + + +ifeq ($(OPENMP), 1) +CFLAGS+= -fopenmp +endif + +ifeq ($(DEBUG), 1) +OPTS=-O0 -g +endif + +CFLAGS+=$(OPTS) + +ifeq ($(OPENCV), 1) +COMMON+= -DOPENCV +CFLAGS+= -DOPENCV +LDFLAGS+= `pkg-config --libs opencv` -lstdc++ +COMMON+= `pkg-config --cflags opencv` +endif + +ifeq ($(GPU), 1) +include ../../../common/make.config +COMMON+= -DGPU -I$(CUDA_DIR)/include/ -I$(CUDA_DIR)/extras/CUPTI/include/ +CFLAGS+= -DGPU +LDFLAGS+= -L$(CUDA_DIR)/lib64 -L$(CUDA_DIR)/extras/CUPTI/lib64/ -lcuda -lcudart -lcublas -lcurand -lcupti +endif + + +ifeq ($(CUDNN), 1) +COMMON+= -DCUDNN +CFLAGS+= -DCUDNN +LDFLAGS+= -lcudnn +endif + +OBJ=gemm.o utils.o cuda_dark.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o l2norm_layer.o yolo_layer.o iseg_layer.o image_opencv.o +EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o instance-segmenter.o darknet.o +ifeq 
($(GPU), 1) +LDFLAGS+= -lstdc++ +OBJ+=gemm_kernel.o convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o +endif + +# cpu_timestamps.o +# cupti_add.o + +EXECOBJ = $(addprefix $(OBJDIR), $(EXECOBJA)) +OBJS = $(addprefix $(OBJDIR), $(OBJ)) +DEPS = $(wildcard src/*.h) Makefile include/darknet.h + +all: obj backup results $(SLIB) $(ALIB) $(EXEC) + +$(EXEC): $(EXECOBJ) $(ALIB) + $(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS) $(ALIB) + +$(ALIB): $(OBJS) + $(AR) $(ARFLAGS) $@ $^ + +$(SLIB): $(OBJS) + $(CC) $(CFLAGS) -shared $^ -o $@ $(LDFLAGS) + +$(OBJDIR)%.o: %.cpp $(DEPS) + $(CPP) $(COMMON) $(CFLAGS) -c $< -o $@ + +$(OBJDIR)%.o: %.c $(DEPS) + $(CC) $(COMMON) $(CFLAGS) -c $< -o $@ + +$(OBJDIR)%.o: %.cu $(DEPS) + $(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@ + +obj: + mkdir -p obj +backup: + mkdir -p backup +results: + mkdir -p results + +.PHONY: clean + +clean: + rm -rf $(OBJS) $(SLIB) $(ALIB) $(EXEC) $(EXECOBJ) $(OBJDIR)/* + diff --git a/workloads/realworld/standard/darknet/README.md b/workloads/realworld/standard/darknet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fb58c2640038a963cd573d121e4fab59399f67dc --- /dev/null +++ b/workloads/realworld/standard/darknet/README.md @@ -0,0 +1,124 @@ +![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png) + +# Darknet # +Darknet is an open source neural network framework written in C and CUDA. It is fast, easy to install, and supports CPU and GPU computation. 
+
+**Discord** invite link for communication and questions: https://discord.gg/zSq8rtW
+
+## YOLOv7:
+
+* **paper** - YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors: https://arxiv.org/abs/2207.02696
+
+* **source code - Pytorch (use to reproduce results):** https://github.com/WongKinYiu/yolov7
+
+----
+
+Official YOLOv7 is more accurate and faster than YOLOv5 by **120%** FPS, than YOLOX by **180%** FPS, than Dual-Swin-T by **1200%** FPS, than ConvNext by **550%** FPS, than SWIN-L by **500%** FPS.
+
+YOLOv7 surpasses all known object detectors in both speed and accuracy in the range from 5 FPS to 160 FPS and has the highest accuracy 56.8% AP among all known real-time object detectors with 30 FPS or higher on GPU V100, batch=1.
+
+* YOLOv7-e6 (55.9% AP, 56 FPS V100 b=1) by `+500%` FPS faster than SWIN-L Cascade-Mask R-CNN (53.9% AP, 9.2 FPS A100 b=1)
+* YOLOv7-e6 (55.9% AP, 56 FPS V100 b=1) by `+550%` FPS faster than ConvNeXt-XL C-M-RCNN (55.2% AP, 8.6 FPS A100 b=1)
+* YOLOv7-w6 (54.6% AP, 84 FPS V100 b=1) by `+120%` FPS faster than YOLOv5-X6-r6.1 (55.0% AP, 38 FPS V100 b=1)
+* YOLOv7-w6 (54.6% AP, 84 FPS V100 b=1) by `+1200%` FPS faster than Dual-Swin-T C-M-RCNN (53.6% AP, 6.5 FPS V100 b=1)
+* YOLOv7x (52.9% AP, 114 FPS V100 b=1) by `+150%` FPS faster than PPYOLOE-X (51.9% AP, 45 FPS V100 b=1)
+* YOLOv7 (51.2% AP, 161 FPS V100 b=1) by `+180%` FPS faster than YOLOX-X (51.1% AP, 58 FPS V100 b=1)
+
+----
+
+![more5](https://user-images.githubusercontent.com/4096485/179425274-f55a36d4-8450-4471-816b-8c105841effd.jpg)
+
+----
+
+![image](https://user-images.githubusercontent.com/4096485/177675030-a929ee00-0eba-4d93-95c2-225231d0fd61.png)
+
+
+----
+
+![yolov7_640_1280](https://user-images.githubusercontent.com/4096485/177688869-d75e0c36-63af-46ec-bdbd-81dbb281f257.png)
+
+----
+
+## Scaled-YOLOv4:
+
+* **paper (CVPR 2021)**: 
https://openaccess.thecvf.com/content/CVPR2021/html/Wang_Scaled-YOLOv4_Scaling_Cross_Stage_Partial_Network_CVPR_2021_paper.html + +* **source code - Pytorch (use to reproduce results):** https://github.com/WongKinYiu/ScaledYOLOv4 + +* **source code - Darknet:** https://github.com/AlexeyAB/darknet + +* **Medium:** https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982?source=friends_link&sk=c8553bfed861b1a7932f739d26f487c8 + +## YOLOv4: + +* **paper:** https://arxiv.org/abs/2004.10934 + +* **source code:** https://github.com/AlexeyAB/darknet + +* **Wiki:** https://github.com/AlexeyAB/darknet/wiki + +* **useful links:** https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7 + +For more information see the [Darknet project website](http://pjreddie.com/darknet). + + +
Expand + +![yolo_progress](https://user-images.githubusercontent.com/4096485/146988929-1ed0cbec-1e01-4ad0-b42c-808dcef32994.png) https://paperswithcode.com/sota/object-detection-on-coco + +---- + +![scaled_yolov4](https://user-images.githubusercontent.com/4096485/112776361-281d8380-9048-11eb-8083-8728b12dcd55.png) AP50:95 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2011.08036 + +---- + +![YOLOv4Tiny](https://user-images.githubusercontent.com/4096485/101363015-e5c21200-38b1-11eb-986f-b3e516e05977.png) + +---- + +![YOLOv4](https://user-images.githubusercontent.com/4096485/90338826-06114c80-dff5-11ea-9ba2-8eb63a7409b3.png) + +
+ +---- + +![OpenCV_TRT](https://user-images.githubusercontent.com/4096485/90338805-e5e18d80-dff4-11ea-8a68-5710956256ff.png) + + +## Citation + + +``` +@misc{https://doi.org/10.48550/arxiv.2207.02696, + doi = {10.48550/ARXIV.2207.02696}, + url = {https://arxiv.org/abs/2207.02696}, + author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + publisher = {arXiv}, + year = {2022}, + copyright = {arXiv.org perpetual, non-exclusive license} +} +``` + +``` +@misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +``` +@InProceedings{Wang_2021_CVPR, + author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + title = {{Scaled-YOLOv4}: Scaling Cross Stage Partial Network}, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2021}, + pages = {13029-13038} +} +``` diff --git a/workloads/realworld/standard/darknet/cfg/alexnet.cfg b/workloads/realworld/standard/darknet/cfg/alexnet.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e2ed4bb8e7b1bad7859aef0d802cb4084753cb7a --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/alexnet.cfg @@ -0,0 +1,96 @@ +[net] +# Training +# batch=128 +# subdivisions=1 +# Testing +batch=1 +subdivisions=1 +height=227 +width=227 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=256 + +learning_rate=0.01 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue = .1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +filters=96 +size=11 
+stride=4 +pad=0 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[convolutional] +filters=256 +size=5 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[convolutional] +filters=384 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=384 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=1000 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/cifar.cfg b/workloads/realworld/standard/darknet/cfg/cifar.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b2f69f53903e55c24718ed12d9adaaa1557e3647 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/cifar.cfg @@ -0,0 +1,121 @@ +[net] +batch=128 +subdivisions=1 +height=28 +width=28 +channels=3 +max_crop=32 +min_crop=32 + +hue=.1 +saturation=.75 +exposure=.75 + +learning_rate=0.4 +policy=poly +power=4 +max_batches = 5000 +momentum=0.9 +decay=0.0005 + + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[dropout] +probability=.5 + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 diff --git a/workloads/realworld/standard/darknet/cfg/cifar.data b/workloads/realworld/standard/darknet/cfg/cifar.data new file mode 100644 index 0000000000000000000000000000000000000000..a52208db1b203b5e1f24d5afaf8c7002adfd71a3 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/cifar.data @@ -0,0 +1,7 @@ +classes=10 +train = data/cifar/train.list +valid = data/cifar/test.list +test = data/cifar/test.list +labels = data/cifar/labels.txt +backup = backup/ +top=2 diff --git a/workloads/realworld/standard/darknet/cfg/cifar.test.cfg b/workloads/realworld/standard/darknet/cfg/cifar.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..18b6c54c909152b1201d6320b85fafc5c36ba1ef --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/cifar.test.cfg @@ -0,0 +1,117 @@ +[net] +batch=128 +subdivisions=1 +height=32 +width=32 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.4 +policy=poly +power=4 +max_batches = 50000 + + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[dropout] +probability=.5 + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 +temperature=3 + diff --git a/workloads/realworld/standard/darknet/cfg/cifar_small.cfg b/workloads/realworld/standard/darknet/cfg/cifar_small.cfg new file mode 100644 index 0000000000000000000000000000000000000000..d48b1231f0131faaa187b18df6705411c3d16a76 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/cifar_small.cfg @@ -0,0 +1,86 @@ +[net] +batch=128 +subdivisions=1 +height=28 +width=28 +channels=3 +max_crop=32 +min_crop=32 + +hue=.1 +saturation=.75 +exposure=.75 + +learning_rate=0.1 +policy=poly +power=4 +max_batches = 5000 +momentum=0.9 +decay=0.0005 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] diff --git a/workloads/realworld/standard/darknet/cfg/coco.data b/workloads/realworld/standard/darknet/cfg/coco.data new file mode 100644 index 0000000000000000000000000000000000000000..5951d5245a7895e8418bc3155de3b03049e76c42 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/coco.data @@ 
-0,0 +1,6 @@ +classes= 80 +train = /data/darknet/coco/valid.list +valid = /data/darknet/coco/valid.list +backup = /data/darknet/backup/ +names = /data/darknet/coco/coco.names +eval=coco diff --git a/workloads/realworld/standard/darknet/cfg/coco.names b/workloads/realworld/standard/darknet/cfg/coco.names new file mode 100644 index 0000000000000000000000000000000000000000..16315f2becec9705017bfaf1b9fb81ca2a83c0b0 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/coco.names @@ -0,0 +1,80 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +sofa +pottedplant +bed +diningtable +toilet +tvmonitor +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush \ No newline at end of file diff --git a/workloads/realworld/standard/darknet/cfg/combine9k.data b/workloads/realworld/standard/darknet/cfg/combine9k.data new file mode 100644 index 0000000000000000000000000000000000000000..06a3e76aefac9c1074c3dfe159bc115a92b0791e --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/combine9k.data @@ -0,0 +1,10 @@ +classes= 9418 +#train = /home/pjreddie/data/coco/trainvalno5k.txt +train = data/combine9k.train.list +valid = /home/pjreddie/data/imagenet/det.val.files +labels = data/9k.labels +names = data/9k.names +backup = backup/ +map = data/inet9k.map +eval = imagenet +results = results diff --git a/workloads/realworld/standard/darknet/cfg/darknet.cfg b/workloads/realworld/standard/darknet/cfg/darknet.cfg new file mode 100644 index 
0000000000000000000000000000000000000000..375107f7c196baf7adf229a7cfffc84739875828 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/darknet.cfg @@ -0,0 +1,120 @@ +[net] +# Training +# batch=128 +# subdivisions=1 +# Testing +batch=1 +subdivisions=1 +height=256 +width=256 +min_crop=128 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/darknet19.cfg b/workloads/realworld/standard/darknet/cfg/darknet19.cfg new file mode 100644 index 0000000000000000000000000000000000000000..28ac9669ef686b4d638a5bf462451962fec45a4e --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/darknet19.cfg @@ -0,0 +1,205 @@ +[net] +# Training +#batch=128 +#subdivisions=2 + +# Testing + batch=1 + subdivisions=1 + +height=256 +width=256 +min_crop=128 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 
+policy=poly +power=4 +max_batches=800000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 
+stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/darknet19_448.cfg b/workloads/realworld/standard/darknet/cfg/darknet19_448.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c6df7306d3ef0622e0a0e0cbd6a5603699344e56 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/darknet19_448.cfg @@ -0,0 +1,197 @@ +[net] +batch=128 +subdivisions=4 +height=448 +width=448 +max_crop=512 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 + +angle=7 +hue = .1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/darknet53.cfg b/workloads/realworld/standard/darknet/cfg/darknet53.cfg new file mode 100644 index 0000000000000000000000000000000000000000..7b6d5766e9ec48ee19a74321583b44621c1e07b3 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/darknet53.cfg @@ -0,0 +1,566 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/darknet53_448.cfg b/workloads/realworld/standard/darknet/cfg/darknet53_448.cfg new file mode 100644 index 0000000000000000000000000000000000000000..dedab1b97c7e5d4226f061e6c983046d7a278dd0 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/darknet53_448.cfg @@ -0,0 +1,559 @@ +[net] +# Training - start training with darknet53.weights +# batch=128 +# subdivisions=8 + +# Testing +batch=1 +subdivisions=1 + +height=448 +width=448 +channels=3 +min_crop=448 +max_crop=512 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 +momentum=0.9 +decay=0.0005 + + +[convolutional] 
+batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 
+activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[avgpool] + +[convolutional] +filters=1000 +size=1 
+stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/darknet9000.cfg b/workloads/realworld/standard/darknet/cfg/darknet9000.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9dd2dfbbf5a7137faada4e091b8e6d48233f09bf --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/darknet9000.cfg @@ -0,0 +1,205 @@ +[net] +# Training +# batch=128 +# subdivisions=4 +# Testing +batch = 1 +subdivisions = 1 +height=448 +width=448 +max_crop=512 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=9418 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 +tree=data/9k.tree + +[cost] +type=masked + diff --git a/workloads/realworld/standard/darknet/cfg/densenet201.cfg b/workloads/realworld/standard/darknet/cfg/densenet201.cfg new file mode 100644 index 0000000000000000000000000000000000000000..65b4aecc52d45075f2913e3d63b9aec0527fa44c --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/densenet201.cfg @@ -0,0 +1,1951 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 
+filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] 
+layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 
+pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] 
+layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 
+pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/extraction.cfg b/workloads/realworld/standard/darknet/cfg/extraction.cfg new file mode 100644 index 0000000000000000000000000000000000000000..66cb15f80e9a5e811223299594882a3b5d9485dc --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/extraction.cfg @@ -0,0 +1,209 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=224 +width=224 +max_crop=320 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/extraction.conv.cfg b/workloads/realworld/standard/darknet/cfg/extraction.conv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..2a7d09ec80fa2f47e1ebb4134b7845d5cae2b828 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/extraction.conv.cfg @@ -0,0 +1,179 @@ +[net] +batch=1 +subdivisions=1 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.5 +policy=poly +power=6 +max_batches=500000 + 
+[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[connected] +output=1000 +activation=leaky + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/extraction22k.cfg b/workloads/realworld/standard/darknet/cfg/extraction22k.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b5f54090d00537fdca72f54bb2eed69dd78f5b00 --- /dev/null +++ 
b/workloads/realworld/standard/darknet/cfg/extraction22k.cfg @@ -0,0 +1,206 @@ +[net] +batch=128 +subdivisions=1 +height=224 +width=224 +max_crop=320 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +max_batches = 0 +policy=steps +steps=444000,590000,970000 +scales=.5,.2,.1 + +#policy=sigmoid +#gamma=.00008 +#step=100000 +#max_batches=200000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + 
+[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[connected] +output=21842 +activation=leaky + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/go.cfg b/workloads/realworld/standard/darknet/cfg/go.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c730092ff3ffda0124baeace050bd382c442d88d --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/go.cfg @@ -0,0 +1,132 @@ +[net] +batch=512 +subdivisions=1 +height=19 +width=19 +channels=1 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=10000000 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 
+size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=1 +size=1 +stride=1 +pad=1 +activation=linear + +[reorg] +extra=1 +stride=1 + +[softmax] + diff --git a/workloads/realworld/standard/darknet/cfg/go.test.cfg b/workloads/realworld/standard/darknet/cfg/go.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..1e4e43809bf3ede20a67b5020fcca0f61612e8f7 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/go.test.cfg @@ -0,0 +1,132 @@ +[net] +batch=1 +subdivisions=1 +height=19 +width=19 +channels=1 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +policy=poly +power=4 +max_batches=100000 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=1 +size=1 
+stride=1 +pad=1 +activation=linear + +[reorg] +extra=1 +stride=1 + +[softmax] + + diff --git a/workloads/realworld/standard/darknet/cfg/gru.cfg b/workloads/realworld/standard/darknet/cfg/gru.cfg new file mode 100644 index 0000000000000000000000000000000000000000..6064221289d41dc3ee464a715ae05593a02f34f4 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/gru.cfg @@ -0,0 +1,29 @@ +[net] +inputs=256 +momentum=0.9 +decay=0.0 +subdivisions=1 +batch = 1 +time_steps=1 +learning_rate=.002 +adam=1 + +policy=constant +power=4 +max_batches=1000000 + +[gru] +output = 256 + +[gru] +output = 256 + +[gru] +output = 256 + +[connected] +output=256 +activation=linear + +[softmax] + diff --git a/workloads/realworld/standard/darknet/cfg/imagenet.labels.list b/workloads/realworld/standard/darknet/cfg/imagenet.labels.list new file mode 100644 index 0000000000000000000000000000000000000000..e73d41762d311df7f7eefec0f65ab12a7bacc023 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/imagenet.labels.list @@ -0,0 +1,21842 @@ +n02119789 +n02100735 +n02110185 +n02096294 +n02102040 +n02066245 +n02509815 +n02124075 +n02417914 +n02123394 +n02125311 +n02423022 +n02346627 +n02077923 +n02110063 +n02447366 +n02109047 +n02089867 +n02102177 +n02091134 +n02092002 +n02071294 +n02442845 +n02504458 +n02092339 +n02098105 +n02096437 +n02114712 +n02105641 +n02128925 +n02091635 +n02088466 +n02096051 +n02117135 +n02138441 +n02097130 +n02493509 +n02457408 +n02389026 +n02443484 +n02110341 +n02089078 +n02086910 +n02445715 +n02093256 +n02113978 +n02106382 +n02441942 +n02113712 +n02113186 +n02105162 +n02415577 +n02356798 +n02488702 +n02123159 +n02098413 +n02422699 +n02114855 +n02094433 +n02111277 +n02132136 +n02119022 +n02091467 +n02106550 +n02422106 +n02091831 +n02120505 +n02104365 +n02086079 +n02112706 +n02098286 +n02095889 +n02484975 +n02137549 +n02500267 +n02129604 +n02090721 +n02396427 +n02108000 +n02391049 +n02412080 +n02108915 +n02480495 +n02110806 +n02128385 +n02107683 +n02085936 
+n02094114 +n02087046 +n02100583 +n02096177 +n02494079 +n02105056 +n02101556 +n02123597 +n02481823 +n02105505 +n02088094 +n02085782 +n02489166 +n02364673 +n02114548 +n02134084 +n02480855 +n02090622 +n02113624 +n02093859 +n02403003 +n02097298 +n02108551 +n02493793 +n02107142 +n02096585 +n02107574 +n02107908 +n02086240 +n02102973 +n02112018 +n02093647 +n02397096 +n02437312 +n02483708 +n02097047 +n02106030 +n02099601 +n02093991 +n02110627 +n02106166 +n02326432 +n02108089 +n02097658 +n02088364 +n02111129 +n02100236 +n02486261 +n02115913 +n02486410 +n02487347 +n02099849 +n02108422 +n02104029 +n02492035 +n02110958 +n02099429 +n02094258 +n02099267 +n02395406 +n02112350 +n02109961 +n02101388 +n02113799 +n02095570 +n02128757 +n02101006 +n02115641 +n02097209 +n02342885 +n02097474 +n02120079 +n02095314 +n02088238 +n02408429 +n02133161 +n02328150 +n02410509 +n02492660 +n02398521 +n02112137 +n02510455 +n02093428 +n02105855 +n02111500 +n02085620 +n02123045 +n02490219 +n02099712 +n02109525 +n02454379 +n02111889 +n02088632 +n02090379 +n02443114 +n02361337 +n02105412 +n02483362 +n02437616 +n02107312 +n02325366 +n02091032 +n02129165 +n02102318 +n02100877 +n02074367 +n02504013 +n02363005 +n02102480 +n02113023 +n02086646 +n02497673 +n02087394 +n02127052 +n02116738 +n02488291 +n02091244 +n02114367 +n02130308 +n02089973 +n02105251 +n02134418 +n02093754 +n02106662 +n02444819 +n01882714 +n01871265 +n01872401 +n01877812 +n01873310 +n01883070 +n04086273 +n04507155 +n04147183 +n04254680 +n02672831 +n02219486 +n02317335 +n01968897 +n03452741 +n03642806 +n07745940 +n02690373 +n04552348 +n02692877 +n02782093 +n04266014 +n03344393 +n03447447 +n04273569 +n03662601 +n02951358 +n04612504 +n02981792 +n04483307 +n03095699 +n03673027 +n03947888 +n02687172 +n04347754 +n04606251 +n03478589 +n04389033 +n03773504 +n02860847 +n03218198 +n02835271 +n03792782 +n03393912 +n03895866 +n02797295 +n04204347 +n03791053 +n03384352 +n03272562 +n04310018 +n02704792 +n02701002 +n02814533 +n02930766 +n03100240 
+n03594945 +n03670208 +n03770679 +n03777568 +n04037443 +n04285008 +n03444034 +n03445924 +n03785016 +n04252225 +n03345487 +n03417042 +n03930630 +n04461696 +n04467665 +n03796401 +n03977966 +n04065272 +n04335435 +n04252077 +n04465501 +n03776460 +n04482393 +n04509417 +n03538406 +n03599486 +n03868242 +n02804414 +n03125729 +n03131574 +n03388549 +n02870880 +n03018349 +n03742115 +n03016953 +n04380533 +n03337140 +n03891251 +n02791124 +n04429376 +n03376595 +n04099969 +n04344873 +n04447861 +n03179701 +n03982430 +n03201208 +n03290653 +n04550184 +n07742313 +n07747607 +n07749582 +n07753113 +n07753275 +n07753592 +n07754684 +n07760859 +n07768694 +n12267677 +n12620546 +n13133613 +n11879895 +n12144580 +n12768682 +n03854065 +n04515003 +n03017168 +n03249569 +n03447721 +n03720891 +n03721384 +n04311174 +n02787622 +n02992211 +n04536866 +n03495258 +n02676566 +n03272010 +n03110669 +n03394916 +n04487394 +n03494278 +n03840681 +n03884397 +n02804610 +n03838899 +n04141076 +n03372029 +n11939491 +n12057211 +n09246464 +n09468604 +n09193705 +n09472597 +n09399592 +n09421951 +n09256479 +n09332890 +n09428293 +n09288635 +n03498962 +n03041632 +n03658185 +n03954731 +n03995372 +n03649909 +n03481172 +n03109150 +n02951585 +n03970156 +n04154565 +n04208210 +n03967562 +n03000684 +n01514668 +n01514859 +n01518878 +n01530575 +n01531178 +n01532829 +n01534433 +n01537544 +n01558993 +n01560419 +n01580077 +n01582220 +n01592084 +n01601694 +n01608432 +n01614925 +n01616318 +n01622779 +n01795545 +n01796340 +n01797886 +n01798484 +n01806143 +n01806567 +n01807496 +n01817953 +n01818515 +n01819313 +n01820546 +n01824575 +n01828970 +n01829413 +n01833805 +n01843065 +n01843383 +n01847000 +n01855032 +n01855672 +n01860187 +n02002556 +n02002724 +n02006656 +n02007558 +n02009912 +n02009229 +n02011460 +n02012849 +n02013706 +n02018207 +n02018795 +n02025239 +n02027492 +n02028035 +n02033041 +n02037110 +n02017213 +n02051845 +n02056570 +n02058221 +n01484850 +n01491361 +n01494475 +n01496331 +n01498041 +n02514041 +n02536864 +n01440764 
+n01443537 +n02526121 +n02606052 +n02607072 +n02643566 +n02655020 +n02640242 +n02641379 +n01664065 +n01665541 +n01667114 +n01667778 +n01669191 +n01675722 +n01677366 +n01682714 +n01685808 +n01687978 +n01688243 +n01689811 +n01692333 +n01693334 +n01694178 +n01695060 +n01704323 +n01697457 +n01698640 +n01728572 +n01728920 +n01729322 +n01729977 +n01734418 +n01735189 +n01737021 +n01739381 +n01740131 +n01742172 +n01744401 +n01748264 +n01749939 +n01751748 +n01753488 +n01755581 +n01756291 +n01629819 +n01630670 +n01631663 +n01632458 +n01632777 +n01641577 +n01644373 +n01644900 +n04579432 +n04592741 +n03876231 +n03483316 +n03868863 +n04251144 +n03691459 +n03759954 +n04152593 +n03793489 +n03271574 +n03843555 +n04332243 +n04265275 +n04330267 +n03467068 +n02794156 +n04118776 +n03841143 +n04141975 +n02708093 +n03196217 +n04548280 +n03544143 +n04355338 +n03891332 +n04328186 +n03197337 +n04317175 +n04376876 +n03706229 +n02841315 +n04009552 +n04356056 +n03692522 +n04044716 +n02879718 +n02950826 +n02749479 +n04090263 +n04008634 +n03085013 +n04505470 +n03126707 +n03666591 +n02666196 +n02977058 +n04238763 +n03180011 +n03485407 +n03832673 +n06359193 +n03496892 +n04428191 +n04004767 +n04243546 +n04525305 +n04179913 +n03602883 +n04372370 +n03532672 +n02974003 +n03874293 +n03944341 +n03992509 +n03425413 +n02966193 +n04371774 +n04067472 +n04040759 +n04019541 +n03492542 +n04355933 +n03929660 +n02965783 +n04258138 +n04074963 +n03208938 +n02910353 +n03476684 +n03627232 +n03075370 +n03874599 +n03804744 +n04127249 +n04153751 +n03803284 +n04162706 +n04228054 +n02948072 +n03590841 +n04286575 +n04456115 +n03814639 +n03933933 +n04485082 +n03733131 +n03794056 +n04275548 +n01768244 +n01770081 +n01770393 +n01773157 +n01773549 +n01773797 +n01774384 +n01774750 +n01775062 +n01776313 +n01784675 +n01990800 +n01978287 +n01978455 +n01980166 +n01981276 +n01983481 +n01984695 +n01985128 +n01986214 +n02165105 +n02165456 +n02167151 +n02168699 +n02169497 +n02172182 +n02174001 +n02177972 +n02190166 +n02206856 
+n02226429 +n02229544 +n02231487 +n02233338 +n02236044 +n02256656 +n02259212 +n02264363 +n02268443 +n02268853 +n02276258 +n02277742 +n02279972 +n02280649 +n02281406 +n02281787 +n01910747 +n01914609 +n01917289 +n01924916 +n01930112 +n01943899 +n01944390 +n01945685 +n01950731 +n01955084 +n02319095 +n02321529 +n03584829 +n03297495 +n03761084 +n03259280 +n04111531 +n04442312 +n04542943 +n04517823 +n03207941 +n04070727 +n04554684 +n03133878 +n03400231 +n04596742 +n02939185 +n03063689 +n04398044 +n04270147 +n02699494 +n04486054 +n03899768 +n04311004 +n04366367 +n04532670 +n02793495 +n03457902 +n03877845 +n03781244 +n03661043 +n02727426 +n02859443 +n03028079 +n03788195 +n04346328 +n03956157 +n04081281 +n03032252 +n03529860 +n03697007 +n03065424 +n03837869 +n04458633 +n02980441 +n04005630 +n03461385 +n02776631 +n02791270 +n02871525 +n02927161 +n03089624 +n04200800 +n04443257 +n04462240 +n03388043 +n03042490 +n04613696 +n03216828 +n02892201 +n03743016 +n02788148 +n02894605 +n03160309 +n03000134 +n03930313 +n04604644 +n04326547 +n03459775 +n04239074 +n04501370 +n03792972 +n04149813 +n03530642 +n03961711 +n03903868 +n02814860 +n07711569 +n07720875 +n07714571 +n07714990 +n07715103 +n07716358 +n07716906 +n07717410 +n07717556 +n07718472 +n07718747 +n07730033 +n07734744 +n04209239 +n03594734 +n02971356 +n03485794 +n04133789 +n02747177 +n04125021 +n07579787 +n03814906 +n03134739 +n03404251 +n04423845 +n03877472 +n04120489 +n03062245 +n03014705 +n03717622 +n03777754 +n04493381 +n04476259 +n02777292 +n07693725 +n03998194 +n03617480 +n07590611 +n04579145 +n03623198 +n07248320 +n04277352 +n04229816 +n02823428 +n03127747 +n02877765 +n04435653 +n03724870 +n03710637 +n03920288 +n03379051 +n02807133 +n04399382 +n03527444 +n03983396 +n03924679 +n04532106 +n06785654 +n03445777 +n07613480 +n04350905 +n04562935 +n03325584 +n03045698 +n07892512 +n03250847 +n04192698 +n03026506 +n03534580 +n07565083 +n04296562 +n02869837 +n07871810 +n02799071 +n03314780 +n04141327 +n04357314 +n02823750 
+n13052670 +n07583066 +n03637318 +n04599235 +n07802026 +n02883205 +n03709823 +n04560804 +n02909870 +n03207743 +n04263257 +n07932039 +n03786901 +n04479046 +n03873416 +n02999410 +n04367480 +n03775546 +n07875152 +n04591713 +n04201297 +n02916936 +n03240683 +n02840245 +n02963159 +n04370456 +n03991062 +n02843684 +n03482405 +n03942813 +n03908618 +n03902125 +n07584110 +n02730930 +n04023962 +n02769748 +n10148035 +n02817516 +n03908714 +n02906734 +n03788365 +n02667093 +n03787032 +n03980874 +n03141823 +n03976467 +n04264628 +n07930864 +n04039381 +n06874185 +n04033901 +n04041544 +n07860988 +n03146219 +n03763968 +n03676483 +n04209133 +n03782006 +n03857828 +n03775071 +n02892767 +n07684084 +n04522168 +n03764736 +n04118538 +n03887697 +n13044778 +n03291819 +n03770439 +n03124170 +n04487081 +n03916031 +n02808440 +n07697537 +n12985857 +n02917067 +n03938244 +n15075141 +n02978881 +n02966687 +n03633091 +n13040303 +n03690938 +n03476991 +n02669723 +n03220513 +n03127925 +n04584207 +n07880968 +n03937543 +n03000247 +n04418357 +n04590129 +n02795169 +n04553703 +n02783161 +n02802426 +n02808304 +n03124043 +n03450230 +n04589890 +n12998815 +n02992529 +n03825788 +n02790996 +n03710193 +n03630383 +n03347037 +n03769881 +n03871628 +n03733281 +n03976657 +n03535780 +n04259630 +n03929855 +n04049303 +n04548362 +n02979186 +n06596364 +n03935335 +n06794110 +n02825657 +n03388183 +n04591157 +n04540053 +n03866082 +n04136333 +n04026417 +n02865351 +n02834397 +n03888257 +n04235860 +n04404412 +n04371430 +n03733805 +n07920052 +n07873807 +n02895154 +n04204238 +n04597913 +n04131690 +n07836838 +n09835506 +n03443371 +n13037406 +n04336792 +n04557648 +n03187595 +n04254120 +n03595614 +n04146614 +n03598930 +n03958227 +n04069434 +n03188531 +n02786058 +n07615774 +n04525038 +n04409515 +n03424325 +n03223299 +n03680355 +n07614500 +n07695742 +n04033995 +n03710721 +n04392985 +n03047690 +n03584254 +n13054560 +n10565667 +n03950228 +n03729826 +n02837789 +n04254777 +n02988304 +n03657121 +n04417672 +n04523525 +n02815834 +n09229709 
+n07697313 +n03888605 +n03355925 +n03063599 +n04116512 +n04325704 +n07831146 +n03255030 +n00483313 +n02432291 +n02356381 +n02377388 +n04028764 +n04381587 +n02279257 +n04168199 +n00445055 +n02461128 +n03626760 +n04313503 +n00451635 +n02509515 +n04224842 +n09403734 +n02769290 +n13054073 +n03163222 +n00464478 +n03087069 +n04477219 +n03445617 +n00449054 +n00483705 +n04395106 +n03389611 +n04285965 +n04166281 +n04003856 +n03696301 +n00475787 +n04587404 +n09218641 +n02276355 +n03592669 +n04459909 +n04492375 +n09447666 +n00463543 +n04148703 +n04591517 +n03970546 +n04297750 +n02782778 +n02383231 +n03693474 +n02277094 +n03766044 +n02056228 +n03394272 +n03047052 +n00434075 +n04185946 +n02411999 +n03858418 +n12833149 +n02836035 +n03108853 +n04587559 +n04138261 +n02278024 +n03063485 +n02774921 +n09475044 +n02811204 +n03329302 +n04026813 +n03986562 +n03379204 +n03426134 +n02790669 +n03487090 +n03548402 +n08614632 +n04054361 +n03421485 +n03302671 +n03098959 +n02970408 +n03772584 +n03064935 +n09415584 +n11715430 +n12024445 +n02710201 +n03475581 +n13142504 +n03396074 +n03211789 +n03914337 +n03678558 +n03233123 +n00453396 +n00454395 +n00440382 +n04289027 +n00445226 +n11953610 +n04128413 +n00480211 +n00470966 +n12547503 +n03085219 +n02275773 +n02692086 +n04257790 +n00448748 +n02686379 +n12328567 +n03432129 +n03859000 +n12091377 +n02124313 +n00442847 +n04603399 +n03114379 +n02920369 +n03818343 +n02946127 +n02978055 +n12914923 +n02705429 +n00448232 +n12882945 +n04289690 +n07606669 +n02056728 +n11848479 +n03046921 +n12282933 +n02867966 +n12821505 +n02812949 +n04545305 +n02699770 +n04395651 +n02900160 +n04099003 +n02054711 +n12606545 +n03356858 +n01859190 +n03643737 +n02962200 +n03123553 +n09361517 +n02793089 +n00449517 +n02783994 +n10117851 +n12038585 +n04383839 +n10142391 +n07719213 +n03536122 +n02472987 +n03454536 +n11728099 +n02392824 +n03795758 +n04282872 +n00448872 +n02404432 +n03797182 +n03029197 +n03665924 +n12477163 +n02769963 +n03863262 +n01532325 +n04165409 +n04593077 
+n04473108 +n03577090 +n09988063 +n00446804 +n03931765 +n00475014 +n02700064 +n03240892 +n12475242 +n11735053 +n04053508 +n02852173 +n02382750 +n03823111 +n04543772 +n04112147 +n04433585 +n03175189 +n03596543 +n00445685 +n03307792 +n04589593 +n01814217 +n02993368 +n04303497 +n02811350 +n03355768 +n03699591 +n04590553 +n01893825 +n12726670 +n09916348 +n11544015 +n01318894 +n02133704 +n02367492 +n04506289 +n02069974 +n01900150 +n03207835 +n03363549 +n02831595 +n04970470 +n04160847 +n03767203 +n03928814 +n02302969 +n02918595 +n10401331 +n04231272 +n03717447 +n03063968 +n03380724 +n00825773 +n09988493 +n02740300 +n04539794 +n04121511 +n01323599 +n12937130 +n02428508 +n02980036 +n12061380 +n01887787 +n04214046 +n01787835 +n00466630 +n02979290 +n03927091 +n03231368 +n03904657 +n04469003 +n04196502 +n02122948 +n04544325 +n07868340 +n13876561 +n11925898 +n12158443 +n01595450 +n12454705 +n02069412 +n09618957 +n02357111 +n00451563 +n04197110 +n02276902 +n03111296 +n03909020 +n12303083 +n02082791 +n01956764 +n04269822 +n04207343 +n02433318 +n01888181 +n12682668 +n01592387 +n09793141 +n00466273 +n04026180 +n06255081 +n12172364 +n10145590 +n12311579 +n12173912 +n03822171 +n03140292 +n03027625 +n02739427 +n02060133 +n02431785 +n03219010 +n00447957 +n11910271 +n03620967 +n12547215 +n02409508 +n04290079 +n12329260 +n13901858 +n02008497 +n10304914 +n04524142 +n04279462 +n04233124 +n09733793 +n12822115 +n09475179 +n10151760 +n03418618 +n12858397 +n07735510 +n03549473 +n10098245 +n03653583 +n10604380 +n03375575 +n03885293 +n01527347 +n03237340 +n02760658 +n11953038 +n03187268 +n03004275 +n02393161 +n11965218 +n08580944 +n03938725 +n03900979 +n04144241 +n03760310 +n02376679 +n03237992 +n09432283 +n02379908 +n09918554 +n04041747 +n12012111 +n10331167 +n01612122 +n10147935 +n07691539 +n11669786 +n09403427 +n01935395 +n09903501 +n04439585 +n04459018 +n02780704 +n03720163 +n12899752 +n04118635 +n03404149 +n02429456 +n00449168 +n04516354 +n04317833 +n12075299 +n07878647 +n09438940 
+n03361550 +n02027357 +n04317976 +n03092883 +n04526964 +n03985069 +n03610682 +n04028581 +n02277268 +n09433839 +n03846431 +n03919289 +n10146104 +n10260706 +n02686227 +n03321103 +n00444846 +n01558307 +n01595168 +n03919096 +n11844892 +n04260364 +n02750070 +n03034244 +n03002096 +n04273972 +n11814584 +n04605321 +n07745466 +n02922798 +n03361380 +n12651229 +n08521623 +n04498389 +n00453313 +n04967882 +n12024690 +n03934656 +n02685082 +n04501550 +n09972458 +n03055418 +n07763629 +n03902756 +n09938449 +n09712696 +n02387346 +n03133415 +n07711080 +n03129753 +n03524150 +n02275560 +n03993053 +n03438661 +n11939180 +n00466524 +n11753355 +n03456024 +n03421324 +n07890540 +n11720643 +n02057035 +n00453126 +n04453037 +n01540832 +n03546235 +n03370387 +n02041875 +n02871439 +n03262072 +n01786646 +n02430830 +n02799175 +n05262422 +n03854722 +n12817694 +n04449966 +n01564773 +n02034971 +n03490119 +n02822579 +n07879953 +n04110178 +n04963588 +n04252653 +n01565078 +n02389128 +n02779435 +n10645017 +n04582205 +n08573842 +n10146002 +n03892178 +n03119396 +n03813078 +n07866868 +n03160740 +n03371875 +n02417387 +n03904782 +n03098688 +n02902687 +n01828556 +n04401680 +n04590933 +n01575401 +n07693048 +n02901114 +n03047941 +n04355511 +n11849871 +n10738111 +n03122073 +n12052787 +n01594004 +n01549886 +n02824058 +n03506184 +n11487732 +n12574866 +n12948053 +n10091450 +n00470554 +n00326094 +n12093329 +n04438897 +n07818995 +n12828791 +n13901321 +n10613996 +n10159533 +n02669295 +n02843158 +n06415688 +n14858292 +n09813219 +n12485653 +n03200231 +n02089468 +n03935234 +n01539925 +n12428076 +n10439373 +n01536644 +n02694662 +n02123242 +n03002711 +n03363749 +n02669534 +n03451798 +n11927215 +n02679257 +n09475925 +n10015485 +n12422129 +n03946162 +n02377291 +n07871720 +n12622297 +n12782915 +n01579260 +n11838916 +n10267311 +n12824053 +n03340723 +n02276749 +n04439712 +n02126139 +n04188179 +n02386853 +n07942152 +n02499316 +n04324387 +n10635788 +n04234887 +n12237641 +n03713436 +n04960582 +n04076713 +n01646292 +n03947798 
+n02840134 +n04476972 +n09822830 +n03551395 +n04533802 +n02918964 +n00474657 +n12932966 +n01615458 +n01806364 +n12458550 +n11784497 +n03557360 +n10638922 +n09889941 +n10689306 +n03358172 +n04295571 +n06596607 +n11853356 +n00482122 +n11760785 +n03150232 +n11778257 +n03059685 +n10105733 +n04104384 +n07691237 +n04326676 +n07684938 +n12666965 +n04177820 +n13918387 +n03398153 +n03914438 +n09932098 +n02988486 +n02977619 +n03317788 +n03484487 +n02988679 +n04062428 +n02568087 +n12866162 +n04227144 +n07875436 +n04082886 +n11753700 +n00470682 +n02122298 +n10145239 +n12755727 +n04214282 +n01852671 +n02378969 +n04108822 +n10382825 +n12392549 +n03973839 +n12258885 +n11782761 +n12389501 +n02940570 +n03405595 +n02969323 +n03207630 +n10169147 +n03805725 +n09847543 +n02415253 +n07880080 +n04305572 +n02042180 +n07565161 +n02871147 +n04438507 +n04445154 +n07842433 +n12029635 +n09750282 +n09621232 +n01858906 +n02761206 +n03986355 +n12591351 +n13916721 +n02905036 +n11894770 +n02377603 +n12924623 +n03950899 +n09454153 +n10247358 +n05261310 +n11943660 +n10804287 +n03560430 +n01756089 +n10618342 +n04283378 +n13926786 +n04238321 +n04393549 +n04461879 +n03502200 +n00440941 +n03494706 +n04148579 +n13902336 +n02780815 +n10726031 +n04124098 +n12344483 +n04384910 +n07681450 +n02030837 +n04059157 +n09247410 +n02714751 +n08633683 +n04520784 +n10141732 +n12371439 +n04499062 +n02931148 +n07609632 +n04536335 +n02874537 +n03013438 +n11786539 +n11690455 +n07600696 +n00478262 +n00466712 +n03399677 +n12441183 +n07895962 +n11966083 +n02990373 +n04241249 +n02068541 +n12513933 +n02356977 +n04252560 +n04087826 +n03455488 +n07619409 +n09787534 +n03680942 +n00446980 +n12384839 +n03416900 +n07821758 +n11853813 +n01606522 +n11780148 +n04969242 +n12413880 +n04130257 +n01322604 +n03061211 +n01959492 +n02842573 +n04313628 +n03815149 +n02445394 +n08547544 +n03222176 +n04070003 +n03075768 +n09695979 +n02877266 +n08583292 +n02870676 +n03657511 +n01621635 +n04284341 +n04136161 +n02836174 +n10247880 +n01744100 
+n02882894 +n03408444 +n03411079 +n02366959 +n04399158 +n04542715 +n02787435 +n04251701 +n13863020 +n07890226 +n12245319 +n12849952 +n11626826 +n00887544 +n03140431 +n03519387 +n03855604 +n07906111 +n02054036 +n11954161 +n03038281 +n00450998 +n12136392 +n02119477 +n04356925 +n02406647 +n04450133 +n12545635 +n01565599 +n02028900 +n07817024 +n02971167 +n04309049 +n02678897 +n12795555 +n11769803 +n01904886 +n02079851 +n12189987 +n04581829 +n12098403 +n01839330 +n12587803 +n03652932 +n08628141 +n03544238 +n04513827 +n01847806 +n03132076 +n10243137 +n03621377 +n10530959 +n14765422 +n04968139 +n12950314 +n02064816 +n02846511 +n10513823 +n11772408 +n03341297 +n03492922 +n03683606 +n02894337 +n02365480 +n09846755 +n03495039 +n01317813 +n12610328 +n02157206 +n01588002 +n03914831 +n03659686 +n10406765 +n09205509 +n02870526 +n07954211 +n10578471 +n11646694 +n03115762 +n07762913 +n12056758 +n12305986 +n11845913 +n02835915 +n02831237 +n07927512 +n12171098 +n02073831 +n07605040 +n02885462 +n02768114 +n04450994 +n11844371 +n03963645 +n02956699 +n02029378 +n01528396 +n10005934 +n04465666 +n04390977 +n11882074 +n03831382 +n04605163 +n06276501 +n02944075 +n05258051 +n07901457 +n12683571 +n02205219 +n13235503 +n02388735 +n03941231 +n14919819 +n12816508 +n11536673 +n13895262 +n02903204 +n10137825 +n07841345 +n07893253 +n01850192 +n07769731 +n11773987 +n03539678 +n12938193 +n10802507 +n03089879 +n00477392 +n01828096 +n09263912 +n13653902 +n04579667 +n01322983 +n08579352 +n07587023 +n07756951 +n07870167 +n10588357 +n01606809 +n13864035 +n02802544 +n07591961 +n02979399 +n04144539 +n02416820 +n11769176 +n09743792 +n09732170 +n04972451 +n13918274 +n01847089 +n01859689 +n04208065 +n07617051 +n10674713 +n07914271 +n07887461 +n03736064 +n03644858 +n03878963 +n04040247 +n07891433 +n01611969 +n07587618 +n02689144 +n10049363 +n04059516 +n10313239 +n03115400 +n01519563 +n01533893 +n03850245 +n11733548 +n03372549 +n01884834 +n02839110 +n07887192 +n03617312 +n07886463 +n03103396 +n07764847 
+n01855476 +n07808587 +n12858871 +n03632729 +n10209731 +n04141712 +n03978686 +n03225988 +n00475273 +n09224725 +n04966543 +n01322221 +n03649674 +n13154494 +n03948830 +n03320519 +n03723267 +n07869611 +n12342498 +n01827793 +n03145719 +n11821184 +n11956348 +n11857875 +n10339717 +n09450163 +n10756148 +n01591301 +n07915094 +n04422727 +n09719309 +n03349469 +n03389889 +n10718131 +n04298661 +n09747495 +n03676623 +n03547229 +n03062015 +n10734394 +n07817315 +n02852360 +n01850553 +n02952585 +n03587205 +n02009750 +n01540090 +n02947660 +n03656957 +n03378174 +n02508213 +n01572489 +n12008487 +n12185859 +n11691046 +n01323355 +n05262534 +n00448126 +n02432983 +n12038406 +n03883385 +n02411206 +n01643896 +n10159045 +n11675025 +n01803362 +n02009508 +n07920349 +n04098513 +n11617272 +n09913455 +n12390314 +n04171208 +n02995345 +n10634849 +n03173929 +n02749953 +n11845793 +n12796022 +n11955153 +n11816829 +n03032453 +n11984542 +n02992795 +n03712111 +n02873733 +n02759387 +n14915184 +n02381364 +n12686274 +n07857731 +n04518764 +n03010473 +n02418465 +n02359556 +n07894799 +n04104770 +n04335209 +n01848976 +n02006063 +n04454908 +n03002948 +n04220250 +n09923561 +n04102162 +n11958080 +n04598965 +n10173410 +n03067339 +n02003204 +n12686676 +n11986511 +n02311617 +n03367059 +n02761557 +n05578095 +n04041069 +n10575463 +n03325941 +n10082043 +n01806297 +n09691729 +n04593866 +n01813088 +n01625562 +n03906224 +n01652026 +n10236304 +n04102618 +n04321453 +n07820145 +n01575117 +n12788854 +n07823698 +n04206225 +n03216710 +n02421449 +n03343737 +n07560903 +n02872529 +n11989869 +n12071744 +n06278475 +n04492749 +n02920259 +n03798061 +n02420509 +n03316105 +n12052447 +n03974915 +n02904803 +n03430418 +n12291959 +n06892775 +n03875806 +n07903841 +n10282482 +n02683323 +n07862348 +n01849157 +n04469813 +n09944022 +n03342127 +n07592481 +n02936402 +n02405929 +n10002760 +n02537716 +n05259914 +n01560280 +n12694486 +n07879350 +n02377063 +n03637181 +n03409297 +n01607812 +n02808185 +n09239302 +n12055516 +n09712448 +n02859184 
+n12772908 +n02735538 +n10333838 +n12336092 +n02386968 +n04613939 +n00452864 +n04535524 +n03174731 +n04189816 +n07607605 +n12909917 +n02387722 +n02960690 +n07715221 +n02407071 +n10667477 +n09398076 +n04236809 +n01904806 +n01610552 +n12373100 +n12771390 +n04122685 +n07804771 +n15102455 +n03469175 +n03746005 +n02536456 +n03505667 +n11816336 +n09376198 +n10572706 +n03464053 +n02869155 +n07816164 +n04969798 +n02942349 +n14820180 +n01623615 +n12676703 +n03369276 +n03650551 +n02010272 +n02976123 +n01852400 +n02196119 +n04132158 +n03238586 +n07639069 +n03313333 +n10542761 +n12215022 +n00455173 +n10019406 +n12899537 +n04277826 +n09906449 +n04549629 +n11508382 +n15090065 +n10289462 +n04540255 +n02723165 +n04335693 +n01536334 +n03107488 +n12782530 +n14785065 +n02974348 +n09874862 +n04479939 +n03309465 +n09902954 +n12092417 +n03425595 +n12433081 +n07806774 +n12462805 +n01314781 +n10192839 +n01622120 +n07807171 +n03261019 +n02843553 +n04287747 +n02324587 +n09915434 +n01818299 +n01592694 +n03826186 +n03607659 +n01527917 +n03628511 +n02005399 +n04204081 +n02052775 +n04403413 +n03914106 +n12811027 +n01872772 +n04555700 +n02004855 +n04602762 +n02713003 +n04406817 +n11934807 +n03336282 +n09684901 +n03836976 +n11959862 +n03062336 +n03506028 +n04503413 +n07819896 +n03205669 +n11902200 +n07685218 +n03046133 +n10261624 +n10303814 +n03676087 +n04023695 +n07587111 +n07764155 +n01504179 +n03794136 +n03389761 +n13901211 +n02784124 +n04488530 +n02807731 +n07898443 +n04981658 +n04177755 +n03649161 +n04125257 +n10135129 +n03653110 +n10560106 +n07735687 +n03511333 +n11960245 +n03301568 +n03878066 +n10746931 +n04223299 +n04237423 +n07888229 +n01819734 +n12312728 +n09981939 +n03727465 +n13882276 +n02993194 +n11971927 +n09713108 +n03581125 +n09718936 +n14698884 +n03005285 +n03540914 +n03359436 +n03934042 +n07569644 +n04964878 +n07890068 +n07580253 +n01538630 +n03132666 +n03259009 +n02796318 +n12703190 +n01464844 +n11792029 +n04270371 +n13102775 +n02933649 +n02387254 +n02890188 +n04335886 
+n04358491 +n02786837 +n03885194 +n04001265 +n03438071 +n10375402 +n02997910 +n03326795 +n00470830 +n02734725 +n03494537 +n08376250 +n07743544 +n02991847 +n04246271 +n04156140 +n04381073 +n07732168 +n04951071 +n07977870 +n04334599 +n02838728 +n03326948 +n11723227 +n08182379 +n03686924 +n03821518 +n02382204 +n02080415 +n11788727 +n07732636 +n03860404 +n03898395 +n07867324 +n04392113 +n13237188 +n03263076 +n07843636 +n04968056 +n04397027 +n03320421 +n06267564 +n02880842 +n04115456 +n13862407 +n10289039 +n03128248 +n01457852 +n01536035 +n04579056 +n03937931 +n03036022 +n01804163 +n09913593 +n12841007 +n03115897 +n03256032 +n02475669 +n07924443 +n03061505 +n10001481 +n03600722 +n07842308 +n10696508 +n04215402 +n10588074 +n03614782 +n03995535 +n12091953 +n04113194 +n10092978 +n03011741 +n04381860 +n07819769 +n07905474 +n03288500 +n04225987 +n13223710 +n02879087 +n02920083 +n08640739 +n03362890 +n03996849 +n03849814 +n09694664 +n02407390 +n02910864 +n02388917 +n01668665 +n07616046 +n02932891 +n10553235 +n03652729 +n01615703 +n12801781 +n12164656 +n05302499 +n03801760 +n03332271 +n02901793 +n03941417 +n09833441 +n01623110 +n02807523 +n10598181 +n03725600 +n10368528 +n04116098 +n12719944 +n02045864 +n02173373 +n02811059 +n04479823 +n07816398 +n10572889 +n04142731 +n07687381 +n02799323 +n07865484 +n01858845 +n12684379 +n01842235 +n09242389 +n02028727 +n03527565 +n03438863 +n15019030 +n13907272 +n09659039 +n04251791 +n03683995 +n04137217 +n04389430 +n09785659 +n02016816 +n03124590 +n01859325 +n03138669 +n02999936 +n11926365 +n12686077 +n03517760 +n09734450 +n04563413 +n12074867 +n01564217 +n12521394 +n06267893 +n03594148 +n04139395 +n12369309 +n01544389 +n12048056 +n04524941 +n03016868 +n03653740 +n02795528 +n03687137 +n03766935 +n03361297 +n04263502 +n10043491 +n03446268 +n01994910 +n03891538 +n10091564 +n10226413 +n02755140 +n03500389 +n10237196 +n03625646 +n06596474 +n03360300 +n09730824 +n10732010 +n04469514 +n02904927 +n04961331 +n02936570 +n03680858 +n07585758 
+n09199101 +n04050933 +n03712337 +n03911513 +n01556182 +n03102371 +n07928887 +n12133462 +n03974070 +n03971218 +n03292475 +n03425241 +n03440216 +n11995092 +n02894158 +n02918112 +n10568358 +n11524451 +n03169176 +n04100519 +n07588193 +n06883725 +n02860640 +n07762114 +n04082710 +n07896893 +n10167152 +n03287351 +n02788021 +n08494231 +n01560935 +n03249342 +n04564581 +n09349648 +n07704205 +n03510244 +n12127460 +n09945745 +n11719286 +n11613459 +n12656369 +n03824381 +n07655263 +n09894143 +n04964001 +n02161457 +n07654298 +n07930433 +n02979074 +n02026948 +n13914608 +n07611267 +n02843276 +n09827363 +n10259780 +n04432662 +n11715678 +n12388858 +n03057920 +n10465451 +n03855214 +n07728181 +n09835348 +n03549732 +n04589325 +n03491032 +n00452034 +n03948242 +n01456756 +n07921615 +n02809105 +n12889713 +n07586894 +n07734879 +n07905979 +n12847374 +n12129134 +n02122580 +n04028074 +n02911332 +n09251407 +n07697825 +n04597309 +n02800213 +n03480579 +n07621618 +n04170933 +n03743279 +n01916481 +n04037220 +n10748620 +n02708433 +n12007196 +n02561381 +n04103769 +n03030880 +n04413969 +n03911658 +n04590746 +n00476389 +n04331639 +n07725789 +n01792429 +n02949542 +n07686720 +n04064862 +n04447028 +n01713764 +n09854218 +n04032603 +n04405907 +n15093298 +n04385536 +n11954345 +n01560793 +n09249034 +n03784270 +n03436549 +n01324610 +n02379183 +n07616487 +n04119478 +n03309356 +n12865037 +n12850168 +n04250850 +n03024064 +n04412097 +n02982515 +n00450070 +n10175248 +n11847169 +n12276872 +n12870891 +n10229883 +n10505613 +n03482252 +n09300905 +n02919890 +n07617611 +n10283170 +n01607962 +n01671125 +n07894551 +n04561287 +n00005787 +n10025635 +n02850732 +n03732020 +n02036711 +n07907429 +n03797896 +n03004824 +n12011620 +n10300303 +n03105467 +n03767745 +n07868508 +n07868200 +n03788047 +n07886057 +n04559451 +n09845401 +n04373704 +n02676938 +n02565324 +n02667478 +n02122878 +n03244047 +n01747589 +n04320973 +n13205058 +n02379430 +n11959632 +n10183931 +n07683490 +n10055410 +n04370288 +n03273551 +n13900422 +n07899434 
+n04053677 +n07740461 +n11879722 +n04282494 +n02981911 +n03449451 +n07581249 +n03965456 +n11808468 +n13881644 +n11725973 +n12091213 +n13193856 +n02873520 +n02754656 +n02431976 +n01324431 +n02385214 +n01888411 +n12680864 +n07731284 +n04337287 +n07631926 +n02549248 +n04395024 +n07585557 +n02776825 +n09460046 +n12023108 +n00475403 +n10098517 +n07902336 +n03683708 +n02412210 +n04397452 +n04583212 +n13869547 +n03632577 +n01616086 +n02763901 +n08256735 +n03015478 +n02084732 +n12178896 +n11966215 +n07605380 +n13869788 +n01847170 +n07744811 +n01854700 +n00444937 +n10422405 +n07801892 +n09688804 +n11879054 +n02802215 +n07908411 +n07822518 +n01558594 +n07935737 +n10730728 +n04436329 +n04294879 +n04972350 +n12911440 +n13886260 +n07578093 +n02537525 +n03703730 +n09607630 +n13865904 +n02360282 +n11731659 +n04126066 +n04212165 +n11618290 +n07588574 +n09269472 +n11896722 +n02892304 +n03487642 +n02028342 +n03321563 +n03135030 +n03522100 +n03253886 +n04095109 +n06470073 +n12603449 +n10644598 +n10260800 +n01535469 +n09696456 +n03553019 +n03963198 +n11918473 +n10314517 +n03002341 +n07574923 +n10421470 +n05716342 +n03244231 +n01730563 +n11691857 +n12807251 +n12345899 +n03142679 +n01531512 +n12307240 +n07835457 +n04535370 +n00451186 +n12481458 +n03434188 +n09734185 +n04578934 +n04167346 +n02747802 +n03459328 +n03301940 +n01562014 +n07690431 +n10642596 +n03696065 +n12781940 +n02759257 +n04392764 +n04218564 +n03499907 +n01536780 +n09751895 +n03235042 +n04570815 +n12070381 +n09448690 +n07625061 +n10178216 +n04560113 +n09457979 +n03858085 +n02421792 +n02944579 +n10085869 +n09718811 +n04103206 +n04239786 +n04501947 +n01321123 +n02390015 +n03964495 +n01554448 +n02925107 +n03028596 +n12483625 +n03227317 +n10701644 +n11968704 +n03900393 +n01851038 +n02276078 +n03132776 +n07585906 +n04480033 +n07880458 +n12887293 +n07921239 +n03307037 +n04595028 +n04244379 +n13131028 +n10313724 +n09436708 +n02694045 +n09941787 +n00449796 +n01817346 +n07928696 +n03401279 +n12901724 +n11646167 +n07682477 
+n09415671 +n07900225 +n03607029 +n02692232 +n11834654 +n07935379 +n12437930 +n03762434 +n07922764 +n03595523 +n04546340 +n10686885 +n03516844 +n03767112 +n09896685 +n03859608 +n03149686 +n07920872 +n12388143 +n10406391 +n04233715 +n04373089 +n02023992 +n01947396 +n12115180 +n00479616 +n03962852 +n02392434 +n12414035 +n14976871 +n03201776 +n10665587 +n03600285 +n04402449 +n08539072 +n03629231 +n12860365 +n03488438 +n03337383 +n12455950 +n10384392 +n02953455 +n03101796 +n07919572 +n03233744 +n01578180 +n01756508 +n04556533 +n02962843 +n02882190 +n03731483 +n01850873 +n05260240 +n03111177 +n09836519 +n03030557 +n11789066 +n02788572 +n07903101 +n04067818 +n07840804 +n01567678 +n12427184 +n03333610 +n02416964 +n10607291 +n07936548 +n05451384 +n02968074 +n07605597 +n02704949 +n07609215 +n01951274 +n07696977 +n03180384 +n04303357 +n03291741 +n02207805 +n10123844 +n03420345 +n12384227 +n02758863 +n02047975 +n03978966 +n03549199 +n04275175 +n09294877 +n09836343 +n11970586 +n02010728 +n10369317 +n12681893 +n03192543 +n12413165 +n12174521 +n11916696 +n10042845 +n07822197 +n04968749 +n10323634 +n12849416 +n02814774 +n05538625 +n03078802 +n12230794 +n07726095 +n03051249 +n12005656 +n11876432 +n12164881 +n09711435 +n01622483 +n09896170 +n07684289 +n03368352 +n07910048 +n03159535 +n00466377 +n01541386 +n11647703 +n09752023 +n07903731 +n12249542 +n03794798 +n11786131 +n02852043 +n10493685 +n09846894 +n01752585 +n01536186 +n07618432 +n09859152 +n02065026 +n02382635 +n07867616 +n03885788 +n04255586 +n03275681 +n11961100 +n12485981 +n04495698 +n03293741 +n13902048 +n03254862 +n07903962 +n01594787 +n11962272 +n03284886 +n07842202 +n10157128 +n02405302 +n04443766 +n06266633 +n02519862 +n01487506 +n03373943 +n04247876 +n04327204 +n03349771 +n09260907 +n10092794 +n12223764 +n03504723 +n11926833 +n01820052 +n13032381 +n03889871 +n03209359 +n04608923 +n15093137 +n15091304 +n03688405 +n09905185 +n03543112 +n11611356 +n03885028 +n03234164 +n07594066 +n02396014 +n03456186 +n09874725 
+n11601333 +n02917521 +n03055857 +n02804123 +n12352844 +n12866002 +n09858165 +n12037691 +n02565072 +n04477387 +n02008643 +n07867021 +n04119360 +n09893191 +n02944146 +n12435649 +n13197274 +n04974859 +n07751004 +n12003696 +n02762508 +n02680512 +n01743086 +n06998748 +n10607478 +n07613815 +n01559477 +n01859852 +n03239054 +n04466871 +n05263183 +n13173882 +n07897438 +n12427757 +n04400737 +n03291963 +n07682808 +n11692265 +n04130143 +n09445289 +n07696839 +n03835197 +n12821895 +n09734639 +n03365374 +n04305210 +n04962240 +n09871867 +n07897750 +n07616386 +n09443281 +n03641569 +n13882563 +n07680761 +n10498816 +n04034262 +n03533014 +n07928790 +n07690152 +n10060352 +n04124370 +n12453186 +n04509171 +n03013580 +n10604979 +n12515711 +n04971211 +n07693223 +n03786715 +n07894703 +n02761834 +n04232800 +n03437741 +n04045644 +n14976759 +n03042697 +n12557681 +n06275095 +n11678010 +n01586941 +n07684517 +n07822845 +n03483823 +n09951616 +n03180865 +n07861557 +n03644378 +n12848499 +n11962667 +n03886762 +n04238128 +n11979964 +n13915113 +n12791329 +n12457091 +n03341153 +n10267865 +n03484576 +n10186216 +n07612137 +n03843438 +n11807525 +n11931540 +n02027897 +n07614730 +n04116294 +n03469903 +n10017272 +n03688605 +n07860103 +n03981566 +n01888045 +n03345837 +n11998888 +n02071636 +n02726017 +n04310157 +n04607869 +n01622959 +n08524735 +n03119203 +n12031927 +n03610524 +n02807616 +n04056180 +n03233905 +n03374473 +n14810561 +n11944954 +n03121431 +n09750891 +n08505018 +n10727171 +n12357485 +n12571781 +n12067193 +n07586604 +n02086753 +n03548086 +n02560110 +n07804900 +n02880393 +n04208427 +n12931542 +n01594968 +n05218119 +n03520493 +n03727605 +n12687698 +n03612965 +n04135315 +n07730320 +n10540114 +n07599911 +n01323493 +n02115096 +n04590263 +n12043836 +n02861387 +n09836786 +n04966941 +n02816768 +n13131618 +n10701962 +n02919792 +n03442597 +n04325041 +n03333129 +n04091693 +n04950952 +n10631309 +n04177931 +n13234678 +n01970667 +n07748416 +n07893642 +n07691650 +n03660909 +n04145863 +n11945514 +n10334009 
+n12336973 +n03954393 +n04558478 +n09899929 +n03487533 +n07816575 +n07877187 +n07863547 +n01603812 +n02098906 +n04973585 +n03674440 +n04371050 +n12243109 +n07871234 +n02928049 +n07574504 +n07889274 +n12141167 +n04543996 +n03080633 +n03423479 +n07879659 +n04380916 +n10514429 +n07584423 +n04009801 +n12479537 +n07606538 +n07698543 +n12353754 +n10132035 +n03367545 +n04245508 +n09811852 +n02024763 +n04052442 +n10120330 +n12352639 +n12606438 +n07752966 +n09772930 +n02535759 +n11737534 +n10345015 +n12427566 +n09705784 +n04112654 +n02985963 +n03758089 +n12953484 +n07906572 +n02881757 +n12739332 +n03718458 +n03407865 +n07775050 +n03210552 +n09452395 +n09789566 +n10566072 +n10559996 +n07826930 +n12414932 +n01887474 +n03026907 +n07751148 +n10223177 +n03957420 +n03788601 +n12244819 +n12421137 +n04266162 +n10038409 +n02981024 +n03228967 +n11825351 +n12058822 +n11963932 +n03041449 +n03046029 +n07590502 +n02932523 +n02152881 +n04970398 +n07887967 +n12812478 +n12421917 +n02708711 +n11870747 +n04290507 +n07934282 +n01608265 +n12070583 +n03205574 +n02305085 +n07866015 +n02960903 +n10098624 +n00481803 +n07938007 +n02693246 +n03923379 +n04103665 +n11792742 +n12489815 +n04971313 +n01668892 +n01055165 +n03215508 +n12104501 +n07899292 +n12822955 +n07713074 +n03842012 +n02449350 +n07868955 +n02835829 +n12283542 +n04525584 +n07910656 +n11625003 +n03987266 +n02805983 +n15091846 +n09736945 +n04973816 +n02439398 +n01519873 +n07899003 +n03019938 +n07582152 +n01885498 +n12108871 +n02934451 +n04327682 +n07696625 +n09750770 +n12084890 +n03960374 +n07585107 +n01570839 +n11905392 +n06277135 +n07842044 +n03751269 +n04398951 +n12861892 +n12649539 +n07596967 +n07580592 +n12845413 +n07690739 +n07804657 +n04334105 +n03779128 +n03268918 +n03066359 +n02744323 +n12596148 +n04272389 +n07832416 +n10210911 +n01548865 +n03221351 +n15091669 +n07878926 +n07607967 +n12171966 +n02846141 +n07576781 +n02922292 +n10092643 +n01732614 +n02578771 +n02864593 +n03537241 +n09635534 +n03268645 +n07852833 +n13873917 
+n12640839 +n03506727 +n10536416 +n09976429 +n10692482 +n07600285 +n04156946 +n07818689 +n02605703 +n02710429 +n02890351 +n03408054 +n03121298 +n02731629 +n12450840 +n04061681 +n10153414 +n07648913 +n07891309 +n01562265 +n14973585 +n01610226 +n06267991 +n03302938 +n07822323 +n07826091 +n02764398 +n10406266 +n09282208 +n01734104 +n04283096 +n03530910 +n11542137 +n02610664 +n03856012 +n01531811 +n07862611 +n11625632 +n12643313 +n02469248 +n03333711 +n02907082 +n02122430 +n01559804 +n09744161 +n10187990 +n12015525 +n07844867 +n07887304 +n02878425 +n02009380 +n11448153 +n10655594 +n12566954 +n11901977 +n03999160 +n02389779 +n07928488 +n12785889 +n04281375 +n03745146 +n03224603 +n04594828 +n12835331 +n09715427 +n11615026 +n09972010 +n04038231 +n02379329 +n03445326 +n10753442 +n04249882 +n11727738 +n07866723 +n04282992 +n11621281 +n01566645 +n03919430 +n11980682 +n03480719 +n11625804 +n10467395 +n09436444 +n07867751 +n03684611 +n03788498 +n12062626 +n07808904 +n07690585 +n03865557 +n10711766 +n10465831 +n04380255 +n12166128 +n04432203 +n07892418 +n10432441 +n12991184 +n04209613 +n04459773 +n09666883 +n07807472 +n09873899 +n12939874 +n04545748 +n09637339 +n07919441 +n03987376 +n03645577 +n03437430 +n10671613 +n02964843 +n09707289 +n11700058 +n03877351 +n03518445 +n07643200 +n02140049 +n12683791 +n12418221 +n04154152 +n03397947 +n03238131 +n11851839 +n04545858 +n07744682 +n02995871 +n07593199 +n03543394 +n10293332 +n12658481 +n11599324 +n02705201 +n03920867 +n08249459 +n02876084 +n03937835 +n01397871 +n03849679 +n12016567 +n04208936 +n07696728 +n13148208 +n01904029 +n08659861 +n07878785 +n07827130 +n03390983 +n02624807 +n03319745 +n03994614 +n00446493 +n12477583 +n02920658 +n04602956 +n02688273 +n07577538 +n04350581 +n09283405 +n04074185 +n04495843 +n03538179 +n03454885 +n03878211 +n10308168 +n08518171 +n02660208 +n07904760 +n07928367 +n10174445 +n02137015 +n02863426 +n07700003 +n04015908 +n03946076 +n11725821 +n01794344 +n04364160 +n01663782 +n04283255 +n02822064 
+n04406239 +n02782681 +n11990313 +n03563460 +n02957008 +n07889814 +n07896060 +n03683079 +n04278447 +n13011595 +n11810358 +n03836451 +n12827537 +n03545470 +n03213538 +n07929351 +n03471190 +n02882301 +n03625943 +n03397087 +n11955896 +n04097373 +n03145522 +n03034405 +n02889646 +n02928299 +n09652149 +n01641391 +n04593524 +n07651025 +n03719343 +n03884778 +n03452594 +n02174659 +n12345280 +n03039827 +n03309687 +n11635433 +n02057330 +n01664990 +n09779790 +n02011016 +n09689958 +n07770763 +n03010915 +n03443912 +n02946509 +n13050397 +n03031012 +n04217546 +n04124202 +n12766869 +n04177041 +n12050533 +n03251932 +n03086580 +n03918737 +n04386792 +n03176594 +n01577035 +n01669654 +n01818832 +n10441962 +n03885904 +n03724756 +n02925666 +n03549589 +n03062122 +n02828427 +n12604228 +n03624400 +n07725888 +n03873699 +n01503976 +n02887079 +n03610098 +n02940385 +n04610013 +n03652100 +n04496872 +n04008385 +n02583890 +n10476467 +n03395514 +n03306385 +n04228581 +n02389261 +n12576323 +n01579149 +n01623425 +n02593019 +n03995265 +n02124484 +n12745386 +n04355267 +n02643836 +n01614343 +n03810952 +n04058594 +n12278650 +n03474779 +n02823510 +n00442437 +n12039317 +n04574067 +n03762602 +n02153109 +n03518943 +n04289827 +n02288268 +n07749969 +n04132985 +n03213826 +n04307986 +n03567066 +n02049088 +n04408871 +n03522003 +n09305898 +n04266375 +n08571898 +n03039259 +n01587526 +n03261603 +n00464277 +n02627532 +n02992368 +n03640850 +n03037404 +n04525191 +n02106854 +n07772147 +n04173511 +n12761284 +n03257210 +n02813544 +n07740342 +n04066270 +n03070059 +n03616428 +n02904233 +n03209910 +n04389854 +n03078995 +n03193260 +n01488038 +n01754533 +n12629305 +n02055107 +n11664418 +n04228693 +n03353951 +n03440682 +n03025250 +n03300216 +n02042046 +n04226826 +n03342015 +n03090000 +n02050313 +n03492250 +n01535690 +n01572654 +n03465718 +n02879309 +n06278338 +n04113406 +n03695857 +n09720256 +n01860002 +n02851939 +n09828216 +n02564270 +n03528901 +n02542432 +n11978961 +n01670802 +n03956623 +n01612275 +n09376786 +n03222318 
+n02813645 +n02213543 +n13898207 +n03616763 +n03616979 +n11904109 +n04212282 +n04608435 +n02042472 +n04198453 +n03216402 +n02015357 +n12282737 +n02699629 +n12866635 +n02048353 +n02933340 +n01793715 +n12001707 +n02878222 +n03187037 +n03105306 +n04080705 +n04254009 +n01623880 +n02839592 +n03436182 +n01591123 +n01318279 +n03002816 +n13155095 +n03141702 +n03775388 +n12165170 +n03322836 +n03259401 +n04471148 +n03911767 +n12585629 +n04317325 +n04257986 +n03133050 +n02035210 +n12891305 +n11882426 +n04491388 +n12948251 +n03498781 +n04262161 +n03775636 +n09915651 +n07584332 +n07852614 +n11626152 +n03901750 +n09723067 +n04265904 +n09920283 +n02397744 +n03253796 +n07712959 +n03898129 +n01743936 +n02075612 +n04560292 +n03479397 +n04334365 +n04357121 +n10145902 +n03844673 +n09854421 +n12687957 +n12598027 +n03944138 +n01839750 +n07722888 +n04258859 +n03088389 +n03351434 +n03509608 +n01677747 +n03145147 +n12046815 +n03505133 +n01629962 +n03333252 +n03993703 +n02962061 +n04529962 +n03463666 +n07681691 +n12160857 +n04187233 +n09331251 +n11614713 +n04376400 +n12301445 +n12633994 +n03883524 +n11614420 +n13062421 +n03645011 +n03293863 +n11640132 +n02579928 +n02854739 +n04461437 +n07729384 +n02977936 +n02836392 +n03593122 +n01666228 +n07820683 +n07568502 +n11910460 +n09348460 +n09712324 +n02403740 +n03482877 +n04370774 +n07750146 +n12992177 +n03152303 +n04134008 +n09805324 +n01611800 +n04374315 +n07586099 +n02032222 +n01979874 +n04350769 +n02907873 +n03016609 +n02543565 +n03256166 +n03016737 +n02419336 +n03268790 +n03559999 +n07765999 +n04607035 +n02416104 +n02123917 +n12484784 +n03225108 +n10739391 +n03506880 +n02918831 +n03045228 +n12516828 +n01314663 +n04172342 +n02768226 +n12368028 +n01500476 +n01558149 +n03604156 +n04035912 +n02359915 +n12261571 +n03875955 +n01887623 +n03871371 +n03390786 +n12494794 +n03826039 +n04465358 +n03838298 +n03165466 +n04229737 +n01321770 +n04354026 +n02998003 +n04114844 +n10611613 +n03600475 +n01909906 +n00466880 +n04284869 +n07722485 +n04496614 
+n03298716 +n02285801 +n04081699 +n07765208 +n12659539 +n11618525 +n11757653 +n07727048 +n03913343 +n12070016 +n02697675 +n04284572 +n02595702 +n04482297 +n03516996 +n03704549 +n02040266 +n04476116 +n01323261 +n03823216 +n07696403 +n03226880 +n09734535 +n03950537 +n01671479 +n03049924 +n12593994 +n04568841 +n03604400 +n01837072 +n01754370 +n03122202 +n12338454 +n04094720 +n04150980 +n03429682 +n03884926 +n03378005 +n02434954 +n03461288 +n02893692 +n04472563 +n10472129 +n04590021 +n07739344 +n04162433 +n03395859 +n12059314 +n03498662 +n03678729 +n02927764 +n02770211 +n11710393 +n07730207 +n04178190 +n07772935 +n03801880 +n04414675 +n12729521 +n12203529 +n04122578 +n04575824 +n06267655 +n03698360 +n02804515 +n02431337 +n08598568 +n02893608 +n02270623 +n00479440 +n11616662 +n02884994 +n04305323 +n02407625 +n04476831 +n04222307 +n03179910 +n11623967 +n00446311 +n00454983 +n02886434 +n12279458 +n03723781 +n11816121 +n02403231 +n11808299 +n07816296 +n03219483 +n02657694 +n00453478 +n02816656 +n02625851 +n04112752 +n03339529 +n12171316 +n02044517 +n04137773 +n01486838 +n03015149 +n12911673 +n03967270 +n03498441 +n11672269 +n03386870 +n11615967 +n02580679 +n01681653 +n02793199 +n02824319 +n10727458 +n02555863 +n01533000 +n02175916 +n12064389 +n04383015 +n02469472 +n03101664 +n03623338 +n12295796 +n02869249 +n01792042 +n03447075 +n04453390 +n04382438 +n04112252 +n03332393 +n12729729 +n01851207 +n04269270 +n12333771 +n06272612 +n03135532 +n02927887 +n11711537 +n12301180 +n04107743 +n01813948 +n03282295 +n09714694 +n00483409 +n01504344 +n04279353 +n04040373 +n12658308 +n04134523 +n10104064 +n12056601 +n04525417 +n07819166 +n12263038 +n02072798 +n03125057 +n03367410 +n04000592 +n03549897 +n01877606 +n01564914 +n12307076 +n02855925 +n03176763 +n12271933 +n04121728 +n07690511 +n02825442 +n04442441 +n01630901 +n03088580 +n02499808 +n10675010 +n01531971 +n02273392 +n01526521 +n01531344 +n03667664 +n02888270 +n04412416 +n07733394 +n04559910 +n04105704 +n11792341 +n04201064 
+n01693175 +n04555291 +n02908773 +n01976868 +n03529175 +n03365231 +n03622839 +n04258333 +n03327133 +n03425769 +n12477747 +n03718935 +n11727540 +n07933799 +n03030262 +n12043673 +n02619550 +n07937461 +n12198286 +n08560295 +n12402348 +n01733957 +n12344700 +n02763604 +n11925303 +n01557962 +n03927299 +n11611758 +n03035252 +n09454412 +n04004990 +n03456299 +n02175569 +n03668279 +n12352990 +n03507241 +n01534155 +n12278371 +n02499022 +n03822767 +n01318381 +n04024983 +n04277493 +n11934616 +n02027075 +n11611561 +n03454442 +n02236355 +n01732789 +n07722052 +n01489501 +n04409625 +n10563403 +n01817263 +n07757511 +n03770316 +n02977438 +n01840775 +n03607923 +n03322704 +n02375302 +n01614038 +n01646555 +n03952576 +n02946824 +n12847008 +n03016389 +n11809594 +n03165096 +n03839671 +n02687821 +n01689081 +n03822656 +n02597608 +n12336727 +n01579578 +n03631922 +n03904909 +n11658331 +n04224543 +n12621410 +n03870672 +n04252331 +n09720842 +n01396048 +n11988596 +n00483205 +n02871005 +n01597022 +n02382039 +n07743902 +n02358890 +n07877961 +n05263448 +n01862399 +n04136800 +n10624540 +n11990167 +n02731398 +n03366974 +n03490006 +n01561732 +n02626265 +n10627252 +n12402051 +n08517676 +n10488656 +n03099274 +n03718581 +n11806219 +n01830042 +n07728585 +n03732114 +n10755080 +n03359285 +n07720277 +n03354207 +n01596273 +n04416005 +n01847253 +n07733567 +n09725653 +n04274985 +n00449977 +n07772274 +n12063639 +n01530439 +n01322508 +n04397768 +n07273802 +n04261281 +n10524076 +n01678343 +n03410938 +n01797020 +n02388832 +n07719616 +n03639497 +n09787765 +n07721018 +n11818069 +n04185529 +n11644462 +n12074408 +n00483848 +n01583495 +n11891175 +n03347617 +n03308481 +n02535258 +n07750872 +n07748157 +n02855701 +n04584373 +n02461830 +n02912557 +n12277578 +n03604311 +n03643253 +n03031152 +n04039742 +n03435743 +n13908201 +n04150153 +n03250405 +n01410457 +n02357401 +n12588780 +n12729315 +n01690149 +n02538216 +n03171228 +n02424909 +n06274760 +n03775747 +n04211857 +n12429352 +n12272239 +n11759853 +n03401129 +n12649317 
+n02625258 +n12651611 +n03603442 +n02803934 +n03861271 +n02605936 +n02018368 +n12711984 +n02811936 +n04612026 +n01339471 +n02923682 +n09194227 +n04346157 +n03939178 +n12635532 +n01593028 +n01793249 +n02380464 +n12400720 +n07708398 +n12020941 +n12492106 +n12850336 +n12749679 +n02892948 +n12591017 +n03193423 +n01791463 +n11979527 +n12134025 +n12167075 +n09308743 +n13108545 +n01618503 +n07827284 +n07724492 +n02338145 +n04533946 +n01586020 +n07598256 +n01603953 +n12646740 +n03067518 +n04046277 +n01532511 +n07769584 +n11644046 +n12753573 +n02681392 +n08492461 +n07749446 +n04409384 +n01791954 +n12330891 +n04560882 +n10145480 +n04250473 +n02655848 +n02903126 +n11736851 +n11901294 +n12865824 +n03870105 +n00449892 +n04240752 +n11851258 +n04200537 +n12049562 +n01521399 +n03565830 +n07860447 +n03067212 +n01664674 +n07561590 +n02727141 +n02324514 +n02372952 +n01584853 +n07766173 +n11811706 +n03097362 +n04200258 +n02732572 +n01853195 +n12282527 +n09838621 +n02764505 +n04256891 +n12337617 +n12635955 +n07831267 +n11628793 +n12316572 +n07807834 +n02037869 +n01821869 +n02820556 +n04517211 +n01839086 +n03842986 +n07698401 +n02386224 +n07841800 +n01830915 +n11616486 +n11902389 +n03427202 +n12727101 +n01851573 +n02125494 +n07746186 +n11628087 +n07746551 +n03943115 +n11892029 +n02861022 +n11733312 +n01852329 +n09392402 +n12336224 +n07887099 +n03403643 +n04414199 +n07895100 +n02264232 +n02317781 +n07823460 +n07755929 +n02524202 +n04324297 +n11627512 +n01585715 +n02922578 +n00479887 +n02687423 +n02416880 +n11784126 +n12073991 +n01853870 +n01561452 +n04187970 +n10300154 +n02520147 +n12294124 +n07743224 +n12066018 +n11634736 +n02041678 +n11626585 +n02386141 +n03986949 +n07860331 +n12356023 +n12072722 +n03082280 +n12083113 +n12979829 +n01448594 +n03007444 +n07858978 +n01641739 +n02043333 +n12020736 +n02751215 +n04528079 +n01538200 +n07925608 +n12091550 +n03742019 +n03518305 +n01642539 +n03414029 +n04363991 +n03767966 +n02596067 +n01586374 +n02885882 +n04080138 +n11617631 +n02033779 
+n09451237 +n02310585 +n12648045 +n03955489 +n01752736 +n07899899 +n02299505 +n01579410 +n02156871 +n02998841 +n03759661 +n02050809 +n02683454 +n11621950 +n02910145 +n04967801 +n07896661 +n11906917 +n12275675 +n11611233 +n07736692 +n02312640 +n12588320 +n04399537 +n12757303 +n04197781 +n12717224 +n11635152 +n03122295 +n01792955 +n13133932 +n02518324 +n01584695 +n02915904 +n02967294 +n04345201 +n03019434 +n02470238 +n03049782 +n03101517 +n12709688 +n03716887 +n02422391 +n12638753 +n00288384 +n02162561 +n02053584 +n01317294 +n03334291 +n07814634 +n12273768 +n12406715 +n11644226 +n01646802 +n03460147 +n12338796 +n01972541 +n02147947 +n03890093 +n04127395 +n01581984 +n01681328 +n02213239 +n04582869 +n03254189 +n03274265 +n03186285 +n11839823 +n01624833 +n09792969 +n07891189 +n12023726 +n07619208 +n03466600 +n01849676 +n12190869 +n03079136 +n12317296 +n13001930 +n00477639 +n02944459 +n03903733 +n04131208 +n12710295 +n12180885 +n11612349 +n03443149 +n03982331 +n04264765 +n12642090 +n03237416 +n13868944 +n04046400 +n11705171 +n11979715 +n12597134 +n01609956 +n01568294 +n01469103 +n00443692 +n01606672 +n04556408 +n07690019 +n03977592 +n03358726 +n12696492 +n01573240 +n11632619 +n01772664 +n03453231 +n04179712 +n03646020 +n01812662 +n04306592 +n07724654 +n13908580 +n02903852 +n04284438 +n13132656 +n04317063 +n07829248 +n01589718 +n02654745 +n12294331 +n12515925 +n07900825 +n07721195 +n04189282 +n11907689 +n01624537 +n12333530 +n07762244 +n11757851 +n01599159 +n04038338 +n01568892 +n12691661 +n09744834 +n04307767 +n03120778 +n07920540 +n03781683 +n04185804 +n12080820 +n04354182 +n07574426 +n02579303 +n03046802 +n12078172 +n03210245 +n01614556 +n02304432 +n07713267 +n09724656 +n02861147 +n12755387 +n01483830 +n12921868 +n12026018 +n07817871 +n12062781 +n04241573 +n11621727 +n03376159 +n11815721 +n13007034 +n03540090 +n00450866 +n11619455 +n01528845 +n01568720 +n12743352 +n02871314 +n03606251 +n01490670 +n04246060 +n02053425 +n10780284 +n01915700 +n04510706 +n00456465 
+n01563945 +n11809094 +n09855433 +n04112579 +n03855333 +n09809925 +n03413684 +n02123478 +n12070712 +n03651843 +n02032355 +n01591005 +n01646648 +n02752615 +n02415829 +n03283221 +n04368496 +n01573360 +n02321170 +n10348526 +n04446844 +n07763792 +n12077944 +n04431025 +n02895438 +n10082687 +n07714188 +n02262449 +n03090172 +n12491017 +n01558461 +n12754781 +n04070415 +n04297098 +n03424862 +n01970164 +n09833536 +n01793435 +n01670535 +n09894445 +n09676247 +n01548492 +n12501202 +n03250089 +n03358380 +n02578928 +n12020184 +n02301935 +n03393017 +n12340755 +n01849863 +n01748906 +n03075946 +n01810268 +n01984245 +n04555400 +n12286988 +n04097760 +n02050586 +n12104238 +n01679962 +n02709101 +n01569060 +n12790430 +n01757901 +n13199717 +n11815918 +n07827410 +n02970534 +n12942572 +n07924276 +n04103918 +n11704093 +n07908647 +n07601686 +n12172906 +n04084889 +n02381261 +n02299157 +n11978713 +n12460957 +n02963503 +n03272810 +n12469517 +n03443005 +n01797307 +n02952237 +n11908549 +n13912540 +n03428226 +n10276477 +n01757343 +n01443243 +n01607600 +n03580518 +n12709103 +n07579688 +n04329834 +n12710415 +n11808932 +n10583790 +n02213788 +n11622184 +n12596709 +n02216211 +n07721942 +n07765361 +n01848453 +n11724109 +n02028451 +n02935017 +n12046028 +n10629939 +n00441073 +n07900958 +n12451399 +n02823964 +n04210120 +n01848840 +n10485883 +n07767709 +n02432704 +n11622591 +n03210372 +n07848196 +n11992806 +n02953197 +n07620689 +n01521756 +n03571625 +n03158186 +n12647560 +n02065407 +n01572782 +n09890749 +n05581932 +n07754451 +n03350204 +n13044375 +n12294723 +n12482893 +n04434531 +n12989938 +n12196336 +n01701859 +n07746334 +n11941924 +n02047411 +n12650379 +n10486166 +n01599556 +n01567879 +n12675876 +n01682435 +n02043808 +n12362668 +n12306089 +n02999138 +n01679626 +n03557270 +n01546039 +n11901759 +n01549053 +n11883328 +n06596727 +n03193107 +n11612018 +n03300443 +n03612010 +n03668488 +n12648888 +n01448291 +n11632167 +n10262445 +n09742101 +n09717233 +n04299370 +n03094159 +n04536595 +n03514693 +n02029706 
+n02886321 +n07816052 +n04045255 +n01851731 +n02627292 +n01841288 +n02739889 +n02932693 +n03784896 +n04569063 +n07902799 +n03863108 +n02607470 +n13200651 +n07916183 +n01573898 +n04347119 +n10076604 +n13033577 +n01824035 +n03630262 +n04426316 +n03064250 +n12262018 +n12048399 +n12279772 +n04143140 +n07829331 +n12891643 +n01826680 +n12646605 +n13103877 +n02023855 +n03086868 +n04163530 +n03736470 +n04358117 +n13872822 +n03159640 +n01680655 +n11611087 +n03980478 +n02978478 +n01555004 +n12402840 +n07763987 +n04387706 +n04979002 +n03258330 +n09856671 +n11624192 +n01538059 +n02003839 +n12552309 +n10469874 +n01576076 +n03643149 +n04419868 +n04586581 +n00483508 +n03131967 +n01847407 +n07929172 +n09683757 +n03786621 +n04369282 +n12733870 +n11612575 +n11619227 +n03301833 +n02176439 +n01569971 +n07935043 +n02563792 +n02051059 +n04482177 +n11859472 +n11710136 +n04115144 +n07864934 +n07691758 +n02620167 +n07748276 +n03415486 +n07835921 +n00452152 +n01848323 +n12906214 +n12075010 +n01563449 +n01499396 +n01570267 +n12047345 +n07920989 +n07601572 +n02683558 +n04428634 +n04345028 +n12161969 +n03460040 +n02561514 +n02006364 +n03582959 +n11812910 +n13185269 +n04297847 +n07896165 +n01552813 +n12361946 +n02031585 +n12766595 +n11622368 +n11695599 +n11615387 +n02509197 +n12409470 +n01314388 +n11758799 +n09846469 +n02675219 +n04253057 +n04041243 +n12276628 +n04381724 +n01855188 +n02203152 +n04403925 +n11895092 +n11924849 +n04172904 +n11888800 +n01546506 +n07906718 +n01489920 +n03436417 +n03615655 +n07765073 +n02434190 +n02004492 +n12282235 +n12406488 +n11981192 +n10373390 +n13183056 +n04332074 +n12818346 +n07731006 +n02598573 +n02438580 +n01957335 +n03356982 +n10288964 +n02629230 +n02042759 +n12319414 +n01451426 +n03521675 +n02016066 +n01813532 +n13207335 +n11805544 +n04401828 +n02952109 +n03963294 +n10013811 +n12058630 +n01551711 +n01574560 +n01858780 +n10093818 +n03858183 +n01550172 +n03571280 +n02309242 +n10258786 +n01569423 +n10134178 +n08578517 +n04445327 +n03250279 +n02584449 
+n03223553 +n04523831 +n04485423 +n02050442 +n04474035 +n04528968 +n02649546 +n01913166 +n09971273 +n04517408 +n02437482 +n03824713 +n03778817 +n07643026 +n01613177 +n12022054 +n07714448 +n07592768 +n00454493 +n03296328 +n02305929 +n03084834 +n03698815 +n12093600 +n08649711 +n03466493 +n04067658 +n03041114 +n03514451 +n01491006 +n04178329 +n03790953 +n03938401 +n02048115 +n07768858 +n03273740 +n10333601 +n05418717 +n12754003 +n02098806 +n03314608 +n01565930 +n12113195 +n12284821 +n12483427 +n04332580 +n10382710 +n03416094 +n02837887 +n03917198 +n14131950 +n04414476 +n11861641 +n11903671 +n01841441 +n09872066 +n01806467 +n04964799 +n00467320 +n01595974 +n03220692 +n01339083 +n01825278 +n11727358 +n04518343 +n11984144 +n07724269 +n02292692 +n02324850 +n01753032 +n01624115 +n11816649 +n07930062 +n02460451 +n12319204 +n04340521 +n12325234 +n01541102 +n02979836 +n00141669 +n01822300 +n11658544 +n12272883 +n03334382 +n11726707 +n03639077 +n07904934 +n03516367 +n03698723 +n03553248 +n11812094 +n03724417 +n01540566 +n02341974 +n11819912 +n07734555 +n02987379 +n03580845 +n12546962 +n02548247 +n12753245 +n07768423 +n12849279 +n11617090 +n02912894 +n07840027 +n12295033 +n12703383 +n02696165 +n10419785 +n04426427 +n03694639 +n11712282 +n04142999 +n01597737 +n03801533 +n01495493 +n07774719 +n03267113 +n01742821 +n03859170 +n03416640 +n03320959 +n12733218 +n02017725 +n13229543 +n09344324 +n04965451 +n01490112 +n10069296 +n12084555 +n04554406 +n04086446 +n02976249 +n02656032 +n02424486 +n02381609 +n09934337 +n04573937 +n07685399 +n02800497 +n02905152 +n02951703 +n07760153 +n03609397 +n00447463 +n03680512 +n02046939 +n03288886 +n11870418 +n03386544 +n07767171 +n07847453 +n12687044 +n01664492 +n03099147 +n03463381 +n02125081 +n12920204 +n03517647 +n02603540 +n12267411 +n11933546 +n11947802 +n04387095 +n12975804 +n02973904 +n13195341 +n04048441 +n11753143 +n03212114 +n03298858 +n04366116 +n01424420 +n10450161 +n01442972 +n07877299 +n04503593 +n04349306 +n12969425 +n12597466 
+n03092656 +n07914995 +n03487886 +n12223569 +n01756733 +n13919919 +n04175147 +n02029087 +n03530511 +n02425887 +n03572107 +n03927539 +n03383099 +n04130907 +n01632601 +n07823105 +n10378026 +n02382850 +n07613266 +n03235180 +n02810782 +n12708654 +n11636835 +n02823124 +n03402941 +n12121610 +n03715114 +n04052658 +n00480366 +n12493208 +n04255163 +n12145477 +n01489709 +n12402596 +n01598074 +n03837606 +n02628062 +n04103364 +n03247083 +n02032480 +n07736256 +n12578916 +n09218315 +n02218371 +n03730334 +n02080146 +n03836906 +n02868638 +n02198859 +n12744387 +n02942460 +n11754893 +n12274358 +n02725872 +n09218494 +n03942920 +n07574780 +n02921756 +n01757115 +n02763306 +n11758122 +n10508141 +n02303284 +n04083800 +n13879049 +n12765115 +n12075830 +n02666943 +n11980318 +n07907037 +n12794135 +n02333909 +n03870980 +n07586718 +n11923174 +n10782471 +n01493146 +n12294871 +n11726269 +n12932173 +n07825972 +n12732009 +n03572321 +n07682197 +n03423306 +n12495895 +n03545756 +n03557692 +n03785237 +n07902937 +n09899671 +n12061614 +n07902443 +n01449374 +n12632335 +n03474896 +n03539433 +n04310904 +n03902482 +n12006930 +n03285578 +n04200000 +n03912218 +n07821260 +n03548626 +n03223686 +n11826198 +n03165616 +n02104280 +n09981278 +n09382099 +n03732458 +n03987990 +n09946814 +n12270741 +n07737745 +n04172776 +n10189278 +n03543012 +n12629666 +n02180875 +n04087432 +n12961879 +n03321954 +n12528549 +n02424085 +n09843443 +n03846677 +n12304703 +n09873473 +n03410571 +n03041810 +n02425228 +n01562451 +n03615790 +n10081204 +n03985881 +n07842130 +n02890513 +n03649797 +n02381004 +n12560621 +n12523475 +n07687626 +n11905749 +n11759404 +n12905412 +n03542605 +n03983612 +n12573474 +n11972291 +n03767459 +n02698634 +n12713866 +n13084834 +n02202006 +n13108323 +n02631475 +n10737103 +n03637898 +n03069752 +n12400489 +n09692915 +n10242328 +n02794664 +n12465557 +n12085267 +n03348868 +n12754981 +n02745611 +n10504206 +n12073554 +n02835724 +n04605572 +n02825961 +n03528523 +n12116429 +n02973805 +n12708941 +n01544704 +n04180229 
+n09403211 +n08242223 +n02146371 +n12127768 +n09770359 +n03295246 +n01757677 +n04385799 +n02584145 +n07909593 +n12587132 +n13029326 +n04184316 +n07903643 +n01848555 +n10750031 +n02332156 +n12703557 +n03196990 +n12406902 +n02768973 +n12416073 +n02147591 +n09724533 +n09693982 +n12687462 +n01982068 +n03435991 +n03272125 +n07713763 +n03018712 +n03648431 +n03336575 +n07854184 +n12806015 +n07879174 +n03984643 +n03147280 +n02699915 +n07617708 +n01533651 +n12483841 +n01697611 +n02576906 +n03724066 +n03935116 +n09782397 +n01599269 +n10672371 +n12066630 +n03178674 +n15086247 +n03523987 +n02826068 +n12580654 +n02358390 +n01647640 +n10259997 +n03738066 +n13915023 +n02639605 +n03174450 +n12269406 +n09874428 +n03432061 +n04386051 +n03923918 +n04592465 +n12480456 +n10333439 +n04206790 +n01443831 +n02967626 +n07733712 +n03746155 +n12947313 +n11690254 +n12244650 +n12670758 +n08658309 +n12710693 +n11860555 +n03485198 +n03047799 +n04461570 +n07600177 +n02126640 +n12704343 +n02866386 +n03008976 +n04532831 +n03465426 +n12691428 +n01641206 +n04962062 +n03254046 +n04425804 +n02014524 +n03439348 +n02538010 +n11603246 +n12265600 +n12277800 +n04016240 +n12086192 +n09650729 +n01549641 +n03112719 +n04961062 +n02710324 +n12049282 +n12362274 +n11969607 +n12856680 +n02201000 +n07863802 +n03360622 +n07601809 +n04354487 +n12898774 +n12939282 +n03109693 +n12867826 +n12441390 +n12915811 +n12879527 +n04137355 +n04131368 +n03527149 +n10164492 +n09932508 +n12426623 +n12575812 +n02557318 +n10263790 +n04309548 +n00476235 +n04194127 +n11876634 +n10327987 +n03499354 +n02616851 +n04464615 +n03615406 +n02744844 +n11732567 +n10347446 +n09752519 +n04228215 +n10004718 +n07899533 +n12030908 +n15102894 +n12044467 +n11711764 +n02610066 +n03415749 +n04562496 +n02034295 +n02297442 +n03566193 +n12506991 +n07774842 +n12827270 +n14908027 +n12242409 +n04072960 +n02829596 +n12496427 +n02266050 +n13108481 +n12473840 +n08677424 +n12076223 +n15091473 +n02815749 +n04549028 +n12558425 +n12023407 +n04179824 +n02378541 
+n03188725 +n12517445 +n07573347 +n02004131 +n11921395 +n12570972 +n10602470 +n12095647 +n03854421 +n02450295 +n02792409 +n03543735 +n12836337 +n12204175 +n12152722 +n07900734 +n12517642 +n02775039 +n12607456 +n03376938 +n12179122 +n09873348 +n01847978 +n07888816 +n10453184 +n09675922 +n01851895 +n12865562 +n01797601 +n03711044 +n02738859 +n12064591 +n04033425 +n08551296 +n01650690 +n01537895 +n04207151 +n10087434 +n12261808 +n09438844 +n10364198 +n01814755 +n01583209 +n12270946 +n11892817 +n03344642 +n04117464 +n07847917 +n04003241 +n10362319 +n10477713 +n03495570 +n07560542 +n04363777 +n04534359 +n02404906 +n03349892 +n07712267 +n02960352 +n07866277 +n07857170 +n00324978 +n02755823 +n03150511 +n04211528 +n01899894 +n07588299 +n11874081 +n03425325 +n04506506 +n11949402 +n02952374 +n03309110 +n12159388 +n07591049 +n03068998 +n03228254 +n10279018 +n04173046 +n07728053 +n13052931 +n01597906 +n12368451 +n02767665 +n09435739 +n03915900 +n09728285 +n03292603 +n03331077 +n07817160 +n07917392 +n12540250 +n04153025 +n10209082 +n03968581 +n12676534 +n11824146 +n03521899 +n01853666 +n04292921 +n12332030 +n03984759 +n02863014 +n07801091 +n07723177 +n03289660 +n01533481 +n04488202 +n03468821 +n02382338 +n03543254 +n01961985 +n07915918 +n03703862 +n02771004 +n02047045 +n03877674 +n13141415 +n03529629 +n02240517 +n03675235 +n04491638 +n12384037 +n04419642 +n03019685 +n07591586 +n04496726 +n12985420 +n12927013 +n12196694 +n03473227 +n11621547 +n02988066 +n10451450 +n07729828 +n09618760 +n12196527 +n01555305 +n12830222 +n11950877 +n13190747 +n12160303 +n12390099 +n02818135 +n03163381 +n04554211 +n03244919 +n07897975 +n03386726 +n04290615 +n02011281 +n12407890 +n04123448 +n07904865 +n03447358 +n02393940 +n07931870 +n02937958 +n04318787 +n04587327 +n12807409 +n04112430 +n07560193 +n12774299 +n02618827 +n07854982 +n03757604 +n03817191 +n12793494 +n02324431 +n03013850 +n04113641 +n01612476 +n03127408 +n02038466 +n03799876 +n04257684 +n03382292 +n10449664 +n04394630 +n10275395 
+n07698250 +n12329473 +n07694659 +n07642742 +n02563648 +n08583455 +n02557182 +n02775178 +n09274152 +n03189083 +n12570703 +n04211219 +n12486574 +n03073694 +n11969166 +n02475078 +n02976350 +n08584914 +n07899660 +n10116702 +n01613807 +n12461109 +n04025508 +n12451240 +n12596849 +n12079963 +n03541269 +n04561422 +n11699442 +n07725255 +n03460297 +n07616748 +n12757458 +n03103563 +n02813752 +n07698782 +n12840362 +n01543632 +n01602832 +n01875313 +n12472024 +n02926591 +n02872333 +n10728624 +n12532564 +n03882960 +n12333053 +n03684224 +n13146583 +n03436075 +n04154340 +n03868643 +n02598878 +n04139140 +n03266371 +n04083309 +n12506341 +n12200143 +n03503477 +n12807773 +n03123917 +n13029760 +n10173771 +n03659809 +n12047884 +n12759273 +n04193377 +n04258438 +n04597400 +n04579986 +n03719743 +n04299963 +n02864504 +n10510245 +n03417970 +n09719794 +n03138344 +n02085272 +n07694516 +n12665857 +n01642257 +n03229244 +n10581890 +n10318293 +n03635108 +n10652605 +n12189429 +n09934774 +n11709205 +n04207903 +n10296176 +n10603851 +n03450734 +n13223588 +n12754648 +n09886403 +n07751280 +n11950686 +n07814390 +n12799776 +n01646902 +n09796809 +n12819728 +n01938454 +n02410011 +n07607138 +n02119634 +n10332861 +n09230202 +n02757061 +n02849885 +n15092227 +n12151615 +n03111041 +n02413050 +n03506560 +n07744057 +n04030518 +n12544539 +n04089836 +n02038993 +n13882201 +n12099342 +n01946630 +n10095769 +n02982416 +n12957924 +n13215586 +n07726525 +n12452836 +n03801671 +n04598318 +n01449712 +n12428747 +n04119751 +n10509063 +n07694839 +n02782602 +n11626409 +n02573704 +n12399384 +n12388989 +n01601068 +n11971406 +n04367011 +n07930315 +n12925179 +n04967674 +n03497352 +n03653833 +n01819465 +n03688192 +n02802990 +n03393761 +n04430475 +n13107694 +n10384496 +n07867164 +n12449526 +n01515303 +n12574320 +n01444339 +n07919310 +n03453443 +n04173907 +n02887489 +n07772788 +n03629520 +n02580830 +n11705387 +n12069679 +n01956344 +n02406533 +n03973402 +n03938037 +n04969952 +n04103094 +n04393808 +n07715407 +n04172107 +n01917882 
+n12085664 +n07608429 +n09835230 +n04135024 +n07842605 +n12568186 +n04339879 +n07691091 +n01801876 +n00474568 +n01807105 +n12128071 +n01673282 +n11948864 +n03991837 +n09659188 +n02070174 +n02670683 +n12454949 +n10385566 +n11631854 +n12305293 +n12002428 +n12948495 +n12757816 +n11852028 +n10690648 +n09283866 +n03214582 +n03423877 +n04127521 +n03006626 +n09283193 +n07712559 +n01447331 +n02981321 +n02658531 +n11947629 +n02419634 +n02420828 +n11923637 +n12570394 +n11968931 +n12731029 +n09749386 +n07736813 +n03967396 +n11908846 +n03029445 +n02426481 +n01964271 +n13198914 +n04484432 +n12656685 +n10806113 +n11849983 +n03236423 +n10649197 +n07688624 +n03057541 +n12015221 +n02094931 +n02014237 +n07560331 +n02801450 +n04206570 +n07556406 +n11627908 +n11889619 +n07852229 +n04063154 +n02713364 +n02783459 +n12877838 +n02930214 +n02125010 +n02407276 +n07815424 +n12855494 +n12530818 +n07750449 +n01963317 +n10082997 +n03245724 +n03012013 +n03555006 +n02421136 +n03332989 +n04375405 +n03746486 +n12636224 +n03278914 +n07917133 +n12504783 +n09416890 +n03896526 +n02258198 +n12983048 +n03837698 +n12869061 +n04541987 +n01637615 +n04401949 +n02241426 +n13220122 +n07876651 +n03729308 +n02364840 +n01339801 +n03418915 +n09257843 +n11614039 +n09731343 +n03809603 +n05399243 +n01569262 +n11901597 +n03124474 +n01566207 +n03796522 +n12595699 +n04573281 +n09689435 +n11859737 +n03201529 +n12902662 +n03374372 +n03760944 +n09189157 +n01517966 +n10431625 +n02898269 +n03693707 +n04369025 +n07834618 +n04095342 +n02786331 +n03822504 +n02284611 +n09862621 +n03436891 +n07688898 +n12435777 +n03949317 +n12443323 +n12273114 +n12623077 +n04333869 +n07907831 +n07774596 +n05450617 +n03320262 +n04190376 +n12671651 +n11819509 +n07588111 +n09756049 +n07611046 +n04973291 +n11602873 +n00120010 +n03500699 +n03844815 +n03708843 +n04452528 +n04387261 +n09889065 +n10147121 +n03318294 +n12599435 +n04164406 +n01965529 +n11636204 +n11791569 +n12275131 +n02977330 +n07851443 +n04132603 +n07824191 +n09760609 +n12190410 
+n07915491 +n12665271 +n10120671 +n02570164 +n10208950 +n02163297 +n02244797 +n09842528 +n08645104 +n01841679 +n11603835 +n04488857 +n07814487 +n01953762 +n04612373 +n11877193 +n03198500 +n03981924 +n01943087 +n11552806 +n04414909 +n03005033 +n02457945 +n10500217 +n10375314 +n04607242 +n07914777 +n09832456 +n12915568 +n12813189 +n10578021 +n03519081 +n07801779 +n12026476 +n03296081 +n03850492 +n07902121 +n09881265 +n12562785 +n03290195 +n10131151 +n10078719 +n01558765 +n03917814 +n02045596 +n07734183 +n03414676 +n07933154 +n02126787 +n12148757 +n12263987 +n07684164 +n03406966 +n01492569 +n02988963 +n12963628 +n09964202 +n03417749 +n01854838 +n02921029 +n02183096 +n11762433 +n11722466 +n02387093 +n02768655 +n12519089 +n09871229 +n07938313 +n10502329 +n11989393 +n03768916 +n13145040 +n11813077 +n04457910 +n03655720 +n03703945 +n11876803 +n01438581 +n07910379 +n07847827 +n02300797 +n09245515 +n10754189 +n04581102 +n12513172 +n02458135 +n03762332 +n11789589 +n09695620 +n03850053 +n07911249 +n12342852 +n12753007 +n07748574 +n07727458 +n03696568 +n04304680 +n07723039 +n07775197 +n07577144 +n03043693 +n04374735 +n01858281 +n09228055 +n09466678 +n01949085 +n02024479 +n11623815 +n02704645 +n07894451 +n01751472 +n01646388 +n01317916 +n13880994 +n10300500 +n11794024 +n03735963 +n04610274 +n11854479 +n07754894 +n02639087 +n02122510 +n02262803 +n12732966 +n04529108 +n13194036 +n09990777 +n10009276 +n12088223 +n12155009 +n07886176 +n04278247 +n04222723 +n11707229 +n01999186 +n07851641 +n12741792 +n01315213 +n10033412 +n04249582 +n03586631 +n03237839 +n12037499 +n12014085 +n07756325 +n01636352 +n03905947 +n08611339 +n07693590 +n03724538 +n09791816 +n01666585 +n10588965 +n11613219 +n10542608 +n12913791 +n10528023 +n03171635 +n11923397 +n12854600 +n10410246 +n12698598 +n04135118 +n09844457 +n04441790 +n03882611 +n02337001 +n07907342 +n12561169 +n12027658 +n10719132 +n09851165 +n02801823 +n12330587 +n01683558 +n12162181 +n04387932 +n11704620 +n09679170 +n07601290 +n04028221 
+n10277027 +n09877750 +n11758483 +n10027246 +n03819336 +n10205231 +n12478768 +n03451711 +n12973443 +n01923025 +n03262717 +n07807594 +n00475535 +n07744430 +n02341475 +n04614655 +n07924747 +n03388323 +n12680402 +n03202940 +n04534520 +n09661873 +n15092059 +n11832480 +n04198355 +n12529220 +n12389130 +n12304115 +n03234952 +n07610620 +n02868975 +n04442741 +n05282652 +n02820675 +n12795352 +n12675299 +n08547468 +n04189651 +n04141198 +n04513998 +n12273939 +n12482668 +n12858618 +n01958346 +n03172038 +n10280674 +n04301760 +n02631330 +n12433178 +n07763107 +n03068181 +n07565259 +n03605598 +n13177884 +n04005197 +n09751496 +n12737383 +n07648997 +n09839702 +n09442595 +n07925229 +n12150722 +n11898775 +n09904208 +n02207345 +n07642361 +n07685918 +n03205458 +n10574538 +n09742315 +n02599557 +n03585682 +n04273659 +n02200850 +n03410740 +n03391301 +n07726672 +n09782167 +n13155305 +n02067240 +n07561848 +n07728708 +n12463134 +n12228229 +n09743487 +n12225563 +n03421669 +n03226375 +n03973945 +n12498055 +n04483925 +n04564278 +n11890150 +n12519563 +n12754468 +n04353573 +n11615607 +n04430896 +n04585128 +n10395828 +n10773665 +n02772435 +n09881895 +n12663023 +n01615303 +n12803754 +n09445008 +n03955296 +n05245192 +n05486510 +n07899769 +n07575510 +n02307681 +n03814817 +n02670186 +n03598515 +n12797860 +n03518135 +n07587962 +n12630763 +n06273743 +n09843824 +n03226254 +n12407222 +n02961544 +n12951835 +n06417096 +n02016659 +n01441117 +n07735404 +n09411189 +n13896217 +n03262248 +n03451120 +n02525382 +n03375329 +n04155068 +n12916179 +n10297234 +n11907100 +n03423568 +n04360914 +n12027222 +n12199790 +n01744270 +n09896401 +n07925116 +n03693860 +n04414319 +n07767549 +n03555564 +n04043411 +n07872593 +n03774461 +n03129471 +n04497801 +n11756870 +n09776346 +n04530283 +n01520576 +n12828220 +n01583828 +n04120842 +n09676021 +n04344734 +n01916388 +n12513613 +n09861863 +n02310334 +n03318983 +n04533499 +n02427576 +n12727518 +n04502059 +n11725480 +n11987126 +n11876204 +n03504205 +n09720595 +n12315999 +n12935609 
+n04452757 +n12201331 +n01603152 +n10772092 +n03156279 +n12723610 +n02003037 +n03244775 +n07802963 +n11954642 +n07770034 +n09931165 +n10559508 +n01745902 +n07654148 +n10070108 +n01585287 +n13196003 +n04389718 +n10253122 +n03730893 +n02983357 +n02783900 +n01680813 +n03072440 +n03109253 +n03274435 +n11655974 +n10048612 +n07849733 +n07896994 +n03792334 +n03035832 +n03819448 +n03105088 +n11943992 +n01485479 +n01699675 +n11795049 +n12086778 +n01840120 +n07753980 +n10685398 +n04346428 +n04532398 +n07709172 +n02146700 +n09461069 +n03853924 +n01321456 +n12068432 +n09757449 +n03206282 +n03751757 +n13053608 +n11695974 +n12123741 +n03500209 +n04367371 +n02890940 +n01917611 +n07835331 +n02907656 +n04136045 +n12059625 +n03862862 +n12864160 +n00440039 +n03448590 +n12628986 +n04115802 +n03949145 +n12916511 +n12647893 +n09706255 +n13181811 +n07752109 +n04375615 +n01648620 +n04403524 +n09967967 +n12911079 +n03857687 +n02803539 +n01551080 +n10734891 +n13235159 +n04127633 +n07935878 +n12853482 +n10191001 +n03126385 +n10076224 +n01812866 +n12919403 +n03769610 +n09283767 +n03462110 +n11770256 +n12038898 +n09889170 +n11894558 +n10298647 +n02592055 +n02795670 +n11701066 +n12762049 +n02890662 +n07918193 +n02976455 +n03100897 +n13127843 +n12184912 +n00468299 +n12407079 +n12496949 +n03541537 +n05260127 +n01535140 +n01541760 +n11945783 +n07687053 +n07745046 +n12083847 +n02382132 +n12270027 +n10140597 +n03788914 +n01790711 +n02197689 +n03173270 +n10368624 +n04449290 +n01579729 +n07834872 +n07734417 +n02379630 +n01636829 +n12549192 +n12951146 +n13579829 +n03268142 +n11761202 +n02769669 +n09452760 +n04095577 +n12031139 +n02003577 +n12891469 +n03931885 +n01577941 +n04176295 +n12046428 +n03418402 +n13145250 +n11865874 +n12473608 +n11797321 +n01798168 +n09923186 +n02786736 +n01698782 +n09976283 +n03975788 +n14685296 +n01682172 +n07838441 +n02771286 +n03429137 +n03948950 +n02512830 +n02298218 +n10141364 +n02823848 +n02077384 +n12584715 +n11748811 +n02214773 +n03667552 +n04121426 +n04135710 
+n07579917 +n12275888 +n07826453 +n12167436 +n04586072 +n09877288 +n04248396 +n02761696 +n03038870 +n01490360 +n12353203 +n09785891 +n12057660 +n04146343 +n12557556 +n02081798 +n02917964 +n07898617 +n12597798 +n07574176 +n07764630 +n03008177 +n04255899 +n04434207 +n07897600 +n09929577 +n11811921 +n12415595 +n02893941 +n12276110 +n02821202 +n09690621 +n02508742 +n02077787 +n02390640 +n03764822 +n02257985 +n13033134 +n04559166 +n07865196 +n10506915 +n12051103 +n10473917 +n12775919 +n02971579 +n12880462 +n11837970 +n02063662 +n09840520 +n12019827 +n09208496 +n12836508 +n02982232 +n04219185 +n03332005 +n07914128 +n07862461 +n04250692 +n09267854 +n04561734 +n02076402 +n12344837 +n02919148 +n06592281 +n03668803 +n03062985 +n04246731 +n12112609 +n04012482 +n03558633 +n03982642 +n01998741 +n07665438 +n04209509 +n07913882 +n01749244 +n07801342 +n02611561 +n04488742 +n01897536 +n10624437 +n13128976 +n07931612 +n04300643 +n03727067 +n03360431 +n07593471 +n10253296 +n03297226 +n03854506 +n07879450 +n10562283 +n12557438 +n13154388 +n12862512 +n02126028 +n07752514 +n02387887 +n12066261 +n07666176 +n02806530 +n09988703 +n03721252 +n03221540 +n12195533 +n02682569 +n03622058 +n03943266 +n04207596 +n11721337 +n02427032 +n07910152 +n01551300 +n12861345 +n11660300 +n03786313 +n12966945 +n02046171 +n02797535 +n03546112 +n07711232 +n02044908 +n02998563 +n02652132 +n12634986 +n12187247 +n11645590 +n07582892 +n03065243 +n09911226 +n04396902 +n10763075 +n02359047 +n10400108 +n04294614 +n03991646 +n11728945 +n07766891 +n12277150 +n13141564 +n10563314 +n12426749 +n07827750 +n12403994 +n12627119 +n03420801 +n10203949 +n12830568 +n12280060 +n13180875 +n12659064 +n04239436 +n03823312 +n04367746 +n12448700 +n01896844 +n07581931 +n09384106 +n11625223 +n04198722 +n01477875 +n09932336 +n03477512 +n12281974 +n10117739 +n07759194 +n12281788 +n01405007 +n03077616 +n02304036 +n12947544 +n03140126 +n12356960 +n07807002 +n07877849 +n02956795 +n04373795 +n07925500 +n10359546 +n09730077 +n01694955 
+n10611267 +n04316498 +n07849912 +n12841354 +n07903543 +n10026976 +n04050313 +n03939844 +n03260849 +n07917507 +n12228387 +n03199775 +n01569566 +n02403920 +n04261638 +n02986160 +n03724623 +n01960177 +n03783430 +n07877675 +n10401639 +n04215153 +n03077741 +n02589623 +n12934985 +n03233624 +n04506688 +n12194147 +n09975425 +n07818825 +n12641007 +n10036692 +n02771750 +n12285900 +n01472303 +n10033663 +n10707134 +n03219966 +n11772879 +n10146416 +n10435169 +n10304086 +n12385566 +n03126580 +n12904314 +n03619196 +n02299846 +n03574243 +n12368257 +n03690473 +n01748686 +n09834378 +n07750736 +n02930645 +n01679307 +n03721047 +n02710044 +n07563207 +n02930080 +n09309168 +n03127203 +n02863536 +n02536165 +n01559639 +n09654518 +n02961035 +n12007406 +n12773651 +n04351699 +n03114504 +n06273414 +n02017475 +n01733466 +n02175014 +n07920663 +n03953901 +n09670521 +n09400987 +n11791341 +n02284884 +n12919646 +n07880325 +n03801353 +n01982347 +n07828642 +n01570421 +n03998333 +n03449309 +n10482220 +n12850906 +n12805561 +n12926689 +n03232543 +n04248851 +n03195959 +n04082562 +n03846100 +n07682952 +n07695652 +n11809271 +n09895561 +n04287898 +n09740724 +n02859955 +n09830400 +n03674731 +n02825153 +n04571686 +n13107891 +n10318607 +n07848093 +n13226871 +n08555710 +n03137473 +n02776978 +n03141455 +n12514138 +n01809371 +n09405078 +n01753180 +n02184473 +n11610215 +n03539546 +n12731835 +n04485884 +n03590588 +n10221312 +n04049753 +n03441345 +n02302244 +n12262185 +n15092650 +n11877646 +n10377185 +n01684578 +n03796605 +n07897116 +n03164344 +n12135049 +n10757050 +n01692523 +n04566756 +n07697699 +n07575392 +n10262655 +n04064747 +n07914006 +n12433769 +n07873348 +n04457767 +n10019072 +n02921195 +n03856465 +n04041408 +n12639584 +n12920955 +n11781176 +n07864756 +n03941013 +n03646148 +n04401578 +n11692792 +n02757714 +n02286089 +n04253168 +n03890514 +n07855510 +n03507458 +n04123026 +n11661909 +n12435152 +n04330746 +n09481120 +n03731019 +n03717285 +n03271030 +n02772101 +n07740597 +n02847852 +n12825497 +n12263738 
+n03342262 +n03603594 +n07804543 +n12932365 +n12695975 +n10297531 +n04054670 +n03175081 +n12703856 +n03832144 +n03966206 +n02414290 +n03619275 +n09738121 +n03290096 +n10585077 +n07731767 +n12409840 +n12026981 +n02278980 +n02752810 +n01654637 +n02654112 +n10314836 +n13023134 +n01823414 +n07461050 +n11902982 +n04543636 +n02204907 +n04049585 +n12304899 +n03073545 +n04272928 +n10315456 +n03975657 +n09899782 +n12288005 +n07005523 +n03795269 +n09823832 +n02242137 +n02907391 +n03643491 +n03245889 +n12285369 +n03061345 +n03797264 +n07838073 +n09219233 +n02859343 +n07608098 +n03920641 +n12578626 +n10688356 +n04542858 +n07834065 +n00443803 +n04181561 +n04570214 +n02047517 +n03295012 +n01633781 +n10610850 +n04035836 +n03001115 +n04593376 +n02393807 +n13061348 +n10123122 +n11800236 +n13207094 +n10140929 +n12167602 +n01809752 +n10421956 +n02764935 +n03424489 +n12889219 +n04046091 +n07714287 +n07708685 +n07736087 +n04142434 +n11961446 +n04521863 +n02414763 +n02901377 +n00467536 +n13085747 +n03855756 +n11846765 +n02530999 +n03063199 +n04258618 +n12204032 +n04424692 +n11758276 +n02653497 +n03766508 +n02026629 +n02572484 +n12339831 +n01635027 +n01668436 +n07821919 +n01543175 +n02689748 +n12528974 +n04024862 +n04184880 +n11720891 +n13869896 +n01678043 +n01647303 +n11532682 +n03236217 +n04963307 +n03012897 +n11682659 +n03191286 +n07643891 +n12737898 +n10680609 +n07924955 +n03879705 +n10461060 +n02523427 +n02013567 +n09893344 +n04124488 +n09863031 +n12454436 +n12305089 +n07709046 +n03805180 +n11940599 +n01691217 +n04198562 +n03978421 +n02357585 +n07818572 +n12870682 +n03798442 +n04154938 +n10550369 +n11957678 +n01958531 +n09936825 +n02334201 +n07910538 +n11978551 +n10562135 +n12700088 +n12784889 +n04480853 +n03281673 +n07588419 +n02968333 +n11935469 +n13046669 +n11730602 +n09643799 +n11849467 +n01758757 +n09638454 +n03267468 +n07914586 +n12104734 +n02961225 +n09827246 +n09917214 +n13079073 +n12634734 +n04089376 +n13034062 +n11714382 +n12753762 +n07683039 +n11840067 +n07689842 
+n12173069 +n12172481 +n04182152 +n07869522 +n10356877 +n02771166 +n03154895 +n07615289 +n12986227 +n12361135 +n03456447 +n12706410 +n12895811 +n02988156 +n03130761 +n10639359 +n03628215 +n02738741 +n01643507 +n07730708 +n03232309 +n02846733 +n04969540 +n03051041 +n12890928 +n03235327 +n04289576 +n07588817 +n10325774 +n03973285 +n09703485 +n02358584 +n03061674 +n03195332 +n02901259 +n07849619 +n04486934 +n07908812 +n01588725 +n03682877 +n11949015 +n04146504 +n04146862 +n07898247 +n03318865 +n04367950 +n07880213 +n04247011 +n01447658 +n12711817 +n03146687 +n02926426 +n12856091 +n11966896 +n02413593 +n09764900 +n03009794 +n03314227 +n10499232 +n10075693 +n04451318 +n12320806 +n11933728 +n07764315 +n12133682 +n09904837 +n12832538 +n03816530 +n07802863 +n04391445 +n09728137 +n03887330 +n04436012 +n03957991 +n07771731 +n06266973 +n10407310 +n10290919 +n07862244 +n01842504 +n10262561 +n12726159 +n07691954 +n07618119 +n03437829 +n11966617 +n03629100 +n04231905 +n04208760 +n03344305 +n03684143 +n12934174 +n08645212 +n03556679 +n12109365 +n03751458 +n02380875 +n02025389 +n02770721 +n09830629 +n02800675 +n04951186 +n04483073 +n12710577 +n12789054 +n12058192 +n11777080 +n07716203 +n09618880 +n04525821 +n04016846 +n02918330 +n10375052 +n13158512 +n13090871 +n02929582 +n02308735 +n10487182 +n02213663 +n07608339 +n04384593 +n12890490 +n03992436 +n02994573 +n13231078 +n12880244 +n01651059 +n02925009 +n09686401 +n13219976 +n09981540 +n04582771 +n06267758 +n09893502 +n13214340 +n03272940 +n12554911 +n02214341 +n04137089 +n03874487 +n04573513 +n12003167 +n12004547 +n13065089 +n01903346 +n04373428 +n02216365 +n02024185 +n12577895 +n11698042 +n07586318 +n11705776 +n03030353 +n04486213 +n07885705 +n07928163 +n02356612 +n02767038 +n02897097 +n11662371 +n04128710 +n09842395 +n07683360 +n11533212 +n08495908 +n12841193 +n03669886 +n07768068 +n02381831 +n12081215 +n02757337 +n02811618 +n10144338 +n01379389 +n09698644 +n12779851 +n10400618 +n11801891 +n12322099 +n12408077 +n02767956 
+n08640962 +n07816839 +n03021228 +n10346015 +n07868830 +n07917272 +n10076957 +n12865708 +n04290259 +n03595264 +n03986224 +n07825194 +n01610100 +n04417086 +n12995601 +n12734070 +n15091129 +n12428412 +n07587331 +n02405101 +n03108455 +n03594523 +n04489695 +n03892425 +n13032618 +n04409011 +n07590752 +n15092942 +n03914583 +n13066448 +n03532919 +n10639637 +n04566561 +n13223843 +n07904637 +n12347158 +n02720048 +n03901229 +n03936466 +n10574840 +n03782794 +n12397431 +n07908567 +n12580896 +n02697221 +n09791419 +n02577403 +n07870069 +n02136103 +n04318892 +n01462544 +n09747191 +n12287836 +n03067093 +n03934565 +n03543945 +n13126856 +n02240068 +n01585422 +n12413301 +n03246454 +n01876034 +n03635330 +n11680596 +n03228365 +n03082656 +n11609862 +n12859986 +n03934229 +n10233248 +n03166514 +n12166793 +n10115430 +n03327553 +n03373611 +n02967782 +n12338258 +n01604968 +n01323155 +n02590094 +n03044934 +n07866409 +n12291143 +n14900342 +n12094612 +n07845702 +n07926250 +n10750640 +n04359500 +n09797873 +n09953350 +n03561047 +n12122725 +n12725738 +n01453087 +n04977412 +n04575723 +n13219833 +n12161056 +n04273285 +n12482437 +n12863624 +n04953296 +n03390075 +n10188957 +n02874442 +n04236935 +n09990690 +n12866459 +n04075715 +n09725000 +n12794367 +n12461673 +n03050453 +n03677115 +n12427391 +n07736371 +n02973236 +n02406749 +n12322699 +n12815198 +n10680796 +n03268311 +n02405799 +n12302248 +n09791014 +n01545574 +n07740033 +n07862095 +n09901337 +n04390577 +n03597916 +n12110085 +n11802586 +n04205505 +n07696527 +n12076852 +n04344003 +n03326660 +n02823586 +n03042139 +n01565345 +n07905296 +n01454545 +n07650903 +n07905386 +n12530629 +n02841187 +n02943964 +n03329536 +n09681234 +n03479121 +n03770085 +n04147793 +n11552133 +n03774327 +n13197507 +n07901355 +n10400437 +n07837912 +n02310941 +n07845087 +n02239774 +n04976319 +n03960490 +n05239437 +n06275471 +n01633406 +n04257223 +n12009420 +n10483138 +n02775897 +n07866151 +n07922512 +n02666624 +n03944024 +n03842377 +n01832493 +n07855907 +n03968728 +n04492060 
+n07879072 +n11635830 +n11802800 +n02357911 +n02431628 +n03730494 +n13099999 +n07768230 +n13147270 +n12331655 +n10237676 +n11855553 +n09759501 +n10620586 +n13181055 +n12309277 +n13183489 +n04382695 +n07679034 +n10495756 +n02173113 +n12764202 +n03683457 +n10298912 +n07680313 +n10160280 +n02205673 +n12053690 +n11653904 +n02931294 +n04093775 +n12856479 +n02427470 +n07608866 +n09954639 +n11639445 +n03364599 +n09924106 +n09683924 +n10419472 +n03089753 +n12620969 +n07604956 +n12940609 +n12564083 +n03514894 +n10343355 +n13068255 +n03805280 +n12793284 +n03140652 +n02666501 +n11717577 +n04267435 +n04593185 +n12820853 +n03934311 +n02630615 +n07767002 +n07723968 +n01631354 +n07931452 +n12414818 +n03097673 +n09944430 +n04457474 +n11850521 +n12227658 +n10131815 +n12408717 +n03566730 +n12777680 +n06273555 +n04357531 +n03759243 +n09861599 +n03015851 +n04175039 +n03392741 +n07859796 +n07741138 +n04474187 +n02266864 +n04553561 +n02667244 +n12720200 +n12432356 +n07806120 +n10362557 +n11929743 +n07765862 +n02963987 +n02762371 +n02747672 +n04289195 +n04056413 +n03039493 +n03894677 +n12338655 +n04422409 +n12079120 +n10252222 +n10168837 +n12919847 +n10297841 +n01340014 +n11710827 +n10167838 +n12278107 +n01384164 +n10498986 +n02742468 +n02899439 +n11752937 +n12107710 +n12315598 +n03985441 +n07605804 +n07686202 +n12884100 +n13121349 +n11725311 +n10420507 +n11706761 +n01381044 +n03331599 +n12336333 +n10185483 +n07880880 +n01782516 +n12615232 +n03175457 +n12657082 +n01750437 +n07918879 +n13213066 +n12927494 +n02910542 +n06273986 +n02161338 +n10235024 +n12180168 +n03659950 +n02160947 +n11861853 +n09866817 +n09279986 +n12393269 +n01552034 +n05526957 +n02956883 +n12818966 +n09753792 +n03114236 +n12273344 +n12546617 +n13177048 +n02129991 +n01731941 +n01628770 +n12774641 +n07685546 +n03253279 +n10678937 +n12579038 +n08673039 +n01392275 +n02379081 +n10530150 +n12851469 +n12414449 +n11694664 +n11877283 +n09708889 +n03585438 +n00483605 +n12332555 +n03323096 +n07851767 +n02417663 +n10667863 
+n02856237 +n09269341 +n01596608 +n09720033 +n13160604 +n04443164 +n02814428 +n11622771 +n10328123 +n04338963 +n01794651 +n12069217 +n07762740 +n02935387 +n11897116 +n10569179 +n12749852 +n10745006 +n07823280 +n12162425 +n09801533 +n03772269 +n04518643 +n07916319 +n12771597 +n02147173 +n10342992 +n03795123 +n11646344 +n12847927 +n07686021 +n12383894 +n04465050 +n14564779 +n04212467 +n12274863 +n02380052 +n04329958 +n12034384 +n04213353 +n04366033 +n04955160 +n02778294 +n12890685 +n03028785 +n03097535 +n04533594 +n01750167 +n01415626 +n12276477 +n07729926 +n07711371 +n12843970 +n10500419 +n12891093 +n03840823 +n12509665 +n11878101 +n04315342 +n07685031 +n12305819 +n10039271 +n12264512 +n03911866 +n13919547 +n12413419 +n03785721 +n02599347 +n03786194 +n04018155 +n12856287 +n09607903 +n02396088 +n10212501 +n10313000 +n07683617 +n03586219 +n03890233 +n03156767 +n12033709 +n01648139 +n04399846 +n10671736 +n07698672 +n10791115 +n07708124 +n02709908 +n04266968 +n01758141 +n10058962 +n09444783 +n03668067 +n02838345 +n02388143 +n12893993 +n12590499 +n01462042 +n02689434 +n13209808 +n04075291 +n02412629 +n01953594 +n03906463 +n03043423 +n02200509 +n10152763 +n12504570 +n04396808 +n03382413 +n03618101 +n02767147 +n02390101 +n03450974 +n12778398 +n03625539 +n02574271 +n04113316 +n07572616 +n11809437 +n04119230 +n03829954 +n10500603 +n04258732 +n02731900 +n10174330 +n01574801 +n08663703 +n12558230 +n03981760 +n07732904 +n11875523 +n11823436 +n03238286 +n03079494 +n04281260 +n07873057 +n11686912 +n10568608 +n07593004 +n04271531 +n10037922 +n07838551 +n03615300 +n12624568 +n12940226 +n05242928 +n03680734 +n01589893 +n11652376 +n11893640 +n04119091 +n09696763 +n07851554 +n02660640 +n12124818 +n10370955 +n02663211 +n02414209 +n13187367 +n03258577 +n04375241 +n07617932 +n12240477 +n03417202 +n07595649 +n03839424 +n03087245 +n02431441 +n04396335 +n03484809 +n03426285 +n03592931 +n02912319 +n03488887 +n12187891 +n07592400 +n12918609 +n07858114 +n07567980 +n01548694 +n02726210 
+n02406859 +n10147262 +n05458576 +n02848921 +n03503233 +n02587618 +n03465151 +n03582508 +n11654293 +n03695452 +n02197185 +n04223170 +n10243273 +n03149135 +n02842809 +n03669534 +n03857291 +n02147328 +n12278865 +n12733428 +n03264906 +n09924195 +n10432189 +n12203896 +n03892728 +n12360958 +n10418735 +n01650901 +n12420722 +n03341606 +n02557909 +n07751858 +n03483971 +n12019035 +n03991202 +n02072040 +n03129848 +n04505345 +n02405440 +n03901974 +n11656123 +n11552976 +n10291822 +n10108018 +n09902731 +n03325691 +n12646072 +n04134170 +n12097396 +n07564008 +n01624305 +n03421117 +n02776007 +n10792856 +n07818133 +n03227184 +n10198437 +n04157099 +n12743009 +n07820960 +n12749456 +n13035925 +n05262698 +n03422771 +n02878628 +n12140903 +n07820297 +n03524745 +n09901921 +n03170872 +n10039946 +n12638964 +n11989087 +n03461988 +n04287451 +n04298053 +n07882420 +n04002262 +n02734835 +n11707827 +n07756641 +n12808007 +n10069981 +n12637123 +n12947895 +n04363082 +n04292080 +n11858077 +n04535252 +n12646397 +n12283147 +n12321077 +n02746595 +n02895328 +n07624924 +n12537253 +n11952541 +n02181477 +n01440160 +n03878828 +n12861541 +n02869563 +n04242084 +n03197201 +n09396608 +n04291992 +n07845863 +n04314522 +n12843557 +n04029647 +n12146654 +n13147386 +n12954799 +n11920133 +n03038480 +n03213715 +n02971473 +n04149374 +n04230387 +n00444340 +n11859275 +n07564796 +n02948403 +n10186068 +n04315713 +n02366002 +n02670935 +n13208302 +n10225931 +n07826340 +n04102872 +n02259708 +n11855842 +n09941089 +n08896327 +n10237464 +n12084158 +n03764995 +n03627954 +n12384375 +n10341343 +n07876189 +n04573379 +n07904293 +n07840520 +n12038038 +n03005147 +n10483799 +n02978367 +n01484285 +n13094273 +n04539053 +n01748389 +n10146816 +n07815839 +n12991837 +n03294604 +n03588841 +n04055180 +n03209477 +n09917345 +n04393913 +n12337391 +n12126084 +n01882125 +n07688130 +n02814116 +n09640715 +n12679593 +n12596345 +n03029925 +n11761650 +n04457157 +n12683096 +n07709881 +n03841290 +n13157684 +n07927836 +n03523134 +n03690279 +n10187491 
+n12451070 +n02682311 +n03978815 +n11806679 +n07808022 +n01386354 +n03622526 +n02369293 +n11885856 +n02289610 +n12663359 +n02624987 +n13173488 +n03027001 +n07896765 +n11935330 +n07814790 +n04242704 +n09959142 +n07589543 +n03551582 +n07843117 +n03556992 +n02060569 +n04000998 +n03825271 +n11946918 +n02874750 +n03479502 +n09919451 +n02176747 +n02080713 +n03400972 +n10222170 +n07926785 +n07852302 +n03012373 +n10438842 +n12868019 +n03634034 +n04210591 +n07853560 +n12374862 +n09248399 +n04355115 +n12908093 +n12906498 +n12875269 +n02791665 +n03146777 +n02854378 +n12414159 +n07821610 +n07595180 +n12238913 +n12141385 +n10761190 +n12165758 +n01653223 +n12956367 +n03695753 +n12416703 +n12346813 +n03405111 +n04304215 +n01624212 +n12674895 +n09850760 +n12407715 +n04156040 +n11610437 +n03395256 +n09970822 +n04229959 +n02530831 +n07870894 +n12098524 +n12828379 +n04057215 +n10751152 +n10053439 +n03674270 +n07869291 +n12256920 +n02535163 +n04282231 +n02136452 +n02365108 +n10328328 +n02315487 +n03325403 +n09231117 +n03342657 +n09980985 +n10702167 +n11961871 +n02065263 +n12857779 +n03219612 +n07805966 +n10699981 +n07691863 +n12831932 +n04179126 +n10208189 +n09765118 +n07922147 +n01631512 +n01947997 +n01405616 +n01892030 +n07827896 +n12964920 +n07749870 +n03276696 +n10020670 +n11828577 +n07624666 +n10590146 +n02407521 +n10253703 +n03270854 +n11610047 +n12981443 +n12413642 +n12302565 +n03177059 +n04594114 +n10227985 +n07728391 +n10395073 +n02810270 +n03569293 +n07812046 +n03843316 +n12477401 +n03802643 +n07618029 +n10755648 +n12837803 +n12454556 +n01636127 +n02809241 +n03270165 +n12035631 +n02962414 +n09750641 +n01793085 +n04346003 +n07922041 +n04164002 +n12499979 +n03301291 +n07921834 +n09656077 +n07599161 +n13155611 +n10194231 +n10063635 +n03601442 +n10366276 +n00475661 +n03943714 +n10377291 +n02624551 +n02568447 +n07589458 +n09691858 +n02685995 +n11919975 +n01690466 +n13211020 +n04114069 +n10530383 +n04200908 +n12631932 +n07916437 +n03219859 +n07918309 +n10368291 +n10253479 
+n03317889 +n13206178 +n02821415 +n10592811 +n12557064 +n12872458 +n10212231 +n07926346 +n09695514 +n09741816 +n03964611 +n07812913 +n09703708 +n02587479 +n10593521 +n03485309 +n03776877 +n12289433 +n07716504 +n10580030 +n03061893 +n03206158 +n09710041 +n04266849 +n07864065 +n12767648 +n02333190 +n12295429 +n02406432 +n01799679 +n07861983 +n02201626 +n03441582 +n03653975 +n02834506 +n12263204 +n10672662 +n03072682 +n03410423 +n11620389 +n04542095 +n07910970 +n03697913 +n02706806 +n09736798 +n12318965 +n07938594 +n12032429 +n03191776 +n04210288 +n01422335 +n03236093 +n11881189 +n02247216 +n12338146 +n03104512 +n00474881 +n04172230 +n01461315 +n04400109 +n10646140 +n02215621 +n10096126 +n03019806 +n11809754 +n02492948 +n10741367 +n10308504 +n07875560 +n02523110 +n07738224 +n02015797 +n10499631 +n03025165 +n03284308 +n03508881 +n10441037 +n10757492 +n07608721 +n09755241 +n04264361 +n04394421 +n03776997 +n03175843 +n04476526 +n02523877 +n13196369 +n10190122 +n03172738 +n02709763 +n02070624 +n04563560 +n04017807 +n03824589 +n07817758 +n03222722 +n01542433 +n13173259 +n04458201 +n12869668 +n12580786 +n02407763 +n09760913 +n10530571 +n11752798 +n09612700 +n07601175 +n11632376 +n10641223 +n03158668 +n03411208 +n01413457 +n03684740 +n10248008 +n12656528 +n11849271 +n07771891 +n12067433 +n12389727 +n11734698 +n04042204 +n07825399 +n12621945 +n07624757 +n03180732 +n09741331 +n10246317 +n04030414 +n07821107 +n04524716 +n03789603 +n12867449 +n10249869 +n02434415 +n07614103 +n03333349 +n04602840 +n09923996 +n02658811 +n13033879 +n03663433 +n02873623 +n07837545 +n12436907 +n02675077 +n01500854 +n04435552 +n01790304 +n11687789 +n03443543 +n09733459 +n01606177 +n12245885 +n11721642 +n02201497 +n12010815 +n04594742 +n02755984 +n07927716 +n04245218 +n03134118 +n13214485 +n12294542 +n12713521 +n03556173 +n12650038 +n07719058 +n04319774 +n10443830 +n10019187 +n09720702 +n07926442 +n10402709 +n03989777 +n11699751 +n09613118 +n02965122 +n04221076 +n01861330 +n12837052 +n02975589 
+n09668437 +n03012499 +n01418498 +n12451566 +n03585778 +n07692517 +n09672590 +n09741999 +n09748648 +n07621264 +n03482001 +n10185148 +n01542168 +n12536291 +n07846557 +n11840476 +n03130866 +n02631775 +n11730015 +n03715275 +n07680168 +n12175370 +n05427346 +n03665232 +n08611421 +n11730458 +n02413484 +n09783884 +n07888378 +n04611351 +n02247655 +n02136794 +n11649359 +n01382033 +n07889193 +n10405540 +n03510384 +n04420720 +n03585875 +n03812789 +n01835769 +n12139921 +n09762011 +n10103228 +n03477410 +n11930788 +n10064831 +n12311045 +n07681805 +n03136504 +n12887713 +n03886940 +n03130233 +n10197392 +n12333961 +n07672914 +n12723062 +n12599661 +n04268799 +n03696909 +n12809868 +n12452256 +n10710778 +n02571652 +n12117326 +n02450677 +n03041265 +n12544240 +n01966377 +n10252354 +n02378625 +n09814488 +n10569011 +n13067330 +n07928998 +n07890970 +n02187279 +n02592371 +n07846802 +n03475961 +n05448704 +n10410996 +n02851795 +n10093167 +n12468719 +n09876701 +n03057724 +n03469031 +n02344270 +n04248209 +n02687682 +n04467899 +n12897788 +n03436656 +n12539832 +n09906704 +n03190458 +n11843441 +n12130549 +n11823756 +n03153246 +n03684489 +n04160036 +n02908951 +n12855365 +n03518230 +n12225222 +n12933274 +n10432957 +n02921406 +n10156831 +n12239647 +n02826812 +n03411927 +n11602091 +n13200986 +n04244847 +n01330126 +n14938389 +n03001540 +n04387531 +n03423099 +n07608533 +n11723986 +n07600394 +n12529500 +n02403820 +n02587300 +n10333317 +n07935288 +n12680652 +n01449980 +n12153914 +n07803310 +n11741797 +n01881857 +n13081999 +n08644045 +n02061217 +n02173784 +n02660519 +n03104019 +n13137951 +n04538403 +n02621258 +n04515729 +n04165945 +n11919761 +n13078021 +n07861247 +n11959259 +n11801665 +n04070545 +n13210597 +n10218043 +n10717337 +n01365885 +n10718952 +n11979187 +n03880032 +n03798610 +n03477303 +n01876667 +n11860208 +n03401721 +n03360133 +n13230843 +n13194758 +n13190060 +n02564935 +n13894154 +n12754311 +n07697408 +n13171210 +n02035402 +n03736147 +n10396337 +n04554998 +n02793930 +n04126852 +n03654826 
+n09411295 +n06255613 +n01680983 +n10261862 +n01581874 +n10378780 +n10646641 +n03539103 +n03351151 +n04349913 +n03906106 +n02370525 +n03319576 +n04113968 +n09693244 +n02945964 +n03344509 +n04117216 +n03889626 +n03557840 +n09800469 +n04280487 +n07890890 +n12147835 +n12295237 +n03883664 +n04436992 +n02922877 +n10099002 +n01988203 +n10056719 +n11646517 +n03672521 +n04568713 +n10111358 +n03606347 +n04047733 +n12320627 +n10251612 +n10460033 +n01742447 +n11917835 +n10443032 +n13079567 +n04363671 +n10788852 +n10482587 +n03308614 +n12741586 +n12938667 +n04539407 +n01630148 +n02303777 +n13050940 +n04552551 +n02341288 +n04098169 +n04110439 +n11625391 +n12259316 +n02822762 +n10631131 +n04089152 +n03571439 +n04558199 +n12656909 +n03170292 +n02877642 +n12771890 +n03033267 +n12658603 +n13354021 +n12855886 +n11840246 +n03619050 +n07727252 +n12932706 +n13874073 +n01315805 +n02948942 +n12048928 +n03146449 +n10656969 +n09872557 +n03906590 +n04454792 +n12500309 +n04239333 +n01815036 +n09644657 +n10497645 +n02918455 +n07812662 +n04240434 +n10804636 +n11967878 +n04184095 +n11834272 +n05244755 +n02299039 +n12665659 +n12144987 +n07607492 +n11887750 +n13083461 +n04577139 +n09670909 +n07876893 +n02875948 +n04069582 +n10458111 +n10361194 +n09389867 +n01651778 +n11933387 +n13193143 +n12834190 +n03516266 +n02184589 +n10041373 +n02809605 +n04064213 +n04957589 +n12643113 +n02582721 +n07911061 +n07921360 +n10369417 +n10527147 +n04104925 +n03707372 +n01386182 +n10374849 +n09902851 +n08559155 +n02332447 +n11649150 +n11722036 +n01823740 +n04592356 +n10002257 +n10661732 +n07562379 +n07597263 +n04036776 +n13112201 +n09842288 +n07738105 +n04545984 +n09635973 +n02885233 +n02756854 +n07808479 +n03029296 +n01543383 +n02884450 +n09843716 +n04224395 +n10576676 +n10140051 +n07919894 +n07806879 +n10212780 +n09478210 +n12017127 +n03770224 +n07606191 +n03555217 +n09715165 +n12270460 +n12129738 +n11739365 +n02303585 +n07818029 +n05314075 +n03019304 +n09859975 +n09454744 +n13151082 +n12586989 +n00455076 
+n07741357 +n04957356 +n08659242 +n04577293 +n04126244 +n03131193 +n12428242 +n03569494 +n03781594 +n07743384 +n02892392 +n12576695 +n12199982 +n07693439 +n07719756 +n11884384 +n03043798 +n12351091 +n03690168 +n02214499 +n01839949 +n01831360 +n12642964 +n02957862 +n03125588 +n12883628 +n04002371 +n10747965 +n09744462 +n02853745 +n13030337 +n12156679 +n02761034 +n12587487 +n03374570 +n12728322 +n01731764 +n07918706 +n03696445 +n03185868 +n02805283 +n03868763 +n02202124 +n12369665 +n12449934 +n12650229 +n02656301 +n07743723 +n11702713 +n02927053 +n03916385 +n01486010 +n03986071 +n04188064 +n13897528 +n12414329 +n07718068 +n07837755 +n11735570 +n10464542 +n04091466 +n01315581 +n10374943 +n03989898 +n13220525 +n04076052 +n04062179 +n02414442 +n04414101 +n04446162 +n00480885 +n03536568 +n03773835 +n10728998 +n12643877 +n02255391 +n03799610 +n07847585 +n00446411 +n11910666 +n03139998 +n02296276 +n02889996 +n02786611 +n10363445 +n07854348 +n08583682 +n09912681 +n07896422 +n02368821 +n11935953 +n12185254 +n11738547 +n03809211 +n02448318 +n13066979 +n01987076 +n12009047 +n12839574 +n13174823 +n07902520 +n03369866 +n13209129 +n02593191 +n03853291 +n02620578 +n10071332 +n01813658 +n09895480 +n10134760 +n01316734 +n07845166 +n03175983 +n13132156 +n12814960 +n12883265 +n03637787 +n04310507 +n04133114 +n03900194 +n04129688 +n04449550 +n01805321 +n01717467 +n01573627 +n12271451 +n11722621 +n09976917 +n12232280 +n12905135 +n03451253 +n01655344 +n12346986 +n11987511 +n10517283 +n02941845 +n12730370 +n03121190 +n07917874 +n10023656 +n10151133 +n07695187 +n03258456 +n10639238 +n10682713 +n02085019 +n12343753 +n10749928 +n04595611 +n04410565 +n08500819 +n07719980 +n04016479 +n03232417 +n03469832 +n09834885 +n07925327 +n10094782 +n03632100 +n12734215 +n09845849 +n04047139 +n10743124 +n02604954 +n12270278 +n03036244 +n11991777 +n10168012 +n02561803 +n10531109 +n10344319 +n03804211 +n10513938 +n10732967 +n09917481 +n02950482 +n03148808 +n07910245 +n07925423 +n07889990 +n04302988 
+n07745357 +n04346511 +n07573563 +n02564403 +n12084400 +n10030277 +n09815455 +n04388473 +n12404729 +n10576316 +n12072210 +n11811059 +n01824344 +n03556811 +n03175301 +n07586485 +n13137010 +n11986729 +n04967561 +n03881404 +n07692114 +n07874995 +n02770585 +n07853345 +n02775689 +n04328580 +n01323781 +n07773428 +n02414043 +n02794474 +n02352932 +n07569873 +n12374705 +n03606106 +n04267246 +n04369485 +n11934239 +n12705698 +n11841247 +n07868045 +n03525693 +n12358293 +n02937010 +n09658398 +n12711182 +n03516647 +n04591631 +n10228712 +n11930353 +n03471779 +n12594324 +n02251593 +n04455579 +n02542017 +n03381450 +n03320845 +n12364940 +n09657748 +n12412987 +n01840412 +n10570704 +n10117267 +n03251280 +n10195261 +n12178129 +n12285049 +n02177775 +n10117415 +n03707766 +n04475309 +n05604434 +n03999064 +n12127575 +n01972131 +n09793946 +n01635176 +n02791532 +n07564101 +n07876460 +n02813981 +n10764719 +n03638743 +n12761702 +n02125689 +n11657585 +n09923003 +n13069773 +n02683183 +n04324515 +n11936946 +n12862828 +n02659808 +n02619861 +n13175682 +n11648039 +n07768139 +n12512674 +n12108613 +n02947977 +n12899971 +n03845107 +n07689490 +n02081927 +n07619508 +n10248377 +n10300041 +n10761326 +n09655213 +n02675522 +n04963111 +n01995686 +n03256631 +n10684630 +n04471912 +n12728864 +n03870546 +n02829246 +n09725546 +n03409920 +n13194918 +n10055297 +n02513248 +n01462803 +n11782266 +n13094145 +n07839478 +n13916363 +n07932454 +n09722817 +n07774479 +n10386874 +n12832822 +n01599388 +n02964295 +n04349189 +n07689313 +n11653126 +n02309841 +n02064000 +n04410663 +n04562122 +n02358712 +n09901786 +n10441124 +n12882158 +n12815668 +n10159289 +n01641930 +n03315990 +n12271187 +n10277638 +n07815163 +n12903014 +n07915366 +n04412300 +n01324799 +n03408264 +n09452291 +n03019198 +n11890884 +n10355806 +n03186199 +n04013600 +n12541157 +n06259898 +n06273294 +n11946051 +n01671705 +n04415257 +n01905321 +n04050600 +n12604460 +n04051439 +n02929184 +n11765568 +n10025060 +n02396796 +n04033287 +n13027557 +n03127531 +n10308066 
+n09729062 +n01593553 +n02476567 +n07609728 +n12970293 +n01419888 +n03215749 +n01684741 +n13067672 +n03870290 +n07846359 +n12961536 +n03356559 +n07727140 +n09843602 +n02378755 +n12044041 +n01977485 +n07718920 +n12060546 +n04265428 +n12237855 +n04006067 +n10227266 +n04361937 +n12134486 +n10097842 +n02264591 +n03912821 +n07594155 +n03116163 +n11771924 +n04155457 +n12394118 +n10507380 +n01844746 +n11901452 +n03024233 +n03383562 +n11806814 +n10062716 +n04204755 +n08613733 +n12907671 +n03533654 +n09826605 +n03109033 +n07606419 +n03537085 +n11615812 +n07695504 +n11694300 +n04520962 +n09971839 +n02664285 +n03402511 +n02061560 +n13133140 +n03548195 +n12877493 +n02425086 +n12845187 +n12488454 +n02975994 +n02071028 +n01457407 +n03685486 +n07605282 +n07771405 +n07827554 +n10538733 +n03438780 +n04379096 +n12686496 +n10001764 +n11848867 +n12125001 +n09886540 +n03275566 +n01442710 +n12789554 +n07858197 +n12722071 +n12868880 +n10441694 +n12409651 +n07727741 +n12289585 +n04069166 +n12686877 +n03723439 +n07815956 +n12543455 +n10778044 +n02200630 +n10074841 +n12640284 +n12589841 +n07592317 +n07866571 +n12712626 +n04228422 +n11711289 +n03590475 +n13081229 +n03045800 +n03639230 +n02874214 +n07615954 +n03204134 +n12053962 +n12769219 +n15006012 +n09873769 +n11818636 +n01959029 +n03349599 +n12227909 +n07576969 +n03638180 +n07742224 +n03390673 +n02344175 +n03770520 +n00447361 +n13235319 +n01983674 +n10061882 +n04267165 +n12493868 +n12713358 +n02930339 +n10493419 +n12918810 +n02582220 +n12248359 +n02644501 +n04596492 +n04538249 +n07905618 +n13230190 +n07808268 +n15005577 +n09351905 +n12730544 +n11937023 +n04024137 +n02238358 +n11646955 +n11618079 +n09849990 +n04060448 +n04220805 +n12725940 +n12004120 +n01484562 +n02669442 +n12132956 +n01756916 +n03980986 +n02256172 +n07716750 +n12119390 +n04047834 +n11934041 +n12828977 +n03648219 +n11873612 +n12909614 +n04397860 +n03908111 +n03261395 +n03695616 +n11668117 +n12014355 +n02896074 +n03988758 +n04426184 +n10328696 +n02477028 +n04507326 
+n04320871 +n03256472 +n01919385 +n03988926 +n13182164 +n07826250 +n03207548 +n01396617 +n04369618 +n07913774 +n13229951 +n03410022 +n12728508 +n01997119 +n03598783 +n01341090 +n03879456 +n01736796 +n02864122 +n13879816 +n02684962 +n12246037 +n02433729 +n04364397 +n09881358 +n02950120 +n03326371 +n02243878 +n01790812 +n12990597 +n03330947 +n07764486 +n03332173 +n10006177 +n03347472 +n07619301 +n10106509 +n12365285 +n01732989 +n07678586 +n04098795 +n07733847 +n03994297 +n12872914 +n02762909 +n07766530 +n13198482 +n02395855 +n12273515 +n04487894 +n07847047 +n12488709 +n02859557 +n04255768 +n02360933 +n03267696 +n03152951 +n10188715 +n10520544 +n13065514 +n02900594 +n03699754 +n01319187 +n01949499 +n10417424 +n01603000 +n12062105 +n09683180 +n09863339 +n01880716 +n10702615 +n03893935 +n10495555 +n04131499 +n02957252 +n02113892 +n07724078 +n12246941 +n04303095 +n01751215 +n04213530 +n12117695 +n12418507 +n01922948 +n12131405 +n13188767 +n01481498 +n03174079 +n02407172 +n11613867 +n10152616 +n10119609 +n04158250 +n11695085 +n07855105 +n02854630 +n03768683 +n12739966 +n12266984 +n12819141 +n12732605 +n13205249 +n11917407 +n01607429 +n02694279 +n07815294 +n06614901 +n07846471 +n12119717 +n02595339 +n12366186 +n10693235 +n12263410 +n12484244 +n10337488 +n04146976 +n01469723 +n07872748 +n03238879 +n12000191 +n07846938 +n03116008 +n12139196 +n04013176 +n10317963 +n12140511 +n02065726 +n01649556 +n10316862 +n01755952 +n04385079 +n12770529 +n02814338 +n01675352 +n11874423 +n01369484 +n10537708 +n07618281 +n07821404 +n02297819 +n03238762 +n03357081 +n02628600 +n07830986 +n12507823 +n04431925 +n11955532 +n03429771 +n10281896 +n12383737 +n12760875 +n09673091 +n12892013 +n06625062 +n04503269 +n03674842 +n12338979 +n04268275 +n12033139 +n11767877 +n07812790 +n12676134 +n04037873 +n10097477 +n12310638 +n12258101 +n09391386 +n13196738 +n13866626 +n12720354 +n10106995 +n07843220 +n03878294 +n04101375 +n07733217 +n10220080 +n04601938 +n10778148 +n12973937 +n10556825 +n12256708 
+n12583855 +n04259202 +n07628181 +n04226962 +n02777402 +n09674412 +n12188635 +n03776167 +n04504038 +n04156591 +n02270945 +n02264021 +n07826653 +n02980203 +n02059852 +n02102806 +n12921660 +n04477725 +n10107173 +n12837466 +n02697022 +n04350688 +n12110236 +n02177196 +n07899976 +n12639910 +n02368399 +n10009162 +n03950647 +n09248153 +n02425532 +n04044955 +n11933257 +n03460899 +n10147710 +n02379743 +n02413917 +n02890804 +n12915140 +n02146879 +n07915800 +n01787006 +n03646809 +n11677902 +n04065909 +n02088992 +n02887832 +n10115946 +n02306825 +n03719560 +n10456696 +n03758220 +n12625003 +n04021503 +n07563366 +n02531625 +n10304650 +n12855710 +n09735654 +n07853762 +n03512030 +n12898342 +n02297938 +n12618727 +n04082344 +n12953712 +n12617559 +n03035715 +n02532451 +n05399356 +n03602686 +n10082423 +n04607759 +n07581607 +n07594737 +n04030965 +n03464628 +n12103894 +n03039353 +n03522990 +n02964934 +n03169063 +n10153865 +n09653144 +n09941571 +n12907057 +n07768318 +n02600798 +n02187150 +n01811243 +n12252383 +n04495555 +n07678953 +n13181244 +n13069224 +n13184394 +n12765402 +n03471347 +n10208847 +n03697366 +n09840435 +n02506947 +n09709673 +n07928578 +n11935715 +n07848936 +n02757927 +n01999767 +n02245443 +n10260473 +n13898645 +n02701260 +n07840219 +n11785875 +n12385830 +n12017664 +n12145148 +n04530456 +n01929186 +n02384741 +n04113038 +n03296217 +n09723819 +n03766697 +n12143215 +n09929202 +n02684248 +n12119539 +n03566555 +n12941220 +n04124573 +n10750188 +n07733005 +n04230707 +n03829857 +n07756838 +n12244458 +n12543826 +n03514129 +n02762169 +n04435870 +n03342863 +n09745324 +n12369476 +n11652039 +n03915320 +n07746749 +n07608641 +n12642600 +n02389943 +n12137791 +n04111962 +n12493426 +n12454793 +n01455317 +n10728117 +n03281524 +n12195734 +n12353431 +n02477329 +n02678010 +n04557522 +n10162354 +n14942411 +n07806043 +n12274151 +n09835153 +n03983499 +n04086663 +n07851926 +n07868684 +n11926976 +n03972146 +n04310604 +n09675799 +n13880704 +n13173132 +n07577918 +n10720964 +n11937102 +n03349020 
+n12340581 +n03725506 +n03477143 +n10578162 +n01731137 +n03382104 +n11616852 +n01493829 +n09327077 +n03856335 +n03321843 +n02375757 +n02118643 +n08500989 +n03496486 +n04140777 +n12858987 +n02845293 +n04093157 +n07819682 +n10394786 +n12289310 +n02901620 +n01559160 +n07919165 +n12648196 +n11774972 +n11995396 +n10543937 +n10154013 +n03977158 +n01884476 +n12266528 +n11906127 +n12661538 +n04396650 +n12761905 +n04175574 +n10181878 +n12017326 +n12876899 +n09744346 +n07741706 +n04451636 +n07735981 +n03751590 +n03140546 +n03070396 +n03091223 +n12071477 +n07562017 +n09981092 +n09847344 +n12552893 +n12371202 +n02245111 +n01598271 +n04400499 +n02298095 +n15048888 +n02967170 +n04030161 +n10676434 +n01556514 +n13235766 +n02538562 +n12603672 +n03941586 +n02449183 +n07567611 +n12923257 +n02296021 +n11730933 +n12497669 +n02917742 +n07875926 +n02714535 +n13142182 +n02878107 +n07861334 +n02682811 +n03730655 +n03681813 +n12970733 +n02132320 +n12436090 +n07931280 +n04295353 +n12982590 +n01783017 +n13164501 +n02424589 +n01499732 +n12650805 +n04543509 +n10369699 +n03439631 +n13160116 +n07831663 +n05449196 +n13025854 +n10169241 +n02847461 +n10734963 +n13213397 +n03343234 +n12275317 +n02793414 +n04300509 +n01803893 +n11617878 +n02179192 +n03637480 +n04514648 +n03087521 +n10478827 +n11757190 +n12919195 +n04532504 +n01736375 +n04015786 +n04545471 +n12668131 +n04472961 +n14786943 +n07584938 +n02498743 +n07744559 +n10010062 +n10101308 +n07832099 +n02601767 +n10473453 +n02451575 +n02496052 +n03696746 +n12669803 +n07904072 +n04290762 +n11737125 +n07760755 +n12553742 +n12068138 +n12630999 +n02390938 +n02202678 +n02216740 +n02679961 +n13173697 +n11828973 +n02287987 +n04585318 +n10360366 +n07745661 +n03474352 +n07934800 +n12677612 +n03692272 +n13092240 +n04230487 +n11846312 +n12433952 +n11793403 +n03056873 +n05454833 +n12517077 +n12682882 +n02649218 +n09425344 +n07878283 +n02795978 +n10064977 +n12754174 +n02945813 +n01750743 +n03150661 +n13880415 +n12337800 +n04017571 +n09754907 +n04456734 
+n02967540 +n10621400 +n11744471 +n01971620 +n04148285 +n10781817 +n11991549 +n12305654 +n03943833 +n10330931 +n12918991 +n01783706 +n11933099 +n12931231 +n07589967 +n09666349 +n07853445 +n12714949 +n03548533 +n04158672 +n03809802 +n03080309 +n12800049 +n02578454 +n02834027 +n10067600 +n03044671 +n04198233 +n07930205 +n04357930 +n12221522 +n11957317 +n03085781 +n03723885 +n03614383 +n02661618 +n04292221 +n03426574 +n03838024 +n10442093 +n12399534 +n01450950 +n07876550 +n11937446 +n09870096 +n02631628 +n05460759 +n01710177 +n03660562 +n04283784 +n01497738 +n02232223 +n04209811 +n12837259 +n02864987 +n04499810 +n12654857 +n03493792 +n09688233 +n02312912 +n10057271 +n07606058 +n03258192 +n10507565 +n11930038 +n08679269 +n03812263 +n11662128 +n04085574 +n07643577 +n03981094 +n02796412 +n02513939 +n07686634 +n07936979 +n03168774 +n03816394 +n07625324 +n04138131 +n10383094 +n10222716 +n10381981 +n12254168 +n13223090 +n03056583 +n09910556 +n03277004 +n12649866 +n02089725 +n03688707 +n09665367 +n07849506 +n02843909 +n13141797 +n02477516 +n09710886 +n03835941 +n11734493 +n10778711 +n10007809 +n02038141 +n12766043 +n02353172 +n02030224 +n10762212 +n06274921 +n13033396 +n03560860 +n01961234 +n13868515 +n03216199 +n01553527 +n04429038 +n10211036 +n02150885 +n02435517 +n02755675 +n09699020 +n12566331 +n03909516 +n02903727 +n02594942 +n04173172 +n04125692 +n12251001 +n02412787 +n01649412 +n01411450 +n01774097 +n09912907 +n03162556 +n07566231 +n12267534 +n03928589 +n04142327 +n11771147 +n07832592 +n04155177 +n07937621 +n07839864 +n03201895 +n13095013 +n10298271 +n03059103 +n03784793 +n11925450 +n03288742 +n02809364 +n04108999 +n04449449 +n03726233 +n07854455 +n03692136 +n12018447 +n03374282 +n06008896 +n07598928 +n03577312 +n04604806 +n09892513 +n04370600 +n08238463 +n01793159 +n07822687 +n03242390 +n07685303 +n03822361 +n01996280 +n10505942 +n06596845 +n04219580 +n12056990 +n10579062 +n10240082 +n10298202 +n07711907 +n03905730 +n12222900 +n07598622 +n04415815 +n12389932 
+n12154114 +n04210012 +n12500751 +n03729402 +n12122918 +n04572121 +n12804352 +n02415130 +n12780325 +n11639084 +n12768933 +n02253494 +n13217005 +n03567788 +n12304286 +n10703480 +n07766723 +n05455113 +n07741804 +n12186839 +n01687128 +n01350701 +n03260206 +n07876026 +n12528382 +n04125541 +n10457444 +n01606097 +n11717399 +n04598416 +n12899166 +n09748101 +n12160125 +n07608980 +n07843348 +n02409038 +n02571167 +n09980805 +n09706029 +n02495242 +n12765846 +n10373525 +n12321873 +n03047171 +n12365462 +n03752398 +n02662993 +n10316527 +n10728233 +n06273207 +n01733214 +n12297846 +n12755876 +n02428842 +n02289307 +n04536465 +n03253187 +n02297294 +n05584746 +n03117642 +n12189779 +n10338231 +n07599649 +n04559994 +n12710917 +n09966470 +n12470907 +n04499300 +n12403075 +n11837743 +n02269657 +n12599185 +n07618587 +n03996004 +n12851094 +n03392648 +n01319001 +n12826143 +n12369845 +n01814549 +n10056103 +n12854193 +n02267483 +n04019881 +n03490649 +n04268142 +n10801802 +n12315060 +n10149436 +n04563790 +n09865068 +n03000530 +n10657556 +n07840672 +n12118414 +n02856013 +n02900459 +n04094859 +n12079523 +n11827541 +n12236160 +n02904505 +n02846619 +n09842823 +n12926039 +n02146201 +n03195799 +n12815838 +n09899289 +n01483021 +n02519340 +n05453815 +n10329035 +n02494383 +n09742927 +n13220355 +n03212406 +n11759609 +n10061431 +n12095281 +n04262530 +n03799240 +n02426176 +n04608809 +n12230540 +n13880551 +n11741175 +n11858814 +n11723452 +n07590841 +n12604845 +n10342543 +n12760539 +n09270657 +n02563079 +n10643937 +n12843316 +n01651641 +n07838811 +n04359034 +n07758260 +n02762725 +n11726433 +n03114743 +n01952029 +n12321395 +n11930571 +n12337922 +n12427946 +n12001294 +n12551457 +n13235011 +n02290340 +n06419354 +n12408873 +n01741442 +n12308447 +n10243872 +n03658635 +n03694761 +n02570484 +n12912801 +n04158002 +n02417785 +n01332181 +n03703075 +n10283366 +n03142431 +n02779609 +n02300554 +n09868782 +n10323752 +n03166809 +n03394149 +n02827148 +n02186717 +n01350226 +n03344784 +n03555996 +n04498873 +n13157481 
+n04519887 +n12028424 +n12349711 +n10471640 +n07741235 +n04032936 +n12357968 +n10228592 +n13178284 +n04168840 +n13239177 +n03561573 +n02566489 +n11807696 +n07681264 +n02566665 +n10456070 +n10063919 +n10492727 +n01788579 +n11977660 +n02036228 +n02738978 +n03989349 +n10332953 +n12949361 +n09901502 +n07839730 +n13146928 +n10152306 +n04170515 +n11602478 +n02522722 +n01333610 +n13030852 +n02143891 +n12807624 +n04542329 +n12243693 +n12036226 +n13917690 +n02553028 +n02752199 +n10594857 +n11627714 +n04348070 +n13171797 +n04612257 +n07934373 +n04536765 +n02244515 +n04526800 +n04546595 +n02551668 +n12143405 +n07871588 +n07858484 +n03628728 +n13179804 +n03242264 +n12089846 +n07588688 +n07620047 +n01647466 +n09685233 +n03467254 +n12666369 +n05449661 +n10694939 +n12886600 +n12256522 +n04006330 +n03317673 +n04316815 +n12222090 +n04022866 +n04088441 +n07617526 +n10782362 +n04355821 +n13901490 +n12508618 +n03849943 +n04503499 +n13193466 +n09754633 +n07583978 +n13911045 +n07643679 +n12054195 +n10692090 +n04032509 +n10146927 +n02031298 +n04002629 +n04035748 +n10712229 +n02866106 +n07909504 +n04540397 +n06266878 +n10219879 +n12567950 +n07853648 +n03191561 +n07856045 +n12646197 +n03317510 +n10515863 +n13198054 +n02808829 +n12889579 +n02698473 +n09924437 +n03595055 +n12306270 +n07857356 +n09715303 +n03024518 +n04323519 +n09629065 +n04178668 +n12748248 +n02308618 +n07873198 +n10564098 +n03007297 +n04036155 +n02143439 +n10507482 +n12267931 +n03956331 +n12888234 +n04066476 +n07813107 +n02736396 +n10306496 +n12324388 +n01744555 +n01649726 +n06596179 +n03616091 +n07754279 +n02072493 +n12408280 +n04314632 +n02412700 +n04030846 +n09833997 +n03599964 +n05258627 +n12572759 +n12136581 +n02419056 +n12453714 +n11652217 +n03878511 +n03907908 +n12223160 +n10514121 +n04153330 +n12163279 +n12623818 +n03495671 +n13222985 +n10354754 +n04365112 +n12384680 +n12538209 +n03105214 +n12534862 +n13869045 +n03945928 +n11613692 +n11892181 +n13002209 +n02685253 +n07598529 +n02629716 +n13202355 +n07927070 
+n02176916 +n04370955 +n11988132 +n03246197 +n01440467 +n07620145 +n03940894 +n01897667 +n03408340 +n12602612 +n02539424 +n03863657 +n04559620 +n02604480 +n11822300 +n03518829 +n11619845 +n10504090 +n03341035 +n02908123 +n04281998 +n03277602 +n03865288 +n10074578 +n13902793 +n03054605 +n04404200 +n12786836 +n12235051 +n04035231 +n12009792 +n12705458 +n04378489 +n02476870 +n11954798 +n03573848 +n02087314 +n03162460 +n04363412 +n02261063 +n09953615 +n01947139 +n03044801 +n04287351 +n04479287 +n03861596 +n12510343 +n07854066 +n03027505 +n12161577 +n04197878 +n01812187 +n10015792 +n08685188 +n11737009 +n10333044 +n02730568 +n10290813 +n13096779 +n05257476 +n07917951 +n12121187 +n03517509 +n07932762 +n02336275 +n12159942 +n12105981 +n02562971 +n13882961 +n12016777 +n02793684 +n12717644 +n01380754 +n07724173 +n04055861 +n11831297 +n03059934 +n03370646 +n10065758 +n09459979 +n07913644 +n04322531 +n03457451 +n02567633 +n04240867 +n10693334 +n10556704 +n04614844 +n07909362 +n12082131 +n09268007 +n04359217 +n09883807 +n02292085 +n04052346 +n03431570 +n02843465 +n04584056 +n04432043 +n09846142 +n07864317 +n04475749 +n04227050 +n04280845 +n03535284 +n07890617 +n03217889 +n02806762 +n11967315 +n11762927 +n02501923 +n03442487 +n09690083 +n02964634 +n02920164 +n07855317 +n10196725 +n03042829 +n11662937 +n12183816 +n12311224 +n13884261 +n02243209 +n03140771 +n02385002 +n03071288 +n12936826 +n04583022 +n07859142 +n04578112 +n04467506 +n12938081 +n09982152 +n12555255 +n03335333 +n10104888 +n12151170 +n12709349 +n10456138 +n02237868 +n07620327 +n12561309 +n12341931 +n12350032 +n01775730 +n12950796 +n01440242 +n04261767 +n10568915 +n12285195 +n07589872 +n13112035 +n07840395 +n11750508 +n12286197 +n03336168 +n03325288 +n02551134 +n04293258 +n13130014 +n07733124 +n04451139 +n11985903 +n03602365 +n11722342 +n11944751 +n12897999 +n02277422 +n03101302 +n07608245 +n03531982 +n01997825 +n11713370 +n04442582 +n02833403 +n04427857 +n01648356 +n10645223 +n10414865 +n10696101 +n12885045 
+n10037080 +n12218274 +n07570530 +n04493259 +n10659042 +n10577710 +n03141612 +n10582604 +n00446632 +n02834642 +n07568389 +n04583888 +n04096848 +n12879068 +n04495051 +n09837459 +n12216215 +n03702440 +n10174695 +n10559009 +n10577182 +n07686299 +n04269668 +n02404028 +n03720665 +n09885866 +n03082450 +n12492682 +n12780563 +n03703463 +n02644360 +n02307910 +n01374703 +n04402342 +n04264134 +n03158414 +n04443433 +n12522894 +n10803978 +n11706942 +n10751026 +n13143758 +n02972934 +n04174234 +n12718995 +n11994150 +n11545350 +n12526754 +n07753448 +n02870772 +n11942659 +n11744108 +n12735160 +n12229887 +n04970312 +n02874336 +n10721819 +n13193269 +n03330665 +n09865162 +n10306595 +n12161744 +n03303669 +n07846688 +n02168427 +n01961600 +n03559531 +n09826821 +n03413124 +n09695019 +n03783873 +n11863877 +n13874558 +n02283617 +n11895472 +n13182799 +n07854614 +n03283827 +n01397690 +n02650413 +n09809279 +n10290541 +n10383505 +n11724660 +n07689757 +n10181547 +n07620597 +n11979354 +n02771547 +n13061471 +n12631637 +n11966385 +n03969510 +n11735977 +n07621497 +n12956588 +n03217653 +n04546081 +n11696450 +n10300654 +n02032769 +n01654863 +n09779280 +n02390258 +n03887512 +n10489426 +n10745770 +n10713843 +n03602194 +n10710913 +n07864475 +n04486322 +n07915213 +n08663051 +n10236842 +n02390738 +n02388453 +n03598385 +n12228689 +n11771746 +n12803226 +n11242849 +n02378149 +n10427223 +n05448827 +n11870044 +n12477983 +n12311413 +n03500090 +n10280034 +n02685365 +n03652389 +n12728656 +n07695284 +n09961198 +n03780799 +n03935883 +n01612955 +n12475774 +n02701730 +n07833535 +n12584365 +n03902220 +n12727960 +n10619492 +n04450465 +n10646780 +n10110731 +n04142175 +n12296735 +n09337048 +n12681579 +n12819354 +n12541403 +n04305016 +n12798910 +n10321126 +n08618831 +n09721244 +n02225798 +n01637338 +n12218868 +n05545879 +n12022382 +n03972372 +n02505063 +n01694311 +n10695450 +n10081842 +n12297507 +n07592922 +n12118661 +n01952712 +n10517137 +n01340522 +n07719330 +n03729482 +n04168541 +n03090710 +n07873679 +n07828378 
+n07728284 +n10343088 +n07869937 +n14585392 +n01453475 +n12095412 +n04973020 +n12810007 +n07564515 +n01599741 +n11629047 +n09937802 +n12450607 +n12460146 +n02292401 +n03632963 +n09617696 +n12545232 +n02874642 +n09934488 +n10091349 +n01447946 +n05469861 +n11830400 +n03382533 +n02608547 +n12697152 +n03542727 +n10716576 +n03664159 +n07568625 +n02976815 +n13147532 +n02336826 +n12432574 +n07686461 +n04107598 +n02505998 +n09849167 +n03688066 +n02836513 +n01576358 +n01893021 +n12017511 +n12065649 +n01714231 +n11662585 +n12827907 +n12954353 +n11936199 +n01368672 +n03843883 +n12184095 +n10058411 +n11684654 +n08506347 +n10579549 +n01423302 +n11604046 +n07613158 +n03605504 +n02090129 +n02284224 +n01958435 +n12664469 +n04459122 +n09617161 +n09780828 +n11830252 +n12870048 +n04247544 +n09871095 +n02962938 +n09933020 +n13064457 +n10341243 +n07694169 +n13200193 +n07765728 +n01524761 +n07730562 +n07751737 +n07740855 +n04192521 +n12593122 +n07841037 +n02809736 +n10604275 +n12512095 +n01907287 +n04592596 +n09823153 +n03181667 +n12449784 +n07908923 +n12365900 +n03053976 +n15060688 +n04165675 +n02530637 +n09816654 +n12540966 +n07934152 +n09290350 +n03455802 +n10111779 +n01351315 +n10281770 +n13862552 +n12435486 +n12370174 +n12296045 +n03493219 +n12363301 +n11973749 +n03939565 +n02938321 +n13209270 +n12604639 +n12657755 +n03604536 +n10328941 +n04278932 +n10376890 +n01884203 +n02061853 +n04256318 +n07831821 +n10585217 +n07591813 +n10210648 +n07739035 +n01632308 +n10319313 +n02861777 +n03821145 +n13029610 +n04239900 +n10313441 +n04951716 +n10628097 +n02368116 +n08571275 +n04433377 +n10458596 +n12435965 +n12448136 +n12129986 +n04295777 +n07898895 +n07854266 +n12327846 +n12318782 +n07825850 +n10414239 +n11731157 +n04409911 +n10655442 +n11829205 +n01738306 +n02840515 +n04150371 +n03369512 +n02645538 +n12773917 +n07818422 +n03227010 +n10303037 +n12942025 +n12406304 +n06616216 +n02435216 +n12981954 +n03683341 +n09703809 +n07722666 +n11817160 +n10110893 +n10228468 +n03572631 +n01378545 
+n02130086 +n04388574 +n11960673 +n12956922 +n11924014 +n09895902 +n03426462 +n07759576 +n02563949 +n03466947 +n02522637 +n09480959 +n02033882 +n02451415 +n12677120 +n10580437 +n04425977 +n03057841 +n12285512 +n07614348 +n03144873 +n03391613 +n12366870 +n02304657 +n07863935 +n07909714 +n02413717 +n12591702 +n07838659 +n02967407 +n12016914 +n02735268 +n09470027 +n10222259 +n03899100 +n10513509 +n11620016 +n12600267 +n04368840 +n03016209 +n04085017 +n03215076 +n10238272 +n09782855 +n07586179 +n12434483 +n12452480 +n01990516 +n12030092 +n11739978 +n12714254 +n13036804 +n07727377 +n07879560 +n03710421 +n12128490 +n11968519 +n03250588 +n10173579 +n03114041 +n02942015 +n12729164 +n07871065 +n02591330 +n09353815 +n10138472 +n02712545 +n12866333 +n07835823 +n03508485 +n01758895 +n02925385 +n03321419 +n09931418 +n02846874 +n12500518 +n07587819 +n03160186 +n04974340 +n13067532 +n11940349 +n13027879 +n02878534 +n10055566 +n07925708 +n12628356 +n11958499 +n03472672 +n04233295 +n04563020 +n03426871 +n04330109 +n03677682 +n04129766 +n02884859 +n12692521 +n10188856 +n03500971 +n10355306 +n12407545 +n11955040 +n10028541 +n10345659 +n14720833 +n09641578 +n12613706 +n11718296 +n03380301 +n01334217 +n03890358 +n03583419 +n12447121 +n09660010 +n11826569 +n11837351 +n12096089 +n03871860 +n01821554 +n12834938 +n02738449 +n02644665 +n03316873 +n12548564 +n03605417 +n12094401 +n13152339 +n03004531 +n03080904 +n03535647 +n12349315 +n04213264 +n07860208 +n01526766 +n03710937 +n11806521 +n10618234 +n12306938 +n10473562 +n10050880 +n04596116 +n02577164 +n04479694 +n07936093 +n07834286 +n12175181 +n03986857 +n02919648 +n12055073 +n04567593 +n07585015 +n12771085 +n10551576 +n09778783 +n01593282 +n02406952 +n12331263 +n10629329 +n12287195 +n07729225 +n07828041 +n01880473 +n12257725 +n02696246 +n07853232 +n11936864 +n09745229 +n03364156 +n04503155 +n03194297 +n04003359 +n07607361 +n10106387 +n10306890 +n10455619 +n01647180 +n07740115 +n12106323 +n03626272 +n11685621 +n11866706 +n04321121 
+n01606978 +n12621619 +n11615259 +n07840304 +n02841847 +n05459769 +n03432360 +n04604276 +n12356395 +n12468545 +n03645168 +n00477827 +n03459591 +n04202142 +n12959074 +n07881625 +n12382233 +n02405692 +n12299640 +n12247202 +n12628705 +n12534625 +n09264803 +n12176953 +n09835017 +n10390807 +n04975739 +n12474418 +n11931135 +n07917791 +n10636488 +n09690496 +n11993675 +n03703203 +n11794139 +n13015688 +n04168084 +n01948446 +n10169419 +n04455048 +n04973669 +n12840502 +n12120578 +n10448455 +n01386007 +n02288122 +n01441910 +n02278463 +n03108759 +n02753710 +n03143400 +n13080866 +n13917785 +n13124358 +n13220663 +n02475358 +n01925916 +n02684649 +n10451590 +n03869976 +n03881305 +n07928264 +n01422185 +n04035634 +n11996677 +n04261369 +n12925583 +n12764008 +n09972587 +n03708962 +n01791388 +n02892626 +n04098399 +n07823369 +n07752874 +n13225244 +n03376771 +n01771766 +n13146403 +n12157179 +n13897198 +n07770869 +n13240362 +n07610502 +n03688504 +n02896856 +n12543186 +n09967063 +n05453412 +n12590600 +n02378870 +n07568241 +n01687290 +n00474769 +n11694866 +n02338722 +n02637977 +n04567746 +n10586444 +n11907405 +n03421960 +n07605693 +n10384214 +n12877637 +n12018363 +n10056611 +n13882487 +n12140759 +n04114301 +n11762018 +n12678794 +n11817501 +n02116450 +n12018530 +n03324629 +n12726528 +n03155502 +n10493199 +n04181083 +n10609198 +n04328703 +n03045074 +n07769886 +n01892385 +n12828520 +n03165211 +n11800565 +n07567139 +n13877547 +n12829582 +n02949084 +n07589724 +n01746191 +n12395463 +n05459457 +n10565502 +n11981475 +n09310616 +n12327022 +n02313709 +n12957803 +n11865276 +n12955414 +n12939479 +n13225365 +n07936459 +n03139089 +n07577772 +n12057895 +n03620353 +n12152031 +n01885158 +n04096733 +n12626674 +n10464711 +n10675609 +n07752782 +n03709960 +n02540983 +n02285179 +n01903234 +n07835701 +n04421083 +n02352290 +n09421031 +n03349367 +n02539894 +n04052235 +n07922955 +n03941887 +n04234260 +n04423552 +n11975254 +n08501887 +n12489676 +n04574348 +n10602119 +n02163008 +n02748491 +n10024937 +n10033888 
+n12605683 +n01790398 +n10128519 +n14977188 +n10293590 +n12077244 +n09741074 +n11694469 +n12692714 +n12159804 +n12533437 +n03831203 +n03692004 +n09462600 +n04537436 +n06618653 +n07913537 +n12783316 +n10038119 +n10236521 +n01486540 +n07875267 +n04345787 +n07681355 +n13028937 +n03607186 +n07863107 +n12387103 +n09830926 +n03574416 +n04478383 +n11685091 +n03197446 +n03225458 +n09741722 +n07736527 +n02857907 +n10177150 +n12711398 +n10308275 +n02418770 +n02577662 +n09935107 +n03362639 +n12446908 +n04329681 +n04114428 +n09624899 +n12913144 +n12338034 +n02341616 +n12360817 +n12907857 +n02414904 +n05482922 +n11974888 +n04127117 +n12581110 +n04368365 +n01699254 +n12525753 +n04254450 +n11951052 +n12458874 +n12721477 +n07562651 +n02239192 +n10533874 +n12006306 +n09537660 +n10008123 +n02788386 +n03248835 +n04491312 +n11795580 +n04025633 +n10166189 +n07703889 +n11824747 +n07605198 +n12134836 +n03591116 +n02946753 +n13212025 +n11742310 +n02328820 +n02985606 +n09955944 +n12679432 +n10020366 +n12013035 +n02942147 +n04172512 +n11802410 +n10789709 +n03385295 +n02039497 +n01416213 +n11940750 +n12178780 +n01967963 +n12662379 +n12217851 +n02812631 +n12432069 +n09991740 +n03089477 +n12458713 +n03876111 +n10311661 +n12286068 +n02838958 +n11936369 +n03716228 +n13228017 +n06276902 +n12677331 +n04330189 +n10488016 +n12011370 +n04343740 +n07893792 +n02171164 +n03963483 +n12080588 +n07577657 +n12936155 +n03809686 +n04223066 +n04086066 +n12776558 +n07813579 +n01841943 +n12285705 +n02581482 +n11653570 +n10010632 +n04305947 +n12228886 +n12797368 +n01404495 +n09697986 +n11882237 +n10077879 +n07607832 +n09779461 +n13212379 +n10769188 +n10715789 +n01480106 +n02145910 +n04275093 +n01983829 +n01978010 +n09937903 +n11976314 +n11785276 +n12386945 +n04445782 +n10712374 +n10706812 +n10194775 +n12655062 +n10739135 +n02597972 +n02307176 +n04121342 +n02350670 +n12698027 +n02805845 +n02895008 +n13149970 +n03451365 +n04542595 +n07803895 +n07864198 +n09690864 +n03844550 +n12378249 +n10345422 +n13163553 
+n10457903 +n10783539 +n10539015 +n11757017 +n10274173 +n08652376 +n10283546 +n04541777 +n02824152 +n12945177 +n02082056 +n03695957 +n07936015 +n07591162 +n03628071 +n02990758 +n07685118 +n04023422 +n04951875 +n03541393 +n10289176 +n04039209 +n07913180 +n07910799 +n12017853 +n03732543 +n10656120 +n10512859 +n04556664 +n12464649 +n12927758 +n12078451 +n07878145 +n10561320 +n12467592 +n07689217 +n07619881 +n11935187 +n09837720 +n03642144 +n12220019 +n02983507 +n03271260 +n02778588 +n10193650 +n01654083 +n02746978 +n10202763 +n02953552 +n07924366 +n08583554 +n02905886 +n07855603 +n09745834 +n12366053 +n04140539 +n03383211 +n11648268 +n03352961 +n12116734 +n07771539 +n07836077 +n03842754 +n11683838 +n03004409 +n11730750 +n13098962 +n12292463 +n02867592 +n01653026 +n07583865 +n12548804 +n12702124 +n03917048 +n12677841 +n12511488 +n04217387 +n12495670 +n03554375 +n12403513 +n08558770 +n02781764 +n12339526 +n12742290 +n01404365 +n03591798 +n12446737 +n10494195 +n12110352 +n01672611 +n10493922 +n03638623 +n09910840 +n02238594 +n02575325 +n13186546 +n11873182 +n10344774 +n04094060 +n10417682 +n02749169 +n02428089 +n04549721 +n03824284 +n12107002 +n12784371 +n09986904 +n01634227 +n07826544 +n12253487 +n01679005 +n12516165 +n09339810 +n03126090 +n07803408 +n11883945 +n03842276 +n03397412 +n03280216 +n12264786 +n02545841 +n11877860 +n01830479 +n13207923 +n12490490 +n02542958 +n04114719 +n12590715 +n13226320 +n11644872 +n04119630 +n10176913 +n04213105 +n11652966 +n12546420 +n12625823 +n11897466 +n02092173 +n10567613 +n04953678 +n10059067 +n12408466 +n03056288 +n13036116 +n04169597 +n12467197 +n02569905 +n02758490 +n12623211 +n04077889 +n04959061 +n04183957 +n11689815 +n03777126 +n03306869 +n07720084 +n02659478 +n12947756 +n04341288 +n04448185 +n04037076 +n09828988 +n03346289 +n04174705 +n13126050 +n04255346 +n09764732 +n11773628 +n14891255 +n04314107 +n02184720 +n02646892 +n04320598 +n01979526 +n03191451 +n03662452 +n10290422 +n01739094 +n02305636 +n04202282 +n05459101 
+n02766168 +n09994808 +n03528100 +n10475940 +n03005619 +n12639168 +n02144936 +n13202125 +n10703221 +n03770834 +n12324056 +n03474167 +n02609302 +n12166929 +n12852570 +n12920719 +n12508762 +n11983375 +n01422450 +n12616630 +n09681107 +n10486561 +n13038577 +n12266644 +n02478875 +n02547014 +n02249809 +n03336742 +n12038760 +n01672432 +n09861287 +n03678879 +n01949973 +n09928845 +n02310149 +n12648693 +n10533983 +n12812801 +n04550676 +n01800633 +n12128306 +n12744142 +n13140367 +n07803213 +n07688265 +n13068434 +n02030568 +n12955840 +n01625121 +n13215258 +n04270576 +n02680638 +n02817251 +n01539272 +n04066023 +n12969927 +n10280598 +n04001661 +n09774167 +n10358575 +n01836673 +n02290664 +n09940725 +n12447581 +n07803779 +n04561965 +n10151261 +n01538362 +n10170060 +n13160365 +n09823287 +n12554729 +n10620212 +n11935027 +n03465605 +n03227856 +n08519299 +n07785487 +n03522863 +n02861286 +n12200905 +n04269502 +n02104184 +n07612273 +n01390763 +n11872658 +n12981086 +n10244359 +n01738731 +n12117235 +n12846690 +n02861658 +n08782627 +n09832633 +n02531114 +n01394492 +n03269073 +n03077442 +n09794668 +n13884384 +n08659331 +n02556373 +n02587877 +n03523506 +n03723153 +n12024805 +n13061172 +n03978575 +n07914686 +n13134844 +n12183026 +n03573574 +n03765128 +n03319167 +n01920438 +n07852452 +n07680655 +n03017698 +n12959538 +n04261506 +n01793340 +n03292362 +n12817855 +n03593222 +n01962506 +n12453018 +n04027367 +n12518481 +n09223487 +n07871335 +n03779246 +n09668562 +n01889849 +n02492356 +n07830841 +n03277149 +n09968652 +n03092476 +n10400205 +n06263202 +n07595368 +n12767208 +n02196896 +n12580012 +n10265801 +n02103181 +n02922461 +n01731277 +n12422559 +n04278605 +n02250280 +n03283413 +n11829922 +n10191613 +n02493224 +n04427559 +n12181352 +n12742878 +n10683675 +n04503705 +n03785142 +n12816942 +n10723230 +n11936707 +n12360534 +n12909759 +n03766218 +n02696843 +n11935877 +n07828156 +n10617397 +n12921499 +n13158714 +n10166394 +n12370549 +n03505015 +n12769065 +n02636550 +n10781236 +n09869317 +n10275249 
+n04234763 +n10735173 +n13137225 +n02070776 +n04232312 +n07575226 +n03471030 +n07909954 +n02633677 +n01662060 +n07563642 +n04263950 +n11824344 +n13178707 +n02972714 +n10417288 +n12092930 +n11993203 +n10170681 +n03726116 +n03215337 +n12564613 +n14975598 +n07758125 +n03123666 +n07717714 +n01421333 +n02359667 +n09403086 +n03857026 +n12759668 +n02628259 +n02307515 +n12146488 +n09777870 +n07819303 +n12105353 +n10784113 +n11802995 +n12561594 +n02845130 +n12100187 +n03507658 +n02141611 +n01800195 +n03470005 +n12444898 +n02203592 +n09707061 +n00475142 +n12216628 +n01732093 +n02581642 +n03803780 +n12114590 +n04541662 +n12267133 +n11652753 +n07859951 +n04524594 +n12843144 +n04040540 +n10604880 +n12559044 +n03063834 +n12394328 +n12704513 +n10230216 +n10756641 +n02101670 +n12309630 +n03070587 +n11626010 +n04239639 +n01638329 +n01928517 +n13144084 +n10420649 +n03102516 +n12395289 +n09833111 +n01651285 +n11688069 +n12881913 +n12783730 +n07716649 +n03618678 +n10344203 +n03626502 +n10718665 +n03577474 +n01683201 +n03246653 +n12153224 +n02519472 +n02470709 +n15090238 +n03129636 +n07774295 +n04577567 +n09995829 +n09662038 +n10297367 +n03555862 +n12531727 +n09947127 +n12533190 +n04062807 +n00479734 +n12860978 +n01884104 +n09866559 +n12069009 +n04595501 +n12088495 +n02909053 +n12283790 +n02180427 +n10697282 +n07562881 +n13092078 +n11706325 +n01746952 +n01978136 +n07731436 +n02386746 +n12648424 +n12726357 +n10314182 +n07839172 +n11753562 +n12903503 +n12589687 +n02375438 +n03604763 +n11549895 +n13202602 +n12304420 +n10738215 +n12220829 +n10095420 +n12177455 +n11887476 +n04006411 +n09838370 +n02853218 +n12688372 +n03335461 +n02800940 +n03036701 +n09885059 +n10206629 +n11922926 +n01678657 +n12192132 +n12248141 +n03108624 +n01936671 +n02417242 +n03222857 +n03768823 +n04343511 +n03538817 +n12655726 +n12521186 +n01330497 +n12767423 +n12965951 +n09695132 +n04410886 +n12599874 +n07865700 +n07596160 +n10227698 +n03224490 +n11598886 +n02948293 +n09906293 +n12247963 +n03301175 +n03895170 
+n04259468 +n07808806 +n13147689 +n09856827 +n13882639 +n02241008 +n03842585 +n02883101 +n12182276 +n13918717 +n12728164 +n10634464 +n02477187 +n03107716 +n02342250 +n01479213 +n12793695 +n09808080 +n10707707 +n04161010 +n02836607 +n10076483 +n07726386 +n03872273 +n10250712 +n07688412 +n13884930 +n12301766 +n10196404 +n07591330 +n03814727 +n09610255 +n12757115 +n09814381 +n02397987 +n07886317 +n03959123 +n02185167 +n03533845 +n11838413 +n10227393 +n07704305 +n03580615 +n02663485 +n10101981 +n04346855 +n10067011 +n04464125 +n02829510 +n10007995 +n07845775 +n03004713 +n02450561 +n09905530 +n10361060 +n12394638 +n12095934 +n10479135 +n03145277 +n12246773 +n13194212 +n04475900 +n03252787 +n14867545 +n10485298 +n09961739 +n02149653 +n01553762 +n03931980 +n02344408 +n11676850 +n04034367 +n04235646 +n12867184 +n12625670 +n12763529 +n07593107 +n04351550 +n02571810 +n13899735 +n03652826 +n09495962 +n03421768 +n04205062 +n11918808 +n07745197 +n07752264 +n01892744 +n04609811 +n10278456 +n11790936 +n09754152 +n13234519 +n09820044 +n00440643 +n02350357 +n03779884 +n07803992 +n03305953 +n01836087 +n10068234 +n10690421 +n03134394 +n12380761 +n12801966 +n03134232 +n02596720 +n07591236 +n11882821 +n02312175 +n02387983 +n01912152 +n10805501 +n12718074 +n03188290 +n02776505 +n10528148 +n09971385 +n10524223 +n09958292 +n02721813 +n10300829 +n12007766 +n12107191 +n04449700 +n02987950 +n11878633 +n12328801 +n04551833 +n10567722 +n11654984 +n02808968 +n12066451 +n02964075 +n11633284 +n02434712 +n03070854 +n07926540 +n01543936 +n10091861 +n09938080 +n11976511 +n03342432 +n12886831 +n12509993 +n12958261 +n12730776 +n10066206 +n07846014 +n13176714 +n03332591 +n04607640 +n02513727 +n12138248 +n11964848 +n01318053 +n10553140 +n07839055 +n02632039 +n11865429 +n02286654 +n02367812 +n12093885 +n10774329 +n02296912 +n01729672 +n10353928 +n12033504 +n11936113 +n03263338 +n07822053 +n09737050 +n13875884 +n13212559 +n11690088 +n05468739 +n09344724 +n02507148 +n01377694 +n04172607 +n10464870 
+n07804152 +n02825872 +n03139640 +n11858703 +n10227490 +n12334153 +n03616225 +n12018188 +n12399656 +n10235269 +n11840764 +n01995514 +n03326475 +n12704041 +n10684827 +n03006788 +n13906484 +n02868240 +n03614887 +n03491724 +n12124172 +n03675907 +n13170840 +n03983712 +n03254737 +n07836269 +n01784293 +n02095212 +n12470512 +n12219668 +n12920521 +n04492157 +n02950018 +n01922717 +n11797981 +n12601805 +n02744961 +n07814925 +n09798096 +n03939062 +n13891547 +n07564292 +n01590220 +n09295210 +n03997875 +n03479266 +n01491661 +n03781055 +n12528768 +n10657306 +n12014923 +n10094320 +n02532272 +n02224023 +n04541136 +n12067672 +n02661473 +n04233027 +n12399899 +n12889412 +n01736032 +n12551173 +n01337734 +n10104487 +n02921592 +n02148512 +n10216403 +n03276839 +n01781570 +n03999621 +n02505238 +n12537569 +n10433452 +n02351343 +n12365158 +n08539276 +n01897257 +n12221801 +n10557246 +n10437698 +n01803641 +n11836327 +n07813833 +n03468570 +n06277025 +n10040240 +n03692842 +n03017835 +n01881564 +n10487363 +n07937069 +n10597505 +n01638722 +n10160412 +n09825096 +n12611640 +n03098515 +n10654211 +n13196234 +n03436990 +n04058486 +n09814567 +n10758337 +n03515934 +n07688757 +n10269199 +n12627347 +n04521571 +n01636510 +n03220095 +n09982525 +n12768809 +n02340930 +n02473857 +n12336586 +n12125584 +n02833040 +n02498153 +n01467804 +n12120347 +n11650430 +n11953339 +n12592058 +n05102764 +n10575594 +n09722064 +n01966586 +n10619888 +n07852376 +n12650915 +n10321882 +n11974557 +n09847267 +n13201423 +n12337131 +n13185658 +n02150134 +n10538853 +n10471732 +n07836600 +n03526062 +n02512752 +n04232437 +n03367321 +n04308915 +n07600895 +n11539289 +n03539293 +n12699922 +n07817599 +n02781213 +n03594010 +n12035907 +n04075813 +n05233741 +n07863229 +n10735984 +n12095543 +n12272735 +n04229620 +n12240965 +n07768590 +n04420024 +n12111627 +n02861509 +n02595056 +n12183452 +n04607982 +n13213577 +n07741888 +n03750614 +n10043024 +n03372933 +n10051861 +n10199251 +n03249956 +n03984125 +n02956393 +n11619687 +n03356279 +n07833951 
+n10715030 +n02340358 +n10768272 +n01494041 +n02592734 +n03323319 +n02136285 +n03995661 +n09945223 +n03547397 +n10044682 +n12878784 +n02803809 +n13160254 +n12726902 +n12196954 +n03161016 +n03105645 +n04218921 +n09493983 +n10719036 +n12263588 +n12565102 +n10684146 +n03148518 +n04287986 +n02340640 +n04331443 +n10727016 +n03369407 +n07824863 +n07844786 +n12467433 +n07582811 +n02964196 +n02197877 +n10758445 +n03271376 +n13212175 +n03260504 +n12777778 +n11973634 +n05467054 +n11946313 +n02462213 +n13906669 +n10520286 +n02074726 +n01771100 +n13880199 +n09811568 +n13883763 +n02334728 +n11831100 +n12025220 +n12751172 +n03858837 +n10127186 +n12831535 +n07823591 +n02513805 +n03662301 +n09913329 +n02749670 +n10655986 +n01787191 +n03199488 +n12732252 +n12253664 +n07735294 +n03440876 +n09650839 +n03844965 +n10341446 +n12688187 +n12961242 +n03423224 +n13157346 +n09802951 +n11948044 +n03489048 +n12279060 +n03664840 +n03731882 +n07742605 +n07870734 +n03949761 +n10759331 +n07739923 +n02737351 +n01788291 +n11780424 +n03722646 +n12297110 +n12363768 +n04495310 +n10008254 +n03934890 +n01318478 +n03609959 +n10070377 +n04123228 +n13068735 +n02909706 +n10671042 +n10491998 +n07650792 +n12664710 +n10213034 +n03455642 +n10411867 +n09903936 +n10121800 +n02622955 +n03647423 +n07596566 +n09654898 +n12248780 +n02684515 +n04255670 +n06273890 +n03495941 +n12960552 +n09724234 +n03861048 +n03293095 +n11835251 +n12852428 +n04084517 +n01814620 +n13159890 +n03147156 +n02311748 +n10237799 +n07584859 +n01946827 +n09651968 +n12241192 +n03669245 +n07858336 +n11932927 +n04444218 +n10526534 +n03642573 +n09470222 +n10731732 +n12001924 +n03786096 +n01359762 +n03824999 +n13877667 +n10591811 +n10574311 +n03275125 +n11631985 +n10539160 +n10502950 +n12499757 +n12432707 +n12068615 +n07689624 +n02610373 +n03204436 +n13051346 +n13134531 +n07610890 +n04021164 +n03502897 +n02299378 +n10417843 +n10050043 +n07929940 +n02593453 +n10577820 +n12870225 +n03333851 +n09463226 +n11741575 +n09193551 +n12012510 +n11987349 
+n09215023 +n07924655 +n10060075 +n11999278 +n03933391 +n02602059 +n11993444 +n02337902 +n10149867 +n04441093 +n02868429 +n10629647 +n04192361 +n12029039 +n02768433 +n12078747 +n12730143 +n03255167 +n12492900 +n01709876 +n09672725 +n07870620 +n02315821 +n12277334 +n12204730 +n07852712 +n01319685 +n07802246 +n13031193 +n00812526 +n09658815 +n11982939 +n04264485 +n07893425 +n04094438 +n03285730 +n13182338 +n10724570 +n07832741 +n13210350 +n10654015 +n04058721 +n07875086 +n03462747 +n03994417 +n02889856 +n11957514 +n10109443 +n10478462 +n03064562 +n02477782 +n11920998 +n02138169 +n04227787 +n11797508 +n10753339 +n12928307 +n11921792 +n12643688 +n01833112 +n03919808 +n09817386 +n01903498 +n03848033 +n12031547 +n01035504 +n12324906 +n01911063 +n02588794 +n03749634 +n03539754 +n02242455 +n03079616 +n03246312 +n09705671 +n07860629 +n10458356 +n10051761 +n09709531 +n02867401 +n12522678 +n13150378 +n04462576 +n03462315 +n03712981 +n07607027 +n10581648 +n02957427 +n04271793 +n02253913 +n12824735 +n11697802 +n02161588 +n12463975 +n02361090 +n09784564 +n09680908 +n03512452 +n13214217 +n10712690 +n04023119 +n07814007 +n09833751 +n12885265 +n02259987 +n11933903 +n03628831 +n11967142 +n02533545 +n03900301 +n07919787 +n12793886 +n10768148 +n03071552 +n02780315 +n12193665 +n03378442 +n04486616 +n07832307 +n03164192 +n12786273 +n04261868 +n12655351 +n12320414 +n04371979 +n10630093 +n13052014 +n01357328 +n07879821 +n09753348 +n03796974 +n11701302 +n11678299 +n04022434 +n11610823 +n07726009 +n04117639 +n10474343 +n11888061 +n01842788 +n10435251 +n03343047 +n03383378 +n12750767 +n09662661 +n05241485 +n10000459 +n12220496 +n02246941 +n12676370 +n02253264 +n07766409 +n02940289 +n12089320 +n10363573 +n12922119 +n09783537 +n11695285 +n12331066 +n12573647 +n10218164 +n12509821 +n07862946 +n12818601 +n02589316 +n13191620 +n03758992 +n12112337 +n10733820 +n02898093 +n02645953 +n10150794 +n04595762 +n02344918 +n13132756 +n12859153 +n12138444 +n04211001 +n12935166 +n07830493 +n10142166 
+n11951820 +n03018848 +n01453742 +n11985321 +n10000294 +n01362336 +n02328009 +n12639376 +n03090437 +n02204249 +n04312916 +n13127666 +n09684082 +n03432509 +n10274318 +n09704057 +n07593972 +n10074249 +n13157971 +n01638194 +n04036963 +n11708857 +n03418749 +n12589458 +n11899762 +n07683138 +n01601410 +n07854707 +n04279063 +n03239607 +n10302700 +n12520406 +n12576451 +n03881534 +n07565608 +n02349390 +n12569851 +n12249294 +n04059399 +n03530189 +n09357346 +n04325208 +n13159691 +n04045941 +n13898315 +n11992479 +n02353411 +n07825496 +n12922458 +n03115014 +n11761836 +n03323211 +n02793296 +n03492087 +n05241662 +n05491154 +n10419630 +n04506895 +n10546428 +n02907296 +n10769459 +n11647868 +n13188462 +n03825442 +n13209460 +n10742005 +n07599242 +n12361754 +n04570532 +n04131811 +n07756499 +n02598134 +n01910252 +n02910701 +n10129338 +n13871717 +n12673588 +n12565912 +n07562172 +n02711237 +n10775003 +n07695410 +n02637179 +n12930951 +n10261211 +n02906963 +n01366700 +n10642705 +n09846586 +n02779719 +n04978561 +n01369358 +n12114010 +n03521771 +n10667709 +n02296612 +n10722029 +n03500557 +n01365474 +n10472447 +n07585644 +n07609316 +n04013060 +n04505888 +n09726811 +n12692160 +n12378963 +n03585551 +n13139837 +n10167565 +n03799375 +n11990920 +n09640327 +n04502989 +n10108832 +n10561736 +n01897426 +n11766189 +n12462582 +n12913524 +n02684356 +n13200542 +n10466198 +n04331892 +n01478969 +n07837234 +n07692248 +n04552097 +n12382875 +n01484447 +n04120695 +n12681376 +n10293861 +n11965962 +n11788039 +n03959227 +n01832813 +n09918867 +n09942697 +n07587206 +n10459882 +n01347583 +n02267208 +n03951453 +n03006903 +n12126736 +n10286749 +n03395401 +n04605057 +n03467887 +n12755559 +n04020744 +n11629354 +n01647033 +n02780445 +n10205714 +n09439032 +n03138128 +n02763083 +n07835547 +n12251278 +n11949857 +n01635480 +n10675142 +n07845335 +n07751977 +n10332110 +n11871496 +n11764814 +n12229651 +n07760297 +n09865672 +n02919308 +n12218490 +n03782929 +n12231709 +n11909864 +n03144982 +n11799331 +n10433610 +n10483395 
+n03206023 +n05442594 +n03626418 +n07870478 +n10171456 +n11964446 +n12796849 +n02126317 +n03797062 +n01412694 +n07610746 +n03581897 +n04479526 +n12447891 +n11906514 +n09699642 +n12873984 +n10586903 +n13234114 +n02436353 +n11889205 +n01460303 +n04400899 +n11884967 +n02140491 +n12215824 +n03586911 +n01394040 +n10691937 +n12371704 +n09668988 +n04362624 +n01740885 +n01337191 +n09714120 +n02185481 +n08555333 +n10704238 +n12430471 +n12034594 +n10012484 +n12088909 +n03205903 +n04129490 +n13090018 +n10712474 +n12234669 +n13016076 +n00454855 +n13882713 +n02644817 +n03192907 +n03519226 +n01561181 +n04583967 +n11732052 +n10732854 +n04480303 +n07934908 +n03825673 +n10621294 +n04354387 +n03374102 +n02922159 +n13158815 +n04000716 +n09685806 +n04427216 +n12051514 +n09712967 +n12081649 +n09748889 +n03252231 +n10704886 +n12897118 +n12525168 +n11728769 +n02731251 +n02548884 +n12403276 +n09627807 +n08679167 +n09663999 +n04247440 +n07711683 +n09909929 +n03415868 +n05244421 +n07680416 +n12757668 +n11935794 +n03483086 +n01860864 +n10755164 +n03675076 +n12004987 +n07566092 +n04078955 +n03379719 +n01916588 +n10138369 +n09755893 +n03649003 +n03977430 +n02309120 +n10616578 +n12242850 +n12388293 +n03292085 +n09919061 +n10302576 +n01497413 +n01936858 +n01377278 +n04358256 +n02667693 +n12125183 +n07758582 +n07813324 +n09737453 +n12745564 +n03855464 +n03166685 +n01446152 +n09801102 +n10561222 +n10576818 +n13915209 +n10474446 +n03845990 +n04237174 +n12531328 +n07855812 +n10763245 +n04614505 +n07905770 +n12051792 +n12653633 +n03593862 +n10359659 +n10436334 +n07853125 +n12911264 +n12265083 +n03638014 +n04444121 +n02706221 +n10563711 +n07808166 +n11799732 +n04093915 +n10451858 +n04410760 +n10075299 +n12740967 +n12635359 +n09611722 +n12902466 +n13915305 +n05542893 +n04440597 +n03675445 +n12315245 +n10646032 +n10047199 +n12775717 +n10365514 +n10590452 +n11616260 +n02812342 +n07856756 +n04570416 +n03565991 +n12215210 +n04330896 +n02388588 +n02266269 +n10760199 +n14714645 +n02742070 +n03565710 
+n12609379 +n03420935 +n03441465 +n00453631 +n01963479 +n04362972 +n09863936 +n03961394 +n03009269 +n12297280 +n04561010 +n12192877 +n02981565 +n12134695 +n07855413 +n03232815 +n10180791 +n09932788 +n10571907 +n02109256 +n02660091 +n07865788 +n13228536 +n10306279 +n02635580 +n03634899 +n10262343 +n12296929 +n04393301 +n06281175 +n04485586 +n13103660 +n10510974 +n04166436 +n01634522 +n07596362 +n12700357 +n08597579 +n11744011 +n12238756 +n01790171 +n04571800 +n11867311 +n03464467 +n12241880 +n09961605 +n12592544 +n03170459 +n09938991 +n02692680 +n10295371 +n04331765 +n02612167 +n02520810 +n11977887 +n04094608 +n07722390 +n07832202 +n12448361 +n04612159 +n12186352 +n13161151 +n12654227 +n09868899 +n10104756 +n09920106 +n12981301 +n02610980 +n12545865 +n10673296 +n04110841 +n01704626 +n04055700 +n12117912 +n10519126 +n12443736 +n01697978 +n02148088 +n03012644 +n12091697 +n10395390 +n10509810 +n10462751 +n02896949 +n03836602 +n03928994 +n07718195 +n02473983 +n08571642 +n02648916 +n11970298 +n06274292 +n04613158 +n09856401 +n12811713 +n13111340 +n12122442 +n10095265 +n04445610 +n11631619 +n07863644 +n12022821 +n10315217 +n12549799 +n03386343 +n03121040 +n03558007 +n12272432 +n11798496 +n02522866 +n02952935 +n10741493 +n12143065 +n07883156 +n09616573 +n02289988 +n13161904 +n02588945 +n00451768 +n12375769 +n10777299 +n04495183 +n11930994 +n09970088 +n02254246 +n12276314 +n07857598 +n04428382 +n03789794 +n03383821 +n12980080 +n01447139 +n12880799 +n03501520 +n10764465 +n13143285 +n12727729 +n12444095 +n02354621 +n13174354 +n01691652 +n07732525 +n10437014 +n04368235 +n10371052 +n02611898 +n03597147 +n09912431 +n03135788 +n07888058 +n02409202 +n14582716 +n11934463 +n04395332 +n12558680 +n05257967 +n11798978 +n10617024 +n04102760 +n12132092 +n12988572 +n10390698 +n11887310 +n12063211 +n12952717 +n13141972 +n12176453 +n10245863 +n10509161 +n10389976 +n10333165 +n01474864 +n09274305 +n11888424 +n10368711 +n13222877 +n10469611 +n07582970 +n09700125 +n12805762 +n07865575 
+n07853852 +n03628421 +n04482975 +n03099622 +n01349735 +n11943133 +n12736603 +n12197601 +n10597745 +n04418644 +n12689305 +n07755262 +n10598459 +n04312020 +n03195485 +n09776642 +n10596517 +n10223606 +n01923890 +n12703716 +n03465040 +n12372233 +n12528109 +n03571853 +n10802621 +n10204177 +n02320465 +n03976105 +n02214096 +n02148991 +n10377542 +n10697135 +n03538542 +n07582027 +n04517999 +n12180456 +n02838014 +n03977266 +n03818001 +n12191240 +n11648776 +n10773800 +n04475496 +n03945817 +n04682018 +n02994743 +n02787269 +n11650160 +n03834472 +n03389983 +n09797742 +n06209940 +n12525513 +n12672289 +n01893164 +n10710259 +n01892145 +n11773408 +n10554024 +n09864968 +n10699752 +n11631405 +n10414768 +n04430605 +n10742546 +n10738871 +n12857204 +n09309046 +n01724840 +n04123317 +n07881525 +n03868044 +n02140268 +n10708292 +n09838295 +n09797998 +n10710171 +n11814996 +n11938556 +n03543511 +n02151230 +n01515217 +n03533392 +n02039780 +n12810151 +n02335231 +n12152251 +n13225617 +n09801275 +n01978587 +n14821852 +n11742878 +n12679023 +n03521431 +n09679028 +n02021281 +n10784544 +n04421258 +n12492460 +n03720005 +n02541257 +n03889397 +n02888898 +n10659762 +n12045157 +n12712320 +n10369095 +n09721444 +n12769318 +n01703161 +n12697514 +n07836456 +n03905361 +n10660883 +n07769306 +n11893916 +n07846274 +n04110281 +n03655470 +n07740744 +n01363719 +n12540647 +n09896311 +n12842642 +n07755619 +n07754155 +n11548870 +n02868546 +n04215588 +n04288165 +n13201566 +n07721118 +n12018271 +n11903333 +n02909165 +n02662559 +n11658709 +n13063514 +n07725663 +n10179069 +n10776887 +n12637485 +n03814528 +n12542043 +n07833333 +n07820036 +n02746683 +n07925808 +n10349750 +n03154316 +n04155625 +n03232923 +n02116185 +n09998788 +n02821543 +n03410303 +n10656223 +n07916582 +n12880638 +n10408809 +n04612840 +n11805255 +n12044784 +n10497534 +n03458422 +n12873341 +n07808675 +n09476123 +n07611733 +n10598013 +n02214660 +n05469664 +n03952150 +n11855435 +n04375926 +n08523340 +n01642391 +n04007415 +n09756961 +n12891824 +n02894847 
+n11698245 +n12906771 +n02894024 +n04131015 +n11882636 +n04386456 +n03291551 +n07837110 +n12462221 +n08540532 +n10299875 +n12705978 +n10448322 +n10487592 +n12175598 +n02272552 +n03833907 +n10383237 +n12758176 +n12729950 +n10061195 +n07816726 +n03241903 +n12239880 +n10380499 +n07855188 +n10207077 +n02770078 +n12961393 +n03778459 +n10734741 +n03485575 +n09958447 +n12337246 +n11830045 +n09866354 +n03209666 +n01470145 +n10395209 +n03872016 +n04267091 +n12888457 +n12104104 +n04088229 +n01964957 +n12002651 +n02503756 +n00481938 +n01908042 +n03378765 +n04193883 +n09862183 +n11861487 +n02520525 +n02081060 +n10386754 +n12693865 +n04514095 +n01325060 +n02460817 +n07568095 +n03651605 +n02561937 +n12844409 +n12888016 +n02974565 +n12439154 +n13018906 +n12071259 +n03897634 +n02863176 +n10603528 +n03493911 +n12887532 +n12944095 +n12794568 +n09980458 +n03503567 +n11783162 +n13123309 +n11729860 +n03702582 +n04280373 +n10086744 +n01790557 +n12627526 +n10552393 +n12092629 +n03888998 +n12751675 +n01442450 +n02479332 +n07726230 +n03642341 +n03142325 +n06263895 +n12088327 +n09703344 +n10528493 +n02820085 +n07737594 +n04090781 +n09901642 +n02328942 +n02724722 +n09866115 +n12658715 +n10481167 +n13135692 +n11850918 +n10205344 +n12361560 +n03698123 +n03284482 +n12106134 +n04441528 +n02591613 +n02581108 +n07856186 +n12197359 +n12900783 +n01725713 +n12012253 +n03907475 +n02170738 +n03694949 +n13238654 +n04611795 +n02782432 +n13191148 +n02741367 +n04170694 +n12770892 +n01973148 +n10080508 +n10161622 +n09808591 +n07912093 +n02059541 +n02779971 +n03857156 +n12945366 +n03055159 +n12758325 +n10067305 +n02597818 +n07808352 +n13147153 +n10679723 +n02271222 +n04012665 +n12942729 +n10349243 +n01377510 +n07800636 +n10654321 +n10219453 +n09961469 +n10732521 +n04479405 +n11632929 +n03856728 +n08658918 +n10327143 +n10754281 +n02085118 +n09691604 +n09952163 +n10082299 +n03872167 +n03733465 +n04138869 +n01425223 +n12066821 +n02177506 +n09892262 +n02896694 +n12983654 +n13224922 +n09658921 +n12744850 
+n03639880 +n02943686 +n10660621 +n11936539 +n03698226 +n04519536 +n12392765 +n09319604 +n07567039 +n04160261 +n01802159 +n02838178 +n07746910 +n02266421 +n10240417 +n12542240 +n12550408 +n01445857 +n04132465 +n03569014 +n12666050 +n12362514 +n10676569 +n09702673 +n12885510 +n04447156 +n04396226 +n12240150 +n11639306 +n02249134 +n01340785 +n02833140 +n10027590 +n02142407 +n11996251 +n07874531 +n04340019 +n03166120 +n10420277 +n04465203 +n12738259 +n12831141 +n03998673 +n01385017 +n12842519 +n02587051 +n10753061 +n12505253 +n13906936 +n01989516 +n12640435 +n07852532 +n04243142 +n10261511 +n12853287 +n12239240 +n03973003 +n09983889 +n10345302 +n14804958 +n02354162 +n03049326 +n10443659 +n01318660 +n12787364 +n04253304 +n11941094 +n09283514 +n09393524 +n11865574 +n01531639 +n04409279 +n02859729 +n10712835 +n03694196 +n04343630 +n10331098 +n12929600 +n02826259 +n10171219 +n07735179 +n07594840 +n03709644 +n09950728 +n09859285 +n07718329 +n01418620 +n09858299 +n12395068 +n10011360 +n07763290 +n02643316 +n03596099 +n04422566 +n11958888 +n09650989 +n10318686 +n01333082 +n12886402 +n03781467 +n12667582 +n02923535 +n09988311 +n08663860 +n02508346 +n13885011 +n03939281 +n10772937 +n04485750 +n09871952 +n10291942 +n07759324 +n10174971 +n03666238 +n01937579 +n02308033 +n07847706 +n10371330 +n04124887 +n11853079 +n11941478 +n12647231 +n04601041 +n12718483 +n02902816 +n01941340 +n04066767 +n07617839 +n02254901 +n03488784 +n07834774 +n02524659 +n03367969 +n10783734 +n03422484 +n09776807 +n03970363 +n10131590 +n03433247 +n02622712 +n10206506 +n12061104 +n11936287 +n07874674 +n10061043 +n07828275 +n03764606 +n12236768 +n01826844 +n09741904 +n05454978 +n03591592 +n01441272 +n03736372 +n07585474 +n12762405 +n12943912 +n01894522 +n03218446 +n11846425 +n11689678 +n04147916 +n02375862 +n10409459 +n09287415 +n10113583 +n03261263 +n02817386 +n09869578 +n10550252 +n02532786 +n12031388 +n07937344 +n11612235 +n01571410 +n09402944 +n04234670 +n02603862 +n04196925 +n09999135 +n10468750 
+n15093049 +n03003633 +n11650307 +n12312110 +n02525703 +n10501635 +n09751622 +n10114550 +n10103155 +n12829975 +n04004099 +n12419878 +n02082190 +n03328201 +n03093427 +n07845571 +n12655498 +n02558206 +n12563045 +n07573453 +n12324558 +n13016289 +n10601234 +n10310783 +n03531691 +n02135610 +n03168543 +n09985978 +n10615334 +n07839312 +n09985809 +n10142537 +n10417969 +n07869111 +n12514992 +n04327544 +n10326776 +n12583681 +n01476418 +n12840168 +n03852544 +n11713763 +n07824502 +n07858841 +n12256325 +n03036149 +n07883661 +n04500390 +n10170866 +n01835918 +n10760951 +n10720197 +n12330239 +n02135844 +n10210512 +n03217739 +n10802953 +n03136254 +n02161225 +n03961630 +n12927194 +n02251233 +n13891937 +n09945603 +n02695762 +n12181612 +n13234857 +n10175725 +n11346873 +n07934678 +n02318687 +n10251329 +n04112921 +n04001132 +n03042984 +n11704791 +n04246459 +n12193334 +n10718509 +n10371221 +n05278922 +n03265754 +n12186554 +n12481289 +n10521853 +n10748506 +n11729142 +n10143595 +n09422631 +n07562984 +n07850219 +n04193742 +n11997160 +n12002826 +n12820113 +n04132829 +n10272913 +n03358841 +n12610740 +n12384569 +n10725280 +n02746008 +n13148384 +n12635151 +n02337171 +n10350774 +n12308907 +n04542474 +n04339062 +n03549350 +n10240235 +n10556033 +n10214390 +n01791314 +n02801047 +n07817465 +n11610602 +n10315730 +n14592309 +n10249191 +n12453857 +n12579822 +n09833275 +n04051269 +n11552594 +n04088343 +n04565039 +n03930431 +n10679503 +n11899921 +n10295479 +n01357507 +n13036312 +n03404900 +n12523141 +n01816017 +n02020578 +n12661045 +n06262943 +n02775813 +n12921315 +n09751076 +n09834258 +n10585628 +n12885754 +n04411019 +n10342367 +n10368798 +n09672840 +n12729023 +n04578329 +n10325549 +n03680248 +n11920663 +n10416567 +n10011486 +n01643255 +n03193754 +n07823814 +n04055447 +n10660128 +n07765612 +n07612530 +n04205613 +n09677427 +n03989199 +n11100798 +n12721122 +n10000787 +n10382157 +n07724819 +n12928819 +n11631159 +n02608996 +n10516527 +n09703101 +n12290975 +n03470222 +n03810412 +n03729131 +n03356038 
+n12692024 +n12614625 +n10789415 +n02333819 +n01722670 +n03885410 +n12038208 +n02294097 +n02608860 +n02500596 +n07909231 +n03254625 +n09681973 +n12221368 +n01893399 +n10025295 +n03194812 +n13181406 +n12249122 +n03447894 +n09795010 +n02187900 +n10139651 +n10631654 +n01792530 +n02569631 +n07853946 +n09907804 +n03263758 +n04214649 +n02450829 +n02431542 +n11998492 +n02651060 +n04101860 +n01806061 +n13901423 +n12903964 +n03968479 +n04268565 +n12601494 +n02083780 +n04570118 +n12247407 +n03337822 +n09878921 +n02369935 +n10022908 +n09667358 +n13160938 +n11937360 +n07741623 +n03705808 +n12241426 +n10478118 +n03805933 +n10343869 +n09391774 +n03482128 +n10357737 +n10334461 +n09675045 +n09662951 +n10174253 +n01815270 +n13873361 +n04432785 +n09778927 +n10671898 +n05571341 +n10033572 +n09864632 +n10618465 +n03437184 +n12786464 +n01723579 +n11798270 +n07742415 +n02143142 +n10548419 +n03695122 +n02518622 +n04605446 +n10218292 +n11832671 +n12646950 +n03382708 +n09844898 +n09674786 +n01472502 +n07616906 +n09763272 +n03982767 +n10005006 +n03059236 +n01816474 +n03725869 +n01979269 +n04226322 +n13236100 +n03920384 +n11852148 +n04373563 +n04324120 +n11686652 +n03036341 +n02142898 +n09783776 +n13147918 +n03465320 +n07855721 +n10336411 +n10438619 +n07750299 +n12237152 +n03559373 +n10077106 +n10169796 +n09828403 +n09959658 +n12464128 +n12934685 +n04221673 +n02617537 +n11689367 +n10180580 +n07813717 +n12529905 +n02340186 +n01400247 +n11749112 +n04404072 +n03135656 +n12098827 +n12481150 +n10023506 +n03500838 +n01564101 +n04009923 +n10023264 +n03908456 +n03206405 +n07590068 +n09958133 +n10755394 +n01423617 +n11511327 +n10536274 +n01965252 +n11549245 +n11935627 +n09635635 +n03752071 +n07585997 +n03147084 +n12666159 +n09748408 +n03796848 +n01501948 +n02345078 +n12430675 +n03103128 +n11710987 +n03393199 +n09233603 +n10465002 +n04298765 +n01351170 +n02720576 +n03966582 +n10643837 +n12420124 +n10793799 +n01652297 +n09281252 +n11983606 +n10222497 +n11832899 +n02391617 +n12434106 +n03987674 
+n02140179 +n07896560 +n04325804 +n10647745 +n01924800 +n10156629 +n03545961 +n03906789 +n01890564 +n10699558 +n12332218 +n03247495 +n11839460 +n03527675 +n12586725 +n13208965 +n02714315 +n02750320 +n04615149 +n12679876 +n12863234 +n03304323 +n12139793 +n11922755 +n12321669 +n04979307 +n01921059 +n09657206 +n13042134 +n04045787 +n11700279 +n02337598 +n01415920 +n01400391 +n13207572 +n10785480 +n02515713 +n12018100 +n02634545 +n03292736 +n02881546 +n12655605 +n03105810 +n10545792 +n03894933 +n09796974 +n10320484 +n12308112 +n11549009 +n13047862 +n14941787 +n12379531 +n10540252 +n11696935 +n12184468 +n12851860 +n12908854 +n10586265 +n12369066 +n10426630 +n12523850 +n03916289 +n04538878 +n09908769 +n02828115 +n07560422 +n10266016 +n03569174 +n06423496 +n10495167 +n03617834 +n09327538 +n10195056 +n10508379 +n13031323 +n11659248 +n04242315 +n10742111 +n10700963 +n12032686 +n09877587 +n07825597 +n07568991 +n11736362 +n12169099 +n13103750 +n03263640 +n12248941 +n10665302 +n01920051 +n09704283 +n11533999 +n04503073 +n11645163 +n10639817 +n09920901 +n06340977 +n03251100 +n10378113 +n03226090 +n10131268 +n02877513 +n13191884 +n02787120 +n11709045 +n02740061 +n12323665 +n02831998 +n10342180 +n12716594 +n04498275 +n09905050 +n03745487 +n07642833 +n10294020 +n10211666 +n12205460 +n02981198 +n01642943 +n07679140 +n04390483 +n10432875 +n09214269 +n10792506 +n10243483 +n13099833 +n10221520 +n13177768 +n04091584 +n10672540 +n10200246 +n13889331 +n02345340 +n10237556 +n01833415 +n01335218 +n09804230 +n09957523 +n05235879 +n10070449 +n10308653 +n10721708 +n04312654 +n10394434 +n12201938 +n12434775 +n07601025 +n02672152 +n10157271 +n02635154 +n12572858 +n13182937 +n10160188 +n03396997 +n10344656 +n02968210 +n10190516 +n07684422 +n03706939 +n07618871 +n02290870 +n03817331 +n03275311 +n12698774 +n04375080 +n07837630 +n04314216 +n11833373 +n07618684 +n03742238 +n12532886 +n03712444 +n11750989 +n10038620 +n09617577 +n03807334 +n10108089 +n01816140 +n10715347 +n02648035 +n13127303 
+n02809491 +n02430748 +n12235479 +n01451863 +n01514926 +n10010864 +n01913440 +n09660240 +n11806369 +n01470479 +n12655245 +n07655067 +n03436772 +n11778092 +n03951800 +n10277815 +n07931733 +n01479820 +n03576955 +n07609549 +n12568649 +n05263316 +n02636405 +n01384084 +n03298352 +n07617344 +n09987045 +n10573957 +n07801709 +n02589062 +n02534165 +n02748359 +n09607782 +n07590974 +n02199170 +n02696569 +n09678747 +n12795209 +n13176363 +n10663315 +n10588724 +n09772330 +n10174589 +n12366313 +n11883628 +n07617447 +n01334690 +n03168663 +n11764478 +n08599174 +n03942028 +n12153033 +n03448696 +n12096674 +n10037588 +n03548320 +n09760290 +n10374541 +n09653438 +n10294139 +n10276942 +n12279293 +n12764507 +n12803958 +n10764622 +n02140858 +n07599068 +n10245507 +n12351790 +n12818004 +n10118301 +n03945459 +n09912995 +n12176709 +n03873996 +n10339179 +n10614507 +n10114662 +n10784922 +n03821424 +n04959230 +n13015509 +n12573911 +n11948469 +n09775907 +n12758014 +n01780142 +n09956578 +n12165384 +n10088200 +n10382480 +n04131113 +n09930628 +n09784160 +n11750173 +n13064111 +n03817522 +n12662074 +n03176238 +n12310021 +n11679378 +n09961331 +n02385580 +n11904274 +n03113505 +n10244913 +n02836900 +n09986700 +n11963572 +n13158605 +n10321632 +n02179891 +n02189670 +n10097995 +n10774756 +n10783240 +n10605737 +n02530052 +n10386196 +n10184505 +n09788237 +n03589672 +n12509109 +n10658304 +n12966804 +n12559518 +n03189311 +n01451295 +n12179632 +n12301613 +n10496489 +n03402785 +n10244108 +n02385676 +n03552001 +n03092053 +n02313360 +n02547733 +n02109391 +n01327909 +n04574606 +n03060728 +n07840124 +n10567848 +n10062176 +n02703124 +n10804732 +n12699301 +n04515890 +n07919665 +n10457214 +n09663248 +n03165955 +n12988341 +n03987865 +n03031756 +n10277912 +n10172080 +n09325824 +n03198223 +n09605110 +n10113869 +n11603462 +n03352366 +n11930203 +n09769929 +n12979316 +n02579762 +n09953052 +n03105974 +n00476140 +n11598287 +n02830157 +n10512201 +n09746936 +n10668666 +n02919976 +n09993651 +n02149861 +n09705003 +n10389865 
+n11655152 +n10010767 +n10070563 +n03688832 +n10590239 +n11936027 +n02939763 +n03163488 +n03171910 +n09955406 +n03266195 +n10217208 +n09338013 +n07594250 +n03215930 +n09725935 +n10592049 +n03732658 +n12498457 +n09966554 +n10668450 +n10361525 +n04060198 +n11936624 +n02602760 +n03942600 +n03708425 +n10020533 +n12067817 +n07590177 +n01891274 +n11837204 +n01419332 +n03860234 +n12616248 +n07834160 +n09867154 +n09788073 +n12222493 +n03388990 +n04245412 +n10182402 +n11675404 +n10450038 +n13045594 +n13158167 +n13082568 +n12052267 +n12707199 +n07810531 +n07914887 +n13127001 +n02573249 +n08619112 +n10471859 +n09919899 +n03635516 +n12067029 +n03352232 +n07765517 +n10519984 +n02742194 +n03062798 +n13124654 +n09958569 +n02370137 +n10121714 +n04019335 +n07732433 +n02559383 +n12585137 +n09729156 +n10744078 +n09954355 +n03078506 +n10062042 +n10688811 +n02668613 +n03142205 +n10347204 +n10518349 +n09898020 +n12563702 +n05468098 +n10116370 +n07838905 +n03127024 +n03545585 +n12801072 +n09940818 +n04480995 +n10466564 +n02606751 +n10032987 +n10771066 +n01587278 +n11852531 +n01455461 +n10397392 +n02349205 +n10180923 +n09778266 +n04366832 +n10051975 +n10538629 +n09865744 +n12554029 +n13118330 +n12952590 +n04187751 +n09924313 +n10062594 +n01980655 +n10028402 +n02567334 +n10590903 +n10265891 +n10739297 +n01457082 +n03437581 +n03713151 +n03475674 +n05464534 +n11863467 +n06592421 +n12491435 +n14914945 +n10279778 +n03388711 +n10483890 +n10612373 +n03332784 +n02332954 +n02952798 +n13041943 +n01607309 +n04356772 +n07711799 +n12670962 +n12229111 +n07878479 +n12401893 +n07772413 +n12138110 +n09781504 +n07902698 +n02750652 +n13042316 +n12400924 +n02304797 +n03066464 +n12852234 +n10155222 +n05541509 +n10711483 +n04210858 +n02835551 +n12859679 +n02935490 +n03540476 +n05279953 +n09807075 +n09617435 +n03566860 +n10549510 +n10025391 +n10754449 +n11927740 +n03554645 +n01837526 +n02656969 +n08648917 +n07860548 +n01452345 +n04021704 +n07783827 +n10080117 +n02187554 +n03214966 +n10036444 +n04291069 
+n12407396 +n02170599 +n09896826 +n12417836 +n07845495 +n02749292 +n03061819 +n03682380 +n10756261 +n10369955 +n09692125 +n09978442 +n04277669 +n10539278 +n09703932 +n01879837 +n02746225 +n13159357 +n11763874 +n10540656 +n07933530 +n12987535 +n02371344 +n10654827 +n09723944 +n12775393 +n11856573 +n12626878 +n12716400 +n09903639 +n09784043 +n03906894 +n10775128 +n03124313 +n10396727 +n02841641 +n10211830 +n12283395 +n03490784 +n14175579 +n04027935 +n12396091 +n02609823 +n01414216 +n09880741 +n11976933 +n03073384 +n09270160 +n11768816 +n12073217 +n11597657 +n09994878 +n11756329 +n12579404 +n03161893 +n01451115 +n07736971 +n02949356 +n03878418 +n12653436 +n10626630 +n12777892 +n13061704 +n10498699 +n03609786 +n03199358 +n10776339 +n10762480 +n13179056 +n10113249 +n04029913 +n12640081 +n10493835 +n11683216 +n03524287 +n04585626 +n02969527 +n12976554 +n08569482 +n10204833 +n12442548 +n02577952 +n09357447 +n10202225 +n02198129 +n11882972 +n10404426 +n01600341 +n12016434 +n09867069 +n10576223 +n09893600 +n01702479 +n04274686 +n04406552 +n02848118 +n02258629 +n03260733 +n03685640 +n11751974 +n09967555 +n06274546 +n09649067 +n10681557 +n07606933 +n03110202 +n11982545 +n10803031 +n02679142 +n04086937 +n10514255 +n04506402 +n03884554 +n09970192 +n10117017 +n12642435 +n10186686 +n02097967 +n03956531 +n11834890 +n02677436 +n10040698 +n11796188 +n03348142 +n04168472 +n02294407 +n12483282 +n09429630 +n04423687 +n09819477 +n09755555 +n10157016 +n03344935 +n07762373 +n12871859 +n09853541 +n09875979 +n13050705 +n02251067 +n10637483 +n03823673 +n10357012 +n03424204 +n04431648 +n01475940 +n02339282 +n10248198 +n07683265 +n13150592 +n10359117 +n10096508 +n03473078 +n13052248 +n10743356 +n03710079 +n10634990 +n04507689 +n07921090 +n02352002 +n03924407 +n03609147 +n02837567 +n03406759 +n03909658 +n10286282 +n12135576 +n01912809 +n10801561 +n10717055 +n03473465 +n03761588 +n03144156 +n09474412 +n10253611 +n12549420 +n02499568 +n09910222 +n10431122 +n12699031 +n01697749 +n11786843 
+n03888808 +n12089496 +n10066314 +n10302905 +n12696830 +n09965787 +n11969806 +n04066388 +n13080306 +n03913930 +n09968259 +n10490421 +n10714195 +n07570021 +n10343449 +n10401204 +n03472796 +n10779897 +n11787190 +n03503097 +n10439523 +n12123648 +n04279858 +n10511771 +n09755788 +n08253141 +n02616397 +n12248574 +n01645466 +n04334504 +n07729142 +n05451099 +n10503818 +n10354265 +n09707735 +n02633422 +n11999656 +n01324916 +n02088745 +n09354511 +n10705448 +n09756195 +n10136615 +n10427359 +n09702134 +n12600095 +n04122262 +n10791820 +n03330002 +n02713496 +n11710658 +n09664908 +n02550203 +n02349847 +n12835766 +n04098260 +n11536567 +n11686780 +n12875861 +n12758471 +n09806944 +n11810030 +n10400003 +n10098388 +n11663263 +n10559683 +n07833672 +n10753989 +n10643095 +n01988869 +n03112240 +n12911914 +n09979913 +n09785236 +n09790047 +n02676097 +n01653509 +n04601159 +n01938735 +n10748142 +n12978076 +n11990627 +n10437262 +n12972136 +n04077594 +n10148825 +n02269340 +n12886185 +n03608504 +n11677485 +n10612518 +n12267265 +n10649308 +n05458173 +n10650162 +n03213361 +n02747063 +n01611674 +n02322992 +n01554017 +n03512624 +n12773142 +n12747120 +n09902128 +n03162714 +n03924532 +n10299125 +n12378753 +n02778131 +n09976024 +n13093629 +n10778999 +n07721833 +n12232851 +n07876775 +n10097590 +n03194170 +n13029122 +n04573832 +n12859272 +n09639382 +n07688021 +n02878796 +n10751710 +n03633632 +n07762534 +n10779995 +n13914265 +n13093275 +n10729330 +n10433077 +n03663910 +n10499110 +n02272286 +n10371450 +n01967308 +n12633061 +n11659627 +n12982915 +n10344121 +n10268629 +n02697876 +n09879552 +n10167361 +n10719807 +n04042076 +n01632952 +n03243625 +n02125872 +n10105906 +n12194613 +n03149810 +n10721124 +n03947343 +n02020219 +n10122531 +n01315330 +n08647264 +n00452734 +n03607527 +n10010243 +n09863749 +n04473275 +n11782878 +n03585337 +n09655466 +n12989007 +n11711971 +n10716864 +n10475835 +n10704712 +n01894956 +n10568443 +n12881105 +n10387836 +n10403633 +n08645318 +n03500457 +n10377633 +n10108464 +n09933972 
+n02618094 +n11798688 +n04155735 +n09780395 +n12822466 +n04302200 +n11899223 +n10633298 +n02760298 +n12142450 +n10803282 +n10769321 +n10514051 +n10597889 +n11837562 +n02261757 +n01458746 +n09830759 +n10003476 +n09817174 +n10738670 +n10118743 +n12096563 +n03054491 +n12155773 +n10439727 +n04170384 +n03223923 +n12632733 +n07845421 +n10062905 +n11831521 +n04267985 +n12796385 +n04154854 +n00444142 +n09778537 +n03115663 +n04385157 +n10109826 +n02337332 +n09996304 +n09880189 +n12871696 +n11823305 +n02516776 +n12377494 +n08511017 +n04421417 +n10765305 +n09675673 +n03488111 +n03076623 +n11829672 +n10292316 +n10758949 +n13031474 +n02829353 +n10090745 +n09186592 +n12736999 +n12715195 +n11684499 +n03168933 +n09890192 +n10596899 +n12527081 +n10496393 +n10497135 +n02137302 +n03266620 +n12958615 +n12664187 +n02633977 +n04262869 +n04215800 +n13133233 +n02392555 +n09858733 +n10186350 +n01715888 +n03142099 +n08573674 +n11687071 +n02690715 +n03146342 +n12331788 +n08079613 +n10609092 +n12943049 +n12234318 +n02312325 +n12618146 +n10135197 +n11705573 +n02794368 +n02850358 +n09464486 +n01993525 +n03187153 +n10097262 +n02976641 +n12198793 +n12941717 +n10219240 +n12434634 +n03827420 +n10437137 +n10342893 +n04174026 +n10265281 +n07757874 +n10765885 +n01470895 +n02349557 +n11716698 +n03765467 +n10227793 +n07824268 +n12994892 +n10486236 +n02974454 +n10718349 +n11726145 +n09909660 +n03378593 +n07805006 +n09875025 +n02645691 +n10223069 +n03722944 +n04389999 +n02544274 +n10239928 +n04456011 +n10382302 +n01552333 +n10082562 +n12952469 +n09883047 +n10442573 +n01891013 +n10690268 +n13111504 +n02287352 +n03567635 +n10331347 +n09762385 +n09933842 +n02369555 +n12291459 +n09919200 +n01492860 +n02067768 +n10713254 +n10550468 +n12846335 +n03835729 +n12467018 +n11676743 +n03629643 +n12987423 +n10655730 +n08678783 +n10349836 +n10087736 +n10246703 +n10338391 +n04585456 +n04158138 +n10500942 +n09850974 +n10791890 +n10020807 +n03315805 +n02752917 +n04033801 +n10492086 +n04427473 +n02940706 +n12110475 
+n09832978 +n12515393 +n07800487 +n09848110 +n02659176 +n09967406 +n10536134 +n10760622 +n09736485 +n07830690 +n07835173 +n09814252 +n10311506 +n10341955 +n03869838 +n07760673 +n09970402 +n12526178 +n11687964 +n09968741 +n10719267 +n07851054 +n10116478 +n10599215 +n09951524 +n03855908 +n03997274 +n02986348 +n08599292 +n02474282 +n04155889 +n09983314 +n01987727 +n10280130 +n10404998 +n02294577 +n02998696 +n08586978 +n11652578 +n13867005 +n12663254 +n10524869 +n02287622 +n10220924 +n03279918 +n02626089 +n10291110 +n12820669 +n07861681 +n08643267 +n07720185 +n12555859 +n03225616 +n09769525 +n03295140 +n12489046 +n10615179 +n12150969 +n02888429 +n10753182 +n10267166 +n03675558 +n12693352 +n02378299 +n02788462 +n03622401 +n12236977 +n10730542 +n12758099 +n10502046 +n11937195 +n10366145 +n10307114 +n12984595 +n10128748 +n09362316 +n09789898 +n09654079 +n04260192 +n10114476 +n08623676 +n10331841 +n05265736 +n10269289 +n03090856 +n12764978 +n02825240 +n10358032 +n09825750 +n03062651 +n11196627 +n11825749 +n04148464 +n04439505 +n07572858 +n04561857 +n12904562 +n03643907 +n10723597 +n01492708 +n10071557 +n10140683 +n01739871 +n12984267 +n03072056 +n10772580 +n10462588 +n11936448 +n10494373 +n12845908 +n09793352 +n10717196 +n12577362 +n09779124 +n10663549 +n02286425 +n10380126 +n01890144 +n02751490 +n03361109 +n01781875 +n13128278 +n09994400 +n09883452 +n13881512 +n02833275 +n10362003 +n01376543 +n12366675 +n09984960 +n10173665 +n10673776 +n02057898 +n01934844 +n04057673 +n10018747 +n02916065 +n13024653 +n05539947 +n09648911 +n04150273 +n01393486 +n10411356 +n12232114 +n02436224 +n12757930 +n03095965 +n10555059 +n01577458 +n09666476 +n10598904 +n11656549 +n02591911 +n13092385 +n10506336 +n13103023 +n09658254 +n04095938 +n11936782 +n07824383 +n09781650 +n10240821 +n01780426 +n02850060 +n02863340 +n13914141 +n12138578 +n13034555 +n12291671 +n12133151 +n04515444 +n04591359 +n02589196 +n02689819 +n11740414 +n07610295 +n10246395 +n09921034 +n12447346 +n12641180 +n01419573 
+n04242587 +n07760395 +n03399579 +n09866661 +n02549376 +n11861238 +n01588996 +n04319545 +n09789150 +n03288643 +n10312491 +n03353281 +n02345997 +n09711132 +n03043173 +n02558860 +n03703590 +n03188871 +n12589142 +n12113323 +n09987161 +n05242239 +n09686262 +n09780984 +n09668199 +n09716933 +n11675738 +n04459243 +n11833749 +n10646942 +n07760070 +n10286539 +n04469684 +n13030616 +n03939440 +n01725086 +n09967816 +n10500824 +n13026015 +n03983928 +n02936921 +n04115542 +n10245029 +n12105828 +n12452673 +n10498046 +n10737264 +n11766046 +n04079603 +n10072054 +n12569037 +n10153155 +n09867311 +n02806992 +n10258602 +n10164025 +n10520964 +n02258508 +n12199399 +n05266096 +n08496334 +n10351064 +n12441552 +n12878325 +n13102648 +n02980625 +n03462972 +n12395906 +n13022903 +n11895714 +n03324814 +n11318824 +n01728266 +n07883510 +n10731013 +n10181799 +n12142357 +n09671089 +n11531334 +n01718414 +n04573625 +n10390600 +n11553522 +n01314910 +n04227519 +n10514784 +n02944256 +n12103680 +n03081859 +n11655592 +n12569616 +n10700105 +n09755086 +n03865820 +n01456137 +n10442232 +n02900987 +n04491934 +n07849026 +n04519728 +n09986450 +n03305300 +n10186143 +n02879422 +n03018614 +n10747548 +n10562509 +n10068425 +n12593341 +n11937692 +n08679562 +n09613690 +n10646433 +n12251740 +n10994097 +n13048447 +n03848537 +n12153741 +n12614096 +n11654438 +n09985470 +n10562968 +n02923915 +n10740594 +n07802767 +n12514592 +n10335801 +n03878674 +n12586499 +n10255459 +n02413824 +n10312600 +n02616128 +n12644283 +n04238953 +n04526520 +n01898593 +n09737161 +n03372822 +n09781398 +n10339251 +n02502807 +n10198832 +n10679610 +n13136781 +n11974373 +n11680457 +n10083677 +n04037298 +n09945021 +n09987239 +n02708885 +n13107807 +n10130877 +n12507379 +n08651104 +n12116058 +n10135297 +n04269086 +n03858533 +n10477955 +n04394031 +n10442417 +n10074735 +n03618797 +n03460455 +n04374521 +n10756061 +n08517010 +n12923108 +n02362194 +n01704103 +n10062492 +n01394771 +n10473789 +n10330593 +n02748183 +n12562141 +n09745933 +n02505485 +n11922661 
+n12018014 +n09866922 +n04067143 +n13161254 +n07813495 +n01374846 +n10213429 +n03253071 +n02546028 +n01642097 +n01475232 +n03212247 +n10155600 +n11689957 +n11738997 +n10525878 +n03301389 +n10589666 +n01908958 +n10289766 +n03900028 +n03437295 +n02987823 +n02739123 +n10505347 +n02546627 +n10381804 +n10132502 +n10336904 +n10189597 +n09786115 +n12875697 +n10761519 +n01470733 +n02875626 +n12111238 +n07862770 +n07856895 +n09996039 +n03368048 +n07913300 +n10062996 +n10555430 +n04302863 +n12758555 +n10740732 +n02385898 +n02385098 +n12162758 +n03887899 +n03976268 +n04234160 +n03641947 +n07857076 +n10578656 +n12135729 +n12675515 +n09032191 +n12969670 +n02600503 +n12518013 +n10227166 +n10121026 +n01801672 +n10661216 +n03244388 +n04147291 +n09664556 +n02539573 +n04480141 +n10601362 +n02613572 +n10537906 +n02613820 +n11656771 +n03841011 +n02845985 +n12534208 +n10241024 +n03645290 +n12743976 +n11922839 +n07709701 +n03066232 +n03467380 +n09266604 +n09663786 +n12775070 +n02427183 +n04083113 +n12896615 +n10501453 +n02345774 +n09965515 +n09704157 +n10666752 +n03846970 +n04167661 +n03991321 +n09556121 +n10686517 +n02586238 +n03594277 +n03591313 +n10391416 +n10756837 +n13163649 +n03971960 +n10245341 +n02577041 +n04481642 +n12373739 +n10214062 +n10091997 +n10275848 +n02090253 +n03514340 +n04593629 +n11795216 +n03126927 +n11871748 +n10272782 +n12056099 +n04484024 +n03101375 +n12255225 +n10724372 +n10531838 +n02354781 +n02389865 +n02853336 +n01477080 +n01779939 +n10776052 +n10724132 +n10284871 +n10554141 +n03898787 +n02366301 +n10721612 +n04421740 +n04256758 +n01445593 +n10103921 +n02729222 +n02530188 +n02387452 +n02601921 +n01711160 +n02474110 +n09869447 +n12789977 +n10158506 +n10396908 +n07839593 +n02662825 +n02473720 +n13034788 +n07752602 +n03762238 +n10262880 +n07770180 +n04030054 +n10151367 +n03525252 +n10252075 +n10747424 +n10191388 +n04130566 +n03951068 +n13239921 +n03733547 +n10358124 +n11549779 +n09203827 +n04043168 +n10359422 +n04286960 +n04237287 +n10130686 +n02338449 
+n12912274 +n10586998 +n02812785 +n10364502 +n03955941 +n12324222 +n09743601 +n03766600 +n01427399 +n12968309 +n11776234 +n01501777 +n10051026 +n10397001 +n01516212 +n02596252 +n02225081 +n10479328 +n02109687 +n10181445 +n02248062 +n03802973 +n01639187 +n02142734 +n02342534 +n02410141 +n02743426 +n03950359 +n12253835 +n07805478 +n03706415 +n03578981 +n04560619 +n09761753 +n03524425 +n01962788 +n04350235 +n10686694 +n13139321 +n10195155 +n12335937 +n12758399 +n03805374 +n12895298 +n03800371 +n11972959 +n11530008 +n03178538 +n02217839 +n10591072 +n04033557 +n01880813 +n12292877 +n02430643 +n07599383 +n01954516 +n09894909 +n02474605 +n03576443 +n07595051 +n03367875 +n12945549 +n02360480 +n14583400 +n04208582 +n02405577 +n02550655 +n02513355 +n04381450 +n00444490 +n03567912 +n09937688 +n07932323 +n04029416 +n01913346 +n13237508 +n04437276 +n12938445 +n03042384 +n12543639 +n03194992 +n04094250 +n12045514 +n03825913 +n03504293 +n12758250 +n03547861 +n03649288 +n04572235 +n07569423 +n03534695 +n03253714 +n01501641 +n13906767 +n12578255 +n11749603 +n07742513 +n07609083 +n04214413 +n07595751 +n12013701 +n12592839 +n12949160 +n04093223 +n02983072 +n03510072 +n02966068 +n03867854 +n01747285 +n10691318 +n13091982 +n12574470 +n02255023 +n03449217 +n03153585 +n04006227 +n13140049 +n02965024 +n03805503 +n03911406 +n13120958 +n12203699 +n01456454 +n10397142 +n12920043 +n02412977 +n08674344 +n07801007 +n03037590 +n10361296 +n13133316 +n03483637 +n04435759 +n12983873 +n02627037 +n03783304 +n07725158 +n02921292 +n01788864 +n01705010 +n12616996 +n03903290 +n08662427 +n03667060 +n07856992 +n03252422 +n02449699 +n12137954 +n10024025 +n07891095 +n04337157 +n04368109 +n03015631 +n02363996 +n12824289 +n03206602 +n12799269 +n02333733 +n01793565 +n01721898 +n03178173 +n02844056 +n11688378 +n13889066 +n02637475 +n03750437 +n01403457 +n01717229 +n02677136 +n12512294 +n03736269 +n02838577 +n08661878 +n01993830 +n02777638 +n02900857 +n04023021 +n03843092 +n07770439 +n12928491 +n03697812 
+n02639922 +n13139482 +n07771082 +n12487058 +n07774182 +n02122810 +n02856362 +n11686195 +n11687432 +n02853870 +n04239218 +n02665250 +n02938218 +n11746600 +n10183347 +n10681194 +n04164199 +n04407257 +n12549005 +n02331842 +n03862379 +n02863638 +n11962994 +n03091907 +n04177654 +n02252972 +n02403153 +n01376437 +n02848806 +n08579266 +n07616265 +n10331258 +n10765587 +n09433312 +n03412387 +n10178077 +n13123841 +n02532918 +n04144651 +n03296963 +n03450881 +n04348988 +n10425946 +n03257065 +n02354320 +n11689197 +n04084682 +n10140783 +n03637027 +n02346170 +n02559144 +n01705591 +n09400584 +n03840327 +n03918074 +n04053767 +n02406046 +n00288190 +n03160001 +n03366464 +n09249155 +n01324305 +n07556872 +n03381565 +n12705220 +n11874878 +n02632494 +n02502006 +n03146560 +n02179340 +n04312756 +n10162016 +n03800563 +n04140853 +n07933652 +n03075248 +n04421582 +n10652703 +n02218134 +n12233249 +n04578559 +n01781071 +n02615298 +n04436832 +n04054566 +n02608284 +n11674019 +n03505764 +n02662397 +n09422190 +n04382537 +n04355684 +n04383923 +n09888635 +n03783575 +n03228796 +n07772026 +n02381119 +n15060326 +n10586166 +n12647787 +n02458517 +n10281546 +n03498866 +n02485988 +n10121246 +n09391644 +n03103904 +n08676253 +n02203978 +n04092168 +n03213014 +n03138217 +n04135933 +n12612811 +n04478066 +n02157285 +n02543255 +n03863783 +n01502101 +n03930229 +n12439830 +n09425019 +n02618513 +n02910241 +n12261359 +n03648667 +n04365229 +n03461651 +n04388040 +n03295928 +n03581531 +n04203356 +n02622249 +n13142907 +n04497249 +n11678377 +n02366579 +n02931013 +n02837134 +n03132438 +n13092987 +n04196803 +n03056215 +n03255322 +n02130925 +n10291469 +n02971940 +n01718096 +n12510774 +n11766432 +n04271891 +n03366721 +n03154616 +n03694356 +n10478293 +n11763142 +n07763483 +n03037228 +n09201998 +n01517389 +n00443517 +n12693244 +n03580990 +n03519848 +n10238375 +n10783646 +n03564849 +n03975926 +n02473554 +n02450426 +n03464952 +n04411835 +n04573045 +n10505732 +n04337650 +n10621514 +n10334782 +n12434985 +n07769102 +n10594523 
+n05475397 +n01875610 +n03299406 +n10507692 +n02593679 +n03317233 +n13239736 +n03550420 +n03247351 +n03819047 +n03633341 +n03154745 +n04073425 +n04532022 +n02910964 +n04301242 +n04378651 +n13098515 +n11775626 +n14603798 +n10263146 +n01886045 +n03761731 +n02224713 +n04591249 +n02144251 +n03849412 +n11548728 +n04051705 +n12298165 +n03150795 +n03989447 +n02826459 +n07602650 +n03155915 +n09891730 +n02067603 +n01523105 +n03618339 +n03897130 +n02711780 +n05285623 +n03533486 +n04085873 +n01923404 +n10139077 +n01709484 +n02183507 +n03216562 +n01971850 +n03136051 +n02948834 +n03589313 +n03665851 +n02937336 +n02035656 +n07769465 +n07849186 +n12585373 +n12280364 +n02846260 +n02511730 +n02614653 +n04193179 +n11718681 +n09467696 +n01522450 +n03040836 +n03162297 +n11896141 +n04000480 +n10350220 +n07746038 +n02124157 +n10655169 +n03476542 +n03895038 +n00443917 +n07757753 +n01726203 +n02987706 +n12750076 +n03012734 +n02941228 +n04194009 +n04501127 +n09794550 +n03510487 +n08589670 +n03166951 +n03673270 +n09792125 +n08492354 +n02396157 +n01628331 +n03993878 +n07833816 +n04958865 +n13650447 +n04339191 +n02826683 +n02893269 +n02810139 +n02626471 +n02589796 +n08677801 +n04325968 +n03275864 +n02622547 +n04406687 +n04097085 +n02998107 +n07831450 +n03658102 +n02575590 +n03523398 +n02412909 +n02953850 +n04337503 +n03510987 +n12664005 +n03710294 +n13138155 +n10110093 +n07831955 +n03932080 +n12971804 +n03943623 +n03726371 +n10531445 +n12984489 +n07835051 +n12097556 +n02685701 +n03038041 +n02451125 +n04594919 +n02372140 +n02665985 +n03496183 +n03961828 +n03802800 +n01713170 +n03602790 +n04974145 +n02780588 +n04031884 +n03588216 +n02614140 +n04578708 +n04501281 +n03166600 +n03992975 +n04206070 +n03227721 +n02582349 +n02664642 +n07805389 +n09226869 +n02459190 +n12216968 +n03628984 +n02524928 +n09209025 +n04078002 +n03167153 +n03562565 +n07599554 +n10252547 +n03279804 +n07692887 +n14909584 +n02529293 +n04444953 +n04156814 +n07616174 +n03415626 +n03331244 +n03868324 +n03644073 +n02818687 
+n10085101 +n02953056 +n03202481 +n02118707 +n03591901 +n12602434 +n02943465 +n02818254 +n07922607 +n02597004 +n04212810 +n04056073 +n12327528 +n02207647 +n01792808 +n03002555 +n03951213 +n12242123 +n10062275 +n12325787 +n10048117 +n11937278 +n03624767 +n04039041 +n04059298 +n03707171 +n07758407 +n01333483 +n02219015 +n02436645 +n02478239 +n04457638 +n01781698 +n09474765 +n03686363 +n10769084 +n09456207 +n02385776 +n13555775 +n03962685 +n13129078 +n03463185 +n01429172 +n04243251 +n12177129 +n03143754 +n03958338 +n02791795 +n04560502 +n12776774 +n02745816 +n03009111 +n02976552 +n03008817 +n03211413 +n03537550 +n12200504 +n01909788 +n11790089 +n03480973 +n10507070 +n01707294 +n04374907 +n04281571 +n00006024 +n03823906 +n12603273 +n03503358 +n04027820 +n12645530 +n02535080 +n04143365 +n08385989 +n12661227 +n12814857 +n11871059 +n04268418 +n13128582 +n01928865 +n04359124 +n12670334 +n03610836 +n04543924 +n02252799 +n15102359 +n04437380 +n04316924 +n11872324 +n09330378 +n10122300 +n03784139 +n00443375 +n14993378 +n01721174 +n00004475 +n00006484 +n00007846 +n00015388 +n00017222 +n00021265 +n00021939 +n00288000 +n00433458 +n00433661 +n00433802 +n00439826 +n00440218 +n00440509 +n00440747 +n00441824 +n00442115 +n00442981 +n00443231 +n00444651 +n00445351 +n00445802 +n00447073 +n00447221 +n00447540 +n00448466 +n00448640 +n00448958 +n00449295 +n00449695 +n00450335 +n00450700 +n00451370 +n00451866 +n00452293 +n00453935 +n00454237 +n00454624 +n00463246 +n00464651 +n00464894 +n00467719 +n00467995 +n00468480 +n00469651 +n00471437 +n00471613 +n00479076 +n00480508 +n00480993 +n00482298 +n00523513 +n01035667 +n01316422 +n01316579 +n01316949 +n01317089 +n01317391 +n01317541 +n01319467 +n01320872 +n01321230 +n01321579 +n01321854 +n01322343 +n01322685 +n01322898 +n01323068 +n01326291 +n01329186 +n01338685 +n01339336 +n01340935 +n01342269 +n01358572 +n01367772 +n01375204 +n01376237 +n01380610 +n01384687 +n01385330 +n01387065 +n01389507 +n01390123 +n01392380 +n01395254 +n01397114 
+n01402600 +n01407798 +n01421807 +n01438208 +n01439121 +n01439514 +n01439808 +n01441425 +n01444783 +n01445429 +n01446589 +n01446760 +n01448951 +n01450661 +n01454856 +n01455778 +n01458842 +n01459791 +n01461646 +n01466257 +n01467336 +n01468238 +n01468712 +n01471682 +n01473806 +n01474283 +n01477525 +n01478511 +n01480516 +n01480880 +n01481331 +n01482071 +n01482330 +n01483522 +n01484097 +n01488918 +n01491874 +n01492357 +n01493541 +n01494757 +n01494882 +n01495006 +n01495701 +n01497118 +n01498406 +n01498699 +n01498989 +n01500091 +n01501160 +n01503061 +n01514752 +n01515078 +n01517565 +n01524359 +n01525720 +n01527194 +n01527617 +n01528654 +n01529672 +n01533339 +n01534582 +n01534762 +n01537134 +n01538955 +n01539573 +n01540233 +n01541922 +n01542786 +n01544208 +n01546921 +n01547832 +n01548301 +n01549430 +n01550761 +n01553142 +n01555809 +n01557185 +n01560105 +n01560636 +n01563128 +n01563746 +n01564394 +n01567133 +n01568132 +n01569836 +n01570676 +n01571904 +n01572328 +n01573074 +n01574045 +n01574390 +n01575745 +n01576695 +n01577659 +n01578575 +n01579028 +n01580379 +n01580490 +n01580772 +n01580870 +n01581166 +n01581434 +n01581730 +n01582398 +n01582498 +n01582856 +n01584225 +n01585121 +n01587834 +n01588431 +n01589286 +n01591697 +n01592257 +n01592540 +n01594372 +n01595624 +n01597336 +n01598588 +n01598988 +n01600085 +n01600657 +n01602080 +n01602209 +n01602630 +n01603600 +n01604330 +n01605630 +n01608814 +n01609062 +n01609391 +n01609751 +n01610955 +n01611472 +n01612628 +n01613294 +n01613615 +n01615121 +n01616551 +n01616764 +n01617095 +n01617443 +n01617766 +n01618082 +n01618922 +n01619310 +n01619536 +n01619835 +n01620135 +n01620414 +n01620735 +n01621127 +n01622352 +n01623706 +n01627424 +n01629276 +n01630284 +n01631175 +n01632047 +n01637112 +n01637932 +n01639765 +n01640846 +n01645776 +n01649170 +n01650167 +n01651487 +n01653773 +n01661091 +n01661592 +n01661818 +n01662622 +n01662784 +n01663401 +n01664369 +n01665932 +n01667432 +n01668091 +n01669372 +n01670092 +n01672032 +n01674216 
+n01674464 +n01674990 +n01676755 +n01680264 +n01680478 +n01681940 +n01684133 +n01685439 +n01686044 +n01686220 +n01686403 +n01686609 +n01686808 +n01687665 +n01688961 +n01689411 +n01691951 +n01692864 +n01693783 +n01694709 +n01696633 +n01697178 +n01698434 +n01699040 +n01701551 +n01702256 +n01703011 +n01703569 +n01705934 +n01708106 +n01708998 +n01712008 +n01712752 +n01717016 +n01719403 +n01722998 +n01724231 +n01726692 +n01727646 +n01730185 +n01730307 +n01730812 +n01730960 +n01731545 +n01732244 +n01733757 +n01734637 +n01734808 +n01735439 +n01735577 +n01735728 +n01737472 +n01737728 +n01737875 +n01738065 +n01738601 +n01739647 +n01740551 +n01741232 +n01741562 +n01741943 +n01743605 +n01745125 +n01745484 +n01746359 +n01747885 +n01749582 +n01749742 +n01751036 +n01752165 +n01753959 +n01754876 +n01755740 +n01767661 +n01769347 +n01770795 +n01771417 +n01772222 +n01775370 +n01776192 +n01776705 +n01777304 +n01777467 +n01777649 +n01777909 +n01778217 +n01778487 +n01778621 +n01778801 +n01779148 +n01779463 +n01779629 +n01780696 +n01782209 +n01785667 +n01789386 +n01789740 +n01791107 +n01791625 +n01792158 +n01792640 +n01794158 +n01795088 +n01795735 +n01795900 +n01796019 +n01796105 +n01796519 +n01796729 +n01798706 +n01798839 +n01798979 +n01799302 +n01800424 +n01801088 +n01801479 +n01802721 +n01803078 +n01804478 +n01804653 +n01804921 +n01805070 +n01805801 +n01806847 +n01807828 +n01808140 +n01808291 +n01808596 +n01809106 +n01810700 +n01811909 +n01812337 +n01813385 +n01814370 +n01814921 +n01815601 +n01816887 +n01819115 +n01820348 +n01820801 +n01821076 +n01821203 +n01822602 +n01823013 +n01824749 +n01825930 +n01826364 +n01827403 +n01829869 +n01831712 +n01832167 +n01834177 +n01834540 +n01835276 +n01838038 +n01838598 +n01839598 +n01841102 +n01843719 +n01844231 +n01844551 +n01844917 +n01845132 +n01845477 +n01846331 +n01848123 +n01848648 +n01849466 +n01850373 +n01851375 +n01852142 +n01852861 +n01853498 +n01854415 +n01856072 +n01856155 +n01856380 +n01856553 +n01856890 +n01857079 +n01857325 
+n01857512 +n01857632 +n01857851 +n01858441 +n01859496 +n01860497 +n01861148 +n01861778 +n01871543 +n01871875 +n01874434 +n01874928 +n01876326 +n01877134 +n01878061 +n01878335 +n01878639 +n01878929 +n01879217 +n01879509 +n01880152 +n01881171 +n01883513 +n01883920 +n01886756 +n01887896 +n01888264 +n01889074 +n01889520 +n01890860 +n01891633 +n01892551 +n01894207 +n01905661 +n01906749 +n01907738 +n01909422 +n01911403 +n01911839 +n01912454 +n01914163 +n01914830 +n01915811 +n01916187 +n01916925 +n01918744 +n01922303 +n01925270 +n01925695 +n01926379 +n01926689 +n01927159 +n01927456 +n01927928 +n01928215 +n01930852 +n01931140 +n01931520 +n01931714 +n01932151 +n01932936 +n01933151 +n01933478 +n01933988 +n01934440 +n01935176 +n01936391 +n01937909 +n01940736 +n01941223 +n01942177 +n01942869 +n01943541 +n01944118 +n01944812 +n01944955 +n01945143 +n01945340 +n01945845 +n01946277 +n01948573 +n01951613 +n01953361 +n01955933 +n01956481 +n01958038 +n01959985 +n01960459 +n01963571 +n01964049 +n01964441 +n01965889 +n01967094 +n01968315 +n01969726 +n01971094 +n01971280 +n01974773 +n01975687 +n01976146 +n01976957 +n01978930 +n01981702 +n01982650 +n01983048 +n01985493 +n01985797 +n01986806 +n01987545 +n01988701 +n01989869 +n01990007 +n01991028 +n01991520 +n01992262 +n01992423 +n01992773 +n01996585 +n01998183 +n02000954 +n02002075 +n02005790 +n02006985 +n02007284 +n02008041 +n02008796 +n02010453 +n02011805 +n02011943 +n02012185 +n02013177 +n02014941 +n02015554 +n02016358 +n02016956 +n02018027 +n02019190 +n02019438 +n02019929 +n02021050 +n02021795 +n02022684 +n02023341 +n02025043 +n02026059 +n02028175 +n02030035 +n02030287 +n02030996 +n02031934 +n02033208 +n02033324 +n02033561 +n02034129 +n02034661 +n02036053 +n02037464 +n02039171 +n02040505 +n02041085 +n02041246 +n02043063 +n02044178 +n02044778 +n02045369 +n02046759 +n02047260 +n02047614 +n02048698 +n02049532 +n02050004 +n02051474 +n02052204 +n02052365 +n02053083 +n02054502 +n02055658 +n02055803 +n02057731 +n02058594 +n02058747 
+n02059162 +n02060411 +n02060889 +n02062017 +n02062430 +n02062744 +n02063224 +n02064338 +n02066707 +n02068206 +n02068974 +n02069701 +n02070430 +n02073250 +n02075296 +n02075927 +n02076196 +n02076779 +n02077152 +n02077658 +n02078292 +n02078574 +n02078738 +n02079005 +n02079389 +n02081571 +n02083346 +n02083672 +n02084071 +n02084861 +n02085374 +n02086346 +n02086478 +n02087122 +n02087551 +n02088839 +n02089232 +n02089555 +n02090475 +n02090827 +n02092468 +n02093056 +n02094562 +n02094721 +n02095050 +n02095412 +n02095727 +n02096756 +n02097786 +n02098550 +n02099029 +n02099997 +n02100399 +n02101108 +n02101861 +n02102605 +n02103406 +n02103841 +n02104523 +n02104882 +n02106966 +n02107420 +n02108254 +n02108672 +n02109150 +n02109811 +n02110532 +n02111626 +n02112497 +n02112826 +n02113335 +n02114100 +n02115012 +n02115335 +n02117512 +n02117646 +n02117900 +n02118176 +n02118333 +n02119247 +n02119359 +n02120278 +n02120997 +n02121620 +n02121808 +n02122725 +n02123785 +n02124623 +n02127292 +n02127381 +n02127482 +n02127586 +n02127678 +n02127808 +n02128598 +n02128669 +n02129463 +n02129530 +n02129837 +n02129923 +n02130545 +n02131653 +n02132466 +n02132580 +n02132788 +n02133400 +n02134971 +n02135220 +n02137722 +n02137888 +n02138647 +n02138777 +n02139199 +n02139671 +n02141306 +n02141713 +n02144593 +n02145424 +n02148835 +n02149420 +n02150482 +n02152740 +n02152991 +n02153203 +n02153809 +n02156732 +n02159955 +n02164464 +n02165877 +n02166229 +n02166567 +n02166826 +n02167505 +n02167820 +n02167944 +n02168245 +n02169023 +n02169218 +n02169705 +n02169974 +n02170400 +n02170993 +n02171453 +n02171869 +n02172518 +n02172678 +n02172761 +n02172870 +n02174355 +n02176261 +n02178411 +n02178717 +n02179012 +n02180233 +n02181235 +n02181724 +n02182045 +n02182355 +n02182642 +n02182930 +n02183857 +n02186153 +n02188699 +n02189363 +n02190790 +n02191273 +n02191773 +n02191979 +n02192252 +n02192513 +n02192814 +n02193009 +n02193163 +n02194249 +n02194750 +n02195091 +n02195526 +n02195819 +n02196344 +n02198532 +n02199502 
+n02200198 +n02202287 +n02204722 +n02206270 +n02207179 +n02207449 +n02208280 +n02208498 +n02208848 +n02208979 +n02209111 +n02209354 +n02209624 +n02209964 +n02210427 +n02210921 +n02211444 +n02211627 +n02211896 +n02212062 +n02212602 +n02212958 +n02213107 +n02215161 +n02215770 +n02217563 +n02218713 +n02220055 +n02220225 +n02220518 +n02220804 +n02221083 +n02221414 +n02221571 +n02221715 +n02221820 +n02222035 +n02222321 +n02222582 +n02223266 +n02223520 +n02226183 +n02226821 +n02226970 +n02227247 +n02227604 +n02227966 +n02228341 +n02228697 +n02229156 +n02229765 +n02230023 +n02230187 +n02230480 +n02230634 +n02231052 +n02231803 +n02233943 +n02234355 +n02234570 +n02234848 +n02235205 +n02236241 +n02236896 +n02237424 +n02237581 +n02238235 +n02238887 +n02239528 +n02241569 +n02241799 +n02243562 +n02244173 +n02246011 +n02246628 +n02247511 +n02248368 +n02248510 +n02248887 +n02249515 +n02250822 +n02251775 +n02252226 +n02253127 +n02253715 +n02254697 +n02257003 +n02257284 +n02257715 +n02259377 +n02260421 +n02260863 +n02261419 +n02262178 +n02263378 +n02264885 +n02265330 +n02268148 +n02269196 +n02269522 +n02270011 +n02270200 +n02271570 +n02271897 +n02272871 +n02274024 +n02274259 +n02274822 +n02278210 +n02278839 +n02279637 +n02280458 +n02281015 +n02281136 +n02281267 +n02282257 +n02282385 +n02282553 +n02282903 +n02283077 +n02283201 +n02283951 +n02285548 +n02287004 +n02287799 +n02288789 +n02291220 +n02291572 +n02291748 +n02293352 +n02293868 +n02295064 +n02295390 +n02295870 +n02298541 +n02300173 +n02301452 +n02302459 +n02302620 +n02305407 +n02306433 +n02307325 +n02308139 +n02308471 +n02309337 +n02310000 +n02310717 +n02311060 +n02312006 +n02312427 +n02313008 +n02316707 +n02318167 +n02319308 +n02319555 +n02319829 +n02320127 +n02322047 +n02323449 +n02323902 +n02324045 +n02325722 +n02325884 +n02326074 +n02326763 +n02326862 +n02327028 +n02327175 +n02327435 +n02327656 +n02327842 +n02328429 +n02329401 +n02330245 +n02331046 +n02331309 +n02332755 +n02333546 +n02334460 +n02335127 +n02336011 
+n02336641 +n02338901 +n02339376 +n02339922 +n02343058 +n02343320 +n02343772 +n02344528 +n02345600 +n02346998 +n02347274 +n02347573 +n02347744 +n02348173 +n02348788 +n02350105 +n02350989 +n02351870 +n02352591 +n02353861 +n02355227 +n02355477 +n02358091 +n02359324 +n02360781 +n02361587 +n02361706 +n02361850 +n02363245 +n02363351 +n02364520 +n02369680 +n02370806 +n02372584 +n02373336 +n02374149 +n02374451 +n02376542 +n02376791 +n02376918 +n02377181 +n02377480 +n02377703 +n02378415 +n02380335 +n02380583 +n02380745 +n02381460 +n02382437 +n02382948 +n02384858 +n02386014 +n02386310 +n02386496 +n02388276 +n02389346 +n02389559 +n02390454 +n02390834 +n02391234 +n02391373 +n02391508 +n02391994 +n02393580 +n02394477 +n02395003 +n02395694 +n02395931 +n02397529 +n02399000 +n02401031 +n02402010 +n02402175 +n02402425 +n02403325 +n02403454 +n02404186 +n02404573 +n02406174 +n02407959 +n02408660 +n02408817 +n02409870 +n02410702 +n02410900 +n02411705 +n02412440 +n02413131 +n02414578 +n02415435 +n02416519 +n02417070 +n02417534 +n02418064 +n02419796 +n02423218 +n02423362 +n02423589 +n02424305 +n02424695 +n02426813 +n02427724 +n02428349 +n02430045 +n02430559 +n02431122 +n02432511 +n02433546 +n02433925 +n02435853 +n02437136 +n02437971 +n02438173 +n02438272 +n02439033 +n02441326 +n02442172 +n02442336 +n02442446 +n02442572 +n02442668 +n02443015 +n02443346 +n02443808 +n02443959 +n02444251 +n02445004 +n02445171 +n02446206 +n02446352 +n02446645 +n02447021 +n02447762 +n02448060 +n02448633 +n02448885 +n02450034 +n02453108 +n02453611 +n02454794 +n02455135 +n02455428 +n02455720 +n02456008 +n02456275 +n02456962 +n02460009 +n02469914 +n02470325 +n02470899 +n02471300 +n02471762 +n02472293 +n02473307 +n02474777 +n02476219 +n02480153 +n02481103 +n02481235 +n02481366 +n02481500 +n02482060 +n02482286 +n02482474 +n02482650 +n02483092 +n02484322 +n02484473 +n02485225 +n02485371 +n02485536 +n02485688 +n02486657 +n02486908 +n02487079 +n02487547 +n02487675 +n02487847 +n02488003 +n02488415 +n02488894 
+n02489589 +n02490597 +n02490811 +n02491107 +n02491329 +n02491474 +n02496913 +n02501583 +n02502514 +n02503127 +n02503517 +n02504770 +n02507649 +n02508021 +n02512053 +n02512938 +n02513560 +n02515214 +n02516188 +n02517442 +n02517938 +n02519148 +n02519686 +n02521646 +n02522399 +n02524524 +n02526425 +n02526818 +n02527057 +n02527271 +n02527622 +n02528163 +n02529772 +n02530421 +n02532028 +n02532602 +n02533209 +n02533834 +n02534559 +n02534734 +n02535537 +n02537085 +n02537319 +n02538406 +n02538985 +n02540412 +n02541687 +n02546331 +n02548689 +n02549989 +n02550460 +n02552171 +n02554730 +n02556846 +n02557591 +n02557749 +n02559862 +n02561108 +n02561661 +n02562315 +n02562796 +n02563182 +n02564720 +n02565573 +n02566109 +n02568959 +n02569484 +n02570838 +n02572196 +n02574910 +n02576223 +n02576575 +n02578233 +n02579557 +n02580336 +n02581957 +n02583567 +n02585872 +n02586543 +n02588286 +n02590495 +n02590702 +n02590987 +n02594250 +n02596381 +n02597367 +n02599052 +n02599958 +n02600298 +n02601344 +n02602405 +n02603317 +n02604157 +n02605316 +n02606384 +n02607201 +n02607862 +n02613181 +n02614482 +n02614978 +n02619165 +n02621908 +n02623445 +n02624167 +n02625612 +n02626762 +n02627835 +n02630281 +n02630739 +n02631041 +n02636170 +n02636854 +n02638596 +n02640626 +n02640857 +n02642107 +n02642644 +n02643112 +n02644113 +n02646667 +n02648625 +n02650050 +n02650541 +n02652668 +n02653145 +n02653786 +n02654425 +n02655523 +n02656670 +n02657368 +n02658079 +n02661017 +n02662239 +n02663849 +n02667379 +n02667576 +n02668393 +n02670382 +n02671780 +n02672371 +n02676261 +n02676670 +n02677028 +n02677718 +n02678384 +n02680110 +n02680754 +n02682407 +n02682922 +n02683791 +n02686121 +n02686568 +n02687992 +n02688443 +n02689274 +n02691156 +n02692513 +n02693413 +n02693540 +n02694426 +n02694966 +n02695627 +n02697576 +n02698244 +n02700258 +n02700895 +n02702989 +n02703275 +n02705944 +n02708224 +n02708555 +n02709367 +n02709637 +n02710600 +n02712643 +n02713218 +n02715229 +n02715513 +n02715712 +n02716626 +n02726305 
+n02726681 +n02727016 +n02727825 +n02728440 +n02729837 +n02729965 +n02730265 +n02732072 +n02732827 +n02733213 +n02733524 +n02735361 +n02735688 +n02736798 +n02737660 +n02738031 +n02738271 +n02738535 +n02739550 +n02739668 +n02740533 +n02740764 +n02741475 +n02742322 +n02742753 +n02745492 +n02746365 +n02749790 +n02750169 +n02751067 +n02751295 +n02752496 +n02753044 +n02753394 +n02754103 +n02755352 +n02755529 +n02756098 +n02756977 +n02757462 +n02757810 +n02758134 +n02758960 +n02759700 +n02759963 +n02760099 +n02760199 +n02760429 +n02760855 +n02761392 +n02763198 +n02763714 +n02764044 +n02764614 +n02764779 +n02765028 +n02766320 +n02766534 +n02766792 +n02767433 +n02769075 +n02770830 +n02772554 +n02772700 +n02773037 +n02773838 +n02774152 +n02774630 +n02775483 +n02776205 +n02777100 +n02777734 +n02777927 +n02778456 +n02778669 +n02781121 +n02781338 +n02781517 +n02783035 +n02783324 +n02784998 +n02785648 +n02786198 +n02786463 +n02788689 +n02789487 +n02790823 +n02792552 +n02792948 +n02793842 +n02794008 +n02794779 +n02794972 +n02795783 +n02796207 +n02796623 +n02796995 +n02797692 +n02797881 +n02799897 +n02801184 +n02801525 +n02801938 +n02802721 +n02803349 +n02803666 +n02804252 +n02806088 +n02806379 +n02806875 +n02810471 +n02811468 +n02811719 +n02812201 +n02813252 +n02813399 +n02815478 +n02815950 +n02816494 +n02817031 +n02817650 +n02817799 +n02818832 +n02819697 +n02820210 +n02821627 +n02821943 +n02822220 +n02822399 +n02822865 +n02823335 +n02824448 +n02826589 +n02826886 +n02827606 +n02828299 +n02828884 +n02831335 +n02831724 +n02831894 +n02833793 +n02834778 +n02835412 +n02836268 +n02839351 +n02839910 +n02840619 +n02841063 +n02841506 +n02842133 +n02843029 +n02843777 +n02844214 +n02844307 +n02844714 +n02847631 +n02848216 +n02848523 +n02849154 +n02850950 +n02851099 +n02853016 +n02854532 +n02854926 +n02855089 +n02855390 +n02855793 +n02857365 +n02857477 +n02857644 +n02858304 +n02860415 +n02861886 +n02862048 +n02862916 +n02863750 +n02865665 +n02865931 +n02866578 +n02867715 +n02869737 
+n02871631 +n02871824 +n02871963 +n02872752 +n02873839 +n02874086 +n02875436 +n02876326 +n02876457 +n02876657 +n02877962 +n02879517 +n02880189 +n02880546 +n02880940 +n02881193 +n02881906 +n02882483 +n02882647 +n02883004 +n02883344 +n02884225 +n02885108 +n02885338 +n02886599 +n02887209 +n02887970 +n02888569 +n02889425 +n02891188 +n02891788 +n02892499 +n02893418 +n02896294 +n02896442 +n02897389 +n02897820 +n02898173 +n02898369 +n02898585 +n02898711 +n02900705 +n02901481 +n02901901 +n02902079 +n02902916 +n02903006 +n02904109 +n02904640 +n02908217 +n02909285 +n02911485 +n02912065 +n02913152 +n02914991 +n02916179 +n02916350 +n02917377 +n02917607 +n02919414 +n02920503 +n02921884 +n02923129 +n02924116 +n02925519 +n02928413 +n02928608 +n02929289 +n02929462 +n02929923 +n02931417 +n02931836 +n02932019 +n02932400 +n02933112 +n02933462 +n02933750 +n02933990 +n02934168 +n02935658 +n02935891 +n02936176 +n02936281 +n02936714 +n02938886 +n02939866 +n02941095 +n02942699 +n02943241 +n02943871 +n02944826 +n02945161 +n02946270 +n02946348 +n02946921 +n02947212 +n02947818 +n02948557 +n02949202 +n02950186 +n02950256 +n02950632 +n02950943 +n02951843 +n02952485 +n02952674 +n02953673 +n02954163 +n02954340 +n02954938 +n02955065 +n02955247 +n02955540 +n02955767 +n02957135 +n02957755 +n02958343 +n02959942 +n02961451 +n02961947 +n02963302 +n02963692 +n02963821 +n02965216 +n02965300 +n02965529 +n02966545 +n02966786 +n02966942 +n02967081 +n02967991 +n02968473 +n02969010 +n02969163 +n02969634 +n02969886 +n02970685 +n02970849 +n02971691 +n02972397 +n02973017 +n02974697 +n02975212 +n02976939 +n02978205 +n02978753 +n02979516 +n02982599 +n02983189 +n02983904 +n02984061 +n02984203 +n02984469 +n02984699 +n02985137 +n02985828 +n02986066 +n02987047 +n02987492 +n02989099 +n02991048 +n02991302 +n02992032 +n02993546 +n02995998 +n02997391 +n02997607 +n03001282 +n03001627 +n03002210 +n03003091 +n03004620 +n03005515 +n03007130 +n03007591 +n03010656 +n03010795 +n03011018 +n03011355 +n03012159 +n03013006 
+n03014440 +n03015254 +n03017070 +n03018209 +n03020034 +n03020416 +n03020692 +n03024333 +n03025070 +n03025886 +n03027108 +n03027250 +n03029066 +n03031422 +n03032811 +n03033362 +n03033986 +n03034516 +n03034663 +n03035510 +n03036469 +n03036866 +n03037108 +n03037709 +n03038685 +n03039015 +n03039947 +n03040229 +n03040376 +n03043274 +n03043958 +n03045337 +n03046257 +n03048883 +n03049066 +n03049457 +n03050026 +n03050546 +n03050655 +n03050864 +n03051396 +n03051540 +n03052464 +n03052917 +n03053047 +n03054901 +n03055670 +n03056097 +n03056493 +n03057021 +n03057636 +n03058107 +n03058603 +n03058949 +n03059366 +n03061050 +n03063073 +n03063338 +n03064350 +n03064758 +n03065708 +n03066849 +n03070193 +n03071021 +n03071160 +n03072201 +n03073296 +n03073977 +n03074380 +n03074855 +n03075097 +n03075500 +n03075634 +n03076411 +n03076708 +n03078287 +n03078670 +n03079230 +n03079741 +n03080497 +n03080731 +n03081986 +n03082127 +n03082807 +n03082979 +n03084420 +n03085333 +n03085602 +n03085915 +n03086183 +n03086457 +n03086670 +n03087366 +n03087643 +n03087816 +n03088707 +n03091044 +n03091374 +n03092166 +n03092314 +n03093792 +n03094503 +n03096439 +n03096960 +n03098140 +n03098806 +n03099454 +n03099771 +n03099945 +n03100346 +n03100490 +n03101156 +n03101986 +n03102654 +n03102859 +n03106722 +n03106898 +n03107046 +n03109881 +n03111690 +n03112869 +n03113152 +n03113657 +n03113835 +n03114839 +n03115180 +n03116530 +n03116767 +n03117199 +n03118346 +n03118969 +n03119510 +n03120198 +n03120491 +n03121897 +n03122748 +n03123809 +n03125870 +n03128085 +n03128427 +n03128519 +n03129001 +n03130066 +n03130563 +n03131669 +n03132261 +n03134853 +n03135917 +n03136369 +n03137579 +n03139464 +n03140900 +n03141065 +n03141327 +n03143572 +n03145384 +n03145843 +n03146846 +n03147509 +n03148324 +n03148727 +n03149401 +n03151077 +n03153948 +n03154073 +n03154446 +n03155178 +n03156071 +n03156405 +n03157348 +n03158796 +n03158885 +n03161450 +n03162818 +n03163798 +n03163973 +n03164605 +n03164722 +n03164929 +n03165823 +n03167978 
+n03168107 +n03168217 +n03170635 +n03171356 +n03172965 +n03173387 +n03175604 +n03176386 +n03177165 +n03177708 +n03178000 +n03178430 +n03180504 +n03180969 +n03181293 +n03182140 +n03182232 +n03182912 +n03183080 +n03186818 +n03187751 +n03189818 +n03193597 +n03196062 +n03196324 +n03196598 +n03199647 +n03199901 +n03200357 +n03200539 +n03200701 +n03200906 +n03201035 +n03201638 +n03201996 +n03202354 +n03202760 +n03203089 +n03203806 +n03204306 +n03204558 +n03204955 +n03205143 +n03205304 +n03206718 +n03206908 +n03207305 +n03208556 +n03210683 +n03211117 +n03211616 +n03212811 +n03214253 +n03214450 +n03215191 +n03219135 +n03220237 +n03221059 +n03221720 +n03222516 +n03223162 +n03223441 +n03224753 +n03224893 +n03225777 +n03226538 +n03228016 +n03228533 +n03228692 +n03229115 +n03229526 +n03231160 +n03231819 +n03235796 +n03235979 +n03236580 +n03236735 +n03237212 +n03237639 +n03239259 +n03239726 +n03240140 +n03241093 +n03241335 +n03241496 +n03242120 +n03242506 +n03242995 +n03243218 +n03245271 +n03245421 +n03246933 +n03250952 +n03251533 +n03251766 +n03252324 +n03252637 +n03254374 +n03255488 +n03255899 +n03256788 +n03256928 +n03257586 +n03258905 +n03259505 +n03261776 +n03262519 +n03262809 +n03262932 +n03265032 +n03266749 +n03267821 +n03269203 +n03269401 +n03270695 +n03271765 +n03271865 +n03272239 +n03272383 +n03273061 +n03273913 +n03274561 +n03274796 +n03276179 +n03277459 +n03277771 +n03278248 +n03279153 +n03279364 +n03279508 +n03280394 +n03280644 +n03281145 +n03282060 +n03282401 +n03284743 +n03284981 +n03285912 +n03286572 +n03287733 +n03288003 +n03289985 +n03291413 +n03292960 +n03294048 +n03294833 +n03296478 +n03297103 +n03297644 +n03297735 +n03298089 +n03302790 +n03303217 +n03303831 +n03304197 +n03304465 +n03305522 +n03307573 +n03308152 +n03309808 +n03314378 +n03314884 +n03315644 +n03316406 +n03318136 +n03319457 +n03320046 +n03322570 +n03322940 +n03323703 +n03324928 +n03325088 +n03326073 +n03327234 +n03327691 +n03327841 +n03329663 +n03330792 +n03334017 +n03334492 +n03334912 
+n03335030 +n03335846 +n03336839 +n03337494 +n03338287 +n03338821 +n03339296 +n03339643 +n03340009 +n03340923 +n03342961 +n03343354 +n03343560 +n03343853 +n03346135 +n03346455 +n03349296 +n03350352 +n03350456 +n03350602 +n03351262 +n03351979 +n03352628 +n03354903 +n03355468 +n03356446 +n03357267 +n03357716 +n03359137 +n03359566 +n03360731 +n03361683 +n03362771 +n03363363 +n03364008 +n03364937 +n03365592 +n03365991 +n03366823 +n03373237 +n03374649 +n03374838 +n03375171 +n03376279 +n03378342 +n03379343 +n03379828 +n03379989 +n03380647 +n03380867 +n03381126 +n03381231 +n03381776 +n03382856 +n03382969 +n03383468 +n03384167 +n03384891 +n03385557 +n03386011 +n03387323 +n03387653 +n03390327 +n03391770 +n03393324 +n03394480 +n03394649 +n03396580 +n03396654 +n03397266 +n03397532 +n03398228 +n03399761 +n03399971 +n03402188 +n03402369 +n03404012 +n03404360 +n03404449 +n03405265 +n03405725 +n03407369 +n03409393 +n03409591 +n03410147 +n03411339 +n03412058 +n03412220 +n03412511 +n03412906 +n03413264 +n03413428 +n03413828 +n03414162 +n03415252 +n03416489 +n03416775 +n03417345 +n03418158 +n03418242 +n03419014 +n03422072 +n03422589 +n03423719 +n03424630 +n03427296 +n03428090 +n03428349 +n03429003 +n03429288 +n03429914 +n03430091 +n03430313 +n03430551 +n03430959 +n03431243 +n03431745 +n03433637 +n03433877 +n03434285 +n03434830 +n03435593 +n03437941 +n03438257 +n03439814 +n03441112 +n03442288 +n03442756 +n03446070 +n03446832 +n03448031 +n03448956 +n03449564 +n03449858 +n03450516 +n03452267 +n03452449 +n03453320 +n03454110 +n03454211 +n03454707 +n03455355 +n03456548 +n03456665 +n03457008 +n03457686 +n03458271 +n03459914 +n03461882 +n03465500 +n03465818 +n03466162 +n03466839 +n03467517 +n03467796 +n03467984 +n03468696 +n03469493 +n03470387 +n03470629 +n03470948 +n03472232 +n03472535 +n03472937 +n03473817 +n03473966 +n03475823 +n03476083 +n03476313 +n03477773 +n03477902 +n03478756 +n03478907 +n03481521 +n03482523 +n03483230 +n03483531 +n03484083 +n03484931 +n03487331 +n03487444 
+n03487774 +n03488188 +n03488603 +n03489162 +n03490324 +n03490449 +n03490884 +n03491988 +n03496296 +n03496612 +n03497100 +n03497657 +n03498536 +n03499468 +n03500295 +n03501152 +n03501288 +n03501614 +n03502331 +n03502509 +n03502777 +n03503718 +n03503997 +n03505383 +n03505504 +n03506370 +n03507963 +n03508101 +n03509394 +n03509843 +n03510583 +n03510866 +n03511175 +n03512147 +n03512911 +n03513137 +n03513376 +n03515338 +n03517899 +n03517982 +n03518631 +n03519674 +n03521076 +n03521544 +n03522634 +n03524574 +n03524976 +n03525074 +n03525454 +n03525827 +n03528263 +n03529444 +n03531281 +n03531447 +n03531546 +n03532342 +n03534776 +n03535024 +n03536761 +n03537412 +n03538037 +n03538300 +n03538634 +n03538957 +n03540267 +n03540595 +n03541091 +n03541696 +n03541923 +n03542333 +n03542860 +n03543603 +n03544360 +n03545150 +n03546340 +n03547054 +n03547530 +n03548930 +n03550153 +n03550289 +n03551084 +n03551790 +n03552449 +n03552749 +n03553486 +n03554460 +n03555426 +n03555662 +n03557590 +n03558176 +n03558404 +n03558739 +n03561169 +n03563200 +n03563710 +n03563967 +n03565288 +n03565565 +n03566329 +n03568117 +n03568818 +n03571942 +n03572205 +n03574555 +n03574816 +n03575958 +n03576215 +n03577672 +n03577818 +n03578055 +n03578251 +n03578656 +n03579538 +n03579982 +n03583621 +n03584400 +n03585073 +n03588951 +n03589513 +n03589791 +n03590306 +n03590932 +n03592245 +n03592773 +n03593526 +n03595409 +n03595860 +n03596285 +n03597317 +n03598151 +n03598299 +n03598646 +n03600977 +n03601638 +n03601840 +n03602081 +n03603722 +n03604629 +n03604843 +n03605722 +n03605915 +n03606465 +n03609235 +n03609542 +n03610418 +n03610992 +n03612814 +n03613294 +n03613592 +n03614007 +n03614532 +n03615563 +n03617095 +n03617594 +n03618546 +n03618982 +n03619396 +n03619650 +n03619793 +n03619890 +n03620052 +n03621049 +n03621694 +n03622931 +n03623556 +n03624134 +n03625355 +n03626115 +n03631177 +n03631811 +n03632852 +n03633886 +n03635032 +n03635668 +n03635932 +n03636248 +n03636649 +n03638883 +n03639675 +n03640988 +n03642444 
+n03646296 +n03646916 +n03647520 +n03651388 +n03653220 +n03653454 +n03654576 +n03655072 +n03656484 +n03657239 +n03658858 +n03659292 +n03660124 +n03661340 +n03662719 +n03662887 +n03663531 +n03664675 +n03664943 +n03665366 +n03666362 +n03666917 +n03667235 +n03667829 +n03671914 +n03672827 +n03673450 +n03673767 +n03676759 +n03677766 +n03679384 +n03679712 +n03681477 +n03682487 +n03684823 +n03685307 +n03685820 +n03686130 +n03686470 +n03687928 +n03688943 +n03689157 +n03689570 +n03690851 +n03691817 +n03692379 +n03693293 +n03697552 +n03698604 +n03699280 +n03699975 +n03700963 +n03701191 +n03701391 +n03701640 +n03701790 +n03702248 +n03704834 +n03705379 +n03706653 +n03707597 +n03708036 +n03709206 +n03709363 +n03709545 +n03710528 +n03711711 +n03711999 +n03712887 +n03713069 +n03714235 +n03715386 +n03715669 +n03715892 +n03716966 +n03717131 +n03718212 +n03718335 +n03718699 +n03718789 +n03719053 +n03721590 +n03722007 +n03722288 +n03724176 +n03725035 +n03725717 +n03726516 +n03726760 +n03726993 +n03727837 +n03727946 +n03728437 +n03728982 +n03729647 +n03729951 +n03730153 +n03730788 +n03731695 +n03733644 +n03733925 +n03735637 +n03736970 +n03738241 +n03738472 +n03739518 +n03739693 +n03743902 +n03744276 +n03744684 +n03744840 +n03745571 +n03746330 +n03748162 +n03749504 +n03749807 +n03750206 +n03751065 +n03752185 +n03752922 +n03753077 +n03753514 +n03758894 +n03759432 +n03760671 +n03762982 +n03763727 +n03764276 +n03765561 +n03765934 +n03766322 +n03768132 +n03769722 +n03770954 +n03772077 +n03772674 +n03773035 +n03775199 +n03775847 +n03779000 +n03779370 +n03780047 +n03781787 +n03782190 +n03785499 +n03787523 +n03789171 +n03789400 +n03789946 +n03790230 +n03790512 +n03790755 +n03791235 +n03792048 +n03792526 +n03793850 +n03795976 +n03796181 +n03797390 +n03798982 +n03799113 +n03800485 +n03800772 +n03800933 +n03802007 +n03802228 +n03802393 +n03803116 +n03809312 +n03811295 +n03811444 +n03811847 +n03811965 +n03812382 +n03812924 +n03813176 +n03813946 +n03815278 +n03815482 +n03815615 +n03816005 
+n03816136 +n03816849 +n03817647 +n03819595 +n03819994 +n03820154 +n03820318 +n03820728 +n03820950 +n03824197 +n03825080 +n03827536 +n03828020 +n03829340 +n03831757 +n03834040 +n03834604 +n03836062 +n03837422 +n03838748 +n03839172 +n03839276 +n03839795 +n03841666 +n03842156 +n03844045 +n03844233 +n03845190 +n03846234 +n03846772 +n03847471 +n03847823 +n03848168 +n03848348 +n03849275 +n03850613 +n03851341 +n03851787 +n03852280 +n03852688 +n03854815 +n03859280 +n03859495 +n03859958 +n03861430 +n03861842 +n03862676 +n03863923 +n03864139 +n03864356 +n03864692 +n03865371 +n03865949 +n03868406 +n03871083 +n03871524 +n03871724 +n03873848 +n03874138 +n03874823 +n03875218 +n03880129 +n03880323 +n03880531 +n03883054 +n03883773 +n03883944 +n03884639 +n03885535 +n03885669 +n03886053 +n03886641 +n03887185 +n03888022 +n03889503 +n03889726 +n03891051 +n03892557 +n03894051 +n03894379 +n03896103 +n03896233 +n03896419 +n03896628 +n03896984 +n03897943 +n03898271 +n03898633 +n03899612 +n03899933 +n03901338 +n03903133 +n03903424 +n03904060 +n03904183 +n03904433 +n03905540 +n03906997 +n03907654 +n03908204 +n03909160 +n03909406 +n03915118 +n03915437 +n03916470 +n03916720 +n03917327 +n03918480 +n03920737 +n03923564 +n03923692 +n03924069 +n03926148 +n03926412 +n03926876 +n03927792 +n03928116 +n03929091 +n03929202 +n03929443 +n03930515 +n03932670 +n03936269 +n03938522 +n03939677 +n03940256 +n03941684 +n03943920 +n03945615 +n03947111 +n03947466 +n03948459 +n03951971 +n03953020 +n03953416 +n03955809 +n03956785 +n03956922 +n03957315 +n03957762 +n03958630 +n03958752 +n03959014 +n03959701 +n03961939 +n03962525 +n03962932 +n03963028 +n03965907 +n03966325 +n03966751 +n03966976 +n03967942 +n03968293 +n03971321 +n03972524 +n03973520 +n03973628 +n03975035 +n03979377 +n03979492 +n03980026 +n03981340 +n03982232 +n03982895 +n03984234 +n03984381 +n03985232 +n03986704 +n03988170 +n03989665 +n03990474 +n03991443 +n03992325 +n03992703 +n03993180 +n03993403 +n03994008 +n03994757 +n03995018 +n03995856 
+n03996145 +n03996416 +n03997484 +n03999992 +n04000311 +n04001397 +n04001499 +n04001845 +n04004210 +n04004475 +n04005912 +n04007664 +n04010057 +n04010779 +n04010927 +n04011827 +n04012084 +n04013729 +n04014297 +n04015204 +n04016576 +n04016684 +n04018399 +n04018667 +n04019101 +n04019696 +n04020087 +n04020298 +n04020912 +n04021028 +n04021362 +n04021798 +n04022332 +n04022708 +n04023249 +n04024274 +n04026053 +n04026918 +n04027023 +n04027706 +n04028315 +n04029734 +n04030274 +n04036303 +n04037964 +n04038440 +n04038727 +n04039848 +n04042358 +n04042632 +n04042795 +n04042985 +n04043733 +n04044307 +n04044498 +n04045085 +n04045397 +n04046590 +n04046974 +n04047401 +n04049405 +n04050066 +n04051549 +n04051825 +n04052757 +n04056932 +n04057047 +n04057435 +n04057846 +n04057981 +n04058096 +n04058239 +n04059947 +n04060647 +n04060904 +n04061793 +n04061969 +n04062644 +n04063373 +n04063868 +n04064401 +n04065464 +n04065789 +n04067231 +n04067353 +n04067921 +n04068441 +n04068601 +n04069276 +n04069777 +n04070207 +n04070964 +n04071102 +n04071263 +n04071393 +n04072193 +n04072551 +n04073948 +n04075468 +n04075916 +n04076284 +n04077430 +n04077734 +n04078574 +n04079106 +n04079244 +n04079933 +n04080454 +n04080833 +n04081844 +n04083649 +n04086794 +n04087126 +n04087709 +n04088696 +n04088797 +n04089666 +n04089976 +n04090548 +n04091097 +n04093625 +n04095210 +n04096066 +n04097622 +n04097866 +n04099175 +n04099429 +n04100174 +n04101497 +n04101701 +n04102037 +n04102285 +n04102406 +n04102962 +n04104147 +n04104500 +n04105068 +n04105438 +n04105893 +n04107984 +n04108268 +n04110068 +n04110654 +n04110955 +n04111190 +n04111414 +n04111668 +n04113765 +n04114996 +n04115256 +n04115996 +n04116389 +n04118021 +n04121228 +n04122349 +n04122492 +n04122825 +n04123123 +n04123567 +n04123740 +n04125116 +n04125853 +n04126541 +n04126659 +n04126980 +n04127904 +n04128499 +n04128837 +n04131929 +n04134632 +n04136510 +n04137444 +n04137897 +n04138977 +n04139859 +n04140064 +n04140631 +n04141838 +n04143897 +n04146050 +n04147495 
+n04148054 +n04149083 +n04151108 +n04151581 +n04151940 +n04152387 +n04154753 +n04156297 +n04156411 +n04157320 +n04158807 +n04158956 +n04160372 +n04160586 +n04161358 +n04161981 +n04164757 +n04164868 +n04166111 +n04167489 +n04169437 +n04170037 +n04171459 +n04171629 +n04171831 +n04174101 +n04174500 +n04176068 +n04176190 +n04176528 +n04177329 +n04177545 +n04180063 +n04180888 +n04181228 +n04181718 +n04182322 +n04183217 +n04183329 +n04184435 +n04184600 +n04185071 +n04186051 +n04186268 +n04186455 +n04186624 +n04186848 +n04187061 +n04187547 +n04187885 +n04189092 +n04190052 +n04190464 +n04190747 +n04190997 +n04191150 +n04191595 +n04191943 +n04192238 +n04192858 +n04194289 +n04196080 +n04197391 +n04198015 +n04198797 +n04199027 +n04201733 +n04202417 +n04205318 +n04206356 +n04207763 +n04210390 +n04211356 +n04211970 +n04215910 +n04216634 +n04216860 +n04216963 +n04217718 +n04217882 +n04219424 +n04221823 +n04222210 +n04222470 +n04222847 +n04225031 +n04225222 +n04225729 +n04226464 +n04226537 +n04227900 +n04229007 +n04229107 +n04229480 +n04230603 +n04230808 +n04231693 +n04232153 +n04233832 +n04234455 +n04235291 +n04235771 +n04236001 +n04236377 +n04236702 +n04238617 +n04241042 +n04241394 +n04242408 +n04243003 +n04243941 +n04244997 +n04245847 +n04246855 +n04247630 +n04247736 +n04248507 +n04249415 +n04250224 +n04250599 +n04253931 +n04255499 +n04256520 +n04260589 +n04261116 +n04262678 +n04263336 +n04263760 +n04264233 +n04264914 +n04266486 +n04267577 +n04269944 +n04270891 +n04271148 +n04272054 +n04272782 +n04273064 +n04273796 +n04275283 +n04275661 +n04275904 +n04278353 +n04279172 +n04279987 +n04280259 +n04280970 +n04283585 +n04283905 +n04284002 +n04285146 +n04285622 +n04285803 +n04286128 +n04288272 +n04288533 +n04288673 +n04289449 +n04291242 +n04291759 +n04292414 +n04292572 +n04293119 +n04293744 +n04294212 +n04294426 +n04295081 +n04295881 +n04299215 +n04300358 +n04301000 +n04301474 +n04303258 +n04304375 +n04305471 +n04306080 +n04306847 +n04307419 +n04307878 +n04308084 +n04308273 
+n04308397 +n04308583 +n04308807 +n04309348 +n04309833 +n04310721 +n04311595 +n04312154 +n04312432 +n04313220 +n04314914 +n04315828 +n04315948 +n04317420 +n04318131 +n04318982 +n04319937 +n04320405 +n04322026 +n04322692 +n04322801 +n04323819 +n04326799 +n04326896 +n04328054 +n04328329 +n04328946 +n04329477 +n04330340 +n04330669 +n04330998 +n04331277 +n04332987 +n04333129 +n04338517 +n04339638 +n04340750 +n04340935 +n04341133 +n04341414 +n04341686 +n04346679 +n04347519 +n04348184 +n04348359 +n04349401 +n04350104 +n04350458 +n04354589 +n04356595 +n04358707 +n04358874 +n04359335 +n04359589 +n04360501 +n04360798 +n04361095 +n04361260 +n04362821 +n04363210 +n04363874 +n04364545 +n04364827 +n04364994 +n04365328 +n04365484 +n04365751 +n04368695 +n04370048 +n04371563 +n04373894 +n04375775 +n04377057 +n04378956 +n04379243 +n04379964 +n04380346 +n04381994 +n04382334 +n04382880 +n04383130 +n04383301 +n04386664 +n04387201 +n04387400 +n04388162 +n04388743 +n04389521 +n04390873 +n04391838 +n04392526 +n04393095 +n04394261 +n04395875 +n04397168 +n04397261 +n04397645 +n04398497 +n04398688 +n04398834 +n04399046 +n04400289 +n04401088 +n04402057 +n04402580 +n04402746 +n04402984 +n04403638 +n04404817 +n04404997 +n04405540 +n04405762 +n04407435 +n04407686 +n04409128 +n04409806 +n04410086 +n04410365 +n04410485 +n04411264 +n04411966 +n04413151 +n04413419 +n04415663 +n04416901 +n04417180 +n04417361 +n04417809 +n04419073 +n04421872 +n04422875 +n04427715 +n04428008 +n04431436 +n04431745 +n04434932 +n04435180 +n04436185 +n04436401 +n04436542 +n04437670 +n04437953 +n04438304 +n04438643 +n04440963 +n04441662 +n04444749 +n04445040 +n04445952 +n04446276 +n04447276 +n04447443 +n04448070 +n04448361 +n04450243 +n04450640 +n04450749 +n04451818 +n04452615 +n04452848 +n04453156 +n04453666 +n04453910 +n04454654 +n04455250 +n04455652 +n04456472 +n04457326 +n04458843 +n04459362 +n04459610 +n04460130 +n04462011 +n04463679 +n04464852 +n04467099 +n04467307 +n04468005 +n04469251 +n04470741 +n04471315 
+n04471632 +n04472243 +n04472726 +n04473884 +n04474466 +n04475411 +n04475631 +n04477548 +n04478512 +n04478657 +n04480527 +n04481524 +n04487724 +n04488427 +n04489008 +n04489817 +n04490091 +n04491769 +n04493109 +n04494204 +n04495450 +n04497442 +n04497570 +n04498523 +n04499446 +n04499554 +n04500060 +n04501837 +n04502197 +n04502502 +n04502670 +n04502851 +n04504141 +n04504770 +n04505036 +n04506994 +n04507453 +n04508163 +n04508489 +n04508949 +n04509260 +n04509592 +n04511002 +n04514241 +n04516116 +n04516214 +n04516672 +n04518132 +n04519153 +n04520170 +n04520382 +n04521987 +n04524313 +n04527648 +n04529681 +n04530566 +n04531098 +n04531873 +n04533042 +n04533199 +n04533700 +n04534127 +n04534895 +n04536153 +n04538552 +n04539203 +n04540761 +n04541320 +n04543158 +n04544450 +n04546194 +n04546855 +n04547592 +n04549122 +n04549919 +n04551055 +n04552696 +n04553389 +n04554871 +n04555600 +n04555897 +n04556948 +n04557308 +n04557751 +n04558059 +n04558804 +n04559023 +n04559730 +n04562262 +n04563204 +n04565375 +n04566257 +n04567098 +n04568069 +n04568557 +n04569520 +n04569822 +n04570958 +n04571292 +n04571566 +n04571958 +n04572935 +n04574471 +n04574999 +n04576002 +n04576211 +n04576971 +n04577426 +n04577769 +n04578801 +n04579230 +n04580493 +n04581595 +n04582349 +n04583620 +n04585745 +n04585980 +n04586932 +n04587648 +n04588739 +n04589190 +n04589434 +n04591056 +n04591887 +n04592005 +n04592099 +n04594218 +n04594489 +n04595285 +n04595855 +n04596852 +n04597066 +n04597804 +n04598136 +n04598582 +n04599124 +n04600312 +n04600486 +n04600912 +n04603729 +n04603872 +n04605726 +n04606574 +n04608329 +n04608567 +n04609531 +n04609651 +n04610176 +n04610503 +n04610676 +n04611916 +n04613015 +n04615226 +n04615644 +n04950713 +n04951373 +n04958634 +n04959672 +n04960277 +n04961691 +n04963740 +n04965179 +n04965661 +n04967191 +n04968895 +n04970059 +n04970631 +n04970916 +n04972801 +n04973386 +n04976952 +n05238282 +n05241218 +n05242070 +n05244934 +n05266879 +n05399034 +n05447757 +n05449959 +n05453657 +n05467758 
+n05586759 +n06254669 +n06262567 +n06263369 +n06263609 +n06263762 +n06266417 +n06266710 +n06267145 +n06271778 +n06272290 +n06272803 +n06274092 +n06275353 +n06276697 +n06277280 +n06281040 +n06359467 +n06359657 +n06418693 +n06591815 +n06592078 +n06595351 +n06613686 +n06793231 +n07556637 +n07556970 +n07557165 +n07557434 +n07560652 +n07561112 +n07562495 +n07563800 +n07564629 +n07564971 +n07565725 +n07565945 +n07566340 +n07566863 +n07567390 +n07567707 +n07568818 +n07569106 +n07569543 +n07570720 +n07572353 +n07572957 +n07573103 +n07573696 +n07574602 +n07575076 +n07575726 +n07575984 +n07576182 +n07576438 +n07576577 +n07577374 +n07579575 +n07580053 +n07580359 +n07580470 +n07581346 +n07581775 +n07582277 +n07582441 +n07582609 +n07583197 +n07584228 +n07584593 +n07585208 +n07587441 +n07587700 +n07588947 +n07590320 +n07591473 +n07592094 +n07592656 +n07593774 +n07595914 +n07596046 +n07596452 +n07596684 +n07597145 +n07597365 +n07598734 +n07599468 +n07599783 +n07599998 +n07600506 +n07601407 +n07605474 +n07605944 +n07606278 +n07606764 +n07607707 +n07609407 +n07609840 +n07611148 +n07611358 +n07611839 +n07611991 +n07612367 +n07612632 +n07612996 +n07613671 +n07614198 +n07614825 +n07615052 +n07615190 +n07615460 +n07615569 +n07615671 +n07616590 +n07617188 +n07619004 +n07623136 +n07624466 +n07627931 +n07628068 +n07641928 +n07642471 +n07642933 +n07643306 +n07643474 +n07643764 +n07643981 +n07644244 +n07663899 +n07678729 +n07679356 +n07680517 +n07680932 +n07681926 +n07682316 +n07682624 +n07683786 +n07684600 +n07685730 +n07686873 +n07687211 +n07687469 +n07687789 +n07689003 +n07690273 +n07690892 +n07692405 +n07692614 +n07693889 +n07693972 +n07694403 +n07695878 +n07695965 +n07697100 +n07704054 +n07705931 +n07707451 +n07708512 +n07708798 +n07709333 +n07710007 +n07710283 +n07710616 +n07710952 +n07712063 +n07712382 +n07712748 +n07712856 +n07713395 +n07713895 +n07714078 +n07714802 +n07714895 +n07715561 +n07715721 +n07716034 +n07717070 +n07717858 +n07718671 +n07719437 +n07719839 +n07720442 
+n07720615 +n07721325 +n07721456 +n07721678 +n07722217 +n07722763 +n07723330 +n07723559 +n07723753 +n07724943 +n07725376 +n07725531 +n07726796 +n07727578 +n07727868 +n07728804 +n07729000 +n07729485 +n07730406 +n07730855 +n07731122 +n07731587 +n07731952 +n07732302 +n07732747 +n07734017 +n07734292 +n07735052 +n07735803 +n07737081 +n07739125 +n07739506 +n07740220 +n07740954 +n07741461 +n07742012 +n07742704 +n07744246 +n07747055 +n07747811 +n07747951 +n07748753 +n07748912 +n07749095 +n07749192 +n07749312 +n07749731 +n07750586 +n07751451 +n07752377 +n07752664 +n07753743 +n07755089 +n07755411 +n07755707 +n07756096 +n07757132 +n07757312 +n07757602 +n07757990 +n07758680 +n07758950 +n07759424 +n07759691 +n07759816 +n07760501 +n07761141 +n07761309 +n07761611 +n07761777 +n07761954 +n07767344 +n07767847 +n07770571 +n07771212 +n07800091 +n07800740 +n07801508 +n07802152 +n07802417 +n07803093 +n07803545 +n07804323 +n07805254 +n07805594 +n07805731 +n07806221 +n07806633 +n07807317 +n07807710 +n07807922 +n07809096 +n07809368 +n07810907 +n07811416 +n07812184 +n07814203 +n07815588 +n07818277 +n07819480 +n07820497 +n07820814 +n07823951 +n07824702 +n07824988 +n07825717 +n07828987 +n07829412 +n07830593 +n07832902 +n07834507 +n07836731 +n07837002 +n07837362 +n07838233 +n07841495 +n07841639 +n07841907 +n07842753 +n07842972 +n07843464 +n07843775 +n07844042 +n07844604 +n07846143 +n07847198 +n07848338 +n07848771 +n07849336 +n07850083 +n07850329 +n07851298 +n07852045 +n07852919 +n07854813 +n07856270 +n07857959 +n07858595 +n07859284 +n07859583 +n07860805 +n07861158 +n07861813 +n07863374 +n07864638 +n07865105 +n07867421 +n07867883 +n07869391 +n07869775 +n07870313 +n07871436 +n07873464 +n07874063 +n07874159 +n07874259 +n07874343 +n07874441 +n07874780 +n07875693 +n07875835 +n07876281 +n07880751 +n07881117 +n07881205 +n07881404 +n07881800 +n07882497 +n07882886 +n07883031 +n07883251 +n07883384 +n07884567 +n07886572 +n07886849 +n07887634 +n07888465 +n07888909 +n07889510 +n07890352 +n07890750 
+n07891726 +n07892813 +n07893528 +n07893891 +n07894102 +n07894298 +n07894965 +n07895237 +n07895435 +n07895595 +n07895710 +n07895839 +n07896287 +n07897200 +n07897865 +n07898117 +n07898333 +n07898745 +n07899108 +n07900406 +n07900616 +n07901587 +n07903208 +n07904395 +n07905038 +n07906284 +n07906877 +n07907161 +n07907548 +n07907943 +n07909129 +n07909811 +n07911371 +n07911677 +n07912211 +n07913393 +n07914413 +n07915618 +n07916041 +n07917618 +n07918028 +n07920222 +n07921455 +n07921948 +n07923748 +n07924033 +n07924560 +n07924834 +n07925966 +n07926920 +n07927197 +n07927931 +n07929519 +n07930554 +n07931001 +n07931096 +n07932614 +n07932841 +n07933274 +n07933891 +n07934032 +n07934530 +n07935152 +n07935504 +n07936263 +n07936745 +n07938149 +n07951464 +n08554440 +n08558963 +n08596076 +n08598301 +n08616050 +n08640531 +n08659446 +n09191635 +n09206896 +n09206985 +n09210862 +n09213434 +n09213565 +n09214060 +n09214916 +n09215437 +n09217230 +n09230041 +n09233446 +n09238926 +n09255070 +n09259025 +n09259219 +n09262690 +n09265620 +n09269882 +n09270735 +n09287968 +n09289331 +n09289596 +n09290444 +n09295946 +n09300306 +n09302616 +n09303008 +n09303528 +n09304750 +n09305031 +n09308572 +n09309292 +n09315159 +n09326662 +n09335693 +n09335809 +n09336555 +n09337253 +n09344198 +n09352849 +n09359803 +n09362945 +n09366017 +n09366317 +n09375606 +n09376526 +n09381242 +n09393605 +n09396465 +n09398677 +n09405787 +n09406793 +n09409512 +n09409752 +n09410224 +n09416076 +n09421799 +n09428628 +n09432990 +n09433442 +n09437454 +n09439213 +n09443641 +n09453008 +n09458269 +n09472413 +n09474010 +n09505153 +n09606009 +n09606527 +n09608709 +n09610405 +n09613191 +n09615336 +n09616922 +n09619168 +n09619452 +n09620078 +n09620794 +n09622049 +n09622302 +n09624168 +n09624559 +n09625401 +n09626238 +n09627906 +n09629246 +n09629752 +n09631129 +n09632274 +n09632518 +n09633969 +n09636339 +n09638875 +n09639919 +n09641002 +n09644152 +n09648743 +n09651123 +n09665545 +n09669631 +n09670280 +n09676884 +n09679925 +n09690208 
+n09694771 +n09696585 +n09697401 +n09700964 +n09701148 +n09701833 +n09705124 +n09708750 +n09710164 +n09716047 +n09718217 +n09722658 +n09724785 +n09725229 +n09725772 +n09726621 +n09727440 +n09727826 +n09730204 +n09731436 +n09731571 +n09735258 +n09738400 +n09744679 +n09754217 +n09758173 +n09758885 +n09761068 +n09763784 +n09764201 +n09764598 +n09765278 +n09767197 +n09769076 +n09770179 +n09771435 +n09772746 +n09773962 +n09774783 +n09790482 +n09792555 +n09795124 +n09795334 +n09800964 +n09802445 +n09802641 +n09805151 +n09805475 +n09809538 +n09809749 +n09810166 +n09811712 +n09814660 +n09815790 +n09816771 +n09818022 +n09820263 +n09821831 +n09823502 +n09824135 +n09824609 +n09826204 +n09830194 +n09831962 +n09834699 +n09836160 +n09840217 +n09841188 +n09841515 +n09841696 +n09842047 +n09848489 +n09851575 +n09853645 +n09853881 +n09854915 +n09857007 +n09861946 +n09865398 +n09868270 +n09871681 +n09877951 +n09889691 +n09892693 +n09894654 +n09895222 +n09895701 +n09902353 +n09903153 +n09910374 +n09917593 +n09918248 +n09923418 +n09923673 +n09924996 +n09927089 +n09927451 +n09928136 +n09928451 +n09929298 +n09930257 +n09930876 +n09931640 +n09933098 +n09935434 +n09936892 +n09937056 +n09941964 +n09942970 +n09943239 +n09943811 +n09944160 +n09945319 +n09950457 +n09951070 +n09951274 +n09960688 +n09962966 +n09964411 +n09968845 +n09974648 +n09976728 +n09979321 +n09983572 +n09989502 +n09990415 +n09991867 +n09992538 +n09992837 +n09993252 +n09994673 +n09996481 +n09997622 +n10001217 +n10006748 +n10007684 +n10009484 +n10009671 +n10015215 +n10015897 +n10017422 +n10018861 +n10020890 +n10024362 +n10029068 +n10034201 +n10034614 +n10035952 +n10036266 +n10036929 +n10037385 +n10040945 +n10041887 +n10042690 +n10043643 +n10044879 +n10047459 +n10048367 +n10048836 +n10052694 +n10053808 +n10054657 +n10055730 +n10055847 +n10060175 +n10067968 +n10070711 +n10077593 +n10078131 +n10078806 +n10079399 +n10079893 +n10080869 +n10083823 +n10084043 +n10084295 +n10086383 +n10091651 +n10092488 +n10093475 +n10094584 
+n10095869 +n10098710 +n10098862 +n10099375 +n10101634 +n10102800 +n10105085 +n10107303 +n10109662 +n10111903 +n10112129 +n10118844 +n10126177 +n10126424 +n10126708 +n10127689 +n10129825 +n10134396 +n10134982 +n10136959 +n10142747 +n10142946 +n10143172 +n10143725 +n10145340 +n10145774 +n10148305 +n10150071 +n10150940 +n10151570 +n10153594 +n10154186 +n10154601 +n10155849 +n10162194 +n10164233 +n10165448 +n10168183 +n10168584 +n10171567 +n10182190 +n10185793 +n10186774 +n10187130 +n10195593 +n10200781 +n10202624 +n10205457 +n10206173 +n10207169 +n10210137 +n10215623 +n10216106 +n10224578 +n10225219 +n10228278 +n10235385 +n10237069 +n10241300 +n10243664 +n10245639 +n10249270 +n10249459 +n10249950 +n10257221 +n10259348 +n10263411 +n10266328 +n10266848 +n10271677 +n10273064 +n10274815 +n10276045 +n10282672 +n10284064 +n10284965 +n10296444 +n10299250 +n10299700 +n10305635 +n10305802 +n10306004 +n10308732 +n10312287 +n10314054 +n10315561 +n10316360 +n10317007 +n10317500 +n10320863 +n10321340 +n10322238 +n10323999 +n10324560 +n10328437 +n10332385 +n10335246 +n10335931 +n10340312 +n10341573 +n10343554 +n10345100 +n10353016 +n10353355 +n10355142 +n10355449 +n10355688 +n10356450 +n10357613 +n10360747 +n10366966 +n10369528 +n10370381 +n10376523 +n10377021 +n10379376 +n10380672 +n10383816 +n10386984 +n10387196 +n10387324 +n10393909 +n10396106 +n10399130 +n10400998 +n10402824 +n10403876 +n10405694 +n10407954 +n10409752 +n10411551 +n10415037 +n10417551 +n10418101 +n10419047 +n10420031 +n10421016 +n10426454 +n10427764 +n10428004 +n10433737 +n10435716 +n10435988 +n10438172 +n10439851 +n10444194 +n10450303 +n10462860 +n10464052 +n10466918 +n10467179 +n10470779 +n10474064 +n10474645 +n10478960 +n10481268 +n10482054 +n10482921 +n10484858 +n10488309 +n10495421 +n10499355 +n10499857 +n10506544 +n10508710 +n10512372 +n10512708 +n10519494 +n10521100 +n10521662 +n10522035 +n10522324 +n10522759 +n10523341 +n10525134 +n10525436 +n10525617 +n10527334 +n10529231 +n10541833 +n10542888 
+n10543161 +n10544232 +n10544748 +n10546633 +n10548537 +n10548681 +n10554846 +n10556518 +n10557854 +n10559288 +n10560637 +n10568200 +n10570019 +n10575787 +n10576962 +n10577284 +n10580535 +n10582746 +n10583387 +n10594147 +n10595164 +n10595647 +n10599806 +n10602985 +n10604634 +n10605253 +n10610465 +n10612210 +n10614629 +n10617193 +n10618685 +n10618848 +n10619642 +n10620758 +n10622053 +n10624074 +n10624310 +n10625860 +n10628644 +n10630188 +n10632576 +n10633450 +n10648237 +n10648696 +n10654932 +n10657835 +n10661002 +n10661563 +n10665698 +n10669991 +n10674130 +n10676018 +n10679174 +n10682953 +n10686073 +n10692883 +n10693824 +n10694258 +n10698368 +n10700201 +n10700640 +n10701180 +n10703336 +n10703692 +n10705615 +n10707233 +n10708454 +n10709529 +n10713686 +n10720453 +n10721321 +n10722575 +n10722965 +n10726786 +n10735298 +n10740868 +n10741152 +n10742997 +n10744164 +n10747119 +n10751265 +n10752480 +n10759151 +n10759982 +n10763383 +n10763620 +n10765679 +n10766260 +n10768903 +n10779610 +n10780632 +n10782791 +n10782940 +n10787470 +n10791221 +n10792335 +n10793570 +n10794014 +n11531193 +n11537327 +n11542640 +n11545524 +n11545714 +n11547562 +n11547855 +n11552386 +n11553240 +n11596108 +n11598686 +n11600372 +n11601177 +n11601918 +n11608250 +n11609475 +n11609684 +n11612923 +n11614250 +n11618861 +n11620673 +n11621029 +n11623105 +n11624531 +n11627168 +n11628456 +n11630017 +n11630489 +n11643835 +n11645914 +n11647306 +n11649878 +n11650558 +n11650759 +n11661372 +n11665372 +n11666854 +n11669921 +n11672400 +n11674332 +n11676500 +n11684264 +n11689483 +n11693981 +n11697560 +n11700864 +n11703669 +n11708658 +n11709674 +n11713164 +n11720353 +n11722982 +n11723770 +n11725015 +n11725623 +n11727091 +n11729478 +n11733054 +n11736694 +n11741350 +n11745817 +n11747468 +n11748002 +n11751765 +n11752578 +n11756092 +n11756669 +n11759224 +n11763625 +n11767354 +n11769621 +n11771539 +n11774513 +n11775340 +n11779300 +n11782036 +n11783920 +n11785668 +n11789438 +n11789962 +n11790788 +n11793779 +n11794519 
+n11796005 +n11801392 +n11805956 +n11807108 +n11807979 +n11808721 +n11811473 +n11815491 +n11817914 +n11820965 +n11823043 +n11830714 +n11830906 +n11832214 +n11836722 +n11839568 +n11845557 +n11851578 +n11855274 +n11857696 +n11862835 +n11865071 +n11866248 +n11868814 +n11869351 +n11869689 +n11872146 +n11875691 +n11875938 +n11877473 +n11878283 +n11887119 +n11890022 +n11892637 +n11894327 +n11898639 +n11900569 +n11902709 +n11915214 +n11915658 +n11915899 +n11916467 +n11918286 +n11919447 +n11920498 +n11924445 +n11928352 +n11928858 +n11931918 +n11932745 +n11939699 +n11940006 +n11943407 +n11944196 +n11945367 +n11946727 +n11947251 +n11948264 +n11950345 +n11951511 +n11952346 +n11953884 +n11954484 +n11956850 +n11965627 +n11967744 +n11970101 +n11971248 +n11971783 +n11972759 +n11973341 +n11976170 +n11977303 +n11978233 +n11982115 +n11985053 +n11985739 +n11988893 +n11991263 +n11997032 +n11997969 +n12006766 +n12008252 +n12008749 +n12010628 +n12013511 +n12015959 +n12018760 +n12020507 +n12024176 +n12030654 +n12034141 +n12036067 +n12036939 +n12041446 +n12043444 +n12045860 +n12050959 +n12053405 +n12056217 +n12057447 +n12062468 +n12065316 +n12065777 +n12075151 +n12076577 +n12080395 +n12083591 +n12086012 +n12086539 +n12087961 +n12090890 +n12092262 +n12094244 +n12095020 +n12096395 +n12101870 +n12102133 +n12105125 +n12107970 +n12108432 +n12109827 +n12110778 +n12112008 +n12112918 +n12113657 +n12117017 +n12119099 +n12119238 +n12121033 +n12124627 +n12126360 +n12131550 +n12135898 +n12136720 +n12137120 +n12137569 +n12139575 +n12141495 +n12142085 +n12143676 +n12144313 +n12146311 +n12147226 +n12152532 +n12153580 +n12154773 +n12155583 +n12156819 +n12157056 +n12157769 +n12158031 +n12158798 +n12159055 +n12159555 +n12160490 +n12161285 +n12163035 +n12164363 +n12166424 +n12168565 +n12170585 +n12173664 +n12174311 +n12174926 +n12182049 +n12187663 +n12188289 +n12195391 +n12196129 +n12199266 +n12201580 +n12202936 +n12205694 +n12214789 +n12215579 +n12217453 +n12221191 +n12224978 +n12225349 +n12226932 
+n12231192 +n12236546 +n12237486 +n12244153 +n12245695 +n12246232 +n12252168 +n12252866 +n12253229 +n12256112 +n12257570 +n12260799 +n12262553 +n12265394 +n12266217 +n12266796 +n12268246 +n12269241 +n12269652 +n12271643 +n12274630 +n12275489 +n12281241 +n12284262 +n12286826 +n12287642 +n12288823 +n12290748 +n12293723 +n12296432 +n12300840 +n12302071 +n12303462 +n12305475 +n12306717 +n12307756 +n12310349 +n12316444 +n12318378 +n12320010 +n12322501 +n12328398 +n12330469 +n12334293 +n12334891 +n12335483 +n12335664 +n12335800 +n12340383 +n12341542 +n12342299 +n12343480 +n12344283 +n12346578 +n12350758 +n12352287 +n12355760 +n12360108 +n12360684 +n12364604 +n12367611 +n12374418 +n12377198 +n12381511 +n12385429 +n12387633 +n12387839 +n12392070 +n12396924 +n12399132 +n12401335 +n12401684 +n12405714 +n12409231 +n12411461 +n12412355 +n12412606 +n12416423 +n12419037 +n12420535 +n12421467 +n12421683 +n12425281 +n12430198 +n12431434 +n12437513 +n12437769 +n12441958 +n12446200 +n12446519 +n12449296 +n12450344 +n12451915 +n12454159 +n12459629 +n12460697 +n12461466 +n12462032 +n12463743 +n12464476 +n12466727 +n12470092 +n12474167 +n12475035 +n12476510 +n12480895 +n12491826 +n12495146 +n12499163 +n12506181 +n12508309 +n12509476 +n12511856 +n12516584 +n12522188 +n12524188 +n12526516 +n12527738 +n12539074 +n12539306 +n12546183 +n12548280 +n12550210 +n12554526 +n12556656 +n12560282 +n12560775 +n12562577 +n12572546 +n12573256 +n12575322 +n12582231 +n12582665 +n12582846 +n12583126 +n12583401 +n12584191 +n12586298 +n12590232 +n12594989 +n12595964 +n12602262 +n12602980 +n12612170 +n12614477 +n12615710 +n12620196 +n12622875 +n12624381 +n12625383 +n12631331 +n12633638 +n12634211 +n12634429 +n12635744 +n12636885 +n12638218 +n12638556 +n12639736 +n12640607 +n12641413 +n12641931 +n12642200 +n12643473 +n12644902 +n12645174 +n12647376 +n12649065 +n12650556 +n12651821 +n12653218 +n12655869 +n12658118 +n12658846 +n12659356 +n12660601 +n12662772 +n12663804 +n12665048 +n12667406 +n12667964 
+n12674120 +n12674685 +n12682411 +n12683407 +n12685431 +n12685831 +n12688716 +n12690653 +n12695144 +n12698435 +n12705013 +n12707781 +n12708293 +n12709901 +n12711596 +n12713063 +n12714755 +n12715914 +n12717072 +n12719684 +n12724942 +n12725521 +n12727301 +n12731401 +n12732491 +n12732756 +n12733647 +n12741222 +n12742741 +n12743823 +n12746884 +n12749049 +n12752205 +n12755225 +n12756457 +n12762896 +n12768369 +n12771192 +n12772753 +n12777436 +n12778605 +n12779603 +n12785724 +n12791064 +n12793015 +n12794985 +n12798284 +n12800586 +n12801520 +n12805146 +n12806732 +n12810595 +n12812235 +n12814643 +n12817464 +n12822769 +n12823717 +n12823859 +n12832315 +n12833985 +n12834798 +n12836212 +n12836862 +n12839979 +n12840749 +n12842302 +n12842887 +n12844939 +n12849061 +n12853080 +n12854048 +n12858150 +n12866968 +n12869478 +n12870535 +n12871272 +n12877244 +n12878169 +n12879963 +n12882779 +n12884260 +n12890265 +n12893463 +n12903367 +n12904938 +n12908645 +n12909421 +n12912670 +n12917901 +n12922763 +n12926480 +n12928071 +n12929403 +n12930778 +n12931906 +n12934036 +n12934479 +n12939104 +n12941536 +n12942395 +n12943443 +n12946849 +n12950126 +n12952165 +n12953206 +n12956170 +n12957608 +n12960378 +n12960863 +n12965626 +n12968136 +n12969131 +n12970193 +n12971400 +n12973791 +n12974987 +n12976198 +n12980840 +n12982468 +n12983961 +n12985773 +n12987056 +n12988158 +n12992868 +n12997654 +n12997919 +n13000891 +n13001041 +n13001206 +n13001366 +n13001529 +n13002750 +n13002925 +n13003061 +n13003254 +n13003522 +n13003712 +n13004423 +n13004640 +n13004826 +n13004992 +n13005329 +n13005984 +n13006171 +n13006631 +n13006894 +n13007417 +n13007629 +n13008157 +n13008315 +n13008485 +n13008689 +n13008839 +n13009085 +n13009244 +n13009429 +n13009656 +n13010694 +n13010951 +n13011221 +n13012253 +n13012469 +n13012973 +n13013534 +n13013764 +n13013965 +n13014097 +n13014265 +n13014409 +n13014581 +n13014741 +n13014879 +n13017102 +n13017240 +n13017439 +n13017610 +n13017789 +n13017979 +n13018088 +n13018232 +n13018407 
+n13019496 +n13019643 +n13019835 +n13020191 +n13020481 +n13020964 +n13021166 +n13021332 +n13021543 +n13021689 +n13021867 +n13022210 +n13022709 +n13024012 +n13024500 +n13025647 +n13028611 +n13032115 +n13032923 +n13035241 +n13035389 +n13035707 +n13037585 +n13037805 +n13038068 +n13038376 +n13038744 +n13039349 +n13040629 +n13040796 +n13041312 +n13042982 +n13043926 +n13045210 +n13045975 +n13046130 +n13049953 +n13055423 +n13055577 +n13055792 +n13055949 +n13056135 +n13056349 +n13056607 +n13056799 +n13057054 +n13057242 +n13057422 +n13057639 +n13058037 +n13058272 +n13058608 +n13059298 +n13059657 +n13060017 +n13060190 +n13063269 +n13066129 +n13067191 +n13068917 +n13070308 +n13070875 +n13071371 +n13071553 +n13071815 +n13072031 +n13072209 +n13072350 +n13072528 +n13072706 +n13072863 +n13073055 +n13073703 +n13074619 +n13074814 +n13075020 +n13075272 +n13075441 +n13075684 +n13075847 +n13076041 +n13076405 +n13076643 +n13076831 +n13077033 +n13077295 +n13079419 +n13083023 +n13084184 +n13085113 +n13091620 +n13091774 +n13100156 +n13100677 +n13104059 +n13108131 +n13108662 +n13108841 +n13109733 +n13110915 +n13111174 +n13111881 +n13118707 +n13119870 +n13120211 +n13121104 +n13122364 +n13123431 +n13125117 +n13130161 +n13130726 +n13132034 +n13132338 +n13132486 +n13132940 +n13134302 +n13134947 +n13135832 +n13136316 +n13136556 +n13137409 +n13137672 +n13138308 +n13138658 +n13138842 +n13139055 +n13139647 +n13141141 +n13145444 +n13149296 +n13150894 +n13154841 +n13156986 +n13157137 +n13160831 +n13163991 +n13172923 +n13174670 +n13177529 +n13180534 +n13186388 +n13188096 +n13188268 +n13192625 +n13193642 +n13194572 +n13195761 +n13199970 +n13201969 +n13206817 +n13207736 +n13208705 +n13211790 +n13219422 +n13221529 +n13224673 +n13230662 +n13231678 +n13231919 +n13232106 +n13232363 +n13232779 +n13233727 +n13238375 +n13238988 +n13252672 +n13862780 +n13863186 +n13863473 +n13863771 +n13864153 +n13864965 +n13865298 +n13865483 +n13866144 +n13866827 +n13867492 +n13868248 +n13868371 +n13872592 +n13873502 
+n13875392 +n13875571 +n13878306 +n13879320 +n13883603 +n13888491 +n13893786 +n13894434 +n13896100 +n13897996 +n13900287 +n13903079 +n13905121 +n13905275 +n13905792 +n13912260 +n13915999 +n14633206 +n14696793 +n14844693 +n14853210 +n14899328 +n14900184 +n14974264 +n14977504 +n14992287 +n15062057 +n15067877 +n15089258 +n15089472 +n15089645 +n15089803 +n15090742 +n15092409 +n15092751 \ No newline at end of file diff --git a/workloads/realworld/standard/darknet/cfg/imagenet.shortnames.list b/workloads/realworld/standard/darknet/cfg/imagenet.shortnames.list new file mode 100644 index 0000000000000000000000000000000000000000..e7a18d44b543086958eaf60e6dc0b7deb0df9400 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/imagenet.shortnames.list @@ -0,0 +1,21842 @@ +kit fox +English setter +Siberian husky +Australian terrier +English springer +grey whale +lesser panda +Egyptian cat +ibex +Persian cat +cougar +gazelle +porcupine +sea lion +malamute +badger +Great Dane +Walker hound +Welsh springer spaniel +whippet +Scottish deerhound +killer whale +mink +African elephant +Weimaraner +soft-coated wheaten terrier +Dandie Dinmont +red wolf +Old English sheepdog +jaguar +otterhound +bloodhound +Airedale +hyena +meerkat +giant schnauzer +titi +three-toed sloth +sorrel +black-footed ferret +dalmatian +black-and-tan coonhound +papillon +skunk +Staffordshire bullterrier +Mexican hairless +Bouvier des Flandres +weasel +miniature poodle +Cardigan +malinois +bighorn +fox squirrel +colobus +tiger cat +Lhasa +impala +coyote +Yorkshire terrier +Newfoundland +brown bear +red fox +Norwegian elkhound +Rottweiler +hartebeest +Saluki +grey fox +schipperke +Pekinese +Brabancon griffon +West Highland white terrier +Sealyham terrier +guenon +mongoose +indri +tiger +Irish wolfhound +wild boar +EntleBucher +zebra +ram +French bulldog +orangutan +basenji +leopard +Bernese mountain dog +Maltese dog +Norfolk terrier +toy terrier +vizsla +cairn +squirrel monkey +groenendael +clumber +Siamese 
cat +chimpanzee +komondor +Afghan hound +Japanese spaniel +proboscis monkey +guinea pig +white wolf +ice bear +gorilla +borzoi +toy poodle +Kerry blue terrier +ox +Scotch terrier +Tibetan mastiff +spider monkey +Doberman +Boston bull +Greater Swiss Mountain dog +Appenzeller +Shih-Tzu +Irish water spaniel +Pomeranian +Bedlington terrier +warthog +Arabian camel +siamang +miniature schnauzer +collie +golden retriever +Irish terrier +affenpinscher +Border collie +hare +boxer +silky terrier +beagle +Leonberg +German short-haired pointer +patas +dhole +baboon +macaque +Chesapeake Bay retriever +bull mastiff +kuvasz +capuchin +pug +curly-coated retriever +Norwich terrier +flat-coated retriever +hog +keeshond +Eskimo dog +Brittany spaniel +standard poodle +Lakeland terrier +snow leopard +Gordon setter +dingo +standard schnauzer +hamster +Tibetan terrier +Arctic fox +wire-haired fox terrier +basset +water buffalo +American black bear +Angora +bison +howler monkey +hippopotamus +chow +giant panda +American Staffordshire terrier +Shetland sheepdog +Great Pyrenees +Chihuahua +tabby +marmoset +Labrador retriever +Saint Bernard +armadillo +Samoyed +bluetick +redbone +polecat +marmot +kelpie +gibbon +llama +miniature pinscher +wood rabbit +Italian greyhound +lion +cocker spaniel +Irish setter +dugong +Indian elephant +beaver +Sussex spaniel +Pembroke +Blenheim spaniel +Madagascar cat +Rhodesian ridgeback +lynx +African hunting dog +langur +Ibizan hound +timber wolf +cheetah +English foxhound +briard +sloth bear +Border terrier +German shepherd +otter +koala +tusker +echidna +wallaby +platypus +wombat +revolver +umbrella +schooner +soccer ball +accordion +ant +starfish +chambered nautilus +grand piano +laptop +strawberry +airliner +warplane +airship +balloon +space shuttle +fireboat +gondola +speedboat +lifeboat +canoe +yawl +catamaran +trimaran +container ship +liner +pirate +aircraft carrier +submarine +wreck +half track +tank +missile +bobsled +dogsled +bicycle-built-for-two 
+mountain bike +freight car +passenger car +barrow +shopping cart +motor scooter +forklift +electric locomotive +steam locomotive +amphibian +ambulance +beach wagon +cab +convertible +jeep +limousine +minivan +Model T +racer +sports car +go-kart +golfcart +moped +snowplow +fire engine +garbage truck +pickup +tow truck +trailer truck +moving van +police van +recreational vehicle +streetcar +snowmobile +tractor +mobile home +tricycle +unicycle +horse cart +jinrikisha +oxcart +bassinet +cradle +crib +four-poster +bookcase +china cabinet +medicine chest +chiffonier +table lamp +file +park bench +barber chair +throne +folding chair +rocking chair +studio couch +toilet seat +desk +pool table +dining table +entertainment center +wardrobe +Granny Smith +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +acorn +hip +ear +rapeseed +corn +buckeye +organ +upright +chime +drum +gong +maraca +marimba +steel drum +banjo +cello +violin +harp +acoustic guitar +electric guitar +cornet +French horn +trombone +harmonica +ocarina +panpipe +bassoon +oboe +sax +flute +daisy +yellow lady's slipper +cliff +valley +alp +volcano +promontory +sandbar +coral reef +lakeside +seashore +geyser +hatchet +cleaver +letter opener +plane +power drill +lawn mower +hammer +corkscrew +can opener +plunger +screwdriver +shovel +plow +chain saw +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +white stork +black stork +spoonbill +flamingo +American egret +little blue heron +bittern +crane +limpkin +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +European gallinule 
+pelican +king penguin +albatross +great white shark +tiger shark +hammerhead +electric ray +stingray +barracouta +coho +tench +goldfish +eel +rock beauty +anemone fish +lionfish +puffer +sturgeon +gar +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +triceratops +African crocodile +American alligator +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +whistle +wing +paintbrush +hand blower +oxygen mask +snorkel +loudspeaker +microphone +screen +mouse +electric fan +oil filter +strainer +space heater +stove +guillotine +barometer +rule +odometer +scale +analog clock +digital clock +wall clock +hourglass +sundial +parking meter +stopwatch +digital watch +stethoscope +syringe +magnetic compass +binoculars +projector +sunglasses +loupe +radio telescope +bow +cannon +assault rifle +rifle +projectile +computer keyboard +typewriter keyboard +crane +lighter +abacus +cash machine +slide rule +desktop computer +hand-held computer +notebook +web site +harvester +thresher +printer +slot +vending machine +sewing machine +joystick +switch +hook +car wheel +paddlewheel +pinwheel +potter's wheel +gas pump +carousel +swing +reel +radiator +puck +hard disc +sunglass +pick +car mirror +solar dish +remote control +disk brake +buckle +hair slide +knot +combination lock +padlock +nail +safety pin +screw +muzzle +seat belt +ski +candle +jack-o'-lantern +spotlight +torch +neck brace +pier +tripod +maypole +mousetrap +spider web +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow 
+tarantula +wolf spider +tick +centipede +isopod +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +sea urchin +sea cucumber +iron +espresso maker +microwave +Dutch oven +rotisserie +toaster +waffle iron +vacuum +dishwasher +refrigerator +washer +Crock Pot +frying pan +wok +caldron +coffeepot +teapot +spatula +altar +triumphal arch +patio +steel arch bridge +suspension bridge +viaduct +barn +greenhouse +palace +monastery +library +apiary +boathouse +church +mosque +stupa +planetarium +restaurant +cinema +home theater +lumbermill +coil +obelisk +totem pole +castle +prison +grocery store +bakery +barbershop +bookshop +butcher shop +confectionery +shoe shop +tobacco shop +toyshop +fountain +cliff dwelling +yurt +dock +brass +megalith +bannister +breakwater +dam +chainlink fence +picket fence +worm fence +stone wall +grille +sliding door +turnstile +mountain tent +scoreboard +honeycomb +plate rack +pedestal +beacon +mashed potato +bell pepper +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +cardoon +mushroom +shower curtain +jean +carton +handkerchief +sandal +ashcan +safe +plate +necklace +croquet ball +fur coat +thimble +pajama +running shoe +cocktail shaker +chest +manhole cover +modem +tub +tray +balance beam +bagel +prayer rug +kimono +hot pot +whiskey jug +knee pad +book jacket +spindle +ski mask +beer bottle +crash helmet +bottlecap +tile roof +mask +maillot +Petri dish +football helmet +bathing cap +teddy +holster +pop bottle +photocopier 
+vestment +crossword puzzle +golf ball +trifle +suit +water tower +feather boa +cloak +red wine +drumstick +shield +Christmas stocking +hoopskirt +menu +stage +bonnet +meat loaf +baseball +face powder +scabbard +sunscreen +beer glass +hen-of-the-woods +guacamole +lampshade +wool +hay +bow tie +mailbag +water jug +bucket +dishrag +soup bowl +eggnog +mortar +trench coat +paddle +chain +swab +mixing bowl +potpie +wine bottle +shoji +bulletproof vest +drilling platform +binder +cardigan +sweatshirt +pot +birdhouse +hamper +ping-pong ball +pencil box +pay-phone +consomme +apron +punching bag +backpack +groom +bearskin +pencil sharpener +broom +mosquito net +abaya +mortarboard +poncho +crutch +Polaroid camera +space bar +cup +racket +traffic light +quill +radio +dough +cuirass +military uniform +lipstick +shower cap +monitor +oscilloscope +mitten +brassiere +French loaf +vase +milk can +rugby ball +paper towel +earthstar +envelope +miniskirt +cowboy hat +trolleybus +perfume +bathtub +hotdog +coral fungus +bullet train +pillow +toilet tissue +cassette +carpenter's kit +ladle +stinkhorn +lotion +hair spray +academic gown +dome +crate +wig +burrito +pill bottle +chain mail +theater curtain +window shade +barrel +washbasin +ballpoint +basketball +bath towel +cowboy boot +gown +window screen +agaric +cellular telephone +nipple +barbell +mailbox +lab coat +fire screen +minibus +packet +maze +pole +horizontal bar +sombrero +pickelhaube +rain barrel +wallet +cassette player +comic book +piggy bank +street sign +bell cote +fountain pen +Windsor tie +volleyball +overskirt +sarong +purse +bolo tie +bib +parachute +sleeping bag +television +swimming trunks +measuring cup +espresso +pizza +breastplate +shopping basket +wooden spoon +saltshaker +chocolate sauce +ballplayer +goblet +gyromitra +stretcher +water bottle +dial telephone +soap dispenser +jersey +school bus +jigsaw puzzle +plastic bag +reflex camera +diaper +Band Aid +ice lolly +velvet +tennis ball +gasmask +doormat +Loafer 
+ice cream +pretzel +quilt +maillot +tape player +clog +iPod +bolete +scuba diver +pitcher +matchstick +bikini +sock +CD player +lens cap +thatch +vault +beaker +bubble +cheeseburger +parallel bars +flagpole +coffee mug +rubber eraser +stole +carbonara +dumbbell +singles +Virginia deer +eastern grey squirrel +gelding +pylon +table-tennis table +peacock +Segway +surfing +tamandua +knocker +steering wheel +motorcycling +coati +sitar +range +backhoe +agaric +dashboard +water polo +concrete mixer +treadmill +golf bag +skateboarding +royal tennis +tartan +four-wheel drive +sport utility +sedan +print +luggage rack +softball +windmill +ben +red admiral +jalousie +towel rail +truss +strand +ice hockey +sconce +wind turbine +plush +stained-glass window +ballpark +thoroughbred +love seat +red-spotted purple +miller +Adelie +freight liner +clock tower +acrobatics +shaving brush +ewe +ottoman +African violet +bicycle wheel +cork +windmill +satin +comma +coffee mill +baggage +wasp's nest +batting glove +Ferris wheel +push-bike +porthole +football stadium +gas tank +barbecue +handlebar +hula-hoop +fairground +rapier +garter stitch +exercise bike +control tower +carryall +minute hand +cog +riverbank +water nymph +common dandelion +android +hairbrush +redberry +fret +display window +pepper mill +litterbin +drapery +ducking +fly-fishing +broad jump +sprinkler +water-skiing +chicory +sail +volleyball +rugby +Texas bluebonnet +computer monitor +tortoiseshell +airplane propeller +solar array +figure skating +air conditioner +purple loosestrife +gearshift +outboard motor +cowslip +Abyssinian +dip +workstation +cosy +bunker +neon lamp +campanile +casket +verbena +amphora +sumo +common foxglove +sprocket +jelly bean +emperor penguin +night-blooming cereus +clock radio +black birch +bomber jacket +Virginia bluebell +bayonet +walker +altarpiece +tattoo +bridle +rocker arm +water turkey +spiderwort +flange +mute swan +laser printer +carburetor +coverlet +mountainside +baritone +auto racing 
+baluster +gal +peach bells +taffeta +grandfather +asparagus +horizontal stabilizer +world +grate +marsh marigold +white rhinoceros +movement +split rail +rollerblading +longhorn +muffler +church tower +light bulb +American agave +backpacking tent +overall +New World goldfinch +sectional +wing chair +transom +integrated circuit +dad +spar +picture frame +no-hit game +alternator +drill press +strawflower +hepatica +rangefinder +blinker +Welsh pony +nib +wagon wheel +rotor +tie +denim +jetliner +sculling +external drive +window frame +mourning dove +censer +stapler +batting helmet +flagon +machete +windshield +hedgehog +weeping willow +chief executive officer +hepatica +pet +Asiatic black bear +chinchilla +uke +Atlantic bottlenose dolphin +hair +dishtowel +flintlock +Bermuda shorts +lavender +searchlight +millwheel +piano keyboard +luna moth +bumper +parrot +skirt +manhole +coffee table +footstool +judo +Dalai Lama +armored personnel carrier +voile +saber +thoroughbred +wild carrot +gemsbok +caster +butterfly orchid +cow +sideboard +horseshoe crab +match play +cassette recorder +photomicrograph +drafting table +pediment +tramline +shipping +kitten +wainscoting +fried rice +helix +marguerite +pumpkin +white-bellied swallow +Tulipa gesneriana +common dolphin +face +red squirrel +bicycling +shipwreck +banded purple +cornice +pendant earring +forsythia +aardvark +seashell +spat +shoulder bag +fallow deer +yearling +common teasel +tufted titmouse +ancient +professional golf +purl +vehicle +okra +great grandmother +common lilac +rose mallow +newspaper +crucifix +chukka +armlet +fulmar +wapiti +doily +Greco-Roman wrestling +bleeding heart +kitchen table +bluebonnet +Cape buffalo +spun yarn +crape myrtle +dewdrop +great blue heron +medalist +vaulting horse +spinning wheel +skyscraper +Tahitian +forget-me-not +watercourse +guitarist +gargoyle +bee balm +pumpkin +hunting knife +flutist +lectern +skateboarder +foil +pant leg +hedge sparrow +dresser +automatic pistol +chicory 
+dialog box +chamberpot +black rhinoceros +fireweed +half-mast +pillow sham +pavilion +scarf joint +microprocessor +filly +dressing gown +shell +Arabian +child +radio antenna +butterweed +morris dancer +sparrow hawk +groom +brioche +floret +rainbow +earthworm +cellist +tine +toupee +balldress +map +angel's trumpet +ruin +fur +pronghorn +speed skating +used-car +stick +early spider orchid +stuffed peppers +snowdrift +flats +least sandpiper +stick +console table +ventilator +portable +kepi +pylon +viceroy +shoreline +Olympian Zeus +pestle +great-niece +life +air compressor +fanjet +scuba diving +fieldfare +tree swallow +personnel carrier +night-blooming cereus +sonogram +assembly hall +circuit breaker +chair +speed skate +soapwort +worsted +raspberry +burlap +flat panel display +Pyracantha +cemetery +turban +deer hunting +bottle green +dandelion green +pieta +aigrette +turntable +cover girl +clutch bag +kiwi +pea jacket +color guard +Malay +shire +crock +french fries +credenza +hockey stick +mourning cloak +potty seat +glass +balsamroot +medal play +red clover +gravy boat +garter belt +Guinness +meadow buttercup +jackass penguin +coursing +tooth +hawfinch +housetop +fluorescent lamp +black-backed gull +bookshelf +earplug +millipede +fawn +baseball bat +soup-strainer +organ loft +bugloss +tomahawk +blackcap +black-necked stilt +hand truck +bedstead +tempura +rose window +crimson +snow thrower +lesser whitethroat +palomino +ball +staff sergeant +wicker +garbage heap +great-nephew +parquet +coupe +nave +eggs Benedict +damask +flush toilet +Angora +pedometer +control room +bristle brush +kookaburra +telephone booth +Windsor chair +red-winged blackbird +cinnamon roll +briefs +cloister +sundress +mammillaria +unicyclist +covered bridge +coelogyne +fairy bluebird +phoebe +beer mug +headstock +parhelion +gorse +common European dogwood +fire-eater +professional football +rock climbing +cyclamen +tin +marjoram +Japanese morning glory +pipe +smasher +hang glider +abutment 
+birdbath +jotter +litter +artist's model +butterfly bush +dining area +sausage dog +piggery +English sparrow +Turk's-cap +platinum blond +song sparrow +alarm clock +tortoiseshell +chaise longue +flintlock +academic costume +graffito +Arnica montana +adding machine +waterside +director +jonquil +pipefitting +stud +Swedish meatball +musk rose +Venus's flytrap +raven +bougainvillea +little brother +field bindweed +finder +white admiral +tinfoil +serval +sheet +carthorse +people +potto +stockroom +sphinx +slate roof +mountain laurel +majolica +coal black +repository +bufo +pique +binder +tread +attorney general +hydraulic press +videocassette recorder +bumper car +professional baseball +cow parsley +ern +blue peafowl +common hyacinth +jack-in-the-pulpit +ice hockey rink +sport +camper +tailback +flash +stacks +pulp +Christmas cactus +netball +calliandra +curler +large periwinkle +cobweb +forward +Roman arch +cross bun +stoneware +banana bread +cape jasmine +settle +tongue +frock +pepper shaker +pitching coach +CD-R +casing +faience +hand cream +CD-ROM +recliner +striped bass +clary +sketch +risotto +reticle +white clover +touch football +kitty +great-aunt +Japanese maple +sidecar +muscovy duck +hack +rope bridge +organist +stinging nettle +pocket watch +Indian pipe +amorphophallus +bird's-foot violet +caller ID +furnishing +carriageway +dish rack +heiress +nail polish +beldam +Dall sheep +teriyaki +stateroom +laughing gull +chow +bookmark +timer +toga virilis +deviled egg +coltsfoot +Papuan +native +cygnet +automation +portfolio +cabbage palm +cube +broiler +radish +broodmare +castor-oil plant +pith hat +talus +lass +thatch +common marigold +young buck +igloo +prairie rattlesnake +soccer player +spoke +place +slide fastener +tapestry +toy +headboard +cross-country skiing +harness +sconce +rim +ballet skirt +transvestite +saddlebag +common evening primrose +taillight +challah +willet +ready-to-wear +cloud +answering machine +waterfront +vane +granddaughter +Chinese 
gooseberry +tureen +cab +truffle +viola +bootlace +chemise +taro +petal +candied apple +soccer +miniature golf +front porch +asparagus +Sauvignon blanc +daisy fleabane +ceiling +slip-on +bottle-nosed whale +redbud +black squirrel +snowsuit +ribbing +gravestone +creme brulee +ambassador +local +archery +love-in-a-mist +garbage +thyme +night-blooming cereus +goshawk +cuckoopint +azure +German iris +salad bowl +puppy +cockhorse +giant clam +biplane +stele +necklet +sea otter +crest +door +reformer +comforter +Byelorussian +bottle +hemline +book bag +leotard +owlet +spoon +sari +bidet +Latin +reticulated python +bowling shoe +futon +gaiter +coypu +tea urn +waders +bangle +snowbank +pencil +porter +azalea +English lavender +red spruce +team sport +cruet +high-rise +O ring +vodka +cormorant +Canada thistle +clasp +showjumping +rattan +red fox +sun parlor +Charolais +Tommy gun +bird's foot trefoil +sedge warbler +knot +chives +car tire +steam engine +adapter +spirea +common allamanda +oyster shell +harbor seal +baobab +wick +plumbago +downy woodpecker +coconut +leash +kasbah +hour hand +upholstery +mallard +cricket bat +lady +kitchenware +right-hander +leopard +olive green +common valerian +blue whale +blackboard +redhead +periwinkle +fingerboard +hard hat +locker +breakfast table +capybara +beekeeper +harness +feeder +water hyacinth +hexapod +brown thrasher +percale +lever +patriarch +arete +book +book +senator +bunya bunya +couch +durian +common lady's-slipper +mountain ash +golden barrel cactus +bicycle seat +beret +pop +musk mallow +manatee +cotton candy +boxing glove +backboard +tongue +saguaro +playground +capitol +sanderling +wagtail +deputy +tractor +tap +lady's smock +noseband +worsted +radiotelephone +camisole +forelock +muscat +sweet scabious +crane fly +butterfly weed +chestnut +pinata +inositol +borage +aquatic +belly +broadcaster +gondolier +egg yolk +blush wine +bufflehead +rambutan +oleander +horse-trail +sea holly +yard bird +conference room +lacrosse 
+belted kingfisher +defile +extremum +whistle +bear cub +grainfield +potage +watermelon +lasagna +sheik +Cooper's hawk +bulb +basketball court +paella +cassette tape +scatter rug +kid +impala lily +Minnesotan +Sudanese +chocolate +tail +quack-quack +whistling swan +shoulder patch +frozen custard +sumo wrestler +smoothie +bock +meat grinder +latch +palisade +radial +sake +kestrel +corn chowder +airframe +electrician +reamer +metropolitan +cotton flannel +cassowary +crossbill +operating room +winter aconite +flute +Tasmanian devil +billboard +suds +kilt +aperitif +cooling tower +avocado +hooded merganser +coleslaw +bee balm +ladder-back +insurance broker +scaffolding +polo mallet +double bed +two-hitter +bluff +gamboge +baby +lawn chair +frond +pistol grip +fancy dress +marquetry +jambalaya +fireweed +Eurasian kingfisher +cue ball +ice plant +horseweed +rose moss +musher +sun +viscount +white-breasted nuthatch +gin and tonic +thermos +Kenyan +first-aid kit +four-wheeler +tourist +stairwell +Gambian +liqueur glass +hovercraft +cocktail dress +twin +coriander +blister pack +Barrow's goldeneye +canteen +irrigation ditch +great white heron +tree sparrow +canal boat +lens +food processor +common raccoon +Baltimore oriole +black-eyed Susan +bush hibiscus +corolla +sire +mustachio +professional wrestling +elk +clustered bellflower +pannier +musk ox +crapaud +animal trainer +rosebud +ring-necked pheasant +little egret +cappuccino +rocker +bristlecone pine +cheerleader +hedge violet +semaphore +central processing unit +speedskater +delivery truck +assembly +hedgehog cactus +bergenia +bull thistle +bladder campion +cinquefoil +inula +cellulose tape +main rotor +bootee +autogiro +ice +grey +meadow cranesbill +hummus +valise +chassis +mountain goat +blacktail prairie dog +Chardonnay +romper +street +shoveler +wood ibis +topiary +chalice +silo +circus acrobat +Rollerblade +cosmos +woof +heroine +cold cream +marabou +herb robert +garden lettuce +nymph +floor lamp +automobile 
engine +heel +radiator +seeded player +fedora +father-in-law +peahen +Bahamian +wiper +wood pigeon +barn owl +pegboard +chorus frog +kin +roller skate +stob +rosemary +cowbird +hortensia +cranberry sauce +shot glass +Dixie cup +gnu +fire alarm +diet +booster +oxeye daisy +twayblade +high-definition television +truss bridge +bunk bed +mule +blackbuck +facsimile +frog orchid +point-and-shoot camera +brocade +gazebo +prairie gentian +concert +paintball +Cognac +maid +afghan +barbecued spareribs +pintail +tramway +commissioner +finger-painting +beef stew +caftan +Aberdeen Angus +demonstrator +sea trout +pigtail +thrush nightingale +barbados cherry +sashimi +ridgeling +lamppost +gabardine +red-shouldered hawk +bath salts +cavern +cymbid +Haitian +boater +southern buckthorn +arctic +motorcycle cop +red gum +Clydesdale +Zamboni +beagling +villa +demitasse +Sheetrock +lollipop +hybrid petunia +post horse +carabiner +brussels sprouts +Durham +stylist +pothole +sleigh bed +scallop shell +harrier eagle +papaya +Japanese persimmon +sachet +wild rice +chipboard +gun enclosure +menorah +chinook +headset +white campion +ocean +Secretary of State +G-string +bone china +basil +greenish blue +camcorder +concrete +screech owl +trumpet honeysuckle +flugelhorn +layette +cattle egret +case knife +mandarin duck +robber fly +salwar +dressing table +doughnut +facade +runner +honeypot +surf casting +diver +angel's trumpet +spin dryer +chameleon +wand +snow +vitamin A1 +manageress +volleyball net +antiperspirant +street clothes +tree sparrow +cords +sundew +bricks and mortar +caryatid +bridesmaid +trestle bridge +eyepiece +celebrant +scarlet pimpernel +gas range +onion +green salad +squill +creepy-crawly +hunk +little owl +salad nicoise +earflap +bird feeder +spray gun +bunny +Cheops +amazon +blue tit +Nissen hut +Kalashnikov +skylark +kremlin +shoebill +shopping bag +frigate bird +telephoto lens +peplum +moss pink +echidna +wastepaper basket +wood ibis +workroom +ankle brace +telpherage 
+Michaelmas daisy +figure skate +swami +nylons +cardoon +cocotte +headstall +twin bed +parsley +dirndl +corn poppy +nut bread +cloche +light heavyweight +mayor +lip-gloss +punch bowl +pottage +mango +fledgling +mousse +four-wheel drive +barrel +banana boat +trouser +bathroom +Sauterne +ring +settee +lavaliere +safe-deposit +godson +leatherette +schoolmate +radish +hedge trimmer +dahlia +euphonium +palace +vaulter +singlet +slicer +Pilsner +cockateel +kangaroo paw +Cub Scout +master bedroom +hexagon +cenotaph +Barberton daisy +Netherlander +intersection +Korean +gravel +chandelier +hospital bed +flash memory +pier +whole wheat flour +maroon +pale ale +special +snow bunting +crinoline +dustpan +barrette +common wood sorrel +yolk +pothos +speakerphone +tendril +cabinetwork +farm horse +brake disk +streetlight +superhighway +bandsaw +panting +pressure cooker +girdle +old man +cereal bowl +felt +hurling +architecture +harmonium +chain +blueberry +cellar +smocking +scrub brush +tablespoon +sweet corn +graining +library +street +bill +felt-tip pen +monkshood +crowd +log cabin +newel post +hack +elephant seal +golden pothos +popcorn +outhouse +patch pocket +fish and chips +tape +wax plant +eaves +fried egg +emerald +tea cart +fan blade +daily +Bowie knife +rowing boat +leaf shape +man +crayon +trumpetfish +chipping sparrow +whiskey bottle +pillion +city hall +golden pheasant +cheerleader +creeping bugle +couch +Dumpster +Homo sapiens sapiens +cranberry juice +cockpit +demagogue +joinery +scrambled eggs +technician +sidewalk +sheep +keyhole +power line +polyanthus +roulette +first lieutenant +checkout +tabletop +nasturtium +schnapps +engineering +skateboard +ground fir +bouquet +bunk +resort area +fleur-de-lis +power steering +opera +Bolivian +Friesian +buckskins +bay +slider +frozen yogurt +cabin cruiser +saunterer +lean-to +fishing eagle +bog star +cantaloupe +mouth +music stand +fiddlestick +brilliantine +pinball machine +bairn +barred owl +bath oil +signorina +Mason jar 
+nymph +rubber band +garden nasturtium +razorbill +Japanese beetle +batting cage +trestle +borage +Secretary of the Interior +scanner +baguet +baseball cap +chow mein +pen +jewelweed +barbet +chasm +pectoral sandpiper +holster +glasses case +sand +crevice +Kickapoo +snowboard +locket +satchel +tankard +alpinist +moorhen +cow pen +whooper +crown +chain +silversword +wild geranium +hi-fi +Tibetan +waterwheel +bee orchid +ruby-crowned kinglet +common broom +tabloid +javelin +sauna +klammath weed +zebra finch +spider orchid +velour +chiffon +lecture room +barrel +loggia +millstone +flatlet +soupspoon +econometrician +golf-club head +daphnia +parlor +fire-eater +juggler +attache case +hay bale +kisser +knitting needle +news magazine +flatbed +Senegalese +trumpeter +trampoline +brogan +bone +caftan +lobster pot +gazpacho +anthill +ramekin +mainsail +penitentiary +spotted flycatcher +cookstove +root beer +broom beard grass +pogo stick +plywood +epee +gas oven +Global Positioning System +sweet false chamomile +breakfast area +bullring +second cousin +wave +decolletage +rodeo +won ton +swastika +bobby pin +papaw +retaining wall +Muscadet +heavyweight +energizer +banner +amusement park +whinchat +drugstore +waxwork +meander +congee +heat sink +switch grass +commuter +peony +western white pine +wild raspberry +nightgown +saute +cardinal +claret +pollinator +biryani +pina colada +cassette deck +European sandpiper +block +flan +birdcage +baby +lieutenant colonel +ticking +European white lily +dog violet +coat hanger +premature baby +organza +string bean +balloonist +hurricane deck +window box +hang glider +bullfighting +piste +seahorse +hard cider +batik +common mullein +petite marmite +stuffed mushroom +tequila +ground ivy +fountain grass +stray +putter +buffer +comet +bomber +woodcarving +baseball glove +halter +garnish +selvage +megaphone +sea fan +rabbit hutch +very important person +analog watch +long-head coneflower +northern pike +roll-on +cigarette butt +terraced house 
+penknife +windshield wiper +cricket +straightener +snow pea +cockerel +canister +sour bread +recovery room +toilet bowl +tyrannosaur +big sister +quartz battery +television receiver +vitamin C +tailpipe +field thistle +stonechat +col +monstrance +gift wrapping +herbivore +quarter horse +ice-cream sundae +rumpus room +eyepatch +clary sage +French lavender +snorkel +choir +tent-fly +cat box +horse racing +high priest +barrel cactus +pin oak +wild thyme +keyboardist +raiser +hammock +hail +bungee +chocolate mousse +major +buzzard +gopher tortoise +Chablis +water meter +benthos +donna +blender +Mauser +avocet +rye +mulch +chancel +dusty miller +mate +corbel +minaret +frittata +French toast +mosaic +home brew +water faucet +beard +swivel chair +acropolis +largemouth +abbey +tabby +driver +copperhead +stirrup +Boston fern +Tennessee walker +artichoke +honor guard +chapatti +enchantress +sweat pants +electric organ +column +dry vermouth +range hood +Red Delicious +rape +splint +catapult +gourd +antipasto +plaza +carnation +star +wood anemone +English primrose +male fern +boot +atrium +Japanese deer +carnivore +yearling +doe +guelder rose +chicory +stretch pants +ice-cream cake +frogfish +tarpaulin +chicken soup +balaclava +tor +feverfew +three-hitter +flyweight +aqua vitae +locker room +wether +teacup +wide-angle lens +hook +ladder-back +osprey +awning +wedding +chest protector +pooch +rose mallow +orange daisy +fondant +envelope +duckling +blackberry +goosander +snorkeling +philatelist +broad bean +Frank +bok choy +basket +absinth +cayenne +blackbird +bottled water +trooper +timber +stable +chestnut +tomatillo +bell +banquet +rainbow trout +macrame +appointee +heart +chipmunk +purple clematis +safety bicycle +shuttle bus +Japanese black pine +lentil soup +downhill +field mustard +brass +hand-me-down +greater yellowlegs +fanny pack +croquet mallet +hip roof +duffel bag +Ritz +document +pie plant +staff member +lifeguard +white-throated sparrow +Cameroonian +hydrofoil 
+platter +common ageratum +middleweight +chairlift +brunch +pharmacist +lemon +driveshaft +green snake +lip +London plane +mangrove +crystal +siskin +common jasmine +hollandaise +villa +cross-country riding +mother-in-law's tongue +generator +Tanzanian +whisk +seeder +ashtray +griddle +evening bag +bluebird +bran muffin +square dancer +luggage compartment +tropical pitcher plant +autofocus +tape drive +silencer +Hawaiian guitar +swamp sparrow +Zimbabwean +drawing room +weekender +liparis +streambed +samosa +hitter +water heater +tidal basin +ossuary +dik-dik +camouflage +fiance +Jordanian +rolling pin +slingback +turret +hen +jennet +playpen +woodhewer +bushing +church bell +bear grass +double knit +tennis pro +Joe-Pye weed +pave +pochard +painted beauty +crinoline +gumbo +trestle table +schnitzel +balloon flower +Turkish coffee +extension cord +wireless local area network +sluice +umbel +microeconomist +sky +aisle +commander in chief +hydroplane racing +poll +Coca Cola +fuel injection +bird pepper +monkey puzzle +English muffin +riverbed +varietal +kachina +airport +saltwort +oolong +red-hot poker +mihrab +cocoa +jersey +Walkman +syndic +Hessian boot +millstone +carpenter +outfall +curbstone +mocha +field pansy +patriarch +slacks +switchblade +killdeer +whelk +pampas grass +racquetball +platform bed +Indian rhinoceros +Japanese iris +blacktop +dinner jacket +stud +jodhpurs +telephone pole +business district +kurta +basil +handset +file folder +gloriosa +orphan +cantle +cookie sheet +cafe au lait +drawbridge +hill myna +Western diamondback +watch case +cardcase +bowling alley +mattress cover +canvasback +pompadour +cornice +matador +cigar cutter +skunk cabbage +baptismal font +bitters +refectory +egg +parula warbler +tiger lily +field house +nanny +skin-diver +soda water +lymphocyte +carport +chocolate fudge +amphitheater +sugar candy +sea hare +open-face sandwich +dessert spoon +staple gun +envelope +worker bee +general +garment bag +maypop +autobahn +Atlantic 
puffin +polo shirt +Humvee +spice rack +grotto +banderillero +gaillardia +black-crowned night heron +oboist +weigela +Dictaphone +dwarf iris +marsh mallow +yarrow +eccentric +catsup +jade green +mistress +henbit +beachwear +head +commuter +strawberry tree +chickpea +clothespin +fleabane +brussels sprout +winter melon +Laconian +great horned owl +caricaturist +nan +flowerbed +triple sec +dairy +round of golf +cardinal +kauri +Zulu +Armagnac +cowberry +mouthpiece +wild calla +bling +puppeteer +beer drinker +adder +field sparrow +chocolate pudding +blacksmith +finback +Shetland pony +cheese fondue +panty girdle +soda can +electrolytic +florist's chrysanthemum +yellow jasmine +tudung +equalizer +ridge +dulcimer +grappa +barn swallow +coneflower +enamel +poached egg +halfback +yak +toby +Fleet Street +blue catfish +sand tiger +flying buttress +snaffle +stoop +first base +cultivated land +first lady +waratah +headquarters +arnica +lovebird +common morel +parasol +disk clutch +Xerox +vitamin P +vitamin B12 +long sleeve +certified public accountant +hot pants +pitch pine +pantie +drawers +cake mix +boar +grey +bride +false sago +bullion +coach house +bass guitar +Japanese banana +meadow clary +black belt +Canterbury bell +smallmouth +treadmill +great white heron +enchilada +rummer +captain +camisole +wild garlic +oak fern +ultramarine +peach +hawkweed +autostrada +adit +anaconda +artwork +skinhead +jello +hermit thrush +Bewick's swan +dress suit +trail bike +stubble +common polypody +Riesling +Easter lily +telegraph key +envelope +garlic bread +perianth +salad bar +steppe +club sandwich +nude +garden forget-me-not +Tuareg +flood +Statehouse +charcoal +boy scout +Rhone wine +parfait +spoor +lanyard +octagon +brown bread +quarterback +quilted bedspread +hookah +Pepsi +hamburger bun +entrepreneur +saddle oxford +snake's head fritillary +undies +chemise +skidder +chickpea +carnation +honey bun +mortar +Montrachet +automobile horn +skylight +gingham +rafter +pantile +climbing 
frame +scarlet runner +cable +cornstalk +mockingbird +raisin bread +chili sauce +hand calculator +concert-goer +detached house +coq au vin +lasso +hyssop +globe thistle +paper clip +slide +Jerusalem artichoke +tetrahedron +mock orange +lemon lily +finger +little sister +handcuff +horse wrangler +pavlova +oilcloth +snow-in-summer +common mugwort +greenshank +ice-cream cone +rubber boot +gunnysack +disk jockey +long trousers +sorghum +pontoon +calf +fire extinguisher +cotton thistle +pilot whale +ao dai +steamroller +wristwatch +tawny owl +city +country store +ironweed +kennel +bathrobe +rattan +drawer +fly tent +choline +musk thistle +courthouse +Yugoslav +bush +trawler +shellflower +jade vine +ragged orchid +pea soup +King Charles spaniel +hubcap +snook +paddy +bow and arrow +shovel +dill +cliff swallow +cadaver +hijab +masterpiece +fish geranium +kettle +sanitary napkin +carrot stick +Mountie +peanut brittle +dam +jackal +windowsill +butterfly orchid +bodice +picador +pale yellow +beanie +petiole +tenor saxophonist +bungalow +gnomon +stock saddle +field glass +rigging +wood grain +Speaker +settlement house +swamp milkweed +paper nautilus +tangerine +champagne +crescent roll +library +Schmidt telescope +stemless carline thistle +motorcyclist +alpine ash +planchet +water closet +casuist +hand luggage +hyssop +spaghetti and meatballs +cannelloni +cedar waxwing +water dog +brick red +linkage +sweep hand +purple heather +macaroni and cheese +butter knife +refreshment +malt +St. 
Augustine grass +wainscot +compass +gas heater +tamale +table saw +referee +borsch +projector +dracaena +peppermint +Reuben +Abyssinian banana +glassblower +floss +small stores +artilleryman +lapwing +ranch +garbage man +dwarf banana +commelina +currant +adulteress +landlocked salmon +pasqueflower +nan +tiger lily +Eritrean +rotunda +catsup bottle +mezzanine +royal fern +blended whiskey +bowler hat +mistletoe +manor +fusee drive +pistachio +dispensary +swamp +amputee +sculptor +schoolmaster +Chinese anise +dwarf iris +livestock +chronograph +nectarine +jockey +plaster +motel room +swamp azalea +hippeastrum +space station +duchess +catacomb +dovetail +cockscomb +common spotted orchid +brittlebush +cleats +cloche +hotchpotch +cabin car +prey +indigo +light beer +bear's breech +jonquil +analyzer +alyssum +spur gear +ice tea +honey buzzard +twayblade +dirndl +atlas moth +croquette +carafe +flyweight +professional basketball +multivitamin +air terminal +phial +roll-on +skunk cabbage +bird of paradise +rose +cooter +camping +divided highway +herbage +sweet vermouth +common comfrey +eggplant +office building +glutton +gefilte fish +bicycle rack +swamp birch +Venetian blind +Pernod +Norway spruce +portrait camera +bastion +vitamin Bc +Ugandan +Indian red +okapi +emu +vin ordinaire +chintz +shrimp cocktail +numbat +tall oat grass +cable car +stopcock +ham sandwich +Yemeni +stanhopea +plate +chicken broth +common yellowthroat +California poppy +radio +chocolate egg +mess jacket +tea table +physostegia +Japanese flowering cherry +confectionery +chicken cacciatore +painted nettle +popover +white rice +strapless +mohair +electrical cable +coil spring +arterial road +miniature fan palm +spectator pump +pesto +interlocutor +eastern kingbird +dongle +vitamin B6 +stuffed tomato +cough drop +okra +black +barbecue +burial mound +firstborn +corn snake +amberjack +bollard +horn +Black African +elbow pad +Camembert +circle +Japanese apricot +hearing aid +rock star +creature +taster 
+bubble gum +scull +lemon balm +chaetodon +anemometer +brake drum +fuselage +courthouse +aqualung +yellow adder's tongue +reception desk +guy +buffalo wing +ginger beer +robin +pantothenic acid +marsh hawk +yellow journalism +exhaust +cardamom +Tabasco +ax handle +patriarch +floor +pine snake +spoiler +hood +sphagnum +parrotfish +orphanage +redpoll +beef Wellington +white spruce +cherry plum +scapular +field lens +broomstick +mouser +wood thrush +Nebraskan +hotelier +milk thistle +soya milk +Munich beer +boucle +snowy egret +dust storm +steward +kudzu +oriental poppy +presbytery +burro +orange soda +stonecrop +splashboard +menagerie +dormer +wire cutter +yellow bells +Dubliner +shore pine +cousin +racing gig +Morgan +gold plate +villager +snifter +granny's bonnets +egg roll +Spode +amabilis fir +babbler +pestle +heliopsis +halter +black spruce +President of the United States +ski slope +chocolate fondue +lockstitch +motel +Epipactis helleborine +tabbouleh +Yorkshire pudding +overpass +Timorese +presbyter +tablefork +bottle gourd +tiara +vintage +pilgrim +reindeer moss +shower stall +towel rack +kachina +chef's salad +breeder +cow parsnip +walker +Black woman +Irish coffee +portrait lens +lateen +gilt +successor +cargo container +Lithuanian +mayapple +paisley +highchair +strawberry jam +flying fox +field scabious +blue-eyed grass +screw +Frisbee +dressing room +cholla +walkie-talkie +red currant +centrifugal pump +smorgasbord +hot rod +marcher +rowanberry +welwitschia +amphitheater +pew +concert band +bosom +pillbox +seagrass +openwork +meadow goldenrod +shower +chicken sandwich +Boston ivy +plastron +oilfield +stuffed tomato +juniper berries +frame +Spanish mackerel +family room +powder horn +fight +maguey +bunker +work-shirt +air filter +nosh +sugar bowl +foothill +reliquary +tugboat +horsebox +grater +palace +board member +campsite +halibut +geneva +ginger ale +high commissioner +genet +bodywork +spaghetti +protractor +pipe cutter +wood anemone +turkey cock 
+surge suppressor +green turtle +spoiler +bedsitting room +television room +ballot box +shasta daisy +impeller +capote +bitter +California wine +lock +spinnaker +gill fungus +baby's breath +nut and bolt +moonflower +houseboat +distributor cap +coffee bean +gusset +bowling ball +knitwear +frieze +mistflower +roadster +cue +circuitry +brake +butt hinge +Chickasaw +leopard frog +wing tip +puree +mantel +pantheon +grandfather clock +cockchafer +pomegranate +cleaners +eyeshadow +Oregon cedar +rock hopper +hawksbill turtle +agriculturist +yellow-crowned night heron +Albanian +pumpkin seed +chateau +goggles +camper trailer +bracket fungus +cigarette case +signal box +saddle blanket +poison ivy +set gun +cattleya +dry fly +concert hall +personal digital assistant +talcum +deodorant +common starling +painted turtle +kea +plenipotentiary +pantyhose +masjid +buskin +hurdle +cocktail lounge +belting +sour dock +knife blade +sugar snap pea +paddle +dickeybird +brace +keep +call center +yacht +lead pencil +tumbler +production line +tetra +private +French window +express +ski boot +pinto +broad bean +American crow +screech owl +snapper +power cord +Manx +rambutan +sun deck +stonefish +golden eagle +national monument +readout +cork oak +hacksaw +beer can +bathe +tussock bellflower +wet suit +mihrab +big game +highlighter +sprocket +measuring worm +grapefruit +samovar +distributor point +steak knife +incubator +loon +temporary hookup +hippodrome +hot spring +spacesuit +flea market +clay pigeon +catbird +earmuff +tetherball +yellowfin +cellophane +lanolin +clapperboard +velveteen +police dog +cashew +sequencer +mango +duplex house +bazaar +Golden Delicious +red carpet +collet +kickstand +broadloom +diskette +tank engine +compact +diesel-electric locomotive +whale shark +water moccasin +mountain avens +tropic bird +ginkgo +ski cap +fixative +glockenspiel +chopine +ethernet +herring gull +skeleton key +finger paint +conference table +great crested grebe +harbor +white-crowned sparrow 
+Bullock's oriole +guestroom +boutique +cable television +roulette wheel +Luger +Latin American +trumpeter +blindfold +baby +freshwater bass +home plate +bonefish +giant sunflower +giant tortoise +planking +pigeon hawk +oceanfront +door +bazaar +common wasp +conformation +kick starter +kid glove +corydalis +shuttlecock +writing desk +ivory gull +shirttail +diving suit +weka +downy birch +altar +wild sage +tufted puffin +cabinet +Orpington +cineraria +bottom +dial +coracle +resort hotel +soap dish +spotted owl +billiard room +ghetto blaster +red-breasted nuthatch +hatchling +chalet +bracteole +crusher +mixer +net melon +farmhouse +Dutch oven +transept +penlight +palmyra +stewing pan +solar cell +crochet needle +black-winged stilt +germander speedwell +crinkleroot +truncheon +bunchberry +hatchback +sounding board +mixing faucet +chess master +bisque +Brie +Sitka spruce +pawn +Mexican-American +space rocket +choreographer +collared peccary +duffel +nacho +patchcord +carpet snake +omnivore +watering can +hall of residence +streamer fly +sunroof +great grandson +oil refinery +billiard player +ivy geranium +key palm +pinwheel +yellow-shafted flicker +purple onion +soldering iron +condominium +fishing gear +heat pump +marine iguana +cuckoo clock +Bletilla striata +headrest +spotted salamander +field hockey ball +pound +carboy +vertical stabilizer +groundsheet +cinnamon bread +acorn squash +sheathing +lakefront +Jeffrey pine +synthesizer +olive +apple +pannier +ponderosa +Jew's-ear +latch +equatorial +metasequoia +permit +bloomers +town hall +fava bean +casino +bier +jampot +common snapping turtle +clary sage +oatmeal +Dutchman's breeches +massif +Guyanese +heifer +handball +sweat suit +pomelo +Iceland moss +customhouse +sandbag +archer +gyrfalcon +sword cane +marmite +whole snipe +blue crab +sugar spoon +brownstone +chicken wire +lizardfish +dump truck +chicken yard +chamois +electric +idle pulley +jujube +wrestling mat +aoudad +Burmese cat +water shamrock +dormitory 
+Unknown Soldier +hearse +bumper +clipper +desert pea +critter +semitrailer +backboard +common St John's wort +Atlantic manta +song thrush +jukebox +quoin +eastern chipmunk +copper beech +paintball gun +bull +package store +fraise +royal poinciana +niqab +traction engine +objective +day nursery +ski lodge +orphan +summer house +cereal box +router +sleuth +jodhpur +polyp +croquet +sport kite +green onion +tulle +etagere +tussock caterpillar +rest house +elderberry +bridal wreath +Torrey pine +silver wattle +kidney bean +pentode +laelia +Allen wrench +sporran +red drum +tricot +heterodyne receiver +magazine rack +stone curlew +trawler +suckling +niblick +sandwich plate +double door +Togolese +pitching wedge +desert tortoise +cloth cap +date palm +webbing +jumper +frogmouth +copperhead +covered couch +black mallee +riser +scraper +gauntlet +pantheon +food court +muntjac +grocery bag +bread-bin +transmission shaft +primigravida +window seat +crab apple +seat +Fresnel lens +dendrobium +hatchback +little theater +butter dish +back porch +umbrella tree +carrot +seventy-eight +coconut +music stool +Tesla coil +bay willow +American basswood +sabot +wheel and axle +gazette +lute +bassinet +hart +mecca +breadbasket +silverfish +handball +Scotch pine +box camera +stately home +Hereford +tread +single-breasted jacket +desk phone +deodar +professional boxing +fly casting +box wrench +black oak +martello tower +red campion +bullock +sweet William +bay leaf +dollhouse +flounder +fox hunting +beanbag +king mackerel +rouge +film advance +common mallow +parasitic jaeger +satellite receiver +nurse shark +chesterfield +tomatillo +plimsoll +hatbox +bloomer +foul-weather gear +longleaf pine +horse mackerel +tree lizard +bark +belfry +Treasury +perch +purple finch +stag beetle +fragrant orchid +tachymeter +tadpole +cookie jar +knee piece +agueweed +bones +chick +golf glove +toothpick +taboret +rotor blade +field artillery +purple willow +redhead +spark plug +guava +voice mail +cross 
+butterfly valve +star magnolia +olive +room light +Australian turtledove +embassy +Iraqi +singles +nestling +spinning rod +radial engine +rowan +sandbox +boss +moccasin flower +veneer +mint +American chestnut +white whale +CPU board +florist +press box +hurricane lamp +giant kangaroo +greater whitethroat +winter jasmine +blue +department store +southern red oak +saber saw +corn muffin +bellbottom trousers +toaster oven +red eft +condominium +galago +sunbather +redpoll +common European earwig +songbird +linnet +light meter +bracer +tepee +gumbo +water glass +roofing +spathiphyllum +shofar +sand lizard +washroom +Brussels carpet +brachyuran +home room +floatplane +knee brace +solar heater +felucca +gas ring +maguey +manse +blue columbine +cuppa +cigar band +male orchis +mudskipper +couscous +Chinese parasol tree +dude ranch +banyan +gopher snake +sundrops +aviary +African daisy +missel thrush +Photostat +stone pine +circus tent +tangle +printer cable +grease-gun +rose chafer +light pen +plantain +hearth +bullfinch +post oak +slow loris +Newtonian telescope +head +punt +spindle +New England aster +spotted sandpiper +pond pine +grass skirt +bug +black rat snake +tabasco +bull shark +tennis camp +scrambler +popinjay +bing cherry +ministry +cash register +redheaded woodpecker +kameez +farmer's market +roan +harpy +European toad +pizzeria +camshaft +hemp nettle +chicken coop +cottage pink +daybed +observatory +airdock +mountain devil +newsstand +kingfish +snow gum +jackdaw +lacquerware +peeler +miro +sister ship +damask rose +pack +snowshoe +Liberian +paramecium +tidytips +professional tennis +bookend +wood swallow +cayuse +cranberry +rock squirrel +steak au poivre +soul patch +female mammal +sash fastener +songwriter +oxeye daisy +apse +floor joist +hand towel +wheatear +cero +soul mate +golden fig +bus stop +psycholinguist +convenience store +manor hall +mountain sandwort +Euopean hoopoe +haricot vert +mausoleum +violist +flashlight battery +chard +fixer-upper +bank 
martin +testudo +diving duck +kohlrabi +Omani +sphygmomanometer +greyhound racing +chestnut +rattlesnake plantain +chaffinch +wolf pup +teakettle +cairn +souk +resident commissioner +chuckwalla +gaiter +capercaillie +liver chestnut +bean sprout +land line +ambassador +green pepper +common chickweed +Sharpie +Oriental arborvitae +oncidium +pallone +currawong +sweet alyssum +fire tower +eyebrow pencil +redfish +apricot +clementine +blucher +wigwam +pangolin +buggy +common oak +jumbojet +laser +cigarette holder +racquetball +georgette +cleft +scouring pad +drum printer +pond scum +American red squirrel +caranday +swamp willow +blindworm +brook trout +defense system +nyala +three-way calling +mizzen +shuttle +African lily +Oregon white oak +rain tree +fuel gauge +oriental cherry +wahoo +pear +jungle gym +bass fiddle +outrigger +angelfish +Old World coot +lime +battlement +yarmulke +herpes varicella zoster +burp gun +Alpine glacier +stun gun +pilot boat +Southern crab apple +bushtit +pullet +polo pony +jackfruit +raw vegetable +French marigold +golden shower tree +spike lavender +wahoo +brass knucks +cabbage palm +diesel-hydraulic locomotive +red jungle fowl +prairie sunflower +rye +loofa +icecap +shade tree +secretary bird +saffron +cos +muskrat +videodisk +Carolina wren +candy bar +Bohemian waxwing +flowering almond +cold frame +raglan +pine siskin +quince +western red cedar +red maple +adobe +agora +kumquat +tenement +bantam +bayberry +water jump +great granddaughter +snips +porcupinefish +brochette +love-in-a-mist +Iceland poppy +common sage +pace car +camel racing +slipcover +nopal +shoehorn +calypso +rhea +in-basket +maple syrup +cold chisel +Pacific ridley +dietary +aperture +lapin +rock hyrax +house wren +litchi +ragged robin +control center +shoebox +arabesque +eider +silver birch +bantamweight +ax head +softball +blue gum +Bechtel crab +tomato sauce +green douglas fir +sweet gum +macaroni salad +red phalarope +budgerigar +Bedford cord +Uzi +green woodpecker 
+ohmmeter +bacon-lettuce-tomato sandwich +hackney +Easter egg +motmot +red pine +opium poppy +gat +pussy willow +greater scaup +ocelot +persimmon +western hemlock +carambola +pinion +Malcolm stock +bobsled +larkspur +wood drake +pinetum +red gum +draft beer +funnel +terrarium +Pinot blanc +doodlebug +brittle star +salsa +cantaloup +pollack +stockpot +eastern hemlock +rock wren +burqa +squash +aircraft engine +billy +flamingo flower +odontoglossum +old squaw +redstart +sheepskin coat +mate +flathead catfish +gentianella +bilberry +bog rein orchid +incense cedar +mew +Colorado spruce +cob +portmanteau +grenadine +common ginger +masdevallia +compound microscope +sobralia +white fungus +guppy +chapterhouse +honey +green frog +sea swallow +African marigold +astrolabe +verdigris +yellowhammer +carrot juice +oxlip +medicine ball +highboy +grass frog +gamebag +surgery +mincer +mulloway +cactus wren +box office +resonator +table-mountain pine +European curlew +supernova +cabbageworm +peach +plane seat +asp +Yquem +tomato hornworm +rook +quadruped +chador +micrometer +dabchick +Afro-wig +balsam fir +bucket seat +sage green +macon +blue poppy +chinquapin oak +black pine +spinach +chrysalis +carnauba +tee +bearberry +shirt button +tree of heaven +southern white cedar +covered wagon +brood hen +spadix +European catfish +winter wren +bulldog clip +carpetbag +study hall +chino +simian +closeup lens +cookie cutter +grapefruit +mandola +sassaby +Allegheny plum +piaffe +scorpion fly +booby +draft animal +field tent +cumin +laurel oak +smooth-leaved elm +American arborvitae +American toad +grinding wheel +mountain ash +cuttlefish +pipistrelle +parer +safety rail +Clark's nutcracker +side-blotched lizard +giant hornet +wicket +dugout +electric toothbrush +dhow +common four-o'clock +long-eared owl +anchor +near beer +tansy +creme caramel +guided missile frigate +shelduck +durian +compact +iron tree +shiitake +polo +camouflage +pedal pusher +salon +tangerine +lacebark +Swiss mountain 
pine +goalpost +poolroom +space capsule +wild cherry +dress hat +wave +raglan sleeve +cassia +Jerusalem artichoke +cabbage palmetto +marsh harrier +American redstart +sea squirt +cliff diving +sparrow hawk +watch cap +frankfurter bun +police boat +flash camera +neem +eastern meadowlark +Italian cypress +orb-weaving spider +graniteware +sewing basket +latex paint +rock dove +stator +leaf lettuce +roulette +broadcloth +Spork +panicle +sternwheeler +cider vinegar +brown creeper +cowfish +closed gentian +chickpea +port +pimento +sheeting +matilija poppy +hawk owl +guava +papaya +huisache +European shrike +racing skiff +yellow warbler +gumbo-limbo +North Carolinian +staysail +court +iced coffee +money belt +shaver +Psychopsis papilio +sumo ring +refection +kingfish +clock pendulum +greater butterfly orchid +disk harrow +tawny eagle +polyphemus moth +pieplant +Nicaraguan +bocce ball +California box elder +porbeagle +crown of thorns +Mexican sunflower +fennel +stream orchid +slip ring +white fir +fold +moss campion +fairy ring +hose +pony-trekking +western larch +meadow pipit +Cape May warbler +longan +bookmobile +junk shop +lemon shark +smelling bottle +solan +widow +sea pen +universal joint +day game +goldcrest +maiden pink +biographer +rotunda +oriel +arranger +gambrel +Angora +fen orchid +leading rein +Wilson's snipe +European nuthatch +natterjack +athletic supporter +mouflon +emergency room +swallow-tailed coat +western meadowlark +feather star +Navy SEAL +toilet bag +loquat +lesser butterfly orchid +thumbhole +breathalyzer +featherweight +collards +mayfly +confessional +mountain ebony +redwing +Norway maple +refractometer +stagecoach +gasoline gauge +octopus +baker +Rhode Island red +European tortoise +cardiologist +Punjabi +Arkansas kingbird +tamarind +drum brake +flash +yellowtail +stokes' aster +emperor +free house +sour gum +ruddy duck +hamadryad +command module +tinamou +Norway lobster +washstand +European hornbeam +roaster +black-necked grebe +tallgrass 
+leopard lizard +anastigmat +Blackburn +deutzia +ground rattler +Christmas fern +wild pink +sesame seed +carrycot +Italian parsley +nectar +roll-on roll-off +true laurel +anisette +candy corn +flowering maple +revers +dun +tobacco hornworm +common sunflower +common grape hyacinth +cardiograph +electric meter +herb Paris +goalmouth +spruce grouse +canopy +wind poppy +stemma +gateleg table +lumper +speckled rattlesnake +gudgeon +rough-legged hawk +internal drive +pomelo +piece de resistance +storm door +clementine +Japanese pink +settler +yellow jacket +Fraser fir +royal palm +cicada killer +cayenne +guava +bluewing +red baneberry +lesser yellowlegs +cache +bog rose +sparring partner +ski jumping +sherry +glacier lily +beer mat +shredder +American widgeon +protectionist +green olive +black-tailed deer +Alpine fir +dispatch case +whipping cream +African daisy +cantilever bridge +maraschino +rhea +ink bottle +dacha +hagberry tree +lesser rorqual +orchard oriole +candidate +cuticle +breadfruit +fishbowl +giant puffball +closed gentian +Joshua tree +tie rod +beard lichen +flame tree +stegosaur +acerola +Swan River daisy +common murre +flowering almond +protegee +loggerhead shrike +Wilson's warbler +Japanese honeysuckle +basilisk +skimmer +hybrid tuberous begonia +pumpkin ash +chafing dish +collared lizard +iced-tea spoon +scrubbird +Iceland poppy +grey kingbird +wallflower +slick +diesel +Swiss pine +ethernet cable +ketch +lightship +black cherry +swordtail +Monterey cypress +lightweight +Floridian +Sabine +stall +contact +viola da gamba +hemstitch +upland sandpiper +box spring +sassafras +radome +lesser scaup +bluefin +yellow-bellied sapsucker +armored car +cabin class +Moorish arch +webcam +aquavit +overall +sergeant major +soft shield fern +gin and it +bobolink +subcompact +falconer +black morel +roadrunner +lab bench +thong +coffee urn +weeping beech +caladenia +southern live oak +scanner +wine vinegar +common speedwell +European roller +fuji +snag +piping plover 
+concertina +secateurs +meat thermometer +supercomputer +funnel +dais +western fence lizard +spruce pine +pommel horse +Cassegrainian telescope +pitta +India-rubber tree +mangosteen +tamp +aposematic coloration +dustcloth +birth +Atlas cedar +reed bunting +jabiru +sainfoin +press photographer +golden oriole +laryngoscope +thermal printer +winder +doubles +cricket ball +dabbling duck +tonic +Buddhist +Morris chair +swatter +quaking aspen +ancient pine +American larch +evaporative cooler +click beetle +yellow-breasted chat +souchong +bluegill +pied-billed grebe +tricorn +spring beauty +southern magnolia +rowel +chili +hard roll +flathead +satsuma +gangplank +bourguignon +cockfighting +greenwing +plum tomato +fly orchid +gnatcatcher +spotted eagle ray +ovenbird +brassavola +mocha +candy cane +afterburner +thriftshop +study +winter crookneck +grinder +muskellunge +sacred ibis +inverter +sandwort +deer fern +stair-carpet +Cotes de Provence +ovenbird +rex begonia +American woodcock +poison ash +lowland fir +pawpaw +loblolly pine +kinkajou +European hackberry +pest +coralwood +Bedouin +acetate rayon +snuffbox +radiator cap +basket oak +table-tennis racquet +smew +midge +telescopic sight +radish +great burdock +separate +damask violet +broadbill +bourbon +blacktip shark +gift shop +khimar +date +woodland caribou +policeman bird +grey birch +American elm +strawflower +officiant +hart's-tongue +straight razor +Spanish elm +radicchio +white croaker +vicuna +soft-shell clam +flannel +adonis +bonito +kittiwake +English walnut +soldierfish +hipflask +spotted crake +Streptopelia turtur +American maidenhair fern +corn cockle +telephone cord +canopy +playback +diocesan +marsh orchid +manakin +purple grackle +cob +fishmonger +otoscope +vermillion flycatcher +inhaler +instar +licentiate +myrtle warbler +goat herder +benthos +toggle +drumhead +piranha +doorplate +vault +triptych +red-necked grebe +transporter +vernier caliper +flathead +Portuguese man-of-war +countrywoman +vacation 
home +Bactrian camel +night-light +module +lemon curd +carancha +painted daisy +bok choy +ratatouille +troll +escarpment +cinnabar +computerized axial tomography scanner +lychgate +sowbread +bedside +guided missile cruiser +reel +cleat +hemostat +blue shark +Seven Wonders of the Ancient World +motorized wheelchair +pillow block +horned puffin +prickly pear +electric range +mother's daughter +vein +Oregon maple +bird dog +faceplate +wren warbler +feather reed grass +common alder +Adam's needle +straitjacket +organ-grinder +gantry +bikini pants +peristyle +herpes +terry +toad lily +celandine +red-breasted sapsucker +bragger +green peafowl +fuschia +quoits +house martin +dome +herpes simplex 1 +touraco +meeting house +vacuum gauge +cat's-ear +crisphead lettuce +carpet moth +European rabbit +puff adder +Old World scops owl +fire pink +fruit punch +ant bear +black walnut +stroboscope +white mangrove +pine grosbeak +cast +check-in +ring-necked parakeet +matai +shingle oak +fieldwork +rue anemone +landing net +ouzo +herringbone +lyceum +hydrogen bomb +mullein pink +masher +evening grosbeak +water vole +livingstone daisy +tomatillo +cavalier hat +interphone +wild lupine +goosefish +sugar maple +plantain +white dead nettle +Monterey pine +bugle +veloute +marsh gentian +Bermuda buttercup +alehouse +Peter Pan +thong +LP +tulip tree +scanner +scarlet tanager +music hall +angel shark +pecan +eight ball +rosy boa +outboard motorboat +garage +fanlight +black cottonwood +notornis +mountain fern +lunar crater +reddish orange +whitetip shark +executant +European ladies' tresses +washboard +revolving door +case knife +balloonfish +greater kudu +tarpan +cog +wet fly +Irish soda bread +basement +broken arch +canopic jar +muscat +kazoo +bobsledding +loaner +black guillemot +English saddle +garlic mustard +Foucault pendulum +mulberry +clotted cream +dove's foot geranium +Atlantic ridley +convector +ground floor +European wildcat +poinsettia +hideaway +great barracuda +black beech +bushy 
aster +cornflower +tam +true slime mold +carving knife +holly fern +railroad tunnel +crimson clover +disposal +etamine +suspension +plasmodium +political scientist +minnow +Spanish rice +twist bit +subway train +Scleroderma citrinum +saw palmetto +console +gimlet +hand pump +waratah +rock rattlesnake +keel +server +curlew sandpiper +hone +sable antelope +inkle +photostat +foresail +sallet +tiger salamander +chutney +onlooker +Exmoor +tiramisu +drawing room +battery +sour orange +juniper berry +beeper +funeral home +fescue +Maksutov telescope +ranch house +jai alai +carob +socket +popcorn +sandbar shark +pipal +summer tanager +oast +skipjack +rolling stock +dropper +great snipe +turnip greens +cowpea +honeycomb +ichneumon fly +maternity hospital +harp seal +nylon +bomb shelter +horse tick +litchi +camel's hair +mimosa +bur oak +anvil +belay +pinhead +continental breakfast +burglar alarm +Mojave rattlesnake +auxiliary storage +lightwood +ratepayer +cecropia +retractor +quadrate +pepper tree +Venus' slipper +abattoir +strawflower +firewater +purple saxifrage +black rat +pack +pepper pot +mayweed +winger +whitetip shark +great yellow gentian +snowdrop anemone +garden angelica +soy sauce +white poplar +inkwell +crouton +gas gun +honey locust +house of cards +ice maker +moquette +arrack +casualty +butterfly orchid +eau de vie +mosquitofish +prairie smoke +haft +horseshoe +steel +peach orchard +Mexican hat +encaustic +shoe +pennywhistle +sweet woodruff +hull +doorsill +globe amaranth +day school +housedog +crown princess +oxbow +maxi +positron emission tomography scanner +compere +European turkey oak +peanut +sentry box +house physician +hot line +loquat +rove beetle +riband +flowering fern +fan vaulting +ceibo +bongo +bat boy +omelet pan +European ash +breadwinner +gaff topsail +clerestory +bushbuck +bluethroat +khukuri +Father +portcullis +candy egg +brake lining +lawn furniture +buckskin +garden pea +Brazilian rosewood +Italian bread +horn poppy +silk tree 
+Christmasberry +hotel-casino +poplin +false lupine +desert sunflower +mimeograph +alpenstock +cork tree +cultivar +common mosquito +pollard +black marlin +understudy +lancet window +college +breadfruit +Herero +Labourite +bar printer +squaw grass +stelis +firing chamber +sycamore +artificial horizon +radiologist +pansy orchid +bicycle pump +wraparound +bell gable +home computer +orchard grass +carving fork +bergamot +honeycreeper +sewing room +radiator +core +brown bat +goose grass +adjutant general +Erlenmeyer flask +massasauga +tail rotor +cardinal tetra +Drambuie +wine palm +Sarcoscypha coccinea +shantung +Calvados +garganey +vicar +house mouse +creeping oxalis +digital subscriber line +cedar elm +backgammon board +blackberry-lily +pallid bat +New Zealander +Barbadian +rose geranium +European spider crab +gharry +electric hammer +mustard +Chinese lantern +laundry cart +filament +mozzarella +gooseberry +sukiyaki +porkpie +culvert +altazimuth +plum pudding +serin +Spanish dagger +Asian crocodile +crevalle jack +mascara +pig bed +alderman +northern shrike +Sufi +purple-fringed orchid +derringer +linseed +hockey skate +bell jar +Japanese wistaria +mantled ground squirrel +western toad +lieutenant commander +mechanical piano +ovoid +paddlefish +demijohn +coast live oak +brick +gearset +tailstock +phonograph needle +winery +tuberose +mother's boy +shot tower +crucian carp +carpet pad +lamb's-quarter +Menorah +common white dogwood +hypanthium +rosebay +wild medlar +soil horizon +sweet orange +bitterroot +hand glass +cloisonne +towpath +gum ball +margay +carambola +bolt cutter +charger +vibraphone +gueridon +elephant tree +wood-frog +ash grey +duffel coat +third base +chunga +glebe house +lake trout +encephalartos +Japanese oak +northern red oak +pruner +blue orchid +Biloxi +western wood pewee +corselet +alabaster +anechoic chamber +grass pink +wax begonia +blue daisy +pennyroyal +Asian tiger mosquito +cheese souffle +flat bench +caramel +sump pump +bush violet +common 
fennel +corner +skullcap +asparagus fern +white mangrove +calceolaria +sateen +saltbox +hollowware +head nurse +coal miner +mountain lily +tufted vetch +European perch +line officer +steamer +stickball +shin guard +cauliflower +Monegasque +hatpin +wolffish +trackball +khaki +arthrogram +rocket larkspur +naval commander +Gemini +ski binding +department head +Chenin blanc +wingstem +knothole +aerides +sweet bay +tautog +gangway +waterspout +Hudsonian godwit +armyworm +incinerator +kidney vetch +pine nut +cypress vine +hip tile +sorrel tree +relay +bench press +Kentucky coffee tree +dobson +sapling +false lily of the valley +veld +phaius +vitamin B2 +beaker +wall tent +sieva bean +dusty miller +sewing kit +cavalry horse +diaper +butterfly pea +Spam +saddlebill +pearly everlasting +kowhai +Sister +moneywort +organdy +pine marten +bareboat +hot-water bottle +baby blue-eyes +silver lime +common cotton grass +malmsey +blue pea +baggage car +pineapple +folding saw +cotton rose +brawler +black duck +Weizenbock +pool player +Gujarati +wild duck +purple sage +sage grouse +mail train +arm guard +short-spurred fragrant orchid +queen +eparchy +spring peeper +ortolan +shoulder +fighter pilot +American beech +snowcap +novitiate +roller +butcherbird +canyon oak +brompton stock +firebrick +rudder +light cream +Primus stove +nonsmoker +probationer +harp +kosher +surcoat +videotape +zebu +first class +yam +car +rissole +miso +funambulism +attic +curling iron +shutter +encolure +split-pea soup +yellow rocket +gas oven +ultracentrifuge +chamomile +canteen +eyeliner +yellow squash +Irish stew +collar +doublet +machinist +septic tank +snap bean +Polyporus squamosus +western tanager +creeping St John's wort +back +sinkhole +perforation +Romanian +epergne +fez +comfrey +sidecar +beach pea +screen door +instigator +plughole +woodbine +pigweed +hip pocket +common scoter +squeegee +Surinam cherry +porringer +body stocking +eatage +shallot +enlarger +common canary +trophy case +gun case +plow 
horse +hot plate +pearl oyster +margarita +madras +backspace key +pigeon guillemot +pajama +buckthorn berry +homestead +bedbug +Linotype +trundle bed +granadilla +theremin +chin rest +bouillabaisse +tumble-dryer +truffle +cassava +kurrajong +gyroscope +European silver fir +C-clamp +politician +green soybean +exponent +flame tree +scissortail +achimenes +crown daisy +soft tree fern +spaghetti squash +pale violet +beaver +dashiki +washboard +driving wheel +sack +foulard +sputnik +boatbill +English elm +sack coat +grog +golliwog +Malayan tapir +May wine +calash +stile +windjammer +American sycamore +rotor head +fast food +balata +dragonet +Emmenthal +metronome +negative +meadow saxifrage +rabbit ears +chenille +round +hobby +crankshaft +Wilson's phalarope +Murphy bed +soil pipe +forecourt +policyholder +tarmacadam +loyalist +gyro +Queen's crape myrtle +shortcake +apple butter +pumpkinseed +heronry +yellow perch +baggage claim +escarpment +diaphragm +mescal bean +shunter +flax +columbarium +Joe-Pye weed +Neandertal man +casement +hole-in-the-wall +Verdicchio +futurist +eaglet +tassel hyacinth +pup tent +fawn lily +cabbage palm +pogonia +hospital ship +water mill +Oregon grape +lentil +grindstone +banana split +inkberry +coonskin cap +bazooka +wrap +anise hyssop +Java sparrow +red-eyed vireo +common opossum +clintonia +bustle +booster +tribesman +soy +panhandle +jaboticaba +locking pliers +Sauvignon grape +ghat +screw +oximeter +white croaker +saucepot +eggbeater +reticule +cabbage bark +looking-glass plant +head gasket +California sycamore +cowbell +Aleuria aurantia +Herr +lever +spider orchid +cashew +shift key +solar house +wood chisel +white +mantilla +stamp +bolero +rear admiral +garden rake +Lao +crowbar +lapdog +buttermilk biscuit +yellow bedstraw +pickerel frog +dowel +serjeant-at-law +mill-hand +lambrequin +state treasurer +red silk-cotton tree +coiffeur +star anise +shoulder pad +marshal +sitar player +gown +ground cedar +hedge maple +caddie +pitahaya +corn 
marigold +stick cinnamon +woodland star +Eurasian green toad +anti +blueweed +medicinal leech +gaur +chocolate kiss +kit fox +mother +butte +audio CD +blast furnace +vitamin D +nutgrass +cornice +black sheep +hearing aid +lingonberry +quad +lentil +riding crop +pratincole +pentagon +sea lavender +nerita +flatmate +catboat +water clover +angiopteris +mushy peas +crown imperial +music school +woodshed +platy +Turk's-cap +rundle +reading teacher +hardtack +balloon sail +oriental spruce +bluefish +white mulberry +horned violet +satin bowerbird +treasure flower +sustaining pedal +mimosa +spurge nettle +sea green +hasp +lederhosen +pink cockatoo +long johns +basket weave +freewheel +thrust bearing +timber tree +orphan +falafel +common camas +bird of passage +bird's foot trefoil +electric eel +fizz +grape arbor +serape +brace +hazelnut +kylix +horse mackerel +cassia bark +lizard orchid +spat +Brown Swiss +pocket flap +pillory +purplish blue +rolling mill +tappet +broccoli rabe +semi-detached house +mushroom coral +fly orchid +nougat bar +ball hawk +sand wedge +shirred egg +black locust +strip lighting +drop scone +brush turkey +ball +tragopan +dallisgrass +tuatara +great knapweed +potentiometer +Kiliwa +Pacific bottlenose dolphin +accelerator +Darwin tulip +osteopath +Arizona cypress +manna ash +butterbur +cornelian cherry +American holly +nopal +tanker +foreshore +ditty bag +gas lamp +safety razor +chanter +fomite +chip +striped killifish +catalytic converter +plaice +dusty miller +takin +gerenuk +corn chamomile +Japanese pagoda tree +boneset +common osier +Guinean +taro +plotter +celandine poppy +churn +steenbok +edible mussel +sensitive fern +triode +black raspberry +zoo keeper +feather ball +dredger +starlet +cornpone +coat button +rosinweed +toy Manchester +crested cariama +finger food +basilisk +shotgun shell +comfort food +mountain hemlock +candytuft +Stilton +record changer +anklet +ball valve +Mediterranean snapdragon +BVD +sand cat +Galloway +nutmeg +water-mint 
+woodwaxen +citron +ark shell +federalist +drone +cheekpiece +hyperbaric chamber +addax +field-emission microscope +synchronous converter +men's room +medlar +electronic fetal monitor +Sazerac +false indigo +roof +passe-partout +meadow spittlebug +Phytophthora infestans +oast house +hedge nettle +voting booth +slender salamander +telephone jack +true bug +scouring rush +Scotch egg +matchbook +aperea +cytomegalovirus +garlic press +cove +whitebark pine +Slovene +narrow wale +mother's milk +Audubon's warbler +prickly poppy +cowl +tailorbird +mud brick +bamboo palm +welt +Afghan +Virginia spring beauty +dinner bell +night jasmine +fly rod +microtome +aerie +carinate +picker +brick trowel +loving cup +swathe +green mayonnaise +rivet +bandbox +newsroom +tea tortrix +bobby +gig +hush puppy +garlic chive +piston rod +aspidistra +bluejack oak +harvest-lice +strap hinge +sour mash +macadamia nut +histiocyte +fan belt +shelf bracket +abelia +Hottentot fig +fish chowder +abettor +compote +beige +dioon +hop +haymaker +oilskin +magnetometer +tool bag +tambour +call girl +gringo +fairy light +broad-leaved plantain +second base +zebra mussel +Japanese cedar +pistia +swamp chestnut oak +cashmere +double cream +samisen +lamb curry +companion +kapok +julep +sweet woodruff +gardener +jewfish +inspector general +collembolan +wheel bug +bass +scrubland +wryneck +macrozamia +trouser press +clove +tiger cowrie +yawl +collard +dildo +pony cart +ormer +annual +tessera +chancellery +two-toed sloth +queen +old lady +wringer +spritzer +baggage +black mangrove +black-eyed Susan +semifinalist +highlighter +alfalfa +Easter daisy +escapement +operating table +neutral spirits +bursar +roble +entablature +girl wonder +farm boy +ring ouzel +permanent press +auklet +beefsteak tomato +gaming table +tea bag +manul +giant bamboo +Ozark chinkapin +matzo +furrow +smoothhound +CD-ROM drive +powdery mildew +copilot +garden +American merganser +bunsen burner +Asian longhorned beetle +lead tree +creeping 
buttercup +Percheron +back brace +axseed +cub +soul food +rabbi +edelweiss +mineshaft +fox grape +sandwort +torque wrench +leisure wear +Mae West +broccoli +loach +maraschino +heavy cream +silkworm +cirque +vintner +whitewash +butterfly pea +two-toed sloth +midiron +ceriman +Bulgarian +operating microscope +sambuca +California fuchsia +silver maple +tangelo +black bean +lugsail +starting gate +leek +sunflower seed +fish fry +clinker +synagogue +coscoroba +brae +uphill +common limpet +golden plover +cedar of Lebanon +amphibian +Canary wine +taipan +agua +feeder +parallel +mater +pink calla +meat counter +yagi +crab cactus +cacao bean +bowfin +alley cat +stonefly +Eastern cottonwood +vernier scale +marginal wood fern +dancing-master +detective +yam +textile screw pine +hooch +spinet +single prop +sassafras +goose barnacle +triple cream +China tree +peeper +dressmaker +snatch block +ironmongery +dressing case +creeping bellflower +silver sage +honeydew +eastern red-backed salamander +peg +nombril +danish +mashie +anarchist +alligator snapping turtle +shepherd +American white pine +runner +chalice vine +rheumatologist +defibrillator +yellow chamomile +lemon balm +peacekeeper +native beech +sandwich board +Bavarian +titrator +paneling +deer mouse +poteen +sugar snap pea +meadow salsify +town crier +best +basinet +common myrtle +night lizard +cushaw +Tampax +camphor tree +gentile +orange peel +putty knife +pyromaniac +Brummie +fever tree +double +nest +inferior +cabbage tree +graduated cylinder +mucor +woodborer +earthwork +potato salad +four-hitter +gooseberry +water vole +ziggurat +grapefruit juice +four-in-hand +cranberry bush +diode +videotape +Mohican +niacin +beetroot +shirtsleeve +cork tree +two-eyed violet +white ash +drawing chalk +baked Alaska +bone-ash cup +toastrack +diastema +bed jacket +dwarf astilbe +yellow honeysuckle +cow pasture +sheet pile +saxhorn +upholstery material +California white oak +Spanish bayonet +horsemint +littleneck +deflector +magician 
+standard transmission +blue marlin +shallot +feijoa +collar +board +jump suit +common staghorn fern +priory +Xhosa +Loranthaceae +barbecued wing +barmaid +spit +lemon juice +umbrella plant +field pennycress +centenarian +queen bee +fish stick +black bread +dirk +secularist +German American +spotted weakfish +iron foundry +speed bump +yellow-fever mosquito +gag +frame +black-eyed pea +alcoholic +involucre +sperm whale +balanced diet +wax bean +butcher's broom +winter heath +Mainer +Australian pine +gas guzzler +double-breasted jacket +pod +palo verde +trimmer +wattmeter +dyer's woad +crotalaria +vine maple +sulky +jack pine +thumb +Wilton +Panchen Lama +welder +badminton court +business editor +Arabian coffee +Kamchatkan sea eagle +foamflower +steep +plane +freckle +cerebral cortex +Vouvray +tea +forest tent caterpillar +neckerchief +accelerator +jig +bridal wreath +highball glass +New England clam chowder +beach strawberry +call waiting +baton twirler +double boiler +Dutch elm +car bomb +filmy fern +breviary +Florida gallinule +dace +parsnip +riparian forest +crescent +earplug +grab bar +cusk +foglamp +screwtop +black mangrove +mascot +Welsh poppy +gas holder +support hose +salsify +red beech +Indian python +caroler +pineapple juice +lowboy +terra sigillata +black olive +hypodermic needle +radio-phonograph +moussaka +miter joint +creche +tuning fork +black wattle +affiliate +vertical tail +kiwi +red morning-glory +piping crow +runway +Kashmiri +studio apartment +sea feather +Judas tree +boatbuilder +corn earworm +fallboard +Victrola +lechwe +goat willow +turret clock +Canada anemone +leaf lettuce +savoy cabbage +headpiece +Lebanese +fothergilla +hemlock +toolshed +silver tree +blue-headed vireo +weatherman +cylinder +caltrop +adjutant bird +driving iron +millet +European woolly thistle +rose apple +clown +schoolfriend +eastern coral snake +barbecue +executive vice president +long-billed marsh wren +brittle bladder fern +tank destroyer +left-hander +matting 
+catchment +balsa raft +eastern fence lizard +color tube +corncrib +electric typewriter +westland pine +elder statesman +whey +plonk +mound +cittern +nest egg +copyholder +China aster +basking shark +gavial +common duckweed +vanilla orchid +red-shafted flicker +granadilla +sylph +sty +vest pocket +potherb +little brown bat +Trapezium +ordinary +adult +purple-fringed orchid +abseiler +disco +metal detector +beefsteak fungus +ilang-ilang +barley grass +hawser +suture +brake shoe +staghorn coral +barbecue sauce +Browning machine gun +sarcophagus +disa +oven thermometer +rosemary +track +gorget +quince +royal +piston ring +teak +pin cherry +Komi +walking fern +sloe +synchronous motor +fire-bellied toad +Teleprompter +co-star +cape gooseberry +oscillograph +bass clarinet +cock of the rock +Tyke +showy milkweed +safety valve +branch water +sweet marjoram +hugger +crampon +fairy godmother +band-tailed pigeon +snow-on-the-mountain +minibar +foreland +grosgrain +dita +rampion +calligrapher +jointed charlock +master +sheepshead +barrelhouse +Carolina allspice +mastic +brake pad +whiskey sour +casement window +conveyer belt +stolon +pavonia +shinny +witch elm +logwood +hostel +pageboy +vesper sparrow +pyrrhuloxia +common carline thistle +wafer +boysenberry +screw augur +hack +American white oak +governor general +Mother Hubbard +game fowl +drosophila +delft +nymphet +tollbooth +chough +Russian dressing +plum tomato +American saddle horse +dusky salamander +black medick +red valerian +cordage +Elastoplast +conacaste +backlighting +swell +riveting machine +cowpen daisy +openbill +water speedwell +picture hat +crested myna +servo +bletia +garden trowel +muscadine +common caper +false lily of the valley +aralia +sharp-tailed grouse +cigar smoker +bandoneon +Chinese alligator +crazy +point lace +charcoal +Texas horned lizard +marinara +backstay +Gatling gun +piston +game fish +fall armyworm +grammarian +beer hall +guadalupe fur seal +sugar palm +peanut +velvet ant +light machine 
gun +rya +cling film +adobo +myrtle oak +angelica +balsam apple +windbreak +brother-in-law +snap brim +automobile factory +clavichord +dusky shark +edible banana +altar boy +California lady's slipper +schoolbag +wax bean +Atlantic walrus +bullpen +straw wine +thatch palm +potluck +tamarind +charcuterie +sod house +tie rack +liebfraumilch +clinician +scarlet lychnis +Spanish iris +bread knife +water oak +bedpan +Angolan +bassarisk +Alaska fur seal +African wild ass +milk float +froghopper +Verpa bohemica +water cooler +chop suey +ranker +red helleborine +Prince of Wales +marmalade tree +car train +giant red paintbrush +desert sand verbena +right whale +baron +stevia +asterism +five-spot +catapult +Silex +fiberscope +refresher +beef Bourguignonne +snood +divot +waterproof +crabeater seal +Missouri primrose +bumper guard +rock opera +Lilo +coffee can +smokehouse +buffalo grass +propjet +ice tongs +poop deck +acorn barnacle +veal parmesan +shower room +collins +ringhals +silage +jawfish +trouser cuff +contour feather +songstress +rachis +White Russian +stanchion +mastaba +flatbed press +viand +legal representative +espalier +organic light-emitting diode +sushi +scorer +haricot +pinna +plectranthus +jungle cat +dried apricot +coach horse +white fringed orchis +veal cordon bleu +bath +dallier +marching order +donkey jacket +Panama tree +aerator +klaxon +pinnacle +shouldered arch +lesser celandine +common eland +Grand Marnier +cock of the rock +phlomis +Japanese umbrella pine +morning room +dead-man's-fingers +little auk +bascule +house paint +home fries +great skua +cesspool +flying gurnard +wild crab +checkerbloom +Wollemi pine +cheese dip +coif +charwoman +tea ball +waif +Arctic ground squirrel +parishioner +stabilizer bar +potentiometer +black cohosh +medlar +willow oak +cascara buckthorn +scoutmaster +Canada lily +poppy seed +paper mulberry +blackthorn +garrison cap +inductee +aeschynanthus +interior live oak +black spleenwort +wild service tree +sling +nicad +swab 
+sego lily +eiderdown +fruit cocktail +pallasite +weeping spruce +shiv +sea lamprey +coachman +half binding +American white birch +gainer +Concord grape +yellow birch +fucus +common room +io moth +red osier +crucible +galangal +salmagundi +pepper steak +cap opener +swizzle stick +tomato juice +Nobelist +Sarawakian +African monitor +sleeping beauty +stereoscope +curd +pyramid bugle +applejack +dosser +rake handle +pilot light +Eames chair +Scotch and soda +bell heather +dinette +blackpoll +dogie +sound camera +cattle guard +mashie niblick +edible cockle +monocle +steak tartare +partaker +sidesaddle +communications satellite +porkfish +water hemlock +drawbar +ultramicroscope +Jamaican cherry +craftsman +lovage +common apricot +drum majorette +backsword +smooth alder +Amniota +dribbler +theosophist +dolman +ivory tree +Green Beret +pipe smoker +mayoress +mignonette +crampon +henbane +kirtle +death's-head moth +instep +great St John's wort +lorry +black-necked cobra +ball carrier +Jordan almond +byway +earless lizard +marble +andiron +high-protein diet +buzzer +ice floe +crankcase +Bofors gun +sockeye +veery +Delaware +caravansary +prairie coneflower +star apple +suiting +cot +call forwarding +American gallinule +glossy snake +rose chafer +instant coffee +placket +Tarahumara +pulsar +philodendron +orange tortrix +cypress spurge +Welsh rarebit +music box +giant crab +vanilla bean +water thrush +prayer shawl +gouge +promoter +dagga +black currant +bitter cassava +drain basket +snare +digital audiotape +retainer +olive drab +gluten bread +graham cracker +cheddar pink +caregiver +spray paint +Anglo-American +boatyard +backbencher +Link trainer +bell arch +weir +arbor +millionairess +sour cream +earthtongue +crawlspace +crossjack +balalaika +crupper +western redbud +guinea hen +rangeland +gaboon viper +common louse +single-leaf +horseshoe +balsam poplar +triskelion +jack-in-the-box +jester +rain stick +glove compartment +imperial moth +Japanese beech +biotin +turnip 
+oligarch +western skink +mudguard +retsina +data system +green bristlegrass +visiting professor +beaded lizard +weathercock +Sloppy Joe +high tea +lightweight +record sleeve +cooler +nodding onion +pigs in blankets +torque converter +district attorney +bunting +orrery +radiator hose +common plum +wood spurge +calamus +chicken Kiev +pin +lath +telephone bell +thistledown +audiotape +gypsy moth +snuffer +pari-mutuel machine +peanut butter +hearthrug +sack +Old World yew +chives +stovepipe +xenolith +mattock +mangle +electric chair +backup system +Empire +blackwash +dodder +Allegheny chinkapin +finger plate +junk +brown rice +wild angelica +chinaberry +mason +rasp +den +violet wood sorrel +nosewheel +plenum +merino +kirtle +Igbo +ensign +sex symbol +Belgian endive +sugarberry +yellow salsify +purple emperor +atlas +African clawed frog +leatherjacket +midwife +sac fungus +European cuckoo +three-day event +Mexican poppy +wagon tire +armyworm +rain gauge +Oregon ash +columbarium +spectrophotometer +Milanese +pointing trowel +casualty +Eastern hop hornbeam +lobe +mouthpiece +au pair girl +giant water bug +Browning automatic rifle +laser-guided bomb +drone +white alder +cockleshell +mufti +gravy +berm +boat hook +marshmallow +pet shop +cowpea +tactician +wading pool +anchovy dressing +flip +shackle +Wedgwood +thick-billed murre +erecting prism +giant salamander +sleeper +quiver +chain store +wing tip +New World tapir +witches' butter +gendarme +ginseng +common maidenhair +graduate nurse +balsam pear +hoatzin +philanthropist +axle bar +gas meter +moth mullein +ragbag +Chinese cabbage +celery stick +rutabaga +scalpel +cape marigold +variometer +argali +brig +shuffleboard +wort +Orlon +epiphyllum +allice shad +coffee filter +solar telescope +Japanese linden +thinning shears +golden wattle +queen triggerfish +millinery +surfbird +flame fish +clove +dicamptodon +red-bellied terrapin +turmeric +baya +air horn +Indian coral tree +punnet +sharkskin +water crowfoot +bight +desert 
iguana +Texas toad +volva +dredge +Turkey red +chemical plant +gemma +dice cup +orange marmalade +mistletoe +surveyor +frozen orange juice +pallette +poultryman +burbot +courlan +captain +saddlery +bodyguard +dwarf tulip +black ash +pulse +nailbrush +tickseed sunflower +legless lizard +shirtwaist +polling booth +chickeree +garlic chive +common thyme +multichannel recorder +screw thread +sangoma +calliopsis +geoduck +colleen +bandicoot rat +pastis +swamp sunflower +scorekeeper +Honduras mahogany +Australian pitcher plant +triangle +elevator shaft +green pea soup +carrel +prairie aster +bird's-nest fungus +scarlet clematis +gook +mescal button +carcase +mulatto +ejection seat +strawberry daiquiri +goat grass +car battery +babu +chief of staff +monilia +Siberian crab +ridge rope +Morchella semilibera +nutmeg +moosewood +graham bread +California four o'clock +zwieback +velvetleaf +abelmosk +shadow box +corned beef hash +newsreader +backstairs +cutwork +sherbert +tooth fungus +angel-wing begonia +greasepaint +common milkwort +potato vine +CD drive +crepe de Chine +sporting man +koto +armet +barking frog +celeriac +drainage ditch +black box +steel blue +clotheshorse +corn speedwell +drawknife +spritsail +vichyssoise +modeler +pocketcomb +limey +suslik +cockpit +digester +brig +raita +troll +benedictine +rock wren +lock +Barnaby's thistle +school bell +school ship +Soave +falchion +swaddling clothes +terrine +smoke screen +rivulus +sweet lemon +cullis +bustier +peppermint +Philadelphia fleabane +Hampshire +active +charnel house +face guard +Quebecois +facilitator +tongue depressor +bitternut +heath aster +sapodilla +bluestem +centrist +Canterbury bell +needlenose pliers +groats +tapa +Qatari +paper feed +tilt-top table +plastering trowel +brazil nut +rotogravure +patriot +manicurist +bacon and eggs +puffbird +lightweight +golden willow +kaiser roll +duff +girandole +seaside daisy +Kurdistan +Skivvies +showboat +fire bell +lock-gate +greater masterwort +weald +ice ax 
+toetoe +mess kit +bucking bronco +black turnstone +backscratcher +backpacker +basement +marbleization +trigger +satsuma +fall-blooming hydrangea +mountain lady's slipper +yellow oleander +crookneck +ex-president +Venn diagram +psaltery +bulwark +old boy +linear leaf +aril +butt weld +fall webworm +pruner +bald-faced hornet +nougat +tailgate +field speedwell +potsherd +center punch +long beech fern +desert paintbrush +canyon treefrog +bushel basket +Eurasian +swamp horsetail +cryptanalyst +wicket +school newspaper +captive +spider brake +electric mixer +tumbleweed +mason wasp +sash window +paddock +wet bar +oxtongue +stevia +wheat rust +scute +switch engine +mud dauber +dotterel +snailflower +common barberry +mulligatawny +cinnamon bark +cigar box +trivet +proof spirit +cream soda +western grey squirrel +baby powder +Bren +Japanese yew +sailcloth +Basket Maker +bannock +basidiocarp +aphelion +erect bugle +limiter +bosc +Przewalski's horse +helmet orchid +audiometer +battle cruiser +grass widower +staphylococcus +Congolese +common pitcher plant +parliamentary agent +Virginia snakeroot +mockernut +Siberian elm +backbench +rough +chervil +chlamys +nationalist +galantine +screwdriver +falsifier +cancerweed +spur +jerkin +porte-cochere +dill pickle +Montagu's harrier +tetrode +true fungus +American quaking aspen +vitamin B1 +leopard lily +eggdrop soup +aurochs +core bit +Jaws of Life +trousseau +parquetry +Disciotis venosa +tender +beef goulash +vitamin K1 +pepper spray +covered smut +hook +sports announcer +weapons carrier +foxtail grass +sloe gin +mezereon +antifouling paint +pavior +pile driver +security consultant +monkey-wrench +Indian hemp +amaretto +American wistaria +A-line +market strategist +rainbow runner +souvlaki +binturong +stiletto +gastrula +Vietnamese +Old World hop hornbeam +cold cathode +pier table +houndstooth check +prop root +leaf-footed bug +sedge wren +Dutch iris +drop curtain +opossum rat +lame +pollen tube +doubletree +compression bandage 
+pinon pine +catmint +pier arch +kingmaker +deanery +loofah +fullback +fencing mask +flying boat +carpet sweeper +lemon-scented gum +Accipitriformes +kit +pigfish +clipper +dolmas +lesser centaury +blood agar +water violet +raw milk +lemonade +vicar-general +supply closet +Anzac +confectioner +ignition key +velvet grass +white willow +John Dory +ruddiness +wheel +common horsetail +hubbard squash +speculum +Spanish bayonet +mountain mint +glint +foxhole +housemate +bootjack +sleigh bell +clog dancer +Mexican mint +rendering +Hausa +star saxifrage +spring squill +clothesbrush +liquid metal reactor +Columbia tiger lily +sorrel +cartwheel +Jersey +Caucasian walnut +desert willow +surveyor +elbow +Santa Gertrudis +fringe bush +industry analyst +lyrebird +Cortland +arroz con pollo +catechist +tank top +jew's harp +cereal oat +heartleaf +short sleeve +butty +butterfly plant +stud finder +felloe +beer garden +clevis +wood warbler +demerara +cornetfish +mince +Jamaica rum +Spanish broom +binnacle +camise +ferrule +Copt +hall +minicar +scimitar +cryptogam +miter box +limestone fern +Marsala +Parliamentarian +gravy +woolly bear moth +formula +squash bug +pigmentation +plate +skin graft +radiotelegraph +hellbender +soft pedal +lavender cotton +propagator +Bailey bridge +cottage pie +rotgut +A battery +pintle +off-line equipment +European swift +shrimp butter +plumb bob +trunk lid +succotash +yellow cypress +heartleaf +antelope squirrel +sambar +maternity ward +deciduous plant +bartlett +Riesling +sour cherry +Klansman +poke +academician +sociolinguist +bird's nest fern +common privet +scale fern +tachograph +oyster stuffing +pusher +green June beetle +staghorn sumac +lockage +master +bap +harlequin +blackfly +spotted coral root +kahikatea +cabana +riot gun +apple mint +kob +praline +confidant +pahautea +float +city father +Zen Buddhist +pessimist +conference center +banksia rose +comfit +sweet cicely +winged bean +henroost +myope +bunt +nailfile +yellow mountain saxifrage 
+cruise control +abandoned ship +water chinquapin +spanker +wing nut +puccoon +pier glass +Atlantic sailfish +medlar +buttercrunch +rough-skinned newt +planter's punch +Dutch iris +control key +committeewoman +torpedo-boat destroyer +garambulla +tree heath +gladiator +September elm +inclinometer +snowbell +call-in +sunsuit +microfiche +bluestocking +cheval glass +server +franking machine +sugar syrup +Macoun +transport ship +alderfly +wash-and-wear +Abbe condenser +bush nasturtium +wild leek +canary seed +Northern Baptist +sweet wormwood +jaboticaba +cardroom +autoradiograph +ash-pan +sprinkler system +rattrap +claymore +parts bin +forest red gum +thermonuclear reactor +Indian crocus +lector +heir apparent +leafy spurge +masquerader +varicella zoster virus +cucumber tree +hedger +Shumard oak +zooplankton +quartermaster +arrester +bridge +hop clover +meadow foxtail +winter hazel +portable circular saw +penuche +limpa +blue toadflax +mesophyte +Alpine anemone +pet sitter +avocado +streptococcus +fiber optic cable +river red gum +hornist +chicken taco +red spider +tape grass +densitometer +salmonberry +tiger snake +hot toddy +silver fern +candlenut +buckram +local call +defoliator +king +mahoe +lever lock +social insect +winter purslane +bootblack +fireball +ramie +bellbird +prepuce +capote +Chinese forget-me-not +Pisces +costume +California black oak +tree lupine +golden polypody +liger +California whipsnake +urodele +sapodilla +skillet bread +duckpin +supremo +asparagus bean +kampong +endameba +cow pony +rider +motherwort +Persian iris +soursop +kohlrabi +Parisienne +irons +doubles +feijoa +farmplace +cottage cheese +bezoar goat +subcontractor +blunderbuss +down +purple martin +Lapp +crenate leaf +tobacco pouch +beach towel +Santa Lucia fir +monetarist +stringer +ocellated turkey +Texas purple spike +ackee +caddy +hedge mustard +second-rater +strawberry bush +valedictorian +steak sauce +prairie gourd +aspirant +mint +Valenciennes +vodka martini +American persimmon 
+big brown bat +Mycenaen +mouthpiece +norfolk island pine +pennyroyal +Jewish rye bread +granadilla +tract house +wall +shuttle helicopter +blackjack oak +Lippizan +storm window +white zinnia +sickle +sushi bar +polish +baldric +brooklime +church hat +control circuit +vicuna +death adder +eukaryote +durmast +field soybean +jacket potato +wild basil +queen consort +brooklime +octant +blue false indigo +broccoli raab +step-down transformer +date bread +blue ash +duffer +oak chestnut +pennant +wedge +Florentine iris +morion +weakfish +morning dress +public address system +spearmint +Ashkenazi +sow +interpreter +Metis +pita +iron lung +parfait glass +cylinder lock +immortelle +obstetrical toad +tee hinge +successor +western +working girl +julienne +AND circuit +spaghetti junction +fer-de-lance +enlisted woman +star +lightning rod +bilge pump +pacer +horse nettle +African oil palm +blastocyst +air hammer +bamboo fern +remote terminal +lambkin +money cowrie +Pelham +clinical thermometer +wiggler +guru +false indigo +tea bag +foredeck +king +baby shoe +mule +grab bag +silver-bell tree +knitting machine +cobia +roulette ball +larder +button pink +rumble seat +noria +queen mother +solar thermal system +aquaplane +highbrow +rusty blackbird +desktop +lima bean +pontoon bridge +watercress +wild cabbage +tumbleweed +dressing sack +compact-disk burner +spittoon +marrow +sporophyte +second fiddle +pot-au-feu +specialty store +dry +mole +khadi +japonica +lovage +squamous cell +lobe +European creeper +brown pine +bladderpod +rumble +French Canadian +mascarpone +Pacific halibut +perennial ryegrass +wine lover +turbot +longwool +silver tree fern +dust cover +synchromesh +corn pudding +alpine azalea +garboard +cane sugar +observation dome +condensation pump +hind +taximeter +hand drill +gas thermometer +jammer +buffing wheel +handstamp +prairie mallow +turkey stew +sun spurge +duck pate +kibble +Cassin's kingbird +apadana +Devon +grinner +oocyte +blank +header +schoolmaster +guard 
ship +intravenous pyelogram +rimu +luff +Mediterranean fruit fly +singlestick +lady-in-waiting +curb +birch +limekiln +orthoscope +serotine +Spanish oak +swamp cottonwood +edger +city man +picnicker +white basswood +Parsons table +Christmas begonia +perspirer +Pacific tree toad +Cape tulip +finger bowl +blue pike +greengage +handcar +milkweed +potbelly +river dolphin +creel +typewriter carriage +banteng +pawnbroker's shop +huon pine +biennial +man of action +foundress +caveman +featheredge +jordan almond +sandblaster +coralberry +low-calorie diet +hoot owl +garter +bain-marie +wrecker +fenugreek +double-hung window +idol +scullery +balloon vine +summer savory +winged spindle tree +Helvella crispa +walrus mustache +gas engine +boulle +rush grass +rue +hoe handle +cat fancier +deerstalker +dunker +American red plum +fall dandelion +groover +sprag +stair-rod +wish-wash +pricket +architrave +California laurel +net melon +Arizona sycamore +executive secretary +silverweed +silky cornel +surface ship +square sail +common purslane +villa +holly-leaved cherry +sweet birch +pecan +artillery shell +breast pocket +pirogi +scarlet runner +rabbit brush +mealworm +leather carp +palette knife +Jerusalem sage +boneshaker +slit lamp +digital voltmeter +polar glacier +square-rigger +homogenized milk +Sten gun +lesser calamint +pyrograph +Korean lawn grass +Zinfandel +crepe fern +western ragweed +clasp knife +distributor housing +cartouche +scooter +ski parka +jackknife +Carolina spring beauty +soft diet +candlesnuffer +horse trader +step stool +agouti +accelerometer +annual fern +judge advocate +angelica +roll film +treehopper +ombu +comer +sultanate +kitchen help +hooded ladies' tresses +milking machine +knuckle joint +Jamaica honeysuckle +music teacher +sauerkraut +Weston cell +slivovitz +Worcester sauce +tall bellflower +chancery +prophetess +casquet +shortfin mako +sorus +visual display unit +asp +grenadier +black pepper +crottle +erasable programmable read-only memory +jabot 
+ratchet +disk controller +chief petty officer +tap wrench +white mountain ash +cultivated rice +flying phalanger +skillet corn bread +BB gun +Elamite +European red elder +reed rhapis +ciderpress +inga +torpedo +wild teasel +bean curd +oeil de boeuf +acuminate leaf +bitter lemon +hitchrack +Lorraine cross +hostess +European dogtooth +adz +polonaise +rock sandwort +Waldorf salad +myrmecophile +klystron +mole rat +draba +corn borer +robusta coffee +chub mackerel +leatherleaf +chronometer +Moselle +sea aster +fennel +slop basin +constable +Brunswick stew +hydraulic pump +French omelet +icebreaker +Manx shearwater +press of sail +ninepin +blue succory +bootstrap +hallstand +chit +firefly +bearded seal +fuel filter +jezebel +mate +Roquefort +cheesecloth +plasterer +blue pimpernel +lake dwelling +shrink-wrap +goat cheese +common gum cistus +coastland +Sunday best +wild tobacco +mandrake +common unicorn plant +barbican +culotte +blockhouse +German iris +tarragon +caramel +wild rosemary +grain +voyager +squirting cucumber +eastern narrow-mouthed toad +creeping fern +luge +saffron +garland flower +furnace room +starship +Oriental scops owl +Italian honeysuckle +berserker +Chinese elm +scrubber +bishop pine +French polish +compromiser +skimmer +river shad +lobster thermidor +leadwort +man-of-the-earth +razorblade +vicegerent +empress +link +ham and eggs +wild lily of the valley +blackfish +splicer +fossa +mara +moneygrubber +brachiopod +fauteuil +caldera +finish coat +croupier +termer +leopard's-bane +sei whale +molucca balm +dolly +dog food +term infant +soft roll +episcia +sewer +inquiry agent +active citizen +perry +California newt +moon shell +bladderwrack +common shrew +dill +Dutch elm fungus +key lime +electrometer +divorce lawyer +lamb's-quarters +apple turnover +shipmate +Guernsey +legionnaire +electric blanket +Rocky mountain pinon +tobacco mildew +stinking iris +forestiera +departure lounge +wiper motor +jurist +scarlet runner +pallbearer +batter's box +inertial 
guidance system +fines herbes +oilcan +sisal +mustache cup +steamed pudding +Visayan +fiesta flower +lady tulip +lungless salamander +batiste +electrical system +blazing star +car carrier +Walloon +mother hen +stump +mulled cider +secondary coil +Alexandria senna +etui +scrumpy +Havasupai +jawbreaker +glume +ex-husband +Eskimo +Joint Direct Attack Munition +number theorist +five-hitter +pinstripe +Olympian +common mackerel +stone bass +bigos +Bahraini +airbrush +great ragweed +glass lizard +hand fern +roundel +riding master +shoetree +yellow avens +old fashioned +dolman +stinger +nursling +legate +faille +golden fern +bedpost +shop steward +kidney bean +bladderwort +internist +limeade +Bruneian +Coloradan +playsuit +wintergreen oil +Cantabrigian +mutton snapper +shot putter +hand grenade +moccasin +cobnut +marrow +separatist +cockscomb +discharge pipe +Gabonese +spade bit +chicken cordon bleu +varnish tree +European wood mouse +striped gentian +Ayrshire +curassow +moo goo gai pan +malarial mosquito +glow tube +ledger board +bib-and-tucker +European chestnut +suffragette +color wash +gaffsail +golden larch +voting machine +Kahlua +lungi +amusement arcade +Uzbek +butternut +mold +mule's ears +dickey +shrimper +trophozoite +dreadnought +shepherd's purse +greenhouse whitefly +spotted gum +copperware +perfect game +semigloss +spawn +telecom hotel +stakeholder +mason wasp +flibbertigibbet +chin strap +fringed pink +saki +urchin +memorizer +roulade +whiting +cling +corncrake +Queen of England +choo-choo +empty +heating pad +playmate +visualizer +popcorn ball +absconder +sou'wester +target acquisition system +mock-up +dental floss +tray cloth +haddock +bulblet fern +housing commissioner +delayed action +anchor light +harbor porpoise +water wings +PT boat +night latch +fennel +doorframe +green-tailed towhee +grey polypody +torture chamber +American germander +Chinese wistaria +cattalo +accompanist +rifleman +alpine clover +contrarian +lemon peel +Mexican cypress +sprog 
+dado +Galilean telescope +desmid +lockup +Latin +American raspberry +mescal +butternut +prairie orchid +downy yellow violet +green hellebore +radio compass +bread and butter pickle +Cherokee rose +knish +destroyer escort +Arkansan +langlaufer +pyxis +winter savory +velocipede +motley +winter savory +law student +barren ground caribou +apple dumpling +field hospital +works +city editor +European flatfish +Morchella crassipes +life office +boot camp +cream sauce +cape aloe +acetate disk +devil ray +tile cutter +Plymouth Rock +microspore +godown +Syrian +tiercel +American cranberry +lesser spearwort +anopheline +Spanish oyster plant +wire cloth +attic fan +birch beer +small computer system interface +crook +ribbon fern +explorer's gentian +nagami +I-beam +rosebud cherry +Jerusalem artichoke +Stillson wrench +pluralist +district manager +Levantine +orangeade +part-timer +post horn +Oregon grape +contadino +cargo helicopter +silverpoint +chaja +California bluebell +case +Shasta +cheese cutter +Leishmania +avalanche lily +iron horse +bialy +Yana +Delawarean +Prussian +nonpareil +hammer +hoper +chewink +anil +skim milk +desert four o'clock +crescent wrench +white marlin +blue jasmine +malacca +anadama bread +purple poppy mallow +ganglion cell +ligature +no-parking zone +golden clematis +Cotswold +aliterate +shebeen +yardarm +superbug +fanaloka +stinking cedar +spirochete +wort +pater +heaume +thermocouple +ironing +naval tactical data system +European goatsucker +prairie cordgrass +accused +foreign agent +halberd +western mugwort +esthetician +Persian lilac +cracked-wheat bread +crosscut saw +rock penstemon +paper cutter +crematory +ideologist +cattley guava +margarine +creosote bush +hoary plantain +spark gap +lumberjack +Greek valerian +mission bells +tight end +bigeye +large crabgrass +stone marten +cleat +lentil +bay scallop +lector +charger +assemblywoman +second lieutenant +boil smut +sarsaparilla +hydromel +cat flea +pinfish +whole milk +hairnet +myeloblast 
+peasant +blind curve +first offender +dwarf-white trillium +Brother +coatdress +gun emplacement +tamarisk gerbil +snap +air cushion +trailing edge +potato vine +gig +everlasting pea +champion +dibble +rattail cactus +timothy +prince's-feather +cutlas +lockring +sealing wax +Brussels lace +corn mint +highboard +she-oak +wild celery +pillar +Burberry +Hakka +leucothoe +bell tent +gallery +coontie +leather fern +smack +adenovirus +linoleum +chain wrench +tammy +gas fixture +nut bar +baneberry +butterscotch +goat's rue +bullock +grey snapper +mother-in-law +hyson +wayfaring tree +mollie +needle spike rush +buckwheat +bayberry +brush-tailed phalanger +dry rot +harborage +stormy petrel +Oriental beetle +Atlantic halibut +coping saw +simple fruit +viscose rayon +surgeonfish +upstairs +security system +common ragweed +verticillium +pancake batter +hawk's-beard +Dutchman's-pipe +refrigeration system +European parsley fern +Ivy Leaguer +totalitarian +gonococcus +towhead +showy sunflower +pallium +multiengine airplane +hair trigger +rabbit-eared bandicoot +siskiyou lewisia +fuel system +flat arch +broad beech fern +Alpine lady fern +bracken +Kentucky black bass +rut +mountain maple +tunaburger +umbrella fern +white-headed stilt +meat hook +panhandler +washhouse +barnyard +safety lamp +leg +ripple mark +paper +sagebrush lizard +light heavyweight +common nutcracker +operator +stalking-horse +horseless carriage +fishhook +suction cup +peg +Ungulata +false teeth +round-bottom flask +Luba +campaign hat +firebox +rudder +parapet +ice pack +appellant +spirit stove +metheglin +common bamboo +soapwort gentian +pannikin +time capsule +burn bag +folk poet +tropical prawn +end man +new caledonian pine +linen +web +free trader +jury box +railing +pignut +leaker +potboy +rubber boa +white snakeroot +plumber +Candida albicans +surfboat +woman +promulgator +eyecup +wild China tree +rattlesnake master +Viyella +alpine salamander +ailanthus silkworm +Albatrellus ovinus +war room +meadow vole 
+robotics equipment +rotary actuator +Engelmann spruce +pinesap +beefcake +native speaker +ridge +injector +water chute +salmonberry +decoupage +bottlebrush +date plum +circlet +American mountain ash +pocketbook +horsemint +sweet four o'clock +kirpan +pinto bean +chervil +equator +range animal +candy thermometer +calanthe +cul +stipendiary +brahman +pelican crossing +topgallant +wild senna +sliding window +carrier pigeon +Tatar +quadruplet +bumboat +spearmint oil +slip clutch +young Turk +golden yarrow +shank +glasswort +dental plaque +Manduca sexta +Northern bedstraw +dent corn +Life Saver +western wall flower +bedder +wherry +Tuscarora +scrapple +borstal +reflux condenser +problem solver +nondriver +perforation +eastern cricket frog +white wood aster +broad buckler-fern +Cape primrose +herringbone +head louse +earl +baton +recording system +primary color for light +cherry laurel +pomfret +ratafia +chocolate milk +obscurantist +revisionist +rood screen +magnetic needle +commensal +oil tycoon +celebrant +domicile +harvest mouse +California nutmeg +greater spearwort +black-billed cuckoo +winepress +demographer +straw boss +diabetic diet +sweetmeat +rabbet +ming tree +basketweaver +freestone +walk-in +Aryan +box coat +audio amplifier +chicken salad +churidars +whydah +box +batman +siren +selectman +gouger +drip coffee +Caesar salad +interpreter +whinstone +grey goldenrod +minicomputer +honey crisp +hypercoaster +Irishman +swamp white oak +reed canary grass +globeflower +cynthia moth +fennel seed +canthus +chino +blind date +tar pit +watermelon begonia +fishtail palm +overcast +Pearmain +primary color for pigments +coal seam +wherry +safety bolt +cretonne +Michigan lily +inflater +moneybag +huckleberry +brassard +bush vetch +looking glass tree +pinwheel roll +alfalfa sprout +sea kale +clinometer +achira +lorgnette +potter wasp +gilded flicker +tody +capulin +captain's chair +crackle +gerardia +prie-dieu +venture capitalist +New Jerseyan +block and tackle +elf cup +bur 
reed +automatic transmission +wax palm +flytrap +crack willow +coachwhip +swizzle +lugger +Dewar flask +baster +oxyacetylene torch +Culex quinquefasciatus +St Peter's wort +wild hyacinth +Russian almond +burrfish +wintergreen +katsura tree +butcher knife +perfumery +thresher +porte-cochere +sheepwalk +hypotenuse +Dalmatian iris +buttercup squash +demiglace +goldenseal +preceptor +rigger +poikilotherm +old-age pensioner +posthouse +wood horsetail +repeater +reciprocating engine +Rambouillet +terra cotta +togs +battledore +horizontal tail +missile defense system +trier +morello +woolly adelgid +munition +double creme +in-fighting +squirrel corn +crow's nest +antler moth +brake cylinder +bandoleer +noticer +Parmesan +hipline +cheapskate +Dubonnet +mole rat +bog aster +ribbon tree +meadow rue +nard +ratel +loose smut +snapping shrimp +golden glow +basil thyme +Florida strap fern +moonshine +flume +lace fern +black bream +orchestra pit +archerfish +exile +ringdove +career man +godfather +bottom-feeder +pasteurized milk +dental implant +pedicel +Catalpa speciosa +yellow foxglove +lancet arch +steam shovel +sampan +patrol boat +sailor cap +tollgate +monal +velociraptor +cacique +jack oak +cursed crowfoot +creep +Parry manzanita +common matrimony vine +grace cup +caecilian +spurge laurel +prickly lettuce +Regius professor +camail +Sitka willow +Courtelle +gin sling +dogmatist +guest +saltine +dust cover +sport +sweeper +feist +lady's-eardrop +vibist +wire stripper +tenpin +interplanetary space +beet green +pruning knife +drainage system +gunnery +ballet master +lime juice +flak catcher +lacrosse ball +Canadian aspen +beatnik +railhead +utilizer +spadefish +Arizona white oak +city university +dense blazing star +hedger +chain pickerel +right-hand man +namby-pamby +nacelle +redneck +tumbler +Chief Secretary +cannon +cupola +kummel +papaya juice +Burton +Stanley Steamer +loganberry +stylus +square meal +rock bass +western ladies' tresses +dramatist +assignee +tandoor 
+trumpetwood +segregator +green adder's mouth +coral necklace +ani +iceboat +densimeter +oxtail soup +kernel +cos lettuce +greenishness +panchromatic film +Parker House roll +oatmeal +backsaw +double Gloucester +bailey +storage cell +giant +coconut milk +broadtail +barouche +loir +soybean meal +white-leaved rockrose +junction barrier +spandrel +sweat bag +goldilocks +flowering wintergreen +cockspur +beef fondue +holding cell +cardamom +cagoule +Kamia +tangelo +Herschelian telescope +wine bar +kachina +sand sage +guy +ivory palm +citrus mealybug +topper +ladyfish +force pump +fanion +calaba +Iowa +orrisroot +ivorybill +Secretary of Agriculture +gagman +dry cell +hypnotist +kenaf +grey alder +deathwatch beetle +gagman +magnetic stripe +trap door +abdominal wall +prefab +broomcorn millet +architeuthis +angler +Pacific giant salamander +barbette carriage +low-fat diet +veal scallopini +B battery +wallah +landing flap +pistachio +jaguarundi +nagi +cicerone +felt fungus +Aertex +stocks +smooth aster +patchouli +lemon sole +sleeper +basket fern +dundathu pine +anjou +Moreton Bay chestnut +broom sedge +candid camera +red angel's trumpet +oilstone +cinnamon toast +Pacific walrus +fruit custard +Jehovah's Witness +mate +voyeur +Esselen +achromatic lens +sanguine +brine shrimp +dunce cap +swot +transit instrument +grey willow +pack +bench clamp +Nova Scotian +gadgetry +silvery spleenwort +enchantress +rough fish +morula +giant taro +sorus +roux +polyhedral angle +spruce beer +Chicano +cola extract +outfielder +kohleria +white-rumped shrike +car-ferry +subway token +spoon bread +totara +corn borer +bowhead +tensimeter +water scooter +flickertail +Catholicos +pleaser +blue-eyed Mary +calabash +handyman +cascades frog +facing +scarlet oak +lutist +ginger +tree tomato +Harvey Wallbanger +tent peg +insectivore +fusil +swale +chinning bar +bladderpod +New Dealer +dhoti +proscenium arch +common vetchling +channel +collect call +safflower +Texas tortoise +test equipment +theca +RAM 
disk +sheep sorrel +rammer +buttonhook +honey mesquite +dominus +babirusa +queen +Aspergillus fumigatus +crash barrier +nonmember +Muscovite +verdin +Australopithecus afarensis +Turkish Delight +stalked puffball +giardia +divider +mountain skink +head smut +pacemaker +evaporated milk +rattlesnake fern +flamethrower +navy bean +bather +steed +showy orchis +stone crab +artichoke heart +phantom orchid +space helmet +swamp laurel +privateer +junior +surcoat +bristlegrass +flower girl +aphid lion +penthouse +lemonade mix +coude telescope +natal plum +scriber +wood nettle +rape suspect +resplendent quetzel +western poppy +choir loft +fore-topsail +thyme-leaved sandwort +erotic +short circuit +outdoors +flowering tobacco +hookup +aviatrix +corker +horehound +horn +swamp pine +water biscuit +cherimoya +vaporizer +courtier +European sole +full skirt +Mother Carey's chicken +cymule +huck +white snapdragon +mountain nyala +country borage +bonduc +casein paint +grampus +shrimpfish +lodge +dragee +black walnut +caraway seed +roper +glass cutter +tab key +Richardson's geranium +demigod +chichipe +Italian ryegrass +cadet +electrograph +rudd +carpenteria +foie gras +lignum vitae +hedge nettle +pledger +American hackberry +flageolet +beaked hazelnut +reflectometer +sticky geranium +marriage bed +white pepper +japanese clover +whiteface +gnat +extrovert +Canada plum +talipot +chicken stew +egg foo yong +fraxinella +skibob +saucer magnolia +jacket +green smut fungus +cloakroom +landing skid +booth +ice milk +dipole +striped coral root +red buckeye +roughcast +breaststroker +cowherb +razor clam +first-aid station +briarroot +clambake +lander +Bramley's Seedling +frail +jird +minisub +luging +poison milkweed +European lobster +epidemiologist +spandex +paloverde +marumi +bypass condenser +punter +petty spurge +Coryphaena hippurus +bilberry +vermillion rockfish +witness box +viscometer +pulque +Massachusetts fern +herring salad +ridge tile +mesa +dwarf grey willow +southern aster +punch 
pliers +tarnished plant bug +hoop pine +Japanese red pine +benedick +rebozo +silver plate +silver willow +mouse-ear hawkweed +bonito shark +abutment arch +noble cane +tiger rattlesnake +pongee +jumping plant louse +pattypan squash +giant ryegrass +railroad bed +stiff aster +imperial Japanese morning glory +laundry +winter cress +large white petunia +tea maker +pen-and-ink +early warning system +lug +monocot +sea wormwood +breechblock +postage meter +third rail +Mongoloid +Australopithecus boisei +umbrella tent +stirrer +Dumpy level +beroe +post and lintel +green spleenwort +tomato paste +dishpan +stentor +sweatband +cobbler +New York fern +gaff +prairie willow +cyclops +jigsaw +rotavirus +pallet +eastern ground snake +boiling water reactor +acute triangle +agora +European cranberry +roebuck +surgical dressing +busboy +cannikin +feedlot +common pond-skater +cochin +horsehair lichen +fetter +sapote +fichu +dermatologist +fire tongs +creme anglais +foster-mother +laurelwood +chicken snake +mincemeat +rocker +wild spinach +powder and shot +butterwort +auxiliary engine +mamey +hart's-tongue +sucking pig +American turkey oak +troopship +buttermilk +divi-divi +boatswain's chair +soda fountain +southern flying squirrel +elastic +cutaway +housekeeper +renegade +apple rust +bridoon +machicolation +stunt +keyhole limpet +personality +solitary vireo +epidendron +Jihadist +boffin +bettong +terror +partial denture +pusher +saltcellar +capstan +large poodle +Bibb lettuce +low-bush blueberry +staple +banded krait +sickroom +barnyard grass +wandflower +woodworm +bluegrass +squirrel's-foot fern +rabbitfish +delta wing +milking shorthorn +limber pine +guru +gamine +scythe +sweetsop +Gruyere +bloodmobile +mine detector +American mistletoe +silver beech +hound's-tongue +Lombardy poplar +basket fern +pink-and-white everlasting +redtail +Aladdin's lamp +mace +outtake +condensed milk +Canada wild rye +silver perch +waxflower +taxer +Chinese chestnut +Our Lord's candle +mugwump +school 
system +salp +osso buco +dress shirt +butterweed +low-fat milk +couchette +broomcorn +proscenium +mill agent +smut grass +humpback +southern spadefoot +military leader +canebrake rattlesnake +tailor-made +ebony +beach house +flying gecko +hoary alison +typhoid bacillus +Romanov +vanilla pudding +sweet cicely +Spodoptera exigua +dress rack +flannel +skipjack +bolognese pasta sauce +rooibos +thunderer +blessed thistle +gauntlet +mahatma +granadilla +laurel sumac +Yuma +thyme-leaved speedwell +encyclical +twill +linocut +manna gum +spark arrester +cocklebur +Indian hemp +lemon oil +Hall's honeysuckle +raceway +flop +Himalayan lilac +one-flowered wintergreen +photosphere +silvery spleenwort +convex polygon +canarybird flower +foster-sister +fluffy omelet +palanquin +roll +dandelion green +Javanese +workpiece +Carmelite +bread mold +schlemiel +wild lily of the valley +grugru +solenoid +puff batter +skep +balance wheel +Gadaba +portia tree +mobcap +two-man tent +scuffle +firebrat +ant lion +anise +caster +giant petrel +American water spaniel +naboom +treasure ship +foster-son +fiddleneck +alidade +sugar refinery +wild oat +water beetle +generic +damson plum +abrocome +detainee +pitch pipe +coast +nilgai +radiotherapy equipment +heart-leaved aster +gristmill +grocer +Appaloosa +Cheviot +brake pedal +lantana +cave myotis +Rob Roy +sea spider +latrine +carpophore +recycling plant +coondog +brace and bit +funambulist +eggar +mantelet +postdoc +mezzanine +coco plum +pulse generator +high-vitamin diet +menhaden +mechanical engineer +bergamot mint +Chuvash +grated cheese +helicon +belladonna +beet armyworm +eelgrass +resuscitator +interrupted fern +arrow grass +cistern +Pacific herring +colostrum +journal bearing +Fauve +wrist pin +canape +choice morsel +quadraphony +guard boat +shortgrass +claymore mine +hitching post +cargo door +decoder +gym rat +Cocopa +commander +apple of Peru +seckel +yellow goatfish +dog flea +dodo +oconee bells +Tudor arch +turkey stuffing +ebony 
spleenwort +wheat flag smut +scolopendrium +Brazilian pepper tree +gusset +inspector +lunar excursion module +baron +plantigrade mammal +Creole +phosphate +aromatic aster +ghee +audiovisual +onychophoran +cotton stainer +lieutenant junior grade +spheroid +amen corner +caper sauce +Caladium bicolor +dyer's rocket +seaside goldenrod +flint corn +Very pistol +rotifer +steeplechaser +rouleau +escape wheel +Namibian +millivoltmeter +emmer +climatologist +agateware +sea lyme grass +inclinometer +water fennel +saddle seat +vicar +garden cress +ski rack +Norfolk jacket +casaba +coast rhododendron +sericea lespedeza +hematocrit +autopilot +tilter +finish coat +Pennsylvanian +shrubby St John's wort +podocarp +percussion cap +ceriman +peanut bar +gean +jack +durra +rotor +carob +cottage tulip +three-spined stickleback +trencher +elevator +kalumpang +abaca +Australopithecus robustus +active matrix screen +water bed +hatmaker +lodestone +cat food +overcup oak +balletomane +popgun +rheometer +process cheese +frog legs +heartleaf arnica +p-n-p transistor +steam turbine +Tulu +scalene triangle +licorice fern +coffee break +trade unionist +starved aster +firing pin +water gum +Masonite +hairspring +seminarian +blue racer +forecastle +scrub pine +Atlantic spiny dogfish +kopje +orphrey +fan tracery +gee-gee +vixen +interstellar space +Harris Tweed +sawmill +lemon mint +bitewing +ringlet +Chinese mustard +paleontologist +American hazel +brigantine +clay-colored robin +zombie +nectarine +West Indian jasmine +pineapple weed +rusher +gynecologist +pole +thylacine +myrtle beech +golden cup +woodruff +T-bar lift +terebinth +service club +homegirl +Blue Mountain tea +figwort +New Hampshirite +Stayman +tonometer +white turnip +messuage +cruet-stand +colliery +connecting room +lesser twayblade +bland diet +crown prince +beggarwoman +restharrow +bower actinidia +firebug +hepatic tanager +telegraph +Spodoptera frugiperda +spackle +carpenter's square +pyx +supermom +thickhead +whorled milkweed 
+Arctic char +Chinese rhubarb +pince-nez +wolverine +tomato concentrate +cascarilla bark +red underwing +leather flower +Jerusalem thorn +bullpen +Salisbury steak +anode +coffeeberry +bottling plant +fritter batter +aerial torpedo +matrix +local oscillator +stalked puffball +bruin +three-cornered leek +wassail +stabling +damping off fungus +myriapod +osier +lesser kudu +cownose ray +chokecherry +wagon +obstetrician +Glengarry +even-pinnate leaf +wine sauce +osteocyte +baker's yeast +heir presumptive +blackjack +tympanist +golden fern +fipple +Japanese oak +bar mask +stamping machine +argus +knobcone pine +oil beetle +lanai +upper berth +condenser +proctologist +catechu +wild spurge +vestry +ground snake +proton accelerator +walker +scarlet bush +transom +lagging +bouillon +slender loris +black currant +developer +football hero +plum sauce +striped mullet +prince charming +fictional animal +prosimian +lug wrench +lemonwood +kirsch +spy satellite +black caraway +Thompson Seedless +bead tree +purple fringeless orchid +Virginia strawberry +chigetai +punkie +gall wasp +addressing machine +rock polypody +good-king-henry +spring cankerworm +wimple +noncandidate +saskatoon +hacienda +Darjeeling +snowberry +lounging pajama +ascospore +ski-plane +hedgehog cereus +Welsh onion +yautia +coaster brake +sickle cell +parrot's beak +fuller's teasel +painted greenling +scablands +stuffed cabbage +barrel organ +etcher +dwarf maple +camp +Australian blacksnake +currycomb +obtuse triangle +rose gum +psychrometer +abridger +torpedo +carpet loom +sodalist +slender rush +loligo +sclerometer +wimp +dotted gayfeather +green ash +pinstripe +moralist +medusa's head +garden centipede +heath aster +fool's parsley +olla podrida +Potawatomi +Edam +toothache tree +hulk +seabag +narthex +compartment +prairie star +lookdown +B-flat clarinet +event planner +clip lead +shirting +milk punch +supercharger +macadamia nut +giant coreopsis +computer store +martingale +keyboard buffer +summer flounder 
+squash ball +gas turbine +object ball +plier +black mulberry +reef squirrelfish +scampi +willow aster +bowler +striped marlin +smooth muscle cell +diplodocus +Liberty ship +sponge cloth +guitarfish +walking leaf +showroom +California bluebell +bolo +turnbuckle +boysenberry +hardware +Gael +imago +endorser +jujube +dust bag +rapporteur +field wormwood +low-water mark +naval missile +Pacific yew +reversible +crabapple jelly +poniard +barricade +spawner +simnel +seltzer +deckle edge +needle +timbale +satellite transmitter +organization man +job candidate +orderly +native cranberry +fir clubmoss +coaming +chartered accountant +electron accelerator +Sierra plum +American foxhound +long underwear +Penobscot +blueberry yogurt +biretta +cascara +Paranthropus +Dorian +nun's habit +lenten rose +Augustinian +designer +northern phalarope +mombin +hazel mouse +reeve +waffler +telegraphy +Verpa conica +ignition coil +Japanese oyster +S-shape +divining rod +ant thrush +throat protector +interlocutor +Desmodus rotundus +pere david's deer +attenuator +Cypriot +red sandalwood +pendulum watch +broadcloth +striped drum +sequence +safety arch +diapensia +hog +western spadefoot +chlorella +comb-footed spider +Chechen +darning needle +C-ration +hard beech +piano action +scaling ladder +Nepal trumpet flower +ravigote +screw wrench +ramekin +Lyonnaise sauce +dinner napkin +partial veil +masseuse +coatrack +mooring tower +blue-eyed African daisy +English horn +baton +rope tow +toll bridge +massage parlor +quark cheese +lounging jacket +tall goldenrod +flying jib +coordinate axis +barley-sugar +integrator +worm gear +captain +sweatshop +class +layer +chili powder +dripping pan +oatcake +newsroom +tadpole shrimp +rake +trade magazine +silks +ram's-head +senior +knower +masseur +yam +peg +wheel tree +hardbake +test room +long-spurred violet +creeping spike rush +shrapnel +coffee senna +matchbox +creeping soft grass +welder's mask +pickaback plant +urial +hooded pitcher plant +incense cedar 
+Ohio buckeye +ant cow +skeleton fork fern +Indiaman +swamp ash +testatrix +marang +spherocyte +Winesap +Indian mallow +teju +Yersinia pestis +dye-works +sauerbraten +coral bean tree +safe house +postulator +eyas +lotus +wood vise +lady-of-the-night +East German +cymling +rock candy +western omelet +anoa +rainbow seaperch +crossover voter +Finn +tree shrew +hog plum +Federal +shagbark +clockwork +Alexandrian laurel +metal wood +brill +military chaplain +trend-setter +call-back +Indian rat snake +spurred gentian +Japanese maple +forest goat +bee moth +viola da braccio +duckboard +armyworm +hangnail +counterbore +cream-of-tartar tree +Mullah +bonbon +water hazard +temple orange +corporatist +rough bindweed +Turkish bath +mistletoe fig +beach sand verbena +caddisworm +English plantain +brown Betty +power pack +lion's-ear +Francis turbine +stayer +dichondra +marsh St-John's wort +squab +energizer +common horehound +mantispid +pullback +handwheel +spark arrester +yakuza +Virginian witch hazel +grunter +waterworks +bondwoman +chain printer +stockjobber +coconut milk +yardgrass +blue chip +bridle path +riser +pleurothallis +saltwort +salal +broadside +blackboard eraser +bastard +Para rubber tree +red bat +digital-analog converter +calabash +cashier +cow shark +horned pout +microphage +monologist +woolly monkey +Illinoisan +marsh horsetail +distaff +siris +eparch +gooseneck loosestrife +sounding rocket +multiprocessor +saiga +xerographic printer +madrona +right triangle +sweet gale +red maids +wolfsbane +pork-and-veal goulash +French sorrel +mutterer +Venetian sumac +drumlin +white crappie +squire +large-flowered calamint +northern cricket frog +mushroom sauce +supertanker +morello +auxiliary boiler +Virginia thimbleweed +cottage tent +bubble shell +big shellbark +wormwood sage +cider gum +coast lily +American feverfew +Peruvian balsam +purple silkweed +tobacco moth +desk dictionary +rock elm +eastern indigo snake +Japanese privet +lamb +levee +L-plate +soapfish +painted 
tongue +scuttle +markhor +Marburg virus +mackinaw +major +crypt +ball and chain +domestic silkworm moth +bottom feeder +mistress +death house +freight elevator +bellyband +Pulex irritans +Bacillus anthracis +fire control radar +hysterosalpingogram +turbogenerator +decompound leaf +vambrace +scentless camomile +Medinilla magnifica +prima ballerina +Northern Spy +quartz lamp +grains of paradise +justiciar +felt fern +seismograph +Madagascar jasmine +imaret +white perch +Alpine mouse-ear +tea bread +yellow bass +poseuse +espionage agent +punching bag +eurypterid +orange sneezeweed +banded stilt +armhole +postern +mother +kapuka +catechumen +Soubise +Sauvignon blanc +gunnery sergeant +self-starter +ceratozamia +Atlantic cod +Reoviridae +blood cup +horseshoe bat +oriental plane +voussoir +fetterbush +samara +truncated pyramid +lingcod +athenaeum +shyster +Carolina hemlock +submarine torpedo +floating fern +yataghan +sun tea +viola d'amore +conenose +ventilation shaft +walk-up apartment +saury +wild wheat +porcupine ball +tahini +kris +grass fern +drip pan +black bryony +Scotch broth +tapioca pudding +southwestern toad +Hare Krishna +guimpe +wild madder +megalocyte +teaching fellow +shrubby penstemon +lesser wintergreen +privet hedge +Fahrenheit thermometer +stern chaser +prickly ash +pump room +ricer +chicken mousse +wing commander +sun gear +bolus +alpine milk vetch +opera cloak +twinjet +Goldie's fern +abnegator +alphabet soup +node +grape jelly +early coral root +Tarzan +quarterstaff +greeter +Eurasian woodcock +primary coil +quirt +tinkerer +bolt +creme de fraise +voltage regulator +news photography +Jat +bristly locust +Gouda +dickey +lobster butter +dwarf flowering almond +fagot stitch +Reform Jew +ostrich fern +bathyscaphe +purple mullein +alpaca +civic leader +jellaba +Arizona ash +wasabi +Irishwoman +choke +stockinet +religionist +sewage disposal plant +bittersweet +Hyphantria cunea +pheasant under glass +screen actor +chapterhouse +quoit +horseshoe bat +rapper 
+cupule +planetary gear +cascade penstemon +redoubt +salt +areaway +megalomaniac +bush willow +amethystine python +plains spadefoot +colour supplement +kick pleat +bell apple +narwhal +slippery elm +stenograph +baa-lamb +quadrant +balker +jobcentre +spit curl +bastard indigo +malacca +serow +adobe lily +yacca +palestra +penalty box +scrub beefwood +reenactor +screening +white bryony +alderleaf Juneberry +harpoon +alpine clubmoss +neurosurgeon +surrey +sweet calabash +Scotch laburnum +coquille +French honeysuckle +extrados +pipe cleaner +southwestern white pine +Virginian stock +scaly lentinus +aileron +carob bar +swordfish +Alpine woodsia +negus +wireworm +sweep +goldfields +drop arch +European bream +roly-poly +pin +bastard wing +fustian +wild buckwheat +lake whitefish +overcoat +water filter +Bermuda chub +New Zealand spinach +high-hat cymbal +European larch +radiologic technologist +fine-tooth comb +brunch coat +splice +electronic converter +overmantel +extern +taper +cluster bomb +teletypewriter +pinwheel +trailing arbutus +quipu +creeping zinnia +orange milkwort +tabard +Australopithecus africanus +melancholy thistle +insole +courser +darkroom +surface-to-air missile system +bark-louse +Confederate +neritina +clip-on +spouter +trench knife +outside caliper +dhak +Limburger +chuck wagon +buttercup squash +shirtdress +pouter pigeon +dirty old man +zodiac +fennel flower +mother figure +appointment +Manichaean +lignum +bouffant +rum sling +Ravenna grass +hibachi +gin rickey +American harvest mouse +cocozelle +western wheatgrass +black crappie +rhombus +Missouri goldenrod +barndoor +wild mango +pneumococcus +Boston lettuce +ratline +desert holly +cobweb +fluoroscope +ethnologist +tor +bullshot +stockade +greave +rock sea bass +slip-joint pliers +taxi dancer +schizophrenic +zill +creme de menthe +orange-blossom orchid +divot +supplejack +busybody +casemaking clothes moth +ramrod +gearbox +birdcall +Wiffle +thwart +beauty consultant +chicken paprika +trawl +skep 
+spirometer +hopper +kvass +doggie bag +bath chair +showy daisy +wild tamarind +Tarsius syrichta +glyptics +Algerian +cargo area +bunk +Velveeta +iconoclast +clinch +New Caledonian yew +false mallow +Japanese tree lilac +convex polyhedron +water boatman +cruise missile +finisher +colonoscope +cumin +wickiup +saccharin +whipcord +trailer camp +eryngo +cuckold +yam bean +fighting chair +forewoman +galingale +citron +positivist +four-lined plant bug +suet pudding +field pea +Circaea lutetiana +deer grass +trap-door spider +common corn salad +mirror carp +sounder +second-in-command +seaside alder +burgoo +ming tree +curry sauce +courbaril +green alder +figure loom +fauld +halfbeak +squelch circuit +cladode +winter cress +tongue and groove joint +dwarf dandelion +joss house +western buttercup +welted thistle +potato tree +anglewing +cookfire +marzipan +hood latch +seed shrimp +common moonseed +toasting fork +bevel +three-quarter binding +midwife toad +stage director +Pentecostal +technical sergeant +golden-beard penstemon +drunk +silky oak +corn gluten feed +T-square +stoker +selling agent +cruse +server +rope-a-dope +bicorn +matzo meal +wide wale +roadblock +false foxglove +tuck box +bandsman +smoke bush +machinist's vise +Highlander +scholiast +self-starter +Swedish rye bread +spark transmitter +maverick +maquiladora +cabinetmaker +compress +rainbow shower +huntsman's horn +mackinaw +copper rockfish +lappet +nitrate bacterium +telephone plug +soutache +Dacron +toboggan +sissoo +yogi +laurel-tree +vice chancellor +Christ's-thorn +cartridge fuse +serial port +quassia +tarweed +pecopteris +beggarweed +anchovy pear +bookbindery +woodland oxeye +toad rush +sandalwood tree +marsh andromeda +Tyrian purple +boothose +tragedienne +fragrant cliff fern +festoon +bondwoman +melancholic +butternut squash +exhaust valve +semi-skimmed milk +glowworm +Virginia oyster +Identikit +ayah +gallows tree +Carioca +monoplane +jewels-of-opar +scallop +moth miller +marsh cress +lobed 
spleenwort +ricotta +emitter +arame +tub gurnard +army attache +maniac +organizer +pheasant's-eye +Melba toast +homeboy +Bavarian cream +Maximilian's sunflower +backstop +Tremella foliacea +yellow avens +spreading fleabane +plumb level +false rue anemone +zabaglione +climbing maidenhair +doeskin +walking shoe +lancewood +material +jacksnipe +South American poison toad +agonist +hinny +paper mill +psychophysicist +valley girl +toast mistress +jorum +tiler +chicken Tetrazzini +trivet +grasshopper +three-mile limit +kink +kiang +pole horse +jig +Cornish heath +hedge thorn +false alumroot +Popper +remount +photojournalist +sideroblast +stonecress +Agave tequilana +Japanese lilac +hawse +maenad +air bag +leaf spring +dwarf willow +soda cracker +contralto +moleskin +pilaster +Audubon's caracara +pia +American organ +bleu cheese dressing +betel palm +PC board +almond willow +socializer +tone arm +stammerer +free-liver +scaler +Gentianopsis crinita +leak +black haw +hound's-tongue +grass pea +Stassano furnace +coralbells +ministrant +perihelion +Luxemburger +powder-post termite +arboreal salamander +cushion flower +foramen magnum +pyrethrum +poacher +woolly mammoth +horned chameleon +tearaway +father-figure +tufted gentian +salmi +finger millet +physa +registrar +polyoma +bamboo shoot +matchlock +seine +congress boot +bulgur pilaf +monosodium glutamate +Kentucky wonder +mycologist +kedgeree +ragweed pollen +boarfish +yellow pimpernel +tan +northern Jacob's ladder +macrobiotic diet +migrant shrike +big-cone spruce +colonialist +white dogtooth violet +bath asparagus +webbing clothes moth +ladies' room +experimenter +prairie bird's-foot trefoil +bootleg +cognitive neuroscientist +fire chief +flagfish +dendrite +stinking goosefoot +fore edge +hogfish +Spanish cedar +hotel-casino +Tory +life-support system +pea flour +cash bar +Chenin blanc +white-footed mouse +Canada garlic +salt-rising bread +roomette +mastodon +bell founder +long iron +bi-fold door +fig-bird +European water 
shrew +dyer's weed +frog orchid +allosaur +Florida yew +wild potato vine +crape fern +flat-topped white aster +klebsiella +oil heater +waxmallow +enjoyer +mesocarp +semidesert +senior vice president +coccidium +burrawong +syllabub +jump suit +harrier +leaf roller +cherrystone +cinchona tree +touring car +eulogist +air force officer +red goosefoot +cat thyme +smoothbore +slugger +cardiac monitor +cobber +blister rust +musicologist +rolled biscuit +Braun's holly fern +hog plum +nonpasserine bird +pascal celery +damson +Jonathan +Sheraton +cohune palm +egg white +baton +sixth-former +Siberian pea tree +choanocyte +wineskin +auditor +detention home +Leichtlin's camas +Chartreuse +clusia +club car +wattle and daub +security blanket +common American shad +assistant professor +marsh pea +camomile tea +gopher hole +gravure +Freudian +spirillum +maharani +equilateral +crow garlic +mammee apple +felwort +hardtop +dillenia +curlycup gumweed +pilot engine +calcimine +wooly lip fern +bitter dock +wineberry +jumper +monolingual +spinning frame +old-timer +native cat +diving petrel +sodium-vapor lamp +marchand de vin +sexton +matelote +interior designer +windfall +mole salamander +minder +bodkin +neutron bomb +Caloscypha fulgens +slinger ring +mezzo-soprano +aura +Southern Baptist +viscacha +midfield +tie +prosthetist +round-headed leek +yellow mariposa tulip +canary grass +staddle +Tokay +Muenster +brazil nut +California black walnut +applesauce +penologist +virgin's bower +tenon +steward +Jerusalem oak +red-bellied snake +bindery +scow +fluid flywheel +bullhead +satinleaf +clove +double glazing +matron +wild parsnip +winged elm +shoot-'em-up +musk deer +white rust +lock +Cornishman +Vidalia onion +corn spurry +freeloader +justice of the peace +inlay +myxobacteria +tiglon +tangram +German ivy +scented fern +woolly daisy +caretaker +gastroscope +scuppernong +spotted sunfish +guilloche +codling +wormcast +Eskimo curlew +tayra +European fly honeysuckle +septuagenarian +third gear 
+coatee +red alder +water ice +cubitiere +frame buffer +gamboge tree +pernyi moth +chicken Marengo +Galliano +Lincoln +true sago palm +hunter's sauce +carpet beater +alpine goldenrod +arch support +vehicle-borne transmission +jilt +paternoster +redcap +Siberian larch +hoary plantain +swan's down +chicane +reverse +divan +kneeler +alexic +mock turtle soup +daffodil garlic +mission bells +squilla +ursinia +winter's bark +trifoliate orange +discina +frijole +Swiss steak +maildrop +knotgrass +dog fennel +drum sander +heroin addict +costume +camber arch +shining willow +lutefisk +red porgy +microfossil +good old boy +angle bracket +pitcher sage +bordelaise +heat exchanger +carrion +bush jacket +fanjet +coach +blackface +sicklepod +Manhattan clam chowder +daisywheel printer +olive +Sphacelotheca +Spanish needles +brown root rot fungus +boudoir +encyclopedist +V-8 juice +red haw +brass buttons +gym suit +skywalk +water wagon +gas-turbine ship +stoup +lisle +sailor suit +box beam +balm of gilead +housemaster +hayrack +neutralist +water elm +brook thistle +doyenne +nark +alpha-tocopheral +WASP +hydrilla +water-shield +footlocker +variola major +pargeting +ion engine +yellow globe lily +Malecite +bloodleaf +yellow sand verbena +whorled loosestrife +packinghouse +Carolina parakeet +Virginia waterleaf +armband +red rockfish +factory ship +moon trefoil +jump seat +water gillyflower +yerba mansa +chamfer bit +compass saw +hopsacking +Indian rhododendron +sickbed +treacle +honey eater +mailsorter +seabeach sandwort +sob sister +primrose jasmine +prince consort +elocutionist +wishing cap +runner +trestle +sugar water +half-and-half dressing +fringed poppy mallow +portiere +bung +swan orchid +weather satellite +beef broth +marblewood +sapper +agitator +wren-tit +grade +allspice tree +spacewalker +American hornbeam +sieva bean +dill seed +potoroo +love-in-winter +alembic +Cheshire cheese +small white aster +Oregonian +flipper +twill +differential gear +Prince Albert +licorice 
+foster-father +Melkite +portraitist +Yosemite toad +Cox's Orange Pippin +slender wheatgrass +knob +silique +Rocky Mountain bee plant +stirrup pump +chicken hawk +sweetbrier +Sierra lodgepole pine +poulette +biohazard suit +striated muscle cell +Geiger counter +World Wide Web +turmeric +prairie wake-robin +latchet +pushball +grill +shooting lodge +floating-moss +refried beans +boojum tree +red poll +toothbrush tree +rabbiteye blueberry +red haw +sweet vetch +delta +upland cotton +ballet mistress +padrone +complementary color +great Solomon's-seal +bud brush +brandy sling +spinster +Andorran +Mojave aster +mackinaw +golden calla +bottom rot fungus +segmental arch +periwinkle +hellion +topknot +copper +Mexican hyssop +weeping love grass +point woman +pathogen +fall cankerworm +common shiner +silverspot +corer +atomic pile +crystal detector +yellow spot fungus +truncated cone +saprobe +variegated horsetail +Cro-magnon +cercaria +aglet +pollster +oyster bed +pancake turner +egg cream +sporozoite +quirk molding +mutisia +sound bow +physic nut +sugar-bush +cow +magnetron +jungle hen +brassie +rock bit +taco sauce +seeded raisin +desert selaginella +folding door +vinegarroon +Pinot blanc +rye +ellipsoid +betel nut +tree of knowledge +ambrosia +long tom +breechloader +bicolor lespediza +cosmetician +monoblast +American oil palm +prancer +farina +caiman lizard +hardball +bullock's heart +cotton rat +whiting +weather ship +sharecropper +creamcups +gas bracket +divinity +ornithologist +yellow twining snapdragon +showy goldenrod +end man +heptagon +sand dropseed +round file +guama +blue elder +sand spurry +raccoon dog +zigzag goldenrod +fast reactor +arctic willow +cyclopean masonry +punter +sgraffito +slattern +storage ring +clipper +pulasan +short-tailed shrew +scammony +daybook +umbrella tree +coloring +element of a cone +gesneriad +cane +burgoo +western coral snake +friendship plant +Leydig cell +scrutineer +hairy golden aster +inclined fault +water milfoil +bryozoan 
+nardoo +native pomegranate +curly grass +Florence fennel +resurrection plant +ice water +crown +ploughman's lunch +clustered lady's slipper +kitchenette +sand sedge +pouched mouse +roadbed +parsley haw +predecessor +super heavyweight +seedless raisin +mailbag +sparling +codling moth +squama +Bercy +thermoelectric thermometer +Jaculus jaculus +saltpan +firmer chisel +round whitefish +ramrod +criollo +pinch bar +slash pocket +thigh pad +velvet plant +intergalactic space +brazilian ironwood +whaleboat +sirrah +hanging fly +aspirator +Dominican +dribbler +yellow-eyed grass +Cornish +geophysicist +tarmacadam +marchioness +rattlesnake orchid +Alaska Native +ilama +myrrh tree +zucchini +licorice root +nosebag +lounger +troposphere +virginal +spaghetti Western +Virgin Mary +waterwheel plant +dry nurse +enate +carpet shark +rijsttaffel +stuffing nut +caraway seed bread +Leotia lubrica +kaffiyeh +Boston baked beans +halophyte +backscratcher +instillator +trefoil arch +pip +digitizer +dosemeter +Carolinian +French sorrel +boards +historian +rangpur +clansman +goral +leatherjacket +coiner +fleece +white globe lily +storm cellar +roundhouse +mediatrix +butterfly flower +swamp gum +prairie vole +rhizomatous begonia +common tobacco +Marco Polo sheep +subarachnoid space +broomweed +safety net +silky wisteria +swagger stick +spectacled caiman +derris root +soap pad +chop-suey greens +summer hyacinth +palo santo +carbohydrate loading +chinch bug +roadman +sheep plant +messiah +desk officer +banquette +drugget +trumpet arch +great duckweed +purdah +heartbreaker +hasty pudding +alligator weed +dragee +yellow bristlegrass +Jacob's ladder +campstool +coffee fern +sweet fern +little chief hare +cat-o'-nine-tails +rep +American red elder +divorcee +black salsify +cambric +sennit +Canada ginger +wonderer +Formica +cream-colored courser +zooid +European beggar-ticks +sorrel tree +piddock +blolly +red-flowered silky oak +bay +Hooker's onion +dark horse +cone clutch +Roman hyacinth +paintbox 
+mestiza +green alder +bill +panicled aster +mammogram +snuffbox fern +Rediffusion +swamp fly honeysuckle +stoup +psychiatrist +nodding groundsel +student union +cold duck +bee beetle +playbox +Psychopsis krameriana +nosh-up +earthnut +narthex +single-rotor helicopter +revetment +sweetleaf +seasoned salt +piculet +speckled alder +mackerel scad +common yellowwood +devisee +static tube +Spanish heath +umbrella plant +fucoid +Chilean +coral-root bittercress +fanatic +cachou +agony aunt +bird's-foot fern +washwoman +torchbearer +placoderm +frosted bat +spicemill +Cape lobster +hard-shell crab +colonizer +camphor daisy +friar's-cowl +false tamarisk +toggle joint +tinsmith +theorist +hydrologist +loganberry +universal donor +northern whiting +tent-caterpillar moth +russet +kangaroo mouse +African scented mahogany +bastinado +breast implant +betel +grade separation +vox humana +stodge +Maryland chicken +Anguillan +oil pump +governor's plum +narcissist +deadwood +private citizen +winker +ropewalker +gidgee +Lothario +ski resort +major-domo +von Neumann machine +belaying pin +water parsnip +Fissipedia +luggage carrier +spring water +oyster stew +kohl +celesta +date-nut bread +punchboard +sunniness +hospital train +man +rack and pinion +mixer +pousse-cafe +narrow goldenrod +Maxim gun +stiff +recruiting-sergeant +watch glass +white hellebore +tung tree +prairie white-fringed orchid +beef Stroganoff +scoffer +grassy death camas +Shawnee cake +tapioca +Short's aster +banker +laparoscope +honeyflower +Caterpillar +electric clock +baling wire +huntress +Surinam toad +art school +incurable +Canton crepe +apple juice +hipline +bronchoscope +marshmallow fluff +Texan +wild fig +sawed-off shotgun +forestay +red kauri +fish slice +Egyptian grass +English walnut +brown sauce +ogee arch +nectary +chambray +leather flower +phloem +Persian violet +bomb calorimeter +western narrow-mouthed toad +soup du jour +sickle alfalfa +caracolito +periscope +coralberry +sword bean +sigmoidoscope +water 
locust +hygrodeik +sycamore +sheikdom +ballistocardiograph +clove +akee +fucoid +jacquard +cat's-ear +puritan +slender wild oat +smooth softshell +purchasing agent +landing craft +chartist +lace bug +sharksucker +Virginia chain fern +horseradish +namer +ripcord +personage +aspirin powder +puku +Wankel engine +nightcap +velvet bent +roridula +cytogeneticist +olm +almond extract +common heath +fringe-toed lizard +Kentucky yellowwood +lithosphere +cramp +bulgur +scurvy grass +officer's mess +frigate +electroscope +giant chinkapin +opah +rutabaga +wood hoopoe +Farley maidenhair +shingle tree +argentine +router +palm nut +quillwort +hiba arborvitae +runcible spoon +hireling +sickbay +alpine totara +white lupine +Cotoneaster horizontalis +desert plume +staghound +Sea Scout +opalescence +enophile +Jersey elm +coal house +Helvella acetabulum +selenium cell +white camas +creole-fish +auger +fragrant agrimony +research center +achromia +shank +cottonseed +mod con +extension +sugar beet +winter flounder +silky dogwood +strop +tokamak +rabbit ears +baby farmer +fireman's ax +serration +taproot +socket wrench +action officer +Chilean jasmine +Greek fire +stem-winder +body louse +lumpsucker +stink bomb +American lady crab +dicer +lie detector +maneuverer +black-headed snake +tiger moth +shooting stick +spermatid +babushka +deaconess +home +prior +chanfron +chickasaw plum +big-eared bat +rusty woodsia +tertigravida +miniver +combretum +habit +bluehead +angled loofah +gipsywort +fire-on-the-mountain +purple milk vetch +alpine gold +merozoite +loddon pondweed +Uniat +provost marshal +Gyromitra fastigiata +Coigue +proconsul +oarfish +San Jose scale +filature +chimney plant +spiny softshell +bluecoat +live axle +river limpet +clever Dick +pink bollworm +Japanese plum +roarer +caricature plant +wardroom +Texas chachalaca +Bahia grass +Moreton Bay tulipwood +accessory fruit +pearl barley +ashcake +bunt +Polynesian tattler +pine fern +laughing owl +potato fern +speaking trumpet 
+adjoining room +bearing rein +banana quit +redbrick university +Scleroderma bovista +magdalen +pressurized water reactor +advisee +NIMBY +poorwill +almond moth +comedian +star tulip +cracked wheat +water pump +guest of honor +yellow-breasted bunting +hire +pedate leaf +augur +purple locoweed +Socinian +upland white aster +guesthouse +double reed +detention basin +rollmops +hitch +bodega +mayeng +sparkplug wrench +attack dog +peach melba +heliozoan +tower mustard +blue mold fungus +lamplighter +banded sand snake +smooth crabgrass +elsholtzia +bodkin +Aegean island +bag lady +alewife +arcella +electrical contact +common ax +animist +concave polyhedron +coalface +climbing perch +yellowtail +hobble skirt +marquee +Russian dandelion +snow mushroom +polo ball +NADA daiquiri +cormous plant +chaparral mallow +inside caliper +milking stool +fallout shelter +sea gooseberry +Danish blue +grissino +chimney breast +mosquito fern +soundbox +spring chicken +epauliere +cape forget-me-not +japan +saddle oyster +white fritillary +push-button radio +bladder senna +bladder stone +macedoine +moire +Shawnee +starnose mole +douroucouli +horseradish sauce +electron gun +cotter +console +park commissioner +free press +lump sugar +western poison oak +apple maggot +keurboom +lisper +griffon +burin +horseshoe whipsnake +Jacobean lily +spinner +cochineal insect +emesis basin +sowbane +humanitarian +uakari +three-dimensional radar +wild hollyhock +heartseed +swinger +two-by-four +mop handle +common amsinckia +traitress +rush aster +fibrous-rooted begonia +violet-flowered petunia +milliammeter +alidade +azure aster +celery seed +snorer +scarlet plume +obtuse leaf +heathen +rose chestnut +headrace +dwarf buckeye +Pacific tripletail +wiggler +bounty hunter +Lowlander +slate pencil +typist +syconium +vaquita +skybox +business lunch +gusher +curacao +palometa +Diapsida +light diet +sourdine +thorny amaranth +potato fern +cartridge extractor +peshmerga +chaffweed +tahoka daisy +hematologist +massage 
parlor +diverging lens +breadroot +papyrus +amarelle +cover plate +hubbard squash +cryptomonad +whitetail prairie dog +rabbit burrow +orthochromatic film +goncalo alves +Chile bonito +tent-caterpillar moth +Manila grass +buck sergeant +mustard seed +crested wheatgrass +wise guy +asarabacca +field pea +bite plate +barbasco +heart-lung machine +mouse-eared bat +piping guan +gun pendulum +climbing onion +fungus gnat +Livonian +one-hitter +Chilean firebush +Sonoran whipsnake +round scad +myelogram +Rhodes grass +vomitory +roble beech +South-African yellowwood +molasses +Velcro +common calamint +radiation pyrometer +sketcher +chaparral pea +coffee stall +Australian nettle +bilimbi +Khedive +visionary +field spaniel +devilwood +collimator +Siberian spruce +sling +limestone salamander +ribbon worm +hazel +petter +coolant system +artillery plant +bailiff +chameleon tree frog +microsporophyll +maiden blue-eyed Mary +Drosophyllum lusitanicum +cocozelle +king post +nailer +knobkerrie +tovarich +Intelnet +worm lizard +drop forge +wool grass +brown bullhead +anthropoid +vitamin A2 +creche +hickory nut +whiffletree +deipnosophist +Muskhogean +masochist +hypsometer +gliricidia +complexifier +wild licorice +reconnaissance vehicle +fives +beefsteak plant +eastern dasyure +bookworm +crested coral root +wire recorder +cinnamon vine +bubble +Newfoundland dwarf birch +spruce bark beetle +teetotaler +fad diet +ascus +spicebush +African coral snake +soft-shell crab +Postum +packhorse +sand cherry +cricket-bat willow +middlebrow +Hungarian sauce +buffalo clover +jimsonweed +latanier +stablemate +jumper +zoospore +smooth woodsia +flowering ash +unilateralist +lomatia +flapper +wild cotton +Siberian wall flower +probe +bankrupt +blockade +lemon geranium +fig leaf +basic point defense missile system +clack valve +buttinsky +ingenue +mountain everlasting +zebra-tailed lizard +shaving-brush tree +evergreen huckleberry +core drill +lugworm +Cashmere goat +doorjamb +minelayer +student center 
+horsehair +European dewberry +white broom +arenavirus +eastern poison oak +rye ergot +Tupi +tensiometer +fleawort +coquille +icing sugar +junior lightweight +Doppler radar +mahuang +candlepin +chambermaid +evergreen blueberry +Eton jacket +parvis +solleret +molded salad +malvasia +birth-control campaigner +nonagon +backswimmer +ogee +bowstring +salt marsh mallow +trapezohedron +hoary willow +speech therapist +Zinjanthropus +core +red-backed mouse +eptatretus +mossy saxifrage +Aristotelian +Thessalonian +searing iron +bifocals +falangist +field pea +packsaddle +lay reader +hoecake +cuboid +white maire +iceman +lobscouse +neckcloth +color-blind person +Chinese holly +assemblyman +white-lipped peccary +kava +plastron +crab louse +hook wrench +trailing four o'clock +junior +skilly +internet +tonguefish +footman +sub-assembly +evangelist +track +bench lathe +desk clerk +scalded milk +chamois cloth +American marten +chachka +nondescript +pellitory-of-the-wall +swamp candles +procurator +cuddy +farkleberry +mountain male fern +trawl +dual scan display +fish meal +prospector +convener +guano bat +ant shrike +picture rail +sand rat +gynophore +quilting +sleeper +summer savory +Cotoneaster dammeri +smooth sumac +slumgullion +suite +catalufa +spherule +lean-to tent +gryphon +gas shell +short iron +sweet sultan +dewberry +Victoria plum +American water shrew +X-ray tube +macebearer +green arrow arum +abbe +poke milkweed +atheist +Fosbury flop +Ord kangaroo rat +moldboard +wheat germ +explosive trace detection +whippoorwill +examiner +tallyman +Crookes tube +wild peach +fringed grass of Parnassus +Crookes radiometer +Atlantic croaker +lobster stew +spring cress +maggot +pacer +hydra +Zionist +pepper tree +diamante +baize +Rhodesian man +county agent +respecter +Anglican +antimacassar +materialist +Swan River everlasting +cloud grass +toll line +C battery +chinese mustard +grass poly +warming pan +seasonal worker +common sickle pine +bathysphere +elegant Habenaria +card table 
+Chilean cedar +brocket +collimator +malted milk +avadavat +fire marshall +coloratura +yellow spiny daisy +fingerstall +narrow-leaf penstemon +indigo broom +pillwort +bearberry willow +Etonian +certified milk +climbing bird's nest fern +field coil +wrist pad +parr +kaoliang +engelmannia +stocker +satrap +Nantua +spearfish +caper tree +gold-tail moth +mountain chinchilla +sea milkwort +westerner +army cutworm +leaf-nosed snake +neurobiologist +xeranthemum +Eastern silvery aster +ecclesiastical attire +caper +Ukranian +bight +button fern +peach pit +oligodendrocyte +maar +digitigrade mammal +streptobacillus +sensitometer +preemptor +oat +bell foundry +crown lens +rock purslane +Junior +Brazilian guava +kicksorter +Ohio goldenrod +red mulberry +King's Counsel +mountain four o'clock +fairy shrimp +fell +oca +sycophant +chantry +dermatoglyphic +bomblet +keyhole saw +hangman's rope +little barley +lion-jaw forceps +giant scrambling fern +popper +dulcimer +Espagnole +tardigrade +smooth-haired fox terrier +bullbrier +rewa-rewa +Japanese poinsettia +trunk line +cannery +helminth +American spikenard +prince's-feather +arthroscope +ginger +aphakic +pilot bit +angle of refraction +low-sodium diet +wall creeper +growler +praetorium +Hall of Fame +soupfin shark +Molotov cocktail +kaffir boom +stitcher +sawwort +flagellant +Atlantic herring +Reticulitermes lucifugus +voltaic pile +snowy orchid +southern flounder +skysail +osage orange +white mullein +lined snake +tolu tree +poliovirus +foreman +burette +jackass bat +invigilator +electromyograph +acarus +presence chamber +columbian mammoth +hyacinth bean +pilot +meadow jumping mouse +Maria +outskirts +aftershaft +Queensland nut +schlockmeister +plainsman +afropavo +scarlet musk flower +five spice powder +gunboat +multiplex +Dutch uncle +louvered window +chimney corner +cuscus +psalmist +Vichy water +signer +amphiuma +harmonizer +authorizer +naiad +control rod +stentor +mountain bladder fern +gig +read-only memory chip +assenter 
+vixen +hermitage +corn dab +locksmith +cockspur thorn +variable-pitch propeller +western red-backed salamander +dolman sleeve +cultist +sweet buckeye +pine vole +Peking man +mountain swamp gum +nimblewill +bethel +aye-aye +lancelet +teff +Alpine celery pine +endive +nipa palm +center of curvature +seeder +Sabahan +sea scallop +social secretary +gorgonzola +western chokecherry +misanthrope +rabbitweed +beggarman +button fern +white mallee +doodia +mastiff bat +roper +prima donna +blanc +holding pen +fingerling +skyhook +flophouse +steam chest +crystallized ginger +acrocarp +horse pistol +true mahogany +costmary +ballistic galvanometer +jaunting car +bartonia +rep +mandibular notch +bubble and squeak +umpire +fringed loosestrife +bear oak +ski jump +staggerbush +plumcot +thermal reactor +field brome +bodkin +jackknife-fish +malope +writing arm +gold fern +Stayman Winesap +merlon +eclectic +fluxmeter +emeritus +imam +drum +pop tent +capital ship +subalpine larch +flail +Lorenzo dressing +tomboy +eastern woodrat +warrantee +Pacific spiny dogfish +sheepshead porgy +farthingale +Cryptoprocta +power loom +communicant +howdah +ectomorph +false foxglove +basset horn +odd-pinnate leaf +Wisconsin weeping willow +Queensland bottletree +dampener +corbel arch +silent butler +Circe +town clerk +Japanese chestnut +bloodwood tree +switcher +cup hook +spreader +rice rat +straightedge +traverser +fluid drive +Spanish paprika +sour milk +poison camas +bean dip +card table +vinegar fly +vizier +electric-discharge lamp +purple rock brake +dynamo +Japanese snowbell +Grindelia robusta +neuroglia +safflower seed +coronet +frown line +Renaissance man +Steller's sea cow +book scorpion +isosceles triangle +arthritic +spherical triangle +kangaroo mouse +garden orache +stemless hymenoxys +titi +out-basket +gent +columnea +mint sauce +mouthbreeder +Liebig condenser +cheerer +assegai +stickler +Merostomata +dimmer +grey poplar +common heath +scorzonera +glory hole +Blackfoot +oil slick 
+musketeer +apple geranium +daisyleaf grape fern +gas furnace +bijugate leaf +Arabist +star-thistle +hand throttle +huckleberry oak +lift pump +maulstick +Rome Beauty +Newburg sauce +pit +volunteer +Baldwin +ark +Asian horseshoe crab +black calla +marlinespike +Gentianopsid procera +guinea gold vine +tucker-bag +desk sergeant +piezometer +migrator +keelson +executrix +sackcloth +onion smut +buckboard +substitute +pudge +mess +cinchona +intervenor +gravimeter +pederast +censor +gastroenterologist +cutlassfish +launch +demerara +Diegueno +bog bilberry +aglet +soda fountain +crank call +harpoon gun +ribbon fern +Gurkha +output device +epilating wax +greasewood +water horehound +return key +fairy swallow +spatulate leaf +culverin +leptocephalus +kleptomaniac +barley water +bleeding tooth +Cheyenne +maleberry +limber +tapenade +whorled aster +toe +revenant +lap joint +vein +truant +florest's cineraria +morning dress +trichodesmium +nightshirt +element of a cylinder +shopaholic +section hand +electrodynamometer +Guadalupe cypress +rosebud +racist +avaram +keeled garlic +Alaska rein orchid +orange toast +cunner +dipstick +Neolentinus ponderosus +bulbil +charlotte +pull-through +header +Manduca quinquemaculata +persona grata +elegist +cafe royale +scup +semanticist +wood sage +field magnet +tundra +bay myrtle +alluvial flat +arrowleaf groundsel +celtuce +baryon +must +entrant +othonna +pied-a-terre +liza +sticky aster +grasshopper mouse +prison guard +tire iron +bomb rack +Spanish American +sheltered workshop +turfing daisy +backbone +tangle orchid +creeping willow +dumb bomb +horse cassia +barosaur +Yavapai +shrimp Newburg +peanut worm +dwarf chinkapin oak +corchorus +brick cheese +by-catch +stover +Urnula craterium +clasp +Kekchi +alpine coltsfoot +soybean future +altar wine +ripping chisel +encephalogram +mountain spleenwort +transferee +remoulade sauce +American rock brake +stenographer +read/write head +loblolly +ground +powdered mustard +brake band +sea dahlia +freak 
+proconsul +Coffey still +Sivapithecus +pellitory +palm cat +skew arch +American angelica tree +vigilante +candelilla +andryala +amarelle +swiftlet +petcock +associate professor +sclerite +open circuit +Virginia crownbeard +Last Supper +button tree +scyphozoan +margate +mercury cell +horsewhip +water scorpion +companionway +drop cloth +Amhara +miraculous food +pro-lifer +embryologist +Creole +bombazine +Indian blackwood +cubeb +trace detector +gros point +main-topsail +meringue kiss +spree killer +capstone +specimen bottle +woolly apple aphid +silverweed +American barberry +gallfly +European bog asphodel +northern flying squirrel +alliterator +Old Catholic +heliograph +Pteris cretica +tippler +pump well +allspice +balancer +scarlet bugler +lantern fly +white prairie aster +krummhorn +robin's plantain +Pacific sardine +patty-pan +decaffeinated coffee +western saxifrage +warrantee +colorimeter +ball bearing +makomako +foot +troika +apricot sauce +data multiplexer +rose-root +sound film +Northern dewberry +water hickory +swing door +spastic +Oligoporus leucospongia +botulinus +tamale pie +Sagittarius +muff +spicebush +petiolule +pump action +Parry's pinyon +split-pea +rudder blade +princess royal +wormseed mustard +honey guide +pip-squeak +fin keel +foretop +cyrilla +Navaho +melanocyte +deist +silver tree +citrus whitefly +Morrow's honeysuckle +green peach aphid +longanberry +call-board +wild yam +novelist +toothed spurge +alienee +pond apple +allspice +Carolina lupine +Jack of all trades +white false indigo +boiled dinner +princewood +sailor's-choice +false bracken +microbrewery +black grama +tutee +brickkiln +sea raven +guesser +wirework +European lemming +thyrse +plains lemon monarda +milo +shunt +spotted cowbane +anchovy sauce +grande dame +Maryland golden aster +Chinese puzzle +boarfish +burweed marsh elder +defense contractor +nitric bacteria +Belgian hare +beach plum +conformal projection +sand fly +steering linkage +quickset +Mahayanist +Geiger tube +loudmouth 
+Lancastrian +brownie mix +ex-spouse +deltoid leaf +Shasta salamander +rabbet joint +purple anise +garibaldi +gebang palm +bladderpod +Host +great bowerbird +string cheese +spinning jenny +drift net +matriarch +guar +bitter betch +panda car +mess +plains pocket mouse +scarlet wisteria tree +deerberry +reamer +homing torpedo +molehill +stockyard +reniform leaf +rag +symmetry +Texas star +lerot +pickle relish +three-seeded mercury +cotter pin +ice-cream bean +farmyard +bar magnet +hansom +prickle cell +renal cortex +pest +Ultrasuede +sailing master +brougham +wastrel +amboina pine +Canary Island hare's foot fern +ninepin ball +southwestern lip fern +usherette +lemon drop +star begonia +weeds +saltworks +Persian melon +corbina +medusa +bucksaw +Gibson girl +diameter +American twinflower +kino +clear liquid diet +angiocardiogram +wetter +oyster cracker +yellowfin mojarra +wild parsley +life tenant +broom closet +Corynebacterium diphtheriae +square shooter +bedwetter +ball-and-socket joint +nonsolid color +Salmonella typhimurium +buffel grass +hip pad +subaltern +heliothis moth +trail boss +hayloft +Francisella +primordial dwarf +cock-a-leekie +sugarplum +propulsion system +tyrolean +Carib +salai +ketembilla +ironclad +cornhusk +heckler +multistage rocket +north island edelweiss +Chaldean +twenty-two pistol +Francophobe +scofflaw +sickle feather +screw bean +sea squill +Scopolia carniolica +agglomerator +western holly fern +presenter +straight pin +Myxine glutinosa +Colbert +clover-leaf roll +war paint +bird's-eye bush +longfin mako +running suit +arrow wood +margrave +blue fleabane +dracontium +plastron +chimney swift +child prodigy +commissar +turtle soup +postulant +archaebacteria +snakefly +Pitot tube +chap +smilo +Malthusian +French roof +worm wheel +gulag +pointed-leaf maple +pull-off +Cathaya +American green toad +ball cartridge +infiltrator +snowfield +crotchet +auxiliary pump +bearnaise +galax +chaenactis +olympic salamander +sundowner +cows' milk +beach plum 
+moss-trooper +Arabidopsis thaliana +cat's-claw +bog rosemary +ribier +book agent +bumper jack +beefwood +monk's cloth +alpine bearberry +climbing fumitory +cucking stool +puka +Piltdown man +property man +discharge lamp +X chromosome +knobble +lobster Newburg +herbalist +sunray +golden saxifrage +leopard cat +muffle +stonewort +blancmange +intraocular lens +trepan +desert mariposa tulip +plume poppy +Dane +martynia +shaver +white milkweed +napu +tansy-leaved rocket +abortus +telemeter +tansy mustard +harpy +honeysuckle +ironworks +testacean +Tartuffe +silvervine +Sihasapa +surface gauge +western blind snake +paramyxovirus +Icelander +bird louse +stockbroker belt +test-tube baby +ague root +little golden zinnia +dietician +elephant's-foot +dirty bomb +sailing warship +brier +tinter +Connemara heath +potato fungus +bait casting +decagon +rosefish +die +high-pass filter +solitaire +widow's walk +goldthread +Tudor +trews +orange pekoe +ninon +soda jerk +sump +flying carpet +burial garment +oblanceolate leaf +press gallery +Shintoist +three-centered arch +spreading pogonia +Moro +foxtail orchid +Ghanian +dry kiln +thane +naranjilla +bitter pea +American bugbane +apron string +oyster fish +Port Jackson fig +prize winner +high-water mark +Oneida +smoking room +potato skin +charge d'affaires +gantlet +amyloid plaque +barmbrack +mate +arrow leaved aster +handbarrow +horned screamer +virago +linoleum knife +rattlesnake root +K ration +reset +foot brake +red coral +good guy +aberrant +lavalava +poleax +garden webworm +sneezer +mountain heath +American dog violet +eolith +chimneysweeper +matriarch +smalltooth sawfish +sea mouse +tubercle bacillus +superconducting supercollider +Abney level +darnel +gherkin +celery salt +Tungus +pulasan +oriflamme +death camp +redhorse +apprehender +scion +selectwoman +pentahedron +principal +old school tie +slice bar +chanar +pimento butter +wailer +zero +mescal +rosebud orchid +stone bramble +Jarvik heart +NOC +pitchman +rat cheese 
+strawberry tomato +dwarf golden chinkapin +landau +tocsin +ampulla +scratcher +crab Louis +ginseng +ripcord +polluter +tensiometer +eyewitness +aalii +Oregon crab apple +conservator +day jessamine +hexahedron +suture +tippet +linsey-woolsey +vernal witch hazel +stainer +egocentric +canistel +nudger +shipping agent +shortleaf pine +battle sight +cheese spread +weeder +incendiary bomb +honeyflower +stovepipe iron +stepper +hellgrammiate +votary +aflatoxin +arquebus +impulse turbine +pipewort +garrote +glow lamp +pigsticking +blood clam +surface search radar +Bolshevik +platen +chariot +Gentianopsis thermalis +water level +quandong +catalytic cracker +giant foxtail +nut butter +drainplug +holdover +coastguardsman +Secretary of Health and Human Services +Seeing Eye dog +American plaice +coquilles Saint-Jacques +christella +medium +clingfish +lally +light-o'-love +Gentianopsis detonsa +taper file +signal detection +trip wire +lignosae +receiver +sedan +mud puppy +corn sugar +Philippine mahogany +magnetic pole +jointed rush +trapper's tea +Dorking +welcome wagon +clammyweed +guard +false azalea +convalescent +babassu +dedicated file server +colossus +air search radar +marquess +straight flute +sand stargazer +sea catfish +rosilla +ripsaw +Bermuda onion +peach sauce +sagebrush mariposa tulip +yashmak +Virginia mallow +erose leaf +sand blackberry +boulevardier +forester +choragus +onion mildew +threadfin +winged pea +sugar daddy +rotary press +styracosaur +rathskeller +Japanese millet +anchorite +coral drops +false gavial +eastern pipistrel +cheese press +Chinese primrose +pamperer +real estate broker +power worker +breeder reactor +nutcracker +piano wire +cushaw +Sinanthropus +firebreak +kelp greenling +herba impia +toll call +yoke +bird fancier +evening-snow +fever tree +reed meadow grass +flanker back +toggle bolt +Santa Cruz cypress +carbonnade flamande +northern dune tansy +mikado +millettia +forty-five +court +icepick +holm oak +Japanese angelica tree +Pacific cod 
+cant hook +urologist +spelt +lekvar +enologist +Mediterranean flour moth +prickly-edged leaf +Spanish grunt +dune cycling +frostweed +whisperer +tucker +Roman wormwood +counterterrorist +woolly alder aphid +Nuttall oak +snail butter +threshing floor +motley +forge +water mold +mummichog +sulfur paintbrush +head +walking delegate +jujube +peachleaf willow +Christmas bells +valley pocket gopher +bear's-paw fern +Lanthanotus borneensis +pearl hominy +placeman +swage block +offerer +stargazer +jeweler's glass +male chauvinist +crossbar +Oktoberfest +tamarau +micronutrient +large-leaved aster +tasset +tepary bean +sausage curl +ivy +snob +roller towel +wood meadowgrass +archil +padrone +prairie rocket +tongueflower +kidney fern +Carolina buckthorn +sea island cotton +landscape architect +realist +oyabun +mother hen +ostracoderm +esker +heliophila +nympholept +shining clubmoss +press agent +clam dip +Djiboutian +white currant +codfish ball +hand cheese +kraal +trident +conventicle +bacteroid +Indian plantain +quandong +kola nut +signor +theater light +musk clover +canistel +silent partner +steel-wool pad +diggings +affluent +sightreader +John Doe +arrowworm +goatsfoot +guardroom +wild cinnamon +kaffir boom +ink eraser +yardie +industrialist +sea lily +polarimeter +Polistes annularis +western big-eared bat +omnivore +Ted +horsecloth +crab cocktail +vacuum chamber +flower-of-an-hour +bilge +poleax +neolith +Montezuma +plum-yew +welfare case +trave +pipe bomb +shading +Centigrade thermometer +bangalore torpedo +celery top pine +nuclear rocket +fowling piece +anti-Semite +landscape +derris +bush honeysuckle +Mediterranean water shrew +ticket collector +masked shrew +white dipladenia +Savoyard +bondman +tempter +pygmy cypress +pentathlete +thruster +usurper +Arminian +yerba buena +ice field +ichthyosaurus +sackcloth +bean tostada +Oxbridge +Pteropus hypomelanus +thinker +bank robber +ape-man +thurifer +knawel +mule fat +hot spot +hairy-legged vampire bat +night raven +hook 
and eye +crocodile bird +skunkweed +beaver rat +cypress sedge +florida selaginella +April fool +Jonah crab +glass wool +corkwood +dwarf elder +hinging post +gentile +Brazilian trumpeter +witch doctor +thermograph +pink shower +Mao jacket +capelin +parang +bradawl +stooper +jewel orchid +citrange +oarswoman +Macedonian +particolored buckeye +pachycephalosaur +satinwood +Chinese brown sauce +peep sight +straight man +quandong +chamois cress +nonfat dry milk +rosin bag +Leiden jar +Grimes' golden +spirillum +grass vetch +carillonneur +downy wood mint +melon ball +sweet calabash +chlamydospore +bombshell +sidewall +sprig +Indian button fern +globe pepper +rough-stemmed goldenrod +bocconia +bubble chamber +sand dab +plum-fruited yew +aecium +marrowfat pea +hobbyist +whipper-in +salad burnet +neckband +Tangier pea +sauce Louis +salad burnet +artist's loft +koumiss +Nazarene +cutter +scrim +drape +crab-eating dog +deckhand +bedroll +gaff +stifler +pink lady +great plains paintbrush +patternmaker +yoke +caryophyllaceous plant +angrecum +quadriplegic +grid +genlisea +aspic +water table +junket +signore +Mutillidae +proprioceptor +pivoting window +Indian poke +synchroscope +trichion +tarahumara frog +proctoscope +abomination +purslane speedwell +breast drill +Japanese barberry +mandrake root +breakable +salon +American watercress +take-up +entrenchment +cocktail sauce +Scotch asphodel +borough +matchmaker +Seneca snakeroot +pointsman +psephologist +clustered poppy mallow +onion thrips +nuclear-powered ship +organizer +deciduous holly +balsam willow +enzymologist +caraway +drip loop +dog laurel +Orangeman +sapsago +polymath +backplate +leathery grape fern +modillion +two-timer +handhold +consignee +white stringybark +nettle-leaved goosefoot +bookmaker +disk drive +doliolum +palmist +packinghouse +Spandau +Whipple's penstemon +sword grass +ribbon development +pearly-shelled mussel +winter heliotrope +rogue elephant +deck tennis +Venus's flower basket +football +shim +boatswain 
+blinks +armored catfish +hooded seal +outdoorswoman +water starwort +upholstery needle +pleurodont +silky anteater +cornmeal +lead-in +redfin pickerel +horse balm +Rydberg's penstemon +cascade transformer +fly poison +Volvaria bombycina +broad-leaved twayblade +pastry cart +body plethysmograph +waverer +hardware store +Parry's penstemon +European sanicle +strawberry geranium +cross-examiner +head gate +devil's tongue +hemiepiphyte +pine hyacinth +machmeter +spirit lamp +field judge +Rock Cornish +mayhaw +Sassenach +bog pimpernel +parallel interface +crowberry +roach +Aegyptopithecus +cajan pea +lapboard +cryostat +magnetic storage medium +white yam +Lombard +rhymer +bed and breakfast +bunya bunya +rifle grenade +caterer +collared pika +anti-submarine rocket +bookkeeper +Western mountain ash +profit taker +fruitlet +Knowlton's cactus +infernal +beefsteak begonia +lunula +emulsion +intermediate wheatgrass +titfer +European sea bream +bigeye scad +yak butter +kola +cone pepper +plesiosaur +ragwort +penal colony +black carpet beetle +lubber's hole +Stapelias asterias +yard marker +balloon bomb +Scythian lamb +armory +selsyn +marblewood +spirula +fatalist +hash head +armiger +Dom Pedro +white-chinned petrel +ballast +orthopter +greater water parsnip +clutch +largeleaf holly +Evangelist +king whiting +tuna fish salad +Muscadet +surpriser +jumping bristletail +proportional counter tube +Hamburg parsley +obstructionist +pus-forming bacteria +creep feed +stepbrother +janissary +control freak +trusty +trepan +King William pine +orthicon +geological horizon +molecular biologist +violator +pariah dog +Austrian +conciliator +Fauntleroy +packing needle +mazer +Saturday night special +leucocytozoan +coastal rein orchid +whirligig beetle +capitalist +breeches buoy +clubroot fungus +meadow spikemoss +Kichai +Spanish lime +land office +camera obscura +strafer +purple-stemmed aster +lusterware +valve +Roman nettle +isthmus +breadstuff +sealskin +maleo +bilge keel +carissa plum +fish 
fly +kolkhoznik +heath pea +cowage +hog sucker +Sam Browne belt +inductor +wild licorice +Socotra begonia +supernumerary +Angle +red shrubby penstemon +toilet kit +tawse +sweet bells +kawaka +brown soft scale +lyssavirus +betting shop +double-crosser +macrotus +climbing hempweed +poi +strip mall +deadhead +petit juror +tract housing +American mistletoe +lace-flower vine +precipitator +endoparasite +hairy wood mint +red snapper +Victorian +hog peanut +line of heart +opossum shrimp +plumcot +Bavarian blue +slops +light flyweight +oregano +sand myrtle +pocket battleship +curator +narc +hydraulic cement +plains pocket gopher +closed loop +pluralist +molter +Christmas bush +snuffers +slender knapweed +footwall +plage +caper tree +red siskin +tender +boat train +tipster +low-pass filter +student lamp +morosoph +japonica +bellows +herald +oyster plant +savory +mail +computational linguist +blade +winter crookneck squash +zoomastigote +blackmailer +richweed +dialectician +genip +plumed scorpionfish +jet bridge +thermopile +billy buttons +Brule +millwright +Arenaviridae +Jones' penstemon +monastic habit +genipap fruit +burnous +dairyman +top +crab-eating raccoon +quadrangular prism +pilot burner +weeder +trireme +boy wonder +man of letters +Catawba +high-muck-a-muck +light circuit +bloodworm +lappet caterpillar +half-and-half +office boy +saddle stitch +mistletoe cactus +false chamomile +Catalina cherry +workhouse +Jamaica quassia +britches +tooth shell +reduction gear +carrot pudding +balsam woolly aphid +handspike +aioli +silver hake +flour bin +wireman +gas-cooled reactor +aficionado +plus fours +gitano +gene chip +oilfish +ingenue +tulip orchid +late purple aster +pork and beans +envoy +lemon extract +milk bar +black huckleberry +ground roller +Connecticuter +siderocyte +Jacquard loom +chub +meat safe +stock cube +Australian sumac +purple sanicle +tailless tenrec +dog wrench +rainbow cactus +castor bean +scintillation counter +eohippus +pawnbroker +gauge boson +front 
man +early warning radar +bearing wall +Bourbon +sandwichman +sild +gravelweed +perishable +cembra nut +riflebird +quicksand +slate +sweeper +ship-towed long-range acoustic detection system +defamer +president +vitamin K3 +challis +tanekaha +bloodwort +grenadier +quietist +Zairese +fucker +foremother +gesneria +print buffer +salsilla +fissiped mammal +fender +consulate +acidophilus milk +Southern dewberry +snail darter +Panama redwood tree +dehydrated food +bush willow +coffee fungus +Sinologist +Mesoamerican +hood +large civet +deck-house +cyborg +smuggler +pepper sauce +cyberpunk +Grand Inquisitor +persona non grata +haggis +weeping tree broom +stop bath +modifier +coyol +conodont +yellow giant hyssop +optical pyrometer +Carolina moonseed +marinade +aspartame +false wintergreen +cityscape +philter +turnery +hemiplegic +chuck-will's-widow +vower +track star +myrtaceous tree +small civet +intelligence analyst +dogcart +yardman +cross bit +holometabola +platen +sweet cassava +Comstock mealybug +acute angle +Communist +alcohol thermometer +mountain hollyhock +Mead's milkweed +highjacker +Townes +congou +Astrophyton muricatum +lazybones +roughcast +pressure cabin +clinch +cinnamon +smoke bomb +quandong +tout +office-bearer +punctum +efficiency apartment +Queensland hemp +Ceylon bowstring hemp +newswoman +vermin +fetid bugbane +grantee +sanitary landfill +gluten-free diet +clabber +shillelagh +white lettuce +sweet coltsfoot +beggar's lice +samite +loser +flasher +water star grass +banana passion fruit +translator +artificial kidney +Virginia creeper +American crab apple +cactus mouse +nebbish +Ligustrum obtusifolium +vox angelica +stringer +hunter +know-it-all +scene painter +invalidator +jungle cock +basilica +coriander +California single-leaf pinyon +miles gloriosus +pina cloth +law agent +scarlet fritillary +keurboom +bailor +ramjet +seedling +rib joint pliers +ways +picket ship +Surgeon General +wasabi +marquis +clostridium perfringens +Helvella sulcata +furnace 
lining +kingwood +painted sandgrouse +plain wanderer +Indian madder +silver screen +bailey +dwarf spurge +Serbian +ball-buster +shaheed +Platte River penstemon +tensiometer +mute +nymphomaniac +Yokuts +arroyo willow +whipping post +class act +load +winged everlasting +periodontist +diarist +robber frog +diestock +curry powder +ratchet wheel +store detective +hog plum +prune whip +shortwave diathermy machine +Anabaptist +post chaise +Kennan +bean caper +delegate +orderly sergeant +celtuce +jumping bean +gowen cypress +puddingwife +registered nurse +West Saxon +rosita +gun room +nasotracheal tube +matchboard +flagship +Boswellia carteri +Canadian pondweed +wonder boy +sewer rat +dimetrodon +pantograph +marsh bellflower +angoumois moth +slippery dick +woolly indris +creme de cacao +dulciana +Jewess +Macadamia integrifolia +least shrew +don +diffuser +black-stem spleenwort +grouseberry +goniometer +annotator +sticktight +gossip columnist +speechwriter +capon +rock hind +Liederkranz +chandler +echocardiograph +sidelight +fisher +brocket +New Zealand daisybush +northern sea robin +roller bandage +peachick +pellet +pichi +plug fuse +spark coil +buckwheat +brood bitch +wedgie +dwarf bilberry +filigree +bull +queen +dodo +Salish +denticulate leaf +Western silvery aster +Prima +magnetic bottle +fetterbush +process-server +nainsook +mythologist +Piedmont glacier +hammerhead +niggard +Mound Builder +Kui +Nootka +highbinder +passenger pigeon +oblong +tickler coil +agnostic +succorer +esophagogastric junction +dressmaker's model +bombshell +social anthropologist +gildhall +orpine +pterodactyl +bristly sarsaparilla +Lane's Prince Albert +hognose bat +salesgirl +lubricating system +electric catfish +wrap +Jacksonian +chard +cherry laurel +foreground +beadsman +Kolam +amniote +frozen pudding +acid head +poor box +depositor +coattail +pallas's sandgrouse +mason's level +English lady crab +skeg +cruel plant +petrolatum gauze +tuna +swivel +stock-in-trade +perisperm +civies 
+Phyllostomus hastatus +alienor +Verdicchio +guard's van +onion butter +moviegoer +planter +citrange +box huckleberry +iconoscope +familiar +helmsman +baby boomer +constructivist +American bog asphodel +whorled caraway +simple pendulum +viviparous eelpout +Job's tears +holdout +sour salt +poison bush +dusky-footed woodrat +golden algae +granadilla tree +telethermometer +crossbar +thrift +African bowstring hemp +dog in the manger +hayrack +gold-crowned kinglet +prolonge +doge +pencil +discount house +mulligan stew +Nonconformist +virologist +gregarine +facula +rocket scientist +thin-shelled mussel +oospore +annual salt-marsh aster +Afrikaner +metallic +julienne +culverin +cleavers +Berliner +mudhif +thorny skate +brown lemming +yellow colicroot +cooling system +large-leaved magnolia +free-reed +canyonside +preemptor +stake +Brucella +anti-G suit +pleximeter +squire +salsilla +write-in candidate +lowland burrowing treefrog +flare star +dwarf hulsea +jobber +mangel-wurzel +quagga +red-skinned onion +positive pole +Pteropus capestratus +jug wine +stomacher +standee +bladder worm +hakim +house of correction +pelisse +golden mole +temporizer +rose apple +drove +umbrellawort +holy of holies +lawyer cane +smooth lip fern +anode +astatic coils +zip gun +feverroot +self-heal +expansion bit +salt reed grass +field pussytoes +nutmeg hickory +cryptic coloration +Venus's girdle +Hunkpapa +Calostoma cinnabarina +raft foundation +May apple +pygmy mouse +prokaryote +yellow-green algae +Bermuda maidenhair +withdrawer +coelacanth +Elliott's goldenrod +driftfish +epicyclic train +bowl +swamp dewberry +corbel step +sadist +party line +anti-American +mining engineer +Amur privet +conidium +Gastrocybe lateritia +lithia water +chaulmoogra +Rough Rider +Guinea pepper +glade mallow +pitcher sage +whitecup +shanghaier +low St Andrew's cross +phonologist +cocobolo +perfumery +visor +prison chaplain +belt +ingesta +literary critic +industrial watercourse +reckoner +pursuer +Kinetoscope +Kuiper 
belt +hyperope +raw recruit +Galiella rufa +Prince Albert yew +slit trench +usher +tenderfoot +white-rayed mule's ears +browser +piccalilli +bran +giant buttercup +water lobelia +arborescent plant +echinus +dryland blueberry +struggler +platyctenean +Geordie +domatium +twenty-two rifle +keteleeria +sports editor +chorus girl +Hakham +dry-bulb thermometer +onomancer +double-bitted ax +Girondist +bottle bank +thyrsopteris +bandwagon +star anise +armored car +dhawa +Bessemer converter +mutineer +paradise tree +tupik +centurion +mending +chowchow +margrave +International Grandmaster +African hemp +catafalque +leptodactylid frog +forcemeat +tank shell +pill +barbecue pit +worthy +lady's maid +evergreen +Jesuit +South American staghorn +rigger +suffragan +imperialist +spherical angle +grey lemming +kitchen police +tree swift +coliphage +archaist +Conservative +rib +exegete +Mendelian +tragedian +steerage +Paleo-American +obeche +garlic +grapefruit peel +accommodating lens implant +half blood +barrelfish +catgut +lanceolate spleenwort +hardliner +frieze +name dropper +carrack +huckster +onion bread +magnetic head +pease pudding +raisin moth +negative magnetic pole +electroencephalograph +bunji-bunji +synchroflash +Mornay sauce +stencil +winged pigweed +Nesselrode +MEDLINE +licorice +mainspring +melilotus +duke +experimenter +Napier's bones +four-minute man +pin-tailed sandgrouse +toolmaker +pogge +rootstock +baton +pricket +creeping snowberry +anomalops +nester +devourer +apolemia +Maricopa +pine-barren sandwort +larvacean +American dewberry +escalope de veau Orloff +gig +myrtle +pitsaw +Lutheran +fish house punch +gnathostome +intake valve +molasses taffy +clammy locust +vandyke beard +Atlantic tripletail +planktonic algae +estradiol patch +flummery +cytologist +sectarian +oil meal +tomtate +mediterranean anchovy +aspersorium +argonaut +porkholt +sheep ked +algometer +Adventist +false goatsbeard +snake polypody +streetwalker +shelver +adoptee +highflier +pitch apple 
+prairie rocket +fish mousse +viroid +deckle +manila tamarind +observer's meridian +pincurl clip +hardstem bulrush +gossamer +brookweed +Druze +hug-me-tight +accessory before the fact +oilman +Comanche +Marine +bedlamite +Chinese cork oak +squawbush +false miterwort +walk-on +Cynopterus sphinx +brandyball +landlubber +arrowroot +cape forget-me-not +galoot +tabor pipe +checker +Levant cotton +paddle box +murderess +smirker +fuddy-duddy +withdrawer +newel +shade +pink disease fungus +tipu +sweet sultan +aeronautical engineer +tall gallberry holly +acarid +conqueror +cucumber +film director +ordinary +salon +closet queen +allegorizer +tonka bean +flax rust +negative pole +dagame +dentist's drill +mock privet +micropyle +contributor +dark horse +climbing corydalis +cosmotron +land agent +Big Blue +Cynic +tassel flower +lyrate leaf +Minuteman +Dutch-elm beetle +Hessian fly +flower girl +West-sider +window dresser +skinny-dipper +whitebait +out-and-outer +hooker +amicus curiae +jack +camwood +stockist +black root rot fungus +Jamaica dogwood +diaphragm +Holocentrus ascensionis +roselle +black maire +Pygmy +fumigator +lame duck +mudder +hydraulic transmission +conning tower +phoronid +batfish +hearing dog +monohybrid +whaling gun +Cockcroft and Walton accelerator +allemande +seasoner +epileptic +ammonia clock +Young Turk +lanseh tree +urceole +cafe noir +poster girl +Oglala +deadeye +manna lichen +positive pole +cinch +lyricist +hermaphrodite +kidney stone +dilator +number one +frotteur +kaffir bread +fish knife +tarragon +adjuster +potato wart fungus +Florida pompano +conductor +corbie gable +rounders +Catha edulis +bender +recruit +Uruguayan +subject +bunghole +day boarder +pocketed bat +Oxonian +owner-occupier +yellow-leaf sickle pine +devisor +exhibitor +looking glass +shipowner +crooked-stemmed aster +calico +dash-pot +defilade +Confucian +egg-and-dart +irreligionist +lepton +self-rising flour +diving bell +Brahui +shop girl +maximum and minimum thermometer +Dalmatian 
laburnum +correspondent +subduer +nonperson +Reaumur thermometer +rough-leaved aster +jacksmelt +pinfold +magneto +ex-wife +round-leaved rein orchid +purloo +American shrew mole +sweet sand verbena +polymastigote +outfitter +curled leaf pondweed +Italian dressing +borderer +ambusher +geebung +four-stroke engine +small ship +homeopath +gynostegium +political prisoner +Radiigera fuscogleba +ensiform leaf +rhizoctinia +satyr orchid +rue +bouillon cube +flip +prophyll +tilefish +periselene +prima donna +choker +laminar flow clean room +Hooker's orchid +fish joint +mombin +remover +array +coelostat +autophyte +consigner +Damaraland mole rat +gasman +public works +lye hominy +pearlfish +piassava palm +Georgian +uxoricide +confessor +community center +epigone +tagger +abrading stone +cryoscope +nautch girl +reliever +Cartesian +Indian beech +protoplasmic astrocyte +fundamentalist +mustard sauce +crank +houselights +five-point bishop's cap +comedienne +triangle +presentist +beaugregory +dreamer +Wave +blue mockingbird +Barbados gooseberry +ten-spined stickleback +papoose +silky pocket mouse +holdup man +agent-in-place +suspensory +emigrant +ropemaker +bookbinder +jumby bead +undershrub +Killarney fern +sheep bell +city slicker +equerry +pea crab +down-and-out +blackmouth bass +shirtmaker +lister +UNIX guru +snipefish +gimbal +maisonette +haircloth +Ranvier's nodes +pigmy talinum +tribute album +msasa +hydroxide ion +madame +four-pounder +prophet +sloganeer +field-effect transistor +nude mouse +canteen +Calostoma lutescens +buteonine +sunlamp +Uruguay potato +Spanish tamarind +Prince-of-Wales'-heath +kishke +caprifig +chincapin +hegari +alarmist +bathtub gin +astatic galvanometer +Calostoma ravenelii +marang +tussah +coin box +bugleweed +hacker +frontal eminence +timekeeper +shunt +bicycle clip +mustang mint +caesium clock +hospice +glenoid fossa +archpriest +ex-gambler +incrustation +salvager +Donatist +violator +lamb succory +hygroscope +oilbird +sharptail mola +showplace 
+corn syrup +flashlight fish +pulse timing circuit +anchovy paste +fascista +chigoe +divan +Druid +squad room +Huntingdon elm +buffalo carpet beetle +carper +corn lily +goats' milk +assault gun +cockpit +Lochaber ax +Visigoth +occupier +Basotho +criminologist +spindle +Rosicrucian +Cornishwoman +musk kangaroo +artificial skin +pandurate leaf +Parkia javanica +roundhead +tea-like drink +basidiolichen +unguiculate +stepmother +Nauruan +gutta-percha tree +bloodberry +scarlet haw +marupa +censor +algebraist +pelvimeter +whaler +cowhide +paparazzo +biochip +internationalist +Yukon white birch +hangar queen +chlamydia +puttee +Pipturus albidus +pearly razorfish +sea moss +burglar +hoary golden bush +colter +drey +bushman's poison +maxillaria +gnetum +deadeye +shittah +swamp oak +damper block +deepwater squirrelfish +truffle +cangue +paleolith +lawyerbush +sorehead +Texas snowbell +Tremella reticulata +quarter +keelboat +dimity +whiner +Wagnerian +myrmecophyte +frontierswoman +pyrometric cone +big-tree plum +puppy +galbulus +hod +winceyette +carriage wrench +dictostylium +farmland +infanticide +Jacob's rod +threadfish +monocline +inamorato +leaf miner +purple cress +passer +black-fronted bush shrike +silverrod +bootmaker +segregate +captive +Edmontonia +spherometer +television transmitter +bladder +Saratoga spittlebug +dynamometer +lodge +smooth darling pea +Cossack +wake-up call +Olmec +sutler +molasses kiss +corner post +rattlesnake weed +yardmaster +adder +rhinoscope +referral +ulster +pantaloon +counterspy +gadgeteer +heart cherry +hospital chaplain +Clydesdale terrier +plank-bed +Russian thistle +actinometer +dyspeptic +common wolffia +firewall +seidel +potato moth +soapweed +seif dune +thill +cosmographer +absolver +halberdier +fire control system +kai apple +bastard pennyroyal +Big Brother +broadcast journalist +Albatrellus dispansus +citrophilous mealybug +split end +nickel-iron battery +Newtonian +gas maser +thumbstall +anaspid +dusky-footed wood rat 
+latitudinarian +flatbrod +schizocarp +niqaabi +flight surgeon +gyrocompass +Polyporus tenuiculus +Utopian +mailboat +spellbinder +undercoat +cassareep +typical jerboa +photocathode +katharometer +bight +fur-piece +penetration bomb +malik +Siberian millet +nanomia +Wykehamist +tosser +gyrostabilizer +microwave diathermy machine +crystal set +wall +legatee +alfalfa +angwantibo +charioteer +piano maker +African mahogany +Morlett's crocodile +taro +parallel circuit +cush-cush +etymologist +matriculate +neem seed +cornerback +kingfisher daisy +redoubt +blastomycete +peplos +costumier +publican +tobogganist +semolina +myrmidon +parricide +gymslip +whoremaster +cryptocoryne +header +platitudinarian +barleycorn +spiral bandage +reciter +abecedarian +dance +wrymouth +bilberry +Liopelma hamiltoni +streamliner +Fordhooks +fixed phagocyte +radiobiologist +neurologist +Selkup +dollarfish +cascade everlasting +acrodont +boarhound +midstream +theatrical producer +abhorrer +goldsmith +photometrist +Anglo-Saxon +rugel's plantain +sable +workmate +ferule +ankus +earleaved umbrella tree +Passamaquody +timucu +Mexican pocket mouse +yerba santa +Rochon prism +apomict +monocarp +sweet unicorn plant +common winterberry holly +archivist +drypis +paretic +fly-by-night +white-berry yew +Schoolman +blue cheese dressing +vintager +squatter +Euphausia pacifica +corrugated fastener +yellow henbane +Croesus +almoner +analphabet +acoustic delay line +sheep frog +workhouse +horseleech +venturer +pond-scum parasite +Pyrenees daisy +plagiarist +Truncocolumella citrina +rerebrace +group captain +caddis fly +hot-rock penstemon +kanzu +stylopodium +slopseller +rauli beech +starter +ootid +statesman +distributor cam +ascot +falcon-gentle +Duplicidentata +spotted antbird +heliometer +false buckthorn +Allegheny spurge +Cavalier +dart +photocoagulator +master-at-arms +kei apple +baldachin +crapshooter +gametangium +white hope +chipotle +spike heath +Scotch woodcock +Florentine +differential analyzer 
+Mitrula elegans +wet cell +basil balm +Circassian +corn cake +bouncing betty +vice-regent +lagerphone +ketembilla +whoremaster +fork +tetrasporangium +trifler +pill head +life-support system +quartermaster general +tobacco thrips +officeholder +teredo +toyon +Sundacarpus amara +Phytophthora citrophthora +naif +lobbyist +alligator wrench +bully +heavy +toxicologist +radio chassis +waterdog +drive line +kaffir cat +foster-brother +breakax +curette +traditionalist +pipe vise +striped button quail +gawker +homeotherm +schoolyard +battue +kalansuwa +deviationist +Bolshevik +transponder +pungapung +iron +Eyeish +roccella +manglietia +Tory +print seller +Texas Ranger +otter shrew +seconder +shellflower +outlier +party man +wold +hayfork +oncologist +framer +co-beneficiary +ocean pout +Chinese angelica +scrimshaw +air attache +false gromwell +standing press +fringepod +specifier +automatic choke +durum +yenta +wassailer +reeler +signora +beach pancake +common booklouse +pellicle +backroom boy +den mother +associate +Unitarian +gambist +brookweed +clubroom +cat's-tail +playboy +self-registering thermometer +doorstop +bennet +yak's milk +escapee +quail bush +sparge pipe +coast boykinia +screw key +half gainer +aggravator +cotton mill +tailor's chalk +free agent +cotton mouse +deadhead +bunny +turpentine camphor weed +amaranth +ceratodus +red lauan +beam-ends +thermograph +wally +Toda +handrest +commissary +oak-leaved goosefoot +manufacturer +voicer +Jafnea semitosta +bench hook +finder +abyssal zone +rabbitwood +Hercules'-club +epicarp +declinometer +camp follower +signaler +Australian pea +putz +qadi +banded palm civet +egg timer +regnellidium +calisaya +harvestfish +sound spectrograph +side-wheeler +glomerule +woolly rhinoceros +Black Muslim +horticulturist +ornithomimid +cryometer +battlefront +gametophyte +airmailer +cuisse +nakedwood +baseball club +slasher +anise +leatherleaf +leatherjacket +horned pondweed +gofer +Saigon cinnamon +barong +blazer +twinkler +skeleton 
shrimp +dial +floorwalker +case shot +flannelbush +cultivated parsnip +Jane Doe +few-flowered leek +nogging +placer miner +muzzler +serge +lion-hunter +capulin +Wandering Jew +ascidian tadpole +hispid pocket mouse +southern spatterdock +milk wagon +junior middleweight +duck sauce +promycelium +protozoologist +cascade liquefier +tout +longheaded thimbleweed +charcoal burner +footage +slop +bridge agent +miller's-thumb +Job's comforter +marocain +tanker plane +lancetfish +knocker +toque +ordinand +umbrella bird +favorite son +hare's-foot bristle fern +business traveler +plotter +Asiatic shrew mole +tallyman +stump +Paleacrita vernata +index register +mortgagee +accuser +codger +sand rat +seaside centaury +chiropractor +Florida smoothhound +dwarf sperm whale +T-man +sannup +dragonhead +numdah +alkali grass +gynobase +kymograph +ascolichen +steward +waterline +Nazarene +filer +lapidary +muncher +wincey +scyphus +question master +besieger +worldling +docent +facing +atmometer +quern +puerpera +three-decker +calliope +wild red oat +bailee +flame pea +cattle cake +theist +yellowtail flounder +cosmopolitan +rocket engineer +vouchee +Turkoman +hard sauce +Thousand Island dressing +assayer +messmate +mutilator +oyster bar +flame tokay +countess +prairie mimosa +microsporangium +cotter +townsman +paring +fundraiser +simperer +Comrade +orlop deck +power takeoff +cattleship +prime meridian +Javanthropus +scriptorium +curandera +long-clawed prawn +maestro +paster +potato tuberworm +chachka +junkyard +cape yellowwood +reentrant polygon +Liberian coffee +restaurateur +Alsophila pometaria +Jekyll and Hyde +electrophorus +Scomberomorus maculatus +manipulator +gromwell +chicken provencale +ashram +mangel-wurzel +shamrock pea +dossal +adducer +erection +Mysore thorn +smoothie +chufa +brace wrench +victualer +litterer +linstock +Protium guianense +palfrey +banyan +klieg light +dangleberry +trooper +yaupon holly +quitter +tradescant's aster +nullipara +melter +devil's urn +ghostwriter 
+mouth +analogist +Creek +sonic depth finder +fucker +locus of infection +mortician +esophageal smear +locum tenens +conic projection +aroeira blanca +bellarmine +night porter +automobile mechanic +codpiece +Munro +cottonweed +scoinson arch +tinderbox +frozen food +waterproofing +Egyptian henbane +lash +transactor +American smooth dogfish +existentialist +grabber +Sonoran lyre snake +Rufous rubber cup +colors +weekend warrior +power user +perennial salt marsh aster +Puritan +Apalachicola rosemary +anecdotist +tosser +moth bean +agnostic +stretcher-bearer +browntail +optimist +brewer's mole +astronomy satellite +flat file +rust mite +tuberous plant +day laborer +buster +trapezoid +bevatron +nonresident +Streptomyces griseus +mangosteen +customer agent +hero worshiper +suicide bomber +procellariiform seabird +archiannelid +reaction turbine +distortionist +bulldog wrench +grainy club +scalp +Aztec +scow +globigerina +pedant +heartleaf manzanita +kanchil +low gallberry holly +containment +scandalmonger +rose-colored starling +Powhatan +addle-head +Chilean rimu +Atlantic sea bream +arthrospore +ramrod +root climber +Kalapooia +roach clip +Schreiber's aster +horseradish +albino +Kshatriya +trombidiid +blasting cap +body pad +brachium +shallu +Wynnea americana +slender centaury +munj +upset +wind tunnel +cottonwick +airing cupboard +pepper shrub +ambrosia +languisher +chosen +rose globe lily +purple apricot +costia +sloop of war +sultana +frontlet +booster +sargassum fish +broad-leaved montia +rifleman bird +stillroom +amoralist +enginery +meter maid +fitment +southern bog lemming +Athenian +clincher +cusk-eel +mackintosh +diaphone +corozo +Australian reed grass +czar +spongioblast +Eurafrican +airhead +Shahaptian +Roman +pollinium +tourist class +halogeton +stamper +emperor +malingerer +tramp steamer +Peziza domicilina +pilot cloth +stenopterygius +cost accountant +Queen's Counsel +wine-maker's yeast +poppet +cage +rowlock arch +landgrave +bearded wheatgrass +stink bell 
+quaker +undesirable +algarroba +resistance pyrometer +exorcist +carib wood +guvnor +border patrolman +bathhouse +licenser +headman +rentier +pine spittlebug +nut-leaved screw tree +paraduodenal smear +apron +necker +smilax +Alpine besseya +creeper +castle +ground bait +Queensland grass-cloth plant +sclerotium +great yellowcress +fat farm +Stoker +hoop snake +elixir of life +Trotskyite +home buyer +wheat berry +Tutelo +semi-climber +utahraptor +wet-bulb thermometer +packrat +hygrophyte +darter +sketcher +refiner +camlet +midgrass +compound +tarwood +Colorado River hemp +toiler +abstractor +override +dwarf pipefish +plodder +briefcase computer +trunk hose +brown butter +valve-in-head engine +cymbalist +explosive detection system +horsewoman +boutonniere +chinchilla +venerator +scourer +exarch +cohune nut +ayapana +continental divide +cosigner +stalker +pyxie +Genet +Macowanites americanus +open-hearth furnace +water chestnut +American frogbit +tarwood +cutter +scout +burr +upsetter +grist +tagasaste +mouthpiece +palette +rattan +letterman +Exmoor +Methodist +eelblenny +marasca +slide valve +ventilation +saddle hackle +Yakut +flux applicator +air traveler +murder suspect +Cynocephalus variegatus +idolizer +Surgeon General +nutlet +little-head snakeweed +germ tube +fellow traveler +raceabout +commodore +czar +anamorphosis +treelet +girlfriend +groundnut +sideline +giant star grass +goffer +spark lever +oubliette +processor +tare +plodder +extremist +Kipp's apparatus +gripsack +S wrench +viscountess +bridgehead +cascarilla +Asiatic flying squirrel +protoceratops +equerry +difflugia +princeling +moonlighter +aspergill +common flat pea +Utahan +imperial mammoth +plantain-leaved pussytoes +Boott's goldenrod +bootlegger +reed pipe +runcinate leaf +onion salt +nitrite bacterium +introvert +duck +New World opah +goliath frog +heterostracan +disrupting explosive +haggler +candlenut +false bugbane +returning officer +eudiometer +ship-breaker +metazoan +mandarin +patka +gill 
net +cavity wall +armilla +rainmaker +dealfish +orderly +gleaner +muffin man +house sitter +alto +sand devil's claw +vulcanizer +appendicularia +boron chamber +chess +bitok +anchovy butter +dropout +flour mill +bishop +escapist +scapegrace +stanhope +smooth winterberry holly +upstager +stalking-horse +pony +prairie gourd +parabolic mirror +Polaroid +slasher +lap +garlic butter +sendee +German millet +hairy honeysuckle +Swiss canton +Scleroderma flavidium +red goatfish +telegraph plant +Jungian +garment cutter +mallee hen +stranger +driveway +schooner +Paiute +cisco +trestlework +sipper +shanny +romanticist +Molly Miller +mountain rimu +odd-leg caliper +bitumastic +Western Australia coral pea +labor coach +latchkey +harpulla +solitary pussytoes +chop-suey greens +coil +guimpe +diapir +Osage +gutta-percha tree +giant eland +reticulation +garden huckleberry +quick study +Hudson bay collared lemming +coreligionist +Lancastrian +stumblebum +omnirange +seersucker +Potemkin village +Rhea Silvia +symphonist +bolti +jaw +jaconet +page +visiting fireman +haulm +p-n junction +landlubber +yellow jack +triclinium +souari +invader +fire walker +Luddite +Plott hound +hemming-stitch +winker +star-duckweed +craniometer +Arabidopsis lyrata +loser +cypripedia +trimmer arch +cookhouse +pink fivecorner +transfer +ringleader +northern pocket gopher +moke +blockade-runner +cyclostome +web-spinning mite +Whig +transcriber +malahini +sawyer +patent log +paca +tragedian +thermojunction +soffit +black buffalo +foreigner +applecart +brit +pole horse +white mullet +argentinosaur +Homo soloensis +bounty hunter +decumary +hand +paperboy +Smitane +windowpane +Java man +Wynnea sparassoides +prune +middy +lilliputian +sorb +pyrostat +guest worker +hold +leaseholder +vegan +humanist +salinometer +piton +zygospore +means +night rider +tetraspore +archipelago +radiomicrometer +nitpicker +spot weld +slicer +girlfriend +round-tailed muskrat +cock's eggs +Shavian +bay +nuclear chemist +planetarium 
+hiccup nut +Marylander +milling +microsporidian +brown cup +Strophanthus kombe +little skate +emancipator +paperhanger +archaeopteryx +maigre +Mastotermes electrodominicus +procurer +seizure-alert dog +homeboy +cotton strain +mute +siren +spearnose bat +phenacomys +gayal +arsenal +pitchfork +Port Jackson heath +cud +magnetic core memory +interferometer +water jacket +account executive +hodoscope +window oyster +sudatorium +syncopator +loment +hypertensive +smoothbark +Geogia holly +nailhead +African holly +musette +chafeweed +microflora +derrick +strawworm +shogun +queen post +jerboa kangaroo +columbo +royal +sourball +solenogaster +cardsharp +Homo habilis +intaglio +calf's-foot jelly +flotsam +skirret +baronduki +chyme +shovel hat +Welsh +monoplane flying fish +groundfish +tablet-armed chair +swan dive +Indian club +colonial +cassiri +pyramidal tent +praya +silk vine +time clock +button snakeroot +clews +Korean lespedeza +diffuser +ripping bar +puttyroot +nipple shield +headpin +juneberry holly +hub-and-spoke +laver +weldment +plain flour +hoosegow +dudeen +grey skate +line of life +mung +arariba +Newtown Wonder +rock candy +side chapel +castor sugar +narrow-leaved white-topped aster +babassu nut +puka +rings +catchall +heat shield +caroche +oxbow +Australian coral snake +tapper +sporangiophore +fenugreek +spruce gall aphid +gouache +cutoff +private line +pod +cargo hatch +nailhead +penile implant +geophyte +small-leaved linden +deepwater pipefish +paperhanger +hairy spurge +Persian lamb +subtropics +feed grain +clarence +nonparticipant +scorpioid cyme +hand brake +tiller +Geglossaceae +albacore +monochrome +goa bean +bur +tongue worm +psittacosaur +frog's lettuce +pectoral +terreplein +light filter +fishpaste +dry point +grison +feterita +dolichocephalic +oenomel +stretcher +swag +cheval-de-frise +mountain beaver +scammony +discus +leatherleaf saxifrage +wharf rat +Dominique +pelycosaur +depth gauge +bishop +archespore +true anomaly +silver jenny +mercy seat 
+kelp +oviraptorid +acrylic +Chinese pea tree +meat house +bilge well +Temperate Zone +whale louse +balbriggan +briefcase bomb +pump-type pliers +oil +sour gourd +Jewbush +lunette +Chinese paddlefish +pyxidium +beechnut +calabar bean +grugru nut +gib +blunt file +cataphyll +megasporangium +blockbuster +sliding seat +hogchoker +calceus +Connarus guianensis +honest woman +survivor +second balcony +tempera +Calvary clover +murine +outwork +bogy +elephant's-foot +conning tower +set square +blackfly +stirk +Streptomyces erythreus +blade +goldfield +snowball +mortal enemy +waltzer +shoal +galley +hitchhiker +lithophyte +brisling +scauper +esophagoscope +grab +subtracter +philosopher +duplex apartment +southeastern pocket gopher +bonduc nut +reverberatory furnace +grader +lamp house +northern bog lemming +brotula +ornithopod +ptyalith +obturator +perpetual motion machine +range pole +Africander +curvet +daisy print wheel +floor +collector +mutant +tuck +fore-and-after +senega +buckler mustard +louvar +Tarsius glis +culdoscope +Spanish fly +steering gear +hatchet man +museum +saw set +cambric tea +comber +thermohydrometer +stationer +chalcis fly +bryanthus +whipstitch +harvest mite +rock gunnel +time bomb +rariora +pigfish +apetalous flower +head shop +horned whiff +sandpit +tachistoscope +sundries +taffrail +caller +monofocal lens implant +Dover's powder +souari nut +crowbait +render +Shakespearian +hagberry +megatherian +magus +hatchel +mangabey +garroter +piedmont +cope +barrio +psychodid +rigout +distributor +croupier's rake +sarcenet +narrow-leaved water plantain +treenail +biped +lanternfish +overdrive +barndoor skate +picket boat +amber lily +sawpit +sand lance +bucket shop +common beech +laundry truck +surtout +grogram +tampion +escape hatch +interstice +shop bell +snake mackerel +nakedwood +tumbrel +mericarp +mountain paca +cab +big board +cringle +eusporangium +shipping room +coal chute +dumbwaiter +Smiledon californicus +man-at-arms +cartridge +deinonychus 
+pigeon pea +screw bean +spectacle +floorboard +cutting room +low-warp-loom +proconsul +sabicu +genipap +clapper +aquifer +archaeornis +belly flop +Protium heptaphyllum +interrupter +high-warp loom +knight +wiper +impression +poker +Pithecanthropus +sable +guardroom +tenter +wellhead +raja +strickle +sodomite +mountebank +sand leek +Barbados gooseberry +shuffler +sensory fiber +crab-eating opossum +etching +rare bird +scup +fagot +negro vine +hutment +droshky +nephoscope +lady chapel +cutty stool +release +vestiture +buff +standard +Tabernacle +vascular ray +snakewood +chlorobenzylidenemalononitrile +limnologist +pouched mole +microwave linear accelerator +Mastotermes darwiniensis +wind tee +orange bat +open sight +carpospore +rampant arch +sabbatia +cursor +post exchange +bellpull +center +cyclostyle +canonist +pygmy sperm whale +moa +king +pass-through +angioscope +marrow +hookup +revetment +acanthocephalan +good Samaritan +apatosaur +web spinner +dixie +ommastrephes +crossbench +candlewick +jack +light arm +caisson +kaki +quandong nut +Meuniere butter +coquilla nut +mast +black +twitterer +bluethroat pikeblenny +shielding +water-shield +urolith +elephant bird +clearway +dark lantern +schizopetalon +press +Nazi +sugarberry +Maltese +stevedore +hair shirt +party wall +gainer +blackheart +nothosaur +cavetto +evergreen bittersweet +chemical bomb +calpac +shingle +turnpike +animator +heaver +isoclinic line +death knell +liner +anathema +aerie +razorback +Ichyostega +pound net +French dressing +mottle +yard +string tie +bell seat +brattice +battering ram +sierra +pompon +vertex +stomach pump +electrolytic cell +escolar +telpher +roadhouse +cerecloth +tartare sauce +letter case +whale sucker +hob +teg +canvas +strickle +hectograph +Cartagena bark +mail car +acinus +freedom rider +bread sauce +picture window +Rhizopogon idahoensis +pinprick +mass spectrograph +ringer +devil's cigar +salad cream +marlberry +airbrake +Clark cell +yellow-throated marten +wire gauge 
+dinoceras +aba +harpoon log +plate rail +mustard plaster +coelophysis +journal box +puce +ballcock +quartering +izar +clinid +whirler +turnspit +deathbed +pottle +shot +doubler +Coryphaena equisetis +English sole +chicken feed +borrow pit +mylodontid +Chilean nut +Kundt's tube +ling +asthenosphere +reseau +death seat +immovable bandage +peppermint patty +lecturer +electron multiplier +bear claw +hyacinth +beaked salmon +toehold +scull +snowball +gangsaw +fiber +oxeye +lashing +Beckman thermometer +fence +cantilever +dinner theater +Reynard +jag +umbrella plant +camera lucida +beaver +slug +yellowfin croaker +Sibley tent +rat-tail file +anchovy pear +soldier +cackler +chaise +Pitot-static tube +minniebush +Episcopalian +oleaster +ejaculator +wavy-leaved aster +knight +rack +real storage +magnetic mine +cocoa plum +vesiculovirus +birch leaf miner +water chevrotain +rudapithecus +torpedo tube +itch mite +warren +loft +washerman +terrace +nonstarter +shit +platform +caudex +ground control +Ostariophysi +slopshop +Peruvian cotton +crystal oscillator +plastic bomb +bar bit +watering cart +Asiatic sweetleaf +artificial joint +chariot +casern +charge-exchange accelerator +display adapter +hornpipe +honey bell +planula +Nephthytis afzelii +hame +ranter +trachodon +synchrocyclotron +splasher +heterotroph +Nicol prism +Himalayan rhubarb +headfast +put-put +bitter almond +parr +scantling +power breakfast +madder +Catalpa bignioides +rose of Jericho +spark chamber +rhizome +beard worm +supper club +negro peach +keratoscope +wain +apple aphid +planking +time-delay measuring instrument +sternpost +sicklepod +lake bed +gatherer +monotype +dead-man's float +poison gas +dicynodont +organism +cell +person +animal +plant +food +artifact +dressage +contact sport +outdoor sport +gymnastics +track and field +jumping +high jump +skiing +water sport +swimming +dive +floating +skin diving +rowing +boxing +sledding +tobogganing +wrestling +skating +ice skating +roller skating +racing +boat 
racing +riding +equestrian sport +cycling +blood sport +hunt +fishing +angling +casting +athletic game +outdoor game +golf +field game +field hockey +football +American football +ball game +baseball +court game +badminton +basketball +tennis +sport +Seder +scavenger +bottom-feeder +work animal +beast of burden +pack animal +domestic animal +marine animal +female +male +young +young mammal +pup +cub +lion cub +tiger cub +microorganism +arbovirus +herpes +herpes zoster +reovirus +moneran +cyanobacteria +enteric bacteria +actinomycete +streptomyces +diplococcus +parasite +ectoparasite +protoctist +protozoan +sarcodinian +ameba +ciliate +alga +brown algae +green algae +sporozoan +cypriniform fish +cyprinid +carp +domestic carp +shiner +catostomid +buffalo fish +cyprinodont +killifish +topminnow +squirrelfish +stickleback +pipefish +embryo +fetus +blastula +chordate +cephalochordate +tunicate +ascidian +vertebrate +aquatic vertebrate +jawless vertebrate +lamprey +hagfish +cartilaginous fish +holocephalan +chimaera +elasmobranch +shark +mackerel shark +mako +requiem shark +dogfish +smooth dogfish +spiny dogfish +smooth hammerhead +smalleye hammerhead +shovelhead +ray +sawfish +roughtail stingray +butterfly ray +eagle ray +manta +skate +bird +gamecock +night bird +ratite +passerine +oscine +accentor +lark +pipit +finch +canary +dark-eyed junco +New World sparrow +bunting +honeycreeper +sparrow +grosbeak +towhee +weaver +grassfinch +tyrannid +New World flycatcher +kingbird +pewee +cotinga +antbird +Old World flycatcher +thrush +nightingale +Old World chat +warbler +kinglet +Old World warbler +New World warbler +flycatching warbler +New World chat +yellowthroat +New World oriole +northern oriole +meadowlark +New World blackbird +grackle +Old World oriole +starling +myna +corvine bird +crow +Old World jay +common European jay +New World jay +blue jay +Canada jay +Rocky Mountain jay +nutcracker +European magpie +American magpie +Australian magpie +wren +marsh wren +thrasher 
+New Zealand wren +creeper +titmouse +black-capped chickadee +Carolina chickadee +swallow +martin +tanager +shrike +butcherbird +bush shrike +bowerbird +European water ouzel +American water ouzel +vireo +waxwing +bird of prey +hawk +black kite +swallow-tailed kite +white-tailed kite +harrier +falcon +peregrine +caracara +eagle +young bird +sea eagle +Aegypiidae +Old World vulture +griffon vulture +bearded vulture +Egyptian vulture +black vulture +New World vulture +buzzard +condor +Andean condor +California condor +black vulture +king vulture +owl +horned owl +scops owl +amphibian +salamander +newt +Pacific newt +ambystomid +climbing salamander +web-toed salamander +frog +true frog +true toad +spadefoot +tree toad +cricket frog +tongueless frog +reptile +anapsid +diapsid +chelonian +turtle +sea turtle +ridley +snapping turtle +musk turtle +diamondback terrapin +Western box turtle +tortoise +soft-shelled turtle +saurian +lizard +gecko +iguanid +spiny lizard +fence lizard +horned lizard +skink +teiid lizard +racerunner +plateau striped whiptail +Chihuahuan spotted whiptail +western whiptail +checkered whiptail +agamid +moloch +anguid lizard +venomous lizard +lacertid lizard +chameleon +monitor +crocodilian reptile +crocodile +alligator +caiman +armored dinosaur +ankylosaur +bone-headed dinosaur +ceratopsian +hadrosaur +saurischian +sauropod +theropod +ceratosaur +maniraptor +synapsid +pterosaur +ichthyosaur +snake +colubrid snake +smooth green snake +rough green snake +racer +blacksnake +whip-snake +rat snake +bull snake +common kingsnake +milk snake +common garter snake +ribbon snake +Western ribbon snake +common water snake +water moccasin +grass snake +viperine grass snake +sand snake +lyre snake +blind snake +indigo snake +constrictor +boa +python +elapid +coral snake +coral snake +cobra +mamba +black mamba +krait +viper +pit viper +rattlesnake +timber rattlesnake +arthropod +arachnid +false scorpion +whip-scorpion +spider +European wolf spider +acarine +hard 
tick +Ixodes dammini +Ixodes neotomae +Ixodes pacificus +Ixodes scapularis +sheep-tick +Ixodes persulcatus +Ixodes dentatus +Ixodes spinipalpis +wood tick +soft tick +mite +trombiculid +spider mite +house centipede +gallinaceous bird +domestic fowl +jungle fowl +chicken +cock +hen +turkey +grouse +European black grouse +Asian black grouse +blackcock +greyhen +red grouse +moorhen +greater prairie chicken +lesser prairie chicken +heath hen +guan +chachalaca +megapode +mallee fowl +phasianid +pheasant +bobwhite +northern bobwhite +Old World quail +migratory quail +peafowl +California quail +Hungarian partridge +red-legged partridge +Greek partridge +mountain quail +guinea fowl +columbiform bird +pigeon +dove +turtledove +domestic pigeon +homing pigeon +sandgrouse +parrot +cockatoo +lory +varied Lorikeet +rainbow lorikeet +parakeet +cuculiform bird +cuckoo +crow pheasant +coraciiform bird +roller +kingfisher +hoopoe +apodiform bird +swift +Archilochus colubris +thornbill +goatsucker +piciform bird +woodpecker +flicker +sapsucker +toucanet +trogon +quetzal +aquatic bird +waterfowl +anseriform bird +duck +teal +widgeon +sheldrake +goldeneye +scaup +wood duck +sea duck +scoter +merganser +gosling +gander +Chinese goose +greylag +blue goose +snow goose +brant +common brant goose +honker +barnacle goose +swan +tundra swan +screamer +crested screamer +mammal +prototherian +monotreme +marsupial +opossum +bandicoot +kangaroo +common wallaby +hare wallaby +nail-tailed wallaby +rock wallaby +pademelon +tree wallaby +rat kangaroo +phalanger +dasyurid marsupial +dasyure +placental +calf +buck +insectivore +mole +shrew mole +shrew +water shrew +tenrec +invertebrate +sponge +glass sponge +coelenterate +Chrysaora quinquecirrha +hydrozoan +siphonophore +anthozoan +actinia +coral +gorgonian +stony coral +ctenophore +worm +planarian +fluke +liver fluke +Fasciolopsis buski +schistosome +tapeworm +echinococcus +taenia +common roundworm +chicken roundworm +pinworm +eelworm +vinegar eel 
+trichina +hookworm +filaria +Guinea worm +annelid +oligochaete +polychaete +leech +mollusk +scaphopod +gastropod +abalone +scorpion shell +giant conch +edible snail +garden snail +brown snail +Helix hortensis +seasnail +neritid +limpet +Hermissenda crassicornis +cowrie +bivalve +clam +quahog +cockle +oyster +mussel +marine mussel +freshwater mussel +scallop +shipworm +cephalopod +octopod +decapod +squid +crustacean +malacostracan crustacean +decapod crustacean +crab +swimming crab +spider crab +lobster +true lobster +Old World crayfish +American crayfish +shrimp +prawn +krill +stomatopod +mantis shrimp +woodlouse +pill bug +sow bug +sea louse +amphipod +copepod +barnacle +wading bird +stork +ibis +common spoonbill +roseate spoonbill +heron +egret +night heron +American bittern +European bittern +least bittern +whooping crane +rail +crake +gallinule +purple gallinule +coot +great bustard +plain turkey +button quail +trumpeter +seabird +shorebird +plover +turnstone +sandpiper +yellowlegs +ruff +tattler +woodcock +snipe +greyback +red-breasted snipe +curlew +godwit +stilt +stilt +phalarope +courser +coastal diving bird +larid +gull +tern +jaeger +skua +auk +guillemot +murre +puffin +gaviiform seabird +podicipitiform seabird +grebe +pelecaniform seabird +white pelican +Old world white pelican +gannet +snakebird +sphenisciform seabird +penguin +pelagic bird +wandering albatross +black-footed albatross +petrel +shearwater +storm petrel +aquatic mammal +cetacean +whale +baleen whale +rorqual +toothed whale +beaked whale +dolphin +bottlenose dolphin +porpoise +sea cow +carnivore +pinniped mammal +seal +eared seal +fur seal +fur seal +South American sea lion +California sea lion +Australian sea lion +Steller sea lion +earless seal +walrus +canine +bitch +dog +cur +toy dog +toy spaniel +English toy spaniel +hunting dog +hound +coonhound +dachshund +foxhound +wolfhound +greyhound +terrier +bullterrier +rat terrier +Manchester terrier +fox terrier +wirehair +Welsh terrier 
+schnauzer +Skye terrier +sporting dog +retriever +pointer +setter +spaniel +springer spaniel +water spaniel +working dog +watchdog +shepherd dog +Belgian sheepdog +pinscher +Sennenhunde +mastiff +bulldog +guide dog +sled dog +liver-spotted dalmatian +spitz +griffon +corgi +poodle +wolf +coydog +wild dog +striped hyena +brown hyena +spotted hyena +aardwolf +fox +black fox +silver fox +blue fox +feline +cat +domestic cat +tom +blue point Siamese +wildcat +common lynx +Canada lynx +bobcat +spotted lynx +caracal +big cat +leopardess +panther +lioness +lionet +Bengal tiger +tigress +saber-toothed tiger +bear +Syrian bear +grizzly +Alaskan brown bear +cinnamon bear +viverrine +civet +Indian mongoose +ichneumon +slender-tailed meerkat +suricate +bat +fruit bat +carnivorous bat +leafnose bat +false vampire +vespertilian bat +long-eared bat +freetail +vampire bat +predator +game +game bird +fossorial mammal +tetrapod +insect +beetle +two-spotted ladybug +Mexican bean beetle +Hippodamia convergens +vedalia +bombardier beetle +calosoma +searcher +firefly +sawyer +pine sawyer +flea beetle +Colorado potato beetle +carpet beetle +clerid beetle +lamellicorn beetle +scarabaeid beetle +scarab +tumblebug +dorbeetle +June beetle +melolonthid beetle +elaterid beetle +snout beetle +boll weevil +blister beetle +bark beetle +darkling beetle +flour beetle +seed beetle +pea weevil +bean weevil +rice weevil +louse +flea +dipterous insect +gall midge +housefly +tsetse fly +blowfly +bluebottle +greenbottle +flesh fly +tachina fly +gadfly +botfly +human botfly +sheep botfly +warble fly +horsefly +bee fly +fruit fly +louse fly +horn fly +mosquito +gnat +fungus gnat +hymenopterous insect +drone +worker +honeybee +Africanized bee +black bee +Carniolan bee +Italian bee +carpenter bee +bumblebee +cuckoo-bumblebee +andrena +Nomia melanderi +leaf-cutting bee +mason bee +potter bee +wasp +vespid +paper wasp +hornet +sphecoid wasp +digger wasp +chalcid fly +sawfly +pharaoh ant +little black ant +army 
ant +carpenter ant +fire ant +wood ant +slave ant +Formica fusca +slave-making ant +sanguinary ant +bulldog ant +Amazon ant +termite +dry-wood termite +orthopterous insect +short-horned grasshopper +locust +migratory locust +migratory grasshopper +long-horned grasshopper +katydid +mormon cricket +sand cricket +mole cricket +European house cricket +field cricket +tree cricket +snowy tree cricket +phasmid +diapheromera +oriental cockroach +American cockroach +Australian cockroach +German cockroach +giant cockroach +praying mantis +hemipterous insect +leaf bug +mirid bug +lygus bug +lygaeid +coreid bug +heteropterous insect +water bug +water strider +assassin bug +homopterous insect +whitefly +sweet-potato whitefly +coccid insect +scale insect +soft scale +armored scale +mealybug +plant louse +aphid +greenfly +woolly aphid +adelgid +dog-day cicada +seventeen-year locust +spittle insect +plant hopper +psocopterous insect +psocid +booklouse +ephemerid +neuropteron +green lacewing +brown lacewing +odonate +trichopterous insect +caseworm +thysanuran insect +bristletail +thysanopter +thrips +earwig +lepidopterous insect +butterfly +nymphalid +fritillary +emperor butterfly +danaid +pierid +small white +large white +southern cabbage butterfly +blue +copper +American copper +hairstreak +Strymon melinus +moth +tortricid +lymantriid +geometrid +cankerworm +pyralid +tineoid +tineid +clothes moth +gelechiid +grain moth +noctuid moth +cutworm +underwing +hawkmoth +bombycid +saturniid +giant silkworm moth +silkworm +arctiid +lasiocampid +tent caterpillar +webworm +webworm moth +caterpillar +bollworm +woolly bear +larva +grub +pupa +queen +echinoderm +basket star +edible sea urchin +sand dollar +heart urchin +crinoid +trepang +lagomorph +leporid +rabbit +eastern cottontail +swamp rabbit +marsh hare +leveret +European hare +jackrabbit +white-tailed jackrabbit +blacktail jackrabbit +polar hare +snowshoe hare +pika +rodent +mouse +rat +pocket rat +field mouse +brown rat +jerboa rat 
+water rat +New World mouse +wood mouse +wood rat +vole +packrat +Eurasian hamster +golden hamster +gerbil +lemming +pied lemming +Old World porcupine +brush-tailed porcupine +long-tailed porcupine +New World porcupine +Canada porcupine +pocket mouse +kangaroo rat +jumping mouse +jerboa +dormouse +gopher +squirrel +tree squirrel +ground squirrel +prairie dog +American flying squirrel +groundhog +hoary marmot +yellowbelly marmot +Old World beaver +New World beaver +cavy +naked mole rat +ungulate +hyrax +odd-toed ungulate +equine +horse +foal +colt +male horse +stallion +mare +saddle horse +warhorse +pony +mustang +bronco +wild horse +pony +racehorse +racer +harness horse +workhorse +draft horse +trotting horse +ass +domestic ass +wild ass +onager +common zebra +mountain zebra +grevy's zebra +rhinoceros +tapir +even-toed ungulate +swine +piglet +porker +peccary +ruminant +bovid +bovine +ox +cattle +bull +cow +beef +Brahman +dairy cattle +Old World buffalo +Indian buffalo +carabao +Asian wild ox +American bison +wisent +sheep +lamb +domestic sheep +wild sheep +mountain sheep +goat +domestic goat +wild goat +goat antelope +antelope +Thomson's gazelle +Gazella subgutturosa +springbok +kudu +harnessed antelope +eland +waterbuck +oryx +deer +stag +red deer +mule deer +roe deer +caribou +chevrotain +camel +domestic llama +guanaco +alpaca +giraffe +musteline mammal +ermine +stoat +New World least weasel +Old World least weasel +longtail weasel +American mink +ferret +muishond +snake muishond +striped muishond +river otter +Eurasian otter +striped skunk +hooded skunk +hog-nosed skunk +spotted skunk +American badger +Eurasian badger +ferret badger +hog badger +marten +pachyderm +edentate +peba +apar +tatouay +peludo +giant armadillo +pichiciago +sloth +anteater +primate +ape +anthropoid ape +hominoid +hominid +homo +Homo erectus +Homo sapiens +australopithecine +great ape +western lowland gorilla +eastern lowland gorilla +mountain gorilla +silverback +western chimpanzee 
+eastern chimpanzee +central chimpanzee +pygmy chimpanzee +lesser ape +monkey +Old World monkey +talapoin +grivet +vervet +green monkey +chacma +mandrill +drill +rhesus +bonnet macaque +Barbary ape +crab-eating macaque +entellus +guereza +New World monkey +true marmoset +pygmy marmoset +tamarin +silky tamarin +pinche +lemur +tarsier +flying lemur +proboscidean +elephant +mammoth +procyonid +raccoon +fish +food fish +young fish +crossopterygian +lungfish +catfish +silurid +bullhead +channel catfish +gadoid +cod +hake +elver +common eel +tuna +moray +conger +teleost fish +clupeid fish +shad +herring +sardine +pilchard +anchovy +salmonid +salmon +Atlantic salmon +trout +brown trout +char +whitefish +smelt +tarpon +ribbonfish +toadfish +needlefish +flying fish +spiny-finned fish +percoid fish +perch +pike-perch +walleye +robalo +pike +pickerel +sunfish +crappie +freshwater bream +black bass +bass +serranid fish +grouper +hind +surfperch +cardinalfish +remora +carangid fish +jack +moonfish +pompano +scad +dolphinfish +characin +cichlid +snapper +grunt +sparid +sea bream +porgy +sciaenid fish +croaker +whiting +sea trout +mullet +goatfish +mullet +silversides +barracuda +sea chub +butterfly fish +damselfish +clown anemone fish +wrasse +blenny +pikeblenny +gunnel +goby +gempylid +scombroid +mackerel +Spanish mackerel +tuna +bonito +sailfish +billfish +marlin +tripletail +mojarra +ganoid +Pacific sturgeon +beluga +scorpaenoid +scorpaenid +scorpionfish +rockfish +lumpfish +greenling +gurnard +sea robin +plectognath +triggerfish +filefish +boxfish +spiny puffer +ocean sunfish +flatfish +righteye flounder +lefteye flounder +whiff +sole +abbey +abbey +abrader +accelerator +accessory +accommodation +acoustic device +acoustic modem +acrylic +action +actuator +adhesive bandage +adjustable wrench +aeolian harp +aerosol +after-shave +airbus +aircraft +airfield +airfoil +air gun +airplane +air pump +air-to-air missile +air-to-ground missile +alarm +alb +alcazar +Allen screw +alms 
dish +altimeter +Amati +ammeter +ammunition +amplifier +analog computer +analytical balance +anchor +anchor chain +aneroid barometer +angledozer +anklet +antenna +anteroom +antiaircraft +antiballistic missile +apartment +apartment building +aperture +apparatus +apparel +appliance +appliance +applicator +aquarium +arbor +arcade +arch +arc lamp +area +argyle +arm +armament +armature +armchair +armoire +armor +armored vehicle +armor plate +armrest +array +arrow +artificial heart +artillery +assembly +assembly plant +astrodome +astronomical telescope +athletic sock +atom bomb +atomic clock +atomizer +attachment +attack submarine +attire +audiocassette +audio system +audiotape +auditorium +autoclave +autoinjector +autoloader +automat +automat +automatic firearm +automatic rifle +automaton +auxiliary research submarine +awl +ax +axis +axle +axletree +baby bed +baby buggy +baby grand +back +background +backseat +badminton equipment +badminton racket +bag +bag +bag +baggage +bagpipe +bait +balance +balcony +balcony +bale +ball +ball gown +ballistic missile +ballistic pendulum +ball-peen hammer +ballroom +band +bandage +bandanna +banderilla +bar +bar +barbed wire +barge +barge pole +barn door +barograph +barrack +barrage balloon +barrel knot +barrel vault +barrier +barroom +base +base +baseball equipment +basilica +basin +basket +basketball equipment +bass +bass drum +bass horn +bastion +bat +bathhouse +battery +battle-ax +battle dress +battleship +bay rum +bay window +beading plane +beam +beam balance +bearing +beater +beating-reed instrument +bed +bed +bedclothes +bedroom +bedroom furniture +bedspread +bedspring +beehive +beer barrel +bell +bell push +bell tower +belt +belt buckle +bench +berlin +berth +besom +bevel gear +bicycle +bicycle chain +bier +billiard ball +bin +binding +bin liner +binocular microscope +bioscope +birchbark canoe +bird shot +bistro +bit +bit +black tie +blade +blade +blanket +blimp +blind +block +block plane +blouse +blower +blowtorch +bludgeon 
+boarding +boarding house +boardroom +boat +bobbin +body +body armor +body lotion +boiler +bolt +bolt +bomb +bomber +bongo +boom +boom +boomerang +boot +booth +booth +bore bit +Boston rocker +bota +bottle +bottle opener +bow +bow +bowed stringed instrument +bowl +bowl +bowline +bowling equipment +bowling pin +bowsprit +box +box +boxcar +boxing equipment +brace +brace +bracelet +bracket +brake +brake system +brass +brasserie +brazier +breechcloth +breeches +brewpub +brick +bricklayer's hammer +brickwork +bridal gown +bridge +briefcase +brigandine +brilliant pebble +brim +broad arrow +broadax +broad hatchet +broadsword +brush +bubble jet printer +buffer +buffet +building +building complex +bulldozer +bullet +bullhorn +bullnose +bundle +bunker +burial chamber +burner +bus +business suit +butt joint +button +buttress +butt shaft +buzz bomb +cabaret +caber +cabin +cabin +cabinet +cabinet +cabin liner +cable +cable +cafe +cafeteria +cafeteria tray +caff +cage +calculator +caliper +calorimeter +camera +camera lens +camera tripod +camp +camp +camp chair +camper +can +canal +candelabrum +candlestick +cane +cannikin +cannon +cannon +cannonball +canopy +canteen +canteen +canvas +canvas tent +cap +cap +cap +capacitor +caparison +cape +cap screw +capsule +car +car +carbine +carbon arc lamp +card index +cardioid microphone +car door +cargo liner +cargo ship +carillon +carpenter's hammer +carpenter's level +carpenter's mallet +carpenter's rule +carpet tack +carriage +carriage +carriage bolt +carrick bend +carrier +car seat +cart +cartridge +cartridge belt +cartridge holder +case +case +cashbox +casque +casserole +cassock +catch +catcher's mask +cathedra +cathedral +cathedral +catheter +cathode +cathode-ray tube +cat's-paw +cattle car +cautery +cavalry sword +cedar chest +cell +cell +cellblock +center +centrifuge +ceramic +ceramic ware +chain tongs +chair +chair of state +chalk +chamfer plane +chandlery +chapel +character printer +chassis +chasuble +chatelaine +checker 
+cheeseboard +chemical reactor +chessman +chest of drawers +child's room +china +chip +chip +chisel +choke +chokey +chordophone +chronoscope +chuck +church key +cigar lighter +circle +circuit +circuit board +circular plane +circular saw +cistern +civilian clothing +clamp +clamshell +clarinet +classroom +clavier +cleaning implement +cleaning pad +clean room +clinic +clip +cloak +clock +closed circuit +closed-circuit television +closet +cloth covering +clothes closet +clothes dryer +clothes hamper +clothes tree +clothing +clothing store +clout nail +clove hitch +clutch +coach +coal car +coal shovel +coat +coat closet +coating +coating +coat of paint +coaxial cable +cocked hat +coffee cup +coffee maker +coffer +coffin +coil +colander +collider +cologne +colonnade +color television +Colt +column +column +comb +comb +combination plane +combine +commissary +commodity +communication system +commutator +compact disk +compartment +compass +compass card +compound lens +compound lever +compressor +computer +computer circuit +computer network +computer screen +computer system +concentration camp +concert grand +concertina +condenser +condenser +condenser microphone +conductor +connecting rod +connection +conservatory +conservatory +contact +container +contrabassoon +control +control panel +control system +convent +converging lens +converter +convertible +conveyance +cooker +cooking utensil +cooler +cooling system +cord +cord +cordage +corner +correctional institution +corset +cosmetic +costume +costume +cotter +cotton +counter +counter +counter +counter tube +country house +coupling +court +court +coverall +covering +cowbarn +craft +cravat +crazy quilt +cream +cream pitcher +crematory +crepe +crib +cricket equipment +croquet equipment +crossbar +crossbow +crosspiece +crown jewels +cruiser +cruiser +cruise ship +crystal microphone +cudgel +cuff +cultivator +cup +cupboard +cupola +curb roof +curtain +cutout +cutter +cutting implement +cybercafe +cyclotron +cylinder +cymbal +dado 
plane +dagger +damper +dart +data converter +data input device +davenport +davenport +davit +dead axle +deck +deck +deck chair +deep-freeze +defensive structure +delay line +delicatessen +dental appliance +denture +depilatory +depressor +depth finder +derrick +destroyer +detector +detector +detonating fuse +detonator +developer +device +dial +dialyzer +diathermy machine +diesel locomotive +digital camera +digital computer +digital display +diner +dinghy +dining car +dining-hall +dining room +dining-room furniture +dining-room table +dinner dress +dinner pail +dinner table +diode +dip +diplomatic building +dipper +DIP switch +directional antenna +directional microphone +direction finder +disguise +dish +dish +disk +dispenser +display +display panel +distillery +ditch +ditch spade +dive bomber +doll +dolmen +domino +door +doorbell +doorlock +doornail +dormer window +dormitory +dot matrix printer +double-breasted suit +double-reed instrument +douche +dovecote +dovetail plane +downstage +drafting instrument +Dragunov +drawstring bag +dray +dredging bucket +dress +dress blues +dressing +dress uniform +drill +electric drill +drill rig +drinking fountain +drinking vessel +drip mat +drip pot +drive +drive +drogue +drogue parachute +drop-leaf table +dry battery +dry dock +dryer +dry masonry +dry wall +dugout canoe +dumdum +dumpcart +dune buggy +dungeon +duplicator +dustmop +dwelling +earphone +earthenware +easel +easy chair +edge tool +eiderdown +elastic bandage +electrical converter +electrical device +electric bell +electric frying pan +electric furnace +electric heater +electric lamp +electric motor +electric refrigerator +electro-acoustic transducer +electrode +electromagnet +electronic balance +electronic device +electronic equipment +electronic instrument +electronic voltmeter +electron microscope +electrostatic generator +electrostatic printer +elevator +embankment +embellishment +enamel +enamelware +enclosure +endoscope +engine +engine +ensemble +entrenching tool 
+epidiascope +equipment +eraser +escutcheon +espadrille +espresso shop +establishment +estaminet +exercise device +exhaust fan +exhibition hall +Exocet +expansion bolt +explosive device +external-combustion engine +extractor +fabric +face mask +face veil +facing +factory +fairlead +false face +fan +farm building +farm machine +fastener +fatigues +faucet +feedback circuit +fence +fencing sword +fender +ferry +fetoscope +field-sequential color television +fife +fifth wheel +fighter +figure eight +file +file server +filling +film +film +filter +filter +finery +finisher +fipple flute +fire +firearm +fire iron +fireplace +firkin +fisherman's bend +fisherman's knot +fisherman's lure +fishing boat +fishing rod +fishnet +flag +flageolet +flambeau +flannelette +flap +flashlight +flask +flatcar +flat tip screwdriver +fleet ballistic missile submarine +flight simulator +flip-flop +floating dock +floor +floor +floor cover +fly +flywheel +fob +foghorn +folder +food hamper +footbath +footbridge +foothold +foot rule +footwear +footwear +forceps +fore-and-aft sail +foremast +fore plane +fore-topmast +fork +formalwear +fortification +fortress +foundation garment +foundry +fragmentation bomb +framework +free-reed instrument +freight train +French door +friary +friction clutch +frigate +frill +frock coat +front projector +fruit machine +full-dress uniform +full metal jacket +funny wagon +fur hat +furnace +furnishing +furniture +fuse +gable +gable roof +gaff +galleon +gallery +galley +galley +gallows +galvanometer +gambling house +game +game equipment +gamp +garage +Garand rifle +garden +garden spade +garden tool +garment +gas burner +gas-discharge tube +gasket +gasoline engine +gate +gatehouse +gatepost +gathered skirt +gauge +gauze +gauze +gavel +gear +gear +gear +gearing +general-purpose bomb +generator +generator +Geneva gown +geodesic dome +girder +glass +glider +glove +glyptic art +goal +golf club +golf equipment +Gordian knot +Gothic arch +government building +government office 
+gown +gramophone +granary +granny knot +grapnel +grapnel +grate +graver +greasy spoon +greatcoat +great hall +greengrocery +grenade +grillroom +groined vault +Guarnerius +guidance system +guided missile +guildhall +guitar +guitar pick +gun +gun carriage +gunlock +gunsight +gun trigger +gurney +gymnastic apparatus +gym shoe +gypsy cab +habergeon +habit +hairdressing +hairpiece +hairpin +half hatchet +half hitch +hall +hall +hammer +hand +handbell +handbow +handcart +hand glass +handloom +hand lotion +hand mower +handsaw +hand shovel +hand tool +handwear +handwheel +hanger +hank +harpsichord +harrow +hash house +hat +hatch +hauberk +hawser bend +hazard +head +head +head covering +headdress +header +headgear +headlight +headsail +headscarf +health spa +heat engine +heater +heat lamp +heat-seeking missile +heavier-than-air craft +heckelphone +hedge +helicopter +helm +helmet +helmet +heraldry +high altar +high-angle gun +high gear +high table +hinge +hip boot +hitch +hoe +hogshead +hoist +holder +holding device +home appliance +homespun +hood +hood +hood +hook +Hoover +hope chest +horn +horn button +horse +horsecar +horse-drawn vehicle +horsehair wig +hosiery +hospital +hospital room +hostel +hot-air balloon +hotel +hotel room +hot tub +house +house +housing +hovel +huarache +humeral veil +hut +hutch +hydraulic brake +hydraulic system +hydroelectric turbine +hydrofoil +hydrometer +hygrometer +hypermarket +hypodermic syringe +ice machine +ice rink +ice skate +icetray +ignition switch +impact printer +implant +implement +imprint +improvised explosive device +inclined plane +indicator +induction coil +ink-jet printer +inkstand +institution +instrument +instrument of punishment +instrument of torture +interceptor +interchange +intercommunication system +intercontinental ballistic missile +interface +interior door +internal-combustion engine +ionization chamber +video iPod +iron +jack +jack +jacket +jacket +jack plane +jail +jamb +jar +jeroboam +jet +jet engine +jewelled 
headdress +jib +jibboom +jiggermast +joint +jointer +joist +jolly boat +jug +jumper +jumper cable +junction +junction +jury mast +kayak +keel +keg +kerchief +kettle +key +key +keyboard +keyboard instrument +khakis +kiln +kinescope +kingbolt +kirk +kit +kit +kitbag +kitchen +kitchen appliance +kitchen utensil +kite balloon +knee-high +knife +knife +knit +knob +lace +lacquer +ladder truck +lag screw +lamasery +laminate +lamination +lamp +lamp +landing gear +land mine +lantern +lapel +lathe +lattice +launcher +lead-acid battery +leather strip +Leclanche cell +leg +legging +lens +lens implant +level +lever +Levi's +lid +life buoy +life jacket +life preserver +lifting device +ligament +light +light-emitting diode +lighter-than-air craft +lighting +light microscope +linear accelerator +line printer +lingerie +lining +liquid crystal display +lister +living quarters +living room +local area network +lock +locomotive +lodge +lodging house +loft +loft +longbow +lookout +loom +loop knot +lota +lounge +loungewear +love knot +lunchroom +luxury liner +lyre +machine +machine +machine bolt +machine gun +machinery +machine screw +machine tool +magic lantern +magnet +magnetic disk +magnetic recorder +magnetic tape +magnifier +magnum +magnus hitch +mailer +mainframe +mainmast +main-topmast +main yard +makeup +mallet +mallet +mallet +mandolin +manger +man-of-war +manometer +MANPAD +mansard +mansion +marina +marker +marketplace +maser +mask +masonry +mass spectrometer +mast +mast +mat +mat +match +match +match plane +material +materiel +Matthew Walker +maul +measure +measuring instrument +measuring stick +mechanical device +mechanical system +mechanism +medical building +medical instrument +memorial +memory +memory chip +memory device +menhir +man's clothing +mercantile establishment +mercury barometer +mercury thermometer +mercury-vapor lamp +mess +metal screw +meteorological balloon +meter +meterstick +microbalance +microfilm +microscope +military hospital +military quarters 
+military vehicle +mill +milldam +millinery +mine +minibike +mink +minster +Minuteman +mirror +mixer +mizzenmast +module +mold +moldboard plow +monitor +monitor +morgue +mortise joint +motion-picture camera +motion-picture film +motor +motorboat +motorcycle +motor hotel +motor vehicle +mound +mount +mouse button +movie projector +moving-coil galvanometer +mug +multiplex +multiplexer +musette pipe +mushroom anchor +musical instrument +musket +musket ball +muslin +muzzle loader +narrowbody aircraft +nautilus +navigational system +naval equipment +naval gun +naval radar +naval weaponry +navigational instrument +nebuchadnezzar +neckline +neckpiece +necktie +neckwear +needle +needlework +negligee +net +net +net +net +network +network +night bell +nightwear +noisemaker +nonsmoker +non-volatile storage +nose flute +nuclear reactor +nuclear weapon +nursery +oar +oblique bandage +oboe da caccia +oboe d'amore +obstacle +office +office furniture +oil lamp +oil paint +oil tanker +olive drab +omnidirectional antenna +onion dome +open-air market +open-end wrench +opener +openside plane +ophthalmoscope +optical device +optical disk +optical instrument +optical telescope +organ pipe +outbuilding +outerwear +outfit +outrigger canoe +outside mirror +oven +overgarment +overhand knot +overhang +overhead projector +overnighter +overshoe +oxford +package +packaging +packing box +paddle +paddle steamer +page printer +paint +pallium +pan +pan +panic button +panopticon +panopticon +pantechnicon +pantry +pants suit +panzer +paper chain +paper fastener +parabolic reflector +parapet +parasail +parka +parsonage +particle detector +partition +passenger ship +passenger train +passenger van +passive matrix display +passkey +patch +patchouli +patchwork +patina +patisserie +pavis +peavey +pedal +pedestal table +pedestrian crossing +pedicab +peg +pen +penal institution +pencil +pendulum +pendulum clock +percolator +percussion instrument +perfumery +peripheral +periwig +personal computer +petticoat 
+Phillips screw +Phillips screwdriver +phonograph record +photographic equipment +photographic paper +photometer +physical pendulum +piano +piccolo +pick +pick +pickle barrel +piece of cloth +pile +pillow lace +pilothouse +pin +pincer +pinstripe +pipe +pipet +pipe wrench +pistol +pivot +place of business +place of worship +planetarium +planner +plant +planter +plasterboard +plastic laminate +plastic wrap +plastron +plate +platform +platform +platform rocker +plating +pleat +plethysmograph +plexor +pliers +plug +plug +pneumatic drill +pocket +pocket-handkerchief +pocketknife +pointed arch +polyester +polygraph +pomade +pontifical +pool ball +poorhouse +porcelain +porch +portable computer +portico +post +posthole digger +pot +potential divider +potpourri +pottery +pouch +poultice +powder +powder keg +power brake +power mower +power saw +power shovel +power tool +press +press +pressure dome +pressure gauge +pressure suit +printed circuit +printer +prison camp +prod +prolonge knot +prompter +prong +propeller +propeller plane +prosthesis +protective covering +protective garment +pruning saw +pruning shears +public house +public toilet +public transport +pull +pull chain +pulley +Pullman +pullover +pulse counter +pump +pump +pump house +punch +punch press +purifier +push broom +push button +pusher +puzzle +pyrometer +pyx +QWERTY keyboard +racing boat +rack +rack +radar +radiogram +radio interferometer +radio link +radiometer +radio receiver +radiotelegraph +radiotelephone +radio transmitter +raft +rail +rail fence +railing +raincoat +rake +ramp +rampart +random-access memory +rayon +razor +reaction-propulsion engine +reactor +reading lamp +reading room +read-only memory +rearview mirror +receiver +receptacle +reception room +recess +reconnaissance plane +recorder +recording +record player +recreation room +recycling bin +reed stop +reef knot +refectory table +refinery +reflecting telescope +reflector +reformatory +refracting telescope +refrigerator car +refuge +regalia 
+regimentals +regulator +rein +religious residence +removable disk +repair shop +repeating firearm +reproducer +rescue equipment +reservoir +reset button +residence +resistor +resonator +respirator +restraint +retort +rheostat +rib +ribbed vault +riddle +ride +riding boot +riding mower +rifle ball +rig +rink +river boat +road +roadway +robe +rocket +rocket +rod +roller +roller +in-line skate +roller blind +roller coaster +rolling hitch +Rolodex +Roman building +roof +roof +room +roost +rope +rose water +rotary engine +rotating mechanism +rotating shaft +rotisserie +rotor +round arch +router plane +row house +royal mast +rubber bullet +rug +rushlight +sable +sable coat +sack +sackbut +sacking +saddle +safe +safety belt +safety curtain +safety fuse +safety match +sail +sailboat +sailing vessel +salver +sandglass +sash +satellite +satellite television +saucepan +savings bank +saw +sawhorse +scale +scarf +school +scientific instrument +scissors +scoop +scratcher +screen +screen +screen +screw eye +scrub plane +scuffer +sculpture +sea boat +sea chest +seam +seaplane +seat +seat +second hand +secretary +security system +seeker +selector +self-propelled vehicle +semiautomatic firearm +semiautomatic pistol +semiconductor device +serger +serpent +serving cart +serving dish +set +setscrew +setscrew +sewing needle +sextant +shackle +shade +shaft +shag rug +shaker +shaper +shaping tool +sharpener +shaving cream +shaving foam +shawl +shawm +shears +sheath +shed +sheepshank +sheet bend +shelf +shell +shell +shell +shellac +shelter +shelter +shelter +shield +ship +shipboard system +shirt +shirtfront +shock absorber +shoe +shooting brake +shop +short pants +shotgun +shoulder holster +shrine +shutter +shuttle +sidewinder +sieve +sifter +sights +signaling device +signboard +silk +simulator +single bed +single-breasted suit +single-reed instrument +sitz bath +six-pack +skate +skein +skeleton +skewer +skidder +skid lid +skiff +ski pole +skirt +ski tow +skullcap +slack suit +slat +sled 
+sleeper +sleeping car +sleeve +sleeve +slide projector +slipknot +slipper +sloop +slop pail +slot machine +small boat +smart bomb +smoker +smooth plane +snack bar +snap-brim hat +snare drum +sniper rifle +Sno-cat +soapbox +socle +sofa +sonograph +sorter +sound recording +soup ladle +source of illumination +soutane +spacecraft +spade +spar +spatula +spear +spear +spectacles +spectrograph +spectroscope +speedometer +spider +spike +spike +spinet +spinning machine +spiral ratchet screwdriver +spiral spring +spit +spokeshave +sponge mop +spoon +sports equipment +sports implement +sportswear +spot +spring +spring balance +springboard +sprit +square +square knot +squash racket +squawk box +squeezer +squinch +stabilizer +stabilizer +stable gear +stadium +stall +stamp mill +stand +standard cell +staple +starter +state prison +station +statue +stay +steakhouse +stealth aircraft +stealth bomber +stealth fighter +steam bath +steamboat +steamer +steam iron +steam whistle +steel mill +steelyard +steeple +steering system +step +step-up transformer +stereo +stick +stick +still +stilt +Stinger +stock +stockcar +stock car +stocking +stonework +stool +stopper knot +storage battery +storage space +storeroom +stove +stove bolt +Stradavarius +straight chair +strap +strap +stringed instrument +strip +strongbox +stronghold +strongroom +structural member +structure +stylus +submachine gun +submersible +submersible +subwoofer +suction pump +suede cloth +sunbonnet +sunhat +supermarket +superstructure +supply chamber +support +support +support column +supporting structure +supporting tower +surface lift +surface-to-air missile +surgeon's knot +surgical instrument +surgical knife +surplice +surveillance system +surveying instrument +surveyor's level +swamp buggy +sweater +swimsuit +sword +synchrotron +system +tabi +table +table +table knife +tableware +tabor +tachometer +tack +tack hammer +talaria +tambour +tambourine +tampon +tank +tank car +tannoy +tape +tape deck +tape recorder +target 
+tavern +tea chest +teaching aid +tea gown +teashop +teaspoon +tea-strainer +tea tray +telecommunication system +telephone +telephone line +telephone receiver +telephone system +telephone wire +telescope +television antenna +television camera +television equipment +television monitor +temple +temple +tender +tennis racket +tenor drum +tenoroon +tenpenny nail +tent +tenterhook +terminal +terminal +test rocket +tetraskelion +textile machine +textile mill +theater +theodolite +thermometer +thermostat +three-piece suit +three-way switch +thumbscrew +thumbtack +tights +tile +timber +timber hitch +timbrel +time-fuse +timepiece +timer +time-switch +tire chain +tithe barn +toecap +toga +toggle switch +toilet +toilet powder +toiletry +toilet water +token +tomograph +toner +tongs +tool +toolbox +tooth +toothbrush +top +top +topgallant +topmast +topsail +torpedo +torpedo boat +touch screen +towel +toweling +tower +toy box +track +tracked vehicle +trailer +trailer +train +trammel +transdermal patch +transformer +transistor +transmission +transmitter +transporter +trap +trapeze +travel iron +treasure chest +trellis +trench +trial balloon +triclinium +troop carrier +trough +trouser +trowel +truck +trunk +try square +tube +tuck shop +tun +tunic +turbine +Turkish towel +Turk's head +turner +turntable +turtleneck +tweed +tweeter +twenty-two +two-piece +typesetting machine +typewriter +ultraviolet lamp +undercarriage +undergarment +underpants +underwear +uneven parallel bars +uniform +university +uplift +urn +urn +utensil +vacuum flask +valve +van +van +varnish +vehicle +veranda +vertical file +vessel +vessel +vest +vibrator +vibrator +videocassette +video recording +vigil light +viol +vise +vivarium +voltaic cell +voltmeter +wagon +waist pack +walking stick +wall +wall +wall unit +ward +warehouse +warship +wash +washer +washtub +watch +watchtower +water-base paint +water butt +water cart +watercolor +water-cooled reactor +water gauge +water ski +waterwheel +weapon +weaponry 
+weatherglass +weathervane +web +wedge +wedge +weighbridge +weight +weir +weld +well +whaler +wheel +wheelchair +wheeled vehicle +wheelwork +whetstone +whip +whisk +whispering gallery +white goods +whorehouse +wicker basket +widebody aircraft +winch +Winchester +wind instrument +window +window +window blind +window envelope +Windsor knot +wine bucket +wine cask +wineglass +wire +wire +wire matrix printer +wiring +woman's clothing +wood +woodenware +woodscrew +woodwind +woofer +workbasket +workbench +work-clothing +worktable +workwear +wrapping +wrench +writing desk +writing implement +X-ray film +X-ray machine +yacht chair +yard +yard +yardstick +yoke +zither +zoot suit +grain +light +colorlessness +chromatic color +black +gray +dark red +orange +yellow +green +blue +purple +reddish purple +pink +light brown +reddish brown +complexion +skin +epidermal cell +columnar cell +macule +specimen +milk +embryonic cell +leukocyte +neutrophil +astrocyte +exoskeleton +medium +film +press +print media +storage medium +journalism +photojournalism +newspaper +telecommunication +telephone +call +long distance +wireless +broadcasting +television +reception +chat room +portal site +wordbook +album +concept album +magazine +movie +sign +comestible +course +dainty +dish +fare +diet +dietary supplement +liquid diet +reducing diet +vegetarianism +ration +field ration +foodstuff +starches +concentrate +meal +roughage +flour +wheat flour +nutriment +commissariat +canned food +canned meat +meal +breakfast +lunch +dinner +supper +buffet +picnic +cookout +bite +entree +side dish +casserole +chicken casserole +appetizer +cocktail +hors d'oeuvre +relish +dip +soup +madrilene +broth +broth +chowder +clam chowder +stew +goulash +fish stew +fricassee +ragout +ready-mix +powdered sugar +granulated sugar +brown sugar +sweet +confiture +candy +hard candy +patty +brittle +chewing gum +candied fruit +candied citrus peel +fudge +gumdrop +mint +kiss +lozenge +taffy +dessert +dumpling +frozen dessert 
+mousse +mousse +whip +pudding +pudding +tipsy cake +ice +chocolate ice cream +Neapolitan ice cream +peach ice cream +strawberry ice cream +tutti-frutti +vanilla ice cream +split +pudding +custard +pastry +turnover +puff paste +phyllo +fish cake +conserve +jam +jelly +apple jelly +marmalade +gelatin +gelatin dessert +patty +stuffing +bread +breadstick +bun +cracker +dark bread +flatbread +loaf of bread +quick bread +rye bread +toast +white bread +French bread +cornbread +johnnycake +muffin +scone +onion roll +sweet roll +onion bagel +biscuit +baking-powder biscuit +soft pretzel +sandwich +hamburger +gruel +edible fruit +vegetable +crudites +legume +greens +solanaceous vegetable +root vegetable +potato +baked potato +sweet potato +snack food +corn chip +tortilla chip +cruciferous vegetable +cabbage +kale +red cabbage +savoy cabbage +squash +summer squash +yellow squash +winter squash +turban squash +gherkin +sprout +beet +pepper +sweet pepper +hot pepper +chili +jalapeno +onion +Spanish onion +salad green +lettuce +butterhead lettuce +bean +pea +green pea +common bean +fresh bean +green bean +shell bean +lima bean +soy +celery +chicory +coffee substitute +chicory escarole +corn +hominy +cress +tomato +cherry tomato +salsify +turnip +edible nut +apple +eating apple +Delicious +McIntosh +Pippin +cooking apple +berry +currant +citrus +temple orange +mandarin +bitter orange +sweet orange +Jaffa orange +navel orange +Valencia orange +lime +almond +plum +dried fruit +raisin +passion fruit +cocoa +melon +muskmelon +winter melon +cherry +sweet cherry +heart cherry +sour cherry +grape +fox grape +muscadine +slipskin grape +vinifera grape +Tokay +cherimoya +soursop +sweetsop +ilama +pond apple +olive +pear +edible seed +walnut +feed +fodder +oil cake +timothy +grain +barley +wheat +rice +mash +bird feed +petfood +salad +tossed salad +combination salad +pasta salad +fruit salad +ingredient +flavorer +condiment +herb +spice +cinnamon +pepper +garlic +mustard +sage +savory 
+curry +paprika +pickle +sweet pickle +vinegar +sauce +hot sauce +dressing +mayonnaise +cheese sauce +hot-fudge sauce +white sauce +spaghetti sauce +boiled egg +hard-boiled egg +Easter egg +omelet +firm omelet +souffle +dairy product +milk +milk +powdered milk +cream +butter +clarified butter +yogurt +curd +cheese +cream cheese +bleu +cheddar +Swiss cheese +spread +pate +sweetening +sugar +syrup +batter +bread dough +chicken and rice +pasta +Tetrazzini +chili dog +fondue +fondue +hash +kabob +seafood Newburg +meatball +pilaf +sausage pizza +pepperoni pizza +cheese pizza +anchovy pizza +Sicilian pizza +porridge +fish loaf +salmon loaf +scallopine +taco +beef burrito +quesadilla +tostada +beverage +concoction +mix +filling +potion +elixir +alcohol +brew +beer +lager +Weissbier +malt +ale +stout +mead +wine +white wine +sparkling wine +Burgundy +Beaujolais +Medoc +Pinot noir +Bordeaux +claret +Chianti +Cabernet +Merlot +dessert wine +Rhine wine +Rioja +Saint Emilion +zinfandel +table wine +vermouth +fortified wine +Madeira +liquor +brandy +gin +rum +whiskey +corn whiskey +Irish +Scotch +liqueur +coffee liqueur +orange liqueur +mixed drink +cocktail +highball +Bloody Mary +daiquiri +manhattan +martini +sling +sour +caffe latte +cider +sweet cider +juice +fruit juice +grape juice +orange juice +fruit drink +mulled wine +soft drink +cola +coffee +punch +champagne cup +claret cup +rickey +tea +tea +herb tea +tisane +black tea +green tea +water +drinking water +mineral water +vitamin pill +collection +suburb +residence +littoral +grassland +pasture +resort +field +air bubble +arroyo +ascent +atoll +bank +bank +bar +barrier reef +basin +beach +burrow +canyon +cave +continental glacier +crag +crater +dale +descent +draw +dune +geological formation +glacier +glen +gorge +gulch +gully +highland +hill +hillside +hole +hollow +iceberg +ice mass +ion +knoll +landfall +landfill +lather +ledge +lowland +meteorite +mountain +mull +natural depression +natural elevation +nullah +ocean 
floor +outcrop +plain +point +precipice +ravine +reef +ridge +ridge +rift valley +rock +sandbank +seaside +shiner +shore +slope +soapsuds +spume +tableland +tideland +volcanic crater +wadi +spiritual leader +adventurer +anomaly +benefactor +commoner +contestant +discussant +entertainer +female +finisher +inhabitant +native +juvenile +lover +male +mediator +national +peer +recipient +sensualist +traveler +unwelcome person +unskilled person +worker +wrongdoer +Black +White +Semite +white man +Mongol +Nahuatl +Caddo +Penutian +Teton +Taracahitian +Slav +Catholic +Altaic +Bornean +Canadian +Central American +Britisher +English person +Englishwoman +Ethiopian +Parisian +Greek +Italian +Japanese +Mexican +Nigerian +North American +Pakistani +South American Indian +Filipino +Polynesian +Scandinavian +South African +South American +Turki +American +New Yorker +abbess +abstainer +academic administrator +accomplice +acquaintance +acquirer +aerialist +actor +actor +addict +adjutant +admirer +adulterer +advertiser +advocate +analyst +ancestor +announcer +announcer +appointee +appreciator +appropriator +archbishop +architect +army engineer +army officer +arrival +articulator +asserter +assistant +associate +astronaut +athlete +attendant +aunt +authoritarian +authority +aviator +back +bad person +ballet dancer +bullfighter +baron +bartender +baseball coach +base runner +basketball player +believer +betrothed +bigot +big shot +biochemist +bisexual +boatman +bond servant +botanist +Boy Scout +buddy +campaigner +captain +card player +careerist +caretaker +cavalryman +celebrity +charmer +child +child +cipher +citizen +civil rights leader +cleaner +clergyman +cleric +clerk +climber +closer +clown +coach +cobbler +collaborator +college student +collegian +commanding officer +commissioned officer +commissioned military officer +commissioner +committee member +communist +compulsive +computer scientist +computer user +contractor +convict +copycat +counselor +craftsman +creditor +critic 
+curate +dancer +dancer +darling +date +daughter +dawdler +deacon +deaf person +debtor +deliveryman +descender +designated hitter +detective +detractor +director +disbeliever +dispatcher +distributor +doctor +domestic partner +draftsman +drinker +drinker +drug addict +drug user +drummer +drunkard +eager beaver +earner +eavesdropper +economist +editor +egotist +elder +elected official +emissary +employee +employer +endomorph +enemy +entrant +examiner +exhibitionist +fan +fancier +farmer +farmhand +fascist +father +female aristocrat +female offspring +female child +fielder +fireman +first baseman +first sergeant +flag officer +flatterer +foe +folk dancer +follower +football player +forefather +forger +founder +free agent +friar +monk +gambler +generator +geneticist +genitor +geologist +girl +godchild +godparent +golfer +grandma +grandmaster +grandparent +granter +great grandchild +great grandparent +grouch +guard +guest +guide +gymnast +Gypsy +hack +hairdresser +hater +headmaster +hearer +hedonist +heir +herder +homeless +horseman +host +host +hypocrite +important person +incumbent +infielder +informer +in-law +insurgent +investigator +investor +journalist +judge +juror +Counsel to the Crown +kinswoman +laborer +lama +landowner +lawgiver +lawman +lawyer +liberator +lieutenant +lineman +literate +litigant +Lord +failure +lowerclassman +lumberman +maid +maker +malcontent +martinet +master of ceremonies +masturbator +medical officer +medical practitioner +medical scientist +mender +meteorologist +middle-aged man +miler +military attache +military officer +military policeman +minister +minor leaguer +misfit +mixed-blood +model +moneymaker +mother +mourner +mover +musician +Muslimah +mystic +nanny +neonate +nephew +neutral +newcomer +newcomer +newspaper editor +niece +noncommissioned officer +nurse +observer +occultist +oldster +old woman +opportunist +orator +originator +outfielder +right fielder +right-handed pitcher +painter +panelist +pardoner +parodist +party 
+passenger +patient +patron +payer +peddler +percussionist +personal representative +personification +pervert +petitioner +Pharaoh +phonetician +physical therapist +physicist +pimp +pisser +pitcher +planner +player +poet +politician +practitioner +prayer +preserver +president +priest +princess +principal +proctor +programmer +promiser +propagandist +prosecutor +psychic +pusher +queen +queen +ranch hand +reader +recruit +recruiter +religious leader +repairman +reporter +representative +reprobate +rescuer +reservist +restrainer +retailer +retiree +revolutionist +rich person +civil authority +runner +running back +rustic +saboteur +sailor +salesman +salesperson +scalper +schemer +scholar +schoolchild +scientist +second baseman +secretary +seeker +selfish person +seller +serf +serviceman +settler +shrew +sibling +sick person +singer +sister +skeptic +skier +sleeper +slob +smith +snoop +social climber +socialist +social scientist +sociologist +soldier +son +songster +sorcerer +sovereign +speaker +specialist +spectator +stand-in +star +stepparent +stock trader +stranger +strategist +student +subordinate +suitor +superior +surgeon +sweetheart +sympathizer +tax assessor +taxonomist +teacher +television reporter +tenant +tenant +tennis player +testator +testee +theologian +therapist +thinker +thrower +toastmaster +trader +traffic cop +trainer +traitor +traveling salesman +tyrant +upstart +upstart +utility man +vacationer +vegetarian +vice president +victim +volunteer +votary +waiter +waitress +wanderer +wanton +washer +white supremacist +wife +winner +winner +woman +workman +worshiper +wright +writer +wilding +bryophyte +liverwort +pteridophyte +fern +fern ally +spore +spermatophyte +perennial +gymnosperm +ephedra +cycad +sago palm +zamia +pine +pinon +nut pine +white pine +yellow pine +larch +fir +silver fir +cedar +spruce +hemlock +douglas fir +cedar +cypress +arborvitae +araucaria +kauri pine +celery pine +yellowwood +gymnospermous yellowwood +yew +angiosperm +dicot 
+flower +wildflower +inflorescence +pistil +pericarp +oilseed +custard apple +barberry +allspice +laurel +anise tree +magnolia +moonseed +buttercup +aconite +baneberry +anemone +thimbleweed +columbine +clematis +delphinium +nigella +wax myrtle +zebrawood +legume +legume +darling pea +clover +acacia +wattle +albizzia +nitta tree +dogbane +allamanda +carissa +frangipani +rauwolfia +arum +alocasia +anthurium +caladium +monstera +nephthytis +arrow arum +calla lily +duckweed +watermeal +birthwort +sandwort +mouse-ear chickweed +pink +china pink +lychnis +silene +chickweed +fig marigold +amaranth +orach +saltbush +beet +sand verbena +four o'clock +echinocactus +prickly pear +pokeweed +portulaca +flame flower +caper +spiderflower +crucifer +cress +watercress +rock cress +cabbage +head cabbage +turnip plant +mustard +wallflower +woad +stock +radish plant +pennycress +poppy +prickly poppy +composite +compass plant +everlasting +achillea +ageratum +ragweed +ammobium +burdock +artemisia +mugwort +aster +wood aster +common daisy +bur marigold +calendula +thistle +carline thistle +catananche +centaury +knapweed +chrysanthemum +golden aster +goldenbush +plume thistle +woolly thistle +coreopsis +fleabane +woolly sunflower +cotton rose +gazania +African daisy +cudweed +gumweed +goldenbush +sneezeweed +sunflower +hawkweed +marsh elder +krigia +hawkbit +blazing star +rattlesnake root +daisybush +coneflower +coneflower +cutleaved coneflower +golden thistle +white-topped aster +goldenrod +sow thistle +marigold +dandelion +crownbeard +zinnia +achene +campanula +orchid +orchis +arethusa +helleborine +coral root +lady's slipper +large yellow lady's slipper +helleborine +fringed orchis +rein orchid +spider orchid +moth orchid +butterfly orchid +ladies' tresses +vanda +vanilla +yam +primrose +pimpernel +featherfoil +loosestrife +water pimpernel +gramineous plant +grass +wheatgrass +foxtail +broom grass +oat +brome +grama +reed grass +burgrass +crabgrass +lyme grass +wild rye +plume grass 
+rye grass +ricegrass +meadowgrass +millet +reed +sorghum +grain sorghum +cordgrass +cereal +wheat +corn +mealie +zoysia +bamboo +cotton grass +spike rush +pandanus +cattail +grain +kernel +gourd +gourd +squash +summer squash +marrow +winter squash +turban squash +bryony +sweet melon +luffa +lobelia +mallow +hollyhock +althea +poppy mallow +seashore mallow +globe mallow +tulipwood tree +sterculia +bottle-tree +screw tree +cacao +linden +herb +protea +banksia +grevillea +macadamia +casuarina +beefwood +heath +bearberry +huckleberry +kalmia +rhododendron +cranberry +blueberry +shortia +Australian heath +epacris +wintergreen +pipsissewa +beech +chestnut +tanbark oak +southern beech +New Zealand beech +oak +live oak +white oak +red oak +scrub oak +chestnut oak +birch +alder +hornbeam +hop hornbeam +hazelnut +centaury +gentian +fringed gentian +olive tree +fringe tree +ash +red ash +jasmine +privet +lilac +liquidambar +walnut +hickory +wing nut +loosestrife +myrtle +gum tree +eucalyptus +flooded gum +mallee +stringybark +tupelo +enchanter's nightshade +willowherb +fuchsia +evening primrose +daphne +canna +banana +ginger +begonia +tuberous begonia +poon +St John's wort +rockrose +dipterocarp +candlewood +reseda +viola +violet +nettle +cannabis +mulberry +fig tree +fig +elm +hackberry +iridaceous plant +bearded iris +beardless iris +crocus +amaryllis +blood lily +narcissus +daffodil +liliaceous plant +colicroot +alliaceous plant +kniphofia +poker plant +asphodel +mariposa +globe lily +camas +dogtooth violet +fritillary +tulip +star-of-Bethlehem +grape hyacinth +scilla +false asphodel +bog asphodel +hellebore +death camas +sarsaparilla +Solomon's-seal +bellwort +agave +sansevieria +cassia +locust tree +senna +angelim +milk vetch +wild indigo +pea tree +glory pea +rosewood +blackwood +tick trefoil +coral tree +vetchling +wild pea +lupine +medic +mucuna +locoweed +pole bean +pea +edible-pod pea +quira +hoary pea +bush pea +vetch +palm +sago palm +feather palm +fan palm 
+palmetto +areca +calamus +oil palm +raffia palm +lady palm +eriogonum +rhubarb +water plantain +waterweed +pondweed +rose +agrimonia +flowering quince +cotoneaster +avens +apple tree +wild apple +crab apple +Iowa crab +cinquefoil +plum +wild plum +bullace +apricot +cherry +wild cherry +sweet cherry +sour cherry +almond tree +almond +bird cherry +flowering cherry +chokecherry +fruit tree +bramble bush +raspberry +mountain ash +service tree +spirea +madderwort +coffee +cinchona +bedstraw +genipa +hamelia +honeysuckle +American fly honeysuckle +teasel +scabious +geranium +cranesbill +storksbill +incense tree +mahogany +silver ash +milkwort +citrus +orange +mandarin +lemon +kumquat +prickly ash +bitterwood tree +ailanthus +nasturtium +willow +osier +sallow +poplar +black poplar +cottonwood +aspen +soapberry +soapberry vine +harpullia +pachysandra +spindle tree +maple +box elder +holly +sumac +horse chestnut +persimmon +buckthorn +styrax +carnivorous plant +pitcher plant +sedum +philadelphus +saxifrage +astilbe +alumroot +miterwort +parnassia +currant +plane tree +phlox +acanthus +catalpa +anchusa +comfrey +convolvulus +bindweed +gloxinia +streptocarpus +waterleaf +nemophila +scorpionweed +giant hyssop +bugle +wood mint +calamint +coleus +dead nettle +origanum +horehound +monarda +savory +germander +thyme +blue curls +snapdragon +kitten-tails +Indian paintbrush +foxglove +toadflax +veronica +nightshade +thorn apple +matrimony vine +cupflower +petunia +salpiglossis +spurge +croton +cassava +slipper spurge +camellia +umbellifer +angelica +astrantia +caraway +fennel +parsnip +parsley +sanicle +dogwood +valerian +bristle fern +flowering fern +climbing fern +clover fern +adder's tongue +grape fern +ergot +sclerotinia +earthball +Podaxaceae +false truffle +rhizopus +slime mold +cellular slime mold +downy mildew +pythium +Sarcosomataceae +club fungus +lichen +lecanora +fungus +basidiomycete +mushroom +mushroom +mushroom +toadstool +horse mushroom +meadow mushroom +royal 
agaric +false deathcap +fly agaric +death cap +blushing mushroom +destroying angel +chanterelle +floccose chanterelle +pig's ears +cinnabar chanterelle +jack-o-lantern fungus +inky cap +shaggymane +milkcap +fairy-ring mushroom +oyster mushroom +olive-tree agaric +Pholiota astragalina +Pholiota aurea +Pholiota destruens +Pholiota flammans +Pholiota flavida +nameko +Pholiota squarrosa-adiposa +Pholiota squarrosa +Pholiota squarrosoides +Stropharia ambigua +Stropharia hornemannii +Stropharia rugoso-annulata +Entoloma lividum +Entoloma aprile +Chlorophyllum molybdites +lepiota +parasol mushroom +poisonous parasol +Lepiota naucina +Lepiota rhacodes +American parasol +Lepiota rubrotincta +Lepiota clypeolaria +onion stem +blewits +sandy mushroom +Tricholoma pessundatum +Tricholoma sejunctum +man-on-a-horse +Tricholoma venenata +Tricholoma pardinum +Tricholoma vaccinum +Tricholoma aurantium +Pluteus aurantiorugosus +Pluteus magnus +deer mushroom +straw mushroom +Volvariella bombycina +Clitocybe clavipes +Clitocybe dealbata +Clitocybe inornata +Clitocybe robusta +Clitocybe irina +Clitocybe subconnexa +winter mushroom +mycelium +ascomycete +Clavicipitaceae +yeast +discomycete +morel +Verpa +false morel +lorchel +helvella +Gyromitra californica +Gyromitra sphaerospora +Gyromitra esculenta +Gyromitra infula +Gyromitra gigas +gasteromycete +common stinkhorn +Phallus ravenelii +dog stinkhorn +stinky squid +puffball +Geastrum coronatum +Astreus pteridis +Astreus hygrometricus +polypore +Boletus chrysenteron +Boletus edulis +Frost's bolete +Boletus luridus +Boletus mirabilis +Boletus pallidus +Boletus pulcherrimus +Boletus pulverulentus +Boletus roxanae +Boletus subvelutipes +Boletus variipes +Boletus zelleri +Fuscoboletinus paluster +Fuscoboletinus serotinus +Leccinum fibrillosum +Suillus albivelatus +old-man-of-the-woods +Boletellus russellii +jelly fungus +rust +smut +cornsmut +flag smut fungus +waxycap +Hygrocybe acutoconica +Hygrophorus borealis +Hygrophorus caeruleus 
+Hygrophorus inocybiformis +Hygrophorus kauffmanii +Hygrophorus marzuolus +Hygrophorus purpurascens +Hygrophorus russula +Hygrophorus sordidus +Hygrophorus tennesseensis +Hygrophorus turundus +Neohygrophorus angelesianus +Cortinarius armillatus +Cortinarius atkinsonianus +Cortinarius corrugatus +Cortinarius gentilis +Cortinarius mutabilis +Cortinarius semisanguineus +Cortinarius subfoetidus +Cortinarius violaceus +Gymnopilus spectabilis +Gymnopilus validipes +Gymnopilus ventricosus +mold +mildew +candida +houseplant +succulent +weed +sporophyll +sporangium +poisonous plant +vine +tree +bean tree +gymnospermous tree +conifer +angiospermous tree +nut tree +spice tree +bonsai +subshrub +bramble +liana +desert plant +marsh plant +strangler +root +receptacle +scape +peduncle +flower cluster +raceme +cyme +bulbous plant +fruit +seed +bean +nut +berry +aggregate fruit +drupe +drupelet +pome +pod +husk +buckthorn +vinifera +true pepper +peperomia +bract +palmate leaf +pinnate leaf +dentate leaf +branchlet +polypody +strap fern +staghorn fern +spleenwort +chain fern +davallia +hare's-foot fern +shield fern +wood fern +lady fern +bladder fern +holly fern +woodsia +maidenhair +brittle maidenhair +lip fern +cliff brake +horsetail +club moss +spikemoss +beech fern +shoestring fungus +Armillaria caligata +Armillaria ponderosa +Armillaria zelleri +honey mushroom +milkweed +stapelia +stephanotis +orangery +figure +plane figure +solid figure +line +convex shape +concave shape +cylinder +round shape +polygon +concave polygon +amorphous shape +closed curve +simple closed curve +cone +circle +ring +loop +ellipse +triangle +spherical polygon +angular distance +groove +bulge +bow +balance +toroid +boundary +incisure +notch +wrinkle +tree +regular polyhedron +carbon +rock +soil +high explosive +culture medium +agar +paper +paving +plaster +stucco +tear gas +vitamin +fat-soluble vitamin +water-soluble vitamin +vitamin A +B-complex vitamin +vitamin E +vitamin K \ No newline at end of file 
diff --git a/workloads/realworld/standard/darknet/cfg/imagenet1k.data b/workloads/realworld/standard/darknet/cfg/imagenet1k.data new file mode 100644 index 0000000000000000000000000000000000000000..daf120a3c020003e8ed08096c51304272ca3ba27 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/imagenet1k.data @@ -0,0 +1,9 @@ +classes=1000 +train = /data/darknet/imagenet_mini/valid.list +valid = /data/darknet/imagenet_mini/valid.list +test = /data/darknet/imagenet_mini/valid.list +backup = /data/darknet/backup/ +labels = /data/darknet/imagenet_mini/imagenet.labels.list +names = /data/darknet/imagenet_mini/imagenet.shortnames.list +top=5 + diff --git a/workloads/realworld/standard/darknet/cfg/imagenet22k.dataset b/workloads/realworld/standard/darknet/cfg/imagenet22k.dataset new file mode 100644 index 0000000000000000000000000000000000000000..e25ef007ecceb096e5846ee7cacd1fd54fb8f9e4 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/imagenet22k.dataset @@ -0,0 +1,9 @@ +classes=21842 +train = /data/imagenet/imagenet22k.train.list +valid = /data/imagenet/imagenet22k.valid.list +#valid = /data/imagenet/imagenet1k.valid.list +backup = /home/pjreddie/backup/ +labels = data/imagenet.labels.list +names = data/imagenet.shortnames.list +top = 5 + diff --git a/workloads/realworld/standard/darknet/cfg/imagenet9k.hierarchy.dataset b/workloads/realworld/standard/darknet/cfg/imagenet9k.hierarchy.dataset new file mode 100644 index 0000000000000000000000000000000000000000..41fb71b065544b919bc8ed7d723afb5d04ad85ac --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/imagenet9k.hierarchy.dataset @@ -0,0 +1,9 @@ +classes=9418 +train = data/9k.train.list +valid = /data/imagenet/imagenet1k.valid.list +leaves = data/imagenet1k.labels +backup = /home/pjreddie/backup/ +labels = data/9k.labels +names = data/9k.names +top=5 + diff --git a/workloads/realworld/standard/darknet/cfg/jnet-conv.cfg b/workloads/realworld/standard/darknet/cfg/jnet-conv.cfg new file mode 100644 
index 0000000000000000000000000000000000000000..056f82aa6e2a0710a664c4740ab763961e4de33d --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/jnet-conv.cfg @@ -0,0 +1,118 @@ +[net] +batch=1 +subdivisions=1 +height=10 +width=10 +channels=3 +learning_rate=0.01 +momentum=0.9 +decay=0.0005 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + diff --git a/workloads/realworld/standard/darknet/cfg/openimages.data b/workloads/realworld/standard/darknet/cfg/openimages.data new file mode 100644 index 0000000000000000000000000000000000000000..fa80e5ab7d8576d391c7cac9dfc8367aab704139 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/openimages.data @@ -0,0 +1,8 @@ +classes= 601 +train = /home/pjreddie/data/openimsv4/openimages.train.list +#valid = coco_testdev +valid = data/coco_val_5k.list +names = data/openimages.names +backup = /home/pjreddie/backup/ +eval=coco + diff --git a/workloads/realworld/standard/darknet/cfg/resnet101.cfg 
b/workloads/realworld/standard/darknet/cfg/resnet101.cfg new file mode 100644 index 0000000000000000000000000000000000000000..de458820bcd35f5e65d858f9f661e42653ed0184 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/resnet101.cfg @@ -0,0 +1,990 @@ +[net] +# Training +# batch=128 +# subdivisions=2 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + +[cost] +type=sse + diff --git a/workloads/realworld/standard/darknet/cfg/resnet152.cfg b/workloads/realworld/standard/darknet/cfg/resnet152.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e8e3297ac2364b95f28fa0a0bdd4ca71f14ac82c --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/resnet152.cfg @@ -0,0 +1,1460 @@ +[net] +# Training +# batch=128 +# subdivisions=8 + +# 
Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 
+stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/resnet18.cfg b/workloads/realworld/standard/darknet/cfg/resnet18.cfg new file mode 100644 index 0000000000000000000000000000000000000000..275f4bdb5962d77c16f353cd3d2751e189b9344c --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/resnet18.cfg @@ -0,0 +1,228 @@ +[net] +# Training +# batch=128 +# subdivisions=1 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 
+size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/resnet18_b.cfg b/workloads/realworld/standard/darknet/cfg/resnet18_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c0712e9cdfaf1f28dbe3a358355405d2758e6d2b --- /dev/null +++ 
b/workloads/realworld/standard/darknet/cfg/resnet18_b.cfg @@ -0,0 +1,228 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] 
+activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/resnet18_t.cfg b/workloads/realworld/standard/darknet/cfg/resnet18_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c0712e9cdfaf1f28dbe3a358355405d2758e6d2b --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/resnet18_t.cfg @@ -0,0 +1,228 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 
+size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/resnet34.cfg b/workloads/realworld/standard/darknet/cfg/resnet34.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9f68f096741ae3b4898f40b76af7569d4697729f --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/resnet34.cfg @@ -0,0 +1,392 @@ +[net] +# Training +# batch=128 +# subdivisions=2 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 
+channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 
+stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/resnet50.cfg b/workloads/realworld/standard/darknet/cfg/resnet50.cfg new file mode 100644 index 0000000000000000000000000000000000000000..d0d7c511516e997a392bb5ba77682740c0494972 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/resnet50.cfg @@ -0,0 +1,510 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/standard/darknet/cfg/resnet50_b.cfg b/workloads/realworld/standard/darknet/cfg/resnet50_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..462479918fd608c4a0761b84c7299e51081193c6 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/resnet50_b.cfg @@ -0,0 +1,510 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/standard/darknet/cfg/resnet50_t.cfg b/workloads/realworld/standard/darknet/cfg/resnet50_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..462479918fd608c4a0761b84c7299e51081193c6 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/resnet50_t.cfg @@ -0,0 +1,510 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 
+stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/standard/darknet/cfg/resnext101-32x4d.cfg b/workloads/realworld/standard/darknet/cfg/resnext101-32x4d.cfg new file mode 100644 index 0000000000000000000000000000000000000000..8538ccc3daee2e3de589eb4e2edf868340d4924b --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/resnext101-32x4d.cfg @@ -0,0 +1,1053 @@ +[net] +# Training +# batch=128 +# subdivisions=8 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 
+size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 
+size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 
+size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/resnext152-32x4d.cfg b/workloads/realworld/standard/darknet/cfg/resnext152-32x4d.cfg new file mode 100644 index 0000000000000000000000000000000000000000..48279fd28eb0dbe23c7b0c593f67051cb6a62374 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/resnext152-32x4d.cfg @@ -0,0 +1,1562 @@ +[net] +# Training +# batch=128 +# subdivisions=16 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] 
+from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 
+pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=2 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/standard/darknet/cfg/resnext50.cfg b/workloads/realworld/standard/darknet/cfg/resnext50.cfg new file mode 100644 index 0000000000000000000000000000000000000000..12aebdf6fbd48bde40ee22c4257e06f2e0cf46eb --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/resnext50.cfg @@ -0,0 +1,523 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/standard/darknet/cfg/rnn.cfg b/workloads/realworld/standard/darknet/cfg/rnn.cfg new file mode 100644 index 0000000000000000000000000000000000000000..61b202f3a441b701f76d9b007c6276467c639e11 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/rnn.cfg @@ -0,0 +1,38 @@ +[net] +subdivisions=1 +inputs=256 +batch = 1 +momentum=0.9 +decay=0.001 +max_batches = 2000 +time_steps=1 +learning_rate=0.1 +policy=steps +steps=1000,1500 +scales=.1,.1 + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[connected] 
+output=256 +activation=leaky + +[softmax] + + diff --git a/workloads/realworld/standard/darknet/cfg/rnn.train.cfg b/workloads/realworld/standard/darknet/cfg/rnn.train.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b9748990aceaa85cc2e43358073114606725dcbd --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/rnn.train.cfg @@ -0,0 +1,38 @@ +[net] +subdivisions=1 +inputs=256 +batch = 128 +momentum=0.9 +decay=0.001 +max_batches = 2000 +time_steps=576 +learning_rate=0.1 +policy=steps +steps=1000,1500 +scales=.1,.1 + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[connected] +output=256 +activation=leaky + +[softmax] + + diff --git a/workloads/realworld/standard/darknet/cfg/strided.cfg b/workloads/realworld/standard/darknet/cfg/strided.cfg new file mode 100644 index 0000000000000000000000000000000000000000..2f745085adc268a3e99bd7895bd4dda28227bffd --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/strided.cfg @@ -0,0 +1,182 @@ +[net] +batch=128 +subdivisions=4 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +policy=steps +scales=.1,.1,.1 +steps=200000,300000,400000 +max_batches=800000 + + +[crop] +crop_height=224 +crop_width=224 +flip=1 +angle=0 +saturation=1 +exposure=1 +shift=.2 + +[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=192 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 
+size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=1024 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=ramp + +[maxpool] +size=3 +stride=2 + +[connected] +output=4096 +activation=ramp + +[dropout] +probability=0.5 + +[connected] +output=1000 +activation=ramp + +[softmax] + diff --git a/workloads/realworld/standard/darknet/cfg/t1.test.cfg b/workloads/realworld/standard/darknet/cfg/t1.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b3628114e048dc78f4797342afd95a757c81c977 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/t1.test.cfg @@ -0,0 +1,117 @@ +[net] +batch=1 +subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,20000,30000 +scales=2.5,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 
+activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[connected] +output= 1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/standard/darknet/cfg/tiny.cfg b/workloads/realworld/standard/darknet/cfg/tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..f97327cfceebf868998bf2bb16224bd0994ebd82 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/tiny.cfg @@ -0,0 +1,174 @@ +[net] +# Train +batch=128 +subdivisions=1 +# Test +# batch=1 +# subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=320 + +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + + diff --git a/workloads/realworld/standard/darknet/cfg/vgg-16.cfg b/workloads/realworld/standard/darknet/cfg/vgg-16.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c73b17b6ccfdcc9cae9b67591b662571463569ab --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/vgg-16.cfg @@ -0,0 +1,157 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +learning_rate=0.00001 +momentum=0.9 +decay=0.0005 + +[crop] +crop_height=224 +crop_width=224 +flip=1 +exposure=1 +saturation=1 +angle=0 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu 
+ +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=1000 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/standard/darknet/cfg/vgg-conv.cfg b/workloads/realworld/standard/darknet/cfg/vgg-conv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..21e1d724c9418107f9cf82f9bffb9ae64d3e2084 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/vgg-conv.cfg @@ -0,0 +1,121 @@ +[net] +batch=1 +subdivisions=1 +width=224 +height=224 +channels=3 +learning_rate=0.00001 +momentum=0.9 +decay=0.0005 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 
+stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + diff --git a/workloads/realworld/standard/darknet/cfg/voc.data b/workloads/realworld/standard/darknet/cfg/voc.data new file mode 100644 index 0000000000000000000000000000000000000000..7807b5d2a8fd0f855a8c68e82c064dc320551da1 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/voc.data @@ -0,0 +1,6 @@ +classes= 20 +train = /home/pjreddie/data/voc/train.txt +valid = /home/pjreddie/data/voc/2007_test.txt +names = data/voc.names +backup = backup + diff --git a/workloads/realworld/standard/darknet/cfg/writing.cfg b/workloads/realworld/standard/darknet/cfg/writing.cfg new file mode 100644 index 0000000000000000000000000000000000000000..1ed899bcd63d6354e8320ace7e7f513ba1174886 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/writing.cfg @@ -0,0 +1,41 @@ +[net] +batch=128 +subdivisions=2 +height=256 +width=256 +channels=3 +learning_rate=0.00000001 +momentum=0.9 +decay=0.0005 +seen=0 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1 +size=3 +stride=1 +pad=1 +activation=logistic + +[cost] + diff --git a/workloads/realworld/standard/darknet/cfg/yolo9000.cfg b/workloads/realworld/standard/darknet/cfg/yolo9000.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e745f78a6e37611fb0f13c2d848c292cea1a89d3 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/yolo9000.cfg @@ -0,0 +1,218 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# 
batch=64 +# subdivisions=8 +batch=1 +subdivisions=1 +height=544 +width=544 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +hue=.1 +saturation=.75 +exposure=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=28269 +size=1 +stride=1 +pad=1 +activation=linear + +[region] +anchors = 0.77871, 1.14074, 3.00525, 4.31277, 9.22725, 9.61974 +bias_match=1 +classes=9418 +coords=4 +num=3 +softmax=1 +jitter=.2 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +thresh = .6 +absolute=1 +random=1 + +tree=data/9k.tree +map = data/coco9k.map diff --git a/workloads/realworld/standard/darknet/cfg/yolov1-tiny.cfg b/workloads/realworld/standard/darknet/cfg/yolov1-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a5e7b4920289ccb507a3a0356a33362bc7633581 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/yolov1-tiny.cfg @@ -0,0 +1,130 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 + +saturation=.75 +exposure=.75 +hue = .1 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,800,20000,30000 +scales=2.5,2,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] 
+batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[connected] +output= 1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/standard/darknet/cfg/yolov1.cfg b/workloads/realworld/standard/darknet/cfg/yolov1.cfg new file mode 100644 index 0000000000000000000000000000000000000000..06cf6e676170e41d24e63ec08d7b27a31c411718 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/yolov1.cfg @@ -0,0 +1,261 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 +saturation=1.5 +exposure=1.5 +hue=.1 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,20000,30000 +scales=2.5,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[local] +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[dropout] +probability=.5 + +[connected] +output= 1715 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=3 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/standard/darknet/cfg/yolov2-tiny-voc.cfg b/workloads/realworld/standard/darknet/cfg/yolov2-tiny-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c4c127cdd352bd98b3b7a3336d5c3b2efc6fadcd --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/yolov2-tiny-voc.cfg @@ -0,0 +1,138 @@ +[net] +# Testing +batch=1 
+subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +max_batches = 40200 +policy=steps +steps=-1,100,20000,30000 +scales=.1,10,.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=125 +activation=linear + +[region] +anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 +bias_match=1 +classes=20 +coords=4 +num=5 +softmax=1 +jitter=.2 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/standard/darknet/cfg/yolov2-tiny.cfg b/workloads/realworld/standard/darknet/cfg/yolov2-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..81d0ac45d6dca10f50875bfe85f7496ded8e0f63 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/yolov2-tiny.cfg @@ -0,0 +1,139 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 
+height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=425 +activation=linear + +[region] +anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 +bias_match=1 +classes=80 +coords=4 +num=5 +softmax=1 +jitter=.2 +rescore=0 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/standard/darknet/cfg/yolov2-voc.cfg b/workloads/realworld/standard/darknet/cfg/yolov2-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..dbf2de281c1200cb4889409c616e775080823268 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/yolov2-voc.cfg @@ -0,0 +1,258 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=416 +width=416 +channels=3 +momentum=0.9 
+decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 80200 +policy=steps +steps=40000,60000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 
+activation=leaky + + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[route] +layers=-9 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=64 +activation=leaky + +[reorg] +stride=2 + +[route] +layers=-1,-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=125 +activation=linear + + +[region] +anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 +bias_match=1 +classes=20 +coords=4 +num=5 +softmax=1 +jitter=.3 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/standard/darknet/cfg/yolov2.cfg b/workloads/realworld/standard/darknet/cfg/yolov2.cfg new file mode 100644 index 0000000000000000000000000000000000000000..088edf81573e83c59edd7137cbc07b6fe1433591 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/yolov2.cfg @@ -0,0 +1,258 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky 
+ +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[route] +layers=-9 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=64 +activation=leaky + +[reorg] +stride=2 + +[route] +layers=-1,-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=425 +activation=linear + + +[region] +anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 +bias_match=1 +classes=80 
+coords=4 +num=5 +softmax=1 +jitter=.3 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/standard/darknet/cfg/yolov3-openimages.cfg b/workloads/realworld/standard/darknet/cfg/yolov3-openimages.cfg new file mode 100644 index 0000000000000000000000000000000000000000..65d241a74c4c4995dbd997b1750575a83b0a17d4 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/yolov3-openimages.cfg @@ -0,0 +1,789 @@ +[net] +# Testing + batch=1 + subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=5000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 
+size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/standard/darknet/cfg/yolov3-spp.cfg b/workloads/realworld/standard/darknet/cfg/yolov3-spp.cfg new file mode 100644 index 0000000000000000000000000000000000000000..4ad2a052d88328a79cff5686ff4dd1df6993a2fd --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/yolov3-spp.cfg @@ -0,0 +1,822 @@ +[net] +# Testing +batch=1 
+subdivisions=1 +# Training +# batch=64 +# subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 
+size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky 
+ +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 + +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/standard/darknet/cfg/yolov3-tiny.cfg b/workloads/realworld/standard/darknet/cfg/yolov3-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..cfca3cfa6415b7b61eae238aa71107dedbe5d607 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/yolov3-tiny.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 
500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/standard/darknet/cfg/yolov3-tiny_b.cfg b/workloads/realworld/standard/darknet/cfg/yolov3-tiny_b.cfg new file mode 100644 index 
0000000000000000000000000000000000000000..0d8ded69bcf909f4cca02ab202cc99b1800a1562 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/yolov3-tiny_b.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/standard/darknet/cfg/yolov3-tiny_t.cfg b/workloads/realworld/standard/darknet/cfg/yolov3-tiny_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..0d8ded69bcf909f4cca02ab202cc99b1800a1562 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/yolov3-tiny_t.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 
+filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/standard/darknet/cfg/yolov3-voc.cfg b/workloads/realworld/standard/darknet/cfg/yolov3-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..3f3e8dfb31b7103cf7ca00cd0bef83d6d426bb8d --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/yolov3-voc.cfg @@ -0,0 +1,785 @@ +[net] +# Testing + batch=1 + subdivisions=1 +# Training +# batch=64 +# subdivisions=16 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 50200 +policy=steps +steps=40000,45000 +scales=.1,.1 + + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 
+stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 6,7,8 
+anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 
10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/standard/darknet/cfg/yolov3.cfg b/workloads/realworld/standard/darknet/cfg/yolov3.cfg new file mode 100644 index 0000000000000000000000000000000000000000..938ffff23f106d65290faae217f6a9b0a715c023 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/yolov3.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 
+size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/standard/darknet/cfg/yolov3_b.cfg b/workloads/realworld/standard/darknet/cfg/yolov3_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a86d73b42225ef65d8ff8692b0d72827ba3a8484 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/yolov3_b.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 
+width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 
+truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/standard/darknet/cfg/yolov3_t.cfg b/workloads/realworld/standard/darknet/cfg/yolov3_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a86d73b42225ef65d8ff8692b0d72827ba3a8484 --- /dev/null +++ b/workloads/realworld/standard/darknet/cfg/yolov3_t.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 
+pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 
+stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/standard/darknet/examples/art.c b/workloads/realworld/standard/darknet/examples/art.c new file mode 100644 index 0000000000000000000000000000000000000000..932688e7b9ecbfd1a359a5d373dddf52815da9bb --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/art.c @@ -0,0 +1,59 @@ +#include "darknet.h" + +#include + +void demo_art(char *cfgfile, char *weightfile, int cam_index) +{ +#ifdef OPENCV + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + + void * cap = open_video_stream(0, cam_index, 0,0,0); + + char *window = "ArtJudgementBot9000!!!"; + if(!cap) error("Couldn't connect to webcam.\n"); + int i; + int idx[] = {37, 401, 434}; + int n = sizeof(idx)/sizeof(idx[0]); + + while(1){ + image in = get_image_from_stream(cap); + image in_s = resize_image(in, net->w, net->h); + + float *p = network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + + float score = 0; + for(i = 0; i < n; ++i){ + float s = p[idx[i]]; + if (s > score) score = s; + } + score = score; + printf("I APPRECIATE THIS ARTWORK: %10.7f%%\n", score*100); + printf("["); + int upper = 30; + for(i = 0; i < upper; ++i){ + printf("%c", ((i+.5) < score*upper) ? 
219 : ' '); + } + printf("]\n"); + + show_image(in, window, 1); + free_image(in_s); + free_image(in); + } +#endif +} + + +void run_art(int argc, char **argv) +{ + int cam_index = find_int_arg(argc, argv, "-c", 0); + char *cfg = argv[2]; + char *weights = argv[3]; + demo_art(cfg, weights, cam_index); +} + diff --git a/workloads/realworld/standard/darknet/examples/attention.c b/workloads/realworld/standard/darknet/examples/attention.c new file mode 100644 index 0000000000000000000000000000000000000000..cd1e579d375be8ffed5620c70180f0a59a927159 --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/attention.c @@ -0,0 +1,459 @@ +#include "darknet.h" + +#include +#include + +void extend_data_truth(data *d, int n, float val) +{ + int i, j; + for(i = 0; i < d->y.rows; ++i){ + d->y.vals[i] = realloc(d->y.vals[i], (d->y.cols+n)*sizeof(float)); + for(j = 0; j < n; ++j){ + d->y.vals[i][d->y.cols + j] = val; + } + } + d->y.cols += n; +} + +matrix network_loss_data(network *net, data test) +{ + int i,b; + int k = 1; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.cols, sizeof(float)); + float *y = calloc(net->batch*test.y.cols, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + memcpy(y+b*test.y.cols, test.y.vals[i+b], test.y.cols*sizeof(float)); + } + + network orig = *net; + net->input = X; + net->truth = y; + net->train = 0; + net->delta = 0; + forward_network(net); + *net = orig; + + float *delta = net->layers[net->n-1].output; + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + int t = max_index(y + b*test.y.cols, 1000); + float err = sum_array(delta + b*net->outputs, net->outputs); + pred.vals[i+b][0] = -err; + //pred.vals[i+b][0] = 1-delta[b*net->outputs + t]; + } + } + free(X); + free(y); + return pred; +} + +void train_attention(char *datacfg, char 
*cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i, j; + + float avg_cls_loss = -1; + float avg_att_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *train_list = option_find_str(options, "train", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + + char **labels = get_labels(label_list); + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + double time; + + int divs=3; + int size=2; + + load_args args = {0}; + args.w = divs*net->w/size; + args.h = divs*net->h/size; + args.size = divs*net->w/size; + args.threads = 32; + args.hierarchy = net->hierarchy; + + args.min = net->min_ratio*args.w; + args.max = net->max_ratio*args.w; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + args.paths = paths; + args.classes = classes; + args.n = imgs; + args.m = N; + args.labels = labels; + args.type = CLASSIFICATION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || 
net->max_batches == 0){ + time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + data resized = resize_data(train, net->w, net->h); + extend_data_truth(&resized, divs*divs, 0); + data *tiles = tile_data(train, divs, size); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float aloss = 0; + float closs = 0; + int z; + for (i = 0; i < divs*divs/ngpus; ++i) { +#pragma omp parallel for + for(j = 0; j < ngpus; ++j){ + int index = i*ngpus + j; + extend_data_truth(tiles+index, divs*divs, SECRET_NUM); + matrix deltas = network_loss_data(nets[j], tiles[index]); + for(z = 0; z < resized.y.rows; ++z){ + resized.y.vals[z][train.y.cols + index] = deltas.vals[z][0]; + } + free_matrix(deltas); + } + } + int *inds = calloc(resized.y.rows, sizeof(int)); + for(z = 0; z < resized.y.rows; ++z){ + int index = max_index(resized.y.vals[z] + train.y.cols, divs*divs); + inds[z] = index; + for(i = 0; i < divs*divs; ++i){ + resized.y.vals[z][train.y.cols + i] = (i == index)? 
1 : 0; + } + } + data best = select_data(tiles, inds); + free(inds); + #ifdef GPU + if (ngpus == 1) { + closs = train_network(net, best); + } else { + closs = train_networks(nets, ngpus, best, 4); + } + #endif + for (i = 0; i < divs*divs; ++i) { + printf("%.2f ", resized.y.vals[0][train.y.cols + i]); + if((i+1)%divs == 0) printf("\n"); + free_data(tiles[i]); + } + free_data(best); + printf("\n"); + image im = float_to_image(64,64,3,resized.X.vals[0]); + //show_image(im, "orig"); + //cvWaitKey(100); + /* + image im1 = float_to_image(64,64,3,tiles[i].X.vals[0]); + image im2 = float_to_image(64,64,3,resized.X.vals[0]); + show_image(im1, "tile"); + show_image(im2, "res"); + */ +#ifdef GPU + if (ngpus == 1) { + aloss = train_network(net, resized); + } else { + aloss = train_networks(nets, ngpus, resized, 4); + } +#endif + for(i = 0; i < divs*divs; ++i){ + printf("%f ", nets[0]->output[1000 + i]); + if ((i+1) % divs == 0) printf("\n"); + } + printf("\n"); + + free_data(resized); + free_data(train); + if(avg_cls_loss == -1) avg_cls_loss = closs; + if(avg_att_loss == -1) avg_att_loss = aloss; + avg_cls_loss = avg_cls_loss*.9 + closs*.1; + avg_att_loss = avg_att_loss*.9 + aloss*.1; + + printf("%ld, %.3f: Att: %f, %f avg, Class: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, aloss, avg_att_loss, closs, avg_cls_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + pthread_join(load_thread, 0); + + free_network(net); + free_ptrs((void**)labels, classes); + free_ptrs((void**)paths, plist->size); + 
free_list(plist); + free(base); +} + +void validate_attention_single(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *leaf_list = option_find_str(options, "leaves", 0); + if(leaf_list) change_leaves(net->hierarchy, leaf_list); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + int divs = 4; + int size = 2; + int extra = 0; + float *avgs = calloc(classes, sizeof(float)); + int *inds = calloc(divs*divs, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w*divs/size); + image crop = crop_image(resized, (resized.w - net->w*divs/size)/2, (resized.h - net->h*divs/size)/2, net->w*divs/size, net->h*divs/size); + image rcrop = resize_image(crop, net->w, net->h); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, rcrop.data); + //pred[classes + 56] = 0; + for(j = 0; j < divs*divs; ++j){ + printf("%.2f ", pred[classes + j]); + if((j+1)%divs == 0) printf("\n"); + } + printf("\n"); + copy_cpu(classes, pred, 1, avgs, 1); + top_k(pred + classes, divs*divs, divs*divs, inds); + show_image(crop, "crop"); + for(j = 0; j < extra; ++j){ + int index = inds[j]; + int row = index / 
divs; + int col = index % divs; + int y = row * crop.h / divs - (net->h - crop.h/divs)/2; + int x = col * crop.w / divs - (net->w - crop.w/divs)/2; + printf("%d %d %d %d\n", row, col, y, x); + image tile = crop_image(crop, x, y, net->w, net->h); + float *pred = network_predict(net, tile.data); + axpy_cpu(classes, 1., pred, 1, avgs, 1); + show_image(tile, "tile"); + //cvWaitKey(10); + } + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + if(rcrop.data != resized.data) free_image(rcrop); + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_attention_multi(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + int scales[] = {224, 288, 320, 352, 384}; + int nscales = sizeof(scales)/sizeof(scales[0]); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + float *pred = calloc(classes, sizeof(float)); + image im = load_image_color(paths[i], 0, 0); + for(j = 0; j 
< nscales; ++j){ + image r = resize_min(im, scales[j]); + resize_network(net, r.w, r.h); + float *p = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + flip_image(r); + p = network_predict(net, r.data); + axpy_cpu(classes, 1, p, 1, pred, 1); + if(r.data != im.data) free_image(r); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void predict_attention(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + if(top == 0) top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = letterbox_image(im, net->w, net->h); + //resize_network(&net, r.w, r.h); + //printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + float *predictions = network_predict(net, X); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + 
//if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? names[net->hierarchy->parent[index]] : "Root"); + //else printf("%s: %f\n",names[index], predictions[index]); + printf("%5.2f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void run_attention(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int ngpus; + int *gpus = read_intlist(gpu_list, &ngpus, gpu_index); + + + int top = find_int_arg(argc, argv, "-t", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + char *layer_s = (argc > 7) ? argv[7]: 0; + if(0==strcmp(argv[2], "predict")) predict_attention(data, cfg, weights, filename, top); + else if(0==strcmp(argv[2], "train")) train_attention(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) validate_attention_single(data, cfg, weights); + else if(0==strcmp(argv[2], "validmulti")) validate_attention_multi(data, cfg, weights); +} + + diff --git a/workloads/realworld/standard/darknet/examples/captcha.c b/workloads/realworld/standard/darknet/examples/captcha.c new file mode 100644 index 0000000000000000000000000000000000000000..41d6d07c30801b35da34c05984be488e6f6767e9 --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/captcha.c @@ -0,0 +1,353 @@ +#include "darknet.h" + +void fix_data_captcha(data d, int mask) +{ + matrix labels = d.y; + int i, j; + for(i = 0; i < d.y.rows; ++i){ + for(j = 0; j < d.y.cols; j += 2){ + if (mask){ + if(!labels.vals[i][j]){ + labels.vals[i][j] = SECRET_NUM; + labels.vals[i][j+1] = SECRET_NUM; + }else 
if(labels.vals[i][j+1]){ + labels.vals[i][j] = 0; + } + } else{ + if (labels.vals[i][j]) { + labels.vals[i][j+1] = 0; + } else { + labels.vals[i][j+1] = 1; + } + } + } + } +} + +void train_captcha(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + int i = *net->seen/imgs; + int solved = 1; + list *plist; + char **labels = get_labels("/data/captcha/reimgs.labels.list"); + if (solved){ + plist = get_paths("/data/captcha/reimgs.solved.list"); + }else{ + plist = get_paths("/data/captcha/reimgs.raw.list"); + } + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = 26; + args.n = imgs; + args.m = plist->size; + args.labels = labels; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + + load_thread = load_data_in_thread(args); + while(1){ + ++i; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + fix_data_captcha(train, solved); + + /* + image im = float_to_image(256, 256, 3, train.X.vals[114]); + show_image(im, "training"); + cvWaitKey(0); + */ + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net->seen); + free_data(train); + if(i%100==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } + } +} + +void test_captcha(char *cfgfile, char *weightfile, char 
*filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + int i = 0; + char **names = get_labels("/data/captcha/reimgs.labels.list"); + char buff[256]; + char *input = buff; + int indexes[26]; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + //printf("Enter Image Path: "); + //fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, net->w, net->h); + float *X = im.data; + float *predictions = network_predict(net, X); + top_predictions(net, 26, indexes); + //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < 26; ++i){ + int index = indexes[i]; + if(i != 0) printf(", "); + printf("%s %f", names[index], predictions[index]); + } + printf("\n"); + fflush(stdout); + free_image(im); + if (filename) break; + } +} + +void valid_captcha(char *cfgfile, char *weightfile, char *filename) +{ + char **labels = get_labels("/data/captcha/reimgs.labels.list"); + network *net = load_network(cfgfile, weightfile, 0); + list *plist = get_paths("/data/captcha/reimgs.fg.list"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + int outputs = net->outputs; + + set_batch_network(net, 1); + srand(2222222); + int i, j; + for(i = 0; i < N; ++i){ + if (i%100 == 0) fprintf(stderr, "%d\n", i); + image im = load_image_color(paths[i], net->w, net->h); + float *X = im.data; + float *predictions = network_predict(net, X); + //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + int truth = -1; + for(j = 0; j < 13; ++j){ + if (strstr(paths[i], labels[j])) truth = j; + } + if (truth == -1){ + fprintf(stderr, "bad: %s\n", paths[i]); + return; + } + printf("%d, ", truth); + for(j = 0; j < outputs; ++j){ + if (j != 0) printf(", "); + printf("%f", predictions[j]); + } + printf("\n"); + fflush(stdout); + free_image(im); + if (filename) break; + } +} + +/* + void 
train_captcha(char *cfgfile, char *weightfile) + { + float avg_loss = -1; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + int i = net->seen/imgs; + list *plist = get_paths("/data/captcha/train.auto5"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + while(1){ + ++i; + time=clock(); + data train = load_data_captcha(paths, imgs, plist->size, 10, 200, 60); + translate_data_rows(train, -128); + scale_data_rows(train, 1./128); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + net->seen += imgs; + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net->seen); + free_data(train); + if(i%10==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } + } + } + + void decode_captcha(char *cfgfile, char *weightfile) + { + setbuf(stdout, NULL); + srand(time(0)); + network net = parse_network_cfg(cfgfile); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + char filename[256]; + while(1){ + printf("Enter filename: "); + fgets(filename, 256, stdin); + strtok(filename, "\n"); + image im = load_image_color(filename, 300, 57); + scale_image(im, 1./255.); + float *X = im.data; + float *predictions = network_predict(net, X); + image out = float_to_image(300, 57, 1, predictions); + show_image(out, "decoded"); +#ifdef OPENCV +cvWaitKey(0); +#endif +free_image(im); +} +} + +void encode_captcha(char *cfgfile, char *weightfile) +{ +float avg_loss = -1; +srand(time(0)); +char *base = basecfg(cfgfile); +printf("%s\n", 
base); +network net = parse_network_cfg(cfgfile); +if(weightfile){ + load_weights(&net, weightfile); +} +printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); +int imgs = 1024; +int i = net->seen/imgs; +list *plist = get_paths("/data/captcha/encode.list"); +char **paths = (char **)list_to_array(plist); +printf("%d\n", plist->size); +clock_t time; +while(1){ + ++i; + time=clock(); + data train = load_data_captcha_encode(paths, imgs, plist->size, 300, 57); + scale_data_rows(train, 1./255); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + net->seen += imgs; + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net->seen); + free_matrix(train.X); + if(i%100==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } +} +} + +void validate_captcha(char *cfgfile, char *weightfile) +{ + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + int numchars = 37; + list *plist = get_paths("/data/captcha/solved.hard"); + char **paths = (char **)list_to_array(plist); + int imgs = plist->size; + data valid = load_data_captcha(paths, imgs, 0, 10, 200, 60); + translate_data_rows(valid, -128); + scale_data_rows(valid, 1./128); + matrix pred = network_predict_data(net, valid); + int i, k; + int correct = 0; + int total = 0; + int accuracy = 0; + for(i = 0; i < imgs; ++i){ + int allcorrect = 1; + for(k = 0; k < 10; ++k){ + char truth = int_to_alphanum(max_index(valid.y.vals[i]+k*numchars, numchars)); + char prediction = int_to_alphanum(max_index(pred.vals[i]+k*numchars, numchars)); + if (truth != prediction) allcorrect=0; + if (truth != '.' 
&& truth == prediction) ++correct; + if (truth != '.' || truth != prediction) ++total; + } + accuracy += allcorrect; + } + printf("Word Accuracy: %f, Char Accuracy %f\n", (float)accuracy/imgs, (float)correct/total); + free_data(valid); +} + +void test_captcha(char *cfgfile, char *weightfile) +{ + setbuf(stdout, NULL); + srand(time(0)); + //char *base = basecfg(cfgfile); + //printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + char filename[256]; + while(1){ + //printf("Enter filename: "); + fgets(filename, 256, stdin); + strtok(filename, "\n"); + image im = load_image_color(filename, 200, 60); + translate_image(im, -128); + scale_image(im, 1/128.); + float *X = im.data; + float *predictions = network_predict(net, X); + print_letters(predictions, 10); + free_image(im); + } +} + */ +void run_captcha(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_captcha(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights, filename); + else if(0==strcmp(argv[2], "valid")) valid_captcha(cfg, weights, filename); + //if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "encode")) encode_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "decode")) decode_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "valid")) validate_captcha(cfg, weights); +} + diff --git a/workloads/realworld/standard/darknet/examples/cifar.c b/workloads/realworld/standard/darknet/examples/cifar.c new file mode 100644 index 0000000000000000000000000000000000000000..a5f5f240b9f680acd9b5890042300d3b683e0f82 --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/cifar.c @@ -0,0 +1,251 @@ +#include "darknet.h" + +void train_cifar(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + int classes = 10; + int N = 50000; + + char **labels = get_labels("data/cifar/labels.txt"); + int epoch = (*net->seen)/N; + data train = load_all_cifar10(); + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + clock_t time=clock(); + + float loss = train_network_sgd(net, train, 1); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + 
if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)labels, classes); + free(base); + free_data(train); +} + +void train_cifar_distill(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + int classes = 10; + int N = 50000; + + char **labels = get_labels("data/cifar/labels.txt"); + int epoch = (*net->seen)/N; + + data train = load_all_cifar10(); + matrix soft = csv_to_matrix("results/ensemble.csv"); + + float weight = .9; + scale_matrix(soft, weight); + scale_matrix(train.y, 1. 
- weight); + matrix_add_matrix(soft, train.y); + + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + clock_t time=clock(); + + float loss = train_network_sgd(net, train, 1); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)labels, classes); + free(base); + free_data(train); +} + +void test_cifar_multi(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + float avg_acc = 0; + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + + float pred[10] = {0}; + + float *p = network_predict(net, im.data); + axpy_cpu(10, 1, p, 1, pred, 1); + flip_image(im); + p = network_predict(net, im.data); + axpy_cpu(10, 1, p, 1, pred, 1); + + int index = max_index(pred, 10); + int class = max_index(test.y.vals[i], 10); + if(index == class) avg_acc += 1; + free_image(im); + printf("%4d: %.2f%%\n", i, 100.*avg_acc/(i+1)); + } +} + +void test_cifar(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + clock_t time; + float avg_acc = 0; + float avg_top5 = 0; + data test = 
load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + time=clock(); + + float *acc = network_accuracies(net, test, 2); + avg_acc += acc[0]; + avg_top5 += acc[1]; + printf("top1: %f, %lf seconds, %d images\n", avg_acc, sec(clock()-time), test.X.rows); + free_data(test); +} + +void extract_cifar() +{ +char *labels[] = {"airplane","automobile","bird","cat","deer","dog","frog","horse","ship","truck"}; + int i; + data train = load_all_cifar10(); + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + for(i = 0; i < train.X.rows; ++i){ + image im = float_to_image(32, 32, 3, train.X.vals[i]); + int class = max_index(train.y.vals[i], 10); + char buff[256]; + sprintf(buff, "data/cifar/train/%d_%s",i,labels[class]); + save_image_options(im, buff, PNG, 0); + } + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + int class = max_index(test.y.vals[i], 10); + char buff[256]; + sprintf(buff, "data/cifar/test/%d_%s",i,labels[class]); + save_image_options(im, buff, PNG, 0); + } +} + +void test_cifar_csv(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + matrix pred = network_predict_data(net, test); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + flip_image(im); + } + matrix pred2 = network_predict_data(net, test); + scale_matrix(pred, .5); + scale_matrix(pred2, .5); + matrix_add_matrix(pred2, pred); + + matrix_to_csv(pred); + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); +} + +void test_cifar_csvtrain(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + srand(time(0)); + + data test = load_all_cifar10(); + + matrix pred = network_predict_data(net, test); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = 
float_to_image(32, 32, 3, test.X.vals[i]); + flip_image(im); + } + matrix pred2 = network_predict_data(net, test); + scale_matrix(pred, .5); + scale_matrix(pred2, .5); + matrix_add_matrix(pred2, pred); + + matrix_to_csv(pred); + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); +} + +void eval_cifar_csv() +{ + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + matrix pred = csv_to_matrix("results/combined.csv"); + fprintf(stderr, "%d %d\n", pred.rows, pred.cols); + + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); + free_matrix(pred); +} + + +void run_cifar(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_cifar(cfg, weights); + else if(0==strcmp(argv[2], "extract")) extract_cifar(); + else if(0==strcmp(argv[2], "distill")) train_cifar_distill(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_cifar(cfg, weights); + else if(0==strcmp(argv[2], "multi")) test_cifar_multi(cfg, weights); + else if(0==strcmp(argv[2], "csv")) test_cifar_csv(cfg, weights); + else if(0==strcmp(argv[2], "csvtrain")) test_cifar_csvtrain(cfg, weights); + else if(0==strcmp(argv[2], "eval")) eval_cifar_csv(); +} + + diff --git a/workloads/realworld/standard/darknet/examples/classifier.c b/workloads/realworld/standard/darknet/examples/classifier.c new file mode 100644 index 0000000000000000000000000000000000000000..932f5a4652022288029617d0722c3bfa3073e536 --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/classifier.c @@ -0,0 +1,1122 @@ +#include "darknet.h" + +#include +#include + +float *get_regression_values(char **labels, int n) +{ + float *v = calloc(n, sizeof(float)); + int i; + for(i = 0; i < n; ++i){ + char *p = strchr(labels[i], ' 
'); + *p = 0; + v[i] = atof(p+1); + } + return v; +} + +void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + // Ruihao + int tag = option_find_int_quiet(options, "tag", 0); + + char *backup_directory_cfg = option_find_str(options, "backup", "/backup/"); + char *label_list_cfg = option_find_str(options, "labels", "data/labels.list"); + char *train_list_cfg = option_find_str(options, "train", "data/train.list"); + + char *env = getenv("UVMAsyncBench_BASE"); + char backup_directory[256]; + char label_list[256]; + char train_list[256]; + sprintf(backup_directory, "%s/%s", env, backup_directory_cfg); + sprintf(label_list, "%s/%s", env, label_list_cfg); + sprintf(train_list, "%s/%s", env, train_list_cfg); + // Ruihao + char *tree = option_find_str(options, "tree", 0); + if (tree) net->hierarchy = read_tree(tree); + int classes = option_find_int(options, "classes", 2); + + char **labels = 0; + if(!tag){ + labels = get_labels(label_list); + } + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + double time; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.hierarchy = net->hierarchy; + + args.min = net->min_ratio*net->w; + args.max = net->max_ratio*net->w; 
+ printf("%d %d\n", args.min, args.max); + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + + args.paths = paths; + args.classes = classes; + args.n = imgs; + args.m = N; + args.labels = labels; + if (tag){ + args.type = TAG_DATA; + } else { + args.type = CLASSIFICATION_DATA; + } + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int count = 0; + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + if(net->random && count++%40 == 0){ + printf("Resizing\n"); + int dim = (rand() % 11 + 4) * 32; + //if (get_current_batch(net)+200 > net->max_batches) dim = 608; + //int dim = (rand() % 4 + 16) * 32; + printf("%d\n", dim); + args.w = dim; + args.h = dim; + args.size = dim; + args.min = net->min_ratio*dim; + args.max = net->max_ratio*dim; + printf("%d %d\n", args.min, args.max); + + pthread_join(load_thread, 0); + train = buffer; + free_data(train); + load_thread = load_data(args); + + for(i = 0; i < ngpus; ++i){ + resize_network(nets[i], dim, dim); + } + net = nets[0]; + } + time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + 
sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + pthread_join(load_thread, 0); + + // free_network(net); + if(labels) free_ptrs((void**)labels, classes); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void validate_classifier_crop(char *datacfg, char *filename, char *weightfile) +{ + int i = 0; + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + clock_t time; + float avg_acc = 0; + float avg_topk = 0; + int splits = m/1000; + int num = (i+1)*m/splits - i*m/splits; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + + args.paths = paths; + args.classes = classes; + args.n = num; + args.m = 0; + args.labels = labels; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(i = 1; i <= splits; ++i){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + num = (i+1)*m/splits - i*m/splits; + char **part = paths+(i*m/splits); + if(i != splits){ + args.paths = part; + load_thread = load_data_in_thread(args); + } + printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + float *acc = 
network_accuracies(net, val, topk); + avg_acc += acc[0]; + avg_topk += acc[1]; + printf("%d: top 1: %f, top %d: %f, %lf seconds, %d images\n", i, avg_acc/i, topk, avg_topk/i, sec(clock()-time), val.X.rows); + free_data(val); + } +} + +void validate_classifier_10(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + int w = net->w; + int h = net->h; + int shift = 32; + image im = load_image_color(paths[i], w+shift, h+shift); + image images[10]; + images[0] = crop_image(im, -shift, -shift, w, h); + images[1] = crop_image(im, shift, -shift, w, h); + images[2] = crop_image(im, 0, 0, w, h); + images[3] = crop_image(im, -shift, shift, w, h); + images[4] = crop_image(im, shift, shift, w, h); + flip_image(im); + images[5] = crop_image(im, -shift, -shift, w, h); + images[6] = crop_image(im, shift, -shift, w, h); + images[7] = crop_image(im, 0, 0, w, h); + images[8] = crop_image(im, -shift, shift, w, h); + images[9] = crop_image(im, shift, shift, w, h); + float *pred = calloc(classes, sizeof(float)); + for(j = 0; j < 10; ++j){ + float *p = network_predict(net, images[j].data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, 
net->hierarchy, 1, 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + free_image(images[j]); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_classifier_full(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + int size = net->w; + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, size); + resize_network(net, resized.w, resized.h); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, resized.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + free_image(im); + free_image(resized); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + + +void validate_classifier_single(char 
*datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *leaf_list = option_find_str(options, "leaves", 0); + if(leaf_list) change_leaves(net->hierarchy, leaf_list); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image crop = center_crop_image(im, net->w, net->h); + //grayscale_image_3c(crop); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, crop.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + free_image(im); + free_image(crop); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%s, %d, %f, %f, \n", paths[i], class, pred[0], pred[1]); + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_classifier_multi(char *datacfg, char *cfg, char *weights) +{ + int i, j; + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", 
"data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + //int scales[] = {224, 288, 320, 352, 384}; + int scales[] = {224, 256, 288, 320}; + int nscales = sizeof(scales)/sizeof(scales[0]); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + float *pred = calloc(classes, sizeof(float)); + image im = load_image_color(paths[i], 0, 0); + for(j = 0; j < nscales; ++j){ + image r = resize_max(im, scales[j]); + resize_network(net, r.w, r.h); + float *p = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + flip_image(r); + p = network_predict(net, r.data); + axpy_cpu(classes, 1, p, 1, pred, 1); + if(r.data != im.data) free_image(r); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + int top = option_find_int(options, "top", 1); + + int i = 0; 
+ char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image orig = load_image_color(input, 0, 0); + image r = resize_min(orig, 256); + image im = crop_image(r, (r.w - 224 - 1)/2 + 1, (r.h - 224 - 1)/2 + 1, 224, 224); + float mean[] = {0.48263312050943, 0.45230225481413, 0.40099074308742}; + float std[] = {0.22590347483426, 0.22120921437787, 0.22103996251583}; + float var[3]; + var[0] = std[0]*std[0]; + var[1] = std[1]*std[1]; + var[2] = std[2]*std[2]; + + normalize_cpu(im.data, mean, var, 1, 3, im.w*im.h); + + float *X = im.data; + time=clock(); + float *predictions = network_predict(net, X); + + layer l = net->layers[layer_num]; + for(i = 0; i < l.c; ++i){ + if(l.rolling_mean) printf("%f %f %f\n", l.rolling_mean[i], l.rolling_variance[i], l.scales[i]); + } +#ifdef GPU + cuda_pull_array(l.output_gpu, l.output, l.outputs); +#endif + for(i = 0; i < l.outputs; ++i){ + printf("%f\n", l.output[i]); + } + /* + + printf("\n\nWeights\n"); + for(i = 0; i < l.n*l.size*l.size*l.c; ++i){ + printf("%f\n", l.filters[i]); + } + + printf("\n\nBiases\n"); + for(i = 0; i < l.n; ++i){ + printf("%f\n", l.biases[i]); + } + */ + + top_predictions(net, top, indexes); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + printf("%s: %f\n", names[index], predictions[index]); + } + free_image(im); + if (filename) break; + } +} + +void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *name_list_cfg = option_find_str(options, 
"names", 0); + char *env = getenv("UVMAsyncBench_BASE"); + char name_list[256]; + sprintf(name_list, "%s/%s", env, name_list_cfg); + // if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + // Ruihao + if(top == 0) top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = letterbox_image(im, net->w, net->h); + //image r = resize_min(im, 320); + //printf("%d %d\n", r.w, r.h); + //resize_network(net, r.w, r.h); + //printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + startCPU(); + float *predictions = network_predict(net, X); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + endCPU(); + fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + //if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? 
names[net->hierarchy->parent[index]] : "Root"); + //else printf("%s: %f\n",names[index], predictions[index]); + printf("%5.2f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void label_classifier(char *datacfg, char *filename, char *weightfile) +{ + int i; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "names", "data/labels.list"); + char *test_list = option_find_str(options, "test", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + + char **labels = get_labels(label_list); + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + for(i = 0; i < m; ++i){ + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + float *pred = network_predict(net, crop.data); + + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + int ind = max_index(pred, classes); + + printf("%s\n", labels[ind]); + } +} + +void csv_classifier(char *datacfg, char *cfgfile, char *weightfile) +{ + int i,j; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *test_list = option_find_str(options, "test", "data/test.list"); + int top = option_find_int(options, "top", 1); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + int *indexes = calloc(top, sizeof(int)); + + for(i = 0; i < m; ++i){ + double time = what_time_is_it_now(); + char *path = paths[i]; + image im = load_image_color(path, 0, 0); + image r = letterbox_image(im, net->w, 
net->h); + float *predictions = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + + printf("%s", path); + for(j = 0; j < top; ++j){ + printf("\t%d", indexes[j]); + } + printf("\n"); + + free_image(im); + free_image(r); + + fprintf(stderr, "%lf seconds, %d images, %d total\n", what_time_is_it_now() - time, i+1, m); + } +} + +void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_layer) +{ + int curr = 0; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *test_list_cfg = option_find_str(options, "test", "data/test.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char test_list[256]; + sprintf(test_list, "%s/%s", env, test_list_cfg); + // Ruihao + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + m = net->max_batches * net->batch; + free_list(plist); + + clock_t time; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = classes; + args.n = net->batch; + args.m = 0; + args.labels = 0; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(curr = net->batch; curr <= m; curr += net->batch){ + // while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + if(curr < m){ + args.paths = paths + curr; + if (curr + net->batch > m) args.n = m - curr; + load_thread = load_data_in_thread(args); + } + fprintf(stderr, "Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + + int i, j; + if (target_layer >= 0){ + //layer l = 
net->layers[target_layer]; + } + + // for(i = 0; i < pred.rows; ++i){ + // printf("%s", paths[curr-net->batch+i]); + // for(j = 0; j < pred.cols; ++j){ + // printf("\t%g", pred.vals[i][j]); + // } + // printf("\n"); + // } + + fprintf(stderr, "%lf seconds, %d images, %d total\n", sec(clock()-time), val.X.rows, curr); + free_matrix(pred); + free_data(val); + } +} + +void file_output_classifier(char *datacfg, char *filename, char *weightfile, char *listfile) +{ + int i,j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + //char *label_list = option_find_str(options, "names", "data/labels.list"); + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(listfile); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + for(i = 0; i < m; ++i){ + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + + float *pred = network_predict(net, crop.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 0, 1); + + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + + printf("%s", paths[i]); + for(j = 0; j < classes; ++j){ + printf("\t%g", pred[j]); + } + printf("\n"); + } +} + + +void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + float threat = 0; + float roll = .2; + + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + int top = option_find_int(options, "top", 1); + + char *name_list = option_find_str(options, "names", 0); + char **names = 
get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + //cvNamedWindow("Threat", CV_WINDOW_NORMAL); + //cvResizeWindow("Threat", 512, 512); + float fps = 0; + int i; + + int count = 0; + + while(1){ + ++count; + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + if(!in.data) break; + image in_s = resize_image(in, net->w, net->h); + + image out = in; + int x1 = out.w / 20; + int y1 = out.h / 20; + int x2 = 2*x1; + int y2 = out.h - out.h/20; + + int border = .01*out.h; + int h = y2 - y1 - 2*border; + int w = x2 - x1 - 2*border; + + float *predictions = network_predict(net, in_s.data); + float curr_threat = 0; + if(1){ + curr_threat = predictions[0] * 0 + + predictions[1] * .6 + + predictions[2]; + } else { + curr_threat = predictions[218] + + predictions[539] + + predictions[540] + + predictions[368] + + predictions[369] + + predictions[370]; + } + threat = roll * curr_threat + (1-roll) * threat; + + draw_box_width(out, x2 + border, y1 + .02*h, x2 + .5 * w, y1 + .02*h + border, border, 0,0,0); + if(threat > .97) { + draw_box_width(out, x2 + .5 * w + border, + y1 + .02*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .02*h + 3*border, 3*border, 1,0,0); + } + draw_box_width(out, x2 + .5 * w + border, + y1 + .02*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .02*h + 3*border, .5*border, 0,0,0); + draw_box_width(out, x2 + border, y1 + .42*h, x2 + .5 * w, y1 + .42*h + border, border, 0,0,0); + if(threat > .57) { + draw_box_width(out, x2 + .5 * w + border, + y1 + .42*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .42*h + 3*border, 3*border, 1,1,0); + } + draw_box_width(out, x2 + .5 * w + border, + y1 + .42*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .42*h + 3*border, .5*border, 0,0,0); + + draw_box_width(out, x1, y1, x2, y2, border, 0,0,0); + for(i = 0; i < threat * h ; ++i){ + float ratio = (float) i / h; + float r = 
(ratio < .5) ? (2*(ratio)) : 1; + float g = (ratio < .5) ? 1 : 1 - 2*(ratio - .5); + draw_box_width(out, x1 + border, y2 - border - i, x2 - border, y2 - border - i, 1, r, g, 0); + } + top_predictions(net, top, indexes); + char buff[256]; + sprintf(buff, "/home/pjreddie/tmp/threat_%06d", count); + //save_image(out, buff); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + for(i = 0; i < top; ++i){ + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + } + + if(1){ + show_image(out, "Threat", 10); + } + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + int bad_cats[] = {218, 539, 540, 1213, 1501, 1742, 1911, 2415, 4348, 19223, 368, 369, 370, 1133, 1200, 1306, 2122, 2301, 2537, 2823, 3179, 3596, 3639, 4489, 5107, 5140, 5289, 6240, 6631, 6762, 7048, 7171, 7969, 7984, 7989, 8824, 8927, 9915, 10270, 10448, 13401, 15205, 18358, 18894, 18895, 19249, 19697}; + + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + int top = option_find_int(options, "top", 1); + + char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + int i; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = resize_image(in, net->w, net->h); + + float *predictions = network_predict(net, 
in_s.data); + top_predictions(net, top, indexes); + + printf("\033[2J"); + printf("\033[1;1H"); + + int threat = 0; + for(i = 0; i < sizeof(bad_cats)/sizeof(bad_cats[0]); ++i){ + int index = bad_cats[i]; + if(predictions[index] > .01){ + printf("Threat Detected!\n"); + threat = 1; + break; + } + } + if(!threat) printf("Scanning...\n"); + for(i = 0; i < sizeof(bad_cats)/sizeof(bad_cats[0]); ++i){ + int index = bad_cats[i]; + if(predictions[index] > .01){ + printf("%s\n", names[index]); + } + } + + show_image(in, "Threat Detection", 10); + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + +void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + image **alphabet = load_alphabet(); + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + + int w = 1280; + int h = 720; + void * cap = open_video_stream(filename, cam_index, w, h, 0); + + int top = option_find_int(options, "top", 1); + + char *label_list = option_find_str(options, "labels", 0); + char *name_list = option_find_str(options, "names", label_list); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + int i; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + + float *predictions = network_predict(net, in_s.data); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + 
top_predictions(net, top, indexes); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + int lh = in.h*.03; + int toph = 3*lh; + + float rgb[3] = {1,1,1}; + for(i = 0; i < top; ++i){ + printf("%d\n", toph); + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + + char buff[1024]; + sprintf(buff, "%3.1f%%: %s\n", predictions[index]*100, names[index]); + image label = get_label(alphabet, buff, lh); + draw_label(in, toph, lh, label, rgb); + toph += 2*lh; + free_image(label); + } + + show_image(in, base, 10); + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_classifier(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int ngpus; + int *gpus = read_intlist(gpu_list, &ngpus, gpu_index); + + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int top = find_int_arg(argc, argv, "-t", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + char *layer_s = (argc > 7) ? argv[7]: 0; + int layer = layer_s ? 
atoi(layer_s) : -1; + if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top); + else if(0==strcmp(argv[2], "fout")) file_output_classifier(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s)); + else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer); + else if(0==strcmp(argv[2], "csv")) csv_classifier(data, cfg, weights); + else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights); + else if(0==strcmp(argv[2], "validmulti")) validate_classifier_multi(data, cfg, weights); + else if(0==strcmp(argv[2], "valid10")) validate_classifier_10(data, cfg, weights); + else if(0==strcmp(argv[2], "validcrop")) validate_classifier_crop(data, cfg, weights); + else if(0==strcmp(argv[2], "validfull")) validate_classifier_full(data, cfg, weights); +} + + diff --git a/workloads/realworld/standard/darknet/examples/coco.c b/workloads/realworld/standard/darknet/examples/coco.c new file mode 100644 index 0000000000000000000000000000000000000000..6a50b89abd2abc7fb217b5118034a746f790f690 --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/coco.c @@ -0,0 +1,357 @@ +#include "darknet.h" + +#include + +char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking 
meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"}; + +int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; + +void train_coco(char *cfgfile, char *weightfile) +{ + //char *train_images = "/home/pjreddie/data/voc/test/train.txt"; + //char *train_images = "/home/pjreddie/data/coco/train.txt"; + char *train_images = "data/coco.trainval.txt"; + //char *train_images = "data/bags.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + layer l = net->layers[net->n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = 
jitter; + args.num_boxes = side; + args.d = &buffer; + args.type = REGION_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + /* + image im = float_to_image(net->w, net->h, 3, train.X.vals[113]); + image copy = copy_image(im); + draw_coco(copy, train.y.vals[113], 7, "truth"); + cvWaitKey(0); + free_image(copy); + */ + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || (i < 1000 && i%100 == 0)){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +static void print_cocos(FILE *fp, int image_id, detection *dets, int num_boxes, int classes, int w, int h) +{ + int i, j; + for(i = 0; i < num_boxes; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + float bx = xmin; + float by = ymin; + float bw = xmax - xmin; + float bh = ymax - ymin; + + for(j = 0; j < classes; 
++j){ + if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]); + } + } +} + +int get_coco_image_id(char *filename) +{ + char *p = strrchr(filename, '_'); + return atoi(p+1); +} + +void validate_coco(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/"; + list *plist = get_paths("data/coco_val_5k.list"); + //list *plist = get_paths("/home/pjreddie/data/people-art/test.txt"); + //list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + snprintf(buff, 1024, "%s/coco_results.json", base); + FILE *fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + + int m = plist->size; + int i=0; + int t; + + float thresh = .01; + int nms = 1; + float iou_thresh = .5; + + int nthreads = 8; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.type = IMAGE_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + time_t start = time(0); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = 
/* --- Collapsed git-diff chunk: tail of examples/coco.c, all of examples/darknet.c, and the detector-scipy-opencv.py patch head. Code bytes below are unchanged; only standalone review comments are added. --- */
/* Tail of validate_coco (head is outside this chunk): per-thread predict/NMS/JSON loop; fseek(fp,-2,SEEK_CUR) rewinds over the final ",\n" before writing the closing "]". Then validate_coco_recall(): loads the net, reads the VOC 2007 test list, and opens one result file per class. NOTE(review): the fps[] FILE* handles opened here are never fclose()d in this function — confirm intentional. */
&buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + int image_id = get_coco_image_id(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh); + print_cocos(fp, image_id, dets, l.side*l.side*l.n, classes, w, h); + free_detections(dets, nboxes); + free_image(val[t]); + free_image(val_resized[t]); + } + } + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); +} + +void validate_coco_recall(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + int side = l.side; + + int j, k; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + + float thresh = .001; + int nms = 0; + float iou_thresh = .5; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + + int nboxes = 0; + detection *dets = get_network_boxes(net, orig.w,
/* Recall-scoring loop: counts proposals above thresh, matches each ground-truth box (from the parallel "labels" path) to the best-IOU detection. NOTE(review): `nms` is 0 so the do_nms_obj branch is dead; `truth` from read_boxes() is never freed; the fprintf divides by `total`, which is 0 until the first labeled image — division by zero in float (prints nan/inf) — confirm acceptable. Then test_coco(): interactive single-image detection (fgets path loop, fixed srand(2222222)). */
orig.h, thresh, 0, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, side*side*l.n, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < side*side*l.n; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < side*side*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + free_detections(dets, nboxes); + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free(id); + free_image(orig); + free_image(sized); + } +} + +void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) +{ + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + layer l = net->layers[net->n-1]; + set_batch_network(net, 1); + srand(2222222); + float nms = .4; + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = resize_image(im, net->w, net->h); + float *X = sized.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + + int nboxes = 0; + detection *dets = get_network_boxes(net, 1, 1, thresh, 0, 0, 0, &nboxes); + if
/* End of test_coco (NMS, draw, save, loop exit when a filename was given). run_coco(): CLI dispatcher for test/train/valid/recall/demo subcommands. Then the patch header for examples/darknet.c and its head: commented-out cupti includes, file-local CPU-timestamp state, and startCPU() — NOTE(review): this duplicates workloads/common/cpu_timestamps.cpp; confirm the duplication is deliberate (the common object is not linked here). */
(nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms); + + draw_detections(im, dets, l.side*l.side*l.n, thresh, coco_classes, alphabet, 80); + save_image(im, "prediction"); + show_image(im, "predictions", 0); + free_detections(dets, nboxes); + free_image(im); + free_image(sized); + if (filename) break; + } +} + +void run_coco(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .2); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5]: 0; + int avg = find_int_arg(argc, argv, "-avg", 1); + if(0==strcmp(argv[2], "test")) test_coco(cfg, weights, filename, thresh); + else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights); + else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix, avg, .5, 0,0,0,0); +} diff --git a/workloads/realworld/standard/darknet/examples/darknet.c b/workloads/realworld/standard/darknet/examples/darknet.c new file mode 100644 index 0000000000000000000000000000000000000000..f1c5e43b66391a9674c13a193e329cf3dfc26439 --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/darknet.c @@ -0,0 +1,559 @@ +#include "darknet.h" + +// #include "../../../../common/cupti_add.h" +// #include "../../../../common/cpu_timestamps.h" +// #include "cpu_timestamps.h" + +static uint64_t startCPUTime; +static uint64_t endCPUTime; + +void startCPU() +{ + struct timespec tv; + if (clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + startCPUTime =
/* endCPU(): records the end timestamp (ns since epoch via CLOCK_REALTIME) and prints start/end/delta as CSV. The extern block declares the per-subcommand entry points defined in sibling .c files. average(): loads N weight files, sums CONVOLUTIONAL/CONNECTED parameters (and BN stats) into a second net with axpy_cpu. */
(tv.tv_sec * 1.0e9 + tv.tv_nsec); +} + +void endCPU() +{ + struct timespec tv; + if (clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + + endCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); + // endCPUTimestamp1 = std::chrono::system_clock::now(); + printf("CPU_Times,%lu,%lu,%lu\n", startCPUTime, endCPUTime, endCPUTime - startCPUTime); +} + +#ifdef __cplusplus
extern "C" { +#endif +extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top); +extern void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen); +extern void run_yolo(int argc, char **argv); +extern void run_detector(int argc, char **argv); +extern void run_coco(int argc, char **argv); +extern void run_nightmare(int argc, char **argv); +extern void run_classifier(int argc, char **argv); +extern void run_regressor(int argc, char **argv); +extern void run_segmenter(int argc, char **argv); +extern void run_isegmenter(int argc, char **argv); +extern void run_char_rnn(int argc, char **argv); +extern void run_tag(int argc, char **argv); +extern void run_cifar(int argc, char **argv); +extern void run_go(int argc, char **argv); +extern void run_art(int argc, char **argv); +extern void run_super(int argc, char **argv); +extern void run_lsd(int argc, char **argv); +#ifdef __cplusplus +} +#endif + +void average(int argc, char *argv[]) +{ + char *cfgfile = argv[2]; + char *outfile = argv[3]; + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + network *sum = parse_network_cfg(cfgfile); + + char *weightfile = argv[4]; + load_weights(sum, weightfile); + + int i, j; + int n = argc - 5; + for(i = 0; i < n; ++i){ + weightfile = argv[i+5]; + load_weights(net, weightfile); + for(j = 0; j < net->n; ++j){ + layer l = net->layers[j]; + layer out = sum->layers[j]; + if(l.type == CONVOLUTIONAL){ + int num = l.n*l.c*l.size*l.size; + axpy_cpu(l.n, 1, l.biases, 1,
/* average() tail: scal_cpu by 1/(n+1) turns the accumulated sums into the mean, then saves. numops(): walks the net and sums 2*in*out multiply-add FLOPs per CONVOLUTIONAL/CONNECTED/RNN/GRU layer (LSTM continues on the next line). */
out.biases, 1); + axpy_cpu(num, 1, l.weights, 1, out.weights, 1); + if(l.batch_normalize){ + axpy_cpu(l.n, 1, l.scales, 1, out.scales, 1); + axpy_cpu(l.n, 1, l.rolling_mean, 1, out.rolling_mean, 1); + axpy_cpu(l.n, 1, l.rolling_variance, 1, out.rolling_variance, 1); + } + } + if(l.type == CONNECTED){ + axpy_cpu(l.outputs, 1, l.biases, 1, out.biases, 1); + axpy_cpu(l.outputs*l.inputs, 1, l.weights, 1, out.weights, 1); + } + } + } + n = n+1; + for(j = 0; j < net->n; ++j){ + layer l = sum->layers[j]; + if(l.type == CONVOLUTIONAL){ + int num = l.n*l.c*l.size*l.size; + scal_cpu(l.n, 1./n, l.biases, 1); + scal_cpu(num, 1./n, l.weights, 1); + if(l.batch_normalize){ + scal_cpu(l.n, 1./n, l.scales, 1); + scal_cpu(l.n, 1./n, l.rolling_mean, 1); + scal_cpu(l.n, 1./n, l.rolling_variance, 1); + } + } + if(l.type == CONNECTED){ + scal_cpu(l.outputs, 1./n, l.biases, 1); + scal_cpu(l.outputs*l.inputs, 1./n, l.weights, 1); + } + } + save_weights(sum, outfile); +} + +long numops(network *net) +{ + int i; + long ops = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + ops += 2l * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w; + } else if(l.type == CONNECTED){ + ops += 2l * l.inputs * l.outputs; + } else if (l.type == RNN){ + ops += 2l * l.input_layer->inputs * l.input_layer->outputs; + ops += 2l * l.self_layer->inputs * l.self_layer->outputs; + ops += 2l * l.output_layer->inputs * l.output_layer->outputs; + } else if (l.type == GRU){ + ops += 2l * l.uz->inputs * l.uz->outputs; + ops += 2l * l.uh->inputs * l.uh->outputs; + ops += 2l * l.ur->inputs * l.ur->outputs; + ops += 2l * l.wz->inputs * l.wz->outputs; + ops += 2l * l.wh->inputs * l.wh->outputs; + ops += 2l * l.wr->inputs * l.wr->outputs; + } else if (l.type == LSTM){ + ops += 2l * l.uf->inputs * l.uf->outputs; + ops += 2l * l.ui->inputs * l.ui->outputs; + ops += 2l * l.ug->inputs * l.ug->outputs; + ops += 2l * l.uo->inputs * l.uo->outputs; + ops += 2l * l.wf->inputs *
/* numops() LSTM tail. speed(): times `tics` forward passes and reports FLOPS/Hz. operations(): prints the FLOP count. oneoff(): surgically remaps the second-to-last layer's weights/biases (temporary n=11921, +/-5 offsets) around a load_weights call, then replicates the first third of the layer into the other two thirds — NOTE(review): the magic constants 11921 and 5 are checkpoint-specific; confirm before reuse. */
l.wf->outputs; + ops += 2l * l.wi->inputs * l.wi->outputs; + ops += 2l * l.wg->inputs * l.wg->outputs; + ops += 2l * l.wo->inputs * l.wo->outputs; + } + } + return ops; +} + +void speed(char *cfgfile, int tics) +{ + if (tics == 0) tics = 1000; + network *net = parse_network_cfg(cfgfile); + set_batch_network(net, 1); + int i; + double time=what_time_is_it_now(); + image im = make_image(net->w, net->h, net->c*net->batch); + for(i = 0; i < tics; ++i){ + network_predict(net, im.data); + } + double t = what_time_is_it_now() - time; + long ops = numops(net); + printf("\n%d evals, %f Seconds\n", tics, t); + printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); + printf("FLOPS: %.2f Bn\n", (float)ops/1000000000.*tics/t); + printf("Speed: %f sec/eval\n", t/tics); + printf("Speed: %f Hz\n", tics/t); +} + +void operations(char *cfgfile) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + long ops = numops(net); + printf("Floating Point Operations: %ld\n", ops); + printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); +} + +void oneoff(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + int oldn = net->layers[net->n - 2].n; + int c = net->layers[net->n - 2].c; + scal_cpu(oldn*c, .1, net->layers[net->n - 2].weights, 1); + scal_cpu(oldn, 0, net->layers[net->n - 2].biases, 1); + net->layers[net->n - 2].n = 11921; + net->layers[net->n - 2].biases += 5; + net->layers[net->n - 2].weights += 5*c; + if(weightfile){ + load_weights(net, weightfile); + } + net->layers[net->n - 2].biases -= 5; + net->layers[net->n - 2].weights -= 5*c; + net->layers[net->n - 2].n = oldn; + printf("%d\n", oldn); + layer l = net->layers[net->n - 2]; + copy_cpu(l.n/3, l.biases, 1, l.biases + l.n/3, 1); + copy_cpu(l.n/3, l.biases, 1, l.biases + 2*l.n/3, 1); + copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + l.n/3*l.c, 1); + copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + 2*l.n/3*l.c, 1); + *net->seen =
/* oneoff() tail (reset seen counter, save). oneoff2(): double load_weights_upto then save. partial(): truncate a checkpoint at layer `max`. print_weights(): dump one conv layer's weights. rescale_net()/rgbgr_net(): transform only the FIRST conv layer (break after the first match) and save. reset_normalize_net() starts: denormalizes BN layers in place. */
0; + save_weights(net, outfile); +} + +void oneoff2(char *cfgfile, char *weightfile, char *outfile, int l) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights_upto(net, weightfile, 0, net->n); + load_weights_upto(net, weightfile, l, net->n); + } + *net->seen = 0; + save_weights_upto(net, outfile, net->n); +} + +void partial(char *cfgfile, char *weightfile, char *outfile, int max) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 1); + save_weights_upto(net, outfile, max); +} + +void print_weights(char *cfgfile, char *weightfile, int n) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 1); + layer l = net->layers[n]; + int i, j; + //printf("["); + for(i = 0; i < l.n; ++i){ + //printf("["); + for(j = 0; j < l.size*l.size*l.c; ++j){ + //if(j > 0) printf(","); + printf("%g ", l.weights[i*l.size*l.size*l.c + j]); + } + printf("\n"); + //printf("]%s\n", (i == l.n-1)?"":","); + } + //printf("]"); +} + +void rescale_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + rescale_weights(l, 2, -.5); + break; + } + } + save_weights(net, outfile); +} + +void rgbgr_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + rgbgr_weights(l); + break; + } + } + save_weights(net, outfile); +} + +void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for (i = 0; i < net->n; ++i) { + layer l = net->layers[i]; + if (l.type == CONVOLUTIONAL && l.batch_normalize) { + denormalize_convolutional_layer(l); + } + if (l.type == CONNECTED && l.batch_normalize) {
/* reset_normalize_net() tail (GRU sub-layers). normalize_layer(): allocates unit scales and zeroed rolling stats, flips batch_normalize on. normalize_net(): adds BN to layers lacking it — NOTE(review): the GRU branch tests `l.batch_normalize` (no `!`), the opposite of the CONV/CONNECTED branches; looks inverted — confirm against upstream darknet. statistics_net() starts. */
+ denormalize_connected_layer(l); + } + if (l.type == GRU && l.batch_normalize) { + denormalize_connected_layer(*l.input_z_layer); + denormalize_connected_layer(*l.input_r_layer); + denormalize_connected_layer(*l.input_h_layer); + denormalize_connected_layer(*l.state_z_layer); + denormalize_connected_layer(*l.state_r_layer); + denormalize_connected_layer(*l.state_h_layer); + } + } + save_weights(net, outfile); +} + +layer normalize_layer(layer l, int n) +{ + int j; + l.batch_normalize=1; + l.scales = (float *) calloc(n, sizeof(float)); + for(j = 0; j < n; ++j){ + l.scales[j] = 1; + } + l.rolling_mean = (float *) calloc(n, sizeof(float)); + l.rolling_variance = (float *) calloc(n, sizeof(float)); + return l; +} + +void normalize_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL && !l.batch_normalize){ + net->layers[i] = normalize_layer(l, l.n); + } + if (l.type == CONNECTED && !l.batch_normalize) { + net->layers[i] = normalize_layer(l, l.outputs); + } + if (l.type == GRU && l.batch_normalize) { + *l.input_z_layer = normalize_layer(*l.input_z_layer, l.input_z_layer->outputs); + *l.input_r_layer = normalize_layer(*l.input_r_layer, l.input_r_layer->outputs); + *l.input_h_layer = normalize_layer(*l.input_h_layer, l.input_h_layer->outputs); + *l.state_z_layer = normalize_layer(*l.state_z_layer, l.state_z_layer->outputs); + *l.state_r_layer = normalize_layer(*l.state_r_layer, l.state_r_layer->outputs); + *l.state_h_layer = normalize_layer(*l.state_h_layer, l.state_h_layer->outputs); + net->layers[i].batch_normalize=1; + } + } + save_weights(net, outfile); +} + +void statistics_net(char *cfgfile, char *weightfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for (i = 0; i < net->n; ++i) { + layer l = net->layers[i]; + if (l.type == CONNECTED &&
/* statistics_net() tail (per-layer BN statistics dump). denormalize_net(): folds BN into weights and clears batch_normalize flags (incl. GRU sub-layers). mkimg() starts: synthesizes `num` collage images from layer-0 filter visualizations. */
l.batch_normalize) { + printf("Connected Layer %d\n", i); + statistics_connected_layer(l); + } + if (l.type == GRU && l.batch_normalize) { + printf("GRU Layer %d\n", i); + printf("Input Z\n"); + statistics_connected_layer(*l.input_z_layer); + printf("Input R\n"); + statistics_connected_layer(*l.input_r_layer); + printf("Input H\n"); + statistics_connected_layer(*l.input_h_layer); + printf("State Z\n"); + statistics_connected_layer(*l.state_z_layer); + printf("State R\n"); + statistics_connected_layer(*l.state_r_layer); + printf("State H\n"); + statistics_connected_layer(*l.state_h_layer); + } + printf("\n"); + } +} + +void denormalize_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for (i = 0; i < net->n; ++i) { + layer l = net->layers[i]; + if ((l.type == DECONVOLUTIONAL || l.type == CONVOLUTIONAL) && l.batch_normalize) { + denormalize_convolutional_layer(l); + net->layers[i].batch_normalize=0; + } + if (l.type == CONNECTED && l.batch_normalize) { + denormalize_connected_layer(l); + net->layers[i].batch_normalize=0; + } + if (l.type == GRU && l.batch_normalize) { + denormalize_connected_layer(*l.input_z_layer); + denormalize_connected_layer(*l.input_r_layer); + denormalize_connected_layer(*l.input_h_layer); + denormalize_connected_layer(*l.state_z_layer); + denormalize_connected_layer(*l.state_r_layer); + denormalize_connected_layer(*l.state_h_layer); + l.input_z_layer->batch_normalize = 0; + l.input_r_layer->batch_normalize = 0; + l.input_h_layer->batch_normalize = 0; + l.state_z_layer->batch_normalize = 0; + l.state_r_layer->batch_normalize = 0; + l.state_h_layer->batch_normalize = 0; + net->layers[i].batch_normalize=0; + } + } + save_weights(net, outfile); +} + +void mkimg(char *cfgfile, char *weightfile, int h, int w, int num, char *prefix) +{ + network *net = load_network(cfgfile, weightfile, 0); + image *ims = get_weights(net->layers[0]); + int n = net->layers[0].n; +
/* mkimg() tail (100 random rotated/distorted ghosted patches per output). visualize(). rdtsc()/rdtsp(): always-inline TSC / CLOCK_REALTIME nanosecond helpers — NOTE(review): rdtsp() returns -1 cast to unsigned long on failure. main() starts: logs start timestamps, parses -i/-nogpu, calls initTrace() under #ifdef GPU, then the subcommand dispatch chain. */
int z; + for(z = 0; z < num; ++z){ + image im = make_image(h, w, 3); + fill_image(im, .5); + int i; + for(i = 0; i < 100; ++i){ + image r = copy_image(ims[rand()%n]); + rotate_image_cw(r, rand()%4); + random_distort_image(r, 1, 1.5, 1.5); + int dx = rand()%(w-r.w); + int dy = rand()%(h-r.h); + ghost_image(r, im, dx, dy); + free_image(r); + } + char buff[256]; + sprintf(buff, "%s/gen_%d", prefix, z); + save_image(im, buff); + free_image(im); + } +} + +void visualize(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + visualize_network(net); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsc() +{ + unsigned long a, d; + + __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); + + return (a | (d << 32)); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsp() { + struct timespec tms; + if (clock_gettime(CLOCK_REALTIME, &tms)) { + return -1; + } + unsigned long ns = tms.tv_sec * 1000000000; + ns += tms.tv_nsec; + return ns; +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + //test_resize("data/bad.jpg"); + //test_box(); + //test_convolutional_layer(); + if(argc < 2){ + fprintf(stderr, "usage: %s \n", argv[0]); + return 0; + } + gpu_index = find_int_arg(argc, argv, "-i", GPU_DEVICE); + if(find_arg(argc, argv, "-nogpu")) { + gpu_index = -1; + } + +#ifndef GPU + gpu_index = -1; +#else + if(gpu_index >= 0){ + cuda_set_device(gpu_index); + } + initTrace(); +#endif + + if (0 == strcmp(argv[1], "average")){ + average(argc, argv); + } else if (0 == strcmp(argv[1], "yolo")){ + run_yolo(argc, argv); + } else if (0 == strcmp(argv[1], "super")){ + run_super(argc, argv); + } else if (0 == strcmp(argv[1], "lsd")){ + run_lsd(argc, argv); + } else if (0 == strcmp(argv[1], "detector")){ + run_detector(argc, argv); + } else if (0 == strcmp(argv[1], "detect")){ + float thresh =
/* "detect" branch: builds the coco.data path from the UVMAsyncBench_BASE env var — NOTE(review): getenv() result is passed to sprintf without a NULL check, and buff is only 256 bytes; confirm the env var is always set and short. Remaining branches dispatch to the extern run_* entry points. */
find_float_arg(argc, argv, "-thresh", .5); + char *filename = (argc > 4) ? argv[4]: 0; + char *outfile = find_char_arg(argc, argv, "-out", 0); + int fullscreen = find_arg(argc, argv, "-fullscreen"); + char *value = getenv("UVMAsyncBench_BASE"); + char buff[256]; + sprintf(buff, "%s/workloads/realworld/standard/darknet/cfg/coco.data", value); + test_detector(buff, argv[2], argv[3], filename, thresh, .5, outfile, fullscreen); + } else if (0 == strcmp(argv[1], "cifar")){ + run_cifar(argc, argv); + } else if (0 == strcmp(argv[1], "go")){ + run_go(argc, argv); + } else if (0 == strcmp(argv[1], "rnn")){ + run_char_rnn(argc, argv); + } else if (0 == strcmp(argv[1], "coco")){ + run_coco(argc, argv); + } else if (0 == strcmp(argv[1], "classify")){ + predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5); + } else if (0 == strcmp(argv[1], "classifier")){ + run_classifier(argc, argv); + } else if (0 == strcmp(argv[1], "regressor")){ + run_regressor(argc, argv); + } else if (0 == strcmp(argv[1], "isegmenter")){ + run_isegmenter(argc, argv); + } else if (0 == strcmp(argv[1], "segmenter")){ + run_segmenter(argc, argv); + } else if (0 == strcmp(argv[1], "art")){ + run_art(argc, argv); + } else if (0 == strcmp(argv[1], "tag")){ + run_tag(argc, argv); + } else if (0 == strcmp(argv[1], "3d")){ + composite_3d(argv[2], argv[3], argv[4], (argc > 5) ?
/* More dispatch branches. NOTE(review): the second `strcmp(argv[1], "average")` branch near the end is unreachable — "average" is already matched by the first branch of the chain. */
atof(argv[5]) : 0); + } else if (0 == strcmp(argv[1], "test")){ + test_resize(argv[2]); + } else if (0 == strcmp(argv[1], "nightmare")){ + run_nightmare(argc, argv); + } else if (0 == strcmp(argv[1], "rgbgr")){ + rgbgr_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "reset")){ + reset_normalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "denormalize")){ + denormalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "statistics")){ + statistics_net(argv[2], argv[3]); + } else if (0 == strcmp(argv[1], "normalize")){ + normalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "rescale")){ + rescale_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "ops")){ + operations(argv[2]); + } else if (0 == strcmp(argv[1], "speed")){ + speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0); + } else if (0 == strcmp(argv[1], "oneoff")){ + oneoff(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "oneoff2")){ + oneoff2(argv[2], argv[3], argv[4], atoi(argv[5])); + } else if (0 == strcmp(argv[1], "print")){ + print_weights(argv[2], argv[3], atoi(argv[4])); + } else if (0 == strcmp(argv[1], "partial")){ + partial(argv[2], argv[3], argv[4], atoi(argv[5])); + } else if (0 == strcmp(argv[1], "average")){ + average(argc, argv); + } else if (0 == strcmp(argv[1], "visualize")){ + visualize(argv[2], (argc > 3) ?
/* main() tail: mkimg/imtest, unknown-option error, finiTrace() before exit. Then the patch header for detector-scipy-opencv.py and its head — NOTE(review): the script uses Python 2 print statements and the removed scipy.misc.imread; it will not run under Python 3 / modern SciPy. */
argv[3] : 0); + } else if (0 == strcmp(argv[1], "mkimg")){ + mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]); + } else if (0 == strcmp(argv[1], "imtest")){ + test_resize(argv[2]); + } else { + fprintf(stderr, "Not an option: %s\n", argv[1]); + } + finiTrace(); + return 0; +} + diff --git a/workloads/realworld/standard/darknet/examples/detector-scipy-opencv.py b/workloads/realworld/standard/darknet/examples/detector-scipy-opencv.py new file mode 100644 index 0000000000000000000000000000000000000000..3bfc591312ad89ff2b026ffac0daecd461c80447 --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/detector-scipy-opencv.py @@ -0,0 +1,56 @@ +# Stupid python path shit. +# Instead just add darknet.py to somewhere in your python path +# OK actually that might not be a great idea, idk, work in progress +# Use at your own risk. or don't, i don't care + +from scipy.misc import imread +import cv2 + +def array_to_image(arr): + arr = arr.transpose(2,0,1) + c = arr.shape[0] + h = arr.shape[1] + w = arr.shape[2] + arr = (arr/255.0).flatten() + data = dn.c_array(dn.c_float, arr) + im = dn.IMAGE(w,h,c,data) + return im + +def detect2(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): + boxes = dn.make_boxes(net) + probs = dn.make_probs(net) + num = dn.num_boxes(net) + dn.network_detect(net, image, thresh, hier_thresh, nms, boxes, probs) + res = [] + for j in range(num): + for i in range(meta.classes): + if probs[j][i] > 0: + res.append((meta.names[i], probs[j][i], (boxes[j].x, boxes[j].y, boxes[j].w, boxes[j].h))) + res = sorted(res, key=lambda x: -x[1]) + dn.free_ptrs(dn.cast(probs, dn.POINTER(dn.c_void_p)), num) + return res + +import sys, os +sys.path.append(os.path.join(os.getcwd(),'python/')) + +import darknet as dn + +# Darknet +net = dn.load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0) +meta = dn.load_meta("cfg/coco.data") +r = dn.detect(net, meta, "data/dog.jpg") +print r + +# scipy +arr= imread('data/dog.jpg') +im =
/* Python script tail (scipy + OpenCV demo calls). Then the patch header for examples/detector.c: the coco_ids category-id table and train_detector() head — builds train/backup paths from UVMAsyncBench_BASE (same unchecked getenv + fixed 256-byte sprintf pattern flagged above), loads one network per GPU with a shared seed. */
array_to_image(arr) +r = detect2(net, meta, im) +print r + +# OpenCV +arr = cv2.imread('data/dog.jpg') +im = array_to_image(arr) +dn.rgbgr_image(im) +r = detect2(net, meta, im) +print r + diff --git a/workloads/realworld/standard/darknet/examples/detector.c b/workloads/realworld/standard/darknet/examples/detector.c new file mode 100644 index 0000000000000000000000000000000000000000..6ff1fcdff3d3d81abb458e001091cf2757b8d837 --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/detector.c @@ -0,0 +1,931 @@ +#include "darknet.h" + +static int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; + + +void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
{ + list *options = read_data_cfg(datacfg); + // Ruihao + char *train_images_cfg = option_find_str(options, "train", "data/train.list"); + char *backup_directory_cfg = option_find_str(options, "backup", "/backup/"); + + char *env = getenv("UVMAsyncBench_BASE"); + char train_images[256]; + char backup_directory[256]; + sprintf(train_images, "%s/%s", env, train_images_cfg); + sprintf(backup_directory, "%s/%s", env, backup_directory_cfg); + // Ruihao + + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network **nets = calloc(ngpus, sizeof(network)); + + srand(time(0)); + int seed = rand(); + int i; + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + data train, buffer; + + layer l =
/* train_detector() training loop: 64-thread async data loading, random multi-scale resize every 10 batches (dim forced to 608 near the end), large commented-out debug blocks for inspecting loaded boxes. NOTE(review): calloc(ngpus, sizeof(network)) allocates sizeof(network) per element but the array stores network* — over-allocation only, not a correctness bug here, but confirm against upstream. */
net->layers[net->n - 1]; + + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = get_base_args(net); + args.coords = l.coords; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes = l.max_boxes; + args.d = &buffer; + args.type = DETECTION_DATA; + //args.type = INSTANCE_DATA; + args.threads = 64; + + pthread_t load_thread = load_data(args); + double time; + int count = 0; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + if(l.random && count++%10 == 0){ + printf("Resizing\n"); + int dim = (rand() % 10 + 10) * 32; + if (get_current_batch(net)+200 > net->max_batches) dim = 608; + //int dim = (rand() % 4 + 16) * 32; + printf("%d\n", dim); + args.w = dim; + args.h = dim; + + pthread_join(load_thread, 0); + train = buffer; + free_data(train); + load_thread = load_data(args); + + #pragma omp parallel for + for(i = 0; i < ngpus; ++i){ + resize_network(nets[i], dim, dim); + } + net = nets[0]; + } + time=what_time_is_it_now(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + /* + int k; + for(k = 0; k < l.max_boxes; ++k){ + box b = float_to_box(train.y.vals[10] + 1 + k*5); + if(!b.x) break; + printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h); + } + */ + /* + int zz; + for(zz = 0; zz < train.X.cols; ++zz){ + image im = float_to_image(net->w, net->h, 3, train.X.vals[zz]); + int k; + for(k = 0; k < l.max_boxes; ++k){ + box b = float_to_box(train.y.vals[zz] + k*5, 1); + printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + draw_bbox(im, b, 1, 1,0,0); + } + show_image(im, "truth11"); + cvWaitKey(0); + save_image(im, "truth11"); + } + */ + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + + time=what_time_is_it_now(); + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net,
/* train_detector() tail: EMA of loss (0.9/0.1), periodic .backup every 100 batches and numbered .weights every 10000 (or every 100 for the first 1000), multi-GPU sync_nets before each save, final checkpoint. get_coco_image_id(): parses the trailing integer of a COCO filename. print_cocos(): clamps each box to the image and emits one COCO-JSON result line per nonzero class prob (trailing comma trimmed later by the caller's fseek). */
train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + i = get_current_batch(net); + printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, i*imgs); + if(i%100==0){ +#ifdef GPU + if(ngpus != 1) sync_nets(nets, ngpus, 0); +#endif + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + if(i%10000==0 || (i < 1000 && i%100 == 0)){ +#ifdef GPU + if(ngpus != 1) sync_nets(nets, ngpus, 0); +#endif + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } +#ifdef GPU + if(ngpus != 1) sync_nets(nets, ngpus, 0); +#endif + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + + +static int get_coco_image_id(char *filename)
{ + char *p = strrchr(filename, '/'); + char *c = strrchr(filename, '_'); + if(c) p = c; + return atoi(p+1); +} + +static void print_cocos(FILE *fp, char *image_path, detection *dets, int num_boxes, int classes, int w, int h)
{ + int i, j; + int image_id = get_coco_image_id(image_path); + for(i = 0; i < num_boxes; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + float bx = xmin; + float by = ymin; + float bw = xmax - xmin; + float bh = ymax - ymin; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]); + } + }
}
/* --- Collapsed git-diff chunk, examples/detector.c continued. Code bytes below are unchanged; only standalone review comments are added. --- */
/* print_detector_detections(): per-class VOC-format result lines, boxes shifted to 1-based pixel coordinates and clamped. print_imagenet_detections(): same clamping but 0-based, with 1-based class ids. validate_detector_flip() starts: runs validation with batch 2 so each image is scored alongside its horizontal flip. */
+void print_detector_detections(FILE **fps, char *id, detection *dets, int total, int classes, int w, int h) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2. + 1; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2. + 1; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2. + 1; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2. + 1; + + if (xmin < 1) xmin = 1; + if (ymin < 1) ymin = 1; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], + xmin, ymin, xmax, ymax); + } + } +} + +void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int classes, int w, int h) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + int class = j; + if (dets[i].prob[class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j+1, dets[i].prob[class], + xmin, ymin, xmax, ymax); + } + } +} + +void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char *outfile)
{ + int j; + list *options = read_data_cfg(datacfg); + char *valid_images = option_find_str(options, "valid", "data/train.list"); + char *name_list = option_find_str(options, "names", "data/names.list"); + char *prefix = option_find_str(options, "results", "results"); + char **names = get_labels(name_list); + char *mapf = option_find_str(options, "map", 0); + int *map = 0; + if (mapf) map = read_map(mapf); + + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 2); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); +
/* validate_detector_flip() setup: chooses output format by the cfg's "eval" key (coco JSON / imagenet txt / per-class VOC files), allocates 4 loader-thread image buffers, and builds a doubled-channel `input` image to hold original+flipped copies. NOTE(review): in the coco/imagenet branches fopen results are used without a NULL check. */
srand(time(0)); + + list *plist = get_paths(valid_images); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + char *type = option_find_str(options, "eval", "voc"); + FILE *fp = 0; + FILE **fps = 0; + int coco = 0; + int imagenet = 0; + if(0==strcmp(type, "coco")){ + if(!outfile) outfile = "coco_results"; + snprintf(buff, 1024, "%s/%s.json", prefix, outfile); + fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + coco = 1; + } else if(0==strcmp(type, "imagenet")){ + if(!outfile) outfile = "imagenet-detection"; + snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); + fp = fopen(buff, "w"); + imagenet = 1; + classes = 200; + } else { + if(!outfile) outfile = "comp4_det_test_"; + fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); + fps[j] = fopen(buff, "w"); + } + } + + int m = plist->size; + int i=0; + int t; + + float thresh = .005; + float nms = .45; + + int nthreads = 4; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + image input = make_image(net->w, net->h, net->c*2); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + //args.type = IMAGE_DATA; + args.type = LETTERBOX_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + double start = what_time_is_it_now(); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; +
/* Flip-validation main loop: copies the resized image and its flipped version into the doubled input, predicts once, NMS-sorts, and routes output through the format chosen above; per-class files are fclose()d and the coco JSON's trailing ",\n" trimmed via fseek(-2). Then validate_detector() begins — same data-cfg/output-file setup with batch 1 (no flip); the function continues past this chunk. */
args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id = basecfg(path); + copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data, 1); + flip_image(val_resized[t]); + copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data + net->w*net->h*net->c, 1); + + network_predict(net, input.data); + int w = val[t].w; + int h = val[t].h; + int num = 0; + detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &num); + if (nms) do_nms_sort(dets, num, classes, nms); + if (coco){ + print_cocos(fp, path, dets, num, classes, w, h); + } else if (imagenet){ + print_imagenet_detections(fp, i+t-nthreads+1, dets, num, classes, w, h); + } else { + print_detector_detections(fps, id, dets, num, classes, w, h); + } + free_detections(dets, num); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + for(j = 0; j < classes; ++j){ + if(fps) fclose(fps[j]); + } + if(coco){ + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start); +} + + +void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile)
{ + int j; + list *options = read_data_cfg(datacfg); + char *valid_images = option_find_str(options, "valid", "data/train.list"); + char *name_list = option_find_str(options, "names", "data/names.list"); + char *prefix = option_find_str(options, "results", "results"); + char **names = get_labels(name_list); + char *mapf = option_find_str(options, "map", 0); + int *map = 0; + if (mapf) map = read_map(mapf); + + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + list *plist = get_paths(valid_images); + char **paths = (char **)list_to_array(plist); +
+ layer l = net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + char *type = option_find_str(options, "eval", "voc"); + FILE *fp = 0; + FILE **fps = 0; + int coco = 0; + int imagenet = 0; + if(0==strcmp(type, "coco")){ + if(!outfile) outfile = "coco_results"; + snprintf(buff, 1024, "%s/%s.json", prefix, outfile); + fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + coco = 1; + } else if(0==strcmp(type, "imagenet")){ + if(!outfile) outfile = "imagenet-detection"; + snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); + fp = fopen(buff, "w"); + imagenet = 1; + classes = 200; + } else { + if(!outfile) outfile = "comp4_det_test_"; + fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); + fps[j] = fopen(buff, "w"); + } + } + + + int m = plist->size; + int i=0; + int t; + + float thresh = .005; + float nms = .45; + + int nthreads = 4; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + //args.type = IMAGE_DATA; + args.type = LETTERBOX_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + double start = what_time_is_it_now(); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads];
void validate_detector_recall(char *cfgfile, char *weightfile)
{
    /* Measure raw box recall of a detector over data/coco_val_5k.list:
     * for every ground-truth label, find the best-IOU detection above
     * `thresh` and count it correct when IOU > iou_thresh.  Prints a
     * running proposals/IOU/recall summary per image to stderr. */
    network *net = load_network(cfgfile, weightfile, 0);
    set_batch_network(net, 1);
    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    srand(time(0));

    list *plist = get_paths("data/coco_val_5k.list");
    char **paths = (char **)list_to_array(plist);

    int j, k;

    int m = plist->size;
    int i = 0;

    float thresh = .001;
    float iou_thresh = .5;
    float nms = .4;

    int total = 0;
    int correct = 0;
    int proposals = 0;
    float avg_iou = 0;

    for(i = 0; i < m; ++i){
        char *path = paths[i];
        image orig = load_image_color(path, 0, 0);
        image sized = resize_image(orig, net->w, net->h);
        char *id = basecfg(path);
        network_predict(net, sized.data);
        int nboxes = 0;
        detection *dets = get_network_boxes(net, sized.w, sized.h, thresh, .5, 0, 1, &nboxes);
        if (nms) do_nms_obj(dets, nboxes, 1, nms);

        /* Derive the label file path from the image path. */
        char labelpath[4096];
        find_replace(path, "images", "labels", labelpath);
        find_replace(labelpath, "JPEGImages", "labels", labelpath);
        find_replace(labelpath, ".jpg", ".txt", labelpath);
        find_replace(labelpath, ".JPEG", ".txt", labelpath);

        int num_labels = 0;
        box_label *truth = read_boxes(labelpath, &num_labels);
        for(k = 0; k < nboxes; ++k){
            if(dets[k].objectness > thresh){
                ++proposals;
            }
        }
        for (j = 0; j < num_labels; ++j) {
            ++total;
            box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h};
            float best_iou = 0;
            /* BUG FIX: iterate over the nboxes detections actually returned
             * by get_network_boxes instead of l.w*l.h*l.n, which can exceed
             * the dets array and read out of bounds. */
            for(k = 0; k < nboxes; ++k){
                float iou = box_iou(dets[k].bbox, t);
                if(dets[k].objectness > thresh && iou > best_iou){
                    best_iou = iou;
                }
            }
            avg_iou += best_iou;
            if(best_iou > iou_thresh){
                ++correct;
            }
        }

        /* BUG FIX: guard the division — total is 0 until the first labelled
         * image, which previously printed NaN percentages. */
        if(total){
            fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total);
        } else {
            fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), 0.f, 0.f);
        }
        free(truth);                    /* LEAK FIX: read_boxes allocates */
        free_detections(dets, nboxes);  /* LEAK FIX: was never freed per image */
        free(id);
        free_image(orig);
        free_image(sized);
    }
}
crop_image(sized2, -((net->w - sized2.w)/2), -((net->h - sized2.h)/2), net->w, net->h); + //resize_network(net, sized.w, sized.h); + layer l = net->layers[net->n-1]; + + + float *X = sized.data; + time=what_time_is_it_now(); + startCPU(); + network_predict(net, X); + endCPU(); + printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time); + int nboxes = 0; + detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes); + //printf("%d\n", nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes); + free_detections(dets, nboxes); + if(outfile){ + save_image(im, outfile); + } + else{ + save_image(im, "predictions"); +#ifdef OPENCV + make_window("predictions", 512, 512, 0); + show_image(im, "predictions", 0); +#endif + } + + free_image(im); + free_image(sized); + if (filename) break; + } +} + +/* +void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + CvCapture * cap; + + int w = 1280; + int h = 720; + + if(filename){ + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + } + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + cvNamedWindow(base, CV_WINDOW_NORMAL); + cvResizeWindow(base, 512, 512); + float fps = 0; + int i; + float nms = .45; + + while(1){ + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + layer l = net->layers[net->n-1]; + + float *X = in_s.data; + 
network_predict(net, X); + int nboxes = 0; + detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 0, &nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + + for(i = 0; i < nboxes; ++i){ + if(dets[i].prob[class] > thresh){ + box b = dets[i].bbox; + int left = b.x-b.w/2.; + int top = b.y-b.h/2.; + censor_image(in, left, top, b.w, b.h); + } + } + show_image(in, base); + cvWaitKey(10); + free_detections(dets, nboxes); + + + free_image(in_s); + free_image(in); + + + float curr = 0; + fps = .9*fps + .1*curr; + for(i = 0; i < skip; ++i){ + image in = get_image_from_stream(cap); + free_image(in); + } + } + #endif +} + +void extract_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + CvCapture * cap; + + int w = 1280; + int h = 720; + + if(filename){ + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + } + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + cvNamedWindow(base, CV_WINDOW_NORMAL); + cvResizeWindow(base, 512, 512); + float fps = 0; + int i; + int count = 0; + float nms = .45; + + while(1){ + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + layer l = net->layers[net->n-1]; + + show_image(in, base); + + int nboxes = 0; + float *X = in_s.data; + network_predict(net, X); + detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 1, &nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + + 
void infer_detector(char *datacfg, char *cfgfile, char *weightfile)
{
    /* Run batched forward passes over the data cfg's "valid" image list,
     * double-buffering the loads on a worker thread: while the network
     * predicts batch N, batch N+1 is loaded.  Total images processed is
     * net->max_batches * net->batch.  Timing is reported per batch. */
    int curr = 0;
    network *net = load_network(cfgfile, weightfile, 0);
    srand(time(0));

    list *options = read_data_cfg(datacfg);

    // Ruihao: resolve the list path relative to the benchmark root.
    char *test_list_cfg = option_find_str(options, "valid", "data/valid.list");
    char *env = getenv("UVMAsyncBench_BASE");
    char test_list[256];
    /* BUG FIX: getenv may return NULL when the variable is unset, and
     * passing NULL to a %s conversion is undefined behavior; also bound
     * the write with snprintf instead of sprintf. */
    snprintf(test_list, sizeof(test_list), "%s/%s", env ? env : ".", test_list_cfg);
    // Ruihao
    int classes = option_find_int(options, "classes", 2);

    list *plist = get_paths(test_list);
    char **paths = (char **)list_to_array(plist);
    /* Total image budget is driven by the cfg, not the list length
     * (removed a dead store of plist->size that was immediately overwritten). */
    int m = net->max_batches * net->batch;
    free_list(plist);

    clock_t t;  /* renamed from `time` to stop shadowing time(2) */

    data val, buffer;

    load_args args = {0};
    args.w = net->w;
    args.h = net->h;
    args.paths = paths;
    args.classes = classes;
    args.n = net->batch;
    args.m = 0;
    args.labels = 0;
    args.d = &buffer;
    args.type = OLD_CLASSIFICATION_DATA;

    pthread_t load_thread = load_data_in_thread(args);
    for(curr = net->batch; curr <= m; curr += net->batch){
        t = clock();

        pthread_join(load_thread, 0);
        val = buffer;

        /* Kick off the next load while we predict on this batch. */
        if(curr < m){
            args.paths = paths + curr;
            if (curr + net->batch > m) args.n = m - curr;
            load_thread = load_data_in_thread(args);
        }
        fprintf(stderr, "Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-t));

        t = clock();
        matrix pred = network_predict_data(net, val);

        fprintf(stderr, "%lf seconds, %d images, %d total\n", sec(clock()-t), val.X.rows, curr);
        free_matrix(pred);
        free_data(val);
    }
}
= (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen); + else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile); + // Ruihao + else if(0==strcmp(argv[2], "infer")) infer_detector(datacfg, cfg, weights); + // Ruihao + else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile); + else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) { + list *options = read_data_cfg(datacfg); + int classes = option_find_int(options, "classes", 20); + char *name_list = option_find_str(options, "names", "data/names.list"); + char **names = get_labels(name_list); + demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen); + } + //else if(0==strcmp(argv[2], "extract")) extract_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip); + //else if(0==strcmp(argv[2], "censor")) censor_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip); +} diff --git a/workloads/realworld/standard/darknet/examples/detector.py b/workloads/realworld/standard/darknet/examples/detector.py new file mode 100644 index 0000000000000000000000000000000000000000..40bb365e68211c513db9d63847ac95070f5eab98 --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/detector.py @@ -0,0 +1,27 @@ +# Stupid python path shit. +# Instead just add darknet.py to somewhere in your python path +# OK actually that might not be a great idea, idk, work in progress +# Use at your own risk. 
or don't, i don't care + +import sys, os +sys.path.append(os.path.join(os.getcwd(),'python/')) + +import darknet as dn +import pdb + +dn.set_gpu(0) +net = dn.load_net("cfg/yolo-thor.cfg", "/home/pjreddie/backup/yolo-thor_final.weights", 0) +meta = dn.load_meta("cfg/thor.data") +r = dn.detect(net, meta, "data/bedroom.jpg") +print r + +# And then down here you could detect a lot more images like: +r = dn.detect(net, meta, "data/eagle.jpg") +print r +r = dn.detect(net, meta, "data/giraffe.jpg") +print r +r = dn.detect(net, meta, "data/horses.jpg") +print r +r = dn.detect(net, meta, "data/person.jpg") +print r + diff --git a/workloads/realworld/standard/darknet/examples/dice.c b/workloads/realworld/standard/darknet/examples/dice.c new file mode 100644 index 0000000000000000000000000000000000000000..f56d76c0bb66c7f630ba1c4d1dc9195398b87cfb --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/dice.c @@ -0,0 +1,116 @@ +#include "darknet.h" + +char *dice_labels[] = {"face1","face2","face3","face4","face5","face6"}; + +void train_dice(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + int i = *net.seen/imgs; + char **labels = dice_labels; + list *plist = get_paths("data/dice/dice.train.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + while(1){ + ++i; + time=clock(); + data train = load_data_old(paths, imgs, plist->size, labels, 6, net.w, net.h); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f 
avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); + free_data(train); + if((i % 100) == 0) net.learning_rate *= .1; + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, i); + save_weights(net, buff); + } + } +} + +void validate_dice(char *filename, char *weightfile) +{ + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + char **labels = dice_labels; + list *plist = get_paths("data/dice/dice.val.list"); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + data val = load_data_old(paths, m, 0, labels, 6, net.w, net.h); + float *acc = network_accuracies(net, val, 2); + printf("Validation Accuracy: %f, %d images\n", acc[0], m); + free_data(val); +} + +void test_dice(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + int i = 0; + char **names = dice_labels; + char buff[256]; + char *input = buff; + int indexes[6]; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, net.w, net.h); + float *X = im.data; + float *predictions = network_predict(net, X); + top_predictions(net, 6, indexes); + for(i = 0; i < 6; ++i){ + int index = indexes[i]; + printf("%s: %f\n", names[index], predictions[index]); + } + free_image(im); + if (filename) break; + } +} + +void run_dice(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "test")) test_dice(cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_dice(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_dice(cfg, weights); +} + diff --git a/workloads/realworld/standard/darknet/examples/go.c b/workloads/realworld/standard/darknet/examples/go.c new file mode 100644 index 0000000000000000000000000000000000000000..688579dcb3a3e35e9a79b8fb8aa684f28f44290d --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/go.c @@ -0,0 +1,1370 @@ +#include "darknet.h" + +#include +#include +#include + +int inverted = 1; +int noi = 1; +static const int nind = 10; +int legal_go(float *b, float *ko, int p, int r, int c); +int check_ko(float *x, float *ko); + +typedef struct { + char **data; + int n; +} moves; + +char *fgetgo(FILE *fp) +{ + if(feof(fp)) return 0; + size_t size = 96; + char *line = malloc(size*sizeof(char)); + if(size != fread(line, sizeof(char), size, fp)){ + free(line); + return 0; + } + + return line; +} + +moves load_go_moves(char *filename) +{ + moves m; + m.n = 128; + m.data = calloc(128, sizeof(char*)); + FILE *fp = fopen(filename, "rb"); + int count = 0; + char *line = 0; + while ((line = fgetgo(fp))) { + if (count >= m.n) { + m.n *= 2; + m.data = realloc(m.data, m.n*sizeof(char*)); + } + m.data[count] = line; + ++count; + } + printf("%d\n", count); + m.n = count; + m.data = realloc(m.data, count*sizeof(char*)); + return m; +} + +void string_to_board(char *s, float *board) +{ + int i, j; + memset(board, 0, 2*19*19*sizeof(float)); + int count = 0; + for(i = 0; i < 91; ++i){ + char c = s[i]; + for(j = 0; j < 4; ++j){ + int me = (c >> (2*j)) & 1; + int you = (c >> (2*j + 1)) & 1; + if (me) board[count] = 1; + else if (you) board[count + 19*19] = 1; + ++count; + if(count >= 19*19) break; + } + } +} + +void board_to_string(char *s, float *board) +{ + int i, j; + memset(s, 0, (19*19/4+1)*sizeof(char)); + int count = 0; + for(i = 0; i < 91; ++i){ + for(j = 0; j < 4; 
++j){ + int me = (board[count] == 1); + int you = (board[count + 19*19] == 1); + if (me) s[i] = s[i] | (1<<(2*j)); + if (you) s[i] = s[i] | (1<<(2*j + 1)); + ++count; + if(count >= 19*19) break; + } + } +} + +static int occupied(float *b, int i) +{ + if (b[i]) return 1; + if (b[i+19*19]) return -1; + return 0; +} + +data random_go_moves(moves m, int n) +{ + data d = {0}; + d.X = make_matrix(n, 19*19*3); + d.y = make_matrix(n, 19*19+2); + int i, j; + for(i = 0; i < n; ++i){ + float *board = d.X.vals[i]; + float *label = d.y.vals[i]; + char *b = m.data[rand()%m.n]; + int player = b[0] - '0'; + int result = b[1] - '0'; + int row = b[2]; + int col = b[3]; + string_to_board(b+4, board); + if(player > 0) for(j = 0; j < 19*19; ++j) board[19*19*2 + j] = 1; + label[19*19+1] = (player==result); + if(row >= 19 || col >= 19){ + label[19*19] = 1; + } else { + label[col + 19*row] = 1; + if(occupied(board, col + 19*row)) printf("hey\n"); + } + + int flip = rand()%2; + int rotate = rand()%4; + image in = float_to_image(19, 19, 3, board); + image out = float_to_image(19, 19, 1, label); + if(flip){ + flip_image(in); + flip_image(out); + } + rotate_image_cw(in, rotate); + rotate_image_cw(out, rotate); + } + return d; +} + + +void train_go(char *cfgfile, char *weightfile, char *filename, int *gpus, int ngpus, int clear) +{ + int i; + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + network *net = nets[0]; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + + char buff[256]; + moves m = load_go_moves(filename); + //moves m = 
/* Flood-fill from (row,col): credit one liberty (lib[idx] += 1) to every
 * stone of color `side` in the connected chain, using `visited` to stop
 * the recursion from revisiting cells.  Out-of-board or wrong-color cells
 * terminate the branch. */
static void propagate_liberty(float *board, int *lib, int *visited, int row, int col, int side)
{
    if (row < 0 || row > 18 || col < 0 || col > 18) return;   /* off the board */
    int idx = row*19 + col;
    if (visited[idx]) return;                                  /* already credited */
    if (occupied(board, idx) != side) return;                  /* empty or enemy */
    visited[idx] = 1;
    lib[idx] += 1;
    propagate_liberty(board, lib, visited, row + 1, col, side);
    propagate_liberty(board, lib, visited, row - 1, col, side);
    propagate_liberty(board, lib, visited, row, col + 1, side);
    propagate_liberty(board, lib, visited, row, col - 1, side);
}
19-j : j+1); + for(i = 0; i < 19; ++i){ + int index = j*19 + i; + if(indexes){ + int found = 0; + for(n = 0; n < nind; ++n){ + if(index == indexes[n]){ + found = 1; + /* + if(n == 0) fprintf(stream, "\uff11"); + else if(n == 1) fprintf(stream, "\uff12"); + else if(n == 2) fprintf(stream, "\uff13"); + else if(n == 3) fprintf(stream, "\uff14"); + else if(n == 4) fprintf(stream, "\uff15"); + */ + fprintf(stream, " %d", n+1); + } + } + if(found) continue; + } + //if(board[index]*-swap > 0) fprintf(stream, "\u25C9 "); + //else if(board[index]*-swap < 0) fprintf(stream, "\u25EF "); + if (occupied(board, index) == player) fprintf(stream, " X"); + else if (occupied(board, index) ==-player) fprintf(stream, " O"); + else fprintf(stream, " ."); + } + fprintf(stream, "\n"); + } +} + +void flip_board(float *board) +{ + int i; + for(i = 0; i < 19*19; ++i){ + float swap = board[i]; + board[i] = board[i+19*19]; + board[i+19*19] = swap; + board[i+19*19*2] = 1-board[i+19*19*2]; + } +} + +float predict_move2(network *net, float *board, float *move, int multi) +{ + float *output = network_predict(net, board); + copy_cpu(19*19+1, output, 1, move, 1); + float result = output[19*19 + 1]; + int i; + if(multi){ + image bim = float_to_image(19, 19, 3, board); + for(i = 1; i < 8; ++i){ + rotate_image_cw(bim, i); + if(i >= 4) flip_image(bim); + + float *output = network_predict(net, board); + image oim = float_to_image(19, 19, 1, output); + result += output[19*19 + 1]; + + if(i >= 4) flip_image(oim); + rotate_image_cw(oim, -i); + + axpy_cpu(19*19+1, 1, output, 1, move, 1); + + if(i >= 4) flip_image(bim); + rotate_image_cw(bim, -i); + } + result = result/8; + scal_cpu(19*19+1, 1./8., move, 1); + } + for(i = 0; i < 19*19; ++i){ + if(board[i] || board[i+19*19]) move[i] = 0; + } + return result; +} + +static void remove_connected(float *b, int *lib, int p, int r, int c) +{ + if (r < 0 || r >= 19 || c < 0 || c >= 19) return; + if (occupied(b, r*19 + c) != p) return; + if (lib[r*19 + c] != 1) 
return; + b[r*19 + c] = 0; + b[19*19 + r*19 + c] = 0; + remove_connected(b, lib, p, r+1, c); + remove_connected(b, lib, p, r-1, c); + remove_connected(b, lib, p, r, c+1); + remove_connected(b, lib, p, r, c-1); +} + + +void move_go(float *b, int p, int r, int c) +{ + int *l = calculate_liberties(b); + if(p > 0) b[r*19 + c] = 1; + else b[19*19 + r*19 + c] = 1; + remove_connected(b, l, -p, r+1, c); + remove_connected(b, l, -p, r-1, c); + remove_connected(b, l, -p, r, c+1); + remove_connected(b, l, -p, r, c-1); + free(l); +} + +int compare_board(float *a, float *b) +{ + if(memcmp(a, b, 19*19*3*sizeof(float)) == 0) return 1; + return 0; +} + +typedef struct mcts_tree{ + float *board; + struct mcts_tree **children; + float *prior; + int *visit_count; + float *value; + float *mean; + float *prob; + int total_count; + float result; + int done; + int pass; +} mcts_tree; + +void free_mcts(mcts_tree *root) +{ + if(!root) return; + int i; + free(root->board); + for(i = 0; i < 19*19+1; ++i){ + if(root->children[i]) free_mcts(root->children[i]); + } + free(root->children); + free(root->prior); + free(root->visit_count); + free(root->value); + free(root->mean); + free(root->prob); + free(root); +} + +float *network_predict_rotations(network *net, float *next) +{ + int n = net->batch; + float *in = calloc(19*19*3*n, sizeof(float)); + image im = float_to_image(19, 19, 3, next); + int i,j; + int *inds = random_index_order(0, 8); + for(j = 0; j < n; ++j){ + i = inds[j]; + rotate_image_cw(im, i); + if(i >= 4) flip_image(im); + memcpy(in + 19*19*3*j, im.data, 19*19*3*sizeof(float)); + if(i >= 4) flip_image(im); + rotate_image_cw(im, -i); + } + float *pred = network_predict(net, in); + for(j = 0; j < n; ++j){ + i = inds[j]; + image im = float_to_image(19, 19, 1, pred + j*(19*19 + 2)); + if(i >= 4) flip_image(im); + rotate_image_cw(im, -i); + if(j > 0){ + axpy_cpu(19*19+2, 1, im.data, 1, pred, 1); + } + } + free(in); + free(inds); + scal_cpu(19*19+2, 1./n, pred, 1); + return pred; +} + 
+mcts_tree *expand(float *next, float *ko, network *net) +{ + mcts_tree *root = calloc(1, sizeof(mcts_tree)); + root->board = next; + root->children = calloc(19*19+1, sizeof(mcts_tree*)); + root->prior = calloc(19*19 + 1, sizeof(float)); + root->prob = calloc(19*19 + 1, sizeof(float)); + root->mean = calloc(19*19 + 1, sizeof(float)); + root->value = calloc(19*19 + 1, sizeof(float)); + root->visit_count = calloc(19*19 + 1, sizeof(int)); + root->total_count = 1; + int i; + float *pred = network_predict_rotations(net, next); + copy_cpu(19*19+1, pred, 1, root->prior, 1); + float val = 2*pred[19*19 + 1] - 1; + root->result = val; + for(i = 0; i < 19*19+1; ++i) { + root->visit_count[i] = 0; + root->value[i] = 0; + root->mean[i] = val; + if(i < 19*19 && occupied(next, i)){ + root->value[i] = -1; + root->mean[i] = -1; + root->prior[i] = 0; + } + } + //print_board(stderr, next, flip?-1:1, 0); + return root; +} + +float *copy_board(float *board) +{ + float *next = calloc(19*19*3, sizeof(float)); + copy_cpu(19*19*3, board, 1, next, 1); + return next; +} + +float select_mcts(mcts_tree *root, network *net, float *prev, float cpuct) +{ + if(root->done) return -root->result; + int i; + float max = -1000; + int max_i = 0; + for(i = 0; i < 19*19+1; ++i){ + root->prob[i] = root->mean[i] + cpuct*root->prior[i] * sqrt(root->total_count) / (1. 
+ root->visit_count[i]); + if(root->prob[i] > max){ + max = root->prob[i]; + max_i = i; + } + } + float val; + i = max_i; + root->visit_count[i]++; + root->total_count++; + if (root->children[i]) { + val = select_mcts(root->children[i], net, root->board, cpuct); + } else { + if(max_i < 19*19 && !legal_go(root->board, prev, 1, max_i/19, max_i%19)) { + root->mean[i] = -1; + root->value[i] = -1; + root->prior[i] = 0; + --root->total_count; + return select_mcts(root, net, prev, cpuct); + //printf("Detected ko\n"); + //getchar(); + } else { + float *next = copy_board(root->board); + if (max_i < 19*19) { + move_go(next, 1, max_i / 19, max_i % 19); + } + flip_board(next); + root->children[i] = expand(next, root->board, net); + val = -root->children[i]->result; + if(max_i == 19*19){ + root->children[i]->pass = 1; + if (root->pass){ + root->children[i]->done = 1; + } + } + } + } + root->value[i] += val; + root->mean[i] = root->value[i]/root->visit_count[i]; + return -val; +} + +mcts_tree *run_mcts(mcts_tree *tree, network *net, float *board, float *ko, int player, int n, float cpuct, float secs) +{ + int i; + double t = what_time_is_it_now(); + if(player < 0) flip_board(board); + if(!tree) tree = expand(copy_board(board), ko, net); + assert(compare_board(tree->board, board)); + for(i = 0; i < n; ++i){ + if (secs > 0 && (what_time_is_it_now() - t) > secs) break; + int max_i = max_int_index(tree->visit_count, 19*19+1); + if (tree->visit_count[max_i] >= n) break; + select_mcts(tree, net, ko, cpuct); + } + if(player < 0) flip_board(board); + //fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + return tree; +} + +mcts_tree *move_mcts(mcts_tree *tree, int index) +{ + if(index < 0 || index > 19*19 || !tree || !tree->children[index]) { + free_mcts(tree); + tree = 0; + } else { + mcts_tree *swap = tree; + tree = tree->children[index]; + swap->children[index] = 0; + free_mcts(swap); + } + return tree; +} + +typedef struct { + float value; + float mcts; + int row; + int 
col; +} move; + +move pick_move(mcts_tree *tree, float temp, int player) +{ + int i; + float probs[19*19+1] = {0}; + move m = {0}; + double sum = 0; + /* + for(i = 0; i < 19*19+1; ++i){ + probs[i] = tree->visit_count[i]; + } + */ + //softmax(probs, 19*19+1, temp, 1, probs); + for(i = 0; i < 19*19+1; ++i){ + sum += pow(tree->visit_count[i], 1./temp); + } + for(i = 0; i < 19*19+1; ++i){ + probs[i] = pow(tree->visit_count[i], 1./temp) / sum; + } + + int index = sample_array(probs, 19*19+1); + m.row = index / 19; + m.col = index % 19; + m.value = (tree->result+1.)/2.; + m.mcts = (tree->mean[index]+1.)/2.; + + int indexes[nind]; + top_k(probs, 19*19+1, nind, indexes); + print_board(stderr, tree->board, player, indexes); + + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", index/19, index%19, tree->result, tree->prior[index], probs[index], tree->mean[index], (tree->children[index])?tree->children[index]->result:0, tree->visit_count[index]); + int ind = max_index(probs, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, tree->result, tree->prior[ind], probs[ind], tree->mean[ind], (tree->children[ind])?tree->children[ind]->result:0, tree->visit_count[ind]); + ind = max_index(tree->prior, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, tree->result, tree->prior[ind], probs[ind], tree->mean[ind], (tree->children[ind])?tree->children[ind]->result:0, tree->visit_count[ind]); + return m; +} + +/* + float predict_move(network *net, float *board, float *move, int multi, float *ko, float temp) + { + + int i; + + int max_v = 0; + int max_i = 0; + for(i = 0; i < 19*19+1; ++i){ + if(root->visit_count[i] > max_v){ + max_v = root->visit_count[i]; + max_i = i; + } + } + fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + int ind = max_index(root->mean, 
19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", max_i/19, max_i%19, root->result, root->prior[max_i], root->prob[max_i], root->mean[max_i], (root->children[max_i])?root->children[max_i]->result:0, root->visit_count[max_i]); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, root->result, root->prior[ind], root->prob[ind], root->mean[ind], (root->children[ind])?root->children[ind]->result:0, root->visit_count[ind]); + ind = max_index(root->prior, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, root->result, root->prior[ind], root->prob[ind], root->mean[ind], (root->children[ind])?root->children[ind]->result:0, root->visit_count[ind]); + if(root->result < -.9 && root->mean[max_i] < -.9) return -1000.f; + + float val = root->result; + free_mcts(root); + return val; + } + */ + +static int makes_safe_go(float *b, int *lib, int p, int r, int c){ + if (r < 0 || r >= 19 || c < 0 || c >= 19) return 0; + if (occupied(b,r*19 + c) == -p){ + if (lib[r*19 + c] > 1) return 0; + else return 1; + } + if (!occupied(b,r*19 + c)) return 1; + if (lib[r*19 + c] > 1) return 1; + return 0; +} + +int suicide_go(float *b, int p, int r, int c) +{ + int *l = calculate_liberties(b); + int safe = 0; + safe = safe || makes_safe_go(b, l, p, r+1, c); + safe = safe || makes_safe_go(b, l, p, r-1, c); + safe = safe || makes_safe_go(b, l, p, r, c+1); + safe = safe || makes_safe_go(b, l, p, r, c-1); + free(l); + return !safe; +} + +int check_ko(float *x, float *ko) +{ + if(!ko) return 0; + float curr[19*19*3]; + copy_cpu(19*19*3, x, 1, curr, 1); + if(curr[19*19*2] != ko[19*19*2]) flip_board(curr); + if(compare_board(curr, ko)) return 1; + return 0; +} + +int legal_go(float *b, float *ko, int p, int r, int c) +{ + if (occupied(b, r*19+c)) return 0; + float curr[19*19*3]; + 
copy_cpu(19*19*3, b, 1, curr, 1); + move_go(curr, p, r, c); + if(check_ko(curr, ko)) return 0; + if(suicide_go(b, p, r, c)) return 0; + return 1; +} + +/* + move generate_move(mcts_tree *root, network *net, int player, float *board, int multi, float temp, float *ko, int print) + { + move m = {0}; +//root = run_mcts(tree, network *net, float *board, float *ko, int n, float cpuct) +int i, j; +int empty = 1; +for(i = 0; i < 19*19; ++i){ +if (occupied(board, i)) { +empty = 0; +break; +} +} +if(empty) { +m.value = .5; +m.mcts = .5; +m.row = 3; +m.col = 15; +return m; +} + +float move[362]; +if (player < 0) flip_board(board); +float result = predict_move(net, board, move, multi, ko, temp); +if (player < 0) flip_board(board); +if(result == -1000.f) return -2; + +for(i = 0; i < 19; ++i){ +for(j = 0; j < 19; ++j){ +if (!legal_go(board, ko, player, i, j)) move[i*19 + j] = 0; +} +} + +int indexes[nind]; +top_k(move, 19*19+1, nind, indexes); + + +int max = max_index(move, 19*19+1); +int row = max / 19; +int col = max % 19; +int index = sample_array(move, 19*19+1); + +if(print){ +top_k(move, 19*19+1, nind, indexes); +for(i = 0; i < nind; ++i){ +if (!move[indexes[i]]) indexes[i] = -1; +} +print_board(stderr, board, 1, indexes); +fprintf(stderr, "%s To Move\n", player > 0 ? 
"X" : "O"); +fprintf(stderr, "%.2f%% Win Chance\n", (result+1)/2*100); +for(i = 0; i < nind; ++i){ +int index = indexes[i]; +int row = index / 19; +int col = index % 19; +if(row == 19){ +fprintf(stderr, "%d: Pass, %.2f%%\n", i+1, move[index]*100); +} else { +fprintf(stderr, "%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100); +} +} +} +if (row == 19) return -1; + +if (suicide_go(board, player, row, col)){ +return -1; +} + +if (suicide_go(board, player, index/19, index%19)){ +index = max; +} +if (index == 19*19) return -1; +return index; +} +*/ + +void valid_go(char *cfgfile, char *weightfile, int multi, char *filename) +{ + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + float *board = calloc(19*19*3, sizeof(float)); + float *move = calloc(19*19+2, sizeof(float)); + // moves m = load_go_moves("/home/pjreddie/backup/go.test"); + moves m = load_go_moves(filename); + + int N = m.n; + int i,j; + int correct = 0; + for (i = 0; i 0) for(j = 0; j < 19*19; ++j) board[19*19*2 + j] = 1; + predict_move2(net, board, move, multi); + int index = max_index(move, 19*19+1); + if(index == truth) ++correct; + printf("%d Accuracy %f\n", i, (float) correct/(i+1)); + } +} + +int print_game(float *board, FILE *fp) +{ + int i, j; + int count = 3; + fprintf(fp, "komi 6.5\n"); + fprintf(fp, "boardsize 19\n"); + fprintf(fp, "clear_board\n"); + for(j = 0; j < 19; ++j){ + for(i = 0; i < 19; ++i){ + if(occupied(board,j*19 + i) == 1) fprintf(fp, "play black %c%d\n", 'A'+i+(i>=8), 19-j); + if(occupied(board,j*19 + i) == -1) fprintf(fp, "play white %c%d\n", 'A'+i+(i>=8), 19-j); + if(occupied(board,j*19 + i)) ++count; + } + } + return count; +} + + +int stdin_ready() +{ + fd_set readfds; + FD_ZERO(&readfds); + + struct timeval 
timeout; + timeout.tv_sec = 0; + timeout.tv_usec = 0; + FD_SET(STDIN_FILENO, &readfds); + + if (select(1, &readfds, NULL, NULL, &timeout)){ + return 1; + } + return 0; +} + +mcts_tree *ponder(mcts_tree *tree, network *net, float *b, float *ko, int player, float cpuct) +{ + double t = what_time_is_it_now(); + int count = 0; + if (tree) count = tree->total_count; + while(!stdin_ready()){ + if (what_time_is_it_now() - t > 120) break; + tree = run_mcts(tree, net, b, ko, player, 100000, cpuct, .1); + } + fprintf(stderr, "Pondered %d moves...\n", tree->total_count - count); + return tree; +} + +void engine_go(char *filename, char *weightfile, int mcts_iters, float secs, float temp, float cpuct, int anon, int resign) +{ + mcts_tree *root = 0; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *one = calloc(19*19*3, sizeof(float)); + float *two = calloc(19*19*3, sizeof(float)); + int ponder_player = 0; + int passed = 0; + int move_num = 0; + int main_time = 0; + int byo_yomi_time = 0; + int byo_yomi_stones = 0; + int black_time_left = 0; + int black_stones_left = 0; + int white_time_left = 0; + int white_stones_left = 0; + float orig_time = secs; + int old_ponder = 0; + while(1){ + if(ponder_player){ + root = ponder(root, net, board, two, ponder_player, cpuct); + } + old_ponder = ponder_player; + ponder_player = 0; + char buff[256]; + int id = 0; + int has_id = (scanf("%d", &id) == 1); + scanf("%s", buff); + if (feof(stdin)) break; + fprintf(stderr, "%s\n", buff); + char ids[256]; + sprintf(ids, "%d", id); + //fprintf(stderr, "%s\n", buff); + if (!has_id) ids[0] = 0; + if (!strcmp(buff, "protocol_version")){ + printf("=%s 2\n\n", ids); + } else if (!strcmp(buff, "name")){ + if(anon){ + printf("=%s The Fool!\n\n", ids); + }else{ + printf("=%s DarkGo\n\n", ids); + } + } else if (!strcmp(buff, "time_settings")){ + ponder_player = old_ponder; + 
scanf("%d %d %d", &main_time, &byo_yomi_time, &byo_yomi_stones); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "time_left")){ + ponder_player = old_ponder; + char color[256]; + int time = 0, stones = 0; + scanf("%s %d %d", color, &time, &stones); + if (color[0] == 'b' || color[0] == 'B'){ + black_time_left = time; + black_stones_left = stones; + } else { + white_time_left = time; + white_stones_left = stones; + } + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "version")){ + if(anon){ + printf("=%s :-DDDD\n\n", ids); + }else { + printf("=%s 1.0. Want more DarkGo? You can find me on OGS, unlimited games, no waiting! https://online-go.com/user/view/434218\n\n", ids); + } + } else if (!strcmp(buff, "known_command")){ + char comm[256]; + scanf("%s", comm); + int known = (!strcmp(comm, "protocol_version") || + !strcmp(comm, "name") || + !strcmp(comm, "version") || + !strcmp(comm, "known_command") || + !strcmp(comm, "list_commands") || + !strcmp(comm, "quit") || + !strcmp(comm, "boardsize") || + !strcmp(comm, "clear_board") || + !strcmp(comm, "komi") || + !strcmp(comm, "final_status_list") || + !strcmp(comm, "play") || + !strcmp(comm, "genmove_white") || + !strcmp(comm, "genmove_black") || + !strcmp(comm, "fixed_handicap") || + !strcmp(comm, "genmove")); + if(known) printf("=%s true\n\n", ids); + else printf("=%s false\n\n", ids); + } else if (!strcmp(buff, "list_commands")){ + printf("=%s protocol_version\nshowboard\nname\nversion\nknown_command\nlist_commands\nquit\nboardsize\nclear_board\nkomi\nplay\ngenmove_black\ngenmove_white\ngenmove\nfinal_status_list\nfixed_handicap\n\n", ids); + } else if (!strcmp(buff, "quit")){ + break; + } else if (!strcmp(buff, "boardsize")){ + int boardsize = 0; + scanf("%d", &boardsize); + //fprintf(stderr, "%d\n", boardsize); + if(boardsize != 19){ + printf("?%s unacceptable size\n\n", ids); + } else { + root = move_mcts(root, -1); + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + move_num = 0; + printf("=%s 
\n\n", ids); + } + } else if (!strcmp(buff, "fixed_handicap")){ + int handicap = 0; + scanf("%d", &handicap); + int indexes[] = {72, 288, 300, 60, 180, 174, 186, 66, 294}; + int i; + for(i = 0; i < handicap; ++i){ + board[indexes[i]] = 1; + ++move_num; + } + root = move_mcts(root, -1); + } else if (!strcmp(buff, "clear_board")){ + passed = 0; + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + move_num = 0; + root = move_mcts(root, -1); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "komi")){ + float komi = 0; + scanf("%f", &komi); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "showboard")){ + printf("=%s \n", ids); + print_board(stdout, board, 1, 0); + printf("\n"); + } else if (!strcmp(buff, "play") || !strcmp(buff, "black") || !strcmp(buff, "white")){ + ++move_num; + char color[256]; + if(!strcmp(buff, "play")) + { + scanf("%s ", color); + } else { + scanf(" "); + color[0] = buff[0]; + } + char c; + int r; + int count = scanf("%c%d", &c, &r); + int player = (color[0] == 'b' || color[0] == 'B') ? 
1 : -1; + if((c == 'p' || c == 'P') && count < 2) { + passed = 1; + printf("=%s \n\n", ids); + char *line = fgetl(stdin); + free(line); + fflush(stdout); + fflush(stderr); + root = move_mcts(root, 19*19); + continue; + } else { + passed = 0; + } + if(c >= 'A' && c <= 'Z') c = c - 'A'; + if(c >= 'a' && c <= 'z') c = c - 'a'; + if(c >= 8) --c; + r = 19 - r; + fprintf(stderr, "move: %d %d\n", r, c); + + float *swap = two; + two = one; + one = swap; + move_go(board, player, r, c); + copy_cpu(19*19*3, board, 1, one, 1); + if(root) fprintf(stderr, "Prior: %f\n", root->prior[r*19 + c]); + if(root) fprintf(stderr, "Mean: %f\n", root->mean[r*19 + c]); + if(root) fprintf(stderr, "Result: %f\n", root->result); + root = move_mcts(root, r*19 + c); + if(root) fprintf(stderr, "Visited: %d\n", root->total_count); + else fprintf(stderr, "NOT VISITED\n"); + + printf("=%s \n\n", ids); + //print_board(stderr, board, 1, 0); + } else if (!strcmp(buff, "genmove") || !strcmp(buff, "genmove_black") || !strcmp(buff, "genmove_white")){ + ++move_num; + int player = 0; + if(!strcmp(buff, "genmove")){ + char color[256]; + scanf("%s", color); + player = (color[0] == 'b' || color[0] == 'B') ? 
1 : -1; + } else if (!strcmp(buff, "genmove_black")){ + player = 1; + } else { + player = -1; + } + if(player > 0){ + if(black_time_left <= 30) secs = 2.5; + else secs = orig_time; + } else { + if(white_time_left <= 30) secs = 2.5; + else secs = orig_time; + } + ponder_player = -player; + + //tree = generate_move(net, player, board, multi, .1, two, 1); + double t = what_time_is_it_now(); + root = run_mcts(root, net, board, two, player, mcts_iters, cpuct, secs); + fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + move m = pick_move(root, temp, player); + root = move_mcts(root, m.row*19 + m.col); + + + if(move_num > resign && m.value < .1 && m.mcts < .1){ + printf("=%s resign\n\n", ids); + } else if(m.row == 19){ + printf("=%s pass\n\n", ids); + passed = 0; + } else { + int row = m.row; + int col = m.col; + + float *swap = two; + two = one; + one = swap; + + move_go(board, player, row, col); + copy_cpu(19*19*3, board, 1, one, 1); + row = 19 - row; + if (col >= 8) ++col; + printf("=%s %c%d\n\n", ids, 'A' + col, row); + } + + } else if (!strcmp(buff, "p")){ + //print_board(board, 1, 0); + } else if (!strcmp(buff, "final_status_list")){ + char type[256]; + scanf("%s", type); + fprintf(stderr, "final_status\n"); + char *line = fgetl(stdin); + free(line); + if(type[0] == 'd' || type[0] == 'D'){ + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, "%s final_status_list dead\n", ids); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + while((l = fgetl(p))){ + printf("%s\n", l); + free(l); + } + } else { + printf("?%s unknown command\n\n", ids); + } + } else if (!strcmp(buff, "kgs-genmove_cleanup")){ + char type[256]; + scanf("%s", type); + fprintf(stderr, "kgs-genmove_cleanup\n"); + char *line = fgetl(stdin); + free(line); + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, 
"%s kgs-genmove_cleanup %s\n", ids, type); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + while((l = fgetl(p))){ + printf("%s\n", l); + free(l); + } + } else { + char *line = fgetl(stdin); + free(line); + printf("?%s unknown command\n\n", ids); + } + fflush(stdout); + fflush(stderr); + } + printf("%d %d %d\n",passed, black_stones_left, white_stones_left); +} + +void test_go(char *cfg, char *weights, int multi) +{ + int i; + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(time(0)); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *move = calloc(19*19+1, sizeof(float)); + int color = 1; + while(1){ + float result = predict_move2(net, board, move, multi); + printf("%.2f%% Win Chance\n", (result+1)/2*100); + + int indexes[nind]; + int row, col; + top_k(move, 19*19+1, nind, indexes); + print_board(stderr, board, color, indexes); + for(i = 0; i < nind; ++i){ + int index = indexes[i]; + row = index / 19; + col = index % 19; + if(row == 19){ + printf("%d: Pass, %.2f%%\n", i+1, move[index]*100); + } else { + printf("%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100); + } + } + //if(color == 1) printf("\u25EF Enter move: "); + //else printf("\u25C9 Enter move: "); + if(color == 1) printf("X Enter move: "); + else printf("O Enter move: "); + + char c; + char *line = fgetl(stdin); + int picked = 1; + int dnum = sscanf(line, "%d", &picked); + int cnum = sscanf(line, "%c", &c); + if (strlen(line) == 0 || dnum) { + --picked; + if (picked < nind){ + int index = indexes[picked]; + row = index / 19; + col = index % 19; + if(row < 19){ + move_go(board, 1, row, col); + } + } + } else if (cnum){ + if (c <= 'T' && c >= 'A'){ + int num = sscanf(line, "%c %d", &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num 
== 2) move_go(board, 1, row, col); + } else if (c == 'p') { + // Pass + } else if(c=='b' || c == 'w'){ + char g; + int num = sscanf(line, "%c %c %d", &g, &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num == 3) { + int mc = (g == 'b') ? 1 : -1; + if (mc == color) { + board[row*19 + col] = 1; + } else { + board[19*19 + row*19 + col] = 1; + } + } + } else if(c == 'c'){ + char g; + int num = sscanf(line, "%c %c %d", &g, &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num == 3) { + board[row*19 + col] = 0; + board[19*19 + row*19 + col] = 0; + } + } + } + free(line); + flip_board(board); + color = -color; + } +} + +float score_game(float *board) +{ + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, "final_score\n"); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + float score = 0; + char player = 0; + while((l = fgetl(p))){ + fprintf(stderr, "%s \t", l); + int n = sscanf(l, "= %c+%f", &player, &score); + free(l); + if (n == 2) break; + } + if(player == 'W') score = -score; + pclose(p); + return score; +} + +void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi) +{ + mcts_tree *tree1 = 0; + mcts_tree *tree2 = 0; + network *net = load_network(filename, weightfile, 0); + //set_batch_network(net, 1); + + network *net2; + if (f2) { + net2 = parse_network_cfg(f2); + if(w2){ + load_weights(net2, w2); + } + } else { + net2 = calloc(1, sizeof(network)); + *net2 = *net; + } + srand(time(0)); + char boards[600][93]; + int count = 0; + //set_batch_network(net, 1); + //set_batch_network(net2, 1); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *one = calloc(19*19*3, sizeof(float)); + float *two = calloc(19*19*3, sizeof(float)); + int done = 0; + int player = 1; + int p1 = 
0; + int p2 = 0; + int total = 0; + float temp = .1; + int mcts_iters = 500; + float cpuct = 5; + while(1){ + if (done){ + tree1 = move_mcts(tree1, -1); + tree2 = move_mcts(tree2, -1); + float score = score_game(board); + if((score > 0) == (total%2==0)) ++p1; + else ++p2; + ++total; + fprintf(stderr, "Total: %d, Player 1: %f, Player 2: %f\n", total, (float)p1/total, (float)p2/total); + sleep(1); + /* + int i = (score > 0)? 0 : 1; + int j; + for(; i < count; i += 2){ + for(j = 0; j < 93; ++j){ + printf("%c", boards[i][j]); + } + printf("\n"); + } + */ + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + player = 1; + done = 0; + count = 0; + fflush(stdout); + fflush(stderr); + } + //print_board(stderr, board, 1, 0); + //sleep(1); + + if ((total%2==0) == (player==1)){ + //mcts_iters = 4500; + cpuct = 5; + } else { + //mcts_iters = 500; + cpuct = 1; + } + network *use = ((total%2==0) == (player==1)) ? net : net2; + mcts_tree *t = ((total%2==0) == (player==1)) ? tree1 : tree2; + t = run_mcts(t, use, board, two, player, mcts_iters, cpuct, 0); + move m = pick_move(t, temp, player); + if(((total%2==0) == (player==1))) tree1 = t; + else tree2 = t; + + tree1 = move_mcts(tree1, m.row*19 + m.col); + tree2 = move_mcts(tree2, m.row*19 + m.col); + + if(m.row == 19){ + done = 1; + continue; + } + int row = m.row; + int col = m.col; + + float *swap = two; + two = one; + one = swap; + + if(player < 0) flip_board(board); + boards[count][0] = row; + boards[count][1] = col; + board_to_string(boards[count] + 2, board); + if(player < 0) flip_board(board); + ++count; + + move_go(board, player, row, col); + copy_cpu(19*19*3, board, 1, one, 1); + + player = -player; + } +} + +void run_go(int argc, char **argv) +{ + //boards_go(); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + 
if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + int clear = find_arg(argc, argv, "-clear"); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *c2 = (argc > 5) ? argv[5] : 0; + char *w2 = (argc > 6) ? argv[6] : 0; + int multi = find_arg(argc, argv, "-multi"); + int anon = find_arg(argc, argv, "-anon"); + int iters = find_int_arg(argc, argv, "-iters", 500); + int resign = find_int_arg(argc, argv, "-resign", 175); + float cpuct = find_float_arg(argc, argv, "-cpuct", 5); + float temp = find_float_arg(argc, argv, "-temp", .1); + float time = find_float_arg(argc, argv, "-time", 0); + if(0==strcmp(argv[2], "train")) train_go(cfg, weights, c2, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) valid_go(cfg, weights, multi, c2); + else if(0==strcmp(argv[2], "self")) self_go(cfg, weights, c2, w2, multi); + else if(0==strcmp(argv[2], "test")) test_go(cfg, weights, multi); + else if(0==strcmp(argv[2], "engine")) engine_go(cfg, weights, iters, time, temp, cpuct, anon, resign); +} + + diff --git a/workloads/realworld/standard/darknet/examples/instance-segmenter.c b/workloads/realworld/standard/darknet/examples/instance-segmenter.c new file mode 100644 index 0000000000000000000000000000000000000000..664e71426d58e19f758bab198783eac178a3cdc4 --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/instance-segmenter.c @@ -0,0 +1,267 @@ +#include "darknet.h" +#include +#include + +void normalize_image2(image p); +void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + 
printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + image pred = get_network_image(net); + + image embed = pred; + embed.c = 3; + embed.data += embed.w*embed.h*80; + + int div = net->w/pred.w; + assert(pred.w * div == net->w); + assert(pred.h * div == net->h); + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.scale = div; + args.num_boxes = 90; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.classes = 80; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = ISEG_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 
1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(display){ + image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]); + image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]); + pred.c = 80; + image mask = mask_to_rgb(tr); + image prmask = mask_to_rgb(pred); + image ecopy = copy_image(embed); + normalize_image2(ecopy); + show_image(ecopy, "embed", 1); + free_image(ecopy); + + show_image(im, "input", 1); + show_image(prmask, "pred", 1); + show_image(mask, "truth", 100); + free_image(mask); + free_image(prmask); + } + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_isegmenter(char *datafile, char *cfg, char *weights, char *filename) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = 
load_image_color(input, 0, 0); + image sized = letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + show_image(sized, "orig", 1); + show_image(prmask, "pred", 0); + free_image(im); + free_image(sized); + free_image(prmask); + if (filename) break; + } +} + + +void demo_isegmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Classifier Demo\n"); + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = letterbox_image(in, net->w, net->h); + + network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + show_image(prmask, "Segmenter", 10); + + free_image(in_s); + free_image(in); + free_image(prmask); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_isegmenter(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + 
if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_isegmenter(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_isegmenter(data, cfg, weights, gpus, ngpus, clear, display); + else if(0==strcmp(argv[2], "demo")) demo_isegmenter(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/standard/darknet/examples/lsd.c b/workloads/realworld/standard/darknet/examples/lsd.c new file mode 100644 index 0000000000000000000000000000000000000000..4ab944c884b9df422cd2b273b1faee128f2ab112 --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/lsd.c @@ -0,0 +1,1378 @@ +#include +#include "darknet.h" + +/* +void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg, char *aweight, int clear) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + //char *style_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *style_images = "/home/pjreddie/zelda.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + network fnet = load_network(fcfg, fweight, clear); + network gnet = load_network(gcfg, gweight, clear); + network anet = load_network(acfg, aweight, clear); + char *gbase = basecfg(gcfg); + char *abase = basecfg(acfg); + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = 
gnet->batch*gnet->subdivisions; + int i = *gnet->seen/imgs; + data train, tbuffer; + data style, sbuffer; + + + list *slist = get_paths(style_images); + char **spaths = (char **)list_to_array(slist); + + list *tlist = get_paths(train_images); + char **tpaths = (char **)list_to_array(tlist); + + load_args targs= get_base_args(gnet); + targs.paths = tpaths; + targs.n = imgs; + targs.m = tlist->size; + targs.d = &tbuffer; + targs.type = CLASSIFICATION_DATA; + targs.classes = 1; + char *ls[1] = {"zelda"}; + targs.labels = ls; + + load_args sargs = get_base_args(gnet); + sargs.paths = spaths; + sargs.n = imgs; + sargs.m = slist->size; + sargs.d = &sbuffer; + sargs.type = CLASSIFICATION_DATA; + sargs.classes = 1; + sargs.labels = ls; + + pthread_t tload_thread = load_data_in_thread(targs); + pthread_t sload_thread = load_data_in_thread(sargs); + clock_t time; + + float aloss_avg = -1; + float floss_avg = -1; + + fnet->train=1; + int x_size = fnet->inputs*fnet->batch; + int y_size = fnet->truths*fnet->batch; + float *X = calloc(x_size, sizeof(float)); + float *y = calloc(y_size, sizeof(float)); + + + int ax_size = anet->inputs*anet->batch; + int ay_size = anet->truths*anet->batch; + fill_gpu(ay_size, .9, anet->truth_gpu, 1); + anet->delta_gpu = cuda_make_array(0, ax_size); + anet->train = 1; + + int gx_size = gnet->inputs*gnet->batch; + int gy_size = gnet->truths*gnet->batch; + gstate.input = cuda_make_array(0, gx_size); + gstate.truth = 0; + gstate.delta = 0; + gstate.train = 1; + + while (get_current_batch(gnet) < gnet->max_batches) { + i += 1; + time=clock(); + pthread_join(tload_thread, 0); + pthread_join(sload_thread, 0); + train = tbuffer; + style = sbuffer; + tload_thread = load_data_in_thread(targs); + sload_thread = load_data_in_thread(sargs); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data generated = copy_data(train); + time=clock(); + + int j, k; + float floss = 0; + for(j = 0; j < fnet->subdivisions; ++j){ + layer imlayer = 
gnet->layers[gnet->n - 1]; + get_next_batch(train, fnet->batch, j*fnet->batch, X, y); + + cuda_push_array(fstate.input, X, x_size); + cuda_push_array(gstate.input, X, gx_size); + *gnet->seen += gnet->batch; + + forward_network_gpu(fnet, fstate); + float *feats = fnet->layers[fnet->n - 2].output_gpu; + copy_gpu(y_size, feats, 1, fstate.truth, 1); + + forward_network_gpu(gnet, gstate); + float *gen = gnet->layers[gnet->n-1].output_gpu; + copy_gpu(x_size, gen, 1, fstate.input, 1); + + fill_gpu(x_size, 0, fstate.delta, 1); + forward_network_gpu(fnet, fstate); + backward_network_gpu(fnet, fstate); + //HERE + + astate.input = gen; + fill_gpu(ax_size, 0, astate.delta, 1); + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + float *delta = imlayer.delta_gpu; + fill_gpu(x_size, 0, delta, 1); + scal_gpu(x_size, 100, astate.delta, 1); + scal_gpu(x_size, .001, fstate.delta, 1); + axpy_gpu(x_size, 1, fstate.delta, 1, delta, 1); + axpy_gpu(x_size, 1, astate.delta, 1, delta, 1); + + //fill_gpu(x_size, 0, delta, 1); + //cuda_push_array(delta, X, x_size); + //axpy_gpu(x_size, -1, imlayer.output_gpu, 1, delta, 1); + //printf("pix error: %f\n", cuda_mag_array(delta, x_size)); + printf("fea error: %f\n", cuda_mag_array(fstate.delta, x_size)); + printf("adv error: %f\n", cuda_mag_array(astate.delta, x_size)); + //axpy_gpu(x_size, 1, astate.delta, 1, delta, 1); + + backward_network_gpu(gnet, gstate); + + floss += get_network_cost(fnet) /(fnet->subdivisions*fnet->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1); + generated.y.vals[index][0] = .1; + style.y.vals[index][0] = .9; + } + } + +*/ +/* + image sim = float_to_image(anet->w, anet->h, anet->c, style.X.vals[j]); + show_image(sim, "style"); + cvWaitKey(0); + */ + /* + + 
harmless_update_network_gpu(anet); + + data merge = concat_data(style, generated); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(generated); + free_data(style); + if (aloss_avg < 0) aloss_avg = aloss; + if (floss_avg < 0) floss_avg = floss; + aloss_avg = aloss_avg*.9 + aloss*.1; + floss_avg = floss_avg*.9 + floss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, floss, aloss, floss_avg, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, gbase, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, gbase); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } +#endif +} +*/ + +/* +void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/train1.txt"; + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network net = load_network(cfg, weight, clear); + network anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = net->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + i = *net->seen/imgs; + data train, buffer; + + + list 
*plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.type = CLASSIFICATION_DATA; + args.classes = 1; + char *ls[1] = {"coco"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + network_state gstate = {0}; + gstate.index = 0; + gstate.net = net; + int x_size = get_network_input_size(net)*net->batch; + int y_size = x_size; + gstate.input = cuda_make_array(0, x_size); + gstate.truth = cuda_make_array(0, y_size); + gstate.delta = 0; + gstate.train = 1; + float *pixs = calloc(x_size, sizeof(float)); + float *graypixs = calloc(x_size, sizeof(float)); + float *y = calloc(y_size, sizeof(float)); + + network_state astate = {0}; + astate.index = 0; + astate.net = anet; + int ay_size = get_network_output_size(anet)*anet->batch; + astate.input = 0; + astate.truth = 0; + astate.delta = 0; + astate.train = 1; + + float *imerror = cuda_make_array(0, imlayer.outputs); + float *ones_gpu = cuda_make_array(0, ay_size); + fill_gpu(ay_size, .9, ones_gpu, 1); + + float aloss_avg = -1; + float gloss_avg = -1; + + //data generated = copy_data(train); + + while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gray = copy_data(train); + for(j = 0; j < imgs; ++j){ + image gim = float_to_image(net->w, net->h, net->c, gray.X.vals[j]); + grayscale_image_3c(gim); + train.y.vals[j][0] = .9; + + image yim = float_to_image(net->w, net->h, net->c, 
train.X.vals[j]); + //rgb_to_yuv(yim); + } + time=clock(); + float gloss = 0; + + for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, pixs, y); + get_next_batch(gray, net->batch, j*net->batch, graypixs, y); + cuda_push_array(gstate.input, graypixs, x_size); + cuda_push_array(gstate.truth, pixs, y_size); + */ + /* + image origi = float_to_image(net->w, net->h, 3, pixs); + image grayi = float_to_image(net->w, net->h, 3, graypixs); + show_image(grayi, "gray"); + show_image(origi, "orig"); + cvWaitKey(0); + */ + /* + *net->seen += net->batch; + forward_network_gpu(net, gstate); + + fill_gpu(imlayer.outputs, 0, imerror, 1); + astate.input = imlayer.output_gpu; + astate.delta = imerror; + astate.truth = ones_gpu; + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + scal_gpu(imlayer.outputs, .1, net->layers[net->n-1].delta_gpu, 1); + + backward_network_gpu(net, gstate); + + scal_gpu(imlayer.outputs, 1000, imerror, 1); + + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs)); + printf("features %f\n", cuda_mag_array(net->layers[net->n-1].delta_gpu, imlayer.outputs)); + + axpy_gpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1); + + gloss += get_network_cost(net) /(net->subdivisions*net->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1); + gray.y.vals[index][0] = .1; + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gray); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + update_network_gpu(anet); + free_data(merge); + free_data(train); + free_data(gray); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: 
%f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +#endif +} +*/ + +void slerp(float *start, float *end, float s, int n, float *out) +{ + float omega = acos(dot_cpu(n, start, 1, end, 1)); + float so = sin(omega); + fill_cpu(n, 0, out, 1); + axpy_cpu(n, sin((1-s)*omega)/so, start, 1, out, 1); + axpy_cpu(n, sin(s*omega)/so, end, 1, out, 1); + + float mag = mag_array(out, n); + scale_array(out, n, 1./mag); +} + +image random_unit_vector_image(int w, int h, int c) +{ + image im = make_image(w, h, c); + int i; + for(i = 0; i < im.w*im.h*im.c; ++i){ + im.data[i] = rand_normal(); + } + float mag = mag_array(im.data, im.w*im.h*im.c); + scale_array(im.data, im.w*im.h*im.c, 1./mag); + return im; +} + +void inter_dcgan(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int i, imlayer = 0; + + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = i; + printf("%d\n", i); + break; + } + } + image start = random_unit_vector_image(net->w, net->h, net->c); + image end = random_unit_vector_image(net->w, net->h, net->c); + image im = make_image(net->w, net->h, net->c); + image orig = copy_image(start); + + int c = 0; + int count = 0; + int max_count = 15; + while(1){ + ++c; + + if(count == 
max_count){ + count = 0; + free_image(start); + start = end; + end = random_unit_vector_image(net->w, net->h, net->c); + if(c > 300){ + end = orig; + } + if(c>300 + max_count) return; + } + ++count; + + slerp(start.data, end.data, (float)count / max_count, im.w*im.h*im.c, im.data); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + normalize_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + //char buff[256]; + sprintf(buff, "out%05d", c); + save_image(out, "out"); + save_image(out, buff); + show_image(out, "out", 0); + } +} + +void test_dcgan(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int imlayer = 0; + + imlayer = net->n-1; + + while(1){ + image im = make_image(net->w, net->h, net->c); + int i; + for(i = 0; i < im.w*im.h*im.c; ++i){ + im.data[i] = rand_normal(); + } + //float mag = mag_array(im.data, im.w*im.h*im.c); + //scale_array(im.data, im.w*im.h*im.c, 1./mag); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + normalize_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 0); + + free_image(im); + } +} + +void set_network_alpha_beta(network *net, float alpha, float beta) +{ + int i; + for(i = 0; i < net->n; ++i){ + if(net->layers[i].type == SHORTCUT){ + net->layers[i].alpha = alpha; + net->layers[i].beta = beta; + } + } +} + +void train_prog(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) +{ +#ifdef GPU + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", 
base); + network *gnet = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = gnet->layers[gnet->n-1]; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + i = *gnet->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(anet); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + args.threads=16; + args.classes = 1; + char *ls[2] = {"imagenet", "zzzzzzzz"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + gnet->train = 1; + anet->train = 1; + + int x_size = gnet->inputs*gnet->batch; + int y_size = gnet->truths*gnet->batch; + float *imerror = cuda_make_array(0, y_size); + + float aloss_avg = -1; + + if (maxbatch == 0) maxbatch = gnet->max_batches; + while (get_current_batch(gnet) < maxbatch) { + { + int cb = get_current_batch(gnet); + float alpha = (float) cb / (maxbatch/2); + if(alpha > 1) alpha = 1; + float beta = 1 - alpha; + printf("%f %f\n", alpha, beta); + set_network_alpha_beta(gnet, alpha, beta); + set_network_alpha_beta(anet, beta, alpha); + } + + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gen = copy_data(train); + for (j = 0; j < imgs; ++j) { + train.y.vals[j][0] = 1; + gen.y.vals[j][0] = 0; + } + time=clock(); + + for (j = 0; j < gnet->subdivisions; ++j) { + get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0); + int z; + for(z = 0; z < x_size; ++z){ + gnet->input[z] = rand_normal(); + } + /* + for(z = 0; z < gnet->batch; ++z){ + float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs); + 
scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag); + } + */ + *gnet->seen += gnet->batch; + forward_network(gnet); + + fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); + fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1); + copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1); + anet->delta_gpu = imerror; + forward_network(anet); + backward_network(anet); + + //float genaloss = *anet->cost / anet->batch; + + scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); + scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1); + + axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1); + + backward_network(gnet); + + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gen); + float aloss = train_network(anet, merge); + +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + show_image(im, "gen", 1); + show_image(im2, "train", 1); + save_image(im, "gen"); + save_image(im2, "train"); + } +#endif + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(gen); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + + printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", 
backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(gnet, buff); +#endif +} + +void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) +{ +#ifdef GPU + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network *gnet = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + //float orig_rate = anet->learning_rate; + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < gnet->n; ++i) { + if (gnet->layers[i].out_c == 3) { + imlayer = gnet->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + i = *gnet->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(anet); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + args.threads=16; + args.classes = 1; + char *ls[2] = {"imagenet", "zzzzzzzz"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + gnet->train = 1; + anet->train = 1; + + int x_size = gnet->inputs*gnet->batch; + int y_size = gnet->truths*gnet->batch; + float *imerror = cuda_make_array(0, y_size); + + //int ay_size = anet->truths*anet->batch; + + float aloss_avg = -1; + + //data generated = copy_data(train); + + if (maxbatch == 0) maxbatch = gnet->max_batches; + while (get_current_batch(gnet) < maxbatch) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + //translate_data_rows(train, -.5); + //scale_data_rows(train, 2); + + load_thread = 
load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gen = copy_data(train); + for (j = 0; j < imgs; ++j) { + train.y.vals[j][0] = 1; + gen.y.vals[j][0] = 0; + } + time=clock(); + + for(j = 0; j < gnet->subdivisions; ++j){ + get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0); + int z; + for(z = 0; z < x_size; ++z){ + gnet->input[z] = rand_normal(); + } + for(z = 0; z < gnet->batch; ++z){ + float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs); + scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag); + } + /* + for(z = 0; z < 100; ++z){ + printf("%f, ", gnet->input[z]); + } + printf("\n"); + printf("input: %f %f\n", mean_array(gnet->input, x_size), variance_array(gnet->input, x_size)); + */ + + //cuda_push_array(gnet->input_gpu, gnet->input, x_size); + //cuda_push_array(gnet->truth_gpu, gnet->truth, y_size); + *gnet->seen += gnet->batch; + forward_network(gnet); + + fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); + fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1); + copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1); + anet->delta_gpu = imerror; + forward_network(anet); + backward_network(anet); + + //float genaloss = *anet->cost / anet->batch; + //printf("%f\n", genaloss); + + scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); + scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1); + + //printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch)); + //printf("features %f\n", cuda_mag_array(gnet->layers[gnet->n-1].delta_gpu, imlayer.outputs*imlayer.batch)); + + axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1); + + backward_network(gnet); + + /* + for(k = 0; k < gnet->n; ++k){ + layer l = gnet->layers[k]; + cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); + printf("%d: %f %f\n", k, mean_array(l.output, l.outputs*l.batch), variance_array(l.output, 
l.outputs*l.batch)); + } + */ + + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gen); + //randomize_data(merge); + float aloss = train_network(anet, merge); + + //translate_image(im, 1); + //scale_image(im, .5); + //translate_image(im2, 1); + //scale_image(im2, .5); +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + show_image(im, "gen", 1); + show_image(im2, "train", 1); + save_image(im, "gen"); + save_image(im2, "train"); + } +#endif + + /* + if(aloss < .1){ + anet->learning_rate = 0; + } else if (aloss > .3){ + anet->learning_rate = orig_rate; + } + */ + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(gen); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + + printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(gnet, buff); +#endif +} + +void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/train1.txt"; + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; 
+ char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network *net = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = net->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(net); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + + args.type = CLASSIFICATION_DATA; + args.classes = 1; + char *ls[2] = {"imagenet"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + int x_size = net->inputs*net->batch; + //int y_size = x_size; + net->delta = 0; + net->train = 1; + float *pixs = calloc(x_size, sizeof(float)); + float *graypixs = calloc(x_size, sizeof(float)); + //float *y = calloc(y_size, sizeof(float)); + + //int ay_size = anet->outputs*anet->batch; + anet->delta = 0; + anet->train = 1; + + float *imerror = cuda_make_array(0, imlayer.outputs*imlayer.batch); + + float aloss_avg = -1; + float gloss_avg = -1; + + //data generated = copy_data(train); + + while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gray = copy_data(train); + for(j = 0; j < imgs; ++j){ + image gim = float_to_image(net->w, net->h, net->c, gray.X.vals[j]); + grayscale_image_3c(gim); + 
train.y.vals[j][0] = .95; + gray.y.vals[j][0] = .05; + } + time=clock(); + float gloss = 0; + + for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, pixs, 0); + get_next_batch(gray, net->batch, j*net->batch, graypixs, 0); + cuda_push_array(net->input_gpu, graypixs, net->inputs*net->batch); + cuda_push_array(net->truth_gpu, pixs, net->truths*net->batch); + /* + image origi = float_to_image(net->w, net->h, 3, pixs); + image grayi = float_to_image(net->w, net->h, 3, graypixs); + show_image(grayi, "gray"); + show_image(origi, "orig"); + cvWaitKey(0); + */ + *net->seen += net->batch; + forward_network_gpu(net); + + fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); + copy_gpu(anet->inputs*anet->batch, imlayer.output_gpu, 1, anet->input_gpu, 1); + fill_gpu(anet->inputs*anet->batch, .95, anet->truth_gpu, 1); + anet->delta_gpu = imerror; + forward_network_gpu(anet); + backward_network_gpu(anet); + + scal_gpu(imlayer.outputs*imlayer.batch, 1./100., net->layers[net->n-1].delta_gpu, 1); + + scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); + + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch)); + printf("features %f\n", cuda_mag_array(net->layers[net->n-1].delta_gpu, imlayer.outputs*imlayer.batch)); + + axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, net->layers[net->n-1].delta_gpu, 1); + + backward_network_gpu(net); + + + gloss += *net->cost /(net->subdivisions*net->batch); + + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gray); + //randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gray.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + 
show_image(im, "gen", 1); + show_image(im2, "train", 1); + } +#endif + free_data(merge); + free_data(train); + free_data(gray); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +#endif +} + +/* + void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfile, int clear) + { +#ifdef GPU +char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; +char *backup_directory = "/home/pjreddie/backup/"; +srand(time(0)); +char *base = basecfg(cfgfile); +printf("%s\n", base); +network net = parse_network_cfg(cfgfile); +if(weightfile){ +load_weights(&net, weightfile); +} +if(clear) *net->seen = 0; + +char *abase = basecfg(acfgfile); +network anet = parse_network_cfg(acfgfile); +if(aweightfile){ +load_weights(&anet, aweightfile); +} +if(clear) *anet->seen = 0; + +int i, j, k; +layer imlayer = {0}; +for (i = 0; i < net->n; ++i) { +if (net->layers[i].out_c == 3) { +imlayer = net->layers[i]; +break; +} +} + +printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); +int imgs = net->batch*net->subdivisions; +i = *net->seen/imgs; +data train, buffer; + + +list *plist = get_paths(train_images); +//int N = plist->size; +char 
**paths = (char **)list_to_array(plist); + +load_args args = {0}; +args.w = net->w; +args.h = net->h; +args.paths = paths; +args.n = imgs; +args.m = plist->size; +args.d = &buffer; + +args.min = net->min_crop; +args.max = net->max_crop; +args.angle = net->angle; +args.aspect = net->aspect; +args.exposure = net->exposure; +args.saturation = net->saturation; +args.hue = net->hue; +args.size = net->w; +args.type = CLASSIFICATION_DATA; +args.classes = 1; +char *ls[1] = {"coco"}; +args.labels = ls; + +pthread_t load_thread = load_data_in_thread(args); +clock_t time; + +network_state gstate = {0}; +gstate.index = 0; +gstate.net = net; +int x_size = get_network_input_size(net)*net->batch; +int y_size = 1*net->batch; +gstate.input = cuda_make_array(0, x_size); +gstate.truth = 0; +gstate.delta = 0; +gstate.train = 1; +float *X = calloc(x_size, sizeof(float)); +float *y = calloc(y_size, sizeof(float)); + +network_state astate = {0}; +astate.index = 0; +astate.net = anet; +int ay_size = get_network_output_size(anet)*anet->batch; +astate.input = 0; +astate.truth = 0; +astate.delta = 0; +astate.train = 1; + +float *imerror = cuda_make_array(0, imlayer.outputs); +float *ones_gpu = cuda_make_array(0, ay_size); +fill_gpu(ay_size, 1, ones_gpu, 1); + +float aloss_avg = -1; +float gloss_avg = -1; + +//data generated = copy_data(train); + +while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data generated = copy_data(train); + time=clock(); + float gloss = 0; + + for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, X, y); + cuda_push_array(gstate.input, X, x_size); + *net->seen += net->batch; + forward_network_gpu(net, gstate); + + fill_gpu(imlayer.outputs, 0, imerror, 1); + astate.input = imlayer.output_gpu; + astate.delta = imerror; + astate.truth = ones_gpu; + 
forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + scal_gpu(imlayer.outputs, 1, imerror, 1); + axpy_gpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1); + + backward_network_gpu(net, gstate); + + printf("features %f\n", cuda_mag_array(imlayer.delta_gpu, imlayer.outputs)); + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs)); + + gloss += get_network_cost(net) /(net->subdivisions*net->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1); + generated.y.vals[index][0] = 0; + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, generated); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + update_network_gpu(anet); + free_data(merge); + free_data(train); + free_data(generated); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } +} +char buff[256]; +sprintf(buff, "%s/%s_final.weights", backup_directory, base); +save_weights(net, buff); +#endif +} +*/ + +/* + void train_lsd(char *cfgfile, char *weightfile, int clear) + { + char *train_images = 
"/home/pjreddie/data/coco/trainvalno5k.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + if(clear) *net->seen = 0; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); +//int N = plist->size; +char **paths = (char **)list_to_array(plist); + +load_args args = {0}; +args.w = net->w; +args.h = net->h; +args.paths = paths; +args.n = imgs; +args.m = plist->size; +args.d = &buffer; + +args.min = net->min_crop; +args.max = net->max_crop; +args.angle = net->angle; +args.aspect = net->aspect; +args.exposure = net->exposure; +args.saturation = net->saturation; +args.hue = net->hue; +args.size = net->w; +args.type = CLASSIFICATION_DATA; +args.classes = 1; +char *ls[1] = {"coco"}; +args.labels = ls; + +pthread_t load_thread = load_data_in_thread(args); +clock_t time; +//while(i*imgs < N*120){ +while(get_current_batch(net) < net->max_batches){ +i += 1; +time=clock(); +pthread_join(load_thread, 0); +train = buffer; +load_thread = load_data_in_thread(args); + +printf("Loaded: %lf seconds\n", sec(clock()-time)); + +time=clock(); +float loss = train_network(net, train); +if (avg_loss < 0) avg_loss = loss; +avg_loss = avg_loss*.9 + loss*.1; + +printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); +if(i%1000==0){ +char buff[256]; +sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); +save_weights(net, buff); +} +if(i%100==0){ +char buff[256]; +sprintf(buff, "%s/%s.backup", backup_directory, base); +save_weights(net, buff); +} +free_data(train); +} +char buff[256]; +sprintf(buff, "%s/%s_final.weights", 
backup_directory, base);
save_weights(net, buff);
}
*/

/* Single-image / interactive LSD demo: load a generator network, locate the
 * first layer producing a 3-channel output (treated as the generated image),
 * and for each input image run a forward pass and save/show that layer's
 * output as "out". filename: optional image path (NULL = prompt on stdin);
 * gray: nonzero converts the crop to grayscale (still 3-channel) first. */
void test_lsd(char *cfg, char *weights, char *filename, int gray)
{
    network *net = load_network(cfg, weights, 0);
    set_batch_network(net, 1); /* single-image inference */
    srand(2222222);

    clock_t time;
    char buff[256];
    char *input = buff;
    int i, imlayer = 0;

    /* first layer with 3 output channels is taken as the image layer */
    for (i = 0; i < net->n; ++i) {
        if (net->layers[i].out_c == 3) {
            imlayer = i;
            printf("%d\n", i);
            break;
        }
    }

    while(1){
        if(filename){
            strncpy(input, filename, 256);
        }else{
            printf("Enter Image Path: ");
            fflush(stdout);
            input = fgets(input, 256, stdin);
            if(!input) return;  /* EOF ends the interactive loop */
            strtok(input, "\n"); /* strip trailing newline */
        }
        image im = load_image_color(input, 0, 0);
        image resized = resize_min(im, net->w);
        /* center crop to network input size */
        image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h);
        if(gray) grayscale_image_3c(crop);

        float *X = crop.data;
        time=clock();
        network_predict(net, X);
        image out = get_network_image_layer(net, imlayer);
        //yuv_to_rgb(out);
        constrain_image(out); /* clamp pixel values into displayable range */
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        save_image(out, "out");
        show_image(out, "out", 1);
        show_image(crop, "crop", 0);

        free_image(im);
        free_image(resized);
        free_image(crop);
        if (filename) break; /* one-shot when a path was supplied */
    }
}


/* Dispatcher for the "lsd" command family (GAN training / generation).
 * argv[2] selects the mode; argv[3..6] supply generator cfg/weights and
 * adversary cfg/weights. */
void run_lsd(int argc, char **argv)
{
    if(argc < 4){
        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }

    int clear = find_arg(argc, argv, "-clear");
    int display = find_arg(argc, argv, "-display");
    int batches = find_int_arg(argc, argv, "-b", 0);
    char *file = find_char_arg(argc, argv, "-file", "/home/pjreddie/data/imagenet/imagenet1k.train.list");

    char *cfg = argv[3];
    char *weights = (argc > 4) ? argv[4] : 0;
    char *filename = (argc > 5) ? argv[5] : 0;
    /* NOTE(review): argv[5] is used both as "filename" (test modes) and as
     * "acfg" (adversary cfg, train modes) — intentional dual use, but confirm
     * callers never mix modes. */
    char *acfg = argv[5];
    char *aweights = (argc > 6) ? argv[6] : 0;
    //if(0==strcmp(argv[2], "train")) train_lsd(cfg, weights, clear);
    //else if(0==strcmp(argv[2], "train2")) train_lsd2(cfg, weights, acfg, aweights, clear);
    //else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear);
    //else if(0==strcmp(argv[2], "train3")) train_lsd3(argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], clear);
    if(0==strcmp(argv[2], "traingan")) train_dcgan(cfg, weights, acfg, aweights, clear, display, file, batches);
    else if(0==strcmp(argv[2], "trainprog")) train_prog(cfg, weights, acfg, aweights, clear, display, file, batches);
    else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear, display);
    else if(0==strcmp(argv[2], "gan")) test_dcgan(cfg, weights);
    else if(0==strcmp(argv[2], "inter")) inter_dcgan(cfg, weights);
    else if(0==strcmp(argv[2], "test")) test_lsd(cfg, weights, filename, 0);
    else if(0==strcmp(argv[2], "color")) test_lsd(cfg, weights, filename, 1);
    /*
       else if(0==strcmp(argv[2], "valid")) validate_lsd(cfg, weights);
     */
}
diff --git a/workloads/realworld/standard/darknet/examples/nightmare.c b/workloads/realworld/standard/darknet/examples/nightmare.c
new file mode 100644
index 0000000000000000000000000000000000000000..2978eb61193e96325441c5b830a786eccb203569
--- /dev/null
+++ b/workloads/realworld/standard/darknet/examples/nightmare.c
@@ -0,0 +1,414 @@
#include "darknet.h"

#include
/* NOTE(review): the header name after this #include was lost in extraction
 * (angle-bracketed text stripped) — presumably <math.h>; confirm upstream. */

// ./darknet nightmare cfg/extractor.recon.cfg ~/trained/yolo-coco.conv frame6.png -reconstruct -iters 500 -i 3 -lambda .1 -rate .01 -smooth 2

/* Mean of |x[i]| over n elements. */
float abs_mean(float *x, int n)
{
    int i;
    float sum = 0;
    for (i = 0; i < n; ++i){
        sum += fabs(x[i]);
    }
    return sum/n;
}

/* Keep only activations more than `thresh` standard deviations above the
 * mean of `output`; every other entry of `delta` is zeroed (in-place, n
 * elements). Used to select which activations the dream amplifies. */
void calculate_loss(float *output, float *delta, int n, float thresh)
{
    int i;
    float mean = mean_array(output, n);
    float var = variance_array(output, n);
    for(i = 0; i < n; ++i){
        if(delta[i] > mean + thresh*sqrt(var)) delta[i] = output[i];
        else
delta[i] = 0;
    }
}

/* One "deep dream" gradient-ascent step on `orig`: jitter/scale/flip the
 * image, forward it to `max_layer`, use calculate_loss() to pick which
 * activations to amplify, backprop to the input, then blend the (optionally
 * normalized) input-gradient back into `orig` at step size `rate`.
 * Truncates the net to max_layer+1 layers as a side effect. */
void optimize_picture(network *net, image orig, int max_layer, float scale, float rate, float thresh, int norm)
{
    //scale_image(orig, 2);
    //translate_image(orig, -1);
    net->n = max_layer + 1; /* run only up to the target layer */

    /* random jitter and horizontal flip decorrelate successive steps */
    int dx = rand()%16 - 8;
    int dy = rand()%16 - 8;
    int flip = rand()%2;

    image crop = crop_image(orig, dx, dy, orig.w, orig.h);
    image im = resize_image(crop, (int)(orig.w * scale), (int)(orig.h * scale));
    if(flip) flip_image(im);

    resize_network(net, im.w, im.h);
    layer last = net->layers[net->n-1];
    //net->layers[net->n - 1].activation = LINEAR;

    image delta = make_image(im.w, im.h, im.c);

#ifdef GPU
    net->delta_gpu = cuda_make_array(delta.data, im.w*im.h*im.c);
    copy_cpu(net->inputs, im.data, 1, net->input, 1);

    forward_network_gpu(net);
    /* seed the backward pass with the layer's own activations */
    copy_gpu(last.outputs, last.output_gpu, 1, last.delta_gpu, 1);

    cuda_pull_array(last.delta_gpu, last.delta, last.outputs);
    calculate_loss(last.delta, last.delta, last.outputs, thresh);
    cuda_push_array(last.delta_gpu, last.delta, last.outputs);

    backward_network_gpu(net);

    cuda_pull_array(net->delta_gpu, delta.data, im.w*im.h*im.c);
    cuda_free(net->delta_gpu);
    net->delta_gpu = 0;
#else
    printf("\nnet: %d %d %d im: %d %d %d\n", net->w, net->h, net->inputs, im.w, im.h, im.c);
    copy_cpu(net->inputs, im.data, 1, net->input, 1);
    net->delta = delta.data;
    forward_network(net);
    copy_cpu(last.outputs, last.output, 1, last.delta, 1);
    calculate_loss(last.output, last.delta, last.outputs, thresh);
    backward_network(net);
#endif

    if(flip) flip_image(delta); /* undo the flip on the gradient */
    //normalize_array(delta.data, delta.w*delta.h*delta.c);
    image resized = resize_image(delta, orig.w, orig.h);
    image out = crop_image(resized, -dx, -dy, orig.w, orig.h); /* undo jitter */

    /*
       image g = grayscale_image(out);
       free_image(out);
       out = g;
     */

    //rate = rate / abs_mean(out.data, out.w*out.h*out.c);
    /* pull the gradient slightly toward mid-gray (regularization) */
    image gray = make_image(out.w, out.h, out.c);
    fill_image(gray, .5);
    axpy_cpu(orig.w*orig.h*orig.c, -1, orig.data, 1, gray.data, 1);
    axpy_cpu(orig.w*orig.h*orig.c, .1, gray.data, 1, out.data, 1);

    if(norm) normalize_array(out.data, out.w*out.h*out.c);
    axpy_cpu(orig.w*orig.h*orig.c, rate, out.data, 1, orig.data, 1); /* ascent step */

    /*
       normalize_array(orig.data, orig.w*orig.h*orig.c);
       scale_image(orig, sqrt(var));
       translate_image(orig, mean);
     */

    //translate_image(orig, 1);
    //scale_image(orig, .5);
    //normalize_image(orig);

    constrain_image(orig);

    free_image(crop);
    free_image(im);
    free_image(delta);
    free_image(resized);
    free_image(out);

}

/* Add a neighborhood-smoothness gradient to `update`: for every pixel, pull
 * it toward each neighbor within a (2*num+1)^2 window, weighted by lambda. */
void smooth(image recon, image update, float lambda, int num)
{
    int i, j, k;
    int ii, jj;
    for(k = 0; k < recon.c; ++k){
        for(j = 0; j < recon.h; ++j){
            for(i = 0; i < recon.w; ++i){
                int out_index = i + recon.w*(j + recon.h*k);
                for(jj = j-num; jj <= j + num && jj < recon.h; ++jj){
                    if (jj < 0) continue;
                    for(ii = i-num; ii <= i + num && ii < recon.w; ++ii){
                        if (ii < 0) continue;
                        int in_index = ii + recon.w*(jj + recon.h*k);
                        update.data[out_index] += lambda * (recon.data[in_index] - recon.data[out_index]);
                    }
                }
            }
        }
    }
}

/* Iteratively optimize `recon` so the network's output layer matches the
 * target `features`, using momentum SGD on the input pixels ("feature
 * inversion"). `update` accumulates the momentum term across calls. */
void reconstruct_picture(network *net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters)
{
    int iter = 0;
    for (iter = 0; iter < iters; ++iter) {
        image delta = make_image(recon.w, recon.h, recon.c);

#ifdef GPU
        layer l = get_network_output_layer(net);
        cuda_push_array(net->input_gpu, recon.data, recon.w*recon.h*recon.c);
        //cuda_push_array(net->truth_gpu, features, net->truths);
        net->delta_gpu = cuda_make_array(delta.data, delta.w*delta.h*delta.c);

        forward_network_gpu(net);
        /* output-layer error = features - output */
        cuda_push_array(l.delta_gpu, features, l.outputs);
        axpy_gpu(l.outputs, -1, l.output_gpu, 1, l.delta_gpu, 1);
        backward_network_gpu(net);

        cuda_pull_array(net->delta_gpu, delta.data, delta.w*delta.h*delta.c);

        cuda_free(net->delta_gpu);
#else
        net->input = recon.data;
        net->delta = delta.data;
net->truth = features;

        forward_network(net);
        backward_network(net);
#endif

        //normalize_array(delta.data, delta.w*delta.h*delta.c);
        /* momentum SGD on the input: update += delta; recon += rate*update;
         * update *= momentum */
        axpy_cpu(recon.w*recon.h*recon.c, 1, delta.data, 1, update.data, 1);
        //smooth(recon, update, lambda, smooth_size);

        axpy_cpu(recon.w*recon.h*recon.c, rate, update.data, 1, recon.data, 1);
        scal_cpu(recon.w*recon.h*recon.c, momentum, update.data, 1);

        float mag = mag_array(delta.data, recon.w*recon.h*recon.c);
        printf("mag: %f\n", mag);
        //scal_cpu(recon.w*recon.h*recon.c, 600/mag, recon.data, 1);

        constrain_image(recon);
        free_image(delta);
    }
}

/* Old CLI entry point, kept for reference — superseded by run_nightmare(). */
/*
void run_lsd(int argc, char **argv)
{
    srand(0);
    if(argc < 3){
        fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [options! (optional)]\n", argv[0], argv[1]);
        return;
    }

    char *cfg = argv[2];
    char *weights = argv[3];
    char *input = argv[4];

    int norm = find_int_arg(argc, argv, "-norm", 1);
    int rounds = find_int_arg(argc, argv, "-rounds", 1);
    int iters = find_int_arg(argc, argv, "-iters", 10);
    float rate = find_float_arg(argc, argv, "-rate", .04);
    float momentum = find_float_arg(argc, argv, "-momentum", .9);
    float lambda = find_float_arg(argc, argv, "-lambda", .01);
    char *prefix = find_char_arg(argc, argv, "-prefix", 0);
    int reconstruct = find_arg(argc, argv, "-reconstruct");
    int smooth_size = find_int_arg(argc, argv, "-smooth", 1);

    network net = parse_network_cfg(cfg);
    load_weights(&net, weights);
    char *cfgbase = basecfg(cfg);
    char *imbase = basecfg(input);

    set_batch_network(&net, 1);
    image im = load_image_color(input, 0, 0);

    float *features = 0;
    image update;
    if (reconstruct){
        im = letterbox_image(im, net->w, net->h);

        int zz = 0;
        network_predict(net, im.data);
        image out_im = get_network_image(net);
        image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz);
        //flip_image(crop);
        image f_im = resize_image(crop, out_im.w, out_im.h);
        free_image(crop);
        printf("%d features\n", out_im.w*out_im.h*out_im.c);


        im = resize_image(im, im.w, im.h);
        f_im = resize_image(f_im, f_im.w, f_im.h);
        features = f_im.data;

        int i;
        for(i = 0; i < 14*14*512; ++i){
            features[i] += rand_uniform(-.19, .19);
        }

        free_image(im);
        im = make_random_image(im.w, im.h, im.c);
        update = make_image(im.w, im.h, im.c);

    }

    int e;
    int n;
    for(e = 0; e < rounds; ++e){
        fprintf(stderr, "Iteration: ");
        fflush(stderr);
        for(n = 0; n < iters; ++n){
            fprintf(stderr, "%d, ", n);
            fflush(stderr);
            if(reconstruct){
                reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1);
                //if ((n+1)%30 == 0) rate *= .5;
                show_image(im, "reconstruction");
#ifdef OPENCV
                cvWaitKey(10);
#endif
            }else{
                int layer = max_layer + rand()%range - range/2;
                int octave = rand()%octaves;
                optimize_picture(&net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm);
            }
        }
        fprintf(stderr, "done\n");
        char buff[256];
        if (prefix){
            sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e);
        }else{
            sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e);
        }
        printf("%d %s\n", e, buff);
        save_image(im, buff);
        //show_image(im, buff);
        //cvWaitKey(0);

        if(rotate){
            image rot = rotate_image(im, rotate);
            free_image(im);
            im = rot;
        }
        image crop = crop_image(im, im.w * (1. - zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom);
        image resized = resize_image(crop, im.w, im.h);
        free_image(im);
        free_image(crop);
        im = resized;
    }
}
*/

/* CLI entry for "./darknet nightmare": deep-dream amplification of a chosen
 * layer, or (-reconstruct) inversion of the network's own features back into
 * an image. argv: [cfg] [weights] [image] [layer] plus the option flags
 * parsed below. */
void run_nightmare(int argc, char **argv)
{
    srand(0);
    if(argc < 4){
        fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [layer] [options! (optional)]\n", argv[0], argv[1]);
        return;
    }

    char *cfg = argv[2];
    char *weights = argv[3];
    char *input = argv[4];
    int max_layer = atoi(argv[5]);

    int range = find_int_arg(argc, argv, "-range", 1);
    int norm = find_int_arg(argc, argv, "-norm", 1);
    int rounds = find_int_arg(argc, argv, "-rounds", 1);
    int iters = find_int_arg(argc, argv, "-iters", 10);
    int octaves = find_int_arg(argc, argv, "-octaves", 4);
    float zoom = find_float_arg(argc, argv, "-zoom", 1.);
    float rate = find_float_arg(argc, argv, "-rate", .04);
    float thresh = find_float_arg(argc, argv, "-thresh", 1.);
    float rotate = find_float_arg(argc, argv, "-rotate", 0);
    float momentum = find_float_arg(argc, argv, "-momentum", .9);
    float lambda = find_float_arg(argc, argv, "-lambda", .01);
    char *prefix = find_char_arg(argc, argv, "-prefix", 0);
    int reconstruct = find_arg(argc, argv, "-reconstruct");
    int smooth_size = find_int_arg(argc, argv, "-smooth", 1);

    network *net = load_network(cfg, weights, 0);
    char *cfgbase = basecfg(cfg);
    char *imbase = basecfg(input);

    set_batch_network(net, 1);
    image im = load_image_color(input, 0, 0);
    if(0){ /* disabled: downscale very large inputs to <=512px */
        float scale = 1;
        if(im.w > 512 || im.h > 512){
            if(im.w > im.h) scale = 512.0/im.w;
            else scale = 512.0/im.h;
        }
        image resized = resize_image(im, scale*im.w, scale*im.h);
        free_image(im);
        im = resized;
    }
    //im = letterbox_image(im, net->w, net->h);

    float *features = 0;
    image update;
    if (reconstruct){
        /* capture the target features: forward once and snapshot the
         * output image of the (truncated) network */
        net->n = max_layer;
        im = letterbox_image(im, net->w, net->h);
        //resize_network(&net, im.w, im.h);

        network_predict(net, im.data);
        if(net->layers[net->n-1].type == REGION){
            printf("region!\n");
            zero_objectness(net->layers[net->n-1]);
        }
        image out_im = copy_image(get_network_image(net));
        /*
           image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz);
           //flip_image(crop);
           image f_im = resize_image(crop, out_im.w, out_im.h);
           free_image(crop);
         */
        printf("%d features\n", out_im.w*out_im.h*out_im.c);

        features = out_im.data;

        /*
           int i;
           for(i = 0; i < 14*14*512; ++i){
           //features[i] += rand_uniform(-.19, .19);
           }
           free_image(im);
           im = make_random_image(im.w, im.h, im.c);
         */
        update = make_image(im.w, im.h, im.c);
    }

    int e;
    int n;
    for(e = 0; e < rounds; ++e){
        fprintf(stderr, "Iteration: ");
        fflush(stderr);
        for(n = 0; n < iters; ++n){
            fprintf(stderr, "%d, ", n);
            fflush(stderr);
            if(reconstruct){
                reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1);
                //if ((n+1)%30 == 0) rate *= .5;
                show_image(im, "reconstruction", 10);
            }else{
                /* pick a random layer near max_layer and a random octave */
                int layer = max_layer + rand()%range - range/2;
                int octave = rand()%octaves;
                optimize_picture(net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm);
            }
        }
        fprintf(stderr, "done\n");
        if(0){
            image g = grayscale_image(im);
            free_image(im);
            im = g;
        }
        char buff[256];
        if (prefix){
            sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e);
        }else{
            sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e);
        }
        printf("%d %s\n", e, buff);
        save_image(im, buff);
        //show_image(im, buff, 0);

        /* optional per-round rotate + zoom-in before the next round */
        if(rotate){
            image rot = rotate_image(im, rotate);
            free_image(im);
            im = rot;
        }
        image crop = crop_image(im, im.w * (1.
- zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom); + image resized = resize_image(crop, im.w, im.h); + free_image(im); + free_image(crop); + im = resized; + } +} + diff --git a/workloads/realworld/standard/darknet/examples/regressor.c b/workloads/realworld/standard/darknet/examples/regressor.c new file mode 100644 index 0000000000000000000000000000000000000000..20cec0fad9f0a2ccb2c46a30d0a01793119b43ce --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/regressor.c @@ -0,0 +1,240 @@ +#include "darknet.h" +#include +#include + +void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + int classes = option_find_int(options, "classes", 1); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + clock_t time; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.classes = classes; + + args.min = net->min_ratio*net->w; + args.max = net->max_ratio*net->w; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; 
+ args.size = net->w; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = REGRESSION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_regressor(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image sized 
= letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + free_image(im); + free_image(sized); + if (filename) break; + } +} + + +void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Regressor Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + list *options = read_data_cfg(datacfg); + int classes = option_find_int(options, "classes", 1); + char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); + + void * cap = open_video_stream(filename, cam_index, 0,0,0); + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image crop = center_crop_image(in, net->w, net->h); + grayscale_image_3c(crop); + + float *predictions = network_predict(net, crop.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + int i; + for(i = 0; i < classes; ++i){ + printf("%s: %f\n", names[i], predictions[i]); + } + + show_image(crop, "Regressor", 10); + free_image(in); + free_image(crop); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_regressor(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + 
ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_regressor(data, cfg, weights); + else if(0==strcmp(argv[2], "train")) train_regressor(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "demo")) demo_regressor(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/standard/darknet/examples/rnn.c b/workloads/realworld/standard/darknet/examples/rnn.c new file mode 100644 index 0000000000000000000000000000000000000000..5d49eaae7070eb1dc9a87b5627b7ec6f7cb09e46 --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/rnn.c @@ -0,0 +1,542 @@ +#include "darknet.h" + +#include + +typedef struct { + float *x; + float *y; +} float_pair; + +unsigned char **load_files(char *filename, int *n) +{ + list *paths = get_paths(filename); + *n = paths->size; + unsigned char **contents = calloc(*n, sizeof(char *)); + int i; + node *x = paths->front; + for(i = 0; i < *n; ++i){ + contents[i] = read_file((char *)x->val); + x = x->next; + } + return contents; +} + +int *read_tokenized_data(char *filename, size_t *read) +{ + size_t size = 512; + size_t count = 0; + FILE *fp = fopen(filename, "r"); + int *d = calloc(size, sizeof(int)); + int n, one; + one = fscanf(fp, "%d", &n); + while(one == 1){ + ++count; + if(count > size){ + size = size*2; + d = realloc(d, size*sizeof(int)); + } + d[count-1] = n; + one = fscanf(fp, "%d", &n); + } + fclose(fp); + d = realloc(d, count*sizeof(int)); + *read = count; + return d; +} + +char 
**read_tokens(char *filename, size_t *read)
{
    size_t size = 512;
    size_t count = 0;
    FILE *fp = fopen(filename, "r");
    char **d = calloc(size, sizeof(char *));
    char *line;
    while((line=fgetl(fp)) != 0){
        ++count;
        if(count > size){
            size = size*2;
            d = realloc(d, size*sizeof(char *));
        }
        /* NOTE(review): replaces an empty line with a string LITERAL — the
         * heap buffer from fgetl leaks and callers must never free d[i];
         * confirm tokens are intentionally never freed. */
        if(0==strcmp(line, "")) line = "\n";
        d[count-1] = line;
    }
    fclose(fp);
    d = realloc(d, count*sizeof(char *));
    *read = count;
    return d;
}


/* Build one-hot (x, y) training matrices from integer token streams: for
 * each of `batch` streams, emit `steps` consecutive (current, next) token
 * pairs starting at offsets[i] (advanced in place, wrapping at len).
 * Layout: element (step j, stream i, token t) at (j*batch + i)*characters + t. */
float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size_t len, int batch, int steps)
{
    float *x = calloc(batch * steps * characters, sizeof(float));
    float *y = calloc(batch * steps * characters, sizeof(float));
    int i,j;
    for(i = 0; i < batch; ++i){
        for(j = 0; j < steps; ++j){
            int curr = tokens[(offsets[i])%len];
            int next = tokens[(offsets[i] + 1)%len];

            x[(j*batch + i)*characters + curr] = 1;
            y[(j*batch + i)*characters + next] = 1;

            offsets[i] = (offsets[i] + 1) % len;

            if(curr >= characters || curr < 0 || next >= characters || next < 0){
                error("Bad char");
            }
        }
    }
    float_pair p;
    p.x = x;
    p.y = y;
    return p;
}

/* Build one-hot (x, y) matrices for sequence-to-sequence training: pick a
 * random (source, dest) pair per stream and one-hot the first `steps` bytes
 * of each. NOTE(review): indexes source[index][j] without checking string
 * length — assumes every line is at least `steps` bytes; confirm. */
float_pair get_seq2seq_data(char **source, char **dest, int n, int characters, size_t len, int batch, int steps)
{
    int i,j;
    float *x = calloc(batch * steps * characters, sizeof(float));
    float *y = calloc(batch * steps * characters, sizeof(float));
    for(i = 0; i < batch; ++i){
        int index = rand()%n;
        //int slen = strlen(source[index]);
        //int dlen = strlen(dest[index]);
        for(j = 0; j < steps; ++j){
            unsigned char curr = source[index][j];
            unsigned char next = dest[index][j];

            x[(j*batch + i)*characters + curr] = 1;
            y[(j*batch + i)*characters + next] = 1;

            if(curr > 255 || curr <= 0 || next > 255 || next <= 0){
                /*text[(index+j+2)%len] = 0;
                   printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]);
                   printf("%s", text+index);
                 */
                error("Bad char");
            }
        }
    }
    float_pair p;
    p.x = x;
    p.y = y;
    return p;
}

/* Character-level analogue of get_rnn_token_data(): one-hot consecutive
 * (current, next) byte pairs from `text`, advancing offsets in place. */
float_pair get_rnn_data(unsigned char *text, size_t *offsets, int characters, size_t len, int batch, int steps)
{
    float *x = calloc(batch * steps * characters, sizeof(float));
    float *y = calloc(batch * steps * characters, sizeof(float));
    int i,j;
    for(i = 0; i < batch; ++i){
        for(j = 0; j < steps; ++j){
            unsigned char curr = text[(offsets[i])%len];
            unsigned char next = text[(offsets[i] + 1)%len];

            x[(j*batch + i)*characters + curr] = 1;
            y[(j*batch + i)*characters + next] = 1;

            offsets[i] = (offsets[i] + 1) % len;

            if(curr > 255 || curr <= 0 || next > 255 || next <= 0){
                /*text[(index+j+2)%len] = 0;
                   printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]);
                   printf("%s", text+index);
                 */
                error("Bad char");
            }
        }
    }
    float_pair p;
    p.x = x;
    p.y = y;
    return p;
}

/* Train a character (or token, if `tokenized`) RNN language model on the
 * contents of `filename`, checkpointing weights periodically. Streams keep
 * independent offsets into the corpus and are randomly reset (~1/64 per
 * iteration) so the network sees fresh context. */
void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear, int tokenized)
{
    srand(time(0));
    unsigned char *text = 0;
    int *tokens = 0;
    size_t size;
    if(tokenized){
        tokens = read_tokenized_data(filename, &size);
    } else {
        text = read_file(filename);
        size = strlen((const char*)text);
    }

    char *backup_directory = "/home/pjreddie/backup/";
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);
    float avg_loss = -1;
    network *net = load_network(cfgfile, weightfile, clear);

    int inputs = net->inputs;
    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g, Inputs: %d %d %d\n", net->learning_rate, net->momentum, net->decay, inputs, net->batch, net->time_steps);
    int batch = net->batch;
    int steps = net->time_steps;
    if(clear) *net->seen = 0;
    int i = (*net->seen)/net->batch;

    int streams = batch/steps; /* parallel corpus positions per batch */
    size_t *offsets = calloc(streams, sizeof(size_t));
    int j;
    for(j = 0; j < streams; ++j){
        offsets[j] = rand_size_t()%size;
    }

    clock_t time;
    while(get_current_batch(net) < net->max_batches){
        i += 1;
        time=clock();
        float_pair p;
        if(tokenized){
            p = get_rnn_token_data(tokens, offsets, inputs, size, streams, steps);
        }else{
            p = get_rnn_data(text, offsets, inputs, size, streams, steps);
        }

        copy_cpu(net->inputs*net->batch, p.x, 1, net->input, 1);
        copy_cpu(net->truths*net->batch, p.y, 1, net->truth, 1);
        float loss = train_network_datum(net) / (batch);
        free(p.x);
        free(p.y);
        if (avg_loss < 0) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;

        size_t chars = get_current_batch(net)*batch;
        fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds, %f epochs\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), (float) chars/size);

        /* randomly restart a stream at a new corpus position and clear its
         * hidden state */
        for(j = 0; j < streams; ++j){
            //printf("%d\n", j);
            if(rand()%64 == 0){
                //fprintf(stderr, "Reset\n");
                offsets[j] = rand_size_t()%size;
                reset_network_state(net, j);
            }
        }

        if(i%10000==0){
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(net, buff);
        }
        if(i%100==0){
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(net, buff);
        }
    }
    char buff[256];
    sprintf(buff, "%s/%s_final.weights", backup_directory, base);
    save_weights(net, buff);
}

/* Print symbol n: its token string when a token table is given, otherwise
 * the raw character. */
void print_symbol(int n, char **tokens){
    if(tokens){
        printf("%s ", tokens[n]);
    } else {
        printf("%c", n);
    }
}

/* Sample `num` symbols from a trained character RNN: feed the seed string
 * through the network, then repeatedly sample from the (temperature-scaled)
 * output distribution, feeding each sample back in. */
void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float temp, int rseed, char *token_file)
{
    char **tokens = 0;
    if(token_file){
        size_t n;
        tokens = read_tokens(token_file, &n);
    }

    srand(rseed);
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);

    network *net = load_network(cfgfile, weightfile, 0);
    int inputs = net->inputs;

    int i, j;
    for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp;
    int c = 0;
    int len = strlen(seed);
    float *input = calloc(inputs, sizeof(float));

    /*
       fill_cpu(inputs, 0, input, 1);
       for(i = 0; i < 10; ++i){
       network_predict(net, input);
       }
       fill_cpu(inputs, 0, input,
1);
     */

    /* prime the network on all seed characters except the last */
    for(i = 0; i < len-1; ++i){
        c = seed[i];
        input[c] = 1;
        network_predict(net, input);
        input[c] = 0;
        print_symbol(c, tokens);
    }
    if(len) c = seed[len-1];
    print_symbol(c, tokens);
    for(i = 0; i < num; ++i){
        input[c] = 1;
        float *out = network_predict(net, input);
        input[c] = 0;
        for(j = 32; j < 127; ++j){
            //printf("%d %c %f\n",j, j, out[j]);
        }
        for(j = 0; j < inputs; ++j){
            if (out[j] < .0001) out[j] = 0; /* prune tail probabilities */
        }
        c = sample_array(out, inputs);
        print_symbol(c, tokens);
    }
    printf("\n");
}

/* Interactive tactic generation: repeatedly read a NUL- or EOF-terminated
 * prompt from stdin (resetting hidden state each round), then sample up to
 * `num` symbols, stopping at a ".\n" sentence boundary. */
void test_tactic_rnn_multi(char *cfgfile, char *weightfile, int num, float temp, int rseed, char *token_file)
{
    char **tokens = 0;
    if(token_file){
        size_t n;
        tokens = read_tokens(token_file, &n);
    }

    srand(rseed);
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);

    network *net = load_network(cfgfile, weightfile, 0);
    int inputs = net->inputs;

    int i, j;
    for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp;
    int c = 0;
    float *input = calloc(inputs, sizeof(float));
    float *out = 0;

    while(1){
        reset_network_state(net, 0);
        /* consume the prompt; `out` holds the distribution after its last char */
        while((c = getc(stdin)) != EOF && c != 0){
            input[c] = 1;
            out = network_predict(net, input);
            input[c] = 0;
        }
        for(i = 0; i < num; ++i){
            for(j = 0; j < inputs; ++j){
                if (out[j] < .0001) out[j] = 0;
            }
            int next = sample_array(out, inputs);
            if(c == '.' && next == '\n') break; /* end of sentence */
            c = next;
            print_symbol(c, tokens);

            input[c] = 1;
            out = network_predict(net, input);
            input[c] = 0;
        }
        printf("\n");
    }
}

/* Single-shot version of the above: consume all of stdin as the prompt,
 * then sample up to `num` symbols until a ".\n" boundary. */
void test_tactic_rnn(char *cfgfile, char *weightfile, int num, float temp, int rseed, char *token_file)
{
    char **tokens = 0;
    if(token_file){
        size_t n;
        tokens = read_tokens(token_file, &n);
    }

    srand(rseed);
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);

    network *net = load_network(cfgfile, weightfile, 0);
    int inputs = net->inputs;

    int i, j;
    for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp;
    int c = 0;
    float *input = calloc(inputs, sizeof(float));
    float *out = 0;

    while((c = getc(stdin)) != EOF){
        input[c] = 1;
        out = network_predict(net, input);
        input[c] = 0;
    }
    for(i = 0; i < num; ++i){
        for(j = 0; j < inputs; ++j){
            if (out[j] < .0001) out[j] = 0;
        }
        int next = sample_array(out, inputs);
        if(c == '.' && next == '\n') break;
        c = next;
        print_symbol(c, tokens);

        input[c] = 1;
        out = network_predict(net, input);
        input[c] = 0;
    }
    printf("\n");
}

/* Measure perplexity of the model on stdin, but only over the regions that
 * follow a ">>" marker (i.e. the tactic portions); text between a ".\n"
 * boundary and the next ">>" is fed to the net but not scored. */
void valid_tactic_rnn(char *cfgfile, char *weightfile, char *seed)
{
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);

    network *net = load_network(cfgfile, weightfile, 0);
    int inputs = net->inputs;

    int count = 0;
    int words = 1;
    int c;
    int len = strlen(seed);
    float *input = calloc(inputs, sizeof(float));
    int i;
    for(i = 0; i < len; ++i){
        c = seed[i];
        input[(int)c] = 1;
        network_predict(net, input);
        input[(int)c] = 0;
    }
    float sum = 0;
    c = getc(stdin);
    float log2 = log(2);
    int in = 0; /* nonzero while inside a scored ">>" region */
    while(c != EOF){
        int next = getc(stdin);
        if(next == EOF) break;
        if(next < 0 || next >= 255) error("Out of range character");

        input[c] = 1;
        float *out = network_predict(net, input);
        input[c] = 0;

        if(c == '.' && next == '\n') in = 0;
        if(!in) {
            if(c == '>' && next == '>'){
                in = 1;
                ++words;
            }
            c = next;
            continue;
        }
        ++count;
        sum += log(out[next])/log2; /* log2-likelihood of the observed char */
        c = next;
        printf("%d %d Perplexity: %4.4f    Word Perplexity: %4.4f\n", count, words, pow(2, -sum/count), pow(2, -sum/words));
    }
}

/* Measure bits-per-character and perplexity of the model over all of stdin,
 * after priming with `seed`. Words are delimited by whitespace. */
void valid_char_rnn(char *cfgfile, char *weightfile, char *seed)
{
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);

    network *net = load_network(cfgfile, weightfile, 0);
    int inputs = net->inputs;

    int count = 0;
    int words = 1;
    int c;
    int len = strlen(seed);
    float *input = calloc(inputs, sizeof(float));
    int i;
    for(i = 0; i < len; ++i){
        c = seed[i];
        input[(int)c] = 1;
        network_predict(net, input);
        input[(int)c] = 0;
    }
    float sum = 0;
    c = getc(stdin);
    float log2 = log(2);
    while(c != EOF){
        int next = getc(stdin);
        if(next == EOF) break;
        if(next < 0 || next >= 255) error("Out of range character");
        ++count;
        if(next == ' ' || next == '\n' || next == '\t') ++words;
        input[c] = 1;
        float *out = network_predict(net, input);
        input[c] = 0;
        sum += log(out[next])/log2;
        c = next;
        printf("%d BPC: %4.4f   Perplexity: %4.4f    Word Perplexity: %4.4f\n", count, -sum/count, pow(2, -sum/count), pow(2, -sum/words));
    }
}

/* (continues past this view) For each stdin line: reset hidden state, prime
 * with `seed`, then feed the line through the network. */
void vec_char_rnn(char *cfgfile, char *weightfile, char *seed)
{
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);

    network *net = load_network(cfgfile, weightfile, 0);
    int inputs = net->inputs;

    int c;
    int seed_len = strlen(seed);
    float *input = calloc(inputs, sizeof(float));
    int i;
    char *line;
    while((line=fgetl(stdin)) != 0){
        reset_network_state(net, 0);
        for(i = 0; i < seed_len; ++i){
            c = seed[i];
            input[(int)c] = 1;
            network_predict(net, input);
            input[(int)c] = 0;
        }
        strip(line);
        int str_len = strlen(line);
        for(i = 0; i < str_len; ++i){
            c = line[i];
            input[(int)c] = 1;
            network_predict(net, input);
            input[(int)c] = 0;
        }
        c = ' ';
input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + + layer l = net->layers[0]; + #ifdef GPU + cuda_pull_array(l.output_gpu, l.output, l.outputs); + #endif + printf("%s", line); + for(i = 0; i < l.outputs; ++i){ + printf(",%g", l.output[i]); + } + printf("\n"); + } +} + +void run_char_rnn(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + char *filename = find_char_arg(argc, argv, "-file", "data/shakespeare.txt"); + char *seed = find_char_arg(argc, argv, "-seed", "\n\n"); + int len = find_int_arg(argc, argv, "-len", 1000); + float temp = find_float_arg(argc, argv, "-temp", .7); + int rseed = find_int_arg(argc, argv, "-srand", time(0)); + int clear = find_arg(argc, argv, "-clear"); + int tokenized = find_arg(argc, argv, "-tokenized"); + char *tokens = find_char_arg(argc, argv, "-tokens", 0); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_char_rnn(cfg, weights, filename, clear, tokenized); + else if(0==strcmp(argv[2], "valid")) valid_char_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "validtactic")) valid_tactic_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "vec")) vec_char_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "generate")) test_char_rnn(cfg, weights, len, seed, temp, rseed, tokens); + else if(0==strcmp(argv[2], "generatetactic")) test_tactic_rnn(cfg, weights, len, temp, rseed, tokens); +} diff --git a/workloads/realworld/standard/darknet/examples/rnn_vid.c b/workloads/realworld/standard/darknet/examples/rnn_vid.c new file mode 100644 index 0000000000000000000000000000000000000000..e88792352311438d0fcb25bb7befd0677f70bae5 --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/rnn_vid.c @@ -0,0 +1,208 @@ +#include "darknet.h" + +#ifdef OPENCV +image get_image_from_stream(CvCapture *cap); +image ipl_to_image(IplImage* src); + +void 
reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters); + + +typedef struct { + float *x; + float *y; +} float_pair; + +float_pair get_rnn_vid_data(network net, char **files, int n, int batch, int steps) +{ + int b; + assert(net.batch == steps + 1); + image out_im = get_network_image(net); + int output_size = out_im.w*out_im.h*out_im.c; + printf("%d %d %d\n", out_im.w, out_im.h, out_im.c); + float *feats = calloc(net.batch*batch*output_size, sizeof(float)); + for(b = 0; b < batch; ++b){ + int input_size = net.w*net.h*net.c; + float *input = calloc(input_size*net.batch, sizeof(float)); + char *filename = files[rand()%n]; + CvCapture *cap = cvCaptureFromFile(filename); + int frames = cvGetCaptureProperty(cap, CV_CAP_PROP_FRAME_COUNT); + int index = rand() % (frames - steps - 2); + if (frames < (steps + 4)){ + --b; + free(input); + continue; + } + + printf("frames: %d, index: %d\n", frames, index); + cvSetCaptureProperty(cap, CV_CAP_PROP_POS_FRAMES, index); + + int i; + for(i = 0; i < net.batch; ++i){ + IplImage* src = cvQueryFrame(cap); + image im = ipl_to_image(src); + rgbgr_image(im); + image re = resize_image(im, net.w, net.h); + //show_image(re, "loaded"); + //cvWaitKey(10); + memcpy(input + i*input_size, re.data, input_size*sizeof(float)); + free_image(im); + free_image(re); + } + float *output = network_predict(net, input); + + free(input); + + for(i = 0; i < net.batch; ++i){ + memcpy(feats + (b + i*batch)*output_size, output + i*output_size, output_size*sizeof(float)); + } + + cvReleaseCapture(&cap); + } + + //printf("%d %d %d\n", out_im.w, out_im.h, out_im.c); + float_pair p = {0}; + p.x = feats; + p.y = feats + output_size*batch; //+ out_im.w*out_im.h*out_im.c; + + return p; +} + + +void train_vid_rnn(char *cfgfile, char *weightfile) +{ + char *train_videos = "data/vid/train.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = 
basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + + list *plist = get_paths(train_videos); + int N = plist->size; + char **paths = (char **)list_to_array(plist); + clock_t time; + int steps = net.time_steps; + int batch = net.batch / net.time_steps; + + network extractor = parse_network_cfg("cfg/extractor.cfg"); + load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv"); + + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + float_pair p = get_rnn_vid_data(extractor, paths, N, batch, steps); + + copy_cpu(net.inputs*net.batch, p.x, 1, net.input, 1); + copy_cpu(net.truths*net.batch, p.y, 1, net.truth, 1); + float loss = train_network_datum(net) / (net.batch); + + + free(p.x); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time)); + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%10==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + + +image save_reconstruction(network net, image *init, float *feat, char *name, int i) +{ + image recon; + if (init) { + recon = copy_image(*init); + } else { + recon = make_random_image(net.w, net.h, 3); + } + + image update = make_image(net.w, net.h, 3); + reconstruct_picture(net, feat, recon, update, .01, .9, .1, 2, 50); + char buff[256]; + sprintf(buff, "%s%d", name, i); + save_image(recon, buff); + free_image(update); + 
return recon; +} + +void generate_vid_rnn(char *cfgfile, char *weightfile) +{ + network extractor = parse_network_cfg("cfg/extractor.recon.cfg"); + load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv"); + + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&extractor, 1); + set_batch_network(&net, 1); + + int i; + CvCapture *cap = cvCaptureFromFile("/extra/vid/ILSVRC2015/Data/VID/snippets/val/ILSVRC2015_val_00007030.mp4"); + float *feat; + float *next; + image last; + for(i = 0; i < 25; ++i){ + image im = get_image_from_stream(cap); + image re = resize_image(im, extractor.w, extractor.h); + feat = network_predict(extractor, re.data); + if(i > 0){ + printf("%f %f\n", mean_array(feat, 14*14*512), variance_array(feat, 14*14*512)); + printf("%f %f\n", mean_array(next, 14*14*512), variance_array(next, 14*14*512)); + printf("%f\n", mse_array(feat, 14*14*512)); + axpy_cpu(14*14*512, -1, feat, 1, next, 1); + printf("%f\n", mse_array(next, 14*14*512)); + } + next = network_predict(net, feat); + + free_image(im); + + free_image(save_reconstruction(extractor, 0, feat, "feat", i)); + free_image(save_reconstruction(extractor, 0, next, "next", i)); + if (i==24) last = copy_image(re); + free_image(re); + } + for(i = 0; i < 30; ++i){ + next = network_predict(net, next); + image new = save_reconstruction(extractor, &last, next, "new", i); + free_image(last); + last = new; + } +} + +void run_vid_rnn(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + //char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_vid_rnn(cfg, weights); + else if(0==strcmp(argv[2], "generate")) generate_vid_rnn(cfg, weights); +} +#else +void run_vid_rnn(int argc, char **argv){} +#endif + diff --git a/workloads/realworld/standard/darknet/examples/segmenter.c b/workloads/realworld/standard/darknet/examples/segmenter.c new file mode 100644 index 0000000000000000000000000000000000000000..2e7cea0b730754b74a125bcd865aa12f0bdd3be0 --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/segmenter.c @@ -0,0 +1,255 @@ +#include "darknet.h" +#include +#include + +void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + image pred = get_network_image(net); + + int div = net->w/pred.w; + assert(pred.w * div == net->w); + assert(pred.h * div == net->h); + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.scale = div; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = 
net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.classes = 80; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = SEGMENTATION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(display){ + image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]); + image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]); + image mask = mask_to_rgb(tr); + image prmask = mask_to_rgb(pred); + show_image(im, "input", 1); + show_image(prmask, "pred", 1); + show_image(mask, "truth", 100); + free_image(mask); + free_image(prmask); + } + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + 
save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_segmenter(char *datafile, char *cfg, char *weights, char *filename) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image sized = letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + show_image(sized, "orig", 1); + show_image(prmask, "pred", 0); + free_image(im); + free_image(sized); + free_image(prmask); + if (filename) break; + } +} + + +void demo_segmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Classifier Demo\n"); + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = letterbox_image(in, net->w, net->h); + + network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + show_image(prmask, "Segmenter", 10); + + free_image(in_s); + free_image(in); + free_image(prmask); + + 
gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_segmenter(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? 
argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_segmenter(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_segmenter(data, cfg, weights, gpus, ngpus, clear, display); + else if(0==strcmp(argv[2], "demo")) demo_segmenter(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/standard/darknet/examples/super.c b/workloads/realworld/standard/darknet/examples/super.c new file mode 100644 index 0000000000000000000000000000000000000000..d34406b1f2ce70cd36eecb8298bf1ca3e736f01b --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/super.c @@ -0,0 +1,120 @@ +#include "darknet.h" + +void train_super(char *cfgfile, char *weightfile, int clear) +{ + char *train_images = "/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, clear); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.scale = 4; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = SUPER_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf 
seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void test_super(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + resize_network(net, im.w, im.h); + printf("%d %d\n", im.w, im.h); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image(net); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 0); + + free_image(im); + if (filename) break; + } +} + + +void run_super(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5] : 0; + int clear = find_arg(argc, argv, "-clear"); + if(0==strcmp(argv[2], "train")) train_super(cfg, weights, clear); + else if(0==strcmp(argv[2], "test")) test_super(cfg, weights, filename); + /* + else if(0==strcmp(argv[2], "valid")) validate_super(cfg, weights); + */ +} diff --git a/workloads/realworld/standard/darknet/examples/swag.c b/workloads/realworld/standard/darknet/examples/swag.c new file mode 100644 index 0000000000000000000000000000000000000000..c22d7855c46a975ecd1e94a60f9b7059bc288fee --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/swag.c @@ -0,0 +1,83 @@ +#include "darknet.h" +#include + +void train_swag(char *cfgfile, char *weightfile) +{ + char *train_images = "data/voc.0712.trainval"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + data train, buffer; + + layer l = net.layers[net.n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes = side; + args.d = &buffer; + args.type = REGION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + 
time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || i == 600){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void run_swag(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_swag(cfg, weights); +} diff --git a/workloads/realworld/standard/darknet/examples/tag.c b/workloads/realworld/standard/darknet/examples/tag.c new file mode 100644 index 0000000000000000000000000000000000000000..4caf8cba18f39f62deb54ea913fd40c194b3e33c --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/tag.c @@ -0,0 +1,140 @@ +#include "darknet.h" + +void train_tag(char *cfgfile, char *weightfile, int clear) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, clear); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + list *plist = get_paths("/home/pjreddie/tag/train.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + + args.min = net->w; + args.max = net->max_crop; + args.size = net->w; + + args.paths = paths; + args.classes = 
net->outputs; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = TAG_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + fprintf(stderr, "%d classes\n", net->outputs); + + load_thread = load_data_in_thread(args); + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + pthread_join(load_thread, 0); + free_data(buffer); + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void test_tag(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + int i = 0; + char **names = get_labels("data/tags.txt"); + clock_t time; + int indexes[10]; + char buff[256]; + char *input = buff; + int size = net->w; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + 
strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = resize_min(im, size); + resize_network(net, r.w, r.h); + printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + float *predictions = network_predict(net, X); + top_predictions(net, 10, indexes); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < 10; ++i){ + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void run_tag(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int clear = find_arg(argc, argv, "-clear"); + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_tag(cfg, weights, clear); + else if(0==strcmp(argv[2], "test")) test_tag(cfg, weights, filename); +} + diff --git a/workloads/realworld/standard/darknet/examples/voxel.c b/workloads/realworld/standard/darknet/examples/voxel.c new file mode 100644 index 0000000000000000000000000000000000000000..01ea9bb98987590227758364bbfff50996cf9a2d --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/voxel.c @@ -0,0 +1,161 @@ +#include "darknet.h" + +void extract_voxel(char *lfile, char *rfile, char *prefix) +{ +#ifdef OPENCV + int w = 1920; + int h = 1080; + int shift = 0; + int count = 0; + CvCapture *lcap = cvCaptureFromFile(lfile); + CvCapture *rcap = cvCaptureFromFile(rfile); + while(1){ + image l = get_image_from_stream(lcap); + image r = get_image_from_stream(rcap); + if(!l.w || !r.w) break; + if(count%100 == 0) { + shift = best_3d_shift_r(l, r, -l.h/100, l.h/100); + printf("%d\n", shift); + } + image ls = crop_image(l, (l.w - w)/2, (l.h - h)/2, w, h); + image rs = crop_image(r, 105 + (r.w - w)/2, (r.h - h)/2 + shift, w, h); + char 
buff[256]; + sprintf(buff, "%s_%05d_l", prefix, count); + save_image(ls, buff); + sprintf(buff, "%s_%05d_r", prefix, count); + save_image(rs, buff); + free_image(l); + free_image(r); + free_image(ls); + free_image(rs); + ++count; + } + +#else + printf("need OpenCV for extraction\n"); +#endif +} + +void train_voxel(char *cfgfile, char *weightfile) +{ + char *train_images = "/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.scale = 4; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = SUPER_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, 
buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void test_voxel(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + resize_network(&net, im.w, im.h); + printf("%d %d\n", im.w, im.h); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image(net); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + + free_image(im); + if (filename) break; + } +} + + +void run_voxel(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_voxel(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_voxel(cfg, weights, filename); + else if(0==strcmp(argv[2], "extract")) extract_voxel(argv[3], argv[4], argv[5]); + /* + else if(0==strcmp(argv[2], "valid")) validate_voxel(cfg, weights); + */ +} diff --git a/workloads/realworld/standard/darknet/examples/writing.c b/workloads/realworld/standard/darknet/examples/writing.c new file mode 100644 index 0000000000000000000000000000000000000000..1b6ff83b5838b654e0fd1b6664156daf6d7a889b --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/writing.c @@ -0,0 +1,144 @@ +#include "darknet.h" + +void train_writing(char *cfgfile, char *weightfile) +{ + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + list *plist = get_paths("figures.list"); + char **paths = (char **)list_to_array(plist); + clock_t time; + int N = plist->size; + printf("N: %d\n", N); + image out = get_network_image(net); + + data train, buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.out_w = out.w; + args.out_h = out.h; + args.paths = paths; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = WRITING_DATA; + + pthread_t load_thread = load_data_in_thread(args); + int epoch = (*net.seen)/N; + while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + printf("Loaded %lf seconds\n",sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + + /* + image pred = float_to_image(64, 64, 1, out); + print_image(pred); + 
*/ + + /* + image im = float_to_image(256, 256, 3, train.X.vals[0]); + image lab = float_to_image(64, 64, 1, train.y.vals[0]); + image pred = float_to_image(64, 64, 1, out); + show_image(im, "image"); + show_image(lab, "label"); + print_image(lab); + show_image(pred, "pred"); + cvWaitKey(0); + */ + + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + free_data(train); + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_batch_%ld.weights", backup_directory, base, get_current_batch(net)); + save_weights(net, buff); + } + if(*net.seen/N > epoch){ + epoch = *net.seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + } +} + +void test_writing(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + + image im = load_image_color(input, 0, 0); + resize_network(&net, im.w, im.h); + printf("%d %d %d\n", im.h, im.w, im.c); + float *X = im.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + image pred = get_network_image(net); + + image upsampled = resize_image(pred, im.w, im.h); + image thresh = threshold_image(upsampled, .5); + pred = thresh; + + show_image(pred, "prediction"); + show_image(im, "orig"); +#ifdef OPENCV + cvWaitKey(0); + cvDestroyAllWindows(); +#endif + + free_image(upsampled); + 
free_image(thresh); + free_image(im); + if (filename) break; + } +} + +void run_writing(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_writing(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_writing(cfg, weights, filename); +} + diff --git a/workloads/realworld/standard/darknet/examples/yolo.c b/workloads/realworld/standard/darknet/examples/yolo.c new file mode 100644 index 0000000000000000000000000000000000000000..4ddb69a3e53b2123ccb89026645a66c044047faa --- /dev/null +++ b/workloads/realworld/standard/darknet/examples/yolo.c @@ -0,0 +1,327 @@ +#include "darknet.h" + +char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; + +void train_yolo(char *cfgfile, char *weightfile) +{ + char *train_images = "/data/voc/train.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + layer l = net->layers[net->n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes 
= side; + args.d = &buffer; + args.type = REGION_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || (i < 1000 && i%100 == 0)){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void print_yolo_detections(FILE **fps, char *id, int total, int classes, int w, int h, detection *dets) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], + xmin, ymin, xmax, ymax); + } + } +} + +void validate_yolo(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + //list *plist 
= get_paths("data/voc.2007.test"); + list *plist = get_paths("/home/pjreddie/data/voc/2007_test.txt"); + //list *plist = get_paths("data/voc.2012.test"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + int j; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + int t; + + float thresh = .001; + int nms = 1; + float iou_thresh = .5; + + int nthreads = 8; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.type = IMAGE_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + time_t start = time(0); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id = basecfg(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh); + print_yolo_detections(fps, id, l.side*l.side*l.n, classes, w, h, dets); + 
free_detections(dets, nboxes); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); +} + +void validate_yolo_recall(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + list *plist = get_paths("data/voc.2007.test"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + int side = l.side; + + int j, k; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + + float thresh = .001; + float iou_thresh = .5; + float nms = 0; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + + int nboxes = 0; + detection *dets = get_network_boxes(net, orig.w, orig.h, thresh, 0, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, side*side*l.n, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < side*side*l.n; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + 
for(k = 0; k < side*side*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free_detections(dets, nboxes); + free(id); + free_image(orig); + free_image(sized); + } +} + +void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) +{ + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + layer l = net->layers[net->n-1]; + set_batch_network(net, 1); + srand(2222222); + clock_t time; + char buff[256]; + char *input = buff; + float nms=.4; + while(1){ + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = resize_image(im, net->w, net->h); + float *X = sized.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + + int nboxes = 0; + detection *dets = get_network_boxes(net, 1, 1, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms); + + draw_detections(im, dets, l.side*l.side*l.n, thresh, voc_names, alphabet, 20); + save_image(im, "predictions"); + show_image(im, "predictions", 0); + free_detections(dets, nboxes); + free_image(im); + free_image(sized); + if (filename) break; + } +} + +void run_yolo(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .2); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + if(argc < 4){ + fprintf(stderr, "usage: %s %s 
[train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int avg = find_int_arg(argc, argv, "-avg", 1); + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5]: 0; + if(0==strcmp(argv[2], "test")) test_yolo(cfg, weights, filename, thresh); + else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights); + else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix, avg, .5, 0,0,0,0); +} diff --git a/workloads/realworld/standard/darknet/include/darknet.h b/workloads/realworld/standard/darknet/include/darknet.h new file mode 100644 index 0000000000000000000000000000000000000000..7be8225e2d39f079ca0a15da6980b42f8966af40 --- /dev/null +++ b/workloads/realworld/standard/darknet/include/darknet.h @@ -0,0 +1,810 @@ +#ifndef DARKNET_API +#define DARKNET_API +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef GPU + #define GPU_DEVICE 7 + #define BLOCK 512 + + #include "cuda_runtime.h" + #include "curand.h" + #include "cublas_v2.h" + + #ifdef CUDNN + #include "cudnn.h" + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define SECRET_NUM -1234 +extern int gpu_index; + +typedef struct{ + int classes; + char **names; +} metadata; + +metadata get_metadata(char *file); + +typedef struct{ + int *leaf; + int n; + int *parent; + int *child; + int *group; + char **name; + + int groups; + int *group_size; + int *group_offset; +} tree; +tree *read_tree(char *filename); + +typedef enum{ + LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU +} ACTIVATION; + +typedef enum{ + PNG, BMP, TGA, JPG +} IMTYPE; + +typedef enum{ + MULT, ADD, SUB, DIV +} BINARY_ACTIVATION; + +typedef enum { + CONVOLUTIONAL, + DECONVOLUTIONAL, 
+ CONNECTED, + MAXPOOL, + SOFTMAX, + DETECTION, + DROPOUT, + CROP, + ROUTE, + COST, + NORMALIZATION, + AVGPOOL, + LOCAL, + SHORTCUT, + ACTIVE, + RNN, + GRU, + LSTM, + CRNN, + BATCHNORM, + NETWORK, + XNOR, + REGION, + YOLO, + ISEG, + REORG, + UPSAMPLE, + LOGXENT, + L2NORM, + BLANK +} LAYER_TYPE; + +typedef enum{ + SSE, MASKED, L1, SEG, SMOOTH,WGAN +} COST_TYPE; + +typedef struct{ + int batch; + float learning_rate; + float momentum; + float decay; + int adam; + float B1; + float B2; + float eps; + int t; +} update_args; + +struct network; +typedef struct network network; + +struct layer; +typedef struct layer layer; + +struct layer{ + LAYER_TYPE type; + ACTIVATION activation; + COST_TYPE cost_type; + void (*forward) (struct layer, struct network); + void (*backward) (struct layer, struct network); + void (*update) (struct layer, update_args); + void (*forward_gpu) (struct layer, struct network); + void (*backward_gpu) (struct layer, struct network); + void (*update_gpu) (struct layer, update_args); + int batch_normalize; + int shortcut; + int batch; + int forced; + int flipped; + int inputs; + int outputs; + int nweights; + int nbiases; + int extra; + int truths; + int h,w,c; + int out_h, out_w, out_c; + int n; + int max_boxes; + int groups; + int size; + int side; + int stride; + int reverse; + int flatten; + int spatial; + int pad; + int sqrt; + int flip; + int index; + int binary; + int xnor; + int steps; + int hidden; + int truth; + float smooth; + float dot; + float angle; + float jitter; + float saturation; + float exposure; + float shift; + float ratio; + float learning_rate_scale; + float clip; + int noloss; + int softmax; + int classes; + int coords; + int background; + int rescore; + int objectness; + int joint; + int noadjust; + int reorg; + int log; + int tanh; + int *mask; + int total; + + float alpha; + float beta; + float kappa; + + float coord_scale; + float object_scale; + float noobject_scale; + float mask_scale; + float class_scale; + int 
bias_match; + int random; + float ignore_thresh; + float truth_thresh; + float thresh; + float focus; + int classfix; + int absolute; + + int onlyforward; + int stopbackward; + int dontload; + int dontsave; + int dontloadscales; + int numload; + + float temperature; + float probability; + float scale; + + char * cweights; + int * indexes; + int * input_layers; + int * input_sizes; + int * map; + int * counts; + float ** sums; + float * rand; + float * cost; + float * state; + float * prev_state; + float * forgot_state; + float * forgot_delta; + float * state_delta; + float * combine_cpu; + float * combine_delta_cpu; + + float * concat; + float * concat_delta; + + float * binary_weights; + + float * biases; + float * bias_updates; + + float * scales; + float * scale_updates; + + float * weights; + float * weight_updates; + + float * delta; + float * output; + float * loss; + float * squared; + float * norms; + + float * spatial_mean; + float * mean; + float * variance; + + float * mean_delta; + float * variance_delta; + + float * rolling_mean; + float * rolling_variance; + + float * x; + float * x_norm; + + float * m; + float * v; + + float * bias_m; + float * bias_v; + float * scale_m; + float * scale_v; + + + float *z_cpu; + float *r_cpu; + float *h_cpu; + float * prev_state_cpu; + + float *temp_cpu; + float *temp2_cpu; + float *temp3_cpu; + + float *dh_cpu; + float *hh_cpu; + float *prev_cell_cpu; + float *cell_cpu; + float *f_cpu; + float *i_cpu; + float *g_cpu; + float *o_cpu; + float *c_cpu; + float *dc_cpu; + + float * binary_input; + + struct layer *input_layer; + struct layer *self_layer; + struct layer *output_layer; + + struct layer *reset_layer; + struct layer *update_layer; + struct layer *state_layer; + + struct layer *input_gate_layer; + struct layer *state_gate_layer; + struct layer *input_save_layer; + struct layer *state_save_layer; + struct layer *input_state_layer; + struct layer *state_state_layer; + + struct layer *input_z_layer; + struct layer 
*state_z_layer; + + struct layer *input_r_layer; + struct layer *state_r_layer; + + struct layer *input_h_layer; + struct layer *state_h_layer; + + struct layer *wz; + struct layer *uz; + struct layer *wr; + struct layer *ur; + struct layer *wh; + struct layer *uh; + struct layer *uo; + struct layer *wo; + struct layer *uf; + struct layer *wf; + struct layer *ui; + struct layer *wi; + struct layer *ug; + struct layer *wg; + + tree *softmax_tree; + + size_t workspace_size; + +#ifdef GPU + int *indexes_gpu; + + float *z_gpu; + float *r_gpu; + float *h_gpu; + + float *temp_gpu; + float *temp2_gpu; + float *temp3_gpu; + + float *dh_gpu; + float *hh_gpu; + float *prev_cell_gpu; + float *cell_gpu; + float *f_gpu; + float *i_gpu; + float *g_gpu; + float *o_gpu; + float *c_gpu; + float *dc_gpu; + + float *m_gpu; + float *v_gpu; + float *bias_m_gpu; + float *scale_m_gpu; + float *bias_v_gpu; + float *scale_v_gpu; + + float * combine_gpu; + float * combine_delta_gpu; + + float * prev_state_gpu; + float * forgot_state_gpu; + float * forgot_delta_gpu; + float * state_gpu; + float * state_delta_gpu; + float * gate_gpu; + float * gate_delta_gpu; + float * save_gpu; + float * save_delta_gpu; + float * concat_gpu; + float * concat_delta_gpu; + + float * binary_input_gpu; + float * binary_weights_gpu; + + float * mean_gpu; + float * variance_gpu; + + float * rolling_mean_gpu; + float * rolling_variance_gpu; + + float * variance_delta_gpu; + float * mean_delta_gpu; + + float * x_gpu; + float * x_norm_gpu; + float * weights_gpu; + float * weight_updates_gpu; + float * weight_change_gpu; + + float * biases_gpu; + float * bias_updates_gpu; + float * bias_change_gpu; + + float * scales_gpu; + float * scale_updates_gpu; + float * scale_change_gpu; + + float * output_gpu; + float * loss_gpu; + float * delta_gpu; + float * rand_gpu; + float * squared_gpu; + float * norms_gpu; +#ifdef CUDNN + cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc; + cudnnTensorDescriptor_t dsrcTensorDesc, 
ddstTensorDesc; + cudnnTensorDescriptor_t normTensorDesc; + cudnnFilterDescriptor_t weightDesc; + cudnnFilterDescriptor_t dweightDesc; + cudnnConvolutionDescriptor_t convDesc; + cudnnConvolutionFwdAlgo_t fw_algo; + cudnnConvolutionBwdDataAlgo_t bd_algo; + cudnnConvolutionBwdFilterAlgo_t bf_algo; +#endif +#endif +}; + +void free_layer(layer); + +typedef enum { + CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM +} learning_rate_policy; + +typedef struct network{ + int n; + int batch; + size_t *seen; + int *t; + float epoch; + int subdivisions; + layer *layers; + float *output; + learning_rate_policy policy; + + float learning_rate; + float momentum; + float decay; + float gamma; + float scale; + float power; + int time_steps; + int step; + int max_batches; + float *scales; + int *steps; + int num_steps; + int burn_in; + + int adam; + float B1; + float B2; + float eps; + + int inputs; + int outputs; + int truths; + int notruth; + int h, w, c; + int max_crop; + int min_crop; + float max_ratio; + float min_ratio; + int center; + float angle; + float aspect; + float exposure; + float saturation; + float hue; + int random; + + int gpu_index; + tree *hierarchy; + + float *input; + float *truth; + float *delta; + float *workspace; + int train; + int index; + float *cost; + float clip; + +#ifdef GPU + float *input_gpu; + float *truth_gpu; + float *delta_gpu; + float *output_gpu; +#endif + +} network; + +typedef struct { + int w; + int h; + float scale; + float rad; + float dx; + float dy; + float aspect; +} augment_args; + +typedef struct { + int w; + int h; + int c; + float *data; +} image; + +typedef struct{ + float x, y, w, h; +} box; + +typedef struct detection{ + box bbox; + int classes; + float *prob; + float *mask; + float objectness; + int sort_class; +} detection; + +typedef struct matrix{ + int rows, cols; + float **vals; +} matrix; + + +typedef struct{ + int w, h; + matrix X; + matrix y; + int shallow; + int *num_boxes; + box **boxes; +} data; + +typedef enum { + 
CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA, ISEG_DATA +} data_type; + +typedef struct load_args{ + int threads; + char **paths; + char *path; + int n; + int m; + char **labels; + int h; + int w; + int out_w; + int out_h; + int nh; + int nw; + int num_boxes; + int min, max, size; + int classes; + int background; + int scale; + int center; + int coords; + float jitter; + float angle; + float aspect; + float saturation; + float exposure; + float hue; + data *d; + image *im; + image *resized; + data_type type; + tree *hierarchy; +} load_args; + +typedef struct{ + int id; + float x,y,w,h; + float left, right, top, bottom; +} box_label; + + +network *load_network(char *cfg, char *weights, int clear); +load_args get_base_args(network *net); + +void free_data(data d); + +typedef struct node{ + void *val; + struct node *next; + struct node *prev; +} node; + +typedef struct list{ + int size; + node *front; + node *back; +} list; + +pthread_t load_data(load_args args); +list *read_data_cfg(char *filename); +list *read_cfg(char *filename); +unsigned char *read_file(char *filename); +data resize_data(data orig, int w, int h); +data *tile_data(data orig, int divs, int size); +data select_data(data *orig, int *inds); + +void forward_network(network *net); +void backward_network(network *net); +void update_network(network *net); + + +float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); +void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); +void scal_cpu(int N, float ALPHA, float *X, int INCX); +void fill_cpu(int N, float ALPHA, float * X, int INCX); +void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); +void softmax(float *input, int n, float 
temp, int stride, float *output); + +int best_3d_shift_r(image a, image b, int min, int max); +#ifdef GPU +void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); +void fill_gpu(int N, float ALPHA, float * X, int INCX); +void scal_gpu(int N, float ALPHA, float * X, int INCX); +void copy_gpu(int N, float * X, int INCX, float * Y, int INCY); + +void cuda_set_device(int n); +void cuda_free(float *x_gpu); +float *cuda_make_array(float *x, size_t n); +void cuda_pull_array(float *x_gpu, float *x, size_t n); +float cuda_mag_array(float *x_gpu, size_t n); +void cuda_push_array(float *x_gpu, float *x, size_t n); + +void forward_network_gpu(network *net); +void backward_network_gpu(network *net); +void update_network_gpu(network *net); + +float train_networks(network **nets, int n, data d, int interval); +void sync_nets(network **nets, int n, int interval); +void harmless_update_network_gpu(network *net); +#endif +image get_label(image **characters, char *string, int size); +void draw_label(image a, int r, int c, image label, const float *rgb); +void save_image(image im, const char *name); +void save_image_options(image im, const char *name, IMTYPE f, int quality); +void get_next_batch(data d, int n, int offset, float *X, float *y); +void grayscale_image_3c(image im); +void normalize_image(image p); +void matrix_to_csv(matrix m); +float train_network_sgd(network *net, data d, int n); +void rgbgr_image(image im); +data copy_data(data d); +data concat_data(data d1, data d2); +data load_cifar10_data(char *filename); +float matrix_topk_accuracy(matrix truth, matrix guess, int k); +void matrix_add_matrix(matrix from, matrix to); +void scale_matrix(matrix m, float scale); +matrix csv_to_matrix(char *filename); +float *network_accuracies(network *net, data d, int n); +float train_network_datum(network *net); +image make_random_image(int w, int h, int c); + +void denormalize_connected_layer(layer l); +void denormalize_convolutional_layer(layer l); +void 
statistics_connected_layer(layer l); +void rescale_weights(layer l, float scale, float trans); +void rgbgr_weights(layer l); +image *get_weights(layer l); + +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, int avg, float hier_thresh, int w, int h, int fps, int fullscreen); +void get_detection_detections(layer l, int w, int h, float thresh, detection *dets); + +char *option_find_str(list *l, char *key, char *def); +int option_find_int(list *l, char *key, int def); +int option_find_int_quiet(list *l, char *key, int def); + +network *parse_network_cfg(char *filename); +void save_weights(network *net, char *filename); +void load_weights(network *net, char *filename); +void save_weights_upto(network *net, char *filename, int cutoff); +void load_weights_upto(network *net, char *filename, int start, int cutoff); + +void zero_objectness(layer l); +void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets); +int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets); +void free_network(network *net); +void set_batch_network(network *net, int b); +void set_temp_network(network *net, float t); +image load_image(char *filename, int w, int h, int c); +image load_image_color(char *filename, int w, int h); +image make_image(int w, int h, int c); +image resize_image(image im, int w, int h); +void censor_image(image im, int dx, int dy, int w, int h); +image letterbox_image(image im, int w, int h); +image crop_image(image im, int dx, int dy, int w, int h); +image center_crop_image(image im, int w, int h); +image resize_min(image im, int min); +image resize_max(image im, int max); +image threshold_image(image im, float thresh); +image mask_to_rgb(image mask); +int resize_network(network *net, int w, int h); +void free_matrix(matrix m); 
+void test_resize(char *filename); +int show_image(image p, const char *name, int ms); +image copy_image(image p); +void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b); +float get_current_rate(network *net); +void composite_3d(char *f1, char *f2, char *out, int delta); +data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h); +size_t get_current_batch(network *net); +void constrain_image(image im); +image get_network_image_layer(network *net, int i); +layer get_network_output_layer(network *net); +void top_predictions(network *net, int n, int *index); +void flip_image(image a); +image float_to_image(int w, int h, int c, float *data); +void ghost_image(image source, image dest, int dx, int dy); +float network_accuracy(network *net, data d); +void random_distort_image(image im, float hue, float saturation, float exposure); +void fill_image(image m, float s); +image grayscale_image(image im); +void rotate_image_cw(image im, int times); +double what_time_is_it_now(); +image rotate_image(image m, float rad); +void visualize_network(network *net); +float box_iou(box a, box b); +data load_all_cifar10(); +box_label *read_boxes(char *filename, int *n); +box float_to_box(float *f, int stride); +void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes); + +matrix network_predict_data(network *net, data test); +image **load_alphabet(); +image get_network_image(network *net); +float *network_predict(network *net, float *input); + +int network_width(network *net); +int network_height(network *net); +float *network_predict_image(network *net, image im); +void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets); +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num); +void free_detections(detection *dets, int n); + +void 
reset_network_state(network *net, int b); + +char **get_labels(char *filename); +void do_nms_obj(detection *dets, int total, int classes, float thresh); +void do_nms_sort(detection *dets, int total, int classes, float thresh); + +matrix make_matrix(int rows, int cols); + +#ifdef OPENCV +void *open_video_stream(const char *f, int c, int w, int h, int fps); +image get_image_from_stream(void *p); +void make_window(char *name, int w, int h, int fullscreen); +#endif + +void free_image(image m); +float train_network(network *net, data d); +pthread_t load_data_in_thread(load_args args); +void load_data_blocking(load_args args); +list *get_paths(char *filename); +void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride); +void change_leaves(tree *t, char *leaf_list); + +int find_int_arg(int argc, char **argv, char *arg, int def); +float find_float_arg(int argc, char **argv, char *arg, float def); +int find_arg(int argc, char* argv[], char *arg); +char *find_char_arg(int argc, char **argv, char *arg, char *def); +char *basecfg(char *cfgfile); +void find_replace(char *str, char *orig, char *rep, char *output); +void free_ptrs(void **ptrs, int n); +char *fgetl(FILE *fp); +void strip(char *s); +float sec(clock_t clocks); +void **list_to_array(list *l); +void top_k(float *a, int n, int k, int *index); +int *read_map(char *filename); +void error(const char *s); +int max_index(float *a, int n); +int max_int_index(int *a, int n); +int sample_array(float *a, int n); +int *random_index_order(int min, int max); +void free_list(list *l); +float mse_array(float *a, int n); +float variance_array(float *a, int n); +float mag_array(float *a, int n); +void scale_array(float *a, int n, float s); +float mean_array(float *a, int n); +float sum_array(float *a, int n); +void normalize_array(float *a, int n); +int *read_intlist(char *s, int *n, int d); +size_t rand_size_t(); +float rand_normal(); +float rand_uniform(float min, float max); + +#ifdef 
__cplusplus +} +#endif +#endif diff --git a/workloads/realworld/standard/darknet/predictions.jpg b/workloads/realworld/standard/darknet/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c92d70d77e70e11853e9838ca90b46eb71a18ffa Binary files /dev/null and b/workloads/realworld/standard/darknet/predictions.jpg differ diff --git a/workloads/realworld/standard/darknet/python/darknet.py b/workloads/realworld/standard/darknet/python/darknet.py new file mode 100644 index 0000000000000000000000000000000000000000..b14d24485d86aa69f3991be79ec4f25c2b8e5a59 --- /dev/null +++ b/workloads/realworld/standard/darknet/python/darknet.py @@ -0,0 +1,156 @@ +from ctypes import * +import math +import random + +def sample(probs): + s = sum(probs) + probs = [a/s for a in probs] + r = random.uniform(0, 1) + for i in range(len(probs)): + r = r - probs[i] + if r <= 0: + return i + return len(probs)-1 + +def c_array(ctype, values): + arr = (ctype*len(values))() + arr[:] = values + return arr + +class BOX(Structure): + _fields_ = [("x", c_float), + ("y", c_float), + ("w", c_float), + ("h", c_float)] + +class DETECTION(Structure): + _fields_ = [("bbox", BOX), + ("classes", c_int), + ("prob", POINTER(c_float)), + ("mask", POINTER(c_float)), + ("objectness", c_float), + ("sort_class", c_int)] + + +class IMAGE(Structure): + _fields_ = [("w", c_int), + ("h", c_int), + ("c", c_int), + ("data", POINTER(c_float))] + +class METADATA(Structure): + _fields_ = [("classes", c_int), + ("names", POINTER(c_char_p))] + + + +#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL) +lib = CDLL("libdarknet.so", RTLD_GLOBAL) +lib.network_width.argtypes = [c_void_p] +lib.network_width.restype = c_int +lib.network_height.argtypes = [c_void_p] +lib.network_height.restype = c_int + +predict = lib.network_predict +predict.argtypes = [c_void_p, POINTER(c_float)] +predict.restype = POINTER(c_float) + +set_gpu = lib.cuda_set_device +set_gpu.argtypes = [c_int] + 
+make_image = lib.make_image +make_image.argtypes = [c_int, c_int, c_int] +make_image.restype = IMAGE + +get_network_boxes = lib.get_network_boxes +get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)] +get_network_boxes.restype = POINTER(DETECTION) + +make_network_boxes = lib.make_network_boxes +make_network_boxes.argtypes = [c_void_p] +make_network_boxes.restype = POINTER(DETECTION) + +free_detections = lib.free_detections +free_detections.argtypes = [POINTER(DETECTION), c_int] + +free_ptrs = lib.free_ptrs +free_ptrs.argtypes = [POINTER(c_void_p), c_int] + +network_predict = lib.network_predict +network_predict.argtypes = [c_void_p, POINTER(c_float)] + +reset_rnn = lib.reset_rnn +reset_rnn.argtypes = [c_void_p] + +load_net = lib.load_network +load_net.argtypes = [c_char_p, c_char_p, c_int] +load_net.restype = c_void_p + +do_nms_obj = lib.do_nms_obj +do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +do_nms_sort = lib.do_nms_sort +do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +free_image = lib.free_image +free_image.argtypes = [IMAGE] + +letterbox_image = lib.letterbox_image +letterbox_image.argtypes = [IMAGE, c_int, c_int] +letterbox_image.restype = IMAGE + +load_meta = lib.get_metadata +lib.get_metadata.argtypes = [c_char_p] +lib.get_metadata.restype = METADATA + +load_image = lib.load_image_color +load_image.argtypes = [c_char_p, c_int, c_int] +load_image.restype = IMAGE + +rgbgr_image = lib.rgbgr_image +rgbgr_image.argtypes = [IMAGE] + +predict_image = lib.network_predict_image +predict_image.argtypes = [c_void_p, IMAGE] +predict_image.restype = POINTER(c_float) + +def classify(net, meta, im): + out = predict_image(net, im) + res = [] + for i in range(meta.classes): + res.append((meta.names[i], out[i])) + res = sorted(res, key=lambda x: -x[1]) + return res + +def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): + im = load_image(image, 0, 0) + num = 
c_int(0) + pnum = pointer(num) + predict_image(net, im) + dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum) + num = pnum[0] + if (nms): do_nms_obj(dets, num, meta.classes, nms); + + res = [] + for j in range(num): + for i in range(meta.classes): + if dets[j].prob[i] > 0: + b = dets[j].bbox + res.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h))) + res = sorted(res, key=lambda x: -x[1]) + free_image(im) + free_detections(dets, num) + return res + +if __name__ == "__main__": + #net = load_net("cfg/densenet201.cfg", "/home/pjreddie/trained/densenet201.weights", 0) + #im = load_image("data/wolf.jpg", 0, 0) + #meta = load_meta("cfg/imagenet1k.data") + #r = classify(net, meta, im) + #print r[:10] + net = load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0) + meta = load_meta("cfg/coco.data") + r = detect(net, meta, "data/dog.jpg") + print(r) + + diff --git a/workloads/realworld/standard/darknet/python/proverbot.py b/workloads/realworld/standard/darknet/python/proverbot.py new file mode 100644 index 0000000000000000000000000000000000000000..095aae8f8bf8bbe47ea1768a6e2c948bb0ff8f85 --- /dev/null +++ b/workloads/realworld/standard/darknet/python/proverbot.py @@ -0,0 +1,37 @@ +from darknet import * + +def predict_tactic(net, s): + prob = 0 + d = c_array(c_float, [0.0]*256) + tac = '' + if not len(s): + s = '\n' + for c in s[:-1]: + d[ord(c)] = 1 + pred = predict(net, d) + d[ord(c)] = 0 + c = s[-1] + while 1: + d[ord(c)] = 1 + pred = predict(net, d) + d[ord(c)] = 0 + pred = [pred[i] for i in range(256)] + ind = sample(pred) + c = chr(ind) + prob += math.log(pred[ind]) + if len(tac) and tac[-1] == '.': + break + tac = tac + c + return (tac, prob) + +def predict_tactics(net, s, n): + tacs = [] + for i in range(n): + reset_rnn(net) + tacs.append(predict_tactic(net, s)) + tacs = sorted(tacs, key=lambda x: -x[1]) + return tacs + +net = load_net("cfg/coq.test.cfg", "/home/pjreddie/backup/coq.backup", 0) +t = predict_tactics(net, "+++++\n", 
10) +print t diff --git a/workloads/realworld/standard/darknet/resnet18/run_resnet18.sh b/workloads/realworld/standard/darknet/resnet18/run_resnet18.sh new file mode 100755 index 0000000000000000000000000000000000000000..10257ad02affc17f8287ea42917813fe320f5f23 --- /dev/null +++ b/workloads/realworld/standard/darknet/resnet18/run_resnet18.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg diff --git a/workloads/realworld/standard/darknet/resnet18/run_super.sh b/workloads/realworld/standard/darknet/resnet18/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..10257ad02affc17f8287ea42917813fe320f5f23 --- /dev/null +++ b/workloads/realworld/standard/darknet/resnet18/run_super.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg diff --git a/workloads/realworld/standard/darknet/resnet18_b/run_super.sh b/workloads/realworld/standard/darknet/resnet18_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..012635a1ce64ecda462e50097be554185989ae7a --- /dev/null +++ b/workloads/realworld/standard/darknet/resnet18_b/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier test ../cfg/imagenet1k.data ../cfg/resnet18_b.cfg diff --git a/workloads/realworld/standard/darknet/resnet18_t/run_super.sh b/workloads/realworld/standard/darknet/resnet18_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..0eb59b3bd65cf0186c5ed5f36eff5ec34d54298c --- /dev/null +++ b/workloads/realworld/standard/darknet/resnet18_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg 
+../darknet classifier train ../cfg/imagenet1k.data ../cfg/resnet18_t.cfg \ No newline at end of file diff --git a/workloads/realworld/standard/darknet/resnet50/run_resnet50.sh b/workloads/realworld/standard/darknet/resnet50/run_resnet50.sh new file mode 100755 index 0000000000000000000000000000000000000000..ce88111af06600203c1ae94421134eb3404f51a4 --- /dev/null +++ b/workloads/realworld/standard/darknet/resnet50/run_resnet50.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet50.cfg ../../../../../data/darknet/resnet50.weights ../data/dog.jpg diff --git a/workloads/realworld/standard/darknet/resnet50/run_super.sh b/workloads/realworld/standard/darknet/resnet50/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ce88111af06600203c1ae94421134eb3404f51a4 --- /dev/null +++ b/workloads/realworld/standard/darknet/resnet50/run_super.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet50.cfg ../../../../../data/darknet/resnet50.weights ../data/dog.jpg diff --git a/workloads/realworld/standard/darknet/resnet50_b/run_super.sh b/workloads/realworld/standard/darknet/resnet50_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..e6f1b1d59b612bef36d04af547bf61808261eb12 --- /dev/null +++ b/workloads/realworld/standard/darknet/resnet50_b/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier test ../cfg/imagenet1k.data ../cfg/resnet50_b.cfg diff --git a/workloads/realworld/standard/darknet/resnet50_t/run_super.sh b/workloads/realworld/standard/darknet/resnet50_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..4d4c7feebd1bd5bdcded72e3d4cf58045949ac90 --- /dev/null +++ b/workloads/realworld/standard/darknet/resnet50_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict 
../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier train ../cfg/imagenet1k.data ../cfg/resnet50_t.cfg diff --git a/workloads/realworld/standard/darknet/scripts/dice_label.sh b/workloads/realworld/standard/darknet/scripts/dice_label.sh new file mode 100644 index 0000000000000000000000000000000000000000..f19f8a49481b46d5a04dd18b1b05af8928b21957 --- /dev/null +++ b/workloads/realworld/standard/darknet/scripts/dice_label.sh @@ -0,0 +1,20 @@ +mkdir -p images +mkdir -p images/orig +mkdir -p images/train +mkdir -p images/val + +ffmpeg -i Face1.mp4 images/orig/face1_%6d.jpg +ffmpeg -i Face2.mp4 images/orig/face2_%6d.jpg +ffmpeg -i Face3.mp4 images/orig/face3_%6d.jpg +ffmpeg -i Face4.mp4 images/orig/face4_%6d.jpg +ffmpeg -i Face5.mp4 images/orig/face5_%6d.jpg +ffmpeg -i Face6.mp4 images/orig/face6_%6d.jpg + +mogrify -resize 100x100^ -gravity center -crop 100x100+0+0 +repage images/orig/* + +ls images/orig/* | shuf | head -n 1000 | xargs mv -t images/val +mv images/orig/* images/train + +find `pwd`/images/train > dice.train.list -name \*.jpg +find `pwd`/images/val > dice.val.list -name \*.jpg + diff --git a/workloads/realworld/standard/darknet/scripts/gen_tactic.sh b/workloads/realworld/standard/darknet/scripts/gen_tactic.sh new file mode 100755 index 0000000000000000000000000000000000000000..ffa30d27754dacdd03bd5996d41cbfab14db0f39 --- /dev/null +++ b/workloads/realworld/standard/darknet/scripts/gen_tactic.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Usage: +# wget http://pjreddie.com/media/files/peek.weights +# scripts/gen_tactic.sh < data/goal.txt +./darknet rnn generatetactic cfg/gru.cfg peek.weights 2>/dev/null diff --git a/workloads/realworld/standard/darknet/scripts/get_coco_dataset.sh b/workloads/realworld/standard/darknet/scripts/get_coco_dataset.sh new file mode 100644 index 0000000000000000000000000000000000000000..28463015d1748fd331e071a0a778c6d4500b29ef --- /dev/null +++ 
b/workloads/realworld/standard/darknet/scripts/get_coco_dataset.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Clone COCO API +git clone https://github.com/pdollar/coco +cd coco + +mkdir images +cd images + +# Download Images +wget -c https://pjreddie.com/media/files/train2014.zip +wget -c https://pjreddie.com/media/files/val2014.zip + +# Unzip +unzip -q train2014.zip +unzip -q val2014.zip + +cd .. + +# Download COCO Metadata +wget -c https://pjreddie.com/media/files/instances_train-val2014.zip +wget -c https://pjreddie.com/media/files/coco/5k.part +wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part +wget -c https://pjreddie.com/media/files/coco/labels.tgz +tar xzf labels.tgz +unzip -q instances_train-val2014.zip + +# Set Up Image Lists +paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt +paste <(awk "{print \"$PWD\"}" trainvalno5k.txt + diff --git a/workloads/realworld/standard/darknet/scripts/imagenet_label.sh b/workloads/realworld/standard/darknet/scripts/imagenet_label.sh new file mode 100644 index 0000000000000000000000000000000000000000..01e4306ee3cf7322427374f01c766bcdef970922 --- /dev/null +++ b/workloads/realworld/standard/darknet/scripts/imagenet_label.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +mkdir -p labelled +wd=`pwd` + +for f in val/*.xml; +do +label=`grep -m1 "" $f | grep -oP '\K[^<]*'` +im=`echo $f | sed 's/val/imgs/; s/xml/JPEG/'` +out=`echo $im | sed 's/JPEG/'${label}'.JPEG/; s/imgs/labelled/'` +ln -s ${wd}/$im ${wd}/$out +done + +find ${wd}/labelled -name \*.JPEG > inet.val.list + diff --git a/workloads/realworld/standard/darknet/scripts/voc_label.py b/workloads/realworld/standard/darknet/scripts/voc_label.py new file mode 100644 index 0000000000000000000000000000000000000000..679fc366890d9eccf15124f950a274d8ad24fc83 --- /dev/null +++ b/workloads/realworld/standard/darknet/scripts/voc_label.py @@ -0,0 +1,59 @@ +import xml.etree.ElementTree as ET +import pickle +import os +from os import listdir, getcwd +from os.path import join 
+ +sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')] + +classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] + + +def convert(size, box): + dw = 1./(size[0]) + dh = 1./(size[1]) + x = (box[0] + box[1])/2.0 - 1 + y = (box[2] + box[3])/2.0 - 1 + w = box[1] - box[0] + h = box[3] - box[2] + x = x*dw + w = w*dw + y = y*dh + h = h*dh + return (x,y,w,h) + +def convert_annotation(year, image_id): + in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id)) + out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w') + tree=ET.parse(in_file) + root = tree.getroot() + size = root.find('size') + w = int(size.find('width').text) + h = int(size.find('height').text) + + for obj in root.iter('object'): + difficult = obj.find('difficult').text + cls = obj.find('name').text + if cls not in classes or int(difficult)==1: + continue + cls_id = classes.index(cls) + xmlbox = obj.find('bndbox') + b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) + bb = convert((w,h), b) + out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') + +wd = getcwd() + +for year, image_set in sets: + if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)): + os.makedirs('VOCdevkit/VOC%s/labels/'%(year)) + image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split() + list_file = open('%s_%s.txt'%(year, image_set), 'w') + for image_id in image_ids: + list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id)) + convert_annotation(year, image_id) + list_file.close() + +os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt") +os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt 
> train.all.txt") + diff --git a/workloads/realworld/standard/darknet/src/activation_kernels.cu b/workloads/realworld/standard/darknet/src/activation_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..659b44fb85fba664e37b6e8d6aa1abee39accdd2 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/activation_kernels.cu @@ -0,0 +1,206 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "activations.h" +#include "cuda_dark.h" +} + + +__device__ float lhtan_activate_kernel(float x) +{ + if(x < 0) return .001f*x; + if(x > 1) return .001f*(x-1.f) + 1.f; + return x; +} +__device__ float lhtan_gradient_kernel(float x) +{ + if(x > 0 && x < 1) return 1; + return .001; +} + +__device__ float hardtan_activate_kernel(float x) +{ + if (x < -1) return -1; + if (x > 1) return 1; + return x; +} +__device__ float linear_activate_kernel(float x){return x;} +__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));} +__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;} +__device__ float relu_activate_kernel(float x){return x*(x>0);} +__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);} +__device__ float selu_activate_kernel(float x){return (x >= 0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x)-1);} +__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;} +__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;} +__device__ float leaky_activate_kernel(float x){return (x>0) ? 
x : .1f*x;} +__device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);} +__device__ float plse_activate_kernel(float x) +{ + if(x < -4) return .01f * (x + 4); + if(x > 4) return .01f * (x - 4) + 1; + return .125f*x + .5f; +} +__device__ float stair_activate_kernel(float x) +{ + int n = floorf(x); + if (n%2 == 0) return floorf(x/2); + else return (x - n) + floorf(x/2); +} + + +__device__ float hardtan_gradient_kernel(float x) +{ + if (x > -1 && x < 1) return 1; + return 0; +} +__device__ float linear_gradient_kernel(float x){return 1;} +__device__ float logistic_gradient_kernel(float x){return (1-x)*x;} +__device__ float loggy_gradient_kernel(float x) +{ + float y = (x+1)/2; + return 2*(1-y)*y; +} +__device__ float relu_gradient_kernel(float x){return (x>0);} +__device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);} +__device__ float selu_gradient_kernel(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);} +__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;} +__device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;} +__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;} +__device__ float tanh_gradient_kernel(float x){return 1-x*x;} +__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? 
.01f : .125f;} +__device__ float stair_gradient_kernel(float x) +{ + if (floorf(x) == x) return 0; + return 1; +} + +__device__ float activate_kernel(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_activate_kernel(x); + case LOGISTIC: + return logistic_activate_kernel(x); + case LOGGY: + return loggy_activate_kernel(x); + case RELU: + return relu_activate_kernel(x); + case ELU: + return elu_activate_kernel(x); + case SELU: + return selu_activate_kernel(x); + case RELIE: + return relie_activate_kernel(x); + case RAMP: + return ramp_activate_kernel(x); + case LEAKY: + return leaky_activate_kernel(x); + case TANH: + return tanh_activate_kernel(x); + case PLSE: + return plse_activate_kernel(x); + case STAIR: + return stair_activate_kernel(x); + case HARDTAN: + return hardtan_activate_kernel(x); + case LHTAN: + return lhtan_activate_kernel(x); + } + return 0; +} + +__device__ float gradient_kernel(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_gradient_kernel(x); + case LOGISTIC: + return logistic_gradient_kernel(x); + case LOGGY: + return loggy_gradient_kernel(x); + case RELU: + return relu_gradient_kernel(x); + case ELU: + return elu_gradient_kernel(x); + case SELU: + return selu_gradient_kernel(x); + case RELIE: + return relie_gradient_kernel(x); + case RAMP: + return ramp_gradient_kernel(x); + case LEAKY: + return leaky_gradient_kernel(x); + case TANH: + return tanh_gradient_kernel(x); + case PLSE: + return plse_gradient_kernel(x); + case STAIR: + return stair_gradient_kernel(x); + case HARDTAN: + return hardtan_gradient_kernel(x); + case LHTAN: + return lhtan_gradient_kernel(x); + } + return 0; +} + +__global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int i = id % s; + int b = id / s; + float x1 = x[b*s + i]; + float x2 = x[b*s + s/2 + i]; + if(id < n) { + float de = dy[id]; + dx[b*s 
+ i] = x2*de; + dx[b*s + s/2 + i] = x1*de; + } +} + +extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y) +{ + binary_gradient_array_kernel<<>>(x, dx, n/2, size, a, y); + check_error(cudaPeekAtLastError()); +} +__global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int i = id % s; + int b = id / s; + float x1 = x[b*s + i]; + float x2 = x[b*s + s/2 + i]; + if(id < n) y[id] = x1*x2; +} + +extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y) +{ + binary_activate_array_kernel<<>>(x, n/2, size, a, y); + check_error(cudaPeekAtLastError()); +} + +__global__ void activate_array_kernel(float *x, int n, ACTIVATION a) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n) x[i] = activate_kernel(x[i], a); +} + +__global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n) delta[i] *= gradient_kernel(x[i], a); +} + +extern "C" void activate_array_gpu(float *x, int n, ACTIVATION a) +{ + activate_array_kernel<<>>(x, n, a); + check_error(cudaPeekAtLastError()); +} + +extern "C" void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta) +{ + gradient_array_kernel<<>>(x, n, a, delta); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/standard/darknet/src/activation_layer.c b/workloads/realworld/standard/darknet/src/activation_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..0791772336e4d1b001ed1b76bbbf21ee8d6fa24f --- /dev/null +++ b/workloads/realworld/standard/darknet/src/activation_layer.c @@ -0,0 +1,63 @@ +#include "activation_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include 
+#include + +layer make_activation_layer(int batch, int inputs, ACTIVATION activation) +{ + layer l = {0}; + l.type = ACTIVE; + + l.inputs = inputs; + l.outputs = inputs; + l.batch=batch; + + l.output = calloc(batch*inputs, sizeof(float*)); + l.delta = calloc(batch*inputs, sizeof(float*)); + + l.forward = forward_activation_layer; + l.backward = backward_activation_layer; +#ifdef GPU + l.forward_gpu = forward_activation_layer_gpu; + l.backward_gpu = backward_activation_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); +#endif + l.activation = activation; + fprintf(stderr, "Activation Layer: %d inputs\n", inputs); + return l; +} + +void forward_activation_layer(layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_activation_layer(layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_activation_layer_gpu(layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_activation_layer_gpu(layer l, network net) +{ + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/standard/darknet/src/activation_layer.h b/workloads/realworld/standard/darknet/src/activation_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..42118a84e83f59a8997e354959404d1283a3004c --- /dev/null +++ b/workloads/realworld/standard/darknet/src/activation_layer.h @@ -0,0 +1,19 @@ +#ifndef ACTIVATION_LAYER_H +#define ACTIVATION_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer 
make_activation_layer(int batch, int inputs, ACTIVATION activation); + +void forward_activation_layer(layer l, network net); +void backward_activation_layer(layer l, network net); + +#ifdef GPU +void forward_activation_layer_gpu(layer l, network net); +void backward_activation_layer_gpu(layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/standard/darknet/src/activations.c b/workloads/realworld/standard/darknet/src/activations.c new file mode 100644 index 0000000000000000000000000000000000000000..da1a17a89b46b6c41fa80b5dd113e1b30c910712 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/activations.c @@ -0,0 +1,150 @@ +#include "activations.h" + +#include +#include +#include +#include + +char *get_activation_string(ACTIVATION a) +{ + switch(a){ + case LOGISTIC: + return "logistic"; + case LOGGY: + return "loggy"; + case RELU: + return "relu"; + case ELU: + return "elu"; + case SELU: + return "selu"; + case RELIE: + return "relie"; + case RAMP: + return "ramp"; + case LINEAR: + return "linear"; + case TANH: + return "tanh"; + case PLSE: + return "plse"; + case LEAKY: + return "leaky"; + case STAIR: + return "stair"; + case HARDTAN: + return "hardtan"; + case LHTAN: + return "lhtan"; + default: + break; + } + return "relu"; +} + +ACTIVATION get_activation(char *s) +{ + if (strcmp(s, "logistic")==0) return LOGISTIC; + if (strcmp(s, "loggy")==0) return LOGGY; + if (strcmp(s, "relu")==0) return RELU; + if (strcmp(s, "elu")==0) return ELU; + if (strcmp(s, "selu")==0) return SELU; + if (strcmp(s, "relie")==0) return RELIE; + if (strcmp(s, "plse")==0) return PLSE; + if (strcmp(s, "hardtan")==0) return HARDTAN; + if (strcmp(s, "lhtan")==0) return LHTAN; + if (strcmp(s, "linear")==0) return LINEAR; + if (strcmp(s, "ramp")==0) return RAMP; + if (strcmp(s, "leaky")==0) return LEAKY; + if (strcmp(s, "tanh")==0) return TANH; + if (strcmp(s, "stair")==0) return STAIR; + fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s); 
+ return RELU; +} + +float activate(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_activate(x); + case LOGISTIC: + return logistic_activate(x); + case LOGGY: + return loggy_activate(x); + case RELU: + return relu_activate(x); + case ELU: + return elu_activate(x); + case SELU: + return selu_activate(x); + case RELIE: + return relie_activate(x); + case RAMP: + return ramp_activate(x); + case LEAKY: + return leaky_activate(x); + case TANH: + return tanh_activate(x); + case PLSE: + return plse_activate(x); + case STAIR: + return stair_activate(x); + case HARDTAN: + return hardtan_activate(x); + case LHTAN: + return lhtan_activate(x); + } + return 0; +} + +void activate_array(float *x, const int n, const ACTIVATION a) +{ + int i; + for(i = 0; i < n; ++i){ + x[i] = activate(x[i], a); + } +} + +float gradient(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_gradient(x); + case LOGISTIC: + return logistic_gradient(x); + case LOGGY: + return loggy_gradient(x); + case RELU: + return relu_gradient(x); + case ELU: + return elu_gradient(x); + case SELU: + return selu_gradient(x); + case RELIE: + return relie_gradient(x); + case RAMP: + return ramp_gradient(x); + case LEAKY: + return leaky_gradient(x); + case TANH: + return tanh_gradient(x); + case PLSE: + return plse_gradient(x); + case STAIR: + return stair_gradient(x); + case HARDTAN: + return hardtan_gradient(x); + case LHTAN: + return lhtan_gradient(x); + } + return 0; +} + +void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta) +{ + int i; + for(i = 0; i < n; ++i){ + delta[i] *= gradient(x[i], a); + } +} + diff --git a/workloads/realworld/standard/darknet/src/activations.h b/workloads/realworld/standard/darknet/src/activations.h new file mode 100644 index 0000000000000000000000000000000000000000..eec28d5b692ede3975e01a4d454ace20e8a9fdd8 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/activations.h @@ -0,0 +1,87 @@ +#ifndef ACTIVATIONS_H 
+#define ACTIVATIONS_H +#include "darknet.h" +#include "cuda_dark.h" +#include "math.h" + +ACTIVATION get_activation(char *s); + +char *get_activation_string(ACTIVATION a); +float activate(float x, ACTIVATION a); +float gradient(float x, ACTIVATION a); +void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta); +void activate_array(float *x, const int n, const ACTIVATION a); +#ifdef GPU +void activate_array_gpu(float *x, int n, ACTIVATION a); +void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta); +#endif + +static inline float stair_activate(float x) +{ + int n = floor(x); + if (n%2 == 0) return floor(x/2.); + else return (x - n) + floor(x/2.); +} +static inline float hardtan_activate(float x) +{ + if (x < -1) return -1; + if (x > 1) return 1; + return x; +} +static inline float linear_activate(float x){return x;} +static inline float logistic_activate(float x){return 1./(1. + exp(-x));} +static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;} +static inline float relu_activate(float x){return x*(x>0);} +static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);} +static inline float selu_activate(float x){return (x >= 0)*1.0507*x + (x < 0)*1.0507*1.6732*(exp(x)-1);} +static inline float relie_activate(float x){return (x>0) ? x : .01*x;} +static inline float ramp_activate(float x){return x*(x>0)+.1*x;} +static inline float leaky_activate(float x){return (x>0) ? 
x : .1*x;} +static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);} +static inline float plse_activate(float x) +{ + if(x < -4) return .01 * (x + 4); + if(x > 4) return .01 * (x - 4) + 1; + return .125*x + .5; +} + +static inline float lhtan_activate(float x) +{ + if(x < 0) return .001*x; + if(x > 1) return .001*(x-1) + 1; + return x; +} +static inline float lhtan_gradient(float x) +{ + if(x > 0 && x < 1) return 1; + return .001; +} + +static inline float hardtan_gradient(float x) +{ + if (x > -1 && x < 1) return 1; + return 0; +} +static inline float linear_gradient(float x){return 1;} +static inline float logistic_gradient(float x){return (1-x)*x;} +static inline float loggy_gradient(float x) +{ + float y = (x+1.)/2.; + return 2*(1-y)*y; +} +static inline float stair_gradient(float x) +{ + if (floor(x) == x) return 0; + return 1; +} +static inline float relu_gradient(float x){return (x>0);} +static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);} +static inline float selu_gradient(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);} +static inline float relie_gradient(float x){return (x>0) ? 1 : .01;} +static inline float ramp_gradient(float x){return (x>0)+.1;} +static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;} +static inline float tanh_gradient(float x){return 1-x*x;} +static inline float plse_gradient(float x){return (x < 0 || x > 1) ? 
.01 : .125;} + +#endif + diff --git a/workloads/realworld/standard/darknet/src/avgpool_layer.c b/workloads/realworld/standard/darknet/src/avgpool_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..7d17fa8f829aba43652117c141fb8b54ef4cf5dc --- /dev/null +++ b/workloads/realworld/standard/darknet/src/avgpool_layer.c @@ -0,0 +1,71 @@ +#include "avgpool_layer.h" +#include "cuda_dark.h" +#include + +avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) +{ + fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c); + avgpool_layer l = {0}; + l.type = AVGPOOL; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.out_w = 1; + l.out_h = 1; + l.out_c = c; + l.outputs = l.out_c; + l.inputs = h*w*c; + int output_size = l.outputs * batch; + l.output = calloc(output_size, sizeof(float)); + l.delta = calloc(output_size, sizeof(float)); + l.forward = forward_avgpool_layer; + l.backward = backward_avgpool_layer; + #ifdef GPU + l.forward_gpu = forward_avgpool_layer_gpu; + l.backward_gpu = backward_avgpool_layer_gpu; + l.output_gpu = cuda_make_array(l.output, output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); + #endif + return l; +} + +void resize_avgpool_layer(avgpool_layer *l, int w, int h) +{ + l->w = w; + l->h = h; + l->inputs = h*w*l->c; +} + +void forward_avgpool_layer(const avgpool_layer l, network net) +{ + int b,i,k; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < l.c; ++k){ + int out_index = k + b*l.c; + l.output[out_index] = 0; + for(i = 0; i < l.h*l.w; ++i){ + int in_index = i + l.h*l.w*(k + b*l.c); + l.output[out_index] += net.input[in_index]; + } + l.output[out_index] /= l.h*l.w; + } + } +} + +void backward_avgpool_layer(const avgpool_layer l, network net) +{ + int b,i,k; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < l.c; ++k){ + int out_index = k + b*l.c; + for(i = 0; i < l.h*l.w; ++i){ + int in_index = i + l.h*l.w*(k + b*l.c); + net.delta[in_index] += l.delta[out_index] / (l.h*l.w); + } + } + } +} + 
diff --git a/workloads/realworld/standard/darknet/src/avgpool_layer.h b/workloads/realworld/standard/darknet/src/avgpool_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..576ad1db9e9cb87640b0c3f764e2bbfbaae4b2b3 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/avgpool_layer.h @@ -0,0 +1,23 @@ +#ifndef AVGPOOL_LAYER_H +#define AVGPOOL_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +typedef layer avgpool_layer; + +image get_avgpool_image(avgpool_layer l); +avgpool_layer make_avgpool_layer(int batch, int w, int h, int c); +void resize_avgpool_layer(avgpool_layer *l, int w, int h); +void forward_avgpool_layer(const avgpool_layer l, network net); +void backward_avgpool_layer(const avgpool_layer l, network net); + +#ifdef GPU +void forward_avgpool_layer_gpu(avgpool_layer l, network net); +void backward_avgpool_layer_gpu(avgpool_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/standard/darknet/src/avgpool_layer_kernels.cu b/workloads/realworld/standard/darknet/src/avgpool_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..55e5ec372d251e1d4b0c501563f9240437595795 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/avgpool_layer_kernels.cu @@ -0,0 +1,61 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "avgpool_layer.h" +#include "cuda_dark.h" +} + +__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int k = id % c; + id /= c; + int b = id; + + int i; + int out_index = (k + c*b); + output[out_index] = 0; + for(i = 0; i < w*h; ++i){ + int in_index = i + h*w*(k + b*c); + output[out_index] += input[in_index]; + } + output[out_index] /= w*h; +} + +__global__ void backward_avgpool_layer_kernel(int n, int w, int h, int 
c, float *in_delta, float *out_delta) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int k = id % c; + id /= c; + int b = id; + + int i; + int out_index = (k + c*b); + for(i = 0; i < w*h; ++i){ + int in_index = i + h*w*(k + b*c); + in_delta[in_index] += out_delta[out_index] / (w*h); + } +} + +extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network net) +{ + size_t n = layer.c*layer.batch; + + forward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, net.input_gpu, layer.output_gpu); + check_error(cudaPeekAtLastError()); +} + +extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network net) +{ + size_t n = layer.c*layer.batch; + + backward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, net.delta_gpu, layer.delta_gpu); + check_error(cudaPeekAtLastError()); +} + diff --git a/workloads/realworld/standard/darknet/src/batchnorm_layer.c b/workloads/realworld/standard/darknet/src/batchnorm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..ebff387cc4b0365173fb6727efd80ebc80bfbd41 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/batchnorm_layer.c @@ -0,0 +1,279 @@ +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "blas.h" +#include + +layer make_batchnorm_layer(int batch, int w, int h, int c) +{ + fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c); + layer l = {0}; + l.type = BATCHNORM; + l.batch = batch; + l.h = l.out_h = h; + l.w = l.out_w = w; + l.c = l.out_c = c; + l.output = calloc(h * w * c * batch, sizeof(float)); + l.delta = calloc(h * w * c * batch, sizeof(float)); + l.inputs = w*h*c; + l.outputs = l.inputs; + + l.scales = calloc(c, sizeof(float)); + l.scale_updates = calloc(c, sizeof(float)); + l.biases = calloc(c, sizeof(float)); + l.bias_updates = calloc(c, sizeof(float)); + int i; + for(i = 0; i < c; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(c, sizeof(float)); + l.variance = 
calloc(c, sizeof(float)); + + l.rolling_mean = calloc(c, sizeof(float)); + l.rolling_variance = calloc(c, sizeof(float)); + + l.forward = forward_batchnorm_layer; + l.backward = backward_batchnorm_layer; +#ifdef GPU + l.forward_gpu = forward_batchnorm_layer_gpu; + l.backward_gpu = backward_batchnorm_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, h * w * c * batch); + l.delta_gpu = cuda_make_array(l.delta, h * w * c * batch); + + l.biases_gpu = cuda_make_array(l.biases, c); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, c); + + l.scales_gpu = cuda_make_array(l.scales, c); + l.scale_updates_gpu = cuda_make_array(l.scale_updates, c); + + l.mean_gpu = cuda_make_array(l.mean, c); + l.variance_gpu = cuda_make_array(l.variance, c); + + l.rolling_mean_gpu = cuda_make_array(l.mean, c); + l.rolling_variance_gpu = cuda_make_array(l.variance, c); + + l.mean_delta_gpu = cuda_make_array(l.mean, c); + l.variance_delta_gpu = cuda_make_array(l.variance, c); + + l.x_gpu = cuda_make_array(l.output, l.batch*l.outputs); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*l.outputs); + #ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); + + #endif +#endif + return l; +} + +void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + int i,b,f; + for(f = 0; f < n; ++f){ + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int index = i + size*(f + n*b); + sum += delta[index] * x_norm[index]; + } + } + scale_updates[f] += sum; + } +} + +void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + + int i,j,k; + for(i = 0; i < filters; ++i){ + mean_delta[i] = 0; + for (j = 0; 
j < batch; ++j) { + for (k = 0; k < spatial; ++k) { + int index = j*filters*spatial + i*spatial + k; + mean_delta[i] += delta[index]; + } + } + mean_delta[i] *= (-1./sqrt(variance[i] + .00001f)); + } +} +void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + + int i,j,k; + for(i = 0; i < filters; ++i){ + variance_delta[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance_delta[i] += delta[index]*(x[index] - mean[i]); + } + } + variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.)); + } +} +void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + int f, j, k; + for(j = 0; j < batch; ++j){ + for(f = 0; f < filters; ++f){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + f*spatial + k; + delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f)) + variance_delta[f] * 2. 
* (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); + } + } + } +} + +void resize_batchnorm_layer(layer *layer, int w, int h) +{ + fprintf(stderr, "Not implemented\n"); +} + +void forward_batchnorm_layer(layer l, network net) +{ + if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1); + if(net.train){ + mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean); + variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance); + + scal_cpu(l.out_c, .99, l.rolling_mean, 1); + axpy_cpu(l.out_c, .01, l.mean, 1, l.rolling_mean, 1); + scal_cpu(l.out_c, .99, l.rolling_variance, 1); + axpy_cpu(l.out_c, .01, l.variance, 1, l.rolling_variance, 1); + + normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w); + copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1); + } else { + normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h*l.out_w); + } + scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h*l.out_w); + add_bias(l.output, l.biases, l.batch, l.out_c, l.out_h*l.out_w); +} + +void backward_batchnorm_layer(layer l, network net) +{ + if(!net.train){ + l.mean = l.rolling_mean; + l.variance = l.rolling_variance; + } + backward_bias(l.bias_updates, l.delta, l.batch, l.out_c, l.out_w*l.out_h); + backward_scale_cpu(l.x_norm, l.delta, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates); + + scale_bias(l.delta, l.scales, l.batch, l.out_c, l.out_h*l.out_w); + + mean_delta_cpu(l.delta, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta); + variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta); + normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.out_c, l.out_w*l.out_h, l.delta); + if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_batchnorm_layer(layer l) 
+{ + cuda_pull_array(l.scales_gpu, l.scales, l.c); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.c); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.c); +} +void push_batchnorm_layer(layer l) +{ + cuda_push_array(l.scales_gpu, l.scales, l.c); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.c); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.c); +} + +void forward_batchnorm_layer_gpu(layer l, network net) +{ + if(l.type == BATCHNORM) copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); + if (net.train) { +#ifdef CUDNN + float one = 1; + float zero = 0; + cudnnBatchNormalizationForwardTraining(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + l.dstTensorDesc, + l.x_gpu, + l.dstTensorDesc, + l.output_gpu, + l.normTensorDesc, + l.scales_gpu, + l.biases_gpu, + .01, + l.rolling_mean_gpu, + l.rolling_variance_gpu, + .00001, + l.mean_gpu, + l.variance_gpu); +#else + fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu); + fast_variance_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.variance_gpu); + + scal_gpu(l.out_c, .99, l.rolling_mean_gpu, 1); + axpy_gpu(l.out_c, .01, l.mean_gpu, 1, l.rolling_mean_gpu, 1); + scal_gpu(l.out_c, .99, l.rolling_variance_gpu, 1); + axpy_gpu(l.out_c, .01, l.variance_gpu, 1, l.rolling_variance_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); + normalize_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_norm_gpu, 1); + + scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); +#endif + } else { + normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); + scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, 
l.out_h*l.out_w); + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); + } + +} + +void backward_batchnorm_layer_gpu(layer l, network net) +{ + if(!net.train){ + l.mean_gpu = l.rolling_mean_gpu; + l.variance_gpu = l.rolling_variance_gpu; + } +#ifdef CUDNN + float one = 1; + float zero = 0; + cudnnBatchNormalizationBackward(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + &one, + &one, + l.dstTensorDesc, + l.x_gpu, + l.dstTensorDesc, + l.delta_gpu, + l.dstTensorDesc, + l.x_norm_gpu, + l.normTensorDesc, + l.scales_gpu, + l.scale_updates_gpu, + l.bias_updates_gpu, + .00001, + l.mean_gpu, + l.variance_gpu); + copy_gpu(l.outputs*l.batch, l.x_norm_gpu, 1, l.delta_gpu, 1); +#else + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h); + backward_scale_gpu(l.x_norm_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates_gpu); + + scale_bias_gpu(l.delta_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + + fast_mean_delta_gpu(l.delta_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta_gpu); + fast_variance_delta_gpu(l.x_gpu, l.delta_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta_gpu); + normalize_delta_gpu(l.x_gpu, l.mean_gpu, l.variance_gpu, l.mean_delta_gpu, l.variance_delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu); +#endif + if(l.type == BATCHNORM) copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/standard/darknet/src/batchnorm_layer.h b/workloads/realworld/standard/darknet/src/batchnorm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..25a18a3c8f2569bab135b088501248159e1cae11 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/batchnorm_layer.h @@ -0,0 +1,19 @@ +#ifndef BATCHNORM_LAYER_H +#define BATCHNORM_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +layer make_batchnorm_layer(int batch, int w, 
int h, int c); +void forward_batchnorm_layer(layer l, network net); +void backward_batchnorm_layer(layer l, network net); + +#ifdef GPU +void forward_batchnorm_layer_gpu(layer l, network net); +void backward_batchnorm_layer_gpu(layer l, network net); +void pull_batchnorm_layer(layer l); +void push_batchnorm_layer(layer l); +#endif + +#endif diff --git a/workloads/realworld/standard/darknet/src/blas.c b/workloads/realworld/standard/darknet/src/blas.c new file mode 100644 index 0000000000000000000000000000000000000000..9e1604449ba9aeb9decdc7f0395a38bd3b478671 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/blas.c @@ -0,0 +1,351 @@ +#include "blas.h" + +#include +#include +#include +#include +#include +#include +void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int b,i,j,k; + int out_c = c/(stride*stride); + + for(b = 0; b < batch; ++b){ + for(k = 0; k < c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int in_index = i + w*(j + h*(k + c*b)); + int c2 = k % out_c; + int offset = k / out_c; + int w2 = i*stride + offset % stride; + int h2 = j*stride + offset / stride; + int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); + if(forward) out[out_index] = x[in_index]; + else out[in_index] = x[out_index]; + } + } + } + } +} + +void flatten(float *x, int size, int layers, int batch, int forward) +{ + float *swap = calloc(size*layers*batch, sizeof(float)); + int i,c,b; + for(b = 0; b < batch; ++b){ + for(c = 0; c < layers; ++c){ + for(i = 0; i < size; ++i){ + int i1 = b*layers*size + c*size + i; + int i2 = b*layers*size + i*layers + c; + if (forward) swap[i2] = x[i1]; + else swap[i1] = x[i2]; + } + } + } + memcpy(x, swap, size*layers*batch*sizeof(float)); + free(swap); +} + +void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c) +{ + int i; + for(i = 0; i < n; ++i){ + c[i] = s[i]*a[i] + (1-s[i])*(b ? 
b[i] : 0); + } +} + +void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc) +{ + int i; + for(i = 0; i < n; ++i){ + if(da) da[i] += dc[i] * s[i]; + if(db) db[i] += dc[i] * (1-s[i]); + ds[i] += dc[i] * (a[i] - b[i]); + } +} + +void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) +{ + int stride = w1/w2; + int sample = w2/w1; + assert(stride == h1/h2); + assert(sample == h2/h1); + if(stride < 1) stride = 1; + if(sample < 1) sample = 1; + int minw = (w1 < w2) ? w1 : w2; + int minh = (h1 < h2) ? h1 : h2; + int minc = (c1 < c2) ? c1 : c2; + + int i,j,k,b; + for(b = 0; b < batch; ++b){ + for(k = 0; k < minc; ++k){ + for(j = 0; j < minh; ++j){ + for(i = 0; i < minw; ++i){ + int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); + int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); + out[out_index] = s1*out[out_index] + s2*add[add_index]; + } + } + } + } +} + +void mean_cpu(float *x, int batch, int filters, int spatial, float *mean) +{ + float scale = 1./(batch * spatial); + int i,j,k; + for(i = 0; i < filters; ++i){ + mean[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + mean[i] += x[index]; + } + } + mean[i] *= scale; + } +} + +void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + float scale = 1./(batch * spatial - 1); + int i,j,k; + for(i = 0; i < filters; ++i){ + variance[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance[i] += pow((x[index] - mean[i]), 2); + } + } + variance[i] *= scale; + } +} + +void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial) +{ + int b,f,i; + for(b = 0; b < batch; ++b){ + for(i = 0; i < spatial; ++i){ + float sum = 0; + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + 
sum += powf(x[index], 2); + } + sum = sqrtf(sum); + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + x[index] /= sum; + dx[index] = (1 - x[index]) / sum; + } + } + } +} + + +void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) +{ + int b, f, i; + for(b = 0; b < batch; ++b){ + for(f = 0; f < filters; ++f){ + for(i = 0; i < spatial; ++i){ + int index = b*filters*spatial + f*spatial + i; + x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + .000001f); + } + } + } +} + +void const_cpu(int N, float ALPHA, float *X, int INCX) +{ + int i; + for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; +} + +void mul_cpu(int N, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] *= X[i*INCX]; +} + +void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] = pow(X[i*INCX], ALPHA); +} + +void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX]; +} + +void scal_cpu(int N, float ALPHA, float *X, int INCX) +{ + int i; + for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA; +} + +void fill_cpu(int N, float ALPHA, float *X, int INCX) +{ + int i; + for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; +} + +void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i, j; + int index = 0; + for(j = 0; j < B; ++j) { + for(i = 0; i < NX; ++i){ + if(X) X[j*NX + i] += OUT[index]; + ++index; + } + for(i = 0; i < NY; ++i){ + if(Y) Y[j*NY + i] += OUT[index]; + ++index; + } + } +} + +void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i, j; + int index = 0; + for(j = 0; j < B; ++j) { + for(i = 0; i < NX; ++i){ + OUT[index++] = X[j*NX + i]; + } + for(i = 0; i < NY; ++i){ + OUT[index++] = Y[j*NY + i]; + } + } +} + +void copy_cpu(int N, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX]; 
+} + +void mult_add_into_cpu(int N, float *X, float *Y, float *Z) +{ + int i; + for(i = 0; i < N; ++i) Z[i] += X[i]*Y[i]; +} + +void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float diff = truth[i] - pred[i]; + float abs_val = fabs(diff); + if(abs_val < 1) { + error[i] = diff * diff; + delta[i] = diff; + } + else { + error[i] = 2*abs_val - 1; + delta[i] = (diff < 0) ? 1 : -1; + } + } +} + +void l1_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float diff = truth[i] - pred[i]; + error[i] = fabs(diff); + delta[i] = diff > 0 ? 1 : -1; + } +} + +void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float t = truth[i]; + float p = pred[i]; + error[i] = (t) ? -log(p) : 0; + delta[i] = t-p; + } +} + +void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float t = truth[i]; + float p = pred[i]; + error[i] = -t*log(p) - (1-t)*log(1-p); + delta[i] = t-p; + } +} + +void l2_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float diff = truth[i] - pred[i]; + error[i] = diff * diff; + delta[i] = diff; + } +} + +float dot_cpu(int N, float *X, int INCX, float *Y, int INCY) +{ + int i; + float dot = 0; + for(i = 0; i < N; ++i) dot += X[i*INCX] * Y[i*INCY]; + return dot; +} + +void softmax(float *input, int n, float temp, int stride, float *output) +{ + int i; + float sum = 0; + float largest = -FLT_MAX; + for(i = 0; i < n; ++i){ + if(input[i*stride] > largest) largest = input[i*stride]; + } + for(i = 0; i < n; ++i){ + float e = exp(input[i*stride]/temp - largest/temp); + sum += e; + output[i*stride] = e; + } + for(i = 0; i < n; ++i){ + output[i*stride] /= sum; + } +} + + +void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int 
group_offset, int stride, float temp, float *output) +{ + int g, b; + for(b = 0; b < batch; ++b){ + for(g = 0; g < groups; ++g){ + softmax(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset); + } + } +} + +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + int i, j, k, b; + for(b = 0; b < batch; ++b){ + for(k = 0; k < c; ++k){ + for(j = 0; j < h*stride; ++j){ + for(i = 0; i < w*stride; ++i){ + int in_index = b*w*h*c + k*w*h + (j/stride)*w + i/stride; + int out_index = b*w*h*c*stride*stride + k*w*h*stride*stride + j*w*stride + i; + if(forward) out[out_index] = scale*in[in_index]; + else in[in_index] += scale*out[out_index]; + } + } + } + } +} + + diff --git a/workloads/realworld/standard/darknet/src/blas.h b/workloads/realworld/standard/darknet/src/blas.h new file mode 100644 index 0000000000000000000000000000000000000000..5d24a9aea70d8050b05098aa7a5634576444a32c --- /dev/null +++ b/workloads/realworld/standard/darknet/src/blas.h @@ -0,0 +1,105 @@ +#ifndef BLAS_H +#define BLAS_H +#include "darknet.h" + +void flatten(float *x, int size, int layers, int batch, int forward); +void pm(int M, int N, float *A); +float *random_matrix(int rows, int cols); +void time_random_matrix(int TA, int TB, int m, int k, int n); +void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); + +void test_blas(); + +void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void mult_add_into_cpu(int N, float *X, float *Y, float *Z); + +void const_cpu(int N, float ALPHA, float *X, int INCX); +void constrain_gpu(int N, float ALPHA, float * X, int INCX); +void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void mul_cpu(int N, float *X, int INCX, float *Y, int INCY); + +int test_gpu_blas(); +void shortcut_cpu(int batch, int w1, int h1, int 
c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out); + +void mean_cpu(float *x, int batch, int filters, int spatial, float *mean); +void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); + +void scale_bias(float *output, float *scales, int batch, int n, int size); +void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); +void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); +void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); +void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); +void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial); + +void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error); +void l2_cpu(int n, float *pred, float *truth, float *delta, float *error); +void l1_cpu(int n, float *pred, float *truth, float *delta, float *error); +void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); +void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); +void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c); +void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc); + +void softmax(float *input, int n, float temp, int stride, float *output); +void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); + +#ifdef GPU +#include "cuda_dark.h" +#include "tree.h" + +void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); +void 
axpy_gpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); +void copy_gpu(int N, float * X, int INCX, float * Y, int INCY); +void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); +void add_gpu(int N, float ALPHA, float * X, int INCX); +void supp_gpu(int N, float ALPHA, float * X, int INCX); +void mask_gpu(int N, float * X, float mask_num, float * mask, float val); +void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale); +void const_gpu(int N, float ALPHA, float *X, int INCX); +void pow_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void mul_gpu(int N, float *X, int INCX, float *Y, int INCY); + +void mean_gpu(float *x, int batch, int filters, int spatial, float *mean); +void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); +void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); +void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial); + +void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); + +void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); +void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); + +void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); +void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean); +void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out); +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); +void 
scale_bias_gpu(float *output, float *biases, int batch, int n, int size); +void add_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); + +void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); +void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); +void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error); +void l2_gpu(int n, float *pred, float *truth, float *delta, float *error); +void l1_gpu(int n, float *pred, float *truth, float *delta, float *error); +void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error); +void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc); +void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c); +void mult_add_into_gpu(int num, float *a, float *b, float *c); +void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT); + +void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); + +void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); +void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t); +void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t); + +void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out); +void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier); +void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); + +#endif +#endif diff --git 
a/workloads/realworld/standard/darknet/src/blas_kernels.cu b/workloads/realworld/standard/darknet/src/blas_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..3db34a057b318e87769058c4b7fdc81f02780a9d --- /dev/null +++ b/workloads/realworld/standard/darknet/src/blas_kernels.cu @@ -0,0 +1,1035 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" +#include + +extern "C" { +#include "blas.h" +#include "cuda_dark.h" +#include "utils.h" +} + +__global__ void scale_bias_kernel(float *output, float *biases, int n, int size) +{ + int offset = blockIdx.x * blockDim.x + threadIdx.x; + int filter = blockIdx.y; + int batch = blockIdx.z; + + if(offset < size) output[(batch*n+filter)*size + offset] *= biases[filter]; +} + +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size) +{ + dim3 dimGrid((size-1)/BLOCK + 1, n, batch); + dim3 dimBlock(BLOCK, 1, 1); + + scale_bias_kernel<<>>(output, biases, n, size); + check_error(cudaPeekAtLastError()); +} + +__global__ void backward_scale_kernel(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + __shared__ float part[BLOCK]; + int i,b; + int filter = blockIdx.x; + int p = threadIdx.x; + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; i += BLOCK){ + int index = p + i + size*(filter + n*b); + sum += (p+i < size) ? 
delta[index]*x_norm[index] : 0; + } + } + part[p] = sum; + __syncthreads(); + if (p == 0) { + for(i = 0; i < BLOCK; ++i) scale_updates[filter] += part[i]; + } +} + +void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + backward_scale_kernel<<>>(x_norm, delta, batch, n, size, scale_updates); + check_error(cudaPeekAtLastError()); +} + +__global__ void add_bias_kernel(float *output, float *biases, int batch, int n, int size) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= n*size*batch) return; + int i = index % size; + index /= size; + int j = index % n; + index /= n; + int k = index; + + output[(k*n+j)*size + i] += biases[j]; +} + +void add_bias_gpu(float *output, float *biases, int batch, int n, int size) +{ + int num = n*size*batch; + + add_bias_kernel<<>>(output, biases, batch, n, size); + check_error(cudaPeekAtLastError()); +} + +__global__ void backward_bias_conn_kernel(float *bias_updates, float *delta, int batch, int n) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= n) return; + int b; + float sum = 0; + for(b = 0; b < batch; ++b){ + int i = b*n + index; + sum += delta[i]; + } + bias_updates[index] += sum; +} + +__global__ void backward_bias_kernel(float *bias_updates, float *delta, int batch, int n, int size) +{ + __shared__ float part[BLOCK]; + int i,b; + int filter = blockIdx.x; + int p = threadIdx.x; + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; i += BLOCK){ + int index = p + i + size*(filter + n*b); + sum += (p+i < size) ? 
delta[index] : 0; + } + } + part[p] = sum; + __syncthreads(); + if (p == 0) { + for(i = 0; i < BLOCK; ++i) bias_updates[filter] += part[i]; + } +} + +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size) +{ + if(size == 1){ + backward_bias_conn_kernel<<>>(bias_updates, delta, batch, n); + }else{ + backward_bias_kernel<<>>(bias_updates, delta, batch, n, size); + } + check_error(cudaPeekAtLastError()); +} + +/* +__global__ void dot_kernel(float *output, float scale, int batch, int n, int size, float *delta) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int f1 = index / n; + int f2 = index % n; + if (f2 <= f1) return; + + float sum = 0; + float norm1 = 0; + float norm2 = 0; + int b, i; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int i1 = b * size * n + f1 * size + i; + int i2 = b * size * n + f2 * size + i; + sum += output[i1] * output[i2]; + norm1 += output[i1] * output[i1]; + norm2 += output[i2] * output[i2]; + } + } + norm1 = sqrt(norm1); + norm2 = sqrt(norm2); + float norm = norm1 * norm2; + sum = sum / norm; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int i1 = b * size * n + f1 * size + i; + int i2 = b * size * n + f2 * size + i; + delta[i1] += - scale * sum * output[i2] / norm; + delta[i2] += - scale * sum * output[i1] / norm; + } + } +} + +void dot_error_gpu(layer l) +{ + dot_kernel<<>>(l.output_gpu, l.dot, l.batch, l.n, l.out_w * l.out_h, l.delta_gpu); + check_error(cudaPeekAtLastError()); +} +*/ + + +__global__ void adam_kernel(int N, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + + float mhat = m[index] / (1.f - powf(B1, t)); + float vhat = v[index] / (1.f - powf(B2, t)); + + x[index] = x[index] + rate * mhat / (sqrtf(vhat) + eps); +} + +extern "C" void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, 
float rate, float eps, int t) +{ + adam_kernel<<>>(n, x, m, v, B1, B2, rate, eps, t); + check_error(cudaPeekAtLastError()); +} + +extern "C" void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t) +{ + scal_gpu(n, B1, m, 1); + scal_gpu(n, B2, v, 1); + axpy_gpu(n, -decay*batch, w, 1, d, 1); + + axpy_gpu(n, (1-B1), d, 1, m, 1); + mul_gpu(n, d, 1, d, 1); + axpy_gpu(n, (1-B2), d, 1, v, 1); + + adam_gpu(n, w, m, v, B1, B2, rate, eps, t); + fill_gpu(n, 0, d, 1); +} + +__global__ void normalize_kernel(int N, float *x, float *mean, float *variance, int batch, int filters, int spatial) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int f = (index/spatial)%filters; + + x[index] = (x[index] - mean[f])/(sqrtf(variance[f] + .00001f)); +} + +__global__ void normalize_delta_kernel(int N, float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int f = (index/spatial)%filters; + + delta[index] = delta[index] * 1.f/(sqrtf(variance[f] + .00001f)) + variance_delta[f] * 2.f * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); +} + +extern "C" void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + size_t N = batch*filters*spatial; + normalize_delta_kernel<<>>(N, x, mean, variance, mean_delta, variance_delta, batch, filters, spatial, delta); + check_error(cudaPeekAtLastError()); +} + +__global__ void variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) 
return; + int j,k; + variance_delta[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance_delta[i] += delta[index]*(x[index] - mean[i]); + } + } + variance_delta[i] *= -.5f * powf(variance[i] + .00001f, (float)(-3.f/2.f)); +} + +__global__ void accumulate_kernel(float *x, int n, int groups, float *sum) +{ + int k; + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= groups) return; + sum[i] = 0; + for(k = 0; k < n; ++k){ + sum[i] += x[k*groups + i]; + } +} + +__global__ void fast_mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + local[id] += (i+id < spatial) ? delta[index] : 0; + } + } + + __syncthreads(); + + if(id == 0){ + mean_delta[filter] = 0; + for(i = 0; i < threads; ++i){ + mean_delta[filter] += local[i]; + } + mean_delta[filter] *= (-1.f/sqrtf(variance[filter] + .00001f)); + } +} + +__global__ void fast_variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + + local[id] += (i+id < spatial) ? 
delta[index]*(x[index] - mean[filter]) : 0; + } + } + + __syncthreads(); + + if(id == 0){ + variance_delta[filter] = 0; + for(i = 0; i < threads; ++i){ + variance_delta[filter] += local[i]; + } + variance_delta[filter] *= -.5f * powf(variance[filter] + .00001f, (float)(-3.f/2.f)); + } +} + + +__global__ void mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + int j,k; + mean_delta[i] = 0; + for (j = 0; j < batch; ++j) { + for (k = 0; k < spatial; ++k) { + int index = j*filters*spatial + i*spatial + k; + mean_delta[i] += delta[index]; + } + } + mean_delta[i] *= (-1.f/sqrtf(variance[i] + .00001f)); +} + +extern "C" void mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + mean_delta_kernel<<>>(delta, variance, batch, filters, spatial, mean_delta); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + fast_mean_delta_kernel<<>>(delta, variance, batch, filters, spatial, mean_delta); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + fast_variance_delta_kernel<<>>(x, delta, mean, variance, batch, filters, spatial, variance_delta); + check_error(cudaPeekAtLastError()); +} + +__global__ void mean_kernel(float *x, int batch, int filters, int spatial, float *mean) +{ + float scale = 1.f/(batch * spatial); + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + int j,k; + mean[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + mean[i] += x[index]; + } + } + mean[i] *= scale; +} + 
+__global__ void variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + float scale = 1.f/(batch * spatial - 1); + int j,k; + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + variance[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance[i] += powf((x[index] - mean[i]), 2); + } + } + variance[i] *= scale; +} + +__global__ void reorg_kernel(int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int in_index = i; + int in_w = i%w; + i = i/w; + int in_h = i%h; + i = i/h; + int in_c = i%c; + i = i/c; + int b = i%batch; + + int out_c = c/(stride*stride); + + int c2 = in_c % out_c; + int offset = in_c / out_c; + int w2 = in_w*stride + offset % stride; + int h2 = in_h*stride + offset / stride; + //printf("%d\n", offset); + int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); + + // printf("%d %d %d\n", w2, h2, c2); + //printf("%d %d\n", in_index, out_index); + //if(out_index >= N || out_index < 0) printf("bad bad bad \n"); + + if(forward) out[out_index] = x[in_index]; + else out[in_index] = x[out_index]; + //if(forward) out[1] = x[1]; + //else out[0] = x[0]; +} + +__global__ void axpy_kernel(int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[OFFY+i*INCY] += ALPHA*X[OFFX+i*INCX]; +} + +__global__ void pow_kernel(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[i*INCY] = pow(X[i*INCX], ALPHA); +} + +__global__ void const_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i 
< N) X[i*INCX] = ALPHA; +} + +__global__ void constrain_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] = fminf(ALPHA, fmaxf(-ALPHA, X[i*INCX])); +} + +__global__ void supp_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) { + if((X[i*INCX] * X[i*INCX]) < (ALPHA * ALPHA)) X[i*INCX] = 0; + } +} + +__global__ void add_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] += ALPHA; +} + +__global__ void scal_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] *= ALPHA; +} + +__global__ void fill_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] = ALPHA; +} + +__global__ void copy_kernel(int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[i*INCY + OFFY] = X[i*INCX + OFFX]; +} + +__global__ void mul_kernel(int N, float *X, int INCX, float *Y, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[i*INCY] *= X[i*INCX]; +} + + +extern "C" void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) +{ + size_t N = batch*filters*spatial; + normalize_kernel<<>>(N, x, mean, variance, batch, filters, spatial); + check_error(cudaPeekAtLastError()); +} + +__global__ void l2norm_kernel(int N, float *x, float *dx, int batch, int filters, int spatial) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int b = index / spatial; + int i = index % spatial; + int f; + float sum = 0; + 
for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + sum += powf(x[index], 2); + } + sum = sqrtf(sum); + if(sum == 0) sum = 1; + //printf("%f\n", sum); + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + x[index] /= sum; + dx[index] = (1 - x[index]) / sum; + } +} + +extern "C" void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial) +{ + size_t N = batch*spatial; + l2norm_kernel<<>>(N, x, dx, batch, filters, spatial); + check_error(cudaPeekAtLastError()); +} + +__global__ void fast_mean_kernel(float *x, int batch, int filters, int spatial, float *mean) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + local[id] += (i+id < spatial) ? x[index] : 0; + } + } + + __syncthreads(); + + if(id == 0){ + mean[filter] = 0; + for(i = 0; i < threads; ++i){ + mean[filter] += local[i]; + } + mean[filter] /= spatial * batch; + } +} + +__global__ void fast_variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + + local[id] += (i+id < spatial) ? 
powf((x[index] - mean[filter]), 2) : 0; + } + } + + __syncthreads(); + + if(id == 0){ + variance[filter] = 0; + for(i = 0; i < threads; ++i){ + variance[filter] += local[i]; + } + variance[filter] /= (spatial * batch - 1); + } +} + +extern "C" void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean) +{ + fast_mean_kernel<<>>(x, batch, filters, spatial, mean); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + fast_variance_kernel<<>>(x, mean, batch, filters, spatial, variance); + check_error(cudaPeekAtLastError()); +} + + +extern "C" void mean_gpu(float *x, int batch, int filters, int spatial, float *mean) +{ + mean_kernel<<>>(x, batch, filters, spatial, mean); + check_error(cudaPeekAtLastError()); +} + +extern "C" void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + variance_kernel<<>>(x, mean, batch, filters, spatial, variance); + check_error(cudaPeekAtLastError()); +} + +extern "C" void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY) +{ + axpy_gpu_offset(N, ALPHA, X, 0, INCX, Y, 0, INCY); +} + +extern "C" void pow_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY) +{ + pow_kernel<<>>(N, ALPHA, X, INCX, Y, INCY); + check_error(cudaPeekAtLastError()); +} + +extern "C" void axpy_gpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY) +{ + axpy_kernel<<>>(N, ALPHA, X, OFFX, INCX, Y, OFFY, INCY); + check_error(cudaPeekAtLastError()); +} + +extern "C" void copy_gpu(int N, float * X, int INCX, float * Y, int INCY) +{ + copy_gpu_offset(N, X, 0, INCX, Y, 0, INCY); +} + +extern "C" void mul_gpu(int N, float * X, int INCX, float * Y, int INCY) +{ + mul_kernel<<>>(N, X, INCX, Y, INCY); + check_error(cudaPeekAtLastError()); +} + +extern "C" void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int 
INCY) +{ + copy_kernel<<>>(N, X, OFFX, INCX, Y, OFFY, INCY); + check_error(cudaPeekAtLastError()); +} + +__global__ void flatten_kernel(int N, float *x, int spatial, int layers, int batch, int forward, float *out) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int in_s = i%spatial; + i = i/spatial; + int in_c = i%layers; + i = i/layers; + int b = i; + + int i1 = b*layers*spatial + in_c*spatial + in_s; + int i2 = b*layers*spatial + in_s*layers + in_c; + + if (forward) out[i2] = x[i1]; + else out[i1] = x[i2]; +} + +extern "C" void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out) +{ + int size = spatial*batch*layers; + flatten_kernel<<>>(size, x, spatial, layers, batch, forward, out); + check_error(cudaPeekAtLastError()); +} + +extern "C" void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int size = w*h*c*batch; + reorg_kernel<<>>(size, x, w, h, c, batch, stride, forward, out); + check_error(cudaPeekAtLastError()); +} + +__global__ void mask_kernel(int n, float *x, float mask_num, float *mask, float val) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n && mask[i] == mask_num) x[i] = val; +} + +extern "C" void mask_gpu(int N, float * X, float mask_num, float * mask, float val) +{ + mask_kernel<<>>(N, X, mask_num, mask, val); + check_error(cudaPeekAtLastError()); +} + +__global__ void scale_mask_kernel(int n, float *x, float mask_num, float *mask, float scale) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n && mask[i] == mask_num) x[i] *= scale; +} + +extern "C" void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale) +{ + scale_mask_kernel<<>>(N, X, mask_num, mask, scale); + check_error(cudaPeekAtLastError()); +} + +extern "C" void const_gpu(int N, float ALPHA, float * X, int INCX) +{ + const_kernel<<>>(N, ALPHA, X, INCX); + 
check_error(cudaPeekAtLastError()); +} + +extern "C" void constrain_gpu(int N, float ALPHA, float * X, int INCX) +{ + constrain_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + + +extern "C" void add_gpu(int N, float ALPHA, float * X, int INCX) +{ + add_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void scal_gpu(int N, float ALPHA, float * X, int INCX) +{ + scal_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void supp_gpu(int N, float ALPHA, float * X, int INCX) +{ + supp_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fill_gpu(int N, float ALPHA, float * X, int INCX) +{ + fill_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +__global__ void shortcut_kernel(int size, int minw, int minh, int minc, int stride, int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= size) return; + int i = id % minw; + id /= minw; + int j = id % minh; + id /= minh; + int k = id % minc; + id /= minc; + int b = id % batch; + + int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); + int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); + out[out_index] = s1*out[out_index] + s2*add[add_index]; + //out[out_index] += add[add_index]; +} + +extern "C" void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) +{ + int minw = (w1 < w2) ? w1 : w2; + int minh = (h1 < h2) ? h1 : h2; + int minc = (c1 < c2) ? 
c1 : c2; + + int stride = w1/w2; + int sample = w2/w1; + assert(stride == h1/h2); + assert(sample == h2/h1); + if(stride < 1) stride = 1; + if(sample < 1) sample = 1; + + int size = batch * minw * minh * minc; + shortcut_kernel<<>>(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, s1, s2, out); + check_error(cudaPeekAtLastError()); +} + +__global__ void smooth_l1_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float diff = truth[i] - pred[i]; + float abs_val = fabsf(diff); + if(abs_val < 1) { + error[i] = diff * diff; + delta[i] = diff; + } + else { + error[i] = 2*abs_val - 1; + delta[i] = (diff > 0) ? 1 : -1; + } + } +} + +extern "C" void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + smooth_l1_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void softmax_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float t = truth[i]; + float p = pred[i]; + error[i] = (t) ? 
-log(p) : 0; + delta[i] = t-p; + } +} + +extern "C" void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + softmax_x_ent_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void logistic_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float t = truth[i]; + float p = pred[i]; + error[i] = -t*log(p+.0000001) - (1-t)*log(1-p+.0000001); + delta[i] = t-p; + } +} + +extern "C" void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + logistic_x_ent_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void l2_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float diff = truth[i] - pred[i]; + error[i] = diff * diff; //I know this is technically wrong, deal with it. + delta[i] = diff; + } +} + +extern "C" void l2_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + l2_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void l1_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float diff = truth[i] - pred[i]; + error[i] = abs(diff); + delta[i] = (diff > 0) ? 1 : -1; + } +} + +extern "C" void l1_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + l1_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void wgan_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + error[i] = truth[i] ? -pred[i] : pred[i]; + delta[i] = (truth[i] > 0) ? 
1 : -1; + } +} + +extern "C" void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + wgan_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + + + + +__global__ void weighted_sum_kernel(int n, float *a, float *b, float *s, float *c) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + c[i] = s[i]*a[i] + (1-s[i])*(b ? b[i] : 0); + } +} + +__global__ void deinter_kernel(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < (NX+NY)*B){ + int b = i / (NX+NY); + int j = i % (NX+NY); + if (j < NX){ + if(X) X[b*NX + j] += OUT[i]; + } else { + if(Y) Y[b*NY + j - NX] += OUT[i]; + } + } +} + +extern "C" void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + deinter_kernel<<>>(NX, X, NY, Y, B, OUT); + check_error(cudaPeekAtLastError()); +} + +__global__ void inter_kernel(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < (NX+NY)*B){ + int b = i / (NX+NY); + int j = i % (NX+NY); + if (j < NX){ + OUT[i] = X[b*NX + j]; + } else { + OUT[i] = Y[b*NY + j - NX]; + } + } +} + +extern "C" void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + inter_kernel<<>>(NX, X, NY, Y, B, OUT); + check_error(cudaPeekAtLastError()); +} + +extern "C" void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c) +{ + weighted_sum_kernel<<>>(num, a, b, s, c); + check_error(cudaPeekAtLastError()); +} + +__global__ void weighted_delta_kernel(int n, float *a, float *b, float *s, float *da, float *db, float *ds, float *dc) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + if(da) da[i] += dc[i] * s[i]; + if(db) db[i] += dc[i] * (1-s[i]); + ds[i] += dc[i] * (a[i] - b[i]); + } +} + +extern "C" void weighted_delta_gpu(float *a, float *b, float *s, 
float *da, float *db, float *ds, int num, float *dc) +{ + weighted_delta_kernel<<>>(num, a, b, s, da, db, ds, dc); + check_error(cudaPeekAtLastError()); +} + +__global__ void mult_add_into_kernel(int n, float *a, float *b, float *c) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + c[i] += a[i]*b[i]; + } +} + +extern "C" void mult_add_into_gpu(int num, float *a, float *b, float *c) +{ + mult_add_into_kernel<<>>(num, a, b, c); + check_error(cudaPeekAtLastError()); +} + + +__device__ void softmax_device(float *input, int n, float temp, int stride, float *output) +{ + int i; + float sum = 0; + float largest = -INFINITY; + for(i = 0; i < n; ++i){ + int val = input[i*stride]; + largest = (val>largest) ? val : largest; + } + for(i = 0; i < n; ++i){ + float e = expf(input[i*stride]/temp - largest/temp); + sum += e; + output[i*stride] = e; + } + for(i = 0; i < n; ++i){ + output[i*stride] /= sum; + } +} + + +__global__ void softmax_tree_kernel(float *input, int spatial, int batch, int stride, float temp, float *output, int groups, int *group_size, int *group_offset) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= spatial*batch*groups) return; + int s = id % spatial; + id = id / spatial; + int g = id % groups; + int b = id / groups; + int goff = group_offset[g]*spatial; + int boff = b*stride; + softmax_device(input + goff + boff + s, group_size[g], temp, spatial, output + goff + boff + s); +} + +extern "C" void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier) +{ + int *tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups); + int *tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups); + /* + static int *tree_groups_size = 0; + static int *tree_groups_offset = 0; + if(!tree_groups_size){ + tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups); + tree_groups_offset = 
cuda_make_int_array(hier.group_offset, hier.groups); + } + */ + int num = spatial*batch*hier.groups; + softmax_tree_kernel<<>>(input, spatial, batch, stride, temp, output, hier.groups, tree_groups_size, tree_groups_offset); + check_error(cudaPeekAtLastError()); + cuda_free((float *)tree_groups_size); + cuda_free((float *)tree_groups_offset); +} + +__global__ void softmax_kernel(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= batch*groups) return; + int b = id / groups; + int g = id % groups; + softmax_device(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset); +} + +extern "C" void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) +{ + softmax_kernel<<>>(input, n, batch, batch_offset, groups, group_offset, stride, temp, output); + check_error(cudaPeekAtLastError()); +} + + +__global__ void upsample_kernel(size_t N, float *x, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + size_t i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int out_index = i; + int out_w = i%(w*stride); + i = i/(w*stride); + int out_h = i%(h*stride); + i = i/(h*stride); + int out_c = i%c; + i = i/c; + int b = i%batch; + + int in_w = out_w / stride; + int in_h = out_h / stride; + int in_c = out_c; + + int in_index = b*w*h*c + in_c*w*h + in_h*w + in_w; + + + if(forward) out[out_index] += scale * x[in_index]; + else atomicAdd(x+in_index, scale * out[out_index]); +} +extern "C" void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + size_t size = w*h*c*batch*stride*stride; + upsample_kernel<<>>(size, in, w, h, c, batch, stride, forward, scale, out); + 
check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/standard/darknet/src/box.c b/workloads/realworld/standard/darknet/src/box.c new file mode 100644 index 0000000000000000000000000000000000000000..8a1772c9ae05dede6ddc83d9b6465f64cf974ae8 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/box.c @@ -0,0 +1,357 @@ +#include "box.h" +#include +#include +#include + +int nms_comparator(const void *pa, const void *pb) +{ + detection a = *(detection *)pa; + detection b = *(detection *)pb; + float diff = 0; + if(b.sort_class >= 0){ + diff = a.prob[b.sort_class] - b.prob[b.sort_class]; + } else { + diff = a.objectness - b.objectness; + } + if(diff < 0) return 1; + else if(diff > 0) return -1; + return 0; +} + +void do_nms_obj(detection *dets, int total, int classes, float thresh) +{ + int i, j, k; + k = total-1; + for(i = 0; i <= k; ++i){ + if(dets[i].objectness == 0){ + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k+1; + + for(i = 0; i < total; ++i){ + dets[i].sort_class = -1; + } + + qsort(dets, total, sizeof(detection), nms_comparator); + for(i = 0; i < total; ++i){ + if(dets[i].objectness == 0) continue; + box a = dets[i].bbox; + for(j = i+1; j < total; ++j){ + if(dets[j].objectness == 0) continue; + box b = dets[j].bbox; + if (box_iou(a, b) > thresh){ + dets[j].objectness = 0; + for(k = 0; k < classes; ++k){ + dets[j].prob[k] = 0; + } + } + } + } +} + + +void do_nms_sort(detection *dets, int total, int classes, float thresh) +{ + int i, j, k; + k = total-1; + for(i = 0; i <= k; ++i){ + if(dets[i].objectness == 0){ + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k+1; + + for(k = 0; k < classes; ++k){ + for(i = 0; i < total; ++i){ + dets[i].sort_class = k; + } + qsort(dets, total, sizeof(detection), nms_comparator); + for(i = 0; i < total; ++i){ + if(dets[i].prob[k] == 0) continue; + box a = dets[i].bbox; + for(j = i+1; j < total; ++j){ + 
box b = dets[j].bbox; + if (box_iou(a, b) > thresh){ + dets[j].prob[k] = 0; + } + } + } + } +} + +box float_to_box(float *f, int stride) +{ + box b = {0}; + b.x = f[0]; + b.y = f[1*stride]; + b.w = f[2*stride]; + b.h = f[3*stride]; + return b; +} + +dbox derivative(box a, box b) +{ + dbox d; + d.dx = 0; + d.dw = 0; + float l1 = a.x - a.w/2; + float l2 = b.x - b.w/2; + if (l1 > l2){ + d.dx -= 1; + d.dw += .5; + } + float r1 = a.x + a.w/2; + float r2 = b.x + b.w/2; + if(r1 < r2){ + d.dx += 1; + d.dw += .5; + } + if (l1 > r2) { + d.dx = -1; + d.dw = 0; + } + if (r1 < l2){ + d.dx = 1; + d.dw = 0; + } + + d.dy = 0; + d.dh = 0; + float t1 = a.y - a.h/2; + float t2 = b.y - b.h/2; + if (t1 > t2){ + d.dy -= 1; + d.dh += .5; + } + float b1 = a.y + a.h/2; + float b2 = b.y + b.h/2; + if(b1 < b2){ + d.dy += 1; + d.dh += .5; + } + if (t1 > b2) { + d.dy = -1; + d.dh = 0; + } + if (b1 < t2){ + d.dy = 1; + d.dh = 0; + } + return d; +} + +float overlap(float x1, float w1, float x2, float w2) +{ + float l1 = x1 - w1/2; + float l2 = x2 - w2/2; + float left = l1 > l2 ? l1 : l2; + float r1 = x1 + w1/2; + float r2 = x2 + w2/2; + float right = r1 < r2 ? 
r1 : r2; + return right - left; +} + +float box_intersection(box a, box b) +{ + float w = overlap(a.x, a.w, b.x, b.w); + float h = overlap(a.y, a.h, b.y, b.h); + if(w < 0 || h < 0) return 0; + float area = w*h; + return area; +} + +float box_union(box a, box b) +{ + float i = box_intersection(a, b); + float u = a.w*a.h + b.w*b.h - i; + return u; +} + +float box_iou(box a, box b) +{ + return box_intersection(a, b)/box_union(a, b); +} + +float box_rmse(box a, box b) +{ + return sqrt(pow(a.x-b.x, 2) + + pow(a.y-b.y, 2) + + pow(a.w-b.w, 2) + + pow(a.h-b.h, 2)); +} + +dbox dintersect(box a, box b) +{ + float w = overlap(a.x, a.w, b.x, b.w); + float h = overlap(a.y, a.h, b.y, b.h); + dbox dover = derivative(a, b); + dbox di; + + di.dw = dover.dw*h; + di.dx = dover.dx*h; + di.dh = dover.dh*w; + di.dy = dover.dy*w; + + return di; +} + +dbox dunion(box a, box b) +{ + dbox du; + + dbox di = dintersect(a, b); + du.dw = a.h - di.dw; + du.dh = a.w - di.dh; + du.dx = -di.dx; + du.dy = -di.dy; + + return du; +} + + +void test_dunion() +{ + box a = {0, 0, 1, 1}; + box dxa= {0+.0001, 0, 1, 1}; + box dya= {0, 0+.0001, 1, 1}; + box dwa= {0, 0, 1+.0001, 1}; + box dha= {0, 0, 1, 1+.0001}; + + box b = {.5, .5, .2, .2}; + dbox di = dunion(a,b); + printf("Union: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); + float inter = box_union(a, b); + float xinter = box_union(dxa, b); + float yinter = box_union(dya, b); + float winter = box_union(dwa, b); + float hinter = box_union(dha, b); + xinter = (xinter - inter)/(.0001); + yinter = (yinter - inter)/(.0001); + winter = (winter - inter)/(.0001); + hinter = (hinter - inter)/(.0001); + printf("Union Manual %f %f %f %f\n", xinter, yinter, winter, hinter); +} +void test_dintersect() +{ + box a = {0, 0, 1, 1}; + box dxa= {0+.0001, 0, 1, 1}; + box dya= {0, 0+.0001, 1, 1}; + box dwa= {0, 0, 1+.0001, 1}; + box dha= {0, 0, 1, 1+.0001}; + + box b = {.5, .5, .2, .2}; + dbox di = dintersect(a,b); + printf("Inter: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); 
+ float inter = box_intersection(a, b); + float xinter = box_intersection(dxa, b); + float yinter = box_intersection(dya, b); + float winter = box_intersection(dwa, b); + float hinter = box_intersection(dha, b); + xinter = (xinter - inter)/(.0001); + yinter = (yinter - inter)/(.0001); + winter = (winter - inter)/(.0001); + hinter = (hinter - inter)/(.0001); + printf("Inter Manual %f %f %f %f\n", xinter, yinter, winter, hinter); +} + +void test_box() +{ + test_dintersect(); + test_dunion(); + box a = {0, 0, 1, 1}; + box dxa= {0+.00001, 0, 1, 1}; + box dya= {0, 0+.00001, 1, 1}; + box dwa= {0, 0, 1+.00001, 1}; + box dha= {0, 0, 1, 1+.00001}; + + box b = {.5, 0, .2, .2}; + + float iou = box_iou(a,b); + iou = (1-iou)*(1-iou); + printf("%f\n", iou); + dbox d = diou(a, b); + printf("%f %f %f %f\n", d.dx, d.dy, d.dw, d.dh); + + float xiou = box_iou(dxa, b); + float yiou = box_iou(dya, b); + float wiou = box_iou(dwa, b); + float hiou = box_iou(dha, b); + xiou = ((1-xiou)*(1-xiou) - iou)/(.00001); + yiou = ((1-yiou)*(1-yiou) - iou)/(.00001); + wiou = ((1-wiou)*(1-wiou) - iou)/(.00001); + hiou = ((1-hiou)*(1-hiou) - iou)/(.00001); + printf("manual %f %f %f %f\n", xiou, yiou, wiou, hiou); +} + +dbox diou(box a, box b) +{ + float u = box_union(a,b); + float i = box_intersection(a,b); + dbox di = dintersect(a,b); + dbox du = dunion(a,b); + dbox dd = {0,0,0,0}; + + if(i <= 0 || 1) { + dd.dx = b.x - a.x; + dd.dy = b.y - a.y; + dd.dw = b.w - a.w; + dd.dh = b.h - a.h; + return dd; + } + + dd.dx = 2*pow((1-(i/u)),1)*(di.dx*u - du.dx*i)/(u*u); + dd.dy = 2*pow((1-(i/u)),1)*(di.dy*u - du.dy*i)/(u*u); + dd.dw = 2*pow((1-(i/u)),1)*(di.dw*u - du.dw*i)/(u*u); + dd.dh = 2*pow((1-(i/u)),1)*(di.dh*u - du.dh*i)/(u*u); + return dd; +} + + +void do_nms(box *boxes, float **probs, int total, int classes, float thresh) +{ + int i, j, k; + for(i = 0; i < total; ++i){ + int any = 0; + for(k = 0; k < classes; ++k) any = any || (probs[i][k] > 0); + if(!any) { + continue; + } + for(j = i+1; j < total; 
++j){ + if (box_iou(boxes[i], boxes[j]) > thresh){ + for(k = 0; k < classes; ++k){ + if (probs[i][k] < probs[j][k]) probs[i][k] = 0; + else probs[j][k] = 0; + } + } + } + } +} + +box encode_box(box b, box anchor) +{ + box encode; + encode.x = (b.x - anchor.x) / anchor.w; + encode.y = (b.y - anchor.y) / anchor.h; + encode.w = log2(b.w / anchor.w); + encode.h = log2(b.h / anchor.h); + return encode; +} + +box decode_box(box b, box anchor) +{ + box decode; + decode.x = b.x * anchor.w + anchor.x; + decode.y = b.y * anchor.h + anchor.y; + decode.w = pow(2., b.w) * anchor.w; + decode.h = pow(2., b.h) * anchor.h; + return decode; +} diff --git a/workloads/realworld/standard/darknet/src/box.h b/workloads/realworld/standard/darknet/src/box.h new file mode 100644 index 0000000000000000000000000000000000000000..dda3e59100c3d9e0a6bb05a80070155d9fcbc876 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/box.h @@ -0,0 +1,14 @@ +#ifndef BOX_H +#define BOX_H +#include "darknet.h" + +typedef struct{ + float dx, dy, dw, dh; +} dbox; + +float box_rmse(box a, box b); +dbox diou(box a, box b); +box decode_box(box b, box anchor); +box encode_box(box b, box anchor); + +#endif diff --git a/workloads/realworld/standard/darknet/src/classifier.h b/workloads/realworld/standard/darknet/src/classifier.h new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/workloads/realworld/standard/darknet/src/classifier.h @@ -0,0 +1 @@ + diff --git a/workloads/realworld/standard/darknet/src/col2im.c b/workloads/realworld/standard/darknet/src/col2im.c new file mode 100644 index 0000000000000000000000000000000000000000..5c4605e197439f79fe05c41337a5f2b8103f63ba --- /dev/null +++ b/workloads/realworld/standard/darknet/src/col2im.c @@ -0,0 +1,39 @@ +#include +#include +void col2im_add_pixel(float *im, int height, int width, int channels, + int row, int col, int channel, int pad, float val) +{ + row -= pad; + col -= pad; + + if 
(row < 0 || col < 0 || + row >= height || col >= width) return; + im[col + width*(row + height*channel)] += val; +} +//This one might be too, can't remember. +void col2im_cpu(float* data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_im) +{ + int c,h,w; + int height_col = (height + 2*pad - ksize) / stride + 1; + int width_col = (width + 2*pad - ksize) / stride + 1; + + int channels_col = channels * ksize * ksize; + for (c = 0; c < channels_col; ++c) { + int w_offset = c % ksize; + int h_offset = (c / ksize) % ksize; + int c_im = c / ksize / ksize; + for (h = 0; h < height_col; ++h) { + for (w = 0; w < width_col; ++w) { + int im_row = h_offset + h * stride; + int im_col = w_offset + w * stride; + int col_index = (c * height_col + h) * width_col + w; + double val = data_col[col_index]; + col2im_add_pixel(data_im, height, width, channels, + im_row, im_col, c_im, pad, val); + } + } + } +} + diff --git a/workloads/realworld/standard/darknet/src/col2im.h b/workloads/realworld/standard/darknet/src/col2im.h new file mode 100644 index 0000000000000000000000000000000000000000..3fbe05307db65a1f511f801670a23734e21b7dff --- /dev/null +++ b/workloads/realworld/standard/darknet/src/col2im.h @@ -0,0 +1,13 @@ +#ifndef COL2IM_H +#define COL2IM_H + +void col2im_cpu(float* data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_im); + +#ifdef GPU +void col2im_gpu(float *data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_im); +#endif +#endif diff --git a/workloads/realworld/standard/darknet/src/col2im_kernels.cu b/workloads/realworld/standard/darknet/src/col2im_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..30ea71e2c6ac0bb81235729c37568abbaa987d3d --- /dev/null +++ b/workloads/realworld/standard/darknet/src/col2im_kernels.cu @@ -0,0 +1,58 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { 
+#include "col2im.h" +#include "cuda_dark.h" +} + +// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu +// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE + +__global__ void col2im_gpu_kernel(const int n, const float* data_col, + const int height, const int width, const int ksize, + const int pad, + const int stride, + const int height_col, const int width_col, + float *data_im) { + int index = blockIdx.x*blockDim.x+threadIdx.x; + for(; index < n; index += blockDim.x*gridDim.x){ + float val = 0; + int w = index % width + pad; + int h = (index / width) % height + pad; + int c = index / (width * height); + // compute the start and end of the output + int w_col_start = (w < ksize) ? 0 : (w - ksize) / stride + 1; + int w_col_end = min(w / stride + 1, width_col); + int h_col_start = (h < ksize) ? 0 : (h - ksize) / stride + 1; + int h_col_end = min(h / stride + 1, height_col); + // equivalent implementation + int offset = + (c * ksize * ksize + h * ksize + w) * height_col * width_col; + int coeff_h_col = (1 - stride * ksize * height_col) * width_col; + int coeff_w_col = (1 - stride * height_col * width_col); + for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { + for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { + val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col]; + } + } + data_im[index] += val; + } +} + +void col2im_gpu(float *data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_im){ + // We are going to launch channels * height_col * width_col kernels, each + // kernel responsible for copying a single-channel grid. 
+ int height_col = (height + 2 * pad - ksize) / stride + 1; + int width_col = (width + 2 * pad - ksize) / stride + 1; + int num_kernels = channels * height * width; + col2im_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, + BLOCK>>>( + num_kernels, data_col, height, width, ksize, pad, + stride, height_col, + width_col, data_im); +} + diff --git a/workloads/realworld/standard/darknet/src/compare.c b/workloads/realworld/standard/darknet/src/compare.c new file mode 100644 index 0000000000000000000000000000000000000000..d2d2b3bdc675cf808f483d1607550e072e245396 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/compare.c @@ -0,0 +1,352 @@ +#include + +#include "network.h" +#include "detection_layer.h" +#include "cost_layer.h" +#include "utils.h" +#include "parser.h" +#include "box.h" + +void train_compare(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + list *plist = get_paths("data/compare.train.list"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + printf("%d\n", N); + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.classes = 20; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = COMPARE_DATA; + + load_thread = load_data_in_thread(args); + int epoch = *net.seen/N; + int i = 0; + while(1){ + ++i; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss 
= avg_loss*.9 + loss*.1; + printf("%.3f: %f, %f avg, %lf seconds, %ld images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen); + free_data(train); + if(i%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_%d_minor_%d.weights",backup_directory,base, epoch, i); + save_weights(net, buff); + } + if(*net.seen/N > epoch){ + epoch = *net.seen/N; + i = 0; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + if(epoch%22 == 0) net.learning_rate *= .1; + } + } + pthread_join(load_thread, 0); + free_data(buffer); + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void validate_compare(char *filename, char *weightfile) +{ + int i = 0; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + list *plist = get_paths("data/compare.val.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size/2; + free_list(plist); + + clock_t time; + int correct = 0; + int total = 0; + int splits = 10; + int num = (i+1)*N/splits - i*N/splits; + + data val, buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.classes = 20; + args.n = num; + args.m = 0; + args.d = &buffer; + args.type = COMPARE_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(i = 1; i <= splits; ++i){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + num = (i+1)*N/splits - i*N/splits; + char **part = paths+(i*N/splits); + if(i != splits){ + args.paths = part; + load_thread = load_data_in_thread(args); + } + printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + int j,k; + for(j = 0; j < val.y.rows; ++j){ + for(k = 0; k < 20; ++k){ + if(val.y.vals[j][k*2] != val.y.vals[j][k*2+1]){ + ++total; + 
if((val.y.vals[j][k*2] < val.y.vals[j][k*2+1]) == (pred.vals[j][k*2] < pred.vals[j][k*2+1])){ + ++correct; + } + } + } + } + free_matrix(pred); + printf("%d: Acc: %f, %lf seconds, %d images\n", i, (float)correct/total, sec(clock()-time), val.X.rows); + free_data(val); + } +} + +typedef struct { + network net; + char *filename; + int class; + int classes; + float elo; + float *elos; +} sortable_bbox; + +int total_compares = 0; +int current_class = 0; + +int elo_comparator(const void*a, const void *b) +{ + sortable_bbox box1 = *(sortable_bbox*)a; + sortable_bbox box2 = *(sortable_bbox*)b; + if(box1.elos[current_class] == box2.elos[current_class]) return 0; + if(box1.elos[current_class] > box2.elos[current_class]) return -1; + return 1; +} + +int bbox_comparator(const void *a, const void *b) +{ + ++total_compares; + sortable_bbox box1 = *(sortable_bbox*)a; + sortable_bbox box2 = *(sortable_bbox*)b; + network net = box1.net; + int class = box1.class; + + image im1 = load_image_color(box1.filename, net.w, net.h); + image im2 = load_image_color(box2.filename, net.w, net.h); + float *X = calloc(net.w*net.h*net.c, sizeof(float)); + memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); + memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); + float *predictions = network_predict(net, X); + + free_image(im1); + free_image(im2); + free(X); + if (predictions[class*2] > predictions[class*2+1]){ + return 1; + } + return -1; +} + +void bbox_update(sortable_bbox *a, sortable_bbox *b, int class, int result) +{ + int k = 32; + float EA = 1./(1+pow(10, (b->elos[class] - a->elos[class])/400.)); + float EB = 1./(1+pow(10, (a->elos[class] - b->elos[class])/400.)); + float SA = result ? 1 : 0; + float SB = result ? 
0 : 1; + a->elos[class] += k*(SA - EA); + b->elos[class] += k*(SB - EB); +} + +void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, int class) +{ + image im1 = load_image_color(a->filename, net.w, net.h); + image im2 = load_image_color(b->filename, net.w, net.h); + float *X = calloc(net.w*net.h*net.c, sizeof(float)); + memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); + memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); + float *predictions = network_predict(net, X); + ++total_compares; + + int i; + for(i = 0; i < classes; ++i){ + if(class < 0 || class == i){ + int result = predictions[i*2] > predictions[i*2+1]; + bbox_update(a, b, i, result); + } + } + + free_image(im1); + free_image(im2); + free(X); +} + +void SortMaster3000(char *filename, char *weightfile) +{ + int i = 0; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + set_batch_network(&net, 1); + + list *plist = get_paths("data/compare.sort.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + free_list(plist); + sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); + printf("Sorting %d boxes...\n", N); + for(i = 0; i < N; ++i){ + boxes[i].filename = paths[i]; + boxes[i].net = net; + boxes[i].class = 7; + boxes[i].elo = 1500; + } + clock_t time=clock(); + qsort(boxes, N, sizeof(sortable_bbox), bbox_comparator); + for(i = 0; i < N; ++i){ + printf("%s\n", boxes[i].filename); + } + printf("Sorted in %d compares, %f secs\n", total_compares, sec(clock()-time)); +} + +void BattleRoyaleWithCheese(char *filename, char *weightfile) +{ + int classes = 20; + int i,j; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + set_batch_network(&net, 1); + + list *plist = get_paths("data/compare.sort.list"); + //list *plist = 
get_paths("data/compare.small.list"); + //list *plist = get_paths("data/compare.cat.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + int total = N; + free_list(plist); + sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); + printf("Battling %d boxes...\n", N); + for(i = 0; i < N; ++i){ + boxes[i].filename = paths[i]; + boxes[i].net = net; + boxes[i].classes = classes; + boxes[i].elos = calloc(classes, sizeof(float));; + for(j = 0; j < classes; ++j){ + boxes[i].elos[j] = 1500; + } + } + int round; + clock_t time=clock(); + for(round = 1; round <= 4; ++round){ + clock_t round_time=clock(); + printf("Round: %d\n", round); + shuffle(boxes, N, sizeof(sortable_bbox)); + for(i = 0; i < N/2; ++i){ + bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, -1); + } + printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); + } + + int class; + + for (class = 0; class < classes; ++class){ + + N = total; + current_class = class; + qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + N /= 2; + + for(round = 1; round <= 100; ++round){ + clock_t round_time=clock(); + printf("Round: %d\n", round); + + sorta_shuffle(boxes, N, sizeof(sortable_bbox), 10); + for(i = 0; i < N/2; ++i){ + bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, class); + } + qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + if(round <= 20) N = (N*9/10)/2*2; + + printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); + } + char buff[256]; + sprintf(buff, "results/battle_%d.log", class); + FILE *outfp = fopen(buff, "w"); + for(i = 0; i < N; ++i){ + fprintf(outfp, "%s %f\n", boxes[i].filename, boxes[i].elos[class]); + } + fclose(outfp); + } + printf("Tournament in %d compares, %f secs\n", total_compares, sec(clock()-time)); +} + +void run_compare(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + 
return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + //char *filename = (argc > 5) ? argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_compare(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_compare(cfg, weights); + else if(0==strcmp(argv[2], "sort")) SortMaster3000(cfg, weights); + else if(0==strcmp(argv[2], "battle")) BattleRoyaleWithCheese(cfg, weights); + /* + else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); + else if(0==strcmp(argv[2], "extract")) extract_boxes(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_recall(cfg, weights); + */ +} diff --git a/workloads/realworld/standard/darknet/src/connected_layer.c b/workloads/realworld/standard/darknet/src/connected_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..ec25b44d998661c4735cd9a8a86f2355a0ae0080 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/connected_layer.c @@ -0,0 +1,336 @@ +#include "connected_layer.h" +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam) +{ + int i; + layer l = {0}; + l.learning_rate_scale = 1; + l.type = CONNECTED; + + l.inputs = inputs; + l.outputs = outputs; + l.batch=batch; + l.batch_normalize = batch_normalize; + l.h = 1; + l.w = 1; + l.c = inputs; + l.out_h = 1; + l.out_w = 1; + l.out_c = outputs; + + l.output = calloc(batch*outputs, sizeof(float)); + l.delta = calloc(batch*outputs, sizeof(float)); + + l.weight_updates = calloc(inputs*outputs, sizeof(float)); + l.bias_updates = calloc(outputs, sizeof(float)); + + l.weights = calloc(outputs*inputs, sizeof(float)); + l.biases = calloc(outputs, sizeof(float)); + + l.forward = forward_connected_layer; + l.backward = backward_connected_layer; + l.update = 
update_connected_layer; + + //float scale = 1./sqrt(inputs); + float scale = sqrt(2./inputs); + for(i = 0; i < outputs*inputs; ++i){ + l.weights[i] = scale*rand_uniform(-1, 1); + } + + for(i = 0; i < outputs; ++i){ + l.biases[i] = 0; + } + + if(adam){ + l.m = calloc(l.inputs*l.outputs, sizeof(float)); + l.v = calloc(l.inputs*l.outputs, sizeof(float)); + l.bias_m = calloc(l.outputs, sizeof(float)); + l.scale_m = calloc(l.outputs, sizeof(float)); + l.bias_v = calloc(l.outputs, sizeof(float)); + l.scale_v = calloc(l.outputs, sizeof(float)); + } + if(batch_normalize){ + l.scales = calloc(outputs, sizeof(float)); + l.scale_updates = calloc(outputs, sizeof(float)); + for(i = 0; i < outputs; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(outputs, sizeof(float)); + l.mean_delta = calloc(outputs, sizeof(float)); + l.variance = calloc(outputs, sizeof(float)); + l.variance_delta = calloc(outputs, sizeof(float)); + + l.rolling_mean = calloc(outputs, sizeof(float)); + l.rolling_variance = calloc(outputs, sizeof(float)); + + l.x = calloc(batch*outputs, sizeof(float)); + l.x_norm = calloc(batch*outputs, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_connected_layer_gpu; + l.backward_gpu = backward_connected_layer_gpu; + l.update_gpu = update_connected_layer_gpu; + + l.weights_gpu = cuda_make_array(l.weights, outputs*inputs); + l.biases_gpu = cuda_make_array(l.biases, outputs); + + l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs); + + l.output_gpu = cuda_make_array(l.output, outputs*batch); + l.delta_gpu = cuda_make_array(l.delta, outputs*batch); + if (adam) { + l.m_gpu = cuda_make_array(0, inputs*outputs); + l.v_gpu = cuda_make_array(0, inputs*outputs); + l.bias_m_gpu = cuda_make_array(0, outputs); + l.bias_v_gpu = cuda_make_array(0, outputs); + l.scale_m_gpu = cuda_make_array(0, outputs); + l.scale_v_gpu = cuda_make_array(0, outputs); + } + + if(batch_normalize){ + l.mean_gpu 
= cuda_make_array(l.mean, outputs); + l.variance_gpu = cuda_make_array(l.variance, outputs); + + l.rolling_mean_gpu = cuda_make_array(l.mean, outputs); + l.rolling_variance_gpu = cuda_make_array(l.variance, outputs); + + l.mean_delta_gpu = cuda_make_array(l.mean, outputs); + l.variance_delta_gpu = cuda_make_array(l.variance, outputs); + + l.scales_gpu = cuda_make_array(l.scales, outputs); + l.scale_updates_gpu = cuda_make_array(l.scale_updates, outputs); + + l.x_gpu = cuda_make_array(l.output, l.batch*outputs); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*outputs); +#ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); +#endif + } +#endif + l.activation = activation; + fprintf(stderr, "connected %4d -> %4d\n", inputs, outputs); + return l; +} + +void update_connected_layer(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.outputs, momentum, l.bias_updates, 1); + + if(l.batch_normalize){ + axpy_cpu(l.outputs, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.outputs, momentum, l.scale_updates, 1); + } + + axpy_cpu(l.inputs*l.outputs, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(l.inputs*l.outputs, momentum, l.weight_updates, 1); +} + +void forward_connected_layer(layer l, network net) +{ + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + int m = l.batch; + int k = l.inputs; + int n = l.outputs; + float *a = net.input; + float *b = l.weights; + float *c = 
l.output; + gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); + if(l.batch_normalize){ + forward_batchnorm_layer(l, net); + } else { + add_bias(l.output, l.biases, l.batch, l.outputs, 1); + } + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_connected_layer(layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l.bias_updates, l.delta, l.batch, l.outputs, 1); + } + + int m = l.outputs; + int k = l.batch; + int n = l.inputs; + float *a = l.delta; + float *b = net.input; + float *c = l.weight_updates; + gemm(1,0,m,n,k,1,a,m,b,n,1,c,n); + + m = l.batch; + k = l.outputs; + n = l.inputs; + + a = l.delta; + b = l.weights; + c = net.delta; + + if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); +} + + +void denormalize_connected_layer(layer l) +{ + int i, j; + for(i = 0; i < l.outputs; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .000001); + for(j = 0; j < l.inputs; ++j){ + l.weights[i*l.inputs + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + + +void statistics_connected_layer(layer l) +{ + if(l.batch_normalize){ + printf("Scales "); + print_statistics(l.scales, l.outputs); + /* + printf("Rolling Mean "); + print_statistics(l.rolling_mean, l.outputs); + printf("Rolling Variance "); + print_statistics(l.rolling_variance, l.outputs); + */ + } + printf("Biases "); + print_statistics(l.biases, l.outputs); + printf("Weights "); + print_statistics(l.weights, l.outputs); +} + +#ifdef GPU + +void pull_connected_layer(layer l) +{ + cuda_pull_array(l.weights_gpu, l.weights, l.inputs*l.outputs); + cuda_pull_array(l.biases_gpu, l.biases, l.outputs); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs); + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + if (l.batch_normalize){ + 
cuda_pull_array(l.scales_gpu, l.scales, l.outputs); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs); + } +} + +void push_connected_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.inputs*l.outputs); + cuda_push_array(l.biases_gpu, l.biases, l.outputs); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs); + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + if (l.batch_normalize){ + cuda_push_array(l.scales_gpu, l.scales, l.outputs); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs); + } +} + +void update_connected_layer_gpu(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + if(a.adam){ + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.inputs*l.outputs, batch, a.t); + adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t); + if(l.scales_gpu){ + adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t); + } + }else{ + axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1); + + if(l.batch_normalize){ + axpy_gpu(l.outputs, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.outputs, momentum, l.scale_updates_gpu, 1); + } + + axpy_gpu(l.inputs*l.outputs, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.inputs*l.outputs, momentum, l.weight_updates_gpu, 1); + } +} + 
+void forward_connected_layer_gpu(layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + + int m = l.batch; + int k = l.inputs; + int n = l.outputs; + float * a = net.input_gpu; + float * b = l.weights_gpu; + float * c = l.output_gpu; + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (l.batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.outputs, 1); + } + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_connected_layer_gpu(layer l, network net) +{ + constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + if(l.batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.outputs, 1); + } + + int m = l.outputs; + int k = l.batch; + int n = l.inputs; + float * a = l.delta_gpu; + float * b = net.input_gpu; + float * c = l.weight_updates_gpu; + gemm_gpu(1,0,m,n,k,1,a,m,b,n,1,c,n); + + m = l.batch; + k = l.outputs; + n = l.inputs; + + a = l.delta_gpu; + b = l.weights_gpu; + c = net.delta_gpu; + + if(c) gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); +} +#endif diff --git a/workloads/realworld/standard/darknet/src/connected_layer.h b/workloads/realworld/standard/darknet/src/connected_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..6727a964eaa923906b202ff337aa69ad91817117 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/connected_layer.h @@ -0,0 +1,23 @@ +#ifndef CONNECTED_LAYER_H +#define CONNECTED_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam); + +void forward_connected_layer(layer l, network net); +void backward_connected_layer(layer l, network net); +void update_connected_layer(layer l, update_args a); + +#ifdef GPU 
/*
 * Element-wise sign binarization on the GPU: binary[i] = x[i] >= 0 ? 1 : -1.
 *
 * Fix: the kernel launch configuration was lost in this copy ("<<>>" is not
 * valid CUDA); restored the standard darknet launch of one thread per
 * element, <<<cuda_gridsize(n), BLOCK>>>.
 */
void binarize_gpu(float *x, int n, float *binary)
{
    binarize_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, binary);
    check_error(cudaPeekAtLastError());
}
/*
 * Binarize convolution filters on the GPU: each of the n filters (size
 * floats long) is replaced in `binary` by +/- its mean absolute value.
 * The kernel uses one thread per filter.
 *
 * Fix: the kernel launch configuration was lost in this copy ("<<>>" is not
 * valid CUDA); restored the standard darknet launch
 * <<<cuda_gridsize(n), BLOCK>>>.
 */
void binarize_weights_gpu(float *weights, int n, int size, float *binary)
{
    binarize_weights_kernel<<<cuda_gridsize(n), BLOCK>>>(weights, n, size, binary);
    check_error(cudaPeekAtLastError());
}
/*
 * Smoothness-penalty gradient: one thread per element of x.
 * For each pixel, accumulate rate*(neighbor - center) into delta over a
 * size x size window centered (by truncation) on the pixel; taps falling
 * outside the image contribute 0.
 */
__global__ void smooth_kernel(float *x, int n, int w, int h, int c, int size, float rate, float *delta)
{
    int tid = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(tid >= n) return;

    /* unravel the flat index as (batch, chan, row, col), col fastest */
    int col = tid % w; tid /= w;
    int row = tid % h; tid /= h;
    int chan = tid % c; tid /= c;
    int batch = tid;

    /* window origin; keeps the original float division + int truncation */
    int w_offset = -(size/2.f);
    int h_offset = -(size/2.f);

    int out_index = col + w*(row + h*(chan + c*batch));
    int dy, dx;
    for(dy = 0; dy < size; ++dy){
        for(dx = 0; dx < size; ++dx){
            int cur_h = h_offset + row + dy;
            int cur_w = w_offset + col + dx;
            int index = cur_w + w*(cur_h + h*(chan + batch*c));
            int valid = (cur_h >= 0 && cur_h < h &&
                         cur_w >= 0 && cur_w < w);
            delta[out_index] += valid ? rate*(x[index] - x[out_index]) : 0;
        }
    }
}
/*
 * Copy the convolutional layer's GPU-resident parameters (weights, biases,
 * their gradient accumulators, and batch-norm statistics when present)
 * back into the host-side arrays.
 */
void pull_convolutional_layer(layer l)
{
    cuda_pull_array(l.weights_gpu, l.weights, l.nweights);
    cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights);
    cuda_pull_array(l.biases_gpu, l.biases, l.n);
    cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n);
    if(l.batch_normalize){
        cuda_pull_array(l.scales_gpu, l.scales, l.n);
        cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n);
        cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n);
    }
}
learning_rate, l.nweights, batch, a.t); + adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + if(l.scales_gpu){ + adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + } + }else{ + axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1); + + axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.n, momentum, l.bias_updates_gpu, 1); + + if(l.scales_gpu){ + axpy_gpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.n, momentum, l.scale_updates_gpu, 1); + } + } + if(l.clip){ + constrain_gpu(l.nweights, l.clip, l.weights_gpu, 1); + } +} + + diff --git a/workloads/realworld/standard/darknet/src/convolutional_layer.c b/workloads/realworld/standard/darknet/src/convolutional_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..1fb58b0933b06f2b27ec89f9f7c05f0b2b8a87eb --- /dev/null +++ b/workloads/realworld/standard/darknet/src/convolutional_layer.c @@ -0,0 +1,622 @@ +#include "convolutional_layer.h" +#include "utils.h" +#include "batchnorm_layer.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" +#include +#include + +#ifdef AI2 +#include "xnor_layer.h" +#endif + +void swap_binary(convolutional_layer *l) +{ + float *swap = l->weights; + l->weights = l->binary_weights; + l->binary_weights = swap; + +#ifdef GPU + swap = l->weights_gpu; + l->weights_gpu = l->binary_weights_gpu; + l->binary_weights_gpu = swap; +#endif +} + +void binarize_weights(float *weights, int n, int size, float *binary) +{ + int i, f; + for(f = 0; f < n; ++f){ + float mean = 0; + for(i = 0; i < size; ++i){ + mean += fabs(weights[f*size + i]); + } + 
/*
 * Sign-binarize a buffer on the CPU: strictly positive inputs map to +1,
 * zero and negative inputs map to -1.
 */
void binarize_cpu(float *input, int n, float *binary)
{
    int idx = 0;
    while(idx < n){
        binary[idx] = (input[idx] > 0) ? 1 : -1;
        ++idx;
    }
}
cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + + cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + + cudnnSetFilter4dDescriptor(l->dweightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size); + cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size); + #if CUDNN_MAJOR >= 6 + cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT); + #else + cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION); + #endif + + #if CUDNN_MAJOR >= 7 + cudnnSetConvolutionGroupCount(l->convDesc, l->groups); + #else + if(l->groups > 1){ + error("CUDNN < 7 doesn't support groups, please upgrade!"); + } + #endif + + cudnnGetConvolutionForwardAlgorithm(cudnn_handle(), + l->srcTensorDesc, + l->weightDesc, + l->convDesc, + l->dstTensorDesc, + CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->fw_algo); + cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(), + l->weightDesc, + l->ddstTensorDesc, + l->convDesc, + l->dsrcTensorDesc, + CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->bd_algo); + cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(), + l->srcTensorDesc, + l->ddstTensorDesc, + l->convDesc, + l->dweightDesc, + CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->bf_algo); +} +#endif +#endif + +convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, 
ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam) +{ + int i; + convolutional_layer l = {0}; + l.type = CONVOLUTIONAL; + + l.groups = groups; + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.binary = binary; + l.xnor = xnor; + l.batch = batch; + l.stride = stride; + l.size = size; + l.pad = padding; + l.batch_normalize = batch_normalize; + + l.weights = calloc(c/groups*n*size*size, sizeof(float)); + l.weight_updates = calloc(c/groups*n*size*size, sizeof(float)); + + l.biases = calloc(n, sizeof(float)); + l.bias_updates = calloc(n, sizeof(float)); + + l.nweights = c/groups*n*size*size; + l.nbiases = n; + + // float scale = 1./sqrt(size*size*c); + float scale = sqrt(2./(size*size*c/l.groups)); + //printf("convscale %f\n", scale); + //scale = .02; + //for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1); + for(i = 0; i < l.nweights; ++i) l.weights[i] = scale*rand_normal(); + int out_w = convolutional_out_width(l); + int out_h = convolutional_out_height(l); + l.out_h = out_h; + l.out_w = out_w; + l.out_c = n; + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = l.w * l.h * l.c; + + l.output = calloc(l.batch*l.outputs, sizeof(float)); + l.delta = calloc(l.batch*l.outputs, sizeof(float)); + + l.forward = forward_convolutional_layer; + l.backward = backward_convolutional_layer; + l.update = update_convolutional_layer; + if(binary){ + l.binary_weights = calloc(l.nweights, sizeof(float)); + l.cweights = calloc(l.nweights, sizeof(char)); + l.scales = calloc(n, sizeof(float)); + } + if(xnor){ + l.binary_weights = calloc(l.nweights, sizeof(float)); + l.binary_input = calloc(l.inputs*l.batch, sizeof(float)); + } + + if(batch_normalize){ + l.scales = calloc(n, sizeof(float)); + l.scale_updates = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(n, sizeof(float)); + l.variance = calloc(n, sizeof(float)); + + l.mean_delta = calloc(n, sizeof(float)); + l.variance_delta = calloc(n, 
sizeof(float)); + + l.rolling_mean = calloc(n, sizeof(float)); + l.rolling_variance = calloc(n, sizeof(float)); + l.x = calloc(l.batch*l.outputs, sizeof(float)); + l.x_norm = calloc(l.batch*l.outputs, sizeof(float)); + } + if(adam){ + l.m = calloc(l.nweights, sizeof(float)); + l.v = calloc(l.nweights, sizeof(float)); + l.bias_m = calloc(n, sizeof(float)); + l.scale_m = calloc(n, sizeof(float)); + l.bias_v = calloc(n, sizeof(float)); + l.scale_v = calloc(n, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_convolutional_layer_gpu; + l.backward_gpu = backward_convolutional_layer_gpu; + l.update_gpu = update_convolutional_layer_gpu; + + if(gpu_index >= 0){ + if (adam) { + l.m_gpu = cuda_make_array(l.m, l.nweights); + l.v_gpu = cuda_make_array(l.v, l.nweights); + l.bias_m_gpu = cuda_make_array(l.bias_m, n); + l.bias_v_gpu = cuda_make_array(l.bias_v, n); + l.scale_m_gpu = cuda_make_array(l.scale_m, n); + l.scale_v_gpu = cuda_make_array(l.scale_v, n); + } + + l.weights_gpu = cuda_make_array(l.weights, l.nweights); + l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights); + + l.biases_gpu = cuda_make_array(l.biases, n); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + + if(binary){ + l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); + } + if(xnor){ + l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); + l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch); + } + + if(batch_normalize){ + l.mean_gpu = cuda_make_array(l.mean, n); + l.variance_gpu = cuda_make_array(l.variance, n); + + l.rolling_mean_gpu = cuda_make_array(l.mean, n); + l.rolling_variance_gpu = cuda_make_array(l.variance, n); + + l.mean_delta_gpu = cuda_make_array(l.mean, n); + l.variance_delta_gpu = cuda_make_array(l.variance, n); + + l.scales_gpu = cuda_make_array(l.scales, n); + l.scale_updates_gpu = 
cuda_make_array(l.scale_updates, n); + + l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + } +#ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.srcTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnCreateFilterDescriptor(&l.weightDesc); + cudnnCreateTensorDescriptor(&l.dsrcTensorDesc); + cudnnCreateTensorDescriptor(&l.ddstTensorDesc); + cudnnCreateFilterDescriptor(&l.dweightDesc); + cudnnCreateConvolutionDescriptor(&l.convDesc); + cudnn_convolutional_setup(&l); +#endif + } +#endif + l.workspace_size = get_workspace_size(l); + l.activation = activation; + + fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BFLOPs\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, (2.0 * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w)/1000000000.); + + return l; +} + +void denormalize_convolutional_layer(convolutional_layer l) +{ + int i, j; + for(i = 0; i < l.n; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001); + for(j = 0; j < l.c/l.groups*l.size*l.size; ++j){ + l.weights[i*l.c/l.groups*l.size*l.size + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + +/* +void test_convolutional_layer() +{ + convolutional_layer l = make_convolutional_layer(1, 5, 5, 3, 2, 5, 2, 1, LEAKY, 1, 0, 0, 0); + l.batch_normalize = 1; + float data[] = {1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3}; + //net.input = data; + //forward_convolutional_layer(l); +} +*/ + +void resize_convolutional_layer(convolutional_layer *l, int w, int h) +{ + l->w = w; + l->h = h; + int out_w = convolutional_out_width(*l); + int out_h = convolutional_out_height(*l); + + l->out_w = out_w; + 
/*
 * Add biases[i] to every element of channel i.
 * `output` is laid out [batch][n][size] with `size` spatial positions
 * per channel.
 */
void add_bias(float *output, float *biases, int batch, int n, int size)
{
    int b, i, j;
    for(b = 0; b < batch; ++b){
        for(i = 0; i < n; ++i){
            float *channel = output + (b*n + i)*size;
            float bias = biases[i];
            for(j = 0; j < size; ++j){
                channel[j] += bias;
            }
        }
    }
}
int k = l.size*l.size*l.c/l.groups; + int n = l.out_w*l.out_h; + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.weights + j*l.nweights/l.groups; + float *b = net.workspace; + float *c = l.output + (i*l.groups + j)*n*m; + float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + + if (l.size == 1) { + b = im; + } else { + im2col_cpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + } + gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } + + if(l.batch_normalize){ + forward_batchnorm_layer(l, net); + } else { + add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w); + } + + activate_array(l.output, l.outputs*l.batch, l.activation); + if(l.binary || l.xnor) swap_binary(&l); +} + +void backward_convolutional_layer(convolutional_layer l, network net) +{ + int i, j; + int m = l.n/l.groups; + int n = l.size*l.size*l.c/l.groups; + int k = l.out_w*l.out_h; + + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l.bias_updates, l.delta, l.batch, l.n, k); + } + + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.delta + (i*l.groups + j)*m*k; + float *b = net.workspace; + float *c = l.weight_updates + j*l.nweights/l.groups; + + float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + float *imd = net.delta + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + + if(l.size == 1){ + b = im; + } else { + im2col_cpu(im, l.c/l.groups, l.h, l.w, + l.size, l.stride, l.pad, b); + } + + gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (net.delta) { + a = l.weights + j*l.nweights/l.groups; + b = l.delta + (i*l.groups + j)*m*k; + c = net.workspace; + if (l.size == 1) { + c = imd; + } + + gemm(1,0,n,k,m,1,a,n,b,k,0,c,k); + + if (l.size != 1) { + col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd); + } + } + } + } +} + +void update_convolutional_layer(convolutional_layer l, update_args a) +{ + float 
learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.n, momentum, l.bias_updates, 1); + + if(l.scales){ + axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.n, momentum, l.scale_updates, 1); + } + + axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(l.nweights, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(l.nweights, momentum, l.weight_updates, 1); +} + + +image get_convolutional_weight(convolutional_layer l, int i) +{ + int h = l.size; + int w = l.size; + int c = l.c/l.groups; + return float_to_image(w,h,c,l.weights+i*h*w*c); +} + +void rgbgr_weights(convolutional_layer l) +{ + int i; + for(i = 0; i < l.n; ++i){ + image im = get_convolutional_weight(l, i); + if (im.c == 3) { + rgbgr_image(im); + } + } +} + +void rescale_weights(convolutional_layer l, float scale, float trans) +{ + int i; + for(i = 0; i < l.n; ++i){ + image im = get_convolutional_weight(l, i); + if (im.c == 3) { + scale_image(im, scale); + float sum = sum_array(im.data, im.w*im.h*im.c); + l.biases[i] += sum*trans; + } + } +} + +image *get_weights(convolutional_layer l) +{ + image *weights = calloc(l.n, sizeof(image)); + int i; + for(i = 0; i < l.n; ++i){ + weights[i] = copy_image(get_convolutional_weight(l, i)); + normalize_image(weights[i]); + /* + char buff[256]; + sprintf(buff, "filter%d", i); + save_image(weights[i], buff); + */ + } + //error("hey"); + return weights; +} + +image *visualize_convolutional_layer(convolutional_layer l, char *window, image *prev_weights) +{ + image *single_weights = get_weights(l); + show_images(single_weights, l.n, window); + + image delta = get_convolutional_image(l); + image dc = collapse_image_layers(delta, 1); + char buff[256]; + sprintf(buff, "%s: Output", window); + //show_image(dc, buff); + //save_image(dc, 
buff); + free_image(dc); + return single_weights; +} + diff --git a/workloads/realworld/standard/darknet/src/convolutional_layer.h b/workloads/realworld/standard/darknet/src/convolutional_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..baacf38f4127a42abe009ef8aa3b59543433a286 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/convolutional_layer.h @@ -0,0 +1,50 @@ +#ifndef CONVOLUTIONAL_LAYER_H +#define CONVOLUTIONAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +typedef layer convolutional_layer; + +#ifdef GPU +void forward_convolutional_layer_gpu(convolutional_layer layer, network net); +void backward_convolutional_layer_gpu(convolutional_layer layer, network net); +void update_convolutional_layer_gpu(convolutional_layer layer, update_args a); + +void push_convolutional_layer(convolutional_layer layer); +void pull_convolutional_layer(convolutional_layer layer); + +void add_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); +void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t); +#ifdef CUDNN +void cudnn_convolutional_setup(layer *l); +#endif +#endif + +convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam); +void resize_convolutional_layer(convolutional_layer *layer, int w, int h); +void forward_convolutional_layer(const convolutional_layer layer, network net); +void update_convolutional_layer(convolutional_layer layer, update_args a); +image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_weights); +void binarize_weights(float *weights, int n, int size, float *binary); 
+void swap_binary(convolutional_layer *l); +void binarize_weights2(float *weights, int n, int size, char *binary, float *scales); + +void backward_convolutional_layer(convolutional_layer layer, network net); + +void add_bias(float *output, float *biases, int batch, int n, int size); +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); + +image get_convolutional_image(convolutional_layer layer); +image get_convolutional_delta(convolutional_layer layer); +image get_convolutional_weight(convolutional_layer layer, int i); + +int convolutional_out_height(convolutional_layer layer); +int convolutional_out_width(convolutional_layer layer); + +#endif + diff --git a/workloads/realworld/standard/darknet/src/cost_layer.c b/workloads/realworld/standard/darknet/src/cost_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..85fa85daf306dda03c113a6bbdc2d92b25d0b00d --- /dev/null +++ b/workloads/realworld/standard/darknet/src/cost_layer.c @@ -0,0 +1,176 @@ +#include "cost_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include +#include +#include +#include + +COST_TYPE get_cost_type(char *s) +{ + if (strcmp(s, "seg")==0) return SEG; + if (strcmp(s, "sse")==0) return SSE; + if (strcmp(s, "masked")==0) return MASKED; + if (strcmp(s, "smooth")==0) return SMOOTH; + if (strcmp(s, "L1")==0) return L1; + if (strcmp(s, "wgan")==0) return WGAN; + fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s); + return SSE; +} + +char *get_cost_string(COST_TYPE a) +{ + switch(a){ + case SEG: + return "seg"; + case SSE: + return "sse"; + case MASKED: + return "masked"; + case SMOOTH: + return "smooth"; + case L1: + return "L1"; + case WGAN: + return "wgan"; + } + return "sse"; +} + +cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) +{ + fprintf(stderr, "cost %4d\n", inputs); + cost_layer l = {0}; + l.type = COST; + + l.scale = scale; + l.batch = batch; + l.inputs = inputs; + 
l.outputs = inputs; + l.cost_type = cost_type; + l.delta = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_cost_layer; + l.backward = backward_cost_layer; + #ifdef GPU + l.forward_gpu = forward_cost_layer_gpu; + l.backward_gpu = backward_cost_layer_gpu; + + l.delta_gpu = cuda_make_array(l.output, inputs*batch); + l.output_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void resize_cost_layer(cost_layer *l, int inputs) +{ + l->inputs = inputs; + l->outputs = inputs; + l->delta = realloc(l->delta, inputs*l->batch*sizeof(float)); + l->output = realloc(l->output, inputs*l->batch*sizeof(float)); +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch); + l->output_gpu = cuda_make_array(l->output, inputs*l->batch); +#endif +} + +void forward_cost_layer(cost_layer l, network net) +{ + if (!net.truth) return; + if(l.cost_type == MASKED){ + int i; + for(i = 0; i < l.batch*l.inputs; ++i){ + if(net.truth[i] == SECRET_NUM) net.input[i] = SECRET_NUM; + } + } + if(l.cost_type == SMOOTH){ + smooth_l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + }else if(l.cost_type == L1){ + l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + } else { + l2_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + } + l.cost[0] = sum_array(l.output, l.batch*l.inputs); +} + +void backward_cost_layer(const cost_layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_cost_layer(cost_layer l) +{ + cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +void push_cost_layer(cost_layer l) +{ + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +int float_abs_compare (const void * a, const void * b) +{ + float fa = *(const float*) a; + if(fa < 0) fa = -fa; + float fb = *(const float*) b; 
+ if(fb < 0) fb = -fb; + return (fa > fb) - (fa < fb); +} + +void forward_cost_layer_gpu(cost_layer l, network net) +{ + if (!net.truth) return; + if(l.smooth){ + scal_gpu(l.batch*l.inputs, (1-l.smooth), net.truth_gpu, 1); + add_gpu(l.batch*l.inputs, l.smooth * 1./l.inputs, net.truth_gpu, 1); + } + + if(l.cost_type == SMOOTH){ + smooth_l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } else if (l.cost_type == L1){ + l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } else if (l.cost_type == WGAN){ + wgan_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } else { + l2_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } + + if (l.cost_type == SEG && l.noobject_scale != 1) { + scale_mask_gpu(l.batch*l.inputs, l.delta_gpu, 0, net.truth_gpu, l.noobject_scale); + scale_mask_gpu(l.batch*l.inputs, l.output_gpu, 0, net.truth_gpu, l.noobject_scale); + } + if (l.cost_type == MASKED) { + mask_gpu(l.batch*l.inputs, net.delta_gpu, SECRET_NUM, net.truth_gpu, 0); + } + + if(l.ratio){ + cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); + qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare); + int n = (1-l.ratio) * l.batch*l.inputs; + float thresh = l.delta[n]; + thresh = 0; + printf("%f\n", thresh); + supp_gpu(l.batch*l.inputs, thresh, l.delta_gpu, 1); + } + + if(l.thresh){ + supp_gpu(l.batch*l.inputs, l.thresh*1./l.inputs, l.delta_gpu, 1); + } + + cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs); + l.cost[0] = sum_array(l.output, l.batch*l.inputs); +} + +void backward_cost_layer_gpu(const cost_layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/standard/darknet/src/cost_layer.h b/workloads/realworld/standard/darknet/src/cost_layer.h new file mode 100644 index 
0000000000000000000000000000000000000000..ceb64de00bf66839c2f34852a05ea71114608a35 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/cost_layer.h @@ -0,0 +1,20 @@ +#ifndef COST_LAYER_H +#define COST_LAYER_H +#include "layer.h" +#include "network.h" + +typedef layer cost_layer; + +COST_TYPE get_cost_type(char *s); +char *get_cost_string(COST_TYPE a); +cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale); +void forward_cost_layer(const cost_layer l, network net); +void backward_cost_layer(const cost_layer l, network net); +void resize_cost_layer(cost_layer *l, int inputs); + +#ifdef GPU +void forward_cost_layer_gpu(cost_layer l, network net); +void backward_cost_layer_gpu(const cost_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/standard/darknet/src/cpu_timestamps.c b/workloads/realworld/standard/darknet/src/cpu_timestamps.c new file mode 100644 index 0000000000000000000000000000000000000000..35114479c7a9cce3debe2204b6886ad5528041d5 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/cpu_timestamps.c @@ -0,0 +1,20 @@ +#include "cpu_timestamps.h" + +void startCPU() { + struct timespec tv; + if(clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + startCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); +} + + + +void endCPU() { + struct timespec tv; + if(clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + + endCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); + //endCPUTimestamp1 = std::chrono::system_clock::now(); + printf("CPU_Times,%lu,%lu,%lu\n", startCPUTime, endCPUTime, endCPUTime-startCPUTime); +} diff --git a/workloads/realworld/standard/darknet/src/cpu_timestamps.h b/workloads/realworld/standard/darknet/src/cpu_timestamps.h new file mode 100644 index 0000000000000000000000000000000000000000..e53e995a5603b4610759c02a4a179eb9f0124e48 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/cpu_timestamps.h @@ -0,0 +1,21 @@ +#ifndef CPU_TIMESTAMP_ +#define 
CPU_TIMESTAMP_ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +static uint64_t startCPUTime; +static uint64_t endCPUTime; + +void startCPU(); +void endCPU(); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/workloads/realworld/standard/darknet/src/crnn_layer.c b/workloads/realworld/standard/darknet/src/crnn_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..159e17f92d45693461c92d482bf3aa7354a148d8 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/crnn_layer.c @@ -0,0 +1,283 @@ +#include "crnn_layer.h" +#include "convolutional_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize) +{ + fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = CRNN; + l.steps = steps; + l.h = h; + l.w = w; + l.c = c; + l.out_h = h; + l.out_w = w; + l.out_c = output_filters; + l.inputs = h*w*c; + l.hidden = h * w * hidden_filters; + l.outputs = l.out_h * l.out_w * l.out_c; + + l.state = calloc(l.hidden*batch*(steps+1), sizeof(float)); + + l.input_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.input_layer) = make_convolutional_layer(batch*steps, h, w, c, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.input_layer->batch = batch; + + l.self_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.self_layer) = make_convolutional_layer(batch*steps, h, w, 
hidden_filters, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.self_layer->batch = batch; + + l.output_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.output_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, output_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.output_layer->batch = batch; + + l.output = l.output_layer->output; + l.delta = l.output_layer->delta; + + l.forward = forward_crnn_layer; + l.backward = backward_crnn_layer; + l.update = update_crnn_layer; + +#ifdef GPU + l.forward_gpu = forward_crnn_layer_gpu; + l.backward_gpu = backward_crnn_layer_gpu; + l.update_gpu = update_crnn_layer_gpu; + + l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1)); + l.output_gpu = l.output_layer->output_gpu; + l.delta_gpu = l.output_layer->delta_gpu; +#endif + + return l; +} + +void update_crnn_layer(layer l, update_args a) +{ + update_convolutional_layer(*(l.input_layer), a); + update_convolutional_layer(*(l.self_layer), a); + update_convolutional_layer(*(l.output_layer), a); +} + +void forward_crnn_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1); + fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1); + fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1); + if(net.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1); + + for (i = 0; i < l.steps; ++i) { + s.input = net.input; + forward_convolutional_layer(input_layer, s); + + s.input = l.state; + forward_convolutional_layer(self_layer, s); + + float *old_state = l.state; + if(net.train) l.state += l.hidden*l.batch; + if(l.shortcut){ + copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1); + }else{ + fill_cpu(l.hidden * l.batch, 0, l.state, 1); + } + axpy_cpu(l.hidden * l.batch, 1, 
input_layer.output, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + forward_convolutional_layer(output_layer, s); + + net.input += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_crnn_layer(layer l, network net) +{ + network s = net; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + increment_layer(&input_layer, l.steps-1); + increment_layer(&self_layer, l.steps-1); + increment_layer(&output_layer, l.steps-1); + + l.state += l.hidden*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + s.delta = self_layer.delta; + backward_convolutional_layer(output_layer, s); + + l.state -= l.hidden*l.batch; + /* + if(i > 0){ + copy_cpu(l.hidden * l.batch, input_layer.output - l.hidden*l.batch, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output - l.hidden*l.batch, 1, l.state, 1); + }else{ + fill_cpu(l.hidden * l.batch, 0, l.state, 1); + } + */ + + s.input = l.state; + s.delta = self_layer.delta - l.hidden*l.batch; + if (i == 0) s.delta = 0; + backward_convolutional_layer(self_layer, s); + + copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1); + if (i > 0 && l.shortcut) axpy_cpu(l.hidden*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.hidden*l.batch, 1); + s.input = net.input + i*l.inputs*l.batch; + if(net.delta) s.delta = net.delta + i*l.inputs*l.batch; + else s.delta = 0; + backward_convolutional_layer(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} + +#ifdef GPU + +void pull_crnn_layer(layer l) +{ + pull_convolutional_layer(*(l.input_layer)); + 
pull_convolutional_layer(*(l.self_layer)); + pull_convolutional_layer(*(l.output_layer)); +} + +void push_crnn_layer(layer l) +{ + push_convolutional_layer(*(l.input_layer)); + push_convolutional_layer(*(l.self_layer)); + push_convolutional_layer(*(l.output_layer)); +} + +void update_crnn_layer_gpu(layer l, update_args a) +{ + update_convolutional_layer_gpu(*(l.input_layer), a); + update_convolutional_layer_gpu(*(l.self_layer), a); + update_convolutional_layer_gpu(*(l.output_layer), a); +} + +void forward_crnn_layer_gpu(layer l, network net) +{ + network s = net; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_gpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); + fill_gpu(l.hidden * l.batch * l.steps, 0, self_layer.delta_gpu, 1); + fill_gpu(l.hidden * l.batch * l.steps, 0, input_layer.delta_gpu, 1); + if(net.train) fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1); + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = net.input_gpu; + forward_convolutional_layer_gpu(input_layer, s); + + s.input_gpu = l.state_gpu; + forward_convolutional_layer_gpu(self_layer, s); + + float *old_state = l.state_gpu; + if(net.train) l.state_gpu += l.hidden*l.batch; + if(l.shortcut){ + copy_gpu(l.hidden * l.batch, old_state, 1, l.state_gpu, 1); + }else{ + fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1); + } + axpy_gpu(l.hidden * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + forward_convolutional_layer_gpu(output_layer, s); + + net.input_gpu += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_crnn_layer_gpu(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer 
output_layer = *(l.output_layer); + increment_layer(&input_layer, l.steps - 1); + increment_layer(&self_layer, l.steps - 1); + increment_layer(&output_layer, l.steps - 1); + l.state_gpu += l.hidden*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_gpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu; + backward_convolutional_layer_gpu(output_layer, s); + + l.state_gpu -= l.hidden*l.batch; + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu - l.hidden*l.batch; + if (i == 0) s.delta_gpu = 0; + backward_convolutional_layer_gpu(self_layer, s); + + copy_gpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); + if (i > 0 && l.shortcut) axpy_gpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1); + s.input_gpu = net.input_gpu + i*l.inputs*l.batch; + if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch; + else s.delta_gpu = 0; + backward_convolutional_layer_gpu(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} +#endif diff --git a/workloads/realworld/standard/darknet/src/crnn_layer.h b/workloads/realworld/standard/darknet/src/crnn_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..515f378354e9cc6149e7a1ac60ffc86ace112991 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/crnn_layer.h @@ -0,0 +1,24 @@ + +#ifndef CRNN_LAYER_H +#define CRNN_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize); + +void forward_crnn_layer(layer l, network net); +void backward_crnn_layer(layer l, network net); +void update_crnn_layer(layer l, 
update_args a); + +#ifdef GPU +void forward_crnn_layer_gpu(layer l, network net); +void backward_crnn_layer_gpu(layer l, network net); +void update_crnn_layer_gpu(layer l, update_args a); +void push_crnn_layer(layer l); +void pull_crnn_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/standard/darknet/src/crop_layer.c b/workloads/realworld/standard/darknet/src/crop_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..3c8c9650bda6dcf4485ce8da8e2fa1984f2b244d --- /dev/null +++ b/workloads/realworld/standard/darknet/src/crop_layer.c @@ -0,0 +1,103 @@ +#include "crop_layer.h" +#include "cuda_dark.h" +#include + +image get_crop_image(crop_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.out_c; + return float_to_image(w,h,c,l.output); +} + +void backward_crop_layer(const crop_layer l, network net){} +void backward_crop_layer_gpu(const crop_layer l, network net){} + +crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure) +{ + fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c); + crop_layer l = {0}; + l.type = CROP; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.scale = (float)crop_height / h; + l.flip = flip; + l.angle = angle; + l.saturation = saturation; + l.exposure = exposure; + l.out_w = crop_width; + l.out_h = crop_height; + l.out_c = c; + l.inputs = l.w * l.h * l.c; + l.outputs = l.out_w * l.out_h * l.out_c; + l.output = calloc(l.outputs*batch, sizeof(float)); + l.forward = forward_crop_layer; + l.backward = backward_crop_layer; + + #ifdef GPU + l.forward_gpu = forward_crop_layer_gpu; + l.backward_gpu = backward_crop_layer_gpu; + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + l.rand_gpu = cuda_make_array(0, l.batch*8); + #endif + return l; +} + +void resize_crop_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->out_w = l->scale*w; + 
l->out_h = l->scale*h; + + l->inputs = l->w * l->h * l->c; + l->outputs = l->out_h * l->out_w * l->out_c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + #ifdef GPU + cuda_free(l->output_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + #endif +} + + +void forward_crop_layer(const crop_layer l, network net) +{ + int i,j,c,b,row,col; + int index; + int count = 0; + int flip = (l.flip && rand()%2); + int dh = rand()%(l.h - l.out_h + 1); + int dw = rand()%(l.w - l.out_w + 1); + float scale = 2; + float trans = -1; + if(l.noadjust){ + scale = 1; + trans = 0; + } + if(!net.train){ + flip = 0; + dh = (l.h - l.out_h)/2; + dw = (l.w - l.out_w)/2; + } + for(b = 0; b < l.batch; ++b){ + for(c = 0; c < l.c; ++c){ + for(i = 0; i < l.out_h; ++i){ + for(j = 0; j < l.out_w; ++j){ + if(flip){ + col = l.w - dw - j - 1; + }else{ + col = j + dw; + } + row = i + dh; + index = col+l.w*(row+l.h*(c + l.c*b)); + l.output[count++] = net.input[index]*scale + trans; + } + } + } + } +} + diff --git a/workloads/realworld/standard/darknet/src/crop_layer.h b/workloads/realworld/standard/darknet/src/crop_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..3b5883c47d6df0987700e1b0434010eebd6312af --- /dev/null +++ b/workloads/realworld/standard/darknet/src/crop_layer.h @@ -0,0 +1,20 @@ +#ifndef CROP_LAYER_H +#define CROP_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +typedef layer crop_layer; + +image get_crop_image(crop_layer l); +crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure); +void forward_crop_layer(const crop_layer l, network net); +void resize_crop_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_crop_layer_gpu(crop_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/standard/darknet/src/crop_layer_kernels.cu 
b/workloads/realworld/standard/darknet/src/crop_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..7e262fce4ff8beb52de23d7c79bd1917410ef136 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/crop_layer_kernels.cu @@ -0,0 +1,225 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "crop_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "image.h" +} + +__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c) +{ + if(x < 0 || x >= w || y < 0 || y >= h) return 0; + return image[x + w*(y + c*h)]; +} + +__device__ float3 rgb_to_hsv_kernel(float3 rgb) +{ + float r = rgb.x; + float g = rgb.y; + float b = rgb.z; + + float h, s, v; + float max = (r > g) ? ( (r > b) ? r : b) : ( (g > b) ? g : b); + float min = (r < g) ? ( (r < b) ? r : b) : ( (g < b) ? g : b); + float delta = max - min; + v = max; + if(max == 0){ + s = 0; + h = -1; + }else{ + s = delta/max; + if(r == max){ + h = (g - b) / delta; + } else if (g == max) { + h = 2 + (b - r) / delta; + } else { + h = 4 + (r - g) / delta; + } + if (h < 0) h += 6; + } + return make_float3(h, s, v); +} + +__device__ float3 hsv_to_rgb_kernel(float3 hsv) +{ + float h = hsv.x; + float s = hsv.y; + float v = hsv.z; + + float r, g, b; + float f, p, q, t; + + if (s == 0) { + r = g = b = v; + } else { + int index = (int) floorf(h); + f = h - index; + p = v*(1-s); + q = v*(1-s*f); + t = v*(1-s*(1-f)); + if(index == 0){ + r = v; g = t; b = p; + } else if(index == 1){ + r = q; g = v; b = p; + } else if(index == 2){ + r = p; g = v; b = t; + } else if(index == 3){ + r = p; g = q; b = v; + } else if(index == 4){ + r = t; g = p; b = v; + } else { + r = v; g = p; b = q; + } + } + r = (r < 0) ? 0 : ((r > 1) ? 1 : r); + g = (g < 0) ? 0 : ((g > 1) ? 1 : g); + b = (b < 0) ? 0 : ((b > 1) ? 
/*
 * Sample channel c of `image` at fractional coordinates (x, y) using
 * bilinear interpolation. Out-of-bounds taps read as 0 via
 * get_pixel_kernel, so edges fade to black rather than clamping.
 */
__device__ float bilinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c)
{
    int x0 = (int) floorf(x);
    int y0 = (int) floorf(y);

    float fx = x - x0;
    float fy = y - y0;

    /* four neighboring taps, weighted by area opposite each corner
       (same term order as the original, so float results match exactly) */
    float tl = get_pixel_kernel(image, w, h, x0,   y0,   c);
    float bl = get_pixel_kernel(image, w, h, x0,   y0+1, c);
    float tr = get_pixel_kernel(image, w, h, x0+1, y0,   c);
    float br = get_pixel_kernel(image, w, h, x0+1, y0+1, c);

    return (1-fy) * (1-fx) * tl +
           fy     * (1-fx) * bl +
           (1-fy) * fx     * tr +
           fy     * fx     * br;
}
1.f/exposure : exposure; + + size_t offset = id * h * w * 3; + image += offset; + float r = image[x + w*(y + h*0)]; + float g = image[x + w*(y + h*1)]; + float b = image[x + w*(y + h*2)]; + float3 rgb = make_float3(r,g,b); + if(train){ + float3 hsv = rgb_to_hsv_kernel(rgb); + hsv.y *= saturation; + hsv.z *= exposure; + rgb = hsv_to_rgb_kernel(hsv); + } else { + shift = 0; + } + image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5f)*shift; + image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5f)*shift; + image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5f)*shift; +} + +__global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= size) return; + + float cx = w/2.f; + float cy = h/2.f; + + int count = id; + int j = id % crop_width; + id /= crop_width; + int i = id % crop_height; + id /= crop_height; + int k = id % c; + id /= c; + int b = id; + + float r4 = rand[8*b + 4]; + float r5 = rand[8*b + 5]; + float r6 = rand[8*b + 6]; + float r7 = rand[8*b + 7]; + + float dw = (w - crop_width)*r4; + float dh = (h - crop_height)*r5; + flip = (flip && (r6 > .5f)); + angle = 2*angle*r7 - angle; + if(!train){ + dw = (w - crop_width)/2.f; + dh = (h - crop_height)/2.f; + flip = 0; + angle = 0; + } + + input += w*h*c*b; + + float x = (flip) ? 
w - dw - j - 1 : j + dw; + float y = i + dh; + + float rx = cosf(angle)*(x-cx) - sinf(angle)*(y-cy) + cx; + float ry = sinf(angle)*(x-cx) + cosf(angle)*(y-cy) + cy; + + output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k); +} + +extern "C" void forward_crop_layer_gpu(crop_layer layer, network net) +{ + cuda_random(layer.rand_gpu, layer.batch*8); + + float radians = layer.angle*3.14159265f/180.f; + + float scale = 2; + float translate = -1; + if(layer.noadjust){ + scale = 1; + translate = 0; + } + + int size = layer.batch * layer.w * layer.h; + + levels_image_kernel<<>>(net.input_gpu, layer.rand_gpu, layer.batch, layer.w, layer.h, net.train, layer.saturation, layer.exposure, translate, scale, layer.shift); + check_error(cudaPeekAtLastError()); + + size = layer.batch*layer.c*layer.out_w*layer.out_h; + + forward_crop_layer_kernel<<>>(net.input_gpu, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, net.train, layer.flip, radians, layer.output_gpu); + check_error(cudaPeekAtLastError()); + +/* + cuda_pull_array(layer.output_gpu, layer.output, size); + image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 0*(size/layer.batch)); + image im2 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 1*(size/layer.batch)); + image im3 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 2*(size/layer.batch)); + + translate_image(im, -translate); + scale_image(im, 1/scale); + translate_image(im2, -translate); + scale_image(im2, 1/scale); + translate_image(im3, -translate); + scale_image(im3, 1/scale); + + show_image(im, "cropped"); + show_image(im2, "cropped2"); + show_image(im3, "cropped3"); + cvWaitKey(0); + */ +} + diff --git a/workloads/realworld/standard/darknet/src/cuda_dark.cu b/workloads/realworld/standard/darknet/src/cuda_dark.cu new file mode 100644 index 0000000000000000000000000000000000000000..e478ff79abe59a1b9203c30b5a6564a3e2ca0dc5 --- /dev/null 
/* ---- patch metadata (preserved from the surrounding diff): ----
 * +++ b/workloads/realworld/standard/darknet/src/cuda_dark.cu
 * @@ -0,0 +1,419 @@
 */

int gpu_index = 0;

#ifdef GPU

#include "cuda.h"
#include "utils.h"
#include "blas.h"
/* NOTE(review): the next header names were eaten by the text mangling
 * (everything inside angle brackets was stripped).  Reconstructed from the
 * usage below (assert, calloc/free, time, abi::__cxa_demangle) — confirm
 * against the original file. */
#include <assert.h>
#include <stdlib.h>
#include <time.h>

#include <cxxabi.h>

/* Select CUDA device n and remember its index in the global gpu_index. */
void cuda_set_device(int n)
{
    gpu_index = n;
    cudaError_t status = cudaSetDevice(n);
    check_error(status);
}

/* Return the index of the currently active CUDA device. */
int cuda_get_device()
{
    int n = 0;
    cudaError_t status = cudaGetDevice(&n);
    check_error(status);
    return n;
}

/* Abort with a message if `status`, or any asynchronous error pending on the
 * device, indicates failure.  Calls cudaDeviceSynchronize, so it is costly. */
void check_error(cudaError_t status)
{
    cudaDeviceSynchronize();
    cudaError_t status2 = cudaGetLastError();
    if (status != cudaSuccess)
    {
        const char *s = cudaGetErrorString(status);
        char buffer[256];
        printf("CUDA Error: %s\n", s);
        assert(0);  /* aborts in debug builds; the lines below run only with NDEBUG */
        snprintf(buffer, 256, "CUDA Error: %s", s);
        error(buffer);
    }
    if (status2 != cudaSuccess)
    {
        /* FIX(review): this branch originally formatted
         * cudaGetErrorString(status) — the first, already-reported status —
         * so the asynchronous error was described with the wrong message.
         * It must use status2. */
        const char *s = cudaGetErrorString(status2);
        char buffer[256];
        printf("CUDA Error Prev: %s\n", s);
        assert(0);
        snprintf(buffer, 256, "CUDA Error Prev: %s", s);
        error(buffer);
    }
}

/* Map a flat element count onto an (x, y, 1) grid of BLOCK-sized blocks,
 * splitting into two dimensions when x would exceed the 65535 grid limit. */
dim3 cuda_gridsize(size_t n){
    size_t k = (n-1) / BLOCK + 1;
    size_t x = k;
    size_t y = 1;
    if(x > 65535){
        x = ceil(sqrt(k));
        y = (n-1)/(x*BLOCK) + 1;
    }
    dim3 d = {x, y, 1};
    //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK);
    return d;
}

#ifdef CUDNN
/* One lazily-created cuDNN handle per device (supports up to 16 devices). */
cudnnHandle_t cudnn_handle()
{
    static int init[16] = {0};
    static cudnnHandle_t handle[16];
    int i = cuda_get_device();
    if(!init[i]) {
        cudnnCreate(&handle[i]);
        init[i] = 1;
    }
    return handle[i];
}
#endif

/* One lazily-created cuBLAS handle per device (supports up to 16 devices). */
cublasHandle_t blas_handle()
{
    static int init[16] = {0};
    static cublasHandle_t handle[16];
    int i = cuda_get_device();
    if(!init[i]) {
        cublasCreate(&handle[i]);
        init[i] = 1;
    }
    return handle[i];
}

/* Allocate n floats on the device; copy from x when given, else zero-fill. */
float *cuda_make_array(float *x, size_t n)
{
    float *x_gpu;
    size_t size = sizeof(float)*n;
    cudaError_t status = cudaMalloc((void **)&x_gpu, size);
    check_error(status);
    if(x){
        status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice);
        check_error(status);
    } else {
        fill_gpu(n, 0, x_gpu, 1);
    }
    if(!x_gpu) error("Cuda malloc failed\n");
    return x_gpu;
}

/* Fill x_gpu with n uniform random floats; one cuRAND generator per device,
 * seeded from wall-clock time on first use. */
void cuda_random(float *x_gpu, size_t n)
{
    static curandGenerator_t gen[16];
    static int init[16] = {0};
    int i = cuda_get_device();
    if(!init[i]){
        curandCreateGenerator(&gen[i], CURAND_RNG_PSEUDO_DEFAULT);
        curandSetPseudoRandomGeneratorSeed(gen[i], time(0));
        init[i] = 1;
    }
    curandGenerateUniform(gen[i], x_gpu, n);
    check_error(cudaPeekAtLastError());
}

/* Debug helper: print the RMS difference between a device and host array;
 * returns the (unnormalized) squared error. */
float cuda_compare(float *x_gpu, float *x, size_t n, char *s)
{
    float *tmp = (float *) calloc(n, sizeof(float));
    cuda_pull_array(x_gpu, tmp, n);
    //int i;
    //for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]);
    axpy_cpu(n, -1, x, 1, tmp, 1);
    float err = dot_cpu(n, tmp, 1, tmp, 1);
    printf("Error %s: %f\n", s, sqrt(err/n));
    free(tmp);
    return err;
}

/* Allocate n ints on the device, optionally initialized from x. */
int *cuda_make_int_array(int *x, size_t n)
{
    int *x_gpu;
    size_t size = sizeof(int)*n;
    cudaError_t status = cudaMalloc((void **)&x_gpu, size);
    check_error(status);
    if(x){
        status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice);
        check_error(status);
    }
    if(!x_gpu) error("Cuda malloc failed\n");
    return x_gpu;
}

void cuda_free(float *x_gpu)
{
    cudaError_t status = cudaFree(x_gpu);
    check_error(status);
}

/* Host -> device copy of n floats. */
void cuda_push_array(float *x_gpu, float *x, size_t n)
{
    size_t size = sizeof(float)*n;
    cudaError_t status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice);
    check_error(status);
}

/* Device -> host copy of n floats. */
void cuda_pull_array(float *x_gpu, float *x, size_t n)
{
    size_t size = sizeof(float)*n;
    cudaError_t status = cudaMemcpy(x, x_gpu, size, cudaMemcpyDeviceToHost);
    check_error(status);
}

/* Magnitude of a device array, computed on the host via a temporary copy. */
float cuda_mag_array(float *x_gpu, size_t n)
{
    float *temp = (float *) calloc(n, sizeof(float));
    cuda_pull_array(x_gpu, temp, n);
    float m = mag_array(temp, n);
    free(temp);
    return m;
}

/* Human-readable label for a CUPTI memcpy direction. */
static const char *
getMemcpyKindString(CUpti_ActivityMemcpyKind kind)
{
    switch (kind)
    {
    case CUPTI_ACTIVITY_MEMCPY_KIND_HTOD:
        return "HtoD";
    case CUPTI_ACTIVITY_MEMCPY_KIND_DTOH:
        return "DtoH";
    case CUPTI_ACTIVITY_MEMCPY_KIND_HTOA:
        return "HtoA";
    case CUPTI_ACTIVITY_MEMCPY_KIND_ATOH:
        return "AtoH";
    case CUPTI_ACTIVITY_MEMCPY_KIND_ATOA:
        return "AtoA";
    case CUPTI_ACTIVITY_MEMCPY_KIND_ATOD:
        return "AtoD";
    case CUPTI_ACTIVITY_MEMCPY_KIND_DTOA:
        return "DtoA";
    case CUPTI_ACTIVITY_MEMCPY_KIND_DTOD:
        return "DtoD";
    case CUPTI_ACTIVITY_MEMCPY_KIND_HTOH:
        return "HtoH";
    default:
        break;
    }

    /* FIX(review): the literal read "" in the mangled text; the CUPTI sample
     * this code derives from returns "<unknown>" — the angle-bracket content
     * was stripped by the mangling.  Restored. */
    return "<unknown>";
}

/* Human-readable label for a unified-memory counter kind. */
static const char *
getUvmCounterKindString(CUpti_ActivityUnifiedMemoryCounterKind kind)
{
    switch (kind)
    {
    case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD:
        return "BYTES_TRANSFER_HTOD";
    case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH:
        return "BYTES_TRANSFER_DTOH";
    default:
        break;
    }
    /* FIX(review): same "<unknown>" restoration as above. */
    return "<unknown>";
}

/* Print one CUPTI activity record as a CSV-ish line (kernel, runtime API,
 * explicit memcpy or unified-memory transfer). */
static void
printActivity(CUpti_Activity *record)
{
    switch (record->kind)
    {
    case CUPTI_ACTIVITY_KIND_KERNEL:
    {
        int status;
        CUpti_ActivityKernel4 *kernel = (CUpti_ActivityKernel4 *)record;
        printf("KERNEL %s, %llu, %llu, %llu\n",
               abi::__cxa_demangle(kernel->name, 0, 0, &status),
               (unsigned long long)(kernel->start),
               (unsigned long long)(kernel->end),
               (unsigned long long)(kernel->end) - (kernel->start));
        break;
    }
    case CUPTI_ACTIVITY_KIND_RUNTIME:
    {
        CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record;
        const char *callback_name;
        cuptiGetCallbackName(CUPTI_CB_DOMAIN_RUNTIME_API, api->cbid, &callback_name);
        // printf("RUNTIME %s (cbid=%u) [ %llu - %llu ] process %u, thread %u, correlation %u\n",
        //        callback_name, api->cbid,
        //        (unsigned long long)(api->start - startTimestamp),
        //        (unsigned long long)(api->end - startTimestamp),
        //        api->processId, api->threadId, api->correlationId);
        printf("RUNTIME %s (cbid=%u), %llu,%llu,%llu, process %u, thread %u, correlation %u\n",
               callback_name, api->cbid,
               (unsigned long long)(api->start),
               (unsigned long long)(api->end),
               (unsigned long long)(api->end - api->start),
               api->processId, api->threadId, api->correlationId);
        break;
    }
    case CUPTI_ACTIVITY_KIND_MEMCPY:
    {
        CUpti_ActivityMemcpy4 *memcpy = (CUpti_ActivityMemcpy4 *)record;
        printf("MEMCPY %s, size %llu, %llu, %llu, %llu\n",
               getMemcpyKindString((CUpti_ActivityMemcpyKind)memcpy->copyKind),
               (unsigned long long)memcpy->bytes,
               (unsigned long long)(memcpy->start),
               (unsigned long long)(memcpy->end),
               (unsigned long long)(memcpy->end) - (memcpy->start));
        break;
    }
    case CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER:
    {
        CUpti_ActivityUnifiedMemoryCounter2 *uvm = (CUpti_ActivityUnifiedMemoryCounter2 *)record;
        printf("UVM MEMCPY %s, size %llu, %llu, %llu, %llu \n",
               getUvmCounterKindString(uvm->counterKind),
               (unsigned long long)uvm->value,
               (unsigned long long)(uvm->start),
               (unsigned long long)(uvm->end),
               (unsigned long long)(uvm->end - uvm->start));
        break;
    }
    default:
        /* FIX(review): added so unhandled record kinds are explicitly ignored
         * instead of relying on falling off the switch (also silences the
         * -Wswitch warning). */
        break;
    }
}

/* CUPTI callback: hand CUPTI an ALIGN_SIZE-aligned BUF_SIZE buffer. */
void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords)
{
    uint8_t *bfr = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE);
    if (bfr == NULL)
    {
        printf("Error: out of memory\n");
        exit(-1);
    }

    *size = BUF_SIZE;
    *buffer = ALIGN_BUFFER(bfr, ALIGN_SIZE);
    *maxNumRecords = 0;
}

/* CUPTI callback: drain every record from a completed buffer, report drops,
 * then free the buffer. */
void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize)
{
    CUptiResult status;
    CUpti_Activity *record = NULL;
    if (validSize > 0)
    {
        do
        {
            status = cuptiActivityGetNextRecord(buffer, validSize, &record);
            if (status == CUPTI_SUCCESS)
            {
                printActivity(record);
            }
            else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED)
                break;
            else
            {
                CUPTI_CALL(status);
            }
        } while (1);

        // report any records dropped from the queue
        size_t dropped;
        CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped));
        if (dropped != 0)
        {
            printf("Dropped %u activity records\n", (unsigned int)dropped);
        }
    }

    free(buffer);
}

#ifndef PROFILE
/* No-op tracing stubs used when profiling is compiled out. */
void initTrace() {
    printf("not Profile initTrace()\n");
    return;
}

void finiTrace() {
    return;
}

#else
/* Enable CUPTI activity tracing: unified-memory counters (when supported),
 * kernel / runtime / memcpy records, and the buffer callbacks. */
void initTrace()
{
    printf("Profile initTrace()\n");
    size_t attrValue = 0, attrValueSize = sizeof(size_t);

    CUpti_ActivityUnifiedMemoryCounterConfig config[2];

    // configure unified memory counters
    config[0].scope = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE;
    config[0].kind = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD;
    config[0].deviceId = 0;
    config[0].enable = 1;

    config[1].scope = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE;
    config[1].kind = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH;
    config[1].deviceId = 0;
    config[1].enable = 1;

    CUptiResult res = cuptiActivityConfigureUnifiedMemoryCounter(config, 2);
    if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED)
    {
        printf("Test is waived, unified memory is not supported on the underlying platform.\n");
    }
    else if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE)
    {
        printf("Test is waived, unified memory is not supported on the device.\n");
    }
    else if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES)
    {
        printf("Test is waived, unified memory is not supported on the non-P2P multi-gpu setup.\n");
    }
    else
    {
        CUPTI_CALL(res);
    }

    CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL));
    CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME));
    CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY));
    CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER));

    // Register callbacks for buffer requests and for buffers completed by CUPTI.
    CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted));

    // Optionally get and set activity attributes.
    // Attributes can be set by the CUPTI client to change behavior of the activity API.
    /* NOTE(review): initTrace continues past this chunk boundary. */
+ // Some attributes require to be set before any CUDA context is created to be effective, + // e.g. to be applied to all device buffer allocations (see documentation). + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + printf("%s = %llu B\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +} + +void finiTrace() +{ + // Force flush any remaining activity buffers before termination of the application + CUPTI_CALL(cuptiActivityFlushAll(1)); +} +#endif + +void GPU_argv_init() +{ + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, GPU_DEVICE); + printf("setting device %d with name %s\n", GPU_DEVICE, deviceProp.name); + cudaSetDevice(GPU_DEVICE); +} +#else +void cuda_set_device(int n){} + +#endif diff --git a/workloads/realworld/standard/darknet/src/cuda_dark.h b/workloads/realworld/standard/darknet/src/cuda_dark.h new file mode 100644 index 0000000000000000000000000000000000000000..ac6b60bc3d27ec1ebc8190463648b946f6c809ef --- /dev/null +++ b/workloads/realworld/standard/darknet/src/cuda_dark.h @@ -0,0 +1,63 @@ +#ifndef CUDA_H +#define CUDA_H + +#include "darknet.h" + +#ifdef GPU + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) (((uintptr_t)(buffer) & ((align)-1)) ? 
((buffer) + (align) - ((uintptr_t)(buffer) & ((align)-1))) : (buffer)) + +static uint64_t startTimestamp; +// Timestamp at trace initialization time. Used to normalized other +// timestamps + +#define CUPTI_CALL(call) \ + do \ + { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) \ + { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + if (_status == CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED) \ + exit(0); \ + else \ + exit(-1); \ + } \ + } while (0) + +#include + +#ifdef __cplusplus +extern "C" { +#endif +void check_error(cudaError_t status); +cublasHandle_t blas_handle(); +int *cuda_make_int_array(int *x, size_t n); +void cuda_random(float *x_gpu, size_t n); +float cuda_compare(float *x_gpu, float *x, size_t n, char *s); +dim3 cuda_gridsize(size_t n); + +void GPU_argv_init(); +void initTrace(); +void finiTrace(); +void startCPU(); +void endCPU(); +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords); +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); +static void printActivity(CUpti_Activity *record); + +#ifdef __cplusplus +} +#endif + +#ifdef CUDNN +cudnnHandle_t cudnn_handle(); +#endif + +#endif +#endif diff --git a/workloads/realworld/standard/darknet/src/cupti_add.cpp b/workloads/realworld/standard/darknet/src/cupti_add.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a0d16eb72f41f8e858a59354d2de9d6b470c0e76 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/cupti_add.cpp @@ -0,0 +1,112 @@ +#include "cupti_add.h" + +static void +printActivity(CUpti_Activity *record) +{ + switch (record->kind) + { + case CUPTI_ACTIVITY_KIND_KERNEL: + { + int status; + CUpti_ActivityKernel4 *kernel = (CUpti_ActivityKernel4 *)record; + printf("CUPTI,%s,%llu,%llu,%llu\n", + abi::__cxa_demangle(kernel->name, 
0, 0, &status), + (unsigned long long)(kernel->start), + (unsigned long long)(kernel->end), + (unsigned long long)(kernel->end - startTimestamp) - (kernel->start - startTimestamp)); + break; + } + case CUPTI_ACTIVITY_KIND_RUNTIME: + { + CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record; + const char *callback_name; + cuptiGetCallbackName(CUPTI_CB_DOMAIN_RUNTIME_API, api->cbid, &callback_name); + printf("RUNTIME %s (cbid=%u) [ %llu - %llu ] process %u, thread %u, correlation %u\n", + callback_name, api->cbid, + (unsigned long long)(api->start - startTimestamp), + (unsigned long long)(api->end - startTimestamp), + api->processId, api->threadId, api->correlationId); + break; + } + } +} + +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) +{ + uint8_t *bfr = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE); + if (bfr == NULL) + { + printf("Error: out of memory\n"); + exit(-1); + } + + *size = BUF_SIZE; + *buffer = ALIGN_BUFFER(bfr, ALIGN_SIZE); + *maxNumRecords = 0; +} + +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) +{ + CUptiResult status; + CUpti_Activity *record = NULL; + if (validSize > 0) + { + do + { + status = cuptiActivityGetNextRecord(buffer, validSize, &record); + if (status == CUPTI_SUCCESS) + { + printActivity(record); + } + else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) + break; + else + { + CUPTI_CALL(status); + } + } while (1); + + // report any records dropped from the queue + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if (dropped != 0) + { + printf("Dropped %u activity records\n", (unsigned int)dropped); + } + } + + free(buffer); +} + +void initTrace() +{ + size_t attrValue = 0, attrValueSize = sizeof(size_t); + + // CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); + + // Register callbacks for buffer requests and for buffers 
completed by CUPTI. + CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + + // Optionally get and set activity attributes. + // Attributes can be set by the CUPTI client to change behavior of the activity API. + // Some attributes require to be set before any CUDA context is created to be effective, + // e.g. to be applied to all device buffer allocations (see documentation). + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + printf("%s = %llu B\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +} + +void finiTrace() +{ + // Force flush any remaining activity buffers before termination of the application + CUPTI_CALL(cuptiActivityFlushAll(1)); +} \ No newline at end of file diff --git a/workloads/realworld/standard/darknet/src/cupti_add.h b/workloads/realworld/standard/darknet/src/cupti_add.h new file mode 100644 index 0000000000000000000000000000000000000000..a30b7b847ad13381032d2f60eac2955d30146485 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/cupti_add.h @@ -0,0 +1,36 @@ +#include +#include +#include + + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) \ + (((uintptr_t) (buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer)) + +static uint64_t startTimestamp; +// Timestamp at trace initialization time. 
Used to normalized other +// timestamps + +#define CUPTI_CALL(call) \ + do { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + if(_status == CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED) \ + exit(0); \ + else \ + exit(-1); \ + } \ + } while (0) + +void initTrace(); +void finiTrace(); +void startCPU(); +void endCPU(); +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords); +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); +static void printActivity(CUpti_Activity *record); diff --git a/workloads/realworld/standard/darknet/src/data.c b/workloads/realworld/standard/darknet/src/data.c new file mode 100644 index 0000000000000000000000000000000000000000..d50f1346c5cdcfe1dbeb2d0f70ec408fb4f33960 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/data.c @@ -0,0 +1,1685 @@ +#include "data.h" +#include "utils.h" +#include "image.h" +#include "cuda_dark.h" + +#include +#include +#include + +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + +list *get_paths(char *filename) +{ + char *path; + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + list *lines = make_list(); + while((path=fgetl(file))){ + list_insert(lines, path); + } + fclose(file); + return lines; +} + +/* +char **get_random_paths_indexes(char **paths, int n, int m, int *indexes) +{ + char **random_paths = calloc(n, sizeof(char*)); + int i; + pthread_mutex_lock(&mutex); + for(i = 0; i < n; ++i){ + int index = rand()%m; + indexes[i] = index; + random_paths[i] = paths[index]; + if(i == 0) printf("%s\n", paths[index]); + } + pthread_mutex_unlock(&mutex); + return random_paths; +} +*/ + +char **get_random_paths(char **paths, int n, int m) +{ + char **random_paths = calloc(n, sizeof(char*)); + int 
i; + pthread_mutex_lock(&mutex); + for(i = 0; i < n; ++i){ + int index = rand()%m; + random_paths[i] = paths[index]; + //if(i == 0) printf("%s\n", paths[index]); + } + pthread_mutex_unlock(&mutex); + return random_paths; +} + +char **find_replace_paths(char **paths, int n, char *find, char *replace) +{ + char **replace_paths = calloc(n, sizeof(char*)); + int i; + for(i = 0; i < n; ++i){ + char replaced[4096]; + find_replace(paths[i], find, replace, replaced); + replace_paths[i] = copy_string(replaced); + } + return replace_paths; +} + +matrix load_image_paths_gray(char **paths, int n, int w, int h) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image(paths[i], w, h, 3); + + image gray = grayscale_image(im); + free_image(im); + im = gray; + + X.vals[i] = im.data; + X.cols = im.h*im.w*im.c; + } + return X; +} + +matrix load_image_paths(char **paths, int n, int w, int h) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], w, h); + X.vals[i] = im.data; + X.cols = im.h*im.w*im.c; + } + return X; +} + +matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], 0, 0); + image crop; + if(center){ + crop = center_crop_image(im, size, size); + } else { + crop = random_augment_image(im, angle, aspect, min, max, size, size); + } + int flip = rand()%2; + if (flip) flip_image(crop); + random_distort_image(crop, hue, saturation, exposure); + + /* + show_image(im, "orig"); + show_image(crop, "crop"); + cvWaitKey(0); + */ + //grayscale_image_3c(crop); + free_image(im); + X.vals[i] = crop.data; + X.cols = 
crop.h*crop.w*crop.c; + } + return X; +} + + +box_label *read_boxes(char *filename, int *n) +{ + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + float x, y, h, w; + int id; + int count = 0; + int size = 64; + box_label *boxes = calloc(size, sizeof(box_label)); + while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){ + if(count == size) { + size = size * 2; + boxes = realloc(boxes, size*sizeof(box_label)); + } + boxes[count].id = id; + boxes[count].x = x; + boxes[count].y = y; + boxes[count].h = h; + boxes[count].w = w; + boxes[count].left = x - w/2; + boxes[count].right = x + w/2; + boxes[count].top = y - h/2; + boxes[count].bottom = y + h/2; + ++count; + } + fclose(file); + *n = count; + return boxes; +} + +void randomize_boxes(box_label *b, int n) +{ + int i; + for(i = 0; i < n; ++i){ + box_label swap = b[i]; + int index = rand()%n; + b[i] = b[index]; + b[index] = swap; + } +} + +void correct_boxes(box_label *boxes, int n, float dx, float dy, float sx, float sy, int flip) +{ + int i; + for(i = 0; i < n; ++i){ + if(boxes[i].x == 0 && boxes[i].y == 0) { + boxes[i].x = 999999; + boxes[i].y = 999999; + boxes[i].w = 999999; + boxes[i].h = 999999; + continue; + } + boxes[i].left = boxes[i].left * sx - dx; + boxes[i].right = boxes[i].right * sx - dx; + boxes[i].top = boxes[i].top * sy - dy; + boxes[i].bottom = boxes[i].bottom* sy - dy; + + if(flip){ + float swap = boxes[i].left; + boxes[i].left = 1. - boxes[i].right; + boxes[i].right = 1. 
- swap; + } + + boxes[i].left = constrain(0, 1, boxes[i].left); + boxes[i].right = constrain(0, 1, boxes[i].right); + boxes[i].top = constrain(0, 1, boxes[i].top); + boxes[i].bottom = constrain(0, 1, boxes[i].bottom); + + boxes[i].x = (boxes[i].left+boxes[i].right)/2; + boxes[i].y = (boxes[i].top+boxes[i].bottom)/2; + boxes[i].w = (boxes[i].right - boxes[i].left); + boxes[i].h = (boxes[i].bottom - boxes[i].top); + + boxes[i].w = constrain(0, 1, boxes[i].w); + boxes[i].h = constrain(0, 1, boxes[i].h); + } +} + +void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int count = 0; + box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + float x,y,w,h; + int id; + int i; + + for (i = 0; i < count && i < 90; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if (w < .0 || h < .0) continue; + + int index = (4+classes) * i; + + truth[index++] = x; + truth[index++] = y; + truth[index++] = w; + truth[index++] = h; + + if (id < classes) truth[index+id] = 1; + } + free(boxes); +} + +void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + int count = 0; + 
box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + float x,y,w,h; + int id; + int i; + + for (i = 0; i < count; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if (w < .005 || h < .005) continue; + + int col = (int)(x*num_boxes); + int row = (int)(y*num_boxes); + + x = x*num_boxes - col; + y = y*num_boxes - row; + + int index = (col+row*num_boxes)*(5+classes); + if (truth[index]) continue; + truth[index++] = 1; + + if (id < classes) truth[index+id] = 1; + index += classes; + + truth[index++] = x; + truth[index++] = y; + truth[index++] = w; + truth[index++] = h; + } + free(boxes); +} + +void load_rle(image im, int *rle, int n) +{ + int count = 0; + int curr = 0; + int i,j; + for(i = 0; i < n; ++i){ + for(j = 0; j < rle[i]; ++j){ + im.data[count++] = curr; + } + curr = 1 - curr; + } + for(; count < im.h*im.w*im.c; ++count){ + im.data[count] = curr; + } +} + +void or_image(image src, image dest, int c) +{ + int i; + for(i = 0; i < src.w*src.h; ++i){ + if(src.data[i]) dest.data[dest.w*dest.h*c + i] = 1; + } +} + +void exclusive_image(image src) +{ + int k, j, i; + int s = src.w*src.h; + for(k = 0; k < src.c-1; ++k){ + for(i = 0; i < s; ++i){ + if (src.data[k*s + i]){ + for(j = k+1; j < src.c; ++j){ + src.data[j*s + i] = 0; + } + } + } + } +} + +box bound_image(image im) +{ + int x,y; + int minx = im.w; + int miny = im.h; + int maxx = 0; + int maxy = 0; + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + if(im.data[y*im.w + x]){ + minx = (x < minx) ? x : minx; + miny = (y < miny) ? y : miny; + maxx = (x > maxx) ? x : maxx; + maxy = (y > maxy) ? 
y : maxy; + } + } + } + box b = {minx, miny, maxx-minx + 1, maxy-miny + 1}; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + return b; +} + +void fill_truth_iseg(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + int i = 0; + int j; + image part = make_image(w, h, 1); + while((fscanf(file, "%d %s", &id, buff) == 2) && i < num_boxes){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect); + if(flip) flip_image(sized); + + image mask = resize_image(sized, mw, mh); + truth[i*(mw*mh+1)] = id; + for(j = 0; j < mw*mh; ++j){ + truth[i*(mw*mh + 1) + 1 + j] = mask.data[j]; + } + ++i; + + free_image(mask); + free_image(sized); + free(rle); + } + if(i < num_boxes) truth[i*(mw*mh+1)] = -1; + fclose(file); + free_image(part); +} + +void fill_truth_mask(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + int i = 0; + image part = make_image(w, h, 1); + while((fscanf(file, "%d %s", &id, buff) == 2) && i < num_boxes){ + int n = 0; + int 
*rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect); + if(flip) flip_image(sized); + box b = bound_image(sized); + if(b.w > 0){ + image crop = crop_image(sized, b.x, b.y, b.w, b.h); + image mask = resize_image(crop, mw, mh); + truth[i*(4 + mw*mh + 1) + 0] = (b.x + b.w/2.)/sized.w; + truth[i*(4 + mw*mh + 1) + 1] = (b.y + b.h/2.)/sized.h; + truth[i*(4 + mw*mh + 1) + 2] = b.w/sized.w; + truth[i*(4 + mw*mh + 1) + 3] = b.h/sized.h; + int j; + for(j = 0; j < mw*mh; ++j){ + truth[i*(4 + mw*mh + 1) + 4 + j] = mask.data[j]; + } + truth[i*(4 + mw*mh + 1) + 4 + mw*mh] = id; + free_image(crop); + free_image(mask); + ++i; + } + free_image(sized); + free(rle); + } + fclose(file); + free_image(part); +} + + +void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + + find_replace(labelpath, "raw", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + int count = 0; + box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + if(count > num_boxes) count = num_boxes; + float x,y,w,h; + int id; + int i; + int sub = 0; + + for (i = 0; i < count; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if ((w < .001 || h < .001)) { + ++sub; + continue; + } + + truth[(i-sub)*5+0] = x; + truth[(i-sub)*5+1] = y; + truth[(i-sub)*5+2] = w; + truth[(i-sub)*5+3] = h; + truth[(i-sub)*5+4] = id; + } + free(boxes); +} + +#define NUMCHARS 37 + +void print_letters(float 
*pred, int n) +{ + int i; + for(i = 0; i < n; ++i){ + int index = max_index(pred+i*NUMCHARS, NUMCHARS); + printf("%c", int_to_alphanum(index)); + } + printf("\n"); +} + +void fill_truth_captcha(char *path, int n, float *truth) +{ + char *begin = strrchr(path, '/'); + ++begin; + int i; + for(i = 0; i < strlen(begin) && i < n && begin[i] != '.'; ++i){ + int index = alphanum_to_int(begin[i]); + if(index > 35) printf("Bad %c\n", begin[i]); + truth[i*NUMCHARS+index] = 1; + } + for(;i < n; ++i){ + truth[i*NUMCHARS + NUMCHARS-1] = 1; + } +} + +data load_data_captcha(char **paths, int n, int m, int k, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = make_matrix(n, k*NUMCHARS); + int i; + for(i = 0; i < n; ++i){ + fill_truth_captcha(paths[i], k, d.y.vals[i]); + } + if(m) free(paths); + return d; +} + +data load_data_captcha_encode(char **paths, int n, int m, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.X.cols = 17100; + d.y = d.X; + if(m) free(paths); + return d; +} + +void fill_truth(char *path, char **labels, int k, float *truth) +{ + int i; + memset(truth, 0, k*sizeof(float)); + int count = 0; + for(i = 0; i < k; ++i){ + if(strstr(path, labels[i])){ + truth[i] = 1; + ++count; + //printf("%s %s %d\n", path, labels[i], i); + } + } + if(count != 1 && (k != 1 || count != 0)) printf("Too many or too few labels: %d, %s\n", count, path); +} + +void fill_hierarchy(float *truth, int k, tree *hierarchy) +{ + int j; + for(j = 0; j < k; ++j){ + if(truth[j]){ + int parent = hierarchy->parent[j]; + while(parent >= 0){ + truth[parent] = 1; + parent = hierarchy->parent[parent]; + } + } + } + int i; + int count = 0; + for(j = 0; j < hierarchy->groups; ++j){ + //printf("%d\n", count); + int mask = 1; + for(i = 0; i < hierarchy->group_size[j]; ++i){ + if(truth[count + i]){ + mask = 0; + break; + } 
+ } + if (mask) { + for(i = 0; i < hierarchy->group_size[j]; ++i){ + truth[count + i] = SECRET_NUM; + } + } + count += hierarchy->group_size[j]; + } +} + +matrix load_regression_labels_paths(char **paths, int n, int k) +{ + matrix y = make_matrix(n, k); + int i,j; + for(i = 0; i < n; ++i){ + char labelpath[4096]; + find_replace(paths[i], "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".BMP", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPeG", ".txt", labelpath); + find_replace(labelpath, ".Jpeg", ".txt", labelpath); + find_replace(labelpath, ".PNG", ".txt", labelpath); + find_replace(labelpath, ".TIF", ".txt", labelpath); + find_replace(labelpath, ".bmp", ".txt", labelpath); + find_replace(labelpath, ".jpeg", ".txt", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".tif", ".txt", labelpath); + + FILE *file = fopen(labelpath, "r"); + for(j = 0; j < k; ++j){ + fscanf(file, "%f", &(y.vals[i][j])); + } + fclose(file); + } + return y; +} + +matrix load_labels_paths(char **paths, int n, char **labels, int k, tree *hierarchy) +{ + matrix y = make_matrix(n, k); + int i; + for(i = 0; i < n && labels; ++i){ + fill_truth(paths[i], labels, k, y.vals[i]); + if(hierarchy){ + fill_hierarchy(y.vals[i], k, hierarchy); + } + } + return y; +} + +matrix load_tags_paths(char **paths, int n, int k) +{ + matrix y = make_matrix(n, k); + int i; + //int count = 0; + for(i = 0; i < n; ++i){ + char label[4096]; + find_replace(paths[i], "images", "labels", label); + find_replace(label, ".jpg", ".txt", label); + FILE *file = fopen(label, "r"); + if (!file) continue; + //++count; + int tag; + while(fscanf(file, "%d", &tag) == 1){ + if(tag < k){ + y.vals[i][tag] = 1; + } + } + fclose(file); + } + //printf("%d/%d\n", 
count, n); + return y; +} + +char **get_labels(char *filename) +{ + list *plist = get_paths(filename); + char **labels = (char **)list_to_array(plist); + free_list(plist); + return labels; +} + +void free_data(data d) +{ + if(!d.shallow){ + free_matrix(d.X); + free_matrix(d.y); + }else{ + free(d.X.vals); + free(d.y.vals); + } +} + +image get_segmentation_image(char *path, int w, int h, int classes) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + image mask = make_image(w, h, classes); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + image part = make_image(w, h, 1); + while(fscanf(file, "%d %s", &id, buff) == 2){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + or_image(part, mask, id); + free(rle); + } + //exclusive_image(mask); + fclose(file); + free_image(part); + return mask; +} + +image get_segmentation_image2(char *path, int w, int h, int classes) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + image mask = make_image(w, h, classes+1); + int i; + for(i = 0; i < w*h; ++i){ + mask.data[w*h*classes + i] = 1; + } + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + image part = make_image(w, h, 1); + while(fscanf(file, "%d %s", &id, buff) == 2){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + or_image(part, mask, id); + for(i = 0; i < w*h; ++i){ + if(part.data[i]) mask.data[w*h*classes + i] = 
0; + } + free(rle); + } + //exclusive_image(mask); + fclose(file); + free_image(part); + return mask; +} + +data load_data_seg(int n, char **paths, int m, int w, int h, int classes, int min, int max, float angle, float aspect, float hue, float saturation, float exposure, int div) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + + d.y.rows = n; + d.y.cols = h*w*classes/div/div; + d.y.vals = calloc(d.X.rows, sizeof(float*)); + + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + + image mask = get_segmentation_image(random_paths[i], orig.w, orig.h, classes); + //image mask = make_image(orig.w, orig.h, classes+1); + image sized_m = rotate_crop_image(mask, a.rad, a.scale/div, a.w/div, a.h/div, a.dx/div, a.dy/div, a.aspect); + + if(flip) flip_image(sized_m); + d.y.vals[i] = sized_m.data; + + free_image(orig); + free_image(mask); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_iseg(int n, char **paths, int m, int w, int h, int classes, int boxes, int div, int min, int max, float angle, float aspect, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, (((w/div)*(h/div))+1)*boxes); + + for(i = 0; i < n; ++i){ + image orig = 
load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + //show_image(sized, "image"); + + fill_truth_iseg(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, w/div, h/div); + + free_image(orig); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_mask(int n, char **paths, int m, int w, int h, int classes, int boxes, int coords, int min, int max, float angle, float aspect, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, (coords+1)*boxes); + + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + //show_image(sized, "image"); + + fill_truth_mask(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, 14, 14); + + free_image(orig); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter, float hue, float saturation, float exposure) 
+{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + + int k = size*size*(5+classes); + d.y = make_matrix(n, k); + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + + int oh = orig.h; + int ow = orig.w; + + int dw = (ow*jitter); + int dh = (oh*jitter); + + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); + + int swidth = ow - pleft - pright; + int sheight = oh - ptop - pbot; + + float sx = (float)swidth / ow; + float sy = (float)sheight / oh; + + int flip = rand()%2; + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + + float dx = ((float)pleft/ow)/sx; + float dy = ((float)ptop /oh)/sy; + + image sized = resize_image(cropped, w, h); + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + + fill_truth_region(random_paths[i], d.y.vals[i], classes, size, flip, dx, dy, 1./sx, 1./sy); + + free_image(orig); + free_image(cropped); + } + free(random_paths); + return d; +} + +data load_data_compare(int n, char **paths, int m, int classes, int w, int h) +{ + if(m) paths = get_random_paths(paths, 2*n, m); + int i,j; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*6; + + int k = 2*(classes); + d.y = make_matrix(n, k); + for(i = 0; i < n; ++i){ + image im1 = load_image_color(paths[i*2], w, h); + image im2 = load_image_color(paths[i*2+1], w, h); + + d.X.vals[i] = calloc(d.X.cols, sizeof(float)); + memcpy(d.X.vals[i], im1.data, h*w*3*sizeof(float)); + memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float)); + + int id; + float iou; + + char imlabel1[4096]; + char imlabel2[4096]; + find_replace(paths[i*2], "imgs", "labels", imlabel1); + find_replace(imlabel1, "jpg", "txt", 
imlabel1); + FILE *fp1 = fopen(imlabel1, "r"); + + while(fscanf(fp1, "%d %f", &id, &iou) == 2){ + if (d.y.vals[i][2*id] < iou) d.y.vals[i][2*id] = iou; + } + + find_replace(paths[i*2+1], "imgs", "labels", imlabel2); + find_replace(imlabel2, "jpg", "txt", imlabel2); + FILE *fp2 = fopen(imlabel2, "r"); + + while(fscanf(fp2, "%d %f", &id, &iou) == 2){ + if (d.y.vals[i][2*id + 1] < iou) d.y.vals[i][2*id + 1] = iou; + } + + for (j = 0; j < classes; ++j){ + if (d.y.vals[i][2*j] > .5 && d.y.vals[i][2*j+1] < .5){ + d.y.vals[i][2*j] = 1; + d.y.vals[i][2*j+1] = 0; + } else if (d.y.vals[i][2*j] < .5 && d.y.vals[i][2*j+1] > .5){ + d.y.vals[i][2*j] = 0; + d.y.vals[i][2*j+1] = 1; + } else { + d.y.vals[i][2*j] = SECRET_NUM; + d.y.vals[i][2*j+1] = SECRET_NUM; + } + } + fclose(fp1); + fclose(fp2); + + free_image(im1); + free_image(im2); + } + if(m) free(paths); + return d; +} + +data load_data_swag(char **paths, int n, int classes, float jitter) +{ + int index = rand()%n; + char *random_path = paths[index]; + + image orig = load_image_color(random_path, 0, 0); + int h = orig.h; + int w = orig.w; + + data d = {0}; + d.shallow = 0; + d.w = w; + d.h = h; + + d.X.rows = 1; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + int k = (4+classes)*90; + d.y = make_matrix(1, k); + + int dw = w*jitter; + int dh = h*jitter; + + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); + + int swidth = w - pleft - pright; + int sheight = h - ptop - pbot; + + float sx = (float)swidth / w; + float sy = (float)sheight / h; + + int flip = rand()%2; + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + + float dx = ((float)pleft/w)/sx; + float dy = ((float)ptop /h)/sy; + + image sized = resize_image(cropped, w, h); + if(flip) flip_image(sized); + d.X.vals[0] = sized.data; + + fill_truth_swag(random_path, d.y.vals[0], classes, flip, dx, dy, 1./sx, 1./sy); + + free_image(orig); + 
free_image(cropped); + + return d; +} + +data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, 5*boxes); + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + image sized = make_image(w, h, orig.c); + fill_image(sized, .5); + + float dw = jitter * orig.w; + float dh = jitter * orig.h; + + float new_ar = (orig.w + rand_uniform(-dw, dw)) / (orig.h + rand_uniform(-dh, dh)); + //float scale = rand_uniform(.25, 2); + float scale = 1; + + float nw, nh; + + if(new_ar < 1){ + nh = scale * h; + nw = nh * new_ar; + } else { + nw = scale * w; + nh = nw / new_ar; + } + + float dx = rand_uniform(0, w - nw); + float dy = rand_uniform(0, h - nh); + + place_image(orig, nw, nh, dx, dy, sized); + + random_distort_image(sized, hue, saturation, exposure); + + int flip = rand()%2; + if(flip) flip_image(sized); + d.X.vals[i] = sized.data; + + + fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, -dx/w, -dy/h, nw/w, nh/h); + + free_image(orig); + } + free(random_paths); + return d; +} + +void *load_thread(void *ptr) +{ + //printf("Loading data: %d\n", rand()); + load_args a = *(struct load_args*)ptr; + if(a.exposure == 0) a.exposure = 1; + if(a.saturation == 0) a.saturation = 1; + if(a.aspect == 0) a.aspect = 1; + + if (a.type == OLD_CLASSIFICATION_DATA){ + *a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h); + } else if (a.type == REGRESSION_DATA){ + *a.d = load_data_regression(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == CLASSIFICATION_DATA){ + *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.min, a.max, 
a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.center); + } else if (a.type == SUPER_DATA){ + *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale); + } else if (a.type == WRITING_DATA){ + *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h); + } else if (a.type == ISEG_DATA){ + *a.d = load_data_iseg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.scale, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == INSTANCE_DATA){ + *a.d = load_data_mask(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.coords, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == SEGMENTATION_DATA){ + *a.d = load_data_seg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.scale); + } else if (a.type == REGION_DATA){ + *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); + } else if (a.type == DETECTION_DATA){ + *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); + } else if (a.type == SWAG_DATA){ + *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter); + } else if (a.type == COMPARE_DATA){ + *a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h); + } else if (a.type == IMAGE_DATA){ + *(a.im) = load_image_color(a.path, 0, 0); + *(a.resized) = resize_image(*(a.im), a.w, a.h); + } else if (a.type == LETTERBOX_DATA){ + *(a.im) = load_image_color(a.path, 0, 0); + *(a.resized) = letterbox_image(*(a.im), a.w, a.h); + } else if (a.type == TAG_DATA){ + *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } + free(ptr); + return 0; +} + +pthread_t load_data_in_thread(load_args args) +{ + pthread_t thread; + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + 
if(pthread_create(&thread, 0, load_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void *load_threads(void *ptr) +{ + int i; + load_args args = *(load_args *)ptr; + if (args.threads == 0) args.threads = 1; + data *out = args.d; + int total = args.n; + free(ptr); + data *buffers = calloc(args.threads, sizeof(data)); + pthread_t *threads = calloc(args.threads, sizeof(pthread_t)); + for(i = 0; i < args.threads; ++i){ + args.d = buffers + i; + args.n = (i+1) * total/args.threads - i * total/args.threads; + threads[i] = load_data_in_thread(args); + } + for(i = 0; i < args.threads; ++i){ + pthread_join(threads[i], 0); + } + *out = concat_datas(buffers, args.threads); + out->shallow = 0; + for(i = 0; i < args.threads; ++i){ + buffers[i].shallow = 1; + free_data(buffers[i]); + } + free(buffers); + free(threads); + return 0; +} + +void load_data_blocking(load_args args) +{ + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + load_thread(ptr); +} + +pthread_t load_data(load_args args) +{ + pthread_t thread; + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + if(pthread_create(&thread, 0, load_threads, ptr)) error("Thread creation failed"); + return thread; +} + +data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h) +{ + if(m) paths = get_random_paths(paths, n, m); + char **replace_paths = find_replace_paths(paths, n, ".png", "-label.png"); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = load_image_paths_gray(replace_paths, n, out_w, out_h); + if(m) free(paths); + int i; + for(i = 0; i < n; ++i) free(replace_paths[i]); + free(replace_paths); + return d; +} + +data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = load_labels_paths(paths, n, labels, k, 0); + if(m) free(paths); 
+ return d; +} + +/* + data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) + { + data d = {0}; + d.indexes = calloc(n, sizeof(int)); + if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes); + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure); + d.y = load_labels_paths(paths, n, labels, k); + if(m) free(paths); + return d; + } + */ + +data load_data_super(char **paths, int n, int m, int w, int h, int scale) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + + int i; + d.X.rows = n; + d.X.vals = calloc(n, sizeof(float*)); + d.X.cols = w*h*3; + + d.y.rows = n; + d.y.vals = calloc(n, sizeof(float*)); + d.y.cols = w*scale * h*scale * 3; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], 0, 0); + image crop = random_crop_image(im, w*scale, h*scale); + int flip = rand()%2; + if (flip) flip_image(crop); + image resize = resize_image(crop, w, h); + d.X.vals[i] = resize.data; + d.y.vals[i] = crop.data; + free_image(im); + } + + if(m) free(paths); + return d; +} + +data load_data_regression(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0); + d.y = load_regression_labels_paths(paths, n, k); + if(m) free(paths); + return d; +} + +data select_data(data *orig, int *inds) +{ + data d = {0}; + d.shallow = 1; + d.w = orig[0].w; + d.h = orig[0].h; + + d.X.rows = orig[0].X.rows; + d.y.rows = orig[0].X.rows; + + d.X.cols = orig[0].X.cols; + d.y.cols = orig[0].y.cols; + + d.X.vals = calloc(orig[0].X.rows, sizeof(float *)); + d.y.vals = calloc(orig[0].y.rows, sizeof(float *)); + 
int i; + for(i = 0; i < d.X.rows; ++i){ + d.X.vals[i] = orig[inds[i]].X.vals[i]; + d.y.vals[i] = orig[inds[i]].y.vals[i]; + } + return d; +} + +data *tile_data(data orig, int divs, int size) +{ + data *ds = calloc(divs*divs, sizeof(data)); + int i, j; +#pragma omp parallel for + for(i = 0; i < divs*divs; ++i){ + data d; + d.shallow = 0; + d.w = orig.w/divs * size; + d.h = orig.h/divs * size; + d.X.rows = orig.X.rows; + d.X.cols = d.w*d.h*3; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + + d.y = copy_matrix(orig.y); +#pragma omp parallel for + for(j = 0; j < orig.X.rows; ++j){ + int x = (i%divs) * orig.w / divs - (d.w - orig.w/divs)/2; + int y = (i/divs) * orig.h / divs - (d.h - orig.h/divs)/2; + image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[j]); + d.X.vals[j] = crop_image(im, x, y, d.w, d.h).data; + } + ds[i] = d; + } + return ds; +} + +data resize_data(data orig, int w, int h) +{ + data d = {0}; + d.shallow = 0; + d.w = w; + d.h = h; + int i; + d.X.rows = orig.X.rows; + d.X.cols = w*h*3; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + + d.y = copy_matrix(orig.y); +#pragma omp parallel for + for(i = 0; i < orig.X.rows; ++i){ + image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[i]); + d.X.vals[i] = resize_image(im, w, h).data; + } + return d; +} + +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.w=size; + d.h=size; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, center); + d.y = load_labels_paths(paths, n, labels, k, hierarchy); + if(m) free(paths); + return d; +} + +data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +{ + if(m) paths = 
get_random_paths(paths, n, m); + data d = {0}; + d.w = size; + d.h = size; + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0); + d.y = load_tags_paths(paths, n, k); + if(m) free(paths); + return d; +} + +matrix concat_matrix(matrix m1, matrix m2) +{ + int i, count = 0; + matrix m; + m.cols = m1.cols; + m.rows = m1.rows+m2.rows; + m.vals = calloc(m1.rows + m2.rows, sizeof(float*)); + for(i = 0; i < m1.rows; ++i){ + m.vals[count++] = m1.vals[i]; + } + for(i = 0; i < m2.rows; ++i){ + m.vals[count++] = m2.vals[i]; + } + return m; +} + +data concat_data(data d1, data d2) +{ + data d = {0}; + d.shallow = 1; + d.X = concat_matrix(d1.X, d2.X); + d.y = concat_matrix(d1.y, d2.y); + d.w = d1.w; + d.h = d1.h; + return d; +} + +data concat_datas(data *d, int n) +{ + int i; + data out = {0}; + for(i = 0; i < n; ++i){ + data new = concat_data(d[i], out); + free_data(out); + out = new; + } + return out; +} + +data load_categorical_data_csv(char *filename, int target, int k) +{ + data d = {0}; + d.shallow = 0; + matrix X = csv_to_matrix(filename); + float *truth_1d = pop_column(&X, target); + float **truth = one_hot_encode(truth_1d, X.rows, k); + matrix y; + y.rows = X.rows; + y.cols = k; + y.vals = truth; + d.X = X; + d.y = y; + free(truth_1d); + return d; +} + +data load_cifar10_data(char *filename) +{ + data d = {0}; + d.shallow = 0; + long i,j; + matrix X = make_matrix(10000, 3072); + matrix y = make_matrix(10000, 10); + d.X = X; + d.y = y; + + FILE *fp = fopen(filename, "rb"); + if(!fp) file_error(filename); + for(i = 0; i < 10000; ++i){ + unsigned char bytes[3073]; + fread(bytes, 1, 3073, fp); + int class = bytes[0]; + y.vals[i][class] = 1; + for(j = 0; j < X.cols; ++j){ + X.vals[i][j] = (double)bytes[j+1]; + } + } + scale_data_rows(d, 1./255); + //normalize_data_rows(d); + fclose(fp); + return d; +} + +void get_random_batch(data d, int n, float *X, float *y) +{ + int j; + for(j = 0; j < n; ++j){ + int 
index = rand()%d.X.rows; + memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); + memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); + } +} + +void get_next_batch(data d, int n, int offset, float *X, float *y) +{ + int j; + for(j = 0; j < n; ++j){ + int index = offset + j; + memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); + if(y) memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); + } +} + +void smooth_data(data d) +{ + int i, j; + float scale = 1. / d.y.cols; + float eps = .1; + for(i = 0; i < d.y.rows; ++i){ + for(j = 0; j < d.y.cols; ++j){ + d.y.vals[i][j] = eps * scale + (1-eps) * d.y.vals[i][j]; + } + } +} + +data load_all_cifar10() +{ + data d = {0}; + d.shallow = 0; + int i,j,b; + matrix X = make_matrix(50000, 3072); + matrix y = make_matrix(50000, 10); + d.X = X; + d.y = y; + + + for(b = 0; b < 5; ++b){ + char buff[256]; + sprintf(buff, "data/cifar/cifar-10-batches-bin/data_batch_%d.bin", b+1); + FILE *fp = fopen(buff, "rb"); + if(!fp) file_error(buff); + for(i = 0; i < 10000; ++i){ + unsigned char bytes[3073]; + fread(bytes, 1, 3073, fp); + int class = bytes[0]; + y.vals[i+b*10000][class] = 1; + for(j = 0; j < X.cols; ++j){ + X.vals[i+b*10000][j] = (double)bytes[j+1]; + } + } + fclose(fp); + } + //normalize_data_rows(d); + scale_data_rows(d, 1./255); + smooth_data(d); + return d; +} + +data load_go(char *filename) +{ + FILE *fp = fopen(filename, "rb"); + matrix X = make_matrix(3363059, 361); + matrix y = make_matrix(3363059, 361); + int row, col; + + if(!fp) file_error(filename); + char *label; + int count = 0; + while((label = fgetl(fp))){ + int i; + if(count == X.rows){ + X = resize_matrix(X, count*2); + y = resize_matrix(y, count*2); + } + sscanf(label, "%d %d", &row, &col); + char *board = fgetl(fp); + + int index = row*19 + col; + y.vals[count][index] = 1; + + for(i = 0; i < 19*19; ++i){ + float val = 0; + if(board[i] == '1') val = 1; + else if(board[i] == '2') val = -1; + X.vals[count][i] = val; + } + 
++count; + free(label); + free(board); + } + X = resize_matrix(X, count); + y = resize_matrix(y, count); + + data d = {0}; + d.shallow = 0; + d.X = X; + d.y = y; + + + fclose(fp); + + return d; +} + + +void randomize_data(data d) +{ + int i; + for(i = d.X.rows-1; i > 0; --i){ + int index = rand()%i; + float *swap = d.X.vals[index]; + d.X.vals[index] = d.X.vals[i]; + d.X.vals[i] = swap; + + swap = d.y.vals[index]; + d.y.vals[index] = d.y.vals[i]; + d.y.vals[i] = swap; + } +} + +void scale_data_rows(data d, float s) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + scale_array(d.X.vals[i], d.X.cols, s); + } +} + +void translate_data_rows(data d, float s) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + translate_array(d.X.vals[i], d.X.cols, s); + } +} + +data copy_data(data d) +{ + data c = {0}; + c.w = d.w; + c.h = d.h; + c.shallow = 0; + c.num_boxes = d.num_boxes; + c.boxes = d.boxes; + c.X = copy_matrix(d.X); + c.y = copy_matrix(d.y); + return c; +} + +void normalize_data_rows(data d) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + normalize_array(d.X.vals[i], d.X.cols); + } +} + +data get_data_part(data d, int part, int total) +{ + data p = {0}; + p.shallow = 1; + p.X.rows = d.X.rows * (part + 1) / total - d.X.rows * part / total; + p.y.rows = d.y.rows * (part + 1) / total - d.y.rows * part / total; + p.X.cols = d.X.cols; + p.y.cols = d.y.cols; + p.X.vals = d.X.vals + d.X.rows * part / total; + p.y.vals = d.y.vals + d.y.rows * part / total; + return p; +} + +data get_random_data(data d, int num) +{ + data r = {0}; + r.shallow = 1; + + r.X.rows = num; + r.y.rows = num; + + r.X.cols = d.X.cols; + r.y.cols = d.y.cols; + + r.X.vals = calloc(num, sizeof(float *)); + r.y.vals = calloc(num, sizeof(float *)); + + int i; + for(i = 0; i < num; ++i){ + int index = rand()%d.X.rows; + r.X.vals[i] = d.X.vals[index]; + r.y.vals[i] = d.y.vals[index]; + } + return r; +} + +data *split_data(data d, int part, int total) +{ + data *split = calloc(2, sizeof(data)); + int i; + int start = 
part*d.X.rows/total; + int end = (part+1)*d.X.rows/total; + data train; + data test; + train.shallow = test.shallow = 1; + + test.X.rows = test.y.rows = end-start; + train.X.rows = train.y.rows = d.X.rows - (end-start); + train.X.cols = test.X.cols = d.X.cols; + train.y.cols = test.y.cols = d.y.cols; + + train.X.vals = calloc(train.X.rows, sizeof(float*)); + test.X.vals = calloc(test.X.rows, sizeof(float*)); + train.y.vals = calloc(train.y.rows, sizeof(float*)); + test.y.vals = calloc(test.y.rows, sizeof(float*)); + + for(i = 0; i < start; ++i){ + train.X.vals[i] = d.X.vals[i]; + train.y.vals[i] = d.y.vals[i]; + } + for(i = start; i < end; ++i){ + test.X.vals[i-start] = d.X.vals[i]; + test.y.vals[i-start] = d.y.vals[i]; + } + for(i = end; i < d.X.rows; ++i){ + train.X.vals[i-(end-start)] = d.X.vals[i]; + train.y.vals[i-(end-start)] = d.y.vals[i]; + } + split[0] = train; + split[1] = test; + return split; +} + diff --git a/workloads/realworld/standard/darknet/src/data.h b/workloads/realworld/standard/darknet/src/data.h new file mode 100644 index 0000000000000000000000000000000000000000..781906f8743c7d88c0fa134403d0ae020b544053 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/data.h @@ -0,0 +1,50 @@ +#ifndef DATA_H +#define DATA_H +#include + +#include "darknet.h" +#include "matrix.h" +#include "list.h" +#include "image.h" +#include "tree.h" + +static inline float distance_from_edge(int x, int max) +{ + int dx = (max/2) - x; + if (dx < 0) dx = -dx; + dx = (max/2) + 1 - dx; + dx *= 2; + float dist = (float)dx/max; + if (dist > 1) dist = 1; + return dist; +} +void load_data_blocking(load_args args); + + +void print_letters(float *pred, int n); +data load_data_captcha(char **paths, int n, int m, int k, int w, int h); +data load_data_captcha_encode(char **paths, int n, int m, int w, int h); +data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure); +data 
load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); +matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); +data load_data_super(char **paths, int n, int m, int w, int h, int scale); +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); +data load_data_regression(char **paths, int n, int m, int classes, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); +data load_go(char *filename); + + +data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h); + +void get_random_batch(data d, int n, float *X, float *y); +data get_data_part(data d, int part, int total); +data get_random_data(data d, int num); +data load_categorical_data_csv(char *filename, int target, int k); +void normalize_data_rows(data d); +void scale_data_rows(data d, float s); +void translate_data_rows(data d, float s); +void randomize_data(data d); +data *split_data(data d, int part, int total); +data concat_datas(data *d, int n); +void fill_truth(char *path, char **labels, int k, float *truth); + +#endif diff --git a/workloads/realworld/standard/darknet/src/deconvolutional_kernels.cu b/workloads/realworld/standard/darknet/src/deconvolutional_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..ed12e7a3dc5148b1cbff746f13901a9653bc0f6d --- /dev/null +++ b/workloads/realworld/standard/darknet/src/deconvolutional_kernels.cu @@ -0,0 +1,139 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "convolutional_layer.h" +#include "deconvolutional_layer.h" +#include "batchnorm_layer.h" +#include "gemm.h" 
+#include "blas.h" +#include "im2col.h" +#include "col2im.h" +#include "utils.h" +#include "cuda_dark.h" +} + +extern "C" void forward_deconvolutional_layer_gpu(layer l, network net) +{ + int i; + + int m = l.size*l.size*l.n; + int n = l.h*l.w; + int k = l.c; + + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + + for(i = 0; i < l.batch; ++i){ + float *a = l.weights_gpu; + float *b = net.input_gpu + i*l.c*l.h*l.w; + float *c = net.workspace; + + gemm_gpu(1,0,m,n,k,1,a,m,b,n,0,c,n); + + col2im_gpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output_gpu+i*l.outputs); + } + if (l.batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); + } + activate_array_gpu(l.output_gpu, l.batch*l.n*l.out_w*l.out_h, l.activation); +} + +extern "C" void backward_deconvolutional_layer_gpu(layer l, network net) +{ + int i; + + //constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + + if(l.batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); + } + + //if(net.delta_gpu) memset(net.delta_gpu, 0, l.batch*l.h*l.w*l.c*sizeof(float)); + + for(i = 0; i < l.batch; ++i){ + int m = l.c; + int n = l.size*l.size*l.n; + int k = l.h*l.w; + + float *a = net.input_gpu + i*m*k; + float *b = net.workspace; + float *c = l.weight_updates_gpu; + + im2col_gpu(l.delta_gpu + i*l.outputs, l.out_c, l.out_h, l.out_w, + l.size, l.stride, l.pad, b); + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if(net.delta_gpu){ + int m = l.c; + int n = l.h*l.w; + int k = l.size*l.size*l.n; + + float *a = l.weights_gpu; + float *b = net.workspace; + float *c = net.delta_gpu + i*n*m; + + gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } +} + +extern "C" void pull_deconvolutional_layer(layer l) +{ + cuda_pull_array(l.weights_gpu, l.weights, 
l.c*l.n*l.size*l.size); + cuda_pull_array(l.biases_gpu, l.biases, l.n); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size); + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_pull_array(l.scales_gpu, l.scales, l.n); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + +extern "C" void push_deconvolutional_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size); + cuda_push_array(l.biases_gpu, l.biases, l.n); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size); + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_push_array(l.scales_gpu, l.scales, l.n); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + +void update_deconvolutional_layer_gpu(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + if(a.adam){ + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t); + adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + if(l.scales_gpu){ + adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + } + }else{ + axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1); + + axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.n, momentum, l.bias_updates_gpu, 1); + + 
if(l.scales_gpu){ + axpy_gpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.n, momentum, l.scale_updates_gpu, 1); + } + } +} + diff --git a/workloads/realworld/standard/darknet/src/deconvolutional_layer.c b/workloads/realworld/standard/darknet/src/deconvolutional_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..00c0e85771d42f99de969f9fd03e5f0f359d405c --- /dev/null +++ b/workloads/realworld/standard/darknet/src/deconvolutional_layer.c @@ -0,0 +1,312 @@ +#include "deconvolutional_layer.h" +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "utils.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" + +#include +#include + + +static size_t get_workspace_size(layer l){ + return (size_t)l.h*l.w*l.size*l.size*l.n*sizeof(float); +} + +void bilinear_init(layer l) +{ + int i,j,f; + float center = (l.size-1) / 2.; + for(f = 0; f < l.n; ++f){ + for(j = 0; j < l.size; ++j){ + for(i = 0; i < l.size; ++i){ + float val = (1 - fabs(i - center)) * (1 - fabs(j - center)); + int c = f%l.c; + int ind = f*l.size*l.size*l.c + c*l.size*l.size + j*l.size + i; + l.weights[ind] = val; + } + } + } +} + + +layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam) +{ + int i; + layer l = {0}; + l.type = DECONVOLUTIONAL; + + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.batch = batch; + l.stride = stride; + l.size = size; + + l.nweights = c*n*size*size; + l.nbiases = n; + + l.weights = calloc(c*n*size*size, sizeof(float)); + l.weight_updates = calloc(c*n*size*size, sizeof(float)); + + l.biases = calloc(n, sizeof(float)); + l.bias_updates = calloc(n, sizeof(float)); + //float scale = n/(size*size*c); + //printf("scale: %f\n", scale); + float scale = .02; + for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal(); + //bilinear_init(l); + for(i = 0; i < n; ++i){ + 
l.biases[i] = 0; + } + l.pad = padding; + + l.out_h = (l.h - 1) * l.stride + l.size - 2*l.pad; + l.out_w = (l.w - 1) * l.stride + l.size - 2*l.pad; + l.out_c = n; + l.outputs = l.out_w * l.out_h * l.out_c; + l.inputs = l.w * l.h * l.c; + + scal_cpu(l.nweights, (float)l.out_w*l.out_h/(l.w*l.h), l.weights, 1); + + l.output = calloc(l.batch*l.outputs, sizeof(float)); + l.delta = calloc(l.batch*l.outputs, sizeof(float)); + + l.forward = forward_deconvolutional_layer; + l.backward = backward_deconvolutional_layer; + l.update = update_deconvolutional_layer; + + l.batch_normalize = batch_normalize; + + if(batch_normalize){ + l.scales = calloc(n, sizeof(float)); + l.scale_updates = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(n, sizeof(float)); + l.variance = calloc(n, sizeof(float)); + + l.mean_delta = calloc(n, sizeof(float)); + l.variance_delta = calloc(n, sizeof(float)); + + l.rolling_mean = calloc(n, sizeof(float)); + l.rolling_variance = calloc(n, sizeof(float)); + l.x = calloc(l.batch*l.outputs, sizeof(float)); + l.x_norm = calloc(l.batch*l.outputs, sizeof(float)); + } + if(adam){ + l.m = calloc(c*n*size*size, sizeof(float)); + l.v = calloc(c*n*size*size, sizeof(float)); + l.bias_m = calloc(n, sizeof(float)); + l.scale_m = calloc(n, sizeof(float)); + l.bias_v = calloc(n, sizeof(float)); + l.scale_v = calloc(n, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_deconvolutional_layer_gpu; + l.backward_gpu = backward_deconvolutional_layer_gpu; + l.update_gpu = update_deconvolutional_layer_gpu; + + if(gpu_index >= 0){ + + if (adam) { + l.m_gpu = cuda_make_array(l.m, c*n*size*size); + l.v_gpu = cuda_make_array(l.v, c*n*size*size); + l.bias_m_gpu = cuda_make_array(l.bias_m, n); + l.bias_v_gpu = cuda_make_array(l.bias_v, n); + l.scale_m_gpu = cuda_make_array(l.scale_m, n); + l.scale_v_gpu = cuda_make_array(l.scale_v, n); + } + l.weights_gpu = cuda_make_array(l.weights, c*n*size*size); + l.weight_updates_gpu = 
cuda_make_array(l.weight_updates, c*n*size*size); + + l.biases_gpu = cuda_make_array(l.biases, n); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*l.out_h*l.out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n); + + if(batch_normalize){ + l.mean_gpu = cuda_make_array(0, n); + l.variance_gpu = cuda_make_array(0, n); + + l.rolling_mean_gpu = cuda_make_array(0, n); + l.rolling_variance_gpu = cuda_make_array(0, n); + + l.mean_delta_gpu = cuda_make_array(0, n); + l.variance_delta_gpu = cuda_make_array(0, n); + + l.scales_gpu = cuda_make_array(l.scales, n); + l.scale_updates_gpu = cuda_make_array(0, n); + + l.x_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n); + l.x_norm_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n); + } + } + #ifdef CUDNN + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); + #endif +#endif + + l.activation = activation; + l.workspace_size = get_workspace_size(l); + + fprintf(stderr, "deconv%5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); + + return l; +} + +void denormalize_deconvolutional_layer(layer l) +{ + int i, j; + for(i = 0; i < l.n; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001); + for(j = 0; j < l.c*l.size*l.size; ++j){ + l.weights[i*l.c*l.size*l.size + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + +void resize_deconvolutional_layer(layer *l, int h, int w) +{ + l->h = h; + l->w = w; + l->out_h = (l->h - 1) * l->stride + l->size - 2*l->pad; + l->out_w = (l->w - 1) * l->stride + l->size - 2*l->pad; + + 
l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->w * l->h * l->c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + if(l->batch_normalize){ + l->x = realloc(l->x, l->batch*l->outputs*sizeof(float)); + l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float)); + } + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); + + if(l->batch_normalize){ + cuda_free(l->x_gpu); + cuda_free(l->x_norm_gpu); + + l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); + l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); + } + #ifdef CUDNN + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + #endif +#endif + l->workspace_size = get_workspace_size(*l); +} + +void forward_deconvolutional_layer(const layer l, network net) +{ + int i; + + int m = l.size*l.size*l.n; + int n = l.h*l.w; + int k = l.c; + + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + + for(i = 0; i < l.batch; ++i){ + float *a = l.weights; + float *b = net.input + i*l.c*l.h*l.w; + float *c = net.workspace; + + gemm_cpu(1,0,m,n,k,1,a,m,b,n,0,c,n); + + col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output+i*l.outputs); + } + if (l.batch_normalize) { + forward_batchnorm_layer(l, net); + } else { + add_bias(l.output, l.biases, l.batch, l.n, l.out_w*l.out_h); + } + activate_array(l.output, l.batch*l.n*l.out_w*l.out_h, l.activation); +} + +void backward_deconvolutional_layer(layer l, network net) +{ + int i; + + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } 
else { + backward_bias(l.bias_updates, l.delta, l.batch, l.n, l.out_w*l.out_h); + } + + //if(net.delta) memset(net.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float)); + + for(i = 0; i < l.batch; ++i){ + int m = l.c; + int n = l.size*l.size*l.n; + int k = l.h*l.w; + + float *a = net.input + i*m*k; + float *b = net.workspace; + float *c = l.weight_updates; + + im2col_cpu(l.delta + i*l.outputs, l.out_c, l.out_h, l.out_w, + l.size, l.stride, l.pad, b); + gemm_cpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if(net.delta){ + int m = l.c; + int n = l.h*l.w; + int k = l.size*l.size*l.n; + + float *a = l.weights; + float *b = net.workspace; + float *c = net.delta + i*n*m; + + gemm_cpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } +} + +void update_deconvolutional_layer(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int size = l.size*l.size*l.c*l.n; + axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.n, momentum, l.bias_updates, 1); + + if(l.scales){ + axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.n, momentum, l.scale_updates, 1); + } + + axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(size, momentum, l.weight_updates, 1); +} + + + diff --git a/workloads/realworld/standard/darknet/src/deconvolutional_layer.h b/workloads/realworld/standard/darknet/src/deconvolutional_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..722a1a58feec4ef13dac2b811df98e3f9960d4ef --- /dev/null +++ b/workloads/realworld/standard/darknet/src/deconvolutional_layer.h @@ -0,0 +1,25 @@ +#ifndef DECONVOLUTIONAL_LAYER_H +#define DECONVOLUTIONAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +#ifdef GPU +void 
forward_deconvolutional_layer_gpu(layer l, network net); +void backward_deconvolutional_layer_gpu(layer l, network net); +void update_deconvolutional_layer_gpu(layer l, update_args a); +void push_deconvolutional_layer(layer l); +void pull_deconvolutional_layer(layer l); +#endif + +layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam); +void resize_deconvolutional_layer(layer *l, int h, int w); +void forward_deconvolutional_layer(const layer l, network net); +void update_deconvolutional_layer(layer l, update_args a); +void backward_deconvolutional_layer(layer l, network net); + +#endif + diff --git a/workloads/realworld/standard/darknet/src/demo.c b/workloads/realworld/standard/darknet/src/demo.c new file mode 100644 index 0000000000000000000000000000000000000000..b89efb8dc4c044c0240b7442e39222405409a676 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/demo.c @@ -0,0 +1,349 @@ +#include "network.h" +#include "detection_layer.h" +#include "region_layer.h" +#include "cost_layer.h" +#include "utils.h" +#include "parser.h" +#include "box.h" +#include "image.h" +#include "demo.h" +#include + +#define DEMO 1 + +#ifdef OPENCV + +static char **demo_names; +static image **demo_alphabet; +static int demo_classes; + +static network *net; +static image buff [3]; +static image buff_letter[3]; +static int buff_index = 0; +static void * cap; +static float fps = 0; +static float demo_thresh = 0; +static float demo_hier = .5; +static int running = 0; + +static int demo_frame = 3; +static int demo_index = 0; +static float **predictions; +static float *avg; +static int demo_done = 0; +static int demo_total = 0; +double demo_time; + +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num); + +int size_network(network *net) +{ + int i; + int count = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; 
+ if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + count += l.outputs; + } + } + return count; +} + +void remember_network(network *net) +{ + int i; + int count = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + memcpy(predictions[demo_index] + count, net->layers[i].output, sizeof(float) * l.outputs); + count += l.outputs; + } + } +} + +detection *avg_predictions(network *net, int *nboxes) +{ + int i, j; + int count = 0; + fill_cpu(demo_total, 0, avg, 1); + for(j = 0; j < demo_frame; ++j){ + axpy_cpu(demo_total, 1./demo_frame, predictions[j], 1, avg, 1); + } + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + memcpy(l.output, avg + count, sizeof(float) * l.outputs); + count += l.outputs; + } + } + detection *dets = get_network_boxes(net, buff[0].w, buff[0].h, demo_thresh, demo_hier, 0, 1, nboxes); + return dets; +} + +void *detect_in_thread(void *ptr) +{ + running = 1; + float nms = .4; + + layer l = net->layers[net->n-1]; + float *X = buff_letter[(buff_index+2)%3].data; + network_predict(net, X); + + /* + if(l.type == DETECTION){ + get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0); + } else */ + remember_network(net); + detection *dets = 0; + int nboxes = 0; + dets = avg_predictions(net, &nboxes); + + + /* + int i,j; + box zero = {0}; + int classes = l.classes; + for(i = 0; i < demo_detections; ++i){ + avg[i].objectness = 0; + avg[i].bbox = zero; + memset(avg[i].prob, 0, classes*sizeof(float)); + for(j = 0; j < demo_frame; ++j){ + axpy_cpu(classes, 1./demo_frame, dets[j][i].prob, 1, avg[i].prob, 1); + avg[i].objectness += dets[j][i].objectness * 1./demo_frame; + avg[i].bbox.x += dets[j][i].bbox.x * 1./demo_frame; + avg[i].bbox.y += dets[j][i].bbox.y * 1./demo_frame; + avg[i].bbox.w += dets[j][i].bbox.w * 1./demo_frame; + avg[i].bbox.h += dets[j][i].bbox.h * 1./demo_frame; + } + 
//copy_cpu(classes, dets[0][i].prob, 1, avg[i].prob, 1); + //avg[i].objectness = dets[0][i].objectness; + } + */ + + if (nms > 0) do_nms_obj(dets, nboxes, l.classes, nms); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.1f\n",fps); + printf("Objects:\n\n"); + image display = buff[(buff_index+2) % 3]; + draw_detections(display, dets, nboxes, demo_thresh, demo_names, demo_alphabet, demo_classes); + free_detections(dets, nboxes); + + demo_index = (demo_index + 1)%demo_frame; + running = 0; + return 0; +} + +void *fetch_in_thread(void *ptr) +{ + free_image(buff[buff_index]); + buff[buff_index] = get_image_from_stream(cap); + if(buff[buff_index].data == 0) { + demo_done = 1; + return 0; + } + letterbox_image_into(buff[buff_index], net->w, net->h, buff_letter[buff_index]); + return 0; +} + +void *display_in_thread(void *ptr) +{ + int c = show_image(buff[(buff_index + 1)%3], "Demo", 1); + if (c != -1) c = c%256; + if (c == 27) { + demo_done = 1; + return 0; + } else if (c == 82) { + demo_thresh += .02; + } else if (c == 84) { + demo_thresh -= .02; + if(demo_thresh <= .02) demo_thresh = .02; + } else if (c == 83) { + demo_hier += .02; + } else if (c == 81) { + demo_hier -= .02; + if(demo_hier <= .0) demo_hier = .0; + } + return 0; +} + +void *display_loop(void *ptr) +{ + while(1){ + display_in_thread(0); + } +} + +void *detect_loop(void *ptr) +{ + while(1){ + detect_in_thread(0); + } +} + +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) +{ + //demo_frame = avg_frames; + image **alphabet = load_alphabet(); + demo_names = names; + demo_alphabet = alphabet; + demo_classes = classes; + demo_thresh = thresh; + demo_hier = hier; + printf("Demo\n"); + net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + pthread_t detect_thread; + pthread_t fetch_thread; + + srand(2222222); + + 
int i; + demo_total = size_network(net); + predictions = calloc(demo_frame, sizeof(float*)); + for (i = 0; i < demo_frame; ++i){ + predictions[i] = calloc(demo_total, sizeof(float)); + } + avg = calloc(demo_total, sizeof(float)); + + if(filename){ + printf("video file: %s\n", filename); + cap = open_video_stream(filename, 0, 0, 0, 0); + }else{ + cap = open_video_stream(0, cam_index, w, h, frames); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + + buff[0] = get_image_from_stream(cap); + buff[1] = copy_image(buff[0]); + buff[2] = copy_image(buff[0]); + buff_letter[0] = letterbox_image(buff[0], net->w, net->h); + buff_letter[1] = letterbox_image(buff[0], net->w, net->h); + buff_letter[2] = letterbox_image(buff[0], net->w, net->h); + + int count = 0; + if(!prefix){ + make_window("Demo", 1352, 1013, fullscreen); + } + + demo_time = what_time_is_it_now(); + + while(!demo_done){ + buff_index = (buff_index + 1) %3; + if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); + if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); + if(!prefix){ + fps = 1./(what_time_is_it_now() - demo_time); + demo_time = what_time_is_it_now(); + display_in_thread(0); + }else{ + char name[256]; + sprintf(name, "%s_%08d", prefix, count); + save_image(buff[(buff_index + 1)%3], name); + } + pthread_join(fetch_thread, 0); + pthread_join(detect_thread, 0); + ++count; + } +} + +/* + void demo_compare(char *cfg1, char *weight1, char *cfg2, char *weight2, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) + { + demo_frame = avg_frames; + predictions = calloc(demo_frame, sizeof(float*)); + image **alphabet = load_alphabet(); + demo_names = names; + demo_alphabet = alphabet; + demo_classes = classes; + demo_thresh = thresh; + demo_hier = hier; + printf("Demo\n"); + net = load_network(cfg1, weight1, 
0); + set_batch_network(net, 1); + pthread_t detect_thread; + pthread_t fetch_thread; + + srand(2222222); + + if(filename){ + printf("video file: %s\n", filename); + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + if(frames){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FPS, frames); + } + } + + if(!cap) error("Couldn't connect to webcam.\n"); + + layer l = net->layers[net->n-1]; + demo_detections = l.n*l.w*l.h; + int j; + + avg = (float *) calloc(l.outputs, sizeof(float)); + for(j = 0; j < demo_frame; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float)); + + boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box)); + probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *)); + for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes+1, sizeof(float)); + + buff[0] = get_image_from_stream(cap); + buff[1] = copy_image(buff[0]); + buff[2] = copy_image(buff[0]); + buff_letter[0] = letterbox_image(buff[0], net->w, net->h); + buff_letter[1] = letterbox_image(buff[0], net->w, net->h); + buff_letter[2] = letterbox_image(buff[0], net->w, net->h); + ipl = cvCreateImage(cvSize(buff[0].w,buff[0].h), IPL_DEPTH_8U, buff[0].c); + + int count = 0; + if(!prefix){ + cvNamedWindow("Demo", CV_WINDOW_NORMAL); + if(fullscreen){ + cvSetWindowProperty("Demo", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); + } else { + cvMoveWindow("Demo", 0, 0); + cvResizeWindow("Demo", 1352, 1013); + } + } + + demo_time = what_time_is_it_now(); + + while(!demo_done){ +buff_index = (buff_index + 1) %3; +if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); +if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); +if(!prefix){ + fps = 1./(what_time_is_it_now() - demo_time); + demo_time = what_time_is_it_now(); + display_in_thread(0); +}else{ + char 
name[256]; + sprintf(name, "%s_%08d", prefix, count); + save_image(buff[(buff_index + 1)%3], name); +} +pthread_join(fetch_thread, 0); +pthread_join(detect_thread, 0); +++count; +} +} +*/ +#else +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg, float hier, int w, int h, int frames, int fullscreen) +{ + fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); +} +#endif + diff --git a/workloads/realworld/standard/darknet/src/demo.h b/workloads/realworld/standard/darknet/src/demo.h new file mode 100644 index 0000000000000000000000000000000000000000..86e46541d1a7473b22373b29bc6ff9cc281d4939 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/demo.h @@ -0,0 +1,6 @@ +#ifndef DEMO_H +#define DEMO_H + +#include "image.h" + +#endif diff --git a/workloads/realworld/standard/darknet/src/detection_layer.c b/workloads/realworld/standard/darknet/src/detection_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..565fa3c3f7d123736d65661d3be8ea91e26b3d5c --- /dev/null +++ b/workloads/realworld/standard/darknet/src/detection_layer.c @@ -0,0 +1,275 @@ +#include "detection_layer.h" +#include "activations.h" +#include "softmax_layer.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore) +{ + detection_layer l = {0}; + l.type = DETECTION; + + l.n = n; + l.batch = batch; + l.inputs = inputs; + l.classes = classes; + l.coords = coords; + l.rescore = rescore; + l.side = side; + l.w = side; + l.h = side; + assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs); + l.cost = calloc(1, sizeof(float)); + l.outputs = l.inputs; + l.truths = l.side*l.side*(1+l.coords+l.classes); + l.output = calloc(batch*l.outputs, sizeof(float)); + l.delta = calloc(batch*l.outputs, 
sizeof(float)); + + l.forward = forward_detection_layer; + l.backward = backward_detection_layer; +#ifdef GPU + l.forward_gpu = forward_detection_layer_gpu; + l.backward_gpu = backward_detection_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "Detection Layer\n"); + srand(0); + + return l; +} + +void forward_detection_layer(const detection_layer l, network net) +{ + int locations = l.side*l.side; + int i,j; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + //if(l.reorg) reorg(l.output, l.w*l.h, size*l.n, l.batch, 1); + int b; + if (l.softmax){ + for(b = 0; b < l.batch; ++b){ + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + int offset = i*l.classes; + softmax(l.output + index + offset, l.classes, 1, 1, + l.output + index + offset); + } + } + } + if(net.train){ + float avg_iou = 0; + float avg_cat = 0; + float avg_allcat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + *(l.cost) = 0; + int size = l.inputs * l.batch; + memset(l.delta, 0, size * sizeof(float)); + for (b = 0; b < l.batch; ++b){ + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + int truth_index = (b*locations + i)*(1+l.coords+l.classes); + int is_obj = net.truth[truth_index]; + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + l.delta[p_index] = l.noobject_scale*(0 - l.output[p_index]); + *(l.cost) += l.noobject_scale*pow(l.output[p_index], 2); + avg_anyobj += l.output[p_index]; + } + + int best_index = -1; + float best_iou = 0; + float best_rmse = 20; + + if (!is_obj){ + continue; + } + + int class_index = index + i*l.classes; + for(j = 0; j < l.classes; ++j) { + l.delta[class_index+j] = l.class_scale * (net.truth[truth_index+1+j] - l.output[class_index+j]); + *(l.cost) += l.class_scale * pow(net.truth[truth_index+1+j] - l.output[class_index+j], 2); + if(net.truth[truth_index + 1 + j]) avg_cat 
+= l.output[class_index+j]; + avg_allcat += l.output[class_index+j]; + } + + box truth = float_to_box(net.truth + truth_index + 1 + l.classes, 1); + truth.x /= l.side; + truth.y /= l.side; + + for(j = 0; j < l.n; ++j){ + int box_index = index + locations*(l.classes + l.n) + (i*l.n + j) * l.coords; + box out = float_to_box(l.output + box_index, 1); + out.x /= l.side; + out.y /= l.side; + + if (l.sqrt){ + out.w = out.w*out.w; + out.h = out.h*out.h; + } + + float iou = box_iou(out, truth); + //iou = 0; + float rmse = box_rmse(out, truth); + if(best_iou > 0 || iou > 0){ + if(iou > best_iou){ + best_iou = iou; + best_index = j; + } + }else{ + if(rmse < best_rmse){ + best_rmse = rmse; + best_index = j; + } + } + } + + if(l.forced){ + if(truth.w*truth.h < .1){ + best_index = 1; + }else{ + best_index = 0; + } + } + if(l.random && *(net.seen) < 64000){ + best_index = rand()%l.n; + } + + int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords; + int tbox_index = truth_index + 1 + l.classes; + + box out = float_to_box(l.output + box_index, 1); + out.x /= l.side; + out.y /= l.side; + if (l.sqrt) { + out.w = out.w*out.w; + out.h = out.h*out.h; + } + float iou = box_iou(out, truth); + + //printf("%d,", best_index); + int p_index = index + locations*l.classes + i*l.n + best_index; + *(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2); + *(l.cost) += l.object_scale * pow(1-l.output[p_index], 2); + avg_obj += l.output[p_index]; + l.delta[p_index] = l.object_scale * (1.-l.output[p_index]); + + if(l.rescore){ + l.delta[p_index] = l.object_scale * (iou - l.output[p_index]); + } + + l.delta[box_index+0] = l.coord_scale*(net.truth[tbox_index + 0] - l.output[box_index + 0]); + l.delta[box_index+1] = l.coord_scale*(net.truth[tbox_index + 1] - l.output[box_index + 1]); + l.delta[box_index+2] = l.coord_scale*(net.truth[tbox_index + 2] - l.output[box_index + 2]); + l.delta[box_index+3] = l.coord_scale*(net.truth[tbox_index + 3] - l.output[box_index + 3]); 
+ if(l.sqrt){ + l.delta[box_index+2] = l.coord_scale*(sqrt(net.truth[tbox_index + 2]) - l.output[box_index + 2]); + l.delta[box_index+3] = l.coord_scale*(sqrt(net.truth[tbox_index + 3]) - l.output[box_index + 3]); + } + + *(l.cost) += pow(1-iou, 2); + avg_iou += iou; + ++count; + } + } + + if(0){ + float *costs = calloc(l.batch*locations*l.n, sizeof(float)); + for (b = 0; b < l.batch; ++b) { + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + costs[b*locations*l.n + i*l.n + j] = l.delta[p_index]*l.delta[p_index]; + } + } + } + int indexes[100]; + top_k(costs, l.batch*locations*l.n, 100, indexes); + float cutoff = costs[indexes[99]]; + for (b = 0; b < l.batch; ++b) { + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + if (l.delta[p_index]*l.delta[p_index] < cutoff) l.delta[p_index] = 0; + } + } + } + free(costs); + } + + + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + + + printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count); + //if(l.reorg) reorg(l.delta, l.w*l.h, size*l.n, l.batch, 0); + } +} + +void backward_detection_layer(const detection_layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +void get_detection_detections(layer l, int w, int h, float thresh, detection *dets) +{ + int i,j,n; + float *predictions = l.output; + //int per_cell = 5*num+classes; + for (i = 0; i < l.side*l.side; ++i){ + int row = i / l.side; + int col = i % l.side; + for(n = 0; n < l.n; ++n){ + int index = i*l.n + n; + int p_index = l.side*l.side*l.classes + i*l.n + n; + float scale = predictions[p_index]; + int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n)*4; + box b; + 
b.x = (predictions[box_index + 0] + col) / l.side * w; + b.y = (predictions[box_index + 1] + row) / l.side * h; + b.w = pow(predictions[box_index + 2], (l.sqrt?2:1)) * w; + b.h = pow(predictions[box_index + 3], (l.sqrt?2:1)) * h; + dets[index].bbox = b; + dets[index].objectness = scale; + for(j = 0; j < l.classes; ++j){ + int class_index = i*l.classes; + float prob = scale*predictions[class_index+j]; + dets[index].prob[j] = (prob > thresh) ? prob : 0; + } + } + } +} + +#ifdef GPU + +void forward_detection_layer_gpu(const detection_layer l, network net) +{ + if(!net.train){ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + return; + } + + cuda_pull_array(net.input_gpu, net.input, l.batch*l.inputs); + forward_detection_layer(l, net); + cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +void backward_detection_layer_gpu(detection_layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); + //copy_gpu(l.batch*l.inputs, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/standard/darknet/src/detection_layer.h b/workloads/realworld/standard/darknet/src/detection_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1c818535700c770c7a5d9387534b199b58876198 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/detection_layer.h @@ -0,0 +1,18 @@ +#ifndef DETECTION_LAYER_H +#define DETECTION_LAYER_H + +#include "layer.h" +#include "network.h" + +typedef layer detection_layer; + +detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore); +void forward_detection_layer(const detection_layer l, network net); +void backward_detection_layer(const detection_layer l, network net); + +#ifdef GPU +void forward_detection_layer_gpu(const detection_layer l, network net); +void backward_detection_layer_gpu(detection_layer l, network net); +#endif + 
+#endif diff --git a/workloads/realworld/standard/darknet/src/dropout_layer.c b/workloads/realworld/standard/darknet/src/dropout_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..8fefa22caeddd174b2a7010274fadae854c742c1 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/dropout_layer.c @@ -0,0 +1,60 @@ +#include "dropout_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include +#include + +dropout_layer make_dropout_layer(int batch, int inputs, float probability) +{ + dropout_layer l = {0}; + l.type = DROPOUT; + l.probability = probability; + l.inputs = inputs; + l.outputs = inputs; + l.batch = batch; + l.rand = calloc(inputs*batch, sizeof(float)); + l.scale = 1./(1.-probability); + l.forward = forward_dropout_layer; + l.backward = backward_dropout_layer; + #ifdef GPU + l.forward_gpu = forward_dropout_layer_gpu; + l.backward_gpu = backward_dropout_layer_gpu; + l.rand_gpu = cuda_make_array(l.rand, inputs*batch); + #endif + fprintf(stderr, "dropout p = %.2f %4d -> %4d\n", probability, inputs, inputs); + return l; +} + +void resize_dropout_layer(dropout_layer *l, int inputs) +{ + l->rand = realloc(l->rand, l->inputs*l->batch*sizeof(float)); + #ifdef GPU + cuda_free(l->rand_gpu); + + l->rand_gpu = cuda_make_array(l->rand, inputs*l->batch); + #endif +} + +void forward_dropout_layer(dropout_layer l, network net) +{ + int i; + if (!net.train) return; + for(i = 0; i < l.batch * l.inputs; ++i){ + float r = rand_uniform(0, 1); + l.rand[i] = r; + if(r < l.probability) net.input[i] = 0; + else net.input[i] *= l.scale; + } +} + +void backward_dropout_layer(dropout_layer l, network net) +{ + int i; + if(!net.delta) return; + for(i = 0; i < l.batch * l.inputs; ++i){ + float r = l.rand[i]; + if(r < l.probability) net.delta[i] = 0; + else net.delta[i] *= l.scale; + } +} + diff --git a/workloads/realworld/standard/darknet/src/dropout_layer.h b/workloads/realworld/standard/darknet/src/dropout_layer.h new file mode 100644 index 
0000000000000000000000000000000000000000..01f94d4d7d10b732fb0e558089579e95128a70bd --- /dev/null +++ b/workloads/realworld/standard/darknet/src/dropout_layer.h @@ -0,0 +1,20 @@ +#ifndef DROPOUT_LAYER_H +#define DROPOUT_LAYER_H + +#include "layer.h" +#include "network.h" + +typedef layer dropout_layer; + +dropout_layer make_dropout_layer(int batch, int inputs, float probability); + +void forward_dropout_layer(dropout_layer l, network net); +void backward_dropout_layer(dropout_layer l, network net); +void resize_dropout_layer(dropout_layer *l, int inputs); + +#ifdef GPU +void forward_dropout_layer_gpu(dropout_layer l, network net); +void backward_dropout_layer_gpu(dropout_layer l, network net); + +#endif +#endif diff --git a/workloads/realworld/standard/darknet/src/dropout_layer_kernels.cu b/workloads/realworld/standard/darknet/src/dropout_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..23aef8f12ffc390504e05f0839878f7787a5381f --- /dev/null +++ b/workloads/realworld/standard/darknet/src/dropout_layer_kernels.cu @@ -0,0 +1,41 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "dropout_layer.h" +#include "cuda_dark.h" +#include "utils.h" +} + +__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id < size) input[id] = (rand[id] < prob) ? 
0 : input[id]*scale; +} + +void forward_dropout_layer_gpu(dropout_layer layer, network net) +{ + if (!net.train) return; + int size = layer.inputs*layer.batch; + cuda_random(layer.rand_gpu, size); + /* + int i; + for(i = 0; i < size; ++i){ + layer.rand[i] = rand_uniform(); + } + cuda_push_array(layer.rand_gpu, layer.rand, size); + */ + + yoloswag420blazeit360noscope<<>>(net.input_gpu, size, layer.rand_gpu, layer.probability, layer.scale); + check_error(cudaPeekAtLastError()); +} + +void backward_dropout_layer_gpu(dropout_layer layer, network net) +{ + if(!net.delta_gpu) return; + int size = layer.inputs*layer.batch; + + yoloswag420blazeit360noscope<<>>(net.delta_gpu, size, layer.rand_gpu, layer.probability, layer.scale); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/standard/darknet/src/gemm.c b/workloads/realworld/standard/darknet/src/gemm.c new file mode 100644 index 0000000000000000000000000000000000000000..756ae595d7348fc2d343a48715b05ea882d6aa7c --- /dev/null +++ b/workloads/realworld/standard/darknet/src/gemm.c @@ -0,0 +1,324 @@ +#include "gemm.h" +#include "utils.h" +#include "cuda_dark.h" +#include +#include +#include + +void gemm_bin(int M, int N, int K, float ALPHA, + char *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + char A_PART = A[i*lda+k]; + if(A_PART){ + for(j = 0; j < N; ++j){ + C[i*ldc+j] += B[k*ldb+j]; + } + } else { + for(j = 0; j < N; ++j){ + C[i*ldc+j] -= B[k*ldb+j]; + } + } + } + } +} + +float *random_matrix(int rows, int cols) +{ + int i; + float *m = calloc(rows*cols, sizeof(float)); + for(i = 0; i < rows*cols; ++i){ + m[i] = (float)rand()/RAND_MAX; + } + return m; +} + +void time_random_matrix(int TA, int TB, int m, int k, int n) +{ + float *a; + if(!TA) a = random_matrix(m,k); + else a = random_matrix(k,m); + int lda = (!TA)?k:m; + float *b; + if(!TB) b = random_matrix(k,n); + else b = random_matrix(n,k); + int ldb = (!TB)?n:k; + + 
float *c = random_matrix(m,n); + int i; + clock_t start = clock(), end; + for(i = 0; i<10; ++i){ + gemm_cpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); + } + end = clock(); + printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf ms\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); + free(a); + free(b); + free(c); +} + + +void gemm(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + gemm_cpu( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); +} + +void gemm_nn(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + register float A_PART = ALPHA*A[i*lda+k]; + for(j = 0; j < N; ++j){ + C[i*ldc+j] += A_PART*B[k*ldb+j]; + } + } + } +} + +void gemm_nt(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + register float sum = 0; + for(k = 0; k < K; ++k){ + sum += ALPHA*A[i*lda+k]*B[j*ldb + k]; + } + C[i*ldc+j] += sum; + } + } +} + +void gemm_tn(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + register float A_PART = ALPHA*A[k*lda+i]; + for(j = 0; j < N; ++j){ + C[i*ldc+j] += A_PART*B[k*ldb+j]; + } + } + } +} + +void gemm_tt(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + register float sum = 0; + for(k = 0; k < K; ++k){ + sum += ALPHA*A[i+k*lda]*B[k+j*ldb]; + } + C[i*ldc+j] += sum; + } + } +} + + +void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int 
ldc) +{ + //printf("cpu: %d %d %d %d %d %f %d %d %f %d\n",TA, TB, M, N, K, ALPHA, lda, ldb, BETA, ldc); + int i, j; + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + C[i*ldc + j] *= BETA; + } + } + if(!TA && !TB) + gemm_nn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else if(TA && !TB) + gemm_tn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else if(!TA && TB) + gemm_nt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else + gemm_tt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); +} + + +// #ifdef GPU + +// #include + +// void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, +// float *A_gpu, int lda, +// float *B_gpu, int ldb, +// float BETA, +// float *C_gpu, int ldc) +// { +// cublasHandle_t handle = blas_handle(); +// cudaError_t status = (cudaError_t) cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), +// (TA ? CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); +// check_error(status); +// } + +// #include +// #include +// #include +// #include + +// void time_gpu_random_matrix(int TA, int TB, int m, int k, int n) +// { +// float *a; +// if(!TA) a = random_matrix(m,k); +// else a = random_matrix(k,m); +// int lda = (!TA)?k:m; +// float *b; +// if(!TB) b = random_matrix(k,n); +// else b = random_matrix(n,k); +// int ldb = (!TB)?n:k; + +// float *c = random_matrix(m,n); +// int i; +// clock_t start = clock(), end; +// for(i = 0; i<32; ++i){ +// gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); +// } +// end = clock(); +// printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); +// free(a); +// free(b); +// free(c); +// } + +// void time_gpu(int TA, int TB, int m, int k, int n) +// { +// int iter = 10; +// float *a = random_matrix(m,k); +// float *b = random_matrix(k,n); + +// int lda = (!TA)?k:m; +// int ldb = (!TB)?n:k; + +// float *c = random_matrix(m,n); + +// float *a_cl = cuda_make_array(a, m*k); +// float *b_cl = cuda_make_array(b, k*n); +// float *c_cl = cuda_make_array(c, m*n); 
+ +// int i; +// clock_t start = clock(), end; +// for(i = 0; i +#include +#include +#include +#include + +#include "gemm.h" +#include "utils.h" +#include "cuda_dark.h" + +#include +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK_X 16 +#define DIM_THREAD_BLOCK_Y 16 + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +__global__ void gemm_kernel(float *a, float *b, float *c, int M, int K, int N, float alpha, float beta) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // Compute each thread's global row and column index + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + + + // Statically allocated shared memory + __shared__ float s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + __shared__ float s_b[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + // Accumulate in temporary variable + + float tmp = 0.0f; + if (row < M && col < N) { + + tmp = beta * c[row * N + col]; + + // Sweep tile across matrix + for (int i = 0; i < K; i += blockDim.x) { + int left = K - i; + + if ((i + threadIdx.x) < K) + s_a[threadIdx.y * blockDim.x + threadIdx.x] = a[row * K + i + threadIdx.x]; + + if ((i + threadIdx.y) < K) + s_b[threadIdx.y * blockDim.x + threadIdx.x] = b[(i + threadIdx.y) * N + col]; + + block.sync(); + + for (int k = 0; k < blockDim.x && k < left ; k++) { + tmp += alpha * s_a[threadIdx.y * blockDim.x + k] * s_b[k * blockDim.x + threadIdx.x]; + } + block.sync(); + } + c[row * N + col] = tmp; + } + block.sync(); +} + +void gemmCuda(float *A, float *B, float *C, int M, int N, int K, float alpha, float beta) +{ + // float *A_gpu; + // float *B_gpu; + // float *C_gpu; + + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 
grid((size_t)(ceil(((float)N) / ((float)block.y))), (size_t)(ceil(((float)M) / ((float)block.x)))); + + // cudaMalloc(&A_gpu, sizeof(float) * M * K); + // cudaMalloc(&B_gpu, sizeof(float) * K * N); + // cudaMalloc(&C_gpu, sizeof(float) * M * N); + + // cudaMemcpy(A_gpu, A, sizeof(float) * M * K, cudaMemcpyHostToDevice); + // cudaMemcpy(B_gpu, B, sizeof(float) * K * N, cudaMemcpyHostToDevice); + // cudaMemcpy(C_gpu, C, sizeof(float) * M * N, cudaMemcpyHostToDevice); + gemm_kernel<<>>(A, B, C, M, K, N, alpha, beta); + // cudaDeviceSynchronize(); + // cudaMemcpy(C, C_gpu, sizeof(float) * M * N, cudaMemcpyDeviceToHost); + + // cudaFree(A_gpu); + // cudaFree(B_gpu); + // cudaFree(C_gpu); +} + +// void gemmCuda(float *A, float *B, float *C, int M, int N, int K, float alpha, float beta) +// { +// float *A_gpu; +// float *B_gpu; +// float *C_gpu; + +// dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); +// dim3 grid((size_t)(ceil(((float)N) / ((float)block.y))), (size_t)(ceil(((float)M) / ((float)block.x)))); + +// cudaMalloc(&A_gpu, sizeof(float) * M * K); +// cudaMalloc(&B_gpu, sizeof(float) * K * N); +// cudaMalloc(&C_gpu, sizeof(float) * M * N); + +// cudaMemcpy(A_gpu, A, sizeof(float) * M * K, cudaMemcpyHostToDevice); +// cudaMemcpy(B_gpu, B, sizeof(float) * K * N, cudaMemcpyHostToDevice); +// cudaMemcpy(C_gpu, C, sizeof(float) * M * N, cudaMemcpyHostToDevice); +// gemm_kernel<<>>(A_gpu, B_gpu, C_gpu, M, K, N, alpha, beta); +// // cudaDeviceSynchronize(); +// cudaMemcpy(C, C_gpu, sizeof(float) * M * N, cudaMemcpyDeviceToHost); + +// cudaFree(A_gpu); +// cudaFree(B_gpu); +// cudaFree(C_gpu); +// } + +// void gemmCuda(float *A, float *B, float *C, int M, int N, int K, float alpha, float beta) +// { +// float *A_gpu; +// float *B_gpu; +// float *C_gpu; + +// dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); +// dim3 grid((size_t)(ceil(((float)N) / ((float)block.y))), (size_t)(ceil(((float)M) / ((float)block.x)))); + +// cudaMallocManaged(&A_gpu, sizeof(float) * M * 
K); +// cudaMallocManaged(&B_gpu, sizeof(float) * K * N); +// cudaMallocManaged(&C_gpu, sizeof(float) * M * N); +// printf("allocation succeed!\n"); + +// memcpy(A_gpu, A, sizeof(float) * M * K); +// memcpy(B_gpu, B, sizeof(float) * K * N); +// printf("memcpy succeed!\n"); +// // cudaMemcpy(C_gpu, C, sizeof(float) * M * N, cudaMemcpyHostToDevice); +// gemm_kernel<<>>(A_gpu, B_gpu, C_gpu, M, K, N, alpha, beta); +// cudaDeviceSynchronize(); +// memcpy(C, C_gpu, sizeof(float) * M * N); + +// cudaFree(A_gpu); +// cudaFree(B_gpu); +// cudaFree(C_gpu); +// } + +void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, + float *A_gpu, int lda, + float *B_gpu, int ldb, + float BETA, + float *C_gpu, int ldc) +{ + // printf("TA is %d, TB is %d, M is %d, N is %d, K is %d, lda is %d, ldb is %d, ldc is %d.\n", TA, TB, M, N, K, lda, ldb, ldc); + if (TA == 0 && TB == 0) { + gemmCuda(A_gpu, B_gpu, C_gpu, M, N, K, ALPHA, BETA); + } else { + cublasHandle_t handle = blas_handle(); + cudaError_t status = (cudaError_t) cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), + (TA ? 
CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); + check_error(status); + } +} + + +void time_gpu_random_matrix(int TA, int TB, int m, int k, int n) +{ + float *a; + if(!TA) a = random_matrix(m,k); + else a = random_matrix(k,m); + int lda = (!TA)?k:m; + float *b; + if(!TB) b = random_matrix(k,n); + else b = random_matrix(n,k); + int ldb = (!TB)?n:k; + + float *c = random_matrix(m,n); + int i; + clock_t start = clock(), end; + for(i = 0; i<32; ++i){ + gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); + } + end = clock(); + printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); + free(a); + free(b); + free(c); +} + +void time_gpu(int TA, int TB, int m, int k, int n) +{ + int iter = 10; + float *a = random_matrix(m,k); + float *b = random_matrix(k,n); + + int lda = (!TA)?k:m; + int ldb = (!TB)?n:k; + + float *c = random_matrix(m,n); + + float *a_cl = cuda_make_array(a, m*k); + float *b_cl = cuda_make_array(b, k*n); + float *c_cl = cuda_make_array(c, m*n); + + int i; + clock_t start = clock(), end; + for(i = 0; i +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam) +{ + fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = GRU; + l.steps = steps; + l.inputs = inputs; + + l.uz = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uz) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uz->batch = batch; + + l.wz = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wz) = 
make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wz->batch = batch; + + l.ur = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.ur) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.ur->batch = batch; + + l.wr = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wr) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wr->batch = batch; + + + + l.uh = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uh) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uh->batch = batch; + + l.wh = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wh) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wh->batch = batch; + + l.batch_normalize = batch_normalize; + + + l.outputs = outputs; + l.output = calloc(outputs*batch*steps, sizeof(float)); + l.delta = calloc(outputs*batch*steps, sizeof(float)); + l.state = calloc(outputs*batch, sizeof(float)); + l.prev_state = calloc(outputs*batch, sizeof(float)); + l.forgot_state = calloc(outputs*batch, sizeof(float)); + l.forgot_delta = calloc(outputs*batch, sizeof(float)); + + l.r_cpu = calloc(outputs*batch, sizeof(float)); + l.z_cpu = calloc(outputs*batch, sizeof(float)); + l.h_cpu = calloc(outputs*batch, sizeof(float)); + + l.forward = forward_gru_layer; + l.backward = backward_gru_layer; + l.update = update_gru_layer; + +#ifdef GPU + l.forward_gpu = forward_gru_layer_gpu; + l.backward_gpu = backward_gru_layer_gpu; + l.update_gpu = update_gru_layer_gpu; + + l.forgot_state_gpu = cuda_make_array(0, batch*outputs); + l.forgot_delta_gpu = cuda_make_array(0, batch*outputs); + l.prev_state_gpu = cuda_make_array(0, batch*outputs); + l.state_gpu = cuda_make_array(0, batch*outputs); + l.output_gpu = cuda_make_array(0, batch*outputs*steps); + l.delta_gpu = cuda_make_array(0, batch*outputs*steps); + l.r_gpu = cuda_make_array(0, 
batch*outputs); + l.z_gpu = cuda_make_array(0, batch*outputs); + l.h_gpu = cuda_make_array(0, batch*outputs); + +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l.uz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uz->out_c, l.uz->out_h, l.uz->out_w); + cudnnSetTensor4dDescriptor(l.uh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uh->out_c, l.uh->out_h, l.uh->out_w); + cudnnSetTensor4dDescriptor(l.ur->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ur->out_c, l.ur->out_h, l.ur->out_w); + cudnnSetTensor4dDescriptor(l.wz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wz->out_c, l.wz->out_h, l.wz->out_w); + cudnnSetTensor4dDescriptor(l.wh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wh->out_c, l.wh->out_h, l.wh->out_w); + cudnnSetTensor4dDescriptor(l.wr->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wr->out_c, l.wr->out_h, l.wr->out_w); +#endif +#endif + + return l; +} + +void update_gru_layer(layer l, update_args a) +{ + update_connected_layer(*(l.ur), a); + update_connected_layer(*(l.uz), a); + update_connected_layer(*(l.uh), a); + update_connected_layer(*(l.wr), a); + update_connected_layer(*(l.wz), a); + update_connected_layer(*(l.wh), a); +} + +void forward_gru_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + fill_cpu(l.outputs * l.batch * l.steps, 0, uz.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, ur.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, uh.delta, 1); + + fill_cpu(l.outputs * l.batch * l.steps, 0, wz.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wr.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wh.delta, 1); + if(net.train) { + fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1); + copy_cpu(l.outputs*l.batch, l.state, 1, l.prev_state, 1); + } + + for (i = 0; i < 
l.steps; ++i) { + s.input = l.state; + forward_connected_layer(wz, s); + forward_connected_layer(wr, s); + + s.input = net.input; + forward_connected_layer(uz, s); + forward_connected_layer(ur, s); + forward_connected_layer(uh, s); + + + copy_cpu(l.outputs*l.batch, uz.output, 1, l.z_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wz.output, 1, l.z_cpu, 1); + + copy_cpu(l.outputs*l.batch, ur.output, 1, l.r_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wr.output, 1, l.r_cpu, 1); + + activate_array(l.z_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.r_cpu, l.outputs*l.batch, LOGISTIC); + + copy_cpu(l.outputs*l.batch, l.state, 1, l.forgot_state, 1); + mul_cpu(l.outputs*l.batch, l.r_cpu, 1, l.forgot_state, 1); + + s.input = l.forgot_state; + forward_connected_layer(wh, s); + + copy_cpu(l.outputs*l.batch, uh.output, 1, l.h_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wh.output, 1, l.h_cpu, 1); + + if(l.tanh){ + activate_array(l.h_cpu, l.outputs*l.batch, TANH); + } else { + activate_array(l.h_cpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_sum_cpu(l.state, l.h_cpu, l.z_cpu, l.outputs*l.batch, l.output); + + copy_cpu(l.outputs*l.batch, l.output, 1, l.state, 1); + + net.input += l.inputs*l.batch; + l.output += l.outputs*l.batch; + increment_layer(&uz, 1); + increment_layer(&ur, 1); + increment_layer(&uh, 1); + + increment_layer(&wz, 1); + increment_layer(&wr, 1); + increment_layer(&wh, 1); + } +} + +void backward_gru_layer(layer l, network net) +{ +} + +#ifdef GPU + +void pull_gru_layer(layer l) +{ +} + +void push_gru_layer(layer l) +{ +} + +void update_gru_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.ur), a); + update_connected_layer_gpu(*(l.uz), a); + update_connected_layer_gpu(*(l.uh), a); + update_connected_layer_gpu(*(l.wr), a); + update_connected_layer_gpu(*(l.wz), a); + update_connected_layer_gpu(*(l.wh), a); +} + +void forward_gru_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer 
ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + fill_gpu(l.outputs * l.batch * l.steps, 0, uz.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, ur.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, uh.delta_gpu, 1); + + fill_gpu(l.outputs * l.batch * l.steps, 0, wz.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wr.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wh.delta_gpu, 1); + if(net.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.prev_state_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(wz, s); + forward_connected_layer_gpu(wr, s); + + s.input_gpu = net.input_gpu; + forward_connected_layer_gpu(uz, s); + forward_connected_layer_gpu(ur, s); + forward_connected_layer_gpu(uh, s); + + copy_gpu(l.outputs*l.batch, uz.output_gpu, 1, l.z_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wz.output_gpu, 1, l.z_gpu, 1); + + copy_gpu(l.outputs*l.batch, ur.output_gpu, 1, l.r_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wr.output_gpu, 1, l.r_gpu, 1); + + activate_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.forgot_state_gpu, 1); + mul_gpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1); + + s.input_gpu = l.forgot_state_gpu; + forward_connected_layer_gpu(wh, s); + + copy_gpu(l.outputs*l.batch, uh.output_gpu, 1, l.h_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wh.output_gpu, 1, l.h_gpu, 1); + + if(l.tanh){ + activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH); + } else { + activate_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_sum_gpu(l.state_gpu, l.h_gpu, l.z_gpu, l.outputs*l.batch, l.output_gpu); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.state_gpu, 1); + + net.input_gpu += l.inputs*l.batch; + l.output_gpu += 
l.outputs*l.batch; + increment_layer(&uz, 1); + increment_layer(&ur, 1); + increment_layer(&uh, 1); + + increment_layer(&wz, 1); + increment_layer(&wr, 1); + increment_layer(&wh, 1); + } +} + +void backward_gru_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + increment_layer(&uz, l.steps - 1); + increment_layer(&ur, l.steps - 1); + increment_layer(&uh, l.steps - 1); + + increment_layer(&wz, l.steps - 1); + increment_layer(&wr, l.steps - 1); + increment_layer(&wh, l.steps - 1); + + net.input_gpu += l.inputs*l.batch*(l.steps-1); + if(net.delta_gpu) net.delta_gpu += l.inputs*l.batch*(l.steps-1); + l.output_gpu += l.outputs*l.batch*(l.steps-1); + l.delta_gpu += l.outputs*l.batch*(l.steps-1); + float *end_state = l.output_gpu; + for (i = l.steps-1; i >= 0; --i) { + if(i != 0) copy_gpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + else copy_gpu(l.outputs*l.batch, l.prev_state_gpu, 1, l.state_gpu, 1); + float *prev_delta_gpu = (i == 0) ? 
0 : l.delta_gpu - l.outputs*l.batch; + + copy_gpu(l.outputs*l.batch, uz.output_gpu, 1, l.z_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wz.output_gpu, 1, l.z_gpu, 1); + + copy_gpu(l.outputs*l.batch, ur.output_gpu, 1, l.r_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wr.output_gpu, 1, l.r_gpu, 1); + + activate_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, uh.output_gpu, 1, l.h_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wh.output_gpu, 1, l.h_gpu, 1); + + if(l.tanh){ + activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH); + } else { + activate_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_delta_gpu(l.state_gpu, l.h_gpu, l.z_gpu, prev_delta_gpu, uh.delta_gpu, uz.delta_gpu, l.outputs*l.batch, l.delta_gpu); + + if(l.tanh){ + gradient_array_gpu(l.h_gpu, l.outputs*l.batch, TANH, uh.delta_gpu); + } else { + gradient_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC, uh.delta_gpu); + } + + copy_gpu(l.outputs*l.batch, uh.delta_gpu, 1, wh.delta_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.forgot_state_gpu, 1); + mul_gpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1); + fill_gpu(l.outputs*l.batch, 0, l.forgot_delta_gpu, 1); + + s.input_gpu = l.forgot_state_gpu; + s.delta_gpu = l.forgot_delta_gpu; + + backward_connected_layer_gpu(wh, s); + if(prev_delta_gpu) mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.r_gpu, prev_delta_gpu); + mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.state_gpu, ur.delta_gpu); + + gradient_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC, ur.delta_gpu); + copy_gpu(l.outputs*l.batch, ur.delta_gpu, 1, wr.delta_gpu, 1); + + gradient_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC, uz.delta_gpu); + copy_gpu(l.outputs*l.batch, uz.delta_gpu, 1, wz.delta_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = prev_delta_gpu; + + backward_connected_layer_gpu(wr, s); + backward_connected_layer_gpu(wz, s); + + 
s.input_gpu = net.input_gpu; + s.delta_gpu = net.delta_gpu; + + backward_connected_layer_gpu(uh, s); + backward_connected_layer_gpu(ur, s); + backward_connected_layer_gpu(uz, s); + + + net.input_gpu -= l.inputs*l.batch; + if(net.delta_gpu) net.delta_gpu -= l.inputs*l.batch; + l.output_gpu -= l.outputs*l.batch; + l.delta_gpu -= l.outputs*l.batch; + increment_layer(&uz, -1); + increment_layer(&ur, -1); + increment_layer(&uh, -1); + + increment_layer(&wz, -1); + increment_layer(&wr, -1); + increment_layer(&wh, -1); + } + copy_gpu(l.outputs*l.batch, end_state, 1, l.state_gpu, 1); +} +#endif diff --git a/workloads/realworld/standard/darknet/src/gru_layer.h b/workloads/realworld/standard/darknet/src/gru_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9067942e9499d53c8d54f7728d64a5030200f4de --- /dev/null +++ b/workloads/realworld/standard/darknet/src/gru_layer.h @@ -0,0 +1,24 @@ + +#ifndef GRU_LAYER_H +#define GRU_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam); + +void forward_gru_layer(layer l, network state); +void backward_gru_layer(layer l, network state); +void update_gru_layer(layer l, update_args a); + +#ifdef GPU +void forward_gru_layer_gpu(layer l, network state); +void backward_gru_layer_gpu(layer l, network state); +void update_gru_layer_gpu(layer l, update_args a); +void push_gru_layer(layer l); +void pull_gru_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/standard/darknet/src/im2col.c b/workloads/realworld/standard/darknet/src/im2col.c new file mode 100644 index 0000000000000000000000000000000000000000..69ec98a9d12b2e21a3859611ad709d62fc80dcf3 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/im2col.c @@ -0,0 +1,40 @@ +#include "im2col.h" +#include +float im2col_get_pixel(float *im, int height, int width, int channels, + int row, int col, int channel, int pad) +{ + 
row -= pad; + col -= pad; + + if (row < 0 || col < 0 || + row >= height || col >= width) return 0; + return im[col + width*(row + height*channel)]; +} + +//From Berkeley Vision's Caffe! +//https://github.com/BVLC/caffe/blob/master/LICENSE +void im2col_cpu(float* data_im, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_col) +{ + int c,h,w; + int height_col = (height + 2*pad - ksize) / stride + 1; + int width_col = (width + 2*pad - ksize) / stride + 1; + + int channels_col = channels * ksize * ksize; + for (c = 0; c < channels_col; ++c) { + int w_offset = c % ksize; + int h_offset = (c / ksize) % ksize; + int c_im = c / ksize / ksize; + for (h = 0; h < height_col; ++h) { + for (w = 0; w < width_col; ++w) { + int im_row = h_offset + h * stride; + int im_col = w_offset + w * stride; + int col_index = (c * height_col + h) * width_col + w; + data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, + im_row, im_col, c_im, pad); + } + } + } +} + diff --git a/workloads/realworld/standard/darknet/src/im2col.h b/workloads/realworld/standard/darknet/src/im2col.h new file mode 100644 index 0000000000000000000000000000000000000000..02c4247fad9b8428a8e89fc8caec0b5b6ba5b36a --- /dev/null +++ b/workloads/realworld/standard/darknet/src/im2col.h @@ -0,0 +1,15 @@ +#ifndef IM2COL_H +#define IM2COL_H + +void im2col_cpu(float* data_im, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_col); + +#ifdef GPU + +void im2col_gpu(float *im, + int channels, int height, int width, + int ksize, int stride, int pad,float *data_col); + +#endif +#endif diff --git a/workloads/realworld/standard/darknet/src/im2col_kernels.cu b/workloads/realworld/standard/darknet/src/im2col_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..decbc1202a9ebd3916312527cc0cdad3fef9b264 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/im2col_kernels.cu @@ -0,0 +1,61 @@ +#include "cuda_runtime.h" 
+#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "im2col.h" +#include "cuda_dark.h" +} + +// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu +// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE + +__global__ void im2col_gpu_kernel(const int n, const float* data_im, + const int height, const int width, const int ksize, + const int pad, + const int stride, + const int height_col, const int width_col, + float *data_col) { + int index = blockIdx.x*blockDim.x+threadIdx.x; + for(; index < n; index += blockDim.x*gridDim.x){ + int w_out = index % width_col; + int h_index = index / width_col; + int h_out = h_index % height_col; + int channel_in = h_index / height_col; + int channel_out = channel_in * ksize * ksize; + int h_in = h_out * stride - pad; + int w_in = w_out * stride - pad; + float* data_col_ptr = data_col; + data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; + const float* data_im_ptr = data_im; + data_im_ptr += (channel_in * height + h_in) * width + w_in; + for (int i = 0; i < ksize; ++i) { + for (int j = 0; j < ksize; ++j) { + int h = h_in + i; + int w = w_in + j; + + *data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ? + data_im_ptr[i * width + j] : 0; + + //*data_col_ptr = data_im_ptr[ii * width + jj]; + + data_col_ptr += height_col * width_col; + } + } + } +} + +void im2col_gpu(float *im, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_col){ + // We are going to launch channels * height_col * width_col kernels, each + // kernel responsible for copying a single-channel grid. 
+ int height_col = (height + 2 * pad - ksize) / stride + 1; + int width_col = (width + 2 * pad - ksize) / stride + 1; + int num_kernels = channels * height_col * width_col; + im2col_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, + BLOCK>>>( + num_kernels, im, height, width, ksize, pad, + stride, height_col, + width_col, data_col); +} diff --git a/workloads/realworld/standard/darknet/src/image.c b/workloads/realworld/standard/darknet/src/image.c new file mode 100644 index 0000000000000000000000000000000000000000..3edf6d1045f4637d7bb108440302fd36d5ef9a18 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/image.c @@ -0,0 +1,1467 @@ +#include "image.h" +#include "utils.h" +#include "blas.h" +#include "cuda_dark.h" +#include +#include + +#define STB_IMAGE_IMPLEMENTATION +#include "stb_image.h" +#define STB_IMAGE_WRITE_IMPLEMENTATION +#include "stb_image_write.h" + +int windows = 0; + +float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} }; + +float get_color(int c, int x, int max) +{ + float ratio = ((float)x/max)*5; + int i = floor(ratio); + int j = ceil(ratio); + ratio -= i; + float r = (1-ratio) * colors[i][c] + ratio*colors[j][c]; + //printf("%f\n", r); + return r; +} + +image mask_to_rgb(image mask) +{ + int n = mask.c; + image im = make_image(mask.w, mask.h, 3); + int i, j; + for(j = 0; j < n; ++j){ + int offset = j*123457 % n; + float red = get_color(2,offset,n); + float green = get_color(1,offset,n); + float blue = get_color(0,offset,n); + for(i = 0; i < im.w*im.h; ++i){ + im.data[i + 0*im.w*im.h] += mask.data[j*im.h*im.w + i]*red; + im.data[i + 1*im.w*im.h] += mask.data[j*im.h*im.w + i]*green; + im.data[i + 2*im.w*im.h] += mask.data[j*im.h*im.w + i]*blue; + } + } + return im; +} + +static float get_pixel(image m, int x, int y, int c) +{ + assert(x < m.w && y < m.h && c < m.c); + return m.data[c*m.h*m.w + y*m.w + x]; +} +static float get_pixel_extend(image m, int x, int y, int c) +{ + if(x < 0 || x >= m.w || y < 0 || y >= m.h) return 0; + 
/* + if(x < 0) x = 0; + if(x >= m.w) x = m.w-1; + if(y < 0) y = 0; + if(y >= m.h) y = m.h-1; + */ + if(c < 0 || c >= m.c) return 0; + return get_pixel(m, x, y, c); +} +static void set_pixel(image m, int x, int y, int c, float val) +{ + if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return; + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] = val; +} +static void add_pixel(image m, int x, int y, int c, float val) +{ + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] += val; +} + +static float bilinear_interpolate(image im, float x, float y, int c) +{ + int ix = (int) floorf(x); + int iy = (int) floorf(y); + + float dx = x - ix; + float dy = y - iy; + + float val = (1-dy) * (1-dx) * get_pixel_extend(im, ix, iy, c) + + dy * (1-dx) * get_pixel_extend(im, ix, iy+1, c) + + (1-dy) * dx * get_pixel_extend(im, ix+1, iy, c) + + dy * dx * get_pixel_extend(im, ix+1, iy+1, c); + return val; +} + + +void composite_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float val = get_pixel(source, x, y, k); + float val2 = get_pixel_extend(dest, dx+x, dy+y, k); + set_pixel(dest, dx+x, dy+y, k, val * val2); + } + } + } +} + +image border_image(image a, int border) +{ + image b = make_image(a.w + 2*border, a.h + 2*border, a.c); + int x,y,k; + for(k = 0; k < b.c; ++k){ + for(y = 0; y < b.h; ++y){ + for(x = 0; x < b.w; ++x){ + float val = get_pixel_extend(a, x - border, y - border, k); + if(x - border < 0 || x - border >= a.w || y - border < 0 || y - border >= a.h) val = 1; + set_pixel(b, x, y, k, val); + } + } + } + return b; +} + +image tile_images(image a, image b, int dx) +{ + if(a.w == 0) return copy_image(b); + image c = make_image(a.w + b.w + dx, (a.h > b.h) ? a.h : b.h, (a.c > b.c) ? 
a.c : b.c); + fill_cpu(c.w*c.h*c.c, 1, c.data, 1); + embed_image(a, c, 0, 0); + composite_image(b, c, a.w + dx, 0); + return c; +} + +image get_label(image **characters, char *string, int size) +{ + size = size/10; + if(size > 7) size = 7; + image label = make_empty_image(0,0,0); + while(*string){ + image l = characters[size][(int)*string]; + image n = tile_images(label, l, -size - 1 + (size+1)/2); + free_image(label); + label = n; + ++string; + } + image b = border_image(label, label.h*.25); + free_image(label); + return b; +} + +void draw_label(image a, int r, int c, image label, const float *rgb) +{ + int w = label.w; + int h = label.h; + if (r - h >= 0) r = r - h; + + int i, j, k; + for(j = 0; j < h && j + r < a.h; ++j){ + for(i = 0; i < w && i + c < a.w; ++i){ + for(k = 0; k < label.c; ++k){ + float val = get_pixel(label, i, j, k); + set_pixel(a, i+c, j+r, k, rgb[k] * val); + } + } + } +} + +void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b) +{ + //normalize_image(a); + int i; + if(x1 < 0) x1 = 0; + if(x1 >= a.w) x1 = a.w-1; + if(x2 < 0) x2 = 0; + if(x2 >= a.w) x2 = a.w-1; + + if(y1 < 0) y1 = 0; + if(y1 >= a.h) y1 = a.h-1; + if(y2 < 0) y2 = 0; + if(y2 >= a.h) y2 = a.h-1; + + for(i = x1; i <= x2; ++i){ + a.data[i + y1*a.w + 0*a.w*a.h] = r; + a.data[i + y2*a.w + 0*a.w*a.h] = r; + + a.data[i + y1*a.w + 1*a.w*a.h] = g; + a.data[i + y2*a.w + 1*a.w*a.h] = g; + + a.data[i + y1*a.w + 2*a.w*a.h] = b; + a.data[i + y2*a.w + 2*a.w*a.h] = b; + } + for(i = y1; i <= y2; ++i){ + a.data[x1 + i*a.w + 0*a.w*a.h] = r; + a.data[x2 + i*a.w + 0*a.w*a.h] = r; + + a.data[x1 + i*a.w + 1*a.w*a.h] = g; + a.data[x2 + i*a.w + 1*a.w*a.h] = g; + + a.data[x1 + i*a.w + 2*a.w*a.h] = b; + a.data[x2 + i*a.w + 2*a.w*a.h] = b; + } +} + +void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b) +{ + int i; + for(i = 0; i < w; ++i){ + draw_box(a, x1+i, y1+i, x2-i, y2-i, r, g, b); + } +} + +void draw_bbox(image a, box bbox, int w, 
float r, float g, float b) +{ + int left = (bbox.x-bbox.w/2)*a.w; + int right = (bbox.x+bbox.w/2)*a.w; + int top = (bbox.y-bbox.h/2)*a.h; + int bot = (bbox.y+bbox.h/2)*a.h; + + int i; + for(i = 0; i < w; ++i){ + draw_box(a, left+i, top+i, right-i, bot-i, r, g, b); + } +} + +image **load_alphabet() +{ + char *value = getenv("UVMAsyncBench_BASE"); + int i, j; + const int nsize = 8; + image **alphabets = calloc(nsize, sizeof(image)); + for(j = 0; j < nsize; ++j){ + alphabets[j] = calloc(128, sizeof(image)); + for(i = 32; i < 127; ++i){ + char buff[256]; + sprintf(buff, "%s/workloads/realworld/standard/darknet/data/labels/%d_%d.png", value, i, j); + alphabets[j][i] = load_image_color(buff, 0, 0); + } + } + return alphabets; +} + +void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes) +{ + int i,j; + + for(i = 0; i < num; ++i){ + char labelstr[4096] = {0}; + int class = -1; + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j] > thresh){ + if (class < 0) { + strcat(labelstr, names[j]); + class = j; + } else { + strcat(labelstr, ", "); + strcat(labelstr, names[j]); + } + printf("%s: %.0f%%\n", names[j], dets[i].prob[j]*100); + } + } + if(class >= 0){ + int width = im.h * .006; + + /* + if(0){ + width = pow(prob, 1./2.)*10+1; + alphabet = 0; + } + */ + + //printf("%d %s: %.0f%%\n", i, names[class], prob*100); + int offset = class*123457 % classes; + float red = get_color(2,offset,classes); + float green = get_color(1,offset,classes); + float blue = get_color(0,offset,classes); + float rgb[3]; + + //width = prob*20+2; + + rgb[0] = red; + rgb[1] = green; + rgb[2] = blue; + box b = dets[i].bbox; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + + int left = (b.x-b.w/2.)*im.w; + int right = (b.x+b.w/2.)*im.w; + int top = (b.y-b.h/2.)*im.h; + int bot = (b.y+b.h/2.)*im.h; + + if(left < 0) left = 0; + if(right > im.w-1) right = im.w-1; + if(top < 0) top = 0; + if(bot > im.h-1) bot = im.h-1; + + draw_box_width(im, 
left, top, right, bot, width, red, green, blue); + if (alphabet) { + image label = get_label(alphabet, labelstr, (im.h*.03)); + draw_label(im, top + width, left, label, rgb); + free_image(label); + } + if (dets[i].mask){ + image mask = float_to_image(14, 14, 1, dets[i].mask); + image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h); + image tmask = threshold_image(resized_mask, .5); + embed_image(tmask, im, left, top); + free_image(mask); + free_image(resized_mask); + free_image(tmask); + } + } + } +} + +void transpose_image(image im) +{ + assert(im.w == im.h); + int n, m; + int c; + for(c = 0; c < im.c; ++c){ + for(n = 0; n < im.w-1; ++n){ + for(m = n + 1; m < im.w; ++m){ + float swap = im.data[m + im.w*(n + im.h*c)]; + im.data[m + im.w*(n + im.h*c)] = im.data[n + im.w*(m + im.h*c)]; + im.data[n + im.w*(m + im.h*c)] = swap; + } + } + } +} + +void rotate_image_cw(image im, int times) +{ + assert(im.w == im.h); + times = (times + 400) % 4; + int i, x, y, c; + int n = im.w; + for(i = 0; i < times; ++i){ + for(c = 0; c < im.c; ++c){ + for(x = 0; x < n/2; ++x){ + for(y = 0; y < (n-1)/2 + 1; ++y){ + float temp = im.data[y + im.w*(x + im.h*c)]; + im.data[y + im.w*(x + im.h*c)] = im.data[n-1-x + im.w*(y + im.h*c)]; + im.data[n-1-x + im.w*(y + im.h*c)] = im.data[n-1-y + im.w*(n-1-x + im.h*c)]; + im.data[n-1-y + im.w*(n-1-x + im.h*c)] = im.data[x + im.w*(n-1-y + im.h*c)]; + im.data[x + im.w*(n-1-y + im.h*c)] = temp; + } + } + } + } +} + +void flip_image(image a) +{ + int i,j,k; + for(k = 0; k < a.c; ++k){ + for(i = 0; i < a.h; ++i){ + for(j = 0; j < a.w/2; ++j){ + int index = j + a.w*(i + a.h*(k)); + int flip = (a.w - j - 1) + a.w*(i + a.h*(k)); + float swap = a.data[flip]; + a.data[flip] = a.data[index]; + a.data[index] = swap; + } + } + } +} + +image image_distance(image a, image b) +{ + int i,j; + image dist = make_image(a.w, a.h, 1); + for(i = 0; i < a.c; ++i){ + for(j = 0; j < a.h*a.w; ++j){ + dist.data[j] += pow(a.data[i*a.h*a.w+j]-b.data[i*a.h*a.w+j],2); + } + } 
+ for(j = 0; j < a.h*a.w; ++j){ + dist.data[j] = sqrt(dist.data[j]); + } + return dist; +} + +void ghost_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + float max_dist = sqrt((-source.w/2. + .5)*(-source.w/2. + .5)); + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float dist = sqrt((x - source.w/2. + .5)*(x - source.w/2. + .5) + (y - source.h/2. + .5)*(y - source.h/2. + .5)); + float alpha = (1 - dist/max_dist); + if(alpha < 0) alpha = 0; + float v1 = get_pixel(source, x,y,k); + float v2 = get_pixel(dest, dx+x,dy+y,k); + float val = alpha*v1 + (1-alpha)*v2; + set_pixel(dest, dx+x, dy+y, k, val); + } + } + } +} + +void blocky_image(image im, int s) +{ + int i,j,k; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + im.data[i + im.w*(j + im.h*k)] = im.data[i/s*s + im.w*(j/s*s + im.h*k)]; + } + } + } +} + +void censor_image(image im, int dx, int dy, int w, int h) +{ + int i,j,k; + int s = 32; + if(dx < 0) dx = 0; + if(dy < 0) dy = 0; + + for(k = 0; k < im.c; ++k){ + for(j = dy; j < dy + h && j < im.h; ++j){ + for(i = dx; i < dx + w && i < im.w; ++i){ + im.data[i + im.w*(j + im.h*k)] = im.data[i/s*s + im.w*(j/s*s + im.h*k)]; + //im.data[i + j*im.w + k*im.w*im.h] = 0; + } + } + } +} + +void embed_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float val = get_pixel(source, x,y,k); + set_pixel(dest, dx+x, dy+y, k, val); + } + } + } +} + +image collapse_image_layers(image source, int border) +{ + int h = source.h; + h = (h+border)*source.c - border; + image dest = make_image(source.w, h, 1); + int i; + for(i = 0; i < source.c; ++i){ + image layer = get_image_layer(source, i); + int h_offset = i*(source.h+border); + embed_image(layer, dest, 0, h_offset); + free_image(layer); + } + return dest; +} + +void constrain_image(image im) +{ + int i; + for(i = 
0; i < im.w*im.h*im.c; ++i){ + if(im.data[i] < 0) im.data[i] = 0; + if(im.data[i] > 1) im.data[i] = 1; + } +} + +void normalize_image(image p) +{ + int i; + float min = 9999999; + float max = -999999; + + for(i = 0; i < p.h*p.w*p.c; ++i){ + float v = p.data[i]; + if(v < min) min = v; + if(v > max) max = v; + } + if(max - min < .000000001){ + min = 0; + max = 1; + } + for(i = 0; i < p.c*p.w*p.h; ++i){ + p.data[i] = (p.data[i] - min)/(max-min); + } +} + +void normalize_image2(image p) +{ + float *min = calloc(p.c, sizeof(float)); + float *max = calloc(p.c, sizeof(float)); + int i,j; + for(i = 0; i < p.c; ++i) min[i] = max[i] = p.data[i*p.h*p.w]; + + for(j = 0; j < p.c; ++j){ + for(i = 0; i < p.h*p.w; ++i){ + float v = p.data[i+j*p.h*p.w]; + if(v < min[j]) min[j] = v; + if(v > max[j]) max[j] = v; + } + } + for(i = 0; i < p.c; ++i){ + if(max[i] - min[i] < .000000001){ + min[i] = 0; + max[i] = 1; + } + } + for(j = 0; j < p.c; ++j){ + for(i = 0; i < p.w*p.h; ++i){ + p.data[i+j*p.h*p.w] = (p.data[i+j*p.h*p.w] - min[j])/(max[j]-min[j]); + } + } + free(min); + free(max); +} + +void copy_image_into(image src, image dest) +{ + memcpy(dest.data, src.data, src.h*src.w*src.c*sizeof(float)); +} + +image copy_image(image p) +{ + image copy = p; + copy.data = calloc(p.h*p.w*p.c, sizeof(float)); + memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float)); + return copy; +} + +void rgbgr_image(image im) +{ + int i; + for(i = 0; i < im.w*im.h; ++i){ + float swap = im.data[i]; + im.data[i] = im.data[i+im.w*im.h*2]; + im.data[i+im.w*im.h*2] = swap; + } +} + +int show_image(image p, const char *name, int ms) +{ +#ifdef OPENCV + int c = show_image_cv(p, name, ms); + return c; +#else + fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name); + save_image(p, name); + return -1; +#endif +} + +void save_image_options(image im, const char *name, IMTYPE f, int quality) +{ + char buff[256]; + //sprintf(buff, "%s (%d)", name, windows); + if(f == PNG) sprintf(buff, "%s.png", 
name); + else if (f == BMP) sprintf(buff, "%s.bmp", name); + else if (f == TGA) sprintf(buff, "%s.tga", name); + else if (f == JPG) sprintf(buff, "%s.jpg", name); + else sprintf(buff, "%s.png", name); + unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char)); + int i,k; + for(k = 0; k < im.c; ++k){ + for(i = 0; i < im.w*im.h; ++i){ + data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]); + } + } + int success = 0; + if(f == PNG) success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c); + else if (f == BMP) success = stbi_write_bmp(buff, im.w, im.h, im.c, data); + else if (f == TGA) success = stbi_write_tga(buff, im.w, im.h, im.c, data); + else if (f == JPG) success = stbi_write_jpg(buff, im.w, im.h, im.c, data, quality); + free(data); + if(!success) fprintf(stderr, "Failed to write image %s\n", buff); +} + +void save_image(image im, const char *name) +{ + save_image_options(im, name, JPG, 80); +} + +void show_image_layers(image p, char *name) +{ + int i; + char buff[256]; + for(i = 0; i < p.c; ++i){ + sprintf(buff, "%s - Layer %d", name, i); + image layer = get_image_layer(p, i); + show_image(layer, buff, 1); + free_image(layer); + } +} + +void show_image_collapsed(image p, char *name) +{ + image c = collapse_image_layers(p, 1); + show_image(c, name, 1); + free_image(c); +} + +image make_empty_image(int w, int h, int c) +{ + image out; + out.data = 0; + out.h = h; + out.w = w; + out.c = c; + return out; +} + +image make_image(int w, int h, int c) +{ + image out = make_empty_image(w,h,c); + out.data = calloc(h*w*c, sizeof(float)); + return out; +} + +image make_random_image(int w, int h, int c) +{ + image out = make_empty_image(w,h,c); + out.data = calloc(h*w*c, sizeof(float)); + int i; + for(i = 0; i < w*h*c; ++i){ + out.data[i] = (rand_normal() * .25) + .5; + } + return out; +} + +image float_to_image(int w, int h, int c, float *data) +{ + image out = make_empty_image(w,h,c); + out.data = data; + return out; +} + +void place_image(image im, 
int w, int h, int dx, int dy, image canvas) +{ + int x, y, c; + for(c = 0; c < im.c; ++c){ + for(y = 0; y < h; ++y){ + for(x = 0; x < w; ++x){ + float rx = ((float)x / w) * im.w; + float ry = ((float)y / h) * im.h; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(canvas, x + dx, y + dy, c, val); + } + } + } +} + +image center_crop_image(image im, int w, int h) +{ + int m = (im.w < im.h) ? im.w : im.h; + image c = crop_image(im, (im.w - m) / 2, (im.h - m)/2, m, m); + image r = resize_image(c, w, h); + free_image(c); + return r; +} + +image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect) +{ + int x, y, c; + float cx = im.w/2.; + float cy = im.h/2.; + image rot = make_image(w, h, im.c); + for(c = 0; c < im.c; ++c){ + for(y = 0; y < h; ++y){ + for(x = 0; x < w; ++x){ + float rx = cos(rad)*((x - w/2.)/s*aspect + dx/s*aspect) - sin(rad)*((y - h/2.)/s + dy/s) + cx; + float ry = sin(rad)*((x - w/2.)/s*aspect + dx/s*aspect) + cos(rad)*((y - h/2.)/s + dy/s) + cy; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(rot, x, y, c, val); + } + } + } + return rot; +} + +image rotate_image(image im, float rad) +{ + int x, y, c; + float cx = im.w/2.; + float cy = im.h/2.; + image rot = make_image(im.w, im.h, im.c); + for(c = 0; c < im.c; ++c){ + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx; + float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(rot, x, y, c, val); + } + } + } + return rot; +} + +void fill_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] = s; +} + +void translate_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s; +} + +void scale_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s; +} + +image crop_image(image im, int dx, int dy, int w, int h) +{ + image cropped = 
make_image(w, h, im.c); + int i, j, k; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int r = j + dy; + int c = i + dx; + float val = 0; + r = constrain_int(r, 0, im.h-1); + c = constrain_int(c, 0, im.w-1); + val = get_pixel(im, c, r, k); + set_pixel(cropped, i, j, k, val); + } + } + } + return cropped; +} + +int best_3d_shift_r(image a, image b, int min, int max) +{ + if(min == max) return min; + int mid = floor((min + max) / 2.); + image c1 = crop_image(b, 0, mid, b.w, b.h); + image c2 = crop_image(b, 0, mid+1, b.w, b.h); + float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 10); + float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 10); + free_image(c1); + free_image(c2); + if(d1 < d2) return best_3d_shift_r(a, b, min, mid); + else return best_3d_shift_r(a, b, mid+1, max); +} + +int best_3d_shift(image a, image b, int min, int max) +{ + int i; + int best = 0; + float best_distance = FLT_MAX; + for(i = min; i <= max; i += 2){ + image c = crop_image(b, 0, i, b.w, b.h); + float d = dist_array(c.data, a.data, a.w*a.h*a.c, 100); + if(d < best_distance){ + best_distance = d; + best = i; + } + printf("%d %f\n", i, d); + free_image(c); + } + return best; +} + +void composite_3d(char *f1, char *f2, char *out, int delta) +{ + if(!out) out = "out"; + image a = load_image(f1, 0,0,0); + image b = load_image(f2, 0,0,0); + int shift = best_3d_shift_r(a, b, -a.h/100, a.h/100); + + image c1 = crop_image(b, 10, shift, b.w, b.h); + float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 100); + image c2 = crop_image(b, -10, shift, b.w, b.h); + float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 100); + + if(d2 < d1 && 0){ + image swap = a; + a = b; + b = swap; + shift = -shift; + printf("swapped, %d\n", shift); + } + else{ + printf("%d\n", shift); + } + + image c = crop_image(b, delta, shift, a.w, a.h); + int i; + for(i = 0; i < c.w*c.h; ++i){ + c.data[i] = a.data[i]; + } + save_image(c, out); +} + +void letterbox_image_into(image im, int w, int h, 
image boxed) +{ + int new_w = im.w; + int new_h = im.h; + if (((float)w/im.w) < ((float)h/im.h)) { + new_w = w; + new_h = (im.h * w)/im.w; + } else { + new_h = h; + new_w = (im.w * h)/im.h; + } + image resized = resize_image(im, new_w, new_h); + embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2); + free_image(resized); +} + +image letterbox_image(image im, int w, int h) +{ + int new_w = im.w; + int new_h = im.h; + if (((float)w/im.w) < ((float)h/im.h)) { + new_w = w; + new_h = (im.h * w)/im.w; + } else { + new_h = h; + new_w = (im.w * h)/im.h; + } + image resized = resize_image(im, new_w, new_h); + image boxed = make_image(w, h, im.c); + fill_image(boxed, .5); + //int i; + //for(i = 0; i < boxed.w*boxed.h*boxed.c; ++i) boxed.data[i] = 0; + embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2); + free_image(resized); + return boxed; +} + +image resize_max(image im, int max) +{ + int w = im.w; + int h = im.h; + if(w > h){ + h = (h * max) / w; + w = max; + } else { + w = (w * max) / h; + h = max; + } + if(w == im.w && h == im.h) return im; + image resized = resize_image(im, w, h); + return resized; +} + +image resize_min(image im, int min) +{ + int w = im.w; + int h = im.h; + if(w < h){ + h = (h * min) / w; + w = min; + } else { + w = (w * min) / h; + h = min; + } + if(w == im.w && h == im.h) return im; + image resized = resize_image(im, w, h); + return resized; +} + +image random_crop_image(image im, int w, int h) +{ + int dx = rand_int(0, im.w - w); + int dy = rand_int(0, im.h - h); + image crop = crop_image(im, dx, dy, w, h); + return crop; +} + +augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h) +{ + augment_args a = {0}; + aspect = rand_scale(aspect); + int r = rand_int(low, high); + int min = (im.h < im.w*aspect) ? 
im.h : im.w*aspect; + float scale = (float)r / min; + + float rad = rand_uniform(-angle, angle) * TWO_PI / 360.; + + float dx = (im.w*scale/aspect - w) / 2.; + float dy = (im.h*scale - w) / 2.; + //if(dx < 0) dx = 0; + //if(dy < 0) dy = 0; + dx = rand_uniform(-dx, dx); + dy = rand_uniform(-dy, dy); + + a.rad = rad; + a.scale = scale; + a.w = w; + a.h = h; + a.dx = dx; + a.dy = dy; + a.aspect = aspect; + return a; +} + +image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h) +{ + augment_args a = random_augment_args(im, angle, aspect, low, high, w, h); + image crop = rotate_crop_image(im, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + return crop; +} + +float three_way_max(float a, float b, float c) +{ + return (a > b) ? ( (a > c) ? a : c) : ( (b > c) ? b : c) ; +} + +float three_way_min(float a, float b, float c) +{ + return (a < b) ? ( (a < c) ? a : c) : ( (b < c) ? b : c) ; +} + +void yuv_to_rgb(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float y, u, v; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + y = get_pixel(im, i , j, 0); + u = get_pixel(im, i , j, 1); + v = get_pixel(im, i , j, 2); + + r = y + 1.13983*v; + g = y + -.39465*u + -.58060*v; + b = y + 2.03211*u; + + set_pixel(im, i, j, 0, r); + set_pixel(im, i, j, 1, g); + set_pixel(im, i, j, 2, b); + } + } +} + +void rgb_to_yuv(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float y, u, v; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + r = get_pixel(im, i , j, 0); + g = get_pixel(im, i , j, 1); + b = get_pixel(im, i , j, 2); + + y = .299*r + .587*g + .114*b; + u = -.14713*r + -.28886*g + .436*b; + v = .615*r + -.51499*g + -.10001*b; + + set_pixel(im, i, j, 0, y); + set_pixel(im, i, j, 1, u); + set_pixel(im, i, j, 2, v); + } + } +} + +// http://www.cs.rit.edu/~ncs/color/t_convert.html +void rgb_to_hsv(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float h, s, v; + for(j = 0; j < im.h; 
++j){ + for(i = 0; i < im.w; ++i){ + r = get_pixel(im, i , j, 0); + g = get_pixel(im, i , j, 1); + b = get_pixel(im, i , j, 2); + float max = three_way_max(r,g,b); + float min = three_way_min(r,g,b); + float delta = max - min; + v = max; + if(max == 0){ + s = 0; + h = 0; + }else{ + s = delta/max; + if(r == max){ + h = (g - b) / delta; + } else if (g == max) { + h = 2 + (b - r) / delta; + } else { + h = 4 + (r - g) / delta; + } + if (h < 0) h += 6; + h = h/6.; + } + set_pixel(im, i, j, 0, h); + set_pixel(im, i, j, 1, s); + set_pixel(im, i, j, 2, v); + } + } +} + +void hsv_to_rgb(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float h, s, v; + float f, p, q, t; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + h = 6 * get_pixel(im, i , j, 0); + s = get_pixel(im, i , j, 1); + v = get_pixel(im, i , j, 2); + if (s == 0) { + r = g = b = v; + } else { + int index = floor(h); + f = h - index; + p = v*(1-s); + q = v*(1-s*f); + t = v*(1-s*(1-f)); + if(index == 0){ + r = v; g = t; b = p; + } else if(index == 1){ + r = q; g = v; b = p; + } else if(index == 2){ + r = p; g = v; b = t; + } else if(index == 3){ + r = p; g = q; b = v; + } else if(index == 4){ + r = t; g = p; b = v; + } else { + r = v; g = p; b = q; + } + } + set_pixel(im, i, j, 0, r); + set_pixel(im, i, j, 1, g); + set_pixel(im, i, j, 2, b); + } + } +} + +void grayscale_image_3c(image im) +{ + assert(im.c == 3); + int i, j, k; + float scale[] = {0.299, 0.587, 0.114}; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float val = 0; + for(k = 0; k < 3; ++k){ + val += scale[k]*get_pixel(im, i, j, k); + } + im.data[0*im.h*im.w + im.w*j + i] = val; + im.data[1*im.h*im.w + im.w*j + i] = val; + im.data[2*im.h*im.w + im.w*j + i] = val; + } + } +} + +image grayscale_image(image im) +{ + assert(im.c == 3); + int i, j, k; + image gray = make_image(im.w, im.h, 1); + float scale[] = {0.299, 0.587, 0.114}; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ 
+ gray.data[i+im.w*j] += scale[k]*get_pixel(im, i, j, k); + } + } + } + return gray; +} + +image threshold_image(image im, float thresh) +{ + int i; + image t = make_image(im.w, im.h, im.c); + for(i = 0; i < im.w*im.h*im.c; ++i){ + t.data[i] = im.data[i]>thresh ? 1 : 0; + } + return t; +} + +image blend_image(image fore, image back, float alpha) +{ + assert(fore.w == back.w && fore.h == back.h && fore.c == back.c); + image blend = make_image(fore.w, fore.h, fore.c); + int i, j, k; + for(k = 0; k < fore.c; ++k){ + for(j = 0; j < fore.h; ++j){ + for(i = 0; i < fore.w; ++i){ + float val = alpha * get_pixel(fore, i, j, k) + + (1 - alpha)* get_pixel(back, i, j, k); + set_pixel(blend, i, j, k, val); + } + } + } + return blend; +} + +void scale_image_channel(image im, int c, float v) +{ + int i, j; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float pix = get_pixel(im, i, j, c); + pix = pix*v; + set_pixel(im, i, j, c, pix); + } + } +} + +void translate_image_channel(image im, int c, float v) +{ + int i, j; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float pix = get_pixel(im, i, j, c); + pix = pix+v; + set_pixel(im, i, j, c, pix); + } + } +} + +image binarize_image(image im) +{ + image c = copy_image(im); + int i; + for(i = 0; i < im.w * im.h * im.c; ++i){ + if(c.data[i] > .5) c.data[i] = 1; + else c.data[i] = 0; + } + return c; +} + +void saturate_image(image im, float sat) +{ + rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + hsv_to_rgb(im); + constrain_image(im); +} + +void hue_image(image im, float hue) +{ + rgb_to_hsv(im); + int i; + for(i = 0; i < im.w*im.h; ++i){ + im.data[i] = im.data[i] + hue; + if (im.data[i] > 1) im.data[i] -= 1; + if (im.data[i] < 0) im.data[i] += 1; + } + hsv_to_rgb(im); + constrain_image(im); +} + +void exposure_image(image im, float sat) +{ + rgb_to_hsv(im); + scale_image_channel(im, 2, sat); + hsv_to_rgb(im); + constrain_image(im); +} + +void distort_image(image im, float hue, float sat, float val) +{ + 
rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + scale_image_channel(im, 2, val); + int i; + for(i = 0; i < im.w*im.h; ++i){ + im.data[i] = im.data[i] + hue; + if (im.data[i] > 1) im.data[i] -= 1; + if (im.data[i] < 0) im.data[i] += 1; + } + hsv_to_rgb(im); + constrain_image(im); +} + +void random_distort_image(image im, float hue, float saturation, float exposure) +{ + float dhue = rand_uniform(-hue, hue); + float dsat = rand_scale(saturation); + float dexp = rand_scale(exposure); + distort_image(im, dhue, dsat, dexp); +} + +void saturate_exposure_image(image im, float sat, float exposure) +{ + rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + scale_image_channel(im, 2, exposure); + hsv_to_rgb(im); + constrain_image(im); +} + +image resize_image(image im, int w, int h) +{ + image resized = make_image(w, h, im.c); + image part = make_image(w, im.h, im.c); + int r, c, k; + float w_scale = (float)(im.w - 1) / (w - 1); + float h_scale = (float)(im.h - 1) / (h - 1); + for(k = 0; k < im.c; ++k){ + for(r = 0; r < im.h; ++r){ + for(c = 0; c < w; ++c){ + float val = 0; + if(c == w-1 || im.w == 1){ + val = get_pixel(im, im.w-1, r, k); + } else { + float sx = c*w_scale; + int ix = (int) sx; + float dx = sx - ix; + val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k); + } + set_pixel(part, c, r, k, val); + } + } + } + for(k = 0; k < im.c; ++k){ + for(r = 0; r < h; ++r){ + float sy = r*h_scale; + int iy = (int) sy; + float dy = sy - iy; + for(c = 0; c < w; ++c){ + float val = (1-dy) * get_pixel(part, c, iy, k); + set_pixel(resized, c, r, k, val); + } + if(r == h-1 || im.h == 1) continue; + for(c = 0; c < w; ++c){ + float val = dy * get_pixel(part, c, iy+1, k); + add_pixel(resized, c, r, k, val); + } + } + } + + free_image(part); + return resized; +} + + +void test_resize(char *filename) +{ + image im = load_image(filename, 0,0, 3); + float mag = mag_array(im.data, im.w*im.h*im.c); + printf("L2 Norm: %f\n", mag); + image gray = grayscale_image(im); + 
+ image c1 = copy_image(im); + image c2 = copy_image(im); + image c3 = copy_image(im); + image c4 = copy_image(im); + distort_image(c1, .1, 1.5, 1.5); + distort_image(c2, -.1, .66666, .66666); + distort_image(c3, .1, 1.5, .66666); + distort_image(c4, .1, .66666, 1.5); + + + show_image(im, "Original", 1); + show_image(gray, "Gray", 1); + show_image(c1, "C1", 1); + show_image(c2, "C2", 1); + show_image(c3, "C3", 1); + show_image(c4, "C4", 1); +#ifdef OPENCV + while(1){ + image aug = random_augment_image(im, 0, .75, 320, 448, 320, 320); + show_image(aug, "aug", 1); + free_image(aug); + + + float exposure = 1.15; + float saturation = 1.15; + float hue = .05; + + image c = copy_image(im); + + float dexp = rand_scale(exposure); + float dsat = rand_scale(saturation); + float dhue = rand_uniform(-hue, hue); + + distort_image(c, dhue, dsat, dexp); + show_image(c, "rand", 1); + printf("%f %f %f\n", dhue, dsat, dexp); + free_image(c); + } +#endif +} + + +image load_image_stb(char *filename, int channels) +{ + int w, h, c; + unsigned char *data = stbi_load(filename, &w, &h, &c, channels); + if (!data) { + fprintf(stderr, "Cannot load image \"%s\"\nSTB Reason: %s\n", filename, stbi_failure_reason()); + exit(0); + } + if(channels) c = channels; + int i,j,k; + image im = make_image(w, h, c); + for(k = 0; k < c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int dst_index = i + w*j + w*h*k; + int src_index = k + c*i + c*w*j; + im.data[dst_index] = (float)data[src_index]/255.; + } + } + } + free(data); + return im; +} + +image load_image(char *filename, int w, int h, int c) +{ +#ifdef OPENCV + image out = load_image_cv(filename, c); +#else + image out = load_image_stb(filename, c); +#endif + + if((h && w) && (h != out.h || w != out.w)){ + image resized = resize_image(out, w, h); + free_image(out); + out = resized; + } + return out; +} + +image load_image_color(char *filename, int w, int h) +{ + return load_image(filename, w, h, 3); +} + +image get_image_layer(image m, 
int l) +{ + image out = make_image(m.w, m.h, 1); + int i; + for(i = 0; i < m.h*m.w; ++i){ + out.data[i] = m.data[i+l*m.h*m.w]; + } + return out; +} +void print_image(image m) +{ + int i, j, k; + for(i =0 ; i < m.c; ++i){ + for(j =0 ; j < m.h; ++j){ + for(k = 0; k < m.w; ++k){ + printf("%.2lf, ", m.data[i*m.h*m.w + j*m.w + k]); + if(k > 30) break; + } + printf("\n"); + if(j > 30) break; + } + printf("\n"); + } + printf("\n"); +} + +image collapse_images_vert(image *ims, int n) +{ + int color = 1; + int border = 1; + int h,w,c; + w = ims[0].w; + h = (ims[0].h + border) * n - border; + c = ims[0].c; + if(c != 3 || !color){ + w = (w+border)*c - border; + c = 1; + } + + image filters = make_image(w, h, c); + int i,j; + for(i = 0; i < n; ++i){ + int h_offset = i*(ims[0].h+border); + image copy = copy_image(ims[i]); + //normalize_image(copy); + if(c == 3 && color){ + embed_image(copy, filters, 0, h_offset); + } + else{ + for(j = 0; j < copy.c; ++j){ + int w_offset = j*(ims[0].w+border); + image layer = get_image_layer(copy, j); + embed_image(layer, filters, w_offset, h_offset); + free_image(layer); + } + } + free_image(copy); + } + return filters; +} + +image collapse_images_horz(image *ims, int n) +{ + int color = 1; + int border = 1; + int h,w,c; + int size = ims[0].h; + h = size; + w = (ims[0].w + border) * n - border; + c = ims[0].c; + if(c != 3 || !color){ + h = (h+border)*c - border; + c = 1; + } + + image filters = make_image(w, h, c); + int i,j; + for(i = 0; i < n; ++i){ + int w_offset = i*(size+border); + image copy = copy_image(ims[i]); + //normalize_image(copy); + if(c == 3 && color){ + embed_image(copy, filters, w_offset, 0); + } + else{ + for(j = 0; j < copy.c; ++j){ + int h_offset = j*(size+border); + image layer = get_image_layer(copy, j); + embed_image(layer, filters, w_offset, h_offset); + free_image(layer); + } + } + free_image(copy); + } + return filters; +} + +void show_image_normalized(image im, const char *name) +{ + image c = copy_image(im); + 
normalize_image(c); + show_image(c, name, 1); + free_image(c); +} + +void show_images(image *ims, int n, char *window) +{ + image m = collapse_images_vert(ims, n); + /* + int w = 448; + int h = ((float)m.h/m.w) * 448; + if(h > 896){ + h = 896; + w = ((float)m.w/m.h) * 896; + } + image sized = resize_image(m, w, h); + */ + normalize_image(m); + save_image(m, window); + show_image(m, window, 1); + free_image(m); +} + +void free_image(image m) +{ + if(m.data){ + free(m.data); + } +} diff --git a/workloads/realworld/standard/darknet/src/image.h b/workloads/realworld/standard/darknet/src/image.h new file mode 100644 index 0000000000000000000000000000000000000000..3392bb9787fc542929cda064bcefa0f3f893b76c --- /dev/null +++ b/workloads/realworld/standard/darknet/src/image.h @@ -0,0 +1,69 @@ +#ifndef IMAGE_H +#define IMAGE_H + +#include +#include +#include +#include +#include +#include "box.h" +#include "darknet.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef OPENCV +void *open_video_stream(const char *f, int c, int w, int h, int fps); +image get_image_from_stream(void *p); +image load_image_cv(char *filename, int channels); +int show_image_cv(image im, const char* name, int ms); +#endif + +float get_color(int c, int x, int max); +void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); +void draw_bbox(image a, box bbox, int w, float r, float g, float b); +void write_label(image a, int r, int c, image *characters, char *string, float *rgb); +image image_distance(image a, image b); +void scale_image(image m, float s); +image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect); +image random_crop_image(image im, int w, int h); +image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h); +augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h); +void letterbox_image_into(image im, int w, int h, image boxed); +image 
resize_max(image im, int max); +void translate_image(image m, float s); +void embed_image(image source, image dest, int dx, int dy); +void place_image(image im, int w, int h, int dx, int dy, image canvas); +void saturate_image(image im, float sat); +void exposure_image(image im, float sat); +void distort_image(image im, float hue, float sat, float val); +void saturate_exposure_image(image im, float sat, float exposure); +void rgb_to_hsv(image im); +void hsv_to_rgb(image im); +void yuv_to_rgb(image im); +void rgb_to_yuv(image im); + + +image collapse_image_layers(image source, int border); +image collapse_images_horz(image *ims, int n); +image collapse_images_vert(image *ims, int n); + +void show_image_normalized(image im, const char *name); +void show_images(image *ims, int n, char *window); +void show_image_layers(image p, char *name); +void show_image_collapsed(image p, char *name); + +void print_image(image m); + +image make_empty_image(int w, int h, int c); +void copy_image_into(image src, image dest); + +image get_image_layer(image m, int l); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/workloads/realworld/standard/darknet/src/image_opencv.cpp b/workloads/realworld/standard/darknet/src/image_opencv.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7511280be07ca987fd51fa54aea55910cd34a706 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/image_opencv.cpp @@ -0,0 +1,135 @@ +#ifdef OPENCV + +#include "stdio.h" +#include "stdlib.h" +#include "opencv2/opencv.hpp" +#include "image.h" + +using namespace cv; + +extern "C" { + +IplImage *image_to_ipl(image im) +{ + int x,y,c; + IplImage *disp = cvCreateImage(cvSize(im.w,im.h), IPL_DEPTH_8U, im.c); + int step = disp->widthStep; + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + for(c= 0; c < im.c; ++c){ + float val = im.data[c*im.h*im.w + y*im.w + x]; + disp->imageData[y*step + x*im.c + c] = (unsigned char)(val*255); + } + } + } + return disp; +} + +image 
ipl_to_image(IplImage* src) +{ + int h = src->height; + int w = src->width; + int c = src->nChannels; + image im = make_image(w, h, c); + unsigned char *data = (unsigned char *)src->imageData; + int step = src->widthStep; + int i, j, k; + + for(i = 0; i < h; ++i){ + for(k= 0; k < c; ++k){ + for(j = 0; j < w; ++j){ + im.data[k*w*h + i*w + j] = data[i*step + j*c + k]/255.; + } + } + } + return im; +} + +Mat image_to_mat(image im) +{ + image copy = copy_image(im); + constrain_image(copy); + if(im.c == 3) rgbgr_image(copy); + + IplImage *ipl = image_to_ipl(copy); + Mat m = cvarrToMat(ipl, true); + cvReleaseImage(&ipl); + free_image(copy); + return m; +} + +image mat_to_image(Mat m) +{ + IplImage ipl = m; + image im = ipl_to_image(&ipl); + rgbgr_image(im); + return im; +} + +void *open_video_stream(const char *f, int c, int w, int h, int fps) +{ + VideoCapture *cap; + if(f) cap = new VideoCapture(f); + else cap = new VideoCapture(c); + if(!cap->isOpened()) return 0; + if(w) cap->set(CV_CAP_PROP_FRAME_WIDTH, w); + if(h) cap->set(CV_CAP_PROP_FRAME_HEIGHT, w); + if(fps) cap->set(CV_CAP_PROP_FPS, w); + return (void *) cap; +} + +image get_image_from_stream(void *p) +{ + VideoCapture *cap = (VideoCapture *)p; + Mat m; + *cap >> m; + if(m.empty()) return make_empty_image(0,0,0); + return mat_to_image(m); +} + +image load_image_cv(char *filename, int channels) +{ + int flag = -1; + if (channels == 0) flag = -1; + else if (channels == 1) flag = 0; + else if (channels == 3) flag = 1; + else { + fprintf(stderr, "OpenCV can't force load with %d channels\n", channels); + } + Mat m; + m = imread(filename, flag); + if(!m.data){ + fprintf(stderr, "Cannot load image \"%s\"\n", filename); + char buff[256]; + sprintf(buff, "echo %s >> bad.list", filename); + system(buff); + return make_image(10,10,3); + //exit(0); + } + image im = mat_to_image(m); + return im; +} + +int show_image_cv(image im, const char* name, int ms) +{ + Mat m = image_to_mat(im); + imshow(name, m); + int c = 
waitKey(ms); + if (c != -1) c = c%256; + return c; +} + +void make_window(char *name, int w, int h, int fullscreen) +{ + namedWindow(name, WINDOW_NORMAL); + if (fullscreen) { + setWindowProperty(name, CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); + } else { + resizeWindow(name, w, h); + if(strcmp(name, "Demo") == 0) moveWindow(name, 0, 0); + } +} + +} + +#endif diff --git a/workloads/realworld/standard/darknet/src/iseg_layer.c b/workloads/realworld/standard/darknet/src/iseg_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..a1b822a5797a6d04b0f3756f106cb2b20ba31a5b --- /dev/null +++ b/workloads/realworld/standard/darknet/src/iseg_layer.c @@ -0,0 +1,225 @@ +#include "iseg_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_iseg_layer(int batch, int w, int h, int classes, int ids) +{ + layer l = {0}; + l.type = ISEG; + + l.h = h; + l.w = w; + l.c = classes + ids; + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.batch = batch; + l.extra = ids; + l.cost = calloc(1, sizeof(float)); + l.outputs = h*w*l.c; + l.inputs = l.outputs; + l.truths = 90*(l.w*l.h+1); + l.delta = calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + + l.counts = calloc(90, sizeof(int)); + l.sums = calloc(90, sizeof(float*)); + if(ids){ + int i; + for(i = 0; i < 90; ++i){ + l.sums[i] = calloc(ids, sizeof(float)); + } + } + + l.forward = forward_iseg_layer; + l.backward = backward_iseg_layer; +#ifdef GPU + l.forward_gpu = forward_iseg_layer_gpu; + l.backward_gpu = backward_iseg_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "iseg\n"); + srand(0); + + return l; +} + +void resize_iseg_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->c; + l->inputs = 
l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +void forward_iseg_layer(const layer l, network net) +{ + + double time = what_time_is_it_now(); + int i,b,j,k; + int ids = l.extra; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + int index = b*l.outputs; + activate_array(l.output + index, l.classes*l.w*l.h, LOGISTIC); + } +#endif + + for (b = 0; b < l.batch; ++b){ + // a priori, each pixel has no class + for(i = 0; i < l.classes; ++i){ + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + i*l.w*l.h + k; + l.delta[index] = 0 - l.output[index]; + } + } + + // a priori, embedding should be small magnitude + for(i = 0; i < ids; ++i){ + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + (i+l.classes)*l.w*l.h + k; + l.delta[index] = .1 * (0 - l.output[index]); + } + } + + + memset(l.counts, 0, 90*sizeof(int)); + for(i = 0; i < 90; ++i){ + fill_cpu(ids, 0, l.sums[i], 1); + + int c = net.truth[b*l.truths + i*(l.w*l.h+1)]; + if(c < 0) break; + // add up metric embeddings for each instance + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + c*l.w*l.h + k; + float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k]; + if(v){ + l.delta[index] = v - l.output[index]; + axpy_cpu(ids, 1, l.output + b*l.outputs + l.classes*l.w*l.h + k, l.w*l.h, l.sums[i], 1); + ++l.counts[i]; + } + } + } + + float *mse = calloc(90, sizeof(float)); + for(i = 0; i < 90; ++i){ + int c = net.truth[b*l.truths + i*(l.w*l.h+1)]; + if(c < 0) break; + for(k = 0; k < l.w*l.h; ++k){ + float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k]; + if(v){ + 
int z; + float sum = 0; + for(z = 0; z < ids; ++z){ + int index = b*l.outputs + (l.classes + z)*l.w*l.h + k; + sum += pow(l.sums[i][z]/l.counts[i] - l.output[index], 2); + } + mse[i] += sum; + } + } + mse[i] /= l.counts[i]; + } + + // Calculate average embedding + for(i = 0; i < 90; ++i){ + if(!l.counts[i]) continue; + scal_cpu(ids, 1.f/l.counts[i], l.sums[i], 1); + if(b == 0 && net.gpu_index == 0){ + printf("%4d, %6.3f, ", l.counts[i], mse[i]); + for(j = 0; j < ids; ++j){ + printf("%6.3f,", l.sums[i][j]); + } + printf("\n"); + } + } + free(mse); + + // Calculate embedding loss + for(i = 0; i < 90; ++i){ + if(!l.counts[i]) continue; + for(k = 0; k < l.w*l.h; ++k){ + float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k]; + if(v){ + for(j = 0; j < 90; ++j){ + if(!l.counts[j])continue; + int z; + for(z = 0; z < ids; ++z){ + int index = b*l.outputs + (l.classes + z)*l.w*l.h + k; + float diff = l.sums[j][z] - l.output[index]; + if (j == i) l.delta[index] += diff < 0? -.1 : .1; + else l.delta[index] += -(diff < 0? 
-.1 : .1); + } + } + } + } + } + + for(i = 0; i < ids; ++i){ + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + (i+l.classes)*l.w*l.h + k; + l.delta[index] *= .01; + } + } + } + + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("took %lf sec\n", what_time_is_it_now() - time); +} + +void backward_iseg_layer(const layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_iseg_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b; + for (b = 0; b < l.batch; ++b){ + activate_array_gpu(l.output_gpu + b*l.outputs, l.classes*l.w*l.h, LOGISTIC); + //if(l.extra) activate_array_gpu(l.output_gpu + b*l.outputs + l.classes*l.w*l.h, l.extra*l.w*l.h, LOGISTIC); + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_iseg_layer(l, net); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_iseg_layer_gpu(const layer l, network net) +{ + int b; + for (b = 0; b < l.batch; ++b){ + //if(l.extra) gradient_array_gpu(l.output_gpu + b*l.outputs + l.classes*l.w*l.h, l.extra*l.w*l.h, LOGISTIC, l.delta_gpu + b*l.outputs + l.classes*l.w*l.h); + } + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/standard/darknet/src/iseg_layer.h b/workloads/realworld/standard/darknet/src/iseg_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..dd8e64e023caf1e1fd0c30af57f9983f24ddd691 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/iseg_layer.h @@ -0,0 +1,19 @@ +#ifndef ISEG_LAYER_H +#define ISEG_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_iseg_layer(int batch, int w, int h, int classes, int ids); +void forward_iseg_layer(const layer l, network net); +void backward_iseg_layer(const layer l, network net); +void resize_iseg_layer(layer *l, int w, int h); +int 
iseg_num_detections(layer l, float thresh); + +#ifdef GPU +void forward_iseg_layer_gpu(const layer l, network net); +void backward_iseg_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/standard/darknet/src/l2norm_layer.c b/workloads/realworld/standard/darknet/src/l2norm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..0cf7f844170cb2c3dba15be94d4a435aaa63067c --- /dev/null +++ b/workloads/realworld/standard/darknet/src/l2norm_layer.c @@ -0,0 +1,63 @@ +#include "l2norm_layer.h" +#include "activations.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +layer make_l2norm_layer(int batch, int inputs) +{ + fprintf(stderr, "l2norm %4d\n", inputs); + layer l = {0}; + l.type = L2NORM; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.output = calloc(inputs*batch, sizeof(float)); + l.scales = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + + l.forward = forward_l2norm_layer; + l.backward = backward_l2norm_layer; + #ifdef GPU + l.forward_gpu = forward_l2norm_layer_gpu; + l.backward_gpu = backward_l2norm_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.scales_gpu = cuda_make_array(l.output, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_l2norm_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + l2normalize_cpu(l.output, l.scales, l.batch, l.out_c, l.out_w*l.out_h); +} + +void backward_l2norm_layer(const layer l, network net) +{ + //axpy_cpu(l.inputs*l.batch, 1, l.scales, 1, l.delta, 1); + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_l2norm_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + l2normalize_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_w*l.out_h); +} + +void 
backward_l2norm_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.scales_gpu, 1, l.delta_gpu, 1); + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/standard/darknet/src/l2norm_layer.h b/workloads/realworld/standard/darknet/src/l2norm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1ca6f710f017f2857f566eaed90634698d72b26d --- /dev/null +++ b/workloads/realworld/standard/darknet/src/l2norm_layer.h @@ -0,0 +1,15 @@ +#ifndef L2NORM_LAYER_H +#define L2NORM_LAYER_H +#include "layer.h" +#include "network.h" + +layer make_l2norm_layer(int batch, int inputs); +void forward_l2norm_layer(const layer l, network net); +void backward_l2norm_layer(const layer l, network net); + +#ifdef GPU +void forward_l2norm_layer_gpu(const layer l, network net); +void backward_l2norm_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/standard/darknet/src/layer.c b/workloads/realworld/standard/darknet/src/layer.c new file mode 100644 index 0000000000000000000000000000000000000000..3bffe436f06a455e2d1043158ff6da9b07bbb61f --- /dev/null +++ b/workloads/realworld/standard/darknet/src/layer.c @@ -0,0 +1,97 @@ +#include "layer.h" +#include "cuda_dark.h" + +#include + +void free_layer(layer l) +{ + if(l.type == DROPOUT){ + if(l.rand) free(l.rand); +#ifdef GPU + if(l.rand_gpu) cuda_free(l.rand_gpu); +#endif + return; + } + if(l.cweights) free(l.cweights); + if(l.indexes) free(l.indexes); + if(l.input_layers) free(l.input_layers); + if(l.input_sizes) free(l.input_sizes); + if(l.map) free(l.map); + if(l.rand) free(l.rand); + if(l.cost) free(l.cost); + if(l.state) free(l.state); + if(l.prev_state) free(l.prev_state); + if(l.forgot_state) free(l.forgot_state); + if(l.forgot_delta) free(l.forgot_delta); + if(l.state_delta) free(l.state_delta); + if(l.concat) free(l.concat); + if(l.concat_delta) free(l.concat_delta); + if(l.binary_weights) 
free(l.binary_weights); + if(l.biases) free(l.biases); + if(l.bias_updates) free(l.bias_updates); + if(l.scales) free(l.scales); + if(l.scale_updates) free(l.scale_updates); + if(l.weights) free(l.weights); + if(l.weight_updates) free(l.weight_updates); + if(l.delta) free(l.delta); + if(l.output) free(l.output); + if(l.squared) free(l.squared); + if(l.norms) free(l.norms); + if(l.spatial_mean) free(l.spatial_mean); + if(l.mean) free(l.mean); + if(l.variance) free(l.variance); + if(l.mean_delta) free(l.mean_delta); + if(l.variance_delta) free(l.variance_delta); + if(l.rolling_mean) free(l.rolling_mean); + if(l.rolling_variance) free(l.rolling_variance); + if(l.x) free(l.x); + if(l.x_norm) free(l.x_norm); + if(l.m) free(l.m); + if(l.v) free(l.v); + if(l.z_cpu) free(l.z_cpu); + if(l.r_cpu) free(l.r_cpu); + if(l.h_cpu) free(l.h_cpu); + if(l.binary_input) free(l.binary_input); + +#ifdef GPU + if(l.indexes_gpu) cuda_free((float *)l.indexes_gpu); + + if(l.z_gpu) cuda_free(l.z_gpu); + if(l.r_gpu) cuda_free(l.r_gpu); + if(l.h_gpu) cuda_free(l.h_gpu); + if(l.m_gpu) cuda_free(l.m_gpu); + if(l.v_gpu) cuda_free(l.v_gpu); + if(l.prev_state_gpu) cuda_free(l.prev_state_gpu); + if(l.forgot_state_gpu) cuda_free(l.forgot_state_gpu); + if(l.forgot_delta_gpu) cuda_free(l.forgot_delta_gpu); + if(l.state_gpu) cuda_free(l.state_gpu); + if(l.state_delta_gpu) cuda_free(l.state_delta_gpu); + if(l.gate_gpu) cuda_free(l.gate_gpu); + if(l.gate_delta_gpu) cuda_free(l.gate_delta_gpu); + if(l.save_gpu) cuda_free(l.save_gpu); + if(l.save_delta_gpu) cuda_free(l.save_delta_gpu); + if(l.concat_gpu) cuda_free(l.concat_gpu); + if(l.concat_delta_gpu) cuda_free(l.concat_delta_gpu); + if(l.binary_input_gpu) cuda_free(l.binary_input_gpu); + if(l.binary_weights_gpu) cuda_free(l.binary_weights_gpu); + if(l.mean_gpu) cuda_free(l.mean_gpu); + if(l.variance_gpu) cuda_free(l.variance_gpu); + if(l.rolling_mean_gpu) cuda_free(l.rolling_mean_gpu); + if(l.rolling_variance_gpu) cuda_free(l.rolling_variance_gpu); + 
if(l.variance_delta_gpu) cuda_free(l.variance_delta_gpu); + if(l.mean_delta_gpu) cuda_free(l.mean_delta_gpu); + if(l.x_gpu) cuda_free(l.x_gpu); + if(l.x_norm_gpu) cuda_free(l.x_norm_gpu); + if(l.weights_gpu) cuda_free(l.weights_gpu); + if(l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); + if(l.biases_gpu) cuda_free(l.biases_gpu); + if(l.bias_updates_gpu) cuda_free(l.bias_updates_gpu); + if(l.scales_gpu) cuda_free(l.scales_gpu); + if(l.scale_updates_gpu) cuda_free(l.scale_updates_gpu); + if(l.output_gpu) cuda_free(l.output_gpu); + if(l.delta_gpu) cuda_free(l.delta_gpu); + if(l.rand_gpu) cuda_free(l.rand_gpu); + if(l.squared_gpu) cuda_free(l.squared_gpu); + if(l.norms_gpu) cuda_free(l.norms_gpu); +#endif +} diff --git a/workloads/realworld/standard/darknet/src/layer.h b/workloads/realworld/standard/darknet/src/layer.h new file mode 100644 index 0000000000000000000000000000000000000000..af6cd2ab5054f5ef3bbdfca2da45f08d710a7bd0 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/layer.h @@ -0,0 +1 @@ +#include "darknet.h" diff --git a/workloads/realworld/standard/darknet/src/list.c b/workloads/realworld/standard/darknet/src/list.c new file mode 100644 index 0000000000000000000000000000000000000000..0e4165d37800e1b4c7c33992cd64a6634fe4688c --- /dev/null +++ b/workloads/realworld/standard/darknet/src/list.c @@ -0,0 +1,92 @@ +#include +#include +#include "list.h" + +list *make_list() +{ + list *l = malloc(sizeof(list)); + l->size = 0; + l->front = 0; + l->back = 0; + return l; +} + +/* +void transfer_node(list *s, list *d, node *n) +{ + node *prev, *next; + prev = n->prev; + next = n->next; + if(prev) prev->next = next; + if(next) next->prev = prev; + --s->size; + if(s->front == n) s->front = next; + if(s->back == n) s->back = prev; +} +*/ + +void *list_pop(list *l){ + if(!l->back) return 0; + node *b = l->back; + void *val = b->val; + l->back = b->prev; + if(l->back) l->back->next = 0; + free(b); + --l->size; + + return val; +} + +void list_insert(list *l, 
void *val) +{ + node *new = malloc(sizeof(node)); + new->val = val; + new->next = 0; + + if(!l->back){ + l->front = new; + new->prev = 0; + }else{ + l->back->next = new; + new->prev = l->back; + } + l->back = new; + ++l->size; +} + +void free_node(node *n) +{ + node *next; + while(n) { + next = n->next; + free(n); + n = next; + } +} + +void free_list(list *l) +{ + free_node(l->front); + free(l); +} + +void free_list_contents(list *l) +{ + node *n = l->front; + while(n){ + free(n->val); + n = n->next; + } +} + +void **list_to_array(list *l) +{ + void **a = calloc(l->size, sizeof(void*)); + int count = 0; + node *n = l->front; + while(n){ + a[count++] = n->val; + n = n->next; + } + return a; +} diff --git a/workloads/realworld/standard/darknet/src/list.h b/workloads/realworld/standard/darknet/src/list.h new file mode 100644 index 0000000000000000000000000000000000000000..6b445c717c2937b9c90536654ba82a33e14bb4ec --- /dev/null +++ b/workloads/realworld/standard/darknet/src/list.h @@ -0,0 +1,13 @@ +#ifndef LIST_H +#define LIST_H +#include "darknet.h" + +list *make_list(); +int list_find(list *l, void *val); + +void list_insert(list *, void *); + + +void free_list_contents(list *l); + +#endif diff --git a/workloads/realworld/standard/darknet/src/local_layer.c b/workloads/realworld/standard/darknet/src/local_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..74f6910a8fd751ad9f3b41fc67be737399a151d0 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/local_layer.c @@ -0,0 +1,293 @@ +#include "local_layer.h" +#include "utils.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" +#include +#include + +int local_out_height(local_layer l) +{ + int h = l.h; + if (!l.pad) h -= l.size; + else h -= 1; + return h/l.stride + 1; +} + +int local_out_width(local_layer l) +{ + int w = l.w; + if (!l.pad) w -= l.size; + else w -= 1; + return w/l.stride + 1; +} + +local_layer make_local_layer(int batch, int h, int w, int c, 
int n, int size, int stride, int pad, ACTIVATION activation) +{ + int i; + local_layer l = {0}; + l.type = LOCAL; + + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.batch = batch; + l.stride = stride; + l.size = size; + l.pad = pad; + + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int locations = out_h*out_w; + l.out_h = out_h; + l.out_w = out_w; + l.out_c = n; + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = l.w * l.h * l.c; + + l.weights = calloc(c*n*size*size*locations, sizeof(float)); + l.weight_updates = calloc(c*n*size*size*locations, sizeof(float)); + + l.biases = calloc(l.outputs, sizeof(float)); + l.bias_updates = calloc(l.outputs, sizeof(float)); + + // float scale = 1./sqrt(size*size*c); + float scale = sqrt(2./(size*size*c)); + for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1,1); + + l.output = calloc(l.batch*out_h * out_w * n, sizeof(float)); + l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float)); + + l.workspace_size = out_h*out_w*size*size*c; + + l.forward = forward_local_layer; + l.backward = backward_local_layer; + l.update = update_local_layer; + +#ifdef GPU + l.forward_gpu = forward_local_layer_gpu; + l.backward_gpu = backward_local_layer_gpu; + l.update_gpu = update_local_layer_gpu; + + l.weights_gpu = cuda_make_array(l.weights, c*n*size*size*locations); + l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size*locations); + + l.biases_gpu = cuda_make_array(l.biases, l.outputs); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, l.outputs); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + +#endif + l.activation = activation; + + fprintf(stderr, "Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n); + + return l; +} + +void forward_local_layer(const local_layer l, network net) +{ + int out_h = local_out_height(l); + int out_w = 
local_out_width(l); + int i, j; + int locations = out_h * out_w; + + for(i = 0; i < l.batch; ++i){ + copy_cpu(l.outputs, l.biases, 1, l.output + i*l.outputs, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input + i*l.w*l.h*l.c; + im2col_cpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + float *output = l.output + i*l.outputs; + for(j = 0; j < locations; ++j){ + float *a = l.weights + j*l.size*l.size*l.c*l.n; + float *b = net.workspace + j; + float *c = output + j; + + int m = l.n; + int n = 1; + int k = l.size*l.size*l.c; + + gemm(0,0,m,n,k,1,a,k,b,locations,1,c,locations); + } + } + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_local_layer(local_layer l, network net) +{ + int i, j; + int locations = l.out_w*l.out_h; + + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + for(i = 0; i < l.batch; ++i){ + axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input + i*l.w*l.h*l.c; + im2col_cpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + + for(j = 0; j < locations; ++j){ + float *a = l.delta + i*l.outputs + j; + float *b = net.workspace + j; + float *c = l.weight_updates + j*l.size*l.size*l.c*l.n; + int m = l.n; + int n = l.size*l.size*l.c; + int k = 1; + + gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n); + } + + if(net.delta){ + for(j = 0; j < locations; ++j){ + float *a = l.weights + j*l.size*l.size*l.c*l.n; + float *b = l.delta + i*l.outputs + j; + float *c = net.workspace + j; + + int m = l.size*l.size*l.c; + int n = 1; + int k = l.n; + + gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations); + } + + col2im_cpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta+i*l.c*l.h*l.w); + } + } +} + +void update_local_layer(local_layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int 
batch = a.batch; + + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.outputs, momentum, l.bias_updates, 1); + + axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(size, momentum, l.weight_updates, 1); +} + +#ifdef GPU + +void forward_local_layer_gpu(const local_layer l, network net) +{ + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int i, j; + int locations = out_h * out_w; + + for(i = 0; i < l.batch; ++i){ + copy_gpu(l.outputs, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input_gpu + i*l.w*l.h*l.c; + im2col_gpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + float *output = l.output_gpu + i*l.outputs; + for(j = 0; j < locations; ++j){ + float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n; + float *b = net.workspace + j; + float *c = output + j; + + int m = l.n; + int n = 1; + int k = l.size*l.size*l.c; + + gemm_gpu(0,0,m,n,k,1,a,k,b,locations,1,c,locations); + } + } + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_local_layer_gpu(local_layer l, network net) +{ + int i, j; + int locations = l.out_w*l.out_h; + + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + for(i = 0; i < l.batch; ++i){ + axpy_gpu(l.outputs, 1, l.delta_gpu + i*l.outputs, 1, l.bias_updates_gpu, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input_gpu + i*l.w*l.h*l.c; + im2col_gpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + + for(j = 0; j < locations; ++j){ + float *a = l.delta_gpu + i*l.outputs + j; + float *b = net.workspace + j; + float *c = l.weight_updates_gpu + j*l.size*l.size*l.c*l.n; + int m = l.n; + int n = l.size*l.size*l.c; + int k = 1; + + 
gemm_gpu(0,1,m,n,k,1,a,locations,b,locations,1,c,n); + } + + if(net.delta_gpu){ + for(j = 0; j < locations; ++j){ + float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n; + float *b = l.delta_gpu + i*l.outputs + j; + float *c = net.workspace + j; + + int m = l.size*l.size*l.c; + int n = 1; + int k = l.n; + + gemm_gpu(1,0,m,n,k,1,a,m,b,locations,0,c,locations); + } + + col2im_gpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta_gpu+i*l.c*l.h*l.w); + } + } +} + +void update_local_layer_gpu(local_layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1); + + axpy_gpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(size, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(size, momentum, l.weight_updates_gpu, 1); +} + +void pull_local_layer(local_layer l) +{ + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + cuda_pull_array(l.weights_gpu, l.weights, size); + cuda_pull_array(l.biases_gpu, l.biases, l.outputs); +} + +void push_local_layer(local_layer l) +{ + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + cuda_push_array(l.weights_gpu, l.weights, size); + cuda_push_array(l.biases_gpu, l.biases, l.outputs); +} +#endif diff --git a/workloads/realworld/standard/darknet/src/local_layer.h b/workloads/realworld/standard/darknet/src/local_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..56805c4f1cb51fed9ef0e771d2befb430df60df5 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/local_layer.h @@ -0,0 +1,31 @@ +#ifndef LOCAL_LAYER_H +#define LOCAL_LAYER_H + +#include "cuda_dark.h" +#include 
"image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +typedef layer local_layer; + +#ifdef GPU +void forward_local_layer_gpu(local_layer layer, network net); +void backward_local_layer_gpu(local_layer layer, network net); +void update_local_layer_gpu(local_layer layer, update_args a); + +void push_local_layer(local_layer layer); +void pull_local_layer(local_layer layer); +#endif + +local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation); + +void forward_local_layer(const local_layer layer, network net); +void backward_local_layer(local_layer layer, network net); +void update_local_layer(local_layer layer, update_args a); + +void bias_output(float *output, float *biases, int batch, int n, int size); +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); + +#endif + diff --git a/workloads/realworld/standard/darknet/src/logistic_layer.c b/workloads/realworld/standard/darknet/src/logistic_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..8d98986f67a17da70df75e3d56a46615cfc8eaf1 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/logistic_layer.c @@ -0,0 +1,71 @@ +#include "logistic_layer.h" +#include "activations.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +layer make_logistic_layer(int batch, int inputs) +{ + fprintf(stderr, "logistic x entropy %4d\n", inputs); + layer l = {0}; + l.type = LOGXENT; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.loss = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_logistic_layer; + l.backward = backward_logistic_layer; + #ifdef GPU + l.forward_gpu = forward_logistic_layer_gpu; + l.backward_gpu = backward_logistic_layer_gpu; + + l.output_gpu = 
cuda_make_array(l.output, inputs*batch); + l.loss_gpu = cuda_make_array(l.loss, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_logistic_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + activate_array(l.output, l.outputs*l.batch, LOGISTIC); + if(net.truth){ + logistic_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_logistic_layer(const layer l, network net) +{ + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_logistic_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, LOGISTIC); + if(net.truth){ + logistic_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); + cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_logistic_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/standard/darknet/src/logistic_layer.h b/workloads/realworld/standard/darknet/src/logistic_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9c25bee3c2a6eb1013ed43ce0c4aeaa63b7a293f --- /dev/null +++ b/workloads/realworld/standard/darknet/src/logistic_layer.h @@ -0,0 +1,15 @@ +#ifndef LOGISTIC_LAYER_H +#define LOGISTIC_LAYER_H +#include "layer.h" +#include "network.h" + +layer make_logistic_layer(int batch, int inputs); +void forward_logistic_layer(const layer l, network net); +void backward_logistic_layer(const layer l, network net); + +#ifdef GPU +void forward_logistic_layer_gpu(const layer l, network net); +void backward_logistic_layer_gpu(const layer l, network net); +#endif + +#endif diff --git 
a/workloads/realworld/standard/darknet/src/lstm_layer.c b/workloads/realworld/standard/darknet/src/lstm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..16f43914df8f35fb7f7b16bd93ff1d83f513dda0 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/lstm_layer.c @@ -0,0 +1,626 @@ +#include "lstm_layer.h" +#include "connected_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam) +{ + fprintf(stderr, "LSTM Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer l = { 0 }; + l.batch = batch; + l.type = LSTM; + l.steps = steps; + l.inputs = inputs; + + l.uf = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uf) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uf->batch = batch; + + l.ui = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.ui) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.ui->batch = batch; + + l.ug = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.ug) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.ug->batch = batch; + + l.uo = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uo) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uo->batch = batch; + + l.wf = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wf) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wf->batch = batch; + + l.wi = 
malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wi) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wi->batch = batch; + + l.wg = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wg) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wg->batch = batch; + + l.wo = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wo) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wo->batch = batch; + + l.batch_normalize = batch_normalize; + l.outputs = outputs; + + l.output = calloc(outputs*batch*steps, sizeof(float)); + l.state = calloc(outputs*batch, sizeof(float)); + + l.forward = forward_lstm_layer; + l.update = update_lstm_layer; + + l.prev_state_cpu = calloc(batch*outputs, sizeof(float)); + l.prev_cell_cpu = calloc(batch*outputs, sizeof(float)); + l.cell_cpu = calloc(batch*outputs*steps, sizeof(float)); + + l.f_cpu = calloc(batch*outputs, sizeof(float)); + l.i_cpu = calloc(batch*outputs, sizeof(float)); + l.g_cpu = calloc(batch*outputs, sizeof(float)); + l.o_cpu = calloc(batch*outputs, sizeof(float)); + l.c_cpu = calloc(batch*outputs, sizeof(float)); + l.h_cpu = calloc(batch*outputs, sizeof(float)); + l.temp_cpu = calloc(batch*outputs, sizeof(float)); + l.temp2_cpu = calloc(batch*outputs, sizeof(float)); + l.temp3_cpu = calloc(batch*outputs, sizeof(float)); + l.dc_cpu = calloc(batch*outputs, sizeof(float)); + l.dh_cpu = calloc(batch*outputs, sizeof(float)); + +#ifdef GPU + l.forward_gpu = forward_lstm_layer_gpu; + l.backward_gpu = backward_lstm_layer_gpu; + l.update_gpu = update_lstm_layer_gpu; + + l.output_gpu = cuda_make_array(0, batch*outputs*steps); + l.delta_gpu = cuda_make_array(0, batch*l.outputs*steps); + + l.prev_state_gpu = cuda_make_array(0, batch*outputs); + l.prev_cell_gpu = cuda_make_array(0, batch*outputs); + l.cell_gpu = cuda_make_array(0, batch*outputs*steps); + + l.f_gpu = cuda_make_array(0, batch*outputs); + l.i_gpu 
= cuda_make_array(0, batch*outputs); + l.g_gpu = cuda_make_array(0, batch*outputs); + l.o_gpu = cuda_make_array(0, batch*outputs); + l.c_gpu = cuda_make_array(0, batch*outputs); + l.h_gpu = cuda_make_array(0, batch*outputs); + l.temp_gpu = cuda_make_array(0, batch*outputs); + l.temp2_gpu = cuda_make_array(0, batch*outputs); + l.temp3_gpu = cuda_make_array(0, batch*outputs); + l.dc_gpu = cuda_make_array(0, batch*outputs); + l.dh_gpu = cuda_make_array(0, batch*outputs); +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l.wf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wf->out_c, l.wf->out_h, l.wf->out_w); + cudnnSetTensor4dDescriptor(l.wi->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wi->out_c, l.wi->out_h, l.wi->out_w); + cudnnSetTensor4dDescriptor(l.wg->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wg->out_c, l.wg->out_h, l.wg->out_w); + cudnnSetTensor4dDescriptor(l.wo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wo->out_c, l.wo->out_h, l.wo->out_w); + + cudnnSetTensor4dDescriptor(l.uf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uf->out_c, l.uf->out_h, l.uf->out_w); + cudnnSetTensor4dDescriptor(l.ui->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ui->out_c, l.ui->out_h, l.ui->out_w); + cudnnSetTensor4dDescriptor(l.ug->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ug->out_c, l.ug->out_h, l.ug->out_w); + cudnnSetTensor4dDescriptor(l.uo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uo->out_c, l.uo->out_h, l.uo->out_w); +#endif + +#endif + + return l; +} + +void update_lstm_layer(layer l, update_args a) +{ + update_connected_layer(*(l.wf), a); + update_connected_layer(*(l.wi), a); + update_connected_layer(*(l.wg), a); + update_connected_layer(*(l.wo), a); + update_connected_layer(*(l.uf), a); + update_connected_layer(*(l.ui), a); + update_connected_layer(*(l.ug), a); + update_connected_layer(*(l.uo), a); +} + +void forward_lstm_layer(layer l, network 
state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + fill_cpu(l.outputs * l.batch * l.steps, 0, wf.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wi.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wg.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wo.delta, 1); + + fill_cpu(l.outputs * l.batch * l.steps, 0, uf.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, ui.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, ug.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, uo.delta, 1); + if (state.train) { + fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input = l.h_cpu; + forward_connected_layer(wf, s); + forward_connected_layer(wi, s); + forward_connected_layer(wg, s); + forward_connected_layer(wo, s); + + s.input = state.input; + forward_connected_layer(uf, s); + forward_connected_layer(ui, s); + forward_connected_layer(ug, s); + forward_connected_layer(uo, s); + + copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); + + copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); + + copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); + + copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); + + activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.g_cpu, l.outputs*l.batch, TANH); + activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); + + copy_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); + 
mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.c_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, l.temp_cpu, 1, l.c_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.h_cpu, 1); + activate_array(l.h_cpu, l.outputs*l.batch, TANH); + mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.h_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.cell_cpu, 1); + copy_cpu(l.outputs*l.batch, l.h_cpu, 1, l.output, 1); + + state.input += l.inputs*l.batch; + l.output += l.outputs*l.batch; + l.cell_cpu += l.outputs*l.batch; + + increment_layer(&wf, 1); + increment_layer(&wi, 1); + increment_layer(&wg, 1); + increment_layer(&wo, 1); + + increment_layer(&uf, 1); + increment_layer(&ui, 1); + increment_layer(&ug, 1); + increment_layer(&uo, 1); + } +} + +void backward_lstm_layer(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + increment_layer(&wf, l.steps - 1); + increment_layer(&wi, l.steps - 1); + increment_layer(&wg, l.steps - 1); + increment_layer(&wo, l.steps - 1); + + increment_layer(&uf, l.steps - 1); + increment_layer(&ui, l.steps - 1); + increment_layer(&ug, l.steps - 1); + increment_layer(&uo, l.steps - 1); + + state.input += l.inputs*l.batch*(l.steps - 1); + if (state.delta) state.delta += l.inputs*l.batch*(l.steps - 1); + + l.output += l.outputs*l.batch*(l.steps - 1); + l.cell_cpu += l.outputs*l.batch*(l.steps - 1); + l.delta += l.outputs*l.batch*(l.steps - 1); + + for (i = l.steps - 1; i >= 0; --i) { + if (i != 0) copy_cpu(l.outputs*l.batch, l.cell_cpu - l.outputs*l.batch, 1, l.prev_cell_cpu, 1); + copy_cpu(l.outputs*l.batch, l.cell_cpu, 1, l.c_cpu, 1); + if (i != 0) copy_cpu(l.outputs*l.batch, l.output - l.outputs*l.batch, 1, l.prev_state_cpu, 1); + copy_cpu(l.outputs*l.batch, l.output, 1, l.h_cpu, 1); + + l.dh_cpu = (i == 0) ? 
0 : l.delta - l.outputs*l.batch; + + copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); + + copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); + + copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); + + copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); + + activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.g_cpu, l.outputs*l.batch, TANH); + activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); + + copy_cpu(l.outputs*l.batch, l.delta, 1, l.temp3_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); + activate_array(l.temp_cpu, l.outputs*l.batch, TANH); + + copy_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp2_cpu, 1); + mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.temp2_cpu, 1); + + gradient_array(l.temp_cpu, l.outputs*l.batch, TANH, l.temp2_cpu); + axpy_cpu(l.outputs*l.batch, 1, l.dc_cpu, 1, l.temp2_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); + activate_array(l.temp_cpu, l.outputs*l.batch, TANH); + mul_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp_cpu, 1); + gradient_array(l.o_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wo.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wo, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uo.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(uo, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); + gradient_array(l.g_cpu, l.outputs*l.batch, TANH, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wg.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + 
backward_connected_layer(wg, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ug.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(ug, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); + gradient_array(l.i_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wi.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wi, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ui.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(ui, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.prev_cell_cpu, 1, l.temp_cpu, 1); + gradient_array(l.f_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wf.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wf, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uf.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(uf, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.temp_cpu, 1); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, l.dc_cpu, 1); + + state.input -= l.inputs*l.batch; + if (state.delta) state.delta -= l.inputs*l.batch; + l.output -= l.outputs*l.batch; + l.cell_cpu -= l.outputs*l.batch; + l.delta -= l.outputs*l.batch; + + increment_layer(&wf, -1); + increment_layer(&wi, -1); + increment_layer(&wg, -1); + increment_layer(&wo, -1); + + increment_layer(&uf, -1); + increment_layer(&ui, -1); + increment_layer(&ug, -1); + increment_layer(&uo, -1); + } +} + +#ifdef GPU +void update_lstm_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.wf), a); + update_connected_layer_gpu(*(l.wi), a); + update_connected_layer_gpu(*(l.wg), a); + update_connected_layer_gpu(*(l.wo), a); + 
update_connected_layer_gpu(*(l.uf), a); + update_connected_layer_gpu(*(l.ui), a); + update_connected_layer_gpu(*(l.ug), a); + update_connected_layer_gpu(*(l.uo), a); +} + +void forward_lstm_layer_gpu(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + fill_gpu(l.outputs * l.batch * l.steps, 0, wf.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wi.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wg.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wo.delta_gpu, 1); + + fill_gpu(l.outputs * l.batch * l.steps, 0, uf.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, ui.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, ug.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, uo.delta_gpu, 1); + if (state.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = l.h_gpu; + forward_connected_layer_gpu(wf, s); + forward_connected_layer_gpu(wi, s); + forward_connected_layer_gpu(wg, s); + forward_connected_layer_gpu(wo, s); + + s.input_gpu = state.input_gpu; + forward_connected_layer_gpu(uf, s); + forward_connected_layer_gpu(ui, s); + forward_connected_layer_gpu(ug, s); + forward_connected_layer_gpu(uo, s); + + copy_gpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1); + + copy_gpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1); + + copy_gpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1); + + copy_gpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1); + + activate_array_gpu(l.f_gpu, 
l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.g_gpu, l.outputs*l.batch, TANH); + activate_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.f_gpu, 1, l.c_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, l.temp_gpu, 1, l.c_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.h_gpu, 1); + activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH); + mul_gpu(l.outputs*l.batch, l.o_gpu, 1, l.h_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.cell_gpu, 1); + copy_gpu(l.outputs*l.batch, l.h_gpu, 1, l.output_gpu, 1); + + state.input_gpu += l.inputs*l.batch; + l.output_gpu += l.outputs*l.batch; + l.cell_gpu += l.outputs*l.batch; + + increment_layer(&wf, 1); + increment_layer(&wi, 1); + increment_layer(&wg, 1); + increment_layer(&wo, 1); + + increment_layer(&uf, 1); + increment_layer(&ui, 1); + increment_layer(&ug, 1); + increment_layer(&uo, 1); + } +} + +void backward_lstm_layer_gpu(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + increment_layer(&wf, l.steps - 1); + increment_layer(&wi, l.steps - 1); + increment_layer(&wg, l.steps - 1); + increment_layer(&wo, l.steps - 1); + + increment_layer(&uf, l.steps - 1); + increment_layer(&ui, l.steps - 1); + increment_layer(&ug, l.steps - 1); + increment_layer(&uo, l.steps - 1); + + state.input_gpu += l.inputs*l.batch*(l.steps - 1); + if (state.delta_gpu) state.delta_gpu += l.inputs*l.batch*(l.steps - 1); + + l.output_gpu += l.outputs*l.batch*(l.steps - 1); + l.cell_gpu += l.outputs*l.batch*(l.steps - 1); + l.delta_gpu += l.outputs*l.batch*(l.steps - 1); + + for (i = l.steps - 1; i >= 0; --i) { + if (i != 0) 
copy_gpu(l.outputs*l.batch, l.cell_gpu - l.outputs*l.batch, 1, l.prev_cell_gpu, 1); + copy_gpu(l.outputs*l.batch, l.cell_gpu, 1, l.c_gpu, 1); + if (i != 0) copy_gpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, 1, l.prev_state_gpu, 1); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.h_gpu, 1); + + l.dh_gpu = (i == 0) ? 0 : l.delta_gpu - l.outputs*l.batch; + + copy_gpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1); + + copy_gpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1); + + copy_gpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1); + + copy_gpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1); + + activate_array_gpu(l.f_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.g_gpu, l.outputs*l.batch, TANH); + activate_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, l.temp3_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.temp_gpu, 1); + activate_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH); + + copy_gpu(l.outputs*l.batch, l.temp3_gpu, 1, l.temp2_gpu, 1); + mul_gpu(l.outputs*l.batch, l.o_gpu, 1, l.temp2_gpu, 1); + + gradient_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH, l.temp2_gpu); + axpy_gpu(l.outputs*l.batch, 1, l.dc_gpu, 1, l.temp2_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.temp_gpu, 1); + activate_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH); + mul_gpu(l.outputs*l.batch, l.temp3_gpu, 1, l.temp_gpu, 1); + gradient_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wo.delta_gpu, 1); + s.input_gpu = l.prev_state_gpu; + s.delta_gpu = l.dh_gpu; + backward_connected_layer_gpu(wo, s); + + 
copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, uo.delta_gpu, 1); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(uo, s); + + copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1); + gradient_array_gpu(l.g_gpu, l.outputs*l.batch, TANH, l.temp_gpu); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wg.delta_gpu, 1); + s.input_gpu = l.prev_state_gpu; + s.delta_gpu = l.dh_gpu; + backward_connected_layer_gpu(wg, s); + + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, ug.delta_gpu, 1); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(ug, s); + + copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1); + gradient_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wi.delta_gpu, 1); + s.input_gpu = l.prev_state_gpu; + s.delta_gpu = l.dh_gpu; + backward_connected_layer_gpu(wi, s); + + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, ui.delta_gpu, 1); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(ui, s); + + copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.prev_cell_gpu, 1, l.temp_gpu, 1); + gradient_array_gpu(l.f_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wf.delta_gpu, 1); + s.input_gpu = l.prev_state_gpu; + s.delta_gpu = l.dh_gpu; + backward_connected_layer_gpu(wf, s); + + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, uf.delta_gpu, 1); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(uf, s); + + copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.f_gpu, 1, l.temp_gpu, 1); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, l.dc_gpu, 1); + + state.input_gpu -= l.inputs*l.batch; + if (state.delta_gpu) 
state.delta_gpu -= l.inputs*l.batch; + l.output_gpu -= l.outputs*l.batch; + l.cell_gpu -= l.outputs*l.batch; + l.delta_gpu -= l.outputs*l.batch; + + increment_layer(&wf, -1); + increment_layer(&wi, -1); + increment_layer(&wg, -1); + increment_layer(&wo, -1); + + increment_layer(&uf, -1); + increment_layer(&ui, -1); + increment_layer(&ug, -1); + increment_layer(&uo, -1); + } +} +#endif diff --git a/workloads/realworld/standard/darknet/src/lstm_layer.h b/workloads/realworld/standard/darknet/src/lstm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..b9f07e6424b55c336e692aa6f1028d0bc1cae0b3 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/lstm_layer.h @@ -0,0 +1,20 @@ +#ifndef LSTM_LAYER_H +#define LSTM_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" +#define USET + +layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam); + +void forward_lstm_layer(layer l, network net); +void update_lstm_layer(layer l, update_args a); + +#ifdef GPU +void forward_lstm_layer_gpu(layer l, network net); +void backward_lstm_layer_gpu(layer l, network net); +void update_lstm_layer_gpu(layer l, update_args a); + +#endif +#endif diff --git a/workloads/realworld/standard/darknet/src/matrix.c b/workloads/realworld/standard/darknet/src/matrix.c new file mode 100644 index 0000000000000000000000000000000000000000..799916bff017180e220ae48748f495007793d168 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/matrix.c @@ -0,0 +1,196 @@ +#include "matrix.h" +#include "utils.h" +#include "blas.h" +#include +#include +#include +#include +#include + +void free_matrix(matrix m) +{ + int i; + for(i = 0; i < m.rows; ++i) free(m.vals[i]); + free(m.vals); +} + +float matrix_topk_accuracy(matrix truth, matrix guess, int k) +{ + int *indexes = calloc(k, sizeof(int)); + int n = truth.cols; + int i,j; + int correct = 0; + for(i = 0; i < truth.rows; ++i){ + top_k(guess.vals[i], n, k, indexes); 
+ for(j = 0; j < k; ++j){ + int class = indexes[j]; + if(truth.vals[i][class]){ + ++correct; + break; + } + } + } + free(indexes); + return (float)correct/truth.rows; +} + +void scale_matrix(matrix m, float scale) +{ + int i,j; + for(i = 0; i < m.rows; ++i){ + for(j = 0; j < m.cols; ++j){ + m.vals[i][j] *= scale; + } + } +} + +matrix resize_matrix(matrix m, int size) +{ + int i; + if (m.rows == size) return m; + if (m.rows < size) { + m.vals = realloc(m.vals, size*sizeof(float*)); + for (i = m.rows; i < size; ++i) { + m.vals[i] = calloc(m.cols, sizeof(float)); + } + } else if (m.rows > size) { + for (i = size; i < m.rows; ++i) { + free(m.vals[i]); + } + m.vals = realloc(m.vals, size*sizeof(float*)); + } + m.rows = size; + return m; +} + +void matrix_add_matrix(matrix from, matrix to) +{ + assert(from.rows == to.rows && from.cols == to.cols); + int i,j; + for(i = 0; i < from.rows; ++i){ + for(j = 0; j < from.cols; ++j){ + to.vals[i][j] += from.vals[i][j]; + } + } +} + +matrix copy_matrix(matrix m) +{ + matrix c = {0}; + c.rows = m.rows; + c.cols = m.cols; + c.vals = calloc(c.rows, sizeof(float *)); + int i; + for(i = 0; i < c.rows; ++i){ + c.vals[i] = calloc(c.cols, sizeof(float)); + copy_cpu(c.cols, m.vals[i], 1, c.vals[i], 1); + } + return c; +} + +matrix make_matrix(int rows, int cols) +{ + int i; + matrix m; + m.rows = rows; + m.cols = cols; + m.vals = calloc(m.rows, sizeof(float *)); + for(i = 0; i < m.rows; ++i){ + m.vals[i] = calloc(m.cols, sizeof(float)); + } + return m; +} + +matrix hold_out_matrix(matrix *m, int n) +{ + int i; + matrix h; + h.rows = n; + h.cols = m->cols; + h.vals = calloc(h.rows, sizeof(float *)); + for(i = 0; i < n; ++i){ + int index = rand()%m->rows; + h.vals[i] = m->vals[index]; + m->vals[index] = m->vals[--(m->rows)]; + } + return h; +} + +float *pop_column(matrix *m, int c) +{ + float *col = calloc(m->rows, sizeof(float)); + int i, j; + for(i = 0; i < m->rows; ++i){ + col[i] = m->vals[i][c]; + for(j = c; j < m->cols-1; ++j){ + 
m->vals[i][j] = m->vals[i][j+1]; + } + } + --m->cols; + return col; +} + +matrix csv_to_matrix(char *filename) +{ + FILE *fp = fopen(filename, "r"); + if(!fp) file_error(filename); + + matrix m; + m.cols = -1; + + char *line; + + int n = 0; + int size = 1024; + m.vals = calloc(size, sizeof(float*)); + while((line = fgetl(fp))){ + if(m.cols == -1) m.cols = count_fields(line); + if(n == size){ + size *= 2; + m.vals = realloc(m.vals, size*sizeof(float*)); + } + m.vals[n] = parse_fields(line, m.cols); + free(line); + ++n; + } + m.vals = realloc(m.vals, n*sizeof(float*)); + m.rows = n; + return m; +} + +void matrix_to_csv(matrix m) +{ + int i, j; + + for(i = 0; i < m.rows; ++i){ + for(j = 0; j < m.cols; ++j){ + if(j > 0) printf(","); + printf("%.17g", m.vals[i][j]); + } + printf("\n"); + } +} + +void print_matrix(matrix m) +{ + int i, j; + printf("%d X %d Matrix:\n",m.rows, m.cols); + printf(" __"); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf("__ \n"); + + printf("| "); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf(" |\n"); + + for(i = 0; i < m.rows; ++i){ + printf("| "); + for(j = 0; j < m.cols; ++j){ + printf("%15.7f ", m.vals[i][j]); + } + printf(" |\n"); + } + printf("|__"); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf("__|\n"); +} diff --git a/workloads/realworld/standard/darknet/src/matrix.h b/workloads/realworld/standard/darknet/src/matrix.h new file mode 100644 index 0000000000000000000000000000000000000000..879acd70d26c084931b30067ddcc77057068e58c --- /dev/null +++ b/workloads/realworld/standard/darknet/src/matrix.h @@ -0,0 +1,13 @@ +#ifndef MATRIX_H +#define MATRIX_H +#include "darknet.h" + +matrix copy_matrix(matrix m); +void print_matrix(matrix m); + +matrix hold_out_matrix(matrix *m, int n); +matrix resize_matrix(matrix m, int size); + +float *pop_column(matrix *m, int c); + +#endif diff --git a/workloads/realworld/standard/darknet/src/maxpool_layer.c b/workloads/realworld/standard/darknet/src/maxpool_layer.c new file 
mode 100644 index 0000000000000000000000000000000000000000..b54be838662ebfc53abc539da22413becc1805a3 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/maxpool_layer.c @@ -0,0 +1,127 @@ +#include "maxpool_layer.h" +#include "cuda_dark.h" +#include + +image get_maxpool_image(maxpool_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.c; + return float_to_image(w,h,c,l.output); +} + +image get_maxpool_delta(maxpool_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.c; + return float_to_image(w,h,c,l.delta); +} + +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding) +{ + maxpool_layer l = {0}; + l.type = MAXPOOL; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.pad = padding; + l.out_w = (w + padding - size)/stride + 1; + l.out_h = (h + padding - size)/stride + 1; + l.out_c = c; + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = h*w*c; + l.size = size; + l.stride = stride; + int output_size = l.out_h * l.out_w * l.out_c * batch; + l.indexes = calloc(output_size, sizeof(int)); + l.output = calloc(output_size, sizeof(float)); + l.delta = calloc(output_size, sizeof(float)); + l.forward = forward_maxpool_layer; + l.backward = backward_maxpool_layer; + #ifdef GPU + l.forward_gpu = forward_maxpool_layer_gpu; + l.backward_gpu = backward_maxpool_layer_gpu; + l.indexes_gpu = cuda_make_int_array(0, output_size); + l.output_gpu = cuda_make_array(l.output, output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); + #endif + fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); + return l; +} + +void resize_maxpool_layer(maxpool_layer *l, int w, int h) +{ + l->h = h; + l->w = w; + l->inputs = h*w*l->c; + + l->out_w = (w + l->pad - l->size)/l->stride + 1; + l->out_h = (h + l->pad - l->size)/l->stride + 1; + l->outputs = l->out_w * l->out_h * l->c; + int output_size = l->outputs * l->batch; + + l->indexes = 
realloc(l->indexes, output_size * sizeof(int)); + l->output = realloc(l->output, output_size * sizeof(float)); + l->delta = realloc(l->delta, output_size * sizeof(float)); + + #ifdef GPU + cuda_free((float *)l->indexes_gpu); + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->indexes_gpu = cuda_make_int_array(0, output_size); + l->output_gpu = cuda_make_array(l->output, output_size); + l->delta_gpu = cuda_make_array(l->delta, output_size); + #endif +} + +void forward_maxpool_layer(const maxpool_layer l, network net) +{ + int b,i,j,k,m,n; + int w_offset = -l.pad/2; + int h_offset = -l.pad/2; + + int h = l.out_h; + int w = l.out_w; + int c = l.c; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < c; ++k){ + for(i = 0; i < h; ++i){ + for(j = 0; j < w; ++j){ + int out_index = j + w*(i + h*(k + c*b)); + float max = -FLT_MAX; + int max_i = -1; + for(n = 0; n < l.size; ++n){ + for(m = 0; m < l.size; ++m){ + int cur_h = h_offset + i*l.stride + n; + int cur_w = w_offset + j*l.stride + m; + int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c)); + int valid = (cur_h >= 0 && cur_h < l.h && + cur_w >= 0 && cur_w < l.w); + float val = (valid != 0) ? net.input[index] : -FLT_MAX; + max_i = (val > max) ? index : max_i; + max = (val > max) ? 
val : max; + } + } + l.output[out_index] = max; + l.indexes[out_index] = max_i; + } + } + } + } +} + +void backward_maxpool_layer(const maxpool_layer l, network net) +{ + int i; + int h = l.out_h; + int w = l.out_w; + int c = l.c; + for(i = 0; i < h*w*c*l.batch; ++i){ + int index = l.indexes[i]; + net.delta[index] += l.delta[i]; + } +} + diff --git a/workloads/realworld/standard/darknet/src/maxpool_layer.h b/workloads/realworld/standard/darknet/src/maxpool_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..f01adb957e8bd8ce01a06e5a1ff14a988ae07149 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/maxpool_layer.h @@ -0,0 +1,23 @@ +#ifndef MAXPOOL_LAYER_H +#define MAXPOOL_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +typedef layer maxpool_layer; + +image get_maxpool_image(maxpool_layer l); +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding); +void resize_maxpool_layer(maxpool_layer *l, int w, int h); +void forward_maxpool_layer(const maxpool_layer l, network net); +void backward_maxpool_layer(const maxpool_layer l, network net); + +#ifdef GPU +void forward_maxpool_layer_gpu(maxpool_layer l, network net); +void backward_maxpool_layer_gpu(maxpool_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/standard/darknet/src/maxpool_layer_kernels.cu b/workloads/realworld/standard/darknet/src/maxpool_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..23302f8295682c5d9112fb12a7f63cd47a82954b --- /dev/null +++ b/workloads/realworld/standard/darknet/src/maxpool_layer_kernels.cu @@ -0,0 +1,106 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "maxpool_layer.h" +#include "cuda_dark.h" +} + +__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int 
*indexes) +{ + int h = (in_h + pad - size)/stride + 1; + int w = (in_w + pad - size)/stride + 1; + int c = in_c; + + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int j = id % w; + id /= w; + int i = id % h; + id /= h; + int k = id % c; + id /= c; + int b = id; + + int w_offset = -pad/2; + int h_offset = -pad/2; + + int out_index = j + w*(i + h*(k + c*b)); + float max = -INFINITY; + int max_i = -1; + int l, m; + for(l = 0; l < size; ++l){ + for(m = 0; m < size; ++m){ + int cur_h = h_offset + i*stride + l; + int cur_w = w_offset + j*stride + m; + int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c)); + int valid = (cur_h >= 0 && cur_h < in_h && + cur_w >= 0 && cur_w < in_w); + float val = (valid != 0) ? input[index] : -INFINITY; + max_i = (val > max) ? index : max_i; + max = (val > max) ? val : max; + } + } + output[out_index] = max; + indexes[out_index] = max_i; +} + +__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes) +{ + int h = (in_h + pad - size)/stride + 1; + int w = (in_w + pad - size)/stride + 1; + int c = in_c; + int area = (size-1)/stride; + + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int index = id; + int j = id % in_w; + id /= in_w; + int i = id % in_h; + id /= in_h; + int k = id % in_c; + id /= in_c; + int b = id; + + int w_offset = -pad/2; + int h_offset = -pad/2; + + float d = 0; + int l, m; + for(l = -area; l < area+1; ++l){ + for(m = -area; m < area+1; ++m){ + int out_w = (j-w_offset)/stride + m; + int out_h = (i-h_offset)/stride + l; + int out_index = out_w + w*(out_h + h*(k + c*b)); + int valid = (out_w >= 0 && out_w < w && + out_h >= 0 && out_h < h); + d += (valid && indexes[out_index] == index) ? 
delta[out_index] : 0; + } + } + prev_delta[index] += d; +} + +extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network net) +{ + int h = layer.out_h; + int w = layer.out_w; + int c = layer.c; + + size_t n = h*w*c*layer.batch; + + forward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, net.input_gpu, layer.output_gpu, layer.indexes_gpu); + check_error(cudaPeekAtLastError()); +} + +extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network net) +{ + size_t n = layer.h*layer.w*layer.c*layer.batch; + + backward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, net.delta_gpu, layer.indexes_gpu); + check_error(cudaPeekAtLastError()); +} + diff --git a/workloads/realworld/standard/darknet/src/network.c b/workloads/realworld/standard/darknet/src/network.c new file mode 100644 index 0000000000000000000000000000000000000000..aaab7997b5ee7da829289fa153f942a066b43d8c --- /dev/null +++ b/workloads/realworld/standard/darknet/src/network.c @@ -0,0 +1,1129 @@ +#include +#include +#include +#include "network.h" +#include "image.h" +#include "data.h" +#include "utils.h" +#include "blas.h" + +#include "crop_layer.h" +#include "connected_layer.h" +#include "gru_layer.h" +#include "rnn_layer.h" +#include "crnn_layer.h" +#include "local_layer.h" +#include "convolutional_layer.h" +#include "activation_layer.h" +#include "detection_layer.h" +#include "region_layer.h" +#include "yolo_layer.h" +#include "normalization_layer.h" +#include "batchnorm_layer.h" +#include "maxpool_layer.h" +#include "reorg_layer.h" +#include "avgpool_layer.h" +#include "cost_layer.h" +#include "softmax_layer.h" +#include "dropout_layer.h" +#include "route_layer.h" +#include "upsample_layer.h" +#include "shortcut_layer.h" +#include "parser.h" +#include "data.h" + +load_args get_base_args(network *net) +{ + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.size = net->w; 
+ + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.center = net->center; + args.saturation = net->saturation; + args.hue = net->hue; + return args; +} + +network *load_network(char *cfg, char *weights, int clear) +{ + network *net = parse_network_cfg(cfg); + if(weights && weights[0] != 0){ + load_weights(net, weights); + } + if(clear) (*net->seen) = 0; + return net; +} + +size_t get_current_batch(network *net) +{ + size_t batch_num = (*net->seen)/(net->batch*net->subdivisions); + return batch_num; +} + +void reset_network_state(network *net, int b) +{ + int i; + for (i = 0; i < net->n; ++i) { + #ifdef GPU + layer l = net->layers[i]; + if(l.state_gpu){ + fill_gpu(l.outputs, 0, l.state_gpu + l.outputs*b, 1); + } + if(l.h_gpu){ + fill_gpu(l.outputs, 0, l.h_gpu + l.outputs*b, 1); + } + #endif + } +} + +void reset_rnn(network *net) +{ + reset_network_state(net, 0); +} + +float get_current_rate(network *net) +{ + size_t batch_num = get_current_batch(net); + int i; + float rate; + if (batch_num < net->burn_in) return net->learning_rate * pow((float)batch_num / net->burn_in, net->power); + switch (net->policy) { + case CONSTANT: + return net->learning_rate; + case STEP: + return net->learning_rate * pow(net->scale, batch_num/net->step); + case STEPS: + rate = net->learning_rate; + for(i = 0; i < net->num_steps; ++i){ + if(net->steps[i] > batch_num) return rate; + rate *= net->scales[i]; + } + return rate; + case EXP: + return net->learning_rate * pow(net->gamma, batch_num); + case POLY: + return net->learning_rate * pow(1 - (float)batch_num / net->max_batches, net->power); + case RANDOM: + return net->learning_rate * pow(rand_uniform(0,1), net->power); + case SIG: + return net->learning_rate * (1./(1.+exp(net->gamma*(batch_num - net->step)))); + default: + fprintf(stderr, "Policy is weird!\n"); + return net->learning_rate; + } +} + +char *get_layer_string(LAYER_TYPE a) +{ 
+ switch(a){ + case CONVOLUTIONAL: + return "convolutional"; + case ACTIVE: + return "activation"; + case LOCAL: + return "local"; + case DECONVOLUTIONAL: + return "deconvolutional"; + case CONNECTED: + return "connected"; + case RNN: + return "rnn"; + case GRU: + return "gru"; + case LSTM: + return "lstm"; + case CRNN: + return "crnn"; + case MAXPOOL: + return "maxpool"; + case REORG: + return "reorg"; + case AVGPOOL: + return "avgpool"; + case SOFTMAX: + return "softmax"; + case DETECTION: + return "detection"; + case REGION: + return "region"; + case YOLO: + return "yolo"; + case DROPOUT: + return "dropout"; + case CROP: + return "crop"; + case COST: + return "cost"; + case ROUTE: + return "route"; + case SHORTCUT: + return "shortcut"; + case NORMALIZATION: + return "normalization"; + case BATCHNORM: + return "batchnorm"; + default: + break; + } + return "none"; +} + +network *make_network(int n) +{ + network *net = calloc(1, sizeof(network)); + net->n = n; + net->layers = calloc(net->n, sizeof(layer)); + net->seen = calloc(1, sizeof(size_t)); + net->t = calloc(1, sizeof(int)); + net->cost = calloc(1, sizeof(float)); + return net; +} + +void forward_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + forward_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + for(i = 0; i < net.n; ++i){ + net.index = i; + layer l = net.layers[i]; + if(l.delta){ + fill_cpu(l.outputs * l.batch, 0, l.delta, 1); + } + l.forward(l, net); + net.input = l.output; + if(l.truth) { + net.truth = l.output; + } + } + calc_network_cost(netp); +} + +void update_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + update_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + update_args a = {0}; + a.batch = net.batch*net.subdivisions; + a.learning_rate = get_current_rate(netp); + a.momentum = net.momentum; + a.decay = net.decay; + a.adam = net.adam; + a.B1 = net.B1; + a.B2 = net.B2; + a.eps = net.eps; + ++*net.t; + a.t = 
*net.t; + + for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.update){ + l.update(l, a); + } + } +} + +void calc_network_cost(network *netp) +{ + network net = *netp; + int i; + float sum = 0; + int count = 0; + for(i = 0; i < net.n; ++i){ + if(net.layers[i].cost){ + sum += net.layers[i].cost[0]; + ++count; + } + } + *net.cost = sum/count; +} + +int get_predicted_class_network(network *net) +{ + return max_index(net->output, net->outputs); +} + +void backward_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + backward_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + network orig = net; + for(i = net.n-1; i >= 0; --i){ + layer l = net.layers[i]; + if(l.stopbackward) break; + if(i == 0){ + net = orig; + }else{ + layer prev = net.layers[i-1]; + net.input = prev.output; + net.delta = prev.delta; + } + net.index = i; + l.backward(l, net); + } +} + +float train_network_datum(network *net) +{ + *net->seen += net->batch; + net->train = 1; + forward_network(net); + backward_network(net); + float error = *net->cost; + if(((*net->seen)/net->batch)%net->subdivisions == 0) update_network(net); + return error; +} + +float train_network_sgd(network *net, data d, int n) +{ + int batch = net->batch; + + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + get_random_batch(d, batch, net->input, net->truth); + float err = train_network_datum(net); + sum += err; + } + return (float)sum/(n*batch); +} + +float train_network(network *net, data d) +{ + assert(d.X.rows % net->batch == 0); + int batch = net->batch; + int n = d.X.rows / batch; + + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + get_next_batch(d, batch, i*batch, net->input, net->truth); + float err = train_network_datum(net); + sum += err; + } + return (float)sum/(n*batch); +} + +void set_temp_network(network *net, float t) +{ + int i; + for(i = 0; i < net->n; ++i){ + net->layers[i].temperature = t; + } +} + + +void set_batch_network(network *net, int b) +{ + 
net->batch = b; + int i; + for(i = 0; i < net->n; ++i){ + net->layers[i].batch = b; +#ifdef CUDNN + if(net->layers[i].type == CONVOLUTIONAL){ + cudnn_convolutional_setup(net->layers + i); + } + if(net->layers[i].type == DECONVOLUTIONAL){ + layer *l = net->layers + i; + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + } +#endif + } +} + +int resize_network(network *net, int w, int h) +{ +#ifdef GPU + cuda_set_device(net->gpu_index); + cuda_free(net->workspace); +#endif + int i; + //if(w == net->w && h == net->h) return 0; + net->w = w; + net->h = h; + int inputs = 0; + size_t workspace_size = 0; + //fprintf(stderr, "Resizing to %d x %d...\n", w, h); + //fflush(stderr); + for (i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + resize_convolutional_layer(&l, w, h); + }else if(l.type == CROP){ + resize_crop_layer(&l, w, h); + }else if(l.type == MAXPOOL){ + resize_maxpool_layer(&l, w, h); + }else if(l.type == REGION){ + resize_region_layer(&l, w, h); + }else if(l.type == YOLO){ + resize_yolo_layer(&l, w, h); + }else if(l.type == ROUTE){ + resize_route_layer(&l, net); + }else if(l.type == SHORTCUT){ + resize_shortcut_layer(&l, w, h); + }else if(l.type == UPSAMPLE){ + resize_upsample_layer(&l, w, h); + }else if(l.type == REORG){ + resize_reorg_layer(&l, w, h); + }else if(l.type == AVGPOOL){ + resize_avgpool_layer(&l, w, h); + }else if(l.type == NORMALIZATION){ + resize_normalization_layer(&l, w, h); + }else if(l.type == COST){ + resize_cost_layer(&l, inputs); + }else{ + error("Cannot resize this type of layer"); + } + if(l.workspace_size > workspace_size) workspace_size = l.workspace_size; + if(l.workspace_size > 2000000000) assert(0); + inputs = l.outputs; + net->layers[i] = l; + w = l.out_w; + h = l.out_h; + if(l.type == AVGPOOL) break; + } + layer out = 
get_network_output_layer(net); + net->inputs = net->layers[0].inputs; + net->outputs = out.outputs; + net->truths = out.outputs; + if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths; + net->output = out.output; + free(net->input); + free(net->truth); + net->input = calloc(net->inputs*net->batch, sizeof(float)); + net->truth = calloc(net->truths*net->batch, sizeof(float)); +#ifdef GPU + if(gpu_index >= 0){ + cuda_free(net->input_gpu); + cuda_free(net->truth_gpu); + net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch); + net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch); + if(workspace_size){ + net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1); + } + }else { + free(net->workspace); + net->workspace = calloc(1, workspace_size); + } +#else + free(net->workspace); + net->workspace = calloc(1, workspace_size); +#endif + //fprintf(stderr, " Done!\n"); + return 0; +} + +layer get_network_detection_layer(network *net) +{ + int i; + for(i = 0; i < net->n; ++i){ + if(net->layers[i].type == DETECTION){ + return net->layers[i]; + } + } + fprintf(stderr, "Detection layer not found!!\n"); + layer l = {0}; + return l; +} + +image get_network_image_layer(network *net, int i) +{ + layer l = net->layers[i]; +#ifdef GPU + //cuda_pull_array(l.output_gpu, l.output, l.outputs); +#endif + if (l.out_w && l.out_h && l.out_c){ + return float_to_image(l.out_w, l.out_h, l.out_c, l.output); + } + image def = {0}; + return def; +} + +image get_network_image(network *net) +{ + int i; + for(i = net->n-1; i >= 0; --i){ + image m = get_network_image_layer(net, i); + if(m.h != 0) return m; + } + image def = {0}; + return def; +} + +void visualize_network(network *net) +{ + image *prev = 0; + int i; + char buff[256]; + for(i = 0; i < net->n; ++i){ + sprintf(buff, "Layer %d", i); + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + prev = visualize_convolutional_layer(l, buff, prev); + } + } +} + +void 
top_predictions(network *net, int k, int *index) +{ + top_k(net->output, net->outputs, k, index); +} + + +float *network_predict(network *net, float *input) +{ + network orig = *net; + net->input = input; + net->truth = 0; + net->train = 0; + net->delta = 0; + forward_network(net); + float *out = net->output; + *net = orig; + return out; +} + +int num_detections(network *net, float thresh) +{ + int i; + int s = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO){ + s += yolo_num_detections(l, thresh); + } + if(l.type == DETECTION || l.type == REGION){ + s += l.w*l.h*l.n; + } + } + return s; +} + +detection *make_network_boxes(network *net, float thresh, int *num) +{ + layer l = net->layers[net->n - 1]; + int i; + int nboxes = num_detections(net, thresh); + if(num) *num = nboxes; + detection *dets = calloc(nboxes, sizeof(detection)); + for(i = 0; i < nboxes; ++i){ + dets[i].prob = calloc(l.classes, sizeof(float)); + if(l.coords > 4){ + dets[i].mask = calloc(l.coords-4, sizeof(float)); + } + } + return dets; +} + +void fill_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets) +{ + int j; + for(j = 0; j < net->n; ++j){ + layer l = net->layers[j]; + if(l.type == YOLO){ + int count = get_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets); + dets += count; + } + if(l.type == REGION){ + get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets); + dets += l.w*l.h*l.n; + } + if(l.type == DETECTION){ + get_detection_detections(l, w, h, thresh, dets); + dets += l.w*l.h*l.n; + } + } +} + +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num) +{ + detection *dets = make_network_boxes(net, thresh, num); + fill_network_boxes(net, w, h, thresh, hier, map, relative, dets); + return dets; +} + +void free_detections(detection *dets, int n) +{ + int i; + for(i = 0; i < n; ++i){ + 
free(dets[i].prob); + if(dets[i].mask) free(dets[i].mask); + } + free(dets); +} + +float *network_predict_image(network *net, image im) +{ + image imr = letterbox_image(im, net->w, net->h); + set_batch_network(net, 1); + float *p = network_predict(net, imr.data); + free_image(imr); + return p; +} + +int network_width(network *net){return net->w;} +int network_height(network *net){return net->h;} + +matrix network_predict_data_multi(network *net, data test, int n) +{ + int i,j,b,m; + int k = net->outputs; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.rows, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + } + for(m = 0; m < n; ++m){ + float *out = network_predict(net, X); + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + for(j = 0; j < k; ++j){ + pred.vals[i+b][j] += out[j+b*k]/n; + } + } + } + } + free(X); + return pred; +} + +matrix network_predict_data(network *net, data test) +{ + int i,j,b; + int k = net->outputs; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.cols, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + } + float *out = network_predict(net, X); + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + for(j = 0; j < k; ++j){ + pred.vals[i+b][j] = out[j+b*k]; + } + } + } + free(X); + return pred; +} + +void print_network(network *net) +{ + int i,j; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + float *output = l.output; + int n = l.outputs; + float mean = mean_array(output, n); + float vari = variance_array(output, n); + fprintf(stderr, "Layer %d - Mean: %f, Variance: %f\n",i,mean, vari); + if(n > 100) n = 100; + for(j = 0; j < n; 
++j) fprintf(stderr, "%f, ", output[j]); + if(n == 100)fprintf(stderr,".....\n"); + fprintf(stderr, "\n"); + } +} + +void compare_networks(network *n1, network *n2, data test) +{ + matrix g1 = network_predict_data(n1, test); + matrix g2 = network_predict_data(n2, test); + int i; + int a,b,c,d; + a = b = c = d = 0; + for(i = 0; i < g1.rows; ++i){ + int truth = max_index(test.y.vals[i], test.y.cols); + int p1 = max_index(g1.vals[i], g1.cols); + int p2 = max_index(g2.vals[i], g2.cols); + if(p1 == truth){ + if(p2 == truth) ++d; + else ++c; + }else{ + if(p2 == truth) ++b; + else ++a; + } + } + printf("%5d %5d\n%5d %5d\n", a, b, c, d); + float num = pow((abs(b - c) - 1.), 2.); + float den = b + c; + printf("%f\n", num/den); +} + +float network_accuracy(network *net, data d) +{ + matrix guess = network_predict_data(net, d); + float acc = matrix_topk_accuracy(d.y, guess,1); + free_matrix(guess); + return acc; +} + +float *network_accuracies(network *net, data d, int n) +{ + static float acc[2]; + matrix guess = network_predict_data(net, d); + acc[0] = matrix_topk_accuracy(d.y, guess, 1); + acc[1] = matrix_topk_accuracy(d.y, guess, n); + free_matrix(guess); + return acc; +} + +layer get_network_output_layer(network *net) +{ + int i; + for(i = net->n - 1; i >= 0; --i){ + if(net->layers[i].type != COST) break; + } + return net->layers[i]; +} + +float network_accuracy_multi(network *net, data d, int n) +{ + matrix guess = network_predict_data_multi(net, d, n); + float acc = matrix_topk_accuracy(d.y, guess,1); + free_matrix(guess); + return acc; +} + +void free_network(network *net) +{ + int i; + for(i = 0; i < net->n; ++i){ + free_layer(net->layers[i]); + } + free(net->layers); + if(net->input) free(net->input); + if(net->truth) free(net->truth); +#ifdef GPU + if(net->input_gpu) cuda_free(net->input_gpu); + if(net->truth_gpu) cuda_free(net->truth_gpu); +#endif + free(net); +} + +// Some day... +// ^ What the hell is this comment for? 
+ + +layer network_output_layer(network *net) +{ + int i; + for(i = net->n - 1; i >= 0; --i){ + if(net->layers[i].type != COST) break; + } + return net->layers[i]; +} + +int network_inputs(network *net) +{ + return net->layers[0].inputs; +} + +int network_outputs(network *net) +{ + return network_output_layer(net).outputs; +} + +float *network_output(network *net) +{ + return network_output_layer(net).output; +} + +#ifdef GPU + +void forward_network_gpu(network *netp) +{ + network net = *netp; + cuda_set_device(net.gpu_index); + cuda_push_array(net.input_gpu, net.input, net.inputs*net.batch); + if(net.truth){ + cuda_push_array(net.truth_gpu, net.truth, net.truths*net.batch); + } + + int i; + for(i = 0; i < net.n; ++i){ + net.index = i; + layer l = net.layers[i]; + if(l.delta_gpu){ + fill_gpu(l.outputs * l.batch, 0, l.delta_gpu, 1); + } + l.forward_gpu(l, net); + net.input_gpu = l.output_gpu; + net.input = l.output; + if(l.truth) { + net.truth_gpu = l.output_gpu; + net.truth = l.output; + } + } + pull_network_output(netp); + calc_network_cost(netp); +} + +void backward_network_gpu(network *netp) +{ + int i; + network net = *netp; + network orig = net; + cuda_set_device(net.gpu_index); + for(i = net.n-1; i >= 0; --i){ + layer l = net.layers[i]; + if(l.stopbackward) break; + if(i == 0){ + net = orig; + }else{ + layer prev = net.layers[i-1]; + net.input = prev.output; + net.delta = prev.delta; + net.input_gpu = prev.output_gpu; + net.delta_gpu = prev.delta_gpu; + } + net.index = i; + l.backward_gpu(l, net); + } +} + +void update_network_gpu(network *netp) +{ + network net = *netp; + cuda_set_device(net.gpu_index); + int i; + update_args a = {0}; + a.batch = net.batch*net.subdivisions; + a.learning_rate = get_current_rate(netp); + a.momentum = net.momentum; + a.decay = net.decay; + a.adam = net.adam; + a.B1 = net.B1; + a.B2 = net.B2; + a.eps = net.eps; + ++*net.t; + a.t = (*net.t); + + for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.update_gpu){ + 
l.update_gpu(l, a); + } + } +} + +void harmless_update_network_gpu(network *netp) +{ + network net = *netp; + cuda_set_device(net.gpu_index); + int i; + for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.weight_updates_gpu) fill_gpu(l.nweights, 0, l.weight_updates_gpu, 1); + if(l.bias_updates_gpu) fill_gpu(l.nbiases, 0, l.bias_updates_gpu, 1); + if(l.scale_updates_gpu) fill_gpu(l.nbiases, 0, l.scale_updates_gpu, 1); + } +} + +typedef struct { + network *net; + data d; + float *err; +} train_args; + +void *train_thread(void *ptr) +{ + train_args args = *(train_args*)ptr; + free(ptr); + cuda_set_device(args.net->gpu_index); + *args.err = train_network(args.net, args.d); + return 0; +} + +pthread_t train_network_in_thread(network *net, data d, float *err) +{ + pthread_t thread; + train_args *ptr = (train_args *)calloc(1, sizeof(train_args)); + ptr->net = net; + ptr->d = d; + ptr->err = err; + if(pthread_create(&thread, 0, train_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void merge_weights(layer l, layer base) +{ + if (l.type == CONVOLUTIONAL) { + axpy_cpu(l.n, 1, l.bias_updates, 1, base.biases, 1); + axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weights, 1); + if (l.scales) { + axpy_cpu(l.n, 1, l.scale_updates, 1, base.scales, 1); + } + } else if(l.type == CONNECTED) { + axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.biases, 1); + axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weights, 1); + } +} + +void scale_weights(layer l, float s) +{ + if (l.type == CONVOLUTIONAL) { + scal_cpu(l.n, s, l.biases, 1); + scal_cpu(l.nweights, s, l.weights, 1); + if (l.scales) { + scal_cpu(l.n, s, l.scales, 1); + } + } else if(l.type == CONNECTED) { + scal_cpu(l.outputs, s, l.biases, 1); + scal_cpu(l.outputs*l.inputs, s, l.weights, 1); + } +} + + +void pull_weights(layer l) +{ + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + cuda_pull_array(l.biases_gpu, l.bias_updates, l.n); + cuda_pull_array(l.weights_gpu, 
l.weight_updates, l.nweights); + if(l.scales) cuda_pull_array(l.scales_gpu, l.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_pull_array(l.biases_gpu, l.bias_updates, l.outputs); + cuda_pull_array(l.weights_gpu, l.weight_updates, l.outputs*l.inputs); + } +} + +void push_weights(layer l) +{ + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + cuda_push_array(l.biases_gpu, l.biases, l.n); + cuda_push_array(l.weights_gpu, l.weights, l.nweights); + if(l.scales) cuda_push_array(l.scales_gpu, l.scales, l.n); + } else if(l.type == CONNECTED){ + cuda_push_array(l.biases_gpu, l.biases, l.outputs); + cuda_push_array(l.weights_gpu, l.weights, l.outputs*l.inputs); + } +} + +void distribute_weights(layer l, layer base) +{ + if (l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL) { + cuda_push_array(l.biases_gpu, base.biases, l.n); + cuda_push_array(l.weights_gpu, base.weights, l.nweights); + if (base.scales) cuda_push_array(l.scales_gpu, base.scales, l.n); + } else if (l.type == CONNECTED) { + cuda_push_array(l.biases_gpu, base.biases, l.outputs); + cuda_push_array(l.weights_gpu, base.weights, l.outputs*l.inputs); + } +} + + +/* + + void pull_updates(layer l) + { + if(l.type == CONVOLUTIONAL){ + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + if(l.scale_updates) cuda_pull_array(l.scale_updates_gpu, l.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs); + } + } + + void push_updates(layer l) + { + if(l.type == CONVOLUTIONAL){ + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + if(l.scale_updates) cuda_push_array(l.scale_updates_gpu, l.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_push_array(l.bias_updates_gpu, l.bias_updates, 
l.outputs); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs); + } + } + + void update_layer(layer l, network net) + { + int update_batch = net.batch*net.subdivisions; + float rate = get_current_rate(net); + l.t = get_current_batch(net); + if(l.update_gpu){ + l.update_gpu(l, update_batch, rate*l.learning_rate_scale, net.momentum, net.decay); + } + } + void merge_updates(layer l, layer base) + { + if (l.type == CONVOLUTIONAL) { + axpy_cpu(l.n, 1, l.bias_updates, 1, base.bias_updates, 1); + axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weight_updates, 1); + if (l.scale_updates) { + axpy_cpu(l.n, 1, l.scale_updates, 1, base.scale_updates, 1); + } + } else if(l.type == CONNECTED) { + axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.bias_updates, 1); + axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weight_updates, 1); + } + } + + void distribute_updates(layer l, layer base) + { + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.n); + cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.nweights); + if(base.scale_updates) cuda_push_array(l.scale_updates_gpu, base.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.outputs); + cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.outputs*l.inputs); + } + } + */ + +/* + void sync_layer(network *nets, int n, int j) + { + int i; + network net = nets[0]; + layer base = net.layers[j]; + scale_weights(base, 0); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i].gpu_index); + layer l = nets[i].layers[j]; + pull_weights(l); + merge_weights(l, base); + } + scale_weights(base, 1./n); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i].gpu_index); + layer l = nets[i].layers[j]; + distribute_weights(l, base); + } + } + */ + +void sync_layer(network **nets, int n, int j) +{ + int i; + network *net = nets[0]; + layer base = net->layers[j]; + 
scale_weights(base, 0); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i]->gpu_index); + layer l = nets[i]->layers[j]; + pull_weights(l); + merge_weights(l, base); + } + scale_weights(base, 1./n); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i]->gpu_index); + layer l = nets[i]->layers[j]; + distribute_weights(l, base); + } +} + +typedef struct{ + network **nets; + int n; + int j; +} sync_args; + +void *sync_layer_thread(void *ptr) +{ + sync_args args = *(sync_args*)ptr; + sync_layer(args.nets, args.n, args.j); + free(ptr); + return 0; +} + +pthread_t sync_layer_in_thread(network **nets, int n, int j) +{ + pthread_t thread; + sync_args *ptr = (sync_args *)calloc(1, sizeof(sync_args)); + ptr->nets = nets; + ptr->n = n; + ptr->j = j; + if(pthread_create(&thread, 0, sync_layer_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void sync_nets(network **nets, int n, int interval) +{ + int j; + int layers = nets[0]->n; + pthread_t *threads = (pthread_t *) calloc(layers, sizeof(pthread_t)); + + *(nets[0]->seen) += interval * (n-1) * nets[0]->batch * nets[0]->subdivisions; + for (j = 0; j < n; ++j){ + *(nets[j]->seen) = *(nets[0]->seen); + } + for (j = 0; j < layers; ++j) { + threads[j] = sync_layer_in_thread(nets, n, j); + } + for (j = 0; j < layers; ++j) { + pthread_join(threads[j], 0); + } + free(threads); +} + +float train_networks(network **nets, int n, data d, int interval) +{ + int i; + int batch = nets[0]->batch; + int subdivisions = nets[0]->subdivisions; + assert(batch * subdivisions * n == d.X.rows); + pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t)); + float *errors = (float *) calloc(n, sizeof(float)); + + float sum = 0; + for(i = 0; i < n; ++i){ + data p = get_data_part(d, i, n); + threads[i] = train_network_in_thread(nets[i], p, errors + i); + } + for(i = 0; i < n; ++i){ + pthread_join(threads[i], 0); + //printf("%f\n", errors[i]); + sum += errors[i]; + } + //cudaDeviceSynchronize(); + if (get_current_batch(nets[0]) 
% interval == 0) { + printf("Syncing... "); + fflush(stdout); + sync_nets(nets, n, interval); + printf("Done!\n"); + } + //cudaDeviceSynchronize(); + free(threads); + free(errors); + return (float)sum/(n); +} + +void pull_network_output(network *net) +{ + layer l = get_network_output_layer(net); + cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); +} + +#endif diff --git a/workloads/realworld/standard/darknet/src/network.h b/workloads/realworld/standard/darknet/src/network.h new file mode 100644 index 0000000000000000000000000000000000000000..1b0dfd1aaa3e090c6ce276d26f24d127de2cb66d --- /dev/null +++ b/workloads/realworld/standard/darknet/src/network.h @@ -0,0 +1,29 @@ +// Oh boy, why am I about to do this.... +#ifndef NETWORK_H +#define NETWORK_H +#include "darknet.h" + +#include "image.h" +#include "layer.h" +#include "data.h" +#include "tree.h" + + +#ifdef GPU +void pull_network_output(network *net); +#endif + +void compare_networks(network *n1, network *n2, data d); +char *get_layer_string(LAYER_TYPE a); + +network *make_network(int n); + + +float network_accuracy_multi(network *net, data d, int n); +int get_predicted_class_network(network *net); +void print_network(network *net); +int resize_network(network *net, int w, int h); +void calc_network_cost(network *net); + +#endif + diff --git a/workloads/realworld/standard/darknet/src/normalization_layer.c b/workloads/realworld/standard/darknet/src/normalization_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..424714fe8653f79b57fd4cde625997749d8eff83 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/normalization_layer.c @@ -0,0 +1,151 @@ +#include "normalization_layer.h" +#include "blas.h" + +#include + +layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa) +{ + fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size); + layer layer = {0}; + layer.type = NORMALIZATION; + 
layer.batch = batch; + layer.h = layer.out_h = h; + layer.w = layer.out_w = w; + layer.c = layer.out_c = c; + layer.kappa = kappa; + layer.size = size; + layer.alpha = alpha; + layer.beta = beta; + layer.output = calloc(h * w * c * batch, sizeof(float)); + layer.delta = calloc(h * w * c * batch, sizeof(float)); + layer.squared = calloc(h * w * c * batch, sizeof(float)); + layer.norms = calloc(h * w * c * batch, sizeof(float)); + layer.inputs = w*h*c; + layer.outputs = layer.inputs; + + layer.forward = forward_normalization_layer; + layer.backward = backward_normalization_layer; + #ifdef GPU + layer.forward_gpu = forward_normalization_layer_gpu; + layer.backward_gpu = backward_normalization_layer_gpu; + + layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch); + layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch); + layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch); + layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch); + #endif + return layer; +} + +void resize_normalization_layer(layer *layer, int w, int h) +{ + int c = layer->c; + int batch = layer->batch; + layer->h = h; + layer->w = w; + layer->out_h = h; + layer->out_w = w; + layer->inputs = w*h*c; + layer->outputs = layer->inputs; + layer->output = realloc(layer->output, h * w * c * batch * sizeof(float)); + layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float)); + layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float)); + layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float)); +#ifdef GPU + cuda_free(layer->output_gpu); + cuda_free(layer->delta_gpu); + cuda_free(layer->squared_gpu); + cuda_free(layer->norms_gpu); + layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch); + layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch); + layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch); + layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * 
batch); +#endif +} + +void forward_normalization_layer(const layer layer, network net) +{ + int k,b; + int w = layer.w; + int h = layer.h; + int c = layer.c; + scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1); + + for(b = 0; b < layer.batch; ++b){ + float *squared = layer.squared + w*h*c*b; + float *norms = layer.norms + w*h*c*b; + float *input = net.input + w*h*c*b; + pow_cpu(w*h*c, 2, input, 1, squared, 1); + + const_cpu(w*h, layer.kappa, norms, 1); + for(k = 0; k < layer.size/2; ++k){ + axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); + } + + for(k = 1; k < layer.c; ++k){ + copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); + int prev = k - ((layer.size-1)/2) - 1; + int next = k + (layer.size/2); + if(prev >= 0) axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); + if(next < layer.c) axpy_cpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); + } + } + pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1); + mul_cpu(w*h*c*layer.batch, net.input, 1, layer.output, 1); +} + +void backward_normalization_layer(const layer layer, network net) +{ + // TODO This is approximate ;-) + // Also this should add in to delta instead of overwritting. 
+ + int w = layer.w; + int h = layer.h; + int c = layer.c; + pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, net.delta, 1); + mul_cpu(w*h*c*layer.batch, layer.delta, 1, net.delta, 1); +} + +#ifdef GPU +void forward_normalization_layer_gpu(const layer layer, network net) +{ + int k,b; + int w = layer.w; + int h = layer.h; + int c = layer.c; + scal_gpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1); + + for(b = 0; b < layer.batch; ++b){ + float *squared = layer.squared_gpu + w*h*c*b; + float *norms = layer.norms_gpu + w*h*c*b; + float *input = net.input_gpu + w*h*c*b; + pow_gpu(w*h*c, 2, input, 1, squared, 1); + + const_gpu(w*h, layer.kappa, norms, 1); + for(k = 0; k < layer.size/2; ++k){ + axpy_gpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); + } + + for(k = 1; k < layer.c; ++k){ + copy_gpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); + int prev = k - ((layer.size-1)/2) - 1; + int next = k + (layer.size/2); + if(prev >= 0) axpy_gpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); + if(next < layer.c) axpy_gpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); + } + } + pow_gpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1); + mul_gpu(w*h*c*layer.batch, net.input_gpu, 1, layer.output_gpu, 1); +} + +void backward_normalization_layer_gpu(const layer layer, network net) +{ + // TODO This is approximate ;-) + + int w = layer.w; + int h = layer.h; + int c = layer.c; + pow_gpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, net.delta_gpu, 1); + mul_gpu(w*h*c*layer.batch, layer.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/standard/darknet/src/normalization_layer.h b/workloads/realworld/standard/darknet/src/normalization_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..665baa5066282335b6625618ce07c2fcc833d952 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/normalization_layer.h @@ -0,0 +1,19 @@ +#ifndef NORMALIZATION_LAYER_H +#define 
NORMALIZATION_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa); +void resize_normalization_layer(layer *layer, int h, int w); +void forward_normalization_layer(const layer layer, network net); +void backward_normalization_layer(const layer layer, network net); +void visualize_normalization_layer(layer layer, char *window); + +#ifdef GPU +void forward_normalization_layer_gpu(const layer layer, network net); +void backward_normalization_layer_gpu(const layer layer, network net); +#endif + +#endif diff --git a/workloads/realworld/standard/darknet/src/option_list.c b/workloads/realworld/standard/darknet/src/option_list.c new file mode 100644 index 0000000000000000000000000000000000000000..2f52781f8096fecc5e9d1db3cfbfa10685506b93 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/option_list.c @@ -0,0 +1,140 @@ +#include +#include +#include +#include "option_list.h" +#include "utils.h" + +list *read_data_cfg(char *filename) +{ + FILE *file = fopen(filename, "r"); + if(file == 0) file_error(filename); + char *line; + int nu = 0; + list *options = make_list(); + while((line=fgetl(file)) != 0){ + ++ nu; + strip(line); + switch(line[0]){ + case '\0': + case '#': + case ';': + free(line); + break; + default: + if(!read_option(line, options)){ + fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); + free(line); + } + break; + } + } + fclose(file); + return options; +} + +metadata get_metadata(char *file) +{ + metadata m = {0}; + list *options = read_data_cfg(file); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", 0); + if(!name_list) { + fprintf(stderr, "No names or labels found\n"); + } else { + m.names = get_labels(name_list); + } + m.classes = option_find_int(options, "classes", 2); + free_list(options); + return m; +} + +int 
read_option(char *s, list *options) +{ + size_t i; + size_t len = strlen(s); + char *val = 0; + for(i = 0; i < len; ++i){ + if(s[i] == '='){ + s[i] = '\0'; + val = s+i+1; + break; + } + } + if(i == len-1) return 0; + char *key = s; + option_insert(options, key, val); + return 1; +} + +void option_insert(list *l, char *key, char *val) +{ + kvp *p = malloc(sizeof(kvp)); + p->key = key; + p->val = val; + p->used = 0; + list_insert(l, p); +} + +void option_unused(list *l) +{ + node *n = l->front; + while(n){ + kvp *p = (kvp *)n->val; + if(!p->used){ + fprintf(stderr, "Unused field: '%s = %s'\n", p->key, p->val); + } + n = n->next; + } +} + +char *option_find(list *l, char *key) +{ + node *n = l->front; + while(n){ + kvp *p = (kvp *)n->val; + if(strcmp(p->key, key) == 0){ + p->used = 1; + return p->val; + } + n = n->next; + } + return 0; +} +char *option_find_str(list *l, char *key, char *def) +{ + char *v = option_find(l, key); + if(v) return v; + if(def) fprintf(stderr, "%s: Using default '%s'\n", key, def); + return def; +} + +int option_find_int(list *l, char *key, int def) +{ + char *v = option_find(l, key); + if(v) return atoi(v); + fprintf(stderr, "%s: Using default '%d'\n", key, def); + return def; +} + +int option_find_int_quiet(list *l, char *key, int def) +{ + char *v = option_find(l, key); + if(v) return atoi(v); + return def; +} + +float option_find_float_quiet(list *l, char *key, float def) +{ + char *v = option_find(l, key); + if(v) return atof(v); + return def; +} + +float option_find_float(list *l, char *key, float def) +{ + char *v = option_find(l, key); + if(v) return atof(v); + fprintf(stderr, "%s: Using default '%lf'\n", key, def); + return def; +} diff --git a/workloads/realworld/standard/darknet/src/option_list.h b/workloads/realworld/standard/darknet/src/option_list.h new file mode 100644 index 0000000000000000000000000000000000000000..844bd8724b77889d9ab6e6e70f62305e3339048c --- /dev/null +++ 
b/workloads/realworld/standard/darknet/src/option_list.h @@ -0,0 +1,19 @@ +#ifndef OPTION_LIST_H +#define OPTION_LIST_H +#include "list.h" + +typedef struct{ + char *key; + char *val; + int used; +} kvp; + + +int read_option(char *s, list *options); +void option_insert(list *l, char *key, char *val); +char *option_find(list *l, char *key); +float option_find_float(list *l, char *key, float def); +float option_find_float_quiet(list *l, char *key, float def); +void option_unused(list *l); + +#endif diff --git a/workloads/realworld/standard/darknet/src/parser.c b/workloads/realworld/standard/darknet/src/parser.c new file mode 100644 index 0000000000000000000000000000000000000000..c8141c9f2ddc95941900d11006ff583fadf22290 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/parser.c @@ -0,0 +1,1312 @@ +#include +#include +#include +#include + +#include "activation_layer.h" +#include "logistic_layer.h" +#include "l2norm_layer.h" +#include "activations.h" +#include "avgpool_layer.h" +#include "batchnorm_layer.h" +#include "blas.h" +#include "connected_layer.h" +#include "deconvolutional_layer.h" +#include "convolutional_layer.h" +#include "cost_layer.h" +#include "crnn_layer.h" +#include "crop_layer.h" +#include "detection_layer.h" +#include "dropout_layer.h" +#include "gru_layer.h" +#include "list.h" +#include "local_layer.h" +#include "maxpool_layer.h" +#include "normalization_layer.h" +#include "option_list.h" +#include "parser.h" +#include "region_layer.h" +#include "yolo_layer.h" +#include "iseg_layer.h" +#include "reorg_layer.h" +#include "rnn_layer.h" +#include "route_layer.h" +#include "upsample_layer.h" +#include "shortcut_layer.h" +#include "softmax_layer.h" +#include "lstm_layer.h" +#include "utils.h" + +typedef struct{ + char *type; + list *options; +}section; + +list *read_cfg(char *filename); + +LAYER_TYPE string_to_layer_type(char * type) +{ + + if (strcmp(type, "[shortcut]")==0) return SHORTCUT; + if (strcmp(type, "[crop]")==0) return CROP; + if 
(strcmp(type, "[cost]")==0) return COST; + if (strcmp(type, "[detection]")==0) return DETECTION; + if (strcmp(type, "[region]")==0) return REGION; + if (strcmp(type, "[yolo]")==0) return YOLO; + if (strcmp(type, "[iseg]")==0) return ISEG; + if (strcmp(type, "[local]")==0) return LOCAL; + if (strcmp(type, "[conv]")==0 + || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL; + if (strcmp(type, "[deconv]")==0 + || strcmp(type, "[deconvolutional]")==0) return DECONVOLUTIONAL; + if (strcmp(type, "[activation]")==0) return ACTIVE; + if (strcmp(type, "[logistic]")==0) return LOGXENT; + if (strcmp(type, "[l2norm]")==0) return L2NORM; + if (strcmp(type, "[net]")==0 + || strcmp(type, "[network]")==0) return NETWORK; + if (strcmp(type, "[crnn]")==0) return CRNN; + if (strcmp(type, "[gru]")==0) return GRU; + if (strcmp(type, "[lstm]") == 0) return LSTM; + if (strcmp(type, "[rnn]")==0) return RNN; + if (strcmp(type, "[conn]")==0 + || strcmp(type, "[connected]")==0) return CONNECTED; + if (strcmp(type, "[max]")==0 + || strcmp(type, "[maxpool]")==0) return MAXPOOL; + if (strcmp(type, "[reorg]")==0) return REORG; + if (strcmp(type, "[avg]")==0 + || strcmp(type, "[avgpool]")==0) return AVGPOOL; + if (strcmp(type, "[dropout]")==0) return DROPOUT; + if (strcmp(type, "[lrn]")==0 + || strcmp(type, "[normalization]")==0) return NORMALIZATION; + if (strcmp(type, "[batchnorm]")==0) return BATCHNORM; + if (strcmp(type, "[soft]")==0 + || strcmp(type, "[softmax]")==0) return SOFTMAX; + if (strcmp(type, "[route]")==0) return ROUTE; + if (strcmp(type, "[upsample]")==0) return UPSAMPLE; + return BLANK; +} + +void free_section(section *s) +{ + free(s->type); + node *n = s->options->front; + while(n){ + kvp *pair = (kvp *)n->val; + free(pair->key); + free(pair); + node *next = n->next; + free(n); + n = next; + } + free(s->options); + free(s); +} + +void parse_data(char *data, float *a, int n) +{ + int i; + if(!data) return; + char *curr = data; + char *next = data; + int done = 0; + for(i = 
0; i < n && !done; ++i){ + while(*++next !='\0' && *next != ','); + if(*next == '\0') done = 1; + *next = '\0'; + sscanf(curr, "%g", &a[i]); + curr = next+1; + } +} + +typedef struct size_params{ + int batch; + int inputs; + int h; + int w; + int c; + int index; + int time_steps; + network *net; +} size_params; + +local_layer parse_local(list *options, size_params params) +{ + int n = option_find_int(options, "filters",1); + int size = option_find_int(options, "size",1); + int stride = option_find_int(options, "stride",1); + int pad = option_find_int(options, "pad",0); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before local layer must output image."); + + local_layer layer = make_local_layer(batch,h,w,c,n,size,stride,pad,activation); + + return layer; +} + +layer parse_deconvolutional(list *options, size_params params) +{ + int n = option_find_int(options, "filters",1); + int size = option_find_int(options, "size",1); + int stride = option_find_int(options, "stride",1); + + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before deconvolutional layer must output image."); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + int pad = option_find_int_quiet(options, "pad",0); + int padding = option_find_int_quiet(options, "padding",0); + if(pad) padding = size/2; + + layer l = make_deconvolutional_layer(batch,h,w,c,n,size,stride,padding, activation, batch_normalize, params.net->adam); + + return l; +} + + +convolutional_layer parse_convolutional(list *options, size_params params) +{ + int n = option_find_int(options, "filters",1); 
+ int size = option_find_int(options, "size",1); + int stride = option_find_int(options, "stride",1); + int pad = option_find_int_quiet(options, "pad",0); + int padding = option_find_int_quiet(options, "padding",0); + int groups = option_find_int_quiet(options, "groups", 1); + if(pad) padding = size/2; + + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before convolutional layer must output image."); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + int binary = option_find_int_quiet(options, "binary", 0); + int xnor = option_find_int_quiet(options, "xnor", 0); + + convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,groups,size,stride,padding,activation, batch_normalize, binary, xnor, params.net->adam); + layer.flipped = option_find_int_quiet(options, "flipped", 0); + layer.dot = option_find_float_quiet(options, "dot", 0); + + return layer; +} + +layer parse_crnn(list *options, size_params params) +{ + int output_filters = option_find_int(options, "output_filters",1); + int hidden_filters = option_find_int(options, "hidden_filters",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, activation, batch_normalize); + + l.shortcut = option_find_int_quiet(options, "shortcut", 0); + + return l; +} + +layer parse_rnn(list *options, size_params params) +{ + int output = option_find_int(options, "output",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int 
batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_rnn_layer(params.batch, params.inputs, output, params.time_steps, activation, batch_normalize, params.net->adam); + + l.shortcut = option_find_int_quiet(options, "shortcut", 0); + + return l; +} + +layer parse_gru(list *options, size_params params) +{ + int output = option_find_int(options, "output",1); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_gru_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net->adam); + l.tanh = option_find_int_quiet(options, "tanh", 0); + + return l; +} + +layer parse_lstm(list *options, size_params params) +{ + int output = option_find_int(options, "output", 1); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_lstm_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net->adam); + + return l; +} + +layer parse_connected(list *options, size_params params) +{ + int output = option_find_int(options, "output",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_connected_layer(params.batch, params.inputs, output, activation, batch_normalize, params.net->adam); + return l; +} + +layer parse_softmax(list *options, size_params params) +{ + int groups = option_find_int_quiet(options, "groups",1); + layer l = make_softmax_layer(params.batch, params.inputs, groups); + l.temperature = option_find_float_quiet(options, "temperature", 1); + char *tree_file = option_find_str(options, "tree", 0); + if (tree_file) l.softmax_tree = read_tree(tree_file); + l.w = params.w; + l.h = params.h; + l.c = params.c; + l.spatial = option_find_float_quiet(options, "spatial", 0); + l.noloss = option_find_int_quiet(options, "noloss", 
0); + return l; +} + +int *parse_yolo_mask(char *a, int *num) +{ + int *mask = 0; + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + mask = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + int val = atoi(a); + mask[i] = val; + a = strchr(a, ',')+1; + } + *num = n; + } + return mask; +} + +layer parse_yolo(list *options, size_params params) +{ + int classes = option_find_int(options, "classes", 20); + int total = option_find_int(options, "num", 1); + int num = total; + + char *a = option_find_str(options, "mask", 0); + int *mask = parse_yolo_mask(a, &num); + layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes); + assert(l.outputs == params.inputs); + + l.max_boxes = option_find_int_quiet(options, "max",90); + l.jitter = option_find_float(options, "jitter", .2); + + l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); + l.truth_thresh = option_find_float(options, "truth_thresh", 1); + l.random = option_find_int_quiet(options, "random", 0); + + char *map_file = option_find_str(options, "map", 0); + if (map_file) l.map = read_map(map_file); + + a = option_find_str(options, "anchors", 0); + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + for(i = 0; i < n; ++i){ + float bias = atof(a); + l.biases[i] = bias; + a = strchr(a, ',')+1; + } + } + return l; +} + +layer parse_iseg(list *options, size_params params) +{ + int classes = option_find_int(options, "classes", 20); + int ids = option_find_int(options, "ids", 32); + layer l = make_iseg_layer(params.batch, params.w, params.h, classes, ids); + assert(l.outputs == params.inputs); + return l; +} + +layer parse_region(list *options, size_params params) +{ + int coords = option_find_int(options, "coords", 4); + int classes = option_find_int(options, "classes", 20); + int num = option_find_int(options, "num", 1); + + layer l = 
make_region_layer(params.batch, params.w, params.h, num, classes, coords); + assert(l.outputs == params.inputs); + + l.log = option_find_int_quiet(options, "log", 0); + l.sqrt = option_find_int_quiet(options, "sqrt", 0); + + l.softmax = option_find_int(options, "softmax", 0); + l.background = option_find_int_quiet(options, "background", 0); + l.max_boxes = option_find_int_quiet(options, "max",30); + l.jitter = option_find_float(options, "jitter", .2); + l.rescore = option_find_int_quiet(options, "rescore",0); + + l.thresh = option_find_float(options, "thresh", .5); + l.classfix = option_find_int_quiet(options, "classfix", 0); + l.absolute = option_find_int_quiet(options, "absolute", 0); + l.random = option_find_int_quiet(options, "random", 0); + + l.coord_scale = option_find_float(options, "coord_scale", 1); + l.object_scale = option_find_float(options, "object_scale", 1); + l.noobject_scale = option_find_float(options, "noobject_scale", 1); + l.mask_scale = option_find_float(options, "mask_scale", 1); + l.class_scale = option_find_float(options, "class_scale", 1); + l.bias_match = option_find_int_quiet(options, "bias_match",0); + + char *tree_file = option_find_str(options, "tree", 0); + if (tree_file) l.softmax_tree = read_tree(tree_file); + char *map_file = option_find_str(options, "map", 0); + if (map_file) l.map = read_map(map_file); + + char *a = option_find_str(options, "anchors", 0); + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + for(i = 0; i < n; ++i){ + float bias = atof(a); + l.biases[i] = bias; + a = strchr(a, ',')+1; + } + } + return l; +} + +detection_layer parse_detection(list *options, size_params params) +{ + int coords = option_find_int(options, "coords", 1); + int classes = option_find_int(options, "classes", 1); + int rescore = option_find_int(options, "rescore", 0); + int num = option_find_int(options, "num", 1); + int side = option_find_int(options, "side", 7); + detection_layer 
layer = make_detection_layer(params.batch, params.inputs, num, side, classes, coords, rescore); + + layer.softmax = option_find_int(options, "softmax", 0); + layer.sqrt = option_find_int(options, "sqrt", 0); + + layer.max_boxes = option_find_int_quiet(options, "max",90); + layer.coord_scale = option_find_float(options, "coord_scale", 1); + layer.forced = option_find_int(options, "forced", 0); + layer.object_scale = option_find_float(options, "object_scale", 1); + layer.noobject_scale = option_find_float(options, "noobject_scale", 1); + layer.class_scale = option_find_float(options, "class_scale", 1); + layer.jitter = option_find_float(options, "jitter", .2); + layer.random = option_find_int_quiet(options, "random", 0); + layer.reorg = option_find_int_quiet(options, "reorg", 0); + return layer; +} + +cost_layer parse_cost(list *options, size_params params) +{ + char *type_s = option_find_str(options, "type", "sse"); + COST_TYPE type = get_cost_type(type_s); + float scale = option_find_float_quiet(options, "scale",1); + cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale); + layer.ratio = option_find_float_quiet(options, "ratio",0); + layer.noobject_scale = option_find_float_quiet(options, "noobj", 1); + layer.thresh = option_find_float_quiet(options, "thresh",0); + return layer; +} + +crop_layer parse_crop(list *options, size_params params) +{ + int crop_height = option_find_int(options, "crop_height",1); + int crop_width = option_find_int(options, "crop_width",1); + int flip = option_find_int(options, "flip",0); + float angle = option_find_float(options, "angle",0); + float saturation = option_find_float(options, "saturation",1); + float exposure = option_find_float(options, "exposure",1); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before crop layer must output image."); + + int noadjust = option_find_int_quiet(options, "noadjust",0); + + crop_layer l = 
make_crop_layer(batch,h,w,c,crop_height,crop_width,flip, angle, saturation, exposure); + l.shift = option_find_float(options, "shift", 0); + l.noadjust = noadjust; + return l; +} + +layer parse_reorg(list *options, size_params params) +{ + int stride = option_find_int(options, "stride",1); + int reverse = option_find_int_quiet(options, "reverse",0); + int flatten = option_find_int_quiet(options, "flatten",0); + int extra = option_find_int_quiet(options, "extra",0); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before reorg layer must output image."); + + layer layer = make_reorg_layer(batch,w,h,c,stride,reverse, flatten, extra); + return layer; +} + +maxpool_layer parse_maxpool(list *options, size_params params) +{ + int stride = option_find_int(options, "stride",1); + int size = option_find_int(options, "size",stride); + int padding = option_find_int_quiet(options, "padding", size-1); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before maxpool layer must output image."); + + maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding); + return layer; +} + +avgpool_layer parse_avgpool(list *options, size_params params) +{ + int batch,w,h,c; + w = params.w; + h = params.h; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before avgpool layer must output image."); + + avgpool_layer layer = make_avgpool_layer(batch,w,h,c); + return layer; +} + +dropout_layer parse_dropout(list *options, size_params params) +{ + float probability = option_find_float(options, "probability", .5); + dropout_layer layer = make_dropout_layer(params.batch, params.inputs, probability); + layer.out_w = params.w; + layer.out_h = params.h; + layer.out_c = params.c; + return layer; +} + +layer parse_normalization(list *options, size_params params) +{ + float alpha = option_find_float(options, "alpha", 
.0001); + float beta = option_find_float(options, "beta" , .75); + float kappa = option_find_float(options, "kappa", 1); + int size = option_find_int(options, "size", 5); + layer l = make_normalization_layer(params.batch, params.w, params.h, params.c, size, alpha, beta, kappa); + return l; +} + +layer parse_batchnorm(list *options, size_params params) +{ + layer l = make_batchnorm_layer(params.batch, params.w, params.h, params.c); + return l; +} + +layer parse_shortcut(list *options, size_params params, network *net) +{ + char *l = option_find(options, "from"); + int index = atoi(l); + if(index < 0) index = params.index + index; + + int batch = params.batch; + layer from = net->layers[index]; + + layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c); + + char *activation_s = option_find_str(options, "activation", "linear"); + ACTIVATION activation = get_activation(activation_s); + s.activation = activation; + s.alpha = option_find_float_quiet(options, "alpha", 1); + s.beta = option_find_float_quiet(options, "beta", 1); + return s; +} + + +layer parse_l2norm(list *options, size_params params) +{ + layer l = make_l2norm_layer(params.batch, params.inputs); + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; + return l; +} + + +layer parse_logistic(list *options, size_params params) +{ + layer l = make_logistic_layer(params.batch, params.inputs); + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; + return l; +} + +layer parse_activation(list *options, size_params params) +{ + char *activation_s = option_find_str(options, "activation", "linear"); + ACTIVATION activation = get_activation(activation_s); + + layer l = make_activation_layer(params.batch, params.inputs, activation); + + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; + + return l; +} + +layer parse_upsample(list *options, size_params params, network *net) 
+{ + + int stride = option_find_int(options, "stride",2); + layer l = make_upsample_layer(params.batch, params.w, params.h, params.c, stride); + l.scale = option_find_float_quiet(options, "scale", 1); + return l; +} + +route_layer parse_route(list *options, size_params params, network *net) +{ + char *l = option_find(options, "layers"); + int len = strlen(l); + if(!l) error("Route Layer must specify input layers"); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (l[i] == ',') ++n; + } + + int *layers = calloc(n, sizeof(int)); + int *sizes = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + int index = atoi(l); + l = strchr(l, ',')+1; + if(index < 0) index = params.index + index; + layers[i] = index; + sizes[i] = net->layers[index].outputs; + } + int batch = params.batch; + + route_layer layer = make_route_layer(batch, n, layers, sizes); + + convolutional_layer first = net->layers[layers[0]]; + layer.out_w = first.out_w; + layer.out_h = first.out_h; + layer.out_c = first.out_c; + for(i = 1; i < n; ++i){ + int index = layers[i]; + convolutional_layer next = net->layers[index]; + if(next.out_w == first.out_w && next.out_h == first.out_h){ + layer.out_c += next.out_c; + }else{ + layer.out_h = layer.out_w = layer.out_c = 0; + } + } + + return layer; +} + +learning_rate_policy get_policy(char *s) +{ + if (strcmp(s, "random")==0) return RANDOM; + if (strcmp(s, "poly")==0) return POLY; + if (strcmp(s, "constant")==0) return CONSTANT; + if (strcmp(s, "step")==0) return STEP; + if (strcmp(s, "exp")==0) return EXP; + if (strcmp(s, "sigmoid")==0) return SIG; + if (strcmp(s, "steps")==0) return STEPS; + fprintf(stderr, "Couldn't find policy %s, going with constant\n", s); + return CONSTANT; +} + +void parse_net_options(list *options, network *net) +{ + net->batch = option_find_int(options, "batch",1); + net->learning_rate = option_find_float(options, "learning_rate", .001); + net->momentum = option_find_float(options, "momentum", .9); + net->decay = 
option_find_float(options, "decay", .0001); + int subdivs = option_find_int(options, "subdivisions",1); + net->time_steps = option_find_int_quiet(options, "time_steps",1); + net->notruth = option_find_int_quiet(options, "notruth",0); + net->batch /= subdivs; + net->batch *= net->time_steps; + net->subdivisions = subdivs; + net->random = option_find_int_quiet(options, "random", 0); + + net->adam = option_find_int_quiet(options, "adam", 0); + if(net->adam){ + net->B1 = option_find_float(options, "B1", .9); + net->B2 = option_find_float(options, "B2", .999); + net->eps = option_find_float(options, "eps", .0000001); + } + + net->h = option_find_int_quiet(options, "height",0); + net->w = option_find_int_quiet(options, "width",0); + net->c = option_find_int_quiet(options, "channels",0); + net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c); + net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2); + net->min_crop = option_find_int_quiet(options, "min_crop",net->w); + net->max_ratio = option_find_float_quiet(options, "max_ratio", (float) net->max_crop / net->w); + net->min_ratio = option_find_float_quiet(options, "min_ratio", (float) net->min_crop / net->w); + net->center = option_find_int_quiet(options, "center",0); + net->clip = option_find_float_quiet(options, "clip", 0); + + net->angle = option_find_float_quiet(options, "angle", 0); + net->aspect = option_find_float_quiet(options, "aspect", 1); + net->saturation = option_find_float_quiet(options, "saturation", 1); + net->exposure = option_find_float_quiet(options, "exposure", 1); + net->hue = option_find_float_quiet(options, "hue", 0); + + if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied"); + + char *policy_s = option_find_str(options, "policy", "constant"); + net->policy = get_policy(policy_s); + net->burn_in = option_find_int_quiet(options, "burn_in", 0); + net->power = option_find_float_quiet(options, "power", 4); + if(net->policy == 
STEP){ + net->step = option_find_int(options, "step", 1); + net->scale = option_find_float(options, "scale", 1); + } else if (net->policy == STEPS){ + char *l = option_find(options, "steps"); + char *p = option_find(options, "scales"); + if(!l || !p) error("STEPS policy must have steps and scales in cfg file"); + + int len = strlen(l); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (l[i] == ',') ++n; + } + int *steps = calloc(n, sizeof(int)); + float *scales = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + int step = atoi(l); + float scale = atof(p); + l = strchr(l, ',')+1; + p = strchr(p, ',')+1; + steps[i] = step; + scales[i] = scale; + } + net->scales = scales; + net->steps = steps; + net->num_steps = n; + } else if (net->policy == EXP){ + net->gamma = option_find_float(options, "gamma", 1); + } else if (net->policy == SIG){ + net->gamma = option_find_float(options, "gamma", 1); + net->step = option_find_int(options, "step", 1); + } else if (net->policy == POLY || net->policy == RANDOM){ + } + net->max_batches = option_find_int(options, "max_batches", 0); +} + +int is_network(section *s) +{ + return (strcmp(s->type, "[net]")==0 + || strcmp(s->type, "[network]")==0); +} + +network *parse_network_cfg(char *filename) +{ + list *sections = read_cfg(filename); + node *n = sections->front; + if(!n) error("Config file has no sections"); + network *net = make_network(sections->size - 1); + net->gpu_index = gpu_index; + size_params params; + + section *s = (section *)n->val; + list *options = s->options; + if(!is_network(s)) error("First section must be [net] or [network]"); + parse_net_options(options, net); + + params.h = net->h; + params.w = net->w; + params.c = net->c; + params.inputs = net->inputs; + params.batch = net->batch; + params.time_steps = net->time_steps; + params.net = net; + + size_t workspace_size = 0; + n = n->next; + int count = 0; + free_section(s); + fprintf(stderr, "layer filters size input output\n"); + while(n){ + params.index = 
count; + fprintf(stderr, "%5d ", count); + s = (section *)n->val; + options = s->options; + layer l = {0}; + LAYER_TYPE lt = string_to_layer_type(s->type); + if(lt == CONVOLUTIONAL){ + l = parse_convolutional(options, params); + }else if(lt == DECONVOLUTIONAL){ + l = parse_deconvolutional(options, params); + }else if(lt == LOCAL){ + l = parse_local(options, params); + }else if(lt == ACTIVE){ + l = parse_activation(options, params); + }else if(lt == LOGXENT){ + l = parse_logistic(options, params); + }else if(lt == L2NORM){ + l = parse_l2norm(options, params); + }else if(lt == RNN){ + l = parse_rnn(options, params); + }else if(lt == GRU){ + l = parse_gru(options, params); + }else if (lt == LSTM) { + l = parse_lstm(options, params); + }else if(lt == CRNN){ + l = parse_crnn(options, params); + }else if(lt == CONNECTED){ + l = parse_connected(options, params); + }else if(lt == CROP){ + l = parse_crop(options, params); + }else if(lt == COST){ + l = parse_cost(options, params); + }else if(lt == REGION){ + l = parse_region(options, params); + }else if(lt == YOLO){ + l = parse_yolo(options, params); + }else if(lt == ISEG){ + l = parse_iseg(options, params); + }else if(lt == DETECTION){ + l = parse_detection(options, params); + }else if(lt == SOFTMAX){ + l = parse_softmax(options, params); + net->hierarchy = l.softmax_tree; + }else if(lt == NORMALIZATION){ + l = parse_normalization(options, params); + }else if(lt == BATCHNORM){ + l = parse_batchnorm(options, params); + }else if(lt == MAXPOOL){ + l = parse_maxpool(options, params); + }else if(lt == REORG){ + l = parse_reorg(options, params); + }else if(lt == AVGPOOL){ + l = parse_avgpool(options, params); + }else if(lt == ROUTE){ + l = parse_route(options, params, net); + }else if(lt == UPSAMPLE){ + l = parse_upsample(options, params, net); + }else if(lt == SHORTCUT){ + l = parse_shortcut(options, params, net); + }else if(lt == DROPOUT){ + l = parse_dropout(options, params); + l.output = net->layers[count-1].output; + l.delta 
= net->layers[count-1].delta; +#ifdef GPU + l.output_gpu = net->layers[count-1].output_gpu; + l.delta_gpu = net->layers[count-1].delta_gpu; +#endif + }else{ + fprintf(stderr, "Type not recognized: %s\n", s->type); + } + l.clip = net->clip; + l.truth = option_find_int_quiet(options, "truth", 0); + l.onlyforward = option_find_int_quiet(options, "onlyforward", 0); + l.stopbackward = option_find_int_quiet(options, "stopbackward", 0); + l.dontsave = option_find_int_quiet(options, "dontsave", 0); + l.dontload = option_find_int_quiet(options, "dontload", 0); + l.numload = option_find_int_quiet(options, "numload", 0); + l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0); + l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1); + l.smooth = option_find_float_quiet(options, "smooth", 0); + option_unused(options); + net->layers[count] = l; + if (l.workspace_size > workspace_size) workspace_size = l.workspace_size; + free_section(s); + n = n->next; + ++count; + if(n){ + params.h = l.out_h; + params.w = l.out_w; + params.c = l.out_c; + params.inputs = l.outputs; + } + } + free_list(sections); + layer out = get_network_output_layer(net); + net->outputs = out.outputs; + net->truths = out.outputs; + if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths; + net->output = out.output; + net->input = calloc(net->inputs*net->batch, sizeof(float)); + net->truth = calloc(net->truths*net->batch, sizeof(float)); +#ifdef GPU + net->output_gpu = out.output_gpu; + net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch); + net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch); +#endif + if(workspace_size){ + //printf("%ld\n", workspace_size); +#ifdef GPU + if(gpu_index >= 0){ + net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1); + }else { + net->workspace = calloc(1, workspace_size); + } +#else + net->workspace = calloc(1, workspace_size); +#endif + } + return net; +} + +list 
*read_cfg(char *filename) +{ + FILE *file = fopen(filename, "r"); + if(file == 0) file_error(filename); + char *line; + int nu = 0; + list *options = make_list(); + section *current = 0; + while((line=fgetl(file)) != 0){ + ++ nu; + strip(line); + switch(line[0]){ + case '[': + current = malloc(sizeof(section)); + list_insert(options, current); + current->options = make_list(); + current->type = line; + break; + case '\0': + case '#': + case ';': + free(line); + break; + default: + if(!read_option(line, current->options)){ + fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); + free(line); + } + break; + } + } + fclose(file); + return options; +} + +void save_convolutional_weights_binary(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_convolutional_layer(l); + } +#endif + binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.binary_weights); + int size = l.c*l.size*l.size; + int i, j, k; + fwrite(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.n, fp); + fwrite(l.rolling_mean, sizeof(float), l.n, fp); + fwrite(l.rolling_variance, sizeof(float), l.n, fp); + } + for(i = 0; i < l.n; ++i){ + float mean = l.binary_weights[i*size]; + if(mean < 0) mean = -mean; + fwrite(&mean, sizeof(float), 1, fp); + for(j = 0; j < size/8; ++j){ + int index = i*size + j*8; + unsigned char c = 0; + for(k = 0; k < 8; ++k){ + if (j*8 + k >= size) break; + if (l.binary_weights[index + k] > 0) c = (c | 1<= 0){ + pull_convolutional_layer(l); + } +#endif + int num = l.nweights; + fwrite(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.n, fp); + fwrite(l.rolling_mean, sizeof(float), l.n, fp); + fwrite(l.rolling_variance, sizeof(float), l.n, fp); + } + fwrite(l.weights, sizeof(float), num, fp); +} + +void save_batchnorm_weights(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_batchnorm_layer(l); + } +#endif + fwrite(l.scales, sizeof(float), l.c, fp); + 
fwrite(l.rolling_mean, sizeof(float), l.c, fp); + fwrite(l.rolling_variance, sizeof(float), l.c, fp); +} + +void save_connected_weights(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_connected_layer(l); + } +#endif + fwrite(l.biases, sizeof(float), l.outputs, fp); + fwrite(l.weights, sizeof(float), l.outputs*l.inputs, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.outputs, fp); + fwrite(l.rolling_mean, sizeof(float), l.outputs, fp); + fwrite(l.rolling_variance, sizeof(float), l.outputs, fp); + } +} + +void save_weights_upto(network *net, char *filename, int cutoff) +{ +#ifdef GPU + if(net->gpu_index >= 0){ + cuda_set_device(net->gpu_index); + } +#endif + fprintf(stderr, "Saving weights to %s\n", filename); + FILE *fp = fopen(filename, "wb"); + if(!fp) file_error(filename); + + int major = 0; + int minor = 2; + int revision = 0; + fwrite(&major, sizeof(int), 1, fp); + fwrite(&minor, sizeof(int), 1, fp); + fwrite(&revision, sizeof(int), 1, fp); + fwrite(net->seen, sizeof(size_t), 1, fp); + + int i; + for(i = 0; i < net->n && i < cutoff; ++i){ + layer l = net->layers[i]; + if (l.dontsave) continue; + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + save_convolutional_weights(l, fp); + } if(l.type == CONNECTED){ + save_connected_weights(l, fp); + } if(l.type == BATCHNORM){ + save_batchnorm_weights(l, fp); + } if(l.type == RNN){ + save_connected_weights(*(l.input_layer), fp); + save_connected_weights(*(l.self_layer), fp); + save_connected_weights(*(l.output_layer), fp); + } if (l.type == LSTM) { + save_connected_weights(*(l.wi), fp); + save_connected_weights(*(l.wf), fp); + save_connected_weights(*(l.wo), fp); + save_connected_weights(*(l.wg), fp); + save_connected_weights(*(l.ui), fp); + save_connected_weights(*(l.uf), fp); + save_connected_weights(*(l.uo), fp); + save_connected_weights(*(l.ug), fp); + } if (l.type == GRU) { + if(1){ + save_connected_weights(*(l.wz), fp); + save_connected_weights(*(l.wr), fp); + 
save_connected_weights(*(l.wh), fp); + save_connected_weights(*(l.uz), fp); + save_connected_weights(*(l.ur), fp); + save_connected_weights(*(l.uh), fp); + }else{ + save_connected_weights(*(l.reset_layer), fp); + save_connected_weights(*(l.update_layer), fp); + save_connected_weights(*(l.state_layer), fp); + } + } if(l.type == CRNN){ + save_convolutional_weights(*(l.input_layer), fp); + save_convolutional_weights(*(l.self_layer), fp); + save_convolutional_weights(*(l.output_layer), fp); + } if(l.type == LOCAL){ +#ifdef GPU + if(gpu_index >= 0){ + pull_local_layer(l); + } +#endif + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + fwrite(l.biases, sizeof(float), l.outputs, fp); + fwrite(l.weights, sizeof(float), size, fp); + } + } + fclose(fp); +} +void save_weights(network *net, char *filename) +{ + save_weights_upto(net, filename, net->n); +} + +void transpose_matrix(float *a, int rows, int cols) +{ + float *transpose = calloc(rows*cols, sizeof(float)); + int x, y; + for(x = 0; x < rows; ++x){ + for(y = 0; y < cols; ++y){ + transpose[y*rows + x] = a[x*cols + y]; + } + } + memcpy(a, transpose, rows*cols*sizeof(float)); + free(transpose); +} + +void load_connected_weights(layer l, FILE *fp, int transpose) +{ + fread(l.biases, sizeof(float), l.outputs, fp); + fread(l.weights, sizeof(float), l.outputs*l.inputs, fp); + if(transpose){ + transpose_matrix(l.weights, l.inputs, l.outputs); + } + //printf("Biases: %f mean %f variance\n", mean_array(l.biases, l.outputs), variance_array(l.biases, l.outputs)); + //printf("Weights: %f mean %f variance\n", mean_array(l.weights, l.outputs*l.inputs), variance_array(l.weights, l.outputs*l.inputs)); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.outputs, fp); + fread(l.rolling_mean, sizeof(float), l.outputs, fp); + fread(l.rolling_variance, sizeof(float), l.outputs, fp); + //printf("Scales: %f mean %f variance\n", mean_array(l.scales, l.outputs), 
variance_array(l.scales, l.outputs)); + //printf("rolling_mean: %f mean %f variance\n", mean_array(l.rolling_mean, l.outputs), variance_array(l.rolling_mean, l.outputs)); + //printf("rolling_variance: %f mean %f variance\n", mean_array(l.rolling_variance, l.outputs), variance_array(l.rolling_variance, l.outputs)); + } +#ifdef GPU + if(gpu_index >= 0){ + push_connected_layer(l); + } +#endif +} + +void load_batchnorm_weights(layer l, FILE *fp) +{ + fread(l.scales, sizeof(float), l.c, fp); + fread(l.rolling_mean, sizeof(float), l.c, fp); + fread(l.rolling_variance, sizeof(float), l.c, fp); +#ifdef GPU + if(gpu_index >= 0){ + push_batchnorm_layer(l); + } +#endif +} + +void load_convolutional_weights_binary(layer l, FILE *fp) +{ + fread(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.n, fp); + fread(l.rolling_mean, sizeof(float), l.n, fp); + fread(l.rolling_variance, sizeof(float), l.n, fp); + } + int size = l.c*l.size*l.size; + int i, j, k; + for(i = 0; i < l.n; ++i){ + float mean = 0; + fread(&mean, sizeof(float), 1, fp); + for(j = 0; j < size/8; ++j){ + int index = i*size + j*8; + unsigned char c = 0; + fread(&c, sizeof(char), 1, fp); + for(k = 0; k < 8; ++k){ + if (j*8 + k >= size) break; + l.weights[index + k] = (c & 1<= 0){ + push_convolutional_layer(l); + } +#endif +} + +void load_convolutional_weights(layer l, FILE *fp) +{ + if(l.binary){ + //load_convolutional_weights_binary(l, fp); + //return; + } + if(l.numload) l.n = l.numload; + int num = l.c/l.groups*l.n*l.size*l.size; + fread(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.n, fp); + fread(l.rolling_mean, sizeof(float), l.n, fp); + fread(l.rolling_variance, sizeof(float), l.n, fp); + if(0){ + int i; + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_mean[i]); + } + printf("\n"); + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_variance[i]); + } + 
printf("\n"); + } + if(0){ + fill_cpu(l.n, 0, l.rolling_mean, 1); + fill_cpu(l.n, 0, l.rolling_variance, 1); + } + if(0){ + int i; + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_mean[i]); + } + printf("\n"); + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_variance[i]); + } + printf("\n"); + } + } + fread(l.weights, sizeof(float), num, fp); + //if(l.c == 3) scal_cpu(num, 1./256, l.weights, 1); + if (l.flipped) { + transpose_matrix(l.weights, l.c*l.size*l.size, l.n); + } + //if (l.binary) binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.weights); +#ifdef GPU + if(gpu_index >= 0){ + push_convolutional_layer(l); + } +#endif +} + + +void load_weights_upto(network *net, char *filename, int start, int cutoff) +{ +#ifdef GPU + if(net->gpu_index >= 0){ + cuda_set_device(net->gpu_index); + } +#endif + fprintf(stderr, "Loading weights from %s...", filename); + fflush(stdout); + FILE *fp = fopen(filename, "rb"); + if(!fp) file_error(filename); + + int major; + int minor; + int revision; + fread(&major, sizeof(int), 1, fp); + fread(&minor, sizeof(int), 1, fp); + fread(&revision, sizeof(int), 1, fp); + if ((major*10 + minor) >= 2 && major < 1000 && minor < 1000){ + fread(net->seen, sizeof(size_t), 1, fp); + } else { + int iseen = 0; + fread(&iseen, sizeof(int), 1, fp); + *net->seen = iseen; + } + int transpose = (major > 1000) || (minor > 1000); + + int i; + for(i = start; i < net->n && i < cutoff; ++i){ + layer l = net->layers[i]; + if (l.dontload) continue; + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + load_convolutional_weights(l, fp); + } + if(l.type == CONNECTED){ + load_connected_weights(l, fp, transpose); + } + if(l.type == BATCHNORM){ + load_batchnorm_weights(l, fp); + } + if(l.type == CRNN){ + load_convolutional_weights(*(l.input_layer), fp); + load_convolutional_weights(*(l.self_layer), fp); + load_convolutional_weights(*(l.output_layer), fp); + } + if(l.type == RNN){ + load_connected_weights(*(l.input_layer), fp, transpose); + 
load_connected_weights(*(l.self_layer), fp, transpose); + load_connected_weights(*(l.output_layer), fp, transpose); + } + if (l.type == LSTM) { + load_connected_weights(*(l.wi), fp, transpose); + load_connected_weights(*(l.wf), fp, transpose); + load_connected_weights(*(l.wo), fp, transpose); + load_connected_weights(*(l.wg), fp, transpose); + load_connected_weights(*(l.ui), fp, transpose); + load_connected_weights(*(l.uf), fp, transpose); + load_connected_weights(*(l.uo), fp, transpose); + load_connected_weights(*(l.ug), fp, transpose); + } + if (l.type == GRU) { + if(1){ + load_connected_weights(*(l.wz), fp, transpose); + load_connected_weights(*(l.wr), fp, transpose); + load_connected_weights(*(l.wh), fp, transpose); + load_connected_weights(*(l.uz), fp, transpose); + load_connected_weights(*(l.ur), fp, transpose); + load_connected_weights(*(l.uh), fp, transpose); + }else{ + load_connected_weights(*(l.reset_layer), fp, transpose); + load_connected_weights(*(l.update_layer), fp, transpose); + load_connected_weights(*(l.state_layer), fp, transpose); + } + } + if(l.type == LOCAL){ + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + fread(l.biases, sizeof(float), l.outputs, fp); + fread(l.weights, sizeof(float), size, fp); +#ifdef GPU + if(gpu_index >= 0){ + push_local_layer(l); + } +#endif + } + } + fprintf(stderr, "Done!\n"); + fclose(fp); +} + +void load_weights(network *net, char *filename) +{ + load_weights_upto(net, filename, 0, net->n); +} + diff --git a/workloads/realworld/standard/darknet/src/parser.h b/workloads/realworld/standard/darknet/src/parser.h new file mode 100644 index 0000000000000000000000000000000000000000..81aef2c86f3e6cb362f8bde9695ce9d5699ca77f --- /dev/null +++ b/workloads/realworld/standard/darknet/src/parser.h @@ -0,0 +1,9 @@ +#ifndef PARSER_H +#define PARSER_H +#include "darknet.h" +#include "network.h" + +void save_network(network net, char *filename); +void save_weights_double(network net, char 
*filename); + +#endif diff --git a/workloads/realworld/standard/darknet/src/region_layer.c b/workloads/realworld/standard/darknet/src/region_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..9df1b8bc252239ca520fa3dabeff55e5eb5959b8 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/region_layer.c @@ -0,0 +1,507 @@ +#include "region_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_region_layer(int batch, int w, int h, int n, int classes, int coords) +{ + layer l = {0}; + l.type = REGION; + + l.n = n; + l.batch = batch; + l.h = h; + l.w = w; + l.c = n*(classes + coords + 1); + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.coords = coords; + l.cost = calloc(1, sizeof(float)); + l.biases = calloc(n*2, sizeof(float)); + l.bias_updates = calloc(n*2, sizeof(float)); + l.outputs = h*w*n*(classes + coords + 1); + l.inputs = l.outputs; + l.truths = 30*(l.coords + 1); + l.delta = calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + int i; + for(i = 0; i < n*2; ++i){ + l.biases[i] = .5; + } + + l.forward = forward_region_layer; + l.backward = backward_region_layer; +#ifdef GPU + l.forward_gpu = forward_region_layer_gpu; + l.backward_gpu = backward_region_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "detection\n"); + srand(0); + + return l; +} + +void resize_region_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->n*(l->classes + l->coords + 1); + l->inputs = l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = 
cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride) +{ + box b; + b.x = (i + x[index + 0*stride]) / w; + b.y = (j + x[index + 1*stride]) / h; + b.w = exp(x[index + 2*stride]) * biases[2*n] / w; + b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h; + return b; +} + +float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale, int stride) +{ + box pred = get_region_box(x, biases, n, index, i, j, w, h, stride); + float iou = box_iou(pred, truth); + + float tx = (truth.x*w - i); + float ty = (truth.y*h - j); + float tw = log(truth.w*w / biases[2*n]); + float th = log(truth.h*h / biases[2*n + 1]); + + delta[index + 0*stride] = scale * (tx - x[index + 0*stride]); + delta[index + 1*stride] = scale * (ty - x[index + 1*stride]); + delta[index + 2*stride] = scale * (tw - x[index + 2*stride]); + delta[index + 3*stride] = scale * (th - x[index + 3*stride]); + return iou; +} + +void delta_region_mask(float *truth, float *x, int n, int index, float *delta, int stride, int scale) +{ + int i; + for(i = 0; i < n; ++i){ + delta[index + i*stride] = scale*(truth[i] - x[index + i*stride]); + } +} + + +void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, int stride, float *avg_cat, int tag) +{ + int i, n; + if(hier){ + float pred = 1; + while(class >= 0){ + pred *= output[index + stride*class]; + int g = hier->group[class]; + int offset = hier->group_offset[g]; + for(i = 0; i < hier->group_size[g]; ++i){ + delta[index + stride*(offset + i)] = scale * (0 - output[index + stride*(offset + i)]); + } + delta[index + stride*class] = scale * (1 - output[index + stride*class]); + + class = hier->parent[class]; + } + *avg_cat += pred; + } else { + if (delta[index] && tag){ + delta[index + 
stride*class] = scale * (1 - output[index + stride*class]); + return; + } + for(n = 0; n < classes; ++n){ + delta[index + stride*n] = scale * (((n == class)?1 : 0) - output[index + stride*n]); + if(n == class) *avg_cat += output[index + stride*n]; + } + } +} + +float logit(float x) +{ + return log(x/(1.-x)); +} + +float tisnan(float x) +{ + return (x != x); +} + +int entry_index(layer l, int batch, int location, int entry) +{ + int n = location / (l.w*l.h); + int loc = location % (l.w*l.h); + return batch*l.outputs + n*l.w*l.h*(l.coords+l.classes+1) + entry*l.w*l.h + loc; +} + +void forward_region_layer(const layer l, network net) +{ + int i,j,b,t,n; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) activate_array(l.output + index, l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords + 1); + if(!l.softmax && !l.softmax_tree) activate_array(l.output + index, l.classes*l.w*l.h, LOGISTIC); + } + } + if (l.softmax_tree){ + int i; + int count = l.coords + 1; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_cpu(net.input + count, group_size, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + count); + count += group_size; + } + } else if (l.softmax){ + int index = entry_index(l, 0, 0, l.coords + !l.background); + softmax_cpu(net.input + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output + index); + } +#endif + + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + if(!net.train) return; + float avg_iou = 0; + float recall = 0; + float avg_cat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + int class_count = 0; + *(l.cost) = 0; + for (b = 0; b < 
l.batch; ++b) { + if(l.softmax_tree){ + int onlyclass = 0; + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + if(!truth.x) break; + int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; + float maxp = 0; + int maxi = 0; + if(truth.x > 100000 && truth.y > 100000){ + for(n = 0; n < l.n*l.w*l.h; ++n){ + int class_index = entry_index(l, b, n, l.coords + 1); + int obj_index = entry_index(l, b, n, l.coords); + float scale = l.output[obj_index]; + l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]); + float p = scale*get_hierarchy_probability(l.output + class_index, l.softmax_tree, class, l.w*l.h); + if(p > maxp){ + maxp = p; + maxi = n; + } + } + int class_index = entry_index(l, b, maxi, l.coords + 1); + int obj_index = entry_index(l, b, maxi, l.coords); + delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax); + if(l.output[obj_index] < .3) l.delta[obj_index] = l.object_scale * (.3 - l.output[obj_index]); + else l.delta[obj_index] = 0; + l.delta[obj_index] = 0; + ++class_count; + onlyclass = 1; + break; + } + } + if(onlyclass) continue; + } + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h); + float best_iou = 0; + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + if(!truth.x) break; + float iou = box_iou(pred, truth); + if (iou > best_iou) { + best_iou = iou; + } + } + int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, l.coords); + avg_anyobj += l.output[obj_index]; + l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]); + if(l.background) l.delta[obj_index] = l.noobject_scale * (1 - l.output[obj_index]); + if (best_iou > l.thresh) { + l.delta[obj_index] = 0; + } + 
+ if(*(net.seen) < 12800){ + box truth = {0}; + truth.x = (i + .5)/l.w; + truth.y = (j + .5)/l.h; + truth.w = l.biases[2*n]/l.w; + truth.h = l.biases[2*n+1]/l.h; + delta_region_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, l.delta, .01, l.w*l.h); + } + } + } + } + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + + if(!truth.x) break; + float best_iou = 0; + int best_n = 0; + i = (truth.x * l.w); + j = (truth.y * l.h); + box truth_shift = truth; + truth_shift.x = 0; + truth_shift.y = 0; + for(n = 0; n < l.n; ++n){ + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h); + if(l.bias_match){ + pred.w = l.biases[2*n]/l.w; + pred.h = l.biases[2*n+1]/l.h; + } + pred.x = 0; + pred.y = 0; + float iou = box_iou(pred, truth_shift); + if (iou > best_iou){ + best_iou = iou; + best_n = n; + } + } + + int box_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 0); + float iou = delta_region_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, l.delta, l.coord_scale * (2 - truth.w*truth.h), l.w*l.h); + if(l.coords > 4){ + int mask_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 4); + delta_region_mask(net.truth + t*(l.coords + 1) + b*l.truths + 5, l.output, l.coords - 4, mask_index, l.delta, l.w*l.h, l.mask_scale); + } + if(iou > .5) recall += 1; + avg_iou += iou; + + int obj_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords); + avg_obj += l.output[obj_index]; + l.delta[obj_index] = l.object_scale * (1 - l.output[obj_index]); + if (l.rescore) { + l.delta[obj_index] = l.object_scale * (iou - l.output[obj_index]); + } + if(l.background){ + l.delta[obj_index] = l.object_scale * (0 - l.output[obj_index]); + } + + int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords + 1); + 
delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax); + ++count; + ++class_count; + } + } + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count); +} + +void backward_region_layer(const layer l, network net) +{ + /* + int b; + int size = l.coords + l.classes + 1; + for (b = 0; b < l.batch*l.n; ++b){ + int index = (b*size + 4)*l.w*l.h; + gradient_array(l.output + index, l.w*l.h, LOGISTIC, l.delta + index); + } + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); + */ +} + +void correct_region_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +{ + int i; + int new_w=0; + int new_h=0; + if (((float)netw/w) < ((float)neth/h)) { + new_w = netw; + new_h = (h * netw)/w; + } else { + new_h = neth; + new_w = (w * neth)/h; + } + for (i = 0; i < n; ++i){ + box b = dets[i].bbox; + b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); + b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); + b.w *= (float)netw/new_w; + b.h *= (float)neth/new_h; + if(!relative){ + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + dets[i].bbox = b; + } +} + +void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets) +{ + int i,j,n,z; + float *predictions = l.output; + if (l.batch == 2) { + float *flip = l.output + l.outputs; + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w/2; ++i) { + for (n = 0; n < l.n; ++n) { + for(z = 0; z < l.classes + l.coords + 1; ++z){ + int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; + int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); + float swap = flip[i1]; + flip[i1] = flip[i2]; + flip[i2] = swap; + if(z == 0){ + flip[i1] = -flip[i1]; + flip[i2] = 
-flip[i2]; + } + } + } + } + } + for(i = 0; i < l.outputs; ++i){ + l.output[i] = (l.output[i] + flip[i])/2.; + } + } + for (i = 0; i < l.w*l.h; ++i){ + int row = i / l.w; + int col = i % l.w; + for(n = 0; n < l.n; ++n){ + int index = n*l.w*l.h + i; + for(j = 0; j < l.classes; ++j){ + dets[index].prob[j] = 0; + } + int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); + int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); + int mask_index = entry_index(l, 0, n*l.w*l.h + i, 4); + float scale = l.background ? 1 : predictions[obj_index]; + dets[index].bbox = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h); + dets[index].objectness = scale > thresh ? scale : 0; + if(dets[index].mask){ + for(j = 0; j < l.coords - 4; ++j){ + dets[index].mask[j] = l.output[mask_index + j*l.w*l.h]; + } + } + + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + !l.background); + if(l.softmax_tree){ + + hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0, l.w*l.h); + if(map){ + for(j = 0; j < 200; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + map[j]); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? prob : 0; + } + } else { + int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh, l.w*l.h); + dets[index].prob[j] = (scale > thresh) ? scale : 0; + } + } else { + if(dets[index].objectness){ + for(j = 0; j < l.classes; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + j); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? 
prob : 0; + } + } + } + } + } + correct_region_boxes(dets, l.w*l.h*l.n, w, h, netw, neth, relative); +} + +#ifdef GPU + +void forward_region_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + if(l.coords > 4){ + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC); + } + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) activate_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords + 1); + if(!l.softmax && !l.softmax_tree) activate_array_gpu(l.output_gpu + index, l.classes*l.w*l.h, LOGISTIC); + } + } + if (l.softmax_tree){ + int index = entry_index(l, 0, 0, l.coords + 1); + softmax_tree(net.input_gpu + index, l.w*l.h, l.batch*l.n, l.inputs/l.n, 1, l.output_gpu + index, *l.softmax_tree); + } else if (l.softmax) { + int index = entry_index(l, 0, 0, l.coords + !l.background); + softmax_gpu(net.input_gpu + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index); + } + if(!net.train || l.onlyforward){ + cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + return; + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_region_layer(l, net); + //cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs); + if(!net.train) return; + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_region_layer_gpu(const layer l, network net) +{ + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + gradient_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC, l.delta_gpu + index); + if(l.coords > 4){ + index = entry_index(l, b, n*l.w*l.h, 4); + 
gradient_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC, l.delta_gpu + index); + } + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) gradient_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC, l.delta_gpu + index); + } + } + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + +void zero_objectness(layer l) +{ + int i, n; + for (i = 0; i < l.w*l.h; ++i){ + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); + l.output[obj_index] = 0; + } + } +} + diff --git a/workloads/realworld/standard/darknet/src/region_layer.h b/workloads/realworld/standard/darknet/src/region_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9f12fd187fd490d10cbc21af8251e0e2a870b7cb --- /dev/null +++ b/workloads/realworld/standard/darknet/src/region_layer.h @@ -0,0 +1,18 @@ +#ifndef REGION_LAYER_H +#define REGION_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_region_layer(int batch, int w, int h, int n, int classes, int coords); +void forward_region_layer(const layer l, network net); +void backward_region_layer(const layer l, network net); +void resize_region_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_region_layer_gpu(const layer l, network net); +void backward_region_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/standard/darknet/src/reorg_layer.c b/workloads/realworld/standard/darknet/src/reorg_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..b3634d53a5e01a8bbcf00e62a90f70f40108e1d7 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/reorg_layer.c @@ -0,0 +1,173 @@ +#include "reorg_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + + +layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra) +{ + layer l = {0}; + l.type = REORG; + l.batch = batch; + l.stride = stride; + l.extra = 
extra; + l.h = h; + l.w = w; + l.c = c; + l.flatten = flatten; + if(reverse){ + l.out_w = w*stride; + l.out_h = h*stride; + l.out_c = c/(stride*stride); + }else{ + l.out_w = w/stride; + l.out_h = h/stride; + l.out_c = c*(stride*stride); + } + l.reverse = reverse; + + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = h*w*c; + if(l.extra){ + l.out_w = l.out_h = l.out_c = 0; + l.outputs = l.inputs + l.extra; + } + + if(extra){ + fprintf(stderr, "reorg %4d -> %4d\n", l.inputs, l.outputs); + } else { + fprintf(stderr, "reorg /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); + } + int output_size = l.outputs * batch; + l.output = calloc(output_size, sizeof(float)); + l.delta = calloc(output_size, sizeof(float)); + + l.forward = forward_reorg_layer; + l.backward = backward_reorg_layer; +#ifdef GPU + l.forward_gpu = forward_reorg_layer_gpu; + l.backward_gpu = backward_reorg_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); +#endif + return l; +} + +void resize_reorg_layer(layer *l, int w, int h) +{ + int stride = l->stride; + int c = l->c; + + l->h = h; + l->w = w; + + if(l->reverse){ + l->out_w = w*stride; + l->out_h = h*stride; + l->out_c = c/(stride*stride); + }else{ + l->out_w = w/stride; + l->out_h = h/stride; + l->out_c = c*(stride*stride); + } + + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->outputs; + int output_size = l->outputs * l->batch; + + l->output = realloc(l->output, output_size * sizeof(float)); + l->delta = realloc(l->delta, output_size * sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, output_size); + l->delta_gpu = cuda_make_array(l->delta, output_size); +#endif +} + +void forward_reorg_layer(const layer l, network net) +{ + int i; + if(l.flatten){ + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + if(l.reverse){ + 
flatten(l.output, l.w*l.h, l.c, l.batch, 0); + }else{ + flatten(l.output, l.w*l.h, l.c, l.batch, 1); + } + } else if (l.extra) { + for(i = 0; i < l.batch; ++i){ + copy_cpu(l.inputs, net.input + i*l.inputs, 1, l.output + i*l.outputs, 1); + } + } else if (l.reverse){ + reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output); + } else { + reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output); + } +} + +void backward_reorg_layer(const layer l, network net) +{ + int i; + if(l.flatten){ + memcpy(net.delta, l.delta, l.outputs*l.batch*sizeof(float)); + if(l.reverse){ + flatten(net.delta, l.w*l.h, l.c, l.batch, 1); + }else{ + flatten(net.delta, l.w*l.h, l.c, l.batch, 0); + } + } else if(l.reverse){ + reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta); + } else if (l.extra) { + for(i = 0; i < l.batch; ++i){ + copy_cpu(l.inputs, l.delta + i*l.outputs, 1, net.delta + i*l.inputs, 1); + } + }else{ + reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta); + } +} + +#ifdef GPU +void forward_reorg_layer_gpu(layer l, network net) +{ + int i; + if(l.flatten){ + if(l.reverse){ + flatten_gpu(net.input_gpu, l.w*l.h, l.c, l.batch, 0, l.output_gpu); + }else{ + flatten_gpu(net.input_gpu, l.w*l.h, l.c, l.batch, 1, l.output_gpu); + } + } else if (l.extra) { + for(i = 0; i < l.batch; ++i){ + copy_gpu(l.inputs, net.input_gpu + i*l.inputs, 1, l.output_gpu + i*l.outputs, 1); + } + } else if (l.reverse) { + reorg_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu); + }else { + reorg_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu); + } +} + +void backward_reorg_layer_gpu(layer l, network net) +{ + if(l.flatten){ + if(l.reverse){ + flatten_gpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 1, net.delta_gpu); + }else{ + flatten_gpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 0, net.delta_gpu); + } + } else if (l.extra) { + int i; + for(i = 0; i < l.batch; ++i){ + copy_gpu(l.inputs, l.delta_gpu + i*l.outputs, 1, net.delta_gpu 
+ i*l.inputs, 1); + } + } else if(l.reverse){ + reorg_gpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta_gpu); + } else { + reorg_gpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta_gpu); + } +} +#endif diff --git a/workloads/realworld/standard/darknet/src/reorg_layer.h b/workloads/realworld/standard/darknet/src/reorg_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1d1445f17d2874835ee19d033b50e09761374de3 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/reorg_layer.h @@ -0,0 +1,20 @@ +#ifndef REORG_LAYER_H +#define REORG_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra); +void resize_reorg_layer(layer *l, int w, int h); +void forward_reorg_layer(const layer l, network net); +void backward_reorg_layer(const layer l, network net); + +#ifdef GPU +void forward_reorg_layer_gpu(layer l, network net); +void backward_reorg_layer_gpu(layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/standard/darknet/src/rnn_layer.c b/workloads/realworld/standard/darknet/src/rnn_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..c07e338caee5418657eb1127058419566d9ef787 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/rnn_layer.c @@ -0,0 +1,292 @@ +#include "rnn_layer.h" +#include "connected_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTIVATION activation, int batch_normalize, 
int adam) +{ + fprintf(stderr, "RNN Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = RNN; + l.steps = steps; + l.inputs = inputs; + + l.state = calloc(batch*outputs, sizeof(float)); + l.prev_state = calloc(batch*outputs, sizeof(float)); + + l.input_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.input_layer) = make_connected_layer(batch*steps, inputs, outputs, activation, batch_normalize, adam); + l.input_layer->batch = batch; + + l.self_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.self_layer) = make_connected_layer(batch*steps, outputs, outputs, activation, batch_normalize, adam); + l.self_layer->batch = batch; + + l.output_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.output_layer) = make_connected_layer(batch*steps, outputs, outputs, activation, batch_normalize, adam); + l.output_layer->batch = batch; + + l.outputs = outputs; + l.output = l.output_layer->output; + l.delta = l.output_layer->delta; + + l.forward = forward_rnn_layer; + l.backward = backward_rnn_layer; + l.update = update_rnn_layer; +#ifdef GPU + l.forward_gpu = forward_rnn_layer_gpu; + l.backward_gpu = backward_rnn_layer_gpu; + l.update_gpu = update_rnn_layer_gpu; + l.state_gpu = cuda_make_array(0, batch*outputs); + l.prev_state_gpu = cuda_make_array(0, batch*outputs); + l.output_gpu = l.output_layer->output_gpu; + l.delta_gpu = l.output_layer->delta_gpu; +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l.input_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.input_layer->out_c, l.input_layer->out_h, l.input_layer->out_w); + cudnnSetTensor4dDescriptor(l.self_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.self_layer->out_c, l.self_layer->out_h, l.self_layer->out_w); + cudnnSetTensor4dDescriptor(l.output_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.output_layer->out_c, l.output_layer->out_h, l.output_layer->out_w); 
+#endif +#endif + + return l; +} + +void update_rnn_layer(layer l, update_args a) +{ + update_connected_layer(*(l.input_layer), a); + update_connected_layer(*(l.self_layer), a); + update_connected_layer(*(l.output_layer), a); +} + +void forward_rnn_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, self_layer.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, input_layer.delta, 1); + if(net.train) fill_cpu(l.outputs * l.batch, 0, l.state, 1); + + for (i = 0; i < l.steps; ++i) { + s.input = net.input; + forward_connected_layer(input_layer, s); + + s.input = l.state; + forward_connected_layer(self_layer, s); + + float *old_state = l.state; + if(net.train) l.state += l.outputs*l.batch; + if(l.shortcut){ + copy_cpu(l.outputs * l.batch, old_state, 1, l.state, 1); + }else{ + fill_cpu(l.outputs * l.batch, 0, l.state, 1); + } + axpy_cpu(l.outputs * l.batch, 1, input_layer.output, 1, l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + forward_connected_layer(output_layer, s); + + net.input += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_rnn_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + increment_layer(&input_layer, l.steps-1); + increment_layer(&self_layer, l.steps-1); + increment_layer(&output_layer, l.steps-1); + + l.state += l.outputs*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_cpu(l.outputs * l.batch, input_layer.output, 1, l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, 
self_layer.output, 1, l.state, 1); + + s.input = l.state; + s.delta = self_layer.delta; + backward_connected_layer(output_layer, s); + + l.state -= l.outputs*l.batch; + /* + if(i > 0){ + copy_cpu(l.outputs * l.batch, input_layer.output - l.outputs*l.batch, 1, l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, self_layer.output - l.outputs*l.batch, 1, l.state, 1); + }else{ + fill_cpu(l.outputs * l.batch, 0, l.state, 1); + } + */ + + s.input = l.state; + s.delta = self_layer.delta - l.outputs*l.batch; + if (i == 0) s.delta = 0; + backward_connected_layer(self_layer, s); + + copy_cpu(l.outputs*l.batch, self_layer.delta, 1, input_layer.delta, 1); + if (i > 0 && l.shortcut) axpy_cpu(l.outputs*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.outputs*l.batch, 1); + s.input = net.input + i*l.inputs*l.batch; + if(net.delta) s.delta = net.delta + i*l.inputs*l.batch; + else s.delta = 0; + backward_connected_layer(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} + +#ifdef GPU + +void pull_rnn_layer(layer l) +{ + pull_connected_layer(*(l.input_layer)); + pull_connected_layer(*(l.self_layer)); + pull_connected_layer(*(l.output_layer)); +} + +void push_rnn_layer(layer l) +{ + push_connected_layer(*(l.input_layer)); + push_connected_layer(*(l.self_layer)); + push_connected_layer(*(l.output_layer)); +} + +void update_rnn_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.input_layer), a); + update_connected_layer_gpu(*(l.self_layer), a); + update_connected_layer_gpu(*(l.output_layer), a); +} + +void forward_rnn_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_gpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, self_layer.delta_gpu, 1); + 
fill_gpu(l.outputs * l.batch * l.steps, 0, input_layer.delta_gpu, 1); + + if(net.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.prev_state_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = net.input_gpu; + forward_connected_layer_gpu(input_layer, s); + + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(self_layer, s); + + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(output_layer, s); + + net.input_gpu += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_rnn_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + increment_layer(&input_layer, l.steps - 1); + increment_layer(&self_layer, l.steps - 1); + increment_layer(&output_layer, l.steps - 1); + float *last_input = input_layer.output_gpu; + float *last_self = self_layer.output_gpu; + for (i = l.steps-1; i >= 0; --i) { + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu; + backward_connected_layer_gpu(output_layer, s); + + if(i != 0) { + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + }else { + copy_gpu(l.outputs*l.batch, 
l.prev_state_gpu, 1, l.state_gpu, 1); + } + + copy_gpu(l.outputs*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = (i > 0) ? self_layer.delta_gpu - l.outputs*l.batch : 0; + if (i == 0) s.delta_gpu = 0; + backward_connected_layer_gpu(self_layer, s); + + s.input_gpu = net.input_gpu + i*l.inputs*l.batch; + if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch; + else s.delta_gpu = 0; + backward_connected_layer_gpu(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, last_input, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, last_self, 1, l.state_gpu, 1); +} +#endif diff --git a/workloads/realworld/standard/darknet/src/rnn_layer.h b/workloads/realworld/standard/darknet/src/rnn_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..270a63ffafca9a9adb7b995ed674f93c70bdeb51 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/rnn_layer.h @@ -0,0 +1,25 @@ + +#ifndef RNN_LAYER_H +#define RNN_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" +#define USET + +layer make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTIVATION activation, int batch_normalize, int adam); + +void forward_rnn_layer(layer l, network net); +void backward_rnn_layer(layer l, network net); +void update_rnn_layer(layer l, update_args a); + +#ifdef GPU +void forward_rnn_layer_gpu(layer l, network net); +void backward_rnn_layer_gpu(layer l, network net); +void update_rnn_layer_gpu(layer l, update_args a); +void push_rnn_layer(layer l); +void pull_rnn_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/standard/darknet/src/route_layer.c b/workloads/realworld/standard/darknet/src/route_layer.c new file mode 100644 index 
0000000000000000000000000000000000000000..608abe9a1c729eb6bdfd5e0d65c58196b51da496 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/route_layer.c @@ -0,0 +1,134 @@ +#include "route_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + +route_layer make_route_layer(int batch, int n, int *input_layers, int *input_sizes) +{ + fprintf(stderr,"route "); + route_layer l = {0}; + l.type = ROUTE; + l.batch = batch; + l.n = n; + l.input_layers = input_layers; + l.input_sizes = input_sizes; + int i; + int outputs = 0; + for(i = 0; i < n; ++i){ + fprintf(stderr," %d", input_layers[i]); + outputs += input_sizes[i]; + } + fprintf(stderr, "\n"); + l.outputs = outputs; + l.inputs = outputs; + l.delta = calloc(outputs*batch, sizeof(float)); + l.output = calloc(outputs*batch, sizeof(float));; + + l.forward = forward_route_layer; + l.backward = backward_route_layer; + #ifdef GPU + l.forward_gpu = forward_route_layer_gpu; + l.backward_gpu = backward_route_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, outputs*batch); + l.output_gpu = cuda_make_array(l.output, outputs*batch); + #endif + return l; +} + +void resize_route_layer(route_layer *l, network *net) +{ + int i; + layer first = net->layers[l->input_layers[0]]; + l->out_w = first.out_w; + l->out_h = first.out_h; + l->out_c = first.out_c; + l->outputs = first.outputs; + l->input_sizes[0] = first.outputs; + for(i = 1; i < l->n; ++i){ + int index = l->input_layers[i]; + layer next = net->layers[index]; + l->outputs += next.outputs; + l->input_sizes[i] = next.outputs; + if(next.out_w == first.out_w && next.out_h == first.out_h){ + l->out_c += next.out_c; + }else{ + printf("%d %d, %d %d\n", next.out_w, next.out_h, first.out_w, first.out_h); + l->out_h = l->out_w = l->out_c = 0; + } + } + l->inputs = l->outputs; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + 
cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + +void forward_route_layer(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *input = net.layers[index].output; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1); + } + offset += input_size; + } +} + +void backward_route_layer(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *delta = net.layers[index].delta; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1); + } + offset += input_size; + } +} + +#ifdef GPU +void forward_route_layer_gpu(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *input = net.layers[index].output_gpu; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + copy_gpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1); + } + offset += input_size; + } +} + +void backward_route_layer_gpu(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *delta = net.layers[index].delta_gpu; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + axpy_gpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1); + } + offset += input_size; + } +} +#endif diff --git a/workloads/realworld/standard/darknet/src/route_layer.h b/workloads/realworld/standard/darknet/src/route_layer.h new file mode 100644 index 
0000000000000000000000000000000000000000..1d40330ff30c9c93a2180a696d5f67f628ea481c --- /dev/null +++ b/workloads/realworld/standard/darknet/src/route_layer.h @@ -0,0 +1,18 @@ +#ifndef ROUTE_LAYER_H +#define ROUTE_LAYER_H +#include "network.h" +#include "layer.h" + +typedef layer route_layer; + +route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size); +void forward_route_layer(const route_layer l, network net); +void backward_route_layer(const route_layer l, network net); +void resize_route_layer(route_layer *l, network *net); + +#ifdef GPU +void forward_route_layer_gpu(const route_layer l, network net); +void backward_route_layer_gpu(const route_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/standard/darknet/src/shortcut_layer.c b/workloads/realworld/standard/darknet/src/shortcut_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..e5b9e14301c0a6b7e03b270824352f1ba40163cd --- /dev/null +++ b/workloads/realworld/standard/darknet/src/shortcut_layer.c @@ -0,0 +1,90 @@ +#include "shortcut_layer.h" +#include "cuda_dark.h" +#include "blas.h" +#include "activations.h" + +#include +#include + +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) +{ + fprintf(stderr, "res %3d %4d x%4d x%4d -> %4d x%4d x%4d\n",index, w2,h2,c2, w,h,c); + layer l = {0}; + l.type = SHORTCUT; + l.batch = batch; + l.w = w2; + l.h = h2; + l.c = c2; + l.out_w = w; + l.out_h = h; + l.out_c = c; + l.outputs = w*h*c; + l.inputs = l.outputs; + + l.index = index; + + l.delta = calloc(l.outputs*batch, sizeof(float)); + l.output = calloc(l.outputs*batch, sizeof(float));; + + l.forward = forward_shortcut_layer; + l.backward = backward_shortcut_layer; + #ifdef GPU + l.forward_gpu = forward_shortcut_layer_gpu; + l.backward_gpu = backward_shortcut_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + #endif + 
return l; +} + +void resize_shortcut_layer(layer *l, int w, int h) +{ + assert(l->w == l->out_w); + assert(l->h == l->out_h); + l->w = l->out_w = w; + l->h = l->out_h = h; + l->outputs = w*h*l->out_c; + l->inputs = l->outputs; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + + +void forward_shortcut_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + shortcut_cpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output); + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_shortcut_layer(const layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + axpy_cpu(l.outputs*l.batch, l.alpha, l.delta, 1, net.delta, 1); + shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta); +} + +#ifdef GPU +void forward_shortcut_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + shortcut_gpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output_gpu); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_shortcut_layer_gpu(const layer l, network net) +{ + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + axpy_gpu(l.outputs*l.batch, l.alpha, l.delta_gpu, 1, net.delta_gpu, 1); + shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta_gpu); +} +#endif diff --git a/workloads/realworld/standard/darknet/src/shortcut_layer.h 
b/workloads/realworld/standard/darknet/src/shortcut_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..5f684fc1eadea2c6902be96bf4a4bf9a3b533da9 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/shortcut_layer.h @@ -0,0 +1,17 @@ +#ifndef SHORTCUT_LAYER_H +#define SHORTCUT_LAYER_H + +#include "layer.h" +#include "network.h" + +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); +void forward_shortcut_layer(const layer l, network net); +void backward_shortcut_layer(const layer l, network net); +void resize_shortcut_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_shortcut_layer_gpu(const layer l, network net); +void backward_shortcut_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/standard/darknet/src/softmax_layer.c b/workloads/realworld/standard/darknet/src/softmax_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..569b62b14097ed226d9939d8e1f1fd2899083ee6 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/softmax_layer.c @@ -0,0 +1,107 @@ +#include "softmax_layer.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +softmax_layer make_softmax_layer(int batch, int inputs, int groups) +{ + assert(inputs%groups == 0); + fprintf(stderr, "softmax %4d\n", inputs); + softmax_layer l = {0}; + l.type = SOFTMAX; + l.batch = batch; + l.groups = groups; + l.inputs = inputs; + l.outputs = inputs; + l.loss = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_softmax_layer; + l.backward = backward_softmax_layer; + #ifdef GPU + l.forward_gpu = forward_softmax_layer_gpu; + l.backward_gpu = backward_softmax_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.loss_gpu = cuda_make_array(l.loss, inputs*batch); 
+ l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_softmax_layer(const softmax_layer l, network net) +{ + if(l.softmax_tree){ + int i; + int count = 0; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_cpu(net.input + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output + count); + count += group_size; + } + } else { + softmax_cpu(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output); + } + + if(net.truth && !l.noloss){ + softmax_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_softmax_layer(const softmax_layer l, network net) +{ + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_softmax_layer_output(const softmax_layer layer) +{ + cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch); +} + +void forward_softmax_layer_gpu(const softmax_layer l, network net) +{ + if(l.softmax_tree){ + softmax_tree(net.input_gpu, 1, l.batch, l.inputs, l.temperature, l.output_gpu, *l.softmax_tree); + /* + int i; + int count = 0; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_gpu(net.input_gpu + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count); + count += group_size; + } + */ + } else { + if(l.spatial){ + softmax_gpu(net.input_gpu, l.c, l.batch*l.c, l.inputs/l.c, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu); + }else{ + softmax_gpu(net.input_gpu, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu); + } + } + if(net.truth && !l.noloss){ + softmax_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); + if(l.softmax_tree){ + mask_gpu(l.batch*l.inputs, l.delta_gpu, SECRET_NUM, net.truth_gpu, 0); + 
mask_gpu(l.batch*l.inputs, l.loss_gpu, SECRET_NUM, net.truth_gpu, 0); + } + cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_softmax_layer_gpu(const softmax_layer layer, network net) +{ + axpy_gpu(layer.batch*layer.inputs, 1, layer.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/standard/darknet/src/softmax_layer.h b/workloads/realworld/standard/darknet/src/softmax_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..2e3ffe01a6c5d273a9f6139bc9f265cd7e2bc860 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/softmax_layer.h @@ -0,0 +1,19 @@ +#ifndef SOFTMAX_LAYER_H +#define SOFTMAX_LAYER_H +#include "layer.h" +#include "network.h" + +typedef layer softmax_layer; + +void softmax_array(float *input, int n, float temp, float *output); +softmax_layer make_softmax_layer(int batch, int inputs, int groups); +void forward_softmax_layer(const softmax_layer l, network net); +void backward_softmax_layer(const softmax_layer l, network net); + +#ifdef GPU +void pull_softmax_layer_output(const softmax_layer l); +void forward_softmax_layer_gpu(const softmax_layer l, network net); +void backward_softmax_layer_gpu(const softmax_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/standard/darknet/src/stb_image.h b/workloads/realworld/standard/darknet/src/stb_image.h new file mode 100644 index 0000000000000000000000000000000000000000..d9c21bc813f1f24de2a25ee3cc82bdce9413eaa5 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/stb_image.h @@ -0,0 +1,7462 @@ +/* stb_image - v2.19 - public domain image loader - http://nothings.org/stb + no warranty implied; use at your own risk + + Do this: + #define STB_IMAGE_IMPLEMENTATION + before you include this file in *one* C or C++ file to create the implementation. + + // i.e. it should look like this: + #include ... + #include ... + #include ... 
+ #define STB_IMAGE_IMPLEMENTATION + #include "stb_image.h" + + You can #define STBI_ASSERT(x) before the #include to avoid using assert.h. + And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free + + + QUICK NOTES: + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface + + JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) + PNG 1/2/4/8/16-bit-per-channel + + TGA (not sure what subset, if a subset) + BMP non-1bpp, non-RLE + PSD (composited view only, no extra channels, 8/16 bit-per-channel) + + GIF (*comp always reports as 4-channel) + HDR (radiance rgbE format) + PIC (Softimage PIC) + PNM (PPM and PGM binary only) + + Animated GIF still needs a proper API, but here's one way to do it: + http://gist.github.com/urraka/685d9a6340b26b830d49 + + - decode from memory or through FILE (define STBI_NO_STDIO to remove code) + - decode from arbitrary I/O callbacks + - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) + + Full documentation under "DOCUMENTATION" below. + + +LICENSE + + See end of file for license information. 
+ +RECENT REVISION HISTORY: + + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings + 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes + 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 + RGB-format JPEG; remove white matting in PSD; + allocate large structures on the stack; + correct channel count for PNG & BMP + 2.10 (2016-01-22) avoid warning introduced in 2.09 + 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED + + See end of file for full revision history. + + + ============================ Contributors ========================= + + Image formats Extensions, features + Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) + Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) + Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) + Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) + Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) + Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) + Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) + github:urraka (animated gif) Junggon Kim (PNM comments) + Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) + socks-the-fox (16-bit PNG) + Jeremy Sawicki (handle all ImageNet JPGs) + Optimizations & bugfixes Mikhail Morozov (1-bit BMP) + Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query) + Arseny Kapoulkine + John-Mark Allen + + Bug & warning fixes + Marc LeBlanc David Woo Guillaume George Martins Mozeiko + Christpher Lloyd Jerry Jansson Joseph Thomson Phil Jordan + Dave Moore Roy Eltham Hayaki Saito Nathan Reed + Won Chun Luke Graham Johan Duparc Nick 
Verigakis + the Horde3D community Thomas Ruf Ronny Chevalier github:rlyeh + Janez Zemva John Bartholomew Michal Cichon github:romigrou + Jonathan Blow Ken Hamada Tero Hanninen github:svdijk + Laurent Gomila Cort Stratton Sergio Gonzalez github:snagar + Aruelien Pocheville Thibault Reuille Cass Everitt github:Zelex + Ryamond Barbiero Paul Du Bois Engin Manap github:grim210 + Aldo Culquicondor Philipp Wiesemann Dale Weiler github:sammyhw + Oriol Ferrer Mesia Josh Tobin Matthew Gregan github:phprus + Julian Raschke Gregory Mullen Baldur Karlsson github:poppolopoppo + Christian Floisand Kevin Schmidt github:darealshinji + Blazej Dariusz Roszkowski github:Michaelangel007 +*/ + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +// DOCUMENTATION +// +// Limitations: +// - no 12-bit-per-channel JPEG +// - no JPEGs with arithmetic coding +// - GIF always returns *comp=4 +// +// Basic usage (see HDR discussion below for HDR usage): +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// // ... but 'n' will always be the number that it would have been if you said 0 +// stbi_image_free(data) +// +// Standard parameters: +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *channels_in_file -- outputs # of image components in image file +// int desired_channels -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data, or NULL on an allocation failure or if the image is +// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. 
There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'desired_channels' if desired_channels is non-zero, or +// *channels_in_file otherwise. If desired_channels is non-zero, +// *channels_in_file has the number of components that _would_ have been +// output otherwise. E.g. if you set desired_channels to 4, you will always +// get RGBA output, but you can check *channels_in_file to see if it's trivially +// opaque because e.g. there were only 3 channels in the source image. +// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *channels_in_file will be unchanged. The function +// stbi_failure_reason() can be queried for an extremely brief, end-user +// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS +// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. +// +// =========================================================================== +// +// Philosophy +// +// stb libraries are designed with the following priorities: +// +// 1. easy to use +// 2. easy to maintain +// 3. good performance +// +// Sometimes I let "good performance" creep up in priority over "easy to maintain", +// and for best performance I may provide less-easy-to-use APIs that give higher +// performance, in addition to the easy to use ones. Nevertheless, it's important +// to keep in mind that from the standpoint of you, a client of this library, +// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. 
+// +// Some secondary priorities arise directly from the first two, some of which +// make more explicit reasons why performance can't be emphasized. +// +// - Portable ("ease of use") +// - Small source code footprint ("easy to maintain") +// - No dependencies ("ease of use") +// +// =========================================================================== +// +// I/O callbacks +// +// I/O callbacks allow you to read from arbitrary sources, like packaged +// files or some other source. Data read from callbacks are processed +// through a small internal buffer (currently 128 bytes) to try to reduce +// overhead. +// +// The three functions you must define are "read" (reads some bytes of data), +// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end). +// +// =========================================================================== +// +// SIMD support +// +// The JPEG decoder will try to automatically use SIMD kernels on x86 when +// supported by the compiler. For ARM Neon support, you must explicitly +// request it. +// +// (The old do-it-yourself SIMD API is no longer supported in the current +// code.) +// +// On x86, SSE2 will automatically be used when available based on a run-time +// test; if not, the generic C versions are used as a fall-back. On ARM targets, +// the typical path is to have separate builds for NEON and non-NEON devices +// (at least this is true for iOS and Android). Therefore, the NEON support is +// toggled by a build flag: define STBI_NEON to get NEON loops. +// +// If for some reason you do not want to use any of SIMD code, or if +// you have issues compiling it, you can disable it entirely by +// defining STBI_NO_SIMD. 
+// +// =========================================================================== +// +// HDR image support (disable by defining STBI_NO_HDR) +// +// stb_image now supports loading HDR images in general, and currently +// the Radiance .HDR file format, although the support is provided +// generically. You can still load any file through the existing interface; +// if you attempt to load an HDR file, it will be automatically remapped to +// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). +// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +// float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +// stbi_ldr_to_hdr_scale(1.0f); +// stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +// stbi_is_hdr(char *filename); +// +// =========================================================================== +// +// iPhone PNG support: +// +// By default we convert iphone-formatted PNGs back to RGB, even though +// they are internally encoded differently. You can disable this conversion +// by by calling stbi_convert_iphone_png_to_rgb(0), in which case +// you will always just get the native iphone "format" through (which +// is BGR stored in RGB). 
+// +// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per +// pixel to remove any premultiplied alpha *only* if the image file explicitly +// says there's premultiplied data (currently only happens in iPhone images, +// and only if iPhone convert-to-rgb processing is on). +// +// =========================================================================== +// +// ADDITIONAL CONFIGURATION +// +// - You can suppress implementation of any of the decoders to reduce +// your code footprint by #defining one or more of the following +// symbols before creating the implementation. +// +// STBI_NO_JPEG +// STBI_NO_PNG +// STBI_NO_BMP +// STBI_NO_PSD +// STBI_NO_TGA +// STBI_NO_GIF +// STBI_NO_HDR +// STBI_NO_PIC +// STBI_NO_PNM (.ppm and .pgm) +// +// - You can request *only* certain decoders and suppress all other ones +// (this will be more forward-compatible, as addition of new decoders +// doesn't require you to disable them explicitly): +// +// STBI_ONLY_JPEG +// STBI_ONLY_PNG +// STBI_ONLY_BMP +// STBI_ONLY_PSD +// STBI_ONLY_TGA +// STBI_ONLY_GIF +// STBI_ONLY_HDR +// STBI_ONLY_PIC +// STBI_ONLY_PNM (.ppm and .pgm) +// +// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still +// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB +// + + +#ifndef STBI_NO_STDIO +#include +#endif // STBI_NO_STDIO + +#define STBI_VERSION 1 + +enum +{ + STBI_default = 0, // only used for desired_channels + + STBI_grey = 1, + STBI_grey_alpha = 2, + STBI_rgb = 3, + STBI_rgb_alpha = 4 +}; + +typedef unsigned char stbi_uc; +typedef unsigned short stbi_us; + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef STB_IMAGE_STATIC +#define STBIDEF static +#else +#define STBIDEF extern +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// PRIMARY API - works on images of any type +// + +// +// load image by filename, open file, or memory buffer +// + +typedef struct +{ + int (*read) (void *user,char *data,int 
size); // fill 'data' with 'size' bytes. return number of bytes actually read + void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative + int (*eof) (void *user); // returns nonzero if we are at end of file/data +} stbi_io_callbacks; + +//////////////////////////////////// +// +// 8-bits-per-channel interface +// + +STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels); +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +#endif + + +#ifndef STBI_NO_STDIO +STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +// for stbi_load_from_file, file pointer is left pointing immediately after image +#endif + +//////////////////////////////////// +// +// 16-bits-per-channel interface +// + +STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +#endif + +//////////////////////////////////// +// +// float-per-channel interface +// +#ifndef STBI_NO_LINEAR + STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int 
*channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + + #ifndef STBI_NO_STDIO + STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); + #endif +#endif + +#ifndef STBI_NO_HDR + STBIDEF void stbi_hdr_to_ldr_gamma(float gamma); + STBIDEF void stbi_hdr_to_ldr_scale(float scale); +#endif // STBI_NO_HDR + +#ifndef STBI_NO_LINEAR + STBIDEF void stbi_ldr_to_hdr_gamma(float gamma); + STBIDEF void stbi_ldr_to_hdr_scale(float scale); +#endif // STBI_NO_LINEAR + +// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename); +STBIDEF int stbi_is_hdr_from_file(FILE *f); +#endif // STBI_NO_STDIO + + +// get a VERY brief reason for failure +// NOT THREADSAFE +STBIDEF const char *stbi_failure_reason (void); + +// free the loaded image -- this is just free() +STBIDEF void stbi_image_free (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit (char const 
*filename); +STBIDEF int stbi_is_16_bit_from_file(FILE *f); +#endif + + + +// for image formats that explicitly notate that they have premultiplied alpha, +// we just return the colors as stored in the file. set this flag to force +// unpremultiplication. results are undefined if the unpremultiply overflow. +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); + +// indicate whether we should process iphone images back to canonical format, +// or just pass them through "as-is" +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); + +// flip the image vertically, so the first pixel in the output array is the bottom left +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); + +// ZLIB client - used by PNG, available for other purposes + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); +STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + + +#ifdef __cplusplus +} +#endif + +// +// +//// end header file ///////////////////////////////////////////////////// +#endif // STBI_INCLUDE_STB_IMAGE_H + +#ifdef STB_IMAGE_IMPLEMENTATION + +#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \ + || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \ + || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \ + || defined(STBI_ONLY_ZLIB) + #ifndef STBI_ONLY_JPEG + #define STBI_NO_JPEG + #endif + #ifndef STBI_ONLY_PNG + 
#define STBI_NO_PNG + #endif + #ifndef STBI_ONLY_BMP + #define STBI_NO_BMP + #endif + #ifndef STBI_ONLY_PSD + #define STBI_NO_PSD + #endif + #ifndef STBI_ONLY_TGA + #define STBI_NO_TGA + #endif + #ifndef STBI_ONLY_GIF + #define STBI_NO_GIF + #endif + #ifndef STBI_ONLY_HDR + #define STBI_NO_HDR + #endif + #ifndef STBI_ONLY_PIC + #define STBI_NO_PIC + #endif + #ifndef STBI_ONLY_PNM + #define STBI_NO_PNM + #endif +#endif + +#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB) +#define STBI_NO_ZLIB +#endif + + +#include +#include // ptrdiff_t on osx +#include +#include +#include + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +#include // ldexp, pow +#endif + +#ifndef STBI_NO_STDIO +#include +#endif + +#ifndef STBI_ASSERT +#include +#define STBI_ASSERT(x) assert(x) +#endif + + +#ifndef _MSC_VER + #ifdef __cplusplus + #define stbi_inline inline + #else + #define stbi_inline + #endif +#else + #define stbi_inline __forceinline +#endif + + +#ifdef _MSC_VER +typedef unsigned short stbi__uint16; +typedef signed short stbi__int16; +typedef unsigned int stbi__uint32; +typedef signed int stbi__int32; +#else +#include +typedef uint16_t stbi__uint16; +typedef int16_t stbi__int16; +typedef uint32_t stbi__uint32; +typedef int32_t stbi__int32; +#endif + +// should produce compiler error if size is wrong +typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 
1 : -1]; + +#ifdef _MSC_VER +#define STBI_NOTUSED(v) (void)(v) +#else +#define STBI_NOTUSED(v) (void)sizeof(v) +#endif + +#ifdef _MSC_VER +#define STBI_HAS_LROTL +#endif + +#ifdef STBI_HAS_LROTL + #define stbi_lrot(x,y) _lrotl(x,y) +#else + #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y)))) +#endif + +#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) +// ok +#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)." +#endif + +#ifndef STBI_MALLOC +#define STBI_MALLOC(sz) malloc(sz) +#define STBI_REALLOC(p,newsz) realloc(p,newsz) +#define STBI_FREE(p) free(p) +#endif + +#ifndef STBI_REALLOC_SIZED +#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz) +#endif + +// x86/x64 detection +#if defined(__x86_64__) || defined(_M_X64) +#define STBI__X64_TARGET +#elif defined(__i386) || defined(_M_IX86) +#define STBI__X86_TARGET +#endif + +#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) +// gcc doesn't support sse2 intrinsics unless you compile with -msse2, +// which in turn means it gets to use SSE2 everywhere. This is unfortunate, +// but previous attempts to provide the SSE2 functions with runtime +// detection caused numerous issues. The way architecture extensions are +// exposed in GCC/Clang is, sadly, not really suited for one-file libs. +// New behavior: if compiled with -msse2, we use SSE2 without any +// detection; if not, we don't use it at all. 
+#define STBI_NO_SIMD +#endif + +#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD) +// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET +// +// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the +// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant. +// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not +// simultaneously enabling "-mstackrealign". +// +// See https://github.com/nothings/stb/issues/81 for more information. +// +// So default to no SSE2 on 32-bit MinGW. If you've read this far and added +// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2. +#define STBI_NO_SIMD +#endif + +#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) +#define STBI_SSE2 +#include + +#ifdef _MSC_VER + +#if _MSC_VER >= 1400 // not VC6 +#include // __cpuid +static int stbi__cpuid3(void) +{ + int info[4]; + __cpuid(info,1); + return info[3]; +} +#else +static int stbi__cpuid3(void) +{ + int res; + __asm { + mov eax,1 + cpuid + mov res,edx + } + return res; +} +#endif + +#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name + +static int stbi__sse2_available(void) +{ + int info3 = stbi__cpuid3(); + return ((info3 >> 26) & 1) != 0; +} +#else // assume GCC-style if not VC++ +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) + +static int stbi__sse2_available(void) +{ + // If we're even attempting to compile this on GCC/Clang, that means + // -msse2 is on, which means the compiler is allowed to use SSE2 + // instructions at will, and so are we. 
+ return 1; +} +#endif +#endif + +// ARM NEON +#if defined(STBI_NO_SIMD) && defined(STBI_NEON) +#undef STBI_NEON +#endif + +#ifdef STBI_NEON +#include +// assume GCC or Clang on ARM targets +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) +#endif + +#ifndef STBI_SIMD_ALIGN +#define STBI_SIMD_ALIGN(type, name) type name +#endif + +/////////////////////////////////////////////// +// +// stbi__context struct and start_xxx functions + +// stbi__context structure is our basic context used by all images, so it +// contains all the IO context, plus some basic image information +typedef struct +{ + stbi__uint32 img_x, img_y; + int img_n, img_out_n; + + stbi_io_callbacks io; + void *io_user_data; + + int read_from_callbacks; + int buflen; + stbi_uc buffer_start[128]; + + stbi_uc *img_buffer, *img_buffer_end; + stbi_uc *img_buffer_original, *img_buffer_original_end; +} stbi__context; + + +static void stbi__refill_buffer(stbi__context *s); + +// initialize a memory-decode context +static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len) +{ + s->io.read = NULL; + s->read_from_callbacks = 0; + s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer; + s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len; +} + +// initialize a callback-based context +static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user) +{ + s->io = *c; + s->io_user_data = user; + s->buflen = sizeof(s->buffer_start); + s->read_from_callbacks = 1; + s->img_buffer_original = s->buffer_start; + stbi__refill_buffer(s); + s->img_buffer_original_end = s->img_buffer_end; +} + +#ifndef STBI_NO_STDIO + +static int stbi__stdio_read(void *user, char *data, int size) +{ + return (int) fread(data,1,size,(FILE*) user); +} + +static void stbi__stdio_skip(void *user, int n) +{ + fseek((FILE*) user, n, SEEK_CUR); +} + +static int stbi__stdio_eof(void *user) +{ + return feof((FILE*) user); +} + +static stbi_io_callbacks 
stbi__stdio_callbacks = +{ + stbi__stdio_read, + stbi__stdio_skip, + stbi__stdio_eof, +}; + +static void stbi__start_file(stbi__context *s, FILE *f) +{ + stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); +} + +//static void stop_file(stbi__context *s) { } + +#endif // !STBI_NO_STDIO + +static void stbi__rewind(stbi__context *s) +{ + // conceptually rewind SHOULD rewind to the beginning of the stream, + // but we just rewind to the beginning of the initial buffer, because + // we only use it after doing 'test', which only ever looks at at most 92 bytes + s->img_buffer = s->img_buffer_original; + s->img_buffer_end = s->img_buffer_original_end; +} + +enum +{ + STBI_ORDER_RGB, + STBI_ORDER_BGR +}; + +typedef struct +{ + int bits_per_channel; + int num_channels; + int channel_order; +} stbi__result_info; + +#ifndef STBI_NO_JPEG +static int stbi__jpeg_test(stbi__context *s); +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNG +static int stbi__png_test(stbi__context *s); +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__png_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_BMP +static int stbi__bmp_test(stbi__context *s); +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_TGA +static int stbi__tga_test(stbi__context *s); +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s); +static 
void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
static int stbi__psd_is16(stbi__context *s);
#endif

#ifndef STBI_NO_HDR
static int stbi__hdr_test(stbi__context *s);
static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PIC
static int stbi__pic_test(stbi__context *s);
static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_GIF
static int stbi__gif_test(stbi__context *s);
static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PNM
static int stbi__pnm_test(stbi__context *s);
static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
#endif

// this is not threadsafe
static const char *stbi__g_failure_reason;

// returns the reason string recorded by the most recent stbi__err call
STBIDEF const char *stbi_failure_reason(void)
{
   return stbi__g_failure_reason;
}

// record a failure reason and return 0 so callers can `return stbi__err(...)`
static int stbi__err(const char *str)
{
   stbi__g_failure_reason = str;
   return 0;
}

// single allocation chokepoint; STBI_MALLOC is user-overridable
static void *stbi__malloc(size_t size)
{
    return STBI_MALLOC(size);
}

// stb_image uses ints pervasively, including for offset calculations.
// therefore the largest decoded image size we can support with the
// current code, even on 64-bit targets, is INT_MAX. this is not a
// significant limitation for the intended use case.
// we do, however, need to make sure our size calculations don't
// overflow. hence a few helper functions for size calculations that
// multiply integers together, making sure that they're non-negative
// and no overflow occurs.

// return 1 if the sum is valid, 0 on overflow.
// negative terms are considered invalid.
static int stbi__addsizes_valid(int a, int b)
{
   if (b < 0) return 0;
   // with 0 <= b <= INT_MAX, the (possibly overflowing) test
   // "a + b <= INT_MAX" is equivalent to the safe "a <= INT_MAX - b"
   return (INT_MAX - b >= a) ? 1 : 0;
}

// returns 1 if the product is valid, 0 on overflow.
// negative factors are considered invalid.
static int stbi__mul2sizes_valid(int a, int b)
{
   if (a < 0) return 0;
   if (b < 0) return 0;
   if (b == 0) return 1; // mul-by-0 is always safe
   // portable way to check for no overflows in a*b
   return (INT_MAX / b >= a) ? 1 : 0;
}

// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   return stbi__addsizes_valid(a*b, add);
}

// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   return stbi__addsizes_valid(a*b*c, add);
}

// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   if (!stbi__mul2sizes_valid(a*b*c, d)) return 0;
   return stbi__addsizes_valid(a*b*c*d, add);
}
#endif

// mallocs with size overflow checking
static void *stbi__malloc_mad2(int a, int b, int add)
{
   if (!stbi__mad2sizes_valid(a, b, add)) return NULL;
   return stbi__malloc(a*b + add);
}

static void
*stbi__malloc_mad3(int a, int b, int c, int add)
{
   if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL;
   return stbi__malloc(a*b*c + add);
}

#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
   return stbi__malloc(a*b*c*d + add);
}
#endif

// stbi__err - error
// stbi__errpf - error returning pointer to float
// stbi__errpuc - error returning pointer to unsigned char

#ifdef STBI_NO_FAILURE_STRINGS
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

// these always evaluate to NULL; the stbi__err call records the reason first
#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))

// free a pixel buffer returned by any stbi_load* function
STBIDEF void stbi_image_free(void *retval_from_stbi_load)
{
   STBI_FREE(retval_from_stbi_load);
}

#ifndef STBI_NO_LINEAR
static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
#endif

#ifndef STBI_NO_HDR
static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp);
#endif

// global flag: flip all loaded images vertically (not threadsafe)
static int stbi__vertically_flip_on_load = 0;

STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
{
   stbi__vertically_flip_on_load = flag_true_if_should_flip;
}

// dispatch to the first format whose 'test' recognizes the stream;
// bpc selects 8- or 16-bit decode for formats that support both (PSD)
static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
   ri->num_channels = 0;

   #ifndef STBI_NO_JPEG
   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNG
   if (stbi__png_test(s)) return
stbi__png_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_BMP
   if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_GIF
   if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PSD
   if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
   #endif
   #ifndef STBI_NO_PIC
   if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNM
   if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri);
   #endif

   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
      return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
   }
   #endif

   #ifndef STBI_NO_TGA
   // test tga last because it's a crappy test!
   if (stbi__tga_test(s))
      return stbi__tga_load(s,x,y,comp,req_comp, ri);
   #endif

   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
}

// narrow a 16-bit-per-channel buffer to 8 bits per channel;
// frees 'orig' and returns a freshly allocated buffer (NULL on OOM)
static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
{
   int i;
   int img_len = w * h * channels;
   stbi_uc *reduced;

   reduced = (stbi_uc *) stbi__malloc(img_len);
   if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");

   for (i = 0; i < img_len; ++i)
      reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling

   STBI_FREE(orig);
   return reduced;
}

// widen an 8-bit-per-channel buffer to 16 bits per channel;
// frees 'orig' and returns a freshly allocated buffer (NULL on OOM).
// NOTE(review): img_len*2 is not routed through the stbi__malloc_mad*
// overflow-checked helpers -- presumably safe because decoders already
// bounded w*h*channels, but worth confirming.
static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
{
   int i;
   int img_len = w * h * channels;
   stbi__uint16 *enlarged;

   enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
   if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");

   for (i = 0; i < img_len; ++i)
      enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff

   STBI_FREE(orig);
   return enlarged;
}

static void
stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
{
   // flip an image buffer in place, top row <-> bottom row, using a
   // fixed-size stack buffer so rows of any width can be swapped in chunks
   int row;
   size_t bytes_per_row = (size_t)w * bytes_per_pixel;
   stbi_uc temp[2048];
   stbi_uc *bytes = (stbi_uc *)image;

   for (row = 0; row < (h>>1); row++) {
      stbi_uc *row0 = bytes + row*bytes_per_row;
      stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
      // swap row0 with row1
      size_t bytes_left = bytes_per_row;
      while (bytes_left) {
         size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
         memcpy(temp, row0, bytes_copy);
         memcpy(row0, row1, bytes_copy);
         memcpy(row1, temp, bytes_copy);
         row0 += bytes_copy;
         row1 += bytes_copy;
         bytes_left -= bytes_copy;
      }
   }
}

// vertically flip each of the z slices of a multi-frame image (e.g. GIF)
static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel)
{
   int slice;
   int slice_size = w * h * bytes_per_pixel;

   stbi_uc *bytes = (stbi_uc *)image;
   for (slice = 0; slice < z; ++slice) {
      stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
      bytes += slice_size;
   }
}

// run stbi__load_main and normalize the result to 8 bits per channel,
// applying the global vertical-flip setting
static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   stbi__result_info ri;
   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);

   if (result == NULL)
      return NULL;

   if (ri.bits_per_channel != 8) {
      STBI_ASSERT(ri.bits_per_channel == 16);
      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
      ri.bits_per_channel = 8;
   }

   // @TODO: move stbi__convert_format to here

   if (stbi__vertically_flip_on_load) {
      int channels = req_comp ?
req_comp : *comp;
      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
   }

   return (unsigned char *) result;
}

// run stbi__load_main and normalize the result to 16 bits per channel,
// applying the global vertical-flip setting
static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   stbi__result_info ri;
   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);

   if (result == NULL)
      return NULL;

   if (ri.bits_per_channel != 16) {
      STBI_ASSERT(ri.bits_per_channel == 8);
      result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
      ri.bits_per_channel = 16;
   }

   // @TODO: move stbi__convert_format16 to here
   // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision

   if (stbi__vertically_flip_on_load) {
      int channels = req_comp ? req_comp : *comp;
      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
   }

   return (stbi__uint16 *) result;
}

#if !defined(STBI_NO_HDR) || !defined(STBI_NO_LINEAR)
// apply the global vertical-flip setting to a float image (no bit-depth work)
static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
{
   if (stbi__vertically_flip_on_load && result != NULL) {
      int channels = req_comp ?
req_comp : *comp;
      stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
   }
}
#endif

#ifndef STBI_NO_STDIO

// fopen wrapper: uses fopen_s on MSVC 2005+ to avoid its CRT deprecation
static FILE *stbi__fopen(char const *filename, char const *mode)
{
   FILE *f;
#if defined(_MSC_VER) && _MSC_VER >= 1400
   if (0 != fopen_s(&f, filename, mode))
      f=0;
#else
   f = fopen(filename, mode);
#endif
   return f;
}


// load an 8-bit image from a file path
STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   FILE *f = stbi__fopen(filename, "rb");
   unsigned char *result;
   if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
   result = stbi_load_from_file(f,x,y,comp,req_comp);
   fclose(f);
   return result;
}

// load an 8-bit image from an already-open FILE*; on success the file
// position is restored to just past the image data
STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *result;
   stbi__context s;
   stbi__start_file(&s,f);
   result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
   if (result) {
      // need to 'unget' all the characters in the IO buffer
      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
   }
   return result;
}

// 16-bit-per-channel variant of stbi_load_from_file
STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
{
   stbi__uint16 *result;
   stbi__context s;
   stbi__start_file(&s,f);
   result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
   if (result) {
      // need to 'unget' all the characters in the IO buffer
      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
   }
   return result;
}

// 16-bit-per-channel variant of stbi_load
STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   FILE *f = stbi__fopen(filename, "rb");
   stbi__uint16 *result;
   if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
   result = stbi_load_from_file_16(f,x,y,comp,req_comp);
   fclose(f);
   return result;
}


#endif //!STBI_NO_STDIO

// 16-bit-per-channel load from an in-memory buffer
STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
{
   stbi__context s;
stbi__start_mem(&s,buffer,len);
   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
}

// 16-bit-per-channel load through user-supplied IO callbacks
STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
{
   stbi__context s;
   stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
}

// 8-bit load from an in-memory buffer
STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_mem(&s,buffer,len);
   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
}

// 8-bit load through user-supplied IO callbacks
STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
}

#ifndef STBI_NO_GIF
// load all frames of a GIF; *z receives the frame count, *delays (if
// non-NULL) receives a malloc'd array of per-frame delays
STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
{
   unsigned char *result;
   stbi__context s;
   stbi__start_mem(&s,buffer,len);

   result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
   if (stbi__vertically_flip_on_load) {
      stbi__vertical_flip_slices( result, *x, *y, *z, *comp );
   }

   return result;
}
#endif

#ifndef STBI_NO_LINEAR
// decode to linear float: native HDR data passes straight through,
// LDR data is gamma-expanded via stbi__ldr_to_hdr
static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *data;
   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      stbi__result_info ri;
      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
      if (hdr_data)
         stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
      return hdr_data;
   }
   #endif
   data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
   if (data)
      return stbi__ldr_to_hdr(data, *x, *y, req_comp ?
req_comp : *comp);
   return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
}

// float (linear) load from an in-memory buffer
STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_mem(&s,buffer,len);
   return stbi__loadf_main(&s,x,y,comp,req_comp);
}

// float (linear) load through user-supplied IO callbacks
STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
   return stbi__loadf_main(&s,x,y,comp,req_comp);
}

#ifndef STBI_NO_STDIO
// float (linear) load from a file path
STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   float *result;
   FILE *f = stbi__fopen(filename, "rb");
   if (!f) return stbi__errpf("can't fopen", "Unable to open file");
   result = stbi_loadf_from_file(f,x,y,comp,req_comp);
   fclose(f);
   return result;
}

// float (linear) load from an already-open FILE*
STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_file(&s,f);
   return stbi__loadf_main(&s,x,y,comp,req_comp);
}
#endif // !STBI_NO_STDIO

#endif // !STBI_NO_LINEAR

// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is
// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always
// reports false!
// returns nonzero if the buffer holds a Radiance .hdr image
STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
{
   #ifndef STBI_NO_HDR
   stbi__context s;
   stbi__start_mem(&s,buffer,len);
   return stbi__hdr_test(&s);
   #else
   STBI_NOTUSED(buffer);
   STBI_NOTUSED(len);
   return 0;
   #endif
}

#ifndef STBI_NO_STDIO
// returns nonzero if the named file holds a Radiance .hdr image
STBIDEF int stbi_is_hdr (char const *filename)
{
   FILE *f = stbi__fopen(filename, "rb");
   int result=0;
   if (f) {
      result = stbi_is_hdr_from_file(f);
      fclose(f);
   }
   return result;
}

// same test on an open FILE*; restores the original file position
STBIDEF int stbi_is_hdr_from_file(FILE *f)
{
   #ifndef STBI_NO_HDR
   long pos = ftell(f);
   int res;
   stbi__context s;
   stbi__start_file(&s,f);
   res = stbi__hdr_test(&s);
   fseek(f, pos, SEEK_SET);
   return res;
   #else
   STBI_NOTUSED(f);
   return 0;
   #endif
}
#endif // !STBI_NO_STDIO

// same test through user-supplied IO callbacks
STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
{
   #ifndef STBI_NO_HDR
   stbi__context s;
   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
   return stbi__hdr_test(&s);
   #else
   STBI_NOTUSED(clbk);
   STBI_NOTUSED(user);
   return 0;
   #endif
}

#ifndef STBI_NO_LINEAR
// LDR->HDR conversion parameters (not threadsafe)
static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;

STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
#endif

// HDR->LDR conversion parameters, stored pre-inverted (not threadsafe)
static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;

STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }


//////////////////////////////////////////////////////////////////////////////
//
// Common code used by all image loaders
//

enum
{
   STBI__SCAN_load=0,
   STBI__SCAN_type,
   STBI__SCAN_header
};

// pull the next chunk of data from the IO callbacks into buffer_start
static void stbi__refill_buffer(stbi__context *s)
{
   int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
   if (n == 0) {
      // at end of file, treat same as if from memory, but need to handle case
      // where
// s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
      s->read_from_callbacks = 0;
      s->img_buffer = s->buffer_start;
      s->img_buffer_end = s->buffer_start+1;
      *s->img_buffer = 0;
   } else {
      s->img_buffer = s->buffer_start;
      s->img_buffer_end = s->buffer_start + n;
   }
}

// read one byte, refilling from callbacks if needed; returns 0 at EOF
stbi_inline static stbi_uc stbi__get8(stbi__context *s)
{
   if (s->img_buffer < s->img_buffer_end)
      return *s->img_buffer++;
   if (s->read_from_callbacks) {
      stbi__refill_buffer(s);
      return *s->img_buffer++;
   }
   return 0;
}

// nonzero when no more input bytes are available
stbi_inline static int stbi__at_eof(stbi__context *s)
{
   if (s->io.read) {
      if (!(s->io.eof)(s->io_user_data)) return 0;
      // if feof() is true, check if buffer = end
      // special case: we've only got the special 0 character at the end
      if (s->read_from_callbacks == 0) return 1;
   }

   return s->img_buffer >= s->img_buffer_end;
}

// skip forward n bytes; negative n means "skip to end of input"
static void stbi__skip(stbi__context *s, int n)
{
   if (n < 0) {
      s->img_buffer = s->img_buffer_end;
      return;
   }
   if (s->io.read) {
      int blen = (int) (s->img_buffer_end - s->img_buffer);
      if (blen < n) {
         s->img_buffer = s->img_buffer_end;
         (s->io.skip)(s->io_user_data, n - blen);
         return;
      }
   }
   s->img_buffer += n;
}

// read exactly n bytes into 'buffer'; returns 1 on success, 0 on short read
static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
{
   if (s->io.read) {
      int blen = (int) (s->img_buffer_end - s->img_buffer);
      if (blen < n) {
         int res, count;

         memcpy(buffer, s->img_buffer, blen);

         count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
         res = (count == (n-blen));
         s->img_buffer = s->img_buffer_end;
         return res;
      }
   }

   if (s->img_buffer+n <= s->img_buffer_end) {
      memcpy(buffer, s->img_buffer, n);
      s->img_buffer += n;
      return 1;
   } else
      return 0;
}

// read a 16-bit big-endian value
static int stbi__get16be(stbi__context *s)
{
   int z = stbi__get8(s);
   return (z << 8) + stbi__get8(s);
}

// read a 32-bit big-endian value
static stbi__uint32 stbi__get32be(stbi__context *s)
{
   stbi__uint32 z = stbi__get16be(s);
   return (z << 16) + stbi__get16be(s);
}

#if
defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
// nothing
#else
// read a 16-bit little-endian value (only needed by BMP/TGA/GIF)
static int stbi__get16le(stbi__context *s)
{
   int z = stbi__get8(s);
   return z + (stbi__get8(s) << 8);
}
#endif

#ifndef STBI_NO_BMP
// read a 32-bit little-endian value (only needed by BMP)
static stbi__uint32 stbi__get32le(stbi__context *s)
{
   stbi__uint32 z = stbi__get16le(s);
   return z + (stbi__get16le(s) << 16);
}
#endif

#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings


//////////////////////////////////////////////////////////////////////////////
//
//  generic converter from built-in img_n to req_comp
//    individual types do this automatically as much as possible (e.g. jpeg
//    does all cases internally since it needs to colorspace convert anyway,
//    and it never has alpha, so very few cases ). png can automatically
//    interleave an alpha=255 channel, but falls back to this for other cases
//
//  assume data buffer is malloced, so malloc a new one and free that one
//  only failure mode is malloc failing

// integer-weighted luma approximation: y ~= 0.30r + 0.59g + 0.11b
static stbi_uc stbi__compute_y(int r, int g, int b)
{
   return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8);
}

// convert an 8-bit buffer from img_n to req_comp components;
// frees 'data' and returns a new buffer (NULL on OOM)
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   unsigned char *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      unsigned char *src = data + j * x * img_n ;
      unsigned char *dest = good + j * x * req_comp;

      #define STBI__COMBO(a,b) ((a)*8+(b))
      #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n,
req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255; } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; } break;
         STBI__CASE(2,1) { dest[0]=src[0]; } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break;
         default: STBI_ASSERT(0);
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}

// 16-bit variant of stbi__compute_y (same integer weights)
static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
{
   return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8);
}

// convert a 16-bit buffer from img_n to req_comp components;
// frees 'data' and returns a new buffer (NULL on OOM)
static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   stbi__uint16 *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   // NOTE(review): unlike stbi__convert_format above, this allocation is not
   // routed through the overflow-checked stbi__malloc_mad* helpers -- confirm
   // callers guarantee req_comp*x*y*2 cannot overflow.
   good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2);
   if (good == NULL) {
      STBI_FREE(data);
      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      stbi__uint16 *src = data + j * x * img_n ;
      stbi__uint16 *dest = good + j * x * req_comp;

      #define STBI__COMBO(a,b) ((a)*8+(b))
      #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff; } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff; } break;
         STBI__CASE(2,1) { dest[0]=src[0]; } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff; } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break;
         default: STBI_ASSERT(0);
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}

#ifndef STBI_NO_LINEAR
// gamma-expand an 8-bit LDR buffer into linear floats (alpha, if present,
// is scaled linearly, not gamma-corrected); frees 'data'
static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
{
   int i,k,n;
   float *output;
   if (!data) return NULL;
   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
   if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
   // compute number of non-alpha components
   if (comp & 1) n = comp; else n = comp-1;
   for (i=0; i < x*y; ++i) {
      for (k=0; k < n; ++k) {
         output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
      }
      if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
   }
   STBI_FREE(data);
   return output;
}
#endif

#ifndef STBI_NO_HDR
#define stbi__float2int(x) ((int) (x))
// gamma-compress a linear float buffer into 8-bit LDR, clamping to [0,255]
// (alpha, if present, is scaled linearly); frees 'data'
static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
{
   int i,k,n;
   stbi_uc *output;
   if (!data) return NULL;
   output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
   if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
   // compute number of non-alpha components
   if (comp & 1) n = comp; else n = comp-1;
   for (i=0; i < x*y; ++i) {
      for (k=0; k < n; ++k) {
         float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
         if (z < 0) z = 0;
         if (z > 255) z = 255;
         output[i*comp + k] = (stbi_uc) stbi__float2int(z);
      }
      if (k < comp) {
         float z = data[i*comp+k] * 255 + 0.5f;
         if (z < 0) z = 0;
         if (z > 255) z = 255;
         output[i*comp + k] = (stbi_uc) stbi__float2int(z);
      }
   }
   STBI_FREE(data);
   return output;
}
#endif

//////////////////////////////////////////////////////////////////////////////
//
//  "baseline" JPEG/JFIF decoder
//
//    simple implementation
//      - doesn't support delayed output of y-dimension
//      - simple interface (only one output format: 8-bit interleaved RGB)
//      - doesn't try to recover corrupt jpegs
//      - doesn't allow partial loading, loading multiple at once
//      - still fast on x86 (copying globals into locals doesn't help x86)
//      - allocates lots of intermediate memory (full size of all components)
//        - non-interleaved case requires this anyway
//      - allows good upsampling (see next)
//    high-quality
//      - upsampled channels are bilinearly interpolated, even across blocks
//      - quality integer IDCT derived from IJG's 'slow'
//    performance
//      - fast huffman; reasonable integer IDCT
//      - some SIMD kernels for common paths on targets with SSE2/NEON
//      - uses a lot of intermediate memory, could cache poorly

#ifndef STBI_NO_JPEG

// huffman decoding acceleration
#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache

typedef struct
{
   stbi_uc fast[1 << FAST_BITS];
   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
   stbi__uint16 code[256];
   stbi_uc values[256];
   stbi_uc size[257];
   unsigned int maxcode[18];
   int delta[17]; // old 'firstsymbol' - old 'firstcode'
} stbi__huffman;

// complete per-decode JPEG state
typedef struct
{
   stbi__context *s;
   stbi__huffman
huff_dc[4];
   stbi__huffman huff_ac[4];
   stbi__uint16 dequant[4][64];
   stbi__int16 fast_ac[4][1 << FAST_BITS];

// sizes for components, interleaved MCUs
   int img_h_max, img_v_max;
   int img_mcu_x, img_mcu_y;
   int img_mcu_w, img_mcu_h;

// definition of jpeg image component
   struct
   {
      int id;
      int h,v;
      int tq;
      int hd,ha;
      int dc_pred;

      int x,y,w2,h2;
      stbi_uc *data;
      void *raw_data, *raw_coeff;
      stbi_uc *linebuf;
      short *coeff; // progressive only
      int coeff_w, coeff_h; // number of 8x8 coefficient blocks
   } img_comp[4];

   stbi__uint32 code_buffer; // jpeg entropy-coded buffer
   int code_bits; // number of valid bits
   unsigned char marker; // marker seen while filling entropy buffer
   int nomore; // flag if we saw a marker so must stop

   int progressive;
   int spec_start;
   int spec_end;
   int succ_high;
   int succ_low;
   int eob_run;
   int jfif;
   int app14_color_transform; // Adobe APP14 tag
   int rgb;

   int scan_n, order[4];
   int restart_interval, todo;

// kernels
   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
} stbi__jpeg;

// build a canonical Huffman table from the 16 per-length symbol counts;
// returns 0 (with error reason set) on malformed code lengths
static int stbi__build_huffman(stbi__huffman *h, int *count)
{
   int i,j,k=0;
   unsigned int code;
   // build size list for each symbol (from JPEG spec)
   for (i=0; i < 16; ++i)
      for (j=0; j < count[i]; ++j)
         h->size[k++] = (stbi_uc) (i+1);
   h->size[k] = 0;

   // compute actual symbols (from jpeg spec)
   code = 0;
   k = 0;
   for(j=1; j <= 16; ++j) {
      // compute delta to add to code to compute symbol id
      h->delta[j] = k - code;
      if (h->size[k] == j) {
         while (h->size[k] == j)
            h->code[k++] = (stbi__uint16) (code++);
         if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
      }
      // compute largest code + 1 for this
// size, preshifted as needed later
      h->maxcode[j] = code << (16-j);
      code <<= 1;
   }
   h->maxcode[j] = 0xffffffff;

   // build non-spec acceleration table; 255 is flag for not-accelerated
   memset(h->fast, 255, 1 << FAST_BITS);
   for (i=0; i < k; ++i) {
      int s = h->size[i];
      if (s <= FAST_BITS) {
         int c = h->code[i] << (FAST_BITS-s);
         int m = 1 << (FAST_BITS-s);
         for (j=0; j < m; ++j) {
            h->fast[c+j] = (stbi_uc) i;
         }
      }
   }
   return 1;
}

// build a table that decodes both magnitude and value of small ACs in
// one go.
static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
{
   int i;
   for (i=0; i < (1 << FAST_BITS); ++i) {
      stbi_uc fast = h->fast[i];
      fast_ac[i] = 0;
      if (fast < 255) {
         int rs = h->values[fast];
         int run = (rs >> 4) & 15;
         int magbits = rs & 15;
         int len = h->size[fast];

         if (magbits && len + magbits <= FAST_BITS) {
            // magnitude code followed by receive_extend code
            int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
            int m = 1 << (magbits - 1);
            if (k < m) k += (~0U << magbits) + 1;
            // if the result is small enough, we can fit it in fast_ac table
            if (k >= -128 && k <= 127)
               fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits));
         }
      }
   }
}

// top up the entropy bit buffer to at least 25 valid bits, stopping early
// if a marker byte (0xFF followed by non-zero) is encountered
static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
{
   do {
      unsigned int b = j->nomore ?
0 : stbi__get8(j->s);
      if (b == 0xff) {
         int c = stbi__get8(j->s);
         while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
         if (c != 0) {
            j->marker = (unsigned char) c;
            j->nomore = 1;
            return;
         }
      }
      j->code_buffer |= b << (24 - j->code_bits);
      j->code_bits += 8;
   } while (j->code_bits <= 24);
}

// (1 << n) - 1
static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};

// decode a jpeg huffman value from the bitstream
stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
{
   unsigned int temp;
   int c,k;

   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);

   // look at the top FAST_BITS and determine what symbol ID it is,
   // if the code is <= FAST_BITS
   c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
   k = h->fast[c];
   if (k < 255) {
      int s = h->size[k];
      if (s > j->code_bits)
         return -1;
      j->code_buffer <<= s;
      j->code_bits -= s;
      return h->values[k];
   }

   // naive test is to shift the code_buffer down so k bits are
   // valid, then test against maxcode. To speed this up, we've
   // preshifted maxcode left so that it has (16-k) 0s at the
   // end; in other words, regardless of the number of bits, it
   // wants to be compared against something shifted to have 16;
   // that way we don't need to shift inside the loop.
   temp = j->code_buffer >> 16;
   for (k=FAST_BITS+1 ; ; ++k)
      if (temp < h->maxcode[k])
         break;
   if (k == 17) {
      // error!
code not found + j->code_bits -= 16; + return -1; + } + + if (k > j->code_bits) + return -1; + + // convert the huffman code to the symbol id + c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; + STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]); + + // convert the id to a symbol + j->code_bits -= k; + j->code_buffer <<= k; + return h->values[c]; +} + +// bias[n] = (-1<code_bits < n) stbi__grow_buffer_unsafe(j); + + sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB + k = stbi_lrot(j->code_buffer, n); + STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask))); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k + (stbi__jbias[n] & ~sgn); +} + +// get some unsigned bits +stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n) +{ + unsigned int k; + if (j->code_bits < n) stbi__grow_buffer_unsafe(j); + k = stbi_lrot(j->code_buffer, n); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k; +} + +stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) +{ + unsigned int k; + if (j->code_bits < 1) stbi__grow_buffer_unsafe(j); + k = j->code_buffer; + j->code_buffer <<= 1; + --j->code_bits; + return k & 0x80000000; +} + +// given a value that's at position X in the zigzag stream, +// where does it appear in the 8x8 matrix coded as row-major? 
+static const stbi_uc stbi__jpeg_dezigzag[64+15] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // let corrupt input sample past end + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63 +}; + +// decode one 64-entry block-- +static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant) +{ + int diff,dc,k; + int t; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + t = stbi__jpeg_huff_decode(j, hdc); + if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + + // 0 all the ac values now so we can do it 32-bits at a time + memset(data,0,64*sizeof(data[0])); + + diff = t ? stbi__extend_receive(j, t) : 0; + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc * dequant[0]); + + // decode AC components, see JPEG spec + k = 1; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) * dequant[zig]); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (rs != 0xf0) break; // end block + k += 16; + } else { + k += r; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); + } + } + } while (k < 64); + return 1; +} + +static int 
stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) +{ + int diff,dc; + int t; + if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + if (j->succ_high == 0) { + // first scan for DC coefficient, must be first + memset(data,0,64*sizeof(data[0])); // 0 all the ac values now + t = stbi__jpeg_huff_decode(j, hdc); + diff = t ? stbi__extend_receive(j, t) : 0; + + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc << j->succ_low); + } else { + // refinement scan for DC coefficient + if (stbi__jpeg_get_bit(j)) + data[0] += (short) (1 << j->succ_low); + } + return 1; +} + +// @OPTIMIZE: store non-zigzagged during the decode passes, +// and only de-zigzag when dequantizing +static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac) +{ + int k; + if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->succ_high == 0) { + int shift = j->succ_low; + + if (j->eob_run) { + --j->eob_run; + return 1; + } + + k = j->spec_start; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) << shift); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r); + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + --j->eob_run; + break; + } + k += 16; + } else { + k += r; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) << shift); + } + } + } while 
(k <= j->spec_end); + } else { + // refinement scan for these AC coefficients + + short bit = (short) (1 << j->succ_low); + + if (j->eob_run) { + --j->eob_run; + for (k = j->spec_start; k <= j->spec_end; ++k) { + short *p = &data[stbi__jpeg_dezigzag[k]]; + if (*p != 0) + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } + } else { + k = j->spec_start; + do { + int r,s; + int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r) - 1; + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + r = 64; // force end of block + } else { + // r=15 s=0 should write 16 0s, so we just do + // a run of 15 0s and then write s (which is 0), + // so we don't have to do anything special here + } + } else { + if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); + // sign bit + if (stbi__jpeg_get_bit(j)) + s = bit; + else + s = -bit; + } + + // advance by r + while (k <= j->spec_end) { + short *p = &data[stbi__jpeg_dezigzag[k++]]; + if (*p != 0) { + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } else { + if (r == 0) { + *p = (short) s; + break; + } + --r; + } + } + } while (k <= j->spec_end); + } + } + return 1; +} + +// take a -128..127 value and stbi__clamp it and convert to 0..255 +stbi_inline static stbi_uc stbi__clamp(int x) +{ + // trick to use a single test to catch both cases + if ((unsigned int) x > 255) { + if (x < 0) return 0; + if (x > 255) return 255; + } + return (stbi_uc) x; +} + +#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5))) +#define stbi__fsh(x) ((x) * 4096) + +// derived from jidctint -- DCT_ISLOW +#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ + int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = 
(p2+p3) * stbi__f2f(0.5411961f); \ + t2 = p1 + p3*stbi__f2f(-1.847759065f); \ + t3 = p1 + p2*stbi__f2f( 0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = stbi__fsh(p2+p3); \ + t1 = stbi__fsh(p2-p3); \ + x0 = t0+t3; \ + x3 = t0-t3; \ + x1 = t1+t2; \ + x2 = t1-t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0+t2; \ + p4 = t1+t3; \ + p1 = t0+t3; \ + p2 = t1+t2; \ + p5 = (p3+p4)*stbi__f2f( 1.175875602f); \ + t0 = t0*stbi__f2f( 0.298631336f); \ + t1 = t1*stbi__f2f( 2.053119869f); \ + t2 = t2*stbi__f2f( 3.072711026f); \ + t3 = t3*stbi__f2f( 1.501321110f); \ + p1 = p5 + p1*stbi__f2f(-0.899976223f); \ + p2 = p5 + p2*stbi__f2f(-2.562915447f); \ + p3 = p3*stbi__f2f(-1.961570560f); \ + p4 = p4*stbi__f2f(-0.390180644f); \ + t3 += p1+p4; \ + t2 += p2+p3; \ + t1 += p2+p4; \ + t0 += p1+p3; + +static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) +{ + int i,val[64],*v=val; + stbi_uc *o; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0]*4; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + 
STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + x0 += 65536 + (128<<17); + x1 += 65536 + (128<<17); + x2 += 65536 + (128<<17); + x3 += 65536 + (128<<17); + // tried computing the shifts into temps, or'ing the temps to see + // if any were out of range, but that was slower + o[0] = stbi__clamp((x0+t3) >> 17); + o[7] = stbi__clamp((x0-t3) >> 17); + o[1] = stbi__clamp((x1+t2) >> 17); + o[6] = stbi__clamp((x1-t2) >> 17); + o[2] = stbi__clamp((x2+t1) >> 17); + o[5] = stbi__clamp((x2-t1) >> 17); + o[3] = stbi__clamp((x3+t0) >> 17); + o[4] = stbi__clamp((x3-t0) >> 17); + } +} + +#ifdef STBI_SSE2 +// sse2 integer IDCT. not the fastest possible implementation but it +// produces bit-identical results to the generic C version so it's +// fully "transparent". +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + // This is constructed to match our regular (generic) integer IDCT exactly. 
+ __m128i row0, row1, row2, row3, row4, row5, row6, row7; + __m128i tmp; + + // dot product constant: even elems=x, odd elems=y + #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y)) + + // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit) + // out(1) = c1[even]*x + c1[odd]*y + #define dct_rot(out0,out1, x,y,c0,c1) \ + __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \ + __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \ + __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ + __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ + __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ + __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) + + // out = in << 12 (in 16-bit, out 32-bit) + #define dct_widen(out, in) \ + __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ + __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) + + // wide add + #define dct_wadd(out, a, b) \ + __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_add_epi32(a##_h, b##_h) + + // wide sub + #define dct_wsub(out, a, b) \ + __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) + + // butterfly a/b, add bias, then shift by "s" and pack + #define dct_bfly32o(out0, out1, a,b,bias,s) \ + { \ + __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ + __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ + dct_wadd(sum, abiased, b); \ + dct_wsub(dif, abiased, b); \ + out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ + out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ + } + + // 8-bit interleave step (for transposes) + #define dct_interleave8(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi8(a, b); \ + b = _mm_unpackhi_epi8(tmp, b) + + // 16-bit interleave step (for transposes) + #define dct_interleave16(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi16(a, b); \ + b = _mm_unpackhi_epi16(tmp, b) + + #define dct_pass(bias,shift) \ + { \ + 
/* even part */ \ + dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \ + __m128i sum04 = _mm_add_epi16(row0, row4); \ + __m128i dif04 = _mm_sub_epi16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ + dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \ + __m128i sum17 = _mm_add_epi16(row1, row7); \ + __m128i sum35 = _mm_add_epi16(row3, row5); \ + dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \ + dct_wadd(x4, y0o, y4o); \ + dct_wadd(x5, y1o, y5o); \ + dct_wadd(x6, y2o, y5o); \ + dct_wadd(x7, y3o, y4o); \ + dct_bfly32o(row0,row7, x0,x7,bias,shift); \ + dct_bfly32o(row1,row6, x1,x6,bias,shift); \ + dct_bfly32o(row2,row5, x2,x5,bias,shift); \ + dct_bfly32o(row3,row4, x3,x4,bias,shift); \ + } + + __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); + __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f)); + __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f)); + __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f)); + __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f)); + __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f)); + __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f)); + __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f)); + + // rounding biases in column/row passes, see stbi__idct_block for explanation. 
+ __m128i bias_0 = _mm_set1_epi32(512); + __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17)); + + // load + row0 = _mm_load_si128((const __m128i *) (data + 0*8)); + row1 = _mm_load_si128((const __m128i *) (data + 1*8)); + row2 = _mm_load_si128((const __m128i *) (data + 2*8)); + row3 = _mm_load_si128((const __m128i *) (data + 3*8)); + row4 = _mm_load_si128((const __m128i *) (data + 4*8)); + row5 = _mm_load_si128((const __m128i *) (data + 5*8)); + row6 = _mm_load_si128((const __m128i *) (data + 6*8)); + row7 = _mm_load_si128((const __m128i *) (data + 7*8)); + + // column pass + dct_pass(bias_0, 10); + + { + // 16bit 8x8 transpose pass 1 + dct_interleave16(row0, row4); + dct_interleave16(row1, row5); + dct_interleave16(row2, row6); + dct_interleave16(row3, row7); + + // transpose pass 2 + dct_interleave16(row0, row2); + dct_interleave16(row1, row3); + dct_interleave16(row4, row6); + dct_interleave16(row5, row7); + + // transpose pass 3 + dct_interleave16(row0, row1); + dct_interleave16(row2, row3); + dct_interleave16(row4, row5); + dct_interleave16(row6, row7); + } + + // row pass + dct_pass(bias_1, 17); + + { + // pack + __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 + __m128i p1 = _mm_packus_epi16(row2, row3); + __m128i p2 = _mm_packus_epi16(row4, row5); + __m128i p3 = _mm_packus_epi16(row6, row7); + + // 8bit 8x8 transpose pass 1 + dct_interleave8(p0, p2); // a0e0a1e1... + dct_interleave8(p1, p3); // c0g0c1g1... + + // transpose pass 2 + dct_interleave8(p0, p1); // a0c0e0g0... + dct_interleave8(p2, p3); // b0d0f0h0... + + // transpose pass 3 + dct_interleave8(p0, p2); // a0b0c0d0... + dct_interleave8(p1, p3); // a4b4c4d4... 
+ + // store + _mm_storel_epi64((__m128i *) out, p0); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p2); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p1); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p3); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e)); + } + +#undef dct_const +#undef dct_rot +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_interleave8 +#undef dct_interleave16 +#undef dct_pass +} + +#endif // STBI_SSE2 + +#ifdef STBI_NEON + +// NEON integer IDCT. should produce bit-identical +// results to the generic C version. +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + int16x8_t row0, row1, row2, row3, row4, row5, row6, row7; + + int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f)); + int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f)); + int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f)); + int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f)); + int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f)); + int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f)); + int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f)); + int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f)); + int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f)); + int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f)); + int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f)); + int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f)); + +#define dct_long_mul(out, inq, coeff) \ + int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff) + +#define dct_long_mac(out, acc, inq, coeff) \ + int32x4_t out##_l = vmlal_s16(acc##_l, 
vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff) + +#define dct_widen(out, inq) \ + int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \ + int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12) + +// wide add +#define dct_wadd(out, a, b) \ + int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vaddq_s32(a##_h, b##_h) + +// wide sub +#define dct_wsub(out, a, b) \ + int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vsubq_s32(a##_h, b##_h) + +// butterfly a/b, then shift using "shiftop" by "s" and pack +#define dct_bfly32o(out0,out1, a,b,shiftop,s) \ + { \ + dct_wadd(sum, a, b); \ + dct_wsub(dif, a, b); \ + out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ + out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ + } + +#define dct_pass(shiftop, shift) \ + { \ + /* even part */ \ + int16x8_t sum26 = vaddq_s16(row2, row6); \ + dct_long_mul(p1e, sum26, rot0_0); \ + dct_long_mac(t2e, p1e, row6, rot0_1); \ + dct_long_mac(t3e, p1e, row2, rot0_2); \ + int16x8_t sum04 = vaddq_s16(row0, row4); \ + int16x8_t dif04 = vsubq_s16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + int16x8_t sum15 = vaddq_s16(row1, row5); \ + int16x8_t sum17 = vaddq_s16(row1, row7); \ + int16x8_t sum35 = vaddq_s16(row3, row5); \ + int16x8_t sum37 = vaddq_s16(row3, row7); \ + int16x8_t sumodd = vaddq_s16(sum17, sum35); \ + dct_long_mul(p5o, sumodd, rot1_0); \ + dct_long_mac(p1o, p5o, sum17, rot1_1); \ + dct_long_mac(p2o, p5o, sum35, rot1_2); \ + dct_long_mul(p3o, sum37, rot2_0); \ + dct_long_mul(p4o, sum15, rot2_1); \ + dct_wadd(sump13o, p1o, p3o); \ + dct_wadd(sump24o, p2o, p4o); \ + dct_wadd(sump23o, p2o, p3o); \ + dct_wadd(sump14o, p1o, p4o); \ + dct_long_mac(x4, sump13o, row7, rot3_0); \ + dct_long_mac(x5, sump24o, row5, rot3_1); \ + 
dct_long_mac(x6, sump23o, row3, rot3_2); \ + dct_long_mac(x7, sump14o, row1, rot3_3); \ + dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \ + dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \ + dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \ + dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \ + } + + // load + row0 = vld1q_s16(data + 0*8); + row1 = vld1q_s16(data + 1*8); + row2 = vld1q_s16(data + 2*8); + row3 = vld1q_s16(data + 3*8); + row4 = vld1q_s16(data + 4*8); + row5 = vld1q_s16(data + 5*8); + row6 = vld1q_s16(data + 6*8); + row7 = vld1q_s16(data + 7*8); + + // add DC bias + row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0)); + + // column pass + dct_pass(vrshrn_n_s32, 10); + + // 16bit 8x8 transpose + { +// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively. +// whether compilers actually get this is another story, sadly. +#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); } +#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); } + + // pass 1 + dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 + dct_trn16(row2, row3); + dct_trn16(row4, row5); + dct_trn16(row6, row7); + + // pass 2 + dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 + dct_trn32(row1, row3); + dct_trn32(row4, row6); + dct_trn32(row5, row7); + + // pass 3 + dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 + dct_trn64(row1, row5); + dct_trn64(row2, row6); + dct_trn64(row3, row7); + +#undef dct_trn16 +#undef dct_trn32 +#undef dct_trn64 + } + + // row pass + // vrshrn_n_s32 only supports shifts up to 16, we need + // 17. so do a non-rounding shift of 16 first then follow + // up with a rounding shift by 1. 
+ dct_pass(vshrn_n_s32, 16); + + { + // pack and round + uint8x8_t p0 = vqrshrun_n_s16(row0, 1); + uint8x8_t p1 = vqrshrun_n_s16(row1, 1); + uint8x8_t p2 = vqrshrun_n_s16(row2, 1); + uint8x8_t p3 = vqrshrun_n_s16(row3, 1); + uint8x8_t p4 = vqrshrun_n_s16(row4, 1); + uint8x8_t p5 = vqrshrun_n_s16(row5, 1); + uint8x8_t p6 = vqrshrun_n_s16(row6, 1); + uint8x8_t p7 = vqrshrun_n_s16(row7, 1); + + // again, these can translate into one instruction, but often don't. +#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); } +#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); } + + // sadly can't use interleaved stores here since we only write + // 8 bytes to each scan line! 
+ + // 8x8 8-bit transpose pass 1 + dct_trn8_8(p0, p1); + dct_trn8_8(p2, p3); + dct_trn8_8(p4, p5); + dct_trn8_8(p6, p7); + + // pass 2 + dct_trn8_16(p0, p2); + dct_trn8_16(p1, p3); + dct_trn8_16(p4, p6); + dct_trn8_16(p5, p7); + + // pass 3 + dct_trn8_32(p0, p4); + dct_trn8_32(p1, p5); + dct_trn8_32(p2, p6); + dct_trn8_32(p3, p7); + + // store + vst1_u8(out, p0); out += out_stride; + vst1_u8(out, p1); out += out_stride; + vst1_u8(out, p2); out += out_stride; + vst1_u8(out, p3); out += out_stride; + vst1_u8(out, p4); out += out_stride; + vst1_u8(out, p5); out += out_stride; + vst1_u8(out, p6); out += out_stride; + vst1_u8(out, p7); + +#undef dct_trn8_8 +#undef dct_trn8_16 +#undef dct_trn8_32 + } + +#undef dct_long_mul +#undef dct_long_mac +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_pass +} + +#endif // STBI_NEON + +#define STBI__MARKER_none 0xff +// if there's a pending marker from the entropy stream, return that +// otherwise, fetch from the stream and get a marker. if there's no +// marker, return 0xff, which is never a valid marker value +static stbi_uc stbi__get_marker(stbi__jpeg *j) +{ + stbi_uc x; + if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; } + x = stbi__get8(j->s); + if (x != 0xff) return STBI__MARKER_none; + while (x == 0xff) + x = stbi__get8(j->s); // consume repeated 0xff fill bytes + return x; +} + +// in each scan, we'll have scan_n components, and the order +// of the components is specified by order[] +#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) + +// after a restart interval, stbi__jpeg_reset the entropy decoder and +// the dc prediction +static void stbi__jpeg_reset(stbi__jpeg *j) +{ + j->code_bits = 0; + j->code_buffer = 0; + j->nomore = 0; + j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0; + j->marker = STBI__MARKER_none; + j->todo = j->restart_interval ? 
j->restart_interval : 0x7fffffff; + j->eob_run = 0; + // no more than 1<<31 MCUs if no restart_interal? that's plenty safe, + // since we don't even allow 1<<30 pixels +} + +static int stbi__parse_entropy_coded_data(stbi__jpeg *z) +{ + stbi__jpeg_reset(z); + if (!z->progressive) { + if (z->scan_n == 1) { + int i,j; + STBI_SIMD_ALIGN(short, data[64]); + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int i,j,k,x,y; + STBI_SIMD_ALIGN(short, data[64]); + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... 
process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x)*8; + int y2 = (j*z->img_comp[n].v + y)*8; + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data); + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } else { + if (z->scan_n == 1) { + int i,j; + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + if (z->spec_start == 0) { + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } else { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha])) + return 0; + } + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int 
i,j,k,x,y; + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x); + int y2 = (j*z->img_comp[n].v + y); + short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } +} + +static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) +{ + int i; + for (i=0; i < 64; ++i) + data[i] *= dequant[i]; +} + +static void stbi__jpeg_finish(stbi__jpeg *z) +{ + if (z->progressive) { + // dequantize and idct the data + int i,j,n; + for (n=0; n < z->s->img_n; ++n) { + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + } + } + } + } +} + +static int stbi__process_marker(stbi__jpeg *z, int m) +{ + int L; + switch (m) { + case STBI__MARKER_none: // no marker found + return stbi__err("expected marker","Corrupt JPEG"); + + case 0xDD: // DRI - specify restart interval + if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); + z->restart_interval = 
stbi__get16be(z->s); + return 1; + + case 0xDB: // DQT - define quantization table + L = stbi__get16be(z->s)-2; + while (L > 0) { + int q = stbi__get8(z->s); + int p = q >> 4, sixteen = (p != 0); + int t = q & 15,i; + if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); + if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); + + for (i=0; i < 64; ++i) + z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); + L -= (sixteen ? 129 : 65); + } + return L==0; + + case 0xC4: // DHT - define huffman table + L = stbi__get16be(z->s)-2; + while (L > 0) { + stbi_uc *v; + int sizes[16],i,n=0; + int q = stbi__get8(z->s); + int tc = q >> 4; + int th = q & 15; + if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG"); + for (i=0; i < 16; ++i) { + sizes[i] = stbi__get8(z->s); + n += sizes[i]; + } + L -= 17; + if (tc == 0) { + if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; + v = z->huff_dc[th].values; + } else { + if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0; + v = z->huff_ac[th].values; + } + for (i=0; i < n; ++i) + v[i] = stbi__get8(z->s); + if (tc != 0) + stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); + L -= n; + } + return L==0; + } + + // check for comment block or APP blocks + if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { + L = stbi__get16be(z->s); + if (L < 2) { + if (m == 0xFE) + return stbi__err("bad COM len","Corrupt JPEG"); + else + return stbi__err("bad APP len","Corrupt JPEG"); + } + L -= 2; + + if (m == 0xE0 && L >= 5) { // JFIF APP0 segment + static const unsigned char tag[5] = {'J','F','I','F','\0'}; + int ok = 1; + int i; + for (i=0; i < 5; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 5; + if (ok) + z->jfif = 1; + } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment + static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; + int ok = 1; + int i; + for (i=0; i < 6; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 6; 
+ if (ok) { + stbi__get8(z->s); // version + stbi__get16be(z->s); // flags0 + stbi__get16be(z->s); // flags1 + z->app14_color_transform = stbi__get8(z->s); // color transform + L -= 6; + } + } + + stbi__skip(z->s, L); + return 1; + } + + return stbi__err("unknown marker","Corrupt JPEG"); +} + +// after we see SOS +static int stbi__process_scan_header(stbi__jpeg *z) +{ + int i; + int Ls = stbi__get16be(z->s); + z->scan_n = stbi__get8(z->s); + if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG"); + if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG"); + for (i=0; i < z->scan_n; ++i) { + int id = stbi__get8(z->s), which; + int q = stbi__get8(z->s); + for (which = 0; which < z->s->img_n; ++which) + if (z->img_comp[which].id == id) + break; + if (which == z->s->img_n) return 0; // no match + z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG"); + z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG"); + z->order[i] = which; + } + + { + int aa; + z->spec_start = stbi__get8(z->s); + z->spec_end = stbi__get8(z->s); // should be 63, but might be 0 + aa = stbi__get8(z->s); + z->succ_high = (aa >> 4); + z->succ_low = (aa & 15); + if (z->progressive) { + if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13) + return stbi__err("bad SOS", "Corrupt JPEG"); + } else { + if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG"); + if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG"); + z->spec_end = 63; + } + } + + return 1; +} + +static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why) +{ + int i; + for (i=0; i < ncomp; ++i) { + if (z->img_comp[i].raw_data) { + STBI_FREE(z->img_comp[i].raw_data); + z->img_comp[i].raw_data = NULL; + z->img_comp[i].data = NULL; + 
} + if (z->img_comp[i].raw_coeff) { + STBI_FREE(z->img_comp[i].raw_coeff); + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].coeff = 0; + } + if (z->img_comp[i].linebuf) { + STBI_FREE(z->img_comp[i].linebuf); + z->img_comp[i].linebuf = NULL; + } + } + return why; +} + +static int stbi__process_frame_header(stbi__jpeg *z, int scan) +{ + stbi__context *s = z->s; + int Lf,p,i,q, h_max=1,v_max=1,c; + Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG + p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline + s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires + c = stbi__get8(s); + if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); + s->img_n = c; + for (i=0; i < c; ++i) { + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; + } + + if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); + + z->rgb = 0; + for (i=0; i < s->img_n; ++i) { + static const unsigned char rgb[3] = { 'R', 'G', 'B' }; + z->img_comp[i].id = stbi__get8(s); + if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) + ++z->rgb; + q = stbi__get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); + z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); + } + + if (scan != STBI__SCAN_load) return 1; + + if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); + + for (i=0; i < s->img_n; ++i) { + if 
(z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; + if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; + } + + // compute interleaved mcu info + z->img_h_max = h_max; + z->img_v_max = v_max; + z->img_mcu_w = h_max * 8; + z->img_mcu_h = v_max * 8; + // these sizes can't be more than 17 bits + z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; + z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; + + for (i=0; i < s->img_n; ++i) { + // number of effective pixels (e.g. for non-interleaved MCU) + z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; + z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; + // to simplify generation, we'll allocate enough memory to decode + // the bogus oversized data from using interleaved MCUs and their + // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't + // discard the extra data until colorspace conversion + // + // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) + // so these muls can't overflow with 32-bit ints (which we require) + z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; + z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; + z->img_comp[i].coeff = 0; + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].linebuf = NULL; + z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); + if (z->img_comp[i].raw_data == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); + // align blocks for idct using mmx/sse + z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); + if (z->progressive) { + // w2, h2 are multiples of 8 (see above) + z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; + z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; + z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); + if (z->img_comp[i].raw_coeff == NULL) + return stbi__free_jpeg_components(z, i+1, 
stbi__err("outofmem", "Out of memory")); + z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); + } + } + + return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. SOF) +#define stbi__DNL(x) ((x) == 0xdc) +#define stbi__SOI(x) ((x) == 0xd8) +#define stbi__EOI(x) ((x) == 0xd9) +#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) +#define stbi__SOS(x) ((x) == 0xda) + +#define stbi__SOF_progressive(x) ((x) == 0xc2) + +static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) +{ + int m; + z->jfif = 0; + z->app14_color_transform = -1; // valid values are 0,1,2 + z->marker = STBI__MARKER_none; // initialize cached marker to empty + m = stbi__get_marker(z); + if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG"); + if (scan == STBI__SCAN_type) return 1; + m = stbi__get_marker(z); + while (!stbi__SOF(m)) { + if (!stbi__process_marker(z,m)) return 0; + m = stbi__get_marker(z); + while (m == STBI__MARKER_none) { + // some files have extra padding after their blocks, so ok, we'll scan + if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); + m = stbi__get_marker(z); + } + } + z->progressive = stbi__SOF_progressive(m); + if (!stbi__process_frame_header(z, scan)) return 0; + return 1; +} + +// decode image to YCbCr format +static int stbi__decode_jpeg_image(stbi__jpeg *j) +{ + int m; + for (m = 0; m < 4; m++) { + j->img_comp[m].raw_data = NULL; + j->img_comp[m].raw_coeff = NULL; + } + j->restart_interval = 0; + if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0; + m = stbi__get_marker(j); + while (!stbi__EOI(m)) { + if (stbi__SOS(m)) { + if (!stbi__process_scan_header(j)) return 0; + if (!stbi__parse_entropy_coded_data(j)) return 0; + if (j->marker == STBI__MARKER_none ) { + // handle 0s at the end of image data from IP Kamera 9060 + while (!stbi__at_eof(j->s)) { + int x = stbi__get8(j->s); + if (x == 255) { + j->marker = stbi__get8(j->s); + break; + } + } + // if we 
reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 + } + } else if (stbi__DNL(m)) { + int Ld = stbi__get16be(j->s); + stbi__uint32 NL = stbi__get16be(j->s); + if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); + if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); + } else { + if (!stbi__process_marker(j, m)) return 0; + } + m = stbi__get_marker(j); + } + if (j->progressive) + stbi__jpeg_finish(j); + return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1, + int w, int hs); + +#define stbi__div4(x) ((stbi_uc) ((x) >> 2)) + +static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + STBI_NOTUSED(out); + STBI_NOTUSED(in_far); + STBI_NOTUSED(w); + STBI_NOTUSED(hs); + return in_near; +} + +static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples vertically for every one in input + int i; + STBI_NOTUSED(hs); + for (i=0; i < w; ++i) + out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); + return out; +} + +static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples horizontally for every one in input + int i; + stbi_uc *input = in_near; + + if (w == 1) { + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + return out; + } + + out[0] = input[0]; + out[1] = stbi__div4(input[0]*3 + input[1] + 2); + for (i=1; i < w-1; ++i) { + int n = 3*input[i]+2; + out[i*2+0] = stbi__div4(n+input[i-1]); + out[i*2+1] = stbi__div4(n+input[i+1]); + } + out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2); + out[i*2+1] = input[w-1]; + + STBI_NOTUSED(in_far); + STBI_NOTUSED(hs); + + return out; +} + +#define stbi__div16(x) ((stbi_uc) ((x) >> 4)) + +static stbi_uc 
*stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i,t0,t1; + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + out[0] = stbi__div4(t1+2); + for (i=1; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i=0,t0,t1; + + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + // process groups of 8 pixels for as long as we can. + // note we can't handle the last pixel in a row in this loop + // because we need to handle the filter boundary conditions. + for (; i < ((w-1) & ~7); i += 8) { +#if defined(STBI_SSE2) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + __m128i zero = _mm_setzero_si128(); + __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i)); + __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i)); + __m128i farw = _mm_unpacklo_epi8(farb, zero); + __m128i nearw = _mm_unpacklo_epi8(nearb, zero); + __m128i diff = _mm_sub_epi16(farw, nearw); + __m128i nears = _mm_slli_epi16(nearw, 2); + __m128i curr = _mm_add_epi16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. 
+ __m128i prv0 = _mm_slli_si128(curr, 2); + __m128i nxt0 = _mm_srli_si128(curr, 2); + __m128i prev = _mm_insert_epi16(prv0, t1, 0); + __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + __m128i bias = _mm_set1_epi16(8); + __m128i curs = _mm_slli_epi16(curr, 2); + __m128i prvd = _mm_sub_epi16(prev, curr); + __m128i nxtd = _mm_sub_epi16(next, curr); + __m128i curb = _mm_add_epi16(curs, bias); + __m128i even = _mm_add_epi16(prvd, curb); + __m128i odd = _mm_add_epi16(nxtd, curb); + + // interleave even and odd pixels, then undo scaling. + __m128i int0 = _mm_unpacklo_epi16(even, odd); + __m128i int1 = _mm_unpackhi_epi16(even, odd); + __m128i de0 = _mm_srli_epi16(int0, 4); + __m128i de1 = _mm_srli_epi16(int1, 4); + + // pack and write output + __m128i outv = _mm_packus_epi16(de0, de1); + _mm_storeu_si128((__m128i *) (out + i*2), outv); +#elif defined(STBI_NEON) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + uint8x8_t farb = vld1_u8(in_far + i); + uint8x8_t nearb = vld1_u8(in_near + i); + int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); + int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); + int16x8_t curr = vaddq_s16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. 
+ int16x8_t prv0 = vextq_s16(curr, curr, 7); + int16x8_t nxt0 = vextq_s16(curr, curr, 1); + int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); + int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + int16x8_t curs = vshlq_n_s16(curr, 2); + int16x8_t prvd = vsubq_s16(prev, curr); + int16x8_t nxtd = vsubq_s16(next, curr); + int16x8_t even = vaddq_s16(curs, prvd); + int16x8_t odd = vaddq_s16(curs, nxtd); + + // undo scaling and round, then store with even/odd phases interleaved + uint8x8x2_t o; + o.val[0] = vqrshrun_n_s16(even, 4); + o.val[1] = vqrshrun_n_s16(odd, 4); + vst2_u8(out + i*2, o); +#endif + + // "previous" value for next iter + t1 = 3*in_near[i+7] + in_far[i+7]; + } + + t0 = t1; + t1 = 3*in_near[i] + in_far[i]; + out[i*2] = stbi__div16(3*t1 + t0 + 8); + + for (++i; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} +#endif + +static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // resample with nearest-neighbor + int i,j; + STBI_NOTUSED(in_far); + for (i=0; i < w; ++i) + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; + return out; +} + +// this is a reduced-precision calculation of YCbCr-to-RGB introduced +// to make sure the code produces the same results in both SIMD and scalar +#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8) +static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) +{ + int i; + for (i=0; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int 
cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) +{ + int i = 0; + +#ifdef STBI_SSE2 + // step == 3 is pretty ugly on the final interleave, and i'm not convinced + // it's useful in practice (you wouldn't use it for textures, for example). + // so just accelerate step == 4 case. + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. 
+ __m128i signflip = _mm_set1_epi8(-0x80); + __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f)); + __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); + __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); + __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f)); + __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128); + __m128i xw = _mm_set1_epi16(255); // alpha channel + + for (; i+7 < count; i += 8) { + // load + __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i)); + __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i)); + __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i)); + __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 + __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 + + // unpack to short (and left-shift cr, cb by 8) + __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes); + __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); + __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); + + // color transform + __m128i yws = _mm_srli_epi16(yw, 4); + __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); + __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); + __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); + __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); + __m128i rws = _mm_add_epi16(cr0, yws); + __m128i gwt = _mm_add_epi16(cb0, yws); + __m128i bws = _mm_add_epi16(yws, cb1); + __m128i gws = _mm_add_epi16(gwt, cr1); + + // descale + __m128i rw = _mm_srai_epi16(rws, 4); + __m128i bw = _mm_srai_epi16(bws, 4); + __m128i gw = _mm_srai_epi16(gws, 4); + + // back to byte, set up for transpose + __m128i brb = _mm_packus_epi16(rw, bw); + __m128i gxb = _mm_packus_epi16(gw, xw); + + // transpose to interleave channels + __m128i t0 = _mm_unpacklo_epi8(brb, gxb); + __m128i t1 = _mm_unpackhi_epi8(brb, gxb); + __m128i o0 = _mm_unpacklo_epi16(t0, t1); + __m128i o1 = _mm_unpackhi_epi16(t0, t1); + + // store + _mm_storeu_si128((__m128i *) (out + 0), o0); + 
_mm_storeu_si128((__m128i *) (out + 16), o1); + out += 32; + } + } +#endif + +#ifdef STBI_NEON + // in this version, step=3 support would be easy to add. but is there demand? + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. + uint8x8_t signflip = vdup_n_u8(0x80); + int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f)); + int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f)); + int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f)); + int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f)); + + for (; i+7 < count; i += 8) { + // load + uint8x8_t y_bytes = vld1_u8(y + i); + uint8x8_t cr_bytes = vld1_u8(pcr + i); + uint8x8_t cb_bytes = vld1_u8(pcb + i); + int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip)); + int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip)); + + // expand to s16 + int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4)); + int16x8_t crw = vshll_n_s8(cr_biased, 7); + int16x8_t cbw = vshll_n_s8(cb_biased, 7); + + // color transform + int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0); + int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0); + int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1); + int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1); + int16x8_t rws = vaddq_s16(yws, cr0); + int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1); + int16x8_t bws = vaddq_s16(yws, cb1); + + // undo scaling, round, convert to byte + uint8x8x4_t o; + o.val[0] = vqrshrun_n_s16(rws, 4); + o.val[1] = vqrshrun_n_s16(gws, 4); + o.val[2] = vqrshrun_n_s16(bws, 4); + o.val[3] = vdup_n_u8(255); + + // store, interleaving r/g/b/a + vst4_u8(out, o); + out += 8*4; + } + } +#endif + + for (; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 
0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} +#endif + +// set up the kernels +static void stbi__setup_jpeg(stbi__jpeg *j) +{ + j->idct_block_kernel = stbi__idct_block; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2; + +#ifdef STBI_SSE2 + if (stbi__sse2_available()) { + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; + } +#endif + +#ifdef STBI_NEON + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; +#endif +} + +// clean up the temporary component buffers +static void stbi__cleanup_jpeg(stbi__jpeg *j) +{ + stbi__free_jpeg_components(j, j->s->img_n, 0); +} + +typedef struct +{ + resample_row_func resample; + stbi_uc *line0,*line1; + int hs,vs; // expansion factor in each axis + int w_lores; // horizontal pixels pre-expansion + int ystep; // how far through vertical expansion we are + int ypos; // which pre-expansion row we're on +} stbi__resample; + +// fast 0..255 * 0..255 => 0..255 rounded multiplication +static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) +{ + unsigned int t = x*y + 128; + return (stbi_uc) ((t + (t >>8)) >> 8); +} + +static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ + int n, decode_n, is_rgb; + z->s->img_n = 0; // make stbi__cleanup_jpeg safe + + // validate req_comp + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + + // load a jpeg image from 
whichever source, but leave in YCbCr format + if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; } + + // determine actual number of components to generate + n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1; + + is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif)); + + if (z->s->img_n == 3 && n < 3 && !is_rgb) + decode_n = 1; + else + decode_n = z->s->img_n; + + // resample and color-convert + { + int k; + unsigned int i,j; + stbi_uc *output; + stbi_uc *coutput[4]; + + stbi__resample res_comp[4]; + + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + + // allocate line buffer big enough for upsampling off the edges + // with upsample factor of 4 + z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3); + if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + r->hs = z->img_h_max / z->img_comp[k].h; + r->vs = z->img_v_max / z->img_comp[k].v; + r->ystep = r->vs >> 1; + r->w_lores = (z->s->img_x + r->hs-1) / r->hs; + r->ypos = 0; + r->line0 = r->line1 = z->img_comp[k].data; + + if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; + else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2; + else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2; + else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel; + else r->resample = stbi__resample_row_generic; + } + + // can't error after this so, this is safe + output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1); + if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + // now go ahead and resample + for (j=0; j < z->s->img_y; ++j) { + stbi_uc *out = output + n * z->s->img_x * j; + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + int y_bot = r->ystep >= (r->vs >> 1); + coutput[k] = r->resample(z->img_comp[k].linebuf, + y_bot ? 
r->line1 : r->line0, + y_bot ? r->line0 : r->line1, + r->w_lores, r->hs); + if (++r->ystep >= r->vs) { + r->ystep = 0; + r->line0 = r->line1; + if (++r->ypos < z->img_comp[k].y) + r->line1 += z->img_comp[k].w2; + } + } + if (n >= 3) { + stbi_uc *y = coutput[0]; + if (z->s->img_n == 3) { + if (is_rgb) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = y[i]; + out[1] = coutput[1][i]; + out[2] = coutput[2][i]; + out[3] = 255; + out += n; + } + } else { + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else if (z->s->img_n == 4) { + if (z->app14_color_transform == 0) { // CMYK + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(coutput[0][i], m); + out[1] = stbi__blinn_8x8(coutput[1][i], m); + out[2] = stbi__blinn_8x8(coutput[2][i], m); + out[3] = 255; + out += n; + } + } else if (z->app14_color_transform == 2) { // YCCK + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(255 - out[0], m); + out[1] = stbi__blinn_8x8(255 - out[1], m); + out[2] = stbi__blinn_8x8(255 - out[2], m); + out += n; + } + } else { // YCbCr + alpha? 
Ignore the fourth channel for now + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else + for (i=0; i < z->s->img_x; ++i) { + out[0] = out[1] = out[2] = y[i]; + out[3] = 255; // not used if n==3 + out += n; + } + } else { + if (is_rgb) { + if (n == 1) + for (i=0; i < z->s->img_x; ++i) + *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + else { + for (i=0; i < z->s->img_x; ++i, out += 2) { + out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + out[1] = 255; + } + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 0) { + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + stbi_uc r = stbi__blinn_8x8(coutput[0][i], m); + stbi_uc g = stbi__blinn_8x8(coutput[1][i], m); + stbi_uc b = stbi__blinn_8x8(coutput[2][i], m); + out[0] = stbi__compute_y(r, g, b); + out[1] = 255; + out += n; + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 2) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]); + out[1] = 255; + out += n; + } + } else { + stbi_uc *y = coutput[0]; + if (n == 1) + for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; + else + for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255; + } + } + } + stbi__cleanup_jpeg(z); + *out_x = z->s->img_x; + *out_y = z->s->img_y; + if (comp) *comp = z->s->img_n >= 3 ? 
3 : 1; // report original components, not output + return output; + } +} + +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + unsigned char* result; + stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg)); + STBI_NOTUSED(ri); + j->s = s; + stbi__setup_jpeg(j); + result = load_jpeg_image(j, x,y,comp,req_comp); + STBI_FREE(j); + return result; +} + +static int stbi__jpeg_test(stbi__context *s) +{ + int r; + stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg)); + j->s = s; + stbi__setup_jpeg(j); + r = stbi__decode_jpeg_header(j, STBI__SCAN_type); + stbi__rewind(s); + STBI_FREE(j); + return r; +} + +static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) +{ + if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) { + stbi__rewind( j->s ); + return 0; + } + if (x) *x = j->s->img_x; + if (y) *y = j->s->img_y; + if (comp) *comp = j->s->img_n >= 3 ? 3 : 1; + return 1; +} + +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) +{ + int result; + stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg))); + j->s = s; + result = stbi__jpeg_info_raw(j, x, y, comp); + STBI_FREE(j); + return result; +} +#endif + +// public domain zlib decode v0.2 Sean Barrett 2006-11-18 +// simple implementation +// - all input must be provided in an upfront buffer +// - all output is written to a single output buffer (can malloc/realloc) +// performance +// - fast huffman + +#ifndef STBI_NO_ZLIB + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables +#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1) + +// zlib-style huffman encoding +// (jpegs packs from left, zlib from right, so can't share code) +typedef struct +{ + stbi__uint16 fast[1 << STBI__ZFAST_BITS]; + stbi__uint16 firstcode[16]; + int maxcode[17]; + stbi__uint16 firstsymbol[16]; + stbi_uc size[288]; + stbi__uint16 
value[288]; +} stbi__zhuffman; + +stbi_inline static int stbi__bitreverse16(int n) +{ + n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); + n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); + n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); + n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); + return n; +} + +stbi_inline static int stbi__bit_reverse(int v, int bits) +{ + STBI_ASSERT(bits <= 16); + // to bit reverse n bits, reverse 16 and shift + // e.g. 11 bits, bit reverse and shift away 5 + return stbi__bitreverse16(v) >> (16-bits); +} + +static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num) +{ + int i,k=0; + int code, next_code[16], sizes[17]; + + // DEFLATE spec for generating codes + memset(sizes, 0, sizeof(sizes)); + memset(z->fast, 0, sizeof(z->fast)); + for (i=0; i < num; ++i) + ++sizes[sizelist[i]]; + sizes[0] = 0; + for (i=1; i < 16; ++i) + if (sizes[i] > (1 << i)) + return stbi__err("bad sizes", "Corrupt PNG"); + code = 0; + for (i=1; i < 16; ++i) { + next_code[i] = code; + z->firstcode[i] = (stbi__uint16) code; + z->firstsymbol[i] = (stbi__uint16) k; + code = (code + sizes[i]); + if (sizes[i]) + if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG"); + z->maxcode[i] = code << (16-i); // preshift for inner loop + code <<= 1; + k += sizes[i]; + } + z->maxcode[16] = 0x10000; // sentinel + for (i=0; i < num; ++i) { + int s = sizelist[i]; + if (s) { + int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; + stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); + z->size [c] = (stbi_uc ) s; + z->value[c] = (stbi__uint16) i; + if (s <= STBI__ZFAST_BITS) { + int j = stbi__bit_reverse(next_code[s],s); + while (j < (1 << STBI__ZFAST_BITS)) { + z->fast[j] = fastv; + j += (1 << s); + } + } + ++next_code[s]; + } + } + return 1; +} + +// zlib-from-memory implementation for PNG reading +// because PNG allows splitting the zlib stream arbitrarily, +// and it's annoying structurally to have PNG call ZLIB call PNG, +// we require 
PNG read all the IDATs and combine them into a single +// memory buffer + +typedef struct +{ + stbi_uc *zbuffer, *zbuffer_end; + int num_bits; + stbi__uint32 code_buffer; + + char *zout; + char *zout_start; + char *zout_end; + int z_expandable; + + stbi__zhuffman z_length, z_distance; +} stbi__zbuf; + +stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z) +{ + if (z->zbuffer >= z->zbuffer_end) return 0; + return *z->zbuffer++; +} + +static void stbi__fill_bits(stbi__zbuf *z) +{ + do { + STBI_ASSERT(z->code_buffer < (1U << z->num_bits)); + z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; + z->num_bits += 8; + } while (z->num_bits <= 24); +} + +stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n) +{ + unsigned int k; + if (z->num_bits < n) stbi__fill_bits(z); + k = z->code_buffer & ((1 << n) - 1); + z->code_buffer >>= n; + z->num_bits -= n; + return k; +} + +static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s,k; + // not resolved by fast table, so compute it the slow way + // use jpeg approach, which requires MSbits at top + k = stbi__bit_reverse(a->code_buffer, 16); + for (s=STBI__ZFAST_BITS+1; ; ++s) + if (k < z->maxcode[s]) + break; + if (s == 16) return -1; // invalid code! 
+ // code size is s, so: + b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; + STBI_ASSERT(z->size[b] == s); + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; +} + +stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s; + if (a->num_bits < 16) stbi__fill_bits(a); + b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; + if (b) { + s = b >> 9; + a->code_buffer >>= s; + a->num_bits -= s; + return b & 511; + } + return stbi__zhuffman_decode_slowpath(a, z); +} + +static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes +{ + char *q; + int cur, limit, old_limit; + z->zout = zout; + if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); + cur = (int) (z->zout - z->zout_start); + limit = old_limit = (int) (z->zout_end - z->zout_start); + while (cur + n > limit) + limit *= 2; + q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); + STBI_NOTUSED(old_limit); + if (q == NULL) return stbi__err("outofmem", "Out of memory"); + z->zout_start = q; + z->zout = q + cur; + z->zout_end = q + limit; + return 1; +} + +static const int stbi__zlength_base[31] = { + 3,4,5,6,7,8,9,10,11,13, + 15,17,19,23,27,31,35,43,51,59, + 67,83,99,115,131,163,195,227,258,0,0 }; + +static const int stbi__zlength_extra[31]= +{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + +static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + +static const int stbi__zdist_extra[32] = +{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int stbi__parse_huffman_block(stbi__zbuf *a) +{ + char *zout = a->zout; + for(;;) { + int z = stbi__zhuffman_decode(a, &a->z_length); + if (z < 256) { + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes + if (zout >= a->zout_end) { + if (!stbi__zexpand(a, zout, 1)) return 
0; + zout = a->zout; + } + *zout++ = (char) z; + } else { + stbi_uc *p; + int len,dist; + if (z == 256) { + a->zout = zout; + return 1; + } + z -= 257; + len = stbi__zlength_base[z]; + if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); + z = stbi__zhuffman_decode(a, &a->z_distance); + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); + dist = stbi__zdist_base[z]; + if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); + if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); + if (zout + len > a->zout_end) { + if (!stbi__zexpand(a, zout, len)) return 0; + zout = a->zout; + } + p = (stbi_uc *) (zout - dist); + if (dist == 1) { // run of one byte; common in images. + stbi_uc v = *p; + if (len) { do *zout++ = v; while (--len); } + } else { + if (len) { do *zout++ = *p++; while (--len); } + } + } + } +} + +static int stbi__compute_huffman_codes(stbi__zbuf *a) +{ + static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + stbi__zhuffman z_codelength; + stbi_uc lencodes[286+32+137];//padding for maximum single op + stbi_uc codelength_sizes[19]; + int i,n; + + int hlit = stbi__zreceive(a,5) + 257; + int hdist = stbi__zreceive(a,5) + 1; + int hclen = stbi__zreceive(a,4) + 4; + int ntot = hlit + hdist; + + memset(codelength_sizes, 0, sizeof(codelength_sizes)); + for (i=0; i < hclen; ++i) { + int s = stbi__zreceive(a,3); + codelength_sizes[length_dezigzag[i]] = (stbi_uc) s; + } + if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; + + n = 0; + while (n < ntot) { + int c = stbi__zhuffman_decode(a, &z_codelength); + if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG"); + if (c < 16) + lencodes[n++] = (stbi_uc) c; + else { + stbi_uc fill = 0; + if (c == 16) { + c = stbi__zreceive(a,2)+3; + if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); + fill = lencodes[n-1]; + } else if (c == 17) + c = 
stbi__zreceive(a,3)+3; + else { + STBI_ASSERT(c == 18); + c = stbi__zreceive(a,7)+11; + } + if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); + memset(lencodes+n, fill, c); + n += c; + } + } + if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG"); + if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; + return 1; +} + +static int stbi__parse_uncompressed_block(stbi__zbuf *a) +{ + stbi_uc header[4]; + int len,nlen,k; + if (a->num_bits & 7) + stbi__zreceive(a, a->num_bits & 7); // discard + // drain the bit-packed data into header + k = 0; + while (a->num_bits > 0) { + header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check + a->code_buffer >>= 8; + a->num_bits -= 8; + } + STBI_ASSERT(a->num_bits == 0); + // now fill header the normal way + while (k < 4) + header[k++] = stbi__zget8(a); + len = header[1] * 256 + header[0]; + nlen = header[3] * 256 + header[2]; + if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); + if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG"); + if (a->zout + len > a->zout_end) + if (!stbi__zexpand(a, a->zout, len)) return 0; + memcpy(a->zout, a->zbuffer, len); + a->zbuffer += len; + a->zout += len; + return 1; +} + +static int stbi__parse_zlib_header(stbi__zbuf *a) +{ + int cmf = stbi__zget8(a); + int cm = cmf & 15; + /* int cinfo = cmf >> 4; */ + int flg = stbi__zget8(a); + if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec + if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png + if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png + // window = 1 << (8 + cinfo)... 
but who cares, we fully buffer output + return 1; +} + +static const stbi_uc stbi__zdefault_length[288] = +{ + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8 +}; +static const stbi_uc stbi__zdefault_distance[32] = +{ + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 +}; +/* +Init algorithm: +{ + int i; // use <= to match clearly with spec + for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8; + for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9; + for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7; + for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8; + + for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5; +} +*/ + +static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) +{ + int final, type; + if (parse_header) + if (!stbi__parse_zlib_header(a)) return 0; + a->num_bits = 0; + a->code_buffer = 0; + do { + final = stbi__zreceive(a,1); + type = stbi__zreceive(a,2); + if (type == 0) { + if (!stbi__parse_uncompressed_block(a)) return 0; + } else if (type == 3) { + return 0; + } else { + if (type == 1) { + // use fixed code lengths + if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0; + } else { + if (!stbi__compute_huffman_codes(a)) return 0; + } + if (!stbi__parse_huffman_block(a)) return 0; + } + } while (!final); + return 1; +} + +static int stbi__do_zlib(stbi__zbuf *a, char 
*obuf, int olen, int exp, int parse_header) +{ + a->zout_start = obuf; + a->zout = obuf; + a->zout_end = obuf + olen; + a->z_expandable = exp; + + return stbi__parse_zlib(a, parse_header); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, 1)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) +{ + return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 1)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(16384); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer+len; + if (stbi__do_zlib(&a, p, 16384, 1, 0)) { + if (outlen) *outlen = (int) (a.zout - 
a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 0)) + return (int) (a.zout - a.zout_start); + else + return -1; +} +#endif + +// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 +// simple implementation +// - only 8-bit samples +// - no CRC checking +// - allocates lots of intermediate memory +// - avoids problem of streaming data between subsystems +// - avoids explicit window management +// performance +// - uses stb_zlib, a PD zlib implementation with fast huffman decoding + +#ifndef STBI_NO_PNG +typedef struct +{ + stbi__uint32 length; + stbi__uint32 type; +} stbi__pngchunk; + +static stbi__pngchunk stbi__get_chunk_header(stbi__context *s) +{ + stbi__pngchunk c; + c.length = stbi__get32be(s); + c.type = stbi__get32be(s); + return c; +} + +static int stbi__check_png_header(stbi__context *s) +{ + static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; + int i; + for (i=0; i < 8; ++i) + if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); + return 1; +} + +typedef struct +{ + stbi__context *s; + stbi_uc *idata, *expanded, *out; + int depth; +} stbi__png; + + +enum { + STBI__F_none=0, + STBI__F_sub=1, + STBI__F_up=2, + STBI__F_avg=3, + STBI__F_paeth=4, + // synthetic filters used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first, + STBI__F_paeth_first +}; + +static stbi_uc first_row_filter[5] = +{ + STBI__F_none, + STBI__F_sub, + STBI__F_none, + STBI__F_avg_first, + STBI__F_paeth_first +}; + +static int stbi__paeth(int a, int b, int c) +{ + int p = a + b - c; + int pa = abs(p-a); + int pb = abs(p-b); + int pc = abs(p-c); + if (pa <= pb && pa <= pc) return a; + if (pb <= pc) return b; + return c; +} + 
+static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; + +// create the png data from post-deflated data +static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) +{ + int bytes = (depth == 16? 2 : 1); + stbi__context *s = a->s; + stbi__uint32 i,j,stride = x*out_n*bytes; + stbi__uint32 img_len, img_width_bytes; + int k; + int img_n = s->img_n; // copy it into a local for later + + int output_bytes = out_n*bytes; + int filter_bytes = img_n*bytes; + int width = x; + + STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1); + a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into + if (!a->out) return stbi__err("outofmem", "Out of memory"); + + if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); + img_width_bytes = (((img_n * x * depth) + 7) >> 3); + img_len = (img_width_bytes + 1) * y; + + // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, + // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros), + // so just check for raw_len < img_len always. 
+ if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *prior; + int filter = *raw++; + + if (filter > 4) + return stbi__err("invalid filter","Corrupt PNG"); + + if (depth < 8) { + STBI_ASSERT(img_width_bytes <= x); + cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place + filter_bytes = 1; + width = img_width_bytes; + } + prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above + + // if first row, use special filter that doesn't sample previous row + if (j == 0) filter = first_row_filter[filter]; + + // handle first byte explicitly + for (k=0; k < filter_bytes; ++k) { + switch (filter) { + case STBI__F_none : cur[k] = raw[k]; break; + case STBI__F_sub : cur[k] = raw[k]; break; + case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; + case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; + case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; + case STBI__F_avg_first : cur[k] = raw[k]; break; + case STBI__F_paeth_first: cur[k] = raw[k]; break; + } + } + + if (depth == 8) { + if (img_n != out_n) + cur[img_n] = 255; // first pixel + raw += img_n; + cur += out_n; + prior += out_n; + } else if (depth == 16) { + if (img_n != out_n) { + cur[filter_bytes] = 255; // first pixel top byte + cur[filter_bytes+1] = 255; // first pixel bottom byte + } + raw += filter_bytes; + cur += output_bytes; + prior += output_bytes; + } else { + raw += 1; + cur += 1; + prior += 1; + } + + // this is a little gross, so that we don't switch per-pixel or per-component + if (depth < 8 || img_n == out_n) { + int nk = (width - 1)*filter_bytes; + #define STBI__CASE(f) \ + case f: \ + for (k=0; k < nk; ++k) + switch (filter) { + // "none" filter turns into a memcpy here; make that explicit. 
+ case STBI__F_none: memcpy(cur, raw, nk); break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; + } + #undef STBI__CASE + raw += nk; + } else { + STBI_ASSERT(img_n+1 == out_n); + #define STBI__CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ + for (k=0; k < filter_bytes; ++k) + switch (filter) { + STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; + } + #undef STBI__CASE + + // the loop above sets the high byte of the pixels' alpha, but for + // 16 bit png files we also need the low byte set. we'll do that here. 
+ if (depth == 16) { + cur = a->out + stride*j; // start at the beginning of the row again + for (i=0; i < x; ++i,cur+=output_bytes) { + cur[filter_bytes+1] = 255; + } + } + } + } + + // we make a separate pass to expand bits to pixels; for performance, + // this could run two scanlines behind the above code, so it won't + // intefere with filtering but will still be in the cache. + if (depth < 8) { + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; + // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit + // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop + stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range + + // note that the final byte might overshoot and write more data than desired. + // we can allocate enough data that this never writes out of memory, but it + // could also overwrite the next scanline. can it overwrite non-empty data + // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. 
+ // so we need to explicitly clamp the final ones + + if (depth == 4) { + for (k=x*img_n; k >= 2; k-=2, ++in) { + *cur++ = scale * ((*in >> 4) ); + *cur++ = scale * ((*in ) & 0x0f); + } + if (k > 0) *cur++ = scale * ((*in >> 4) ); + } else if (depth == 2) { + for (k=x*img_n; k >= 4; k-=4, ++in) { + *cur++ = scale * ((*in >> 6) ); + *cur++ = scale * ((*in >> 4) & 0x03); + *cur++ = scale * ((*in >> 2) & 0x03); + *cur++ = scale * ((*in ) & 0x03); + } + if (k > 0) *cur++ = scale * ((*in >> 6) ); + if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); + if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); + } else if (depth == 1) { + for (k=x*img_n; k >= 8; k-=8, ++in) { + *cur++ = scale * ((*in >> 7) ); + *cur++ = scale * ((*in >> 6) & 0x01); + *cur++ = scale * ((*in >> 5) & 0x01); + *cur++ = scale * ((*in >> 4) & 0x01); + *cur++ = scale * ((*in >> 3) & 0x01); + *cur++ = scale * ((*in >> 2) & 0x01); + *cur++ = scale * ((*in >> 1) & 0x01); + *cur++ = scale * ((*in ) & 0x01); + } + if (k > 0) *cur++ = scale * ((*in >> 7) ); + if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); + if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); + if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); + if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); + if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); + if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); + } + if (img_n != out_n) { + int q; + // insert alpha = 255 + cur = a->out + stride*j; + if (img_n == 1) { + for (q=x-1; q >= 0; --q) { + cur[q*2+1] = 255; + cur[q*2+0] = cur[q]; + } + } else { + STBI_ASSERT(img_n == 3); + for (q=x-1; q >= 0; --q) { + cur[q*4+3] = 255; + cur[q*4+2] = cur[q*3+2]; + cur[q*4+1] = cur[q*3+1]; + cur[q*4+0] = cur[q*3+0]; + } + } + } + } + } else if (depth == 16) { + // force the image data from big-endian to platform-native. + // this is done in a separate pass due to the decoding relying + // on the data being untouched, but could probably be done + // per-line during decode if care is taken. 
+ stbi_uc *cur = a->out; + stbi__uint16 *cur16 = (stbi__uint16*)cur; + + for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { + *cur16 = (cur[0] << 8) | cur[1]; + } + } + + return 1; +} + +static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) +{ + int bytes = (depth == 16 ? 2 : 1); + int out_bytes = out_n * bytes; + stbi_uc *final; + int p; + if (!interlaced) + return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); + + // de-interlacing + final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); + for (p=0; p < 7; ++p) { + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; + if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { + STBI_FREE(final); + return 0; + } + for (j=0; j < y; ++j) { + for (i=0; i < x; ++i) { + int out_y = j*yspc[p]+yorig[p]; + int out_x = i*xspc[p]+xorig[p]; + memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, + a->out + (j*x+i)*out_bytes, out_bytes); + } + } + STBI_FREE(a->out); + image_data += img_len; + image_data_len -= img_len; + } + } + a->out = final; + + return 1; +} + +static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + // compute color-based transparency, assuming we've + // already got 255 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i=0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] 
? 0 : 255); + p += 2; + } + } else { + for (i=0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi__uint16 *p = (stbi__uint16*) z->out; + + // compute color-based transparency, assuming we've + // already got 65535 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i = 0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 65535); + p += 2; + } + } else { + for (i = 0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n) +{ + stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y; + stbi_uc *p, *temp_out, *orig = a->out; + + p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0); + if (p == NULL) return stbi__err("outofmem", "Out of memory"); + + // between here and free(out) below, exitting would leak + temp_out = p; + + if (pal_img_n == 3) { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p += 3; + } + } else { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p[3] = palette[n+3]; + p += 4; + } + } + STBI_FREE(a->out); + a->out = temp_out; + + STBI_NOTUSED(len); + + return 1; +} + +static int stbi__unpremultiply_on_load = 0; +static int stbi__de_iphone_flag = 0; + +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) +{ + stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply; +} + +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) +{ + stbi__de_iphone_flag = 
flag_true_if_should_convert; +} + +static void stbi__de_iphone(stbi__png *z) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + if (s->img_out_n == 3) { // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 3; + } + } else { + STBI_ASSERT(s->img_out_n == 4); + if (stbi__unpremultiply_on_load) { + // convert bgr to rgb and unpremultiply + for (i=0; i < pixel_count; ++i) { + stbi_uc a = p[3]; + stbi_uc t = p[0]; + if (a) { + stbi_uc half = a / 2; + p[0] = (p[2] * 255 + half) / a; + p[1] = (p[1] * 255 + half) / a; + p[2] = ( t * 255 + half) / a; + } else { + p[0] = p[2]; + p[2] = t; + } + p += 4; + } + } else { + // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 4; + } + } + } +} + +#define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) + +static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) +{ + stbi_uc palette[1024], pal_img_n=0; + stbi_uc has_trans=0, tc[3]; + stbi__uint16 tc16[3]; + stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; + int first=1,k,interlace=0, color=0, is_iphone=0; + stbi__context *s = z->s; + + z->expanded = NULL; + z->idata = NULL; + z->out = NULL; + + if (!stbi__check_png_header(s)) return 0; + + if (scan == STBI__SCAN_type) return 1; + + for (;;) { + stbi__pngchunk c = stbi__get_chunk_header(s); + switch (c.type) { + case STBI__PNG_TYPE('C','g','B','I'): + is_iphone = 1; + stbi__skip(s, c.length); + break; + case STBI__PNG_TYPE('I','H','D','R'): { + int comp,filter; + if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); + first = 0; + if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); + s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)"); + s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) 
return stbi__err("too large","Very large image (corrupt?)"); + z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); + color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); + comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG"); + filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG"); + interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); + if (!pal_img_n) { + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); + if (scan == STBI__SCAN_header) return 1; + } else { + // if paletted, then pal_n is our final components, and + // img_n is # components to decompress/filter. 
+ s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); + // if SCAN_header, have to scan to see if we have a tRNS + } + break; + } + + case STBI__PNG_TYPE('P','L','T','E'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); + pal_len = c.length / 3; + if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); + for (i=0; i < pal_len; ++i) { + palette[i*4+0] = stbi__get8(s); + palette[i*4+1] = stbi__get8(s); + palette[i*4+2] = stbi__get8(s); + palette[i*4+3] = 255; + } + break; + } + + case STBI__PNG_TYPE('t','R','N','S'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); + if (pal_img_n) { + if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } + if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); + if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); + pal_img_n = 4; + for (i=0; i < c.length; ++i) + palette[i*4+3] = stbi__get8(s); + } else { + if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); + if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); + has_trans = 1; + if (z->depth == 16) { + for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is + } else { + for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger + } + } + break; + } + + case STBI__PNG_TYPE('I','D','A','T'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); + if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; } + if ((int)(ioff + c.length) < (int)ioff) return 0; + if (ioff + c.length > idata_limit) { + stbi__uint32 idata_limit_old = 
idata_limit; + stbi_uc *p; + if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096; + while (ioff + c.length > idata_limit) + idata_limit *= 2; + STBI_NOTUSED(idata_limit_old); + p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); + z->idata = p; + } + if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); + ioff += c.length; + break; + } + + case STBI__PNG_TYPE('I','E','N','D'): { + stbi__uint32 raw_len, bpl; + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (scan != STBI__SCAN_load) return 1; + if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); + // initial guess for decoded data size to avoid unnecessary reallocs + bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component + raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; + z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); + if (z->expanded == NULL) return 0; // zlib should set error + STBI_FREE(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; + else + s->img_out_n = s->img_n; + if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; + if (has_trans) { + if (z->depth == 16) { + if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; + } else { + if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; + } + } + if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) + stbi__de_iphone(z); + if (pal_img_n) { + // pal_img_n == 3 or 4 + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) + return 0; + } else if 
(has_trans) { + // non-paletted image with tRNS -> source image has (constant) alpha + ++s->img_n; + } + STBI_FREE(z->expanded); z->expanded = NULL; + return 1; + } + + default: + // if critical, fail + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if ((c.type & (1 << 29)) == 0) { + #ifndef STBI_NO_FAILURE_STRINGS + // not threadsafe + static char invalid_chunk[] = "XXXX PNG chunk not known"; + invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); + invalid_chunk[1] = STBI__BYTECAST(c.type >> 16); + invalid_chunk[2] = STBI__BYTECAST(c.type >> 8); + invalid_chunk[3] = STBI__BYTECAST(c.type >> 0); + #endif + return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type"); + } + stbi__skip(s, c.length); + break; + } + // end of PNG chunk, read and skip CRC + stbi__get32be(s); + } +} + +static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri) +{ + void *result=NULL; + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { + if (p->depth < 8) + ri->bits_per_channel = 8; + else + ri->bits_per_channel = p->depth; + result = p->out; + p->out = NULL; + if (req_comp && req_comp != p->s->img_out_n) { + if (ri->bits_per_channel == 8) + result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + else + result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + p->s->img_out_n = req_comp; + if (result == NULL) return result; + } + *x = p->s->img_x; + *y = p->s->img_y; + if (n) *n = p->s->img_n; + } + STBI_FREE(p->out); p->out = NULL; + STBI_FREE(p->expanded); p->expanded = NULL; + STBI_FREE(p->idata); p->idata = NULL; + + return result; +} + +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi__png p; + p.s = s; + return stbi__do_png(&p, 
x,y,comp,req_comp, ri);
}

// probe: does the stream start with a PNG signature? rewinds afterwards
static int stbi__png_test(stbi__context *s)
{
   int r;
   r = stbi__check_png_header(s);
   stbi__rewind(s);
   return r;
}

// parse only the PNG header chunks to report dimensions/channels without decoding
static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
{
   if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
      stbi__rewind( p->s );
      return 0;
   }
   if (x) *x = p->s->img_x;
   if (y) *y = p->s->img_y;
   if (comp) *comp = p->s->img_n;
   return 1;
}

static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__png p;
   p.s = s;
   return stbi__png_info_raw(&p, x, y, comp);
}

// report whether the PNG stores 16 bits per channel; rewinds on a "no"
static int stbi__png_is16(stbi__context *s)
{
   stbi__png p;
   p.s = s;
   if (!stbi__png_info_raw(&p, NULL, NULL, NULL))
      return 0;
   if (p.depth != 16) {
      stbi__rewind(p.s);
      return 0;
   }
   return 1;
}
#endif

// Microsoft/Windows BMP image

#ifndef STBI_NO_BMP
// probe: "BM" magic followed by a known BITMAPINFOHEADER size
static int stbi__bmp_test_raw(stbi__context *s)
{
   int r;
   int sz;
   if (stbi__get8(s) != 'B') return 0;
   if (stbi__get8(s) != 'M') return 0;
   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   stbi__get32le(s); // discard data offset
   sz = stbi__get32le(s);
   r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
   return r;
}

static int stbi__bmp_test(stbi__context *s)
{
   int r = stbi__bmp_test_raw(s);
   stbi__rewind(s);
   return r;
}


// returns 0..31 for the highest set bit, -1 if z == 0
static int stbi__high_bit(unsigned int z)
{
   int n=0;
   if (z == 0) return -1;
   // binary search for the top bit, 16/8/4/2/1 at a time
   if (z >= 0x10000) n += 16, z >>= 16;
   if (z >= 0x00100) n +=  8, z >>=  8;
   if (z >= 0x00010) n +=  4, z >>=  4;
   if (z >= 0x00004) n +=  2, z >>=  2;
   if (z >= 0x00002) n +=  1, z >>=  1;
   return n;
}

// population count via parallel bit-summing
static int stbi__bitcount(unsigned int a)
{
   a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2
   a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4
   a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits
   a = (a + (a >> 8)); // max 16 per 8 bits
   a =
(a + (a >> 16)); // max 32 per 8 bits
   return a & 0xff;
}

// extract an arbitrarily-aligned N-bit value (N=bits)
// from v, and then make it 8-bits long and fractionally
// extend it to full range.
static int stbi__shiftsigned(int v, int shift, int bits)
{
   // mul_table[bits] replicates the top `bits` bits across the byte so the
   // value scales to 0..255; shift_table drops the excess after multiplying
   static unsigned int mul_table[9] = {
      0,
      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
   };
   static unsigned int shift_table[9] = {
      0, 0,0,1,0,2,4,6,0,
   };
   if (shift < 0)
      v <<= -shift;
   else
      v >>= shift;
   STBI_ASSERT(v >= 0 && v < 256);
   v >>= (8-bits);
   STBI_ASSERT(bits >= 0 && bits <= 8);
   return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits];
}

// parsed BMP header fields: bits per pixel, pixel-data offset, header size,
// and the R/G/B/A channel bitmasks (0 when not used)
typedef struct
{
   int bpp, offset, hsz;
   unsigned int mr,mg,mb,ma, all_a;
} stbi__bmp_data;

// parse the BMP file header + info header into *info; returns non-NULL on
// success ((void*)1 — no allocation), NULL with the error reason set on failure.
// NOTE(review): caller is expected to pre-set info->all_a (stbi__bmp_load sets 255).
static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
{
   int hsz;
   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   info->offset = stbi__get32le(s);
   info->hsz = hsz = stbi__get32le(s);
   info->mr = info->mg = info->mb = info->ma = 0;

   if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
   if (hsz == 12) {
      // BITMAPCOREHEADER: 16-bit dimensions
      s->img_x = stbi__get16le(s);
      s->img_y = stbi__get16le(s);
   } else {
      s->img_x = stbi__get32le(s);
      s->img_y = stbi__get32le(s);
   }
   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
   info->bpp = stbi__get16le(s);
   if (hsz != 12) {
      int compress = stbi__get32le(s);
      if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
      stbi__get32le(s); // discard sizeof
      stbi__get32le(s); // discard hres
      stbi__get32le(s); // discard vres
      stbi__get32le(s); //
// discard colorsused
      stbi__get32le(s); // discard max important
      if (hsz == 40 || hsz == 56) {
         if (hsz == 56) {
            // skip the 4 extra mask fields of the 56-byte header variant
            stbi__get32le(s);
            stbi__get32le(s);
            stbi__get32le(s);
            stbi__get32le(s);
         }
         if (info->bpp == 16 || info->bpp == 32) {
            if (compress == 0) {
               // BI_RGB: fixed default masks
               if (info->bpp == 32) {
                  info->mr = 0xffu << 16;
                  info->mg = 0xffu <<  8;
                  info->mb = 0xffu <<  0;
                  info->ma = 0xffu << 24;
                  info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
               } else {
                  // 16bpp default is 5-5-5
                  info->mr = 31u << 10;
                  info->mg = 31u <<  5;
                  info->mb = 31u <<  0;
               }
            } else if (compress == 3) {
               // BI_BITFIELDS: masks follow the header
               info->mr = stbi__get32le(s);
               info->mg = stbi__get32le(s);
               info->mb = stbi__get32le(s);
               // not documented, but generated by photoshop and handled by mspaint
               if (info->mr == info->mg && info->mg == info->mb) {
                  // ?!?!?
                  return stbi__errpuc("bad BMP", "bad BMP");
               }
            } else
               return stbi__errpuc("bad BMP", "bad BMP");
         }
      } else {
         // V4/V5 headers: masks always present
         int i;
         if (hsz != 108 && hsz != 124)
            return stbi__errpuc("bad BMP", "bad BMP");
         info->mr = stbi__get32le(s);
         info->mg = stbi__get32le(s);
         info->mb = stbi__get32le(s);
         info->ma = stbi__get32le(s);
         stbi__get32le(s); // discard color space
         for (i=0; i < 12; ++i)
            stbi__get32le(s); // discard color space parameters
         if (hsz == 124) {
            stbi__get32le(s); // discard rendering intent
            stbi__get32le(s); // discard offset of profile data
            stbi__get32le(s); // discard size of profile data
            stbi__get32le(s); // discard reserved
         }
      }
   }
   return (void *) 1;
}


// decode a BMP into an 8-bit interleaved pixel buffer; handles 1/4/8-bit
// paletted and 16/24/32-bit masked pixel formats (RLE rejected by the header parser)
static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *out;
   unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
   stbi_uc pal[256][4];
   int psize=0,i,j,width;
   int flip_vertically, pad, target;
   stbi__bmp_data info;
   STBI_NOTUSED(ri);

   info.all_a = 255;
   if (stbi__bmp_parse_header(s, &info) == NULL)
      return NULL; // error code already set

   // positive height means bottom-up storage, so we must flip
   flip_vertically = ((int) s->img_y) > 0;
   s->img_y =
abs((int) s->img_y);

   mr = info.mr;
   mg = info.mg;
   mb = info.mb;
   ma = info.ma;
   all_a = info.all_a;

   // infer palette entry count from the gap between headers and pixel data
   // (3 bytes/entry for the old 12-byte header, 4 otherwise)
   if (info.hsz == 12) {
      if (info.bpp < 24)
         psize = (info.offset - 14 - 24) / 3;
   } else {
      if (info.bpp < 16)
         psize = (info.offset - 14 - info.hsz) >> 2;
   }

   s->img_n = ma ? 4 : 3;
   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
      target = req_comp;
   else
      target = s->img_n; // if they want monochrome, we'll post-convert

   // sanity-check size
   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
      return stbi__errpuc("too large", "Corrupt BMP");

   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
   if (!out) return stbi__errpuc("outofmem", "Out of memory");
   if (info.bpp < 16) {
      // paletted path: read palette (stored BGR), then decode indices
      int z=0;
      if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
      for (i=0; i < psize; ++i) {
         pal[i][2] = stbi__get8(s);
         pal[i][1] = stbi__get8(s);
         pal[i][0] = stbi__get8(s);
         if (info.hsz != 12) stbi__get8(s);
         pal[i][3] = 255;
      }
      // skip any slack between the palette and the pixel data
      stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ?
3 : 4));
      if (info.bpp == 1) width = (s->img_x + 7) >> 3;
      else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
      else if (info.bpp == 8) width = s->img_x;
      else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
      pad = (-width)&3; // BMP rows are padded to 4-byte multiples
      if (info.bpp == 1) {
         // 1bpp: unpack bits MSB-first through the palette
         for (j=0; j < (int) s->img_y; ++j) {
            int bit_offset = 7, v = stbi__get8(s);
            for (i=0; i < (int) s->img_x; ++i) {
               int color = (v>>bit_offset)&0x1;
               out[z++] = pal[color][0];
               out[z++] = pal[color][1];
               out[z++] = pal[color][2];
               if((--bit_offset) < 0) {
                  bit_offset = 7;
                  v = stbi__get8(s);
               }
            }
            stbi__skip(s, pad);
         }
      } else {
         // 4/8bpp: two pixels per loop iteration (high nibble first for 4bpp)
         for (j=0; j < (int) s->img_y; ++j) {
            for (i=0; i < (int) s->img_x; i += 2) {
               int v=stbi__get8(s),v2=0;
               if (info.bpp == 4) {
                  v2 = v & 15;
                  v >>= 4;
               }
               out[z++] = pal[v][0];
               out[z++] = pal[v][1];
               out[z++] = pal[v][2];
               if (target == 4) out[z++] = 255;
               if (i+1 == (int) s->img_x) break;
               v = (info.bpp == 8) ? stbi__get8(s) : v2;
               out[z++] = pal[v][0];
               out[z++] = pal[v][1];
               out[z++] = pal[v][2];
               if (target == 4) out[z++] = 255;
            }
            stbi__skip(s, pad);
         }
      }
   } else {
      // 16/24/32bpp path: decode via channel bitmasks
      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
      int z = 0;
      int easy=0;
      stbi__skip(s, info.offset - 14 - info.hsz);
      if (info.bpp == 24) width = 3 * s->img_x;
      else if (info.bpp == 16) width = 2*s->img_x;
      else /* bpp = 32 and pad = 0 */ width=0;
      pad = (-width) & 3;
      // "easy" fast paths: plain BGR (24bpp) or BGRA with the standard masks (32bpp)
      if (info.bpp == 24) {
         easy = 1;
      } else if (info.bpp == 32) {
         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
            easy = 2;
      }
      if (!easy) {
         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
         // right shift amt to put high bit in position #7
         rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
         gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
         bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
         ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
      }
      for (j=0; j < (int) s->img_y; ++j) {
         if (easy) {
            // fast path: bytes are stored B,G,R(,A); swap to RGB(A)
            for (i=0; i < (int) s->img_x; ++i) {
               unsigned char a;
               out[z+2] = stbi__get8(s);
               out[z+1] = stbi__get8(s);
               out[z+0] = stbi__get8(s);
               z += 3;
               a = (easy == 2 ? stbi__get8(s) : 255);
               all_a |= a;
               if (target == 4) out[z++] = a;
            }
         } else {
            // general path: extract each channel through its bitmask
            int bpp = info.bpp;
            for (i=0; i < (int) s->img_x; ++i) {
               stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
               unsigned int a;
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
               a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
               all_a |= a;
               if (target == 4) out[z++] = STBI__BYTECAST(a);
            }
         }
         stbi__skip(s, pad);
      }
   }

   // if alpha channel is all 0s, replace with all 255s
   if (target == 4 && all_a == 0)
      for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
         out[i] = 255;

   if (flip_vertically) {
      // swap rows top<->bottom in place
      stbi_uc t;
      for (j=0; j < (int) s->img_y>>1; ++j) {
         stbi_uc *p1 = out +      j     *s->img_x*target;
         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
         for (i=0; i < (int) s->img_x*target; ++i) {
            t = p1[i], p1[i] = p2[i], p2[i] = t;
         }
      }
   }

   if (req_comp && req_comp != target) {
      out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }

   *x = s->img_x;
   *y = s->img_y;
   if (comp) *comp = s->img_n;
   return out;
}
#endif

// Targa Truevision - TGA
// by Jonathan Dummer
#ifndef STBI_NO_TGA
// returns STBI_rgb or whatever, 0 on error
static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
{
   // only RGB or RGBA (incl.
// 16bit) or grey allowed
   if (is_rgb16) *is_rgb16 = 0;
   switch(bits_per_pixel) {
      case 8:  return STBI_grey;
      case 16: if(is_grey) return STBI_grey_alpha;
               // fallthrough
      case 15: if(is_rgb16) *is_rgb16 = 1;
               return STBI_rgb;
      case 24: // fallthrough
      case 32: return bits_per_pixel/8;
      default: return 0;
   }
}

// parse just enough of the TGA header to report width/height/components;
// rewinds and returns 0 on anything we can't decode
static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
{
    int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
    int sz, tga_colormap_type;
    stbi__get8(s);                   // discard Offset
    tga_colormap_type = stbi__get8(s); // colormap type
    if( tga_colormap_type > 1 ) {
        stbi__rewind(s);
        return 0;      // only RGB or indexed allowed
    }
    tga_image_type = stbi__get8(s); // image type
    if ( tga_colormap_type == 1 ) { // colormapped (paletted) image
        if (tga_image_type != 1 && tga_image_type != 9) {
            stbi__rewind(s);
            return 0;
        }
        stbi__skip(s,4);       // skip index of first colormap entry and number of entries
        sz = stbi__get8(s);    //   check bits per palette color entry
        if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) {
            stbi__rewind(s);
            return 0;
        }
        stbi__skip(s,4);       // skip image x and y origin
        tga_colormap_bpp = sz;
    } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
        if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) {
            stbi__rewind(s);
            return 0; // only RGB or grey allowed, +/- RLE
        }
        stbi__skip(s,9); // skip colormap specification and image x/y origin
        tga_colormap_bpp = 0;
    }
    tga_w = stbi__get16le(s);
    if( tga_w < 1 ) {
        stbi__rewind(s);
        return 0;   // test width
    }
    tga_h = stbi__get16le(s);
    if( tga_h < 1 ) {
        stbi__rewind(s);
        return 0;   // test height
    }
    tga_bits_per_pixel = stbi__get8(s); // bits per pixel
    stbi__get8(s); // ignore alpha bits
    if (tga_colormap_bpp != 0) {
        if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
            // when using a colormap,
// tga_bits_per_pixel is the size of the indexes
            // I don't think anything but 8 or 16bit indexes makes sense
            stbi__rewind(s);
            return 0;
        }
        tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
    } else {
        tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL);
    }
    if(!tga_comp) {
      stbi__rewind(s);
      return 0;
    }
    if (x) *x = tga_w;
    if (y) *y = tga_h;
    if (comp) *comp = tga_comp;
    return 1;                   // seems to have passed everything
}

// probe: validate the TGA header fields without decoding; always rewinds
static int stbi__tga_test(stbi__context *s)
{
   int res = 0;
   int sz, tga_color_type;
   stbi__get8(s);      //   discard Offset
   tga_color_type = stbi__get8(s);   //   color type
   if ( tga_color_type > 1 ) goto errorEnd;   //   only RGB or indexed allowed
   sz = stbi__get8(s);   //   image type
   if ( tga_color_type == 1 ) { // colormapped (paletted) image
      if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9
      stbi__skip(s,4);       // skip index of first colormap entry and number of entries
      sz = stbi__get8(s);    //   check bits per palette color entry
      if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
      stbi__skip(s,4);       // skip image x and y origin
   } else { // "normal" image w/o colormap
      if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE
      stbi__skip(s,9); // skip colormap specification and image x/y origin
   }
   if ( stbi__get16le(s) < 1 ) goto errorEnd;      //   test width
   if ( stbi__get16le(s) < 1 ) goto errorEnd;      //   test height
   sz = stbi__get8(s);   //   bits per pixel
   if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index
   if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;

   res = 1; // if we got this far, everything's good and we can return 1 instead of 0

errorEnd:
   stbi__rewind(s);
   return res;
}

// read 16bit value and convert to 24bit RGB
// expand one 5-5-5 16-bit TGA pixel into out[0..2] as 8-bit RGB
static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
{
   stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
   stbi__uint16 fiveBitMask = 31;
   // we have 3 channels with 5bits each
   int r = (px >> 10) & fiveBitMask;
   int g = (px >> 5) & fiveBitMask;
   int b = px & fiveBitMask;
   // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
   out[0] = (stbi_uc)((r * 255)/31);
   out[1] = (stbi_uc)((g * 255)/31);
   out[2] = (stbi_uc)((b * 255)/31);

   // some people claim that the most significant bit might be used for alpha
   // (possibly if an alpha-bit is set in the "image descriptor byte")
   // but that only made 16bit test images completely translucent..
   // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
}

// decode a TGA (paletted / truecolor / grey, optionally RLE-compressed)
// into an 8-bit interleaved buffer
static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   // read in the TGA header stuff
   int tga_offset = stbi__get8(s);
   int tga_indexed = stbi__get8(s);
   int tga_image_type = stbi__get8(s);
   int tga_is_RLE = 0;
   int tga_palette_start = stbi__get16le(s);
   int tga_palette_len = stbi__get16le(s);
   int tga_palette_bits = stbi__get8(s);
   int tga_x_origin = stbi__get16le(s);
   int tga_y_origin = stbi__get16le(s);
   int tga_width = stbi__get16le(s);
   int tga_height = stbi__get16le(s);
   int tga_bits_per_pixel = stbi__get8(s);
   int tga_comp, tga_rgb16=0;
   int tga_inverted = stbi__get8(s);
   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
   // image data
   unsigned char *tga_data;
   unsigned char *tga_palette = NULL;
   int i, j;
   unsigned char raw_data[4] = {0};
   int RLE_count = 0;
   int RLE_repeating = 0;
   int read_next_pixel = 1;
   STBI_NOTUSED(ri);

   // do a tiny bit of preprocessing: image types >= 8 are the RLE variants
   if ( tga_image_type >= 8 )
   {
      tga_image_type -= 8;
      tga_is_RLE = 1;
   }
   // bit 5 of the descriptor byte = top-to-bottom flag
   tga_inverted = 1 - ((tga_inverted >> 5) & 1);

   // If I'm paletted, then I'll use the number of bits from the palette
   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);

   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");

   // tga info
   *x = tga_width;
   *y = tga_height;
   if (comp) *comp = tga_comp;

   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
      return stbi__errpuc("too large", "Corrupt TGA");

   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");

   // skip to the data's starting position (offset usually = 0)
   stbi__skip(s, tga_offset );

   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
      // fast path: raw, non-paletted, 8-bit-per-channel rows read straight in
      for (i=0; i < tga_height; ++i) {
         int row = tga_inverted ? tga_height -i - 1 : i;
         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
         stbi__getn(s, tga_row, tga_width * tga_comp);
      }
   } else  {
      // do I need to load a palette?
      if ( tga_indexed)
      {
         // any data to skip?
// (offset usually = 0)
         stbi__skip(s, tga_palette_start );
         //   load the palette
         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
         if (!tga_palette) {
            STBI_FREE(tga_data);
            return stbi__errpuc("outofmem", "Out of memory");
         }
         if (tga_rgb16) {
            // 16-bit palette entries are expanded to 24-bit RGB on load
            stbi_uc *pal_entry = tga_palette;
            STBI_ASSERT(tga_comp == STBI_rgb);
            for (i=0; i < tga_palette_len; ++i) {
               stbi__tga_read_rgb16(s, pal_entry);
               pal_entry += tga_comp;
            }
         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
               STBI_FREE(tga_data);
               STBI_FREE(tga_palette);
               return stbi__errpuc("bad palette", "Corrupt TGA");
         }
      }
      // load the data
      for (i=0; i < tga_width * tga_height; ++i)
      {
         // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
         if ( tga_is_RLE )
         {
            if ( RLE_count == 0 )
            {
               // yep, get the next byte as a RLE command
               // top bit = repeat packet, low 7 bits = count-1
               int RLE_cmd = stbi__get8(s);
               RLE_count = 1 + (RLE_cmd & 127);
               RLE_repeating = RLE_cmd >> 7;
               read_next_pixel = 1;
            } else if ( !RLE_repeating )
            {
               read_next_pixel = 1;
            }
         } else
         {
            read_next_pixel = 1;
         }
         // OK, if I need to read a pixel, do it now
         if ( read_next_pixel )
         {
            // load however much data we did have
            if ( tga_indexed )
            {
               // read in index, then perform the lookup
               int pal_idx = (tga_bits_per_pixel == 8) ?
stbi__get8(s) : stbi__get16le(s);
               if ( pal_idx >= tga_palette_len ) {
                  // invalid index: clamp to entry 0 rather than failing
                  pal_idx = 0;
               }
               pal_idx *= tga_comp;
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = tga_palette[pal_idx+j];
               }
            } else if(tga_rgb16) {
               STBI_ASSERT(tga_comp == STBI_rgb);
               stbi__tga_read_rgb16(s, raw_data);
            } else {
               //   read in the data raw
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = stbi__get8(s);
               }
            }
            //   clear the reading flag for the next pixel
            read_next_pixel = 0;
         } // end of reading a pixel

         // copy data
         for (j = 0; j < tga_comp; ++j)
           tga_data[i*tga_comp+j] = raw_data[j];

         //   in case we're in RLE mode, keep counting down
         --RLE_count;
      }
      //   do I need to invert the image?
      if ( tga_inverted )
      {
         // swap rows top<->bottom in place
         for (j = 0; j*2 < tga_height; ++j)
         {
            int index1 = j * tga_width * tga_comp;
            int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
            for (i = tga_width * tga_comp; i > 0; --i)
            {
               unsigned char temp = tga_data[index1];
               tga_data[index1] = tga_data[index2];
               tga_data[index2] = temp;
               ++index1;
               ++index2;
            }
         }
      }
      //   clear my palette, if I had one
      if ( tga_palette != NULL )
      {
         STBI_FREE( tga_palette );
      }
   }

   // swap RGB - if the source data was RGB16, it already is in the right order
   if (tga_comp >= 3 && !tga_rgb16)
   {
      unsigned char* tga_pixel = tga_data;
      for (i=0; i < tga_width * tga_height; ++i)
      {
         unsigned char temp = tga_pixel[0];
         tga_pixel[0] = tga_pixel[2];
         tga_pixel[2] = temp;
         tga_pixel += tga_comp;
      }
   }

   // convert to target component count
   if (req_comp && req_comp != tga_comp)
      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);

   // the things I do to get rid of an error message, and yet keep
   // Microsoft's C compilers happy...
// [8^(
   tga_palette_start = tga_palette_len = tga_palette_bits =
         tga_x_origin = tga_y_origin = 0;
   // OK, done
   return tga_data;
}
#endif

// *************************************************************************************************
// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB

#ifndef STBI_NO_PSD
// probe: 4-byte big-endian "8BPS" signature; rewinds afterwards
static int stbi__psd_test(stbi__context *s)
{
   int r = (stbi__get32be(s) == 0x38425053);
   stbi__rewind(s);
   return r;
}

// decode one PackBits-RLE-compressed channel; writes every 4th byte of p
// (channels are interleaved in the output). Returns 0 on corrupt data.
static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
{
   int count, nleft, len;

   count = 0;
   while ((nleft = pixelCount - count) > 0) {
      len = stbi__get8(s);
      if (len == 128) {
         // No-op.
      } else if (len < 128) {
         // Copy next len+1 bytes literally.
         len++;
         if (len > nleft) return 0; // corrupt data
         count += len;
         while (len) {
            *p = stbi__get8(s);
            p += 4;
            len--;
         }
      } else if (len > 128) {
         stbi_uc   val;
         // Next -len+1 bytes in the dest are replicated from next source byte.
         // (Interpret len as a negative 8-bit int.)
         len = 257 - len;
         if (len > nleft) return 0; // corrupt data
         val = stbi__get8(s);
         count += len;
         while (len) {
            *p = val;
            p += 4;
            len--;
         }
      }
   }

   return 1;
}

// decode a PSD (8- or 16-bit RGB, raw or RLE) into a 4-channel buffer;
// bpc is the caller-requested bits per channel (8 or 16)
static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   int pixelCount;
   int channelCount, compression;
   int channel, i;
   int bitdepth;
   int w,h;
   stbi_uc *out;
   STBI_NOTUSED(ri);

   // Check identifier
   if (stbi__get32be(s) != 0x38425053)   // "8BPS"
      return stbi__errpuc("not PSD", "Corrupt PSD image");

   // Check file type version.
   if (stbi__get16be(s) != 1)
      return stbi__errpuc("wrong version", "Unsupported version of PSD image");

   // Skip 6 reserved bytes.
   stbi__skip(s, 6 );

   // Read the number of channels (R, G, B, A, etc).
   channelCount = stbi__get16be(s);
   if (channelCount < 0 || channelCount > 16)
      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");

   // Read the rows and columns of the image.
   h = stbi__get32be(s);
   w = stbi__get32be(s);

   // Make sure the depth is 8 bits.
   bitdepth = stbi__get16be(s);
   if (bitdepth != 8 && bitdepth != 16)
      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");

   // Make sure the color mode is RGB.
   // Valid options are:
   //   0: Bitmap
   //   1: Grayscale
   //   2: Indexed color
   //   3: RGB color
   //   4: CMYK color
   //   7: Multichannel
   //   8: Duotone
   //   9: Lab color
   if (stbi__get16be(s) != 3)
      return stbi__errpuc("wrong color format", "PSD is not in RGB color format");

   // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
   stbi__skip(s,stbi__get32be(s) );

   // Skip the image resources.  (resolution, pen tool paths, etc)
   stbi__skip(s, stbi__get32be(s) );

   // Skip the reserved data.
   stbi__skip(s, stbi__get32be(s) );

   // Find out if the data is compressed.
   // Known values:
   //   0: no compression
   //   1: RLE compressed
   compression = stbi__get16be(s);
   if (compression > 1)
      return stbi__errpuc("bad compression", "PSD has an unknown compression format");

   // Check size
   if (!stbi__mad3sizes_valid(4, w, h, 0))
      return stbi__errpuc("too large", "Corrupt PSD");

   // Create the destination image.
   // 16-bit output needs twice the storage (8 bytes per RGBA pixel)

   if (!compression && bitdepth == 16 && bpc == 16) {
      out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
      ri->bits_per_channel = 16;
   } else
      out = (stbi_uc *) stbi__malloc(4 * w*h);

   if (!out) return stbi__errpuc("outofmem", "Out of memory");
   pixelCount = w*h;

   // Initialize the data to zero.
   //memset( out, 0, pixelCount * 4 );

   // Finally, the image data.
   if (compression) {
      // RLE as used by .PSD and .TIFF
      // Loop until you get the number of unpacked bytes you are expecting:
      //     Read the next source byte into n.
      //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
      //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
      //     Else if n is 128, noop.
      // Endloop

      // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
      // which we're going to just skip.
      stbi__skip(s, h * channelCount * 2 );

      // Read the RLE data by channel.
      for (channel = 0; channel < 4; channel++) {
         stbi_uc *p;

         p = out+channel;
         if (channel >= channelCount) {
            // Fill this channel with default data.
            for (i = 0; i < pixelCount; i++, p += 4)
               *p = (channel == 3 ? 255 : 0);
         } else {
            // Read the RLE data.
            if (!stbi__psd_decode_rle(s, p, pixelCount)) {
               STBI_FREE(out);
               return stbi__errpuc("corrupt", "bad RLE data");
            }
         }
      }

   } else {
      // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
      // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.

      // Read the data by channel.
      for (channel = 0; channel < 4; channel++) {
         if (channel >= channelCount) {
            // Fill this channel with default data.
            if (bitdepth == 16 && bpc == 16) {
               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
               stbi__uint16 val = channel == 3 ? 65535 : 0;
               for (i = 0; i < pixelCount; i++, q += 4)
                  *q = val;
            } else {
               stbi_uc *p = out+channel;
               stbi_uc val = channel == 3 ?
255 : 0;
               for (i = 0; i < pixelCount; i++, p += 4)
                  *p = val;
            }
         } else {
            if (ri->bits_per_channel == 16) {    // output bpc
               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
               for (i = 0; i < pixelCount; i++, q += 4)
                  *q = (stbi__uint16) stbi__get16be(s);
            } else {
               stbi_uc *p = out+channel;
               if (bitdepth == 16) {  // input bpc
                  // 16-bit source down-converted to 8-bit output: keep the high byte
                  for (i = 0; i < pixelCount; i++, p += 4)
                     *p = (stbi_uc) (stbi__get16be(s) >> 8);
               } else {
                  for (i = 0; i < pixelCount; i++, p += 4)
                     *p = stbi__get8(s);
               }
            }
         }
      }
   }

   // remove weird white matte from PSD (un-premultiply against white)
   if (channelCount >= 4) {
      if (ri->bits_per_channel == 16) {
         for (i=0; i < w*h; ++i) {
            stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
            if (pixel[3] != 0 && pixel[3] != 65535) {
               float a = pixel[3] / 65535.0f;
               float ra = 1.0f / a;
               float inv_a = 65535.0f * (1 - ra);
               pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
               pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
               pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
            }
         }
      } else {
         for (i=0; i < w*h; ++i) {
            unsigned char *pixel = out + 4*i;
            if (pixel[3] != 0 && pixel[3] != 255) {
               float a = pixel[3] / 255.0f;
               float ra = 1.0f / a;
               float inv_a = 255.0f * (1 - ra);
               pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
               pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
               pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
            }
         }
      }
   }

   // convert to desired output format
   if (req_comp && req_comp != 4) {
      if (ri->bits_per_channel == 16)
         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
      else
         out = stbi__convert_format(out, 4, req_comp, w, h);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }

   if (comp) *comp = 4;
   *y = h;
   *x = w;

   return out;
}
#endif

// *************************************************************************************************
// Softimage PIC loader
// by Tom Seddon
//
// See
// http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/

#ifndef STBI_NO_PIC
// match 4 bytes of the stream against str; consumes the bytes either way
static int stbi__pic_is4(stbi__context *s,const char *str)
{
   int i;
   for (i=0; i<4; ++i)
      if (stbi__get8(s) != (stbi_uc)str[i])
         return 0;

   return 1;
}

// probe: PIC magic number, 84 don't-care bytes, then the "PICT" id
static int stbi__pic_test_core(stbi__context *s)
{
   int i;

   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
      return 0;

   for(i=0;i<84;++i)
      stbi__get8(s);

   if (!stbi__pic_is4(s,"PICT"))
      return 0;

   return 1;
}

// one PIC packet descriptor: size (bpp), compression type, channel bitmask
typedef struct
{
   stbi_uc size,type,channel;
} stbi__pic_packet;

// read one value for each channel selected in the `channel` bitmask into dest;
// returns dest, or NULL (via stbi__errpuc) on premature EOF
static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
{
   int mask=0x80, i;

   for (i=0; i<4; ++i, mask>>=1) {
      if (channel & mask) {
         if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
         dest[i]=stbi__get8(s);
      }
   }

   return dest;
}

// copy only the channels selected in the bitmask from src to dest
static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
{
   int mask=0x80,i;

   for (i=0;i<4; ++i, mask>>=1)
      if (channel&mask)
         dest[i]=src[i];
}

static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
{
   int act_comp=0,num_packets=0,y,chained;
   stbi__pic_packet packets[10];

   // this will (should...) cater for even some bizarre stuff like having data
   //  for the same channel in multiple packets.
   do {
      stbi__pic_packet *packet;

      if (num_packets==sizeof(packets)/sizeof(packets[0]))
         return stbi__errpuc("bad format","too many packets");

      packet = &packets[num_packets++];

      chained = stbi__get8(s);
      packet->size    = stbi__get8(s);
      packet->type    = stbi__get8(s);
      packet->channel = stbi__get8(s);

      act_comp |= packet->channel;

      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)");
      if (packet->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
   } while (chained);

   *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
+ + for(y=0; ytype) { + default: + return stbi__errpuc("bad format","packet has bad compression type"); + + case 0: {//uncompressed + int x; + + for(x=0;xchannel,dest)) + return 0; + break; + } + + case 1://Pure RLE + { + int left=width, i; + + while (left>0) { + stbi_uc count,value[4]; + + count=stbi__get8(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)"); + + if (count > left) + count = (stbi_uc) left; + + if (!stbi__readval(s,packet->channel,value)) return 0; + + for(i=0; ichannel,dest,value); + left -= count; + } + } + break; + + case 2: {//Mixed RLE + int left=width; + while (left>0) { + int count = stbi__get8(s), i; + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)"); + + if (count >= 128) { // Repeated + stbi_uc value[4]; + + if (count==128) + count = stbi__get16be(s); + else + count -= 127; + if (count > left) + return stbi__errpuc("bad file","scanline overrun"); + + if (!stbi__readval(s,packet->channel,value)) + return 0; + + for(i=0;ichannel,dest,value); + } else { // Raw + ++count; + if (count>left) return stbi__errpuc("bad file","scanline overrun"); + + for(i=0;ichannel,dest)) + return 0; + } + left-=count; + } + break; + } + } + } + } + + return result; +} + +static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri) +{ + stbi_uc *result; + int i, x,y, internal_comp; + STBI_NOTUSED(ri); + + if (!comp) comp = &internal_comp; + + for (i=0; i<92; ++i) + stbi__get8(s); + + x = stbi__get16be(s); + y = stbi__get16be(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)"); + if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode"); + + stbi__get32be(s); //skip `ratio' + stbi__get16be(s); //skip `fields' + stbi__get16be(s); //skip `pad' + + // intermediate buffer is RGBA + result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0); + memset(result, 0xff, 
x*y*4); + + if (!stbi__pic_load_core(s,x,y,comp, result)) { + STBI_FREE(result); + result=0; + } + *px = x; + *py = y; + if (req_comp == 0) req_comp = *comp; + result=stbi__convert_format(result,4,req_comp,x,y); + + return result; +} + +static int stbi__pic_test(stbi__context *s) +{ + int r = stbi__pic_test_core(s); + stbi__rewind(s); + return r; +} +#endif + +// ************************************************************************************************* +// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb + +#ifndef STBI_NO_GIF +typedef struct +{ + stbi__int16 prefix; + stbi_uc first; + stbi_uc suffix; +} stbi__gif_lzw; + +typedef struct +{ + int w,h; + stbi_uc *out; // output buffer (always 4 components) + stbi_uc *background; // The current "background" as far as a gif is concerned + stbi_uc *history; + int flags, bgindex, ratio, transparent, eflags; + stbi_uc pal[256][4]; + stbi_uc lpal[256][4]; + stbi__gif_lzw codes[8192]; + stbi_uc *color_table; + int parse, step; + int lflags; + int start_x, start_y; + int max_x, max_y; + int cur_x, cur_y; + int line_size; + int delay; +} stbi__gif; + +static int stbi__gif_test_raw(stbi__context *s) +{ + int sz; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0; + sz = stbi__get8(s); + if (sz != '9' && sz != '7') return 0; + if (stbi__get8(s) != 'a') return 0; + return 1; +} + +static int stbi__gif_test(stbi__context *s) +{ + int r = stbi__gif_test_raw(s); + stbi__rewind(s); + return r; +} + +static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp) +{ + int i; + for (i=0; i < num_entries; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + pal[i][3] = transp == i ? 
0 : 255; + } +} + +static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info) +{ + stbi_uc version; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') + return stbi__err("not GIF", "Corrupt GIF"); + + version = stbi__get8(s); + if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF"); + if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF"); + + stbi__g_failure_reason = ""; + g->w = stbi__get16le(s); + g->h = stbi__get16le(s); + g->flags = stbi__get8(s); + g->bgindex = stbi__get8(s); + g->ratio = stbi__get8(s); + g->transparent = -1; + + if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments + + if (is_info) return 1; + + if (g->flags & 0x80) + stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); + + return 1; +} + +static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); + if (!stbi__gif_header(s, g, comp, 1)) { + STBI_FREE(g); + stbi__rewind( s ); + return 0; + } + if (x) *x = g->w; + if (y) *y = g->h; + STBI_FREE(g); + return 1; +} + +static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) +{ + stbi_uc *p, *c; + int idx; + + // recurse to decode the prefixes, since the linked-list is backwards, + // and working backwards through an interleaved image would be nasty + if (g->codes[code].prefix >= 0) + stbi__out_gif_code(g, g->codes[code].prefix); + + if (g->cur_y >= g->max_y) return; + + idx = g->cur_x + g->cur_y; + p = &g->out[idx]; + g->history[idx / 4] = 1; + + c = &g->color_table[g->codes[code].suffix * 4]; + if (c[3] > 128) { // don't render transparent pixels; + p[0] = c[2]; + p[1] = c[1]; + p[2] = c[0]; + p[3] = c[3]; + } + g->cur_x += 4; + + if (g->cur_x >= g->max_x) { + g->cur_x = g->start_x; + g->cur_y += g->step; + + while (g->cur_y >= g->max_y && g->parse > 0) { + g->step = (1 << g->parse) * 
g->line_size; + g->cur_y = g->start_y + (g->step >> 1); + --g->parse; + } + } +} + +static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) +{ + stbi_uc lzw_cs; + stbi__int32 len, init_code; + stbi__uint32 first; + stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear; + stbi__gif_lzw *p; + + lzw_cs = stbi__get8(s); + if (lzw_cs > 12) return NULL; + clear = 1 << lzw_cs; + first = 1; + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + bits = 0; + valid_bits = 0; + for (init_code = 0; init_code < clear; init_code++) { + g->codes[init_code].prefix = -1; + g->codes[init_code].first = (stbi_uc) init_code; + g->codes[init_code].suffix = (stbi_uc) init_code; + } + + // support no starting clear code + avail = clear+2; + oldcode = -1; + + len = 0; + for(;;) { + if (valid_bits < codesize) { + if (len == 0) { + len = stbi__get8(s); // start new block + if (len == 0) + return g->out; + } + --len; + bits |= (stbi__int32) stbi__get8(s) << valid_bits; + valid_bits += 8; + } else { + stbi__int32 code = bits & codemask; + bits >>= codesize; + valid_bits -= codesize; + // @OPTIMIZE: is there some way we can accelerate the non-clear path? + if (code == clear) { // clear code + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + avail = clear + 2; + oldcode = -1; + first = 0; + } else if (code == clear + 1) { // end of stream code + stbi__skip(s, len); + while ((len = stbi__get8(s)) > 0) + stbi__skip(s,len); + return g->out; + } else if (code <= avail) { + if (first) { + return stbi__errpuc("no clear code", "Corrupt GIF"); + } + + if (oldcode >= 0) { + p = &g->codes[avail++]; + if (avail > 8192) { + return stbi__errpuc("too many codes", "Corrupt GIF"); + } + + p->prefix = (stbi__int16) oldcode; + p->first = g->codes[oldcode].first; + p->suffix = (code == avail) ? 
p->first : g->codes[code].first; + } else if (code == avail) + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + + stbi__out_gif_code(g, (stbi__uint16) code); + + if ((avail & codemask) == 0 && avail <= 0x0FFF) { + codesize++; + codemask = (1 << codesize) - 1; + } + + oldcode = code; + } else { + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + } + } + } +} + +// this function is designed to support animated gifs, although stb_image doesn't support it +// two back is the image from two frames ago, used for a very specific disposal format +static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back) +{ + int dispose; + int first_frame; + int pi; + int pcount; + + // on first frame, any non-written pixels get the background colour (non-transparent) + first_frame = 0; + if (g->out == 0) { + if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header + g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h); + g->background = (stbi_uc *) stbi__malloc(4 * g->w * g->h); + g->history = (stbi_uc *) stbi__malloc(g->w * g->h); + if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory"); + + // image is treated as "tranparent" at the start - ie, nothing overwrites the current background; + // background colour is only used for pixels that are not rendered first frame, after that "background" + // color refers to teh color that was there the previous frame. + memset( g->out, 0x00, 4 * g->w * g->h ); + memset( g->background, 0x00, 4 * g->w * g->h ); // state of the background (starts transparent) + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + first_frame = 1; + } else { + // second frame - how do we dispoase of the previous one? 
+ dispose = (g->eflags & 0x1C) >> 2; + pcount = g->w * g->h; + + if ((dispose == 3) && (two_back == 0)) { + dispose = 2; // if I don't have an image to revert back to, default to the old background + } + + if (dispose == 3) { // use previous graphic + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); + } + } + } else if (dispose == 2) { + // restore what was changed last frame to background before that frame; + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); + } + } + } else { + // This is a non-disposal case eithe way, so just + // leave the pixels as is, and they will become the new background + // 1: do not dispose + // 0: not specified. + } + + // background is what out is after the undoing of the previou frame; + memcpy( g->background, g->out, 4 * g->w * g->h ); + } + + // clear my history; + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + + for (;;) { + int tag = stbi__get8(s); + switch (tag) { + case 0x2C: /* Image Descriptor */ + { + stbi__int32 x, y, w, h; + stbi_uc *o; + + x = stbi__get16le(s); + y = stbi__get16le(s); + w = stbi__get16le(s); + h = stbi__get16le(s); + if (((x + w) > (g->w)) || ((y + h) > (g->h))) + return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); + + g->line_size = g->w * 4; + g->start_x = x * 4; + g->start_y = y * g->line_size; + g->max_x = g->start_x + w * 4; + g->max_y = g->start_y + h * g->line_size; + g->cur_x = g->start_x; + g->cur_y = g->start_y; + + g->lflags = stbi__get8(s); + + if (g->lflags & 0x40) { + g->step = 8 * g->line_size; // first interlaced spacing + g->parse = 3; + } else { + g->step = g->line_size; + g->parse = 0; + } + + if (g->lflags & 0x80) { + stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? 
g->transparent : -1); + g->color_table = (stbi_uc *) g->lpal; + } else if (g->flags & 0x80) { + g->color_table = (stbi_uc *) g->pal; + } else + return stbi__errpuc("missing color table", "Corrupt GIF"); + + o = stbi__process_gif_raster(s, g); + if (o == NULL) return NULL; + + // if this was the first frame, + pcount = g->w * g->h; + if (first_frame && (g->bgindex > 0)) { + // if first frame, any pixel not drawn to gets the background color + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi] == 0) { + g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; + memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); + } + } + } + + return o; + } + + case 0x21: // Comment Extension. + { + int len; + int ext = stbi__get8(s); + if (ext == 0xF9) { // Graphic Control Extension. + len = stbi__get8(s); + if (len == 4) { + g->eflags = stbi__get8(s); + g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths. 
+ + // unset old transparent + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 255; + } + if (g->eflags & 0x01) { + g->transparent = stbi__get8(s); + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 0; + } + } else { + // don't need transparent + stbi__skip(s, 1); + g->transparent = -1; + } + } else { + stbi__skip(s, len); + break; + } + } + while ((len = stbi__get8(s)) != 0) { + stbi__skip(s, len); + } + break; + } + + case 0x3B: // gif stream termination code + return (stbi_uc *) s; // using '1' causes warning on some compilers + + default: + return stbi__errpuc("unknown code", "Corrupt GIF"); + } + } +} + +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + if (stbi__gif_test(s)) { + int layers = 0; + stbi_uc *u = 0; + stbi_uc *out = 0; + stbi_uc *two_back = 0; + stbi__gif g; + int stride; + memset(&g, 0, sizeof(g)); + if (delays) { + *delays = 0; + } + + do { + u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + + if (u) { + *x = g.w; + *y = g.h; + ++layers; + stride = g.w * g.h * 4; + + if (out) { + out = (stbi_uc*) STBI_REALLOC( out, layers * stride ); + if (delays) { + *delays = (int*) STBI_REALLOC( *delays, sizeof(int) * layers ); + } + } else { + out = (stbi_uc*)stbi__malloc( layers * stride ); + if (delays) { + *delays = (int*) stbi__malloc( layers * sizeof(int) ); + } + } + memcpy( out + ((layers - 1) * stride), u, stride ); + if (layers >= 2) { + two_back = out - 2 * stride; + } + + if (delays) { + (*delays)[layers - 1U] = g.delay; + } + } + } while (u != 0); + + // free temp buffer; + STBI_FREE(g.out); + STBI_FREE(g.history); + STBI_FREE(g.background); + + // do the final conversion after loading everything; + if (req_comp && req_comp != 4) + out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); + + *z = layers; + return out; + } else { + return stbi__errpuc("not GIF", "Image was not as a 
gif type."); + } +} + +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *u = 0; + stbi__gif g; + memset(&g, 0, sizeof(g)); + + u = stbi__gif_load_next(s, &g, comp, req_comp, 0); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + if (u) { + *x = g.w; + *y = g.h; + + // moved conversion to after successful load so that the same + // can be done for multiple frames. + if (req_comp && req_comp != 4) + u = stbi__convert_format(u, 4, req_comp, g.w, g.h); + } + + // free buffers needed for multiple frame loading; + STBI_FREE(g.history); + STBI_FREE(g.background); + + return u; +} + +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp) +{ + return stbi__gif_info_raw(s,x,y,comp); +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int stbi__hdr_test_core(stbi__context *s, const char *signature) +{ + int i; + for (i=0; signature[i]; ++i) + if (stbi__get8(s) != signature[i]) + return 0; + stbi__rewind(s); + return 1; +} + +static int stbi__hdr_test(stbi__context* s) +{ + int r = stbi__hdr_test_core(s, "#?RADIANCE\n"); + stbi__rewind(s); + if(!r) { + r = stbi__hdr_test_core(s, "#?RGBE\n"); + stbi__rewind(s); + } + return r; +} + +#define STBI__HDR_BUFLEN 1024 +static char *stbi__hdr_gettoken(stbi__context *z, char *buffer) +{ + int len=0; + char c = '\0'; + + c = (char) stbi__get8(z); + + while (!stbi__at_eof(z) && c != '\n') { + buffer[len++] = c; + if (len == STBI__HDR_BUFLEN-1) { + // flush to end of line + while (!stbi__at_eof(z) && stbi__get8(z) != '\n') + ; + break; + } + c = (char) stbi__get8(z); + } + + buffer[len] = 0; + return buffer; +} + +static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) +{ + if ( input[3] != 0 ) { + float f1; + // Exponent + f1 = (float) ldexp(1.0f, input[3] - 
(int)(128 + 8)); + if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; + } else { + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } + } +} + +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + float *hdr_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + const char *headerToken; + STBI_NOTUSED(ri); + + // Check identifier + headerToken = stbi__hdr_gettoken(s,buffer); + if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) + return stbi__errpf("not HDR", "Corrupt HDR image"); + + // Parse header + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! 
+ token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + height = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + width = (int) strtol(token, NULL, 10); + + *x = width; + *y = height; + + if (comp) *comp = 3; + if (req_comp == 0) req_comp = 3; + + if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0)) + return stbi__errpf("too large", "HDR image is too large"); + + // Read data + hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0); + if (!hdr_data) + return stbi__errpf("outofmem", "Out of memory"); + + // Load image data + // image data is stored as some number of sca + if ( width < 8 || width >= 32768) { + // Read flat data + for (j=0; j < height; ++j) { + for (i=0; i < width; ++i) { + stbi_uc rgbe[4]; + main_decode_loop: + stbi__getn(s, rgbe, 4); + stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); + } + } + } else { + // Read RLE-encoded data + scanline = NULL; + + for (j = 0; j < height; ++j) { + c1 = stbi__get8(s); + c2 = stbi__get8(s); + len = stbi__get8(s); + if (c1 != 2 || c2 != 2 || (len & 0x80)) { + // not run-length encoded, so we have to actually use THIS data as a decoded + // pixel (note this can't be a valid pixel--one of RGB must be >= 128) + stbi_uc rgbe[4]; + rgbe[0] = (stbi_uc) c1; + rgbe[1] = (stbi_uc) c2; + rgbe[2] = (stbi_uc) len; + rgbe[3] = (stbi_uc) stbi__get8(s); + stbi__hdr_convert(hdr_data, rgbe, req_comp); + i = 1; + j = 0; + STBI_FREE(scanline); + goto main_decode_loop; // yes, this makes no sense + } + len <<= 8; + len |= stbi__get8(s); + if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); } + if (scanline == NULL) { + scanline = (stbi_uc *) 
stbi__malloc_mad2(width, 4, 0); + if (!scanline) { + STBI_FREE(hdr_data); + return stbi__errpf("outofmem", "Out of memory"); + } + } + + for (k = 0; k < 4; ++k) { + int nleft; + i = 0; + while ((nleft = width - i) > 0) { + count = stbi__get8(s); + if (count > 128) { + // Run + value = stbi__get8(s); + count -= 128; + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = value; + } else { + // Dump + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = stbi__get8(s); + } + } + } + for (i=0; i < width; ++i) + stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); + } + if (scanline) + STBI_FREE(scanline); + } + + return hdr_data; +} + +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int dummy; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (stbi__hdr_test(s) == 0) { + stbi__rewind( s ); + return 0; + } + + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) { + stbi__rewind( s ); + return 0; + } + token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *y = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *x = (int) strtol(token, NULL, 10); + *comp = 3; + return 1; +} +#endif // STBI_NO_HDR + +#ifndef STBI_NO_BMP +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) +{ + void *p; + stbi__bmp_data info; + + info.all_a = 255; + p = stbi__bmp_parse_header(s, &info); + stbi__rewind( s ); + 
if (p == NULL)
+      return 0;
+   if (x) *x = s->img_x;
+   if (y) *y = s->img_y;
+   if (comp) *comp = info.ma ? 4 : 3;   // alpha mask present => 4 components
+   return 1;
+}
+#endif
+
+#ifndef STBI_NO_PSD
+// Parse just enough of a PSD header to report width/height/components.
+// Returns 1 on success (filling *x, *y, *comp); returns 0 and rewinds the
+// stream on any mismatch so the next format probe can run.
+static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int channelCount, dummy, depth;
+   // optional out-params: point them at a dummy so assignments below are unconditional
+   if (!x) x = &dummy;
+   if (!y) y = &dummy;
+   if (!comp) comp = &dummy;
+   if (stbi__get32be(s) != 0x38425053) {   // signature "8BPS" (big-endian)
+       stbi__rewind( s );
+       return 0;
+   }
+   if (stbi__get16be(s) != 1) {            // only version 1 is accepted
+       stbi__rewind( s );
+       return 0;
+   }
+   stbi__skip(s, 6);                       // 6 header bytes this parser does not use
+   channelCount = stbi__get16be(s);
+   if (channelCount < 0 || channelCount > 16) {
+       stbi__rewind( s );
+       return 0;
+   }
+   *y = stbi__get32be(s);                  // PSD stores height before width
+   *x = stbi__get32be(s);
+   depth = stbi__get16be(s);
+   if (depth != 8 && depth != 16) {        // only 8- or 16-bit channels accepted
+       stbi__rewind( s );
+       return 0;
+   }
+   if (stbi__get16be(s) != 3) {            // color mode must be 3 (presumably RGB -- confirm against PSD spec)
+       stbi__rewind( s );
+       return 0;
+   }
+   *comp = 4;                              // always reported as 4 components
+   return 1;
+}
+
+// Report whether the stream is a PSD with 16 bits per channel.
+// Mirrors the header checks of stbi__psd_info; rewinds and returns 0 on
+// any mismatch.  NOTE(review): does not rewind on success.
+static int stbi__psd_is16(stbi__context *s)
+{
+   int channelCount, depth;
+   if (stbi__get32be(s) != 0x38425053) {   // signature "8BPS"
+       stbi__rewind( s );
+       return 0;
+   }
+   if (stbi__get16be(s) != 1) {            // version 1 only
+       stbi__rewind( s );
+       return 0;
+   }
+   stbi__skip(s, 6);
+   channelCount = stbi__get16be(s);
+   if (channelCount < 0 || channelCount > 16) {
+       stbi__rewind( s );
+       return 0;
+   }
+   (void) stbi__get32be(s);                // height -- not needed here
+   (void) stbi__get32be(s);                // width  -- not needed here
+   depth = stbi__get16be(s);
+   if (depth != 16) {
+       stbi__rewind( s );
+       return 0;
+   }
+   return 1;
+}
+#endif
+
+#ifndef STBI_NO_PIC
+// Probe a Softimage PIC header for width/height/components without
+// decoding pixel data.  Returns 1 and fills the out-params on success;
+// rewinds the stream and returns 0 otherwise.
+static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int act_comp=0,num_packets=0,chained,dummy;
+   stbi__pic_packet packets[10];
+
+   // optional out-params
+   if (!x) x = &dummy;
+   if (!y) y = &dummy;
+   if (!comp) comp = &dummy;
+
+   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {   // PIC magic bytes
+      stbi__rewind(s);
+      return 0;
+   }
+
+   stbi__skip(s, 88);                            // remainder of the fixed-size header
+
+   *x = stbi__get16be(s);
+   *y = stbi__get16be(s);
+   if (stbi__at_eof(s)) {
+      stbi__rewind( s);
+      return 0;
+   }
+   // reject dimensions whose pixel count would exceed the 2^28 safety limit
+   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
+      stbi__rewind( s );
+      return 0;
+   }
+
+   stbi__skip(s, 8);
+
+   do {
+      stbi__pic_packet
*packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return 0; + + packet = &packets[num_packets++]; + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + act_comp |= packet->channel; + + if (stbi__at_eof(s)) { + stbi__rewind( s ); + return 0; + } + if (packet->size != 8) { + stbi__rewind( s ); + return 0; + } + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); + + return 1; +} +#endif + +// ************************************************************************************************* +// Portable Gray Map and Portable Pixel Map loader +// by Ken Miller +// +// PGM: http://netpbm.sourceforge.net/doc/pgm.html +// PPM: http://netpbm.sourceforge.net/doc/ppm.html +// +// Known limitations: +// Does not support comments in the header section +// Does not support ASCII image data (formats P2 and P3) +// Does not support 16-bit-per-channel + +#ifndef STBI_NO_PNM + +static int stbi__pnm_test(stbi__context *s) +{ + char p, t; + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind( s ); + return 0; + } + return 1; +} + +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + STBI_NOTUSED(ri); + + if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n)) + return 0; + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + + if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "PNM too large"); + + out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + stbi__getn(s, out, s->img_n * s->img_x * s->img_y); + + if (req_comp && req_comp != s->img_n) { + out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure 
+   }
+   return out;
+}
+
+// True for the whitespace characters a PNM header may contain.
+static int stbi__pnm_isspace(char c)
+{
+   return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
+}
+
+// Advance *c past whitespace and '#' comment lines.  *c always holds the
+// most recently read character; on return it is the first character that
+// is neither whitespace nor inside a comment (or unchanged at EOF).
+static void stbi__pnm_skip_whitespace(stbi__context *s, char *c)
+{
+   for (;;) {
+      while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
+         *c = (char) stbi__get8(s);
+
+      // '#' begins a comment that runs to the end of the line
+      if (stbi__at_eof(s) || *c != '#')
+         break;
+
+      while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' )
+         *c = (char) stbi__get8(s);
+   }
+}
+
+// ASCII decimal digit test (locale-independent, unlike isdigit()).
+static int stbi__pnm_isdigit(char c)
+{
+   return c >= '0' && c <= '9';
+}
+
+// Read an unsigned decimal integer starting at *c; leaves the first
+// non-digit character in *c.
+// NOTE(review): no overflow guard -- a header with a very long digit run
+// wraps the int; callers only range-check the final value.
+static int stbi__pnm_getinteger(stbi__context *s, char *c)
+{
+   int value = 0;
+
+   while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
+      value = value*10 + (*c - '0');
+      *c = (char) stbi__get8(s);
+   }
+
+   return value;
+}
+
+// Parse a binary PGM ("P5") or PPM ("P6") header.  Fills *x, *y, *comp and
+// returns 1 on success; rewinds and returns 0 if the magic bytes don't
+// match.  (The max-value check continues past this chunk.)
+static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int maxv, dummy;
+   char c, p, t;
+
+   // optional out-params
+   if (!x) x = &dummy;
+   if (!y) y = &dummy;
+   if (!comp) comp = &dummy;
+
+   stbi__rewind(s);
+
+   // Get identifier
+   p = (char) stbi__get8(s);
+   t = (char) stbi__get8(s);
+   if (p != 'P' || (t != '5' && t != '6')) {
+       stbi__rewind(s);
+       return 0;
+   }
+
+   *comp = (t == '6') ?
3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm + + c = (char) stbi__get8(s); + stbi__pnm_skip_whitespace(s, &c); + + *x = stbi__pnm_getinteger(s, &c); // read width + stbi__pnm_skip_whitespace(s, &c); + + *y = stbi__pnm_getinteger(s, &c); // read height + stbi__pnm_skip_whitespace(s, &c); + + maxv = stbi__pnm_getinteger(s, &c); // read max value + + if (maxv > 255) + return stbi__err("max value > 255", "PPM image not 8-bit"); + else + return 1; +} +#endif + +static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) +{ + #ifndef STBI_NO_JPEG + if (stbi__jpeg_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNG + if (stbi__png_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_GIF + if (stbi__gif_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_BMP + if (stbi__bmp_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PIC + if (stbi__pic_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNM + if (stbi__pnm_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_info(s, x, y, comp)) return 1; + #endif + + // test tga last because it's a crappy test! 
+   #ifndef STBI_NO_TGA
+   // TGA is probed last: it has no reliable signature (see comment above)
+   if (stbi__tga_info(s, x, y, comp))
+       return 1;
+   #endif
+   return stbi__err("unknown image type", "Image not of any known type, or corrupt");
+}
+
+// Return 1 if the stream holds 16-bit-per-channel data.  Only the PNG and
+// PSD probes are implemented, as only those loaders support 16-bit data.
+static int stbi__is_16_main(stbi__context *s)
+{
+   #ifndef STBI_NO_PNG
+   if (stbi__png_is16(s)) return 1;
+   #endif
+
+   #ifndef STBI_NO_PSD
+   if (stbi__psd_is16(s)) return 1;
+   #endif
+
+   return 0;
+}
+
+#ifndef STBI_NO_STDIO
+// Open `filename`, query its dimensions/components, and close it again.
+// Returns 1 on success (filling *x, *y, *comp), 0 on failure.
+STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
+{
+    FILE *f = stbi__fopen(filename, "rb");
+    int result;
+    if (!f) return stbi__err("can't fopen", "Unable to open file");
+    result = stbi_info_from_file(f, x, y, comp);
+    fclose(f);
+    return result;
+}
+
+// Like stbi_info() but on an already-open FILE*; the stream position is
+// saved and restored so the caller's file state is unchanged.
+STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
+{
+   int r;
+   stbi__context s;
+   long pos = ftell(f);
+   stbi__start_file(&s, f);
+   r = stbi__info_main(&s,x,y,comp);
+   fseek(f,pos,SEEK_SET);   // restore the caller's position
+   return r;
+}
+
+// Open `filename` and report whether it contains 16-bit channel data.
+STBIDEF int stbi_is_16_bit(char const *filename)
+{
+    FILE *f = stbi__fopen(filename, "rb");
+    int result;
+    if (!f) return stbi__err("can't fopen", "Unable to open file");
+    result = stbi_is_16_bit_from_file(f);
+    fclose(f);
+    return result;
+}
+
+// FILE* variant of stbi_is_16_bit(); restores the stream position.
+STBIDEF int stbi_is_16_bit_from_file(FILE *f)
+{
+   int r;
+   stbi__context s;
+   long pos = ftell(f);
+   stbi__start_file(&s, f);
+   r = stbi__is_16_main(&s);
+   fseek(f,pos,SEEK_SET);   // restore the caller's position
+   return r;
+}
+#endif // !STBI_NO_STDIO
+
+// In-memory variant of stbi_info().
+STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
+{
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len);
+   return stbi__info_main(&s,x,y,comp);
+}
+
+// Callback-driven variant of stbi_info().
+STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
+{
+   stbi__context s;
+   stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
+   return stbi__info_main(&s,x,y,comp);
+}
+
+// In-memory variant of stbi_is_16_bit().
+STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len)
+{
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len);
+   return stbi__is_16_main(&s);
+}
+
+STBIDEF int
stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__is_16_main(&s); +} + +#endif // STB_IMAGE_IMPLEMENTATION + +/* + revision history: + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug + 1-bit BMP + *_is_16_bit api + avoid warnings + 2.16 (2017-07-23) all functions have 16-bit variants; + STBI_NO_STDIO works again; + compilation fixes; + fix rounding in unpremultiply; + optimize vertical flip; + disable raw_len validation; + documentation fixes + 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; + warning fixes; disable run-time SSE detection on gcc; + uniform handling of optional "return" values; + thread-safe initialization of zlib tables + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) allocate large structures on the stack + remove white matting for transparent PSD + fix reported channel count for PNG & BMP + re-enable SSE2 in non-gcc 64-bit + support RGB-formatted JPEG + read 16-bit PNGs (only as 8-bit) + 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED + 2.09 (2016-01-16) allow comments in PNM files + 16-bit-per-pixel TGA (not bit-per-component) + info() for TGA could break due to .hdr handling + info() for BMP to shares code instead of sloppy parse + can use STBI_REALLOC_SIZED if allocator doesn't support realloc + code cleanup + 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA + 2.07 (2015-09-13) fix compiler warnings + partial animated GIF support + limited 16-bpc PSD support + #ifdef unused functions + bug with < 92 byte PIC,PNM,HDR,TGA + 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value + 2.05 (2015-04-19) fix bug 
in progressive JPEG handling, fix warning + 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit + 2.03 (2015-04-12) extra corruption checking (mmozeiko) + stbi_set_flip_vertically_on_load (nguillemot) + fix NEON support; fix mingw support + 2.02 (2015-01-19) fix incorrect assert, fix warning + 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 + 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG + 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) + progressive JPEG (stb) + PGM/PPM support (Ken Miller) + STBI_MALLOC,STBI_REALLOC,STBI_FREE + GIF bugfix -- seemingly never worked + STBI_NO_*, STBI_ONLY_* + 1.48 (2014-12-14) fix incorrectly-named assert() + 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) + optimize PNG (ryg) + fix bug in interlaced PNG with user-specified channel count (stb) + 1.46 (2014-08-26) + fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG + 1.45 (2014-08-16) + fix MSVC-ARM internal compiler error by wrapping malloc + 1.44 (2014-08-07) + various warning fixes from Ronny Chevalier + 1.43 (2014-07-15) + fix MSVC-only compiler problem in code changed in 1.42 + 1.42 (2014-07-09) + don't define _CRT_SECURE_NO_WARNINGS (affects user code) + fixes to stbi__cleanup_jpeg path + added STBI_ASSERT to avoid requiring assert.h + 1.41 (2014-06-25) + fix search&replace from 1.36 that messed up comments/error messages + 1.40 (2014-06-22) + fix gcc struct-initialization warning + 1.39 (2014-06-15) + fix to TGA optimization when req_comp != number of components in TGA; + fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite) + add support for BMP version 5 (more ignored fields) + 1.38 (2014-06-06) + suppress MSVC warnings on integer casts truncating values + fix accidental rename of 'skip' field of I/O + 1.37 (2014-06-04) + remove duplicate typedef + 1.36 (2014-06-03) + convert to header file single-file library + if 
de-iphone isn't set, load iphone images color-swapped instead of returning NULL + 1.35 (2014-05-27) + various warnings + fix broken STBI_SIMD path + fix bug where stbi_load_from_file no longer left file pointer in correct place + fix broken non-easy path for 32-bit BMP (possibly never used) + TGA optimization by Arseny Kapoulkine + 1.34 (unknown) + use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case + 1.33 (2011-07-14) + make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements + 1.32 (2011-07-13) + support for "info" function for all supported filetypes (SpartanJ) + 1.31 (2011-06-20) + a few more leak fixes, bug in PNG handling (SpartanJ) + 1.30 (2011-06-11) + added ability to load files via callbacks to accomidate custom input streams (Ben Wenger) + removed deprecated format-specific test/load functions + removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway + error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) + fix inefficiency in decoding 32-bit BMP (David Woo) + 1.29 (2010-08-16) + various warning fixes from Aurelien Pocheville + 1.28 (2010-08-01) + fix bug in GIF palette transparency (SpartanJ) + 1.27 (2010-08-01) + cast-to-stbi_uc to fix warnings + 1.26 (2010-07-24) + fix bug in file buffering for PNG reported by SpartanJ + 1.25 (2010-07-17) + refix trans_data warning (Won Chun) + 1.24 (2010-07-12) + perf improvements reading from files on platforms with lock-heavy fgetc() + minor perf improvements for jpeg + deprecated type-specific functions so we'll get feedback if they're needed + attempt to fix trans_data warning (Won Chun) + 1.23 fixed bug in iPhone support + 1.22 (2010-07-10) + removed image *writing* support + stbi_info support from Jetro Lauha + GIF support from Jean-Marc Lienher + iPhone PNG-extensions from James Brown + warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. 
Janez (U+017D)emva) + 1.21 fix use of 'stbi_uc' in header (reported by jon blow) + 1.20 added support for Softimage PIC, by Tom Seddon + 1.19 bug in interlaced PNG corruption check (found by ryg) + 1.18 (2008-08-02) + fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - stbi__convert_format converted one too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug + header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) + 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... 
not sure + fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available + 0.56 fix bug: zlib uncompressed mode len vs. nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant + 0.50 (2006-11-19) + first released version +*/ + + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/workloads/realworld/standard/darknet/src/stb_image_write.h b/workloads/realworld/standard/darknet/src/stb_image_write.h new file mode 100644 index 0000000000000000000000000000000000000000..c05e95812b96232abd3617f98255832cc3fe4716 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/stb_image_write.h @@ -0,0 +1,1568 @@ +/* stb_image_write - v1.09 - public domain - http://nothings.org/stb/stb_image_write.h + writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 + no warranty implied; use at your own risk + + Before #including, + + #define STB_IMAGE_WRITE_IMPLEMENTATION + + in the file that you want to have the implementation. + + Will probably not work correctly with strict-aliasing optimizations. + + If using a modern Microsoft Compiler, non-safe versions of CRT calls may cause + compilation warnings or even errors. To avoid this, also before #including, + + #define STBI_MSC_SECURE_CRT + +ABOUT: + + This header file is a library for writing images to C stdio. It could be + adapted to write to memory or a general streaming interface; let me know. + + The PNG output is not optimal; it is 20-50% larger than the file + written by a decent optimizing implementation; though providing a custom + zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that. 
+ This library is designed for source code compactness and simplicity, + not optimal image file size or run-time performance. + +BUILDING: + + You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h. + You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace + malloc,realloc,free. + You can #define STBIW_MEMMOVE() to replace memmove() + You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function + for PNG compression (instead of the builtin one), it must have the following signature: + unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality); + The returned data will be freed with STBIW_FREE() (free() by default), + so it must be heap allocated with STBIW_MALLOC() (malloc() by default), + +USAGE: + + There are five functions, one for each image file format: + + int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality); + int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); + + void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically + + There are also five equivalent functions that use an arbitrary write function. 
You are + expected to open/close your file-equivalent before and after calling these: + + int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); + int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + + where the callback is: + void stbi_write_func(void *context, void *data, int size); + + You can configure it with these global variables: + int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE + int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression + int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode + + + You can define STBI_WRITE_NO_STDIO to disable the file variant of these + functions, so the library will not use stdio.h at all. However, this will + also disable HDR writing, because it requires stdio for formatted output. + + Each function returns 0 on failure and non-0 on success. + + The functions create an image file defined by the parameters. The image + is a rectangle of pixels stored from left-to-right, top-to-bottom. + Each pixel contains 'comp' channels of data stored interleaved with 8-bits + per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is + monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall. + The *data pointer points to the first byte of the top-left-most pixel. + For PNG, "stride_in_bytes" is the distance in bytes from the first byte of + a row of pixels to the first byte of the next row of pixels. 
+ + PNG creates output files with the same number of components as the input. + The BMP format expands Y to RGB in the file format and does not + output alpha. + + PNG supports writing rectangles of data even when the bytes storing rows of + data are not consecutive in memory (e.g. sub-rectangles of a larger image), + by supplying the stride between the beginning of adjacent rows. The other + formats do not. (Thus you cannot write a native-format BMP through the BMP + writer, both because it is in BGR order and because it may have padding + at the end of the line.) + + PNG allows you to set the deflate compression level by setting the global + variable 'stbi_write_png_compression_level' (it defaults to 8). + + HDR expects linear float data. Since the format is always 32-bit rgb(e) + data, alpha (if provided) is discarded, and for monochrome data it is + replicated across all three channels. + + TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed + data, set the global variable 'stbi_write_tga_with_rle' to 0. + + JPEG does ignore alpha channels in input data; quality is between 1 and 100. + Higher quality looks better but results in a bigger image. + JPEG baseline (no JPEG progressive). + +CREDITS: + + + Sean Barrett - PNG/BMP/TGA + Baldur Karlsson - HDR + Jean-Sebastien Guay - TGA monochrome + Tim Kelsey - misc enhancements + Alan Hickman - TGA RLE + Emmanuel Julien - initial file IO callback implementation + Jon Olick - original jo_jpeg.cpp code + Daniel Gibson - integrate JPEG, allow external zlib + Aarni Koskela - allow choosing PNG filter + + bugfixes: + github:Chribba + Guillaume Chereau + github:jry2 + github:romigrou + Sergio Gonzalez + Jonas Karlsson + Filip Wasil + Thatcher Ulrich + github:poppolopoppo + Patrick Boettcher + github:xeekworx + Cap Petschulat + Simon Rodriguez + Ivan Tikhonov + github:ignotion + Adam Schackart + +LICENSE + + See end of file for license information. 
+ +*/ + +#ifndef INCLUDE_STB_IMAGE_WRITE_H +#define INCLUDE_STB_IMAGE_WRITE_H + +// if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline' +#ifndef STBIWDEF +#ifdef STB_IMAGE_WRITE_STATIC +#define STBIWDEF static +#else +#ifdef __cplusplus +#define STBIWDEF extern "C" +#else +#define STBIWDEF extern +#endif +#endif +#endif + +#ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations +extern int stbi_write_tga_with_rle; +extern int stbi_write_png_compression_level; +extern int stbi_write_force_png_filter; +#endif + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); +#endif + +typedef void stbi_write_func(void *context, void *data, int size); + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + +STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); + +#endif//INCLUDE_STB_IMAGE_WRITE_H + +#ifdef STB_IMAGE_WRITE_IMPLEMENTATION + +#ifdef _WIN32 + #ifndef 
_CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS + #endif + #ifndef _CRT_NONSTDC_NO_DEPRECATE + #define _CRT_NONSTDC_NO_DEPRECATE + #endif +#endif + +#ifndef STBI_WRITE_NO_STDIO +#include +#endif // STBI_WRITE_NO_STDIO + +#include +#include +#include +#include + +#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED)) +// ok +#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)." +#endif + +#ifndef STBIW_MALLOC +#define STBIW_MALLOC(sz) malloc(sz) +#define STBIW_REALLOC(p,newsz) realloc(p,newsz) +#define STBIW_FREE(p) free(p) +#endif + +#ifndef STBIW_REALLOC_SIZED +#define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz) +#endif + + +#ifndef STBIW_MEMMOVE +#define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz) +#endif + + +#ifndef STBIW_ASSERT +#include +#define STBIW_ASSERT(x) assert(x) +#endif + +#define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff) + +#ifdef STB_IMAGE_WRITE_STATIC +static int stbi__flip_vertically_on_write=0; +static int stbi_write_png_compression_level = 8; +static int stbi_write_tga_with_rle = 1; +static int stbi_write_force_png_filter = -1; +#else +int stbi_write_png_compression_level = 8; +int stbi__flip_vertically_on_write=0; +int stbi_write_tga_with_rle = 1; +int stbi_write_force_png_filter = -1; +#endif + +STBIWDEF void stbi_flip_vertically_on_write(int flag) +{ + stbi__flip_vertically_on_write = flag; +} + +typedef struct +{ + stbi_write_func *func; + void *context; +} stbi__write_context; + +// initialize a callback-based context +static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context) +{ + s->func = c; + s->context = context; +} + +#ifndef STBI_WRITE_NO_STDIO + +static void stbi__stdio_write(void *context, void *data, int size) +{ + fwrite(data,1,size,(FILE*) 
context); +} + +static int stbi__start_write_file(stbi__write_context *s, const char *filename) +{ + FILE *f; +#ifdef STBI_MSC_SECURE_CRT + if (fopen_s(&f, filename, "wb")) + f = NULL; +#else + f = fopen(filename, "wb"); +#endif + stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f); + return f != NULL; +} + +static void stbi__end_write_file(stbi__write_context *s) +{ + fclose((FILE *)s->context); +} + +#endif // !STBI_WRITE_NO_STDIO + +typedef unsigned int stbiw_uint32; +typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1]; + +static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) +{ + while (*fmt) { + switch (*fmt++) { + case ' ': break; + case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); + s->func(s->context,&x,1); + break; } + case '2': { int x = va_arg(v,int); + unsigned char b[2]; + b[0] = STBIW_UCHAR(x); + b[1] = STBIW_UCHAR(x>>8); + s->func(s->context,b,2); + break; } + case '4': { stbiw_uint32 x = va_arg(v,int); + unsigned char b[4]; + b[0]=STBIW_UCHAR(x); + b[1]=STBIW_UCHAR(x>>8); + b[2]=STBIW_UCHAR(x>>16); + b[3]=STBIW_UCHAR(x>>24); + s->func(s->context,b,4); + break; } + default: + STBIW_ASSERT(0); + return; + } + } +} + +static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) 
+{ + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); +} + +static void stbiw__putc(stbi__write_context *s, unsigned char c) +{ + s->func(s->context, &c, 1); +} + +static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) +{ + unsigned char arr[3]; + arr[0] = a, arr[1] = b, arr[2] = c; + s->func(s->context, arr, 3); +} + +static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d) +{ + unsigned char bg[3] = { 255, 0, 255}, px[3]; + int k; + + if (write_alpha < 0) + s->func(s->context, &d[comp - 1], 1); + + switch (comp) { + case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case + case 1: + if (expand_mono) + stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp + else + s->func(s->context, d, 1); // monochrome TGA + break; + case 4: + if (!write_alpha) { + // composite against pink background + for (k = 0; k < 3; ++k) + px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255; + stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]); + break; + } + /* FALLTHROUGH */ + case 3: + stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); + break; + } + if (write_alpha > 0) + s->func(s->context, &d[comp - 1], 1); +} + +static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) +{ + stbiw_uint32 zero = 0; + int i,j, j_end; + + if (y <= 0) + return; + + if (stbi__flip_vertically_on_write) + vdir *= -1; + + if (vdir < 0) + j_end = -1, j = y-1; + else + j_end = y, j = 0; + + for (; j != j_end; j += vdir) { + for (i=0; i < x; ++i) { + unsigned char *d = (unsigned char *) data + (j*x+i)*comp; + stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); + } + s->func(s->context, &zero, scanline_pad); + } +} + +static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int 
comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) +{ + if (y < 0 || x < 0) { + return 0; + } else { + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); + stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono); + return 1; + } +} + +static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data) +{ + int pad = (-x*3) & 3; + return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad, + "11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header +} + +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_bmp_core(&s, x, y, comp, data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_bmp_core(&s, x, y, comp, data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif //!STBI_WRITE_NO_STDIO + +static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data) +{ + int has_alpha = (comp == 2 || comp == 4); + int colorbytes = has_alpha ? comp-1 : comp; + int format = colorbytes < 2 ? 
3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3 + + if (y < 0 || x < 0) + return 0; + + if (!stbi_write_tga_with_rle) { + return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0, + "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); + } else { + int i,j,k; + int jend, jdir; + + stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); + + if (stbi__flip_vertically_on_write) { + j = 0; + jend = y; + jdir = 1; + } else { + j = y-1; + jend = -1; + jdir = -1; + } + for (; j != jend; j += jdir) { + unsigned char *row = (unsigned char *) data + j * x * comp; + int len; + + for (i = 0; i < x; i += len) { + unsigned char *begin = row + i * comp; + int diff = 1; + len = 1; + + if (i < x - 1) { + ++len; + diff = memcmp(begin, row + (i + 1) * comp, comp); + if (diff) { + const unsigned char *prev = begin; + for (k = i + 2; k < x && len < 128; ++k) { + if (memcmp(prev, row + k * comp, comp)) { + prev += comp; + ++len; + } else { + --len; + break; + } + } + } else { + for (k = i + 2; k < x && len < 128; ++k) { + if (!memcmp(begin, row + k * comp, comp)) { + ++len; + } else { + break; + } + } + } + } + + if (diff) { + unsigned char header = STBIW_UCHAR(len - 1); + s->func(s->context, &header, 1); + for (k = 0; k < len; ++k) { + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); + } + } else { + unsigned char header = STBIW_UCHAR(len - 129); + s->func(s->context, &header, 1); + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); + } + } + } + } + return 1; +} + +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_tga_core(&s, x, y, comp, (void *) data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, 
const void *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_tga_core(&s, x, y, comp, (void *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR writer +// by Baldur Karlsson + +#define stbiw__max(a, b) ((a) > (b) ? (a) : (b)) + +void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear) +{ + int exponent; + float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2])); + + if (maxcomp < 1e-32f) { + rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0; + } else { + float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp; + + rgbe[0] = (unsigned char)(linear[0] * normalize); + rgbe[1] = (unsigned char)(linear[1] * normalize); + rgbe[2] = (unsigned char)(linear[2] * normalize); + rgbe[3] = (unsigned char)(exponent + 128); + } +} + +void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte) +{ + unsigned char lengthbyte = STBIW_UCHAR(length+128); + STBIW_ASSERT(length+128 <= 255); + s->func(s->context, &lengthbyte, 1); + s->func(s->context, &databyte, 1); +} + +void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data) +{ + unsigned char lengthbyte = STBIW_UCHAR(length); + STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code + s->func(s->context, &lengthbyte, 1); + s->func(s->context, data, length); +} + +void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline) +{ + unsigned char scanlineheader[4] = { 2, 2, 0, 0 }; + unsigned char rgbe[4]; + float linear[3]; + int x; + + scanlineheader[2] = (width&0xff00)>>8; + scanlineheader[3] = (width&0x00ff); + + /* skip RLE for images too small or large */ + if (width < 8 || width >= 32768) { + for (x=0; x < width; x++) { + switch (ncomp) { + case 4: /* 
fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + s->func(s->context, rgbe, 4); + } + } else { + int c,r; + /* encode into scratch buffer */ + for (x=0; x < width; x++) { + switch(ncomp) { + case 4: /* fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + scratch[x + width*0] = rgbe[0]; + scratch[x + width*1] = rgbe[1]; + scratch[x + width*2] = rgbe[2]; + scratch[x + width*3] = rgbe[3]; + } + + s->func(s->context, scanlineheader, 4); + + /* RLE each component separately */ + for (c=0; c < 4; c++) { + unsigned char *comp = &scratch[width*c]; + + x = 0; + while (x < width) { + // find first run + r = x; + while (r+2 < width) { + if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) + break; + ++r; + } + if (r+2 >= width) + r = width; + // dump up to first run + while (x < r) { + int len = r-x; + if (len > 128) len = 128; + stbiw__write_dump_data(s, len, &comp[x]); + x += len; + } + // if there's a run, output it + if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd + // find next byte after run + while (r < width && comp[r] == comp[x]) + ++r; + // output run up to r + while (x < r) { + int len = r-x; + if (len > 127) len = 127; + stbiw__write_run_data(s, len, comp[x]); + x += len; + } + } + } + } + } +} + +static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data) +{ + if (y <= 0 || x <= 0 || data == NULL) + return 0; + else { + // Each component is stored separately. Allocate scratch space for full output scanline. 
+ unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); + int i, len; + char buffer[128]; + char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; + s->func(s->context, header, sizeof(header)-1); + +#ifdef STBI_MSC_SECURE_CRT + len = sprintf_s(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#else + len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#endif + s->func(s->context, buffer, len); + + for(i=0; i < y; i++) + stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i)*x); + STBIW_FREE(scratch); + return 1; + } +} + +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_hdr_core(&s, x, y, comp, (float *) data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif // STBI_WRITE_NO_STDIO + + +////////////////////////////////////////////////////////////////////////////// +// +// PNG writer +// + +#ifndef STBIW_ZLIB_COMPRESS +// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() +#define stbiw__sbraw(a) ((int *) (a) - 2) +#define stbiw__sbm(a) stbiw__sbraw(a)[0] +#define stbiw__sbn(a) stbiw__sbraw(a)[1] + +#define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) +#define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0) +#define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) + +#define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) +#define stbiw__sbcount(a) ((a) ? 
stbiw__sbn(a) : 0) +#define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0) + +static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) +{ + int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1; + void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2); + STBIW_ASSERT(p); + if (p) { + if (!*arr) ((int *) p)[1] = 0; + *arr = (void *) ((int *) p + 2); + stbiw__sbm(*arr) = m; + } + return *arr; +} + +static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount) +{ + while (*bitcount >= 8) { + stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer)); + *bitbuffer >>= 8; + *bitcount -= 8; + } + return data; +} + +static int stbiw__zlib_bitrev(int code, int codebits) +{ + int res=0; + while (codebits--) { + res = (res << 1) | (code & 1); + code >>= 1; + } + return res; +} + +static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit) +{ + int i; + for (i=0; i < limit && i < 258; ++i) + if (a[i] != b[i]) break; + return i; +} + +static unsigned int stbiw__zhash(unsigned char *data) +{ + stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16); + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + return hash; +} + +#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount)) +#define stbiw__zlib_add(code,codebits) \ + (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) +#define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) +// default huffman tables +#define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8) +#define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9) +#define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7) +#define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8) +#define stbiw__zlib_huff(n) ((n) <= 143 ? 
stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) +#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) + +#define stbiw__ZHASH 16384 + +#endif // STBIW_ZLIB_COMPRESS + +unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality) +{ +#ifdef STBIW_ZLIB_COMPRESS + // user provided a zlib compress implementation, use that + return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality); +#else // use builtin + static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; + static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; + static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; + static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; + unsigned int bitbuf=0; + int i,j, bitcount=0; + unsigned char *out = NULL; + unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(char**)); + if (hash_table == NULL) + return NULL; + if (quality < 5) quality = 5; + + stbiw__sbpush(out, 0x78); // DEFLATE 32K window + stbiw__sbpush(out, 0x5e); // FLEVEL = 1 + stbiw__zlib_add(1,1); // BFINAL = 1 + stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman + + for (i=0; i < stbiw__ZHASH; ++i) + hash_table[i] = NULL; + + i=0; + while (i < data_len-3) { + // hash next 3 bytes of data to be compressed + int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3; + unsigned char *bestloc = 0; + unsigned char **hlist = hash_table[h]; + int n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32768) { // if entry lies within window + int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i); + if (d >= best) best=d,bestloc=hlist[j]; + } + 
} + // when hash table entry is too long, delete half the entries + if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) { + STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality); + stbiw__sbn(hash_table[h]) = quality; + } + stbiw__sbpush(hash_table[h],data+i); + + if (bestloc) { + // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal + h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1); + hlist = hash_table[h]; + n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32767) { + int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1); + if (e > best) { // if next match is better, bail on current match + bestloc = NULL; + break; + } + } + } + } + + if (bestloc) { + int d = (int) (data+i - bestloc); // distance back + STBIW_ASSERT(d <= 32767 && best <= 258); + for (j=0; best > lengthc[j+1]-1; ++j); + stbiw__zlib_huff(j+257); + if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); + for (j=0; d > distc[j+1]-1; ++j); + stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); + if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]); + i += best; + } else { + stbiw__zlib_huffb(data[i]); + ++i; + } + } + // write out final bytes + for (;i < data_len; ++i) + stbiw__zlib_huffb(data[i]); + stbiw__zlib_huff(256); // end of block + // pad with 0 bits to byte boundary + while (bitcount) + stbiw__zlib_add(0,1); + + for (i=0; i < stbiw__ZHASH; ++i) + (void) stbiw__sbfree(hash_table[i]); + STBIW_FREE(hash_table); + + { + // compute adler32 on input + unsigned int s1=1, s2=0; + int blocklen = (int) (data_len % 5552); + j=0; + while (j < data_len) { + for (i=0; i < blocklen; ++i) s1 += data[j+i], s2 += s1; + s1 %= 65521, s2 %= 65521; + j += blocklen; + blocklen = 5552; + } + stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s2)); + stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s1)); + } + *out_len = stbiw__sbn(out); + // make 
returned pointer freeable + STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len); + return (unsigned char *) stbiw__sbraw(out); +#endif // STBIW_ZLIB_COMPRESS +} + +static unsigned int stbiw__crc32(unsigned char *buffer, int len) +{ + static unsigned int crc_table[256] = + { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 
0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + }; + + unsigned int crc = ~0u; + int i; + for (i=0; i < len; ++i) + crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; + return ~crc; +} + +#define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4) +#define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v)); +#define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3]) + +static void stbiw__wpcrc(unsigned char **data, int len) +{ + unsigned int crc = stbiw__crc32(*data - len - 4, len+4); + stbiw__wp32(*data, crc); +} + +static unsigned 
char stbiw__paeth(int a, int b, int c) +{ + int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c); + if (pa <= pb && pa <= pc) return STBIW_UCHAR(a); + if (pb <= pc) return STBIW_UCHAR(b); + return STBIW_UCHAR(c); +} + +// @OPTIMIZE: provide an option that always forces left-predict or paeth predict +static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer) +{ + static int mapping[] = { 0,1,2,3,4 }; + static int firstmap[] = { 0,1,0,5,6 }; + int *mymap = (y != 0) ? mapping : firstmap; + int i; + int type = mymap[filter_type]; + unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y); + int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes; + for (i = 0; i < n; ++i) { + switch (type) { + case 0: line_buffer[i] = z[i]; break; + case 1: line_buffer[i] = z[i]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break; + case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break; + case 5: line_buffer[i] = z[i]; break; + case 6: line_buffer[i] = z[i]; break; + } + } + for (i=n; i < width*n; ++i) { + switch (type) { + case 0: line_buffer[i] = z[i]; break; + case 1: line_buffer[i] = z[i] - z[i-n]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break; + case 4: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break; + case 5: line_buffer[i] = z[i] - (z[i-n]>>1); break; + case 6: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; + } + } +} + +unsigned char *stbi_write_png_to_mem(unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len) +{ + int force_filter = stbi_write_force_png_filter; + int ctype[5] = { -1, 0, 4, 2, 6 }; + unsigned char sig[8] 
= { 137,80,78,71,13,10,26,10 }; + unsigned char *out,*o, *filt, *zlib; + signed char *line_buffer; + int j,zlen; + + if (stride_bytes == 0) + stride_bytes = x * n; + + if (force_filter >= 5) { + force_filter = -1; + } + + filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; + line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; } + for (j=0; j < y; ++j) { + int filter_type; + if (force_filter > -1) { + filter_type = force_filter; + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, force_filter, line_buffer); + } else { // Estimate the best filter by running through all of them: + int best_filter = 0, best_filter_val = 0x7fffffff, est, i; + for (filter_type = 0; filter_type < 5; filter_type++) { + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, filter_type, line_buffer); + + // Estimate the entropy of the line using this filter; the less, the better. + est = 0; + for (i = 0; i < x*n; ++i) { + est += abs((signed char) line_buffer[i]); + } + if (est < best_filter_val) { + best_filter_val = est; + best_filter = filter_type; + } + } + if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, best_filter, line_buffer); + filter_type = best_filter; + } + } + // when we get here, filter_type contains the filter type, and line_buffer contains the data + filt[j*(x*n+1)] = (unsigned char) filter_type; + STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); + } + STBIW_FREE(line_buffer); + zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level); + STBIW_FREE(filt); + if (!zlib) return 0; + + // each tag requires 12 bytes of overhead + out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12); + if (!out) return 0; + *out_len = 8 + 12+13 + 12+zlen + 12; + + o=out; + STBIW_MEMMOVE(o,sig,8); o+= 8; + stbiw__wp32(o, 13); // header length + stbiw__wptag(o, "IHDR"); + 
stbiw__wp32(o, x); + stbiw__wp32(o, y); + *o++ = 8; + *o++ = STBIW_UCHAR(ctype[n]); + *o++ = 0; + *o++ = 0; + *o++ = 0; + stbiw__wpcrc(&o,13); + + stbiw__wp32(o, zlen); + stbiw__wptag(o, "IDAT"); + STBIW_MEMMOVE(o, zlib, zlen); + o += zlen; + STBIW_FREE(zlib); + stbiw__wpcrc(&o, zlen); + + stbiw__wp32(o,0); + stbiw__wptag(o, "IEND"); + stbiw__wpcrc(&o,0); + + STBIW_ASSERT(o == out + *out_len); + + return out; +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes) +{ + FILE *f; + int len; + unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; +#ifdef STBI_MSC_SECURE_CRT + if (fopen_s(&f, filename, "wb")) + f = NULL; +#else + f = fopen(filename, "wb"); +#endif + if (!f) { STBIW_FREE(png); return 0; } + fwrite(png, 1, len, f); + fclose(f); + STBIW_FREE(png); + return 1; +} +#endif + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes) +{ + int len; + unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; + func(context, png, len); + STBIW_FREE(png); + return 1; +} + + +/* *************************************************************************** + * + * JPEG writer + * + * This is based on Jon Olick's jo_jpeg.cpp: + * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html + */ + +static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18, + 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; + +static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) { + int bitBuf = *bitBufP, bitCnt = *bitCntP; + bitCnt += bs[1]; + bitBuf |= bs[0] << (24 - 
bitCnt); + while(bitCnt >= 8) { + unsigned char c = (bitBuf >> 16) & 255; + stbiw__putc(s, c); + if(c == 255) { + stbiw__putc(s, 0); + } + bitBuf <<= 8; + bitCnt -= 8; + } + *bitBufP = bitBuf; + *bitCntP = bitCnt; +} + +static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) { + float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p; + float z1, z2, z3, z4, z5, z11, z13; + + float tmp0 = d0 + d7; + float tmp7 = d0 - d7; + float tmp1 = d1 + d6; + float tmp6 = d1 - d6; + float tmp2 = d2 + d5; + float tmp5 = d2 - d5; + float tmp3 = d3 + d4; + float tmp4 = d3 - d4; + + // Even part + float tmp10 = tmp0 + tmp3; // phase 2 + float tmp13 = tmp0 - tmp3; + float tmp11 = tmp1 + tmp2; + float tmp12 = tmp1 - tmp2; + + d0 = tmp10 + tmp11; // phase 3 + d4 = tmp10 - tmp11; + + z1 = (tmp12 + tmp13) * 0.707106781f; // c4 + d2 = tmp13 + z1; // phase 5 + d6 = tmp13 - z1; + + // Odd part + tmp10 = tmp4 + tmp5; // phase 2 + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + // The rotator is modified from fig 4-8 to avoid extra negations. + z5 = (tmp10 - tmp12) * 0.382683433f; // c6 + z2 = tmp10 * 0.541196100f + z5; // c2-c6 + z4 = tmp12 * 1.306562965f + z5; // c2+c6 + z3 = tmp11 * 0.707106781f; // c4 + + z11 = tmp7 + z3; // phase 5 + z13 = tmp7 - z3; + + *d5p = z13 + z2; // phase 6 + *d3p = z13 - z2; + *d1p = z11 + z4; + *d7p = z11 - z4; + + *d0p = d0; *d2p = d2; *d4p = d4; *d6p = d6; +} + +static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) { + int tmp1 = val < 0 ? -val : val; + val = val < 0 ? 
val-1 : val; + bits[1] = 1; + while(tmp1 >>= 1) { + ++bits[1]; + } + bits[0] = val & ((1<0)&&(DU[end0pos]==0); --end0pos) { + } + // end0pos = first element in reverse order !=0 + if(end0pos == 0) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + return DU[0]; + } + for(i = 1; i <= end0pos; ++i) { + int startpos = i; + int nrzeroes; + unsigned short bits[2]; + for (; DU[i]==0 && i<=end0pos; ++i) { + } + nrzeroes = i-startpos; + if ( nrzeroes >= 16 ) { + int lng = nrzeroes>>4; + int nrmarker; + for (nrmarker=1; nrmarker <= lng; ++nrmarker) + stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); + nrzeroes &= 15; + } + stbiw__jpg_calcBits(DU[i], bits); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); + } + if(end0pos != 63) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + } + return DU[0]; +} + +static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) { + // Constants that don't pollute global namespace + static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0}; + static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d}; + static const unsigned char std_ac_luminance_values[] = { + 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, + 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, + 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, + 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, + 
0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, + 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, + 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0}; + static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77}; + static const unsigned char std_ac_chrominance_values[] = { + 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, + 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26, + 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, + 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, + 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, + 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, + 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + // Huffman tables + static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}}; + static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}}; + static const unsigned short YAC_HT[256][2] = { + {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const unsigned short UVAC_HT[256][2] = { + {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22, + 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99}; + static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99, + 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99}; + static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, + 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; + + int row, col, i, k; + float fdtbl_Y[64], fdtbl_UV[64]; + unsigned char YTable[64], UVTable[64]; + + if(!data || !width || !height || comp > 4 || comp < 1) { + return 0; + } + + quality = quality ? quality : 90; + quality = quality < 1 ? 1 : quality > 100 ? 100 : quality; + quality = quality < 50 ? 5000 / quality : 200 - quality * 2; + + for(i = 0; i < 64; ++i) { + int uvti, yti = (YQT[i]*quality+50)/100; + YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 
255 : yti); + uvti = (UVQT[i]*quality+50)/100; + UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti); + } + + for(row = 0, k = 0; row < 8; ++row) { + for(col = 0; col < 8; ++col, ++k) { + fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + } + } + + // Write Headers + { + static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; + static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; + const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width), + 3,1,0x11,0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; + s->func(s->context, (void*)head0, sizeof(head0)); + s->func(s->context, (void*)YTable, sizeof(YTable)); + stbiw__putc(s, 1); + s->func(s->context, UVTable, sizeof(UVTable)); + s->func(s->context, (void*)head1, sizeof(head1)); + s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1); + s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); + stbiw__putc(s, 0x10); // HTYACinfo + s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1); + s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); + stbiw__putc(s, 1); // HTUDCinfo + s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values)); + stbiw__putc(s, 0x11); // HTUACinfo + s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); + s->func(s->context, (void*)head2, sizeof(head2)); + } + + // Encode 8x8 macroblocks 
+ { + static const unsigned short fillBits[] = {0x7F, 7}; + const unsigned char *imageData = (const unsigned char *)data; + int DCY=0, DCU=0, DCV=0; + int bitBuf=0, bitCnt=0; + // comp == 2 is grey+alpha (alpha is ignored) + int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; + int x, y, pos; + for(y = 0; y < height; y += 8) { + for(x = 0; x < width; x += 8) { + float YDU[64], UDU[64], VDU[64]; + for(row = y, pos = 0; row < y+8; ++row) { + for(col = x; col < x+8; ++col, ++pos) { + int p = (stbi__flip_vertically_on_write ? height-1-row : row)*width*comp + col*comp; + float r, g, b; + if(row >= height) { + p -= width*comp*(row+1 - height); + } + if(col >= width) { + p -= comp*(col+1 - width); + } + + r = imageData[p+0]; + g = imageData[p+ofsG]; + b = imageData[p+ofsB]; + YDU[pos]=+0.29900f*r+0.58700f*g+0.11400f*b-128; + UDU[pos]=-0.16874f*r-0.33126f*g+0.50000f*b; + VDU[pos]=+0.50000f*r-0.41869f*g-0.08131f*b; + } + } + + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, YDU, fdtbl_Y, DCY, YDC_HT, YAC_HT); + DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, UDU, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); + DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, VDU, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); + } + } + + // Do the bit alignment of the EOI marker + stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); + } + + // EOI + stbiw__putc(s, 0xFF); + stbiw__putc(s, 0xD9); + + return 1; +} + +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality); +} + + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); + stbi__end_write_file(&s); + return r; + } else + return 
0; +} +#endif + +#endif // STB_IMAGE_WRITE_IMPLEMENTATION + +/* Revision history + 1.09 (2018-02-11) + fix typo in zlib quality API, improve STB_I_W_STATIC in C++ + 1.08 (2018-01-29) + add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter + 1.07 (2017-07-24) + doc fix + 1.06 (2017-07-23) + writing JPEG (using Jon Olick's code) + 1.05 ??? + 1.04 (2017-03-03) + monochrome BMP expansion + 1.03 ??? + 1.02 (2016-04-02) + avoid allocating large structures on the stack + 1.01 (2016-01-16) + STBIW_REALLOC_SIZED: support allocators with no realloc support + avoid race-condition in crc initialization + minor compile issues + 1.00 (2015-09-14) + installable file IO function + 0.99 (2015-09-13) + warning fixes; TGA rle support + 0.98 (2015-04-08) + added STBIW_MALLOC, STBIW_ASSERT etc + 0.97 (2015-01-18) + fixed HDR asserts, rewrote HDR rle logic + 0.96 (2015-01-17) + add HDR output + fix monochrome BMP + 0.95 (2014-08-17) + add monochrome TGA output + 0.94 (2014-05-31) + rename private functions to avoid conflicts with stb_image.h + 0.93 (2014-05-27) + warning fixes + 0.92 (2010-08-01) + casts to unsigned char to fix warnings + 0.91 (2010-07-17) + first public release + 0.90 first internal release +*/ + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/workloads/realworld/standard/darknet/src/tree.c b/workloads/realworld/standard/darknet/src/tree.c new file mode 100644 index 0000000000000000000000000000000000000000..67b6d431f6f7e92ede234c71ecae9bd9146dc71f --- /dev/null +++ b/workloads/realworld/standard/darknet/src/tree.c @@ -0,0 +1,139 @@ +#include +#include +#include "tree.h" +#include "utils.h" +#include "data.h" + +void change_leaves(tree *t, char *leaf_list) +{ + list *llist = get_paths(leaf_list); + char **leaves = (char **)list_to_array(llist); + int n = llist->size; + int i,j; + int found = 0; + for(i = 0; i < t->n; ++i){ + t->leaf[i] = 0; + for(j = 0; j < n; ++j){ + if (0==strcmp(t->name[i], leaves[j])){ + t->leaf[i] = 1; + ++found; + break; + } + } + } + fprintf(stderr, "Found %d leaves.\n", found); +} + +float get_hierarchy_probability(float *x, tree *hier, int c, int stride) +{ + float p = 1; + while(c >= 0){ + p = p * x[c*stride]; + c = hier->parent[c]; + } + return p; +} + +void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride) +{ + int j; + for(j = 0; j < n; ++j){ + int parent = hier->parent[j]; + if(parent >= 0){ + predictions[j*stride] *= predictions[parent*stride]; + } + } + if(only_leaves){ + for(j = 0; j < n; ++j){ + if(!hier->leaf[j]) predictions[j*stride] = 0; + } + } +} 
+ +int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride) +{ + float p = 1; + int group = 0; + int i; + while(1){ + float max = 0; + int max_i = 0; + + for(i = 0; i < hier->group_size[group]; ++i){ + int index = i + hier->group_offset[group]; + float val = predictions[(i + hier->group_offset[group])*stride]; + if(val > max){ + max_i = index; + max = val; + } + } + if(p*max > thresh){ + p = p*max; + group = hier->child[max_i]; + if(hier->child[max_i] < 0) return max_i; + } else if (group == 0){ + return max_i; + } else { + return hier->parent[hier->group_offset[group]]; + } + } + return 0; +} + +tree *read_tree(char *filename) +{ + tree t = {0}; + FILE *fp = fopen(filename, "r"); + + char *line; + int last_parent = -1; + int group_size = 0; + int groups = 0; + int n = 0; + while((line=fgetl(fp)) != 0){ + char *id = calloc(256, sizeof(char)); + int parent = -1; + sscanf(line, "%s %d", id, &parent); + t.parent = realloc(t.parent, (n+1)*sizeof(int)); + t.parent[n] = parent; + + t.child = realloc(t.child, (n+1)*sizeof(int)); + t.child[n] = -1; + + t.name = realloc(t.name, (n+1)*sizeof(char *)); + t.name[n] = id; + if(parent != last_parent){ + ++groups; + t.group_offset = realloc(t.group_offset, groups * sizeof(int)); + t.group_offset[groups - 1] = n - group_size; + t.group_size = realloc(t.group_size, groups * sizeof(int)); + t.group_size[groups - 1] = group_size; + group_size = 0; + last_parent = parent; + } + t.group = realloc(t.group, (n+1)*sizeof(int)); + t.group[n] = groups; + if (parent >= 0) { + t.child[parent] = groups; + } + ++n; + ++group_size; + } + ++groups; + t.group_offset = realloc(t.group_offset, groups * sizeof(int)); + t.group_offset[groups - 1] = n - group_size; + t.group_size = realloc(t.group_size, groups * sizeof(int)); + t.group_size[groups - 1] = group_size; + t.n = n; + t.groups = groups; + t.leaf = calloc(n, sizeof(int)); + int i; + for(i = 0; i < n; ++i) t.leaf[i] = 1; + for(i = 0; i < n; ++i) if(t.parent[i] >= 
0) t.leaf[t.parent[i]] = 0; + + fclose(fp); + tree *tree_ptr = calloc(1, sizeof(tree)); + *tree_ptr = t; + //error(0); + return tree_ptr; +} diff --git a/workloads/realworld/standard/darknet/src/tree.h b/workloads/realworld/standard/darknet/src/tree.h new file mode 100644 index 0000000000000000000000000000000000000000..3802b8ead806266edd291de5407b08c2d7ed5dd1 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/tree.h @@ -0,0 +1,8 @@ +#ifndef TREE_H +#define TREE_H +#include "darknet.h" + +int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride); +float get_hierarchy_probability(float *x, tree *hier, int c, int stride); + +#endif diff --git a/workloads/realworld/standard/darknet/src/upsample_layer.c b/workloads/realworld/standard/darknet/src/upsample_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..83f32ea5f41b4c787c38e5324e3e7dd4909ca928 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/upsample_layer.c @@ -0,0 +1,106 @@ +#include "upsample_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + +layer make_upsample_layer(int batch, int w, int h, int c, int stride) +{ + layer l = {0}; + l.type = UPSAMPLE; + l.batch = batch; + l.w = w; + l.h = h; + l.c = c; + l.out_w = w*stride; + l.out_h = h*stride; + l.out_c = c; + if(stride < 0){ + stride = -stride; + l.reverse=1; + l.out_w = w/stride; + l.out_h = h/stride; + } + l.stride = stride; + l.outputs = l.out_w*l.out_h*l.out_c; + l.inputs = l.w*l.h*l.c; + l.delta = calloc(l.outputs*batch, sizeof(float)); + l.output = calloc(l.outputs*batch, sizeof(float));; + + l.forward = forward_upsample_layer; + l.backward = backward_upsample_layer; + #ifdef GPU + l.forward_gpu = forward_upsample_layer_gpu; + l.backward_gpu = backward_upsample_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + #endif + if(l.reverse) fprintf(stderr, "downsample %2dx %4d x%4d x%4d -> 
%4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); + else fprintf(stderr, "upsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); + return l; +} + +void resize_upsample_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + l->out_w = w*l->stride; + l->out_h = h*l->stride; + if(l->reverse){ + l->out_w = w/l->stride; + l->out_h = h/l->stride; + } + l->outputs = l->out_w*l->out_h*l->out_c; + l->inputs = l->h*l->w*l->c; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + +void forward_upsample_layer(const layer l, network net) +{ + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + if(l.reverse){ + upsample_cpu(l.output, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input); + }else{ + upsample_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output); + } +} + +void backward_upsample_layer(const layer l, network net) +{ + if(l.reverse){ + upsample_cpu(l.delta, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, net.delta); + }else{ + upsample_cpu(net.delta, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta); + } +} + +#ifdef GPU +void forward_upsample_layer_gpu(const layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + if(l.reverse){ + upsample_gpu(l.output_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input_gpu); + }else{ + upsample_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output_gpu); + } +} + +void backward_upsample_layer_gpu(const layer l, network net) +{ + if(l.reverse){ + upsample_gpu(l.delta_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, net.delta_gpu); + }else{ + upsample_gpu(net.delta_gpu, l.w, l.h, l.c, 
l.batch, l.stride, 0, l.scale, l.delta_gpu); + } +} +#endif diff --git a/workloads/realworld/standard/darknet/src/upsample_layer.h b/workloads/realworld/standard/darknet/src/upsample_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..86790d1088354ea9c46a4b20fbe1dacf36925ca8 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/upsample_layer.h @@ -0,0 +1,15 @@ +#ifndef UPSAMPLE_LAYER_H +#define UPSAMPLE_LAYER_H +#include "darknet.h" + +layer make_upsample_layer(int batch, int w, int h, int c, int stride); +void forward_upsample_layer(const layer l, network net); +void backward_upsample_layer(const layer l, network net); +void resize_upsample_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_upsample_layer_gpu(const layer l, network net); +void backward_upsample_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/standard/darknet/src/utils.c b/workloads/realworld/standard/darknet/src/utils.c new file mode 100644 index 0000000000000000000000000000000000000000..626b4678c1e2779552ed9d34f19ce4b0f57d9ded --- /dev/null +++ b/workloads/realworld/standard/darknet/src/utils.c @@ -0,0 +1,726 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + + +/* +// old timing. is it better? who knows!! 
+double get_wall_time() +{ + struct timeval time; + if (gettimeofday(&time,NULL)){ + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * .000001; +} +*/ + +double what_time_is_it_now() +{ + struct timeval time; + if (gettimeofday(&time,NULL)){ + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * .000001; +} + +int *read_intlist(char *gpu_list, int *ngpus, int d) +{ + int *gpus = 0; + if(gpu_list){ + int len = strlen(gpu_list); + *ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++*ngpus; + } + gpus = calloc(*ngpus, sizeof(int)); + for(i = 0; i < *ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpus = calloc(1, sizeof(float)); + *gpus = d; + *ngpus = 1; + } + return gpus; +} + +int *read_map(char *filename) +{ + int n = 0; + int *map = 0; + char *str; + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + while((str=fgetl(file))){ + ++n; + map = realloc(map, n*sizeof(int)); + map[n-1] = atoi(str); + } + return map; +} + +void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections) +{ + size_t i; + for(i = 0; i < sections; ++i){ + size_t start = n*i/sections; + size_t end = n*(i+1)/sections; + size_t num = end-start; + shuffle(arr+(start*size), num, size); + } +} + +void shuffle(void *arr, size_t n, size_t size) +{ + size_t i; + void *swp = calloc(1, size); + for(i = 0; i < n-1; ++i){ + size_t j = i + rand()/(RAND_MAX / (n-i)+1); + memcpy(swp, arr+(j*size), size); + memcpy(arr+(j*size), arr+(i*size), size); + memcpy(arr+(i*size), swp, size); + } +} + +int *random_index_order(int min, int max) +{ + int *inds = calloc(max-min, sizeof(int)); + int i; + for(i = min; i < max; ++i){ + inds[i] = i; + } + for(i = min; i < max-1; ++i){ + int swap = inds[i]; + int index = i + rand()%(max-i); + inds[i] = inds[index]; + inds[index] = swap; + } + return inds; +} + +void del_arg(int argc, char **argv, int index) +{ + int i; + for(i = index; i < 
argc-1; ++i) argv[i] = argv[i+1]; + argv[i] = 0; +} + +int find_arg(int argc, char* argv[], char *arg) +{ + int i; + for(i = 0; i < argc; ++i) { + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)) { + del_arg(argc, argv, i); + return 1; + } + } + return 0; +} + +int find_int_arg(int argc, char **argv, char *arg, int def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = atoi(argv[i+1]); + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + +float find_float_arg(int argc, char **argv, char *arg, float def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = atof(argv[i+1]); + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + +char *find_char_arg(int argc, char **argv, char *arg, char *def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = argv[i+1]; + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + + +char *basecfg(char *cfgfile) +{ + char *c = cfgfile; + char *next; + while((next = strchr(c, '/'))) + { + c = next+1; + } + c = copy_string(c); + next = strchr(c, '.'); + if (next) *next = 0; + return c; +} + +int alphanum_to_int(char c) +{ + return (c < 58) ? c - 48 : c-87; +} +char int_to_alphanum(int i) +{ + if (i == 36) return '.'; + return (i < 10) ? i + 48 : i + 87; +} + +void pm(int M, int N, float *A) +{ + int i,j; + for(i =0 ; i < M; ++i){ + printf("%d ", i+1); + for(j = 0; j < N; ++j){ + printf("%2.4f, ", A[i*N+j]); + } + printf("\n"); + } + printf("\n"); +} + +void find_replace(char *str, char *orig, char *rep, char *output) +{ + char buffer[4096] = {0}; + char *p; + + sprintf(buffer, "%s", str); + if(!(p = strstr(buffer, orig))){ // Is 'orig' even in 'str'? 
+ sprintf(output, "%s", str); + return; + } + + *p = '\0'; + + sprintf(output, "%s%s%s", buffer, rep, p+strlen(orig)); +} + +float sec(clock_t clocks) +{ + return (float)clocks/CLOCKS_PER_SEC; +} + +void top_k(float *a, int n, int k, int *index) +{ + int i,j; + for(j = 0; j < k; ++j) index[j] = -1; + for(i = 0; i < n; ++i){ + int curr = i; + for(j = 0; j < k; ++j){ + if((index[j] < 0) || a[curr] > a[index[j]]){ + int swap = curr; + curr = index[j]; + index[j] = swap; + } + } + } +} + +void error(const char *s) +{ + perror(s); + assert(0); + exit(-1); +} + +unsigned char *read_file(char *filename) +{ + FILE *fp = fopen(filename, "rb"); + size_t size; + + fseek(fp, 0, SEEK_END); + size = ftell(fp); + fseek(fp, 0, SEEK_SET); + + unsigned char *text = calloc(size+1, sizeof(char)); + fread(text, 1, size, fp); + fclose(fp); + return text; +} + +void malloc_error() +{ + fprintf(stderr, "Malloc error\n"); + exit(-1); +} + +void file_error(char *s) +{ + fprintf(stderr, "Couldn't open file: %s\n", s); + exit(0); +} + +list *split_str(char *s, char delim) +{ + size_t i; + size_t len = strlen(s); + list *l = make_list(); + list_insert(l, s); + for(i = 0; i < len; ++i){ + if(s[i] == delim){ + s[i] = '\0'; + list_insert(l, &(s[i+1])); + } + } + return l; +} + +void strip(char *s) +{ + size_t i; + size_t len = strlen(s); + size_t offset = 0; + for(i = 0; i < len; ++i){ + char c = s[i]; + if(c==' '||c=='\t'||c=='\n') ++offset; + else s[i-offset] = c; + } + s[len-offset] = '\0'; +} + +void strip_char(char *s, char bad) +{ + size_t i; + size_t len = strlen(s); + size_t offset = 0; + for(i = 0; i < len; ++i){ + char c = s[i]; + if(c==bad) ++offset; + else s[i-offset] = c; + } + s[len-offset] = '\0'; +} + +void free_ptrs(void **ptrs, int n) +{ + int i; + for(i = 0; i < n; ++i) free(ptrs[i]); + free(ptrs); +} + +char *fgetl(FILE *fp) +{ + if(feof(fp)) return 0; + size_t size = 512; + char *line = malloc(size*sizeof(char)); + if(!fgets(line, size, fp)){ + free(line); + return 0; + } + + 
size_t curr = strlen(line); + + while((line[curr-1] != '\n') && !feof(fp)){ + if(curr == size-1){ + size *= 2; + line = realloc(line, size*sizeof(char)); + if(!line) { + printf("%ld\n", size); + malloc_error(); + } + } + size_t readsize = size-curr; + if(readsize > INT_MAX) readsize = INT_MAX-1; + fgets(&line[curr], readsize, fp); + curr = strlen(line); + } + if(line[curr-1] == '\n') line[curr-1] = '\0'; + + return line; +} + +int read_int(int fd) +{ + int n = 0; + int next = read(fd, &n, sizeof(int)); + if(next <= 0) return -1; + return n; +} + +void write_int(int fd, int n) +{ + int next = write(fd, &n, sizeof(int)); + if(next <= 0) error("read failed"); +} + +int read_all_fail(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + int next = read(fd, buffer + n, bytes-n); + if(next <= 0) return 1; + n += next; + } + return 0; +} + +int write_all_fail(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + size_t next = write(fd, buffer + n, bytes-n); + if(next <= 0) return 1; + n += next; + } + return 0; +} + +void read_all(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + int next = read(fd, buffer + n, bytes-n); + if(next <= 0) error("read failed"); + n += next; + } +} + +void write_all(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + size_t next = write(fd, buffer + n, bytes-n); + if(next <= 0) error("write failed"); + n += next; + } +} + + +char *copy_string(char *s) +{ + char *copy = malloc(strlen(s)+1); + strncpy(copy, s, strlen(s)+1); + return copy; +} + +list *parse_csv_line(char *line) +{ + list *l = make_list(); + char *c, *p; + int in = 0; + for(c = line, p = line; *c != '\0'; ++c){ + if(*c == '"') in = !in; + else if(*c == ',' && !in){ + *c = '\0'; + list_insert(l, copy_string(p)); + p = c+1; + } + } + list_insert(l, copy_string(p)); + return l; +} + +int count_fields(char *line) +{ + int count = 0; + int done = 0; + char *c; + for(c = line; !done; ++c){ 
+ done = (*c == '\0'); + if(*c == ',' || done) ++count; + } + return count; +} + +float *parse_fields(char *line, int n) +{ + float *field = calloc(n, sizeof(float)); + char *c, *p, *end; + int count = 0; + int done = 0; + for(c = line, p = line; !done; ++c){ + done = (*c == '\0'); + if(*c == ',' || done){ + *c = '\0'; + field[count] = strtod(p, &end); + if(p == c) field[count] = nan(""); + if(end != c && (end != c-1 || *end != '\r')) field[count] = nan(""); //DOS file formats! + p = c+1; + ++count; + } + } + return field; +} + +float sum_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i) sum += a[i]; + return sum; +} + +float mean_array(float *a, int n) +{ + return sum_array(a,n)/n; +} + +void mean_arrays(float **a, int n, int els, float *avg) +{ + int i; + int j; + memset(avg, 0, els*sizeof(float)); + for(j = 0; j < n; ++j){ + for(i = 0; i < els; ++i){ + avg[i] += a[j][i]; + } + } + for(i = 0; i < els; ++i){ + avg[i] /= n; + } +} + +void print_statistics(float *a, int n) +{ + float m = mean_array(a, n); + float v = variance_array(a, n); + printf("MSE: %.6f, Mean: %.6f, Variance: %.6f\n", mse_array(a, n), m, v); +} + +float variance_array(float *a, int n) +{ + int i; + float sum = 0; + float mean = mean_array(a, n); + for(i = 0; i < n; ++i) sum += (a[i] - mean)*(a[i]-mean); + float variance = sum/n; + return variance; +} + +int constrain_int(int a, int min, int max) +{ + if (a < min) return min; + if (a > max) return max; + return a; +} + +float constrain(float min, float max, float a) +{ + if (a < min) return min; + if (a > max) return max; + return a; +} + +float dist_array(float *a, float *b, int n, int sub) +{ + int i; + float sum = 0; + for(i = 0; i < n; i += sub) sum += pow(a[i]-b[i], 2); + return sqrt(sum); +} + +float mse_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i) sum += a[i]*a[i]; + return sqrt(sum/n); +} + +void normalize_array(float *a, int n) +{ + int i; + float mu = mean_array(a,n); + float 
sigma = sqrt(variance_array(a,n)); + for(i = 0; i < n; ++i){ + a[i] = (a[i] - mu)/sigma; + } + mu = mean_array(a,n); + sigma = sqrt(variance_array(a,n)); +} + +void translate_array(float *a, int n, float s) +{ + int i; + for(i = 0; i < n; ++i){ + a[i] += s; + } +} + +float mag_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + sum += a[i]*a[i]; + } + return sqrt(sum); +} + +void scale_array(float *a, int n, float s) +{ + int i; + for(i = 0; i < n; ++i){ + a[i] *= s; + } +} + +int sample_array(float *a, int n) +{ + float sum = sum_array(a, n); + scale_array(a, n, 1./sum); + float r = rand_uniform(0, 1); + int i; + for(i = 0; i < n; ++i){ + r = r - a[i]; + if (r <= 0) return i; + } + return n-1; +} + +int max_int_index(int *a, int n) +{ + if(n <= 0) return -1; + int i, max_i = 0; + int max = a[0]; + for(i = 1; i < n; ++i){ + if(a[i] > max){ + max = a[i]; + max_i = i; + } + } + return max_i; +} + +int max_index(float *a, int n) +{ + if(n <= 0) return -1; + int i, max_i = 0; + float max = a[0]; + for(i = 1; i < n; ++i){ + if(a[i] > max){ + max = a[i]; + max_i = i; + } + } + return max_i; +} + +int int_index(int *a, int val, int n) +{ + int i; + for(i = 0; i < n; ++i){ + if(a[i] == val) return i; + } + return -1; +} + +int rand_int(int min, int max) +{ + if (max < min){ + int s = min; + min = max; + max = s; + } + int r = (rand()%(max - min + 1)) + min; + return r; +} + +// From http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform +float rand_normal() +{ + static int haveSpare = 0; + static double rand1, rand2; + + if(haveSpare) + { + haveSpare = 0; + return sqrt(rand1) * sin(rand2); + } + + haveSpare = 1; + + rand1 = rand() / ((double) RAND_MAX); + if(rand1 < 1e-100) rand1 = 1e-100; + rand1 = -2 * log(rand1); + rand2 = (rand() / ((double) RAND_MAX)) * TWO_PI; + + return sqrt(rand1) * cos(rand2); +} + +/* + float rand_normal() + { + int n = 12; + int i; + float sum= 0; + for(i = 0; i < n; ++i) sum += (float)rand()/RAND_MAX; + return 
sum-n/2.; + } + */ + +size_t rand_size_t() +{ + return ((size_t)(rand()&0xff) << 56) | + ((size_t)(rand()&0xff) << 48) | + ((size_t)(rand()&0xff) << 40) | + ((size_t)(rand()&0xff) << 32) | + ((size_t)(rand()&0xff) << 24) | + ((size_t)(rand()&0xff) << 16) | + ((size_t)(rand()&0xff) << 8) | + ((size_t)(rand()&0xff) << 0); +} + +float rand_uniform(float min, float max) +{ + if(max < min){ + float swap = min; + min = max; + max = swap; + } + return ((float)rand()/RAND_MAX * (max - min)) + min; +} + +float rand_scale(float s) +{ + float scale = rand_uniform(1, s); + if(rand()%2) return scale; + return 1./scale; +} + +float **one_hot_encode(float *a, int n, int k) +{ + int i; + float **t = calloc(n, sizeof(float*)); + for(i = 0; i < n; ++i){ + t[i] = calloc(k, sizeof(float)); + int index = (int)a[i]; + t[i][index] = 1; + } + return t; +} + diff --git a/workloads/realworld/standard/darknet/src/utils.h b/workloads/realworld/standard/darknet/src/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..ef24da79888612f5b48fbb4dc233c483590e0c34 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/utils.h @@ -0,0 +1,53 @@ +#ifndef UTILS_H +#define UTILS_H +#include +#include +#include "darknet.h" +#include "list.h" + +#define TIME(a) \ + do { \ + double start = what_time_is_it_now(); \ + a; \ + printf("%s took: %f seconds\n", #a, what_time_is_it_now() - start); \ + } while (0) + +#define TWO_PI 6.2831853071795864769252866f + +double what_time_is_it_now(); +void shuffle(void *arr, size_t n, size_t size); +void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections); +void free_ptrs(void **ptrs, int n); +int alphanum_to_int(char c); +char int_to_alphanum(int i); +int read_int(int fd); +void write_int(int fd, int n); +void read_all(int fd, char *buffer, size_t bytes); +void write_all(int fd, char *buffer, size_t bytes); +int read_all_fail(int fd, char *buffer, size_t bytes); +int write_all_fail(int fd, char *buffer, size_t bytes); +void 
find_replace(char *str, char *orig, char *rep, char *output); +void malloc_error(); +void file_error(char *s); +void strip(char *s); +void strip_char(char *s, char bad); +list *split_str(char *s, char delim); +char *fgetl(FILE *fp); +list *parse_csv_line(char *line); +char *copy_string(char *s); +int count_fields(char *line); +float *parse_fields(char *line, int n); +void translate_array(float *a, int n, float s); +float constrain(float min, float max, float a); +int constrain_int(int a, int min, int max); +float rand_scale(float s); +int rand_int(int min, int max); +void mean_arrays(float **a, int n, int els, float *avg); +float dist_array(float *a, float *b, int n, int sub); +float **one_hot_encode(float *a, int n, int k); +float sec(clock_t clocks); +void print_statistics(float *a, int n); +int int_index(int *a, int val, int n); + +#endif + diff --git a/workloads/realworld/standard/darknet/src/yolo_layer.c b/workloads/realworld/standard/darknet/src/yolo_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..049a4d6a92cf7fea667b8de2340822834408bb05 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/yolo_layer.c @@ -0,0 +1,374 @@ +#include "yolo_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes) +{ + int i; + layer l = {0}; + l.type = YOLO; + + l.n = n; + l.total = total; + l.batch = batch; + l.h = h; + l.w = w; + l.c = n*(classes + 4 + 1); + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.cost = calloc(1, sizeof(float)); + l.biases = calloc(total*2, sizeof(float)); + if(mask) l.mask = mask; + else{ + l.mask = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + l.mask[i] = i; + } + } + l.bias_updates = calloc(n*2, sizeof(float)); + l.outputs = h*w*n*(classes + 4 + 1); + l.inputs = l.outputs; + l.truths = 90*(4 
+ 1); + l.delta = calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + for(i = 0; i < total*2; ++i){ + l.biases[i] = .5; + } + + l.forward = forward_yolo_layer; + l.backward = backward_yolo_layer; +#ifdef GPU + l.forward_gpu = forward_yolo_layer_gpu; + l.backward_gpu = backward_yolo_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "yolo\n"); + srand(0); + + return l; +} + +void resize_yolo_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->n*(l->classes + 4 + 1); + l->inputs = l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride) +{ + box b; + b.x = (i + x[index + 0*stride]) / lw; + b.y = (j + x[index + 1*stride]) / lh; + b.w = exp(x[index + 2*stride]) * biases[2*n] / w; + b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h; + return b; +} + +float delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride) +{ + box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); + float iou = box_iou(pred, truth); + + float tx = (truth.x*lw - i); + float ty = (truth.y*lh - j); + float tw = log(truth.w*w / biases[2*n]); + float th = log(truth.h*h / biases[2*n + 1]); + + delta[index + 0*stride] = scale * (tx - x[index + 0*stride]); + delta[index + 1*stride] = scale * (ty - x[index + 1*stride]); + delta[index + 2*stride] = scale * (tw - x[index + 2*stride]); + delta[index + 
3*stride] = scale * (th - x[index + 3*stride]); + return iou; +} + + +void delta_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat) +{ + int n; + if (delta[index]){ + delta[index + stride*class] = 1 - output[index + stride*class]; + if(avg_cat) *avg_cat += output[index + stride*class]; + return; + } + for(n = 0; n < classes; ++n){ + delta[index + stride*n] = ((n == class)?1 : 0) - output[index + stride*n]; + if(n == class && avg_cat) *avg_cat += output[index + stride*n]; + } +} + +static int entry_index(layer l, int batch, int location, int entry) +{ + int n = location / (l.w*l.h); + int loc = location % (l.w*l.h); + return batch*l.outputs + n*l.w*l.h*(4+l.classes+1) + entry*l.w*l.h + loc; +} + +void forward_yolo_layer(const layer l, network net) +{ + int i,j,b,t,n; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array(l.output + index, (1+l.classes)*l.w*l.h, LOGISTIC); + } + } +#endif + + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + if(!net.train) return; + float avg_iou = 0; + float recall = 0; + float recall75 = 0; + float avg_cat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + int class_count = 0; + *(l.cost) = 0; + for (b = 0; b < l.batch; ++b) { + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.w*l.h); + float best_iou = 0; + int best_t = 0; + for(t = 0; t < l.max_boxes; ++t){ + box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1); + if(!truth.x) break; + float iou = box_iou(pred, truth); + if (iou > best_iou) { + 
best_iou = iou; + best_t = t; + } + } + int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4); + avg_anyobj += l.output[obj_index]; + l.delta[obj_index] = 0 - l.output[obj_index]; + if (best_iou > l.ignore_thresh) { + l.delta[obj_index] = 0; + } + if (best_iou > l.truth_thresh) { + l.delta[obj_index] = 1 - l.output[obj_index]; + + int class = net.truth[best_t*(4 + 1) + b*l.truths + 4]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); + delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, 0); + box truth = float_to_box(net.truth + best_t*(4 + 1) + b*l.truths, 1); + delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + } + } + } + } + for(t = 0; t < l.max_boxes; ++t){ + box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1); + + if(!truth.x) break; + float best_iou = 0; + int best_n = 0; + i = (truth.x * l.w); + j = (truth.y * l.h); + box truth_shift = truth; + truth_shift.x = truth_shift.y = 0; + for(n = 0; n < l.total; ++n){ + box pred = {0}; + pred.w = l.biases[2*n]/net.w; + pred.h = l.biases[2*n+1]/net.h; + float iou = box_iou(pred, truth_shift); + if (iou > best_iou){ + best_iou = iou; + best_n = n; + } + } + + int mask_n = int_index(l.mask, best_n, l.n); + if(mask_n >= 0){ + int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); + float iou = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + + int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4); + avg_obj += l.output[obj_index]; + l.delta[obj_index] = 1 - l.output[obj_index]; + + int class = net.truth[t*(4 + 1) + b*l.truths + 4]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4 + 1); + delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, &avg_cat); + + 
++count; + ++class_count; + if(iou > .5) recall += 1; + if(iou > .75) recall75 += 1; + avg_iou += iou; + } + } + } + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", net.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count); +} + +void backward_yolo_layer(const layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +{ + int i; + int new_w=0; + int new_h=0; + if (((float)netw/w) < ((float)neth/h)) { + new_w = netw; + new_h = (h * netw)/w; + } else { + new_h = neth; + new_w = (w * neth)/h; + } + for (i = 0; i < n; ++i){ + box b = dets[i].bbox; + b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); + b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); + b.w *= (float)netw/new_w; + b.h *= (float)neth/new_h; + if(!relative){ + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + dets[i].bbox = b; + } +} + +int yolo_num_detections(layer l, float thresh) +{ + int i, n; + int count = 0; + for (i = 0; i < l.w*l.h; ++i){ + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4); + if(l.output[obj_index] > thresh){ + ++count; + } + } + } + return count; +} + +void avg_flipped_yolo(layer l) +{ + int i,j,n,z; + float *flip = l.output + l.outputs; + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w/2; ++i) { + for (n = 0; n < l.n; ++n) { + for(z = 0; z < l.classes + 4 + 1; ++z){ + int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; + int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); + float swap = flip[i1]; + flip[i1] = flip[i2]; + flip[i2] = swap; + if(z == 0){ + flip[i1] = -flip[i1]; + flip[i2] = -flip[i2]; + } + } + } + } + } + for(i = 0; i < l.outputs; ++i){ + l.output[i] = (l.output[i] + flip[i])/2.; + } +} + +int 
get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets) +{ + int i,j,n; + float *predictions = l.output; + if (l.batch == 2) avg_flipped_yolo(l); + int count = 0; + for (i = 0; i < l.w*l.h; ++i){ + int row = i / l.w; + int col = i % l.w; + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4); + float objectness = predictions[obj_index]; + if(objectness <= thresh) continue; + int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); + dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); + dets[count].objectness = objectness; + dets[count].classes = l.classes; + for(j = 0; j < l.classes; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, 4 + 1 + j); + float prob = objectness*predictions[class_index]; + dets[count].prob[j] = (prob > thresh) ? prob : 0; + } + ++count; + } + } + correct_yolo_boxes(dets, count, w, h, netw, neth, relative); + return count; +} + +#ifdef GPU + +void forward_yolo_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array_gpu(l.output_gpu + index, (1+l.classes)*l.w*l.h, LOGISTIC); + } + } + if(!net.train || l.onlyforward){ + cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + return; + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_yolo_layer(l, net); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_yolo_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/standard/darknet/src/yolo_layer.h 
b/workloads/realworld/standard/darknet/src/yolo_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..d2a0243268146e00ebff2b4b11bce23f830689d1 --- /dev/null +++ b/workloads/realworld/standard/darknet/src/yolo_layer.h @@ -0,0 +1,19 @@ +#ifndef YOLO_LAYER_H +#define YOLO_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes); +void forward_yolo_layer(const layer l, network net); +void backward_yolo_layer(const layer l, network net); +void resize_yolo_layer(layer *l, int w, int h); +int yolo_num_detections(layer l, float thresh); + +#ifdef GPU +void forward_yolo_layer_gpu(const layer l, network net); +void backward_yolo_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/standard/darknet/yolov3-tiny/predictions.jpg b/workloads/realworld/standard/darknet/yolov3-tiny/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e76dabc8b31ad049905fe65ca8aeee298ae22f2e Binary files /dev/null and b/workloads/realworld/standard/darknet/yolov3-tiny/predictions.jpg differ diff --git a/workloads/realworld/standard/darknet/yolov3-tiny/run_super.sh b/workloads/realworld/standard/darknet/yolov3-tiny/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..d890a7c581dc59167fab1b93778d143fc7e679a9 --- /dev/null +++ b/workloads/realworld/standard/darknet/yolov3-tiny/run_super.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg diff --git a/workloads/realworld/standard/darknet/yolov3-tiny/run_yolov3-tiny.sh b/workloads/realworld/standard/darknet/yolov3-tiny/run_yolov3-tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..d890a7c581dc59167fab1b93778d143fc7e679a9 --- /dev/null +++ b/workloads/realworld/standard/darknet/yolov3-tiny/run_yolov3-tiny.sh @@ -0,0 +1 @@ +../darknet detect 
../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg diff --git a/workloads/realworld/standard/darknet/yolov3-tiny_b/run_super.sh b/workloads/realworld/standard/darknet/yolov3-tiny_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..31669e62fb94142e7dc24b3f905c8f1d25950367 --- /dev/null +++ b/workloads/realworld/standard/darknet/yolov3-tiny_b/run_super.sh @@ -0,0 +1,2 @@ +#../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg +../darknet detector infer ../cfg/coco.data ../cfg/yolov3-tiny_b.cfg diff --git a/workloads/realworld/standard/darknet/yolov3-tiny_t/run_super.sh b/workloads/realworld/standard/darknet/yolov3-tiny_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..6cc56bc601476fa212f615b5aec964f12e044473 --- /dev/null +++ b/workloads/realworld/standard/darknet/yolov3-tiny_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg +../darknet detector train ../cfg/coco.data ../cfg/yolov3-tiny_t.cfg diff --git a/workloads/realworld/standard/darknet/yolov3/predictions.jpg b/workloads/realworld/standard/darknet/yolov3/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..49c1abe30cdcdceadad4353da30ce5660c96be1a Binary files /dev/null and b/workloads/realworld/standard/darknet/yolov3/predictions.jpg differ diff --git a/workloads/realworld/standard/darknet/yolov3/run_super.sh b/workloads/realworld/standard/darknet/yolov3/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..440a4b456a80a434243dffa9614d5a501790990f --- /dev/null +++ b/workloads/realworld/standard/darknet/yolov3/run_super.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg diff --git a/workloads/realworld/standard/darknet/yolov3/run_yolov3.sh 
b/workloads/realworld/standard/darknet/yolov3/run_yolov3.sh new file mode 100755 index 0000000000000000000000000000000000000000..440a4b456a80a434243dffa9614d5a501790990f --- /dev/null +++ b/workloads/realworld/standard/darknet/yolov3/run_yolov3.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg diff --git a/workloads/realworld/standard/darknet/yolov3_b/run_super.sh b/workloads/realworld/standard/darknet/yolov3_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..913790c1ee53a0d442a89306fbd8bda93faa2581 --- /dev/null +++ b/workloads/realworld/standard/darknet/yolov3_b/run_super.sh @@ -0,0 +1,2 @@ +#../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg +../darknet detector infer ../cfg/coco.data ../cfg/yolov3_b.cfg diff --git a/workloads/realworld/standard/darknet/yolov3_t/run_super.sh b/workloads/realworld/standard/darknet/yolov3_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ee7df07c1a4172f25c1129d8027095d2e3861e28 --- /dev/null +++ b/workloads/realworld/standard/darknet/yolov3_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg +../darknet detector train ../cfg/coco.data ../cfg/yolov3_t.cfg diff --git a/workloads/realworld/standard/hotspot/Makefile b/workloads/realworld/standard/hotspot/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..793c7c13d91b4f492c5df5801f2a9ddadf023470 --- /dev/null +++ b/workloads/realworld/standard/hotspot/Makefile @@ -0,0 +1,24 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include + +SRC = hotspot.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = hotspot + +release: $(SRC) + $(CC) $(KERNEL_DIM) $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) 
-L$(CUPTI_LIB_DIR) -lcupti + +enum: $(SRC) + $(CC) $(KERNEL_DIM) -deviceemu $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +debug: $(SRC) + $(CC) $(KERNEL_DIM) -g $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +debugenum: $(SRC) + $(CC) $(KERNEL_DIM) -g -deviceemu $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt diff --git a/workloads/realworld/standard/hotspot/Makefile_nvidia b/workloads/realworld/standard/hotspot/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..1f7ae25a90c968563e96208c693b927b6765490a --- /dev/null +++ b/workloads/realworld/standard/hotspot/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. 
Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := hotspot +# CUDA source files (compiled with cudacc) +CUFILES := hotspot.cu +# CUDA dependency files +# CU_DEPS := needle_kernel.cu +# C/C++ source files (compiled with gcc / c++) +# CCFILES := + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/standard/hotspot/README b/workloads/realworld/standard/hotspot/README new file mode 100644 index 0000000000000000000000000000000000000000..f24239abebe938fe3e8d3c1a7a97f915bd09a90b --- /dev/null +++ b/workloads/realworld/standard/hotspot/README @@ -0,0 +1,8 @@ +******Adjustable work group size***** +The kernel has square shape +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe one dimension +The actually dimension = RD_WG_SIZE_0 * RD_WG_SIZE_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" \ No newline at end of file diff --git a/workloads/realworld/standard/hotspot/hotspot.cu b/workloads/realworld/standard/hotspot/hotspot.cu new file mode 100644 index 0000000000000000000000000000000000000000..b68b5d0a2aad18cedf5808ec496e10b9ae068fe3 --- /dev/null +++ b/workloads/realworld/standard/hotspot/hotspot.cu @@ -0,0 +1,386 @@ +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#ifdef RD_WG_SIZE_0_0 +#define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) 
+#define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) +#define BLOCK_SIZE RD_WG_SIZE +#else +#define BLOCK_SIZE 16 +#endif + +#define STR_SIZE 256 + +/* maximum power density possible (say 300W for a 10mm x 10mm chip) */ +#define MAX_PD (3.0e6) +/* required precision in degrees */ +#define PRECISION 0.001 +#define SPEC_HEAT_SI 1.75e6 +#define K_SI 100 +/* capacitance fitting factor */ +#define FACTOR_CHIP 0.5 + +/* chip parameters */ +float t_chip = 0.0005; +float chip_height = 0.016; +float chip_width = 0.016; +/* ambient temperature, assuming no package at all */ +float amb_temp = 80.0; + +void run(int argc, char **argv); + +/* define timer macros */ +#define pin_stats_reset() startCycle() +#define pin_stats_pause(cycles) stopCycle(cycles) +#define pin_stats_dump(cycles) printf("timer: %Lu\n", cycles) + +void fatal(char *s) +{ + fprintf(stderr, "error: %s\n", s); +} + +void writeoutput(float *vect, int grid_rows, int grid_cols, char *file) +{ + + int i, j, index = 0; + FILE *fp; + char str[STR_SIZE]; + + if ((fp = fopen(file, "w")) == 0) + printf("The file was not opened\n"); + + for (i = 0; i < grid_rows; i++) + for (j = 0; j < grid_cols; j++) + { + + sprintf(str, "%d\t%g\n", index, vect[i * grid_cols + j]); + fputs(str, fp); + index++; + } + + fclose(fp); +} + +void readinput(float *vect, int grid_rows, int grid_cols, char *file) +{ + + int i, j; + FILE *fp; + char str[STR_SIZE]; + float val; + + if ((fp = fopen(file, "r")) == 0) + printf("The file was not opened\n"); + + for (i = 0; i <= grid_rows - 1; i++) + for (j = 0; j <= grid_cols - 1; j++) + { + fgets(str, STR_SIZE, fp); + if (feof(fp)) + fatal("not enough lines in file"); + // if ((sscanf(str, "%d%f", &index, &val) != 2) || (index != ((i-1)*(grid_cols-2)+j-1))) + if ((sscanf(str, "%f", &val) != 1)) + fatal("invalid file format"); + vect[i * grid_cols + j] = val; + } + + fclose(fp); +} + +#define IN_RANGE(x, min, max) ((x) >= (min) && (x) <= (max)) +#define CLAMP_RANGE(x, min, max) x = (x < (min)) 
? min : ((x > (max)) ? max : x) +#define MIN(a, b) ((a) <= (b) ? (a) : (b)) + +__global__ void calculate_temp(int iteration, // number of iteration + float *power, // power input + float *temp_src, // temperature input/output + float *temp_dst, // temperature input/output + int grid_cols, // Col of grid + int grid_rows, // Row of grid + int border_cols, // border offset + int border_rows, // border offset + float Cap, // Capacitance + float Rx, + float Ry, + float Rz, + float step, + int batch_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + __shared__ float temp_on_cuda[BLOCK_SIZE][BLOCK_SIZE]; + __shared__ float power_on_cuda[BLOCK_SIZE][BLOCK_SIZE]; + __shared__ float temp_t[BLOCK_SIZE][BLOCK_SIZE]; // saving temparary temperature result + + float amb_temp = 80.0; + float step_div_Cap; + float Rx_1, Ry_1, Rz_1; + + // int bx = blockIdx.x; + // int by = blockIdx.y; + + int tx = threadIdx.x; + int ty = threadIdx.y; + + step_div_Cap = step / Cap; + + Rx_1 = 1 / Rx; + Ry_1 = 1 / Ry; + Rz_1 = 1 / Rz; + + // each block finally computes result for a small block + // after N iterations. 
+ // it is the non-overlapping small blocks that cover + // all the input data + + // calculate the small block size + int small_block_rows = BLOCK_SIZE - iteration * 2; // EXPAND_RATE + int small_block_cols = BLOCK_SIZE - iteration * 2; // EXPAND_RATE + + // if (bx == 0 && by == 0 && tx == 0 && ty == 0) + // printf("iteration is %d, small_block_rows is %d\n", iteration, small_block_rows); + + int tile_dim_x = gridDim.x * batch_size; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = batch_size * batch_size; + int tiles_this_block_x = batch_size; + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + // block id + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + int bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + // calculate the boundary for the block according to + // the boundary of its small block + int blkY = small_block_rows * by - border_rows; + int blkX = small_block_cols * bx - border_cols; + int blkYmax = blkY + BLOCK_SIZE - 1; + int blkXmax = blkX + BLOCK_SIZE - 1; + + // calculate the global thread coordination + int yidx = blkY + ty; + int xidx = blkX + tx; + + // load data if it is within the valid input range + int loadYidx = yidx, loadXidx = xidx; + int index = grid_cols * loadYidx + loadXidx; + + if (IN_RANGE(loadYidx, 0, grid_rows - 1) && IN_RANGE(loadXidx, 0, grid_cols - 1)) + { + temp_on_cuda[ty][tx] = temp_src[index]; // Load the temperature data from global memory to shared memory + power_on_cuda[ty][tx] = power[index]; // Load the power data from global memory to shared memory + } + block.sync(); + + // effective range within this block that falls within + // the valid range of the input data + // used to rule out computation outside the boundary. 
+ int validYmin = (blkY < 0) ? -blkY : 0; + int validYmax = (blkYmax > grid_rows - 1) ? BLOCK_SIZE - 1 - (blkYmax - grid_rows + 1) : BLOCK_SIZE - 1; + int validXmin = (blkX < 0) ? -blkX : 0; + int validXmax = (blkXmax > grid_cols - 1) ? BLOCK_SIZE - 1 - (blkXmax - grid_cols + 1) : BLOCK_SIZE - 1; + + int N = ty - 1; + int S = ty + 1; + int W = tx - 1; + int E = tx + 1; + + N = (N < validYmin) ? validYmin : N; + S = (S > validYmax) ? validYmax : S; + W = (W < validXmin) ? validXmin : W; + E = (E > validXmax) ? validXmax : E; + + bool computed; + for (int i = 0; i < iteration; i++) + { + computed = false; + if (IN_RANGE(tx, i + 1, BLOCK_SIZE - i - 2) && + IN_RANGE(ty, i + 1, BLOCK_SIZE - i - 2) && + IN_RANGE(tx, validXmin, validXmax) && + IN_RANGE(ty, validYmin, validYmax)) + { + computed = true; + temp_t[ty][tx] = temp_on_cuda[ty][tx] + step_div_Cap * (power_on_cuda[ty][tx] + + (temp_on_cuda[S][tx] + temp_on_cuda[N][tx] - 2.0 * temp_on_cuda[ty][tx]) * Ry_1 + + (temp_on_cuda[ty][E] + temp_on_cuda[ty][W] - 2.0 * temp_on_cuda[ty][tx]) * Rx_1 + + (amb_temp - temp_on_cuda[ty][tx]) * Rz_1); + } + block.sync(); + if (i == iteration - 1) + break; + if (computed) // Assign the computation range + temp_on_cuda[ty][tx] = temp_t[ty][tx]; + block.sync(); + } + + // update the global memory + // after the last iteration, only threads coordinated within the + // small block perform the calculation and switch on ``computed'' + if (computed) + { + temp_dst[index] = temp_t[ty][tx]; + } + } +} + +/* + compute N time steps +*/ + +int compute_tran_temp(float *MatrixPower, float *MatrixTemp[2], int col, int row, + int total_iterations, int num_iterations, int blockCols, int blockRows, int borderCols, int borderRows, int batch_size) +{ + dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE); + dim3 dimGrid(blockCols, blockRows); + + float grid_height = chip_height / row; + float grid_width = chip_width / col; + + float Cap = FACTOR_CHIP * SPEC_HEAT_SI * t_chip * grid_width * grid_height; + float Rx = 
grid_width / (2.0 * K_SI * t_chip * grid_height); + float Ry = grid_height / (2.0 * K_SI * t_chip * grid_width); + float Rz = t_chip / (K_SI * grid_height * grid_width); + + float max_slope = MAX_PD / (FACTOR_CHIP * t_chip * SPEC_HEAT_SI); + float step = PRECISION / max_slope; + float t; + + int src = 1, dst = 0; + + for (t = 0; t < total_iterations; t += num_iterations) + { + int temp = src; + src = dst; + dst = temp; + calculate_temp<<>>(MIN(num_iterations, total_iterations - t), MatrixPower, MatrixTemp[src], MatrixTemp[dst], + col, row, borderCols, borderRows, Cap, Rx, Ry, Rz, step, batch_size); + } + return dst; +} + +void usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "\t - number of rows/cols in the grid (positive integer)\n"); + fprintf(stderr, "\t - pyramid heigh(positive integer)\n"); + fprintf(stderr, "\t - number of iterations\n"); + fprintf(stderr, "\t - name of the file containing the initial temperature values of each cell\n"); + fprintf(stderr, "\t - name of the file containing the dissipated power values of each cell\n"); + fprintf(stderr, "\t - name of the output file\n"); + fprintf(stderr, "\t - batch_size * batch_size per block\n"); + exit(1); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE); + + run(argc, argv); + + return EXIT_SUCCESS; +} + +void run(int argc, char **argv) +{ + int size; + int grid_rows, grid_cols; + float *FilesavingTemp, *FilesavingPower, *MatrixOut; + char *tfile, *pfile, *ofile; + + int total_iterations = 60; + int pyramid_height = 1; // number of iterations + + if (argc != 8) + usage(argc, argv); + if ((grid_rows = atoi(argv[1])) <= 0 || + (grid_cols = atoi(argv[1])) <= 0 || + (pyramid_height = atoi(argv[2])) <= 0 || + (total_iterations = atoi(argv[3])) <= 0) + usage(argc, argv); + + tfile = 
argv[4]; + pfile = argv[5]; + ofile = argv[6]; + + int batch_size = atoi(argv[7]); + + size = grid_rows * grid_cols; + +/* --------------- pyramid parameters --------------- */ +#define EXPAND_RATE 2 // add one iteration will extend the pyramid base by 2 per each borderline + int borderCols = (pyramid_height)*EXPAND_RATE / 2; + int borderRows = (pyramid_height)*EXPAND_RATE / 2; + int smallBlockCol = BLOCK_SIZE - (pyramid_height)*EXPAND_RATE; + int smallBlockRow = BLOCK_SIZE - (pyramid_height)*EXPAND_RATE; + // int blockCols = grid_cols / smallBlockCol + ((grid_cols % smallBlockCol == 0) ? 0 : 1); + // int blockRows = grid_rows / smallBlockRow + ((grid_rows % smallBlockRow == 0) ? 0 : 1); + + int blockCols = (grid_cols + smallBlockCol * batch_size - 1) / (smallBlockCol * batch_size); + int blockRows = (grid_rows + smallBlockRow * batch_size - 1) / (smallBlockRow * batch_size); + + // printf("borderCols is %d, smallBlockCol is %d, blockCols is %d, grid_cols is %d \n", borderCols, smallBlockCol, blockCols, grid_cols); + + FilesavingTemp = (float *)malloc(size * sizeof(float)); + FilesavingPower = (float *)malloc(size * sizeof(float)); + MatrixOut = (float *)calloc(size, sizeof(float)); + + if (!FilesavingPower || !FilesavingTemp || !MatrixOut) + fatal("unable to allocate memory"); + + printf("pyramidHeight: %d\ngridSize: [%d, %d]\nborder:[%d, %d]\nblockGrid:[%d, %d]\ntargetBlock:[%d, %d]\n", + pyramid_height, grid_cols, grid_rows, borderCols, borderRows, blockCols, blockRows, smallBlockCol, smallBlockRow); + + readinput(FilesavingTemp, grid_rows, grid_cols, tfile); + readinput(FilesavingPower, grid_rows, grid_cols, pfile); + + GPU_argv_init(); + + initTrace(); + startCPU(); + float *MatrixTemp[2], *MatrixPower; + cudaMalloc((void **)&MatrixTemp[0], sizeof(float) * size); + cudaMalloc((void **)&MatrixTemp[1], sizeof(float) * size); + cudaMemcpy(MatrixTemp[0], FilesavingTemp, sizeof(float) * size, cudaMemcpyHostToDevice); + + cudaMalloc((void **)&MatrixPower, 
sizeof(float) * size); + cudaMemcpy(MatrixPower, FilesavingPower, sizeof(float) * size, cudaMemcpyHostToDevice); + // printf("Start computing the transient temperature\n"); + int ret = compute_tran_temp(MatrixPower, MatrixTemp, grid_cols, grid_rows, + total_iterations, pyramid_height, blockCols, blockRows, borderCols, borderRows, batch_size); + // printf("Ending simulation\n"); + cudaMemcpy(MatrixOut, MatrixTemp[ret], sizeof(float) * size, cudaMemcpyDeviceToHost); + + cudaFree(MatrixPower); + cudaFree(MatrixTemp[0]); + cudaFree(MatrixTemp[1]); + + endCPU(); + finiTrace(); + + writeoutput(MatrixOut, grid_rows, grid_cols, ofile); + free(MatrixOut); +} diff --git a/workloads/realworld/standard/hotspot/run.sh b/workloads/realworld/standard/hotspot/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..66b814725286fb6699d2b893740518ad43dc307a --- /dev/null +++ b/workloads/realworld/standard/hotspot/run.sh @@ -0,0 +1 @@ +./hotspot 512 2 2 ../../../../data/hotspot/temp_512 ../../../../data/hotspot/power_512 output.out 4 diff --git a/workloads/realworld/standard/hotspot/run_super.sh b/workloads/realworld/standard/hotspot/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ad31b9dcd0ce3e1d53df5aaf445082ab86cf2366 --- /dev/null +++ b/workloads/realworld/standard/hotspot/run_super.sh @@ -0,0 +1 @@ +./hotspot 8192 2 2 ../../../../data/hotspot/temp_8192.txt ../../../../data/hotspot/power_8192.txt output.out 8 diff --git a/workloads/realworld/standard/kmeans/Makefile b/workloads/realworld/standard/kmeans/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..e473b45be17e5805399c15b04dda57742451d913 --- /dev/null +++ b/workloads/realworld/standard/kmeans/Makefile @@ -0,0 +1,33 @@ +include ../../../common/make.config + +# C compiler +CC = gcc +CC_FLAGS = -g -fopenmp -O2 + +# CUDA compiler +NVCC = $(CUDA_DIR)/bin/nvcc +NVCC_FLAGS = -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) + +# 'make dbg=1' enables NVCC 
debugging + + +# 'make emu=1' compiles the CUDA kernels for emulation +ifeq ($(emu),1) + NVCC_FLAGS += -deviceemu +endif + + +kmeans: cluster.o getopt.o kmeans.o kmeans_clustering.o kmeans_cuda.o rmse.o $(CUPTI_ADD_COMMON)/cpu_timestamps.o + $(CC) $(CC_FLAGS) cluster.o getopt.o kmeans.o kmeans_clustering.o kmeans_cuda.o rmse.o $(CUPTI_ADD_COMMON)/cpu_timestamps.o $(CUPTI_ADD_COMMON)/cupti_add.cpp -o kmeans $(NVCC_FLAGS) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++ + +kmeans.o: kmeans.c + $(CC) $(CC_FLAGS) $< -c $(NVCC_FLAGS) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +%.o: %.[ch] + $(CC) $(CC_FLAGS) $< -c -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lstdc++ + +kmeans_cuda.o: kmeans_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp + $(NVCC) -O2 -c kmeans_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(NVCC_FLAGS) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +clean: + rm -f *.o *~ kmeans kmeans_cuda.linkinfo diff --git a/workloads/realworld/standard/kmeans/Makefile_nvidia b/workloads/realworld/standard/kmeans/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..5d612b9dc8b5c19310162e1f721e1d2d4e33fb72 --- /dev/null +++ b/workloads/realworld/standard/kmeans/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. 
NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. 
+# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := kmeans +# CUDA source files (compiled with cudacc) +CUFILES := kmeans_cuda.cu +# CUDA dependency files +CU_DEPS := kmeans_cuda_kernel.cu +# C/C++ source files (compiled with gcc / c++) +CFILES := cluster.c getopt.c kmeans.c kmeans_clustering.c rmse.c + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/standard/kmeans/README b/workloads/realworld/standard/kmeans/README new file mode 100755 index 0000000000000000000000000000000000000000..bebae52d716986889b30a435214e095346424190 --- /dev/null +++ b/workloads/realworld/standard/kmeans/README @@ -0,0 +1,10 @@ +Usage: ./kmeans [switches] -i filename + + -i filename :file containing data to be clustered + -m max_nclusters :maximum number of clusters allowed [default=5] + -n min_nclusters :minimum number of clusters allowed [default=5] + -t threshold :threshold value [default=0.001] + -l nloops :iteration for each number of clusters [default=1] + -b :input file is in binary format + -r :calculate RMSE [default=off] + -o :output cluster center coordinates [default=off] \ No newline at end of file diff --git a/workloads/realworld/standard/kmeans/cluster.c b/workloads/realworld/standard/kmeans/cluster.c new file mode 100755 index 0000000000000000000000000000000000000000..1dfba11084300cdc4355f08e35722ae725cd6ce5 --- /dev/null +++ b/workloads/realworld/standard/kmeans/cluster.c @@ -0,0 +1,160 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. 
If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. 
*/ +/******************************************************************************/ + +/*************************************************************************/ +/** File: cluster.c **/ +/** Description: Takes as input a file, containing 1 data point per **/ +/** per line, and performs a fuzzy c-means clustering **/ +/** on the data. Fuzzy clustering is performed using **/ +/** min to max clusters and the clustering that gets **/ +/** the best score according to a compactness and **/ +/** separation criterion are returned. **/ +/** Author: Brendan McCane **/ +/** James Cook University of North Queensland. **/ +/** Australia. email: mccane@cs.jcu.edu.au **/ +/** **/ +/** Edited by: Jay Pisharath, Wei-keng Liao **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "kmeans.h" + +// Timestamp at trace initialization time. 
Used to normalized other +// timestamps + +extern double wtime(void); +float min_rmse_ref = FLT_MAX; /* reference min_rmse value */ + +/*---< cluster() >-----------------------------------------------------------*/ +int cluster(int npoints, /* number of data points */ + int nfeatures, /* number of attributes for each point */ + float **features, /* array: [npoints][nfeatures] */ + int min_nclusters, /* range of min to max number of clusters */ + int max_nclusters, + float threshold, /* loop terminating factor */ + int *best_nclusters, /* out: number between min and max with lowest RMSE */ + float ***cluster_centres, /* out: [best_nclusters][nfeatures] */ + float *min_rmse, /* out: minimum RMSE */ + int isRMSE, /* calculate RMSE */ + int nloops /* number of iteration for each number of clusters */ + ) +{ + + //initTrace(); + + int nclusters; /* number of clusters k */ + int index =0; /* number of iteration to reach the best RMSE */ + int rmse; /* RMSE for each clustering */ + int *membership; /* which cluster a data point belongs to */ + float **tmp_cluster_centres; /* hold coordinates of cluster centers */ + int i; + + /* allocate memory for membership */ + membership = (int*) malloc(npoints * sizeof(int)); + + /* sweep k from min to max_nclusters to find the best number of clusters */ + for(nclusters = min_nclusters; nclusters <= max_nclusters; nclusters++) + { + if (nclusters > npoints) break; /* cannot have more clusters than points */ + + /* allocate device memory, invert data array (@ kmeans_cuda.cu) */ + allocateMemory(npoints, nfeatures, nclusters, features); + + /* iterate nloops times for each number of clusters */ + for(i = 0; i < nloops; i++) + { + /* initialize initial cluster centers, CUDA calls (@ kmeans_cuda.cu) */ + tmp_cluster_centres = kmeans_clustering(features, + nfeatures, + npoints, + nclusters, + threshold, + membership); + + if (*cluster_centres) { + free((*cluster_centres)[0]); + free(*cluster_centres); + } + *cluster_centres = 
tmp_cluster_centres; + + /* find the number of clusters with the best RMSE */ + if(isRMSE) + { + rmse = rms_err(features, + nfeatures, + npoints, + tmp_cluster_centres, + nclusters); + + if(rmse < min_rmse_ref){ + min_rmse_ref = rmse; //update reference min RMSE + *min_rmse = min_rmse_ref; //update return min RMSE + *best_nclusters = nclusters; //update optimum number of clusters + index = i; //update number of iteration to reach best RMSE + } + } + } + deallocateMemory(); /* free device memory (@ kmeans_cuda.cu) */ + } + + free(membership); + + return index; +} + diff --git a/workloads/realworld/standard/kmeans/cp.sh b/workloads/realworld/standard/kmeans/cp.sh new file mode 100755 index 0000000000000000000000000000000000000000..243885047459789eb8eae5d6c5b2b4822eb3aabf --- /dev/null +++ b/workloads/realworld/standard/kmeans/cp.sh @@ -0,0 +1,27 @@ +cp super_0.log super_3.log +cp super_0.log super_4.log +cp super_0.log super_5.log +cp super_0.log super_6.log +cp super_0.log super_7.log +cp super_0.log super_8.log +cp super_0.log super_9.log +cp super_0.log super_10.log +cp super_0.log super_11.log +cp super_0.log super_12.log +cp super_0.log super_13.log +cp super_0.log super_14.log +cp super_0.log super_15.log +cp super_0.log super_17.log +cp super_0.log super_17.log +cp super_0.log super_18.log +cp super_0.log super_19.log +cp super_0.log super_20.log +cp super_0.log super_21.log +cp super_0.log super_22.log +cp super_0.log super_23.log +cp super_0.log super_24.log +cp super_0.log super_25.log +cp super_0.log super_26.log +cp super_0.log super_27.log +cp super_0.log super_28.log +cp super_0.log super_29.log diff --git a/workloads/realworld/standard/kmeans/getopt.c b/workloads/realworld/standard/kmeans/getopt.c new file mode 100755 index 0000000000000000000000000000000000000000..fa2f31378fb2978f65267ba2e810aae3ff1ee016 --- /dev/null +++ b/workloads/realworld/standard/kmeans/getopt.c @@ -0,0 +1,1184 @@ +/* Getopt for GNU. 
+ NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to drepper@gnu.org + before changing it! + Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . */ +#ifndef _NO_PROTO +# define _NO_PROTO +#endif + +#ifdef HAVE_CONFIG_H +# include +#endif + +#if !defined __STDC__ || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +# ifndef const +# define const +# endif +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. 
*/ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +# include +# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +# define ELIDE_CODE +# endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +# include +# include +#endif /* GNU C library. */ + +#ifdef VMS +# include +# if HAVE_STRING_H - 0 +# include +# endif +#endif + +#ifndef _ +/* This is for other GNU distributions with internationalized messages. */ +# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include +# ifndef _ +# define _(msgid) gettext (msgid) +# endif +# else +# define _(msgid) (msgid) +# endif +# if defined _LIBC && defined USE_IN_LIBIO +# include +# endif +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg; + +/* Index in ARGV of the next element to be scanned. 
+ This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. */ +int optind = 1; + +/* Formerly, initialization of getopt depended on optind==0, which + causes problems with re-calling getopt as programs generally don't + know that. */ + +int __getopt_initialized; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. 
This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +/* Value of POSIXLY_CORRECT environment variable. */ +static char *posixly_correct; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +# include +# define my_index strchr +#else + +//# if HAVE_STRING_H || WIN32 /* Pete Wilson mod 7/28/02 */ +# include +//# else +//# include +//# endif + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +#ifndef getenv +extern char *getenv (); +#endif + +static char * +my_index (str, chr) + const char *str; + int chr; +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. */ +#ifdef __GNUC__ +/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. + That was relevant to code that was here before. */ +# if (!defined __STDC__ || !__STDC__) && !defined strlen +/* gcc with -traditional declares the built-in strlen to return int, + and has done so at least since version 2.4.5. -- rms. 
*/ +extern int strlen (const char *); +# endif /* not __STDC__ */ +#endif /* __GNUC__ */ + +#endif /* not __GNU_LIBRARY__ */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +#ifdef _LIBC +/* Stored original parameters. + XXX This is no good solution. We should rather copy the args so + that we can compare them later. But we must not use malloc(3). */ +extern int __libc_argc; +extern char **__libc_argv; + +/* Bash 2.0 gives us an environment variable containing flags + indicating ARGV elements that should not be considered arguments. */ + +# ifdef USE_NONOPTION_FLAGS +/* Defined in getopt_init.c */ +extern char *__getopt_nonoption_flags; + +static int nonoption_flags_max_len; +static int nonoption_flags_len; +# endif + +# ifdef USE_NONOPTION_FLAGS +# define SWAP_FLAGS(ch1, ch2) \ + if (nonoption_flags_len > 0) \ + { \ + char __tmp = __getopt_nonoption_flags[ch1]; \ + __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ + __getopt_nonoption_flags[ch2] = __tmp; \ + } +# else +# define SWAP_FLAGS(ch1, ch2) +# endif +#else /* !_LIBC */ +# define SWAP_FLAGS(ch1, ch2) +#endif /* _LIBC */ + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. 
*/ + +#if defined __STDC__ && __STDC__ +static void exchange (char **); +#endif + +static void +exchange (argv) + char **argv; +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + /* First make sure the handling of the `__getopt_nonoption_flags' + string can work normally. Our top argument must be in the range + of the string. */ + if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) + { + /* We must extend the array. The user plays games with us and + presents new arguments. */ + char *new_str = malloc (top + 1); + if (new_str == NULL) + nonoption_flags_len = nonoption_flags_max_len = 0; + else + { + memset (__mempcpy (new_str, __getopt_nonoption_flags, + nonoption_flags_max_len), + '\0', top + 1 - nonoption_flags_max_len); + nonoption_flags_max_len = top + 1; + __getopt_nonoption_flags = new_str; + } + } +#endif + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. 
*/ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + SWAP_FLAGS (bottom + i, middle + i); + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Initialize the internal data when the first call is made. */ + +#if defined __STDC__ && __STDC__ +static const char *_getopt_initialize (int, char *const *, const char *); +#endif +static const char * +_getopt_initialize (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + first_nonopt = last_nonopt = optind; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. 
*/ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + if (posixly_correct == NULL + && argc == __libc_argc && argv == __libc_argv) + { + if (nonoption_flags_max_len == 0) + { + if (__getopt_nonoption_flags == NULL + || __getopt_nonoption_flags[0] == '\0') + nonoption_flags_max_len = -1; + else + { + const char *orig_str = __getopt_nonoption_flags; + int len = nonoption_flags_max_len = strlen (orig_str); + if (nonoption_flags_max_len < argc) + nonoption_flags_max_len = argc; + __getopt_nonoption_flags = + (char *) malloc (nonoption_flags_max_len); + if (__getopt_nonoption_flags == NULL) + nonoption_flags_max_len = -1; + else + memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), + '\0', nonoption_flags_max_len - len); + } + } + nonoption_flags_len = nonoption_flags_max_len; + } + else + nonoption_flags_len = 0; +#endif + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns -1. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) 
+ + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. 
*/ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + int print_errors = opterr; + if (optstring[0] == ':') + print_errors = 0; + + if (argc < 1) + return -1; + + optarg = NULL; + + if (optind == 0 || !__getopt_initialized) + { + if (optind == 0) + optind = 1; /* Don't scan ARGV[0], the program name. */ + optstring = _getopt_initialize (argc, argv, optstring); + __getopt_initialized = 1; + } + + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The later information + is only used when the used in the GNU libc. */ +#if defined _LIBC && defined USE_NONOPTION_FLAGS +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ + || (optind < nonoption_flags_len \ + && __getopt_nonoption_flags[optind] == '1')) +#else +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') +#endif + + if (nextchar == NULL || *nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT and LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (last_nonopt > optind) + last_nonopt = optind; + if (first_nonopt > optind) + first_nonopt = optind; + + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. 
*/ + + while (optind < argc && NONOPTION_P) + optind++; + last_nonopt = optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (ordering == REQUIRE_ORDER) + return -1; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. 
*/ + + if (longopts != NULL + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) + == (unsigned int) strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else if (long_only + || pfound->has_arg != p->has_arg + || pfound->flag != p->flag + || pfound->val != p->val) + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + optopt = 0; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind - 1][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#else + fprintf (stderr, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], + pfound->name); +#else + fprintf (stderr, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + + nextchar += strlen (nextchar); + + optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. 
If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' + || my_index (optstring, *nextchar) == NULL) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + nextchar = (char *) ""; + optind++; + optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (posixly_correct) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: illegal option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); +#endif + } + else + { +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: invalid option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + optopt = c; + return '?'; + } + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';') + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) + /* Do nothing. 
*/ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = NULL; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#endif /* Not ELIDE_CODE. */ + + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ \ No newline at end of file diff --git a/workloads/realworld/standard/kmeans/getopt.h b/workloads/realworld/standard/kmeans/getopt.h new file mode 100755 index 0000000000000000000000000000000000000000..bae04bf7d418206d73892a94ff94923a36549362 --- /dev/null +++ b/workloads/realworld/standard/kmeans/getopt.h @@ -0,0 +1,191 @@ + + +/* getopt.h */ +/* Declarations for getopt. + Copyright (C) 1989-1994, 1996-1999, 2001 Free Software + Foundation, Inc. This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute + it and/or modify it under the terms of the GNU Lesser + General Public License as published by the Free Software + Foundation; either version 2.1 of the License, or + (at your option) any later version. + + The GNU C Library is distributed in the hope that it will + be useful, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A + PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. 
+ + You should have received a copy of the GNU Lesser General + Public License along with the GNU C Library; if not, write + to the Free Software Foundation, Inc., 59 Temple Place, + Suite 330, Boston, MA 02111-1307 USA. */ + + + + + +#ifndef _GETOPT_H + +#ifndef __need_getopt +# define _GETOPT_H 1 +#endif + +/* If __GNU_LIBRARY__ is not already defined, either we are being used + standalone, or this is the first header included in the source file. + If we are being used with glibc, we need to include , but + that does not exist if we are standalone. So: if __GNU_LIBRARY__ is + not defined, include , which will pull in for us + if it's from glibc. (Why ctype.h? It's guaranteed to exist and it + doesn't flood the namespace with stuff the way some other headers do.) */ +#if !defined __GNU_LIBRARY__ +# include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +#ifndef __need_getopt +/* Describe the long-named options requested by the application. 
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +# if (defined __STDC__ && __STDC__) || defined __cplusplus + const char *name; +# else + char *name; +# endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +# define no_argument 0 +# define required_argument 1 +# define optional_argument 2 +#endif /* need getopt */ + + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, `optopt' is set to the option letter, and '?' is + returned. 
+ + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in `optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU `getopt'. + + The argument `--' causes premature termination of argument + scanning, explicitly telling `getopt' that there are no more + options. + + If OPTS begins with `--', then non-option arguments are treated as + arguments to the option '\0'. This behavior is specific to the GNU + `getopt'. */ + +#if (defined __STDC__ && __STDC__) || defined __cplusplus +# ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int ___argc, char *const *___argv, const char *__shortopts); +# else /* not __GNU_LIBRARY__ */ +extern int getopt (); +# endif /* __GNU_LIBRARY__ */ + +# ifndef __need_getopt +extern int getopt_long (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); +extern int getopt_long_only (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); + +/* Internal only. Users should not call this directly. */ +extern int _getopt_internal (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only); +# endif +#else /* not __STDC__ */ +extern int getopt (); +# ifndef __need_getopt +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +# endif +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +/* Make sure we later can get all the definitions and declarations. 
*/ +#undef __need_getopt + +#endif /* getopt.h */ + diff --git a/workloads/realworld/standard/kmeans/kmeans.c b/workloads/realworld/standard/kmeans/kmeans.c new file mode 100755 index 0000000000000000000000000000000000000000..e0725dfaab140d5cbbf60aa80cae1e50fe613b7d --- /dev/null +++ b/workloads/realworld/standard/kmeans/kmeans.c @@ -0,0 +1,308 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: example.c **/ +/** Description: Takes as input a file: **/ +/** ascii file: containing 1 data point per line **/ +/** binary file: first int is the number of objects **/ +/** 2nd int is the no. of features of each **/ +/** object **/ +/** This example performs a fuzzy c-means clustering **/ +/** on the data. Fuzzy clustering is performed using **/ +/** min to max clusters and the clustering that gets **/ +/** the best score according to a compactness and **/ +/** separation criterion are returned. **/ +/** Author: Wei-keng Liao **/ +/** ECE Department Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. 
**/ +/** **/ +/*************************************************************************/ +#define _CRT_SECURE_NO_DEPRECATE 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#define _POSIX_C_SOURCE 200809L +#include +#include +#include "kmeans.h" + +extern double wtime(void); + + + +/*---< usage() >------------------------------------------------------------*/ +void usage(char *argv0) { + char *help = + "\nUsage: %s [switches] -i filename\n\n" + " -i filename :file containing data to be clustered\n" + " -m max_nclusters :maximum number of clusters allowed [default=5]\n" + " -n min_nclusters :minimum number of clusters allowed [default=5]\n" + " -t threshold :threshold value [default=0.001]\n" + " -l nloops :iteration for each number of clusters [default=1]\n" + " -b :input file is in binary format\n" + " -r :calculate RMSE [default=off]\n" + " -o :output cluster center coordinates [default=off]\n"; + fprintf(stderr, help, argv0); + exit(-1); +} + +/*---< main() >-------------------------------------------------------------*/ +int setup(int argc, char **argv) { + int opt; + extern char *optarg; + char *filename = 0; + float *buf; + char line[1024]; + int isBinaryFile = 0; + + float threshold = 0.001; /* default value */ + int max_nclusters=5; /* default value */ + int min_nclusters=5; /* default value */ + int best_nclusters = 0; + int nfeatures = 0; + int npoints = 0; + float len; + + float **features; + float **cluster_centres=NULL; + int i, j, index; + int nloops = 1; /* default value */ + + int isRMSE = 0; + float rmse; + + int isOutput = 0; + //float cluster_timing, io_timing; + + /* obtain command line arguments and change appropriate options */ + while ( (opt=getopt(argc,argv,"i:t:m:n:l:bro"))!= EOF) { + switch (opt) { + case 'i': filename=optarg; + break; + case 'b': isBinaryFile = 1; + break; + case 't': threshold=atof(optarg); + break; + case 'm': max_nclusters = atoi(optarg); + break; + case 'n': min_nclusters = 
atoi(optarg); + break; + case 'r': isRMSE = 1; + break; + case 'o': isOutput = 1; + break; + case 'l': nloops = atoi(optarg); + break; + case '?': usage(argv[0]); + break; + default: usage(argv[0]); + break; + } + } + + if (filename == 0) usage(argv[0]); + + /* ============== I/O begin ==============*/ + /* get nfeatures and npoints */ + //io_timing = omp_get_wtime(); + if (isBinaryFile) { //Binary file input + int infile; + if ((infile = open(filename, O_RDONLY, "0600")) == -1) { + fprintf(stderr, "Error: no such file (%s)\n", filename); + exit(1); + } + read(infile, &npoints, sizeof(int)); + read(infile, &nfeatures, sizeof(int)); + + /* allocate space for features[][] and read attributes of all objects */ + buf = (float*) malloc(npoints*nfeatures*sizeof(float)); + features = (float**)malloc(npoints* sizeof(float*)); + features[0] = (float*) malloc(npoints*nfeatures*sizeof(float)); + for (i=1; i npoints(%d) -- cannot proceed\n", min_nclusters, npoints); + exit(0); + } + + srand(7); /* seed for future random number generator */ + memcpy(features[0], buf, npoints*nfeatures*sizeof(float)); /* now features holds 2-dimensional array of features */ + free(buf); + + /* ======================= core of the clustering ===================*/ + + //cluster_timing = omp_get_wtime(); /* Total clustering time */ + cluster_centres = NULL; + index = cluster(npoints, /* number of data points */ + nfeatures, /* number of features for each point */ + features, /* array: [npoints][nfeatures] */ + min_nclusters, /* range of min to max number of clusters */ + max_nclusters, + threshold, /* loop termination factor */ + &best_nclusters, /* return: number between min and max */ + &cluster_centres, /* return: [best_nclusters][nfeatures] */ + &rmse, /* Root Mean Squared Error */ + isRMSE, /* calculate RMSE */ + nloops); /* number of iteration for each number of clusters */ + //cluster_timing = omp_get_wtime() - cluster_timing; + + + /* =============== Command Line Output =============== */ + 
+ /* cluster center coordinates + :displayed only for when k=1*/ + if((min_nclusters == max_nclusters) && (isOutput == 1)) { + printf("\n================= Centroid Coordinates =================\n"); + for(i = 0; i < max_nclusters; i++){ + printf("%d:", i); + for(j = 0; j < nfeatures; j++){ + printf(" %.2f", cluster_centres[i][j]); + } + printf("\n\n"); + } + } + + len = (float) ((max_nclusters - min_nclusters + 1)*nloops); + + printf("Number of Iteration: %d\n", nloops); + //printf("Time for I/O: %.5fsec\n", io_timing); + //printf("Time for Entire Clustering: %.5fsec\n", cluster_timing); + + if(min_nclusters != max_nclusters){ + if(nloops != 1){ //range of k, multiple iteration + //printf("Average Clustering Time: %fsec\n", + // cluster_timing / len); + printf("Best number of clusters is %d\n", best_nclusters); + } + else{ //range of k, single iteration + //printf("Average Clustering Time: %fsec\n", + // cluster_timing / len); + printf("Best number of clusters is %d\n", best_nclusters); + } + } + else{ + if(nloops != 1){ // single k, multiple iteration + //printf("Average Clustering Time: %.5fsec\n", + // cluster_timing / nloops); + if(isRMSE) // if calculated RMSE + printf("Number of trials to approach the best RMSE of %.3f is %d\n", rmse, index + 1); + } + else{ // single k, single iteration + if(isRMSE) // if calculated RMSE + printf("Root Mean Squared Error: %.3f\n", rmse); + } + } + + + /* free up memory */ + free(features[0]); + free(features); + return(0); +} + diff --git a/workloads/realworld/standard/kmeans/kmeans.h b/workloads/realworld/standard/kmeans/kmeans.h new file mode 100755 index 0000000000000000000000000000000000000000..28b6c34732313f04c02b59b095361bc8142d4b05 --- /dev/null +++ b/workloads/realworld/standard/kmeans/kmeans.h @@ -0,0 +1,60 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
*/ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. 
*/ +/******************************************************************************/ + +#ifndef _H_FUZZY_KMEANS +#define _H_FUZZY_KMEANS + +#ifndef FLT_MAX +#define FLT_MAX 3.40282347e+38 +#endif + +#include + +/* rmse.c */ +float euclid_dist_2 (float*, float*, int); +int find_nearest_point (float* , int, float**, int); +float rms_err(float**, int, int, float**, int); + +/* cluster.c */ +int cluster(int, int, float**, int, int, float, int*, float***, float*, int, int); + +/* kmeans_clustering.c */ +float **kmeans_clustering(float**, int, int, int, float, int*); + +#endif diff --git a/workloads/realworld/standard/kmeans/kmeans_clustering.c b/workloads/realworld/standard/kmeans/kmeans_clustering.c new file mode 100755 index 0000000000000000000000000000000000000000..54ddcd6d8ff6f0d075ff16e6b6aef84fda4716d2 --- /dev/null +++ b/workloads/realworld/standard/kmeans/kmeans_clustering.c @@ -0,0 +1,178 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. 
*/ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: kmeans_clustering.c **/ +/** Description: Implementation of regular k-means clustering **/ +/** algorithm **/ +/** Author: Wei-keng Liao **/ +/** ECE Department, Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. 
**/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include + +#include "kmeans.h" + +#define RANDOM_MAX 2147483647 + +extern double wtime(void); + +/*----< kmeans_clustering() >---------------------------------------------*/ +float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */ + int nfeatures, + int npoints, + int nclusters, + float threshold, + int *membership) /* out: [npoints] */ +{ + int i, j, n = 0; /* counters */ + int loop=0, temp; + int *new_centers_len; /* [nclusters]: no. of points in each cluster */ + float delta; /* if the point moved */ + float **clusters; /* out: [nclusters][nfeatures] */ + float **new_centers; /* [nclusters][nfeatures] */ + + int *initial; /* used to hold the index of points not yet selected + prevents the "birthday problem" of dual selection (?) 
+ considered holding initial cluster indices, but changed due to + possible, though unlikely, infinite loops */ + int initial_points; + int c = 0; + + /* nclusters should never be > npoints + that would guarantee a cluster without points */ + if (nclusters > npoints) + nclusters = npoints; + + /* allocate space for and initialize returning variable clusters[] */ + clusters = (float**) malloc(nclusters * sizeof(float*)); + clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float)); + for (i=1; i= 0; i++) { + //n = (int)rand() % initial_points; + + for (j=0; j 0) + clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */ + new_centers[i][j] = 0.0; /* set back to 0 */ + } + new_centers_len[i] = 0; /* set back to 0 */ + } + c++; + } while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */ + printf("iterated %d times\n", c); + free(new_centers[0]); + free(new_centers); + free(new_centers_len); + + return clusters; +} + diff --git a/workloads/realworld/standard/kmeans/kmeans_cuda.cu b/workloads/realworld/standard/kmeans/kmeans_cuda.cu new file mode 100755 index 0000000000000000000000000000000000000000..81e3b6dda591e8198764aa505d6527b1a31a6feb --- /dev/null +++ b/workloads/realworld/standard/kmeans/kmeans_cuda.cu @@ -0,0 +1,220 @@ +#include +#include +#include +#include +#include +#include + +#include + +#define THREADS_PER_DIM 16 +#define BLOCKS_PER_DIM 64 +#define THREADS_PER_BLOCK THREADS_PER_DIM*THREADS_PER_DIM +#include +#include "kmeans_cuda_kernel.cu" +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + + +//#define BLOCK_DELTA_REDUCE +//#define BLOCK_CENTER_REDUCE + +#define CPU_DELTA_REDUCE +#define CPU_CENTER_REDUCE + +extern "C" +int setup(int argc, char** argv); /* function prototype */ + +// GLOBAL!!!!! 
+unsigned int num_threads_perdim = THREADS_PER_DIM; /* sqrt(256) -- see references for this choice */ +unsigned int num_blocks_perdim = BLOCKS_PER_DIM; /* temporary */ +unsigned int num_threads = num_threads_perdim*num_threads_perdim; /* number of threads */ +unsigned int num_blocks = num_blocks_perdim*num_blocks_perdim; /* number of blocks */ + +/* _d denotes it resides on the device */ +int *membership_new; /* newly assignment membership */ +float *feature_d; /* inverted data array */ +float *feature_flipped_d; /* original (not inverted) data array */ +int *membership_d; /* membership on the device */ +float *block_new_centers; /* sum of points in a cluster (per block) */ +float *clusters_d; /* cluster centers on the device */ +float *block_clusters_d; /* per block calculation of cluster centers */ +int *block_deltas_d; /* per block calculation of deltas */ + + +/* -------------- allocateMemory() ------------------- */ +/* allocate device memory, calculate number of blocks and threads, and invert the data array */ +extern "C" +void allocateMemory(int npoints, int nfeatures, int nclusters, float **features) +{ + // printf("npoints is %d, num_threads is %d\n", npoints, num_threads); + // num_blocks = npoints / num_threads; + // if (npoints % num_threads > 0) /* defeat truncation */ + // num_blocks++; + + // num_blocks_perdim = sqrt((double) num_blocks); + // while (num_blocks_perdim * num_blocks_perdim < num_blocks) // defeat truncation (should run once) + // num_blocks_perdim++; + + num_blocks = num_blocks_perdim*num_blocks_perdim; + + /* allocate memory for memory_new[] and initialize to -1 (host) */ + membership_new = (int*) malloc(npoints * sizeof(int)); + for(int i=0;i>>(feature_flipped_d, feature_d, npoints, (num_blocks_perdim * num_blocks_perdim * num_threads_perdim * num_threads_perdim), nfeatures); + + /* allocate memory for membership_d[] and clusters_d[][] (device) */ + cudaMalloc((void**) &membership_d, npoints*sizeof(int)); + cudaMalloc((void**) 
&clusters_d, nclusters*nfeatures*sizeof(float)); +} +/* -------------- allocateMemory() end ------------------- */ + +/* -------------- deallocateMemory() ------------------- */ +/* free host and device memory */ +extern "C" +void deallocateMemory() +{ + free(membership_new); + free(block_new_centers); + cudaFree(feature_d); + cudaFree(feature_flipped_d); + cudaFree(membership_d); + + cudaFree(clusters_d); + + endCPU(); +} +/* -------------- deallocateMemory() end ------------------- */ + +//////////////////////////////////////////////////////////////////////////////// +// Program main // + +int +main( int argc, char** argv) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + // make sure we're running on the big card + GPU_argv_init(); + // as done in the CUDA start/help document provided + initTrace(); + setup(argc, argv); + finiTrace(); +} + +// // +//////////////////////////////////////////////////////////////////////////////// + +/* ------------------- kmeansCuda() ------------------------ */ +extern "C" +int // delta -- had problems when return value was of float type +kmeansCuda(float **feature, /* in: [npoints][nfeatures] */ + int nfeatures, /* number of attributes for each point */ + int npoints, /* number of data points */ + int nclusters, /* number of clusters */ + int *membership, /* which cluster the point belongs to */ + float **clusters, /* coordinates of cluster centers */ + int *new_centers_len, /* number of elements in each cluster */ + float **new_centers /* sum of elements in each cluster */ + ) +{ + int delta = 0; /* if point has moved */ + int i,j; /* counters */ + + // cudaSetDevice(1); + + + /* copy membership (host to device) */ + cudaMemcpy(membership_d, membership_new, npoints*sizeof(int), cudaMemcpyHostToDevice); + + // /* copy clusters (host to device) */ + // cudaMemcpy(clusters_d, clusters[0], nclusters*nfeatures*sizeof(float), cudaMemcpyHostToDevice); 
+ + // /* set up texture */ + // cudaChannelFormatDesc chDesc0 = cudaCreateChannelDesc(); + // t_features.filterMode = cudaFilterModePoint; + // t_features.normalized = false; + // t_features.channelDesc = chDesc0; + + // if(cudaBindTexture(NULL, &t_features, feature_d, &chDesc0, npoints*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind features array to texture!\n"); + + // cudaChannelFormatDesc chDesc1 = cudaCreateChannelDesc(); + // t_features_flipped.filterMode = cudaFilterModePoint; + // t_features_flipped.normalized = false; + // t_features_flipped.channelDesc = chDesc1; + + // if(cudaBindTexture(NULL, &t_features_flipped, feature_flipped_d, &chDesc1, npoints*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind features_flipped array to texture!\n"); + + // cudaChannelFormatDesc chDesc2 = cudaCreateChannelDesc(); + // t_clusters.filterMode = cudaFilterModePoint; + // t_clusters.normalized = false; + // t_clusters.channelDesc = chDesc2; + + // if(cudaBindTexture(NULL, &t_clusters, clusters_d, &chDesc2, nclusters*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind clusters array to texture!\n"); + + // /* copy clusters to constant memory */ + // cudaMemcpyToSymbol("c_clusters",clusters[0],nclusters*nfeatures*sizeof(float),0,cudaMemcpyHostToDevice); + + // cudaMemcpy(feature_d, feature, npoints * nfeatures * sizeof(float), cudaMemcpyHostToDevice); + + /* setup execution parameters. 
+ changed to 2d (source code on NVIDIA CUDA Programming Guide) */ + dim3 grid( num_blocks_perdim, num_blocks_perdim ); + dim3 threads( num_threads_perdim*num_threads_perdim ); + /* execute the kernel */ + kmeansPoint<<>>(feature_d, + nfeatures, + npoints, + (num_blocks_perdim * num_blocks_perdim * num_threads_perdim * num_threads_perdim), + nclusters, + membership_d); + cudaDeviceSynchronize(); + + /* copy back membership (device to host) */ + cudaMemcpy(membership_new, membership_d, npoints * sizeof(int), cudaMemcpyDeviceToHost); + + /* for each point, sum data points in each cluster + and see if membership has changed: + if so, increase delta and change old membership, and update new_centers; + otherwise, update new_centers */ + delta = 0; + for (i = 0; i < npoints; i++) + { + int cluster_id = membership_new[i]; + new_centers_len[cluster_id]++; + if (membership_new[i] != membership[i]) + { +#ifdef CPU_DELTA_REDUCE + delta++; +#endif + membership[i] = membership_new[i]; + } +#ifdef CPU_CENTER_REDUCE + for (j = 0; j < nfeatures; j++) + { + new_centers[cluster_id][j] += feature[i][j]; + } +#endif + } + return delta; + +} +/* ------------------- kmeansCuda() end ------------------------ */ + diff --git a/workloads/realworld/standard/kmeans/kmeans_cuda_kernel.cu b/workloads/realworld/standard/kmeans/kmeans_cuda_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..d5d94f15588c4097081c1fa9f2dab392ffe57dc9 --- /dev/null +++ b/workloads/realworld/standard/kmeans/kmeans_cuda_kernel.cu @@ -0,0 +1,136 @@ +#ifndef _KMEANS_CUDA_KERNEL_H_ +#define _KMEANS_CUDA_KERNEL_H_ + +#include +#include + +#include "kmeans.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +// FIXME: Make this a runtime selectable variable! +#define ASSUMED_NR_CLUSTERS 32 + +#define SDATA(index) CUT_BANK_CHECKER(sdata, index) + +// t_features has the layout dim0[points 0-m-1]dim1[ points 0-m-1]... 
+texture t_features; +// t_features_flipped has the layout point0[dim 0-n-1]point1[dim 0-n-1] +texture t_features_flipped; +texture t_clusters; + +__constant__ float c_clusters[ASSUMED_NR_CLUSTERS * 34]; /* constant memory for cluster centers */ + +/* ----------------- invert_mapping() --------------------- */ +/* inverts data array from row-major to column-major. + + [p0,dim0][p0,dim1][p0,dim2] ... + [p1,dim0][p1,dim1][p1,dim2] ... + [p2,dim0][p2,dim1][p2,dim2] ... + to + [dim0,p0][dim0,p1][dim0,p2] ... + [dim1,p0][dim1,p1][dim1,p2] ... + [dim2,p0][dim2,p1][dim2,p2] ... +*/ +__global__ void invert_mapping(float *input, /* original */ + float *output, /* inverted */ + int npoints, /* npoints */ + int batch_size, + int nfeatures) /* nfeatures */ +{ + int point_id = threadIdx.x + blockDim.x * blockIdx.x; /* id of thread */ + + int batches = npoints / batch_size; + + for (int b = 0; b < batches; b++) + { + for (int i = 0; i < nfeatures; i++) + { + output[b * batch_size + point_id + npoints * i] = input[(b * batch_size + point_id) * nfeatures + i]; + } + } + + + + return; +} +/* ----------------- invert_mapping() end --------------------- */ + +/* to turn on the GPU delta and center reduction */ +// #define GPU_DELTA_REDUCTION +// #define GPU_NEW_CENTER_REDUCTION + +/* ----------------- kmeansPoint() --------------------- */ +/* find the index of nearest cluster centers and change membership*/ +__global__ void +kmeansPoint(float *features, /* in: [npoints*nfeatures] */ + int nfeatures, + int npoints, + int batch_size, + int nclusters, + int *membership) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // block ID + const unsigned int block_id = gridDim.x * blockIdx.y + blockIdx.x; + // point/thread ID + const unsigned int point_id = block_id * blockDim.x * blockDim.y + threadIdx.x; + + __shared__ float tmp_features[THREADS_PER_DIM][THREADS_PER_DIM][16]; + + int batches = npoints / batch_size; + int tile = 0; + int end_tile = tile 
+ batches; + + for (; tile < end_tile; tile += 1) + { + for (int i = 0; i < 16; i++) + { + int addr = tile * batch_size + point_id + i * npoints; + tmp_features[threadIdx.y][threadIdx.x][i] = features[addr]; + } + block.sync(); + + int index = -1; + + float min_dist = FLT_MAX; + float dist; /* distance square between a point to cluster center */ + + /* find the cluster center id with min distance to pt */ + for (int i = 0; i < nclusters; i++) + { + int cluster_base_index = i * nfeatures; /* base index of cluster centers for inverted array */ + float ans = 0.0; /* Euclidean distance sqaure */ + + for (int j = 0; j < nfeatures; j++) + { + // int addr = point_id + j * npoints; /* appropriate index of data point */ + // float diff = (tex1Dfetch(t_features,addr) - c_clusters[cluster_base_index + j]); /* distance between a data point to cluster centers */ + + // int addr = point_id + j * npoints; /* appropriate index of data point */ + // float diff = features[addr] - c_clusters[cluster_base_index + j]; /* distance between a data point to cluster centers */ + float diff = tmp_features[threadIdx.y][threadIdx.x][j] - c_clusters[cluster_base_index + j]; /* distance between a data point to cluster centers */ + ans += diff * diff; /* sum of squares */ + } + dist = ans; + block.sync(); + + /* see if distance is smaller than previous ones: + if so, change minimum distance and save index of cluster center */ + if (dist < min_dist) + { + min_dist = dist; + index = i; + } + } + membership[tile * batch_size + point_id] = index; + block.sync(); + } + +} +#endif // #ifndef _KMEANS_CUDA_KERNEL_H_ diff --git a/workloads/realworld/standard/kmeans/kmeans_cuda_kernel.cu.old b/workloads/realworld/standard/kmeans/kmeans_cuda_kernel.cu.old new file mode 100755 index 0000000000000000000000000000000000000000..dd0dec27eebf197d811a58063b13a86c8f72e1ed --- /dev/null +++ b/workloads/realworld/standard/kmeans/kmeans_cuda_kernel.cu.old @@ -0,0 +1,185 @@ +#ifndef _KMEANS_CUDA_KERNEL_H_ +#define 
_KMEANS_CUDA_KERNEL_H_ + +#include +#include + +#include "kmeans.h" + +// FIXME: Make this a runtime selectable variable! +#define ASSUMED_NR_CLUSTERS 32 + +#define SDATA( index) CUT_BANK_CHECKER(sdata, index) + +// t_features has the layout dim0[points 0-m-1]dim1[ points 0-m-1]... +texture t_features; +// t_features_flipped has the layout point0[dim 0-n-1]point1[dim 0-n-1] +texture t_features_flipped; +texture t_clusters; + + +__constant__ float c_clusters[ASSUMED_NR_CLUSTERS*34]; /* constant memory for cluster centers */ + +/* ----------------- invert_mapping() --------------------- */ +/* inverts data array from row-major to column-major. + + [p0,dim0][p0,dim1][p0,dim2] ... + [p1,dim0][p1,dim1][p1,dim2] ... + [p2,dim0][p2,dim1][p2,dim2] ... + to + [dim0,p0][dim0,p1][dim0,p2] ... + [dim1,p0][dim1,p1][dim1,p2] ... + [dim2,p0][dim2,p1][dim2,p2] ... +*/ +__global__ void invert_mapping(float *input, /* original */ + float *output, /* inverted */ + int npoints, /* npoints */ + int nfeatures) /* nfeatures */ +{ + int point_id = threadIdx.x + blockDim.x*blockIdx.x; /* id of thread */ + int i; + + if(point_id < npoints){ + for(i=0;i 1; threadids_participating /= 2) { + if(threadIdx.x < threadids_participating) { + deltas[threadIdx.x] += deltas[threadIdx.x + threadids_participating]; + } + __syncthreads(); + } + if(threadIdx.x < 1) {deltas[threadIdx.x] += deltas[threadIdx.x + 1];} + __syncthreads(); + // propagate number of changes to global counter + if(threadIdx.x == 0) { + block_deltas[blockIdx.y * gridDim.x + blockIdx.x] = deltas[0]; + //printf("original id: %d, modified: %d\n", blockIdx.y*gridDim.x+blockIdx.x, blockIdx.x); + + } + +#endif + + +#ifdef GPU_NEW_CENTER_REDUCTION + int center_id = threadIdx.x / nfeatures; + int dim_id = threadIdx.x - nfeatures*center_id; + + __shared__ int new_center_ids[THREADS_PER_BLOCK]; + + new_center_ids[threadIdx.x] = index; + __syncthreads(); + + /*** + determine which dimension calculte the sum for + mapping of threads is + 
center0[dim0,dim1,dim2,...]center1[dim0,dim1,dim2,...]... + ***/ + + int new_base_index = (point_id - threadIdx.x)*nfeatures + dim_id; + float accumulator = 0.f; + + if(threadIdx.x < nfeatures * nclusters) { + // accumulate over all the elements of this threadblock + for(int i = 0; i< (THREADS_PER_BLOCK); i++) { + float val = tex1Dfetch(t_features_flipped,new_base_index+i*nfeatures); + if(new_center_ids[i] == center_id) + accumulator += val; + } + + // now store the sum for this threadblock + /*** + mapping to global array is + block0[center0[dim0,dim1,dim2,...]center1[dim0,dim1,dim2,...]...]block1[...]... + ***/ + block_clusters[(blockIdx.y*gridDim.x + blockIdx.x) * nclusters * nfeatures + threadIdx.x] = accumulator; + } +#endif + +} +#endif // #ifndef _KMEANS_CUDA_KERNEL_H_ diff --git a/workloads/realworld/standard/kmeans/rmse.c b/workloads/realworld/standard/kmeans/rmse.c new file mode 100755 index 0000000000000000000000000000000000000000..fe7786342bf77cab12958e630cb8d99834312f0d --- /dev/null +++ b/workloads/realworld/standard/kmeans/rmse.c @@ -0,0 +1,95 @@ +/*************************************************************************/ +/** File: rmse.c **/ +/** Description: calculate root mean squared error of particular **/ +/** clustering. **/ +/** Author: Sang-Ha Lee **/ +/** University of Virginia. **/ +/** **/ +/** Note: euclid_dist_2() and find_nearest_point() adopted from **/ +/** Minebench code. 
**/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include + +#include "kmeans.h" + +extern double wtime(void); + +/*----< euclid_dist_2() >----------------------------------------------------*/ +/* multi-dimensional spatial Euclid distance square */ +__inline +float euclid_dist_2(float *pt1, + float *pt2, + int numdims) +{ + int i; + float ans=0.0; + + for (i=0; i-----------------------------------------------*/ +__inline +int find_nearest_point(float *pt, /* [nfeatures] */ + int nfeatures, + float **pts, /* [npts][nfeatures] */ + int npts) +{ + int index, i; + float max_dist=FLT_MAX; + + /* find the cluster center id with min distance to pt */ + for (i=0; i-------------------------------------*/ +float rms_err (float **feature, /* [npoints][nfeatures] */ + int nfeatures, + int npoints, + float **cluster_centres, /* [nclusters][nfeatures] */ + int nclusters) +{ + int i; + int nearest_cluster_index; /* cluster center id with min distance to pt */ + float sum_euclid = 0.0; /* sum of Euclidean distance squares */ + float ret; /* return value */ + + /* calculate and sum the sqaure of euclidean distance*/ + #pragma omp parallel for \ + shared(feature,cluster_centres) \ + firstprivate(npoints,nfeatures,nclusters) \ + private(i, nearest_cluster_index) \ + schedule (static) + for (i=0; i +#endif + +#include + +#ifndef _H_TYPES +#include +#endif + +#include + +#ifndef _H_ACCESS +#include /* for the "access" function */ +#endif + +/* + * POSIX requires that certain values be included in unistd.h. It also + * requires that when _POSIX_SOURCE is defined only those standard + * specific values are present. This header includes all the POSIX + * required entries. 
+ */ + +#ifdef _POSIX_SOURCE +#ifdef _LARGE_FILES +#define lseek lseek64 +#endif + + +/* Symbolic constants for the "lseek" function: */ +#ifndef SEEK_SET +#define SEEK_SET 0 /* Set file pointer to "offset" */ +#define SEEK_CUR 1 /* Set file pointer to current plus "offset" */ +#define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif /* SEEK_SET */ + +#ifdef _NO_PROTO + +#ifndef _KERNEL +extern int access(); +extern unsigned int alarm(); +extern int chdir(); +extern int chown(); +extern int close(); +extern char *ctermid(); +extern int dup(); +extern int dup2(); +extern int execl(); +extern int execv(); +extern int execle(); +extern int execve(); +extern int execlp(); +extern int execvp(); +extern void _exit(); +extern pid_t fork(); +extern long fpathconf(); +extern char *getcwd(); +extern gid_t getegid(); +extern uid_t geteuid(); +extern gid_t getgid(); +extern int getgroups(); +extern char *getlogin(); +extern pid_t getpgrp(); +extern pid_t getpid(); +extern pid_t getppid(); +extern uid_t getuid(); +extern int isatty(); +extern int link(); +extern off_t lseek(); +extern long pathconf(); +extern int pause(); +extern int pipe(); +#if defined(_XOPEN_SOURCE) && ( _XOPEN_SOURCE >= 500 ) +extern int pthread_atfork(); +#endif +extern int read(); +extern int rmdir(); +extern int setgid(); +extern int setpgid(); +extern int setsid(); +extern int setuid(); +extern unsigned int sleep(); +extern long sysconf(); +extern pid_t tcgetpgrp(); +extern int tcsetpgrp(); +extern char *ttyname(); +extern int unlink(); +extern int write(); +#endif /* !_KERNEL */ + +#else /* POSIX required prototypes */ + +#ifndef _KERNEL +extern int access(const char *, int); +extern unsigned int alarm(unsigned int); +extern int chdir(const char *); +extern int chown(const char *, uid_t, gid_t); +extern int close(int); +extern char *ctermid(char *); +extern int dup(int); +extern int dup2(int, int); +extern int execl(const char *, const char *, ...); +extern int execv(const char *, char 
*const []); +extern int execle(const char *, const char *, ...); +extern int execve(const char *, char *const [], char *const []); +extern int execlp(const char *, const char *, ...); +extern int execvp(const char *, char *const []); +extern void _exit(int); +extern pid_t fork(void); +extern long fpathconf(int, int); +extern char *getcwd(char *, size_t); +extern gid_t getegid(void); +extern uid_t geteuid(void); +extern gid_t getgid(void); +extern int getgroups(int, gid_t []); +extern char *getlogin(void); +#ifndef _BSD +extern pid_t getpgrp(void); +#endif /* _BSD */ +extern pid_t getpid(void); +extern pid_t getppid(void); +extern uid_t getuid(void); +extern int isatty(int); +extern int link(const char *, const char *); +extern off_t lseek(int, off_t, int); +#ifdef _LARGE_FILE_API +extern off64_t lseek64(int, off64_t, int); +#endif +extern long pathconf(const char *, int); +extern int pause(void); +extern int pipe(int []); +#if defined(_XOPEN_SOURCE) && ( _XOPEN_SOURCE >= 500 ) +extern int pthread_atfork(void (*)(void), void (*)(void), void (*)(void)); +#endif +extern ssize_t read(int, void *, size_t); +extern int rmdir(const char *); +extern int setgid(gid_t); +extern int setpgid(pid_t, pid_t); +extern pid_t setsid(void); +extern int setuid(uid_t); +extern unsigned int sleep(unsigned int); +extern long sysconf(int); +extern pid_t tcgetpgrp(int); +extern int tcsetpgrp(int, pid_t); +extern char *ttyname(int); +extern int unlink(const char *); +extern ssize_t write(int, const void *, size_t); +#endif /* !_KERNEL */ +#endif /* !_NO_PROTO */ + +#define STDIN_FILENO 0 +#define STDOUT_FILENO 1 +#define STDERR_FILENO 2 + +#define _POSIX_JOB_CONTROL 1 +#define _POSIX_SAVED_IDS 1 + +#define _POSIX_VERSION 200112L +#define _POSIX2_VERSION 200112L +#define _POSIX2_C_VERSION 200112L + + +#ifdef _XOPEN_SOURCE + +#define _XOPEN_VERSION 600 +#define _XOPEN_XCU_VERSION 4 +#define _XOPEN_XPG3 1 +#define _XOPEN_XPG4 1 +#define _XOPEN_UNIX 1 + +#define _XOPEN_REALTIME (-1) +#define 
_XOPEN_REALTIME_THREADS (-1) + +#if (_XOPEN_SOURCE >= 600) +#define _XOPEN_STREAMS 1 +#endif + +#define _XBS5_ILP32_OFF32 1 +#define _XBS5_ILP32_OFFBIG 1 +#define _XBS5_LP64_OFF64 1 +#define _XBS5_LPBIG_OFFBIG 1 + +#define _POSIX2_C_BIND 200112L +#define _POSIX2_C_DEV 200112L +#define _POSIX2_CHAR_TERM 1 +#define _POSIX2_LOCALEDEF 200112L +#define _POSIX2_UPE 200112L +#define _POSIX2_FORT_DEV (-1) +#define _POSIX2_FORT_RUN (-1) +#define _POSIX2_SW_DEV (-1) + +#if (_POSIX_C_SOURCE >= 200112L) +#define _POSIX_REGEXP 1 +#define _POSIX_SHELL 1 +#define _POSIX2_PBS (-1) +#define _POSIX2_PBS_ACCOUNTING (-1) +#define _POSIX2_PBS_CHECKPOINT (-1) +#define _POSIX2_PBS_LOCATE (-1) +#define _POSIX2_PBS_MESSAGE (-1) +#define _POSIX2_PBS_TRACK (-1) +#define _V6_ILP32_OFF32 1 +#define _V6_ILP32_OFFBIG 1 +#define _V6_LP64_OFF64 1 +#define _V6_LPBIG_OFFBIG 1 + +#define _POSIX_ADVISORY_INFO 200112L +#define _POSIX_BARRIERS 200112L +#define _POSIX_CLOCK_SELECTION 200112L +#define _POSIX_CPUTIME 200112L +#define _POSIX_MONOTONIC_CLOCK 200112L + +#ifdef _POSIX_RAW_SOCKETS +#undef _POSIX_RAW_SOCKETS +#endif + +#define _POSIX_SPAWN 200112L +#define _POSIX_SPIN_LOCKS 200112L +#define _POSIX_SPORADIC_SERVER (-1) +#define _POSIX_THREAD_CPUTIME 200112L +#define _POSIX_THREAD_SPORADIC_SERVER (-1) +#define _POSIX_TIMEOUTS 200112L +#define _POSIX_TRACE (-1) +#define _POSIX_TRACE_EVENT_FILTER (-1) +#define _POSIX_TRACE_INHERIT (-1) +#define _POSIX_TRACE_LOG (-1) +#define _POSIX_TYPED_MEMORY_OBJECTS (-1) + +#endif /* _POSIX_C_SOURCE >= 200112L */ + +#define _XOPEN_CRYPT 1 +#define _XOPEN_SHM 1 +#define _XOPEN_ENH_I18N 1 +#define _XOPEN_LEGACY (-1) +#ifndef __64BIT__ +#define _UNIX_ABI (-1) +#define _UNIX_ABI_IA64 (-1) +#define _UNIX_ABI_BIG_ENDIAN (-1) +#define _UNIX_ABI_LITTLE_ENDIAN (-1) +#endif /* __64BIT__ */ + +extern char *optarg; +extern int optind, opterr, optopt; + +#ifdef _NO_PROTO + extern size_t confstr(); + extern char *crypt(); + extern void encrypt(); + extern int fsync(); + extern 
int getopt(); + extern int nice(); + extern void swab(); +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern char *getpass(); + extern int chroot(); +#endif +#else + extern size_t confstr(int, char*, size_t); + extern char *crypt(const char *, const char *); + extern void encrypt(char *, int); + extern int fsync(int); + extern int getopt(int, char* const*, const char*); + extern int nice(int); + extern void swab(const void *, void *, ssize_t); + extern int fdatasync(int); +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern char *getpass(const char *); + extern int chroot(const char *); +#endif +#endif + +#endif /* _XOPEN _SOURCE */ + +/* Threads options for 1003.1c and XPG UNIX98 */ +#define _POSIX_THREADS 200112L +#define _POSIX_THREAD_ATTR_STACKADDR 200112L +#define _POSIX_THREAD_ATTR_STACKSIZE 200112L +#define _POSIX_THREAD_PROCESS_SHARED 200112L +#define _POSIX_THREAD_SAFE_FUNCTIONS 200112L +#ifdef _ALL_SOURCE +#define _POSIX_REENTRANT_FUNCTIONS _POSIX_THREAD_SAFE_FUNCTIONS +#endif + +/* Realtime threads options for 1003.1c and XPG UNIX98 */ +#define _POSIX_THREAD_PRIORITY_SCHEDULING (-1) +#define _POSIX_THREAD_PRIO_INHERIT (-1) +#define _POSIX_THREAD_PRIO_PROTECT (-1) + +#undef _POSIX_THREAD_FORKALL + +/* Realtime options for 1003.1c and XPG UNIX98 */ +#define _POSIX_ASYNCHRONOUS_IO 200112L +#define _POSIX_FSYNC 200112L +#define _POSIX_MAPPED_FILES 200112L +#define _POSIX_MEMLOCK 200112L +#define _POSIX_MEMLOCK_RANGE 200112L +#define _POSIX_MEMORY_PROTECTION 200112L +#define _POSIX_MESSAGE_PASSING 200112L +#define _POSIX_PRIORITIZED_IO 200112L +#define _POSIX_PRIORITY_SCHEDULING 200112L +#define _POSIX_REALTIME_SIGNALS 200112L +#define _POSIX_SEMAPHORES 200112L +#define _POSIX_SHARED_MEMORY_OBJECTS 200112L +#define _POSIX_SYNCHRONIZED_IO 200112L +#define _POSIX_TIMERS 200112L + +#define _POSIX_ASYNC_IO (-1) +#undef _POSIX_SYNC_IO +#define _POSIX_PRIO_IO (-1) + +#define 
_POSIX_CHOWN_RESTRICTED 0 +#define _POSIX_VDISABLE 0xFF +#define _POSIX_NO_TRUNC 0 + + /* UNIX03 and POSIX01 */ + /* Always enabled */ +#define _POSIX_IPV6 200112L +#define _POSIX_RAW_SOCKETS 200112L + + +#ifndef NULL +#define NULL 0 +#endif + +#if (_POSIX_C_SOURCE >= 200112L) +#define _POSIX_READER_WRITER_LOCKS 200112L +#endif + +/* arguments for the confstr() function */ + +#define _CS_PATH 1 + + /* compile,link,lib,lint flags for 32bit, no_LARGE_FILES system */ +#define _CS_XBS5_ILP32_OFF32_CFLAGS 2 +#define _CS_XBS5_ILP32_OFF32_LDFLAGS 3 +#define _CS_XBS5_ILP32_OFF32_LIBS 4 +#define _CS_XBS5_ILP32_OFF32_LINTFLAGS 5 + + /* compile,link,lib,lint flags for 32bit, _LARGE_FILES system */ +#define _CS_XBS5_ILP32_OFFBIG_CFLAGS 6 +#define _CS_XBS5_ILP32_OFFBIG_LDFLAGS 7 +#define _CS_XBS5_ILP32_OFFBIG_LIBS 8 +#define _CS_XBS5_ILP32_OFFBIG_LINTFLAGS 9 + + /* compile,link,lib,lint flags for LP64 64bit system */ +#define _CS_XBS5_LP64_OFF64_CFLAGS 10 +#define _CS_XBS5_LP64_OFF64_LDFLAGS 11 +#define _CS_XBS5_LP64_OFF64_LIBS 12 +#define _CS_XBS5_LP64_OFF64_LINTFLAGS 13 + + /* compile,link,lib,lint flags for ILP64 64bit system */ + /* AIX does not currently support this */ +#define _CS_XBS5_LPBIG_OFFBIG_CFLAGS 14 +#define _CS_XBS5_LPBIG_OFFBIG_LDFLAGS 15 +#define _CS_XBS5_LPBIG_OFFBIG_LIBS 16 +#define _CS_XBS5_LPBIG_OFFBIG_LINTFLAGS 17 + +#define _CS_AIX_BOOTDEV 24 +#define _CS_AIX_MODEL_CODE 25 +#define _CS_AIX_ARCHITECTURE 26 +#define _CS_AIX_MODEL_CLASS 40 + +#if (_POSIX_C_SOURCE >= 200112L) +#define _CS_POSIX_V6_ILP32_OFF32_CFLAGS 27 +#define _CS_POSIX_V6_ILP32_OFF32_LDFLAGS 28 +#define _CS_POSIX_V6_ILP32_OFF32_LIBS 29 +#define _CS_POSIX_V6_ILP32_OFFBIG_CFLAGS 30 +#define _CS_POSIX_V6_ILP32_OFFBIG_LDFLAGS 31 +#define _CS_POSIX_V6_ILP32_OFFBIG_LIBS 32 +#define _CS_POSIX_V6_LP64_OFF64_CFLAGS 33 +#define _CS_POSIX_V6_LP64_OFF64_LDFLAGS 34 +#define _CS_POSIX_V6_LP64_OFF64_LIBS 35 +#define _CS_POSIX_V6_LPBIG_OFFBIG_CFLAGS 36 +#define _CS_POSIX_V6_LPBIG_OFFBIG_LDFLAGS 37 
+#define _CS_POSIX_V6_LPBIG_OFFBIG_LIBS 38 +#define _CS_POSIX_V6_WIDTH_RESTRICTED_ENVS 39 +#endif + + /* Values for the above */ +#define _CSPATH "/usr/bin:/usr/vac/bin" + + /* ILP32_OFF32 */ +#define _CSPOSIX_V6_ILP32_OFF32_CFLAGS "-q32" +#define _CSXBS5_ILP32_OFF32_CFLAGS _CSPOSIX_V6_ILP32_OFF32_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_ILP32_OFF32_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_ILP32_OFF32_LDFLAGS "-b32" +#define _CSXBS5_ILP32_OFF32_LDFLAGS _CSPOSIX_V6_ILP32_OFF32_LDFLAGS +#endif + +#define _CSPOSIX_V6_ILP32_OFF32_LIBS "-lc -lpthread -lm" +#define _CSXBS5_ILP32_OFF32_LIBS _CSPOSIX_V6_ILP32_OFF32_LIBS + +#define _CSXBS5_ILP32_OFF32_LINTFLAGS "" + + /* ILP32_OFFOFFBIG */ +#define _CSPOSIX_V6_ILP32_OFFBIG_CFLAGS "-q32 -D_LARGE_FILES -qlonglong" +#define _CSXBS5_ILP32_OFFBIG_CFLAGS _CSPOSIX_V6_ILP32_OFFBIG_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_ILP32_OFFBIG_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_ILP32_OFFBIG_LDFLAGS "-b32" +#define _CSXBS5_ILP32_OFFBIG_LDFLAGS _CSPOSIX_V6_ILP32_OFFBIG_LDFLAGS +#endif + +#define _CSPOSIX_V6_ILP32_OFFBIG_LIBS "-lc -lpthread -lm" +#define _CSXBS5_ILP32_OFFBIG_LIBS _CSPOSIX_V6_ILP32_OFFBIG_LIBS + +#define _CSXBS5_ILP32_OFFBIG_LINTFLAGS "-D_LARGE_FILES -qlonglong" + + /* LP64_OFF64 */ +#define _CSPOSIX_V6_LP64_OFF64_CFLAGS "-q64" +#define _CSXBS5_LP64_OFF64_CFLAGS _CSPOSIX_V6_LP64_OFF64_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_LP64_OFF64_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_LP64_OFF64_LDFLAGS "-b64" +#define _CSXBS5_LP64_OFF64_LDFLAGS _CSPOSIX_V6_LP64_OFF64_LDFLAGS +#endif + +#define _CSPOSIX_V6_LP64_OFF64_LIBS "-lc -lpthread -lm" +#define _CSXBS5_LP64_OFF64_LIBS _CSPOSIX_V6_LP64_OFF64_LIBS + +#define _CSXBS5_LP64_OFF64_LINTFLAGS "-D__64BIT__" + + /* LPBIG_OFFBIG */ +#define _CSPOSIX_V6_LPBIG_OFFBIG_CFLAGS "-q64" +#define _CSXBS5_LPBIG_OFFBIG_CFLAGS _CSPOSIX_V6_LPBIG_OFFBIG_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_LPBIG_OFFBIG_LDFLAGS "" +#else /* POWER */ +#define 
_CSPOSIX_V6_LPBIG_OFFBIG_LDFLAGS "-b64" +#define _CSXBS5_LPBIG_OFFBIG_LDFLAGS _CSPOSIX_V6_LPBIG_OFFBIG_LDFLAGS +#endif + +#define _CSPOSIX_V6_LPBIG_OFFBIG_LIBS "-lc -lpthread -lm" +#define _CSXBS5_LPBIG_OFFBIG_LIBS _CSPOSIX_V6_LPBIG_OFFBIG_LIBS + +#define _CSXBS5_LPBIG_OFFBIG_LINTFLAGS "-D__64BIT__" + +#if (_POSIX_C_SOURCE >= 200112L) +#define _CSPOSIX_V6_WIDTH_RESTRICTED_ENVS \ + "POSIX_V6_ILP32_OFF32\n" \ + "POSIX_V6_ILP32_OFFBIG\n" \ + "POSIX_V6_LP64_OFF64\n" \ + "POSIX_V6_LPBIG_OFFBIG" +#endif + +/* arguments for the pathconf() function */ + +#define _PC_CHOWN_RESTRICTED 10 +#define _PC_LINK_MAX 11 +#define _PC_MAX_CANON 12 +#define _PC_MAX_INPUT 13 +#define _PC_NAME_MAX 14 +#define _PC_NO_TRUNC 15 +#define _PC_PATH_MAX 16 +#define _PC_PIPE_BUF 17 +#define _PC_VDISABLE 18 +#define _PC_ASYNC_IO 19 +#define _PC_SYNC_IO 20 +#define _PC_PRIO_IO 21 +#define _PC_FILESIZEBITS 22 /* # bits needed to hold offset */ +#define _PC_AIX_DISK_PARTITION 23 +#define _PC_AIX_DISK_SIZE 24 +#if (_POSIX_C_SOURCE >= 200112L) +#define _PC_SYMLINK_MAX 25 +#define _PC_ALLOC_SIZE_MIN 26 +#define _PC_REC_INCR_XFER_SIZE 27 +#define _PC_REC_MAX_XFER_SIZE 28 +#define _PC_REC_MIN_XFER_SIZE 29 +#define _PC_REC_XFER_ALIGN 30 +#define _PC_2_SYMLINKS 31 +#endif + +/* arguments for the sysconf() function, the defined numbers are used as + * array index in sysconf(). 
+ * + * POSIX.1(1990), Table 4-2 + */ +#define _SC_ARG_MAX 0 +#define _SC_CHILD_MAX 1 +#define _SC_CLK_TCK 2 +#define _SC_NGROUPS_MAX 3 +#define _SC_OPEN_MAX 4 +#define _SC_STREAM_MAX 5 +#define _SC_TZNAME_MAX 6 +#define _SC_JOB_CONTROL 7 +#define _SC_SAVED_IDS 8 +#define _SC_VERSION 9 + +/* POSIX.1(1990), Table 2-3, required by command getconf */ + +#define _SC_POSIX_ARG_MAX 10 +#define _SC_POSIX_CHILD_MAX 11 +#define _SC_POSIX_LINK_MAX 12 +#define _SC_POSIX_MAX_CANON 13 +#define _SC_POSIX_MAX_INPUT 14 +#define _SC_POSIX_NAME_MAX 15 +#define _SC_POSIX_NGROUPS_MAX 16 +#define _SC_POSIX_OPEN_MAX 17 +#define _SC_POSIX_PATH_MAX 18 +#define _SC_POSIX_PIPE_BUF 19 +#define _SC_POSIX_SSIZE_MAX 20 +#define _SC_POSIX_STREAM_MAX 21 +#define _SC_POSIX_TZNAME_MAX 22 + +/* POSIX.2 (Draft 10), Table 41) */ + +#define _SC_BC_BASE_MAX 23 +#define _SC_BC_DIM_MAX 24 +#define _SC_BC_SCALE_MAX 25 +#define _SC_BC_STRING_MAX 26 +#define _SC_EQUIV_CLASS_MAX 27 +#define _SC_EXPR_NEST_MAX 28 +#define _SC_LINE_MAX 29 +#define _SC_RE_DUP_MAX 30 +#define _SC_2_VERSION 31 +#define _SC_2_C_DEV 32 +#define _SC_2_FORT_DEV 33 +#define _SC_2_FORT_RUN 34 +#define _SC_2_LOCALEDEF 35 +#define _SC_2_SW_DEV 36 + +/* POSIX.2 (Draft 10), Table 13) */ + +#define _SC_POSIX2_BC_BASE_MAX 37 +#define _SC_POSIX2_BC_DIM_MAX 38 +#define _SC_POSIX2_BC_SCALE_MAX 39 +#define _SC_POSIX2_BC_STRING_MAX 40 +#define _SC_POSIX2_EQUIV_CLASS_MAX 41 +#define _SC_POSIX2_EXPR_NEST_MAX 42 +#define _SC_POSIX2_LINE_MAX 43 +#define _SC_POSIX2_RE_DUP_MAX 44 +#define _SC_PASS_MAX 45 +#define _SC_XOPEN_VERSION 46 +#define _SC_ATEXIT_MAX 47 +#if _XOPEN_SOURCE_EXTENDED==1 +#define _SC_PAGE_SIZE 48 +#endif /* _XOPEN_SOURCE_EXTENDED */ +#define _SC_AES_OS_VERSION 49 +#define _SC_COLL_WEIGHTS_MAX 50 +#define _SC_2_C_BIND 51 +#define _SC_2_C_VERSION 52 +#define _SC_2_UPE 53 +#define _SC_2_CHAR_TERM 54 +#define _SC_XOPEN_SHM 55 +#define _SC_XOPEN_CRYPT 56 +#define _SC_XOPEN_ENH_I18N 57 +#if _XOPEN_SOURCE_EXTENDED==1 +#define _SC_PAGESIZE 
_SC_PAGE_SIZE +#define _SC_IOV_MAX 58 +#endif /* _XOPEN_SOURCE_EXTENDED */ +#define _SC_THREAD_SAFE_FUNCTIONS 59 +#define _SC_THREADS 60 +#define _SC_THREAD_ATTR_STACKADDR 61 +#define _SC_THREAD_ATTR_STACKSIZE 62 +#define _SC_THREAD_FORKALL 63 +#define _SC_THREAD_PRIORITY_SCHEDULING 64 +#define _SC_THREAD_PRIO_INHERIT 65 +#define _SC_THREAD_PRIO_PROTECT 66 +#define _SC_THREAD_PROCESS_SHARED 67 +#define _SC_THREAD_KEYS_MAX 68 +#define _SC_THREAD_DATAKEYS_MAX _SC_THREAD_KEYS_MAX +#define _SC_THREAD_STACK_MIN 69 +#define _SC_THREAD_THREADS_MAX 70 +#ifdef _ALL_SOURCE +#define _SC_NPROCESSORS_CONF 71 +#define _SC_NPROCESSORS_ONLN 72 +#endif /* _ALL_SOURCE */ +#define _SC_XOPEN_UNIX 73 + +#if (_XOPEN_SOURCE >= 500) + +/* POSIX 1003.1c and XPG UNIX98 */ +/* look to defines above for meanings */ +#define _SC_AIO_LISTIO_MAX 75 +#define _SC_AIO_MAX 76 +#define _SC_AIO_PRIO_DELTA_MAX 77 +#define _SC_ASYNCHRONOUS_IO 78 +#define _SC_DELAYTIMER_MAX 79 +#define _SC_FSYNC 80 +#define _SC_GETGR_R_SIZE_MAX 81 +#define _SC_GETPW_R_SIZE_MAX 82 +#define _SC_LOGIN_NAME_MAX 83 +#define _SC_MAPPED_FILES 84 +#define _SC_MEMLOCK 85 +#define _SC_MEMLOCK_RANGE 86 +#define _SC_MEMORY_PROTECTION 87 +#define _SC_MESSAGE_PASSING 88 +#define _SC_MQ_OPEN_MAX 89 +#define _SC_MQ_PRIO_MAX 90 +#define _SC_PRIORITIZED_IO 91 +#define _SC_PRIORITY_SCHEDULING 92 +#define _SC_REALTIME_SIGNALS 93 +#define _SC_RTSIG_MAX 94 +#define _SC_SEMAPHORES 95 +#define _SC_SEM_NSEMS_MAX 96 +#define _SC_SEM_VALUE_MAX 97 +#define _SC_SHARED_MEMORY_OBJECTS 98 +#define _SC_SIGQUEUE_MAX 99 +#define _SC_SYNCHRONIZED_IO 100 +#define _SC_THREAD_DESTRUCTOR_ITERATIONS 101 +#define _SC_TIMERS 102 +#define _SC_TIMER_MAX 103 +#define _SC_TTY_NAME_MAX 104 +#define _SC_XBS5_ILP32_OFF32 105 +#define _SC_XBS5_ILP32_OFFBIG 106 +#define _SC_XBS5_LP64_OFF64 107 +#define _SC_XBS5_LPBIG_OFFBIG 108 +#define _SC_XOPEN_XCU_VERSION 109 +#define _SC_XOPEN_REALTIME 110 +#define _SC_XOPEN_REALTIME_THREADS 111 +#define _SC_XOPEN_LEGACY 112 +#endif 
/* _XOPEN_SOURCE >= 500 */ + +#ifdef _ALL_SOURCE +#define _SC_REENTRANT_FUNCTIONS _SC_THREAD_SAFE_FUNCTIONS +#define _SC_PHYS_PAGES 113 +#define _SC_AVPHYS_PAGES 114 +#define _SC_LPAR_ENABLED 115 +#define _SC_LARGE_PAGESIZE 116 +#endif /* _ALL_SOURCE */ + +#define _SC_AIX_KERNEL_BITMODE 117 +#define _SC_AIX_REALMEM 118 +#define _SC_AIX_HARDWARE_BITMODE 119 +#define _SC_AIX_MP_CAPABLE 120 + +#define _SC_V6_ILP32_OFF32 121 +#define _SC_V6_ILP32_OFFBIG 122 +#define _SC_V6_LP64_OFF64 123 +#define _SC_V6_LPBIG_OFFBIG 124 + +#define _SC_XOPEN_STREAMS 125 + +#if (_POSIX_C_SOURCE >= 200112L) +#define _SC_HOST_NAME_MAX 126 +#define _SC_REGEXP 127 +#define _SC_SHELL 128 +#define _SC_SYMLOOP_MAX 129 +#define _SC_ADVISORY_INFO 130 +#define _SC_FILE_LOCKING 131 +#define _SC_2_PBS 132 +#define _SC_2_PBS_ACCOUNTING 133 +#define _SC_2_PBS_CHECKPOINT 134 +#define _SC_2_PBS_LOCATE 135 +#define _SC_2_PBS_MESSAGE 136 +#define _SC_2_PBS_TRACK 137 +#define _SC_BARRIERS 138 +#define _SC_CLOCK_SELECTION 139 +#define _SC_CPUTIME 140 +#define _SC_MONOTONIC_CLOCK 141 +#define _SC_READER_WRITER_LOCKS 142 +#define _SC_SPAWN 143 +#define _SC_SPIN_LOCKS 144 +#define _SC_SPORADIC_SERVER 145 +#define _SC_THREAD_CPUTIME 146 +#define _SC_THREAD_SPORADIC_SERVER 147 +#define _SC_TIMEOUTS 148 +#define _SC_TRACE 149 +#define _SC_TRACE_EVENT_FILTER 150 +#define _SC_TRACE_INHERIT 151 +#define _SC_TRACE_LOG 152 +#define _SC_TYPED_MEMORY_OBJECTS 153 +#define _SC_IPV6 154 +#define _SC_RAW_SOCKETS 155 +#define _SC_SS_REPL_MAX 156 +#define _SC_TRACE_EVENT_NAME_MAX 157 +#define _SC_TRACE_NAME_MAX 158 +#define _SC_TRACE_SYS_MAX 159 +#define _SC_TRACE_USER_EVENT_MAX 160 +#endif /* _POSIX_C_SOURCE >= 200112L */ + +#ifdef _ALL_SOURCE +#define _SC_AIX_UKEYS 161 +#endif /* _ALL_SOURCE */ + +#endif /* _POSIX_SOURCE */ + + +#if _XOPEN_SOURCE_EXTENDED==1 +#ifdef _LARGE_FILES +#define ftruncate ftruncate64 +#define truncate truncate64 +#endif + +#ifndef _H_LOCKF +#include /* lockf definitions for portability */ +#endif + 
+#ifdef _NO_PROTO +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern int brk(); + extern int getpagesize(); +#ifndef _MSGQSUPPORT + extern int __fd_getdtablesize(); + static int getdtablesize() + { + return __fd_getdtablesize(); + } +#else + extern int getdtablesize(); +#endif /* _MSGQSUPPORT */ + + extern void *sbrk(); +#endif /* _POSIX_C_SOURCE<200112L */ + extern int fchdir(); + extern int fchown(); + extern int ftruncate(); + extern long gethostid(); + extern int gethostname(); + extern pid_t getpgid(); + extern pid_t getsid(); + extern char *getwd(); + extern int lchown(); + extern int readlink(); + extern pid_t setpgrp(); + extern int setregid(); + extern int setreuid(); + extern int symlink(); + extern void sync(); + extern int truncate(); + extern useconds_t ualarm(); + extern int usleep(); + extern pid_t vfork(); +#else /* _NO_PROTO */ +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern int brk(void *); + extern int getpagesize(void); +#ifndef _MSGQSUPPORT + extern int __fd_getdtablesize(void); + static int getdtablesize() + { + return __fd_getdtablesize(); + } +#else + extern int getdtablesize(void); +#endif /* _MSGQSUPPORT */ +#ifdef _LINUX_SOURCE_COMPAT + extern void *sbrk(ptrdiff_t); +#elif (_XOPEN_SOURCE >= 500) || defined(__64BIT__) + extern void *sbrk(intptr_t); +#else + extern void *sbrk(int); +#endif +#endif /* _POSIX_C_SOURCE<200112L */ + extern int fchdir(int); + extern int fchown(int, uid_t, gid_t); + extern int ftruncate(int, off_t); +#ifdef _LARGE_FILE_API + extern int ftruncate64(int, off64_t); +#endif + extern int gethostname(char *, size_t); + extern long gethostid(void); + extern pid_t getpgid(pid_t); + extern pid_t getsid(pid_t); + extern char *getwd(char *); + extern int lchown(const char *, uid_t, gid_t); + +#if (defined(_SUSV3_READLINK) || \ + (!defined(_ALL_SOURCE) && (_POSIX_C_SOURCE >= 200112L))) + /* If SUSV3 readlink specifically requested or if 
strict SUSv3 + * environment requested */ +#ifdef __64BIT__ +static ssize_t readlink(const char *__restrict__ __path, + char *__restrict__ __buf, size_t __bufsize) +{ + extern ssize_t __readlink64(const char *__restrict__, char *__restrict__, size_t); + return __readlink64(__path, __buf, __bufsize); +} +#else + extern ssize_t readlink(const char *__restrict__, char *__restrict__, size_t); +#endif /* __64BIT__ */ +#else + extern int readlink(const char *, char *, size_t); +#endif /* _SUSV3_READLINK || !_ALL_SOURCE && _POSIX_C_SOURCE >= 200112L */ + +#ifndef _BSD + extern pid_t setpgrp(void); +#endif /* _BSD */ + extern int setregid(gid_t, gid_t); + extern int setreuid(uid_t, uid_t); + extern int symlink(const char *, const char *); + extern void sync(void); + extern int truncate(const char *, off_t); +#ifdef _LARGE_FILE_API + extern int truncate64(const char *, off64_t); +#endif + extern useconds_t ualarm(useconds_t, useconds_t); + extern int usleep(useconds_t); + extern pid_t vfork(void); +#if _XOPEN_SOURCE>=500 + extern int getlogin_r(char *, size_t); + extern int ttyname_r(int, char *, size_t); + +#ifdef _LARGE_FILES +#define pread pread64 +#define pwrite pwrite64 +#endif /* _LARGE_FILES */ + + extern ssize_t pread(int, void *, size_t, off_t); + extern ssize_t pwrite(int, const void *, size_t, off_t); +#ifdef _LARGE_FILE_API + extern ssize_t pread64(int, void *, size_t, off64_t); + extern ssize_t pwrite64(int, const void *, size_t, off64_t); +#endif /* _LARGE_FILE_API */ +#endif /* _XOPEN_SOURCE>=500 */ + +#endif /* _NO_PROTO */ + +#endif /* _XOPEN_SOURCE_EXTENDED */ + +#ifdef _ALL_SOURCE + +extern char **environ; + +#ifndef _KERNEL +#ifdef _NO_PROTO + extern pid_t f_fork(); +#else /* _NO_PROTO */ + extern pid_t f_fork(void); +#endif /* _NO_PROTO */ +#endif /* _KERNEL */ + +#ifdef _NO_PROTO + extern char * cuserid(); + extern int ioctl(); +#ifdef __64BIT__ + extern int ioctlx(); + extern int ioctl32(); + extern int ioctl32x(); +#endif /* __64BIT__ */ + extern int 
readx(); + extern int setgroups(); + extern int writex(); + extern int setegid(); + extern int seteuid(); + extern int setrgid(); + extern int setruid(); + extern offset_t llseek(); + extern char * getusershell(); + extern void setusershell(); + extern void endusershell(); + extern char * get_current_dir_name(); + extern int sysfs(); +#else + extern char * cuserid(char *); + extern int setegid(gid_t); + extern int seteuid(uid_t); + extern int setrgid(gid_t); + extern int setruid(uid_t); +#ifndef _BSD + extern int ioctl(int, int, ...); +#endif /* _BSD */ +#ifdef __64BIT__ + extern int ioctlx(int, int, void *, long); + extern int ioctl32(int, int, ...); + extern int ioctl32x(int, int, unsigned int, unsigned int); +#endif /* __64BIT__ */ + extern int setgroups(int, gid_t []); +#ifndef _KERNEL + extern int readx(int, char*, unsigned, long); + extern int writex(int, char*, unsigned, long); + +#ifdef _LARGE_FILES +#define fclear fclear64 +#define fsync_range fsync_range64 +#endif + extern off_t fclear(int, off_t); + extern int fsync_range(int, int, off_t, off_t); +#ifdef _LARGE_FILE_API + extern off64_t fclear64(int, off64_t); + extern int fsync_range64(int, int, off64_t, off64_t); +#endif + extern offset_t llseek(int, offset_t, int); + extern char * getusershell(void); + extern void setusershell(void); + extern void endusershell(void); + extern char * get_current_dir_name(void); + extern int sysfs(int, ...); + extern int finfo(const char *, int, void *, int32long64_t); + extern int ffinfo(int, int, void *, int32long64_t); + +#endif /* ndef _KERNEL */ + +#endif /* _NO_PROTO */ + +#define _AES_OS_VERSION 1 /* OSF, AES version */ + +#endif /* _ALL_SOURCE */ + +#ifdef __cplusplus +} +#endif + +#endif /* _H_UNISTD */ diff --git a/workloads/realworld/standard/knn/Makefile b/workloads/realworld/standard/knn/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..6ebd397ea3d2a2082eb070de41b7f1eb452dbf53 --- /dev/null +++ 
b/workloads/realworld/standard/knn/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := knn +CUFILES := knn_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o knn + diff --git a/workloads/realworld/standard/knn/knn_cuda.cu b/workloads/realworld/standard/knn/knn_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..d67babcdd7236c2568843c634353bd0087928397 --- /dev/null +++ b/workloads/realworld/standard/knn/knn_cuda.cu @@ -0,0 +1,588 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. 
Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +int ref_nb = 4096; // Reference point number, max=65535 +int query_nb = 4096; // Query point number, max=65535 +int dim = 128; // Dimension of points +int k = 20; // Nearest neighbors to consider +int iterations = 100; + +//-----------------------------------------------------------------------------------------------// +// KERNELS // +//-----------------------------------------------------------------------------------------------// +__global__ void extract_with_interpolation(int nthreads, float *data, + float *n_xy_coords, + float *extracted_data, + int n_max_coord, int channels, + int height, int width) { + + int x0, x1, y0, y1, nc; + float wx0, wx1, wy0, wy1; + int n, nd; + float x, y; + + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + n = (index / n_max_coord); + nd = n * n_max_coord * channels; + x = n_xy_coords[index * 2]; + y = n_xy_coords[index * 2 + 1]; + + x0 = static_cast(floor(x)); + x1 = x0 + 1; + y0 = static_cast(floor(y)); + y1 = y0 + 1; + + x0 = x0 <= 0 ? 0 : (x0 >= (width - 1) ? (width - 1) : x0); + y0 = y0 <= 0 ? 0 : (y0 >= (height - 1) ? (height - 1) : y0); + x1 = x1 <= 0 ? 0 : (x1 >= (width - 1) ? (width - 1) : x1); + y1 = y1 <= 0 ? 0 : (y1 >= (height - 1) ? 
(height - 1) : y1); + + wx0 = static_cast(x1) - x; + wx1 = x - x0; + wy0 = static_cast(y1) - y; + wy1 = y - y0; + + if (x0 == x1) { + wx0 = 1; + wx1 = 0; + } + if (y0 == y1) { + wy0 = 1; + wy1 = 0; + } + for (int c = 0; c < channels; c++) { + nc = (n * channels + c) * height; + // extracted_data[index * channels + c] = wy0 * wx0 * data[(nc + y0) * + // width + x0] + // extracted_data[nd + index % n_max_coord + n_max_coord * c] = index; + extracted_data[nd + index % n_max_coord + n_max_coord * c] = + wy0 * wx0 * data[(nc + y0) * width + x0] + + wy1 * wx0 * data[(nc + y1) * width + x0] + + wy0 * wx1 * data[(nc + y0) * width + x1] + + wy1 * wx1 * data[(nc + y1) * width + x1]; + } + } +} + +/** + * Computes the distance between two matrix A (reference points) and + * B (query points) containing respectively wA and wB points. + * + * @param A pointer on the matrix A + * @param wA width of the matrix A = number of points in A + * @param B pointer on the matrix B + * @param wB width of the matrix B = number of points in B + * @param dim dimension of points = height of matrices A and B + * @param AB pointer on the matrix containing the wA*wB distances computed + */ +__global__ void cuComputeDistanceGlobal(float *A, int wA, float *B, int wB, + int dim, float *AB) { + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // Declaration of the shared memory arrays As and Bs used to store the + // sub-matrix of A and B + __shared__ float shared_A[BLOCK_DIM][BLOCK_DIM]; + __shared__ float shared_B[BLOCK_DIM][BLOCK_DIM]; + + // Sub-matrix of A (begin, step, end) and Sub-matrix of B (begin, step) + __shared__ int begin_A; + __shared__ int begin_B; + __shared__ int step_A; + __shared__ int step_B; + __shared__ int end_A; + + // Thread index + int tx = threadIdx.x; + int ty = threadIdx.y; + + // Other variables + float tmp; + float ssd = 0; + + // Loop parameters + begin_A = BLOCK_DIM * blockIdx.y; + begin_B = BLOCK_DIM * blockIdx.x; + step_A = 
BLOCK_DIM * wA; + step_B = BLOCK_DIM * wB; + end_A = begin_A + (dim - 1) * wA; + + // if (blockIdx.x == 0 && blockIdx.y == 0 && tx == 0 && ty == 0) + // printf("begin_A is %d, end_A is %d, step_A is %d, begin_B is %d, step_B is %d\n", begin_A, end_A, step_A, begin_B, step_B); + + // Conditions + int cond0 = (begin_A + tx < wA); // used to write in shared memory + int cond1 = (begin_B + tx < wB); // used to write in shared memory & to + // computations and to write in output matrix + int cond2 = + (begin_A + ty < wA); // used to computations and to write in output matrix + // Loop over all the sub-matrices of A and B required to compute the block + // sub-matrix + for (int a = begin_A, b = begin_B; a <= end_A; a += step_A, b += step_B) { + // if (blockIdx.x == 0 && blockIdx.y == 0 && tx == 0 && ty == 0) + // printf("a is %d, end_A is %d, step_A is %d, b is %d, step_B is %d\n", begin_A, end_A, step_A, begin_B, step_B); + // Load the matrices from device memory to shared memory; each thread loads + // one element of each matrix + if (a / wA + ty < dim) { + shared_A[ty][tx] = (cond0) ? A[a + wA * ty + tx] : 0; + shared_B[ty][tx] = (cond1) ? B[b + wB * ty + tx] : 0; + } else { + shared_A[ty][tx] = 0; + shared_B[ty][tx] = 0; + } + + // Synchronize to make sure the matrices are loaded + block.sync(); + // Compute the difference between the two matrixes; each thread computes one + // element of the block sub-matrix + if (cond2 && cond1) { + for (int k = 0; k < BLOCK_DIM; ++k) { + tmp = shared_A[k][ty] - shared_B[k][tx]; + ssd += tmp * tmp; + } + } + + // Synchronize to make sure that the preceding computation is done before + // loading two new sub-matrices of A and B in the next iteration + block.sync(); + } + + // Write the block sub-matrix to device memory; each thread writes one element + if (cond2 && cond1) { + AB[(begin_A + ty) * wB + begin_B + tx] = ssd; + } +} + +/** + * Gathers k-th smallest distances for each column of the distance matrix in + * the top. 
/**
 * Gathers the k smallest distances for each column of the distance matrix
 * into the top k rows, with matching indexes written to `ind`.
 *
 * One thread handles one column (one query point); elements of a column are
 * `width` floats apart in memory, hence the `* width` stride everywhere.
 *
 * @param dist   distance matrix (height x width, row-major)
 * @param ind    index matrix, same shape; receives reference-point indexes
 * @param width  width of both matrices (number of query points)
 * @param height height of both matrices (number of reference points)
 * @param k      number of neighbors to keep sorted at the top
 */
__global__ void cuInsertionSort(float *dist, int *ind, int width, int height,
                                int k) {
  // printf("test2\n");
  // Variables
  int l, i, j;
  float *p_dist;
  int *p_ind;
  float curr_dist, max_dist;
  int curr_row, max_row;
  unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x;

  if (xIndex < width) {
    // Shift both pointers to this thread's column; row r of the column is
    // then p_dist[r * width].
    p_dist = dist + xIndex;
    p_ind = ind + xIndex;
    max_dist = p_dist[0];
    p_ind[0] = 0;

    // Part 1: insertion-sort the first k elements of the column ascending.
    for (l = 1; l < k; l++) {
      curr_row = l * width;
      curr_dist = p_dist[curr_row];
      if (curr_dist < max_dist) {
        // Find the insertion position i among the already-sorted prefix.
        i = l - 1;
        for (int a = 0; a < l - 1; a++) {
          if (p_dist[a * width] > curr_dist) {
            i = a;
            break;
          }
        }
        // Shift elements right to open slot i, then insert value and index.
        for (j = l; j > i; j--) {
          p_dist[j * width] = p_dist[(j - 1) * width];
          p_ind[j * width] = p_ind[(j - 1) * width];
        }
        p_dist[i * width] = curr_dist;
        p_ind[i * width] = l;
      } else {
        // Already the largest so far: it stays in place at row l.
        p_ind[l * width] = l;
      }
      // Row l now holds the largest of the first l+1 sorted elements.
      max_dist = p_dist[curr_row];
    }

    // Part 2: scan the remaining rows; insert any element smaller than the
    // current k-th value into the sorted top-k window.
    max_row = (k - 1) * width;
    for (l = k; l < height; l++) {
      curr_dist = p_dist[l * width];
      if (curr_dist < max_dist) {
        i = k - 1;
        for (int a = 0; a < k - 1; a++) {
          if (p_dist[a * width] > curr_dist) {
            i = a;
            break;
          }
        }
        // Shift the tail of the window right (last element falls off).
        for (j = k - 1; j > i; j--) {
          p_dist[j * width] = p_dist[(j - 1) * width];
          p_ind[j * width] = p_ind[(j - 1) * width];
        }
        p_dist[i * width] = curr_dist;
        p_ind[i * width] = l;
        // Refresh the cutoff from the new last row of the window.
        max_dist = p_dist[max_row];
      }
    }
  }
}
+ * + * @param dist distance matrix + * @param width width of the distance matrix + * @param k number of neighbors to consider + */ +__global__ void cuParallelSqrt(float *dist, int width, int k) { + unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int yIndex = blockIdx.y * blockDim.y + threadIdx.y; + // printf("test3\n"); + if (xIndex < width && yIndex < k) + dist[yIndex * width + xIndex] = sqrt(dist[yIndex * width + xIndex]); +} + +//-----------------------------------------------------------------------------------------------// +// K-th NEAREST NEIGHBORS // +//-----------------------------------------------------------------------------------------------// + +/** + * Prints the error message return during the memory allocation. + * + * @param error error value return by the memory allocation function + * @param memorySize size of memory tried to be allocated + */ +void printErrorMessage(cudaError_t error, int memorySize) { + printf("==================================================\n"); + printf("MEMORY ALLOCATION ERROR : %s\n", cudaGetErrorString(error)); + printf("Whished allocated memory : %d\n", memorySize); + printf("==================================================\n"); +} + +/** + * K nearest neighbor algorithm + * - Initialize CUDA + * - Allocate device memory + * - Copy point sets (reference and query points) from host to device memory + * - Compute the distances + indexes to the k nearest neighbors for each query + * point + * - Copy distances from device to host memory + * + * @param ref_host reference points ; pointer to linear matrix + * @param ref_width number of reference points ; width of the matrix + * @param query_host query points ; pointer to linear matrix + * @param query_width number of query points ; width of the matrix + * @param height dimension of points ; height of the matrices + * @param k number of neighbor to consider + * @param dist_host distances to k nearest neighbors ; pointer to linear + * matrix + * 
/**
 * Euclidean (L2) distance between one reference point and one query point.
 *
 * Both point sets are stored column-major: coordinate d of point i lives at
 * array[d * point_count + i].
 *
 * @param ref         reference point set
 * @param ref_nb      number of reference points (column stride of `ref`)
 * @param query       query point set
 * @param query_nb    number of query points (column stride of `query`)
 * @param dim         dimensionality of the points
 * @param ref_index   column of the reference point
 * @param query_index column of the query point
 * @return the Euclidean distance between the two selected points
 */
float compute_distance(const float *ref, int ref_nb, const float *query,
                       int query_nb, int dim, int ref_index, int query_index) {
  float acc = 0.f;
  for (int axis = 0; axis < dim; ++axis) {
    const float delta =
        ref[axis * ref_nb + ref_index] - query[axis * query_nb + query_index];
    acc += delta * delta;
  }
  return sqrtf(acc);
}
the current value if its index is >= k and if it's higher the k-th + // slready sorted mallest value + if (i >= k && curr_dist >= dist[k - 1]) { + continue; + } + + // Shift values (and indexes) higher that the current distance to the right + int j = min(i, k - 1); + while (j > 0 && dist[j - 1] > curr_dist) { + dist[j] = dist[j - 1]; + index[j] = index[j - 1]; + --j; + } + + // Write the current distance and index at their position + dist[j] = curr_dist; + index[j] = curr_index; + } +} + +bool knn_c(const float *ref, int ref_nb, const float *query, int query_nb, + int dim, int k, float *knn_dist, int *knn_index) { + // Allocate local array to store all the distances / indexes for a given query + // point + float *dist = (float *)malloc(ref_nb * sizeof(float)); + int *index = (int *)malloc(ref_nb * sizeof(int)); + + // Allocation checks + if (!dist || !index) { + printf("Memory allocation error\n"); + free(dist); + free(index); + return false; + } + + // Process one query point at the time + for (int i = 0; i < query_nb; ++i) { + + // Compute all distances / indexes + for (int j = 0; j < ref_nb; ++j) { + dist[j] = compute_distance(ref, ref_nb, query, query_nb, dim, j, i); + index[j] = j; + } + + // Sort distances / indexes + modified_insertion_sort(dist, index, ref_nb, k); + + // Copy k smallest distances and their associated index + for (int j = 0; j < k; ++j) { + knn_dist[j * query_nb + i] = dist[j]; + knn_index[j * query_nb + i] = index[j]; + } + } + + // Memory clean-up + free(dist); + free(index); + return true; +} + +/** + * Example of use of kNN search CUDA. 
/**
 * kNN benchmark driver: generates random reference/query sets, runs the GPU
 * kNN search `iterations` times under CUPTI tracing, then reports accuracy
 * against `knn_dist`/`knn_index`.
 *
 * Usage: knn [ref_nb query_nb dim] — overrides the file-scope globals.
 *
 * NOTE(review): the ground-truth computation (knn_c call below) is commented
 * out, so knn_dist/knn_index are never written before the accuracy loop at
 * the end compares against them — that comparison reads uninitialized
 * memory. Confirm whether the ground-truth block should be re-enabled.
 */
int main(int argc, char *argv[])
{
  // Raw timestamp counters logged at process start (helpers from common/).
  uint64_t start_tsc = rdtsc();
  uint64_t start_tsp = rdtsp();
  printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp);
  // Variables and parameters
  float *ref;           // Pointer to reference point array
  float *query;         // Pointer to query point array
  float *dist, *dist_c; // Pointer to distance array
  int *ind, *ind_c;     // Pointer to index array

  // Optional CLI overrides of the file-scope problem-size globals.
  if (argc >= 4) {
    ref_nb = atoi(argv[1]);
    query_nb = atoi(argv[2]);
    dim = atoi(argv[3]);
  }

  int c_iterations = 10;          // CPU timing loop count (currently unused)
  int i;
  const float precision = 0.001f; // distance error max
  int nb_correct_precisions = 0;
  int nb_correct_indexes = 0;
  // Expected-result buffers; NOTE(review): never filled (see header note)
  // and never freed before exit.
  float *knn_dist = (float *)malloc(query_nb * k * sizeof(float));
  int *knn_index = (int *)malloc(query_nb * k * sizeof(int));

  // Memory allocation
  ref = (float *)malloc(ref_nb * dim * sizeof(float));
  query = (float *)malloc(query_nb * dim * sizeof(float));
  dist = (float *)malloc(query_nb * ref_nb * sizeof(float));
  ind = (int *)malloc(query_nb * k * sizeof(int));
  // cudaMallocManaged(&ref, ref_nb * dim * sizeof(float));
  // cudaMallocManaged(&query, query_nb * dim * sizeof(float));
  // cudaMallocManaged(&dist, query_nb * ref_nb * sizeof(float));
  // cudaMallocManaged(&ind, query_nb * k * sizeof(int));
  dist_c = (float *)malloc(query_nb * k * sizeof(float));
  // NOTE(review): int buffer sized with sizeof(float) — same byte count on
  // common platforms, but sizeof(int) is the correct unit here.
  ind_c = (int *)malloc(query_nb * k * sizeof(float));

  // Init: fill both point sets with uniform random values in [0, 1].
  srand(time(NULL));
  for (i = 0; i < ref_nb * dim; i++)
    ref[i] = (float)rand() / (float)RAND_MAX;
  for (i = 0; i < query_nb * dim; i++)
    query[i] = (float)rand() / (float)RAND_MAX;

  // printf("Ground truth computation in progress...\n\n");
  // if (!knn_c(ref, ref_nb, query, query_nb, dim, k, knn_dist, knn_index)) {
  //   free(knn_dist);
  //   free(knn_index);
  //   return EXIT_FAILURE;
  // }

  // Variables for duration evaluation
  float elapsed_time;

  // Display informations
  printf("Number of reference points : %6d\n", ref_nb);
  printf("Number of query points : %6d\n", query_nb);
  printf("Dimension of points : %4d\n", dim);
  printf("Number of neighbors to consider : %4d\n", k);
  printf("Processing kNN search :\n");

  float precision_accuracy = 0.0f;
  float index_accuracy = 0.0f;
  /*
  printf("On CPU: \n");
  struct timeval tic;
  gettimeofday(&tic, NULL);
  for (i = 0; i < c_iterations; i++) {
    knn_c(ref, ref_nb, query, query_nb, dim, k, dist_c, ind_c);
  }

  for (int i = 0; i < query_nb * k; ++i) {
    if (fabs(dist_c[i] - knn_dist[i]) <= precision) {
      nb_correct_precisions++;
    }
    if (ind_c[i] == knn_index[i]) {
      nb_correct_indexes++;
    }
  }

  struct timeval toc;
  gettimeofday(&toc, NULL);
  elapsed_time = toc.tv_sec - tic.tv_sec;
  elapsed_time += (toc.tv_usec - tic.tv_usec) / 1000000.;
  precision_accuracy = nb_correct_precisions / ((float)query_nb * k);
  index_accuracy = nb_correct_indexes / ((float)query_nb * k);
  printf("%f, %f\n", precision_accuracy, index_accuracy);
  printf(" done in %f s for %d iterations (%f s by iteration)\n", elapsed_time,
         c_iterations, elapsed_time / (c_iterations));
  */
  printf("on GPU: \n");

  // Call kNN search CUDA
  GPU_argv_init();

  // Start CUPTI activity tracing and the CPU wall-clock window; the
  // device-side section below is what gets measured.
  initTrace();
  startCPU();

  float *ref_device;
  float *query_device;
  float *dist_device;
  int *ind_device;

  cudaMalloc(&ref_device, ref_nb * dim * sizeof(float));
  cudaMalloc(&query_device, query_nb * dim * sizeof(float));
  cudaMalloc(&dist_device, query_nb * ref_nb * sizeof(float));
  cudaMalloc(&ind_device, query_nb * k * sizeof(int));

  // Upload both point sets once; they are reused across all iterations.
  cudaMemcpy(ref_device, ref, ref_nb * dim * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(query_device, query, query_nb * dim * sizeof(float), cudaMemcpyHostToDevice);

  // Repeated GPU kNN searches over the same device buffers.
  for (i = 0; i < iterations; i++) {
    // knn_cuda(ref, ref_nb, query, query_nb, dim, k, dist, ind);
    knn_cuda(ref_device, ref_nb, query_device, query_nb, dim, k, dist_device, ind_device);
  }

  // Bring back the last iteration's results.
  cudaMemcpy(dist, dist_device, query_nb * ref_nb * sizeof(float), cudaMemcpyDeviceToHost);
  cudaMemcpy(ind, ind_device, query_nb * k * sizeof(int), cudaMemcpyDeviceToHost);

  cudaFree(ind_device);
  cudaFree(dist_device);
  cudaFree(query_device);
  cudaFree(ref_device);

  // Close the timing window and flush CUPTI trace data.
  endCPU();
  finiTrace();

  // Accuracy check against knn_dist/knn_index (see NOTE in the header:
  // those buffers are currently never populated).
  nb_correct_precisions = 0;
  nb_correct_indexes = 0;
  for (int i = 0; i < query_nb * k; ++i) {
    if (fabs(dist[i] - knn_dist[i]) <= precision) {
      nb_correct_precisions++;
    }
    if (ind[i] == knn_index[i]) {
      nb_correct_indexes++;
    }
  }

  precision_accuracy = nb_correct_precisions / ((float)query_nb * k);
  index_accuracy = nb_correct_indexes / ((float)query_nb * k);
  printf("%f, %f\n", precision_accuracy, index_accuracy);

  // Destroy cuda event object and free memory
  // cudaFree(ind);
  // cudaFree(dist);
  // cudaFree(query);
  // cudaFree(ref);
  free(ind);
  free(dist);
  free(query);
  free(ref);
  free(dist_c);
  free(ind_c);
}
+//======================================================================================================================================================150 +// DESCRIPTION +//======================================================================================================================================================150 + +This is the CUDA version of the code. + +The code calculates particle potential and relocation due to mutual forces between particles within a large 3D space. This space is +divided into cubes, or large boxes, that are allocated to individual cluster nodes. The large box at each node is further divided into +cubes, called boxes. 26 neighbor boxes surround each box (the home box). Home boxes at the boundaries of the particle space have fewer neighbors. +Particles only interact with those other particles that are within a cutoff radius since ones at larger distances exert negligible forces. Thus the +box size s chosen so that cutoff radius does not span beyond any neighbor box for any particle in a home box, thus limiting the reference space to +a finite number of boxes. + +This code [1] was derived from the ddcMD application [2] by rewriting the front end and structuring it for parallelization. This code represents MPI +task that runs on a single cluster node. While the details of the code are somewhat different than the original, the code retains the structure of the +MPI task in the original code. Since the rest of MPI code is not included here, the application first emulates MPI partitioning of the particle space +into boxes. Then, for every particle in the home box, the nested loop processes interactions first with other particles in the home box and then with +particles in all neighbor boxes. The processing of each particle consists of a single stage of calculation that is enclosed in the innermost loop. 
The +nested loops in the application were parallelized in such a way that at any point of time GPU warp/wavefront accesses adjacent memory locations. The +speedup depends on the number of boxes, particles (fixed) and the actualcal culation for each particle (fixed). The application is memory bound, and +GPU speedup seems to saturate at about 16x when compared to single-core CPU. + +More information about the parallel version of this code can be found in: +[1] L. G. Szafaryn, T. Gamblin, B. deSupinski and K. Skadron. "Experiences with Achieving Portability across Heterogeneous Architectures." Submitted to +WOLFHPC workshop at 25th International Conference on Supercomputing (ICS). Tucson, AZ. 2010. +More about the original ddcMD application can be found in: +[2] F. H. Streitz, J. N. Glosli, M. V. Patel, B. Chan, R. K. Yates, B. R. de Supinski, J. Sexton, J and A. Gunnels. "100+ TFlop Solidification Simulations +on BlueGene/L." In Proceedings of the 2005 Supercomputing Conference (SC 05). Seattle, WA. 2005. 
+ +//======================================================================================================================================================150 +// USE +//======================================================================================================================================================150 + +The code takes the followint parameters: +-boxes1d (number of boxes in one dimension, the total number of boxes will be that^3) + +The code can be run as follows: +./lavaMD -boxes1d 10 + +******Adjustable work group size***** +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=128" + +######OUTPUT FOR VALIDATION######## +USAGE: +make clean +make OUTPUT=Y \ No newline at end of file diff --git a/workloads/realworld/standard/lavaMD/kernel/kernel_gpu_cuda.cu b/workloads/realworld/standard/lavaMD/kernel/kernel_gpu_cuda.cu new file mode 100755 index 0000000000000000000000000000000000000000..15164488f40349d583134da5d2a03a1ffc854c52 --- /dev/null +++ b/workloads/realworld/standard/lavaMD/kernel/kernel_gpu_cuda.cu @@ -0,0 +1,199 @@ +//----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------200 +// plasmaKernel_gpu_2 +//----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------200 + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +__global__ void kernel_gpu_cuda(par_str d_par_gpu, + dim_str d_dim_gpu, + box_str *d_box_gpu, + FOUR_VECTOR *d_rv_gpu, + fp *d_qv_gpu, + FOUR_VECTOR *d_fv_gpu, + int boxes_per_block) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + 
//--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + // THREAD PARAMETERS + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + int bx = blockIdx.x; // get current horizontal block index (0-n) + int tx = threadIdx.x; // get current horizontal thread index (0-n) + int wtx = tx; + + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + // Extract input parameters + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + + // parameters + fp a2 = 2.0 * d_par_gpu.alpha * d_par_gpu.alpha; + + // home box + int first_i; + FOUR_VECTOR *rA; + FOUR_VECTOR *fA; + __shared__ FOUR_VECTOR rA_shared[100]; + + // nei box + int pointer; + int k = 0; + int first_j; + FOUR_VECTOR *rB; + fp *qB; + int j = 0; + __shared__ FOUR_VECTOR rB_shared[100]; + __shared__ double qB_shared[100]; + + // common + fp r2; + fp u2; + fp vij; + fp fs; + fp fxij; + fp fyij; + fp fzij; + THREE_VECTOR d; + + + int box = bx * boxes_per_block; + int end_box = box + boxes_per_block; + + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + // DO FOR THE NUMBER OF BOXES + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + for (; box < end_box; box++) + { + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + // 
Home box + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Setup parameters + //----------------------------------------------------------------------------------------------------------------------------------140 + + // home box - box parameters + first_i = d_box_gpu[box].offset; + + // home box - distance, force, charge and type parameters + rA = &d_rv_gpu[first_i]; + fA = &d_fv_gpu[first_i]; + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Copy to shared memory + //----------------------------------------------------------------------------------------------------------------------------------140 + + // home box - shared memory + while (wtx < NUMBER_PAR_PER_BOX) + { + rA_shared[wtx] = rA[wtx]; + wtx = wtx + NUMBER_THREADS; + } + wtx = tx; + + // synchronize threads - not needed, but just to be safe + block.sync(); + + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + // nei box loop + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + + // if (wtx == 0) + // printf("d_box_gpu[%d].nn is %d\n", bx, d_box_gpu[bx].nn); + + int tile = 0; + int end_tile = 1 + d_box_gpu[box].nn; + + // loop over neiing boxes of home box + for (; tile < end_tile; tile++) + { + + //----------------------------------------50 + // nei box - get pointer to the right box + //----------------------------------------50 + + if (tile == 0) + { + pointer = box; // set first box to be processed to home box + } + else + { + pointer = 
d_box_gpu[box].nei[tile - 1].number; // remaining boxes are nei boxes + } + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Setup parameters + //----------------------------------------------------------------------------------------------------------------------------------140 + + // nei box - box parameters + first_j = d_box_gpu[pointer].offset; + + // nei box - distance, (force), charge and (type) parameters + rB = &d_rv_gpu[first_j]; + qB = &d_qv_gpu[first_j]; + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Setup parameters + //----------------------------------------------------------------------------------------------------------------------------------140 + + // nei box - shared memory + while (wtx < NUMBER_PAR_PER_BOX) + { + rB_shared[wtx] = rB[wtx]; + qB_shared[wtx] = qB[wtx]; + wtx = wtx + NUMBER_THREADS; + } + wtx = tx; + + // synchronize threads because in next section each thread accesses data brought in by different threads here + block.sync(); + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Calculation + //----------------------------------------------------------------------------------------------------------------------------------140 + + // loop for the number of particles in the home box + // for (int i=0; i +#include "../../../../common/cupti_add.h" +#include "../../../../common/cpu_timestamps.h" + +void +kernel_gpu_cuda_wrapper(par_str par_cpu, + dim_str dim_cpu, + box_str* box_cpu, + FOUR_VECTOR* rv_cpu, + fp* qv_cpu, + FOUR_VECTOR* fv_cpu, + int nblocks) +{ + + //======================================================================================================================================================150 + // CPU VARIABLES + 
//======================================================================================================================================================150 + + // timer + long long time0; + long long time1; + long long time2; + long long time3; + long long time4; + long long time5; + long long time6; + + time0 = get_time(); + + //======================================================================================================================================================150 + // GPU SETUP + //======================================================================================================================================================150 + + //====================================================================================================100 + // INITIAL DRIVER OVERHEAD + //====================================================================================================100 + + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaThreadSynchronize(); + + //====================================================================================================100 + // VARIABLES + //====================================================================================================100 + + box_str* d_box_gpu; + FOUR_VECTOR* d_rv_gpu; + fp* d_qv_gpu; + FOUR_VECTOR* d_fv_gpu; + + dim3 threads; + dim3 blocks; + + //====================================================================================================100 + // EXECUTION PARAMETERS + //====================================================================================================100 + + // blocks.x = dim_cpu.number_boxes; + blocks.x = nblocks * nblocks * nblocks; + blocks.y = 1; + threads.x = NUMBER_THREADS; // define the number of threads in the block + threads.y = 1; + + int boxes_per_block = 1; + if (dim_cpu.number_boxes >= blocks.x) + { + boxes_per_block = (dim_cpu.number_boxes + blocks.x - 1) / blocks.x; + } + + time1 = get_time(); + + 
//======================================================================================================================================================150 + // GPU MEMORY (MALLOC) + //======================================================================================================================================================150 + + //====================================================================================================100 + // GPU MEMORY (MALLOC) COPY IN + //====================================================================================================100 + + //==================================================50 + // boxes + //==================================================50 + + cudaMalloc( (void **)&d_box_gpu, + dim_cpu.box_mem); + + //==================================================50 + // rv + //==================================================50 + + cudaMalloc( (void **)&d_rv_gpu, + dim_cpu.space_mem); + + //==================================================50 + // qv + //==================================================50 + + cudaMalloc( (void **)&d_qv_gpu, + dim_cpu.space_mem2); + + //====================================================================================================100 + // GPU MEMORY (MALLOC) COPY + //====================================================================================================100 + + //==================================================50 + // fv + //==================================================50 + + cudaMalloc( (void **)&d_fv_gpu, + dim_cpu.space_mem); + + time2 = get_time(); + + //======================================================================================================================================================150 + // GPU MEMORY COPY + //======================================================================================================================================================150 + + 
//====================================================================================================100 + // GPU MEMORY (MALLOC) COPY IN + //====================================================================================================100 + + //==================================================50 + // boxes + //==================================================50 + + cudaMemcpy( d_box_gpu, + box_cpu, + dim_cpu.box_mem, + cudaMemcpyHostToDevice); + + //==================================================50 + // rv + //==================================================50 + + cudaMemcpy( d_rv_gpu, + rv_cpu, + dim_cpu.space_mem, + cudaMemcpyHostToDevice); + + //==================================================50 + // qv + //==================================================50 + + cudaMemcpy( d_qv_gpu, + qv_cpu, + dim_cpu.space_mem2, + cudaMemcpyHostToDevice); + + //====================================================================================================100 + // GPU MEMORY (MALLOC) COPY + //====================================================================================================100 + + //==================================================50 + // fv + //==================================================50 + + cudaMemcpy( d_fv_gpu, + fv_cpu, + dim_cpu.space_mem, + cudaMemcpyHostToDevice); + + time3 = get_time(); + + //======================================================================================================================================================150 + // KERNEL + //======================================================================================================================================================150 + // launch kernel - all boxes + kernel_gpu_cuda<<>>(par_cpu, + dim_cpu, + d_box_gpu, + d_rv_gpu, + d_qv_gpu, + d_fv_gpu, + boxes_per_block); + + checkCUDAError("Start"); + cudaDeviceSynchronize(); + + + time4 = get_time(); + + 
//======================================================================================================================================================150 + // GPU MEMORY COPY (CONTD.) + //======================================================================================================================================================150 + + cudaMemcpy( fv_cpu, + d_fv_gpu, + dim_cpu.space_mem, + cudaMemcpyDeviceToHost); + + time5 = get_time(); + + //======================================================================================================================================================150 + // GPU MEMORY DEALLOCATION + //======================================================================================================================================================150 + + cudaFree(d_rv_gpu); + cudaFree(d_qv_gpu); + cudaFree(d_fv_gpu); + cudaFree(d_box_gpu); + + endCPU(); + finiTrace(); + + time6 = get_time(); + + //======================================================================================================================================================150 + // DISPLAY TIMING + //======================================================================================================================================================150 + + printf("Time spent in different stages of GPU_CUDA KERNEL:\n"); + + printf("%15.12f s, %15.12f % : GPU: SET DEVICE / DRIVER INIT\n", (float) (time1-time0) / 1000000, (float) (time1-time0) / (float) (time6-time0) * 100); + printf("%15.12f s, %15.12f % : GPU MEM: ALO\n", (float) (time2-time1) / 1000000, (float) (time2-time1) / (float) (time6-time0) * 100); + printf("%15.12f s, %15.12f % : GPU MEM: COPY IN\n", (float) (time3-time2) / 1000000, (float) (time3-time2) / (float) (time6-time0) * 100); + + printf("%15.12f s, %15.12f % : GPU: KERNEL\n", (float) (time4-time3) / 1000000, (float) (time4-time3) / (float) (time6-time0) * 100); + + printf("%15.12f s, %15.12f % : GPU MEM: COPY OUT\n", (float) (time5-time4) 
/ 1000000, (float) (time5-time4) / (float) (time6-time0) * 100); + printf("%15.12f s, %15.12f % : GPU MEM: FRE\n", (float) (time6-time5) / 1000000, (float) (time6-time5) / (float) (time6-time0) * 100); + + printf("Total time:\n"); + printf("%.12f s\n", (float) (time6-time0) / 1000000); + +} diff --git a/workloads/realworld/standard/lavaMD/kernel/kernel_gpu_cuda_wrapper.h b/workloads/realworld/standard/lavaMD/kernel/kernel_gpu_cuda_wrapper.h new file mode 100755 index 0000000000000000000000000000000000000000..cf499f1480469569c649eccf174cc8ba0655ddbd --- /dev/null +++ b/workloads/realworld/standard/lavaMD/kernel/kernel_gpu_cuda_wrapper.h @@ -0,0 +1,19 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//========================================================================================================================================================================================================200 +// KERNEL_GPU_CUDA_WRAPPER HEADER +//========================================================================================================================================================================================================200 + +void kernel_gpu_cuda_wrapper( par_str parms_cpu, + dim_str dim_cpu, + box_str* box_cpu, + FOUR_VECTOR* rv_cpu, + fp* qv_cpu, + FOUR_VECTOR* fv_cpu, + int nblocks); + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/standard/lavaMD/main.c b/workloads/realworld/standard/lavaMD/main.c new file mode 100755 index 0000000000000000000000000000000000000000..a7c88472e3939414bbdc314e2bfb1c46bc345bea --- /dev/null +++ b/workloads/realworld/standard/lavaMD/main.c @@ -0,0 +1,318 @@ +//========================================================================================================================================================================================================200 
+//======================================================================================================================================================150 +//====================================================================================================100 +//==================================================50 + +//========================================================================================================================================================================================================200 +// UPDATE +//========================================================================================================================================================================================================200 + +// 14 APR 2011 Lukasz G. Szafaryn + +//========================================================================================================================================================================================================200 +// DEFINE/INCLUDE +//========================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// LIBRARIES +//======================================================================================================================================================150 + +#include // (in path known to compiler) needed by printf +#include // (in path known to compiler) needed by malloc +#include // (in path known to compiler) needed by true/false + +//======================================================================================================================================================150 +// UTILITIES 
+//======================================================================================================================================================150 + +#include "./util/timer/timer.h" // (in path specified here) +#include "./util/num/num.h" // (in path specified here) + +//======================================================================================================================================================150 +// MAIN FUNCTION HEADER +//======================================================================================================================================================150 + +#include "./main.h" // (in the current directory) + +//======================================================================================================================================================150 +// KERNEL +//======================================================================================================================================================150 + +#include "./kernel/kernel_gpu_cuda_wrapper.h" // (in library path specified here) + +//========================================================================================================================================================================================================200 +// MAIN FUNCTION +//========================================================================================================================================================================================================200 +#define _POSIX_C_SOURCE 200809L +#include +#include +#include +extern inline __attribute__((always_inline)) unsigned long rdtsc() +{ + unsigned long a, d; + + __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); + + return (a | (d << 32)); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsp() { + struct timespec tms; + if (clock_gettime(CLOCK_REALTIME, &tms)) { + return -1; + } + unsigned long ns = tms.tv_sec * 1000000000; + ns += tms.tv_nsec; + return ns; +} + + 
+int +main( int argc, + char *argv []) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + + printf("thread block size of kernel = %d \n", NUMBER_THREADS); + //======================================================================================================================================================150 + // CPU/MCPU VARIABLES + //======================================================================================================================================================150 + + // timer + long long time0; + + time0 = get_time(); + + // timer + long long time1; + long long time2; + long long time3; + long long time4; + long long time5; + long long time6; + long long time7; + + // counters + int i, j, k, l, m, n; + + // system memory + par_str par_cpu; + dim_str dim_cpu; + box_str* box_cpu; + FOUR_VECTOR* rv_cpu; + fp* qv_cpu; + FOUR_VECTOR* fv_cpu; + int nh; + + time1 = get_time(); + + //======================================================================================================================================================150 + // CHECK INPUT ARGUMENTS + //======================================================================================================================================================150 + + // assing default values + dim_cpu.boxes1d_arg = 1; + + // go through arguments + dim_cpu.boxes1d_arg = atoi(argv[1]); + int nblocks = atoi(argv[2]); + + // Print configuration + printf("Configuration used: boxes1d = %d\n", dim_cpu.boxes1d_arg); + + time2 = get_time(); + + //======================================================================================================================================================150 + // INPUTS + //======================================================================================================================================================150 + + par_cpu.alpha = 0.5; + + time3 = get_time(); + + 
//======================================================================================================================================================150 + // DIMENSIONS + //======================================================================================================================================================150 + + // total number of boxes + dim_cpu.number_boxes = dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg; + + // how many particles space has in each direction + dim_cpu.space_elem = dim_cpu.number_boxes * NUMBER_PAR_PER_BOX; + dim_cpu.space_mem = dim_cpu.space_elem * sizeof(FOUR_VECTOR); + dim_cpu.space_mem2 = dim_cpu.space_elem * sizeof(fp); + + // box array + dim_cpu.box_mem = dim_cpu.number_boxes * sizeof(box_str); + + time4 = get_time(); + + //======================================================================================================================================================150 + // SYSTEM MEMORY + //======================================================================================================================================================150 + + //====================================================================================================100 + // BOX + //====================================================================================================100 + + // allocate boxes + box_cpu = (box_str*)malloc(dim_cpu.box_mem); + + // initialize number of home boxes + nh = 0; + + // home boxes in z direction + for(i=0; i=0 && (j+m)>=0 && (k+n)>=0)==true && ((i+l) 1) { + + // variables + int max_multiprocessors; + int max_device; + cudaDeviceProp properties; + + // initialize variables + max_multiprocessors = 0; + max_device = 0; + + for (device = 0; device < num_devices; device++) { + cudaGetDeviceProperties(&properties, device); + if (max_multiprocessors < properties.multiProcessorCount) { + max_multiprocessors = properties.multiProcessorCount; + max_device = device; + } + } + 
cudaSetDevice(max_device); + } + +} + +//====================================================================================================100 +// GET LAST ERROR +//====================================================================================================100 + +void checkCUDAError(const char *msg) +{ + cudaError_t err = cudaGetLastError(); + if( cudaSuccess != err) { + // fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + printf("Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + fflush(NULL); + exit(EXIT_FAILURE); + } +} + +//===============================================================================================================================================================================================================200 +// END SET_DEVICE CODE +//===============================================================================================================================================================================================================200 diff --git a/workloads/realworld/standard/lavaMD/util/device/device.h b/workloads/realworld/standard/lavaMD/util/device/device.h new file mode 100755 index 0000000000000000000000000000000000000000..23bb31d26c1bc0e607c9b2faf7bddaa5a5c06d98 --- /dev/null +++ b/workloads/realworld/standard/lavaMD/util/device/device.h @@ -0,0 +1,29 @@ +//===============================================================================================================================================================================================================200 +// SET_DEVICE HEADER +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// INCLUDE/DEFINE 
+//======================================================================================================================================================150 + +#include // (in library path known to compiler) needed by printf + +//======================================================================================================================================================150 +// FUNCTION PROTOTYPES +//======================================================================================================================================================150 + +//====================================================================================================100 +// SET DEVICE +//====================================================================================================100 + +void setdevice(void); + +//====================================================================================================100 +// GET LAST ERROR +//====================================================================================================100 + +void checkCUDAError(const char *msg); + +//===============================================================================================================================================================================================================200 +// END SET_DEVICE HEADER +//===============================================================================================================================================================================================================200 diff --git a/workloads/realworld/standard/lavaMD/util/num/num.c b/workloads/realworld/standard/lavaMD/util/num/num.c new file mode 100755 index 0000000000000000000000000000000000000000..980ff7498832c784eab718a8b886e82891047599 --- /dev/null +++ b/workloads/realworld/standard/lavaMD/util/num/num.c @@ -0,0 +1,53 @@ +#ifdef __cplusplus +extern "C" { +#endif + 
+//===============================================================================================================================================================================================================200 +// DESCRIPTION +//===============================================================================================================================================================================================================200 + +// Returns: 0 if string does not represent integer +// 1 if string represents integer + +//===============================================================================================================================================================================================================200 +// NUM CODE +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// ISINTEGER FUNCTION +//======================================================================================================================================================150 + +int isInteger(char *str){ + + //====================================================================================================100 + // make sure it's not empty + //====================================================================================================100 + + if (*str == '\0'){ + return 0; + } + + //====================================================================================================100 + // if any digit is not a number, return false + //====================================================================================================100 + + for(; *str != '\0'; str++){ + if (*str < 48 || *str > 57){ // digit characters (need to include . 
if checking for float) + return 0; + } + } + + //====================================================================================================100 + // it got past all my checks so I think it's a number + //====================================================================================================100 + + return 1; +} + +//===============================================================================================================================================================================================================200 +// END NUM CODE +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/standard/lavaMD/util/num/num.h b/workloads/realworld/standard/lavaMD/util/num/num.h new file mode 100755 index 0000000000000000000000000000000000000000..27a5e42fe2819d9ecc2f569b5979fb451985976f --- /dev/null +++ b/workloads/realworld/standard/lavaMD/util/num/num.h @@ -0,0 +1,21 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// FILE HEADER +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// ISINTEGER FUNCTION PROTOTYPE +//======================================================================================================================================================150 + +int isInteger(char *str); + 
+//===============================================================================================================================================================================================================200 +// END FILE HEADER +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/standard/lavaMD/util/timer/timer.c b/workloads/realworld/standard/lavaMD/util/timer/timer.c new file mode 100755 index 0000000000000000000000000000000000000000..c7cc252b4e67b3a868722b7b2c58f5b863ae0cfc --- /dev/null +++ b/workloads/realworld/standard/lavaMD/util/timer/timer.c @@ -0,0 +1,36 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// TIMER CODE +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// INCLUDE/DEFINE +//======================================================================================================================================================150 + +#include + +//======================================================================================================================================================150 +// FUNCTIONS +//======================================================================================================================================================150 + 
+//====================================================================================================100 +// DISPLAY TIME +//====================================================================================================100 + + // Returns the current system time in microseconds +long long get_time() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000000) + tv.tv_usec; +} + +//===============================================================================================================================================================================================================200 +// END TIMER CODE +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/standard/lavaMD/util/timer/timer.h b/workloads/realworld/standard/lavaMD/util/timer/timer.h new file mode 100755 index 0000000000000000000000000000000000000000..1744df4b8607f95c057ac4db6e9ced5ff84c4ab7 --- /dev/null +++ b/workloads/realworld/standard/lavaMD/util/timer/timer.h @@ -0,0 +1,21 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// TIMER HEADER +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// FUNCTION PROTOTYPES 
+//======================================================================================================================================================150 + +long long get_time(); + +//===============================================================================================================================================================================================================200 +// END TIMER HEADER +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/standard/lud/Makefile b/workloads/realworld/standard/lud/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1dfc37ac7fa0db46535d0583970dba1cb5cfb80e --- /dev/null +++ b/workloads/realworld/standard/lud/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := lud +CUFILES := lud_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o lud + diff --git a/workloads/realworld/standard/lud/lud_cuda.cu b/workloads/realworld/standard/lud/lud_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..989ee0c96e91f78ba9976a5376e78c93bffdeaec --- /dev/null +++ b/workloads/realworld/standard/lud/lud_cuda.cu @@ -0,0 +1,278 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. 
Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK 256 + +#ifndef SIZE +#define SIZE 4096 +#endif + +// __global__ void add(float *a, float *b, float *c) +// { +// int tid = blockIdx.x; // Handle the data at the index + +// c[tid] = a[tid] + b[tid]; +// } + +// __global__ void scale(float *a, int size, int index) +// { +// int i; +// int start = (index * size + index); +// int end = (index * size + size); + +// for (i = start + 1; i < end; i++) +// { +// a[i] = (a[i] / a[start]); +// } +// } + +// __global__ void reduce(float *a, int size, int index, int b_size) +// { +// extern __shared__ float pivot[]; +// int i; + +// int tid = threadIdx.x; +// int bid = blockIdx.x; +// int block_size = b_size; + +// int pivot_start = (index * size + index); +// int pivot_end = (index * size + size); + +// int start; +// int end; +// int pivot_row; +// int my_row; + +// if (tid == 0) +// { +// for (i = index; i < size; i++) +// pivot[i] = a[(index * size) + i]; +// } + +// __syncthreads(); + +// pivot_row = (index * size); +// my_row = (((block_size * bid) + tid) * size); +// start = my_row + index; +// end = my_row + size; + +// if (my_row > pivot_row) +// { +// for (i = start + 1; i < end; i++) +// { +// a[i] = a[i] - (a[start] * pivot[(i - my_row)]); +// } +// } +// } + +void initCPU(float *a, int N) +{ + srand((unsigned)2); + // fill the arrays 'a' on the CPU + for (int i = 0; i < (N * N); i++) + { + a[i] = ((rand() % 10) + 1); + } +} + +void initGPU(float *a_dev, int N) +{ + srand((unsigned)2); + for (int i = 0; i < (N * N); i++) + { + a_dev[i] = ((rand() % 10) + 1); + } +} + +__global__ void lud_kernel(float *a, int N) +{ + cooperative_groups::thread_block block = 
cooperative_groups::this_thread_block(); + // extern __shared__ float pivot[]; + __shared__ float pivot[SIZE]; + + for (int tile = 0; tile < N; tile += 1) { + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = blockDim.x; + + if (tid == 0 && bid == 0) { + int start = (tile * N + tile); + int end = (tile * N + N); + + for (int i = start + 1; i < end; i++) + a[i] = (a[i] / a[start]); + } + block.sync(); + + if (tid == 0) + { + for (int i = tile; i < N; i++) + pivot[i] = a[(tile * N) + i]; + } + block.sync(); + + int pivot_row = (tile * N); + int my_row = (((block_size * bid) + tid) * N); + int start = my_row + tile; + int end = my_row + N; + + if (my_row > pivot_row) + { + for (int i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(i - my_row)]); + } + } + block.sync(); + } +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + float *a; + float *c; + float error; + int N; + int flag = 0; + + float **result; + float **a_ref; + int blocks; + + float *dev_a; + int i; + int j; + int k; + float l1; + float u1; + + N = SIZE; + // allocate memory on CPU + a = (float *)malloc(sizeof(float) * N * N); + c = (float *)malloc(sizeof(float) * N * N); + + result = (float **)malloc(sizeof(float *) * N); + a_ref = (float **)malloc(sizeof(float *) * N); + + for (i = 0; i < N; i++) + { + result[i] = (float *)malloc(sizeof(float) * N); + a_ref[i] = (float *)malloc(sizeof(float) * N); + } + + GPU_argv_init(); + initCPU(a, N); + + initTrace(); + startCPU(); + // allocate the memory on the GPU + cudaMalloc((void **)&dev_a, N * N * sizeof(float)); + + cudaMemcpy(dev_a, a, N * N * sizeof(float), cudaMemcpyHostToDevice); // copy array to device memory + + /*Perform LU Decomposition*/ + // for (i = 0; i < N; i++) + // { + // scale<<<1, 1>>>(dev_a, N, i); + // // blocks= ((N-i-1)/512)+1; + // blocks = ((N / 512)); + // // printf("Number of blocks 
rxd : %d \n",blocks); + // reduce<<>>(dev_a, N, i, 512); + // } + blocks = ((N / DIM_THREAD_BLOCK)); + // lud_kernel<<>>(dev_a, N); + lud_kernel<<>>(dev_a, N); + /*LU decomposition ends here*/ + + cudaMemcpy(c, dev_a, N * N * sizeof(float), cudaMemcpyDeviceToHost); // copy array back to host + + // free the memory allocated on the GPU + cudaFree(dev_a); + endCPU(); + finiTrace(); + + /*copy the result matrix into explicit 2D matrix for verification*/ + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + result[i][j] = c[i * N + j]; + } + } + + printf("======================================================="); + printf("\n Performing inplace verification \n"); + + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + a_ref[i][j] = 0; + for (k = 0; k < N; k++) + { + if (i >= k) + l1 = result[i][k]; + else + l1 = 0; + + if (k == j) + u1 = 1; + else if (k < j) + u1 = result[k][j]; // figured it out + else + u1 = 0.0; + + a_ref[i][j] = a_ref[i][j] + (l1 * u1); + } + } + } + + // for (i = 0; i < N; i++) + // { + // for (j = 0; j < N; j++) + // { + // error = abs(a[(i * N + j)] - a_ref[i][j]); + // if (error > 1) + // { + // // printf("No match occured at %d %d Error is %lf \n ", i, j, abs(a[(i*N+j)]-b[i][j])); + // flag = flag + 1; + // } + // } + // } + + // if (flag == 0) + // printf("Match \n"); + // else + // printf("No Matchs %d \n", flag); + + + + return 0; +} diff --git a/workloads/realworld/standard/lud/run.sh b/workloads/realworld/standard/lud/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..ea7db937489e328f5e923d2b18774e4256eef123 --- /dev/null +++ b/workloads/realworld/standard/lud/run.sh @@ -0,0 +1 @@ +./lud 1024 diff --git a/workloads/realworld/standard/lud/run_super.sh b/workloads/realworld/standard/lud/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2791fe07c43d75b40894206ba79ed441f207ee26 --- /dev/null +++ b/workloads/realworld/standard/lud/run_super.sh @@ -0,0 +1 @@ +./lud 4096 
diff --git a/workloads/realworld/standard/lud_perf/Makefile b/workloads/realworld/standard/lud_perf/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1dfc37ac7fa0db46535d0583970dba1cb5cfb80e --- /dev/null +++ b/workloads/realworld/standard/lud_perf/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := lud +CUFILES := lud_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o lud + diff --git a/workloads/realworld/standard/lud_perf/lud b/workloads/realworld/standard/lud_perf/lud new file mode 100755 index 0000000000000000000000000000000000000000..9ac25127a55eb2d03b73565edaddd232dd6d5069 Binary files /dev/null and b/workloads/realworld/standard/lud_perf/lud differ diff --git a/workloads/realworld/standard/lud_perf/lud_cuda.cu b/workloads/realworld/standard/lud_perf/lud_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..bfe586362809ae646019c2828bc038f757610e97 --- /dev/null +++ b/workloads/realworld/standard/lud_perf/lud_cuda.cu @@ -0,0 +1,278 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. 
Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK 256 + +#ifndef SIZE +#define SIZE 4096 +#endif + +// __global__ void add(float *a, float *b, float *c) +// { +// int tid = blockIdx.x; // Handle the data at the index + +// c[tid] = a[tid] + b[tid]; +// } + +// __global__ void scale(float *a, int size, int index) +// { +// int i; +// int start = (index * size + index); +// int end = (index * size + size); + +// for (i = start + 1; i < end; i++) +// { +// a[i] = (a[i] / a[start]); +// } +// } + +// __global__ void reduce(float *a, int size, int index, int b_size) +// { +// extern __shared__ float pivot[]; +// int i; + +// int tid = threadIdx.x; +// int bid = blockIdx.x; +// int block_size = b_size; + +// int pivot_start = (index * size + index); +// int pivot_end = (index * size + size); + +// int start; +// int end; +// int pivot_row; +// int my_row; + +// if (tid == 0) +// { +// for (i = index; i < size; i++) +// pivot[i] = a[(index * size) + i]; +// } + +// __syncthreads(); + +// pivot_row = (index * size); +// my_row = (((block_size * bid) + tid) * size); +// start = my_row + index; +// end = my_row + size; + +// if (my_row > pivot_row) +// { +// for (i = start + 1; i < end; i++) +// { +// a[i] = a[i] - (a[start] * pivot[(i - my_row)]); +// } +// } +// } + +void initCPU(float *a, int N) +{ + srand((unsigned)2); + // fill the arrays 'a' on the CPU + for (int i = 0; i < (N * N); i++) + { + a[i] = ((rand() % 10) + 1); + } +} + +void initGPU(float *a_dev, int N) +{ + srand((unsigned)2); + for (int i = 0; i < (N * N); i++) + { + a_dev[i] = ((rand() % 10) + 1); + } +} + +__global__ void lud_kernel(float *a, int N) +{ + cooperative_groups::thread_block block = 
cooperative_groups::this_thread_block(); + // extern __shared__ float pivot[]; + __shared__ float pivot[SIZE]; + + for (int tile = 0; tile < N; tile += 1) { + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = blockDim.x; + + if (tid == 0 && bid == 0) { + int start = (tile * N + tile); + int end = (tile * N + N); + + for (int i = start + 1; i < end; i++) + a[i] = (a[i] / a[start]); + } + block.sync(); + + if (tid == 0) + { + for (int i = tile; i < N; i++) + pivot[i] = a[(tile * N) + i]; + } + block.sync(); + + int pivot_row = (tile * N); + int my_row = (((block_size * bid) + tid) * N); + int start = my_row + tile; + int end = my_row + N; + + if (my_row > pivot_row) + { + for (int i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(i - my_row)]); + } + } + block.sync(); + } +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + float *a; + float *c; + float error; + int N; + int flag = 0; + + float **result; + float **a_ref; + int blocks; + + float *dev_a; + int i; + int j; + int k; + float l1; + float u1; + + N = SIZE; + // allocate memory on CPU + a = (float *)malloc(sizeof(float) * N * N); + c = (float *)malloc(sizeof(float) * N * N); + + result = (float **)malloc(sizeof(float *) * N); + a_ref = (float **)malloc(sizeof(float *) * N); + + for (i = 0; i < N; i++) + { + result[i] = (float *)malloc(sizeof(float) * N); + a_ref[i] = (float *)malloc(sizeof(float) * N); + } + + GPU_argv_init(); + initCPU(a, N); + + // initTrace(); + startCPU(); + // allocate the memory on the GPU + cudaMalloc((void **)&dev_a, N * N * sizeof(float)); + + cudaMemcpy(dev_a, a, N * N * sizeof(float), cudaMemcpyHostToDevice); // copy array to device memory + + /*Perform LU Decomposition*/ + // for (i = 0; i < N; i++) + // { + // scale<<<1, 1>>>(dev_a, N, i); + // // blocks= ((N-i-1)/512)+1; + // blocks = ((N / 512)); + // // printf("Number of 
blocks rxd : %d \n",blocks); + // reduce<<>>(dev_a, N, i, 512); + // } + blocks = ((N / DIM_THREAD_BLOCK)); + // lud_kernel<<>>(dev_a, N); + lud_kernel<<>>(dev_a, N); + /*LU decomposition ends here*/ + + cudaMemcpy(c, dev_a, N * N * sizeof(float), cudaMemcpyDeviceToHost); // copy array back to host + + // free the memory allocated on the GPU + cudaFree(dev_a); + endCPU(); + // finiTrace(); + + /*copy the result matrix into explicit 2D matrix for verification*/ + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + result[i][j] = c[i * N + j]; + } + } + + printf("======================================================="); + printf("\n Performing inplace verification \n"); + + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + a_ref[i][j] = 0; + for (k = 0; k < N; k++) + { + if (i >= k) + l1 = result[i][k]; + else + l1 = 0; + + if (k == j) + u1 = 1; + else if (k < j) + u1 = result[k][j]; // figured it out + else + u1 = 0.0; + + a_ref[i][j] = a_ref[i][j] + (l1 * u1); + } + } + } + + // for (i = 0; i < N; i++) + // { + // for (j = 0; j < N; j++) + // { + // error = abs(a[(i * N + j)] - a_ref[i][j]); + // if (error > 1) + // { + // // printf("No match occured at %d %d Error is %lf \n ", i, j, abs(a[(i*N+j)]-b[i][j])); + // flag = flag + 1; + // } + // } + // } + + // if (flag == 0) + // printf("Match \n"); + // else + // printf("No Matchs %d \n", flag); + + + + return 0; +} diff --git a/workloads/realworld/standard/lud_perf/run.sh b/workloads/realworld/standard/lud_perf/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..ea7db937489e328f5e923d2b18774e4256eef123 --- /dev/null +++ b/workloads/realworld/standard/lud_perf/run.sh @@ -0,0 +1 @@ +./lud 1024 diff --git a/workloads/realworld/standard/lud_perf/run_super.sh b/workloads/realworld/standard/lud_perf/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2791fe07c43d75b40894206ba79ed441f207ee26 --- /dev/null +++ 
b/workloads/realworld/standard/lud_perf/run_super.sh @@ -0,0 +1 @@ +./lud 4096 diff --git a/workloads/realworld/standard/nw/Makefile b/workloads/realworld/standard/nw/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..b33ae6462826357f9665bfd7fc9929ed176f9b35 --- /dev/null +++ b/workloads/realworld/standard/nw/Makefile @@ -0,0 +1,15 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include + +SRC = needle.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = needle + +release: $(SRC) + $(CC) ${KERNEL_DIM} $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt diff --git a/workloads/realworld/standard/nw/Makefile_nvidia b/workloads/realworld/standard/nw/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..2fd0b98d07beea56ae69a96a0c8cb3af87d602f6 --- /dev/null +++ b/workloads/realworld/standard/nw/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
+# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := needle +# CUDA source files (compiled with cudacc) +CUFILES := needle.cu +# CUDA dependency files +CU_DEPS := needle_kernel.cu +# C/C++ source files (compiled with gcc / c++) +# CCFILES := BlackScholes_gold.cpp + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/standard/nw/README b/workloads/realworld/standard/nw/README new file mode 100755 index 0000000000000000000000000000000000000000..683cbd53db81f0ece4f926fa01316582ea0d5fc9 --- /dev/null +++ b/workloads/realworld/standard/nw/README @@ -0,0 +1,12 @@ +Note: This program generate two sequences randomly. Please specify your own sequences for different uses. + At the current stage, the program only supports two sequences with the same lengh, which can be divided by 16. 
+Usage: needle 32 10 + 32 //the length of both sequences + 10 //penalty value + +******Adjustable work group size***** +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" diff --git a/workloads/realworld/standard/nw/needle.cu b/workloads/realworld/standard/nw/needle.cu new file mode 100755 index 0000000000000000000000000000000000000000..7246ec604cf38867be161b8df3ebb0fd9318e832 --- /dev/null +++ b/workloads/realworld/standard/nw/needle.cu @@ -0,0 +1,285 @@ +#define LIMIT -999 +#include +#include +#include +#include +#include "needle.h" +#include +#include +#include + +// includes, kernels +#include "needle_kernel.cu" +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#ifdef TIMING +#include "timing.h" + +struct timeval tv; +struct timeval tv_total_start, tv_total_end; +struct timeval tv_h2d_start, tv_h2d_end; +struct timeval tv_d2h_start, tv_d2h_end; +struct timeval tv_kernel_start, tv_kernel_end; +struct timeval tv_mem_alloc_start, tv_mem_alloc_end; +struct timeval tv_close_start, tv_close_end; +float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, + d2h_time = 0, close_time = 0, total_time = 0; +#endif + +//////////////////////////////////////////////////////////////////////////////// +// declaration, forward +void runTest( int argc, char** argv); + + +int blosum62[24][24] = { +{ 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0, -4}, +{-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1, -4}, +{-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1, -4}, +{-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1, -4}, +{ 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4}, +{-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1, -4}, +{-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 
0, -1, -3, -2, -2, 1, 4, -1, -4}, +{ 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -4}, +{-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1, -4}, +{-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1, -4}, +{-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1, -4}, +{-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1, -4}, +{-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1, -4}, +{-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1, -4}, +{-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2, -4}, +{ 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0, -4}, +{ 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0, -4}, +{-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2, -4}, +{-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1, -4}, +{ 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1, -4}, +{-2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1, -4}, +{-1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4}, +{ 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1, -4}, +{-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1} +}; + +double gettime() { + struct timeval t; + gettimeofday(&t,NULL); + return t.tv_sec+t.tv_usec*1e-6; +} + +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int +main( int argc, char** argv) +{ + uint64_t start_tsc = rdtsc(); + uint64_t 
start_tsp = rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + + printf("WG size of kernel = %d \n", BLOCK_SIZE); + + runTest( argc, argv); + + return EXIT_SUCCESS; +} + +void usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "\t - x and y dimensions\n"); + fprintf(stderr, "\t - penalty(positive integer)\n"); + exit(1); +} + +void runTest( int argc, char** argv) +{ + int max_rows, max_cols, penalty, nblocks; + int *input_itemsets, *output_itemsets, *referrence; + int *matrix_cuda, *referrence_cuda; + int size; + + + // the lengths of the two sequences should be able to divided by 16. + // And at current stage max_rows needs to equal max_cols + if (argc == 4) + { + max_rows = atoi(argv[1]); + max_cols = atoi(argv[1]); + penalty = atoi(argv[2]); + nblocks = atoi(argv[3]); + } + else{ + usage(argc, argv); + } + + if(atoi(argv[1])%16!=0){ + fprintf(stderr,"The dimension values must be a multiple of 16\n"); + exit(1); + } + + + max_rows = max_rows + 1; + max_cols = max_cols + 1; + referrence = (int *)malloc( max_rows * max_cols * sizeof(int) ); + input_itemsets = (int *)malloc( max_rows * max_cols * sizeof(int) ); + output_itemsets = (int *)malloc( max_rows * max_cols * sizeof(int) ); + + + if (!input_itemsets) + fprintf(stderr, "error: can not allocate memory"); + + srand ( 7 ); + + + for (int i = 0 ; i < max_cols; i++){ + for (int j = 0 ; j < max_rows; j++){ + input_itemsets[i*max_cols+j] = 0; + } + } + + printf("Start Needleman-Wunsch\n"); + + for( int i=1; i< max_rows ; i++){ //please define your own sequence. + input_itemsets[i*max_cols] = rand() % 10 + 1; + } + for( int j=1; j< max_cols ; j++){ //please define your own sequence. 
+ input_itemsets[j] = rand() % 10 + 1; + } + + + for (int i = 1 ; i < max_cols; i++){ + for (int j = 1 ; j < max_rows; j++){ + referrence[i*max_cols+j] = blosum62[input_itemsets[i*max_cols]][input_itemsets[j]]; + } + } + + for( int i = 1; i< max_rows ; i++) + input_itemsets[i*max_cols] = -i * penalty; + for( int j = 1; j< max_cols ; j++) + input_itemsets[j] = -j * penalty; + + + size = max_cols * max_rows; + + GPU_argv_init(); + initTrace(); + startCPU(); + + cudaMalloc((void**)& referrence_cuda, sizeof(int)*size); + cudaMalloc((void**)& matrix_cuda, sizeof(int)*size); + + cudaMemcpy(referrence_cuda, referrence, sizeof(int) * size, cudaMemcpyHostToDevice); + cudaMemcpy(matrix_cuda, input_itemsets, sizeof(int) * size, cudaMemcpyHostToDevice); + + dim3 dimGrid; + dim3 dimBlock(BLOCK_SIZE, 1); + // int block_width = ( max_cols - 1 )/BLOCK_SIZE; + int block_width = nblocks - 1; + int block_size = (max_cols - 1) / (nblocks * BLOCK_SIZE); + +#ifdef TIMING + gettimeofday(&tv_kernel_start, NULL); +#endif + + //printf("Processing top-left matrix\n"); + //process top-left matrix + for( int i = 1 ; i <= block_width ; i++) { + dimGrid.x = i; + dimGrid.y = 1; + needle_cuda_shared_1<<>>(referrence_cuda, matrix_cuda + ,max_cols, penalty, i, block_width, block_size); + } + //printf("Processing bottom-right matrix\n"); + //process bottom-right matrix + for( int i = block_width - 1 ; i >= 1 ; i--){ + dimGrid.x = i; + dimGrid.y = 1; + needle_cuda_shared_2<<>>(referrence_cuda, matrix_cuda + ,max_cols, penalty, i, block_width, block_size); + } + cudaDeviceSynchronize(); + +#ifdef TIMING + gettimeofday(&tv_kernel_end, NULL); + tvsub(&tv_kernel_end, &tv_kernel_start, &tv); + kernel_time += tv.tv_sec * 1000.0 + (float) tv.tv_usec / 1000.0; +#endif + + cudaMemcpy(output_itemsets, matrix_cuda, sizeof(int) * size, cudaMemcpyDeviceToHost); + + //sanjana - moved cudaFree up bc it's unused + cudaFree(referrence_cuda); + cudaFree(matrix_cuda); + + endCPU(); + finiTrace(); + +//#define TRACEBACK 
+#ifdef TRACEBACK + + FILE *fpo = fopen("result.txt","w"); + fprintf(fpo, "print traceback value GPU:\n"); + + for (int i = max_rows - 2, j = max_rows - 2; i>=0, j>=0;){ + int nw, n, w, traceback; + if ( i == max_rows - 2 && j == max_rows - 2 ) + fprintf(fpo, "%d ", output_itemsets[ i * max_cols + j]); //print the first element + if ( i == 0 && j == 0 ) + break; + if ( i > 0 && j > 0 ){ + nw = output_itemsets[(i - 1) * max_cols + j - 1]; + w = output_itemsets[ i * max_cols + j - 1 ]; + n = output_itemsets[(i - 1) * max_cols + j]; + } + else if ( i == 0 ){ + nw = n = LIMIT; + w = output_itemsets[ i * max_cols + j - 1 ]; + } + else if ( j == 0 ){ + nw = w = LIMIT; + n = output_itemsets[(i - 1) * max_cols + j]; + } + else{ + } + + //traceback = maximum(nw, w, n); + int new_nw, new_w, new_n; + new_nw = nw + referrence[i * max_cols + j]; + new_w = w - penalty; + new_n = n - penalty; + + traceback = maximum(new_nw, new_w, new_n); + if(traceback == new_nw) + traceback = nw; + if(traceback == new_w) + traceback = w; + if(traceback == new_n) + traceback = n; + + fprintf(fpo, "%d ", traceback); + + if(traceback == nw ) + {i--; j--; continue;} + + else if(traceback == w ) + {j--; continue;} + + else if(traceback == n ) + {i--; continue;} + + else + ; + } + + fclose(fpo); + +#endif + + free(referrence); + free(input_itemsets); + free(output_itemsets); + +#ifdef TIMING + printf("Exec: %f\n", kernel_time); +#endif +} + diff --git a/workloads/realworld/standard/nw/needle.h b/workloads/realworld/standard/nw/needle.h new file mode 100755 index 0000000000000000000000000000000000000000..e73320d6496262665592117d242e9bc383298b5b --- /dev/null +++ b/workloads/realworld/standard/nw/needle.h @@ -0,0 +1,11 @@ +#ifdef RD_WG_SIZE_0_0 + #define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) + #define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) + #define BLOCK_SIZE RD_WG_SIZE +#else + #define BLOCK_SIZE 16 +#endif +//#define TRACE + diff --git 
a/workloads/realworld/standard/nw/needle_kernel.cu b/workloads/realworld/standard/nw/needle_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..d7b4a0a1984521fa4a3d0dab3e1a3b3645ad5a4c --- /dev/null +++ b/workloads/realworld/standard/nw/needle_kernel.cu @@ -0,0 +1,197 @@ + +#include "needle.h" +#include + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SDATA( index) CUT_BANK_CHECKER(sdata, index) + +__device__ __host__ int +maximum( int a, + int b, + int c){ + +int k; +if( a <= b ) +k = b; +else +k = a; + +if( k <=c ) +return(c); +else +return(k); + +} + +__global__ void +needle_cuda_shared_1( int* referrence, + int* matrix_cuda, + int cols, + int penalty, + int i, + int block_width, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + int bx = blockIdx.x; + int tx = threadIdx.x; + + int b_index_x = bx; + int b_index_y = i - 1 - bx; + + __shared__ int temp[BLOCK_SIZE+1][BLOCK_SIZE+1]; + __shared__ int ref[BLOCK_SIZE][BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (b_index_y * gridDim.x + b_index_x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + int b_index_x = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int b_index_y = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + int index = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( cols + 1 ); + int index_n = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( 1 ); + int index_w = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + ( cols ); 
+ int index_nw = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x; + + if (tx == 0) + temp[tx][0] = matrix_cuda[index_nw]; + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + ref[ty][tx] = referrence[index + cols * ty]; + block.sync(); + + temp[tx + 1][0] = matrix_cuda[index_w + cols * tx]; + block.sync(); + + temp[0][tx + 1] = matrix_cuda[index_n]; + block.sync(); + + for( int m = 0 ; m < BLOCK_SIZE ; m++){ + if ( tx <= m ){ + int t_index_x = tx + 1; + int t_index_y = m - tx + 1; + + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[t_index_y-1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + } + block.sync(); + } + + for( int m = BLOCK_SIZE - 2 ; m >=0 ; m--){ + if ( tx <= m){ + int t_index_x = tx + BLOCK_SIZE - m ; + int t_index_y = BLOCK_SIZE - tx; + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[t_index_y-1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + + } + block.sync(); + } + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + matrix_cuda[index + ty * cols] = temp[ty+1][tx+1]; + } +} + + +__global__ void +needle_cuda_shared_2( int* referrence, + int* matrix_cuda, + int cols, + int penalty, + int i, + int block_width, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + int bx = blockIdx.x; + int tx = threadIdx.x; + + int b_index_x = bx + block_width - i; + int b_index_y = block_width - bx -1; + + __shared__ int temp[BLOCK_SIZE+1][BLOCK_SIZE+1]; + __shared__ int ref[BLOCK_SIZE][BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (b_index_y * gridDim.x + b_index_x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + 
+ for (; tile < end_tile; tile += 1) + { + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + int b_index_x = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int b_index_y = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + int index = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( cols + 1 ); + int index_n = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( 1 ); + int index_w = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + ( cols ); + int index_nw = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x; + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + ref[ty][tx] = referrence[index + cols * ty]; + block.sync(); + + if (tx == 0) + temp[tx][0] = matrix_cuda[index_nw]; + temp[tx + 1][0] = matrix_cuda[index_w + cols * tx]; + block.sync(); + + temp[0][tx + 1] = matrix_cuda[index_n]; + block.sync(); + + for( int m = 0 ; m < BLOCK_SIZE ; m++){ + if ( tx <= m ){ + int t_index_x = tx + 1; + int t_index_y = m - tx + 1; + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[t_index_y-1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + + } + block.sync(); + } + + for( int m = BLOCK_SIZE - 2 ; m >=0 ; m--){ + if ( tx <= m){ + int t_index_x = tx + BLOCK_SIZE - m ; + int t_index_y = BLOCK_SIZE - tx; + + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[t_index_y-1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + } + block.sync(); + } + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + matrix_cuda[index + ty * cols] = temp[ty+1][tx+1]; + } +} + diff --git a/workloads/realworld/standard/nw/run.sh b/workloads/realworld/standard/nw/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..e3d20f9f4402de57c2a49c4db5c04d917907d741 --- /dev/null +++ b/workloads/realworld/standard/nw/run.sh @@ -0,0 +1 @@ 
+./needle 32768 10 256 diff --git a/workloads/realworld/standard/nw/run_super.sh b/workloads/realworld/standard/nw/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..23b570be1ac96cce67094a9469de1c6d24c03b08 --- /dev/null +++ b/workloads/realworld/standard/nw/run_super.sh @@ -0,0 +1 @@ +./needle 32768 10 64 diff --git a/workloads/realworld/standard/pathfinder/Makefile b/workloads/realworld/standard/pathfinder/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d740e79027f3651c458e229179bbbd46fb4fcbec --- /dev/null +++ b/workloads/realworld/standard/pathfinder/Makefile @@ -0,0 +1,14 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc +INCLUDE := $(CUDA_DIR)/include + +SRC = pathfinder.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = pathfinder + +release: + $(CC) $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +clean: + rm -f pathfinder diff --git a/workloads/realworld/standard/pathfinder/README b/workloads/realworld/standard/pathfinder/README new file mode 100644 index 0000000000000000000000000000000000000000..9af75abe201eb95c5c89a038c6d79f54b276f94e --- /dev/null +++ b/workloads/realworld/standard/pathfinder/README @@ -0,0 +1,6 @@ +To compile the program: + +nvcc -cuda dynproc.cu +nvcc -o dynproc dynproc.cu.cpp + +Usage: dynproc row_len col_len pyramid_height diff --git a/workloads/realworld/standard/pathfinder/pathfinder.cu b/workloads/realworld/standard/pathfinder/pathfinder.cu new file mode 100644 index 0000000000000000000000000000000000000000..a3c6069ab53f8197d1f61d5fdb417ffdba9be5cc --- /dev/null +++ b/workloads/realworld/standard/pathfinder/pathfinder.cu @@ -0,0 +1,299 @@ +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#ifdef 
TIMING +#include "timing.h" + +struct timeval tv; +struct timeval tv_total_start, tv_total_end; +struct timeval tv_h2d_start, tv_h2d_end; +struct timeval tv_d2h_start, tv_d2h_end; +struct timeval tv_kernel_start, tv_kernel_end; +struct timeval tv_mem_alloc_start, tv_mem_alloc_end; +struct timeval tv_close_start, tv_close_end; +float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, + d2h_time = 0, close_time = 0, total_time = 0; +#endif + +#define BLOCK_SIZE 256 +#define STR_SIZE 256 +#define DEVICE 0 +#define HALO 1 // halo width along one direction when advancing to the next iteration + +// #define BENCH_PRINT + +void run(int argc, char **argv); + +int rows, cols; +int *data; +int **wall; +int *result; +#define M_SEED 9 +int pyramid_height; +int nblocks; + +void init(int argc, char **argv) +{ + if (argc == 5) + { + cols = atoi(argv[1]); + rows = atoi(argv[2]); + pyramid_height = atoi(argv[3]); + nblocks = atoi(argv[4]); + } + else + { + printf("Usage: dynproc row_len col_len pyramid_height\n"); + exit(0); + } + data = new int[rows * cols]; + wall = new int *[rows]; + for (int n = 0; n < rows; n++) + wall[n] = data + cols * n; + result = new int[cols]; + + int seed = M_SEED; + srand(seed); + + for (int i = 0; i < rows; i++) + { + for (int j = 0; j < cols; j++) + { + wall[i][j] = rand() % 10; + } + } +#ifdef BENCH_PRINT + for (int i = 0; i < rows; i++) + { + for (int j = 0; j < cols; j++) + { + printf("%d ", wall[i][j]); + } + printf("\n"); + } +#endif +} + +void fatal(char *s) +{ + fprintf(stderr, "error: %s\n", s); +} + +#define IN_RANGE(x, min, max) ((x) >= (min) && (x) <= (max)) +#define CLAMP_RANGE(x, min, max) x = (x < (min)) ? min : ((x > (max)) ? max : x) +#define MIN(a, b) ((a) <= (b) ? 
(a) : (b)) + +__global__ void dynproc_kernel( + int iteration, + int *gpuWall, + int *gpuSrc, + int *gpuResults, + int cols, + int rows, + int startStep, + int border, + int small_block_cols, + int tile_size, + int batches) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + __shared__ int prev[BLOCK_SIZE]; + __shared__ int result[BLOCK_SIZE]; + + int bx = blockIdx.x; + int tx = threadIdx.x; + + for (int b = 0; b < batches; b++) + { + // each block finally computes result for a small block + // after N iterations. + // it is the non-overlapping small blocks that cover + // all the input data + + // calculate the boundary for the block according to + // the boundary of its small block + int blkX = bx * tile_size + small_block_cols * b - border; + int blkXmax = blkX + BLOCK_SIZE - 1; + + // calculate the global thread coordination + int xidx = blkX + tx; + + // effective range within this block that falls within + // the valid range of the input data + // used to rule out computation outside the boundary. + int validXmin = (blkX < 0) ? -blkX : 0; + int validXmax = (blkXmax > cols - 1) ? BLOCK_SIZE - 1 - (blkXmax - cols + 1) : BLOCK_SIZE - 1; + + int W = tx - 1; + int E = tx + 1; + + W = (W < validXmin) ? validXmin : W; + E = (E > validXmax) ? validXmax : E; + + bool isValid = IN_RANGE(tx, validXmin, validXmax); + + if (IN_RANGE(xidx, 0, cols - 1)) + { + prev[tx] = gpuSrc[xidx]; + } + block.sync(); // [Ronny] Added sync to avoid race on prev Aug. 
14 2012 + bool computed; + for (int i = 0; i < iteration; i++) + { + computed = false; + if (IN_RANGE(tx, i + 1, BLOCK_SIZE - i - 2) && + isValid) + { + computed = true; + int left = prev[W]; + int up = prev[tx]; + int right = prev[E]; + int shortest = MIN(left, up); + shortest = MIN(shortest, right); + int index = cols * (startStep + i) + xidx; + result[tx] = shortest + gpuWall[index]; + } + block.sync(); + if (i == iteration - 1) + break; + if (computed) // Assign the computation range + prev[tx] = result[tx]; + block.sync(); // [Ronny] Added sync to avoid race on prev Aug. 14 2012 + } + + // update the global memory + // after the last iteration, only threads coordinated within the + // small block perform the calculation and switch on ``computed'' + if (computed) + { + gpuResults[xidx] = result[tx]; + } + } + + +} + +/* + compute N time steps +*/ +int calc_path(int *gpuWall, int *gpuResult[2], int rows, int cols, + int pyramid_height, int blockCols, int borderCols, int tile_size, int batches) +{ + dim3 dimBlock(BLOCK_SIZE); + dim3 dimGrid(nblocks); + + int src = 1, dst = 0; + for (int t = 0; t < rows - 1; t += pyramid_height) + { + int temp = src; + src = dst; + dst = temp; + + int iteration = MIN(pyramid_height, rows - t - 1); + int small_block_cols = BLOCK_SIZE - iteration * HALO * 2; + dynproc_kernel<<>>( + iteration, gpuWall, gpuResult[src], gpuResult[dst], + cols, rows, t, borderCols, small_block_cols, tile_size, batches); + + // for the measurement fairness + cudaDeviceSynchronize(); + } + return dst; +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + GPU_argv_init(); + + run(argc, argv); + + return EXIT_SUCCESS; +} + +void run(int argc, char **argv) +{ + init(argc, argv); + + /* --------------- pyramid parameters --------------- */ + int borderCols = (pyramid_height)*HALO; + int smallBlockCol = BLOCK_SIZE - (pyramid_height)*HALO * 2; 
+ int blockCols = cols / smallBlockCol + ((cols % smallBlockCol == 0) ? 0 : 1); + + //ruihao + int cols_per_block = cols / nblocks; + if (cols_per_block < BLOCK_SIZE) cols_per_block = BLOCK_SIZE; + int batches = cols_per_block / smallBlockCol + ((cols_per_block % smallBlockCol == 0) ? 0 : 1); + + // printf("pyramidHeight: %d\ngridSize: [%d]\nborder:[%d]\nblockSize: %d\nblockGrid:[%d]\ntargetBlock:[%d]\n", + // pyramid_height, cols, borderCols, BLOCK_SIZE, blockCols, smallBlockCol); + printf("pyramidHeight: %d\ngridSize: [%d]\nborder:[%d]\nblockSize: %d\nblockGrid:[%d]\ntargetBlock:[%d]\n", + pyramid_height, cols, borderCols, BLOCK_SIZE, nblocks, smallBlockCol); + + int *gpuWall, *gpuResult[2]; + int size = rows * cols; + + initTrace(); + startCPU(); + + cudaMalloc((void **)&gpuResult[0], sizeof(int) * cols); + cudaMalloc((void **)&gpuResult[1], sizeof(int) * cols); + cudaMemcpy(gpuResult[0], data, sizeof(int) * cols, cudaMemcpyHostToDevice); + cudaMalloc((void **)&gpuWall, sizeof(int) * (size - cols)); + cudaMemcpy(gpuWall, data + cols, sizeof(int) * (size - cols), cudaMemcpyHostToDevice); + +#ifdef TIMING + gettimeofday(&tv_kernel_start, NULL); +#endif + + // int final_ret = calc_path(gpuWall, gpuResult, rows, cols, + // pyramid_height, blockCols, borderCols); + int final_ret = calc_path(gpuWall, gpuResult, rows, cols, + pyramid_height, blockCols, borderCols, cols_per_block, batches); + +#ifdef TIMING + gettimeofday(&tv_kernel_end, NULL); + tvsub(&tv_kernel_end, &tv_kernel_start, &tv); + kernel_time += tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; +#endif + + cudaMemcpy(result, gpuResult[final_ret], sizeof(int) * cols, cudaMemcpyDeviceToHost); + +#ifdef BENCH_PRINT + for (int i = 0; i < cols; i++) + printf("%d ", data[i]); + printf("\n"); + for (int i = 0; i < cols; i++) + printf("%d ", result[i]); + printf("\n"); +#endif + + cudaFree(gpuWall); + cudaFree(gpuResult[0]); + cudaFree(gpuResult[1]); + + endCPU(); + finiTrace(); + + delete[] data; + delete[] wall; + 
delete[] result; + +#ifdef TIMING + printf("Exec: %f\n", kernel_time); +#endif +} diff --git a/workloads/realworld/standard/pathfinder/result.txt b/workloads/realworld/standard/pathfinder/result.txt new file mode 100644 index 0000000000000000000000000000000000000000..fa67c591d071682e1842a455f4477b397825e250 --- /dev/null +++ b/workloads/realworld/standard/pathfinder/result.txt @@ -0,0 +1,11 @@ +pyramidHeight: 20 +gridSize: [100000] +border:[20] +blockSize: 256 +blockGrid:[463] +targetBlock:[216] +CUPTI dynproc_kernel iter 0 start: 1679530155077329959 end: 1679530155078946951 +CUPTI dynproc_kernel iter 20 start: 1679530155078953603 end: 1679530155081441509 +CUPTI dynproc_kernel iter 40 start: 1679530155081441880 end: 1679530155083936228 +CUPTI dynproc_kernel iter 60 start: 1679530155083936508 end: 1679530155086433891 +CUPTI dynproc_kernel iter 80 start: 1679530155086434172 end: 1679530155088929332 diff --git a/workloads/realworld/standard/pathfinder/run.sh b/workloads/realworld/standard/pathfinder/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..11a9e6199ea4b2ff7fd3e0ebf893dc96fa89ff45 --- /dev/null +++ b/workloads/realworld/standard/pathfinder/run.sh @@ -0,0 +1,2 @@ +#./pathfinder 100000 100 20 1024 > result.txt +./pathfinder 10000000 100 20 1024 diff --git a/workloads/realworld/standard/pathfinder/run_super.sh b/workloads/realworld/standard/pathfinder/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..b35cc0b44511def3912323c1e0d58c2daa280722 --- /dev/null +++ b/workloads/realworld/standard/pathfinder/run_super.sh @@ -0,0 +1 @@ +./pathfinder 10000000 100 20 1024 diff --git a/workloads/realworld/standard/srad/Makefile b/workloads/realworld/standard/srad/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..47da520a663446e36c04461d54cfbb3d12cfa328 --- /dev/null +++ b/workloads/realworld/standard/srad/Makefile @@ -0,0 +1,15 @@ +include ../../../common/make.config + +CC := 
$(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -std=c++11 -arch=sm_80 -O3 + +SRC = srad.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = srad + +release: $(SRC) + $(CC) $(KERNEL_DIM) $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt diff --git a/workloads/realworld/standard/srad/Makefile_nvidia b/workloads/realworld/standard/srad/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..e1f345c41c0f838dcf159958f628276455ef4dd7 --- /dev/null +++ b/workloads/realworld/standard/srad/Makefile_nvidia @@ -0,0 +1,22 @@ +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := srad +# CUDA source files (compiled with cudacc) +CUFILES := srad.cu +# CUDA dependency files +CU_DEPS := \ + srad_kernel.cu \ + +# C/C++ source files (compiled with gcc / c++) +CCFILES := \ + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/standard/srad/README b/workloads/realworld/standard/srad/README new file mode 100755 index 0000000000000000000000000000000000000000..91e803b576bdeebe232c00a5112dadd836ffc33f --- /dev/null +++ b/workloads/realworld/standard/srad/README @@ -0,0 +1,24 @@ +In srad.h, define either GPU or CPU computation +Currently, the GPU implementation can only support x-, y-dimensions that can be divided by 16. 
+ +Usage: +srad 128 128 0 31 0 31 0.5 2 + +128 //number of rows in the domain +128 //number of cols in the domain +0 //y1 position of the speckle +31 //y2 position of the speckle +0 //x1 position of the speckle +31 //x2 position of the speckle +0.5 //Lambda value +2 //number of iterations + + +******Adjustable work group size***** +The kernel has square shape +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe one dimesion +The total thread number for one block is RD_WG_SIZE_0*RD_WG_SIZE_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" \ No newline at end of file diff --git a/workloads/realworld/standard/srad/run.sh b/workloads/realworld/standard/srad/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..46b08d0b82af4817b9d19fe704601ee98d058357 --- /dev/null +++ b/workloads/realworld/standard/srad/run.sh @@ -0,0 +1,5 @@ +./srad 8192 8192 0 127 0 127 0.5 2 8 + +# ./srad 128 128 0 127 0 127 0.5 2 8 + +# ./srad 16384 16384 0 16383 0 16383 0.5 2 8 diff --git a/workloads/realworld/standard/srad/run_super.sh b/workloads/realworld/standard/srad/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2d0f1b8ebd049e33bbb74a15722fc7172167641f --- /dev/null +++ b/workloads/realworld/standard/srad/run_super.sh @@ -0,0 +1 @@ +./srad 32768 32768 0 127 0 127 0.5 2 8 \ No newline at end of file diff --git a/workloads/realworld/standard/srad/srad.cu b/workloads/realworld/standard/srad/srad.cu new file mode 100755 index 0000000000000000000000000000000000000000..2dd1c82c5b3ee97949a20969ddf1d16ca4cf1909 --- /dev/null +++ b/workloads/realworld/standard/srad/srad.cu @@ -0,0 +1,299 @@ +// includes, system +#include +#include +#include +#include +#include "srad.h" +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +// includes, project +#include +#define GPU + +// includes, kernels +#include "srad_kernel.cu" + +void random_matrix(float *I, int rows, int cols); +void runTest( int argc, char** argv); +void 
usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "\t - number of rows\n"); + fprintf(stderr, "\t - number of cols\n"); + fprintf(stderr, "\t - y1 value of the speckle\n"); + fprintf(stderr, "\t - y2 value of the speckle\n"); + fprintf(stderr, "\t - x1 value of the speckle\n"); + fprintf(stderr, "\t - x2 value of the speckle\n"); + fprintf(stderr, "\t - lambda (0,1)\n"); + fprintf(stderr, "\t - number of iterations\n"); + + exit(1); +} +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE); + runTest( argc, argv); + + return EXIT_SUCCESS; +} + +void +runTest( int argc, char** argv) +{ + int rows, cols, size_I, size_R, niter = 10, iter, nblocks; + float *I, *J, lambda, q0sqr, sum, sum2, tmp, meanROI,varROI ; + +#ifdef CPU + float Jc, G2, L, num, den, qsqr; + int *iN,*iS,*jE,*jW, k; + float *dN,*dS,*dW,*dE; + float cN,cS,cW,cE,D; +#endif + +#ifdef GPU + + float *J_cuda; + float *C_cuda; + float *E_C, *W_C, *N_C, *S_C; + +#endif + + unsigned int r1, r2, c1, c2; + float *c; + + + + if (argc == 10) + { + rows = atoi(argv[1]); //number of rows in the domain + cols = atoi(argv[2]); //number of cols in the domain + if ((rows%16!=0) || (cols%16!=0)){ + fprintf(stderr, "rows and cols must be multiples of 16\n"); + exit(1); + } + r1 = atoi(argv[3]); //y1 position of the speckle + r2 = atoi(argv[4]); //y2 position of the speckle + c1 = atoi(argv[5]); //x1 position of the speckle + c2 = atoi(argv[6]); //x2 position of the speckle + lambda = atof(argv[7]); //Lambda value + niter = atoi(argv[8]); //number of iterations + nblocks = atoi(argv[9]); // number of blocks + } + else{ + 
usage(argc, argv); + } + + size_I = cols * rows; + size_R = (r2-r1+1)*(c2-c1+1); + + I = (float *)malloc( size_I * sizeof(float) ); + J = (float *)malloc( size_I * sizeof(float) ); + c = (float *)malloc(sizeof(float)* size_I) ; + + +#ifdef CPU + + iN = (int *)malloc(sizeof(unsigned int*) * rows) ; + iS = (int *)malloc(sizeof(unsigned int*) * rows) ; + jW = (int *)malloc(sizeof(unsigned int*) * cols) ; + jE = (int *)malloc(sizeof(unsigned int*) * cols) ; + + + dN = (float *)malloc(sizeof(float)* size_I) ; + dS = (float *)malloc(sizeof(float)* size_I) ; + dW = (float *)malloc(sizeof(float)* size_I) ; + dE = (float *)malloc(sizeof(float)* size_I) ; + + + for (int i=0; i< rows; i++) { + iN[i] = i-1; + iS[i] = i+1; + } + for (int j=0; j< cols; j++) { + jW[j] = j-1; + jE[j] = j+1; + } + iN[0] = 0; + iS[rows-1] = rows-1; + jW[0] = 0; + jE[cols-1] = cols-1; + +#endif +GPU_argv_init(); +initTrace(); +startCPU(); + +#ifdef GPU + //Allocate device memory + cudaMalloc((void**)& J_cuda, sizeof(float)* size_I); + cudaMalloc((void**)& C_cuda, sizeof(float)* size_I); + cudaMalloc((void**)& E_C, sizeof(float)* size_I); + cudaMalloc((void**)& W_C, sizeof(float)* size_I); + cudaMalloc((void**)& S_C, sizeof(float)* size_I); + cudaMalloc((void**)& N_C, sizeof(float)* size_I); + + (E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr, cols / nblocks); +#endif + + printf("Randomizing the input matrix\n"); + //Generate a random matrix + random_matrix(I, rows, cols); + + for (int k = 0; k < size_I; k++ ) { + J[k] = (float)exp(I[k]) ; + } + printf("Start the SRAD main loop\n"); + for (iter=0; iter< niter; iter++){ + sum=0; sum2=0; + for (int i=r1; i<=r2; i++) { + for (int j=c1; j<=c2; j++) { + tmp = J[i * cols + j]; + sum += tmp ; + sum2 += tmp*tmp; + } + } + meanROI = sum / size_R; + varROI = (sum2 / size_R) - meanROI*meanROI; + q0sqr = varROI / (meanROI*meanROI); + +#ifdef CPU + + for (int i = 0 ; i < rows ; i++) { + for (int j = 0; j < cols; j++) { + + k = i * cols + j; + Jc = J[k]; + + 
// directional derivates + dN[k] = J[iN[i] * cols + j] - Jc; + dS[k] = J[iS[i] * cols + j] - Jc; + dW[k] = J[i * cols + jW[j]] - Jc; + dE[k] = J[i * cols + jE[j]] - Jc; + + G2 = (dN[k]*dN[k] + dS[k]*dS[k] + + dW[k]*dW[k] + dE[k]*dE[k]) / (Jc*Jc); + + L = (dN[k] + dS[k] + dW[k] + dE[k]) / Jc; + + num = (0.5*G2) - ((1.0/16.0)*(L*L)) ; + den = 1 + (.25*L); + qsqr = num/(den*den); + + // diffusion coefficent (equ 33) + den = (qsqr-q0sqr) / (q0sqr * (1+q0sqr)) ; + c[k] = 1.0 / (1.0+den) ; + + // saturate diffusion coefficent + if (c[k] < 0) {c[k] = 0;} + else if (c[k] > 1) {c[k] = 1;} + } + } + + for (int i = 0; i < rows; i++) { + for (int j = 0; j < cols; j++) { + + // current index + k = i * cols + j; + + // diffusion coefficent + cN = c[k]; + cS = c[iS[i] * cols + j]; + cW = c[k]; + cE = c[i * cols + jE[j]]; + + // divergence (equ 58) + D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k]; + + // image update (equ 61) + J[k] = J[k] + 0.25*lambda*D; + } + } + +#endif // CPU + + +#ifdef GPU + + //Currently the input size must be divided by 16 - the block size + + // ruihao + int block_x = cols/BLOCK_SIZE ; + int block_y = rows/BLOCK_SIZE ; + + if (nblocks > block_x) nblocks = block_x; + + dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE); + // dim3 dimGrid(block_x, block_y); + dim3 dimGrid(nblocks, nblocks); + // ruihao + + //Copy data from main memory to device memory + cudaMemcpy(J_cuda, J, sizeof(float) * size_I, cudaMemcpyHostToDevice); + + //Run kernels + // srad_cuda_1<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr); + // srad_cuda_2<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, lambda, q0sqr); + srad_cuda_1<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr, cols / nblocks); + srad_cuda_2<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, lambda, q0sqr, cols / nblocks); + //Copy data from device memory to main memory + cudaMemcpy(J, J_cuda, sizeof(float) * size_I, cudaMemcpyDeviceToHost); + +#endif +} + + cudaThreadSynchronize(); + + #ifdef GPU + 
cudaFree(C_cuda); + cudaFree(J_cuda); + cudaFree(E_C); + cudaFree(W_C); + cudaFree(N_C); + cudaFree(S_C); +#endif +endCPU(); +finiTrace(); + +#ifdef OUTPUT + //Printing output + printf("Printing Output:\n"); + for( int i = 0 ; i < rows ; i++){ + for ( int j = 0 ; j < cols ; j++){ + printf("%.5f ", J[i * cols + j]); + } + printf("\n"); + } +#endif + + printf("Computation Done\n"); + + free(I); + free(J); +#ifdef CPU + free(iN); free(iS); free(jW); free(jE); + free(dN); free(dS); free(dW); free(dE); +#endif + free(c); + +} + + +void random_matrix(float *I, int rows, int cols){ + + srand(7); + + for( int i = 0 ; i < rows ; i++){ + for ( int j = 0 ; j < cols ; j++){ + I[i * cols + j] = rand()/(float)RAND_MAX ; + } + } + +} + diff --git a/workloads/realworld/standard/srad/srad.h b/workloads/realworld/standard/srad/srad.h new file mode 100755 index 0000000000000000000000000000000000000000..2b2adb6d956b697c5b0ace9bccb89162ef98be50 --- /dev/null +++ b/workloads/realworld/standard/srad/srad.h @@ -0,0 +1,16 @@ +#define STR_SIZE 256 + +#ifdef RD_WG_SIZE_0_0 + #define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) + #define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) + #define BLOCK_SIZE RD_WG_SIZE +#else + #define BLOCK_SIZE 16 +#endif + +#define GPU +#define TIMER +//#define OUTPUT + diff --git a/workloads/realworld/standard/srad/srad_kernel.cu b/workloads/realworld/standard/srad/srad_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..a81386576462e82375b7826f9b203005cca803ab --- /dev/null +++ b/workloads/realworld/standard/srad/srad_kernel.cu @@ -0,0 +1,316 @@ +#include "srad.h" +#include + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +__global__ void +srad_cuda_1( + float *E_C, + float *W_C, + float *N_C, + float *S_C, + float *J_cuda, + float *C_cuda, + int cols, + int rows, + float q0sqr, + int block_size) +{ + cooperative_groups::thread_block block = 
cooperative_groups::this_thread_block(); + // shared memory allocation + __shared__ float temp[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float temp_result[BLOCK_SIZE * BLOCK_SIZE]; + + __shared__ float north[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float south[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float east[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float west[BLOCK_SIZE * BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + // block id + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + int bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + // thread id + int tx = threadIdx.x; + int ty = threadIdx.y; + + // indices + int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx; + int index_n = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + tx - cols; + int index_s = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * BLOCK_SIZE + tx; + int index_w = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty - 1; + int index_e = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + BLOCK_SIZE; + + if (index_n < 0) index_n = 0; + if (index_s >= (cols * rows)) index_s = cols * rows - 1; + if (index_w < 0) index_w = 0; + if (index_e >= (cols * rows)) index_e = cols * rows - 1; + + float n, w, e, s, jc, g2, l, num, den, qsqr, c; + + // load data to shared memory + north[ty * BLOCK_SIZE + tx] = J_cuda[index_n]; + south[ty * BLOCK_SIZE + tx] = J_cuda[index_s]; + if (by == 0) + { + north[ty * BLOCK_SIZE + tx] = J_cuda[BLOCK_SIZE * bx + tx]; + } + else if (by == tile_dim_x - 1) + { 
+ south[ty * BLOCK_SIZE + tx] = J_cuda[cols * BLOCK_SIZE * (tile_dim_x - 1) + BLOCK_SIZE * bx + cols * (BLOCK_SIZE - 1) + tx]; + } + block.sync(); + + west[ty * BLOCK_SIZE + tx] = J_cuda[index_w]; + east[ty * BLOCK_SIZE + tx] = J_cuda[index_e]; + + if (bx == 0) + { + west[ty * BLOCK_SIZE + tx] = J_cuda[cols * BLOCK_SIZE * by + cols * ty]; + } + else if (bx == tile_dim_x - 1) + { + east[ty * BLOCK_SIZE + tx] = J_cuda[cols * BLOCK_SIZE * by + BLOCK_SIZE * (tile_dim_x - 1) + cols * ty + BLOCK_SIZE - 1]; + } + + block.sync(); + temp[ty * BLOCK_SIZE + tx] = J_cuda[index]; + + block.sync(); + + jc = temp[ty * BLOCK_SIZE + tx]; + + if (ty == 0 && tx == 0) + { // nw + n = north[ty * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = west[ty * BLOCK_SIZE + tx] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + else if (ty == 0 && tx == BLOCK_SIZE - 1) + { // ne + n = north[ty * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = east[ty * BLOCK_SIZE + tx] - jc; + } + else if (ty == BLOCK_SIZE - 1 && tx == BLOCK_SIZE - 1) + { // se + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = south[ty * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = east[ty * BLOCK_SIZE + tx] - jc; + } + else if (ty == BLOCK_SIZE - 1 && tx == 0) + { // sw + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = south[ty * BLOCK_SIZE + tx] - jc; + w = west[ty * BLOCK_SIZE + tx] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + + else if (ty == 0) + { // n + n = north[ty * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + else if (tx == BLOCK_SIZE - 1) + { // e + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = east[ty * BLOCK_SIZE + tx] - jc; + } + else if (ty == BLOCK_SIZE - 1) + { // s + n = 
temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = south[ty * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + else if (tx == 0) + { // w + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = west[ty * BLOCK_SIZE + tx] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + else + { // the data elements which are not on the borders + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + + g2 = (n * n + s * s + w * w + e * e) / (jc * jc); + + l = (n + s + w + e) / jc; + + num = (0.5 * g2) - ((1.0 / 16.0) * (l * l)); + den = 1 + (.25 * l); + qsqr = num / (den * den); + + // diffusion coefficent (equ 33) + den = (qsqr - q0sqr) / (q0sqr * (1 + q0sqr)); + c = 1.0 / (1.0 + den); + + // saturate diffusion coefficent + if (c < 0) + { + temp_result[ty * BLOCK_SIZE + tx] = 0; + } + else if (c > 1) + { + temp_result[ty * BLOCK_SIZE + tx] = 1; + } + else + { + temp_result[ty * BLOCK_SIZE + tx] = c; + } + + block.sync(); + + C_cuda[index] = temp_result[ty * BLOCK_SIZE + tx]; + E_C[index] = e; + W_C[index] = w; + S_C[index] = s; + N_C[index] = n; + } +} + +__global__ void +srad_cuda_2( + float *E_C, + float *W_C, + float *N_C, + float *S_C, + float *J_cuda, + float *C_cuda, + int cols, + int rows, + float lambda, + float q0sqr, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // shared memory allocation + __shared__ float south_c[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float east_c[BLOCK_SIZE * BLOCK_SIZE]; + + __shared__ float c_cuda_temp[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float c_cuda_result[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float temp[BLOCK_SIZE * BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / 
BLOCK_SIZE) * (block_size / BLOCK_SIZE); + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + //block id + int bx = tile % tile_dim_x; + int by = tile / tile_dim_x; + + //thread id + int tx = threadIdx.x; + int ty = threadIdx.y; + + // indices + int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx; + int index_s = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * BLOCK_SIZE + tx; + int index_e = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + BLOCK_SIZE; + + if (index_s >= (cols * rows)) index_s = cols * rows - 1; + if (index_e >= (cols * rows)) index_e = cols * rows - 1; + + float cc, cn, cs, ce, cw, d_sum; + + // load data to shared memory + temp[ty * BLOCK_SIZE + tx] = J_cuda[index]; + block.sync(); + + south_c[ty * BLOCK_SIZE + tx] = C_cuda[index_s]; + if (by == tile_dim_x - 1) + { + south_c[ty * BLOCK_SIZE + tx] = C_cuda[cols * BLOCK_SIZE * (tile_dim_x - 1) + BLOCK_SIZE * bx + cols * (BLOCK_SIZE - 1) + tx]; + } + block.sync(); + + east_c[ty * BLOCK_SIZE + tx] = C_cuda[index_e]; + if (bx == tile_dim_x - 1) + { + east_c[ty * BLOCK_SIZE + tx] = C_cuda[cols * BLOCK_SIZE * by + BLOCK_SIZE * (tile_dim_x - 1) + cols * ty + BLOCK_SIZE - 1]; + } + block.sync(); + + c_cuda_temp[ty * BLOCK_SIZE + tx] = C_cuda[index]; + block.sync(); + cc = c_cuda_temp[ty * BLOCK_SIZE + tx]; + + if (ty == BLOCK_SIZE - 1 && tx == BLOCK_SIZE - 1) + { // se + cn = cc; + cs = south_c[ty * BLOCK_SIZE + tx]; + cw = cc; + ce = east_c[ty * BLOCK_SIZE + tx]; + } + else if (tx == BLOCK_SIZE - 1) + { // e + cn = cc; + cs = c_cuda_temp[(ty + 1) * BLOCK_SIZE + tx]; + cw = cc; + ce = east_c[ty * BLOCK_SIZE + tx]; + } + else if (ty == BLOCK_SIZE - 1) + { // s + cn = cc; + cs = south_c[ty * BLOCK_SIZE + tx]; + cw = cc; + ce = c_cuda_temp[ty * BLOCK_SIZE + tx + 1]; + } + else + { // the data elements which are not on the borders + cn = cc; + cs = 
c_cuda_temp[(ty + 1) * BLOCK_SIZE + tx]; + cw = cc; + ce = c_cuda_temp[ty * BLOCK_SIZE + tx + 1]; + } + + // divergence (equ 58) + d_sum = cn * N_C[index] + cs * S_C[index] + cw * W_C[index] + ce * E_C[index]; + + // image update (equ 61) + c_cuda_result[ty * BLOCK_SIZE + tx] = temp[ty * BLOCK_SIZE + tx] + 0.25 * lambda * d_sum; + + block.sync(); + + J_cuda[index] = c_cuda_result[ty * BLOCK_SIZE + tx]; + } +} diff --git a/workloads/realworld/super_avg.csv b/workloads/realworld/super_avg.csv new file mode 100644 index 0000000000000000000000000000000000000000..1ee28e54805850f58330c21540be31836dcbef15 --- /dev/null +++ b/workloads/realworld/super_avg.csv @@ -0,0 +1,4 @@ +group,,standard,async,uvm,uvm_prefetch,uvm_prefetch_async +hotspot,gpu_kernel,0.0008643917211176661,0.0011879836843930126,0.029078856471561974,0.0019113087473676784,0.0018392048491718462 +hotspot,memcpy,0.08021131692906831,0.08486139827519462,0.041964134593608114,0.017332375037630857,0.017350988607509867 +hotspot,allocation,0.918924291349814,0.6125303448909771,0.727413209942327,0.9240858197070886,0.5651164640365434 diff --git a/workloads/realworld/uvm/BN/.clang-format b/workloads/realworld/uvm/BN/.clang-format new file mode 100644 index 0000000000000000000000000000000000000000..3a5940ef65bf1e40df9511da805a7a0440184e84 --- /dev/null +++ b/workloads/realworld/uvm/BN/.clang-format @@ -0,0 +1,90 @@ +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: false +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: 
false +AlwaysBreakTemplateDeclarations: false +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IndentCaseLabels: false +IndentWidth: 2 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 8 +UseTab: Never +... 
+ diff --git a/workloads/realworld/uvm/BN/LICENSE b/workloads/realworld/uvm/BN/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/workloads/realworld/uvm/BN/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/workloads/realworld/uvm/BN/Makefile b/workloads/realworld/uvm/BN/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..864b8e45401b0fe12162389c2e9d33fa86f4fc9f --- /dev/null +++ b/workloads/realworld/uvm/BN/Makefile @@ -0,0 +1,169 @@ +################################################################################ +# +# Copyright 1993-2013 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. 
NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. 
+# +################################################################################ +# +# Makefile project only supported on Mac OS X and Linux Platforms) +# +################################################################################ + +include ../../../common/make.config +include ./findcudalib.mk + +# Location of the CUDA Toolkit +CUDA_PATH ?= $(CUDA_DIR) + +# internal flags +NVCCFLAGS := -m${OS_SIZE} +CCFLAGS := -Wno-narrowing +NVCCLDFLAGS := +LDFLAGS := + +# Extra user flags +EXTRA_NVCCFLAGS ?= +EXTRA_NVCCLDFLAGS ?= +EXTRA_LDFLAGS ?= +EXTRA_CCFLAGS ?= + +# OS-specific build flags +ifneq ($(DARWIN),) + LDFLAGS += -rpath $(CUDA_PATH)/lib + CCFLAGS += -arch $(OS_ARCH) $(STDLIB) +else + ifeq ($(OS_ARCH),armv7l) + ifeq ($(abi),gnueabi) + CCFLAGS += -mfloat-abi=softfp + else + # default to gnueabihf + override abi := gnueabihf + LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3 + CCFLAGS += -mfloat-abi=hard + endif + endif +endif + +ifeq ($(ARMv7),1) +NVCCFLAGS += -target-cpu-arch ARM +ifneq ($(TARGET_FS),) +CCFLAGS += --sysroot=$(TARGET_FS) +LDFLAGS += --sysroot=$(TARGET_FS) +LDFLAGS += -rpath-link=$(TARGET_FS)/lib +LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib +LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-$(abi) +endif +endif + +# Debug build flags +ifeq ($(dbg),1) + NVCCFLAGS += -g -G + TARGET := debug +else + TARGET := release +endif + +ALL_CCFLAGS := +ALL_CCFLAGS += $(NVCCFLAGS) +ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS)) +ALL_CCFLAGS += $(EXTRA_NVCCFLAGS) +ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS)) + +ALL_LDFLAGS := +ALL_LDFLAGS += $(ALL_CCFLAGS) +ALL_LDFLAGS += $(NVCCLDFLAGS) +ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS)) +ALL_LDFLAGS += $(EXTRA_NVCCLDFLAGS) +ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS)) + +# Common includes and paths for CUDA +INCLUDES := -I../../common/inc -I$(INCLUDE) -I$(CUPTI_INCLUDE) +LIBRARIES := -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + 
+################################################################################ + +# CUDA code generation flags +ifneq ($(OS_ARCH),armv7l) +GENCODE_SM10 := -gencode arch=compute_10,code=sm_10 +endif +GENCODE_SM20 := -gencode arch=compute_20,code=sm_20 +GENCODE_SM30 := -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=\"sm_35,compute_35\" +GENCODE_SM80 := -gencode arch=compute_80,code=sm_80 +GENCODE_FLAGS := $(GENCODE_SM80) + +################################################################################ + +# Target rules +all: build + +build: ordergraph_30 ordergraph_40 ordergraph_45 ordergraph_50 + +# ordergraph_25.o: ordergraph.cu ordergraph_kernel.cu +# $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_25 -o $@ -c $< + +ordergraph_30.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_30 -o $@ -c $< + +ordergraph_40.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_40 -o $@ -c $< + +ordergraph_45.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_45 -o $@ -c $< + +ordergraph_50.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_50 -o $@ -c $< + +# ordergraph_125.o: ordergraph.cu ordergraph_kernel.cu +# $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_125 -o $@ -c $< + + + +# ordergraph_25: ordergraph_25.o +# $(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES) + +ordergraph_30: ordergraph_30.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +ordergraph_40: ordergraph_40.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +ordergraph_45: ordergraph_45.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp 
$(LIBRARIES) + +ordergraph_50: ordergraph_50.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +# ordergraph_125: ordergraph_125.o +# $(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES) + +run: build + ./ordergraph + +clean: + rm -f ordergraph_30 ordergraph_40 ordergraph_45 ordergraph_50 *.o *.bin *.out + +clobber: clean diff --git a/workloads/realworld/uvm/BN/README.md b/workloads/realworld/uvm/BN/README.md new file mode 100644 index 0000000000000000000000000000000000000000..07158a0bd52af63032c860ff04e243e0d7c76ef1 --- /dev/null +++ b/workloads/realworld/uvm/BN/README.md @@ -0,0 +1,21 @@ +The code works with CUDA 6.0. +If you are using this code for your project, please cite [our paper](https://yuemmawang.github.io/publications/wang-tpds2016.pdf): + +``` +Wang Y, Qian W, Zhang S, et al. A Learning Algorithm for Bayesian Networks and +Its Efficient Implementation on GPUs[J]. Parallel and Distributed Systems, IEEE +Transactions on, 2016, 27(1): 17-30. +``` + +``` +@article{wang2015learning, + title={A learning algorithm for Bayesian networks and its efficient implementation on GPUs}, + author={Wang, Yu and Qian, Weikang and Zhang, Shuchang and Liang, Xiaoyao and Yuan, Bo}, + journal={IEEE Transactions on Parallel and Distributed Systems}, + volume={27}, + number={1}, + pages={17--30}, + year={2015}, + publisher={IEEE} +} +``` diff --git a/workloads/realworld/uvm/BN/data125.cu b/workloads/realworld/uvm/BN/data125.cu new file mode 100644 index 0000000000000000000000000000000000000000..6bb370a636a330992e083f0b52f1f67a9a86040e --- /dev/null +++ b/workloads/realworld/uvm/BN/data125.cu @@ -0,0 +1,610 @@ +// The data are synthesized. 
+ +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=25; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0, 
+1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,1, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,1, 
+0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0, 
+1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,0,1,0, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, 
+0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, 
+1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,0,1,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0, 
+1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0, 
+0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0, 
+1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, 
+0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,0,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0, 
+0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, 
+1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,0,1,0,0,0,1, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0, 
+1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0, 
+0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0, 
+0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,1,0,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0, 
+0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, 
+1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1, 
+1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, 
+1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,1,0, 
+1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1, 
+1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1, 
+0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,0,0,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1, 
+1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,0,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,0, 
+1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0, 
+1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,0, 
+1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0, 
+1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,0, 
+1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,0, 
+1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,0, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,0,0,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1, 
+1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0, 
+1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,1,0, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0, 
+0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,1,1,0,0,0,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0, 
+1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,0,0,1,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,1,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0, 
+0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,1,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,1,1, 
+1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0, 
+1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,1, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,0, 
+1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,1,0,1, 
+0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,0,0, 
+1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,0,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,1,1, 
+1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,1,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,1,1, 
+0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, 
+1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0, 
+1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,1,0, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, 
+1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,0, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,1,0,0,0,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, 
+1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,1,1,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,1,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,1,0, 
+1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,0,1,1,0, 
+1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,1,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0, 
+0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0, 
+1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0, 
+1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,0,1,0,1,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0, 
+1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,1,1,1,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,1, +} +#endif diff --git a/workloads/realworld/uvm/BN/data25.cu b/workloads/realworld/uvm/BN/data25.cu new file mode 100644 index 0000000000000000000000000000000000000000..6af94f79766c6e36ee121f6c537f987f841bf7c0 --- /dev/null +++ b/workloads/realworld/uvm/BN/data25.cu @@ -0,0 +1,612 @@ +// The data are synthesized. 
+ +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=25; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1, 
+0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1, 
+1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1, 
+1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1, 
+1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0, 
+1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0, 
+1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0, 
+1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1, 
+1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1, 
+1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0, 
+0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0, 
+1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0, 
+1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +} + +#endif + diff --git a/workloads/realworld/uvm/BN/data30.cu b/workloads/realworld/uvm/BN/data30.cu new file mode 100644 index 0000000000000000000000000000000000000000..bf89729e319e920533f9d134c8a2dff9aa4bc022 --- /dev/null +++ b/workloads/realworld/uvm/BN/data30.cu @@ -0,0 
+1,612 @@ +// The data are synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=30; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1, 
+1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0, 
+1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0, 
+1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0, 
+1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1, 
+1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1, 
+1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0, 
+1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1, 
+1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1, 
+1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0, 
+1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1, 
+0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, 
+1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +} + +#endif + diff --git a/workloads/realworld/uvm/BN/data40.cu b/workloads/realworld/uvm/BN/data40.cu new file mode 100644 index 0000000000000000000000000000000000000000..16d34d9dc4860d1dd24d604e501bccb43cae8095 --- /dev/null +++ b/workloads/realworld/uvm/BN/data40.cu @@ -0,0 +1,612 @@ +// The data are synthesized. 
+ +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=40; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1, 
+1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0, 
+1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1, 
+1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1, 
+1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0, 
+0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0, 
+1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0, 
+0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1, 
+1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1, 
+1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, 
+1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1, 
+1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1, 
+1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1, 
+1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0, 
+0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0, 
+1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0, 
+1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1, +} + +#endif + diff --git a/workloads/realworld/uvm/BN/data45.cu b/workloads/realworld/uvm/BN/data45.cu new file mode 100644 index 0000000000000000000000000000000000000000..b23e9a35e7c27948c0853710a06be462694df57d --- /dev/null +++ b/workloads/realworld/uvm/BN/data45.cu @@ -0,0 +1,616 @@ +// The data are synthesized. +#include +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=45; +const int STATE_N=2; +const int DATA_N=600; + + + +int data[DATA_N*NODE_N]= { +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0, 
+0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0, 
+1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0, 
+1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1, 
+1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1, 
+0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1, 
+1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0, 
+0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1, 
+1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0, 
+1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0, 
+0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1, 
+1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1, 
+1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0, 
+1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, 
+1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1, 
+0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0, 
+1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0, 
+1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1, 
+1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0, 
+1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, 
+1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1, 
+1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0, 
+0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1, 
+1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0, +} + + +#endif + diff --git a/workloads/realworld/uvm/BN/data50.cu b/workloads/realworld/uvm/BN/data50.cu new file mode 100644 index 0000000000000000000000000000000000000000..936a7aa4a67a1f949f1264477388a7eb5a93a1b4 --- /dev/null +++ 
b/workloads/realworld/uvm/BN/data50.cu @@ -0,0 +1,612 @@ +// The data are synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=50; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,0,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1, 
+1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0, 
+1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1, 
+1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1, 
+1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,0,1,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,1,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0, 
+0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,0,0,1,1,1, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0, 
+1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1, 
+1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1, 
+1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1, 
+0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,1,0,0,0,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0, 
+1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0, 
+0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1, 
+1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,1,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0, 
+1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0, 
+1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1, 
+1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0, 
+1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1, 
+1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,0,1,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0, 
+0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,0,1,0,0,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,1, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0, 
+1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0, +} + +#endif + diff --git a/workloads/realworld/uvm/BN/file_process.py b/workloads/realworld/uvm/BN/file_process.py new file mode 100644 index 0000000000000000000000000000000000000000..eeebbee70e59153ca0f1a960f4e8ffa0437693c3 --- /dev/null +++ b/workloads/realworld/uvm/BN/file_process.py @@ -0,0 +1,15 @@ +import os +filename = "data125.cu" +file_write = "data45.cu" + +f_w = open(file_write,"w") +with open(filename) as f: + content = f.readlines() + + for i, line in enumerate(content): + if i < 8: + f_w.write(line) + elif i >= 8: + f_w.write(line[0:90]) + f_w.write("\n") +f_w.close() \ No newline at end of file diff --git a/workloads/realworld/uvm/BN/findcudalib.mk 
b/workloads/realworld/uvm/BN/findcudalib.mk new file mode 100644 index 0000000000000000000000000000000000000000..f40c2c38e5510fdee2fdf59df00160b547c056c1 --- /dev/null +++ b/workloads/realworld/uvm/BN/findcudalib.mk @@ -0,0 +1,226 @@ +################################################################################ +# +# Copyright 1993-2013 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# findcudalib.mk is used to find the locations for CUDA libraries and other +# Unix Platforms. This is supported Mac OS X and Linux. 
+# +################################################################################ + +# OS Name (Linux or Darwin) +OSUPPER = $(shell uname -s 2>/dev/null | tr "[:lower:]" "[:upper:]") +OSLOWER = $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]") + +# Flags to detect 32-bit or 64-bit OS platform +OS_SIZE = $(shell uname -m | sed -e "s/i.86/32/" -e "s/x86_64/64/" -e "s/armv7l/32/") +OS_ARCH = $(shell uname -m | sed -e "s/i386/i686/") + +# Determine OS platform and unix distribution +ifeq ("$(OSLOWER)","linux") + # first search lsb_release + DISTRO = $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]") + DISTVER = $(shell lsb_release -r -s 2>/dev/null) + ifeq ("$(DISTRO)",'') + # second search and parse /etc/issue + DISTRO = $(shell more /etc/issue | awk '{print $$1}' | sed '1!d' | sed -e "/^$$/d" 2>/dev/null | tr "[:upper:]" "[:lower:]") + DISTVER= $(shell more /etc/issue | awk '{print $$2}' | sed '1!d' 2>/dev/null + endif + ifeq ("$(DISTRO)",'') + # third, we can search in /etc/os-release or /etc/{distro}-release + DISTRO = $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB") + DISTVER= $(shell awk '/DISTRIB_RELEASE/' /etc/*-release | sed 's/DISTRIB_RELEASE=//' | grep -v "DISTRIB_RELEASE") + endif +endif + +# search at Darwin (unix based info) +DARWIN = $(strip $(findstring DARWIN, $(OSUPPER))) +ifneq ($(DARWIN),) + SNOWLEOPARD = $(strip $(findstring 10.6, $(shell egrep "10\.6" /System/Library/CoreServices/SystemVersion.plist))) + LION = $(strip $(findstring 10.7, $(shell egrep "10\.7" /System/Library/CoreServices/SystemVersion.plist))) + MOUNTAIN = $(strip $(findstring 10.8, $(shell egrep "10\.8" /System/Library/CoreServices/SystemVersion.plist))) + MAVERICKS = $(strip $(findstring 10.9, $(shell egrep "10\.9" /System/Library/CoreServices/SystemVersion.plist))) +endif + +# Common binaries +GCC ?= g++ +CLANG ?= /usr/bin/clang + +ifeq ("$(OSUPPER)","LINUX") + NVCC ?= $(CUDA_PATH)/bin/nvcc 
-ccbin $(GCC) +else + # for some newer versions of XCode, CLANG is the default compiler, so we need to include this + ifneq ($(MAVERICKS),) + NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(CLANG) + STDLIB ?= -stdlib=libstdc++ + else + NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(GCC) + endif +endif + +# Take command line flags that override any of these settings +ifeq ($(i386),1) + OS_SIZE = 32 + OS_ARCH = i686 +endif +ifeq ($(x86_64),1) + OS_SIZE = 64 + OS_ARCH = x86_64 +endif +ifeq ($(ARMv7),1) + OS_SIZE = 32 + OS_ARCH = armv7l +endif + +ifeq ("$(OSUPPER)","LINUX") + # Each Linux Distribuion has a set of different paths. This applies especially when using the Linux RPM/debian packages + ifeq ("$(DISTRO)","ubuntu") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","kubuntu") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","debian") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","suse") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","suse linux") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","opensuse") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","fedora") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","redhat") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = 
/usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","red") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","redhatenterpriseworkstation") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH ?= /usr/lib + endif + endif + ifeq ("$(DISTRO)","centos") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + + ifeq ($(ARMv7),1) + CUDAPATH := /usr/arm-linux-gnueabihf/lib + CUDALINK := -L/usr/arm-linux-gnueabihf/lib + ifneq ($(TARGET_FS),) + CUDAPATH += $(TARGET_FS)/usr/lib/nvidia-current + CUDALINK += -L$(TARGET_FS)/usr/lib/nvidia-current + endif + endif + + # Search for Linux distribution path for libcuda.so + CUDALIB ?= $(shell find $(CUDAPATH) $(DFLT_PATH) -name libcuda.so -print 2>/dev/null) + + ifeq ("$(CUDALIB)",'') + $(info >>> WARNING - CUDA Driver libcuda.so is not found. Please check and re-install the NVIDIA driver. 
<<<) + EXEC=@echo "[@]" + endif +else + # This would be the Mac OS X path if we had to do anything special +endif + diff --git a/workloads/realworld/uvm/BN/ordergraph.cu b/workloads/realworld/uvm/BN/ordergraph.cu new file mode 100644 index 0000000000000000000000000000000000000000..a1d71a81543a7365be2bcd94a45310ce443f9b66 --- /dev/null +++ b/workloads/realworld/uvm/BN/ordergraph.cu @@ -0,0 +1,728 @@ +#include +#include +#include +#include +#include +// #include +// includes CUDA +#include +// includes, kernels +#include "ordergraph_kernel.cu" +; + +#include "../../../common/cpu_timestamps.h" +#include "../../../common/cupti_add.h" + +const int HIGHEST = 3; +int taskperthr = 1; +int sizepernode; +int ITER = 100; + +// global var +float preScore = -99999999999.0f; +float score = 0.0; +float maxScore[HIGHEST] = {-999999999.0f}; +bool orders[NODE_N][NODE_N]; +bool preOrders[NODE_N][NODE_N]; +bool preGraph[NODE_N][NODE_N]; +bool bestGraph[HIGHEST][NODE_N][NODE_N]; +bool graph[NODE_N][NODE_N]; +// float prior[NODE_N][NODE_N]; +float *localscore, *D_localscore, *D_Score, *scores; +float *LG; +bool *D_parent; +int *D_resP, *parents; + +void initial(); // initial orders and data +int genOrders(); // swap +int ConCore(); // discard new order or not +bool getparent(int *bit, int *pre, int posN, int *parent, int *parN, + int time); // get every possible set of parents for a node +void incr(int *bit, int n); // binary code increases 1 each time +void incrS(int *bit, int n); // STATE_N code increases 1 each time +bool getState( + int parN, int *state, + int time); // get every possible combination of state for a parent set +float logGamma(int N); // log and gamma +float findBestGraph(); +void genScore(); +int convert(int *parent, int parN); +void sortGraph(); +void swap(int a, int b); +void Pre_logGamma(); +int findindex(int *arr, int size); +int C(int n, int a); + +FILE *fpout; + +int main(int argc, char *argv[]) { + /* + for(i=0;i maxScore[HIGHEST - 1]) { + maxScore[HIGHEST - 
1] = preScore; + for (a = 0; a < NODE_N; a++) { + for (b = 0; b < NODE_N; b++) { + bestGraph[HIGHEST - 1][a][b] = preGraph[a][b]; + } + } + b = HIGHEST - 1; + for (a = HIGHEST - 2; a >= 0; a--) { + if (maxScore[b] > maxScore[a]) { + swap(a, b); + tmpd = maxScore[a]; + maxScore[a] = maxScore[b]; + maxScore[b] = tmpd; + b = a; + } + } + } + } + + } // endwhile + + cudaFreeHost(localscore); + cudaFree(D_localscore); + cudaFree(D_parent); + + cudaFreeHost(scores); + cudaFreeHost(parents); + cudaFree(D_Score); + cudaFree(D_resP); + + /* + for(j=0;j max) { + max = maxScore[i]; + maxi = i; + } + } + + swap(j, maxi); + tmp = maxScore[j]; + maxScore[j] = max; + maxScore[maxi] = tmp; + } +} + +void swap(int a, int b) { + int i, j; + bool tmp; + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + + tmp = bestGraph[a][i][j]; + bestGraph[a][i][j] = bestGraph[b][i][j]; + bestGraph[b][i][j] = tmp; + } + } +} + +void initial() { + int i, j, tmp, a, b, r; + bool tmpd; + tmp = 1; + for (i = 1; i <= 4; i++) { + tmp += C(NODE_N - 1, i); + } + sizepernode = tmp; + tmp *= NODE_N; + + cudaMallocHost((void **)&localscore, tmp * sizeof(float)); + + for (i = 0; i < tmp; i++) + localscore[i] = 0; + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) + orders[i][j] = 0; + } + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < i; j++) + orders[i][j] = 1; + } + r = rand() % 10000; + for (i = 0; i < r; i++) { + a = rand() % NODE_N; + b = rand() % NODE_N; + for (j = 0; j < NODE_N; j++) { + tmpd = orders[j][a]; + orders[j][a] = orders[j][b]; + orders[j][b] = tmpd; + } + + for (j = 0; j < NODE_N; j++) { + tmpd = orders[a][j]; + orders[a][j] = orders[b][j]; + orders[b][j] = tmpd; + } + } + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + preOrders[i][j] = orders[i][j]; + } + } +} + +// generate ramdom order +int genOrders() { + + int a, b, j; + bool tmp; + a = rand() % NODE_N; + b = rand() % NODE_N; + + for (j = 0; j < NODE_N; j++) { + tmp = orders[a][j]; 
+ orders[a][j] = orders[b][j]; + orders[b][j] = tmp; + } + for (j = 0; j < NODE_N; j++) { + tmp = orders[j][a]; + orders[j][a] = orders[j][b]; + orders[j][b] = tmp; + } + + return 1; +} + +// decide leave or discard an order +int ConCore() { + int i, j; + float tmp; + tmp = log((rand() % 100000) / 100000.0); + if (tmp < (score - preScore)) { + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + preOrders[i][j] = orders[i][j]; + preGraph[i][j] = graph[i][j]; + } + } + preScore = score; + + return 1; + } + + return 0; +} + +void genScore() { + int *D_data; + float *D_LG; + dim3 grid(sizepernode / 256 + 1, 1, 1); + dim3 threads(256, 1, 1); + + Pre_logGamma(); + // cudaPrintfInit(); + cudaMallocManaged((void **)&D_data, NODE_N * DATA_N * sizeof(int)); + cudaMallocManaged((void **)&D_localscore, NODE_N * sizepernode * sizeof(float)); + cudaMallocManaged((void **)&D_LG, (DATA_N + 2) * sizeof(float)); + cudaMemset(D_localscore, 0.0, NODE_N * sizepernode * sizeof(float)); + memcpy(D_data, data, NODE_N * DATA_N * sizeof(int)); + memcpy(D_LG, LG, (DATA_N + 2) * sizeof(float)); + genScoreKernel<<>>(sizepernode, D_localscore, D_data, D_LG); + cudaDeviceSynchronize(); + memcpy(localscore, D_localscore, NODE_N * sizepernode * sizeof(float)); + + // cudaPrintfDisplay(stdout, true); + // cudaPrintfEnd(); + + cudaFreeHost(LG); + cudaFree(D_LG); + cudaFree(D_data); + + cudaMallocHost((void **)&scores, + (sizepernode / (256 * taskperthr) + 1) * sizeof(float)); + cudaMallocHost((void **)&parents, + (sizepernode / (256 * taskperthr) + 1) * 4 * sizeof(int)); + cudaMallocManaged((void **)&D_Score, + (sizepernode / (256 * taskperthr) + 1) * sizeof(float)); + cudaMallocManaged((void **)&D_parent, NODE_N * sizeof(bool)); + cudaMallocManaged((void **)&D_resP, + (sizepernode / (256 * taskperthr) + 1) * 4 * sizeof(int)); +} + +int convert(int *parent, int parN) { + int i, j, w = 1, tmp = 0; + j = 0; + for (i = 0; parN > 0 && i <= parent[parN - 1]; i++) { + if (parent[j] == i) { 
+ j++; + tmp += w; + } + w *= 2; + } + + return tmp; +} + +void Pre_logGamma() { + + cudaMallocHost((void **)&LG, (DATA_N + 2) * sizeof(float)); + + LG[1] = log(1.0); + float i; + for (i = 2; i <= DATA_N + 1; i++) { + LG[(int)i] = LG[(int)i - 1] + log((float)i); + } +} + +void incr(int *bit, int n) { + + bit[n]++; + if (bit[n] >= 2) { + bit[n] = 0; + incr(bit, n + 1); + } + + return; +} + +void incrS(int *bit, int n) { + + bit[n]++; + if (bit[n] >= STATE_N) { + bit[n] = 0; + incr(bit, n + 1); + } + + return; +} + +bool getState(int parN, int *state, int time) { + int j = 1; + + j = pow(STATE_N, (float)parN) - 1; + + if (time > j) + return false; + + if (time >= 1) + incrS(state, 0); + + return true; +} + +bool getparent(int *bit, int *pre, int posN, int *parent, int *parN, int time) { + int i, j = 1; + + *parN = 0; + if (time == 0) + return true; + + for (i = 0; i < posN; i++) { + j = j * 2; + } + j--; + + if (time > j) + return false; + + incr(bit, 0); + + for (i = 0; i < posN; i++) { + if (bit[i] == 1) { + parent[(*parN)++] = pre[i]; + } + } + + return true; +} + +float findBestGraph() { + float bestls = -99999999; + int bestparent[5]; + int bestpN, total; + int node, index; + int pre[NODE_N] = {0}; + int parent[NODE_N] = {0}; + int posN = 0, i, j, parN, tmp, k, l; + float ls = -99999999999, score = 0; + int blocknum; + + for (i = 0; i < NODE_N; i++) + for (j = 0; j < NODE_N; j++) + graph[i][j] = 0; + + for (node = 0; node < NODE_N; node++) { + + bestls = -99999999; + posN = 0; + + for (i = 0; i < NODE_N; i++) { + if (orders[node][i] == 1) { + pre[posN++] = i; + } + } + + if (posN >= 0) { + total = C(posN, 4) + C(posN, 3) + C(posN, 2) + posN + 1; + taskperthr = 1; + blocknum = total / (256 * taskperthr) + 1; + + int nbatches = MIN_NBATCHES; + + int blocknum_max = total / (BLOCK_SIZE * MIN_NBATCHES * taskperthr) + 1; + if (blocknum_max >= MAX_NBLOCKS) { + blocknum = MAX_NBLOCKS; + nbatches = (total + 1) / (BLOCK_SIZE * MAX_NBLOCKS * taskperthr); + } else { + 
blocknum = blocknum_max; + } + + cudaMemset(D_resP, 0, blocknum * 4 * sizeof(int)); + cudaMemset(D_Score, -999999.0, blocknum * nbatches * sizeof(float)); + memcpy(D_parent, orders[node], NODE_N * sizeof(bool)); + + computeKernel<<>>( + taskperthr, sizepernode, D_localscore, D_parent, node, total, D_Score, + D_resP, nbatches); + cudaDeviceSynchronize(); + memcpy(parents, D_resP, blocknum * 4 * sizeof(int)); + memcpy(scores, D_Score, blocknum * sizeof(float)); + + for (i = 0; i < blocknum * nbatches; i++) { + + if (scores[i] > bestls) { + + bestls = scores[i]; + + parN = 0; + for (tmp = 0; tmp < 4; tmp++) { + if (parents[i * 4 + tmp] < 0) + break; + + bestparent[tmp] = parents[i * 4 + tmp]; + + parN++; + } + + bestpN = parN; + } + } + } else { + if (posN >= 4) { + for (i = 0; i < posN; i++) { + for (j = i + 1; j < posN; j++) { + for (k = j + 1; k < posN; k++) { + for (l = k + 1; l < posN; l++) { + parN = 4; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + if (pre[j] > node) + parent[2] = pre[j]; + else + parent[2] = pre[j] + 1; + if (pre[k] > node) + parent[3] = pre[k]; + else + parent[3] = pre[k] + 1; + if (pre[l] > node) + parent[4] = pre[l]; + else + parent[4] = pre[l] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + } + } + } + + if (posN >= 3) { + for (i = 0; i < posN; i++) { + for (j = i + 1; j < posN; j++) { + for (k = j + 1; k < posN; k++) { + + parN = 3; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + if (pre[j] > node) + parent[2] = pre[j]; + else + parent[2] = pre[j] + 1; + if (pre[k] > node) + parent[3] = pre[k]; + else + parent[3] = pre[k] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; 
tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + } + } + + if (posN >= 2) { + for (i = 0; i < posN; i++) { + for (j = i + 1; j < posN; j++) { + + parN = 2; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + if (pre[j] > node) + parent[2] = pre[j]; + else + parent[2] = pre[j] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + } + + if (posN >= 1) { + for (i = 0; i < posN; i++) { + + parN = 1; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + + parN = 0; + index = sizepernode * node; + + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = 0; + } + } + if (bestls > -99999999) { + + for (i = 0; i < bestpN; i++) { + if (bestparent[i] < node) + graph[node][bestparent[i] - 1] = 1; + else + graph[node][bestparent[i]] = 1; + } + score += bestls; + } + } + + return score; +} + +int findindex(int *arr, int size) { // reminder: arr[0] has to be 0 && size == + // array size-1 && index start from 0 + int i, j, index = 0; + + for (i = 1; i < size; i++) { + index += C(NODE_N - 1, i); + } + + for (i = 1; i <= size - 1; i++) { + for (j = arr[i - 1] + 1; j <= arr[i] - 1; j++) { + index += C(NODE_N - 1 - j, size - i); + } + } + + index += arr[size] - arr[size - 1]; + + return index; +} + +int C(int n, int a) { + int i, res = 1, atmp = a; + + for (i = 0; i < atmp; i++) { + res *= n; + n--; + } + + for (i = 0; i < atmp; i++) { + res /= a; + a--; + } + + return res; +} \ No newline at end of file diff --git a/workloads/realworld/uvm/BN/ordergraph_kernel.cu 
b/workloads/realworld/uvm/BN/ordergraph_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..1c2c3693aacaf1f9749b6cf5b7dafcb076406326 --- /dev/null +++ b/workloads/realworld/uvm/BN/ordergraph_kernel.cu @@ -0,0 +1,325 @@ +#ifndef _ORDERGRAPH_KERNEL_H_ +#define _ORDERGRAPH_KERNEL_H_ + +#include + +#ifdef DATA_25 +#include "data25.cu" +#endif +#ifdef DATA_30 +#include "data30.cu" +#endif +#ifdef DATA_40 +#include "data40.cu" +#endif +#ifdef DATA_45 +#include "data45.cu" +#endif +#ifdef DATA_50 +#include "data50.cu" +#endif +#ifdef DATA_125 +#include "data125.cu" +#endif +; + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define BLOCK_SIZE 256 +#define MAX_NBLOCKS 1024 +#define MIN_NBATCHES 16 + + +__device__ void Dincr(int *bit, int n); +__device__ void DincrS(int *bit, int n); +__device__ bool D_getState(int parN, int *sta, int time); +__device__ void D_findComb(int *comb, int l, int n); +__device__ int D_findindex(int *arr, int size); +__device__ int D_C(int n, int a); + +__global__ void genScoreKernel(int sizepernode, float *D_localscore, + int *D_data, float *D_LG) { + int id = blockIdx.x * BLOCK_SIZE + threadIdx.x; + int node, index; + bool flag; + int parent[5] = {0}; + int pre[NODE_N] = {0}; + int state[5] = {0}; + int i, j, parN = 0, tmp, t; + int t1 = 0, t2 = 0; + float ls = 0; + int Nij[STATE_N] = {0}; + + if (id < sizepernode) { + + D_findComb(parent, id, NODE_N - 1); + + for (i = 0; i < 4; i++) { + if (parent[i] > 0) + parN++; + } + + for (node = 0; node < NODE_N; node++) { + + j = 1; + for (i = 0; i < NODE_N; i++) { + if (i != node) + pre[j++] = i; + } + + for (tmp = 0; tmp < parN; tmp++) + state[tmp] = 0; + + index = sizepernode * node + id; + + // priors + /* + for(tmp=1;tmp<=4;tmp++){ + localscore[index]+=100*(prior[node][pre[parent[tmp]]]-0.5)*(prior[node][pre[parent[tmp]]]-0.5)*(prior[node][pre[parent[tmp]]]-0.5); + } + */ + t = 0; + while (D_getState(parN, state, t++)) { // for 
get state + // printf("test %u\n",id); + ls = 0; + for (tmp = 0; tmp < STATE_N; tmp++) + Nij[tmp] = 0; + + for (t1 = 0; t1 < DATA_N; t1++) { + flag = true; + for (t2 = 0; t2 < parN; t2++) { + if (D_data[t1 * NODE_N + pre[parent[t2]]] != state[t2]) { + flag = false; + break; + } + } + if (!flag) + continue; + + Nij[D_data[t1 * NODE_N + node]]++; + } + + tmp = STATE_N - 1; + + for (t1 = 0; t1 < STATE_N; t1++) { + ls += D_LG[Nij[t1]]; + tmp += Nij[t1]; + } + + ls -= D_LG[tmp]; + ls += D_LG[STATE_N - 1]; + + D_localscore[index] += ls; + } + } + } +} + +__global__ void computeKernel(int taskperthr, int sizepernode, + float *D_localscore, bool *D_parent, int node, + int total, float *D_Score, int *D_resP, int nbatches) { + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + __shared__ float lsinblock[BLOCK_SIZE]; + + for (int b = 0; b < nbatches; b++) { + unsigned int bid = blockIdx.x * nbatches + b; + unsigned int tid = threadIdx.x; + unsigned int id = bid * (BLOCK_SIZE * nbatches) + tid; + + int posN = 1, i, index, t, tmp; + int pre[NODE_N] = {0}; + int parN = 0; + int bestparent[4] = {0}, parent[5] = {-1}; + float bestls = -999999999999999, ls; + + for (i = 0; i < NODE_N; i++) { + if (D_parent[i] == 1) { + pre[posN++] = i; + } + } + + for (i = 0; i < taskperthr && ((id * taskperthr + i) < total); i++) { + + D_findComb(parent, id * taskperthr + i, posN); + + for (parN = 0; parN < 4; parN++) { + if (parent[parN] < 0) + break; + if (pre[parent[parN]] > node) + parent[parN] = pre[parent[parN]]; + else + parent[parN] = pre[parent[parN]] + 1; + } + + for (tmp = parN; tmp > 0; tmp--) { + parent[tmp] = parent[tmp - 1]; + } + parent[0] = 0; + + index = D_findindex(parent, parN); + index += sizepernode * node; + + ls = D_localscore[index]; + + if (ls > bestls) { + bestls = ls; + for (tmp = 0; tmp < 4; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + + lsinblock[tid] = bestls; + block.sync(); + + for (i = BLOCK_SIZE / 2; i >= 1; i /= 2) { + + 
if (tid < i) { + if (lsinblock[tid + i] > lsinblock[tid] && lsinblock[tid + i] < 0) { + lsinblock[tid] = lsinblock[tid + i]; + lsinblock[tid + i] = (float)(tid + i); + } else if (lsinblock[tid + i] < lsinblock[tid] && lsinblock[tid] < 0) { + lsinblock[tid + i] = (float)tid; + } else if (lsinblock[tid] > 0 && lsinblock[tid + i] < 0) { + lsinblock[tid] = lsinblock[tid + i]; + lsinblock[tid + i] = (float)(tid + i); + } else if (lsinblock[tid] < 0 && lsinblock[tid + i] > 0) { + lsinblock[tid + i] = (float)tid; + } + } + block.sync(); + } + block.sync(); + + if (tid == 0) { + D_Score[bid] = lsinblock[0]; + t = 0; + for (i = 0; i < 7 && t < 128 && t >= 0; i++) { + t = (int)lsinblock[(int)powf(2.0, i) + t]; + } + lsinblock[0] = (float)t; + } + block.sync(); + + if (tid == (int)lsinblock[0]) { + for (i = 0; i < 4; i++) { + D_resP[bid * 4 + i] = bestparent[i]; + } + } + + } +} + +__device__ void Dincr(int *bit, int n) { + + while (n <= NODE_N) { + bit[n]++; + if (bit[n] >= 2) { + bit[n] = 0; + n++; + } else { + break; + } + } + + return; +} + +__device__ void DincrS(int *bit, int n) { + + bit[n]++; + if (bit[n] >= STATE_N) { + bit[n] = 0; + Dincr(bit, n + 1); + } + + return; +} + +__device__ bool D_getState(int parN, int *sta, int time) { + int i, j = 1; + + for (i = 0; i < parN; i++) { + j *= STATE_N; + } + j--; + if (time > j) + return false; + + if (time >= 1) + DincrS(sta, 0); + + return true; +} + +__device__ void D_findComb(int *comb, int l, int n) { + const int len = 4; + if (l == 0) { + for (int i = 0; i < len; i++) + comb[i] = -1; + return; + } + int sum = 0; + int k = 1; + + while (sum < l) + sum += D_C(n, k++); + l -= sum - D_C(n, --k); + int low = 0; + int pos = 0; + while (k > 1) { + sum = 0; + int s = 1; + while (sum < l) + sum += D_C(n - s++, k - 1); + l -= sum - D_C(n - (--s), --k); + low += s; + comb[pos++] = low; + n -= s; + } + comb[pos] = low + l; + for (int i = pos + 1; i < 4; i++) + comb[i] = -1; +} + +__device__ int D_findindex(int *arr, + int size) { 
// reminder: arr[0] has to be 0 && size + // == array size-1 && index start from 0 + int i, j, index = 0; + + for (i = 1; i < size; i++) { + index += D_C(NODE_N - 1, i); + } + + for (i = 1; i <= size - 1; i++) { + for (j = arr[i - 1] + 1; j <= arr[i] - 1; j++) { + index += D_C(NODE_N - 1 - j, size - i); + } + } + + index += arr[size] - arr[size - 1]; + + return index; +} + +__device__ int D_C(int n, int a) { + int i, res = 1, atmp = a; + + for (i = 0; i < atmp; i++) { + res *= n; + n--; + } + + for (i = 0; i < atmp; i++) { + res /= a; + a--; + } + + return res; +} + +#endif diff --git a/workloads/realworld/uvm/BN/run.sh b/workloads/realworld/uvm/BN/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..f87817975334cff247b1bdf91651d41062aa8320 --- /dev/null +++ b/workloads/realworld/uvm/BN/run.sh @@ -0,0 +1,5 @@ +# ./ordergraph_25 +# ./ordergraph_30 +# ./ordergraph_40 +# ./ordergraph_45 +./ordergraph_50 \ No newline at end of file diff --git a/workloads/realworld/uvm/BN/run_super.sh b/workloads/realworld/uvm/BN/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..5c45d88db0716b0b4b0828ba397cbd918d1612c0 --- /dev/null +++ b/workloads/realworld/uvm/BN/run_super.sh @@ -0,0 +1 @@ +./ordergraph_50 diff --git a/workloads/realworld/uvm/backprop/Makefile b/workloads/realworld/uvm/backprop/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..286cd40af79bbb80b6d86aad9bd0d2c0d1a846e0 --- /dev/null +++ b/workloads/realworld/uvm/backprop/Makefile @@ -0,0 +1,47 @@ +include ../../../common/make.config + +# C compiler +CC = gcc +CC_FLAGS = -g -O2 + +# CUDA compiler +NVCC = $(CUDA_DIR)/bin/nvcc +NVCC_FLAGS = -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -arch=sm_80 + +# 'make dbg=1' enables NVCC debugging +ifeq ($(dbg),1) + NVCC_FLAGS += -g -O0 +else + NVCC_FLAGS += -O2 +endif + +# 'make emu=1' compiles the CUDA kernels for emulation +ifeq ($(emu),1) + NVCC_FLAGS += -deviceemu +endif + + +backprop: backprop.o 
facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + $(NVCC) $(NVCC_FLAGS) backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -o backprop $(NVCC_FLAGS) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++ + +# backprop: backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp +# $(CC) $(CC_FLAGS) backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -o backprop -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++ + +%.o: %.[ch] + $(CC) $(CC_FLAGS) $< -c + +facetrain.o: facetrain.c backprop.h + $(CC) $(CC_FLAGS) facetrain.c -c + +backprop.o: backprop.c backprop.h + $(CC) $(CC_FLAGS) backprop.c -c + +backprop_cuda.o: backprop_cuda.cu backprop.h $(CUPTI_ADD_COMMON)/cupti_add.h $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + $(NVCC) $(NVCC_FLAGS) -c backprop_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +imagenet.o: imagenet.c backprop.h + $(CC) $(CC_FLAGS) imagenet.c -c + + +clean: + rm -f *.o *~ backprop backprop_cuda.linkinfo diff --git a/workloads/realworld/uvm/backprop/backprop.c b/workloads/realworld/uvm/backprop/backprop.c new file mode 100644 index 0000000000000000000000000000000000000000..3a38f012b785f8cbaec7f9c33e9ae58b9ee92ae5 --- /dev/null +++ b/workloads/realworld/uvm/backprop/backprop.c @@ -0,0 +1,502 @@ +/* + ****************************************************************** + * HISTORY + * 15-Oct-94 Jeff Shufelt (js), Carnegie Mellon University + * Prepared for 15-681, Fall 1994. 
+ * Modified by Shuai Che + ****************************************************************** + */ +#include +#include +#include +#include +#include "backprop.h" +#include +//#define OPEN + +#define ABS(x) (((x) > 0.0) ? (x) : (-(x))) + +#define fastcopy(to,from,len)\ +{\ + register char *_to,*_from;\ + register int _i,_l;\ + _to = (char *)(to);\ + _from = (char *)(from);\ + _l = (len);\ + for (_i = 0; _i < _l; _i++) *_to++ = *_from++;\ +} + +/*** Return random number between 0.0 and 1.0 ***/ +float drnd() +{ + return ((float) rand() / (float) BIGRND); +} + +/*** Return random number between -1.0 and 1.0 ***/ +float dpn1() +{ + return ((drnd() * 2.0) - 1.0); +} + +/*** The squashing function. Currently, it's a sigmoid. ***/ + +float squash(x) +float x; +{ + float m; + //x = -x; + //m = 1 + x + x*x/2 + x*x*x/6 + x*x*x*x/24 + x*x*x*x*x/120; + //return(1.0 / (1.0 + m)); + return (1.0 / (1.0 + exp(-x))); +} + + +/*** Allocate 1d array of floats ***/ + +float *alloc_1d_dbl(n) +int n; +{ + float *new; + + new = (float *) malloc ((unsigned) (n * sizeof (float))); + if (new == NULL) { + printf("ALLOC_1D_DBL: Couldn't allocate array of floats\n"); + return (NULL); + } + return (new); +} + + +/*** Allocate 2d array of floats ***/ + +float **alloc_2d_dbl(m, n) +int m, n; +{ + int i; + float **new; + + new = (float **) malloc ((unsigned) (m * sizeof (float *))); + if (new == NULL) { + printf("ALLOC_2D_DBL: Couldn't allocate array of dbl ptrs\n"); + return (NULL); + } + + for (i = 0; i < m; i++) { + new[i] = alloc_1d_dbl(n); + } + + return (new); +} + + +bpnn_randomize_weights(w, m, n) +float **w; +int m, n; +{ + int i, j; + + for (i = 0; i <= m; i++) { + for (j = 0; j <= n; j++) { + w[i][j] = (float) rand()/RAND_MAX; + // w[i][j] = dpn1(); + } + } +} + +bpnn_randomize_row(w, m) +float *w; +int m; +{ + int i; + for (i = 0; i <= m; i++) { + //w[i] = (float) rand()/RAND_MAX; + w[i] = 0.1; + } +} + + +bpnn_zero_weights(w, m, n) +float **w; +int m, n; +{ + int i, j; + + for (i = 
0; i <= m; i++) { + for (j = 0; j <= n; j++) { + w[i][j] = 0.0; + } + } +} + + +void bpnn_initialize(seed) +{ + printf("Random number generator seed: %d\n", seed); + srand(seed); +} + + +BPNN *bpnn_internal_create(n_in, n_hidden, n_out) +int n_in, n_hidden, n_out; +{ + BPNN *newnet; + + newnet = (BPNN *) malloc (sizeof (BPNN)); + if (newnet == NULL) { + printf("BPNN_CREATE: Couldn't allocate neural network\n"); + return (NULL); + } + + newnet->input_n = n_in; + newnet->hidden_n = n_hidden; + newnet->output_n = n_out; + newnet->input_units = alloc_1d_dbl(n_in + 1); + newnet->hidden_units = alloc_1d_dbl(n_hidden + 1); + newnet->output_units = alloc_1d_dbl(n_out + 1); + + newnet->hidden_delta = alloc_1d_dbl(n_hidden + 1); + newnet->output_delta = alloc_1d_dbl(n_out + 1); + newnet->target = alloc_1d_dbl(n_out + 1); + + newnet->input_weights = alloc_2d_dbl(n_in + 1, n_hidden + 1); + newnet->hidden_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); + + newnet->input_prev_weights = alloc_2d_dbl(n_in + 1, n_hidden + 1); + newnet->hidden_prev_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); + + return (newnet); +} + + +void bpnn_free(net) +BPNN *net; +{ + int n1, n2, i; + + n1 = net->input_n; + n2 = net->hidden_n; + + free((char *) net->input_units); + free((char *) net->hidden_units); + free((char *) net->output_units); + + free((char *) net->hidden_delta); + free((char *) net->output_delta); + free((char *) net->target); + + for (i = 0; i <= n1; i++) { + free((char *) net->input_weights[i]); + free((char *) net->input_prev_weights[i]); + } + free((char *) net->input_weights); + free((char *) net->input_prev_weights); + + for (i = 0; i <= n2; i++) { + free((char *) net->hidden_weights[i]); + free((char *) net->hidden_prev_weights[i]); + } + free((char *) net->hidden_weights); + free((char *) net->hidden_prev_weights); + + free((char *) net); +} + + +/*** Creates a new fully-connected network from scratch, + with the given numbers of input, hidden, and output units. 
+ Threshold units are automatically included. All weights are + randomly initialized. + + Space is also allocated for temporary storage (momentum weights, + error computations, etc). +***/ + +BPNN *bpnn_create(n_in, n_hidden, n_out) +int n_in, n_hidden, n_out; +{ + + BPNN *newnet; + + newnet = bpnn_internal_create(n_in, n_hidden, n_out); + +#ifdef INITZERO + bpnn_zero_weights(newnet->input_weights, n_in, n_hidden); +#else + bpnn_randomize_weights(newnet->input_weights, n_in, n_hidden); +#endif + bpnn_randomize_weights(newnet->hidden_weights, n_hidden, n_out); + bpnn_zero_weights(newnet->input_prev_weights, n_in, n_hidden); + bpnn_zero_weights(newnet->hidden_prev_weights, n_hidden, n_out); + bpnn_randomize_row(newnet->target, n_out); + return (newnet); +} + + +void bpnn_layerforward(l1, l2, conn, n1, n2) +float *l1, *l2, **conn; +int n1, n2; +{ + float sum; + int j, k; + + /*** Set up thresholding unit ***/ + l1[0] = 1.0; +#ifdef OPEN + omp_set_num_threads(NUM_THREAD); + #pragma omp parallel for shared(conn, n1, n2, l1) private(k, j) reduction(+: sum) schedule(static) +#endif + /*** For each unit in second layer ***/ + for (j = 1; j <= n2; j++) { + + /*** Compute weighted sum of its inputs ***/ + sum = 0.0; + for (k = 0; k <= n1; k++) { + sum += conn[k][j] * l1[k]; + } + l2[j] = squash(sum); + } +} + +//extern "C" +void bpnn_output_error(delta, target, output, nj, err) +float *delta, *target, *output, *err; +int nj; +{ + int j; + float o, t, errsum; + errsum = 0.0; + for (j = 1; j <= nj; j++) { + o = output[j]; + t = target[j]; + delta[j] = o * (1.0 - o) * (t - o); + errsum += ABS(delta[j]); + } + *err = errsum; +} + + +void bpnn_hidden_error(delta_h, + nh, + delta_o, + no, + who, + hidden, + err) +float *delta_h, *delta_o, *hidden, **who, *err; +int nh, no; +{ + int j, k; + float h, sum, errsum; + + errsum = 0.0; + for (j = 1; j <= nh; j++) { + h = hidden[j]; + sum = 0.0; + for (k = 1; k <= no; k++) { + sum += delta_o[k] * who[j][k]; + } + delta_h[j] = h * (1.0 - 
h) * sum; + errsum += ABS(delta_h[j]); + } + *err = errsum; +} + + +void bpnn_adjust_weights(delta, ndelta, ly, nly, w, oldw) +float *delta, *ly, **w, **oldw; +{ + float new_dw; + int k, j; + ly[0] = 1.0; + //eta = 0.3; + //momentum = 0.3; + +#ifdef OPEN + omp_set_num_threads(NUM_THREAD); + #pragma omp parallel for \ + shared(oldw, w, delta) \ + private(j, k, new_dw) \ + firstprivate(ndelta, nly, momentum) +#endif + for (j = 1; j <= ndelta; j++) { + for (k = 0; k <= nly; k++) { + new_dw = ((ETA * delta[j] * ly[k]) + (MOMENTUM * oldw[k][j])); + w[k][j] += new_dw; + oldw[k][j] = new_dw; + } + } +} + + +void bpnn_feedforward(net) +BPNN *net; +{ + int in, hid, out; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + + /*** Feed forward input activations. ***/ + bpnn_layerforward(net->input_units, net->hidden_units, + net->input_weights, in, hid); + bpnn_layerforward(net->hidden_units, net->output_units, + net->hidden_weights, hid, out); + +} + + +void bpnn_train(net, eo, eh) +BPNN *net; +float *eo, *eh; +{ + int in, hid, out; + float out_err, hid_err; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + + /*** Feed forward input activations. ***/ + bpnn_layerforward(net->input_units, net->hidden_units, + net->input_weights, in, hid); + bpnn_layerforward(net->hidden_units, net->output_units, + net->hidden_weights, hid, out); + + /*** Compute error on output and hidden units. ***/ + bpnn_output_error(net->output_delta, net->target, net->output_units, + out, &out_err); + bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, + net->hidden_weights, net->hidden_units, &hid_err); + *eo = out_err; + *eh = hid_err; + + /*** Adjust input and hidden weights. 
***/ + bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, + net->hidden_weights, net->hidden_prev_weights); + bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in, + net->input_weights, net->input_prev_weights); + +} + + + + +void bpnn_save(net, filename) +BPNN *net; +char *filename; +{ + int n1, n2, n3, i, j, memcnt; + float dvalue, **w; + char *mem; + ///add// + FILE *pFile; + pFile = fopen( filename, "w+" ); + /////// + /* + if ((fd = creat(filename, 0644)) == -1) { + printf("BPNN_SAVE: Cannot create '%s'\n", filename); + return; + } + */ + + n1 = net->input_n; n2 = net->hidden_n; n3 = net->output_n; + printf("Saving %dx%dx%d network to '%s'\n", n1, n2, n3, filename); + //fflush(stdout); + + //write(fd, (char *) &n1, sizeof(int)); + //write(fd, (char *) &n2, sizeof(int)); + //write(fd, (char *) &n3, sizeof(int)); + + fwrite( (char *) &n1 , sizeof(char), sizeof(char), pFile); + fwrite( (char *) &n2 , sizeof(char), sizeof(char), pFile); + fwrite( (char *) &n3 , sizeof(char), sizeof(char), pFile); + + + + memcnt = 0; + w = net->input_weights; + mem = (char *) malloc ((unsigned) ((n1+1) * (n2+1) * sizeof(float))); + for (i = 0; i <= n1; i++) { + for (j = 0; j <= n2; j++) { + dvalue = w[i][j]; + fastcopy(&mem[memcnt], &dvalue, sizeof(float)); + memcnt += sizeof(float); + } + } + //write(fd, mem, (n1+1) * (n2+1) * sizeof(float)); + fwrite( mem , (unsigned)(sizeof(float)), (unsigned) ((n1+1) * (n2+1) * sizeof(float)) , pFile); + free(mem); + + memcnt = 0; + w = net->hidden_weights; + mem = (char *) malloc ((unsigned) ((n2+1) * (n3+1) * sizeof(float))); + for (i = 0; i <= n2; i++) { + for (j = 0; j <= n3; j++) { + dvalue = w[i][j]; + fastcopy(&mem[memcnt], &dvalue, sizeof(float)); + memcnt += sizeof(float); + } + } + //write(fd, mem, (n2+1) * (n3+1) * sizeof(float)); + fwrite( mem , sizeof(float), (unsigned) ((n2+1) * (n3+1) * sizeof(float)) , pFile); + free(mem); + + fclose(pFile); + return; +} + + +BPNN *bpnn_read(filename) +char 
*filename; +{ + char *mem; + BPNN *new; + int fd, n1, n2, n3, i, j, memcnt; + + if ((fd = open(filename, 0, 0644)) == -1) { + return (NULL); + } + + printf("Reading '%s'\n", filename); //fflush(stdout); + + read(fd, (char *) &n1, sizeof(int)); + read(fd, (char *) &n2, sizeof(int)); + read(fd, (char *) &n3, sizeof(int)); + new = bpnn_internal_create(n1, n2, n3); + + printf("'%s' contains a %dx%dx%d network\n", filename, n1, n2, n3); + printf("Reading input weights..."); //fflush(stdout); + + memcnt = 0; + mem = (char *) malloc ((unsigned) ((n1+1) * (n2+1) * sizeof(float))); + read(fd, mem, (n1+1) * (n2+1) * sizeof(float)); + for (i = 0; i <= n1; i++) { + for (j = 0; j <= n2; j++) { + fastcopy(&(new->input_weights[i][j]), &mem[memcnt], sizeof(float)); + memcnt += sizeof(float); + } + } + free(mem); + + printf("Done\nReading hidden weights..."); //fflush(stdout); + + memcnt = 0; + mem = (char *) malloc ((unsigned) ((n2+1) * (n3+1) * sizeof(float))); + read(fd, mem, (n2+1) * (n3+1) * sizeof(float)); + for (i = 0; i <= n2; i++) { + for (j = 0; j <= n3; j++) { + fastcopy(&(new->hidden_weights[i][j]), &mem[memcnt], sizeof(float)); + memcnt += sizeof(float); + } + } + free(mem); + close(fd); + + printf("Done\n"); //fflush(stdout); + + bpnn_zero_weights(new->input_prev_weights, n1, n2); + bpnn_zero_weights(new->hidden_prev_weights, n2, n3); + + return (new); +} diff --git a/workloads/realworld/uvm/backprop/backprop.h b/workloads/realworld/uvm/backprop/backprop.h new file mode 100644 index 0000000000000000000000000000000000000000..dfaafe39b76a1c9c1455e38bbe0a14d5461d5d2b --- /dev/null +++ b/workloads/realworld/uvm/backprop/backprop.h @@ -0,0 +1,53 @@ +#ifndef _BACKPROP_H_ +#define _BACKPROP_H_ + +#define BIGRND 0x7fffffff + +#define GPU +#define THREADS 256 +#define WIDTH 16 // shared memory width +#define HEIGHT 16 // shared memory height + +#define ETA 0.3 //eta value +#define MOMENTUM 0.3 //momentum value +#define NUM_THREAD 4 //OpenMP threads + + +typedef struct { + int 
input_n; /* number of input units */ + int hidden_n; /* number of hidden units */ + int output_n; /* number of output units */ + + float *input_units; /* the input units */ + float *hidden_units; /* the hidden units */ + float *output_units; /* the output units */ + + float *hidden_delta; /* storage for hidden unit error */ + float *output_delta; /* storage for output unit error */ + + float *target; /* storage for target vector */ + + float **input_weights; /* weights from input to hidden layer */ + float **hidden_weights; /* weights from hidden to output layer */ + + /*** The next two are for momentum ***/ + float **input_prev_weights; /* previous change on input to hidden wgt */ + float **hidden_prev_weights; /* previous change on hidden to output wgt */ +} BPNN; + + +/*** User-level functions ***/ + +void bpnn_initialize(); + +BPNN *bpnn_create(); +void bpnn_free(); + +void bpnn_train(); +void bpnn_feedforward(); + +void bpnn_save(); +BPNN *bpnn_read(); + + +#endif diff --git a/workloads/realworld/uvm/backprop/backprop_cuda.cu b/workloads/realworld/uvm/backprop/backprop_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..d6c1473bd7b9589e3fa78708524de4ac13275105 --- /dev/null +++ b/workloads/realworld/uvm/backprop/backprop_cuda.cu @@ -0,0 +1,246 @@ + + +// includes, system +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +double t_start, t_end; + +// includes, kernels +#include "backprop_cuda_kernel.cu" +#include "backprop.h" + +//////////////////////////////////////////////////////////////////////////////// + +extern "C" void bpnn_layerforward(float *l1, float *l2, float **conn, int n1, int n2); + +extern "C" void 
bpnn_output_error(float *delta, float *target, float *output, int nj, float *err); + +extern "C" void bpnn_hidden_error(float *delta_h, int nh, float *delta_o, int no, float **who, float *hidden, float *err); + +extern "C" void bpnn_adjust_weights(float *delta, int ndelta, float *ly, int nly, float **w, float **oldw); + +extern "C" int setup(int argc, char **argv); + +extern "C" float **alloc_2d_dbl(int m, int n); + +extern "C" float squash(float x); + +double gettime() +{ + struct timeval t; + gettimeofday(&t, NULL); + return t.tv_sec + t.tv_usec * 1e-6; +} + +unsigned int num_threads = 0; +unsigned int num_blocks = 0; + +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + GPU_argv_init(); + + initTrace(); + startCPU(); + + num_blocks = atoi(argv[2]); + setup(argc, argv); +} + +extern "C" void bpnn_train_cuda(BPNN *net, float *eo, float *eh) +{ + int in, hid, out; + float out_err, hid_err; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + +#ifdef GPU + int m = 0; + float *input_hidden_cuda; + float *input_cuda; + float *output_hidden_cuda; + float *partial_sum; + float *hidden_partial_sum; + float *hidden_delta_cuda; + float *input_prev_weights_cuda; + float sum; + float *input_weights_one_dim; + float *input_weights_prev_one_dim; + // ruihao + // num_blocks = in / 16; + // dim3 grid(1, num_blocks); + // dim3 threads(16, 16); + + int tile_size = in / num_blocks; + dim3 grid(1, num_blocks); + dim3 threads(16, 16); + // ruihao + + input_weights_one_dim = (float *)malloc((in + 1) * (hid + 1) * sizeof(float)); + input_weights_prev_one_dim = (float *)malloc((in + 1) * (hid + 1) * sizeof(float)); + // ruihao + // partial_sum = (float *) malloc(num_blocks * 
WIDTH * sizeof(float)); + partial_sum = (float *)malloc(in * sizeof(float)); + // ruihao + + // this preprocessing stage is added to correct the bugs of wrong memcopy using two-dimensional net->inputweights + for (int k = 0; k <= in; k++) + { + for (int j = 0; j <= hid; j++) + { + input_weights_one_dim[m] = net->input_weights[k][j]; + input_weights_prev_one_dim[m] = net->input_prev_weights[k][j]; + m++; + } + } + + // GPU_argv_init(); + + // initTrace(); + // startCPU(); + + cudaMallocManaged((void **)&input_cuda, (in + 1) * sizeof(float)); + cudaMallocManaged((void **)&output_hidden_cuda, (hid + 1) * sizeof(float)); + cudaMallocManaged((void **)&input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float)); + // ruihao + // cudaMalloc((void**) &hidden_partial_sum, num_blocks * WIDTH * sizeof(float)); + cudaMallocManaged((void **)&hidden_partial_sum, in * sizeof(float)); + // ruihao + +#endif + +#ifdef CPU + + printf("Performing CPU computation\n"); + bpnn_layerforward(net->input_units, net->hidden_units, net->input_weights, in, hid); + +#endif + +#ifdef GPU + + //printf("Performing GPU computation\n"); + + memcpy(input_cuda, net->input_units, (in + 1) * sizeof(float)); + memcpy(input_hidden_cuda, input_weights_one_dim, (in + 1) * (hid + 1) * sizeof(float)); + + // ruihao + //t_start = rtclock(); + // ruihao + bpnn_layerforward_CUDA<<>>(input_cuda, + output_hidden_cuda, + input_hidden_cuda, + hidden_partial_sum, + in, + hid, + tile_size); + + cudaDeviceSynchronize(); + + // ruihao + //t_end = rtclock(); + //fprintf(stdout, "bpnn_layerforward_CUDA GPU Runtime: %0.6lfs\n", t_end - t_start); + memcpy(partial_sum, hidden_partial_sum, in * sizeof(float)); + // ruihao + + cudaError_t error = cudaGetLastError(); + if (error != cudaSuccess) + { + printf("bpnn kernel error: %s\n", cudaGetErrorString(error)); + exit(EXIT_FAILURE); + } + + for (int j = 1; j <= hid; j++) + { + sum = 0.0; + // ruihao + // for (int k = 0; k < num_blocks; k++) { + // sum += partial_sum[k * hid + j-1] 
; + // } + for (int k = 0; k < in / WIDTH; k++) + { + sum += partial_sum[k * hid + j - 1]; + } + // ruihao + sum += net->input_weights[0][j]; + net->hidden_units[j] = float(1.0 / (1.0 + exp(-sum))); + } +#endif + + bpnn_layerforward(net->hidden_units, net->output_units, net->hidden_weights, hid, out); + bpnn_output_error(net->output_delta, net->target, net->output_units, out, &out_err); + bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, net->hidden_weights, net->hidden_units, &hid_err); + bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, net->hidden_weights, net->hidden_prev_weights); + +#ifdef CPU + + bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in, net->input_weights, net->input_prev_weights); + +#endif + +#ifdef GPU + + cudaMallocManaged((void **)&hidden_delta_cuda, (hid + 1) * sizeof(float)); + cudaMallocManaged((void **)&input_prev_weights_cuda, (in + 1) * (hid + 1) * sizeof(float)); + // ruihao + //t_start = rtclock(); + memcpy(hidden_delta_cuda, net->hidden_delta, (hid + 1) * sizeof(float)); + memcpy(input_prev_weights_cuda, input_weights_prev_one_dim, (in + 1) * (hid + 1) * sizeof(float)); + memcpy(input_hidden_cuda, input_weights_one_dim, (in + 1) * (hid + 1) * sizeof(float)); + // ruihao + bpnn_adjust_weights_cuda<<>>(hidden_delta_cuda, + hid, + input_cuda, + in, + input_hidden_cuda, + input_prev_weights_cuda, + tile_size); + // ruihao + cudaDeviceSynchronize(); + //t_end = rtclock(); + memcpy(net->input_units, input_cuda, (in + 1) * sizeof(float)); + memcpy(input_weights_one_dim, input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float)); + //fprintf(stdout, "bpnn_adjust_weights_cuda GPU Runtime: %0.6lfs\n", t_end - t_start); + // ruihao + + cudaFree(input_cuda); + cudaFree(output_hidden_cuda); + cudaFree(input_hidden_cuda); + cudaFree(hidden_partial_sum); + cudaFree(input_prev_weights_cuda); + cudaFree(hidden_delta_cuda); + + endCPU(); + finiTrace(); + + free(partial_sum); + free(input_weights_one_dim); 
+ free(input_weights_prev_one_dim); + +#endif +} diff --git a/workloads/realworld/uvm/backprop/backprop_cuda_kernel.cu b/workloads/realworld/uvm/backprop/backprop_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..27f07767e27b29a189b99a1a0c6010ad2ee032e6 --- /dev/null +++ b/workloads/realworld/uvm/backprop/backprop_cuda_kernel.cu @@ -0,0 +1,110 @@ + + +#ifndef _BACKPROP_CUDA_KERNEL_H_ +#define _BACKPROP_CUDA_KERNEL_H_ + +#include +#include "backprop.h" +#include "math.h" +#include "cuda.h" + +#include +#include + +__global__ void +bpnn_layerforward_CUDA(float *input_cuda, + float *output_hidden_cuda, + float *input_hidden_cuda, + float *hidden_partial_sum, + int in, + int hid, + int tile_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + int by = blockIdx.y; + int tx = threadIdx.x; + int ty = threadIdx.y; + + int batches = tile_size / WIDTH; + + __shared__ float input_node[HEIGHT]; + __shared__ float weight_matrix[HEIGHT * WIDTH]; + + for (int b = 0; b < batches; b++) + { + int index = (hid + 1) * HEIGHT * (batches * by + b) + (hid + 1) * ty + tx + 1 + (hid + 1); + + int index_in = HEIGHT * (batches * by + b) + ty + 1; + + if (tx == 0) + input_node[ty] = input_cuda[index_in]; + + block.sync(); + + weight_matrix[ty * WIDTH + tx] = input_hidden_cuda[index]; + + block.sync(); + + weight_matrix[ty * WIDTH + tx] = weight_matrix[ty * WIDTH + tx] * input_node[ty]; + + block.sync(); + + for (int i = 1; i <= __log2f(HEIGHT); i++) + { + + int power_two = __powf(2, i); + + if (ty % power_two == 0) + weight_matrix[ty * WIDTH + tx] += weight_matrix[(ty + power_two / 2) * WIDTH + tx]; + + block.sync(); + } + + input_hidden_cuda[index] = weight_matrix[ty * WIDTH + tx]; + + block.sync(); + + if (tx == 0) + { + hidden_partial_sum[(batches * by + b) * hid + ty] = weight_matrix[tx * WIDTH + ty]; + } + } +} + +__global__ void bpnn_adjust_weights_cuda(float *delta, + int hid, + float *ly, + int in, + float *w, 
+ float *oldw, + int tile_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + int by = blockIdx.y; + + int tx = threadIdx.x; + int ty = threadIdx.y; + + int batches = tile_size / WIDTH; + + for (int b = 0; b < batches; b++) + { + int index = (hid + 1) * HEIGHT * (batches * by + b) + (hid + 1) * ty + tx + 1 + (hid + 1); + int index_y = HEIGHT * (batches * by + b) + ty + 1; + int index_x = tx + 1; + // eta = 0.3; + // momentum = 0.3; + + w[index] += ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index])); + oldw[index] = ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index])); + + block.sync(); + + if (ty == 0 && by == 0 && b == 0) + { + w[index_x] += ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x])); + oldw[index_x] = ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x])); + } + } +} +#endif diff --git a/workloads/realworld/uvm/backprop/facetrain.c b/workloads/realworld/uvm/backprop/facetrain.c new file mode 100644 index 0000000000000000000000000000000000000000..cbf83810934b68551d7dd4b7b94fda5eb6837276 --- /dev/null +++ b/workloads/realworld/uvm/backprop/facetrain.c @@ -0,0 +1,54 @@ + +#include +#include +#include +#include +#include "backprop.h" +#include "omp.h" + +extern char *strcpy(); +extern void exit(); + +int layer_size = 0; + +backprop_face() +{ + BPNN *net; + int i; + float out_err, hid_err; + net = bpnn_create(layer_size, 16, 1); // (16, 1 can not be changed) + + printf("Input layer size : %d\n", layer_size); + load(net); + // entering the training kernel, only one iteration + printf("Starting training kernel\n"); + bpnn_train_cuda(net, &out_err, &hid_err); + bpnn_free(net); + printf("Training done\n"); +} + +int setup(argc, argv) +int argc; +char *argv[]; +{ + + int seed; + + if (argc != 3) + { + fprintf(stderr, "usage: backprop \n"); + exit(0); + } + layer_size = atoi(argv[1]); + if (layer_size % 16 != 0) + { + fprintf(stderr, "The number of input points must be divided by 16\n"); + 
exit(0); + } + + seed = 7; + bpnn_initialize(seed); + backprop_face(); + + exit(0); +} diff --git a/workloads/realworld/uvm/backprop/imagenet.c b/workloads/realworld/uvm/backprop/imagenet.c new file mode 100644 index 0000000000000000000000000000000000000000..255b0d5d8ca67508f6732e266299e7c58012906f --- /dev/null +++ b/workloads/realworld/uvm/backprop/imagenet.c @@ -0,0 +1,24 @@ + +#include +#include +#include "backprop.h" + +extern layer_size; + +load(net) +BPNN *net; +{ + float *units; + int nr, nc, imgsize, i, j, k; + + nr = layer_size; + + imgsize = nr * nc; + units = net->input_units; + + k = 1; + for (i = 0; i < nr; i++) { + units[k] = (float) rand()/RAND_MAX ; + k++; + } +} diff --git a/workloads/realworld/uvm/backprop/run.sh b/workloads/realworld/uvm/backprop/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..874cbb88032622578f319cce3800a3793151cb92 --- /dev/null +++ b/workloads/realworld/uvm/backprop/run.sh @@ -0,0 +1,5 @@ +# ./backprop 524288 +# ./backprop 8388608 128 + +# ./backprop 66708864 128 +./backprop 66708864 1024 \ No newline at end of file diff --git a/workloads/realworld/uvm/backprop/run_super.sh b/workloads/realworld/uvm/backprop/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..d9e8a3d42354f597bbcd153d180d9b23bed71192 --- /dev/null +++ b/workloads/realworld/uvm/backprop/run_super.sh @@ -0,0 +1,3 @@ +# ./backprop 1073741824 1024 +# ./backprop 134217728 1024 +./backprop 67108864 1024 \ No newline at end of file diff --git a/workloads/realworld/uvm/darknet/LICENSE b/workloads/realworld/uvm/darknet/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..a50f7d700ba02bfacd50f59b315311cf4d0bbda2 --- /dev/null +++ b/workloads/realworld/uvm/darknet/LICENSE @@ -0,0 +1,12 @@ + YOLO LICENSE + Version 2, July 29 2016 + +THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER +SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN +TROUBLE 
HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES +LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY. NOW HERE'S +THE REAL LICENSE: + +0. Darknet is public domain. +1. Do whatever you want with it. +2. Stop emailing me about it! diff --git a/workloads/realworld/uvm/darknet/LICENSE.fuck b/workloads/realworld/uvm/darknet/LICENSE.fuck new file mode 100644 index 0000000000000000000000000000000000000000..8b1a9d8189b3b9f4479221d52882ce36fdc73a62 --- /dev/null +++ b/workloads/realworld/uvm/darknet/LICENSE.fuck @@ -0,0 +1,13 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + +Copyright (C) 2004 Sam Hocevar + +Everyone is permitted to copy and distribute verbatim or modified +copies of this license document, and changing it is allowed as long +as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. diff --git a/workloads/realworld/uvm/darknet/LICENSE.gen b/workloads/realworld/uvm/darknet/LICENSE.gen new file mode 100644 index 0000000000000000000000000000000000000000..c54113271e15057c4def6676693eb96fd6362b28 --- /dev/null +++ b/workloads/realworld/uvm/darknet/LICENSE.gen @@ -0,0 +1,91 @@ +RNN LICENSE Version 3, June 21 2017 + +Copyright (c) 1990, 1989, 1999 Free87337 May 48 THIRD PARTIES OR ANY OTHER THE +COMPLAIN OR CONSEQUENTIAL DAMAGES AND REGARDLESS OF WHETHER IN CONTRACT, TO THE +EXTENT REPAIR OR AGENTS (NOT THE IN ANY EVENT). 
THE SOFTWARE WILL BE +UNINTERRUPTED OR ERROR-FREE OR ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF ALL THE WORK (GOVERNED CODE) HIM RESPONSES, OR OF FINES, +SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR ANY OTHER OR OTHER HARL UNDER NO +CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), +PATENT PERMITTED BY THE INSTAGRAM PARENT STATE OR TORT (INCLUDING NEGLIGENCE), +PRODUCT LIABILITY OR OTHERWISE, ARISING OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR ANYTHING PROVIDED IN THIS PRODUCT, COMMIS AND SERVICES +ARE LICENSED SOFTWARE AND ANY RESULE OR ANY OTHER THE COPYRIGHT HOLDERS BE +LIABLE FOR ANY SPECIAL, INCIDENTAL, CASE, SUCH WARRANTIES, EXPRESS OR IMPLIED, +INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COPYRIGHT HOLDERS AND/OR ANY +PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY +EXPRESS OR DISTRIBUTE THAT ALL CLAIMS ARE SHALL CREATE DERAVE BE LIABLE TO YOU +WILL HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +6\. TERMINATION. TO THE EXTENT PERMITTED BY LAW, NO USE OF THE COVERED CODE IS +WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE +INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY +SERVICING, REPAIR OR COULT OR IN ANY WAY OUT OF THE USE OF THE WEBSITES OR +SERVICE WILL BE CONSEQUENTIAL DAMAGES OF ANY KIND HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + +This paragraph Agreement constitutes the entire agreement between the parties +with respect to the Work licensed here. However, if you place the name of the +fact that the arbitration was the consultation of the parties as a "patent is". 
+Subject to the terms and conditions of this License, Contributor has knowledge +that a license under a third party may also be used to endorse or promote +products derived from the Work, and there is no warranty on the Software and +Science Fees. For the purposes of this Agreement, attach the following +disclaimers (without liabilities of written notice to the Subject Software) in a +manner that a product is under common control with you. The Free Software +Foundation may publish revised and/or new versions of the License for the +Modifications made by the applicable terms. The Recipient shall promptly retain +the covered works for any reason be entered in any federal or state or login +Restricted Laws appearing in the United States or any of its own information +that is not disabled from a derivative work except as expressly permitted in +this License, to the extent that they are in receiving the Software and Source +Code or any exercise of the rights granted to You by this License or a +Contributor made by the Licensor or are authorized to make a reasonable +retirement by the courts of the courts located in Santa Clara County, California +printed and related to the Work or “Company” and Apache Software Foundation. If +the Licensor shall be entitled to reflect your rights to use the Software and +the Software to exercise the rights granted to the recipient without a +requirement to exercise the rights granted by the Agreement to the provision +will begin will appear in such cases, you will use such information without such +corporation shall be an officer with respect to any part of the Software or any +portion thereof. 
Capitalized terms are included in the Initial Contributor and +under no circumstances will license the Service at any time and for any direct, +indirect, special, incidental, or consequential damages of or assist in +connection with any Services or the registration purposes only to the extent +that it includes any or all means including the processing of which you download +any derivative work. Any of the purchases’ transmission purposes are made +available, if any, in other circumstances, we may review the copyright notice. +In the event that this Agreement is required to give us strict content. The +inclusion of the other party hereunder may also notify you Intellectual Property +Rights to any third party. This means that the Source Code exists of the Work +will not charge a program available to you at any time. You must include a +prominent statement that the Software is governed under a particular version of +this Agreement. You must include a provision to the extent that there is no +warranty for the content of others. You agree that the Recipient was appointed +as a Contributor, (c) are effective until terminated by hereunder, then the +registration are not disabled and not limited to, submit any Customer Data +without the updated use of the Software and that no fee is released. You grant +to Use Other Arbitration Rules for Diagnostic or Services may use or modify the +Apple Software and Consolidated Apple Software or Services. The Company may have +full risk as a product of the Compatible Source. A Contribution by the Licensor +or by the updated Software under the following conditions we can redistribute +any General Provision of this Agreement. If the Program is used in accordance +with the terms of this Agreement, Customer may provide advertisements from your +devices that clause you can your employer or a transaction or country that has +been controlled by the arbitrator, that they will be useful of this Agreement. 
+The term "Open Source Software is available in connection with the program, and +you may not protect the combination of the Covered Code. You should like to +select a user's rights to charge a copy of this License. I are Contributor's +confidentiality of the exercise of the rights granted herein. Such a covered +work is released as a consequence, the Licensor shall be eligible for a purpose +or subcontractor of the person or entity to the user of the user, then the word +"Application" means having the original fee for any reason; and that no patent +license to more than fifty stated close of the license term. The terms of this +License will the license terms and conditions set forth in Section 2.2 (OPEC) +and You will not use the Software or any set of responsibility for any resulting +information that the Original Code warrants that you have the right to disclose +these information (or in the notification; or (iii) late use of the software or +any third party to the three (50) days before such belief to the extent that it +includes a court court obtains the rights granted by this License. diff --git a/workloads/realworld/uvm/darknet/LICENSE.gpl b/workloads/realworld/uvm/darknet/LICENSE.gpl new file mode 100644 index 0000000000000000000000000000000000000000..9cecc1d4669ee8af2ca727a5d8cde10cd8b2d7cc --- /dev/null +++ b/workloads/realworld/uvm/darknet/LICENSE.gpl @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. 
+ + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + {one line to give the program's name and a brief idea of what it does.} + Copyright (C) {year} {name of author} + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + {project} Copyright (C) {year} {fullname} + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/workloads/realworld/uvm/darknet/LICENSE.meta b/workloads/realworld/uvm/darknet/LICENSE.meta new file mode 100644 index 0000000000000000000000000000000000000000..6728bd28d319c68ae04944fb034118dcc4c9aa09 --- /dev/null +++ b/workloads/realworld/uvm/darknet/LICENSE.meta @@ -0,0 +1,8 @@ + META-LICENSE + Version 1, June 21 2017 + +Any and all licenses may be applied to the software either individually +or in concert. Any issues, ambiguities, paradoxes, or metaphysical quandries +arising from this combination should be discussed with a local faith leader, +hermit, or guru. The Oxford comma shall be used. + diff --git a/workloads/realworld/uvm/darknet/LICENSE.mit b/workloads/realworld/uvm/darknet/LICENSE.mit new file mode 100644 index 0000000000000000000000000000000000000000..5bd806ce16ea5053c8631793787362439375026e --- /dev/null +++ b/workloads/realworld/uvm/darknet/LICENSE.mit @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2017 Joseph Redmon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/workloads/realworld/uvm/darknet/LICENSE.v1 b/workloads/realworld/uvm/darknet/LICENSE.v1 new file mode 100644 index 0000000000000000000000000000000000000000..5b8709acc43e7b76ed69758a52a9eaffaba775e6 --- /dev/null +++ b/workloads/realworld/uvm/darknet/LICENSE.v1 @@ -0,0 +1,13 @@ + YOLO LICENSE + Version 1, July 10 2015 + +THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER +SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN +TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES +LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY SUBJECT TO +THE FOLLOWING CONDITIONS: + +1. #yolo +2. #swag +3. #blazeit + diff --git a/workloads/realworld/uvm/darknet/Makefile b/workloads/realworld/uvm/darknet/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..5022f68377d7626ab768f2501883d72b013dafba --- /dev/null +++ b/workloads/realworld/uvm/darknet/Makefile @@ -0,0 +1,114 @@ +GPU=1 +CUDNN=0 +OPENCV=0 +OPENMP=0 +DEBUG=0 + +#ARCH= -gencode arch=compute_30,code=sm_30 \ +# -gencode arch=compute_35,code=sm_35 \ +# -gencode arch=compute_50,code=[sm_50,compute_50] \ +# -gencode arch=compute_52,code=[sm_52,compute_52] +# -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated? 
+ +# This is what I use, uncomment if you know your arch and want to specify +ARCH= -gencode arch=compute_80,code=sm_80 \ +#ARCH= -arch=sm_80 + +VPATH=./src/:./examples:$(CUPTI_ADD_COMMON) +SLIB=libdarknet.so +ALIB=libdarknet.a +EXEC=darknet +OBJDIR=./obj/ + +CC=gcc +CPP=g++ +# NVCC=nvcc --default-stream per-thread +NVCC=nvcc +AR=ar +ARFLAGS=rcs +OPTS=-Ofast +LDFLAGS= -lm -pthread +COMMON= -Iinclude/ -Isrc/ +CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC +ifeq ($(PROFILE), 1) +CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -DPROFILE +endif + + +ifeq ($(OPENMP), 1) +CFLAGS+= -fopenmp +endif + +ifeq ($(DEBUG), 1) +OPTS=-O0 -g +endif + +CFLAGS+=$(OPTS) + +ifeq ($(OPENCV), 1) +COMMON+= -DOPENCV +CFLAGS+= -DOPENCV +LDFLAGS+= `pkg-config --libs opencv` -lstdc++ +COMMON+= `pkg-config --cflags opencv` +endif + +ifeq ($(GPU), 1) +include ../../../common/make.config +COMMON+= -DGPU -I$(CUDA_DIR)/include/ -I$(CUDA_DIR)/extras/CUPTI/include/ +CFLAGS+= -DGPU +LDFLAGS+= -L$(CUDA_DIR)/lib64 -L$(CUDA_DIR)/extras/CUPTI/lib64/ -lcuda -lcudart -lcublas -lcurand -lcupti +endif + + +ifeq ($(CUDNN), 1) +COMMON+= -DCUDNN +CFLAGS+= -DCUDNN +LDFLAGS+= -lcudnn +endif + +OBJ=gemm.o utils.o cuda_dark.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o l2norm_layer.o yolo_layer.o iseg_layer.o image_opencv.o +EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o instance-segmenter.o darknet.o +ifeq 
($(GPU), 1) +LDFLAGS+= -lstdc++ +OBJ+=gemm_kernel.o convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o +endif + +# cpu_timestamps.o +# cupti_add.o + +EXECOBJ = $(addprefix $(OBJDIR), $(EXECOBJA)) +OBJS = $(addprefix $(OBJDIR), $(OBJ)) +DEPS = $(wildcard src/*.h) Makefile include/darknet.h + +all: obj backup results $(SLIB) $(ALIB) $(EXEC) + +$(EXEC): $(EXECOBJ) $(ALIB) + $(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS) $(ALIB) + +$(ALIB): $(OBJS) + $(AR) $(ARFLAGS) $@ $^ + +$(SLIB): $(OBJS) + $(CC) $(CFLAGS) -shared $^ -o $@ $(LDFLAGS) + +$(OBJDIR)%.o: %.cpp $(DEPS) + $(CPP) $(COMMON) $(CFLAGS) -c $< -o $@ + +$(OBJDIR)%.o: %.c $(DEPS) + $(CC) $(COMMON) $(CFLAGS) -c $< -o $@ + +$(OBJDIR)%.o: %.cu $(DEPS) + $(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@ + +obj: + mkdir -p obj +backup: + mkdir -p backup +results: + mkdir -p results + +.PHONY: clean + +clean: + rm -rf $(OBJS) $(SLIB) $(ALIB) $(EXEC) $(EXECOBJ) $(OBJDIR)/* + diff --git a/workloads/realworld/uvm/darknet/README.md b/workloads/realworld/uvm/darknet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fb58c2640038a963cd573d121e4fab59399f67dc --- /dev/null +++ b/workloads/realworld/uvm/darknet/README.md @@ -0,0 +1,124 @@ +![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png) + +# Darknet # +Darknet is an open source neural network framework written in C and CUDA. It is fast, easy to install, and supports CPU and GPU computation. 
+ +**Discord** invite link for communication and questions: https://discord.gg/zSq8rtW + +## YOLOv7: + +* **paper** - YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors: https://arxiv.org/abs/2207.02696 + +* **source code - Pytorch (use to reproduce results):** https://github.com/WongKinYiu/yolov7 + +---- + +Official YOLOv7 is more accurate and faster than YOLOv5 by **120%** FPS, than YOLOX by **180%** FPS, than Dual-Swin-T by **1200%** FPS, than ConvNext by **550%** FPS, than SWIN-L by **500%** FPS. + +YOLOv7 surpasses all known object detectors in both speed and accuracy in the range from 5 FPS to 160 FPS and has the highest accuracy 56.8% AP among all known real-time object detectors with 30 FPS or higher on GPU V100, batch=1. + +* YOLOv7-e6 (55.9% AP, 56 FPS V100 b=1) by `+500%` FPS faster than SWIN-L Cascade-Mask R-CNN (53.9% AP, 9.2 FPS A100 b=1) +* YOLOv7-e6 (55.9% AP, 56 FPS V100 b=1) by `+550%` FPS faster than ConvNeXt-XL C-M-RCNN (55.2% AP, 8.6 FPS A100 b=1) +* YOLOv7-w6 (54.6% AP, 84 FPS V100 b=1) by `+120%` FPS faster than YOLOv5-X6-r6.1 (55.0% AP, 38 FPS V100 b=1) +* YOLOv7-w6 (54.6% AP, 84 FPS V100 b=1) by `+1200%` FPS faster than Dual-Swin-T C-M-RCNN (53.6% AP, 6.5 FPS V100 b=1) +* YOLOv7x (52.9% AP, 114 FPS V100 b=1) by `+150%` FPS faster than PPYOLOE-X (51.9% AP, 45 FPS V100 b=1) +* YOLOv7 (51.2% AP, 161 FPS V100 b=1) by `+180%` FPS faster than YOLOX-X (51.1% AP, 58 FPS V100 b=1) + +---- + +![more5](https://user-images.githubusercontent.com/4096485/179425274-f55a36d4-8450-4471-816b-8c105841effd.jpg) + +---- + +![image](https://user-images.githubusercontent.com/4096485/177675030-a929ee00-0eba-4d93-95c2-225231d0fd61.png) + + +---- + +![yolov7_640_1280](https://user-images.githubusercontent.com/4096485/177688869-d75e0c36-63af-46ec-bdbd-81dbb281f257.png) + +---- + +## Scaled-YOLOv4: + +* **paper (CVPR 2021)**: 
https://openaccess.thecvf.com/content/CVPR2021/html/Wang_Scaled-YOLOv4_Scaling_Cross_Stage_Partial_Network_CVPR_2021_paper.html + +* **source code - Pytorch (use to reproduce results):** https://github.com/WongKinYiu/ScaledYOLOv4 + +* **source code - Darknet:** https://github.com/AlexeyAB/darknet + +* **Medium:** https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982?source=friends_link&sk=c8553bfed861b1a7932f739d26f487c8 + +## YOLOv4: + +* **paper:** https://arxiv.org/abs/2004.10934 + +* **source code:** https://github.com/AlexeyAB/darknet + +* **Wiki:** https://github.com/AlexeyAB/darknet/wiki + +* **useful links:** https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7 + +For more information see the [Darknet project website](http://pjreddie.com/darknet). + + +
Expand + +![yolo_progress](https://user-images.githubusercontent.com/4096485/146988929-1ed0cbec-1e01-4ad0-b42c-808dcef32994.png) https://paperswithcode.com/sota/object-detection-on-coco + +---- + +![scaled_yolov4](https://user-images.githubusercontent.com/4096485/112776361-281d8380-9048-11eb-8083-8728b12dcd55.png) AP50:95 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2011.08036 + +---- + +![YOLOv4Tiny](https://user-images.githubusercontent.com/4096485/101363015-e5c21200-38b1-11eb-986f-b3e516e05977.png) + +---- + +![YOLOv4](https://user-images.githubusercontent.com/4096485/90338826-06114c80-dff5-11ea-9ba2-8eb63a7409b3.png) + +
+ +---- + +![OpenCV_TRT](https://user-images.githubusercontent.com/4096485/90338805-e5e18d80-dff4-11ea-8a68-5710956256ff.png) + + +## Citation + + +``` +@misc{https://doi.org/10.48550/arxiv.2207.02696, + doi = {10.48550/ARXIV.2207.02696}, + url = {https://arxiv.org/abs/2207.02696}, + author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + publisher = {arXiv}, + year = {2022}, + copyright = {arXiv.org perpetual, non-exclusive license} +} +``` + +``` +@misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +``` +@InProceedings{Wang_2021_CVPR, + author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + title = {{Scaled-YOLOv4}: Scaling Cross Stage Partial Network}, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2021}, + pages = {13029-13038} +} +``` diff --git a/workloads/realworld/uvm/darknet/cfg/alexnet.cfg b/workloads/realworld/uvm/darknet/cfg/alexnet.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e2ed4bb8e7b1bad7859aef0d802cb4084753cb7a --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/alexnet.cfg @@ -0,0 +1,96 @@ +[net] +# Training +# batch=128 +# subdivisions=1 +# Testing +batch=1 +subdivisions=1 +height=227 +width=227 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=256 + +learning_rate=0.01 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue = .1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +filters=96 +size=11 +stride=4 +pad=0 
+activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[convolutional] +filters=256 +size=5 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[convolutional] +filters=384 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=384 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=1000 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/cifar.cfg b/workloads/realworld/uvm/darknet/cfg/cifar.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b2f69f53903e55c24718ed12d9adaaa1557e3647 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/cifar.cfg @@ -0,0 +1,121 @@ +[net] +batch=128 +subdivisions=1 +height=28 +width=28 +channels=3 +max_crop=32 +min_crop=32 + +hue=.1 +saturation=.75 +exposure=.75 + +learning_rate=0.4 +policy=poly +power=4 +max_batches = 5000 +momentum=0.9 +decay=0.0005 + + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[dropout] +probability=.5 + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 diff --git a/workloads/realworld/uvm/darknet/cfg/cifar.data b/workloads/realworld/uvm/darknet/cfg/cifar.data new file mode 100644 index 0000000000000000000000000000000000000000..a52208db1b203b5e1f24d5afaf8c7002adfd71a3 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/cifar.data @@ -0,0 +1,7 @@ +classes=10 +train = data/cifar/train.list +valid = data/cifar/test.list +test = data/cifar/test.list +labels = data/cifar/labels.txt +backup = backup/ +top=2 diff --git a/workloads/realworld/uvm/darknet/cfg/cifar.test.cfg b/workloads/realworld/uvm/darknet/cfg/cifar.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..18b6c54c909152b1201d6320b85fafc5c36ba1ef --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/cifar.test.cfg @@ -0,0 +1,117 @@ +[net] +batch=128 +subdivisions=1 +height=32 +width=32 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.4 +policy=poly +power=4 +max_batches = 50000 + + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[dropout] +probability=.5 + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 +temperature=3 + diff --git a/workloads/realworld/uvm/darknet/cfg/cifar_small.cfg b/workloads/realworld/uvm/darknet/cfg/cifar_small.cfg new file mode 100644 index 0000000000000000000000000000000000000000..d48b1231f0131faaa187b18df6705411c3d16a76 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/cifar_small.cfg @@ -0,0 +1,86 @@ +[net] +batch=128 +subdivisions=1 +height=28 +width=28 +channels=3 +max_crop=32 +min_crop=32 + +hue=.1 +saturation=.75 +exposure=.75 + +learning_rate=0.1 +policy=poly +power=4 +max_batches = 5000 +momentum=0.9 +decay=0.0005 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] diff --git a/workloads/realworld/uvm/darknet/cfg/coco.data b/workloads/realworld/uvm/darknet/cfg/coco.data new file mode 100644 index 0000000000000000000000000000000000000000..5951d5245a7895e8418bc3155de3b03049e76c42 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/coco.data @@ -0,0 +1,6 @@ +classes= 80 +train = /data/darknet/coco/valid.list +valid = 
/data/darknet/coco/valid.list +backup = /data/darknet/backup/ +names = /data/darknet/coco/coco.names +eval=coco diff --git a/workloads/realworld/uvm/darknet/cfg/coco.names b/workloads/realworld/uvm/darknet/cfg/coco.names new file mode 100644 index 0000000000000000000000000000000000000000..16315f2becec9705017bfaf1b9fb81ca2a83c0b0 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/coco.names @@ -0,0 +1,80 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +sofa +pottedplant +bed +diningtable +toilet +tvmonitor +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush \ No newline at end of file diff --git a/workloads/realworld/uvm/darknet/cfg/combine9k.data b/workloads/realworld/uvm/darknet/cfg/combine9k.data new file mode 100644 index 0000000000000000000000000000000000000000..06a3e76aefac9c1074c3dfe159bc115a92b0791e --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/combine9k.data @@ -0,0 +1,10 @@ +classes= 9418 +#train = /home/pjreddie/data/coco/trainvalno5k.txt +train = data/combine9k.train.list +valid = /home/pjreddie/data/imagenet/det.val.files +labels = data/9k.labels +names = data/9k.names +backup = backup/ +map = data/inet9k.map +eval = imagenet +results = results diff --git a/workloads/realworld/uvm/darknet/cfg/darknet.cfg b/workloads/realworld/uvm/darknet/cfg/darknet.cfg new file mode 100644 index 0000000000000000000000000000000000000000..375107f7c196baf7adf229a7cfffc84739875828 --- /dev/null +++ 
b/workloads/realworld/uvm/darknet/cfg/darknet.cfg @@ -0,0 +1,120 @@ +[net] +# Training +# batch=128 +# subdivisions=1 +# Testing +batch=1 +subdivisions=1 +height=256 +width=256 +min_crop=128 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/darknet19.cfg b/workloads/realworld/uvm/darknet/cfg/darknet19.cfg new file mode 100644 index 0000000000000000000000000000000000000000..28ac9669ef686b4d638a5bf462451962fec45a4e --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/darknet19.cfg @@ -0,0 +1,205 @@ +[net] +# Training +#batch=128 +#subdivisions=2 + +# Testing + batch=1 + subdivisions=1 + +height=256 +width=256 +min_crop=128 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] 
+batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git 
a/workloads/realworld/uvm/darknet/cfg/darknet19_448.cfg b/workloads/realworld/uvm/darknet/cfg/darknet19_448.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c6df7306d3ef0622e0a0e0cbd6a5603699344e56 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/darknet19_448.cfg @@ -0,0 +1,197 @@ +[net] +batch=128 +subdivisions=4 +height=448 +width=448 +max_crop=512 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 + +angle=7 +hue = .1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/darknet53.cfg b/workloads/realworld/uvm/darknet/cfg/darknet53.cfg new file mode 100644 index 0000000000000000000000000000000000000000..7b6d5766e9ec48ee19a74321583b44621c1e07b3 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/darknet53.cfg @@ -0,0 +1,566 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/darknet53_448.cfg b/workloads/realworld/uvm/darknet/cfg/darknet53_448.cfg new file mode 100644 index 0000000000000000000000000000000000000000..dedab1b97c7e5d4226f061e6c983046d7a278dd0 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/darknet53_448.cfg @@ -0,0 +1,559 @@ +[net] +# Training - start training with darknet53.weights +# batch=128 +# subdivisions=8 + +# Testing +batch=1 +subdivisions=1 + +height=448 +width=448 +channels=3 +min_crop=448 +max_crop=512 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 +momentum=0.9 +decay=0.0005 + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 
+size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/darknet9000.cfg 
b/workloads/realworld/uvm/darknet/cfg/darknet9000.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9dd2dfbbf5a7137faada4e091b8e6d48233f09bf --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/darknet9000.cfg @@ -0,0 +1,205 @@ +[net] +# Training +# batch=128 +# subdivisions=4 +# Testing +batch = 1 +subdivisions = 1 +height=448 +width=448 +max_crop=512 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=9418 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 +tree=data/9k.tree + +[cost] +type=masked + diff --git a/workloads/realworld/uvm/darknet/cfg/densenet201.cfg b/workloads/realworld/uvm/darknet/cfg/densenet201.cfg new file mode 100644 index 0000000000000000000000000000000000000000..65b4aecc52d45075f2913e3d63b9aec0527fa44c --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/densenet201.cfg @@ -0,0 +1,1951 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] 
+layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 
+pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 
+filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 
+pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 
+filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/extraction.cfg b/workloads/realworld/uvm/darknet/cfg/extraction.cfg new file mode 100644 index 0000000000000000000000000000000000000000..66cb15f80e9a5e811223299594882a3b5d9485dc --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/extraction.cfg @@ -0,0 +1,209 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=224 +width=224 +max_crop=320 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/extraction.conv.cfg b/workloads/realworld/uvm/darknet/cfg/extraction.conv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..2a7d09ec80fa2f47e1ebb4134b7845d5cae2b828 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/extraction.conv.cfg @@ -0,0 +1,179 @@ +[net] +batch=1 +subdivisions=1 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.5 +policy=poly +power=6 +max_batches=500000 + +[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] 
+size=2 +stride=2 + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[connected] +output=1000 +activation=leaky + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/extraction22k.cfg b/workloads/realworld/uvm/darknet/cfg/extraction22k.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b5f54090d00537fdca72f54bb2eed69dd78f5b00 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/extraction22k.cfg @@ -0,0 +1,206 @@ +[net] +batch=128 +subdivisions=1 +height=224 +width=224 +max_crop=320 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +max_batches = 0 
+policy=steps +steps=444000,590000,970000 +scales=.5,.2,.1 + +#policy=sigmoid +#gamma=.00008 +#step=100000 +#max_batches=200000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 
+size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[connected] +output=21842 +activation=leaky + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/go.cfg b/workloads/realworld/uvm/darknet/cfg/go.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c730092ff3ffda0124baeace050bd382c442d88d --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/go.cfg @@ -0,0 +1,132 @@ +[net] +batch=512 +subdivisions=1 +height=19 +width=19 +channels=1 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=10000000 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=1 +size=1 +stride=1 +pad=1 +activation=linear + +[reorg] +extra=1 
+stride=1 + +[softmax] + diff --git a/workloads/realworld/uvm/darknet/cfg/go.test.cfg b/workloads/realworld/uvm/darknet/cfg/go.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..1e4e43809bf3ede20a67b5020fcca0f61612e8f7 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/go.test.cfg @@ -0,0 +1,132 @@ +[net] +batch=1 +subdivisions=1 +height=19 +width=19 +channels=1 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +policy=poly +power=4 +max_batches=100000 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=1 +size=1 +stride=1 +pad=1 +activation=linear + +[reorg] +extra=1 +stride=1 + +[softmax] + + diff --git a/workloads/realworld/uvm/darknet/cfg/gru.cfg b/workloads/realworld/uvm/darknet/cfg/gru.cfg new file mode 100644 index 
0000000000000000000000000000000000000000..6064221289d41dc3ee464a715ae05593a02f34f4 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/gru.cfg @@ -0,0 +1,29 @@ +[net] +inputs=256 +momentum=0.9 +decay=0.0 +subdivisions=1 +batch = 1 +time_steps=1 +learning_rate=.002 +adam=1 + +policy=constant +power=4 +max_batches=1000000 + +[gru] +output = 256 + +[gru] +output = 256 + +[gru] +output = 256 + +[connected] +output=256 +activation=linear + +[softmax] + diff --git a/workloads/realworld/uvm/darknet/cfg/imagenet.labels.list b/workloads/realworld/uvm/darknet/cfg/imagenet.labels.list new file mode 100644 index 0000000000000000000000000000000000000000..e73d41762d311df7f7eefec0f65ab12a7bacc023 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/imagenet.labels.list @@ -0,0 +1,21842 @@ +n02119789 +n02100735 +n02110185 +n02096294 +n02102040 +n02066245 +n02509815 +n02124075 +n02417914 +n02123394 +n02125311 +n02423022 +n02346627 +n02077923 +n02110063 +n02447366 +n02109047 +n02089867 +n02102177 +n02091134 +n02092002 +n02071294 +n02442845 +n02504458 +n02092339 +n02098105 +n02096437 +n02114712 +n02105641 +n02128925 +n02091635 +n02088466 +n02096051 +n02117135 +n02138441 +n02097130 +n02493509 +n02457408 +n02389026 +n02443484 +n02110341 +n02089078 +n02086910 +n02445715 +n02093256 +n02113978 +n02106382 +n02441942 +n02113712 +n02113186 +n02105162 +n02415577 +n02356798 +n02488702 +n02123159 +n02098413 +n02422699 +n02114855 +n02094433 +n02111277 +n02132136 +n02119022 +n02091467 +n02106550 +n02422106 +n02091831 +n02120505 +n02104365 +n02086079 +n02112706 +n02098286 +n02095889 +n02484975 +n02137549 +n02500267 +n02129604 +n02090721 +n02396427 +n02108000 +n02391049 +n02412080 +n02108915 +n02480495 +n02110806 +n02128385 +n02107683 +n02085936 +n02094114 +n02087046 +n02100583 +n02096177 +n02494079 +n02105056 +n02101556 +n02123597 +n02481823 +n02105505 +n02088094 +n02085782 +n02489166 +n02364673 +n02114548 +n02134084 +n02480855 +n02090622 +n02113624 +n02093859 +n02403003 +n02097298 
+n02108551 +n02493793 +n02107142 +n02096585 +n02107574 +n02107908 +n02086240 +n02102973 +n02112018 +n02093647 +n02397096 +n02437312 +n02483708 +n02097047 +n02106030 +n02099601 +n02093991 +n02110627 +n02106166 +n02326432 +n02108089 +n02097658 +n02088364 +n02111129 +n02100236 +n02486261 +n02115913 +n02486410 +n02487347 +n02099849 +n02108422 +n02104029 +n02492035 +n02110958 +n02099429 +n02094258 +n02099267 +n02395406 +n02112350 +n02109961 +n02101388 +n02113799 +n02095570 +n02128757 +n02101006 +n02115641 +n02097209 +n02342885 +n02097474 +n02120079 +n02095314 +n02088238 +n02408429 +n02133161 +n02328150 +n02410509 +n02492660 +n02398521 +n02112137 +n02510455 +n02093428 +n02105855 +n02111500 +n02085620 +n02123045 +n02490219 +n02099712 +n02109525 +n02454379 +n02111889 +n02088632 +n02090379 +n02443114 +n02361337 +n02105412 +n02483362 +n02437616 +n02107312 +n02325366 +n02091032 +n02129165 +n02102318 +n02100877 +n02074367 +n02504013 +n02363005 +n02102480 +n02113023 +n02086646 +n02497673 +n02087394 +n02127052 +n02116738 +n02488291 +n02091244 +n02114367 +n02130308 +n02089973 +n02105251 +n02134418 +n02093754 +n02106662 +n02444819 +n01882714 +n01871265 +n01872401 +n01877812 +n01873310 +n01883070 +n04086273 +n04507155 +n04147183 +n04254680 +n02672831 +n02219486 +n02317335 +n01968897 +n03452741 +n03642806 +n07745940 +n02690373 +n04552348 +n02692877 +n02782093 +n04266014 +n03344393 +n03447447 +n04273569 +n03662601 +n02951358 +n04612504 +n02981792 +n04483307 +n03095699 +n03673027 +n03947888 +n02687172 +n04347754 +n04606251 +n03478589 +n04389033 +n03773504 +n02860847 +n03218198 +n02835271 +n03792782 +n03393912 +n03895866 +n02797295 +n04204347 +n03791053 +n03384352 +n03272562 +n04310018 +n02704792 +n02701002 +n02814533 +n02930766 +n03100240 +n03594945 +n03670208 +n03770679 +n03777568 +n04037443 +n04285008 +n03444034 +n03445924 +n03785016 +n04252225 +n03345487 +n03417042 +n03930630 +n04461696 +n04467665 +n03796401 +n03977966 +n04065272 +n04335435 +n04252077 +n04465501 +n03776460 
+n04482393 +n04509417 +n03538406 +n03599486 +n03868242 +n02804414 +n03125729 +n03131574 +n03388549 +n02870880 +n03018349 +n03742115 +n03016953 +n04380533 +n03337140 +n03891251 +n02791124 +n04429376 +n03376595 +n04099969 +n04344873 +n04447861 +n03179701 +n03982430 +n03201208 +n03290653 +n04550184 +n07742313 +n07747607 +n07749582 +n07753113 +n07753275 +n07753592 +n07754684 +n07760859 +n07768694 +n12267677 +n12620546 +n13133613 +n11879895 +n12144580 +n12768682 +n03854065 +n04515003 +n03017168 +n03249569 +n03447721 +n03720891 +n03721384 +n04311174 +n02787622 +n02992211 +n04536866 +n03495258 +n02676566 +n03272010 +n03110669 +n03394916 +n04487394 +n03494278 +n03840681 +n03884397 +n02804610 +n03838899 +n04141076 +n03372029 +n11939491 +n12057211 +n09246464 +n09468604 +n09193705 +n09472597 +n09399592 +n09421951 +n09256479 +n09332890 +n09428293 +n09288635 +n03498962 +n03041632 +n03658185 +n03954731 +n03995372 +n03649909 +n03481172 +n03109150 +n02951585 +n03970156 +n04154565 +n04208210 +n03967562 +n03000684 +n01514668 +n01514859 +n01518878 +n01530575 +n01531178 +n01532829 +n01534433 +n01537544 +n01558993 +n01560419 +n01580077 +n01582220 +n01592084 +n01601694 +n01608432 +n01614925 +n01616318 +n01622779 +n01795545 +n01796340 +n01797886 +n01798484 +n01806143 +n01806567 +n01807496 +n01817953 +n01818515 +n01819313 +n01820546 +n01824575 +n01828970 +n01829413 +n01833805 +n01843065 +n01843383 +n01847000 +n01855032 +n01855672 +n01860187 +n02002556 +n02002724 +n02006656 +n02007558 +n02009912 +n02009229 +n02011460 +n02012849 +n02013706 +n02018207 +n02018795 +n02025239 +n02027492 +n02028035 +n02033041 +n02037110 +n02017213 +n02051845 +n02056570 +n02058221 +n01484850 +n01491361 +n01494475 +n01496331 +n01498041 +n02514041 +n02536864 +n01440764 +n01443537 +n02526121 +n02606052 +n02607072 +n02643566 +n02655020 +n02640242 +n02641379 +n01664065 +n01665541 +n01667114 +n01667778 +n01669191 +n01675722 +n01677366 +n01682714 +n01685808 +n01687978 +n01688243 +n01689811 +n01692333 +n01693334 
+n01694178 +n01695060 +n01704323 +n01697457 +n01698640 +n01728572 +n01728920 +n01729322 +n01729977 +n01734418 +n01735189 +n01737021 +n01739381 +n01740131 +n01742172 +n01744401 +n01748264 +n01749939 +n01751748 +n01753488 +n01755581 +n01756291 +n01629819 +n01630670 +n01631663 +n01632458 +n01632777 +n01641577 +n01644373 +n01644900 +n04579432 +n04592741 +n03876231 +n03483316 +n03868863 +n04251144 +n03691459 +n03759954 +n04152593 +n03793489 +n03271574 +n03843555 +n04332243 +n04265275 +n04330267 +n03467068 +n02794156 +n04118776 +n03841143 +n04141975 +n02708093 +n03196217 +n04548280 +n03544143 +n04355338 +n03891332 +n04328186 +n03197337 +n04317175 +n04376876 +n03706229 +n02841315 +n04009552 +n04356056 +n03692522 +n04044716 +n02879718 +n02950826 +n02749479 +n04090263 +n04008634 +n03085013 +n04505470 +n03126707 +n03666591 +n02666196 +n02977058 +n04238763 +n03180011 +n03485407 +n03832673 +n06359193 +n03496892 +n04428191 +n04004767 +n04243546 +n04525305 +n04179913 +n03602883 +n04372370 +n03532672 +n02974003 +n03874293 +n03944341 +n03992509 +n03425413 +n02966193 +n04371774 +n04067472 +n04040759 +n04019541 +n03492542 +n04355933 +n03929660 +n02965783 +n04258138 +n04074963 +n03208938 +n02910353 +n03476684 +n03627232 +n03075370 +n03874599 +n03804744 +n04127249 +n04153751 +n03803284 +n04162706 +n04228054 +n02948072 +n03590841 +n04286575 +n04456115 +n03814639 +n03933933 +n04485082 +n03733131 +n03794056 +n04275548 +n01768244 +n01770081 +n01770393 +n01773157 +n01773549 +n01773797 +n01774384 +n01774750 +n01775062 +n01776313 +n01784675 +n01990800 +n01978287 +n01978455 +n01980166 +n01981276 +n01983481 +n01984695 +n01985128 +n01986214 +n02165105 +n02165456 +n02167151 +n02168699 +n02169497 +n02172182 +n02174001 +n02177972 +n02190166 +n02206856 +n02226429 +n02229544 +n02231487 +n02233338 +n02236044 +n02256656 +n02259212 +n02264363 +n02268443 +n02268853 +n02276258 +n02277742 +n02279972 +n02280649 +n02281406 +n02281787 +n01910747 +n01914609 +n01917289 +n01924916 +n01930112 +n01943899 
+n01944390 +n01945685 +n01950731 +n01955084 +n02319095 +n02321529 +n03584829 +n03297495 +n03761084 +n03259280 +n04111531 +n04442312 +n04542943 +n04517823 +n03207941 +n04070727 +n04554684 +n03133878 +n03400231 +n04596742 +n02939185 +n03063689 +n04398044 +n04270147 +n02699494 +n04486054 +n03899768 +n04311004 +n04366367 +n04532670 +n02793495 +n03457902 +n03877845 +n03781244 +n03661043 +n02727426 +n02859443 +n03028079 +n03788195 +n04346328 +n03956157 +n04081281 +n03032252 +n03529860 +n03697007 +n03065424 +n03837869 +n04458633 +n02980441 +n04005630 +n03461385 +n02776631 +n02791270 +n02871525 +n02927161 +n03089624 +n04200800 +n04443257 +n04462240 +n03388043 +n03042490 +n04613696 +n03216828 +n02892201 +n03743016 +n02788148 +n02894605 +n03160309 +n03000134 +n03930313 +n04604644 +n04326547 +n03459775 +n04239074 +n04501370 +n03792972 +n04149813 +n03530642 +n03961711 +n03903868 +n02814860 +n07711569 +n07720875 +n07714571 +n07714990 +n07715103 +n07716358 +n07716906 +n07717410 +n07717556 +n07718472 +n07718747 +n07730033 +n07734744 +n04209239 +n03594734 +n02971356 +n03485794 +n04133789 +n02747177 +n04125021 +n07579787 +n03814906 +n03134739 +n03404251 +n04423845 +n03877472 +n04120489 +n03062245 +n03014705 +n03717622 +n03777754 +n04493381 +n04476259 +n02777292 +n07693725 +n03998194 +n03617480 +n07590611 +n04579145 +n03623198 +n07248320 +n04277352 +n04229816 +n02823428 +n03127747 +n02877765 +n04435653 +n03724870 +n03710637 +n03920288 +n03379051 +n02807133 +n04399382 +n03527444 +n03983396 +n03924679 +n04532106 +n06785654 +n03445777 +n07613480 +n04350905 +n04562935 +n03325584 +n03045698 +n07892512 +n03250847 +n04192698 +n03026506 +n03534580 +n07565083 +n04296562 +n02869837 +n07871810 +n02799071 +n03314780 +n04141327 +n04357314 +n02823750 +n13052670 +n07583066 +n03637318 +n04599235 +n07802026 +n02883205 +n03709823 +n04560804 +n02909870 +n03207743 +n04263257 +n07932039 +n03786901 +n04479046 +n03873416 +n02999410 +n04367480 +n03775546 +n07875152 +n04591713 +n04201297 +n02916936 
+n03240683 +n02840245 +n02963159 +n04370456 +n03991062 +n02843684 +n03482405 +n03942813 +n03908618 +n03902125 +n07584110 +n02730930 +n04023962 +n02769748 +n10148035 +n02817516 +n03908714 +n02906734 +n03788365 +n02667093 +n03787032 +n03980874 +n03141823 +n03976467 +n04264628 +n07930864 +n04039381 +n06874185 +n04033901 +n04041544 +n07860988 +n03146219 +n03763968 +n03676483 +n04209133 +n03782006 +n03857828 +n03775071 +n02892767 +n07684084 +n04522168 +n03764736 +n04118538 +n03887697 +n13044778 +n03291819 +n03770439 +n03124170 +n04487081 +n03916031 +n02808440 +n07697537 +n12985857 +n02917067 +n03938244 +n15075141 +n02978881 +n02966687 +n03633091 +n13040303 +n03690938 +n03476991 +n02669723 +n03220513 +n03127925 +n04584207 +n07880968 +n03937543 +n03000247 +n04418357 +n04590129 +n02795169 +n04553703 +n02783161 +n02802426 +n02808304 +n03124043 +n03450230 +n04589890 +n12998815 +n02992529 +n03825788 +n02790996 +n03710193 +n03630383 +n03347037 +n03769881 +n03871628 +n03733281 +n03976657 +n03535780 +n04259630 +n03929855 +n04049303 +n04548362 +n02979186 +n06596364 +n03935335 +n06794110 +n02825657 +n03388183 +n04591157 +n04540053 +n03866082 +n04136333 +n04026417 +n02865351 +n02834397 +n03888257 +n04235860 +n04404412 +n04371430 +n03733805 +n07920052 +n07873807 +n02895154 +n04204238 +n04597913 +n04131690 +n07836838 +n09835506 +n03443371 +n13037406 +n04336792 +n04557648 +n03187595 +n04254120 +n03595614 +n04146614 +n03598930 +n03958227 +n04069434 +n03188531 +n02786058 +n07615774 +n04525038 +n04409515 +n03424325 +n03223299 +n03680355 +n07614500 +n07695742 +n04033995 +n03710721 +n04392985 +n03047690 +n03584254 +n13054560 +n10565667 +n03950228 +n03729826 +n02837789 +n04254777 +n02988304 +n03657121 +n04417672 +n04523525 +n02815834 +n09229709 +n07697313 +n03888605 +n03355925 +n03063599 +n04116512 +n04325704 +n07831146 +n03255030 +n00483313 +n02432291 +n02356381 +n02377388 +n04028764 +n04381587 +n02279257 +n04168199 +n00445055 +n02461128 +n03626760 +n04313503 +n00451635 +n02509515 
+n04224842 +n09403734 +n02769290 +n13054073 +n03163222 +n00464478 +n03087069 +n04477219 +n03445617 +n00449054 +n00483705 +n04395106 +n03389611 +n04285965 +n04166281 +n04003856 +n03696301 +n00475787 +n04587404 +n09218641 +n02276355 +n03592669 +n04459909 +n04492375 +n09447666 +n00463543 +n04148703 +n04591517 +n03970546 +n04297750 +n02782778 +n02383231 +n03693474 +n02277094 +n03766044 +n02056228 +n03394272 +n03047052 +n00434075 +n04185946 +n02411999 +n03858418 +n12833149 +n02836035 +n03108853 +n04587559 +n04138261 +n02278024 +n03063485 +n02774921 +n09475044 +n02811204 +n03329302 +n04026813 +n03986562 +n03379204 +n03426134 +n02790669 +n03487090 +n03548402 +n08614632 +n04054361 +n03421485 +n03302671 +n03098959 +n02970408 +n03772584 +n03064935 +n09415584 +n11715430 +n12024445 +n02710201 +n03475581 +n13142504 +n03396074 +n03211789 +n03914337 +n03678558 +n03233123 +n00453396 +n00454395 +n00440382 +n04289027 +n00445226 +n11953610 +n04128413 +n00480211 +n00470966 +n12547503 +n03085219 +n02275773 +n02692086 +n04257790 +n00448748 +n02686379 +n12328567 +n03432129 +n03859000 +n12091377 +n02124313 +n00442847 +n04603399 +n03114379 +n02920369 +n03818343 +n02946127 +n02978055 +n12914923 +n02705429 +n00448232 +n12882945 +n04289690 +n07606669 +n02056728 +n11848479 +n03046921 +n12282933 +n02867966 +n12821505 +n02812949 +n04545305 +n02699770 +n04395651 +n02900160 +n04099003 +n02054711 +n12606545 +n03356858 +n01859190 +n03643737 +n02962200 +n03123553 +n09361517 +n02793089 +n00449517 +n02783994 +n10117851 +n12038585 +n04383839 +n10142391 +n07719213 +n03536122 +n02472987 +n03454536 +n11728099 +n02392824 +n03795758 +n04282872 +n00448872 +n02404432 +n03797182 +n03029197 +n03665924 +n12477163 +n02769963 +n03863262 +n01532325 +n04165409 +n04593077 +n04473108 +n03577090 +n09988063 +n00446804 +n03931765 +n00475014 +n02700064 +n03240892 +n12475242 +n11735053 +n04053508 +n02852173 +n02382750 +n03823111 +n04543772 +n04112147 +n04433585 +n03175189 +n03596543 +n00445685 +n03307792 +n04589593 
+n01814217 +n02993368 +n04303497 +n02811350 +n03355768 +n03699591 +n04590553 +n01893825 +n12726670 +n09916348 +n11544015 +n01318894 +n02133704 +n02367492 +n04506289 +n02069974 +n01900150 +n03207835 +n03363549 +n02831595 +n04970470 +n04160847 +n03767203 +n03928814 +n02302969 +n02918595 +n10401331 +n04231272 +n03717447 +n03063968 +n03380724 +n00825773 +n09988493 +n02740300 +n04539794 +n04121511 +n01323599 +n12937130 +n02428508 +n02980036 +n12061380 +n01887787 +n04214046 +n01787835 +n00466630 +n02979290 +n03927091 +n03231368 +n03904657 +n04469003 +n04196502 +n02122948 +n04544325 +n07868340 +n13876561 +n11925898 +n12158443 +n01595450 +n12454705 +n02069412 +n09618957 +n02357111 +n00451563 +n04197110 +n02276902 +n03111296 +n03909020 +n12303083 +n02082791 +n01956764 +n04269822 +n04207343 +n02433318 +n01888181 +n12682668 +n01592387 +n09793141 +n00466273 +n04026180 +n06255081 +n12172364 +n10145590 +n12311579 +n12173912 +n03822171 +n03140292 +n03027625 +n02739427 +n02060133 +n02431785 +n03219010 +n00447957 +n11910271 +n03620967 +n12547215 +n02409508 +n04290079 +n12329260 +n13901858 +n02008497 +n10304914 +n04524142 +n04279462 +n04233124 +n09733793 +n12822115 +n09475179 +n10151760 +n03418618 +n12858397 +n07735510 +n03549473 +n10098245 +n03653583 +n10604380 +n03375575 +n03885293 +n01527347 +n03237340 +n02760658 +n11953038 +n03187268 +n03004275 +n02393161 +n11965218 +n08580944 +n03938725 +n03900979 +n04144241 +n03760310 +n02376679 +n03237992 +n09432283 +n02379908 +n09918554 +n04041747 +n12012111 +n10331167 +n01612122 +n10147935 +n07691539 +n11669786 +n09403427 +n01935395 +n09903501 +n04439585 +n04459018 +n02780704 +n03720163 +n12899752 +n04118635 +n03404149 +n02429456 +n00449168 +n04516354 +n04317833 +n12075299 +n07878647 +n09438940 +n03361550 +n02027357 +n04317976 +n03092883 +n04526964 +n03985069 +n03610682 +n04028581 +n02277268 +n09433839 +n03846431 +n03919289 +n10146104 +n10260706 +n02686227 +n03321103 +n00444846 +n01558307 +n01595168 +n03919096 +n11844892 +n04260364 
+n02750070 +n03034244 +n03002096 +n04273972 +n11814584 +n04605321 +n07745466 +n02922798 +n03361380 +n12651229 +n08521623 +n04498389 +n00453313 +n04967882 +n12024690 +n03934656 +n02685082 +n04501550 +n09972458 +n03055418 +n07763629 +n03902756 +n09938449 +n09712696 +n02387346 +n03133415 +n07711080 +n03129753 +n03524150 +n02275560 +n03993053 +n03438661 +n11939180 +n00466524 +n11753355 +n03456024 +n03421324 +n07890540 +n11720643 +n02057035 +n00453126 +n04453037 +n01540832 +n03546235 +n03370387 +n02041875 +n02871439 +n03262072 +n01786646 +n02430830 +n02799175 +n05262422 +n03854722 +n12817694 +n04449966 +n01564773 +n02034971 +n03490119 +n02822579 +n07879953 +n04110178 +n04963588 +n04252653 +n01565078 +n02389128 +n02779435 +n10645017 +n04582205 +n08573842 +n10146002 +n03892178 +n03119396 +n03813078 +n07866868 +n03160740 +n03371875 +n02417387 +n03904782 +n03098688 +n02902687 +n01828556 +n04401680 +n04590933 +n01575401 +n07693048 +n02901114 +n03047941 +n04355511 +n11849871 +n10738111 +n03122073 +n12052787 +n01594004 +n01549886 +n02824058 +n03506184 +n11487732 +n12574866 +n12948053 +n10091450 +n00470554 +n00326094 +n12093329 +n04438897 +n07818995 +n12828791 +n13901321 +n10613996 +n10159533 +n02669295 +n02843158 +n06415688 +n14858292 +n09813219 +n12485653 +n03200231 +n02089468 +n03935234 +n01539925 +n12428076 +n10439373 +n01536644 +n02694662 +n02123242 +n03002711 +n03363749 +n02669534 +n03451798 +n11927215 +n02679257 +n09475925 +n10015485 +n12422129 +n03946162 +n02377291 +n07871720 +n12622297 +n12782915 +n01579260 +n11838916 +n10267311 +n12824053 +n03340723 +n02276749 +n04439712 +n02126139 +n04188179 +n02386853 +n07942152 +n02499316 +n04324387 +n10635788 +n04234887 +n12237641 +n03713436 +n04960582 +n04076713 +n01646292 +n03947798 +n02840134 +n04476972 +n09822830 +n03551395 +n04533802 +n02918964 +n00474657 +n12932966 +n01615458 +n01806364 +n12458550 +n11784497 +n03557360 +n10638922 +n09889941 +n10689306 +n03358172 +n04295571 +n06596607 +n11853356 +n00482122 +n11760785 
+n03150232 +n11778257 +n03059685 +n10105733 +n04104384 +n07691237 +n04326676 +n07684938 +n12666965 +n04177820 +n13918387 +n03398153 +n03914438 +n09932098 +n02988486 +n02977619 +n03317788 +n03484487 +n02988679 +n04062428 +n02568087 +n12866162 +n04227144 +n07875436 +n04082886 +n11753700 +n00470682 +n02122298 +n10145239 +n12755727 +n04214282 +n01852671 +n02378969 +n04108822 +n10382825 +n12392549 +n03973839 +n12258885 +n11782761 +n12389501 +n02940570 +n03405595 +n02969323 +n03207630 +n10169147 +n03805725 +n09847543 +n02415253 +n07880080 +n04305572 +n02042180 +n07565161 +n02871147 +n04438507 +n04445154 +n07842433 +n12029635 +n09750282 +n09621232 +n01858906 +n02761206 +n03986355 +n12591351 +n13916721 +n02905036 +n11894770 +n02377603 +n12924623 +n03950899 +n09454153 +n10247358 +n05261310 +n11943660 +n10804287 +n03560430 +n01756089 +n10618342 +n04283378 +n13926786 +n04238321 +n04393549 +n04461879 +n03502200 +n00440941 +n03494706 +n04148579 +n13902336 +n02780815 +n10726031 +n04124098 +n12344483 +n04384910 +n07681450 +n02030837 +n04059157 +n09247410 +n02714751 +n08633683 +n04520784 +n10141732 +n12371439 +n04499062 +n02931148 +n07609632 +n04536335 +n02874537 +n03013438 +n11786539 +n11690455 +n07600696 +n00478262 +n00466712 +n03399677 +n12441183 +n07895962 +n11966083 +n02990373 +n04241249 +n02068541 +n12513933 +n02356977 +n04252560 +n04087826 +n03455488 +n07619409 +n09787534 +n03680942 +n00446980 +n12384839 +n03416900 +n07821758 +n11853813 +n01606522 +n11780148 +n04969242 +n12413880 +n04130257 +n01322604 +n03061211 +n01959492 +n02842573 +n04313628 +n03815149 +n02445394 +n08547544 +n03222176 +n04070003 +n03075768 +n09695979 +n02877266 +n08583292 +n02870676 +n03657511 +n01621635 +n04284341 +n04136161 +n02836174 +n10247880 +n01744100 +n02882894 +n03408444 +n03411079 +n02366959 +n04399158 +n04542715 +n02787435 +n04251701 +n13863020 +n07890226 +n12245319 +n12849952 +n11626826 +n00887544 +n03140431 +n03519387 +n03855604 +n07906111 +n02054036 +n11954161 +n03038281 +n00450998 
+n12136392 +n02119477 +n04356925 +n02406647 +n04450133 +n12545635 +n01565599 +n02028900 +n07817024 +n02971167 +n04309049 +n02678897 +n12795555 +n11769803 +n01904886 +n02079851 +n12189987 +n04581829 +n12098403 +n01839330 +n12587803 +n03652932 +n08628141 +n03544238 +n04513827 +n01847806 +n03132076 +n10243137 +n03621377 +n10530959 +n14765422 +n04968139 +n12950314 +n02064816 +n02846511 +n10513823 +n11772408 +n03341297 +n03492922 +n03683606 +n02894337 +n02365480 +n09846755 +n03495039 +n01317813 +n12610328 +n02157206 +n01588002 +n03914831 +n03659686 +n10406765 +n09205509 +n02870526 +n07954211 +n10578471 +n11646694 +n03115762 +n07762913 +n12056758 +n12305986 +n11845913 +n02835915 +n02831237 +n07927512 +n12171098 +n02073831 +n07605040 +n02885462 +n02768114 +n04450994 +n11844371 +n03963645 +n02956699 +n02029378 +n01528396 +n10005934 +n04465666 +n04390977 +n11882074 +n03831382 +n04605163 +n06276501 +n02944075 +n05258051 +n07901457 +n12683571 +n02205219 +n13235503 +n02388735 +n03941231 +n14919819 +n12816508 +n11536673 +n13895262 +n02903204 +n10137825 +n07841345 +n07893253 +n01850192 +n07769731 +n11773987 +n03539678 +n12938193 +n10802507 +n03089879 +n00477392 +n01828096 +n09263912 +n13653902 +n04579667 +n01322983 +n08579352 +n07587023 +n07756951 +n07870167 +n10588357 +n01606809 +n13864035 +n02802544 +n07591961 +n02979399 +n04144539 +n02416820 +n11769176 +n09743792 +n09732170 +n04972451 +n13918274 +n01847089 +n01859689 +n04208065 +n07617051 +n10674713 +n07914271 +n07887461 +n03736064 +n03644858 +n03878963 +n04040247 +n07891433 +n01611969 +n07587618 +n02689144 +n10049363 +n04059516 +n10313239 +n03115400 +n01519563 +n01533893 +n03850245 +n11733548 +n03372549 +n01884834 +n02839110 +n07887192 +n03617312 +n07886463 +n03103396 +n07764847 +n01855476 +n07808587 +n12858871 +n03632729 +n10209731 +n04141712 +n03978686 +n03225988 +n00475273 +n09224725 +n04966543 +n01322221 +n03649674 +n13154494 +n03948830 +n03320519 +n03723267 +n07869611 +n12342498 +n01827793 +n03145719 +n11821184 
+n11956348 +n11857875 +n10339717 +n09450163 +n10756148 +n01591301 +n07915094 +n04422727 +n09719309 +n03349469 +n03389889 +n10718131 +n04298661 +n09747495 +n03676623 +n03547229 +n03062015 +n10734394 +n07817315 +n02852360 +n01850553 +n02952585 +n03587205 +n02009750 +n01540090 +n02947660 +n03656957 +n03378174 +n02508213 +n01572489 +n12008487 +n12185859 +n11691046 +n01323355 +n05262534 +n00448126 +n02432983 +n12038406 +n03883385 +n02411206 +n01643896 +n10159045 +n11675025 +n01803362 +n02009508 +n07920349 +n04098513 +n11617272 +n09913455 +n12390314 +n04171208 +n02995345 +n10634849 +n03173929 +n02749953 +n11845793 +n12796022 +n11955153 +n11816829 +n03032453 +n11984542 +n02992795 +n03712111 +n02873733 +n02759387 +n14915184 +n02381364 +n12686274 +n07857731 +n04518764 +n03010473 +n02418465 +n02359556 +n07894799 +n04104770 +n04335209 +n01848976 +n02006063 +n04454908 +n03002948 +n04220250 +n09923561 +n04102162 +n11958080 +n04598965 +n10173410 +n03067339 +n02003204 +n12686676 +n11986511 +n02311617 +n03367059 +n02761557 +n05578095 +n04041069 +n10575463 +n03325941 +n10082043 +n01806297 +n09691729 +n04593866 +n01813088 +n01625562 +n03906224 +n01652026 +n10236304 +n04102618 +n04321453 +n07820145 +n01575117 +n12788854 +n07823698 +n04206225 +n03216710 +n02421449 +n03343737 +n07560903 +n02872529 +n11989869 +n12071744 +n06278475 +n04492749 +n02920259 +n03798061 +n02420509 +n03316105 +n12052447 +n03974915 +n02904803 +n03430418 +n12291959 +n06892775 +n03875806 +n07903841 +n10282482 +n02683323 +n07862348 +n01849157 +n04469813 +n09944022 +n03342127 +n07592481 +n02936402 +n02405929 +n10002760 +n02537716 +n05259914 +n01560280 +n12694486 +n07879350 +n02377063 +n03637181 +n03409297 +n01607812 +n02808185 +n09239302 +n12055516 +n09712448 +n02859184 +n12772908 +n02735538 +n10333838 +n12336092 +n02386968 +n04613939 +n00452864 +n04535524 +n03174731 +n04189816 +n07607605 +n12909917 +n02387722 +n02960690 +n07715221 +n02407071 +n10667477 +n09398076 +n04236809 +n01904806 +n01610552 +n12373100 
+n12771390 +n04122685 +n07804771 +n15102455 +n03469175 +n03746005 +n02536456 +n03505667 +n11816336 +n09376198 +n10572706 +n03464053 +n02869155 +n07816164 +n04969798 +n02942349 +n14820180 +n01623615 +n12676703 +n03369276 +n03650551 +n02010272 +n02976123 +n01852400 +n02196119 +n04132158 +n03238586 +n07639069 +n03313333 +n10542761 +n12215022 +n00455173 +n10019406 +n12899537 +n04277826 +n09906449 +n04549629 +n11508382 +n15090065 +n10289462 +n04540255 +n02723165 +n04335693 +n01536334 +n03107488 +n12782530 +n14785065 +n02974348 +n09874862 +n04479939 +n03309465 +n09902954 +n12092417 +n03425595 +n12433081 +n07806774 +n12462805 +n01314781 +n10192839 +n01622120 +n07807171 +n03261019 +n02843553 +n04287747 +n02324587 +n09915434 +n01818299 +n01592694 +n03826186 +n03607659 +n01527917 +n03628511 +n02005399 +n04204081 +n02052775 +n04403413 +n03914106 +n12811027 +n01872772 +n04555700 +n02004855 +n04602762 +n02713003 +n04406817 +n11934807 +n03336282 +n09684901 +n03836976 +n11959862 +n03062336 +n03506028 +n04503413 +n07819896 +n03205669 +n11902200 +n07685218 +n03046133 +n10261624 +n10303814 +n03676087 +n04023695 +n07587111 +n07764155 +n01504179 +n03794136 +n03389761 +n13901211 +n02784124 +n04488530 +n02807731 +n07898443 +n04981658 +n04177755 +n03649161 +n04125257 +n10135129 +n03653110 +n10560106 +n07735687 +n03511333 +n11960245 +n03301568 +n03878066 +n10746931 +n04223299 +n04237423 +n07888229 +n01819734 +n12312728 +n09981939 +n03727465 +n13882276 +n02993194 +n11971927 +n09713108 +n03581125 +n09718936 +n14698884 +n03005285 +n03540914 +n03359436 +n03934042 +n07569644 +n04964878 +n07890068 +n07580253 +n01538630 +n03132666 +n03259009 +n02796318 +n12703190 +n01464844 +n11792029 +n04270371 +n13102775 +n02933649 +n02387254 +n02890188 +n04335886 +n04358491 +n02786837 +n03885194 +n04001265 +n03438071 +n10375402 +n02997910 +n03326795 +n00470830 +n02734725 +n03494537 +n08376250 +n07743544 +n02991847 +n04246271 +n04156140 +n04381073 +n07732168 +n04951071 +n07977870 +n04334599 +n02838728 
+n03326948 +n11723227 +n08182379 +n03686924 +n03821518 +n02382204 +n02080415 +n11788727 +n07732636 +n03860404 +n03898395 +n07867324 +n04392113 +n13237188 +n03263076 +n07843636 +n04968056 +n04397027 +n03320421 +n06267564 +n02880842 +n04115456 +n13862407 +n10289039 +n03128248 +n01457852 +n01536035 +n04579056 +n03937931 +n03036022 +n01804163 +n09913593 +n12841007 +n03115897 +n03256032 +n02475669 +n07924443 +n03061505 +n10001481 +n03600722 +n07842308 +n10696508 +n04215402 +n10588074 +n03614782 +n03995535 +n12091953 +n04113194 +n10092978 +n03011741 +n04381860 +n07819769 +n07905474 +n03288500 +n04225987 +n13223710 +n02879087 +n02920083 +n08640739 +n03362890 +n03996849 +n03849814 +n09694664 +n02407390 +n02910864 +n02388917 +n01668665 +n07616046 +n02932891 +n10553235 +n03652729 +n01615703 +n12801781 +n12164656 +n05302499 +n03801760 +n03332271 +n02901793 +n03941417 +n09833441 +n01623110 +n02807523 +n10598181 +n03725600 +n10368528 +n04116098 +n12719944 +n02045864 +n02173373 +n02811059 +n04479823 +n07816398 +n10572889 +n04142731 +n07687381 +n02799323 +n07865484 +n01858845 +n12684379 +n01842235 +n09242389 +n02028727 +n03527565 +n03438863 +n15019030 +n13907272 +n09659039 +n04251791 +n03683995 +n04137217 +n04389430 +n09785659 +n02016816 +n03124590 +n01859325 +n03138669 +n02999936 +n11926365 +n12686077 +n03517760 +n09734450 +n04563413 +n12074867 +n01564217 +n12521394 +n06267893 +n03594148 +n04139395 +n12369309 +n01544389 +n12048056 +n04524941 +n03016868 +n03653740 +n02795528 +n03687137 +n03766935 +n03361297 +n04263502 +n10043491 +n03446268 +n01994910 +n03891538 +n10091564 +n10226413 +n02755140 +n03500389 +n10237196 +n03625646 +n06596474 +n03360300 +n09730824 +n10732010 +n04469514 +n02904927 +n04961331 +n02936570 +n03680858 +n07585758 +n09199101 +n04050933 +n03712337 +n03911513 +n01556182 +n03102371 +n07928887 +n12133462 +n03974070 +n03971218 +n03292475 +n03425241 +n03440216 +n11995092 +n02894158 +n02918112 +n10568358 +n11524451 +n03169176 +n04100519 +n07588193 +n06883725 
+n02860640 +n07762114 +n04082710 +n07896893 +n10167152 +n03287351 +n02788021 +n08494231 +n01560935 +n03249342 +n04564581 +n09349648 +n07704205 +n03510244 +n12127460 +n09945745 +n11719286 +n11613459 +n12656369 +n03824381 +n07655263 +n09894143 +n04964001 +n02161457 +n07654298 +n07930433 +n02979074 +n02026948 +n13914608 +n07611267 +n02843276 +n09827363 +n10259780 +n04432662 +n11715678 +n12388858 +n03057920 +n10465451 +n03855214 +n07728181 +n09835348 +n03549732 +n04589325 +n03491032 +n00452034 +n03948242 +n01456756 +n07921615 +n02809105 +n12889713 +n07586894 +n07734879 +n07905979 +n12847374 +n12129134 +n02122580 +n04028074 +n02911332 +n09251407 +n07697825 +n04597309 +n02800213 +n03480579 +n07621618 +n04170933 +n03743279 +n01916481 +n04037220 +n10748620 +n02708433 +n12007196 +n02561381 +n04103769 +n03030880 +n04413969 +n03911658 +n04590746 +n00476389 +n04331639 +n07725789 +n01792429 +n02949542 +n07686720 +n04064862 +n04447028 +n01713764 +n09854218 +n04032603 +n04405907 +n15093298 +n04385536 +n11954345 +n01560793 +n09249034 +n03784270 +n03436549 +n01324610 +n02379183 +n07616487 +n04119478 +n03309356 +n12865037 +n12850168 +n04250850 +n03024064 +n04412097 +n02982515 +n00450070 +n10175248 +n11847169 +n12276872 +n12870891 +n10229883 +n10505613 +n03482252 +n09300905 +n02919890 +n07617611 +n10283170 +n01607962 +n01671125 +n07894551 +n04561287 +n00005787 +n10025635 +n02850732 +n03732020 +n02036711 +n07907429 +n03797896 +n03004824 +n12011620 +n10300303 +n03105467 +n03767745 +n07868508 +n07868200 +n03788047 +n07886057 +n04559451 +n09845401 +n04373704 +n02676938 +n02565324 +n02667478 +n02122878 +n03244047 +n01747589 +n04320973 +n13205058 +n02379430 +n11959632 +n10183931 +n07683490 +n10055410 +n04370288 +n03273551 +n13900422 +n07899434 +n04053677 +n07740461 +n11879722 +n04282494 +n02981911 +n03449451 +n07581249 +n03965456 +n11808468 +n13881644 +n11725973 +n12091213 +n13193856 +n02873520 +n02754656 +n02431976 +n01324431 +n02385214 +n01888411 +n12680864 +n07731284 +n04337287 
+n07631926 +n02549248 +n04395024 +n07585557 +n02776825 +n09460046 +n12023108 +n00475403 +n10098517 +n07902336 +n03683708 +n02412210 +n04397452 +n04583212 +n13869547 +n03632577 +n01616086 +n02763901 +n08256735 +n03015478 +n02084732 +n12178896 +n11966215 +n07605380 +n13869788 +n01847170 +n07744811 +n01854700 +n00444937 +n10422405 +n07801892 +n09688804 +n11879054 +n02802215 +n07908411 +n07822518 +n01558594 +n07935737 +n10730728 +n04436329 +n04294879 +n04972350 +n12911440 +n13886260 +n07578093 +n02537525 +n03703730 +n09607630 +n13865904 +n02360282 +n11731659 +n04126066 +n04212165 +n11618290 +n07588574 +n09269472 +n11896722 +n02892304 +n03487642 +n02028342 +n03321563 +n03135030 +n03522100 +n03253886 +n04095109 +n06470073 +n12603449 +n10644598 +n10260800 +n01535469 +n09696456 +n03553019 +n03963198 +n11918473 +n10314517 +n03002341 +n07574923 +n10421470 +n05716342 +n03244231 +n01730563 +n11691857 +n12807251 +n12345899 +n03142679 +n01531512 +n12307240 +n07835457 +n04535370 +n00451186 +n12481458 +n03434188 +n09734185 +n04578934 +n04167346 +n02747802 +n03459328 +n03301940 +n01562014 +n07690431 +n10642596 +n03696065 +n12781940 +n02759257 +n04392764 +n04218564 +n03499907 +n01536780 +n09751895 +n03235042 +n04570815 +n12070381 +n09448690 +n07625061 +n10178216 +n04560113 +n09457979 +n03858085 +n02421792 +n02944579 +n10085869 +n09718811 +n04103206 +n04239786 +n04501947 +n01321123 +n02390015 +n03964495 +n01554448 +n02925107 +n03028596 +n12483625 +n03227317 +n10701644 +n11968704 +n03900393 +n01851038 +n02276078 +n03132776 +n07585906 +n04480033 +n07880458 +n12887293 +n07921239 +n03307037 +n04595028 +n04244379 +n13131028 +n10313724 +n09436708 +n02694045 +n09941787 +n00449796 +n01817346 +n07928696 +n03401279 +n12901724 +n11646167 +n07682477 +n09415671 +n07900225 +n03607029 +n02692232 +n11834654 +n07935379 +n12437930 +n03762434 +n07922764 +n03595523 +n04546340 +n10686885 +n03516844 +n03767112 +n09896685 +n03859608 +n03149686 +n07920872 +n12388143 +n10406391 +n04233715 +n04373089 
+n02023992 +n01947396 +n12115180 +n00479616 +n03962852 +n02392434 +n12414035 +n14976871 +n03201776 +n10665587 +n03600285 +n04402449 +n08539072 +n03629231 +n12860365 +n03488438 +n03337383 +n12455950 +n10384392 +n02953455 +n03101796 +n07919572 +n03233744 +n01578180 +n01756508 +n04556533 +n02962843 +n02882190 +n03731483 +n01850873 +n05260240 +n03111177 +n09836519 +n03030557 +n11789066 +n02788572 +n07903101 +n04067818 +n07840804 +n01567678 +n12427184 +n03333610 +n02416964 +n10607291 +n07936548 +n05451384 +n02968074 +n07605597 +n02704949 +n07609215 +n01951274 +n07696977 +n03180384 +n04303357 +n03291741 +n02207805 +n10123844 +n03420345 +n12384227 +n02758863 +n02047975 +n03978966 +n03549199 +n04275175 +n09294877 +n09836343 +n11970586 +n02010728 +n10369317 +n12681893 +n03192543 +n12413165 +n12174521 +n11916696 +n10042845 +n07822197 +n04968749 +n10323634 +n12849416 +n02814774 +n05538625 +n03078802 +n12230794 +n07726095 +n03051249 +n12005656 +n11876432 +n12164881 +n09711435 +n01622483 +n09896170 +n07684289 +n03368352 +n07910048 +n03159535 +n00466377 +n01541386 +n11647703 +n09752023 +n07903731 +n12249542 +n03794798 +n11786131 +n02852043 +n10493685 +n09846894 +n01752585 +n01536186 +n07618432 +n09859152 +n02065026 +n02382635 +n07867616 +n03885788 +n04255586 +n03275681 +n11961100 +n12485981 +n04495698 +n03293741 +n13902048 +n03254862 +n07903962 +n01594787 +n11962272 +n03284886 +n07842202 +n10157128 +n02405302 +n04443766 +n06266633 +n02519862 +n01487506 +n03373943 +n04247876 +n04327204 +n03349771 +n09260907 +n10092794 +n12223764 +n03504723 +n11926833 +n01820052 +n13032381 +n03889871 +n03209359 +n04608923 +n15093137 +n15091304 +n03688405 +n09905185 +n03543112 +n11611356 +n03885028 +n03234164 +n07594066 +n02396014 +n03456186 +n09874725 +n11601333 +n02917521 +n03055857 +n02804123 +n12352844 +n12866002 +n09858165 +n12037691 +n02565072 +n04477387 +n02008643 +n07867021 +n04119360 +n09893191 +n02944146 +n12435649 +n13197274 +n04974859 +n07751004 +n12003696 +n02762508 +n02680512 
+n01743086 +n06998748 +n10607478 +n07613815 +n01559477 +n01859852 +n03239054 +n04466871 +n05263183 +n13173882 +n07897438 +n12427757 +n04400737 +n03291963 +n07682808 +n11692265 +n04130143 +n09445289 +n07696839 +n03835197 +n12821895 +n09734639 +n03365374 +n04305210 +n04962240 +n09871867 +n07897750 +n07616386 +n09443281 +n03641569 +n13882563 +n07680761 +n10498816 +n04034262 +n03533014 +n07928790 +n07690152 +n10060352 +n04124370 +n12453186 +n04509171 +n03013580 +n10604979 +n12515711 +n04971211 +n07693223 +n03786715 +n07894703 +n02761834 +n04232800 +n03437741 +n04045644 +n14976759 +n03042697 +n12557681 +n06275095 +n11678010 +n01586941 +n07684517 +n07822845 +n03483823 +n09951616 +n03180865 +n07861557 +n03644378 +n12848499 +n11962667 +n03886762 +n04238128 +n11979964 +n13915113 +n12791329 +n12457091 +n03341153 +n10267865 +n03484576 +n10186216 +n07612137 +n03843438 +n11807525 +n11931540 +n02027897 +n07614730 +n04116294 +n03469903 +n10017272 +n03688605 +n07860103 +n03981566 +n01888045 +n03345837 +n11998888 +n02071636 +n02726017 +n04310157 +n04607869 +n01622959 +n08524735 +n03119203 +n12031927 +n03610524 +n02807616 +n04056180 +n03233905 +n03374473 +n14810561 +n11944954 +n03121431 +n09750891 +n08505018 +n10727171 +n12357485 +n12571781 +n12067193 +n07586604 +n02086753 +n03548086 +n02560110 +n07804900 +n02880393 +n04208427 +n12931542 +n01594968 +n05218119 +n03520493 +n03727605 +n12687698 +n03612965 +n04135315 +n07730320 +n10540114 +n07599911 +n01323493 +n02115096 +n04590263 +n12043836 +n02861387 +n09836786 +n04966941 +n02816768 +n13131618 +n10701962 +n02919792 +n03442597 +n04325041 +n03333129 +n04091693 +n04950952 +n10631309 +n04177931 +n13234678 +n01970667 +n07748416 +n07893642 +n07691650 +n03660909 +n04145863 +n11945514 +n10334009 +n12336973 +n03954393 +n04558478 +n09899929 +n03487533 +n07816575 +n07877187 +n07863547 +n01603812 +n02098906 +n04973585 +n03674440 +n04371050 +n12243109 +n07871234 +n02928049 +n07574504 +n07889274 +n12141167 +n04543996 +n03080633 +n03423479 
+n07879659 +n04380916 +n10514429 +n07584423 +n04009801 +n12479537 +n07606538 +n07698543 +n12353754 +n10132035 +n03367545 +n04245508 +n09811852 +n02024763 +n04052442 +n10120330 +n12352639 +n12606438 +n07752966 +n09772930 +n02535759 +n11737534 +n10345015 +n12427566 +n09705784 +n04112654 +n02985963 +n03758089 +n12953484 +n07906572 +n02881757 +n12739332 +n03718458 +n03407865 +n07775050 +n03210552 +n09452395 +n09789566 +n10566072 +n10559996 +n07826930 +n12414932 +n01887474 +n03026907 +n07751148 +n10223177 +n03957420 +n03788601 +n12244819 +n12421137 +n04266162 +n10038409 +n02981024 +n03228967 +n11825351 +n12058822 +n11963932 +n03041449 +n03046029 +n07590502 +n02932523 +n02152881 +n04970398 +n07887967 +n12812478 +n12421917 +n02708711 +n11870747 +n04290507 +n07934282 +n01608265 +n12070583 +n03205574 +n02305085 +n07866015 +n02960903 +n10098624 +n00481803 +n07938007 +n02693246 +n03923379 +n04103665 +n11792742 +n12489815 +n04971313 +n01668892 +n01055165 +n03215508 +n12104501 +n07899292 +n12822955 +n07713074 +n03842012 +n02449350 +n07868955 +n02835829 +n12283542 +n04525584 +n07910656 +n11625003 +n03987266 +n02805983 +n15091846 +n09736945 +n04973816 +n02439398 +n01519873 +n07899003 +n03019938 +n07582152 +n01885498 +n12108871 +n02934451 +n04327682 +n07696625 +n09750770 +n12084890 +n03960374 +n07585107 +n01570839 +n11905392 +n06277135 +n07842044 +n03751269 +n04398951 +n12861892 +n12649539 +n07596967 +n07580592 +n12845413 +n07690739 +n07804657 +n04334105 +n03779128 +n03268918 +n03066359 +n02744323 +n12596148 +n04272389 +n07832416 +n10210911 +n01548865 +n03221351 +n15091669 +n07878926 +n07607967 +n12171966 +n02846141 +n07576781 +n02922292 +n10092643 +n01732614 +n02578771 +n02864593 +n03537241 +n09635534 +n03268645 +n07852833 +n13873917 +n12640839 +n03506727 +n10536416 +n09976429 +n10692482 +n07600285 +n04156946 +n07818689 +n02605703 +n02710429 +n02890351 +n03408054 +n03121298 +n02731629 +n12450840 +n04061681 +n10153414 +n07648913 +n07891309 +n01562265 +n14973585 +n01610226 
+n06267991 +n03302938 +n07822323 +n07826091 +n02764398 +n10406266 +n09282208 +n01734104 +n04283096 +n03530910 +n11542137 +n02610664 +n03856012 +n01531811 +n07862611 +n11625632 +n12643313 +n02469248 +n03333711 +n02907082 +n02122430 +n01559804 +n09744161 +n10187990 +n12015525 +n07844867 +n07887304 +n02878425 +n02009380 +n11448153 +n10655594 +n12566954 +n11901977 +n03999160 +n02389779 +n07928488 +n12785889 +n04281375 +n03745146 +n03224603 +n04594828 +n12835331 +n09715427 +n11615026 +n09972010 +n04038231 +n02379329 +n03445326 +n10753442 +n04249882 +n11727738 +n07866723 +n04282992 +n11621281 +n01566645 +n03919430 +n11980682 +n03480719 +n11625804 +n10467395 +n09436444 +n07867751 +n03684611 +n03788498 +n12062626 +n07808904 +n07690585 +n03865557 +n10711766 +n10465831 +n04380255 +n12166128 +n04432203 +n07892418 +n10432441 +n12991184 +n04209613 +n04459773 +n09666883 +n07807472 +n09873899 +n12939874 +n04545748 +n09637339 +n07919441 +n03987376 +n03645577 +n03437430 +n10671613 +n02964843 +n09707289 +n11700058 +n03877351 +n03518445 +n07643200 +n02140049 +n12683791 +n12418221 +n04154152 +n03397947 +n03238131 +n11851839 +n04545858 +n07744682 +n02995871 +n07593199 +n03543394 +n10293332 +n12658481 +n11599324 +n02705201 +n03920867 +n08249459 +n02876084 +n03937835 +n01397871 +n03849679 +n12016567 +n04208936 +n07696728 +n13148208 +n01904029 +n08659861 +n07878785 +n07827130 +n03390983 +n02624807 +n03319745 +n03994614 +n00446493 +n12477583 +n02920658 +n04602956 +n02688273 +n07577538 +n04350581 +n09283405 +n04074185 +n04495843 +n03538179 +n03454885 +n03878211 +n10308168 +n08518171 +n02660208 +n07904760 +n07928367 +n10174445 +n02137015 +n02863426 +n07700003 +n04015908 +n03946076 +n11725821 +n01794344 +n04364160 +n01663782 +n04283255 +n02822064 +n04406239 +n02782681 +n11990313 +n03563460 +n02957008 +n07889814 +n07896060 +n03683079 +n04278447 +n13011595 +n11810358 +n03836451 +n12827537 +n03545470 +n03213538 +n07929351 +n03471190 +n02882301 +n03625943 +n03397087 +n11955896 +n04097373 
+n03145522 +n03034405 +n02889646 +n02928299 +n09652149 +n01641391 +n04593524 +n07651025 +n03719343 +n03884778 +n03452594 +n02174659 +n12345280 +n03039827 +n03309687 +n11635433 +n02057330 +n01664990 +n09779790 +n02011016 +n09689958 +n07770763 +n03010915 +n03443912 +n02946509 +n13050397 +n03031012 +n04217546 +n04124202 +n12766869 +n04177041 +n12050533 +n03251932 +n03086580 +n03918737 +n04386792 +n03176594 +n01577035 +n01669654 +n01818832 +n10441962 +n03885904 +n03724756 +n02925666 +n03549589 +n03062122 +n02828427 +n12604228 +n03624400 +n07725888 +n03873699 +n01503976 +n02887079 +n03610098 +n02940385 +n04610013 +n03652100 +n04496872 +n04008385 +n02583890 +n10476467 +n03395514 +n03306385 +n04228581 +n02389261 +n12576323 +n01579149 +n01623425 +n02593019 +n03995265 +n02124484 +n12745386 +n04355267 +n02643836 +n01614343 +n03810952 +n04058594 +n12278650 +n03474779 +n02823510 +n00442437 +n12039317 +n04574067 +n03762602 +n02153109 +n03518943 +n04289827 +n02288268 +n07749969 +n04132985 +n03213826 +n04307986 +n03567066 +n02049088 +n04408871 +n03522003 +n09305898 +n04266375 +n08571898 +n03039259 +n01587526 +n03261603 +n00464277 +n02627532 +n02992368 +n03640850 +n03037404 +n04525191 +n02106854 +n07772147 +n04173511 +n12761284 +n03257210 +n02813544 +n07740342 +n04066270 +n03070059 +n03616428 +n02904233 +n03209910 +n04389854 +n03078995 +n03193260 +n01488038 +n01754533 +n12629305 +n02055107 +n11664418 +n04228693 +n03353951 +n03440682 +n03025250 +n03300216 +n02042046 +n04226826 +n03342015 +n03090000 +n02050313 +n03492250 +n01535690 +n01572654 +n03465718 +n02879309 +n06278338 +n04113406 +n03695857 +n09720256 +n01860002 +n02851939 +n09828216 +n02564270 +n03528901 +n02542432 +n11978961 +n01670802 +n03956623 +n01612275 +n09376786 +n03222318 +n02813645 +n02213543 +n13898207 +n03616763 +n03616979 +n11904109 +n04212282 +n04608435 +n02042472 +n04198453 +n03216402 +n02015357 +n12282737 +n02699629 +n12866635 +n02048353 +n02933340 +n01793715 +n12001707 +n02878222 +n03187037 +n03105306 
+n04080705 +n04254009 +n01623880 +n02839592 +n03436182 +n01591123 +n01318279 +n03002816 +n13155095 +n03141702 +n03775388 +n12165170 +n03322836 +n03259401 +n04471148 +n03911767 +n12585629 +n04317325 +n04257986 +n03133050 +n02035210 +n12891305 +n11882426 +n04491388 +n12948251 +n03498781 +n04262161 +n03775636 +n09915651 +n07584332 +n07852614 +n11626152 +n03901750 +n09723067 +n04265904 +n09920283 +n02397744 +n03253796 +n07712959 +n03898129 +n01743936 +n02075612 +n04560292 +n03479397 +n04334365 +n04357121 +n10145902 +n03844673 +n09854421 +n12687957 +n12598027 +n03944138 +n01839750 +n07722888 +n04258859 +n03088389 +n03351434 +n03509608 +n01677747 +n03145147 +n12046815 +n03505133 +n01629962 +n03333252 +n03993703 +n02962061 +n04529962 +n03463666 +n07681691 +n12160857 +n04187233 +n09331251 +n11614713 +n04376400 +n12301445 +n12633994 +n03883524 +n11614420 +n13062421 +n03645011 +n03293863 +n11640132 +n02579928 +n02854739 +n04461437 +n07729384 +n02977936 +n02836392 +n03593122 +n01666228 +n07820683 +n07568502 +n11910460 +n09348460 +n09712324 +n02403740 +n03482877 +n04370774 +n07750146 +n12992177 +n03152303 +n04134008 +n09805324 +n01611800 +n04374315 +n07586099 +n02032222 +n01979874 +n04350769 +n02907873 +n03016609 +n02543565 +n03256166 +n03016737 +n02419336 +n03268790 +n03559999 +n07765999 +n04607035 +n02416104 +n02123917 +n12484784 +n03225108 +n10739391 +n03506880 +n02918831 +n03045228 +n12516828 +n01314663 +n04172342 +n02768226 +n12368028 +n01500476 +n01558149 +n03604156 +n04035912 +n02359915 +n12261571 +n03875955 +n01887623 +n03871371 +n03390786 +n12494794 +n03826039 +n04465358 +n03838298 +n03165466 +n04229737 +n01321770 +n04354026 +n02998003 +n04114844 +n10611613 +n03600475 +n01909906 +n00466880 +n04284869 +n07722485 +n04496614 +n03298716 +n02285801 +n04081699 +n07765208 +n12659539 +n11618525 +n11757653 +n07727048 +n03913343 +n12070016 +n02697675 +n04284572 +n02595702 +n04482297 +n03516996 +n03704549 +n02040266 +n04476116 +n01323261 +n03823216 +n07696403 +n03226880 
+n09734535 +n03950537 +n01671479 +n03049924 +n12593994 +n04568841 +n03604400 +n01837072 +n01754370 +n03122202 +n12338454 +n04094720 +n04150980 +n03429682 +n03884926 +n03378005 +n02434954 +n03461288 +n02893692 +n04472563 +n10472129 +n04590021 +n07739344 +n04162433 +n03395859 +n12059314 +n03498662 +n03678729 +n02927764 +n02770211 +n11710393 +n07730207 +n04178190 +n07772935 +n03801880 +n04414675 +n12729521 +n12203529 +n04122578 +n04575824 +n06267655 +n03698360 +n02804515 +n02431337 +n08598568 +n02893608 +n02270623 +n00479440 +n11616662 +n02884994 +n04305323 +n02407625 +n04476831 +n04222307 +n03179910 +n11623967 +n00446311 +n00454983 +n02886434 +n12279458 +n03723781 +n11816121 +n02403231 +n11808299 +n07816296 +n03219483 +n02657694 +n00453478 +n02816656 +n02625851 +n04112752 +n03339529 +n12171316 +n02044517 +n04137773 +n01486838 +n03015149 +n12911673 +n03967270 +n03498441 +n11672269 +n03386870 +n11615967 +n02580679 +n01681653 +n02793199 +n02824319 +n10727458 +n02555863 +n01533000 +n02175916 +n12064389 +n04383015 +n02469472 +n03101664 +n03623338 +n12295796 +n02869249 +n01792042 +n03447075 +n04453390 +n04382438 +n04112252 +n03332393 +n12729729 +n01851207 +n04269270 +n12333771 +n06272612 +n03135532 +n02927887 +n11711537 +n12301180 +n04107743 +n01813948 +n03282295 +n09714694 +n00483409 +n01504344 +n04279353 +n04040373 +n12658308 +n04134523 +n10104064 +n12056601 +n04525417 +n07819166 +n12263038 +n02072798 +n03125057 +n03367410 +n04000592 +n03549897 +n01877606 +n01564914 +n12307076 +n02855925 +n03176763 +n12271933 +n04121728 +n07690511 +n02825442 +n04442441 +n01630901 +n03088580 +n02499808 +n10675010 +n01531971 +n02273392 +n01526521 +n01531344 +n03667664 +n02888270 +n04412416 +n07733394 +n04559910 +n04105704 +n11792341 +n04201064 +n01693175 +n04555291 +n02908773 +n01976868 +n03529175 +n03365231 +n03622839 +n04258333 +n03327133 +n03425769 +n12477747 +n03718935 +n11727540 +n07933799 +n03030262 +n12043673 +n02619550 +n07937461 +n12198286 +n08560295 +n12402348 +n01733957 
+n12344700 +n02763604 +n11925303 +n01557962 +n03927299 +n11611758 +n03035252 +n09454412 +n04004990 +n03456299 +n02175569 +n03668279 +n12352990 +n03507241 +n01534155 +n12278371 +n02499022 +n03822767 +n01318381 +n04024983 +n04277493 +n11934616 +n02027075 +n11611561 +n03454442 +n02236355 +n01732789 +n07722052 +n01489501 +n04409625 +n10563403 +n01817263 +n07757511 +n03770316 +n02977438 +n01840775 +n03607923 +n03322704 +n02375302 +n01614038 +n01646555 +n03952576 +n02946824 +n12847008 +n03016389 +n11809594 +n03165096 +n03839671 +n02687821 +n01689081 +n03822656 +n02597608 +n12336727 +n01579578 +n03631922 +n03904909 +n11658331 +n04224543 +n12621410 +n03870672 +n04252331 +n09720842 +n01396048 +n11988596 +n00483205 +n02871005 +n01597022 +n02382039 +n07743902 +n02358890 +n07877961 +n05263448 +n01862399 +n04136800 +n10624540 +n11990167 +n02731398 +n03366974 +n03490006 +n01561732 +n02626265 +n10627252 +n12402051 +n08517676 +n10488656 +n03099274 +n03718581 +n11806219 +n01830042 +n07728585 +n03732114 +n10755080 +n03359285 +n07720277 +n03354207 +n01596273 +n04416005 +n01847253 +n07733567 +n09725653 +n04274985 +n00449977 +n07772274 +n12063639 +n01530439 +n01322508 +n04397768 +n07273802 +n04261281 +n10524076 +n01678343 +n03410938 +n01797020 +n02388832 +n07719616 +n03639497 +n09787765 +n07721018 +n11818069 +n04185529 +n11644462 +n12074408 +n00483848 +n01583495 +n11891175 +n03347617 +n03308481 +n02535258 +n07750872 +n07748157 +n02855701 +n04584373 +n02461830 +n02912557 +n12277578 +n03604311 +n03643253 +n03031152 +n04039742 +n03435743 +n13908201 +n04150153 +n03250405 +n01410457 +n02357401 +n12588780 +n12729315 +n01690149 +n02538216 +n03171228 +n02424909 +n06274760 +n03775747 +n04211857 +n12429352 +n12272239 +n11759853 +n03401129 +n12649317 +n02625258 +n12651611 +n03603442 +n02803934 +n03861271 +n02605936 +n02018368 +n12711984 +n02811936 +n04612026 +n01339471 +n02923682 +n09194227 +n04346157 +n03939178 +n12635532 +n01593028 +n01793249 +n02380464 +n12400720 +n07708398 +n12020941 
+n12492106 +n12850336 +n12749679 +n02892948 +n12591017 +n03193423 +n01791463 +n11979527 +n12134025 +n12167075 +n09308743 +n13108545 +n01618503 +n07827284 +n07724492 +n02338145 +n04533946 +n01586020 +n07598256 +n01603953 +n12646740 +n03067518 +n04046277 +n01532511 +n07769584 +n11644046 +n12753573 +n02681392 +n08492461 +n07749446 +n04409384 +n01791954 +n12330891 +n04560882 +n10145480 +n04250473 +n02655848 +n02903126 +n11736851 +n11901294 +n12865824 +n03870105 +n00449892 +n04240752 +n11851258 +n04200537 +n12049562 +n01521399 +n03565830 +n07860447 +n03067212 +n01664674 +n07561590 +n02727141 +n02324514 +n02372952 +n01584853 +n07766173 +n11811706 +n03097362 +n04200258 +n02732572 +n01853195 +n12282527 +n09838621 +n02764505 +n04256891 +n12337617 +n12635955 +n07831267 +n11628793 +n12316572 +n07807834 +n02037869 +n01821869 +n02820556 +n04517211 +n01839086 +n03842986 +n07698401 +n02386224 +n07841800 +n01830915 +n11616486 +n11902389 +n03427202 +n12727101 +n01851573 +n02125494 +n07746186 +n11628087 +n07746551 +n03943115 +n11892029 +n02861022 +n11733312 +n01852329 +n09392402 +n12336224 +n07887099 +n03403643 +n04414199 +n07895100 +n02264232 +n02317781 +n07823460 +n07755929 +n02524202 +n04324297 +n11627512 +n01585715 +n02922578 +n00479887 +n02687423 +n02416880 +n11784126 +n12073991 +n01853870 +n01561452 +n04187970 +n10300154 +n02520147 +n12294124 +n07743224 +n12066018 +n11634736 +n02041678 +n11626585 +n02386141 +n03986949 +n07860331 +n12356023 +n12072722 +n03082280 +n12083113 +n12979829 +n01448594 +n03007444 +n07858978 +n01641739 +n02043333 +n12020736 +n02751215 +n04528079 +n01538200 +n07925608 +n12091550 +n03742019 +n03518305 +n01642539 +n03414029 +n04363991 +n03767966 +n02596067 +n01586374 +n02885882 +n04080138 +n11617631 +n02033779 +n09451237 +n02310585 +n12648045 +n03955489 +n01752736 +n07899899 +n02299505 +n01579410 +n02156871 +n02998841 +n03759661 +n02050809 +n02683454 +n11621950 +n02910145 +n04967801 +n07896661 +n11906917 +n12275675 +n11611233 +n07736692 +n02312640 
+n12588320 +n04399537 +n12757303 +n04197781 +n12717224 +n11635152 +n03122295 +n01792955 +n13133932 +n02518324 +n01584695 +n02915904 +n02967294 +n04345201 +n03019434 +n02470238 +n03049782 +n03101517 +n12709688 +n03716887 +n02422391 +n12638753 +n00288384 +n02162561 +n02053584 +n01317294 +n03334291 +n07814634 +n12273768 +n12406715 +n11644226 +n01646802 +n03460147 +n12338796 +n01972541 +n02147947 +n03890093 +n04127395 +n01581984 +n01681328 +n02213239 +n04582869 +n03254189 +n03274265 +n03186285 +n11839823 +n01624833 +n09792969 +n07891189 +n12023726 +n07619208 +n03466600 +n01849676 +n12190869 +n03079136 +n12317296 +n13001930 +n00477639 +n02944459 +n03903733 +n04131208 +n12710295 +n12180885 +n11612349 +n03443149 +n03982331 +n04264765 +n12642090 +n03237416 +n13868944 +n04046400 +n11705171 +n11979715 +n12597134 +n01609956 +n01568294 +n01469103 +n00443692 +n01606672 +n04556408 +n07690019 +n03977592 +n03358726 +n12696492 +n01573240 +n11632619 +n01772664 +n03453231 +n04179712 +n03646020 +n01812662 +n04306592 +n07724654 +n13908580 +n02903852 +n04284438 +n13132656 +n04317063 +n07829248 +n01589718 +n02654745 +n12294331 +n12515925 +n07900825 +n07721195 +n04189282 +n11907689 +n01624537 +n12333530 +n07762244 +n11757851 +n01599159 +n04038338 +n01568892 +n12691661 +n09744834 +n04307767 +n03120778 +n07920540 +n03781683 +n04185804 +n12080820 +n04354182 +n07574426 +n02579303 +n03046802 +n12078172 +n03210245 +n01614556 +n02304432 +n07713267 +n09724656 +n02861147 +n12755387 +n01483830 +n12921868 +n12026018 +n07817871 +n12062781 +n04241573 +n11621727 +n03376159 +n11815721 +n13007034 +n03540090 +n00450866 +n11619455 +n01528845 +n01568720 +n12743352 +n02871314 +n03606251 +n01490670 +n04246060 +n02053425 +n10780284 +n01915700 +n04510706 +n00456465 +n01563945 +n11809094 +n09855433 +n04112579 +n03855333 +n09809925 +n03413684 +n02123478 +n12070712 +n03651843 +n02032355 +n01591005 +n01646648 +n02752615 +n02415829 +n03283221 +n04368496 +n01573360 +n02321170 +n10348526 +n04446844 +n07763792 
+n12077944 +n04431025 +n02895438 +n10082687 +n07714188 +n02262449 +n03090172 +n12491017 +n01558461 +n12754781 +n04070415 +n04297098 +n03424862 +n01970164 +n09833536 +n01793435 +n01670535 +n09894445 +n09676247 +n01548492 +n12501202 +n03250089 +n03358380 +n02578928 +n12020184 +n02301935 +n03393017 +n12340755 +n01849863 +n01748906 +n03075946 +n01810268 +n01984245 +n04555400 +n12286988 +n04097760 +n02050586 +n12104238 +n01679962 +n02709101 +n01569060 +n12790430 +n01757901 +n13199717 +n11815918 +n07827410 +n02970534 +n12942572 +n07924276 +n04103918 +n11704093 +n07908647 +n07601686 +n12172906 +n04084889 +n02381261 +n02299157 +n11978713 +n12460957 +n02963503 +n03272810 +n12469517 +n03443005 +n01797307 +n02952237 +n11908549 +n13912540 +n03428226 +n10276477 +n01757343 +n01443243 +n01607600 +n03580518 +n12709103 +n07579688 +n04329834 +n12710415 +n11808932 +n10583790 +n02213788 +n11622184 +n12596709 +n02216211 +n07721942 +n07765361 +n01848453 +n11724109 +n02028451 +n02935017 +n12046028 +n10629939 +n00441073 +n07900958 +n12451399 +n02823964 +n04210120 +n01848840 +n10485883 +n07767709 +n02432704 +n11622591 +n03210372 +n07848196 +n11992806 +n02953197 +n07620689 +n01521756 +n03571625 +n03158186 +n12647560 +n02065407 +n01572782 +n09890749 +n05581932 +n07754451 +n03350204 +n13044375 +n12294723 +n12482893 +n04434531 +n12989938 +n12196336 +n01701859 +n07746334 +n11941924 +n02047411 +n12650379 +n10486166 +n01599556 +n01567879 +n12675876 +n01682435 +n02043808 +n12362668 +n12306089 +n02999138 +n01679626 +n03557270 +n01546039 +n11901759 +n01549053 +n11883328 +n06596727 +n03193107 +n11612018 +n03300443 +n03612010 +n03668488 +n12648888 +n01448291 +n11632167 +n10262445 +n09742101 +n09717233 +n04299370 +n03094159 +n04536595 +n03514693 +n02029706 +n02886321 +n07816052 +n04045255 +n01851731 +n02627292 +n01841288 +n02739889 +n02932693 +n03784896 +n04569063 +n07902799 +n03863108 +n02607470 +n13200651 +n07916183 +n01573898 +n04347119 +n10076604 +n13033577 +n01824035 +n03630262 +n04426316 
+n03064250 +n12262018 +n12048399 +n12279772 +n04143140 +n07829331 +n12891643 +n01826680 +n12646605 +n13103877 +n02023855 +n03086868 +n04163530 +n03736470 +n04358117 +n13872822 +n03159640 +n01680655 +n11611087 +n03980478 +n02978478 +n01555004 +n12402840 +n07763987 +n04387706 +n04979002 +n03258330 +n09856671 +n11624192 +n01538059 +n02003839 +n12552309 +n10469874 +n01576076 +n03643149 +n04419868 +n04586581 +n00483508 +n03131967 +n01847407 +n07929172 +n09683757 +n03786621 +n04369282 +n12733870 +n11612575 +n11619227 +n03301833 +n02176439 +n01569971 +n07935043 +n02563792 +n02051059 +n04482177 +n11859472 +n11710136 +n04115144 +n07864934 +n07691758 +n02620167 +n07748276 +n03415486 +n07835921 +n00452152 +n01848323 +n12906214 +n12075010 +n01563449 +n01499396 +n01570267 +n12047345 +n07920989 +n07601572 +n02683558 +n04428634 +n04345028 +n12161969 +n03460040 +n02561514 +n02006364 +n03582959 +n11812910 +n13185269 +n04297847 +n07896165 +n01552813 +n12361946 +n02031585 +n12766595 +n11622368 +n11695599 +n11615387 +n02509197 +n12409470 +n01314388 +n11758799 +n09846469 +n02675219 +n04253057 +n04041243 +n12276628 +n04381724 +n01855188 +n02203152 +n04403925 +n11895092 +n11924849 +n04172904 +n11888800 +n01546506 +n07906718 +n01489920 +n03436417 +n03615655 +n07765073 +n02434190 +n02004492 +n12282235 +n12406488 +n11981192 +n10373390 +n13183056 +n04332074 +n12818346 +n07731006 +n02598573 +n02438580 +n01957335 +n03356982 +n10288964 +n02629230 +n02042759 +n12319414 +n01451426 +n03521675 +n02016066 +n01813532 +n13207335 +n11805544 +n04401828 +n02952109 +n03963294 +n10013811 +n12058630 +n01551711 +n01574560 +n01858780 +n10093818 +n03858183 +n01550172 +n03571280 +n02309242 +n10258786 +n01569423 +n10134178 +n08578517 +n04445327 +n03250279 +n02584449 +n03223553 +n04523831 +n04485423 +n02050442 +n04474035 +n04528968 +n02649546 +n01913166 +n09971273 +n04517408 +n02437482 +n03824713 +n03778817 +n07643026 +n01613177 +n12022054 +n07714448 +n07592768 +n00454493 +n03296328 +n02305929 +n03084834 
+n03698815 +n12093600 +n08649711 +n03466493 +n04067658 +n03041114 +n03514451 +n01491006 +n04178329 +n03790953 +n03938401 +n02048115 +n07768858 +n03273740 +n10333601 +n05418717 +n12754003 +n02098806 +n03314608 +n01565930 +n12113195 +n12284821 +n12483427 +n04332580 +n10382710 +n03416094 +n02837887 +n03917198 +n14131950 +n04414476 +n11861641 +n11903671 +n01841441 +n09872066 +n01806467 +n04964799 +n00467320 +n01595974 +n03220692 +n01339083 +n01825278 +n11727358 +n04518343 +n11984144 +n07724269 +n02292692 +n02324850 +n01753032 +n01624115 +n11816649 +n07930062 +n02460451 +n12319204 +n04340521 +n12325234 +n01541102 +n02979836 +n00141669 +n01822300 +n11658544 +n12272883 +n03334382 +n11726707 +n03639077 +n07904934 +n03516367 +n03698723 +n03553248 +n11812094 +n03724417 +n01540566 +n02341974 +n11819912 +n07734555 +n02987379 +n03580845 +n12546962 +n02548247 +n12753245 +n07768423 +n12849279 +n11617090 +n02912894 +n07840027 +n12295033 +n12703383 +n02696165 +n10419785 +n04426427 +n03694639 +n11712282 +n04142999 +n01597737 +n03801533 +n01495493 +n07774719 +n03267113 +n01742821 +n03859170 +n03416640 +n03320959 +n12733218 +n02017725 +n13229543 +n09344324 +n04965451 +n01490112 +n10069296 +n12084555 +n04554406 +n04086446 +n02976249 +n02656032 +n02424486 +n02381609 +n09934337 +n04573937 +n07685399 +n02800497 +n02905152 +n02951703 +n07760153 +n03609397 +n00447463 +n03680512 +n02046939 +n03288886 +n11870418 +n03386544 +n07767171 +n07847453 +n12687044 +n01664492 +n03099147 +n03463381 +n02125081 +n12920204 +n03517647 +n02603540 +n12267411 +n11933546 +n11947802 +n04387095 +n12975804 +n02973904 +n13195341 +n04048441 +n11753143 +n03212114 +n03298858 +n04366116 +n01424420 +n10450161 +n01442972 +n07877299 +n04503593 +n04349306 +n12969425 +n12597466 +n03092656 +n07914995 +n03487886 +n12223569 +n01756733 +n13919919 +n04175147 +n02029087 +n03530511 +n02425887 +n03572107 +n03927539 +n03383099 +n04130907 +n01632601 +n07823105 +n10378026 +n02382850 +n07613266 +n03235180 +n02810782 +n12708654 
+n11636835 +n02823124 +n03402941 +n12121610 +n03715114 +n04052658 +n00480366 +n12493208 +n04255163 +n12145477 +n01489709 +n12402596 +n01598074 +n03837606 +n02628062 +n04103364 +n03247083 +n02032480 +n07736256 +n12578916 +n09218315 +n02218371 +n03730334 +n02080146 +n03836906 +n02868638 +n02198859 +n12744387 +n02942460 +n11754893 +n12274358 +n02725872 +n09218494 +n03942920 +n07574780 +n02921756 +n01757115 +n02763306 +n11758122 +n10508141 +n02303284 +n04083800 +n13879049 +n12765115 +n12075830 +n02666943 +n11980318 +n07907037 +n12794135 +n02333909 +n03870980 +n07586718 +n11923174 +n10782471 +n01493146 +n12294871 +n11726269 +n12932173 +n07825972 +n12732009 +n03572321 +n07682197 +n03423306 +n12495895 +n03545756 +n03557692 +n03785237 +n07902937 +n09899671 +n12061614 +n07902443 +n01449374 +n12632335 +n03474896 +n03539433 +n04310904 +n03902482 +n12006930 +n03285578 +n04200000 +n03912218 +n07821260 +n03548626 +n03223686 +n11826198 +n03165616 +n02104280 +n09981278 +n09382099 +n03732458 +n03987990 +n09946814 +n12270741 +n07737745 +n04172776 +n10189278 +n03543012 +n12629666 +n02180875 +n04087432 +n12961879 +n03321954 +n12528549 +n02424085 +n09843443 +n03846677 +n12304703 +n09873473 +n03410571 +n03041810 +n02425228 +n01562451 +n03615790 +n10081204 +n03985881 +n07842130 +n02890513 +n03649797 +n02381004 +n12560621 +n12523475 +n07687626 +n11905749 +n11759404 +n12905412 +n03542605 +n03983612 +n12573474 +n11972291 +n03767459 +n02698634 +n12713866 +n13084834 +n02202006 +n13108323 +n02631475 +n10737103 +n03637898 +n03069752 +n12400489 +n09692915 +n10242328 +n02794664 +n12465557 +n12085267 +n03348868 +n12754981 +n02745611 +n10504206 +n12073554 +n02835724 +n04605572 +n02825961 +n03528523 +n12116429 +n02973805 +n12708941 +n01544704 +n04180229 +n09403211 +n08242223 +n02146371 +n12127768 +n09770359 +n03295246 +n01757677 +n04385799 +n02584145 +n07909593 +n12587132 +n13029326 +n04184316 +n07903643 +n01848555 +n10750031 +n02332156 +n12703557 +n03196990 +n12406902 +n02768973 +n12416073 
+n02147591 +n09724533 +n09693982 +n12687462 +n01982068 +n03435991 +n03272125 +n07713763 +n03018712 +n03648431 +n03336575 +n07854184 +n12806015 +n07879174 +n03984643 +n03147280 +n02699915 +n07617708 +n01533651 +n12483841 +n01697611 +n02576906 +n03724066 +n03935116 +n09782397 +n01599269 +n10672371 +n12066630 +n03178674 +n15086247 +n03523987 +n02826068 +n12580654 +n02358390 +n01647640 +n10259997 +n03738066 +n13915023 +n02639605 +n03174450 +n12269406 +n09874428 +n03432061 +n04386051 +n03923918 +n04592465 +n12480456 +n10333439 +n04206790 +n01443831 +n02967626 +n07733712 +n03746155 +n12947313 +n11690254 +n12244650 +n12670758 +n08658309 +n12710693 +n11860555 +n03485198 +n03047799 +n04461570 +n07600177 +n02126640 +n12704343 +n02866386 +n03008976 +n04532831 +n03465426 +n12691428 +n01641206 +n04962062 +n03254046 +n04425804 +n02014524 +n03439348 +n02538010 +n11603246 +n12265600 +n12277800 +n04016240 +n12086192 +n09650729 +n01549641 +n03112719 +n04961062 +n02710324 +n12049282 +n12362274 +n11969607 +n12856680 +n02201000 +n07863802 +n03360622 +n07601809 +n04354487 +n12898774 +n12939282 +n03109693 +n12867826 +n12441390 +n12915811 +n12879527 +n04137355 +n04131368 +n03527149 +n10164492 +n09932508 +n12426623 +n12575812 +n02557318 +n10263790 +n04309548 +n00476235 +n04194127 +n11876634 +n10327987 +n03499354 +n02616851 +n04464615 +n03615406 +n02744844 +n11732567 +n10347446 +n09752519 +n04228215 +n10004718 +n07899533 +n12030908 +n15102894 +n12044467 +n11711764 +n02610066 +n03415749 +n04562496 +n02034295 +n02297442 +n03566193 +n12506991 +n07774842 +n12827270 +n14908027 +n12242409 +n04072960 +n02829596 +n12496427 +n02266050 +n13108481 +n12473840 +n08677424 +n12076223 +n15091473 +n02815749 +n04549028 +n12558425 +n12023407 +n04179824 +n02378541 +n03188725 +n12517445 +n07573347 +n02004131 +n11921395 +n12570972 +n10602470 +n12095647 +n03854421 +n02450295 +n02792409 +n03543735 +n12836337 +n12204175 +n12152722 +n07900734 +n12517642 +n02775039 +n12607456 +n03376938 +n12179122 +n09873348 
+n01847978 +n07888816 +n10453184 +n09675922 +n01851895 +n12865562 +n01797601 +n03711044 +n02738859 +n12064591 +n04033425 +n08551296 +n01650690 +n01537895 +n04207151 +n10087434 +n12261808 +n09438844 +n10364198 +n01814755 +n01583209 +n12270946 +n11892817 +n03344642 +n04117464 +n07847917 +n04003241 +n10362319 +n10477713 +n03495570 +n07560542 +n04363777 +n04534359 +n02404906 +n03349892 +n07712267 +n02960352 +n07866277 +n07857170 +n00324978 +n02755823 +n03150511 +n04211528 +n01899894 +n07588299 +n11874081 +n03425325 +n04506506 +n11949402 +n02952374 +n03309110 +n12159388 +n07591049 +n03068998 +n03228254 +n10279018 +n04173046 +n07728053 +n13052931 +n01597906 +n12368451 +n02767665 +n09435739 +n03915900 +n09728285 +n03292603 +n03331077 +n07817160 +n07917392 +n12540250 +n04153025 +n10209082 +n03968581 +n12676534 +n11824146 +n03521899 +n01853666 +n04292921 +n12332030 +n03984759 +n02863014 +n07801091 +n07723177 +n03289660 +n01533481 +n04488202 +n03468821 +n02382338 +n03543254 +n01961985 +n07915918 +n03703862 +n02771004 +n02047045 +n03877674 +n13141415 +n03529629 +n02240517 +n03675235 +n04491638 +n12384037 +n04419642 +n03019685 +n07591586 +n04496726 +n12985420 +n12927013 +n12196694 +n03473227 +n11621547 +n02988066 +n10451450 +n07729828 +n09618760 +n12196527 +n01555305 +n12830222 +n11950877 +n13190747 +n12160303 +n12390099 +n02818135 +n03163381 +n04554211 +n03244919 +n07897975 +n03386726 +n04290615 +n02011281 +n12407890 +n04123448 +n07904865 +n03447358 +n02393940 +n07931870 +n02937958 +n04318787 +n04587327 +n12807409 +n04112430 +n07560193 +n12774299 +n02618827 +n07854982 +n03757604 +n03817191 +n12793494 +n02324431 +n03013850 +n04113641 +n01612476 +n03127408 +n02038466 +n03799876 +n04257684 +n03382292 +n10449664 +n04394630 +n10275395 +n07698250 +n12329473 +n07694659 +n07642742 +n02563648 +n08583455 +n02557182 +n02775178 +n09274152 +n03189083 +n12570703 +n04211219 +n12486574 +n03073694 +n11969166 +n02475078 +n02976350 +n08584914 +n07899660 +n10116702 +n01613807 +n12461109 
+n04025508 +n12451240 +n12596849 +n12079963 +n03541269 +n04561422 +n11699442 +n07725255 +n03460297 +n07616748 +n12757458 +n03103563 +n02813752 +n07698782 +n12840362 +n01543632 +n01602832 +n01875313 +n12472024 +n02926591 +n02872333 +n10728624 +n12532564 +n03882960 +n12333053 +n03684224 +n13146583 +n03436075 +n04154340 +n03868643 +n02598878 +n04139140 +n03266371 +n04083309 +n12506341 +n12200143 +n03503477 +n12807773 +n03123917 +n13029760 +n10173771 +n03659809 +n12047884 +n12759273 +n04193377 +n04258438 +n04597400 +n04579986 +n03719743 +n04299963 +n02864504 +n10510245 +n03417970 +n09719794 +n03138344 +n02085272 +n07694516 +n12665857 +n01642257 +n03229244 +n10581890 +n10318293 +n03635108 +n10652605 +n12189429 +n09934774 +n11709205 +n04207903 +n10296176 +n10603851 +n03450734 +n13223588 +n12754648 +n09886403 +n07751280 +n11950686 +n07814390 +n12799776 +n01646902 +n09796809 +n12819728 +n01938454 +n02410011 +n07607138 +n02119634 +n10332861 +n09230202 +n02757061 +n02849885 +n15092227 +n12151615 +n03111041 +n02413050 +n03506560 +n07744057 +n04030518 +n12544539 +n04089836 +n02038993 +n13882201 +n12099342 +n01946630 +n10095769 +n02982416 +n12957924 +n13215586 +n07726525 +n12452836 +n03801671 +n04598318 +n01449712 +n12428747 +n04119751 +n10509063 +n07694839 +n02782602 +n11626409 +n02573704 +n12399384 +n12388989 +n01601068 +n11971406 +n04367011 +n07930315 +n12925179 +n04967674 +n03497352 +n03653833 +n01819465 +n03688192 +n02802990 +n03393761 +n04430475 +n13107694 +n10384496 +n07867164 +n12449526 +n01515303 +n12574320 +n01444339 +n07919310 +n03453443 +n04173907 +n02887489 +n07772788 +n03629520 +n02580830 +n11705387 +n12069679 +n01956344 +n02406533 +n03973402 +n03938037 +n04969952 +n04103094 +n04393808 +n07715407 +n04172107 +n01917882 +n12085664 +n07608429 +n09835230 +n04135024 +n07842605 +n12568186 +n04339879 +n07691091 +n01801876 +n00474568 +n01807105 +n12128071 +n01673282 +n11948864 +n03991837 +n09659188 +n02070174 +n02670683 +n12454949 +n10385566 +n11631854 +n12305293 
+n12002428 +n12948495 +n12757816 +n11852028 +n10690648 +n09283866 +n03214582 +n03423877 +n04127521 +n03006626 +n09283193 +n07712559 +n01447331 +n02981321 +n02658531 +n11947629 +n02419634 +n02420828 +n11923637 +n12570394 +n11968931 +n12731029 +n09749386 +n07736813 +n03967396 +n11908846 +n03029445 +n02426481 +n01964271 +n13198914 +n04484432 +n12656685 +n10806113 +n11849983 +n03236423 +n10649197 +n07688624 +n03057541 +n12015221 +n02094931 +n02014237 +n07560331 +n02801450 +n04206570 +n07556406 +n11627908 +n11889619 +n07852229 +n04063154 +n02713364 +n02783459 +n12877838 +n02930214 +n02125010 +n02407276 +n07815424 +n12855494 +n12530818 +n07750449 +n01963317 +n10082997 +n03245724 +n03012013 +n03555006 +n02421136 +n03332989 +n04375405 +n03746486 +n12636224 +n03278914 +n07917133 +n12504783 +n09416890 +n03896526 +n02258198 +n12983048 +n03837698 +n12869061 +n04541987 +n01637615 +n04401949 +n02241426 +n13220122 +n07876651 +n03729308 +n02364840 +n01339801 +n03418915 +n09257843 +n11614039 +n09731343 +n03809603 +n05399243 +n01569262 +n11901597 +n03124474 +n01566207 +n03796522 +n12595699 +n04573281 +n09689435 +n11859737 +n03201529 +n12902662 +n03374372 +n03760944 +n09189157 +n01517966 +n10431625 +n02898269 +n03693707 +n04369025 +n07834618 +n04095342 +n02786331 +n03822504 +n02284611 +n09862621 +n03436891 +n07688898 +n12435777 +n03949317 +n12443323 +n12273114 +n12623077 +n04333869 +n07907831 +n07774596 +n05450617 +n03320262 +n04190376 +n12671651 +n11819509 +n07588111 +n09756049 +n07611046 +n04973291 +n11602873 +n00120010 +n03500699 +n03844815 +n03708843 +n04452528 +n04387261 +n09889065 +n10147121 +n03318294 +n12599435 +n04164406 +n01965529 +n11636204 +n11791569 +n12275131 +n02977330 +n07851443 +n04132603 +n07824191 +n09760609 +n12190410 +n07915491 +n12665271 +n10120671 +n02570164 +n10208950 +n02163297 +n02244797 +n09842528 +n08645104 +n01841679 +n11603835 +n04488857 +n07814487 +n01953762 +n04612373 +n11877193 +n03198500 +n03981924 +n01943087 +n11552806 +n04414909 +n03005033 
+n02457945 +n10500217 +n10375314 +n04607242 +n07914777 +n09832456 +n12915568 +n12813189 +n10578021 +n03519081 +n07801779 +n12026476 +n03296081 +n03850492 +n07902121 +n09881265 +n12562785 +n03290195 +n10131151 +n10078719 +n01558765 +n03917814 +n02045596 +n07734183 +n03414676 +n07933154 +n02126787 +n12148757 +n12263987 +n07684164 +n03406966 +n01492569 +n02988963 +n12963628 +n09964202 +n03417749 +n01854838 +n02921029 +n02183096 +n11762433 +n11722466 +n02387093 +n02768655 +n12519089 +n09871229 +n07938313 +n10502329 +n11989393 +n03768916 +n13145040 +n11813077 +n04457910 +n03655720 +n03703945 +n11876803 +n01438581 +n07910379 +n07847827 +n02300797 +n09245515 +n10754189 +n04581102 +n12513172 +n02458135 +n03762332 +n11789589 +n09695620 +n03850053 +n07911249 +n12342852 +n12753007 +n07748574 +n07727458 +n03696568 +n04304680 +n07723039 +n07775197 +n07577144 +n03043693 +n04374735 +n01858281 +n09228055 +n09466678 +n01949085 +n02024479 +n11623815 +n02704645 +n07894451 +n01751472 +n01646388 +n01317916 +n13880994 +n10300500 +n11794024 +n03735963 +n04610274 +n11854479 +n07754894 +n02639087 +n02122510 +n02262803 +n12732966 +n04529108 +n13194036 +n09990777 +n10009276 +n12088223 +n12155009 +n07886176 +n04278247 +n04222723 +n11707229 +n01999186 +n07851641 +n12741792 +n01315213 +n10033412 +n04249582 +n03586631 +n03237839 +n12037499 +n12014085 +n07756325 +n01636352 +n03905947 +n08611339 +n07693590 +n03724538 +n09791816 +n01666585 +n10588965 +n11613219 +n10542608 +n12913791 +n10528023 +n03171635 +n11923397 +n12854600 +n10410246 +n12698598 +n04135118 +n09844457 +n04441790 +n03882611 +n02337001 +n07907342 +n12561169 +n12027658 +n10719132 +n09851165 +n02801823 +n12330587 +n01683558 +n12162181 +n04387932 +n11704620 +n09679170 +n07601290 +n04028221 +n10277027 +n09877750 +n11758483 +n10027246 +n03819336 +n10205231 +n12478768 +n03451711 +n12973443 +n01923025 +n03262717 +n07807594 +n00475535 +n07744430 +n02341475 +n04614655 +n07924747 +n03388323 +n12680402 +n03202940 +n04534520 +n09661873 
+n15092059 +n11832480 +n04198355 +n12529220 +n12389130 +n12304115 +n03234952 +n07610620 +n02868975 +n04442741 +n05282652 +n02820675 +n12795352 +n12675299 +n08547468 +n04189651 +n04141198 +n04513998 +n12273939 +n12482668 +n12858618 +n01958346 +n03172038 +n10280674 +n04301760 +n02631330 +n12433178 +n07763107 +n03068181 +n07565259 +n03605598 +n13177884 +n04005197 +n09751496 +n12737383 +n07648997 +n09839702 +n09442595 +n07925229 +n12150722 +n11898775 +n09904208 +n02207345 +n07642361 +n07685918 +n03205458 +n10574538 +n09742315 +n02599557 +n03585682 +n04273659 +n02200850 +n03410740 +n03391301 +n07726672 +n09782167 +n13155305 +n02067240 +n07561848 +n07728708 +n12463134 +n12228229 +n09743487 +n12225563 +n03421669 +n03226375 +n03973945 +n12498055 +n04483925 +n04564278 +n11890150 +n12519563 +n12754468 +n04353573 +n11615607 +n04430896 +n04585128 +n10395828 +n10773665 +n02772435 +n09881895 +n12663023 +n01615303 +n12803754 +n09445008 +n03955296 +n05245192 +n05486510 +n07899769 +n07575510 +n02307681 +n03814817 +n02670186 +n03598515 +n12797860 +n03518135 +n07587962 +n12630763 +n06273743 +n09843824 +n03226254 +n12407222 +n02961544 +n12951835 +n06417096 +n02016659 +n01441117 +n07735404 +n09411189 +n13896217 +n03262248 +n03451120 +n02525382 +n03375329 +n04155068 +n12916179 +n10297234 +n11907100 +n03423568 +n04360914 +n12027222 +n12199790 +n01744270 +n09896401 +n07925116 +n03693860 +n04414319 +n07767549 +n03555564 +n04043411 +n07872593 +n03774461 +n03129471 +n04497801 +n11756870 +n09776346 +n04530283 +n01520576 +n12828220 +n01583828 +n04120842 +n09676021 +n04344734 +n01916388 +n12513613 +n09861863 +n02310334 +n03318983 +n04533499 +n02427576 +n12727518 +n04502059 +n11725480 +n11987126 +n11876204 +n03504205 +n09720595 +n12315999 +n12935609 +n04452757 +n12201331 +n01603152 +n10772092 +n03156279 +n12723610 +n02003037 +n03244775 +n07802963 +n11954642 +n07770034 +n09931165 +n10559508 +n01745902 +n07654148 +n10070108 +n01585287 +n13196003 +n04389718 +n10253122 +n03730893 +n02983357 
+n02783900 +n01680813 +n03072440 +n03109253 +n03274435 +n11655974 +n10048612 +n07849733 +n07896994 +n03792334 +n03035832 +n03819448 +n03105088 +n11943992 +n01485479 +n01699675 +n11795049 +n12086778 +n01840120 +n07753980 +n10685398 +n04346428 +n04532398 +n07709172 +n02146700 +n09461069 +n03853924 +n01321456 +n12068432 +n09757449 +n03206282 +n03751757 +n13053608 +n11695974 +n12123741 +n03500209 +n04367371 +n02890940 +n01917611 +n07835331 +n02907656 +n04136045 +n12059625 +n03862862 +n12864160 +n00440039 +n03448590 +n12628986 +n04115802 +n03949145 +n12916511 +n12647893 +n09706255 +n13181811 +n07752109 +n04375615 +n01648620 +n04403524 +n09967967 +n12911079 +n03857687 +n02803539 +n01551080 +n10734891 +n13235159 +n04127633 +n07935878 +n12853482 +n10191001 +n03126385 +n10076224 +n01812866 +n12919403 +n03769610 +n09283767 +n03462110 +n11770256 +n12038898 +n09889170 +n11894558 +n10298647 +n02592055 +n02795670 +n11701066 +n12762049 +n02890662 +n07918193 +n02976455 +n03100897 +n13127843 +n12184912 +n00468299 +n12407079 +n12496949 +n03541537 +n05260127 +n01535140 +n01541760 +n11945783 +n07687053 +n07745046 +n12083847 +n02382132 +n12270027 +n10140597 +n03788914 +n01790711 +n02197689 +n03173270 +n10368624 +n04449290 +n01579729 +n07834872 +n07734417 +n02379630 +n01636829 +n12549192 +n12951146 +n13579829 +n03268142 +n11761202 +n02769669 +n09452760 +n04095577 +n12031139 +n02003577 +n12891469 +n03931885 +n01577941 +n04176295 +n12046428 +n03418402 +n13145250 +n11865874 +n12473608 +n11797321 +n01798168 +n09923186 +n02786736 +n01698782 +n09976283 +n03975788 +n14685296 +n01682172 +n07838441 +n02771286 +n03429137 +n03948950 +n02512830 +n02298218 +n10141364 +n02823848 +n02077384 +n12584715 +n11748811 +n02214773 +n03667552 +n04121426 +n04135710 +n07579917 +n12275888 +n07826453 +n12167436 +n04586072 +n09877288 +n04248396 +n02761696 +n03038870 +n01490360 +n12353203 +n09785891 +n12057660 +n04146343 +n12557556 +n02081798 +n02917964 +n07898617 +n12597798 +n07574176 +n07764630 +n03008177 
+n04255899 +n04434207 +n07897600 +n09929577 +n11811921 +n12415595 +n02893941 +n12276110 +n02821202 +n09690621 +n02508742 +n02077787 +n02390640 +n03764822 +n02257985 +n13033134 +n04559166 +n07865196 +n10506915 +n12051103 +n10473917 +n12775919 +n02971579 +n12880462 +n11837970 +n02063662 +n09840520 +n12019827 +n09208496 +n12836508 +n02982232 +n04219185 +n03332005 +n07914128 +n07862461 +n04250692 +n09267854 +n04561734 +n02076402 +n12344837 +n02919148 +n06592281 +n03668803 +n03062985 +n04246731 +n12112609 +n04012482 +n03558633 +n03982642 +n01998741 +n07665438 +n04209509 +n07913882 +n01749244 +n07801342 +n02611561 +n04488742 +n01897536 +n10624437 +n13128976 +n07931612 +n04300643 +n03727067 +n03360431 +n07593471 +n10253296 +n03297226 +n03854506 +n07879450 +n10562283 +n12557438 +n13154388 +n12862512 +n02126028 +n07752514 +n02387887 +n12066261 +n07666176 +n02806530 +n09988703 +n03721252 +n03221540 +n12195533 +n02682569 +n03622058 +n03943266 +n04207596 +n11721337 +n02427032 +n07910152 +n01551300 +n12861345 +n11660300 +n03786313 +n12966945 +n02046171 +n02797535 +n03546112 +n07711232 +n02044908 +n02998563 +n02652132 +n12634986 +n12187247 +n11645590 +n07582892 +n03065243 +n09911226 +n04396902 +n10763075 +n02359047 +n10400108 +n04294614 +n03991646 +n11728945 +n07766891 +n12277150 +n13141564 +n10563314 +n12426749 +n07827750 +n12403994 +n12627119 +n03420801 +n10203949 +n12830568 +n12280060 +n13180875 +n12659064 +n04239436 +n03823312 +n04367746 +n12448700 +n01896844 +n07581931 +n09384106 +n11625223 +n04198722 +n01477875 +n09932336 +n03477512 +n12281974 +n10117739 +n07759194 +n12281788 +n01405007 +n03077616 +n02304036 +n12947544 +n03140126 +n12356960 +n07807002 +n07877849 +n02956795 +n04373795 +n07925500 +n10359546 +n09730077 +n01694955 +n10611267 +n04316498 +n07849912 +n12841354 +n07903543 +n10026976 +n04050313 +n03939844 +n03260849 +n07917507 +n12228387 +n03199775 +n01569566 +n02403920 +n04261638 +n02986160 +n03724623 +n01960177 +n03783430 +n07877675 +n10401639 +n04215153 
+n03077741 +n02589623 +n12934985 +n03233624 +n04506688 +n12194147 +n09975425 +n07818825 +n12641007 +n10036692 +n02771750 +n12285900 +n01472303 +n10033663 +n10707134 +n03219966 +n11772879 +n10146416 +n10435169 +n10304086 +n12385566 +n03126580 +n12904314 +n03619196 +n02299846 +n03574243 +n12368257 +n03690473 +n01748686 +n09834378 +n07750736 +n02930645 +n01679307 +n03721047 +n02710044 +n07563207 +n02930080 +n09309168 +n03127203 +n02863536 +n02536165 +n01559639 +n09654518 +n02961035 +n12007406 +n12773651 +n04351699 +n03114504 +n06273414 +n02017475 +n01733466 +n02175014 +n07920663 +n03953901 +n09670521 +n09400987 +n11791341 +n02284884 +n12919646 +n07880325 +n03801353 +n01982347 +n07828642 +n01570421 +n03998333 +n03449309 +n10482220 +n12850906 +n12805561 +n12926689 +n03232543 +n04248851 +n03195959 +n04082562 +n03846100 +n07682952 +n07695652 +n11809271 +n09895561 +n04287898 +n09740724 +n02859955 +n09830400 +n03674731 +n02825153 +n04571686 +n13107891 +n10318607 +n07848093 +n13226871 +n08555710 +n03137473 +n02776978 +n03141455 +n12514138 +n01809371 +n09405078 +n01753180 +n02184473 +n11610215 +n03539546 +n12731835 +n04485884 +n03590588 +n10221312 +n04049753 +n03441345 +n02302244 +n12262185 +n15092650 +n11877646 +n10377185 +n01684578 +n03796605 +n07897116 +n03164344 +n12135049 +n10757050 +n01692523 +n04566756 +n07697699 +n07575392 +n10262655 +n04064747 +n07914006 +n12433769 +n07873348 +n04457767 +n10019072 +n02921195 +n03856465 +n04041408 +n12639584 +n12920955 +n11781176 +n07864756 +n03941013 +n03646148 +n04401578 +n11692792 +n02757714 +n02286089 +n04253168 +n03890514 +n07855510 +n03507458 +n04123026 +n11661909 +n12435152 +n04330746 +n09481120 +n03731019 +n03717285 +n03271030 +n02772101 +n07740597 +n02847852 +n12825497 +n12263738 +n03342262 +n03603594 +n07804543 +n12932365 +n12695975 +n10297531 +n04054670 +n03175081 +n12703856 +n03832144 +n03966206 +n02414290 +n03619275 +n09738121 +n03290096 +n10585077 +n07731767 +n12409840 +n12026981 +n02278980 +n02752810 +n01654637 
+n02654112 +n10314836 +n13023134 +n01823414 +n07461050 +n11902982 +n04543636 +n02204907 +n04049585 +n12304899 +n03073545 +n04272928 +n10315456 +n03975657 +n09899782 +n12288005 +n07005523 +n03795269 +n09823832 +n02242137 +n02907391 +n03643491 +n03245889 +n12285369 +n03061345 +n03797264 +n07838073 +n09219233 +n02859343 +n07608098 +n03920641 +n12578626 +n10688356 +n04542858 +n07834065 +n00443803 +n04181561 +n04570214 +n02047517 +n03295012 +n01633781 +n10610850 +n04035836 +n03001115 +n04593376 +n02393807 +n13061348 +n10123122 +n11800236 +n13207094 +n10140929 +n12167602 +n01809752 +n10421956 +n02764935 +n03424489 +n12889219 +n04046091 +n07714287 +n07708685 +n07736087 +n04142434 +n11961446 +n04521863 +n02414763 +n02901377 +n00467536 +n13085747 +n03855756 +n11846765 +n02530999 +n03063199 +n04258618 +n12204032 +n04424692 +n11758276 +n02653497 +n03766508 +n02026629 +n02572484 +n12339831 +n01635027 +n01668436 +n07821919 +n01543175 +n02689748 +n12528974 +n04024862 +n04184880 +n11720891 +n13869896 +n01678043 +n01647303 +n11532682 +n03236217 +n04963307 +n03012897 +n11682659 +n03191286 +n07643891 +n12737898 +n10680609 +n07924955 +n03879705 +n10461060 +n02523427 +n02013567 +n09893344 +n04124488 +n09863031 +n12454436 +n12305089 +n07709046 +n03805180 +n11940599 +n01691217 +n04198562 +n03978421 +n02357585 +n07818572 +n12870682 +n03798442 +n04154938 +n10550369 +n11957678 +n01958531 +n09936825 +n02334201 +n07910538 +n11978551 +n10562135 +n12700088 +n12784889 +n04480853 +n03281673 +n07588419 +n02968333 +n11935469 +n13046669 +n11730602 +n09643799 +n11849467 +n01758757 +n09638454 +n03267468 +n07914586 +n12104734 +n02961225 +n09827246 +n09917214 +n13079073 +n12634734 +n04089376 +n13034062 +n11714382 +n12753762 +n07683039 +n11840067 +n07689842 +n12173069 +n12172481 +n04182152 +n07869522 +n10356877 +n02771166 +n03154895 +n07615289 +n12986227 +n12361135 +n03456447 +n12706410 +n12895811 +n02988156 +n03130761 +n10639359 +n03628215 +n02738741 +n01643507 +n07730708 +n03232309 +n02846733 
+n04969540 +n03051041 +n12890928 +n03235327 +n04289576 +n07588817 +n10325774 +n03973285 +n09703485 +n02358584 +n03061674 +n03195332 +n02901259 +n07849619 +n04486934 +n07908812 +n01588725 +n03682877 +n11949015 +n04146504 +n04146862 +n07898247 +n03318865 +n04367950 +n07880213 +n04247011 +n01447658 +n12711817 +n03146687 +n02926426 +n12856091 +n11966896 +n02413593 +n09764900 +n03009794 +n03314227 +n10499232 +n10075693 +n04451318 +n12320806 +n11933728 +n07764315 +n12133682 +n09904837 +n12832538 +n03816530 +n07802863 +n04391445 +n09728137 +n03887330 +n04436012 +n03957991 +n07771731 +n06266973 +n10407310 +n10290919 +n07862244 +n01842504 +n10262561 +n12726159 +n07691954 +n07618119 +n03437829 +n11966617 +n03629100 +n04231905 +n04208760 +n03344305 +n03684143 +n12934174 +n08645212 +n03556679 +n12109365 +n03751458 +n02380875 +n02025389 +n02770721 +n09830629 +n02800675 +n04951186 +n04483073 +n12710577 +n12789054 +n12058192 +n11777080 +n07716203 +n09618880 +n04525821 +n04016846 +n02918330 +n10375052 +n13158512 +n13090871 +n02929582 +n02308735 +n10487182 +n02213663 +n07608339 +n04384593 +n12890490 +n03992436 +n02994573 +n13231078 +n12880244 +n01651059 +n02925009 +n09686401 +n13219976 +n09981540 +n04582771 +n06267758 +n09893502 +n13214340 +n03272940 +n12554911 +n02214341 +n04137089 +n03874487 +n04573513 +n12003167 +n12004547 +n13065089 +n01903346 +n04373428 +n02216365 +n02024185 +n12577895 +n11698042 +n07586318 +n11705776 +n03030353 +n04486213 +n07885705 +n07928163 +n02356612 +n02767038 +n02897097 +n11662371 +n04128710 +n09842395 +n07683360 +n11533212 +n08495908 +n12841193 +n03669886 +n07768068 +n02381831 +n12081215 +n02757337 +n02811618 +n10144338 +n01379389 +n09698644 +n12779851 +n10400618 +n11801891 +n12322099 +n12408077 +n02767956 +n08640962 +n07816839 +n03021228 +n10346015 +n07868830 +n07917272 +n10076957 +n12865708 +n04290259 +n03595264 +n03986224 +n07825194 +n01610100 +n04417086 +n12995601 +n12734070 +n15091129 +n12428412 +n07587331 +n02405101 +n03108455 +n03594523 
+n04489695 +n03892425 +n13032618 +n04409011 +n07590752 +n15092942 +n03914583 +n13066448 +n03532919 +n10639637 +n04566561 +n13223843 +n07904637 +n12347158 +n02720048 +n03901229 +n03936466 +n10574840 +n03782794 +n12397431 +n07908567 +n12580896 +n02697221 +n09791419 +n02577403 +n07870069 +n02136103 +n04318892 +n01462544 +n09747191 +n12287836 +n03067093 +n03934565 +n03543945 +n13126856 +n02240068 +n01585422 +n12413301 +n03246454 +n01876034 +n03635330 +n11680596 +n03228365 +n03082656 +n11609862 +n12859986 +n03934229 +n10233248 +n03166514 +n12166793 +n10115430 +n03327553 +n03373611 +n02967782 +n12338258 +n01604968 +n01323155 +n02590094 +n03044934 +n07866409 +n12291143 +n14900342 +n12094612 +n07845702 +n07926250 +n10750640 +n04359500 +n09797873 +n09953350 +n03561047 +n12122725 +n12725738 +n01453087 +n04977412 +n04575723 +n13219833 +n12161056 +n04273285 +n12482437 +n12863624 +n04953296 +n03390075 +n10188957 +n02874442 +n04236935 +n09990690 +n12866459 +n04075715 +n09725000 +n12794367 +n12461673 +n03050453 +n03677115 +n12427391 +n07736371 +n02973236 +n02406749 +n12322699 +n12815198 +n10680796 +n03268311 +n02405799 +n12302248 +n09791014 +n01545574 +n07740033 +n07862095 +n09901337 +n04390577 +n03597916 +n12110085 +n11802586 +n04205505 +n07696527 +n12076852 +n04344003 +n03326660 +n02823586 +n03042139 +n01565345 +n07905296 +n01454545 +n07650903 +n07905386 +n12530629 +n02841187 +n02943964 +n03329536 +n09681234 +n03479121 +n03770085 +n04147793 +n11552133 +n03774327 +n13197507 +n07901355 +n10400437 +n07837912 +n02310941 +n07845087 +n02239774 +n04976319 +n03960490 +n05239437 +n06275471 +n01633406 +n04257223 +n12009420 +n10483138 +n02775897 +n07866151 +n07922512 +n02666624 +n03944024 +n03842377 +n01832493 +n07855907 +n03968728 +n04492060 +n07879072 +n11635830 +n11802800 +n02357911 +n02431628 +n03730494 +n13099999 +n07768230 +n13147270 +n12331655 +n10237676 +n11855553 +n09759501 +n10620586 +n13181055 +n12309277 +n13183489 +n04382695 +n07679034 +n10495756 +n02173113 +n12764202 
+n03683457 +n10298912 +n07680313 +n10160280 +n02205673 +n12053690 +n11653904 +n02931294 +n04093775 +n12856479 +n02427470 +n07608866 +n09954639 +n11639445 +n03364599 +n09924106 +n09683924 +n10419472 +n03089753 +n12620969 +n07604956 +n12940609 +n12564083 +n03514894 +n10343355 +n13068255 +n03805280 +n12793284 +n03140652 +n02666501 +n11717577 +n04267435 +n04593185 +n12820853 +n03934311 +n02630615 +n07767002 +n07723968 +n01631354 +n07931452 +n12414818 +n03097673 +n09944430 +n04457474 +n11850521 +n12227658 +n10131815 +n12408717 +n03566730 +n12777680 +n06273555 +n04357531 +n03759243 +n09861599 +n03015851 +n04175039 +n03392741 +n07859796 +n07741138 +n04474187 +n02266864 +n04553561 +n02667244 +n12720200 +n12432356 +n07806120 +n10362557 +n11929743 +n07765862 +n02963987 +n02762371 +n02747672 +n04289195 +n04056413 +n03039493 +n03894677 +n12338655 +n04422409 +n12079120 +n10252222 +n10168837 +n12919847 +n10297841 +n01340014 +n11710827 +n10167838 +n12278107 +n01384164 +n10498986 +n02742468 +n02899439 +n11752937 +n12107710 +n12315598 +n03985441 +n07605804 +n07686202 +n12884100 +n13121349 +n11725311 +n10420507 +n11706761 +n01381044 +n03331599 +n12336333 +n10185483 +n07880880 +n01782516 +n12615232 +n03175457 +n12657082 +n01750437 +n07918879 +n13213066 +n12927494 +n02910542 +n06273986 +n02161338 +n10235024 +n12180168 +n03659950 +n02160947 +n11861853 +n09866817 +n09279986 +n12393269 +n01552034 +n05526957 +n02956883 +n12818966 +n09753792 +n03114236 +n12273344 +n12546617 +n13177048 +n02129991 +n01731941 +n01628770 +n12774641 +n07685546 +n03253279 +n10678937 +n12579038 +n08673039 +n01392275 +n02379081 +n10530150 +n12851469 +n12414449 +n11694664 +n11877283 +n09708889 +n03585438 +n00483605 +n12332555 +n03323096 +n07851767 +n02417663 +n10667863 +n02856237 +n09269341 +n01596608 +n09720033 +n13160604 +n04443164 +n02814428 +n11622771 +n10328123 +n04338963 +n01794651 +n12069217 +n07762740 +n02935387 +n11897116 +n10569179 +n12749852 +n10745006 +n07823280 +n12162425 +n09801533 +n03772269 
+n04518643 +n07916319 +n12771597 +n02147173 +n10342992 +n03795123 +n11646344 +n12847927 +n07686021 +n12383894 +n04465050 +n14564779 +n04212467 +n12274863 +n02380052 +n04329958 +n12034384 +n04213353 +n04366033 +n04955160 +n02778294 +n12890685 +n03028785 +n03097535 +n04533594 +n01750167 +n01415626 +n12276477 +n07729926 +n07711371 +n12843970 +n10500419 +n12891093 +n03840823 +n12509665 +n11878101 +n04315342 +n07685031 +n12305819 +n10039271 +n12264512 +n03911866 +n13919547 +n12413419 +n03785721 +n02599347 +n03786194 +n04018155 +n12856287 +n09607903 +n02396088 +n10212501 +n10313000 +n07683617 +n03586219 +n03890233 +n03156767 +n12033709 +n01648139 +n04399846 +n10671736 +n07698672 +n10791115 +n07708124 +n02709908 +n04266968 +n01758141 +n10058962 +n09444783 +n03668067 +n02838345 +n02388143 +n12893993 +n12590499 +n01462042 +n02689434 +n13209808 +n04075291 +n02412629 +n01953594 +n03906463 +n03043423 +n02200509 +n10152763 +n12504570 +n04396808 +n03382413 +n03618101 +n02767147 +n02390101 +n03450974 +n12778398 +n03625539 +n02574271 +n04113316 +n07572616 +n11809437 +n04119230 +n03829954 +n10500603 +n04258732 +n02731900 +n10174330 +n01574801 +n08663703 +n12558230 +n03981760 +n07732904 +n11875523 +n11823436 +n03238286 +n03079494 +n04281260 +n07873057 +n11686912 +n10568608 +n07593004 +n04271531 +n10037922 +n07838551 +n03615300 +n12624568 +n12940226 +n05242928 +n03680734 +n01589893 +n11652376 +n11893640 +n04119091 +n09696763 +n07851554 +n02660640 +n12124818 +n10370955 +n02663211 +n02414209 +n13187367 +n03258577 +n04375241 +n07617932 +n12240477 +n03417202 +n07595649 +n03839424 +n03087245 +n02431441 +n04396335 +n03484809 +n03426285 +n03592931 +n02912319 +n03488887 +n12187891 +n07592400 +n12918609 +n07858114 +n07567980 +n01548694 +n02726210 +n02406859 +n10147262 +n05458576 +n02848921 +n03503233 +n02587618 +n03465151 +n03582508 +n11654293 +n03695452 +n02197185 +n04223170 +n10243273 +n03149135 +n02842809 +n03669534 +n03857291 +n02147328 +n12278865 +n12733428 +n03264906 +n09924195 
+n10432189 +n12203896 +n03892728 +n12360958 +n10418735 +n01650901 +n12420722 +n03341606 +n02557909 +n07751858 +n03483971 +n12019035 +n03991202 +n02072040 +n03129848 +n04505345 +n02405440 +n03901974 +n11656123 +n11552976 +n10291822 +n10108018 +n09902731 +n03325691 +n12646072 +n04134170 +n12097396 +n07564008 +n01624305 +n03421117 +n02776007 +n10792856 +n07818133 +n03227184 +n10198437 +n04157099 +n12743009 +n07820960 +n12749456 +n13035925 +n05262698 +n03422771 +n02878628 +n12140903 +n07820297 +n03524745 +n09901921 +n03170872 +n10039946 +n12638964 +n11989087 +n03461988 +n04287451 +n04298053 +n07882420 +n04002262 +n02734835 +n11707827 +n07756641 +n12808007 +n10069981 +n12637123 +n12947895 +n04363082 +n04292080 +n11858077 +n04535252 +n12646397 +n12283147 +n12321077 +n02746595 +n02895328 +n07624924 +n12537253 +n11952541 +n02181477 +n01440160 +n03878828 +n12861541 +n02869563 +n04242084 +n03197201 +n09396608 +n04291992 +n07845863 +n04314522 +n12843557 +n04029647 +n12146654 +n13147386 +n12954799 +n11920133 +n03038480 +n03213715 +n02971473 +n04149374 +n04230387 +n00444340 +n11859275 +n07564796 +n02948403 +n10186068 +n04315713 +n02366002 +n02670935 +n13208302 +n10225931 +n07826340 +n04102872 +n02259708 +n11855842 +n09941089 +n08896327 +n10237464 +n12084158 +n03764995 +n03627954 +n12384375 +n10341343 +n07876189 +n04573379 +n07904293 +n07840520 +n12038038 +n03005147 +n10483799 +n02978367 +n01484285 +n13094273 +n04539053 +n01748389 +n10146816 +n07815839 +n12991837 +n03294604 +n03588841 +n04055180 +n03209477 +n09917345 +n04393913 +n12337391 +n12126084 +n01882125 +n07688130 +n02814116 +n09640715 +n12679593 +n12596345 +n03029925 +n11761650 +n04457157 +n12683096 +n07709881 +n03841290 +n13157684 +n07927836 +n03523134 +n03690279 +n10187491 +n12451070 +n02682311 +n03978815 +n11806679 +n07808022 +n01386354 +n03622526 +n02369293 +n11885856 +n02289610 +n12663359 +n02624987 +n13173488 +n03027001 +n07896765 +n11935330 +n07814790 +n04242704 +n09959142 +n07589543 +n03551582 +n07843117 
+n03556992 +n02060569 +n04000998 +n03825271 +n11946918 +n02874750 +n03479502 +n09919451 +n02176747 +n02080713 +n03400972 +n10222170 +n07926785 +n07852302 +n03012373 +n10438842 +n12868019 +n03634034 +n04210591 +n07853560 +n12374862 +n09248399 +n04355115 +n12908093 +n12906498 +n12875269 +n02791665 +n03146777 +n02854378 +n12414159 +n07821610 +n07595180 +n12238913 +n12141385 +n10761190 +n12165758 +n01653223 +n12956367 +n03695753 +n12416703 +n12346813 +n03405111 +n04304215 +n01624212 +n12674895 +n09850760 +n12407715 +n04156040 +n11610437 +n03395256 +n09970822 +n04229959 +n02530831 +n07870894 +n12098524 +n12828379 +n04057215 +n10751152 +n10053439 +n03674270 +n07869291 +n12256920 +n02535163 +n04282231 +n02136452 +n02365108 +n10328328 +n02315487 +n03325403 +n09231117 +n03342657 +n09980985 +n10702167 +n11961871 +n02065263 +n12857779 +n03219612 +n07805966 +n10699981 +n07691863 +n12831932 +n04179126 +n10208189 +n09765118 +n07922147 +n01631512 +n01947997 +n01405616 +n01892030 +n07827896 +n12964920 +n07749870 +n03276696 +n10020670 +n11828577 +n07624666 +n10590146 +n02407521 +n10253703 +n03270854 +n11610047 +n12981443 +n12413642 +n12302565 +n03177059 +n04594114 +n10227985 +n07728391 +n10395073 +n02810270 +n03569293 +n07812046 +n03843316 +n12477401 +n03802643 +n07618029 +n10755648 +n12837803 +n12454556 +n01636127 +n02809241 +n03270165 +n12035631 +n02962414 +n09750641 +n01793085 +n04346003 +n07922041 +n04164002 +n12499979 +n03301291 +n07921834 +n09656077 +n07599161 +n13155611 +n10194231 +n10063635 +n03601442 +n10366276 +n00475661 +n03943714 +n10377291 +n02624551 +n02568447 +n07589458 +n09691858 +n02685995 +n11919975 +n01690466 +n13211020 +n04114069 +n10530383 +n04200908 +n12631932 +n07916437 +n03219859 +n07918309 +n10368291 +n10253479 +n03317889 +n13206178 +n02821415 +n10592811 +n12557064 +n12872458 +n10212231 +n07926346 +n09695514 +n09741816 +n03964611 +n07812913 +n09703708 +n02587479 +n10593521 +n03485309 +n03776877 +n12289433 +n07716504 +n10580030 +n03061893 +n03206158 
+n09710041 +n04266849 +n07864065 +n12767648 +n02333190 +n12295429 +n02406432 +n01799679 +n07861983 +n02201626 +n03441582 +n03653975 +n02834506 +n12263204 +n10672662 +n03072682 +n03410423 +n11620389 +n04542095 +n07910970 +n03697913 +n02706806 +n09736798 +n12318965 +n07938594 +n12032429 +n03191776 +n04210288 +n01422335 +n03236093 +n11881189 +n02247216 +n12338146 +n03104512 +n00474881 +n04172230 +n01461315 +n04400109 +n10646140 +n02215621 +n10096126 +n03019806 +n11809754 +n02492948 +n10741367 +n10308504 +n07875560 +n02523110 +n07738224 +n02015797 +n10499631 +n03025165 +n03284308 +n03508881 +n10441037 +n10757492 +n07608721 +n09755241 +n04264361 +n04394421 +n03776997 +n03175843 +n04476526 +n02523877 +n13196369 +n10190122 +n03172738 +n02709763 +n02070624 +n04563560 +n04017807 +n03824589 +n07817758 +n03222722 +n01542433 +n13173259 +n04458201 +n12869668 +n12580786 +n02407763 +n09760913 +n10530571 +n11752798 +n09612700 +n07601175 +n11632376 +n10641223 +n03158668 +n03411208 +n01413457 +n03684740 +n10248008 +n12656528 +n11849271 +n07771891 +n12067433 +n12389727 +n11734698 +n04042204 +n07825399 +n12621945 +n07624757 +n03180732 +n09741331 +n10246317 +n04030414 +n07821107 +n04524716 +n03789603 +n12867449 +n10249869 +n02434415 +n07614103 +n03333349 +n04602840 +n09923996 +n02658811 +n13033879 +n03663433 +n02873623 +n07837545 +n12436907 +n02675077 +n01500854 +n04435552 +n01790304 +n11687789 +n03443543 +n09733459 +n01606177 +n12245885 +n11721642 +n02201497 +n12010815 +n04594742 +n02755984 +n07927716 +n04245218 +n03134118 +n13214485 +n12294542 +n12713521 +n03556173 +n12650038 +n07719058 +n04319774 +n10443830 +n10019187 +n09720702 +n07926442 +n10402709 +n03989777 +n11699751 +n09613118 +n02965122 +n04221076 +n01861330 +n12837052 +n02975589 +n09668437 +n03012499 +n01418498 +n12451566 +n03585778 +n07692517 +n09672590 +n09741999 +n09748648 +n07621264 +n03482001 +n10185148 +n01542168 +n12536291 +n07846557 +n11840476 +n03130866 +n02631775 +n11730015 +n03715275 +n07680168 +n12175370 
+n05427346 +n03665232 +n08611421 +n11730458 +n02413484 +n09783884 +n07888378 +n04611351 +n02247655 +n02136794 +n11649359 +n01382033 +n07889193 +n10405540 +n03510384 +n04420720 +n03585875 +n03812789 +n01835769 +n12139921 +n09762011 +n10103228 +n03477410 +n11930788 +n10064831 +n12311045 +n07681805 +n03136504 +n12887713 +n03886940 +n03130233 +n10197392 +n12333961 +n07672914 +n12723062 +n12599661 +n04268799 +n03696909 +n12809868 +n12452256 +n10710778 +n02571652 +n12117326 +n02450677 +n03041265 +n12544240 +n01966377 +n10252354 +n02378625 +n09814488 +n10569011 +n13067330 +n07928998 +n07890970 +n02187279 +n02592371 +n07846802 +n03475961 +n05448704 +n10410996 +n02851795 +n10093167 +n12468719 +n09876701 +n03057724 +n03469031 +n02344270 +n04248209 +n02687682 +n04467899 +n12897788 +n03436656 +n12539832 +n09906704 +n03190458 +n11843441 +n12130549 +n11823756 +n03153246 +n03684489 +n04160036 +n02908951 +n12855365 +n03518230 +n12225222 +n12933274 +n10432957 +n02921406 +n10156831 +n12239647 +n02826812 +n03411927 +n11602091 +n13200986 +n04244847 +n01330126 +n14938389 +n03001540 +n04387531 +n03423099 +n07608533 +n11723986 +n07600394 +n12529500 +n02403820 +n02587300 +n10333317 +n07935288 +n12680652 +n01449980 +n12153914 +n07803310 +n11741797 +n01881857 +n13081999 +n08644045 +n02061217 +n02173784 +n02660519 +n03104019 +n13137951 +n04538403 +n02621258 +n04515729 +n04165945 +n11919761 +n13078021 +n07861247 +n11959259 +n11801665 +n04070545 +n13210597 +n10218043 +n10717337 +n01365885 +n10718952 +n11979187 +n03880032 +n03798610 +n03477303 +n01876667 +n11860208 +n03401721 +n03360133 +n13230843 +n13194758 +n13190060 +n02564935 +n13894154 +n12754311 +n07697408 +n13171210 +n02035402 +n03736147 +n10396337 +n04554998 +n02793930 +n04126852 +n03654826 +n09411295 +n06255613 +n01680983 +n10261862 +n01581874 +n10378780 +n10646641 +n03539103 +n03351151 +n04349913 +n03906106 +n02370525 +n03319576 +n04113968 +n09693244 +n02945964 +n03344509 +n04117216 +n03889626 +n03557840 +n09800469 +n04280487 
+n07890890 +n12147835 +n12295237 +n03883664 +n04436992 +n02922877 +n10099002 +n01988203 +n10056719 +n11646517 +n03672521 +n04568713 +n10111358 +n03606347 +n04047733 +n12320627 +n10251612 +n10460033 +n01742447 +n11917835 +n10443032 +n13079567 +n04363671 +n10788852 +n10482587 +n03308614 +n12741586 +n12938667 +n04539407 +n01630148 +n02303777 +n13050940 +n04552551 +n02341288 +n04098169 +n04110439 +n11625391 +n12259316 +n02822762 +n10631131 +n04089152 +n03571439 +n04558199 +n12656909 +n03170292 +n02877642 +n12771890 +n03033267 +n12658603 +n13354021 +n12855886 +n11840246 +n03619050 +n07727252 +n12932706 +n13874073 +n01315805 +n02948942 +n12048928 +n03146449 +n10656969 +n09872557 +n03906590 +n04454792 +n12500309 +n04239333 +n01815036 +n09644657 +n10497645 +n02918455 +n07812662 +n04240434 +n10804636 +n11967878 +n04184095 +n11834272 +n05244755 +n02299039 +n12665659 +n12144987 +n07607492 +n11887750 +n13083461 +n04577139 +n09670909 +n07876893 +n02875948 +n04069582 +n10458111 +n10361194 +n09389867 +n01651778 +n11933387 +n13193143 +n12834190 +n03516266 +n02184589 +n10041373 +n02809605 +n04064213 +n04957589 +n12643113 +n02582721 +n07911061 +n07921360 +n10369417 +n10527147 +n04104925 +n03707372 +n01386182 +n10374849 +n09902851 +n08559155 +n02332447 +n11649150 +n11722036 +n01823740 +n04592356 +n10002257 +n10661732 +n07562379 +n07597263 +n04036776 +n13112201 +n09842288 +n07738105 +n04545984 +n09635973 +n02885233 +n02756854 +n07808479 +n03029296 +n01543383 +n02884450 +n09843716 +n04224395 +n10576676 +n10140051 +n07919894 +n07806879 +n10212780 +n09478210 +n12017127 +n03770224 +n07606191 +n03555217 +n09715165 +n12270460 +n12129738 +n11739365 +n02303585 +n07818029 +n05314075 +n03019304 +n09859975 +n09454744 +n13151082 +n12586989 +n00455076 +n07741357 +n04957356 +n08659242 +n04577293 +n04126244 +n03131193 +n12428242 +n03569494 +n03781594 +n07743384 +n02892392 +n12576695 +n12199982 +n07693439 +n07719756 +n11884384 +n03043798 +n12351091 +n03690168 +n02214499 +n01839949 +n01831360 
+n12642964 +n02957862 +n03125588 +n12883628 +n04002371 +n10747965 +n09744462 +n02853745 +n13030337 +n12156679 +n02761034 +n12587487 +n03374570 +n12728322 +n01731764 +n07918706 +n03696445 +n03185868 +n02805283 +n03868763 +n02202124 +n12369665 +n12449934 +n12650229 +n02656301 +n07743723 +n11702713 +n02927053 +n03916385 +n01486010 +n03986071 +n04188064 +n13897528 +n12414329 +n07718068 +n07837755 +n11735570 +n10464542 +n04091466 +n01315581 +n10374943 +n03989898 +n13220525 +n04076052 +n04062179 +n02414442 +n04414101 +n04446162 +n00480885 +n03536568 +n03773835 +n10728998 +n12643877 +n02255391 +n03799610 +n07847585 +n00446411 +n11910666 +n03139998 +n02296276 +n02889996 +n02786611 +n10363445 +n07854348 +n08583682 +n09912681 +n07896422 +n02368821 +n11935953 +n12185254 +n11738547 +n03809211 +n02448318 +n13066979 +n01987076 +n12009047 +n12839574 +n13174823 +n07902520 +n03369866 +n13209129 +n02593191 +n03853291 +n02620578 +n10071332 +n01813658 +n09895480 +n10134760 +n01316734 +n07845166 +n03175983 +n13132156 +n12814960 +n12883265 +n03637787 +n04310507 +n04133114 +n03900194 +n04129688 +n04449550 +n01805321 +n01717467 +n01573627 +n12271451 +n11722621 +n09976917 +n12232280 +n12905135 +n03451253 +n01655344 +n12346986 +n11987511 +n10517283 +n02941845 +n12730370 +n03121190 +n07917874 +n10023656 +n10151133 +n07695187 +n03258456 +n10639238 +n10682713 +n02085019 +n12343753 +n10749928 +n04595611 +n04410565 +n08500819 +n07719980 +n04016479 +n03232417 +n03469832 +n09834885 +n07925327 +n10094782 +n03632100 +n12734215 +n09845849 +n04047139 +n10743124 +n02604954 +n12270278 +n03036244 +n11991777 +n10168012 +n02561803 +n10531109 +n10344319 +n03804211 +n10513938 +n10732967 +n09917481 +n02950482 +n03148808 +n07910245 +n07925423 +n07889990 +n04302988 +n07745357 +n04346511 +n07573563 +n02564403 +n12084400 +n10030277 +n09815455 +n04388473 +n12404729 +n10576316 +n12072210 +n11811059 +n01824344 +n03556811 +n03175301 +n07586485 +n13137010 +n11986729 +n04967561 +n03881404 +n07692114 +n07874995 
+n02770585 +n07853345 +n02775689 +n04328580 +n01323781 +n07773428 +n02414043 +n02794474 +n02352932 +n07569873 +n12374705 +n03606106 +n04267246 +n04369485 +n11934239 +n12705698 +n11841247 +n07868045 +n03525693 +n12358293 +n02937010 +n09658398 +n12711182 +n03516647 +n04591631 +n10228712 +n11930353 +n03471779 +n12594324 +n02251593 +n04455579 +n02542017 +n03381450 +n03320845 +n12364940 +n09657748 +n12412987 +n01840412 +n10570704 +n10117267 +n03251280 +n10195261 +n12178129 +n12285049 +n02177775 +n10117415 +n03707766 +n04475309 +n05604434 +n03999064 +n12127575 +n01972131 +n09793946 +n01635176 +n02791532 +n07564101 +n07876460 +n02813981 +n10764719 +n03638743 +n12761702 +n02125689 +n11657585 +n09923003 +n13069773 +n02683183 +n04324515 +n11936946 +n12862828 +n02659808 +n02619861 +n13175682 +n11648039 +n07768139 +n12512674 +n12108613 +n02947977 +n12899971 +n03845107 +n07689490 +n02081927 +n07619508 +n10248377 +n10300041 +n10761326 +n09655213 +n02675522 +n04963111 +n01995686 +n03256631 +n10684630 +n04471912 +n12728864 +n03870546 +n02829246 +n09725546 +n03409920 +n13194918 +n10055297 +n02513248 +n01462803 +n11782266 +n13094145 +n07839478 +n13916363 +n07932454 +n09722817 +n07774479 +n10386874 +n12832822 +n01599388 +n02964295 +n04349189 +n07689313 +n11653126 +n02309841 +n02064000 +n04410663 +n04562122 +n02358712 +n09901786 +n10441124 +n12882158 +n12815668 +n10159289 +n01641930 +n03315990 +n12271187 +n10277638 +n07815163 +n12903014 +n07915366 +n04412300 +n01324799 +n03408264 +n09452291 +n03019198 +n11890884 +n10355806 +n03186199 +n04013600 +n12541157 +n06259898 +n06273294 +n11946051 +n01671705 +n04415257 +n01905321 +n04050600 +n12604460 +n04051439 +n02929184 +n11765568 +n10025060 +n02396796 +n04033287 +n13027557 +n03127531 +n10308066 +n09729062 +n01593553 +n02476567 +n07609728 +n12970293 +n01419888 +n03215749 +n01684741 +n13067672 +n03870290 +n07846359 +n12961536 +n03356559 +n07727140 +n09843602 +n02378755 +n12044041 +n01977485 +n07718920 +n12060546 +n04265428 +n12237855 
+n04006067 +n10227266 +n04361937 +n12134486 +n10097842 +n02264591 +n03912821 +n07594155 +n03116163 +n11771924 +n04155457 +n12394118 +n10507380 +n01844746 +n11901452 +n03024233 +n03383562 +n11806814 +n10062716 +n04204755 +n08613733 +n12907671 +n03533654 +n09826605 +n03109033 +n07606419 +n03537085 +n11615812 +n07695504 +n11694300 +n04520962 +n09971839 +n02664285 +n03402511 +n02061560 +n13133140 +n03548195 +n12877493 +n02425086 +n12845187 +n12488454 +n02975994 +n02071028 +n01457407 +n03685486 +n07605282 +n07771405 +n07827554 +n10538733 +n03438780 +n04379096 +n12686496 +n10001764 +n11848867 +n12125001 +n09886540 +n03275566 +n01442710 +n12789554 +n07858197 +n12722071 +n12868880 +n10441694 +n12409651 +n07727741 +n12289585 +n04069166 +n12686877 +n03723439 +n07815956 +n12543455 +n10778044 +n02200630 +n10074841 +n12640284 +n12589841 +n07592317 +n07866571 +n12712626 +n04228422 +n11711289 +n03590475 +n13081229 +n03045800 +n03639230 +n02874214 +n07615954 +n03204134 +n12053962 +n12769219 +n15006012 +n09873769 +n11818636 +n01959029 +n03349599 +n12227909 +n07576969 +n03638180 +n07742224 +n03390673 +n02344175 +n03770520 +n00447361 +n13235319 +n01983674 +n10061882 +n04267165 +n12493868 +n12713358 +n02930339 +n10493419 +n12918810 +n02582220 +n12248359 +n02644501 +n04596492 +n04538249 +n07905618 +n13230190 +n07808268 +n15005577 +n09351905 +n12730544 +n11937023 +n04024137 +n02238358 +n11646955 +n11618079 +n09849990 +n04060448 +n04220805 +n12725940 +n12004120 +n01484562 +n02669442 +n12132956 +n01756916 +n03980986 +n02256172 +n07716750 +n12119390 +n04047834 +n11934041 +n12828977 +n03648219 +n11873612 +n12909614 +n04397860 +n03908111 +n03261395 +n03695616 +n11668117 +n12014355 +n02896074 +n03988758 +n04426184 +n10328696 +n02477028 +n04507326 +n04320871 +n03256472 +n01919385 +n03988926 +n13182164 +n07826250 +n03207548 +n01396617 +n04369618 +n07913774 +n13229951 +n03410022 +n12728508 +n01997119 +n03598783 +n01341090 +n03879456 +n01736796 +n02864122 +n13879816 +n02684962 +n12246037 
+n02433729 +n04364397 +n09881358 +n02950120 +n03326371 +n02243878 +n01790812 +n12990597 +n03330947 +n07764486 +n03332173 +n10006177 +n03347472 +n07619301 +n10106509 +n12365285 +n01732989 +n07678586 +n04098795 +n07733847 +n03994297 +n12872914 +n02762909 +n07766530 +n13198482 +n02395855 +n12273515 +n04487894 +n07847047 +n12488709 +n02859557 +n04255768 +n02360933 +n03267696 +n03152951 +n10188715 +n10520544 +n13065514 +n02900594 +n03699754 +n01319187 +n01949499 +n10417424 +n01603000 +n12062105 +n09683180 +n09863339 +n01880716 +n10702615 +n03893935 +n10495555 +n04131499 +n02957252 +n02113892 +n07724078 +n12246941 +n04303095 +n01751215 +n04213530 +n12117695 +n12418507 +n01922948 +n12131405 +n13188767 +n01481498 +n03174079 +n02407172 +n11613867 +n10152616 +n10119609 +n04158250 +n11695085 +n07855105 +n02854630 +n03768683 +n12739966 +n12266984 +n12819141 +n12732605 +n13205249 +n11917407 +n01607429 +n02694279 +n07815294 +n06614901 +n07846471 +n12119717 +n02595339 +n12366186 +n10693235 +n12263410 +n12484244 +n10337488 +n04146976 +n01469723 +n07872748 +n03238879 +n12000191 +n07846938 +n03116008 +n12139196 +n04013176 +n10317963 +n12140511 +n02065726 +n01649556 +n10316862 +n01755952 +n04385079 +n12770529 +n02814338 +n01675352 +n11874423 +n01369484 +n10537708 +n07618281 +n07821404 +n02297819 +n03238762 +n03357081 +n02628600 +n07830986 +n12507823 +n04431925 +n11955532 +n03429771 +n10281896 +n12383737 +n12760875 +n09673091 +n12892013 +n06625062 +n04503269 +n03674842 +n12338979 +n04268275 +n12033139 +n11767877 +n07812790 +n12676134 +n04037873 +n10097477 +n12310638 +n12258101 +n09391386 +n13196738 +n13866626 +n12720354 +n10106995 +n07843220 +n03878294 +n04101375 +n07733217 +n10220080 +n04601938 +n10778148 +n12973937 +n10556825 +n12256708 +n12583855 +n04259202 +n07628181 +n04226962 +n02777402 +n09674412 +n12188635 +n03776167 +n04504038 +n04156591 +n02270945 +n02264021 +n07826653 +n02980203 +n02059852 +n02102806 +n12921660 +n04477725 +n10107173 +n12837466 +n02697022 +n04350688 
+n12110236 +n02177196 +n07899976 +n12639910 +n02368399 +n10009162 +n03950647 +n09248153 +n02425532 +n04044955 +n11933257 +n03460899 +n10147710 +n02379743 +n02413917 +n02890804 +n12915140 +n02146879 +n07915800 +n01787006 +n03646809 +n11677902 +n04065909 +n02088992 +n02887832 +n10115946 +n02306825 +n03719560 +n10456696 +n03758220 +n12625003 +n04021503 +n07563366 +n02531625 +n10304650 +n12855710 +n09735654 +n07853762 +n03512030 +n12898342 +n02297938 +n12618727 +n04082344 +n12953712 +n12617559 +n03035715 +n02532451 +n05399356 +n03602686 +n10082423 +n04607759 +n07581607 +n07594737 +n04030965 +n03464628 +n12103894 +n03039353 +n03522990 +n02964934 +n03169063 +n10153865 +n09653144 +n09941571 +n12907057 +n07768318 +n02600798 +n02187150 +n01811243 +n12252383 +n04495555 +n07678953 +n13181244 +n13069224 +n13184394 +n12765402 +n03471347 +n10208847 +n03697366 +n09840435 +n02506947 +n09709673 +n07928578 +n11935715 +n07848936 +n02757927 +n01999767 +n02245443 +n10260473 +n13898645 +n02701260 +n07840219 +n11785875 +n12385830 +n12017664 +n12145148 +n04530456 +n01929186 +n02384741 +n04113038 +n03296217 +n09723819 +n03766697 +n12143215 +n09929202 +n02684248 +n12119539 +n03566555 +n12941220 +n04124573 +n10750188 +n07733005 +n04230707 +n03829857 +n07756838 +n12244458 +n12543826 +n03514129 +n02762169 +n04435870 +n03342863 +n09745324 +n12369476 +n11652039 +n03915320 +n07746749 +n07608641 +n12642600 +n02389943 +n12137791 +n04111962 +n12493426 +n12454793 +n01455317 +n10728117 +n03281524 +n12195734 +n12353431 +n02477329 +n02678010 +n04557522 +n10162354 +n14942411 +n07806043 +n12274151 +n09835153 +n03983499 +n04086663 +n07851926 +n07868684 +n11926976 +n03972146 +n04310604 +n09675799 +n13880704 +n13173132 +n07577918 +n10720964 +n11937102 +n03349020 +n12340581 +n03725506 +n03477143 +n10578162 +n01731137 +n03382104 +n11616852 +n01493829 +n09327077 +n03856335 +n03321843 +n02375757 +n02118643 +n08500989 +n03496486 +n04140777 +n12858987 +n02845293 +n04093157 +n07819682 +n10394786 +n12289310 
+n02901620 +n01559160 +n07919165 +n12648196 +n11774972 +n11995396 +n10543937 +n10154013 +n03977158 +n01884476 +n12266528 +n11906127 +n12661538 +n04396650 +n12761905 +n04175574 +n10181878 +n12017326 +n12876899 +n09744346 +n07741706 +n04451636 +n07735981 +n03751590 +n03140546 +n03070396 +n03091223 +n12071477 +n07562017 +n09981092 +n09847344 +n12552893 +n12371202 +n02245111 +n01598271 +n04400499 +n02298095 +n15048888 +n02967170 +n04030161 +n10676434 +n01556514 +n13235766 +n02538562 +n12603672 +n03941586 +n02449183 +n07567611 +n12923257 +n02296021 +n11730933 +n12497669 +n02917742 +n07875926 +n02714535 +n13142182 +n02878107 +n07861334 +n02682811 +n03730655 +n03681813 +n12970733 +n02132320 +n12436090 +n07931280 +n04295353 +n12982590 +n01783017 +n13164501 +n02424589 +n01499732 +n12650805 +n04543509 +n10369699 +n03439631 +n13160116 +n07831663 +n05449196 +n13025854 +n10169241 +n02847461 +n10734963 +n13213397 +n03343234 +n12275317 +n02793414 +n04300509 +n01803893 +n11617878 +n02179192 +n03637480 +n04514648 +n03087521 +n10478827 +n11757190 +n12919195 +n04532504 +n01736375 +n04015786 +n04545471 +n12668131 +n04472961 +n14786943 +n07584938 +n02498743 +n07744559 +n10010062 +n10101308 +n07832099 +n02601767 +n10473453 +n02451575 +n02496052 +n03696746 +n12669803 +n07904072 +n04290762 +n11737125 +n07760755 +n12553742 +n12068138 +n12630999 +n02390938 +n02202678 +n02216740 +n02679961 +n13173697 +n11828973 +n02287987 +n04585318 +n10360366 +n07745661 +n03474352 +n07934800 +n12677612 +n03692272 +n13092240 +n04230487 +n11846312 +n12433952 +n11793403 +n03056873 +n05454833 +n12517077 +n12682882 +n02649218 +n09425344 +n07878283 +n02795978 +n10064977 +n12754174 +n02945813 +n01750743 +n03150661 +n13880415 +n12337800 +n04017571 +n09754907 +n04456734 +n02967540 +n10621400 +n11744471 +n01971620 +n04148285 +n10781817 +n11991549 +n12305654 +n03943833 +n10330931 +n12918991 +n01783706 +n11933099 +n12931231 +n07589967 +n09666349 +n07853445 +n12714949 +n03548533 +n04158672 +n03809802 +n03080309 
+n12800049 +n02578454 +n02834027 +n10067600 +n03044671 +n04198233 +n07930205 +n04357930 +n12221522 +n11957317 +n03085781 +n03723885 +n03614383 +n02661618 +n04292221 +n03426574 +n03838024 +n10442093 +n12399534 +n01450950 +n07876550 +n11937446 +n09870096 +n02631628 +n05460759 +n01710177 +n03660562 +n04283784 +n01497738 +n02232223 +n04209811 +n12837259 +n02864987 +n04499810 +n12654857 +n03493792 +n09688233 +n02312912 +n10057271 +n07606058 +n03258192 +n10507565 +n11930038 +n08679269 +n03812263 +n11662128 +n04085574 +n07643577 +n03981094 +n02796412 +n02513939 +n07686634 +n07936979 +n03168774 +n03816394 +n07625324 +n04138131 +n10383094 +n10222716 +n10381981 +n12254168 +n13223090 +n03056583 +n09910556 +n03277004 +n12649866 +n02089725 +n03688707 +n09665367 +n07849506 +n02843909 +n13141797 +n02477516 +n09710886 +n03835941 +n11734493 +n10778711 +n10007809 +n02038141 +n12766043 +n02353172 +n02030224 +n10762212 +n06274921 +n13033396 +n03560860 +n01961234 +n13868515 +n03216199 +n01553527 +n04429038 +n10211036 +n02150885 +n02435517 +n02755675 +n09699020 +n12566331 +n03909516 +n02903727 +n02594942 +n04173172 +n04125692 +n12251001 +n02412787 +n01649412 +n01411450 +n01774097 +n09912907 +n03162556 +n07566231 +n12267534 +n03928589 +n04142327 +n11771147 +n07832592 +n04155177 +n07937621 +n07839864 +n03201895 +n13095013 +n10298271 +n03059103 +n03784793 +n11925450 +n03288742 +n02809364 +n04108999 +n04449449 +n03726233 +n07854455 +n03692136 +n12018447 +n03374282 +n06008896 +n07598928 +n03577312 +n04604806 +n09892513 +n04370600 +n08238463 +n01793159 +n07822687 +n03242390 +n07685303 +n03822361 +n01996280 +n10505942 +n06596845 +n04219580 +n12056990 +n10579062 +n10240082 +n10298202 +n07711907 +n03905730 +n12222900 +n07598622 +n04415815 +n12389932 +n12154114 +n04210012 +n12500751 +n03729402 +n12122918 +n04572121 +n12804352 +n02415130 +n12780325 +n11639084 +n12768933 +n02253494 +n13217005 +n03567788 +n12304286 +n10703480 +n07766723 +n05455113 +n07741804 +n12186839 +n01687128 +n01350701 
+n03260206 +n07876026 +n12528382 +n04125541 +n10457444 +n01606097 +n11717399 +n04598416 +n12899166 +n09748101 +n12160125 +n07608980 +n07843348 +n02409038 +n02571167 +n09980805 +n09706029 +n02495242 +n12765846 +n10373525 +n12321873 +n03047171 +n12365462 +n03752398 +n02662993 +n10316527 +n10728233 +n06273207 +n01733214 +n12297846 +n12755876 +n02428842 +n02289307 +n04536465 +n03253187 +n02297294 +n05584746 +n03117642 +n12189779 +n10338231 +n07599649 +n04559994 +n12710917 +n09966470 +n12470907 +n04499300 +n12403075 +n11837743 +n02269657 +n12599185 +n07618587 +n03996004 +n12851094 +n03392648 +n01319001 +n12826143 +n12369845 +n01814549 +n10056103 +n12854193 +n02267483 +n04019881 +n03490649 +n04268142 +n10801802 +n12315060 +n10149436 +n04563790 +n09865068 +n03000530 +n10657556 +n07840672 +n12118414 +n02856013 +n02900459 +n04094859 +n12079523 +n11827541 +n12236160 +n02904505 +n02846619 +n09842823 +n12926039 +n02146201 +n03195799 +n12815838 +n09899289 +n01483021 +n02519340 +n05453815 +n10329035 +n02494383 +n09742927 +n13220355 +n03212406 +n11759609 +n10061431 +n12095281 +n04262530 +n03799240 +n02426176 +n04608809 +n12230540 +n13880551 +n11741175 +n11858814 +n11723452 +n07590841 +n12604845 +n10342543 +n12760539 +n09270657 +n02563079 +n10643937 +n12843316 +n01651641 +n07838811 +n04359034 +n07758260 +n02762725 +n11726433 +n03114743 +n01952029 +n12321395 +n11930571 +n12337922 +n12427946 +n12001294 +n12551457 +n13235011 +n02290340 +n06419354 +n12408873 +n01741442 +n12308447 +n10243872 +n03658635 +n03694761 +n02570484 +n12912801 +n04158002 +n02417785 +n01332181 +n03703075 +n10283366 +n03142431 +n02779609 +n02300554 +n09868782 +n10323752 +n03166809 +n03394149 +n02827148 +n02186717 +n01350226 +n03344784 +n03555996 +n04498873 +n13157481 +n04519887 +n12028424 +n12349711 +n10471640 +n07741235 +n04032936 +n12357968 +n10228592 +n13178284 +n04168840 +n13239177 +n03561573 +n02566489 +n11807696 +n07681264 +n02566665 +n10456070 +n10063919 +n10492727 +n01788579 +n11977660 +n02036228 
+n02738978 +n03989349 +n10332953 +n12949361 +n09901502 +n07839730 +n13146928 +n10152306 +n04170515 +n11602478 +n02522722 +n01333610 +n13030852 +n02143891 +n12807624 +n04542329 +n12243693 +n12036226 +n13917690 +n02553028 +n02752199 +n10594857 +n11627714 +n04348070 +n13171797 +n04612257 +n07934373 +n04536765 +n02244515 +n04526800 +n04546595 +n02551668 +n12143405 +n07871588 +n07858484 +n03628728 +n13179804 +n03242264 +n12089846 +n07588688 +n07620047 +n01647466 +n09685233 +n03467254 +n12666369 +n05449661 +n10694939 +n12886600 +n12256522 +n04006330 +n03317673 +n04316815 +n12222090 +n04022866 +n04088441 +n07617526 +n10782362 +n04355821 +n13901490 +n12508618 +n03849943 +n04503499 +n13193466 +n09754633 +n07583978 +n13911045 +n07643679 +n12054195 +n10692090 +n04032509 +n10146927 +n02031298 +n04002629 +n04035748 +n10712229 +n02866106 +n07909504 +n04540397 +n06266878 +n10219879 +n12567950 +n07853648 +n03191561 +n07856045 +n12646197 +n03317510 +n10515863 +n13198054 +n02808829 +n12889579 +n02698473 +n09924437 +n03595055 +n12306270 +n07857356 +n09715303 +n03024518 +n04323519 +n09629065 +n04178668 +n12748248 +n02308618 +n07873198 +n10564098 +n03007297 +n04036155 +n02143439 +n10507482 +n12267931 +n03956331 +n12888234 +n04066476 +n07813107 +n02736396 +n10306496 +n12324388 +n01744555 +n01649726 +n06596179 +n03616091 +n07754279 +n02072493 +n12408280 +n04314632 +n02412700 +n04030846 +n09833997 +n03599964 +n05258627 +n12572759 +n12136581 +n02419056 +n12453714 +n11652217 +n03878511 +n03907908 +n12223160 +n10514121 +n04153330 +n12163279 +n12623818 +n03495671 +n13222985 +n10354754 +n04365112 +n12384680 +n12538209 +n03105214 +n12534862 +n13869045 +n03945928 +n11613692 +n11892181 +n13002209 +n02685253 +n07598529 +n02629716 +n13202355 +n07927070 +n02176916 +n04370955 +n11988132 +n03246197 +n01440467 +n07620145 +n03940894 +n01897667 +n03408340 +n12602612 +n02539424 +n03863657 +n04559620 +n02604480 +n11822300 +n03518829 +n11619845 +n10504090 +n03341035 +n02908123 +n04281998 +n03277602 
+n03865288 +n10074578 +n13902793 +n03054605 +n04404200 +n12786836 +n12235051 +n04035231 +n12009792 +n12705458 +n04378489 +n02476870 +n11954798 +n03573848 +n02087314 +n03162460 +n04363412 +n02261063 +n09953615 +n01947139 +n03044801 +n04287351 +n04479287 +n03861596 +n12510343 +n07854066 +n03027505 +n12161577 +n04197878 +n01812187 +n10015792 +n08685188 +n11737009 +n10333044 +n02730568 +n10290813 +n13096779 +n05257476 +n07917951 +n12121187 +n03517509 +n07932762 +n02336275 +n12159942 +n12105981 +n02562971 +n13882961 +n12016777 +n02793684 +n12717644 +n01380754 +n07724173 +n04055861 +n11831297 +n03059934 +n03370646 +n10065758 +n09459979 +n07913644 +n04322531 +n03457451 +n02567633 +n04240867 +n10693334 +n10556704 +n04614844 +n07909362 +n12082131 +n09268007 +n04359217 +n09883807 +n02292085 +n04052346 +n03431570 +n02843465 +n04584056 +n04432043 +n09846142 +n07864317 +n04475749 +n04227050 +n04280845 +n03535284 +n07890617 +n03217889 +n02806762 +n11967315 +n11762927 +n02501923 +n03442487 +n09690083 +n02964634 +n02920164 +n07855317 +n10196725 +n03042829 +n11662937 +n12183816 +n12311224 +n13884261 +n02243209 +n03140771 +n02385002 +n03071288 +n12936826 +n04583022 +n07859142 +n04578112 +n04467506 +n12938081 +n09982152 +n12555255 +n03335333 +n10104888 +n12151170 +n12709349 +n10456138 +n02237868 +n07620327 +n12561309 +n12341931 +n12350032 +n01775730 +n12950796 +n01440242 +n04261767 +n10568915 +n12285195 +n07589872 +n13112035 +n07840395 +n11750508 +n12286197 +n03336168 +n03325288 +n02551134 +n04293258 +n13130014 +n07733124 +n04451139 +n11985903 +n03602365 +n11722342 +n11944751 +n12897999 +n02277422 +n03101302 +n07608245 +n03531982 +n01997825 +n11713370 +n04442582 +n02833403 +n04427857 +n01648356 +n10645223 +n10414865 +n10696101 +n12885045 +n10037080 +n12218274 +n07570530 +n04493259 +n10659042 +n10577710 +n03141612 +n10582604 +n00446632 +n02834642 +n07568389 +n04583888 +n04096848 +n12879068 +n04495051 +n09837459 +n12216215 +n03702440 +n10174695 +n10559009 +n10577182 +n07686299 
+n04269668 +n02404028 +n03720665 +n09885866 +n03082450 +n12492682 +n12780563 +n03703463 +n02644360 +n02307910 +n01374703 +n04402342 +n04264134 +n03158414 +n04443433 +n12522894 +n10803978 +n11706942 +n10751026 +n13143758 +n02972934 +n04174234 +n12718995 +n11994150 +n11545350 +n12526754 +n07753448 +n02870772 +n11942659 +n11744108 +n12735160 +n12229887 +n04970312 +n02874336 +n10721819 +n13193269 +n03330665 +n09865162 +n10306595 +n12161744 +n03303669 +n07846688 +n02168427 +n01961600 +n03559531 +n09826821 +n03413124 +n09695019 +n03783873 +n11863877 +n13874558 +n02283617 +n11895472 +n13182799 +n07854614 +n03283827 +n01397690 +n02650413 +n09809279 +n10290541 +n10383505 +n11724660 +n07689757 +n10181547 +n07620597 +n11979354 +n02771547 +n13061471 +n12631637 +n11966385 +n03969510 +n11735977 +n07621497 +n12956588 +n03217653 +n04546081 +n11696450 +n10300654 +n02032769 +n01654863 +n09779280 +n02390258 +n03887512 +n10489426 +n10745770 +n10713843 +n03602194 +n10710913 +n07864475 +n04486322 +n07915213 +n08663051 +n10236842 +n02390738 +n02388453 +n03598385 +n12228689 +n11771746 +n12803226 +n11242849 +n02378149 +n10427223 +n05448827 +n11870044 +n12477983 +n12311413 +n03500090 +n10280034 +n02685365 +n03652389 +n12728656 +n07695284 +n09961198 +n03780799 +n03935883 +n01612955 +n12475774 +n02701730 +n07833535 +n12584365 +n03902220 +n12727960 +n10619492 +n04450465 +n10646780 +n10110731 +n04142175 +n12296735 +n09337048 +n12681579 +n12819354 +n12541403 +n04305016 +n12798910 +n10321126 +n08618831 +n09721244 +n02225798 +n01637338 +n12218868 +n05545879 +n12022382 +n03972372 +n02505063 +n01694311 +n10695450 +n10081842 +n12297507 +n07592922 +n12118661 +n01952712 +n10517137 +n01340522 +n07719330 +n03729482 +n04168541 +n03090710 +n07873679 +n07828378 +n07728284 +n10343088 +n07869937 +n14585392 +n01453475 +n12095412 +n04973020 +n12810007 +n07564515 +n01599741 +n11629047 +n09937802 +n12450607 +n12460146 +n02292401 +n03632963 +n09617696 +n12545232 +n02874642 +n09934488 +n10091349 +n01447946 
+n05469861 +n11830400 +n03382533 +n02608547 +n12697152 +n03542727 +n10716576 +n03664159 +n07568625 +n02976815 +n13147532 +n02336826 +n12432574 +n07686461 +n04107598 +n02505998 +n09849167 +n03688066 +n02836513 +n01576358 +n01893021 +n12017511 +n12065649 +n01714231 +n11662585 +n12827907 +n12954353 +n11936199 +n01368672 +n03843883 +n12184095 +n10058411 +n11684654 +n08506347 +n10579549 +n01423302 +n11604046 +n07613158 +n03605504 +n02090129 +n02284224 +n01958435 +n12664469 +n04459122 +n09617161 +n09780828 +n11830252 +n12870048 +n04247544 +n09871095 +n02962938 +n09933020 +n13064457 +n10341243 +n07694169 +n13200193 +n07765728 +n01524761 +n07730562 +n07751737 +n07740855 +n04192521 +n12593122 +n07841037 +n02809736 +n10604275 +n12512095 +n01907287 +n04592596 +n09823153 +n03181667 +n12449784 +n07908923 +n12365900 +n03053976 +n15060688 +n04165675 +n02530637 +n09816654 +n12540966 +n07934152 +n09290350 +n03455802 +n10111779 +n01351315 +n10281770 +n13862552 +n12435486 +n12370174 +n12296045 +n03493219 +n12363301 +n11973749 +n03939565 +n02938321 +n13209270 +n12604639 +n12657755 +n03604536 +n10328941 +n04278932 +n10376890 +n01884203 +n02061853 +n04256318 +n07831821 +n10585217 +n07591813 +n10210648 +n07739035 +n01632308 +n10319313 +n02861777 +n03821145 +n13029610 +n04239900 +n10313441 +n04951716 +n10628097 +n02368116 +n08571275 +n04433377 +n10458596 +n12435965 +n12448136 +n12129986 +n04295777 +n07898895 +n07854266 +n12327846 +n12318782 +n07825850 +n10414239 +n11731157 +n04409911 +n10655442 +n11829205 +n01738306 +n02840515 +n04150371 +n03369512 +n02645538 +n12773917 +n07818422 +n03227010 +n10303037 +n12942025 +n12406304 +n06616216 +n02435216 +n12981954 +n03683341 +n09703809 +n07722666 +n11817160 +n10110893 +n10228468 +n03572631 +n01378545 +n02130086 +n04388574 +n11960673 +n12956922 +n11924014 +n09895902 +n03426462 +n07759576 +n02563949 +n03466947 +n02522637 +n09480959 +n02033882 +n02451415 +n12677120 +n10580437 +n04425977 +n03057841 +n12285512 +n07614348 +n03144873 +n03391613 
+n12366870 +n02304657 +n07863935 +n07909714 +n02413717 +n12591702 +n07838659 +n02967407 +n12016914 +n02735268 +n09470027 +n10222259 +n03899100 +n10513509 +n11620016 +n12600267 +n04368840 +n03016209 +n04085017 +n03215076 +n10238272 +n09782855 +n07586179 +n12434483 +n12452480 +n01990516 +n12030092 +n11739978 +n12714254 +n13036804 +n07727377 +n07879560 +n03710421 +n12128490 +n11968519 +n03250588 +n10173579 +n03114041 +n02942015 +n12729164 +n07871065 +n02591330 +n09353815 +n10138472 +n02712545 +n12866333 +n07835823 +n03508485 +n01758895 +n02925385 +n03321419 +n09931418 +n02846874 +n12500518 +n07587819 +n03160186 +n04974340 +n13067532 +n11940349 +n13027879 +n02878534 +n10055566 +n07925708 +n12628356 +n11958499 +n03472672 +n04233295 +n04563020 +n03426871 +n04330109 +n03677682 +n04129766 +n02884859 +n12692521 +n10188856 +n03500971 +n10355306 +n12407545 +n11955040 +n10028541 +n10345659 +n14720833 +n09641578 +n12613706 +n11718296 +n03380301 +n01334217 +n03890358 +n03583419 +n12447121 +n09660010 +n11826569 +n11837351 +n12096089 +n03871860 +n01821554 +n12834938 +n02738449 +n02644665 +n03316873 +n12548564 +n03605417 +n12094401 +n13152339 +n03004531 +n03080904 +n03535647 +n12349315 +n04213264 +n07860208 +n01526766 +n03710937 +n11806521 +n10618234 +n12306938 +n10473562 +n10050880 +n04596116 +n02577164 +n04479694 +n07936093 +n07834286 +n12175181 +n03986857 +n02919648 +n12055073 +n04567593 +n07585015 +n12771085 +n10551576 +n09778783 +n01593282 +n02406952 +n12331263 +n10629329 +n12287195 +n07729225 +n07828041 +n01880473 +n12257725 +n02696246 +n07853232 +n11936864 +n09745229 +n03364156 +n04503155 +n03194297 +n04003359 +n07607361 +n10106387 +n10306890 +n10455619 +n01647180 +n07740115 +n12106323 +n03626272 +n11685621 +n11866706 +n04321121 +n01606978 +n12621619 +n11615259 +n07840304 +n02841847 +n05459769 +n03432360 +n04604276 +n12356395 +n12468545 +n03645168 +n00477827 +n03459591 +n04202142 +n12959074 +n07881625 +n12382233 +n02405692 +n12299640 +n12247202 +n12628705 +n12534625 
+n09264803 +n12176953 +n09835017 +n10390807 +n04975739 +n12474418 +n11931135 +n07917791 +n10636488 +n09690496 +n11993675 +n03703203 +n11794139 +n13015688 +n04168084 +n01948446 +n10169419 +n04455048 +n04973669 +n12840502 +n12120578 +n10448455 +n01386007 +n02288122 +n01441910 +n02278463 +n03108759 +n02753710 +n03143400 +n13080866 +n13917785 +n13124358 +n13220663 +n02475358 +n01925916 +n02684649 +n10451590 +n03869976 +n03881305 +n07928264 +n01422185 +n04035634 +n11996677 +n04261369 +n12925583 +n12764008 +n09972587 +n03708962 +n01791388 +n02892626 +n04098399 +n07823369 +n07752874 +n13225244 +n03376771 +n01771766 +n13146403 +n12157179 +n13897198 +n07770869 +n13240362 +n07610502 +n03688504 +n02896856 +n12543186 +n09967063 +n05453412 +n12590600 +n02378870 +n07568241 +n01687290 +n00474769 +n11694866 +n02338722 +n02637977 +n04567746 +n10586444 +n11907405 +n03421960 +n07605693 +n10384214 +n12877637 +n12018363 +n10056611 +n13882487 +n12140759 +n04114301 +n11762018 +n12678794 +n11817501 +n02116450 +n12018530 +n03324629 +n12726528 +n03155502 +n10493199 +n04181083 +n10609198 +n04328703 +n03045074 +n07769886 +n01892385 +n12828520 +n03165211 +n11800565 +n07567139 +n13877547 +n12829582 +n02949084 +n07589724 +n01746191 +n12395463 +n05459457 +n10565502 +n11981475 +n09310616 +n12327022 +n02313709 +n12957803 +n11865276 +n12955414 +n12939479 +n13225365 +n07936459 +n03139089 +n07577772 +n12057895 +n03620353 +n12152031 +n01885158 +n04096733 +n12626674 +n10464711 +n10675609 +n07752782 +n03709960 +n02540983 +n02285179 +n01903234 +n07835701 +n04421083 +n02352290 +n09421031 +n03349367 +n02539894 +n04052235 +n07922955 +n03941887 +n04234260 +n04423552 +n11975254 +n08501887 +n12489676 +n04574348 +n10602119 +n02163008 +n02748491 +n10024937 +n10033888 +n12605683 +n01790398 +n10128519 +n14977188 +n10293590 +n12077244 +n09741074 +n11694469 +n12692714 +n12159804 +n12533437 +n03831203 +n03692004 +n09462600 +n04537436 +n06618653 +n07913537 +n12783316 +n10038119 +n10236521 +n01486540 +n07875267 
+n04345787 +n07681355 +n13028937 +n03607186 +n07863107 +n12387103 +n09830926 +n03574416 +n04478383 +n11685091 +n03197446 +n03225458 +n09741722 +n07736527 +n02857907 +n10177150 +n12711398 +n10308275 +n02418770 +n02577662 +n09935107 +n03362639 +n12446908 +n04329681 +n04114428 +n09624899 +n12913144 +n12338034 +n02341616 +n12360817 +n12907857 +n02414904 +n05482922 +n11974888 +n04127117 +n12581110 +n04368365 +n01699254 +n12525753 +n04254450 +n11951052 +n12458874 +n12721477 +n07562651 +n02239192 +n10533874 +n12006306 +n09537660 +n10008123 +n02788386 +n03248835 +n04491312 +n11795580 +n04025633 +n10166189 +n07703889 +n11824747 +n07605198 +n12134836 +n03591116 +n02946753 +n13212025 +n11742310 +n02328820 +n02985606 +n09955944 +n12679432 +n10020366 +n12013035 +n02942147 +n04172512 +n11802410 +n10789709 +n03385295 +n02039497 +n01416213 +n11940750 +n12178780 +n01967963 +n12662379 +n12217851 +n02812631 +n12432069 +n09991740 +n03089477 +n12458713 +n03876111 +n10311661 +n12286068 +n02838958 +n11936369 +n03716228 +n13228017 +n06276902 +n12677331 +n04330189 +n10488016 +n12011370 +n04343740 +n07893792 +n02171164 +n03963483 +n12080588 +n07577657 +n12936155 +n03809686 +n04223066 +n04086066 +n12776558 +n07813579 +n01841943 +n12285705 +n02581482 +n11653570 +n10010632 +n04305947 +n12228886 +n12797368 +n01404495 +n09697986 +n11882237 +n10077879 +n07607832 +n09779461 +n13212379 +n10769188 +n10715789 +n01480106 +n02145910 +n04275093 +n01983829 +n01978010 +n09937903 +n11976314 +n11785276 +n12386945 +n04445782 +n10712374 +n10706812 +n10194775 +n12655062 +n10739135 +n02597972 +n02307176 +n04121342 +n02350670 +n12698027 +n02805845 +n02895008 +n13149970 +n03451365 +n04542595 +n07803895 +n07864198 +n09690864 +n03844550 +n12378249 +n10345422 +n13163553 +n10457903 +n10783539 +n10539015 +n11757017 +n10274173 +n08652376 +n10283546 +n04541777 +n02824152 +n12945177 +n02082056 +n03695957 +n07936015 +n07591162 +n03628071 +n02990758 +n07685118 +n04023422 +n04951875 +n03541393 +n10289176 +n04039209 
+n07913180 +n07910799 +n12017853 +n03732543 +n10656120 +n10512859 +n04556664 +n12464649 +n12927758 +n12078451 +n07878145 +n10561320 +n12467592 +n07689217 +n07619881 +n11935187 +n09837720 +n03642144 +n12220019 +n02983507 +n03271260 +n02778588 +n10193650 +n01654083 +n02746978 +n10202763 +n02953552 +n07924366 +n08583554 +n02905886 +n07855603 +n09745834 +n12366053 +n04140539 +n03383211 +n11648268 +n03352961 +n12116734 +n07771539 +n07836077 +n03842754 +n11683838 +n03004409 +n11730750 +n13098962 +n12292463 +n02867592 +n01653026 +n07583865 +n12548804 +n12702124 +n03917048 +n12677841 +n12511488 +n04217387 +n12495670 +n03554375 +n12403513 +n08558770 +n02781764 +n12339526 +n12742290 +n01404365 +n03591798 +n12446737 +n10494195 +n12110352 +n01672611 +n10493922 +n03638623 +n09910840 +n02238594 +n02575325 +n13186546 +n11873182 +n10344774 +n04094060 +n10417682 +n02749169 +n02428089 +n04549721 +n03824284 +n12107002 +n12784371 +n09986904 +n01634227 +n07826544 +n12253487 +n01679005 +n12516165 +n09339810 +n03126090 +n07803408 +n11883945 +n03842276 +n03397412 +n03280216 +n12264786 +n02545841 +n11877860 +n01830479 +n13207923 +n12490490 +n02542958 +n04114719 +n12590715 +n13226320 +n11644872 +n04119630 +n10176913 +n04213105 +n11652966 +n12546420 +n12625823 +n11897466 +n02092173 +n10567613 +n04953678 +n10059067 +n12408466 +n03056288 +n13036116 +n04169597 +n12467197 +n02569905 +n02758490 +n12623211 +n04077889 +n04959061 +n04183957 +n11689815 +n03777126 +n03306869 +n07720084 +n02659478 +n12947756 +n04341288 +n04448185 +n04037076 +n09828988 +n03346289 +n04174705 +n13126050 +n04255346 +n09764732 +n11773628 +n14891255 +n04314107 +n02184720 +n02646892 +n04320598 +n01979526 +n03191451 +n03662452 +n10290422 +n01739094 +n02305636 +n04202282 +n05459101 +n02766168 +n09994808 +n03528100 +n10475940 +n03005619 +n12639168 +n02144936 +n13202125 +n10703221 +n03770834 +n12324056 +n03474167 +n02609302 +n12166929 +n12852570 +n12920719 +n12508762 +n11983375 +n01422450 +n12616630 +n09681107 +n10486561 
+n13038577 +n12266644 +n02478875 +n02547014 +n02249809 +n03336742 +n12038760 +n01672432 +n09861287 +n03678879 +n01949973 +n09928845 +n02310149 +n12648693 +n10533983 +n12812801 +n04550676 +n01800633 +n12128306 +n12744142 +n13140367 +n07803213 +n07688265 +n13068434 +n02030568 +n12955840 +n01625121 +n13215258 +n04270576 +n02680638 +n02817251 +n01539272 +n04066023 +n12969927 +n10280598 +n04001661 +n09774167 +n10358575 +n01836673 +n02290664 +n09940725 +n12447581 +n07803779 +n04561965 +n10151261 +n01538362 +n10170060 +n13160365 +n09823287 +n12554729 +n10620212 +n11935027 +n03465605 +n03227856 +n08519299 +n07785487 +n03522863 +n02861286 +n12200905 +n04269502 +n02104184 +n07612273 +n01390763 +n11872658 +n12981086 +n10244359 +n01738731 +n12117235 +n12846690 +n02861658 +n08782627 +n09832633 +n02531114 +n01394492 +n03269073 +n03077442 +n09794668 +n13884384 +n08659331 +n02556373 +n02587877 +n03523506 +n03723153 +n12024805 +n13061172 +n03978575 +n07914686 +n13134844 +n12183026 +n03573574 +n03765128 +n03319167 +n01920438 +n07852452 +n07680655 +n03017698 +n12959538 +n04261506 +n01793340 +n03292362 +n12817855 +n03593222 +n01962506 +n12453018 +n04027367 +n12518481 +n09223487 +n07871335 +n03779246 +n09668562 +n01889849 +n02492356 +n07830841 +n03277149 +n09968652 +n03092476 +n10400205 +n06263202 +n07595368 +n12767208 +n02196896 +n12580012 +n10265801 +n02103181 +n02922461 +n01731277 +n12422559 +n04278605 +n02250280 +n03283413 +n11829922 +n10191613 +n02493224 +n04427559 +n12181352 +n12742878 +n10683675 +n04503705 +n03785142 +n12816942 +n10723230 +n11936707 +n12360534 +n12909759 +n03766218 +n02696843 +n11935877 +n07828156 +n10617397 +n12921499 +n13158714 +n10166394 +n12370549 +n03505015 +n12769065 +n02636550 +n10781236 +n09869317 +n10275249 +n04234763 +n10735173 +n13137225 +n02070776 +n04232312 +n07575226 +n03471030 +n07909954 +n02633677 +n01662060 +n07563642 +n04263950 +n11824344 +n13178707 +n02972714 +n10417288 +n12092930 +n11993203 +n10170681 +n03726116 +n03215337 +n12564613 
+n14975598 +n07758125 +n03123666 +n07717714 +n01421333 +n02359667 +n09403086 +n03857026 +n12759668 +n02628259 +n02307515 +n12146488 +n09777870 +n07819303 +n12105353 +n10784113 +n11802995 +n12561594 +n02845130 +n12100187 +n03507658 +n02141611 +n01800195 +n03470005 +n12444898 +n02203592 +n09707061 +n00475142 +n12216628 +n01732093 +n02581642 +n03803780 +n12114590 +n04541662 +n12267133 +n11652753 +n07859951 +n04524594 +n12843144 +n04040540 +n10604880 +n12559044 +n03063834 +n12394328 +n12704513 +n10230216 +n10756641 +n02101670 +n12309630 +n03070587 +n11626010 +n04239639 +n01638329 +n01928517 +n13144084 +n10420649 +n03102516 +n12395289 +n09833111 +n01651285 +n11688069 +n12881913 +n12783730 +n07716649 +n03618678 +n10344203 +n03626502 +n10718665 +n03577474 +n01683201 +n03246653 +n12153224 +n02519472 +n02470709 +n15090238 +n03129636 +n07774295 +n04577567 +n09995829 +n09662038 +n10297367 +n03555862 +n12531727 +n09947127 +n12533190 +n04062807 +n00479734 +n12860978 +n01884104 +n09866559 +n12069009 +n04595501 +n12088495 +n02909053 +n12283790 +n02180427 +n10697282 +n07562881 +n13092078 +n11706325 +n01746952 +n01978136 +n07731436 +n02386746 +n12648424 +n12726357 +n10314182 +n07839172 +n11753562 +n12903503 +n12589687 +n02375438 +n03604763 +n11549895 +n13202602 +n12304420 +n10738215 +n12220829 +n10095420 +n12177455 +n11887476 +n04006411 +n09838370 +n02853218 +n12688372 +n03335461 +n02800940 +n03036701 +n09885059 +n10206629 +n11922926 +n01678657 +n12192132 +n12248141 +n03108624 +n01936671 +n02417242 +n03222857 +n03768823 +n04343511 +n03538817 +n12655726 +n12521186 +n01330497 +n12767423 +n12965951 +n09695132 +n04410886 +n12599874 +n07865700 +n07596160 +n10227698 +n03224490 +n11598886 +n02948293 +n09906293 +n12247963 +n03301175 +n03895170 +n04259468 +n07808806 +n13147689 +n09856827 +n13882639 +n02241008 +n03842585 +n02883101 +n12182276 +n13918717 +n12728164 +n10634464 +n02477187 +n03107716 +n02342250 +n01479213 +n12793695 +n09808080 +n10707707 +n04161010 +n02836607 +n10076483 
+n07726386 +n03872273 +n10250712 +n07688412 +n13884930 +n12301766 +n10196404 +n07591330 +n03814727 +n09610255 +n12757115 +n09814381 +n02397987 +n07886317 +n03959123 +n02185167 +n03533845 +n11838413 +n10227393 +n07704305 +n03580615 +n02663485 +n10101981 +n04346855 +n10067011 +n04464125 +n02829510 +n10007995 +n07845775 +n03004713 +n02450561 +n09905530 +n10361060 +n12394638 +n12095934 +n10479135 +n03145277 +n12246773 +n13194212 +n04475900 +n03252787 +n14867545 +n10485298 +n09961739 +n02149653 +n01553762 +n03931980 +n02344408 +n11676850 +n04034367 +n04235646 +n12867184 +n12625670 +n12763529 +n07593107 +n04351550 +n02571810 +n13899735 +n03652826 +n09495962 +n03421768 +n04205062 +n11918808 +n07745197 +n07752264 +n01892744 +n04609811 +n10278456 +n11790936 +n09754152 +n13234519 +n09820044 +n00440643 +n02350357 +n03779884 +n07803992 +n03305953 +n01836087 +n10068234 +n10690421 +n03134394 +n12380761 +n12801966 +n03134232 +n02596720 +n07591236 +n11882821 +n02312175 +n02387983 +n01912152 +n10805501 +n12718074 +n03188290 +n02776505 +n10528148 +n09971385 +n10524223 +n09958292 +n02721813 +n10300829 +n12007766 +n12107191 +n04449700 +n02987950 +n11878633 +n12328801 +n04551833 +n10567722 +n11654984 +n02808968 +n12066451 +n02964075 +n11633284 +n02434712 +n03070854 +n07926540 +n01543936 +n10091861 +n09938080 +n11976511 +n03342432 +n12886831 +n12509993 +n12958261 +n12730776 +n10066206 +n07846014 +n13176714 +n03332591 +n04607640 +n02513727 +n12138248 +n11964848 +n01318053 +n10553140 +n07839055 +n02632039 +n11865429 +n02286654 +n02367812 +n12093885 +n10774329 +n02296912 +n01729672 +n10353928 +n12033504 +n11936113 +n03263338 +n07822053 +n09737050 +n13875884 +n13212559 +n11690088 +n05468739 +n09344724 +n02507148 +n01377694 +n04172607 +n10464870 +n07804152 +n02825872 +n03139640 +n11858703 +n10227490 +n12334153 +n03616225 +n12018188 +n12399656 +n10235269 +n11840764 +n01995514 +n03326475 +n12704041 +n10684827 +n03006788 +n13906484 +n02868240 +n03614887 +n03491724 +n12124172 +n03675907 
+n13170840 +n03983712 +n03254737 +n07836269 +n01784293 +n02095212 +n12470512 +n12219668 +n12920521 +n04492157 +n02950018 +n01922717 +n11797981 +n12601805 +n02744961 +n07814925 +n09798096 +n03939062 +n13891547 +n07564292 +n01590220 +n09295210 +n03997875 +n03479266 +n01491661 +n03781055 +n12528768 +n10657306 +n12014923 +n10094320 +n02532272 +n02224023 +n04541136 +n12067672 +n02661473 +n04233027 +n12399899 +n12889412 +n01736032 +n12551173 +n01337734 +n10104487 +n02921592 +n02148512 +n10216403 +n03276839 +n01781570 +n03999621 +n02505238 +n12537569 +n10433452 +n02351343 +n12365158 +n08539276 +n01897257 +n12221801 +n10557246 +n10437698 +n01803641 +n11836327 +n07813833 +n03468570 +n06277025 +n10040240 +n03692842 +n03017835 +n01881564 +n10487363 +n07937069 +n10597505 +n01638722 +n10160412 +n09825096 +n12611640 +n03098515 +n10654211 +n13196234 +n03436990 +n04058486 +n09814567 +n10758337 +n03515934 +n07688757 +n10269199 +n12627347 +n04521571 +n01636510 +n03220095 +n09982525 +n12768809 +n02340930 +n02473857 +n12336586 +n12125584 +n02833040 +n02498153 +n01467804 +n12120347 +n11650430 +n11953339 +n12592058 +n05102764 +n10575594 +n09722064 +n01966586 +n10619888 +n07852376 +n12650915 +n10321882 +n11974557 +n09847267 +n13201423 +n12337131 +n13185658 +n02150134 +n10538853 +n10471732 +n07836600 +n03526062 +n02512752 +n04232437 +n03367321 +n04308915 +n07600895 +n11539289 +n03539293 +n12699922 +n07817599 +n02781213 +n03594010 +n12035907 +n04075813 +n05233741 +n07863229 +n10735984 +n12095543 +n12272735 +n04229620 +n12240965 +n07768590 +n04420024 +n12111627 +n02861509 +n02595056 +n12183452 +n04607982 +n13213577 +n07741888 +n03750614 +n10043024 +n03372933 +n10051861 +n10199251 +n03249956 +n03984125 +n02956393 +n11619687 +n03356279 +n07833951 +n10715030 +n02340358 +n10768272 +n01494041 +n02592734 +n03323319 +n02136285 +n03995661 +n09945223 +n03547397 +n10044682 +n12878784 +n02803809 +n13160254 +n12726902 +n12196954 +n03161016 +n03105645 +n04218921 +n09493983 +n10719036 +n12263588 
+n12565102 +n10684146 +n03148518 +n04287986 +n02340640 +n04331443 +n10727016 +n03369407 +n07824863 +n07844786 +n12467433 +n07582811 +n02964196 +n02197877 +n10758445 +n03271376 +n13212175 +n03260504 +n12777778 +n11973634 +n05467054 +n11946313 +n02462213 +n13906669 +n10520286 +n02074726 +n01771100 +n13880199 +n09811568 +n13883763 +n02334728 +n11831100 +n12025220 +n12751172 +n03858837 +n10127186 +n12831535 +n07823591 +n02513805 +n03662301 +n09913329 +n02749670 +n10655986 +n01787191 +n03199488 +n12732252 +n12253664 +n07735294 +n03440876 +n09650839 +n03844965 +n10341446 +n12688187 +n12961242 +n03423224 +n13157346 +n09802951 +n11948044 +n03489048 +n12279060 +n03664840 +n03731882 +n07742605 +n07870734 +n03949761 +n10759331 +n07739923 +n02737351 +n01788291 +n11780424 +n03722646 +n12297110 +n12363768 +n04495310 +n10008254 +n03934890 +n01318478 +n03609959 +n10070377 +n04123228 +n13068735 +n02909706 +n10671042 +n10491998 +n07650792 +n12664710 +n10213034 +n03455642 +n10411867 +n09903936 +n10121800 +n02622955 +n03647423 +n07596566 +n09654898 +n12248780 +n02684515 +n04255670 +n06273890 +n03495941 +n12960552 +n09724234 +n03861048 +n03293095 +n11835251 +n12852428 +n04084517 +n01814620 +n13159890 +n03147156 +n02311748 +n10237799 +n07584859 +n01946827 +n09651968 +n12241192 +n03669245 +n07858336 +n11932927 +n04444218 +n10526534 +n03642573 +n09470222 +n10731732 +n12001924 +n03786096 +n01359762 +n03824999 +n13877667 +n10591811 +n10574311 +n03275125 +n11631985 +n10539160 +n10502950 +n12499757 +n12432707 +n12068615 +n07689624 +n02610373 +n03204436 +n13051346 +n13134531 +n07610890 +n04021164 +n03502897 +n02299378 +n10417843 +n10050043 +n07929940 +n02593453 +n10577820 +n12870225 +n03333851 +n09463226 +n11741575 +n09193551 +n12012510 +n11987349 +n09215023 +n07924655 +n10060075 +n11999278 +n03933391 +n02602059 +n11993444 +n02337902 +n10149867 +n04441093 +n02868429 +n10629647 +n04192361 +n12029039 +n02768433 +n12078747 +n12730143 +n03255167 +n12492900 +n01709876 +n09672725 +n07870620 
+n02315821 +n12277334 +n12204730 +n07852712 +n01319685 +n07802246 +n13031193 +n00812526 +n09658815 +n11982939 +n04264485 +n07893425 +n04094438 +n03285730 +n13182338 +n10724570 +n07832741 +n13210350 +n10654015 +n04058721 +n07875086 +n03462747 +n03994417 +n02889856 +n11957514 +n10109443 +n10478462 +n03064562 +n02477782 +n11920998 +n02138169 +n04227787 +n11797508 +n10753339 +n12928307 +n11921792 +n12643688 +n01833112 +n03919808 +n09817386 +n01903498 +n03848033 +n12031547 +n01035504 +n12324906 +n01911063 +n02588794 +n03749634 +n03539754 +n02242455 +n03079616 +n03246312 +n09705671 +n07860629 +n10458356 +n10051761 +n09709531 +n02867401 +n12522678 +n13150378 +n04462576 +n03462315 +n03712981 +n07607027 +n10581648 +n02957427 +n04271793 +n02253913 +n12824735 +n11697802 +n02161588 +n12463975 +n02361090 +n09784564 +n09680908 +n03512452 +n13214217 +n10712690 +n04023119 +n07814007 +n09833751 +n12885265 +n02259987 +n11933903 +n03628831 +n11967142 +n02533545 +n03900301 +n07919787 +n12793886 +n10768148 +n03071552 +n02780315 +n12193665 +n03378442 +n04486616 +n07832307 +n03164192 +n12786273 +n04261868 +n12655351 +n12320414 +n04371979 +n10630093 +n13052014 +n01357328 +n07879821 +n09753348 +n03796974 +n11701302 +n11678299 +n04022434 +n11610823 +n07726009 +n04117639 +n10474343 +n11888061 +n01842788 +n10435251 +n03343047 +n03383378 +n12750767 +n09662661 +n05241485 +n10000459 +n12220496 +n02246941 +n12676370 +n02253264 +n07766409 +n02940289 +n12089320 +n10363573 +n12922119 +n09783537 +n11695285 +n12331066 +n12573647 +n10218164 +n12509821 +n07862946 +n12818601 +n02589316 +n13191620 +n03758992 +n12112337 +n10733820 +n02898093 +n02645953 +n10150794 +n04595762 +n02344918 +n13132756 +n12859153 +n12138444 +n04211001 +n12935166 +n07830493 +n10142166 +n11951820 +n03018848 +n01453742 +n11985321 +n10000294 +n01362336 +n02328009 +n12639376 +n03090437 +n02204249 +n04312916 +n13127666 +n09684082 +n03432509 +n10274318 +n09704057 +n07593972 +n10074249 +n13157971 +n01638194 +n04036963 +n11708857 
+n03418749 +n12589458 +n11899762 +n07683138 +n01601410 +n07854707 +n04279063 +n03239607 +n10302700 +n12520406 +n12576451 +n03881534 +n07565608 +n02349390 +n12569851 +n12249294 +n04059399 +n03530189 +n09357346 +n04325208 +n13159691 +n04045941 +n13898315 +n11992479 +n02353411 +n07825496 +n12922458 +n03115014 +n11761836 +n03323211 +n02793296 +n03492087 +n05241662 +n05491154 +n10419630 +n04506895 +n10546428 +n02907296 +n10769459 +n11647868 +n13188462 +n03825442 +n13209460 +n10742005 +n07599242 +n12361754 +n04570532 +n04131811 +n07756499 +n02598134 +n01910252 +n02910701 +n10129338 +n13871717 +n12673588 +n12565912 +n07562172 +n02711237 +n10775003 +n07695410 +n02637179 +n12930951 +n10261211 +n02906963 +n01366700 +n10642705 +n09846586 +n02779719 +n04978561 +n01369358 +n12114010 +n03521771 +n10667709 +n02296612 +n10722029 +n03500557 +n01365474 +n10472447 +n07585644 +n07609316 +n04013060 +n04505888 +n09726811 +n12692160 +n12378963 +n03585551 +n13139837 +n10167565 +n03799375 +n11990920 +n09640327 +n04502989 +n10108832 +n10561736 +n01897426 +n11766189 +n12462582 +n12913524 +n02684356 +n13200542 +n10466198 +n04331892 +n01478969 +n07837234 +n07692248 +n04552097 +n12382875 +n01484447 +n04120695 +n12681376 +n10293861 +n11965962 +n11788039 +n03959227 +n01832813 +n09918867 +n09942697 +n07587206 +n10459882 +n01347583 +n02267208 +n03951453 +n03006903 +n12126736 +n10286749 +n03395401 +n04605057 +n03467887 +n12755559 +n04020744 +n11629354 +n01647033 +n02780445 +n10205714 +n09439032 +n03138128 +n02763083 +n07835547 +n12251278 +n11949857 +n01635480 +n10675142 +n07845335 +n07751977 +n10332110 +n11871496 +n11764814 +n12229651 +n07760297 +n09865672 +n02919308 +n12218490 +n03782929 +n12231709 +n11909864 +n03144982 +n11799331 +n10433610 +n10483395 +n03206023 +n05442594 +n03626418 +n07870478 +n10171456 +n11964446 +n12796849 +n02126317 +n03797062 +n01412694 +n07610746 +n03581897 +n04479526 +n12447891 +n11906514 +n09699642 +n12873984 +n10586903 +n13234114 +n02436353 +n11889205 +n01460303 
+n04400899 +n11884967 +n02140491 +n12215824 +n03586911 +n01394040 +n10691937 +n12371704 +n09668988 +n04362624 +n01740885 +n01337191 +n09714120 +n02185481 +n08555333 +n10704238 +n12430471 +n12034594 +n10012484 +n12088909 +n03205903 +n04129490 +n13090018 +n10712474 +n12234669 +n13016076 +n00454855 +n13882713 +n02644817 +n03192907 +n03519226 +n01561181 +n04583967 +n11732052 +n10732854 +n04480303 +n07934908 +n03825673 +n10621294 +n04354387 +n03374102 +n02922159 +n13158815 +n04000716 +n09685806 +n04427216 +n12051514 +n09712967 +n12081649 +n09748889 +n03252231 +n10704886 +n12897118 +n12525168 +n11728769 +n02731251 +n02548884 +n12403276 +n09627807 +n08679167 +n09663999 +n04247440 +n07711683 +n09909929 +n03415868 +n05244421 +n07680416 +n12757668 +n11935794 +n03483086 +n01860864 +n10755164 +n03675076 +n12004987 +n07566092 +n04078955 +n03379719 +n01916588 +n10138369 +n09755893 +n03649003 +n03977430 +n02309120 +n10616578 +n12242850 +n12388293 +n03292085 +n09919061 +n10302576 +n01497413 +n01936858 +n01377278 +n04358256 +n02667693 +n12125183 +n07758582 +n07813324 +n09737453 +n12745564 +n03855464 +n03166685 +n01446152 +n09801102 +n10561222 +n10576818 +n13915209 +n10474446 +n03845990 +n04237174 +n12531328 +n07855812 +n10763245 +n04614505 +n07905770 +n12051792 +n12653633 +n03593862 +n10359659 +n10436334 +n07853125 +n12911264 +n12265083 +n03638014 +n04444121 +n02706221 +n10563711 +n07808166 +n11799732 +n04093915 +n10451858 +n04410760 +n10075299 +n12740967 +n12635359 +n09611722 +n12902466 +n13915305 +n05542893 +n04440597 +n03675445 +n12315245 +n10646032 +n10047199 +n12775717 +n10365514 +n10590452 +n11616260 +n02812342 +n07856756 +n04570416 +n03565991 +n12215210 +n04330896 +n02388588 +n02266269 +n10760199 +n14714645 +n02742070 +n03565710 +n12609379 +n03420935 +n03441465 +n00453631 +n01963479 +n04362972 +n09863936 +n03961394 +n03009269 +n12297280 +n04561010 +n12192877 +n02981565 +n12134695 +n07855413 +n03232815 +n10180791 +n09932788 +n10571907 +n02109256 +n02660091 +n07865788 
+n13228536 +n10306279 +n02635580 +n03634899 +n10262343 +n12296929 +n04393301 +n06281175 +n04485586 +n13103660 +n10510974 +n04166436 +n01634522 +n07596362 +n12700357 +n08597579 +n11744011 +n12238756 +n01790171 +n04571800 +n11867311 +n03464467 +n12241880 +n09961605 +n12592544 +n03170459 +n09938991 +n02692680 +n10295371 +n04331765 +n02612167 +n02520810 +n11977887 +n04094608 +n07722390 +n07832202 +n12448361 +n04612159 +n12186352 +n13161151 +n12654227 +n09868899 +n10104756 +n09920106 +n12981301 +n02610980 +n12545865 +n10673296 +n04110841 +n01704626 +n04055700 +n12117912 +n10519126 +n12443736 +n01697978 +n02148088 +n03012644 +n12091697 +n10395390 +n10509810 +n10462751 +n02896949 +n03836602 +n03928994 +n07718195 +n02473983 +n08571642 +n02648916 +n11970298 +n06274292 +n04613158 +n09856401 +n12811713 +n13111340 +n12122442 +n10095265 +n04445610 +n11631619 +n07863644 +n12022821 +n10315217 +n12549799 +n03386343 +n03121040 +n03558007 +n12272432 +n11798496 +n02522866 +n02952935 +n10741493 +n12143065 +n07883156 +n09616573 +n02289988 +n13161904 +n02588945 +n00451768 +n12375769 +n10777299 +n04495183 +n11930994 +n09970088 +n02254246 +n12276314 +n07857598 +n04428382 +n03789794 +n03383821 +n12980080 +n01447139 +n12880799 +n03501520 +n10764465 +n13143285 +n12727729 +n12444095 +n02354621 +n13174354 +n01691652 +n07732525 +n10437014 +n04368235 +n10371052 +n02611898 +n03597147 +n09912431 +n03135788 +n07888058 +n02409202 +n14582716 +n11934463 +n04395332 +n12558680 +n05257967 +n11798978 +n10617024 +n04102760 +n12132092 +n12988572 +n10390698 +n11887310 +n12063211 +n12952717 +n13141972 +n12176453 +n10245863 +n10509161 +n10389976 +n10333165 +n01474864 +n09274305 +n11888424 +n10368711 +n13222877 +n10469611 +n07582970 +n09700125 +n12805762 +n07865575 +n07853852 +n03628421 +n04482975 +n03099622 +n01349735 +n11943133 +n12736603 +n12197601 +n10597745 +n04418644 +n12689305 +n07755262 +n10598459 +n04312020 +n03195485 +n09776642 +n10596517 +n10223606 +n01923890 +n12703716 +n03465040 +n12372233 
+n12528109 +n03571853 +n10802621 +n10204177 +n02320465 +n03976105 +n02214096 +n02148991 +n10377542 +n10697135 +n03538542 +n07582027 +n04517999 +n12180456 +n02838014 +n03977266 +n03818001 +n12191240 +n11648776 +n10773800 +n04475496 +n03945817 +n04682018 +n02994743 +n02787269 +n11650160 +n03834472 +n03389983 +n09797742 +n06209940 +n12525513 +n12672289 +n01893164 +n10710259 +n01892145 +n11773408 +n10554024 +n09864968 +n10699752 +n11631405 +n10414768 +n04430605 +n10742546 +n10738871 +n12857204 +n09309046 +n01724840 +n04123317 +n07881525 +n03868044 +n02140268 +n10708292 +n09838295 +n09797998 +n10710171 +n11814996 +n11938556 +n03543511 +n02151230 +n01515217 +n03533392 +n02039780 +n12810151 +n02335231 +n12152251 +n13225617 +n09801275 +n01978587 +n14821852 +n11742878 +n12679023 +n03521431 +n09679028 +n02021281 +n10784544 +n04421258 +n12492460 +n03720005 +n02541257 +n03889397 +n02888898 +n10659762 +n12045157 +n12712320 +n10369095 +n09721444 +n12769318 +n01703161 +n12697514 +n07836456 +n03905361 +n10660883 +n07769306 +n11893916 +n07846274 +n04110281 +n03655470 +n07740744 +n01363719 +n12540647 +n09896311 +n12842642 +n07755619 +n07754155 +n11548870 +n02868546 +n04215588 +n04288165 +n13201566 +n07721118 +n12018271 +n11903333 +n02909165 +n02662559 +n11658709 +n13063514 +n07725663 +n10179069 +n10776887 +n12637485 +n03814528 +n12542043 +n07833333 +n07820036 +n02746683 +n07925808 +n10349750 +n03154316 +n04155625 +n03232923 +n02116185 +n09998788 +n02821543 +n03410303 +n10656223 +n07916582 +n12880638 +n10408809 +n04612840 +n11805255 +n12044784 +n10497534 +n03458422 +n12873341 +n07808675 +n09476123 +n07611733 +n10598013 +n02214660 +n05469664 +n03952150 +n11855435 +n04375926 +n08523340 +n01642391 +n04007415 +n09756961 +n12891824 +n02894847 +n11698245 +n12906771 +n02894024 +n04131015 +n11882636 +n04386456 +n03291551 +n07837110 +n12462221 +n08540532 +n10299875 +n12705978 +n10448322 +n10487592 +n12175598 +n02272552 +n03833907 +n10383237 +n12758176 +n12729950 +n10061195 +n07816726 
+n03241903 +n12239880 +n10380499 +n07855188 +n10207077 +n02770078 +n12961393 +n03778459 +n10734741 +n03485575 +n09958447 +n12337246 +n11830045 +n09866354 +n03209666 +n01470145 +n10395209 +n03872016 +n04267091 +n12888457 +n12104104 +n04088229 +n01964957 +n12002651 +n02503756 +n00481938 +n01908042 +n03378765 +n04193883 +n09862183 +n11861487 +n02520525 +n02081060 +n10386754 +n12693865 +n04514095 +n01325060 +n02460817 +n07568095 +n03651605 +n02561937 +n12844409 +n12888016 +n02974565 +n12439154 +n13018906 +n12071259 +n03897634 +n02863176 +n10603528 +n03493911 +n12887532 +n12944095 +n12794568 +n09980458 +n03503567 +n11783162 +n13123309 +n11729860 +n03702582 +n04280373 +n10086744 +n01790557 +n12627526 +n10552393 +n12092629 +n03888998 +n12751675 +n01442450 +n02479332 +n07726230 +n03642341 +n03142325 +n06263895 +n12088327 +n09703344 +n10528493 +n02820085 +n07737594 +n04090781 +n09901642 +n02328942 +n02724722 +n09866115 +n12658715 +n10481167 +n13135692 +n11850918 +n10205344 +n12361560 +n03698123 +n03284482 +n12106134 +n04441528 +n02591613 +n02581108 +n07856186 +n12197359 +n12900783 +n01725713 +n12012253 +n03907475 +n02170738 +n03694949 +n13238654 +n04611795 +n02782432 +n13191148 +n02741367 +n04170694 +n12770892 +n01973148 +n10080508 +n10161622 +n09808591 +n07912093 +n02059541 +n02779971 +n03857156 +n12945366 +n03055159 +n12758325 +n10067305 +n02597818 +n07808352 +n13147153 +n10679723 +n02271222 +n04012665 +n12942729 +n10349243 +n01377510 +n07800636 +n10654321 +n10219453 +n09961469 +n10732521 +n04479405 +n11632929 +n03856728 +n08658918 +n10327143 +n10754281 +n02085118 +n09691604 +n09952163 +n10082299 +n03872167 +n03733465 +n04138869 +n01425223 +n12066821 +n02177506 +n09892262 +n02896694 +n12983654 +n13224922 +n09658921 +n12744850 +n03639880 +n02943686 +n10660621 +n11936539 +n03698226 +n04519536 +n12392765 +n09319604 +n07567039 +n04160261 +n01802159 +n02838178 +n07746910 +n02266421 +n10240417 +n12542240 +n12550408 +n01445857 +n04132465 +n03569014 +n12666050 +n12362514 
+n10676569 +n09702673 +n12885510 +n04447156 +n04396226 +n12240150 +n11639306 +n02249134 +n01340785 +n02833140 +n10027590 +n02142407 +n11996251 +n07874531 +n04340019 +n03166120 +n10420277 +n04465203 +n12738259 +n12831141 +n03998673 +n01385017 +n12842519 +n02587051 +n10753061 +n12505253 +n13906936 +n01989516 +n12640435 +n07852532 +n04243142 +n10261511 +n12853287 +n12239240 +n03973003 +n09983889 +n10345302 +n14804958 +n02354162 +n03049326 +n10443659 +n01318660 +n12787364 +n04253304 +n11941094 +n09283514 +n09393524 +n11865574 +n01531639 +n04409279 +n02859729 +n10712835 +n03694196 +n04343630 +n10331098 +n12929600 +n02826259 +n10171219 +n07735179 +n07594840 +n03709644 +n09950728 +n09859285 +n07718329 +n01418620 +n09858299 +n12395068 +n10011360 +n07763290 +n02643316 +n03596099 +n04422566 +n11958888 +n09650989 +n10318686 +n01333082 +n12886402 +n03781467 +n12667582 +n02923535 +n09988311 +n08663860 +n02508346 +n13885011 +n03939281 +n10772937 +n04485750 +n09871952 +n10291942 +n07759324 +n10174971 +n03666238 +n01937579 +n02308033 +n07847706 +n10371330 +n04124887 +n11853079 +n11941478 +n12647231 +n04601041 +n12718483 +n02902816 +n01941340 +n04066767 +n07617839 +n02254901 +n03488784 +n07834774 +n02524659 +n03367969 +n10783734 +n03422484 +n09776807 +n03970363 +n10131590 +n03433247 +n02622712 +n10206506 +n12061104 +n11936287 +n07874674 +n10061043 +n07828275 +n03764606 +n12236768 +n01826844 +n09741904 +n05454978 +n03591592 +n01441272 +n03736372 +n07585474 +n12762405 +n12943912 +n01894522 +n03218446 +n11846425 +n11689678 +n04147916 +n02375862 +n10409459 +n09287415 +n10113583 +n03261263 +n02817386 +n09869578 +n10550252 +n02532786 +n12031388 +n07937344 +n11612235 +n01571410 +n09402944 +n04234670 +n02603862 +n04196925 +n09999135 +n10468750 +n15093049 +n03003633 +n11650307 +n12312110 +n02525703 +n10501635 +n09751622 +n10114550 +n10103155 +n12829975 +n04004099 +n12419878 +n02082190 +n03328201 +n03093427 +n07845571 +n12655498 +n02558206 +n12563045 +n07573453 +n12324558 +n13016289 
+n10601234 +n10310783 +n03531691 +n02135610 +n03168543 +n09985978 +n10615334 +n07839312 +n09985809 +n10142537 +n10417969 +n07869111 +n12514992 +n04327544 +n10326776 +n12583681 +n01476418 +n12840168 +n03852544 +n11713763 +n07824502 +n07858841 +n12256325 +n03036149 +n07883661 +n04500390 +n10170866 +n01835918 +n10760951 +n10720197 +n12330239 +n02135844 +n10210512 +n03217739 +n10802953 +n03136254 +n02161225 +n03961630 +n12927194 +n02251233 +n13891937 +n09945603 +n02695762 +n12181612 +n13234857 +n10175725 +n11346873 +n07934678 +n02318687 +n10251329 +n04112921 +n04001132 +n03042984 +n11704791 +n04246459 +n12193334 +n10718509 +n10371221 +n05278922 +n03265754 +n12186554 +n12481289 +n10521853 +n10748506 +n11729142 +n10143595 +n09422631 +n07562984 +n07850219 +n04193742 +n11997160 +n12002826 +n12820113 +n04132829 +n10272913 +n03358841 +n12610740 +n12384569 +n10725280 +n02746008 +n13148384 +n12635151 +n02337171 +n10350774 +n12308907 +n04542474 +n04339062 +n03549350 +n10240235 +n10556033 +n10214390 +n01791314 +n02801047 +n07817465 +n11610602 +n10315730 +n14592309 +n10249191 +n12453857 +n12579822 +n09833275 +n04051269 +n11552594 +n04088343 +n04565039 +n03930431 +n10679503 +n11899921 +n10295479 +n01357507 +n13036312 +n03404900 +n12523141 +n01816017 +n02020578 +n12661045 +n06262943 +n02775813 +n12921315 +n09751076 +n09834258 +n10585628 +n12885754 +n04411019 +n10342367 +n10368798 +n09672840 +n12729023 +n04578329 +n10325549 +n03680248 +n11920663 +n10416567 +n10011486 +n01643255 +n03193754 +n07823814 +n04055447 +n10660128 +n07765612 +n07612530 +n04205613 +n09677427 +n03989199 +n11100798 +n12721122 +n10000787 +n10382157 +n07724819 +n12928819 +n11631159 +n02608996 +n10516527 +n09703101 +n12290975 +n03470222 +n03810412 +n03729131 +n03356038 +n12692024 +n12614625 +n10789415 +n02333819 +n01722670 +n03885410 +n12038208 +n02294097 +n02608860 +n02500596 +n07909231 +n03254625 +n09681973 +n12221368 +n01893399 +n10025295 +n03194812 +n13181406 +n12249122 +n03447894 +n09795010 +n02187900 
+n10139651 +n10631654 +n01792530 +n02569631 +n07853946 +n09907804 +n03263758 +n04214649 +n02450829 +n02431542 +n11998492 +n02651060 +n04101860 +n01806061 +n13901423 +n12903964 +n03968479 +n04268565 +n12601494 +n02083780 +n04570118 +n12247407 +n03337822 +n09878921 +n02369935 +n10022908 +n09667358 +n13160938 +n11937360 +n07741623 +n03705808 +n12241426 +n10478118 +n03805933 +n10343869 +n09391774 +n03482128 +n10357737 +n10334461 +n09675045 +n09662951 +n10174253 +n01815270 +n13873361 +n04432785 +n09778927 +n10671898 +n05571341 +n10033572 +n09864632 +n10618465 +n03437184 +n12786464 +n01723579 +n11798270 +n07742415 +n02143142 +n10548419 +n03695122 +n02518622 +n04605446 +n10218292 +n11832671 +n12646950 +n03382708 +n09844898 +n09674786 +n01472502 +n07616906 +n09763272 +n03982767 +n10005006 +n03059236 +n01816474 +n03725869 +n01979269 +n04226322 +n13236100 +n03920384 +n11852148 +n04373563 +n04324120 +n11686652 +n03036341 +n02142898 +n09783776 +n13147918 +n03465320 +n07855721 +n10336411 +n10438619 +n07750299 +n12237152 +n03559373 +n10077106 +n10169796 +n09828403 +n09959658 +n12464128 +n12934685 +n04221673 +n02617537 +n11689367 +n10180580 +n07813717 +n12529905 +n02340186 +n01400247 +n11749112 +n04404072 +n03135656 +n12098827 +n12481150 +n10023506 +n03500838 +n01564101 +n04009923 +n10023264 +n03908456 +n03206405 +n07590068 +n09958133 +n10755394 +n01423617 +n11511327 +n10536274 +n01965252 +n11549245 +n11935627 +n09635635 +n03752071 +n07585997 +n03147084 +n12666159 +n09748408 +n03796848 +n01501948 +n02345078 +n12430675 +n03103128 +n11710987 +n03393199 +n09233603 +n10465002 +n04298765 +n01351170 +n02720576 +n03966582 +n10643837 +n12420124 +n10793799 +n01652297 +n09281252 +n11983606 +n10222497 +n11832899 +n02391617 +n12434106 +n03987674 +n02140179 +n07896560 +n04325804 +n10647745 +n01924800 +n10156629 +n03545961 +n03906789 +n01890564 +n10699558 +n12332218 +n03247495 +n11839460 +n03527675 +n12586725 +n13208965 +n02714315 +n02750320 +n04615149 +n12679876 +n12863234 +n03304323 
+n12139793 +n11922755 +n12321669 +n04979307 +n01921059 +n09657206 +n13042134 +n04045787 +n11700279 +n02337598 +n01415920 +n01400391 +n13207572 +n10785480 +n02515713 +n12018100 +n02634545 +n03292736 +n02881546 +n12655605 +n03105810 +n10545792 +n03894933 +n09796974 +n10320484 +n12308112 +n11549009 +n13047862 +n14941787 +n12379531 +n10540252 +n11696935 +n12184468 +n12851860 +n12908854 +n10586265 +n12369066 +n10426630 +n12523850 +n03916289 +n04538878 +n09908769 +n02828115 +n07560422 +n10266016 +n03569174 +n06423496 +n10495167 +n03617834 +n09327538 +n10195056 +n10508379 +n13031323 +n11659248 +n04242315 +n10742111 +n10700963 +n12032686 +n09877587 +n07825597 +n07568991 +n11736362 +n12169099 +n13103750 +n03263640 +n12248941 +n10665302 +n01920051 +n09704283 +n11533999 +n04503073 +n11645163 +n10639817 +n09920901 +n06340977 +n03251100 +n10378113 +n03226090 +n10131268 +n02877513 +n13191884 +n02787120 +n11709045 +n02740061 +n12323665 +n02831998 +n10342180 +n12716594 +n04498275 +n09905050 +n03745487 +n07642833 +n10294020 +n10211666 +n12205460 +n02981198 +n01642943 +n07679140 +n04390483 +n10432875 +n09214269 +n10792506 +n10243483 +n13099833 +n10221520 +n13177768 +n04091584 +n10672540 +n10200246 +n13889331 +n02345340 +n10237556 +n01833415 +n01335218 +n09804230 +n09957523 +n05235879 +n10070449 +n10308653 +n10721708 +n04312654 +n10394434 +n12201938 +n12434775 +n07601025 +n02672152 +n10157271 +n02635154 +n12572858 +n13182937 +n10160188 +n03396997 +n10344656 +n02968210 +n10190516 +n07684422 +n03706939 +n07618871 +n02290870 +n03817331 +n03275311 +n12698774 +n04375080 +n07837630 +n04314216 +n11833373 +n07618684 +n03742238 +n12532886 +n03712444 +n11750989 +n10038620 +n09617577 +n03807334 +n10108089 +n01816140 +n10715347 +n02648035 +n13127303 +n02809491 +n02430748 +n12235479 +n01451863 +n01514926 +n10010864 +n01913440 +n09660240 +n11806369 +n01470479 +n12655245 +n07655067 +n03436772 +n11778092 +n03951800 +n10277815 +n07931733 +n01479820 +n03576955 +n07609549 +n12568649 +n05263316 
+n02636405 +n01384084 +n03298352 +n07617344 +n09987045 +n10573957 +n07801709 +n02589062 +n02534165 +n02748359 +n09607782 +n07590974 +n02199170 +n02696569 +n09678747 +n12795209 +n13176363 +n10663315 +n10588724 +n09772330 +n10174589 +n12366313 +n11883628 +n07617447 +n01334690 +n03168663 +n11764478 +n08599174 +n03942028 +n12153033 +n03448696 +n12096674 +n10037588 +n03548320 +n09760290 +n10374541 +n09653438 +n10294139 +n10276942 +n12279293 +n12764507 +n12803958 +n10764622 +n02140858 +n07599068 +n10245507 +n12351790 +n12818004 +n10118301 +n03945459 +n09912995 +n12176709 +n03873996 +n10339179 +n10614507 +n10114662 +n10784922 +n03821424 +n04959230 +n13015509 +n12573911 +n11948469 +n09775907 +n12758014 +n01780142 +n09956578 +n12165384 +n10088200 +n10382480 +n04131113 +n09930628 +n09784160 +n11750173 +n13064111 +n03817522 +n12662074 +n03176238 +n12310021 +n11679378 +n09961331 +n02385580 +n11904274 +n03113505 +n10244913 +n02836900 +n09986700 +n11963572 +n13158605 +n10321632 +n02179891 +n02189670 +n10097995 +n10774756 +n10783240 +n10605737 +n02530052 +n10386196 +n10184505 +n09788237 +n03589672 +n12509109 +n10658304 +n12966804 +n12559518 +n03189311 +n01451295 +n12179632 +n12301613 +n10496489 +n03402785 +n10244108 +n02385676 +n03552001 +n03092053 +n02313360 +n02547733 +n02109391 +n01327909 +n04574606 +n03060728 +n07840124 +n10567848 +n10062176 +n02703124 +n10804732 +n12699301 +n04515890 +n07919665 +n10457214 +n09663248 +n03165955 +n12988341 +n03987865 +n03031756 +n10277912 +n10172080 +n09325824 +n03198223 +n09605110 +n10113869 +n11603462 +n03352366 +n11930203 +n09769929 +n12979316 +n02579762 +n09953052 +n03105974 +n00476140 +n11598287 +n02830157 +n10512201 +n09746936 +n10668666 +n02919976 +n09993651 +n02149861 +n09705003 +n10389865 +n11655152 +n10010767 +n10070563 +n03688832 +n10590239 +n11936027 +n02939763 +n03163488 +n03171910 +n09955406 +n03266195 +n10217208 +n09338013 +n07594250 +n03215930 +n09725935 +n10592049 +n03732658 +n12498457 +n09966554 +n10668450 +n10361525 
+n04060198 +n11936624 +n02602760 +n03942600 +n03708425 +n10020533 +n12067817 +n07590177 +n01891274 +n11837204 +n01419332 +n03860234 +n12616248 +n07834160 +n09867154 +n09788073 +n12222493 +n03388990 +n04245412 +n10182402 +n11675404 +n10450038 +n13045594 +n13158167 +n13082568 +n12052267 +n12707199 +n07810531 +n07914887 +n13127001 +n02573249 +n08619112 +n10471859 +n09919899 +n03635516 +n12067029 +n03352232 +n07765517 +n10519984 +n02742194 +n03062798 +n13124654 +n09958569 +n02370137 +n10121714 +n04019335 +n07732433 +n02559383 +n12585137 +n09729156 +n10744078 +n09954355 +n03078506 +n10062042 +n10688811 +n02668613 +n03142205 +n10347204 +n10518349 +n09898020 +n12563702 +n05468098 +n10116370 +n07838905 +n03127024 +n03545585 +n12801072 +n09940818 +n04480995 +n10466564 +n02606751 +n10032987 +n10771066 +n01587278 +n11852531 +n01455461 +n10397392 +n02349205 +n10180923 +n09778266 +n04366832 +n10051975 +n10538629 +n09865744 +n12554029 +n13118330 +n12952590 +n04187751 +n09924313 +n10062594 +n01980655 +n10028402 +n02567334 +n10590903 +n10265891 +n10739297 +n01457082 +n03437581 +n03713151 +n03475674 +n05464534 +n11863467 +n06592421 +n12491435 +n14914945 +n10279778 +n03388711 +n10483890 +n10612373 +n03332784 +n02332954 +n02952798 +n13041943 +n01607309 +n04356772 +n07711799 +n12670962 +n12229111 +n07878479 +n12401893 +n07772413 +n12138110 +n09781504 +n07902698 +n02750652 +n13042316 +n12400924 +n02304797 +n03066464 +n12852234 +n10155222 +n05541509 +n10711483 +n04210858 +n02835551 +n12859679 +n02935490 +n03540476 +n05279953 +n09807075 +n09617435 +n03566860 +n10549510 +n10025391 +n10754449 +n11927740 +n03554645 +n01837526 +n02656969 +n08648917 +n07860548 +n01452345 +n04021704 +n07783827 +n10080117 +n02187554 +n03214966 +n10036444 +n04291069 +n12407396 +n02170599 +n09896826 +n12417836 +n07845495 +n02749292 +n03061819 +n03682380 +n10756261 +n10369955 +n09692125 +n09978442 +n04277669 +n10539278 +n09703932 +n01879837 +n02746225 +n13159357 +n11763874 +n10540656 +n07933530 +n12987535 
+n02371344 +n10654827 +n09723944 +n12775393 +n11856573 +n12626878 +n12716400 +n09903639 +n09784043 +n03906894 +n10775128 +n03124313 +n10396727 +n02841641 +n10211830 +n12283395 +n03490784 +n14175579 +n04027935 +n12396091 +n02609823 +n01414216 +n09880741 +n11976933 +n03073384 +n09270160 +n11768816 +n12073217 +n11597657 +n09994878 +n11756329 +n12579404 +n03161893 +n01451115 +n07736971 +n02949356 +n03878418 +n12653436 +n10626630 +n12777892 +n13061704 +n10498699 +n03609786 +n03199358 +n10776339 +n10762480 +n13179056 +n10113249 +n04029913 +n12640081 +n10493835 +n11683216 +n03524287 +n04585626 +n02969527 +n12976554 +n08569482 +n10204833 +n12442548 +n02577952 +n09357447 +n10202225 +n02198129 +n11882972 +n10404426 +n01600341 +n12016434 +n09867069 +n10576223 +n09893600 +n01702479 +n04274686 +n04406552 +n02848118 +n02258629 +n03260733 +n03685640 +n11751974 +n09967555 +n06274546 +n09649067 +n10681557 +n07606933 +n03110202 +n11982545 +n10803031 +n02679142 +n04086937 +n10514255 +n04506402 +n03884554 +n09970192 +n10117017 +n12642435 +n10186686 +n02097967 +n03956531 +n11834890 +n02677436 +n10040698 +n11796188 +n03348142 +n04168472 +n02294407 +n12483282 +n09429630 +n04423687 +n09819477 +n09755555 +n10157016 +n03344935 +n07762373 +n12871859 +n09853541 +n09875979 +n13050705 +n02251067 +n10637483 +n03823673 +n10357012 +n03424204 +n04431648 +n01475940 +n02339282 +n10248198 +n07683265 +n13150592 +n10359117 +n10096508 +n03473078 +n13052248 +n10743356 +n03710079 +n10634990 +n04507689 +n07921090 +n02352002 +n03924407 +n03609147 +n02837567 +n03406759 +n03909658 +n10286282 +n12135576 +n01912809 +n10801561 +n10717055 +n03473465 +n03761588 +n03144156 +n09474412 +n10253611 +n12549420 +n02499568 +n09910222 +n10431122 +n12699031 +n01697749 +n11786843 +n03888808 +n12089496 +n10066314 +n10302905 +n12696830 +n09965787 +n11969806 +n04066388 +n13080306 +n03913930 +n09968259 +n10490421 +n10714195 +n07570021 +n10343449 +n10401204 +n03472796 +n10779897 +n11787190 +n03503097 +n10439523 +n12123648 
+n04279858 +n10511771 +n09755788 +n08253141 +n02616397 +n12248574 +n01645466 +n04334504 +n07729142 +n05451099 +n10503818 +n10354265 +n09707735 +n02633422 +n11999656 +n01324916 +n02088745 +n09354511 +n10705448 +n09756195 +n10136615 +n10427359 +n09702134 +n12600095 +n04122262 +n10791820 +n03330002 +n02713496 +n11710658 +n09664908 +n02550203 +n02349847 +n12835766 +n04098260 +n11536567 +n11686780 +n12875861 +n12758471 +n09806944 +n11810030 +n10400003 +n10098388 +n11663263 +n10559683 +n07833672 +n10753989 +n10643095 +n01988869 +n03112240 +n12911914 +n09979913 +n09785236 +n09790047 +n02676097 +n01653509 +n04601159 +n01938735 +n10748142 +n12978076 +n11990627 +n10437262 +n12972136 +n04077594 +n10148825 +n02269340 +n12886185 +n03608504 +n11677485 +n10612518 +n12267265 +n10649308 +n05458173 +n10650162 +n03213361 +n02747063 +n01611674 +n02322992 +n01554017 +n03512624 +n12773142 +n12747120 +n09902128 +n03162714 +n03924532 +n10299125 +n12378753 +n02778131 +n09976024 +n13093629 +n10778999 +n07721833 +n12232851 +n07876775 +n10097590 +n03194170 +n13029122 +n04573832 +n12859272 +n09639382 +n07688021 +n02878796 +n10751710 +n03633632 +n07762534 +n10779995 +n13914265 +n13093275 +n10729330 +n10433077 +n03663910 +n10499110 +n02272286 +n10371450 +n01967308 +n12633061 +n11659627 +n12982915 +n10344121 +n10268629 +n02697876 +n09879552 +n10167361 +n10719807 +n04042076 +n01632952 +n03243625 +n02125872 +n10105906 +n12194613 +n03149810 +n10721124 +n03947343 +n02020219 +n10122531 +n01315330 +n08647264 +n00452734 +n03607527 +n10010243 +n09863749 +n04473275 +n11782878 +n03585337 +n09655466 +n12989007 +n11711971 +n10716864 +n10475835 +n10704712 +n01894956 +n10568443 +n12881105 +n10387836 +n10403633 +n08645318 +n03500457 +n10377633 +n10108464 +n09933972 +n02618094 +n11798688 +n04155735 +n09780395 +n12822466 +n04302200 +n11899223 +n10633298 +n02760298 +n12142450 +n10803282 +n10769321 +n10514051 +n10597889 +n11837562 +n02261757 +n01458746 +n09830759 +n10003476 +n09817174 +n10738670 +n10118743 
+n12096563 +n03054491 +n12155773 +n10439727 +n04170384 +n03223923 +n12632733 +n07845421 +n10062905 +n11831521 +n04267985 +n12796385 +n04154854 +n00444142 +n09778537 +n03115663 +n04385157 +n10109826 +n02337332 +n09996304 +n09880189 +n12871696 +n11823305 +n02516776 +n12377494 +n08511017 +n04421417 +n10765305 +n09675673 +n03488111 +n03076623 +n11829672 +n10292316 +n10758949 +n13031474 +n02829353 +n10090745 +n09186592 +n12736999 +n12715195 +n11684499 +n03168933 +n09890192 +n10596899 +n12527081 +n10496393 +n10497135 +n02137302 +n03266620 +n12958615 +n12664187 +n02633977 +n04262869 +n04215800 +n13133233 +n02392555 +n09858733 +n10186350 +n01715888 +n03142099 +n08573674 +n11687071 +n02690715 +n03146342 +n12331788 +n08079613 +n10609092 +n12943049 +n12234318 +n02312325 +n12618146 +n10135197 +n11705573 +n02794368 +n02850358 +n09464486 +n01993525 +n03187153 +n10097262 +n02976641 +n12198793 +n12941717 +n10219240 +n12434634 +n03827420 +n10437137 +n10342893 +n04174026 +n10265281 +n07757874 +n10765885 +n01470895 +n02349557 +n11716698 +n03765467 +n10227793 +n07824268 +n12994892 +n10486236 +n02974454 +n10718349 +n11726145 +n09909660 +n03378593 +n07805006 +n09875025 +n02645691 +n10223069 +n03722944 +n04389999 +n02544274 +n10239928 +n04456011 +n10382302 +n01552333 +n10082562 +n12952469 +n09883047 +n10442573 +n01891013 +n10690268 +n13111504 +n02287352 +n03567635 +n10331347 +n09762385 +n09933842 +n02369555 +n12291459 +n09919200 +n01492860 +n02067768 +n10713254 +n10550468 +n12846335 +n03835729 +n12467018 +n11676743 +n03629643 +n12987423 +n10655730 +n08678783 +n10349836 +n10087736 +n10246703 +n10338391 +n04585456 +n04158138 +n10500942 +n09850974 +n10791890 +n10020807 +n03315805 +n02752917 +n04033801 +n10492086 +n04427473 +n02940706 +n12110475 +n09832978 +n12515393 +n07800487 +n09848110 +n02659176 +n09967406 +n10536134 +n10760622 +n09736485 +n07830690 +n07835173 +n09814252 +n10311506 +n10341955 +n03869838 +n07760673 +n09970402 +n12526178 +n11687964 +n09968741 +n10719267 +n07851054 
+n10116478 +n10599215 +n09951524 +n03855908 +n03997274 +n02986348 +n08599292 +n02474282 +n04155889 +n09983314 +n01987727 +n10280130 +n10404998 +n02294577 +n02998696 +n08586978 +n11652578 +n13867005 +n12663254 +n10524869 +n02287622 +n10220924 +n03279918 +n02626089 +n10291110 +n12820669 +n07861681 +n08643267 +n07720185 +n12555859 +n03225616 +n09769525 +n03295140 +n12489046 +n10615179 +n12150969 +n02888429 +n10753182 +n10267166 +n03675558 +n12693352 +n02378299 +n02788462 +n03622401 +n12236977 +n10730542 +n12758099 +n10502046 +n11937195 +n10366145 +n10307114 +n12984595 +n10128748 +n09362316 +n09789898 +n09654079 +n04260192 +n10114476 +n08623676 +n10331841 +n05265736 +n10269289 +n03090856 +n12764978 +n02825240 +n10358032 +n09825750 +n03062651 +n11196627 +n11825749 +n04148464 +n04439505 +n07572858 +n04561857 +n12904562 +n03643907 +n10723597 +n01492708 +n10071557 +n10140683 +n01739871 +n12984267 +n03072056 +n10772580 +n10462588 +n11936448 +n10494373 +n12845908 +n09793352 +n10717196 +n12577362 +n09779124 +n10663549 +n02286425 +n10380126 +n01890144 +n02751490 +n03361109 +n01781875 +n13128278 +n09994400 +n09883452 +n13881512 +n02833275 +n10362003 +n01376543 +n12366675 +n09984960 +n10173665 +n10673776 +n02057898 +n01934844 +n04057673 +n10018747 +n02916065 +n13024653 +n05539947 +n09648911 +n04150273 +n01393486 +n10411356 +n12232114 +n02436224 +n12757930 +n03095965 +n10555059 +n01577458 +n09666476 +n10598904 +n11656549 +n02591911 +n13092385 +n10506336 +n13103023 +n09658254 +n04095938 +n11936782 +n07824383 +n09781650 +n10240821 +n01780426 +n02850060 +n02863340 +n13914141 +n12138578 +n13034555 +n12291671 +n12133151 +n04515444 +n04591359 +n02589196 +n02689819 +n11740414 +n07610295 +n10246395 +n09921034 +n12447346 +n12641180 +n01419573 +n04242587 +n07760395 +n03399579 +n09866661 +n02549376 +n11861238 +n01588996 +n04319545 +n09789150 +n03288643 +n10312491 +n03353281 +n02345997 +n09711132 +n03043173 +n02558860 +n03703590 +n03188871 +n12589142 +n12113323 +n09987161 +n05242239 
+n09686262 +n09780984 +n09668199 +n09716933 +n11675738 +n04459243 +n11833749 +n10646942 +n07760070 +n10286539 +n04469684 +n13030616 +n03939440 +n01725086 +n09967816 +n10500824 +n13026015 +n03983928 +n02936921 +n04115542 +n10245029 +n12105828 +n12452673 +n10498046 +n10737264 +n11766046 +n04079603 +n10072054 +n12569037 +n10153155 +n09867311 +n02806992 +n10258602 +n10164025 +n10520964 +n02258508 +n12199399 +n05266096 +n08496334 +n10351064 +n12441552 +n12878325 +n13102648 +n02980625 +n03462972 +n12395906 +n13022903 +n11895714 +n03324814 +n11318824 +n01728266 +n07883510 +n10731013 +n10181799 +n12142357 +n09671089 +n11531334 +n01718414 +n04573625 +n10390600 +n11553522 +n01314910 +n04227519 +n10514784 +n02944256 +n12103680 +n03081859 +n11655592 +n12569616 +n10700105 +n09755086 +n03865820 +n01456137 +n10442232 +n02900987 +n04491934 +n07849026 +n04519728 +n09986450 +n03305300 +n10186143 +n02879422 +n03018614 +n10747548 +n10562509 +n10068425 +n12593341 +n11937692 +n08679562 +n09613690 +n10646433 +n12251740 +n10994097 +n13048447 +n03848537 +n12153741 +n12614096 +n11654438 +n09985470 +n10562968 +n02923915 +n10740594 +n07802767 +n12514592 +n10335801 +n03878674 +n12586499 +n10255459 +n02413824 +n10312600 +n02616128 +n12644283 +n04238953 +n04526520 +n01898593 +n09737161 +n03372822 +n09781398 +n10339251 +n02502807 +n10198832 +n10679610 +n13136781 +n11974373 +n11680457 +n10083677 +n04037298 +n09945021 +n09987239 +n02708885 +n13107807 +n10130877 +n12507379 +n08651104 +n12116058 +n10135297 +n04269086 +n03858533 +n10477955 +n04394031 +n10442417 +n10074735 +n03618797 +n03460455 +n04374521 +n10756061 +n08517010 +n12923108 +n02362194 +n01704103 +n10062492 +n01394771 +n10473789 +n10330593 +n02748183 +n12562141 +n09745933 +n02505485 +n11922661 +n12018014 +n09866922 +n04067143 +n13161254 +n07813495 +n01374846 +n10213429 +n03253071 +n02546028 +n01642097 +n01475232 +n03212247 +n10155600 +n11689957 +n11738997 +n10525878 +n03301389 +n10589666 +n01908958 +n10289766 +n03900028 +n03437295 
+n02987823 +n02739123 +n10505347 +n02546627 +n10381804 +n10132502 +n10336904 +n10189597 +n09786115 +n12875697 +n10761519 +n01470733 +n02875626 +n12111238 +n07862770 +n07856895 +n09996039 +n03368048 +n07913300 +n10062996 +n10555430 +n04302863 +n12758555 +n10740732 +n02385898 +n02385098 +n12162758 +n03887899 +n03976268 +n04234160 +n03641947 +n07857076 +n10578656 +n12135729 +n12675515 +n09032191 +n12969670 +n02600503 +n12518013 +n10227166 +n10121026 +n01801672 +n10661216 +n03244388 +n04147291 +n09664556 +n02539573 +n04480141 +n10601362 +n02613572 +n10537906 +n02613820 +n11656771 +n03841011 +n02845985 +n12534208 +n10241024 +n03645290 +n12743976 +n11922839 +n07709701 +n03066232 +n03467380 +n09266604 +n09663786 +n12775070 +n02427183 +n04083113 +n12896615 +n10501453 +n02345774 +n09965515 +n09704157 +n10666752 +n03846970 +n04167661 +n03991321 +n09556121 +n10686517 +n02586238 +n03594277 +n03591313 +n10391416 +n10756837 +n13163649 +n03971960 +n10245341 +n02577041 +n04481642 +n12373739 +n10214062 +n10091997 +n10275848 +n02090253 +n03514340 +n04593629 +n11795216 +n03126927 +n11871748 +n10272782 +n12056099 +n04484024 +n03101375 +n12255225 +n10724372 +n10531838 +n02354781 +n02389865 +n02853336 +n01477080 +n01779939 +n10776052 +n10724132 +n10284871 +n10554141 +n03898787 +n02366301 +n10721612 +n04421740 +n04256758 +n01445593 +n10103921 +n02729222 +n02530188 +n02387452 +n02601921 +n01711160 +n02474110 +n09869447 +n12789977 +n10158506 +n10396908 +n07839593 +n02662825 +n02473720 +n13034788 +n07752602 +n03762238 +n10262880 +n07770180 +n04030054 +n10151367 +n03525252 +n10252075 +n10747424 +n10191388 +n04130566 +n03951068 +n13239921 +n03733547 +n10358124 +n11549779 +n09203827 +n04043168 +n10359422 +n04286960 +n04237287 +n10130686 +n02338449 +n12912274 +n10586998 +n02812785 +n10364502 +n03955941 +n12324222 +n09743601 +n03766600 +n01427399 +n12968309 +n11776234 +n01501777 +n10051026 +n10397001 +n01516212 +n02596252 +n02225081 +n10479328 +n02109687 +n10181445 +n02248062 +n03802973 
+n01639187 +n02142734 +n02342534 +n02410141 +n02743426 +n03950359 +n12253835 +n07805478 +n03706415 +n03578981 +n04560619 +n09761753 +n03524425 +n01962788 +n04350235 +n10686694 +n13139321 +n10195155 +n12335937 +n12758399 +n03805374 +n12895298 +n03800371 +n11972959 +n11530008 +n03178538 +n02217839 +n10591072 +n04033557 +n01880813 +n12292877 +n02430643 +n07599383 +n01954516 +n09894909 +n02474605 +n03576443 +n07595051 +n03367875 +n12945549 +n02360480 +n14583400 +n04208582 +n02405577 +n02550655 +n02513355 +n04381450 +n00444490 +n03567912 +n09937688 +n07932323 +n04029416 +n01913346 +n13237508 +n04437276 +n12938445 +n03042384 +n12543639 +n03194992 +n04094250 +n12045514 +n03825913 +n03504293 +n12758250 +n03547861 +n03649288 +n04572235 +n07569423 +n03534695 +n03253714 +n01501641 +n13906767 +n12578255 +n11749603 +n07742513 +n07609083 +n04214413 +n07595751 +n12013701 +n12592839 +n12949160 +n04093223 +n02983072 +n03510072 +n02966068 +n03867854 +n01747285 +n10691318 +n13091982 +n12574470 +n02255023 +n03449217 +n03153585 +n04006227 +n13140049 +n02965024 +n03805503 +n03911406 +n13120958 +n12203699 +n01456454 +n10397142 +n12920043 +n02412977 +n08674344 +n07801007 +n03037590 +n10361296 +n13133316 +n03483637 +n04435759 +n12983873 +n02627037 +n03783304 +n07725158 +n02921292 +n01788864 +n01705010 +n12616996 +n03903290 +n08662427 +n03667060 +n07856992 +n03252422 +n02449699 +n12137954 +n10024025 +n07891095 +n04337157 +n04368109 +n03015631 +n02363996 +n12824289 +n03206602 +n12799269 +n02333733 +n01793565 +n01721898 +n03178173 +n02844056 +n11688378 +n13889066 +n02637475 +n03750437 +n01403457 +n01717229 +n02677136 +n12512294 +n03736269 +n02838577 +n08661878 +n01993830 +n02777638 +n02900857 +n04023021 +n03843092 +n07770439 +n12928491 +n03697812 +n02639922 +n13139482 +n07771082 +n12487058 +n07774182 +n02122810 +n02856362 +n11686195 +n11687432 +n02853870 +n04239218 +n02665250 +n02938218 +n11746600 +n10183347 +n10681194 +n04164199 +n04407257 +n12549005 +n02331842 +n03862379 +n02863638 
+n11962994 +n03091907 +n04177654 +n02252972 +n02403153 +n01376437 +n02848806 +n08579266 +n07616265 +n10331258 +n10765587 +n09433312 +n03412387 +n10178077 +n13123841 +n02532918 +n04144651 +n03296963 +n03450881 +n04348988 +n10425946 +n03257065 +n02354320 +n11689197 +n04084682 +n10140783 +n03637027 +n02346170 +n02559144 +n01705591 +n09400584 +n03840327 +n03918074 +n04053767 +n02406046 +n00288190 +n03160001 +n03366464 +n09249155 +n01324305 +n07556872 +n03381565 +n12705220 +n11874878 +n02632494 +n02502006 +n03146560 +n02179340 +n04312756 +n10162016 +n03800563 +n04140853 +n07933652 +n03075248 +n04421582 +n10652703 +n02218134 +n12233249 +n04578559 +n01781071 +n02615298 +n04436832 +n04054566 +n02608284 +n11674019 +n03505764 +n02662397 +n09422190 +n04382537 +n04355684 +n04383923 +n09888635 +n03783575 +n03228796 +n07772026 +n02381119 +n15060326 +n10586166 +n12647787 +n02458517 +n10281546 +n03498866 +n02485988 +n10121246 +n09391644 +n03103904 +n08676253 +n02203978 +n04092168 +n03213014 +n03138217 +n04135933 +n12612811 +n04478066 +n02157285 +n02543255 +n03863783 +n01502101 +n03930229 +n12439830 +n09425019 +n02618513 +n02910241 +n12261359 +n03648667 +n04365229 +n03461651 +n04388040 +n03295928 +n03581531 +n04203356 +n02622249 +n13142907 +n04497249 +n11678377 +n02366579 +n02931013 +n02837134 +n03132438 +n13092987 +n04196803 +n03056215 +n03255322 +n02130925 +n10291469 +n02971940 +n01718096 +n12510774 +n11766432 +n04271891 +n03366721 +n03154616 +n03694356 +n10478293 +n11763142 +n07763483 +n03037228 +n09201998 +n01517389 +n00443517 +n12693244 +n03580990 +n03519848 +n10238375 +n10783646 +n03564849 +n03975926 +n02473554 +n02450426 +n03464952 +n04411835 +n04573045 +n10505732 +n04337650 +n10621514 +n10334782 +n12434985 +n07769102 +n10594523 +n05475397 +n01875610 +n03299406 +n10507692 +n02593679 +n03317233 +n13239736 +n03550420 +n03247351 +n03819047 +n03633341 +n03154745 +n04073425 +n04532022 +n02910964 +n04301242 +n04378651 +n13098515 +n11775626 +n14603798 +n10263146 +n01886045 
+n03761731 +n02224713 +n04591249 +n02144251 +n03849412 +n11548728 +n04051705 +n12298165 +n03150795 +n03989447 +n02826459 +n07602650 +n03155915 +n09891730 +n02067603 +n01523105 +n03618339 +n03897130 +n02711780 +n05285623 +n03533486 +n04085873 +n01923404 +n10139077 +n01709484 +n02183507 +n03216562 +n01971850 +n03136051 +n02948834 +n03589313 +n03665851 +n02937336 +n02035656 +n07769465 +n07849186 +n12585373 +n12280364 +n02846260 +n02511730 +n02614653 +n04193179 +n11718681 +n09467696 +n01522450 +n03040836 +n03162297 +n11896141 +n04000480 +n10350220 +n07746038 +n02124157 +n10655169 +n03476542 +n03895038 +n00443917 +n07757753 +n01726203 +n02987706 +n12750076 +n03012734 +n02941228 +n04194009 +n04501127 +n09794550 +n03510487 +n08589670 +n03166951 +n03673270 +n09792125 +n08492354 +n02396157 +n01628331 +n03993878 +n07833816 +n04958865 +n13650447 +n04339191 +n02826683 +n02893269 +n02810139 +n02626471 +n02589796 +n08677801 +n04325968 +n03275864 +n02622547 +n04406687 +n04097085 +n02998107 +n07831450 +n03658102 +n02575590 +n03523398 +n02412909 +n02953850 +n04337503 +n03510987 +n12664005 +n03710294 +n13138155 +n10110093 +n07831955 +n03932080 +n12971804 +n03943623 +n03726371 +n10531445 +n12984489 +n07835051 +n12097556 +n02685701 +n03038041 +n02451125 +n04594919 +n02372140 +n02665985 +n03496183 +n03961828 +n03802800 +n01713170 +n03602790 +n04974145 +n02780588 +n04031884 +n03588216 +n02614140 +n04578708 +n04501281 +n03166600 +n03992975 +n04206070 +n03227721 +n02582349 +n02664642 +n07805389 +n09226869 +n02459190 +n12216968 +n03628984 +n02524928 +n09209025 +n04078002 +n03167153 +n03562565 +n07599554 +n10252547 +n03279804 +n07692887 +n14909584 +n02529293 +n04444953 +n04156814 +n07616174 +n03415626 +n03331244 +n03868324 +n03644073 +n02818687 +n10085101 +n02953056 +n03202481 +n02118707 +n03591901 +n12602434 +n02943465 +n02818254 +n07922607 +n02597004 +n04212810 +n04056073 +n12327528 +n02207647 +n01792808 +n03002555 +n03951213 +n12242123 +n10062275 +n12325787 +n10048117 +n11937278 
+n03624767 +n04039041 +n04059298 +n03707171 +n07758407 +n01333483 +n02219015 +n02436645 +n02478239 +n04457638 +n01781698 +n09474765 +n03686363 +n10769084 +n09456207 +n02385776 +n13555775 +n03962685 +n13129078 +n03463185 +n01429172 +n04243251 +n12177129 +n03143754 +n03958338 +n02791795 +n04560502 +n12776774 +n02745816 +n03009111 +n02976552 +n03008817 +n03211413 +n03537550 +n12200504 +n01909788 +n11790089 +n03480973 +n10507070 +n01707294 +n04374907 +n04281571 +n00006024 +n03823906 +n12603273 +n03503358 +n04027820 +n12645530 +n02535080 +n04143365 +n08385989 +n12661227 +n12814857 +n11871059 +n04268418 +n13128582 +n01928865 +n04359124 +n12670334 +n03610836 +n04543924 +n02252799 +n15102359 +n04437380 +n04316924 +n11872324 +n09330378 +n10122300 +n03784139 +n00443375 +n14993378 +n01721174 +n00004475 +n00006484 +n00007846 +n00015388 +n00017222 +n00021265 +n00021939 +n00288000 +n00433458 +n00433661 +n00433802 +n00439826 +n00440218 +n00440509 +n00440747 +n00441824 +n00442115 +n00442981 +n00443231 +n00444651 +n00445351 +n00445802 +n00447073 +n00447221 +n00447540 +n00448466 +n00448640 +n00448958 +n00449295 +n00449695 +n00450335 +n00450700 +n00451370 +n00451866 +n00452293 +n00453935 +n00454237 +n00454624 +n00463246 +n00464651 +n00464894 +n00467719 +n00467995 +n00468480 +n00469651 +n00471437 +n00471613 +n00479076 +n00480508 +n00480993 +n00482298 +n00523513 +n01035667 +n01316422 +n01316579 +n01316949 +n01317089 +n01317391 +n01317541 +n01319467 +n01320872 +n01321230 +n01321579 +n01321854 +n01322343 +n01322685 +n01322898 +n01323068 +n01326291 +n01329186 +n01338685 +n01339336 +n01340935 +n01342269 +n01358572 +n01367772 +n01375204 +n01376237 +n01380610 +n01384687 +n01385330 +n01387065 +n01389507 +n01390123 +n01392380 +n01395254 +n01397114 +n01402600 +n01407798 +n01421807 +n01438208 +n01439121 +n01439514 +n01439808 +n01441425 +n01444783 +n01445429 +n01446589 +n01446760 +n01448951 +n01450661 +n01454856 +n01455778 +n01458842 +n01459791 +n01461646 +n01466257 +n01467336 +n01468238 
+n01468712 +n01471682 +n01473806 +n01474283 +n01477525 +n01478511 +n01480516 +n01480880 +n01481331 +n01482071 +n01482330 +n01483522 +n01484097 +n01488918 +n01491874 +n01492357 +n01493541 +n01494757 +n01494882 +n01495006 +n01495701 +n01497118 +n01498406 +n01498699 +n01498989 +n01500091 +n01501160 +n01503061 +n01514752 +n01515078 +n01517565 +n01524359 +n01525720 +n01527194 +n01527617 +n01528654 +n01529672 +n01533339 +n01534582 +n01534762 +n01537134 +n01538955 +n01539573 +n01540233 +n01541922 +n01542786 +n01544208 +n01546921 +n01547832 +n01548301 +n01549430 +n01550761 +n01553142 +n01555809 +n01557185 +n01560105 +n01560636 +n01563128 +n01563746 +n01564394 +n01567133 +n01568132 +n01569836 +n01570676 +n01571904 +n01572328 +n01573074 +n01574045 +n01574390 +n01575745 +n01576695 +n01577659 +n01578575 +n01579028 +n01580379 +n01580490 +n01580772 +n01580870 +n01581166 +n01581434 +n01581730 +n01582398 +n01582498 +n01582856 +n01584225 +n01585121 +n01587834 +n01588431 +n01589286 +n01591697 +n01592257 +n01592540 +n01594372 +n01595624 +n01597336 +n01598588 +n01598988 +n01600085 +n01600657 +n01602080 +n01602209 +n01602630 +n01603600 +n01604330 +n01605630 +n01608814 +n01609062 +n01609391 +n01609751 +n01610955 +n01611472 +n01612628 +n01613294 +n01613615 +n01615121 +n01616551 +n01616764 +n01617095 +n01617443 +n01617766 +n01618082 +n01618922 +n01619310 +n01619536 +n01619835 +n01620135 +n01620414 +n01620735 +n01621127 +n01622352 +n01623706 +n01627424 +n01629276 +n01630284 +n01631175 +n01632047 +n01637112 +n01637932 +n01639765 +n01640846 +n01645776 +n01649170 +n01650167 +n01651487 +n01653773 +n01661091 +n01661592 +n01661818 +n01662622 +n01662784 +n01663401 +n01664369 +n01665932 +n01667432 +n01668091 +n01669372 +n01670092 +n01672032 +n01674216 +n01674464 +n01674990 +n01676755 +n01680264 +n01680478 +n01681940 +n01684133 +n01685439 +n01686044 +n01686220 +n01686403 +n01686609 +n01686808 +n01687665 +n01688961 +n01689411 +n01691951 +n01692864 +n01693783 +n01694709 +n01696633 +n01697178 
+n01698434 +n01699040 +n01701551 +n01702256 +n01703011 +n01703569 +n01705934 +n01708106 +n01708998 +n01712008 +n01712752 +n01717016 +n01719403 +n01722998 +n01724231 +n01726692 +n01727646 +n01730185 +n01730307 +n01730812 +n01730960 +n01731545 +n01732244 +n01733757 +n01734637 +n01734808 +n01735439 +n01735577 +n01735728 +n01737472 +n01737728 +n01737875 +n01738065 +n01738601 +n01739647 +n01740551 +n01741232 +n01741562 +n01741943 +n01743605 +n01745125 +n01745484 +n01746359 +n01747885 +n01749582 +n01749742 +n01751036 +n01752165 +n01753959 +n01754876 +n01755740 +n01767661 +n01769347 +n01770795 +n01771417 +n01772222 +n01775370 +n01776192 +n01776705 +n01777304 +n01777467 +n01777649 +n01777909 +n01778217 +n01778487 +n01778621 +n01778801 +n01779148 +n01779463 +n01779629 +n01780696 +n01782209 +n01785667 +n01789386 +n01789740 +n01791107 +n01791625 +n01792158 +n01792640 +n01794158 +n01795088 +n01795735 +n01795900 +n01796019 +n01796105 +n01796519 +n01796729 +n01798706 +n01798839 +n01798979 +n01799302 +n01800424 +n01801088 +n01801479 +n01802721 +n01803078 +n01804478 +n01804653 +n01804921 +n01805070 +n01805801 +n01806847 +n01807828 +n01808140 +n01808291 +n01808596 +n01809106 +n01810700 +n01811909 +n01812337 +n01813385 +n01814370 +n01814921 +n01815601 +n01816887 +n01819115 +n01820348 +n01820801 +n01821076 +n01821203 +n01822602 +n01823013 +n01824749 +n01825930 +n01826364 +n01827403 +n01829869 +n01831712 +n01832167 +n01834177 +n01834540 +n01835276 +n01838038 +n01838598 +n01839598 +n01841102 +n01843719 +n01844231 +n01844551 +n01844917 +n01845132 +n01845477 +n01846331 +n01848123 +n01848648 +n01849466 +n01850373 +n01851375 +n01852142 +n01852861 +n01853498 +n01854415 +n01856072 +n01856155 +n01856380 +n01856553 +n01856890 +n01857079 +n01857325 +n01857512 +n01857632 +n01857851 +n01858441 +n01859496 +n01860497 +n01861148 +n01861778 +n01871543 +n01871875 +n01874434 +n01874928 +n01876326 +n01877134 +n01878061 +n01878335 +n01878639 +n01878929 +n01879217 +n01879509 +n01880152 +n01881171 
+n01883513 +n01883920 +n01886756 +n01887896 +n01888264 +n01889074 +n01889520 +n01890860 +n01891633 +n01892551 +n01894207 +n01905661 +n01906749 +n01907738 +n01909422 +n01911403 +n01911839 +n01912454 +n01914163 +n01914830 +n01915811 +n01916187 +n01916925 +n01918744 +n01922303 +n01925270 +n01925695 +n01926379 +n01926689 +n01927159 +n01927456 +n01927928 +n01928215 +n01930852 +n01931140 +n01931520 +n01931714 +n01932151 +n01932936 +n01933151 +n01933478 +n01933988 +n01934440 +n01935176 +n01936391 +n01937909 +n01940736 +n01941223 +n01942177 +n01942869 +n01943541 +n01944118 +n01944812 +n01944955 +n01945143 +n01945340 +n01945845 +n01946277 +n01948573 +n01951613 +n01953361 +n01955933 +n01956481 +n01958038 +n01959985 +n01960459 +n01963571 +n01964049 +n01964441 +n01965889 +n01967094 +n01968315 +n01969726 +n01971094 +n01971280 +n01974773 +n01975687 +n01976146 +n01976957 +n01978930 +n01981702 +n01982650 +n01983048 +n01985493 +n01985797 +n01986806 +n01987545 +n01988701 +n01989869 +n01990007 +n01991028 +n01991520 +n01992262 +n01992423 +n01992773 +n01996585 +n01998183 +n02000954 +n02002075 +n02005790 +n02006985 +n02007284 +n02008041 +n02008796 +n02010453 +n02011805 +n02011943 +n02012185 +n02013177 +n02014941 +n02015554 +n02016358 +n02016956 +n02018027 +n02019190 +n02019438 +n02019929 +n02021050 +n02021795 +n02022684 +n02023341 +n02025043 +n02026059 +n02028175 +n02030035 +n02030287 +n02030996 +n02031934 +n02033208 +n02033324 +n02033561 +n02034129 +n02034661 +n02036053 +n02037464 +n02039171 +n02040505 +n02041085 +n02041246 +n02043063 +n02044178 +n02044778 +n02045369 +n02046759 +n02047260 +n02047614 +n02048698 +n02049532 +n02050004 +n02051474 +n02052204 +n02052365 +n02053083 +n02054502 +n02055658 +n02055803 +n02057731 +n02058594 +n02058747 +n02059162 +n02060411 +n02060889 +n02062017 +n02062430 +n02062744 +n02063224 +n02064338 +n02066707 +n02068206 +n02068974 +n02069701 +n02070430 +n02073250 +n02075296 +n02075927 +n02076196 +n02076779 +n02077152 +n02077658 +n02078292 +n02078574 
+n02078738 +n02079005 +n02079389 +n02081571 +n02083346 +n02083672 +n02084071 +n02084861 +n02085374 +n02086346 +n02086478 +n02087122 +n02087551 +n02088839 +n02089232 +n02089555 +n02090475 +n02090827 +n02092468 +n02093056 +n02094562 +n02094721 +n02095050 +n02095412 +n02095727 +n02096756 +n02097786 +n02098550 +n02099029 +n02099997 +n02100399 +n02101108 +n02101861 +n02102605 +n02103406 +n02103841 +n02104523 +n02104882 +n02106966 +n02107420 +n02108254 +n02108672 +n02109150 +n02109811 +n02110532 +n02111626 +n02112497 +n02112826 +n02113335 +n02114100 +n02115012 +n02115335 +n02117512 +n02117646 +n02117900 +n02118176 +n02118333 +n02119247 +n02119359 +n02120278 +n02120997 +n02121620 +n02121808 +n02122725 +n02123785 +n02124623 +n02127292 +n02127381 +n02127482 +n02127586 +n02127678 +n02127808 +n02128598 +n02128669 +n02129463 +n02129530 +n02129837 +n02129923 +n02130545 +n02131653 +n02132466 +n02132580 +n02132788 +n02133400 +n02134971 +n02135220 +n02137722 +n02137888 +n02138647 +n02138777 +n02139199 +n02139671 +n02141306 +n02141713 +n02144593 +n02145424 +n02148835 +n02149420 +n02150482 +n02152740 +n02152991 +n02153203 +n02153809 +n02156732 +n02159955 +n02164464 +n02165877 +n02166229 +n02166567 +n02166826 +n02167505 +n02167820 +n02167944 +n02168245 +n02169023 +n02169218 +n02169705 +n02169974 +n02170400 +n02170993 +n02171453 +n02171869 +n02172518 +n02172678 +n02172761 +n02172870 +n02174355 +n02176261 +n02178411 +n02178717 +n02179012 +n02180233 +n02181235 +n02181724 +n02182045 +n02182355 +n02182642 +n02182930 +n02183857 +n02186153 +n02188699 +n02189363 +n02190790 +n02191273 +n02191773 +n02191979 +n02192252 +n02192513 +n02192814 +n02193009 +n02193163 +n02194249 +n02194750 +n02195091 +n02195526 +n02195819 +n02196344 +n02198532 +n02199502 +n02200198 +n02202287 +n02204722 +n02206270 +n02207179 +n02207449 +n02208280 +n02208498 +n02208848 +n02208979 +n02209111 +n02209354 +n02209624 +n02209964 +n02210427 +n02210921 +n02211444 +n02211627 +n02211896 +n02212062 +n02212602 +n02212958 
+n02213107 +n02215161 +n02215770 +n02217563 +n02218713 +n02220055 +n02220225 +n02220518 +n02220804 +n02221083 +n02221414 +n02221571 +n02221715 +n02221820 +n02222035 +n02222321 +n02222582 +n02223266 +n02223520 +n02226183 +n02226821 +n02226970 +n02227247 +n02227604 +n02227966 +n02228341 +n02228697 +n02229156 +n02229765 +n02230023 +n02230187 +n02230480 +n02230634 +n02231052 +n02231803 +n02233943 +n02234355 +n02234570 +n02234848 +n02235205 +n02236241 +n02236896 +n02237424 +n02237581 +n02238235 +n02238887 +n02239528 +n02241569 +n02241799 +n02243562 +n02244173 +n02246011 +n02246628 +n02247511 +n02248368 +n02248510 +n02248887 +n02249515 +n02250822 +n02251775 +n02252226 +n02253127 +n02253715 +n02254697 +n02257003 +n02257284 +n02257715 +n02259377 +n02260421 +n02260863 +n02261419 +n02262178 +n02263378 +n02264885 +n02265330 +n02268148 +n02269196 +n02269522 +n02270011 +n02270200 +n02271570 +n02271897 +n02272871 +n02274024 +n02274259 +n02274822 +n02278210 +n02278839 +n02279637 +n02280458 +n02281015 +n02281136 +n02281267 +n02282257 +n02282385 +n02282553 +n02282903 +n02283077 +n02283201 +n02283951 +n02285548 +n02287004 +n02287799 +n02288789 +n02291220 +n02291572 +n02291748 +n02293352 +n02293868 +n02295064 +n02295390 +n02295870 +n02298541 +n02300173 +n02301452 +n02302459 +n02302620 +n02305407 +n02306433 +n02307325 +n02308139 +n02308471 +n02309337 +n02310000 +n02310717 +n02311060 +n02312006 +n02312427 +n02313008 +n02316707 +n02318167 +n02319308 +n02319555 +n02319829 +n02320127 +n02322047 +n02323449 +n02323902 +n02324045 +n02325722 +n02325884 +n02326074 +n02326763 +n02326862 +n02327028 +n02327175 +n02327435 +n02327656 +n02327842 +n02328429 +n02329401 +n02330245 +n02331046 +n02331309 +n02332755 +n02333546 +n02334460 +n02335127 +n02336011 +n02336641 +n02338901 +n02339376 +n02339922 +n02343058 +n02343320 +n02343772 +n02344528 +n02345600 +n02346998 +n02347274 +n02347573 +n02347744 +n02348173 +n02348788 +n02350105 +n02350989 +n02351870 +n02352591 +n02353861 +n02355227 +n02355477 
+n02358091 +n02359324 +n02360781 +n02361587 +n02361706 +n02361850 +n02363245 +n02363351 +n02364520 +n02369680 +n02370806 +n02372584 +n02373336 +n02374149 +n02374451 +n02376542 +n02376791 +n02376918 +n02377181 +n02377480 +n02377703 +n02378415 +n02380335 +n02380583 +n02380745 +n02381460 +n02382437 +n02382948 +n02384858 +n02386014 +n02386310 +n02386496 +n02388276 +n02389346 +n02389559 +n02390454 +n02390834 +n02391234 +n02391373 +n02391508 +n02391994 +n02393580 +n02394477 +n02395003 +n02395694 +n02395931 +n02397529 +n02399000 +n02401031 +n02402010 +n02402175 +n02402425 +n02403325 +n02403454 +n02404186 +n02404573 +n02406174 +n02407959 +n02408660 +n02408817 +n02409870 +n02410702 +n02410900 +n02411705 +n02412440 +n02413131 +n02414578 +n02415435 +n02416519 +n02417070 +n02417534 +n02418064 +n02419796 +n02423218 +n02423362 +n02423589 +n02424305 +n02424695 +n02426813 +n02427724 +n02428349 +n02430045 +n02430559 +n02431122 +n02432511 +n02433546 +n02433925 +n02435853 +n02437136 +n02437971 +n02438173 +n02438272 +n02439033 +n02441326 +n02442172 +n02442336 +n02442446 +n02442572 +n02442668 +n02443015 +n02443346 +n02443808 +n02443959 +n02444251 +n02445004 +n02445171 +n02446206 +n02446352 +n02446645 +n02447021 +n02447762 +n02448060 +n02448633 +n02448885 +n02450034 +n02453108 +n02453611 +n02454794 +n02455135 +n02455428 +n02455720 +n02456008 +n02456275 +n02456962 +n02460009 +n02469914 +n02470325 +n02470899 +n02471300 +n02471762 +n02472293 +n02473307 +n02474777 +n02476219 +n02480153 +n02481103 +n02481235 +n02481366 +n02481500 +n02482060 +n02482286 +n02482474 +n02482650 +n02483092 +n02484322 +n02484473 +n02485225 +n02485371 +n02485536 +n02485688 +n02486657 +n02486908 +n02487079 +n02487547 +n02487675 +n02487847 +n02488003 +n02488415 +n02488894 +n02489589 +n02490597 +n02490811 +n02491107 +n02491329 +n02491474 +n02496913 +n02501583 +n02502514 +n02503127 +n02503517 +n02504770 +n02507649 +n02508021 +n02512053 +n02512938 +n02513560 +n02515214 +n02516188 +n02517442 +n02517938 +n02519148 
+n02519686 +n02521646 +n02522399 +n02524524 +n02526425 +n02526818 +n02527057 +n02527271 +n02527622 +n02528163 +n02529772 +n02530421 +n02532028 +n02532602 +n02533209 +n02533834 +n02534559 +n02534734 +n02535537 +n02537085 +n02537319 +n02538406 +n02538985 +n02540412 +n02541687 +n02546331 +n02548689 +n02549989 +n02550460 +n02552171 +n02554730 +n02556846 +n02557591 +n02557749 +n02559862 +n02561108 +n02561661 +n02562315 +n02562796 +n02563182 +n02564720 +n02565573 +n02566109 +n02568959 +n02569484 +n02570838 +n02572196 +n02574910 +n02576223 +n02576575 +n02578233 +n02579557 +n02580336 +n02581957 +n02583567 +n02585872 +n02586543 +n02588286 +n02590495 +n02590702 +n02590987 +n02594250 +n02596381 +n02597367 +n02599052 +n02599958 +n02600298 +n02601344 +n02602405 +n02603317 +n02604157 +n02605316 +n02606384 +n02607201 +n02607862 +n02613181 +n02614482 +n02614978 +n02619165 +n02621908 +n02623445 +n02624167 +n02625612 +n02626762 +n02627835 +n02630281 +n02630739 +n02631041 +n02636170 +n02636854 +n02638596 +n02640626 +n02640857 +n02642107 +n02642644 +n02643112 +n02644113 +n02646667 +n02648625 +n02650050 +n02650541 +n02652668 +n02653145 +n02653786 +n02654425 +n02655523 +n02656670 +n02657368 +n02658079 +n02661017 +n02662239 +n02663849 +n02667379 +n02667576 +n02668393 +n02670382 +n02671780 +n02672371 +n02676261 +n02676670 +n02677028 +n02677718 +n02678384 +n02680110 +n02680754 +n02682407 +n02682922 +n02683791 +n02686121 +n02686568 +n02687992 +n02688443 +n02689274 +n02691156 +n02692513 +n02693413 +n02693540 +n02694426 +n02694966 +n02695627 +n02697576 +n02698244 +n02700258 +n02700895 +n02702989 +n02703275 +n02705944 +n02708224 +n02708555 +n02709367 +n02709637 +n02710600 +n02712643 +n02713218 +n02715229 +n02715513 +n02715712 +n02716626 +n02726305 +n02726681 +n02727016 +n02727825 +n02728440 +n02729837 +n02729965 +n02730265 +n02732072 +n02732827 +n02733213 +n02733524 +n02735361 +n02735688 +n02736798 +n02737660 +n02738031 +n02738271 +n02738535 +n02739550 +n02739668 +n02740533 +n02740764 
+n02741475 +n02742322 +n02742753 +n02745492 +n02746365 +n02749790 +n02750169 +n02751067 +n02751295 +n02752496 +n02753044 +n02753394 +n02754103 +n02755352 +n02755529 +n02756098 +n02756977 +n02757462 +n02757810 +n02758134 +n02758960 +n02759700 +n02759963 +n02760099 +n02760199 +n02760429 +n02760855 +n02761392 +n02763198 +n02763714 +n02764044 +n02764614 +n02764779 +n02765028 +n02766320 +n02766534 +n02766792 +n02767433 +n02769075 +n02770830 +n02772554 +n02772700 +n02773037 +n02773838 +n02774152 +n02774630 +n02775483 +n02776205 +n02777100 +n02777734 +n02777927 +n02778456 +n02778669 +n02781121 +n02781338 +n02781517 +n02783035 +n02783324 +n02784998 +n02785648 +n02786198 +n02786463 +n02788689 +n02789487 +n02790823 +n02792552 +n02792948 +n02793842 +n02794008 +n02794779 +n02794972 +n02795783 +n02796207 +n02796623 +n02796995 +n02797692 +n02797881 +n02799897 +n02801184 +n02801525 +n02801938 +n02802721 +n02803349 +n02803666 +n02804252 +n02806088 +n02806379 +n02806875 +n02810471 +n02811468 +n02811719 +n02812201 +n02813252 +n02813399 +n02815478 +n02815950 +n02816494 +n02817031 +n02817650 +n02817799 +n02818832 +n02819697 +n02820210 +n02821627 +n02821943 +n02822220 +n02822399 +n02822865 +n02823335 +n02824448 +n02826589 +n02826886 +n02827606 +n02828299 +n02828884 +n02831335 +n02831724 +n02831894 +n02833793 +n02834778 +n02835412 +n02836268 +n02839351 +n02839910 +n02840619 +n02841063 +n02841506 +n02842133 +n02843029 +n02843777 +n02844214 +n02844307 +n02844714 +n02847631 +n02848216 +n02848523 +n02849154 +n02850950 +n02851099 +n02853016 +n02854532 +n02854926 +n02855089 +n02855390 +n02855793 +n02857365 +n02857477 +n02857644 +n02858304 +n02860415 +n02861886 +n02862048 +n02862916 +n02863750 +n02865665 +n02865931 +n02866578 +n02867715 +n02869737 +n02871631 +n02871824 +n02871963 +n02872752 +n02873839 +n02874086 +n02875436 +n02876326 +n02876457 +n02876657 +n02877962 +n02879517 +n02880189 +n02880546 +n02880940 +n02881193 +n02881906 +n02882483 +n02882647 +n02883004 +n02883344 +n02884225 
+n02885108 +n02885338 +n02886599 +n02887209 +n02887970 +n02888569 +n02889425 +n02891188 +n02891788 +n02892499 +n02893418 +n02896294 +n02896442 +n02897389 +n02897820 +n02898173 +n02898369 +n02898585 +n02898711 +n02900705 +n02901481 +n02901901 +n02902079 +n02902916 +n02903006 +n02904109 +n02904640 +n02908217 +n02909285 +n02911485 +n02912065 +n02913152 +n02914991 +n02916179 +n02916350 +n02917377 +n02917607 +n02919414 +n02920503 +n02921884 +n02923129 +n02924116 +n02925519 +n02928413 +n02928608 +n02929289 +n02929462 +n02929923 +n02931417 +n02931836 +n02932019 +n02932400 +n02933112 +n02933462 +n02933750 +n02933990 +n02934168 +n02935658 +n02935891 +n02936176 +n02936281 +n02936714 +n02938886 +n02939866 +n02941095 +n02942699 +n02943241 +n02943871 +n02944826 +n02945161 +n02946270 +n02946348 +n02946921 +n02947212 +n02947818 +n02948557 +n02949202 +n02950186 +n02950256 +n02950632 +n02950943 +n02951843 +n02952485 +n02952674 +n02953673 +n02954163 +n02954340 +n02954938 +n02955065 +n02955247 +n02955540 +n02955767 +n02957135 +n02957755 +n02958343 +n02959942 +n02961451 +n02961947 +n02963302 +n02963692 +n02963821 +n02965216 +n02965300 +n02965529 +n02966545 +n02966786 +n02966942 +n02967081 +n02967991 +n02968473 +n02969010 +n02969163 +n02969634 +n02969886 +n02970685 +n02970849 +n02971691 +n02972397 +n02973017 +n02974697 +n02975212 +n02976939 +n02978205 +n02978753 +n02979516 +n02982599 +n02983189 +n02983904 +n02984061 +n02984203 +n02984469 +n02984699 +n02985137 +n02985828 +n02986066 +n02987047 +n02987492 +n02989099 +n02991048 +n02991302 +n02992032 +n02993546 +n02995998 +n02997391 +n02997607 +n03001282 +n03001627 +n03002210 +n03003091 +n03004620 +n03005515 +n03007130 +n03007591 +n03010656 +n03010795 +n03011018 +n03011355 +n03012159 +n03013006 +n03014440 +n03015254 +n03017070 +n03018209 +n03020034 +n03020416 +n03020692 +n03024333 +n03025070 +n03025886 +n03027108 +n03027250 +n03029066 +n03031422 +n03032811 +n03033362 +n03033986 +n03034516 +n03034663 +n03035510 +n03036469 +n03036866 
+n03037108 +n03037709 +n03038685 +n03039015 +n03039947 +n03040229 +n03040376 +n03043274 +n03043958 +n03045337 +n03046257 +n03048883 +n03049066 +n03049457 +n03050026 +n03050546 +n03050655 +n03050864 +n03051396 +n03051540 +n03052464 +n03052917 +n03053047 +n03054901 +n03055670 +n03056097 +n03056493 +n03057021 +n03057636 +n03058107 +n03058603 +n03058949 +n03059366 +n03061050 +n03063073 +n03063338 +n03064350 +n03064758 +n03065708 +n03066849 +n03070193 +n03071021 +n03071160 +n03072201 +n03073296 +n03073977 +n03074380 +n03074855 +n03075097 +n03075500 +n03075634 +n03076411 +n03076708 +n03078287 +n03078670 +n03079230 +n03079741 +n03080497 +n03080731 +n03081986 +n03082127 +n03082807 +n03082979 +n03084420 +n03085333 +n03085602 +n03085915 +n03086183 +n03086457 +n03086670 +n03087366 +n03087643 +n03087816 +n03088707 +n03091044 +n03091374 +n03092166 +n03092314 +n03093792 +n03094503 +n03096439 +n03096960 +n03098140 +n03098806 +n03099454 +n03099771 +n03099945 +n03100346 +n03100490 +n03101156 +n03101986 +n03102654 +n03102859 +n03106722 +n03106898 +n03107046 +n03109881 +n03111690 +n03112869 +n03113152 +n03113657 +n03113835 +n03114839 +n03115180 +n03116530 +n03116767 +n03117199 +n03118346 +n03118969 +n03119510 +n03120198 +n03120491 +n03121897 +n03122748 +n03123809 +n03125870 +n03128085 +n03128427 +n03128519 +n03129001 +n03130066 +n03130563 +n03131669 +n03132261 +n03134853 +n03135917 +n03136369 +n03137579 +n03139464 +n03140900 +n03141065 +n03141327 +n03143572 +n03145384 +n03145843 +n03146846 +n03147509 +n03148324 +n03148727 +n03149401 +n03151077 +n03153948 +n03154073 +n03154446 +n03155178 +n03156071 +n03156405 +n03157348 +n03158796 +n03158885 +n03161450 +n03162818 +n03163798 +n03163973 +n03164605 +n03164722 +n03164929 +n03165823 +n03167978 +n03168107 +n03168217 +n03170635 +n03171356 +n03172965 +n03173387 +n03175604 +n03176386 +n03177165 +n03177708 +n03178000 +n03178430 +n03180504 +n03180969 +n03181293 +n03182140 +n03182232 +n03182912 +n03183080 +n03186818 +n03187751 +n03189818 
+n03193597 +n03196062 +n03196324 +n03196598 +n03199647 +n03199901 +n03200357 +n03200539 +n03200701 +n03200906 +n03201035 +n03201638 +n03201996 +n03202354 +n03202760 +n03203089 +n03203806 +n03204306 +n03204558 +n03204955 +n03205143 +n03205304 +n03206718 +n03206908 +n03207305 +n03208556 +n03210683 +n03211117 +n03211616 +n03212811 +n03214253 +n03214450 +n03215191 +n03219135 +n03220237 +n03221059 +n03221720 +n03222516 +n03223162 +n03223441 +n03224753 +n03224893 +n03225777 +n03226538 +n03228016 +n03228533 +n03228692 +n03229115 +n03229526 +n03231160 +n03231819 +n03235796 +n03235979 +n03236580 +n03236735 +n03237212 +n03237639 +n03239259 +n03239726 +n03240140 +n03241093 +n03241335 +n03241496 +n03242120 +n03242506 +n03242995 +n03243218 +n03245271 +n03245421 +n03246933 +n03250952 +n03251533 +n03251766 +n03252324 +n03252637 +n03254374 +n03255488 +n03255899 +n03256788 +n03256928 +n03257586 +n03258905 +n03259505 +n03261776 +n03262519 +n03262809 +n03262932 +n03265032 +n03266749 +n03267821 +n03269203 +n03269401 +n03270695 +n03271765 +n03271865 +n03272239 +n03272383 +n03273061 +n03273913 +n03274561 +n03274796 +n03276179 +n03277459 +n03277771 +n03278248 +n03279153 +n03279364 +n03279508 +n03280394 +n03280644 +n03281145 +n03282060 +n03282401 +n03284743 +n03284981 +n03285912 +n03286572 +n03287733 +n03288003 +n03289985 +n03291413 +n03292960 +n03294048 +n03294833 +n03296478 +n03297103 +n03297644 +n03297735 +n03298089 +n03302790 +n03303217 +n03303831 +n03304197 +n03304465 +n03305522 +n03307573 +n03308152 +n03309808 +n03314378 +n03314884 +n03315644 +n03316406 +n03318136 +n03319457 +n03320046 +n03322570 +n03322940 +n03323703 +n03324928 +n03325088 +n03326073 +n03327234 +n03327691 +n03327841 +n03329663 +n03330792 +n03334017 +n03334492 +n03334912 +n03335030 +n03335846 +n03336839 +n03337494 +n03338287 +n03338821 +n03339296 +n03339643 +n03340009 +n03340923 +n03342961 +n03343354 +n03343560 +n03343853 +n03346135 +n03346455 +n03349296 +n03350352 +n03350456 +n03350602 +n03351262 +n03351979 
+n03352628 +n03354903 +n03355468 +n03356446 +n03357267 +n03357716 +n03359137 +n03359566 +n03360731 +n03361683 +n03362771 +n03363363 +n03364008 +n03364937 +n03365592 +n03365991 +n03366823 +n03373237 +n03374649 +n03374838 +n03375171 +n03376279 +n03378342 +n03379343 +n03379828 +n03379989 +n03380647 +n03380867 +n03381126 +n03381231 +n03381776 +n03382856 +n03382969 +n03383468 +n03384167 +n03384891 +n03385557 +n03386011 +n03387323 +n03387653 +n03390327 +n03391770 +n03393324 +n03394480 +n03394649 +n03396580 +n03396654 +n03397266 +n03397532 +n03398228 +n03399761 +n03399971 +n03402188 +n03402369 +n03404012 +n03404360 +n03404449 +n03405265 +n03405725 +n03407369 +n03409393 +n03409591 +n03410147 +n03411339 +n03412058 +n03412220 +n03412511 +n03412906 +n03413264 +n03413428 +n03413828 +n03414162 +n03415252 +n03416489 +n03416775 +n03417345 +n03418158 +n03418242 +n03419014 +n03422072 +n03422589 +n03423719 +n03424630 +n03427296 +n03428090 +n03428349 +n03429003 +n03429288 +n03429914 +n03430091 +n03430313 +n03430551 +n03430959 +n03431243 +n03431745 +n03433637 +n03433877 +n03434285 +n03434830 +n03435593 +n03437941 +n03438257 +n03439814 +n03441112 +n03442288 +n03442756 +n03446070 +n03446832 +n03448031 +n03448956 +n03449564 +n03449858 +n03450516 +n03452267 +n03452449 +n03453320 +n03454110 +n03454211 +n03454707 +n03455355 +n03456548 +n03456665 +n03457008 +n03457686 +n03458271 +n03459914 +n03461882 +n03465500 +n03465818 +n03466162 +n03466839 +n03467517 +n03467796 +n03467984 +n03468696 +n03469493 +n03470387 +n03470629 +n03470948 +n03472232 +n03472535 +n03472937 +n03473817 +n03473966 +n03475823 +n03476083 +n03476313 +n03477773 +n03477902 +n03478756 +n03478907 +n03481521 +n03482523 +n03483230 +n03483531 +n03484083 +n03484931 +n03487331 +n03487444 +n03487774 +n03488188 +n03488603 +n03489162 +n03490324 +n03490449 +n03490884 +n03491988 +n03496296 +n03496612 +n03497100 +n03497657 +n03498536 +n03499468 +n03500295 +n03501152 +n03501288 +n03501614 +n03502331 +n03502509 +n03502777 +n03503718 
+n03503997 +n03505383 +n03505504 +n03506370 +n03507963 +n03508101 +n03509394 +n03509843 +n03510583 +n03510866 +n03511175 +n03512147 +n03512911 +n03513137 +n03513376 +n03515338 +n03517899 +n03517982 +n03518631 +n03519674 +n03521076 +n03521544 +n03522634 +n03524574 +n03524976 +n03525074 +n03525454 +n03525827 +n03528263 +n03529444 +n03531281 +n03531447 +n03531546 +n03532342 +n03534776 +n03535024 +n03536761 +n03537412 +n03538037 +n03538300 +n03538634 +n03538957 +n03540267 +n03540595 +n03541091 +n03541696 +n03541923 +n03542333 +n03542860 +n03543603 +n03544360 +n03545150 +n03546340 +n03547054 +n03547530 +n03548930 +n03550153 +n03550289 +n03551084 +n03551790 +n03552449 +n03552749 +n03553486 +n03554460 +n03555426 +n03555662 +n03557590 +n03558176 +n03558404 +n03558739 +n03561169 +n03563200 +n03563710 +n03563967 +n03565288 +n03565565 +n03566329 +n03568117 +n03568818 +n03571942 +n03572205 +n03574555 +n03574816 +n03575958 +n03576215 +n03577672 +n03577818 +n03578055 +n03578251 +n03578656 +n03579538 +n03579982 +n03583621 +n03584400 +n03585073 +n03588951 +n03589513 +n03589791 +n03590306 +n03590932 +n03592245 +n03592773 +n03593526 +n03595409 +n03595860 +n03596285 +n03597317 +n03598151 +n03598299 +n03598646 +n03600977 +n03601638 +n03601840 +n03602081 +n03603722 +n03604629 +n03604843 +n03605722 +n03605915 +n03606465 +n03609235 +n03609542 +n03610418 +n03610992 +n03612814 +n03613294 +n03613592 +n03614007 +n03614532 +n03615563 +n03617095 +n03617594 +n03618546 +n03618982 +n03619396 +n03619650 +n03619793 +n03619890 +n03620052 +n03621049 +n03621694 +n03622931 +n03623556 +n03624134 +n03625355 +n03626115 +n03631177 +n03631811 +n03632852 +n03633886 +n03635032 +n03635668 +n03635932 +n03636248 +n03636649 +n03638883 +n03639675 +n03640988 +n03642444 +n03646296 +n03646916 +n03647520 +n03651388 +n03653220 +n03653454 +n03654576 +n03655072 +n03656484 +n03657239 +n03658858 +n03659292 +n03660124 +n03661340 +n03662719 +n03662887 +n03663531 +n03664675 +n03664943 +n03665366 +n03666362 +n03666917 
+n03667235 +n03667829 +n03671914 +n03672827 +n03673450 +n03673767 +n03676759 +n03677766 +n03679384 +n03679712 +n03681477 +n03682487 +n03684823 +n03685307 +n03685820 +n03686130 +n03686470 +n03687928 +n03688943 +n03689157 +n03689570 +n03690851 +n03691817 +n03692379 +n03693293 +n03697552 +n03698604 +n03699280 +n03699975 +n03700963 +n03701191 +n03701391 +n03701640 +n03701790 +n03702248 +n03704834 +n03705379 +n03706653 +n03707597 +n03708036 +n03709206 +n03709363 +n03709545 +n03710528 +n03711711 +n03711999 +n03712887 +n03713069 +n03714235 +n03715386 +n03715669 +n03715892 +n03716966 +n03717131 +n03718212 +n03718335 +n03718699 +n03718789 +n03719053 +n03721590 +n03722007 +n03722288 +n03724176 +n03725035 +n03725717 +n03726516 +n03726760 +n03726993 +n03727837 +n03727946 +n03728437 +n03728982 +n03729647 +n03729951 +n03730153 +n03730788 +n03731695 +n03733644 +n03733925 +n03735637 +n03736970 +n03738241 +n03738472 +n03739518 +n03739693 +n03743902 +n03744276 +n03744684 +n03744840 +n03745571 +n03746330 +n03748162 +n03749504 +n03749807 +n03750206 +n03751065 +n03752185 +n03752922 +n03753077 +n03753514 +n03758894 +n03759432 +n03760671 +n03762982 +n03763727 +n03764276 +n03765561 +n03765934 +n03766322 +n03768132 +n03769722 +n03770954 +n03772077 +n03772674 +n03773035 +n03775199 +n03775847 +n03779000 +n03779370 +n03780047 +n03781787 +n03782190 +n03785499 +n03787523 +n03789171 +n03789400 +n03789946 +n03790230 +n03790512 +n03790755 +n03791235 +n03792048 +n03792526 +n03793850 +n03795976 +n03796181 +n03797390 +n03798982 +n03799113 +n03800485 +n03800772 +n03800933 +n03802007 +n03802228 +n03802393 +n03803116 +n03809312 +n03811295 +n03811444 +n03811847 +n03811965 +n03812382 +n03812924 +n03813176 +n03813946 +n03815278 +n03815482 +n03815615 +n03816005 +n03816136 +n03816849 +n03817647 +n03819595 +n03819994 +n03820154 +n03820318 +n03820728 +n03820950 +n03824197 +n03825080 +n03827536 +n03828020 +n03829340 +n03831757 +n03834040 +n03834604 +n03836062 +n03837422 +n03838748 +n03839172 +n03839276 
+n03839795 +n03841666 +n03842156 +n03844045 +n03844233 +n03845190 +n03846234 +n03846772 +n03847471 +n03847823 +n03848168 +n03848348 +n03849275 +n03850613 +n03851341 +n03851787 +n03852280 +n03852688 +n03854815 +n03859280 +n03859495 +n03859958 +n03861430 +n03861842 +n03862676 +n03863923 +n03864139 +n03864356 +n03864692 +n03865371 +n03865949 +n03868406 +n03871083 +n03871524 +n03871724 +n03873848 +n03874138 +n03874823 +n03875218 +n03880129 +n03880323 +n03880531 +n03883054 +n03883773 +n03883944 +n03884639 +n03885535 +n03885669 +n03886053 +n03886641 +n03887185 +n03888022 +n03889503 +n03889726 +n03891051 +n03892557 +n03894051 +n03894379 +n03896103 +n03896233 +n03896419 +n03896628 +n03896984 +n03897943 +n03898271 +n03898633 +n03899612 +n03899933 +n03901338 +n03903133 +n03903424 +n03904060 +n03904183 +n03904433 +n03905540 +n03906997 +n03907654 +n03908204 +n03909160 +n03909406 +n03915118 +n03915437 +n03916470 +n03916720 +n03917327 +n03918480 +n03920737 +n03923564 +n03923692 +n03924069 +n03926148 +n03926412 +n03926876 +n03927792 +n03928116 +n03929091 +n03929202 +n03929443 +n03930515 +n03932670 +n03936269 +n03938522 +n03939677 +n03940256 +n03941684 +n03943920 +n03945615 +n03947111 +n03947466 +n03948459 +n03951971 +n03953020 +n03953416 +n03955809 +n03956785 +n03956922 +n03957315 +n03957762 +n03958630 +n03958752 +n03959014 +n03959701 +n03961939 +n03962525 +n03962932 +n03963028 +n03965907 +n03966325 +n03966751 +n03966976 +n03967942 +n03968293 +n03971321 +n03972524 +n03973520 +n03973628 +n03975035 +n03979377 +n03979492 +n03980026 +n03981340 +n03982232 +n03982895 +n03984234 +n03984381 +n03985232 +n03986704 +n03988170 +n03989665 +n03990474 +n03991443 +n03992325 +n03992703 +n03993180 +n03993403 +n03994008 +n03994757 +n03995018 +n03995856 +n03996145 +n03996416 +n03997484 +n03999992 +n04000311 +n04001397 +n04001499 +n04001845 +n04004210 +n04004475 +n04005912 +n04007664 +n04010057 +n04010779 +n04010927 +n04011827 +n04012084 +n04013729 +n04014297 +n04015204 +n04016576 +n04016684 
+n04018399 +n04018667 +n04019101 +n04019696 +n04020087 +n04020298 +n04020912 +n04021028 +n04021362 +n04021798 +n04022332 +n04022708 +n04023249 +n04024274 +n04026053 +n04026918 +n04027023 +n04027706 +n04028315 +n04029734 +n04030274 +n04036303 +n04037964 +n04038440 +n04038727 +n04039848 +n04042358 +n04042632 +n04042795 +n04042985 +n04043733 +n04044307 +n04044498 +n04045085 +n04045397 +n04046590 +n04046974 +n04047401 +n04049405 +n04050066 +n04051549 +n04051825 +n04052757 +n04056932 +n04057047 +n04057435 +n04057846 +n04057981 +n04058096 +n04058239 +n04059947 +n04060647 +n04060904 +n04061793 +n04061969 +n04062644 +n04063373 +n04063868 +n04064401 +n04065464 +n04065789 +n04067231 +n04067353 +n04067921 +n04068441 +n04068601 +n04069276 +n04069777 +n04070207 +n04070964 +n04071102 +n04071263 +n04071393 +n04072193 +n04072551 +n04073948 +n04075468 +n04075916 +n04076284 +n04077430 +n04077734 +n04078574 +n04079106 +n04079244 +n04079933 +n04080454 +n04080833 +n04081844 +n04083649 +n04086794 +n04087126 +n04087709 +n04088696 +n04088797 +n04089666 +n04089976 +n04090548 +n04091097 +n04093625 +n04095210 +n04096066 +n04097622 +n04097866 +n04099175 +n04099429 +n04100174 +n04101497 +n04101701 +n04102037 +n04102285 +n04102406 +n04102962 +n04104147 +n04104500 +n04105068 +n04105438 +n04105893 +n04107984 +n04108268 +n04110068 +n04110654 +n04110955 +n04111190 +n04111414 +n04111668 +n04113765 +n04114996 +n04115256 +n04115996 +n04116389 +n04118021 +n04121228 +n04122349 +n04122492 +n04122825 +n04123123 +n04123567 +n04123740 +n04125116 +n04125853 +n04126541 +n04126659 +n04126980 +n04127904 +n04128499 +n04128837 +n04131929 +n04134632 +n04136510 +n04137444 +n04137897 +n04138977 +n04139859 +n04140064 +n04140631 +n04141838 +n04143897 +n04146050 +n04147495 +n04148054 +n04149083 +n04151108 +n04151581 +n04151940 +n04152387 +n04154753 +n04156297 +n04156411 +n04157320 +n04158807 +n04158956 +n04160372 +n04160586 +n04161358 +n04161981 +n04164757 +n04164868 +n04166111 +n04167489 +n04169437 +n04170037 
+n04171459 +n04171629 +n04171831 +n04174101 +n04174500 +n04176068 +n04176190 +n04176528 +n04177329 +n04177545 +n04180063 +n04180888 +n04181228 +n04181718 +n04182322 +n04183217 +n04183329 +n04184435 +n04184600 +n04185071 +n04186051 +n04186268 +n04186455 +n04186624 +n04186848 +n04187061 +n04187547 +n04187885 +n04189092 +n04190052 +n04190464 +n04190747 +n04190997 +n04191150 +n04191595 +n04191943 +n04192238 +n04192858 +n04194289 +n04196080 +n04197391 +n04198015 +n04198797 +n04199027 +n04201733 +n04202417 +n04205318 +n04206356 +n04207763 +n04210390 +n04211356 +n04211970 +n04215910 +n04216634 +n04216860 +n04216963 +n04217718 +n04217882 +n04219424 +n04221823 +n04222210 +n04222470 +n04222847 +n04225031 +n04225222 +n04225729 +n04226464 +n04226537 +n04227900 +n04229007 +n04229107 +n04229480 +n04230603 +n04230808 +n04231693 +n04232153 +n04233832 +n04234455 +n04235291 +n04235771 +n04236001 +n04236377 +n04236702 +n04238617 +n04241042 +n04241394 +n04242408 +n04243003 +n04243941 +n04244997 +n04245847 +n04246855 +n04247630 +n04247736 +n04248507 +n04249415 +n04250224 +n04250599 +n04253931 +n04255499 +n04256520 +n04260589 +n04261116 +n04262678 +n04263336 +n04263760 +n04264233 +n04264914 +n04266486 +n04267577 +n04269944 +n04270891 +n04271148 +n04272054 +n04272782 +n04273064 +n04273796 +n04275283 +n04275661 +n04275904 +n04278353 +n04279172 +n04279987 +n04280259 +n04280970 +n04283585 +n04283905 +n04284002 +n04285146 +n04285622 +n04285803 +n04286128 +n04288272 +n04288533 +n04288673 +n04289449 +n04291242 +n04291759 +n04292414 +n04292572 +n04293119 +n04293744 +n04294212 +n04294426 +n04295081 +n04295881 +n04299215 +n04300358 +n04301000 +n04301474 +n04303258 +n04304375 +n04305471 +n04306080 +n04306847 +n04307419 +n04307878 +n04308084 +n04308273 +n04308397 +n04308583 +n04308807 +n04309348 +n04309833 +n04310721 +n04311595 +n04312154 +n04312432 +n04313220 +n04314914 +n04315828 +n04315948 +n04317420 +n04318131 +n04318982 +n04319937 +n04320405 +n04322026 +n04322692 +n04322801 +n04323819 
+n04326799 +n04326896 +n04328054 +n04328329 +n04328946 +n04329477 +n04330340 +n04330669 +n04330998 +n04331277 +n04332987 +n04333129 +n04338517 +n04339638 +n04340750 +n04340935 +n04341133 +n04341414 +n04341686 +n04346679 +n04347519 +n04348184 +n04348359 +n04349401 +n04350104 +n04350458 +n04354589 +n04356595 +n04358707 +n04358874 +n04359335 +n04359589 +n04360501 +n04360798 +n04361095 +n04361260 +n04362821 +n04363210 +n04363874 +n04364545 +n04364827 +n04364994 +n04365328 +n04365484 +n04365751 +n04368695 +n04370048 +n04371563 +n04373894 +n04375775 +n04377057 +n04378956 +n04379243 +n04379964 +n04380346 +n04381994 +n04382334 +n04382880 +n04383130 +n04383301 +n04386664 +n04387201 +n04387400 +n04388162 +n04388743 +n04389521 +n04390873 +n04391838 +n04392526 +n04393095 +n04394261 +n04395875 +n04397168 +n04397261 +n04397645 +n04398497 +n04398688 +n04398834 +n04399046 +n04400289 +n04401088 +n04402057 +n04402580 +n04402746 +n04402984 +n04403638 +n04404817 +n04404997 +n04405540 +n04405762 +n04407435 +n04407686 +n04409128 +n04409806 +n04410086 +n04410365 +n04410485 +n04411264 +n04411966 +n04413151 +n04413419 +n04415663 +n04416901 +n04417180 +n04417361 +n04417809 +n04419073 +n04421872 +n04422875 +n04427715 +n04428008 +n04431436 +n04431745 +n04434932 +n04435180 +n04436185 +n04436401 +n04436542 +n04437670 +n04437953 +n04438304 +n04438643 +n04440963 +n04441662 +n04444749 +n04445040 +n04445952 +n04446276 +n04447276 +n04447443 +n04448070 +n04448361 +n04450243 +n04450640 +n04450749 +n04451818 +n04452615 +n04452848 +n04453156 +n04453666 +n04453910 +n04454654 +n04455250 +n04455652 +n04456472 +n04457326 +n04458843 +n04459362 +n04459610 +n04460130 +n04462011 +n04463679 +n04464852 +n04467099 +n04467307 +n04468005 +n04469251 +n04470741 +n04471315 +n04471632 +n04472243 +n04472726 +n04473884 +n04474466 +n04475411 +n04475631 +n04477548 +n04478512 +n04478657 +n04480527 +n04481524 +n04487724 +n04488427 +n04489008 +n04489817 +n04490091 +n04491769 +n04493109 +n04494204 +n04495450 +n04497442 
+n04497570 +n04498523 +n04499446 +n04499554 +n04500060 +n04501837 +n04502197 +n04502502 +n04502670 +n04502851 +n04504141 +n04504770 +n04505036 +n04506994 +n04507453 +n04508163 +n04508489 +n04508949 +n04509260 +n04509592 +n04511002 +n04514241 +n04516116 +n04516214 +n04516672 +n04518132 +n04519153 +n04520170 +n04520382 +n04521987 +n04524313 +n04527648 +n04529681 +n04530566 +n04531098 +n04531873 +n04533042 +n04533199 +n04533700 +n04534127 +n04534895 +n04536153 +n04538552 +n04539203 +n04540761 +n04541320 +n04543158 +n04544450 +n04546194 +n04546855 +n04547592 +n04549122 +n04549919 +n04551055 +n04552696 +n04553389 +n04554871 +n04555600 +n04555897 +n04556948 +n04557308 +n04557751 +n04558059 +n04558804 +n04559023 +n04559730 +n04562262 +n04563204 +n04565375 +n04566257 +n04567098 +n04568069 +n04568557 +n04569520 +n04569822 +n04570958 +n04571292 +n04571566 +n04571958 +n04572935 +n04574471 +n04574999 +n04576002 +n04576211 +n04576971 +n04577426 +n04577769 +n04578801 +n04579230 +n04580493 +n04581595 +n04582349 +n04583620 +n04585745 +n04585980 +n04586932 +n04587648 +n04588739 +n04589190 +n04589434 +n04591056 +n04591887 +n04592005 +n04592099 +n04594218 +n04594489 +n04595285 +n04595855 +n04596852 +n04597066 +n04597804 +n04598136 +n04598582 +n04599124 +n04600312 +n04600486 +n04600912 +n04603729 +n04603872 +n04605726 +n04606574 +n04608329 +n04608567 +n04609531 +n04609651 +n04610176 +n04610503 +n04610676 +n04611916 +n04613015 +n04615226 +n04615644 +n04950713 +n04951373 +n04958634 +n04959672 +n04960277 +n04961691 +n04963740 +n04965179 +n04965661 +n04967191 +n04968895 +n04970059 +n04970631 +n04970916 +n04972801 +n04973386 +n04976952 +n05238282 +n05241218 +n05242070 +n05244934 +n05266879 +n05399034 +n05447757 +n05449959 +n05453657 +n05467758 +n05586759 +n06254669 +n06262567 +n06263369 +n06263609 +n06263762 +n06266417 +n06266710 +n06267145 +n06271778 +n06272290 +n06272803 +n06274092 +n06275353 +n06276697 +n06277280 +n06281040 +n06359467 +n06359657 +n06418693 +n06591815 +n06592078 
+n06595351 +n06613686 +n06793231 +n07556637 +n07556970 +n07557165 +n07557434 +n07560652 +n07561112 +n07562495 +n07563800 +n07564629 +n07564971 +n07565725 +n07565945 +n07566340 +n07566863 +n07567390 +n07567707 +n07568818 +n07569106 +n07569543 +n07570720 +n07572353 +n07572957 +n07573103 +n07573696 +n07574602 +n07575076 +n07575726 +n07575984 +n07576182 +n07576438 +n07576577 +n07577374 +n07579575 +n07580053 +n07580359 +n07580470 +n07581346 +n07581775 +n07582277 +n07582441 +n07582609 +n07583197 +n07584228 +n07584593 +n07585208 +n07587441 +n07587700 +n07588947 +n07590320 +n07591473 +n07592094 +n07592656 +n07593774 +n07595914 +n07596046 +n07596452 +n07596684 +n07597145 +n07597365 +n07598734 +n07599468 +n07599783 +n07599998 +n07600506 +n07601407 +n07605474 +n07605944 +n07606278 +n07606764 +n07607707 +n07609407 +n07609840 +n07611148 +n07611358 +n07611839 +n07611991 +n07612367 +n07612632 +n07612996 +n07613671 +n07614198 +n07614825 +n07615052 +n07615190 +n07615460 +n07615569 +n07615671 +n07616590 +n07617188 +n07619004 +n07623136 +n07624466 +n07627931 +n07628068 +n07641928 +n07642471 +n07642933 +n07643306 +n07643474 +n07643764 +n07643981 +n07644244 +n07663899 +n07678729 +n07679356 +n07680517 +n07680932 +n07681926 +n07682316 +n07682624 +n07683786 +n07684600 +n07685730 +n07686873 +n07687211 +n07687469 +n07687789 +n07689003 +n07690273 +n07690892 +n07692405 +n07692614 +n07693889 +n07693972 +n07694403 +n07695878 +n07695965 +n07697100 +n07704054 +n07705931 +n07707451 +n07708512 +n07708798 +n07709333 +n07710007 +n07710283 +n07710616 +n07710952 +n07712063 +n07712382 +n07712748 +n07712856 +n07713395 +n07713895 +n07714078 +n07714802 +n07714895 +n07715561 +n07715721 +n07716034 +n07717070 +n07717858 +n07718671 +n07719437 +n07719839 +n07720442 +n07720615 +n07721325 +n07721456 +n07721678 +n07722217 +n07722763 +n07723330 +n07723559 +n07723753 +n07724943 +n07725376 +n07725531 +n07726796 +n07727578 +n07727868 +n07728804 +n07729000 +n07729485 +n07730406 +n07730855 +n07731122 +n07731587 
+n07731952 +n07732302 +n07732747 +n07734017 +n07734292 +n07735052 +n07735803 +n07737081 +n07739125 +n07739506 +n07740220 +n07740954 +n07741461 +n07742012 +n07742704 +n07744246 +n07747055 +n07747811 +n07747951 +n07748753 +n07748912 +n07749095 +n07749192 +n07749312 +n07749731 +n07750586 +n07751451 +n07752377 +n07752664 +n07753743 +n07755089 +n07755411 +n07755707 +n07756096 +n07757132 +n07757312 +n07757602 +n07757990 +n07758680 +n07758950 +n07759424 +n07759691 +n07759816 +n07760501 +n07761141 +n07761309 +n07761611 +n07761777 +n07761954 +n07767344 +n07767847 +n07770571 +n07771212 +n07800091 +n07800740 +n07801508 +n07802152 +n07802417 +n07803093 +n07803545 +n07804323 +n07805254 +n07805594 +n07805731 +n07806221 +n07806633 +n07807317 +n07807710 +n07807922 +n07809096 +n07809368 +n07810907 +n07811416 +n07812184 +n07814203 +n07815588 +n07818277 +n07819480 +n07820497 +n07820814 +n07823951 +n07824702 +n07824988 +n07825717 +n07828987 +n07829412 +n07830593 +n07832902 +n07834507 +n07836731 +n07837002 +n07837362 +n07838233 +n07841495 +n07841639 +n07841907 +n07842753 +n07842972 +n07843464 +n07843775 +n07844042 +n07844604 +n07846143 +n07847198 +n07848338 +n07848771 +n07849336 +n07850083 +n07850329 +n07851298 +n07852045 +n07852919 +n07854813 +n07856270 +n07857959 +n07858595 +n07859284 +n07859583 +n07860805 +n07861158 +n07861813 +n07863374 +n07864638 +n07865105 +n07867421 +n07867883 +n07869391 +n07869775 +n07870313 +n07871436 +n07873464 +n07874063 +n07874159 +n07874259 +n07874343 +n07874441 +n07874780 +n07875693 +n07875835 +n07876281 +n07880751 +n07881117 +n07881205 +n07881404 +n07881800 +n07882497 +n07882886 +n07883031 +n07883251 +n07883384 +n07884567 +n07886572 +n07886849 +n07887634 +n07888465 +n07888909 +n07889510 +n07890352 +n07890750 +n07891726 +n07892813 +n07893528 +n07893891 +n07894102 +n07894298 +n07894965 +n07895237 +n07895435 +n07895595 +n07895710 +n07895839 +n07896287 +n07897200 +n07897865 +n07898117 +n07898333 +n07898745 +n07899108 +n07900406 +n07900616 +n07901587 
+n07903208 +n07904395 +n07905038 +n07906284 +n07906877 +n07907161 +n07907548 +n07907943 +n07909129 +n07909811 +n07911371 +n07911677 +n07912211 +n07913393 +n07914413 +n07915618 +n07916041 +n07917618 +n07918028 +n07920222 +n07921455 +n07921948 +n07923748 +n07924033 +n07924560 +n07924834 +n07925966 +n07926920 +n07927197 +n07927931 +n07929519 +n07930554 +n07931001 +n07931096 +n07932614 +n07932841 +n07933274 +n07933891 +n07934032 +n07934530 +n07935152 +n07935504 +n07936263 +n07936745 +n07938149 +n07951464 +n08554440 +n08558963 +n08596076 +n08598301 +n08616050 +n08640531 +n08659446 +n09191635 +n09206896 +n09206985 +n09210862 +n09213434 +n09213565 +n09214060 +n09214916 +n09215437 +n09217230 +n09230041 +n09233446 +n09238926 +n09255070 +n09259025 +n09259219 +n09262690 +n09265620 +n09269882 +n09270735 +n09287968 +n09289331 +n09289596 +n09290444 +n09295946 +n09300306 +n09302616 +n09303008 +n09303528 +n09304750 +n09305031 +n09308572 +n09309292 +n09315159 +n09326662 +n09335693 +n09335809 +n09336555 +n09337253 +n09344198 +n09352849 +n09359803 +n09362945 +n09366017 +n09366317 +n09375606 +n09376526 +n09381242 +n09393605 +n09396465 +n09398677 +n09405787 +n09406793 +n09409512 +n09409752 +n09410224 +n09416076 +n09421799 +n09428628 +n09432990 +n09433442 +n09437454 +n09439213 +n09443641 +n09453008 +n09458269 +n09472413 +n09474010 +n09505153 +n09606009 +n09606527 +n09608709 +n09610405 +n09613191 +n09615336 +n09616922 +n09619168 +n09619452 +n09620078 +n09620794 +n09622049 +n09622302 +n09624168 +n09624559 +n09625401 +n09626238 +n09627906 +n09629246 +n09629752 +n09631129 +n09632274 +n09632518 +n09633969 +n09636339 +n09638875 +n09639919 +n09641002 +n09644152 +n09648743 +n09651123 +n09665545 +n09669631 +n09670280 +n09676884 +n09679925 +n09690208 +n09694771 +n09696585 +n09697401 +n09700964 +n09701148 +n09701833 +n09705124 +n09708750 +n09710164 +n09716047 +n09718217 +n09722658 +n09724785 +n09725229 +n09725772 +n09726621 +n09727440 +n09727826 +n09730204 +n09731436 +n09731571 +n09735258 
+n09738400 +n09744679 +n09754217 +n09758173 +n09758885 +n09761068 +n09763784 +n09764201 +n09764598 +n09765278 +n09767197 +n09769076 +n09770179 +n09771435 +n09772746 +n09773962 +n09774783 +n09790482 +n09792555 +n09795124 +n09795334 +n09800964 +n09802445 +n09802641 +n09805151 +n09805475 +n09809538 +n09809749 +n09810166 +n09811712 +n09814660 +n09815790 +n09816771 +n09818022 +n09820263 +n09821831 +n09823502 +n09824135 +n09824609 +n09826204 +n09830194 +n09831962 +n09834699 +n09836160 +n09840217 +n09841188 +n09841515 +n09841696 +n09842047 +n09848489 +n09851575 +n09853645 +n09853881 +n09854915 +n09857007 +n09861946 +n09865398 +n09868270 +n09871681 +n09877951 +n09889691 +n09892693 +n09894654 +n09895222 +n09895701 +n09902353 +n09903153 +n09910374 +n09917593 +n09918248 +n09923418 +n09923673 +n09924996 +n09927089 +n09927451 +n09928136 +n09928451 +n09929298 +n09930257 +n09930876 +n09931640 +n09933098 +n09935434 +n09936892 +n09937056 +n09941964 +n09942970 +n09943239 +n09943811 +n09944160 +n09945319 +n09950457 +n09951070 +n09951274 +n09960688 +n09962966 +n09964411 +n09968845 +n09974648 +n09976728 +n09979321 +n09983572 +n09989502 +n09990415 +n09991867 +n09992538 +n09992837 +n09993252 +n09994673 +n09996481 +n09997622 +n10001217 +n10006748 +n10007684 +n10009484 +n10009671 +n10015215 +n10015897 +n10017422 +n10018861 +n10020890 +n10024362 +n10029068 +n10034201 +n10034614 +n10035952 +n10036266 +n10036929 +n10037385 +n10040945 +n10041887 +n10042690 +n10043643 +n10044879 +n10047459 +n10048367 +n10048836 +n10052694 +n10053808 +n10054657 +n10055730 +n10055847 +n10060175 +n10067968 +n10070711 +n10077593 +n10078131 +n10078806 +n10079399 +n10079893 +n10080869 +n10083823 +n10084043 +n10084295 +n10086383 +n10091651 +n10092488 +n10093475 +n10094584 +n10095869 +n10098710 +n10098862 +n10099375 +n10101634 +n10102800 +n10105085 +n10107303 +n10109662 +n10111903 +n10112129 +n10118844 +n10126177 +n10126424 +n10126708 +n10127689 +n10129825 +n10134396 +n10134982 +n10136959 +n10142747 +n10142946 
+n10143172 +n10143725 +n10145340 +n10145774 +n10148305 +n10150071 +n10150940 +n10151570 +n10153594 +n10154186 +n10154601 +n10155849 +n10162194 +n10164233 +n10165448 +n10168183 +n10168584 +n10171567 +n10182190 +n10185793 +n10186774 +n10187130 +n10195593 +n10200781 +n10202624 +n10205457 +n10206173 +n10207169 +n10210137 +n10215623 +n10216106 +n10224578 +n10225219 +n10228278 +n10235385 +n10237069 +n10241300 +n10243664 +n10245639 +n10249270 +n10249459 +n10249950 +n10257221 +n10259348 +n10263411 +n10266328 +n10266848 +n10271677 +n10273064 +n10274815 +n10276045 +n10282672 +n10284064 +n10284965 +n10296444 +n10299250 +n10299700 +n10305635 +n10305802 +n10306004 +n10308732 +n10312287 +n10314054 +n10315561 +n10316360 +n10317007 +n10317500 +n10320863 +n10321340 +n10322238 +n10323999 +n10324560 +n10328437 +n10332385 +n10335246 +n10335931 +n10340312 +n10341573 +n10343554 +n10345100 +n10353016 +n10353355 +n10355142 +n10355449 +n10355688 +n10356450 +n10357613 +n10360747 +n10366966 +n10369528 +n10370381 +n10376523 +n10377021 +n10379376 +n10380672 +n10383816 +n10386984 +n10387196 +n10387324 +n10393909 +n10396106 +n10399130 +n10400998 +n10402824 +n10403876 +n10405694 +n10407954 +n10409752 +n10411551 +n10415037 +n10417551 +n10418101 +n10419047 +n10420031 +n10421016 +n10426454 +n10427764 +n10428004 +n10433737 +n10435716 +n10435988 +n10438172 +n10439851 +n10444194 +n10450303 +n10462860 +n10464052 +n10466918 +n10467179 +n10470779 +n10474064 +n10474645 +n10478960 +n10481268 +n10482054 +n10482921 +n10484858 +n10488309 +n10495421 +n10499355 +n10499857 +n10506544 +n10508710 +n10512372 +n10512708 +n10519494 +n10521100 +n10521662 +n10522035 +n10522324 +n10522759 +n10523341 +n10525134 +n10525436 +n10525617 +n10527334 +n10529231 +n10541833 +n10542888 +n10543161 +n10544232 +n10544748 +n10546633 +n10548537 +n10548681 +n10554846 +n10556518 +n10557854 +n10559288 +n10560637 +n10568200 +n10570019 +n10575787 +n10576962 +n10577284 +n10580535 +n10582746 +n10583387 +n10594147 +n10595164 +n10595647 
+n10599806 +n10602985 +n10604634 +n10605253 +n10610465 +n10612210 +n10614629 +n10617193 +n10618685 +n10618848 +n10619642 +n10620758 +n10622053 +n10624074 +n10624310 +n10625860 +n10628644 +n10630188 +n10632576 +n10633450 +n10648237 +n10648696 +n10654932 +n10657835 +n10661002 +n10661563 +n10665698 +n10669991 +n10674130 +n10676018 +n10679174 +n10682953 +n10686073 +n10692883 +n10693824 +n10694258 +n10698368 +n10700201 +n10700640 +n10701180 +n10703336 +n10703692 +n10705615 +n10707233 +n10708454 +n10709529 +n10713686 +n10720453 +n10721321 +n10722575 +n10722965 +n10726786 +n10735298 +n10740868 +n10741152 +n10742997 +n10744164 +n10747119 +n10751265 +n10752480 +n10759151 +n10759982 +n10763383 +n10763620 +n10765679 +n10766260 +n10768903 +n10779610 +n10780632 +n10782791 +n10782940 +n10787470 +n10791221 +n10792335 +n10793570 +n10794014 +n11531193 +n11537327 +n11542640 +n11545524 +n11545714 +n11547562 +n11547855 +n11552386 +n11553240 +n11596108 +n11598686 +n11600372 +n11601177 +n11601918 +n11608250 +n11609475 +n11609684 +n11612923 +n11614250 +n11618861 +n11620673 +n11621029 +n11623105 +n11624531 +n11627168 +n11628456 +n11630017 +n11630489 +n11643835 +n11645914 +n11647306 +n11649878 +n11650558 +n11650759 +n11661372 +n11665372 +n11666854 +n11669921 +n11672400 +n11674332 +n11676500 +n11684264 +n11689483 +n11693981 +n11697560 +n11700864 +n11703669 +n11708658 +n11709674 +n11713164 +n11720353 +n11722982 +n11723770 +n11725015 +n11725623 +n11727091 +n11729478 +n11733054 +n11736694 +n11741350 +n11745817 +n11747468 +n11748002 +n11751765 +n11752578 +n11756092 +n11756669 +n11759224 +n11763625 +n11767354 +n11769621 +n11771539 +n11774513 +n11775340 +n11779300 +n11782036 +n11783920 +n11785668 +n11789438 +n11789962 +n11790788 +n11793779 +n11794519 +n11796005 +n11801392 +n11805956 +n11807108 +n11807979 +n11808721 +n11811473 +n11815491 +n11817914 +n11820965 +n11823043 +n11830714 +n11830906 +n11832214 +n11836722 +n11839568 +n11845557 +n11851578 +n11855274 +n11857696 +n11862835 +n11865071 
+n11866248 +n11868814 +n11869351 +n11869689 +n11872146 +n11875691 +n11875938 +n11877473 +n11878283 +n11887119 +n11890022 +n11892637 +n11894327 +n11898639 +n11900569 +n11902709 +n11915214 +n11915658 +n11915899 +n11916467 +n11918286 +n11919447 +n11920498 +n11924445 +n11928352 +n11928858 +n11931918 +n11932745 +n11939699 +n11940006 +n11943407 +n11944196 +n11945367 +n11946727 +n11947251 +n11948264 +n11950345 +n11951511 +n11952346 +n11953884 +n11954484 +n11956850 +n11965627 +n11967744 +n11970101 +n11971248 +n11971783 +n11972759 +n11973341 +n11976170 +n11977303 +n11978233 +n11982115 +n11985053 +n11985739 +n11988893 +n11991263 +n11997032 +n11997969 +n12006766 +n12008252 +n12008749 +n12010628 +n12013511 +n12015959 +n12018760 +n12020507 +n12024176 +n12030654 +n12034141 +n12036067 +n12036939 +n12041446 +n12043444 +n12045860 +n12050959 +n12053405 +n12056217 +n12057447 +n12062468 +n12065316 +n12065777 +n12075151 +n12076577 +n12080395 +n12083591 +n12086012 +n12086539 +n12087961 +n12090890 +n12092262 +n12094244 +n12095020 +n12096395 +n12101870 +n12102133 +n12105125 +n12107970 +n12108432 +n12109827 +n12110778 +n12112008 +n12112918 +n12113657 +n12117017 +n12119099 +n12119238 +n12121033 +n12124627 +n12126360 +n12131550 +n12135898 +n12136720 +n12137120 +n12137569 +n12139575 +n12141495 +n12142085 +n12143676 +n12144313 +n12146311 +n12147226 +n12152532 +n12153580 +n12154773 +n12155583 +n12156819 +n12157056 +n12157769 +n12158031 +n12158798 +n12159055 +n12159555 +n12160490 +n12161285 +n12163035 +n12164363 +n12166424 +n12168565 +n12170585 +n12173664 +n12174311 +n12174926 +n12182049 +n12187663 +n12188289 +n12195391 +n12196129 +n12199266 +n12201580 +n12202936 +n12205694 +n12214789 +n12215579 +n12217453 +n12221191 +n12224978 +n12225349 +n12226932 +n12231192 +n12236546 +n12237486 +n12244153 +n12245695 +n12246232 +n12252168 +n12252866 +n12253229 +n12256112 +n12257570 +n12260799 +n12262553 +n12265394 +n12266217 +n12266796 +n12268246 +n12269241 +n12269652 +n12271643 +n12274630 +n12275489 
+n12281241 +n12284262 +n12286826 +n12287642 +n12288823 +n12290748 +n12293723 +n12296432 +n12300840 +n12302071 +n12303462 +n12305475 +n12306717 +n12307756 +n12310349 +n12316444 +n12318378 +n12320010 +n12322501 +n12328398 +n12330469 +n12334293 +n12334891 +n12335483 +n12335664 +n12335800 +n12340383 +n12341542 +n12342299 +n12343480 +n12344283 +n12346578 +n12350758 +n12352287 +n12355760 +n12360108 +n12360684 +n12364604 +n12367611 +n12374418 +n12377198 +n12381511 +n12385429 +n12387633 +n12387839 +n12392070 +n12396924 +n12399132 +n12401335 +n12401684 +n12405714 +n12409231 +n12411461 +n12412355 +n12412606 +n12416423 +n12419037 +n12420535 +n12421467 +n12421683 +n12425281 +n12430198 +n12431434 +n12437513 +n12437769 +n12441958 +n12446200 +n12446519 +n12449296 +n12450344 +n12451915 +n12454159 +n12459629 +n12460697 +n12461466 +n12462032 +n12463743 +n12464476 +n12466727 +n12470092 +n12474167 +n12475035 +n12476510 +n12480895 +n12491826 +n12495146 +n12499163 +n12506181 +n12508309 +n12509476 +n12511856 +n12516584 +n12522188 +n12524188 +n12526516 +n12527738 +n12539074 +n12539306 +n12546183 +n12548280 +n12550210 +n12554526 +n12556656 +n12560282 +n12560775 +n12562577 +n12572546 +n12573256 +n12575322 +n12582231 +n12582665 +n12582846 +n12583126 +n12583401 +n12584191 +n12586298 +n12590232 +n12594989 +n12595964 +n12602262 +n12602980 +n12612170 +n12614477 +n12615710 +n12620196 +n12622875 +n12624381 +n12625383 +n12631331 +n12633638 +n12634211 +n12634429 +n12635744 +n12636885 +n12638218 +n12638556 +n12639736 +n12640607 +n12641413 +n12641931 +n12642200 +n12643473 +n12644902 +n12645174 +n12647376 +n12649065 +n12650556 +n12651821 +n12653218 +n12655869 +n12658118 +n12658846 +n12659356 +n12660601 +n12662772 +n12663804 +n12665048 +n12667406 +n12667964 +n12674120 +n12674685 +n12682411 +n12683407 +n12685431 +n12685831 +n12688716 +n12690653 +n12695144 +n12698435 +n12705013 +n12707781 +n12708293 +n12709901 +n12711596 +n12713063 +n12714755 +n12715914 +n12717072 +n12719684 +n12724942 +n12725521 
+n12727301 +n12731401 +n12732491 +n12732756 +n12733647 +n12741222 +n12742741 +n12743823 +n12746884 +n12749049 +n12752205 +n12755225 +n12756457 +n12762896 +n12768369 +n12771192 +n12772753 +n12777436 +n12778605 +n12779603 +n12785724 +n12791064 +n12793015 +n12794985 +n12798284 +n12800586 +n12801520 +n12805146 +n12806732 +n12810595 +n12812235 +n12814643 +n12817464 +n12822769 +n12823717 +n12823859 +n12832315 +n12833985 +n12834798 +n12836212 +n12836862 +n12839979 +n12840749 +n12842302 +n12842887 +n12844939 +n12849061 +n12853080 +n12854048 +n12858150 +n12866968 +n12869478 +n12870535 +n12871272 +n12877244 +n12878169 +n12879963 +n12882779 +n12884260 +n12890265 +n12893463 +n12903367 +n12904938 +n12908645 +n12909421 +n12912670 +n12917901 +n12922763 +n12926480 +n12928071 +n12929403 +n12930778 +n12931906 +n12934036 +n12934479 +n12939104 +n12941536 +n12942395 +n12943443 +n12946849 +n12950126 +n12952165 +n12953206 +n12956170 +n12957608 +n12960378 +n12960863 +n12965626 +n12968136 +n12969131 +n12970193 +n12971400 +n12973791 +n12974987 +n12976198 +n12980840 +n12982468 +n12983961 +n12985773 +n12987056 +n12988158 +n12992868 +n12997654 +n12997919 +n13000891 +n13001041 +n13001206 +n13001366 +n13001529 +n13002750 +n13002925 +n13003061 +n13003254 +n13003522 +n13003712 +n13004423 +n13004640 +n13004826 +n13004992 +n13005329 +n13005984 +n13006171 +n13006631 +n13006894 +n13007417 +n13007629 +n13008157 +n13008315 +n13008485 +n13008689 +n13008839 +n13009085 +n13009244 +n13009429 +n13009656 +n13010694 +n13010951 +n13011221 +n13012253 +n13012469 +n13012973 +n13013534 +n13013764 +n13013965 +n13014097 +n13014265 +n13014409 +n13014581 +n13014741 +n13014879 +n13017102 +n13017240 +n13017439 +n13017610 +n13017789 +n13017979 +n13018088 +n13018232 +n13018407 +n13019496 +n13019643 +n13019835 +n13020191 +n13020481 +n13020964 +n13021166 +n13021332 +n13021543 +n13021689 +n13021867 +n13022210 +n13022709 +n13024012 +n13024500 +n13025647 +n13028611 +n13032115 +n13032923 +n13035241 +n13035389 +n13035707 
+n13037585 +n13037805 +n13038068 +n13038376 +n13038744 +n13039349 +n13040629 +n13040796 +n13041312 +n13042982 +n13043926 +n13045210 +n13045975 +n13046130 +n13049953 +n13055423 +n13055577 +n13055792 +n13055949 +n13056135 +n13056349 +n13056607 +n13056799 +n13057054 +n13057242 +n13057422 +n13057639 +n13058037 +n13058272 +n13058608 +n13059298 +n13059657 +n13060017 +n13060190 +n13063269 +n13066129 +n13067191 +n13068917 +n13070308 +n13070875 +n13071371 +n13071553 +n13071815 +n13072031 +n13072209 +n13072350 +n13072528 +n13072706 +n13072863 +n13073055 +n13073703 +n13074619 +n13074814 +n13075020 +n13075272 +n13075441 +n13075684 +n13075847 +n13076041 +n13076405 +n13076643 +n13076831 +n13077033 +n13077295 +n13079419 +n13083023 +n13084184 +n13085113 +n13091620 +n13091774 +n13100156 +n13100677 +n13104059 +n13108131 +n13108662 +n13108841 +n13109733 +n13110915 +n13111174 +n13111881 +n13118707 +n13119870 +n13120211 +n13121104 +n13122364 +n13123431 +n13125117 +n13130161 +n13130726 +n13132034 +n13132338 +n13132486 +n13132940 +n13134302 +n13134947 +n13135832 +n13136316 +n13136556 +n13137409 +n13137672 +n13138308 +n13138658 +n13138842 +n13139055 +n13139647 +n13141141 +n13145444 +n13149296 +n13150894 +n13154841 +n13156986 +n13157137 +n13160831 +n13163991 +n13172923 +n13174670 +n13177529 +n13180534 +n13186388 +n13188096 +n13188268 +n13192625 +n13193642 +n13194572 +n13195761 +n13199970 +n13201969 +n13206817 +n13207736 +n13208705 +n13211790 +n13219422 +n13221529 +n13224673 +n13230662 +n13231678 +n13231919 +n13232106 +n13232363 +n13232779 +n13233727 +n13238375 +n13238988 +n13252672 +n13862780 +n13863186 +n13863473 +n13863771 +n13864153 +n13864965 +n13865298 +n13865483 +n13866144 +n13866827 +n13867492 +n13868248 +n13868371 +n13872592 +n13873502 +n13875392 +n13875571 +n13878306 +n13879320 +n13883603 +n13888491 +n13893786 +n13894434 +n13896100 +n13897996 +n13900287 +n13903079 +n13905121 +n13905275 +n13905792 +n13912260 +n13915999 +n14633206 +n14696793 +n14844693 +n14853210 +n14899328 
+n14900184 +n14974264 +n14977504 +n14992287 +n15062057 +n15067877 +n15089258 +n15089472 +n15089645 +n15089803 +n15090742 +n15092409 +n15092751 \ No newline at end of file diff --git a/workloads/realworld/uvm/darknet/cfg/imagenet.shortnames.list b/workloads/realworld/uvm/darknet/cfg/imagenet.shortnames.list new file mode 100644 index 0000000000000000000000000000000000000000..e7a18d44b543086958eaf60e6dc0b7deb0df9400 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/imagenet.shortnames.list @@ -0,0 +1,21842 @@ +kit fox +English setter +Siberian husky +Australian terrier +English springer +grey whale +lesser panda +Egyptian cat +ibex +Persian cat +cougar +gazelle +porcupine +sea lion +malamute +badger +Great Dane +Walker hound +Welsh springer spaniel +whippet +Scottish deerhound +killer whale +mink +African elephant +Weimaraner +soft-coated wheaten terrier +Dandie Dinmont +red wolf +Old English sheepdog +jaguar +otterhound +bloodhound +Airedale +hyena +meerkat +giant schnauzer +titi +three-toed sloth +sorrel +black-footed ferret +dalmatian +black-and-tan coonhound +papillon +skunk +Staffordshire bullterrier +Mexican hairless +Bouvier des Flandres +weasel +miniature poodle +Cardigan +malinois +bighorn +fox squirrel +colobus +tiger cat +Lhasa +impala +coyote +Yorkshire terrier +Newfoundland +brown bear +red fox +Norwegian elkhound +Rottweiler +hartebeest +Saluki +grey fox +schipperke +Pekinese +Brabancon griffon +West Highland white terrier +Sealyham terrier +guenon +mongoose +indri +tiger +Irish wolfhound +wild boar +EntleBucher +zebra +ram +French bulldog +orangutan +basenji +leopard +Bernese mountain dog +Maltese dog +Norfolk terrier +toy terrier +vizsla +cairn +squirrel monkey +groenendael +clumber +Siamese cat +chimpanzee +komondor +Afghan hound +Japanese spaniel +proboscis monkey +guinea pig +white wolf +ice bear +gorilla +borzoi +toy poodle +Kerry blue terrier +ox +Scotch terrier +Tibetan mastiff +spider monkey +Doberman +Boston bull +Greater Swiss Mountain 
dog +Appenzeller +Shih-Tzu +Irish water spaniel +Pomeranian +Bedlington terrier +warthog +Arabian camel +siamang +miniature schnauzer +collie +golden retriever +Irish terrier +affenpinscher +Border collie +hare +boxer +silky terrier +beagle +Leonberg +German short-haired pointer +patas +dhole +baboon +macaque +Chesapeake Bay retriever +bull mastiff +kuvasz +capuchin +pug +curly-coated retriever +Norwich terrier +flat-coated retriever +hog +keeshond +Eskimo dog +Brittany spaniel +standard poodle +Lakeland terrier +snow leopard +Gordon setter +dingo +standard schnauzer +hamster +Tibetan terrier +Arctic fox +wire-haired fox terrier +basset +water buffalo +American black bear +Angora +bison +howler monkey +hippopotamus +chow +giant panda +American Staffordshire terrier +Shetland sheepdog +Great Pyrenees +Chihuahua +tabby +marmoset +Labrador retriever +Saint Bernard +armadillo +Samoyed +bluetick +redbone +polecat +marmot +kelpie +gibbon +llama +miniature pinscher +wood rabbit +Italian greyhound +lion +cocker spaniel +Irish setter +dugong +Indian elephant +beaver +Sussex spaniel +Pembroke +Blenheim spaniel +Madagascar cat +Rhodesian ridgeback +lynx +African hunting dog +langur +Ibizan hound +timber wolf +cheetah +English foxhound +briard +sloth bear +Border terrier +German shepherd +otter +koala +tusker +echidna +wallaby +platypus +wombat +revolver +umbrella +schooner +soccer ball +accordion +ant +starfish +chambered nautilus +grand piano +laptop +strawberry +airliner +warplane +airship +balloon +space shuttle +fireboat +gondola +speedboat +lifeboat +canoe +yawl +catamaran +trimaran +container ship +liner +pirate +aircraft carrier +submarine +wreck +half track +tank +missile +bobsled +dogsled +bicycle-built-for-two +mountain bike +freight car +passenger car +barrow +shopping cart +motor scooter +forklift +electric locomotive +steam locomotive +amphibian +ambulance +beach wagon +cab +convertible +jeep +limousine +minivan +Model T +racer +sports car +go-kart +golfcart 
+moped +snowplow +fire engine +garbage truck +pickup +tow truck +trailer truck +moving van +police van +recreational vehicle +streetcar +snowmobile +tractor +mobile home +tricycle +unicycle +horse cart +jinrikisha +oxcart +bassinet +cradle +crib +four-poster +bookcase +china cabinet +medicine chest +chiffonier +table lamp +file +park bench +barber chair +throne +folding chair +rocking chair +studio couch +toilet seat +desk +pool table +dining table +entertainment center +wardrobe +Granny Smith +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +acorn +hip +ear +rapeseed +corn +buckeye +organ +upright +chime +drum +gong +maraca +marimba +steel drum +banjo +cello +violin +harp +acoustic guitar +electric guitar +cornet +French horn +trombone +harmonica +ocarina +panpipe +bassoon +oboe +sax +flute +daisy +yellow lady's slipper +cliff +valley +alp +volcano +promontory +sandbar +coral reef +lakeside +seashore +geyser +hatchet +cleaver +letter opener +plane +power drill +lawn mower +hammer +corkscrew +can opener +plunger +screwdriver +shovel +plow +chain saw +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +white stork +black stork +spoonbill +flamingo +American egret +little blue heron +bittern +crane +limpkin +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +European gallinule +pelican +king penguin +albatross +great white shark +tiger shark +hammerhead +electric ray +stingray +barracouta +coho +tench +goldfish +eel +rock beauty +anemone fish +lionfish +puffer +sturgeon +gar +loggerhead +leatherback turtle +mud turtle +terrapin 
+box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +triceratops +African crocodile +American alligator +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +whistle +wing +paintbrush +hand blower +oxygen mask +snorkel +loudspeaker +microphone +screen +mouse +electric fan +oil filter +strainer +space heater +stove +guillotine +barometer +rule +odometer +scale +analog clock +digital clock +wall clock +hourglass +sundial +parking meter +stopwatch +digital watch +stethoscope +syringe +magnetic compass +binoculars +projector +sunglasses +loupe +radio telescope +bow +cannon +assault rifle +rifle +projectile +computer keyboard +typewriter keyboard +crane +lighter +abacus +cash machine +slide rule +desktop computer +hand-held computer +notebook +web site +harvester +thresher +printer +slot +vending machine +sewing machine +joystick +switch +hook +car wheel +paddlewheel +pinwheel +potter's wheel +gas pump +carousel +swing +reel +radiator +puck +hard disc +sunglass +pick +car mirror +solar dish +remote control +disk brake +buckle +hair slide +knot +combination lock +padlock +nail +safety pin +screw +muzzle +seat belt +ski +candle +jack-o'-lantern +spotlight +torch +neck brace +pier +tripod +maypole +mousetrap +spider web +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +isopod +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros 
beetle +weevil +fly +bee +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +sea urchin +sea cucumber +iron +espresso maker +microwave +Dutch oven +rotisserie +toaster +waffle iron +vacuum +dishwasher +refrigerator +washer +Crock Pot +frying pan +wok +caldron +coffeepot +teapot +spatula +altar +triumphal arch +patio +steel arch bridge +suspension bridge +viaduct +barn +greenhouse +palace +monastery +library +apiary +boathouse +church +mosque +stupa +planetarium +restaurant +cinema +home theater +lumbermill +coil +obelisk +totem pole +castle +prison +grocery store +bakery +barbershop +bookshop +butcher shop +confectionery +shoe shop +tobacco shop +toyshop +fountain +cliff dwelling +yurt +dock +brass +megalith +bannister +breakwater +dam +chainlink fence +picket fence +worm fence +stone wall +grille +sliding door +turnstile +mountain tent +scoreboard +honeycomb +plate rack +pedestal +beacon +mashed potato +bell pepper +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +cardoon +mushroom +shower curtain +jean +carton +handkerchief +sandal +ashcan +safe +plate +necklace +croquet ball +fur coat +thimble +pajama +running shoe +cocktail shaker +chest +manhole cover +modem +tub +tray +balance beam +bagel +prayer rug +kimono +hot pot +whiskey jug +knee pad +book jacket +spindle +ski mask +beer bottle +crash helmet +bottlecap +tile roof +mask +maillot +Petri dish +football helmet +bathing cap +teddy +holster +pop bottle +photocopier +vestment +crossword puzzle +golf ball +trifle +suit +water tower +feather boa +cloak +red wine +drumstick +shield +Christmas stocking +hoopskirt +menu +stage +bonnet +meat loaf +baseball +face powder +scabbard +sunscreen +beer glass +hen-of-the-woods 
+guacamole +lampshade +wool +hay +bow tie +mailbag +water jug +bucket +dishrag +soup bowl +eggnog +mortar +trench coat +paddle +chain +swab +mixing bowl +potpie +wine bottle +shoji +bulletproof vest +drilling platform +binder +cardigan +sweatshirt +pot +birdhouse +hamper +ping-pong ball +pencil box +pay-phone +consomme +apron +punching bag +backpack +groom +bearskin +pencil sharpener +broom +mosquito net +abaya +mortarboard +poncho +crutch +Polaroid camera +space bar +cup +racket +traffic light +quill +radio +dough +cuirass +military uniform +lipstick +shower cap +monitor +oscilloscope +mitten +brassiere +French loaf +vase +milk can +rugby ball +paper towel +earthstar +envelope +miniskirt +cowboy hat +trolleybus +perfume +bathtub +hotdog +coral fungus +bullet train +pillow +toilet tissue +cassette +carpenter's kit +ladle +stinkhorn +lotion +hair spray +academic gown +dome +crate +wig +burrito +pill bottle +chain mail +theater curtain +window shade +barrel +washbasin +ballpoint +basketball +bath towel +cowboy boot +gown +window screen +agaric +cellular telephone +nipple +barbell +mailbox +lab coat +fire screen +minibus +packet +maze +pole +horizontal bar +sombrero +pickelhaube +rain barrel +wallet +cassette player +comic book +piggy bank +street sign +bell cote +fountain pen +Windsor tie +volleyball +overskirt +sarong +purse +bolo tie +bib +parachute +sleeping bag +television +swimming trunks +measuring cup +espresso +pizza +breastplate +shopping basket +wooden spoon +saltshaker +chocolate sauce +ballplayer +goblet +gyromitra +stretcher +water bottle +dial telephone +soap dispenser +jersey +school bus +jigsaw puzzle +plastic bag +reflex camera +diaper +Band Aid +ice lolly +velvet +tennis ball +gasmask +doormat +Loafer +ice cream +pretzel +quilt +maillot +tape player +clog +iPod +bolete +scuba diver +pitcher +matchstick +bikini +sock +CD player +lens cap +thatch +vault +beaker +bubble +cheeseburger +parallel bars +flagpole +coffee mug +rubber eraser +stole +carbonara 
+dumbbell +singles +Virginia deer +eastern grey squirrel +gelding +pylon +table-tennis table +peacock +Segway +surfing +tamandua +knocker +steering wheel +motorcycling +coati +sitar +range +backhoe +agaric +dashboard +water polo +concrete mixer +treadmill +golf bag +skateboarding +royal tennis +tartan +four-wheel drive +sport utility +sedan +print +luggage rack +softball +windmill +ben +red admiral +jalousie +towel rail +truss +strand +ice hockey +sconce +wind turbine +plush +stained-glass window +ballpark +thoroughbred +love seat +red-spotted purple +miller +Adelie +freight liner +clock tower +acrobatics +shaving brush +ewe +ottoman +African violet +bicycle wheel +cork +windmill +satin +comma +coffee mill +baggage +wasp's nest +batting glove +Ferris wheel +push-bike +porthole +football stadium +gas tank +barbecue +handlebar +hula-hoop +fairground +rapier +garter stitch +exercise bike +control tower +carryall +minute hand +cog +riverbank +water nymph +common dandelion +android +hairbrush +redberry +fret +display window +pepper mill +litterbin +drapery +ducking +fly-fishing +broad jump +sprinkler +water-skiing +chicory +sail +volleyball +rugby +Texas bluebonnet +computer monitor +tortoiseshell +airplane propeller +solar array +figure skating +air conditioner +purple loosestrife +gearshift +outboard motor +cowslip +Abyssinian +dip +workstation +cosy +bunker +neon lamp +campanile +casket +verbena +amphora +sumo +common foxglove +sprocket +jelly bean +emperor penguin +night-blooming cereus +clock radio +black birch +bomber jacket +Virginia bluebell +bayonet +walker +altarpiece +tattoo +bridle +rocker arm +water turkey +spiderwort +flange +mute swan +laser printer +carburetor +coverlet +mountainside +baritone +auto racing +baluster +gal +peach bells +taffeta +grandfather +asparagus +horizontal stabilizer +world +grate +marsh marigold +white rhinoceros +movement +split rail +rollerblading +longhorn +muffler +church tower +light bulb +American agave +backpacking tent 
+overall +New World goldfinch +sectional +wing chair +transom +integrated circuit +dad +spar +picture frame +no-hit game +alternator +drill press +strawflower +hepatica +rangefinder +blinker +Welsh pony +nib +wagon wheel +rotor +tie +denim +jetliner +sculling +external drive +window frame +mourning dove +censer +stapler +batting helmet +flagon +machete +windshield +hedgehog +weeping willow +chief executive officer +hepatica +pet +Asiatic black bear +chinchilla +uke +Atlantic bottlenose dolphin +hair +dishtowel +flintlock +Bermuda shorts +lavender +searchlight +millwheel +piano keyboard +luna moth +bumper +parrot +skirt +manhole +coffee table +footstool +judo +Dalai Lama +armored personnel carrier +voile +saber +thoroughbred +wild carrot +gemsbok +caster +butterfly orchid +cow +sideboard +horseshoe crab +match play +cassette recorder +photomicrograph +drafting table +pediment +tramline +shipping +kitten +wainscoting +fried rice +helix +marguerite +pumpkin +white-bellied swallow +Tulipa gesneriana +common dolphin +face +red squirrel +bicycling +shipwreck +banded purple +cornice +pendant earring +forsythia +aardvark +seashell +spat +shoulder bag +fallow deer +yearling +common teasel +tufted titmouse +ancient +professional golf +purl +vehicle +okra +great grandmother +common lilac +rose mallow +newspaper +crucifix +chukka +armlet +fulmar +wapiti +doily +Greco-Roman wrestling +bleeding heart +kitchen table +bluebonnet +Cape buffalo +spun yarn +crape myrtle +dewdrop +great blue heron +medalist +vaulting horse +spinning wheel +skyscraper +Tahitian +forget-me-not +watercourse +guitarist +gargoyle +bee balm +pumpkin +hunting knife +flutist +lectern +skateboarder +foil +pant leg +hedge sparrow +dresser +automatic pistol +chicory +dialog box +chamberpot +black rhinoceros +fireweed +half-mast +pillow sham +pavilion +scarf joint +microprocessor +filly +dressing gown +shell +Arabian +child +radio antenna +butterweed +morris dancer +sparrow hawk +groom +brioche +floret +rainbow 
+earthworm +cellist +tine +toupee +balldress +map +angel's trumpet +ruin +fur +pronghorn +speed skating +used-car +stick +early spider orchid +stuffed peppers +snowdrift +flats +least sandpiper +stick +console table +ventilator +portable +kepi +pylon +viceroy +shoreline +Olympian Zeus +pestle +great-niece +life +air compressor +fanjet +scuba diving +fieldfare +tree swallow +personnel carrier +night-blooming cereus +sonogram +assembly hall +circuit breaker +chair +speed skate +soapwort +worsted +raspberry +burlap +flat panel display +Pyracantha +cemetery +turban +deer hunting +bottle green +dandelion green +pieta +aigrette +turntable +cover girl +clutch bag +kiwi +pea jacket +color guard +Malay +shire +crock +french fries +credenza +hockey stick +mourning cloak +potty seat +glass +balsamroot +medal play +red clover +gravy boat +garter belt +Guinness +meadow buttercup +jackass penguin +coursing +tooth +hawfinch +housetop +fluorescent lamp +black-backed gull +bookshelf +earplug +millipede +fawn +baseball bat +soup-strainer +organ loft +bugloss +tomahawk +blackcap +black-necked stilt +hand truck +bedstead +tempura +rose window +crimson +snow thrower +lesser whitethroat +palomino +ball +staff sergeant +wicker +garbage heap +great-nephew +parquet +coupe +nave +eggs Benedict +damask +flush toilet +Angora +pedometer +control room +bristle brush +kookaburra +telephone booth +Windsor chair +red-winged blackbird +cinnamon roll +briefs +cloister +sundress +mammillaria +unicyclist +covered bridge +coelogyne +fairy bluebird +phoebe +beer mug +headstock +parhelion +gorse +common European dogwood +fire-eater +professional football +rock climbing +cyclamen +tin +marjoram +Japanese morning glory +pipe +smasher +hang glider +abutment +birdbath +jotter +litter +artist's model +butterfly bush +dining area +sausage dog +piggery +English sparrow +Turk's-cap +platinum blond +song sparrow +alarm clock +tortoiseshell +chaise longue +flintlock +academic costume +graffito +Arnica montana 
+adding machine +waterside +director +jonquil +pipefitting +stud +Swedish meatball +musk rose +Venus's flytrap +raven +bougainvillea +little brother +field bindweed +finder +white admiral +tinfoil +serval +sheet +carthorse +people +potto +stockroom +sphinx +slate roof +mountain laurel +majolica +coal black +repository +bufo +pique +binder +tread +attorney general +hydraulic press +videocassette recorder +bumper car +professional baseball +cow parsley +ern +blue peafowl +common hyacinth +jack-in-the-pulpit +ice hockey rink +sport +camper +tailback +flash +stacks +pulp +Christmas cactus +netball +calliandra +curler +large periwinkle +cobweb +forward +Roman arch +cross bun +stoneware +banana bread +cape jasmine +settle +tongue +frock +pepper shaker +pitching coach +CD-R +casing +faience +hand cream +CD-ROM +recliner +striped bass +clary +sketch +risotto +reticle +white clover +touch football +kitty +great-aunt +Japanese maple +sidecar +muscovy duck +hack +rope bridge +organist +stinging nettle +pocket watch +Indian pipe +amorphophallus +bird's-foot violet +caller ID +furnishing +carriageway +dish rack +heiress +nail polish +beldam +Dall sheep +teriyaki +stateroom +laughing gull +chow +bookmark +timer +toga virilis +deviled egg +coltsfoot +Papuan +native +cygnet +automation +portfolio +cabbage palm +cube +broiler +radish +broodmare +castor-oil plant +pith hat +talus +lass +thatch +common marigold +young buck +igloo +prairie rattlesnake +soccer player +spoke +place +slide fastener +tapestry +toy +headboard +cross-country skiing +harness +sconce +rim +ballet skirt +transvestite +saddlebag +common evening primrose +taillight +challah +willet +ready-to-wear +cloud +answering machine +waterfront +vane +granddaughter +Chinese gooseberry +tureen +cab +truffle +viola +bootlace +chemise +taro +petal +candied apple +soccer +miniature golf +front porch +asparagus +Sauvignon blanc +daisy fleabane +ceiling +slip-on +bottle-nosed whale +redbud +black squirrel +snowsuit +ribbing 
+gravestone +creme brulee +ambassador +local +archery +love-in-a-mist +garbage +thyme +night-blooming cereus +goshawk +cuckoopint +azure +German iris +salad bowl +puppy +cockhorse +giant clam +biplane +stele +necklet +sea otter +crest +door +reformer +comforter +Byelorussian +bottle +hemline +book bag +leotard +owlet +spoon +sari +bidet +Latin +reticulated python +bowling shoe +futon +gaiter +coypu +tea urn +waders +bangle +snowbank +pencil +porter +azalea +English lavender +red spruce +team sport +cruet +high-rise +O ring +vodka +cormorant +Canada thistle +clasp +showjumping +rattan +red fox +sun parlor +Charolais +Tommy gun +bird's foot trefoil +sedge warbler +knot +chives +car tire +steam engine +adapter +spirea +common allamanda +oyster shell +harbor seal +baobab +wick +plumbago +downy woodpecker +coconut +leash +kasbah +hour hand +upholstery +mallard +cricket bat +lady +kitchenware +right-hander +leopard +olive green +common valerian +blue whale +blackboard +redhead +periwinkle +fingerboard +hard hat +locker +breakfast table +capybara +beekeeper +harness +feeder +water hyacinth +hexapod +brown thrasher +percale +lever +patriarch +arete +book +book +senator +bunya bunya +couch +durian +common lady's-slipper +mountain ash +golden barrel cactus +bicycle seat +beret +pop +musk mallow +manatee +cotton candy +boxing glove +backboard +tongue +saguaro +playground +capitol +sanderling +wagtail +deputy +tractor +tap +lady's smock +noseband +worsted +radiotelephone +camisole +forelock +muscat +sweet scabious +crane fly +butterfly weed +chestnut +pinata +inositol +borage +aquatic +belly +broadcaster +gondolier +egg yolk +blush wine +bufflehead +rambutan +oleander +horse-trail +sea holly +yard bird +conference room +lacrosse +belted kingfisher +defile +extremum +whistle +bear cub +grainfield +potage +watermelon +lasagna +sheik +Cooper's hawk +bulb +basketball court +paella +cassette tape +scatter rug +kid +impala lily +Minnesotan +Sudanese +chocolate +tail +quack-quack 
+whistling swan +shoulder patch +frozen custard +sumo wrestler +smoothie +bock +meat grinder +latch +palisade +radial +sake +kestrel +corn chowder +airframe +electrician +reamer +metropolitan +cotton flannel +cassowary +crossbill +operating room +winter aconite +flute +Tasmanian devil +billboard +suds +kilt +aperitif +cooling tower +avocado +hooded merganser +coleslaw +bee balm +ladder-back +insurance broker +scaffolding +polo mallet +double bed +two-hitter +bluff +gamboge +baby +lawn chair +frond +pistol grip +fancy dress +marquetry +jambalaya +fireweed +Eurasian kingfisher +cue ball +ice plant +horseweed +rose moss +musher +sun +viscount +white-breasted nuthatch +gin and tonic +thermos +Kenyan +first-aid kit +four-wheeler +tourist +stairwell +Gambian +liqueur glass +hovercraft +cocktail dress +twin +coriander +blister pack +Barrow's goldeneye +canteen +irrigation ditch +great white heron +tree sparrow +canal boat +lens +food processor +common raccoon +Baltimore oriole +black-eyed Susan +bush hibiscus +corolla +sire +mustachio +professional wrestling +elk +clustered bellflower +pannier +musk ox +crapaud +animal trainer +rosebud +ring-necked pheasant +little egret +cappuccino +rocker +bristlecone pine +cheerleader +hedge violet +semaphore +central processing unit +speedskater +delivery truck +assembly +hedgehog cactus +bergenia +bull thistle +bladder campion +cinquefoil +inula +cellulose tape +main rotor +bootee +autogiro +ice +grey +meadow cranesbill +hummus +valise +chassis +mountain goat +blacktail prairie dog +Chardonnay +romper +street +shoveler +wood ibis +topiary +chalice +silo +circus acrobat +Rollerblade +cosmos +woof +heroine +cold cream +marabou +herb robert +garden lettuce +nymph +floor lamp +automobile engine +heel +radiator +seeded player +fedora +father-in-law +peahen +Bahamian +wiper +wood pigeon +barn owl +pegboard +chorus frog +kin +roller skate +stob +rosemary +cowbird +hortensia +cranberry sauce +shot glass +Dixie cup +gnu +fire alarm +diet 
+booster +oxeye daisy +twayblade +high-definition television +truss bridge +bunk bed +mule +blackbuck +facsimile +frog orchid +point-and-shoot camera +brocade +gazebo +prairie gentian +concert +paintball +Cognac +maid +afghan +barbecued spareribs +pintail +tramway +commissioner +finger-painting +beef stew +caftan +Aberdeen Angus +demonstrator +sea trout +pigtail +thrush nightingale +barbados cherry +sashimi +ridgeling +lamppost +gabardine +red-shouldered hawk +bath salts +cavern +cymbid +Haitian +boater +southern buckthorn +arctic +motorcycle cop +red gum +Clydesdale +Zamboni +beagling +villa +demitasse +Sheetrock +lollipop +hybrid petunia +post horse +carabiner +brussels sprouts +Durham +stylist +pothole +sleigh bed +scallop shell +harrier eagle +papaya +Japanese persimmon +sachet +wild rice +chipboard +gun enclosure +menorah +chinook +headset +white campion +ocean +Secretary of State +G-string +bone china +basil +greenish blue +camcorder +concrete +screech owl +trumpet honeysuckle +flugelhorn +layette +cattle egret +case knife +mandarin duck +robber fly +salwar +dressing table +doughnut +facade +runner +honeypot +surf casting +diver +angel's trumpet +spin dryer +chameleon +wand +snow +vitamin A1 +manageress +volleyball net +antiperspirant +street clothes +tree sparrow +cords +sundew +bricks and mortar +caryatid +bridesmaid +trestle bridge +eyepiece +celebrant +scarlet pimpernel +gas range +onion +green salad +squill +creepy-crawly +hunk +little owl +salad nicoise +earflap +bird feeder +spray gun +bunny +Cheops +amazon +blue tit +Nissen hut +Kalashnikov +skylark +kremlin +shoebill +shopping bag +frigate bird +telephoto lens +peplum +moss pink +echidna +wastepaper basket +wood ibis +workroom +ankle brace +telpherage +Michaelmas daisy +figure skate +swami +nylons +cardoon +cocotte +headstall +twin bed +parsley +dirndl +corn poppy +nut bread +cloche +light heavyweight +mayor +lip-gloss +punch bowl +pottage +mango +fledgling +mousse +four-wheel drive +barrel +banana 
boat +trouser +bathroom +Sauterne +ring +settee +lavaliere +safe-deposit +godson +leatherette +schoolmate +radish +hedge trimmer +dahlia +euphonium +palace +vaulter +singlet +slicer +Pilsner +cockateel +kangaroo paw +Cub Scout +master bedroom +hexagon +cenotaph +Barberton daisy +Netherlander +intersection +Korean +gravel +chandelier +hospital bed +flash memory +pier +whole wheat flour +maroon +pale ale +special +snow bunting +crinoline +dustpan +barrette +common wood sorrel +yolk +pothos +speakerphone +tendril +cabinetwork +farm horse +brake disk +streetlight +superhighway +bandsaw +panting +pressure cooker +girdle +old man +cereal bowl +felt +hurling +architecture +harmonium +chain +blueberry +cellar +smocking +scrub brush +tablespoon +sweet corn +graining +library +street +bill +felt-tip pen +monkshood +crowd +log cabin +newel post +hack +elephant seal +golden pothos +popcorn +outhouse +patch pocket +fish and chips +tape +wax plant +eaves +fried egg +emerald +tea cart +fan blade +daily +Bowie knife +rowing boat +leaf shape +man +crayon +trumpetfish +chipping sparrow +whiskey bottle +pillion +city hall +golden pheasant +cheerleader +creeping bugle +couch +Dumpster +Homo sapiens sapiens +cranberry juice +cockpit +demagogue +joinery +scrambled eggs +technician +sidewalk +sheep +keyhole +power line +polyanthus +roulette +first lieutenant +checkout +tabletop +nasturtium +schnapps +engineering +skateboard +ground fir +bouquet +bunk +resort area +fleur-de-lis +power steering +opera +Bolivian +Friesian +buckskins +bay +slider +frozen yogurt +cabin cruiser +saunterer +lean-to +fishing eagle +bog star +cantaloupe +mouth +music stand +fiddlestick +brilliantine +pinball machine +bairn +barred owl +bath oil +signorina +Mason jar +nymph +rubber band +garden nasturtium +razorbill +Japanese beetle +batting cage +trestle +borage +Secretary of the Interior +scanner +baguet +baseball cap +chow mein +pen +jewelweed +barbet +chasm +pectoral sandpiper +holster +glasses case +sand 
+crevice +Kickapoo +snowboard +locket +satchel +tankard +alpinist +moorhen +cow pen +whooper +crown +chain +silversword +wild geranium +hi-fi +Tibetan +waterwheel +bee orchid +ruby-crowned kinglet +common broom +tabloid +javelin +sauna +klammath weed +zebra finch +spider orchid +velour +chiffon +lecture room +barrel +loggia +millstone +flatlet +soupspoon +econometrician +golf-club head +daphnia +parlor +fire-eater +juggler +attache case +hay bale +kisser +knitting needle +news magazine +flatbed +Senegalese +trumpeter +trampoline +brogan +bone +caftan +lobster pot +gazpacho +anthill +ramekin +mainsail +penitentiary +spotted flycatcher +cookstove +root beer +broom beard grass +pogo stick +plywood +epee +gas oven +Global Positioning System +sweet false chamomile +breakfast area +bullring +second cousin +wave +decolletage +rodeo +won ton +swastika +bobby pin +papaw +retaining wall +Muscadet +heavyweight +energizer +banner +amusement park +whinchat +drugstore +waxwork +meander +congee +heat sink +switch grass +commuter +peony +western white pine +wild raspberry +nightgown +saute +cardinal +claret +pollinator +biryani +pina colada +cassette deck +European sandpiper +block +flan +birdcage +baby +lieutenant colonel +ticking +European white lily +dog violet +coat hanger +premature baby +organza +string bean +balloonist +hurricane deck +window box +hang glider +bullfighting +piste +seahorse +hard cider +batik +common mullein +petite marmite +stuffed mushroom +tequila +ground ivy +fountain grass +stray +putter +buffer +comet +bomber +woodcarving +baseball glove +halter +garnish +selvage +megaphone +sea fan +rabbit hutch +very important person +analog watch +long-head coneflower +northern pike +roll-on +cigarette butt +terraced house +penknife +windshield wiper +cricket +straightener +snow pea +cockerel +canister +sour bread +recovery room +toilet bowl +tyrannosaur +big sister +quartz battery +television receiver +vitamin C +tailpipe +field thistle +stonechat +col +monstrance 
+gift wrapping +herbivore +quarter horse +ice-cream sundae +rumpus room +eyepatch +clary sage +French lavender +snorkel +choir +tent-fly +cat box +horse racing +high priest +barrel cactus +pin oak +wild thyme +keyboardist +raiser +hammock +hail +bungee +chocolate mousse +major +buzzard +gopher tortoise +Chablis +water meter +benthos +donna +blender +Mauser +avocet +rye +mulch +chancel +dusty miller +mate +corbel +minaret +frittata +French toast +mosaic +home brew +water faucet +beard +swivel chair +acropolis +largemouth +abbey +tabby +driver +copperhead +stirrup +Boston fern +Tennessee walker +artichoke +honor guard +chapatti +enchantress +sweat pants +electric organ +column +dry vermouth +range hood +Red Delicious +rape +splint +catapult +gourd +antipasto +plaza +carnation +star +wood anemone +English primrose +male fern +boot +atrium +Japanese deer +carnivore +yearling +doe +guelder rose +chicory +stretch pants +ice-cream cake +frogfish +tarpaulin +chicken soup +balaclava +tor +feverfew +three-hitter +flyweight +aqua vitae +locker room +wether +teacup +wide-angle lens +hook +ladder-back +osprey +awning +wedding +chest protector +pooch +rose mallow +orange daisy +fondant +envelope +duckling +blackberry +goosander +snorkeling +philatelist +broad bean +Frank +bok choy +basket +absinth +cayenne +blackbird +bottled water +trooper +timber +stable +chestnut +tomatillo +bell +banquet +rainbow trout +macrame +appointee +heart +chipmunk +purple clematis +safety bicycle +shuttle bus +Japanese black pine +lentil soup +downhill +field mustard +brass +hand-me-down +greater yellowlegs +fanny pack +croquet mallet +hip roof +duffel bag +Ritz +document +pie plant +staff member +lifeguard +white-throated sparrow +Cameroonian +hydrofoil +platter +common ageratum +middleweight +chairlift +brunch +pharmacist +lemon +driveshaft +green snake +lip +London plane +mangrove +crystal +siskin +common jasmine +hollandaise +villa +cross-country riding +mother-in-law's tongue +generator 
+Tanzanian +whisk +seeder +ashtray +griddle +evening bag +bluebird +bran muffin +square dancer +luggage compartment +tropical pitcher plant +autofocus +tape drive +silencer +Hawaiian guitar +swamp sparrow +Zimbabwean +drawing room +weekender +liparis +streambed +samosa +hitter +water heater +tidal basin +ossuary +dik-dik +camouflage +fiance +Jordanian +rolling pin +slingback +turret +hen +jennet +playpen +woodhewer +bushing +church bell +bear grass +double knit +tennis pro +Joe-Pye weed +pave +pochard +painted beauty +crinoline +gumbo +trestle table +schnitzel +balloon flower +Turkish coffee +extension cord +wireless local area network +sluice +umbel +microeconomist +sky +aisle +commander in chief +hydroplane racing +poll +Coca Cola +fuel injection +bird pepper +monkey puzzle +English muffin +riverbed +varietal +kachina +airport +saltwort +oolong +red-hot poker +mihrab +cocoa +jersey +Walkman +syndic +Hessian boot +millstone +carpenter +outfall +curbstone +mocha +field pansy +patriarch +slacks +switchblade +killdeer +whelk +pampas grass +racquetball +platform bed +Indian rhinoceros +Japanese iris +blacktop +dinner jacket +stud +jodhpurs +telephone pole +business district +kurta +basil +handset +file folder +gloriosa +orphan +cantle +cookie sheet +cafe au lait +drawbridge +hill myna +Western diamondback +watch case +cardcase +bowling alley +mattress cover +canvasback +pompadour +cornice +matador +cigar cutter +skunk cabbage +baptismal font +bitters +refectory +egg +parula warbler +tiger lily +field house +nanny +skin-diver +soda water +lymphocyte +carport +chocolate fudge +amphitheater +sugar candy +sea hare +open-face sandwich +dessert spoon +staple gun +envelope +worker bee +general +garment bag +maypop +autobahn +Atlantic puffin +polo shirt +Humvee +spice rack +grotto +banderillero +gaillardia +black-crowned night heron +oboist +weigela +Dictaphone +dwarf iris +marsh mallow +yarrow +eccentric +catsup +jade green +mistress +henbit +beachwear +head +commuter 
+strawberry tree +chickpea +clothespin +fleabane +brussels sprout +winter melon +Laconian +great horned owl +caricaturist +nan +flowerbed +triple sec +dairy +round of golf +cardinal +kauri +Zulu +Armagnac +cowberry +mouthpiece +wild calla +bling +puppeteer +beer drinker +adder +field sparrow +chocolate pudding +blacksmith +finback +Shetland pony +cheese fondue +panty girdle +soda can +electrolytic +florist's chrysanthemum +yellow jasmine +tudung +equalizer +ridge +dulcimer +grappa +barn swallow +coneflower +enamel +poached egg +halfback +yak +toby +Fleet Street +blue catfish +sand tiger +flying buttress +snaffle +stoop +first base +cultivated land +first lady +waratah +headquarters +arnica +lovebird +common morel +parasol +disk clutch +Xerox +vitamin P +vitamin B12 +long sleeve +certified public accountant +hot pants +pitch pine +pantie +drawers +cake mix +boar +grey +bride +false sago +bullion +coach house +bass guitar +Japanese banana +meadow clary +black belt +Canterbury bell +smallmouth +treadmill +great white heron +enchilada +rummer +captain +camisole +wild garlic +oak fern +ultramarine +peach +hawkweed +autostrada +adit +anaconda +artwork +skinhead +jello +hermit thrush +Bewick's swan +dress suit +trail bike +stubble +common polypody +Riesling +Easter lily +telegraph key +envelope +garlic bread +perianth +salad bar +steppe +club sandwich +nude +garden forget-me-not +Tuareg +flood +Statehouse +charcoal +boy scout +Rhone wine +parfait +spoor +lanyard +octagon +brown bread +quarterback +quilted bedspread +hookah +Pepsi +hamburger bun +entrepreneur +saddle oxford +snake's head fritillary +undies +chemise +skidder +chickpea +carnation +honey bun +mortar +Montrachet +automobile horn +skylight +gingham +rafter +pantile +climbing frame +scarlet runner +cable +cornstalk +mockingbird +raisin bread +chili sauce +hand calculator +concert-goer +detached house +coq au vin +lasso +hyssop +globe thistle +paper clip +slide +Jerusalem artichoke +tetrahedron +mock orange 
+lemon lily +finger +little sister +handcuff +horse wrangler +pavlova +oilcloth +snow-in-summer +common mugwort +greenshank +ice-cream cone +rubber boot +gunnysack +disk jockey +long trousers +sorghum +pontoon +calf +fire extinguisher +cotton thistle +pilot whale +ao dai +steamroller +wristwatch +tawny owl +city +country store +ironweed +kennel +bathrobe +rattan +drawer +fly tent +choline +musk thistle +courthouse +Yugoslav +bush +trawler +shellflower +jade vine +ragged orchid +pea soup +King Charles spaniel +hubcap +snook +paddy +bow and arrow +shovel +dill +cliff swallow +cadaver +hijab +masterpiece +fish geranium +kettle +sanitary napkin +carrot stick +Mountie +peanut brittle +dam +jackal +windowsill +butterfly orchid +bodice +picador +pale yellow +beanie +petiole +tenor saxophonist +bungalow +gnomon +stock saddle +field glass +rigging +wood grain +Speaker +settlement house +swamp milkweed +paper nautilus +tangerine +champagne +crescent roll +library +Schmidt telescope +stemless carline thistle +motorcyclist +alpine ash +planchet +water closet +casuist +hand luggage +hyssop +spaghetti and meatballs +cannelloni +cedar waxwing +water dog +brick red +linkage +sweep hand +purple heather +macaroni and cheese +butter knife +refreshment +malt +St. 
Augustine grass +wainscot +compass +gas heater +tamale +table saw +referee +borsch +projector +dracaena +peppermint +Reuben +Abyssinian banana +glassblower +floss +small stores +artilleryman +lapwing +ranch +garbage man +dwarf banana +commelina +currant +adulteress +landlocked salmon +pasqueflower +nan +tiger lily +Eritrean +rotunda +catsup bottle +mezzanine +royal fern +blended whiskey +bowler hat +mistletoe +manor +fusee drive +pistachio +dispensary +swamp +amputee +sculptor +schoolmaster +Chinese anise +dwarf iris +livestock +chronograph +nectarine +jockey +plaster +motel room +swamp azalea +hippeastrum +space station +duchess +catacomb +dovetail +cockscomb +common spotted orchid +brittlebush +cleats +cloche +hotchpotch +cabin car +prey +indigo +light beer +bear's breech +jonquil +analyzer +alyssum +spur gear +ice tea +honey buzzard +twayblade +dirndl +atlas moth +croquette +carafe +flyweight +professional basketball +multivitamin +air terminal +phial +roll-on +skunk cabbage +bird of paradise +rose +cooter +camping +divided highway +herbage +sweet vermouth +common comfrey +eggplant +office building +glutton +gefilte fish +bicycle rack +swamp birch +Venetian blind +Pernod +Norway spruce +portrait camera +bastion +vitamin Bc +Ugandan +Indian red +okapi +emu +vin ordinaire +chintz +shrimp cocktail +numbat +tall oat grass +cable car +stopcock +ham sandwich +Yemeni +stanhopea +plate +chicken broth +common yellowthroat +California poppy +radio +chocolate egg +mess jacket +tea table +physostegia +Japanese flowering cherry +confectionery +chicken cacciatore +painted nettle +popover +white rice +strapless +mohair +electrical cable +coil spring +arterial road +miniature fan palm +spectator pump +pesto +interlocutor +eastern kingbird +dongle +vitamin B6 +stuffed tomato +cough drop +okra +black +barbecue +burial mound +firstborn +corn snake +amberjack +bollard +horn +Black African +elbow pad +Camembert +circle +Japanese apricot +hearing aid +rock star +creature +taster 
+bubble gum +scull +lemon balm +chaetodon +anemometer +brake drum +fuselage +courthouse +aqualung +yellow adder's tongue +reception desk +guy +buffalo wing +ginger beer +robin +pantothenic acid +marsh hawk +yellow journalism +exhaust +cardamom +Tabasco +ax handle +patriarch +floor +pine snake +spoiler +hood +sphagnum +parrotfish +orphanage +redpoll +beef Wellington +white spruce +cherry plum +scapular +field lens +broomstick +mouser +wood thrush +Nebraskan +hotelier +milk thistle +soya milk +Munich beer +boucle +snowy egret +dust storm +steward +kudzu +oriental poppy +presbytery +burro +orange soda +stonecrop +splashboard +menagerie +dormer +wire cutter +yellow bells +Dubliner +shore pine +cousin +racing gig +Morgan +gold plate +villager +snifter +granny's bonnets +egg roll +Spode +amabilis fir +babbler +pestle +heliopsis +halter +black spruce +President of the United States +ski slope +chocolate fondue +lockstitch +motel +Epipactis helleborine +tabbouleh +Yorkshire pudding +overpass +Timorese +presbyter +tablefork +bottle gourd +tiara +vintage +pilgrim +reindeer moss +shower stall +towel rack +kachina +chef's salad +breeder +cow parsnip +walker +Black woman +Irish coffee +portrait lens +lateen +gilt +successor +cargo container +Lithuanian +mayapple +paisley +highchair +strawberry jam +flying fox +field scabious +blue-eyed grass +screw +Frisbee +dressing room +cholla +walkie-talkie +red currant +centrifugal pump +smorgasbord +hot rod +marcher +rowanberry +welwitschia +amphitheater +pew +concert band +bosom +pillbox +seagrass +openwork +meadow goldenrod +shower +chicken sandwich +Boston ivy +plastron +oilfield +stuffed tomato +juniper berries +frame +Spanish mackerel +family room +powder horn +fight +maguey +bunker +work-shirt +air filter +nosh +sugar bowl +foothill +reliquary +tugboat +horsebox +grater +palace +board member +campsite +halibut +geneva +ginger ale +high commissioner +genet +bodywork +spaghetti +protractor +pipe cutter +wood anemone +turkey cock 
+surge suppressor +green turtle +spoiler +bedsitting room +television room +ballot box +shasta daisy +impeller +capote +bitter +California wine +lock +spinnaker +gill fungus +baby's breath +nut and bolt +moonflower +houseboat +distributor cap +coffee bean +gusset +bowling ball +knitwear +frieze +mistflower +roadster +cue +circuitry +brake +butt hinge +Chickasaw +leopard frog +wing tip +puree +mantel +pantheon +grandfather clock +cockchafer +pomegranate +cleaners +eyeshadow +Oregon cedar +rock hopper +hawksbill turtle +agriculturist +yellow-crowned night heron +Albanian +pumpkin seed +chateau +goggles +camper trailer +bracket fungus +cigarette case +signal box +saddle blanket +poison ivy +set gun +cattleya +dry fly +concert hall +personal digital assistant +talcum +deodorant +common starling +painted turtle +kea +plenipotentiary +pantyhose +masjid +buskin +hurdle +cocktail lounge +belting +sour dock +knife blade +sugar snap pea +paddle +dickeybird +brace +keep +call center +yacht +lead pencil +tumbler +production line +tetra +private +French window +express +ski boot +pinto +broad bean +American crow +screech owl +snapper +power cord +Manx +rambutan +sun deck +stonefish +golden eagle +national monument +readout +cork oak +hacksaw +beer can +bathe +tussock bellflower +wet suit +mihrab +big game +highlighter +sprocket +measuring worm +grapefruit +samovar +distributor point +steak knife +incubator +loon +temporary hookup +hippodrome +hot spring +spacesuit +flea market +clay pigeon +catbird +earmuff +tetherball +yellowfin +cellophane +lanolin +clapperboard +velveteen +police dog +cashew +sequencer +mango +duplex house +bazaar +Golden Delicious +red carpet +collet +kickstand +broadloom +diskette +tank engine +compact +diesel-electric locomotive +whale shark +water moccasin +mountain avens +tropic bird +ginkgo +ski cap +fixative +glockenspiel +chopine +ethernet +herring gull +skeleton key +finger paint +conference table +great crested grebe +harbor +white-crowned sparrow 
+Bullock's oriole +guestroom +boutique +cable television +roulette wheel +Luger +Latin American +trumpeter +blindfold +baby +freshwater bass +home plate +bonefish +giant sunflower +giant tortoise +planking +pigeon hawk +oceanfront +door +bazaar +common wasp +conformation +kick starter +kid glove +corydalis +shuttlecock +writing desk +ivory gull +shirttail +diving suit +weka +downy birch +altar +wild sage +tufted puffin +cabinet +Orpington +cineraria +bottom +dial +coracle +resort hotel +soap dish +spotted owl +billiard room +ghetto blaster +red-breasted nuthatch +hatchling +chalet +bracteole +crusher +mixer +net melon +farmhouse +Dutch oven +transept +penlight +palmyra +stewing pan +solar cell +crochet needle +black-winged stilt +germander speedwell +crinkleroot +truncheon +bunchberry +hatchback +sounding board +mixing faucet +chess master +bisque +Brie +Sitka spruce +pawn +Mexican-American +space rocket +choreographer +collared peccary +duffel +nacho +patchcord +carpet snake +omnivore +watering can +hall of residence +streamer fly +sunroof +great grandson +oil refinery +billiard player +ivy geranium +key palm +pinwheel +yellow-shafted flicker +purple onion +soldering iron +condominium +fishing gear +heat pump +marine iguana +cuckoo clock +Bletilla striata +headrest +spotted salamander +field hockey ball +pound +carboy +vertical stabilizer +groundsheet +cinnamon bread +acorn squash +sheathing +lakefront +Jeffrey pine +synthesizer +olive +apple +pannier +ponderosa +Jew's-ear +latch +equatorial +metasequoia +permit +bloomers +town hall +fava bean +casino +bier +jampot +common snapping turtle +clary sage +oatmeal +Dutchman's breeches +massif +Guyanese +heifer +handball +sweat suit +pomelo +Iceland moss +customhouse +sandbag +archer +gyrfalcon +sword cane +marmite +whole snipe +blue crab +sugar spoon +brownstone +chicken wire +lizardfish +dump truck +chicken yard +chamois +electric +idle pulley +jujube +wrestling mat +aoudad +Burmese cat +water shamrock +dormitory 
+Unknown Soldier +hearse +bumper +clipper +desert pea +critter +semitrailer +backboard +common St John's wort +Atlantic manta +song thrush +jukebox +quoin +eastern chipmunk +copper beech +paintball gun +bull +package store +fraise +royal poinciana +niqab +traction engine +objective +day nursery +ski lodge +orphan +summer house +cereal box +router +sleuth +jodhpur +polyp +croquet +sport kite +green onion +tulle +etagere +tussock caterpillar +rest house +elderberry +bridal wreath +Torrey pine +silver wattle +kidney bean +pentode +laelia +Allen wrench +sporran +red drum +tricot +heterodyne receiver +magazine rack +stone curlew +trawler +suckling +niblick +sandwich plate +double door +Togolese +pitching wedge +desert tortoise +cloth cap +date palm +webbing +jumper +frogmouth +copperhead +covered couch +black mallee +riser +scraper +gauntlet +pantheon +food court +muntjac +grocery bag +bread-bin +transmission shaft +primigravida +window seat +crab apple +seat +Fresnel lens +dendrobium +hatchback +little theater +butter dish +back porch +umbrella tree +carrot +seventy-eight +coconut +music stool +Tesla coil +bay willow +American basswood +sabot +wheel and axle +gazette +lute +bassinet +hart +mecca +breadbasket +silverfish +handball +Scotch pine +box camera +stately home +Hereford +tread +single-breasted jacket +desk phone +deodar +professional boxing +fly casting +box wrench +black oak +martello tower +red campion +bullock +sweet William +bay leaf +dollhouse +flounder +fox hunting +beanbag +king mackerel +rouge +film advance +common mallow +parasitic jaeger +satellite receiver +nurse shark +chesterfield +tomatillo +plimsoll +hatbox +bloomer +foul-weather gear +longleaf pine +horse mackerel +tree lizard +bark +belfry +Treasury +perch +purple finch +stag beetle +fragrant orchid +tachymeter +tadpole +cookie jar +knee piece +agueweed +bones +chick +golf glove +toothpick +taboret +rotor blade +field artillery +purple willow +redhead +spark plug +guava +voice mail +cross 
+butterfly valve +star magnolia +olive +room light +Australian turtledove +embassy +Iraqi +singles +nestling +spinning rod +radial engine +rowan +sandbox +boss +moccasin flower +veneer +mint +American chestnut +white whale +CPU board +florist +press box +hurricane lamp +giant kangaroo +greater whitethroat +winter jasmine +blue +department store +southern red oak +saber saw +corn muffin +bellbottom trousers +toaster oven +red eft +condominium +galago +sunbather +redpoll +common European earwig +songbird +linnet +light meter +bracer +tepee +gumbo +water glass +roofing +spathiphyllum +shofar +sand lizard +washroom +Brussels carpet +brachyuran +home room +floatplane +knee brace +solar heater +felucca +gas ring +maguey +manse +blue columbine +cuppa +cigar band +male orchis +mudskipper +couscous +Chinese parasol tree +dude ranch +banyan +gopher snake +sundrops +aviary +African daisy +missel thrush +Photostat +stone pine +circus tent +tangle +printer cable +grease-gun +rose chafer +light pen +plantain +hearth +bullfinch +post oak +slow loris +Newtonian telescope +head +punt +spindle +New England aster +spotted sandpiper +pond pine +grass skirt +bug +black rat snake +tabasco +bull shark +tennis camp +scrambler +popinjay +bing cherry +ministry +cash register +redheaded woodpecker +kameez +farmer's market +roan +harpy +European toad +pizzeria +camshaft +hemp nettle +chicken coop +cottage pink +daybed +observatory +airdock +mountain devil +newsstand +kingfish +snow gum +jackdaw +lacquerware +peeler +miro +sister ship +damask rose +pack +snowshoe +Liberian +paramecium +tidytips +professional tennis +bookend +wood swallow +cayuse +cranberry +rock squirrel +steak au poivre +soul patch +female mammal +sash fastener +songwriter +oxeye daisy +apse +floor joist +hand towel +wheatear +cero +soul mate +golden fig +bus stop +psycholinguist +convenience store +manor hall +mountain sandwort +Euopean hoopoe +haricot vert +mausoleum +violist +flashlight battery +chard +fixer-upper +bank 
martin +testudo +diving duck +kohlrabi +Omani +sphygmomanometer +greyhound racing +chestnut +rattlesnake plantain +chaffinch +wolf pup +teakettle +cairn +souk +resident commissioner +chuckwalla +gaiter +capercaillie +liver chestnut +bean sprout +land line +ambassador +green pepper +common chickweed +Sharpie +Oriental arborvitae +oncidium +pallone +currawong +sweet alyssum +fire tower +eyebrow pencil +redfish +apricot +clementine +blucher +wigwam +pangolin +buggy +common oak +jumbojet +laser +cigarette holder +racquetball +georgette +cleft +scouring pad +drum printer +pond scum +American red squirrel +caranday +swamp willow +blindworm +brook trout +defense system +nyala +three-way calling +mizzen +shuttle +African lily +Oregon white oak +rain tree +fuel gauge +oriental cherry +wahoo +pear +jungle gym +bass fiddle +outrigger +angelfish +Old World coot +lime +battlement +yarmulke +herpes varicella zoster +burp gun +Alpine glacier +stun gun +pilot boat +Southern crab apple +bushtit +pullet +polo pony +jackfruit +raw vegetable +French marigold +golden shower tree +spike lavender +wahoo +brass knucks +cabbage palm +diesel-hydraulic locomotive +red jungle fowl +prairie sunflower +rye +loofa +icecap +shade tree +secretary bird +saffron +cos +muskrat +videodisk +Carolina wren +candy bar +Bohemian waxwing +flowering almond +cold frame +raglan +pine siskin +quince +western red cedar +red maple +adobe +agora +kumquat +tenement +bantam +bayberry +water jump +great granddaughter +snips +porcupinefish +brochette +love-in-a-mist +Iceland poppy +common sage +pace car +camel racing +slipcover +nopal +shoehorn +calypso +rhea +in-basket +maple syrup +cold chisel +Pacific ridley +dietary +aperture +lapin +rock hyrax +house wren +litchi +ragged robin +control center +shoebox +arabesque +eider +silver birch +bantamweight +ax head +softball +blue gum +Bechtel crab +tomato sauce +green douglas fir +sweet gum +macaroni salad +red phalarope +budgerigar +Bedford cord +Uzi +green woodpecker 
+ohmmeter +bacon-lettuce-tomato sandwich +hackney +Easter egg +motmot +red pine +opium poppy +gat +pussy willow +greater scaup +ocelot +persimmon +western hemlock +carambola +pinion +Malcolm stock +bobsled +larkspur +wood drake +pinetum +red gum +draft beer +funnel +terrarium +Pinot blanc +doodlebug +brittle star +salsa +cantaloup +pollack +stockpot +eastern hemlock +rock wren +burqa +squash +aircraft engine +billy +flamingo flower +odontoglossum +old squaw +redstart +sheepskin coat +mate +flathead catfish +gentianella +bilberry +bog rein orchid +incense cedar +mew +Colorado spruce +cob +portmanteau +grenadine +common ginger +masdevallia +compound microscope +sobralia +white fungus +guppy +chapterhouse +honey +green frog +sea swallow +African marigold +astrolabe +verdigris +yellowhammer +carrot juice +oxlip +medicine ball +highboy +grass frog +gamebag +surgery +mincer +mulloway +cactus wren +box office +resonator +table-mountain pine +European curlew +supernova +cabbageworm +peach +plane seat +asp +Yquem +tomato hornworm +rook +quadruped +chador +micrometer +dabchick +Afro-wig +balsam fir +bucket seat +sage green +macon +blue poppy +chinquapin oak +black pine +spinach +chrysalis +carnauba +tee +bearberry +shirt button +tree of heaven +southern white cedar +covered wagon +brood hen +spadix +European catfish +winter wren +bulldog clip +carpetbag +study hall +chino +simian +closeup lens +cookie cutter +grapefruit +mandola +sassaby +Allegheny plum +piaffe +scorpion fly +booby +draft animal +field tent +cumin +laurel oak +smooth-leaved elm +American arborvitae +American toad +grinding wheel +mountain ash +cuttlefish +pipistrelle +parer +safety rail +Clark's nutcracker +side-blotched lizard +giant hornet +wicket +dugout +electric toothbrush +dhow +common four-o'clock +long-eared owl +anchor +near beer +tansy +creme caramel +guided missile frigate +shelduck +durian +compact +iron tree +shiitake +polo +camouflage +pedal pusher +salon +tangerine +lacebark +Swiss mountain 
pine +goalpost +poolroom +space capsule +wild cherry +dress hat +wave +raglan sleeve +cassia +Jerusalem artichoke +cabbage palmetto +marsh harrier +American redstart +sea squirt +cliff diving +sparrow hawk +watch cap +frankfurter bun +police boat +flash camera +neem +eastern meadowlark +Italian cypress +orb-weaving spider +graniteware +sewing basket +latex paint +rock dove +stator +leaf lettuce +roulette +broadcloth +Spork +panicle +sternwheeler +cider vinegar +brown creeper +cowfish +closed gentian +chickpea +port +pimento +sheeting +matilija poppy +hawk owl +guava +papaya +huisache +European shrike +racing skiff +yellow warbler +gumbo-limbo +North Carolinian +staysail +court +iced coffee +money belt +shaver +Psychopsis papilio +sumo ring +refection +kingfish +clock pendulum +greater butterfly orchid +disk harrow +tawny eagle +polyphemus moth +pieplant +Nicaraguan +bocce ball +California box elder +porbeagle +crown of thorns +Mexican sunflower +fennel +stream orchid +slip ring +white fir +fold +moss campion +fairy ring +hose +pony-trekking +western larch +meadow pipit +Cape May warbler +longan +bookmobile +junk shop +lemon shark +smelling bottle +solan +widow +sea pen +universal joint +day game +goldcrest +maiden pink +biographer +rotunda +oriel +arranger +gambrel +Angora +fen orchid +leading rein +Wilson's snipe +European nuthatch +natterjack +athletic supporter +mouflon +emergency room +swallow-tailed coat +western meadowlark +feather star +Navy SEAL +toilet bag +loquat +lesser butterfly orchid +thumbhole +breathalyzer +featherweight +collards +mayfly +confessional +mountain ebony +redwing +Norway maple +refractometer +stagecoach +gasoline gauge +octopus +baker +Rhode Island red +European tortoise +cardiologist +Punjabi +Arkansas kingbird +tamarind +drum brake +flash +yellowtail +stokes' aster +emperor +free house +sour gum +ruddy duck +hamadryad +command module +tinamou +Norway lobster +washstand +European hornbeam +roaster +black-necked grebe +tallgrass 
+leopard lizard +anastigmat +Blackburn +deutzia +ground rattler +Christmas fern +wild pink +sesame seed +carrycot +Italian parsley +nectar +roll-on roll-off +true laurel +anisette +candy corn +flowering maple +revers +dun +tobacco hornworm +common sunflower +common grape hyacinth +cardiograph +electric meter +herb Paris +goalmouth +spruce grouse +canopy +wind poppy +stemma +gateleg table +lumper +speckled rattlesnake +gudgeon +rough-legged hawk +internal drive +pomelo +piece de resistance +storm door +clementine +Japanese pink +settler +yellow jacket +Fraser fir +royal palm +cicada killer +cayenne +guava +bluewing +red baneberry +lesser yellowlegs +cache +bog rose +sparring partner +ski jumping +sherry +glacier lily +beer mat +shredder +American widgeon +protectionist +green olive +black-tailed deer +Alpine fir +dispatch case +whipping cream +African daisy +cantilever bridge +maraschino +rhea +ink bottle +dacha +hagberry tree +lesser rorqual +orchard oriole +candidate +cuticle +breadfruit +fishbowl +giant puffball +closed gentian +Joshua tree +tie rod +beard lichen +flame tree +stegosaur +acerola +Swan River daisy +common murre +flowering almond +protegee +loggerhead shrike +Wilson's warbler +Japanese honeysuckle +basilisk +skimmer +hybrid tuberous begonia +pumpkin ash +chafing dish +collared lizard +iced-tea spoon +scrubbird +Iceland poppy +grey kingbird +wallflower +slick +diesel +Swiss pine +ethernet cable +ketch +lightship +black cherry +swordtail +Monterey cypress +lightweight +Floridian +Sabine +stall +contact +viola da gamba +hemstitch +upland sandpiper +box spring +sassafras +radome +lesser scaup +bluefin +yellow-bellied sapsucker +armored car +cabin class +Moorish arch +webcam +aquavit +overall +sergeant major +soft shield fern +gin and it +bobolink +subcompact +falconer +black morel +roadrunner +lab bench +thong +coffee urn +weeping beech +caladenia +southern live oak +scanner +wine vinegar +common speedwell +European roller +fuji +snag +piping plover 
+concertina +secateurs +meat thermometer +supercomputer +funnel +dais +western fence lizard +spruce pine +pommel horse +Cassegrainian telescope +pitta +India-rubber tree +mangosteen +tamp +aposematic coloration +dustcloth +birth +Atlas cedar +reed bunting +jabiru +sainfoin +press photographer +golden oriole +laryngoscope +thermal printer +winder +doubles +cricket ball +dabbling duck +tonic +Buddhist +Morris chair +swatter +quaking aspen +ancient pine +American larch +evaporative cooler +click beetle +yellow-breasted chat +souchong +bluegill +pied-billed grebe +tricorn +spring beauty +southern magnolia +rowel +chili +hard roll +flathead +satsuma +gangplank +bourguignon +cockfighting +greenwing +plum tomato +fly orchid +gnatcatcher +spotted eagle ray +ovenbird +brassavola +mocha +candy cane +afterburner +thriftshop +study +winter crookneck +grinder +muskellunge +sacred ibis +inverter +sandwort +deer fern +stair-carpet +Cotes de Provence +ovenbird +rex begonia +American woodcock +poison ash +lowland fir +pawpaw +loblolly pine +kinkajou +European hackberry +pest +coralwood +Bedouin +acetate rayon +snuffbox +radiator cap +basket oak +table-tennis racquet +smew +midge +telescopic sight +radish +great burdock +separate +damask violet +broadbill +bourbon +blacktip shark +gift shop +khimar +date +woodland caribou +policeman bird +grey birch +American elm +strawflower +officiant +hart's-tongue +straight razor +Spanish elm +radicchio +white croaker +vicuna +soft-shell clam +flannel +adonis +bonito +kittiwake +English walnut +soldierfish +hipflask +spotted crake +Streptopelia turtur +American maidenhair fern +corn cockle +telephone cord +canopy +playback +diocesan +marsh orchid +manakin +purple grackle +cob +fishmonger +otoscope +vermillion flycatcher +inhaler +instar +licentiate +myrtle warbler +goat herder +benthos +toggle +drumhead +piranha +doorplate +vault +triptych +red-necked grebe +transporter +vernier caliper +flathead +Portuguese man-of-war +countrywoman +vacation 
home +Bactrian camel +night-light +module +lemon curd +carancha +painted daisy +bok choy +ratatouille +troll +escarpment +cinnabar +computerized axial tomography scanner +lychgate +sowbread +bedside +guided missile cruiser +reel +cleat +hemostat +blue shark +Seven Wonders of the Ancient World +motorized wheelchair +pillow block +horned puffin +prickly pear +electric range +mother's daughter +vein +Oregon maple +bird dog +faceplate +wren warbler +feather reed grass +common alder +Adam's needle +straitjacket +organ-grinder +gantry +bikini pants +peristyle +herpes +terry +toad lily +celandine +red-breasted sapsucker +bragger +green peafowl +fuschia +quoits +house martin +dome +herpes simplex 1 +touraco +meeting house +vacuum gauge +cat's-ear +crisphead lettuce +carpet moth +European rabbit +puff adder +Old World scops owl +fire pink +fruit punch +ant bear +black walnut +stroboscope +white mangrove +pine grosbeak +cast +check-in +ring-necked parakeet +matai +shingle oak +fieldwork +rue anemone +landing net +ouzo +herringbone +lyceum +hydrogen bomb +mullein pink +masher +evening grosbeak +water vole +livingstone daisy +tomatillo +cavalier hat +interphone +wild lupine +goosefish +sugar maple +plantain +white dead nettle +Monterey pine +bugle +veloute +marsh gentian +Bermuda buttercup +alehouse +Peter Pan +thong +LP +tulip tree +scanner +scarlet tanager +music hall +angel shark +pecan +eight ball +rosy boa +outboard motorboat +garage +fanlight +black cottonwood +notornis +mountain fern +lunar crater +reddish orange +whitetip shark +executant +European ladies' tresses +washboard +revolving door +case knife +balloonfish +greater kudu +tarpan +cog +wet fly +Irish soda bread +basement +broken arch +canopic jar +muscat +kazoo +bobsledding +loaner +black guillemot +English saddle +garlic mustard +Foucault pendulum +mulberry +clotted cream +dove's foot geranium +Atlantic ridley +convector +ground floor +European wildcat +poinsettia +hideaway +great barracuda +black beech +bushy 
aster +cornflower +tam +true slime mold +carving knife +holly fern +railroad tunnel +crimson clover +disposal +etamine +suspension +plasmodium +political scientist +minnow +Spanish rice +twist bit +subway train +Scleroderma citrinum +saw palmetto +console +gimlet +hand pump +waratah +rock rattlesnake +keel +server +curlew sandpiper +hone +sable antelope +inkle +photostat +foresail +sallet +tiger salamander +chutney +onlooker +Exmoor +tiramisu +drawing room +battery +sour orange +juniper berry +beeper +funeral home +fescue +Maksutov telescope +ranch house +jai alai +carob +socket +popcorn +sandbar shark +pipal +summer tanager +oast +skipjack +rolling stock +dropper +great snipe +turnip greens +cowpea +honeycomb +ichneumon fly +maternity hospital +harp seal +nylon +bomb shelter +horse tick +litchi +camel's hair +mimosa +bur oak +anvil +belay +pinhead +continental breakfast +burglar alarm +Mojave rattlesnake +auxiliary storage +lightwood +ratepayer +cecropia +retractor +quadrate +pepper tree +Venus' slipper +abattoir +strawflower +firewater +purple saxifrage +black rat +pack +pepper pot +mayweed +winger +whitetip shark +great yellow gentian +snowdrop anemone +garden angelica +soy sauce +white poplar +inkwell +crouton +gas gun +honey locust +house of cards +ice maker +moquette +arrack +casualty +butterfly orchid +eau de vie +mosquitofish +prairie smoke +haft +horseshoe +steel +peach orchard +Mexican hat +encaustic +shoe +pennywhistle +sweet woodruff +hull +doorsill +globe amaranth +day school +housedog +crown princess +oxbow +maxi +positron emission tomography scanner +compere +European turkey oak +peanut +sentry box +house physician +hot line +loquat +rove beetle +riband +flowering fern +fan vaulting +ceibo +bongo +bat boy +omelet pan +European ash +breadwinner +gaff topsail +clerestory +bushbuck +bluethroat +khukuri +Father +portcullis +candy egg +brake lining +lawn furniture +buckskin +garden pea +Brazilian rosewood +Italian bread +horn poppy +silk tree 
+Christmasberry +hotel-casino +poplin +false lupine +desert sunflower +mimeograph +alpenstock +cork tree +cultivar +common mosquito +pollard +black marlin +understudy +lancet window +college +breadfruit +Herero +Labourite +bar printer +squaw grass +stelis +firing chamber +sycamore +artificial horizon +radiologist +pansy orchid +bicycle pump +wraparound +bell gable +home computer +orchard grass +carving fork +bergamot +honeycreeper +sewing room +radiator +core +brown bat +goose grass +adjutant general +Erlenmeyer flask +massasauga +tail rotor +cardinal tetra +Drambuie +wine palm +Sarcoscypha coccinea +shantung +Calvados +garganey +vicar +house mouse +creeping oxalis +digital subscriber line +cedar elm +backgammon board +blackberry-lily +pallid bat +New Zealander +Barbadian +rose geranium +European spider crab +gharry +electric hammer +mustard +Chinese lantern +laundry cart +filament +mozzarella +gooseberry +sukiyaki +porkpie +culvert +altazimuth +plum pudding +serin +Spanish dagger +Asian crocodile +crevalle jack +mascara +pig bed +alderman +northern shrike +Sufi +purple-fringed orchid +derringer +linseed +hockey skate +bell jar +Japanese wistaria +mantled ground squirrel +western toad +lieutenant commander +mechanical piano +ovoid +paddlefish +demijohn +coast live oak +brick +gearset +tailstock +phonograph needle +winery +tuberose +mother's boy +shot tower +crucian carp +carpet pad +lamb's-quarter +Menorah +common white dogwood +hypanthium +rosebay +wild medlar +soil horizon +sweet orange +bitterroot +hand glass +cloisonne +towpath +gum ball +margay +carambola +bolt cutter +charger +vibraphone +gueridon +elephant tree +wood-frog +ash grey +duffel coat +third base +chunga +glebe house +lake trout +encephalartos +Japanese oak +northern red oak +pruner +blue orchid +Biloxi +western wood pewee +corselet +alabaster +anechoic chamber +grass pink +wax begonia +blue daisy +pennyroyal +Asian tiger mosquito +cheese souffle +flat bench +caramel +sump pump +bush violet +common 
fennel +corner +skullcap +asparagus fern +white mangrove +calceolaria +sateen +saltbox +hollowware +head nurse +coal miner +mountain lily +tufted vetch +European perch +line officer +steamer +stickball +shin guard +cauliflower +Monegasque +hatpin +wolffish +trackball +khaki +arthrogram +rocket larkspur +naval commander +Gemini +ski binding +department head +Chenin blanc +wingstem +knothole +aerides +sweet bay +tautog +gangway +waterspout +Hudsonian godwit +armyworm +incinerator +kidney vetch +pine nut +cypress vine +hip tile +sorrel tree +relay +bench press +Kentucky coffee tree +dobson +sapling +false lily of the valley +veld +phaius +vitamin B2 +beaker +wall tent +sieva bean +dusty miller +sewing kit +cavalry horse +diaper +butterfly pea +Spam +saddlebill +pearly everlasting +kowhai +Sister +moneywort +organdy +pine marten +bareboat +hot-water bottle +baby blue-eyes +silver lime +common cotton grass +malmsey +blue pea +baggage car +pineapple +folding saw +cotton rose +brawler +black duck +Weizenbock +pool player +Gujarati +wild duck +purple sage +sage grouse +mail train +arm guard +short-spurred fragrant orchid +queen +eparchy +spring peeper +ortolan +shoulder +fighter pilot +American beech +snowcap +novitiate +roller +butcherbird +canyon oak +brompton stock +firebrick +rudder +light cream +Primus stove +nonsmoker +probationer +harp +kosher +surcoat +videotape +zebu +first class +yam +car +rissole +miso +funambulism +attic +curling iron +shutter +encolure +split-pea soup +yellow rocket +gas oven +ultracentrifuge +chamomile +canteen +eyeliner +yellow squash +Irish stew +collar +doublet +machinist +septic tank +snap bean +Polyporus squamosus +western tanager +creeping St John's wort +back +sinkhole +perforation +Romanian +epergne +fez +comfrey +sidecar +beach pea +screen door +instigator +plughole +woodbine +pigweed +hip pocket +common scoter +squeegee +Surinam cherry +porringer +body stocking +eatage +shallot +enlarger +common canary +trophy case +gun case +plow 
horse +hot plate +pearl oyster +margarita +madras +backspace key +pigeon guillemot +pajama +buckthorn berry +homestead +bedbug +Linotype +trundle bed +granadilla +theremin +chin rest +bouillabaisse +tumble-dryer +truffle +cassava +kurrajong +gyroscope +European silver fir +C-clamp +politician +green soybean +exponent +flame tree +scissortail +achimenes +crown daisy +soft tree fern +spaghetti squash +pale violet +beaver +dashiki +washboard +driving wheel +sack +foulard +sputnik +boatbill +English elm +sack coat +grog +golliwog +Malayan tapir +May wine +calash +stile +windjammer +American sycamore +rotor head +fast food +balata +dragonet +Emmenthal +metronome +negative +meadow saxifrage +rabbit ears +chenille +round +hobby +crankshaft +Wilson's phalarope +Murphy bed +soil pipe +forecourt +policyholder +tarmacadam +loyalist +gyro +Queen's crape myrtle +shortcake +apple butter +pumpkinseed +heronry +yellow perch +baggage claim +escarpment +diaphragm +mescal bean +shunter +flax +columbarium +Joe-Pye weed +Neandertal man +casement +hole-in-the-wall +Verdicchio +futurist +eaglet +tassel hyacinth +pup tent +fawn lily +cabbage palm +pogonia +hospital ship +water mill +Oregon grape +lentil +grindstone +banana split +inkberry +coonskin cap +bazooka +wrap +anise hyssop +Java sparrow +red-eyed vireo +common opossum +clintonia +bustle +booster +tribesman +soy +panhandle +jaboticaba +locking pliers +Sauvignon grape +ghat +screw +oximeter +white croaker +saucepot +eggbeater +reticule +cabbage bark +looking-glass plant +head gasket +California sycamore +cowbell +Aleuria aurantia +Herr +lever +spider orchid +cashew +shift key +solar house +wood chisel +white +mantilla +stamp +bolero +rear admiral +garden rake +Lao +crowbar +lapdog +buttermilk biscuit +yellow bedstraw +pickerel frog +dowel +serjeant-at-law +mill-hand +lambrequin +state treasurer +red silk-cotton tree +coiffeur +star anise +shoulder pad +marshal +sitar player +gown +ground cedar +hedge maple +caddie +pitahaya +corn 
marigold +stick cinnamon +woodland star +Eurasian green toad +anti +blueweed +medicinal leech +gaur +chocolate kiss +kit fox +mother +butte +audio CD +blast furnace +vitamin D +nutgrass +cornice +black sheep +hearing aid +lingonberry +quad +lentil +riding crop +pratincole +pentagon +sea lavender +nerita +flatmate +catboat +water clover +angiopteris +mushy peas +crown imperial +music school +woodshed +platy +Turk's-cap +rundle +reading teacher +hardtack +balloon sail +oriental spruce +bluefish +white mulberry +horned violet +satin bowerbird +treasure flower +sustaining pedal +mimosa +spurge nettle +sea green +hasp +lederhosen +pink cockatoo +long johns +basket weave +freewheel +thrust bearing +timber tree +orphan +falafel +common camas +bird of passage +bird's foot trefoil +electric eel +fizz +grape arbor +serape +brace +hazelnut +kylix +horse mackerel +cassia bark +lizard orchid +spat +Brown Swiss +pocket flap +pillory +purplish blue +rolling mill +tappet +broccoli rabe +semi-detached house +mushroom coral +fly orchid +nougat bar +ball hawk +sand wedge +shirred egg +black locust +strip lighting +drop scone +brush turkey +ball +tragopan +dallisgrass +tuatara +great knapweed +potentiometer +Kiliwa +Pacific bottlenose dolphin +accelerator +Darwin tulip +osteopath +Arizona cypress +manna ash +butterbur +cornelian cherry +American holly +nopal +tanker +foreshore +ditty bag +gas lamp +safety razor +chanter +fomite +chip +striped killifish +catalytic converter +plaice +dusty miller +takin +gerenuk +corn chamomile +Japanese pagoda tree +boneset +common osier +Guinean +taro +plotter +celandine poppy +churn +steenbok +edible mussel +sensitive fern +triode +black raspberry +zoo keeper +feather ball +dredger +starlet +cornpone +coat button +rosinweed +toy Manchester +crested cariama +finger food +basilisk +shotgun shell +comfort food +mountain hemlock +candytuft +Stilton +record changer +anklet +ball valve +Mediterranean snapdragon +BVD +sand cat +Galloway +nutmeg +water-mint 
+woodwaxen +citron +ark shell +federalist +drone +cheekpiece +hyperbaric chamber +addax +field-emission microscope +synchronous converter +men's room +medlar +electronic fetal monitor +Sazerac +false indigo +roof +passe-partout +meadow spittlebug +Phytophthora infestans +oast house +hedge nettle +voting booth +slender salamander +telephone jack +true bug +scouring rush +Scotch egg +matchbook +aperea +cytomegalovirus +garlic press +cove +whitebark pine +Slovene +narrow wale +mother's milk +Audubon's warbler +prickly poppy +cowl +tailorbird +mud brick +bamboo palm +welt +Afghan +Virginia spring beauty +dinner bell +night jasmine +fly rod +microtome +aerie +carinate +picker +brick trowel +loving cup +swathe +green mayonnaise +rivet +bandbox +newsroom +tea tortrix +bobby +gig +hush puppy +garlic chive +piston rod +aspidistra +bluejack oak +harvest-lice +strap hinge +sour mash +macadamia nut +histiocyte +fan belt +shelf bracket +abelia +Hottentot fig +fish chowder +abettor +compote +beige +dioon +hop +haymaker +oilskin +magnetometer +tool bag +tambour +call girl +gringo +fairy light +broad-leaved plantain +second base +zebra mussel +Japanese cedar +pistia +swamp chestnut oak +cashmere +double cream +samisen +lamb curry +companion +kapok +julep +sweet woodruff +gardener +jewfish +inspector general +collembolan +wheel bug +bass +scrubland +wryneck +macrozamia +trouser press +clove +tiger cowrie +yawl +collard +dildo +pony cart +ormer +annual +tessera +chancellery +two-toed sloth +queen +old lady +wringer +spritzer +baggage +black mangrove +black-eyed Susan +semifinalist +highlighter +alfalfa +Easter daisy +escapement +operating table +neutral spirits +bursar +roble +entablature +girl wonder +farm boy +ring ouzel +permanent press +auklet +beefsteak tomato +gaming table +tea bag +manul +giant bamboo +Ozark chinkapin +matzo +furrow +smoothhound +CD-ROM drive +powdery mildew +copilot +garden +American merganser +bunsen burner +Asian longhorned beetle +lead tree +creeping 
buttercup +Percheron +back brace +axseed +cub +soul food +rabbi +edelweiss +mineshaft +fox grape +sandwort +torque wrench +leisure wear +Mae West +broccoli +loach +maraschino +heavy cream +silkworm +cirque +vintner +whitewash +butterfly pea +two-toed sloth +midiron +ceriman +Bulgarian +operating microscope +sambuca +California fuchsia +silver maple +tangelo +black bean +lugsail +starting gate +leek +sunflower seed +fish fry +clinker +synagogue +coscoroba +brae +uphill +common limpet +golden plover +cedar of Lebanon +amphibian +Canary wine +taipan +agua +feeder +parallel +mater +pink calla +meat counter +yagi +crab cactus +cacao bean +bowfin +alley cat +stonefly +Eastern cottonwood +vernier scale +marginal wood fern +dancing-master +detective +yam +textile screw pine +hooch +spinet +single prop +sassafras +goose barnacle +triple cream +China tree +peeper +dressmaker +snatch block +ironmongery +dressing case +creeping bellflower +silver sage +honeydew +eastern red-backed salamander +peg +nombril +danish +mashie +anarchist +alligator snapping turtle +shepherd +American white pine +runner +chalice vine +rheumatologist +defibrillator +yellow chamomile +lemon balm +peacekeeper +native beech +sandwich board +Bavarian +titrator +paneling +deer mouse +poteen +sugar snap pea +meadow salsify +town crier +best +basinet +common myrtle +night lizard +cushaw +Tampax +camphor tree +gentile +orange peel +putty knife +pyromaniac +Brummie +fever tree +double +nest +inferior +cabbage tree +graduated cylinder +mucor +woodborer +earthwork +potato salad +four-hitter +gooseberry +water vole +ziggurat +grapefruit juice +four-in-hand +cranberry bush +diode +videotape +Mohican +niacin +beetroot +shirtsleeve +cork tree +two-eyed violet +white ash +drawing chalk +baked Alaska +bone-ash cup +toastrack +diastema +bed jacket +dwarf astilbe +yellow honeysuckle +cow pasture +sheet pile +saxhorn +upholstery material +California white oak +Spanish bayonet +horsemint +littleneck +deflector +magician 
+standard transmission +blue marlin +shallot +feijoa +collar +board +jump suit +common staghorn fern +priory +Xhosa +Loranthaceae +barbecued wing +barmaid +spit +lemon juice +umbrella plant +field pennycress +centenarian +queen bee +fish stick +black bread +dirk +secularist +German American +spotted weakfish +iron foundry +speed bump +yellow-fever mosquito +gag +frame +black-eyed pea +alcoholic +involucre +sperm whale +balanced diet +wax bean +butcher's broom +winter heath +Mainer +Australian pine +gas guzzler +double-breasted jacket +pod +palo verde +trimmer +wattmeter +dyer's woad +crotalaria +vine maple +sulky +jack pine +thumb +Wilton +Panchen Lama +welder +badminton court +business editor +Arabian coffee +Kamchatkan sea eagle +foamflower +steep +plane +freckle +cerebral cortex +Vouvray +tea +forest tent caterpillar +neckerchief +accelerator +jig +bridal wreath +highball glass +New England clam chowder +beach strawberry +call waiting +baton twirler +double boiler +Dutch elm +car bomb +filmy fern +breviary +Florida gallinule +dace +parsnip +riparian forest +crescent +earplug +grab bar +cusk +foglamp +screwtop +black mangrove +mascot +Welsh poppy +gas holder +support hose +salsify +red beech +Indian python +caroler +pineapple juice +lowboy +terra sigillata +black olive +hypodermic needle +radio-phonograph +moussaka +miter joint +creche +tuning fork +black wattle +affiliate +vertical tail +kiwi +red morning-glory +piping crow +runway +Kashmiri +studio apartment +sea feather +Judas tree +boatbuilder +corn earworm +fallboard +Victrola +lechwe +goat willow +turret clock +Canada anemone +leaf lettuce +savoy cabbage +headpiece +Lebanese +fothergilla +hemlock +toolshed +silver tree +blue-headed vireo +weatherman +cylinder +caltrop +adjutant bird +driving iron +millet +European woolly thistle +rose apple +clown +schoolfriend +eastern coral snake +barbecue +executive vice president +long-billed marsh wren +brittle bladder fern +tank destroyer +left-hander +matting 
+catchment +balsa raft +eastern fence lizard +color tube +corncrib +electric typewriter +westland pine +elder statesman +whey +plonk +mound +cittern +nest egg +copyholder +China aster +basking shark +gavial +common duckweed +vanilla orchid +red-shafted flicker +granadilla +sylph +sty +vest pocket +potherb +little brown bat +Trapezium +ordinary +adult +purple-fringed orchid +abseiler +disco +metal detector +beefsteak fungus +ilang-ilang +barley grass +hawser +suture +brake shoe +staghorn coral +barbecue sauce +Browning machine gun +sarcophagus +disa +oven thermometer +rosemary +track +gorget +quince +royal +piston ring +teak +pin cherry +Komi +walking fern +sloe +synchronous motor +fire-bellied toad +Teleprompter +co-star +cape gooseberry +oscillograph +bass clarinet +cock of the rock +Tyke +showy milkweed +safety valve +branch water +sweet marjoram +hugger +crampon +fairy godmother +band-tailed pigeon +snow-on-the-mountain +minibar +foreland +grosgrain +dita +rampion +calligrapher +jointed charlock +master +sheepshead +barrelhouse +Carolina allspice +mastic +brake pad +whiskey sour +casement window +conveyer belt +stolon +pavonia +shinny +witch elm +logwood +hostel +pageboy +vesper sparrow +pyrrhuloxia +common carline thistle +wafer +boysenberry +screw augur +hack +American white oak +governor general +Mother Hubbard +game fowl +drosophila +delft +nymphet +tollbooth +chough +Russian dressing +plum tomato +American saddle horse +dusky salamander +black medick +red valerian +cordage +Elastoplast +conacaste +backlighting +swell +riveting machine +cowpen daisy +openbill +water speedwell +picture hat +crested myna +servo +bletia +garden trowel +muscadine +common caper +false lily of the valley +aralia +sharp-tailed grouse +cigar smoker +bandoneon +Chinese alligator +crazy +point lace +charcoal +Texas horned lizard +marinara +backstay +Gatling gun +piston +game fish +fall armyworm +grammarian +beer hall +guadalupe fur seal +sugar palm +peanut +velvet ant +light machine 
gun +rya +cling film +adobo +myrtle oak +angelica +balsam apple +windbreak +brother-in-law +snap brim +automobile factory +clavichord +dusky shark +edible banana +altar boy +California lady's slipper +schoolbag +wax bean +Atlantic walrus +bullpen +straw wine +thatch palm +potluck +tamarind +charcuterie +sod house +tie rack +liebfraumilch +clinician +scarlet lychnis +Spanish iris +bread knife +water oak +bedpan +Angolan +bassarisk +Alaska fur seal +African wild ass +milk float +froghopper +Verpa bohemica +water cooler +chop suey +ranker +red helleborine +Prince of Wales +marmalade tree +car train +giant red paintbrush +desert sand verbena +right whale +baron +stevia +asterism +five-spot +catapult +Silex +fiberscope +refresher +beef Bourguignonne +snood +divot +waterproof +crabeater seal +Missouri primrose +bumper guard +rock opera +Lilo +coffee can +smokehouse +buffalo grass +propjet +ice tongs +poop deck +acorn barnacle +veal parmesan +shower room +collins +ringhals +silage +jawfish +trouser cuff +contour feather +songstress +rachis +White Russian +stanchion +mastaba +flatbed press +viand +legal representative +espalier +organic light-emitting diode +sushi +scorer +haricot +pinna +plectranthus +jungle cat +dried apricot +coach horse +white fringed orchis +veal cordon bleu +bath +dallier +marching order +donkey jacket +Panama tree +aerator +klaxon +pinnacle +shouldered arch +lesser celandine +common eland +Grand Marnier +cock of the rock +phlomis +Japanese umbrella pine +morning room +dead-man's-fingers +little auk +bascule +house paint +home fries +great skua +cesspool +flying gurnard +wild crab +checkerbloom +Wollemi pine +cheese dip +coif +charwoman +tea ball +waif +Arctic ground squirrel +parishioner +stabilizer bar +potentiometer +black cohosh +medlar +willow oak +cascara buckthorn +scoutmaster +Canada lily +poppy seed +paper mulberry +blackthorn +garrison cap +inductee +aeschynanthus +interior live oak +black spleenwort +wild service tree +sling +nicad +swab 
+sego lily +eiderdown +fruit cocktail +pallasite +weeping spruce +shiv +sea lamprey +coachman +half binding +American white birch +gainer +Concord grape +yellow birch +fucus +common room +io moth +red osier +crucible +galangal +salmagundi +pepper steak +cap opener +swizzle stick +tomato juice +Nobelist +Sarawakian +African monitor +sleeping beauty +stereoscope +curd +pyramid bugle +applejack +dosser +rake handle +pilot light +Eames chair +Scotch and soda +bell heather +dinette +blackpoll +dogie +sound camera +cattle guard +mashie niblick +edible cockle +monocle +steak tartare +partaker +sidesaddle +communications satellite +porkfish +water hemlock +drawbar +ultramicroscope +Jamaican cherry +craftsman +lovage +common apricot +drum majorette +backsword +smooth alder +Amniota +dribbler +theosophist +dolman +ivory tree +Green Beret +pipe smoker +mayoress +mignonette +crampon +henbane +kirtle +death's-head moth +instep +great St John's wort +lorry +black-necked cobra +ball carrier +Jordan almond +byway +earless lizard +marble +andiron +high-protein diet +buzzer +ice floe +crankcase +Bofors gun +sockeye +veery +Delaware +caravansary +prairie coneflower +star apple +suiting +cot +call forwarding +American gallinule +glossy snake +rose chafer +instant coffee +placket +Tarahumara +pulsar +philodendron +orange tortrix +cypress spurge +Welsh rarebit +music box +giant crab +vanilla bean +water thrush +prayer shawl +gouge +promoter +dagga +black currant +bitter cassava +drain basket +snare +digital audiotape +retainer +olive drab +gluten bread +graham cracker +cheddar pink +caregiver +spray paint +Anglo-American +boatyard +backbencher +Link trainer +bell arch +weir +arbor +millionairess +sour cream +earthtongue +crawlspace +crossjack +balalaika +crupper +western redbud +guinea hen +rangeland +gaboon viper +common louse +single-leaf +horseshoe +balsam poplar +triskelion +jack-in-the-box +jester +rain stick +glove compartment +imperial moth +Japanese beech +biotin +turnip 
+oligarch +western skink +mudguard +retsina +data system +green bristlegrass +visiting professor +beaded lizard +weathercock +Sloppy Joe +high tea +lightweight +record sleeve +cooler +nodding onion +pigs in blankets +torque converter +district attorney +bunting +orrery +radiator hose +common plum +wood spurge +calamus +chicken Kiev +pin +lath +telephone bell +thistledown +audiotape +gypsy moth +snuffer +pari-mutuel machine +peanut butter +hearthrug +sack +Old World yew +chives +stovepipe +xenolith +mattock +mangle +electric chair +backup system +Empire +blackwash +dodder +Allegheny chinkapin +finger plate +junk +brown rice +wild angelica +chinaberry +mason +rasp +den +violet wood sorrel +nosewheel +plenum +merino +kirtle +Igbo +ensign +sex symbol +Belgian endive +sugarberry +yellow salsify +purple emperor +atlas +African clawed frog +leatherjacket +midwife +sac fungus +European cuckoo +three-day event +Mexican poppy +wagon tire +armyworm +rain gauge +Oregon ash +columbarium +spectrophotometer +Milanese +pointing trowel +casualty +Eastern hop hornbeam +lobe +mouthpiece +au pair girl +giant water bug +Browning automatic rifle +laser-guided bomb +drone +white alder +cockleshell +mufti +gravy +berm +boat hook +marshmallow +pet shop +cowpea +tactician +wading pool +anchovy dressing +flip +shackle +Wedgwood +thick-billed murre +erecting prism +giant salamander +sleeper +quiver +chain store +wing tip +New World tapir +witches' butter +gendarme +ginseng +common maidenhair +graduate nurse +balsam pear +hoatzin +philanthropist +axle bar +gas meter +moth mullein +ragbag +Chinese cabbage +celery stick +rutabaga +scalpel +cape marigold +variometer +argali +brig +shuffleboard +wort +Orlon +epiphyllum +allice shad +coffee filter +solar telescope +Japanese linden +thinning shears +golden wattle +queen triggerfish +millinery +surfbird +flame fish +clove +dicamptodon +red-bellied terrapin +turmeric +baya +air horn +Indian coral tree +punnet +sharkskin +water crowfoot +bight +desert 
iguana +Texas toad +volva +dredge +Turkey red +chemical plant +gemma +dice cup +orange marmalade +mistletoe +surveyor +frozen orange juice +pallette +poultryman +burbot +courlan +captain +saddlery +bodyguard +dwarf tulip +black ash +pulse +nailbrush +tickseed sunflower +legless lizard +shirtwaist +polling booth +chickeree +garlic chive +common thyme +multichannel recorder +screw thread +sangoma +calliopsis +geoduck +colleen +bandicoot rat +pastis +swamp sunflower +scorekeeper +Honduras mahogany +Australian pitcher plant +triangle +elevator shaft +green pea soup +carrel +prairie aster +bird's-nest fungus +scarlet clematis +gook +mescal button +carcase +mulatto +ejection seat +strawberry daiquiri +goat grass +car battery +babu +chief of staff +monilia +Siberian crab +ridge rope +Morchella semilibera +nutmeg +moosewood +graham bread +California four o'clock +zwieback +velvetleaf +abelmosk +shadow box +corned beef hash +newsreader +backstairs +cutwork +sherbert +tooth fungus +angel-wing begonia +greasepaint +common milkwort +potato vine +CD drive +crepe de Chine +sporting man +koto +armet +barking frog +celeriac +drainage ditch +black box +steel blue +clotheshorse +corn speedwell +drawknife +spritsail +vichyssoise +modeler +pocketcomb +limey +suslik +cockpit +digester +brig +raita +troll +benedictine +rock wren +lock +Barnaby's thistle +school bell +school ship +Soave +falchion +swaddling clothes +terrine +smoke screen +rivulus +sweet lemon +cullis +bustier +peppermint +Philadelphia fleabane +Hampshire +active +charnel house +face guard +Quebecois +facilitator +tongue depressor +bitternut +heath aster +sapodilla +bluestem +centrist +Canterbury bell +needlenose pliers +groats +tapa +Qatari +paper feed +tilt-top table +plastering trowel +brazil nut +rotogravure +patriot +manicurist +bacon and eggs +puffbird +lightweight +golden willow +kaiser roll +duff +girandole +seaside daisy +Kurdistan +Skivvies +showboat +fire bell +lock-gate +greater masterwort +weald +ice ax 
+toetoe +mess kit +bucking bronco +black turnstone +backscratcher +backpacker +basement +marbleization +trigger +satsuma +fall-blooming hydrangea +mountain lady's slipper +yellow oleander +crookneck +ex-president +Venn diagram +psaltery +bulwark +old boy +linear leaf +aril +butt weld +fall webworm +pruner +bald-faced hornet +nougat +tailgate +field speedwell +potsherd +center punch +long beech fern +desert paintbrush +canyon treefrog +bushel basket +Eurasian +swamp horsetail +cryptanalyst +wicket +school newspaper +captive +spider brake +electric mixer +tumbleweed +mason wasp +sash window +paddock +wet bar +oxtongue +stevia +wheat rust +scute +switch engine +mud dauber +dotterel +snailflower +common barberry +mulligatawny +cinnamon bark +cigar box +trivet +proof spirit +cream soda +western grey squirrel +baby powder +Bren +Japanese yew +sailcloth +Basket Maker +bannock +basidiocarp +aphelion +erect bugle +limiter +bosc +Przewalski's horse +helmet orchid +audiometer +battle cruiser +grass widower +staphylococcus +Congolese +common pitcher plant +parliamentary agent +Virginia snakeroot +mockernut +Siberian elm +backbench +rough +chervil +chlamys +nationalist +galantine +screwdriver +falsifier +cancerweed +spur +jerkin +porte-cochere +dill pickle +Montagu's harrier +tetrode +true fungus +American quaking aspen +vitamin B1 +leopard lily +eggdrop soup +aurochs +core bit +Jaws of Life +trousseau +parquetry +Disciotis venosa +tender +beef goulash +vitamin K1 +pepper spray +covered smut +hook +sports announcer +weapons carrier +foxtail grass +sloe gin +mezereon +antifouling paint +pavior +pile driver +security consultant +monkey-wrench +Indian hemp +amaretto +American wistaria +A-line +market strategist +rainbow runner +souvlaki +binturong +stiletto +gastrula +Vietnamese +Old World hop hornbeam +cold cathode +pier table +houndstooth check +prop root +leaf-footed bug +sedge wren +Dutch iris +drop curtain +opossum rat +lame +pollen tube +doubletree +compression bandage 
+pinon pine +catmint +pier arch +kingmaker +deanery +loofah +fullback +fencing mask +flying boat +carpet sweeper +lemon-scented gum +Accipitriformes +kit +pigfish +clipper +dolmas +lesser centaury +blood agar +water violet +raw milk +lemonade +vicar-general +supply closet +Anzac +confectioner +ignition key +velvet grass +white willow +John Dory +ruddiness +wheel +common horsetail +hubbard squash +speculum +Spanish bayonet +mountain mint +glint +foxhole +housemate +bootjack +sleigh bell +clog dancer +Mexican mint +rendering +Hausa +star saxifrage +spring squill +clothesbrush +liquid metal reactor +Columbia tiger lily +sorrel +cartwheel +Jersey +Caucasian walnut +desert willow +surveyor +elbow +Santa Gertrudis +fringe bush +industry analyst +lyrebird +Cortland +arroz con pollo +catechist +tank top +jew's harp +cereal oat +heartleaf +short sleeve +butty +butterfly plant +stud finder +felloe +beer garden +clevis +wood warbler +demerara +cornetfish +mince +Jamaica rum +Spanish broom +binnacle +camise +ferrule +Copt +hall +minicar +scimitar +cryptogam +miter box +limestone fern +Marsala +Parliamentarian +gravy +woolly bear moth +formula +squash bug +pigmentation +plate +skin graft +radiotelegraph +hellbender +soft pedal +lavender cotton +propagator +Bailey bridge +cottage pie +rotgut +A battery +pintle +off-line equipment +European swift +shrimp butter +plumb bob +trunk lid +succotash +yellow cypress +heartleaf +antelope squirrel +sambar +maternity ward +deciduous plant +bartlett +Riesling +sour cherry +Klansman +poke +academician +sociolinguist +bird's nest fern +common privet +scale fern +tachograph +oyster stuffing +pusher +green June beetle +staghorn sumac +lockage +master +bap +harlequin +blackfly +spotted coral root +kahikatea +cabana +riot gun +apple mint +kob +praline +confidant +pahautea +float +city father +Zen Buddhist +pessimist +conference center +banksia rose +comfit +sweet cicely +winged bean +henroost +myope +bunt +nailfile +yellow mountain saxifrage 
+cruise control +abandoned ship +water chinquapin +spanker +wing nut +puccoon +pier glass +Atlantic sailfish +medlar +buttercrunch +rough-skinned newt +planter's punch +Dutch iris +control key +committeewoman +torpedo-boat destroyer +garambulla +tree heath +gladiator +September elm +inclinometer +snowbell +call-in +sunsuit +microfiche +bluestocking +cheval glass +server +franking machine +sugar syrup +Macoun +transport ship +alderfly +wash-and-wear +Abbe condenser +bush nasturtium +wild leek +canary seed +Northern Baptist +sweet wormwood +jaboticaba +cardroom +autoradiograph +ash-pan +sprinkler system +rattrap +claymore +parts bin +forest red gum +thermonuclear reactor +Indian crocus +lector +heir apparent +leafy spurge +masquerader +varicella zoster virus +cucumber tree +hedger +Shumard oak +zooplankton +quartermaster +arrester +bridge +hop clover +meadow foxtail +winter hazel +portable circular saw +penuche +limpa +blue toadflax +mesophyte +Alpine anemone +pet sitter +avocado +streptococcus +fiber optic cable +river red gum +hornist +chicken taco +red spider +tape grass +densitometer +salmonberry +tiger snake +hot toddy +silver fern +candlenut +buckram +local call +defoliator +king +mahoe +lever lock +social insect +winter purslane +bootblack +fireball +ramie +bellbird +prepuce +capote +Chinese forget-me-not +Pisces +costume +California black oak +tree lupine +golden polypody +liger +California whipsnake +urodele +sapodilla +skillet bread +duckpin +supremo +asparagus bean +kampong +endameba +cow pony +rider +motherwort +Persian iris +soursop +kohlrabi +Parisienne +irons +doubles +feijoa +farmplace +cottage cheese +bezoar goat +subcontractor +blunderbuss +down +purple martin +Lapp +crenate leaf +tobacco pouch +beach towel +Santa Lucia fir +monetarist +stringer +ocellated turkey +Texas purple spike +ackee +caddy +hedge mustard +second-rater +strawberry bush +valedictorian +steak sauce +prairie gourd +aspirant +mint +Valenciennes +vodka martini +American persimmon 
+big brown bat +Mycenaen +mouthpiece +norfolk island pine +pennyroyal +Jewish rye bread +granadilla +tract house +wall +shuttle helicopter +blackjack oak +Lippizan +storm window +white zinnia +sickle +sushi bar +polish +baldric +brooklime +church hat +control circuit +vicuna +death adder +eukaryote +durmast +field soybean +jacket potato +wild basil +queen consort +brooklime +octant +blue false indigo +broccoli raab +step-down transformer +date bread +blue ash +duffer +oak chestnut +pennant +wedge +Florentine iris +morion +weakfish +morning dress +public address system +spearmint +Ashkenazi +sow +interpreter +Metis +pita +iron lung +parfait glass +cylinder lock +immortelle +obstetrical toad +tee hinge +successor +western +working girl +julienne +AND circuit +spaghetti junction +fer-de-lance +enlisted woman +star +lightning rod +bilge pump +pacer +horse nettle +African oil palm +blastocyst +air hammer +bamboo fern +remote terminal +lambkin +money cowrie +Pelham +clinical thermometer +wiggler +guru +false indigo +tea bag +foredeck +king +baby shoe +mule +grab bag +silver-bell tree +knitting machine +cobia +roulette ball +larder +button pink +rumble seat +noria +queen mother +solar thermal system +aquaplane +highbrow +rusty blackbird +desktop +lima bean +pontoon bridge +watercress +wild cabbage +tumbleweed +dressing sack +compact-disk burner +spittoon +marrow +sporophyte +second fiddle +pot-au-feu +specialty store +dry +mole +khadi +japonica +lovage +squamous cell +lobe +European creeper +brown pine +bladderpod +rumble +French Canadian +mascarpone +Pacific halibut +perennial ryegrass +wine lover +turbot +longwool +silver tree fern +dust cover +synchromesh +corn pudding +alpine azalea +garboard +cane sugar +observation dome +condensation pump +hind +taximeter +hand drill +gas thermometer +jammer +buffing wheel +handstamp +prairie mallow +turkey stew +sun spurge +duck pate +kibble +Cassin's kingbird +apadana +Devon +grinner +oocyte +blank +header +schoolmaster +guard 
ship +intravenous pyelogram +rimu +luff +Mediterranean fruit fly +singlestick +lady-in-waiting +curb +birch +limekiln +orthoscope +serotine +Spanish oak +swamp cottonwood +edger +city man +picnicker +white basswood +Parsons table +Christmas begonia +perspirer +Pacific tree toad +Cape tulip +finger bowl +blue pike +greengage +handcar +milkweed +potbelly +river dolphin +creel +typewriter carriage +banteng +pawnbroker's shop +huon pine +biennial +man of action +foundress +caveman +featheredge +jordan almond +sandblaster +coralberry +low-calorie diet +hoot owl +garter +bain-marie +wrecker +fenugreek +double-hung window +idol +scullery +balloon vine +summer savory +winged spindle tree +Helvella crispa +walrus mustache +gas engine +boulle +rush grass +rue +hoe handle +cat fancier +deerstalker +dunker +American red plum +fall dandelion +groover +sprag +stair-rod +wish-wash +pricket +architrave +California laurel +net melon +Arizona sycamore +executive secretary +silverweed +silky cornel +surface ship +square sail +common purslane +villa +holly-leaved cherry +sweet birch +pecan +artillery shell +breast pocket +pirogi +scarlet runner +rabbit brush +mealworm +leather carp +palette knife +Jerusalem sage +boneshaker +slit lamp +digital voltmeter +polar glacier +square-rigger +homogenized milk +Sten gun +lesser calamint +pyrograph +Korean lawn grass +Zinfandel +crepe fern +western ragweed +clasp knife +distributor housing +cartouche +scooter +ski parka +jackknife +Carolina spring beauty +soft diet +candlesnuffer +horse trader +step stool +agouti +accelerometer +annual fern +judge advocate +angelica +roll film +treehopper +ombu +comer +sultanate +kitchen help +hooded ladies' tresses +milking machine +knuckle joint +Jamaica honeysuckle +music teacher +sauerkraut +Weston cell +slivovitz +Worcester sauce +tall bellflower +chancery +prophetess +casquet +shortfin mako +sorus +visual display unit +asp +grenadier +black pepper +crottle +erasable programmable read-only memory +jabot 
+ratchet +disk controller +chief petty officer +tap wrench +white mountain ash +cultivated rice +flying phalanger +skillet corn bread +BB gun +Elamite +European red elder +reed rhapis +ciderpress +inga +torpedo +wild teasel +bean curd +oeil de boeuf +acuminate leaf +bitter lemon +hitchrack +Lorraine cross +hostess +European dogtooth +adz +polonaise +rock sandwort +Waldorf salad +myrmecophile +klystron +mole rat +draba +corn borer +robusta coffee +chub mackerel +leatherleaf +chronometer +Moselle +sea aster +fennel +slop basin +constable +Brunswick stew +hydraulic pump +French omelet +icebreaker +Manx shearwater +press of sail +ninepin +blue succory +bootstrap +hallstand +chit +firefly +bearded seal +fuel filter +jezebel +mate +Roquefort +cheesecloth +plasterer +blue pimpernel +lake dwelling +shrink-wrap +goat cheese +common gum cistus +coastland +Sunday best +wild tobacco +mandrake +common unicorn plant +barbican +culotte +blockhouse +German iris +tarragon +caramel +wild rosemary +grain +voyager +squirting cucumber +eastern narrow-mouthed toad +creeping fern +luge +saffron +garland flower +furnace room +starship +Oriental scops owl +Italian honeysuckle +berserker +Chinese elm +scrubber +bishop pine +French polish +compromiser +skimmer +river shad +lobster thermidor +leadwort +man-of-the-earth +razorblade +vicegerent +empress +link +ham and eggs +wild lily of the valley +blackfish +splicer +fossa +mara +moneygrubber +brachiopod +fauteuil +caldera +finish coat +croupier +termer +leopard's-bane +sei whale +molucca balm +dolly +dog food +term infant +soft roll +episcia +sewer +inquiry agent +active citizen +perry +California newt +moon shell +bladderwrack +common shrew +dill +Dutch elm fungus +key lime +electrometer +divorce lawyer +lamb's-quarters +apple turnover +shipmate +Guernsey +legionnaire +electric blanket +Rocky mountain pinon +tobacco mildew +stinking iris +forestiera +departure lounge +wiper motor +jurist +scarlet runner +pallbearer +batter's box +inertial 
guidance system +fines herbes +oilcan +sisal +mustache cup +steamed pudding +Visayan +fiesta flower +lady tulip +lungless salamander +batiste +electrical system +blazing star +car carrier +Walloon +mother hen +stump +mulled cider +secondary coil +Alexandria senna +etui +scrumpy +Havasupai +jawbreaker +glume +ex-husband +Eskimo +Joint Direct Attack Munition +number theorist +five-hitter +pinstripe +Olympian +common mackerel +stone bass +bigos +Bahraini +airbrush +great ragweed +glass lizard +hand fern +roundel +riding master +shoetree +yellow avens +old fashioned +dolman +stinger +nursling +legate +faille +golden fern +bedpost +shop steward +kidney bean +bladderwort +internist +limeade +Bruneian +Coloradan +playsuit +wintergreen oil +Cantabrigian +mutton snapper +shot putter +hand grenade +moccasin +cobnut +marrow +separatist +cockscomb +discharge pipe +Gabonese +spade bit +chicken cordon bleu +varnish tree +European wood mouse +striped gentian +Ayrshire +curassow +moo goo gai pan +malarial mosquito +glow tube +ledger board +bib-and-tucker +European chestnut +suffragette +color wash +gaffsail +golden larch +voting machine +Kahlua +lungi +amusement arcade +Uzbek +butternut +mold +mule's ears +dickey +shrimper +trophozoite +dreadnought +shepherd's purse +greenhouse whitefly +spotted gum +copperware +perfect game +semigloss +spawn +telecom hotel +stakeholder +mason wasp +flibbertigibbet +chin strap +fringed pink +saki +urchin +memorizer +roulade +whiting +cling +corncrake +Queen of England +choo-choo +empty +heating pad +playmate +visualizer +popcorn ball +absconder +sou'wester +target acquisition system +mock-up +dental floss +tray cloth +haddock +bulblet fern +housing commissioner +delayed action +anchor light +harbor porpoise +water wings +PT boat +night latch +fennel +doorframe +green-tailed towhee +grey polypody +torture chamber +American germander +Chinese wistaria +cattalo +accompanist +rifleman +alpine clover +contrarian +lemon peel +Mexican cypress +sprog 
+dado +Galilean telescope +desmid +lockup +Latin +American raspberry +mescal +butternut +prairie orchid +downy yellow violet +green hellebore +radio compass +bread and butter pickle +Cherokee rose +knish +destroyer escort +Arkansan +langlaufer +pyxis +winter savory +velocipede +motley +winter savory +law student +barren ground caribou +apple dumpling +field hospital +works +city editor +European flatfish +Morchella crassipes +life office +boot camp +cream sauce +cape aloe +acetate disk +devil ray +tile cutter +Plymouth Rock +microspore +godown +Syrian +tiercel +American cranberry +lesser spearwort +anopheline +Spanish oyster plant +wire cloth +attic fan +birch beer +small computer system interface +crook +ribbon fern +explorer's gentian +nagami +I-beam +rosebud cherry +Jerusalem artichoke +Stillson wrench +pluralist +district manager +Levantine +orangeade +part-timer +post horn +Oregon grape +contadino +cargo helicopter +silverpoint +chaja +California bluebell +case +Shasta +cheese cutter +Leishmania +avalanche lily +iron horse +bialy +Yana +Delawarean +Prussian +nonpareil +hammer +hoper +chewink +anil +skim milk +desert four o'clock +crescent wrench +white marlin +blue jasmine +malacca +anadama bread +purple poppy mallow +ganglion cell +ligature +no-parking zone +golden clematis +Cotswold +aliterate +shebeen +yardarm +superbug +fanaloka +stinking cedar +spirochete +wort +pater +heaume +thermocouple +ironing +naval tactical data system +European goatsucker +prairie cordgrass +accused +foreign agent +halberd +western mugwort +esthetician +Persian lilac +cracked-wheat bread +crosscut saw +rock penstemon +paper cutter +crematory +ideologist +cattley guava +margarine +creosote bush +hoary plantain +spark gap +lumberjack +Greek valerian +mission bells +tight end +bigeye +large crabgrass +stone marten +cleat +lentil +bay scallop +lector +charger +assemblywoman +second lieutenant +boil smut +sarsaparilla +hydromel +cat flea +pinfish +whole milk +hairnet +myeloblast 
+peasant +blind curve +first offender +dwarf-white trillium +Brother +coatdress +gun emplacement +tamarisk gerbil +snap +air cushion +trailing edge +potato vine +gig +everlasting pea +champion +dibble +rattail cactus +timothy +prince's-feather +cutlas +lockring +sealing wax +Brussels lace +corn mint +highboard +she-oak +wild celery +pillar +Burberry +Hakka +leucothoe +bell tent +gallery +coontie +leather fern +smack +adenovirus +linoleum +chain wrench +tammy +gas fixture +nut bar +baneberry +butterscotch +goat's rue +bullock +grey snapper +mother-in-law +hyson +wayfaring tree +mollie +needle spike rush +buckwheat +bayberry +brush-tailed phalanger +dry rot +harborage +stormy petrel +Oriental beetle +Atlantic halibut +coping saw +simple fruit +viscose rayon +surgeonfish +upstairs +security system +common ragweed +verticillium +pancake batter +hawk's-beard +Dutchman's-pipe +refrigeration system +European parsley fern +Ivy Leaguer +totalitarian +gonococcus +towhead +showy sunflower +pallium +multiengine airplane +hair trigger +rabbit-eared bandicoot +siskiyou lewisia +fuel system +flat arch +broad beech fern +Alpine lady fern +bracken +Kentucky black bass +rut +mountain maple +tunaburger +umbrella fern +white-headed stilt +meat hook +panhandler +washhouse +barnyard +safety lamp +leg +ripple mark +paper +sagebrush lizard +light heavyweight +common nutcracker +operator +stalking-horse +horseless carriage +fishhook +suction cup +peg +Ungulata +false teeth +round-bottom flask +Luba +campaign hat +firebox +rudder +parapet +ice pack +appellant +spirit stove +metheglin +common bamboo +soapwort gentian +pannikin +time capsule +burn bag +folk poet +tropical prawn +end man +new caledonian pine +linen +web +free trader +jury box +railing +pignut +leaker +potboy +rubber boa +white snakeroot +plumber +Candida albicans +surfboat +woman +promulgator +eyecup +wild China tree +rattlesnake master +Viyella +alpine salamander +ailanthus silkworm +Albatrellus ovinus +war room +meadow vole 
+robotics equipment +rotary actuator +Engelmann spruce +pinesap +beefcake +native speaker +ridge +injector +water chute +salmonberry +decoupage +bottlebrush +date plum +circlet +American mountain ash +pocketbook +horsemint +sweet four o'clock +kirpan +pinto bean +chervil +equator +range animal +candy thermometer +calanthe +cul +stipendiary +brahman +pelican crossing +topgallant +wild senna +sliding window +carrier pigeon +Tatar +quadruplet +bumboat +spearmint oil +slip clutch +young Turk +golden yarrow +shank +glasswort +dental plaque +Manduca sexta +Northern bedstraw +dent corn +Life Saver +western wall flower +bedder +wherry +Tuscarora +scrapple +borstal +reflux condenser +problem solver +nondriver +perforation +eastern cricket frog +white wood aster +broad buckler-fern +Cape primrose +herringbone +head louse +earl +baton +recording system +primary color for light +cherry laurel +pomfret +ratafia +chocolate milk +obscurantist +revisionist +rood screen +magnetic needle +commensal +oil tycoon +celebrant +domicile +harvest mouse +California nutmeg +greater spearwort +black-billed cuckoo +winepress +demographer +straw boss +diabetic diet +sweetmeat +rabbet +ming tree +basketweaver +freestone +walk-in +Aryan +box coat +audio amplifier +chicken salad +churidars +whydah +box +batman +siren +selectman +gouger +drip coffee +Caesar salad +interpreter +whinstone +grey goldenrod +minicomputer +honey crisp +hypercoaster +Irishman +swamp white oak +reed canary grass +globeflower +cynthia moth +fennel seed +canthus +chino +blind date +tar pit +watermelon begonia +fishtail palm +overcast +Pearmain +primary color for pigments +coal seam +wherry +safety bolt +cretonne +Michigan lily +inflater +moneybag +huckleberry +brassard +bush vetch +looking glass tree +pinwheel roll +alfalfa sprout +sea kale +clinometer +achira +lorgnette +potter wasp +gilded flicker +tody +capulin +captain's chair +crackle +gerardia +prie-dieu +venture capitalist +New Jerseyan +block and tackle +elf cup +bur 
reed +automatic transmission +wax palm +flytrap +crack willow +coachwhip +swizzle +lugger +Dewar flask +baster +oxyacetylene torch +Culex quinquefasciatus +St Peter's wort +wild hyacinth +Russian almond +burrfish +wintergreen +katsura tree +butcher knife +perfumery +thresher +porte-cochere +sheepwalk +hypotenuse +Dalmatian iris +buttercup squash +demiglace +goldenseal +preceptor +rigger +poikilotherm +old-age pensioner +posthouse +wood horsetail +repeater +reciprocating engine +Rambouillet +terra cotta +togs +battledore +horizontal tail +missile defense system +trier +morello +woolly adelgid +munition +double creme +in-fighting +squirrel corn +crow's nest +antler moth +brake cylinder +bandoleer +noticer +Parmesan +hipline +cheapskate +Dubonnet +mole rat +bog aster +ribbon tree +meadow rue +nard +ratel +loose smut +snapping shrimp +golden glow +basil thyme +Florida strap fern +moonshine +flume +lace fern +black bream +orchestra pit +archerfish +exile +ringdove +career man +godfather +bottom-feeder +pasteurized milk +dental implant +pedicel +Catalpa speciosa +yellow foxglove +lancet arch +steam shovel +sampan +patrol boat +sailor cap +tollgate +monal +velociraptor +cacique +jack oak +cursed crowfoot +creep +Parry manzanita +common matrimony vine +grace cup +caecilian +spurge laurel +prickly lettuce +Regius professor +camail +Sitka willow +Courtelle +gin sling +dogmatist +guest +saltine +dust cover +sport +sweeper +feist +lady's-eardrop +vibist +wire stripper +tenpin +interplanetary space +beet green +pruning knife +drainage system +gunnery +ballet master +lime juice +flak catcher +lacrosse ball +Canadian aspen +beatnik +railhead +utilizer +spadefish +Arizona white oak +city university +dense blazing star +hedger +chain pickerel +right-hand man +namby-pamby +nacelle +redneck +tumbler +Chief Secretary +cannon +cupola +kummel +papaya juice +Burton +Stanley Steamer +loganberry +stylus +square meal +rock bass +western ladies' tresses +dramatist +assignee +tandoor 
+trumpetwood +segregator +green adder's mouth +coral necklace +ani +iceboat +densimeter +oxtail soup +kernel +cos lettuce +greenishness +panchromatic film +Parker House roll +oatmeal +backsaw +double Gloucester +bailey +storage cell +giant +coconut milk +broadtail +barouche +loir +soybean meal +white-leaved rockrose +junction barrier +spandrel +sweat bag +goldilocks +flowering wintergreen +cockspur +beef fondue +holding cell +cardamom +cagoule +Kamia +tangelo +Herschelian telescope +wine bar +kachina +sand sage +guy +ivory palm +citrus mealybug +topper +ladyfish +force pump +fanion +calaba +Iowa +orrisroot +ivorybill +Secretary of Agriculture +gagman +dry cell +hypnotist +kenaf +grey alder +deathwatch beetle +gagman +magnetic stripe +trap door +abdominal wall +prefab +broomcorn millet +architeuthis +angler +Pacific giant salamander +barbette carriage +low-fat diet +veal scallopini +B battery +wallah +landing flap +pistachio +jaguarundi +nagi +cicerone +felt fungus +Aertex +stocks +smooth aster +patchouli +lemon sole +sleeper +basket fern +dundathu pine +anjou +Moreton Bay chestnut +broom sedge +candid camera +red angel's trumpet +oilstone +cinnamon toast +Pacific walrus +fruit custard +Jehovah's Witness +mate +voyeur +Esselen +achromatic lens +sanguine +brine shrimp +dunce cap +swot +transit instrument +grey willow +pack +bench clamp +Nova Scotian +gadgetry +silvery spleenwort +enchantress +rough fish +morula +giant taro +sorus +roux +polyhedral angle +spruce beer +Chicano +cola extract +outfielder +kohleria +white-rumped shrike +car-ferry +subway token +spoon bread +totara +corn borer +bowhead +tensimeter +water scooter +flickertail +Catholicos +pleaser +blue-eyed Mary +calabash +handyman +cascades frog +facing +scarlet oak +lutist +ginger +tree tomato +Harvey Wallbanger +tent peg +insectivore +fusil +swale +chinning bar +bladderpod +New Dealer +dhoti +proscenium arch +common vetchling +channel +collect call +safflower +Texas tortoise +test equipment +theca +RAM 
disk +sheep sorrel +rammer +buttonhook +honey mesquite +dominus +babirusa +queen +Aspergillus fumigatus +crash barrier +nonmember +Muscovite +verdin +Australopithecus afarensis +Turkish Delight +stalked puffball +giardia +divider +mountain skink +head smut +pacemaker +evaporated milk +rattlesnake fern +flamethrower +navy bean +bather +steed +showy orchis +stone crab +artichoke heart +phantom orchid +space helmet +swamp laurel +privateer +junior +surcoat +bristlegrass +flower girl +aphid lion +penthouse +lemonade mix +coude telescope +natal plum +scriber +wood nettle +rape suspect +resplendent quetzel +western poppy +choir loft +fore-topsail +thyme-leaved sandwort +erotic +short circuit +outdoors +flowering tobacco +hookup +aviatrix +corker +horehound +horn +swamp pine +water biscuit +cherimoya +vaporizer +courtier +European sole +full skirt +Mother Carey's chicken +cymule +huck +white snapdragon +mountain nyala +country borage +bonduc +casein paint +grampus +shrimpfish +lodge +dragee +black walnut +caraway seed +roper +glass cutter +tab key +Richardson's geranium +demigod +chichipe +Italian ryegrass +cadet +electrograph +rudd +carpenteria +foie gras +lignum vitae +hedge nettle +pledger +American hackberry +flageolet +beaked hazelnut +reflectometer +sticky geranium +marriage bed +white pepper +japanese clover +whiteface +gnat +extrovert +Canada plum +talipot +chicken stew +egg foo yong +fraxinella +skibob +saucer magnolia +jacket +green smut fungus +cloakroom +landing skid +booth +ice milk +dipole +striped coral root +red buckeye +roughcast +breaststroker +cowherb +razor clam +first-aid station +briarroot +clambake +lander +Bramley's Seedling +frail +jird +minisub +luging +poison milkweed +European lobster +epidemiologist +spandex +paloverde +marumi +bypass condenser +punter +petty spurge +Coryphaena hippurus +bilberry +vermillion rockfish +witness box +viscometer +pulque +Massachusetts fern +herring salad +ridge tile +mesa +dwarf grey willow +southern aster +punch 
pliers +tarnished plant bug +hoop pine +Japanese red pine +benedick +rebozo +silver plate +silver willow +mouse-ear hawkweed +bonito shark +abutment arch +noble cane +tiger rattlesnake +pongee +jumping plant louse +pattypan squash +giant ryegrass +railroad bed +stiff aster +imperial Japanese morning glory +laundry +winter cress +large white petunia +tea maker +pen-and-ink +early warning system +lug +monocot +sea wormwood +breechblock +postage meter +third rail +Mongoloid +Australopithecus boisei +umbrella tent +stirrer +Dumpy level +beroe +post and lintel +green spleenwort +tomato paste +dishpan +stentor +sweatband +cobbler +New York fern +gaff +prairie willow +cyclops +jigsaw +rotavirus +pallet +eastern ground snake +boiling water reactor +acute triangle +agora +European cranberry +roebuck +surgical dressing +busboy +cannikin +feedlot +common pond-skater +cochin +horsehair lichen +fetter +sapote +fichu +dermatologist +fire tongs +creme anglais +foster-mother +laurelwood +chicken snake +mincemeat +rocker +wild spinach +powder and shot +butterwort +auxiliary engine +mamey +hart's-tongue +sucking pig +American turkey oak +troopship +buttermilk +divi-divi +boatswain's chair +soda fountain +southern flying squirrel +elastic +cutaway +housekeeper +renegade +apple rust +bridoon +machicolation +stunt +keyhole limpet +personality +solitary vireo +epidendron +Jihadist +boffin +bettong +terror +partial denture +pusher +saltcellar +capstan +large poodle +Bibb lettuce +low-bush blueberry +staple +banded krait +sickroom +barnyard grass +wandflower +woodworm +bluegrass +squirrel's-foot fern +rabbitfish +delta wing +milking shorthorn +limber pine +guru +gamine +scythe +sweetsop +Gruyere +bloodmobile +mine detector +American mistletoe +silver beech +hound's-tongue +Lombardy poplar +basket fern +pink-and-white everlasting +redtail +Aladdin's lamp +mace +outtake +condensed milk +Canada wild rye +silver perch +waxflower +taxer +Chinese chestnut +Our Lord's candle +mugwump +school 
system +salp +osso buco +dress shirt +butterweed +low-fat milk +couchette +broomcorn +proscenium +mill agent +smut grass +humpback +southern spadefoot +military leader +canebrake rattlesnake +tailor-made +ebony +beach house +flying gecko +hoary alison +typhoid bacillus +Romanov +vanilla pudding +sweet cicely +Spodoptera exigua +dress rack +flannel +skipjack +bolognese pasta sauce +rooibos +thunderer +blessed thistle +gauntlet +mahatma +granadilla +laurel sumac +Yuma +thyme-leaved speedwell +encyclical +twill +linocut +manna gum +spark arrester +cocklebur +Indian hemp +lemon oil +Hall's honeysuckle +raceway +flop +Himalayan lilac +one-flowered wintergreen +photosphere +silvery spleenwort +convex polygon +canarybird flower +foster-sister +fluffy omelet +palanquin +roll +dandelion green +Javanese +workpiece +Carmelite +bread mold +schlemiel +wild lily of the valley +grugru +solenoid +puff batter +skep +balance wheel +Gadaba +portia tree +mobcap +two-man tent +scuffle +firebrat +ant lion +anise +caster +giant petrel +American water spaniel +naboom +treasure ship +foster-son +fiddleneck +alidade +sugar refinery +wild oat +water beetle +generic +damson plum +abrocome +detainee +pitch pipe +coast +nilgai +radiotherapy equipment +heart-leaved aster +gristmill +grocer +Appaloosa +Cheviot +brake pedal +lantana +cave myotis +Rob Roy +sea spider +latrine +carpophore +recycling plant +coondog +brace and bit +funambulist +eggar +mantelet +postdoc +mezzanine +coco plum +pulse generator +high-vitamin diet +menhaden +mechanical engineer +bergamot mint +Chuvash +grated cheese +helicon +belladonna +beet armyworm +eelgrass +resuscitator +interrupted fern +arrow grass +cistern +Pacific herring +colostrum +journal bearing +Fauve +wrist pin +canape +choice morsel +quadraphony +guard boat +shortgrass +claymore mine +hitching post +cargo door +decoder +gym rat +Cocopa +commander +apple of Peru +seckel +yellow goatfish +dog flea +dodo +oconee bells +Tudor arch +turkey stuffing +ebony 
spleenwort +wheat flag smut +scolopendrium +Brazilian pepper tree +gusset +inspector +lunar excursion module +baron +plantigrade mammal +Creole +phosphate +aromatic aster +ghee +audiovisual +onychophoran +cotton stainer +lieutenant junior grade +spheroid +amen corner +caper sauce +Caladium bicolor +dyer's rocket +seaside goldenrod +flint corn +Very pistol +rotifer +steeplechaser +rouleau +escape wheel +Namibian +millivoltmeter +emmer +climatologist +agateware +sea lyme grass +inclinometer +water fennel +saddle seat +vicar +garden cress +ski rack +Norfolk jacket +casaba +coast rhododendron +sericea lespedeza +hematocrit +autopilot +tilter +finish coat +Pennsylvanian +shrubby St John's wort +podocarp +percussion cap +ceriman +peanut bar +gean +jack +durra +rotor +carob +cottage tulip +three-spined stickleback +trencher +elevator +kalumpang +abaca +Australopithecus robustus +active matrix screen +water bed +hatmaker +lodestone +cat food +overcup oak +balletomane +popgun +rheometer +process cheese +frog legs +heartleaf arnica +p-n-p transistor +steam turbine +Tulu +scalene triangle +licorice fern +coffee break +trade unionist +starved aster +firing pin +water gum +Masonite +hairspring +seminarian +blue racer +forecastle +scrub pine +Atlantic spiny dogfish +kopje +orphrey +fan tracery +gee-gee +vixen +interstellar space +Harris Tweed +sawmill +lemon mint +bitewing +ringlet +Chinese mustard +paleontologist +American hazel +brigantine +clay-colored robin +zombie +nectarine +West Indian jasmine +pineapple weed +rusher +gynecologist +pole +thylacine +myrtle beech +golden cup +woodruff +T-bar lift +terebinth +service club +homegirl +Blue Mountain tea +figwort +New Hampshirite +Stayman +tonometer +white turnip +messuage +cruet-stand +colliery +connecting room +lesser twayblade +bland diet +crown prince +beggarwoman +restharrow +bower actinidia +firebug +hepatic tanager +telegraph +Spodoptera frugiperda +spackle +carpenter's square +pyx +supermom +thickhead +whorled milkweed 
+Arctic char +Chinese rhubarb +pince-nez +wolverine +tomato concentrate +cascarilla bark +red underwing +leather flower +Jerusalem thorn +bullpen +Salisbury steak +anode +coffeeberry +bottling plant +fritter batter +aerial torpedo +matrix +local oscillator +stalked puffball +bruin +three-cornered leek +wassail +stabling +damping off fungus +myriapod +osier +lesser kudu +cownose ray +chokecherry +wagon +obstetrician +Glengarry +even-pinnate leaf +wine sauce +osteocyte +baker's yeast +heir presumptive +blackjack +tympanist +golden fern +fipple +Japanese oak +bar mask +stamping machine +argus +knobcone pine +oil beetle +lanai +upper berth +condenser +proctologist +catechu +wild spurge +vestry +ground snake +proton accelerator +walker +scarlet bush +transom +lagging +bouillon +slender loris +black currant +developer +football hero +plum sauce +striped mullet +prince charming +fictional animal +prosimian +lug wrench +lemonwood +kirsch +spy satellite +black caraway +Thompson Seedless +bead tree +purple fringeless orchid +Virginia strawberry +chigetai +punkie +gall wasp +addressing machine +rock polypody +good-king-henry +spring cankerworm +wimple +noncandidate +saskatoon +hacienda +Darjeeling +snowberry +lounging pajama +ascospore +ski-plane +hedgehog cereus +Welsh onion +yautia +coaster brake +sickle cell +parrot's beak +fuller's teasel +painted greenling +scablands +stuffed cabbage +barrel organ +etcher +dwarf maple +camp +Australian blacksnake +currycomb +obtuse triangle +rose gum +psychrometer +abridger +torpedo +carpet loom +sodalist +slender rush +loligo +sclerometer +wimp +dotted gayfeather +green ash +pinstripe +moralist +medusa's head +garden centipede +heath aster +fool's parsley +olla podrida +Potawatomi +Edam +toothache tree +hulk +seabag +narthex +compartment +prairie star +lookdown +B-flat clarinet +event planner +clip lead +shirting +milk punch +supercharger +macadamia nut +giant coreopsis +computer store +martingale +keyboard buffer +summer flounder 
+squash ball +gas turbine +object ball +plier +black mulberry +reef squirrelfish +scampi +willow aster +bowler +striped marlin +smooth muscle cell +diplodocus +Liberty ship +sponge cloth +guitarfish +walking leaf +showroom +California bluebell +bolo +turnbuckle +boysenberry +hardware +Gael +imago +endorser +jujube +dust bag +rapporteur +field wormwood +low-water mark +naval missile +Pacific yew +reversible +crabapple jelly +poniard +barricade +spawner +simnel +seltzer +deckle edge +needle +timbale +satellite transmitter +organization man +job candidate +orderly +native cranberry +fir clubmoss +coaming +chartered accountant +electron accelerator +Sierra plum +American foxhound +long underwear +Penobscot +blueberry yogurt +biretta +cascara +Paranthropus +Dorian +nun's habit +lenten rose +Augustinian +designer +northern phalarope +mombin +hazel mouse +reeve +waffler +telegraphy +Verpa conica +ignition coil +Japanese oyster +S-shape +divining rod +ant thrush +throat protector +interlocutor +Desmodus rotundus +pere david's deer +attenuator +Cypriot +red sandalwood +pendulum watch +broadcloth +striped drum +sequence +safety arch +diapensia +hog +western spadefoot +chlorella +comb-footed spider +Chechen +darning needle +C-ration +hard beech +piano action +scaling ladder +Nepal trumpet flower +ravigote +screw wrench +ramekin +Lyonnaise sauce +dinner napkin +partial veil +masseuse +coatrack +mooring tower +blue-eyed African daisy +English horn +baton +rope tow +toll bridge +massage parlor +quark cheese +lounging jacket +tall goldenrod +flying jib +coordinate axis +barley-sugar +integrator +worm gear +captain +sweatshop +class +layer +chili powder +dripping pan +oatcake +newsroom +tadpole shrimp +rake +trade magazine +silks +ram's-head +senior +knower +masseur +yam +peg +wheel tree +hardbake +test room +long-spurred violet +creeping spike rush +shrapnel +coffee senna +matchbox +creeping soft grass +welder's mask +pickaback plant +urial +hooded pitcher plant +incense cedar 
+Ohio buckeye +ant cow +skeleton fork fern +Indiaman +swamp ash +testatrix +marang +spherocyte +Winesap +Indian mallow +teju +Yersinia pestis +dye-works +sauerbraten +coral bean tree +safe house +postulator +eyas +lotus +wood vise +lady-of-the-night +East German +cymling +rock candy +western omelet +anoa +rainbow seaperch +crossover voter +Finn +tree shrew +hog plum +Federal +shagbark +clockwork +Alexandrian laurel +metal wood +brill +military chaplain +trend-setter +call-back +Indian rat snake +spurred gentian +Japanese maple +forest goat +bee moth +viola da braccio +duckboard +armyworm +hangnail +counterbore +cream-of-tartar tree +Mullah +bonbon +water hazard +temple orange +corporatist +rough bindweed +Turkish bath +mistletoe fig +beach sand verbena +caddisworm +English plantain +brown Betty +power pack +lion's-ear +Francis turbine +stayer +dichondra +marsh St-John's wort +squab +energizer +common horehound +mantispid +pullback +handwheel +spark arrester +yakuza +Virginian witch hazel +grunter +waterworks +bondwoman +chain printer +stockjobber +coconut milk +yardgrass +blue chip +bridle path +riser +pleurothallis +saltwort +salal +broadside +blackboard eraser +bastard +Para rubber tree +red bat +digital-analog converter +calabash +cashier +cow shark +horned pout +microphage +monologist +woolly monkey +Illinoisan +marsh horsetail +distaff +siris +eparch +gooseneck loosestrife +sounding rocket +multiprocessor +saiga +xerographic printer +madrona +right triangle +sweet gale +red maids +wolfsbane +pork-and-veal goulash +French sorrel +mutterer +Venetian sumac +drumlin +white crappie +squire +large-flowered calamint +northern cricket frog +mushroom sauce +supertanker +morello +auxiliary boiler +Virginia thimbleweed +cottage tent +bubble shell +big shellbark +wormwood sage +cider gum +coast lily +American feverfew +Peruvian balsam +purple silkweed +tobacco moth +desk dictionary +rock elm +eastern indigo snake +Japanese privet +lamb +levee +L-plate +soapfish +painted 
tongue +scuttle +markhor +Marburg virus +mackinaw +major +crypt +ball and chain +domestic silkworm moth +bottom feeder +mistress +death house +freight elevator +bellyband +Pulex irritans +Bacillus anthracis +fire control radar +hysterosalpingogram +turbogenerator +decompound leaf +vambrace +scentless camomile +Medinilla magnifica +prima ballerina +Northern Spy +quartz lamp +grains of paradise +justiciar +felt fern +seismograph +Madagascar jasmine +imaret +white perch +Alpine mouse-ear +tea bread +yellow bass +poseuse +espionage agent +punching bag +eurypterid +orange sneezeweed +banded stilt +armhole +postern +mother +kapuka +catechumen +Soubise +Sauvignon blanc +gunnery sergeant +self-starter +ceratozamia +Atlantic cod +Reoviridae +blood cup +horseshoe bat +oriental plane +voussoir +fetterbush +samara +truncated pyramid +lingcod +athenaeum +shyster +Carolina hemlock +submarine torpedo +floating fern +yataghan +sun tea +viola d'amore +conenose +ventilation shaft +walk-up apartment +saury +wild wheat +porcupine ball +tahini +kris +grass fern +drip pan +black bryony +Scotch broth +tapioca pudding +southwestern toad +Hare Krishna +guimpe +wild madder +megalocyte +teaching fellow +shrubby penstemon +lesser wintergreen +privet hedge +Fahrenheit thermometer +stern chaser +prickly ash +pump room +ricer +chicken mousse +wing commander +sun gear +bolus +alpine milk vetch +opera cloak +twinjet +Goldie's fern +abnegator +alphabet soup +node +grape jelly +early coral root +Tarzan +quarterstaff +greeter +Eurasian woodcock +primary coil +quirt +tinkerer +bolt +creme de fraise +voltage regulator +news photography +Jat +bristly locust +Gouda +dickey +lobster butter +dwarf flowering almond +fagot stitch +Reform Jew +ostrich fern +bathyscaphe +purple mullein +alpaca +civic leader +jellaba +Arizona ash +wasabi +Irishwoman +choke +stockinet +religionist +sewage disposal plant +bittersweet +Hyphantria cunea +pheasant under glass +screen actor +chapterhouse +quoit +horseshoe bat +rapper 
+cupule +planetary gear +cascade penstemon +redoubt +salt +areaway +megalomaniac +bush willow +amethystine python +plains spadefoot +colour supplement +kick pleat +bell apple +narwhal +slippery elm +stenograph +baa-lamb +quadrant +balker +jobcentre +spit curl +bastard indigo +malacca +serow +adobe lily +yacca +palestra +penalty box +scrub beefwood +reenactor +screening +white bryony +alderleaf Juneberry +harpoon +alpine clubmoss +neurosurgeon +surrey +sweet calabash +Scotch laburnum +coquille +French honeysuckle +extrados +pipe cleaner +southwestern white pine +Virginian stock +scaly lentinus +aileron +carob bar +swordfish +Alpine woodsia +negus +wireworm +sweep +goldfields +drop arch +European bream +roly-poly +pin +bastard wing +fustian +wild buckwheat +lake whitefish +overcoat +water filter +Bermuda chub +New Zealand spinach +high-hat cymbal +European larch +radiologic technologist +fine-tooth comb +brunch coat +splice +electronic converter +overmantel +extern +taper +cluster bomb +teletypewriter +pinwheel +trailing arbutus +quipu +creeping zinnia +orange milkwort +tabard +Australopithecus africanus +melancholy thistle +insole +courser +darkroom +surface-to-air missile system +bark-louse +Confederate +neritina +clip-on +spouter +trench knife +outside caliper +dhak +Limburger +chuck wagon +buttercup squash +shirtdress +pouter pigeon +dirty old man +zodiac +fennel flower +mother figure +appointment +Manichaean +lignum +bouffant +rum sling +Ravenna grass +hibachi +gin rickey +American harvest mouse +cocozelle +western wheatgrass +black crappie +rhombus +Missouri goldenrod +barndoor +wild mango +pneumococcus +Boston lettuce +ratline +desert holly +cobweb +fluoroscope +ethnologist +tor +bullshot +stockade +greave +rock sea bass +slip-joint pliers +taxi dancer +schizophrenic +zill +creme de menthe +orange-blossom orchid +divot +supplejack +busybody +casemaking clothes moth +ramrod +gearbox +birdcall +Wiffle +thwart +beauty consultant +chicken paprika +trawl +skep 
+spirometer +hopper +kvass +doggie bag +bath chair +showy daisy +wild tamarind +Tarsius syrichta +glyptics +Algerian +cargo area +bunk +Velveeta +iconoclast +clinch +New Caledonian yew +false mallow +Japanese tree lilac +convex polyhedron +water boatman +cruise missile +finisher +colonoscope +cumin +wickiup +saccharin +whipcord +trailer camp +eryngo +cuckold +yam bean +fighting chair +forewoman +galingale +citron +positivist +four-lined plant bug +suet pudding +field pea +Circaea lutetiana +deer grass +trap-door spider +common corn salad +mirror carp +sounder +second-in-command +seaside alder +burgoo +ming tree +curry sauce +courbaril +green alder +figure loom +fauld +halfbeak +squelch circuit +cladode +winter cress +tongue and groove joint +dwarf dandelion +joss house +western buttercup +welted thistle +potato tree +anglewing +cookfire +marzipan +hood latch +seed shrimp +common moonseed +toasting fork +bevel +three-quarter binding +midwife toad +stage director +Pentecostal +technical sergeant +golden-beard penstemon +drunk +silky oak +corn gluten feed +T-square +stoker +selling agent +cruse +server +rope-a-dope +bicorn +matzo meal +wide wale +roadblock +false foxglove +tuck box +bandsman +smoke bush +machinist's vise +Highlander +scholiast +self-starter +Swedish rye bread +spark transmitter +maverick +maquiladora +cabinetmaker +compress +rainbow shower +huntsman's horn +mackinaw +copper rockfish +lappet +nitrate bacterium +telephone plug +soutache +Dacron +toboggan +sissoo +yogi +laurel-tree +vice chancellor +Christ's-thorn +cartridge fuse +serial port +quassia +tarweed +pecopteris +beggarweed +anchovy pear +bookbindery +woodland oxeye +toad rush +sandalwood tree +marsh andromeda +Tyrian purple +boothose +tragedienne +fragrant cliff fern +festoon +bondwoman +melancholic +butternut squash +exhaust valve +semi-skimmed milk +glowworm +Virginia oyster +Identikit +ayah +gallows tree +Carioca +monoplane +jewels-of-opar +scallop +moth miller +marsh cress +lobed 
spleenwort +ricotta +emitter +arame +tub gurnard +army attache +maniac +organizer +pheasant's-eye +Melba toast +homeboy +Bavarian cream +Maximilian's sunflower +backstop +Tremella foliacea +yellow avens +spreading fleabane +plumb level +false rue anemone +zabaglione +climbing maidenhair +doeskin +walking shoe +lancewood +material +jacksnipe +South American poison toad +agonist +hinny +paper mill +psychophysicist +valley girl +toast mistress +jorum +tiler +chicken Tetrazzini +trivet +grasshopper +three-mile limit +kink +kiang +pole horse +jig +Cornish heath +hedge thorn +false alumroot +Popper +remount +photojournalist +sideroblast +stonecress +Agave tequilana +Japanese lilac +hawse +maenad +air bag +leaf spring +dwarf willow +soda cracker +contralto +moleskin +pilaster +Audubon's caracara +pia +American organ +bleu cheese dressing +betel palm +PC board +almond willow +socializer +tone arm +stammerer +free-liver +scaler +Gentianopsis crinita +leak +black haw +hound's-tongue +grass pea +Stassano furnace +coralbells +ministrant +perihelion +Luxemburger +powder-post termite +arboreal salamander +cushion flower +foramen magnum +pyrethrum +poacher +woolly mammoth +horned chameleon +tearaway +father-figure +tufted gentian +salmi +finger millet +physa +registrar +polyoma +bamboo shoot +matchlock +seine +congress boot +bulgur pilaf +monosodium glutamate +Kentucky wonder +mycologist +kedgeree +ragweed pollen +boarfish +yellow pimpernel +tan +northern Jacob's ladder +macrobiotic diet +migrant shrike +big-cone spruce +colonialist +white dogtooth violet +bath asparagus +webbing clothes moth +ladies' room +experimenter +prairie bird's-foot trefoil +bootleg +cognitive neuroscientist +fire chief +flagfish +dendrite +stinking goosefoot +fore edge +hogfish +Spanish cedar +hotel-casino +Tory +life-support system +pea flour +cash bar +Chenin blanc +white-footed mouse +Canada garlic +salt-rising bread +roomette +mastodon +bell founder +long iron +bi-fold door +fig-bird +European water 
shrew +dyer's weed +frog orchid +allosaur +Florida yew +wild potato vine +crape fern +flat-topped white aster +klebsiella +oil heater +waxmallow +enjoyer +mesocarp +semidesert +senior vice president +coccidium +burrawong +syllabub +jump suit +harrier +leaf roller +cherrystone +cinchona tree +touring car +eulogist +air force officer +red goosefoot +cat thyme +smoothbore +slugger +cardiac monitor +cobber +blister rust +musicologist +rolled biscuit +Braun's holly fern +hog plum +nonpasserine bird +pascal celery +damson +Jonathan +Sheraton +cohune palm +egg white +baton +sixth-former +Siberian pea tree +choanocyte +wineskin +auditor +detention home +Leichtlin's camas +Chartreuse +clusia +club car +wattle and daub +security blanket +common American shad +assistant professor +marsh pea +camomile tea +gopher hole +gravure +Freudian +spirillum +maharani +equilateral +crow garlic +mammee apple +felwort +hardtop +dillenia +curlycup gumweed +pilot engine +calcimine +wooly lip fern +bitter dock +wineberry +jumper +monolingual +spinning frame +old-timer +native cat +diving petrel +sodium-vapor lamp +marchand de vin +sexton +matelote +interior designer +windfall +mole salamander +minder +bodkin +neutron bomb +Caloscypha fulgens +slinger ring +mezzo-soprano +aura +Southern Baptist +viscacha +midfield +tie +prosthetist +round-headed leek +yellow mariposa tulip +canary grass +staddle +Tokay +Muenster +brazil nut +California black walnut +applesauce +penologist +virgin's bower +tenon +steward +Jerusalem oak +red-bellied snake +bindery +scow +fluid flywheel +bullhead +satinleaf +clove +double glazing +matron +wild parsnip +winged elm +shoot-'em-up +musk deer +white rust +lock +Cornishman +Vidalia onion +corn spurry +freeloader +justice of the peace +inlay +myxobacteria +tiglon +tangram +German ivy +scented fern +woolly daisy +caretaker +gastroscope +scuppernong +spotted sunfish +guilloche +codling +wormcast +Eskimo curlew +tayra +European fly honeysuckle +septuagenarian +third gear 
+coatee +red alder +water ice +cubitiere +frame buffer +gamboge tree +pernyi moth +chicken Marengo +Galliano +Lincoln +true sago palm +hunter's sauce +carpet beater +alpine goldenrod +arch support +vehicle-borne transmission +jilt +paternoster +redcap +Siberian larch +hoary plantain +swan's down +chicane +reverse +divan +kneeler +alexic +mock turtle soup +daffodil garlic +mission bells +squilla +ursinia +winter's bark +trifoliate orange +discina +frijole +Swiss steak +maildrop +knotgrass +dog fennel +drum sander +heroin addict +costume +camber arch +shining willow +lutefisk +red porgy +microfossil +good old boy +angle bracket +pitcher sage +bordelaise +heat exchanger +carrion +bush jacket +fanjet +coach +blackface +sicklepod +Manhattan clam chowder +daisywheel printer +olive +Sphacelotheca +Spanish needles +brown root rot fungus +boudoir +encyclopedist +V-8 juice +red haw +brass buttons +gym suit +skywalk +water wagon +gas-turbine ship +stoup +lisle +sailor suit +box beam +balm of gilead +housemaster +hayrack +neutralist +water elm +brook thistle +doyenne +nark +alpha-tocopheral +WASP +hydrilla +water-shield +footlocker +variola major +pargeting +ion engine +yellow globe lily +Malecite +bloodleaf +yellow sand verbena +whorled loosestrife +packinghouse +Carolina parakeet +Virginia waterleaf +armband +red rockfish +factory ship +moon trefoil +jump seat +water gillyflower +yerba mansa +chamfer bit +compass saw +hopsacking +Indian rhododendron +sickbed +treacle +honey eater +mailsorter +seabeach sandwort +sob sister +primrose jasmine +prince consort +elocutionist +wishing cap +runner +trestle +sugar water +half-and-half dressing +fringed poppy mallow +portiere +bung +swan orchid +weather satellite +beef broth +marblewood +sapper +agitator +wren-tit +grade +allspice tree +spacewalker +American hornbeam +sieva bean +dill seed +potoroo +love-in-winter +alembic +Cheshire cheese +small white aster +Oregonian +flipper +twill +differential gear +Prince Albert +licorice 
+foster-father +Melkite +portraitist +Yosemite toad +Cox's Orange Pippin +slender wheatgrass +knob +silique +Rocky Mountain bee plant +stirrup pump +chicken hawk +sweetbrier +Sierra lodgepole pine +poulette +biohazard suit +striated muscle cell +Geiger counter +World Wide Web +turmeric +prairie wake-robin +latchet +pushball +grill +shooting lodge +floating-moss +refried beans +boojum tree +red poll +toothbrush tree +rabbiteye blueberry +red haw +sweet vetch +delta +upland cotton +ballet mistress +padrone +complementary color +great Solomon's-seal +bud brush +brandy sling +spinster +Andorran +Mojave aster +mackinaw +golden calla +bottom rot fungus +segmental arch +periwinkle +hellion +topknot +copper +Mexican hyssop +weeping love grass +point woman +pathogen +fall cankerworm +common shiner +silverspot +corer +atomic pile +crystal detector +yellow spot fungus +truncated cone +saprobe +variegated horsetail +Cro-magnon +cercaria +aglet +pollster +oyster bed +pancake turner +egg cream +sporozoite +quirk molding +mutisia +sound bow +physic nut +sugar-bush +cow +magnetron +jungle hen +brassie +rock bit +taco sauce +seeded raisin +desert selaginella +folding door +vinegarroon +Pinot blanc +rye +ellipsoid +betel nut +tree of knowledge +ambrosia +long tom +breechloader +bicolor lespediza +cosmetician +monoblast +American oil palm +prancer +farina +caiman lizard +hardball +bullock's heart +cotton rat +whiting +weather ship +sharecropper +creamcups +gas bracket +divinity +ornithologist +yellow twining snapdragon +showy goldenrod +end man +heptagon +sand dropseed +round file +guama +blue elder +sand spurry +raccoon dog +zigzag goldenrod +fast reactor +arctic willow +cyclopean masonry +punter +sgraffito +slattern +storage ring +clipper +pulasan +short-tailed shrew +scammony +daybook +umbrella tree +coloring +element of a cone +gesneriad +cane +burgoo +western coral snake +friendship plant +Leydig cell +scrutineer +hairy golden aster +inclined fault +water milfoil +bryozoan 
+nardoo +native pomegranate +curly grass +Florence fennel +resurrection plant +ice water +crown +ploughman's lunch +clustered lady's slipper +kitchenette +sand sedge +pouched mouse +roadbed +parsley haw +predecessor +super heavyweight +seedless raisin +mailbag +sparling +codling moth +squama +Bercy +thermoelectric thermometer +Jaculus jaculus +saltpan +firmer chisel +round whitefish +ramrod +criollo +pinch bar +slash pocket +thigh pad +velvet plant +intergalactic space +brazilian ironwood +whaleboat +sirrah +hanging fly +aspirator +Dominican +dribbler +yellow-eyed grass +Cornish +geophysicist +tarmacadam +marchioness +rattlesnake orchid +Alaska Native +ilama +myrrh tree +zucchini +licorice root +nosebag +lounger +troposphere +virginal +spaghetti Western +Virgin Mary +waterwheel plant +dry nurse +enate +carpet shark +rijsttaffel +stuffing nut +caraway seed bread +Leotia lubrica +kaffiyeh +Boston baked beans +halophyte +backscratcher +instillator +trefoil arch +pip +digitizer +dosemeter +Carolinian +French sorrel +boards +historian +rangpur +clansman +goral +leatherjacket +coiner +fleece +white globe lily +storm cellar +roundhouse +mediatrix +butterfly flower +swamp gum +prairie vole +rhizomatous begonia +common tobacco +Marco Polo sheep +subarachnoid space +broomweed +safety net +silky wisteria +swagger stick +spectacled caiman +derris root +soap pad +chop-suey greens +summer hyacinth +palo santo +carbohydrate loading +chinch bug +roadman +sheep plant +messiah +desk officer +banquette +drugget +trumpet arch +great duckweed +purdah +heartbreaker +hasty pudding +alligator weed +dragee +yellow bristlegrass +Jacob's ladder +campstool +coffee fern +sweet fern +little chief hare +cat-o'-nine-tails +rep +American red elder +divorcee +black salsify +cambric +sennit +Canada ginger +wonderer +Formica +cream-colored courser +zooid +European beggar-ticks +sorrel tree +piddock +blolly +red-flowered silky oak +bay +Hooker's onion +dark horse +cone clutch +Roman hyacinth +paintbox 
+mestiza +green alder +bill +panicled aster +mammogram +snuffbox fern +Rediffusion +swamp fly honeysuckle +stoup +psychiatrist +nodding groundsel +student union +cold duck +bee beetle +playbox +Psychopsis krameriana +nosh-up +earthnut +narthex +single-rotor helicopter +revetment +sweetleaf +seasoned salt +piculet +speckled alder +mackerel scad +common yellowwood +devisee +static tube +Spanish heath +umbrella plant +fucoid +Chilean +coral-root bittercress +fanatic +cachou +agony aunt +bird's-foot fern +washwoman +torchbearer +placoderm +frosted bat +spicemill +Cape lobster +hard-shell crab +colonizer +camphor daisy +friar's-cowl +false tamarisk +toggle joint +tinsmith +theorist +hydrologist +loganberry +universal donor +northern whiting +tent-caterpillar moth +russet +kangaroo mouse +African scented mahogany +bastinado +breast implant +betel +grade separation +vox humana +stodge +Maryland chicken +Anguillan +oil pump +governor's plum +narcissist +deadwood +private citizen +winker +ropewalker +gidgee +Lothario +ski resort +major-domo +von Neumann machine +belaying pin +water parsnip +Fissipedia +luggage carrier +spring water +oyster stew +kohl +celesta +date-nut bread +punchboard +sunniness +hospital train +man +rack and pinion +mixer +pousse-cafe +narrow goldenrod +Maxim gun +stiff +recruiting-sergeant +watch glass +white hellebore +tung tree +prairie white-fringed orchid +beef Stroganoff +scoffer +grassy death camas +Shawnee cake +tapioca +Short's aster +banker +laparoscope +honeyflower +Caterpillar +electric clock +baling wire +huntress +Surinam toad +art school +incurable +Canton crepe +apple juice +hipline +bronchoscope +marshmallow fluff +Texan +wild fig +sawed-off shotgun +forestay +red kauri +fish slice +Egyptian grass +English walnut +brown sauce +ogee arch +nectary +chambray +leather flower +phloem +Persian violet +bomb calorimeter +western narrow-mouthed toad +soup du jour +sickle alfalfa +caracolito +periscope +coralberry +sword bean +sigmoidoscope +water 
locust +hygrodeik +sycamore +sheikdom +ballistocardiograph +clove +akee +fucoid +jacquard +cat's-ear +puritan +slender wild oat +smooth softshell +purchasing agent +landing craft +chartist +lace bug +sharksucker +Virginia chain fern +horseradish +namer +ripcord +personage +aspirin powder +puku +Wankel engine +nightcap +velvet bent +roridula +cytogeneticist +olm +almond extract +common heath +fringe-toed lizard +Kentucky yellowwood +lithosphere +cramp +bulgur +scurvy grass +officer's mess +frigate +electroscope +giant chinkapin +opah +rutabaga +wood hoopoe +Farley maidenhair +shingle tree +argentine +router +palm nut +quillwort +hiba arborvitae +runcible spoon +hireling +sickbay +alpine totara +white lupine +Cotoneaster horizontalis +desert plume +staghound +Sea Scout +opalescence +enophile +Jersey elm +coal house +Helvella acetabulum +selenium cell +white camas +creole-fish +auger +fragrant agrimony +research center +achromia +shank +cottonseed +mod con +extension +sugar beet +winter flounder +silky dogwood +strop +tokamak +rabbit ears +baby farmer +fireman's ax +serration +taproot +socket wrench +action officer +Chilean jasmine +Greek fire +stem-winder +body louse +lumpsucker +stink bomb +American lady crab +dicer +lie detector +maneuverer +black-headed snake +tiger moth +shooting stick +spermatid +babushka +deaconess +home +prior +chanfron +chickasaw plum +big-eared bat +rusty woodsia +tertigravida +miniver +combretum +habit +bluehead +angled loofah +gipsywort +fire-on-the-mountain +purple milk vetch +alpine gold +merozoite +loddon pondweed +Uniat +provost marshal +Gyromitra fastigiata +Coigue +proconsul +oarfish +San Jose scale +filature +chimney plant +spiny softshell +bluecoat +live axle +river limpet +clever Dick +pink bollworm +Japanese plum +roarer +caricature plant +wardroom +Texas chachalaca +Bahia grass +Moreton Bay tulipwood +accessory fruit +pearl barley +ashcake +bunt +Polynesian tattler +pine fern +laughing owl +potato fern +speaking trumpet 
+adjoining room +bearing rein +banana quit +redbrick university +Scleroderma bovista +magdalen +pressurized water reactor +advisee +NIMBY +poorwill +almond moth +comedian +star tulip +cracked wheat +water pump +guest of honor +yellow-breasted bunting +hire +pedate leaf +augur +purple locoweed +Socinian +upland white aster +guesthouse +double reed +detention basin +rollmops +hitch +bodega +mayeng +sparkplug wrench +attack dog +peach melba +heliozoan +tower mustard +blue mold fungus +lamplighter +banded sand snake +smooth crabgrass +elsholtzia +bodkin +Aegean island +bag lady +alewife +arcella +electrical contact +common ax +animist +concave polyhedron +coalface +climbing perch +yellowtail +hobble skirt +marquee +Russian dandelion +snow mushroom +polo ball +NADA daiquiri +cormous plant +chaparral mallow +inside caliper +milking stool +fallout shelter +sea gooseberry +Danish blue +grissino +chimney breast +mosquito fern +soundbox +spring chicken +epauliere +cape forget-me-not +japan +saddle oyster +white fritillary +push-button radio +bladder senna +bladder stone +macedoine +moire +Shawnee +starnose mole +douroucouli +horseradish sauce +electron gun +cotter +console +park commissioner +free press +lump sugar +western poison oak +apple maggot +keurboom +lisper +griffon +burin +horseshoe whipsnake +Jacobean lily +spinner +cochineal insect +emesis basin +sowbane +humanitarian +uakari +three-dimensional radar +wild hollyhock +heartseed +swinger +two-by-four +mop handle +common amsinckia +traitress +rush aster +fibrous-rooted begonia +violet-flowered petunia +milliammeter +alidade +azure aster +celery seed +snorer +scarlet plume +obtuse leaf +heathen +rose chestnut +headrace +dwarf buckeye +Pacific tripletail +wiggler +bounty hunter +Lowlander +slate pencil +typist +syconium +vaquita +skybox +business lunch +gusher +curacao +palometa +Diapsida +light diet +sourdine +thorny amaranth +potato fern +cartridge extractor +peshmerga +chaffweed +tahoka daisy +hematologist +massage 
parlor +diverging lens +breadroot +papyrus +amarelle +cover plate +hubbard squash +cryptomonad +whitetail prairie dog +rabbit burrow +orthochromatic film +goncalo alves +Chile bonito +tent-caterpillar moth +Manila grass +buck sergeant +mustard seed +crested wheatgrass +wise guy +asarabacca +field pea +bite plate +barbasco +heart-lung machine +mouse-eared bat +piping guan +gun pendulum +climbing onion +fungus gnat +Livonian +one-hitter +Chilean firebush +Sonoran whipsnake +round scad +myelogram +Rhodes grass +vomitory +roble beech +South-African yellowwood +molasses +Velcro +common calamint +radiation pyrometer +sketcher +chaparral pea +coffee stall +Australian nettle +bilimbi +Khedive +visionary +field spaniel +devilwood +collimator +Siberian spruce +sling +limestone salamander +ribbon worm +hazel +petter +coolant system +artillery plant +bailiff +chameleon tree frog +microsporophyll +maiden blue-eyed Mary +Drosophyllum lusitanicum +cocozelle +king post +nailer +knobkerrie +tovarich +Intelnet +worm lizard +drop forge +wool grass +brown bullhead +anthropoid +vitamin A2 +creche +hickory nut +whiffletree +deipnosophist +Muskhogean +masochist +hypsometer +gliricidia +complexifier +wild licorice +reconnaissance vehicle +fives +beefsteak plant +eastern dasyure +bookworm +crested coral root +wire recorder +cinnamon vine +bubble +Newfoundland dwarf birch +spruce bark beetle +teetotaler +fad diet +ascus +spicebush +African coral snake +soft-shell crab +Postum +packhorse +sand cherry +cricket-bat willow +middlebrow +Hungarian sauce +buffalo clover +jimsonweed +latanier +stablemate +jumper +zoospore +smooth woodsia +flowering ash +unilateralist +lomatia +flapper +wild cotton +Siberian wall flower +probe +bankrupt +blockade +lemon geranium +fig leaf +basic point defense missile system +clack valve +buttinsky +ingenue +mountain everlasting +zebra-tailed lizard +shaving-brush tree +evergreen huckleberry +core drill +lugworm +Cashmere goat +doorjamb +minelayer +student center 
+horsehair +European dewberry +white broom +arenavirus +eastern poison oak +rye ergot +Tupi +tensiometer +fleawort +coquille +icing sugar +junior lightweight +Doppler radar +mahuang +candlepin +chambermaid +evergreen blueberry +Eton jacket +parvis +solleret +molded salad +malvasia +birth-control campaigner +nonagon +backswimmer +ogee +bowstring +salt marsh mallow +trapezohedron +hoary willow +speech therapist +Zinjanthropus +core +red-backed mouse +eptatretus +mossy saxifrage +Aristotelian +Thessalonian +searing iron +bifocals +falangist +field pea +packsaddle +lay reader +hoecake +cuboid +white maire +iceman +lobscouse +neckcloth +color-blind person +Chinese holly +assemblyman +white-lipped peccary +kava +plastron +crab louse +hook wrench +trailing four o'clock +junior +skilly +internet +tonguefish +footman +sub-assembly +evangelist +track +bench lathe +desk clerk +scalded milk +chamois cloth +American marten +chachka +nondescript +pellitory-of-the-wall +swamp candles +procurator +cuddy +farkleberry +mountain male fern +trawl +dual scan display +fish meal +prospector +convener +guano bat +ant shrike +picture rail +sand rat +gynophore +quilting +sleeper +summer savory +Cotoneaster dammeri +smooth sumac +slumgullion +suite +catalufa +spherule +lean-to tent +gryphon +gas shell +short iron +sweet sultan +dewberry +Victoria plum +American water shrew +X-ray tube +macebearer +green arrow arum +abbe +poke milkweed +atheist +Fosbury flop +Ord kangaroo rat +moldboard +wheat germ +explosive trace detection +whippoorwill +examiner +tallyman +Crookes tube +wild peach +fringed grass of Parnassus +Crookes radiometer +Atlantic croaker +lobster stew +spring cress +maggot +pacer +hydra +Zionist +pepper tree +diamante +baize +Rhodesian man +county agent +respecter +Anglican +antimacassar +materialist +Swan River everlasting +cloud grass +toll line +C battery +chinese mustard +grass poly +warming pan +seasonal worker +common sickle pine +bathysphere +elegant Habenaria +card table 
+Chilean cedar +brocket +collimator +malted milk +avadavat +fire marshall +coloratura +yellow spiny daisy +fingerstall +narrow-leaf penstemon +indigo broom +pillwort +bearberry willow +Etonian +certified milk +climbing bird's nest fern +field coil +wrist pad +parr +kaoliang +engelmannia +stocker +satrap +Nantua +spearfish +caper tree +gold-tail moth +mountain chinchilla +sea milkwort +westerner +army cutworm +leaf-nosed snake +neurobiologist +xeranthemum +Eastern silvery aster +ecclesiastical attire +caper +Ukranian +bight +button fern +peach pit +oligodendrocyte +maar +digitigrade mammal +streptobacillus +sensitometer +preemptor +oat +bell foundry +crown lens +rock purslane +Junior +Brazilian guava +kicksorter +Ohio goldenrod +red mulberry +King's Counsel +mountain four o'clock +fairy shrimp +fell +oca +sycophant +chantry +dermatoglyphic +bomblet +keyhole saw +hangman's rope +little barley +lion-jaw forceps +giant scrambling fern +popper +dulcimer +Espagnole +tardigrade +smooth-haired fox terrier +bullbrier +rewa-rewa +Japanese poinsettia +trunk line +cannery +helminth +American spikenard +prince's-feather +arthroscope +ginger +aphakic +pilot bit +angle of refraction +low-sodium diet +wall creeper +growler +praetorium +Hall of Fame +soupfin shark +Molotov cocktail +kaffir boom +stitcher +sawwort +flagellant +Atlantic herring +Reticulitermes lucifugus +voltaic pile +snowy orchid +southern flounder +skysail +osage orange +white mullein +lined snake +tolu tree +poliovirus +foreman +burette +jackass bat +invigilator +electromyograph +acarus +presence chamber +columbian mammoth +hyacinth bean +pilot +meadow jumping mouse +Maria +outskirts +aftershaft +Queensland nut +schlockmeister +plainsman +afropavo +scarlet musk flower +five spice powder +gunboat +multiplex +Dutch uncle +louvered window +chimney corner +cuscus +psalmist +Vichy water +signer +amphiuma +harmonizer +authorizer +naiad +control rod +stentor +mountain bladder fern +gig +read-only memory chip +assenter 
+vixen +hermitage +corn dab +locksmith +cockspur thorn +variable-pitch propeller +western red-backed salamander +dolman sleeve +cultist +sweet buckeye +pine vole +Peking man +mountain swamp gum +nimblewill +bethel +aye-aye +lancelet +teff +Alpine celery pine +endive +nipa palm +center of curvature +seeder +Sabahan +sea scallop +social secretary +gorgonzola +western chokecherry +misanthrope +rabbitweed +beggarman +button fern +white mallee +doodia +mastiff bat +roper +prima donna +blanc +holding pen +fingerling +skyhook +flophouse +steam chest +crystallized ginger +acrocarp +horse pistol +true mahogany +costmary +ballistic galvanometer +jaunting car +bartonia +rep +mandibular notch +bubble and squeak +umpire +fringed loosestrife +bear oak +ski jump +staggerbush +plumcot +thermal reactor +field brome +bodkin +jackknife-fish +malope +writing arm +gold fern +Stayman Winesap +merlon +eclectic +fluxmeter +emeritus +imam +drum +pop tent +capital ship +subalpine larch +flail +Lorenzo dressing +tomboy +eastern woodrat +warrantee +Pacific spiny dogfish +sheepshead porgy +farthingale +Cryptoprocta +power loom +communicant +howdah +ectomorph +false foxglove +basset horn +odd-pinnate leaf +Wisconsin weeping willow +Queensland bottletree +dampener +corbel arch +silent butler +Circe +town clerk +Japanese chestnut +bloodwood tree +switcher +cup hook +spreader +rice rat +straightedge +traverser +fluid drive +Spanish paprika +sour milk +poison camas +bean dip +card table +vinegar fly +vizier +electric-discharge lamp +purple rock brake +dynamo +Japanese snowbell +Grindelia robusta +neuroglia +safflower seed +coronet +frown line +Renaissance man +Steller's sea cow +book scorpion +isosceles triangle +arthritic +spherical triangle +kangaroo mouse +garden orache +stemless hymenoxys +titi +out-basket +gent +columnea +mint sauce +mouthbreeder +Liebig condenser +cheerer +assegai +stickler +Merostomata +dimmer +grey poplar +common heath +scorzonera +glory hole +Blackfoot +oil slick 
+musketeer +apple geranium +daisyleaf grape fern +gas furnace +bijugate leaf +Arabist +star-thistle +hand throttle +huckleberry oak +lift pump +maulstick +Rome Beauty +Newburg sauce +pit +volunteer +Baldwin +ark +Asian horseshoe crab +black calla +marlinespike +Gentianopsid procera +guinea gold vine +tucker-bag +desk sergeant +piezometer +migrator +keelson +executrix +sackcloth +onion smut +buckboard +substitute +pudge +mess +cinchona +intervenor +gravimeter +pederast +censor +gastroenterologist +cutlassfish +launch +demerara +Diegueno +bog bilberry +aglet +soda fountain +crank call +harpoon gun +ribbon fern +Gurkha +output device +epilating wax +greasewood +water horehound +return key +fairy swallow +spatulate leaf +culverin +leptocephalus +kleptomaniac +barley water +bleeding tooth +Cheyenne +maleberry +limber +tapenade +whorled aster +toe +revenant +lap joint +vein +truant +florest's cineraria +morning dress +trichodesmium +nightshirt +element of a cylinder +shopaholic +section hand +electrodynamometer +Guadalupe cypress +rosebud +racist +avaram +keeled garlic +Alaska rein orchid +orange toast +cunner +dipstick +Neolentinus ponderosus +bulbil +charlotte +pull-through +header +Manduca quinquemaculata +persona grata +elegist +cafe royale +scup +semanticist +wood sage +field magnet +tundra +bay myrtle +alluvial flat +arrowleaf groundsel +celtuce +baryon +must +entrant +othonna +pied-a-terre +liza +sticky aster +grasshopper mouse +prison guard +tire iron +bomb rack +Spanish American +sheltered workshop +turfing daisy +backbone +tangle orchid +creeping willow +dumb bomb +horse cassia +barosaur +Yavapai +shrimp Newburg +peanut worm +dwarf chinkapin oak +corchorus +brick cheese +by-catch +stover +Urnula craterium +clasp +Kekchi +alpine coltsfoot +soybean future +altar wine +ripping chisel +encephalogram +mountain spleenwort +transferee +remoulade sauce +American rock brake +stenographer +read/write head +loblolly +ground +powdered mustard +brake band +sea dahlia +freak 
+proconsul +Coffey still +Sivapithecus +pellitory +palm cat +skew arch +American angelica tree +vigilante +candelilla +andryala +amarelle +swiftlet +petcock +associate professor +sclerite +open circuit +Virginia crownbeard +Last Supper +button tree +scyphozoan +margate +mercury cell +horsewhip +water scorpion +companionway +drop cloth +Amhara +miraculous food +pro-lifer +embryologist +Creole +bombazine +Indian blackwood +cubeb +trace detector +gros point +main-topsail +meringue kiss +spree killer +capstone +specimen bottle +woolly apple aphid +silverweed +American barberry +gallfly +European bog asphodel +northern flying squirrel +alliterator +Old Catholic +heliograph +Pteris cretica +tippler +pump well +allspice +balancer +scarlet bugler +lantern fly +white prairie aster +krummhorn +robin's plantain +Pacific sardine +patty-pan +decaffeinated coffee +western saxifrage +warrantee +colorimeter +ball bearing +makomako +foot +troika +apricot sauce +data multiplexer +rose-root +sound film +Northern dewberry +water hickory +swing door +spastic +Oligoporus leucospongia +botulinus +tamale pie +Sagittarius +muff +spicebush +petiolule +pump action +Parry's pinyon +split-pea +rudder blade +princess royal +wormseed mustard +honey guide +pip-squeak +fin keel +foretop +cyrilla +Navaho +melanocyte +deist +silver tree +citrus whitefly +Morrow's honeysuckle +green peach aphid +longanberry +call-board +wild yam +novelist +toothed spurge +alienee +pond apple +allspice +Carolina lupine +Jack of all trades +white false indigo +boiled dinner +princewood +sailor's-choice +false bracken +microbrewery +black grama +tutee +brickkiln +sea raven +guesser +wirework +European lemming +thyrse +plains lemon monarda +milo +shunt +spotted cowbane +anchovy sauce +grande dame +Maryland golden aster +Chinese puzzle +boarfish +burweed marsh elder +defense contractor +nitric bacteria +Belgian hare +beach plum +conformal projection +sand fly +steering linkage +quickset +Mahayanist +Geiger tube +loudmouth 
+Lancastrian +brownie mix +ex-spouse +deltoid leaf +Shasta salamander +rabbet joint +purple anise +garibaldi +gebang palm +bladderpod +Host +great bowerbird +string cheese +spinning jenny +drift net +matriarch +guar +bitter betch +panda car +mess +plains pocket mouse +scarlet wisteria tree +deerberry +reamer +homing torpedo +molehill +stockyard +reniform leaf +rag +symmetry +Texas star +lerot +pickle relish +three-seeded mercury +cotter pin +ice-cream bean +farmyard +bar magnet +hansom +prickle cell +renal cortex +pest +Ultrasuede +sailing master +brougham +wastrel +amboina pine +Canary Island hare's foot fern +ninepin ball +southwestern lip fern +usherette +lemon drop +star begonia +weeds +saltworks +Persian melon +corbina +medusa +bucksaw +Gibson girl +diameter +American twinflower +kino +clear liquid diet +angiocardiogram +wetter +oyster cracker +yellowfin mojarra +wild parsley +life tenant +broom closet +Corynebacterium diphtheriae +square shooter +bedwetter +ball-and-socket joint +nonsolid color +Salmonella typhimurium +buffel grass +hip pad +subaltern +heliothis moth +trail boss +hayloft +Francisella +primordial dwarf +cock-a-leekie +sugarplum +propulsion system +tyrolean +Carib +salai +ketembilla +ironclad +cornhusk +heckler +multistage rocket +north island edelweiss +Chaldean +twenty-two pistol +Francophobe +scofflaw +sickle feather +screw bean +sea squill +Scopolia carniolica +agglomerator +western holly fern +presenter +straight pin +Myxine glutinosa +Colbert +clover-leaf roll +war paint +bird's-eye bush +longfin mako +running suit +arrow wood +margrave +blue fleabane +dracontium +plastron +chimney swift +child prodigy +commissar +turtle soup +postulant +archaebacteria +snakefly +Pitot tube +chap +smilo +Malthusian +French roof +worm wheel +gulag +pointed-leaf maple +pull-off +Cathaya +American green toad +ball cartridge +infiltrator +snowfield +crotchet +auxiliary pump +bearnaise +galax +chaenactis +olympic salamander +sundowner +cows' milk +beach plum 
+moss-trooper +Arabidopsis thaliana +cat's-claw +bog rosemary +ribier +book agent +bumper jack +beefwood +monk's cloth +alpine bearberry +climbing fumitory +cucking stool +puka +Piltdown man +property man +discharge lamp +X chromosome +knobble +lobster Newburg +herbalist +sunray +golden saxifrage +leopard cat +muffle +stonewort +blancmange +intraocular lens +trepan +desert mariposa tulip +plume poppy +Dane +martynia +shaver +white milkweed +napu +tansy-leaved rocket +abortus +telemeter +tansy mustard +harpy +honeysuckle +ironworks +testacean +Tartuffe +silvervine +Sihasapa +surface gauge +western blind snake +paramyxovirus +Icelander +bird louse +stockbroker belt +test-tube baby +ague root +little golden zinnia +dietician +elephant's-foot +dirty bomb +sailing warship +brier +tinter +Connemara heath +potato fungus +bait casting +decagon +rosefish +die +high-pass filter +solitaire +widow's walk +goldthread +Tudor +trews +orange pekoe +ninon +soda jerk +sump +flying carpet +burial garment +oblanceolate leaf +press gallery +Shintoist +three-centered arch +spreading pogonia +Moro +foxtail orchid +Ghanian +dry kiln +thane +naranjilla +bitter pea +American bugbane +apron string +oyster fish +Port Jackson fig +prize winner +high-water mark +Oneida +smoking room +potato skin +charge d'affaires +gantlet +amyloid plaque +barmbrack +mate +arrow leaved aster +handbarrow +horned screamer +virago +linoleum knife +rattlesnake root +K ration +reset +foot brake +red coral +good guy +aberrant +lavalava +poleax +garden webworm +sneezer +mountain heath +American dog violet +eolith +chimneysweeper +matriarch +smalltooth sawfish +sea mouse +tubercle bacillus +superconducting supercollider +Abney level +darnel +gherkin +celery salt +Tungus +pulasan +oriflamme +death camp +redhorse +apprehender +scion +selectwoman +pentahedron +principal +old school tie +slice bar +chanar +pimento butter +wailer +zero +mescal +rosebud orchid +stone bramble +Jarvik heart +NOC +pitchman +rat cheese 
+strawberry tomato +dwarf golden chinkapin +landau +tocsin +ampulla +scratcher +crab Louis +ginseng +ripcord +polluter +tensiometer +eyewitness +aalii +Oregon crab apple +conservator +day jessamine +hexahedron +suture +tippet +linsey-woolsey +vernal witch hazel +stainer +egocentric +canistel +nudger +shipping agent +shortleaf pine +battle sight +cheese spread +weeder +incendiary bomb +honeyflower +stovepipe iron +stepper +hellgrammiate +votary +aflatoxin +arquebus +impulse turbine +pipewort +garrote +glow lamp +pigsticking +blood clam +surface search radar +Bolshevik +platen +chariot +Gentianopsis thermalis +water level +quandong +catalytic cracker +giant foxtail +nut butter +drainplug +holdover +coastguardsman +Secretary of Health and Human Services +Seeing Eye dog +American plaice +coquilles Saint-Jacques +christella +medium +clingfish +lally +light-o'-love +Gentianopsis detonsa +taper file +signal detection +trip wire +lignosae +receiver +sedan +mud puppy +corn sugar +Philippine mahogany +magnetic pole +jointed rush +trapper's tea +Dorking +welcome wagon +clammyweed +guard +false azalea +convalescent +babassu +dedicated file server +colossus +air search radar +marquess +straight flute +sand stargazer +sea catfish +rosilla +ripsaw +Bermuda onion +peach sauce +sagebrush mariposa tulip +yashmak +Virginia mallow +erose leaf +sand blackberry +boulevardier +forester +choragus +onion mildew +threadfin +winged pea +sugar daddy +rotary press +styracosaur +rathskeller +Japanese millet +anchorite +coral drops +false gavial +eastern pipistrel +cheese press +Chinese primrose +pamperer +real estate broker +power worker +breeder reactor +nutcracker +piano wire +cushaw +Sinanthropus +firebreak +kelp greenling +herba impia +toll call +yoke +bird fancier +evening-snow +fever tree +reed meadow grass +flanker back +toggle bolt +Santa Cruz cypress +carbonnade flamande +northern dune tansy +mikado +millettia +forty-five +court +icepick +holm oak +Japanese angelica tree +Pacific cod 
+cant hook +urologist +spelt +lekvar +enologist +Mediterranean flour moth +prickly-edged leaf +Spanish grunt +dune cycling +frostweed +whisperer +tucker +Roman wormwood +counterterrorist +woolly alder aphid +Nuttall oak +snail butter +threshing floor +motley +forge +water mold +mummichog +sulfur paintbrush +head +walking delegate +jujube +peachleaf willow +Christmas bells +valley pocket gopher +bear's-paw fern +Lanthanotus borneensis +pearl hominy +placeman +swage block +offerer +stargazer +jeweler's glass +male chauvinist +crossbar +Oktoberfest +tamarau +micronutrient +large-leaved aster +tasset +tepary bean +sausage curl +ivy +snob +roller towel +wood meadowgrass +archil +padrone +prairie rocket +tongueflower +kidney fern +Carolina buckthorn +sea island cotton +landscape architect +realist +oyabun +mother hen +ostracoderm +esker +heliophila +nympholept +shining clubmoss +press agent +clam dip +Djiboutian +white currant +codfish ball +hand cheese +kraal +trident +conventicle +bacteroid +Indian plantain +quandong +kola nut +signor +theater light +musk clover +canistel +silent partner +steel-wool pad +diggings +affluent +sightreader +John Doe +arrowworm +goatsfoot +guardroom +wild cinnamon +kaffir boom +ink eraser +yardie +industrialist +sea lily +polarimeter +Polistes annularis +western big-eared bat +omnivore +Ted +horsecloth +crab cocktail +vacuum chamber +flower-of-an-hour +bilge +poleax +neolith +Montezuma +plum-yew +welfare case +trave +pipe bomb +shading +Centigrade thermometer +bangalore torpedo +celery top pine +nuclear rocket +fowling piece +anti-Semite +landscape +derris +bush honeysuckle +Mediterranean water shrew +ticket collector +masked shrew +white dipladenia +Savoyard +bondman +tempter +pygmy cypress +pentathlete +thruster +usurper +Arminian +yerba buena +ice field +ichthyosaurus +sackcloth +bean tostada +Oxbridge +Pteropus hypomelanus +thinker +bank robber +ape-man +thurifer +knawel +mule fat +hot spot +hairy-legged vampire bat +night raven +hook 
and eye +crocodile bird +skunkweed +beaver rat +cypress sedge +florida selaginella +April fool +Jonah crab +glass wool +corkwood +dwarf elder +hinging post +gentile +Brazilian trumpeter +witch doctor +thermograph +pink shower +Mao jacket +capelin +parang +bradawl +stooper +jewel orchid +citrange +oarswoman +Macedonian +particolored buckeye +pachycephalosaur +satinwood +Chinese brown sauce +peep sight +straight man +quandong +chamois cress +nonfat dry milk +rosin bag +Leiden jar +Grimes' golden +spirillum +grass vetch +carillonneur +downy wood mint +melon ball +sweet calabash +chlamydospore +bombshell +sidewall +sprig +Indian button fern +globe pepper +rough-stemmed goldenrod +bocconia +bubble chamber +sand dab +plum-fruited yew +aecium +marrowfat pea +hobbyist +whipper-in +salad burnet +neckband +Tangier pea +sauce Louis +salad burnet +artist's loft +koumiss +Nazarene +cutter +scrim +drape +crab-eating dog +deckhand +bedroll +gaff +stifler +pink lady +great plains paintbrush +patternmaker +yoke +caryophyllaceous plant +angrecum +quadriplegic +grid +genlisea +aspic +water table +junket +signore +Mutillidae +proprioceptor +pivoting window +Indian poke +synchroscope +trichion +tarahumara frog +proctoscope +abomination +purslane speedwell +breast drill +Japanese barberry +mandrake root +breakable +salon +American watercress +take-up +entrenchment +cocktail sauce +Scotch asphodel +borough +matchmaker +Seneca snakeroot +pointsman +psephologist +clustered poppy mallow +onion thrips +nuclear-powered ship +organizer +deciduous holly +balsam willow +enzymologist +caraway +drip loop +dog laurel +Orangeman +sapsago +polymath +backplate +leathery grape fern +modillion +two-timer +handhold +consignee +white stringybark +nettle-leaved goosefoot +bookmaker +disk drive +doliolum +palmist +packinghouse +Spandau +Whipple's penstemon +sword grass +ribbon development +pearly-shelled mussel +winter heliotrope +rogue elephant +deck tennis +Venus's flower basket +football +shim +boatswain 
+blinks +armored catfish +hooded seal +outdoorswoman +water starwort +upholstery needle +pleurodont +silky anteater +cornmeal +lead-in +redfin pickerel +horse balm +Rydberg's penstemon +cascade transformer +fly poison +Volvaria bombycina +broad-leaved twayblade +pastry cart +body plethysmograph +waverer +hardware store +Parry's penstemon +European sanicle +strawberry geranium +cross-examiner +head gate +devil's tongue +hemiepiphyte +pine hyacinth +machmeter +spirit lamp +field judge +Rock Cornish +mayhaw +Sassenach +bog pimpernel +parallel interface +crowberry +roach +Aegyptopithecus +cajan pea +lapboard +cryostat +magnetic storage medium +white yam +Lombard +rhymer +bed and breakfast +bunya bunya +rifle grenade +caterer +collared pika +anti-submarine rocket +bookkeeper +Western mountain ash +profit taker +fruitlet +Knowlton's cactus +infernal +beefsteak begonia +lunula +emulsion +intermediate wheatgrass +titfer +European sea bream +bigeye scad +yak butter +kola +cone pepper +plesiosaur +ragwort +penal colony +black carpet beetle +lubber's hole +Stapelias asterias +yard marker +balloon bomb +Scythian lamb +armory +selsyn +marblewood +spirula +fatalist +hash head +armiger +Dom Pedro +white-chinned petrel +ballast +orthopter +greater water parsnip +clutch +largeleaf holly +Evangelist +king whiting +tuna fish salad +Muscadet +surpriser +jumping bristletail +proportional counter tube +Hamburg parsley +obstructionist +pus-forming bacteria +creep feed +stepbrother +janissary +control freak +trusty +trepan +King William pine +orthicon +geological horizon +molecular biologist +violator +pariah dog +Austrian +conciliator +Fauntleroy +packing needle +mazer +Saturday night special +leucocytozoan +coastal rein orchid +whirligig beetle +capitalist +breeches buoy +clubroot fungus +meadow spikemoss +Kichai +Spanish lime +land office +camera obscura +strafer +purple-stemmed aster +lusterware +valve +Roman nettle +isthmus +breadstuff +sealskin +maleo +bilge keel +carissa plum +fish 
fly +kolkhoznik +heath pea +cowage +hog sucker +Sam Browne belt +inductor +wild licorice +Socotra begonia +supernumerary +Angle +red shrubby penstemon +toilet kit +tawse +sweet bells +kawaka +brown soft scale +lyssavirus +betting shop +double-crosser +macrotus +climbing hempweed +poi +strip mall +deadhead +petit juror +tract housing +American mistletoe +lace-flower vine +precipitator +endoparasite +hairy wood mint +red snapper +Victorian +hog peanut +line of heart +opossum shrimp +plumcot +Bavarian blue +slops +light flyweight +oregano +sand myrtle +pocket battleship +curator +narc +hydraulic cement +plains pocket gopher +closed loop +pluralist +molter +Christmas bush +snuffers +slender knapweed +footwall +plage +caper tree +red siskin +tender +boat train +tipster +low-pass filter +student lamp +morosoph +japonica +bellows +herald +oyster plant +savory +mail +computational linguist +blade +winter crookneck squash +zoomastigote +blackmailer +richweed +dialectician +genip +plumed scorpionfish +jet bridge +thermopile +billy buttons +Brule +millwright +Arenaviridae +Jones' penstemon +monastic habit +genipap fruit +burnous +dairyman +top +crab-eating raccoon +quadrangular prism +pilot burner +weeder +trireme +boy wonder +man of letters +Catawba +high-muck-a-muck +light circuit +bloodworm +lappet caterpillar +half-and-half +office boy +saddle stitch +mistletoe cactus +false chamomile +Catalina cherry +workhouse +Jamaica quassia +britches +tooth shell +reduction gear +carrot pudding +balsam woolly aphid +handspike +aioli +silver hake +flour bin +wireman +gas-cooled reactor +aficionado +plus fours +gitano +gene chip +oilfish +ingenue +tulip orchid +late purple aster +pork and beans +envoy +lemon extract +milk bar +black huckleberry +ground roller +Connecticuter +siderocyte +Jacquard loom +chub +meat safe +stock cube +Australian sumac +purple sanicle +tailless tenrec +dog wrench +rainbow cactus +castor bean +scintillation counter +eohippus +pawnbroker +gauge boson +front 
man +early warning radar +bearing wall +Bourbon +sandwichman +sild +gravelweed +perishable +cembra nut +riflebird +quicksand +slate +sweeper +ship-towed long-range acoustic detection system +defamer +president +vitamin K3 +challis +tanekaha +bloodwort +grenadier +quietist +Zairese +fucker +foremother +gesneria +print buffer +salsilla +fissiped mammal +fender +consulate +acidophilus milk +Southern dewberry +snail darter +Panama redwood tree +dehydrated food +bush willow +coffee fungus +Sinologist +Mesoamerican +hood +large civet +deck-house +cyborg +smuggler +pepper sauce +cyberpunk +Grand Inquisitor +persona non grata +haggis +weeping tree broom +stop bath +modifier +coyol +conodont +yellow giant hyssop +optical pyrometer +Carolina moonseed +marinade +aspartame +false wintergreen +cityscape +philter +turnery +hemiplegic +chuck-will's-widow +vower +track star +myrtaceous tree +small civet +intelligence analyst +dogcart +yardman +cross bit +holometabola +platen +sweet cassava +Comstock mealybug +acute angle +Communist +alcohol thermometer +mountain hollyhock +Mead's milkweed +highjacker +Townes +congou +Astrophyton muricatum +lazybones +roughcast +pressure cabin +clinch +cinnamon +smoke bomb +quandong +tout +office-bearer +punctum +efficiency apartment +Queensland hemp +Ceylon bowstring hemp +newswoman +vermin +fetid bugbane +grantee +sanitary landfill +gluten-free diet +clabber +shillelagh +white lettuce +sweet coltsfoot +beggar's lice +samite +loser +flasher +water star grass +banana passion fruit +translator +artificial kidney +Virginia creeper +American crab apple +cactus mouse +nebbish +Ligustrum obtusifolium +vox angelica +stringer +hunter +know-it-all +scene painter +invalidator +jungle cock +basilica +coriander +California single-leaf pinyon +miles gloriosus +pina cloth +law agent +scarlet fritillary +keurboom +bailor +ramjet +seedling +rib joint pliers +ways +picket ship +Surgeon General +wasabi +marquis +clostridium perfringens +Helvella sulcata +furnace 
lining +kingwood +painted sandgrouse +plain wanderer +Indian madder +silver screen +bailey +dwarf spurge +Serbian +ball-buster +shaheed +Platte River penstemon +tensiometer +mute +nymphomaniac +Yokuts +arroyo willow +whipping post +class act +load +winged everlasting +periodontist +diarist +robber frog +diestock +curry powder +ratchet wheel +store detective +hog plum +prune whip +shortwave diathermy machine +Anabaptist +post chaise +Kennan +bean caper +delegate +orderly sergeant +celtuce +jumping bean +gowen cypress +puddingwife +registered nurse +West Saxon +rosita +gun room +nasotracheal tube +matchboard +flagship +Boswellia carteri +Canadian pondweed +wonder boy +sewer rat +dimetrodon +pantograph +marsh bellflower +angoumois moth +slippery dick +woolly indris +creme de cacao +dulciana +Jewess +Macadamia integrifolia +least shrew +don +diffuser +black-stem spleenwort +grouseberry +goniometer +annotator +sticktight +gossip columnist +speechwriter +capon +rock hind +Liederkranz +chandler +echocardiograph +sidelight +fisher +brocket +New Zealand daisybush +northern sea robin +roller bandage +peachick +pellet +pichi +plug fuse +spark coil +buckwheat +brood bitch +wedgie +dwarf bilberry +filigree +bull +queen +dodo +Salish +denticulate leaf +Western silvery aster +Prima +magnetic bottle +fetterbush +process-server +nainsook +mythologist +Piedmont glacier +hammerhead +niggard +Mound Builder +Kui +Nootka +highbinder +passenger pigeon +oblong +tickler coil +agnostic +succorer +esophagogastric junction +dressmaker's model +bombshell +social anthropologist +gildhall +orpine +pterodactyl +bristly sarsaparilla +Lane's Prince Albert +hognose bat +salesgirl +lubricating system +electric catfish +wrap +Jacksonian +chard +cherry laurel +foreground +beadsman +Kolam +amniote +frozen pudding +acid head +poor box +depositor +coattail +pallas's sandgrouse +mason's level +English lady crab +skeg +cruel plant +petrolatum gauze +tuna +swivel +stock-in-trade +perisperm +civies 
+Phyllostomus hastatus +alienor +Verdicchio +guard's van +onion butter +moviegoer +planter +citrange +box huckleberry +iconoscope +familiar +helmsman +baby boomer +constructivist +American bog asphodel +whorled caraway +simple pendulum +viviparous eelpout +Job's tears +holdout +sour salt +poison bush +dusky-footed woodrat +golden algae +granadilla tree +telethermometer +crossbar +thrift +African bowstring hemp +dog in the manger +hayrack +gold-crowned kinglet +prolonge +doge +pencil +discount house +mulligan stew +Nonconformist +virologist +gregarine +facula +rocket scientist +thin-shelled mussel +oospore +annual salt-marsh aster +Afrikaner +metallic +julienne +culverin +cleavers +Berliner +mudhif +thorny skate +brown lemming +yellow colicroot +cooling system +large-leaved magnolia +free-reed +canyonside +preemptor +stake +Brucella +anti-G suit +pleximeter +squire +salsilla +write-in candidate +lowland burrowing treefrog +flare star +dwarf hulsea +jobber +mangel-wurzel +quagga +red-skinned onion +positive pole +Pteropus capestratus +jug wine +stomacher +standee +bladder worm +hakim +house of correction +pelisse +golden mole +temporizer +rose apple +drove +umbrellawort +holy of holies +lawyer cane +smooth lip fern +anode +astatic coils +zip gun +feverroot +self-heal +expansion bit +salt reed grass +field pussytoes +nutmeg hickory +cryptic coloration +Venus's girdle +Hunkpapa +Calostoma cinnabarina +raft foundation +May apple +pygmy mouse +prokaryote +yellow-green algae +Bermuda maidenhair +withdrawer +coelacanth +Elliott's goldenrod +driftfish +epicyclic train +bowl +swamp dewberry +corbel step +sadist +party line +anti-American +mining engineer +Amur privet +conidium +Gastrocybe lateritia +lithia water +chaulmoogra +Rough Rider +Guinea pepper +glade mallow +pitcher sage +whitecup +shanghaier +low St Andrew's cross +phonologist +cocobolo +perfumery +visor +prison chaplain +belt +ingesta +literary critic +industrial watercourse +reckoner +pursuer +Kinetoscope +Kuiper 
belt +hyperope +raw recruit +Galiella rufa +Prince Albert yew +slit trench +usher +tenderfoot +white-rayed mule's ears +browser +piccalilli +bran +giant buttercup +water lobelia +arborescent plant +echinus +dryland blueberry +struggler +platyctenean +Geordie +domatium +twenty-two rifle +keteleeria +sports editor +chorus girl +Hakham +dry-bulb thermometer +onomancer +double-bitted ax +Girondist +bottle bank +thyrsopteris +bandwagon +star anise +armored car +dhawa +Bessemer converter +mutineer +paradise tree +tupik +centurion +mending +chowchow +margrave +International Grandmaster +African hemp +catafalque +leptodactylid frog +forcemeat +tank shell +pill +barbecue pit +worthy +lady's maid +evergreen +Jesuit +South American staghorn +rigger +suffragan +imperialist +spherical angle +grey lemming +kitchen police +tree swift +coliphage +archaist +Conservative +rib +exegete +Mendelian +tragedian +steerage +Paleo-American +obeche +garlic +grapefruit peel +accommodating lens implant +half blood +barrelfish +catgut +lanceolate spleenwort +hardliner +frieze +name dropper +carrack +huckster +onion bread +magnetic head +pease pudding +raisin moth +negative magnetic pole +electroencephalograph +bunji-bunji +synchroflash +Mornay sauce +stencil +winged pigweed +Nesselrode +MEDLINE +licorice +mainspring +melilotus +duke +experimenter +Napier's bones +four-minute man +pin-tailed sandgrouse +toolmaker +pogge +rootstock +baton +pricket +creeping snowberry +anomalops +nester +devourer +apolemia +Maricopa +pine-barren sandwort +larvacean +American dewberry +escalope de veau Orloff +gig +myrtle +pitsaw +Lutheran +fish house punch +gnathostome +intake valve +molasses taffy +clammy locust +vandyke beard +Atlantic tripletail +planktonic algae +estradiol patch +flummery +cytologist +sectarian +oil meal +tomtate +mediterranean anchovy +aspersorium +argonaut +porkholt +sheep ked +algometer +Adventist +false goatsbeard +snake polypody +streetwalker +shelver +adoptee +highflier +pitch apple 
+prairie rocket +fish mousse +viroid +deckle +manila tamarind +observer's meridian +pincurl clip +hardstem bulrush +gossamer +brookweed +Druze +hug-me-tight +accessory before the fact +oilman +Comanche +Marine +bedlamite +Chinese cork oak +squawbush +false miterwort +walk-on +Cynopterus sphinx +brandyball +landlubber +arrowroot +cape forget-me-not +galoot +tabor pipe +checker +Levant cotton +paddle box +murderess +smirker +fuddy-duddy +withdrawer +newel +shade +pink disease fungus +tipu +sweet sultan +aeronautical engineer +tall gallberry holly +acarid +conqueror +cucumber +film director +ordinary +salon +closet queen +allegorizer +tonka bean +flax rust +negative pole +dagame +dentist's drill +mock privet +micropyle +contributor +dark horse +climbing corydalis +cosmotron +land agent +Big Blue +Cynic +tassel flower +lyrate leaf +Minuteman +Dutch-elm beetle +Hessian fly +flower girl +West-sider +window dresser +skinny-dipper +whitebait +out-and-outer +hooker +amicus curiae +jack +camwood +stockist +black root rot fungus +Jamaica dogwood +diaphragm +Holocentrus ascensionis +roselle +black maire +Pygmy +fumigator +lame duck +mudder +hydraulic transmission +conning tower +phoronid +batfish +hearing dog +monohybrid +whaling gun +Cockcroft and Walton accelerator +allemande +seasoner +epileptic +ammonia clock +Young Turk +lanseh tree +urceole +cafe noir +poster girl +Oglala +deadeye +manna lichen +positive pole +cinch +lyricist +hermaphrodite +kidney stone +dilator +number one +frotteur +kaffir bread +fish knife +tarragon +adjuster +potato wart fungus +Florida pompano +conductor +corbie gable +rounders +Catha edulis +bender +recruit +Uruguayan +subject +bunghole +day boarder +pocketed bat +Oxonian +owner-occupier +yellow-leaf sickle pine +devisor +exhibitor +looking glass +shipowner +crooked-stemmed aster +calico +dash-pot +defilade +Confucian +egg-and-dart +irreligionist +lepton +self-rising flour +diving bell +Brahui +shop girl +maximum and minimum thermometer +Dalmatian 
laburnum +correspondent +subduer +nonperson +Reaumur thermometer +rough-leaved aster +jacksmelt +pinfold +magneto +ex-wife +round-leaved rein orchid +purloo +American shrew mole +sweet sand verbena +polymastigote +outfitter +curled leaf pondweed +Italian dressing +borderer +ambusher +geebung +four-stroke engine +small ship +homeopath +gynostegium +political prisoner +Radiigera fuscogleba +ensiform leaf +rhizoctinia +satyr orchid +rue +bouillon cube +flip +prophyll +tilefish +periselene +prima donna +choker +laminar flow clean room +Hooker's orchid +fish joint +mombin +remover +array +coelostat +autophyte +consigner +Damaraland mole rat +gasman +public works +lye hominy +pearlfish +piassava palm +Georgian +uxoricide +confessor +community center +epigone +tagger +abrading stone +cryoscope +nautch girl +reliever +Cartesian +Indian beech +protoplasmic astrocyte +fundamentalist +mustard sauce +crank +houselights +five-point bishop's cap +comedienne +triangle +presentist +beaugregory +dreamer +Wave +blue mockingbird +Barbados gooseberry +ten-spined stickleback +papoose +silky pocket mouse +holdup man +agent-in-place +suspensory +emigrant +ropemaker +bookbinder +jumby bead +undershrub +Killarney fern +sheep bell +city slicker +equerry +pea crab +down-and-out +blackmouth bass +shirtmaker +lister +UNIX guru +snipefish +gimbal +maisonette +haircloth +Ranvier's nodes +pigmy talinum +tribute album +msasa +hydroxide ion +madame +four-pounder +prophet +sloganeer +field-effect transistor +nude mouse +canteen +Calostoma lutescens +buteonine +sunlamp +Uruguay potato +Spanish tamarind +Prince-of-Wales'-heath +kishke +caprifig +chincapin +hegari +alarmist +bathtub gin +astatic galvanometer +Calostoma ravenelii +marang +tussah +coin box +bugleweed +hacker +frontal eminence +timekeeper +shunt +bicycle clip +mustang mint +caesium clock +hospice +glenoid fossa +archpriest +ex-gambler +incrustation +salvager +Donatist +violator +lamb succory +hygroscope +oilbird +sharptail mola +showplace 
+corn syrup +flashlight fish +pulse timing circuit +anchovy paste +fascista +chigoe +divan +Druid +squad room +Huntingdon elm +buffalo carpet beetle +carper +corn lily +goats' milk +assault gun +cockpit +Lochaber ax +Visigoth +occupier +Basotho +criminologist +spindle +Rosicrucian +Cornishwoman +musk kangaroo +artificial skin +pandurate leaf +Parkia javanica +roundhead +tea-like drink +basidiolichen +unguiculate +stepmother +Nauruan +gutta-percha tree +bloodberry +scarlet haw +marupa +censor +algebraist +pelvimeter +whaler +cowhide +paparazzo +biochip +internationalist +Yukon white birch +hangar queen +chlamydia +puttee +Pipturus albidus +pearly razorfish +sea moss +burglar +hoary golden bush +colter +drey +bushman's poison +maxillaria +gnetum +deadeye +shittah +swamp oak +damper block +deepwater squirrelfish +truffle +cangue +paleolith +lawyerbush +sorehead +Texas snowbell +Tremella reticulata +quarter +keelboat +dimity +whiner +Wagnerian +myrmecophyte +frontierswoman +pyrometric cone +big-tree plum +puppy +galbulus +hod +winceyette +carriage wrench +dictostylium +farmland +infanticide +Jacob's rod +threadfish +monocline +inamorato +leaf miner +purple cress +passer +black-fronted bush shrike +silverrod +bootmaker +segregate +captive +Edmontonia +spherometer +television transmitter +bladder +Saratoga spittlebug +dynamometer +lodge +smooth darling pea +Cossack +wake-up call +Olmec +sutler +molasses kiss +corner post +rattlesnake weed +yardmaster +adder +rhinoscope +referral +ulster +pantaloon +counterspy +gadgeteer +heart cherry +hospital chaplain +Clydesdale terrier +plank-bed +Russian thistle +actinometer +dyspeptic +common wolffia +firewall +seidel +potato moth +soapweed +seif dune +thill +cosmographer +absolver +halberdier +fire control system +kai apple +bastard pennyroyal +Big Brother +broadcast journalist +Albatrellus dispansus +citrophilous mealybug +split end +nickel-iron battery +Newtonian +gas maser +thumbstall +anaspid +dusky-footed wood rat 
+latitudinarian +flatbrod +schizocarp +niqaabi +flight surgeon +gyrocompass +Polyporus tenuiculus +Utopian +mailboat +spellbinder +undercoat +cassareep +typical jerboa +photocathode +katharometer +bight +fur-piece +penetration bomb +malik +Siberian millet +nanomia +Wykehamist +tosser +gyrostabilizer +microwave diathermy machine +crystal set +wall +legatee +alfalfa +angwantibo +charioteer +piano maker +African mahogany +Morlett's crocodile +taro +parallel circuit +cush-cush +etymologist +matriculate +neem seed +cornerback +kingfisher daisy +redoubt +blastomycete +peplos +costumier +publican +tobogganist +semolina +myrmidon +parricide +gymslip +whoremaster +cryptocoryne +header +platitudinarian +barleycorn +spiral bandage +reciter +abecedarian +dance +wrymouth +bilberry +Liopelma hamiltoni +streamliner +Fordhooks +fixed phagocyte +radiobiologist +neurologist +Selkup +dollarfish +cascade everlasting +acrodont +boarhound +midstream +theatrical producer +abhorrer +goldsmith +photometrist +Anglo-Saxon +rugel's plantain +sable +workmate +ferule +ankus +earleaved umbrella tree +Passamaquody +timucu +Mexican pocket mouse +yerba santa +Rochon prism +apomict +monocarp +sweet unicorn plant +common winterberry holly +archivist +drypis +paretic +fly-by-night +white-berry yew +Schoolman +blue cheese dressing +vintager +squatter +Euphausia pacifica +corrugated fastener +yellow henbane +Croesus +almoner +analphabet +acoustic delay line +sheep frog +workhouse +horseleech +venturer +pond-scum parasite +Pyrenees daisy +plagiarist +Truncocolumella citrina +rerebrace +group captain +caddis fly +hot-rock penstemon +kanzu +stylopodium +slopseller +rauli beech +starter +ootid +statesman +distributor cam +ascot +falcon-gentle +Duplicidentata +spotted antbird +heliometer +false buckthorn +Allegheny spurge +Cavalier +dart +photocoagulator +master-at-arms +kei apple +baldachin +crapshooter +gametangium +white hope +chipotle +spike heath +Scotch woodcock +Florentine +differential analyzer 
+Mitrula elegans +wet cell +basil balm +Circassian +corn cake +bouncing betty +vice-regent +lagerphone +ketembilla +whoremaster +fork +tetrasporangium +trifler +pill head +life-support system +quartermaster general +tobacco thrips +officeholder +teredo +toyon +Sundacarpus amara +Phytophthora citrophthora +naif +lobbyist +alligator wrench +bully +heavy +toxicologist +radio chassis +waterdog +drive line +kaffir cat +foster-brother +breakax +curette +traditionalist +pipe vise +striped button quail +gawker +homeotherm +schoolyard +battue +kalansuwa +deviationist +Bolshevik +transponder +pungapung +iron +Eyeish +roccella +manglietia +Tory +print seller +Texas Ranger +otter shrew +seconder +shellflower +outlier +party man +wold +hayfork +oncologist +framer +co-beneficiary +ocean pout +Chinese angelica +scrimshaw +air attache +false gromwell +standing press +fringepod +specifier +automatic choke +durum +yenta +wassailer +reeler +signora +beach pancake +common booklouse +pellicle +backroom boy +den mother +associate +Unitarian +gambist +brookweed +clubroom +cat's-tail +playboy +self-registering thermometer +doorstop +bennet +yak's milk +escapee +quail bush +sparge pipe +coast boykinia +screw key +half gainer +aggravator +cotton mill +tailor's chalk +free agent +cotton mouse +deadhead +bunny +turpentine camphor weed +amaranth +ceratodus +red lauan +beam-ends +thermograph +wally +Toda +handrest +commissary +oak-leaved goosefoot +manufacturer +voicer +Jafnea semitosta +bench hook +finder +abyssal zone +rabbitwood +Hercules'-club +epicarp +declinometer +camp follower +signaler +Australian pea +putz +qadi +banded palm civet +egg timer +regnellidium +calisaya +harvestfish +sound spectrograph +side-wheeler +glomerule +woolly rhinoceros +Black Muslim +horticulturist +ornithomimid +cryometer +battlefront +gametophyte +airmailer +cuisse +nakedwood +baseball club +slasher +anise +leatherleaf +leatherjacket +horned pondweed +gofer +Saigon cinnamon +barong +blazer +twinkler +skeleton 
shrimp +dial +floorwalker +case shot +flannelbush +cultivated parsnip +Jane Doe +few-flowered leek +nogging +placer miner +muzzler +serge +lion-hunter +capulin +Wandering Jew +ascidian tadpole +hispid pocket mouse +southern spatterdock +milk wagon +junior middleweight +duck sauce +promycelium +protozoologist +cascade liquefier +tout +longheaded thimbleweed +charcoal burner +footage +slop +bridge agent +miller's-thumb +Job's comforter +marocain +tanker plane +lancetfish +knocker +toque +ordinand +umbrella bird +favorite son +hare's-foot bristle fern +business traveler +plotter +Asiatic shrew mole +tallyman +stump +Paleacrita vernata +index register +mortgagee +accuser +codger +sand rat +seaside centaury +chiropractor +Florida smoothhound +dwarf sperm whale +T-man +sannup +dragonhead +numdah +alkali grass +gynobase +kymograph +ascolichen +steward +waterline +Nazarene +filer +lapidary +muncher +wincey +scyphus +question master +besieger +worldling +docent +facing +atmometer +quern +puerpera +three-decker +calliope +wild red oat +bailee +flame pea +cattle cake +theist +yellowtail flounder +cosmopolitan +rocket engineer +vouchee +Turkoman +hard sauce +Thousand Island dressing +assayer +messmate +mutilator +oyster bar +flame tokay +countess +prairie mimosa +microsporangium +cotter +townsman +paring +fundraiser +simperer +Comrade +orlop deck +power takeoff +cattleship +prime meridian +Javanthropus +scriptorium +curandera +long-clawed prawn +maestro +paster +potato tuberworm +chachka +junkyard +cape yellowwood +reentrant polygon +Liberian coffee +restaurateur +Alsophila pometaria +Jekyll and Hyde +electrophorus +Scomberomorus maculatus +manipulator +gromwell +chicken provencale +ashram +mangel-wurzel +shamrock pea +dossal +adducer +erection +Mysore thorn +smoothie +chufa +brace wrench +victualer +litterer +linstock +Protium guianense +palfrey +banyan +klieg light +dangleberry +trooper +yaupon holly +quitter +tradescant's aster +nullipara +melter +devil's urn +ghostwriter 
+mouth +analogist +Creek +sonic depth finder +fucker +locus of infection +mortician +esophageal smear +locum tenens +conic projection +aroeira blanca +bellarmine +night porter +automobile mechanic +codpiece +Munro +cottonweed +scoinson arch +tinderbox +frozen food +waterproofing +Egyptian henbane +lash +transactor +American smooth dogfish +existentialist +grabber +Sonoran lyre snake +Rufous rubber cup +colors +weekend warrior +power user +perennial salt marsh aster +Puritan +Apalachicola rosemary +anecdotist +tosser +moth bean +agnostic +stretcher-bearer +browntail +optimist +brewer's mole +astronomy satellite +flat file +rust mite +tuberous plant +day laborer +buster +trapezoid +bevatron +nonresident +Streptomyces griseus +mangosteen +customer agent +hero worshiper +suicide bomber +procellariiform seabird +archiannelid +reaction turbine +distortionist +bulldog wrench +grainy club +scalp +Aztec +scow +globigerina +pedant +heartleaf manzanita +kanchil +low gallberry holly +containment +scandalmonger +rose-colored starling +Powhatan +addle-head +Chilean rimu +Atlantic sea bream +arthrospore +ramrod +root climber +Kalapooia +roach clip +Schreiber's aster +horseradish +albino +Kshatriya +trombidiid +blasting cap +body pad +brachium +shallu +Wynnea americana +slender centaury +munj +upset +wind tunnel +cottonwick +airing cupboard +pepper shrub +ambrosia +languisher +chosen +rose globe lily +purple apricot +costia +sloop of war +sultana +frontlet +booster +sargassum fish +broad-leaved montia +rifleman bird +stillroom +amoralist +enginery +meter maid +fitment +southern bog lemming +Athenian +clincher +cusk-eel +mackintosh +diaphone +corozo +Australian reed grass +czar +spongioblast +Eurafrican +airhead +Shahaptian +Roman +pollinium +tourist class +halogeton +stamper +emperor +malingerer +tramp steamer +Peziza domicilina +pilot cloth +stenopterygius +cost accountant +Queen's Counsel +wine-maker's yeast +poppet +cage +rowlock arch +landgrave +bearded wheatgrass +stink bell 
+quaker +undesirable +algarroba +resistance pyrometer +exorcist +carib wood +guvnor +border patrolman +bathhouse +licenser +headman +rentier +pine spittlebug +nut-leaved screw tree +paraduodenal smear +apron +necker +smilax +Alpine besseya +creeper +castle +ground bait +Queensland grass-cloth plant +sclerotium +great yellowcress +fat farm +Stoker +hoop snake +elixir of life +Trotskyite +home buyer +wheat berry +Tutelo +semi-climber +utahraptor +wet-bulb thermometer +packrat +hygrophyte +darter +sketcher +refiner +camlet +midgrass +compound +tarwood +Colorado River hemp +toiler +abstractor +override +dwarf pipefish +plodder +briefcase computer +trunk hose +brown butter +valve-in-head engine +cymbalist +explosive detection system +horsewoman +boutonniere +chinchilla +venerator +scourer +exarch +cohune nut +ayapana +continental divide +cosigner +stalker +pyxie +Genet +Macowanites americanus +open-hearth furnace +water chestnut +American frogbit +tarwood +cutter +scout +burr +upsetter +grist +tagasaste +mouthpiece +palette +rattan +letterman +Exmoor +Methodist +eelblenny +marasca +slide valve +ventilation +saddle hackle +Yakut +flux applicator +air traveler +murder suspect +Cynocephalus variegatus +idolizer +Surgeon General +nutlet +little-head snakeweed +germ tube +fellow traveler +raceabout +commodore +czar +anamorphosis +treelet +girlfriend +groundnut +sideline +giant star grass +goffer +spark lever +oubliette +processor +tare +plodder +extremist +Kipp's apparatus +gripsack +S wrench +viscountess +bridgehead +cascarilla +Asiatic flying squirrel +protoceratops +equerry +difflugia +princeling +moonlighter +aspergill +common flat pea +Utahan +imperial mammoth +plantain-leaved pussytoes +Boott's goldenrod +bootlegger +reed pipe +runcinate leaf +onion salt +nitrite bacterium +introvert +duck +New World opah +goliath frog +heterostracan +disrupting explosive +haggler +candlenut +false bugbane +returning officer +eudiometer +ship-breaker +metazoan +mandarin +patka +gill 
net +cavity wall +armilla +rainmaker +dealfish +orderly +gleaner +muffin man +house sitter +alto +sand devil's claw +vulcanizer +appendicularia +boron chamber +chess +bitok +anchovy butter +dropout +flour mill +bishop +escapist +scapegrace +stanhope +smooth winterberry holly +upstager +stalking-horse +pony +prairie gourd +parabolic mirror +Polaroid +slasher +lap +garlic butter +sendee +German millet +hairy honeysuckle +Swiss canton +Scleroderma flavidium +red goatfish +telegraph plant +Jungian +garment cutter +mallee hen +stranger +driveway +schooner +Paiute +cisco +trestlework +sipper +shanny +romanticist +Molly Miller +mountain rimu +odd-leg caliper +bitumastic +Western Australia coral pea +labor coach +latchkey +harpulla +solitary pussytoes +chop-suey greens +coil +guimpe +diapir +Osage +gutta-percha tree +giant eland +reticulation +garden huckleberry +quick study +Hudson bay collared lemming +coreligionist +Lancastrian +stumblebum +omnirange +seersucker +Potemkin village +Rhea Silvia +symphonist +bolti +jaw +jaconet +page +visiting fireman +haulm +p-n junction +landlubber +yellow jack +triclinium +souari +invader +fire walker +Luddite +Plott hound +hemming-stitch +winker +star-duckweed +craniometer +Arabidopsis lyrata +loser +cypripedia +trimmer arch +cookhouse +pink fivecorner +transfer +ringleader +northern pocket gopher +moke +blockade-runner +cyclostome +web-spinning mite +Whig +transcriber +malahini +sawyer +patent log +paca +tragedian +thermojunction +soffit +black buffalo +foreigner +applecart +brit +pole horse +white mullet +argentinosaur +Homo soloensis +bounty hunter +decumary +hand +paperboy +Smitane +windowpane +Java man +Wynnea sparassoides +prune +middy +lilliputian +sorb +pyrostat +guest worker +hold +leaseholder +vegan +humanist +salinometer +piton +zygospore +means +night rider +tetraspore +archipelago +radiomicrometer +nitpicker +spot weld +slicer +girlfriend +round-tailed muskrat +cock's eggs +Shavian +bay +nuclear chemist +planetarium 
+hiccup nut +Marylander +milling +microsporidian +brown cup +Strophanthus kombe +little skate +emancipator +paperhanger +archaeopteryx +maigre +Mastotermes electrodominicus +procurer +seizure-alert dog +homeboy +cotton strain +mute +siren +spearnose bat +phenacomys +gayal +arsenal +pitchfork +Port Jackson heath +cud +magnetic core memory +interferometer +water jacket +account executive +hodoscope +window oyster +sudatorium +syncopator +loment +hypertensive +smoothbark +Geogia holly +nailhead +African holly +musette +chafeweed +microflora +derrick +strawworm +shogun +queen post +jerboa kangaroo +columbo +royal +sourball +solenogaster +cardsharp +Homo habilis +intaglio +calf's-foot jelly +flotsam +skirret +baronduki +chyme +shovel hat +Welsh +monoplane flying fish +groundfish +tablet-armed chair +swan dive +Indian club +colonial +cassiri +pyramidal tent +praya +silk vine +time clock +button snakeroot +clews +Korean lespedeza +diffuser +ripping bar +puttyroot +nipple shield +headpin +juneberry holly +hub-and-spoke +laver +weldment +plain flour +hoosegow +dudeen +grey skate +line of life +mung +arariba +Newtown Wonder +rock candy +side chapel +castor sugar +narrow-leaved white-topped aster +babassu nut +puka +rings +catchall +heat shield +caroche +oxbow +Australian coral snake +tapper +sporangiophore +fenugreek +spruce gall aphid +gouache +cutoff +private line +pod +cargo hatch +nailhead +penile implant +geophyte +small-leaved linden +deepwater pipefish +paperhanger +hairy spurge +Persian lamb +subtropics +feed grain +clarence +nonparticipant +scorpioid cyme +hand brake +tiller +Geglossaceae +albacore +monochrome +goa bean +bur +tongue worm +psittacosaur +frog's lettuce +pectoral +terreplein +light filter +fishpaste +dry point +grison +feterita +dolichocephalic +oenomel +stretcher +swag +cheval-de-frise +mountain beaver +scammony +discus +leatherleaf saxifrage +wharf rat +Dominique +pelycosaur +depth gauge +bishop +archespore +true anomaly +silver jenny +mercy seat 
+kelp +oviraptorid +acrylic +Chinese pea tree +meat house +bilge well +Temperate Zone +whale louse +balbriggan +briefcase bomb +pump-type pliers +oil +sour gourd +Jewbush +lunette +Chinese paddlefish +pyxidium +beechnut +calabar bean +grugru nut +gib +blunt file +cataphyll +megasporangium +blockbuster +sliding seat +hogchoker +calceus +Connarus guianensis +honest woman +survivor +second balcony +tempera +Calvary clover +murine +outwork +bogy +elephant's-foot +conning tower +set square +blackfly +stirk +Streptomyces erythreus +blade +goldfield +snowball +mortal enemy +waltzer +shoal +galley +hitchhiker +lithophyte +brisling +scauper +esophagoscope +grab +subtracter +philosopher +duplex apartment +southeastern pocket gopher +bonduc nut +reverberatory furnace +grader +lamp house +northern bog lemming +brotula +ornithopod +ptyalith +obturator +perpetual motion machine +range pole +Africander +curvet +daisy print wheel +floor +collector +mutant +tuck +fore-and-after +senega +buckler mustard +louvar +Tarsius glis +culdoscope +Spanish fly +steering gear +hatchet man +museum +saw set +cambric tea +comber +thermohydrometer +stationer +chalcis fly +bryanthus +whipstitch +harvest mite +rock gunnel +time bomb +rariora +pigfish +apetalous flower +head shop +horned whiff +sandpit +tachistoscope +sundries +taffrail +caller +monofocal lens implant +Dover's powder +souari nut +crowbait +render +Shakespearian +hagberry +megatherian +magus +hatchel +mangabey +garroter +piedmont +cope +barrio +psychodid +rigout +distributor +croupier's rake +sarcenet +narrow-leaved water plantain +treenail +biped +lanternfish +overdrive +barndoor skate +picket boat +amber lily +sawpit +sand lance +bucket shop +common beech +laundry truck +surtout +grogram +tampion +escape hatch +interstice +shop bell +snake mackerel +nakedwood +tumbrel +mericarp +mountain paca +cab +big board +cringle +eusporangium +shipping room +coal chute +dumbwaiter +Smiledon californicus +man-at-arms +cartridge +deinonychus 
+pigeon pea +screw bean +spectacle +floorboard +cutting room +low-warp-loom +proconsul +sabicu +genipap +clapper +aquifer +archaeornis +belly flop +Protium heptaphyllum +interrupter +high-warp loom +knight +wiper +impression +poker +Pithecanthropus +sable +guardroom +tenter +wellhead +raja +strickle +sodomite +mountebank +sand leek +Barbados gooseberry +shuffler +sensory fiber +crab-eating opossum +etching +rare bird +scup +fagot +negro vine +hutment +droshky +nephoscope +lady chapel +cutty stool +release +vestiture +buff +standard +Tabernacle +vascular ray +snakewood +chlorobenzylidenemalononitrile +limnologist +pouched mole +microwave linear accelerator +Mastotermes darwiniensis +wind tee +orange bat +open sight +carpospore +rampant arch +sabbatia +cursor +post exchange +bellpull +center +cyclostyle +canonist +pygmy sperm whale +moa +king +pass-through +angioscope +marrow +hookup +revetment +acanthocephalan +good Samaritan +apatosaur +web spinner +dixie +ommastrephes +crossbench +candlewick +jack +light arm +caisson +kaki +quandong nut +Meuniere butter +coquilla nut +mast +black +twitterer +bluethroat pikeblenny +shielding +water-shield +urolith +elephant bird +clearway +dark lantern +schizopetalon +press +Nazi +sugarberry +Maltese +stevedore +hair shirt +party wall +gainer +blackheart +nothosaur +cavetto +evergreen bittersweet +chemical bomb +calpac +shingle +turnpike +animator +heaver +isoclinic line +death knell +liner +anathema +aerie +razorback +Ichyostega +pound net +French dressing +mottle +yard +string tie +bell seat +brattice +battering ram +sierra +pompon +vertex +stomach pump +electrolytic cell +escolar +telpher +roadhouse +cerecloth +tartare sauce +letter case +whale sucker +hob +teg +canvas +strickle +hectograph +Cartagena bark +mail car +acinus +freedom rider +bread sauce +picture window +Rhizopogon idahoensis +pinprick +mass spectrograph +ringer +devil's cigar +salad cream +marlberry +airbrake +Clark cell +yellow-throated marten +wire gauge 
+dinoceras +aba +harpoon log +plate rail +mustard plaster +coelophysis +journal box +puce +ballcock +quartering +izar +clinid +whirler +turnspit +deathbed +pottle +shot +doubler +Coryphaena equisetis +English sole +chicken feed +borrow pit +mylodontid +Chilean nut +Kundt's tube +ling +asthenosphere +reseau +death seat +immovable bandage +peppermint patty +lecturer +electron multiplier +bear claw +hyacinth +beaked salmon +toehold +scull +snowball +gangsaw +fiber +oxeye +lashing +Beckman thermometer +fence +cantilever +dinner theater +Reynard +jag +umbrella plant +camera lucida +beaver +slug +yellowfin croaker +Sibley tent +rat-tail file +anchovy pear +soldier +cackler +chaise +Pitot-static tube +minniebush +Episcopalian +oleaster +ejaculator +wavy-leaved aster +knight +rack +real storage +magnetic mine +cocoa plum +vesiculovirus +birch leaf miner +water chevrotain +rudapithecus +torpedo tube +itch mite +warren +loft +washerman +terrace +nonstarter +shit +platform +caudex +ground control +Ostariophysi +slopshop +Peruvian cotton +crystal oscillator +plastic bomb +bar bit +watering cart +Asiatic sweetleaf +artificial joint +chariot +casern +charge-exchange accelerator +display adapter +hornpipe +honey bell +planula +Nephthytis afzelii +hame +ranter +trachodon +synchrocyclotron +splasher +heterotroph +Nicol prism +Himalayan rhubarb +headfast +put-put +bitter almond +parr +scantling +power breakfast +madder +Catalpa bignioides +rose of Jericho +spark chamber +rhizome +beard worm +supper club +negro peach +keratoscope +wain +apple aphid +planking +time-delay measuring instrument +sternpost +sicklepod +lake bed +gatherer +monotype +dead-man's float +poison gas +dicynodont +organism +cell +person +animal +plant +food +artifact +dressage +contact sport +outdoor sport +gymnastics +track and field +jumping +high jump +skiing +water sport +swimming +dive +floating +skin diving +rowing +boxing +sledding +tobogganing +wrestling +skating +ice skating +roller skating +racing +boat 
racing +riding +equestrian sport +cycling +blood sport +hunt +fishing +angling +casting +athletic game +outdoor game +golf +field game +field hockey +football +American football +ball game +baseball +court game +badminton +basketball +tennis +sport +Seder +scavenger +bottom-feeder +work animal +beast of burden +pack animal +domestic animal +marine animal +female +male +young +young mammal +pup +cub +lion cub +tiger cub +microorganism +arbovirus +herpes +herpes zoster +reovirus +moneran +cyanobacteria +enteric bacteria +actinomycete +streptomyces +diplococcus +parasite +ectoparasite +protoctist +protozoan +sarcodinian +ameba +ciliate +alga +brown algae +green algae +sporozoan +cypriniform fish +cyprinid +carp +domestic carp +shiner +catostomid +buffalo fish +cyprinodont +killifish +topminnow +squirrelfish +stickleback +pipefish +embryo +fetus +blastula +chordate +cephalochordate +tunicate +ascidian +vertebrate +aquatic vertebrate +jawless vertebrate +lamprey +hagfish +cartilaginous fish +holocephalan +chimaera +elasmobranch +shark +mackerel shark +mako +requiem shark +dogfish +smooth dogfish +spiny dogfish +smooth hammerhead +smalleye hammerhead +shovelhead +ray +sawfish +roughtail stingray +butterfly ray +eagle ray +manta +skate +bird +gamecock +night bird +ratite +passerine +oscine +accentor +lark +pipit +finch +canary +dark-eyed junco +New World sparrow +bunting +honeycreeper +sparrow +grosbeak +towhee +weaver +grassfinch +tyrannid +New World flycatcher +kingbird +pewee +cotinga +antbird +Old World flycatcher +thrush +nightingale +Old World chat +warbler +kinglet +Old World warbler +New World warbler +flycatching warbler +New World chat +yellowthroat +New World oriole +northern oriole +meadowlark +New World blackbird +grackle +Old World oriole +starling +myna +corvine bird +crow +Old World jay +common European jay +New World jay +blue jay +Canada jay +Rocky Mountain jay +nutcracker +European magpie +American magpie +Australian magpie +wren +marsh wren +thrasher 
+New Zealand wren +creeper +titmouse +black-capped chickadee +Carolina chickadee +swallow +martin +tanager +shrike +butcherbird +bush shrike +bowerbird +European water ouzel +American water ouzel +vireo +waxwing +bird of prey +hawk +black kite +swallow-tailed kite +white-tailed kite +harrier +falcon +peregrine +caracara +eagle +young bird +sea eagle +Aegypiidae +Old World vulture +griffon vulture +bearded vulture +Egyptian vulture +black vulture +New World vulture +buzzard +condor +Andean condor +California condor +black vulture +king vulture +owl +horned owl +scops owl +amphibian +salamander +newt +Pacific newt +ambystomid +climbing salamander +web-toed salamander +frog +true frog +true toad +spadefoot +tree toad +cricket frog +tongueless frog +reptile +anapsid +diapsid +chelonian +turtle +sea turtle +ridley +snapping turtle +musk turtle +diamondback terrapin +Western box turtle +tortoise +soft-shelled turtle +saurian +lizard +gecko +iguanid +spiny lizard +fence lizard +horned lizard +skink +teiid lizard +racerunner +plateau striped whiptail +Chihuahuan spotted whiptail +western whiptail +checkered whiptail +agamid +moloch +anguid lizard +venomous lizard +lacertid lizard +chameleon +monitor +crocodilian reptile +crocodile +alligator +caiman +armored dinosaur +ankylosaur +bone-headed dinosaur +ceratopsian +hadrosaur +saurischian +sauropod +theropod +ceratosaur +maniraptor +synapsid +pterosaur +ichthyosaur +snake +colubrid snake +smooth green snake +rough green snake +racer +blacksnake +whip-snake +rat snake +bull snake +common kingsnake +milk snake +common garter snake +ribbon snake +Western ribbon snake +common water snake +water moccasin +grass snake +viperine grass snake +sand snake +lyre snake +blind snake +indigo snake +constrictor +boa +python +elapid +coral snake +coral snake +cobra +mamba +black mamba +krait +viper +pit viper +rattlesnake +timber rattlesnake +arthropod +arachnid +false scorpion +whip-scorpion +spider +European wolf spider +acarine +hard 
tick +Ixodes dammini +Ixodes neotomae +Ixodes pacificus +Ixodes scapularis +sheep-tick +Ixodes persulcatus +Ixodes dentatus +Ixodes spinipalpis +wood tick +soft tick +mite +trombiculid +spider mite +house centipede +gallinaceous bird +domestic fowl +jungle fowl +chicken +cock +hen +turkey +grouse +European black grouse +Asian black grouse +blackcock +greyhen +red grouse +moorhen +greater prairie chicken +lesser prairie chicken +heath hen +guan +chachalaca +megapode +mallee fowl +phasianid +pheasant +bobwhite +northern bobwhite +Old World quail +migratory quail +peafowl +California quail +Hungarian partridge +red-legged partridge +Greek partridge +mountain quail +guinea fowl +columbiform bird +pigeon +dove +turtledove +domestic pigeon +homing pigeon +sandgrouse +parrot +cockatoo +lory +varied Lorikeet +rainbow lorikeet +parakeet +cuculiform bird +cuckoo +crow pheasant +coraciiform bird +roller +kingfisher +hoopoe +apodiform bird +swift +Archilochus colubris +thornbill +goatsucker +piciform bird +woodpecker +flicker +sapsucker +toucanet +trogon +quetzal +aquatic bird +waterfowl +anseriform bird +duck +teal +widgeon +sheldrake +goldeneye +scaup +wood duck +sea duck +scoter +merganser +gosling +gander +Chinese goose +greylag +blue goose +snow goose +brant +common brant goose +honker +barnacle goose +swan +tundra swan +screamer +crested screamer +mammal +prototherian +monotreme +marsupial +opossum +bandicoot +kangaroo +common wallaby +hare wallaby +nail-tailed wallaby +rock wallaby +pademelon +tree wallaby +rat kangaroo +phalanger +dasyurid marsupial +dasyure +placental +calf +buck +insectivore +mole +shrew mole +shrew +water shrew +tenrec +invertebrate +sponge +glass sponge +coelenterate +Chrysaora quinquecirrha +hydrozoan +siphonophore +anthozoan +actinia +coral +gorgonian +stony coral +ctenophore +worm +planarian +fluke +liver fluke +Fasciolopsis buski +schistosome +tapeworm +echinococcus +taenia +common roundworm +chicken roundworm +pinworm +eelworm +vinegar eel 
+trichina +hookworm +filaria +Guinea worm +annelid +oligochaete +polychaete +leech +mollusk +scaphopod +gastropod +abalone +scorpion shell +giant conch +edible snail +garden snail +brown snail +Helix hortensis +seasnail +neritid +limpet +Hermissenda crassicornis +cowrie +bivalve +clam +quahog +cockle +oyster +mussel +marine mussel +freshwater mussel +scallop +shipworm +cephalopod +octopod +decapod +squid +crustacean +malacostracan crustacean +decapod crustacean +crab +swimming crab +spider crab +lobster +true lobster +Old World crayfish +American crayfish +shrimp +prawn +krill +stomatopod +mantis shrimp +woodlouse +pill bug +sow bug +sea louse +amphipod +copepod +barnacle +wading bird +stork +ibis +common spoonbill +roseate spoonbill +heron +egret +night heron +American bittern +European bittern +least bittern +whooping crane +rail +crake +gallinule +purple gallinule +coot +great bustard +plain turkey +button quail +trumpeter +seabird +shorebird +plover +turnstone +sandpiper +yellowlegs +ruff +tattler +woodcock +snipe +greyback +red-breasted snipe +curlew +godwit +stilt +stilt +phalarope +courser +coastal diving bird +larid +gull +tern +jaeger +skua +auk +guillemot +murre +puffin +gaviiform seabird +podicipitiform seabird +grebe +pelecaniform seabird +white pelican +Old world white pelican +gannet +snakebird +sphenisciform seabird +penguin +pelagic bird +wandering albatross +black-footed albatross +petrel +shearwater +storm petrel +aquatic mammal +cetacean +whale +baleen whale +rorqual +toothed whale +beaked whale +dolphin +bottlenose dolphin +porpoise +sea cow +carnivore +pinniped mammal +seal +eared seal +fur seal +fur seal +South American sea lion +California sea lion +Australian sea lion +Steller sea lion +earless seal +walrus +canine +bitch +dog +cur +toy dog +toy spaniel +English toy spaniel +hunting dog +hound +coonhound +dachshund +foxhound +wolfhound +greyhound +terrier +bullterrier +rat terrier +Manchester terrier +fox terrier +wirehair +Welsh terrier 
+schnauzer +Skye terrier +sporting dog +retriever +pointer +setter +spaniel +springer spaniel +water spaniel +working dog +watchdog +shepherd dog +Belgian sheepdog +pinscher +Sennenhunde +mastiff +bulldog +guide dog +sled dog +liver-spotted dalmatian +spitz +griffon +corgi +poodle +wolf +coydog +wild dog +striped hyena +brown hyena +spotted hyena +aardwolf +fox +black fox +silver fox +blue fox +feline +cat +domestic cat +tom +blue point Siamese +wildcat +common lynx +Canada lynx +bobcat +spotted lynx +caracal +big cat +leopardess +panther +lioness +lionet +Bengal tiger +tigress +saber-toothed tiger +bear +Syrian bear +grizzly +Alaskan brown bear +cinnamon bear +viverrine +civet +Indian mongoose +ichneumon +slender-tailed meerkat +suricate +bat +fruit bat +carnivorous bat +leafnose bat +false vampire +vespertilian bat +long-eared bat +freetail +vampire bat +predator +game +game bird +fossorial mammal +tetrapod +insect +beetle +two-spotted ladybug +Mexican bean beetle +Hippodamia convergens +vedalia +bombardier beetle +calosoma +searcher +firefly +sawyer +pine sawyer +flea beetle +Colorado potato beetle +carpet beetle +clerid beetle +lamellicorn beetle +scarabaeid beetle +scarab +tumblebug +dorbeetle +June beetle +melolonthid beetle +elaterid beetle +snout beetle +boll weevil +blister beetle +bark beetle +darkling beetle +flour beetle +seed beetle +pea weevil +bean weevil +rice weevil +louse +flea +dipterous insect +gall midge +housefly +tsetse fly +blowfly +bluebottle +greenbottle +flesh fly +tachina fly +gadfly +botfly +human botfly +sheep botfly +warble fly +horsefly +bee fly +fruit fly +louse fly +horn fly +mosquito +gnat +fungus gnat +hymenopterous insect +drone +worker +honeybee +Africanized bee +black bee +Carniolan bee +Italian bee +carpenter bee +bumblebee +cuckoo-bumblebee +andrena +Nomia melanderi +leaf-cutting bee +mason bee +potter bee +wasp +vespid +paper wasp +hornet +sphecoid wasp +digger wasp +chalcid fly +sawfly +pharaoh ant +little black ant +army 
ant +carpenter ant +fire ant +wood ant +slave ant +Formica fusca +slave-making ant +sanguinary ant +bulldog ant +Amazon ant +termite +dry-wood termite +orthopterous insect +short-horned grasshopper +locust +migratory locust +migratory grasshopper +long-horned grasshopper +katydid +mormon cricket +sand cricket +mole cricket +European house cricket +field cricket +tree cricket +snowy tree cricket +phasmid +diapheromera +oriental cockroach +American cockroach +Australian cockroach +German cockroach +giant cockroach +praying mantis +hemipterous insect +leaf bug +mirid bug +lygus bug +lygaeid +coreid bug +heteropterous insect +water bug +water strider +assassin bug +homopterous insect +whitefly +sweet-potato whitefly +coccid insect +scale insect +soft scale +armored scale +mealybug +plant louse +aphid +greenfly +woolly aphid +adelgid +dog-day cicada +seventeen-year locust +spittle insect +plant hopper +psocopterous insect +psocid +booklouse +ephemerid +neuropteron +green lacewing +brown lacewing +odonate +trichopterous insect +caseworm +thysanuran insect +bristletail +thysanopter +thrips +earwig +lepidopterous insect +butterfly +nymphalid +fritillary +emperor butterfly +danaid +pierid +small white +large white +southern cabbage butterfly +blue +copper +American copper +hairstreak +Strymon melinus +moth +tortricid +lymantriid +geometrid +cankerworm +pyralid +tineoid +tineid +clothes moth +gelechiid +grain moth +noctuid moth +cutworm +underwing +hawkmoth +bombycid +saturniid +giant silkworm moth +silkworm +arctiid +lasiocampid +tent caterpillar +webworm +webworm moth +caterpillar +bollworm +woolly bear +larva +grub +pupa +queen +echinoderm +basket star +edible sea urchin +sand dollar +heart urchin +crinoid +trepang +lagomorph +leporid +rabbit +eastern cottontail +swamp rabbit +marsh hare +leveret +European hare +jackrabbit +white-tailed jackrabbit +blacktail jackrabbit +polar hare +snowshoe hare +pika +rodent +mouse +rat +pocket rat +field mouse +brown rat +jerboa rat 
+water rat +New World mouse +wood mouse +wood rat +vole +packrat +Eurasian hamster +golden hamster +gerbil +lemming +pied lemming +Old World porcupine +brush-tailed porcupine +long-tailed porcupine +New World porcupine +Canada porcupine +pocket mouse +kangaroo rat +jumping mouse +jerboa +dormouse +gopher +squirrel +tree squirrel +ground squirrel +prairie dog +American flying squirrel +groundhog +hoary marmot +yellowbelly marmot +Old World beaver +New World beaver +cavy +naked mole rat +ungulate +hyrax +odd-toed ungulate +equine +horse +foal +colt +male horse +stallion +mare +saddle horse +warhorse +pony +mustang +bronco +wild horse +pony +racehorse +racer +harness horse +workhorse +draft horse +trotting horse +ass +domestic ass +wild ass +onager +common zebra +mountain zebra +grevy's zebra +rhinoceros +tapir +even-toed ungulate +swine +piglet +porker +peccary +ruminant +bovid +bovine +ox +cattle +bull +cow +beef +Brahman +dairy cattle +Old World buffalo +Indian buffalo +carabao +Asian wild ox +American bison +wisent +sheep +lamb +domestic sheep +wild sheep +mountain sheep +goat +domestic goat +wild goat +goat antelope +antelope +Thomson's gazelle +Gazella subgutturosa +springbok +kudu +harnessed antelope +eland +waterbuck +oryx +deer +stag +red deer +mule deer +roe deer +caribou +chevrotain +camel +domestic llama +guanaco +alpaca +giraffe +musteline mammal +ermine +stoat +New World least weasel +Old World least weasel +longtail weasel +American mink +ferret +muishond +snake muishond +striped muishond +river otter +Eurasian otter +striped skunk +hooded skunk +hog-nosed skunk +spotted skunk +American badger +Eurasian badger +ferret badger +hog badger +marten +pachyderm +edentate +peba +apar +tatouay +peludo +giant armadillo +pichiciago +sloth +anteater +primate +ape +anthropoid ape +hominoid +hominid +homo +Homo erectus +Homo sapiens +australopithecine +great ape +western lowland gorilla +eastern lowland gorilla +mountain gorilla +silverback +western chimpanzee 
+eastern chimpanzee +central chimpanzee +pygmy chimpanzee +lesser ape +monkey +Old World monkey +talapoin +grivet +vervet +green monkey +chacma +mandrill +drill +rhesus +bonnet macaque +Barbary ape +crab-eating macaque +entellus +guereza +New World monkey +true marmoset +pygmy marmoset +tamarin +silky tamarin +pinche +lemur +tarsier +flying lemur +proboscidean +elephant +mammoth +procyonid +raccoon +fish +food fish +young fish +crossopterygian +lungfish +catfish +silurid +bullhead +channel catfish +gadoid +cod +hake +elver +common eel +tuna +moray +conger +teleost fish +clupeid fish +shad +herring +sardine +pilchard +anchovy +salmonid +salmon +Atlantic salmon +trout +brown trout +char +whitefish +smelt +tarpon +ribbonfish +toadfish +needlefish +flying fish +spiny-finned fish +percoid fish +perch +pike-perch +walleye +robalo +pike +pickerel +sunfish +crappie +freshwater bream +black bass +bass +serranid fish +grouper +hind +surfperch +cardinalfish +remora +carangid fish +jack +moonfish +pompano +scad +dolphinfish +characin +cichlid +snapper +grunt +sparid +sea bream +porgy +sciaenid fish +croaker +whiting +sea trout +mullet +goatfish +mullet +silversides +barracuda +sea chub +butterfly fish +damselfish +clown anemone fish +wrasse +blenny +pikeblenny +gunnel +goby +gempylid +scombroid +mackerel +Spanish mackerel +tuna +bonito +sailfish +billfish +marlin +tripletail +mojarra +ganoid +Pacific sturgeon +beluga +scorpaenoid +scorpaenid +scorpionfish +rockfish +lumpfish +greenling +gurnard +sea robin +plectognath +triggerfish +filefish +boxfish +spiny puffer +ocean sunfish +flatfish +righteye flounder +lefteye flounder +whiff +sole +abbey +abbey +abrader +accelerator +accessory +accommodation +acoustic device +acoustic modem +acrylic +action +actuator +adhesive bandage +adjustable wrench +aeolian harp +aerosol +after-shave +airbus +aircraft +airfield +airfoil +air gun +airplane +air pump +air-to-air missile +air-to-ground missile +alarm +alb +alcazar +Allen screw +alms 
dish +altimeter +Amati +ammeter +ammunition +amplifier +analog computer +analytical balance +anchor +anchor chain +aneroid barometer +angledozer +anklet +antenna +anteroom +antiaircraft +antiballistic missile +apartment +apartment building +aperture +apparatus +apparel +appliance +appliance +applicator +aquarium +arbor +arcade +arch +arc lamp +area +argyle +arm +armament +armature +armchair +armoire +armor +armored vehicle +armor plate +armrest +array +arrow +artificial heart +artillery +assembly +assembly plant +astrodome +astronomical telescope +athletic sock +atom bomb +atomic clock +atomizer +attachment +attack submarine +attire +audiocassette +audio system +audiotape +auditorium +autoclave +autoinjector +autoloader +automat +automat +automatic firearm +automatic rifle +automaton +auxiliary research submarine +awl +ax +axis +axle +axletree +baby bed +baby buggy +baby grand +back +background +backseat +badminton equipment +badminton racket +bag +bag +bag +baggage +bagpipe +bait +balance +balcony +balcony +bale +ball +ball gown +ballistic missile +ballistic pendulum +ball-peen hammer +ballroom +band +bandage +bandanna +banderilla +bar +bar +barbed wire +barge +barge pole +barn door +barograph +barrack +barrage balloon +barrel knot +barrel vault +barrier +barroom +base +base +baseball equipment +basilica +basin +basket +basketball equipment +bass +bass drum +bass horn +bastion +bat +bathhouse +battery +battle-ax +battle dress +battleship +bay rum +bay window +beading plane +beam +beam balance +bearing +beater +beating-reed instrument +bed +bed +bedclothes +bedroom +bedroom furniture +bedspread +bedspring +beehive +beer barrel +bell +bell push +bell tower +belt +belt buckle +bench +berlin +berth +besom +bevel gear +bicycle +bicycle chain +bier +billiard ball +bin +binding +bin liner +binocular microscope +bioscope +birchbark canoe +bird shot +bistro +bit +bit +black tie +blade +blade +blanket +blimp +blind +block +block plane +blouse +blower +blowtorch +bludgeon 
+boarding +boarding house +boardroom +boat +bobbin +body +body armor +body lotion +boiler +bolt +bolt +bomb +bomber +bongo +boom +boom +boomerang +boot +booth +booth +bore bit +Boston rocker +bota +bottle +bottle opener +bow +bow +bowed stringed instrument +bowl +bowl +bowline +bowling equipment +bowling pin +bowsprit +box +box +boxcar +boxing equipment +brace +brace +bracelet +bracket +brake +brake system +brass +brasserie +brazier +breechcloth +breeches +brewpub +brick +bricklayer's hammer +brickwork +bridal gown +bridge +briefcase +brigandine +brilliant pebble +brim +broad arrow +broadax +broad hatchet +broadsword +brush +bubble jet printer +buffer +buffet +building +building complex +bulldozer +bullet +bullhorn +bullnose +bundle +bunker +burial chamber +burner +bus +business suit +butt joint +button +buttress +butt shaft +buzz bomb +cabaret +caber +cabin +cabin +cabinet +cabinet +cabin liner +cable +cable +cafe +cafeteria +cafeteria tray +caff +cage +calculator +caliper +calorimeter +camera +camera lens +camera tripod +camp +camp +camp chair +camper +can +canal +candelabrum +candlestick +cane +cannikin +cannon +cannon +cannonball +canopy +canteen +canteen +canvas +canvas tent +cap +cap +cap +capacitor +caparison +cape +cap screw +capsule +car +car +carbine +carbon arc lamp +card index +cardioid microphone +car door +cargo liner +cargo ship +carillon +carpenter's hammer +carpenter's level +carpenter's mallet +carpenter's rule +carpet tack +carriage +carriage +carriage bolt +carrick bend +carrier +car seat +cart +cartridge +cartridge belt +cartridge holder +case +case +cashbox +casque +casserole +cassock +catch +catcher's mask +cathedra +cathedral +cathedral +catheter +cathode +cathode-ray tube +cat's-paw +cattle car +cautery +cavalry sword +cedar chest +cell +cell +cellblock +center +centrifuge +ceramic +ceramic ware +chain tongs +chair +chair of state +chalk +chamfer plane +chandlery +chapel +character printer +chassis +chasuble +chatelaine +checker 
+cheeseboard +chemical reactor +chessman +chest of drawers +child's room +china +chip +chip +chisel +choke +chokey +chordophone +chronoscope +chuck +church key +cigar lighter +circle +circuit +circuit board +circular plane +circular saw +cistern +civilian clothing +clamp +clamshell +clarinet +classroom +clavier +cleaning implement +cleaning pad +clean room +clinic +clip +cloak +clock +closed circuit +closed-circuit television +closet +cloth covering +clothes closet +clothes dryer +clothes hamper +clothes tree +clothing +clothing store +clout nail +clove hitch +clutch +coach +coal car +coal shovel +coat +coat closet +coating +coating +coat of paint +coaxial cable +cocked hat +coffee cup +coffee maker +coffer +coffin +coil +colander +collider +cologne +colonnade +color television +Colt +column +column +comb +comb +combination plane +combine +commissary +commodity +communication system +commutator +compact disk +compartment +compass +compass card +compound lens +compound lever +compressor +computer +computer circuit +computer network +computer screen +computer system +concentration camp +concert grand +concertina +condenser +condenser +condenser microphone +conductor +connecting rod +connection +conservatory +conservatory +contact +container +contrabassoon +control +control panel +control system +convent +converging lens +converter +convertible +conveyance +cooker +cooking utensil +cooler +cooling system +cord +cord +cordage +corner +correctional institution +corset +cosmetic +costume +costume +cotter +cotton +counter +counter +counter +counter tube +country house +coupling +court +court +coverall +covering +cowbarn +craft +cravat +crazy quilt +cream +cream pitcher +crematory +crepe +crib +cricket equipment +croquet equipment +crossbar +crossbow +crosspiece +crown jewels +cruiser +cruiser +cruise ship +crystal microphone +cudgel +cuff +cultivator +cup +cupboard +cupola +curb roof +curtain +cutout +cutter +cutting implement +cybercafe +cyclotron +cylinder +cymbal +dado 
plane +dagger +damper +dart +data converter +data input device +davenport +davenport +davit +dead axle +deck +deck +deck chair +deep-freeze +defensive structure +delay line +delicatessen +dental appliance +denture +depilatory +depressor +depth finder +derrick +destroyer +detector +detector +detonating fuse +detonator +developer +device +dial +dialyzer +diathermy machine +diesel locomotive +digital camera +digital computer +digital display +diner +dinghy +dining car +dining-hall +dining room +dining-room furniture +dining-room table +dinner dress +dinner pail +dinner table +diode +dip +diplomatic building +dipper +DIP switch +directional antenna +directional microphone +direction finder +disguise +dish +dish +disk +dispenser +display +display panel +distillery +ditch +ditch spade +dive bomber +doll +dolmen +domino +door +doorbell +doorlock +doornail +dormer window +dormitory +dot matrix printer +double-breasted suit +double-reed instrument +douche +dovecote +dovetail plane +downstage +drafting instrument +Dragunov +drawstring bag +dray +dredging bucket +dress +dress blues +dressing +dress uniform +drill +electric drill +drill rig +drinking fountain +drinking vessel +drip mat +drip pot +drive +drive +drogue +drogue parachute +drop-leaf table +dry battery +dry dock +dryer +dry masonry +dry wall +dugout canoe +dumdum +dumpcart +dune buggy +dungeon +duplicator +dustmop +dwelling +earphone +earthenware +easel +easy chair +edge tool +eiderdown +elastic bandage +electrical converter +electrical device +electric bell +electric frying pan +electric furnace +electric heater +electric lamp +electric motor +electric refrigerator +electro-acoustic transducer +electrode +electromagnet +electronic balance +electronic device +electronic equipment +electronic instrument +electronic voltmeter +electron microscope +electrostatic generator +electrostatic printer +elevator +embankment +embellishment +enamel +enamelware +enclosure +endoscope +engine +engine +ensemble +entrenching tool 
+epidiascope +equipment +eraser +escutcheon +espadrille +espresso shop +establishment +estaminet +exercise device +exhaust fan +exhibition hall +Exocet +expansion bolt +explosive device +external-combustion engine +extractor +fabric +face mask +face veil +facing +factory +fairlead +false face +fan +farm building +farm machine +fastener +fatigues +faucet +feedback circuit +fence +fencing sword +fender +ferry +fetoscope +field-sequential color television +fife +fifth wheel +fighter +figure eight +file +file server +filling +film +film +filter +filter +finery +finisher +fipple flute +fire +firearm +fire iron +fireplace +firkin +fisherman's bend +fisherman's knot +fisherman's lure +fishing boat +fishing rod +fishnet +flag +flageolet +flambeau +flannelette +flap +flashlight +flask +flatcar +flat tip screwdriver +fleet ballistic missile submarine +flight simulator +flip-flop +floating dock +floor +floor +floor cover +fly +flywheel +fob +foghorn +folder +food hamper +footbath +footbridge +foothold +foot rule +footwear +footwear +forceps +fore-and-aft sail +foremast +fore plane +fore-topmast +fork +formalwear +fortification +fortress +foundation garment +foundry +fragmentation bomb +framework +free-reed instrument +freight train +French door +friary +friction clutch +frigate +frill +frock coat +front projector +fruit machine +full-dress uniform +full metal jacket +funny wagon +fur hat +furnace +furnishing +furniture +fuse +gable +gable roof +gaff +galleon +gallery +galley +galley +gallows +galvanometer +gambling house +game +game equipment +gamp +garage +Garand rifle +garden +garden spade +garden tool +garment +gas burner +gas-discharge tube +gasket +gasoline engine +gate +gatehouse +gatepost +gathered skirt +gauge +gauze +gauze +gavel +gear +gear +gear +gearing +general-purpose bomb +generator +generator +Geneva gown +geodesic dome +girder +glass +glider +glove +glyptic art +goal +golf club +golf equipment +Gordian knot +Gothic arch +government building +government office 
+gown +gramophone +granary +granny knot +grapnel +grapnel +grate +graver +greasy spoon +greatcoat +great hall +greengrocery +grenade +grillroom +groined vault +Guarnerius +guidance system +guided missile +guildhall +guitar +guitar pick +gun +gun carriage +gunlock +gunsight +gun trigger +gurney +gymnastic apparatus +gym shoe +gypsy cab +habergeon +habit +hairdressing +hairpiece +hairpin +half hatchet +half hitch +hall +hall +hammer +hand +handbell +handbow +handcart +hand glass +handloom +hand lotion +hand mower +handsaw +hand shovel +hand tool +handwear +handwheel +hanger +hank +harpsichord +harrow +hash house +hat +hatch +hauberk +hawser bend +hazard +head +head +head covering +headdress +header +headgear +headlight +headsail +headscarf +health spa +heat engine +heater +heat lamp +heat-seeking missile +heavier-than-air craft +heckelphone +hedge +helicopter +helm +helmet +helmet +heraldry +high altar +high-angle gun +high gear +high table +hinge +hip boot +hitch +hoe +hogshead +hoist +holder +holding device +home appliance +homespun +hood +hood +hood +hook +Hoover +hope chest +horn +horn button +horse +horsecar +horse-drawn vehicle +horsehair wig +hosiery +hospital +hospital room +hostel +hot-air balloon +hotel +hotel room +hot tub +house +house +housing +hovel +huarache +humeral veil +hut +hutch +hydraulic brake +hydraulic system +hydroelectric turbine +hydrofoil +hydrometer +hygrometer +hypermarket +hypodermic syringe +ice machine +ice rink +ice skate +icetray +ignition switch +impact printer +implant +implement +imprint +improvised explosive device +inclined plane +indicator +induction coil +ink-jet printer +inkstand +institution +instrument +instrument of punishment +instrument of torture +interceptor +interchange +intercommunication system +intercontinental ballistic missile +interface +interior door +internal-combustion engine +ionization chamber +video iPod +iron +jack +jack +jacket +jacket +jack plane +jail +jamb +jar +jeroboam +jet +jet engine +jewelled 
headdress +jib +jibboom +jiggermast +joint +jointer +joist +jolly boat +jug +jumper +jumper cable +junction +junction +jury mast +kayak +keel +keg +kerchief +kettle +key +key +keyboard +keyboard instrument +khakis +kiln +kinescope +kingbolt +kirk +kit +kit +kitbag +kitchen +kitchen appliance +kitchen utensil +kite balloon +knee-high +knife +knife +knit +knob +lace +lacquer +ladder truck +lag screw +lamasery +laminate +lamination +lamp +lamp +landing gear +land mine +lantern +lapel +lathe +lattice +launcher +lead-acid battery +leather strip +Leclanche cell +leg +legging +lens +lens implant +level +lever +Levi's +lid +life buoy +life jacket +life preserver +lifting device +ligament +light +light-emitting diode +lighter-than-air craft +lighting +light microscope +linear accelerator +line printer +lingerie +lining +liquid crystal display +lister +living quarters +living room +local area network +lock +locomotive +lodge +lodging house +loft +loft +longbow +lookout +loom +loop knot +lota +lounge +loungewear +love knot +lunchroom +luxury liner +lyre +machine +machine +machine bolt +machine gun +machinery +machine screw +machine tool +magic lantern +magnet +magnetic disk +magnetic recorder +magnetic tape +magnifier +magnum +magnus hitch +mailer +mainframe +mainmast +main-topmast +main yard +makeup +mallet +mallet +mallet +mandolin +manger +man-of-war +manometer +MANPAD +mansard +mansion +marina +marker +marketplace +maser +mask +masonry +mass spectrometer +mast +mast +mat +mat +match +match +match plane +material +materiel +Matthew Walker +maul +measure +measuring instrument +measuring stick +mechanical device +mechanical system +mechanism +medical building +medical instrument +memorial +memory +memory chip +memory device +menhir +man's clothing +mercantile establishment +mercury barometer +mercury thermometer +mercury-vapor lamp +mess +metal screw +meteorological balloon +meter +meterstick +microbalance +microfilm +microscope +military hospital +military quarters 
+military vehicle +mill +milldam +millinery +mine +minibike +mink +minster +Minuteman +mirror +mixer +mizzenmast +module +mold +moldboard plow +monitor +monitor +morgue +mortise joint +motion-picture camera +motion-picture film +motor +motorboat +motorcycle +motor hotel +motor vehicle +mound +mount +mouse button +movie projector +moving-coil galvanometer +mug +multiplex +multiplexer +musette pipe +mushroom anchor +musical instrument +musket +musket ball +muslin +muzzle loader +narrowbody aircraft +nautilus +navigational system +naval equipment +naval gun +naval radar +naval weaponry +navigational instrument +nebuchadnezzar +neckline +neckpiece +necktie +neckwear +needle +needlework +negligee +net +net +net +net +network +network +night bell +nightwear +noisemaker +nonsmoker +non-volatile storage +nose flute +nuclear reactor +nuclear weapon +nursery +oar +oblique bandage +oboe da caccia +oboe d'amore +obstacle +office +office furniture +oil lamp +oil paint +oil tanker +olive drab +omnidirectional antenna +onion dome +open-air market +open-end wrench +opener +openside plane +ophthalmoscope +optical device +optical disk +optical instrument +optical telescope +organ pipe +outbuilding +outerwear +outfit +outrigger canoe +outside mirror +oven +overgarment +overhand knot +overhang +overhead projector +overnighter +overshoe +oxford +package +packaging +packing box +paddle +paddle steamer +page printer +paint +pallium +pan +pan +panic button +panopticon +panopticon +pantechnicon +pantry +pants suit +panzer +paper chain +paper fastener +parabolic reflector +parapet +parasail +parka +parsonage +particle detector +partition +passenger ship +passenger train +passenger van +passive matrix display +passkey +patch +patchouli +patchwork +patina +patisserie +pavis +peavey +pedal +pedestal table +pedestrian crossing +pedicab +peg +pen +penal institution +pencil +pendulum +pendulum clock +percolator +percussion instrument +perfumery +peripheral +periwig +personal computer +petticoat 
+Phillips screw +Phillips screwdriver +phonograph record +photographic equipment +photographic paper +photometer +physical pendulum +piano +piccolo +pick +pick +pickle barrel +piece of cloth +pile +pillow lace +pilothouse +pin +pincer +pinstripe +pipe +pipet +pipe wrench +pistol +pivot +place of business +place of worship +planetarium +planner +plant +planter +plasterboard +plastic laminate +plastic wrap +plastron +plate +platform +platform +platform rocker +plating +pleat +plethysmograph +plexor +pliers +plug +plug +pneumatic drill +pocket +pocket-handkerchief +pocketknife +pointed arch +polyester +polygraph +pomade +pontifical +pool ball +poorhouse +porcelain +porch +portable computer +portico +post +posthole digger +pot +potential divider +potpourri +pottery +pouch +poultice +powder +powder keg +power brake +power mower +power saw +power shovel +power tool +press +press +pressure dome +pressure gauge +pressure suit +printed circuit +printer +prison camp +prod +prolonge knot +prompter +prong +propeller +propeller plane +prosthesis +protective covering +protective garment +pruning saw +pruning shears +public house +public toilet +public transport +pull +pull chain +pulley +Pullman +pullover +pulse counter +pump +pump +pump house +punch +punch press +purifier +push broom +push button +pusher +puzzle +pyrometer +pyx +QWERTY keyboard +racing boat +rack +rack +radar +radiogram +radio interferometer +radio link +radiometer +radio receiver +radiotelegraph +radiotelephone +radio transmitter +raft +rail +rail fence +railing +raincoat +rake +ramp +rampart +random-access memory +rayon +razor +reaction-propulsion engine +reactor +reading lamp +reading room +read-only memory +rearview mirror +receiver +receptacle +reception room +recess +reconnaissance plane +recorder +recording +record player +recreation room +recycling bin +reed stop +reef knot +refectory table +refinery +reflecting telescope +reflector +reformatory +refracting telescope +refrigerator car +refuge +regalia 
+regimentals +regulator +rein +religious residence +removable disk +repair shop +repeating firearm +reproducer +rescue equipment +reservoir +reset button +residence +resistor +resonator +respirator +restraint +retort +rheostat +rib +ribbed vault +riddle +ride +riding boot +riding mower +rifle ball +rig +rink +river boat +road +roadway +robe +rocket +rocket +rod +roller +roller +in-line skate +roller blind +roller coaster +rolling hitch +Rolodex +Roman building +roof +roof +room +roost +rope +rose water +rotary engine +rotating mechanism +rotating shaft +rotisserie +rotor +round arch +router plane +row house +royal mast +rubber bullet +rug +rushlight +sable +sable coat +sack +sackbut +sacking +saddle +safe +safety belt +safety curtain +safety fuse +safety match +sail +sailboat +sailing vessel +salver +sandglass +sash +satellite +satellite television +saucepan +savings bank +saw +sawhorse +scale +scarf +school +scientific instrument +scissors +scoop +scratcher +screen +screen +screen +screw eye +scrub plane +scuffer +sculpture +sea boat +sea chest +seam +seaplane +seat +seat +second hand +secretary +security system +seeker +selector +self-propelled vehicle +semiautomatic firearm +semiautomatic pistol +semiconductor device +serger +serpent +serving cart +serving dish +set +setscrew +setscrew +sewing needle +sextant +shackle +shade +shaft +shag rug +shaker +shaper +shaping tool +sharpener +shaving cream +shaving foam +shawl +shawm +shears +sheath +shed +sheepshank +sheet bend +shelf +shell +shell +shell +shellac +shelter +shelter +shelter +shield +ship +shipboard system +shirt +shirtfront +shock absorber +shoe +shooting brake +shop +short pants +shotgun +shoulder holster +shrine +shutter +shuttle +sidewinder +sieve +sifter +sights +signaling device +signboard +silk +simulator +single bed +single-breasted suit +single-reed instrument +sitz bath +six-pack +skate +skein +skeleton +skewer +skidder +skid lid +skiff +ski pole +skirt +ski tow +skullcap +slack suit +slat +sled 
+sleeper +sleeping car +sleeve +sleeve +slide projector +slipknot +slipper +sloop +slop pail +slot machine +small boat +smart bomb +smoker +smooth plane +snack bar +snap-brim hat +snare drum +sniper rifle +Sno-cat +soapbox +socle +sofa +sonograph +sorter +sound recording +soup ladle +source of illumination +soutane +spacecraft +spade +spar +spatula +spear +spear +spectacles +spectrograph +spectroscope +speedometer +spider +spike +spike +spinet +spinning machine +spiral ratchet screwdriver +spiral spring +spit +spokeshave +sponge mop +spoon +sports equipment +sports implement +sportswear +spot +spring +spring balance +springboard +sprit +square +square knot +squash racket +squawk box +squeezer +squinch +stabilizer +stabilizer +stable gear +stadium +stall +stamp mill +stand +standard cell +staple +starter +state prison +station +statue +stay +steakhouse +stealth aircraft +stealth bomber +stealth fighter +steam bath +steamboat +steamer +steam iron +steam whistle +steel mill +steelyard +steeple +steering system +step +step-up transformer +stereo +stick +stick +still +stilt +Stinger +stock +stockcar +stock car +stocking +stonework +stool +stopper knot +storage battery +storage space +storeroom +stove +stove bolt +Stradavarius +straight chair +strap +strap +stringed instrument +strip +strongbox +stronghold +strongroom +structural member +structure +stylus +submachine gun +submersible +submersible +subwoofer +suction pump +suede cloth +sunbonnet +sunhat +supermarket +superstructure +supply chamber +support +support +support column +supporting structure +supporting tower +surface lift +surface-to-air missile +surgeon's knot +surgical instrument +surgical knife +surplice +surveillance system +surveying instrument +surveyor's level +swamp buggy +sweater +swimsuit +sword +synchrotron +system +tabi +table +table +table knife +tableware +tabor +tachometer +tack +tack hammer +talaria +tambour +tambourine +tampon +tank +tank car +tannoy +tape +tape deck +tape recorder +target 
+tavern +tea chest +teaching aid +tea gown +teashop +teaspoon +tea-strainer +tea tray +telecommunication system +telephone +telephone line +telephone receiver +telephone system +telephone wire +telescope +television antenna +television camera +television equipment +television monitor +temple +temple +tender +tennis racket +tenor drum +tenoroon +tenpenny nail +tent +tenterhook +terminal +terminal +test rocket +tetraskelion +textile machine +textile mill +theater +theodolite +thermometer +thermostat +three-piece suit +three-way switch +thumbscrew +thumbtack +tights +tile +timber +timber hitch +timbrel +time-fuse +timepiece +timer +time-switch +tire chain +tithe barn +toecap +toga +toggle switch +toilet +toilet powder +toiletry +toilet water +token +tomograph +toner +tongs +tool +toolbox +tooth +toothbrush +top +top +topgallant +topmast +topsail +torpedo +torpedo boat +touch screen +towel +toweling +tower +toy box +track +tracked vehicle +trailer +trailer +train +trammel +transdermal patch +transformer +transistor +transmission +transmitter +transporter +trap +trapeze +travel iron +treasure chest +trellis +trench +trial balloon +triclinium +troop carrier +trough +trouser +trowel +truck +trunk +try square +tube +tuck shop +tun +tunic +turbine +Turkish towel +Turk's head +turner +turntable +turtleneck +tweed +tweeter +twenty-two +two-piece +typesetting machine +typewriter +ultraviolet lamp +undercarriage +undergarment +underpants +underwear +uneven parallel bars +uniform +university +uplift +urn +urn +utensil +vacuum flask +valve +van +van +varnish +vehicle +veranda +vertical file +vessel +vessel +vest +vibrator +vibrator +videocassette +video recording +vigil light +viol +vise +vivarium +voltaic cell +voltmeter +wagon +waist pack +walking stick +wall +wall +wall unit +ward +warehouse +warship +wash +washer +washtub +watch +watchtower +water-base paint +water butt +water cart +watercolor +water-cooled reactor +water gauge +water ski +waterwheel +weapon +weaponry 
+weatherglass +weathervane +web +wedge +wedge +weighbridge +weight +weir +weld +well +whaler +wheel +wheelchair +wheeled vehicle +wheelwork +whetstone +whip +whisk +whispering gallery +white goods +whorehouse +wicker basket +widebody aircraft +winch +Winchester +wind instrument +window +window +window blind +window envelope +Windsor knot +wine bucket +wine cask +wineglass +wire +wire +wire matrix printer +wiring +woman's clothing +wood +woodenware +woodscrew +woodwind +woofer +workbasket +workbench +work-clothing +worktable +workwear +wrapping +wrench +writing desk +writing implement +X-ray film +X-ray machine +yacht chair +yard +yard +yardstick +yoke +zither +zoot suit +grain +light +colorlessness +chromatic color +black +gray +dark red +orange +yellow +green +blue +purple +reddish purple +pink +light brown +reddish brown +complexion +skin +epidermal cell +columnar cell +macule +specimen +milk +embryonic cell +leukocyte +neutrophil +astrocyte +exoskeleton +medium +film +press +print media +storage medium +journalism +photojournalism +newspaper +telecommunication +telephone +call +long distance +wireless +broadcasting +television +reception +chat room +portal site +wordbook +album +concept album +magazine +movie +sign +comestible +course +dainty +dish +fare +diet +dietary supplement +liquid diet +reducing diet +vegetarianism +ration +field ration +foodstuff +starches +concentrate +meal +roughage +flour +wheat flour +nutriment +commissariat +canned food +canned meat +meal +breakfast +lunch +dinner +supper +buffet +picnic +cookout +bite +entree +side dish +casserole +chicken casserole +appetizer +cocktail +hors d'oeuvre +relish +dip +soup +madrilene +broth +broth +chowder +clam chowder +stew +goulash +fish stew +fricassee +ragout +ready-mix +powdered sugar +granulated sugar +brown sugar +sweet +confiture +candy +hard candy +patty +brittle +chewing gum +candied fruit +candied citrus peel +fudge +gumdrop +mint +kiss +lozenge +taffy +dessert +dumpling +frozen dessert 
+mousse +mousse +whip +pudding +pudding +tipsy cake +ice +chocolate ice cream +Neapolitan ice cream +peach ice cream +strawberry ice cream +tutti-frutti +vanilla ice cream +split +pudding +custard +pastry +turnover +puff paste +phyllo +fish cake +conserve +jam +jelly +apple jelly +marmalade +gelatin +gelatin dessert +patty +stuffing +bread +breadstick +bun +cracker +dark bread +flatbread +loaf of bread +quick bread +rye bread +toast +white bread +French bread +cornbread +johnnycake +muffin +scone +onion roll +sweet roll +onion bagel +biscuit +baking-powder biscuit +soft pretzel +sandwich +hamburger +gruel +edible fruit +vegetable +crudites +legume +greens +solanaceous vegetable +root vegetable +potato +baked potato +sweet potato +snack food +corn chip +tortilla chip +cruciferous vegetable +cabbage +kale +red cabbage +savoy cabbage +squash +summer squash +yellow squash +winter squash +turban squash +gherkin +sprout +beet +pepper +sweet pepper +hot pepper +chili +jalapeno +onion +Spanish onion +salad green +lettuce +butterhead lettuce +bean +pea +green pea +common bean +fresh bean +green bean +shell bean +lima bean +soy +celery +chicory +coffee substitute +chicory escarole +corn +hominy +cress +tomato +cherry tomato +salsify +turnip +edible nut +apple +eating apple +Delicious +McIntosh +Pippin +cooking apple +berry +currant +citrus +temple orange +mandarin +bitter orange +sweet orange +Jaffa orange +navel orange +Valencia orange +lime +almond +plum +dried fruit +raisin +passion fruit +cocoa +melon +muskmelon +winter melon +cherry +sweet cherry +heart cherry +sour cherry +grape +fox grape +muscadine +slipskin grape +vinifera grape +Tokay +cherimoya +soursop +sweetsop +ilama +pond apple +olive +pear +edible seed +walnut +feed +fodder +oil cake +timothy +grain +barley +wheat +rice +mash +bird feed +petfood +salad +tossed salad +combination salad +pasta salad +fruit salad +ingredient +flavorer +condiment +herb +spice +cinnamon +pepper +garlic +mustard +sage +savory 
+curry +paprika +pickle +sweet pickle +vinegar +sauce +hot sauce +dressing +mayonnaise +cheese sauce +hot-fudge sauce +white sauce +spaghetti sauce +boiled egg +hard-boiled egg +Easter egg +omelet +firm omelet +souffle +dairy product +milk +milk +powdered milk +cream +butter +clarified butter +yogurt +curd +cheese +cream cheese +bleu +cheddar +Swiss cheese +spread +pate +sweetening +sugar +syrup +batter +bread dough +chicken and rice +pasta +Tetrazzini +chili dog +fondue +fondue +hash +kabob +seafood Newburg +meatball +pilaf +sausage pizza +pepperoni pizza +cheese pizza +anchovy pizza +Sicilian pizza +porridge +fish loaf +salmon loaf +scallopine +taco +beef burrito +quesadilla +tostada +beverage +concoction +mix +filling +potion +elixir +alcohol +brew +beer +lager +Weissbier +malt +ale +stout +mead +wine +white wine +sparkling wine +Burgundy +Beaujolais +Medoc +Pinot noir +Bordeaux +claret +Chianti +Cabernet +Merlot +dessert wine +Rhine wine +Rioja +Saint Emilion +zinfandel +table wine +vermouth +fortified wine +Madeira +liquor +brandy +gin +rum +whiskey +corn whiskey +Irish +Scotch +liqueur +coffee liqueur +orange liqueur +mixed drink +cocktail +highball +Bloody Mary +daiquiri +manhattan +martini +sling +sour +caffe latte +cider +sweet cider +juice +fruit juice +grape juice +orange juice +fruit drink +mulled wine +soft drink +cola +coffee +punch +champagne cup +claret cup +rickey +tea +tea +herb tea +tisane +black tea +green tea +water +drinking water +mineral water +vitamin pill +collection +suburb +residence +littoral +grassland +pasture +resort +field +air bubble +arroyo +ascent +atoll +bank +bank +bar +barrier reef +basin +beach +burrow +canyon +cave +continental glacier +crag +crater +dale +descent +draw +dune +geological formation +glacier +glen +gorge +gulch +gully +highland +hill +hillside +hole +hollow +iceberg +ice mass +ion +knoll +landfall +landfill +lather +ledge +lowland +meteorite +mountain +mull +natural depression +natural elevation +nullah +ocean 
floor +outcrop +plain +point +precipice +ravine +reef +ridge +ridge +rift valley +rock +sandbank +seaside +shiner +shore +slope +soapsuds +spume +tableland +tideland +volcanic crater +wadi +spiritual leader +adventurer +anomaly +benefactor +commoner +contestant +discussant +entertainer +female +finisher +inhabitant +native +juvenile +lover +male +mediator +national +peer +recipient +sensualist +traveler +unwelcome person +unskilled person +worker +wrongdoer +Black +White +Semite +white man +Mongol +Nahuatl +Caddo +Penutian +Teton +Taracahitian +Slav +Catholic +Altaic +Bornean +Canadian +Central American +Britisher +English person +Englishwoman +Ethiopian +Parisian +Greek +Italian +Japanese +Mexican +Nigerian +North American +Pakistani +South American Indian +Filipino +Polynesian +Scandinavian +South African +South American +Turki +American +New Yorker +abbess +abstainer +academic administrator +accomplice +acquaintance +acquirer +aerialist +actor +actor +addict +adjutant +admirer +adulterer +advertiser +advocate +analyst +ancestor +announcer +announcer +appointee +appreciator +appropriator +archbishop +architect +army engineer +army officer +arrival +articulator +asserter +assistant +associate +astronaut +athlete +attendant +aunt +authoritarian +authority +aviator +back +bad person +ballet dancer +bullfighter +baron +bartender +baseball coach +base runner +basketball player +believer +betrothed +bigot +big shot +biochemist +bisexual +boatman +bond servant +botanist +Boy Scout +buddy +campaigner +captain +card player +careerist +caretaker +cavalryman +celebrity +charmer +child +child +cipher +citizen +civil rights leader +cleaner +clergyman +cleric +clerk +climber +closer +clown +coach +cobbler +collaborator +college student +collegian +commanding officer +commissioned officer +commissioned military officer +commissioner +committee member +communist +compulsive +computer scientist +computer user +contractor +convict +copycat +counselor +craftsman +creditor +critic 
+curate +dancer +dancer +darling +date +daughter +dawdler +deacon +deaf person +debtor +deliveryman +descender +designated hitter +detective +detractor +director +disbeliever +dispatcher +distributor +doctor +domestic partner +draftsman +drinker +drinker +drug addict +drug user +drummer +drunkard +eager beaver +earner +eavesdropper +economist +editor +egotist +elder +elected official +emissary +employee +employer +endomorph +enemy +entrant +examiner +exhibitionist +fan +fancier +farmer +farmhand +fascist +father +female aristocrat +female offspring +female child +fielder +fireman +first baseman +first sergeant +flag officer +flatterer +foe +folk dancer +follower +football player +forefather +forger +founder +free agent +friar +monk +gambler +generator +geneticist +genitor +geologist +girl +godchild +godparent +golfer +grandma +grandmaster +grandparent +granter +great grandchild +great grandparent +grouch +guard +guest +guide +gymnast +Gypsy +hack +hairdresser +hater +headmaster +hearer +hedonist +heir +herder +homeless +horseman +host +host +hypocrite +important person +incumbent +infielder +informer +in-law +insurgent +investigator +investor +journalist +judge +juror +Counsel to the Crown +kinswoman +laborer +lama +landowner +lawgiver +lawman +lawyer +liberator +lieutenant +lineman +literate +litigant +Lord +failure +lowerclassman +lumberman +maid +maker +malcontent +martinet +master of ceremonies +masturbator +medical officer +medical practitioner +medical scientist +mender +meteorologist +middle-aged man +miler +military attache +military officer +military policeman +minister +minor leaguer +misfit +mixed-blood +model +moneymaker +mother +mourner +mover +musician +Muslimah +mystic +nanny +neonate +nephew +neutral +newcomer +newcomer +newspaper editor +niece +noncommissioned officer +nurse +observer +occultist +oldster +old woman +opportunist +orator +originator +outfielder +right fielder +right-handed pitcher +painter +panelist +pardoner +parodist +party 
+passenger +patient +patron +payer +peddler +percussionist +personal representative +personification +pervert +petitioner +Pharaoh +phonetician +physical therapist +physicist +pimp +pisser +pitcher +planner +player +poet +politician +practitioner +prayer +preserver +president +priest +princess +principal +proctor +programmer +promiser +propagandist +prosecutor +psychic +pusher +queen +queen +ranch hand +reader +recruit +recruiter +religious leader +repairman +reporter +representative +reprobate +rescuer +reservist +restrainer +retailer +retiree +revolutionist +rich person +civil authority +runner +running back +rustic +saboteur +sailor +salesman +salesperson +scalper +schemer +scholar +schoolchild +scientist +second baseman +secretary +seeker +selfish person +seller +serf +serviceman +settler +shrew +sibling +sick person +singer +sister +skeptic +skier +sleeper +slob +smith +snoop +social climber +socialist +social scientist +sociologist +soldier +son +songster +sorcerer +sovereign +speaker +specialist +spectator +stand-in +star +stepparent +stock trader +stranger +strategist +student +subordinate +suitor +superior +surgeon +sweetheart +sympathizer +tax assessor +taxonomist +teacher +television reporter +tenant +tenant +tennis player +testator +testee +theologian +therapist +thinker +thrower +toastmaster +trader +traffic cop +trainer +traitor +traveling salesman +tyrant +upstart +upstart +utility man +vacationer +vegetarian +vice president +victim +volunteer +votary +waiter +waitress +wanderer +wanton +washer +white supremacist +wife +winner +winner +woman +workman +worshiper +wright +writer +wilding +bryophyte +liverwort +pteridophyte +fern +fern ally +spore +spermatophyte +perennial +gymnosperm +ephedra +cycad +sago palm +zamia +pine +pinon +nut pine +white pine +yellow pine +larch +fir +silver fir +cedar +spruce +hemlock +douglas fir +cedar +cypress +arborvitae +araucaria +kauri pine +celery pine +yellowwood +gymnospermous yellowwood +yew +angiosperm +dicot 
+flower +wildflower +inflorescence +pistil +pericarp +oilseed +custard apple +barberry +allspice +laurel +anise tree +magnolia +moonseed +buttercup +aconite +baneberry +anemone +thimbleweed +columbine +clematis +delphinium +nigella +wax myrtle +zebrawood +legume +legume +darling pea +clover +acacia +wattle +albizzia +nitta tree +dogbane +allamanda +carissa +frangipani +rauwolfia +arum +alocasia +anthurium +caladium +monstera +nephthytis +arrow arum +calla lily +duckweed +watermeal +birthwort +sandwort +mouse-ear chickweed +pink +china pink +lychnis +silene +chickweed +fig marigold +amaranth +orach +saltbush +beet +sand verbena +four o'clock +echinocactus +prickly pear +pokeweed +portulaca +flame flower +caper +spiderflower +crucifer +cress +watercress +rock cress +cabbage +head cabbage +turnip plant +mustard +wallflower +woad +stock +radish plant +pennycress +poppy +prickly poppy +composite +compass plant +everlasting +achillea +ageratum +ragweed +ammobium +burdock +artemisia +mugwort +aster +wood aster +common daisy +bur marigold +calendula +thistle +carline thistle +catananche +centaury +knapweed +chrysanthemum +golden aster +goldenbush +plume thistle +woolly thistle +coreopsis +fleabane +woolly sunflower +cotton rose +gazania +African daisy +cudweed +gumweed +goldenbush +sneezeweed +sunflower +hawkweed +marsh elder +krigia +hawkbit +blazing star +rattlesnake root +daisybush +coneflower +coneflower +cutleaved coneflower +golden thistle +white-topped aster +goldenrod +sow thistle +marigold +dandelion +crownbeard +zinnia +achene +campanula +orchid +orchis +arethusa +helleborine +coral root +lady's slipper +large yellow lady's slipper +helleborine +fringed orchis +rein orchid +spider orchid +moth orchid +butterfly orchid +ladies' tresses +vanda +vanilla +yam +primrose +pimpernel +featherfoil +loosestrife +water pimpernel +gramineous plant +grass +wheatgrass +foxtail +broom grass +oat +brome +grama +reed grass +burgrass +crabgrass +lyme grass +wild rye +plume grass 
+rye grass +ricegrass +meadowgrass +millet +reed +sorghum +grain sorghum +cordgrass +cereal +wheat +corn +mealie +zoysia +bamboo +cotton grass +spike rush +pandanus +cattail +grain +kernel +gourd +gourd +squash +summer squash +marrow +winter squash +turban squash +bryony +sweet melon +luffa +lobelia +mallow +hollyhock +althea +poppy mallow +seashore mallow +globe mallow +tulipwood tree +sterculia +bottle-tree +screw tree +cacao +linden +herb +protea +banksia +grevillea +macadamia +casuarina +beefwood +heath +bearberry +huckleberry +kalmia +rhododendron +cranberry +blueberry +shortia +Australian heath +epacris +wintergreen +pipsissewa +beech +chestnut +tanbark oak +southern beech +New Zealand beech +oak +live oak +white oak +red oak +scrub oak +chestnut oak +birch +alder +hornbeam +hop hornbeam +hazelnut +centaury +gentian +fringed gentian +olive tree +fringe tree +ash +red ash +jasmine +privet +lilac +liquidambar +walnut +hickory +wing nut +loosestrife +myrtle +gum tree +eucalyptus +flooded gum +mallee +stringybark +tupelo +enchanter's nightshade +willowherb +fuchsia +evening primrose +daphne +canna +banana +ginger +begonia +tuberous begonia +poon +St John's wort +rockrose +dipterocarp +candlewood +reseda +viola +violet +nettle +cannabis +mulberry +fig tree +fig +elm +hackberry +iridaceous plant +bearded iris +beardless iris +crocus +amaryllis +blood lily +narcissus +daffodil +liliaceous plant +colicroot +alliaceous plant +kniphofia +poker plant +asphodel +mariposa +globe lily +camas +dogtooth violet +fritillary +tulip +star-of-Bethlehem +grape hyacinth +scilla +false asphodel +bog asphodel +hellebore +death camas +sarsaparilla +Solomon's-seal +bellwort +agave +sansevieria +cassia +locust tree +senna +angelim +milk vetch +wild indigo +pea tree +glory pea +rosewood +blackwood +tick trefoil +coral tree +vetchling +wild pea +lupine +medic +mucuna +locoweed +pole bean +pea +edible-pod pea +quira +hoary pea +bush pea +vetch +palm +sago palm +feather palm +fan palm 
+palmetto +areca +calamus +oil palm +raffia palm +lady palm +eriogonum +rhubarb +water plantain +waterweed +pondweed +rose +agrimonia +flowering quince +cotoneaster +avens +apple tree +wild apple +crab apple +Iowa crab +cinquefoil +plum +wild plum +bullace +apricot +cherry +wild cherry +sweet cherry +sour cherry +almond tree +almond +bird cherry +flowering cherry +chokecherry +fruit tree +bramble bush +raspberry +mountain ash +service tree +spirea +madderwort +coffee +cinchona +bedstraw +genipa +hamelia +honeysuckle +American fly honeysuckle +teasel +scabious +geranium +cranesbill +storksbill +incense tree +mahogany +silver ash +milkwort +citrus +orange +mandarin +lemon +kumquat +prickly ash +bitterwood tree +ailanthus +nasturtium +willow +osier +sallow +poplar +black poplar +cottonwood +aspen +soapberry +soapberry vine +harpullia +pachysandra +spindle tree +maple +box elder +holly +sumac +horse chestnut +persimmon +buckthorn +styrax +carnivorous plant +pitcher plant +sedum +philadelphus +saxifrage +astilbe +alumroot +miterwort +parnassia +currant +plane tree +phlox +acanthus +catalpa +anchusa +comfrey +convolvulus +bindweed +gloxinia +streptocarpus +waterleaf +nemophila +scorpionweed +giant hyssop +bugle +wood mint +calamint +coleus +dead nettle +origanum +horehound +monarda +savory +germander +thyme +blue curls +snapdragon +kitten-tails +Indian paintbrush +foxglove +toadflax +veronica +nightshade +thorn apple +matrimony vine +cupflower +petunia +salpiglossis +spurge +croton +cassava +slipper spurge +camellia +umbellifer +angelica +astrantia +caraway +fennel +parsnip +parsley +sanicle +dogwood +valerian +bristle fern +flowering fern +climbing fern +clover fern +adder's tongue +grape fern +ergot +sclerotinia +earthball +Podaxaceae +false truffle +rhizopus +slime mold +cellular slime mold +downy mildew +pythium +Sarcosomataceae +club fungus +lichen +lecanora +fungus +basidiomycete +mushroom +mushroom +mushroom +toadstool +horse mushroom +meadow mushroom +royal 
agaric +false deathcap +fly agaric +death cap +blushing mushroom +destroying angel +chanterelle +floccose chanterelle +pig's ears +cinnabar chanterelle +jack-o-lantern fungus +inky cap +shaggymane +milkcap +fairy-ring mushroom +oyster mushroom +olive-tree agaric +Pholiota astragalina +Pholiota aurea +Pholiota destruens +Pholiota flammans +Pholiota flavida +nameko +Pholiota squarrosa-adiposa +Pholiota squarrosa +Pholiota squarrosoides +Stropharia ambigua +Stropharia hornemannii +Stropharia rugoso-annulata +Entoloma lividum +Entoloma aprile +Chlorophyllum molybdites +lepiota +parasol mushroom +poisonous parasol +Lepiota naucina +Lepiota rhacodes +American parasol +Lepiota rubrotincta +Lepiota clypeolaria +onion stem +blewits +sandy mushroom +Tricholoma pessundatum +Tricholoma sejunctum +man-on-a-horse +Tricholoma venenata +Tricholoma pardinum +Tricholoma vaccinum +Tricholoma aurantium +Pluteus aurantiorugosus +Pluteus magnus +deer mushroom +straw mushroom +Volvariella bombycina +Clitocybe clavipes +Clitocybe dealbata +Clitocybe inornata +Clitocybe robusta +Clitocybe irina +Clitocybe subconnexa +winter mushroom +mycelium +ascomycete +Clavicipitaceae +yeast +discomycete +morel +Verpa +false morel +lorchel +helvella +Gyromitra californica +Gyromitra sphaerospora +Gyromitra esculenta +Gyromitra infula +Gyromitra gigas +gasteromycete +common stinkhorn +Phallus ravenelii +dog stinkhorn +stinky squid +puffball +Geastrum coronatum +Astreus pteridis +Astreus hygrometricus +polypore +Boletus chrysenteron +Boletus edulis +Frost's bolete +Boletus luridus +Boletus mirabilis +Boletus pallidus +Boletus pulcherrimus +Boletus pulverulentus +Boletus roxanae +Boletus subvelutipes +Boletus variipes +Boletus zelleri +Fuscoboletinus paluster +Fuscoboletinus serotinus +Leccinum fibrillosum +Suillus albivelatus +old-man-of-the-woods +Boletellus russellii +jelly fungus +rust +smut +cornsmut +flag smut fungus +waxycap +Hygrocybe acutoconica +Hygrophorus borealis +Hygrophorus caeruleus 
+Hygrophorus inocybiformis +Hygrophorus kauffmanii +Hygrophorus marzuolus +Hygrophorus purpurascens +Hygrophorus russula +Hygrophorus sordidus +Hygrophorus tennesseensis +Hygrophorus turundus +Neohygrophorus angelesianus +Cortinarius armillatus +Cortinarius atkinsonianus +Cortinarius corrugatus +Cortinarius gentilis +Cortinarius mutabilis +Cortinarius semisanguineus +Cortinarius subfoetidus +Cortinarius violaceus +Gymnopilus spectabilis +Gymnopilus validipes +Gymnopilus ventricosus +mold +mildew +candida +houseplant +succulent +weed +sporophyll +sporangium +poisonous plant +vine +tree +bean tree +gymnospermous tree +conifer +angiospermous tree +nut tree +spice tree +bonsai +subshrub +bramble +liana +desert plant +marsh plant +strangler +root +receptacle +scape +peduncle +flower cluster +raceme +cyme +bulbous plant +fruit +seed +bean +nut +berry +aggregate fruit +drupe +drupelet +pome +pod +husk +buckthorn +vinifera +true pepper +peperomia +bract +palmate leaf +pinnate leaf +dentate leaf +branchlet +polypody +strap fern +staghorn fern +spleenwort +chain fern +davallia +hare's-foot fern +shield fern +wood fern +lady fern +bladder fern +holly fern +woodsia +maidenhair +brittle maidenhair +lip fern +cliff brake +horsetail +club moss +spikemoss +beech fern +shoestring fungus +Armillaria caligata +Armillaria ponderosa +Armillaria zelleri +honey mushroom +milkweed +stapelia +stephanotis +orangery +figure +plane figure +solid figure +line +convex shape +concave shape +cylinder +round shape +polygon +concave polygon +amorphous shape +closed curve +simple closed curve +cone +circle +ring +loop +ellipse +triangle +spherical polygon +angular distance +groove +bulge +bow +balance +toroid +boundary +incisure +notch +wrinkle +tree +regular polyhedron +carbon +rock +soil +high explosive +culture medium +agar +paper +paving +plaster +stucco +tear gas +vitamin +fat-soluble vitamin +water-soluble vitamin +vitamin A +B-complex vitamin +vitamin E +vitamin K \ No newline at end of file 
diff --git a/workloads/realworld/uvm/darknet/cfg/imagenet1k.data b/workloads/realworld/uvm/darknet/cfg/imagenet1k.data new file mode 100644 index 0000000000000000000000000000000000000000..daf120a3c020003e8ed08096c51304272ca3ba27 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/imagenet1k.data @@ -0,0 +1,9 @@ +classes=1000 +train = /data/darknet/imagenet_mini/valid.list +valid = /data/darknet/imagenet_mini/valid.list +test = /data/darknet/imagenet_mini/valid.list +backup = /data/darknet/backup/ +labels = /data/darknet/imagenet_mini/imagenet.labels.list +names = /data/darknet/imagenet_mini/imagenet.shortnames.list +top=5 + diff --git a/workloads/realworld/uvm/darknet/cfg/imagenet22k.dataset b/workloads/realworld/uvm/darknet/cfg/imagenet22k.dataset new file mode 100644 index 0000000000000000000000000000000000000000..e25ef007ecceb096e5846ee7cacd1fd54fb8f9e4 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/imagenet22k.dataset @@ -0,0 +1,9 @@ +classes=21842 +train = /data/imagenet/imagenet22k.train.list +valid = /data/imagenet/imagenet22k.valid.list +#valid = /data/imagenet/imagenet1k.valid.list +backup = /home/pjreddie/backup/ +labels = data/imagenet.labels.list +names = data/imagenet.shortnames.list +top = 5 + diff --git a/workloads/realworld/uvm/darknet/cfg/imagenet9k.hierarchy.dataset b/workloads/realworld/uvm/darknet/cfg/imagenet9k.hierarchy.dataset new file mode 100644 index 0000000000000000000000000000000000000000..41fb71b065544b919bc8ed7d723afb5d04ad85ac --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/imagenet9k.hierarchy.dataset @@ -0,0 +1,9 @@ +classes=9418 +train = data/9k.train.list +valid = /data/imagenet/imagenet1k.valid.list +leaves = data/imagenet1k.labels +backup = /home/pjreddie/backup/ +labels = data/9k.labels +names = data/9k.names +top=5 + diff --git a/workloads/realworld/uvm/darknet/cfg/jnet-conv.cfg b/workloads/realworld/uvm/darknet/cfg/jnet-conv.cfg new file mode 100644 index 
0000000000000000000000000000000000000000..056f82aa6e2a0710a664c4740ab763961e4de33d --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/jnet-conv.cfg @@ -0,0 +1,118 @@ +[net] +batch=1 +subdivisions=1 +height=10 +width=10 +channels=3 +learning_rate=0.01 +momentum=0.9 +decay=0.0005 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + diff --git a/workloads/realworld/uvm/darknet/cfg/openimages.data b/workloads/realworld/uvm/darknet/cfg/openimages.data new file mode 100644 index 0000000000000000000000000000000000000000..fa80e5ab7d8576d391c7cac9dfc8367aab704139 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/openimages.data @@ -0,0 +1,8 @@ +classes= 601 +train = /home/pjreddie/data/openimsv4/openimages.train.list +#valid = coco_testdev +valid = data/coco_val_5k.list +names = data/openimages.names +backup = /home/pjreddie/backup/ +eval=coco + diff --git a/workloads/realworld/uvm/darknet/cfg/resnet101.cfg 
b/workloads/realworld/uvm/darknet/cfg/resnet101.cfg new file mode 100644 index 0000000000000000000000000000000000000000..de458820bcd35f5e65d858f9f661e42653ed0184 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/resnet101.cfg @@ -0,0 +1,990 @@ +[net] +# Training +# batch=128 +# subdivisions=2 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + +[cost] +type=sse + diff --git a/workloads/realworld/uvm/darknet/cfg/resnet152.cfg b/workloads/realworld/uvm/darknet/cfg/resnet152.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e8e3297ac2364b95f28fa0a0bdd4ca71f14ac82c --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/resnet152.cfg @@ -0,0 +1,1460 @@ +[net] +# Training +# batch=128 +# subdivisions=8 + +# Testing 
+batch=1 +subdivisions=1 + +height=256 +width=256 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 
+size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 
+size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/resnet18.cfg b/workloads/realworld/uvm/darknet/cfg/resnet18.cfg new file mode 100644 index 0000000000000000000000000000000000000000..275f4bdb5962d77c16f353cd3d2751e189b9344c --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/resnet18.cfg @@ -0,0 +1,228 @@ +[net] +# Training +# batch=128 +# subdivisions=1 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] 
+activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/resnet18_b.cfg b/workloads/realworld/uvm/darknet/cfg/resnet18_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c0712e9cdfaf1f28dbe3a358355405d2758e6d2b --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/resnet18_b.cfg @@ -0,0 +1,228 @@ +[net] +# Training 
+batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 
+filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/resnet18_t.cfg b/workloads/realworld/uvm/darknet/cfg/resnet18_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c0712e9cdfaf1f28dbe3a358355405d2758e6d2b --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/resnet18_t.cfg @@ -0,0 +1,228 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 
+pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/resnet34.cfg b/workloads/realworld/uvm/darknet/cfg/resnet34.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9f68f096741ae3b4898f40b76af7569d4697729f --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/resnet34.cfg @@ -0,0 +1,392 @@ +[net] +# Training +# batch=128 +# subdivisions=2 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 
+momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/resnet50.cfg b/workloads/realworld/uvm/darknet/cfg/resnet50.cfg new file mode 100644 index 0000000000000000000000000000000000000000..d0d7c511516e997a392bb5ba77682740c0494972 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/resnet50.cfg @@ -0,0 +1,510 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm/darknet/cfg/resnet50_b.cfg b/workloads/realworld/uvm/darknet/cfg/resnet50_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..462479918fd608c4a0761b84c7299e51081193c6 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/resnet50_b.cfg @@ -0,0 +1,510 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 
+stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm/darknet/cfg/resnet50_t.cfg b/workloads/realworld/uvm/darknet/cfg/resnet50_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..462479918fd608c4a0761b84c7299e51081193c6 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/resnet50_t.cfg @@ -0,0 +1,510 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm/darknet/cfg/resnext101-32x4d.cfg b/workloads/realworld/uvm/darknet/cfg/resnext101-32x4d.cfg new file mode 100644 index 0000000000000000000000000000000000000000..8538ccc3daee2e3de589eb4e2edf868340d4924b --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/resnext101-32x4d.cfg @@ -0,0 +1,1053 @@ +[net] +# Training +# batch=128 +# subdivisions=8 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 
+stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 
+size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] 
+batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/resnext152-32x4d.cfg b/workloads/realworld/uvm/darknet/cfg/resnext152-32x4d.cfg new file mode 100644 index 0000000000000000000000000000000000000000..48279fd28eb0dbe23c7b0c593f67051cb6a62374 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/resnext152-32x4d.cfg @@ -0,0 +1,1562 @@ +[net] +# Training +# batch=128 +# subdivisions=16 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 
+size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 
+size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 
+size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/resnext50.cfg b/workloads/realworld/uvm/darknet/cfg/resnext50.cfg new file mode 100644 index 0000000000000000000000000000000000000000..12aebdf6fbd48bde40ee22c4257e06f2e0cf46eb --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/resnext50.cfg @@ -0,0 +1,523 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 
+size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm/darknet/cfg/rnn.cfg b/workloads/realworld/uvm/darknet/cfg/rnn.cfg new file mode 100644 index 0000000000000000000000000000000000000000..61b202f3a441b701f76d9b007c6276467c639e11 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/rnn.cfg @@ -0,0 +1,38 @@ +[net] +subdivisions=1 +inputs=256 +batch = 1 +momentum=0.9 +decay=0.001 +max_batches = 2000 +time_steps=1 +learning_rate=0.1 +policy=steps +steps=1000,1500 +scales=.1,.1 + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[connected] +output=256 +activation=leaky + +[softmax] + + diff --git a/workloads/realworld/uvm/darknet/cfg/rnn.train.cfg b/workloads/realworld/uvm/darknet/cfg/rnn.train.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b9748990aceaa85cc2e43358073114606725dcbd --- 
/dev/null +++ b/workloads/realworld/uvm/darknet/cfg/rnn.train.cfg @@ -0,0 +1,38 @@ +[net] +subdivisions=1 +inputs=256 +batch = 128 +momentum=0.9 +decay=0.001 +max_batches = 2000 +time_steps=576 +learning_rate=0.1 +policy=steps +steps=1000,1500 +scales=.1,.1 + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[connected] +output=256 +activation=leaky + +[softmax] + + diff --git a/workloads/realworld/uvm/darknet/cfg/strided.cfg b/workloads/realworld/uvm/darknet/cfg/strided.cfg new file mode 100644 index 0000000000000000000000000000000000000000..2f745085adc268a3e99bd7895bd4dda28227bffd --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/strided.cfg @@ -0,0 +1,182 @@ +[net] +batch=128 +subdivisions=4 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +policy=steps +scales=.1,.1,.1 +steps=200000,300000,400000 +max_batches=800000 + + +[crop] +crop_height=224 +crop_width=224 +flip=1 +angle=0 +saturation=1 +exposure=1 +shift=.2 + +[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=192 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 
+stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=1024 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=ramp + +[maxpool] +size=3 +stride=2 + +[connected] +output=4096 +activation=ramp + +[dropout] +probability=0.5 + +[connected] +output=1000 +activation=ramp + +[softmax] + diff --git a/workloads/realworld/uvm/darknet/cfg/t1.test.cfg b/workloads/realworld/uvm/darknet/cfg/t1.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b3628114e048dc78f4797342afd95a757c81c977 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/t1.test.cfg @@ -0,0 +1,117 @@ +[net] +batch=1 +subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,20000,30000 +scales=2.5,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[connected] +output= 1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/uvm/darknet/cfg/tiny.cfg b/workloads/realworld/uvm/darknet/cfg/tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..f97327cfceebf868998bf2bb16224bd0994ebd82 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/tiny.cfg @@ -0,0 +1,174 @@ +[net] +# Train +batch=128 +subdivisions=1 +# Test +# batch=1 +# subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=320 + +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + 
+[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm/darknet/cfg/vgg-16.cfg b/workloads/realworld/uvm/darknet/cfg/vgg-16.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c73b17b6ccfdcc9cae9b67591b662571463569ab --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/vgg-16.cfg @@ -0,0 +1,157 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +learning_rate=0.00001 +momentum=0.9 +decay=0.0005 + +[crop] +crop_height=224 +crop_width=224 +flip=1 +exposure=1 +saturation=1 +angle=0 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] 
+size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=1000 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm/darknet/cfg/vgg-conv.cfg b/workloads/realworld/uvm/darknet/cfg/vgg-conv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..21e1d724c9418107f9cf82f9bffb9ae64d3e2084 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/vgg-conv.cfg @@ -0,0 +1,121 @@ +[net] +batch=1 +subdivisions=1 +width=224 +height=224 +channels=3 +learning_rate=0.00001 +momentum=0.9 +decay=0.0005 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 
+activation=relu + +[maxpool] +size=2 +stride=2 + diff --git a/workloads/realworld/uvm/darknet/cfg/voc.data b/workloads/realworld/uvm/darknet/cfg/voc.data new file mode 100644 index 0000000000000000000000000000000000000000..7807b5d2a8fd0f855a8c68e82c064dc320551da1 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/voc.data @@ -0,0 +1,6 @@ +classes= 20 +train = /home/pjreddie/data/voc/train.txt +valid = /home/pjreddie/data/voc/2007_test.txt +names = data/voc.names +backup = backup + diff --git a/workloads/realworld/uvm/darknet/cfg/writing.cfg b/workloads/realworld/uvm/darknet/cfg/writing.cfg new file mode 100644 index 0000000000000000000000000000000000000000..1ed899bcd63d6354e8320ace7e7f513ba1174886 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/writing.cfg @@ -0,0 +1,41 @@ +[net] +batch=128 +subdivisions=2 +height=256 +width=256 +channels=3 +learning_rate=0.00000001 +momentum=0.9 +decay=0.0005 +seen=0 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1 +size=3 +stride=1 +pad=1 +activation=logistic + +[cost] + diff --git a/workloads/realworld/uvm/darknet/cfg/yolo9000.cfg b/workloads/realworld/uvm/darknet/cfg/yolo9000.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e745f78a6e37611fb0f13c2d848c292cea1a89d3 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/yolo9000.cfg @@ -0,0 +1,218 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +batch=1 +subdivisions=1 +height=544 +width=544 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +hue=.1 +saturation=.75 +exposure=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + 
+[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=28269 +size=1 +stride=1 +pad=1 +activation=linear + +[region] +anchors = 0.77871, 1.14074, 3.00525, 4.31277, 9.22725, 9.61974 +bias_match=1 +classes=9418 +coords=4 +num=3 +softmax=1 +jitter=.2 +rescore=1 + +object_scale=5 +noobject_scale=1 
+class_scale=1 +coord_scale=1 + +thresh = .6 +absolute=1 +random=1 + +tree=data/9k.tree +map = data/coco9k.map diff --git a/workloads/realworld/uvm/darknet/cfg/yolov1-tiny.cfg b/workloads/realworld/uvm/darknet/cfg/yolov1-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a5e7b4920289ccb507a3a0356a33362bc7633581 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/yolov1-tiny.cfg @@ -0,0 +1,130 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 + +saturation=.75 +exposure=.75 +hue = .1 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,800,20000,30000 +scales=2.5,2,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[connected] +output= 1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/uvm/darknet/cfg/yolov1.cfg b/workloads/realworld/uvm/darknet/cfg/yolov1.cfg new 
file mode 100644 index 0000000000000000000000000000000000000000..06cf6e676170e41d24e63ec08d7b27a31c411718 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/yolov1.cfg @@ -0,0 +1,261 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 +saturation=1.5 +exposure=1.5 +hue=.1 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,20000,30000 +scales=2.5,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[local] +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[dropout] +probability=.5 + +[connected] +output= 1715 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=3 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/uvm/darknet/cfg/yolov2-tiny-voc.cfg b/workloads/realworld/uvm/darknet/cfg/yolov2-tiny-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c4c127cdd352bd98b3b7a3336d5c3b2efc6fadcd --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/yolov2-tiny-voc.cfg @@ -0,0 +1,138 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +max_batches = 40200 +policy=steps +steps=-1,100,20000,30000 +scales=.1,10,.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 
+filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=125 +activation=linear + +[region] +anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 +bias_match=1 +classes=20 +coords=4 +num=5 +softmax=1 +jitter=.2 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/uvm/darknet/cfg/yolov2-tiny.cfg b/workloads/realworld/uvm/darknet/cfg/yolov2-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..81d0ac45d6dca10f50875bfe85f7496ded8e0f63 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/yolov2-tiny.cfg @@ -0,0 +1,139 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 
+ +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=425 +activation=linear + +[region] +anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 +bias_match=1 +classes=80 +coords=4 +num=5 +softmax=1 +jitter=.2 +rescore=0 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/uvm/darknet/cfg/yolov2-voc.cfg b/workloads/realworld/uvm/darknet/cfg/yolov2-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..dbf2de281c1200cb4889409c616e775080823268 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/yolov2-voc.cfg @@ -0,0 +1,258 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=416 +width=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 80200 +policy=steps +steps=40000,60000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[route] +layers=-9 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=64 +activation=leaky + +[reorg] +stride=2 + +[route] +layers=-1,-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky 
+ +[convolutional] +size=1 +stride=1 +pad=1 +filters=125 +activation=linear + + +[region] +anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 +bias_match=1 +classes=20 +coords=4 +num=5 +softmax=1 +jitter=.3 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/uvm/darknet/cfg/yolov2.cfg b/workloads/realworld/uvm/darknet/cfg/yolov2.cfg new file mode 100644 index 0000000000000000000000000000000000000000..088edf81573e83c59edd7137cbc07b6fe1433591 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/yolov2.cfg @@ -0,0 +1,258 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 
+size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[route] +layers=-9 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=64 +activation=leaky + +[reorg] +stride=2 + +[route] +layers=-1,-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=425 +activation=linear + + +[region] +anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 +bias_match=1 +classes=80 +coords=4 +num=5 +softmax=1 +jitter=.3 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/uvm/darknet/cfg/yolov3-openimages.cfg b/workloads/realworld/uvm/darknet/cfg/yolov3-openimages.cfg new file mode 100644 index 0000000000000000000000000000000000000000..65d241a74c4c4995dbd997b1750575a83b0a17d4 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/yolov3-openimages.cfg @@ -0,0 +1,789 @@ 
+[net] +# Testing + batch=1 + subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=5000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 
30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/yolov3-spp.cfg b/workloads/realworld/uvm/darknet/cfg/yolov3-spp.cfg new file mode 100644 index 0000000000000000000000000000000000000000..4ad2a052d88328a79cff5686ff4dd1df6993a2fd --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/yolov3-spp.cfg @@ -0,0 +1,822 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] 
+from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 
+stride=1 +pad=1 +activation=leaky + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 + +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/yolov3-tiny.cfg b/workloads/realworld/uvm/darknet/cfg/yolov3-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..cfca3cfa6415b7b61eae238aa71107dedbe5d607 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/yolov3-tiny.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 
+stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/uvm/darknet/cfg/yolov3-tiny_b.cfg b/workloads/realworld/uvm/darknet/cfg/yolov3-tiny_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..0d8ded69bcf909f4cca02ab202cc99b1800a1562 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/yolov3-tiny_b.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 
+activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/uvm/darknet/cfg/yolov3-tiny_t.cfg b/workloads/realworld/uvm/darknet/cfg/yolov3-tiny_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..0d8ded69bcf909f4cca02ab202cc99b1800a1562 --- /dev/null +++ 
b/workloads/realworld/uvm/darknet/cfg/yolov3-tiny_t.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + 
+[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/uvm/darknet/cfg/yolov3-voc.cfg b/workloads/realworld/uvm/darknet/cfg/yolov3-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..3f3e8dfb31b7103cf7ca00cd0bef83d6d426bb8d --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/yolov3-voc.cfg @@ -0,0 +1,785 @@ +[net] +# Testing + batch=1 + subdivisions=1 +# Training +# batch=64 +# subdivisions=16 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 50200 +policy=steps +steps=40000,45000 +scales=.1,.1 + + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 
+activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/yolov3.cfg b/workloads/realworld/uvm/darknet/cfg/yolov3.cfg new file mode 100644 index 0000000000000000000000000000000000000000..938ffff23f106d65290faae217f6a9b0a715c023 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/yolov3.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=64 +subdivisions=16 
+width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] 
+from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 
+filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 
+ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/yolov3_b.cfg b/workloads/realworld/uvm/darknet/cfg/yolov3_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a86d73b42225ef65d8ff8692b0d72827ba3a8484 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/yolov3_b.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 
+pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 
+stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm/darknet/cfg/yolov3_t.cfg b/workloads/realworld/uvm/darknet/cfg/yolov3_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a86d73b42225ef65d8ff8692b0d72827ba3a8484 --- /dev/null +++ b/workloads/realworld/uvm/darknet/cfg/yolov3_t.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 
+stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm/darknet/examples/art.c 
b/workloads/realworld/uvm/darknet/examples/art.c new file mode 100644 index 0000000000000000000000000000000000000000..932688e7b9ecbfd1a359a5d373dddf52815da9bb --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/art.c @@ -0,0 +1,59 @@ +#include "darknet.h" + +#include + +void demo_art(char *cfgfile, char *weightfile, int cam_index) +{ +#ifdef OPENCV + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + + void * cap = open_video_stream(0, cam_index, 0,0,0); + + char *window = "ArtJudgementBot9000!!!"; + if(!cap) error("Couldn't connect to webcam.\n"); + int i; + int idx[] = {37, 401, 434}; + int n = sizeof(idx)/sizeof(idx[0]); + + while(1){ + image in = get_image_from_stream(cap); + image in_s = resize_image(in, net->w, net->h); + + float *p = network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + + float score = 0; + for(i = 0; i < n; ++i){ + float s = p[idx[i]]; + if (s > score) score = s; + } + score = score; + printf("I APPRECIATE THIS ARTWORK: %10.7f%%\n", score*100); + printf("["); + int upper = 30; + for(i = 0; i < upper; ++i){ + printf("%c", ((i+.5) < score*upper) ? 
219 : ' '); + } + printf("]\n"); + + show_image(in, window, 1); + free_image(in_s); + free_image(in); + } +#endif +} + + +void run_art(int argc, char **argv) +{ + int cam_index = find_int_arg(argc, argv, "-c", 0); + char *cfg = argv[2]; + char *weights = argv[3]; + demo_art(cfg, weights, cam_index); +} + diff --git a/workloads/realworld/uvm/darknet/examples/attention.c b/workloads/realworld/uvm/darknet/examples/attention.c new file mode 100644 index 0000000000000000000000000000000000000000..cd1e579d375be8ffed5620c70180f0a59a927159 --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/attention.c @@ -0,0 +1,459 @@ +#include "darknet.h" + +#include +#include + +void extend_data_truth(data *d, int n, float val) +{ + int i, j; + for(i = 0; i < d->y.rows; ++i){ + d->y.vals[i] = realloc(d->y.vals[i], (d->y.cols+n)*sizeof(float)); + for(j = 0; j < n; ++j){ + d->y.vals[i][d->y.cols + j] = val; + } + } + d->y.cols += n; +} + +matrix network_loss_data(network *net, data test) +{ + int i,b; + int k = 1; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.cols, sizeof(float)); + float *y = calloc(net->batch*test.y.cols, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + memcpy(y+b*test.y.cols, test.y.vals[i+b], test.y.cols*sizeof(float)); + } + + network orig = *net; + net->input = X; + net->truth = y; + net->train = 0; + net->delta = 0; + forward_network(net); + *net = orig; + + float *delta = net->layers[net->n-1].output; + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + int t = max_index(y + b*test.y.cols, 1000); + float err = sum_array(delta + b*net->outputs, net->outputs); + pred.vals[i+b][0] = -err; + //pred.vals[i+b][0] = 1-delta[b*net->outputs + t]; + } + } + free(X); + free(y); + return pred; +} + +void train_attention(char *datacfg, char *cfgfile, char 
*weightfile, int *gpus, int ngpus, int clear) +{ + int i, j; + + float avg_cls_loss = -1; + float avg_att_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *train_list = option_find_str(options, "train", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + + char **labels = get_labels(label_list); + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + double time; + + int divs=3; + int size=2; + + load_args args = {0}; + args.w = divs*net->w/size; + args.h = divs*net->h/size; + args.size = divs*net->w/size; + args.threads = 32; + args.hierarchy = net->hierarchy; + + args.min = net->min_ratio*args.w; + args.max = net->max_ratio*args.w; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + args.paths = paths; + args.classes = classes; + args.n = imgs; + args.m = N; + args.labels = labels; + args.type = CLASSIFICATION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + 
time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + data resized = resize_data(train, net->w, net->h); + extend_data_truth(&resized, divs*divs, 0); + data *tiles = tile_data(train, divs, size); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float aloss = 0; + float closs = 0; + int z; + for (i = 0; i < divs*divs/ngpus; ++i) { +#pragma omp parallel for + for(j = 0; j < ngpus; ++j){ + int index = i*ngpus + j; + extend_data_truth(tiles+index, divs*divs, SECRET_NUM); + matrix deltas = network_loss_data(nets[j], tiles[index]); + for(z = 0; z < resized.y.rows; ++z){ + resized.y.vals[z][train.y.cols + index] = deltas.vals[z][0]; + } + free_matrix(deltas); + } + } + int *inds = calloc(resized.y.rows, sizeof(int)); + for(z = 0; z < resized.y.rows; ++z){ + int index = max_index(resized.y.vals[z] + train.y.cols, divs*divs); + inds[z] = index; + for(i = 0; i < divs*divs; ++i){ + resized.y.vals[z][train.y.cols + i] = (i == index)? 
1 : 0; + } + } + data best = select_data(tiles, inds); + free(inds); + #ifdef GPU + if (ngpus == 1) { + closs = train_network(net, best); + } else { + closs = train_networks(nets, ngpus, best, 4); + } + #endif + for (i = 0; i < divs*divs; ++i) { + printf("%.2f ", resized.y.vals[0][train.y.cols + i]); + if((i+1)%divs == 0) printf("\n"); + free_data(tiles[i]); + } + free_data(best); + printf("\n"); + image im = float_to_image(64,64,3,resized.X.vals[0]); + //show_image(im, "orig"); + //cvWaitKey(100); + /* + image im1 = float_to_image(64,64,3,tiles[i].X.vals[0]); + image im2 = float_to_image(64,64,3,resized.X.vals[0]); + show_image(im1, "tile"); + show_image(im2, "res"); + */ +#ifdef GPU + if (ngpus == 1) { + aloss = train_network(net, resized); + } else { + aloss = train_networks(nets, ngpus, resized, 4); + } +#endif + for(i = 0; i < divs*divs; ++i){ + printf("%f ", nets[0]->output[1000 + i]); + if ((i+1) % divs == 0) printf("\n"); + } + printf("\n"); + + free_data(resized); + free_data(train); + if(avg_cls_loss == -1) avg_cls_loss = closs; + if(avg_att_loss == -1) avg_att_loss = aloss; + avg_cls_loss = avg_cls_loss*.9 + closs*.1; + avg_att_loss = avg_att_loss*.9 + aloss*.1; + + printf("%ld, %.3f: Att: %f, %f avg, Class: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, aloss, avg_att_loss, closs, avg_cls_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + pthread_join(load_thread, 0); + + free_network(net); + free_ptrs((void**)labels, classes); + free_ptrs((void**)paths, plist->size); + 
free_list(plist); + free(base); +} + +void validate_attention_single(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *leaf_list = option_find_str(options, "leaves", 0); + if(leaf_list) change_leaves(net->hierarchy, leaf_list); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + int divs = 4; + int size = 2; + int extra = 0; + float *avgs = calloc(classes, sizeof(float)); + int *inds = calloc(divs*divs, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w*divs/size); + image crop = crop_image(resized, (resized.w - net->w*divs/size)/2, (resized.h - net->h*divs/size)/2, net->w*divs/size, net->h*divs/size); + image rcrop = resize_image(crop, net->w, net->h); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, rcrop.data); + //pred[classes + 56] = 0; + for(j = 0; j < divs*divs; ++j){ + printf("%.2f ", pred[classes + j]); + if((j+1)%divs == 0) printf("\n"); + } + printf("\n"); + copy_cpu(classes, pred, 1, avgs, 1); + top_k(pred + classes, divs*divs, divs*divs, inds); + show_image(crop, "crop"); + for(j = 0; j < extra; ++j){ + int index = inds[j]; + int row = index / 
divs; + int col = index % divs; + int y = row * crop.h / divs - (net->h - crop.h/divs)/2; + int x = col * crop.w / divs - (net->w - crop.w/divs)/2; + printf("%d %d %d %d\n", row, col, y, x); + image tile = crop_image(crop, x, y, net->w, net->h); + float *pred = network_predict(net, tile.data); + axpy_cpu(classes, 1., pred, 1, avgs, 1); + show_image(tile, "tile"); + //cvWaitKey(10); + } + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + if(rcrop.data != resized.data) free_image(rcrop); + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_attention_multi(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + int scales[] = {224, 288, 320, 352, 384}; + int nscales = sizeof(scales)/sizeof(scales[0]); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + float *pred = calloc(classes, sizeof(float)); + image im = load_image_color(paths[i], 0, 0); + for(j = 0; j 
< nscales; ++j){ + image r = resize_min(im, scales[j]); + resize_network(net, r.w, r.h); + float *p = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + flip_image(r); + p = network_predict(net, r.data); + axpy_cpu(classes, 1, p, 1, pred, 1); + if(r.data != im.data) free_image(r); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void predict_attention(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + if(top == 0) top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = letterbox_image(im, net->w, net->h); + //resize_network(&net, r.w, r.h); + //printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + float *predictions = network_predict(net, X); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + 
//if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? names[net->hierarchy->parent[index]] : "Root"); + //else printf("%s: %f\n",names[index], predictions[index]); + printf("%5.2f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void run_attention(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int ngpus; + int *gpus = read_intlist(gpu_list, &ngpus, gpu_index); + + + int top = find_int_arg(argc, argv, "-t", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + char *layer_s = (argc > 7) ? argv[7]: 0; + if(0==strcmp(argv[2], "predict")) predict_attention(data, cfg, weights, filename, top); + else if(0==strcmp(argv[2], "train")) train_attention(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) validate_attention_single(data, cfg, weights); + else if(0==strcmp(argv[2], "validmulti")) validate_attention_multi(data, cfg, weights); +} + + diff --git a/workloads/realworld/uvm/darknet/examples/captcha.c b/workloads/realworld/uvm/darknet/examples/captcha.c new file mode 100644 index 0000000000000000000000000000000000000000..41d6d07c30801b35da34c05984be488e6f6767e9 --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/captcha.c @@ -0,0 +1,353 @@ +#include "darknet.h" + +void fix_data_captcha(data d, int mask) +{ + matrix labels = d.y; + int i, j; + for(i = 0; i < d.y.rows; ++i){ + for(j = 0; j < d.y.cols; j += 2){ + if (mask){ + if(!labels.vals[i][j]){ + labels.vals[i][j] = SECRET_NUM; + labels.vals[i][j+1] = SECRET_NUM; + }else if(labels.vals[i][j+1]){ + 
labels.vals[i][j] = 0; + } + } else{ + if (labels.vals[i][j]) { + labels.vals[i][j+1] = 0; + } else { + labels.vals[i][j+1] = 1; + } + } + } + } +} + +void train_captcha(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + int i = *net->seen/imgs; + int solved = 1; + list *plist; + char **labels = get_labels("/data/captcha/reimgs.labels.list"); + if (solved){ + plist = get_paths("/data/captcha/reimgs.solved.list"); + }else{ + plist = get_paths("/data/captcha/reimgs.raw.list"); + } + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = 26; + args.n = imgs; + args.m = plist->size; + args.labels = labels; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + + load_thread = load_data_in_thread(args); + while(1){ + ++i; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + fix_data_captcha(train, solved); + + /* + image im = float_to_image(256, 256, 3, train.X.vals[114]); + show_image(im, "training"); + cvWaitKey(0); + */ + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net->seen); + free_data(train); + if(i%100==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } + } +} + +void test_captcha(char *cfgfile, char *weightfile, char *filename) +{ + network *net = 
load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + int i = 0; + char **names = get_labels("/data/captcha/reimgs.labels.list"); + char buff[256]; + char *input = buff; + int indexes[26]; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + //printf("Enter Image Path: "); + //fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, net->w, net->h); + float *X = im.data; + float *predictions = network_predict(net, X); + top_predictions(net, 26, indexes); + //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < 26; ++i){ + int index = indexes[i]; + if(i != 0) printf(", "); + printf("%s %f", names[index], predictions[index]); + } + printf("\n"); + fflush(stdout); + free_image(im); + if (filename) break; + } +} + +void valid_captcha(char *cfgfile, char *weightfile, char *filename) +{ + char **labels = get_labels("/data/captcha/reimgs.labels.list"); + network *net = load_network(cfgfile, weightfile, 0); + list *plist = get_paths("/data/captcha/reimgs.fg.list"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + int outputs = net->outputs; + + set_batch_network(net, 1); + srand(2222222); + int i, j; + for(i = 0; i < N; ++i){ + if (i%100 == 0) fprintf(stderr, "%d\n", i); + image im = load_image_color(paths[i], net->w, net->h); + float *X = im.data; + float *predictions = network_predict(net, X); + //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + int truth = -1; + for(j = 0; j < 13; ++j){ + if (strstr(paths[i], labels[j])) truth = j; + } + if (truth == -1){ + fprintf(stderr, "bad: %s\n", paths[i]); + return; + } + printf("%d, ", truth); + for(j = 0; j < outputs; ++j){ + if (j != 0) printf(", "); + printf("%f", predictions[j]); + } + printf("\n"); + fflush(stdout); + free_image(im); + if (filename) break; + } +} + +/* + void train_captcha(char *cfgfile, char 
*weightfile) + { + float avg_loss = -1; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + int i = net->seen/imgs; + list *plist = get_paths("/data/captcha/train.auto5"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + while(1){ + ++i; + time=clock(); + data train = load_data_captcha(paths, imgs, plist->size, 10, 200, 60); + translate_data_rows(train, -128); + scale_data_rows(train, 1./128); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + net->seen += imgs; + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net->seen); + free_data(train); + if(i%10==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } + } + } + + void decode_captcha(char *cfgfile, char *weightfile) + { + setbuf(stdout, NULL); + srand(time(0)); + network net = parse_network_cfg(cfgfile); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + char filename[256]; + while(1){ + printf("Enter filename: "); + fgets(filename, 256, stdin); + strtok(filename, "\n"); + image im = load_image_color(filename, 300, 57); + scale_image(im, 1./255.); + float *X = im.data; + float *predictions = network_predict(net, X); + image out = float_to_image(300, 57, 1, predictions); + show_image(out, "decoded"); +#ifdef OPENCV +cvWaitKey(0); +#endif +free_image(im); +} +} + +void encode_captcha(char *cfgfile, char *weightfile) +{ +float avg_loss = -1; +srand(time(0)); +char *base = basecfg(cfgfile); +printf("%s\n", base); +network net = 
parse_network_cfg(cfgfile); +if(weightfile){ + load_weights(&net, weightfile); +} +printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); +int imgs = 1024; +int i = net->seen/imgs; +list *plist = get_paths("/data/captcha/encode.list"); +char **paths = (char **)list_to_array(plist); +printf("%d\n", plist->size); +clock_t time; +while(1){ + ++i; + time=clock(); + data train = load_data_captcha_encode(paths, imgs, plist->size, 300, 57); + scale_data_rows(train, 1./255); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + net->seen += imgs; + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net->seen); + free_matrix(train.X); + if(i%100==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } +} +} + +void validate_captcha(char *cfgfile, char *weightfile) +{ + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + int numchars = 37; + list *plist = get_paths("/data/captcha/solved.hard"); + char **paths = (char **)list_to_array(plist); + int imgs = plist->size; + data valid = load_data_captcha(paths, imgs, 0, 10, 200, 60); + translate_data_rows(valid, -128); + scale_data_rows(valid, 1./128); + matrix pred = network_predict_data(net, valid); + int i, k; + int correct = 0; + int total = 0; + int accuracy = 0; + for(i = 0; i < imgs; ++i){ + int allcorrect = 1; + for(k = 0; k < 10; ++k){ + char truth = int_to_alphanum(max_index(valid.y.vals[i]+k*numchars, numchars)); + char prediction = int_to_alphanum(max_index(pred.vals[i]+k*numchars, numchars)); + if (truth != prediction) allcorrect=0; + if (truth != '.' && truth == prediction) ++correct; + if (truth != '.' 
|| truth != prediction) ++total; + } + accuracy += allcorrect; + } + printf("Word Accuracy: %f, Char Accuracy %f\n", (float)accuracy/imgs, (float)correct/total); + free_data(valid); +} + +void test_captcha(char *cfgfile, char *weightfile) +{ + setbuf(stdout, NULL); + srand(time(0)); + //char *base = basecfg(cfgfile); + //printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + char filename[256]; + while(1){ + //printf("Enter filename: "); + fgets(filename, 256, stdin); + strtok(filename, "\n"); + image im = load_image_color(filename, 200, 60); + translate_image(im, -128); + scale_image(im, 1/128.); + float *X = im.data; + float *predictions = network_predict(net, X); + print_letters(predictions, 10); + free_image(im); + } +} + */ +void run_captcha(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_captcha(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights, filename); + else if(0==strcmp(argv[2], "valid")) valid_captcha(cfg, weights, filename); + //if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "encode")) encode_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "decode")) decode_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "valid")) validate_captcha(cfg, weights); +} + diff --git a/workloads/realworld/uvm/darknet/examples/cifar.c b/workloads/realworld/uvm/darknet/examples/cifar.c new file mode 100644 index 0000000000000000000000000000000000000000..a5f5f240b9f680acd9b5890042300d3b683e0f82 --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/cifar.c @@ -0,0 +1,251 @@ +#include "darknet.h" + +void train_cifar(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + int classes = 10; + int N = 50000; + + char **labels = get_labels("data/cifar/labels.txt"); + int epoch = (*net->seen)/N; + data train = load_all_cifar10(); + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + clock_t time=clock(); + + float loss = train_network_sgd(net, train, 1); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + 
char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)labels, classes); + free(base); + free_data(train); +} + +void train_cifar_distill(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + int classes = 10; + int N = 50000; + + char **labels = get_labels("data/cifar/labels.txt"); + int epoch = (*net->seen)/N; + + data train = load_all_cifar10(); + matrix soft = csv_to_matrix("results/ensemble.csv"); + + float weight = .9; + scale_matrix(soft, weight); + scale_matrix(train.y, 1. - weight); + matrix_add_matrix(soft, train.y); + + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + clock_t time=clock(); + + float loss = train_network_sgd(net, train, 1); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)labels, classes); + free(base); + free_data(train); +} + +void test_cifar_multi(char 
*filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + float avg_acc = 0; + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + + float pred[10] = {0}; + + float *p = network_predict(net, im.data); + axpy_cpu(10, 1, p, 1, pred, 1); + flip_image(im); + p = network_predict(net, im.data); + axpy_cpu(10, 1, p, 1, pred, 1); + + int index = max_index(pred, 10); + int class = max_index(test.y.vals[i], 10); + if(index == class) avg_acc += 1; + free_image(im); + printf("%4d: %.2f%%\n", i, 100.*avg_acc/(i+1)); + } +} + +void test_cifar(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + clock_t time; + float avg_acc = 0; + float avg_top5 = 0; + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + time=clock(); + + float *acc = network_accuracies(net, test, 2); + avg_acc += acc[0]; + avg_top5 += acc[1]; + printf("top1: %f, %lf seconds, %d images\n", avg_acc, sec(clock()-time), test.X.rows); + free_data(test); +} + +void extract_cifar() +{ +char *labels[] = {"airplane","automobile","bird","cat","deer","dog","frog","horse","ship","truck"}; + int i; + data train = load_all_cifar10(); + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + for(i = 0; i < train.X.rows; ++i){ + image im = float_to_image(32, 32, 3, train.X.vals[i]); + int class = max_index(train.y.vals[i], 10); + char buff[256]; + sprintf(buff, "data/cifar/train/%d_%s",i,labels[class]); + save_image_options(im, buff, PNG, 0); + } + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + int class = max_index(test.y.vals[i], 10); + char buff[256]; + sprintf(buff, "data/cifar/test/%d_%s",i,labels[class]); + save_image_options(im, buff, PNG, 0); + 
} +} + +void test_cifar_csv(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + matrix pred = network_predict_data(net, test); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + flip_image(im); + } + matrix pred2 = network_predict_data(net, test); + scale_matrix(pred, .5); + scale_matrix(pred2, .5); + matrix_add_matrix(pred2, pred); + + matrix_to_csv(pred); + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); +} + +void test_cifar_csvtrain(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + srand(time(0)); + + data test = load_all_cifar10(); + + matrix pred = network_predict_data(net, test); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + flip_image(im); + } + matrix pred2 = network_predict_data(net, test); + scale_matrix(pred, .5); + scale_matrix(pred2, .5); + matrix_add_matrix(pred2, pred); + + matrix_to_csv(pred); + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); +} + +void eval_cifar_csv() +{ + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + matrix pred = csv_to_matrix("results/combined.csv"); + fprintf(stderr, "%d %d\n", pred.rows, pred.cols); + + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); + free_matrix(pred); +} + + +void run_cifar(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? 
argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_cifar(cfg, weights); + else if(0==strcmp(argv[2], "extract")) extract_cifar(); + else if(0==strcmp(argv[2], "distill")) train_cifar_distill(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_cifar(cfg, weights); + else if(0==strcmp(argv[2], "multi")) test_cifar_multi(cfg, weights); + else if(0==strcmp(argv[2], "csv")) test_cifar_csv(cfg, weights); + else if(0==strcmp(argv[2], "csvtrain")) test_cifar_csvtrain(cfg, weights); + else if(0==strcmp(argv[2], "eval")) eval_cifar_csv(); +} + + diff --git a/workloads/realworld/uvm/darknet/examples/classifier.c b/workloads/realworld/uvm/darknet/examples/classifier.c new file mode 100644 index 0000000000000000000000000000000000000000..932f5a4652022288029617d0722c3bfa3073e536 --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/classifier.c @@ -0,0 +1,1122 @@ +#include "darknet.h" + +#include +#include + +float *get_regression_values(char **labels, int n) +{ + float *v = calloc(n, sizeof(float)); + int i; + for(i = 0; i < n; ++i){ + char *p = strchr(labels[i], ' '); + *p = 0; + v[i] = atof(p+1); + } + return v; +} + +void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + // Ruihao + int tag = option_find_int_quiet(options, "tag", 0); + + char *backup_directory_cfg = 
option_find_str(options, "backup", "/backup/"); + char *label_list_cfg = option_find_str(options, "labels", "data/labels.list"); + char *train_list_cfg = option_find_str(options, "train", "data/train.list"); + + char *env = getenv("UVMAsyncBench_BASE"); + char backup_directory[256]; + char label_list[256]; + char train_list[256]; + sprintf(backup_directory, "%s/%s", env, backup_directory_cfg); + sprintf(label_list, "%s/%s", env, label_list_cfg); + sprintf(train_list, "%s/%s", env, train_list_cfg); + // Ruihao + char *tree = option_find_str(options, "tree", 0); + if (tree) net->hierarchy = read_tree(tree); + int classes = option_find_int(options, "classes", 2); + + char **labels = 0; + if(!tag){ + labels = get_labels(label_list); + } + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + double time; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.hierarchy = net->hierarchy; + + args.min = net->min_ratio*net->w; + args.max = net->max_ratio*net->w; + printf("%d %d\n", args.min, args.max); + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + + args.paths = paths; + args.classes = classes; + args.n = imgs; + args.m = N; + args.labels = labels; + if (tag){ + args.type = TAG_DATA; + } else { + args.type = CLASSIFICATION_DATA; + } + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int count = 0; + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + if(net->random && count++%40 == 0){ + printf("Resizing\n"); + int dim = (rand() % 11 + 4) * 32; + //if (get_current_batch(net)+200 > net->max_batches) dim = 608; + //int dim = (rand() % 4 + 16) * 32; + printf("%d\n", dim); + args.w = dim; + args.h = dim; + args.size 
= dim; + args.min = net->min_ratio*dim; + args.max = net->max_ratio*dim; + printf("%d %d\n", args.min, args.max); + + pthread_join(load_thread, 0); + train = buffer; + free_data(train); + load_thread = load_data(args); + + for(i = 0; i < ngpus; ++i){ + resize_network(nets[i], dim, dim); + } + net = nets[0]; + } + time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + pthread_join(load_thread, 0); + + // free_network(net); + if(labels) free_ptrs((void**)labels, classes); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void validate_classifier_crop(char *datacfg, char *filename, char *weightfile) +{ + int i = 0; + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int 
classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + clock_t time; + float avg_acc = 0; + float avg_topk = 0; + int splits = m/1000; + int num = (i+1)*m/splits - i*m/splits; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + + args.paths = paths; + args.classes = classes; + args.n = num; + args.m = 0; + args.labels = labels; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(i = 1; i <= splits; ++i){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + num = (i+1)*m/splits - i*m/splits; + char **part = paths+(i*m/splits); + if(i != splits){ + args.paths = part; + load_thread = load_data_in_thread(args); + } + printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + float *acc = network_accuracies(net, val, topk); + avg_acc += acc[0]; + avg_topk += acc[1]; + printf("%d: top 1: %f, top %d: %f, %lf seconds, %d images\n", i, avg_acc/i, topk, avg_topk/i, sec(clock()-time), val.X.rows); + free_data(val); + } +} + +void validate_classifier_10(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float 
avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + int w = net->w; + int h = net->h; + int shift = 32; + image im = load_image_color(paths[i], w+shift, h+shift); + image images[10]; + images[0] = crop_image(im, -shift, -shift, w, h); + images[1] = crop_image(im, shift, -shift, w, h); + images[2] = crop_image(im, 0, 0, w, h); + images[3] = crop_image(im, -shift, shift, w, h); + images[4] = crop_image(im, shift, shift, w, h); + flip_image(im); + images[5] = crop_image(im, -shift, -shift, w, h); + images[6] = crop_image(im, shift, -shift, w, h); + images[7] = crop_image(im, 0, 0, w, h); + images[8] = crop_image(im, -shift, shift, w, h); + images[9] = crop_image(im, shift, shift, w, h); + float *pred = calloc(classes, sizeof(float)); + for(j = 0; j < 10; ++j){ + float *p = network_predict(net, images[j].data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1, 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + free_image(images[j]); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_classifier_full(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist 
= get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + int size = net->w; + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, size); + resize_network(net, resized.w, resized.h); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, resized.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + free_image(im); + free_image(resized); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + + +void validate_classifier_single(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *leaf_list = option_find_str(options, "leaves", 0); + if(leaf_list) change_leaves(net->hierarchy, leaf_list); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 
0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image crop = center_crop_image(im, net->w, net->h); + //grayscale_image_3c(crop); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, crop.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + free_image(im); + free_image(crop); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%s, %d, %f, %f, \n", paths[i], class, pred[0], pred[1]); + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_classifier_multi(char *datacfg, char *cfg, char *weights) +{ + int i, j; + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + //int scales[] = {224, 288, 320, 352, 384}; + int scales[] = {224, 256, 288, 320}; + int nscales = sizeof(scales)/sizeof(scales[0]); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + float *pred = calloc(classes, sizeof(float)); + image im = load_image_color(paths[i], 0, 0); + for(j = 0; j < nscales; ++j){ + image r = resize_max(im, scales[j]); + 
resize_network(net, r.w, r.h); + float *p = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + flip_image(r); + p = network_predict(net, r.data); + axpy_cpu(classes, 1, p, 1, pred, 1); + if(r.data != im.data) free_image(r); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + int top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image orig = load_image_color(input, 0, 0); + image r = resize_min(orig, 256); + image im = crop_image(r, (r.w - 224 - 1)/2 + 1, (r.h - 224 - 1)/2 + 1, 224, 224); + float mean[] = {0.48263312050943, 0.45230225481413, 0.40099074308742}; + float std[] = {0.22590347483426, 0.22120921437787, 0.22103996251583}; + float var[3]; + var[0] = std[0]*std[0]; + var[1] = std[1]*std[1]; + var[2] = std[2]*std[2]; + + normalize_cpu(im.data, mean, var, 1, 3, im.w*im.h); + + float *X = im.data; + time=clock(); + float *predictions = network_predict(net, X); + + layer l = net->layers[layer_num]; + 
for(i = 0; i < l.c; ++i){ + if(l.rolling_mean) printf("%f %f %f\n", l.rolling_mean[i], l.rolling_variance[i], l.scales[i]); + } +#ifdef GPU + cuda_pull_array(l.output_gpu, l.output, l.outputs); +#endif + for(i = 0; i < l.outputs; ++i){ + printf("%f\n", l.output[i]); + } + /* + + printf("\n\nWeights\n"); + for(i = 0; i < l.n*l.size*l.size*l.c; ++i){ + printf("%f\n", l.filters[i]); + } + + printf("\n\nBiases\n"); + for(i = 0; i < l.n; ++i){ + printf("%f\n", l.biases[i]); + } + */ + + top_predictions(net, top, indexes); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + printf("%s: %f\n", names[index], predictions[index]); + } + free_image(im); + if (filename) break; + } +} + +void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *name_list_cfg = option_find_str(options, "names", 0); + char *env = getenv("UVMAsyncBench_BASE"); + char name_list[256]; + sprintf(name_list, "%s/%s", env, name_list_cfg); + // if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + // Ruihao + if(top == 0) top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = letterbox_image(im, net->w, net->h); + //image r = resize_min(im, 320); + //printf("%d %d\n", r.w, r.h); + //resize_network(net, r.w, r.h); + //printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + startCPU(); + 
float *predictions = network_predict(net, X); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + endCPU(); + fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + //if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? names[net->hierarchy->parent[index]] : "Root"); + //else printf("%s: %f\n",names[index], predictions[index]); + printf("%5.2f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void label_classifier(char *datacfg, char *filename, char *weightfile) +{ + int i; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "names", "data/labels.list"); + char *test_list = option_find_str(options, "test", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + + char **labels = get_labels(label_list); + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + for(i = 0; i < m; ++i){ + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + float *pred = network_predict(net, crop.data); + + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + int ind = max_index(pred, classes); + + printf("%s\n", labels[ind]); + } +} + +void csv_classifier(char *datacfg, char *cfgfile, char *weightfile) +{ + int i,j; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + 
char *test_list = option_find_str(options, "test", "data/test.list"); + int top = option_find_int(options, "top", 1); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + int *indexes = calloc(top, sizeof(int)); + + for(i = 0; i < m; ++i){ + double time = what_time_is_it_now(); + char *path = paths[i]; + image im = load_image_color(path, 0, 0); + image r = letterbox_image(im, net->w, net->h); + float *predictions = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + + printf("%s", path); + for(j = 0; j < top; ++j){ + printf("\t%d", indexes[j]); + } + printf("\n"); + + free_image(im); + free_image(r); + + fprintf(stderr, "%lf seconds, %d images, %d total\n", what_time_is_it_now() - time, i+1, m); + } +} + +void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_layer) +{ + int curr = 0; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *test_list_cfg = option_find_str(options, "test", "data/test.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char test_list[256]; + sprintf(test_list, "%s/%s", env, test_list_cfg); + // Ruihao + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + m = net->max_batches * net->batch; + free_list(plist); + + clock_t time; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = classes; + args.n = net->batch; + args.m = 0; + args.labels = 0; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(curr = net->batch; curr <= m; curr += net->batch){ + // 
while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + if(curr < m){ + args.paths = paths + curr; + if (curr + net->batch > m) args.n = m - curr; + load_thread = load_data_in_thread(args); + } + fprintf(stderr, "Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + + int i, j; + if (target_layer >= 0){ + //layer l = net->layers[target_layer]; + } + + // for(i = 0; i < pred.rows; ++i){ + // printf("%s", paths[curr-net->batch+i]); + // for(j = 0; j < pred.cols; ++j){ + // printf("\t%g", pred.vals[i][j]); + // } + // printf("\n"); + // } + + fprintf(stderr, "%lf seconds, %d images, %d total\n", sec(clock()-time), val.X.rows, curr); + free_matrix(pred); + free_data(val); + } +} + +void file_output_classifier(char *datacfg, char *filename, char *weightfile, char *listfile) +{ + int i,j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + //char *label_list = option_find_str(options, "names", "data/labels.list"); + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(listfile); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + for(i = 0; i < m; ++i){ + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + + float *pred = network_predict(net, crop.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 0, 1); + + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + + printf("%s", paths[i]); + for(j = 0; j < classes; ++j){ + printf("\t%g", pred[j]); + } + printf("\n"); + } +} + + +void threat_classifier(char *datacfg, char *cfgfile, 
char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + float threat = 0; + float roll = .2; + + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + int top = option_find_int(options, "top", 1); + + char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + //cvNamedWindow("Threat", CV_WINDOW_NORMAL); + //cvResizeWindow("Threat", 512, 512); + float fps = 0; + int i; + + int count = 0; + + while(1){ + ++count; + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + if(!in.data) break; + image in_s = resize_image(in, net->w, net->h); + + image out = in; + int x1 = out.w / 20; + int y1 = out.h / 20; + int x2 = 2*x1; + int y2 = out.h - out.h/20; + + int border = .01*out.h; + int h = y2 - y1 - 2*border; + int w = x2 - x1 - 2*border; + + float *predictions = network_predict(net, in_s.data); + float curr_threat = 0; + if(1){ + curr_threat = predictions[0] * 0 + + predictions[1] * .6 + + predictions[2]; + } else { + curr_threat = predictions[218] + + predictions[539] + + predictions[540] + + predictions[368] + + predictions[369] + + predictions[370]; + } + threat = roll * curr_threat + (1-roll) * threat; + + draw_box_width(out, x2 + border, y1 + .02*h, x2 + .5 * w, y1 + .02*h + border, border, 0,0,0); + if(threat > .97) { + draw_box_width(out, x2 + .5 * w + border, + y1 + .02*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .02*h + 3*border, 3*border, 1,0,0); + } + draw_box_width(out, x2 + .5 * w + border, + y1 + .02*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .02*h + 3*border, .5*border, 0,0,0); + draw_box_width(out, x2 + border, y1 + .42*h, x2 + .5 * w, 
/*
 * Webcam demo that scans each frame's class predictions against a fixed
 * list of "dangerous" category indices and prints an alert when any of
 * them scores above 0.01. Runs forever; only built when OPENCV is defined.
 */
void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
{
#ifdef OPENCV
    int bad_cats[] = {218, 539, 540, 1213, 1501, 1742, 1911, 2415, 4348, 19223, 368, 369, 370, 1133, 1200, 1306, 2122, 2301, 2537, 2823, 3179, 3596, 3639, 4489, 5107, 5140, 5289, 6240, 6631, 6762, 7048, 7171, 7969, 7984, 7989, 8824, 8927, 9915, 10270, 10448, 13401, 15205, 18358, 18894, 18895, 19249, 19697};
    const int num_bad = (int)(sizeof(bad_cats)/sizeof(bad_cats[0]));

    printf("Classifier Demo\n");
    network *net = load_network(cfgfile, weightfile, 0);
    set_batch_network(net, 1);
    list *options = read_data_cfg(datacfg);

    srand(2222222);
    void * cap = open_video_stream(filename, cam_index, 0,0,0);

    int top = option_find_int(options, "top", 1);

    char *name_list = option_find_str(options, "names", 0);
    char **names = get_labels(name_list);

    int *indexes = calloc(top, sizeof(int));

    if(!cap) error("Couldn't connect to webcam.\n");
    float fps = 0;

    while(1){
        struct timeval tval_before, tval_after, tval_result;
        gettimeofday(&tval_before, NULL);

        image in = get_image_from_stream(cap);
        image in_s = resize_image(in, net->w, net->h);

        float *predictions = network_predict(net, in_s.data);
        top_predictions(net, top, indexes);

        /* clear screen and home the cursor */
        printf("\033[2J");
        printf("\033[1;1H");

        /* First pass: stop at the first flagged category. */
        int detected = 0;
        int k;
        for(k = 0; k < num_bad && !detected; ++k){
            if(predictions[bad_cats[k]] > .01) detected = 1;
        }
        if(detected) printf("Threat Detected!\n");
        else         printf("Scanning...\n");

        /* Second pass: list every flagged category by name. */
        for(k = 0; k < num_bad; ++k){
            int cat = bad_cats[k];
            if(predictions[cat] > .01){
                printf("%s\n", names[cat]);
            }
        }

        show_image(in, "Threat Detection", 10);
        free_image(in_s);
        free_image(in);

        gettimeofday(&tval_after, NULL);
        timersub(&tval_after, &tval_before, &tval_result);
        float curr = 1000000.f/((long int)tval_result.tv_usec);
        fps = .9*fps + .1*curr;   /* exponential moving average (never printed) */
    }
#endif
}
connect to webcam.\n"); + float fps = 0; + int i; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + + float *predictions = network_predict(net, in_s.data); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_predictions(net, top, indexes); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + int lh = in.h*.03; + int toph = 3*lh; + + float rgb[3] = {1,1,1}; + for(i = 0; i < top; ++i){ + printf("%d\n", toph); + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + + char buff[1024]; + sprintf(buff, "%3.1f%%: %s\n", predictions[index]*100, names[index]); + image label = get_label(alphabet, buff, lh); + draw_label(in, toph, lh, label, rgb); + toph += 2*lh; + free_image(label); + } + + show_image(in, base, 10); + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_classifier(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int ngpus; + int *gpus = read_intlist(gpu_list, &ngpus, gpu_index); + + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int top = find_int_arg(argc, argv, "-t", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + char *layer_s = (argc > 7) ? argv[7]: 0; + int layer = layer_s ? 
atoi(layer_s) : -1; + if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top); + else if(0==strcmp(argv[2], "fout")) file_output_classifier(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s)); + else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer); + else if(0==strcmp(argv[2], "csv")) csv_classifier(data, cfg, weights); + else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights); + else if(0==strcmp(argv[2], "validmulti")) validate_classifier_multi(data, cfg, weights); + else if(0==strcmp(argv[2], "valid10")) validate_classifier_10(data, cfg, weights); + else if(0==strcmp(argv[2], "validcrop")) validate_classifier_crop(data, cfg, weights); + else if(0==strcmp(argv[2], "validfull")) validate_classifier_full(data, cfg, weights); +} + + diff --git a/workloads/realworld/uvm/darknet/examples/coco.c b/workloads/realworld/uvm/darknet/examples/coco.c new file mode 100644 index 0000000000000000000000000000000000000000..6a50b89abd2abc7fb217b5118034a746f790f690 --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/coco.c @@ -0,0 +1,357 @@ +#include "darknet.h" + +#include + +char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking 
/*
 * Train a COCO detection network.
 *
 * Reads training image paths from a hard-coded list, then loops until
 * net->max_batches, double-buffering data loading in a pthread while the
 * previous batch trains. Checkpoints weights to `backup_directory`
 * periodically and writes a final weights file on exit.
 *
 * cfgfile:    network description (.cfg).
 * weightfile: optional initial weights (NULL/0 to train from scratch).
 */
void train_coco(char *cfgfile, char *weightfile)
{
    //char *train_images = "/home/pjreddie/data/voc/test/train.txt";
    //char *train_images = "/home/pjreddie/data/coco/train.txt";
    char *train_images = "data/coco.trainval.txt";
    //char *train_images = "data/bags.train.list";
    char *backup_directory = "/home/pjreddie/backup/";
    srand(time(0));
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    float avg_loss = -1;
    network *net = load_network(cfgfile, weightfile, 0);
    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    /* images consumed per optimizer step; i resumes from the saved image count */
    int imgs = net->batch*net->subdivisions;
    int i = *net->seen/imgs;
    data train, buffer;


    /* detection parameters come from the last (detection) layer */
    layer l = net->layers[net->n - 1];

    int side = l.side;
    int classes = l.classes;
    float jitter = l.jitter;

    list *plist = get_paths(train_images);
    //int N = plist->size;
    char **paths = (char **)list_to_array(plist);

    load_args args = {0};
    args.w = net->w;
    args.h = net->h;
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.classes = classes;
    args.jitter = jitter;
    args.num_boxes = side;
    args.d = &buffer;
    args.type = REGION_DATA;

    /* data-augmentation knobs from the cfg */
    args.angle = net->angle;
    args.exposure = net->exposure;
    args.saturation = net->saturation;
    args.hue = net->hue;

    /* kick off the first asynchronous batch load */
    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;
    //while(i*imgs < N*120){
    while(get_current_batch(net) < net->max_batches){
        i += 1;
        time=clock();
        /* wait for the in-flight batch, take it, and immediately start loading the next */
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data_in_thread(args);

        printf("Loaded: %lf seconds\n", sec(clock()-time));

        /*
           image im = float_to_image(net->w, net->h, 3, train.X.vals[113]);
           image copy = copy_image(im);
           draw_coco(copy, train.y.vals[113], 7, "truth");
           cvWaitKey(0);
           free_image(copy);
         */

        time=clock();
        float loss = train_network(net, train);
        /* exponential moving average of the loss for smoother reporting */
        if (avg_loss < 0) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;

        printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs);
        /* numbered checkpoint every 1000 steps (every 100 early in training) */
        if(i%1000==0 || (i < 1000 && i%100 == 0)){
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(net, buff);
        }
        /* rolling .backup checkpoint every 100 steps */
        if(i%100==0){
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(net, buff);
        }
        free_data(train);
    }
    char buff[256];
    sprintf(buff, "%s/%s_final.weights", backup_directory, base);
    save_weights(net, buff);
}
/*
 * Extract the numeric COCO image id from a filename of the form
 * ..._NNNNNN.jpg (id is everything after the last underscore).
 */
int get_coco_image_id(char *filename)
{
    char *p = strrchr(filename, '_');
    return atoi(p+1);
}

/*
 * Run detection over the COCO 5k validation list and write results as a
 * JSON array to results/coco_results.json in the COCO submission format.
 *
 * Uses `nthreads` loader threads in a software pipeline: while one group
 * of images is being predicted, the next group is loaded in background
 * threads.
 */
void validate_coco(char *cfg, char *weights)
{
    network *net = load_network(cfg, weights, 0);
    set_batch_network(net, 1);
    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    srand(time(0));

    char *base = "results/";
    list *plist = get_paths("data/coco_val_5k.list");
    //list *plist = get_paths("/home/pjreddie/data/people-art/test.txt");
    //list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt");
    char **paths = (char **)list_to_array(plist);

    layer l = net->layers[net->n-1];
    int classes = l.classes;

    char buff[1024];
    snprintf(buff, 1024, "%s/coco_results.json", base);
    FILE *fp = fopen(buff, "w");
    fprintf(fp, "[\n");

    int m = plist->size;
    int i=0;
    int t;

    float thresh = .01;
    int nms = 1;
    float iou_thresh = .5;

    /* per-slot buffers: val/val_resized hold the images being predicted,
     * buf/buf_resized receive the next group from the loader threads */
    int nthreads = 8;
    image *val = calloc(nthreads, sizeof(image));
    image *val_resized = calloc(nthreads, sizeof(image));
    image *buf = calloc(nthreads, sizeof(image));
    image *buf_resized = calloc(nthreads, sizeof(image));
    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));

    load_args args = {0};
    args.w = net->w;
    args.h = net->h;
    args.type = IMAGE_DATA;

    /* prime the pipeline with the first nthreads loads */
    for(t = 0; t < nthreads; ++t){
        args.path = paths[i+t];
        args.im = &buf[t];
        args.resized = &buf_resized[t];
        thr[t] = load_data_in_thread(args);
    }
    time_t start = time(0);
    for(i = nthreads; i < m+nthreads; i += nthreads){
        fprintf(stderr, "%d\n", i);
        /* collect the finished loads for this group */
        for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
            pthread_join(thr[t], 0);
            val[t] = buf[t];
            val_resized[t] = buf_resized[t];
        }
        /* immediately start loading the next group */
        for(t = 0; t < nthreads && i+t < m; ++t){
            args.path = paths[i+t];
            args.im = &buf[t];
            args.resized = &buf_resized[t];
            thr[t] = load_data_in_thread(args);
        }
        /* predict and emit JSON for the collected group */
        for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
            char *path = paths[i+t-nthreads];
            int image_id = get_coco_image_id(path);
            float *X = val_resized[t].data;
            network_predict(net, X);
            int w = val[t].w;
            int h = val[t].h;
            int nboxes = 0;
            detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes);
            if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh);
            print_cocos(fp, image_id, dets, l.side*l.side*l.n, classes, w, h);
            free_detections(dets, nboxes);
            free_image(val[t]);
            free_image(val_resized[t]);
        }
    }
    /* back up over the trailing ",\n" so the JSON array closes cleanly */
    fseek(fp, -2, SEEK_CUR);
    fprintf(fp, "\n]\n");
    fclose(fp);

    fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start));
}
orig.h, thresh, 0, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, side*side*l.n, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < side*side*l.n; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < side*side*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + free_detections(dets, nboxes); + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free(id); + free_image(orig); + free_image(sized); + } +} + +void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) +{ + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + layer l = net->layers[net->n-1]; + set_batch_network(net, 1); + srand(2222222); + float nms = .4; + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = resize_image(im, net->w, net->h); + float *X = sized.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + + int nboxes = 0; + detection *dets = get_network_boxes(net, 1, 1, thresh, 0, 0, 0, &nboxes); + if 
(nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms); + + draw_detections(im, dets, l.side*l.side*l.n, thresh, coco_classes, alphabet, 80); + save_image(im, "prediction"); + show_image(im, "predictions", 0); + free_detections(dets, nboxes); + free_image(im); + free_image(sized); + if (filename) break; + } +} + +void run_coco(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .2); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5]: 0; + int avg = find_int_arg(argc, argv, "-avg", 1); + if(0==strcmp(argv[2], "test")) test_coco(cfg, weights, filename, thresh); + else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights); + else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix, avg, .5, 0,0,0,0); +} diff --git a/workloads/realworld/uvm/darknet/examples/darknet.c b/workloads/realworld/uvm/darknet/examples/darknet.c new file mode 100644 index 0000000000000000000000000000000000000000..f1c5e43b66391a9674c13a193e329cf3dfc26439 --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/darknet.c @@ -0,0 +1,559 @@ +#include "darknet.h" + +// #include "../../../../common/cupti_add.h" +// #include "../../../../common/cpu_timestamps.h" +// #include "cpu_timestamps.h" + +static uint64_t startCPUTime; +static uint64_t endCPUTime; + +void startCPU() +{ + struct timespec tv; + if (clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + startCPUTime = (tv.tv_sec * 1.0e9 + 
tv.tv_nsec); +} + +void endCPU() +{ + struct timespec tv; + if (clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + + endCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); + // endCPUTimestamp1 = std::chrono::system_clock::now(); + printf("CPU_Times,%lu,%lu,%lu\n", startCPUTime, endCPUTime, endCPUTime - startCPUTime); +} + +#ifdef __cplusplus +extern "C" { +#endif +extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top); +extern void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen); +extern void run_yolo(int argc, char **argv); +extern void run_detector(int argc, char **argv); +extern void run_coco(int argc, char **argv); +extern void run_nightmare(int argc, char **argv); +extern void run_classifier(int argc, char **argv); +extern void run_regressor(int argc, char **argv); +extern void run_segmenter(int argc, char **argv); +extern void run_isegmenter(int argc, char **argv); +extern void run_char_rnn(int argc, char **argv); +extern void run_tag(int argc, char **argv); +extern void run_cifar(int argc, char **argv); +extern void run_go(int argc, char **argv); +extern void run_art(int argc, char **argv); +extern void run_super(int argc, char **argv); +extern void run_lsd(int argc, char **argv); +#ifdef __cplusplus +} +#endif + +void average(int argc, char *argv[]) +{ + char *cfgfile = argv[2]; + char *outfile = argv[3]; + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + network *sum = parse_network_cfg(cfgfile); + + char *weightfile = argv[4]; + load_weights(sum, weightfile); + + int i, j; + int n = argc - 5; + for(i = 0; i < n; ++i){ + weightfile = argv[i+5]; + load_weights(net, weightfile); + for(j = 0; j < net->n; ++j){ + layer l = net->layers[j]; + layer out = sum->layers[j]; + if(l.type == CONVOLUTIONAL){ + int num = l.n*l.c*l.size*l.size; + axpy_cpu(l.n, 1, l.biases, 1, out.biases, 1); + 
axpy_cpu(num, 1, l.weights, 1, out.weights, 1); + if(l.batch_normalize){ + axpy_cpu(l.n, 1, l.scales, 1, out.scales, 1); + axpy_cpu(l.n, 1, l.rolling_mean, 1, out.rolling_mean, 1); + axpy_cpu(l.n, 1, l.rolling_variance, 1, out.rolling_variance, 1); + } + } + if(l.type == CONNECTED){ + axpy_cpu(l.outputs, 1, l.biases, 1, out.biases, 1); + axpy_cpu(l.outputs*l.inputs, 1, l.weights, 1, out.weights, 1); + } + } + } + n = n+1; + for(j = 0; j < net->n; ++j){ + layer l = sum->layers[j]; + if(l.type == CONVOLUTIONAL){ + int num = l.n*l.c*l.size*l.size; + scal_cpu(l.n, 1./n, l.biases, 1); + scal_cpu(num, 1./n, l.weights, 1); + if(l.batch_normalize){ + scal_cpu(l.n, 1./n, l.scales, 1); + scal_cpu(l.n, 1./n, l.rolling_mean, 1); + scal_cpu(l.n, 1./n, l.rolling_variance, 1); + } + } + if(l.type == CONNECTED){ + scal_cpu(l.outputs, 1./n, l.biases, 1); + scal_cpu(l.outputs*l.inputs, 1./n, l.weights, 1); + } + } + save_weights(sum, outfile); +} + +long numops(network *net) +{ + int i; + long ops = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + ops += 2l * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w; + } else if(l.type == CONNECTED){ + ops += 2l * l.inputs * l.outputs; + } else if (l.type == RNN){ + ops += 2l * l.input_layer->inputs * l.input_layer->outputs; + ops += 2l * l.self_layer->inputs * l.self_layer->outputs; + ops += 2l * l.output_layer->inputs * l.output_layer->outputs; + } else if (l.type == GRU){ + ops += 2l * l.uz->inputs * l.uz->outputs; + ops += 2l * l.uh->inputs * l.uh->outputs; + ops += 2l * l.ur->inputs * l.ur->outputs; + ops += 2l * l.wz->inputs * l.wz->outputs; + ops += 2l * l.wh->inputs * l.wh->outputs; + ops += 2l * l.wr->inputs * l.wr->outputs; + } else if (l.type == LSTM){ + ops += 2l * l.uf->inputs * l.uf->outputs; + ops += 2l * l.ui->inputs * l.ui->outputs; + ops += 2l * l.ug->inputs * l.ug->outputs; + ops += 2l * l.uo->inputs * l.uo->outputs; + ops += 2l * l.wf->inputs * l.wf->outputs; + ops += 2l * 
l.wi->inputs * l.wi->outputs; + ops += 2l * l.wg->inputs * l.wg->outputs; + ops += 2l * l.wo->inputs * l.wo->outputs; + } + } + return ops; +} + +void speed(char *cfgfile, int tics) +{ + if (tics == 0) tics = 1000; + network *net = parse_network_cfg(cfgfile); + set_batch_network(net, 1); + int i; + double time=what_time_is_it_now(); + image im = make_image(net->w, net->h, net->c*net->batch); + for(i = 0; i < tics; ++i){ + network_predict(net, im.data); + } + double t = what_time_is_it_now() - time; + long ops = numops(net); + printf("\n%d evals, %f Seconds\n", tics, t); + printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); + printf("FLOPS: %.2f Bn\n", (float)ops/1000000000.*tics/t); + printf("Speed: %f sec/eval\n", t/tics); + printf("Speed: %f Hz\n", tics/t); +} + +void operations(char *cfgfile) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + long ops = numops(net); + printf("Floating Point Operations: %ld\n", ops); + printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); +} + +void oneoff(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + int oldn = net->layers[net->n - 2].n; + int c = net->layers[net->n - 2].c; + scal_cpu(oldn*c, .1, net->layers[net->n - 2].weights, 1); + scal_cpu(oldn, 0, net->layers[net->n - 2].biases, 1); + net->layers[net->n - 2].n = 11921; + net->layers[net->n - 2].biases += 5; + net->layers[net->n - 2].weights += 5*c; + if(weightfile){ + load_weights(net, weightfile); + } + net->layers[net->n - 2].biases -= 5; + net->layers[net->n - 2].weights -= 5*c; + net->layers[net->n - 2].n = oldn; + printf("%d\n", oldn); + layer l = net->layers[net->n - 2]; + copy_cpu(l.n/3, l.biases, 1, l.biases + l.n/3, 1); + copy_cpu(l.n/3, l.biases, 1, l.biases + 2*l.n/3, 1); + copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + l.n/3*l.c, 1); + copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + 2*l.n/3*l.c, 1); + *net->seen = 0; + save_weights(net, 
outfile); +} + +void oneoff2(char *cfgfile, char *weightfile, char *outfile, int l) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights_upto(net, weightfile, 0, net->n); + load_weights_upto(net, weightfile, l, net->n); + } + *net->seen = 0; + save_weights_upto(net, outfile, net->n); +} + +void partial(char *cfgfile, char *weightfile, char *outfile, int max) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 1); + save_weights_upto(net, outfile, max); +} + +void print_weights(char *cfgfile, char *weightfile, int n) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 1); + layer l = net->layers[n]; + int i, j; + //printf("["); + for(i = 0; i < l.n; ++i){ + //printf("["); + for(j = 0; j < l.size*l.size*l.c; ++j){ + //if(j > 0) printf(","); + printf("%g ", l.weights[i*l.size*l.size*l.c + j]); + } + printf("\n"); + //printf("]%s\n", (i == l.n-1)?"":","); + } + //printf("]"); +} + +void rescale_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + rescale_weights(l, 2, -.5); + break; + } + } + save_weights(net, outfile); +} + +void rgbgr_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + rgbgr_weights(l); + break; + } + } + save_weights(net, outfile); +} + +void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for (i = 0; i < net->n; ++i) { + layer l = net->layers[i]; + if (l.type == CONVOLUTIONAL && l.batch_normalize) { + denormalize_convolutional_layer(l); + } + if (l.type == CONNECTED && l.batch_normalize) { + 
denormalize_connected_layer(l); + } + if (l.type == GRU && l.batch_normalize) { + denormalize_connected_layer(*l.input_z_layer); + denormalize_connected_layer(*l.input_r_layer); + denormalize_connected_layer(*l.input_h_layer); + denormalize_connected_layer(*l.state_z_layer); + denormalize_connected_layer(*l.state_r_layer); + denormalize_connected_layer(*l.state_h_layer); + } + } + save_weights(net, outfile); +} + +layer normalize_layer(layer l, int n) +{ + int j; + l.batch_normalize=1; + l.scales = (float *) calloc(n, sizeof(float)); + for(j = 0; j < n; ++j){ + l.scales[j] = 1; + } + l.rolling_mean = (float *) calloc(n, sizeof(float)); + l.rolling_variance = (float *) calloc(n, sizeof(float)); + return l; +} + +void normalize_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL && !l.batch_normalize){ + net->layers[i] = normalize_layer(l, l.n); + } + if (l.type == CONNECTED && !l.batch_normalize) { + net->layers[i] = normalize_layer(l, l.outputs); + } + if (l.type == GRU && l.batch_normalize) { + *l.input_z_layer = normalize_layer(*l.input_z_layer, l.input_z_layer->outputs); + *l.input_r_layer = normalize_layer(*l.input_r_layer, l.input_r_layer->outputs); + *l.input_h_layer = normalize_layer(*l.input_h_layer, l.input_h_layer->outputs); + *l.state_z_layer = normalize_layer(*l.state_z_layer, l.state_z_layer->outputs); + *l.state_r_layer = normalize_layer(*l.state_r_layer, l.state_r_layer->outputs); + *l.state_h_layer = normalize_layer(*l.state_h_layer, l.state_h_layer->outputs); + net->layers[i].batch_normalize=1; + } + } + save_weights(net, outfile); +} + +void statistics_net(char *cfgfile, char *weightfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for (i = 0; i < net->n; ++i) { + layer l = net->layers[i]; + if (l.type == CONNECTED && 
/* Fold batch-normalization parameters back into the raw weights
 * (denormalize_*_layer) for every batch-normalized layer, clear the
 * batch_normalize flags, and save the result to outfile. */
void denormalize_net(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 0);
    int i;
    for (i = 0; i < net->n; ++i) {
        /* NOTE: `l` is a by-value copy of the layer struct, so scalar flags
         * must be cleared through net->layers[i]; the GRU sub-layer flags
         * below are reached through pointers inside the copy, so those
         * writes do persist. */
        layer l = net->layers[i];
        if ((l.type == DECONVOLUTIONAL || l.type == CONVOLUTIONAL) && l.batch_normalize) {
            denormalize_convolutional_layer(l);
            net->layers[i].batch_normalize=0;
        }
        if (l.type == CONNECTED && l.batch_normalize) {
            denormalize_connected_layer(l);
            net->layers[i].batch_normalize=0;
        }
        if (l.type == GRU && l.batch_normalize) {
            /* Each GRU gate is a connected sub-layer with its own BN state. */
            denormalize_connected_layer(*l.input_z_layer);
            denormalize_connected_layer(*l.input_r_layer);
            denormalize_connected_layer(*l.input_h_layer);
            denormalize_connected_layer(*l.state_z_layer);
            denormalize_connected_layer(*l.state_r_layer);
            denormalize_connected_layer(*l.state_h_layer);
            l.input_z_layer->batch_normalize = 0;
            l.input_r_layer->batch_normalize = 0;
            l.input_h_layer->batch_normalize = 0;
            l.state_z_layer->batch_normalize = 0;
            l.state_r_layer->batch_normalize = 0;
            l.state_h_layer->batch_normalize = 0;
            net->layers[i].batch_normalize=0;
        }
    }
    save_weights(net, outfile);
}
int z; + for(z = 0; z < num; ++z){ + image im = make_image(h, w, 3); + fill_image(im, .5); + int i; + for(i = 0; i < 100; ++i){ + image r = copy_image(ims[rand()%n]); + rotate_image_cw(r, rand()%4); + random_distort_image(r, 1, 1.5, 1.5); + int dx = rand()%(w-r.w); + int dy = rand()%(h-r.h); + ghost_image(r, im, dx, dy); + free_image(r); + } + char buff[256]; + sprintf(buff, "%s/gen_%d", prefix, z); + save_image(im, buff); + free_image(im); + } +} + +void visualize(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + visualize_network(net); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsc() +{ + unsigned long a, d; + + __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); + + return (a | (d << 32)); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsp() { + struct timespec tms; + if (clock_gettime(CLOCK_REALTIME, &tms)) { + return -1; + } + unsigned long ns = tms.tv_sec * 1000000000; + ns += tms.tv_nsec; + return ns; +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + //test_resize("data/bad.jpg"); + //test_box(); + //test_convolutional_layer(); + if(argc < 2){ + fprintf(stderr, "usage: %s \n", argv[0]); + return 0; + } + gpu_index = find_int_arg(argc, argv, "-i", GPU_DEVICE); + if(find_arg(argc, argv, "-nogpu")) { + gpu_index = -1; + } + +#ifndef GPU + gpu_index = -1; +#else + if(gpu_index >= 0){ + cuda_set_device(gpu_index); + } + initTrace(); +#endif + + if (0 == strcmp(argv[1], "average")){ + average(argc, argv); + } else if (0 == strcmp(argv[1], "yolo")){ + run_yolo(argc, argv); + } else if (0 == strcmp(argv[1], "super")){ + run_super(argc, argv); + } else if (0 == strcmp(argv[1], "lsd")){ + run_lsd(argc, argv); + } else if (0 == strcmp(argv[1], "detector")){ + run_detector(argc, argv); + } else if (0 == strcmp(argv[1], "detect")){ + float thresh = 
find_float_arg(argc, argv, "-thresh", .5); + char *filename = (argc > 4) ? argv[4]: 0; + char *outfile = find_char_arg(argc, argv, "-out", 0); + int fullscreen = find_arg(argc, argv, "-fullscreen"); + char *value = getenv("UVMAsyncBench_BASE"); + char buff[256]; + sprintf(buff, "%s/workloads/realworld/standard/darknet/cfg/coco.data", value); + test_detector(buff, argv[2], argv[3], filename, thresh, .5, outfile, fullscreen); + } else if (0 == strcmp(argv[1], "cifar")){ + run_cifar(argc, argv); + } else if (0 == strcmp(argv[1], "go")){ + run_go(argc, argv); + } else if (0 == strcmp(argv[1], "rnn")){ + run_char_rnn(argc, argv); + } else if (0 == strcmp(argv[1], "coco")){ + run_coco(argc, argv); + } else if (0 == strcmp(argv[1], "classify")){ + predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5); + } else if (0 == strcmp(argv[1], "classifier")){ + run_classifier(argc, argv); + } else if (0 == strcmp(argv[1], "regressor")){ + run_regressor(argc, argv); + } else if (0 == strcmp(argv[1], "isegmenter")){ + run_isegmenter(argc, argv); + } else if (0 == strcmp(argv[1], "segmenter")){ + run_segmenter(argc, argv); + } else if (0 == strcmp(argv[1], "art")){ + run_art(argc, argv); + } else if (0 == strcmp(argv[1], "tag")){ + run_tag(argc, argv); + } else if (0 == strcmp(argv[1], "3d")){ + composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? 
atof(argv[5]) : 0); + } else if (0 == strcmp(argv[1], "test")){ + test_resize(argv[2]); + } else if (0 == strcmp(argv[1], "nightmare")){ + run_nightmare(argc, argv); + } else if (0 == strcmp(argv[1], "rgbgr")){ + rgbgr_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "reset")){ + reset_normalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "denormalize")){ + denormalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "statistics")){ + statistics_net(argv[2], argv[3]); + } else if (0 == strcmp(argv[1], "normalize")){ + normalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "rescale")){ + rescale_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "ops")){ + operations(argv[2]); + } else if (0 == strcmp(argv[1], "speed")){ + speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0); + } else if (0 == strcmp(argv[1], "oneoff")){ + oneoff(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "oneoff2")){ + oneoff2(argv[2], argv[3], argv[4], atoi(argv[5])); + } else if (0 == strcmp(argv[1], "print")){ + print_weights(argv[2], argv[3], atoi(argv[4])); + } else if (0 == strcmp(argv[1], "partial")){ + partial(argv[2], argv[3], argv[4], atoi(argv[5])); + } else if (0 == strcmp(argv[1], "average")){ + average(argc, argv); + } else if (0 == strcmp(argv[1], "visualize")){ + visualize(argv[2], (argc > 3) ? 
argv[3] : 0); + } else if (0 == strcmp(argv[1], "mkimg")){ + mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]); + } else if (0 == strcmp(argv[1], "imtest")){ + test_resize(argv[2]); + } else { + fprintf(stderr, "Not an option: %s\n", argv[1]); + } + finiTrace(); + return 0; +} + diff --git a/workloads/realworld/uvm/darknet/examples/detector-scipy-opencv.py b/workloads/realworld/uvm/darknet/examples/detector-scipy-opencv.py new file mode 100644 index 0000000000000000000000000000000000000000..3bfc591312ad89ff2b026ffac0daecd461c80447 --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/detector-scipy-opencv.py @@ -0,0 +1,56 @@ +# Stupid python path shit. +# Instead just add darknet.py to somewhere in your python path +# OK actually that might not be a great idea, idk, work in progress +# Use at your own risk. or don't, i don't care + +from scipy.misc import imread +import cv2 + +def array_to_image(arr): + arr = arr.transpose(2,0,1) + c = arr.shape[0] + h = arr.shape[1] + w = arr.shape[2] + arr = (arr/255.0).flatten() + data = dn.c_array(dn.c_float, arr) + im = dn.IMAGE(w,h,c,data) + return im + +def detect2(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): + boxes = dn.make_boxes(net) + probs = dn.make_probs(net) + num = dn.num_boxes(net) + dn.network_detect(net, image, thresh, hier_thresh, nms, boxes, probs) + res = [] + for j in range(num): + for i in range(meta.classes): + if probs[j][i] > 0: + res.append((meta.names[i], probs[j][i], (boxes[j].x, boxes[j].y, boxes[j].w, boxes[j].h))) + res = sorted(res, key=lambda x: -x[1]) + dn.free_ptrs(dn.cast(probs, dn.POINTER(dn.c_void_p)), num) + return res + +import sys, os +sys.path.append(os.path.join(os.getcwd(),'python/')) + +import darknet as dn + +# Darknet +net = dn.load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0) +meta = dn.load_meta("cfg/coco.data") +r = dn.detect(net, meta, "data/dog.jpg") +print r + +# scipy +arr= imread('data/dog.jpg') +im = array_to_image(arr) +r = 
detect2(net, meta, im) +print r + +# OpenCV +arr = cv2.imread('data/dog.jpg') +im = array_to_image(arr) +dn.rgbgr_image(im) +r = detect2(net, meta, im) +print r + diff --git a/workloads/realworld/uvm/darknet/examples/detector.c b/workloads/realworld/uvm/darknet/examples/detector.c new file mode 100644 index 0000000000000000000000000000000000000000..6ff1fcdff3d3d81abb458e001091cf2757b8d837 --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/detector.c @@ -0,0 +1,931 @@ +#include "darknet.h" + +static int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; + + +void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + list *options = read_data_cfg(datacfg); + // Ruihao + char *train_images_cfg = option_find_str(options, "train", "data/train.list"); + char *backup_directory_cfg = option_find_str(options, "backup", "/backup/"); + + char *env = getenv("UVMAsyncBench_BASE"); + char train_images[256]; + char backup_directory[256]; + sprintf(train_images, "%s/%s", env, train_images_cfg); + sprintf(backup_directory, "%s/%s", env, backup_directory_cfg); + // Ruihao + + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network **nets = calloc(ngpus, sizeof(network)); + + srand(time(0)); + int seed = rand(); + int i; + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + data train, buffer; + + layer l = net->layers[net->n - 1]; + + int classes = 
l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = get_base_args(net); + args.coords = l.coords; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes = l.max_boxes; + args.d = &buffer; + args.type = DETECTION_DATA; + //args.type = INSTANCE_DATA; + args.threads = 64; + + pthread_t load_thread = load_data(args); + double time; + int count = 0; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + if(l.random && count++%10 == 0){ + printf("Resizing\n"); + int dim = (rand() % 10 + 10) * 32; + if (get_current_batch(net)+200 > net->max_batches) dim = 608; + //int dim = (rand() % 4 + 16) * 32; + printf("%d\n", dim); + args.w = dim; + args.h = dim; + + pthread_join(load_thread, 0); + train = buffer; + free_data(train); + load_thread = load_data(args); + + #pragma omp parallel for + for(i = 0; i < ngpus; ++i){ + resize_network(nets[i], dim, dim); + } + net = nets[0]; + } + time=what_time_is_it_now(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + /* + int k; + for(k = 0; k < l.max_boxes; ++k){ + box b = float_to_box(train.y.vals[10] + 1 + k*5); + if(!b.x) break; + printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h); + } + */ + /* + int zz; + for(zz = 0; zz < train.X.cols; ++zz){ + image im = float_to_image(net->w, net->h, 3, train.X.vals[zz]); + int k; + for(k = 0; k < l.max_boxes; ++k){ + box b = float_to_box(train.y.vals[zz] + k*5, 1); + printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + draw_bbox(im, b, 1, 1,0,0); + } + show_image(im, "truth11"); + cvWaitKey(0); + save_image(im, "truth11"); + } + */ + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + + time=what_time_is_it_now(); + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = 
/* Extract the numeric COCO image id from a path such as
 * "val2014/COCO_val2014_000000000139.jpg": parse the digits after the last
 * '_' when present, otherwise after the last '/'.
 * BUG FIX: when the filename contained neither separator, both strrchr
 * calls returned NULL and the original evaluated atoi(NULL + 1) — undefined
 * behavior. Fall back to parsing the whole filename instead. */
static int get_coco_image_id(char *filename)
{
    char *sep = strrchr(filename, '_');   /* '_' takes precedence, as before */
    if(!sep) sep = strrchr(filename, '/');
    return atoi(sep ? sep + 1 : filename);
}
/* Write detections in ImageNet evaluation format, one line per
 * (detection, class) pair with a non-zero probability:
 * "<image id> <1-based class> <prob> <xmin> <ymin> <xmax> <ymax>".
 * Boxes arrive in center/size form and are clamped to the w x h image. */
void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int classes, int w, int h)
{
    int d, c;
    for(d = 0; d < total; ++d){
        /* center/size -> corner coordinates */
        float xmin = dets[d].bbox.x - dets[d].bbox.w/2.;
        float xmax = dets[d].bbox.x + dets[d].bbox.w/2.;
        float ymin = dets[d].bbox.y - dets[d].bbox.h/2.;
        float ymax = dets[d].bbox.y + dets[d].bbox.h/2.;

        /* clamp to the image boundary */
        xmin = (xmin < 0) ? 0 : xmin;
        ymin = (ymin < 0) ? 0 : ymin;
        xmax = (xmax > w) ? w : xmax;
        ymax = (ymax > h) ? h : ymax;

        for(c = 0; c < classes; ++c){
            if (dets[d].prob[c]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, c+1, dets[d].prob[c],
                    xmin, ymin, xmax, ymax);
        }
    }
}
+ list *plist = get_paths(valid_images); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + char *type = option_find_str(options, "eval", "voc"); + FILE *fp = 0; + FILE **fps = 0; + int coco = 0; + int imagenet = 0; + if(0==strcmp(type, "coco")){ + if(!outfile) outfile = "coco_results"; + snprintf(buff, 1024, "%s/%s.json", prefix, outfile); + fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + coco = 1; + } else if(0==strcmp(type, "imagenet")){ + if(!outfile) outfile = "imagenet-detection"; + snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); + fp = fopen(buff, "w"); + imagenet = 1; + classes = 200; + } else { + if(!outfile) outfile = "comp4_det_test_"; + fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); + fps[j] = fopen(buff, "w"); + } + } + + int m = plist->size; + int i=0; + int t; + + float thresh = .005; + float nms = .45; + + int nthreads = 4; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + image input = make_image(net->w, net->h, net->c*2); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + //args.type = IMAGE_DATA; + args.type = LETTERBOX_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + double start = what_time_is_it_now(); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = 
&buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id = basecfg(path); + copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data, 1); + flip_image(val_resized[t]); + copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data + net->w*net->h*net->c, 1); + + network_predict(net, input.data); + int w = val[t].w; + int h = val[t].h; + int num = 0; + detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &num); + if (nms) do_nms_sort(dets, num, classes, nms); + if (coco){ + print_cocos(fp, path, dets, num, classes, w, h); + } else if (imagenet){ + print_imagenet_detections(fp, i+t-nthreads+1, dets, num, classes, w, h); + } else { + print_detector_detections(fps, id, dets, num, classes, w, h); + } + free_detections(dets, num); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + for(j = 0; j < classes; ++j){ + if(fps) fclose(fps[j]); + } + if(coco){ + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start); +} + + +void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile) +{ + int j; + list *options = read_data_cfg(datacfg); + char *valid_images = option_find_str(options, "valid", "data/train.list"); + char *name_list = option_find_str(options, "names", "data/names.list"); + char *prefix = option_find_str(options, "results", "results"); + char **names = get_labels(name_list); + char *mapf = option_find_str(options, "map", 0); + int *map = 0; + if (mapf) map = read_map(mapf); + + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + list *plist = get_paths(valid_images); + char **paths = (char **)list_to_array(plist); + + layer l = 
net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + char *type = option_find_str(options, "eval", "voc"); + FILE *fp = 0; + FILE **fps = 0; + int coco = 0; + int imagenet = 0; + if(0==strcmp(type, "coco")){ + if(!outfile) outfile = "coco_results"; + snprintf(buff, 1024, "%s/%s.json", prefix, outfile); + fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + coco = 1; + } else if(0==strcmp(type, "imagenet")){ + if(!outfile) outfile = "imagenet-detection"; + snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); + fp = fopen(buff, "w"); + imagenet = 1; + classes = 200; + } else { + if(!outfile) outfile = "comp4_det_test_"; + fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); + fps[j] = fopen(buff, "w"); + } + } + + + int m = plist->size; + int i=0; + int t; + + float thresh = .005; + float nms = .45; + + int nthreads = 4; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + //args.type = IMAGE_DATA; + args.type = LETTERBOX_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + double start = what_time_is_it_now(); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id 
= basecfg(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &nboxes); + if (nms) do_nms_sort(dets, nboxes, classes, nms); + if (coco){ + print_cocos(fp, path, dets, nboxes, classes, w, h); + } else if (imagenet){ + print_imagenet_detections(fp, i+t-nthreads+1, dets, nboxes, classes, w, h); + } else { + print_detector_detections(fps, id, dets, nboxes, classes, w, h); + } + free_detections(dets, nboxes); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + for(j = 0; j < classes; ++j){ + if(fps) fclose(fps[j]); + } + if(coco){ + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start); +} + +void validate_detector_recall(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + list *plist = get_paths("data/coco_val_5k.list"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + + int j, k; + + int m = plist->size; + int i=0; + + float thresh = .001; + float iou_thresh = .5; + float nms = .4; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + int nboxes = 0; + detection *dets = get_network_boxes(net, sized.w, sized.h, thresh, .5, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, nboxes, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + 
find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < nboxes; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < l.w*l.h*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free(id); + free_image(orig); + free_image(sized); + } +} + + +void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen) +{ + list *options = read_data_cfg(datacfg); + // Ruihao + char *name_list_cfg = option_find_str(options, "names", "data/names.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char name_list[256]; + sprintf(name_list, "%s/%s", env, name_list_cfg); + // Ruihao + char **names = get_labels(name_list); + + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + double time; + char buff[256]; + char *input = buff; + float nms=.45; + while(1){ + printf("fine name is %s\n", filename); + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = letterbox_image(im, net->w, net->h); + //image sized = resize_image(im, net->w, net->h); + //image sized2 = resize_max(im, net->w); + //image sized = 
crop_image(sized2, -((net->w - sized2.w)/2), -((net->h - sized2.h)/2), net->w, net->h); + //resize_network(net, sized.w, sized.h); + layer l = net->layers[net->n-1]; + + + float *X = sized.data; + time=what_time_is_it_now(); + startCPU(); + network_predict(net, X); + endCPU(); + printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time); + int nboxes = 0; + detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes); + //printf("%d\n", nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes); + free_detections(dets, nboxes); + if(outfile){ + save_image(im, outfile); + } + else{ + save_image(im, "predictions"); +#ifdef OPENCV + make_window("predictions", 512, 512, 0); + show_image(im, "predictions", 0); +#endif + } + + free_image(im); + free_image(sized); + if (filename) break; + } +} + +/* +void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + CvCapture * cap; + + int w = 1280; + int h = 720; + + if(filename){ + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + } + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + cvNamedWindow(base, CV_WINDOW_NORMAL); + cvResizeWindow(base, 512, 512); + float fps = 0; + int i; + float nms = .45; + + while(1){ + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + layer l = net->layers[net->n-1]; + + float *X = in_s.data; + 
network_predict(net, X); + int nboxes = 0; + detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 0, &nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + + for(i = 0; i < nboxes; ++i){ + if(dets[i].prob[class] > thresh){ + box b = dets[i].bbox; + int left = b.x-b.w/2.; + int top = b.y-b.h/2.; + censor_image(in, left, top, b.w, b.h); + } + } + show_image(in, base); + cvWaitKey(10); + free_detections(dets, nboxes); + + + free_image(in_s); + free_image(in); + + + float curr = 0; + fps = .9*fps + .1*curr; + for(i = 0; i < skip; ++i){ + image in = get_image_from_stream(cap); + free_image(in); + } + } + #endif +} + +void extract_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + CvCapture * cap; + + int w = 1280; + int h = 720; + + if(filename){ + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + } + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + cvNamedWindow(base, CV_WINDOW_NORMAL); + cvResizeWindow(base, 512, 512); + float fps = 0; + int i; + int count = 0; + float nms = .45; + + while(1){ + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + layer l = net->layers[net->n-1]; + + show_image(in, base); + + int nboxes = 0; + float *X = in_s.data; + network_predict(net, X); + detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 1, &nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + + 
for(i = 0; i < nboxes; ++i){ + if(dets[i].prob[class] > thresh){ + box b = dets[i].bbox; + int size = b.w*in.w > b.h*in.h ? b.w*in.w : b.h*in.h; + int dx = b.x*in.w-size/2.; + int dy = b.y*in.h-size/2.; + image bim = crop_image(in, dx, dy, size, size); + char buff[2048]; + sprintf(buff, "results/extract/%07d", count); + ++count; + save_image(bim, buff); + free_image(bim); + } + } + free_detections(dets, nboxes); + + + free_image(in_s); + free_image(in); + + + float curr = 0; + fps = .9*fps + .1*curr; + for(i = 0; i < skip; ++i){ + image in = get_image_from_stream(cap); + free_image(in); + } + } + #endif +} +*/ + +/* +void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets) +{ + network_predict_image(net, im); + layer l = net->layers[net->n-1]; + int nboxes = num_boxes(net); + fill_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 0, dets); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); +} +*/ + +void infer_detector(char *datacfg, char *cfgfile, char *weightfile) +{ + int curr = 0; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *test_list_cfg = option_find_str(options, "valid", "data/valid.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char test_list[256]; + sprintf(test_list, "%s/%s", env, test_list_cfg); + // Ruihao + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + m = net->max_batches * net->batch; + free_list(plist); + + clock_t time; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = classes; + args.n = net->batch; + args.m = 0; + args.labels = 0; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(curr = net->batch; curr <= m; curr += net->batch){ 
+ // while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + if(curr < m){ + args.paths = paths + curr; + if (curr + net->batch > m) args.n = m - curr; + load_thread = load_data_in_thread(args); + } + fprintf(stderr, "Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + + fprintf(stderr, "%lf seconds, %d images, %d total\n", sec(clock()-time), val.X.rows, curr); + free_matrix(pred); + free_data(val); + } +} + +void run_detector(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .5); + float hier_thresh = find_float_arg(argc, argv, "-hier", .5); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + int avg = find_int_arg(argc, argv, "-avg", 3); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + char *outfile = find_char_arg(argc, argv, "-out", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int clear = find_arg(argc, argv, "-clear"); + int fullscreen = find_arg(argc, argv, "-fullscreen"); + int width = find_int_arg(argc, argv, "-w", 0); + int height = find_int_arg(argc, argv, "-h", 0); + int fps = find_int_arg(argc, argv, "-fps", 0); + //int class = find_int_arg(argc, argv, "-class", 0); + + char *datacfg = argv[3]; + char *cfg = argv[4]; + char *weights 
= (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen); + else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile); + // Ruihao + else if(0==strcmp(argv[2], "infer")) infer_detector(datacfg, cfg, weights); + // Ruihao + else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile); + else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) { + list *options = read_data_cfg(datacfg); + int classes = option_find_int(options, "classes", 20); + char *name_list = option_find_str(options, "names", "data/names.list"); + char **names = get_labels(name_list); + demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen); + } + //else if(0==strcmp(argv[2], "extract")) extract_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip); + //else if(0==strcmp(argv[2], "censor")) censor_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip); +} diff --git a/workloads/realworld/uvm/darknet/examples/detector.py b/workloads/realworld/uvm/darknet/examples/detector.py new file mode 100644 index 0000000000000000000000000000000000000000..40bb365e68211c513db9d63847ac95070f5eab98 --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/detector.py @@ -0,0 +1,27 @@ +# Stupid python path shit. +# Instead just add darknet.py to somewhere in your python path +# OK actually that might not be a great idea, idk, work in progress +# Use at your own risk. 
or don't, i don't care + +import sys, os +sys.path.append(os.path.join(os.getcwd(),'python/')) + +import darknet as dn +import pdb + +dn.set_gpu(0) +net = dn.load_net("cfg/yolo-thor.cfg", "/home/pjreddie/backup/yolo-thor_final.weights", 0) +meta = dn.load_meta("cfg/thor.data") +r = dn.detect(net, meta, "data/bedroom.jpg") +print r + +# And then down here you could detect a lot more images like: +r = dn.detect(net, meta, "data/eagle.jpg") +print r +r = dn.detect(net, meta, "data/giraffe.jpg") +print r +r = dn.detect(net, meta, "data/horses.jpg") +print r +r = dn.detect(net, meta, "data/person.jpg") +print r + diff --git a/workloads/realworld/uvm/darknet/examples/dice.c b/workloads/realworld/uvm/darknet/examples/dice.c new file mode 100644 index 0000000000000000000000000000000000000000..f56d76c0bb66c7f630ba1c4d1dc9195398b87cfb --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/dice.c @@ -0,0 +1,116 @@ +#include "darknet.h" + +char *dice_labels[] = {"face1","face2","face3","face4","face5","face6"}; + +void train_dice(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + int i = *net.seen/imgs; + char **labels = dice_labels; + list *plist = get_paths("data/dice/dice.train.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + while(1){ + ++i; + time=clock(); + data train = load_data_old(paths, imgs, plist->size, labels, 6, net.w, net.h); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, 
%ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); + free_data(train); + if((i % 100) == 0) net.learning_rate *= .1; + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, i); + save_weights(net, buff); + } + } +} + +void validate_dice(char *filename, char *weightfile) +{ + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + char **labels = dice_labels; + list *plist = get_paths("data/dice/dice.val.list"); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + data val = load_data_old(paths, m, 0, labels, 6, net.w, net.h); + float *acc = network_accuracies(net, val, 2); + printf("Validation Accuracy: %f, %d images\n", acc[0], m); + free_data(val); +} + +void test_dice(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + int i = 0; + char **names = dice_labels; + char buff[256]; + char *input = buff; + int indexes[6]; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, net.w, net.h); + float *X = im.data; + float *predictions = network_predict(net, X); + top_predictions(net, 6, indexes); + for(i = 0; i < 6; ++i){ + int index = indexes[i]; + printf("%s: %f\n", names[index], predictions[index]); + } + free_image(im); + if (filename) break; + } +} + +void run_dice(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "test")) test_dice(cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_dice(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_dice(cfg, weights); +} + diff --git a/workloads/realworld/uvm/darknet/examples/go.c b/workloads/realworld/uvm/darknet/examples/go.c new file mode 100644 index 0000000000000000000000000000000000000000..688579dcb3a3e35e9a79b8fb8aa684f28f44290d --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/go.c @@ -0,0 +1,1370 @@ +#include "darknet.h" + +#include +#include +#include + +int inverted = 1; +int noi = 1; +static const int nind = 10; +int legal_go(float *b, float *ko, int p, int r, int c); +int check_ko(float *x, float *ko); + +typedef struct { + char **data; + int n; +} moves; + +char *fgetgo(FILE *fp) +{ + if(feof(fp)) return 0; + size_t size = 96; + char *line = malloc(size*sizeof(char)); + if(size != fread(line, sizeof(char), size, fp)){ + free(line); + return 0; + } + + return line; +} + +moves load_go_moves(char *filename) +{ + moves m; + m.n = 128; + m.data = calloc(128, sizeof(char*)); + FILE *fp = fopen(filename, "rb"); + int count = 0; + char *line = 0; + while ((line = fgetgo(fp))) { + if (count >= m.n) { + m.n *= 2; + m.data = realloc(m.data, m.n*sizeof(char*)); + } + m.data[count] = line; + ++count; + } + printf("%d\n", count); + m.n = count; + m.data = realloc(m.data, count*sizeof(char*)); + return m; +} + +void string_to_board(char *s, float *board) +{ + int i, j; + memset(board, 0, 2*19*19*sizeof(float)); + int count = 0; + for(i = 0; i < 91; ++i){ + char c = s[i]; + for(j = 0; j < 4; ++j){ + int me = (c >> (2*j)) & 1; + int you = (c >> (2*j + 1)) & 1; + if (me) board[count] = 1; + else if (you) board[count + 19*19] = 1; + ++count; + if(count >= 19*19) break; + } + } +} + +void board_to_string(char *s, float *board) +{ + int i, j; + memset(s, 0, (19*19/4+1)*sizeof(char)); + int count = 0; + for(i = 0; i < 91; ++i){ + for(j = 0; j < 4; ++j){ + int me = 
(board[count] == 1); + int you = (board[count + 19*19] == 1); + if (me) s[i] = s[i] | (1<<(2*j)); + if (you) s[i] = s[i] | (1<<(2*j + 1)); + ++count; + if(count >= 19*19) break; + } + } +} + +static int occupied(float *b, int i) +{ + if (b[i]) return 1; + if (b[i+19*19]) return -1; + return 0; +} + +data random_go_moves(moves m, int n) +{ + data d = {0}; + d.X = make_matrix(n, 19*19*3); + d.y = make_matrix(n, 19*19+2); + int i, j; + for(i = 0; i < n; ++i){ + float *board = d.X.vals[i]; + float *label = d.y.vals[i]; + char *b = m.data[rand()%m.n]; + int player = b[0] - '0'; + int result = b[1] - '0'; + int row = b[2]; + int col = b[3]; + string_to_board(b+4, board); + if(player > 0) for(j = 0; j < 19*19; ++j) board[19*19*2 + j] = 1; + label[19*19+1] = (player==result); + if(row >= 19 || col >= 19){ + label[19*19] = 1; + } else { + label[col + 19*row] = 1; + if(occupied(board, col + 19*row)) printf("hey\n"); + } + + int flip = rand()%2; + int rotate = rand()%4; + image in = float_to_image(19, 19, 3, board); + image out = float_to_image(19, 19, 1, label); + if(flip){ + flip_image(in); + flip_image(out); + } + rotate_image_cw(in, rotate); + rotate_image_cw(out, rotate); + } + return d; +} + + +void train_go(char *cfgfile, char *weightfile, char *filename, int *gpus, int ngpus, int clear) +{ + int i; + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + network *net = nets[0]; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + + char buff[256]; + moves m = load_go_moves(filename); + //moves m = load_go_moves("games.txt"); + + int N = 
m.n; + printf("Moves: %d\n", N); + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time=what_time_is_it_now(); + + data train = random_go_moves(m, net->batch*net->subdivisions*ngpus); + printf("Loaded: %lf seconds\n", what_time_is_it_now() - time); + time=what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 10); + } +#else + loss = train_network(net, train); +#endif + free_data(train); + + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory,base, epoch); + save_weights(net, buff); + + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + if(get_current_batch(net)%10000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_%ld.backup",backup_directory,base,get_current_batch(net)); + save_weights(net, buff); + } + } + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free(base); +} + +static void propagate_liberty(float *board, int *lib, int *visited, int row, int col, int side) +{ + if (row < 0 || row > 18 || col < 0 || col > 18) return; + int index = row*19 + col; + if (occupied(board,index) != side) return; + if (visited[index]) return; + visited[index] = 1; + lib[index] += 1; + propagate_liberty(board, lib, visited, row+1, col, side); + propagate_liberty(board, lib, visited, row-1, col, side); + propagate_liberty(board, lib, visited, row, col+1, side); + propagate_liberty(board, lib, visited, row, col-1, side); 
+} + + +static int *calculate_liberties(float *board) +{ + int *lib = calloc(19*19, sizeof(int)); + int visited[19*19]; + int i, j; + for(j = 0; j < 19; ++j){ + for(i = 0; i < 19; ++i){ + memset(visited, 0, 19*19*sizeof(int)); + int index = j*19 + i; + if(!occupied(board,index)){ + if ((i > 0) && occupied(board,index - 1)) propagate_liberty(board, lib, visited, j, i-1, occupied(board,index-1)); + if ((i < 18) && occupied(board,index + 1)) propagate_liberty(board, lib, visited, j, i+1, occupied(board,index+1)); + if ((j > 0) && occupied(board,index - 19)) propagate_liberty(board, lib, visited, j-1, i, occupied(board,index-19)); + if ((j < 18) && occupied(board,index + 19)) propagate_liberty(board, lib, visited, j+1, i, occupied(board,index+19)); + } + } + } + return lib; +} + +void print_board(FILE *stream, float *board, int player, int *indexes) +{ + int i,j,n; + fprintf(stream, " "); + for(i = 0; i < 19; ++i){ + fprintf(stream, "%c ", 'A' + i + 1*(i > 7 && noi)); + } + fprintf(stream, "\n"); + for(j = 0; j < 19; ++j){ + fprintf(stream, "%2d", (inverted) ? 
19-j : j+1); + for(i = 0; i < 19; ++i){ + int index = j*19 + i; + if(indexes){ + int found = 0; + for(n = 0; n < nind; ++n){ + if(index == indexes[n]){ + found = 1; + /* + if(n == 0) fprintf(stream, "\uff11"); + else if(n == 1) fprintf(stream, "\uff12"); + else if(n == 2) fprintf(stream, "\uff13"); + else if(n == 3) fprintf(stream, "\uff14"); + else if(n == 4) fprintf(stream, "\uff15"); + */ + fprintf(stream, " %d", n+1); + } + } + if(found) continue; + } + //if(board[index]*-swap > 0) fprintf(stream, "\u25C9 "); + //else if(board[index]*-swap < 0) fprintf(stream, "\u25EF "); + if (occupied(board, index) == player) fprintf(stream, " X"); + else if (occupied(board, index) ==-player) fprintf(stream, " O"); + else fprintf(stream, " ."); + } + fprintf(stream, "\n"); + } +} + +void flip_board(float *board) +{ + int i; + for(i = 0; i < 19*19; ++i){ + float swap = board[i]; + board[i] = board[i+19*19]; + board[i+19*19] = swap; + board[i+19*19*2] = 1-board[i+19*19*2]; + } +} + +float predict_move2(network *net, float *board, float *move, int multi) +{ + float *output = network_predict(net, board); + copy_cpu(19*19+1, output, 1, move, 1); + float result = output[19*19 + 1]; + int i; + if(multi){ + image bim = float_to_image(19, 19, 3, board); + for(i = 1; i < 8; ++i){ + rotate_image_cw(bim, i); + if(i >= 4) flip_image(bim); + + float *output = network_predict(net, board); + image oim = float_to_image(19, 19, 1, output); + result += output[19*19 + 1]; + + if(i >= 4) flip_image(oim); + rotate_image_cw(oim, -i); + + axpy_cpu(19*19+1, 1, output, 1, move, 1); + + if(i >= 4) flip_image(bim); + rotate_image_cw(bim, -i); + } + result = result/8; + scal_cpu(19*19+1, 1./8., move, 1); + } + for(i = 0; i < 19*19; ++i){ + if(board[i] || board[i+19*19]) move[i] = 0; + } + return result; +} + +static void remove_connected(float *b, int *lib, int p, int r, int c) +{ + if (r < 0 || r >= 19 || c < 0 || c >= 19) return; + if (occupied(b, r*19 + c) != p) return; + if (lib[r*19 + c] != 1) 
return; + b[r*19 + c] = 0; + b[19*19 + r*19 + c] = 0; + remove_connected(b, lib, p, r+1, c); + remove_connected(b, lib, p, r-1, c); + remove_connected(b, lib, p, r, c+1); + remove_connected(b, lib, p, r, c-1); +} + + +void move_go(float *b, int p, int r, int c) +{ + int *l = calculate_liberties(b); + if(p > 0) b[r*19 + c] = 1; + else b[19*19 + r*19 + c] = 1; + remove_connected(b, l, -p, r+1, c); + remove_connected(b, l, -p, r-1, c); + remove_connected(b, l, -p, r, c+1); + remove_connected(b, l, -p, r, c-1); + free(l); +} + +int compare_board(float *a, float *b) +{ + if(memcmp(a, b, 19*19*3*sizeof(float)) == 0) return 1; + return 0; +} + +typedef struct mcts_tree{ + float *board; + struct mcts_tree **children; + float *prior; + int *visit_count; + float *value; + float *mean; + float *prob; + int total_count; + float result; + int done; + int pass; +} mcts_tree; + +void free_mcts(mcts_tree *root) +{ + if(!root) return; + int i; + free(root->board); + for(i = 0; i < 19*19+1; ++i){ + if(root->children[i]) free_mcts(root->children[i]); + } + free(root->children); + free(root->prior); + free(root->visit_count); + free(root->value); + free(root->mean); + free(root->prob); + free(root); +} + +float *network_predict_rotations(network *net, float *next) +{ + int n = net->batch; + float *in = calloc(19*19*3*n, sizeof(float)); + image im = float_to_image(19, 19, 3, next); + int i,j; + int *inds = random_index_order(0, 8); + for(j = 0; j < n; ++j){ + i = inds[j]; + rotate_image_cw(im, i); + if(i >= 4) flip_image(im); + memcpy(in + 19*19*3*j, im.data, 19*19*3*sizeof(float)); + if(i >= 4) flip_image(im); + rotate_image_cw(im, -i); + } + float *pred = network_predict(net, in); + for(j = 0; j < n; ++j){ + i = inds[j]; + image im = float_to_image(19, 19, 1, pred + j*(19*19 + 2)); + if(i >= 4) flip_image(im); + rotate_image_cw(im, -i); + if(j > 0){ + axpy_cpu(19*19+2, 1, im.data, 1, pred, 1); + } + } + free(in); + free(inds); + scal_cpu(19*19+2, 1./n, pred, 1); + return pred; +} + 
+mcts_tree *expand(float *next, float *ko, network *net) +{ + mcts_tree *root = calloc(1, sizeof(mcts_tree)); + root->board = next; + root->children = calloc(19*19+1, sizeof(mcts_tree*)); + root->prior = calloc(19*19 + 1, sizeof(float)); + root->prob = calloc(19*19 + 1, sizeof(float)); + root->mean = calloc(19*19 + 1, sizeof(float)); + root->value = calloc(19*19 + 1, sizeof(float)); + root->visit_count = calloc(19*19 + 1, sizeof(int)); + root->total_count = 1; + int i; + float *pred = network_predict_rotations(net, next); + copy_cpu(19*19+1, pred, 1, root->prior, 1); + float val = 2*pred[19*19 + 1] - 1; + root->result = val; + for(i = 0; i < 19*19+1; ++i) { + root->visit_count[i] = 0; + root->value[i] = 0; + root->mean[i] = val; + if(i < 19*19 && occupied(next, i)){ + root->value[i] = -1; + root->mean[i] = -1; + root->prior[i] = 0; + } + } + //print_board(stderr, next, flip?-1:1, 0); + return root; +} + +float *copy_board(float *board) +{ + float *next = calloc(19*19*3, sizeof(float)); + copy_cpu(19*19*3, board, 1, next, 1); + return next; +} + +float select_mcts(mcts_tree *root, network *net, float *prev, float cpuct) +{ + if(root->done) return -root->result; + int i; + float max = -1000; + int max_i = 0; + for(i = 0; i < 19*19+1; ++i){ + root->prob[i] = root->mean[i] + cpuct*root->prior[i] * sqrt(root->total_count) / (1. 
+ root->visit_count[i]); + if(root->prob[i] > max){ + max = root->prob[i]; + max_i = i; + } + } + float val; + i = max_i; + root->visit_count[i]++; + root->total_count++; + if (root->children[i]) { + val = select_mcts(root->children[i], net, root->board, cpuct); + } else { + if(max_i < 19*19 && !legal_go(root->board, prev, 1, max_i/19, max_i%19)) { + root->mean[i] = -1; + root->value[i] = -1; + root->prior[i] = 0; + --root->total_count; + return select_mcts(root, net, prev, cpuct); + //printf("Detected ko\n"); + //getchar(); + } else { + float *next = copy_board(root->board); + if (max_i < 19*19) { + move_go(next, 1, max_i / 19, max_i % 19); + } + flip_board(next); + root->children[i] = expand(next, root->board, net); + val = -root->children[i]->result; + if(max_i == 19*19){ + root->children[i]->pass = 1; + if (root->pass){ + root->children[i]->done = 1; + } + } + } + } + root->value[i] += val; + root->mean[i] = root->value[i]/root->visit_count[i]; + return -val; +} + +mcts_tree *run_mcts(mcts_tree *tree, network *net, float *board, float *ko, int player, int n, float cpuct, float secs) +{ + int i; + double t = what_time_is_it_now(); + if(player < 0) flip_board(board); + if(!tree) tree = expand(copy_board(board), ko, net); + assert(compare_board(tree->board, board)); + for(i = 0; i < n; ++i){ + if (secs > 0 && (what_time_is_it_now() - t) > secs) break; + int max_i = max_int_index(tree->visit_count, 19*19+1); + if (tree->visit_count[max_i] >= n) break; + select_mcts(tree, net, ko, cpuct); + } + if(player < 0) flip_board(board); + //fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + return tree; +} + +mcts_tree *move_mcts(mcts_tree *tree, int index) +{ + if(index < 0 || index > 19*19 || !tree || !tree->children[index]) { + free_mcts(tree); + tree = 0; + } else { + mcts_tree *swap = tree; + tree = tree->children[index]; + swap->children[index] = 0; + free_mcts(swap); + } + return tree; +} + +typedef struct { + float value; + float mcts; + int row; + int 
col; +} move; + +move pick_move(mcts_tree *tree, float temp, int player) +{ + int i; + float probs[19*19+1] = {0}; + move m = {0}; + double sum = 0; + /* + for(i = 0; i < 19*19+1; ++i){ + probs[i] = tree->visit_count[i]; + } + */ + //softmax(probs, 19*19+1, temp, 1, probs); + for(i = 0; i < 19*19+1; ++i){ + sum += pow(tree->visit_count[i], 1./temp); + } + for(i = 0; i < 19*19+1; ++i){ + probs[i] = pow(tree->visit_count[i], 1./temp) / sum; + } + + int index = sample_array(probs, 19*19+1); + m.row = index / 19; + m.col = index % 19; + m.value = (tree->result+1.)/2.; + m.mcts = (tree->mean[index]+1.)/2.; + + int indexes[nind]; + top_k(probs, 19*19+1, nind, indexes); + print_board(stderr, tree->board, player, indexes); + + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", index/19, index%19, tree->result, tree->prior[index], probs[index], tree->mean[index], (tree->children[index])?tree->children[index]->result:0, tree->visit_count[index]); + int ind = max_index(probs, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, tree->result, tree->prior[ind], probs[ind], tree->mean[ind], (tree->children[ind])?tree->children[ind]->result:0, tree->visit_count[ind]); + ind = max_index(tree->prior, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, tree->result, tree->prior[ind], probs[ind], tree->mean[ind], (tree->children[ind])?tree->children[ind]->result:0, tree->visit_count[ind]); + return m; +} + +/* + float predict_move(network *net, float *board, float *move, int multi, float *ko, float temp) + { + + int i; + + int max_v = 0; + int max_i = 0; + for(i = 0; i < 19*19+1; ++i){ + if(root->visit_count[i] > max_v){ + max_v = root->visit_count[i]; + max_i = i; + } + } + fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + int ind = max_index(root->mean, 
19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", max_i/19, max_i%19, root->result, root->prior[max_i], root->prob[max_i], root->mean[max_i], (root->children[max_i])?root->children[max_i]->result:0, root->visit_count[max_i]); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, root->result, root->prior[ind], root->prob[ind], root->mean[ind], (root->children[ind])?root->children[ind]->result:0, root->visit_count[ind]); + ind = max_index(root->prior, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, root->result, root->prior[ind], root->prob[ind], root->mean[ind], (root->children[ind])?root->children[ind]->result:0, root->visit_count[ind]); + if(root->result < -.9 && root->mean[max_i] < -.9) return -1000.f; + + float val = root->result; + free_mcts(root); + return val; + } + */ + +static int makes_safe_go(float *b, int *lib, int p, int r, int c){ + if (r < 0 || r >= 19 || c < 0 || c >= 19) return 0; + if (occupied(b,r*19 + c) == -p){ + if (lib[r*19 + c] > 1) return 0; + else return 1; + } + if (!occupied(b,r*19 + c)) return 1; + if (lib[r*19 + c] > 1) return 1; + return 0; +} + +int suicide_go(float *b, int p, int r, int c) +{ + int *l = calculate_liberties(b); + int safe = 0; + safe = safe || makes_safe_go(b, l, p, r+1, c); + safe = safe || makes_safe_go(b, l, p, r-1, c); + safe = safe || makes_safe_go(b, l, p, r, c+1); + safe = safe || makes_safe_go(b, l, p, r, c-1); + free(l); + return !safe; +} + +int check_ko(float *x, float *ko) +{ + if(!ko) return 0; + float curr[19*19*3]; + copy_cpu(19*19*3, x, 1, curr, 1); + if(curr[19*19*2] != ko[19*19*2]) flip_board(curr); + if(compare_board(curr, ko)) return 1; + return 0; +} + +int legal_go(float *b, float *ko, int p, int r, int c) +{ + if (occupied(b, r*19+c)) return 0; + float curr[19*19*3]; + 
copy_cpu(19*19*3, b, 1, curr, 1); + move_go(curr, p, r, c); + if(check_ko(curr, ko)) return 0; + if(suicide_go(b, p, r, c)) return 0; + return 1; +} + +/* + move generate_move(mcts_tree *root, network *net, int player, float *board, int multi, float temp, float *ko, int print) + { + move m = {0}; +//root = run_mcts(tree, network *net, float *board, float *ko, int n, float cpuct) +int i, j; +int empty = 1; +for(i = 0; i < 19*19; ++i){ +if (occupied(board, i)) { +empty = 0; +break; +} +} +if(empty) { +m.value = .5; +m.mcts = .5; +m.row = 3; +m.col = 15; +return m; +} + +float move[362]; +if (player < 0) flip_board(board); +float result = predict_move(net, board, move, multi, ko, temp); +if (player < 0) flip_board(board); +if(result == -1000.f) return -2; + +for(i = 0; i < 19; ++i){ +for(j = 0; j < 19; ++j){ +if (!legal_go(board, ko, player, i, j)) move[i*19 + j] = 0; +} +} + +int indexes[nind]; +top_k(move, 19*19+1, nind, indexes); + + +int max = max_index(move, 19*19+1); +int row = max / 19; +int col = max % 19; +int index = sample_array(move, 19*19+1); + +if(print){ +top_k(move, 19*19+1, nind, indexes); +for(i = 0; i < nind; ++i){ +if (!move[indexes[i]]) indexes[i] = -1; +} +print_board(stderr, board, 1, indexes); +fprintf(stderr, "%s To Move\n", player > 0 ? 
"X" : "O"); +fprintf(stderr, "%.2f%% Win Chance\n", (result+1)/2*100); +for(i = 0; i < nind; ++i){ +int index = indexes[i]; +int row = index / 19; +int col = index % 19; +if(row == 19){ +fprintf(stderr, "%d: Pass, %.2f%%\n", i+1, move[index]*100); +} else { +fprintf(stderr, "%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100); +} +} +} +if (row == 19) return -1; + +if (suicide_go(board, player, row, col)){ +return -1; +} + +if (suicide_go(board, player, index/19, index%19)){ +index = max; +} +if (index == 19*19) return -1; +return index; +} +*/ + +void valid_go(char *cfgfile, char *weightfile, int multi, char *filename) +{ + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + float *board = calloc(19*19*3, sizeof(float)); + float *move = calloc(19*19+2, sizeof(float)); + // moves m = load_go_moves("/home/pjreddie/backup/go.test"); + moves m = load_go_moves(filename); + + int N = m.n; + int i,j; + int correct = 0; + for (i = 0; i 0) for(j = 0; j < 19*19; ++j) board[19*19*2 + j] = 1; + predict_move2(net, board, move, multi); + int index = max_index(move, 19*19+1); + if(index == truth) ++correct; + printf("%d Accuracy %f\n", i, (float) correct/(i+1)); + } +} + +int print_game(float *board, FILE *fp) +{ + int i, j; + int count = 3; + fprintf(fp, "komi 6.5\n"); + fprintf(fp, "boardsize 19\n"); + fprintf(fp, "clear_board\n"); + for(j = 0; j < 19; ++j){ + for(i = 0; i < 19; ++i){ + if(occupied(board,j*19 + i) == 1) fprintf(fp, "play black %c%d\n", 'A'+i+(i>=8), 19-j); + if(occupied(board,j*19 + i) == -1) fprintf(fp, "play white %c%d\n", 'A'+i+(i>=8), 19-j); + if(occupied(board,j*19 + i)) ++count; + } + } + return count; +} + + +int stdin_ready() +{ + fd_set readfds; + FD_ZERO(&readfds); + + struct timeval 
timeout; + timeout.tv_sec = 0; + timeout.tv_usec = 0; + FD_SET(STDIN_FILENO, &readfds); + + if (select(1, &readfds, NULL, NULL, &timeout)){ + return 1; + } + return 0; +} + +mcts_tree *ponder(mcts_tree *tree, network *net, float *b, float *ko, int player, float cpuct) +{ + double t = what_time_is_it_now(); + int count = 0; + if (tree) count = tree->total_count; + while(!stdin_ready()){ + if (what_time_is_it_now() - t > 120) break; + tree = run_mcts(tree, net, b, ko, player, 100000, cpuct, .1); + } + fprintf(stderr, "Pondered %d moves...\n", tree->total_count - count); + return tree; +} + +void engine_go(char *filename, char *weightfile, int mcts_iters, float secs, float temp, float cpuct, int anon, int resign) +{ + mcts_tree *root = 0; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *one = calloc(19*19*3, sizeof(float)); + float *two = calloc(19*19*3, sizeof(float)); + int ponder_player = 0; + int passed = 0; + int move_num = 0; + int main_time = 0; + int byo_yomi_time = 0; + int byo_yomi_stones = 0; + int black_time_left = 0; + int black_stones_left = 0; + int white_time_left = 0; + int white_stones_left = 0; + float orig_time = secs; + int old_ponder = 0; + while(1){ + if(ponder_player){ + root = ponder(root, net, board, two, ponder_player, cpuct); + } + old_ponder = ponder_player; + ponder_player = 0; + char buff[256]; + int id = 0; + int has_id = (scanf("%d", &id) == 1); + scanf("%s", buff); + if (feof(stdin)) break; + fprintf(stderr, "%s\n", buff); + char ids[256]; + sprintf(ids, "%d", id); + //fprintf(stderr, "%s\n", buff); + if (!has_id) ids[0] = 0; + if (!strcmp(buff, "protocol_version")){ + printf("=%s 2\n\n", ids); + } else if (!strcmp(buff, "name")){ + if(anon){ + printf("=%s The Fool!\n\n", ids); + }else{ + printf("=%s DarkGo\n\n", ids); + } + } else if (!strcmp(buff, "time_settings")){ + ponder_player = old_ponder; + 
scanf("%d %d %d", &main_time, &byo_yomi_time, &byo_yomi_stones); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "time_left")){ + ponder_player = old_ponder; + char color[256]; + int time = 0, stones = 0; + scanf("%s %d %d", color, &time, &stones); + if (color[0] == 'b' || color[0] == 'B'){ + black_time_left = time; + black_stones_left = stones; + } else { + white_time_left = time; + white_stones_left = stones; + } + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "version")){ + if(anon){ + printf("=%s :-DDDD\n\n", ids); + }else { + printf("=%s 1.0. Want more DarkGo? You can find me on OGS, unlimited games, no waiting! https://online-go.com/user/view/434218\n\n", ids); + } + } else if (!strcmp(buff, "known_command")){ + char comm[256]; + scanf("%s", comm); + int known = (!strcmp(comm, "protocol_version") || + !strcmp(comm, "name") || + !strcmp(comm, "version") || + !strcmp(comm, "known_command") || + !strcmp(comm, "list_commands") || + !strcmp(comm, "quit") || + !strcmp(comm, "boardsize") || + !strcmp(comm, "clear_board") || + !strcmp(comm, "komi") || + !strcmp(comm, "final_status_list") || + !strcmp(comm, "play") || + !strcmp(comm, "genmove_white") || + !strcmp(comm, "genmove_black") || + !strcmp(comm, "fixed_handicap") || + !strcmp(comm, "genmove")); + if(known) printf("=%s true\n\n", ids); + else printf("=%s false\n\n", ids); + } else if (!strcmp(buff, "list_commands")){ + printf("=%s protocol_version\nshowboard\nname\nversion\nknown_command\nlist_commands\nquit\nboardsize\nclear_board\nkomi\nplay\ngenmove_black\ngenmove_white\ngenmove\nfinal_status_list\nfixed_handicap\n\n", ids); + } else if (!strcmp(buff, "quit")){ + break; + } else if (!strcmp(buff, "boardsize")){ + int boardsize = 0; + scanf("%d", &boardsize); + //fprintf(stderr, "%d\n", boardsize); + if(boardsize != 19){ + printf("?%s unacceptable size\n\n", ids); + } else { + root = move_mcts(root, -1); + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + move_num = 0; + printf("=%s 
\n\n", ids); + } + } else if (!strcmp(buff, "fixed_handicap")){ + int handicap = 0; + scanf("%d", &handicap); + int indexes[] = {72, 288, 300, 60, 180, 174, 186, 66, 294}; + int i; + for(i = 0; i < handicap; ++i){ + board[indexes[i]] = 1; + ++move_num; + } + root = move_mcts(root, -1); + } else if (!strcmp(buff, "clear_board")){ + passed = 0; + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + move_num = 0; + root = move_mcts(root, -1); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "komi")){ + float komi = 0; + scanf("%f", &komi); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "showboard")){ + printf("=%s \n", ids); + print_board(stdout, board, 1, 0); + printf("\n"); + } else if (!strcmp(buff, "play") || !strcmp(buff, "black") || !strcmp(buff, "white")){ + ++move_num; + char color[256]; + if(!strcmp(buff, "play")) + { + scanf("%s ", color); + } else { + scanf(" "); + color[0] = buff[0]; + } + char c; + int r; + int count = scanf("%c%d", &c, &r); + int player = (color[0] == 'b' || color[0] == 'B') ? 
1 : -1; + if((c == 'p' || c == 'P') && count < 2) { + passed = 1; + printf("=%s \n\n", ids); + char *line = fgetl(stdin); + free(line); + fflush(stdout); + fflush(stderr); + root = move_mcts(root, 19*19); + continue; + } else { + passed = 0; + } + if(c >= 'A' && c <= 'Z') c = c - 'A'; + if(c >= 'a' && c <= 'z') c = c - 'a'; + if(c >= 8) --c; + r = 19 - r; + fprintf(stderr, "move: %d %d\n", r, c); + + float *swap = two; + two = one; + one = swap; + move_go(board, player, r, c); + copy_cpu(19*19*3, board, 1, one, 1); + if(root) fprintf(stderr, "Prior: %f\n", root->prior[r*19 + c]); + if(root) fprintf(stderr, "Mean: %f\n", root->mean[r*19 + c]); + if(root) fprintf(stderr, "Result: %f\n", root->result); + root = move_mcts(root, r*19 + c); + if(root) fprintf(stderr, "Visited: %d\n", root->total_count); + else fprintf(stderr, "NOT VISITED\n"); + + printf("=%s \n\n", ids); + //print_board(stderr, board, 1, 0); + } else if (!strcmp(buff, "genmove") || !strcmp(buff, "genmove_black") || !strcmp(buff, "genmove_white")){ + ++move_num; + int player = 0; + if(!strcmp(buff, "genmove")){ + char color[256]; + scanf("%s", color); + player = (color[0] == 'b' || color[0] == 'B') ? 
1 : -1; + } else if (!strcmp(buff, "genmove_black")){ + player = 1; + } else { + player = -1; + } + if(player > 0){ + if(black_time_left <= 30) secs = 2.5; + else secs = orig_time; + } else { + if(white_time_left <= 30) secs = 2.5; + else secs = orig_time; + } + ponder_player = -player; + + //tree = generate_move(net, player, board, multi, .1, two, 1); + double t = what_time_is_it_now(); + root = run_mcts(root, net, board, two, player, mcts_iters, cpuct, secs); + fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + move m = pick_move(root, temp, player); + root = move_mcts(root, m.row*19 + m.col); + + + if(move_num > resign && m.value < .1 && m.mcts < .1){ + printf("=%s resign\n\n", ids); + } else if(m.row == 19){ + printf("=%s pass\n\n", ids); + passed = 0; + } else { + int row = m.row; + int col = m.col; + + float *swap = two; + two = one; + one = swap; + + move_go(board, player, row, col); + copy_cpu(19*19*3, board, 1, one, 1); + row = 19 - row; + if (col >= 8) ++col; + printf("=%s %c%d\n\n", ids, 'A' + col, row); + } + + } else if (!strcmp(buff, "p")){ + //print_board(board, 1, 0); + } else if (!strcmp(buff, "final_status_list")){ + char type[256]; + scanf("%s", type); + fprintf(stderr, "final_status\n"); + char *line = fgetl(stdin); + free(line); + if(type[0] == 'd' || type[0] == 'D'){ + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, "%s final_status_list dead\n", ids); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + while((l = fgetl(p))){ + printf("%s\n", l); + free(l); + } + } else { + printf("?%s unknown command\n\n", ids); + } + } else if (!strcmp(buff, "kgs-genmove_cleanup")){ + char type[256]; + scanf("%s", type); + fprintf(stderr, "kgs-genmove_cleanup\n"); + char *line = fgetl(stdin); + free(line); + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, 
"%s kgs-genmove_cleanup %s\n", ids, type); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + while((l = fgetl(p))){ + printf("%s\n", l); + free(l); + } + } else { + char *line = fgetl(stdin); + free(line); + printf("?%s unknown command\n\n", ids); + } + fflush(stdout); + fflush(stderr); + } + printf("%d %d %d\n",passed, black_stones_left, white_stones_left); +} + +void test_go(char *cfg, char *weights, int multi) +{ + int i; + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(time(0)); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *move = calloc(19*19+1, sizeof(float)); + int color = 1; + while(1){ + float result = predict_move2(net, board, move, multi); + printf("%.2f%% Win Chance\n", (result+1)/2*100); + + int indexes[nind]; + int row, col; + top_k(move, 19*19+1, nind, indexes); + print_board(stderr, board, color, indexes); + for(i = 0; i < nind; ++i){ + int index = indexes[i]; + row = index / 19; + col = index % 19; + if(row == 19){ + printf("%d: Pass, %.2f%%\n", i+1, move[index]*100); + } else { + printf("%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100); + } + } + //if(color == 1) printf("\u25EF Enter move: "); + //else printf("\u25C9 Enter move: "); + if(color == 1) printf("X Enter move: "); + else printf("O Enter move: "); + + char c; + char *line = fgetl(stdin); + int picked = 1; + int dnum = sscanf(line, "%d", &picked); + int cnum = sscanf(line, "%c", &c); + if (strlen(line) == 0 || dnum) { + --picked; + if (picked < nind){ + int index = indexes[picked]; + row = index / 19; + col = index % 19; + if(row < 19){ + move_go(board, 1, row, col); + } + } + } else if (cnum){ + if (c <= 'T' && c >= 'A'){ + int num = sscanf(line, "%c %d", &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num 
== 2) move_go(board, 1, row, col); + } else if (c == 'p') { + // Pass + } else if(c=='b' || c == 'w'){ + char g; + int num = sscanf(line, "%c %c %d", &g, &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num == 3) { + int mc = (g == 'b') ? 1 : -1; + if (mc == color) { + board[row*19 + col] = 1; + } else { + board[19*19 + row*19 + col] = 1; + } + } + } else if(c == 'c'){ + char g; + int num = sscanf(line, "%c %c %d", &g, &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num == 3) { + board[row*19 + col] = 0; + board[19*19 + row*19 + col] = 0; + } + } + } + free(line); + flip_board(board); + color = -color; + } +} + +float score_game(float *board) +{ + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, "final_score\n"); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + float score = 0; + char player = 0; + while((l = fgetl(p))){ + fprintf(stderr, "%s \t", l); + int n = sscanf(l, "= %c+%f", &player, &score); + free(l); + if (n == 2) break; + } + if(player == 'W') score = -score; + pclose(p); + return score; +} + +void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi) +{ + mcts_tree *tree1 = 0; + mcts_tree *tree2 = 0; + network *net = load_network(filename, weightfile, 0); + //set_batch_network(net, 1); + + network *net2; + if (f2) { + net2 = parse_network_cfg(f2); + if(w2){ + load_weights(net2, w2); + } + } else { + net2 = calloc(1, sizeof(network)); + *net2 = *net; + } + srand(time(0)); + char boards[600][93]; + int count = 0; + //set_batch_network(net, 1); + //set_batch_network(net2, 1); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *one = calloc(19*19*3, sizeof(float)); + float *two = calloc(19*19*3, sizeof(float)); + int done = 0; + int player = 1; + int p1 = 
0; + int p2 = 0; + int total = 0; + float temp = .1; + int mcts_iters = 500; + float cpuct = 5; + while(1){ + if (done){ + tree1 = move_mcts(tree1, -1); + tree2 = move_mcts(tree2, -1); + float score = score_game(board); + if((score > 0) == (total%2==0)) ++p1; + else ++p2; + ++total; + fprintf(stderr, "Total: %d, Player 1: %f, Player 2: %f\n", total, (float)p1/total, (float)p2/total); + sleep(1); + /* + int i = (score > 0)? 0 : 1; + int j; + for(; i < count; i += 2){ + for(j = 0; j < 93; ++j){ + printf("%c", boards[i][j]); + } + printf("\n"); + } + */ + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + player = 1; + done = 0; + count = 0; + fflush(stdout); + fflush(stderr); + } + //print_board(stderr, board, 1, 0); + //sleep(1); + + if ((total%2==0) == (player==1)){ + //mcts_iters = 4500; + cpuct = 5; + } else { + //mcts_iters = 500; + cpuct = 1; + } + network *use = ((total%2==0) == (player==1)) ? net : net2; + mcts_tree *t = ((total%2==0) == (player==1)) ? tree1 : tree2; + t = run_mcts(t, use, board, two, player, mcts_iters, cpuct, 0); + move m = pick_move(t, temp, player); + if(((total%2==0) == (player==1))) tree1 = t; + else tree2 = t; + + tree1 = move_mcts(tree1, m.row*19 + m.col); + tree2 = move_mcts(tree2, m.row*19 + m.col); + + if(m.row == 19){ + done = 1; + continue; + } + int row = m.row; + int col = m.col; + + float *swap = two; + two = one; + one = swap; + + if(player < 0) flip_board(board); + boards[count][0] = row; + boards[count][1] = col; + board_to_string(boards[count] + 2, board); + if(player < 0) flip_board(board); + ++count; + + move_go(board, player, row, col); + copy_cpu(19*19*3, board, 1, one, 1); + + player = -player; + } +} + +void run_go(int argc, char **argv) +{ + //boards_go(); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + 
if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + int clear = find_arg(argc, argv, "-clear"); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *c2 = (argc > 5) ? argv[5] : 0; + char *w2 = (argc > 6) ? argv[6] : 0; + int multi = find_arg(argc, argv, "-multi"); + int anon = find_arg(argc, argv, "-anon"); + int iters = find_int_arg(argc, argv, "-iters", 500); + int resign = find_int_arg(argc, argv, "-resign", 175); + float cpuct = find_float_arg(argc, argv, "-cpuct", 5); + float temp = find_float_arg(argc, argv, "-temp", .1); + float time = find_float_arg(argc, argv, "-time", 0); + if(0==strcmp(argv[2], "train")) train_go(cfg, weights, c2, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) valid_go(cfg, weights, multi, c2); + else if(0==strcmp(argv[2], "self")) self_go(cfg, weights, c2, w2, multi); + else if(0==strcmp(argv[2], "test")) test_go(cfg, weights, multi); + else if(0==strcmp(argv[2], "engine")) engine_go(cfg, weights, iters, time, temp, cpuct, anon, resign); +} + + diff --git a/workloads/realworld/uvm/darknet/examples/instance-segmenter.c b/workloads/realworld/uvm/darknet/examples/instance-segmenter.c new file mode 100644 index 0000000000000000000000000000000000000000..664e71426d58e19f758bab198783eac178a3cdc4 --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/instance-segmenter.c @@ -0,0 +1,267 @@ +#include "darknet.h" +#include +#include + +void normalize_image2(image p); +void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + 
network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + image pred = get_network_image(net); + + image embed = pred; + embed.c = 3; + embed.data += embed.w*embed.h*80; + + int div = net->w/pred.w; + assert(pred.w * div == net->w); + assert(pred.h * div == net->h); + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.scale = div; + args.num_boxes = 90; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.classes = 80; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = ISEG_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = 
train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(display){ + image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]); + image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]); + pred.c = 80; + image mask = mask_to_rgb(tr); + image prmask = mask_to_rgb(pred); + image ecopy = copy_image(embed); + normalize_image2(ecopy); + show_image(ecopy, "embed", 1); + free_image(ecopy); + + show_image(im, "input", 1); + show_image(prmask, "pred", 1); + show_image(mask, "truth", 100); + free_image(mask); + free_image(prmask); + } + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_isegmenter(char *datafile, char *cfg, char *weights, char *filename) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 
0); + image sized = letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + show_image(sized, "orig", 1); + show_image(prmask, "pred", 0); + free_image(im); + free_image(sized); + free_image(prmask); + if (filename) break; + } +} + + +void demo_isegmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Classifier Demo\n"); + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = letterbox_image(in, net->w, net->h); + + network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + show_image(prmask, "Segmenter", 10); + + free_image(in_s); + free_image(in); + free_image(prmask); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_isegmenter(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') 
++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_isegmenter(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_isegmenter(data, cfg, weights, gpus, ngpus, clear, display); + else if(0==strcmp(argv[2], "demo")) demo_isegmenter(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/uvm/darknet/examples/lsd.c b/workloads/realworld/uvm/darknet/examples/lsd.c new file mode 100644 index 0000000000000000000000000000000000000000..4ab944c884b9df422cd2b273b1faee128f2ab112 --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/lsd.c @@ -0,0 +1,1378 @@ +#include +#include "darknet.h" + +/* +void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg, char *aweight, int clear) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + //char *style_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *style_images = "/home/pjreddie/zelda.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + network fnet = load_network(fcfg, fweight, clear); + network gnet = load_network(gcfg, gweight, clear); + network anet = load_network(acfg, aweight, clear); + char *gbase = basecfg(gcfg); + char *abase = basecfg(acfg); + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + int i = *gnet->seen/imgs; 
+ data train, tbuffer; + data style, sbuffer; + + + list *slist = get_paths(style_images); + char **spaths = (char **)list_to_array(slist); + + list *tlist = get_paths(train_images); + char **tpaths = (char **)list_to_array(tlist); + + load_args targs= get_base_args(gnet); + targs.paths = tpaths; + targs.n = imgs; + targs.m = tlist->size; + targs.d = &tbuffer; + targs.type = CLASSIFICATION_DATA; + targs.classes = 1; + char *ls[1] = {"zelda"}; + targs.labels = ls; + + load_args sargs = get_base_args(gnet); + sargs.paths = spaths; + sargs.n = imgs; + sargs.m = slist->size; + sargs.d = &sbuffer; + sargs.type = CLASSIFICATION_DATA; + sargs.classes = 1; + sargs.labels = ls; + + pthread_t tload_thread = load_data_in_thread(targs); + pthread_t sload_thread = load_data_in_thread(sargs); + clock_t time; + + float aloss_avg = -1; + float floss_avg = -1; + + fnet->train=1; + int x_size = fnet->inputs*fnet->batch; + int y_size = fnet->truths*fnet->batch; + float *X = calloc(x_size, sizeof(float)); + float *y = calloc(y_size, sizeof(float)); + + + int ax_size = anet->inputs*anet->batch; + int ay_size = anet->truths*anet->batch; + fill_gpu(ay_size, .9, anet->truth_gpu, 1); + anet->delta_gpu = cuda_make_array(0, ax_size); + anet->train = 1; + + int gx_size = gnet->inputs*gnet->batch; + int gy_size = gnet->truths*gnet->batch; + gstate.input = cuda_make_array(0, gx_size); + gstate.truth = 0; + gstate.delta = 0; + gstate.train = 1; + + while (get_current_batch(gnet) < gnet->max_batches) { + i += 1; + time=clock(); + pthread_join(tload_thread, 0); + pthread_join(sload_thread, 0); + train = tbuffer; + style = sbuffer; + tload_thread = load_data_in_thread(targs); + sload_thread = load_data_in_thread(sargs); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data generated = copy_data(train); + time=clock(); + + int j, k; + float floss = 0; + for(j = 0; j < fnet->subdivisions; ++j){ + layer imlayer = gnet->layers[gnet->n - 1]; + get_next_batch(train, fnet->batch, j*fnet->batch, 
X, y); + + cuda_push_array(fstate.input, X, x_size); + cuda_push_array(gstate.input, X, gx_size); + *gnet->seen += gnet->batch; + + forward_network_gpu(fnet, fstate); + float *feats = fnet->layers[fnet->n - 2].output_gpu; + copy_gpu(y_size, feats, 1, fstate.truth, 1); + + forward_network_gpu(gnet, gstate); + float *gen = gnet->layers[gnet->n-1].output_gpu; + copy_gpu(x_size, gen, 1, fstate.input, 1); + + fill_gpu(x_size, 0, fstate.delta, 1); + forward_network_gpu(fnet, fstate); + backward_network_gpu(fnet, fstate); + //HERE + + astate.input = gen; + fill_gpu(ax_size, 0, astate.delta, 1); + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + float *delta = imlayer.delta_gpu; + fill_gpu(x_size, 0, delta, 1); + scal_gpu(x_size, 100, astate.delta, 1); + scal_gpu(x_size, .001, fstate.delta, 1); + axpy_gpu(x_size, 1, fstate.delta, 1, delta, 1); + axpy_gpu(x_size, 1, astate.delta, 1, delta, 1); + + //fill_gpu(x_size, 0, delta, 1); + //cuda_push_array(delta, X, x_size); + //axpy_gpu(x_size, -1, imlayer.output_gpu, 1, delta, 1); + //printf("pix error: %f\n", cuda_mag_array(delta, x_size)); + printf("fea error: %f\n", cuda_mag_array(fstate.delta, x_size)); + printf("adv error: %f\n", cuda_mag_array(astate.delta, x_size)); + //axpy_gpu(x_size, 1, astate.delta, 1, delta, 1); + + backward_network_gpu(gnet, gstate); + + floss += get_network_cost(fnet) /(fnet->subdivisions*fnet->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1); + generated.y.vals[index][0] = .1; + style.y.vals[index][0] = .9; + } + } + +*/ +/* + image sim = float_to_image(anet->w, anet->h, anet->c, style.X.vals[j]); + show_image(sim, "style"); + cvWaitKey(0); + */ + /* + + harmless_update_network_gpu(anet); + + data merge = concat_data(style, generated); + randomize_data(merge); + 
float aloss = train_network(anet, merge); + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(generated); + free_data(style); + if (aloss_avg < 0) aloss_avg = aloss; + if (floss_avg < 0) floss_avg = floss; + aloss_avg = aloss_avg*.9 + aloss*.1; + floss_avg = floss_avg*.9 + floss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, floss, aloss, floss_avg, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, gbase, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, gbase); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } +#endif +} +*/ + +/* +void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/train1.txt"; + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network net = load_network(cfg, weight, clear); + network anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = net->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + 
+ load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.type = CLASSIFICATION_DATA; + args.classes = 1; + char *ls[1] = {"coco"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + network_state gstate = {0}; + gstate.index = 0; + gstate.net = net; + int x_size = get_network_input_size(net)*net->batch; + int y_size = x_size; + gstate.input = cuda_make_array(0, x_size); + gstate.truth = cuda_make_array(0, y_size); + gstate.delta = 0; + gstate.train = 1; + float *pixs = calloc(x_size, sizeof(float)); + float *graypixs = calloc(x_size, sizeof(float)); + float *y = calloc(y_size, sizeof(float)); + + network_state astate = {0}; + astate.index = 0; + astate.net = anet; + int ay_size = get_network_output_size(anet)*anet->batch; + astate.input = 0; + astate.truth = 0; + astate.delta = 0; + astate.train = 1; + + float *imerror = cuda_make_array(0, imlayer.outputs); + float *ones_gpu = cuda_make_array(0, ay_size); + fill_gpu(ay_size, .9, ones_gpu, 1); + + float aloss_avg = -1; + float gloss_avg = -1; + + //data generated = copy_data(train); + + while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gray = copy_data(train); + for(j = 0; j < imgs; ++j){ + image gim = float_to_image(net->w, net->h, net->c, gray.X.vals[j]); + grayscale_image_3c(gim); + train.y.vals[j][0] = .9; + + image yim = float_to_image(net->w, net->h, net->c, train.X.vals[j]); + //rgb_to_yuv(yim); + } + time=clock(); + float gloss = 0; + + for(j = 0; j < 
net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, pixs, y); + get_next_batch(gray, net->batch, j*net->batch, graypixs, y); + cuda_push_array(gstate.input, graypixs, x_size); + cuda_push_array(gstate.truth, pixs, y_size); + */ + /* + image origi = float_to_image(net->w, net->h, 3, pixs); + image grayi = float_to_image(net->w, net->h, 3, graypixs); + show_image(grayi, "gray"); + show_image(origi, "orig"); + cvWaitKey(0); + */ + /* + *net->seen += net->batch; + forward_network_gpu(net, gstate); + + fill_gpu(imlayer.outputs, 0, imerror, 1); + astate.input = imlayer.output_gpu; + astate.delta = imerror; + astate.truth = ones_gpu; + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + scal_gpu(imlayer.outputs, .1, net->layers[net->n-1].delta_gpu, 1); + + backward_network_gpu(net, gstate); + + scal_gpu(imlayer.outputs, 1000, imerror, 1); + + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs)); + printf("features %f\n", cuda_mag_array(net->layers[net->n-1].delta_gpu, imlayer.outputs)); + + axpy_gpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1); + + gloss += get_network_cost(net) /(net->subdivisions*net->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1); + gray.y.vals[index][0] = .1; + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gray); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + update_network_gpu(anet); + free_data(merge); + free_data(train); + free_data(gray); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, 
get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +#endif +} +*/ + +void slerp(float *start, float *end, float s, int n, float *out) +{ + float omega = acos(dot_cpu(n, start, 1, end, 1)); + float so = sin(omega); + fill_cpu(n, 0, out, 1); + axpy_cpu(n, sin((1-s)*omega)/so, start, 1, out, 1); + axpy_cpu(n, sin(s*omega)/so, end, 1, out, 1); + + float mag = mag_array(out, n); + scale_array(out, n, 1./mag); +} + +image random_unit_vector_image(int w, int h, int c) +{ + image im = make_image(w, h, c); + int i; + for(i = 0; i < im.w*im.h*im.c; ++i){ + im.data[i] = rand_normal(); + } + float mag = mag_array(im.data, im.w*im.h*im.c); + scale_array(im.data, im.w*im.h*im.c, 1./mag); + return im; +} + +void inter_dcgan(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int i, imlayer = 0; + + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = i; + printf("%d\n", i); + break; + } + } + image start = random_unit_vector_image(net->w, net->h, net->c); + image end = random_unit_vector_image(net->w, net->h, net->c); + image im = make_image(net->w, net->h, net->c); + image orig = copy_image(start); + + int c = 0; + int count = 0; + int max_count = 15; + while(1){ + ++c; + + if(count == max_count){ + count = 0; + free_image(start); + start = end; + end = 
random_unit_vector_image(net->w, net->h, net->c); + if(c > 300){ + end = orig; + } + if(c>300 + max_count) return; + } + ++count; + + slerp(start.data, end.data, (float)count / max_count, im.w*im.h*im.c, im.data); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + normalize_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + //char buff[256]; + sprintf(buff, "out%05d", c); + save_image(out, "out"); + save_image(out, buff); + show_image(out, "out", 0); + } +} + +void test_dcgan(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int imlayer = 0; + + imlayer = net->n-1; + + while(1){ + image im = make_image(net->w, net->h, net->c); + int i; + for(i = 0; i < im.w*im.h*im.c; ++i){ + im.data[i] = rand_normal(); + } + //float mag = mag_array(im.data, im.w*im.h*im.c); + //scale_array(im.data, im.w*im.h*im.c, 1./mag); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + normalize_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 0); + + free_image(im); + } +} + +void set_network_alpha_beta(network *net, float alpha, float beta) +{ + int i; + for(i = 0; i < net->n; ++i){ + if(net->layers[i].type == SHORTCUT){ + net->layers[i].alpha = alpha; + net->layers[i].beta = beta; + } + } +} + +void train_prog(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) +{ +#ifdef GPU + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network *gnet = load_network(cfg, weight, clear); + network 
*anet = load_network(acfg, aweight, clear);   /* adversary / discriminator */

    int i, j, k;
    // The generated image is taken straight from the generator's last layer.
    layer imlayer = gnet->layers[gnet->n-1];

    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay);
    int imgs = gnet->batch*gnet->subdivisions;
    i = *gnet->seen/imgs;
    data train, buffer;


    list *plist = get_paths(train_images);
    char **paths = (char **)list_to_array(plist);

    // Asynchronous loader for the real-image batches.
    load_args args= get_base_args(anet);
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.d = &buffer;
    args.type = CLASSIFICATION_DATA;
    args.threads=16;
    args.classes = 1;
    char *ls[2] = {"imagenet", "zzzzzzzz"};
    args.labels = ls;

    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;

    gnet->train = 1;
    anet->train = 1;

    int x_size = gnet->inputs*gnet->batch;
    int y_size = gnet->truths*gnet->batch;
    // Adversary's gradient w.r.t. the generated images, accumulated on GPU.
    // NOTE(review): sized by gnet->truths but filled with
    // imlayer.outputs*imlayer.batch elements below — presumably equal here;
    // confirm against the cfg.
    float *imerror = cuda_make_array(0, y_size);

    float aloss_avg = -1;

    if (maxbatch == 0) maxbatch = gnet->max_batches;
    while (get_current_batch(gnet) < maxbatch) {
        {
            // Progressive growing: ramp alpha 0 -> 1 over the first half of
            // training; generator and adversary are faded in opposite
            // directions.
            int cb = get_current_batch(gnet);
            float alpha = (float) cb / (maxbatch/2);
            if(alpha > 1) alpha = 1;
            float beta = 1 - alpha;
            printf("%f %f\n", alpha, beta);
            set_network_alpha_beta(gnet, alpha, beta);
            set_network_alpha_beta(anet, beta, alpha);
        }

        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
        train = buffer;

        load_thread = load_data_in_thread(args);

        printf("Loaded: %lf seconds\n", sec(clock()-time));

        // Real images get label 1, generated ones label 0.
        data gen = copy_data(train);
        for (j = 0; j < imgs; ++j) {
            train.y.vals[j][0] = 1;
            gen.y.vals[j][0] = 0;
        }
        time=clock();

        for (j = 0; j < gnet->subdivisions; ++j) {
            get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0);
            int z;
            // Fresh gaussian noise as the generator input.
            for(z = 0; z < x_size; ++z){
                gnet->input[z] = rand_normal();
            }
            /*
               for(z = 0; z < gnet->batch; ++z){
               float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs);
               scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag);
               }
             */
            *gnet->seen += gnet->batch;
            forward_network(gnet);

            // Score the generated batch with the adversary, routing its
            // input gradient into imerror.
            fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1);
            fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1);
            copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1);
            anet->delta_gpu = imerror;   // NOTE(review): not restored afterwards
            forward_network(anet);
            backward_network(anet);

            //float genaloss = *anet->cost / anet->batch;

            // Replace the generator's own output delta with the adversary's
            // "realness" gradient, then backprop through the generator.
            scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1);
            scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1);

            axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1);

            backward_network(gnet);

            // Keep the generated images around to train the adversary below.
            for(k = 0; k < gnet->batch; ++k){
                int index = j*gnet->batch + k;
                copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1);
            }
        }
        harmless_update_network_gpu(anet);

        // Train the adversary on the merged real + generated set.
        data merge = concat_data(train, gen);
        float aloss = train_network(anet, merge);

#ifdef OPENCV
        if(display){
            image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]);
            image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]);
            show_image(im, "gen", 1);
            show_image(im2, "train", 1);
            save_image(im, "gen");
            save_image(im2, "train");
        }
#endif

        update_network_gpu(gnet);

        free_data(merge);
        free_data(train);
        free_data(gen);
        // Exponential moving average of the adversary loss.
        if (aloss_avg < 0) aloss_avg = aloss;
        aloss_avg = aloss_avg*.9 + aloss*.1;

        printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs);
        // Periodic checkpoints.
        if(i%10000==0){
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(gnet, buff);
            sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i);
            save_weights(anet, buff);
        }
        if(i%1000==0){
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(gnet, buff);
            sprintf(buff, "%s/%s.backup", backup_directory, abase);
            save_weights(anet, buff);
        }
    }
    char buff[256];
sprintf(buff, "%s/%s_final.weights", backup_directory, base);
    save_weights(gnet, buff);
#endif
}

/* DCGAN training: generator gnet maps unit-norm gaussian noise to images;
 * adversary anet scores real (label 1) vs generated (label 0) images.
 * GPU-only (body compiled under #ifdef GPU). */
void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch)
{
#ifdef GPU
    char *backup_directory = "/home/pjreddie/backup/";
    srand(time(0));
    char *base = basecfg(cfg);
    char *abase = basecfg(acfg);
    printf("%s\n", base);
    network *gnet = load_network(cfg, weight, clear);
    network *anet = load_network(acfg, aweight, clear);
    //float orig_rate = anet->learning_rate;

    int i, j, k;
    // First 3-channel layer of the generator is the image output.
    layer imlayer = {0};
    for (i = 0; i < gnet->n; ++i) {
        if (gnet->layers[i].out_c == 3) {
            imlayer = gnet->layers[i];
            break;
        }
    }

    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay);
    int imgs = gnet->batch*gnet->subdivisions;
    i = *gnet->seen/imgs;
    data train, buffer;


    list *plist = get_paths(train_images);
    //int N = plist->size;
    char **paths = (char **)list_to_array(plist);

    // Asynchronous loader for the real-image batches.
    load_args args= get_base_args(anet);
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.d = &buffer;
    args.type = CLASSIFICATION_DATA;
    args.threads=16;
    args.classes = 1;
    char *ls[2] = {"imagenet", "zzzzzzzz"};
    args.labels = ls;

    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;

    gnet->train = 1;
    anet->train = 1;

    int x_size = gnet->inputs*gnet->batch;
    int y_size = gnet->truths*gnet->batch;
    // Adversary's gradient w.r.t. the generated images, accumulated on GPU.
    float *imerror = cuda_make_array(0, y_size);

    //int ay_size = anet->truths*anet->batch;

    float aloss_avg = -1;

    //data generated = copy_data(train);

    if (maxbatch == 0) maxbatch = gnet->max_batches;
    while (get_current_batch(gnet) < maxbatch) {
        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
        train = buffer;

        //translate_data_rows(train, -.5);
        //scale_data_rows(train, 2);

        load_thread = load_data_in_thread(args);

        printf("Loaded: %lf seconds\n", sec(clock()-time));

        // Real images get label 1, generated ones label 0.
        data gen = copy_data(train);
        for (j = 0; j < imgs; ++j) {
            train.y.vals[j][0] = 1;
            gen.y.vals[j][0] = 0;
        }
        time=clock();

        for(j = 0; j < gnet->subdivisions; ++j){
            get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0);
            int z;
            // Fresh gaussian noise, normalized per batch item to unit norm.
            for(z = 0; z < x_size; ++z){
                gnet->input[z] = rand_normal();
            }
            for(z = 0; z < gnet->batch; ++z){
                float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs);
                scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag);
            }
            /*
               for(z = 0; z < 100; ++z){
               printf("%f, ", gnet->input[z]);
               }
               printf("\n");
               printf("input: %f %f\n", mean_array(gnet->input, x_size), variance_array(gnet->input, x_size));
             */

            //cuda_push_array(gnet->input_gpu, gnet->input, x_size);
            //cuda_push_array(gnet->truth_gpu, gnet->truth, y_size);
            *gnet->seen += gnet->batch;
            forward_network(gnet);

            // Score the generated batch with the adversary, routing its
            // input gradient into imerror.
            fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1);
            fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1);
            copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1);
            anet->delta_gpu = imerror;   // NOTE(review): not restored afterwards
            forward_network(anet);
            backward_network(anet);

            //float genaloss = *anet->cost / anet->batch;
            //printf("%f\n", genaloss);

            // Replace the generator's own output delta with the adversary's
            // "realness" gradient, then backprop through the generator.
            scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1);
            scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1);

            //printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch));
            //printf("features %f\n", cuda_mag_array(gnet->layers[gnet->n-1].delta_gpu, imlayer.outputs*imlayer.batch));

            axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1);

            backward_network(gnet);

            /*
               for(k = 0; k < gnet->n; ++k){
               layer l = gnet->layers[k];
               cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch);
               printf("%d: %f %f\n", k, mean_array(l.output, l.outputs*l.batch), variance_array(l.output, l.outputs*l.batch));
               }
             */

            // Keep the generated images around to train the adversary below.
            for(k = 0; k < gnet->batch; ++k){
                int index = j*gnet->batch + k;
                copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1);
            }
        }
        harmless_update_network_gpu(anet);

        // Train the adversary on the merged real + generated set.
        data merge = concat_data(train, gen);
        //randomize_data(merge);
        float aloss = train_network(anet, merge);

        //translate_image(im, 1);
        //scale_image(im, .5);
        //translate_image(im2, 1);
        //scale_image(im2, .5);
#ifdef OPENCV
        if(display){
            image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]);
            image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]);
            show_image(im, "gen", 1);
            show_image(im2, "train", 1);
            save_image(im, "gen");
            save_image(im2, "train");
        }
#endif

        /*
           if(aloss < .1){
           anet->learning_rate = 0;
           } else if (aloss > .3){
           anet->learning_rate = orig_rate;
           }
         */

        update_network_gpu(gnet);

        free_data(merge);
        free_data(train);
        free_data(gen);
        // Exponential moving average of the adversary loss.
        if (aloss_avg < 0) aloss_avg = aloss;
        aloss_avg = aloss_avg*.9 + aloss*.1;

        printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs);
        // Periodic checkpoints.
        if(i%10000==0){
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(gnet, buff);
            sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i);
            save_weights(anet, buff);
        }
        if(i%1000==0){
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(gnet, buff);
            sprintf(buff, "%s/%s.backup", backup_directory, abase);
            save_weights(anet, buff);
        }
    }
    char buff[256];
    sprintf(buff, "%s/%s_final.weights", backup_directory, base);
    save_weights(gnet, buff);
#endif
}

/* Adversarial colorizer training: net maps a grayscale image back to color
 * (truth = the original color image); adversary anet scores real color
 * (label .95) vs recolored (label .05) images. GPU-only. */
void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display)
{
#ifdef GPU
    //char *train_images = "/home/pjreddie/data/coco/train1.txt";
    //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt";
    char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list";
    char *backup_directory
= "/home/pjreddie/backup/";
    srand(time(0));
    char *base = basecfg(cfg);
    char *abase = basecfg(acfg);
    printf("%s\n", base);
    network *net = load_network(cfg, weight, clear);
    network *anet = load_network(acfg, aweight, clear);

    int i, j, k;
    // First 3-channel layer of the colorizer is the image output.
    layer imlayer = {0};
    for (i = 0; i < net->n; ++i) {
        if (net->layers[i].out_c == 3) {
            imlayer = net->layers[i];
            break;
        }
    }

    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
    int imgs = net->batch*net->subdivisions;
    i = *net->seen/imgs;
    data train, buffer;


    list *plist = get_paths(train_images);
    //int N = plist->size;
    char **paths = (char **)list_to_array(plist);

    // Asynchronous loader for the color-image batches.
    load_args args= get_base_args(net);
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.d = &buffer;

    args.type = CLASSIFICATION_DATA;
    args.classes = 1;
    char *ls[2] = {"imagenet"};
    args.labels = ls;

    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;

    int x_size = net->inputs*net->batch;
    //int y_size = x_size;
    net->delta = 0;
    net->train = 1;
    // Host staging buffers: original color pixels and grayscale copies.
    // NOTE(review): calloc results are not checked for NULL.
    float *pixs = calloc(x_size, sizeof(float));
    float *graypixs = calloc(x_size, sizeof(float));
    //float *y = calloc(y_size, sizeof(float));

    //int ay_size = anet->outputs*anet->batch;
    anet->delta = 0;
    anet->train = 1;

    // Adversary's gradient w.r.t. the recolored images, accumulated on GPU.
    float *imerror = cuda_make_array(0, imlayer.outputs*imlayer.batch);

    float aloss_avg = -1;
    float gloss_avg = -1;

    //data generated = copy_data(train);

    while (get_current_batch(net) < net->max_batches) {
        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data_in_thread(args);

        printf("Loaded: %lf seconds\n", sec(clock()-time));

        // Make a grayscale copy of every image; real color gets label .95,
        // recolored output gets .05 (smoothed GAN labels).
        data gray = copy_data(train);
        for(j = 0; j < imgs; ++j){
            image gim = float_to_image(net->w, net->h, net->c, gray.X.vals[j]);
            grayscale_image_3c(gim);
            train.y.vals[j][0] = .95;
            gray.y.vals[j][0] = .05;
        }
        time=clock();
        float gloss = 0;

        for(j = 0; j <
net->subdivisions; ++j){
            // Input = grayscale image, truth = original color image.
            get_next_batch(train, net->batch, j*net->batch, pixs, 0);
            get_next_batch(gray, net->batch, j*net->batch, graypixs, 0);
            cuda_push_array(net->input_gpu, graypixs, net->inputs*net->batch);
            cuda_push_array(net->truth_gpu, pixs, net->truths*net->batch);
            /*
               image origi = float_to_image(net->w, net->h, 3, pixs);
               image grayi = float_to_image(net->w, net->h, 3, graypixs);
               show_image(grayi, "gray");
               show_image(origi, "orig");
               cvWaitKey(0);
             */
            *net->seen += net->batch;
            forward_network_gpu(net);

            // Score the recolored batch with the adversary (truth .95 =
            // "real"), routing its input gradient into imerror.
            fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1);
            copy_gpu(anet->inputs*anet->batch, imlayer.output_gpu, 1, anet->input_gpu, 1);
            fill_gpu(anet->inputs*anet->batch, .95, anet->truth_gpu, 1);
            anet->delta_gpu = imerror;   // NOTE(review): not restored afterwards
            forward_network_gpu(anet);
            backward_network_gpu(anet);

            // Down-weight the reconstruction gradient, then add the
            // adversary's "realness" gradient on top.
            scal_gpu(imlayer.outputs*imlayer.batch, 1./100., net->layers[net->n-1].delta_gpu, 1);

            scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1);

            printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch));
            printf("features %f\n", cuda_mag_array(net->layers[net->n-1].delta_gpu, imlayer.outputs*imlayer.batch));

            axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, net->layers[net->n-1].delta_gpu, 1);

            backward_network_gpu(net);


            gloss += *net->cost /(net->subdivisions*net->batch);

            // Replace the gray set with the recolored outputs so the
            // adversary trains on them below.
            for(k = 0; k < net->batch; ++k){
                int index = j*net->batch + k;
                copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1);
            }
        }
        harmless_update_network_gpu(anet);

        // Train the adversary on real color + recolored images.
        data merge = concat_data(train, gray);
        //randomize_data(merge);
        float aloss = train_network(anet, merge);

        update_network_gpu(net);

#ifdef OPENCV
        if(display){
            image im = float_to_image(anet->w, anet->h, anet->c, gray.X.vals[0]);
            image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]);
            show_image(im, "gen", 1);
            show_image(im2, "train", 1);
        }
#endif
        free_data(merge);
        free_data(train);
free_data(gray); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +#endif +} + +/* + void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfile, int clear) + { +#ifdef GPU +char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; +char *backup_directory = "/home/pjreddie/backup/"; +srand(time(0)); +char *base = basecfg(cfgfile); +printf("%s\n", base); +network net = parse_network_cfg(cfgfile); +if(weightfile){ +load_weights(&net, weightfile); +} +if(clear) *net->seen = 0; + +char *abase = basecfg(acfgfile); +network anet = parse_network_cfg(acfgfile); +if(aweightfile){ +load_weights(&anet, aweightfile); +} +if(clear) *anet->seen = 0; + +int i, j, k; +layer imlayer = {0}; +for (i = 0; i < net->n; ++i) { +if (net->layers[i].out_c == 3) { +imlayer = net->layers[i]; +break; +} +} + +printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); +int imgs = net->batch*net->subdivisions; +i = *net->seen/imgs; +data train, buffer; + + +list *plist = get_paths(train_images); +//int N = plist->size; +char **paths = (char **)list_to_array(plist); + +load_args args = {0}; +args.w = net->w; +args.h = net->h; +args.paths 
= paths; +args.n = imgs; +args.m = plist->size; +args.d = &buffer; + +args.min = net->min_crop; +args.max = net->max_crop; +args.angle = net->angle; +args.aspect = net->aspect; +args.exposure = net->exposure; +args.saturation = net->saturation; +args.hue = net->hue; +args.size = net->w; +args.type = CLASSIFICATION_DATA; +args.classes = 1; +char *ls[1] = {"coco"}; +args.labels = ls; + +pthread_t load_thread = load_data_in_thread(args); +clock_t time; + +network_state gstate = {0}; +gstate.index = 0; +gstate.net = net; +int x_size = get_network_input_size(net)*net->batch; +int y_size = 1*net->batch; +gstate.input = cuda_make_array(0, x_size); +gstate.truth = 0; +gstate.delta = 0; +gstate.train = 1; +float *X = calloc(x_size, sizeof(float)); +float *y = calloc(y_size, sizeof(float)); + +network_state astate = {0}; +astate.index = 0; +astate.net = anet; +int ay_size = get_network_output_size(anet)*anet->batch; +astate.input = 0; +astate.truth = 0; +astate.delta = 0; +astate.train = 1; + +float *imerror = cuda_make_array(0, imlayer.outputs); +float *ones_gpu = cuda_make_array(0, ay_size); +fill_gpu(ay_size, 1, ones_gpu, 1); + +float aloss_avg = -1; +float gloss_avg = -1; + +//data generated = copy_data(train); + +while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data generated = copy_data(train); + time=clock(); + float gloss = 0; + + for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, X, y); + cuda_push_array(gstate.input, X, x_size); + *net->seen += net->batch; + forward_network_gpu(net, gstate); + + fill_gpu(imlayer.outputs, 0, imerror, 1); + astate.input = imlayer.output_gpu; + astate.delta = imerror; + astate.truth = ones_gpu; + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + scal_gpu(imlayer.outputs, 1, imerror, 1); + 
axpy_gpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1); + + backward_network_gpu(net, gstate); + + printf("features %f\n", cuda_mag_array(imlayer.delta_gpu, imlayer.outputs)); + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs)); + + gloss += get_network_cost(net) /(net->subdivisions*net->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1); + generated.y.vals[index][0] = 0; + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, generated); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + update_network_gpu(anet); + free_data(merge); + free_data(train); + free_data(generated); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } +} +char buff[256]; +sprintf(buff, "%s/%s_final.weights", backup_directory, base); +save_weights(net, buff); +#endif +} +*/ + +/* + void train_lsd(char *cfgfile, char *weightfile, int clear) + { + char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + 
printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + if(clear) *net->seen = 0; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); +//int N = plist->size; +char **paths = (char **)list_to_array(plist); + +load_args args = {0}; +args.w = net->w; +args.h = net->h; +args.paths = paths; +args.n = imgs; +args.m = plist->size; +args.d = &buffer; + +args.min = net->min_crop; +args.max = net->max_crop; +args.angle = net->angle; +args.aspect = net->aspect; +args.exposure = net->exposure; +args.saturation = net->saturation; +args.hue = net->hue; +args.size = net->w; +args.type = CLASSIFICATION_DATA; +args.classes = 1; +char *ls[1] = {"coco"}; +args.labels = ls; + +pthread_t load_thread = load_data_in_thread(args); +clock_t time; +//while(i*imgs < N*120){ +while(get_current_batch(net) < net->max_batches){ +i += 1; +time=clock(); +pthread_join(load_thread, 0); +train = buffer; +load_thread = load_data_in_thread(args); + +printf("Loaded: %lf seconds\n", sec(clock()-time)); + +time=clock(); +float loss = train_network(net, train); +if (avg_loss < 0) avg_loss = loss; +avg_loss = avg_loss*.9 + loss*.1; + +printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); +if(i%1000==0){ +char buff[256]; +sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); +save_weights(net, buff); +} +if(i%100==0){ +char buff[256]; +sprintf(buff, "%s/%s.backup", backup_directory, base); +save_weights(net, buff); +} +free_data(train); +} +char buff[256]; +sprintf(buff, "%s/%s_final.weights", backup_directory, base); +save_weights(net, buff); +} +*/ + +void test_lsd(char *cfg, char *weights, char *filename, int gray) +{ + network *net = 
load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int i, imlayer = 0; + + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = i; + printf("%d\n", i); + break; + } + } + + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + if(gray) grayscale_image_3c(crop); + + float *X = crop.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + constrain_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 1); + show_image(crop, "crop", 0); + + free_image(im); + free_image(resized); + free_image(crop); + if (filename) break; + } +} + + +void run_lsd(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + int batches = find_int_arg(argc, argv, "-b", 0); + char *file = find_char_arg(argc, argv, "-file", "/home/pjreddie/data/imagenet/imagenet1k.train.list"); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + char *acfg = argv[5]; + char *aweights = (argc > 6) ? 
argv[6] : 0; + //if(0==strcmp(argv[2], "train")) train_lsd(cfg, weights, clear); + //else if(0==strcmp(argv[2], "train2")) train_lsd2(cfg, weights, acfg, aweights, clear); + //else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear); + //else if(0==strcmp(argv[2], "train3")) train_lsd3(argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], clear); + if(0==strcmp(argv[2], "traingan")) train_dcgan(cfg, weights, acfg, aweights, clear, display, file, batches); + else if(0==strcmp(argv[2], "trainprog")) train_prog(cfg, weights, acfg, aweights, clear, display, file, batches); + else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear, display); + else if(0==strcmp(argv[2], "gan")) test_dcgan(cfg, weights); + else if(0==strcmp(argv[2], "inter")) inter_dcgan(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_lsd(cfg, weights, filename, 0); + else if(0==strcmp(argv[2], "color")) test_lsd(cfg, weights, filename, 1); + /* + else if(0==strcmp(argv[2], "valid")) validate_lsd(cfg, weights); + */ +} diff --git a/workloads/realworld/uvm/darknet/examples/nightmare.c b/workloads/realworld/uvm/darknet/examples/nightmare.c new file mode 100644 index 0000000000000000000000000000000000000000..2978eb61193e96325441c5b830a786eccb203569 --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/nightmare.c @@ -0,0 +1,414 @@ +#include "darknet.h" + +#include + +// ./darknet nightmare cfg/extractor.recon.cfg ~/trained/yolo-coco.conv frame6.png -reconstruct -iters 500 -i 3 -lambda .1 -rate .01 -smooth 2 + +float abs_mean(float *x, int n) +{ + int i; + float sum = 0; + for (i = 0; i < n; ++i){ + sum += fabs(x[i]); + } + return sum/n; +} + +void calculate_loss(float *output, float *delta, int n, float thresh) +{ + int i; + float mean = mean_array(output, n); + float var = variance_array(output, n); + for(i = 0; i < n; ++i){ + if(delta[i] > mean + thresh*sqrt(var)) delta[i] = output[i]; + else delta[i] = 0; + } +} 
/* One deep-dream step: jitter/scale the input, run forward, turn high
 * activations of layer `max_layer` into a gradient (via calculate_loss),
 * backprop to the pixels, and add the resulting pixel gradient back into
 * `orig` in place.
 * net: network (truncated to max_layer+1 layers for this call — note the
 *      truncation is NOT restored here; resize_network is, implicitly, on
 *      the next call).
 * orig: image updated in place. scale: octave scale factor. rate: step size.
 * thresh: activation threshold in std-devs (see calculate_loss). norm:
 * non-zero normalizes the gradient before applying it. */
void optimize_picture(network *net, image orig, int max_layer, float scale, float rate, float thresh, int norm)
{
    //scale_image(orig, 2);
    //translate_image(orig, -1);
    net->n = max_layer + 1;   /* truncate forward pass at the target layer */

    /* Random +/-8px jitter and horizontal flip de-correlate pixel noise. */
    int dx = rand()%16 - 8;
    int dy = rand()%16 - 8;
    int flip = rand()%2;

    image crop = crop_image(orig, dx, dy, orig.w, orig.h);
    image im = resize_image(crop, (int)(orig.w * scale), (int)(orig.h * scale));
    if(flip) flip_image(im);

    resize_network(net, im.w, im.h);
    layer last = net->layers[net->n-1];
    //net->layers[net->n - 1].activation = LINEAR;

    image delta = make_image(im.w, im.h, im.c);   /* receives d(loss)/d(pixels) */

#ifdef GPU
    net->delta_gpu = cuda_make_array(delta.data, im.w*im.h*im.c);
    copy_cpu(net->inputs, im.data, 1, net->input, 1);

    forward_network_gpu(net);
    /* Seed the backward pass with the layer's own output ... */
    copy_gpu(last.outputs, last.output_gpu, 1, last.delta_gpu, 1);

    /* ... then keep only activations above mean + thresh*stddev (on CPU). */
    cuda_pull_array(last.delta_gpu, last.delta, last.outputs);
    calculate_loss(last.delta, last.delta, last.outputs, thresh);
    cuda_push_array(last.delta_gpu, last.delta, last.outputs);

    backward_network_gpu(net);

    cuda_pull_array(net->delta_gpu, delta.data, im.w*im.h*im.c);
    cuda_free(net->delta_gpu);
    net->delta_gpu = 0;
#else
    printf("\nnet: %d %d %d im: %d %d %d\n", net->w, net->h, net->inputs, im.w, im.h, im.c);
    copy_cpu(net->inputs, im.data, 1, net->input, 1);
    net->delta = delta.data;
    forward_network(net);
    copy_cpu(last.outputs, last.output, 1, last.delta, 1);
    calculate_loss(last.output, last.delta, last.outputs, thresh);
    backward_network(net);
#endif

    /* Undo the flip/scale/jitter so the gradient lines up with `orig`. */
    if(flip) flip_image(delta);
    //normalize_array(delta.data, delta.w*delta.h*delta.c);
    image resized = resize_image(delta, orig.w, orig.h);
    image out = crop_image(resized, -dx, -dy, orig.w, orig.h);

    /*
    image g = grayscale_image(out);
    free_image(out);
    out = g;
    */

    //rate = rate / abs_mean(out.data, out.w*out.h*out.c);
    /* Mix in a slight pull toward mid-gray (0.5) to keep colors bounded. */
    image gray = make_image(out.w, out.h, out.c);
    fill_image(gray, .5);
    axpy_cpu(orig.w*orig.h*orig.c, -1, orig.data, 1, gray.data, 1);
    axpy_cpu(orig.w*orig.h*orig.c, .1, gray.data, 1, out.data, 1);

    if(norm) normalize_array(out.data, out.w*out.h*out.c);
    axpy_cpu(orig.w*orig.h*orig.c, rate, out.data, 1, orig.data, 1);   /* gradient ascent step */

    /*
    normalize_array(orig.data, orig.w*orig.h*orig.c);
    scale_image(orig, sqrt(var));
    translate_image(orig, mean);
    */

    //translate_image(orig, 1);
    //scale_image(orig, .5);
    //normalize_image(orig);

    constrain_image(orig);   /* clamp pixels back into valid range */

    free_image(crop);
    free_image(im);
    free_image(delta);
    free_image(resized);
    free_image(out);

}
features; + + forward_network(net); + backward_network(net); +#endif + + //normalize_array(delta.data, delta.w*delta.h*delta.c); + axpy_cpu(recon.w*recon.h*recon.c, 1, delta.data, 1, update.data, 1); + //smooth(recon, update, lambda, smooth_size); + + axpy_cpu(recon.w*recon.h*recon.c, rate, update.data, 1, recon.data, 1); + scal_cpu(recon.w*recon.h*recon.c, momentum, update.data, 1); + + float mag = mag_array(delta.data, recon.w*recon.h*recon.c); + printf("mag: %f\n", mag); + //scal_cpu(recon.w*recon.h*recon.c, 600/mag, recon.data, 1); + + constrain_image(recon); + free_image(delta); + } +} + +/* +void run_lsd(int argc, char **argv) +{ + srand(0); + if(argc < 3){ + fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [options! (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[2]; + char *weights = argv[3]; + char *input = argv[4]; + + int norm = find_int_arg(argc, argv, "-norm", 1); + int rounds = find_int_arg(argc, argv, "-rounds", 1); + int iters = find_int_arg(argc, argv, "-iters", 10); + float rate = find_float_arg(argc, argv, "-rate", .04); + float momentum = find_float_arg(argc, argv, "-momentum", .9); + float lambda = find_float_arg(argc, argv, "-lambda", .01); + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + int reconstruct = find_arg(argc, argv, "-reconstruct"); + int smooth_size = find_int_arg(argc, argv, "-smooth", 1); + + network net = parse_network_cfg(cfg); + load_weights(&net, weights); + char *cfgbase = basecfg(cfg); + char *imbase = basecfg(input); + + set_batch_network(&net, 1); + image im = load_image_color(input, 0, 0); + + float *features = 0; + image update; + if (reconstruct){ + im = letterbox_image(im, net->w, net->h); + + int zz = 0; + network_predict(net, im.data); + image out_im = get_network_image(net); + image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz); + //flip_image(crop); + image f_im = resize_image(crop, out_im.w, out_im.h); + free_image(crop); + printf("%d features\n", 
/* Entry point for `darknet nightmare ...`: deep-dream style visualization.
 * argv[2..5]: cfg, weights, input image, target layer index. Remaining
 * options are consumed (and removed from argv) by find_*_arg.
 * Two modes: default "dream" mode amplifies activations of a random layer
 * near max_layer each iteration; -reconstruct mode instead captures the
 * network's features for the input and gradient-descends a new image
 * toward them (see reconstruct_picture). */
void run_nightmare(int argc, char **argv)
{
    srand(0);
    if(argc < 4){
        fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [layer] [options! (optional)]\n", argv[0], argv[1]);
        return;
    }

    char *cfg = argv[2];
    char *weights = argv[3];
    char *input = argv[4];
    int max_layer = atoi(argv[5]);   /* NOTE(review): argv[5] is read with argc >= 4 only — confirm callers always pass a layer */

    int range = find_int_arg(argc, argv, "-range", 1);        /* layer jitter span */
    int norm = find_int_arg(argc, argv, "-norm", 1);
    int rounds = find_int_arg(argc, argv, "-rounds", 1);      /* images saved */
    int iters = find_int_arg(argc, argv, "-iters", 10);       /* steps per round */
    int octaves = find_int_arg(argc, argv, "-octaves", 4);    /* scale pyramid depth */
    float zoom = find_float_arg(argc, argv, "-zoom", 1.);     /* per-round zoom-in */
    float rate = find_float_arg(argc, argv, "-rate", .04);
    float thresh = find_float_arg(argc, argv, "-thresh", 1.);
    float rotate = find_float_arg(argc, argv, "-rotate", 0);  /* per-round rotation */
    float momentum = find_float_arg(argc, argv, "-momentum", .9);
    float lambda = find_float_arg(argc, argv, "-lambda", .01);
    char *prefix = find_char_arg(argc, argv, "-prefix", 0);   /* output directory */
    int reconstruct = find_arg(argc, argv, "-reconstruct");
    int smooth_size = find_int_arg(argc, argv, "-smooth", 1);

    network *net = load_network(cfg, weights, 0);
    char *cfgbase = basecfg(cfg);
    char *imbase = basecfg(input);

    set_batch_network(net, 1);
    image im = load_image_color(input, 0, 0);
    if(0){   /* disabled: downscale very large inputs to <=512px */
        float scale = 1;
        if(im.w > 512 || im.h > 512){
            if(im.w > im.h) scale = 512.0/im.w;
            else scale = 512.0/im.h;
        }
        image resized = resize_image(im, scale*im.w, scale*im.h);
        free_image(im);
        im = resized;
    }
    //im = letterbox_image(im, net->w, net->h);

    float *features = 0;
    image update;   /* momentum buffer; only initialized in reconstruct mode */
    if (reconstruct){
        net->n = max_layer;   /* truncate the network at the target layer */
        im = letterbox_image(im, net->w, net->h);
        //resize_network(&net, im.w, im.h);

        network_predict(net, im.data);
        if(net->layers[net->n-1].type == REGION){
            printf("region!\n");
            zero_objectness(net->layers[net->n-1]);
        }
        /* Copy, so the target features survive later forward passes. */
        image out_im = copy_image(get_network_image(net));
        /*
        image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz);
        //flip_image(crop);
        image f_im = resize_image(crop, out_im.w, out_im.h);
        free_image(crop);
        */
        printf("%d features\n", out_im.w*out_im.h*out_im.c);

        features = out_im.data;

        /*
        int i;
        for(i = 0; i < 14*14*512; ++i){
            //features[i] += rand_uniform(-.19, .19);
        }
        free_image(im);
        im = make_random_image(im.w, im.h, im.c);
        */
        update = make_image(im.w, im.h, im.c);
    }

    int e;
    int n;
    for(e = 0; e < rounds; ++e){
        fprintf(stderr, "Iteration: ");
        fflush(stderr);
        for(n = 0; n < iters; ++n){
            fprintf(stderr, "%d, ", n);
            fflush(stderr);
            if(reconstruct){
                reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1);
                //if ((n+1)%30 == 0) rate *= .5;
                show_image(im, "reconstruction", 10);
            }else{
                /* random layer near max_layer, random octave of the pyramid */
                int layer = max_layer + rand()%range - range/2;
                int octave = rand()%octaves;
                optimize_picture(net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm);
            }
        }
        fprintf(stderr, "done\n");
        if(0){   /* disabled: grayscale conversion between rounds */
            image g = grayscale_image(im);
            free_image(im);
            im = g;
        }
        char buff[256];
        if (prefix){
            sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e);
        }else{
            sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e);
        }
        printf("%d %s\n", e, buff);
        save_image(im, buff);
        //show_image(im, buff, 0);

        /* Optional per-round rotate + zoom, feeding the result into the next round. */
        if(rotate){
            image rot = rotate_image(im, rotate);
            free_image(im);
            im = rot;
        }
        image crop = crop_image(im, im.w * (1. - zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom);
        image resized = resize_image(crop, im.w, im.h);
        free_image(im);
        free_image(crop);
        im = resized;
    }
}
- zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom); + image resized = resize_image(crop, im.w, im.h); + free_image(im); + free_image(crop); + im = resized; + } +} + diff --git a/workloads/realworld/uvm/darknet/examples/regressor.c b/workloads/realworld/uvm/darknet/examples/regressor.c new file mode 100644 index 0000000000000000000000000000000000000000..20cec0fad9f0a2ccb2c46a30d0a01793119b43ce --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/regressor.c @@ -0,0 +1,240 @@ +#include "darknet.h" +#include +#include + +void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + int classes = option_find_int(options, "classes", 1); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + clock_t time; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.classes = classes; + + args.min = net->min_ratio*net->w; + args.max = net->max_ratio*net->w; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = 
net->w; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = REGRESSION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_regressor(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image sized = 
letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + free_image(im); + free_image(sized); + if (filename) break; + } +} + + +void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Regressor Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + list *options = read_data_cfg(datacfg); + int classes = option_find_int(options, "classes", 1); + char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); + + void * cap = open_video_stream(filename, cam_index, 0,0,0); + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image crop = center_crop_image(in, net->w, net->h); + grayscale_image_3c(crop); + + float *predictions = network_predict(net, crop.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + int i; + for(i = 0; i < classes; ++i){ + printf("%s: %f\n", names[i], predictions[i]); + } + + show_image(crop, "Regressor", 10); + free_image(in); + free_image(crop); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_regressor(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus 
= 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_regressor(data, cfg, weights); + else if(0==strcmp(argv[2], "train")) train_regressor(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "demo")) demo_regressor(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/uvm/darknet/examples/rnn.c b/workloads/realworld/uvm/darknet/examples/rnn.c new file mode 100644 index 0000000000000000000000000000000000000000..5d49eaae7070eb1dc9a87b5627b7ec6f7cb09e46 --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/rnn.c @@ -0,0 +1,542 @@ +#include "darknet.h" + +#include + +typedef struct { + float *x; + float *y; +} float_pair; + +unsigned char **load_files(char *filename, int *n) +{ + list *paths = get_paths(filename); + *n = paths->size; + unsigned char **contents = calloc(*n, sizeof(char *)); + int i; + node *x = paths->front; + for(i = 0; i < *n; ++i){ + contents[i] = read_file((char *)x->val); + x = x->next; + } + return contents; +} + +int *read_tokenized_data(char *filename, size_t *read) +{ + size_t size = 512; + size_t count = 0; + FILE *fp = fopen(filename, "r"); + int *d = calloc(size, sizeof(int)); + int n, one; + one = fscanf(fp, "%d", &n); + while(one == 1){ + ++count; + if(count > size){ + size = size*2; + d = realloc(d, size*sizeof(int)); + } + d[count-1] = n; + one = fscanf(fp, "%d", &n); + } + fclose(fp); + d = realloc(d, count*sizeof(int)); + *read = count; + return d; +} + +char **read_tokens(char *filename, 
size_t *read) +{ + size_t size = 512; + size_t count = 0; + FILE *fp = fopen(filename, "r"); + char **d = calloc(size, sizeof(char *)); + char *line; + while((line=fgetl(fp)) != 0){ + ++count; + if(count > size){ + size = size*2; + d = realloc(d, size*sizeof(char *)); + } + if(0==strcmp(line, "")) line = "\n"; + d[count-1] = line; + } + fclose(fp); + d = realloc(d, count*sizeof(char *)); + *read = count; + return d; +} + + +float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size_t len, int batch, int steps) +{ + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + int i,j; + for(i = 0; i < batch; ++i){ + for(j = 0; j < steps; ++j){ + int curr = tokens[(offsets[i])%len]; + int next = tokens[(offsets[i] + 1)%len]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + offsets[i] = (offsets[i] + 1) % len; + + if(curr >= characters || curr < 0 || next >= characters || next < 0){ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} + +float_pair get_seq2seq_data(char **source, char **dest, int n, int characters, size_t len, int batch, int steps) +{ + int i,j; + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + for(i = 0; i < batch; ++i){ + int index = rand()%n; + //int slen = strlen(source[index]); + //int dlen = strlen(dest[index]); + for(j = 0; j < steps; ++j){ + unsigned char curr = source[index][j]; + unsigned char next = dest[index][j]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + if(curr > 255 || curr <= 0 || next > 255 || next <= 0){ + /*text[(index+j+2)%len] = 0; + printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]); + printf("%s", text+index); + */ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} + 
+float_pair get_rnn_data(unsigned char *text, size_t *offsets, int characters, size_t len, int batch, int steps) +{ + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + int i,j; + for(i = 0; i < batch; ++i){ + for(j = 0; j < steps; ++j){ + unsigned char curr = text[(offsets[i])%len]; + unsigned char next = text[(offsets[i] + 1)%len]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + offsets[i] = (offsets[i] + 1) % len; + + if(curr > 255 || curr <= 0 || next > 255 || next <= 0){ + /*text[(index+j+2)%len] = 0; + printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]); + printf("%s", text+index); + */ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} + +void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear, int tokenized) +{ + srand(time(0)); + unsigned char *text = 0; + int *tokens = 0; + size_t size; + if(tokenized){ + tokens = read_tokenized_data(filename, &size); + } else { + text = read_file(filename); + size = strlen((const char*)text); + } + + char *backup_directory = "/home/pjreddie/backup/"; + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, clear); + + int inputs = net->inputs; + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g, Inputs: %d %d %d\n", net->learning_rate, net->momentum, net->decay, inputs, net->batch, net->time_steps); + int batch = net->batch; + int steps = net->time_steps; + if(clear) *net->seen = 0; + int i = (*net->seen)/net->batch; + + int streams = batch/steps; + size_t *offsets = calloc(streams, sizeof(size_t)); + int j; + for(j = 0; j < streams; ++j){ + offsets[j] = rand_size_t()%size; + } + + clock_t time; + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + float_pair p; + if(tokenized){ + p = 
/* Sample `num` characters (or tokens) from a trained character RNN.
 * The seed string is fed through the network one character at a time to
 * prime the hidden state, then each sampled character is fed back in as the
 * next input. temp: softmax temperature applied to every layer; token_file:
 * optional vocabulary for token (rather than raw byte) output. */
void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float temp, int rseed, char *token_file)
{
    char **tokens = 0;
    if(token_file){
        size_t n;
        tokens = read_tokens(token_file, &n);
    }

    srand(rseed);
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);

    network *net = load_network(cfgfile, weightfile, 0);
    int inputs = net->inputs;

    int i, j;
    for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp;
    int c = 0;
    int len = strlen(seed);
    float *input = calloc(inputs, sizeof(float));

    /*
    fill_cpu(inputs, 0, input, 1);
    for(i = 0; i < 10; ++i){
        network_predict(net, input);
    }
    fill_cpu(inputs, 0, input, 1);
    */

    /* Prime with all seed characters except the last (its prediction is
     * consumed by the sampling loop below). The one-hot bit is set, the
     * network stepped, then the bit cleared for reuse of `input`. */
    for(i = 0; i < len-1; ++i){
        c = seed[i];
        input[c] = 1;
        network_predict(net, input);
        input[c] = 0;
        print_symbol(c, tokens);
    }
    if(len) c = seed[len-1];
    print_symbol(c, tokens);
    /* Autoregressive sampling: feed the previous output back as input. */
    for(i = 0; i < num; ++i){
        input[c] = 1;
        float *out = network_predict(net, input);
        input[c] = 0;
        for(j = 32; j < 127; ++j){
            //printf("%d %c %f\n",j, j, out[j]);
        }
        /* zero out negligible probabilities before sampling */
        for(j = 0; j < inputs; ++j){
            if (out[j] < .0001) out[j] = 0;
        }
        c = sample_array(out, inputs);
        print_symbol(c, tokens);
    }
    printf("\n");
}
&& next == '\n') break; + c = next; + print_symbol(c, tokens); + + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + printf("\n"); + } +} + +void test_tactic_rnn(char *cfgfile, char *weightfile, int num, float temp, int rseed, char *token_file) +{ + char **tokens = 0; + if(token_file){ + size_t n; + tokens = read_tokens(token_file, &n); + } + + srand(rseed); + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int i, j; + for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp; + int c = 0; + float *input = calloc(inputs, sizeof(float)); + float *out = 0; + + while((c = getc(stdin)) != EOF){ + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + for(i = 0; i < num; ++i){ + for(j = 0; j < inputs; ++j){ + if (out[j] < .0001) out[j] = 0; + } + int next = sample_array(out, inputs); + if(c == '.' && next == '\n') break; + c = next; + print_symbol(c, tokens); + + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + printf("\n"); +} + +void valid_tactic_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int count = 0; + int words = 1; + int c; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + for(i = 0; i < len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + float sum = 0; + c = getc(stdin); + float log2 = log(2); + int in = 0; + while(c != EOF){ + int next = getc(stdin); + if(next == EOF) break; + if(next < 0 || next >= 255) error("Out of range character"); + + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + + if(c == '.' 
&& next == '\n') in = 0; + if(!in) { + if(c == '>' && next == '>'){ + in = 1; + ++words; + } + c = next; + continue; + } + ++count; + sum += log(out[next])/log2; + c = next; + printf("%d %d Perplexity: %4.4f Word Perplexity: %4.4f\n", count, words, pow(2, -sum/count), pow(2, -sum/words)); + } +} + +void valid_char_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int count = 0; + int words = 1; + int c; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + for(i = 0; i < len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + float sum = 0; + c = getc(stdin); + float log2 = log(2); + while(c != EOF){ + int next = getc(stdin); + if(next == EOF) break; + if(next < 0 || next >= 255) error("Out of range character"); + ++count; + if(next == ' ' || next == '\n' || next == '\t') ++words; + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + sum += log(out[next])/log2; + c = next; + printf("%d BPC: %4.4f Perplexity: %4.4f Word Perplexity: %4.4f\n", count, -sum/count, pow(2, -sum/count), pow(2, -sum/words)); + } +} + +void vec_char_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int c; + int seed_len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + char *line; + while((line=fgetl(stdin)) != 0){ + reset_network_state(net, 0); + for(i = 0; i < seed_len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + strip(line); + int str_len = strlen(line); + for(i = 0; i < str_len; ++i){ + c = line[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + c = ' '; + 
input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + + layer l = net->layers[0]; + #ifdef GPU + cuda_pull_array(l.output_gpu, l.output, l.outputs); + #endif + printf("%s", line); + for(i = 0; i < l.outputs; ++i){ + printf(",%g", l.output[i]); + } + printf("\n"); + } +} + +void run_char_rnn(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + char *filename = find_char_arg(argc, argv, "-file", "data/shakespeare.txt"); + char *seed = find_char_arg(argc, argv, "-seed", "\n\n"); + int len = find_int_arg(argc, argv, "-len", 1000); + float temp = find_float_arg(argc, argv, "-temp", .7); + int rseed = find_int_arg(argc, argv, "-srand", time(0)); + int clear = find_arg(argc, argv, "-clear"); + int tokenized = find_arg(argc, argv, "-tokenized"); + char *tokens = find_char_arg(argc, argv, "-tokens", 0); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_char_rnn(cfg, weights, filename, clear, tokenized); + else if(0==strcmp(argv[2], "valid")) valid_char_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "validtactic")) valid_tactic_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "vec")) vec_char_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "generate")) test_char_rnn(cfg, weights, len, seed, temp, rseed, tokens); + else if(0==strcmp(argv[2], "generatetactic")) test_tactic_rnn(cfg, weights, len, temp, rseed, tokens); +} diff --git a/workloads/realworld/uvm/darknet/examples/rnn_vid.c b/workloads/realworld/uvm/darknet/examples/rnn_vid.c new file mode 100644 index 0000000000000000000000000000000000000000..e88792352311438d0fcb25bb7befd0677f70bae5 --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/rnn_vid.c @@ -0,0 +1,208 @@ +#include "darknet.h" + +#ifdef OPENCV +image get_image_from_stream(CvCapture *cap); +image ipl_to_image(IplImage* src); + +void 
reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters); + + +typedef struct { + float *x; + float *y; +} float_pair; + +float_pair get_rnn_vid_data(network net, char **files, int n, int batch, int steps) +{ + int b; + assert(net.batch == steps + 1); + image out_im = get_network_image(net); + int output_size = out_im.w*out_im.h*out_im.c; + printf("%d %d %d\n", out_im.w, out_im.h, out_im.c); + float *feats = calloc(net.batch*batch*output_size, sizeof(float)); + for(b = 0; b < batch; ++b){ + int input_size = net.w*net.h*net.c; + float *input = calloc(input_size*net.batch, sizeof(float)); + char *filename = files[rand()%n]; + CvCapture *cap = cvCaptureFromFile(filename); + int frames = cvGetCaptureProperty(cap, CV_CAP_PROP_FRAME_COUNT); + int index = rand() % (frames - steps - 2); + if (frames < (steps + 4)){ + --b; + free(input); + continue; + } + + printf("frames: %d, index: %d\n", frames, index); + cvSetCaptureProperty(cap, CV_CAP_PROP_POS_FRAMES, index); + + int i; + for(i = 0; i < net.batch; ++i){ + IplImage* src = cvQueryFrame(cap); + image im = ipl_to_image(src); + rgbgr_image(im); + image re = resize_image(im, net.w, net.h); + //show_image(re, "loaded"); + //cvWaitKey(10); + memcpy(input + i*input_size, re.data, input_size*sizeof(float)); + free_image(im); + free_image(re); + } + float *output = network_predict(net, input); + + free(input); + + for(i = 0; i < net.batch; ++i){ + memcpy(feats + (b + i*batch)*output_size, output + i*output_size, output_size*sizeof(float)); + } + + cvReleaseCapture(&cap); + } + + //printf("%d %d %d\n", out_im.w, out_im.h, out_im.c); + float_pair p = {0}; + p.x = feats; + p.y = feats + output_size*batch; //+ out_im.w*out_im.h*out_im.c; + + return p; +} + + +void train_vid_rnn(char *cfgfile, char *weightfile) +{ + char *train_videos = "data/vid/train.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = 
basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + + list *plist = get_paths(train_videos); + int N = plist->size; + char **paths = (char **)list_to_array(plist); + clock_t time; + int steps = net.time_steps; + int batch = net.batch / net.time_steps; + + network extractor = parse_network_cfg("cfg/extractor.cfg"); + load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv"); + + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + float_pair p = get_rnn_vid_data(extractor, paths, N, batch, steps); + + copy_cpu(net.inputs*net.batch, p.x, 1, net.input, 1); + copy_cpu(net.truths*net.batch, p.y, 1, net.truth, 1); + float loss = train_network_datum(net) / (net.batch); + + + free(p.x); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time)); + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%10==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + + +image save_reconstruction(network net, image *init, float *feat, char *name, int i) +{ + image recon; + if (init) { + recon = copy_image(*init); + } else { + recon = make_random_image(net.w, net.h, 3); + } + + image update = make_image(net.w, net.h, 3); + reconstruct_picture(net, feat, recon, update, .01, .9, .1, 2, 50); + char buff[256]; + sprintf(buff, "%s%d", name, i); + save_image(recon, buff); + free_image(update); + 
return recon; +} + +void generate_vid_rnn(char *cfgfile, char *weightfile) +{ + network extractor = parse_network_cfg("cfg/extractor.recon.cfg"); + load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv"); + + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&extractor, 1); + set_batch_network(&net, 1); + + int i; + CvCapture *cap = cvCaptureFromFile("/extra/vid/ILSVRC2015/Data/VID/snippets/val/ILSVRC2015_val_00007030.mp4"); + float *feat; + float *next; + image last; + for(i = 0; i < 25; ++i){ + image im = get_image_from_stream(cap); + image re = resize_image(im, extractor.w, extractor.h); + feat = network_predict(extractor, re.data); + if(i > 0){ + printf("%f %f\n", mean_array(feat, 14*14*512), variance_array(feat, 14*14*512)); + printf("%f %f\n", mean_array(next, 14*14*512), variance_array(next, 14*14*512)); + printf("%f\n", mse_array(feat, 14*14*512)); + axpy_cpu(14*14*512, -1, feat, 1, next, 1); + printf("%f\n", mse_array(next, 14*14*512)); + } + next = network_predict(net, feat); + + free_image(im); + + free_image(save_reconstruction(extractor, 0, feat, "feat", i)); + free_image(save_reconstruction(extractor, 0, next, "next", i)); + if (i==24) last = copy_image(re); + free_image(re); + } + for(i = 0; i < 30; ++i){ + next = network_predict(net, next); + image new = save_reconstruction(extractor, &last, next, "new", i); + free_image(last); + last = new; + } +} + +void run_vid_rnn(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + //char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_vid_rnn(cfg, weights); + else if(0==strcmp(argv[2], "generate")) generate_vid_rnn(cfg, weights); +} +#else +void run_vid_rnn(int argc, char **argv){} +#endif + diff --git a/workloads/realworld/uvm/darknet/examples/segmenter.c b/workloads/realworld/uvm/darknet/examples/segmenter.c new file mode 100644 index 0000000000000000000000000000000000000000..2e7cea0b730754b74a125bcd865aa12f0bdd3be0 --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/segmenter.c @@ -0,0 +1,255 @@ +#include "darknet.h" +#include +#include + +void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + image pred = get_network_image(net); + + int div = net->w/pred.w; + assert(pred.w * div == net->w); + assert(pred.h * div == net->h); + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.scale = div; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + 
args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.classes = 80; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = SEGMENTATION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(display){ + image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]); + image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]); + image mask = mask_to_rgb(tr); + image prmask = mask_to_rgb(pred); + show_image(im, "input", 1); + show_image(prmask, "pred", 1); + show_image(mask, "truth", 100); + free_image(mask); + free_image(prmask); + } + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, 
buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_segmenter(char *datafile, char *cfg, char *weights, char *filename) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image sized = letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + show_image(sized, "orig", 1); + show_image(prmask, "pred", 0); + free_image(im); + free_image(sized); + free_image(prmask); + if (filename) break; + } +} + + +void demo_segmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Classifier Demo\n"); + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = letterbox_image(in, net->w, net->h); + + network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + show_image(prmask, "Segmenter", 10); + + free_image(in_s); + free_image(in); + free_image(prmask); + + gettimeofday(&tval_after, NULL); + 
timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_segmenter(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? 
argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_segmenter(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_segmenter(data, cfg, weights, gpus, ngpus, clear, display); + else if(0==strcmp(argv[2], "demo")) demo_segmenter(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/uvm/darknet/examples/super.c b/workloads/realworld/uvm/darknet/examples/super.c new file mode 100644 index 0000000000000000000000000000000000000000..d34406b1f2ce70cd36eecb8298bf1ca3e736f01b --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/super.c @@ -0,0 +1,120 @@ +#include "darknet.h" + +void train_super(char *cfgfile, char *weightfile, int clear) +{ + char *train_images = "/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, clear); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.scale = 4; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = SUPER_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d 
images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void test_super(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + resize_network(net, im.w, im.h); + printf("%d %d\n", im.w, im.h); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image(net); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 0); + + free_image(im); + if (filename) break; + } +} + + +void run_super(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5] : 0; + int clear = find_arg(argc, argv, "-clear"); + if(0==strcmp(argv[2], "train")) train_super(cfg, weights, clear); + else if(0==strcmp(argv[2], "test")) test_super(cfg, weights, filename); + /* + else if(0==strcmp(argv[2], "valid")) validate_super(cfg, weights); + */ +} diff --git a/workloads/realworld/uvm/darknet/examples/swag.c b/workloads/realworld/uvm/darknet/examples/swag.c new file mode 100644 index 0000000000000000000000000000000000000000..c22d7855c46a975ecd1e94a60f9b7059bc288fee --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/swag.c @@ -0,0 +1,83 @@ +#include "darknet.h" +#include + +void train_swag(char *cfgfile, char *weightfile) +{ + char *train_images = "data/voc.0712.trainval"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + data train, buffer; + + layer l = net.layers[net.n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes = side; + args.d = &buffer; + args.type = REGION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss 
= train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || i == 600){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void run_swag(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_swag(cfg, weights); +} diff --git a/workloads/realworld/uvm/darknet/examples/tag.c b/workloads/realworld/uvm/darknet/examples/tag.c new file mode 100644 index 0000000000000000000000000000000000000000..4caf8cba18f39f62deb54ea913fd40c194b3e33c --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/tag.c @@ -0,0 +1,140 @@ +#include "darknet.h" + +void train_tag(char *cfgfile, char *weightfile, int clear) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, clear); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + list *plist = get_paths("/home/pjreddie/tag/train.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + + args.min = net->w; + args.max = net->max_crop; + args.size = net->w; + + args.paths = paths; + args.classes = net->outputs; + args.n = imgs; + args.m = N; 
+ args.d = &buffer; + args.type = TAG_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + fprintf(stderr, "%d classes\n", net->outputs); + + load_thread = load_data_in_thread(args); + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + pthread_join(load_thread, 0); + free_data(buffer); + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void test_tag(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + int i = 0; + char **names = get_labels("data/tags.txt"); + clock_t time; + int indexes[10]; + char buff[256]; + char *input = buff; + int size = net->w; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = 
load_image_color(input, 0, 0); + image r = resize_min(im, size); + resize_network(net, r.w, r.h); + printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + float *predictions = network_predict(net, X); + top_predictions(net, 10, indexes); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < 10; ++i){ + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void run_tag(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int clear = find_arg(argc, argv, "-clear"); + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_tag(cfg, weights, clear); + else if(0==strcmp(argv[2], "test")) test_tag(cfg, weights, filename); +} + diff --git a/workloads/realworld/uvm/darknet/examples/voxel.c b/workloads/realworld/uvm/darknet/examples/voxel.c new file mode 100644 index 0000000000000000000000000000000000000000..01ea9bb98987590227758364bbfff50996cf9a2d --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/voxel.c @@ -0,0 +1,161 @@ +#include "darknet.h" + +void extract_voxel(char *lfile, char *rfile, char *prefix) +{ +#ifdef OPENCV + int w = 1920; + int h = 1080; + int shift = 0; + int count = 0; + CvCapture *lcap = cvCaptureFromFile(lfile); + CvCapture *rcap = cvCaptureFromFile(rfile); + while(1){ + image l = get_image_from_stream(lcap); + image r = get_image_from_stream(rcap); + if(!l.w || !r.w) break; + if(count%100 == 0) { + shift = best_3d_shift_r(l, r, -l.h/100, l.h/100); + printf("%d\n", shift); + } + image ls = crop_image(l, (l.w - w)/2, (l.h - h)/2, w, h); + image rs = crop_image(r, 105 + (r.w - w)/2, (r.h - h)/2 + shift, w, h); + char buff[256]; + sprintf(buff, "%s_%05d_l", prefix, count); 
+ save_image(ls, buff); + sprintf(buff, "%s_%05d_r", prefix, count); + save_image(rs, buff); + free_image(l); + free_image(r); + free_image(ls); + free_image(rs); + ++count; + } + +#else + printf("need OpenCV for extraction\n"); +#endif +} + +void train_voxel(char *cfgfile, char *weightfile) +{ + char *train_images = "/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.scale = 4; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = SUPER_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + 
sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void test_voxel(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + resize_network(&net, im.w, im.h); + printf("%d %d\n", im.w, im.h); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image(net); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + + free_image(im); + if (filename) break; + } +} + + +void run_voxel(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_voxel(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_voxel(cfg, weights, filename); + else if(0==strcmp(argv[2], "extract")) extract_voxel(argv[3], argv[4], argv[5]); + /* + else if(0==strcmp(argv[2], "valid")) validate_voxel(cfg, weights); + */ +} diff --git a/workloads/realworld/uvm/darknet/examples/writing.c b/workloads/realworld/uvm/darknet/examples/writing.c new file mode 100644 index 0000000000000000000000000000000000000000..1b6ff83b5838b654e0fd1b6664156daf6d7a889b --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/writing.c @@ -0,0 +1,144 @@ +#include "darknet.h" + +void train_writing(char *cfgfile, char *weightfile) +{ + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + list *plist = get_paths("figures.list"); + char **paths = (char **)list_to_array(plist); + clock_t time; + int N = plist->size; + printf("N: %d\n", N); + image out = get_network_image(net); + + data train, buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.out_w = out.w; + args.out_h = out.h; + args.paths = paths; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = WRITING_DATA; + + pthread_t load_thread = load_data_in_thread(args); + int epoch = (*net.seen)/N; + while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + printf("Loaded %lf seconds\n",sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + + /* + image pred = float_to_image(64, 64, 1, out); + print_image(pred); + */ + + /* + 
image im = float_to_image(256, 256, 3, train.X.vals[0]); + image lab = float_to_image(64, 64, 1, train.y.vals[0]); + image pred = float_to_image(64, 64, 1, out); + show_image(im, "image"); + show_image(lab, "label"); + print_image(lab); + show_image(pred, "pred"); + cvWaitKey(0); + */ + + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + free_data(train); + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_batch_%ld.weights", backup_directory, base, get_current_batch(net)); + save_weights(net, buff); + } + if(*net.seen/N > epoch){ + epoch = *net.seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + } +} + +void test_writing(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + + image im = load_image_color(input, 0, 0); + resize_network(&net, im.w, im.h); + printf("%d %d %d\n", im.h, im.w, im.c); + float *X = im.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + image pred = get_network_image(net); + + image upsampled = resize_image(pred, im.w, im.h); + image thresh = threshold_image(upsampled, .5); + pred = thresh; + + show_image(pred, "prediction"); + show_image(im, "orig"); +#ifdef OPENCV + cvWaitKey(0); + cvDestroyAllWindows(); +#endif + + free_image(upsampled); + free_image(thresh); + 
free_image(im); + if (filename) break; + } +} + +void run_writing(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_writing(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_writing(cfg, weights, filename); +} + diff --git a/workloads/realworld/uvm/darknet/examples/yolo.c b/workloads/realworld/uvm/darknet/examples/yolo.c new file mode 100644 index 0000000000000000000000000000000000000000..4ddb69a3e53b2123ccb89026645a66c044047faa --- /dev/null +++ b/workloads/realworld/uvm/darknet/examples/yolo.c @@ -0,0 +1,327 @@ +#include "darknet.h" + +char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; + +void train_yolo(char *cfgfile, char *weightfile) +{ + char *train_images = "/data/voc/train.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + layer l = net->layers[net->n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes = side; + args.d = &buffer; + 
args.type = REGION_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || (i < 1000 && i%100 == 0)){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void print_yolo_detections(FILE **fps, char *id, int total, int classes, int w, int h, detection *dets) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], + xmin, ymin, xmax, ymax); + } + } +} + +void validate_yolo(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + //list *plist = 
get_paths("data/voc.2007.test"); + list *plist = get_paths("/home/pjreddie/data/voc/2007_test.txt"); + //list *plist = get_paths("data/voc.2012.test"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + int j; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + int t; + + float thresh = .001; + int nms = 1; + float iou_thresh = .5; + + int nthreads = 8; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.type = IMAGE_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + time_t start = time(0); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id = basecfg(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh); + print_yolo_detections(fps, id, l.side*l.side*l.n, classes, w, h, dets); + free_detections(dets, 
nboxes); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); +} + +void validate_yolo_recall(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + list *plist = get_paths("data/voc.2007.test"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + int side = l.side; + + int j, k; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + + float thresh = .001; + float iou_thresh = .5; + float nms = 0; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + + int nboxes = 0; + detection *dets = get_network_boxes(net, orig.w, orig.h, thresh, 0, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, side*side*l.n, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < side*side*l.n; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < 
side*side*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free_detections(dets, nboxes); + free(id); + free_image(orig); + free_image(sized); + } +} + +void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) +{ + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + layer l = net->layers[net->n-1]; + set_batch_network(net, 1); + srand(2222222); + clock_t time; + char buff[256]; + char *input = buff; + float nms=.4; + while(1){ + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = resize_image(im, net->w, net->h); + float *X = sized.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + + int nboxes = 0; + detection *dets = get_network_boxes(net, 1, 1, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms); + + draw_detections(im, dets, l.side*l.side*l.n, thresh, voc_names, alphabet, 20); + save_image(im, "predictions"); + show_image(im, "predictions", 0); + free_detections(dets, nboxes); + free_image(im); + free_image(sized); + if (filename) break; + } +} + +void run_yolo(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .2); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] 
[weights (optional)]\n", argv[0], argv[1]); + return; + } + + int avg = find_int_arg(argc, argv, "-avg", 1); + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5]: 0; + if(0==strcmp(argv[2], "test")) test_yolo(cfg, weights, filename, thresh); + else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights); + else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix, avg, .5, 0,0,0,0); +} diff --git a/workloads/realworld/uvm/darknet/include/darknet.h b/workloads/realworld/uvm/darknet/include/darknet.h new file mode 100644 index 0000000000000000000000000000000000000000..7be8225e2d39f079ca0a15da6980b42f8966af40 --- /dev/null +++ b/workloads/realworld/uvm/darknet/include/darknet.h @@ -0,0 +1,810 @@ +#ifndef DARKNET_API +#define DARKNET_API +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef GPU + #define GPU_DEVICE 7 + #define BLOCK 512 + + #include "cuda_runtime.h" + #include "curand.h" + #include "cublas_v2.h" + + #ifdef CUDNN + #include "cudnn.h" + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define SECRET_NUM -1234 +extern int gpu_index; + +typedef struct{ + int classes; + char **names; +} metadata; + +metadata get_metadata(char *file); + +typedef struct{ + int *leaf; + int n; + int *parent; + int *child; + int *group; + char **name; + + int groups; + int *group_size; + int *group_offset; +} tree; +tree *read_tree(char *filename); + +typedef enum{ + LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU +} ACTIVATION; + +typedef enum{ + PNG, BMP, TGA, JPG +} IMTYPE; + +typedef enum{ + MULT, ADD, SUB, DIV +} BINARY_ACTIVATION; + +typedef enum { + CONVOLUTIONAL, + DECONVOLUTIONAL, + CONNECTED, + MAXPOOL, + SOFTMAX, + 
DETECTION, + DROPOUT, + CROP, + ROUTE, + COST, + NORMALIZATION, + AVGPOOL, + LOCAL, + SHORTCUT, + ACTIVE, + RNN, + GRU, + LSTM, + CRNN, + BATCHNORM, + NETWORK, + XNOR, + REGION, + YOLO, + ISEG, + REORG, + UPSAMPLE, + LOGXENT, + L2NORM, + BLANK +} LAYER_TYPE; + +typedef enum{ + SSE, MASKED, L1, SEG, SMOOTH,WGAN +} COST_TYPE; + +typedef struct{ + int batch; + float learning_rate; + float momentum; + float decay; + int adam; + float B1; + float B2; + float eps; + int t; +} update_args; + +struct network; +typedef struct network network; + +struct layer; +typedef struct layer layer; + +struct layer{ + LAYER_TYPE type; + ACTIVATION activation; + COST_TYPE cost_type; + void (*forward) (struct layer, struct network); + void (*backward) (struct layer, struct network); + void (*update) (struct layer, update_args); + void (*forward_gpu) (struct layer, struct network); + void (*backward_gpu) (struct layer, struct network); + void (*update_gpu) (struct layer, update_args); + int batch_normalize; + int shortcut; + int batch; + int forced; + int flipped; + int inputs; + int outputs; + int nweights; + int nbiases; + int extra; + int truths; + int h,w,c; + int out_h, out_w, out_c; + int n; + int max_boxes; + int groups; + int size; + int side; + int stride; + int reverse; + int flatten; + int spatial; + int pad; + int sqrt; + int flip; + int index; + int binary; + int xnor; + int steps; + int hidden; + int truth; + float smooth; + float dot; + float angle; + float jitter; + float saturation; + float exposure; + float shift; + float ratio; + float learning_rate_scale; + float clip; + int noloss; + int softmax; + int classes; + int coords; + int background; + int rescore; + int objectness; + int joint; + int noadjust; + int reorg; + int log; + int tanh; + int *mask; + int total; + + float alpha; + float beta; + float kappa; + + float coord_scale; + float object_scale; + float noobject_scale; + float mask_scale; + float class_scale; + int bias_match; + int random; + float 
ignore_thresh; + float truth_thresh; + float thresh; + float focus; + int classfix; + int absolute; + + int onlyforward; + int stopbackward; + int dontload; + int dontsave; + int dontloadscales; + int numload; + + float temperature; + float probability; + float scale; + + char * cweights; + int * indexes; + int * input_layers; + int * input_sizes; + int * map; + int * counts; + float ** sums; + float * rand; + float * cost; + float * state; + float * prev_state; + float * forgot_state; + float * forgot_delta; + float * state_delta; + float * combine_cpu; + float * combine_delta_cpu; + + float * concat; + float * concat_delta; + + float * binary_weights; + + float * biases; + float * bias_updates; + + float * scales; + float * scale_updates; + + float * weights; + float * weight_updates; + + float * delta; + float * output; + float * loss; + float * squared; + float * norms; + + float * spatial_mean; + float * mean; + float * variance; + + float * mean_delta; + float * variance_delta; + + float * rolling_mean; + float * rolling_variance; + + float * x; + float * x_norm; + + float * m; + float * v; + + float * bias_m; + float * bias_v; + float * scale_m; + float * scale_v; + + + float *z_cpu; + float *r_cpu; + float *h_cpu; + float * prev_state_cpu; + + float *temp_cpu; + float *temp2_cpu; + float *temp3_cpu; + + float *dh_cpu; + float *hh_cpu; + float *prev_cell_cpu; + float *cell_cpu; + float *f_cpu; + float *i_cpu; + float *g_cpu; + float *o_cpu; + float *c_cpu; + float *dc_cpu; + + float * binary_input; + + struct layer *input_layer; + struct layer *self_layer; + struct layer *output_layer; + + struct layer *reset_layer; + struct layer *update_layer; + struct layer *state_layer; + + struct layer *input_gate_layer; + struct layer *state_gate_layer; + struct layer *input_save_layer; + struct layer *state_save_layer; + struct layer *input_state_layer; + struct layer *state_state_layer; + + struct layer *input_z_layer; + struct layer *state_z_layer; + + struct layer 
*input_r_layer; + struct layer *state_r_layer; + + struct layer *input_h_layer; + struct layer *state_h_layer; + + struct layer *wz; + struct layer *uz; + struct layer *wr; + struct layer *ur; + struct layer *wh; + struct layer *uh; + struct layer *uo; + struct layer *wo; + struct layer *uf; + struct layer *wf; + struct layer *ui; + struct layer *wi; + struct layer *ug; + struct layer *wg; + + tree *softmax_tree; + + size_t workspace_size; + +#ifdef GPU + int *indexes_gpu; + + float *z_gpu; + float *r_gpu; + float *h_gpu; + + float *temp_gpu; + float *temp2_gpu; + float *temp3_gpu; + + float *dh_gpu; + float *hh_gpu; + float *prev_cell_gpu; + float *cell_gpu; + float *f_gpu; + float *i_gpu; + float *g_gpu; + float *o_gpu; + float *c_gpu; + float *dc_gpu; + + float *m_gpu; + float *v_gpu; + float *bias_m_gpu; + float *scale_m_gpu; + float *bias_v_gpu; + float *scale_v_gpu; + + float * combine_gpu; + float * combine_delta_gpu; + + float * prev_state_gpu; + float * forgot_state_gpu; + float * forgot_delta_gpu; + float * state_gpu; + float * state_delta_gpu; + float * gate_gpu; + float * gate_delta_gpu; + float * save_gpu; + float * save_delta_gpu; + float * concat_gpu; + float * concat_delta_gpu; + + float * binary_input_gpu; + float * binary_weights_gpu; + + float * mean_gpu; + float * variance_gpu; + + float * rolling_mean_gpu; + float * rolling_variance_gpu; + + float * variance_delta_gpu; + float * mean_delta_gpu; + + float * x_gpu; + float * x_norm_gpu; + float * weights_gpu; + float * weight_updates_gpu; + float * weight_change_gpu; + + float * biases_gpu; + float * bias_updates_gpu; + float * bias_change_gpu; + + float * scales_gpu; + float * scale_updates_gpu; + float * scale_change_gpu; + + float * output_gpu; + float * loss_gpu; + float * delta_gpu; + float * rand_gpu; + float * squared_gpu; + float * norms_gpu; +#ifdef CUDNN + cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc; + cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc; + 
cudnnTensorDescriptor_t normTensorDesc; + cudnnFilterDescriptor_t weightDesc; + cudnnFilterDescriptor_t dweightDesc; + cudnnConvolutionDescriptor_t convDesc; + cudnnConvolutionFwdAlgo_t fw_algo; + cudnnConvolutionBwdDataAlgo_t bd_algo; + cudnnConvolutionBwdFilterAlgo_t bf_algo; +#endif +#endif +}; + +void free_layer(layer); + +typedef enum { + CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM +} learning_rate_policy; + +typedef struct network{ + int n; + int batch; + size_t *seen; + int *t; + float epoch; + int subdivisions; + layer *layers; + float *output; + learning_rate_policy policy; + + float learning_rate; + float momentum; + float decay; + float gamma; + float scale; + float power; + int time_steps; + int step; + int max_batches; + float *scales; + int *steps; + int num_steps; + int burn_in; + + int adam; + float B1; + float B2; + float eps; + + int inputs; + int outputs; + int truths; + int notruth; + int h, w, c; + int max_crop; + int min_crop; + float max_ratio; + float min_ratio; + int center; + float angle; + float aspect; + float exposure; + float saturation; + float hue; + int random; + + int gpu_index; + tree *hierarchy; + + float *input; + float *truth; + float *delta; + float *workspace; + int train; + int index; + float *cost; + float clip; + +#ifdef GPU + float *input_gpu; + float *truth_gpu; + float *delta_gpu; + float *output_gpu; +#endif + +} network; + +typedef struct { + int w; + int h; + float scale; + float rad; + float dx; + float dy; + float aspect; +} augment_args; + +typedef struct { + int w; + int h; + int c; + float *data; +} image; + +typedef struct{ + float x, y, w, h; +} box; + +typedef struct detection{ + box bbox; + int classes; + float *prob; + float *mask; + float objectness; + int sort_class; +} detection; + +typedef struct matrix{ + int rows, cols; + float **vals; +} matrix; + + +typedef struct{ + int w, h; + matrix X; + matrix y; + int shallow; + int *num_boxes; + box **boxes; +} data; + +typedef enum { + CLASSIFICATION_DATA, 
DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA, ISEG_DATA +} data_type; + +typedef struct load_args{ + int threads; + char **paths; + char *path; + int n; + int m; + char **labels; + int h; + int w; + int out_w; + int out_h; + int nh; + int nw; + int num_boxes; + int min, max, size; + int classes; + int background; + int scale; + int center; + int coords; + float jitter; + float angle; + float aspect; + float saturation; + float exposure; + float hue; + data *d; + image *im; + image *resized; + data_type type; + tree *hierarchy; +} load_args; + +typedef struct{ + int id; + float x,y,w,h; + float left, right, top, bottom; +} box_label; + + +network *load_network(char *cfg, char *weights, int clear); +load_args get_base_args(network *net); + +void free_data(data d); + +typedef struct node{ + void *val; + struct node *next; + struct node *prev; +} node; + +typedef struct list{ + int size; + node *front; + node *back; +} list; + +pthread_t load_data(load_args args); +list *read_data_cfg(char *filename); +list *read_cfg(char *filename); +unsigned char *read_file(char *filename); +data resize_data(data orig, int w, int h); +data *tile_data(data orig, int divs, int size); +data select_data(data *orig, int *inds); + +void forward_network(network *net); +void backward_network(network *net); +void update_network(network *net); + + +float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); +void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); +void scal_cpu(int N, float ALPHA, float *X, int INCX); +void fill_cpu(int N, float ALPHA, float * X, int INCX); +void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); +void softmax(float *input, int n, float temp, int stride, 
float *output); + +int best_3d_shift_r(image a, image b, int min, int max); +#ifdef GPU +void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); +void fill_gpu(int N, float ALPHA, float * X, int INCX); +void scal_gpu(int N, float ALPHA, float * X, int INCX); +void copy_gpu(int N, float * X, int INCX, float * Y, int INCY); + +void cuda_set_device(int n); +void cuda_free(float *x_gpu); +float *cuda_make_array(float *x, size_t n); +void cuda_pull_array(float *x_gpu, float *x, size_t n); +float cuda_mag_array(float *x_gpu, size_t n); +void cuda_push_array(float *x_gpu, float *x, size_t n); + +void forward_network_gpu(network *net); +void backward_network_gpu(network *net); +void update_network_gpu(network *net); + +float train_networks(network **nets, int n, data d, int interval); +void sync_nets(network **nets, int n, int interval); +void harmless_update_network_gpu(network *net); +#endif +image get_label(image **characters, char *string, int size); +void draw_label(image a, int r, int c, image label, const float *rgb); +void save_image(image im, const char *name); +void save_image_options(image im, const char *name, IMTYPE f, int quality); +void get_next_batch(data d, int n, int offset, float *X, float *y); +void grayscale_image_3c(image im); +void normalize_image(image p); +void matrix_to_csv(matrix m); +float train_network_sgd(network *net, data d, int n); +void rgbgr_image(image im); +data copy_data(data d); +data concat_data(data d1, data d2); +data load_cifar10_data(char *filename); +float matrix_topk_accuracy(matrix truth, matrix guess, int k); +void matrix_add_matrix(matrix from, matrix to); +void scale_matrix(matrix m, float scale); +matrix csv_to_matrix(char *filename); +float *network_accuracies(network *net, data d, int n); +float train_network_datum(network *net); +image make_random_image(int w, int h, int c); + +void denormalize_connected_layer(layer l); +void denormalize_convolutional_layer(layer l); +void 
statistics_connected_layer(layer l); +void rescale_weights(layer l, float scale, float trans); +void rgbgr_weights(layer l); +image *get_weights(layer l); + +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, int avg, float hier_thresh, int w, int h, int fps, int fullscreen); +void get_detection_detections(layer l, int w, int h, float thresh, detection *dets); + +char *option_find_str(list *l, char *key, char *def); +int option_find_int(list *l, char *key, int def); +int option_find_int_quiet(list *l, char *key, int def); + +network *parse_network_cfg(char *filename); +void save_weights(network *net, char *filename); +void load_weights(network *net, char *filename); +void save_weights_upto(network *net, char *filename, int cutoff); +void load_weights_upto(network *net, char *filename, int start, int cutoff); + +void zero_objectness(layer l); +void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets); +int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets); +void free_network(network *net); +void set_batch_network(network *net, int b); +void set_temp_network(network *net, float t); +image load_image(char *filename, int w, int h, int c); +image load_image_color(char *filename, int w, int h); +image make_image(int w, int h, int c); +image resize_image(image im, int w, int h); +void censor_image(image im, int dx, int dy, int w, int h); +image letterbox_image(image im, int w, int h); +image crop_image(image im, int dx, int dy, int w, int h); +image center_crop_image(image im, int w, int h); +image resize_min(image im, int min); +image resize_max(image im, int max); +image threshold_image(image im, float thresh); +image mask_to_rgb(image mask); +int resize_network(network *net, int w, int h); +void free_matrix(matrix m); 
+void test_resize(char *filename); +int show_image(image p, const char *name, int ms); +image copy_image(image p); +void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b); +float get_current_rate(network *net); +void composite_3d(char *f1, char *f2, char *out, int delta); +data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h); +size_t get_current_batch(network *net); +void constrain_image(image im); +image get_network_image_layer(network *net, int i); +layer get_network_output_layer(network *net); +void top_predictions(network *net, int n, int *index); +void flip_image(image a); +image float_to_image(int w, int h, int c, float *data); +void ghost_image(image source, image dest, int dx, int dy); +float network_accuracy(network *net, data d); +void random_distort_image(image im, float hue, float saturation, float exposure); +void fill_image(image m, float s); +image grayscale_image(image im); +void rotate_image_cw(image im, int times); +double what_time_is_it_now(); +image rotate_image(image m, float rad); +void visualize_network(network *net); +float box_iou(box a, box b); +data load_all_cifar10(); +box_label *read_boxes(char *filename, int *n); +box float_to_box(float *f, int stride); +void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes); + +matrix network_predict_data(network *net, data test); +image **load_alphabet(); +image get_network_image(network *net); +float *network_predict(network *net, float *input); + +int network_width(network *net); +int network_height(network *net); +float *network_predict_image(network *net, image im); +void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets); +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num); +void free_detections(detection *dets, int n); + +void 
reset_network_state(network *net, int b); + +char **get_labels(char *filename); +void do_nms_obj(detection *dets, int total, int classes, float thresh); +void do_nms_sort(detection *dets, int total, int classes, float thresh); + +matrix make_matrix(int rows, int cols); + +#ifdef OPENCV +void *open_video_stream(const char *f, int c, int w, int h, int fps); +image get_image_from_stream(void *p); +void make_window(char *name, int w, int h, int fullscreen); +#endif + +void free_image(image m); +float train_network(network *net, data d); +pthread_t load_data_in_thread(load_args args); +void load_data_blocking(load_args args); +list *get_paths(char *filename); +void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride); +void change_leaves(tree *t, char *leaf_list); + +int find_int_arg(int argc, char **argv, char *arg, int def); +float find_float_arg(int argc, char **argv, char *arg, float def); +int find_arg(int argc, char* argv[], char *arg); +char *find_char_arg(int argc, char **argv, char *arg, char *def); +char *basecfg(char *cfgfile); +void find_replace(char *str, char *orig, char *rep, char *output); +void free_ptrs(void **ptrs, int n); +char *fgetl(FILE *fp); +void strip(char *s); +float sec(clock_t clocks); +void **list_to_array(list *l); +void top_k(float *a, int n, int k, int *index); +int *read_map(char *filename); +void error(const char *s); +int max_index(float *a, int n); +int max_int_index(int *a, int n); +int sample_array(float *a, int n); +int *random_index_order(int min, int max); +void free_list(list *l); +float mse_array(float *a, int n); +float variance_array(float *a, int n); +float mag_array(float *a, int n); +void scale_array(float *a, int n, float s); +float mean_array(float *a, int n); +float sum_array(float *a, int n); +void normalize_array(float *a, int n); +int *read_intlist(char *s, int *n, int d); +size_t rand_size_t(); +float rand_normal(); +float rand_uniform(float min, float max); + +#ifdef 
__cplusplus +} +#endif +#endif diff --git a/workloads/realworld/uvm/darknet/predictions.jpg b/workloads/realworld/uvm/darknet/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c92d70d77e70e11853e9838ca90b46eb71a18ffa Binary files /dev/null and b/workloads/realworld/uvm/darknet/predictions.jpg differ diff --git a/workloads/realworld/uvm/darknet/python/darknet.py b/workloads/realworld/uvm/darknet/python/darknet.py new file mode 100644 index 0000000000000000000000000000000000000000..b14d24485d86aa69f3991be79ec4f25c2b8e5a59 --- /dev/null +++ b/workloads/realworld/uvm/darknet/python/darknet.py @@ -0,0 +1,156 @@ +from ctypes import * +import math +import random + +def sample(probs): + s = sum(probs) + probs = [a/s for a in probs] + r = random.uniform(0, 1) + for i in range(len(probs)): + r = r - probs[i] + if r <= 0: + return i + return len(probs)-1 + +def c_array(ctype, values): + arr = (ctype*len(values))() + arr[:] = values + return arr + +class BOX(Structure): + _fields_ = [("x", c_float), + ("y", c_float), + ("w", c_float), + ("h", c_float)] + +class DETECTION(Structure): + _fields_ = [("bbox", BOX), + ("classes", c_int), + ("prob", POINTER(c_float)), + ("mask", POINTER(c_float)), + ("objectness", c_float), + ("sort_class", c_int)] + + +class IMAGE(Structure): + _fields_ = [("w", c_int), + ("h", c_int), + ("c", c_int), + ("data", POINTER(c_float))] + +class METADATA(Structure): + _fields_ = [("classes", c_int), + ("names", POINTER(c_char_p))] + + + +#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL) +lib = CDLL("libdarknet.so", RTLD_GLOBAL) +lib.network_width.argtypes = [c_void_p] +lib.network_width.restype = c_int +lib.network_height.argtypes = [c_void_p] +lib.network_height.restype = c_int + +predict = lib.network_predict +predict.argtypes = [c_void_p, POINTER(c_float)] +predict.restype = POINTER(c_float) + +set_gpu = lib.cuda_set_device +set_gpu.argtypes = [c_int] + +make_image = lib.make_image 
+make_image.argtypes = [c_int, c_int, c_int] +make_image.restype = IMAGE + +get_network_boxes = lib.get_network_boxes +get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)] +get_network_boxes.restype = POINTER(DETECTION) + +make_network_boxes = lib.make_network_boxes +make_network_boxes.argtypes = [c_void_p] +make_network_boxes.restype = POINTER(DETECTION) + +free_detections = lib.free_detections +free_detections.argtypes = [POINTER(DETECTION), c_int] + +free_ptrs = lib.free_ptrs +free_ptrs.argtypes = [POINTER(c_void_p), c_int] + +network_predict = lib.network_predict +network_predict.argtypes = [c_void_p, POINTER(c_float)] + +reset_rnn = lib.reset_rnn +reset_rnn.argtypes = [c_void_p] + +load_net = lib.load_network +load_net.argtypes = [c_char_p, c_char_p, c_int] +load_net.restype = c_void_p + +do_nms_obj = lib.do_nms_obj +do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +do_nms_sort = lib.do_nms_sort +do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +free_image = lib.free_image +free_image.argtypes = [IMAGE] + +letterbox_image = lib.letterbox_image +letterbox_image.argtypes = [IMAGE, c_int, c_int] +letterbox_image.restype = IMAGE + +load_meta = lib.get_metadata +lib.get_metadata.argtypes = [c_char_p] +lib.get_metadata.restype = METADATA + +load_image = lib.load_image_color +load_image.argtypes = [c_char_p, c_int, c_int] +load_image.restype = IMAGE + +rgbgr_image = lib.rgbgr_image +rgbgr_image.argtypes = [IMAGE] + +predict_image = lib.network_predict_image +predict_image.argtypes = [c_void_p, IMAGE] +predict_image.restype = POINTER(c_float) + +def classify(net, meta, im): + out = predict_image(net, im) + res = [] + for i in range(meta.classes): + res.append((meta.names[i], out[i])) + res = sorted(res, key=lambda x: -x[1]) + return res + +def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): + im = load_image(image, 0, 0) + num = c_int(0) + pnum = pointer(num) 
+ predict_image(net, im) + dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum) + num = pnum[0] + if (nms): do_nms_obj(dets, num, meta.classes, nms); + + res = [] + for j in range(num): + for i in range(meta.classes): + if dets[j].prob[i] > 0: + b = dets[j].bbox + res.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h))) + res = sorted(res, key=lambda x: -x[1]) + free_image(im) + free_detections(dets, num) + return res + +if __name__ == "__main__": + #net = load_net("cfg/densenet201.cfg", "/home/pjreddie/trained/densenet201.weights", 0) + #im = load_image("data/wolf.jpg", 0, 0) + #meta = load_meta("cfg/imagenet1k.data") + #r = classify(net, meta, im) + #print r[:10] + net = load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0) + meta = load_meta("cfg/coco.data") + r = detect(net, meta, "data/dog.jpg") + print(r) + + diff --git a/workloads/realworld/uvm/darknet/python/proverbot.py b/workloads/realworld/uvm/darknet/python/proverbot.py new file mode 100644 index 0000000000000000000000000000000000000000..095aae8f8bf8bbe47ea1768a6e2c948bb0ff8f85 --- /dev/null +++ b/workloads/realworld/uvm/darknet/python/proverbot.py @@ -0,0 +1,37 @@ +from darknet import * + +def predict_tactic(net, s): + prob = 0 + d = c_array(c_float, [0.0]*256) + tac = '' + if not len(s): + s = '\n' + for c in s[:-1]: + d[ord(c)] = 1 + pred = predict(net, d) + d[ord(c)] = 0 + c = s[-1] + while 1: + d[ord(c)] = 1 + pred = predict(net, d) + d[ord(c)] = 0 + pred = [pred[i] for i in range(256)] + ind = sample(pred) + c = chr(ind) + prob += math.log(pred[ind]) + if len(tac) and tac[-1] == '.': + break + tac = tac + c + return (tac, prob) + +def predict_tactics(net, s, n): + tacs = [] + for i in range(n): + reset_rnn(net) + tacs.append(predict_tactic(net, s)) + tacs = sorted(tacs, key=lambda x: -x[1]) + return tacs + +net = load_net("cfg/coq.test.cfg", "/home/pjreddie/backup/coq.backup", 0) +t = predict_tactics(net, "+++++\n", 10) +print t diff --git 
a/workloads/realworld/uvm/darknet/resnet18/run_resnet18.sh b/workloads/realworld/uvm/darknet/resnet18/run_resnet18.sh new file mode 100755 index 0000000000000000000000000000000000000000..10257ad02affc17f8287ea42917813fe320f5f23 --- /dev/null +++ b/workloads/realworld/uvm/darknet/resnet18/run_resnet18.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm/darknet/resnet18/run_super.sh b/workloads/realworld/uvm/darknet/resnet18/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..10257ad02affc17f8287ea42917813fe320f5f23 --- /dev/null +++ b/workloads/realworld/uvm/darknet/resnet18/run_super.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm/darknet/resnet18_b/run_super.sh b/workloads/realworld/uvm/darknet/resnet18_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..012635a1ce64ecda462e50097be554185989ae7a --- /dev/null +++ b/workloads/realworld/uvm/darknet/resnet18_b/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier test ../cfg/imagenet1k.data ../cfg/resnet18_b.cfg diff --git a/workloads/realworld/uvm/darknet/resnet18_t/run_super.sh b/workloads/realworld/uvm/darknet/resnet18_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..0eb59b3bd65cf0186c5ed5f36eff5ec34d54298c --- /dev/null +++ b/workloads/realworld/uvm/darknet/resnet18_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier train ../cfg/imagenet1k.data ../cfg/resnet18_t.cfg \ No 
newline at end of file diff --git a/workloads/realworld/uvm/darknet/resnet50/run_resnet50.sh b/workloads/realworld/uvm/darknet/resnet50/run_resnet50.sh new file mode 100755 index 0000000000000000000000000000000000000000..ce88111af06600203c1ae94421134eb3404f51a4 --- /dev/null +++ b/workloads/realworld/uvm/darknet/resnet50/run_resnet50.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet50.cfg ../../../../../data/darknet/resnet50.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm/darknet/resnet50/run_super.sh b/workloads/realworld/uvm/darknet/resnet50/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ce88111af06600203c1ae94421134eb3404f51a4 --- /dev/null +++ b/workloads/realworld/uvm/darknet/resnet50/run_super.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet50.cfg ../../../../../data/darknet/resnet50.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm/darknet/resnet50_b/run_super.sh b/workloads/realworld/uvm/darknet/resnet50_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..e6f1b1d59b612bef36d04af547bf61808261eb12 --- /dev/null +++ b/workloads/realworld/uvm/darknet/resnet50_b/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier test ../cfg/imagenet1k.data ../cfg/resnet50_b.cfg diff --git a/workloads/realworld/uvm/darknet/resnet50_t/run_super.sh b/workloads/realworld/uvm/darknet/resnet50_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..4d4c7feebd1bd5bdcded72e3d4cf58045949ac90 --- /dev/null +++ b/workloads/realworld/uvm/darknet/resnet50_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier train 
../cfg/imagenet1k.data ../cfg/resnet50_t.cfg diff --git a/workloads/realworld/uvm/darknet/scripts/dice_label.sh b/workloads/realworld/uvm/darknet/scripts/dice_label.sh new file mode 100644 index 0000000000000000000000000000000000000000..f19f8a49481b46d5a04dd18b1b05af8928b21957 --- /dev/null +++ b/workloads/realworld/uvm/darknet/scripts/dice_label.sh @@ -0,0 +1,20 @@ +mkdir -p images +mkdir -p images/orig +mkdir -p images/train +mkdir -p images/val + +ffmpeg -i Face1.mp4 images/orig/face1_%6d.jpg +ffmpeg -i Face2.mp4 images/orig/face2_%6d.jpg +ffmpeg -i Face3.mp4 images/orig/face3_%6d.jpg +ffmpeg -i Face4.mp4 images/orig/face4_%6d.jpg +ffmpeg -i Face5.mp4 images/orig/face5_%6d.jpg +ffmpeg -i Face6.mp4 images/orig/face6_%6d.jpg + +mogrify -resize 100x100^ -gravity center -crop 100x100+0+0 +repage images/orig/* + +ls images/orig/* | shuf | head -n 1000 | xargs mv -t images/val +mv images/orig/* images/train + +find `pwd`/images/train > dice.train.list -name \*.jpg +find `pwd`/images/val > dice.val.list -name \*.jpg + diff --git a/workloads/realworld/uvm/darknet/scripts/gen_tactic.sh b/workloads/realworld/uvm/darknet/scripts/gen_tactic.sh new file mode 100755 index 0000000000000000000000000000000000000000..ffa30d27754dacdd03bd5996d41cbfab14db0f39 --- /dev/null +++ b/workloads/realworld/uvm/darknet/scripts/gen_tactic.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Usage: +# wget http://pjreddie.com/media/files/peek.weights +# scripts/gen_tactic.sh < data/goal.txt +./darknet rnn generatetactic cfg/gru.cfg peek.weights 2>/dev/null diff --git a/workloads/realworld/uvm/darknet/scripts/get_coco_dataset.sh b/workloads/realworld/uvm/darknet/scripts/get_coco_dataset.sh new file mode 100644 index 0000000000000000000000000000000000000000..28463015d1748fd331e071a0a778c6d4500b29ef --- /dev/null +++ b/workloads/realworld/uvm/darknet/scripts/get_coco_dataset.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Clone COCO API +git clone https://github.com/pdollar/coco +cd coco + +mkdir images +cd images + +# 
Download Images +wget -c https://pjreddie.com/media/files/train2014.zip +wget -c https://pjreddie.com/media/files/val2014.zip + +# Unzip +unzip -q train2014.zip +unzip -q val2014.zip + +cd .. + +# Download COCO Metadata +wget -c https://pjreddie.com/media/files/instances_train-val2014.zip +wget -c https://pjreddie.com/media/files/coco/5k.part +wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part +wget -c https://pjreddie.com/media/files/coco/labels.tgz +tar xzf labels.tgz +unzip -q instances_train-val2014.zip + +# Set Up Image Lists +paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt +paste <(awk "{print \"$PWD\"}" trainvalno5k.txt + diff --git a/workloads/realworld/uvm/darknet/scripts/imagenet_label.sh b/workloads/realworld/uvm/darknet/scripts/imagenet_label.sh new file mode 100644 index 0000000000000000000000000000000000000000..01e4306ee3cf7322427374f01c766bcdef970922 --- /dev/null +++ b/workloads/realworld/uvm/darknet/scripts/imagenet_label.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +mkdir -p labelled +wd=`pwd` + +for f in val/*.xml; +do +label=`grep -m1 "" $f | grep -oP '\K[^<]*'` +im=`echo $f | sed 's/val/imgs/; s/xml/JPEG/'` +out=`echo $im | sed 's/JPEG/'${label}'.JPEG/; s/imgs/labelled/'` +ln -s ${wd}/$im ${wd}/$out +done + +find ${wd}/labelled -name \*.JPEG > inet.val.list + diff --git a/workloads/realworld/uvm/darknet/scripts/voc_label.py b/workloads/realworld/uvm/darknet/scripts/voc_label.py new file mode 100644 index 0000000000000000000000000000000000000000..679fc366890d9eccf15124f950a274d8ad24fc83 --- /dev/null +++ b/workloads/realworld/uvm/darknet/scripts/voc_label.py @@ -0,0 +1,59 @@ +import xml.etree.ElementTree as ET +import pickle +import os +from os import listdir, getcwd +from os.path import join + +sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')] + +classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", 
"motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] + + +def convert(size, box): + dw = 1./(size[0]) + dh = 1./(size[1]) + x = (box[0] + box[1])/2.0 - 1 + y = (box[2] + box[3])/2.0 - 1 + w = box[1] - box[0] + h = box[3] - box[2] + x = x*dw + w = w*dw + y = y*dh + h = h*dh + return (x,y,w,h) + +def convert_annotation(year, image_id): + in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id)) + out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w') + tree=ET.parse(in_file) + root = tree.getroot() + size = root.find('size') + w = int(size.find('width').text) + h = int(size.find('height').text) + + for obj in root.iter('object'): + difficult = obj.find('difficult').text + cls = obj.find('name').text + if cls not in classes or int(difficult)==1: + continue + cls_id = classes.index(cls) + xmlbox = obj.find('bndbox') + b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) + bb = convert((w,h), b) + out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') + +wd = getcwd() + +for year, image_set in sets: + if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)): + os.makedirs('VOCdevkit/VOC%s/labels/'%(year)) + image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split() + list_file = open('%s_%s.txt'%(year, image_set), 'w') + for image_id in image_ids: + list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id)) + convert_annotation(year, image_id) + list_file.close() + +os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt") +os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt") + diff --git a/workloads/realworld/uvm/darknet/src/activation_kernels.cu b/workloads/realworld/uvm/darknet/src/activation_kernels.cu new file mode 100644 index 
0000000000000000000000000000000000000000..659b44fb85fba664e37b6e8d6aa1abee39accdd2 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/activation_kernels.cu @@ -0,0 +1,206 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "activations.h" +#include "cuda_dark.h" +} + + +__device__ float lhtan_activate_kernel(float x) +{ + if(x < 0) return .001f*x; + if(x > 1) return .001f*(x-1.f) + 1.f; + return x; +} +__device__ float lhtan_gradient_kernel(float x) +{ + if(x > 0 && x < 1) return 1; + return .001; +} + +__device__ float hardtan_activate_kernel(float x) +{ + if (x < -1) return -1; + if (x > 1) return 1; + return x; +} +__device__ float linear_activate_kernel(float x){return x;} +__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));} +__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;} +__device__ float relu_activate_kernel(float x){return x*(x>0);} +__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);} +__device__ float selu_activate_kernel(float x){return (x >= 0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x)-1);} +__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;} +__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;} +__device__ float leaky_activate_kernel(float x){return (x>0) ? 
x : .1f*x;} +__device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);} +__device__ float plse_activate_kernel(float x) +{ + if(x < -4) return .01f * (x + 4); + if(x > 4) return .01f * (x - 4) + 1; + return .125f*x + .5f; +} +__device__ float stair_activate_kernel(float x) +{ + int n = floorf(x); + if (n%2 == 0) return floorf(x/2); + else return (x - n) + floorf(x/2); +} + + +__device__ float hardtan_gradient_kernel(float x) +{ + if (x > -1 && x < 1) return 1; + return 0; +} +__device__ float linear_gradient_kernel(float x){return 1;} +__device__ float logistic_gradient_kernel(float x){return (1-x)*x;} +__device__ float loggy_gradient_kernel(float x) +{ + float y = (x+1)/2; + return 2*(1-y)*y; +} +__device__ float relu_gradient_kernel(float x){return (x>0);} +__device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);} +__device__ float selu_gradient_kernel(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);} +__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;} +__device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;} +__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;} +__device__ float tanh_gradient_kernel(float x){return 1-x*x;} +__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? 
.01f : .125f;} +__device__ float stair_gradient_kernel(float x) +{ + if (floorf(x) == x) return 0; + return 1; +} + +__device__ float activate_kernel(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_activate_kernel(x); + case LOGISTIC: + return logistic_activate_kernel(x); + case LOGGY: + return loggy_activate_kernel(x); + case RELU: + return relu_activate_kernel(x); + case ELU: + return elu_activate_kernel(x); + case SELU: + return selu_activate_kernel(x); + case RELIE: + return relie_activate_kernel(x); + case RAMP: + return ramp_activate_kernel(x); + case LEAKY: + return leaky_activate_kernel(x); + case TANH: + return tanh_activate_kernel(x); + case PLSE: + return plse_activate_kernel(x); + case STAIR: + return stair_activate_kernel(x); + case HARDTAN: + return hardtan_activate_kernel(x); + case LHTAN: + return lhtan_activate_kernel(x); + } + return 0; +} + +__device__ float gradient_kernel(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_gradient_kernel(x); + case LOGISTIC: + return logistic_gradient_kernel(x); + case LOGGY: + return loggy_gradient_kernel(x); + case RELU: + return relu_gradient_kernel(x); + case ELU: + return elu_gradient_kernel(x); + case SELU: + return selu_gradient_kernel(x); + case RELIE: + return relie_gradient_kernel(x); + case RAMP: + return ramp_gradient_kernel(x); + case LEAKY: + return leaky_gradient_kernel(x); + case TANH: + return tanh_gradient_kernel(x); + case PLSE: + return plse_gradient_kernel(x); + case STAIR: + return stair_gradient_kernel(x); + case HARDTAN: + return hardtan_gradient_kernel(x); + case LHTAN: + return lhtan_gradient_kernel(x); + } + return 0; +} + +__global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int i = id % s; + int b = id / s; + float x1 = x[b*s + i]; + float x2 = x[b*s + s/2 + i]; + if(id < n) { + float de = dy[id]; + dx[b*s 
+ i] = x2*de; + dx[b*s + s/2 + i] = x1*de; + } +} + +extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y) +{ + binary_gradient_array_kernel<<>>(x, dx, n/2, size, a, y); + check_error(cudaPeekAtLastError()); +} +__global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int i = id % s; + int b = id / s; + float x1 = x[b*s + i]; + float x2 = x[b*s + s/2 + i]; + if(id < n) y[id] = x1*x2; +} + +extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y) +{ + binary_activate_array_kernel<<>>(x, n/2, size, a, y); + check_error(cudaPeekAtLastError()); +} + +__global__ void activate_array_kernel(float *x, int n, ACTIVATION a) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n) x[i] = activate_kernel(x[i], a); +} + +__global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n) delta[i] *= gradient_kernel(x[i], a); +} + +extern "C" void activate_array_gpu(float *x, int n, ACTIVATION a) +{ + activate_array_kernel<<>>(x, n, a); + check_error(cudaPeekAtLastError()); +} + +extern "C" void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta) +{ + gradient_array_kernel<<>>(x, n, a, delta); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/uvm/darknet/src/activation_layer.c b/workloads/realworld/uvm/darknet/src/activation_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..0791772336e4d1b001ed1b76bbbf21ee8d6fa24f --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/activation_layer.c @@ -0,0 +1,63 @@ +#include "activation_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +layer 
make_activation_layer(int batch, int inputs, ACTIVATION activation) +{ + layer l = {0}; + l.type = ACTIVE; + + l.inputs = inputs; + l.outputs = inputs; + l.batch=batch; + + l.output = calloc(batch*inputs, sizeof(float*)); + l.delta = calloc(batch*inputs, sizeof(float*)); + + l.forward = forward_activation_layer; + l.backward = backward_activation_layer; +#ifdef GPU + l.forward_gpu = forward_activation_layer_gpu; + l.backward_gpu = backward_activation_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); +#endif + l.activation = activation; + fprintf(stderr, "Activation Layer: %d inputs\n", inputs); + return l; +} + +void forward_activation_layer(layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_activation_layer(layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_activation_layer_gpu(layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_activation_layer_gpu(layer l, network net) +{ + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/uvm/darknet/src/activation_layer.h b/workloads/realworld/uvm/darknet/src/activation_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..42118a84e83f59a8997e354959404d1283a3004c --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/activation_layer.h @@ -0,0 +1,19 @@ +#ifndef ACTIVATION_LAYER_H +#define ACTIVATION_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_activation_layer(int batch, int 
inputs, ACTIVATION activation); + +void forward_activation_layer(layer l, network net); +void backward_activation_layer(layer l, network net); + +#ifdef GPU +void forward_activation_layer_gpu(layer l, network net); +void backward_activation_layer_gpu(layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/uvm/darknet/src/activations.c b/workloads/realworld/uvm/darknet/src/activations.c new file mode 100644 index 0000000000000000000000000000000000000000..da1a17a89b46b6c41fa80b5dd113e1b30c910712 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/activations.c @@ -0,0 +1,150 @@ +#include "activations.h" + +#include +#include +#include +#include + +char *get_activation_string(ACTIVATION a) +{ + switch(a){ + case LOGISTIC: + return "logistic"; + case LOGGY: + return "loggy"; + case RELU: + return "relu"; + case ELU: + return "elu"; + case SELU: + return "selu"; + case RELIE: + return "relie"; + case RAMP: + return "ramp"; + case LINEAR: + return "linear"; + case TANH: + return "tanh"; + case PLSE: + return "plse"; + case LEAKY: + return "leaky"; + case STAIR: + return "stair"; + case HARDTAN: + return "hardtan"; + case LHTAN: + return "lhtan"; + default: + break; + } + return "relu"; +} + +ACTIVATION get_activation(char *s) +{ + if (strcmp(s, "logistic")==0) return LOGISTIC; + if (strcmp(s, "loggy")==0) return LOGGY; + if (strcmp(s, "relu")==0) return RELU; + if (strcmp(s, "elu")==0) return ELU; + if (strcmp(s, "selu")==0) return SELU; + if (strcmp(s, "relie")==0) return RELIE; + if (strcmp(s, "plse")==0) return PLSE; + if (strcmp(s, "hardtan")==0) return HARDTAN; + if (strcmp(s, "lhtan")==0) return LHTAN; + if (strcmp(s, "linear")==0) return LINEAR; + if (strcmp(s, "ramp")==0) return RAMP; + if (strcmp(s, "leaky")==0) return LEAKY; + if (strcmp(s, "tanh")==0) return TANH; + if (strcmp(s, "stair")==0) return STAIR; + fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s); + return RELU; +} + +float activate(float x, 
ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_activate(x); + case LOGISTIC: + return logistic_activate(x); + case LOGGY: + return loggy_activate(x); + case RELU: + return relu_activate(x); + case ELU: + return elu_activate(x); + case SELU: + return selu_activate(x); + case RELIE: + return relie_activate(x); + case RAMP: + return ramp_activate(x); + case LEAKY: + return leaky_activate(x); + case TANH: + return tanh_activate(x); + case PLSE: + return plse_activate(x); + case STAIR: + return stair_activate(x); + case HARDTAN: + return hardtan_activate(x); + case LHTAN: + return lhtan_activate(x); + } + return 0; +} + +void activate_array(float *x, const int n, const ACTIVATION a) +{ + int i; + for(i = 0; i < n; ++i){ + x[i] = activate(x[i], a); + } +} + +float gradient(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_gradient(x); + case LOGISTIC: + return logistic_gradient(x); + case LOGGY: + return loggy_gradient(x); + case RELU: + return relu_gradient(x); + case ELU: + return elu_gradient(x); + case SELU: + return selu_gradient(x); + case RELIE: + return relie_gradient(x); + case RAMP: + return ramp_gradient(x); + case LEAKY: + return leaky_gradient(x); + case TANH: + return tanh_gradient(x); + case PLSE: + return plse_gradient(x); + case STAIR: + return stair_gradient(x); + case HARDTAN: + return hardtan_gradient(x); + case LHTAN: + return lhtan_gradient(x); + } + return 0; +} + +void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta) +{ + int i; + for(i = 0; i < n; ++i){ + delta[i] *= gradient(x[i], a); + } +} + diff --git a/workloads/realworld/uvm/darknet/src/activations.h b/workloads/realworld/uvm/darknet/src/activations.h new file mode 100644 index 0000000000000000000000000000000000000000..eec28d5b692ede3975e01a4d454ace20e8a9fdd8 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/activations.h @@ -0,0 +1,87 @@ +#ifndef ACTIVATIONS_H +#define ACTIVATIONS_H +#include "darknet.h" +#include 
"cuda_dark.h" +#include "math.h" + +ACTIVATION get_activation(char *s); + +char *get_activation_string(ACTIVATION a); +float activate(float x, ACTIVATION a); +float gradient(float x, ACTIVATION a); +void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta); +void activate_array(float *x, const int n, const ACTIVATION a); +#ifdef GPU +void activate_array_gpu(float *x, int n, ACTIVATION a); +void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta); +#endif + +static inline float stair_activate(float x) +{ + int n = floor(x); + if (n%2 == 0) return floor(x/2.); + else return (x - n) + floor(x/2.); +} +static inline float hardtan_activate(float x) +{ + if (x < -1) return -1; + if (x > 1) return 1; + return x; +} +static inline float linear_activate(float x){return x;} +static inline float logistic_activate(float x){return 1./(1. + exp(-x));} +static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;} +static inline float relu_activate(float x){return x*(x>0);} +static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);} +static inline float selu_activate(float x){return (x >= 0)*1.0507*x + (x < 0)*1.0507*1.6732*(exp(x)-1);} +static inline float relie_activate(float x){return (x>0) ? x : .01*x;} +static inline float ramp_activate(float x){return x*(x>0)+.1*x;} +static inline float leaky_activate(float x){return (x>0) ? 
x : .1*x;} +static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);} +static inline float plse_activate(float x) +{ + if(x < -4) return .01 * (x + 4); + if(x > 4) return .01 * (x - 4) + 1; + return .125*x + .5; +} + +static inline float lhtan_activate(float x) +{ + if(x < 0) return .001*x; + if(x > 1) return .001*(x-1) + 1; + return x; +} +static inline float lhtan_gradient(float x) +{ + if(x > 0 && x < 1) return 1; + return .001; +} + +static inline float hardtan_gradient(float x) +{ + if (x > -1 && x < 1) return 1; + return 0; +} +static inline float linear_gradient(float x){return 1;} +static inline float logistic_gradient(float x){return (1-x)*x;} +static inline float loggy_gradient(float x) +{ + float y = (x+1.)/2.; + return 2*(1-y)*y; +} +static inline float stair_gradient(float x) +{ + if (floor(x) == x) return 0; + return 1; +} +static inline float relu_gradient(float x){return (x>0);} +static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);} +static inline float selu_gradient(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);} +static inline float relie_gradient(float x){return (x>0) ? 1 : .01;} +static inline float ramp_gradient(float x){return (x>0)+.1;} +static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;} +static inline float tanh_gradient(float x){return 1-x*x;} +static inline float plse_gradient(float x){return (x < 0 || x > 1) ? 
.01 : .125;} + +#endif + diff --git a/workloads/realworld/uvm/darknet/src/avgpool_layer.c b/workloads/realworld/uvm/darknet/src/avgpool_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..7d17fa8f829aba43652117c141fb8b54ef4cf5dc --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/avgpool_layer.c @@ -0,0 +1,71 @@ +#include "avgpool_layer.h" +#include "cuda_dark.h" +#include + +avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) +{ + fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c); + avgpool_layer l = {0}; + l.type = AVGPOOL; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.out_w = 1; + l.out_h = 1; + l.out_c = c; + l.outputs = l.out_c; + l.inputs = h*w*c; + int output_size = l.outputs * batch; + l.output = calloc(output_size, sizeof(float)); + l.delta = calloc(output_size, sizeof(float)); + l.forward = forward_avgpool_layer; + l.backward = backward_avgpool_layer; + #ifdef GPU + l.forward_gpu = forward_avgpool_layer_gpu; + l.backward_gpu = backward_avgpool_layer_gpu; + l.output_gpu = cuda_make_array(l.output, output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); + #endif + return l; +} + +void resize_avgpool_layer(avgpool_layer *l, int w, int h) +{ + l->w = w; + l->h = h; + l->inputs = h*w*l->c; +} + +void forward_avgpool_layer(const avgpool_layer l, network net) +{ + int b,i,k; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < l.c; ++k){ + int out_index = k + b*l.c; + l.output[out_index] = 0; + for(i = 0; i < l.h*l.w; ++i){ + int in_index = i + l.h*l.w*(k + b*l.c); + l.output[out_index] += net.input[in_index]; + } + l.output[out_index] /= l.h*l.w; + } + } +} + +void backward_avgpool_layer(const avgpool_layer l, network net) +{ + int b,i,k; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < l.c; ++k){ + int out_index = k + b*l.c; + for(i = 0; i < l.h*l.w; ++i){ + int in_index = i + l.h*l.w*(k + b*l.c); + net.delta[in_index] += l.delta[out_index] / (l.h*l.w); + } + } + } +} + diff --git 
a/workloads/realworld/uvm/darknet/src/avgpool_layer.h b/workloads/realworld/uvm/darknet/src/avgpool_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..576ad1db9e9cb87640b0c3f764e2bbfbaae4b2b3 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/avgpool_layer.h @@ -0,0 +1,23 @@ +#ifndef AVGPOOL_LAYER_H +#define AVGPOOL_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +typedef layer avgpool_layer; + +image get_avgpool_image(avgpool_layer l); +avgpool_layer make_avgpool_layer(int batch, int w, int h, int c); +void resize_avgpool_layer(avgpool_layer *l, int w, int h); +void forward_avgpool_layer(const avgpool_layer l, network net); +void backward_avgpool_layer(const avgpool_layer l, network net); + +#ifdef GPU +void forward_avgpool_layer_gpu(avgpool_layer l, network net); +void backward_avgpool_layer_gpu(avgpool_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/uvm/darknet/src/avgpool_layer_kernels.cu b/workloads/realworld/uvm/darknet/src/avgpool_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..55e5ec372d251e1d4b0c501563f9240437595795 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/avgpool_layer_kernels.cu @@ -0,0 +1,61 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "avgpool_layer.h" +#include "cuda_dark.h" +} + +__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int k = id % c; + id /= c; + int b = id; + + int i; + int out_index = (k + c*b); + output[out_index] = 0; + for(i = 0; i < w*h; ++i){ + int in_index = i + h*w*(k + b*c); + output[out_index] += input[in_index]; + } + output[out_index] /= w*h; +} + +__global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta) +{ + 
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int k = id % c; + id /= c; + int b = id; + + int i; + int out_index = (k + c*b); + for(i = 0; i < w*h; ++i){ + int in_index = i + h*w*(k + b*c); + in_delta[in_index] += out_delta[out_index] / (w*h); + } +} + +extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network net) +{ + size_t n = layer.c*layer.batch; + + forward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, net.input_gpu, layer.output_gpu); + check_error(cudaPeekAtLastError()); +} + +extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network net) +{ + size_t n = layer.c*layer.batch; + + backward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, net.delta_gpu, layer.delta_gpu); + check_error(cudaPeekAtLastError()); +} + diff --git a/workloads/realworld/uvm/darknet/src/batchnorm_layer.c b/workloads/realworld/uvm/darknet/src/batchnorm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..ebff387cc4b0365173fb6727efd80ebc80bfbd41 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/batchnorm_layer.c @@ -0,0 +1,279 @@ +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "blas.h" +#include + +layer make_batchnorm_layer(int batch, int w, int h, int c) +{ + fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c); + layer l = {0}; + l.type = BATCHNORM; + l.batch = batch; + l.h = l.out_h = h; + l.w = l.out_w = w; + l.c = l.out_c = c; + l.output = calloc(h * w * c * batch, sizeof(float)); + l.delta = calloc(h * w * c * batch, sizeof(float)); + l.inputs = w*h*c; + l.outputs = l.inputs; + + l.scales = calloc(c, sizeof(float)); + l.scale_updates = calloc(c, sizeof(float)); + l.biases = calloc(c, sizeof(float)); + l.bias_updates = calloc(c, sizeof(float)); + int i; + for(i = 0; i < c; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(c, sizeof(float)); + l.variance = calloc(c, sizeof(float)); + + l.rolling_mean = calloc(c, 
sizeof(float)); + l.rolling_variance = calloc(c, sizeof(float)); + + l.forward = forward_batchnorm_layer; + l.backward = backward_batchnorm_layer; +#ifdef GPU + l.forward_gpu = forward_batchnorm_layer_gpu; + l.backward_gpu = backward_batchnorm_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, h * w * c * batch); + l.delta_gpu = cuda_make_array(l.delta, h * w * c * batch); + + l.biases_gpu = cuda_make_array(l.biases, c); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, c); + + l.scales_gpu = cuda_make_array(l.scales, c); + l.scale_updates_gpu = cuda_make_array(l.scale_updates, c); + + l.mean_gpu = cuda_make_array(l.mean, c); + l.variance_gpu = cuda_make_array(l.variance, c); + + l.rolling_mean_gpu = cuda_make_array(l.mean, c); + l.rolling_variance_gpu = cuda_make_array(l.variance, c); + + l.mean_delta_gpu = cuda_make_array(l.mean, c); + l.variance_delta_gpu = cuda_make_array(l.variance, c); + + l.x_gpu = cuda_make_array(l.output, l.batch*l.outputs); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*l.outputs); + #ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); + + #endif +#endif + return l; +} + +void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + int i,b,f; + for(f = 0; f < n; ++f){ + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int index = i + size*(f + n*b); + sum += delta[index] * x_norm[index]; + } + } + scale_updates[f] += sum; + } +} + +void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + + int i,j,k; + for(i = 0; i < filters; ++i){ + mean_delta[i] = 0; + for (j = 0; j < batch; ++j) { + for (k = 0; k < spatial; ++k) { + 
int index = j*filters*spatial + i*spatial + k; + mean_delta[i] += delta[index]; + } + } + mean_delta[i] *= (-1./sqrt(variance[i] + .00001f)); + } +} +void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + + int i,j,k; + for(i = 0; i < filters; ++i){ + variance_delta[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance_delta[i] += delta[index]*(x[index] - mean[i]); + } + } + variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.)); + } +} +void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + int f, j, k; + for(j = 0; j < batch; ++j){ + for(f = 0; f < filters; ++f){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + f*spatial + k; + delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f)) + variance_delta[f] * 2. 
* (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); + } + } + } +} + +void resize_batchnorm_layer(layer *layer, int w, int h) +{ + fprintf(stderr, "Not implemented\n"); +} + +void forward_batchnorm_layer(layer l, network net) +{ + if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1); + if(net.train){ + mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean); + variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance); + + scal_cpu(l.out_c, .99, l.rolling_mean, 1); + axpy_cpu(l.out_c, .01, l.mean, 1, l.rolling_mean, 1); + scal_cpu(l.out_c, .99, l.rolling_variance, 1); + axpy_cpu(l.out_c, .01, l.variance, 1, l.rolling_variance, 1); + + normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w); + copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1); + } else { + normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h*l.out_w); + } + scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h*l.out_w); + add_bias(l.output, l.biases, l.batch, l.out_c, l.out_h*l.out_w); +} + +void backward_batchnorm_layer(layer l, network net) +{ + if(!net.train){ + l.mean = l.rolling_mean; + l.variance = l.rolling_variance; + } + backward_bias(l.bias_updates, l.delta, l.batch, l.out_c, l.out_w*l.out_h); + backward_scale_cpu(l.x_norm, l.delta, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates); + + scale_bias(l.delta, l.scales, l.batch, l.out_c, l.out_h*l.out_w); + + mean_delta_cpu(l.delta, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta); + variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta); + normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.out_c, l.out_w*l.out_h, l.delta); + if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_batchnorm_layer(layer l) 
+{ + cuda_pull_array(l.scales_gpu, l.scales, l.c); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.c); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.c); +} +void push_batchnorm_layer(layer l) +{ + cuda_push_array(l.scales_gpu, l.scales, l.c); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.c); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.c); +} + +void forward_batchnorm_layer_gpu(layer l, network net) +{ + if(l.type == BATCHNORM) copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); + if (net.train) { +#ifdef CUDNN + float one = 1; + float zero = 0; + cudnnBatchNormalizationForwardTraining(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + l.dstTensorDesc, + l.x_gpu, + l.dstTensorDesc, + l.output_gpu, + l.normTensorDesc, + l.scales_gpu, + l.biases_gpu, + .01, + l.rolling_mean_gpu, + l.rolling_variance_gpu, + .00001, + l.mean_gpu, + l.variance_gpu); +#else + fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu); + fast_variance_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.variance_gpu); + + scal_gpu(l.out_c, .99, l.rolling_mean_gpu, 1); + axpy_gpu(l.out_c, .01, l.mean_gpu, 1, l.rolling_mean_gpu, 1); + scal_gpu(l.out_c, .99, l.rolling_variance_gpu, 1); + axpy_gpu(l.out_c, .01, l.variance_gpu, 1, l.rolling_variance_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); + normalize_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_norm_gpu, 1); + + scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); +#endif + } else { + normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); + scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, 
l.out_h*l.out_w); + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); + } + +} + +void backward_batchnorm_layer_gpu(layer l, network net) +{ + if(!net.train){ + l.mean_gpu = l.rolling_mean_gpu; + l.variance_gpu = l.rolling_variance_gpu; + } +#ifdef CUDNN + float one = 1; + float zero = 0; + cudnnBatchNormalizationBackward(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + &one, + &one, + l.dstTensorDesc, + l.x_gpu, + l.dstTensorDesc, + l.delta_gpu, + l.dstTensorDesc, + l.x_norm_gpu, + l.normTensorDesc, + l.scales_gpu, + l.scale_updates_gpu, + l.bias_updates_gpu, + .00001, + l.mean_gpu, + l.variance_gpu); + copy_gpu(l.outputs*l.batch, l.x_norm_gpu, 1, l.delta_gpu, 1); +#else + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h); + backward_scale_gpu(l.x_norm_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates_gpu); + + scale_bias_gpu(l.delta_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + + fast_mean_delta_gpu(l.delta_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta_gpu); + fast_variance_delta_gpu(l.x_gpu, l.delta_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta_gpu); + normalize_delta_gpu(l.x_gpu, l.mean_gpu, l.variance_gpu, l.mean_delta_gpu, l.variance_delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu); +#endif + if(l.type == BATCHNORM) copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/uvm/darknet/src/batchnorm_layer.h b/workloads/realworld/uvm/darknet/src/batchnorm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..25a18a3c8f2569bab135b088501248159e1cae11 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/batchnorm_layer.h @@ -0,0 +1,19 @@ +#ifndef BATCHNORM_LAYER_H +#define BATCHNORM_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +layer make_batchnorm_layer(int batch, int w, int h, int c); 
+void forward_batchnorm_layer(layer l, network net); +void backward_batchnorm_layer(layer l, network net); + +#ifdef GPU +void forward_batchnorm_layer_gpu(layer l, network net); +void backward_batchnorm_layer_gpu(layer l, network net); +void pull_batchnorm_layer(layer l); +void push_batchnorm_layer(layer l); +#endif + +#endif diff --git a/workloads/realworld/uvm/darknet/src/blas.c b/workloads/realworld/uvm/darknet/src/blas.c new file mode 100644 index 0000000000000000000000000000000000000000..9e1604449ba9aeb9decdc7f0395a38bd3b478671 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/blas.c @@ -0,0 +1,351 @@ +#include "blas.h" + +#include +#include +#include +#include +#include +#include +void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int b,i,j,k; + int out_c = c/(stride*stride); + + for(b = 0; b < batch; ++b){ + for(k = 0; k < c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int in_index = i + w*(j + h*(k + c*b)); + int c2 = k % out_c; + int offset = k / out_c; + int w2 = i*stride + offset % stride; + int h2 = j*stride + offset / stride; + int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); + if(forward) out[out_index] = x[in_index]; + else out[in_index] = x[out_index]; + } + } + } + } +} + +void flatten(float *x, int size, int layers, int batch, int forward) +{ + float *swap = calloc(size*layers*batch, sizeof(float)); + int i,c,b; + for(b = 0; b < batch; ++b){ + for(c = 0; c < layers; ++c){ + for(i = 0; i < size; ++i){ + int i1 = b*layers*size + c*size + i; + int i2 = b*layers*size + i*layers + c; + if (forward) swap[i2] = x[i1]; + else swap[i1] = x[i2]; + } + } + } + memcpy(x, swap, size*layers*batch*sizeof(float)); + free(swap); +} + +void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c) +{ + int i; + for(i = 0; i < n; ++i){ + c[i] = s[i]*a[i] + (1-s[i])*(b ? 
b[i] : 0); + } +} + +void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc) +{ + int i; + for(i = 0; i < n; ++i){ + if(da) da[i] += dc[i] * s[i]; + if(db) db[i] += dc[i] * (1-s[i]); + ds[i] += dc[i] * (a[i] - b[i]); + } +} + +void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) +{ + int stride = w1/w2; + int sample = w2/w1; + assert(stride == h1/h2); + assert(sample == h2/h1); + if(stride < 1) stride = 1; + if(sample < 1) sample = 1; + int minw = (w1 < w2) ? w1 : w2; + int minh = (h1 < h2) ? h1 : h2; + int minc = (c1 < c2) ? c1 : c2; + + int i,j,k,b; + for(b = 0; b < batch; ++b){ + for(k = 0; k < minc; ++k){ + for(j = 0; j < minh; ++j){ + for(i = 0; i < minw; ++i){ + int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); + int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); + out[out_index] = s1*out[out_index] + s2*add[add_index]; + } + } + } + } +} + +void mean_cpu(float *x, int batch, int filters, int spatial, float *mean) +{ + float scale = 1./(batch * spatial); + int i,j,k; + for(i = 0; i < filters; ++i){ + mean[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + mean[i] += x[index]; + } + } + mean[i] *= scale; + } +} + +void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + float scale = 1./(batch * spatial - 1); + int i,j,k; + for(i = 0; i < filters; ++i){ + variance[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance[i] += pow((x[index] - mean[i]), 2); + } + } + variance[i] *= scale; + } +} + +void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial) +{ + int b,f,i; + for(b = 0; b < batch; ++b){ + for(i = 0; i < spatial; ++i){ + float sum = 0; + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + 
sum += powf(x[index], 2); + } + sum = sqrtf(sum); + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + x[index] /= sum; + dx[index] = (1 - x[index]) / sum; + } + } + } +} + + +void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) +{ + int b, f, i; + for(b = 0; b < batch; ++b){ + for(f = 0; f < filters; ++f){ + for(i = 0; i < spatial; ++i){ + int index = b*filters*spatial + f*spatial + i; + x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + .000001f); + } + } + } +} + +void const_cpu(int N, float ALPHA, float *X, int INCX) +{ + int i; + for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; +} + +void mul_cpu(int N, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] *= X[i*INCX]; +} + +void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] = pow(X[i*INCX], ALPHA); +} + +void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX]; +} + +void scal_cpu(int N, float ALPHA, float *X, int INCX) +{ + int i; + for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA; +} + +void fill_cpu(int N, float ALPHA, float *X, int INCX) +{ + int i; + for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; +} + +void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i, j; + int index = 0; + for(j = 0; j < B; ++j) { + for(i = 0; i < NX; ++i){ + if(X) X[j*NX + i] += OUT[index]; + ++index; + } + for(i = 0; i < NY; ++i){ + if(Y) Y[j*NY + i] += OUT[index]; + ++index; + } + } +} + +void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i, j; + int index = 0; + for(j = 0; j < B; ++j) { + for(i = 0; i < NX; ++i){ + OUT[index++] = X[j*NX + i]; + } + for(i = 0; i < NY; ++i){ + OUT[index++] = Y[j*NY + i]; + } + } +} + +void copy_cpu(int N, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX]; 
+} + +void mult_add_into_cpu(int N, float *X, float *Y, float *Z) +{ + int i; + for(i = 0; i < N; ++i) Z[i] += X[i]*Y[i]; +} + +void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float diff = truth[i] - pred[i]; + float abs_val = fabs(diff); + if(abs_val < 1) { + error[i] = diff * diff; + delta[i] = diff; + } + else { + error[i] = 2*abs_val - 1; + delta[i] = (diff < 0) ? 1 : -1; + } + } +} + +void l1_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float diff = truth[i] - pred[i]; + error[i] = fabs(diff); + delta[i] = diff > 0 ? 1 : -1; + } +} + +void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float t = truth[i]; + float p = pred[i]; + error[i] = (t) ? -log(p) : 0; + delta[i] = t-p; + } +} + +void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float t = truth[i]; + float p = pred[i]; + error[i] = -t*log(p) - (1-t)*log(1-p); + delta[i] = t-p; + } +} + +void l2_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float diff = truth[i] - pred[i]; + error[i] = diff * diff; + delta[i] = diff; + } +} + +float dot_cpu(int N, float *X, int INCX, float *Y, int INCY) +{ + int i; + float dot = 0; + for(i = 0; i < N; ++i) dot += X[i*INCX] * Y[i*INCY]; + return dot; +} + +void softmax(float *input, int n, float temp, int stride, float *output) +{ + int i; + float sum = 0; + float largest = -FLT_MAX; + for(i = 0; i < n; ++i){ + if(input[i*stride] > largest) largest = input[i*stride]; + } + for(i = 0; i < n; ++i){ + float e = exp(input[i*stride]/temp - largest/temp); + sum += e; + output[i*stride] = e; + } + for(i = 0; i < n; ++i){ + output[i*stride] /= sum; + } +} + + +void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int 
group_offset, int stride, float temp, float *output) +{ + int g, b; + for(b = 0; b < batch; ++b){ + for(g = 0; g < groups; ++g){ + softmax(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset); + } + } +} + +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + int i, j, k, b; + for(b = 0; b < batch; ++b){ + for(k = 0; k < c; ++k){ + for(j = 0; j < h*stride; ++j){ + for(i = 0; i < w*stride; ++i){ + int in_index = b*w*h*c + k*w*h + (j/stride)*w + i/stride; + int out_index = b*w*h*c*stride*stride + k*w*h*stride*stride + j*w*stride + i; + if(forward) out[out_index] = scale*in[in_index]; + else in[in_index] += scale*out[out_index]; + } + } + } + } +} + + diff --git a/workloads/realworld/uvm/darknet/src/blas.h b/workloads/realworld/uvm/darknet/src/blas.h new file mode 100644 index 0000000000000000000000000000000000000000..5d24a9aea70d8050b05098aa7a5634576444a32c --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/blas.h @@ -0,0 +1,105 @@ +#ifndef BLAS_H +#define BLAS_H +#include "darknet.h" + +void flatten(float *x, int size, int layers, int batch, int forward); +void pm(int M, int N, float *A); +float *random_matrix(int rows, int cols); +void time_random_matrix(int TA, int TB, int m, int k, int n); +void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); + +void test_blas(); + +void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void mult_add_into_cpu(int N, float *X, float *Y, float *Z); + +void const_cpu(int N, float ALPHA, float *X, int INCX); +void constrain_gpu(int N, float ALPHA, float * X, int INCX); +void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void mul_cpu(int N, float *X, int INCX, float *Y, int INCY); + +int test_gpu_blas(); +void shortcut_cpu(int batch, int w1, int h1, int c1, float 
*add, int w2, int h2, int c2, float s1, float s2, float *out); + +void mean_cpu(float *x, int batch, int filters, int spatial, float *mean); +void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); + +void scale_bias(float *output, float *scales, int batch, int n, int size); +void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); +void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); +void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); +void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); +void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial); + +void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error); +void l2_cpu(int n, float *pred, float *truth, float *delta, float *error); +void l1_cpu(int n, float *pred, float *truth, float *delta, float *error); +void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); +void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); +void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c); +void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc); + +void softmax(float *input, int n, float temp, int stride, float *output); +void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); + +#ifdef GPU +#include "cuda_dark.h" +#include "tree.h" + +void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); +void axpy_gpu_offset(int N, 
float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); +void copy_gpu(int N, float * X, int INCX, float * Y, int INCY); +void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); +void add_gpu(int N, float ALPHA, float * X, int INCX); +void supp_gpu(int N, float ALPHA, float * X, int INCX); +void mask_gpu(int N, float * X, float mask_num, float * mask, float val); +void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale); +void const_gpu(int N, float ALPHA, float *X, int INCX); +void pow_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void mul_gpu(int N, float *X, int INCX, float *Y, int INCY); + +void mean_gpu(float *x, int batch, int filters, int spatial, float *mean); +void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); +void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); +void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial); + +void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); + +void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); +void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); + +void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); +void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean); +void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out); +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); +void scale_bias_gpu(float 
*output, float *biases, int batch, int n, int size); +void add_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); + +void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); +void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); +void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error); +void l2_gpu(int n, float *pred, float *truth, float *delta, float *error); +void l1_gpu(int n, float *pred, float *truth, float *delta, float *error); +void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error); +void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc); +void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c); +void mult_add_into_gpu(int num, float *a, float *b, float *c); +void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT); + +void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); + +void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); +void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t); +void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t); + +void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out); +void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier); +void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); + +#endif +#endif diff --git a/workloads/realworld/uvm/darknet/src/blas_kernels.cu 
b/workloads/realworld/uvm/darknet/src/blas_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..3db34a057b318e87769058c4b7fdc81f02780a9d --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/blas_kernels.cu @@ -0,0 +1,1035 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" +#include + +extern "C" { +#include "blas.h" +#include "cuda_dark.h" +#include "utils.h" +} + +__global__ void scale_bias_kernel(float *output, float *biases, int n, int size) +{ + int offset = blockIdx.x * blockDim.x + threadIdx.x; + int filter = blockIdx.y; + int batch = blockIdx.z; + + if(offset < size) output[(batch*n+filter)*size + offset] *= biases[filter]; +} + +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size) +{ + dim3 dimGrid((size-1)/BLOCK + 1, n, batch); + dim3 dimBlock(BLOCK, 1, 1); + + scale_bias_kernel<<>>(output, biases, n, size); + check_error(cudaPeekAtLastError()); +} + +__global__ void backward_scale_kernel(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + __shared__ float part[BLOCK]; + int i,b; + int filter = blockIdx.x; + int p = threadIdx.x; + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; i += BLOCK){ + int index = p + i + size*(filter + n*b); + sum += (p+i < size) ? 
delta[index]*x_norm[index] : 0; + } + } + part[p] = sum; + __syncthreads(); + if (p == 0) { + for(i = 0; i < BLOCK; ++i) scale_updates[filter] += part[i]; + } +} + +void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + backward_scale_kernel<<>>(x_norm, delta, batch, n, size, scale_updates); + check_error(cudaPeekAtLastError()); +} + +__global__ void add_bias_kernel(float *output, float *biases, int batch, int n, int size) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= n*size*batch) return; + int i = index % size; + index /= size; + int j = index % n; + index /= n; + int k = index; + + output[(k*n+j)*size + i] += biases[j]; +} + +void add_bias_gpu(float *output, float *biases, int batch, int n, int size) +{ + int num = n*size*batch; + + add_bias_kernel<<>>(output, biases, batch, n, size); + check_error(cudaPeekAtLastError()); +} + +__global__ void backward_bias_conn_kernel(float *bias_updates, float *delta, int batch, int n) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= n) return; + int b; + float sum = 0; + for(b = 0; b < batch; ++b){ + int i = b*n + index; + sum += delta[i]; + } + bias_updates[index] += sum; +} + +__global__ void backward_bias_kernel(float *bias_updates, float *delta, int batch, int n, int size) +{ + __shared__ float part[BLOCK]; + int i,b; + int filter = blockIdx.x; + int p = threadIdx.x; + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; i += BLOCK){ + int index = p + i + size*(filter + n*b); + sum += (p+i < size) ? 
delta[index] : 0;
        }
    }
    part[p] = sum;
    __syncthreads();
    if (p == 0) {
        for(i = 0; i < BLOCK; ++i) bias_updates[filter] += part[i];
    }
}

/* Accumulate bias gradients on the GPU.
   size == 1 means a fully-connected layer (one delta per output); otherwise one
   thread block reduces each filter's spatial deltas.
   NOTE(review): the <<<grid, block>>> launch configurations in this file were
   stripped by a text-extraction step; they are reconstructed here to match
   upstream darknet blas_kernels.cu. */
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size)
{
    if(size == 1){
        backward_bias_conn_kernel<<<cuda_gridsize(n), BLOCK>>>(bias_updates, delta, batch, n);
    }else{
        backward_bias_kernel<<<n, BLOCK>>>(bias_updates, delta, batch, n, size);
    }
    check_error(cudaPeekAtLastError());
}

/*
__global__ void dot_kernel(float *output, float scale, int batch, int n, int size, float *delta)
{
    int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    int f1 = index / n;
    int f2 = index % n;
    if (f2 <= f1) return;

    float sum = 0;
    float norm1 = 0;
    float norm2 = 0;
    int b, i;
    for(b = 0; b < batch; ++b){
        for(i = 0; i < size; ++i){
            int i1 = b * size * n + f1 * size + i;
            int i2 = b * size * n + f2 * size + i;
            sum += output[i1] * output[i2];
            norm1 += output[i1] * output[i1];
            norm2 += output[i2] * output[i2];
        }
    }
    norm1 = sqrt(norm1);
    norm2 = sqrt(norm2);
    float norm = norm1 * norm2;
    sum = sum / norm;
    for(b = 0; b < batch; ++b){
        for(i = 0; i < size; ++i){
            int i1 = b * size * n + f1 * size + i;
            int i2 = b * size * n + f2 * size + i;
            delta[i1] += - scale * sum * output[i2] / norm;
            delta[i2] += - scale * sum * output[i1] / norm;
        }
    }
}

void dot_error_gpu(layer l)
{
    dot_kernel<<<cuda_gridsize(l.n*l.n), BLOCK>>>(l.output_gpu, l.dot, l.batch, l.n, l.out_w * l.out_h, l.delta_gpu);
    check_error(cudaPeekAtLastError());
}
*/


/* One Adam step per element: bias-corrected first/second moments, then
   x += rate * mhat / (sqrt(vhat) + eps).  The sign convention (ascent on x)
   matches how darknet accumulates deltas. */
__global__ void adam_kernel(int N, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t)
{
    int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (index >= N) return;

    float mhat = m[index] / (1.f - powf(B1, t));
    float vhat = v[index] / (1.f - powf(B2, t));

    x[index] = x[index] + rate * mhat / (sqrtf(vhat) + eps);
}

extern "C" void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t)
{
    adam_kernel<<<cuda_gridsize(n), BLOCK>>>(n, x, m, v, B1, B2, rate, eps, t);
    check_error(cudaPeekAtLastError());
}

/* Full Adam update for a weight tensor: decay the moments, apply weight decay
   to the gradient d, fold d into m and d^2 into v, step the weights, then
   zero d for the next batch. */
extern "C" void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t)
{
    scal_gpu(n, B1, m, 1);
    scal_gpu(n, B2, v, 1);
    axpy_gpu(n, -decay*batch, w, 1, d, 1);

    axpy_gpu(n, (1-B1), d, 1, m, 1);
    mul_gpu(n, d, 1, d, 1);
    axpy_gpu(n, (1-B2), d, 1, v, 1);

    adam_gpu(n, w, m, v, B1, B2, rate, eps, t);
    fill_gpu(n, 0, d, 1);
}

/* Normalize each element by its filter's mean/variance (batch-norm forward). */
__global__ void normalize_kernel(int N, float *x, float *mean, float *variance, int batch, int filters, int spatial)
{
    int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (index >= N) return;
    int f = (index/spatial)%filters;

    x[index] = (x[index] - mean[f])/(sqrtf(variance[f] + .00001f));
}

/* Batch-norm backward for the input delta, combining the direct term with the
   mean/variance gradient contributions. */
__global__ void normalize_delta_kernel(int N, float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
{
    int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (index >= N) return;
    int f = (index/spatial)%filters;

    delta[index] = delta[index] * 1.f/(sqrtf(variance[f] + .00001f)) + variance_delta[f] * 2.f * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
}

extern "C" void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
{
    size_t N = batch*filters*spatial;
    normalize_delta_kernel<<<cuda_gridsize(N), BLOCK>>>(N, x, mean, variance, mean_delta, variance_delta, batch, filters, spatial, delta);
    check_error(cudaPeekAtLastError());
}

/* One thread per filter: serial reduction of d(variance). */
__global__ void variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i >= filters) return;
    int j,k;
    variance_delta[i] = 0;
    for(j = 0; j < batch; ++j){
        for(k = 0; k < spatial; ++k){
            int index = j*filters*spatial + i*spatial + k;
            variance_delta[i] += delta[index]*(x[index] - mean[i]);
        }
    }
    variance_delta[i] *= -.5f * powf(variance[i] + .00001f, (float)(-3.f/2.f));
}

/* Sum x over n strided groups; one thread per group. */
__global__ void accumulate_kernel(float *x, int n, int groups, float *sum)
{
    int k;
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i >= groups) return;
    sum[i] = 0;
    for(k = 0; k < n; ++k){
        sum[i] += x[k*groups + i];
    }
}

/* Block-per-filter shared-memory reduction of d(mean); thread 0 finishes the
   sum and applies the -1/sqrt(var) factor. */
__global__ void fast_mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
{
    const int threads = BLOCK;
    __shared__ float local[threads];

    int id = threadIdx.x;
    local[id] = 0;

    int filter = blockIdx.x;

    int i, j;
    for(j = 0; j < batch; ++j){
        for(i = 0; i < spatial; i += threads){
            int index = j*spatial*filters + filter*spatial + i + id;
            local[id] += (i+id < spatial) ? delta[index] : 0;
        }
    }

    __syncthreads();

    if(id == 0){
        mean_delta[filter] = 0;
        for(i = 0; i < threads; ++i){
            mean_delta[filter] += local[i];
        }
        mean_delta[filter] *= (-1.f/sqrtf(variance[filter] + .00001f));
    }
}

/* Block-per-filter shared-memory reduction of d(variance). */
__global__ void fast_variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
{
    const int threads = BLOCK;
    __shared__ float local[threads];

    int id = threadIdx.x;
    local[id] = 0;

    int filter = blockIdx.x;

    int i, j;
    for(j = 0; j < batch; ++j){
        for(i = 0; i < spatial; i += threads){
            int index = j*spatial*filters + filter*spatial + i + id;

            local[id] += (i+id < spatial) ? delta[index]*(x[index] - mean[filter]) : 0;
        }
    }

    __syncthreads();

    if(id == 0){
        variance_delta[filter] = 0;
        for(i = 0; i < threads; ++i){
            variance_delta[filter] += local[i];
        }
        variance_delta[filter] *= -.5f * powf(variance[filter] + .00001f, (float)(-3.f/2.f));
    }
}


/* One thread per filter: serial reduction of d(mean). */
__global__ void mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i >= filters) return;
    int j,k;
    mean_delta[i] = 0;
    for (j = 0; j < batch; ++j) {
        for (k = 0; k < spatial; ++k) {
            int index = j*filters*spatial + i*spatial + k;
            mean_delta[i] += delta[index];
        }
    }
    mean_delta[i] *= (-1.f/sqrtf(variance[i] + .00001f));
}

extern "C" void mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
{
    mean_delta_kernel<<<cuda_gridsize(filters), BLOCK>>>(delta, variance, batch, filters, spatial, mean_delta);
    check_error(cudaPeekAtLastError());
}

extern "C" void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
{
    /* one block per filter for the shared-memory reduction */
    fast_mean_delta_kernel<<<filters, BLOCK>>>(delta, variance, batch, filters, spatial, mean_delta);
    check_error(cudaPeekAtLastError());
}

extern "C" void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
{
    fast_variance_delta_kernel<<<filters, BLOCK>>>(x, delta, mean, variance, batch, filters, spatial, variance_delta);
    check_error(cudaPeekAtLastError());
}

/* Per-filter mean over batch and spatial positions; one thread per filter. */
__global__ void mean_kernel(float *x, int batch, int filters, int spatial, float *mean)
{
    float scale = 1.f/(batch * spatial);
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i >= filters) return;
    int j,k;
    mean[i] = 0;
    for(j = 0; j < batch; ++j){
        for(k = 0; k < spatial; ++k){
            int index = j*filters*spatial + i*spatial + k;
            mean[i] += x[index];
        }
    }
    mean[i] *= scale;
}

/* Per-filter (sample) variance with Bessel's correction (n-1 denominator);
   one thread per filter. */
__global__ void variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance)
{
    float scale = 1.f/(batch * spatial - 1);
    int j,k;
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i >= filters) return;
    variance[i] = 0;
    for(j = 0; j < batch; ++j){
        for(k = 0; k < spatial; ++k){
            int index = j*filters*spatial + i*spatial + k;
            variance[i] += powf((x[index] - mean[i]), 2);
        }
    }
    variance[i] *= scale;
}

/* Reorg (space-to-depth style shuffle); forward copies x->out, otherwise the
   inverse mapping is applied. */
__global__ void reorg_kernel(int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i >= N) return;
    int in_index = i;
    int in_w = i%w;
    i = i/w;
    int in_h = i%h;
    i = i/h;
    int in_c = i%c;
    i = i/c;
    int b = i%batch;

    int out_c = c/(stride*stride);

    int c2 = in_c % out_c;
    int offset = in_c / out_c;
    int w2 = in_w*stride + offset % stride;
    int h2 = in_h*stride + offset / stride;
    //printf("%d\n", offset);
    int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b));

    // printf("%d %d %d\n", w2, h2, c2);
    //printf("%d %d\n", in_index, out_index);
    //if(out_index >= N || out_index < 0) printf("bad bad bad \n");

    if(forward) out[out_index] = x[in_index];
    else out[in_index] = x[out_index];
    //if(forward) out[1] = x[1];
    //else out[0] = x[0];
}

/* BLAS-style element kernels: Y += a*X, Y = X^a, X = a, clamp, suppress-small,
   X += a, X *= a, X = a, copy, Y *= X. */
__global__ void axpy_kernel(int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) Y[OFFY+i*INCY] += ALPHA*X[OFFX+i*INCX];
}

__global__ void pow_kernel(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) Y[i*INCY] = pow(X[i*INCX], ALPHA);
}

__global__ void const_kernel(int N, float ALPHA, float *X, int INCX)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) X[i*INCX] = ALPHA;
}

__global__ void constrain_kernel(int N, float ALPHA, float *X, int INCX)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) X[i*INCX] = fminf(ALPHA, fmaxf(-ALPHA, X[i*INCX]));
}

__global__ void supp_kernel(int N, float ALPHA, float *X, int INCX)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) {
        if((X[i*INCX] * X[i*INCX]) < (ALPHA * ALPHA)) X[i*INCX] = 0;
    }
}

__global__ void add_kernel(int N, float ALPHA, float *X, int INCX)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) X[i*INCX] += ALPHA;
}

__global__ void scal_kernel(int N, float ALPHA, float *X, int INCX)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) X[i*INCX] *= ALPHA;
}

__global__ void fill_kernel(int N, float ALPHA, float *X, int INCX)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) X[i*INCX] = ALPHA;
}

__global__ void copy_kernel(int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) Y[i*INCY + OFFY] = X[i*INCX + OFFX];
}

__global__ void mul_kernel(int N, float *X, int INCX, float *Y, int INCY)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) Y[i*INCY] *= X[i*INCX];
}


/* NOTE(review): launch configurations below were stripped by text extraction;
   reconstructed to match upstream darknet blas_kernels.cu. */
extern "C" void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial)
{
    size_t N = batch*filters*spatial;
    normalize_kernel<<<cuda_gridsize(N), BLOCK>>>(N, x, mean, variance, batch, filters, spatial);
    check_error(cudaPeekAtLastError());
}

/* L2-normalize each spatial position across filters, in place; dx gets the
   (approximate) derivative used by darknet. */
__global__ void l2norm_kernel(int N, float *x, float *dx, int batch, int filters, int spatial)
{
    int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (index >= N) return;
    int b = index / spatial;
    int i = index % spatial;
    int f;
    float sum = 0;
    for(f = 0; f < filters; ++f){
        int index = b*filters*spatial + f*spatial + i;
        sum += powf(x[index], 2);
    }
    sum = sqrtf(sum);
    if(sum == 0) sum = 1;
    //printf("%f\n", sum);
    for(f = 0; f < filters; ++f){
        int index = b*filters*spatial + f*spatial + i;
        x[index] /= sum;
        dx[index] = (1 - x[index]) / sum;
    }
}

extern "C" void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial)
{
    size_t N = batch*spatial;
    l2norm_kernel<<<cuda_gridsize(N), BLOCK>>>(N, x, dx, batch, filters, spatial);
    check_error(cudaPeekAtLastError());
}

/* Block-per-filter shared-memory reduction of the per-filter mean. */
__global__ void fast_mean_kernel(float *x, int batch, int filters, int spatial, float *mean)
{
    const int threads = BLOCK;
    __shared__ float local[threads];

    int id = threadIdx.x;
    local[id] = 0;

    int filter = blockIdx.x;

    int i, j;
    for(j = 0; j < batch; ++j){
        for(i = 0; i < spatial; i += threads){
            int index = j*spatial*filters + filter*spatial + i + id;
            local[id] += (i+id < spatial) ? x[index] : 0;
        }
    }

    __syncthreads();

    if(id == 0){
        mean[filter] = 0;
        for(i = 0; i < threads; ++i){
            mean[filter] += local[i];
        }
        mean[filter] /= spatial * batch;
    }
}

/* Block-per-filter shared-memory reduction of the per-filter variance (n-1). */
__global__ void fast_variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance)
{
    const int threads = BLOCK;
    __shared__ float local[threads];

    int id = threadIdx.x;
    local[id] = 0;

    int filter = blockIdx.x;

    int i, j;
    for(j = 0; j < batch; ++j){
        for(i = 0; i < spatial; i += threads){
            int index = j*spatial*filters + filter*spatial + i + id;

            local[id] += (i+id < spatial) ? powf((x[index] - mean[filter]), 2) : 0;
        }
    }

    __syncthreads();

    if(id == 0){
        variance[filter] = 0;
        for(i = 0; i < threads; ++i){
            variance[filter] += local[i];
        }
        variance[filter] /= (spatial * batch - 1);
    }
}

extern "C" void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean)
{
    fast_mean_kernel<<<filters, BLOCK>>>(x, batch, filters, spatial, mean);
    check_error(cudaPeekAtLastError());
}

extern "C" void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
{
    fast_variance_kernel<<<filters, BLOCK>>>(x, mean, batch, filters, spatial, variance);
    check_error(cudaPeekAtLastError());
}


extern "C" void mean_gpu(float *x, int batch, int filters, int spatial, float *mean)
{
    mean_kernel<<<cuda_gridsize(filters), BLOCK>>>(x, batch, filters, spatial, mean);
    check_error(cudaPeekAtLastError());
}

extern "C" void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
{
    variance_kernel<<<cuda_gridsize(filters), BLOCK>>>(x, mean, batch, filters, spatial, variance);
    check_error(cudaPeekAtLastError());
}

extern "C" void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY)
{
    axpy_gpu_offset(N, ALPHA, X, 0, INCX, Y, 0, INCY);
}

extern "C" void pow_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY)
{
    pow_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX, Y, INCY);
    check_error(cudaPeekAtLastError());
}

extern "C" void axpy_gpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY)
{
    axpy_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, OFFX, INCX, Y, OFFY, INCY);
    check_error(cudaPeekAtLastError());
}

extern "C" void copy_gpu(int N, float * X, int INCX, float * Y, int INCY)
{
    copy_gpu_offset(N, X, 0, INCX, Y, 0, INCY);
}

extern "C" void mul_gpu(int N, float * X, int INCX, float * Y, int INCY)
{
    mul_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, INCX, Y, INCY);
    check_error(cudaPeekAtLastError());
}

extern "C" void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int
INCY)
{
    copy_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, OFFX, INCX, Y, OFFY, INCY);
    check_error(cudaPeekAtLastError());
}

/* Transpose between channel-major and interleaved layouts within each batch
   item; forward selects the direction. */
__global__ void flatten_kernel(int N, float *x, int spatial, int layers, int batch, int forward, float *out)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i >= N) return;
    int in_s = i%spatial;
    i = i/spatial;
    int in_c = i%layers;
    i = i/layers;
    int b = i;

    int i1 = b*layers*spatial + in_c*spatial + in_s;
    int i2 = b*layers*spatial + in_s*layers + in_c;

    if (forward) out[i2] = x[i1];
    else out[i1] = x[i2];
}

/* NOTE(review): launch configurations in this section were stripped by text
   extraction; reconstructed to match upstream darknet blas_kernels.cu. */
extern "C" void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out)
{
    int size = spatial*batch*layers;
    flatten_kernel<<<cuda_gridsize(size), BLOCK>>>(size, x, spatial, layers, batch, forward, out);
    check_error(cudaPeekAtLastError());
}

extern "C" void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
{
    int size = w*h*c*batch;
    reorg_kernel<<<cuda_gridsize(size), BLOCK>>>(size, x, w, h, c, batch, stride, forward, out);
    check_error(cudaPeekAtLastError());
}

/* Set x[i] = val wherever mask[i] equals mask_num. */
__global__ void mask_kernel(int n, float *x, float mask_num, float *mask, float val)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n && mask[i] == mask_num) x[i] = val;
}

extern "C" void mask_gpu(int N, float * X, float mask_num, float * mask, float val)
{
    mask_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, mask_num, mask, val);
    check_error(cudaPeekAtLastError());
}

/* Scale x[i] by scale wherever mask[i] equals mask_num. */
__global__ void scale_mask_kernel(int n, float *x, float mask_num, float *mask, float scale)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n && mask[i] == mask_num) x[i] *= scale;
}

extern "C" void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale)
{
    scale_mask_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, mask_num, mask, scale);
    check_error(cudaPeekAtLastError());
}

extern "C" void const_gpu(int N, float ALPHA, float * X, int INCX)
{
    const_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
    check_error(cudaPeekAtLastError());
}

extern "C" void constrain_gpu(int N, float ALPHA, float * X, int INCX)
{
    constrain_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
    check_error(cudaPeekAtLastError());
}


extern "C" void add_gpu(int N, float ALPHA, float * X, int INCX)
{
    add_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
    check_error(cudaPeekAtLastError());
}

extern "C" void scal_gpu(int N, float ALPHA, float * X, int INCX)
{
    scal_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
    check_error(cudaPeekAtLastError());
}

extern "C" void supp_gpu(int N, float ALPHA, float * X, int INCX)
{
    supp_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
    check_error(cudaPeekAtLastError());
}

extern "C" void fill_gpu(int N, float ALPHA, float * X, int INCX)
{
    fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
    check_error(cudaPeekAtLastError());
}

/* Weighted shortcut (residual) add between two feature maps of possibly
   different spatial sizes; stride/sample handle down/up sampling. */
__global__ void shortcut_kernel(int size, int minw, int minh, int minc, int stride, int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (id >= size) return;
    int i = id % minw;
    id /= minw;
    int j = id % minh;
    id /= minh;
    int k = id % minc;
    id /= minc;
    int b = id % batch;

    int out_index = i*sample + w2*(j*sample + h2*(k + c2*b));
    int add_index = i*stride + w1*(j*stride + h1*(k + c1*b));
    out[out_index] = s1*out[out_index] + s2*add[add_index];
    //out[out_index] += add[add_index];
}

extern "C" void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out)
{
    int minw = (w1 < w2) ? w1 : w2;
    int minh = (h1 < h2) ? h1 : h2;
    int minc = (c1 < c2) ? c1 : c2;

    int stride = w1/w2;
    int sample = w2/w1;
    assert(stride == h1/h2);
    assert(sample == h2/h1);
    if(stride < 1) stride = 1;
    if(sample < 1) sample = 1;

    int size = batch * minw * minh * minc;
    shortcut_kernel<<<cuda_gridsize(size), BLOCK>>>(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, s1, s2, out);
    check_error(cudaPeekAtLastError());
}

/* Smooth-L1 (Huber-like) loss and its gradient per element. */
__global__ void smooth_l1_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        float diff = truth[i] - pred[i];
        float abs_val = fabsf(diff);
        if(abs_val < 1) {
            error[i] = diff * diff;
            delta[i] = diff;
        }
        else {
            error[i] = 2*abs_val - 1;
            delta[i] = (diff > 0) ? 1 : -1;
        }
    }
}

extern "C" void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
    smooth_l1_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
    check_error(cudaPeekAtLastError());
}

/* Softmax cross-entropy: error only where truth is nonzero; delta = t - p. */
__global__ void softmax_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        float t = truth[i];
        float p = pred[i];
        error[i] = (t) ? -log(p) : 0;
        delta[i] = t-p;
    }
}

extern "C" void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
    softmax_x_ent_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
    check_error(cudaPeekAtLastError());
}

/* Binary cross-entropy with a small epsilon for numerical safety. */
__global__ void logistic_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        float t = truth[i];
        float p = pred[i];
        error[i] = -t*log(p+.0000001) - (1-t)*log(1-p+.0000001);
        delta[i] = t-p;
    }
}

extern "C" void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
    logistic_x_ent_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
    check_error(cudaPeekAtLastError());
}

__global__ void l2_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        float diff = truth[i] - pred[i];
        error[i] = diff * diff; //I know this is technically wrong, deal with it.
        delta[i] = diff;
    }
}

extern "C" void l2_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
    l2_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
    check_error(cudaPeekAtLastError());
}

__global__ void l1_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        float diff = truth[i] - pred[i];
        /* BUG FIX: original used integer abs() on a float, truncating the
           residual toward zero; use the float overload. */
        error[i] = fabsf(diff);
        delta[i] = (diff > 0) ? 1 : -1;
    }
}

extern "C" void l1_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
    l1_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
    check_error(cudaPeekAtLastError());
}

/* WGAN critic loss: maximize pred on real, minimize on fake. */
__global__ void wgan_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        error[i] = truth[i] ? -pred[i] : pred[i];
        delta[i] = (truth[i] > 0) ?
1 : -1;
    }
}

extern "C" void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
    wgan_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
    check_error(cudaPeekAtLastError());
}




/* c = s*a + (1-s)*b, with b optional (treated as 0 when NULL). */
__global__ void weighted_sum_kernel(int n, float *a, float *b, float *s, float *c)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        c[i] = s[i]*a[i] + (1-s[i])*(b ? b[i] : 0);
    }
}

/* Scatter gradients from the interleaved buffer OUT back into X and Y. */
__global__ void deinter_kernel(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < (NX+NY)*B){
        int b = i / (NX+NY);
        int j = i % (NX+NY);
        if (j < NX){
            if(X) X[b*NX + j] += OUT[i];
        } else {
            if(Y) Y[b*NY + j - NX] += OUT[i];
        }
    }
}

/* NOTE(review): launch configurations in this section were stripped by text
   extraction; reconstructed to match upstream darknet blas_kernels.cu. */
extern "C" void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
    deinter_kernel<<<cuda_gridsize((NX+NY)*B), BLOCK>>>(NX, X, NY, Y, B, OUT);
    check_error(cudaPeekAtLastError());
}

/* Gather X and Y into the interleaved buffer OUT, batch item by batch item. */
__global__ void inter_kernel(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < (NX+NY)*B){
        int b = i / (NX+NY);
        int j = i % (NX+NY);
        if (j < NX){
            OUT[i] = X[b*NX + j];
        } else {
            OUT[i] = Y[b*NY + j - NX];
        }
    }
}

extern "C" void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
    inter_kernel<<<cuda_gridsize((NX+NY)*B), BLOCK>>>(NX, X, NY, Y, B, OUT);
    check_error(cudaPeekAtLastError());
}

extern "C" void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c)
{
    weighted_sum_kernel<<<cuda_gridsize(num), BLOCK>>>(num, a, b, s, c);
    check_error(cudaPeekAtLastError());
}

/* Backward pass for weighted_sum: route dc into da, db (optional) and ds. */
__global__ void weighted_delta_kernel(int n, float *a, float *b, float *s, float *da, float *db, float *ds, float *dc)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        if(da) da[i] += dc[i] * s[i];
        if(db) db[i] += dc[i] * (1-s[i]);
        ds[i] += dc[i] * (a[i] - b[i]);
    }
}

extern "C" void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc)
{
    weighted_delta_kernel<<<cuda_gridsize(num), BLOCK>>>(num, a, b, s, da, db, ds, dc);
    check_error(cudaPeekAtLastError());
}

__global__ void mult_add_into_kernel(int n, float *a, float *b, float *c)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        c[i] += a[i]*b[i];
    }
}

extern "C" void mult_add_into_gpu(int num, float *a, float *b, float *c)
{
    mult_add_into_kernel<<<cuda_gridsize(num), BLOCK>>>(num, a, b, c);
    check_error(cudaPeekAtLastError());
}


/* Temperature softmax over n strided elements, shifted by the max for
   numerical stability. */
__device__ void softmax_device(float *input, int n, float temp, int stride, float *output)
{
    int i;
    float sum = 0;
    float largest = -INFINITY;
    for(i = 0; i < n; ++i){
        /* BUG FIX: original declared val as int, truncating the activations
           when searching for the maximum and breaking the stability shift. */
        float val = input[i*stride];
        largest = (val>largest) ? val : largest;
    }
    for(i = 0; i < n; ++i){
        float e = expf(input[i*stride]/temp - largest/temp);
        sum += e;
        output[i*stride] = e;
    }
    for(i = 0; i < n; ++i){
        output[i*stride] /= sum;
    }
}


/* One softmax per (spatial position, group, batch item) over a hierarchy
   group of the tree. */
__global__ void softmax_tree_kernel(float *input, int spatial, int batch, int stride, float temp, float *output, int groups, int *group_size, int *group_offset)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (id >= spatial*batch*groups) return;
    int s = id % spatial;
    id = id / spatial;
    int g = id % groups;
    int b = id / groups;
    int goff = group_offset[g]*spatial;
    int boff = b*stride;
    softmax_device(input + goff + boff + s, group_size[g], temp, spatial, output + goff + boff + s);
}

extern "C" void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier)
{
    int *tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups);
    int *tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups);
    /*
       static int *tree_groups_size = 0;
       static int *tree_groups_offset = 0;
       if(!tree_groups_size){
           tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups);
           tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups);
       }
     */
    int num = spatial*batch*hier.groups;
    softmax_tree_kernel<<<cuda_gridsize(num), BLOCK>>>(input, spatial, batch, stride, temp, output, hier.groups, tree_groups_size, tree_groups_offset);
    check_error(cudaPeekAtLastError());
    cuda_free((float *)tree_groups_size);
    cuda_free((float *)tree_groups_offset);
}

/* One softmax per (batch item, group). */
__global__ void softmax_kernel(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (id >= batch*groups) return;
    int b = id / groups;
    int g = id % groups;
    softmax_device(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset);
}

extern "C" void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
{
    softmax_kernel<<<cuda_gridsize(batch*groups), BLOCK>>>(input, n, batch, batch_offset, groups, group_offset, stride, temp, output);
    check_error(cudaPeekAtLastError());
}


/* Nearest-neighbor upsample by stride; forward accumulates into out, backward
   atomically accumulates gradients into x. */
__global__ void upsample_kernel(size_t N, float *x, int w, int h, int c, int batch, int stride, int forward, float scale, float *out)
{
    size_t i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i >= N) return;
    int out_index = i;
    int out_w = i%(w*stride);
    i = i/(w*stride);
    int out_h = i%(h*stride);
    i = i/(h*stride);
    int out_c = i%c;
    i = i/c;
    int b = i%batch;

    int in_w = out_w / stride;
    int in_h = out_h / stride;
    int in_c = out_c;

    int in_index = b*w*h*c + in_c*w*h + in_h*w + in_w;


    if(forward) out[out_index] += scale * x[in_index];
    else atomicAdd(x+in_index, scale * out[out_index]);
}
extern "C" void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out)
{
    size_t size = w*h*c*batch*stride*stride;
    upsample_kernel<<<cuda_gridsize(size), BLOCK>>>(size, in, w, h, c, batch, stride, forward, scale, out);
check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/uvm/darknet/src/box.c b/workloads/realworld/uvm/darknet/src/box.c new file mode 100644 index 0000000000000000000000000000000000000000..8a1772c9ae05dede6ddc83d9b6465f64cf974ae8 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/box.c @@ -0,0 +1,357 @@ +#include "box.h" +#include +#include +#include + +int nms_comparator(const void *pa, const void *pb) +{ + detection a = *(detection *)pa; + detection b = *(detection *)pb; + float diff = 0; + if(b.sort_class >= 0){ + diff = a.prob[b.sort_class] - b.prob[b.sort_class]; + } else { + diff = a.objectness - b.objectness; + } + if(diff < 0) return 1; + else if(diff > 0) return -1; + return 0; +} + +void do_nms_obj(detection *dets, int total, int classes, float thresh) +{ + int i, j, k; + k = total-1; + for(i = 0; i <= k; ++i){ + if(dets[i].objectness == 0){ + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k+1; + + for(i = 0; i < total; ++i){ + dets[i].sort_class = -1; + } + + qsort(dets, total, sizeof(detection), nms_comparator); + for(i = 0; i < total; ++i){ + if(dets[i].objectness == 0) continue; + box a = dets[i].bbox; + for(j = i+1; j < total; ++j){ + if(dets[j].objectness == 0) continue; + box b = dets[j].bbox; + if (box_iou(a, b) > thresh){ + dets[j].objectness = 0; + for(k = 0; k < classes; ++k){ + dets[j].prob[k] = 0; + } + } + } + } +} + + +void do_nms_sort(detection *dets, int total, int classes, float thresh) +{ + int i, j, k; + k = total-1; + for(i = 0; i <= k; ++i){ + if(dets[i].objectness == 0){ + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k+1; + + for(k = 0; k < classes; ++k){ + for(i = 0; i < total; ++i){ + dets[i].sort_class = k; + } + qsort(dets, total, sizeof(detection), nms_comparator); + for(i = 0; i < total; ++i){ + if(dets[i].prob[k] == 0) continue; + box a = dets[i].bbox; + for(j = i+1; j < total; ++j){ + box b = 
dets[j].bbox; + if (box_iou(a, b) > thresh){ + dets[j].prob[k] = 0; + } + } + } + } +} + +box float_to_box(float *f, int stride) +{ + box b = {0}; + b.x = f[0]; + b.y = f[1*stride]; + b.w = f[2*stride]; + b.h = f[3*stride]; + return b; +} + +dbox derivative(box a, box b) +{ + dbox d; + d.dx = 0; + d.dw = 0; + float l1 = a.x - a.w/2; + float l2 = b.x - b.w/2; + if (l1 > l2){ + d.dx -= 1; + d.dw += .5; + } + float r1 = a.x + a.w/2; + float r2 = b.x + b.w/2; + if(r1 < r2){ + d.dx += 1; + d.dw += .5; + } + if (l1 > r2) { + d.dx = -1; + d.dw = 0; + } + if (r1 < l2){ + d.dx = 1; + d.dw = 0; + } + + d.dy = 0; + d.dh = 0; + float t1 = a.y - a.h/2; + float t2 = b.y - b.h/2; + if (t1 > t2){ + d.dy -= 1; + d.dh += .5; + } + float b1 = a.y + a.h/2; + float b2 = b.y + b.h/2; + if(b1 < b2){ + d.dy += 1; + d.dh += .5; + } + if (t1 > b2) { + d.dy = -1; + d.dh = 0; + } + if (b1 < t2){ + d.dy = 1; + d.dh = 0; + } + return d; +} + +float overlap(float x1, float w1, float x2, float w2) +{ + float l1 = x1 - w1/2; + float l2 = x2 - w2/2; + float left = l1 > l2 ? l1 : l2; + float r1 = x1 + w1/2; + float r2 = x2 + w2/2; + float right = r1 < r2 ? 
r1 : r2; + return right - left; +} + +float box_intersection(box a, box b) +{ + float w = overlap(a.x, a.w, b.x, b.w); + float h = overlap(a.y, a.h, b.y, b.h); + if(w < 0 || h < 0) return 0; + float area = w*h; + return area; +} + +float box_union(box a, box b) +{ + float i = box_intersection(a, b); + float u = a.w*a.h + b.w*b.h - i; + return u; +} + +float box_iou(box a, box b) +{ + return box_intersection(a, b)/box_union(a, b); +} + +float box_rmse(box a, box b) +{ + return sqrt(pow(a.x-b.x, 2) + + pow(a.y-b.y, 2) + + pow(a.w-b.w, 2) + + pow(a.h-b.h, 2)); +} + +dbox dintersect(box a, box b) +{ + float w = overlap(a.x, a.w, b.x, b.w); + float h = overlap(a.y, a.h, b.y, b.h); + dbox dover = derivative(a, b); + dbox di; + + di.dw = dover.dw*h; + di.dx = dover.dx*h; + di.dh = dover.dh*w; + di.dy = dover.dy*w; + + return di; +} + +dbox dunion(box a, box b) +{ + dbox du; + + dbox di = dintersect(a, b); + du.dw = a.h - di.dw; + du.dh = a.w - di.dh; + du.dx = -di.dx; + du.dy = -di.dy; + + return du; +} + + +void test_dunion() +{ + box a = {0, 0, 1, 1}; + box dxa= {0+.0001, 0, 1, 1}; + box dya= {0, 0+.0001, 1, 1}; + box dwa= {0, 0, 1+.0001, 1}; + box dha= {0, 0, 1, 1+.0001}; + + box b = {.5, .5, .2, .2}; + dbox di = dunion(a,b); + printf("Union: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); + float inter = box_union(a, b); + float xinter = box_union(dxa, b); + float yinter = box_union(dya, b); + float winter = box_union(dwa, b); + float hinter = box_union(dha, b); + xinter = (xinter - inter)/(.0001); + yinter = (yinter - inter)/(.0001); + winter = (winter - inter)/(.0001); + hinter = (hinter - inter)/(.0001); + printf("Union Manual %f %f %f %f\n", xinter, yinter, winter, hinter); +} +void test_dintersect() +{ + box a = {0, 0, 1, 1}; + box dxa= {0+.0001, 0, 1, 1}; + box dya= {0, 0+.0001, 1, 1}; + box dwa= {0, 0, 1+.0001, 1}; + box dha= {0, 0, 1, 1+.0001}; + + box b = {.5, .5, .2, .2}; + dbox di = dintersect(a,b); + printf("Inter: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); 
+ float inter = box_intersection(a, b); + float xinter = box_intersection(dxa, b); + float yinter = box_intersection(dya, b); + float winter = box_intersection(dwa, b); + float hinter = box_intersection(dha, b); + xinter = (xinter - inter)/(.0001); + yinter = (yinter - inter)/(.0001); + winter = (winter - inter)/(.0001); + hinter = (hinter - inter)/(.0001); + printf("Inter Manual %f %f %f %f\n", xinter, yinter, winter, hinter); +} + +void test_box() +{ + test_dintersect(); + test_dunion(); + box a = {0, 0, 1, 1}; + box dxa= {0+.00001, 0, 1, 1}; + box dya= {0, 0+.00001, 1, 1}; + box dwa= {0, 0, 1+.00001, 1}; + box dha= {0, 0, 1, 1+.00001}; + + box b = {.5, 0, .2, .2}; + + float iou = box_iou(a,b); + iou = (1-iou)*(1-iou); + printf("%f\n", iou); + dbox d = diou(a, b); + printf("%f %f %f %f\n", d.dx, d.dy, d.dw, d.dh); + + float xiou = box_iou(dxa, b); + float yiou = box_iou(dya, b); + float wiou = box_iou(dwa, b); + float hiou = box_iou(dha, b); + xiou = ((1-xiou)*(1-xiou) - iou)/(.00001); + yiou = ((1-yiou)*(1-yiou) - iou)/(.00001); + wiou = ((1-wiou)*(1-wiou) - iou)/(.00001); + hiou = ((1-hiou)*(1-hiou) - iou)/(.00001); + printf("manual %f %f %f %f\n", xiou, yiou, wiou, hiou); +} + +dbox diou(box a, box b) +{ + float u = box_union(a,b); + float i = box_intersection(a,b); + dbox di = dintersect(a,b); + dbox du = dunion(a,b); + dbox dd = {0,0,0,0}; + + if(i <= 0 || 1) { + dd.dx = b.x - a.x; + dd.dy = b.y - a.y; + dd.dw = b.w - a.w; + dd.dh = b.h - a.h; + return dd; + } + + dd.dx = 2*pow((1-(i/u)),1)*(di.dx*u - du.dx*i)/(u*u); + dd.dy = 2*pow((1-(i/u)),1)*(di.dy*u - du.dy*i)/(u*u); + dd.dw = 2*pow((1-(i/u)),1)*(di.dw*u - du.dw*i)/(u*u); + dd.dh = 2*pow((1-(i/u)),1)*(di.dh*u - du.dh*i)/(u*u); + return dd; +} + + +void do_nms(box *boxes, float **probs, int total, int classes, float thresh) +{ + int i, j, k; + for(i = 0; i < total; ++i){ + int any = 0; + for(k = 0; k < classes; ++k) any = any || (probs[i][k] > 0); + if(!any) { + continue; + } + for(j = i+1; j < total; 
++j){ + if (box_iou(boxes[i], boxes[j]) > thresh){ + for(k = 0; k < classes; ++k){ + if (probs[i][k] < probs[j][k]) probs[i][k] = 0; + else probs[j][k] = 0; + } + } + } + } +} + +box encode_box(box b, box anchor) +{ + box encode; + encode.x = (b.x - anchor.x) / anchor.w; + encode.y = (b.y - anchor.y) / anchor.h; + encode.w = log2(b.w / anchor.w); + encode.h = log2(b.h / anchor.h); + return encode; +} + +box decode_box(box b, box anchor) +{ + box decode; + decode.x = b.x * anchor.w + anchor.x; + decode.y = b.y * anchor.h + anchor.y; + decode.w = pow(2., b.w) * anchor.w; + decode.h = pow(2., b.h) * anchor.h; + return decode; +} diff --git a/workloads/realworld/uvm/darknet/src/box.h b/workloads/realworld/uvm/darknet/src/box.h new file mode 100644 index 0000000000000000000000000000000000000000..dda3e59100c3d9e0a6bb05a80070155d9fcbc876 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/box.h @@ -0,0 +1,14 @@ +#ifndef BOX_H +#define BOX_H +#include "darknet.h" + +typedef struct{ + float dx, dy, dw, dh; +} dbox; + +float box_rmse(box a, box b); +dbox diou(box a, box b); +box decode_box(box b, box anchor); +box encode_box(box b, box anchor); + +#endif diff --git a/workloads/realworld/uvm/darknet/src/classifier.h b/workloads/realworld/uvm/darknet/src/classifier.h new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/classifier.h @@ -0,0 +1 @@ + diff --git a/workloads/realworld/uvm/darknet/src/col2im.c b/workloads/realworld/uvm/darknet/src/col2im.c new file mode 100644 index 0000000000000000000000000000000000000000..5c4605e197439f79fe05c41337a5f2b8103f63ba --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/col2im.c @@ -0,0 +1,39 @@ +#include +#include +void col2im_add_pixel(float *im, int height, int width, int channels, + int row, int col, int channel, int pad, float val) +{ + row -= pad; + col -= pad; + + if (row < 0 || col < 0 || + row >= height || col 
>= width) return; + im[col + width*(row + height*channel)] += val; +} +//This one might be too, can't remember. +void col2im_cpu(float* data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_im) +{ + int c,h,w; + int height_col = (height + 2*pad - ksize) / stride + 1; + int width_col = (width + 2*pad - ksize) / stride + 1; + + int channels_col = channels * ksize * ksize; + for (c = 0; c < channels_col; ++c) { + int w_offset = c % ksize; + int h_offset = (c / ksize) % ksize; + int c_im = c / ksize / ksize; + for (h = 0; h < height_col; ++h) { + for (w = 0; w < width_col; ++w) { + int im_row = h_offset + h * stride; + int im_col = w_offset + w * stride; + int col_index = (c * height_col + h) * width_col + w; + double val = data_col[col_index]; + col2im_add_pixel(data_im, height, width, channels, + im_row, im_col, c_im, pad, val); + } + } + } +} + diff --git a/workloads/realworld/uvm/darknet/src/col2im.h b/workloads/realworld/uvm/darknet/src/col2im.h new file mode 100644 index 0000000000000000000000000000000000000000..3fbe05307db65a1f511f801670a23734e21b7dff --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/col2im.h @@ -0,0 +1,13 @@ +#ifndef COL2IM_H +#define COL2IM_H + +void col2im_cpu(float* data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_im); + +#ifdef GPU +void col2im_gpu(float *data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_im); +#endif +#endif diff --git a/workloads/realworld/uvm/darknet/src/col2im_kernels.cu b/workloads/realworld/uvm/darknet/src/col2im_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..d971880b0fd28c845500ec9d8d725ccf2a172933 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/col2im_kernels.cu @@ -0,0 +1,59 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "col2im.h" +#include "cuda_dark.h" +} + +// src: 
https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu +// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE + +__global__ void col2im_gpu_kernel(const int n, const float* data_col, + const int height, const int width, const int ksize, + const int pad, + const int stride, + const int height_col, const int width_col, + float *data_im) { + int index = blockIdx.x*blockDim.x+threadIdx.x; + for(; index < n; index += blockDim.x*gridDim.x){ + float val = 0; + int w = index % width + pad; + int h = (index / width) % height + pad; + int c = index / (width * height); + // compute the start and end of the output + int w_col_start = (w < ksize) ? 0 : (w - ksize) / stride + 1; + int w_col_end = min(w / stride + 1, width_col); + int h_col_start = (h < ksize) ? 0 : (h - ksize) / stride + 1; + int h_col_end = min(h / stride + 1, height_col); + // equivalent implementation + int offset = + (c * ksize * ksize + h * ksize + w) * height_col * width_col; + int coeff_h_col = (1 - stride * ksize * height_col) * width_col; + int coeff_w_col = (1 - stride * height_col * width_col); + for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { + for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { + val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col]; + } + } + data_im[index] += val; + } +} + +void col2im_gpu(float *data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_im){ + // We are going to launch channels * height_col * width_col kernels, each + // kernel responsible for copying a single-channel grid. 
+ int height_col = (height + 2 * pad - ksize) / stride + 1; + int width_col = (width + 2 * pad - ksize) / stride + 1; + int num_kernels = channels * height * width; + col2im_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, + BLOCK>>>( + num_kernels, data_col, height, width, ksize, pad, + stride, height_col, + width_col, data_im); + check_error(cudaPeekAtLastError()); +} + diff --git a/workloads/realworld/uvm/darknet/src/compare.c b/workloads/realworld/uvm/darknet/src/compare.c new file mode 100644 index 0000000000000000000000000000000000000000..d2d2b3bdc675cf808f483d1607550e072e245396 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/compare.c @@ -0,0 +1,352 @@ +#include + +#include "network.h" +#include "detection_layer.h" +#include "cost_layer.h" +#include "utils.h" +#include "parser.h" +#include "box.h" + +void train_compare(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + list *plist = get_paths("data/compare.train.list"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + printf("%d\n", N); + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.classes = 20; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = COMPARE_DATA; + + load_thread = load_data_in_thread(args); + int epoch = *net.seen/N; + int i = 0; + while(1){ + ++i; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) 
avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%.3f: %f, %f avg, %lf seconds, %ld images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen); + free_data(train); + if(i%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_%d_minor_%d.weights",backup_directory,base, epoch, i); + save_weights(net, buff); + } + if(*net.seen/N > epoch){ + epoch = *net.seen/N; + i = 0; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + if(epoch%22 == 0) net.learning_rate *= .1; + } + } + pthread_join(load_thread, 0); + free_data(buffer); + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void validate_compare(char *filename, char *weightfile) +{ + int i = 0; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + list *plist = get_paths("data/compare.val.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size/2; + free_list(plist); + + clock_t time; + int correct = 0; + int total = 0; + int splits = 10; + int num = (i+1)*N/splits - i*N/splits; + + data val, buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.classes = 20; + args.n = num; + args.m = 0; + args.d = &buffer; + args.type = COMPARE_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(i = 1; i <= splits; ++i){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + num = (i+1)*N/splits - i*N/splits; + char **part = paths+(i*N/splits); + if(i != splits){ + args.paths = part; + load_thread = load_data_in_thread(args); + } + printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + int j,k; + for(j = 0; j < val.y.rows; ++j){ + for(k = 0; k < 20; ++k){ + if(val.y.vals[j][k*2] != 
val.y.vals[j][k*2+1]){ + ++total; + if((val.y.vals[j][k*2] < val.y.vals[j][k*2+1]) == (pred.vals[j][k*2] < pred.vals[j][k*2+1])){ + ++correct; + } + } + } + } + free_matrix(pred); + printf("%d: Acc: %f, %lf seconds, %d images\n", i, (float)correct/total, sec(clock()-time), val.X.rows); + free_data(val); + } +} + +typedef struct { + network net; + char *filename; + int class; + int classes; + float elo; + float *elos; +} sortable_bbox; + +int total_compares = 0; +int current_class = 0; + +int elo_comparator(const void*a, const void *b) +{ + sortable_bbox box1 = *(sortable_bbox*)a; + sortable_bbox box2 = *(sortable_bbox*)b; + if(box1.elos[current_class] == box2.elos[current_class]) return 0; + if(box1.elos[current_class] > box2.elos[current_class]) return -1; + return 1; +} + +int bbox_comparator(const void *a, const void *b) +{ + ++total_compares; + sortable_bbox box1 = *(sortable_bbox*)a; + sortable_bbox box2 = *(sortable_bbox*)b; + network net = box1.net; + int class = box1.class; + + image im1 = load_image_color(box1.filename, net.w, net.h); + image im2 = load_image_color(box2.filename, net.w, net.h); + float *X = calloc(net.w*net.h*net.c, sizeof(float)); + memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); + memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); + float *predictions = network_predict(net, X); + + free_image(im1); + free_image(im2); + free(X); + if (predictions[class*2] > predictions[class*2+1]){ + return 1; + } + return -1; +} + +void bbox_update(sortable_bbox *a, sortable_bbox *b, int class, int result) +{ + int k = 32; + float EA = 1./(1+pow(10, (b->elos[class] - a->elos[class])/400.)); + float EB = 1./(1+pow(10, (a->elos[class] - b->elos[class])/400.)); + float SA = result ? 1 : 0; + float SB = result ? 
0 : 1; + a->elos[class] += k*(SA - EA); + b->elos[class] += k*(SB - EB); +} + +void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, int class) +{ + image im1 = load_image_color(a->filename, net.w, net.h); + image im2 = load_image_color(b->filename, net.w, net.h); + float *X = calloc(net.w*net.h*net.c, sizeof(float)); + memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); + memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); + float *predictions = network_predict(net, X); + ++total_compares; + + int i; + for(i = 0; i < classes; ++i){ + if(class < 0 || class == i){ + int result = predictions[i*2] > predictions[i*2+1]; + bbox_update(a, b, i, result); + } + } + + free_image(im1); + free_image(im2); + free(X); +} + +void SortMaster3000(char *filename, char *weightfile) +{ + int i = 0; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + set_batch_network(&net, 1); + + list *plist = get_paths("data/compare.sort.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + free_list(plist); + sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); + printf("Sorting %d boxes...\n", N); + for(i = 0; i < N; ++i){ + boxes[i].filename = paths[i]; + boxes[i].net = net; + boxes[i].class = 7; + boxes[i].elo = 1500; + } + clock_t time=clock(); + qsort(boxes, N, sizeof(sortable_bbox), bbox_comparator); + for(i = 0; i < N; ++i){ + printf("%s\n", boxes[i].filename); + } + printf("Sorted in %d compares, %f secs\n", total_compares, sec(clock()-time)); +} + +void BattleRoyaleWithCheese(char *filename, char *weightfile) +{ + int classes = 20; + int i,j; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + set_batch_network(&net, 1); + + list *plist = get_paths("data/compare.sort.list"); + //list *plist = 
get_paths("data/compare.small.list"); + //list *plist = get_paths("data/compare.cat.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + int total = N; + free_list(plist); + sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); + printf("Battling %d boxes...\n", N); + for(i = 0; i < N; ++i){ + boxes[i].filename = paths[i]; + boxes[i].net = net; + boxes[i].classes = classes; + boxes[i].elos = calloc(classes, sizeof(float));; + for(j = 0; j < classes; ++j){ + boxes[i].elos[j] = 1500; + } + } + int round; + clock_t time=clock(); + for(round = 1; round <= 4; ++round){ + clock_t round_time=clock(); + printf("Round: %d\n", round); + shuffle(boxes, N, sizeof(sortable_bbox)); + for(i = 0; i < N/2; ++i){ + bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, -1); + } + printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); + } + + int class; + + for (class = 0; class < classes; ++class){ + + N = total; + current_class = class; + qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + N /= 2; + + for(round = 1; round <= 100; ++round){ + clock_t round_time=clock(); + printf("Round: %d\n", round); + + sorta_shuffle(boxes, N, sizeof(sortable_bbox), 10); + for(i = 0; i < N/2; ++i){ + bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, class); + } + qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + if(round <= 20) N = (N*9/10)/2*2; + + printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); + } + char buff[256]; + sprintf(buff, "results/battle_%d.log", class); + FILE *outfp = fopen(buff, "w"); + for(i = 0; i < N; ++i){ + fprintf(outfp, "%s %f\n", boxes[i].filename, boxes[i].elos[class]); + } + fclose(outfp); + } + printf("Tournament in %d compares, %f secs\n", total_compares, sec(clock()-time)); +} + +void run_compare(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + 
return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + //char *filename = (argc > 5) ? argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_compare(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_compare(cfg, weights); + else if(0==strcmp(argv[2], "sort")) SortMaster3000(cfg, weights); + else if(0==strcmp(argv[2], "battle")) BattleRoyaleWithCheese(cfg, weights); + /* + else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); + else if(0==strcmp(argv[2], "extract")) extract_boxes(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_recall(cfg, weights); + */ +} diff --git a/workloads/realworld/uvm/darknet/src/connected_layer.c b/workloads/realworld/uvm/darknet/src/connected_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..ec25b44d998661c4735cd9a8a86f2355a0ae0080 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/connected_layer.c @@ -0,0 +1,336 @@ +#include "connected_layer.h" +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam) +{ + int i; + layer l = {0}; + l.learning_rate_scale = 1; + l.type = CONNECTED; + + l.inputs = inputs; + l.outputs = outputs; + l.batch=batch; + l.batch_normalize = batch_normalize; + l.h = 1; + l.w = 1; + l.c = inputs; + l.out_h = 1; + l.out_w = 1; + l.out_c = outputs; + + l.output = calloc(batch*outputs, sizeof(float)); + l.delta = calloc(batch*outputs, sizeof(float)); + + l.weight_updates = calloc(inputs*outputs, sizeof(float)); + l.bias_updates = calloc(outputs, sizeof(float)); + + l.weights = calloc(outputs*inputs, sizeof(float)); + l.biases = calloc(outputs, sizeof(float)); + + l.forward = forward_connected_layer; + l.backward = backward_connected_layer; + l.update = update_connected_layer; + + 
//float scale = 1./sqrt(inputs); + float scale = sqrt(2./inputs); + for(i = 0; i < outputs*inputs; ++i){ + l.weights[i] = scale*rand_uniform(-1, 1); + } + + for(i = 0; i < outputs; ++i){ + l.biases[i] = 0; + } + + if(adam){ + l.m = calloc(l.inputs*l.outputs, sizeof(float)); + l.v = calloc(l.inputs*l.outputs, sizeof(float)); + l.bias_m = calloc(l.outputs, sizeof(float)); + l.scale_m = calloc(l.outputs, sizeof(float)); + l.bias_v = calloc(l.outputs, sizeof(float)); + l.scale_v = calloc(l.outputs, sizeof(float)); + } + if(batch_normalize){ + l.scales = calloc(outputs, sizeof(float)); + l.scale_updates = calloc(outputs, sizeof(float)); + for(i = 0; i < outputs; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(outputs, sizeof(float)); + l.mean_delta = calloc(outputs, sizeof(float)); + l.variance = calloc(outputs, sizeof(float)); + l.variance_delta = calloc(outputs, sizeof(float)); + + l.rolling_mean = calloc(outputs, sizeof(float)); + l.rolling_variance = calloc(outputs, sizeof(float)); + + l.x = calloc(batch*outputs, sizeof(float)); + l.x_norm = calloc(batch*outputs, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_connected_layer_gpu; + l.backward_gpu = backward_connected_layer_gpu; + l.update_gpu = update_connected_layer_gpu; + + l.weights_gpu = cuda_make_array(l.weights, outputs*inputs); + l.biases_gpu = cuda_make_array(l.biases, outputs); + + l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs); + + l.output_gpu = cuda_make_array(l.output, outputs*batch); + l.delta_gpu = cuda_make_array(l.delta, outputs*batch); + if (adam) { + l.m_gpu = cuda_make_array(0, inputs*outputs); + l.v_gpu = cuda_make_array(0, inputs*outputs); + l.bias_m_gpu = cuda_make_array(0, outputs); + l.bias_v_gpu = cuda_make_array(0, outputs); + l.scale_m_gpu = cuda_make_array(0, outputs); + l.scale_v_gpu = cuda_make_array(0, outputs); + } + + if(batch_normalize){ + l.mean_gpu = cuda_make_array(l.mean, 
outputs); + l.variance_gpu = cuda_make_array(l.variance, outputs); + + l.rolling_mean_gpu = cuda_make_array(l.mean, outputs); + l.rolling_variance_gpu = cuda_make_array(l.variance, outputs); + + l.mean_delta_gpu = cuda_make_array(l.mean, outputs); + l.variance_delta_gpu = cuda_make_array(l.variance, outputs); + + l.scales_gpu = cuda_make_array(l.scales, outputs); + l.scale_updates_gpu = cuda_make_array(l.scale_updates, outputs); + + l.x_gpu = cuda_make_array(l.output, l.batch*outputs); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*outputs); +#ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); +#endif + } +#endif + l.activation = activation; + fprintf(stderr, "connected %4d -> %4d\n", inputs, outputs); + return l; +} + +void update_connected_layer(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.outputs, momentum, l.bias_updates, 1); + + if(l.batch_normalize){ + axpy_cpu(l.outputs, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.outputs, momentum, l.scale_updates, 1); + } + + axpy_cpu(l.inputs*l.outputs, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(l.inputs*l.outputs, momentum, l.weight_updates, 1); +} + +void forward_connected_layer(layer l, network net) +{ + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + int m = l.batch; + int k = l.inputs; + int n = l.outputs; + float *a = net.input; + float *b = l.weights; + float *c = l.output; + 
gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); + if(l.batch_normalize){ + forward_batchnorm_layer(l, net); + } else { + add_bias(l.output, l.biases, l.batch, l.outputs, 1); + } + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_connected_layer(layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l.bias_updates, l.delta, l.batch, l.outputs, 1); + } + + int m = l.outputs; + int k = l.batch; + int n = l.inputs; + float *a = l.delta; + float *b = net.input; + float *c = l.weight_updates; + gemm(1,0,m,n,k,1,a,m,b,n,1,c,n); + + m = l.batch; + k = l.outputs; + n = l.inputs; + + a = l.delta; + b = l.weights; + c = net.delta; + + if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); +} + + +void denormalize_connected_layer(layer l) +{ + int i, j; + for(i = 0; i < l.outputs; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .000001); + for(j = 0; j < l.inputs; ++j){ + l.weights[i*l.inputs + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + + +void statistics_connected_layer(layer l) +{ + if(l.batch_normalize){ + printf("Scales "); + print_statistics(l.scales, l.outputs); + /* + printf("Rolling Mean "); + print_statistics(l.rolling_mean, l.outputs); + printf("Rolling Variance "); + print_statistics(l.rolling_variance, l.outputs); + */ + } + printf("Biases "); + print_statistics(l.biases, l.outputs); + printf("Weights "); + print_statistics(l.weights, l.outputs); +} + +#ifdef GPU + +void pull_connected_layer(layer l) +{ + cuda_pull_array(l.weights_gpu, l.weights, l.inputs*l.outputs); + cuda_pull_array(l.biases_gpu, l.biases, l.outputs); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs); + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + if (l.batch_normalize){ + cuda_pull_array(l.scales_gpu, 
l.scales, l.outputs); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs); + } +} + +void push_connected_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.inputs*l.outputs); + cuda_push_array(l.biases_gpu, l.biases, l.outputs); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs); + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + if (l.batch_normalize){ + cuda_push_array(l.scales_gpu, l.scales, l.outputs); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs); + } +} + +void update_connected_layer_gpu(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + if(a.adam){ + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.inputs*l.outputs, batch, a.t); + adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t); + if(l.scales_gpu){ + adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t); + } + }else{ + axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1); + + if(l.batch_normalize){ + axpy_gpu(l.outputs, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.outputs, momentum, l.scale_updates_gpu, 1); + } + + axpy_gpu(l.inputs*l.outputs, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.inputs*l.outputs, momentum, l.weight_updates_gpu, 1); + } +} + +void 
forward_connected_layer_gpu(layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + + int m = l.batch; + int k = l.inputs; + int n = l.outputs; + float * a = net.input_gpu; + float * b = l.weights_gpu; + float * c = l.output_gpu; + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (l.batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.outputs, 1); + } + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_connected_layer_gpu(layer l, network net) +{ + constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + if(l.batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.outputs, 1); + } + + int m = l.outputs; + int k = l.batch; + int n = l.inputs; + float * a = l.delta_gpu; + float * b = net.input_gpu; + float * c = l.weight_updates_gpu; + gemm_gpu(1,0,m,n,k,1,a,m,b,n,1,c,n); + + m = l.batch; + k = l.outputs; + n = l.inputs; + + a = l.delta_gpu; + b = l.weights_gpu; + c = net.delta_gpu; + + if(c) gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); +} +#endif diff --git a/workloads/realworld/uvm/darknet/src/connected_layer.h b/workloads/realworld/uvm/darknet/src/connected_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..6727a964eaa923906b202ff337aa69ad91817117 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/connected_layer.h @@ -0,0 +1,23 @@ +#ifndef CONNECTED_LAYER_H +#define CONNECTED_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam); + +void forward_connected_layer(layer l, network net); +void backward_connected_layer(layer l, network net); +void update_connected_layer(layer l, update_args a); + +#ifdef GPU +void 
forward_connected_layer_gpu(layer l, network net); +void backward_connected_layer_gpu(layer l, network net); +void update_connected_layer_gpu(layer l, update_args a); +void push_connected_layer(layer l); +void pull_connected_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/uvm/darknet/src/convolutional_kernels.cu b/workloads/realworld/uvm/darknet/src/convolutional_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..ed9d76e49548e4636c545d4e5d33ddc9b63e5905 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/convolutional_kernels.cu @@ -0,0 +1,330 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "gemm.h" +#include "blas.h" +#include "im2col.h" +#include "col2im.h" +#include "utils.h" +#include "cuda_dark.h" +} + +__global__ void binarize_kernel(float *x, int n, float *binary) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= n) return; + binary[i] = (x[i] >= 0) ? 1 : -1; +} + +void binarize_gpu(float *x, int n, float *binary) +{ + binarize_kernel<<>>(x, n, binary); + check_error(cudaPeekAtLastError()); +} + +__global__ void binarize_input_kernel(float *input, int n, int size, float *binary) +{ + int s = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (s >= size) return; + int i = 0; + float mean = 0; + for(i = 0; i < n; ++i){ + mean += fabsf(input[i*size + s]); + } + mean = mean / n; + for(i = 0; i < n; ++i){ + binary[i*size + s] = (input[i*size + s] > 0) ? 
mean : -mean; + } +} + +void binarize_input_gpu(float *input, int n, int size, float *binary) +{ + binarize_input_kernel<<>>(input, n, size, binary); + check_error(cudaPeekAtLastError()); +} + + +__global__ void binarize_weights_kernel(float *weights, int n, int size, float *binary) +{ + int f = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (f >= n) return; + int i = 0; + float mean = 0; + for(i = 0; i < size; ++i){ + mean += fabsf(weights[f*size + i]); + } + mean = mean / size; + for(i = 0; i < size; ++i){ + binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean; + //binary[f*size + i] = weights[f*size + i]; + } +} + +void binarize_weights_gpu(float *weights, int n, int size, float *binary) +{ + binarize_weights_kernel<<>>(weights, n, size, binary); + check_error(cudaPeekAtLastError()); +} + +void forward_convolutional_layer_gpu(convolutional_layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + if(l.binary){ + binarize_weights_gpu(l.weights_gpu, l.n, l.c/l.groups*l.size*l.size, l.binary_weights_gpu); + swap_binary(&l); + } + + if(l.xnor){ + binarize_weights_gpu(l.weights_gpu, l.n, l.c/l.groups*l.size*l.size, l.binary_weights_gpu); + swap_binary(&l); + binarize_gpu(net.input_gpu, l.c*l.h*l.w*l.batch, l.binary_input_gpu); + net.input_gpu = l.binary_input_gpu; + } + +#ifdef CUDNN + float one = 1; + cudnnConvolutionForward(cudnn_handle(), + &one, + l.srcTensorDesc, + net.input_gpu, + l.weightDesc, + l.weights_gpu, + l.convDesc, + l.fw_algo, + net.workspace, + l.workspace_size, + &one, + l.dstTensorDesc, + l.output_gpu); + +#else + int i, j; + int m = l.n/l.groups; + int k = l.size*l.size*l.c/l.groups; + int n = l.out_w*l.out_h; + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.weights_gpu + j*l.nweights/l.groups; + float *b = net.workspace; + float *c = l.output_gpu + (i*l.groups + j)*n*m; + float *im = net.input_gpu + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + + if (l.size == 1){ + b = 
im; + } else { + im2col_gpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + } + gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } +#endif + + if (l.batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); + } + + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); + //if(l.dot > 0) dot_error_gpu(l); + if(l.binary || l.xnor) swap_binary(&l); +} + +__global__ void smooth_kernel(float *x, int n, int w, int h, int c, int size, float rate, float *delta) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int j = id % w; + id /= w; + int i = id % h; + id /= h; + int k = id % c; + id /= c; + int b = id; + + int w_offset = -(size/2.f); + int h_offset = -(size/2.f); + + int out_index = j + w*(i + h*(k + c*b)); + int l, m; + for(l = 0; l < size; ++l){ + for(m = 0; m < size; ++m){ + int cur_h = h_offset + i + l; + int cur_w = w_offset + j + m; + int index = cur_w + w*(cur_h + h*(k + b*c)); + int valid = (cur_h >= 0 && cur_h < h && + cur_w >= 0 && cur_w < w); + delta[out_index] += valid ? 
rate*(x[index] - x[out_index]) : 0; + } + } +} + +extern "C" void smooth_layer(layer l, int size, float rate) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.out_c; + + size_t n = h*w*c*l.batch; + + smooth_kernel<<>>(l.output_gpu, n, l.w, l.h, l.c, size, rate, l.delta_gpu); + check_error(cudaPeekAtLastError()); +} + +void backward_convolutional_layer_gpu(convolutional_layer l, network net) +{ + if(l.smooth){ + smooth_layer(l, 5, l.smooth); + } + //constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + + + if(l.batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); + } + float *original_input = net.input_gpu; + + if(l.xnor) net.input_gpu = l.binary_input_gpu; +#ifdef CUDNN + float one = 1; + cudnnConvolutionBackwardFilter(cudnn_handle(), + &one, + l.srcTensorDesc, + net.input_gpu, + l.ddstTensorDesc, + l.delta_gpu, + l.convDesc, + l.bf_algo, + net.workspace, + l.workspace_size, + &one, + l.dweightDesc, + l.weight_updates_gpu); + + if(net.delta_gpu){ + if(l.binary || l.xnor) swap_binary(&l); + cudnnConvolutionBackwardData(cudnn_handle(), + &one, + l.weightDesc, + l.weights_gpu, + l.ddstTensorDesc, + l.delta_gpu, + l.convDesc, + l.bd_algo, + net.workspace, + l.workspace_size, + &one, + l.dsrcTensorDesc, + net.delta_gpu); + if(l.binary || l.xnor) swap_binary(&l); + if(l.xnor) gradient_array_gpu(original_input, l.batch*l.c*l.h*l.w, HARDTAN, net.delta_gpu); + } + +#else + int m = l.n/l.groups; + int n = l.size*l.size*l.c/l.groups; + int k = l.out_w*l.out_h; + + int i, j; + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.delta_gpu + (i*l.groups + j)*m*k; + float *b = net.workspace; + float *c = l.weight_updates_gpu + j*l.nweights/l.groups; + + float *im = net.input_gpu+(i*l.groups + j)*l.c/l.groups*l.h*l.w; + float *imd = net.delta_gpu+(i*l.groups + 
j)*l.c/l.groups*l.h*l.w; + + im2col_gpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (net.delta_gpu) { + if (l.binary || l.xnor) swap_binary(&l); + a = l.weights_gpu + j*l.nweights/l.groups; + b = l.delta_gpu + (i*l.groups + j)*m*k; + c = net.workspace; + if (l.size == 1) { + c = imd; + } + + gemm_gpu(1,0,n,k,m,1,a,n,b,k,0,c,k); + + if (l.size != 1) { + col2im_gpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd); + } + if(l.binary || l.xnor) { + swap_binary(&l); + } + } + if(l.xnor) gradient_array_gpu(original_input + i*l.c*l.h*l.w, l.c*l.h*l.w, HARDTAN, net.delta_gpu + i*l.c*l.h*l.w); + } + } +#endif +} + +void pull_convolutional_layer(layer l) +{ + cuda_pull_array(l.weights_gpu, l.weights, l.nweights); + cuda_pull_array(l.biases_gpu, l.biases, l.n); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_pull_array(l.scales_gpu, l.scales, l.n); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + +void push_convolutional_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.nweights); + cuda_push_array(l.biases_gpu, l.biases, l.n); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_push_array(l.scales_gpu, l.scales, l.n); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + +void update_convolutional_layer_gpu(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + if(a.adam){ + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, 
learning_rate, l.nweights, batch, a.t); + adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + if(l.scales_gpu){ + adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + } + }else{ + axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1); + + axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.n, momentum, l.bias_updates_gpu, 1); + + if(l.scales_gpu){ + axpy_gpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.n, momentum, l.scale_updates_gpu, 1); + } + } + if(l.clip){ + constrain_gpu(l.nweights, l.clip, l.weights_gpu, 1); + } +} + + diff --git a/workloads/realworld/uvm/darknet/src/convolutional_layer.c b/workloads/realworld/uvm/darknet/src/convolutional_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..1fb58b0933b06f2b27ec89f9f7c05f0b2b8a87eb --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/convolutional_layer.c @@ -0,0 +1,622 @@ +#include "convolutional_layer.h" +#include "utils.h" +#include "batchnorm_layer.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" +#include +#include + +#ifdef AI2 +#include "xnor_layer.h" +#endif + +void swap_binary(convolutional_layer *l) +{ + float *swap = l->weights; + l->weights = l->binary_weights; + l->binary_weights = swap; + +#ifdef GPU + swap = l->weights_gpu; + l->weights_gpu = l->binary_weights_gpu; + l->binary_weights_gpu = swap; +#endif +} + +void binarize_weights(float *weights, int n, int size, float *binary) +{ + int i, f; + for(f = 0; f < n; ++f){ + float mean = 0; + for(i = 0; i < size; ++i){ + mean += fabs(weights[f*size + i]); + } + mean = mean / 
size; + for(i = 0; i < size; ++i){ + binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean; + } + } +} + +void binarize_cpu(float *input, int n, float *binary) +{ + int i; + for(i = 0; i < n; ++i){ + binary[i] = (input[i] > 0) ? 1 : -1; + } +} + +void binarize_input(float *input, int n, int size, float *binary) +{ + int i, s; + for(s = 0; s < size; ++s){ + float mean = 0; + for(i = 0; i < n; ++i){ + mean += fabs(input[i*size + s]); + } + mean = mean / n; + for(i = 0; i < n; ++i){ + binary[i*size + s] = (input[i*size + s] > 0) ? mean : -mean; + } + } +} + +int convolutional_out_height(convolutional_layer l) +{ + return (l.h + 2*l.pad - l.size) / l.stride + 1; +} + +int convolutional_out_width(convolutional_layer l) +{ + return (l.w + 2*l.pad - l.size) / l.stride + 1; +} + +image get_convolutional_image(convolutional_layer l) +{ + return float_to_image(l.out_w,l.out_h,l.out_c,l.output); +} + +image get_convolutional_delta(convolutional_layer l) +{ + return float_to_image(l.out_w,l.out_h,l.out_c,l.delta); +} + +static size_t get_workspace_size(layer l){ +#ifdef CUDNN + if(gpu_index >= 0){ + size_t most = 0; + size_t s = 0; + cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(), + l.srcTensorDesc, + l.weightDesc, + l.convDesc, + l.dstTensorDesc, + l.fw_algo, + &s); + if (s > most) most = s; + cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnn_handle(), + l.srcTensorDesc, + l.ddstTensorDesc, + l.convDesc, + l.dweightDesc, + l.bf_algo, + &s); + if (s > most) most = s; + cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(), + l.weightDesc, + l.ddstTensorDesc, + l.convDesc, + l.dsrcTensorDesc, + l.bd_algo, + &s); + if (s > most) most = s; + return most; + } +#endif + return (size_t)l.out_h*l.out_w*l.size*l.size*l.c/l.groups*sizeof(float); +} + +#ifdef GPU +#ifdef CUDNN +void cudnn_convolutional_setup(layer *l) +{ + cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); + 
cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + + cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + + cudnnSetFilter4dDescriptor(l->dweightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size); + cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size); + #if CUDNN_MAJOR >= 6 + cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT); + #else + cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION); + #endif + + #if CUDNN_MAJOR >= 7 + cudnnSetConvolutionGroupCount(l->convDesc, l->groups); + #else + if(l->groups > 1){ + error("CUDNN < 7 doesn't support groups, please upgrade!"); + } + #endif + + cudnnGetConvolutionForwardAlgorithm(cudnn_handle(), + l->srcTensorDesc, + l->weightDesc, + l->convDesc, + l->dstTensorDesc, + CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->fw_algo); + cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(), + l->weightDesc, + l->ddstTensorDesc, + l->convDesc, + l->dsrcTensorDesc, + CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->bd_algo); + cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(), + l->srcTensorDesc, + l->ddstTensorDesc, + l->convDesc, + l->dweightDesc, + CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->bf_algo); +} +#endif +#endif + +convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, 
ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam) +{ + int i; + convolutional_layer l = {0}; + l.type = CONVOLUTIONAL; + + l.groups = groups; + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.binary = binary; + l.xnor = xnor; + l.batch = batch; + l.stride = stride; + l.size = size; + l.pad = padding; + l.batch_normalize = batch_normalize; + + l.weights = calloc(c/groups*n*size*size, sizeof(float)); + l.weight_updates = calloc(c/groups*n*size*size, sizeof(float)); + + l.biases = calloc(n, sizeof(float)); + l.bias_updates = calloc(n, sizeof(float)); + + l.nweights = c/groups*n*size*size; + l.nbiases = n; + + // float scale = 1./sqrt(size*size*c); + float scale = sqrt(2./(size*size*c/l.groups)); + //printf("convscale %f\n", scale); + //scale = .02; + //for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1); + for(i = 0; i < l.nweights; ++i) l.weights[i] = scale*rand_normal(); + int out_w = convolutional_out_width(l); + int out_h = convolutional_out_height(l); + l.out_h = out_h; + l.out_w = out_w; + l.out_c = n; + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = l.w * l.h * l.c; + + l.output = calloc(l.batch*l.outputs, sizeof(float)); + l.delta = calloc(l.batch*l.outputs, sizeof(float)); + + l.forward = forward_convolutional_layer; + l.backward = backward_convolutional_layer; + l.update = update_convolutional_layer; + if(binary){ + l.binary_weights = calloc(l.nweights, sizeof(float)); + l.cweights = calloc(l.nweights, sizeof(char)); + l.scales = calloc(n, sizeof(float)); + } + if(xnor){ + l.binary_weights = calloc(l.nweights, sizeof(float)); + l.binary_input = calloc(l.inputs*l.batch, sizeof(float)); + } + + if(batch_normalize){ + l.scales = calloc(n, sizeof(float)); + l.scale_updates = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(n, sizeof(float)); + l.variance = calloc(n, sizeof(float)); + + l.mean_delta = calloc(n, sizeof(float)); + l.variance_delta = calloc(n, 
sizeof(float)); + + l.rolling_mean = calloc(n, sizeof(float)); + l.rolling_variance = calloc(n, sizeof(float)); + l.x = calloc(l.batch*l.outputs, sizeof(float)); + l.x_norm = calloc(l.batch*l.outputs, sizeof(float)); + } + if(adam){ + l.m = calloc(l.nweights, sizeof(float)); + l.v = calloc(l.nweights, sizeof(float)); + l.bias_m = calloc(n, sizeof(float)); + l.scale_m = calloc(n, sizeof(float)); + l.bias_v = calloc(n, sizeof(float)); + l.scale_v = calloc(n, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_convolutional_layer_gpu; + l.backward_gpu = backward_convolutional_layer_gpu; + l.update_gpu = update_convolutional_layer_gpu; + + if(gpu_index >= 0){ + if (adam) { + l.m_gpu = cuda_make_array(l.m, l.nweights); + l.v_gpu = cuda_make_array(l.v, l.nweights); + l.bias_m_gpu = cuda_make_array(l.bias_m, n); + l.bias_v_gpu = cuda_make_array(l.bias_v, n); + l.scale_m_gpu = cuda_make_array(l.scale_m, n); + l.scale_v_gpu = cuda_make_array(l.scale_v, n); + } + + l.weights_gpu = cuda_make_array(l.weights, l.nweights); + l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights); + + l.biases_gpu = cuda_make_array(l.biases, n); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + + if(binary){ + l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); + } + if(xnor){ + l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); + l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch); + } + + if(batch_normalize){ + l.mean_gpu = cuda_make_array(l.mean, n); + l.variance_gpu = cuda_make_array(l.variance, n); + + l.rolling_mean_gpu = cuda_make_array(l.mean, n); + l.rolling_variance_gpu = cuda_make_array(l.variance, n); + + l.mean_delta_gpu = cuda_make_array(l.mean, n); + l.variance_delta_gpu = cuda_make_array(l.variance, n); + + l.scales_gpu = cuda_make_array(l.scales, n); + l.scale_updates_gpu = 
cuda_make_array(l.scale_updates, n); + + l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + } +#ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.srcTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnCreateFilterDescriptor(&l.weightDesc); + cudnnCreateTensorDescriptor(&l.dsrcTensorDesc); + cudnnCreateTensorDescriptor(&l.ddstTensorDesc); + cudnnCreateFilterDescriptor(&l.dweightDesc); + cudnnCreateConvolutionDescriptor(&l.convDesc); + cudnn_convolutional_setup(&l); +#endif + } +#endif + l.workspace_size = get_workspace_size(l); + l.activation = activation; + + fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BFLOPs\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, (2.0 * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w)/1000000000.); + + return l; +} + +void denormalize_convolutional_layer(convolutional_layer l) +{ + int i, j; + for(i = 0; i < l.n; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001); + for(j = 0; j < l.c/l.groups*l.size*l.size; ++j){ + l.weights[i*l.c/l.groups*l.size*l.size + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + +/* +void test_convolutional_layer() +{ + convolutional_layer l = make_convolutional_layer(1, 5, 5, 3, 2, 5, 2, 1, LEAKY, 1, 0, 0, 0); + l.batch_normalize = 1; + float data[] = {1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3}; + //net.input = data; + //forward_convolutional_layer(l); +} +*/ + +void resize_convolutional_layer(convolutional_layer *l, int w, int h) +{ + l->w = w; + l->h = h; + int out_w = convolutional_out_width(*l); + int out_h = convolutional_out_height(*l); + + l->out_w = out_w; + 
l->out_h = out_h; + + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->w * l->h * l->c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + if(l->batch_normalize){ + l->x = realloc(l->x, l->batch*l->outputs*sizeof(float)); + l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float)); + } + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); + + if(l->batch_normalize){ + cuda_free(l->x_gpu); + cuda_free(l->x_norm_gpu); + + l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); + l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); + } +#ifdef CUDNN + cudnn_convolutional_setup(l); +#endif +#endif + l->workspace_size = get_workspace_size(*l); +} + +void add_bias(float *output, float *biases, int batch, int n, int size) +{ + int i,j,b; + for(b = 0; b < batch; ++b){ + for(i = 0; i < n; ++i){ + for(j = 0; j < size; ++j){ + output[(b*n + i)*size + j] += biases[i]; + } + } + } +} + +void scale_bias(float *output, float *scales, int batch, int n, int size) +{ + int i,j,b; + for(b = 0; b < batch; ++b){ + for(i = 0; i < n; ++i){ + for(j = 0; j < size; ++j){ + output[(b*n + i)*size + j] *= scales[i]; + } + } + } +} + +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size) +{ + int i,b; + for(b = 0; b < batch; ++b){ + for(i = 0; i < n; ++i){ + bias_updates[i] += sum_array(delta+size*(i+b*n), size); + } + } +} + +void forward_convolutional_layer(convolutional_layer l, network net) +{ + int i, j; + + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + + if(l.xnor){ + binarize_weights(l.weights, l.n, l.c/l.groups*l.size*l.size, l.binary_weights); + swap_binary(&l); + binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input); + net.input = l.binary_input; + } + + int m = l.n/l.groups; + 
int k = l.size*l.size*l.c/l.groups; + int n = l.out_w*l.out_h; + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.weights + j*l.nweights/l.groups; + float *b = net.workspace; + float *c = l.output + (i*l.groups + j)*n*m; + float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + + if (l.size == 1) { + b = im; + } else { + im2col_cpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + } + gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } + + if(l.batch_normalize){ + forward_batchnorm_layer(l, net); + } else { + add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w); + } + + activate_array(l.output, l.outputs*l.batch, l.activation); + if(l.binary || l.xnor) swap_binary(&l); +} + +void backward_convolutional_layer(convolutional_layer l, network net) +{ + int i, j; + int m = l.n/l.groups; + int n = l.size*l.size*l.c/l.groups; + int k = l.out_w*l.out_h; + + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l.bias_updates, l.delta, l.batch, l.n, k); + } + + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.delta + (i*l.groups + j)*m*k; + float *b = net.workspace; + float *c = l.weight_updates + j*l.nweights/l.groups; + + float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + float *imd = net.delta + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + + if(l.size == 1){ + b = im; + } else { + im2col_cpu(im, l.c/l.groups, l.h, l.w, + l.size, l.stride, l.pad, b); + } + + gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (net.delta) { + a = l.weights + j*l.nweights/l.groups; + b = l.delta + (i*l.groups + j)*m*k; + c = net.workspace; + if (l.size == 1) { + c = imd; + } + + gemm(1,0,n,k,m,1,a,n,b,k,0,c,k); + + if (l.size != 1) { + col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd); + } + } + } + } +} + +void update_convolutional_layer(convolutional_layer l, update_args a) +{ + float 
learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.n, momentum, l.bias_updates, 1); + + if(l.scales){ + axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.n, momentum, l.scale_updates, 1); + } + + axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(l.nweights, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(l.nweights, momentum, l.weight_updates, 1); +} + + +image get_convolutional_weight(convolutional_layer l, int i) +{ + int h = l.size; + int w = l.size; + int c = l.c/l.groups; + return float_to_image(w,h,c,l.weights+i*h*w*c); +} + +void rgbgr_weights(convolutional_layer l) +{ + int i; + for(i = 0; i < l.n; ++i){ + image im = get_convolutional_weight(l, i); + if (im.c == 3) { + rgbgr_image(im); + } + } +} + +void rescale_weights(convolutional_layer l, float scale, float trans) +{ + int i; + for(i = 0; i < l.n; ++i){ + image im = get_convolutional_weight(l, i); + if (im.c == 3) { + scale_image(im, scale); + float sum = sum_array(im.data, im.w*im.h*im.c); + l.biases[i] += sum*trans; + } + } +} + +image *get_weights(convolutional_layer l) +{ + image *weights = calloc(l.n, sizeof(image)); + int i; + for(i = 0; i < l.n; ++i){ + weights[i] = copy_image(get_convolutional_weight(l, i)); + normalize_image(weights[i]); + /* + char buff[256]; + sprintf(buff, "filter%d", i); + save_image(weights[i], buff); + */ + } + //error("hey"); + return weights; +} + +image *visualize_convolutional_layer(convolutional_layer l, char *window, image *prev_weights) +{ + image *single_weights = get_weights(l); + show_images(single_weights, l.n, window); + + image delta = get_convolutional_image(l); + image dc = collapse_image_layers(delta, 1); + char buff[256]; + sprintf(buff, "%s: Output", window); + //show_image(dc, buff); + //save_image(dc, 
buff); + free_image(dc); + return single_weights; +} + diff --git a/workloads/realworld/uvm/darknet/src/convolutional_layer.h b/workloads/realworld/uvm/darknet/src/convolutional_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..baacf38f4127a42abe009ef8aa3b59543433a286 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/convolutional_layer.h @@ -0,0 +1,50 @@ +#ifndef CONVOLUTIONAL_LAYER_H +#define CONVOLUTIONAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +typedef layer convolutional_layer; + +#ifdef GPU +void forward_convolutional_layer_gpu(convolutional_layer layer, network net); +void backward_convolutional_layer_gpu(convolutional_layer layer, network net); +void update_convolutional_layer_gpu(convolutional_layer layer, update_args a); + +void push_convolutional_layer(convolutional_layer layer); +void pull_convolutional_layer(convolutional_layer layer); + +void add_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); +void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t); +#ifdef CUDNN +void cudnn_convolutional_setup(layer *l); +#endif +#endif + +convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam); +void resize_convolutional_layer(convolutional_layer *layer, int w, int h); +void forward_convolutional_layer(const convolutional_layer layer, network net); +void update_convolutional_layer(convolutional_layer layer, update_args a); +image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_weights); +void binarize_weights(float *weights, int n, int size, float *binary); +void 
swap_binary(convolutional_layer *l); +void binarize_weights2(float *weights, int n, int size, char *binary, float *scales); + +void backward_convolutional_layer(convolutional_layer layer, network net); + +void add_bias(float *output, float *biases, int batch, int n, int size); +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); + +image get_convolutional_image(convolutional_layer layer); +image get_convolutional_delta(convolutional_layer layer); +image get_convolutional_weight(convolutional_layer layer, int i); + +int convolutional_out_height(convolutional_layer layer); +int convolutional_out_width(convolutional_layer layer); + +#endif + diff --git a/workloads/realworld/uvm/darknet/src/cost_layer.c b/workloads/realworld/uvm/darknet/src/cost_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..85fa85daf306dda03c113a6bbdc2d92b25d0b00d --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/cost_layer.c @@ -0,0 +1,176 @@ +#include "cost_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include +#include +#include +#include + +COST_TYPE get_cost_type(char *s) +{ + if (strcmp(s, "seg")==0) return SEG; + if (strcmp(s, "sse")==0) return SSE; + if (strcmp(s, "masked")==0) return MASKED; + if (strcmp(s, "smooth")==0) return SMOOTH; + if (strcmp(s, "L1")==0) return L1; + if (strcmp(s, "wgan")==0) return WGAN; + fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s); + return SSE; +} + +char *get_cost_string(COST_TYPE a) +{ + switch(a){ + case SEG: + return "seg"; + case SSE: + return "sse"; + case MASKED: + return "masked"; + case SMOOTH: + return "smooth"; + case L1: + return "L1"; + case WGAN: + return "wgan"; + } + return "sse"; +} + +cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) +{ + fprintf(stderr, "cost %4d\n", inputs); + cost_layer l = {0}; + l.type = COST; + + l.scale = scale; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + 
l.cost_type = cost_type; + l.delta = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_cost_layer; + l.backward = backward_cost_layer; + #ifdef GPU + l.forward_gpu = forward_cost_layer_gpu; + l.backward_gpu = backward_cost_layer_gpu; + + l.delta_gpu = cuda_make_array(l.output, inputs*batch); + l.output_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void resize_cost_layer(cost_layer *l, int inputs) +{ + l->inputs = inputs; + l->outputs = inputs; + l->delta = realloc(l->delta, inputs*l->batch*sizeof(float)); + l->output = realloc(l->output, inputs*l->batch*sizeof(float)); +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch); + l->output_gpu = cuda_make_array(l->output, inputs*l->batch); +#endif +} + +void forward_cost_layer(cost_layer l, network net) +{ + if (!net.truth) return; + if(l.cost_type == MASKED){ + int i; + for(i = 0; i < l.batch*l.inputs; ++i){ + if(net.truth[i] == SECRET_NUM) net.input[i] = SECRET_NUM; + } + } + if(l.cost_type == SMOOTH){ + smooth_l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + }else if(l.cost_type == L1){ + l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + } else { + l2_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + } + l.cost[0] = sum_array(l.output, l.batch*l.inputs); +} + +void backward_cost_layer(const cost_layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_cost_layer(cost_layer l) +{ + cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +void push_cost_layer(cost_layer l) +{ + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +int float_abs_compare (const void * a, const void * b) +{ + float fa = *(const float*) a; + if(fa < 0) fa = -fa; + float fb = *(const float*) b; + if(fb < 0) fb = 
-fb; + return (fa > fb) - (fa < fb); +} + +void forward_cost_layer_gpu(cost_layer l, network net) +{ + if (!net.truth) return; + if(l.smooth){ + scal_gpu(l.batch*l.inputs, (1-l.smooth), net.truth_gpu, 1); + add_gpu(l.batch*l.inputs, l.smooth * 1./l.inputs, net.truth_gpu, 1); + } + + if(l.cost_type == SMOOTH){ + smooth_l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } else if (l.cost_type == L1){ + l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } else if (l.cost_type == WGAN){ + wgan_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } else { + l2_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } + + if (l.cost_type == SEG && l.noobject_scale != 1) { + scale_mask_gpu(l.batch*l.inputs, l.delta_gpu, 0, net.truth_gpu, l.noobject_scale); + scale_mask_gpu(l.batch*l.inputs, l.output_gpu, 0, net.truth_gpu, l.noobject_scale); + } + if (l.cost_type == MASKED) { + mask_gpu(l.batch*l.inputs, net.delta_gpu, SECRET_NUM, net.truth_gpu, 0); + } + + if(l.ratio){ + cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); + qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare); + int n = (1-l.ratio) * l.batch*l.inputs; + float thresh = l.delta[n]; + thresh = 0; + printf("%f\n", thresh); + supp_gpu(l.batch*l.inputs, thresh, l.delta_gpu, 1); + } + + if(l.thresh){ + supp_gpu(l.batch*l.inputs, l.thresh*1./l.inputs, l.delta_gpu, 1); + } + + cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs); + l.cost[0] = sum_array(l.output, l.batch*l.inputs); +} + +void backward_cost_layer_gpu(const cost_layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/uvm/darknet/src/cost_layer.h b/workloads/realworld/uvm/darknet/src/cost_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..ceb64de00bf66839c2f34852a05ea71114608a35 --- /dev/null 
+++ b/workloads/realworld/uvm/darknet/src/cost_layer.h @@ -0,0 +1,20 @@ +#ifndef COST_LAYER_H +#define COST_LAYER_H +#include "layer.h" +#include "network.h" + +typedef layer cost_layer; + +COST_TYPE get_cost_type(char *s); +char *get_cost_string(COST_TYPE a); +cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale); +void forward_cost_layer(const cost_layer l, network net); +void backward_cost_layer(const cost_layer l, network net); +void resize_cost_layer(cost_layer *l, int inputs); + +#ifdef GPU +void forward_cost_layer_gpu(cost_layer l, network net); +void backward_cost_layer_gpu(const cost_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm/darknet/src/cpu_timestamps.c b/workloads/realworld/uvm/darknet/src/cpu_timestamps.c new file mode 100644 index 0000000000000000000000000000000000000000..35114479c7a9cce3debe2204b6886ad5528041d5 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/cpu_timestamps.c @@ -0,0 +1,20 @@ +#include "cpu_timestamps.h" + +void startCPU() { + struct timespec tv; + if(clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + startCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); +} + + + +void endCPU() { + struct timespec tv; + if(clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + + endCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); + //endCPUTimestamp1 = std::chrono::system_clock::now(); + printf("CPU_Times,%lu,%lu,%lu\n", startCPUTime, endCPUTime, endCPUTime-startCPUTime); +} diff --git a/workloads/realworld/uvm/darknet/src/cpu_timestamps.h b/workloads/realworld/uvm/darknet/src/cpu_timestamps.h new file mode 100644 index 0000000000000000000000000000000000000000..e53e995a5603b4610759c02a4a179eb9f0124e48 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/cpu_timestamps.h @@ -0,0 +1,21 @@ +#ifndef CPU_TIMESTAMP_ +#define CPU_TIMESTAMP_ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +static uint64_t startCPUTime; 
+static uint64_t endCPUTime; + +void startCPU(); +void endCPU(); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/workloads/realworld/uvm/darknet/src/crnn_layer.c b/workloads/realworld/uvm/darknet/src/crnn_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..159e17f92d45693461c92d482bf3aa7354a148d8 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/crnn_layer.c @@ -0,0 +1,283 @@ +#include "crnn_layer.h" +#include "convolutional_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize) +{ + fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = CRNN; + l.steps = steps; + l.h = h; + l.w = w; + l.c = c; + l.out_h = h; + l.out_w = w; + l.out_c = output_filters; + l.inputs = h*w*c; + l.hidden = h * w * hidden_filters; + l.outputs = l.out_h * l.out_w * l.out_c; + + l.state = calloc(l.hidden*batch*(steps+1), sizeof(float)); + + l.input_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.input_layer) = make_convolutional_layer(batch*steps, h, w, c, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.input_layer->batch = batch; + + l.self_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.self_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.self_layer->batch = batch; + + l.output_layer = 
malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.output_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, output_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.output_layer->batch = batch; + + l.output = l.output_layer->output; + l.delta = l.output_layer->delta; + + l.forward = forward_crnn_layer; + l.backward = backward_crnn_layer; + l.update = update_crnn_layer; + +#ifdef GPU + l.forward_gpu = forward_crnn_layer_gpu; + l.backward_gpu = backward_crnn_layer_gpu; + l.update_gpu = update_crnn_layer_gpu; + + l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1)); + l.output_gpu = l.output_layer->output_gpu; + l.delta_gpu = l.output_layer->delta_gpu; +#endif + + return l; +} + +void update_crnn_layer(layer l, update_args a) +{ + update_convolutional_layer(*(l.input_layer), a); + update_convolutional_layer(*(l.self_layer), a); + update_convolutional_layer(*(l.output_layer), a); +} + +void forward_crnn_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1); + fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1); + fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1); + if(net.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1); + + for (i = 0; i < l.steps; ++i) { + s.input = net.input; + forward_convolutional_layer(input_layer, s); + + s.input = l.state; + forward_convolutional_layer(self_layer, s); + + float *old_state = l.state; + if(net.train) l.state += l.hidden*l.batch; + if(l.shortcut){ + copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1); + }else{ + fill_cpu(l.hidden * l.batch, 0, l.state, 1); + } + axpy_cpu(l.hidden * l.batch, 1, input_layer.output, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + 
forward_convolutional_layer(output_layer, s); + + net.input += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_crnn_layer(layer l, network net) +{ + network s = net; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + increment_layer(&input_layer, l.steps-1); + increment_layer(&self_layer, l.steps-1); + increment_layer(&output_layer, l.steps-1); + + l.state += l.hidden*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + s.delta = self_layer.delta; + backward_convolutional_layer(output_layer, s); + + l.state -= l.hidden*l.batch; + /* + if(i > 0){ + copy_cpu(l.hidden * l.batch, input_layer.output - l.hidden*l.batch, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output - l.hidden*l.batch, 1, l.state, 1); + }else{ + fill_cpu(l.hidden * l.batch, 0, l.state, 1); + } + */ + + s.input = l.state; + s.delta = self_layer.delta - l.hidden*l.batch; + if (i == 0) s.delta = 0; + backward_convolutional_layer(self_layer, s); + + copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1); + if (i > 0 && l.shortcut) axpy_cpu(l.hidden*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.hidden*l.batch, 1); + s.input = net.input + i*l.inputs*l.batch; + if(net.delta) s.delta = net.delta + i*l.inputs*l.batch; + else s.delta = 0; + backward_convolutional_layer(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} + +#ifdef GPU + +void pull_crnn_layer(layer l) +{ + pull_convolutional_layer(*(l.input_layer)); + pull_convolutional_layer(*(l.self_layer)); + pull_convolutional_layer(*(l.output_layer)); +} + +void push_crnn_layer(layer l) +{ + 
push_convolutional_layer(*(l.input_layer)); + push_convolutional_layer(*(l.self_layer)); + push_convolutional_layer(*(l.output_layer)); +} + +void update_crnn_layer_gpu(layer l, update_args a) +{ + update_convolutional_layer_gpu(*(l.input_layer), a); + update_convolutional_layer_gpu(*(l.self_layer), a); + update_convolutional_layer_gpu(*(l.output_layer), a); +} + +void forward_crnn_layer_gpu(layer l, network net) +{ + network s = net; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_gpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); + fill_gpu(l.hidden * l.batch * l.steps, 0, self_layer.delta_gpu, 1); + fill_gpu(l.hidden * l.batch * l.steps, 0, input_layer.delta_gpu, 1); + if(net.train) fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1); + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = net.input_gpu; + forward_convolutional_layer_gpu(input_layer, s); + + s.input_gpu = l.state_gpu; + forward_convolutional_layer_gpu(self_layer, s); + + float *old_state = l.state_gpu; + if(net.train) l.state_gpu += l.hidden*l.batch; + if(l.shortcut){ + copy_gpu(l.hidden * l.batch, old_state, 1, l.state_gpu, 1); + }else{ + fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1); + } + axpy_gpu(l.hidden * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + forward_convolutional_layer_gpu(output_layer, s); + + net.input_gpu += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_crnn_layer_gpu(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + increment_layer(&input_layer, l.steps - 1); + increment_layer(&self_layer, l.steps - 1); + 
increment_layer(&output_layer, l.steps - 1); + l.state_gpu += l.hidden*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_gpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu; + backward_convolutional_layer_gpu(output_layer, s); + + l.state_gpu -= l.hidden*l.batch; + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu - l.hidden*l.batch; + if (i == 0) s.delta_gpu = 0; + backward_convolutional_layer_gpu(self_layer, s); + + copy_gpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); + if (i > 0 && l.shortcut) axpy_gpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1); + s.input_gpu = net.input_gpu + i*l.inputs*l.batch; + if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch; + else s.delta_gpu = 0; + backward_convolutional_layer_gpu(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} +#endif diff --git a/workloads/realworld/uvm/darknet/src/crnn_layer.h b/workloads/realworld/uvm/darknet/src/crnn_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..515f378354e9cc6149e7a1ac60ffc86ace112991 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/crnn_layer.h @@ -0,0 +1,24 @@ + +#ifndef CRNN_LAYER_H +#define CRNN_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize); + +void forward_crnn_layer(layer l, network net); +void backward_crnn_layer(layer l, network net); +void update_crnn_layer(layer l, update_args a); + +#ifdef GPU +void forward_crnn_layer_gpu(layer l, network net); +void backward_crnn_layer_gpu(layer l, network net); +void 
update_crnn_layer_gpu(layer l, update_args a); +void push_crnn_layer(layer l); +void pull_crnn_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/uvm/darknet/src/crop_layer.c b/workloads/realworld/uvm/darknet/src/crop_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..3c8c9650bda6dcf4485ce8da8e2fa1984f2b244d --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/crop_layer.c @@ -0,0 +1,103 @@ +#include "crop_layer.h" +#include "cuda_dark.h" +#include + +image get_crop_image(crop_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.out_c; + return float_to_image(w,h,c,l.output); +} + +void backward_crop_layer(const crop_layer l, network net){} +void backward_crop_layer_gpu(const crop_layer l, network net){} + +crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure) +{ + fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c); + crop_layer l = {0}; + l.type = CROP; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.scale = (float)crop_height / h; + l.flip = flip; + l.angle = angle; + l.saturation = saturation; + l.exposure = exposure; + l.out_w = crop_width; + l.out_h = crop_height; + l.out_c = c; + l.inputs = l.w * l.h * l.c; + l.outputs = l.out_w * l.out_h * l.out_c; + l.output = calloc(l.outputs*batch, sizeof(float)); + l.forward = forward_crop_layer; + l.backward = backward_crop_layer; + + #ifdef GPU + l.forward_gpu = forward_crop_layer_gpu; + l.backward_gpu = backward_crop_layer_gpu; + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + l.rand_gpu = cuda_make_array(0, l.batch*8); + #endif + return l; +} + +void resize_crop_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->out_w = l->scale*w; + l->out_h = l->scale*h; + + l->inputs = l->w * l->h * l->c; + l->outputs = l->out_h * l->out_w * l->out_c; + + l->output = realloc(l->output, 
l->batch*l->outputs*sizeof(float)); + #ifdef GPU + cuda_free(l->output_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + #endif +} + + +void forward_crop_layer(const crop_layer l, network net) +{ + int i,j,c,b,row,col; + int index; + int count = 0; + int flip = (l.flip && rand()%2); + int dh = rand()%(l.h - l.out_h + 1); + int dw = rand()%(l.w - l.out_w + 1); + float scale = 2; + float trans = -1; + if(l.noadjust){ + scale = 1; + trans = 0; + } + if(!net.train){ + flip = 0; + dh = (l.h - l.out_h)/2; + dw = (l.w - l.out_w)/2; + } + for(b = 0; b < l.batch; ++b){ + for(c = 0; c < l.c; ++c){ + for(i = 0; i < l.out_h; ++i){ + for(j = 0; j < l.out_w; ++j){ + if(flip){ + col = l.w - dw - j - 1; + }else{ + col = j + dw; + } + row = i + dh; + index = col+l.w*(row+l.h*(c + l.c*b)); + l.output[count++] = net.input[index]*scale + trans; + } + } + } + } +} + diff --git a/workloads/realworld/uvm/darknet/src/crop_layer.h b/workloads/realworld/uvm/darknet/src/crop_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..3b5883c47d6df0987700e1b0434010eebd6312af --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/crop_layer.h @@ -0,0 +1,20 @@ +#ifndef CROP_LAYER_H +#define CROP_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +typedef layer crop_layer; + +image get_crop_image(crop_layer l); +crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure); +void forward_crop_layer(const crop_layer l, network net); +void resize_crop_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_crop_layer_gpu(crop_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/uvm/darknet/src/crop_layer_kernels.cu b/workloads/realworld/uvm/darknet/src/crop_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..7e262fce4ff8beb52de23d7c79bd1917410ef136 --- /dev/null +++ 
b/workloads/realworld/uvm/darknet/src/crop_layer_kernels.cu @@ -0,0 +1,225 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "crop_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "image.h" +} + +__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c) +{ + if(x < 0 || x >= w || y < 0 || y >= h) return 0; + return image[x + w*(y + c*h)]; +} + +__device__ float3 rgb_to_hsv_kernel(float3 rgb) +{ + float r = rgb.x; + float g = rgb.y; + float b = rgb.z; + + float h, s, v; + float max = (r > g) ? ( (r > b) ? r : b) : ( (g > b) ? g : b); + float min = (r < g) ? ( (r < b) ? r : b) : ( (g < b) ? g : b); + float delta = max - min; + v = max; + if(max == 0){ + s = 0; + h = -1; + }else{ + s = delta/max; + if(r == max){ + h = (g - b) / delta; + } else if (g == max) { + h = 2 + (b - r) / delta; + } else { + h = 4 + (r - g) / delta; + } + if (h < 0) h += 6; + } + return make_float3(h, s, v); +} + +__device__ float3 hsv_to_rgb_kernel(float3 hsv) +{ + float h = hsv.x; + float s = hsv.y; + float v = hsv.z; + + float r, g, b; + float f, p, q, t; + + if (s == 0) { + r = g = b = v; + } else { + int index = (int) floorf(h); + f = h - index; + p = v*(1-s); + q = v*(1-s*f); + t = v*(1-s*(1-f)); + if(index == 0){ + r = v; g = t; b = p; + } else if(index == 1){ + r = q; g = v; b = p; + } else if(index == 2){ + r = p; g = v; b = t; + } else if(index == 3){ + r = p; g = q; b = v; + } else if(index == 4){ + r = t; g = p; b = v; + } else { + r = v; g = p; b = q; + } + } + r = (r < 0) ? 0 : ((r > 1) ? 1 : r); + g = (g < 0) ? 0 : ((g > 1) ? 1 : g); + b = (b < 0) ? 0 : ((b > 1) ? 
1 : b); + return make_float3(r, g, b); +} + +__device__ float bilinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c) +{ + int ix = (int) floorf(x); + int iy = (int) floorf(y); + + float dx = x - ix; + float dy = y - iy; + + float val = (1-dy) * (1-dx) * get_pixel_kernel(image, w, h, ix, iy, c) + + dy * (1-dx) * get_pixel_kernel(image, w, h, ix, iy+1, c) + + (1-dy) * dx * get_pixel_kernel(image, w, h, ix+1, iy, c) + + dy * dx * get_pixel_kernel(image, w, h, ix+1, iy+1, c); + return val; +} + +__global__ void levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure, float translate, float scale, float shift) +{ + int size = batch * w * h; + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= size) return; + int x = id % w; + id /= w; + int y = id % h; + id /= h; + float rshift = rand[0]; + float gshift = rand[1]; + float bshift = rand[2]; + float r0 = rand[8*id + 0]; + float r1 = rand[8*id + 1]; + float r2 = rand[8*id + 2]; + float r3 = rand[8*id + 3]; + + saturation = r0*(saturation - 1) + 1; + saturation = (r1 > .5f) ? 1.f/saturation : saturation; + exposure = r2*(exposure - 1) + 1; + exposure = (r3 > .5f) ? 
1.f/exposure : exposure; + + size_t offset = id * h * w * 3; + image += offset; + float r = image[x + w*(y + h*0)]; + float g = image[x + w*(y + h*1)]; + float b = image[x + w*(y + h*2)]; + float3 rgb = make_float3(r,g,b); + if(train){ + float3 hsv = rgb_to_hsv_kernel(rgb); + hsv.y *= saturation; + hsv.z *= exposure; + rgb = hsv_to_rgb_kernel(hsv); + } else { + shift = 0; + } + image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5f)*shift; + image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5f)*shift; + image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5f)*shift; +} + +__global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= size) return; + + float cx = w/2.f; + float cy = h/2.f; + + int count = id; + int j = id % crop_width; + id /= crop_width; + int i = id % crop_height; + id /= crop_height; + int k = id % c; + id /= c; + int b = id; + + float r4 = rand[8*b + 4]; + float r5 = rand[8*b + 5]; + float r6 = rand[8*b + 6]; + float r7 = rand[8*b + 7]; + + float dw = (w - crop_width)*r4; + float dh = (h - crop_height)*r5; + flip = (flip && (r6 > .5f)); + angle = 2*angle*r7 - angle; + if(!train){ + dw = (w - crop_width)/2.f; + dh = (h - crop_height)/2.f; + flip = 0; + angle = 0; + } + + input += w*h*c*b; + + float x = (flip) ? 
w - dw - j - 1 : j + dw; + float y = i + dh; + + float rx = cosf(angle)*(x-cx) - sinf(angle)*(y-cy) + cx; + float ry = sinf(angle)*(x-cx) + cosf(angle)*(y-cy) + cy; + + output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k); +} + +extern "C" void forward_crop_layer_gpu(crop_layer layer, network net) +{ + cuda_random(layer.rand_gpu, layer.batch*8); + + float radians = layer.angle*3.14159265f/180.f; + + float scale = 2; + float translate = -1; + if(layer.noadjust){ + scale = 1; + translate = 0; + } + + int size = layer.batch * layer.w * layer.h; + + levels_image_kernel<<>>(net.input_gpu, layer.rand_gpu, layer.batch, layer.w, layer.h, net.train, layer.saturation, layer.exposure, translate, scale, layer.shift); + check_error(cudaPeekAtLastError()); + + size = layer.batch*layer.c*layer.out_w*layer.out_h; + + forward_crop_layer_kernel<<>>(net.input_gpu, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, net.train, layer.flip, radians, layer.output_gpu); + check_error(cudaPeekAtLastError()); + +/* + cuda_pull_array(layer.output_gpu, layer.output, size); + image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 0*(size/layer.batch)); + image im2 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 1*(size/layer.batch)); + image im3 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 2*(size/layer.batch)); + + translate_image(im, -translate); + scale_image(im, 1/scale); + translate_image(im2, -translate); + scale_image(im2, 1/scale); + translate_image(im3, -translate); + scale_image(im3, 1/scale); + + show_image(im, "cropped"); + show_image(im2, "cropped2"); + show_image(im3, "cropped3"); + cvWaitKey(0); + */ +} + diff --git a/workloads/realworld/uvm/darknet/src/cuda_dark.cu b/workloads/realworld/uvm/darknet/src/cuda_dark.cu new file mode 100644 index 0000000000000000000000000000000000000000..ce415292e8f25ef96de7aa3d4954592fea771195 --- /dev/null +++ 
b/workloads/realworld/uvm/darknet/src/cuda_dark.cu @@ -0,0 +1,435 @@ +int gpu_index = 0; + +#ifdef GPU + +#include "cuda.h" +#include "utils.h" +#include "blas.h" +#include +#include +#include + + +#include + +void cuda_set_device(int n) +{ + gpu_index = n; + cudaError_t status = cudaSetDevice(n); + check_error(status); +} + +int cuda_get_device() +{ + int n = 0; + cudaError_t status = cudaGetDevice(&n); + check_error(status); + return n; +} + +void check_error(cudaError_t status) +{ + cudaDeviceSynchronize(); + cudaError_t status2 = cudaGetLastError(); + if (status != cudaSuccess) + { + const char *s = cudaGetErrorString(status); + char buffer[256]; + printf("CUDA Error: %s\n", s); + assert(0); + snprintf(buffer, 256, "CUDA Error: %s", s); + error(buffer); + } + if (status2 != cudaSuccess) + { + const char *s = cudaGetErrorString(status2); + char buffer[256]; + printf("CUDA Error Prev: %s\n", s); + assert(0); + snprintf(buffer, 256, "CUDA Error Prev: %s", s); + error(buffer); + } +} + +dim3 cuda_gridsize(size_t n){ + size_t k = (n-1) / BLOCK + 1; + size_t x = k; + size_t y = 1; + if(x > 65535){ + x = ceil(sqrt(k)); + y = (n-1)/(x*BLOCK) + 1; + } + dim3 d = {x, y, 1}; + //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); + return d; +} + +#ifdef CUDNN +cudnnHandle_t cudnn_handle() +{ + static int init[16] = {0}; + static cudnnHandle_t handle[16]; + int i = cuda_get_device(); + if(!init[i]) { + cudnnCreate(&handle[i]); + init[i] = 1; + } + return handle[i]; +} +#endif + +cublasHandle_t blas_handle() +{ + static int init[16] = {0}; + static cublasHandle_t handle[16]; + int i = cuda_get_device(); + if(!init[i]) { + cublasCreate(&handle[i]); + init[i] = 1; + } + return handle[i]; +} + +float *cuda_make_array(float *x, size_t n) +{ + float *x_gpu; + size_t size = sizeof(float)*n; + // cudaError_t status = cudaMalloc((void **)&x_gpu, size); + cudaError_t status = cudaMallocManaged((void **)&x_gpu, size); + check_error(status); + if(x){ + // status = cudaMemcpy(x_gpu, x, 
size, cudaMemcpyHostToDevice); + // check_error(status); + // for (int i = 0; i < n; i++) { + // x_gpu[i] = x[i]; + // } + memcpy(x_gpu, x, size); + } else { + fill_gpu(n, 0, x_gpu, 1); + } + if(!x_gpu) error("Cuda malloc failed\n"); + return x_gpu; +} + +void cuda_random(float *x_gpu, size_t n) +{ + static curandGenerator_t gen[16]; + static int init[16] = {0}; + int i = cuda_get_device(); + if(!init[i]){ + curandCreateGenerator(&gen[i], CURAND_RNG_PSEUDO_DEFAULT); + curandSetPseudoRandomGeneratorSeed(gen[i], time(0)); + init[i] = 1; + } + curandGenerateUniform(gen[i], x_gpu, n); + check_error(cudaPeekAtLastError()); +} + +float cuda_compare(float *x_gpu, float *x, size_t n, char *s) +{ + float *tmp = (float *) calloc(n, sizeof(float)); + cuda_pull_array(x_gpu, tmp, n); + //int i; + //for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]); + axpy_cpu(n, -1, x, 1, tmp, 1); + float err = dot_cpu(n, tmp, 1, tmp, 1); + printf("Error %s: %f\n", s, sqrt(err/n)); + free(tmp); + return err; +} + +int *cuda_make_int_array(int *x, size_t n) +{ + int *x_gpu; + size_t size = sizeof(int)*n; + // cudaError_t status = cudaMalloc((void **)&x_gpu, size); + cudaError_t status = cudaMallocManaged((void **)&x_gpu, size); + check_error(status); + if(x){ + // status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + // check_error(status); + memcpy(x_gpu, x, size); + // for (int i = 0; i < n; i++) + // x_gpu[i] = x[i]; + } + if(!x_gpu) error("Cuda malloc failed\n"); + return x_gpu; +} + +void cuda_free(float *x_gpu) +{ + cudaError_t status = cudaFree(x_gpu); + check_error(status); +} + +void cuda_push_array(float *x_gpu, float *x, size_t n) +{ + size_t size = sizeof(float)*n; + // cudaError_t status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + // check_error(status); + memcpy(x_gpu, x, size); + // for (int i = 0; i < n; i++) + // x_gpu[i] = x[i]; +} + +void cuda_pull_array(float *x_gpu, float *x, size_t n) +{ + size_t size = sizeof(float)*n; + // cudaError_t status = 
cudaMemcpy(x, x_gpu, size, cudaMemcpyDeviceToHost); + // check_error(status); + memcpy(x, x_gpu, size); + // for (int i = 0; i < n; i++) + // x[i] = x_gpu[i]; +} + +float cuda_mag_array(float *x_gpu, size_t n) +{ + float *temp = (float *) calloc(n, sizeof(float)); + cuda_pull_array(x_gpu, temp, n); + float m = mag_array(temp, n); + free(temp); + return m; +} + +static const char * +getMemcpyKindString(CUpti_ActivityMemcpyKind kind) +{ + switch (kind) + { + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOD: + return "HtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOH: + return "DtoH"; + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOA: + return "HtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOH: + return "AtoH"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOA: + return "AtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOD: + return "AtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOA: + return "DtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOD: + return "DtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOH: + return "HtoH"; + default: + break; + } + + return ""; +} + +static const char * +getUvmCounterKindString(CUpti_ActivityUnifiedMemoryCounterKind kind) +{ + switch (kind) + { + case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD: + return "BYTES_TRANSFER_HTOD"; + case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH: + return "BYTES_TRANSFER_DTOH"; + default: + break; + } + return ""; +} + +static void +printActivity(CUpti_Activity *record) +{ + switch (record->kind) + { + case CUPTI_ACTIVITY_KIND_KERNEL: + { + int status; + CUpti_ActivityKernel4 *kernel = (CUpti_ActivityKernel4 *)record; + printf("KERNEL %s, %llu, %llu, %llu\n", + abi::__cxa_demangle(kernel->name, 0, 0, &status), + (unsigned long long)(kernel->start), + (unsigned long long)(kernel->end), + (unsigned long long)(kernel->end) - (kernel->start)); + break; + } + case CUPTI_ACTIVITY_KIND_RUNTIME: + { + CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record; + const char *callback_name; + cuptiGetCallbackName(CUPTI_CB_DOMAIN_RUNTIME_API, 
api->cbid, &callback_name); + // printf("RUNTIME %s (cbid=%u) [ %llu - %llu ] process %u, thread %u, correlation %u\n", + // callback_name, api->cbid, + // (unsigned long long)(api->start - startTimestamp), + // (unsigned long long)(api->end - startTimestamp), + // api->processId, api->threadId, api->correlationId); + printf("RUNTIME %s (cbid=%u), %llu,%llu,%llu, process %u, thread %u, correlation %u\n", + callback_name, api->cbid, + (unsigned long long)(api->start), + (unsigned long long)(api->end), + (unsigned long long)(api->end - api->start), + api->processId, api->threadId, api->correlationId); + break; + } + case CUPTI_ACTIVITY_KIND_MEMCPY: + { + CUpti_ActivityMemcpy4 *memcpy = (CUpti_ActivityMemcpy4 *)record; + printf("MEMCPY %s, size %llu, %llu, %llu, %llu\n", + getMemcpyKindString((CUpti_ActivityMemcpyKind)memcpy->copyKind), + (unsigned long long)memcpy->bytes, + (unsigned long long)(memcpy->start), + (unsigned long long)(memcpy->end), + (unsigned long long)(memcpy->end) - (memcpy->start)); + break; + } + case CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER: + { + CUpti_ActivityUnifiedMemoryCounter2 *uvm = (CUpti_ActivityUnifiedMemoryCounter2 *)record; + printf("UVM MEMCPY %s, size %llu, %llu, %llu, %llu \n", + getUvmCounterKindString(uvm->counterKind), + (unsigned long long)uvm->value, + (unsigned long long)(uvm->start), + (unsigned long long)(uvm->end), + (unsigned long long)(uvm->end - uvm->start)); + break; + } + } +} + +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) +{ + uint8_t *bfr = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE); + if (bfr == NULL) + { + printf("Error: out of memory\n"); + exit(-1); + } + + *size = BUF_SIZE; + *buffer = ALIGN_BUFFER(bfr, ALIGN_SIZE); + *maxNumRecords = 0; +} + +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) +{ + CUptiResult status; + CUpti_Activity *record = NULL; + if (validSize > 0) + { + do + { + status = 
cuptiActivityGetNextRecord(buffer, validSize, &record); + if (status == CUPTI_SUCCESS) + { + printActivity(record); + } + else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) + break; + else + { + CUPTI_CALL(status); + } + } while (1); + + // report any records dropped from the queue + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if (dropped != 0) + { + printf("Dropped %u activity records\n", (unsigned int)dropped); + } + } + + free(buffer); +} + +#ifndef PROFILE +void initTrace() { + printf("not Profile initTrace()\n"); + return; +} + +void finiTrace() { + return; +} + +#else +void initTrace() +{ + printf("Profile initTrace()\n"); + size_t attrValue = 0, attrValueSize = sizeof(size_t); + + CUpti_ActivityUnifiedMemoryCounterConfig config[2]; + + // configure unified memory counters + config[0].scope = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE; + config[0].kind = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD; + config[0].deviceId = 0; + config[0].enable = 1; + + config[1].scope = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE; + config[1].kind = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH; + config[1].deviceId = 0; + config[1].enable = 1; + + CUptiResult res = cuptiActivityConfigureUnifiedMemoryCounter(config, 2); + if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED) + { + printf("Test is waived, unified memory is not supported on the underlying platform.\n"); + } + else if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE) + { + printf("Test is waived, unified memory is not supported on the device.\n"); + } + else if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES) + { + printf("Test is waived, unified memory is not supported on the non-P2P multi-gpu setup.\n"); + } + else + { + CUPTI_CALL(res); + } + + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); + 
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER)); + + // Register callbacks for buffer requests and for buffers completed by CUPTI. + CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + + // Optionally get and set activity attributes. + // Attributes can be set by the CUPTI client to change behavior of the activity API. + // Some attributes require to be set before any CUDA context is created to be effective, + // e.g. to be applied to all device buffer allocations (see documentation). + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + printf("%s = %llu B\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +} + +void finiTrace() +{ + // Force flush any remaining activity buffers before termination of the application + CUPTI_CALL(cuptiActivityFlushAll(1)); +} +#endif + + +void GPU_argv_init() +{ + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, GPU_DEVICE); + printf("setting device %d with name %s\n", GPU_DEVICE, deviceProp.name); + cudaSetDevice(GPU_DEVICE); +} +#else +void cuda_set_device(int n){} + +#endif diff --git a/workloads/realworld/uvm/darknet/src/cuda_dark.h b/workloads/realworld/uvm/darknet/src/cuda_dark.h new file mode 100644 index 
0000000000000000000000000000000000000000..ac6b60bc3d27ec1ebc8190463648b946f6c809ef --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/cuda_dark.h @@ -0,0 +1,63 @@ +#ifndef CUDA_H +#define CUDA_H + +#include "darknet.h" + +#ifdef GPU + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) (((uintptr_t)(buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t)(buffer) & ((align)-1))) : (buffer)) + +static uint64_t startTimestamp; +// Timestamp at trace initialization time. Used to normalized other +// timestamps + +#define CUPTI_CALL(call) \ + do \ + { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) \ + { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + if (_status == CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED) \ + exit(0); \ + else \ + exit(-1); \ + } \ + } while (0) + +#include + +#ifdef __cplusplus +extern "C" { +#endif +void check_error(cudaError_t status); +cublasHandle_t blas_handle(); +int *cuda_make_int_array(int *x, size_t n); +void cuda_random(float *x_gpu, size_t n); +float cuda_compare(float *x_gpu, float *x, size_t n, char *s); +dim3 cuda_gridsize(size_t n); + +void GPU_argv_init(); +void initTrace(); +void finiTrace(); +void startCPU(); +void endCPU(); +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords); +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); +static void printActivity(CUpti_Activity *record); + +#ifdef __cplusplus +} +#endif + +#ifdef CUDNN +cudnnHandle_t cudnn_handle(); +#endif + +#endif +#endif diff --git a/workloads/realworld/uvm/darknet/src/cupti_add.cpp b/workloads/realworld/uvm/darknet/src/cupti_add.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a0d16eb72f41f8e858a59354d2de9d6b470c0e76 --- /dev/null +++ 
b/workloads/realworld/uvm/darknet/src/cupti_add.cpp @@ -0,0 +1,112 @@ +#include "cupti_add.h" + +static void +printActivity(CUpti_Activity *record) +{ + switch (record->kind) + { + case CUPTI_ACTIVITY_KIND_KERNEL: + { + int status; + CUpti_ActivityKernel4 *kernel = (CUpti_ActivityKernel4 *)record; + printf("CUPTI,%s,%llu,%llu,%llu\n", + abi::__cxa_demangle(kernel->name, 0, 0, &status), + (unsigned long long)(kernel->start), + (unsigned long long)(kernel->end), + (unsigned long long)(kernel->end - startTimestamp) - (kernel->start - startTimestamp)); + break; + } + case CUPTI_ACTIVITY_KIND_RUNTIME: + { + CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record; + const char *callback_name; + cuptiGetCallbackName(CUPTI_CB_DOMAIN_RUNTIME_API, api->cbid, &callback_name); + printf("RUNTIME %s (cbid=%u) [ %llu - %llu ] process %u, thread %u, correlation %u\n", + callback_name, api->cbid, + (unsigned long long)(api->start - startTimestamp), + (unsigned long long)(api->end - startTimestamp), + api->processId, api->threadId, api->correlationId); + break; + } + } +} + +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) +{ + uint8_t *bfr = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE); + if (bfr == NULL) + { + printf("Error: out of memory\n"); + exit(-1); + } + + *size = BUF_SIZE; + *buffer = ALIGN_BUFFER(bfr, ALIGN_SIZE); + *maxNumRecords = 0; +} + +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) +{ + CUptiResult status; + CUpti_Activity *record = NULL; + if (validSize > 0) + { + do + { + status = cuptiActivityGetNextRecord(buffer, validSize, &record); + if (status == CUPTI_SUCCESS) + { + printActivity(record); + } + else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) + break; + else + { + CUPTI_CALL(status); + } + } while (1); + + // report any records dropped from the queue + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if (dropped != 0) + { 
+ printf("Dropped %u activity records\n", (unsigned int)dropped); + } + } + + free(buffer); +} + +void initTrace() +{ + size_t attrValue = 0, attrValueSize = sizeof(size_t); + + // CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); + + // Register callbacks for buffer requests and for buffers completed by CUPTI. + CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + + // Optionally get and set activity attributes. + // Attributes can be set by the CUPTI client to change behavior of the activity API. + // Some attributes require to be set before any CUDA context is created to be effective, + // e.g. to be applied to all device buffer allocations (see documentation). + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + printf("%s = %llu B\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +} + +void finiTrace() +{ + // Force flush any remaining activity buffers before termination of the application + CUPTI_CALL(cuptiActivityFlushAll(1)); +} \ No newline at end of file diff --git a/workloads/realworld/uvm/darknet/src/cupti_add.h b/workloads/realworld/uvm/darknet/src/cupti_add.h new file mode 100644 index 0000000000000000000000000000000000000000..a30b7b847ad13381032d2f60eac2955d30146485 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/cupti_add.h @@ -0,0 
+1,36 @@ +#include +#include +#include + + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) \ + (((uintptr_t) (buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer)) + +static uint64_t startTimestamp; +// Timestamp at trace initialization time. Used to normalized other +// timestamps + +#define CUPTI_CALL(call) \ + do { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + if(_status == CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED) \ + exit(0); \ + else \ + exit(-1); \ + } \ + } while (0) + +void initTrace(); +void finiTrace(); +void startCPU(); +void endCPU(); +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords); +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); +static void printActivity(CUpti_Activity *record); diff --git a/workloads/realworld/uvm/darknet/src/data.c b/workloads/realworld/uvm/darknet/src/data.c new file mode 100644 index 0000000000000000000000000000000000000000..d50f1346c5cdcfe1dbeb2d0f70ec408fb4f33960 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/data.c @@ -0,0 +1,1685 @@ +#include "data.h" +#include "utils.h" +#include "image.h" +#include "cuda_dark.h" + +#include +#include +#include + +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + +list *get_paths(char *filename) +{ + char *path; + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + list *lines = make_list(); + while((path=fgetl(file))){ + list_insert(lines, path); + } + fclose(file); + return lines; +} + +/* +char **get_random_paths_indexes(char **paths, int n, int m, int *indexes) +{ + char **random_paths = calloc(n, sizeof(char*)); + int i; + pthread_mutex_lock(&mutex); + for(i = 0; i 
< n; ++i){ + int index = rand()%m; + indexes[i] = index; + random_paths[i] = paths[index]; + if(i == 0) printf("%s\n", paths[index]); + } + pthread_mutex_unlock(&mutex); + return random_paths; +} +*/ + +char **get_random_paths(char **paths, int n, int m) +{ + char **random_paths = calloc(n, sizeof(char*)); + int i; + pthread_mutex_lock(&mutex); + for(i = 0; i < n; ++i){ + int index = rand()%m; + random_paths[i] = paths[index]; + //if(i == 0) printf("%s\n", paths[index]); + } + pthread_mutex_unlock(&mutex); + return random_paths; +} + +char **find_replace_paths(char **paths, int n, char *find, char *replace) +{ + char **replace_paths = calloc(n, sizeof(char*)); + int i; + for(i = 0; i < n; ++i){ + char replaced[4096]; + find_replace(paths[i], find, replace, replaced); + replace_paths[i] = copy_string(replaced); + } + return replace_paths; +} + +matrix load_image_paths_gray(char **paths, int n, int w, int h) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image(paths[i], w, h, 3); + + image gray = grayscale_image(im); + free_image(im); + im = gray; + + X.vals[i] = im.data; + X.cols = im.h*im.w*im.c; + } + return X; +} + +matrix load_image_paths(char **paths, int n, int w, int h) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], w, h); + X.vals[i] = im.data; + X.cols = im.h*im.w*im.c; + } + return X; +} + +matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], 0, 0); + image crop; + if(center){ + crop = center_crop_image(im, size, size); + } else { + crop = random_augment_image(im, angle, aspect, 
min, max, size, size); + } + int flip = rand()%2; + if (flip) flip_image(crop); + random_distort_image(crop, hue, saturation, exposure); + + /* + show_image(im, "orig"); + show_image(crop, "crop"); + cvWaitKey(0); + */ + //grayscale_image_3c(crop); + free_image(im); + X.vals[i] = crop.data; + X.cols = crop.h*crop.w*crop.c; + } + return X; +} + + +box_label *read_boxes(char *filename, int *n) +{ + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + float x, y, h, w; + int id; + int count = 0; + int size = 64; + box_label *boxes = calloc(size, sizeof(box_label)); + while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){ + if(count == size) { + size = size * 2; + boxes = realloc(boxes, size*sizeof(box_label)); + } + boxes[count].id = id; + boxes[count].x = x; + boxes[count].y = y; + boxes[count].h = h; + boxes[count].w = w; + boxes[count].left = x - w/2; + boxes[count].right = x + w/2; + boxes[count].top = y - h/2; + boxes[count].bottom = y + h/2; + ++count; + } + fclose(file); + *n = count; + return boxes; +} + +void randomize_boxes(box_label *b, int n) +{ + int i; + for(i = 0; i < n; ++i){ + box_label swap = b[i]; + int index = rand()%n; + b[i] = b[index]; + b[index] = swap; + } +} + +void correct_boxes(box_label *boxes, int n, float dx, float dy, float sx, float sy, int flip) +{ + int i; + for(i = 0; i < n; ++i){ + if(boxes[i].x == 0 && boxes[i].y == 0) { + boxes[i].x = 999999; + boxes[i].y = 999999; + boxes[i].w = 999999; + boxes[i].h = 999999; + continue; + } + boxes[i].left = boxes[i].left * sx - dx; + boxes[i].right = boxes[i].right * sx - dx; + boxes[i].top = boxes[i].top * sy - dy; + boxes[i].bottom = boxes[i].bottom* sy - dy; + + if(flip){ + float swap = boxes[i].left; + boxes[i].left = 1. - boxes[i].right; + boxes[i].right = 1. 
- swap; + } + + boxes[i].left = constrain(0, 1, boxes[i].left); + boxes[i].right = constrain(0, 1, boxes[i].right); + boxes[i].top = constrain(0, 1, boxes[i].top); + boxes[i].bottom = constrain(0, 1, boxes[i].bottom); + + boxes[i].x = (boxes[i].left+boxes[i].right)/2; + boxes[i].y = (boxes[i].top+boxes[i].bottom)/2; + boxes[i].w = (boxes[i].right - boxes[i].left); + boxes[i].h = (boxes[i].bottom - boxes[i].top); + + boxes[i].w = constrain(0, 1, boxes[i].w); + boxes[i].h = constrain(0, 1, boxes[i].h); + } +} + +void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int count = 0; + box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + float x,y,w,h; + int id; + int i; + + for (i = 0; i < count && i < 90; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if (w < .0 || h < .0) continue; + + int index = (4+classes) * i; + + truth[index++] = x; + truth[index++] = y; + truth[index++] = w; + truth[index++] = h; + + if (id < classes) truth[index+id] = 1; + } + free(boxes); +} + +void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + int count = 0; + 
box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + float x,y,w,h; + int id; + int i; + + for (i = 0; i < count; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if (w < .005 || h < .005) continue; + + int col = (int)(x*num_boxes); + int row = (int)(y*num_boxes); + + x = x*num_boxes - col; + y = y*num_boxes - row; + + int index = (col+row*num_boxes)*(5+classes); + if (truth[index]) continue; + truth[index++] = 1; + + if (id < classes) truth[index+id] = 1; + index += classes; + + truth[index++] = x; + truth[index++] = y; + truth[index++] = w; + truth[index++] = h; + } + free(boxes); +} + +void load_rle(image im, int *rle, int n) +{ + int count = 0; + int curr = 0; + int i,j; + for(i = 0; i < n; ++i){ + for(j = 0; j < rle[i]; ++j){ + im.data[count++] = curr; + } + curr = 1 - curr; + } + for(; count < im.h*im.w*im.c; ++count){ + im.data[count] = curr; + } +} + +void or_image(image src, image dest, int c) +{ + int i; + for(i = 0; i < src.w*src.h; ++i){ + if(src.data[i]) dest.data[dest.w*dest.h*c + i] = 1; + } +} + +void exclusive_image(image src) +{ + int k, j, i; + int s = src.w*src.h; + for(k = 0; k < src.c-1; ++k){ + for(i = 0; i < s; ++i){ + if (src.data[k*s + i]){ + for(j = k+1; j < src.c; ++j){ + src.data[j*s + i] = 0; + } + } + } + } +} + +box bound_image(image im) +{ + int x,y; + int minx = im.w; + int miny = im.h; + int maxx = 0; + int maxy = 0; + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + if(im.data[y*im.w + x]){ + minx = (x < minx) ? x : minx; + miny = (y < miny) ? y : miny; + maxx = (x > maxx) ? x : maxx; + maxy = (y > maxy) ? 
y : maxy; + } + } + } + box b = {minx, miny, maxx-minx + 1, maxy-miny + 1}; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + return b; +} + +void fill_truth_iseg(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + int i = 0; + int j; + image part = make_image(w, h, 1); + while((fscanf(file, "%d %s", &id, buff) == 2) && i < num_boxes){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect); + if(flip) flip_image(sized); + + image mask = resize_image(sized, mw, mh); + truth[i*(mw*mh+1)] = id; + for(j = 0; j < mw*mh; ++j){ + truth[i*(mw*mh + 1) + 1 + j] = mask.data[j]; + } + ++i; + + free_image(mask); + free_image(sized); + free(rle); + } + if(i < num_boxes) truth[i*(mw*mh+1)] = -1; + fclose(file); + free_image(part); +} + +void fill_truth_mask(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + int i = 0; + image part = make_image(w, h, 1); + while((fscanf(file, "%d %s", &id, buff) == 2) && i < num_boxes){ + int n = 0; + int 
*rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect); + if(flip) flip_image(sized); + box b = bound_image(sized); + if(b.w > 0){ + image crop = crop_image(sized, b.x, b.y, b.w, b.h); + image mask = resize_image(crop, mw, mh); + truth[i*(4 + mw*mh + 1) + 0] = (b.x + b.w/2.)/sized.w; + truth[i*(4 + mw*mh + 1) + 1] = (b.y + b.h/2.)/sized.h; + truth[i*(4 + mw*mh + 1) + 2] = b.w/sized.w; + truth[i*(4 + mw*mh + 1) + 3] = b.h/sized.h; + int j; + for(j = 0; j < mw*mh; ++j){ + truth[i*(4 + mw*mh + 1) + 4 + j] = mask.data[j]; + } + truth[i*(4 + mw*mh + 1) + 4 + mw*mh] = id; + free_image(crop); + free_image(mask); + ++i; + } + free_image(sized); + free(rle); + } + fclose(file); + free_image(part); +} + + +void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + + find_replace(labelpath, "raw", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + int count = 0; + box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + if(count > num_boxes) count = num_boxes; + float x,y,w,h; + int id; + int i; + int sub = 0; + + for (i = 0; i < count; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if ((w < .001 || h < .001)) { + ++sub; + continue; + } + + truth[(i-sub)*5+0] = x; + truth[(i-sub)*5+1] = y; + truth[(i-sub)*5+2] = w; + truth[(i-sub)*5+3] = h; + truth[(i-sub)*5+4] = id; + } + free(boxes); +} + +#define NUMCHARS 37 + +void print_letters(float 
*pred, int n) +{ + int i; + for(i = 0; i < n; ++i){ + int index = max_index(pred+i*NUMCHARS, NUMCHARS); + printf("%c", int_to_alphanum(index)); + } + printf("\n"); +} + +void fill_truth_captcha(char *path, int n, float *truth) +{ + char *begin = strrchr(path, '/'); + ++begin; + int i; + for(i = 0; i < strlen(begin) && i < n && begin[i] != '.'; ++i){ + int index = alphanum_to_int(begin[i]); + if(index > 35) printf("Bad %c\n", begin[i]); + truth[i*NUMCHARS+index] = 1; + } + for(;i < n; ++i){ + truth[i*NUMCHARS + NUMCHARS-1] = 1; + } +} + +data load_data_captcha(char **paths, int n, int m, int k, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = make_matrix(n, k*NUMCHARS); + int i; + for(i = 0; i < n; ++i){ + fill_truth_captcha(paths[i], k, d.y.vals[i]); + } + if(m) free(paths); + return d; +} + +data load_data_captcha_encode(char **paths, int n, int m, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.X.cols = 17100; + d.y = d.X; + if(m) free(paths); + return d; +} + +void fill_truth(char *path, char **labels, int k, float *truth) +{ + int i; + memset(truth, 0, k*sizeof(float)); + int count = 0; + for(i = 0; i < k; ++i){ + if(strstr(path, labels[i])){ + truth[i] = 1; + ++count; + //printf("%s %s %d\n", path, labels[i], i); + } + } + if(count != 1 && (k != 1 || count != 0)) printf("Too many or too few labels: %d, %s\n", count, path); +} + +void fill_hierarchy(float *truth, int k, tree *hierarchy) +{ + int j; + for(j = 0; j < k; ++j){ + if(truth[j]){ + int parent = hierarchy->parent[j]; + while(parent >= 0){ + truth[parent] = 1; + parent = hierarchy->parent[parent]; + } + } + } + int i; + int count = 0; + for(j = 0; j < hierarchy->groups; ++j){ + //printf("%d\n", count); + int mask = 1; + for(i = 0; i < hierarchy->group_size[j]; ++i){ + if(truth[count + i]){ + mask = 0; + break; + } 
+ } + if (mask) { + for(i = 0; i < hierarchy->group_size[j]; ++i){ + truth[count + i] = SECRET_NUM; + } + } + count += hierarchy->group_size[j]; + } +} + +matrix load_regression_labels_paths(char **paths, int n, int k) +{ + matrix y = make_matrix(n, k); + int i,j; + for(i = 0; i < n; ++i){ + char labelpath[4096]; + find_replace(paths[i], "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".BMP", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPeG", ".txt", labelpath); + find_replace(labelpath, ".Jpeg", ".txt", labelpath); + find_replace(labelpath, ".PNG", ".txt", labelpath); + find_replace(labelpath, ".TIF", ".txt", labelpath); + find_replace(labelpath, ".bmp", ".txt", labelpath); + find_replace(labelpath, ".jpeg", ".txt", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".tif", ".txt", labelpath); + + FILE *file = fopen(labelpath, "r"); + for(j = 0; j < k; ++j){ + fscanf(file, "%f", &(y.vals[i][j])); + } + fclose(file); + } + return y; +} + +matrix load_labels_paths(char **paths, int n, char **labels, int k, tree *hierarchy) +{ + matrix y = make_matrix(n, k); + int i; + for(i = 0; i < n && labels; ++i){ + fill_truth(paths[i], labels, k, y.vals[i]); + if(hierarchy){ + fill_hierarchy(y.vals[i], k, hierarchy); + } + } + return y; +} + +matrix load_tags_paths(char **paths, int n, int k) +{ + matrix y = make_matrix(n, k); + int i; + //int count = 0; + for(i = 0; i < n; ++i){ + char label[4096]; + find_replace(paths[i], "images", "labels", label); + find_replace(label, ".jpg", ".txt", label); + FILE *file = fopen(label, "r"); + if (!file) continue; + //++count; + int tag; + while(fscanf(file, "%d", &tag) == 1){ + if(tag < k){ + y.vals[i][tag] = 1; + } + } + fclose(file); + } + //printf("%d/%d\n", 
count, n); + return y; +} + +char **get_labels(char *filename) +{ + list *plist = get_paths(filename); + char **labels = (char **)list_to_array(plist); + free_list(plist); + return labels; +} + +void free_data(data d) +{ + if(!d.shallow){ + free_matrix(d.X); + free_matrix(d.y); + }else{ + free(d.X.vals); + free(d.y.vals); + } +} + +image get_segmentation_image(char *path, int w, int h, int classes) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + image mask = make_image(w, h, classes); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + image part = make_image(w, h, 1); + while(fscanf(file, "%d %s", &id, buff) == 2){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + or_image(part, mask, id); + free(rle); + } + //exclusive_image(mask); + fclose(file); + free_image(part); + return mask; +} + +image get_segmentation_image2(char *path, int w, int h, int classes) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + image mask = make_image(w, h, classes+1); + int i; + for(i = 0; i < w*h; ++i){ + mask.data[w*h*classes + i] = 1; + } + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + image part = make_image(w, h, 1); + while(fscanf(file, "%d %s", &id, buff) == 2){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + or_image(part, mask, id); + for(i = 0; i < w*h; ++i){ + if(part.data[i]) mask.data[w*h*classes + i] = 
0; + } + free(rle); + } + //exclusive_image(mask); + fclose(file); + free_image(part); + return mask; +} + +data load_data_seg(int n, char **paths, int m, int w, int h, int classes, int min, int max, float angle, float aspect, float hue, float saturation, float exposure, int div) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + + d.y.rows = n; + d.y.cols = h*w*classes/div/div; + d.y.vals = calloc(d.X.rows, sizeof(float*)); + + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + + image mask = get_segmentation_image(random_paths[i], orig.w, orig.h, classes); + //image mask = make_image(orig.w, orig.h, classes+1); + image sized_m = rotate_crop_image(mask, a.rad, a.scale/div, a.w/div, a.h/div, a.dx/div, a.dy/div, a.aspect); + + if(flip) flip_image(sized_m); + d.y.vals[i] = sized_m.data; + + free_image(orig); + free_image(mask); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_iseg(int n, char **paths, int m, int w, int h, int classes, int boxes, int div, int min, int max, float angle, float aspect, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, (((w/div)*(h/div))+1)*boxes); + + for(i = 0; i < n; ++i){ + image orig = 
load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + //show_image(sized, "image"); + + fill_truth_iseg(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, w/div, h/div); + + free_image(orig); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_mask(int n, char **paths, int m, int w, int h, int classes, int boxes, int coords, int min, int max, float angle, float aspect, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, (coords+1)*boxes); + + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + //show_image(sized, "image"); + + fill_truth_mask(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, 14, 14); + + free_image(orig); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter, float hue, float saturation, float exposure) 
+{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + + int k = size*size*(5+classes); + d.y = make_matrix(n, k); + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + + int oh = orig.h; + int ow = orig.w; + + int dw = (ow*jitter); + int dh = (oh*jitter); + + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); + + int swidth = ow - pleft - pright; + int sheight = oh - ptop - pbot; + + float sx = (float)swidth / ow; + float sy = (float)sheight / oh; + + int flip = rand()%2; + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + + float dx = ((float)pleft/ow)/sx; + float dy = ((float)ptop /oh)/sy; + + image sized = resize_image(cropped, w, h); + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + + fill_truth_region(random_paths[i], d.y.vals[i], classes, size, flip, dx, dy, 1./sx, 1./sy); + + free_image(orig); + free_image(cropped); + } + free(random_paths); + return d; +} + +data load_data_compare(int n, char **paths, int m, int classes, int w, int h) +{ + if(m) paths = get_random_paths(paths, 2*n, m); + int i,j; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*6; + + int k = 2*(classes); + d.y = make_matrix(n, k); + for(i = 0; i < n; ++i){ + image im1 = load_image_color(paths[i*2], w, h); + image im2 = load_image_color(paths[i*2+1], w, h); + + d.X.vals[i] = calloc(d.X.cols, sizeof(float)); + memcpy(d.X.vals[i], im1.data, h*w*3*sizeof(float)); + memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float)); + + int id; + float iou; + + char imlabel1[4096]; + char imlabel2[4096]; + find_replace(paths[i*2], "imgs", "labels", imlabel1); + find_replace(imlabel1, "jpg", "txt", 
imlabel1); + FILE *fp1 = fopen(imlabel1, "r"); + + while(fscanf(fp1, "%d %f", &id, &iou) == 2){ + if (d.y.vals[i][2*id] < iou) d.y.vals[i][2*id] = iou; + } + + find_replace(paths[i*2+1], "imgs", "labels", imlabel2); + find_replace(imlabel2, "jpg", "txt", imlabel2); + FILE *fp2 = fopen(imlabel2, "r"); + + while(fscanf(fp2, "%d %f", &id, &iou) == 2){ + if (d.y.vals[i][2*id + 1] < iou) d.y.vals[i][2*id + 1] = iou; + } + + for (j = 0; j < classes; ++j){ + if (d.y.vals[i][2*j] > .5 && d.y.vals[i][2*j+1] < .5){ + d.y.vals[i][2*j] = 1; + d.y.vals[i][2*j+1] = 0; + } else if (d.y.vals[i][2*j] < .5 && d.y.vals[i][2*j+1] > .5){ + d.y.vals[i][2*j] = 0; + d.y.vals[i][2*j+1] = 1; + } else { + d.y.vals[i][2*j] = SECRET_NUM; + d.y.vals[i][2*j+1] = SECRET_NUM; + } + } + fclose(fp1); + fclose(fp2); + + free_image(im1); + free_image(im2); + } + if(m) free(paths); + return d; +} + +data load_data_swag(char **paths, int n, int classes, float jitter) +{ + int index = rand()%n; + char *random_path = paths[index]; + + image orig = load_image_color(random_path, 0, 0); + int h = orig.h; + int w = orig.w; + + data d = {0}; + d.shallow = 0; + d.w = w; + d.h = h; + + d.X.rows = 1; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + int k = (4+classes)*90; + d.y = make_matrix(1, k); + + int dw = w*jitter; + int dh = h*jitter; + + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); + + int swidth = w - pleft - pright; + int sheight = h - ptop - pbot; + + float sx = (float)swidth / w; + float sy = (float)sheight / h; + + int flip = rand()%2; + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + + float dx = ((float)pleft/w)/sx; + float dy = ((float)ptop /h)/sy; + + image sized = resize_image(cropped, w, h); + if(flip) flip_image(sized); + d.X.vals[0] = sized.data; + + fill_truth_swag(random_path, d.y.vals[0], classes, flip, dx, dy, 1./sx, 1./sy); + + free_image(orig); + 
free_image(cropped); + + return d; +} + +data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, 5*boxes); + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + image sized = make_image(w, h, orig.c); + fill_image(sized, .5); + + float dw = jitter * orig.w; + float dh = jitter * orig.h; + + float new_ar = (orig.w + rand_uniform(-dw, dw)) / (orig.h + rand_uniform(-dh, dh)); + //float scale = rand_uniform(.25, 2); + float scale = 1; + + float nw, nh; + + if(new_ar < 1){ + nh = scale * h; + nw = nh * new_ar; + } else { + nw = scale * w; + nh = nw / new_ar; + } + + float dx = rand_uniform(0, w - nw); + float dy = rand_uniform(0, h - nh); + + place_image(orig, nw, nh, dx, dy, sized); + + random_distort_image(sized, hue, saturation, exposure); + + int flip = rand()%2; + if(flip) flip_image(sized); + d.X.vals[i] = sized.data; + + + fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, -dx/w, -dy/h, nw/w, nh/h); + + free_image(orig); + } + free(random_paths); + return d; +} + +void *load_thread(void *ptr) +{ + //printf("Loading data: %d\n", rand()); + load_args a = *(struct load_args*)ptr; + if(a.exposure == 0) a.exposure = 1; + if(a.saturation == 0) a.saturation = 1; + if(a.aspect == 0) a.aspect = 1; + + if (a.type == OLD_CLASSIFICATION_DATA){ + *a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h); + } else if (a.type == REGRESSION_DATA){ + *a.d = load_data_regression(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == CLASSIFICATION_DATA){ + *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.min, a.max, 
a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.center); + } else if (a.type == SUPER_DATA){ + *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale); + } else if (a.type == WRITING_DATA){ + *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h); + } else if (a.type == ISEG_DATA){ + *a.d = load_data_iseg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.scale, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == INSTANCE_DATA){ + *a.d = load_data_mask(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.coords, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == SEGMENTATION_DATA){ + *a.d = load_data_seg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.scale); + } else if (a.type == REGION_DATA){ + *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); + } else if (a.type == DETECTION_DATA){ + *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); + } else if (a.type == SWAG_DATA){ + *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter); + } else if (a.type == COMPARE_DATA){ + *a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h); + } else if (a.type == IMAGE_DATA){ + *(a.im) = load_image_color(a.path, 0, 0); + *(a.resized) = resize_image(*(a.im), a.w, a.h); + } else if (a.type == LETTERBOX_DATA){ + *(a.im) = load_image_color(a.path, 0, 0); + *(a.resized) = letterbox_image(*(a.im), a.w, a.h); + } else if (a.type == TAG_DATA){ + *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } + free(ptr); + return 0; +} + +pthread_t load_data_in_thread(load_args args) +{ + pthread_t thread; + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + 
if(pthread_create(&thread, 0, load_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void *load_threads(void *ptr) +{ + int i; + load_args args = *(load_args *)ptr; + if (args.threads == 0) args.threads = 1; + data *out = args.d; + int total = args.n; + free(ptr); + data *buffers = calloc(args.threads, sizeof(data)); + pthread_t *threads = calloc(args.threads, sizeof(pthread_t)); + for(i = 0; i < args.threads; ++i){ + args.d = buffers + i; + args.n = (i+1) * total/args.threads - i * total/args.threads; + threads[i] = load_data_in_thread(args); + } + for(i = 0; i < args.threads; ++i){ + pthread_join(threads[i], 0); + } + *out = concat_datas(buffers, args.threads); + out->shallow = 0; + for(i = 0; i < args.threads; ++i){ + buffers[i].shallow = 1; + free_data(buffers[i]); + } + free(buffers); + free(threads); + return 0; +} + +void load_data_blocking(load_args args) +{ + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + load_thread(ptr); +} + +pthread_t load_data(load_args args) +{ + pthread_t thread; + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + if(pthread_create(&thread, 0, load_threads, ptr)) error("Thread creation failed"); + return thread; +} + +data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h) +{ + if(m) paths = get_random_paths(paths, n, m); + char **replace_paths = find_replace_paths(paths, n, ".png", "-label.png"); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = load_image_paths_gray(replace_paths, n, out_w, out_h); + if(m) free(paths); + int i; + for(i = 0; i < n; ++i) free(replace_paths[i]); + free(replace_paths); + return d; +} + +data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = load_labels_paths(paths, n, labels, k, 0); + if(m) free(paths); 
+ return d; +} + +/* + data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) + { + data d = {0}; + d.indexes = calloc(n, sizeof(int)); + if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes); + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure); + d.y = load_labels_paths(paths, n, labels, k); + if(m) free(paths); + return d; + } + */ + +data load_data_super(char **paths, int n, int m, int w, int h, int scale) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + + int i; + d.X.rows = n; + d.X.vals = calloc(n, sizeof(float*)); + d.X.cols = w*h*3; + + d.y.rows = n; + d.y.vals = calloc(n, sizeof(float*)); + d.y.cols = w*scale * h*scale * 3; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], 0, 0); + image crop = random_crop_image(im, w*scale, h*scale); + int flip = rand()%2; + if (flip) flip_image(crop); + image resize = resize_image(crop, w, h); + d.X.vals[i] = resize.data; + d.y.vals[i] = crop.data; + free_image(im); + } + + if(m) free(paths); + return d; +} + +data load_data_regression(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0); + d.y = load_regression_labels_paths(paths, n, k); + if(m) free(paths); + return d; +} + +data select_data(data *orig, int *inds) +{ + data d = {0}; + d.shallow = 1; + d.w = orig[0].w; + d.h = orig[0].h; + + d.X.rows = orig[0].X.rows; + d.y.rows = orig[0].X.rows; + + d.X.cols = orig[0].X.cols; + d.y.cols = orig[0].y.cols; + + d.X.vals = calloc(orig[0].X.rows, sizeof(float *)); + d.y.vals = calloc(orig[0].y.rows, sizeof(float *)); + 
int i; + for(i = 0; i < d.X.rows; ++i){ + d.X.vals[i] = orig[inds[i]].X.vals[i]; + d.y.vals[i] = orig[inds[i]].y.vals[i]; + } + return d; +} + +data *tile_data(data orig, int divs, int size) +{ + data *ds = calloc(divs*divs, sizeof(data)); + int i, j; +#pragma omp parallel for + for(i = 0; i < divs*divs; ++i){ + data d; + d.shallow = 0; + d.w = orig.w/divs * size; + d.h = orig.h/divs * size; + d.X.rows = orig.X.rows; + d.X.cols = d.w*d.h*3; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + + d.y = copy_matrix(orig.y); +#pragma omp parallel for + for(j = 0; j < orig.X.rows; ++j){ + int x = (i%divs) * orig.w / divs - (d.w - orig.w/divs)/2; + int y = (i/divs) * orig.h / divs - (d.h - orig.h/divs)/2; + image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[j]); + d.X.vals[j] = crop_image(im, x, y, d.w, d.h).data; + } + ds[i] = d; + } + return ds; +} + +data resize_data(data orig, int w, int h) +{ + data d = {0}; + d.shallow = 0; + d.w = w; + d.h = h; + int i; + d.X.rows = orig.X.rows; + d.X.cols = w*h*3; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + + d.y = copy_matrix(orig.y); +#pragma omp parallel for + for(i = 0; i < orig.X.rows; ++i){ + image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[i]); + d.X.vals[i] = resize_image(im, w, h).data; + } + return d; +} + +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.w=size; + d.h=size; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, center); + d.y = load_labels_paths(paths, n, labels, k, hierarchy); + if(m) free(paths); + return d; +} + +data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +{ + if(m) paths = 
get_random_paths(paths, n, m); + data d = {0}; + d.w = size; + d.h = size; + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0); + d.y = load_tags_paths(paths, n, k); + if(m) free(paths); + return d; +} + +matrix concat_matrix(matrix m1, matrix m2) +{ + int i, count = 0; + matrix m; + m.cols = m1.cols; + m.rows = m1.rows+m2.rows; + m.vals = calloc(m1.rows + m2.rows, sizeof(float*)); + for(i = 0; i < m1.rows; ++i){ + m.vals[count++] = m1.vals[i]; + } + for(i = 0; i < m2.rows; ++i){ + m.vals[count++] = m2.vals[i]; + } + return m; +} + +data concat_data(data d1, data d2) +{ + data d = {0}; + d.shallow = 1; + d.X = concat_matrix(d1.X, d2.X); + d.y = concat_matrix(d1.y, d2.y); + d.w = d1.w; + d.h = d1.h; + return d; +} + +data concat_datas(data *d, int n) +{ + int i; + data out = {0}; + for(i = 0; i < n; ++i){ + data new = concat_data(d[i], out); + free_data(out); + out = new; + } + return out; +} + +data load_categorical_data_csv(char *filename, int target, int k) +{ + data d = {0}; + d.shallow = 0; + matrix X = csv_to_matrix(filename); + float *truth_1d = pop_column(&X, target); + float **truth = one_hot_encode(truth_1d, X.rows, k); + matrix y; + y.rows = X.rows; + y.cols = k; + y.vals = truth; + d.X = X; + d.y = y; + free(truth_1d); + return d; +} + +data load_cifar10_data(char *filename) +{ + data d = {0}; + d.shallow = 0; + long i,j; + matrix X = make_matrix(10000, 3072); + matrix y = make_matrix(10000, 10); + d.X = X; + d.y = y; + + FILE *fp = fopen(filename, "rb"); + if(!fp) file_error(filename); + for(i = 0; i < 10000; ++i){ + unsigned char bytes[3073]; + fread(bytes, 1, 3073, fp); + int class = bytes[0]; + y.vals[i][class] = 1; + for(j = 0; j < X.cols; ++j){ + X.vals[i][j] = (double)bytes[j+1]; + } + } + scale_data_rows(d, 1./255); + //normalize_data_rows(d); + fclose(fp); + return d; +} + +void get_random_batch(data d, int n, float *X, float *y) +{ + int j; + for(j = 0; j < n; ++j){ + int 
index = rand()%d.X.rows; + memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); + memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); + } +} + +void get_next_batch(data d, int n, int offset, float *X, float *y) +{ + int j; + for(j = 0; j < n; ++j){ + int index = offset + j; + memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); + if(y) memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); + } +} + +void smooth_data(data d) +{ + int i, j; + float scale = 1. / d.y.cols; + float eps = .1; + for(i = 0; i < d.y.rows; ++i){ + for(j = 0; j < d.y.cols; ++j){ + d.y.vals[i][j] = eps * scale + (1-eps) * d.y.vals[i][j]; + } + } +} + +data load_all_cifar10() +{ + data d = {0}; + d.shallow = 0; + int i,j,b; + matrix X = make_matrix(50000, 3072); + matrix y = make_matrix(50000, 10); + d.X = X; + d.y = y; + + + for(b = 0; b < 5; ++b){ + char buff[256]; + sprintf(buff, "data/cifar/cifar-10-batches-bin/data_batch_%d.bin", b+1); + FILE *fp = fopen(buff, "rb"); + if(!fp) file_error(buff); + for(i = 0; i < 10000; ++i){ + unsigned char bytes[3073]; + fread(bytes, 1, 3073, fp); + int class = bytes[0]; + y.vals[i+b*10000][class] = 1; + for(j = 0; j < X.cols; ++j){ + X.vals[i+b*10000][j] = (double)bytes[j+1]; + } + } + fclose(fp); + } + //normalize_data_rows(d); + scale_data_rows(d, 1./255); + smooth_data(d); + return d; +} + +data load_go(char *filename) +{ + FILE *fp = fopen(filename, "rb"); + matrix X = make_matrix(3363059, 361); + matrix y = make_matrix(3363059, 361); + int row, col; + + if(!fp) file_error(filename); + char *label; + int count = 0; + while((label = fgetl(fp))){ + int i; + if(count == X.rows){ + X = resize_matrix(X, count*2); + y = resize_matrix(y, count*2); + } + sscanf(label, "%d %d", &row, &col); + char *board = fgetl(fp); + + int index = row*19 + col; + y.vals[count][index] = 1; + + for(i = 0; i < 19*19; ++i){ + float val = 0; + if(board[i] == '1') val = 1; + else if(board[i] == '2') val = -1; + X.vals[count][i] = val; + } + 
++count; + free(label); + free(board); + } + X = resize_matrix(X, count); + y = resize_matrix(y, count); + + data d = {0}; + d.shallow = 0; + d.X = X; + d.y = y; + + + fclose(fp); + + return d; +} + + +void randomize_data(data d) +{ + int i; + for(i = d.X.rows-1; i > 0; --i){ + int index = rand()%i; + float *swap = d.X.vals[index]; + d.X.vals[index] = d.X.vals[i]; + d.X.vals[i] = swap; + + swap = d.y.vals[index]; + d.y.vals[index] = d.y.vals[i]; + d.y.vals[i] = swap; + } +} + +void scale_data_rows(data d, float s) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + scale_array(d.X.vals[i], d.X.cols, s); + } +} + +void translate_data_rows(data d, float s) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + translate_array(d.X.vals[i], d.X.cols, s); + } +} + +data copy_data(data d) +{ + data c = {0}; + c.w = d.w; + c.h = d.h; + c.shallow = 0; + c.num_boxes = d.num_boxes; + c.boxes = d.boxes; + c.X = copy_matrix(d.X); + c.y = copy_matrix(d.y); + return c; +} + +void normalize_data_rows(data d) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + normalize_array(d.X.vals[i], d.X.cols); + } +} + +data get_data_part(data d, int part, int total) +{ + data p = {0}; + p.shallow = 1; + p.X.rows = d.X.rows * (part + 1) / total - d.X.rows * part / total; + p.y.rows = d.y.rows * (part + 1) / total - d.y.rows * part / total; + p.X.cols = d.X.cols; + p.y.cols = d.y.cols; + p.X.vals = d.X.vals + d.X.rows * part / total; + p.y.vals = d.y.vals + d.y.rows * part / total; + return p; +} + +data get_random_data(data d, int num) +{ + data r = {0}; + r.shallow = 1; + + r.X.rows = num; + r.y.rows = num; + + r.X.cols = d.X.cols; + r.y.cols = d.y.cols; + + r.X.vals = calloc(num, sizeof(float *)); + r.y.vals = calloc(num, sizeof(float *)); + + int i; + for(i = 0; i < num; ++i){ + int index = rand()%d.X.rows; + r.X.vals[i] = d.X.vals[index]; + r.y.vals[i] = d.y.vals[index]; + } + return r; +} + +data *split_data(data d, int part, int total) +{ + data *split = calloc(2, sizeof(data)); + int i; + int start = 
part*d.X.rows/total; + int end = (part+1)*d.X.rows/total; + data train; + data test; + train.shallow = test.shallow = 1; + + test.X.rows = test.y.rows = end-start; + train.X.rows = train.y.rows = d.X.rows - (end-start); + train.X.cols = test.X.cols = d.X.cols; + train.y.cols = test.y.cols = d.y.cols; + + train.X.vals = calloc(train.X.rows, sizeof(float*)); + test.X.vals = calloc(test.X.rows, sizeof(float*)); + train.y.vals = calloc(train.y.rows, sizeof(float*)); + test.y.vals = calloc(test.y.rows, sizeof(float*)); + + for(i = 0; i < start; ++i){ + train.X.vals[i] = d.X.vals[i]; + train.y.vals[i] = d.y.vals[i]; + } + for(i = start; i < end; ++i){ + test.X.vals[i-start] = d.X.vals[i]; + test.y.vals[i-start] = d.y.vals[i]; + } + for(i = end; i < d.X.rows; ++i){ + train.X.vals[i-(end-start)] = d.X.vals[i]; + train.y.vals[i-(end-start)] = d.y.vals[i]; + } + split[0] = train; + split[1] = test; + return split; +} + diff --git a/workloads/realworld/uvm/darknet/src/data.h b/workloads/realworld/uvm/darknet/src/data.h new file mode 100644 index 0000000000000000000000000000000000000000..781906f8743c7d88c0fa134403d0ae020b544053 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/data.h @@ -0,0 +1,50 @@ +#ifndef DATA_H +#define DATA_H +#include + +#include "darknet.h" +#include "matrix.h" +#include "list.h" +#include "image.h" +#include "tree.h" + +static inline float distance_from_edge(int x, int max) +{ + int dx = (max/2) - x; + if (dx < 0) dx = -dx; + dx = (max/2) + 1 - dx; + dx *= 2; + float dist = (float)dx/max; + if (dist > 1) dist = 1; + return dist; +} +void load_data_blocking(load_args args); + + +void print_letters(float *pred, int n); +data load_data_captcha(char **paths, int n, int m, int k, int w, int h); +data load_data_captcha_encode(char **paths, int n, int m, int w, int h); +data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure); +data load_data_tag(char **paths, 
int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); +matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); +data load_data_super(char **paths, int n, int m, int w, int h, int scale); +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); +data load_data_regression(char **paths, int n, int m, int classes, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); +data load_go(char *filename); + + +data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h); + +void get_random_batch(data d, int n, float *X, float *y); +data get_data_part(data d, int part, int total); +data get_random_data(data d, int num); +data load_categorical_data_csv(char *filename, int target, int k); +void normalize_data_rows(data d); +void scale_data_rows(data d, float s); +void translate_data_rows(data d, float s); +void randomize_data(data d); +data *split_data(data d, int part, int total); +data concat_datas(data *d, int n); +void fill_truth(char *path, char **labels, int k, float *truth); + +#endif diff --git a/workloads/realworld/uvm/darknet/src/deconvolutional_kernels.cu b/workloads/realworld/uvm/darknet/src/deconvolutional_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..ed12e7a3dc5148b1cbff746f13901a9653bc0f6d --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/deconvolutional_kernels.cu @@ -0,0 +1,139 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "convolutional_layer.h" +#include "deconvolutional_layer.h" +#include "batchnorm_layer.h" +#include "gemm.h" +#include "blas.h" +#include "im2col.h" +#include 
/* GPU forward pass for a deconvolutional (transposed-convolution)
 * layer: per batch item, a transposed GEMM expands the input into the
 * column workspace, col2im scatters the columns into the output image,
 * then batchnorm (or bias) and the activation are applied. */
extern "C" void forward_deconvolutional_layer_gpu(layer l, network net)
{
    int b;
    int m = l.size*l.size*l.n;
    int n = l.h*l.w;
    int k = l.c;

    fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1);

    for(b = 0; b < l.batch; ++b){
        float *wmat = l.weights_gpu;
        float *in   = net.input_gpu + b*l.c*l.h*l.w;
        float *cols = net.workspace;

        /* cols = wmat^T * in */
        gemm_gpu(1,0,m,n,k,1,wmat,m,in,n,0,cols,n);

        col2im_gpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output_gpu + b*l.outputs);
    }
    if (l.batch_normalize) {
        forward_batchnorm_layer_gpu(l, net);
    } else {
        add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h);
    }
    activate_array_gpu(l.output_gpu, l.batch*l.n*l.out_w*l.out_h, l.activation);
}
/* Copy all host-side deconv parameters (weights, biases, their update
 * buffers, and batchnorm statistics when present) to the GPU mirrors. */
extern "C" void push_deconvolutional_layer(layer l)
{
    int nweights = l.c*l.n*l.size*l.size;
    cuda_push_array(l.weights_gpu, l.weights, nweights);
    cuda_push_array(l.biases_gpu, l.biases, l.n);
    cuda_push_array(l.weight_updates_gpu, l.weight_updates, nweights);
    cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n);
    if (l.batch_normalize){
        cuda_push_array(l.scales_gpu, l.scales, l.n);
        cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n);
        cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n);
    }
}
l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.n, momentum, l.scale_updates_gpu, 1); + } + } +} + diff --git a/workloads/realworld/uvm/darknet/src/deconvolutional_layer.c b/workloads/realworld/uvm/darknet/src/deconvolutional_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..00c0e85771d42f99de969f9fd03e5f0f359d405c --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/deconvolutional_layer.c @@ -0,0 +1,312 @@ +#include "deconvolutional_layer.h" +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "utils.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" + +#include +#include + + +static size_t get_workspace_size(layer l){ + return (size_t)l.h*l.w*l.size*l.size*l.n*sizeof(float); +} + +void bilinear_init(layer l) +{ + int i,j,f; + float center = (l.size-1) / 2.; + for(f = 0; f < l.n; ++f){ + for(j = 0; j < l.size; ++j){ + for(i = 0; i < l.size; ++i){ + float val = (1 - fabs(i - center)) * (1 - fabs(j - center)); + int c = f%l.c; + int ind = f*l.size*l.size*l.c + c*l.size*l.size + j*l.size + i; + l.weights[ind] = val; + } + } + } +} + + +layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam) +{ + int i; + layer l = {0}; + l.type = DECONVOLUTIONAL; + + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.batch = batch; + l.stride = stride; + l.size = size; + + l.nweights = c*n*size*size; + l.nbiases = n; + + l.weights = calloc(c*n*size*size, sizeof(float)); + l.weight_updates = calloc(c*n*size*size, sizeof(float)); + + l.biases = calloc(n, sizeof(float)); + l.bias_updates = calloc(n, sizeof(float)); + //float scale = n/(size*size*c); + //printf("scale: %f\n", scale); + float scale = .02; + for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal(); + //bilinear_init(l); + for(i = 0; i < n; ++i){ + l.biases[i] = 0; + } + l.pad = padding; + + l.out_h = (l.h - 1) * l.stride + 
l.size - 2*l.pad; + l.out_w = (l.w - 1) * l.stride + l.size - 2*l.pad; + l.out_c = n; + l.outputs = l.out_w * l.out_h * l.out_c; + l.inputs = l.w * l.h * l.c; + + scal_cpu(l.nweights, (float)l.out_w*l.out_h/(l.w*l.h), l.weights, 1); + + l.output = calloc(l.batch*l.outputs, sizeof(float)); + l.delta = calloc(l.batch*l.outputs, sizeof(float)); + + l.forward = forward_deconvolutional_layer; + l.backward = backward_deconvolutional_layer; + l.update = update_deconvolutional_layer; + + l.batch_normalize = batch_normalize; + + if(batch_normalize){ + l.scales = calloc(n, sizeof(float)); + l.scale_updates = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(n, sizeof(float)); + l.variance = calloc(n, sizeof(float)); + + l.mean_delta = calloc(n, sizeof(float)); + l.variance_delta = calloc(n, sizeof(float)); + + l.rolling_mean = calloc(n, sizeof(float)); + l.rolling_variance = calloc(n, sizeof(float)); + l.x = calloc(l.batch*l.outputs, sizeof(float)); + l.x_norm = calloc(l.batch*l.outputs, sizeof(float)); + } + if(adam){ + l.m = calloc(c*n*size*size, sizeof(float)); + l.v = calloc(c*n*size*size, sizeof(float)); + l.bias_m = calloc(n, sizeof(float)); + l.scale_m = calloc(n, sizeof(float)); + l.bias_v = calloc(n, sizeof(float)); + l.scale_v = calloc(n, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_deconvolutional_layer_gpu; + l.backward_gpu = backward_deconvolutional_layer_gpu; + l.update_gpu = update_deconvolutional_layer_gpu; + + if(gpu_index >= 0){ + + if (adam) { + l.m_gpu = cuda_make_array(l.m, c*n*size*size); + l.v_gpu = cuda_make_array(l.v, c*n*size*size); + l.bias_m_gpu = cuda_make_array(l.bias_m, n); + l.bias_v_gpu = cuda_make_array(l.bias_v, n); + l.scale_m_gpu = cuda_make_array(l.scale_m, n); + l.scale_v_gpu = cuda_make_array(l.scale_v, n); + } + l.weights_gpu = cuda_make_array(l.weights, c*n*size*size); + l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size); + + l.biases_gpu = 
cuda_make_array(l.biases, n); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*l.out_h*l.out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n); + + if(batch_normalize){ + l.mean_gpu = cuda_make_array(0, n); + l.variance_gpu = cuda_make_array(0, n); + + l.rolling_mean_gpu = cuda_make_array(0, n); + l.rolling_variance_gpu = cuda_make_array(0, n); + + l.mean_delta_gpu = cuda_make_array(0, n); + l.variance_delta_gpu = cuda_make_array(0, n); + + l.scales_gpu = cuda_make_array(l.scales, n); + l.scale_updates_gpu = cuda_make_array(0, n); + + l.x_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n); + l.x_norm_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n); + } + } + #ifdef CUDNN + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); + #endif +#endif + + l.activation = activation; + l.workspace_size = get_workspace_size(l); + + fprintf(stderr, "deconv%5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); + + return l; +} + +void denormalize_deconvolutional_layer(layer l) +{ + int i, j; + for(i = 0; i < l.n; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001); + for(j = 0; j < l.c*l.size*l.size; ++j){ + l.weights[i*l.c*l.size*l.size + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + +void resize_deconvolutional_layer(layer *l, int h, int w) +{ + l->h = h; + l->w = w; + l->out_h = (l->h - 1) * l->stride + l->size - 2*l->pad; + l->out_w = (l->w - 1) * l->stride + l->size - 2*l->pad; + + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->w * l->h 
* l->c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + if(l->batch_normalize){ + l->x = realloc(l->x, l->batch*l->outputs*sizeof(float)); + l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float)); + } + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); + + if(l->batch_normalize){ + cuda_free(l->x_gpu); + cuda_free(l->x_norm_gpu); + + l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); + l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); + } + #ifdef CUDNN + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + #endif +#endif + l->workspace_size = get_workspace_size(*l); +} + +void forward_deconvolutional_layer(const layer l, network net) +{ + int i; + + int m = l.size*l.size*l.n; + int n = l.h*l.w; + int k = l.c; + + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + + for(i = 0; i < l.batch; ++i){ + float *a = l.weights; + float *b = net.input + i*l.c*l.h*l.w; + float *c = net.workspace; + + gemm_cpu(1,0,m,n,k,1,a,m,b,n,0,c,n); + + col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output+i*l.outputs); + } + if (l.batch_normalize) { + forward_batchnorm_layer(l, net); + } else { + add_bias(l.output, l.biases, l.batch, l.n, l.out_w*l.out_h); + } + activate_array(l.output, l.batch*l.n*l.out_w*l.out_h, l.activation); +} + +void backward_deconvolutional_layer(layer l, network net) +{ + int i; + + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l.bias_updates, l.delta, l.batch, l.n, 
l.out_w*l.out_h); + } + + //if(net.delta) memset(net.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float)); + + for(i = 0; i < l.batch; ++i){ + int m = l.c; + int n = l.size*l.size*l.n; + int k = l.h*l.w; + + float *a = net.input + i*m*k; + float *b = net.workspace; + float *c = l.weight_updates; + + im2col_cpu(l.delta + i*l.outputs, l.out_c, l.out_h, l.out_w, + l.size, l.stride, l.pad, b); + gemm_cpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if(net.delta){ + int m = l.c; + int n = l.h*l.w; + int k = l.size*l.size*l.n; + + float *a = l.weights; + float *b = net.workspace; + float *c = net.delta + i*n*m; + + gemm_cpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } +} + +void update_deconvolutional_layer(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int size = l.size*l.size*l.c*l.n; + axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.n, momentum, l.bias_updates, 1); + + if(l.scales){ + axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.n, momentum, l.scale_updates, 1); + } + + axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(size, momentum, l.weight_updates, 1); +} + + + diff --git a/workloads/realworld/uvm/darknet/src/deconvolutional_layer.h b/workloads/realworld/uvm/darknet/src/deconvolutional_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..722a1a58feec4ef13dac2b811df98e3f9960d4ef --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/deconvolutional_layer.h @@ -0,0 +1,25 @@ +#ifndef DECONVOLUTIONAL_LAYER_H +#define DECONVOLUTIONAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +#ifdef GPU +void forward_deconvolutional_layer_gpu(layer l, network net); +void backward_deconvolutional_layer_gpu(layer l, 
/* Total number of output floats across the network's detection heads
 * (YOLO / REGION / DETECTION layers); sizes the averaging buffers. */
int size_network(network *net)
{
    int total = 0;
    int i;
    for(i = 0; i < net->n; ++i){
        layer l = net->layers[i];
        int is_head = (l.type == YOLO || l.type == REGION || l.type == DETECTION);
        if(is_head) total += l.outputs;
    }
    return total;
}
remember_network(network *net) +{ + int i; + int count = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + memcpy(predictions[demo_index] + count, net->layers[i].output, sizeof(float) * l.outputs); + count += l.outputs; + } + } +} + +detection *avg_predictions(network *net, int *nboxes) +{ + int i, j; + int count = 0; + fill_cpu(demo_total, 0, avg, 1); + for(j = 0; j < demo_frame; ++j){ + axpy_cpu(demo_total, 1./demo_frame, predictions[j], 1, avg, 1); + } + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + memcpy(l.output, avg + count, sizeof(float) * l.outputs); + count += l.outputs; + } + } + detection *dets = get_network_boxes(net, buff[0].w, buff[0].h, demo_thresh, demo_hier, 0, 1, nboxes); + return dets; +} + +void *detect_in_thread(void *ptr) +{ + running = 1; + float nms = .4; + + layer l = net->layers[net->n-1]; + float *X = buff_letter[(buff_index+2)%3].data; + network_predict(net, X); + + /* + if(l.type == DETECTION){ + get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0); + } else */ + remember_network(net); + detection *dets = 0; + int nboxes = 0; + dets = avg_predictions(net, &nboxes); + + + /* + int i,j; + box zero = {0}; + int classes = l.classes; + for(i = 0; i < demo_detections; ++i){ + avg[i].objectness = 0; + avg[i].bbox = zero; + memset(avg[i].prob, 0, classes*sizeof(float)); + for(j = 0; j < demo_frame; ++j){ + axpy_cpu(classes, 1./demo_frame, dets[j][i].prob, 1, avg[i].prob, 1); + avg[i].objectness += dets[j][i].objectness * 1./demo_frame; + avg[i].bbox.x += dets[j][i].bbox.x * 1./demo_frame; + avg[i].bbox.y += dets[j][i].bbox.y * 1./demo_frame; + avg[i].bbox.w += dets[j][i].bbox.w * 1./demo_frame; + avg[i].bbox.h += dets[j][i].bbox.h * 1./demo_frame; + } + //copy_cpu(classes, dets[0][i].prob, 1, avg[i].prob, 1); + //avg[i].objectness = dets[0][i].objectness; + } + */ + + if (nms > 0) 
do_nms_obj(dets, nboxes, l.classes, nms); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.1f\n",fps); + printf("Objects:\n\n"); + image display = buff[(buff_index+2) % 3]; + draw_detections(display, dets, nboxes, demo_thresh, demo_names, demo_alphabet, demo_classes); + free_detections(dets, nboxes); + + demo_index = (demo_index + 1)%demo_frame; + running = 0; + return 0; +} + +void *fetch_in_thread(void *ptr) +{ + free_image(buff[buff_index]); + buff[buff_index] = get_image_from_stream(cap); + if(buff[buff_index].data == 0) { + demo_done = 1; + return 0; + } + letterbox_image_into(buff[buff_index], net->w, net->h, buff_letter[buff_index]); + return 0; +} + +void *display_in_thread(void *ptr) +{ + int c = show_image(buff[(buff_index + 1)%3], "Demo", 1); + if (c != -1) c = c%256; + if (c == 27) { + demo_done = 1; + return 0; + } else if (c == 82) { + demo_thresh += .02; + } else if (c == 84) { + demo_thresh -= .02; + if(demo_thresh <= .02) demo_thresh = .02; + } else if (c == 83) { + demo_hier += .02; + } else if (c == 81) { + demo_hier -= .02; + if(demo_hier <= .0) demo_hier = .0; + } + return 0; +} + +void *display_loop(void *ptr) +{ + while(1){ + display_in_thread(0); + } +} + +void *detect_loop(void *ptr) +{ + while(1){ + detect_in_thread(0); + } +} + +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) +{ + //demo_frame = avg_frames; + image **alphabet = load_alphabet(); + demo_names = names; + demo_alphabet = alphabet; + demo_classes = classes; + demo_thresh = thresh; + demo_hier = hier; + printf("Demo\n"); + net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + pthread_t detect_thread; + pthread_t fetch_thread; + + srand(2222222); + + int i; + demo_total = size_network(net); + predictions = calloc(demo_frame, sizeof(float*)); + for (i = 0; i < demo_frame; ++i){ + 
predictions[i] = calloc(demo_total, sizeof(float)); + } + avg = calloc(demo_total, sizeof(float)); + + if(filename){ + printf("video file: %s\n", filename); + cap = open_video_stream(filename, 0, 0, 0, 0); + }else{ + cap = open_video_stream(0, cam_index, w, h, frames); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + + buff[0] = get_image_from_stream(cap); + buff[1] = copy_image(buff[0]); + buff[2] = copy_image(buff[0]); + buff_letter[0] = letterbox_image(buff[0], net->w, net->h); + buff_letter[1] = letterbox_image(buff[0], net->w, net->h); + buff_letter[2] = letterbox_image(buff[0], net->w, net->h); + + int count = 0; + if(!prefix){ + make_window("Demo", 1352, 1013, fullscreen); + } + + demo_time = what_time_is_it_now(); + + while(!demo_done){ + buff_index = (buff_index + 1) %3; + if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); + if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); + if(!prefix){ + fps = 1./(what_time_is_it_now() - demo_time); + demo_time = what_time_is_it_now(); + display_in_thread(0); + }else{ + char name[256]; + sprintf(name, "%s_%08d", prefix, count); + save_image(buff[(buff_index + 1)%3], name); + } + pthread_join(fetch_thread, 0); + pthread_join(detect_thread, 0); + ++count; + } +} + +/* + void demo_compare(char *cfg1, char *weight1, char *cfg2, char *weight2, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) + { + demo_frame = avg_frames; + predictions = calloc(demo_frame, sizeof(float*)); + image **alphabet = load_alphabet(); + demo_names = names; + demo_alphabet = alphabet; + demo_classes = classes; + demo_thresh = thresh; + demo_hier = hier; + printf("Demo\n"); + net = load_network(cfg1, weight1, 0); + set_batch_network(net, 1); + pthread_t detect_thread; + pthread_t fetch_thread; + + srand(2222222); + + if(filename){ + 
printf("video file: %s\n", filename); + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + if(frames){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FPS, frames); + } + } + + if(!cap) error("Couldn't connect to webcam.\n"); + + layer l = net->layers[net->n-1]; + demo_detections = l.n*l.w*l.h; + int j; + + avg = (float *) calloc(l.outputs, sizeof(float)); + for(j = 0; j < demo_frame; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float)); + + boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box)); + probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *)); + for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes+1, sizeof(float)); + + buff[0] = get_image_from_stream(cap); + buff[1] = copy_image(buff[0]); + buff[2] = copy_image(buff[0]); + buff_letter[0] = letterbox_image(buff[0], net->w, net->h); + buff_letter[1] = letterbox_image(buff[0], net->w, net->h); + buff_letter[2] = letterbox_image(buff[0], net->w, net->h); + ipl = cvCreateImage(cvSize(buff[0].w,buff[0].h), IPL_DEPTH_8U, buff[0].c); + + int count = 0; + if(!prefix){ + cvNamedWindow("Demo", CV_WINDOW_NORMAL); + if(fullscreen){ + cvSetWindowProperty("Demo", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); + } else { + cvMoveWindow("Demo", 0, 0); + cvResizeWindow("Demo", 1352, 1013); + } + } + + demo_time = what_time_is_it_now(); + + while(!demo_done){ +buff_index = (buff_index + 1) %3; +if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); +if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); +if(!prefix){ + fps = 1./(what_time_is_it_now() - demo_time); + demo_time = what_time_is_it_now(); + display_in_thread(0); +}else{ + char name[256]; + sprintf(name, "%s_%08d", prefix, count); + save_image(buff[(buff_index + 1)%3], name); +} +pthread_join(fetch_thread, 
0); +pthread_join(detect_thread, 0); +++count; +} +} +*/ +#else +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg, float hier, int w, int h, int frames, int fullscreen) +{ + fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); +} +#endif + diff --git a/workloads/realworld/uvm/darknet/src/demo.h b/workloads/realworld/uvm/darknet/src/demo.h new file mode 100644 index 0000000000000000000000000000000000000000..86e46541d1a7473b22373b29bc6ff9cc281d4939 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/demo.h @@ -0,0 +1,6 @@ +#ifndef DEMO_H +#define DEMO_H + +#include "image.h" + +#endif diff --git a/workloads/realworld/uvm/darknet/src/detection_layer.c b/workloads/realworld/uvm/darknet/src/detection_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..565fa3c3f7d123736d65661d3be8ea91e26b3d5c --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/detection_layer.c @@ -0,0 +1,275 @@ +#include "detection_layer.h" +#include "activations.h" +#include "softmax_layer.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore) +{ + detection_layer l = {0}; + l.type = DETECTION; + + l.n = n; + l.batch = batch; + l.inputs = inputs; + l.classes = classes; + l.coords = coords; + l.rescore = rescore; + l.side = side; + l.w = side; + l.h = side; + assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs); + l.cost = calloc(1, sizeof(float)); + l.outputs = l.inputs; + l.truths = l.side*l.side*(1+l.coords+l.classes); + l.output = calloc(batch*l.outputs, sizeof(float)); + l.delta = calloc(batch*l.outputs, sizeof(float)); + + l.forward = forward_detection_layer; + l.backward = backward_detection_layer; +#ifdef GPU + l.forward_gpu = forward_detection_layer_gpu; + 
l.backward_gpu = backward_detection_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "Detection Layer\n"); + srand(0); + + return l; +} + +void forward_detection_layer(const detection_layer l, network net) +{ + int locations = l.side*l.side; + int i,j; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + //if(l.reorg) reorg(l.output, l.w*l.h, size*l.n, l.batch, 1); + int b; + if (l.softmax){ + for(b = 0; b < l.batch; ++b){ + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + int offset = i*l.classes; + softmax(l.output + index + offset, l.classes, 1, 1, + l.output + index + offset); + } + } + } + if(net.train){ + float avg_iou = 0; + float avg_cat = 0; + float avg_allcat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + *(l.cost) = 0; + int size = l.inputs * l.batch; + memset(l.delta, 0, size * sizeof(float)); + for (b = 0; b < l.batch; ++b){ + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + int truth_index = (b*locations + i)*(1+l.coords+l.classes); + int is_obj = net.truth[truth_index]; + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + l.delta[p_index] = l.noobject_scale*(0 - l.output[p_index]); + *(l.cost) += l.noobject_scale*pow(l.output[p_index], 2); + avg_anyobj += l.output[p_index]; + } + + int best_index = -1; + float best_iou = 0; + float best_rmse = 20; + + if (!is_obj){ + continue; + } + + int class_index = index + i*l.classes; + for(j = 0; j < l.classes; ++j) { + l.delta[class_index+j] = l.class_scale * (net.truth[truth_index+1+j] - l.output[class_index+j]); + *(l.cost) += l.class_scale * pow(net.truth[truth_index+1+j] - l.output[class_index+j], 2); + if(net.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j]; + avg_allcat += l.output[class_index+j]; + } + + box truth = float_to_box(net.truth + truth_index + 1 + l.classes, 1); + truth.x /= 
l.side; + truth.y /= l.side; + + for(j = 0; j < l.n; ++j){ + int box_index = index + locations*(l.classes + l.n) + (i*l.n + j) * l.coords; + box out = float_to_box(l.output + box_index, 1); + out.x /= l.side; + out.y /= l.side; + + if (l.sqrt){ + out.w = out.w*out.w; + out.h = out.h*out.h; + } + + float iou = box_iou(out, truth); + //iou = 0; + float rmse = box_rmse(out, truth); + if(best_iou > 0 || iou > 0){ + if(iou > best_iou){ + best_iou = iou; + best_index = j; + } + }else{ + if(rmse < best_rmse){ + best_rmse = rmse; + best_index = j; + } + } + } + + if(l.forced){ + if(truth.w*truth.h < .1){ + best_index = 1; + }else{ + best_index = 0; + } + } + if(l.random && *(net.seen) < 64000){ + best_index = rand()%l.n; + } + + int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords; + int tbox_index = truth_index + 1 + l.classes; + + box out = float_to_box(l.output + box_index, 1); + out.x /= l.side; + out.y /= l.side; + if (l.sqrt) { + out.w = out.w*out.w; + out.h = out.h*out.h; + } + float iou = box_iou(out, truth); + + //printf("%d,", best_index); + int p_index = index + locations*l.classes + i*l.n + best_index; + *(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2); + *(l.cost) += l.object_scale * pow(1-l.output[p_index], 2); + avg_obj += l.output[p_index]; + l.delta[p_index] = l.object_scale * (1.-l.output[p_index]); + + if(l.rescore){ + l.delta[p_index] = l.object_scale * (iou - l.output[p_index]); + } + + l.delta[box_index+0] = l.coord_scale*(net.truth[tbox_index + 0] - l.output[box_index + 0]); + l.delta[box_index+1] = l.coord_scale*(net.truth[tbox_index + 1] - l.output[box_index + 1]); + l.delta[box_index+2] = l.coord_scale*(net.truth[tbox_index + 2] - l.output[box_index + 2]); + l.delta[box_index+3] = l.coord_scale*(net.truth[tbox_index + 3] - l.output[box_index + 3]); + if(l.sqrt){ + l.delta[box_index+2] = l.coord_scale*(sqrt(net.truth[tbox_index + 2]) - l.output[box_index + 2]); + l.delta[box_index+3] = 
l.coord_scale*(sqrt(net.truth[tbox_index + 3]) - l.output[box_index + 3]); + } + + *(l.cost) += pow(1-iou, 2); + avg_iou += iou; + ++count; + } + } + + if(0){ + float *costs = calloc(l.batch*locations*l.n, sizeof(float)); + for (b = 0; b < l.batch; ++b) { + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + costs[b*locations*l.n + i*l.n + j] = l.delta[p_index]*l.delta[p_index]; + } + } + } + int indexes[100]; + top_k(costs, l.batch*locations*l.n, 100, indexes); + float cutoff = costs[indexes[99]]; + for (b = 0; b < l.batch; ++b) { + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + if (l.delta[p_index]*l.delta[p_index] < cutoff) l.delta[p_index] = 0; + } + } + } + free(costs); + } + + + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + + + printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count); + //if(l.reorg) reorg(l.delta, l.w*l.h, size*l.n, l.batch, 0); + } +} + +void backward_detection_layer(const detection_layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +void get_detection_detections(layer l, int w, int h, float thresh, detection *dets) +{ + int i,j,n; + float *predictions = l.output; + //int per_cell = 5*num+classes; + for (i = 0; i < l.side*l.side; ++i){ + int row = i / l.side; + int col = i % l.side; + for(n = 0; n < l.n; ++n){ + int index = i*l.n + n; + int p_index = l.side*l.side*l.classes + i*l.n + n; + float scale = predictions[p_index]; + int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n)*4; + box b; + b.x = (predictions[box_index + 0] + col) / l.side * w; + b.y = (predictions[box_index + 1] + row) / l.side * h; + b.w = 
pow(predictions[box_index + 2], (l.sqrt?2:1)) * w; + b.h = pow(predictions[box_index + 3], (l.sqrt?2:1)) * h; + dets[index].bbox = b; + dets[index].objectness = scale; + for(j = 0; j < l.classes; ++j){ + int class_index = i*l.classes; + float prob = scale*predictions[class_index+j]; + dets[index].prob[j] = (prob > thresh) ? prob : 0; + } + } + } +} + +#ifdef GPU + +void forward_detection_layer_gpu(const detection_layer l, network net) +{ + if(!net.train){ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + return; + } + + cuda_pull_array(net.input_gpu, net.input, l.batch*l.inputs); + forward_detection_layer(l, net); + cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +void backward_detection_layer_gpu(detection_layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); + //copy_gpu(l.batch*l.inputs, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/uvm/darknet/src/detection_layer.h b/workloads/realworld/uvm/darknet/src/detection_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1c818535700c770c7a5d9387534b199b58876198 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/detection_layer.h @@ -0,0 +1,18 @@ +#ifndef DETECTION_LAYER_H +#define DETECTION_LAYER_H + +#include "layer.h" +#include "network.h" + +typedef layer detection_layer; + +detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore); +void forward_detection_layer(const detection_layer l, network net); +void backward_detection_layer(const detection_layer l, network net); + +#ifdef GPU +void forward_detection_layer_gpu(const detection_layer l, network net); +void backward_detection_layer_gpu(detection_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm/darknet/src/dropout_layer.c b/workloads/realworld/uvm/darknet/src/dropout_layer.c new file 
mode 100644 index 0000000000000000000000000000000000000000..8fefa22caeddd174b2a7010274fadae854c742c1 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/dropout_layer.c @@ -0,0 +1,60 @@ +#include "dropout_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include +#include + +dropout_layer make_dropout_layer(int batch, int inputs, float probability) +{ + dropout_layer l = {0}; + l.type = DROPOUT; + l.probability = probability; + l.inputs = inputs; + l.outputs = inputs; + l.batch = batch; + l.rand = calloc(inputs*batch, sizeof(float)); + l.scale = 1./(1.-probability); + l.forward = forward_dropout_layer; + l.backward = backward_dropout_layer; + #ifdef GPU + l.forward_gpu = forward_dropout_layer_gpu; + l.backward_gpu = backward_dropout_layer_gpu; + l.rand_gpu = cuda_make_array(l.rand, inputs*batch); + #endif + fprintf(stderr, "dropout p = %.2f %4d -> %4d\n", probability, inputs, inputs); + return l; +} + +void resize_dropout_layer(dropout_layer *l, int inputs) +{ + l->rand = realloc(l->rand, l->inputs*l->batch*sizeof(float)); + #ifdef GPU + cuda_free(l->rand_gpu); + + l->rand_gpu = cuda_make_array(l->rand, inputs*l->batch); + #endif +} + +void forward_dropout_layer(dropout_layer l, network net) +{ + int i; + if (!net.train) return; + for(i = 0; i < l.batch * l.inputs; ++i){ + float r = rand_uniform(0, 1); + l.rand[i] = r; + if(r < l.probability) net.input[i] = 0; + else net.input[i] *= l.scale; + } +} + +void backward_dropout_layer(dropout_layer l, network net) +{ + int i; + if(!net.delta) return; + for(i = 0; i < l.batch * l.inputs; ++i){ + float r = l.rand[i]; + if(r < l.probability) net.delta[i] = 0; + else net.delta[i] *= l.scale; + } +} + diff --git a/workloads/realworld/uvm/darknet/src/dropout_layer.h b/workloads/realworld/uvm/darknet/src/dropout_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..01f94d4d7d10b732fb0e558089579e95128a70bd --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/dropout_layer.h @@ -0,0 +1,20 @@ 
+#ifndef DROPOUT_LAYER_H +#define DROPOUT_LAYER_H + +#include "layer.h" +#include "network.h" + +typedef layer dropout_layer; + +dropout_layer make_dropout_layer(int batch, int inputs, float probability); + +void forward_dropout_layer(dropout_layer l, network net); +void backward_dropout_layer(dropout_layer l, network net); +void resize_dropout_layer(dropout_layer *l, int inputs); + +#ifdef GPU +void forward_dropout_layer_gpu(dropout_layer l, network net); +void backward_dropout_layer_gpu(dropout_layer l, network net); + +#endif +#endif diff --git a/workloads/realworld/uvm/darknet/src/dropout_layer_kernels.cu b/workloads/realworld/uvm/darknet/src/dropout_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..23aef8f12ffc390504e05f0839878f7787a5381f --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/dropout_layer_kernels.cu @@ -0,0 +1,41 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "dropout_layer.h" +#include "cuda_dark.h" +#include "utils.h" +} + +__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id < size) input[id] = (rand[id] < prob) ? 
0 : input[id]*scale; +} + +void forward_dropout_layer_gpu(dropout_layer layer, network net) +{ + if (!net.train) return; + int size = layer.inputs*layer.batch; + cuda_random(layer.rand_gpu, size); + /* + int i; + for(i = 0; i < size; ++i){ + layer.rand[i] = rand_uniform(); + } + cuda_push_array(layer.rand_gpu, layer.rand, size); + */ + + yoloswag420blazeit360noscope<<>>(net.input_gpu, size, layer.rand_gpu, layer.probability, layer.scale); + check_error(cudaPeekAtLastError()); +} + +void backward_dropout_layer_gpu(dropout_layer layer, network net) +{ + if(!net.delta_gpu) return; + int size = layer.inputs*layer.batch; + + yoloswag420blazeit360noscope<<>>(net.delta_gpu, size, layer.rand_gpu, layer.probability, layer.scale); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/uvm/darknet/src/gemm.c b/workloads/realworld/uvm/darknet/src/gemm.c new file mode 100644 index 0000000000000000000000000000000000000000..756ae595d7348fc2d343a48715b05ea882d6aa7c --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/gemm.c @@ -0,0 +1,324 @@ +#include "gemm.h" +#include "utils.h" +#include "cuda_dark.h" +#include +#include +#include + +void gemm_bin(int M, int N, int K, float ALPHA, + char *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + char A_PART = A[i*lda+k]; + if(A_PART){ + for(j = 0; j < N; ++j){ + C[i*ldc+j] += B[k*ldb+j]; + } + } else { + for(j = 0; j < N; ++j){ + C[i*ldc+j] -= B[k*ldb+j]; + } + } + } + } +} + +float *random_matrix(int rows, int cols) +{ + int i; + float *m = calloc(rows*cols, sizeof(float)); + for(i = 0; i < rows*cols; ++i){ + m[i] = (float)rand()/RAND_MAX; + } + return m; +} + +void time_random_matrix(int TA, int TB, int m, int k, int n) +{ + float *a; + if(!TA) a = random_matrix(m,k); + else a = random_matrix(k,m); + int lda = (!TA)?k:m; + float *b; + if(!TB) b = random_matrix(k,n); + else b = random_matrix(n,k); + int ldb = (!TB)?n:k; + + float *c = 
random_matrix(m,n); + int i; + clock_t start = clock(), end; + for(i = 0; i<10; ++i){ + gemm_cpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); + } + end = clock(); + printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf ms\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); + free(a); + free(b); + free(c); +} + + +void gemm(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + gemm_cpu( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); +} + +void gemm_nn(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + register float A_PART = ALPHA*A[i*lda+k]; + for(j = 0; j < N; ++j){ + C[i*ldc+j] += A_PART*B[k*ldb+j]; + } + } + } +} + +void gemm_nt(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + register float sum = 0; + for(k = 0; k < K; ++k){ + sum += ALPHA*A[i*lda+k]*B[j*ldb + k]; + } + C[i*ldc+j] += sum; + } + } +} + +void gemm_tn(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + register float A_PART = ALPHA*A[k*lda+i]; + for(j = 0; j < N; ++j){ + C[i*ldc+j] += A_PART*B[k*ldb+j]; + } + } + } +} + +void gemm_tt(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + register float sum = 0; + for(k = 0; k < K; ++k){ + sum += ALPHA*A[i+k*lda]*B[k+j*ldb]; + } + C[i*ldc+j] += sum; + } + } +} + + +void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + 
//printf("cpu: %d %d %d %d %d %f %d %d %f %d\n",TA, TB, M, N, K, ALPHA, lda, ldb, BETA, ldc); + int i, j; + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + C[i*ldc + j] *= BETA; + } + } + if(!TA && !TB) + gemm_nn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else if(TA && !TB) + gemm_tn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else if(!TA && TB) + gemm_nt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else + gemm_tt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); +} + + +// #ifdef GPU + +// #include + +// void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, +// float *A_gpu, int lda, +// float *B_gpu, int ldb, +// float BETA, +// float *C_gpu, int ldc) +// { +// cublasHandle_t handle = blas_handle(); +// cudaError_t status = (cudaError_t) cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), +// (TA ? CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); +// check_error(status); +// } + +// #include +// #include +// #include +// #include + +// void time_gpu_random_matrix(int TA, int TB, int m, int k, int n) +// { +// float *a; +// if(!TA) a = random_matrix(m,k); +// else a = random_matrix(k,m); +// int lda = (!TA)?k:m; +// float *b; +// if(!TB) b = random_matrix(k,n); +// else b = random_matrix(n,k); +// int ldb = (!TB)?n:k; + +// float *c = random_matrix(m,n); +// int i; +// clock_t start = clock(), end; +// for(i = 0; i<32; ++i){ +// gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); +// } +// end = clock(); +// printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); +// free(a); +// free(b); +// free(c); +// } + +// void time_gpu(int TA, int TB, int m, int k, int n) +// { +// int iter = 10; +// float *a = random_matrix(m,k); +// float *b = random_matrix(k,n); + +// int lda = (!TA)?k:m; +// int ldb = (!TB)?n:k; + +// float *c = random_matrix(m,n); + +// float *a_cl = cuda_make_array(a, m*k); +// float *b_cl = cuda_make_array(b, k*n); +// float *c_cl = cuda_make_array(c, m*n); + +// int 
i; +// clock_t start = clock(), end; +// for(i = 0; i +#include +#include +#include +#include + +#include "gemm.h" +#include "utils.h" +#include "cuda_dark.h" + +#include +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK_X 16 +#define DIM_THREAD_BLOCK_Y 16 + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +__global__ void gemm_kernel(float *a, float *b, float *c, int M, int K, int N, float alpha, float beta) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // Compute each thread's global row and column index + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + + + // Statically allocated shared memory + __shared__ float s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + __shared__ float s_b[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + // Accumulate in temporary variable + + float tmp = 0.0f; + if (row < M && col < N) { + + tmp = beta * c[row * N + col]; + + // Sweep tile across matrix + for (int i = 0; i < K; i += blockDim.x) { + int left = K - i; + + if ((i + threadIdx.x) < K) + s_a[threadIdx.y * blockDim.x + threadIdx.x] = a[row * K + i + threadIdx.x]; + + if ((i + threadIdx.y) < K) + s_b[threadIdx.y * blockDim.x + threadIdx.x] = b[(i + threadIdx.y) * N + col]; + + block.sync(); + + for (int k = 0; k < blockDim.x && k < left ; k++) { + tmp += alpha * s_a[threadIdx.y * blockDim.x + k] * s_b[k * blockDim.x + threadIdx.x]; + } + block.sync(); + } + c[row * N + col] = tmp; + } + block.sync(); +} + +void gemmCuda(float *A, float *B, float *C, int M, int N, int K, float alpha, float beta) +{ + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 grid((size_t)(ceil(((float)N) / ((float)block.y))), (size_t)(ceil(((float)M) / 
((float)block.x)))); + + gemm_kernel<<>>(A, B, C, M, K, N, alpha, beta); + check_error(cudaPeekAtLastError()); +} + +void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, + float *A_gpu, int lda, + float *B_gpu, int ldb, + float BETA, + float *C_gpu, int ldc) +{ + // printf("TA is %d, TB is %d, M is %d, N is %d, K is %d, lda is %d, ldb is %d, ldc is %d.\n", TA, TB, M, N, K, lda, ldb, ldc); + if (TA == 0 && TB == 0) { + gemmCuda(A_gpu, B_gpu, C_gpu, M, N, K, ALPHA, BETA); + } else { + cublasHandle_t handle = blas_handle(); + cudaError_t status = (cudaError_t) cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), + (TA ? CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); + check_error(status); + } +} + + +void time_gpu_random_matrix(int TA, int TB, int m, int k, int n) +{ + float *a; + if(!TA) a = random_matrix(m,k); + else a = random_matrix(k,m); + int lda = (!TA)?k:m; + float *b; + if(!TB) b = random_matrix(k,n); + else b = random_matrix(n,k); + int ldb = (!TB)?n:k; + + float *c = random_matrix(m,n); + int i; + clock_t start = clock(), end; + for(i = 0; i<32; ++i){ + gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); + } + end = clock(); + printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); + free(a); + free(b); + free(c); +} + +void time_gpu(int TA, int TB, int m, int k, int n) +{ + int iter = 10; + float *a = random_matrix(m,k); + float *b = random_matrix(k,n); + + int lda = (!TA)?k:m; + int ldb = (!TB)?n:k; + + float *c = random_matrix(m,n); + + float *a_cl = cuda_make_array(a, m*k); + float *b_cl = cuda_make_array(b, k*n); + float *c_cl = cuda_make_array(c, m*n); + + int i; + clock_t start = clock(), end; + for(i = 0; i +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + 
l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam) +{ + fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = GRU; + l.steps = steps; + l.inputs = inputs; + + l.uz = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uz) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uz->batch = batch; + + l.wz = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wz) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wz->batch = batch; + + l.ur = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.ur) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.ur->batch = batch; + + l.wr = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wr) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wr->batch = batch; + + + + l.uh = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uh) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uh->batch = batch; + + l.wh = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wh) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wh->batch = batch; + + l.batch_normalize = batch_normalize; + + + l.outputs = outputs; + l.output = calloc(outputs*batch*steps, sizeof(float)); + l.delta = calloc(outputs*batch*steps, sizeof(float)); + l.state = calloc(outputs*batch, sizeof(float)); + l.prev_state = calloc(outputs*batch, sizeof(float)); + l.forgot_state = calloc(outputs*batch, sizeof(float)); + l.forgot_delta = calloc(outputs*batch, sizeof(float)); + + l.r_cpu = calloc(outputs*batch, sizeof(float)); + l.z_cpu = calloc(outputs*batch, sizeof(float)); + l.h_cpu = calloc(outputs*batch, 
sizeof(float)); + + l.forward = forward_gru_layer; + l.backward = backward_gru_layer; + l.update = update_gru_layer; + +#ifdef GPU + l.forward_gpu = forward_gru_layer_gpu; + l.backward_gpu = backward_gru_layer_gpu; + l.update_gpu = update_gru_layer_gpu; + + l.forgot_state_gpu = cuda_make_array(0, batch*outputs); + l.forgot_delta_gpu = cuda_make_array(0, batch*outputs); + l.prev_state_gpu = cuda_make_array(0, batch*outputs); + l.state_gpu = cuda_make_array(0, batch*outputs); + l.output_gpu = cuda_make_array(0, batch*outputs*steps); + l.delta_gpu = cuda_make_array(0, batch*outputs*steps); + l.r_gpu = cuda_make_array(0, batch*outputs); + l.z_gpu = cuda_make_array(0, batch*outputs); + l.h_gpu = cuda_make_array(0, batch*outputs); + +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l.uz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uz->out_c, l.uz->out_h, l.uz->out_w); + cudnnSetTensor4dDescriptor(l.uh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uh->out_c, l.uh->out_h, l.uh->out_w); + cudnnSetTensor4dDescriptor(l.ur->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ur->out_c, l.ur->out_h, l.ur->out_w); + cudnnSetTensor4dDescriptor(l.wz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wz->out_c, l.wz->out_h, l.wz->out_w); + cudnnSetTensor4dDescriptor(l.wh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wh->out_c, l.wh->out_h, l.wh->out_w); + cudnnSetTensor4dDescriptor(l.wr->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wr->out_c, l.wr->out_h, l.wr->out_w); +#endif +#endif + + return l; +} + +void update_gru_layer(layer l, update_args a) +{ + update_connected_layer(*(l.ur), a); + update_connected_layer(*(l.uz), a); + update_connected_layer(*(l.uh), a); + update_connected_layer(*(l.wr), a); + update_connected_layer(*(l.wz), a); + update_connected_layer(*(l.wh), a); +} + +void forward_gru_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer uz = *(l.uz); + 
layer ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + fill_cpu(l.outputs * l.batch * l.steps, 0, uz.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, ur.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, uh.delta, 1); + + fill_cpu(l.outputs * l.batch * l.steps, 0, wz.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wr.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wh.delta, 1); + if(net.train) { + fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1); + copy_cpu(l.outputs*l.batch, l.state, 1, l.prev_state, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input = l.state; + forward_connected_layer(wz, s); + forward_connected_layer(wr, s); + + s.input = net.input; + forward_connected_layer(uz, s); + forward_connected_layer(ur, s); + forward_connected_layer(uh, s); + + + copy_cpu(l.outputs*l.batch, uz.output, 1, l.z_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wz.output, 1, l.z_cpu, 1); + + copy_cpu(l.outputs*l.batch, ur.output, 1, l.r_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wr.output, 1, l.r_cpu, 1); + + activate_array(l.z_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.r_cpu, l.outputs*l.batch, LOGISTIC); + + copy_cpu(l.outputs*l.batch, l.state, 1, l.forgot_state, 1); + mul_cpu(l.outputs*l.batch, l.r_cpu, 1, l.forgot_state, 1); + + s.input = l.forgot_state; + forward_connected_layer(wh, s); + + copy_cpu(l.outputs*l.batch, uh.output, 1, l.h_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wh.output, 1, l.h_cpu, 1); + + if(l.tanh){ + activate_array(l.h_cpu, l.outputs*l.batch, TANH); + } else { + activate_array(l.h_cpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_sum_cpu(l.state, l.h_cpu, l.z_cpu, l.outputs*l.batch, l.output); + + copy_cpu(l.outputs*l.batch, l.output, 1, l.state, 1); + + net.input += l.inputs*l.batch; + l.output += l.outputs*l.batch; + increment_layer(&uz, 1); + increment_layer(&ur, 1); + increment_layer(&uh, 1); + + increment_layer(&wz, 1); + increment_layer(&wr, 
1); + increment_layer(&wh, 1); + } +} + +void backward_gru_layer(layer l, network net) +{ +} + +#ifdef GPU + +void pull_gru_layer(layer l) +{ +} + +void push_gru_layer(layer l) +{ +} + +void update_gru_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.ur), a); + update_connected_layer_gpu(*(l.uz), a); + update_connected_layer_gpu(*(l.uh), a); + update_connected_layer_gpu(*(l.wr), a); + update_connected_layer_gpu(*(l.wz), a); + update_connected_layer_gpu(*(l.wh), a); +} + +void forward_gru_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + fill_gpu(l.outputs * l.batch * l.steps, 0, uz.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, ur.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, uh.delta_gpu, 1); + + fill_gpu(l.outputs * l.batch * l.steps, 0, wz.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wr.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wh.delta_gpu, 1); + if(net.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.prev_state_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(wz, s); + forward_connected_layer_gpu(wr, s); + + s.input_gpu = net.input_gpu; + forward_connected_layer_gpu(uz, s); + forward_connected_layer_gpu(ur, s); + forward_connected_layer_gpu(uh, s); + + copy_gpu(l.outputs*l.batch, uz.output_gpu, 1, l.z_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wz.output_gpu, 1, l.z_gpu, 1); + + copy_gpu(l.outputs*l.batch, ur.output_gpu, 1, l.r_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wr.output_gpu, 1, l.r_gpu, 1); + + activate_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.forgot_state_gpu, 
1); + mul_gpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1); + + s.input_gpu = l.forgot_state_gpu; + forward_connected_layer_gpu(wh, s); + + copy_gpu(l.outputs*l.batch, uh.output_gpu, 1, l.h_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wh.output_gpu, 1, l.h_gpu, 1); + + if(l.tanh){ + activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH); + } else { + activate_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_sum_gpu(l.state_gpu, l.h_gpu, l.z_gpu, l.outputs*l.batch, l.output_gpu); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.state_gpu, 1); + + net.input_gpu += l.inputs*l.batch; + l.output_gpu += l.outputs*l.batch; + increment_layer(&uz, 1); + increment_layer(&ur, 1); + increment_layer(&uh, 1); + + increment_layer(&wz, 1); + increment_layer(&wr, 1); + increment_layer(&wh, 1); + } +} + +void backward_gru_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + increment_layer(&uz, l.steps - 1); + increment_layer(&ur, l.steps - 1); + increment_layer(&uh, l.steps - 1); + + increment_layer(&wz, l.steps - 1); + increment_layer(&wr, l.steps - 1); + increment_layer(&wh, l.steps - 1); + + net.input_gpu += l.inputs*l.batch*(l.steps-1); + if(net.delta_gpu) net.delta_gpu += l.inputs*l.batch*(l.steps-1); + l.output_gpu += l.outputs*l.batch*(l.steps-1); + l.delta_gpu += l.outputs*l.batch*(l.steps-1); + float *end_state = l.output_gpu; + for (i = l.steps-1; i >= 0; --i) { + if(i != 0) copy_gpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + else copy_gpu(l.outputs*l.batch, l.prev_state_gpu, 1, l.state_gpu, 1); + float *prev_delta_gpu = (i == 0) ? 
0 : l.delta_gpu - l.outputs*l.batch; + + copy_gpu(l.outputs*l.batch, uz.output_gpu, 1, l.z_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wz.output_gpu, 1, l.z_gpu, 1); + + copy_gpu(l.outputs*l.batch, ur.output_gpu, 1, l.r_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wr.output_gpu, 1, l.r_gpu, 1); + + activate_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, uh.output_gpu, 1, l.h_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wh.output_gpu, 1, l.h_gpu, 1); + + if(l.tanh){ + activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH); + } else { + activate_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_delta_gpu(l.state_gpu, l.h_gpu, l.z_gpu, prev_delta_gpu, uh.delta_gpu, uz.delta_gpu, l.outputs*l.batch, l.delta_gpu); + + if(l.tanh){ + gradient_array_gpu(l.h_gpu, l.outputs*l.batch, TANH, uh.delta_gpu); + } else { + gradient_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC, uh.delta_gpu); + } + + copy_gpu(l.outputs*l.batch, uh.delta_gpu, 1, wh.delta_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.forgot_state_gpu, 1); + mul_gpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1); + fill_gpu(l.outputs*l.batch, 0, l.forgot_delta_gpu, 1); + + s.input_gpu = l.forgot_state_gpu; + s.delta_gpu = l.forgot_delta_gpu; + + backward_connected_layer_gpu(wh, s); + if(prev_delta_gpu) mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.r_gpu, prev_delta_gpu); + mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.state_gpu, ur.delta_gpu); + + gradient_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC, ur.delta_gpu); + copy_gpu(l.outputs*l.batch, ur.delta_gpu, 1, wr.delta_gpu, 1); + + gradient_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC, uz.delta_gpu); + copy_gpu(l.outputs*l.batch, uz.delta_gpu, 1, wz.delta_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = prev_delta_gpu; + + backward_connected_layer_gpu(wr, s); + backward_connected_layer_gpu(wz, s); + + 
s.input_gpu = net.input_gpu; + s.delta_gpu = net.delta_gpu; + + backward_connected_layer_gpu(uh, s); + backward_connected_layer_gpu(ur, s); + backward_connected_layer_gpu(uz, s); + + + net.input_gpu -= l.inputs*l.batch; + if(net.delta_gpu) net.delta_gpu -= l.inputs*l.batch; + l.output_gpu -= l.outputs*l.batch; + l.delta_gpu -= l.outputs*l.batch; + increment_layer(&uz, -1); + increment_layer(&ur, -1); + increment_layer(&uh, -1); + + increment_layer(&wz, -1); + increment_layer(&wr, -1); + increment_layer(&wh, -1); + } + copy_gpu(l.outputs*l.batch, end_state, 1, l.state_gpu, 1); +} +#endif diff --git a/workloads/realworld/uvm/darknet/src/gru_layer.h b/workloads/realworld/uvm/darknet/src/gru_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9067942e9499d53c8d54f7728d64a5030200f4de --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/gru_layer.h @@ -0,0 +1,24 @@ + +#ifndef GRU_LAYER_H +#define GRU_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam); + +void forward_gru_layer(layer l, network state); +void backward_gru_layer(layer l, network state); +void update_gru_layer(layer l, update_args a); + +#ifdef GPU +void forward_gru_layer_gpu(layer l, network state); +void backward_gru_layer_gpu(layer l, network state); +void update_gru_layer_gpu(layer l, update_args a); +void push_gru_layer(layer l); +void pull_gru_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/uvm/darknet/src/im2col.c b/workloads/realworld/uvm/darknet/src/im2col.c new file mode 100644 index 0000000000000000000000000000000000000000..69ec98a9d12b2e21a3859611ad709d62fc80dcf3 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/im2col.c @@ -0,0 +1,40 @@ +#include "im2col.h" +#include +float im2col_get_pixel(float *im, int height, int width, int channels, + int row, int col, int channel, int pad) +{ + row -= pad; + col -= pad; + + 
if (row < 0 || col < 0 || + row >= height || col >= width) return 0; + return im[col + width*(row + height*channel)]; +} + +//From Berkeley Vision's Caffe! +//https://github.com/BVLC/caffe/blob/master/LICENSE +void im2col_cpu(float* data_im, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_col) +{ + int c,h,w; + int height_col = (height + 2*pad - ksize) / stride + 1; + int width_col = (width + 2*pad - ksize) / stride + 1; + + int channels_col = channels * ksize * ksize; + for (c = 0; c < channels_col; ++c) { + int w_offset = c % ksize; + int h_offset = (c / ksize) % ksize; + int c_im = c / ksize / ksize; + for (h = 0; h < height_col; ++h) { + for (w = 0; w < width_col; ++w) { + int im_row = h_offset + h * stride; + int im_col = w_offset + w * stride; + int col_index = (c * height_col + h) * width_col + w; + data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, + im_row, im_col, c_im, pad); + } + } + } +} + diff --git a/workloads/realworld/uvm/darknet/src/im2col.h b/workloads/realworld/uvm/darknet/src/im2col.h new file mode 100644 index 0000000000000000000000000000000000000000..02c4247fad9b8428a8e89fc8caec0b5b6ba5b36a --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/im2col.h @@ -0,0 +1,15 @@ +#ifndef IM2COL_H +#define IM2COL_H + +void im2col_cpu(float* data_im, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_col); + +#ifdef GPU + +void im2col_gpu(float *im, + int channels, int height, int width, + int ksize, int stride, int pad,float *data_col); + +#endif +#endif diff --git a/workloads/realworld/uvm/darknet/src/im2col_kernels.cu b/workloads/realworld/uvm/darknet/src/im2col_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..660806208adf57bac0afe8b026de3e97e57cd250 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/im2col_kernels.cu @@ -0,0 +1,62 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { 
+#include "im2col.h" +#include "cuda_dark.h" +} + +// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu +// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE + +__global__ void im2col_gpu_kernel(const int n, const float* data_im, + const int height, const int width, const int ksize, + const int pad, + const int stride, + const int height_col, const int width_col, + float *data_col) { + int index = blockIdx.x*blockDim.x+threadIdx.x; + for(; index < n; index += blockDim.x*gridDim.x){ + int w_out = index % width_col; + int h_index = index / width_col; + int h_out = h_index % height_col; + int channel_in = h_index / height_col; + int channel_out = channel_in * ksize * ksize; + int h_in = h_out * stride - pad; + int w_in = w_out * stride - pad; + float* data_col_ptr = data_col; + data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; + const float* data_im_ptr = data_im; + data_im_ptr += (channel_in * height + h_in) * width + w_in; + for (int i = 0; i < ksize; ++i) { + for (int j = 0; j < ksize; ++j) { + int h = h_in + i; + int w = w_in + j; + + *data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ? + data_im_ptr[i * width + j] : 0; + + //*data_col_ptr = data_im_ptr[ii * width + jj]; + + data_col_ptr += height_col * width_col; + } + } + } +} + +void im2col_gpu(float *im, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_col){ + // We are going to launch channels * height_col * width_col kernels, each + // kernel responsible for copying a single-channel grid. 
+ int height_col = (height + 2 * pad - ksize) / stride + 1; + int width_col = (width + 2 * pad - ksize) / stride + 1; + int num_kernels = channels * height_col * width_col; + im2col_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, + BLOCK>>>( + num_kernels, im, height, width, ksize, pad, + stride, height_col, + width_col, data_col); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/uvm/darknet/src/image.c b/workloads/realworld/uvm/darknet/src/image.c new file mode 100644 index 0000000000000000000000000000000000000000..3edf6d1045f4637d7bb108440302fd36d5ef9a18 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/image.c @@ -0,0 +1,1467 @@ +#include "image.h" +#include "utils.h" +#include "blas.h" +#include "cuda_dark.h" +#include +#include + +#define STB_IMAGE_IMPLEMENTATION +#include "stb_image.h" +#define STB_IMAGE_WRITE_IMPLEMENTATION +#include "stb_image_write.h" + +int windows = 0; + +float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} }; + +float get_color(int c, int x, int max) +{ + float ratio = ((float)x/max)*5; + int i = floor(ratio); + int j = ceil(ratio); + ratio -= i; + float r = (1-ratio) * colors[i][c] + ratio*colors[j][c]; + //printf("%f\n", r); + return r; +} + +image mask_to_rgb(image mask) +{ + int n = mask.c; + image im = make_image(mask.w, mask.h, 3); + int i, j; + for(j = 0; j < n; ++j){ + int offset = j*123457 % n; + float red = get_color(2,offset,n); + float green = get_color(1,offset,n); + float blue = get_color(0,offset,n); + for(i = 0; i < im.w*im.h; ++i){ + im.data[i + 0*im.w*im.h] += mask.data[j*im.h*im.w + i]*red; + im.data[i + 1*im.w*im.h] += mask.data[j*im.h*im.w + i]*green; + im.data[i + 2*im.w*im.h] += mask.data[j*im.h*im.w + i]*blue; + } + } + return im; +} + +static float get_pixel(image m, int x, int y, int c) +{ + assert(x < m.w && y < m.h && c < m.c); + return m.data[c*m.h*m.w + y*m.w + x]; +} +static float get_pixel_extend(image m, int x, int y, int c) +{ + if(x < 0 || x >= m.w || y < 0 
|| y >= m.h) return 0; + /* + if(x < 0) x = 0; + if(x >= m.w) x = m.w-1; + if(y < 0) y = 0; + if(y >= m.h) y = m.h-1; + */ + if(c < 0 || c >= m.c) return 0; + return get_pixel(m, x, y, c); +} +static void set_pixel(image m, int x, int y, int c, float val) +{ + if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return; + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] = val; +} +static void add_pixel(image m, int x, int y, int c, float val) +{ + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] += val; +} + +static float bilinear_interpolate(image im, float x, float y, int c) +{ + int ix = (int) floorf(x); + int iy = (int) floorf(y); + + float dx = x - ix; + float dy = y - iy; + + float val = (1-dy) * (1-dx) * get_pixel_extend(im, ix, iy, c) + + dy * (1-dx) * get_pixel_extend(im, ix, iy+1, c) + + (1-dy) * dx * get_pixel_extend(im, ix+1, iy, c) + + dy * dx * get_pixel_extend(im, ix+1, iy+1, c); + return val; +} + + +void composite_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float val = get_pixel(source, x, y, k); + float val2 = get_pixel_extend(dest, dx+x, dy+y, k); + set_pixel(dest, dx+x, dy+y, k, val * val2); + } + } + } +} + +image border_image(image a, int border) +{ + image b = make_image(a.w + 2*border, a.h + 2*border, a.c); + int x,y,k; + for(k = 0; k < b.c; ++k){ + for(y = 0; y < b.h; ++y){ + for(x = 0; x < b.w; ++x){ + float val = get_pixel_extend(a, x - border, y - border, k); + if(x - border < 0 || x - border >= a.w || y - border < 0 || y - border >= a.h) val = 1; + set_pixel(b, x, y, k, val); + } + } + } + return b; +} + +image tile_images(image a, image b, int dx) +{ + if(a.w == 0) return copy_image(b); + image c = make_image(a.w + b.w + dx, (a.h > b.h) ? a.h : b.h, (a.c > b.c) ? 
a.c : b.c); + fill_cpu(c.w*c.h*c.c, 1, c.data, 1); + embed_image(a, c, 0, 0); + composite_image(b, c, a.w + dx, 0); + return c; +} + +image get_label(image **characters, char *string, int size) +{ + size = size/10; + if(size > 7) size = 7; + image label = make_empty_image(0,0,0); + while(*string){ + image l = characters[size][(int)*string]; + image n = tile_images(label, l, -size - 1 + (size+1)/2); + free_image(label); + label = n; + ++string; + } + image b = border_image(label, label.h*.25); + free_image(label); + return b; +} + +void draw_label(image a, int r, int c, image label, const float *rgb) +{ + int w = label.w; + int h = label.h; + if (r - h >= 0) r = r - h; + + int i, j, k; + for(j = 0; j < h && j + r < a.h; ++j){ + for(i = 0; i < w && i + c < a.w; ++i){ + for(k = 0; k < label.c; ++k){ + float val = get_pixel(label, i, j, k); + set_pixel(a, i+c, j+r, k, rgb[k] * val); + } + } + } +} + +void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b) +{ + //normalize_image(a); + int i; + if(x1 < 0) x1 = 0; + if(x1 >= a.w) x1 = a.w-1; + if(x2 < 0) x2 = 0; + if(x2 >= a.w) x2 = a.w-1; + + if(y1 < 0) y1 = 0; + if(y1 >= a.h) y1 = a.h-1; + if(y2 < 0) y2 = 0; + if(y2 >= a.h) y2 = a.h-1; + + for(i = x1; i <= x2; ++i){ + a.data[i + y1*a.w + 0*a.w*a.h] = r; + a.data[i + y2*a.w + 0*a.w*a.h] = r; + + a.data[i + y1*a.w + 1*a.w*a.h] = g; + a.data[i + y2*a.w + 1*a.w*a.h] = g; + + a.data[i + y1*a.w + 2*a.w*a.h] = b; + a.data[i + y2*a.w + 2*a.w*a.h] = b; + } + for(i = y1; i <= y2; ++i){ + a.data[x1 + i*a.w + 0*a.w*a.h] = r; + a.data[x2 + i*a.w + 0*a.w*a.h] = r; + + a.data[x1 + i*a.w + 1*a.w*a.h] = g; + a.data[x2 + i*a.w + 1*a.w*a.h] = g; + + a.data[x1 + i*a.w + 2*a.w*a.h] = b; + a.data[x2 + i*a.w + 2*a.w*a.h] = b; + } +} + +void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b) +{ + int i; + for(i = 0; i < w; ++i){ + draw_box(a, x1+i, y1+i, x2-i, y2-i, r, g, b); + } +} + +void draw_bbox(image a, box bbox, int w, 
float r, float g, float b) +{ + int left = (bbox.x-bbox.w/2)*a.w; + int right = (bbox.x+bbox.w/2)*a.w; + int top = (bbox.y-bbox.h/2)*a.h; + int bot = (bbox.y+bbox.h/2)*a.h; + + int i; + for(i = 0; i < w; ++i){ + draw_box(a, left+i, top+i, right-i, bot-i, r, g, b); + } +} + +image **load_alphabet() +{ + char *value = getenv("UVMAsyncBench_BASE"); + int i, j; + const int nsize = 8; + image **alphabets = calloc(nsize, sizeof(image)); + for(j = 0; j < nsize; ++j){ + alphabets[j] = calloc(128, sizeof(image)); + for(i = 32; i < 127; ++i){ + char buff[256]; + sprintf(buff, "%s/workloads/realworld/standard/darknet/data/labels/%d_%d.png", value, i, j); + alphabets[j][i] = load_image_color(buff, 0, 0); + } + } + return alphabets; +} + +void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes) +{ + int i,j; + + for(i = 0; i < num; ++i){ + char labelstr[4096] = {0}; + int class = -1; + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j] > thresh){ + if (class < 0) { + strcat(labelstr, names[j]); + class = j; + } else { + strcat(labelstr, ", "); + strcat(labelstr, names[j]); + } + printf("%s: %.0f%%\n", names[j], dets[i].prob[j]*100); + } + } + if(class >= 0){ + int width = im.h * .006; + + /* + if(0){ + width = pow(prob, 1./2.)*10+1; + alphabet = 0; + } + */ + + //printf("%d %s: %.0f%%\n", i, names[class], prob*100); + int offset = class*123457 % classes; + float red = get_color(2,offset,classes); + float green = get_color(1,offset,classes); + float blue = get_color(0,offset,classes); + float rgb[3]; + + //width = prob*20+2; + + rgb[0] = red; + rgb[1] = green; + rgb[2] = blue; + box b = dets[i].bbox; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + + int left = (b.x-b.w/2.)*im.w; + int right = (b.x+b.w/2.)*im.w; + int top = (b.y-b.h/2.)*im.h; + int bot = (b.y+b.h/2.)*im.h; + + if(left < 0) left = 0; + if(right > im.w-1) right = im.w-1; + if(top < 0) top = 0; + if(bot > im.h-1) bot = im.h-1; + + draw_box_width(im, 
left, top, right, bot, width, red, green, blue); + if (alphabet) { + image label = get_label(alphabet, labelstr, (im.h*.03)); + draw_label(im, top + width, left, label, rgb); + free_image(label); + } + if (dets[i].mask){ + image mask = float_to_image(14, 14, 1, dets[i].mask); + image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h); + image tmask = threshold_image(resized_mask, .5); + embed_image(tmask, im, left, top); + free_image(mask); + free_image(resized_mask); + free_image(tmask); + } + } + } +} + +void transpose_image(image im) +{ + assert(im.w == im.h); + int n, m; + int c; + for(c = 0; c < im.c; ++c){ + for(n = 0; n < im.w-1; ++n){ + for(m = n + 1; m < im.w; ++m){ + float swap = im.data[m + im.w*(n + im.h*c)]; + im.data[m + im.w*(n + im.h*c)] = im.data[n + im.w*(m + im.h*c)]; + im.data[n + im.w*(m + im.h*c)] = swap; + } + } + } +} + +void rotate_image_cw(image im, int times) +{ + assert(im.w == im.h); + times = (times + 400) % 4; + int i, x, y, c; + int n = im.w; + for(i = 0; i < times; ++i){ + for(c = 0; c < im.c; ++c){ + for(x = 0; x < n/2; ++x){ + for(y = 0; y < (n-1)/2 + 1; ++y){ + float temp = im.data[y + im.w*(x + im.h*c)]; + im.data[y + im.w*(x + im.h*c)] = im.data[n-1-x + im.w*(y + im.h*c)]; + im.data[n-1-x + im.w*(y + im.h*c)] = im.data[n-1-y + im.w*(n-1-x + im.h*c)]; + im.data[n-1-y + im.w*(n-1-x + im.h*c)] = im.data[x + im.w*(n-1-y + im.h*c)]; + im.data[x + im.w*(n-1-y + im.h*c)] = temp; + } + } + } + } +} + +void flip_image(image a) +{ + int i,j,k; + for(k = 0; k < a.c; ++k){ + for(i = 0; i < a.h; ++i){ + for(j = 0; j < a.w/2; ++j){ + int index = j + a.w*(i + a.h*(k)); + int flip = (a.w - j - 1) + a.w*(i + a.h*(k)); + float swap = a.data[flip]; + a.data[flip] = a.data[index]; + a.data[index] = swap; + } + } + } +} + +image image_distance(image a, image b) +{ + int i,j; + image dist = make_image(a.w, a.h, 1); + for(i = 0; i < a.c; ++i){ + for(j = 0; j < a.h*a.w; ++j){ + dist.data[j] += pow(a.data[i*a.h*a.w+j]-b.data[i*a.h*a.w+j],2); + } + } 
+ for(j = 0; j < a.h*a.w; ++j){ + dist.data[j] = sqrt(dist.data[j]); + } + return dist; +} + +void ghost_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + float max_dist = sqrt((-source.w/2. + .5)*(-source.w/2. + .5)); + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float dist = sqrt((x - source.w/2. + .5)*(x - source.w/2. + .5) + (y - source.h/2. + .5)*(y - source.h/2. + .5)); + float alpha = (1 - dist/max_dist); + if(alpha < 0) alpha = 0; + float v1 = get_pixel(source, x,y,k); + float v2 = get_pixel(dest, dx+x,dy+y,k); + float val = alpha*v1 + (1-alpha)*v2; + set_pixel(dest, dx+x, dy+y, k, val); + } + } + } +} + +void blocky_image(image im, int s) +{ + int i,j,k; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + im.data[i + im.w*(j + im.h*k)] = im.data[i/s*s + im.w*(j/s*s + im.h*k)]; + } + } + } +} + +void censor_image(image im, int dx, int dy, int w, int h) +{ + int i,j,k; + int s = 32; + if(dx < 0) dx = 0; + if(dy < 0) dy = 0; + + for(k = 0; k < im.c; ++k){ + for(j = dy; j < dy + h && j < im.h; ++j){ + for(i = dx; i < dx + w && i < im.w; ++i){ + im.data[i + im.w*(j + im.h*k)] = im.data[i/s*s + im.w*(j/s*s + im.h*k)]; + //im.data[i + j*im.w + k*im.w*im.h] = 0; + } + } + } +} + +void embed_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float val = get_pixel(source, x,y,k); + set_pixel(dest, dx+x, dy+y, k, val); + } + } + } +} + +image collapse_image_layers(image source, int border) +{ + int h = source.h; + h = (h+border)*source.c - border; + image dest = make_image(source.w, h, 1); + int i; + for(i = 0; i < source.c; ++i){ + image layer = get_image_layer(source, i); + int h_offset = i*(source.h+border); + embed_image(layer, dest, 0, h_offset); + free_image(layer); + } + return dest; +} + +void constrain_image(image im) +{ + int i; + for(i = 
0; i < im.w*im.h*im.c; ++i){ + if(im.data[i] < 0) im.data[i] = 0; + if(im.data[i] > 1) im.data[i] = 1; + } +} + +void normalize_image(image p) +{ + int i; + float min = 9999999; + float max = -999999; + + for(i = 0; i < p.h*p.w*p.c; ++i){ + float v = p.data[i]; + if(v < min) min = v; + if(v > max) max = v; + } + if(max - min < .000000001){ + min = 0; + max = 1; + } + for(i = 0; i < p.c*p.w*p.h; ++i){ + p.data[i] = (p.data[i] - min)/(max-min); + } +} + +void normalize_image2(image p) +{ + float *min = calloc(p.c, sizeof(float)); + float *max = calloc(p.c, sizeof(float)); + int i,j; + for(i = 0; i < p.c; ++i) min[i] = max[i] = p.data[i*p.h*p.w]; + + for(j = 0; j < p.c; ++j){ + for(i = 0; i < p.h*p.w; ++i){ + float v = p.data[i+j*p.h*p.w]; + if(v < min[j]) min[j] = v; + if(v > max[j]) max[j] = v; + } + } + for(i = 0; i < p.c; ++i){ + if(max[i] - min[i] < .000000001){ + min[i] = 0; + max[i] = 1; + } + } + for(j = 0; j < p.c; ++j){ + for(i = 0; i < p.w*p.h; ++i){ + p.data[i+j*p.h*p.w] = (p.data[i+j*p.h*p.w] - min[j])/(max[j]-min[j]); + } + } + free(min); + free(max); +} + +void copy_image_into(image src, image dest) +{ + memcpy(dest.data, src.data, src.h*src.w*src.c*sizeof(float)); +} + +image copy_image(image p) +{ + image copy = p; + copy.data = calloc(p.h*p.w*p.c, sizeof(float)); + memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float)); + return copy; +} + +void rgbgr_image(image im) +{ + int i; + for(i = 0; i < im.w*im.h; ++i){ + float swap = im.data[i]; + im.data[i] = im.data[i+im.w*im.h*2]; + im.data[i+im.w*im.h*2] = swap; + } +} + +int show_image(image p, const char *name, int ms) +{ +#ifdef OPENCV + int c = show_image_cv(p, name, ms); + return c; +#else + fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name); + save_image(p, name); + return -1; +#endif +} + +void save_image_options(image im, const char *name, IMTYPE f, int quality) +{ + char buff[256]; + //sprintf(buff, "%s (%d)", name, windows); + if(f == PNG) sprintf(buff, "%s.png", 
name); + else if (f == BMP) sprintf(buff, "%s.bmp", name); + else if (f == TGA) sprintf(buff, "%s.tga", name); + else if (f == JPG) sprintf(buff, "%s.jpg", name); + else sprintf(buff, "%s.png", name); + unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char)); + int i,k; + for(k = 0; k < im.c; ++k){ + for(i = 0; i < im.w*im.h; ++i){ + data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]); + } + } + int success = 0; + if(f == PNG) success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c); + else if (f == BMP) success = stbi_write_bmp(buff, im.w, im.h, im.c, data); + else if (f == TGA) success = stbi_write_tga(buff, im.w, im.h, im.c, data); + else if (f == JPG) success = stbi_write_jpg(buff, im.w, im.h, im.c, data, quality); + free(data); + if(!success) fprintf(stderr, "Failed to write image %s\n", buff); +} + +void save_image(image im, const char *name) +{ + save_image_options(im, name, JPG, 80); +} + +void show_image_layers(image p, char *name) +{ + int i; + char buff[256]; + for(i = 0; i < p.c; ++i){ + sprintf(buff, "%s - Layer %d", name, i); + image layer = get_image_layer(p, i); + show_image(layer, buff, 1); + free_image(layer); + } +} + +void show_image_collapsed(image p, char *name) +{ + image c = collapse_image_layers(p, 1); + show_image(c, name, 1); + free_image(c); +} + +image make_empty_image(int w, int h, int c) +{ + image out; + out.data = 0; + out.h = h; + out.w = w; + out.c = c; + return out; +} + +image make_image(int w, int h, int c) +{ + image out = make_empty_image(w,h,c); + out.data = calloc(h*w*c, sizeof(float)); + return out; +} + +image make_random_image(int w, int h, int c) +{ + image out = make_empty_image(w,h,c); + out.data = calloc(h*w*c, sizeof(float)); + int i; + for(i = 0; i < w*h*c; ++i){ + out.data[i] = (rand_normal() * .25) + .5; + } + return out; +} + +image float_to_image(int w, int h, int c, float *data) +{ + image out = make_empty_image(w,h,c); + out.data = data; + return out; +} + +void place_image(image im, 
int w, int h, int dx, int dy, image canvas) +{ + int x, y, c; + for(c = 0; c < im.c; ++c){ + for(y = 0; y < h; ++y){ + for(x = 0; x < w; ++x){ + float rx = ((float)x / w) * im.w; + float ry = ((float)y / h) * im.h; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(canvas, x + dx, y + dy, c, val); + } + } + } +} + +image center_crop_image(image im, int w, int h) +{ + int m = (im.w < im.h) ? im.w : im.h; + image c = crop_image(im, (im.w - m) / 2, (im.h - m)/2, m, m); + image r = resize_image(c, w, h); + free_image(c); + return r; +} + +image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect) +{ + int x, y, c; + float cx = im.w/2.; + float cy = im.h/2.; + image rot = make_image(w, h, im.c); + for(c = 0; c < im.c; ++c){ + for(y = 0; y < h; ++y){ + for(x = 0; x < w; ++x){ + float rx = cos(rad)*((x - w/2.)/s*aspect + dx/s*aspect) - sin(rad)*((y - h/2.)/s + dy/s) + cx; + float ry = sin(rad)*((x - w/2.)/s*aspect + dx/s*aspect) + cos(rad)*((y - h/2.)/s + dy/s) + cy; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(rot, x, y, c, val); + } + } + } + return rot; +} + +image rotate_image(image im, float rad) +{ + int x, y, c; + float cx = im.w/2.; + float cy = im.h/2.; + image rot = make_image(im.w, im.h, im.c); + for(c = 0; c < im.c; ++c){ + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx; + float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(rot, x, y, c, val); + } + } + } + return rot; +} + +void fill_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] = s; +} + +void translate_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s; +} + +void scale_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s; +} + +image crop_image(image im, int dx, int dy, int w, int h) +{ + image cropped = 
make_image(w, h, im.c); + int i, j, k; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int r = j + dy; + int c = i + dx; + float val = 0; + r = constrain_int(r, 0, im.h-1); + c = constrain_int(c, 0, im.w-1); + val = get_pixel(im, c, r, k); + set_pixel(cropped, i, j, k, val); + } + } + } + return cropped; +} + +int best_3d_shift_r(image a, image b, int min, int max) +{ + if(min == max) return min; + int mid = floor((min + max) / 2.); + image c1 = crop_image(b, 0, mid, b.w, b.h); + image c2 = crop_image(b, 0, mid+1, b.w, b.h); + float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 10); + float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 10); + free_image(c1); + free_image(c2); + if(d1 < d2) return best_3d_shift_r(a, b, min, mid); + else return best_3d_shift_r(a, b, mid+1, max); +} + +int best_3d_shift(image a, image b, int min, int max) +{ + int i; + int best = 0; + float best_distance = FLT_MAX; + for(i = min; i <= max; i += 2){ + image c = crop_image(b, 0, i, b.w, b.h); + float d = dist_array(c.data, a.data, a.w*a.h*a.c, 100); + if(d < best_distance){ + best_distance = d; + best = i; + } + printf("%d %f\n", i, d); + free_image(c); + } + return best; +} + +void composite_3d(char *f1, char *f2, char *out, int delta) +{ + if(!out) out = "out"; + image a = load_image(f1, 0,0,0); + image b = load_image(f2, 0,0,0); + int shift = best_3d_shift_r(a, b, -a.h/100, a.h/100); + + image c1 = crop_image(b, 10, shift, b.w, b.h); + float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 100); + image c2 = crop_image(b, -10, shift, b.w, b.h); + float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 100); + + if(d2 < d1 && 0){ + image swap = a; + a = b; + b = swap; + shift = -shift; + printf("swapped, %d\n", shift); + } + else{ + printf("%d\n", shift); + } + + image c = crop_image(b, delta, shift, a.w, a.h); + int i; + for(i = 0; i < c.w*c.h; ++i){ + c.data[i] = a.data[i]; + } + save_image(c, out); +} + +void letterbox_image_into(image im, int w, int h, 
image boxed) +{ + int new_w = im.w; + int new_h = im.h; + if (((float)w/im.w) < ((float)h/im.h)) { + new_w = w; + new_h = (im.h * w)/im.w; + } else { + new_h = h; + new_w = (im.w * h)/im.h; + } + image resized = resize_image(im, new_w, new_h); + embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2); + free_image(resized); +} + +image letterbox_image(image im, int w, int h) +{ + int new_w = im.w; + int new_h = im.h; + if (((float)w/im.w) < ((float)h/im.h)) { + new_w = w; + new_h = (im.h * w)/im.w; + } else { + new_h = h; + new_w = (im.w * h)/im.h; + } + image resized = resize_image(im, new_w, new_h); + image boxed = make_image(w, h, im.c); + fill_image(boxed, .5); + //int i; + //for(i = 0; i < boxed.w*boxed.h*boxed.c; ++i) boxed.data[i] = 0; + embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2); + free_image(resized); + return boxed; +} + +image resize_max(image im, int max) +{ + int w = im.w; + int h = im.h; + if(w > h){ + h = (h * max) / w; + w = max; + } else { + w = (w * max) / h; + h = max; + } + if(w == im.w && h == im.h) return im; + image resized = resize_image(im, w, h); + return resized; +} + +image resize_min(image im, int min) +{ + int w = im.w; + int h = im.h; + if(w < h){ + h = (h * min) / w; + w = min; + } else { + w = (w * min) / h; + h = min; + } + if(w == im.w && h == im.h) return im; + image resized = resize_image(im, w, h); + return resized; +} + +image random_crop_image(image im, int w, int h) +{ + int dx = rand_int(0, im.w - w); + int dy = rand_int(0, im.h - h); + image crop = crop_image(im, dx, dy, w, h); + return crop; +} + +augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h) +{ + augment_args a = {0}; + aspect = rand_scale(aspect); + int r = rand_int(low, high); + int min = (im.h < im.w*aspect) ? 
im.h : im.w*aspect; + float scale = (float)r / min; + + float rad = rand_uniform(-angle, angle) * TWO_PI / 360.; + + float dx = (im.w*scale/aspect - w) / 2.; + float dy = (im.h*scale - w) / 2.; + //if(dx < 0) dx = 0; + //if(dy < 0) dy = 0; + dx = rand_uniform(-dx, dx); + dy = rand_uniform(-dy, dy); + + a.rad = rad; + a.scale = scale; + a.w = w; + a.h = h; + a.dx = dx; + a.dy = dy; + a.aspect = aspect; + return a; +} + +image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h) +{ + augment_args a = random_augment_args(im, angle, aspect, low, high, w, h); + image crop = rotate_crop_image(im, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + return crop; +} + +float three_way_max(float a, float b, float c) +{ + return (a > b) ? ( (a > c) ? a : c) : ( (b > c) ? b : c) ; +} + +float three_way_min(float a, float b, float c) +{ + return (a < b) ? ( (a < c) ? a : c) : ( (b < c) ? b : c) ; +} + +void yuv_to_rgb(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float y, u, v; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + y = get_pixel(im, i , j, 0); + u = get_pixel(im, i , j, 1); + v = get_pixel(im, i , j, 2); + + r = y + 1.13983*v; + g = y + -.39465*u + -.58060*v; + b = y + 2.03211*u; + + set_pixel(im, i, j, 0, r); + set_pixel(im, i, j, 1, g); + set_pixel(im, i, j, 2, b); + } + } +} + +void rgb_to_yuv(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float y, u, v; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + r = get_pixel(im, i , j, 0); + g = get_pixel(im, i , j, 1); + b = get_pixel(im, i , j, 2); + + y = .299*r + .587*g + .114*b; + u = -.14713*r + -.28886*g + .436*b; + v = .615*r + -.51499*g + -.10001*b; + + set_pixel(im, i, j, 0, y); + set_pixel(im, i, j, 1, u); + set_pixel(im, i, j, 2, v); + } + } +} + +// http://www.cs.rit.edu/~ncs/color/t_convert.html +void rgb_to_hsv(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float h, s, v; + for(j = 0; j < im.h; 
++j){ + for(i = 0; i < im.w; ++i){ + r = get_pixel(im, i , j, 0); + g = get_pixel(im, i , j, 1); + b = get_pixel(im, i , j, 2); + float max = three_way_max(r,g,b); + float min = three_way_min(r,g,b); + float delta = max - min; + v = max; + if(max == 0){ + s = 0; + h = 0; + }else{ + s = delta/max; + if(r == max){ + h = (g - b) / delta; + } else if (g == max) { + h = 2 + (b - r) / delta; + } else { + h = 4 + (r - g) / delta; + } + if (h < 0) h += 6; + h = h/6.; + } + set_pixel(im, i, j, 0, h); + set_pixel(im, i, j, 1, s); + set_pixel(im, i, j, 2, v); + } + } +} + +void hsv_to_rgb(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float h, s, v; + float f, p, q, t; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + h = 6 * get_pixel(im, i , j, 0); + s = get_pixel(im, i , j, 1); + v = get_pixel(im, i , j, 2); + if (s == 0) { + r = g = b = v; + } else { + int index = floor(h); + f = h - index; + p = v*(1-s); + q = v*(1-s*f); + t = v*(1-s*(1-f)); + if(index == 0){ + r = v; g = t; b = p; + } else if(index == 1){ + r = q; g = v; b = p; + } else if(index == 2){ + r = p; g = v; b = t; + } else if(index == 3){ + r = p; g = q; b = v; + } else if(index == 4){ + r = t; g = p; b = v; + } else { + r = v; g = p; b = q; + } + } + set_pixel(im, i, j, 0, r); + set_pixel(im, i, j, 1, g); + set_pixel(im, i, j, 2, b); + } + } +} + +void grayscale_image_3c(image im) +{ + assert(im.c == 3); + int i, j, k; + float scale[] = {0.299, 0.587, 0.114}; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float val = 0; + for(k = 0; k < 3; ++k){ + val += scale[k]*get_pixel(im, i, j, k); + } + im.data[0*im.h*im.w + im.w*j + i] = val; + im.data[1*im.h*im.w + im.w*j + i] = val; + im.data[2*im.h*im.w + im.w*j + i] = val; + } + } +} + +image grayscale_image(image im) +{ + assert(im.c == 3); + int i, j, k; + image gray = make_image(im.w, im.h, 1); + float scale[] = {0.299, 0.587, 0.114}; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ 
+ gray.data[i+im.w*j] += scale[k]*get_pixel(im, i, j, k); + } + } + } + return gray; +} + +image threshold_image(image im, float thresh) +{ + int i; + image t = make_image(im.w, im.h, im.c); + for(i = 0; i < im.w*im.h*im.c; ++i){ + t.data[i] = im.data[i]>thresh ? 1 : 0; + } + return t; +} + +image blend_image(image fore, image back, float alpha) +{ + assert(fore.w == back.w && fore.h == back.h && fore.c == back.c); + image blend = make_image(fore.w, fore.h, fore.c); + int i, j, k; + for(k = 0; k < fore.c; ++k){ + for(j = 0; j < fore.h; ++j){ + for(i = 0; i < fore.w; ++i){ + float val = alpha * get_pixel(fore, i, j, k) + + (1 - alpha)* get_pixel(back, i, j, k); + set_pixel(blend, i, j, k, val); + } + } + } + return blend; +} + +void scale_image_channel(image im, int c, float v) +{ + int i, j; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float pix = get_pixel(im, i, j, c); + pix = pix*v; + set_pixel(im, i, j, c, pix); + } + } +} + +void translate_image_channel(image im, int c, float v) +{ + int i, j; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float pix = get_pixel(im, i, j, c); + pix = pix+v; + set_pixel(im, i, j, c, pix); + } + } +} + +image binarize_image(image im) +{ + image c = copy_image(im); + int i; + for(i = 0; i < im.w * im.h * im.c; ++i){ + if(c.data[i] > .5) c.data[i] = 1; + else c.data[i] = 0; + } + return c; +} + +void saturate_image(image im, float sat) +{ + rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + hsv_to_rgb(im); + constrain_image(im); +} + +void hue_image(image im, float hue) +{ + rgb_to_hsv(im); + int i; + for(i = 0; i < im.w*im.h; ++i){ + im.data[i] = im.data[i] + hue; + if (im.data[i] > 1) im.data[i] -= 1; + if (im.data[i] < 0) im.data[i] += 1; + } + hsv_to_rgb(im); + constrain_image(im); +} + +void exposure_image(image im, float sat) +{ + rgb_to_hsv(im); + scale_image_channel(im, 2, sat); + hsv_to_rgb(im); + constrain_image(im); +} + +void distort_image(image im, float hue, float sat, float val) +{ + 
rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + scale_image_channel(im, 2, val); + int i; + for(i = 0; i < im.w*im.h; ++i){ + im.data[i] = im.data[i] + hue; + if (im.data[i] > 1) im.data[i] -= 1; + if (im.data[i] < 0) im.data[i] += 1; + } + hsv_to_rgb(im); + constrain_image(im); +} + +void random_distort_image(image im, float hue, float saturation, float exposure) +{ + float dhue = rand_uniform(-hue, hue); + float dsat = rand_scale(saturation); + float dexp = rand_scale(exposure); + distort_image(im, dhue, dsat, dexp); +} + +void saturate_exposure_image(image im, float sat, float exposure) +{ + rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + scale_image_channel(im, 2, exposure); + hsv_to_rgb(im); + constrain_image(im); +} + +image resize_image(image im, int w, int h) +{ + image resized = make_image(w, h, im.c); + image part = make_image(w, im.h, im.c); + int r, c, k; + float w_scale = (float)(im.w - 1) / (w - 1); + float h_scale = (float)(im.h - 1) / (h - 1); + for(k = 0; k < im.c; ++k){ + for(r = 0; r < im.h; ++r){ + for(c = 0; c < w; ++c){ + float val = 0; + if(c == w-1 || im.w == 1){ + val = get_pixel(im, im.w-1, r, k); + } else { + float sx = c*w_scale; + int ix = (int) sx; + float dx = sx - ix; + val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k); + } + set_pixel(part, c, r, k, val); + } + } + } + for(k = 0; k < im.c; ++k){ + for(r = 0; r < h; ++r){ + float sy = r*h_scale; + int iy = (int) sy; + float dy = sy - iy; + for(c = 0; c < w; ++c){ + float val = (1-dy) * get_pixel(part, c, iy, k); + set_pixel(resized, c, r, k, val); + } + if(r == h-1 || im.h == 1) continue; + for(c = 0; c < w; ++c){ + float val = dy * get_pixel(part, c, iy+1, k); + add_pixel(resized, c, r, k, val); + } + } + } + + free_image(part); + return resized; +} + + +void test_resize(char *filename) +{ + image im = load_image(filename, 0,0, 3); + float mag = mag_array(im.data, im.w*im.h*im.c); + printf("L2 Norm: %f\n", mag); + image gray = grayscale_image(im); + 
+ image c1 = copy_image(im); + image c2 = copy_image(im); + image c3 = copy_image(im); + image c4 = copy_image(im); + distort_image(c1, .1, 1.5, 1.5); + distort_image(c2, -.1, .66666, .66666); + distort_image(c3, .1, 1.5, .66666); + distort_image(c4, .1, .66666, 1.5); + + + show_image(im, "Original", 1); + show_image(gray, "Gray", 1); + show_image(c1, "C1", 1); + show_image(c2, "C2", 1); + show_image(c3, "C3", 1); + show_image(c4, "C4", 1); +#ifdef OPENCV + while(1){ + image aug = random_augment_image(im, 0, .75, 320, 448, 320, 320); + show_image(aug, "aug", 1); + free_image(aug); + + + float exposure = 1.15; + float saturation = 1.15; + float hue = .05; + + image c = copy_image(im); + + float dexp = rand_scale(exposure); + float dsat = rand_scale(saturation); + float dhue = rand_uniform(-hue, hue); + + distort_image(c, dhue, dsat, dexp); + show_image(c, "rand", 1); + printf("%f %f %f\n", dhue, dsat, dexp); + free_image(c); + } +#endif +} + + +image load_image_stb(char *filename, int channels) +{ + int w, h, c; + unsigned char *data = stbi_load(filename, &w, &h, &c, channels); + if (!data) { + fprintf(stderr, "Cannot load image \"%s\"\nSTB Reason: %s\n", filename, stbi_failure_reason()); + exit(0); + } + if(channels) c = channels; + int i,j,k; + image im = make_image(w, h, c); + for(k = 0; k < c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int dst_index = i + w*j + w*h*k; + int src_index = k + c*i + c*w*j; + im.data[dst_index] = (float)data[src_index]/255.; + } + } + } + free(data); + return im; +} + +image load_image(char *filename, int w, int h, int c) +{ +#ifdef OPENCV + image out = load_image_cv(filename, c); +#else + image out = load_image_stb(filename, c); +#endif + + if((h && w) && (h != out.h || w != out.w)){ + image resized = resize_image(out, w, h); + free_image(out); + out = resized; + } + return out; +} + +image load_image_color(char *filename, int w, int h) +{ + return load_image(filename, w, h, 3); +} + +image get_image_layer(image m, 
int l) +{ + image out = make_image(m.w, m.h, 1); + int i; + for(i = 0; i < m.h*m.w; ++i){ + out.data[i] = m.data[i+l*m.h*m.w]; + } + return out; +} +void print_image(image m) +{ + int i, j, k; + for(i =0 ; i < m.c; ++i){ + for(j =0 ; j < m.h; ++j){ + for(k = 0; k < m.w; ++k){ + printf("%.2lf, ", m.data[i*m.h*m.w + j*m.w + k]); + if(k > 30) break; + } + printf("\n"); + if(j > 30) break; + } + printf("\n"); + } + printf("\n"); +} + +image collapse_images_vert(image *ims, int n) +{ + int color = 1; + int border = 1; + int h,w,c; + w = ims[0].w; + h = (ims[0].h + border) * n - border; + c = ims[0].c; + if(c != 3 || !color){ + w = (w+border)*c - border; + c = 1; + } + + image filters = make_image(w, h, c); + int i,j; + for(i = 0; i < n; ++i){ + int h_offset = i*(ims[0].h+border); + image copy = copy_image(ims[i]); + //normalize_image(copy); + if(c == 3 && color){ + embed_image(copy, filters, 0, h_offset); + } + else{ + for(j = 0; j < copy.c; ++j){ + int w_offset = j*(ims[0].w+border); + image layer = get_image_layer(copy, j); + embed_image(layer, filters, w_offset, h_offset); + free_image(layer); + } + } + free_image(copy); + } + return filters; +} + +image collapse_images_horz(image *ims, int n) +{ + int color = 1; + int border = 1; + int h,w,c; + int size = ims[0].h; + h = size; + w = (ims[0].w + border) * n - border; + c = ims[0].c; + if(c != 3 || !color){ + h = (h+border)*c - border; + c = 1; + } + + image filters = make_image(w, h, c); + int i,j; + for(i = 0; i < n; ++i){ + int w_offset = i*(size+border); + image copy = copy_image(ims[i]); + //normalize_image(copy); + if(c == 3 && color){ + embed_image(copy, filters, w_offset, 0); + } + else{ + for(j = 0; j < copy.c; ++j){ + int h_offset = j*(size+border); + image layer = get_image_layer(copy, j); + embed_image(layer, filters, w_offset, h_offset); + free_image(layer); + } + } + free_image(copy); + } + return filters; +} + +void show_image_normalized(image im, const char *name) +{ + image c = copy_image(im); + 
normalize_image(c); + show_image(c, name, 1); + free_image(c); +} + +void show_images(image *ims, int n, char *window) +{ + image m = collapse_images_vert(ims, n); + /* + int w = 448; + int h = ((float)m.h/m.w) * 448; + if(h > 896){ + h = 896; + w = ((float)m.w/m.h) * 896; + } + image sized = resize_image(m, w, h); + */ + normalize_image(m); + save_image(m, window); + show_image(m, window, 1); + free_image(m); +} + +void free_image(image m) +{ + if(m.data){ + free(m.data); + } +} diff --git a/workloads/realworld/uvm/darknet/src/image.h b/workloads/realworld/uvm/darknet/src/image.h new file mode 100644 index 0000000000000000000000000000000000000000..3392bb9787fc542929cda064bcefa0f3f893b76c --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/image.h @@ -0,0 +1,69 @@ +#ifndef IMAGE_H +#define IMAGE_H + +#include +#include +#include +#include +#include +#include "box.h" +#include "darknet.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef OPENCV +void *open_video_stream(const char *f, int c, int w, int h, int fps); +image get_image_from_stream(void *p); +image load_image_cv(char *filename, int channels); +int show_image_cv(image im, const char* name, int ms); +#endif + +float get_color(int c, int x, int max); +void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); +void draw_bbox(image a, box bbox, int w, float r, float g, float b); +void write_label(image a, int r, int c, image *characters, char *string, float *rgb); +image image_distance(image a, image b); +void scale_image(image m, float s); +image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect); +image random_crop_image(image im, int w, int h); +image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h); +augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h); +void letterbox_image_into(image im, int w, int h, image boxed); +image 
resize_max(image im, int max); +void translate_image(image m, float s); +void embed_image(image source, image dest, int dx, int dy); +void place_image(image im, int w, int h, int dx, int dy, image canvas); +void saturate_image(image im, float sat); +void exposure_image(image im, float sat); +void distort_image(image im, float hue, float sat, float val); +void saturate_exposure_image(image im, float sat, float exposure); +void rgb_to_hsv(image im); +void hsv_to_rgb(image im); +void yuv_to_rgb(image im); +void rgb_to_yuv(image im); + + +image collapse_image_layers(image source, int border); +image collapse_images_horz(image *ims, int n); +image collapse_images_vert(image *ims, int n); + +void show_image_normalized(image im, const char *name); +void show_images(image *ims, int n, char *window); +void show_image_layers(image p, char *name); +void show_image_collapsed(image p, char *name); + +void print_image(image m); + +image make_empty_image(int w, int h, int c); +void copy_image_into(image src, image dest); + +image get_image_layer(image m, int l); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/workloads/realworld/uvm/darknet/src/image_opencv.cpp b/workloads/realworld/uvm/darknet/src/image_opencv.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7511280be07ca987fd51fa54aea55910cd34a706 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/image_opencv.cpp @@ -0,0 +1,135 @@ +#ifdef OPENCV + +#include "stdio.h" +#include "stdlib.h" +#include "opencv2/opencv.hpp" +#include "image.h" + +using namespace cv; + +extern "C" { + +IplImage *image_to_ipl(image im) +{ + int x,y,c; + IplImage *disp = cvCreateImage(cvSize(im.w,im.h), IPL_DEPTH_8U, im.c); + int step = disp->widthStep; + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + for(c= 0; c < im.c; ++c){ + float val = im.data[c*im.h*im.w + y*im.w + x]; + disp->imageData[y*step + x*im.c + c] = (unsigned char)(val*255); + } + } + } + return disp; +} + +image ipl_to_image(IplImage* 
src) +{ + int h = src->height; + int w = src->width; + int c = src->nChannels; + image im = make_image(w, h, c); + unsigned char *data = (unsigned char *)src->imageData; + int step = src->widthStep; + int i, j, k; + + for(i = 0; i < h; ++i){ + for(k= 0; k < c; ++k){ + for(j = 0; j < w; ++j){ + im.data[k*w*h + i*w + j] = data[i*step + j*c + k]/255.; + } + } + } + return im; +} + +Mat image_to_mat(image im) +{ + image copy = copy_image(im); + constrain_image(copy); + if(im.c == 3) rgbgr_image(copy); + + IplImage *ipl = image_to_ipl(copy); + Mat m = cvarrToMat(ipl, true); + cvReleaseImage(&ipl); + free_image(copy); + return m; +} + +image mat_to_image(Mat m) +{ + IplImage ipl = m; + image im = ipl_to_image(&ipl); + rgbgr_image(im); + return im; +} + +void *open_video_stream(const char *f, int c, int w, int h, int fps) +{ + VideoCapture *cap; + if(f) cap = new VideoCapture(f); + else cap = new VideoCapture(c); + if(!cap->isOpened()) return 0; + if(w) cap->set(CV_CAP_PROP_FRAME_WIDTH, w); + if(h) cap->set(CV_CAP_PROP_FRAME_HEIGHT, w); + if(fps) cap->set(CV_CAP_PROP_FPS, w); + return (void *) cap; +} + +image get_image_from_stream(void *p) +{ + VideoCapture *cap = (VideoCapture *)p; + Mat m; + *cap >> m; + if(m.empty()) return make_empty_image(0,0,0); + return mat_to_image(m); +} + +image load_image_cv(char *filename, int channels) +{ + int flag = -1; + if (channels == 0) flag = -1; + else if (channels == 1) flag = 0; + else if (channels == 3) flag = 1; + else { + fprintf(stderr, "OpenCV can't force load with %d channels\n", channels); + } + Mat m; + m = imread(filename, flag); + if(!m.data){ + fprintf(stderr, "Cannot load image \"%s\"\n", filename); + char buff[256]; + sprintf(buff, "echo %s >> bad.list", filename); + system(buff); + return make_image(10,10,3); + //exit(0); + } + image im = mat_to_image(m); + return im; +} + +int show_image_cv(image im, const char* name, int ms) +{ + Mat m = image_to_mat(im); + imshow(name, m); + int c = waitKey(ms); + if (c != -1) c = 
c%256; + return c; +} + +void make_window(char *name, int w, int h, int fullscreen) +{ + namedWindow(name, WINDOW_NORMAL); + if (fullscreen) { + setWindowProperty(name, CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); + } else { + resizeWindow(name, w, h); + if(strcmp(name, "Demo") == 0) moveWindow(name, 0, 0); + } +} + +} + +#endif diff --git a/workloads/realworld/uvm/darknet/src/iseg_layer.c b/workloads/realworld/uvm/darknet/src/iseg_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..a1b822a5797a6d04b0f3756f106cb2b20ba31a5b --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/iseg_layer.c @@ -0,0 +1,225 @@ +#include "iseg_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_iseg_layer(int batch, int w, int h, int classes, int ids) +{ + layer l = {0}; + l.type = ISEG; + + l.h = h; + l.w = w; + l.c = classes + ids; + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.batch = batch; + l.extra = ids; + l.cost = calloc(1, sizeof(float)); + l.outputs = h*w*l.c; + l.inputs = l.outputs; + l.truths = 90*(l.w*l.h+1); + l.delta = calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + + l.counts = calloc(90, sizeof(int)); + l.sums = calloc(90, sizeof(float*)); + if(ids){ + int i; + for(i = 0; i < 90; ++i){ + l.sums[i] = calloc(ids, sizeof(float)); + } + } + + l.forward = forward_iseg_layer; + l.backward = backward_iseg_layer; +#ifdef GPU + l.forward_gpu = forward_iseg_layer_gpu; + l.backward_gpu = backward_iseg_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "iseg\n"); + srand(0); + + return l; +} + +void resize_iseg_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->c; + l->inputs = l->outputs; + + l->output = realloc(l->output, 
l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +void forward_iseg_layer(const layer l, network net) +{ + + double time = what_time_is_it_now(); + int i,b,j,k; + int ids = l.extra; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + int index = b*l.outputs; + activate_array(l.output + index, l.classes*l.w*l.h, LOGISTIC); + } +#endif + + for (b = 0; b < l.batch; ++b){ + // a priori, each pixel has no class + for(i = 0; i < l.classes; ++i){ + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + i*l.w*l.h + k; + l.delta[index] = 0 - l.output[index]; + } + } + + // a priori, embedding should be small magnitude + for(i = 0; i < ids; ++i){ + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + (i+l.classes)*l.w*l.h + k; + l.delta[index] = .1 * (0 - l.output[index]); + } + } + + + memset(l.counts, 0, 90*sizeof(int)); + for(i = 0; i < 90; ++i){ + fill_cpu(ids, 0, l.sums[i], 1); + + int c = net.truth[b*l.truths + i*(l.w*l.h+1)]; + if(c < 0) break; + // add up metric embeddings for each instance + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + c*l.w*l.h + k; + float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k]; + if(v){ + l.delta[index] = v - l.output[index]; + axpy_cpu(ids, 1, l.output + b*l.outputs + l.classes*l.w*l.h + k, l.w*l.h, l.sums[i], 1); + ++l.counts[i]; + } + } + } + + float *mse = calloc(90, sizeof(float)); + for(i = 0; i < 90; ++i){ + int c = net.truth[b*l.truths + i*(l.w*l.h+1)]; + if(c < 0) break; + for(k = 0; k < l.w*l.h; ++k){ + float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k]; + if(v){ + int z; + float sum = 0; + for(z = 0; z < ids; 
++z){ + int index = b*l.outputs + (l.classes + z)*l.w*l.h + k; + sum += pow(l.sums[i][z]/l.counts[i] - l.output[index], 2); + } + mse[i] += sum; + } + } + mse[i] /= l.counts[i]; + } + + // Calculate average embedding + for(i = 0; i < 90; ++i){ + if(!l.counts[i]) continue; + scal_cpu(ids, 1.f/l.counts[i], l.sums[i], 1); + if(b == 0 && net.gpu_index == 0){ + printf("%4d, %6.3f, ", l.counts[i], mse[i]); + for(j = 0; j < ids; ++j){ + printf("%6.3f,", l.sums[i][j]); + } + printf("\n"); + } + } + free(mse); + + // Calculate embedding loss + for(i = 0; i < 90; ++i){ + if(!l.counts[i]) continue; + for(k = 0; k < l.w*l.h; ++k){ + float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k]; + if(v){ + for(j = 0; j < 90; ++j){ + if(!l.counts[j])continue; + int z; + for(z = 0; z < ids; ++z){ + int index = b*l.outputs + (l.classes + z)*l.w*l.h + k; + float diff = l.sums[j][z] - l.output[index]; + if (j == i) l.delta[index] += diff < 0? -.1 : .1; + else l.delta[index] += -(diff < 0? -.1 : .1); + } + } + } + } + } + + for(i = 0; i < ids; ++i){ + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + (i+l.classes)*l.w*l.h + k; + l.delta[index] *= .01; + } + } + } + + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("took %lf sec\n", what_time_is_it_now() - time); +} + +void backward_iseg_layer(const layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_iseg_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b; + for (b = 0; b < l.batch; ++b){ + activate_array_gpu(l.output_gpu + b*l.outputs, l.classes*l.w*l.h, LOGISTIC); + //if(l.extra) activate_array_gpu(l.output_gpu + b*l.outputs + l.classes*l.w*l.h, l.extra*l.w*l.h, LOGISTIC); + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_iseg_layer(l, net); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_iseg_layer_gpu(const layer l, 
network net) +{ + int b; + for (b = 0; b < l.batch; ++b){ + //if(l.extra) gradient_array_gpu(l.output_gpu + b*l.outputs + l.classes*l.w*l.h, l.extra*l.w*l.h, LOGISTIC, l.delta_gpu + b*l.outputs + l.classes*l.w*l.h); + } + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/uvm/darknet/src/iseg_layer.h b/workloads/realworld/uvm/darknet/src/iseg_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..dd8e64e023caf1e1fd0c30af57f9983f24ddd691 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/iseg_layer.h @@ -0,0 +1,19 @@ +#ifndef ISEG_LAYER_H +#define ISEG_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_iseg_layer(int batch, int w, int h, int classes, int ids); +void forward_iseg_layer(const layer l, network net); +void backward_iseg_layer(const layer l, network net); +void resize_iseg_layer(layer *l, int w, int h); +int iseg_num_detections(layer l, float thresh); + +#ifdef GPU +void forward_iseg_layer_gpu(const layer l, network net); +void backward_iseg_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm/darknet/src/l2norm_layer.c b/workloads/realworld/uvm/darknet/src/l2norm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..0cf7f844170cb2c3dba15be94d4a435aaa63067c --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/l2norm_layer.c @@ -0,0 +1,63 @@ +#include "l2norm_layer.h" +#include "activations.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +layer make_l2norm_layer(int batch, int inputs) +{ + fprintf(stderr, "l2norm %4d\n", inputs); + layer l = {0}; + l.type = L2NORM; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.output = calloc(inputs*batch, sizeof(float)); + l.scales = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + + l.forward = forward_l2norm_layer; + l.backward 
= backward_l2norm_layer; + #ifdef GPU + l.forward_gpu = forward_l2norm_layer_gpu; + l.backward_gpu = backward_l2norm_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.scales_gpu = cuda_make_array(l.output, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_l2norm_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + l2normalize_cpu(l.output, l.scales, l.batch, l.out_c, l.out_w*l.out_h); +} + +void backward_l2norm_layer(const layer l, network net) +{ + //axpy_cpu(l.inputs*l.batch, 1, l.scales, 1, l.delta, 1); + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_l2norm_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + l2normalize_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_w*l.out_h); +} + +void backward_l2norm_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.scales_gpu, 1, l.delta_gpu, 1); + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/uvm/darknet/src/l2norm_layer.h b/workloads/realworld/uvm/darknet/src/l2norm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1ca6f710f017f2857f566eaed90634698d72b26d --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/l2norm_layer.h @@ -0,0 +1,15 @@ +#ifndef L2NORM_LAYER_H +#define L2NORM_LAYER_H +#include "layer.h" +#include "network.h" + +layer make_l2norm_layer(int batch, int inputs); +void forward_l2norm_layer(const layer l, network net); +void backward_l2norm_layer(const layer l, network net); + +#ifdef GPU +void forward_l2norm_layer_gpu(const layer l, network net); +void backward_l2norm_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm/darknet/src/layer.c b/workloads/realworld/uvm/darknet/src/layer.c new file mode 100644 index 
0000000000000000000000000000000000000000..3bffe436f06a455e2d1043158ff6da9b07bbb61f --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/layer.c @@ -0,0 +1,97 @@ +#include "layer.h" +#include "cuda_dark.h" + +#include + +void free_layer(layer l) +{ + if(l.type == DROPOUT){ + if(l.rand) free(l.rand); +#ifdef GPU + if(l.rand_gpu) cuda_free(l.rand_gpu); +#endif + return; + } + if(l.cweights) free(l.cweights); + if(l.indexes) free(l.indexes); + if(l.input_layers) free(l.input_layers); + if(l.input_sizes) free(l.input_sizes); + if(l.map) free(l.map); + if(l.rand) free(l.rand); + if(l.cost) free(l.cost); + if(l.state) free(l.state); + if(l.prev_state) free(l.prev_state); + if(l.forgot_state) free(l.forgot_state); + if(l.forgot_delta) free(l.forgot_delta); + if(l.state_delta) free(l.state_delta); + if(l.concat) free(l.concat); + if(l.concat_delta) free(l.concat_delta); + if(l.binary_weights) free(l.binary_weights); + if(l.biases) free(l.biases); + if(l.bias_updates) free(l.bias_updates); + if(l.scales) free(l.scales); + if(l.scale_updates) free(l.scale_updates); + if(l.weights) free(l.weights); + if(l.weight_updates) free(l.weight_updates); + if(l.delta) free(l.delta); + if(l.output) free(l.output); + if(l.squared) free(l.squared); + if(l.norms) free(l.norms); + if(l.spatial_mean) free(l.spatial_mean); + if(l.mean) free(l.mean); + if(l.variance) free(l.variance); + if(l.mean_delta) free(l.mean_delta); + if(l.variance_delta) free(l.variance_delta); + if(l.rolling_mean) free(l.rolling_mean); + if(l.rolling_variance) free(l.rolling_variance); + if(l.x) free(l.x); + if(l.x_norm) free(l.x_norm); + if(l.m) free(l.m); + if(l.v) free(l.v); + if(l.z_cpu) free(l.z_cpu); + if(l.r_cpu) free(l.r_cpu); + if(l.h_cpu) free(l.h_cpu); + if(l.binary_input) free(l.binary_input); + +#ifdef GPU + if(l.indexes_gpu) cuda_free((float *)l.indexes_gpu); + + if(l.z_gpu) cuda_free(l.z_gpu); + if(l.r_gpu) cuda_free(l.r_gpu); + if(l.h_gpu) cuda_free(l.h_gpu); + if(l.m_gpu) cuda_free(l.m_gpu); + 
if(l.v_gpu) cuda_free(l.v_gpu); + if(l.prev_state_gpu) cuda_free(l.prev_state_gpu); + if(l.forgot_state_gpu) cuda_free(l.forgot_state_gpu); + if(l.forgot_delta_gpu) cuda_free(l.forgot_delta_gpu); + if(l.state_gpu) cuda_free(l.state_gpu); + if(l.state_delta_gpu) cuda_free(l.state_delta_gpu); + if(l.gate_gpu) cuda_free(l.gate_gpu); + if(l.gate_delta_gpu) cuda_free(l.gate_delta_gpu); + if(l.save_gpu) cuda_free(l.save_gpu); + if(l.save_delta_gpu) cuda_free(l.save_delta_gpu); + if(l.concat_gpu) cuda_free(l.concat_gpu); + if(l.concat_delta_gpu) cuda_free(l.concat_delta_gpu); + if(l.binary_input_gpu) cuda_free(l.binary_input_gpu); + if(l.binary_weights_gpu) cuda_free(l.binary_weights_gpu); + if(l.mean_gpu) cuda_free(l.mean_gpu); + if(l.variance_gpu) cuda_free(l.variance_gpu); + if(l.rolling_mean_gpu) cuda_free(l.rolling_mean_gpu); + if(l.rolling_variance_gpu) cuda_free(l.rolling_variance_gpu); + if(l.variance_delta_gpu) cuda_free(l.variance_delta_gpu); + if(l.mean_delta_gpu) cuda_free(l.mean_delta_gpu); + if(l.x_gpu) cuda_free(l.x_gpu); + if(l.x_norm_gpu) cuda_free(l.x_norm_gpu); + if(l.weights_gpu) cuda_free(l.weights_gpu); + if(l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); + if(l.biases_gpu) cuda_free(l.biases_gpu); + if(l.bias_updates_gpu) cuda_free(l.bias_updates_gpu); + if(l.scales_gpu) cuda_free(l.scales_gpu); + if(l.scale_updates_gpu) cuda_free(l.scale_updates_gpu); + if(l.output_gpu) cuda_free(l.output_gpu); + if(l.delta_gpu) cuda_free(l.delta_gpu); + if(l.rand_gpu) cuda_free(l.rand_gpu); + if(l.squared_gpu) cuda_free(l.squared_gpu); + if(l.norms_gpu) cuda_free(l.norms_gpu); +#endif +} diff --git a/workloads/realworld/uvm/darknet/src/layer.h b/workloads/realworld/uvm/darknet/src/layer.h new file mode 100644 index 0000000000000000000000000000000000000000..af6cd2ab5054f5ef3bbdfca2da45f08d710a7bd0 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/layer.h @@ -0,0 +1 @@ +#include "darknet.h" diff --git a/workloads/realworld/uvm/darknet/src/list.c 
b/workloads/realworld/uvm/darknet/src/list.c new file mode 100644 index 0000000000000000000000000000000000000000..0e4165d37800e1b4c7c33992cd64a6634fe4688c --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/list.c @@ -0,0 +1,92 @@ +#include +#include +#include "list.h" + +list *make_list() +{ + list *l = malloc(sizeof(list)); + l->size = 0; + l->front = 0; + l->back = 0; + return l; +} + +/* +void transfer_node(list *s, list *d, node *n) +{ + node *prev, *next; + prev = n->prev; + next = n->next; + if(prev) prev->next = next; + if(next) next->prev = prev; + --s->size; + if(s->front == n) s->front = next; + if(s->back == n) s->back = prev; +} +*/ + +void *list_pop(list *l){ + if(!l->back) return 0; + node *b = l->back; + void *val = b->val; + l->back = b->prev; + if(l->back) l->back->next = 0; + free(b); + --l->size; + + return val; +} + +void list_insert(list *l, void *val) +{ + node *new = malloc(sizeof(node)); + new->val = val; + new->next = 0; + + if(!l->back){ + l->front = new; + new->prev = 0; + }else{ + l->back->next = new; + new->prev = l->back; + } + l->back = new; + ++l->size; +} + +void free_node(node *n) +{ + node *next; + while(n) { + next = n->next; + free(n); + n = next; + } +} + +void free_list(list *l) +{ + free_node(l->front); + free(l); +} + +void free_list_contents(list *l) +{ + node *n = l->front; + while(n){ + free(n->val); + n = n->next; + } +} + +void **list_to_array(list *l) +{ + void **a = calloc(l->size, sizeof(void*)); + int count = 0; + node *n = l->front; + while(n){ + a[count++] = n->val; + n = n->next; + } + return a; +} diff --git a/workloads/realworld/uvm/darknet/src/list.h b/workloads/realworld/uvm/darknet/src/list.h new file mode 100644 index 0000000000000000000000000000000000000000..6b445c717c2937b9c90536654ba82a33e14bb4ec --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/list.h @@ -0,0 +1,13 @@ +#ifndef LIST_H +#define LIST_H +#include "darknet.h" + +list *make_list(); +int list_find(list *l, void *val); + +void 
list_insert(list *, void *); + + +void free_list_contents(list *l); + +#endif diff --git a/workloads/realworld/uvm/darknet/src/local_layer.c b/workloads/realworld/uvm/darknet/src/local_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..74f6910a8fd751ad9f3b41fc67be737399a151d0 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/local_layer.c @@ -0,0 +1,293 @@ +#include "local_layer.h" +#include "utils.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" +#include +#include + +int local_out_height(local_layer l) +{ + int h = l.h; + if (!l.pad) h -= l.size; + else h -= 1; + return h/l.stride + 1; +} + +int local_out_width(local_layer l) +{ + int w = l.w; + if (!l.pad) w -= l.size; + else w -= 1; + return w/l.stride + 1; +} + +local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation) +{ + int i; + local_layer l = {0}; + l.type = LOCAL; + + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.batch = batch; + l.stride = stride; + l.size = size; + l.pad = pad; + + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int locations = out_h*out_w; + l.out_h = out_h; + l.out_w = out_w; + l.out_c = n; + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = l.w * l.h * l.c; + + l.weights = calloc(c*n*size*size*locations, sizeof(float)); + l.weight_updates = calloc(c*n*size*size*locations, sizeof(float)); + + l.biases = calloc(l.outputs, sizeof(float)); + l.bias_updates = calloc(l.outputs, sizeof(float)); + + // float scale = 1./sqrt(size*size*c); + float scale = sqrt(2./(size*size*c)); + for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1,1); + + l.output = calloc(l.batch*out_h * out_w * n, sizeof(float)); + l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float)); + + l.workspace_size = out_h*out_w*size*size*c; + + l.forward = forward_local_layer; + l.backward = backward_local_layer; + l.update = update_local_layer; + 
+#ifdef GPU + l.forward_gpu = forward_local_layer_gpu; + l.backward_gpu = backward_local_layer_gpu; + l.update_gpu = update_local_layer_gpu; + + l.weights_gpu = cuda_make_array(l.weights, c*n*size*size*locations); + l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size*locations); + + l.biases_gpu = cuda_make_array(l.biases, l.outputs); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, l.outputs); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + +#endif + l.activation = activation; + + fprintf(stderr, "Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n); + + return l; +} + +void forward_local_layer(const local_layer l, network net) +{ + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int i, j; + int locations = out_h * out_w; + + for(i = 0; i < l.batch; ++i){ + copy_cpu(l.outputs, l.biases, 1, l.output + i*l.outputs, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input + i*l.w*l.h*l.c; + im2col_cpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + float *output = l.output + i*l.outputs; + for(j = 0; j < locations; ++j){ + float *a = l.weights + j*l.size*l.size*l.c*l.n; + float *b = net.workspace + j; + float *c = output + j; + + int m = l.n; + int n = 1; + int k = l.size*l.size*l.c; + + gemm(0,0,m,n,k,1,a,k,b,locations,1,c,locations); + } + } + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_local_layer(local_layer l, network net) +{ + int i, j; + int locations = l.out_w*l.out_h; + + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + for(i = 0; i < l.batch; ++i){ + axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input + i*l.w*l.h*l.c; + im2col_cpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + + for(j = 0; j < 
locations; ++j){ + float *a = l.delta + i*l.outputs + j; + float *b = net.workspace + j; + float *c = l.weight_updates + j*l.size*l.size*l.c*l.n; + int m = l.n; + int n = l.size*l.size*l.c; + int k = 1; + + gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n); + } + + if(net.delta){ + for(j = 0; j < locations; ++j){ + float *a = l.weights + j*l.size*l.size*l.c*l.n; + float *b = l.delta + i*l.outputs + j; + float *c = net.workspace + j; + + int m = l.size*l.size*l.c; + int n = 1; + int k = l.n; + + gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations); + } + + col2im_cpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta+i*l.c*l.h*l.w); + } + } +} + +void update_local_layer(local_layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.outputs, momentum, l.bias_updates, 1); + + axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(size, momentum, l.weight_updates, 1); +} + +#ifdef GPU + +void forward_local_layer_gpu(const local_layer l, network net) +{ + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int i, j; + int locations = out_h * out_w; + + for(i = 0; i < l.batch; ++i){ + copy_gpu(l.outputs, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input_gpu + i*l.w*l.h*l.c; + im2col_gpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + float *output = l.output_gpu + i*l.outputs; + for(j = 0; j < locations; ++j){ + float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n; + float *b = net.workspace + j; + float *c = output + j; + + int m = l.n; + int n = 1; + int k = l.size*l.size*l.c; + + 
gemm_gpu(0,0,m,n,k,1,a,k,b,locations,1,c,locations); + } + } + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_local_layer_gpu(local_layer l, network net) +{ + int i, j; + int locations = l.out_w*l.out_h; + + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + for(i = 0; i < l.batch; ++i){ + axpy_gpu(l.outputs, 1, l.delta_gpu + i*l.outputs, 1, l.bias_updates_gpu, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input_gpu + i*l.w*l.h*l.c; + im2col_gpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + + for(j = 0; j < locations; ++j){ + float *a = l.delta_gpu + i*l.outputs + j; + float *b = net.workspace + j; + float *c = l.weight_updates_gpu + j*l.size*l.size*l.c*l.n; + int m = l.n; + int n = l.size*l.size*l.c; + int k = 1; + + gemm_gpu(0,1,m,n,k,1,a,locations,b,locations,1,c,n); + } + + if(net.delta_gpu){ + for(j = 0; j < locations; ++j){ + float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n; + float *b = l.delta_gpu + i*l.outputs + j; + float *c = net.workspace + j; + + int m = l.size*l.size*l.c; + int n = 1; + int k = l.n; + + gemm_gpu(1,0,m,n,k,1,a,m,b,locations,0,c,locations); + } + + col2im_gpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta_gpu+i*l.c*l.h*l.w); + } + } +} + +void update_local_layer_gpu(local_layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1); + + axpy_gpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(size, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(size, momentum, l.weight_updates_gpu, 1); +} + +void pull_local_layer(local_layer l) +{ 
+ int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + cuda_pull_array(l.weights_gpu, l.weights, size); + cuda_pull_array(l.biases_gpu, l.biases, l.outputs); +} + +void push_local_layer(local_layer l) +{ + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + cuda_push_array(l.weights_gpu, l.weights, size); + cuda_push_array(l.biases_gpu, l.biases, l.outputs); +} +#endif diff --git a/workloads/realworld/uvm/darknet/src/local_layer.h b/workloads/realworld/uvm/darknet/src/local_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..56805c4f1cb51fed9ef0e771d2befb430df60df5 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/local_layer.h @@ -0,0 +1,31 @@ +#ifndef LOCAL_LAYER_H +#define LOCAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +typedef layer local_layer; + +#ifdef GPU +void forward_local_layer_gpu(local_layer layer, network net); +void backward_local_layer_gpu(local_layer layer, network net); +void update_local_layer_gpu(local_layer layer, update_args a); + +void push_local_layer(local_layer layer); +void pull_local_layer(local_layer layer); +#endif + +local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation); + +void forward_local_layer(const local_layer layer, network net); +void backward_local_layer(local_layer layer, network net); +void update_local_layer(local_layer layer, update_args a); + +void bias_output(float *output, float *biases, int batch, int n, int size); +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); + +#endif + diff --git a/workloads/realworld/uvm/darknet/src/logistic_layer.c b/workloads/realworld/uvm/darknet/src/logistic_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..8d98986f67a17da70df75e3d56a46615cfc8eaf1 --- /dev/null +++ 
b/workloads/realworld/uvm/darknet/src/logistic_layer.c @@ -0,0 +1,71 @@ +#include "logistic_layer.h" +#include "activations.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +layer make_logistic_layer(int batch, int inputs) +{ + fprintf(stderr, "logistic x entropy %4d\n", inputs); + layer l = {0}; + l.type = LOGXENT; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.loss = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_logistic_layer; + l.backward = backward_logistic_layer; + #ifdef GPU + l.forward_gpu = forward_logistic_layer_gpu; + l.backward_gpu = backward_logistic_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.loss_gpu = cuda_make_array(l.loss, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_logistic_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + activate_array(l.output, l.outputs*l.batch, LOGISTIC); + if(net.truth){ + logistic_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_logistic_layer(const layer l, network net) +{ + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_logistic_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, LOGISTIC); + if(net.truth){ + logistic_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); + cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_logistic_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 
1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/uvm/darknet/src/logistic_layer.h b/workloads/realworld/uvm/darknet/src/logistic_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9c25bee3c2a6eb1013ed43ce0c4aeaa63b7a293f --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/logistic_layer.h @@ -0,0 +1,15 @@ +#ifndef LOGISTIC_LAYER_H +#define LOGISTIC_LAYER_H +#include "layer.h" +#include "network.h" + +layer make_logistic_layer(int batch, int inputs); +void forward_logistic_layer(const layer l, network net); +void backward_logistic_layer(const layer l, network net); + +#ifdef GPU +void forward_logistic_layer_gpu(const layer l, network net); +void backward_logistic_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm/darknet/src/lstm_layer.c b/workloads/realworld/uvm/darknet/src/lstm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..16f43914df8f35fb7f7b16bd93ff1d83f513dda0 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/lstm_layer.c @@ -0,0 +1,626 @@ +#include "lstm_layer.h" +#include "connected_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam) +{ + fprintf(stderr, "LSTM Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer l = { 0 }; + l.batch = batch; + l.type = LSTM; + l.steps = steps; + l.inputs = inputs; + + l.uf = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uf) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, 
adam); + l.uf->batch = batch; + + l.ui = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.ui) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.ui->batch = batch; + + l.ug = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.ug) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.ug->batch = batch; + + l.uo = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uo) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uo->batch = batch; + + l.wf = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wf) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wf->batch = batch; + + l.wi = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wi) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wi->batch = batch; + + l.wg = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wg) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wg->batch = batch; + + l.wo = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wo) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wo->batch = batch; + + l.batch_normalize = batch_normalize; + l.outputs = outputs; + + l.output = calloc(outputs*batch*steps, sizeof(float)); + l.state = calloc(outputs*batch, sizeof(float)); + + l.forward = forward_lstm_layer; + l.update = update_lstm_layer; + + l.prev_state_cpu = calloc(batch*outputs, sizeof(float)); + l.prev_cell_cpu = calloc(batch*outputs, sizeof(float)); + l.cell_cpu = calloc(batch*outputs*steps, sizeof(float)); + + l.f_cpu = calloc(batch*outputs, sizeof(float)); + l.i_cpu = calloc(batch*outputs, sizeof(float)); + l.g_cpu = calloc(batch*outputs, sizeof(float)); + l.o_cpu = calloc(batch*outputs, sizeof(float)); + l.c_cpu = calloc(batch*outputs, sizeof(float)); + l.h_cpu = calloc(batch*outputs, 
sizeof(float)); + l.temp_cpu = calloc(batch*outputs, sizeof(float)); + l.temp2_cpu = calloc(batch*outputs, sizeof(float)); + l.temp3_cpu = calloc(batch*outputs, sizeof(float)); + l.dc_cpu = calloc(batch*outputs, sizeof(float)); + l.dh_cpu = calloc(batch*outputs, sizeof(float)); + +#ifdef GPU + l.forward_gpu = forward_lstm_layer_gpu; + l.backward_gpu = backward_lstm_layer_gpu; + l.update_gpu = update_lstm_layer_gpu; + + l.output_gpu = cuda_make_array(0, batch*outputs*steps); + l.delta_gpu = cuda_make_array(0, batch*l.outputs*steps); + + l.prev_state_gpu = cuda_make_array(0, batch*outputs); + l.prev_cell_gpu = cuda_make_array(0, batch*outputs); + l.cell_gpu = cuda_make_array(0, batch*outputs*steps); + + l.f_gpu = cuda_make_array(0, batch*outputs); + l.i_gpu = cuda_make_array(0, batch*outputs); + l.g_gpu = cuda_make_array(0, batch*outputs); + l.o_gpu = cuda_make_array(0, batch*outputs); + l.c_gpu = cuda_make_array(0, batch*outputs); + l.h_gpu = cuda_make_array(0, batch*outputs); + l.temp_gpu = cuda_make_array(0, batch*outputs); + l.temp2_gpu = cuda_make_array(0, batch*outputs); + l.temp3_gpu = cuda_make_array(0, batch*outputs); + l.dc_gpu = cuda_make_array(0, batch*outputs); + l.dh_gpu = cuda_make_array(0, batch*outputs); +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l.wf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wf->out_c, l.wf->out_h, l.wf->out_w); + cudnnSetTensor4dDescriptor(l.wi->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wi->out_c, l.wi->out_h, l.wi->out_w); + cudnnSetTensor4dDescriptor(l.wg->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wg->out_c, l.wg->out_h, l.wg->out_w); + cudnnSetTensor4dDescriptor(l.wo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wo->out_c, l.wo->out_h, l.wo->out_w); + + cudnnSetTensor4dDescriptor(l.uf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uf->out_c, l.uf->out_h, l.uf->out_w); + cudnnSetTensor4dDescriptor(l.ui->dstTensorDesc, CUDNN_TENSOR_NCHW, 
CUDNN_DATA_FLOAT, batch, l.ui->out_c, l.ui->out_h, l.ui->out_w); + cudnnSetTensor4dDescriptor(l.ug->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ug->out_c, l.ug->out_h, l.ug->out_w); + cudnnSetTensor4dDescriptor(l.uo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uo->out_c, l.uo->out_h, l.uo->out_w); +#endif + +#endif + + return l; +} + +void update_lstm_layer(layer l, update_args a) +{ + update_connected_layer(*(l.wf), a); + update_connected_layer(*(l.wi), a); + update_connected_layer(*(l.wg), a); + update_connected_layer(*(l.wo), a); + update_connected_layer(*(l.uf), a); + update_connected_layer(*(l.ui), a); + update_connected_layer(*(l.ug), a); + update_connected_layer(*(l.uo), a); +} + +void forward_lstm_layer(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + fill_cpu(l.outputs * l.batch * l.steps, 0, wf.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wi.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wg.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wo.delta, 1); + + fill_cpu(l.outputs * l.batch * l.steps, 0, uf.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, ui.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, ug.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, uo.delta, 1); + if (state.train) { + fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input = l.h_cpu; + forward_connected_layer(wf, s); + forward_connected_layer(wi, s); + forward_connected_layer(wg, s); + forward_connected_layer(wo, s); + + s.input = state.input; + forward_connected_layer(uf, s); + forward_connected_layer(ui, s); + forward_connected_layer(ug, s); + forward_connected_layer(uo, s); + + copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); + 
axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); + + copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); + + copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); + + copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); + + activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.g_cpu, l.outputs*l.batch, TANH); + activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); + + copy_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.c_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, l.temp_cpu, 1, l.c_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.h_cpu, 1); + activate_array(l.h_cpu, l.outputs*l.batch, TANH); + mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.h_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.cell_cpu, 1); + copy_cpu(l.outputs*l.batch, l.h_cpu, 1, l.output, 1); + + state.input += l.inputs*l.batch; + l.output += l.outputs*l.batch; + l.cell_cpu += l.outputs*l.batch; + + increment_layer(&wf, 1); + increment_layer(&wi, 1); + increment_layer(&wg, 1); + increment_layer(&wo, 1); + + increment_layer(&uf, 1); + increment_layer(&ui, 1); + increment_layer(&ug, 1); + increment_layer(&uo, 1); + } +} + +void backward_lstm_layer(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + increment_layer(&wf, l.steps - 1); + increment_layer(&wi, l.steps - 1); + increment_layer(&wg, l.steps - 1); + increment_layer(&wo, l.steps - 1); + + increment_layer(&uf, l.steps - 1); + increment_layer(&ui, l.steps - 1); + 
increment_layer(&ug, l.steps - 1); + increment_layer(&uo, l.steps - 1); + + state.input += l.inputs*l.batch*(l.steps - 1); + if (state.delta) state.delta += l.inputs*l.batch*(l.steps - 1); + + l.output += l.outputs*l.batch*(l.steps - 1); + l.cell_cpu += l.outputs*l.batch*(l.steps - 1); + l.delta += l.outputs*l.batch*(l.steps - 1); + + for (i = l.steps - 1; i >= 0; --i) { + if (i != 0) copy_cpu(l.outputs*l.batch, l.cell_cpu - l.outputs*l.batch, 1, l.prev_cell_cpu, 1); + copy_cpu(l.outputs*l.batch, l.cell_cpu, 1, l.c_cpu, 1); + if (i != 0) copy_cpu(l.outputs*l.batch, l.output - l.outputs*l.batch, 1, l.prev_state_cpu, 1); + copy_cpu(l.outputs*l.batch, l.output, 1, l.h_cpu, 1); + + l.dh_cpu = (i == 0) ? 0 : l.delta - l.outputs*l.batch; + + copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); + + copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); + + copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); + + copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); + + activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.g_cpu, l.outputs*l.batch, TANH); + activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); + + copy_cpu(l.outputs*l.batch, l.delta, 1, l.temp3_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); + activate_array(l.temp_cpu, l.outputs*l.batch, TANH); + + copy_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp2_cpu, 1); + mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.temp2_cpu, 1); + + gradient_array(l.temp_cpu, l.outputs*l.batch, TANH, l.temp2_cpu); + axpy_cpu(l.outputs*l.batch, 1, l.dc_cpu, 1, l.temp2_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); + activate_array(l.temp_cpu, l.outputs*l.batch, TANH); + 
mul_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp_cpu, 1); + gradient_array(l.o_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wo.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wo, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uo.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(uo, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); + gradient_array(l.g_cpu, l.outputs*l.batch, TANH, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wg.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wg, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ug.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(ug, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); + gradient_array(l.i_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wi.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wi, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ui.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(ui, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.prev_cell_cpu, 1, l.temp_cpu, 1); + gradient_array(l.f_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wf.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wf, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uf.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(uf, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.temp_cpu, 1); + 
copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, l.dc_cpu, 1); + + state.input -= l.inputs*l.batch; + if (state.delta) state.delta -= l.inputs*l.batch; + l.output -= l.outputs*l.batch; + l.cell_cpu -= l.outputs*l.batch; + l.delta -= l.outputs*l.batch; + + increment_layer(&wf, -1); + increment_layer(&wi, -1); + increment_layer(&wg, -1); + increment_layer(&wo, -1); + + increment_layer(&uf, -1); + increment_layer(&ui, -1); + increment_layer(&ug, -1); + increment_layer(&uo, -1); + } +} + +#ifdef GPU +void update_lstm_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.wf), a); + update_connected_layer_gpu(*(l.wi), a); + update_connected_layer_gpu(*(l.wg), a); + update_connected_layer_gpu(*(l.wo), a); + update_connected_layer_gpu(*(l.uf), a); + update_connected_layer_gpu(*(l.ui), a); + update_connected_layer_gpu(*(l.ug), a); + update_connected_layer_gpu(*(l.uo), a); +} + +void forward_lstm_layer_gpu(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + fill_gpu(l.outputs * l.batch * l.steps, 0, wf.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wi.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wg.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wo.delta_gpu, 1); + + fill_gpu(l.outputs * l.batch * l.steps, 0, uf.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, ui.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, ug.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, uo.delta_gpu, 1); + if (state.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = l.h_gpu; + forward_connected_layer_gpu(wf, s); + forward_connected_layer_gpu(wi, s); + forward_connected_layer_gpu(wg, s); + forward_connected_layer_gpu(wo, s); + + 
/*
** Forward pass of an LSTM layer on the GPU, unrolled over l.steps timesteps.
**
** Each gate is the sum of a hidden-state projection (w*) and an input
** projection (u*), both computed by connected sub-layers. copy_gpu /
** axpy_gpu / mul_gpu / fill_gpu are darknet's BLAS-style device helpers
** (copy, y += a*x, elementwise y *= x, fill — presumably matching the CPU
** blas.c versions; confirm in blas_kernels.cu).
**
** l is taken by value, so advancing l.output_gpu / l.cell_gpu inside the
** loop does not change the caller's layer struct.
*/
void forward_lstm_layer_gpu(layer l, network state)
{
    network s = { 0 };
    s.train = state.train;
    int i;
    layer wf = *(l.wf);
    layer wi = *(l.wi);
    layer wg = *(l.wg);
    layer wo = *(l.wo);

    layer uf = *(l.uf);
    layer ui = *(l.ui);
    layer ug = *(l.ug);
    layer uo = *(l.uo);

    /* Zero every sub-layer's accumulated delta across all timesteps. */
    fill_gpu(l.outputs * l.batch * l.steps, 0, wf.delta_gpu, 1);
    fill_gpu(l.outputs * l.batch * l.steps, 0, wi.delta_gpu, 1);
    fill_gpu(l.outputs * l.batch * l.steps, 0, wg.delta_gpu, 1);
    fill_gpu(l.outputs * l.batch * l.steps, 0, wo.delta_gpu, 1);

    fill_gpu(l.outputs * l.batch * l.steps, 0, uf.delta_gpu, 1);
    fill_gpu(l.outputs * l.batch * l.steps, 0, ui.delta_gpu, 1);
    fill_gpu(l.outputs * l.batch * l.steps, 0, ug.delta_gpu, 1);
    fill_gpu(l.outputs * l.batch * l.steps, 0, uo.delta_gpu, 1);
    if (state.train) {
        fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1);
    }

    for (i = 0; i < l.steps; ++i) {
        /* Hidden-state projections: gates fed with previous h. */
        s.input_gpu = l.h_gpu;
        forward_connected_layer_gpu(wf, s);
        forward_connected_layer_gpu(wi, s);
        forward_connected_layer_gpu(wg, s);
        forward_connected_layer_gpu(wo, s);

        /* Input projections: gates fed with this timestep's input. */
        s.input_gpu = state.input_gpu;
        forward_connected_layer_gpu(uf, s);
        forward_connected_layer_gpu(ui, s);
        forward_connected_layer_gpu(ug, s);
        forward_connected_layer_gpu(uo, s);

        /* Pre-activation gate values: g* = w*(h) + u*(x). */
        copy_gpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1);

        copy_gpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1);

        copy_gpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1);

        copy_gpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1);

        /* Gate activations: sigmoid on f/i/o, tanh on the candidate g. */
        activate_array_gpu(l.f_gpu, l.outputs*l.batch, LOGISTIC);
        activate_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC);
        activate_array_gpu(l.g_gpu, l.outputs*l.batch, TANH);
        activate_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC);

        /* Cell update: c = f*c + i*g (c_gpu still holds the previous cell). */
        copy_gpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1);
        mul_gpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1);
        mul_gpu(l.outputs*l.batch, l.f_gpu, 1, l.c_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, l.temp_gpu, 1, l.c_gpu, 1);

        /* Hidden state: h = o * tanh(c). */
        copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.h_gpu, 1);
        activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH);
        mul_gpu(l.outputs*l.batch, l.o_gpu, 1, l.h_gpu, 1);

        /* Record this timestep's cell and output for the backward pass. */
        copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.cell_gpu, 1);
        copy_gpu(l.outputs*l.batch, l.h_gpu, 1, l.output_gpu, 1);

        /* Advance all per-timestep pointers to the next step's slice. */
        state.input_gpu += l.inputs*l.batch;
        l.output_gpu += l.outputs*l.batch;
        l.cell_gpu += l.outputs*l.batch;

        increment_layer(&wf, 1);
        increment_layer(&wi, 1);
        increment_layer(&wg, 1);
        increment_layer(&wo, 1);

        increment_layer(&uf, 1);
        increment_layer(&ui, 1);
        increment_layer(&ug, 1);
        increment_layer(&uo, 1);
    }
}
/*
** Backward pass (truncated BPTT) of an LSTM layer on the GPU.
**
** Walks the l.steps timesteps in REVERSE. At each step it recomputes the
** gate activations from the stored sub-layer outputs, then backpropagates
** the output-gate, candidate, input-gate and forget-gate paths in that
** exact order, reusing temp_gpu/temp2_gpu/temp3_gpu as scratch. The
** statement order is load-bearing: each gradient_array_gpu/mul_gpu step
** consumes a scratch buffer written just above it.
**
** dc_gpu carries dL/dc back to the previous timestep; dh_gpu points at
** the previous timestep's delta slice (NULL/0 at the first step).
*/
void backward_lstm_layer_gpu(layer l, network state)
{
    network s = { 0 };
    s.train = state.train;
    int i;
    layer wf = *(l.wf);
    layer wi = *(l.wi);
    layer wg = *(l.wg);
    layer wo = *(l.wo);

    layer uf = *(l.uf);
    layer ui = *(l.ui);
    layer ug = *(l.ug);
    layer uo = *(l.uo);

    /* Jump every sub-layer and data pointer to the LAST timestep. */
    increment_layer(&wf, l.steps - 1);
    increment_layer(&wi, l.steps - 1);
    increment_layer(&wg, l.steps - 1);
    increment_layer(&wo, l.steps - 1);

    increment_layer(&uf, l.steps - 1);
    increment_layer(&ui, l.steps - 1);
    increment_layer(&ug, l.steps - 1);
    increment_layer(&uo, l.steps - 1);

    state.input_gpu += l.inputs*l.batch*(l.steps - 1);
    if (state.delta_gpu) state.delta_gpu += l.inputs*l.batch*(l.steps - 1);

    l.output_gpu += l.outputs*l.batch*(l.steps - 1);
    l.cell_gpu += l.outputs*l.batch*(l.steps - 1);
    l.delta_gpu += l.outputs*l.batch*(l.steps - 1);

    for (i = l.steps - 1; i >= 0; --i) {
        /* Previous-step cell/state come from the slice one step back;
        ** at step 0 whatever prev_* already holds is used unchanged. */
        if (i != 0) copy_gpu(l.outputs*l.batch, l.cell_gpu - l.outputs*l.batch, 1, l.prev_cell_gpu, 1);
        copy_gpu(l.outputs*l.batch, l.cell_gpu, 1, l.c_gpu, 1);
        if (i != 0) copy_gpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, 1, l.prev_state_gpu, 1);
        copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.h_gpu, 1);

        /* Hidden-gradient target for the w* sub-layers: the previous
        ** timestep's delta slice, or 0 (no target) at the first step. */
        l.dh_gpu = (i == 0) ? 0 : l.delta_gpu - l.outputs*l.batch;

        /* Recompute gate pre-activations (g* = w*(h) + u*(x)) ... */
        copy_gpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1);

        copy_gpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1);

        copy_gpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1);

        copy_gpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1);
        axpy_gpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1);

        /* ... and their activations, exactly as in the forward pass. */
        activate_array_gpu(l.f_gpu, l.outputs*l.batch, LOGISTIC);
        activate_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC);
        activate_array_gpu(l.g_gpu, l.outputs*l.batch, TANH);
        activate_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC);

        /* temp3 = dL/dh arriving at this timestep. */
        copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, l.temp3_gpu, 1);

        /* temp2 = dL/dc = dL/dh * o * tanh'(c) + carried dc. */
        copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.temp_gpu, 1);
        activate_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH);

        copy_gpu(l.outputs*l.batch, l.temp3_gpu, 1, l.temp2_gpu, 1);
        mul_gpu(l.outputs*l.batch, l.o_gpu, 1, l.temp2_gpu, 1);

        gradient_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH, l.temp2_gpu);
        axpy_gpu(l.outputs*l.batch, 1, l.dc_gpu, 1, l.temp2_gpu, 1);

        /* Output gate path: temp = dL/dh * tanh(c) * sigmoid'(o). */
        copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.temp_gpu, 1);
        activate_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH);
        mul_gpu(l.outputs*l.batch, l.temp3_gpu, 1, l.temp_gpu, 1);
        gradient_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu);
        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wo.delta_gpu, 1);
        s.input_gpu = l.prev_state_gpu;
        s.delta_gpu = l.dh_gpu;
        backward_connected_layer_gpu(wo, s);

        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, uo.delta_gpu, 1);
        s.input_gpu = state.input_gpu;
        s.delta_gpu = state.delta_gpu;
        backward_connected_layer_gpu(uo, s);

        /* Candidate path: temp = dL/dc * i * tanh'(g). */
        copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1);
        mul_gpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1);
        gradient_array_gpu(l.g_gpu, l.outputs*l.batch, TANH, l.temp_gpu);
        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wg.delta_gpu, 1);
        s.input_gpu = l.prev_state_gpu;
        s.delta_gpu = l.dh_gpu;
        backward_connected_layer_gpu(wg, s);

        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, ug.delta_gpu, 1);
        s.input_gpu = state.input_gpu;
        s.delta_gpu = state.delta_gpu;
        backward_connected_layer_gpu(ug, s);

        /* Input gate path: temp = dL/dc * g * sigmoid'(i). */
        copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1);
        mul_gpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1);
        gradient_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu);
        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wi.delta_gpu, 1);
        s.input_gpu = l.prev_state_gpu;
        s.delta_gpu = l.dh_gpu;
        backward_connected_layer_gpu(wi, s);

        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, ui.delta_gpu, 1);
        s.input_gpu = state.input_gpu;
        s.delta_gpu = state.delta_gpu;
        backward_connected_layer_gpu(ui, s);

        /* Forget gate path: temp = dL/dc * prev_cell * sigmoid'(f). */
        copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1);
        mul_gpu(l.outputs*l.batch, l.prev_cell_gpu, 1, l.temp_gpu, 1);
        gradient_array_gpu(l.f_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu);
        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wf.delta_gpu, 1);
        s.input_gpu = l.prev_state_gpu;
        s.delta_gpu = l.dh_gpu;
        backward_connected_layer_gpu(wf, s);

        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, uf.delta_gpu, 1);
        s.input_gpu = state.input_gpu;
        s.delta_gpu = state.delta_gpu;
        backward_connected_layer_gpu(uf, s);

        /* Carry dL/dc back one step: dc = dL/dc * f. */
        copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1);
        mul_gpu(l.outputs*l.batch, l.f_gpu, 1, l.temp_gpu, 1);
        copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, l.dc_gpu, 1);

        /* Step all per-timestep pointers back one slice. */
        state.input_gpu -= l.inputs*l.batch;
        if (state.delta_gpu) state.delta_gpu -= l.inputs*l.batch;
        l.output_gpu -= l.outputs*l.batch;
        l.cell_gpu -= l.outputs*l.batch;
        l.delta_gpu -= l.outputs*l.batch;

        increment_layer(&wf, -1);
        increment_layer(&wi, -1);
        increment_layer(&wg, -1);
        increment_layer(&wo, -1);

        increment_layer(&uf, -1);
        increment_layer(&ui, -1);
        increment_layer(&ug, -1);
        increment_layer(&uo, -1);
    }
}
increment_layer(&uo, -1); + } +} +#endif diff --git a/workloads/realworld/uvm/darknet/src/lstm_layer.h b/workloads/realworld/uvm/darknet/src/lstm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..b9f07e6424b55c336e692aa6f1028d0bc1cae0b3 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/lstm_layer.h @@ -0,0 +1,20 @@ +#ifndef LSTM_LAYER_H +#define LSTM_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" +#define USET + +layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam); + +void forward_lstm_layer(layer l, network net); +void update_lstm_layer(layer l, update_args a); + +#ifdef GPU +void forward_lstm_layer_gpu(layer l, network net); +void backward_lstm_layer_gpu(layer l, network net); +void update_lstm_layer_gpu(layer l, update_args a); + +#endif +#endif diff --git a/workloads/realworld/uvm/darknet/src/matrix.c b/workloads/realworld/uvm/darknet/src/matrix.c new file mode 100644 index 0000000000000000000000000000000000000000..799916bff017180e220ae48748f495007793d168 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/matrix.c @@ -0,0 +1,196 @@ +#include "matrix.h" +#include "utils.h" +#include "blas.h" +#include +#include +#include +#include +#include + +void free_matrix(matrix m) +{ + int i; + for(i = 0; i < m.rows; ++i) free(m.vals[i]); + free(m.vals); +} + +float matrix_topk_accuracy(matrix truth, matrix guess, int k) +{ + int *indexes = calloc(k, sizeof(int)); + int n = truth.cols; + int i,j; + int correct = 0; + for(i = 0; i < truth.rows; ++i){ + top_k(guess.vals[i], n, k, indexes); + for(j = 0; j < k; ++j){ + int class = indexes[j]; + if(truth.vals[i][class]){ + ++correct; + break; + } + } + } + free(indexes); + return (float)correct/truth.rows; +} + +void scale_matrix(matrix m, float scale) +{ + int i,j; + for(i = 0; i < m.rows; ++i){ + for(j = 0; j < m.cols; ++j){ + m.vals[i][j] *= scale; + } + } +} + +matrix resize_matrix(matrix m, int size) +{ + 
int i; + if (m.rows == size) return m; + if (m.rows < size) { + m.vals = realloc(m.vals, size*sizeof(float*)); + for (i = m.rows; i < size; ++i) { + m.vals[i] = calloc(m.cols, sizeof(float)); + } + } else if (m.rows > size) { + for (i = size; i < m.rows; ++i) { + free(m.vals[i]); + } + m.vals = realloc(m.vals, size*sizeof(float*)); + } + m.rows = size; + return m; +} + +void matrix_add_matrix(matrix from, matrix to) +{ + assert(from.rows == to.rows && from.cols == to.cols); + int i,j; + for(i = 0; i < from.rows; ++i){ + for(j = 0; j < from.cols; ++j){ + to.vals[i][j] += from.vals[i][j]; + } + } +} + +matrix copy_matrix(matrix m) +{ + matrix c = {0}; + c.rows = m.rows; + c.cols = m.cols; + c.vals = calloc(c.rows, sizeof(float *)); + int i; + for(i = 0; i < c.rows; ++i){ + c.vals[i] = calloc(c.cols, sizeof(float)); + copy_cpu(c.cols, m.vals[i], 1, c.vals[i], 1); + } + return c; +} + +matrix make_matrix(int rows, int cols) +{ + int i; + matrix m; + m.rows = rows; + m.cols = cols; + m.vals = calloc(m.rows, sizeof(float *)); + for(i = 0; i < m.rows; ++i){ + m.vals[i] = calloc(m.cols, sizeof(float)); + } + return m; +} + +matrix hold_out_matrix(matrix *m, int n) +{ + int i; + matrix h; + h.rows = n; + h.cols = m->cols; + h.vals = calloc(h.rows, sizeof(float *)); + for(i = 0; i < n; ++i){ + int index = rand()%m->rows; + h.vals[i] = m->vals[index]; + m->vals[index] = m->vals[--(m->rows)]; + } + return h; +} + +float *pop_column(matrix *m, int c) +{ + float *col = calloc(m->rows, sizeof(float)); + int i, j; + for(i = 0; i < m->rows; ++i){ + col[i] = m->vals[i][c]; + for(j = c; j < m->cols-1; ++j){ + m->vals[i][j] = m->vals[i][j+1]; + } + } + --m->cols; + return col; +} + +matrix csv_to_matrix(char *filename) +{ + FILE *fp = fopen(filename, "r"); + if(!fp) file_error(filename); + + matrix m; + m.cols = -1; + + char *line; + + int n = 0; + int size = 1024; + m.vals = calloc(size, sizeof(float*)); + while((line = fgetl(fp))){ + if(m.cols == -1) m.cols = count_fields(line); + 
if(n == size){ + size *= 2; + m.vals = realloc(m.vals, size*sizeof(float*)); + } + m.vals[n] = parse_fields(line, m.cols); + free(line); + ++n; + } + m.vals = realloc(m.vals, n*sizeof(float*)); + m.rows = n; + return m; +} + +void matrix_to_csv(matrix m) +{ + int i, j; + + for(i = 0; i < m.rows; ++i){ + for(j = 0; j < m.cols; ++j){ + if(j > 0) printf(","); + printf("%.17g", m.vals[i][j]); + } + printf("\n"); + } +} + +void print_matrix(matrix m) +{ + int i, j; + printf("%d X %d Matrix:\n",m.rows, m.cols); + printf(" __"); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf("__ \n"); + + printf("| "); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf(" |\n"); + + for(i = 0; i < m.rows; ++i){ + printf("| "); + for(j = 0; j < m.cols; ++j){ + printf("%15.7f ", m.vals[i][j]); + } + printf(" |\n"); + } + printf("|__"); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf("__|\n"); +} diff --git a/workloads/realworld/uvm/darknet/src/matrix.h b/workloads/realworld/uvm/darknet/src/matrix.h new file mode 100644 index 0000000000000000000000000000000000000000..879acd70d26c084931b30067ddcc77057068e58c --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/matrix.h @@ -0,0 +1,13 @@ +#ifndef MATRIX_H +#define MATRIX_H +#include "darknet.h" + +matrix copy_matrix(matrix m); +void print_matrix(matrix m); + +matrix hold_out_matrix(matrix *m, int n); +matrix resize_matrix(matrix m, int size); + +float *pop_column(matrix *m, int c); + +#endif diff --git a/workloads/realworld/uvm/darknet/src/maxpool_layer.c b/workloads/realworld/uvm/darknet/src/maxpool_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..b54be838662ebfc53abc539da22413becc1805a3 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/maxpool_layer.c @@ -0,0 +1,127 @@ +#include "maxpool_layer.h" +#include "cuda_dark.h" +#include + +image get_maxpool_image(maxpool_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.c; + return float_to_image(w,h,c,l.output); +} + +image 
/*
** Construct a max-pooling layer.
**
** batch            number of images per batch
** h, w, c          input height, width, channels
** size, stride     pooling window size and stride
** padding          total padding added around the input
**
** Output spatial size follows (dim + padding - size)/stride + 1; channel
** count is preserved. Allocates per-element output, delta and argmax-index
** buffers on the host (and mirrors on the GPU when compiled with GPU).
*/
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding)
{
    maxpool_layer l = {0};
    l.type = MAXPOOL;
    l.batch = batch;
    l.h = h;
    l.w = w;
    l.c = c;
    l.pad = padding;
    l.out_w = (w + padding - size)/stride + 1;
    l.out_h = (h + padding - size)/stride + 1;
    l.out_c = c;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = h*w*c;
    l.size = size;
    l.stride = stride;
    int output_size = l.out_h * l.out_w * l.out_c * batch;
    /* indexes records, per output cell, the flat input index of the max
    ** element — consumed by backward_maxpool_layer to route gradients. */
    l.indexes = calloc(output_size, sizeof(int));
    l.output = calloc(output_size, sizeof(float));
    l.delta = calloc(output_size, sizeof(float));
    l.forward = forward_maxpool_layer;
    l.backward = backward_maxpool_layer;
    #ifdef GPU
    l.forward_gpu = forward_maxpool_layer_gpu;
    l.backward_gpu = backward_maxpool_layer_gpu;
    l.indexes_gpu = cuda_make_int_array(0, output_size);
    l.output_gpu = cuda_make_array(l.output, output_size);
    l.delta_gpu = cuda_make_array(l.delta, output_size);
    #endif
    fprintf(stderr, "max          %d x %d / %d  %4d x%4d x%4d   ->  %4d x%4d x%4d\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
    return l;
}
/*
** CPU forward pass for max pooling.
**
** For every output cell, scans the size x size window in the input (offset
** by -pad/2 in both dimensions), writes the maximum value to l.output and
** the flat input index of that maximum to l.indexes (used by the backward
** pass). Out-of-bounds window positions contribute -FLT_MAX, so they never
** win unless the whole window is out of bounds (then max_i stays -1).
*/
void forward_maxpool_layer(const maxpool_layer l, network net)
{
    int b,i,j,k,m,n;
    int w_offset = -l.pad/2;
    int h_offset = -l.pad/2;

    int h = l.out_h;
    int w = l.out_w;
    int c = l.c;

    for(b = 0; b < l.batch; ++b){
        for(k = 0; k < c; ++k){
            for(i = 0; i < h; ++i){
                for(j = 0; j < w; ++j){
                    /* Flat output index in NCHW order. */
                    int out_index = j + w*(i + h*(k + c*b));
                    float max = -FLT_MAX;
                    int max_i = -1;
                    for(n = 0; n < l.size; ++n){
                        for(m = 0; m < l.size; ++m){
                            int cur_h = h_offset + i*l.stride + n;
                            int cur_w = w_offset + j*l.stride + m;
                            int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c));
                            int valid = (cur_h >= 0 && cur_h < l.h &&
                                         cur_w >= 0 && cur_w < l.w);
                            /* Padding cells read as -FLT_MAX, never chosen. */
                            float val = (valid != 0) ? net.input[index] : -FLT_MAX;
                            max_i = (val > max) ? index : max_i;
                            max   = (val > max) ? val   : max;
                        }
                    }
                    l.output[out_index] = max;
                    l.indexes[out_index] = max_i;
                }
            }
        }
    }
}
/*
** CUDA kernel: one thread per output cell of the max-pooling layer.
**
** n is the total output element count; the flat thread id is decomposed
** into (b, k, i, j) = (batch, channel, out row, out col) in that order.
** Each thread scans its size x size input window (offset by -pad/2),
** writing the max value to output[] and the flat input index of the max
** to indexes[] for the backward pass. Out-of-bounds taps read -INFINITY.
*/
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
{
    /* Output spatial dims, recomputed from the input dims. */
    int h = (in_h + pad - size)/stride + 1;
    int w = (in_w + pad - size)/stride + 1;
    int c = in_c;

    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(id >= n) return;

    /* Decompose flat id: fastest-varying is the output column. */
    int j = id % w;
    id /= w;
    int i = id % h;
    id /= h;
    int k = id % c;
    id /= c;
    int b = id;

    int w_offset = -pad/2;
    int h_offset = -pad/2;

    int out_index = j + w*(i + h*(k + c*b));
    float max = -INFINITY;
    int max_i = -1;
    int l, m;
    for(l = 0; l < size; ++l){
        for(m = 0; m < size; ++m){
            int cur_h = h_offset + i*stride + l;
            int cur_w = w_offset + j*stride + m;
            int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c));
            int valid = (cur_h >= 0 && cur_h < in_h &&
                    cur_w >= 0 && cur_w < in_w);
            /* Padding positions can never win the max. */
            float val = (valid != 0) ? input[index] : -INFINITY;
            max_i = (val > max) ? index : max_i;
            max   = (val > max) ? val   : max;
        }
    }
    output[out_index] = max;
    indexes[out_index] = max_i;
}
/*
** Host wrapper: launch the forward max-pool kernel, one thread per output
** element. NOTE(review): the kernel launch configuration in this paste was
** garbled to empty chevrons ("<<>>"), which is not valid CUDA; restored to
** darknet's standard <<<cuda_gridsize(n), BLOCK>>> configuration — confirm
** against cuda_dark.h, which declares cuda_gridsize() and BLOCK.
*/
extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network net)
{
    int h = layer.out_h;
    int w = layer.out_w;
    int c = layer.c;

    size_t n = h*w*c*layer.batch;

    forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, net.input_gpu, layer.output_gpu, layer.indexes_gpu);
    check_error(cudaPeekAtLastError());
}

/*
** Host wrapper: launch the backward max-pool kernel, one thread per INPUT
** element (each gathers gradient from the output cells whose argmax it
** was). Launch configuration restored as above.
*/
extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network net)
{
    size_t n = layer.h*layer.w*layer.c*layer.batch;

    backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, net.delta_gpu, layer.indexes_gpu);
    check_error(cudaPeekAtLastError());
}
/*
** Learning rate for the current batch, per the network's schedule policy.
**
** During the first burn_in batches every policy is overridden by a
** polynomial warm-up: lr * (batch/burn_in)^power. After that:
**   CONSTANT  fixed lr
**   STEP      lr * scale^(batch/step)
**   STEPS     lr multiplied by scales[i] at each milestone steps[i] passed
**   EXP       lr * gamma^batch
**   POLY      lr * (1 - batch/max_batches)^power
**   RANDOM    lr * U(0,1)^power (fresh random draw every call)
**   SIG       lr * sigmoid centered at `step` with slope gamma
** Unknown policies warn on stderr and fall back to the base lr.
*/
float get_current_rate(network *net)
{
    size_t batch_num = get_current_batch(net);
    int i;
    float rate;
    if (batch_num < net->burn_in) return net->learning_rate * pow((float)batch_num / net->burn_in, net->power);
    switch (net->policy) {
        case CONSTANT:
            return net->learning_rate;
        case STEP:
            return net->learning_rate * pow(net->scale, batch_num/net->step);
        case STEPS:
            rate = net->learning_rate;
            /* steps[] is assumed sorted ascending — TODO confirm in parser. */
            for(i = 0; i < net->num_steps; ++i){
                if(net->steps[i] > batch_num) return rate;
                rate *= net->scales[i];
            }
            return rate;
        case EXP:
            return net->learning_rate * pow(net->gamma, batch_num);
        case POLY:
            return net->learning_rate * pow(1 - (float)batch_num / net->max_batches, net->power);
        case RANDOM:
            return net->learning_rate * pow(rand_uniform(0,1), net->power);
        case SIG:
            return net->learning_rate * (1./(1.+exp(net->gamma*(batch_num - net->step))));
        default:
            fprintf(stderr, "Policy is weird!\n");
            return net->learning_rate;
    }
}
/*
** Run the full forward pass over every layer of the network on the CPU
** (delegates to forward_network_gpu when a GPU index is set).
**
** Works on a by-value copy of *netp so that rewiring net.input to each
** layer's output while chaining does not disturb the caller's struct;
** calc_network_cost at the end still receives the real pointer.
*/
void forward_network(network *netp)
{
#ifdef GPU
    if(netp->gpu_index >= 0){
        forward_network_gpu(netp);
        return;
    }
#endif
    network net = *netp;
    int i;
    for(i = 0; i < net.n; ++i){
        net.index = i;
        layer l = net.layers[i];
        /* Clear this layer's gradient buffer before it accumulates. */
        if(l.delta){
            fill_cpu(l.outputs * l.batch, 0, l.delta, 1);
        }
        l.forward(l, net);
        /* Chain: this layer's output is the next layer's input. */
        net.input = l.output;
        if(l.truth) {
            net.truth = l.output;
        }
    }
    calc_network_cost(netp);
}
for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.update){ + l.update(l, a); + } + } +} + +void calc_network_cost(network *netp) +{ + network net = *netp; + int i; + float sum = 0; + int count = 0; + for(i = 0; i < net.n; ++i){ + if(net.layers[i].cost){ + sum += net.layers[i].cost[0]; + ++count; + } + } + *net.cost = sum/count; +} + +int get_predicted_class_network(network *net) +{ + return max_index(net->output, net->outputs); +} + +void backward_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + backward_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + network orig = net; + for(i = net.n-1; i >= 0; --i){ + layer l = net.layers[i]; + if(l.stopbackward) break; + if(i == 0){ + net = orig; + }else{ + layer prev = net.layers[i-1]; + net.input = prev.output; + net.delta = prev.delta; + } + net.index = i; + l.backward(l, net); + } +} + +float train_network_datum(network *net) +{ + *net->seen += net->batch; + net->train = 1; + forward_network(net); + backward_network(net); + float error = *net->cost; + if(((*net->seen)/net->batch)%net->subdivisions == 0) update_network(net); + return error; +} + +float train_network_sgd(network *net, data d, int n) +{ + int batch = net->batch; + + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + get_random_batch(d, batch, net->input, net->truth); + float err = train_network_datum(net); + sum += err; + } + return (float)sum/(n*batch); +} + +float train_network(network *net, data d) +{ + assert(d.X.rows % net->batch == 0); + int batch = net->batch; + int n = d.X.rows / batch; + + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + get_next_batch(d, batch, i*batch, net->input, net->truth); + float err = train_network_datum(net); + sum += err; + } + return (float)sum/(n*batch); +} + +void set_temp_network(network *net, float t) +{ + int i; + for(i = 0; i < net->n; ++i){ + net->layers[i].temperature = t; + } +} + + +void set_batch_network(network *net, int b) +{ + net->batch = b; + int 
/*
** Resize every resizable layer of the network to a new input w x h,
** propagating each layer's output size into the next layer, then
** reallocate the network-level input/truth/workspace buffers to match.
**
** Always returns 0. Layer types without a resize handler abort via
** error(). The walk stops early after an AVGPOOL layer (its output no
** longer depends on spatial size).
*/
int resize_network(network *net, int w, int h)
{
#ifdef GPU
    cuda_set_device(net->gpu_index);
    cuda_free(net->workspace);
#endif
    int i;
    //if(w == net->w && h == net->h) return 0;
    net->w = w;
    net->h = h;
    int inputs = 0;
    size_t workspace_size = 0;
    //fprintf(stderr, "Resizing to %d x %d...\n", w, h);
    //fflush(stderr);
    for (i = 0; i < net->n; ++i){
        layer l = net->layers[i];
        if(l.type == CONVOLUTIONAL){
            resize_convolutional_layer(&l, w, h);
        }else if(l.type == CROP){
            resize_crop_layer(&l, w, h);
        }else if(l.type == MAXPOOL){
            resize_maxpool_layer(&l, w, h);
        }else if(l.type == REGION){
            resize_region_layer(&l, w, h);
        }else if(l.type == YOLO){
            resize_yolo_layer(&l, w, h);
        }else if(l.type == ROUTE){
            resize_route_layer(&l, net);
        }else if(l.type == SHORTCUT){
            resize_shortcut_layer(&l, w, h);
        }else if(l.type == UPSAMPLE){
            resize_upsample_layer(&l, w, h);
        }else if(l.type == REORG){
            resize_reorg_layer(&l, w, h);
        }else if(l.type == AVGPOOL){
            resize_avgpool_layer(&l, w, h);
        }else if(l.type == NORMALIZATION){
            resize_normalization_layer(&l, w, h);
        }else if(l.type == COST){
            resize_cost_layer(&l, inputs);
        }else{
            error("Cannot resize this type of layer");
        }
        /* Track the largest per-layer scratch requirement. */
        if(l.workspace_size > workspace_size) workspace_size = l.workspace_size;
        if(l.workspace_size > 2000000000) assert(0);
        inputs = l.outputs;
        net->layers[i] = l;
        /* Next layer's input dims are this layer's output dims. */
        w = l.out_w;
        h = l.out_h;
        if(l.type == AVGPOOL) break;
    }
    layer out = get_network_output_layer(net);
    net->inputs = net->layers[0].inputs;
    net->outputs = out.outputs;
    net->truths = out.outputs;
    if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths;
    net->output = out.output;
    /* Reallocate the host-side input/truth staging buffers. */
    free(net->input);
    free(net->truth);
    net->input = calloc(net->inputs*net->batch, sizeof(float));
    net->truth = calloc(net->truths*net->batch, sizeof(float));
#ifdef GPU
    if(gpu_index >= 0){
        cuda_free(net->input_gpu);
        cuda_free(net->truth_gpu);
        net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch);
        net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch);
        if(workspace_size){
            /* Round the byte count up to whole floats. */
            net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1);
        }
    }else {
        free(net->workspace);
        net->workspace = calloc(1, workspace_size);
    }
#else
    free(net->workspace);
    net->workspace = calloc(1, workspace_size);
#endif
    //fprintf(stderr, " Done!\n");
    return 0;
}
net->inputs = net->layers[0].inputs; + net->outputs = out.outputs; + net->truths = out.outputs; + if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths; + net->output = out.output; + free(net->input); + free(net->truth); + net->input = calloc(net->inputs*net->batch, sizeof(float)); + net->truth = calloc(net->truths*net->batch, sizeof(float)); +#ifdef GPU + if(gpu_index >= 0){ + cuda_free(net->input_gpu); + cuda_free(net->truth_gpu); + net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch); + net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch); + if(workspace_size){ + net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1); + } + }else { + free(net->workspace); + net->workspace = calloc(1, workspace_size); + } +#else + free(net->workspace); + net->workspace = calloc(1, workspace_size); +#endif + //fprintf(stderr, " Done!\n"); + return 0; +} + +layer get_network_detection_layer(network *net) +{ + int i; + for(i = 0; i < net->n; ++i){ + if(net->layers[i].type == DETECTION){ + return net->layers[i]; + } + } + fprintf(stderr, "Detection layer not found!!\n"); + layer l = {0}; + return l; +} + +image get_network_image_layer(network *net, int i) +{ + layer l = net->layers[i]; +#ifdef GPU + //cuda_pull_array(l.output_gpu, l.output, l.outputs); +#endif + if (l.out_w && l.out_h && l.out_c){ + return float_to_image(l.out_w, l.out_h, l.out_c, l.output); + } + image def = {0}; + return def; +} + +image get_network_image(network *net) +{ + int i; + for(i = net->n-1; i >= 0; --i){ + image m = get_network_image_layer(net, i); + if(m.h != 0) return m; + } + image def = {0}; + return def; +} + +void visualize_network(network *net) +{ + image *prev = 0; + int i; + char buff[256]; + for(i = 0; i < net->n; ++i){ + sprintf(buff, "Layer %d", i); + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + prev = visualize_convolutional_layer(l, buff, prev); + } + } +} + +void top_predictions(network *net, int k, int 
/*
** Run inference on one batch of input and return a pointer to the
** network's output buffer (owned by the network — valid until the next
** forward pass or free_network).
**
** The whole network struct is saved by value and restored afterwards, so
** the temporary rewiring of input/truth/train/delta leaves *net as found.
*/
float *network_predict(network *net, float *input)
{
    network orig = *net;
    net->input = input;
    net->truth = 0;
    net->train = 0;
    net->delta = 0;  /* no gradient buffers needed for inference */
    forward_network(net);
    float *out = net->output;
    *net = orig;
    return out;
}
free(dets[i].mask); + } + free(dets); +} + +float *network_predict_image(network *net, image im) +{ + image imr = letterbox_image(im, net->w, net->h); + set_batch_network(net, 1); + float *p = network_predict(net, imr.data); + free_image(imr); + return p; +} + +int network_width(network *net){return net->w;} +int network_height(network *net){return net->h;} + +matrix network_predict_data_multi(network *net, data test, int n) +{ + int i,j,b,m; + int k = net->outputs; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.rows, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + } + for(m = 0; m < n; ++m){ + float *out = network_predict(net, X); + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + for(j = 0; j < k; ++j){ + pred.vals[i+b][j] += out[j+b*k]/n; + } + } + } + } + free(X); + return pred; +} + +matrix network_predict_data(network *net, data test) +{ + int i,j,b; + int k = net->outputs; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.cols, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + } + float *out = network_predict(net, X); + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + for(j = 0; j < k; ++j){ + pred.vals[i+b][j] = out[j+b*k]; + } + } + } + free(X); + return pred; +} + +void print_network(network *net) +{ + int i,j; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + float *output = l.output; + int n = l.outputs; + float mean = mean_array(output, n); + float vari = variance_array(output, n); + fprintf(stderr, "Layer %d - Mean: %f, Variance: %f\n",i,mean, vari); + if(n > 100) n = 100; + for(j = 0; j < n; ++j) fprintf(stderr, "%f, ", 
output[j]); + if(n == 100)fprintf(stderr,".....\n"); + fprintf(stderr, "\n"); + } +} + +void compare_networks(network *n1, network *n2, data test) +{ + matrix g1 = network_predict_data(n1, test); + matrix g2 = network_predict_data(n2, test); + int i; + int a,b,c,d; + a = b = c = d = 0; + for(i = 0; i < g1.rows; ++i){ + int truth = max_index(test.y.vals[i], test.y.cols); + int p1 = max_index(g1.vals[i], g1.cols); + int p2 = max_index(g2.vals[i], g2.cols); + if(p1 == truth){ + if(p2 == truth) ++d; + else ++c; + }else{ + if(p2 == truth) ++b; + else ++a; + } + } + printf("%5d %5d\n%5d %5d\n", a, b, c, d); + float num = pow((abs(b - c) - 1.), 2.); + float den = b + c; + printf("%f\n", num/den); +} + +float network_accuracy(network *net, data d) +{ + matrix guess = network_predict_data(net, d); + float acc = matrix_topk_accuracy(d.y, guess,1); + free_matrix(guess); + return acc; +} + +float *network_accuracies(network *net, data d, int n) +{ + static float acc[2]; + matrix guess = network_predict_data(net, d); + acc[0] = matrix_topk_accuracy(d.y, guess, 1); + acc[1] = matrix_topk_accuracy(d.y, guess, n); + free_matrix(guess); + return acc; +} + +layer get_network_output_layer(network *net) +{ + int i; + for(i = net->n - 1; i >= 0; --i){ + if(net->layers[i].type != COST) break; + } + return net->layers[i]; +} + +float network_accuracy_multi(network *net, data d, int n) +{ + matrix guess = network_predict_data_multi(net, d, n); + float acc = matrix_topk_accuracy(d.y, guess,1); + free_matrix(guess); + return acc; +} + +void free_network(network *net) +{ + int i; + for(i = 0; i < net->n; ++i){ + free_layer(net->layers[i]); + } + free(net->layers); + if(net->input) free(net->input); + if(net->truth) free(net->truth); +#ifdef GPU + if(net->input_gpu) cuda_free(net->input_gpu); + if(net->truth_gpu) cuda_free(net->truth_gpu); +#endif + free(net); +} + +// Some day... +// ^ What the hell is this comment for? 
+ + +layer network_output_layer(network *net) +{ + int i; + for(i = net->n - 1; i >= 0; --i){ + if(net->layers[i].type != COST) break; + } + return net->layers[i]; +} + +int network_inputs(network *net) +{ + return net->layers[0].inputs; +} + +int network_outputs(network *net) +{ + return network_output_layer(net).outputs; +} + +float *network_output(network *net) +{ + return network_output_layer(net).output; +} + +#ifdef GPU + +void forward_network_gpu(network *netp) +{ + network net = *netp; + cuda_set_device(net.gpu_index); + cuda_push_array(net.input_gpu, net.input, net.inputs*net.batch); + if(net.truth){ + cuda_push_array(net.truth_gpu, net.truth, net.truths*net.batch); + } + + int i; + for(i = 0; i < net.n; ++i){ + net.index = i; + layer l = net.layers[i]; + if(l.delta_gpu){ + fill_gpu(l.outputs * l.batch, 0, l.delta_gpu, 1); + } + l.forward_gpu(l, net); + net.input_gpu = l.output_gpu; + net.input = l.output; + if(l.truth) { + net.truth_gpu = l.output_gpu; + net.truth = l.output; + } + } + pull_network_output(netp); + calc_network_cost(netp); +} + +void backward_network_gpu(network *netp) +{ + int i; + network net = *netp; + network orig = net; + cuda_set_device(net.gpu_index); + for(i = net.n-1; i >= 0; --i){ + layer l = net.layers[i]; + if(l.stopbackward) break; + if(i == 0){ + net = orig; + }else{ + layer prev = net.layers[i-1]; + net.input = prev.output; + net.delta = prev.delta; + net.input_gpu = prev.output_gpu; + net.delta_gpu = prev.delta_gpu; + } + net.index = i; + l.backward_gpu(l, net); + } +} + +void update_network_gpu(network *netp) +{ + network net = *netp; + cuda_set_device(net.gpu_index); + int i; + update_args a = {0}; + a.batch = net.batch*net.subdivisions; + a.learning_rate = get_current_rate(netp); + a.momentum = net.momentum; + a.decay = net.decay; + a.adam = net.adam; + a.B1 = net.B1; + a.B2 = net.B2; + a.eps = net.eps; + ++*net.t; + a.t = (*net.t); + + for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.update_gpu){ + 
l.update_gpu(l, a); + } + } +} + +void harmless_update_network_gpu(network *netp) +{ + network net = *netp; + cuda_set_device(net.gpu_index); + int i; + for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.weight_updates_gpu) fill_gpu(l.nweights, 0, l.weight_updates_gpu, 1); + if(l.bias_updates_gpu) fill_gpu(l.nbiases, 0, l.bias_updates_gpu, 1); + if(l.scale_updates_gpu) fill_gpu(l.nbiases, 0, l.scale_updates_gpu, 1); + } +} + +typedef struct { + network *net; + data d; + float *err; +} train_args; + +void *train_thread(void *ptr) +{ + train_args args = *(train_args*)ptr; + free(ptr); + cuda_set_device(args.net->gpu_index); + *args.err = train_network(args.net, args.d); + return 0; +} + +pthread_t train_network_in_thread(network *net, data d, float *err) +{ + pthread_t thread; + train_args *ptr = (train_args *)calloc(1, sizeof(train_args)); + ptr->net = net; + ptr->d = d; + ptr->err = err; + if(pthread_create(&thread, 0, train_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void merge_weights(layer l, layer base) +{ + if (l.type == CONVOLUTIONAL) { + axpy_cpu(l.n, 1, l.bias_updates, 1, base.biases, 1); + axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weights, 1); + if (l.scales) { + axpy_cpu(l.n, 1, l.scale_updates, 1, base.scales, 1); + } + } else if(l.type == CONNECTED) { + axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.biases, 1); + axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weights, 1); + } +} + +void scale_weights(layer l, float s) +{ + if (l.type == CONVOLUTIONAL) { + scal_cpu(l.n, s, l.biases, 1); + scal_cpu(l.nweights, s, l.weights, 1); + if (l.scales) { + scal_cpu(l.n, s, l.scales, 1); + } + } else if(l.type == CONNECTED) { + scal_cpu(l.outputs, s, l.biases, 1); + scal_cpu(l.outputs*l.inputs, s, l.weights, 1); + } +} + + +void pull_weights(layer l) +{ + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + cuda_pull_array(l.biases_gpu, l.bias_updates, l.n); + cuda_pull_array(l.weights_gpu, 
l.weight_updates, l.nweights); + if(l.scales) cuda_pull_array(l.scales_gpu, l.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_pull_array(l.biases_gpu, l.bias_updates, l.outputs); + cuda_pull_array(l.weights_gpu, l.weight_updates, l.outputs*l.inputs); + } +} + +void push_weights(layer l) +{ + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + cuda_push_array(l.biases_gpu, l.biases, l.n); + cuda_push_array(l.weights_gpu, l.weights, l.nweights); + if(l.scales) cuda_push_array(l.scales_gpu, l.scales, l.n); + } else if(l.type == CONNECTED){ + cuda_push_array(l.biases_gpu, l.biases, l.outputs); + cuda_push_array(l.weights_gpu, l.weights, l.outputs*l.inputs); + } +} + +void distribute_weights(layer l, layer base) +{ + if (l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL) { + cuda_push_array(l.biases_gpu, base.biases, l.n); + cuda_push_array(l.weights_gpu, base.weights, l.nweights); + if (base.scales) cuda_push_array(l.scales_gpu, base.scales, l.n); + } else if (l.type == CONNECTED) { + cuda_push_array(l.biases_gpu, base.biases, l.outputs); + cuda_push_array(l.weights_gpu, base.weights, l.outputs*l.inputs); + } +} + + +/* + + void pull_updates(layer l) + { + if(l.type == CONVOLUTIONAL){ + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + if(l.scale_updates) cuda_pull_array(l.scale_updates_gpu, l.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs); + } + } + + void push_updates(layer l) + { + if(l.type == CONVOLUTIONAL){ + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + if(l.scale_updates) cuda_push_array(l.scale_updates_gpu, l.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_push_array(l.bias_updates_gpu, l.bias_updates, 
l.outputs); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs); + } + } + + void update_layer(layer l, network net) + { + int update_batch = net.batch*net.subdivisions; + float rate = get_current_rate(net); + l.t = get_current_batch(net); + if(l.update_gpu){ + l.update_gpu(l, update_batch, rate*l.learning_rate_scale, net.momentum, net.decay); + } + } + void merge_updates(layer l, layer base) + { + if (l.type == CONVOLUTIONAL) { + axpy_cpu(l.n, 1, l.bias_updates, 1, base.bias_updates, 1); + axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weight_updates, 1); + if (l.scale_updates) { + axpy_cpu(l.n, 1, l.scale_updates, 1, base.scale_updates, 1); + } + } else if(l.type == CONNECTED) { + axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.bias_updates, 1); + axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weight_updates, 1); + } + } + + void distribute_updates(layer l, layer base) + { + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.n); + cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.nweights); + if(base.scale_updates) cuda_push_array(l.scale_updates_gpu, base.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.outputs); + cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.outputs*l.inputs); + } + } + */ + +/* + void sync_layer(network *nets, int n, int j) + { + int i; + network net = nets[0]; + layer base = net.layers[j]; + scale_weights(base, 0); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i].gpu_index); + layer l = nets[i].layers[j]; + pull_weights(l); + merge_weights(l, base); + } + scale_weights(base, 1./n); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i].gpu_index); + layer l = nets[i].layers[j]; + distribute_weights(l, base); + } + } + */ + +void sync_layer(network **nets, int n, int j) +{ + int i; + network *net = nets[0]; + layer base = net->layers[j]; + 
scale_weights(base, 0); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i]->gpu_index); + layer l = nets[i]->layers[j]; + pull_weights(l); + merge_weights(l, base); + } + scale_weights(base, 1./n); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i]->gpu_index); + layer l = nets[i]->layers[j]; + distribute_weights(l, base); + } +} + +typedef struct{ + network **nets; + int n; + int j; +} sync_args; + +void *sync_layer_thread(void *ptr) +{ + sync_args args = *(sync_args*)ptr; + sync_layer(args.nets, args.n, args.j); + free(ptr); + return 0; +} + +pthread_t sync_layer_in_thread(network **nets, int n, int j) +{ + pthread_t thread; + sync_args *ptr = (sync_args *)calloc(1, sizeof(sync_args)); + ptr->nets = nets; + ptr->n = n; + ptr->j = j; + if(pthread_create(&thread, 0, sync_layer_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void sync_nets(network **nets, int n, int interval) +{ + int j; + int layers = nets[0]->n; + pthread_t *threads = (pthread_t *) calloc(layers, sizeof(pthread_t)); + + *(nets[0]->seen) += interval * (n-1) * nets[0]->batch * nets[0]->subdivisions; + for (j = 0; j < n; ++j){ + *(nets[j]->seen) = *(nets[0]->seen); + } + for (j = 0; j < layers; ++j) { + threads[j] = sync_layer_in_thread(nets, n, j); + } + for (j = 0; j < layers; ++j) { + pthread_join(threads[j], 0); + } + free(threads); +} + +float train_networks(network **nets, int n, data d, int interval) +{ + int i; + int batch = nets[0]->batch; + int subdivisions = nets[0]->subdivisions; + assert(batch * subdivisions * n == d.X.rows); + pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t)); + float *errors = (float *) calloc(n, sizeof(float)); + + float sum = 0; + for(i = 0; i < n; ++i){ + data p = get_data_part(d, i, n); + threads[i] = train_network_in_thread(nets[i], p, errors + i); + } + for(i = 0; i < n; ++i){ + pthread_join(threads[i], 0); + //printf("%f\n", errors[i]); + sum += errors[i]; + } + //cudaDeviceSynchronize(); + if (get_current_batch(nets[0]) 
% interval == 0) { + printf("Syncing... "); + fflush(stdout); + sync_nets(nets, n, interval); + printf("Done!\n"); + } + //cudaDeviceSynchronize(); + free(threads); + free(errors); + return (float)sum/(n); +} + +void pull_network_output(network *net) +{ + layer l = get_network_output_layer(net); + cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); +} + +#endif diff --git a/workloads/realworld/uvm/darknet/src/network.h b/workloads/realworld/uvm/darknet/src/network.h new file mode 100644 index 0000000000000000000000000000000000000000..1b0dfd1aaa3e090c6ce276d26f24d127de2cb66d --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/network.h @@ -0,0 +1,29 @@ +// Oh boy, why am I about to do this.... +#ifndef NETWORK_H +#define NETWORK_H +#include "darknet.h" + +#include "image.h" +#include "layer.h" +#include "data.h" +#include "tree.h" + + +#ifdef GPU +void pull_network_output(network *net); +#endif + +void compare_networks(network *n1, network *n2, data d); +char *get_layer_string(LAYER_TYPE a); + +network *make_network(int n); + + +float network_accuracy_multi(network *net, data d, int n); +int get_predicted_class_network(network *net); +void print_network(network *net); +int resize_network(network *net, int w, int h); +void calc_network_cost(network *net); + +#endif + diff --git a/workloads/realworld/uvm/darknet/src/normalization_layer.c b/workloads/realworld/uvm/darknet/src/normalization_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..424714fe8653f79b57fd4cde625997749d8eff83 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/normalization_layer.c @@ -0,0 +1,151 @@ +#include "normalization_layer.h" +#include "blas.h" + +#include + +layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa) +{ + fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size); + layer layer = {0}; + layer.type = NORMALIZATION; + layer.batch = batch; + 
layer.h = layer.out_h = h; + layer.w = layer.out_w = w; + layer.c = layer.out_c = c; + layer.kappa = kappa; + layer.size = size; + layer.alpha = alpha; + layer.beta = beta; + layer.output = calloc(h * w * c * batch, sizeof(float)); + layer.delta = calloc(h * w * c * batch, sizeof(float)); + layer.squared = calloc(h * w * c * batch, sizeof(float)); + layer.norms = calloc(h * w * c * batch, sizeof(float)); + layer.inputs = w*h*c; + layer.outputs = layer.inputs; + + layer.forward = forward_normalization_layer; + layer.backward = backward_normalization_layer; + #ifdef GPU + layer.forward_gpu = forward_normalization_layer_gpu; + layer.backward_gpu = backward_normalization_layer_gpu; + + layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch); + layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch); + layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch); + layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch); + #endif + return layer; +} + +void resize_normalization_layer(layer *layer, int w, int h) +{ + int c = layer->c; + int batch = layer->batch; + layer->h = h; + layer->w = w; + layer->out_h = h; + layer->out_w = w; + layer->inputs = w*h*c; + layer->outputs = layer->inputs; + layer->output = realloc(layer->output, h * w * c * batch * sizeof(float)); + layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float)); + layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float)); + layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float)); +#ifdef GPU + cuda_free(layer->output_gpu); + cuda_free(layer->delta_gpu); + cuda_free(layer->squared_gpu); + cuda_free(layer->norms_gpu); + layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch); + layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch); + layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch); + layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch); +#endif +} + +void 
forward_normalization_layer(const layer layer, network net) +{ + int k,b; + int w = layer.w; + int h = layer.h; + int c = layer.c; + scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1); + + for(b = 0; b < layer.batch; ++b){ + float *squared = layer.squared + w*h*c*b; + float *norms = layer.norms + w*h*c*b; + float *input = net.input + w*h*c*b; + pow_cpu(w*h*c, 2, input, 1, squared, 1); + + const_cpu(w*h, layer.kappa, norms, 1); + for(k = 0; k < layer.size/2; ++k){ + axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); + } + + for(k = 1; k < layer.c; ++k){ + copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); + int prev = k - ((layer.size-1)/2) - 1; + int next = k + (layer.size/2); + if(prev >= 0) axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); + if(next < layer.c) axpy_cpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); + } + } + pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1); + mul_cpu(w*h*c*layer.batch, net.input, 1, layer.output, 1); +} + +void backward_normalization_layer(const layer layer, network net) +{ + // TODO This is approximate ;-) + // Also this should add in to delta instead of overwritting. 
+ + int w = layer.w; + int h = layer.h; + int c = layer.c; + pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, net.delta, 1); + mul_cpu(w*h*c*layer.batch, layer.delta, 1, net.delta, 1); +} + +#ifdef GPU +void forward_normalization_layer_gpu(const layer layer, network net) +{ + int k,b; + int w = layer.w; + int h = layer.h; + int c = layer.c; + scal_gpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1); + + for(b = 0; b < layer.batch; ++b){ + float *squared = layer.squared_gpu + w*h*c*b; + float *norms = layer.norms_gpu + w*h*c*b; + float *input = net.input_gpu + w*h*c*b; + pow_gpu(w*h*c, 2, input, 1, squared, 1); + + const_gpu(w*h, layer.kappa, norms, 1); + for(k = 0; k < layer.size/2; ++k){ + axpy_gpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); + } + + for(k = 1; k < layer.c; ++k){ + copy_gpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); + int prev = k - ((layer.size-1)/2) - 1; + int next = k + (layer.size/2); + if(prev >= 0) axpy_gpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); + if(next < layer.c) axpy_gpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); + } + } + pow_gpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1); + mul_gpu(w*h*c*layer.batch, net.input_gpu, 1, layer.output_gpu, 1); +} + +void backward_normalization_layer_gpu(const layer layer, network net) +{ + // TODO This is approximate ;-) + + int w = layer.w; + int h = layer.h; + int c = layer.c; + pow_gpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, net.delta_gpu, 1); + mul_gpu(w*h*c*layer.batch, layer.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/uvm/darknet/src/normalization_layer.h b/workloads/realworld/uvm/darknet/src/normalization_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..665baa5066282335b6625618ce07c2fcc833d952 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/normalization_layer.h @@ -0,0 +1,19 @@ +#ifndef NORMALIZATION_LAYER_H +#define NORMALIZATION_LAYER_H + 
+#include "image.h" +#include "layer.h" +#include "network.h" + +layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa); +void resize_normalization_layer(layer *layer, int h, int w); +void forward_normalization_layer(const layer layer, network net); +void backward_normalization_layer(const layer layer, network net); +void visualize_normalization_layer(layer layer, char *window); + +#ifdef GPU +void forward_normalization_layer_gpu(const layer layer, network net); +void backward_normalization_layer_gpu(const layer layer, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm/darknet/src/option_list.c b/workloads/realworld/uvm/darknet/src/option_list.c new file mode 100644 index 0000000000000000000000000000000000000000..2f52781f8096fecc5e9d1db3cfbfa10685506b93 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/option_list.c @@ -0,0 +1,140 @@ +#include +#include +#include +#include "option_list.h" +#include "utils.h" + +list *read_data_cfg(char *filename) +{ + FILE *file = fopen(filename, "r"); + if(file == 0) file_error(filename); + char *line; + int nu = 0; + list *options = make_list(); + while((line=fgetl(file)) != 0){ + ++ nu; + strip(line); + switch(line[0]){ + case '\0': + case '#': + case ';': + free(line); + break; + default: + if(!read_option(line, options)){ + fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); + free(line); + } + break; + } + } + fclose(file); + return options; +} + +metadata get_metadata(char *file) +{ + metadata m = {0}; + list *options = read_data_cfg(file); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", 0); + if(!name_list) { + fprintf(stderr, "No names or labels found\n"); + } else { + m.names = get_labels(name_list); + } + m.classes = option_find_int(options, "classes", 2); + free_list(options); + return m; +} + +int read_option(char *s, list *options) +{ + 
size_t i; + size_t len = strlen(s); + char *val = 0; + for(i = 0; i < len; ++i){ + if(s[i] == '='){ + s[i] = '\0'; + val = s+i+1; + break; + } + } + if(i == len-1) return 0; + char *key = s; + option_insert(options, key, val); + return 1; +} + +void option_insert(list *l, char *key, char *val) +{ + kvp *p = malloc(sizeof(kvp)); + p->key = key; + p->val = val; + p->used = 0; + list_insert(l, p); +} + +void option_unused(list *l) +{ + node *n = l->front; + while(n){ + kvp *p = (kvp *)n->val; + if(!p->used){ + fprintf(stderr, "Unused field: '%s = %s'\n", p->key, p->val); + } + n = n->next; + } +} + +char *option_find(list *l, char *key) +{ + node *n = l->front; + while(n){ + kvp *p = (kvp *)n->val; + if(strcmp(p->key, key) == 0){ + p->used = 1; + return p->val; + } + n = n->next; + } + return 0; +} +char *option_find_str(list *l, char *key, char *def) +{ + char *v = option_find(l, key); + if(v) return v; + if(def) fprintf(stderr, "%s: Using default '%s'\n", key, def); + return def; +} + +int option_find_int(list *l, char *key, int def) +{ + char *v = option_find(l, key); + if(v) return atoi(v); + fprintf(stderr, "%s: Using default '%d'\n", key, def); + return def; +} + +int option_find_int_quiet(list *l, char *key, int def) +{ + char *v = option_find(l, key); + if(v) return atoi(v); + return def; +} + +float option_find_float_quiet(list *l, char *key, float def) +{ + char *v = option_find(l, key); + if(v) return atof(v); + return def; +} + +float option_find_float(list *l, char *key, float def) +{ + char *v = option_find(l, key); + if(v) return atof(v); + fprintf(stderr, "%s: Using default '%lf'\n", key, def); + return def; +} diff --git a/workloads/realworld/uvm/darknet/src/option_list.h b/workloads/realworld/uvm/darknet/src/option_list.h new file mode 100644 index 0000000000000000000000000000000000000000..844bd8724b77889d9ab6e6e70f62305e3339048c --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/option_list.h @@ -0,0 +1,19 @@ +#ifndef OPTION_LIST_H +#define 
OPTION_LIST_H +#include "list.h" + +typedef struct{ + char *key; + char *val; + int used; +} kvp; + + +int read_option(char *s, list *options); +void option_insert(list *l, char *key, char *val); +char *option_find(list *l, char *key); +float option_find_float(list *l, char *key, float def); +float option_find_float_quiet(list *l, char *key, float def); +void option_unused(list *l); + +#endif diff --git a/workloads/realworld/uvm/darknet/src/parser.c b/workloads/realworld/uvm/darknet/src/parser.c new file mode 100644 index 0000000000000000000000000000000000000000..c8141c9f2ddc95941900d11006ff583fadf22290 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/parser.c @@ -0,0 +1,1312 @@ +#include +#include +#include +#include + +#include "activation_layer.h" +#include "logistic_layer.h" +#include "l2norm_layer.h" +#include "activations.h" +#include "avgpool_layer.h" +#include "batchnorm_layer.h" +#include "blas.h" +#include "connected_layer.h" +#include "deconvolutional_layer.h" +#include "convolutional_layer.h" +#include "cost_layer.h" +#include "crnn_layer.h" +#include "crop_layer.h" +#include "detection_layer.h" +#include "dropout_layer.h" +#include "gru_layer.h" +#include "list.h" +#include "local_layer.h" +#include "maxpool_layer.h" +#include "normalization_layer.h" +#include "option_list.h" +#include "parser.h" +#include "region_layer.h" +#include "yolo_layer.h" +#include "iseg_layer.h" +#include "reorg_layer.h" +#include "rnn_layer.h" +#include "route_layer.h" +#include "upsample_layer.h" +#include "shortcut_layer.h" +#include "softmax_layer.h" +#include "lstm_layer.h" +#include "utils.h" + +typedef struct{ + char *type; + list *options; +}section; + +list *read_cfg(char *filename); + +LAYER_TYPE string_to_layer_type(char * type) +{ + + if (strcmp(type, "[shortcut]")==0) return SHORTCUT; + if (strcmp(type, "[crop]")==0) return CROP; + if (strcmp(type, "[cost]")==0) return COST; + if (strcmp(type, "[detection]")==0) return DETECTION; + if (strcmp(type, 
"[region]")==0) return REGION; + if (strcmp(type, "[yolo]")==0) return YOLO; + if (strcmp(type, "[iseg]")==0) return ISEG; + if (strcmp(type, "[local]")==0) return LOCAL; + if (strcmp(type, "[conv]")==0 + || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL; + if (strcmp(type, "[deconv]")==0 + || strcmp(type, "[deconvolutional]")==0) return DECONVOLUTIONAL; + if (strcmp(type, "[activation]")==0) return ACTIVE; + if (strcmp(type, "[logistic]")==0) return LOGXENT; + if (strcmp(type, "[l2norm]")==0) return L2NORM; + if (strcmp(type, "[net]")==0 + || strcmp(type, "[network]")==0) return NETWORK; + if (strcmp(type, "[crnn]")==0) return CRNN; + if (strcmp(type, "[gru]")==0) return GRU; + if (strcmp(type, "[lstm]") == 0) return LSTM; + if (strcmp(type, "[rnn]")==0) return RNN; + if (strcmp(type, "[conn]")==0 + || strcmp(type, "[connected]")==0) return CONNECTED; + if (strcmp(type, "[max]")==0 + || strcmp(type, "[maxpool]")==0) return MAXPOOL; + if (strcmp(type, "[reorg]")==0) return REORG; + if (strcmp(type, "[avg]")==0 + || strcmp(type, "[avgpool]")==0) return AVGPOOL; + if (strcmp(type, "[dropout]")==0) return DROPOUT; + if (strcmp(type, "[lrn]")==0 + || strcmp(type, "[normalization]")==0) return NORMALIZATION; + if (strcmp(type, "[batchnorm]")==0) return BATCHNORM; + if (strcmp(type, "[soft]")==0 + || strcmp(type, "[softmax]")==0) return SOFTMAX; + if (strcmp(type, "[route]")==0) return ROUTE; + if (strcmp(type, "[upsample]")==0) return UPSAMPLE; + return BLANK; +} + +void free_section(section *s) +{ + free(s->type); + node *n = s->options->front; + while(n){ + kvp *pair = (kvp *)n->val; + free(pair->key); + free(pair); + node *next = n->next; + free(n); + n = next; + } + free(s->options); + free(s); +} + +void parse_data(char *data, float *a, int n) +{ + int i; + if(!data) return; + char *curr = data; + char *next = data; + int done = 0; + for(i = 0; i < n && !done; ++i){ + while(*++next !='\0' && *next != ','); + if(*next == '\0') done = 1; + *next = '\0'; + 
sscanf(curr, "%g", &a[i]); + curr = next+1; + } +} + +typedef struct size_params{ + int batch; + int inputs; + int h; + int w; + int c; + int index; + int time_steps; + network *net; +} size_params; + +local_layer parse_local(list *options, size_params params) +{ + int n = option_find_int(options, "filters",1); + int size = option_find_int(options, "size",1); + int stride = option_find_int(options, "stride",1); + int pad = option_find_int(options, "pad",0); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before local layer must output image."); + + local_layer layer = make_local_layer(batch,h,w,c,n,size,stride,pad,activation); + + return layer; +} + +layer parse_deconvolutional(list *options, size_params params) +{ + int n = option_find_int(options, "filters",1); + int size = option_find_int(options, "size",1); + int stride = option_find_int(options, "stride",1); + + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before deconvolutional layer must output image."); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + int pad = option_find_int_quiet(options, "pad",0); + int padding = option_find_int_quiet(options, "padding",0); + if(pad) padding = size/2; + + layer l = make_deconvolutional_layer(batch,h,w,c,n,size,stride,padding, activation, batch_normalize, params.net->adam); + + return l; +} + + +convolutional_layer parse_convolutional(list *options, size_params params) +{ + int n = option_find_int(options, "filters",1); + int size = option_find_int(options, "size",1); + int stride = option_find_int(options, "stride",1); + int pad = 
option_find_int_quiet(options, "pad",0); + int padding = option_find_int_quiet(options, "padding",0); + int groups = option_find_int_quiet(options, "groups", 1); + if(pad) padding = size/2; + + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before convolutional layer must output image."); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + int binary = option_find_int_quiet(options, "binary", 0); + int xnor = option_find_int_quiet(options, "xnor", 0); + + convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,groups,size,stride,padding,activation, batch_normalize, binary, xnor, params.net->adam); + layer.flipped = option_find_int_quiet(options, "flipped", 0); + layer.dot = option_find_float_quiet(options, "dot", 0); + + return layer; +} + +layer parse_crnn(list *options, size_params params) +{ + int output_filters = option_find_int(options, "output_filters",1); + int hidden_filters = option_find_int(options, "hidden_filters",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, activation, batch_normalize); + + l.shortcut = option_find_int_quiet(options, "shortcut", 0); + + return l; +} + +layer parse_rnn(list *options, size_params params) +{ + int output = option_find_int(options, "output",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_rnn_layer(params.batch, 
params.inputs, output, params.time_steps, activation, batch_normalize, params.net->adam); + + l.shortcut = option_find_int_quiet(options, "shortcut", 0); + + return l; +} + +layer parse_gru(list *options, size_params params) +{ + int output = option_find_int(options, "output",1); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_gru_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net->adam); + l.tanh = option_find_int_quiet(options, "tanh", 0); + + return l; +} + +layer parse_lstm(list *options, size_params params) +{ + int output = option_find_int(options, "output", 1); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_lstm_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net->adam); + + return l; +} + +layer parse_connected(list *options, size_params params) +{ + int output = option_find_int(options, "output",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_connected_layer(params.batch, params.inputs, output, activation, batch_normalize, params.net->adam); + return l; +} + +layer parse_softmax(list *options, size_params params) +{ + int groups = option_find_int_quiet(options, "groups",1); + layer l = make_softmax_layer(params.batch, params.inputs, groups); + l.temperature = option_find_float_quiet(options, "temperature", 1); + char *tree_file = option_find_str(options, "tree", 0); + if (tree_file) l.softmax_tree = read_tree(tree_file); + l.w = params.w; + l.h = params.h; + l.c = params.c; + l.spatial = option_find_float_quiet(options, "spatial", 0); + l.noloss = option_find_int_quiet(options, "noloss", 0); + return l; +} + +int *parse_yolo_mask(char *a, int *num) +{ + int *mask = 0; + if(a){ + int len = strlen(a); + 
int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + mask = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + int val = atoi(a); + mask[i] = val; + a = strchr(a, ',')+1; + } + *num = n; + } + return mask; +} + +layer parse_yolo(list *options, size_params params) +{ + int classes = option_find_int(options, "classes", 20); + int total = option_find_int(options, "num", 1); + int num = total; + + char *a = option_find_str(options, "mask", 0); + int *mask = parse_yolo_mask(a, &num); + layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes); + assert(l.outputs == params.inputs); + + l.max_boxes = option_find_int_quiet(options, "max",90); + l.jitter = option_find_float(options, "jitter", .2); + + l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); + l.truth_thresh = option_find_float(options, "truth_thresh", 1); + l.random = option_find_int_quiet(options, "random", 0); + + char *map_file = option_find_str(options, "map", 0); + if (map_file) l.map = read_map(map_file); + + a = option_find_str(options, "anchors", 0); + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + for(i = 0; i < n; ++i){ + float bias = atof(a); + l.biases[i] = bias; + a = strchr(a, ',')+1; + } + } + return l; +} + +layer parse_iseg(list *options, size_params params) +{ + int classes = option_find_int(options, "classes", 20); + int ids = option_find_int(options, "ids", 32); + layer l = make_iseg_layer(params.batch, params.w, params.h, classes, ids); + assert(l.outputs == params.inputs); + return l; +} + +layer parse_region(list *options, size_params params) +{ + int coords = option_find_int(options, "coords", 4); + int classes = option_find_int(options, "classes", 20); + int num = option_find_int(options, "num", 1); + + layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords); + assert(l.outputs == params.inputs); + + l.log = 
option_find_int_quiet(options, "log", 0); + l.sqrt = option_find_int_quiet(options, "sqrt", 0); + + l.softmax = option_find_int(options, "softmax", 0); + l.background = option_find_int_quiet(options, "background", 0); + l.max_boxes = option_find_int_quiet(options, "max",30); + l.jitter = option_find_float(options, "jitter", .2); + l.rescore = option_find_int_quiet(options, "rescore",0); + + l.thresh = option_find_float(options, "thresh", .5); + l.classfix = option_find_int_quiet(options, "classfix", 0); + l.absolute = option_find_int_quiet(options, "absolute", 0); + l.random = option_find_int_quiet(options, "random", 0); + + l.coord_scale = option_find_float(options, "coord_scale", 1); + l.object_scale = option_find_float(options, "object_scale", 1); + l.noobject_scale = option_find_float(options, "noobject_scale", 1); + l.mask_scale = option_find_float(options, "mask_scale", 1); + l.class_scale = option_find_float(options, "class_scale", 1); + l.bias_match = option_find_int_quiet(options, "bias_match",0); + + char *tree_file = option_find_str(options, "tree", 0); + if (tree_file) l.softmax_tree = read_tree(tree_file); + char *map_file = option_find_str(options, "map", 0); + if (map_file) l.map = read_map(map_file); + + char *a = option_find_str(options, "anchors", 0); + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + for(i = 0; i < n; ++i){ + float bias = atof(a); + l.biases[i] = bias; + a = strchr(a, ',')+1; + } + } + return l; +} + +detection_layer parse_detection(list *options, size_params params) +{ + int coords = option_find_int(options, "coords", 1); + int classes = option_find_int(options, "classes", 1); + int rescore = option_find_int(options, "rescore", 0); + int num = option_find_int(options, "num", 1); + int side = option_find_int(options, "side", 7); + detection_layer layer = make_detection_layer(params.batch, params.inputs, num, side, classes, coords, rescore); + + layer.softmax = 
option_find_int(options, "softmax", 0); + layer.sqrt = option_find_int(options, "sqrt", 0); + + layer.max_boxes = option_find_int_quiet(options, "max",90); + layer.coord_scale = option_find_float(options, "coord_scale", 1); + layer.forced = option_find_int(options, "forced", 0); + layer.object_scale = option_find_float(options, "object_scale", 1); + layer.noobject_scale = option_find_float(options, "noobject_scale", 1); + layer.class_scale = option_find_float(options, "class_scale", 1); + layer.jitter = option_find_float(options, "jitter", .2); + layer.random = option_find_int_quiet(options, "random", 0); + layer.reorg = option_find_int_quiet(options, "reorg", 0); + return layer; +} + +cost_layer parse_cost(list *options, size_params params) +{ + char *type_s = option_find_str(options, "type", "sse"); + COST_TYPE type = get_cost_type(type_s); + float scale = option_find_float_quiet(options, "scale",1); + cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale); + layer.ratio = option_find_float_quiet(options, "ratio",0); + layer.noobject_scale = option_find_float_quiet(options, "noobj", 1); + layer.thresh = option_find_float_quiet(options, "thresh",0); + return layer; +} + +crop_layer parse_crop(list *options, size_params params) +{ + int crop_height = option_find_int(options, "crop_height",1); + int crop_width = option_find_int(options, "crop_width",1); + int flip = option_find_int(options, "flip",0); + float angle = option_find_float(options, "angle",0); + float saturation = option_find_float(options, "saturation",1); + float exposure = option_find_float(options, "exposure",1); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before crop layer must output image."); + + int noadjust = option_find_int_quiet(options, "noadjust",0); + + crop_layer l = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip, angle, saturation, exposure); + l.shift = 
option_find_float(options, "shift", 0); + l.noadjust = noadjust; + return l; +} + +layer parse_reorg(list *options, size_params params) +{ + int stride = option_find_int(options, "stride",1); + int reverse = option_find_int_quiet(options, "reverse",0); + int flatten = option_find_int_quiet(options, "flatten",0); + int extra = option_find_int_quiet(options, "extra",0); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before reorg layer must output image."); + + layer layer = make_reorg_layer(batch,w,h,c,stride,reverse, flatten, extra); + return layer; +} + +maxpool_layer parse_maxpool(list *options, size_params params) +{ + int stride = option_find_int(options, "stride",1); + int size = option_find_int(options, "size",stride); + int padding = option_find_int_quiet(options, "padding", size-1); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before maxpool layer must output image."); + + maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding); + return layer; +} + +avgpool_layer parse_avgpool(list *options, size_params params) +{ + int batch,w,h,c; + w = params.w; + h = params.h; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before avgpool layer must output image."); + + avgpool_layer layer = make_avgpool_layer(batch,w,h,c); + return layer; +} + +dropout_layer parse_dropout(list *options, size_params params) +{ + float probability = option_find_float(options, "probability", .5); + dropout_layer layer = make_dropout_layer(params.batch, params.inputs, probability); + layer.out_w = params.w; + layer.out_h = params.h; + layer.out_c = params.c; + return layer; +} + +layer parse_normalization(list *options, size_params params) +{ + float alpha = option_find_float(options, "alpha", .0001); + float beta = option_find_float(options, "beta" , .75); + float kappa = 
option_find_float(options, "kappa", 1); + int size = option_find_int(options, "size", 5); + layer l = make_normalization_layer(params.batch, params.w, params.h, params.c, size, alpha, beta, kappa); + return l; +} + +layer parse_batchnorm(list *options, size_params params) +{ + layer l = make_batchnorm_layer(params.batch, params.w, params.h, params.c); + return l; +} + +layer parse_shortcut(list *options, size_params params, network *net) +{ + char *l = option_find(options, "from"); + int index = atoi(l); + if(index < 0) index = params.index + index; + + int batch = params.batch; + layer from = net->layers[index]; + + layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c); + + char *activation_s = option_find_str(options, "activation", "linear"); + ACTIVATION activation = get_activation(activation_s); + s.activation = activation; + s.alpha = option_find_float_quiet(options, "alpha", 1); + s.beta = option_find_float_quiet(options, "beta", 1); + return s; +} + + +layer parse_l2norm(list *options, size_params params) +{ + layer l = make_l2norm_layer(params.batch, params.inputs); + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; + return l; +} + + +layer parse_logistic(list *options, size_params params) +{ + layer l = make_logistic_layer(params.batch, params.inputs); + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; + return l; +} + +layer parse_activation(list *options, size_params params) +{ + char *activation_s = option_find_str(options, "activation", "linear"); + ACTIVATION activation = get_activation(activation_s); + + layer l = make_activation_layer(params.batch, params.inputs, activation); + + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; + + return l; +} + +layer parse_upsample(list *options, size_params params, network *net) +{ + + int stride = option_find_int(options, "stride",2); + layer l = 
make_upsample_layer(params.batch, params.w, params.h, params.c, stride); + l.scale = option_find_float_quiet(options, "scale", 1); + return l; +} + +route_layer parse_route(list *options, size_params params, network *net) +{ + char *l = option_find(options, "layers"); + int len = strlen(l); + if(!l) error("Route Layer must specify input layers"); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (l[i] == ',') ++n; + } + + int *layers = calloc(n, sizeof(int)); + int *sizes = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + int index = atoi(l); + l = strchr(l, ',')+1; + if(index < 0) index = params.index + index; + layers[i] = index; + sizes[i] = net->layers[index].outputs; + } + int batch = params.batch; + + route_layer layer = make_route_layer(batch, n, layers, sizes); + + convolutional_layer first = net->layers[layers[0]]; + layer.out_w = first.out_w; + layer.out_h = first.out_h; + layer.out_c = first.out_c; + for(i = 1; i < n; ++i){ + int index = layers[i]; + convolutional_layer next = net->layers[index]; + if(next.out_w == first.out_w && next.out_h == first.out_h){ + layer.out_c += next.out_c; + }else{ + layer.out_h = layer.out_w = layer.out_c = 0; + } + } + + return layer; +} + +learning_rate_policy get_policy(char *s) +{ + if (strcmp(s, "random")==0) return RANDOM; + if (strcmp(s, "poly")==0) return POLY; + if (strcmp(s, "constant")==0) return CONSTANT; + if (strcmp(s, "step")==0) return STEP; + if (strcmp(s, "exp")==0) return EXP; + if (strcmp(s, "sigmoid")==0) return SIG; + if (strcmp(s, "steps")==0) return STEPS; + fprintf(stderr, "Couldn't find policy %s, going with constant\n", s); + return CONSTANT; +} + +void parse_net_options(list *options, network *net) +{ + net->batch = option_find_int(options, "batch",1); + net->learning_rate = option_find_float(options, "learning_rate", .001); + net->momentum = option_find_float(options, "momentum", .9); + net->decay = option_find_float(options, "decay", .0001); + int subdivs = option_find_int(options, 
"subdivisions",1); + net->time_steps = option_find_int_quiet(options, "time_steps",1); + net->notruth = option_find_int_quiet(options, "notruth",0); + net->batch /= subdivs; + net->batch *= net->time_steps; + net->subdivisions = subdivs; + net->random = option_find_int_quiet(options, "random", 0); + + net->adam = option_find_int_quiet(options, "adam", 0); + if(net->adam){ + net->B1 = option_find_float(options, "B1", .9); + net->B2 = option_find_float(options, "B2", .999); + net->eps = option_find_float(options, "eps", .0000001); + } + + net->h = option_find_int_quiet(options, "height",0); + net->w = option_find_int_quiet(options, "width",0); + net->c = option_find_int_quiet(options, "channels",0); + net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c); + net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2); + net->min_crop = option_find_int_quiet(options, "min_crop",net->w); + net->max_ratio = option_find_float_quiet(options, "max_ratio", (float) net->max_crop / net->w); + net->min_ratio = option_find_float_quiet(options, "min_ratio", (float) net->min_crop / net->w); + net->center = option_find_int_quiet(options, "center",0); + net->clip = option_find_float_quiet(options, "clip", 0); + + net->angle = option_find_float_quiet(options, "angle", 0); + net->aspect = option_find_float_quiet(options, "aspect", 1); + net->saturation = option_find_float_quiet(options, "saturation", 1); + net->exposure = option_find_float_quiet(options, "exposure", 1); + net->hue = option_find_float_quiet(options, "hue", 0); + + if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied"); + + char *policy_s = option_find_str(options, "policy", "constant"); + net->policy = get_policy(policy_s); + net->burn_in = option_find_int_quiet(options, "burn_in", 0); + net->power = option_find_float_quiet(options, "power", 4); + if(net->policy == STEP){ + net->step = option_find_int(options, "step", 1); + net->scale = 
option_find_float(options, "scale", 1); + } else if (net->policy == STEPS){ + char *l = option_find(options, "steps"); + char *p = option_find(options, "scales"); + if(!l || !p) error("STEPS policy must have steps and scales in cfg file"); + + int len = strlen(l); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (l[i] == ',') ++n; + } + int *steps = calloc(n, sizeof(int)); + float *scales = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + int step = atoi(l); + float scale = atof(p); + l = strchr(l, ',')+1; + p = strchr(p, ',')+1; + steps[i] = step; + scales[i] = scale; + } + net->scales = scales; + net->steps = steps; + net->num_steps = n; + } else if (net->policy == EXP){ + net->gamma = option_find_float(options, "gamma", 1); + } else if (net->policy == SIG){ + net->gamma = option_find_float(options, "gamma", 1); + net->step = option_find_int(options, "step", 1); + } else if (net->policy == POLY || net->policy == RANDOM){ + } + net->max_batches = option_find_int(options, "max_batches", 0); +} + +int is_network(section *s) +{ + return (strcmp(s->type, "[net]")==0 + || strcmp(s->type, "[network]")==0); +} + +network *parse_network_cfg(char *filename) +{ + list *sections = read_cfg(filename); + node *n = sections->front; + if(!n) error("Config file has no sections"); + network *net = make_network(sections->size - 1); + net->gpu_index = gpu_index; + size_params params; + + section *s = (section *)n->val; + list *options = s->options; + if(!is_network(s)) error("First section must be [net] or [network]"); + parse_net_options(options, net); + + params.h = net->h; + params.w = net->w; + params.c = net->c; + params.inputs = net->inputs; + params.batch = net->batch; + params.time_steps = net->time_steps; + params.net = net; + + size_t workspace_size = 0; + n = n->next; + int count = 0; + free_section(s); + fprintf(stderr, "layer filters size input output\n"); + while(n){ + params.index = count; + fprintf(stderr, "%5d ", count); + s = (section *)n->val; + options 
= s->options; + layer l = {0}; + LAYER_TYPE lt = string_to_layer_type(s->type); + if(lt == CONVOLUTIONAL){ + l = parse_convolutional(options, params); + }else if(lt == DECONVOLUTIONAL){ + l = parse_deconvolutional(options, params); + }else if(lt == LOCAL){ + l = parse_local(options, params); + }else if(lt == ACTIVE){ + l = parse_activation(options, params); + }else if(lt == LOGXENT){ + l = parse_logistic(options, params); + }else if(lt == L2NORM){ + l = parse_l2norm(options, params); + }else if(lt == RNN){ + l = parse_rnn(options, params); + }else if(lt == GRU){ + l = parse_gru(options, params); + }else if (lt == LSTM) { + l = parse_lstm(options, params); + }else if(lt == CRNN){ + l = parse_crnn(options, params); + }else if(lt == CONNECTED){ + l = parse_connected(options, params); + }else if(lt == CROP){ + l = parse_crop(options, params); + }else if(lt == COST){ + l = parse_cost(options, params); + }else if(lt == REGION){ + l = parse_region(options, params); + }else if(lt == YOLO){ + l = parse_yolo(options, params); + }else if(lt == ISEG){ + l = parse_iseg(options, params); + }else if(lt == DETECTION){ + l = parse_detection(options, params); + }else if(lt == SOFTMAX){ + l = parse_softmax(options, params); + net->hierarchy = l.softmax_tree; + }else if(lt == NORMALIZATION){ + l = parse_normalization(options, params); + }else if(lt == BATCHNORM){ + l = parse_batchnorm(options, params); + }else if(lt == MAXPOOL){ + l = parse_maxpool(options, params); + }else if(lt == REORG){ + l = parse_reorg(options, params); + }else if(lt == AVGPOOL){ + l = parse_avgpool(options, params); + }else if(lt == ROUTE){ + l = parse_route(options, params, net); + }else if(lt == UPSAMPLE){ + l = parse_upsample(options, params, net); + }else if(lt == SHORTCUT){ + l = parse_shortcut(options, params, net); + }else if(lt == DROPOUT){ + l = parse_dropout(options, params); + l.output = net->layers[count-1].output; + l.delta = net->layers[count-1].delta; +#ifdef GPU + l.output_gpu = 
net->layers[count-1].output_gpu; + l.delta_gpu = net->layers[count-1].delta_gpu; +#endif + }else{ + fprintf(stderr, "Type not recognized: %s\n", s->type); + } + l.clip = net->clip; + l.truth = option_find_int_quiet(options, "truth", 0); + l.onlyforward = option_find_int_quiet(options, "onlyforward", 0); + l.stopbackward = option_find_int_quiet(options, "stopbackward", 0); + l.dontsave = option_find_int_quiet(options, "dontsave", 0); + l.dontload = option_find_int_quiet(options, "dontload", 0); + l.numload = option_find_int_quiet(options, "numload", 0); + l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0); + l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1); + l.smooth = option_find_float_quiet(options, "smooth", 0); + option_unused(options); + net->layers[count] = l; + if (l.workspace_size > workspace_size) workspace_size = l.workspace_size; + free_section(s); + n = n->next; + ++count; + if(n){ + params.h = l.out_h; + params.w = l.out_w; + params.c = l.out_c; + params.inputs = l.outputs; + } + } + free_list(sections); + layer out = get_network_output_layer(net); + net->outputs = out.outputs; + net->truths = out.outputs; + if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths; + net->output = out.output; + net->input = calloc(net->inputs*net->batch, sizeof(float)); + net->truth = calloc(net->truths*net->batch, sizeof(float)); +#ifdef GPU + net->output_gpu = out.output_gpu; + net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch); + net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch); +#endif + if(workspace_size){ + //printf("%ld\n", workspace_size); +#ifdef GPU + if(gpu_index >= 0){ + net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1); + }else { + net->workspace = calloc(1, workspace_size); + } +#else + net->workspace = calloc(1, workspace_size); +#endif + } + return net; +} + +list *read_cfg(char *filename) +{ + FILE *file = fopen(filename, "r"); 
+ if(file == 0) file_error(filename); + char *line; + int nu = 0; + list *options = make_list(); + section *current = 0; + while((line=fgetl(file)) != 0){ + ++ nu; + strip(line); + switch(line[0]){ + case '[': + current = malloc(sizeof(section)); + list_insert(options, current); + current->options = make_list(); + current->type = line; + break; + case '\0': + case '#': + case ';': + free(line); + break; + default: + if(!read_option(line, current->options)){ + fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); + free(line); + } + break; + } + } + fclose(file); + return options; +} + +void save_convolutional_weights_binary(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_convolutional_layer(l); + } +#endif + binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.binary_weights); + int size = l.c*l.size*l.size; + int i, j, k; + fwrite(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.n, fp); + fwrite(l.rolling_mean, sizeof(float), l.n, fp); + fwrite(l.rolling_variance, sizeof(float), l.n, fp); + } + for(i = 0; i < l.n; ++i){ + float mean = l.binary_weights[i*size]; + if(mean < 0) mean = -mean; + fwrite(&mean, sizeof(float), 1, fp); + for(j = 0; j < size/8; ++j){ + int index = i*size + j*8; + unsigned char c = 0; + for(k = 0; k < 8; ++k){ + if (j*8 + k >= size) break; + if (l.binary_weights[index + k] > 0) c = (c | 1<= 0){ + pull_convolutional_layer(l); + } +#endif + int num = l.nweights; + fwrite(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.n, fp); + fwrite(l.rolling_mean, sizeof(float), l.n, fp); + fwrite(l.rolling_variance, sizeof(float), l.n, fp); + } + fwrite(l.weights, sizeof(float), num, fp); +} + +void save_batchnorm_weights(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_batchnorm_layer(l); + } +#endif + fwrite(l.scales, sizeof(float), l.c, fp); + fwrite(l.rolling_mean, sizeof(float), l.c, fp); + 
fwrite(l.rolling_variance, sizeof(float), l.c, fp); +} + +void save_connected_weights(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_connected_layer(l); + } +#endif + fwrite(l.biases, sizeof(float), l.outputs, fp); + fwrite(l.weights, sizeof(float), l.outputs*l.inputs, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.outputs, fp); + fwrite(l.rolling_mean, sizeof(float), l.outputs, fp); + fwrite(l.rolling_variance, sizeof(float), l.outputs, fp); + } +} + +void save_weights_upto(network *net, char *filename, int cutoff) +{ +#ifdef GPU + if(net->gpu_index >= 0){ + cuda_set_device(net->gpu_index); + } +#endif + fprintf(stderr, "Saving weights to %s\n", filename); + FILE *fp = fopen(filename, "wb"); + if(!fp) file_error(filename); + + int major = 0; + int minor = 2; + int revision = 0; + fwrite(&major, sizeof(int), 1, fp); + fwrite(&minor, sizeof(int), 1, fp); + fwrite(&revision, sizeof(int), 1, fp); + fwrite(net->seen, sizeof(size_t), 1, fp); + + int i; + for(i = 0; i < net->n && i < cutoff; ++i){ + layer l = net->layers[i]; + if (l.dontsave) continue; + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + save_convolutional_weights(l, fp); + } if(l.type == CONNECTED){ + save_connected_weights(l, fp); + } if(l.type == BATCHNORM){ + save_batchnorm_weights(l, fp); + } if(l.type == RNN){ + save_connected_weights(*(l.input_layer), fp); + save_connected_weights(*(l.self_layer), fp); + save_connected_weights(*(l.output_layer), fp); + } if (l.type == LSTM) { + save_connected_weights(*(l.wi), fp); + save_connected_weights(*(l.wf), fp); + save_connected_weights(*(l.wo), fp); + save_connected_weights(*(l.wg), fp); + save_connected_weights(*(l.ui), fp); + save_connected_weights(*(l.uf), fp); + save_connected_weights(*(l.uo), fp); + save_connected_weights(*(l.ug), fp); + } if (l.type == GRU) { + if(1){ + save_connected_weights(*(l.wz), fp); + save_connected_weights(*(l.wr), fp); + save_connected_weights(*(l.wh), fp); + 
save_connected_weights(*(l.uz), fp); + save_connected_weights(*(l.ur), fp); + save_connected_weights(*(l.uh), fp); + }else{ + save_connected_weights(*(l.reset_layer), fp); + save_connected_weights(*(l.update_layer), fp); + save_connected_weights(*(l.state_layer), fp); + } + } if(l.type == CRNN){ + save_convolutional_weights(*(l.input_layer), fp); + save_convolutional_weights(*(l.self_layer), fp); + save_convolutional_weights(*(l.output_layer), fp); + } if(l.type == LOCAL){ +#ifdef GPU + if(gpu_index >= 0){ + pull_local_layer(l); + } +#endif + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + fwrite(l.biases, sizeof(float), l.outputs, fp); + fwrite(l.weights, sizeof(float), size, fp); + } + } + fclose(fp); +} +void save_weights(network *net, char *filename) +{ + save_weights_upto(net, filename, net->n); +} + +void transpose_matrix(float *a, int rows, int cols) +{ + float *transpose = calloc(rows*cols, sizeof(float)); + int x, y; + for(x = 0; x < rows; ++x){ + for(y = 0; y < cols; ++y){ + transpose[y*rows + x] = a[x*cols + y]; + } + } + memcpy(a, transpose, rows*cols*sizeof(float)); + free(transpose); +} + +void load_connected_weights(layer l, FILE *fp, int transpose) +{ + fread(l.biases, sizeof(float), l.outputs, fp); + fread(l.weights, sizeof(float), l.outputs*l.inputs, fp); + if(transpose){ + transpose_matrix(l.weights, l.inputs, l.outputs); + } + //printf("Biases: %f mean %f variance\n", mean_array(l.biases, l.outputs), variance_array(l.biases, l.outputs)); + //printf("Weights: %f mean %f variance\n", mean_array(l.weights, l.outputs*l.inputs), variance_array(l.weights, l.outputs*l.inputs)); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.outputs, fp); + fread(l.rolling_mean, sizeof(float), l.outputs, fp); + fread(l.rolling_variance, sizeof(float), l.outputs, fp); + //printf("Scales: %f mean %f variance\n", mean_array(l.scales, l.outputs), variance_array(l.scales, l.outputs)); + 
//printf("rolling_mean: %f mean %f variance\n", mean_array(l.rolling_mean, l.outputs), variance_array(l.rolling_mean, l.outputs)); + //printf("rolling_variance: %f mean %f variance\n", mean_array(l.rolling_variance, l.outputs), variance_array(l.rolling_variance, l.outputs)); + } +#ifdef GPU + if(gpu_index >= 0){ + push_connected_layer(l); + } +#endif +} + +void load_batchnorm_weights(layer l, FILE *fp) +{ + fread(l.scales, sizeof(float), l.c, fp); + fread(l.rolling_mean, sizeof(float), l.c, fp); + fread(l.rolling_variance, sizeof(float), l.c, fp); +#ifdef GPU + if(gpu_index >= 0){ + push_batchnorm_layer(l); + } +#endif +} + +void load_convolutional_weights_binary(layer l, FILE *fp) +{ + fread(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.n, fp); + fread(l.rolling_mean, sizeof(float), l.n, fp); + fread(l.rolling_variance, sizeof(float), l.n, fp); + } + int size = l.c*l.size*l.size; + int i, j, k; + for(i = 0; i < l.n; ++i){ + float mean = 0; + fread(&mean, sizeof(float), 1, fp); + for(j = 0; j < size/8; ++j){ + int index = i*size + j*8; + unsigned char c = 0; + fread(&c, sizeof(char), 1, fp); + for(k = 0; k < 8; ++k){ + if (j*8 + k >= size) break; + l.weights[index + k] = (c & 1<= 0){ + push_convolutional_layer(l); + } +#endif +} + +void load_convolutional_weights(layer l, FILE *fp) +{ + if(l.binary){ + //load_convolutional_weights_binary(l, fp); + //return; + } + if(l.numload) l.n = l.numload; + int num = l.c/l.groups*l.n*l.size*l.size; + fread(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.n, fp); + fread(l.rolling_mean, sizeof(float), l.n, fp); + fread(l.rolling_variance, sizeof(float), l.n, fp); + if(0){ + int i; + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_mean[i]); + } + printf("\n"); + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_variance[i]); + } + printf("\n"); + } + if(0){ + fill_cpu(l.n, 0, 
l.rolling_mean, 1); + fill_cpu(l.n, 0, l.rolling_variance, 1); + } + if(0){ + int i; + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_mean[i]); + } + printf("\n"); + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_variance[i]); + } + printf("\n"); + } + } + fread(l.weights, sizeof(float), num, fp); + //if(l.c == 3) scal_cpu(num, 1./256, l.weights, 1); + if (l.flipped) { + transpose_matrix(l.weights, l.c*l.size*l.size, l.n); + } + //if (l.binary) binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.weights); +#ifdef GPU + if(gpu_index >= 0){ + push_convolutional_layer(l); + } +#endif +} + + +void load_weights_upto(network *net, char *filename, int start, int cutoff) +{ +#ifdef GPU + if(net->gpu_index >= 0){ + cuda_set_device(net->gpu_index); + } +#endif + fprintf(stderr, "Loading weights from %s...", filename); + fflush(stdout); + FILE *fp = fopen(filename, "rb"); + if(!fp) file_error(filename); + + int major; + int minor; + int revision; + fread(&major, sizeof(int), 1, fp); + fread(&minor, sizeof(int), 1, fp); + fread(&revision, sizeof(int), 1, fp); + if ((major*10 + minor) >= 2 && major < 1000 && minor < 1000){ + fread(net->seen, sizeof(size_t), 1, fp); + } else { + int iseen = 0; + fread(&iseen, sizeof(int), 1, fp); + *net->seen = iseen; + } + int transpose = (major > 1000) || (minor > 1000); + + int i; + for(i = start; i < net->n && i < cutoff; ++i){ + layer l = net->layers[i]; + if (l.dontload) continue; + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + load_convolutional_weights(l, fp); + } + if(l.type == CONNECTED){ + load_connected_weights(l, fp, transpose); + } + if(l.type == BATCHNORM){ + load_batchnorm_weights(l, fp); + } + if(l.type == CRNN){ + load_convolutional_weights(*(l.input_layer), fp); + load_convolutional_weights(*(l.self_layer), fp); + load_convolutional_weights(*(l.output_layer), fp); + } + if(l.type == RNN){ + load_connected_weights(*(l.input_layer), fp, transpose); + load_connected_weights(*(l.self_layer), fp, 
transpose); + load_connected_weights(*(l.output_layer), fp, transpose); + } + if (l.type == LSTM) { + load_connected_weights(*(l.wi), fp, transpose); + load_connected_weights(*(l.wf), fp, transpose); + load_connected_weights(*(l.wo), fp, transpose); + load_connected_weights(*(l.wg), fp, transpose); + load_connected_weights(*(l.ui), fp, transpose); + load_connected_weights(*(l.uf), fp, transpose); + load_connected_weights(*(l.uo), fp, transpose); + load_connected_weights(*(l.ug), fp, transpose); + } + if (l.type == GRU) { + if(1){ + load_connected_weights(*(l.wz), fp, transpose); + load_connected_weights(*(l.wr), fp, transpose); + load_connected_weights(*(l.wh), fp, transpose); + load_connected_weights(*(l.uz), fp, transpose); + load_connected_weights(*(l.ur), fp, transpose); + load_connected_weights(*(l.uh), fp, transpose); + }else{ + load_connected_weights(*(l.reset_layer), fp, transpose); + load_connected_weights(*(l.update_layer), fp, transpose); + load_connected_weights(*(l.state_layer), fp, transpose); + } + } + if(l.type == LOCAL){ + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + fread(l.biases, sizeof(float), l.outputs, fp); + fread(l.weights, sizeof(float), size, fp); +#ifdef GPU + if(gpu_index >= 0){ + push_local_layer(l); + } +#endif + } + } + fprintf(stderr, "Done!\n"); + fclose(fp); +} + +void load_weights(network *net, char *filename) +{ + load_weights_upto(net, filename, 0, net->n); +} + diff --git a/workloads/realworld/uvm/darknet/src/parser.h b/workloads/realworld/uvm/darknet/src/parser.h new file mode 100644 index 0000000000000000000000000000000000000000..81aef2c86f3e6cb362f8bde9695ce9d5699ca77f --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/parser.h @@ -0,0 +1,9 @@ +#ifndef PARSER_H +#define PARSER_H +#include "darknet.h" +#include "network.h" + +void save_network(network net, char *filename); +void save_weights_double(network net, char *filename); + +#endif diff --git 
a/workloads/realworld/uvm/darknet/src/region_layer.c b/workloads/realworld/uvm/darknet/src/region_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..9df1b8bc252239ca520fa3dabeff55e5eb5959b8 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/region_layer.c @@ -0,0 +1,507 @@ +#include "region_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_region_layer(int batch, int w, int h, int n, int classes, int coords) +{ + layer l = {0}; + l.type = REGION; + + l.n = n; + l.batch = batch; + l.h = h; + l.w = w; + l.c = n*(classes + coords + 1); + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.coords = coords; + l.cost = calloc(1, sizeof(float)); + l.biases = calloc(n*2, sizeof(float)); + l.bias_updates = calloc(n*2, sizeof(float)); + l.outputs = h*w*n*(classes + coords + 1); + l.inputs = l.outputs; + l.truths = 30*(l.coords + 1); + l.delta = calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + int i; + for(i = 0; i < n*2; ++i){ + l.biases[i] = .5; + } + + l.forward = forward_region_layer; + l.backward = backward_region_layer; +#ifdef GPU + l.forward_gpu = forward_region_layer_gpu; + l.backward_gpu = backward_region_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "detection\n"); + srand(0); + + return l; +} + +void resize_region_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->n*(l->classes + l->coords + 1); + l->inputs = l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu 
= cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride) +{ + box b; + b.x = (i + x[index + 0*stride]) / w; + b.y = (j + x[index + 1*stride]) / h; + b.w = exp(x[index + 2*stride]) * biases[2*n] / w; + b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h; + return b; +} + +float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale, int stride) +{ + box pred = get_region_box(x, biases, n, index, i, j, w, h, stride); + float iou = box_iou(pred, truth); + + float tx = (truth.x*w - i); + float ty = (truth.y*h - j); + float tw = log(truth.w*w / biases[2*n]); + float th = log(truth.h*h / biases[2*n + 1]); + + delta[index + 0*stride] = scale * (tx - x[index + 0*stride]); + delta[index + 1*stride] = scale * (ty - x[index + 1*stride]); + delta[index + 2*stride] = scale * (tw - x[index + 2*stride]); + delta[index + 3*stride] = scale * (th - x[index + 3*stride]); + return iou; +} + +void delta_region_mask(float *truth, float *x, int n, int index, float *delta, int stride, int scale) +{ + int i; + for(i = 0; i < n; ++i){ + delta[index + i*stride] = scale*(truth[i] - x[index + i*stride]); + } +} + + +void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, int stride, float *avg_cat, int tag) +{ + int i, n; + if(hier){ + float pred = 1; + while(class >= 0){ + pred *= output[index + stride*class]; + int g = hier->group[class]; + int offset = hier->group_offset[g]; + for(i = 0; i < hier->group_size[g]; ++i){ + delta[index + stride*(offset + i)] = scale * (0 - output[index + stride*(offset + i)]); + } + delta[index + stride*class] = scale * (1 - output[index + stride*class]); + + class = hier->parent[class]; + } + *avg_cat += pred; + } else { + if (delta[index] && tag){ + delta[index + stride*class] = scale * (1 - output[index + stride*class]); + 
return; + } + for(n = 0; n < classes; ++n){ + delta[index + stride*n] = scale * (((n == class)?1 : 0) - output[index + stride*n]); + if(n == class) *avg_cat += output[index + stride*n]; + } + } +} + +float logit(float x) +{ + return log(x/(1.-x)); +} + +float tisnan(float x) +{ + return (x != x); +} + +int entry_index(layer l, int batch, int location, int entry) +{ + int n = location / (l.w*l.h); + int loc = location % (l.w*l.h); + return batch*l.outputs + n*l.w*l.h*(l.coords+l.classes+1) + entry*l.w*l.h + loc; +} + +void forward_region_layer(const layer l, network net) +{ + int i,j,b,t,n; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) activate_array(l.output + index, l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords + 1); + if(!l.softmax && !l.softmax_tree) activate_array(l.output + index, l.classes*l.w*l.h, LOGISTIC); + } + } + if (l.softmax_tree){ + int i; + int count = l.coords + 1; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_cpu(net.input + count, group_size, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + count); + count += group_size; + } + } else if (l.softmax){ + int index = entry_index(l, 0, 0, l.coords + !l.background); + softmax_cpu(net.input + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output + index); + } +#endif + + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + if(!net.train) return; + float avg_iou = 0; + float recall = 0; + float avg_cat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + int class_count = 0; + *(l.cost) = 0; + for (b = 0; b < l.batch; ++b) { + if(l.softmax_tree){ + int onlyclass = 0; + 
for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + if(!truth.x) break; + int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; + float maxp = 0; + int maxi = 0; + if(truth.x > 100000 && truth.y > 100000){ + for(n = 0; n < l.n*l.w*l.h; ++n){ + int class_index = entry_index(l, b, n, l.coords + 1); + int obj_index = entry_index(l, b, n, l.coords); + float scale = l.output[obj_index]; + l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]); + float p = scale*get_hierarchy_probability(l.output + class_index, l.softmax_tree, class, l.w*l.h); + if(p > maxp){ + maxp = p; + maxi = n; + } + } + int class_index = entry_index(l, b, maxi, l.coords + 1); + int obj_index = entry_index(l, b, maxi, l.coords); + delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax); + if(l.output[obj_index] < .3) l.delta[obj_index] = l.object_scale * (.3 - l.output[obj_index]); + else l.delta[obj_index] = 0; + l.delta[obj_index] = 0; + ++class_count; + onlyclass = 1; + break; + } + } + if(onlyclass) continue; + } + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h); + float best_iou = 0; + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + if(!truth.x) break; + float iou = box_iou(pred, truth); + if (iou > best_iou) { + best_iou = iou; + } + } + int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, l.coords); + avg_anyobj += l.output[obj_index]; + l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]); + if(l.background) l.delta[obj_index] = l.noobject_scale * (1 - l.output[obj_index]); + if (best_iou > l.thresh) { + l.delta[obj_index] = 0; + } + + if(*(net.seen) < 12800){ + box truth = {0}; + truth.x = (i 
+ .5)/l.w; + truth.y = (j + .5)/l.h; + truth.w = l.biases[2*n]/l.w; + truth.h = l.biases[2*n+1]/l.h; + delta_region_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, l.delta, .01, l.w*l.h); + } + } + } + } + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + + if(!truth.x) break; + float best_iou = 0; + int best_n = 0; + i = (truth.x * l.w); + j = (truth.y * l.h); + box truth_shift = truth; + truth_shift.x = 0; + truth_shift.y = 0; + for(n = 0; n < l.n; ++n){ + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h); + if(l.bias_match){ + pred.w = l.biases[2*n]/l.w; + pred.h = l.biases[2*n+1]/l.h; + } + pred.x = 0; + pred.y = 0; + float iou = box_iou(pred, truth_shift); + if (iou > best_iou){ + best_iou = iou; + best_n = n; + } + } + + int box_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 0); + float iou = delta_region_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, l.delta, l.coord_scale * (2 - truth.w*truth.h), l.w*l.h); + if(l.coords > 4){ + int mask_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 4); + delta_region_mask(net.truth + t*(l.coords + 1) + b*l.truths + 5, l.output, l.coords - 4, mask_index, l.delta, l.w*l.h, l.mask_scale); + } + if(iou > .5) recall += 1; + avg_iou += iou; + + int obj_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords); + avg_obj += l.output[obj_index]; + l.delta[obj_index] = l.object_scale * (1 - l.output[obj_index]); + if (l.rescore) { + l.delta[obj_index] = l.object_scale * (iou - l.output[obj_index]); + } + if(l.background){ + l.delta[obj_index] = l.object_scale * (0 - l.output[obj_index]); + } + + int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords + 1); + delta_region_class(l.output, l.delta, class_index, class, 
l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax); + ++count; + ++class_count; + } + } + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count); +} + +void backward_region_layer(const layer l, network net) +{ + /* + int b; + int size = l.coords + l.classes + 1; + for (b = 0; b < l.batch*l.n; ++b){ + int index = (b*size + 4)*l.w*l.h; + gradient_array(l.output + index, l.w*l.h, LOGISTIC, l.delta + index); + } + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); + */ +} + +void correct_region_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +{ + int i; + int new_w=0; + int new_h=0; + if (((float)netw/w) < ((float)neth/h)) { + new_w = netw; + new_h = (h * netw)/w; + } else { + new_h = neth; + new_w = (w * neth)/h; + } + for (i = 0; i < n; ++i){ + box b = dets[i].bbox; + b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); + b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); + b.w *= (float)netw/new_w; + b.h *= (float)neth/new_h; + if(!relative){ + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + dets[i].bbox = b; + } +} + +void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets) +{ + int i,j,n,z; + float *predictions = l.output; + if (l.batch == 2) { + float *flip = l.output + l.outputs; + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w/2; ++i) { + for (n = 0; n < l.n; ++n) { + for(z = 0; z < l.classes + l.coords + 1; ++z){ + int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; + int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); + float swap = flip[i1]; + flip[i1] = flip[i2]; + flip[i2] = swap; + if(z == 0){ + flip[i1] = -flip[i1]; + flip[i2] = -flip[i2]; + } + } + } + } + } + for(i = 0; i < 
l.outputs; ++i){ + l.output[i] = (l.output[i] + flip[i])/2.; + } + } + for (i = 0; i < l.w*l.h; ++i){ + int row = i / l.w; + int col = i % l.w; + for(n = 0; n < l.n; ++n){ + int index = n*l.w*l.h + i; + for(j = 0; j < l.classes; ++j){ + dets[index].prob[j] = 0; + } + int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); + int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); + int mask_index = entry_index(l, 0, n*l.w*l.h + i, 4); + float scale = l.background ? 1 : predictions[obj_index]; + dets[index].bbox = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h); + dets[index].objectness = scale > thresh ? scale : 0; + if(dets[index].mask){ + for(j = 0; j < l.coords - 4; ++j){ + dets[index].mask[j] = l.output[mask_index + j*l.w*l.h]; + } + } + + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + !l.background); + if(l.softmax_tree){ + + hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0, l.w*l.h); + if(map){ + for(j = 0; j < 200; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + map[j]); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? prob : 0; + } + } else { + int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh, l.w*l.h); + dets[index].prob[j] = (scale > thresh) ? scale : 0; + } + } else { + if(dets[index].objectness){ + for(j = 0; j < l.classes; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + j); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? 
prob : 0; + } + } + } + } + } + correct_region_boxes(dets, l.w*l.h*l.n, w, h, netw, neth, relative); +} + +#ifdef GPU + +void forward_region_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + if(l.coords > 4){ + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC); + } + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) activate_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords + 1); + if(!l.softmax && !l.softmax_tree) activate_array_gpu(l.output_gpu + index, l.classes*l.w*l.h, LOGISTIC); + } + } + if (l.softmax_tree){ + int index = entry_index(l, 0, 0, l.coords + 1); + softmax_tree(net.input_gpu + index, l.w*l.h, l.batch*l.n, l.inputs/l.n, 1, l.output_gpu + index, *l.softmax_tree); + } else if (l.softmax) { + int index = entry_index(l, 0, 0, l.coords + !l.background); + softmax_gpu(net.input_gpu + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index); + } + if(!net.train || l.onlyforward){ + cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + return; + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_region_layer(l, net); + //cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs); + if(!net.train) return; + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_region_layer_gpu(const layer l, network net) +{ + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + gradient_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC, l.delta_gpu + index); + if(l.coords > 4){ + index = entry_index(l, b, n*l.w*l.h, 4); + 
gradient_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC, l.delta_gpu + index); + } + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) gradient_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC, l.delta_gpu + index); + } + } + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + +void zero_objectness(layer l) +{ + int i, n; + for (i = 0; i < l.w*l.h; ++i){ + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); + l.output[obj_index] = 0; + } + } +} + diff --git a/workloads/realworld/uvm/darknet/src/region_layer.h b/workloads/realworld/uvm/darknet/src/region_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9f12fd187fd490d10cbc21af8251e0e2a870b7cb --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/region_layer.h @@ -0,0 +1,18 @@ +#ifndef REGION_LAYER_H +#define REGION_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_region_layer(int batch, int w, int h, int n, int classes, int coords); +void forward_region_layer(const layer l, network net); +void backward_region_layer(const layer l, network net); +void resize_region_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_region_layer_gpu(const layer l, network net); +void backward_region_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm/darknet/src/reorg_layer.c b/workloads/realworld/uvm/darknet/src/reorg_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..b3634d53a5e01a8bbcf00e62a90f70f40108e1d7 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/reorg_layer.c @@ -0,0 +1,173 @@ +#include "reorg_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + + +layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra) +{ + layer l = {0}; + l.type = REORG; + l.batch = batch; + l.stride = stride; + l.extra = extra; + l.h = h; + l.w = w; + 
l.c = c; + l.flatten = flatten; + if(reverse){ + l.out_w = w*stride; + l.out_h = h*stride; + l.out_c = c/(stride*stride); + }else{ + l.out_w = w/stride; + l.out_h = h/stride; + l.out_c = c*(stride*stride); + } + l.reverse = reverse; + + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = h*w*c; + if(l.extra){ + l.out_w = l.out_h = l.out_c = 0; + l.outputs = l.inputs + l.extra; + } + + if(extra){ + fprintf(stderr, "reorg %4d -> %4d\n", l.inputs, l.outputs); + } else { + fprintf(stderr, "reorg /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); + } + int output_size = l.outputs * batch; + l.output = calloc(output_size, sizeof(float)); + l.delta = calloc(output_size, sizeof(float)); + + l.forward = forward_reorg_layer; + l.backward = backward_reorg_layer; +#ifdef GPU + l.forward_gpu = forward_reorg_layer_gpu; + l.backward_gpu = backward_reorg_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); +#endif + return l; +} + +void resize_reorg_layer(layer *l, int w, int h) +{ + int stride = l->stride; + int c = l->c; + + l->h = h; + l->w = w; + + if(l->reverse){ + l->out_w = w*stride; + l->out_h = h*stride; + l->out_c = c/(stride*stride); + }else{ + l->out_w = w/stride; + l->out_h = h/stride; + l->out_c = c*(stride*stride); + } + + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->outputs; + int output_size = l->outputs * l->batch; + + l->output = realloc(l->output, output_size * sizeof(float)); + l->delta = realloc(l->delta, output_size * sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, output_size); + l->delta_gpu = cuda_make_array(l->delta, output_size); +#endif +} + +void forward_reorg_layer(const layer l, network net) +{ + int i; + if(l.flatten){ + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + if(l.reverse){ + flatten(l.output, l.w*l.h, l.c, l.batch, 0); 
+ }else{ + flatten(l.output, l.w*l.h, l.c, l.batch, 1); + } + } else if (l.extra) { + for(i = 0; i < l.batch; ++i){ + copy_cpu(l.inputs, net.input + i*l.inputs, 1, l.output + i*l.outputs, 1); + } + } else if (l.reverse){ + reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output); + } else { + reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output); + } +} + +void backward_reorg_layer(const layer l, network net) +{ + int i; + if(l.flatten){ + memcpy(net.delta, l.delta, l.outputs*l.batch*sizeof(float)); + if(l.reverse){ + flatten(net.delta, l.w*l.h, l.c, l.batch, 1); + }else{ + flatten(net.delta, l.w*l.h, l.c, l.batch, 0); + } + } else if(l.reverse){ + reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta); + } else if (l.extra) { + for(i = 0; i < l.batch; ++i){ + copy_cpu(l.inputs, l.delta + i*l.outputs, 1, net.delta + i*l.inputs, 1); + } + }else{ + reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta); + } +} + +#ifdef GPU +void forward_reorg_layer_gpu(layer l, network net) +{ + int i; + if(l.flatten){ + if(l.reverse){ + flatten_gpu(net.input_gpu, l.w*l.h, l.c, l.batch, 0, l.output_gpu); + }else{ + flatten_gpu(net.input_gpu, l.w*l.h, l.c, l.batch, 1, l.output_gpu); + } + } else if (l.extra) { + for(i = 0; i < l.batch; ++i){ + copy_gpu(l.inputs, net.input_gpu + i*l.inputs, 1, l.output_gpu + i*l.outputs, 1); + } + } else if (l.reverse) { + reorg_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu); + }else { + reorg_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu); + } +} + +void backward_reorg_layer_gpu(layer l, network net) +{ + if(l.flatten){ + if(l.reverse){ + flatten_gpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 1, net.delta_gpu); + }else{ + flatten_gpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 0, net.delta_gpu); + } + } else if (l.extra) { + int i; + for(i = 0; i < l.batch; ++i){ + copy_gpu(l.inputs, l.delta_gpu + i*l.outputs, 1, net.delta_gpu + i*l.inputs, 1); + } + } else if(l.reverse){ 
+ reorg_gpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta_gpu); + } else { + reorg_gpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta_gpu); + } +} +#endif diff --git a/workloads/realworld/uvm/darknet/src/reorg_layer.h b/workloads/realworld/uvm/darknet/src/reorg_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1d1445f17d2874835ee19d033b50e09761374de3 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/reorg_layer.h @@ -0,0 +1,20 @@ +#ifndef REORG_LAYER_H +#define REORG_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra); +void resize_reorg_layer(layer *l, int w, int h); +void forward_reorg_layer(const layer l, network net); +void backward_reorg_layer(const layer l, network net); + +#ifdef GPU +void forward_reorg_layer_gpu(layer l, network net); +void backward_reorg_layer_gpu(layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/uvm/darknet/src/rnn_layer.c b/workloads/realworld/uvm/darknet/src/rnn_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..c07e338caee5418657eb1127058419566d9ef787 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/rnn_layer.c @@ -0,0 +1,292 @@ +#include "rnn_layer.h" +#include "connected_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTIVATION activation, int batch_normalize, int adam) +{ + fprintf(stderr, "RNN Layer: %d inputs, %d outputs\n", inputs, 
outputs); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = RNN; + l.steps = steps; + l.inputs = inputs; + + l.state = calloc(batch*outputs, sizeof(float)); + l.prev_state = calloc(batch*outputs, sizeof(float)); + + l.input_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.input_layer) = make_connected_layer(batch*steps, inputs, outputs, activation, batch_normalize, adam); + l.input_layer->batch = batch; + + l.self_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.self_layer) = make_connected_layer(batch*steps, outputs, outputs, activation, batch_normalize, adam); + l.self_layer->batch = batch; + + l.output_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.output_layer) = make_connected_layer(batch*steps, outputs, outputs, activation, batch_normalize, adam); + l.output_layer->batch = batch; + + l.outputs = outputs; + l.output = l.output_layer->output; + l.delta = l.output_layer->delta; + + l.forward = forward_rnn_layer; + l.backward = backward_rnn_layer; + l.update = update_rnn_layer; +#ifdef GPU + l.forward_gpu = forward_rnn_layer_gpu; + l.backward_gpu = backward_rnn_layer_gpu; + l.update_gpu = update_rnn_layer_gpu; + l.state_gpu = cuda_make_array(0, batch*outputs); + l.prev_state_gpu = cuda_make_array(0, batch*outputs); + l.output_gpu = l.output_layer->output_gpu; + l.delta_gpu = l.output_layer->delta_gpu; +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l.input_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.input_layer->out_c, l.input_layer->out_h, l.input_layer->out_w); + cudnnSetTensor4dDescriptor(l.self_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.self_layer->out_c, l.self_layer->out_h, l.self_layer->out_w); + cudnnSetTensor4dDescriptor(l.output_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.output_layer->out_c, l.output_layer->out_h, l.output_layer->out_w); +#endif +#endif + + return l; +} + +void update_rnn_layer(layer l, update_args a) 
+{ + update_connected_layer(*(l.input_layer), a); + update_connected_layer(*(l.self_layer), a); + update_connected_layer(*(l.output_layer), a); +} + +void forward_rnn_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, self_layer.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, input_layer.delta, 1); + if(net.train) fill_cpu(l.outputs * l.batch, 0, l.state, 1); + + for (i = 0; i < l.steps; ++i) { + s.input = net.input; + forward_connected_layer(input_layer, s); + + s.input = l.state; + forward_connected_layer(self_layer, s); + + float *old_state = l.state; + if(net.train) l.state += l.outputs*l.batch; + if(l.shortcut){ + copy_cpu(l.outputs * l.batch, old_state, 1, l.state, 1); + }else{ + fill_cpu(l.outputs * l.batch, 0, l.state, 1); + } + axpy_cpu(l.outputs * l.batch, 1, input_layer.output, 1, l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + forward_connected_layer(output_layer, s); + + net.input += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_rnn_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + increment_layer(&input_layer, l.steps-1); + increment_layer(&self_layer, l.steps-1); + increment_layer(&output_layer, l.steps-1); + + l.state += l.outputs*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_cpu(l.outputs * l.batch, input_layer.output, 1, l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + s.delta = 
self_layer.delta; + backward_connected_layer(output_layer, s); + + l.state -= l.outputs*l.batch; + /* + if(i > 0){ + copy_cpu(l.outputs * l.batch, input_layer.output - l.outputs*l.batch, 1, l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, self_layer.output - l.outputs*l.batch, 1, l.state, 1); + }else{ + fill_cpu(l.outputs * l.batch, 0, l.state, 1); + } + */ + + s.input = l.state; + s.delta = self_layer.delta - l.outputs*l.batch; + if (i == 0) s.delta = 0; + backward_connected_layer(self_layer, s); + + copy_cpu(l.outputs*l.batch, self_layer.delta, 1, input_layer.delta, 1); + if (i > 0 && l.shortcut) axpy_cpu(l.outputs*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.outputs*l.batch, 1); + s.input = net.input + i*l.inputs*l.batch; + if(net.delta) s.delta = net.delta + i*l.inputs*l.batch; + else s.delta = 0; + backward_connected_layer(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} + +#ifdef GPU + +void pull_rnn_layer(layer l) +{ + pull_connected_layer(*(l.input_layer)); + pull_connected_layer(*(l.self_layer)); + pull_connected_layer(*(l.output_layer)); +} + +void push_rnn_layer(layer l) +{ + push_connected_layer(*(l.input_layer)); + push_connected_layer(*(l.self_layer)); + push_connected_layer(*(l.output_layer)); +} + +void update_rnn_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.input_layer), a); + update_connected_layer_gpu(*(l.self_layer), a); + update_connected_layer_gpu(*(l.output_layer), a); +} + +void forward_rnn_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_gpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, self_layer.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, input_layer.delta_gpu, 1); + + 
if(net.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.prev_state_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = net.input_gpu; + forward_connected_layer_gpu(input_layer, s); + + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(self_layer, s); + + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(output_layer, s); + + net.input_gpu += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_rnn_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + increment_layer(&input_layer, l.steps - 1); + increment_layer(&self_layer, l.steps - 1); + increment_layer(&output_layer, l.steps - 1); + float *last_input = input_layer.output_gpu; + float *last_self = self_layer.output_gpu; + for (i = l.steps-1; i >= 0; --i) { + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu; + backward_connected_layer_gpu(output_layer, s); + + if(i != 0) { + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + }else { + copy_gpu(l.outputs*l.batch, l.prev_state_gpu, 1, l.state_gpu, 1); + } + + copy_gpu(l.outputs*l.batch, 
self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = (i > 0) ? self_layer.delta_gpu - l.outputs*l.batch : 0; + if (i == 0) s.delta_gpu = 0; + backward_connected_layer_gpu(self_layer, s); + + s.input_gpu = net.input_gpu + i*l.inputs*l.batch; + if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch; + else s.delta_gpu = 0; + backward_connected_layer_gpu(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, last_input, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, last_self, 1, l.state_gpu, 1); +} +#endif diff --git a/workloads/realworld/uvm/darknet/src/rnn_layer.h b/workloads/realworld/uvm/darknet/src/rnn_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..270a63ffafca9a9adb7b995ed674f93c70bdeb51 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/rnn_layer.h @@ -0,0 +1,25 @@ + +#ifndef RNN_LAYER_H +#define RNN_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" +#define USET + +layer make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTIVATION activation, int batch_normalize, int adam); + +void forward_rnn_layer(layer l, network net); +void backward_rnn_layer(layer l, network net); +void update_rnn_layer(layer l, update_args a); + +#ifdef GPU +void forward_rnn_layer_gpu(layer l, network net); +void backward_rnn_layer_gpu(layer l, network net); +void update_rnn_layer_gpu(layer l, update_args a); +void push_rnn_layer(layer l); +void pull_rnn_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/uvm/darknet/src/route_layer.c b/workloads/realworld/uvm/darknet/src/route_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..608abe9a1c729eb6bdfd5e0d65c58196b51da496 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/route_layer.c @@ 
-0,0 +1,134 @@ +#include "route_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + +route_layer make_route_layer(int batch, int n, int *input_layers, int *input_sizes) +{ + fprintf(stderr,"route "); + route_layer l = {0}; + l.type = ROUTE; + l.batch = batch; + l.n = n; + l.input_layers = input_layers; + l.input_sizes = input_sizes; + int i; + int outputs = 0; + for(i = 0; i < n; ++i){ + fprintf(stderr," %d", input_layers[i]); + outputs += input_sizes[i]; + } + fprintf(stderr, "\n"); + l.outputs = outputs; + l.inputs = outputs; + l.delta = calloc(outputs*batch, sizeof(float)); + l.output = calloc(outputs*batch, sizeof(float));; + + l.forward = forward_route_layer; + l.backward = backward_route_layer; + #ifdef GPU + l.forward_gpu = forward_route_layer_gpu; + l.backward_gpu = backward_route_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, outputs*batch); + l.output_gpu = cuda_make_array(l.output, outputs*batch); + #endif + return l; +} + +void resize_route_layer(route_layer *l, network *net) +{ + int i; + layer first = net->layers[l->input_layers[0]]; + l->out_w = first.out_w; + l->out_h = first.out_h; + l->out_c = first.out_c; + l->outputs = first.outputs; + l->input_sizes[0] = first.outputs; + for(i = 1; i < l->n; ++i){ + int index = l->input_layers[i]; + layer next = net->layers[index]; + l->outputs += next.outputs; + l->input_sizes[i] = next.outputs; + if(next.out_w == first.out_w && next.out_h == first.out_h){ + l->out_c += next.out_c; + }else{ + printf("%d %d, %d %d\n", next.out_w, next.out_h, first.out_w, first.out_h); + l->out_h = l->out_w = l->out_c = 0; + } + } + l->inputs = l->outputs; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + +void 
forward_route_layer(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *input = net.layers[index].output; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1); + } + offset += input_size; + } +} + +void backward_route_layer(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *delta = net.layers[index].delta; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1); + } + offset += input_size; + } +} + +#ifdef GPU +void forward_route_layer_gpu(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *input = net.layers[index].output_gpu; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + copy_gpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1); + } + offset += input_size; + } +} + +void backward_route_layer_gpu(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *delta = net.layers[index].delta_gpu; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + axpy_gpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1); + } + offset += input_size; + } +} +#endif diff --git a/workloads/realworld/uvm/darknet/src/route_layer.h b/workloads/realworld/uvm/darknet/src/route_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1d40330ff30c9c93a2180a696d5f67f628ea481c --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/route_layer.h @@ -0,0 +1,18 @@ +#ifndef ROUTE_LAYER_H +#define ROUTE_LAYER_H +#include "network.h" +#include "layer.h" + 
+typedef layer route_layer; + +route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size); +void forward_route_layer(const route_layer l, network net); +void backward_route_layer(const route_layer l, network net); +void resize_route_layer(route_layer *l, network *net); + +#ifdef GPU +void forward_route_layer_gpu(const route_layer l, network net); +void backward_route_layer_gpu(const route_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm/darknet/src/shortcut_layer.c b/workloads/realworld/uvm/darknet/src/shortcut_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..e5b9e14301c0a6b7e03b270824352f1ba40163cd --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/shortcut_layer.c @@ -0,0 +1,90 @@ +#include "shortcut_layer.h" +#include "cuda_dark.h" +#include "blas.h" +#include "activations.h" + +#include +#include + +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) +{ + fprintf(stderr, "res %3d %4d x%4d x%4d -> %4d x%4d x%4d\n",index, w2,h2,c2, w,h,c); + layer l = {0}; + l.type = SHORTCUT; + l.batch = batch; + l.w = w2; + l.h = h2; + l.c = c2; + l.out_w = w; + l.out_h = h; + l.out_c = c; + l.outputs = w*h*c; + l.inputs = l.outputs; + + l.index = index; + + l.delta = calloc(l.outputs*batch, sizeof(float)); + l.output = calloc(l.outputs*batch, sizeof(float));; + + l.forward = forward_shortcut_layer; + l.backward = backward_shortcut_layer; + #ifdef GPU + l.forward_gpu = forward_shortcut_layer_gpu; + l.backward_gpu = backward_shortcut_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + #endif + return l; +} + +void resize_shortcut_layer(layer *l, int w, int h) +{ + assert(l->w == l->out_w); + assert(l->h == l->out_h); + l->w = l->out_w = w; + l->h = l->out_h = h; + l->outputs = w*h*l->out_c; + l->inputs = l->outputs; + l->delta = realloc(l->delta, 
l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + + +void forward_shortcut_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + shortcut_cpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output); + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_shortcut_layer(const layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + axpy_cpu(l.outputs*l.batch, l.alpha, l.delta, 1, net.delta, 1); + shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta); +} + +#ifdef GPU +void forward_shortcut_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + shortcut_gpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output_gpu); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_shortcut_layer_gpu(const layer l, network net) +{ + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + axpy_gpu(l.outputs*l.batch, l.alpha, l.delta_gpu, 1, net.delta_gpu, 1); + shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta_gpu); +} +#endif diff --git a/workloads/realworld/uvm/darknet/src/shortcut_layer.h b/workloads/realworld/uvm/darknet/src/shortcut_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..5f684fc1eadea2c6902be96bf4a4bf9a3b533da9 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/shortcut_layer.h @@ -0,0 +1,17 @@ +#ifndef SHORTCUT_LAYER_H 
+#define SHORTCUT_LAYER_H + +#include "layer.h" +#include "network.h" + +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); +void forward_shortcut_layer(const layer l, network net); +void backward_shortcut_layer(const layer l, network net); +void resize_shortcut_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_shortcut_layer_gpu(const layer l, network net); +void backward_shortcut_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm/darknet/src/softmax_layer.c b/workloads/realworld/uvm/darknet/src/softmax_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..569b62b14097ed226d9939d8e1f1fd2899083ee6 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/softmax_layer.c @@ -0,0 +1,107 @@ +#include "softmax_layer.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +softmax_layer make_softmax_layer(int batch, int inputs, int groups) +{ + assert(inputs%groups == 0); + fprintf(stderr, "softmax %4d\n", inputs); + softmax_layer l = {0}; + l.type = SOFTMAX; + l.batch = batch; + l.groups = groups; + l.inputs = inputs; + l.outputs = inputs; + l.loss = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_softmax_layer; + l.backward = backward_softmax_layer; + #ifdef GPU + l.forward_gpu = forward_softmax_layer_gpu; + l.backward_gpu = backward_softmax_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.loss_gpu = cuda_make_array(l.loss, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_softmax_layer(const softmax_layer l, network net) +{ + if(l.softmax_tree){ + int i; + int count = 0; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + 
softmax_cpu(net.input + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output + count); + count += group_size; + } + } else { + softmax_cpu(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output); + } + + if(net.truth && !l.noloss){ + softmax_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_softmax_layer(const softmax_layer l, network net) +{ + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_softmax_layer_output(const softmax_layer layer) +{ + cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch); +} + +void forward_softmax_layer_gpu(const softmax_layer l, network net) +{ + if(l.softmax_tree){ + softmax_tree(net.input_gpu, 1, l.batch, l.inputs, l.temperature, l.output_gpu, *l.softmax_tree); + /* + int i; + int count = 0; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_gpu(net.input_gpu + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count); + count += group_size; + } + */ + } else { + if(l.spatial){ + softmax_gpu(net.input_gpu, l.c, l.batch*l.c, l.inputs/l.c, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu); + }else{ + softmax_gpu(net.input_gpu, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu); + } + } + if(net.truth && !l.noloss){ + softmax_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); + if(l.softmax_tree){ + mask_gpu(l.batch*l.inputs, l.delta_gpu, SECRET_NUM, net.truth_gpu, 0); + mask_gpu(l.batch*l.inputs, l.loss_gpu, SECRET_NUM, net.truth_gpu, 0); + } + cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_softmax_layer_gpu(const softmax_layer layer, network net) +{ + axpy_gpu(layer.batch*layer.inputs, 1, 
layer.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/uvm/darknet/src/softmax_layer.h b/workloads/realworld/uvm/darknet/src/softmax_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..2e3ffe01a6c5d273a9f6139bc9f265cd7e2bc860 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/softmax_layer.h @@ -0,0 +1,19 @@ +#ifndef SOFTMAX_LAYER_H +#define SOFTMAX_LAYER_H +#include "layer.h" +#include "network.h" + +typedef layer softmax_layer; + +void softmax_array(float *input, int n, float temp, float *output); +softmax_layer make_softmax_layer(int batch, int inputs, int groups); +void forward_softmax_layer(const softmax_layer l, network net); +void backward_softmax_layer(const softmax_layer l, network net); + +#ifdef GPU +void pull_softmax_layer_output(const softmax_layer l); +void forward_softmax_layer_gpu(const softmax_layer l, network net); +void backward_softmax_layer_gpu(const softmax_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm/darknet/src/stb_image.h b/workloads/realworld/uvm/darknet/src/stb_image.h new file mode 100644 index 0000000000000000000000000000000000000000..d9c21bc813f1f24de2a25ee3cc82bdce9413eaa5 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/stb_image.h @@ -0,0 +1,7462 @@ +/* stb_image - v2.19 - public domain image loader - http://nothings.org/stb + no warranty implied; use at your own risk + + Do this: + #define STB_IMAGE_IMPLEMENTATION + before you include this file in *one* C or C++ file to create the implementation. + + // i.e. it should look like this: + #include ... + #include ... + #include ... + #define STB_IMAGE_IMPLEMENTATION + #include "stb_image.h" + + You can #define STBI_ASSERT(x) before the #include to avoid using assert.h. 
+ And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free + + + QUICK NOTES: + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface + + JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) + PNG 1/2/4/8/16-bit-per-channel + + TGA (not sure what subset, if a subset) + BMP non-1bpp, non-RLE + PSD (composited view only, no extra channels, 8/16 bit-per-channel) + + GIF (*comp always reports as 4-channel) + HDR (radiance rgbE format) + PIC (Softimage PIC) + PNM (PPM and PGM binary only) + + Animated GIF still needs a proper API, but here's one way to do it: + http://gist.github.com/urraka/685d9a6340b26b830d49 + + - decode from memory or through FILE (define STBI_NO_STDIO to remove code) + - decode from arbitrary I/O callbacks + - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) + + Full documentation under "DOCUMENTATION" below. + + +LICENSE + + See end of file for license information. + +RECENT REVISION HISTORY: + + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings + 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes + 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 + RGB-format JPEG; remove white matting in PSD; + allocate large structures on the stack; + correct channel count for PNG & BMP + 2.10 (2016-01-22) avoid warning introduced in 2.09 + 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED + + See end of file for full revision history. 
+ + + ============================ Contributors ========================= + + Image formats Extensions, features + Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) + Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) + Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) + Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) + Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) + Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) + Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) + github:urraka (animated gif) Junggon Kim (PNM comments) + Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) + socks-the-fox (16-bit PNG) + Jeremy Sawicki (handle all ImageNet JPGs) + Optimizations & bugfixes Mikhail Morozov (1-bit BMP) + Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query) + Arseny Kapoulkine + John-Mark Allen + + Bug & warning fixes + Marc LeBlanc David Woo Guillaume George Martins Mozeiko + Christpher Lloyd Jerry Jansson Joseph Thomson Phil Jordan + Dave Moore Roy Eltham Hayaki Saito Nathan Reed + Won Chun Luke Graham Johan Duparc Nick Verigakis + the Horde3D community Thomas Ruf Ronny Chevalier github:rlyeh + Janez Zemva John Bartholomew Michal Cichon github:romigrou + Jonathan Blow Ken Hamada Tero Hanninen github:svdijk + Laurent Gomila Cort Stratton Sergio Gonzalez github:snagar + Aruelien Pocheville Thibault Reuille Cass Everitt github:Zelex + Ryamond Barbiero Paul Du Bois Engin Manap github:grim210 + Aldo Culquicondor Philipp Wiesemann Dale Weiler github:sammyhw + Oriol Ferrer Mesia Josh Tobin Matthew Gregan github:phprus + Julian Raschke Gregory Mullen Baldur Karlsson github:poppolopoppo + Christian Floisand Kevin Schmidt github:darealshinji + Blazej Dariusz Roszkowski github:Michaelangel007 +*/ + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +// DOCUMENTATION +// +// Limitations: +// - no 12-bit-per-channel JPEG +// - no JPEGs with arithmetic coding +// - GIF always returns *comp=4 +// +// Basic 
usage (see HDR discussion below for HDR usage): +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// // ... but 'n' will always be the number that it would have been if you said 0 +// stbi_image_free(data) +// +// Standard parameters: +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *channels_in_file -- outputs # of image components in image file +// int desired_channels -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data, or NULL on an allocation failure or if the image is +// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'desired_channels' if desired_channels is non-zero, or +// *channels_in_file otherwise. If desired_channels is non-zero, +// *channels_in_file has the number of components that _would_ have been +// output otherwise. E.g. if you set desired_channels to 4, you will always +// get RGBA output, but you can check *channels_in_file to see if it's trivially +// opaque because e.g. there were only 3 channels in the source image. +// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *channels_in_file will be unchanged. 
The function +// stbi_failure_reason() can be queried for an extremely brief, end-user +// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS +// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. +// +// =========================================================================== +// +// Philosophy +// +// stb libraries are designed with the following priorities: +// +// 1. easy to use +// 2. easy to maintain +// 3. good performance +// +// Sometimes I let "good performance" creep up in priority over "easy to maintain", +// and for best performance I may provide less-easy-to-use APIs that give higher +// performance, in addition to the easy to use ones. Nevertheless, it's important +// to keep in mind that from the standpoint of you, a client of this library, +// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. +// +// Some secondary priorities arise directly from the first two, some of which +// make more explicit reasons why performance can't be emphasized. +// +// - Portable ("ease of use") +// - Small source code footprint ("easy to maintain") +// - No dependencies ("ease of use") +// +// =========================================================================== +// +// I/O callbacks +// +// I/O callbacks allow you to read from arbitrary sources, like packaged +// files or some other source. Data read from callbacks are processed +// through a small internal buffer (currently 128 bytes) to try to reduce +// overhead. +// +// The three functions you must define are "read" (reads some bytes of data), +// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end). 
+// +// =========================================================================== +// +// SIMD support +// +// The JPEG decoder will try to automatically use SIMD kernels on x86 when +// supported by the compiler. For ARM Neon support, you must explicitly +// request it. +// +// (The old do-it-yourself SIMD API is no longer supported in the current +// code.) +// +// On x86, SSE2 will automatically be used when available based on a run-time +// test; if not, the generic C versions are used as a fall-back. On ARM targets, +// the typical path is to have separate builds for NEON and non-NEON devices +// (at least this is true for iOS and Android). Therefore, the NEON support is +// toggled by a build flag: define STBI_NEON to get NEON loops. +// +// If for some reason you do not want to use any of SIMD code, or if +// you have issues compiling it, you can disable it entirely by +// defining STBI_NO_SIMD. +// +// =========================================================================== +// +// HDR image support (disable by defining STBI_NO_HDR) +// +// stb_image now supports loading HDR images in general, and currently +// the Radiance .HDR file format, although the support is provided +// generically. You can still load any file through the existing interface; +// if you attempt to load an HDR file, it will be automatically remapped to +// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). 
+// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +// float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +// stbi_ldr_to_hdr_scale(1.0f); +// stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +// stbi_is_hdr(char *filename); +// +// =========================================================================== +// +// iPhone PNG support: +// +// By default we convert iphone-formatted PNGs back to RGB, even though +// they are internally encoded differently. You can disable this conversion +// by by calling stbi_convert_iphone_png_to_rgb(0), in which case +// you will always just get the native iphone "format" through (which +// is BGR stored in RGB). +// +// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per +// pixel to remove any premultiplied alpha *only* if the image file explicitly +// says there's premultiplied data (currently only happens in iPhone images, +// and only if iPhone convert-to-rgb processing is on). +// +// =========================================================================== +// +// ADDITIONAL CONFIGURATION +// +// - You can suppress implementation of any of the decoders to reduce +// your code footprint by #defining one or more of the following +// symbols before creating the implementation. 
+// +// STBI_NO_JPEG +// STBI_NO_PNG +// STBI_NO_BMP +// STBI_NO_PSD +// STBI_NO_TGA +// STBI_NO_GIF +// STBI_NO_HDR +// STBI_NO_PIC +// STBI_NO_PNM (.ppm and .pgm) +// +// - You can request *only* certain decoders and suppress all other ones +// (this will be more forward-compatible, as addition of new decoders +// doesn't require you to disable them explicitly): +// +// STBI_ONLY_JPEG +// STBI_ONLY_PNG +// STBI_ONLY_BMP +// STBI_ONLY_PSD +// STBI_ONLY_TGA +// STBI_ONLY_GIF +// STBI_ONLY_HDR +// STBI_ONLY_PIC +// STBI_ONLY_PNM (.ppm and .pgm) +// +// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still +// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB +// + + +#ifndef STBI_NO_STDIO +#include +#endif // STBI_NO_STDIO + +#define STBI_VERSION 1 + +enum +{ + STBI_default = 0, // only used for desired_channels + + STBI_grey = 1, + STBI_grey_alpha = 2, + STBI_rgb = 3, + STBI_rgb_alpha = 4 +}; + +typedef unsigned char stbi_uc; +typedef unsigned short stbi_us; + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef STB_IMAGE_STATIC +#define STBIDEF static +#else +#define STBIDEF extern +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// PRIMARY API - works on images of any type +// + +// +// load image by filename, open file, or memory buffer +// + +typedef struct +{ + int (*read) (void *user,char *data,int size); // fill 'data' with 'size' bytes. 
return number of bytes actually read + void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative + int (*eof) (void *user); // returns nonzero if we are at end of file/data +} stbi_io_callbacks; + +//////////////////////////////////// +// +// 8-bits-per-channel interface +// + +STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels); +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +#endif + + +#ifndef STBI_NO_STDIO +STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +// for stbi_load_from_file, file pointer is left pointing immediately after image +#endif + +//////////////////////////////////// +// +// 16-bits-per-channel interface +// + +STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +#endif + +//////////////////////////////////// +// +// float-per-channel interface +// +#ifndef STBI_NO_LINEAR + STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF 
float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + + #ifndef STBI_NO_STDIO + STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); + #endif +#endif + +#ifndef STBI_NO_HDR + STBIDEF void stbi_hdr_to_ldr_gamma(float gamma); + STBIDEF void stbi_hdr_to_ldr_scale(float scale); +#endif // STBI_NO_HDR + +#ifndef STBI_NO_LINEAR + STBIDEF void stbi_ldr_to_hdr_gamma(float gamma); + STBIDEF void stbi_ldr_to_hdr_scale(float scale); +#endif // STBI_NO_LINEAR + +// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename); +STBIDEF int stbi_is_hdr_from_file(FILE *f); +#endif // STBI_NO_STDIO + + +// get a VERY brief reason for failure +// NOT THREADSAFE +STBIDEF const char *stbi_failure_reason (void); + +// free the loaded image -- this is just free() +STBIDEF void stbi_image_free (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit (char const *filename); +STBIDEF int stbi_is_16_bit_from_file(FILE 
*f); +#endif + + + +// for image formats that explicitly notate that they have premultiplied alpha, +// we just return the colors as stored in the file. set this flag to force +// unpremultiplication. results are undefined if the unpremultiply overflow. +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); + +// indicate whether we should process iphone images back to canonical format, +// or just pass them through "as-is" +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); + +// flip the image vertically, so the first pixel in the output array is the bottom left +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); + +// ZLIB client - used by PNG, available for other purposes + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); +STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + + +#ifdef __cplusplus +} +#endif + +// +// +//// end header file ///////////////////////////////////////////////////// +#endif // STBI_INCLUDE_STB_IMAGE_H + +#ifdef STB_IMAGE_IMPLEMENTATION + +#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \ + || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \ + || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \ + || defined(STBI_ONLY_ZLIB) + #ifndef STBI_ONLY_JPEG + #define STBI_NO_JPEG + #endif + #ifndef STBI_ONLY_PNG + #define STBI_NO_PNG + #endif + #ifndef STBI_ONLY_BMP + 
#define STBI_NO_BMP + #endif + #ifndef STBI_ONLY_PSD + #define STBI_NO_PSD + #endif + #ifndef STBI_ONLY_TGA + #define STBI_NO_TGA + #endif + #ifndef STBI_ONLY_GIF + #define STBI_NO_GIF + #endif + #ifndef STBI_ONLY_HDR + #define STBI_NO_HDR + #endif + #ifndef STBI_ONLY_PIC + #define STBI_NO_PIC + #endif + #ifndef STBI_ONLY_PNM + #define STBI_NO_PNM + #endif +#endif + +#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB) +#define STBI_NO_ZLIB +#endif + + +#include +#include // ptrdiff_t on osx +#include +#include +#include + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +#include // ldexp, pow +#endif + +#ifndef STBI_NO_STDIO +#include +#endif + +#ifndef STBI_ASSERT +#include +#define STBI_ASSERT(x) assert(x) +#endif + + +#ifndef _MSC_VER + #ifdef __cplusplus + #define stbi_inline inline + #else + #define stbi_inline + #endif +#else + #define stbi_inline __forceinline +#endif + + +#ifdef _MSC_VER +typedef unsigned short stbi__uint16; +typedef signed short stbi__int16; +typedef unsigned int stbi__uint32; +typedef signed int stbi__int32; +#else +#include +typedef uint16_t stbi__uint16; +typedef int16_t stbi__int16; +typedef uint32_t stbi__uint32; +typedef int32_t stbi__int32; +#endif + +// should produce compiler error if size is wrong +typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 
1 : -1]; + +#ifdef _MSC_VER +#define STBI_NOTUSED(v) (void)(v) +#else +#define STBI_NOTUSED(v) (void)sizeof(v) +#endif + +#ifdef _MSC_VER +#define STBI_HAS_LROTL +#endif + +#ifdef STBI_HAS_LROTL + #define stbi_lrot(x,y) _lrotl(x,y) +#else + #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y)))) +#endif + +#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) +// ok +#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)." +#endif + +#ifndef STBI_MALLOC +#define STBI_MALLOC(sz) malloc(sz) +#define STBI_REALLOC(p,newsz) realloc(p,newsz) +#define STBI_FREE(p) free(p) +#endif + +#ifndef STBI_REALLOC_SIZED +#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz) +#endif + +// x86/x64 detection +#if defined(__x86_64__) || defined(_M_X64) +#define STBI__X64_TARGET +#elif defined(__i386) || defined(_M_IX86) +#define STBI__X86_TARGET +#endif + +#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) +// gcc doesn't support sse2 intrinsics unless you compile with -msse2, +// which in turn means it gets to use SSE2 everywhere. This is unfortunate, +// but previous attempts to provide the SSE2 functions with runtime +// detection caused numerous issues. The way architecture extensions are +// exposed in GCC/Clang is, sadly, not really suited for one-file libs. +// New behavior: if compiled with -msse2, we use SSE2 without any +// detection; if not, we don't use it at all. 
+#define STBI_NO_SIMD +#endif + +#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD) +// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET +// +// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the +// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant. +// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not +// simultaneously enabling "-mstackrealign". +// +// See https://github.com/nothings/stb/issues/81 for more information. +// +// So default to no SSE2 on 32-bit MinGW. If you've read this far and added +// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2. +#define STBI_NO_SIMD +#endif + +#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) +#define STBI_SSE2 +#include + +#ifdef _MSC_VER + +#if _MSC_VER >= 1400 // not VC6 +#include // __cpuid +static int stbi__cpuid3(void) +{ + int info[4]; + __cpuid(info,1); + return info[3]; +} +#else +static int stbi__cpuid3(void) +{ + int res; + __asm { + mov eax,1 + cpuid + mov res,edx + } + return res; +} +#endif + +#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name + +static int stbi__sse2_available(void) +{ + int info3 = stbi__cpuid3(); + return ((info3 >> 26) & 1) != 0; +} +#else // assume GCC-style if not VC++ +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) + +static int stbi__sse2_available(void) +{ + // If we're even attempting to compile this on GCC/Clang, that means + // -msse2 is on, which means the compiler is allowed to use SSE2 + // instructions at will, and so are we. 
+ return 1; +} +#endif +#endif + +// ARM NEON +#if defined(STBI_NO_SIMD) && defined(STBI_NEON) +#undef STBI_NEON +#endif + +#ifdef STBI_NEON +#include +// assume GCC or Clang on ARM targets +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) +#endif + +#ifndef STBI_SIMD_ALIGN +#define STBI_SIMD_ALIGN(type, name) type name +#endif + +/////////////////////////////////////////////// +// +// stbi__context struct and start_xxx functions + +// stbi__context structure is our basic context used by all images, so it +// contains all the IO context, plus some basic image information +typedef struct +{ + stbi__uint32 img_x, img_y; + int img_n, img_out_n; + + stbi_io_callbacks io; + void *io_user_data; + + int read_from_callbacks; + int buflen; + stbi_uc buffer_start[128]; + + stbi_uc *img_buffer, *img_buffer_end; + stbi_uc *img_buffer_original, *img_buffer_original_end; +} stbi__context; + + +static void stbi__refill_buffer(stbi__context *s); + +// initialize a memory-decode context +static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len) +{ + s->io.read = NULL; + s->read_from_callbacks = 0; + s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer; + s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len; +} + +// initialize a callback-based context +static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user) +{ + s->io = *c; + s->io_user_data = user; + s->buflen = sizeof(s->buffer_start); + s->read_from_callbacks = 1; + s->img_buffer_original = s->buffer_start; + stbi__refill_buffer(s); + s->img_buffer_original_end = s->img_buffer_end; +} + +#ifndef STBI_NO_STDIO + +static int stbi__stdio_read(void *user, char *data, int size) +{ + return (int) fread(data,1,size,(FILE*) user); +} + +static void stbi__stdio_skip(void *user, int n) +{ + fseek((FILE*) user, n, SEEK_CUR); +} + +static int stbi__stdio_eof(void *user) +{ + return feof((FILE*) user); +} + +static stbi_io_callbacks 
stbi__stdio_callbacks = +{ + stbi__stdio_read, + stbi__stdio_skip, + stbi__stdio_eof, +}; + +static void stbi__start_file(stbi__context *s, FILE *f) +{ + stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); +} + +//static void stop_file(stbi__context *s) { } + +#endif // !STBI_NO_STDIO + +static void stbi__rewind(stbi__context *s) +{ + // conceptually rewind SHOULD rewind to the beginning of the stream, + // but we just rewind to the beginning of the initial buffer, because + // we only use it after doing 'test', which only ever looks at at most 92 bytes + s->img_buffer = s->img_buffer_original; + s->img_buffer_end = s->img_buffer_original_end; +} + +enum +{ + STBI_ORDER_RGB, + STBI_ORDER_BGR +}; + +typedef struct +{ + int bits_per_channel; + int num_channels; + int channel_order; +} stbi__result_info; + +#ifndef STBI_NO_JPEG +static int stbi__jpeg_test(stbi__context *s); +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNG +static int stbi__png_test(stbi__context *s); +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__png_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_BMP +static int stbi__bmp_test(stbi__context *s); +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_TGA +static int stbi__tga_test(stbi__context *s); +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s); +static 
void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__psd_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_HDR +static int stbi__hdr_test(stbi__context *s); +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_test(stbi__context *s); +static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_GIF +static int stbi__gif_test(stbi__context *s); +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNM +static int stbi__pnm_test(stbi__context *s); +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +// this is not threadsafe +static const char *stbi__g_failure_reason; + +STBIDEF const char *stbi_failure_reason(void) +{ + return stbi__g_failure_reason; +} + +static int stbi__err(const char *str) +{ + stbi__g_failure_reason = str; + return 0; +} + +static void *stbi__malloc(size_t size) +{ + return STBI_MALLOC(size); +} + +// stb_image uses ints pervasively, including for offset calculations. +// therefore the largest decoded image size we can support with the +// current code, even on 64-bit targets, is INT_MAX. this is not a +// significant limitation for the intended use case. 
+// +// we do, however, need to make sure our size calculations don't +// overflow. hence a few helper functions for size calculations that +// multiply integers together, making sure that they're non-negative +// and no overflow occurs. + +// return 1 if the sum is valid, 0 on overflow. +// negative terms are considered invalid. +static int stbi__addsizes_valid(int a, int b) +{ + if (b < 0) return 0; + // now 0 <= b <= INT_MAX, hence also + // 0 <= INT_MAX - b <= INTMAX. + // And "a + b <= INT_MAX" (which might overflow) is the + // same as a <= INT_MAX - b (no overflow) + return a <= INT_MAX - b; +} + +// returns 1 if the product is valid, 0 on overflow. +// negative factors are considered invalid. +static int stbi__mul2sizes_valid(int a, int b) +{ + if (a < 0 || b < 0) return 0; + if (b == 0) return 1; // mul-by-0 is always safe + // portable way to check for no overflows in a*b + return a <= INT_MAX/b; +} + +// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow +static int stbi__mad2sizes_valid(int a, int b, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add); +} + +// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow +static int stbi__mad3sizes_valid(int a, int b, int c, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__addsizes_valid(a*b*c, add); +} + +// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add); +} +#endif + +// mallocs with size overflow checking +static void *stbi__malloc_mad2(int a, int b, int add) +{ + if (!stbi__mad2sizes_valid(a, b, add)) return NULL; + return stbi__malloc(a*b + add); +} + +static void 
*stbi__malloc_mad3(int a, int b, int c, int add) +{ + if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL; + return stbi__malloc(a*b*c + add); +} + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +static void *stbi__malloc_mad4(int a, int b, int c, int d, int add) +{ + if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; + return stbi__malloc(a*b*c*d + add); +} +#endif + +// stbi__err - error +// stbi__errpf - error returning pointer to float +// stbi__errpuc - error returning pointer to unsigned char + +#ifdef STBI_NO_FAILURE_STRINGS + #define stbi__err(x,y) 0 +#elif defined(STBI_FAILURE_USERMSG) + #define stbi__err(x,y) stbi__err(y) +#else + #define stbi__err(x,y) stbi__err(x) +#endif + +#define stbi__errpf(x,y) ((float *)(size_t) (stbi__err(x,y)?NULL:NULL)) +#define stbi__errpuc(x,y) ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL)) + +STBIDEF void stbi_image_free(void *retval_from_stbi_load) +{ + STBI_FREE(retval_from_stbi_load); +} + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); +#endif + +#ifndef STBI_NO_HDR +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp); +#endif + +static int stbi__vertically_flip_on_load = 0; + +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) +{ + stbi__vertically_flip_on_load = flag_true_if_should_flip; +} + +static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields + ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed + ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order + ri->num_channels = 0; + + #ifndef STBI_NO_JPEG + if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PNG + if (stbi__png_test(s)) return 
stbi__png_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_BMP + if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_GIF + if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PSD + if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc); + #endif + #ifndef STBI_NO_PIC + if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PNM + if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri); + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); + return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + + #ifndef STBI_NO_TGA + // test tga last because it's a crappy test! + if (stbi__tga_test(s)) + return stbi__tga_load(s,x,y,comp,req_comp, ri); + #endif + + return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); +} + +static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi_uc *reduced; + + reduced = (stbi_uc *) stbi__malloc(img_len); + if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling + + STBI_FREE(orig); + return reduced; +} + +static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi__uint16 *enlarged; + + enlarged = (stbi__uint16 *) stbi__malloc(img_len*2); + if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff + + STBI_FREE(orig); + return enlarged; +} + +static void 
stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) +{ + int row; + size_t bytes_per_row = (size_t)w * bytes_per_pixel; + stbi_uc temp[2048]; + stbi_uc *bytes = (stbi_uc *)image; + + for (row = 0; row < (h>>1); row++) { + stbi_uc *row0 = bytes + row*bytes_per_row; + stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; + // swap row0 with row1 + size_t bytes_left = bytes_per_row; + while (bytes_left) { + size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp); + memcpy(temp, row0, bytes_copy); + memcpy(row0, row1, bytes_copy); + memcpy(row1, temp, bytes_copy); + row0 += bytes_copy; + row1 += bytes_copy; + bytes_left -= bytes_copy; + } + } +} + +static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel) +{ + int slice; + int slice_size = w * h * bytes_per_pixel; + + stbi_uc *bytes = (stbi_uc *)image; + for (slice = 0; slice < z; ++slice) { + stbi__vertical_flip(bytes, w, h, bytes_per_pixel); + bytes += slice_size; + } +} + +static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); + + if (result == NULL) + return NULL; + + if (ri.bits_per_channel != 8) { + STBI_ASSERT(ri.bits_per_channel == 16); + result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 8; + } + + // @TODO: move stbi__convert_format to here + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? 
req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); + } + + return (unsigned char *) result; +} + +static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); + + if (result == NULL) + return NULL; + + if (ri.bits_per_channel != 16) { + STBI_ASSERT(ri.bits_per_channel == 8); + result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 16; + } + + // @TODO: move stbi__convert_format16 to here + // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); + } + + return (stbi__uint16 *) result; +} + +#if !defined(STBI_NO_HDR) || !defined(STBI_NO_LINEAR) +static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) +{ + if (stbi__vertically_flip_on_load && result != NULL) { + int channels = req_comp ? 
req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); + } +} +#endif + +#ifndef STBI_NO_STDIO + +static FILE *stbi__fopen(char const *filename, char const *mode) +{ + FILE *f; +#if defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != fopen_s(&f, filename, mode)) + f=0; +#else + f = fopen(filename, mode); +#endif + return f; +} + + +STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + unsigned char *result; + if (!f) return stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__uint16 *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + stbi__uint16 *result; + if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file_16(f,x,y,comp,req_comp); + fclose(f); + return result; +} + + +#endif //!STBI_NO_STDIO + +STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + 
stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_mem(&s,buffer,len); + + result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp); + if (stbi__vertically_flip_on_load) { + stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); + } + + return result; +} +#endif + +#ifndef STBI_NO_LINEAR +static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + stbi__result_info ri; + float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); + if (hdr_data) + stbi__float_postprocess(hdr_data,x,y,comp,req_comp); + return hdr_data; + } + #endif + data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); + if (data) + return stbi__ldr_to_hdr(data, *x, *y, req_comp ? 
req_comp : *comp); + return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); +} + +STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + float *result; + FILE *f = stbi__fopen(filename, "rb"); + if (!f) return stbi__errpf("can't fopen", "Unable to open file"); + result = stbi_loadf_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_file(&s,f); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} +#endif // !STBI_NO_STDIO + +#endif // !STBI_NO_LINEAR + +// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is +// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always +// reports false! 
+ +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(buffer); + STBI_NOTUSED(len); + return 0; + #endif +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result=0; + if (f) { + result = stbi_is_hdr_from_file(f); + fclose(f); + } + return result; +} + +STBIDEF int stbi_is_hdr_from_file(FILE *f) +{ + #ifndef STBI_NO_HDR + long pos = ftell(f); + int res; + stbi__context s; + stbi__start_file(&s,f); + res = stbi__hdr_test(&s); + fseek(f, pos, SEEK_SET); + return res; + #else + STBI_NOTUSED(f); + return 0; + #endif +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(clbk); + STBI_NOTUSED(user); + return 0; + #endif +} + +#ifndef STBI_NO_LINEAR +static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f; + +STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } +STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } +#endif + +static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f; + +STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; } +STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; } + + +////////////////////////////////////////////////////////////////////////////// +// +// Common code used by all image loaders +// + +enum +{ + STBI__SCAN_load=0, + STBI__SCAN_type, + STBI__SCAN_header +}; + +static void stbi__refill_buffer(stbi__context *s) +{ + int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); + if (n == 0) { + // at end of file, treat same as if from memory, but need to handle case + // where 
s->img_buffer isn't pointing to safe memory, e.g. 0-byte file + s->read_from_callbacks = 0; + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start+1; + *s->img_buffer = 0; + } else { + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start + n; + } +} + +stbi_inline static stbi_uc stbi__get8(stbi__context *s) +{ + if (s->img_buffer < s->img_buffer_end) + return *s->img_buffer++; + if (s->read_from_callbacks) { + stbi__refill_buffer(s); + return *s->img_buffer++; + } + return 0; +} + +stbi_inline static int stbi__at_eof(stbi__context *s) +{ + if (s->io.read) { + if (!(s->io.eof)(s->io_user_data)) return 0; + // if feof() is true, check if buffer = end + // special case: we've only got the special 0 character at the end + if (s->read_from_callbacks == 0) return 1; + } + + return s->img_buffer >= s->img_buffer_end; +} + +static void stbi__skip(stbi__context *s, int n) +{ + if (n < 0) { + s->img_buffer = s->img_buffer_end; + return; + } + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + s->img_buffer = s->img_buffer_end; + (s->io.skip)(s->io_user_data, n - blen); + return; + } + } + s->img_buffer += n; +} + +static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) +{ + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + int res, count; + + memcpy(buffer, s->img_buffer, blen); + + count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); + res = (count == (n-blen)); + s->img_buffer = s->img_buffer_end; + return res; + } + } + + if (s->img_buffer+n <= s->img_buffer_end) { + memcpy(buffer, s->img_buffer, n); + s->img_buffer += n; + return 1; + } else + return 0; +} + +static int stbi__get16be(stbi__context *s) +{ + int z = stbi__get8(s); + return (z << 8) + stbi__get8(s); +} + +static stbi__uint32 stbi__get32be(stbi__context *s) +{ + stbi__uint32 z = stbi__get16be(s); + return (z << 16) + stbi__get16be(s); +} + +#if 
defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) +// nothing +#else +static int stbi__get16le(stbi__context *s) +{ + int z = stbi__get8(s); + return z + (stbi__get8(s) << 8); +} +#endif + +#ifndef STBI_NO_BMP +static stbi__uint32 stbi__get32le(stbi__context *s) +{ + stbi__uint32 z = stbi__get16le(s); + return z + (stbi__get16le(s) << 16); +} +#endif + +#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings + + +////////////////////////////////////////////////////////////////////////////// +// +// generic converter from built-in img_n to req_comp +// individual types do this automatically as much as possible (e.g. jpeg +// does all cases internally since it needs to colorspace convert anyway, +// and it never has alpha, so very few cases ). png can automatically +// interleave an alpha=255 channel, but falls back to this for other cases +// +// assume data buffer is malloced, so malloc a new one and free that one +// only failure mode is malloc failing + +static stbi_uc stbi__compute_y(int r, int g, int b) +{ + return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + unsigned char *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); + if (good == NULL) { + STBI_FREE(data); + return stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, 
req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; + default: STBI_ASSERT(0); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} + +static stbi__uint16 stbi__compute_y_16(int r, int g, int b) +{ + return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + stbi__uint16 *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); + if (good == NULL) { + STBI_FREE(data); + return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + stbi__uint16 *src = data + j * x * img_n ; + stbi__uint16 *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + 
STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff; } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff; } break;
         STBI__CASE(2,1) { dest[0]=src[0]; } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff; } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break;
         default: STBI_ASSERT(0);
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}

#ifndef STBI_NO_LINEAR
// Convert an 8-bit LDR buffer to float HDR in place (frees the input,
// returns a new float buffer). Color channels go through the gamma
// curve (stbi__l2h_gamma) and scale; a trailing alpha channel, if any,
// is linearly mapped to 0..1.
static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
{
   int i,k,n;
   float *output;
   if (!data) return NULL;
   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
   if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
   // compute number of non-alpha components
   if (comp & 1) n = comp; else n = comp-1;
   for (i=0; i < x*y; ++i) {
      for (k=0; k < n; ++k) {
         output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
      }
      // alpha (if present) is transferred linearly, no gamma
      if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
   }
   STBI_FREE(data);
   return output;
}
#endif

#ifndef STBI_NO_HDR
#define stbi__float2int(x)   ((int) (x))
// Inverse of stbi__ldr_to_hdr: float HDR buffer -> clamped 8-bit LDR
// buffer (frees the input). Color channels use the inverse gamma/scale
// (stbi__h2l_*); alpha is linearly mapped; all values clamped to 0..255.
static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
{
   int i,k,n;
   stbi_uc *output;
   if (!data) return NULL;
   output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
   if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
   // compute number of non-alpha components
   if (comp & 1) n = comp; else n = comp-1;
   for (i=0; i < x*y; ++i) {
      for (k=0; k < n; ++k) {
         float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
         if (z < 0) z = 0;
         if (z > 255) z = 255;
         output[i*comp + k] = (stbi_uc) stbi__float2int(z);
      }
      if (k < comp) {
         float z = data[i*comp+k] * 255 + 0.5f;
         if (z < 0) z = 0;
         if (z > 255) z = 255;
         output[i*comp + k] = (stbi_uc) stbi__float2int(z);
      }
   }
   STBI_FREE(data);
   return output;
}
#endif

//////////////////////////////////////////////////////////////////////////////
//
//  "baseline" JPEG/JFIF decoder
//
//    simple implementation
//      - doesn't support delayed output of y-dimension
//      - simple interface (only one output format: 8-bit interleaved RGB)
//      - doesn't try to recover corrupt jpegs
//      - doesn't allow partial loading, loading multiple at once
//      - still fast on x86 (copying globals into locals doesn't help x86)
//      - allocates lots of intermediate memory (full size of all components)
//        - non-interleaved case requires this anyway
//        - allows good upsampling (see next)
//    high-quality
//      - upsampled channels are bilinearly interpolated, even across blocks
//      - quality integer IDCT derived from IJG's 'slow'
//    performance
//      - fast huffman; reasonable integer IDCT
//      - some SIMD kernels for common paths on targets with SSE2/NEON
//      - uses a lot of intermediate memory, could cache poorly

#ifndef STBI_NO_JPEG

// huffman decoding acceleration
#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache

// One JPEG Huffman table plus a FAST_BITS-wide direct-lookup
// acceleration table (built in stbi__build_huffman).
typedef struct
{
   stbi_uc  fast[1 << FAST_BITS];
   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
   stbi__uint16 code[256];
   stbi_uc  values[256];
   stbi_uc  size[257];
   unsigned int maxcode[18];
   int    delta[17];   // old 'firstsymbol' - old 'firstcode'
} stbi__huffman;

// Full decoder state for one JPEG stream.
typedef struct
{
   stbi__context *s;
   stbi__huffman huff_dc[4];
   stbi__huffman huff_ac[4];
   stbi__uint16 dequant[4][64];
   stbi__int16 fast_ac[4][1 << FAST_BITS];

// sizes for components, interleaved MCUs
   int img_h_max, img_v_max;
   int img_mcu_x, img_mcu_y;
   int img_mcu_w, img_mcu_h;

// definition of jpeg image component
   struct
   {
      int id;
      int h,v;
      int tq;
      int hd,ha;
      int dc_pred;

      int x,y,w2,h2;
      stbi_uc *data;
      void *raw_data, *raw_coeff;
      stbi_uc *linebuf;
      short   *coeff;            // progressive only
      int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
   } img_comp[4];

   stbi__uint32   code_buffer; // jpeg entropy-coded buffer
   int            code_bits;   // number of valid bits
   unsigned char  marker;      // marker seen while filling entropy buffer
   int            nomore;      // flag if we saw a marker so must stop

   int            progressive;
   int            spec_start;
   int            spec_end;
   int            succ_high;
   int            succ_low;
   int            eob_run;
   int            jfif;
   int            app14_color_transform; // Adobe APP14 tag
   int            rgb;

   int scan_n, order[4];
   int restart_interval, todo;

// kernels (selected at setup; scalar or SIMD variants)
   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
} stbi__jpeg;

// Build a Huffman table from the 16 per-length symbol counts of a DHT
// segment, assigning canonical codes per the JPEG spec. Returns 0 on
// invalid code lengths.
static int stbi__build_huffman(stbi__huffman *h, int *count)
{
   int i,j,k=0;
   unsigned int code;
   // build size list for each symbol (from JPEG spec)
   for (i=0; i < 16; ++i)
      for (j=0; j < count[i]; ++j)
         h->size[k++] = (stbi_uc) (i+1);
   h->size[k] = 0;

   // compute actual symbols (from jpeg spec)
   code = 0;
   k = 0;
   for(j=1; j <= 16; ++j) {
      // compute delta to add to code to compute symbol id
      h->delta[j] = k - code;
      if (h->size[k] == j) {
         while (h->size[k] == j)
            h->code[k++] = (stbi__uint16) (code++);
         if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
      }
      // compute largest code + 1 for this
// size, preshifted as needed later
      h->maxcode[j] = code << (16-j);
      code <<= 1;
   }
   h->maxcode[j] = 0xffffffff; // sentinel so the lookup loop always terminates

   // build non-spec acceleration table; 255 is flag for not-accelerated
   memset(h->fast, 255, 1 << FAST_BITS);
   for (i=0; i < k; ++i) {
      int s = h->size[i];
      if (s <= FAST_BITS) {
         // replicate symbol i across every FAST_BITS pattern with this prefix
         int c = h->code[i] << (FAST_BITS-s);
         int m = 1 << (FAST_BITS-s);
         for (j=0; j < m; ++j) {
            h->fast[c+j] = (stbi_uc) i;
         }
      }
   }
   return 1;
}

// build a table that decodes both magnitude and value of small ACs in
// one go. Packed encoding per entry: value*256 + run*16 + total bits
// consumed; 0 means "not accelerated, use the slow path".
static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
{
   int i;
   for (i=0; i < (1 << FAST_BITS); ++i) {
      stbi_uc fast = h->fast[i];
      fast_ac[i] = 0;
      if (fast < 255) {
         int rs = h->values[fast];
         int run = (rs >> 4) & 15;
         int magbits = rs & 15;
         int len = h->size[fast];

         if (magbits && len + magbits <= FAST_BITS) {
            // magnitude code followed by receive_extend code
            int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
            int m = 1 << (magbits - 1);
            if (k < m) k += (~0U << magbits) + 1; // sign-extend (JPEG 'extend')
            // if the result is small enough, we can fit it in fast_ac table
            if (k >= -128 && k <= 127)
               fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits));
         }
      }
   }
}

// Refill the entropy bit buffer up to >24 valid bits. "unsafe" because
// callers must check code_bits themselves; on hitting a marker it
// records it and sets nomore instead of consuming past it.
static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
{
   do {
      unsigned int b = j->nomore ?
0 : stbi__get8(j->s); + if (b == 0xff) { + int c = stbi__get8(j->s); + while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes + if (c != 0) { + j->marker = (unsigned char) c; + j->nomore = 1; + return; + } + } + j->code_buffer |= b << (24 - j->code_bits); + j->code_bits += 8; + } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) +{ + unsigned int temp; + int c,k; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + // look at the top FAST_BITS and determine what symbol ID it is, + // if the code is <= FAST_BITS + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + k = h->fast[c]; + if (k < 255) { + int s = h->size[k]; + if (s > j->code_bits) + return -1; + j->code_buffer <<= s; + j->code_bits -= s; + return h->values[k]; + } + + // naive test is to shift the code_buffer down so k bits are + // valid, then test against maxcode. To speed this up, we've + // preshifted maxcode left so that it has (16-k) 0s at the + // end; in other words, regardless of the number of bits, it + // wants to be compared against something shifted to have 16; + // that way we don't need to shift inside the loop. + temp = j->code_buffer >> 16; + for (k=FAST_BITS+1 ; ; ++k) + if (temp < h->maxcode[k]) + break; + if (k == 17) { + // error! 
code not found + j->code_bits -= 16; + return -1; + } + + if (k > j->code_bits) + return -1; + + // convert the huffman code to the symbol id + c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; + STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]); + + // convert the id to a symbol + j->code_bits -= k; + j->code_buffer <<= k; + return h->values[c]; +} + +// bias[n] = (-1<code_bits < n) stbi__grow_buffer_unsafe(j); + + sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB + k = stbi_lrot(j->code_buffer, n); + STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask))); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k + (stbi__jbias[n] & ~sgn); +} + +// get some unsigned bits +stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n) +{ + unsigned int k; + if (j->code_bits < n) stbi__grow_buffer_unsafe(j); + k = stbi_lrot(j->code_buffer, n); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k; +} + +stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) +{ + unsigned int k; + if (j->code_bits < 1) stbi__grow_buffer_unsafe(j); + k = j->code_buffer; + j->code_buffer <<= 1; + --j->code_bits; + return k & 0x80000000; +} + +// given a value that's at position X in the zigzag stream, +// where does it appear in the 8x8 matrix coded as row-major? 
static const stbi_uc stbi__jpeg_dezigzag[64+15] =
{
    0,  1,  8, 16,  9,  2,  3, 10,
   17, 24, 32, 25, 18, 11,  4,  5,
   12, 19, 26, 33, 40, 48, 41, 34,
   27, 20, 13,  6,  7, 14, 21, 28,
   35, 42, 49, 56, 57, 50, 43, 36,
   29, 22, 15, 23, 30, 37, 44, 51,
   58, 59, 52, 45, 38, 31, 39, 46,
   53, 60, 61, 54, 47, 55, 62, 63,
   // let corrupt input sample past end
   63, 63, 63, 63, 63, 63, 63, 63,
   63, 63, 63, 63, 63, 63, 63
};

// decode one 64-entry block--
// Baseline (sequential) block decode: one DC coefficient (differential
// against img_comp[b].dc_pred) followed by run-length coded ACs,
// dequantized and written in de-zigzagged order into data[].
static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
{
   int diff,dc,k;
   int t;

   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
   t = stbi__jpeg_huff_decode(j, hdc);
   if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");

   // 0 all the ac values now so we can do it 32-bits at a time
   memset(data,0,64*sizeof(data[0]));

   diff = t ? stbi__extend_receive(j, t) : 0;
   dc = j->img_comp[b].dc_pred + diff;
   j->img_comp[b].dc_pred = dc;
   data[0] = (short) (dc * dequant[0]);

   // decode AC components, see JPEG spec
   k = 1;
   do {
      unsigned int zig;
      int c,r,s;
      if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
      c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
      r = fac[c];
      if (r) { // fast-AC path: run/value/length packed by stbi__build_fast_ac
         k += (r >> 4) & 15; // run
         s = r & 15; // combined length
         j->code_buffer <<= s;
         j->code_bits -= s;
         // decode into unzigzag'd location
         zig = stbi__jpeg_dezigzag[k++];
         data[zig] = (short) ((r >> 8) * dequant[zig]);
      } else {
         int rs = stbi__jpeg_huff_decode(j, hac);
         if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
         s = rs & 15;
         r = rs >> 4;
         if (s == 0) {
            if (rs != 0xf0) break; // end block (EOB); 0xf0 is ZRL = run of 16 zeros
            k += 16;
         } else {
            k += r;
            // decode into unzigzag'd location
            zig = stbi__jpeg_dezigzag[k++];
            data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
         }
      }
   } while (k < 64);
   return 1;
}

static int
stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
{
   // Progressive DC scan: first pass decodes the differential DC value
   // shifted by succ_low; refinement passes add one bit of precision.
   int diff,dc;
   int t;
   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");

   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);

   if (j->succ_high == 0) {
      // first scan for DC coefficient, must be first
      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
      t = stbi__jpeg_huff_decode(j, hdc);
      diff = t ? stbi__extend_receive(j, t) : 0;

      dc = j->img_comp[b].dc_pred + diff;
      j->img_comp[b].dc_pred = dc;
      data[0] = (short) (dc << j->succ_low);
   } else {
      // refinement scan for DC coefficient
      if (stbi__jpeg_get_bit(j))
         data[0] += (short) (1 << j->succ_low);
   }
   return 1;
}

// @OPTIMIZE: store non-zigzagged during the decode passes,
// and only de-zigzag when dequantizing
// Progressive AC scan for the coefficient band [spec_start, spec_end];
// first pass decodes new coefficients (shifted by succ_low), refinement
// pass adds precision bits. eob_run counts remaining end-of-band blocks.
static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
{
   int k;
   if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");

   if (j->succ_high == 0) {
      int shift = j->succ_low;

      if (j->eob_run) {
         // still inside an end-of-band run: this whole block stays zero
         --j->eob_run;
         return 1;
      }

      k = j->spec_start;
      do {
         unsigned int zig;
         int c,r,s;
         if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
         c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
         r = fac[c];
         if (r) { // fast-AC path
            k += (r >> 4) & 15; // run
            s = r & 15; // combined length
            j->code_buffer <<= s;
            j->code_bits -= s;
            zig = stbi__jpeg_dezigzag[k++];
            data[zig] = (short) ((r >> 8) << shift);
         } else {
            int rs = stbi__jpeg_huff_decode(j, hac);
            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
            s = rs & 15;
            r = rs >> 4;
            if (s == 0) {
               if (r < 15) {
                  // EOBn: (1 << r) + extra bits blocks end here
                  j->eob_run = (1 << r);
                  if (r)
                     j->eob_run += stbi__jpeg_get_bits(j, r);
                  --j->eob_run;
                  break;
               }
               k += 16; // ZRL: run of 16 zeros
            } else {
               k += r;
               zig = stbi__jpeg_dezigzag[k++];
               data[zig] = (short) (stbi__extend_receive(j,s) << shift);
            }
         }
      } while
(k <= j->spec_end); + } else { + // refinement scan for these AC coefficients + + short bit = (short) (1 << j->succ_low); + + if (j->eob_run) { + --j->eob_run; + for (k = j->spec_start; k <= j->spec_end; ++k) { + short *p = &data[stbi__jpeg_dezigzag[k]]; + if (*p != 0) + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } + } else { + k = j->spec_start; + do { + int r,s; + int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r) - 1; + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + r = 64; // force end of block + } else { + // r=15 s=0 should write 16 0s, so we just do + // a run of 15 0s and then write s (which is 0), + // so we don't have to do anything special here + } + } else { + if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); + // sign bit + if (stbi__jpeg_get_bit(j)) + s = bit; + else + s = -bit; + } + + // advance by r + while (k <= j->spec_end) { + short *p = &data[stbi__jpeg_dezigzag[k++]]; + if (*p != 0) { + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } else { + if (r == 0) { + *p = (short) s; + break; + } + --r; + } + } + } while (k <= j->spec_end); + } + } + return 1; +} + +// take a -128..127 value and stbi__clamp it and convert to 0..255 +stbi_inline static stbi_uc stbi__clamp(int x) +{ + // trick to use a single test to catch both cases + if ((unsigned int) x > 255) { + if (x < 0) return 0; + if (x > 255) return 255; + } + return (stbi_uc) x; +} + +#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5))) +#define stbi__fsh(x) ((x) * 4096) + +// derived from jidctint -- DCT_ISLOW +#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ + int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = 
(p2+p3) * stbi__f2f(0.5411961f); \ + t2 = p1 + p3*stbi__f2f(-1.847759065f); \ + t3 = p1 + p2*stbi__f2f( 0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = stbi__fsh(p2+p3); \ + t1 = stbi__fsh(p2-p3); \ + x0 = t0+t3; \ + x3 = t0-t3; \ + x1 = t1+t2; \ + x2 = t1-t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0+t2; \ + p4 = t1+t3; \ + p1 = t0+t3; \ + p2 = t1+t2; \ + p5 = (p3+p4)*stbi__f2f( 1.175875602f); \ + t0 = t0*stbi__f2f( 0.298631336f); \ + t1 = t1*stbi__f2f( 2.053119869f); \ + t2 = t2*stbi__f2f( 3.072711026f); \ + t3 = t3*stbi__f2f( 1.501321110f); \ + p1 = p5 + p1*stbi__f2f(-0.899976223f); \ + p2 = p5 + p2*stbi__f2f(-2.562915447f); \ + p3 = p3*stbi__f2f(-1.961570560f); \ + p4 = p4*stbi__f2f(-0.390180644f); \ + t3 += p1+p4; \ + t2 += p2+p3; \ + t1 += p2+p4; \ + t0 += p1+p3; + +static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) +{ + int i,val[64],*v=val; + stbi_uc *o; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0]*4; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + 
STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + x0 += 65536 + (128<<17); + x1 += 65536 + (128<<17); + x2 += 65536 + (128<<17); + x3 += 65536 + (128<<17); + // tried computing the shifts into temps, or'ing the temps to see + // if any were out of range, but that was slower + o[0] = stbi__clamp((x0+t3) >> 17); + o[7] = stbi__clamp((x0-t3) >> 17); + o[1] = stbi__clamp((x1+t2) >> 17); + o[6] = stbi__clamp((x1-t2) >> 17); + o[2] = stbi__clamp((x2+t1) >> 17); + o[5] = stbi__clamp((x2-t1) >> 17); + o[3] = stbi__clamp((x3+t0) >> 17); + o[4] = stbi__clamp((x3-t0) >> 17); + } +} + +#ifdef STBI_SSE2 +// sse2 integer IDCT. not the fastest possible implementation but it +// produces bit-identical results to the generic C version so it's +// fully "transparent". +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + // This is constructed to match our regular (generic) integer IDCT exactly. 
+ __m128i row0, row1, row2, row3, row4, row5, row6, row7; + __m128i tmp; + + // dot product constant: even elems=x, odd elems=y + #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y)) + + // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit) + // out(1) = c1[even]*x + c1[odd]*y + #define dct_rot(out0,out1, x,y,c0,c1) \ + __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \ + __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \ + __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ + __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ + __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ + __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) + + // out = in << 12 (in 16-bit, out 32-bit) + #define dct_widen(out, in) \ + __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ + __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) + + // wide add + #define dct_wadd(out, a, b) \ + __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_add_epi32(a##_h, b##_h) + + // wide sub + #define dct_wsub(out, a, b) \ + __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) + + // butterfly a/b, add bias, then shift by "s" and pack + #define dct_bfly32o(out0, out1, a,b,bias,s) \ + { \ + __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ + __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ + dct_wadd(sum, abiased, b); \ + dct_wsub(dif, abiased, b); \ + out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ + out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ + } + + // 8-bit interleave step (for transposes) + #define dct_interleave8(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi8(a, b); \ + b = _mm_unpackhi_epi8(tmp, b) + + // 16-bit interleave step (for transposes) + #define dct_interleave16(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi16(a, b); \ + b = _mm_unpackhi_epi16(tmp, b) + + #define dct_pass(bias,shift) \ + { \ + 
/* even part */ \ + dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \ + __m128i sum04 = _mm_add_epi16(row0, row4); \ + __m128i dif04 = _mm_sub_epi16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ + dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \ + __m128i sum17 = _mm_add_epi16(row1, row7); \ + __m128i sum35 = _mm_add_epi16(row3, row5); \ + dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \ + dct_wadd(x4, y0o, y4o); \ + dct_wadd(x5, y1o, y5o); \ + dct_wadd(x6, y2o, y5o); \ + dct_wadd(x7, y3o, y4o); \ + dct_bfly32o(row0,row7, x0,x7,bias,shift); \ + dct_bfly32o(row1,row6, x1,x6,bias,shift); \ + dct_bfly32o(row2,row5, x2,x5,bias,shift); \ + dct_bfly32o(row3,row4, x3,x4,bias,shift); \ + } + + __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); + __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f)); + __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f)); + __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f)); + __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f)); + __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f)); + __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f)); + __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f)); + + // rounding biases in column/row passes, see stbi__idct_block for explanation. 
+ __m128i bias_0 = _mm_set1_epi32(512); + __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17)); + + // load + row0 = _mm_load_si128((const __m128i *) (data + 0*8)); + row1 = _mm_load_si128((const __m128i *) (data + 1*8)); + row2 = _mm_load_si128((const __m128i *) (data + 2*8)); + row3 = _mm_load_si128((const __m128i *) (data + 3*8)); + row4 = _mm_load_si128((const __m128i *) (data + 4*8)); + row5 = _mm_load_si128((const __m128i *) (data + 5*8)); + row6 = _mm_load_si128((const __m128i *) (data + 6*8)); + row7 = _mm_load_si128((const __m128i *) (data + 7*8)); + + // column pass + dct_pass(bias_0, 10); + + { + // 16bit 8x8 transpose pass 1 + dct_interleave16(row0, row4); + dct_interleave16(row1, row5); + dct_interleave16(row2, row6); + dct_interleave16(row3, row7); + + // transpose pass 2 + dct_interleave16(row0, row2); + dct_interleave16(row1, row3); + dct_interleave16(row4, row6); + dct_interleave16(row5, row7); + + // transpose pass 3 + dct_interleave16(row0, row1); + dct_interleave16(row2, row3); + dct_interleave16(row4, row5); + dct_interleave16(row6, row7); + } + + // row pass + dct_pass(bias_1, 17); + + { + // pack + __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 + __m128i p1 = _mm_packus_epi16(row2, row3); + __m128i p2 = _mm_packus_epi16(row4, row5); + __m128i p3 = _mm_packus_epi16(row6, row7); + + // 8bit 8x8 transpose pass 1 + dct_interleave8(p0, p2); // a0e0a1e1... + dct_interleave8(p1, p3); // c0g0c1g1... + + // transpose pass 2 + dct_interleave8(p0, p1); // a0c0e0g0... + dct_interleave8(p2, p3); // b0d0f0h0... + + // transpose pass 3 + dct_interleave8(p0, p2); // a0b0c0d0... + dct_interleave8(p1, p3); // a4b4c4d4... 
+ + // store + _mm_storel_epi64((__m128i *) out, p0); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p2); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p1); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p3); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e)); + } + +#undef dct_const +#undef dct_rot +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_interleave8 +#undef dct_interleave16 +#undef dct_pass +} + +#endif // STBI_SSE2 + +#ifdef STBI_NEON + +// NEON integer IDCT. should produce bit-identical +// results to the generic C version. +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + int16x8_t row0, row1, row2, row3, row4, row5, row6, row7; + + int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f)); + int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f)); + int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f)); + int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f)); + int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f)); + int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f)); + int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f)); + int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f)); + int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f)); + int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f)); + int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f)); + int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f)); + +#define dct_long_mul(out, inq, coeff) \ + int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff) + +#define dct_long_mac(out, acc, inq, coeff) \ + int32x4_t out##_l = vmlal_s16(acc##_l, 
vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff) + +#define dct_widen(out, inq) \ + int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \ + int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12) + +// wide add +#define dct_wadd(out, a, b) \ + int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vaddq_s32(a##_h, b##_h) + +// wide sub +#define dct_wsub(out, a, b) \ + int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vsubq_s32(a##_h, b##_h) + +// butterfly a/b, then shift using "shiftop" by "s" and pack +#define dct_bfly32o(out0,out1, a,b,shiftop,s) \ + { \ + dct_wadd(sum, a, b); \ + dct_wsub(dif, a, b); \ + out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ + out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ + } + +#define dct_pass(shiftop, shift) \ + { \ + /* even part */ \ + int16x8_t sum26 = vaddq_s16(row2, row6); \ + dct_long_mul(p1e, sum26, rot0_0); \ + dct_long_mac(t2e, p1e, row6, rot0_1); \ + dct_long_mac(t3e, p1e, row2, rot0_2); \ + int16x8_t sum04 = vaddq_s16(row0, row4); \ + int16x8_t dif04 = vsubq_s16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + int16x8_t sum15 = vaddq_s16(row1, row5); \ + int16x8_t sum17 = vaddq_s16(row1, row7); \ + int16x8_t sum35 = vaddq_s16(row3, row5); \ + int16x8_t sum37 = vaddq_s16(row3, row7); \ + int16x8_t sumodd = vaddq_s16(sum17, sum35); \ + dct_long_mul(p5o, sumodd, rot1_0); \ + dct_long_mac(p1o, p5o, sum17, rot1_1); \ + dct_long_mac(p2o, p5o, sum35, rot1_2); \ + dct_long_mul(p3o, sum37, rot2_0); \ + dct_long_mul(p4o, sum15, rot2_1); \ + dct_wadd(sump13o, p1o, p3o); \ + dct_wadd(sump24o, p2o, p4o); \ + dct_wadd(sump23o, p2o, p3o); \ + dct_wadd(sump14o, p1o, p4o); \ + dct_long_mac(x4, sump13o, row7, rot3_0); \ + dct_long_mac(x5, sump24o, row5, rot3_1); \ + 
dct_long_mac(x6, sump23o, row3, rot3_2); \ + dct_long_mac(x7, sump14o, row1, rot3_3); \ + dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \ + dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \ + dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \ + dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \ + } + + // load + row0 = vld1q_s16(data + 0*8); + row1 = vld1q_s16(data + 1*8); + row2 = vld1q_s16(data + 2*8); + row3 = vld1q_s16(data + 3*8); + row4 = vld1q_s16(data + 4*8); + row5 = vld1q_s16(data + 5*8); + row6 = vld1q_s16(data + 6*8); + row7 = vld1q_s16(data + 7*8); + + // add DC bias + row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0)); + + // column pass + dct_pass(vrshrn_n_s32, 10); + + // 16bit 8x8 transpose + { +// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively. +// whether compilers actually get this is another story, sadly. +#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); } +#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); } + + // pass 1 + dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 + dct_trn16(row2, row3); + dct_trn16(row4, row5); + dct_trn16(row6, row7); + + // pass 2 + dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 + dct_trn32(row1, row3); + dct_trn32(row4, row6); + dct_trn32(row5, row7); + + // pass 3 + dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 + dct_trn64(row1, row5); + dct_trn64(row2, row6); + dct_trn64(row3, row7); + +#undef dct_trn16 +#undef dct_trn32 +#undef dct_trn64 + } + + // row pass + // vrshrn_n_s32 only supports shifts up to 16, we need + // 17. so do a non-rounding shift of 16 first then follow + // up with a rounding shift by 1. 
+ dct_pass(vshrn_n_s32, 16); + + { + // pack and round + uint8x8_t p0 = vqrshrun_n_s16(row0, 1); + uint8x8_t p1 = vqrshrun_n_s16(row1, 1); + uint8x8_t p2 = vqrshrun_n_s16(row2, 1); + uint8x8_t p3 = vqrshrun_n_s16(row3, 1); + uint8x8_t p4 = vqrshrun_n_s16(row4, 1); + uint8x8_t p5 = vqrshrun_n_s16(row5, 1); + uint8x8_t p6 = vqrshrun_n_s16(row6, 1); + uint8x8_t p7 = vqrshrun_n_s16(row7, 1); + + // again, these can translate into one instruction, but often don't. +#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); } +#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); } + + // sadly can't use interleaved stores here since we only write + // 8 bytes to each scan line! 

   // 8x8 8-bit transpose pass 1
   dct_trn8_8(p0, p1);
   dct_trn8_8(p2, p3);
   dct_trn8_8(p4, p5);
   dct_trn8_8(p6, p7);

   // pass 2
   dct_trn8_16(p0, p2);
   dct_trn8_16(p1, p3);
   dct_trn8_16(p4, p6);
   dct_trn8_16(p5, p7);

   // pass 3
   dct_trn8_32(p0, p4);
   dct_trn8_32(p1, p5);
   dct_trn8_32(p2, p6);
   dct_trn8_32(p3, p7);

   // store: one 8-byte scanline per row
   vst1_u8(out, p0); out += out_stride;
   vst1_u8(out, p1); out += out_stride;
   vst1_u8(out, p2); out += out_stride;
   vst1_u8(out, p3); out += out_stride;
   vst1_u8(out, p4); out += out_stride;
   vst1_u8(out, p5); out += out_stride;
   vst1_u8(out, p6); out += out_stride;
   vst1_u8(out, p7);

#undef dct_trn8_8
#undef dct_trn8_16
#undef dct_trn8_32
   }

#undef dct_long_mul
#undef dct_long_mac
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_pass
}

#endif // STBI_NEON

#define STBI__MARKER_none  0xff
// if there's a pending marker from the entropy stream, return that
// otherwise, fetch from the stream and get a marker. if there's no
// marker, return 0xff, which is never a valid marker value
static stbi_uc stbi__get_marker(stbi__jpeg *j)
{
   stbi_uc x;
   if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
   x = stbi__get8(j->s);
   if (x != 0xff) return STBI__MARKER_none;
   while (x == 0xff)
      x = stbi__get8(j->s); // consume repeated 0xff fill bytes
   return x;
}

// in each scan, we'll have scan_n components, and the order
// of the components is specified by order[]
#define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)

// after a restart interval, stbi__jpeg_reset the entropy decoder and
// the dc prediction
static void stbi__jpeg_reset(stbi__jpeg *j)
{
   j->code_bits = 0;
   j->code_buffer = 0;
   j->nomore = 0;
   // DC prediction restarts at 0 for every component
   j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
   j->marker = STBI__MARKER_none;
   j->todo = j->restart_interval ?
// Decode one scan's worth of entropy-coded data. Baseline scans decode each
// block and run the IDCT immediately into the component's pixel buffer;
// progressive scans only accumulate coefficients into img_comp[].coeff (the
// IDCT happens later, in stbi__jpeg_finish). Handles both non-interleaved
// (scan_n == 1) and interleaved layouts, and restart-marker resynchronization
// via z->todo. Returns 1 on success (including early-out on a non-restart
// marker, which yields partially-decoded data), 0 on a decode error.
static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
{
   stbi__jpeg_reset(z);
   if (!z->progressive) {
      if (z->scan_n == 1) {
         int i,j;
         STBI_SIMD_ALIGN(short, data[64]);
         int n = z->order[0];
         // non-interleaved data, we just need to process one block at a time,
         // in trivial scanline order
         // number of blocks to do just depends on how many actual "pixels" this
         // component has, independent of interleaved MCU blocking and such
         int w = (z->img_comp[n].x+7) >> 3;
         int h = (z->img_comp[n].y+7) >> 3;
         for (j=0; j < h; ++j) {
            for (i=0; i < w; ++i) {
               int ha = z->img_comp[n].ha;
               if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
               // w2 is the row stride of the component's pixel buffer
               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
               // every data block is an MCU, so countdown the restart interval
               if (--z->todo <= 0) {
                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
                  // if it's NOT a restart, then just bail, so we get corrupt data
                  // rather than no data
                  if (!STBI__RESTART(z->marker)) return 1;
                  stbi__jpeg_reset(z);
               }
            }
         }
         return 1;
      } else { // interleaved
         int i,j,k,x,y;
         STBI_SIMD_ALIGN(short, data[64]);
         for (j=0; j < z->img_mcu_y; ++j) {
            for (i=0; i < z->img_mcu_x; ++i) {
               // scan an interleaved mcu... process scan_n components in order
               for (k=0; k < z->scan_n; ++k) {
                  int n = z->order[k];
                  // scan out an mcu's worth of this component; that's just determined
                  // by the basic H and V specified for the component
                  for (y=0; y < z->img_comp[n].v; ++y) {
                     for (x=0; x < z->img_comp[n].h; ++x) {
                        // block position in pixels within this component's buffer
                        int x2 = (i*z->img_comp[n].h + x)*8;
                        int y2 = (j*z->img_comp[n].v + y)*8;
                        int ha = z->img_comp[n].ha;
                        if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
                        z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
                     }
                  }
               }
               // after all interleaved components, that's an interleaved MCU,
               // so now count down the restart interval
               if (--z->todo <= 0) {
                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
                  if (!STBI__RESTART(z->marker)) return 1;
                  stbi__jpeg_reset(z);
               }
            }
         }
         return 1;
      }
   } else {
      if (z->scan_n == 1) {
         int i,j;
         int n = z->order[0];
         // non-interleaved data, we just need to process one block at a time,
         // in trivial scanline order
         // number of blocks to do just depends on how many actual "pixels" this
         // component has, independent of interleaved MCU blocking and such
         int w = (z->img_comp[n].x+7) >> 3;
         int h = (z->img_comp[n].y+7) >> 3;
         for (j=0; j < h; ++j) {
            for (i=0; i < w; ++i) {
               // coefficients accumulate in the per-component coeff plane
               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
               if (z->spec_start == 0) {
                  // spec_start == 0 means this scan carries DC data
                  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
                     return 0;
               } else {
                  int ha = z->img_comp[n].ha;
                  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
                     return 0;
               }
               // every data block is an MCU, so countdown the restart interval
               if (--z->todo <= 0) {
                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
                  if (!STBI__RESTART(z->marker)) return 1;
                  stbi__jpeg_reset(z);
               }
            }
         }
         return 1;
      } else { // interleaved
         int i,j,k,x,y;
         for (j=0; j < z->img_mcu_y; ++j) {
            for (i=0; i < z->img_mcu_x; ++i) {
               // scan an interleaved mcu... process scan_n components in order
               for (k=0; k < z->scan_n; ++k) {
                  int n = z->order[k];
                  // scan out an mcu's worth of this component; that's just determined
                  // by the basic H and V specified for the component
                  for (y=0; y < z->img_comp[n].v; ++y) {
                     for (x=0; x < z->img_comp[n].h; ++x) {
                        int x2 = (i*z->img_comp[n].h + x);
                        int y2 = (j*z->img_comp[n].v + y);
                        short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
                        // only the DC decoder is invoked here: interleaved
                        // progressive scans never carry AC coefficients
                        if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
                           return 0;
                     }
                  }
               }
               // after all interleaved components, that's an interleaved MCU,
               // so now count down the restart interval
               if (--z->todo <= 0) {
                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
                  if (!STBI__RESTART(z->marker)) return 1;
                  stbi__jpeg_reset(z);
               }
            }
         }
         return 1;
      }
   }
}
i,j,k,x,y; + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x); + int y2 = (j*z->img_comp[n].v + y); + short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } +} + +static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) +{ + int i; + for (i=0; i < 64; ++i) + data[i] *= dequant[i]; +} + +static void stbi__jpeg_finish(stbi__jpeg *z) +{ + if (z->progressive) { + // dequantize and idct the data + int i,j,n; + for (n=0; n < z->s->img_n; ++n) { + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + } + } + } + } +} + +static int stbi__process_marker(stbi__jpeg *z, int m) +{ + int L; + switch (m) { + case STBI__MARKER_none: // no marker found + return stbi__err("expected marker","Corrupt JPEG"); + + case 0xDD: // DRI - specify restart interval + if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); + z->restart_interval = 
// Process a single non-SOF, non-SOS marker segment: DRI (restart interval),
// DQT (quantization tables), DHT (huffman tables), plus COM/APPn blocks
// (JFIF APP0 and Adobe APP14 are inspected; the rest are skipped).
// Returns 1 on success, 0 on error. For DQT/DHT the declared segment length
// must be consumed exactly, hence the `return L==0`.
static int stbi__process_marker(stbi__jpeg *z, int m)
{
   int L;
   switch (m) {
      case STBI__MARKER_none: // no marker found
         return stbi__err("expected marker","Corrupt JPEG");

      case 0xDD: // DRI - specify restart interval
         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
         z->restart_interval = stbi__get16be(z->s);
         return 1;

      case 0xDB: // DQT - define quantization table
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            int q = stbi__get8(z->s);
            int p = q >> 4, sixteen = (p != 0); // high nibble: 0 = 8-bit, 1 = 16-bit entries
            int t = q & 15,i;                   // low nibble: table slot (0..3)
            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");

            // entries are stored in zigzag order; de-zigzag while loading
            for (i=0; i < 64; ++i)
               z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
            // 65 = 1 id byte + 64 8-bit entries; 129 = 1 + 64*2 for 16-bit
            L -= (sixteen ? 129 : 65);
         }
         return L==0;

      case 0xC4: // DHT - define huffman table
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            stbi_uc *v;
            int sizes[16],i,n=0;
            int q = stbi__get8(z->s);
            int tc = q >> 4;  // table class: 0 = DC, 1 = AC
            int th = q & 15;  // table slot
            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
            // 16 counts of codes per bit-length, then the symbol values
            for (i=0; i < 16; ++i) {
               sizes[i] = stbi__get8(z->s);
               n += sizes[i];
            }
            L -= 17; // 1 id byte + 16 length counts
            if (tc == 0) {
               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
               v = z->huff_dc[th].values;
            } else {
               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
               v = z->huff_ac[th].values;
            }
            for (i=0; i < n; ++i)
               v[i] = stbi__get8(z->s);
            if (tc != 0)
               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); // AC fast-path lookup table
            L -= n;
         }
         return L==0;
   }

   // check for comment block or APP blocks
   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
      L = stbi__get16be(z->s);
      if (L < 2) {
         if (m == 0xFE)
            return stbi__err("bad COM len","Corrupt JPEG");
         else
            return stbi__err("bad APP len","Corrupt JPEG");
      }
      L -= 2;

      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
         static const unsigned char tag[5] = {'J','F','I','F','\0'};
         int ok = 1;
         int i;
         for (i=0; i < 5; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 5;
         if (ok)
            z->jfif = 1;
      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
         static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
         int ok = 1;
         int i;
         for (i=0; i < 6; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 6;
         if (ok) {
            stbi__get8(z->s); // version
            stbi__get16be(z->s); // flags0
            stbi__get16be(z->s); // flags1
            z->app14_color_transform = stbi__get8(z->s); // color transform
            L -= 6;
         }
      }

      // skip whatever remains of the segment payload
      stbi__skip(z->s, L);
      return 1;
   }

   return stbi__err("unknown marker","Corrupt JPEG");
}
// Parse an SOS (start of scan) header: the component count and, per
// component, its id plus DC/AC huffman table selectors; then the spectral
// selection and successive-approximation fields. Fills z->scan_n, z->order[],
// img_comp[].hd/.ha, and the spec_start/spec_end/succ_high/succ_low fields.
// Returns 1 on success, 0 on error.
static int stbi__process_scan_header(stbi__jpeg *z)
{
   int i;
   int Ls = stbi__get16be(z->s);
   z->scan_n = stbi__get8(z->s);
   if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
   if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
   for (i=0; i < z->scan_n; ++i) {
      int id = stbi__get8(z->s), which;
      int q = stbi__get8(z->s);
      // match the scan component id against the frame's component ids
      for (which = 0; which < z->s->img_n; ++which)
         if (z->img_comp[which].id == id)
            break;
      if (which == z->s->img_n) return 0; // no match
      z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
      z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
      z->order[i] = which; // remember decode order for this scan
   }

   {
      int aa;
      z->spec_start = stbi__get8(z->s);
      z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
      aa = stbi__get8(z->s);
      z->succ_high = (aa >> 4);
      z->succ_low  = (aa & 15);
      if (z->progressive) {
         // progressive scans use the spectral/successive fields as-is
         if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
            return stbi__err("bad SOS", "Corrupt JPEG");
      } else {
         // baseline: these fields are fixed; force spec_end to the full range
         if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
         if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
         z->spec_end = 63;
      }
   }

   return 1;
}
} + if (z->img_comp[i].raw_coeff) { + STBI_FREE(z->img_comp[i].raw_coeff); + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].coeff = 0; + } + if (z->img_comp[i].linebuf) { + STBI_FREE(z->img_comp[i].linebuf); + z->img_comp[i].linebuf = NULL; + } + } + return why; +} + +static int stbi__process_frame_header(stbi__jpeg *z, int scan) +{ + stbi__context *s = z->s; + int Lf,p,i,q, h_max=1,v_max=1,c; + Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG + p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline + s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires + c = stbi__get8(s); + if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); + s->img_n = c; + for (i=0; i < c; ++i) { + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; + } + + if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); + + z->rgb = 0; + for (i=0; i < s->img_n; ++i) { + static const unsigned char rgb[3] = { 'R', 'G', 'B' }; + z->img_comp[i].id = stbi__get8(s); + if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) + ++z->rgb; + q = stbi__get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); + z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); + } + + if (scan != STBI__SCAN_load) return 1; + + if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); + + for (i=0; i < s->img_n; ++i) { + if 
// Parse an SOF (start of frame) header: precision, dimensions, component
// count, and per-component sampling factors / quantization-table ids. When
// scan == STBI__SCAN_load it also computes MCU geometry and allocates the
// per-component pixel buffers (and, for progressive images, the coefficient
// buffers). Returns 1 on success, 0 on error; on allocation failure it frees
// everything allocated so far via stbi__free_jpeg_components.
static int stbi__process_frame_header(stbi__jpeg *z, int scan)
{
   stbi__context *s = z->s;
   int Lf,p,i,q, h_max=1,v_max=1,c;
   Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
   p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
   s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
   s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
   c = stbi__get8(s);
   if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
   s->img_n = c;
   for (i=0; i < c; ++i) {
      z->img_comp[i].data = NULL;
      z->img_comp[i].linebuf = NULL;
   }

   if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");

   z->rgb = 0;
   for (i=0; i < s->img_n; ++i) {
      static const unsigned char rgb[3] = { 'R', 'G', 'B' };
      z->img_comp[i].id = stbi__get8(s);
      // component ids 'R','G','B' indicate an (unusual) RGB-coded JPEG
      if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
         ++z->rgb;
      q = stbi__get8(s);
      z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
      z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
      z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
   }

   // header-only scans stop here; no buffers are allocated
   if (scan != STBI__SCAN_load) return 1;

   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");

   for (i=0; i < s->img_n; ++i) {
      if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
      if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
   }

   // compute interleaved mcu info
   z->img_h_max = h_max;
   z->img_v_max = v_max;
   z->img_mcu_w = h_max * 8;
   z->img_mcu_h = v_max * 8;
   // these sizes can't be more than 17 bits
   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;

   for (i=0; i < s->img_n; ++i) {
      // number of effective pixels (e.g. for non-interleaved MCU)
      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
      // to simplify generation, we'll allocate enough memory to decode
      // the bogus oversized data from using interleaved MCUs and their
      // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
      // discard the extra data until colorspace conversion
      //
      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
      // so these muls can't overflow with 32-bit ints (which we require)
      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
      z->img_comp[i].coeff = 0;
      z->img_comp[i].raw_coeff = 0;
      z->img_comp[i].linebuf = NULL;
      // 15 extra bytes so the aligned `data` pointer below stays in bounds
      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
      if (z->img_comp[i].raw_data == NULL)
         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
      // align blocks for idct using mmx/sse
      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
      if (z->progressive) {
         // w2, h2 are multiples of 8 (see above)
         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
         if (z->img_comp[i].raw_coeff == NULL)
            return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
         z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
      }
   }

   return 1;
}
stbi__err("outofmem", "Out of memory")); + z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); + } + } + + return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. SOF) +#define stbi__DNL(x) ((x) == 0xdc) +#define stbi__SOI(x) ((x) == 0xd8) +#define stbi__EOI(x) ((x) == 0xd9) +#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) +#define stbi__SOS(x) ((x) == 0xda) + +#define stbi__SOF_progressive(x) ((x) == 0xc2) + +static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) +{ + int m; + z->jfif = 0; + z->app14_color_transform = -1; // valid values are 0,1,2 + z->marker = STBI__MARKER_none; // initialize cached marker to empty + m = stbi__get_marker(z); + if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG"); + if (scan == STBI__SCAN_type) return 1; + m = stbi__get_marker(z); + while (!stbi__SOF(m)) { + if (!stbi__process_marker(z,m)) return 0; + m = stbi__get_marker(z); + while (m == STBI__MARKER_none) { + // some files have extra padding after their blocks, so ok, we'll scan + if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); + m = stbi__get_marker(z); + } + } + z->progressive = stbi__SOF_progressive(m); + if (!stbi__process_frame_header(z, scan)) return 0; + return 1; +} + +// decode image to YCbCr format +static int stbi__decode_jpeg_image(stbi__jpeg *j) +{ + int m; + for (m = 0; m < 4; m++) { + j->img_comp[m].raw_data = NULL; + j->img_comp[m].raw_coeff = NULL; + } + j->restart_interval = 0; + if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0; + m = stbi__get_marker(j); + while (!stbi__EOI(m)) { + if (stbi__SOS(m)) { + if (!stbi__process_scan_header(j)) return 0; + if (!stbi__parse_entropy_coded_data(j)) return 0; + if (j->marker == STBI__MARKER_none ) { + // handle 0s at the end of image data from IP Kamera 9060 + while (!stbi__at_eof(j->s)) { + int x = stbi__get8(j->s); + if (x == 255) { + j->marker = stbi__get8(j->s); + break; + } + } + // if we 
reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 + } + } else if (stbi__DNL(m)) { + int Ld = stbi__get16be(j->s); + stbi__uint32 NL = stbi__get16be(j->s); + if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); + if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); + } else { + if (!stbi__process_marker(j, m)) return 0; + } + m = stbi__get_marker(j); + } + if (j->progressive) + stbi__jpeg_finish(j); + return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1, + int w, int hs); + +#define stbi__div4(x) ((stbi_uc) ((x) >> 2)) + +static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + STBI_NOTUSED(out); + STBI_NOTUSED(in_far); + STBI_NOTUSED(w); + STBI_NOTUSED(hs); + return in_near; +} + +static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples vertically for every one in input + int i; + STBI_NOTUSED(hs); + for (i=0; i < w; ++i) + out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); + return out; +} + +static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples horizontally for every one in input + int i; + stbi_uc *input = in_near; + + if (w == 1) { + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + return out; + } + + out[0] = input[0]; + out[1] = stbi__div4(input[0]*3 + input[1] + 2); + for (i=1; i < w-1; ++i) { + int n = 3*input[i]+2; + out[i*2+0] = stbi__div4(n+input[i-1]); + out[i*2+1] = stbi__div4(n+input[i+1]); + } + out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2); + out[i*2+1] = input[w-1]; + + STBI_NOTUSED(in_far); + STBI_NOTUSED(hs); + + return out; +} + +#define stbi__div16(x) ((stbi_uc) ((x) >> 4)) + +static stbi_uc 
// 2x2 upsample (scalar): a vertical 3:1 blend of the near/far rows feeds a
// horizontal polyphase filter. Intermediate totals stay scaled by 4 between
// the two passes, so the final divide is by 16.
static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int x, prev, cur;

   STBI_NOTUSED(hs);

   if (w == 1) {
      // single column: both outputs are just the vertical blend
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   cur = 3*in_near[0] + in_far[0]; // vertically filtered value, scaled by 4
   out[0] = stbi__div4(cur + 2);   // left edge has no left neighbor
   for (x = 1; x < w; ++x) {
      prev = cur;
      cur  = 3*in_near[x] + in_far[x];
      out[x*2-1] = stbi__div16(3*prev + cur + 8); // phase nearer to prev
      out[x*2  ] = stbi__div16(3*cur + prev + 8); // phase nearer to cur
   }
   out[w*2-1] = stbi__div4(cur + 2); // right edge has no right neighbor

   return out;
}
// 2x2 upsample, SIMD (SSE2 or NEON) version of stbi__resample_row_hv_2:
// processes 8 input pixels per iteration, with a scalar tail for the last
// pixels of the row (the boundary conditions can't be vectorized cheaply).
// Produces bit-identical results to the scalar routine.
static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate 2x2 samples for every one in input
   int i=0,t0,t1;

   if (w == 1) {
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   t1 = 3*in_near[0] + in_far[0];
   // process groups of 8 pixels for as long as we can.
   // note we can't handle the last pixel in a row in this loop
   // because we need to handle the filter boundary conditions.
   for (; i < ((w-1) & ~7); i += 8) {
#if defined(STBI_SSE2)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      __m128i zero  = _mm_setzero_si128();
      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i));
      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
      __m128i farw  = _mm_unpacklo_epi8(farb, zero);
      __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
      __m128i diff  = _mm_sub_epi16(farw, nearw);
      __m128i nears = _mm_slli_epi16(nearw, 2);
      __m128i curr  = _mm_add_epi16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      __m128i prv0 = _mm_slli_si128(curr, 2);
      __m128i nxt0 = _mm_srli_si128(curr, 2);
      __m128i prev = _mm_insert_epi16(prv0, t1, 0);
      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      __m128i bias  = _mm_set1_epi16(8);
      __m128i curs = _mm_slli_epi16(curr, 2);
      __m128i prvd = _mm_sub_epi16(prev, curr);
      __m128i nxtd = _mm_sub_epi16(next, curr);
      __m128i curb = _mm_add_epi16(curs, bias);
      __m128i even = _mm_add_epi16(prvd, curb);
      __m128i odd  = _mm_add_epi16(nxtd, curb);

      // interleave even and odd pixels, then undo scaling.
      __m128i int0 = _mm_unpacklo_epi16(even, odd);
      __m128i int1 = _mm_unpackhi_epi16(even, odd);
      __m128i de0  = _mm_srli_epi16(int0, 4);
      __m128i de1  = _mm_srli_epi16(int1, 4);

      // pack and write output
      __m128i outv = _mm_packus_epi16(de0, de1);
      _mm_storeu_si128((__m128i *) (out + i*2), outv);
#elif defined(STBI_NEON)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      uint8x8_t farb  = vld1_u8(in_far + i);
      uint8x8_t nearb = vld1_u8(in_near + i);
      int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
      int16x8_t curr  = vaddq_s16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      int16x8_t prv0 = vextq_s16(curr, curr, 7);
      int16x8_t nxt0 = vextq_s16(curr, curr, 1);
      int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      int16x8_t curs = vshlq_n_s16(curr, 2);
      int16x8_t prvd = vsubq_s16(prev, curr);
      int16x8_t nxtd = vsubq_s16(next, curr);
      int16x8_t even = vaddq_s16(curs, prvd);
      int16x8_t odd  = vaddq_s16(curs, nxtd);

      // undo scaling and round, then store with even/odd phases interleaved
      uint8x8x2_t o;
      o.val[0] = vqrshrun_n_s16(even, 4);
      o.val[1] = vqrshrun_n_s16(odd,  4);
      vst2_u8(out + i*2, o);
#endif

      // "previous" value for next iter
      t1 = 3*in_near[i+7] + in_far[i+7];
   }

   // scalar tail: finish the current pixel...
   // (t0 == t1 here, so div16(3*t1+t0+8) == div4(t1+2), matching the scalar path)
   t0 = t1;
   t1 = 3*in_near[i] + in_far[i];
   out[i*2] = stbi__div16(3*t1 + t0 + 8);

   // ...and the remaining pixels of the row
   for (++i; i < w; ++i) {
      t0 = t1;
      t1 = 3*in_near[i]+in_far[i];
      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
   }
   out[w*2-1] = stbi__div4(t1+2);

   STBI_NOTUSED(hs);

   return out;
}
cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) +{ + int i = 0; + +#ifdef STBI_SSE2 + // step == 3 is pretty ugly on the final interleave, and i'm not convinced + // it's useful in practice (you wouldn't use it for textures, for example). + // so just accelerate step == 4 case. + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. 
// SIMD YCbCr -> RGBA conversion (SSE2 and/or NEON), 8 pixels per iteration,
// with the scalar fixed-point loop as a tail/fallback. Only step == 4 (RGBA
// output) is vectorized; any remainder — or step != 4 entirely — falls
// through to the scalar loop, which matches stbi__YCbCr_to_RGB_row.
static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
{
   int i = 0;

#ifdef STBI_SSE2
   // step == 3 is pretty ugly on the final interleave, and i'm not convinced
   // it's useful in practice (you wouldn't use it for textures, for example).
   // so just accelerate step == 4 case.
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      __m128i signflip  = _mm_set1_epi8(-0x80);
      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
      __m128i xw = _mm_set1_epi16(255); // alpha channel

      for (; i+7 < count; i += 8) {
         // load
         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128

         // unpack to short (and left-shift cr, cb by 8)
         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes);
         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);

         // color transform
         __m128i yws = _mm_srli_epi16(yw, 4);
         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
         __m128i rws = _mm_add_epi16(cr0, yws);
         __m128i gwt = _mm_add_epi16(cb0, yws);
         __m128i bws = _mm_add_epi16(yws, cb1);
         __m128i gws = _mm_add_epi16(gwt, cr1);

         // descale
         __m128i rw = _mm_srai_epi16(rws, 4);
         __m128i bw = _mm_srai_epi16(bws, 4);
         __m128i gw = _mm_srai_epi16(gws, 4);

         // back to byte, set up for transpose
         __m128i brb = _mm_packus_epi16(rw, bw);
         __m128i gxb = _mm_packus_epi16(gw, xw);

         // transpose to interleave channels
         __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
         __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
         __m128i o0 = _mm_unpacklo_epi16(t0, t1);
         __m128i o1 = _mm_unpackhi_epi16(t0, t1);

         // store
         _mm_storeu_si128((__m128i *) (out + 0), o0);
         _mm_storeu_si128((__m128i *) (out + 16), o1);
         out += 32;
      }
   }
#endif

#ifdef STBI_NEON
   // in this version, step=3 support would be easy to add. but is there demand?
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      uint8x8_t signflip = vdup_n_u8(0x80);
      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));

      for (; i+7 < count; i += 8) {
         // load
         uint8x8_t y_bytes  = vld1_u8(y + i);
         uint8x8_t cr_bytes = vld1_u8(pcr + i);
         uint8x8_t cb_bytes = vld1_u8(pcb + i);
         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));

         // expand to s16
         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
         int16x8_t crw = vshll_n_s8(cr_biased, 7);
         int16x8_t cbw = vshll_n_s8(cb_biased, 7);

         // color transform
         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
         int16x8_t rws = vaddq_s16(yws, cr0);
         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
         int16x8_t bws = vaddq_s16(yws, cb1);

         // undo scaling, round, convert to byte
         uint8x8x4_t o;
         o.val[0] = vqrshrun_n_s16(rws, 4);
         o.val[1] = vqrshrun_n_s16(gws, 4);
         o.val[2] = vqrshrun_n_s16(bws, 4);
         o.val[3] = vdup_n_u8(255);

         // store, interleaving r/g/b/a
         vst4_u8(out, o);
         out += 8*4;
      }
   }
#endif

   // scalar loop: remainder pixels (or everything, when step != 4);
   // identical math to stbi__YCbCr_to_RGB_row
   for (; i < count; ++i) {
      int y_fixed = (y[i] << 20) + (1<<19); // rounding
      int r,g,b;
      int cr = pcr[i] - 128;
      int cb = pcb[i] - 128;
      r = y_fixed + cr* stbi__float2fixed(1.40200f);
      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      b = y_fixed + cb* stbi__float2fixed(1.77200f);
      r >>= 20;
      g >>= 20;
      b >>= 20;
      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255;
      out += step;
   }
}
0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} +#endif + +// set up the kernels +static void stbi__setup_jpeg(stbi__jpeg *j) +{ + j->idct_block_kernel = stbi__idct_block; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2; + +#ifdef STBI_SSE2 + if (stbi__sse2_available()) { + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; + } +#endif + +#ifdef STBI_NEON + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; +#endif +} + +// clean up the temporary component buffers +static void stbi__cleanup_jpeg(stbi__jpeg *j) +{ + stbi__free_jpeg_components(j, j->s->img_n, 0); +} + +typedef struct +{ + resample_row_func resample; + stbi_uc *line0,*line1; + int hs,vs; // expansion factor in each axis + int w_lores; // horizontal pixels pre-expansion + int ystep; // how far through vertical expansion we are + int ypos; // which pre-expansion row we're on +} stbi__resample; + +// fast 0..255 * 0..255 => 0..255 rounded multiplication +static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) +{ + unsigned int t = x*y + 128; + return (stbi_uc) ((t + (t >>8)) >> 8); +} + +static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ + int n, decode_n, is_rgb; + z->s->img_n = 0; // make stbi__cleanup_jpeg safe + + // validate req_comp + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + + // load a jpeg image from 
whichever source, but leave in YCbCr format + if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; } + + // determine actual number of components to generate + n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1; + + is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif)); + + if (z->s->img_n == 3 && n < 3 && !is_rgb) + decode_n = 1; + else + decode_n = z->s->img_n; + + // resample and color-convert + { + int k; + unsigned int i,j; + stbi_uc *output; + stbi_uc *coutput[4]; + + stbi__resample res_comp[4]; + + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + + // allocate line buffer big enough for upsampling off the edges + // with upsample factor of 4 + z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3); + if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + r->hs = z->img_h_max / z->img_comp[k].h; + r->vs = z->img_v_max / z->img_comp[k].v; + r->ystep = r->vs >> 1; + r->w_lores = (z->s->img_x + r->hs-1) / r->hs; + r->ypos = 0; + r->line0 = r->line1 = z->img_comp[k].data; + + if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; + else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2; + else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2; + else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel; + else r->resample = stbi__resample_row_generic; + } + + // can't error after this so, this is safe + output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1); + if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + // now go ahead and resample + for (j=0; j < z->s->img_y; ++j) { + stbi_uc *out = output + n * z->s->img_x * j; + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + int y_bot = r->ystep >= (r->vs >> 1); + coutput[k] = r->resample(z->img_comp[k].linebuf, + y_bot ? 
r->line1 : r->line0, + y_bot ? r->line0 : r->line1, + r->w_lores, r->hs); + if (++r->ystep >= r->vs) { + r->ystep = 0; + r->line0 = r->line1; + if (++r->ypos < z->img_comp[k].y) + r->line1 += z->img_comp[k].w2; + } + } + if (n >= 3) { + stbi_uc *y = coutput[0]; + if (z->s->img_n == 3) { + if (is_rgb) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = y[i]; + out[1] = coutput[1][i]; + out[2] = coutput[2][i]; + out[3] = 255; + out += n; + } + } else { + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else if (z->s->img_n == 4) { + if (z->app14_color_transform == 0) { // CMYK + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(coutput[0][i], m); + out[1] = stbi__blinn_8x8(coutput[1][i], m); + out[2] = stbi__blinn_8x8(coutput[2][i], m); + out[3] = 255; + out += n; + } + } else if (z->app14_color_transform == 2) { // YCCK + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(255 - out[0], m); + out[1] = stbi__blinn_8x8(255 - out[1], m); + out[2] = stbi__blinn_8x8(255 - out[2], m); + out += n; + } + } else { // YCbCr + alpha? 
Ignore the fourth channel for now + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else + for (i=0; i < z->s->img_x; ++i) { + out[0] = out[1] = out[2] = y[i]; + out[3] = 255; // not used if n==3 + out += n; + } + } else { + if (is_rgb) { + if (n == 1) + for (i=0; i < z->s->img_x; ++i) + *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + else { + for (i=0; i < z->s->img_x; ++i, out += 2) { + out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + out[1] = 255; + } + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 0) { + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + stbi_uc r = stbi__blinn_8x8(coutput[0][i], m); + stbi_uc g = stbi__blinn_8x8(coutput[1][i], m); + stbi_uc b = stbi__blinn_8x8(coutput[2][i], m); + out[0] = stbi__compute_y(r, g, b); + out[1] = 255; + out += n; + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 2) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]); + out[1] = 255; + out += n; + } + } else { + stbi_uc *y = coutput[0]; + if (n == 1) + for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; + else + for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255; + } + } + } + stbi__cleanup_jpeg(z); + *out_x = z->s->img_x; + *out_y = z->s->img_y; + if (comp) *comp = z->s->img_n >= 3 ? 
3 : 1; // report original components, not output + return output; + } +} + +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + unsigned char* result; + stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg)); + STBI_NOTUSED(ri); + j->s = s; + stbi__setup_jpeg(j); + result = load_jpeg_image(j, x,y,comp,req_comp); + STBI_FREE(j); + return result; +} + +static int stbi__jpeg_test(stbi__context *s) +{ + int r; + stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg)); + j->s = s; + stbi__setup_jpeg(j); + r = stbi__decode_jpeg_header(j, STBI__SCAN_type); + stbi__rewind(s); + STBI_FREE(j); + return r; +} + +static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) +{ + if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) { + stbi__rewind( j->s ); + return 0; + } + if (x) *x = j->s->img_x; + if (y) *y = j->s->img_y; + if (comp) *comp = j->s->img_n >= 3 ? 3 : 1; + return 1; +} + +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) +{ + int result; + stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg))); + j->s = s; + result = stbi__jpeg_info_raw(j, x, y, comp); + STBI_FREE(j); + return result; +} +#endif + +// public domain zlib decode v0.2 Sean Barrett 2006-11-18 +// simple implementation +// - all input must be provided in an upfront buffer +// - all output is written to a single output buffer (can malloc/realloc) +// performance +// - fast huffman + +#ifndef STBI_NO_ZLIB + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables +#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1) + +// zlib-style huffman encoding +// (jpegs packs from left, zlib from right, so can't share code) +typedef struct +{ + stbi__uint16 fast[1 << STBI__ZFAST_BITS]; + stbi__uint16 firstcode[16]; + int maxcode[17]; + stbi__uint16 firstsymbol[16]; + stbi_uc size[288]; + stbi__uint16 
value[288];
} stbi__zhuffman;

// reverse the low 16 bits of n
stbi_inline static int stbi__bitreverse16(int n)
{
  n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
  n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
  n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
  n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
  return n;
}

stbi_inline static int stbi__bit_reverse(int v, int bits)
{
   STBI_ASSERT(bits <= 16);
   // to bit reverse n bits, reverse 16 and shift
   // e.g. 11 bits, bit reverse and shift away 5
   return stbi__bitreverse16(v) >> (16-bits);
}

// Build the canonical Huffman decode tables (fast lookup + per-length
// firstcode/firstsymbol/maxcode) from a list of code lengths, per the
// DEFLATE spec. Returns 0 (via stbi__err) on an over-subscribed code.
static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
{
   int i,k=0;
   int code, next_code[16], sizes[17];

   // DEFLATE spec for generating codes
   memset(sizes, 0, sizeof(sizes));
   memset(z->fast, 0, sizeof(z->fast));
   // count how many codes exist at each bit length
   for (i=0; i < num; ++i)
      ++sizes[sizelist[i]];
   sizes[0] = 0; // length 0 means "symbol not used"
   for (i=1; i < 16; ++i)
      if (sizes[i] > (1 << i))
         return stbi__err("bad sizes", "Corrupt PNG");
   code = 0;
   for (i=1; i < 16; ++i) {
      next_code[i] = code;
      z->firstcode[i] = (stbi__uint16) code;
      z->firstsymbol[i] = (stbi__uint16) k;
      code = (code + sizes[i]);
      if (sizes[i])
         if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
      z->maxcode[i] = code << (16-i); // preshift for inner loop
      code <<= 1;
      k += sizes[i];
   }
   z->maxcode[16] = 0x10000; // sentinel
   for (i=0; i < num; ++i) {
      int s = sizelist[i];
      if (s) {
         int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
         stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); // pack length and symbol
         z->size [c] = (stbi_uc     ) s;
         z->value[c] = (stbi__uint16) i;
         if (s <= STBI__ZFAST_BITS) {
            // replicate the bit-reversed code into every fast-table slot it matches
            int j = stbi__bit_reverse(next_code[s],s);
            while (j < (1 << STBI__ZFAST_BITS)) {
               z->fast[j] = fastv;
               j += (1 << s);
            }
         }
         ++next_code[s];
      }
   }
   return 1;
}

// zlib-from-memory implementation for PNG reading
//    because PNG allows splitting the zlib stream arbitrarily,
//    and it's annoying structurally to have PNG call ZLIB call PNG,
//    we require PNG read all the IDATs and combine them into a single
//    memory buffer

// decoder state: input window, bit buffer, growable output, and the two
// current Huffman tables
typedef struct
{
   stbi_uc *zbuffer, *zbuffer_end;
   int num_bits;
   stbi__uint32 code_buffer;

   char *zout;
   char *zout_start;
   char *zout_end;
   int   z_expandable;

   stbi__zhuffman z_length, z_distance;
} stbi__zbuf;

// read one input byte; returns 0 past the end (callers detect corruption later)
stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
{
   if (z->zbuffer >= z->zbuffer_end) return 0;
   return *z->zbuffer++;
}

// top up the bit buffer so it holds at least 25 bits
static void stbi__fill_bits(stbi__zbuf *z)
{
   do {
      STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
      z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
      z->num_bits += 8;
   } while (z->num_bits <= 24);
}

// consume and return the next n bits (LSB-first, per DEFLATE)
stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
{
   unsigned int k;
   if (z->num_bits < n) stbi__fill_bits(z);
   k = z->code_buffer & ((1 << n) - 1);
   z->code_buffer >>= n;
   z->num_bits -= n;
   return k;
}

// decode a symbol longer than STBI__ZFAST_BITS; returns -1 on invalid code
static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
{
   int b,s,k;
   // not resolved by fast table, so compute it the slow way
   // use jpeg approach, which requires MSbits at top
   k = stbi__bit_reverse(a->code_buffer, 16);
   for (s=STBI__ZFAST_BITS+1; ; ++s)
      if (k < z->maxcode[s])
         break;
   if (s == 16) return -1; // invalid code!
   // code size is s, so:
   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
   STBI_ASSERT(z->size[b] == s);
   a->code_buffer >>= s;
   a->num_bits -= s;
   return z->value[b];
}

// decode one Huffman symbol: try the 9-bit fast table first, fall back to
// the per-length slow path
stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
{
   int b,s;
   if (a->num_bits < 16) stbi__fill_bits(a);
   b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
   if (b) {
      s = b >> 9; // high bits hold the code length
      a->code_buffer >>= s;
      a->num_bits -= s;
      return b & 511; // low 9 bits hold the symbol
   }
   return stbi__zhuffman_decode_slowpath(a, z);
}

static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
{
   char *q;
   int cur, limit, old_limit;
   z->zout = zout;
   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
   cur   = (int) (z->zout - z->zout_start);
   limit = old_limit = (int) (z->zout_end - z->zout_start);
   // double until the pending write fits
   while (cur + n > limit)
      limit *= 2;
   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
   STBI_NOTUSED(old_limit);
   if (q == NULL) return stbi__err("outofmem", "Out of memory");
   z->zout_start = q;
   z->zout       = q + cur;
   z->zout_end   = q + limit;
   return 1;
}

// base lengths / extra-bit counts for length codes 257..285 (DEFLATE tables)
static const int stbi__zlength_base[31] = {
   3,4,5,6,7,8,9,10,11,13,
   15,17,19,23,27,31,35,43,51,59,
   67,83,99,115,131,163,195,227,258,0,0 };

static const int stbi__zlength_extra[31]=
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

// base distances / extra-bit counts for distance codes 0..29
static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};

static const int stbi__zdist_extra[32] =
{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};

// decode one compressed DEFLATE block body: literals are copied out,
// length/distance pairs are expanded via the sliding-window copy
static int stbi__parse_huffman_block(stbi__zbuf *a)
{
   char *zout = a->zout;
   for(;;) {
      int z = stbi__zhuffman_decode(a, &a->z_length);
      if (z < 256) {
         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
         if (zout >= a->zout_end) {
            if (!stbi__zexpand(a, zout, 1)) return 0;
            zout = a->zout;
         }
         *zout++ = (char) z; // literal byte
      } else {
         stbi_uc *p;
         int len,dist;
         if (z == 256) {
            // end-of-block symbol
            a->zout = zout;
            return 1;
         }
         z -= 257;
         len = stbi__zlength_base[z];
         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
         z = stbi__zhuffman_decode(a, &a->z_distance);
         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG");
         dist = stbi__zdist_base[z];
         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
         if (zout + len > a->zout_end) {
            if (!stbi__zexpand(a, zout, len)) return 0;
            zout = a->zout;
         }
         p = (stbi_uc *) (zout - dist);
         if (dist == 1) { // run of one byte; common in images.
            stbi_uc v = *p;
            if (len) { do *zout++ = v; while (--len); }
         } else {
            // byte-by-byte copy is required: source and dest may overlap
            if (len) { do *zout++ = *p++; while (--len); }
         }
      }
   }
}

// read the dynamic-Huffman header (HLIT/HDIST/HCLEN), decode the
// run-length-coded code lengths, and build the length/distance tables
static int stbi__compute_huffman_codes(stbi__zbuf *a)
{
   static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
   stbi__zhuffman z_codelength;
   stbi_uc lencodes[286+32+137];//padding for maximum single op
   stbi_uc codelength_sizes[19];
   int i,n;

   int hlit  = stbi__zreceive(a,5) + 257;
   int hdist = stbi__zreceive(a,5) + 1;
   int hclen = stbi__zreceive(a,4) + 4;
   int ntot  = hlit + hdist;

   memset(codelength_sizes, 0, sizeof(codelength_sizes));
   for (i=0; i < hclen; ++i) {
      int s = stbi__zreceive(a,3);
      codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
   }
   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;

   n = 0;
   while (n < ntot) {
      int c = stbi__zhuffman_decode(a, &z_codelength);
      if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
      if (c < 16)
         lencodes[n++] = (stbi_uc) c;
      else {
         stbi_uc fill = 0;
         if (c == 16) {
            // repeat previous length 3..6 times
            c = stbi__zreceive(a,2)+3;
            if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
            fill = lencodes[n-1];
         } else if (c == 17)
            c =
stbi__zreceive(a,3)+3; // code 17: repeat zero 3..10 times
         else {
            STBI_ASSERT(c == 18);
            c = stbi__zreceive(a,7)+11; // code 18: repeat zero 11..138 times
         }
         if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
         memset(lencodes+n, fill, c);
         n += c;
      }
   }
   if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
   return 1;
}

// handle a stored (type 0) block: byte-align, read LEN/NLEN, memcpy payload
static int stbi__parse_uncompressed_block(stbi__zbuf *a)
{
   stbi_uc header[4];
   int len,nlen,k;
   if (a->num_bits & 7)
      stbi__zreceive(a, a->num_bits & 7); // discard
   // drain the bit-packed data into header
   k = 0;
   while (a->num_bits > 0) {
      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
      a->code_buffer >>= 8;
      a->num_bits -= 8;
   }
   STBI_ASSERT(a->num_bits == 0);
   // now fill header the normal way
   while (k < 4)
      header[k++] = stbi__zget8(a);
   len  = header[1] * 256 + header[0];
   nlen = header[3] * 256 + header[2];
   // NLEN is the one's complement of LEN; mismatch means corruption
   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
   if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
   if (a->zout + len > a->zout_end)
      if (!stbi__zexpand(a, a->zout, len)) return 0;
   memcpy(a->zout, a->zbuffer, len);
   a->zbuffer += len;
   a->zout += len;
   return 1;
}

// validate the two-byte zlib stream header (CMF/FLG)
static int stbi__parse_zlib_header(stbi__zbuf *a)
{
   int cmf   = stbi__zget8(a);
   int cm    = cmf & 15;
   /* int cinfo = cmf >> 4; */
   int flg   = stbi__zget8(a);
   if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
   if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
   if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
   return 1;
}

// fixed-Huffman code lengths from the DEFLATE spec (see Init algorithm below)
static const stbi_uc stbi__zdefault_length[288] =
{
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
};
static const stbi_uc stbi__zdefault_distance[32] =
{
   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
};
/*
Init algorithm:
{
   int i;   // use <= to match clearly with spec
   for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
   for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
   for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
   for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;

   for (i=0; i <= 31; ++i)      stbi__zdefault_distance[i] = 5;
}
*/

// top-level DEFLATE loop: optional zlib header, then blocks until BFINAL
static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
{
   int final, type;
   if (parse_header)
      if (!stbi__parse_zlib_header(a)) return 0;
   a->num_bits = 0;
   a->code_buffer = 0;
   do {
      final = stbi__zreceive(a,1);
      type = stbi__zreceive(a,2);
      if (type == 0) {
         if (!stbi__parse_uncompressed_block(a)) return 0;
      } else if (type == 3) {
         return 0; // block type 3 is reserved/invalid
      } else {
         if (type == 1) {
            // use fixed code lengths
            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
         } else {
            if (!stbi__compute_huffman_codes(a)) return 0;
         }
         if (!stbi__parse_huffman_block(a)) return 0;
      }
   } while (!final);
   return 1;
}

// set up the output buffer and run the decoder
static int stbi__do_zlib(stbi__zbuf *a, char
*obuf, int olen, int exp, int parse_header)
{
   a->zout_start = obuf;
   a->zout       = obuf;
   a->zout_end   = obuf + olen;
   a->z_expandable = exp; // allow realloc growth of the output buffer?

   return stbi__parse_zlib(a, parse_header);
}

// decode into a malloc'd, growable buffer starting at initial_size bytes;
// returns the buffer (caller frees) and optionally the decoded length
STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
{
   stbi__zbuf a;
   char *p = (char *) stbi__malloc(initial_size);
   if (p == NULL) return NULL;
   a.zbuffer = (stbi_uc *) buffer;
   a.zbuffer_end = (stbi_uc *) buffer + len;
   if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
      if (outlen) *outlen = (int) (a.zout - a.zout_start);
      return a.zout_start;
   } else {
      STBI_FREE(a.zout_start);
      return NULL;
   }
}

STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
{
   return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
}

// like stbi_zlib_decode_malloc_guesssize but the zlib header check is optional
STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
{
   stbi__zbuf a;
   char *p = (char *) stbi__malloc(initial_size);
   if (p == NULL) return NULL;
   a.zbuffer = (stbi_uc *) buffer;
   a.zbuffer_end = (stbi_uc *) buffer + len;
   if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
      if (outlen) *outlen = (int) (a.zout - a.zout_start);
      return a.zout_start;
   } else {
      STBI_FREE(a.zout_start);
      return NULL;
   }
}

// decode into a fixed-size caller buffer; returns decoded length or -1
STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
{
   stbi__zbuf a;
   a.zbuffer = (stbi_uc *) ibuffer;
   a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
   if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
      return (int) (a.zout - a.zout_start);
   else
      return -1;
}

// raw-DEFLATE variants (no zlib header) of the two entry points above
STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
{
   stbi__zbuf a;
   char *p = (char *) stbi__malloc(16384);
   if (p == NULL) return NULL;
   a.zbuffer = (stbi_uc *) buffer;
   a.zbuffer_end = (stbi_uc *) buffer+len;
   if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
      if (outlen) *outlen = (int) (a.zout - a.zout_start);
      return a.zout_start;
   } else {
      STBI_FREE(a.zout_start);
      return NULL;
   }
}

STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
{
   stbi__zbuf a;
   a.zbuffer = (stbi_uc *) ibuffer;
   a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
   if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
      return (int) (a.zout - a.zout_start);
   else
      return -1;
}
#endif

// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
//    simple implementation
//      - only 8-bit samples
//      - no CRC checking
//      - allocates lots of intermediate memory
//        - avoids problem of streaming data between subsystems
//        - avoids explicit window management
//    performance
//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding

#ifndef STBI_NO_PNG
typedef struct
{
   stbi__uint32 length;
   stbi__uint32 type;
} stbi__pngchunk;

// read one chunk header: 4-byte big-endian length + 4-byte type code
static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
{
   stbi__pngchunk c;
   c.length = stbi__get32be(s);
   c.type   = stbi__get32be(s);
   return c;
}

// verify the 8-byte PNG file signature
static int stbi__check_png_header(stbi__context *s)
{
   static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
   int i;
   for (i=0; i < 8; ++i)
      if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
   return 1;
}

typedef struct
{
   stbi__context *s;
   stbi_uc *idata, *expanded, *out;
   int depth;
} stbi__png;


enum {
   STBI__F_none=0,
   STBI__F_sub=1,
   STBI__F_up=2,
   STBI__F_avg=3,
   STBI__F_paeth=4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first,
   STBI__F_paeth_first
};

// filter to use on row 0, where there is no prior row to reference
static stbi_uc first_row_filter[5] =
{
   STBI__F_none,
   STBI__F_sub,
   STBI__F_none,
   STBI__F_avg_first,
   STBI__F_paeth_first
};

// PNG Paeth predictor: pick whichever of left/above/upper-left is closest
// to the linear estimate a + b - c
static int stbi__paeth(int a, int b, int c)
{
   int p = a + b - c;
   int pa = abs(p-a);
   int pb = abs(p-b);
   int pc = abs(p-c);
   if (pa <= pb && pa <= pc) return a;
   if (pb <= pc) return b;
   return c;
}

+static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; + +// create the png data from post-deflated data +static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) +{ + int bytes = (depth == 16? 2 : 1); + stbi__context *s = a->s; + stbi__uint32 i,j,stride = x*out_n*bytes; + stbi__uint32 img_len, img_width_bytes; + int k; + int img_n = s->img_n; // copy it into a local for later + + int output_bytes = out_n*bytes; + int filter_bytes = img_n*bytes; + int width = x; + + STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1); + a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into + if (!a->out) return stbi__err("outofmem", "Out of memory"); + + if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); + img_width_bytes = (((img_n * x * depth) + 7) >> 3); + img_len = (img_width_bytes + 1) * y; + + // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, + // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros), + // so just check for raw_len < img_len always. 
+ if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *prior; + int filter = *raw++; + + if (filter > 4) + return stbi__err("invalid filter","Corrupt PNG"); + + if (depth < 8) { + STBI_ASSERT(img_width_bytes <= x); + cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place + filter_bytes = 1; + width = img_width_bytes; + } + prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above + + // if first row, use special filter that doesn't sample previous row + if (j == 0) filter = first_row_filter[filter]; + + // handle first byte explicitly + for (k=0; k < filter_bytes; ++k) { + switch (filter) { + case STBI__F_none : cur[k] = raw[k]; break; + case STBI__F_sub : cur[k] = raw[k]; break; + case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; + case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; + case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; + case STBI__F_avg_first : cur[k] = raw[k]; break; + case STBI__F_paeth_first: cur[k] = raw[k]; break; + } + } + + if (depth == 8) { + if (img_n != out_n) + cur[img_n] = 255; // first pixel + raw += img_n; + cur += out_n; + prior += out_n; + } else if (depth == 16) { + if (img_n != out_n) { + cur[filter_bytes] = 255; // first pixel top byte + cur[filter_bytes+1] = 255; // first pixel bottom byte + } + raw += filter_bytes; + cur += output_bytes; + prior += output_bytes; + } else { + raw += 1; + cur += 1; + prior += 1; + } + + // this is a little gross, so that we don't switch per-pixel or per-component + if (depth < 8 || img_n == out_n) { + int nk = (width - 1)*filter_bytes; + #define STBI__CASE(f) \ + case f: \ + for (k=0; k < nk; ++k) + switch (filter) { + // "none" filter turns into a memcpy here; make that explicit. 
+ case STBI__F_none: memcpy(cur, raw, nk); break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; + } + #undef STBI__CASE + raw += nk; + } else { + STBI_ASSERT(img_n+1 == out_n); + #define STBI__CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ + for (k=0; k < filter_bytes; ++k) + switch (filter) { + STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; + } + #undef STBI__CASE + + // the loop above sets the high byte of the pixels' alpha, but for + // 16 bit png files we also need the low byte set. we'll do that here. 
+ if (depth == 16) { + cur = a->out + stride*j; // start at the beginning of the row again + for (i=0; i < x; ++i,cur+=output_bytes) { + cur[filter_bytes+1] = 255; + } + } + } + } + + // we make a separate pass to expand bits to pixels; for performance, + // this could run two scanlines behind the above code, so it won't + // intefere with filtering but will still be in the cache. + if (depth < 8) { + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; + // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit + // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop + stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range + + // note that the final byte might overshoot and write more data than desired. + // we can allocate enough data that this never writes out of memory, but it + // could also overwrite the next scanline. can it overwrite non-empty data + // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. 
+ // so we need to explicitly clamp the final ones + + if (depth == 4) { + for (k=x*img_n; k >= 2; k-=2, ++in) { + *cur++ = scale * ((*in >> 4) ); + *cur++ = scale * ((*in ) & 0x0f); + } + if (k > 0) *cur++ = scale * ((*in >> 4) ); + } else if (depth == 2) { + for (k=x*img_n; k >= 4; k-=4, ++in) { + *cur++ = scale * ((*in >> 6) ); + *cur++ = scale * ((*in >> 4) & 0x03); + *cur++ = scale * ((*in >> 2) & 0x03); + *cur++ = scale * ((*in ) & 0x03); + } + if (k > 0) *cur++ = scale * ((*in >> 6) ); + if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); + if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); + } else if (depth == 1) { + for (k=x*img_n; k >= 8; k-=8, ++in) { + *cur++ = scale * ((*in >> 7) ); + *cur++ = scale * ((*in >> 6) & 0x01); + *cur++ = scale * ((*in >> 5) & 0x01); + *cur++ = scale * ((*in >> 4) & 0x01); + *cur++ = scale * ((*in >> 3) & 0x01); + *cur++ = scale * ((*in >> 2) & 0x01); + *cur++ = scale * ((*in >> 1) & 0x01); + *cur++ = scale * ((*in ) & 0x01); + } + if (k > 0) *cur++ = scale * ((*in >> 7) ); + if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); + if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); + if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); + if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); + if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); + if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); + } + if (img_n != out_n) { + int q; + // insert alpha = 255 + cur = a->out + stride*j; + if (img_n == 1) { + for (q=x-1; q >= 0; --q) { + cur[q*2+1] = 255; + cur[q*2+0] = cur[q]; + } + } else { + STBI_ASSERT(img_n == 3); + for (q=x-1; q >= 0; --q) { + cur[q*4+3] = 255; + cur[q*4+2] = cur[q*3+2]; + cur[q*4+1] = cur[q*3+1]; + cur[q*4+0] = cur[q*3+0]; + } + } + } + } + } else if (depth == 16) { + // force the image data from big-endian to platform-native. + // this is done in a separate pass due to the decoding relying + // on the data being untouched, but could probably be done + // per-line during decode if care is taken. 
+ stbi_uc *cur = a->out; + stbi__uint16 *cur16 = (stbi__uint16*)cur; + + for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { + *cur16 = (cur[0] << 8) | cur[1]; + } + } + + return 1; +} + +static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) +{ + int bytes = (depth == 16 ? 2 : 1); + int out_bytes = out_n * bytes; + stbi_uc *final; + int p; + if (!interlaced) + return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); + + // de-interlacing + final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); + for (p=0; p < 7; ++p) { + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; + if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { + STBI_FREE(final); + return 0; + } + for (j=0; j < y; ++j) { + for (i=0; i < x; ++i) { + int out_y = j*yspc[p]+yorig[p]; + int out_x = i*xspc[p]+xorig[p]; + memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, + a->out + (j*x+i)*out_bytes, out_bytes); + } + } + STBI_FREE(a->out); + image_data += img_len; + image_data_len -= img_len; + } + } + a->out = final; + + return 1; +} + +static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + // compute color-based transparency, assuming we've + // already got 255 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i=0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] 
? 0 : 255);
         p += 2;
      }
   } else {
      for (i=0; i < pixel_count; ++i) {
         // RGBA: zero alpha only on an exact 3-channel color-key match
         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
            p[3] = 0;
         p += 4;
      }
   }
   return 1;
}

// 16-bit variant of stbi__compute_transparency (alpha is 0 or 65535)
static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
{
   stbi__context *s = z->s;
   stbi__uint32 i, pixel_count = s->img_x * s->img_y;
   stbi__uint16 *p = (stbi__uint16*) z->out;

   // compute color-based transparency, assuming we've
   // already got 65535 as the alpha value in the output
   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n == 2) {
      for (i = 0; i < pixel_count; ++i) {
         p[1] = (p[0] == tc[0] ? 0 : 65535);
         p += 2;
      }
   } else {
      for (i = 0; i < pixel_count; ++i) {
         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
            p[3] = 0;
         p += 4;
      }
   }
   return 1;
}

// Replace the 1-byte palette indices in a->out with pal_img_n-channel
// colors looked up in the (always RGBA-stored) palette.
static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
{
   stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
   stbi_uc *p, *temp_out, *orig = a->out;

   p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
   if (p == NULL) return stbi__err("outofmem", "Out of memory");

   // between here and free(out) below, exitting would leak
   temp_out = p;

   if (pal_img_n == 3) {
      for (i=0; i < pixel_count; ++i) {
         int n = orig[i]*4; // palette entries are stored 4 bytes apart
         p[0] = palette[n  ];
         p[1] = palette[n+1];
         p[2] = palette[n+2];
         p += 3;
      }
   } else {
      for (i=0; i < pixel_count; ++i) {
         int n = orig[i]*4;
         p[0] = palette[n  ];
         p[1] = palette[n+1];
         p[2] = palette[n+2];
         p[3] = palette[n+3];
         p += 4;
      }
   }
   STBI_FREE(a->out);
   a->out = temp_out;

   STBI_NOTUSED(len);

   return 1;
}

// global decode options for Apple's CgBI (iPhone) PNG variant
static int stbi__unpremultiply_on_load = 0;
static int stbi__de_iphone_flag = 0;

STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
{
   stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
}

STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
{
   stbi__de_iphone_flag = flag_true_if_should_convert;
}

// convert iPhone-PNG BGR(A) output back to RGB(A), optionally
// unpremultiplying the alpha
static void stbi__de_iphone(stbi__png *z)
{
   stbi__context *s = z->s;
   stbi__uint32 i, pixel_count = s->img_x * s->img_y;
   stbi_uc *p = z->out;

   if (s->img_out_n == 3) {  // convert bgr to rgb
      for (i=0; i < pixel_count; ++i) {
         stbi_uc t = p[0];
         p[0] = p[2];
         p[2] = t;
         p += 3;
      }
   } else {
      STBI_ASSERT(s->img_out_n == 4);
      if (stbi__unpremultiply_on_load) {
         // convert bgr to rgb and unpremultiply
         for (i=0; i < pixel_count; ++i) {
            stbi_uc a = p[3];
            stbi_uc t = p[0];
            if (a) {
               stbi_uc half = a / 2; // round to nearest when dividing by alpha
               p[0] = (p[2] * 255 + half) / a;
               p[1] = (p[1] * 255 + half) / a;
               p[2] = ( t   * 255 + half) / a;
            } else {
               p[0] = p[2];
               p[2] = t;
            }
            p += 4;
         }
      } else {
         // convert bgr to rgb
         for (i=0; i < pixel_count; ++i) {
            stbi_uc t = p[0];
            p[0] = p[2];
            p[2] = t;
            p += 4;
         }
      }
   }
}

// build a 32-bit chunk-type code from its four ASCII characters
#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))

// Walk the PNG chunk stream (IHDR/PLTE/tRNS/IDAT/...); behavior depends on
// 'scan' (header-only vs full load). Continues past the end of this view.
static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
{
   stbi_uc palette[1024], pal_img_n=0;
   stbi_uc has_trans=0, tc[3];
   stbi__uint16 tc16[3];
   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
   int first=1,k,interlace=0, color=0, is_iphone=0;
   stbi__context *s = z->s;

   z->expanded = NULL;
   z->idata = NULL;
   z->out = NULL;

   if (!stbi__check_png_header(s)) return 0;

   if (scan == STBI__SCAN_type) return 1;

   for (;;) {
      stbi__pngchunk c = stbi__get_chunk_header(s);
      switch (c.type) {
         case STBI__PNG_TYPE('C','g','B','I'):
            is_iphone = 1;
            stbi__skip(s, c.length);
            break;
         case STBI__PNG_TYPE('I','H','D','R'): {
            int comp,filter;
            if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
            first = 0;
            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
            s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24))
return stbi__err("too large","Very large image (corrupt?)"); + z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); + color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); + comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG"); + filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG"); + interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); + if (!pal_img_n) { + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); + if (scan == STBI__SCAN_header) return 1; + } else { + // if paletted, then pal_n is our final components, and + // img_n is # components to decompress/filter. 
+ s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); + // if SCAN_header, have to scan to see if we have a tRNS + } + break; + } + + case STBI__PNG_TYPE('P','L','T','E'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); + pal_len = c.length / 3; + if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); + for (i=0; i < pal_len; ++i) { + palette[i*4+0] = stbi__get8(s); + palette[i*4+1] = stbi__get8(s); + palette[i*4+2] = stbi__get8(s); + palette[i*4+3] = 255; + } + break; + } + + case STBI__PNG_TYPE('t','R','N','S'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); + if (pal_img_n) { + if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } + if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); + if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); + pal_img_n = 4; + for (i=0; i < c.length; ++i) + palette[i*4+3] = stbi__get8(s); + } else { + if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); + if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); + has_trans = 1; + if (z->depth == 16) { + for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is + } else { + for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger + } + } + break; + } + + case STBI__PNG_TYPE('I','D','A','T'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); + if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; } + if ((int)(ioff + c.length) < (int)ioff) return 0; + if (ioff + c.length > idata_limit) { + stbi__uint32 idata_limit_old = 
idata_limit; + stbi_uc *p; + if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096; + while (ioff + c.length > idata_limit) + idata_limit *= 2; + STBI_NOTUSED(idata_limit_old); + p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); + z->idata = p; + } + if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); + ioff += c.length; + break; + } + + case STBI__PNG_TYPE('I','E','N','D'): { + stbi__uint32 raw_len, bpl; + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (scan != STBI__SCAN_load) return 1; + if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); + // initial guess for decoded data size to avoid unnecessary reallocs + bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component + raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; + z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); + if (z->expanded == NULL) return 0; // zlib should set error + STBI_FREE(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; + else + s->img_out_n = s->img_n; + if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; + if (has_trans) { + if (z->depth == 16) { + if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; + } else { + if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; + } + } + if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) + stbi__de_iphone(z); + if (pal_img_n) { + // pal_img_n == 3 or 4 + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) + return 0; + } else if 
(has_trans) { + // non-paletted image with tRNS -> source image has (constant) alpha + ++s->img_n; + } + STBI_FREE(z->expanded); z->expanded = NULL; + return 1; + } + + default: + // if critical, fail + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if ((c.type & (1 << 29)) == 0) { + #ifndef STBI_NO_FAILURE_STRINGS + // not threadsafe + static char invalid_chunk[] = "XXXX PNG chunk not known"; + invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); + invalid_chunk[1] = STBI__BYTECAST(c.type >> 16); + invalid_chunk[2] = STBI__BYTECAST(c.type >> 8); + invalid_chunk[3] = STBI__BYTECAST(c.type >> 0); + #endif + return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type"); + } + stbi__skip(s, c.length); + break; + } + // end of PNG chunk, read and skip CRC + stbi__get32be(s); + } +} + +static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri) +{ + void *result=NULL; + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { + if (p->depth < 8) + ri->bits_per_channel = 8; + else + ri->bits_per_channel = p->depth; + result = p->out; + p->out = NULL; + if (req_comp && req_comp != p->s->img_out_n) { + if (ri->bits_per_channel == 8) + result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + else + result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + p->s->img_out_n = req_comp; + if (result == NULL) return result; + } + *x = p->s->img_x; + *y = p->s->img_y; + if (n) *n = p->s->img_n; + } + STBI_FREE(p->out); p->out = NULL; + STBI_FREE(p->expanded); p->expanded = NULL; + STBI_FREE(p->idata); p->idata = NULL; + + return result; +} + +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi__png p; + p.s = s; + return stbi__do_png(&p, 
x,y,comp,req_comp, ri); +} + +static int stbi__png_test(stbi__context *s) +{ + int r; + r = stbi__check_png_header(s); + stbi__rewind(s); + return r; +} + +static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp) +{ + if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) { + stbi__rewind( p->s ); + return 0; + } + if (x) *x = p->s->img_x; + if (y) *y = p->s->img_y; + if (comp) *comp = p->s->img_n; + return 1; +} + +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__png p; + p.s = s; + return stbi__png_info_raw(&p, x, y, comp); +} + +static int stbi__png_is16(stbi__context *s) +{ + stbi__png p; + p.s = s; + if (!stbi__png_info_raw(&p, NULL, NULL, NULL)) + return 0; + if (p.depth != 16) { + stbi__rewind(p.s); + return 0; + } + return 1; +} +#endif + +// Microsoft/Windows BMP image + +#ifndef STBI_NO_BMP +static int stbi__bmp_test_raw(stbi__context *s) +{ + int r; + int sz; + if (stbi__get8(s) != 'B') return 0; + if (stbi__get8(s) != 'M') return 0; + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + stbi__get32le(s); // discard data offset + sz = stbi__get32le(s); + r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124); + return r; +} + +static int stbi__bmp_test(stbi__context *s) +{ + int r = stbi__bmp_test_raw(s); + stbi__rewind(s); + return r; +} + + +// returns 0..31 for the highest set bit +static int stbi__high_bit(unsigned int z) +{ + int n=0; + if (z == 0) return -1; + if (z >= 0x10000) n += 16, z >>= 16; + if (z >= 0x00100) n += 8, z >>= 8; + if (z >= 0x00010) n += 4, z >>= 4; + if (z >= 0x00004) n += 2, z >>= 2; + if (z >= 0x00002) n += 1, z >>= 1; + return n; +} + +static int stbi__bitcount(unsigned int a) +{ + a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 + a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4 + a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits + a = (a + (a >> 8)); // max 16 per 8 bits + a = 
(a + (a >> 16)); // max 32 per 8 bits + return a & 0xff; +} + +// extract an arbitrarily-aligned N-bit value (N=bits) +// from v, and then make it 8-bits long and fractionally +// extend it to full full range. +static int stbi__shiftsigned(int v, int shift, int bits) +{ + static unsigned int mul_table[9] = { + 0, + 0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/, + 0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/, + }; + static unsigned int shift_table[9] = { + 0, 0,0,1,0,2,4,6,0, + }; + if (shift < 0) + v <<= -shift; + else + v >>= shift; + STBI_ASSERT(v >= 0 && v < 256); + v >>= (8-bits); + STBI_ASSERT(bits >= 0 && bits <= 8); + return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits]; +} + +typedef struct +{ + int bpp, offset, hsz; + unsigned int mr,mg,mb,ma, all_a; +} stbi__bmp_data; + +static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) +{ + int hsz; + if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP"); + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + info->offset = stbi__get32le(s); + info->hsz = hsz = stbi__get32le(s); + info->mr = info->mg = info->mb = info->ma = 0; + + if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown"); + if (hsz == 12) { + s->img_x = stbi__get16le(s); + s->img_y = stbi__get16le(s); + } else { + s->img_x = stbi__get32le(s); + s->img_y = stbi__get32le(s); + } + if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); + info->bpp = stbi__get16le(s); + if (hsz != 12) { + int compress = stbi__get32le(s); + if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); + stbi__get32le(s); // discard sizeof + stbi__get32le(s); // discard hres + stbi__get32le(s); // discard vres + stbi__get32le(s); // 
discard colorsused + stbi__get32le(s); // discard max important + if (hsz == 40 || hsz == 56) { + if (hsz == 56) { + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + } + if (info->bpp == 16 || info->bpp == 32) { + if (compress == 0) { + if (info->bpp == 32) { + info->mr = 0xffu << 16; + info->mg = 0xffu << 8; + info->mb = 0xffu << 0; + info->ma = 0xffu << 24; + info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 + } else { + info->mr = 31u << 10; + info->mg = 31u << 5; + info->mb = 31u << 0; + } + } else if (compress == 3) { + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + // not documented, but generated by photoshop and handled by mspaint + if (info->mr == info->mg && info->mg == info->mb) { + // ?!?!? + return stbi__errpuc("bad BMP", "bad BMP"); + } + } else + return stbi__errpuc("bad BMP", "bad BMP"); + } + } else { + int i; + if (hsz != 108 && hsz != 124) + return stbi__errpuc("bad BMP", "bad BMP"); + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + info->ma = stbi__get32le(s); + stbi__get32le(s); // discard color space + for (i=0; i < 12; ++i) + stbi__get32le(s); // discard color space parameters + if (hsz == 124) { + stbi__get32le(s); // discard rendering intent + stbi__get32le(s); // discard offset of profile data + stbi__get32le(s); // discard size of profile data + stbi__get32le(s); // discard reserved + } + } + } + return (void *) 1; +} + + +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + unsigned int mr=0,mg=0,mb=0,ma=0, all_a; + stbi_uc pal[256][4]; + int psize=0,i,j,width; + int flip_vertically, pad, target; + stbi__bmp_data info; + STBI_NOTUSED(ri); + + info.all_a = 255; + if (stbi__bmp_parse_header(s, &info) == NULL) + return NULL; // error code already set + + flip_vertically = ((int) s->img_y) > 0; + s->img_y = 
abs((int) s->img_y); + + mr = info.mr; + mg = info.mg; + mb = info.mb; + ma = info.ma; + all_a = info.all_a; + + if (info.hsz == 12) { + if (info.bpp < 24) + psize = (info.offset - 14 - 24) / 3; + } else { + if (info.bpp < 16) + psize = (info.offset - 14 - info.hsz) >> 2; + } + + s->img_n = ma ? 4 : 3; + if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 + target = req_comp; + else + target = s->img_n; // if they want monochrome, we'll post-convert + + // sanity-check size + if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "Corrupt BMP"); + + out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + if (info.bpp < 16) { + int z=0; + if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); } + for (i=0; i < psize; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + if (info.hsz != 12) stbi__get8(s); + pal[i][3] = 255; + } + stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 
3 : 4)); + if (info.bpp == 1) width = (s->img_x + 7) >> 3; + else if (info.bpp == 4) width = (s->img_x + 1) >> 1; + else if (info.bpp == 8) width = s->img_x; + else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); } + pad = (-width)&3; + if (info.bpp == 1) { + for (j=0; j < (int) s->img_y; ++j) { + int bit_offset = 7, v = stbi__get8(s); + for (i=0; i < (int) s->img_x; ++i) { + int color = (v>>bit_offset)&0x1; + out[z++] = pal[color][0]; + out[z++] = pal[color][1]; + out[z++] = pal[color][2]; + if((--bit_offset) < 0) { + bit_offset = 7; + v = stbi__get8(s); + } + } + stbi__skip(s, pad); + } + } else { + for (j=0; j < (int) s->img_y; ++j) { + for (i=0; i < (int) s->img_x; i += 2) { + int v=stbi__get8(s),v2=0; + if (info.bpp == 4) { + v2 = v & 15; + v >>= 4; + } + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + v = (info.bpp == 8) ? stbi__get8(s) : v2; + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + } + stbi__skip(s, pad); + } + } + } else { + int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; + int z = 0; + int easy=0; + stbi__skip(s, info.offset - 14 - info.hsz); + if (info.bpp == 24) width = 3 * s->img_x; + else if (info.bpp == 16) width = 2*s->img_x; + else /* bpp = 32 and pad = 0 */ width=0; + pad = (-width) & 3; + if (info.bpp == 24) { + easy = 1; + } else if (info.bpp == 32) { + if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000) + easy = 2; + } + if (!easy) { + if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } + // right shift amt to put high bit in position #7 + rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr); + gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg); + bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb); + ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma); 
+ } + for (j=0; j < (int) s->img_y; ++j) { + if (easy) { + for (i=0; i < (int) s->img_x; ++i) { + unsigned char a; + out[z+2] = stbi__get8(s); + out[z+1] = stbi__get8(s); + out[z+0] = stbi__get8(s); + z += 3; + a = (easy == 2 ? stbi__get8(s) : 255); + all_a |= a; + if (target == 4) out[z++] = a; + } + } else { + int bpp = info.bpp; + for (i=0; i < (int) s->img_x; ++i) { + stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s)); + unsigned int a; + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount)); + a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255); + all_a |= a; + if (target == 4) out[z++] = STBI__BYTECAST(a); + } + } + stbi__skip(s, pad); + } + } + + // if alpha channel is all 0s, replace with all 255s + if (target == 4 && all_a == 0) + for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) + out[i] = 255; + + if (flip_vertically) { + stbi_uc t; + for (j=0; j < (int) s->img_y>>1; ++j) { + stbi_uc *p1 = out + j *s->img_x*target; + stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; + for (i=0; i < (int) s->img_x*target; ++i) { + t = p1[i], p1[i] = p2[i], p2[i] = t; + } + } + } + + if (req_comp && req_comp != target) { + out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + return out; +} +#endif + +// Targa Truevision - TGA +// by Jonathan Dummer +#ifndef STBI_NO_TGA +// returns STBI_rgb or whatever, 0 on error +static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) +{ + // only RGB or RGBA (incl. 
16bit) or grey allowed + if (is_rgb16) *is_rgb16 = 0; + switch(bits_per_pixel) { + case 8: return STBI_grey; + case 16: if(is_grey) return STBI_grey_alpha; + // fallthrough + case 15: if(is_rgb16) *is_rgb16 = 1; + return STBI_rgb; + case 24: // fallthrough + case 32: return bits_per_pixel/8; + default: return 0; + } +} + +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp) +{ + int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp; + int sz, tga_colormap_type; + stbi__get8(s); // discard Offset + tga_colormap_type = stbi__get8(s); // colormap type + if( tga_colormap_type > 1 ) { + stbi__rewind(s); + return 0; // only RGB or indexed allowed + } + tga_image_type = stbi__get8(s); // image type + if ( tga_colormap_type == 1 ) { // colormapped (paletted) image + if (tga_image_type != 1 && tga_image_type != 9) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip image x and y origin + tga_colormap_bpp = sz; + } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE + if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) { + stbi__rewind(s); + return 0; // only RGB or grey allowed, +/- RLE + } + stbi__skip(s,9); // skip colormap specification and image x/y origin + tga_colormap_bpp = 0; + } + tga_w = stbi__get16le(s); + if( tga_w < 1 ) { + stbi__rewind(s); + return 0; // test width + } + tga_h = stbi__get16le(s); + if( tga_h < 1 ) { + stbi__rewind(s); + return 0; // test height + } + tga_bits_per_pixel = stbi__get8(s); // bits per pixel + stbi__get8(s); // ignore alpha bits + if (tga_colormap_bpp != 0) { + if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) { + // when using a colormap, 
tga_bits_per_pixel is the size of the indexes + // I don't think anything but 8 or 16bit indexes makes sense + stbi__rewind(s); + return 0; + } + tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); + } else { + tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); + } + if(!tga_comp) { + stbi__rewind(s); + return 0; + } + if (x) *x = tga_w; + if (y) *y = tga_h; + if (comp) *comp = tga_comp; + return 1; // seems to have passed everything +} + +static int stbi__tga_test(stbi__context *s) +{ + int res = 0; + int sz, tga_color_type; + stbi__get8(s); // discard Offset + tga_color_type = stbi__get8(s); // color type + if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed + sz = stbi__get8(s); // image type + if ( tga_color_type == 1 ) { // colormapped (paletted) image + if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9 + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + stbi__skip(s,4); // skip image x and y origin + } else { // "normal" image w/o colormap + if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE + stbi__skip(s,9); // skip colormap specification and image x/y origin + } + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height + sz = stbi__get8(s); // bits per pixel + if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + + res = 1; // if we got this far, everything's good and we can return 1 instead of 0 + +errorEnd: + stbi__rewind(s); + return res; +} + +// read 16bit value and convert to 24bit RGB 
+static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) +{ + stbi__uint16 px = (stbi__uint16)stbi__get16le(s); + stbi__uint16 fiveBitMask = 31; + // we have 3 channels with 5bits each + int r = (px >> 10) & fiveBitMask; + int g = (px >> 5) & fiveBitMask; + int b = px & fiveBitMask; + // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later + out[0] = (stbi_uc)((r * 255)/31); + out[1] = (stbi_uc)((g * 255)/31); + out[2] = (stbi_uc)((b * 255)/31); + + // some people claim that the most significant bit might be used for alpha + // (possibly if an alpha-bit is set in the "image descriptor byte") + // but that only made 16bit test images completely translucent.. + // so let's treat all 15 and 16bit TGAs as RGB with no alpha. +} + +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + // read in the TGA header stuff + int tga_offset = stbi__get8(s); + int tga_indexed = stbi__get8(s); + int tga_image_type = stbi__get8(s); + int tga_is_RLE = 0; + int tga_palette_start = stbi__get16le(s); + int tga_palette_len = stbi__get16le(s); + int tga_palette_bits = stbi__get8(s); + int tga_x_origin = stbi__get16le(s); + int tga_y_origin = stbi__get16le(s); + int tga_width = stbi__get16le(s); + int tga_height = stbi__get16le(s); + int tga_bits_per_pixel = stbi__get8(s); + int tga_comp, tga_rgb16=0; + int tga_inverted = stbi__get8(s); + // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?) 
+ // image data + unsigned char *tga_data; + unsigned char *tga_palette = NULL; + int i, j; + unsigned char raw_data[4] = {0}; + int RLE_count = 0; + int RLE_repeating = 0; + int read_next_pixel = 1; + STBI_NOTUSED(ri); + + // do a tiny bit of precessing + if ( tga_image_type >= 8 ) + { + tga_image_type -= 8; + tga_is_RLE = 1; + } + tga_inverted = 1 - ((tga_inverted >> 5) & 1); + + // If I'm paletted, then I'll use the number of bits from the palette + if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16); + else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16); + + if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency + return stbi__errpuc("bad format", "Can't find out TGA pixelformat"); + + // tga info + *x = tga_width; + *y = tga_height; + if (comp) *comp = tga_comp; + + if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0)) + return stbi__errpuc("too large", "Corrupt TGA"); + + tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0); + if (!tga_data) return stbi__errpuc("outofmem", "Out of memory"); + + // skip to the data's starting position (offset usually = 0) + stbi__skip(s, tga_offset ); + + if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) { + for (i=0; i < tga_height; ++i) { + int row = tga_inverted ? tga_height -i - 1 : i; + stbi_uc *tga_row = tga_data + row*tga_width*tga_comp; + stbi__getn(s, tga_row, tga_width * tga_comp); + } + } else { + // do I need to load a palette? + if ( tga_indexed) + { + // any data to skip? 
(offset usually = 0) + stbi__skip(s, tga_palette_start ); + // load the palette + tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0); + if (!tga_palette) { + STBI_FREE(tga_data); + return stbi__errpuc("outofmem", "Out of memory"); + } + if (tga_rgb16) { + stbi_uc *pal_entry = tga_palette; + STBI_ASSERT(tga_comp == STBI_rgb); + for (i=0; i < tga_palette_len; ++i) { + stbi__tga_read_rgb16(s, pal_entry); + pal_entry += tga_comp; + } + } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) { + STBI_FREE(tga_data); + STBI_FREE(tga_palette); + return stbi__errpuc("bad palette", "Corrupt TGA"); + } + } + // load the data + for (i=0; i < tga_width * tga_height; ++i) + { + // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk? + if ( tga_is_RLE ) + { + if ( RLE_count == 0 ) + { + // yep, get the next byte as a RLE command + int RLE_cmd = stbi__get8(s); + RLE_count = 1 + (RLE_cmd & 127); + RLE_repeating = RLE_cmd >> 7; + read_next_pixel = 1; + } else if ( !RLE_repeating ) + { + read_next_pixel = 1; + } + } else + { + read_next_pixel = 1; + } + // OK, if I need to read a pixel, do it now + if ( read_next_pixel ) + { + // load however much data we did have + if ( tga_indexed ) + { + // read in index, then perform the lookup + int pal_idx = (tga_bits_per_pixel == 8) ? 
stbi__get8(s) : stbi__get16le(s); + if ( pal_idx >= tga_palette_len ) { + // invalid index + pal_idx = 0; + } + pal_idx *= tga_comp; + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = tga_palette[pal_idx+j]; + } + } else if(tga_rgb16) { + STBI_ASSERT(tga_comp == STBI_rgb); + stbi__tga_read_rgb16(s, raw_data); + } else { + // read in the data raw + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = stbi__get8(s); + } + } + // clear the reading flag for the next pixel + read_next_pixel = 0; + } // end of reading a pixel + + // copy data + for (j = 0; j < tga_comp; ++j) + tga_data[i*tga_comp+j] = raw_data[j]; + + // in case we're in RLE mode, keep counting down + --RLE_count; + } + // do I need to invert the image? + if ( tga_inverted ) + { + for (j = 0; j*2 < tga_height; ++j) + { + int index1 = j * tga_width * tga_comp; + int index2 = (tga_height - 1 - j) * tga_width * tga_comp; + for (i = tga_width * tga_comp; i > 0; --i) + { + unsigned char temp = tga_data[index1]; + tga_data[index1] = tga_data[index2]; + tga_data[index2] = temp; + ++index1; + ++index2; + } + } + } + // clear my palette, if I had one + if ( tga_palette != NULL ) + { + STBI_FREE( tga_palette ); + } + } + + // swap RGB - if the source data was RGB16, it already is in the right order + if (tga_comp >= 3 && !tga_rgb16) + { + unsigned char* tga_pixel = tga_data; + for (i=0; i < tga_width * tga_height; ++i) + { + unsigned char temp = tga_pixel[0]; + tga_pixel[0] = tga_pixel[2]; + tga_pixel[2] = temp; + tga_pixel += tga_comp; + } + } + + // convert to target component count + if (req_comp && req_comp != tga_comp) + tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); + + // the things I do to get rid of an error message, and yet keep + // Microsoft's C compilers happy... 
[8^( + tga_palette_start = tga_palette_len = tga_palette_bits = + tga_x_origin = tga_y_origin = 0; + // OK, done + return tga_data; +} +#endif + +// ************************************************************************************************* +// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s) +{ + int r = (stbi__get32be(s) == 0x38425053); + stbi__rewind(s); + return r; +} + +static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount) +{ + int count, nleft, len; + + count = 0; + while ((nleft = pixelCount - count) > 0) { + len = stbi__get8(s); + if (len == 128) { + // No-op. + } else if (len < 128) { + // Copy next len+1 bytes literally. + len++; + if (len > nleft) return 0; // corrupt data + count += len; + while (len) { + *p = stbi__get8(s); + p += 4; + len--; + } + } else if (len > 128) { + stbi_uc val; + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) + len = 257 - len; + if (len > nleft) return 0; // corrupt data + val = stbi__get8(s); + count += len; + while (len) { + *p = val; + p += 4; + len--; + } + } + } + + return 1; +} + +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + int pixelCount; + int channelCount, compression; + int channel, i; + int bitdepth; + int w,h; + stbi_uc *out; + STBI_NOTUSED(ri); + + // Check identifier + if (stbi__get32be(s) != 0x38425053) // "8BPS" + return stbi__errpuc("not PSD", "Corrupt PSD image"); + + // Check file type version. + if (stbi__get16be(s) != 1) + return stbi__errpuc("wrong version", "Unsupported version of PSD image"); + + // Skip 6 reserved bytes. + stbi__skip(s, 6 ); + + // Read the number of channels (R, G, B, A, etc). 
+ channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) + return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image"); + + // Read the rows and columns of the image. + h = stbi__get32be(s); + w = stbi__get32be(s); + + // Make sure the depth is 8 bits. + bitdepth = stbi__get16be(s); + if (bitdepth != 8 && bitdepth != 16) + return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit"); + + // Make sure the color mode is RGB. + // Valid options are: + // 0: Bitmap + // 1: Grayscale + // 2: Indexed color + // 3: RGB color + // 4: CMYK color + // 7: Multichannel + // 8: Duotone + // 9: Lab color + if (stbi__get16be(s) != 3) + return stbi__errpuc("wrong color format", "PSD is not in RGB color format"); + + // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) + stbi__skip(s,stbi__get32be(s) ); + + // Skip the image resources. (resolution, pen tool paths, etc) + stbi__skip(s, stbi__get32be(s) ); + + // Skip the reserved data. + stbi__skip(s, stbi__get32be(s) ); + + // Find out if the data is compressed. + // Known values: + // 0: no compression + // 1: RLE compressed + compression = stbi__get16be(s); + if (compression > 1) + return stbi__errpuc("bad compression", "PSD has an unknown compression format"); + + // Check size + if (!stbi__mad3sizes_valid(4, w, h, 0)) + return stbi__errpuc("too large", "Corrupt PSD"); + + // Create the destination image. + + if (!compression && bitdepth == 16 && bpc == 16) { + out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); + ri->bits_per_channel = 16; + } else + out = (stbi_uc *) stbi__malloc(4 * w*h); + + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + pixelCount = w*h; + + // Initialize the data to zero. + //memset( out, 0, pixelCount * 4 ); + + // Finally, the image data. 
+ if (compression) { + // RLE as used by .PSD and .TIFF + // Loop until you get the number of unpacked bytes you are expecting: + // Read the next source byte into n. + // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. + // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. + // Else if n is 128, noop. + // Endloop + + // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data, + // which we're going to just skip. + stbi__skip(s, h * channelCount * 2 ); + + // Read the RLE data by channel. + for (channel = 0; channel < 4; channel++) { + stbi_uc *p; + + p = out+channel; + if (channel >= channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++, p += 4) + *p = (channel == 3 ? 255 : 0); + } else { + // Read the RLE data. + if (!stbi__psd_decode_rle(s, p, pixelCount)) { + STBI_FREE(out); + return stbi__errpuc("corrupt", "bad RLE data"); + } + } + } + + } else { + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. + + // Read the data by channel. + for (channel = 0; channel < 4; channel++) { + if (channel >= channelCount) { + // Fill this channel with default data. + if (bitdepth == 16 && bpc == 16) { + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + stbi__uint16 val = channel == 3 ? 65535 : 0; + for (i = 0; i < pixelCount; i++, q += 4) + *q = val; + } else { + stbi_uc *p = out+channel; + stbi_uc val = channel == 3 ? 
255 : 0; + for (i = 0; i < pixelCount; i++, p += 4) + *p = val; + } + } else { + if (ri->bits_per_channel == 16) { // output bpc + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + for (i = 0; i < pixelCount; i++, q += 4) + *q = (stbi__uint16) stbi__get16be(s); + } else { + stbi_uc *p = out+channel; + if (bitdepth == 16) { // input bpc + for (i = 0; i < pixelCount; i++, p += 4) + *p = (stbi_uc) (stbi__get16be(s) >> 8); + } else { + for (i = 0; i < pixelCount; i++, p += 4) + *p = stbi__get8(s); + } + } + } + } + } + + // remove weird white matte from PSD + if (channelCount >= 4) { + if (ri->bits_per_channel == 16) { + for (i=0; i < w*h; ++i) { + stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; + if (pixel[3] != 0 && pixel[3] != 65535) { + float a = pixel[3] / 65535.0f; + float ra = 1.0f / a; + float inv_a = 65535.0f * (1 - ra); + pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); + pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); + pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); + } + } + } else { + for (i=0; i < w*h; ++i) { + unsigned char *pixel = out + 4*i; + if (pixel[3] != 0 && pixel[3] != 255) { + float a = pixel[3] / 255.0f; + float ra = 1.0f / a; + float inv_a = 255.0f * (1 - ra); + pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); + pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); + pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); + } + } + } + } + + // convert to desired output format + if (req_comp && req_comp != 4) { + if (ri->bits_per_channel == 16) + out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h); + else + out = stbi__convert_format(out, 4, req_comp, w, h); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + if (comp) *comp = 4; + *y = h; + *x = w; + + return out; +} +#endif + +// ************************************************************************************************* +// Softimage PIC loader +// by Tom Seddon +// +// See 
http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format +// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/ + +#ifndef STBI_NO_PIC +static int stbi__pic_is4(stbi__context *s,const char *str) +{ + int i; + for (i=0; i<4; ++i) + if (stbi__get8(s) != (stbi_uc)str[i]) + return 0; + + return 1; +} + +static int stbi__pic_test_core(stbi__context *s) +{ + int i; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) + return 0; + + for(i=0;i<84;++i) + stbi__get8(s); + + if (!stbi__pic_is4(s,"PICT")) + return 0; + + return 1; +} + +typedef struct +{ + stbi_uc size,type,channel; +} stbi__pic_packet; + +static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest) +{ + int mask=0x80, i; + + for (i=0; i<4; ++i, mask>>=1) { + if (channel & mask) { + if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short"); + dest[i]=stbi__get8(s); + } + } + + return dest; +} + +static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src) +{ + int mask=0x80,i; + + for (i=0;i<4; ++i, mask>>=1) + if (channel&mask) + dest[i]=src[i]; +} + +static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result) +{ + int act_comp=0,num_packets=0,y,chained; + stbi__pic_packet packets[10]; + + // this will (should...) cater for even some bizarre stuff like having data + // for the same channel in multiple packets. + do { + stbi__pic_packet *packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return stbi__errpuc("bad format","too many packets"); + + packet = &packets[num_packets++]; + + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + + act_comp |= packet->channel; + + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)"); + if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp"); + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel? 
+ + for(y=0; ytype) { + default: + return stbi__errpuc("bad format","packet has bad compression type"); + + case 0: {//uncompressed + int x; + + for(x=0;xchannel,dest)) + return 0; + break; + } + + case 1://Pure RLE + { + int left=width, i; + + while (left>0) { + stbi_uc count,value[4]; + + count=stbi__get8(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)"); + + if (count > left) + count = (stbi_uc) left; + + if (!stbi__readval(s,packet->channel,value)) return 0; + + for(i=0; ichannel,dest,value); + left -= count; + } + } + break; + + case 2: {//Mixed RLE + int left=width; + while (left>0) { + int count = stbi__get8(s), i; + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)"); + + if (count >= 128) { // Repeated + stbi_uc value[4]; + + if (count==128) + count = stbi__get16be(s); + else + count -= 127; + if (count > left) + return stbi__errpuc("bad file","scanline overrun"); + + if (!stbi__readval(s,packet->channel,value)) + return 0; + + for(i=0;ichannel,dest,value); + } else { // Raw + ++count; + if (count>left) return stbi__errpuc("bad file","scanline overrun"); + + for(i=0;ichannel,dest)) + return 0; + } + left-=count; + } + break; + } + } + } + } + + return result; +} + +static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri) +{ + stbi_uc *result; + int i, x,y, internal_comp; + STBI_NOTUSED(ri); + + if (!comp) comp = &internal_comp; + + for (i=0; i<92; ++i) + stbi__get8(s); + + x = stbi__get16be(s); + y = stbi__get16be(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)"); + if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode"); + + stbi__get32be(s); //skip `ratio' + stbi__get16be(s); //skip `fields' + stbi__get16be(s); //skip `pad' + + // intermediate buffer is RGBA + result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0); + memset(result, 0xff, 
x*y*4); + + if (!stbi__pic_load_core(s,x,y,comp, result)) { + STBI_FREE(result); + result=0; + } + *px = x; + *py = y; + if (req_comp == 0) req_comp = *comp; + result=stbi__convert_format(result,4,req_comp,x,y); + + return result; +} + +static int stbi__pic_test(stbi__context *s) +{ + int r = stbi__pic_test_core(s); + stbi__rewind(s); + return r; +} +#endif + +// ************************************************************************************************* +// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb + +#ifndef STBI_NO_GIF +typedef struct +{ + stbi__int16 prefix; + stbi_uc first; + stbi_uc suffix; +} stbi__gif_lzw; + +typedef struct +{ + int w,h; + stbi_uc *out; // output buffer (always 4 components) + stbi_uc *background; // The current "background" as far as a gif is concerned + stbi_uc *history; + int flags, bgindex, ratio, transparent, eflags; + stbi_uc pal[256][4]; + stbi_uc lpal[256][4]; + stbi__gif_lzw codes[8192]; + stbi_uc *color_table; + int parse, step; + int lflags; + int start_x, start_y; + int max_x, max_y; + int cur_x, cur_y; + int line_size; + int delay; +} stbi__gif; + +static int stbi__gif_test_raw(stbi__context *s) +{ + int sz; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0; + sz = stbi__get8(s); + if (sz != '9' && sz != '7') return 0; + if (stbi__get8(s) != 'a') return 0; + return 1; +} + +static int stbi__gif_test(stbi__context *s) +{ + int r = stbi__gif_test_raw(s); + stbi__rewind(s); + return r; +} + +static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp) +{ + int i; + for (i=0; i < num_entries; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + pal[i][3] = transp == i ? 
0 : 255; + } +} + +static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info) +{ + stbi_uc version; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') + return stbi__err("not GIF", "Corrupt GIF"); + + version = stbi__get8(s); + if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF"); + if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF"); + + stbi__g_failure_reason = ""; + g->w = stbi__get16le(s); + g->h = stbi__get16le(s); + g->flags = stbi__get8(s); + g->bgindex = stbi__get8(s); + g->ratio = stbi__get8(s); + g->transparent = -1; + + if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments + + if (is_info) return 1; + + if (g->flags & 0x80) + stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); + + return 1; +} + +static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); + if (!stbi__gif_header(s, g, comp, 1)) { + STBI_FREE(g); + stbi__rewind( s ); + return 0; + } + if (x) *x = g->w; + if (y) *y = g->h; + STBI_FREE(g); + return 1; +} + +static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) +{ + stbi_uc *p, *c; + int idx; + + // recurse to decode the prefixes, since the linked-list is backwards, + // and working backwards through an interleaved image would be nasty + if (g->codes[code].prefix >= 0) + stbi__out_gif_code(g, g->codes[code].prefix); + + if (g->cur_y >= g->max_y) return; + + idx = g->cur_x + g->cur_y; + p = &g->out[idx]; + g->history[idx / 4] = 1; + + c = &g->color_table[g->codes[code].suffix * 4]; + if (c[3] > 128) { // don't render transparent pixels; + p[0] = c[2]; + p[1] = c[1]; + p[2] = c[0]; + p[3] = c[3]; + } + g->cur_x += 4; + + if (g->cur_x >= g->max_x) { + g->cur_x = g->start_x; + g->cur_y += g->step; + + while (g->cur_y >= g->max_y && g->parse > 0) { + g->step = (1 << g->parse) * 
g->line_size; + g->cur_y = g->start_y + (g->step >> 1); + --g->parse; + } + } +} + +static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) +{ + stbi_uc lzw_cs; + stbi__int32 len, init_code; + stbi__uint32 first; + stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear; + stbi__gif_lzw *p; + + lzw_cs = stbi__get8(s); + if (lzw_cs > 12) return NULL; + clear = 1 << lzw_cs; + first = 1; + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + bits = 0; + valid_bits = 0; + for (init_code = 0; init_code < clear; init_code++) { + g->codes[init_code].prefix = -1; + g->codes[init_code].first = (stbi_uc) init_code; + g->codes[init_code].suffix = (stbi_uc) init_code; + } + + // support no starting clear code + avail = clear+2; + oldcode = -1; + + len = 0; + for(;;) { + if (valid_bits < codesize) { + if (len == 0) { + len = stbi__get8(s); // start new block + if (len == 0) + return g->out; + } + --len; + bits |= (stbi__int32) stbi__get8(s) << valid_bits; + valid_bits += 8; + } else { + stbi__int32 code = bits & codemask; + bits >>= codesize; + valid_bits -= codesize; + // @OPTIMIZE: is there some way we can accelerate the non-clear path? + if (code == clear) { // clear code + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + avail = clear + 2; + oldcode = -1; + first = 0; + } else if (code == clear + 1) { // end of stream code + stbi__skip(s, len); + while ((len = stbi__get8(s)) > 0) + stbi__skip(s,len); + return g->out; + } else if (code <= avail) { + if (first) { + return stbi__errpuc("no clear code", "Corrupt GIF"); + } + + if (oldcode >= 0) { + p = &g->codes[avail++]; + if (avail > 8192) { + return stbi__errpuc("too many codes", "Corrupt GIF"); + } + + p->prefix = (stbi__int16) oldcode; + p->first = g->codes[oldcode].first; + p->suffix = (code == avail) ? 
p->first : g->codes[code].first; + } else if (code == avail) + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + + stbi__out_gif_code(g, (stbi__uint16) code); + + if ((avail & codemask) == 0 && avail <= 0x0FFF) { + codesize++; + codemask = (1 << codesize) - 1; + } + + oldcode = code; + } else { + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + } + } + } +} + +// this function is designed to support animated gifs, although stb_image doesn't support it +// two back is the image from two frames ago, used for a very specific disposal format +static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back) +{ + int dispose; + int first_frame; + int pi; + int pcount; + + // on first frame, any non-written pixels get the background colour (non-transparent) + first_frame = 0; + if (g->out == 0) { + if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header + g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h); + g->background = (stbi_uc *) stbi__malloc(4 * g->w * g->h); + g->history = (stbi_uc *) stbi__malloc(g->w * g->h); + if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory"); + + // image is treated as "tranparent" at the start - ie, nothing overwrites the current background; + // background colour is only used for pixels that are not rendered first frame, after that "background" + // color refers to teh color that was there the previous frame. + memset( g->out, 0x00, 4 * g->w * g->h ); + memset( g->background, 0x00, 4 * g->w * g->h ); // state of the background (starts transparent) + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + first_frame = 1; + } else { + // second frame - how do we dispoase of the previous one? 
+ dispose = (g->eflags & 0x1C) >> 2; + pcount = g->w * g->h; + + if ((dispose == 3) && (two_back == 0)) { + dispose = 2; // if I don't have an image to revert back to, default to the old background + } + + if (dispose == 3) { // use previous graphic + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); + } + } + } else if (dispose == 2) { + // restore what was changed last frame to background before that frame; + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); + } + } + } else { + // This is a non-disposal case eithe way, so just + // leave the pixels as is, and they will become the new background + // 1: do not dispose + // 0: not specified. + } + + // background is what out is after the undoing of the previou frame; + memcpy( g->background, g->out, 4 * g->w * g->h ); + } + + // clear my history; + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + + for (;;) { + int tag = stbi__get8(s); + switch (tag) { + case 0x2C: /* Image Descriptor */ + { + stbi__int32 x, y, w, h; + stbi_uc *o; + + x = stbi__get16le(s); + y = stbi__get16le(s); + w = stbi__get16le(s); + h = stbi__get16le(s); + if (((x + w) > (g->w)) || ((y + h) > (g->h))) + return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); + + g->line_size = g->w * 4; + g->start_x = x * 4; + g->start_y = y * g->line_size; + g->max_x = g->start_x + w * 4; + g->max_y = g->start_y + h * g->line_size; + g->cur_x = g->start_x; + g->cur_y = g->start_y; + + g->lflags = stbi__get8(s); + + if (g->lflags & 0x40) { + g->step = 8 * g->line_size; // first interlaced spacing + g->parse = 3; + } else { + g->step = g->line_size; + g->parse = 0; + } + + if (g->lflags & 0x80) { + stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? 
g->transparent : -1); + g->color_table = (stbi_uc *) g->lpal; + } else if (g->flags & 0x80) { + g->color_table = (stbi_uc *) g->pal; + } else + return stbi__errpuc("missing color table", "Corrupt GIF"); + + o = stbi__process_gif_raster(s, g); + if (o == NULL) return NULL; + + // if this was the first frame, + pcount = g->w * g->h; + if (first_frame && (g->bgindex > 0)) { + // if first frame, any pixel not drawn to gets the background color + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi] == 0) { + g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; + memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); + } + } + } + + return o; + } + + case 0x21: // Comment Extension. + { + int len; + int ext = stbi__get8(s); + if (ext == 0xF9) { // Graphic Control Extension. + len = stbi__get8(s); + if (len == 4) { + g->eflags = stbi__get8(s); + g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths. 
+ + // unset old transparent + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 255; + } + if (g->eflags & 0x01) { + g->transparent = stbi__get8(s); + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 0; + } + } else { + // don't need transparent + stbi__skip(s, 1); + g->transparent = -1; + } + } else { + stbi__skip(s, len); + break; + } + } + while ((len = stbi__get8(s)) != 0) { + stbi__skip(s, len); + } + break; + } + + case 0x3B: // gif stream termination code + return (stbi_uc *) s; // using '1' causes warning on some compilers + + default: + return stbi__errpuc("unknown code", "Corrupt GIF"); + } + } +} + +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + if (stbi__gif_test(s)) { + int layers = 0; + stbi_uc *u = 0; + stbi_uc *out = 0; + stbi_uc *two_back = 0; + stbi__gif g; + int stride; + memset(&g, 0, sizeof(g)); + if (delays) { + *delays = 0; + } + + do { + u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + + if (u) { + *x = g.w; + *y = g.h; + ++layers; + stride = g.w * g.h * 4; + + if (out) { + out = (stbi_uc*) STBI_REALLOC( out, layers * stride ); + if (delays) { + *delays = (int*) STBI_REALLOC( *delays, sizeof(int) * layers ); + } + } else { + out = (stbi_uc*)stbi__malloc( layers * stride ); + if (delays) { + *delays = (int*) stbi__malloc( layers * sizeof(int) ); + } + } + memcpy( out + ((layers - 1) * stride), u, stride ); + if (layers >= 2) { + two_back = out - 2 * stride; + } + + if (delays) { + (*delays)[layers - 1U] = g.delay; + } + } + } while (u != 0); + + // free temp buffer; + STBI_FREE(g.out); + STBI_FREE(g.history); + STBI_FREE(g.background); + + // do the final conversion after loading everything; + if (req_comp && req_comp != 4) + out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); + + *z = layers; + return out; + } else { + return stbi__errpuc("not GIF", "Image was not as a 
gif type."); + } +} + +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *u = 0; + stbi__gif g; + memset(&g, 0, sizeof(g)); + + u = stbi__gif_load_next(s, &g, comp, req_comp, 0); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + if (u) { + *x = g.w; + *y = g.h; + + // moved conversion to after successful load so that the same + // can be done for multiple frames. + if (req_comp && req_comp != 4) + u = stbi__convert_format(u, 4, req_comp, g.w, g.h); + } + + // free buffers needed for multiple frame loading; + STBI_FREE(g.history); + STBI_FREE(g.background); + + return u; +} + +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp) +{ + return stbi__gif_info_raw(s,x,y,comp); +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int stbi__hdr_test_core(stbi__context *s, const char *signature) +{ + int i; + for (i=0; signature[i]; ++i) + if (stbi__get8(s) != signature[i]) + return 0; + stbi__rewind(s); + return 1; +} + +static int stbi__hdr_test(stbi__context* s) +{ + int r = stbi__hdr_test_core(s, "#?RADIANCE\n"); + stbi__rewind(s); + if(!r) { + r = stbi__hdr_test_core(s, "#?RGBE\n"); + stbi__rewind(s); + } + return r; +} + +#define STBI__HDR_BUFLEN 1024 +static char *stbi__hdr_gettoken(stbi__context *z, char *buffer) +{ + int len=0; + char c = '\0'; + + c = (char) stbi__get8(z); + + while (!stbi__at_eof(z) && c != '\n') { + buffer[len++] = c; + if (len == STBI__HDR_BUFLEN-1) { + // flush to end of line + while (!stbi__at_eof(z) && stbi__get8(z) != '\n') + ; + break; + } + c = (char) stbi__get8(z); + } + + buffer[len] = 0; + return buffer; +} + +static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) +{ + if ( input[3] != 0 ) { + float f1; + // Exponent + f1 = (float) ldexp(1.0f, input[3] - 
(int)(128 + 8)); + if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; + } else { + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } + } +} + +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + float *hdr_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + const char *headerToken; + STBI_NOTUSED(ri); + + // Check identifier + headerToken = stbi__hdr_gettoken(s,buffer); + if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) + return stbi__errpf("not HDR", "Corrupt HDR image"); + + // Parse header + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! 
+ token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + height = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + width = (int) strtol(token, NULL, 10); + + *x = width; + *y = height; + + if (comp) *comp = 3; + if (req_comp == 0) req_comp = 3; + + if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0)) + return stbi__errpf("too large", "HDR image is too large"); + + // Read data + hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0); + if (!hdr_data) + return stbi__errpf("outofmem", "Out of memory"); + + // Load image data + // image data is stored as some number of sca + if ( width < 8 || width >= 32768) { + // Read flat data + for (j=0; j < height; ++j) { + for (i=0; i < width; ++i) { + stbi_uc rgbe[4]; + main_decode_loop: + stbi__getn(s, rgbe, 4); + stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); + } + } + } else { + // Read RLE-encoded data + scanline = NULL; + + for (j = 0; j < height; ++j) { + c1 = stbi__get8(s); + c2 = stbi__get8(s); + len = stbi__get8(s); + if (c1 != 2 || c2 != 2 || (len & 0x80)) { + // not run-length encoded, so we have to actually use THIS data as a decoded + // pixel (note this can't be a valid pixel--one of RGB must be >= 128) + stbi_uc rgbe[4]; + rgbe[0] = (stbi_uc) c1; + rgbe[1] = (stbi_uc) c2; + rgbe[2] = (stbi_uc) len; + rgbe[3] = (stbi_uc) stbi__get8(s); + stbi__hdr_convert(hdr_data, rgbe, req_comp); + i = 1; + j = 0; + STBI_FREE(scanline); + goto main_decode_loop; // yes, this makes no sense + } + len <<= 8; + len |= stbi__get8(s); + if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); } + if (scanline == NULL) { + scanline = (stbi_uc *) 
stbi__malloc_mad2(width, 4, 0); + if (!scanline) { + STBI_FREE(hdr_data); + return stbi__errpf("outofmem", "Out of memory"); + } + } + + for (k = 0; k < 4; ++k) { + int nleft; + i = 0; + while ((nleft = width - i) > 0) { + count = stbi__get8(s); + if (count > 128) { + // Run + value = stbi__get8(s); + count -= 128; + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = value; + } else { + // Dump + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = stbi__get8(s); + } + } + } + for (i=0; i < width; ++i) + stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); + } + if (scanline) + STBI_FREE(scanline); + } + + return hdr_data; +} + +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int dummy; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (stbi__hdr_test(s) == 0) { + stbi__rewind( s ); + return 0; + } + + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) { + stbi__rewind( s ); + return 0; + } + token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *y = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *x = (int) strtol(token, NULL, 10); + *comp = 3; + return 1; +} +#endif // STBI_NO_HDR + +#ifndef STBI_NO_BMP +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) +{ + void *p; + stbi__bmp_data info; + + info.all_a = 255; + p = stbi__bmp_parse_header(s, &info); + stbi__rewind( s ); + 
if (p == NULL) + return 0; + if (x) *x = s->img_x; + if (y) *y = s->img_y; + if (comp) *comp = info.ma ? 4 : 3; + return 1; +} +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) +{ + int channelCount, dummy, depth; + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + *y = stbi__get32be(s); + *x = stbi__get32be(s); + depth = stbi__get16be(s); + if (depth != 8 && depth != 16) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 3) { + stbi__rewind( s ); + return 0; + } + *comp = 4; + return 1; +} + +static int stbi__psd_is16(stbi__context *s) +{ + int channelCount, depth; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + (void) stbi__get32be(s); + (void) stbi__get32be(s); + depth = stbi__get16be(s); + if (depth != 16) { + stbi__rewind( s ); + return 0; + } + return 1; +} +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) +{ + int act_comp=0,num_packets=0,chained,dummy; + stbi__pic_packet packets[10]; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) { + stbi__rewind(s); + return 0; + } + + stbi__skip(s, 88); + + *x = stbi__get16be(s); + *y = stbi__get16be(s); + if (stbi__at_eof(s)) { + stbi__rewind( s); + return 0; + } + if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) { + stbi__rewind( s ); + return 0; + } + + stbi__skip(s, 8); + + do { + stbi__pic_packet 
*packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return 0; + + packet = &packets[num_packets++]; + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + act_comp |= packet->channel; + + if (stbi__at_eof(s)) { + stbi__rewind( s ); + return 0; + } + if (packet->size != 8) { + stbi__rewind( s ); + return 0; + } + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); + + return 1; +} +#endif + +// ************************************************************************************************* +// Portable Gray Map and Portable Pixel Map loader +// by Ken Miller +// +// PGM: http://netpbm.sourceforge.net/doc/pgm.html +// PPM: http://netpbm.sourceforge.net/doc/ppm.html +// +// Known limitations: +// Does not support comments in the header section +// Does not support ASCII image data (formats P2 and P3) +// Does not support 16-bit-per-channel + +#ifndef STBI_NO_PNM + +static int stbi__pnm_test(stbi__context *s) +{ + char p, t; + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind( s ); + return 0; + } + return 1; +} + +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + STBI_NOTUSED(ri); + + if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n)) + return 0; + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + + if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "PNM too large"); + + out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + stbi__getn(s, out, s->img_n * s->img_x * s->img_y); + + if (req_comp && req_comp != s->img_n) { + out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure 
+ } + return out; +} + +static int stbi__pnm_isspace(char c) +{ + return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; +} + +static void stbi__pnm_skip_whitespace(stbi__context *s, char *c) +{ + for (;;) { + while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) + *c = (char) stbi__get8(s); + + if (stbi__at_eof(s) || *c != '#') + break; + + while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) + *c = (char) stbi__get8(s); + } +} + +static int stbi__pnm_isdigit(char c) +{ + return c >= '0' && c <= '9'; +} + +static int stbi__pnm_getinteger(stbi__context *s, char *c) +{ + int value = 0; + + while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { + value = value*10 + (*c - '0'); + *c = (char) stbi__get8(s); + } + + return value; +} + +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) +{ + int maxv, dummy; + char c, p, t; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + stbi__rewind(s); + + // Get identifier + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind(s); + return 0; + } + + *comp = (t == '6') ? 
3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm + + c = (char) stbi__get8(s); + stbi__pnm_skip_whitespace(s, &c); + + *x = stbi__pnm_getinteger(s, &c); // read width + stbi__pnm_skip_whitespace(s, &c); + + *y = stbi__pnm_getinteger(s, &c); // read height + stbi__pnm_skip_whitespace(s, &c); + + maxv = stbi__pnm_getinteger(s, &c); // read max value + + if (maxv > 255) + return stbi__err("max value > 255", "PPM image not 8-bit"); + else + return 1; +} +#endif + +static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) +{ + #ifndef STBI_NO_JPEG + if (stbi__jpeg_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNG + if (stbi__png_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_GIF + if (stbi__gif_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_BMP + if (stbi__bmp_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PIC + if (stbi__pic_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNM + if (stbi__pnm_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_info(s, x, y, comp)) return 1; + #endif + + // test tga last because it's a crappy test! 
+ #ifndef STBI_NO_TGA + if (stbi__tga_info(s, x, y, comp)) + return 1; + #endif + return stbi__err("unknown image type", "Image not of any known type, or corrupt"); +} + +static int stbi__is_16_main(stbi__context *s) +{ + #ifndef STBI_NO_PNG + if (stbi__png_is16(s)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_is16(s)) return 1; + #endif + + return 0; +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_info_from_file(f, x, y, comp); + fclose(f); + return result; +} + +STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__info_main(&s,x,y,comp); + fseek(f,pos,SEEK_SET); + return r; +} + +STBIDEF int stbi_is_16_bit(char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_is_16_bit_from_file(f); + fclose(f); + return result; +} + +STBIDEF int stbi_is_16_bit_from_file(FILE *f) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__is_16_main(&s); + fseek(f,pos,SEEK_SET); + return r; +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__is_16_main(&s); +} + +STBIDEF int 
stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__is_16_main(&s); +} + +#endif // STB_IMAGE_IMPLEMENTATION + +/* + revision history: + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug + 1-bit BMP + *_is_16_bit api + avoid warnings + 2.16 (2017-07-23) all functions have 16-bit variants; + STBI_NO_STDIO works again; + compilation fixes; + fix rounding in unpremultiply; + optimize vertical flip; + disable raw_len validation; + documentation fixes + 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; + warning fixes; disable run-time SSE detection on gcc; + uniform handling of optional "return" values; + thread-safe initialization of zlib tables + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) allocate large structures on the stack + remove white matting for transparent PSD + fix reported channel count for PNG & BMP + re-enable SSE2 in non-gcc 64-bit + support RGB-formatted JPEG + read 16-bit PNGs (only as 8-bit) + 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED + 2.09 (2016-01-16) allow comments in PNM files + 16-bit-per-pixel TGA (not bit-per-component) + info() for TGA could break due to .hdr handling + info() for BMP to shares code instead of sloppy parse + can use STBI_REALLOC_SIZED if allocator doesn't support realloc + code cleanup + 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA + 2.07 (2015-09-13) fix compiler warnings + partial animated GIF support + limited 16-bpc PSD support + #ifdef unused functions + bug with < 92 byte PIC,PNM,HDR,TGA + 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value + 2.05 (2015-04-19) fix bug 
in progressive JPEG handling, fix warning + 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit + 2.03 (2015-04-12) extra corruption checking (mmozeiko) + stbi_set_flip_vertically_on_load (nguillemot) + fix NEON support; fix mingw support + 2.02 (2015-01-19) fix incorrect assert, fix warning + 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 + 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG + 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) + progressive JPEG (stb) + PGM/PPM support (Ken Miller) + STBI_MALLOC,STBI_REALLOC,STBI_FREE + GIF bugfix -- seemingly never worked + STBI_NO_*, STBI_ONLY_* + 1.48 (2014-12-14) fix incorrectly-named assert() + 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) + optimize PNG (ryg) + fix bug in interlaced PNG with user-specified channel count (stb) + 1.46 (2014-08-26) + fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG + 1.45 (2014-08-16) + fix MSVC-ARM internal compiler error by wrapping malloc + 1.44 (2014-08-07) + various warning fixes from Ronny Chevalier + 1.43 (2014-07-15) + fix MSVC-only compiler problem in code changed in 1.42 + 1.42 (2014-07-09) + don't define _CRT_SECURE_NO_WARNINGS (affects user code) + fixes to stbi__cleanup_jpeg path + added STBI_ASSERT to avoid requiring assert.h + 1.41 (2014-06-25) + fix search&replace from 1.36 that messed up comments/error messages + 1.40 (2014-06-22) + fix gcc struct-initialization warning + 1.39 (2014-06-15) + fix to TGA optimization when req_comp != number of components in TGA; + fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite) + add support for BMP version 5 (more ignored fields) + 1.38 (2014-06-06) + suppress MSVC warnings on integer casts truncating values + fix accidental rename of 'skip' field of I/O + 1.37 (2014-06-04) + remove duplicate typedef + 1.36 (2014-06-03) + convert to header file single-file library + if 
de-iphone isn't set, load iphone images color-swapped instead of returning NULL + 1.35 (2014-05-27) + various warnings + fix broken STBI_SIMD path + fix bug where stbi_load_from_file no longer left file pointer in correct place + fix broken non-easy path for 32-bit BMP (possibly never used) + TGA optimization by Arseny Kapoulkine + 1.34 (unknown) + use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case + 1.33 (2011-07-14) + make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements + 1.32 (2011-07-13) + support for "info" function for all supported filetypes (SpartanJ) + 1.31 (2011-06-20) + a few more leak fixes, bug in PNG handling (SpartanJ) + 1.30 (2011-06-11) + added ability to load files via callbacks to accomidate custom input streams (Ben Wenger) + removed deprecated format-specific test/load functions + removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway + error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) + fix inefficiency in decoding 32-bit BMP (David Woo) + 1.29 (2010-08-16) + various warning fixes from Aurelien Pocheville + 1.28 (2010-08-01) + fix bug in GIF palette transparency (SpartanJ) + 1.27 (2010-08-01) + cast-to-stbi_uc to fix warnings + 1.26 (2010-07-24) + fix bug in file buffering for PNG reported by SpartanJ + 1.25 (2010-07-17) + refix trans_data warning (Won Chun) + 1.24 (2010-07-12) + perf improvements reading from files on platforms with lock-heavy fgetc() + minor perf improvements for jpeg + deprecated type-specific functions so we'll get feedback if they're needed + attempt to fix trans_data warning (Won Chun) + 1.23 fixed bug in iPhone support + 1.22 (2010-07-10) + removed image *writing* support + stbi_info support from Jetro Lauha + GIF support from Jean-Marc Lienher + iPhone PNG-extensions from James Brown + warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. 
Janez (U+017D)emva) + 1.21 fix use of 'stbi_uc' in header (reported by jon blow) + 1.20 added support for Softimage PIC, by Tom Seddon + 1.19 bug in interlaced PNG corruption check (found by ryg) + 1.18 (2008-08-02) + fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - stbi__convert_format converted one too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug + header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) + 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... 
not sure + fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available + 0.56 fix bug: zlib uncompressed mode len vs. nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant + 0.50 (2006-11-19) + first released version +*/ + + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/workloads/realworld/uvm/darknet/src/stb_image_write.h b/workloads/realworld/uvm/darknet/src/stb_image_write.h new file mode 100644 index 0000000000000000000000000000000000000000..c05e95812b96232abd3617f98255832cc3fe4716 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/stb_image_write.h @@ -0,0 +1,1568 @@ +/* stb_image_write - v1.09 - public domain - http://nothings.org/stb/stb_image_write.h + writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 + no warranty implied; use at your own risk + + Before #including, + + #define STB_IMAGE_WRITE_IMPLEMENTATION + + in the file that you want to have the implementation. + + Will probably not work correctly with strict-aliasing optimizations. + + If using a modern Microsoft Compiler, non-safe versions of CRT calls may cause + compilation warnings or even errors. To avoid this, also before #including, + + #define STBI_MSC_SECURE_CRT + +ABOUT: + + This header file is a library for writing images to C stdio. It could be + adapted to write to memory or a general streaming interface; let me know. + + The PNG output is not optimal; it is 20-50% larger than the file + written by a decent optimizing implementation; though providing a custom + zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that. 
+ This library is designed for source code compactness and simplicity, + not optimal image file size or run-time performance. + +BUILDING: + + You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h. + You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace + malloc,realloc,free. + You can #define STBIW_MEMMOVE() to replace memmove() + You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function + for PNG compression (instead of the builtin one), it must have the following signature: + unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality); + The returned data will be freed with STBIW_FREE() (free() by default), + so it must be heap allocated with STBIW_MALLOC() (malloc() by default), + +USAGE: + + There are five functions, one for each image file format: + + int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality); + int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); + + void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically + + There are also five equivalent functions that use an arbitrary write function. 
You are + expected to open/close your file-equivalent before and after calling these: + + int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); + int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + + where the callback is: + void stbi_write_func(void *context, void *data, int size); + + You can configure it with these global variables: + int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE + int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression + int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode + + + You can define STBI_WRITE_NO_STDIO to disable the file variant of these + functions, so the library will not use stdio.h at all. However, this will + also disable HDR writing, because it requires stdio for formatted output. + + Each function returns 0 on failure and non-0 on success. + + The functions create an image file defined by the parameters. The image + is a rectangle of pixels stored from left-to-right, top-to-bottom. + Each pixel contains 'comp' channels of data stored interleaved with 8-bits + per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is + monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall. + The *data pointer points to the first byte of the top-left-most pixel. + For PNG, "stride_in_bytes" is the distance in bytes from the first byte of + a row of pixels to the first byte of the next row of pixels. 
+ + PNG creates output files with the same number of components as the input. + The BMP format expands Y to RGB in the file format and does not + output alpha. + + PNG supports writing rectangles of data even when the bytes storing rows of + data are not consecutive in memory (e.g. sub-rectangles of a larger image), + by supplying the stride between the beginning of adjacent rows. The other + formats do not. (Thus you cannot write a native-format BMP through the BMP + writer, both because it is in BGR order and because it may have padding + at the end of the line.) + + PNG allows you to set the deflate compression level by setting the global + variable 'stbi_write_png_compression_level' (it defaults to 8). + + HDR expects linear float data. Since the format is always 32-bit rgb(e) + data, alpha (if provided) is discarded, and for monochrome data it is + replicated across all three channels. + + TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed + data, set the global variable 'stbi_write_tga_with_rle' to 0. + + JPEG does ignore alpha channels in input data; quality is between 1 and 100. + Higher quality looks better but results in a bigger image. + JPEG baseline (no JPEG progressive). + +CREDITS: + + + Sean Barrett - PNG/BMP/TGA + Baldur Karlsson - HDR + Jean-Sebastien Guay - TGA monochrome + Tim Kelsey - misc enhancements + Alan Hickman - TGA RLE + Emmanuel Julien - initial file IO callback implementation + Jon Olick - original jo_jpeg.cpp code + Daniel Gibson - integrate JPEG, allow external zlib + Aarni Koskela - allow choosing PNG filter + + bugfixes: + github:Chribba + Guillaume Chereau + github:jry2 + github:romigrou + Sergio Gonzalez + Jonas Karlsson + Filip Wasil + Thatcher Ulrich + github:poppolopoppo + Patrick Boettcher + github:xeekworx + Cap Petschulat + Simon Rodriguez + Ivan Tikhonov + github:ignotion + Adam Schackart + +LICENSE + + See end of file for license information. 
+ +*/ + +#ifndef INCLUDE_STB_IMAGE_WRITE_H +#define INCLUDE_STB_IMAGE_WRITE_H + +// if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline' +#ifndef STBIWDEF +#ifdef STB_IMAGE_WRITE_STATIC +#define STBIWDEF static +#else +#ifdef __cplusplus +#define STBIWDEF extern "C" +#else +#define STBIWDEF extern +#endif +#endif +#endif + +#ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations +extern int stbi_write_tga_with_rle; +extern int stbi_write_png_compression_level; +extern int stbi_write_force_png_filter; +#endif + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); +#endif + +typedef void stbi_write_func(void *context, void *data, int size); + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + +STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); + +#endif//INCLUDE_STB_IMAGE_WRITE_H + +#ifdef STB_IMAGE_WRITE_IMPLEMENTATION + +#ifdef _WIN32 + #ifndef 
_CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS + #endif + #ifndef _CRT_NONSTDC_NO_DEPRECATE + #define _CRT_NONSTDC_NO_DEPRECATE + #endif +#endif + +#ifndef STBI_WRITE_NO_STDIO +#include +#endif // STBI_WRITE_NO_STDIO + +#include +#include +#include +#include + +#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED)) +// ok +#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)." +#endif + +#ifndef STBIW_MALLOC +#define STBIW_MALLOC(sz) malloc(sz) +#define STBIW_REALLOC(p,newsz) realloc(p,newsz) +#define STBIW_FREE(p) free(p) +#endif + +#ifndef STBIW_REALLOC_SIZED +#define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz) +#endif + + +#ifndef STBIW_MEMMOVE +#define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz) +#endif + + +#ifndef STBIW_ASSERT +#include +#define STBIW_ASSERT(x) assert(x) +#endif + +#define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff) + +#ifdef STB_IMAGE_WRITE_STATIC +static int stbi__flip_vertically_on_write=0; +static int stbi_write_png_compression_level = 8; +static int stbi_write_tga_with_rle = 1; +static int stbi_write_force_png_filter = -1; +#else +int stbi_write_png_compression_level = 8; +int stbi__flip_vertically_on_write=0; +int stbi_write_tga_with_rle = 1; +int stbi_write_force_png_filter = -1; +#endif + +STBIWDEF void stbi_flip_vertically_on_write(int flag) +{ + stbi__flip_vertically_on_write = flag; +} + +typedef struct +{ + stbi_write_func *func; + void *context; +} stbi__write_context; + +// initialize a callback-based context +static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context) +{ + s->func = c; + s->context = context; +} + +#ifndef STBI_WRITE_NO_STDIO + +static void stbi__stdio_write(void *context, void *data, int size) +{ + fwrite(data,1,size,(FILE*) 
context); +} + +static int stbi__start_write_file(stbi__write_context *s, const char *filename) +{ + FILE *f; +#ifdef STBI_MSC_SECURE_CRT + if (fopen_s(&f, filename, "wb")) + f = NULL; +#else + f = fopen(filename, "wb"); +#endif + stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f); + return f != NULL; +} + +static void stbi__end_write_file(stbi__write_context *s) +{ + fclose((FILE *)s->context); +} + +#endif // !STBI_WRITE_NO_STDIO + +typedef unsigned int stbiw_uint32; +typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1]; + +static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) +{ + while (*fmt) { + switch (*fmt++) { + case ' ': break; + case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); + s->func(s->context,&x,1); + break; } + case '2': { int x = va_arg(v,int); + unsigned char b[2]; + b[0] = STBIW_UCHAR(x); + b[1] = STBIW_UCHAR(x>>8); + s->func(s->context,b,2); + break; } + case '4': { stbiw_uint32 x = va_arg(v,int); + unsigned char b[4]; + b[0]=STBIW_UCHAR(x); + b[1]=STBIW_UCHAR(x>>8); + b[2]=STBIW_UCHAR(x>>16); + b[3]=STBIW_UCHAR(x>>24); + s->func(s->context,b,4); + break; } + default: + STBIW_ASSERT(0); + return; + } + } +} + +static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) 
+{ + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); +} + +static void stbiw__putc(stbi__write_context *s, unsigned char c) +{ + s->func(s->context, &c, 1); +} + +static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) +{ + unsigned char arr[3]; + arr[0] = a, arr[1] = b, arr[2] = c; + s->func(s->context, arr, 3); +} + +static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d) +{ + unsigned char bg[3] = { 255, 0, 255}, px[3]; + int k; + + if (write_alpha < 0) + s->func(s->context, &d[comp - 1], 1); + + switch (comp) { + case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case + case 1: + if (expand_mono) + stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp + else + s->func(s->context, d, 1); // monochrome TGA + break; + case 4: + if (!write_alpha) { + // composite against pink background + for (k = 0; k < 3; ++k) + px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255; + stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]); + break; + } + /* FALLTHROUGH */ + case 3: + stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); + break; + } + if (write_alpha > 0) + s->func(s->context, &d[comp - 1], 1); +} + +static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) +{ + stbiw_uint32 zero = 0; + int i,j, j_end; + + if (y <= 0) + return; + + if (stbi__flip_vertically_on_write) + vdir *= -1; + + if (vdir < 0) + j_end = -1, j = y-1; + else + j_end = y, j = 0; + + for (; j != j_end; j += vdir) { + for (i=0; i < x; ++i) { + unsigned char *d = (unsigned char *) data + (j*x+i)*comp; + stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); + } + s->func(s->context, &zero, scanline_pad); + } +} + +static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int 
comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) +{ + if (y < 0 || x < 0) { + return 0; + } else { + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); + stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono); + return 1; + } +} + +static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data) +{ + int pad = (-x*3) & 3; + return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad, + "11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header +} + +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_bmp_core(&s, x, y, comp, data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_bmp_core(&s, x, y, comp, data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif //!STBI_WRITE_NO_STDIO + +static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data) +{ + int has_alpha = (comp == 2 || comp == 4); + int colorbytes = has_alpha ? comp-1 : comp; + int format = colorbytes < 2 ? 
3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3 + + if (y < 0 || x < 0) + return 0; + + if (!stbi_write_tga_with_rle) { + return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0, + "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); + } else { + int i,j,k; + int jend, jdir; + + stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); + + if (stbi__flip_vertically_on_write) { + j = 0; + jend = y; + jdir = 1; + } else { + j = y-1; + jend = -1; + jdir = -1; + } + for (; j != jend; j += jdir) { + unsigned char *row = (unsigned char *) data + j * x * comp; + int len; + + for (i = 0; i < x; i += len) { + unsigned char *begin = row + i * comp; + int diff = 1; + len = 1; + + if (i < x - 1) { + ++len; + diff = memcmp(begin, row + (i + 1) * comp, comp); + if (diff) { + const unsigned char *prev = begin; + for (k = i + 2; k < x && len < 128; ++k) { + if (memcmp(prev, row + k * comp, comp)) { + prev += comp; + ++len; + } else { + --len; + break; + } + } + } else { + for (k = i + 2; k < x && len < 128; ++k) { + if (!memcmp(begin, row + k * comp, comp)) { + ++len; + } else { + break; + } + } + } + } + + if (diff) { + unsigned char header = STBIW_UCHAR(len - 1); + s->func(s->context, &header, 1); + for (k = 0; k < len; ++k) { + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); + } + } else { + unsigned char header = STBIW_UCHAR(len - 129); + s->func(s->context, &header, 1); + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); + } + } + } + } + return 1; +} + +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_tga_core(&s, x, y, comp, (void *) data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, 
const void *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_tga_core(&s, x, y, comp, (void *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR writer +// by Baldur Karlsson + +#define stbiw__max(a, b) ((a) > (b) ? (a) : (b)) + +void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear) +{ + int exponent; + float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2])); + + if (maxcomp < 1e-32f) { + rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0; + } else { + float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp; + + rgbe[0] = (unsigned char)(linear[0] * normalize); + rgbe[1] = (unsigned char)(linear[1] * normalize); + rgbe[2] = (unsigned char)(linear[2] * normalize); + rgbe[3] = (unsigned char)(exponent + 128); + } +} + +void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte) +{ + unsigned char lengthbyte = STBIW_UCHAR(length+128); + STBIW_ASSERT(length+128 <= 255); + s->func(s->context, &lengthbyte, 1); + s->func(s->context, &databyte, 1); +} + +void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data) +{ + unsigned char lengthbyte = STBIW_UCHAR(length); + STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code + s->func(s->context, &lengthbyte, 1); + s->func(s->context, data, length); +} + +void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline) +{ + unsigned char scanlineheader[4] = { 2, 2, 0, 0 }; + unsigned char rgbe[4]; + float linear[3]; + int x; + + scanlineheader[2] = (width&0xff00)>>8; + scanlineheader[3] = (width&0x00ff); + + /* skip RLE for images too small or large */ + if (width < 8 || width >= 32768) { + for (x=0; x < width; x++) { + switch (ncomp) { + case 4: /* 
fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + s->func(s->context, rgbe, 4); + } + } else { + int c,r; + /* encode into scratch buffer */ + for (x=0; x < width; x++) { + switch(ncomp) { + case 4: /* fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + scratch[x + width*0] = rgbe[0]; + scratch[x + width*1] = rgbe[1]; + scratch[x + width*2] = rgbe[2]; + scratch[x + width*3] = rgbe[3]; + } + + s->func(s->context, scanlineheader, 4); + + /* RLE each component separately */ + for (c=0; c < 4; c++) { + unsigned char *comp = &scratch[width*c]; + + x = 0; + while (x < width) { + // find first run + r = x; + while (r+2 < width) { + if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) + break; + ++r; + } + if (r+2 >= width) + r = width; + // dump up to first run + while (x < r) { + int len = r-x; + if (len > 128) len = 128; + stbiw__write_dump_data(s, len, &comp[x]); + x += len; + } + // if there's a run, output it + if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd + // find next byte after run + while (r < width && comp[r] == comp[x]) + ++r; + // output run up to r + while (x < r) { + int len = r-x; + if (len > 127) len = 127; + stbiw__write_run_data(s, len, comp[x]); + x += len; + } + } + } + } + } +} + +static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data) +{ + if (y <= 0 || x <= 0 || data == NULL) + return 0; + else { + // Each component is stored separately. Allocate scratch space for full output scanline. 
+ unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); + int i, len; + char buffer[128]; + char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; + s->func(s->context, header, sizeof(header)-1); + +#ifdef STBI_MSC_SECURE_CRT + len = sprintf_s(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#else + len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#endif + s->func(s->context, buffer, len); + + for(i=0; i < y; i++) + stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i)*x); + STBIW_FREE(scratch); + return 1; + } +} + +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_hdr_core(&s, x, y, comp, (float *) data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif // STBI_WRITE_NO_STDIO + + +////////////////////////////////////////////////////////////////////////////// +// +// PNG writer +// + +#ifndef STBIW_ZLIB_COMPRESS +// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() +#define stbiw__sbraw(a) ((int *) (a) - 2) +#define stbiw__sbm(a) stbiw__sbraw(a)[0] +#define stbiw__sbn(a) stbiw__sbraw(a)[1] + +#define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) +#define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0) +#define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) + +#define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) +#define stbiw__sbcount(a) ((a) ? 
stbiw__sbn(a) : 0) +#define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0) + +static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) +{ + int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1; + void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2); + STBIW_ASSERT(p); + if (p) { + if (!*arr) ((int *) p)[1] = 0; + *arr = (void *) ((int *) p + 2); + stbiw__sbm(*arr) = m; + } + return *arr; +} + +static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount) +{ + while (*bitcount >= 8) { + stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer)); + *bitbuffer >>= 8; + *bitcount -= 8; + } + return data; +} + +static int stbiw__zlib_bitrev(int code, int codebits) +{ + int res=0; + while (codebits--) { + res = (res << 1) | (code & 1); + code >>= 1; + } + return res; +} + +static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit) +{ + int i; + for (i=0; i < limit && i < 258; ++i) + if (a[i] != b[i]) break; + return i; +} + +static unsigned int stbiw__zhash(unsigned char *data) +{ + stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16); + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + return hash; +} + +#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount)) +#define stbiw__zlib_add(code,codebits) \ + (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) +#define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) +// default huffman tables +#define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8) +#define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9) +#define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7) +#define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8) +#define stbiw__zlib_huff(n) ((n) <= 143 ? 
stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) +#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) + +#define stbiw__ZHASH 16384 + +#endif // STBIW_ZLIB_COMPRESS + +unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality) +{ +#ifdef STBIW_ZLIB_COMPRESS + // user provided a zlib compress implementation, use that + return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality); +#else // use builtin + static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; + static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; + static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; + static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; + unsigned int bitbuf=0; + int i,j, bitcount=0; + unsigned char *out = NULL; + unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(char**)); + if (hash_table == NULL) + return NULL; + if (quality < 5) quality = 5; + + stbiw__sbpush(out, 0x78); // DEFLATE 32K window + stbiw__sbpush(out, 0x5e); // FLEVEL = 1 + stbiw__zlib_add(1,1); // BFINAL = 1 + stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman + + for (i=0; i < stbiw__ZHASH; ++i) + hash_table[i] = NULL; + + i=0; + while (i < data_len-3) { + // hash next 3 bytes of data to be compressed + int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3; + unsigned char *bestloc = 0; + unsigned char **hlist = hash_table[h]; + int n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32768) { // if entry lies within window + int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i); + if (d >= best) best=d,bestloc=hlist[j]; + } + 
} + // when hash table entry is too long, delete half the entries + if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) { + STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality); + stbiw__sbn(hash_table[h]) = quality; + } + stbiw__sbpush(hash_table[h],data+i); + + if (bestloc) { + // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal + h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1); + hlist = hash_table[h]; + n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32767) { + int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1); + if (e > best) { // if next match is better, bail on current match + bestloc = NULL; + break; + } + } + } + } + + if (bestloc) { + int d = (int) (data+i - bestloc); // distance back + STBIW_ASSERT(d <= 32767 && best <= 258); + for (j=0; best > lengthc[j+1]-1; ++j); + stbiw__zlib_huff(j+257); + if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); + for (j=0; d > distc[j+1]-1; ++j); + stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); + if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]); + i += best; + } else { + stbiw__zlib_huffb(data[i]); + ++i; + } + } + // write out final bytes + for (;i < data_len; ++i) + stbiw__zlib_huffb(data[i]); + stbiw__zlib_huff(256); // end of block + // pad with 0 bits to byte boundary + while (bitcount) + stbiw__zlib_add(0,1); + + for (i=0; i < stbiw__ZHASH; ++i) + (void) stbiw__sbfree(hash_table[i]); + STBIW_FREE(hash_table); + + { + // compute adler32 on input + unsigned int s1=1, s2=0; + int blocklen = (int) (data_len % 5552); + j=0; + while (j < data_len) { + for (i=0; i < blocklen; ++i) s1 += data[j+i], s2 += s1; + s1 %= 65521, s2 %= 65521; + j += blocklen; + blocklen = 5552; + } + stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s2)); + stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s1)); + } + *out_len = stbiw__sbn(out); + // make 
returned pointer freeable + STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len); + return (unsigned char *) stbiw__sbraw(out); +#endif // STBIW_ZLIB_COMPRESS +} + +static unsigned int stbiw__crc32(unsigned char *buffer, int len) +{ + static unsigned int crc_table[256] = + { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 
0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + }; + + unsigned int crc = ~0u; + int i; + for (i=0; i < len; ++i) + crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; + return ~crc; +} + +#define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4) +#define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v)); +#define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3]) + +static void stbiw__wpcrc(unsigned char **data, int len) +{ + unsigned int crc = stbiw__crc32(*data - len - 4, len+4); + stbiw__wp32(*data, crc); +} + +static unsigned 
char stbiw__paeth(int a, int b, int c) +{ + int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c); + if (pa <= pb && pa <= pc) return STBIW_UCHAR(a); + if (pb <= pc) return STBIW_UCHAR(b); + return STBIW_UCHAR(c); +} + +// @OPTIMIZE: provide an option that always forces left-predict or paeth predict +static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer) +{ + static int mapping[] = { 0,1,2,3,4 }; + static int firstmap[] = { 0,1,0,5,6 }; + int *mymap = (y != 0) ? mapping : firstmap; + int i; + int type = mymap[filter_type]; + unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y); + int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes; + for (i = 0; i < n; ++i) { + switch (type) { + case 0: line_buffer[i] = z[i]; break; + case 1: line_buffer[i] = z[i]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break; + case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break; + case 5: line_buffer[i] = z[i]; break; + case 6: line_buffer[i] = z[i]; break; + } + } + for (i=n; i < width*n; ++i) { + switch (type) { + case 0: line_buffer[i] = z[i]; break; + case 1: line_buffer[i] = z[i] - z[i-n]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break; + case 4: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break; + case 5: line_buffer[i] = z[i] - (z[i-n]>>1); break; + case 6: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; + } + } +} + +unsigned char *stbi_write_png_to_mem(unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len) +{ + int force_filter = stbi_write_force_png_filter; + int ctype[5] = { -1, 0, 4, 2, 6 }; + unsigned char sig[8] 
= { 137,80,78,71,13,10,26,10 }; + unsigned char *out,*o, *filt, *zlib; + signed char *line_buffer; + int j,zlen; + + if (stride_bytes == 0) + stride_bytes = x * n; + + if (force_filter >= 5) { + force_filter = -1; + } + + filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; + line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; } + for (j=0; j < y; ++j) { + int filter_type; + if (force_filter > -1) { + filter_type = force_filter; + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, force_filter, line_buffer); + } else { // Estimate the best filter by running through all of them: + int best_filter = 0, best_filter_val = 0x7fffffff, est, i; + for (filter_type = 0; filter_type < 5; filter_type++) { + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, filter_type, line_buffer); + + // Estimate the entropy of the line using this filter; the less, the better. + est = 0; + for (i = 0; i < x*n; ++i) { + est += abs((signed char) line_buffer[i]); + } + if (est < best_filter_val) { + best_filter_val = est; + best_filter = filter_type; + } + } + if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, best_filter, line_buffer); + filter_type = best_filter; + } + } + // when we get here, filter_type contains the filter type, and line_buffer contains the data + filt[j*(x*n+1)] = (unsigned char) filter_type; + STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); + } + STBIW_FREE(line_buffer); + zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level); + STBIW_FREE(filt); + if (!zlib) return 0; + + // each tag requires 12 bytes of overhead + out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12); + if (!out) return 0; + *out_len = 8 + 12+13 + 12+zlen + 12; + + o=out; + STBIW_MEMMOVE(o,sig,8); o+= 8; + stbiw__wp32(o, 13); // header length + stbiw__wptag(o, "IHDR"); + 
stbiw__wp32(o, x); + stbiw__wp32(o, y); + *o++ = 8; + *o++ = STBIW_UCHAR(ctype[n]); + *o++ = 0; + *o++ = 0; + *o++ = 0; + stbiw__wpcrc(&o,13); + + stbiw__wp32(o, zlen); + stbiw__wptag(o, "IDAT"); + STBIW_MEMMOVE(o, zlib, zlen); + o += zlen; + STBIW_FREE(zlib); + stbiw__wpcrc(&o, zlen); + + stbiw__wp32(o,0); + stbiw__wptag(o, "IEND"); + stbiw__wpcrc(&o,0); + + STBIW_ASSERT(o == out + *out_len); + + return out; +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes) +{ + FILE *f; + int len; + unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; +#ifdef STBI_MSC_SECURE_CRT + if (fopen_s(&f, filename, "wb")) + f = NULL; +#else + f = fopen(filename, "wb"); +#endif + if (!f) { STBIW_FREE(png); return 0; } + fwrite(png, 1, len, f); + fclose(f); + STBIW_FREE(png); + return 1; +} +#endif + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes) +{ + int len; + unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; + func(context, png, len); + STBIW_FREE(png); + return 1; +} + + +/* *************************************************************************** + * + * JPEG writer + * + * This is based on Jon Olick's jo_jpeg.cpp: + * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html + */ + +static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18, + 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; + +static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) { + int bitBuf = *bitBufP, bitCnt = *bitCntP; + bitCnt += bs[1]; + bitBuf |= bs[0] << (24 - 
bitCnt); + while(bitCnt >= 8) { + unsigned char c = (bitBuf >> 16) & 255; + stbiw__putc(s, c); + if(c == 255) { + stbiw__putc(s, 0); + } + bitBuf <<= 8; + bitCnt -= 8; + } + *bitBufP = bitBuf; + *bitCntP = bitCnt; +} + +static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) { + float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p; + float z1, z2, z3, z4, z5, z11, z13; + + float tmp0 = d0 + d7; + float tmp7 = d0 - d7; + float tmp1 = d1 + d6; + float tmp6 = d1 - d6; + float tmp2 = d2 + d5; + float tmp5 = d2 - d5; + float tmp3 = d3 + d4; + float tmp4 = d3 - d4; + + // Even part + float tmp10 = tmp0 + tmp3; // phase 2 + float tmp13 = tmp0 - tmp3; + float tmp11 = tmp1 + tmp2; + float tmp12 = tmp1 - tmp2; + + d0 = tmp10 + tmp11; // phase 3 + d4 = tmp10 - tmp11; + + z1 = (tmp12 + tmp13) * 0.707106781f; // c4 + d2 = tmp13 + z1; // phase 5 + d6 = tmp13 - z1; + + // Odd part + tmp10 = tmp4 + tmp5; // phase 2 + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + // The rotator is modified from fig 4-8 to avoid extra negations. + z5 = (tmp10 - tmp12) * 0.382683433f; // c6 + z2 = tmp10 * 0.541196100f + z5; // c2-c6 + z4 = tmp12 * 1.306562965f + z5; // c2+c6 + z3 = tmp11 * 0.707106781f; // c4 + + z11 = tmp7 + z3; // phase 5 + z13 = tmp7 - z3; + + *d5p = z13 + z2; // phase 6 + *d3p = z13 - z2; + *d1p = z11 + z4; + *d7p = z11 - z4; + + *d0p = d0; *d2p = d2; *d4p = d4; *d6p = d6; +} + +static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) { + int tmp1 = val < 0 ? -val : val; + val = val < 0 ? 
val-1 : val; + bits[1] = 1; + while(tmp1 >>= 1) { + ++bits[1]; + } + bits[0] = val & ((1<0)&&(DU[end0pos]==0); --end0pos) { + } + // end0pos = first element in reverse order !=0 + if(end0pos == 0) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + return DU[0]; + } + for(i = 1; i <= end0pos; ++i) { + int startpos = i; + int nrzeroes; + unsigned short bits[2]; + for (; DU[i]==0 && i<=end0pos; ++i) { + } + nrzeroes = i-startpos; + if ( nrzeroes >= 16 ) { + int lng = nrzeroes>>4; + int nrmarker; + for (nrmarker=1; nrmarker <= lng; ++nrmarker) + stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); + nrzeroes &= 15; + } + stbiw__jpg_calcBits(DU[i], bits); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); + } + if(end0pos != 63) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + } + return DU[0]; +} + +static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) { + // Constants that don't pollute global namespace + static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0}; + static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d}; + static const unsigned char std_ac_luminance_values[] = { + 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, + 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, + 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, + 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, + 
0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, + 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, + 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0}; + static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77}; + static const unsigned char std_ac_chrominance_values[] = { + 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, + 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26, + 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, + 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, + 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, + 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, + 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + // Huffman tables + static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}}; + static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}}; + static const unsigned short YAC_HT[256][2] = { + {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const unsigned short UVAC_HT[256][2] = { + {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22, + 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99}; + static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99, + 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99}; + static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, + 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; + + int row, col, i, k; + float fdtbl_Y[64], fdtbl_UV[64]; + unsigned char YTable[64], UVTable[64]; + + if(!data || !width || !height || comp > 4 || comp < 1) { + return 0; + } + + quality = quality ? quality : 90; + quality = quality < 1 ? 1 : quality > 100 ? 100 : quality; + quality = quality < 50 ? 5000 / quality : 200 - quality * 2; + + for(i = 0; i < 64; ++i) { + int uvti, yti = (YQT[i]*quality+50)/100; + YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 
255 : yti); + uvti = (UVQT[i]*quality+50)/100; + UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti); + } + + for(row = 0, k = 0; row < 8; ++row) { + for(col = 0; col < 8; ++col, ++k) { + fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + } + } + + // Write Headers + { + static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; + static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; + const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width), + 3,1,0x11,0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; + s->func(s->context, (void*)head0, sizeof(head0)); + s->func(s->context, (void*)YTable, sizeof(YTable)); + stbiw__putc(s, 1); + s->func(s->context, UVTable, sizeof(UVTable)); + s->func(s->context, (void*)head1, sizeof(head1)); + s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1); + s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); + stbiw__putc(s, 0x10); // HTYACinfo + s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1); + s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); + stbiw__putc(s, 1); // HTUDCinfo + s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values)); + stbiw__putc(s, 0x11); // HTUACinfo + s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); + s->func(s->context, (void*)head2, sizeof(head2)); + } + + // Encode 8x8 macroblocks 
+ { + static const unsigned short fillBits[] = {0x7F, 7}; + const unsigned char *imageData = (const unsigned char *)data; + int DCY=0, DCU=0, DCV=0; + int bitBuf=0, bitCnt=0; + // comp == 2 is grey+alpha (alpha is ignored) + int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; + int x, y, pos; + for(y = 0; y < height; y += 8) { + for(x = 0; x < width; x += 8) { + float YDU[64], UDU[64], VDU[64]; + for(row = y, pos = 0; row < y+8; ++row) { + for(col = x; col < x+8; ++col, ++pos) { + int p = (stbi__flip_vertically_on_write ? height-1-row : row)*width*comp + col*comp; + float r, g, b; + if(row >= height) { + p -= width*comp*(row+1 - height); + } + if(col >= width) { + p -= comp*(col+1 - width); + } + + r = imageData[p+0]; + g = imageData[p+ofsG]; + b = imageData[p+ofsB]; + YDU[pos]=+0.29900f*r+0.58700f*g+0.11400f*b-128; + UDU[pos]=-0.16874f*r-0.33126f*g+0.50000f*b; + VDU[pos]=+0.50000f*r-0.41869f*g-0.08131f*b; + } + } + + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, YDU, fdtbl_Y, DCY, YDC_HT, YAC_HT); + DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, UDU, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); + DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, VDU, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); + } + } + + // Do the bit alignment of the EOI marker + stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); + } + + // EOI + stbiw__putc(s, 0xFF); + stbiw__putc(s, 0xD9); + + return 1; +} + +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality); +} + + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); + stbi__end_write_file(&s); + return r; + } else + return 
0; +} +#endif + +#endif // STB_IMAGE_WRITE_IMPLEMENTATION + +/* Revision history + 1.09 (2018-02-11) + fix typo in zlib quality API, improve STB_I_W_STATIC in C++ + 1.08 (2018-01-29) + add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter + 1.07 (2017-07-24) + doc fix + 1.06 (2017-07-23) + writing JPEG (using Jon Olick's code) + 1.05 ??? + 1.04 (2017-03-03) + monochrome BMP expansion + 1.03 ??? + 1.02 (2016-04-02) + avoid allocating large structures on the stack + 1.01 (2016-01-16) + STBIW_REALLOC_SIZED: support allocators with no realloc support + avoid race-condition in crc initialization + minor compile issues + 1.00 (2015-09-14) + installable file IO function + 0.99 (2015-09-13) + warning fixes; TGA rle support + 0.98 (2015-04-08) + added STBIW_MALLOC, STBIW_ASSERT etc + 0.97 (2015-01-18) + fixed HDR asserts, rewrote HDR rle logic + 0.96 (2015-01-17) + add HDR output + fix monochrome BMP + 0.95 (2014-08-17) + add monochrome TGA output + 0.94 (2014-05-31) + rename private functions to avoid conflicts with stb_image.h + 0.93 (2014-05-27) + warning fixes + 0.92 (2010-08-01) + casts to unsigned char to fix warnings + 0.91 (2010-07-17) + first public release + 0.90 first internal release +*/ + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/workloads/realworld/uvm/darknet/src/tree.c b/workloads/realworld/uvm/darknet/src/tree.c new file mode 100644 index 0000000000000000000000000000000000000000..67b6d431f6f7e92ede234c71ecae9bd9146dc71f --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/tree.c @@ -0,0 +1,139 @@ +#include +#include +#include "tree.h" +#include "utils.h" +#include "data.h" + +void change_leaves(tree *t, char *leaf_list) +{ + list *llist = get_paths(leaf_list); + char **leaves = (char **)list_to_array(llist); + int n = llist->size; + int i,j; + int found = 0; + for(i = 0; i < t->n; ++i){ + t->leaf[i] = 0; + for(j = 0; j < n; ++j){ + if (0==strcmp(t->name[i], leaves[j])){ + t->leaf[i] = 1; + ++found; + break; + } + } + } + fprintf(stderr, "Found %d leaves.\n", found); +} + +float get_hierarchy_probability(float *x, tree *hier, int c, int stride) +{ + float p = 1; + while(c >= 0){ + p = p * x[c*stride]; + c = hier->parent[c]; + } + return p; +} + +void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride) +{ + int j; + for(j = 0; j < n; ++j){ + int parent = hier->parent[j]; + if(parent >= 0){ + predictions[j*stride] *= predictions[parent*stride]; + } + } + if(only_leaves){ + for(j = 0; j < n; ++j){ + if(!hier->leaf[j]) predictions[j*stride] = 0; + } + } +} + +int 
hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride) +{ + float p = 1; + int group = 0; + int i; + while(1){ + float max = 0; + int max_i = 0; + + for(i = 0; i < hier->group_size[group]; ++i){ + int index = i + hier->group_offset[group]; + float val = predictions[(i + hier->group_offset[group])*stride]; + if(val > max){ + max_i = index; + max = val; + } + } + if(p*max > thresh){ + p = p*max; + group = hier->child[max_i]; + if(hier->child[max_i] < 0) return max_i; + } else if (group == 0){ + return max_i; + } else { + return hier->parent[hier->group_offset[group]]; + } + } + return 0; +} + +tree *read_tree(char *filename) +{ + tree t = {0}; + FILE *fp = fopen(filename, "r"); + + char *line; + int last_parent = -1; + int group_size = 0; + int groups = 0; + int n = 0; + while((line=fgetl(fp)) != 0){ + char *id = calloc(256, sizeof(char)); + int parent = -1; + sscanf(line, "%s %d", id, &parent); + t.parent = realloc(t.parent, (n+1)*sizeof(int)); + t.parent[n] = parent; + + t.child = realloc(t.child, (n+1)*sizeof(int)); + t.child[n] = -1; + + t.name = realloc(t.name, (n+1)*sizeof(char *)); + t.name[n] = id; + if(parent != last_parent){ + ++groups; + t.group_offset = realloc(t.group_offset, groups * sizeof(int)); + t.group_offset[groups - 1] = n - group_size; + t.group_size = realloc(t.group_size, groups * sizeof(int)); + t.group_size[groups - 1] = group_size; + group_size = 0; + last_parent = parent; + } + t.group = realloc(t.group, (n+1)*sizeof(int)); + t.group[n] = groups; + if (parent >= 0) { + t.child[parent] = groups; + } + ++n; + ++group_size; + } + ++groups; + t.group_offset = realloc(t.group_offset, groups * sizeof(int)); + t.group_offset[groups - 1] = n - group_size; + t.group_size = realloc(t.group_size, groups * sizeof(int)); + t.group_size[groups - 1] = group_size; + t.n = n; + t.groups = groups; + t.leaf = calloc(n, sizeof(int)); + int i; + for(i = 0; i < n; ++i) t.leaf[i] = 1; + for(i = 0; i < n; ++i) if(t.parent[i] >= 0) 
t.leaf[t.parent[i]] = 0; + + fclose(fp); + tree *tree_ptr = calloc(1, sizeof(tree)); + *tree_ptr = t; + //error(0); + return tree_ptr; +} diff --git a/workloads/realworld/uvm/darknet/src/tree.h b/workloads/realworld/uvm/darknet/src/tree.h new file mode 100644 index 0000000000000000000000000000000000000000..3802b8ead806266edd291de5407b08c2d7ed5dd1 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/tree.h @@ -0,0 +1,8 @@ +#ifndef TREE_H +#define TREE_H +#include "darknet.h" + +int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride); +float get_hierarchy_probability(float *x, tree *hier, int c, int stride); + +#endif diff --git a/workloads/realworld/uvm/darknet/src/upsample_layer.c b/workloads/realworld/uvm/darknet/src/upsample_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..83f32ea5f41b4c787c38e5324e3e7dd4909ca928 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/upsample_layer.c @@ -0,0 +1,106 @@ +#include "upsample_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + +layer make_upsample_layer(int batch, int w, int h, int c, int stride) +{ + layer l = {0}; + l.type = UPSAMPLE; + l.batch = batch; + l.w = w; + l.h = h; + l.c = c; + l.out_w = w*stride; + l.out_h = h*stride; + l.out_c = c; + if(stride < 0){ + stride = -stride; + l.reverse=1; + l.out_w = w/stride; + l.out_h = h/stride; + } + l.stride = stride; + l.outputs = l.out_w*l.out_h*l.out_c; + l.inputs = l.w*l.h*l.c; + l.delta = calloc(l.outputs*batch, sizeof(float)); + l.output = calloc(l.outputs*batch, sizeof(float));; + + l.forward = forward_upsample_layer; + l.backward = backward_upsample_layer; + #ifdef GPU + l.forward_gpu = forward_upsample_layer_gpu; + l.backward_gpu = backward_upsample_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + #endif + if(l.reverse) fprintf(stderr, "downsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, 
c, l.out_w, l.out_h, l.out_c); + else fprintf(stderr, "upsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); + return l; +} + +void resize_upsample_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + l->out_w = w*l->stride; + l->out_h = h*l->stride; + if(l->reverse){ + l->out_w = w/l->stride; + l->out_h = h/l->stride; + } + l->outputs = l->out_w*l->out_h*l->out_c; + l->inputs = l->h*l->w*l->c; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + +void forward_upsample_layer(const layer l, network net) +{ + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + if(l.reverse){ + upsample_cpu(l.output, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input); + }else{ + upsample_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output); + } +} + +void backward_upsample_layer(const layer l, network net) +{ + if(l.reverse){ + upsample_cpu(l.delta, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, net.delta); + }else{ + upsample_cpu(net.delta, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta); + } +} + +#ifdef GPU +void forward_upsample_layer_gpu(const layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + if(l.reverse){ + upsample_gpu(l.output_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input_gpu); + }else{ + upsample_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output_gpu); + } +} + +void backward_upsample_layer_gpu(const layer l, network net) +{ + if(l.reverse){ + upsample_gpu(l.delta_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, net.delta_gpu); + }else{ + upsample_gpu(net.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, 
l.delta_gpu); + } +} +#endif diff --git a/workloads/realworld/uvm/darknet/src/upsample_layer.h b/workloads/realworld/uvm/darknet/src/upsample_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..86790d1088354ea9c46a4b20fbe1dacf36925ca8 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/upsample_layer.h @@ -0,0 +1,15 @@ +#ifndef UPSAMPLE_LAYER_H +#define UPSAMPLE_LAYER_H +#include "darknet.h" + +layer make_upsample_layer(int batch, int w, int h, int c, int stride); +void forward_upsample_layer(const layer l, network net); +void backward_upsample_layer(const layer l, network net); +void resize_upsample_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_upsample_layer_gpu(const layer l, network net); +void backward_upsample_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm/darknet/src/utils.c b/workloads/realworld/uvm/darknet/src/utils.c new file mode 100644 index 0000000000000000000000000000000000000000..626b4678c1e2779552ed9d34f19ce4b0f57d9ded --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/utils.c @@ -0,0 +1,726 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + + +/* +// old timing. is it better? who knows!! 
+double get_wall_time() +{ + struct timeval time; + if (gettimeofday(&time,NULL)){ + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * .000001; +} +*/ + +double what_time_is_it_now() +{ + struct timeval time; + if (gettimeofday(&time,NULL)){ + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * .000001; +} + +int *read_intlist(char *gpu_list, int *ngpus, int d) +{ + int *gpus = 0; + if(gpu_list){ + int len = strlen(gpu_list); + *ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++*ngpus; + } + gpus = calloc(*ngpus, sizeof(int)); + for(i = 0; i < *ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpus = calloc(1, sizeof(float)); + *gpus = d; + *ngpus = 1; + } + return gpus; +} + +int *read_map(char *filename) +{ + int n = 0; + int *map = 0; + char *str; + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + while((str=fgetl(file))){ + ++n; + map = realloc(map, n*sizeof(int)); + map[n-1] = atoi(str); + } + return map; +} + +void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections) +{ + size_t i; + for(i = 0; i < sections; ++i){ + size_t start = n*i/sections; + size_t end = n*(i+1)/sections; + size_t num = end-start; + shuffle(arr+(start*size), num, size); + } +} + +void shuffle(void *arr, size_t n, size_t size) +{ + size_t i; + void *swp = calloc(1, size); + for(i = 0; i < n-1; ++i){ + size_t j = i + rand()/(RAND_MAX / (n-i)+1); + memcpy(swp, arr+(j*size), size); + memcpy(arr+(j*size), arr+(i*size), size); + memcpy(arr+(i*size), swp, size); + } +} + +int *random_index_order(int min, int max) +{ + int *inds = calloc(max-min, sizeof(int)); + int i; + for(i = min; i < max; ++i){ + inds[i] = i; + } + for(i = min; i < max-1; ++i){ + int swap = inds[i]; + int index = i + rand()%(max-i); + inds[i] = inds[index]; + inds[index] = swap; + } + return inds; +} + +void del_arg(int argc, char **argv, int index) +{ + int i; + for(i = index; i < 
argc-1; ++i) argv[i] = argv[i+1]; + argv[i] = 0; +} + +int find_arg(int argc, char* argv[], char *arg) +{ + int i; + for(i = 0; i < argc; ++i) { + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)) { + del_arg(argc, argv, i); + return 1; + } + } + return 0; +} + +int find_int_arg(int argc, char **argv, char *arg, int def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = atoi(argv[i+1]); + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + +float find_float_arg(int argc, char **argv, char *arg, float def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = atof(argv[i+1]); + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + +char *find_char_arg(int argc, char **argv, char *arg, char *def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = argv[i+1]; + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + + +char *basecfg(char *cfgfile) +{ + char *c = cfgfile; + char *next; + while((next = strchr(c, '/'))) + { + c = next+1; + } + c = copy_string(c); + next = strchr(c, '.'); + if (next) *next = 0; + return c; +} + +int alphanum_to_int(char c) +{ + return (c < 58) ? c - 48 : c-87; +} +char int_to_alphanum(int i) +{ + if (i == 36) return '.'; + return (i < 10) ? i + 48 : i + 87; +} + +void pm(int M, int N, float *A) +{ + int i,j; + for(i =0 ; i < M; ++i){ + printf("%d ", i+1); + for(j = 0; j < N; ++j){ + printf("%2.4f, ", A[i*N+j]); + } + printf("\n"); + } + printf("\n"); +} + +void find_replace(char *str, char *orig, char *rep, char *output) +{ + char buffer[4096] = {0}; + char *p; + + sprintf(buffer, "%s", str); + if(!(p = strstr(buffer, orig))){ // Is 'orig' even in 'str'? 
+ sprintf(output, "%s", str); + return; + } + + *p = '\0'; + + sprintf(output, "%s%s%s", buffer, rep, p+strlen(orig)); +} + +float sec(clock_t clocks) +{ + return (float)clocks/CLOCKS_PER_SEC; +} + +void top_k(float *a, int n, int k, int *index) +{ + int i,j; + for(j = 0; j < k; ++j) index[j] = -1; + for(i = 0; i < n; ++i){ + int curr = i; + for(j = 0; j < k; ++j){ + if((index[j] < 0) || a[curr] > a[index[j]]){ + int swap = curr; + curr = index[j]; + index[j] = swap; + } + } + } +} + +void error(const char *s) +{ + perror(s); + assert(0); + exit(-1); +} + +unsigned char *read_file(char *filename) +{ + FILE *fp = fopen(filename, "rb"); + size_t size; + + fseek(fp, 0, SEEK_END); + size = ftell(fp); + fseek(fp, 0, SEEK_SET); + + unsigned char *text = calloc(size+1, sizeof(char)); + fread(text, 1, size, fp); + fclose(fp); + return text; +} + +void malloc_error() +{ + fprintf(stderr, "Malloc error\n"); + exit(-1); +} + +void file_error(char *s) +{ + fprintf(stderr, "Couldn't open file: %s\n", s); + exit(0); +} + +list *split_str(char *s, char delim) +{ + size_t i; + size_t len = strlen(s); + list *l = make_list(); + list_insert(l, s); + for(i = 0; i < len; ++i){ + if(s[i] == delim){ + s[i] = '\0'; + list_insert(l, &(s[i+1])); + } + } + return l; +} + +void strip(char *s) +{ + size_t i; + size_t len = strlen(s); + size_t offset = 0; + for(i = 0; i < len; ++i){ + char c = s[i]; + if(c==' '||c=='\t'||c=='\n') ++offset; + else s[i-offset] = c; + } + s[len-offset] = '\0'; +} + +void strip_char(char *s, char bad) +{ + size_t i; + size_t len = strlen(s); + size_t offset = 0; + for(i = 0; i < len; ++i){ + char c = s[i]; + if(c==bad) ++offset; + else s[i-offset] = c; + } + s[len-offset] = '\0'; +} + +void free_ptrs(void **ptrs, int n) +{ + int i; + for(i = 0; i < n; ++i) free(ptrs[i]); + free(ptrs); +} + +char *fgetl(FILE *fp) +{ + if(feof(fp)) return 0; + size_t size = 512; + char *line = malloc(size*sizeof(char)); + if(!fgets(line, size, fp)){ + free(line); + return 0; + } + + 
size_t curr = strlen(line); + + while((line[curr-1] != '\n') && !feof(fp)){ + if(curr == size-1){ + size *= 2; + line = realloc(line, size*sizeof(char)); + if(!line) { + printf("%ld\n", size); + malloc_error(); + } + } + size_t readsize = size-curr; + if(readsize > INT_MAX) readsize = INT_MAX-1; + fgets(&line[curr], readsize, fp); + curr = strlen(line); + } + if(line[curr-1] == '\n') line[curr-1] = '\0'; + + return line; +} + +int read_int(int fd) +{ + int n = 0; + int next = read(fd, &n, sizeof(int)); + if(next <= 0) return -1; + return n; +} + +void write_int(int fd, int n) +{ + int next = write(fd, &n, sizeof(int)); + if(next <= 0) error("read failed"); +} + +int read_all_fail(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + int next = read(fd, buffer + n, bytes-n); + if(next <= 0) return 1; + n += next; + } + return 0; +} + +int write_all_fail(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + size_t next = write(fd, buffer + n, bytes-n); + if(next <= 0) return 1; + n += next; + } + return 0; +} + +void read_all(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + int next = read(fd, buffer + n, bytes-n); + if(next <= 0) error("read failed"); + n += next; + } +} + +void write_all(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + size_t next = write(fd, buffer + n, bytes-n); + if(next <= 0) error("write failed"); + n += next; + } +} + + +char *copy_string(char *s) +{ + char *copy = malloc(strlen(s)+1); + strncpy(copy, s, strlen(s)+1); + return copy; +} + +list *parse_csv_line(char *line) +{ + list *l = make_list(); + char *c, *p; + int in = 0; + for(c = line, p = line; *c != '\0'; ++c){ + if(*c == '"') in = !in; + else if(*c == ',' && !in){ + *c = '\0'; + list_insert(l, copy_string(p)); + p = c+1; + } + } + list_insert(l, copy_string(p)); + return l; +} + +int count_fields(char *line) +{ + int count = 0; + int done = 0; + char *c; + for(c = line; !done; ++c){ 
+ done = (*c == '\0'); + if(*c == ',' || done) ++count; + } + return count; +} + +float *parse_fields(char *line, int n) +{ + float *field = calloc(n, sizeof(float)); + char *c, *p, *end; + int count = 0; + int done = 0; + for(c = line, p = line; !done; ++c){ + done = (*c == '\0'); + if(*c == ',' || done){ + *c = '\0'; + field[count] = strtod(p, &end); + if(p == c) field[count] = nan(""); + if(end != c && (end != c-1 || *end != '\r')) field[count] = nan(""); //DOS file formats! + p = c+1; + ++count; + } + } + return field; +} + +float sum_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i) sum += a[i]; + return sum; +} + +float mean_array(float *a, int n) +{ + return sum_array(a,n)/n; +} + +void mean_arrays(float **a, int n, int els, float *avg) +{ + int i; + int j; + memset(avg, 0, els*sizeof(float)); + for(j = 0; j < n; ++j){ + for(i = 0; i < els; ++i){ + avg[i] += a[j][i]; + } + } + for(i = 0; i < els; ++i){ + avg[i] /= n; + } +} + +void print_statistics(float *a, int n) +{ + float m = mean_array(a, n); + float v = variance_array(a, n); + printf("MSE: %.6f, Mean: %.6f, Variance: %.6f\n", mse_array(a, n), m, v); +} + +float variance_array(float *a, int n) +{ + int i; + float sum = 0; + float mean = mean_array(a, n); + for(i = 0; i < n; ++i) sum += (a[i] - mean)*(a[i]-mean); + float variance = sum/n; + return variance; +} + +int constrain_int(int a, int min, int max) +{ + if (a < min) return min; + if (a > max) return max; + return a; +} + +float constrain(float min, float max, float a) +{ + if (a < min) return min; + if (a > max) return max; + return a; +} + +float dist_array(float *a, float *b, int n, int sub) +{ + int i; + float sum = 0; + for(i = 0; i < n; i += sub) sum += pow(a[i]-b[i], 2); + return sqrt(sum); +} + +float mse_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i) sum += a[i]*a[i]; + return sqrt(sum/n); +} + +void normalize_array(float *a, int n) +{ + int i; + float mu = mean_array(a,n); + float 
sigma = sqrt(variance_array(a,n)); + for(i = 0; i < n; ++i){ + a[i] = (a[i] - mu)/sigma; + } + mu = mean_array(a,n); + sigma = sqrt(variance_array(a,n)); +} + +void translate_array(float *a, int n, float s) +{ + int i; + for(i = 0; i < n; ++i){ + a[i] += s; + } +} + +float mag_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + sum += a[i]*a[i]; + } + return sqrt(sum); +} + +void scale_array(float *a, int n, float s) +{ + int i; + for(i = 0; i < n; ++i){ + a[i] *= s; + } +} + +int sample_array(float *a, int n) +{ + float sum = sum_array(a, n); + scale_array(a, n, 1./sum); + float r = rand_uniform(0, 1); + int i; + for(i = 0; i < n; ++i){ + r = r - a[i]; + if (r <= 0) return i; + } + return n-1; +} + +int max_int_index(int *a, int n) +{ + if(n <= 0) return -1; + int i, max_i = 0; + int max = a[0]; + for(i = 1; i < n; ++i){ + if(a[i] > max){ + max = a[i]; + max_i = i; + } + } + return max_i; +} + +int max_index(float *a, int n) +{ + if(n <= 0) return -1; + int i, max_i = 0; + float max = a[0]; + for(i = 1; i < n; ++i){ + if(a[i] > max){ + max = a[i]; + max_i = i; + } + } + return max_i; +} + +int int_index(int *a, int val, int n) +{ + int i; + for(i = 0; i < n; ++i){ + if(a[i] == val) return i; + } + return -1; +} + +int rand_int(int min, int max) +{ + if (max < min){ + int s = min; + min = max; + max = s; + } + int r = (rand()%(max - min + 1)) + min; + return r; +} + +// From http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform +float rand_normal() +{ + static int haveSpare = 0; + static double rand1, rand2; + + if(haveSpare) + { + haveSpare = 0; + return sqrt(rand1) * sin(rand2); + } + + haveSpare = 1; + + rand1 = rand() / ((double) RAND_MAX); + if(rand1 < 1e-100) rand1 = 1e-100; + rand1 = -2 * log(rand1); + rand2 = (rand() / ((double) RAND_MAX)) * TWO_PI; + + return sqrt(rand1) * cos(rand2); +} + +/* + float rand_normal() + { + int n = 12; + int i; + float sum= 0; + for(i = 0; i < n; ++i) sum += (float)rand()/RAND_MAX; + return 
sum-n/2.; + } + */ + +size_t rand_size_t() +{ + return ((size_t)(rand()&0xff) << 56) | + ((size_t)(rand()&0xff) << 48) | + ((size_t)(rand()&0xff) << 40) | + ((size_t)(rand()&0xff) << 32) | + ((size_t)(rand()&0xff) << 24) | + ((size_t)(rand()&0xff) << 16) | + ((size_t)(rand()&0xff) << 8) | + ((size_t)(rand()&0xff) << 0); +} + +float rand_uniform(float min, float max) +{ + if(max < min){ + float swap = min; + min = max; + max = swap; + } + return ((float)rand()/RAND_MAX * (max - min)) + min; +} + +float rand_scale(float s) +{ + float scale = rand_uniform(1, s); + if(rand()%2) return scale; + return 1./scale; +} + +float **one_hot_encode(float *a, int n, int k) +{ + int i; + float **t = calloc(n, sizeof(float*)); + for(i = 0; i < n; ++i){ + t[i] = calloc(k, sizeof(float)); + int index = (int)a[i]; + t[i][index] = 1; + } + return t; +} + diff --git a/workloads/realworld/uvm/darknet/src/utils.h b/workloads/realworld/uvm/darknet/src/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..ef24da79888612f5b48fbb4dc233c483590e0c34 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/utils.h @@ -0,0 +1,53 @@ +#ifndef UTILS_H +#define UTILS_H +#include +#include +#include "darknet.h" +#include "list.h" + +#define TIME(a) \ + do { \ + double start = what_time_is_it_now(); \ + a; \ + printf("%s took: %f seconds\n", #a, what_time_is_it_now() - start); \ + } while (0) + +#define TWO_PI 6.2831853071795864769252866f + +double what_time_is_it_now(); +void shuffle(void *arr, size_t n, size_t size); +void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections); +void free_ptrs(void **ptrs, int n); +int alphanum_to_int(char c); +char int_to_alphanum(int i); +int read_int(int fd); +void write_int(int fd, int n); +void read_all(int fd, char *buffer, size_t bytes); +void write_all(int fd, char *buffer, size_t bytes); +int read_all_fail(int fd, char *buffer, size_t bytes); +int write_all_fail(int fd, char *buffer, size_t bytes); +void find_replace(char 
*str, char *orig, char *rep, char *output); +void malloc_error(); +void file_error(char *s); +void strip(char *s); +void strip_char(char *s, char bad); +list *split_str(char *s, char delim); +char *fgetl(FILE *fp); +list *parse_csv_line(char *line); +char *copy_string(char *s); +int count_fields(char *line); +float *parse_fields(char *line, int n); +void translate_array(float *a, int n, float s); +float constrain(float min, float max, float a); +int constrain_int(int a, int min, int max); +float rand_scale(float s); +int rand_int(int min, int max); +void mean_arrays(float **a, int n, int els, float *avg); +float dist_array(float *a, float *b, int n, int sub); +float **one_hot_encode(float *a, int n, int k); +float sec(clock_t clocks); +void print_statistics(float *a, int n); +int int_index(int *a, int val, int n); + +#endif + diff --git a/workloads/realworld/uvm/darknet/src/yolo_layer.c b/workloads/realworld/uvm/darknet/src/yolo_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..049a4d6a92cf7fea667b8de2340822834408bb05 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/yolo_layer.c @@ -0,0 +1,374 @@ +#include "yolo_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes) +{ + int i; + layer l = {0}; + l.type = YOLO; + + l.n = n; + l.total = total; + l.batch = batch; + l.h = h; + l.w = w; + l.c = n*(classes + 4 + 1); + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.cost = calloc(1, sizeof(float)); + l.biases = calloc(total*2, sizeof(float)); + if(mask) l.mask = mask; + else{ + l.mask = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + l.mask[i] = i; + } + } + l.bias_updates = calloc(n*2, sizeof(float)); + l.outputs = h*w*n*(classes + 4 + 1); + l.inputs = l.outputs; + l.truths = 90*(4 + 1); + l.delta = 
calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + for(i = 0; i < total*2; ++i){ + l.biases[i] = .5; + } + + l.forward = forward_yolo_layer; + l.backward = backward_yolo_layer; +#ifdef GPU + l.forward_gpu = forward_yolo_layer_gpu; + l.backward_gpu = backward_yolo_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "yolo\n"); + srand(0); + + return l; +} + +void resize_yolo_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->n*(l->classes + 4 + 1); + l->inputs = l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride) +{ + box b; + b.x = (i + x[index + 0*stride]) / lw; + b.y = (j + x[index + 1*stride]) / lh; + b.w = exp(x[index + 2*stride]) * biases[2*n] / w; + b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h; + return b; +} + +float delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride) +{ + box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); + float iou = box_iou(pred, truth); + + float tx = (truth.x*lw - i); + float ty = (truth.y*lh - j); + float tw = log(truth.w*w / biases[2*n]); + float th = log(truth.h*h / biases[2*n + 1]); + + delta[index + 0*stride] = scale * (tx - x[index + 0*stride]); + delta[index + 1*stride] = scale * (ty - x[index + 1*stride]); + delta[index + 2*stride] = scale * (tw - x[index + 2*stride]); + delta[index + 3*stride] = scale 
* (th - x[index + 3*stride]); + return iou; +} + + +void delta_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat) +{ + int n; + if (delta[index]){ + delta[index + stride*class] = 1 - output[index + stride*class]; + if(avg_cat) *avg_cat += output[index + stride*class]; + return; + } + for(n = 0; n < classes; ++n){ + delta[index + stride*n] = ((n == class)?1 : 0) - output[index + stride*n]; + if(n == class && avg_cat) *avg_cat += output[index + stride*n]; + } +} + +static int entry_index(layer l, int batch, int location, int entry) +{ + int n = location / (l.w*l.h); + int loc = location % (l.w*l.h); + return batch*l.outputs + n*l.w*l.h*(4+l.classes+1) + entry*l.w*l.h + loc; +} + +void forward_yolo_layer(const layer l, network net) +{ + int i,j,b,t,n; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array(l.output + index, (1+l.classes)*l.w*l.h, LOGISTIC); + } + } +#endif + + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + if(!net.train) return; + float avg_iou = 0; + float recall = 0; + float recall75 = 0; + float avg_cat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + int class_count = 0; + *(l.cost) = 0; + for (b = 0; b < l.batch; ++b) { + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.w*l.h); + float best_iou = 0; + int best_t = 0; + for(t = 0; t < l.max_boxes; ++t){ + box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1); + if(!truth.x) break; + float iou = box_iou(pred, truth); + if (iou > best_iou) { + best_iou = iou; + 
best_t = t; + } + } + int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4); + avg_anyobj += l.output[obj_index]; + l.delta[obj_index] = 0 - l.output[obj_index]; + if (best_iou > l.ignore_thresh) { + l.delta[obj_index] = 0; + } + if (best_iou > l.truth_thresh) { + l.delta[obj_index] = 1 - l.output[obj_index]; + + int class = net.truth[best_t*(4 + 1) + b*l.truths + 4]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); + delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, 0); + box truth = float_to_box(net.truth + best_t*(4 + 1) + b*l.truths, 1); + delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + } + } + } + } + for(t = 0; t < l.max_boxes; ++t){ + box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1); + + if(!truth.x) break; + float best_iou = 0; + int best_n = 0; + i = (truth.x * l.w); + j = (truth.y * l.h); + box truth_shift = truth; + truth_shift.x = truth_shift.y = 0; + for(n = 0; n < l.total; ++n){ + box pred = {0}; + pred.w = l.biases[2*n]/net.w; + pred.h = l.biases[2*n+1]/net.h; + float iou = box_iou(pred, truth_shift); + if (iou > best_iou){ + best_iou = iou; + best_n = n; + } + } + + int mask_n = int_index(l.mask, best_n, l.n); + if(mask_n >= 0){ + int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); + float iou = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + + int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4); + avg_obj += l.output[obj_index]; + l.delta[obj_index] = 1 - l.output[obj_index]; + + int class = net.truth[t*(4 + 1) + b*l.truths + 4]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4 + 1); + delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, &avg_cat); + + ++count; + 
++class_count; + if(iou > .5) recall += 1; + if(iou > .75) recall75 += 1; + avg_iou += iou; + } + } + } + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", net.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count); +} + +void backward_yolo_layer(const layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +{ + int i; + int new_w=0; + int new_h=0; + if (((float)netw/w) < ((float)neth/h)) { + new_w = netw; + new_h = (h * netw)/w; + } else { + new_h = neth; + new_w = (w * neth)/h; + } + for (i = 0; i < n; ++i){ + box b = dets[i].bbox; + b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); + b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); + b.w *= (float)netw/new_w; + b.h *= (float)neth/new_h; + if(!relative){ + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + dets[i].bbox = b; + } +} + +int yolo_num_detections(layer l, float thresh) +{ + int i, n; + int count = 0; + for (i = 0; i < l.w*l.h; ++i){ + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4); + if(l.output[obj_index] > thresh){ + ++count; + } + } + } + return count; +} + +void avg_flipped_yolo(layer l) +{ + int i,j,n,z; + float *flip = l.output + l.outputs; + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w/2; ++i) { + for (n = 0; n < l.n; ++n) { + for(z = 0; z < l.classes + 4 + 1; ++z){ + int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; + int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); + float swap = flip[i1]; + flip[i1] = flip[i2]; + flip[i2] = swap; + if(z == 0){ + flip[i1] = -flip[i1]; + flip[i2] = -flip[i2]; + } + } + } + } + } + for(i = 0; i < l.outputs; ++i){ + l.output[i] = (l.output[i] + flip[i])/2.; + } +} + +int 
get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets) +{ + int i,j,n; + float *predictions = l.output; + if (l.batch == 2) avg_flipped_yolo(l); + int count = 0; + for (i = 0; i < l.w*l.h; ++i){ + int row = i / l.w; + int col = i % l.w; + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4); + float objectness = predictions[obj_index]; + if(objectness <= thresh) continue; + int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); + dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); + dets[count].objectness = objectness; + dets[count].classes = l.classes; + for(j = 0; j < l.classes; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, 4 + 1 + j); + float prob = objectness*predictions[class_index]; + dets[count].prob[j] = (prob > thresh) ? prob : 0; + } + ++count; + } + } + correct_yolo_boxes(dets, count, w, h, netw, neth, relative); + return count; +} + +#ifdef GPU + +void forward_yolo_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array_gpu(l.output_gpu + index, (1+l.classes)*l.w*l.h, LOGISTIC); + } + } + if(!net.train || l.onlyforward){ + cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + return; + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_yolo_layer(l, net); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_yolo_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/uvm/darknet/src/yolo_layer.h 
b/workloads/realworld/uvm/darknet/src/yolo_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..d2a0243268146e00ebff2b4b11bce23f830689d1 --- /dev/null +++ b/workloads/realworld/uvm/darknet/src/yolo_layer.h @@ -0,0 +1,19 @@ +#ifndef YOLO_LAYER_H +#define YOLO_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes); +void forward_yolo_layer(const layer l, network net); +void backward_yolo_layer(const layer l, network net); +void resize_yolo_layer(layer *l, int w, int h); +int yolo_num_detections(layer l, float thresh); + +#ifdef GPU +void forward_yolo_layer_gpu(const layer l, network net); +void backward_yolo_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm/darknet/yolov3-tiny/predictions.jpg b/workloads/realworld/uvm/darknet/yolov3-tiny/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e76dabc8b31ad049905fe65ca8aeee298ae22f2e Binary files /dev/null and b/workloads/realworld/uvm/darknet/yolov3-tiny/predictions.jpg differ diff --git a/workloads/realworld/uvm/darknet/yolov3-tiny/run_super.sh b/workloads/realworld/uvm/darknet/yolov3-tiny/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..d890a7c581dc59167fab1b93778d143fc7e679a9 --- /dev/null +++ b/workloads/realworld/uvm/darknet/yolov3-tiny/run_super.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm/darknet/yolov3-tiny/run_yolov3-tiny.sh b/workloads/realworld/uvm/darknet/yolov3-tiny/run_yolov3-tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..d890a7c581dc59167fab1b93778d143fc7e679a9 --- /dev/null +++ b/workloads/realworld/uvm/darknet/yolov3-tiny/run_yolov3-tiny.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3-tiny.cfg 
../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm/darknet/yolov3-tiny_b/run_super.sh b/workloads/realworld/uvm/darknet/yolov3-tiny_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..31669e62fb94142e7dc24b3f905c8f1d25950367 --- /dev/null +++ b/workloads/realworld/uvm/darknet/yolov3-tiny_b/run_super.sh @@ -0,0 +1,2 @@ +#../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg +../darknet detector infer ../cfg/coco.data ../cfg/yolov3-tiny_b.cfg diff --git a/workloads/realworld/uvm/darknet/yolov3-tiny_t/run_super.sh b/workloads/realworld/uvm/darknet/yolov3-tiny_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..6cc56bc601476fa212f615b5aec964f12e044473 --- /dev/null +++ b/workloads/realworld/uvm/darknet/yolov3-tiny_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg +../darknet detector train ../cfg/coco.data ../cfg/yolov3-tiny_t.cfg diff --git a/workloads/realworld/uvm/darknet/yolov3/predictions.jpg b/workloads/realworld/uvm/darknet/yolov3/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9c15e9130033a48a5fbe1702440e719f655710cf Binary files /dev/null and b/workloads/realworld/uvm/darknet/yolov3/predictions.jpg differ diff --git a/workloads/realworld/uvm/darknet/yolov3/run_super.sh b/workloads/realworld/uvm/darknet/yolov3/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..440a4b456a80a434243dffa9614d5a501790990f --- /dev/null +++ b/workloads/realworld/uvm/darknet/yolov3/run_super.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm/darknet/yolov3/run_yolov3.sh b/workloads/realworld/uvm/darknet/yolov3/run_yolov3.sh new file mode 100755 index 
0000000000000000000000000000000000000000..440a4b456a80a434243dffa9614d5a501790990f --- /dev/null +++ b/workloads/realworld/uvm/darknet/yolov3/run_yolov3.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm/darknet/yolov3_b/run_super.sh b/workloads/realworld/uvm/darknet/yolov3_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..913790c1ee53a0d442a89306fbd8bda93faa2581 --- /dev/null +++ b/workloads/realworld/uvm/darknet/yolov3_b/run_super.sh @@ -0,0 +1,2 @@ +#../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg +../darknet detector infer ../cfg/coco.data ../cfg/yolov3_b.cfg diff --git a/workloads/realworld/uvm/darknet/yolov3_t/run_super.sh b/workloads/realworld/uvm/darknet/yolov3_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ee7df07c1a4172f25c1129d8027095d2e3861e28 --- /dev/null +++ b/workloads/realworld/uvm/darknet/yolov3_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg +../darknet detector train ../cfg/coco.data ../cfg/yolov3_t.cfg diff --git a/workloads/realworld/uvm/hotspot/Makefile b/workloads/realworld/uvm/hotspot/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..ca348f25bcda1ae4de926a3b112cd792a7848251 --- /dev/null +++ b/workloads/realworld/uvm/hotspot/Makefile @@ -0,0 +1,25 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include + +SRC = hotspot.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = hotspot + +release: $(SRC) + $(CC) $(KERNEL_DIM) $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +enum: $(SRC) + $(CC) $(KERNEL_DIM) -deviceemu $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +debug: $(SRC) + $(CC) 
$(KERNEL_DIM) -g $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +debugenum: $(SRC) + $(CC) $(KERNEL_DIM) -g -deviceemu $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt + diff --git a/workloads/realworld/uvm/hotspot/Makefile_nvidia b/workloads/realworld/uvm/hotspot/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..1f7ae25a90c968563e96208c693b927b6765490a --- /dev/null +++ b/workloads/realworld/uvm/hotspot/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. 
Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := hotspot +# CUDA source files (compiled with cudacc) +CUFILES := hotspot.cu +# CUDA dependency files +# CU_DEPS := needle_kernel.cu +# C/C++ source files (compiled with gcc / c++) +# CCFILES := + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/uvm/hotspot/README b/workloads/realworld/uvm/hotspot/README new file mode 100644 index 0000000000000000000000000000000000000000..f24239abebe938fe3e8d3c1a7a97f915bd09a90b --- /dev/null +++ b/workloads/realworld/uvm/hotspot/README @@ -0,0 +1,8 @@ +******Adjustable work group size***** +The kernel has square shape +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe one dimension +The actually dimension = RD_WG_SIZE_0 * RD_WG_SIZE_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" \ No newline at end of file diff --git a/workloads/realworld/uvm/hotspot/hotspot.cu b/workloads/realworld/uvm/hotspot/hotspot.cu new file mode 100644 index 0000000000000000000000000000000000000000..22a178ceb4e582e268e2da12e497b6a8091cee84 --- /dev/null +++ b/workloads/realworld/uvm/hotspot/hotspot.cu @@ -0,0 +1,386 @@ +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#ifdef RD_WG_SIZE_0_0 +#define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) +#define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) +#define BLOCK_SIZE RD_WG_SIZE +#else +#define BLOCK_SIZE 16 +#endif + +#define STR_SIZE 256 + +/* maximum power density 
possible (say 300W for a 10mm x 10mm chip) */ +#define MAX_PD (3.0e6) +/* required precision in degrees */ +#define PRECISION 0.001 +#define SPEC_HEAT_SI 1.75e6 +#define K_SI 100 +/* capacitance fitting factor */ +#define FACTOR_CHIP 0.5 + +/* chip parameters */ +float t_chip = 0.0005; +float chip_height = 0.016; +float chip_width = 0.016; +/* ambient temperature, assuming no package at all */ +float amb_temp = 80.0; + +void run(int argc, char **argv); + +/* define timer macros */ +#define pin_stats_reset() startCycle() +#define pin_stats_pause(cycles) stopCycle(cycles) +#define pin_stats_dump(cycles) printf("timer: %Lu\n", cycles) + +void fatal(char *s) +{ + fprintf(stderr, "error: %s\n", s); +} + +void writeoutput(float *vect, int grid_rows, int grid_cols, char *file) +{ + + int i, j, index = 0; + FILE *fp; + char str[STR_SIZE]; + + if ((fp = fopen(file, "w")) == 0) + printf("The file was not opened\n"); + + for (i = 0; i < grid_rows; i++) + for (j = 0; j < grid_cols; j++) + { + + sprintf(str, "%d\t%g\n", index, vect[i * grid_cols + j]); + fputs(str, fp); + index++; + } + + fclose(fp); +} + +void readinput(float *vect, int grid_rows, int grid_cols, char *file) +{ + + int i, j; + FILE *fp; + char str[STR_SIZE]; + float val; + + if ((fp = fopen(file, "r")) == 0) + printf("The file was not opened\n"); + + for (i = 0; i <= grid_rows - 1; i++) + for (j = 0; j <= grid_cols - 1; j++) + { + fgets(str, STR_SIZE, fp); + if (feof(fp)) + fatal("not enough lines in file"); + // if ((sscanf(str, "%d%f", &index, &val) != 2) || (index != ((i-1)*(grid_cols-2)+j-1))) + if ((sscanf(str, "%f", &val) != 1)) + fatal("invalid file format"); + vect[i * grid_cols + j] = val; + } + + fclose(fp); +} + +#define IN_RANGE(x, min, max) ((x) >= (min) && (x) <= (max)) +#define CLAMP_RANGE(x, min, max) x = (x < (min)) ? min : ((x > (max)) ? max : x) +#define MIN(a, b) ((a) <= (b) ? 
(a) : (b)) + +__global__ void calculate_temp(int iteration, // number of iteration + float *power, // power input + float *temp_src, // temperature input/output + float *temp_dst, // temperature input/output + int grid_cols, // Col of grid + int grid_rows, // Row of grid + int border_cols, // border offset + int border_rows, // border offset + float Cap, // Capacitance + float Rx, + float Ry, + float Rz, + float step, + int batch_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + __shared__ float temp_on_cuda[BLOCK_SIZE][BLOCK_SIZE]; + __shared__ float power_on_cuda[BLOCK_SIZE][BLOCK_SIZE]; + __shared__ float temp_t[BLOCK_SIZE][BLOCK_SIZE]; // saving temparary temperature result + + float amb_temp = 80.0; + float step_div_Cap; + float Rx_1, Ry_1, Rz_1; + + // int bx = blockIdx.x; + // int by = blockIdx.y; + + int tx = threadIdx.x; + int ty = threadIdx.y; + + step_div_Cap = step / Cap; + + Rx_1 = 1 / Rx; + Ry_1 = 1 / Ry; + Rz_1 = 1 / Rz; + + // each block finally computes result for a small block + // after N iterations. 
+ // it is the non-overlapping small blocks that cover + // all the input data + + // calculate the small block size + int small_block_rows = BLOCK_SIZE - iteration * 2; // EXPAND_RATE + int small_block_cols = BLOCK_SIZE - iteration * 2; // EXPAND_RATE + + // if (bx == 0 && by == 0 && tx == 0 && ty == 0) + // printf("iteration is %d, small_block_rows is %d\n", iteration, small_block_rows); + + int tile_dim_x = gridDim.x * batch_size; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = batch_size * batch_size; + int tiles_this_block_x = batch_size; + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + // block id + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + int bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + // calculate the boundary for the block according to + // the boundary of its small block + int blkY = small_block_rows * by - border_rows; + int blkX = small_block_cols * bx - border_cols; + int blkYmax = blkY + BLOCK_SIZE - 1; + int blkXmax = blkX + BLOCK_SIZE - 1; + + // calculate the global thread coordination + int yidx = blkY + ty; + int xidx = blkX + tx; + + // load data if it is within the valid input range + int loadYidx = yidx, loadXidx = xidx; + int index = grid_cols * loadYidx + loadXidx; + + if (IN_RANGE(loadYidx, 0, grid_rows - 1) && IN_RANGE(loadXidx, 0, grid_cols - 1)) + { + temp_on_cuda[ty][tx] = temp_src[index]; // Load the temperature data from global memory to shared memory + power_on_cuda[ty][tx] = power[index]; // Load the power data from global memory to shared memory + } + block.sync(); + + // effective range within this block that falls within + // the valid range of the input data + // used to rule out computation outside the boundary. 
+ int validYmin = (blkY < 0) ? -blkY : 0; + int validYmax = (blkYmax > grid_rows - 1) ? BLOCK_SIZE - 1 - (blkYmax - grid_rows + 1) : BLOCK_SIZE - 1; + int validXmin = (blkX < 0) ? -blkX : 0; + int validXmax = (blkXmax > grid_cols - 1) ? BLOCK_SIZE - 1 - (blkXmax - grid_cols + 1) : BLOCK_SIZE - 1; + + int N = ty - 1; + int S = ty + 1; + int W = tx - 1; + int E = tx + 1; + + N = (N < validYmin) ? validYmin : N; + S = (S > validYmax) ? validYmax : S; + W = (W < validXmin) ? validXmin : W; + E = (E > validXmax) ? validXmax : E; + + bool computed; + for (int i = 0; i < iteration; i++) + { + computed = false; + if (IN_RANGE(tx, i + 1, BLOCK_SIZE - i - 2) && + IN_RANGE(ty, i + 1, BLOCK_SIZE - i - 2) && + IN_RANGE(tx, validXmin, validXmax) && + IN_RANGE(ty, validYmin, validYmax)) + { + computed = true; + temp_t[ty][tx] = temp_on_cuda[ty][tx] + step_div_Cap * (power_on_cuda[ty][tx] + + (temp_on_cuda[S][tx] + temp_on_cuda[N][tx] - 2.0 * temp_on_cuda[ty][tx]) * Ry_1 + + (temp_on_cuda[ty][E] + temp_on_cuda[ty][W] - 2.0 * temp_on_cuda[ty][tx]) * Rx_1 + + (amb_temp - temp_on_cuda[ty][tx]) * Rz_1); + } + block.sync(); + if (i == iteration - 1) + break; + if (computed) // Assign the computation range + temp_on_cuda[ty][tx] = temp_t[ty][tx]; + block.sync(); + } + + // update the global memory + // after the last iteration, only threads coordinated within the + // small block perform the calculation and switch on ``computed'' + if (computed) + { + temp_dst[index] = temp_t[ty][tx]; + } + } +} + +/* + compute N time steps +*/ + +int compute_tran_temp(float *MatrixPower, float *MatrixTemp[2], int col, int row, + int total_iterations, int num_iterations, int blockCols, int blockRows, int borderCols, int borderRows, int batch_size) +{ + dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE); + dim3 dimGrid(blockCols, blockRows); + + float grid_height = chip_height / row; + float grid_width = chip_width / col; + + float Cap = FACTOR_CHIP * SPEC_HEAT_SI * t_chip * grid_width * grid_height; + float Rx = 
grid_width / (2.0 * K_SI * t_chip * grid_height); + float Ry = grid_height / (2.0 * K_SI * t_chip * grid_width); + float Rz = t_chip / (K_SI * grid_height * grid_width); + + float max_slope = MAX_PD / (FACTOR_CHIP * t_chip * SPEC_HEAT_SI); + float step = PRECISION / max_slope; + float t; + + int src = 1, dst = 0; + + for (t = 0; t < total_iterations; t += num_iterations) + { + int temp = src; + src = dst; + dst = temp; + calculate_temp<<>>(MIN(num_iterations, total_iterations - t), MatrixPower, MatrixTemp[src], MatrixTemp[dst], + col, row, borderCols, borderRows, Cap, Rx, Ry, Rz, step, batch_size); + } + return dst; +} + +void usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "\t - number of rows/cols in the grid (positive integer)\n"); + fprintf(stderr, "\t - pyramid heigh(positive integer)\n"); + fprintf(stderr, "\t - number of iterations\n"); + fprintf(stderr, "\t - name of the file containing the initial temperature values of each cell\n"); + fprintf(stderr, "\t - name of the file containing the dissipated power values of each cell\n"); + fprintf(stderr, "\t - name of the output file\n"); + fprintf(stderr, "\t - batch_size * batch_size per block\n"); + exit(1); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE); + + run(argc, argv); + + return EXIT_SUCCESS; +} + +void run(int argc, char **argv) +{ + int size; + int grid_rows, grid_cols; + float *FilesavingTemp, *FilesavingPower, *MatrixOut; + char *tfile, *pfile, *ofile; + + int total_iterations = 60; + int pyramid_height = 1; // number of iterations + + if (argc != 8) + usage(argc, argv); + if ((grid_rows = atoi(argv[1])) <= 0 || + (grid_cols = atoi(argv[1])) <= 0 || + (pyramid_height = atoi(argv[2])) <= 0 || + (total_iterations = atoi(argv[3])) <= 0) + usage(argc, argv); + + tfile = 
argv[4]; + pfile = argv[5]; + ofile = argv[6]; + + int batch_size = atoi(argv[7]); + + size = grid_rows * grid_cols; + +/* --------------- pyramid parameters --------------- */ +#define EXPAND_RATE 2 // add one iteration will extend the pyramid base by 2 per each borderline + int borderCols = (pyramid_height)*EXPAND_RATE / 2; + int borderRows = (pyramid_height)*EXPAND_RATE / 2; + int smallBlockCol = BLOCK_SIZE - (pyramid_height)*EXPAND_RATE; + int smallBlockRow = BLOCK_SIZE - (pyramid_height)*EXPAND_RATE; + // int blockCols = grid_cols / smallBlockCol + ((grid_cols % smallBlockCol == 0) ? 0 : 1); + // int blockRows = grid_rows / smallBlockRow + ((grid_rows % smallBlockRow == 0) ? 0 : 1); + + int blockCols = (grid_cols + smallBlockCol * batch_size - 1) / (smallBlockCol * batch_size); + int blockRows = (grid_rows + smallBlockRow * batch_size - 1) / (smallBlockRow * batch_size); + + // printf("borderCols is %d, smallBlockCol is %d, blockCols is %d, grid_cols is %d \n", borderCols, smallBlockCol, blockCols, grid_cols); + + FilesavingTemp = (float *)malloc(size * sizeof(float)); + FilesavingPower = (float *)malloc(size * sizeof(float)); + MatrixOut = (float *)calloc(size, sizeof(float)); + + if (!FilesavingPower || !FilesavingTemp || !MatrixOut) + fatal("unable to allocate memory"); + + printf("pyramidHeight: %d\ngridSize: [%d, %d]\nborder:[%d, %d]\nblockGrid:[%d, %d]\ntargetBlock:[%d, %d]\n", + pyramid_height, grid_cols, grid_rows, borderCols, borderRows, blockCols, blockRows, smallBlockCol, smallBlockRow); + + readinput(FilesavingTemp, grid_rows, grid_cols, tfile); + readinput(FilesavingPower, grid_rows, grid_cols, pfile); + + GPU_argv_init(); + + initTrace(); + startCPU(); + float *MatrixTemp[2], *MatrixPower; + cudaMallocManaged((void **)&MatrixTemp[0], sizeof(float) * size); + cudaMallocManaged((void **)&MatrixTemp[1], sizeof(float) * size); + memcpy(MatrixTemp[0], FilesavingTemp, sizeof(float) * size); + + cudaMallocManaged((void **)&MatrixPower, sizeof(float) * 
size); + memcpy(MatrixPower, FilesavingPower, sizeof(float) * size); + // printf("Start computing the transient temperature\n"); + int ret = compute_tran_temp(MatrixPower, MatrixTemp, grid_cols, grid_rows, + total_iterations, pyramid_height, blockCols, blockRows, borderCols, borderRows, batch_size); + // printf("Ending simulation\n"); + memcpy(MatrixOut, MatrixTemp[ret], sizeof(float) * size); + + cudaFree(MatrixPower); + cudaFree(MatrixTemp[0]); + cudaFree(MatrixTemp[1]); + + endCPU(); + finiTrace(); + + writeoutput(MatrixOut, grid_rows, grid_cols, ofile); + free(MatrixOut); +} diff --git a/workloads/realworld/uvm/hotspot/run.sh b/workloads/realworld/uvm/hotspot/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..66b814725286fb6699d2b893740518ad43dc307a --- /dev/null +++ b/workloads/realworld/uvm/hotspot/run.sh @@ -0,0 +1 @@ +./hotspot 512 2 2 ../../../../data/hotspot/temp_512 ../../../../data/hotspot/power_512 output.out 4 diff --git a/workloads/realworld/uvm/hotspot/run_super.sh b/workloads/realworld/uvm/hotspot/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ad31b9dcd0ce3e1d53df5aaf445082ab86cf2366 --- /dev/null +++ b/workloads/realworld/uvm/hotspot/run_super.sh @@ -0,0 +1 @@ +./hotspot 8192 2 2 ../../../../data/hotspot/temp_8192.txt ../../../../data/hotspot/power_8192.txt output.out 8 diff --git a/workloads/realworld/uvm/kmeans/Makefile b/workloads/realworld/uvm/kmeans/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..e473b45be17e5805399c15b04dda57742451d913 --- /dev/null +++ b/workloads/realworld/uvm/kmeans/Makefile @@ -0,0 +1,33 @@ +include ../../../common/make.config + +# C compiler +CC = gcc +CC_FLAGS = -g -fopenmp -O2 + +# CUDA compiler +NVCC = $(CUDA_DIR)/bin/nvcc +NVCC_FLAGS = -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) + +# 'make dbg=1' enables NVCC debugging + + +# 'make emu=1' compiles the CUDA kernels for emulation +ifeq ($(emu),1) + NVCC_FLAGS += -deviceemu +endif + 
+ +kmeans: cluster.o getopt.o kmeans.o kmeans_clustering.o kmeans_cuda.o rmse.o $(CUPTI_ADD_COMMON)/cpu_timestamps.o + $(CC) $(CC_FLAGS) cluster.o getopt.o kmeans.o kmeans_clustering.o kmeans_cuda.o rmse.o $(CUPTI_ADD_COMMON)/cpu_timestamps.o $(CUPTI_ADD_COMMON)/cupti_add.cpp -o kmeans $(NVCC_FLAGS) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++ + +kmeans.o: kmeans.c + $(CC) $(CC_FLAGS) $< -c $(NVCC_FLAGS) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +%.o: %.[ch] + $(CC) $(CC_FLAGS) $< -c -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lstdc++ + +kmeans_cuda.o: kmeans_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp + $(NVCC) -O2 -c kmeans_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(NVCC_FLAGS) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +clean: + rm -f *.o *~ kmeans kmeans_cuda.linkinfo diff --git a/workloads/realworld/uvm/kmeans/Makefile_nvidia b/workloads/realworld/uvm/kmeans/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..5d612b9dc8b5c19310162e1f721e1d2d4e33fb72 --- /dev/null +++ b/workloads/realworld/uvm/kmeans/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
+# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := kmeans +# CUDA source files (compiled with cudacc) +CUFILES := kmeans_cuda.cu +# CUDA dependency files +CU_DEPS := kmeans_cuda_kernel.cu +# C/C++ source files (compiled with gcc / c++) +CFILES := cluster.c getopt.c kmeans.c kmeans_clustering.c rmse.c + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/uvm/kmeans/README b/workloads/realworld/uvm/kmeans/README new file mode 100755 index 0000000000000000000000000000000000000000..bebae52d716986889b30a435214e095346424190 --- /dev/null +++ b/workloads/realworld/uvm/kmeans/README @@ -0,0 +1,10 @@ +Usage: ./kmeans [switches] -i filename + + -i filename :file containing data to be clustered + -m max_nclusters :maximum number of clusters allowed [default=5] + -n min_nclusters :minimum number 
of clusters allowed [default=5] + -t threshold :threshold value [default=0.001] + -l nloops :iteration for each number of clusters [default=1] + -b :input file is in binary format + -r :calculate RMSE [default=off] + -o :output cluster center coordinates [default=off] \ No newline at end of file diff --git a/workloads/realworld/uvm/kmeans/cluster.c b/workloads/realworld/uvm/kmeans/cluster.c new file mode 100755 index 0000000000000000000000000000000000000000..c4010b11c0306acae331279b89b1a63f3ad5637d --- /dev/null +++ b/workloads/realworld/uvm/kmeans/cluster.c @@ -0,0 +1,165 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. 
*/ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: cluster.c **/ +/** Description: Takes as input a file, containing 1 data point per **/ +/** per line, and performs a fuzzy c-means clustering **/ +/** on the data. Fuzzy clustering is performed using **/ +/** min to max clusters and the clustering that gets **/ +/** the best score according to a compactness and **/ +/** separation criterion are returned. **/ +/** Author: Brendan McCane **/ +/** James Cook University of North Queensland. **/ +/** Australia. email: mccane@cs.jcu.edu.au **/ +/** **/ +/** Edited by: Jay Pisharath, Wei-keng Liao **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. 
**/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "kmeans.h" +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) \ + (((uintptr_t) (buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer)) + +// Timestamp at trace initialization time. Used to normalized other +// timestamps + +extern double wtime(void); +float min_rmse_ref = FLT_MAX; /* reference min_rmse value */ + +/*---< cluster() >-----------------------------------------------------------*/ +int cluster(int npoints, /* number of data points */ + int nfeatures, /* number of attributes for each point */ + float **features, /* array: [npoints][nfeatures] */ + int min_nclusters, /* range of min to max number of clusters */ + int max_nclusters, + float threshold, /* loop terminating factor */ + int *best_nclusters, /* out: number between min and max with lowest RMSE */ + float ***cluster_centres, /* out: [best_nclusters][nfeatures] */ + float *min_rmse, /* out: minimum RMSE */ + int isRMSE, /* calculate RMSE */ + int nloops /* number of iteration for each number of clusters */ + ) +{ + + + int nclusters; /* number of clusters k */ + int index =0; /* number of iteration to reach the best RMSE */ + int rmse; /* RMSE for each clustering */ + int *membership; /* which cluster a data point belongs to */ + float **tmp_cluster_centres; /* hold coordinates of cluster centers */ + int i; + + /* allocate memory for membership */ + membership = (int*) malloc(npoints * sizeof(int)); + + /* sweep k from min to max_nclusters to find the best number of clusters */ + for(nclusters = min_nclusters; nclusters <= max_nclusters; nclusters++) + { + if (nclusters > npoints) 
break; /* cannot have more clusters than points */ + + /* allocate device memory, invert data array (@ kmeans_cuda.cu) */ + allocateMemory(npoints, nfeatures, nclusters, features); + + /* iterate nloops times for each number of clusters */ + for(i = 0; i < nloops; i++) + { + /* initialize initial cluster centers, CUDA calls (@ kmeans_cuda.cu) */ + tmp_cluster_centres = kmeans_clustering(features, + nfeatures, + npoints, + nclusters, + threshold, + membership); + + if (*cluster_centres) { + free((*cluster_centres)[0]); + free(*cluster_centres); + } + *cluster_centres = tmp_cluster_centres; + + + /* find the number of clusters with the best RMSE */ + if(isRMSE) + { + rmse = rms_err(features, + nfeatures, + npoints, + tmp_cluster_centres, + nclusters); + + if(rmse < min_rmse_ref){ + min_rmse_ref = rmse; //update reference min RMSE + *min_rmse = min_rmse_ref; //update return min RMSE + *best_nclusters = nclusters; //update optimum number of clusters + index = i; //update number of iteration to reach best RMSE + } + } + } + + deallocateMemory(); /* free device memory (@ kmeans_cuda.cu) */ + } + + free(membership); + + return index; +} + diff --git a/workloads/realworld/uvm/kmeans/cp.sh b/workloads/realworld/uvm/kmeans/cp.sh new file mode 100755 index 0000000000000000000000000000000000000000..243885047459789eb8eae5d6c5b2b4822eb3aabf --- /dev/null +++ b/workloads/realworld/uvm/kmeans/cp.sh @@ -0,0 +1,27 @@ +cp super_0.log super_3.log +cp super_0.log super_4.log +cp super_0.log super_5.log +cp super_0.log super_6.log +cp super_0.log super_7.log +cp super_0.log super_8.log +cp super_0.log super_9.log +cp super_0.log super_10.log +cp super_0.log super_11.log +cp super_0.log super_12.log +cp super_0.log super_13.log +cp super_0.log super_14.log +cp super_0.log super_15.log +cp super_0.log super_17.log +cp super_0.log super_17.log +cp super_0.log super_18.log +cp super_0.log super_19.log +cp super_0.log super_20.log +cp super_0.log super_21.log +cp super_0.log super_22.log +cp 
super_0.log super_23.log +cp super_0.log super_24.log +cp super_0.log super_25.log +cp super_0.log super_26.log +cp super_0.log super_27.log +cp super_0.log super_28.log +cp super_0.log super_29.log diff --git a/workloads/realworld/uvm/kmeans/getopt.c b/workloads/realworld/uvm/kmeans/getopt.c new file mode 100755 index 0000000000000000000000000000000000000000..fa2f31378fb2978f65267ba2e810aae3ff1ee016 --- /dev/null +++ b/workloads/realworld/uvm/kmeans/getopt.c @@ -0,0 +1,1184 @@ +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to drepper@gnu.org + before changing it! + Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . */ +#ifndef _NO_PROTO +# define _NO_PROTO +#endif + +#ifdef HAVE_CONFIG_H +# include +#endif + +#if !defined __STDC__ || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. 
*/ +# ifndef const +# define const +# endif +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +# include +# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +# define ELIDE_CODE +# endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +# include +# include +#endif /* GNU C library. */ + +#ifdef VMS +# include +# if HAVE_STRING_H - 0 +# include +# endif +#endif + +#ifndef _ +/* This is for other GNU distributions with internationalized messages. */ +# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include +# ifndef _ +# define _(msgid) gettext (msgid) +# endif +# else +# define _(msgid) (msgid) +# endif +# if defined _LIBC && defined USE_IN_LIBIO +# include +# endif +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. 
+ Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. */ +int optind = 1; + +/* Formerly, initialization of getopt depended on optind==0, which + causes problems with re-calling getopt as programs generally don't + know that. */ + +int __getopt_initialized; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. 
+ + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +/* Value of POSIXLY_CORRECT environment variable. */ +static char *posixly_correct; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +# include +# define my_index strchr +#else + +//# if HAVE_STRING_H || WIN32 /* Pete Wilson mod 7/28/02 */ +# include +//# else +//# include +//# endif + +/* Avoid depending on library functions or files + whose names are inconsistent. 
*/ + +#ifndef getenv +extern char *getenv (); +#endif + +static char * +my_index (str, chr) + const char *str; + int chr; +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. */ +#ifdef __GNUC__ +/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. + That was relevant to code that was here before. */ +# if (!defined __STDC__ || !__STDC__) && !defined strlen +/* gcc with -traditional declares the built-in strlen to return int, + and has done so at least since version 2.4.5. -- rms. */ +extern int strlen (const char *); +# endif /* not __STDC__ */ +#endif /* __GNUC__ */ + +#endif /* not __GNU_LIBRARY__ */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +#ifdef _LIBC +/* Stored original parameters. + XXX This is no good solution. We should rather copy the args so + that we can compare them later. But we must not use malloc(3). */ +extern int __libc_argc; +extern char **__libc_argv; + +/* Bash 2.0 gives us an environment variable containing flags + indicating ARGV elements that should not be considered arguments. 
*/ + +# ifdef USE_NONOPTION_FLAGS +/* Defined in getopt_init.c */ +extern char *__getopt_nonoption_flags; + +static int nonoption_flags_max_len; +static int nonoption_flags_len; +# endif + +# ifdef USE_NONOPTION_FLAGS +# define SWAP_FLAGS(ch1, ch2) \ + if (nonoption_flags_len > 0) \ + { \ + char __tmp = __getopt_nonoption_flags[ch1]; \ + __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ + __getopt_nonoption_flags[ch2] = __tmp; \ + } +# else +# define SWAP_FLAGS(ch1, ch2) +# endif +#else /* !_LIBC */ +# define SWAP_FLAGS(ch1, ch2) +#endif /* _LIBC */ + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +#if defined __STDC__ && __STDC__ +static void exchange (char **); +#endif + +static void +exchange (argv) + char **argv; +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + /* First make sure the handling of the `__getopt_nonoption_flags' + string can work normally. Our top argument must be in the range + of the string. */ + if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) + { + /* We must extend the array. The user plays games with us and + presents new arguments. 
*/ + char *new_str = malloc (top + 1); + if (new_str == NULL) + nonoption_flags_len = nonoption_flags_max_len = 0; + else + { + memset (__mempcpy (new_str, __getopt_nonoption_flags, + nonoption_flags_max_len), + '\0', top + 1 - nonoption_flags_max_len); + nonoption_flags_max_len = top + 1; + __getopt_nonoption_flags = new_str; + } + } +#endif + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + SWAP_FLAGS (bottom + i, middle + i); + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Initialize the internal data when the first call is made. */ + +#if defined __STDC__ && __STDC__ +static const char *_getopt_initialize (int, char *const *, const char *); +#endif +static const char * +_getopt_initialize (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. 
*/ + + first_nonopt = last_nonopt = optind; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + if (posixly_correct == NULL + && argc == __libc_argc && argv == __libc_argv) + { + if (nonoption_flags_max_len == 0) + { + if (__getopt_nonoption_flags == NULL + || __getopt_nonoption_flags[0] == '\0') + nonoption_flags_max_len = -1; + else + { + const char *orig_str = __getopt_nonoption_flags; + int len = nonoption_flags_max_len = strlen (orig_str); + if (nonoption_flags_max_len < argc) + nonoption_flags_max_len = argc; + __getopt_nonoption_flags = + (char *) malloc (nonoption_flags_max_len); + if (__getopt_nonoption_flags == NULL) + nonoption_flags_max_len = -1; + else + memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), + '\0', nonoption_flags_max_len - len); + } + } + nonoption_flags_len = nonoption_flags_max_len; + } + else + nonoption_flags_len = 0; +#endif + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. 
+ + If there are no more option characters, `getopt' returns -1. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. 
+ + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + int print_errors = opterr; + if (optstring[0] == ':') + print_errors = 0; + + if (argc < 1) + return -1; + + optarg = NULL; + + if (optind == 0 || !__getopt_initialized) + { + if (optind == 0) + optind = 1; /* Don't scan ARGV[0], the program name. */ + optstring = _getopt_initialize (argc, argv, optstring); + __getopt_initialized = 1; + } + + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The later information + is only used when the used in the GNU libc. */ +#if defined _LIBC && defined USE_NONOPTION_FLAGS +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ + || (optind < nonoption_flags_len \ + && __getopt_nonoption_flags[optind] == '1')) +#else +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') +#endif + + if (nextchar == NULL || *nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT and LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (last_nonopt > optind) + last_nonopt = optind; + if (first_nonopt > optind) + first_nonopt = optind; + + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. 
*/ + + while (optind < argc && NONOPTION_P) + optind++; + last_nonopt = optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (ordering == REQUIRE_ORDER) + return -1; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. 
*/ + + if (longopts != NULL + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) + == (unsigned int) strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else if (long_only + || pfound->has_arg != p->has_arg + || pfound->flag != p->flag + || pfound->val != p->val) + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + optopt = 0; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind - 1][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#else + fprintf (stderr, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], + pfound->name); +#else + fprintf (stderr, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + + nextchar += strlen (nextchar); + + optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. 
If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' + || my_index (optstring, *nextchar) == NULL) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + nextchar = (char *) ""; + optind++; + optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (posixly_correct) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: illegal option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); +#endif + } + else + { +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: invalid option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + optopt = c; + return '?'; + } + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';') + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) + /* Do nothing. 
*/ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = NULL; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#endif /* Not ELIDE_CODE. */ + + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ \ No newline at end of file diff --git a/workloads/realworld/uvm/kmeans/getopt.h b/workloads/realworld/uvm/kmeans/getopt.h new file mode 100755 index 0000000000000000000000000000000000000000..bae04bf7d418206d73892a94ff94923a36549362 --- /dev/null +++ b/workloads/realworld/uvm/kmeans/getopt.h @@ -0,0 +1,191 @@ + + +/* getopt.h */ +/* Declarations for getopt. + Copyright (C) 1989-1994, 1996-1999, 2001 Free Software + Foundation, Inc. This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute + it and/or modify it under the terms of the GNU Lesser + General Public License as published by the Free Software + Foundation; either version 2.1 of the License, or + (at your option) any later version. + + The GNU C Library is distributed in the hope that it will + be useful, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A + PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. 
+ + You should have received a copy of the GNU Lesser General + Public License along with the GNU C Library; if not, write + to the Free Software Foundation, Inc., 59 Temple Place, + Suite 330, Boston, MA 02111-1307 USA. */ + + + + + +#ifndef _GETOPT_H + +#ifndef __need_getopt +# define _GETOPT_H 1 +#endif + +/* If __GNU_LIBRARY__ is not already defined, either we are being used + standalone, or this is the first header included in the source file. + If we are being used with glibc, we need to include , but + that does not exist if we are standalone. So: if __GNU_LIBRARY__ is + not defined, include , which will pull in for us + if it's from glibc. (Why ctype.h? It's guaranteed to exist and it + doesn't flood the namespace with stuff the way some other headers do.) */ +#if !defined __GNU_LIBRARY__ +# include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +#ifndef __need_getopt +/* Describe the long-named options requested by the application. 
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +# if (defined __STDC__ && __STDC__) || defined __cplusplus + const char *name; +# else + char *name; +# endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +# define no_argument 0 +# define required_argument 1 +# define optional_argument 2 +#endif /* need getopt */ + + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, `optopt' is set to the option letter, and '?' is + returned. 
+ + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in `optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU `getopt'. + + The argument `--' causes premature termination of argument + scanning, explicitly telling `getopt' that there are no more + options. + + If OPTS begins with `--', then non-option arguments are treated as + arguments to the option '\0'. This behavior is specific to the GNU + `getopt'. */ + +#if (defined __STDC__ && __STDC__) || defined __cplusplus +# ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int ___argc, char *const *___argv, const char *__shortopts); +# else /* not __GNU_LIBRARY__ */ +extern int getopt (); +# endif /* __GNU_LIBRARY__ */ + +# ifndef __need_getopt +extern int getopt_long (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); +extern int getopt_long_only (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); + +/* Internal only. Users should not call this directly. */ +extern int _getopt_internal (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only); +# endif +#else /* not __STDC__ */ +extern int getopt (); +# ifndef __need_getopt +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +# endif +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +/* Make sure we later can get all the definitions and declarations. 
*/ +#undef __need_getopt + +#endif /* getopt.h */ + diff --git a/workloads/realworld/uvm/kmeans/kmeans.c b/workloads/realworld/uvm/kmeans/kmeans.c new file mode 100755 index 0000000000000000000000000000000000000000..b2668674074c4b29ede04ec09a06d29486a1ec6a --- /dev/null +++ b/workloads/realworld/uvm/kmeans/kmeans.c @@ -0,0 +1,309 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: example.c **/ +/** Description: Takes as input a file: **/ +/** ascii file: containing 1 data point per line **/ +/** binary file: first int is the number of objects **/ +/** 2nd int is the no. of features of each **/ +/** object **/ +/** This example performs a fuzzy c-means clustering **/ +/** on the data. Fuzzy clustering is performed using **/ +/** min to max clusters and the clustering that gets **/ +/** the best score according to a compactness and **/ +/** separation criterion are returned. **/ +/** Author: Wei-keng Liao **/ +/** ECE Department Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. 
**/ +/** **/ +/*************************************************************************/ +#define _CRT_SECURE_NO_DEPRECATE 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#define _POSIX_C_SOURCE 200809L +#include +#include +#include "kmeans.h" + +extern double wtime(void); + + + +/*---< usage() >------------------------------------------------------------*/ +void usage(char *argv0) { + char *help = + "\nUsage: %s [switches] -i filename\n\n" + " -i filename :file containing data to be clustered\n" + " -m max_nclusters :maximum number of clusters allowed [default=5]\n" + " -n min_nclusters :minimum number of clusters allowed [default=5]\n" + " -t threshold :threshold value [default=0.001]\n" + " -l nloops :iteration for each number of clusters [default=1]\n" + " -b :input file is in binary format\n" + " -r :calculate RMSE [default=off]\n" + " -o :output cluster center coordinates [default=off]\n"; + fprintf(stderr, help, argv0); + exit(-1); +} + +/*---< main() >-------------------------------------------------------------*/ +int setup(int argc, char **argv) { + int opt; + extern char *optarg; + char *filename = 0; + float *buf; + char line[1024]; + int isBinaryFile = 0; + + float threshold = 0.001; /* default value */ + int max_nclusters=5; /* default value */ + int min_nclusters=5; /* default value */ + int best_nclusters = 0; + int nfeatures = 0; + int npoints = 0; + float len; + + float **features; + float **cluster_centres=NULL; + int i, j, index; + int nloops = 1; /* default value */ + + int isRMSE = 0; + float rmse; + + int isOutput = 0; + //float cluster_timing, io_timing; + + /* obtain command line arguments and change appropriate options */ + while ( (opt=getopt(argc,argv,"i:t:m:n:l:bro"))!= EOF) { + switch (opt) { + case 'i': filename=optarg; + break; + case 'b': isBinaryFile = 1; + break; + case 't': threshold=atof(optarg); + break; + case 'm': max_nclusters = atoi(optarg); + break; + case 'n': min_nclusters = 
atoi(optarg); + break; + case 'r': isRMSE = 1; + break; + case 'o': isOutput = 1; + break; + case 'l': nloops = atoi(optarg); + break; + case '?': usage(argv[0]); + break; + default: usage(argv[0]); + break; + } + } + + if (filename == 0) usage(argv[0]); + + /* ============== I/O begin ==============*/ + /* get nfeatures and npoints */ + //io_timing = omp_get_wtime(); + if (isBinaryFile) { //Binary file input + int infile; + if ((infile = open(filename, O_RDONLY, "0600")) == -1) { + fprintf(stderr, "Error: no such file (%s)\n", filename); + exit(1); + } + read(infile, &npoints, sizeof(int)); + read(infile, &nfeatures, sizeof(int)); + + /* allocate space for features[][] and read attributes of all objects */ + buf = (float*) malloc(npoints*nfeatures*sizeof(float)); + features = (float**)malloc(npoints* sizeof(float*)); + features[0] = (float*) malloc(npoints*nfeatures*sizeof(float)); + for (i=1; i npoints(%d) -- cannot proceed\n", min_nclusters, npoints); + exit(0); + } + + srand(7); /* seed for future random number generator */ + memcpy(features[0], buf, npoints*nfeatures*sizeof(float)); /* now features holds 2-dimensional array of features */ + free(buf); + + /* ======================= core of the clustering ===================*/ + + //cluster_timing = omp_get_wtime(); /* Total clustering time */ + cluster_centres = NULL; + index = cluster(npoints, /* number of data points */ + nfeatures, /* number of features for each point */ + features, /* array: [npoints][nfeatures] */ + min_nclusters, /* range of min to max number of clusters */ + max_nclusters, + threshold, /* loop termination factor */ + &best_nclusters, /* return: number between min and max */ + &cluster_centres, /* return: [best_nclusters][nfeatures] */ + &rmse, /* Root Mean Squared Error */ + isRMSE, /* calculate RMSE */ + nloops); /* number of iteration for each number of clusters */ + + //cluster_timing = omp_get_wtime() - cluster_timing; + + + /* =============== Command Line Output =============== */ 
+ + /* cluster center coordinates + :displayed only for when k=1*/ + if((min_nclusters == max_nclusters) && (isOutput == 1)) { + printf("\n================= Centroid Coordinates =================\n"); + for(i = 0; i < max_nclusters; i++){ + printf("%d:", i); + for(j = 0; j < nfeatures; j++){ + printf(" %.2f", cluster_centres[i][j]); + } + printf("\n\n"); + } + } + + len = (float) ((max_nclusters - min_nclusters + 1)*nloops); + + printf("Number of Iteration: %d\n", nloops); + //printf("Time for I/O: %.5fsec\n", io_timing); + //printf("Time for Entire Clustering: %.5fsec\n", cluster_timing); + + if(min_nclusters != max_nclusters){ + if(nloops != 1){ //range of k, multiple iteration + //printf("Average Clustering Time: %fsec\n", + // cluster_timing / len); + printf("Best number of clusters is %d\n", best_nclusters); + } + else{ //range of k, single iteration + //printf("Average Clustering Time: %fsec\n", + // cluster_timing / len); + printf("Best number of clusters is %d\n", best_nclusters); + } + } + else{ + if(nloops != 1){ // single k, multiple iteration + //printf("Average Clustering Time: %.5fsec\n", + // cluster_timing / nloops); + if(isRMSE) // if calculated RMSE + printf("Number of trials to approach the best RMSE of %.3f is %d\n", rmse, index + 1); + } + else{ // single k, single iteration + if(isRMSE) // if calculated RMSE + printf("Root Mean Squared Error: %.3f\n", rmse); + } + } + + + /* free up memory */ + free(features[0]); + free(features); + return(0); +} + diff --git a/workloads/realworld/uvm/kmeans/kmeans.h b/workloads/realworld/uvm/kmeans/kmeans.h new file mode 100755 index 0000000000000000000000000000000000000000..28b6c34732313f04c02b59b095361bc8142d4b05 --- /dev/null +++ b/workloads/realworld/uvm/kmeans/kmeans.h @@ -0,0 +1,60 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
*/ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. 
*/ +/******************************************************************************/ + +#ifndef _H_FUZZY_KMEANS +#define _H_FUZZY_KMEANS + +#ifndef FLT_MAX +#define FLT_MAX 3.40282347e+38 +#endif + +#include + +/* rmse.c */ +float euclid_dist_2 (float*, float*, int); +int find_nearest_point (float* , int, float**, int); +float rms_err(float**, int, int, float**, int); + +/* cluster.c */ +int cluster(int, int, float**, int, int, float, int*, float***, float*, int, int); + +/* kmeans_clustering.c */ +float **kmeans_clustering(float**, int, int, int, float, int*); + +#endif diff --git a/workloads/realworld/uvm/kmeans/kmeans_clustering.c b/workloads/realworld/uvm/kmeans/kmeans_clustering.c new file mode 100755 index 0000000000000000000000000000000000000000..54ddcd6d8ff6f0d075ff16e6b6aef84fda4716d2 --- /dev/null +++ b/workloads/realworld/uvm/kmeans/kmeans_clustering.c @@ -0,0 +1,178 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. 
*/ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: kmeans_clustering.c **/ +/** Description: Implementation of regular k-means clustering **/ +/** algorithm **/ +/** Author: Wei-keng Liao **/ +/** ECE Department, Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. 
**/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include + +#include "kmeans.h" + +#define RANDOM_MAX 2147483647 + +extern double wtime(void); + +/*----< kmeans_clustering() >---------------------------------------------*/ +float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */ + int nfeatures, + int npoints, + int nclusters, + float threshold, + int *membership) /* out: [npoints] */ +{ + int i, j, n = 0; /* counters */ + int loop=0, temp; + int *new_centers_len; /* [nclusters]: no. of points in each cluster */ + float delta; /* if the point moved */ + float **clusters; /* out: [nclusters][nfeatures] */ + float **new_centers; /* [nclusters][nfeatures] */ + + int *initial; /* used to hold the index of points not yet selected + prevents the "birthday problem" of dual selection (?) 
+ considered holding initial cluster indices, but changed due to + possible, though unlikely, infinite loops */ + int initial_points; + int c = 0; + + /* nclusters should never be > npoints + that would guarantee a cluster without points */ + if (nclusters > npoints) + nclusters = npoints; + + /* allocate space for and initialize returning variable clusters[] */ + clusters = (float**) malloc(nclusters * sizeof(float*)); + clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float)); + for (i=1; i= 0; i++) { + //n = (int)rand() % initial_points; + + for (j=0; j 0) + clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */ + new_centers[i][j] = 0.0; /* set back to 0 */ + } + new_centers_len[i] = 0; /* set back to 0 */ + } + c++; + } while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */ + printf("iterated %d times\n", c); + free(new_centers[0]); + free(new_centers); + free(new_centers_len); + + return clusters; +} + diff --git a/workloads/realworld/uvm/kmeans/kmeans_cuda.cu b/workloads/realworld/uvm/kmeans/kmeans_cuda.cu new file mode 100755 index 0000000000000000000000000000000000000000..5329f842cd3f9760de72f6af92157cdda4dfdea5 --- /dev/null +++ b/workloads/realworld/uvm/kmeans/kmeans_cuda.cu @@ -0,0 +1,312 @@ +#include +#include +#include +#include +#include +#include + +#include + +#define THREADS_PER_DIM 16 +#define BLOCKS_PER_DIM 64 +#define THREADS_PER_BLOCK THREADS_PER_DIM*THREADS_PER_DIM +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" +#include "kmeans_cuda_kernel.cu" + + +//#define BLOCK_DELTA_REDUCE +//#define BLOCK_CENTER_REDUCE + +#define CPU_DELTA_REDUCE +#define CPU_CENTER_REDUCE + +extern "C" +int setup(int argc, char** argv); /* function prototype */ + +// GLOBAL!!!!! 
+unsigned int num_threads_perdim = THREADS_PER_DIM; /* sqrt(256) -- see references for this choice */ +unsigned int num_blocks_perdim = BLOCKS_PER_DIM; /* temporary */ +unsigned int num_threads = num_threads_perdim*num_threads_perdim; /* number of threads */ +unsigned int num_blocks = num_blocks_perdim*num_blocks_perdim; /* number of blocks */ + +/* _d denotes it resides on the device */ +int *membership_new; /* newly assignment membership */ +float *feature_d; /* inverted data array */ +float *feature_flipped_d; /* original (not inverted) data array */ +int *membership_d; /* membership on the device */ +float *block_new_centers; /* sum of points in a cluster (per block) */ +float *clusters_d; /* cluster centers on the device */ +float *block_clusters_d; /* per block calculation of cluster centers */ +int *block_deltas_d; /* per block calculation of deltas */ + + +/* -------------- allocateMemory() ------------------- */ +/* allocate device memory, calculate number of blocks and threads, and invert the data array */ +extern "C" +void allocateMemory(int npoints, int nfeatures, int nclusters, float **features) +{ + // printf("npoints is %d, num_threads is %d\n", npoints, num_threads); + // num_blocks = npoints / num_threads; + // if (npoints % num_threads > 0) /* defeat truncation */ + // num_blocks++; + + // num_blocks_perdim = sqrt((double) num_blocks); + // while (num_blocks_perdim * num_blocks_perdim < num_blocks) // defeat truncation (should run once) + // num_blocks_perdim++; + + num_blocks = num_blocks_perdim*num_blocks_perdim; + + /* allocate memory for memory_new[] and initialize to -1 (host) */ + membership_new = (int*) malloc(npoints * sizeof(int)); + for(int i=0;i>>(feature_flipped_d, feature_d, npoints, (num_blocks_perdim * num_blocks_perdim * num_threads_perdim * num_threads_perdim), nfeatures); + + /* allocate memory for membership_d[] and clusters_d[][] (device) */ + cudaMallocManaged((void **)&membership_d, npoints * sizeof(int)); + 
cudaMallocManaged((void **)&clusters_d, nclusters * nfeatures * sizeof(float)); + +#ifdef BLOCK_DELTA_REDUCE + // allocate array to hold the per block deltas on the gpu side + + cudaMallocManaged((void**) &block_deltas_d, num_blocks_perdim * num_blocks_perdim * sizeof(int)); +#endif + +#ifdef BLOCK_CENTER_REDUCE + // allocate memory and copy to card cluster array in which to accumulate center points for the next iteration + cudaMallocManaged((void**) &block_clusters_d, + num_blocks_perdim * num_blocks_perdim * + nclusters * nfeatures * sizeof(float)); +#endif + +} +/* -------------- allocateMemory() end ------------------- */ + +/* -------------- deallocateMemory() ------------------- */ +/* free host and device memory */ +extern "C" +void deallocateMemory() +{ + free(membership_new); + free(block_new_centers); + cudaFree(feature_d); + cudaFree(feature_flipped_d); + cudaFree(membership_d); + + cudaFree(clusters_d); +#ifdef BLOCK_CENTER_REDUCE + cudaFree(block_clusters_d); +#endif +#ifdef BLOCK_DELTA_REDUCE + cudaFree(block_deltas_d); +#endif +endCPU(); +} +/* -------------- deallocateMemory() end ------------------- */ + +//////////////////////////////////////////////////////////////////////////////// +// Program main // + +int main(int argc, char **argv) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + // make sure we're running on the big card + GPU_argv_init(); + // as done in the CUDA start/help document provided + initTrace(); + setup(argc, argv); + finiTrace(); +} + +// // +//////////////////////////////////////////////////////////////////////////////// +/* ------------------- kmeansCuda() ------------------------ */ +extern "C" +int // delta -- had problems when return value was of float type +kmeansCuda(float **feature, /* in: [npoints][nfeatures] */ + int nfeatures, /* number of attributes for each point */ + int npoints, /* number of data points */ + int nclusters, /* 
number of clusters */ + int *membership, /* which cluster the point belongs to */ + float **clusters, /* coordinates of cluster centers */ + int *new_centers_len, /* number of elements in each cluster */ + float **new_centers /* sum of elements in each cluster */ + ) +{ + int delta = 0; /* if point has moved */ + int i,j; /* counters */ + + // cudaSetDevice(1); + + + /* copy membership (host to device) */ + memcpy(membership_d, membership_new, npoints*sizeof(int)); + + // /* copy clusters (host to device) */ + // memcpy(clusters_d, clusters[0], nclusters * nfeatures * sizeof(float)); + + // /* set up texture */ + // cudaChannelFormatDesc chDesc0 = cudaCreateChannelDesc(); + // t_features.filterMode = cudaFilterModePoint; + // t_features.normalized = false; + // t_features.channelDesc = chDesc0; + + // if(cudaBindTexture(NULL, &t_features, feature_d, &chDesc0, npoints*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind features array to texture!\n"); + + // cudaChannelFormatDesc chDesc1 = cudaCreateChannelDesc(); + // t_features_flipped.filterMode = cudaFilterModePoint; + // t_features_flipped.normalized = false; + // t_features_flipped.channelDesc = chDesc1; + + // if(cudaBindTexture(NULL, &t_features_flipped, feature_flipped_d, &chDesc1, npoints*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind features_flipped array to texture!\n"); + + // cudaChannelFormatDesc chDesc2 = cudaCreateChannelDesc(); + // t_clusters.filterMode = cudaFilterModePoint; + // t_clusters.normalized = false; + // t_clusters.channelDesc = chDesc2; + + // if(cudaBindTexture(NULL, &t_clusters, clusters_d, &chDesc2, nclusters*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind clusters array to texture!\n"); + + // /* copy clusters to constant memory */ + // cudaMemcpyToSymbol("c_clusters",clusters[0],nclusters*nfeatures*sizeof(float),0,cudaMemcpyHostToDevice); + + + /* setup execution parameters. 
+ changed to 2d (source code on NVIDIA CUDA Programming Guide) */ + dim3 grid( num_blocks_perdim, num_blocks_perdim ); + dim3 threads( num_threads_perdim*num_threads_perdim ); + static uint64_t startKernel2; + CUPTI_CALL(cuptiGetTimestamp(&startKernel2)); + /* execute the kernel */ + kmeansPoint<<>>(feature_d, + nfeatures, + npoints, + (num_blocks_perdim * num_blocks_perdim * num_threads_perdim * num_threads_perdim), + nclusters, + membership_d); + static uint64_t endKernel2; + CUPTI_CALL(cuptiGetTimestamp(&endKernel2)); + // cudaThreadSynchronize(); + cudaDeviceSynchronize(); + printf("CUPTI,kmeansPoint,%lu,%lu\n", startKernel2, endKernel2); + + /* copy back membership (device to host) */ + memcpy(membership_new, membership_d, npoints * sizeof(int)); + +#ifdef BLOCK_CENTER_REDUCE + /*** Copy back arrays of per block sums ***/ + float * block_clusters_h = (float *) malloc( + num_blocks_perdim * num_blocks_perdim * + nclusters * nfeatures * sizeof(float)); + + // cudaMemcpy(block_clusters_h, block_clusters_d, + // num_blocks_perdim * num_blocks_perdim * + // nclusters * nfeatures * sizeof(float), + // cudaMemcpyDeviceToHost); +#endif +#ifdef BLOCK_DELTA_REDUCE + int * block_deltas_h = (int *) malloc( + num_blocks_perdim * num_blocks_perdim * sizeof(int)); + + // cudaMemcpy(block_deltas_h, block_deltas_d, + // num_blocks_perdim * num_blocks_perdim * sizeof(int), + // cudaMemcpyDeviceToHost); +#endif + + /* for each point, sum data points in each cluster + and see if membership has changed: + if so, increase delta and change old membership, and update new_centers; + otherwise, update new_centers */ + delta = 0; + for (i = 0; i < npoints; i++) + { + int cluster_id = membership_d[i]; + new_centers_len[cluster_id]++; + if (membership_d[i] != membership[i]) + { +#ifdef CPU_DELTA_REDUCE + delta++; +#endif + membership[i] = membership_d[i]; + } +#ifdef CPU_CENTER_REDUCE + for (j = 0; j < nfeatures; j++) + { + new_centers[cluster_id][j] += feature[i][j]; + } +#endif + } + + 
+#ifdef BLOCK_DELTA_REDUCE + /*** calculate global sums from per block sums for delta and the new centers ***/ + + //debug + //printf("\t \t reducing %d block sums to global sum \n",num_blocks_perdim * num_blocks_perdim); + for(i = 0; i < num_blocks_perdim * num_blocks_perdim; i++) { + //printf("block %d delta is %d \n",i,block_deltas_h[i]); + delta += block_deltas_h[i]; + } + +#endif +#ifdef BLOCK_CENTER_REDUCE + + for(int j = 0; j < nclusters;j++) { + for(int k = 0; k < nfeatures;k++) { + block_new_centers[j*nfeatures + k] = 0.f; + } + } + + for(i = 0; i < num_blocks_perdim * num_blocks_perdim; i++) { + for(int j = 0; j < nclusters;j++) { + for(int k = 0; k < nfeatures;k++) { + block_new_centers[j*nfeatures + k] += block_clusters_h[i * nclusters*nfeatures + j * nfeatures + k]; + } + } + } + + +#ifdef CPU_CENTER_REDUCE + //debug + /*for(int j = 0; j < nclusters;j++) { + for(int k = 0; k < nfeatures;k++) { + if(new_centers[j][k] > 1.001 * block_new_centers[j*nfeatures + k] || new_centers[j][k] < 0.999 * block_new_centers[j*nfeatures + k]) { + printf("\t \t for %d:%d, normal value is %e and gpu reduced value id %e \n",j,k,new_centers[j][k],block_new_centers[j*nfeatures + k]); + } + } + }*/ +#endif + +#ifdef BLOCK_CENTER_REDUCE + for(int j = 0; j < nclusters;j++) { + for(int k = 0; k < nfeatures;k++) + new_centers[j][k]= block_new_centers[j*nfeatures + k]; + } +#endif + +#endif + finiTrace(); + return delta; + +} +/* ------------------- kmeansCuda() end ------------------------ */ + diff --git a/workloads/realworld/uvm/kmeans/kmeans_cuda_kernel.cu b/workloads/realworld/uvm/kmeans/kmeans_cuda_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..d5d94f15588c4097081c1fa9f2dab392ffe57dc9 --- /dev/null +++ b/workloads/realworld/uvm/kmeans/kmeans_cuda_kernel.cu @@ -0,0 +1,136 @@ +#ifndef _KMEANS_CUDA_KERNEL_H_ +#define _KMEANS_CUDA_KERNEL_H_ + +#include +#include + +#include "kmeans.h" + +#include +#include + +using namespace nvcuda::experimental; 
+ +#define PREFETCH_COUNT 2 + +// FIXME: Make this a runtime selectable variable! +#define ASSUMED_NR_CLUSTERS 32 + +#define SDATA(index) CUT_BANK_CHECKER(sdata, index) + +// t_features has the layout dim0[points 0-m-1]dim1[ points 0-m-1]... +texture t_features; +// t_features_flipped has the layout point0[dim 0-n-1]point1[dim 0-n-1] +texture t_features_flipped; +texture t_clusters; + +__constant__ float c_clusters[ASSUMED_NR_CLUSTERS * 34]; /* constant memory for cluster centers */ + +/* ----------------- invert_mapping() --------------------- */ +/* inverts data array from row-major to column-major. + + [p0,dim0][p0,dim1][p0,dim2] ... + [p1,dim0][p1,dim1][p1,dim2] ... + [p2,dim0][p2,dim1][p2,dim2] ... + to + [dim0,p0][dim0,p1][dim0,p2] ... + [dim1,p0][dim1,p1][dim1,p2] ... + [dim2,p0][dim2,p1][dim2,p2] ... +*/ +__global__ void invert_mapping(float *input, /* original */ + float *output, /* inverted */ + int npoints, /* npoints */ + int batch_size, + int nfeatures) /* nfeatures */ +{ + int point_id = threadIdx.x + blockDim.x * blockIdx.x; /* id of thread */ + + int batches = npoints / batch_size; + + for (int b = 0; b < batches; b++) + { + for (int i = 0; i < nfeatures; i++) + { + output[b * batch_size + point_id + npoints * i] = input[(b * batch_size + point_id) * nfeatures + i]; + } + } + + + + return; +} +/* ----------------- invert_mapping() end --------------------- */ + +/* to turn on the GPU delta and center reduction */ +// #define GPU_DELTA_REDUCTION +// #define GPU_NEW_CENTER_REDUCTION + +/* ----------------- kmeansPoint() --------------------- */ +/* find the index of nearest cluster centers and change membership*/ +__global__ void +kmeansPoint(float *features, /* in: [npoints*nfeatures] */ + int nfeatures, + int npoints, + int batch_size, + int nclusters, + int *membership) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // block ID + const unsigned int block_id = gridDim.x * blockIdx.y + blockIdx.x; + // 
point/thread ID + const unsigned int point_id = block_id * blockDim.x * blockDim.y + threadIdx.x; + + __shared__ float tmp_features[THREADS_PER_DIM][THREADS_PER_DIM][16]; + + int batches = npoints / batch_size; + int tile = 0; + int end_tile = tile + batches; + + for (; tile < end_tile; tile += 1) + { + for (int i = 0; i < 16; i++) + { + int addr = tile * batch_size + point_id + i * npoints; + tmp_features[threadIdx.y][threadIdx.x][i] = features[addr]; + } + block.sync(); + + int index = -1; + + float min_dist = FLT_MAX; + float dist; /* distance square between a point to cluster center */ + + /* find the cluster center id with min distance to pt */ + for (int i = 0; i < nclusters; i++) + { + int cluster_base_index = i * nfeatures; /* base index of cluster centers for inverted array */ + float ans = 0.0; /* Euclidean distance sqaure */ + + for (int j = 0; j < nfeatures; j++) + { + // int addr = point_id + j * npoints; /* appropriate index of data point */ + // float diff = (tex1Dfetch(t_features,addr) - c_clusters[cluster_base_index + j]); /* distance between a data point to cluster centers */ + + // int addr = point_id + j * npoints; /* appropriate index of data point */ + // float diff = features[addr] - c_clusters[cluster_base_index + j]; /* distance between a data point to cluster centers */ + float diff = tmp_features[threadIdx.y][threadIdx.x][j] - c_clusters[cluster_base_index + j]; /* distance between a data point to cluster centers */ + ans += diff * diff; /* sum of squares */ + } + dist = ans; + block.sync(); + + /* see if distance is smaller than previous ones: + if so, change minimum distance and save index of cluster center */ + if (dist < min_dist) + { + min_dist = dist; + index = i; + } + } + membership[tile * batch_size + point_id] = index; + block.sync(); + } + +} +#endif // #ifndef _KMEANS_CUDA_KERNEL_H_ diff --git a/workloads/realworld/uvm/kmeans/kmeans_cuda_kernel.cu.old b/workloads/realworld/uvm/kmeans/kmeans_cuda_kernel.cu.old new file mode 
100755 index 0000000000000000000000000000000000000000..dd0dec27eebf197d811a58063b13a86c8f72e1ed --- /dev/null +++ b/workloads/realworld/uvm/kmeans/kmeans_cuda_kernel.cu.old @@ -0,0 +1,185 @@ +#ifndef _KMEANS_CUDA_KERNEL_H_ +#define _KMEANS_CUDA_KERNEL_H_ + +#include +#include + +#include "kmeans.h" + +// FIXME: Make this a runtime selectable variable! +#define ASSUMED_NR_CLUSTERS 32 + +#define SDATA( index) CUT_BANK_CHECKER(sdata, index) + +// t_features has the layout dim0[points 0-m-1]dim1[ points 0-m-1]... +texture t_features; +// t_features_flipped has the layout point0[dim 0-n-1]point1[dim 0-n-1] +texture t_features_flipped; +texture t_clusters; + + +__constant__ float c_clusters[ASSUMED_NR_CLUSTERS*34]; /* constant memory for cluster centers */ + +/* ----------------- invert_mapping() --------------------- */ +/* inverts data array from row-major to column-major. + + [p0,dim0][p0,dim1][p0,dim2] ... + [p1,dim0][p1,dim1][p1,dim2] ... + [p2,dim0][p2,dim1][p2,dim2] ... + to + [dim0,p0][dim0,p1][dim0,p2] ... + [dim1,p0][dim1,p1][dim1,p2] ... + [dim2,p0][dim2,p1][dim2,p2] ... 
+*/ +__global__ void invert_mapping(float *input, /* original */ + float *output, /* inverted */ + int npoints, /* npoints */ + int nfeatures) /* nfeatures */ +{ + int point_id = threadIdx.x + blockDim.x*blockIdx.x; /* id of thread */ + int i; + + if(point_id < npoints){ + for(i=0;i 1; threadids_participating /= 2) { + if(threadIdx.x < threadids_participating) { + deltas[threadIdx.x] += deltas[threadIdx.x + threadids_participating]; + } + __syncthreads(); + } + if(threadIdx.x < 1) {deltas[threadIdx.x] += deltas[threadIdx.x + 1];} + __syncthreads(); + // propagate number of changes to global counter + if(threadIdx.x == 0) { + block_deltas[blockIdx.y * gridDim.x + blockIdx.x] = deltas[0]; + //printf("original id: %d, modified: %d\n", blockIdx.y*gridDim.x+blockIdx.x, blockIdx.x); + + } + +#endif + + +#ifdef GPU_NEW_CENTER_REDUCTION + int center_id = threadIdx.x / nfeatures; + int dim_id = threadIdx.x - nfeatures*center_id; + + __shared__ int new_center_ids[THREADS_PER_BLOCK]; + + new_center_ids[threadIdx.x] = index; + __syncthreads(); + + /*** + determine which dimension calculte the sum for + mapping of threads is + center0[dim0,dim1,dim2,...]center1[dim0,dim1,dim2,...]... + ***/ + + int new_base_index = (point_id - threadIdx.x)*nfeatures + dim_id; + float accumulator = 0.f; + + if(threadIdx.x < nfeatures * nclusters) { + // accumulate over all the elements of this threadblock + for(int i = 0; i< (THREADS_PER_BLOCK); i++) { + float val = tex1Dfetch(t_features_flipped,new_base_index+i*nfeatures); + if(new_center_ids[i] == center_id) + accumulator += val; + } + + // now store the sum for this threadblock + /*** + mapping to global array is + block0[center0[dim0,dim1,dim2,...]center1[dim0,dim1,dim2,...]...]block1[...]... 
+ ***/ + block_clusters[(blockIdx.y*gridDim.x + blockIdx.x) * nclusters * nfeatures + threadIdx.x] = accumulator; + } +#endif + +} +#endif // #ifndef _KMEANS_CUDA_KERNEL_H_ diff --git a/workloads/realworld/uvm/kmeans/rmse.c b/workloads/realworld/uvm/kmeans/rmse.c new file mode 100755 index 0000000000000000000000000000000000000000..fe7786342bf77cab12958e630cb8d99834312f0d --- /dev/null +++ b/workloads/realworld/uvm/kmeans/rmse.c @@ -0,0 +1,95 @@ +/*************************************************************************/ +/** File: rmse.c **/ +/** Description: calculate root mean squared error of particular **/ +/** clustering. **/ +/** Author: Sang-Ha Lee **/ +/** University of Virginia. **/ +/** **/ +/** Note: euclid_dist_2() and find_nearest_point() adopted from **/ +/** Minebench code. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include + +#include "kmeans.h" + +extern double wtime(void); + +/*----< euclid_dist_2() >----------------------------------------------------*/ +/* multi-dimensional spatial Euclid distance square */ +__inline +float euclid_dist_2(float *pt1, + float *pt2, + int numdims) +{ + int i; + float ans=0.0; + + for (i=0; i-----------------------------------------------*/ +__inline +int find_nearest_point(float *pt, /* [nfeatures] */ + int nfeatures, + float **pts, /* [npts][nfeatures] */ + int npts) +{ + int index, i; + float max_dist=FLT_MAX; + + /* find the cluster center id with min distance to pt */ + for (i=0; i-------------------------------------*/ +float rms_err (float **feature, /* [npoints][nfeatures] */ + int nfeatures, + int npoints, + float **cluster_centres, /* [nclusters][nfeatures] */ + int nclusters) +{ + int i; + int nearest_cluster_index; /* cluster center id with min distance to pt */ + float sum_euclid = 0.0; /* sum of Euclidean distance squares */ + float ret; /* return value */ + + /* calculate and sum the sqaure of euclidean 
distance*/ + #pragma omp parallel for \ + shared(feature,cluster_centres) \ + firstprivate(npoints,nfeatures,nclusters) \ + private(i, nearest_cluster_index) \ + schedule (static) + for (i=0; i +#endif + +#include + +#ifndef _H_TYPES +#include +#endif + +#include + +#ifndef _H_ACCESS +#include /* for the "access" function */ +#endif + +/* + * POSIX requires that certain values be included in unistd.h. It also + * requires that when _POSIX_SOURCE is defined only those standard + * specific values are present. This header includes all the POSIX + * required entries. + */ + +#ifdef _POSIX_SOURCE +#ifdef _LARGE_FILES +#define lseek lseek64 +#endif + + +/* Symbolic constants for the "lseek" function: */ +#ifndef SEEK_SET +#define SEEK_SET 0 /* Set file pointer to "offset" */ +#define SEEK_CUR 1 /* Set file pointer to current plus "offset" */ +#define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif /* SEEK_SET */ + +#ifdef _NO_PROTO + +#ifndef _KERNEL +extern int access(); +extern unsigned int alarm(); +extern int chdir(); +extern int chown(); +extern int close(); +extern char *ctermid(); +extern int dup(); +extern int dup2(); +extern int execl(); +extern int execv(); +extern int execle(); +extern int execve(); +extern int execlp(); +extern int execvp(); +extern void _exit(); +extern pid_t fork(); +extern long fpathconf(); +extern char *getcwd(); +extern gid_t getegid(); +extern uid_t geteuid(); +extern gid_t getgid(); +extern int getgroups(); +extern char *getlogin(); +extern pid_t getpgrp(); +extern pid_t getpid(); +extern pid_t getppid(); +extern uid_t getuid(); +extern int isatty(); +extern int link(); +extern off_t lseek(); +extern long pathconf(); +extern int pause(); +extern int pipe(); +#if defined(_XOPEN_SOURCE) && ( _XOPEN_SOURCE >= 500 ) +extern int pthread_atfork(); +#endif +extern int read(); +extern int rmdir(); +extern int setgid(); +extern int setpgid(); +extern int setsid(); +extern int setuid(); +extern unsigned int sleep(); +extern long 
sysconf(); +extern pid_t tcgetpgrp(); +extern int tcsetpgrp(); +extern char *ttyname(); +extern int unlink(); +extern int write(); +#endif /* !_KERNEL */ + +#else /* POSIX required prototypes */ + +#ifndef _KERNEL +extern int access(const char *, int); +extern unsigned int alarm(unsigned int); +extern int chdir(const char *); +extern int chown(const char *, uid_t, gid_t); +extern int close(int); +extern char *ctermid(char *); +extern int dup(int); +extern int dup2(int, int); +extern int execl(const char *, const char *, ...); +extern int execv(const char *, char *const []); +extern int execle(const char *, const char *, ...); +extern int execve(const char *, char *const [], char *const []); +extern int execlp(const char *, const char *, ...); +extern int execvp(const char *, char *const []); +extern void _exit(int); +extern pid_t fork(void); +extern long fpathconf(int, int); +extern char *getcwd(char *, size_t); +extern gid_t getegid(void); +extern uid_t geteuid(void); +extern gid_t getgid(void); +extern int getgroups(int, gid_t []); +extern char *getlogin(void); +#ifndef _BSD +extern pid_t getpgrp(void); +#endif /* _BSD */ +extern pid_t getpid(void); +extern pid_t getppid(void); +extern uid_t getuid(void); +extern int isatty(int); +extern int link(const char *, const char *); +extern off_t lseek(int, off_t, int); +#ifdef _LARGE_FILE_API +extern off64_t lseek64(int, off64_t, int); +#endif +extern long pathconf(const char *, int); +extern int pause(void); +extern int pipe(int []); +#if defined(_XOPEN_SOURCE) && ( _XOPEN_SOURCE >= 500 ) +extern int pthread_atfork(void (*)(void), void (*)(void), void (*)(void)); +#endif +extern ssize_t read(int, void *, size_t); +extern int rmdir(const char *); +extern int setgid(gid_t); +extern int setpgid(pid_t, pid_t); +extern pid_t setsid(void); +extern int setuid(uid_t); +extern unsigned int sleep(unsigned int); +extern long sysconf(int); +extern pid_t tcgetpgrp(int); +extern int tcsetpgrp(int, pid_t); +extern char *ttyname(int); 
+extern int unlink(const char *); +extern ssize_t write(int, const void *, size_t); +#endif /* !_KERNEL */ +#endif /* !_NO_PROTO */ + +#define STDIN_FILENO 0 +#define STDOUT_FILENO 1 +#define STDERR_FILENO 2 + +#define _POSIX_JOB_CONTROL 1 +#define _POSIX_SAVED_IDS 1 + +#define _POSIX_VERSION 200112L +#define _POSIX2_VERSION 200112L +#define _POSIX2_C_VERSION 200112L + + +#ifdef _XOPEN_SOURCE + +#define _XOPEN_VERSION 600 +#define _XOPEN_XCU_VERSION 4 +#define _XOPEN_XPG3 1 +#define _XOPEN_XPG4 1 +#define _XOPEN_UNIX 1 + +#define _XOPEN_REALTIME (-1) +#define _XOPEN_REALTIME_THREADS (-1) + +#if (_XOPEN_SOURCE >= 600) +#define _XOPEN_STREAMS 1 +#endif + +#define _XBS5_ILP32_OFF32 1 +#define _XBS5_ILP32_OFFBIG 1 +#define _XBS5_LP64_OFF64 1 +#define _XBS5_LPBIG_OFFBIG 1 + +#define _POSIX2_C_BIND 200112L +#define _POSIX2_C_DEV 200112L +#define _POSIX2_CHAR_TERM 1 +#define _POSIX2_LOCALEDEF 200112L +#define _POSIX2_UPE 200112L +#define _POSIX2_FORT_DEV (-1) +#define _POSIX2_FORT_RUN (-1) +#define _POSIX2_SW_DEV (-1) + +#if (_POSIX_C_SOURCE >= 200112L) +#define _POSIX_REGEXP 1 +#define _POSIX_SHELL 1 +#define _POSIX2_PBS (-1) +#define _POSIX2_PBS_ACCOUNTING (-1) +#define _POSIX2_PBS_CHECKPOINT (-1) +#define _POSIX2_PBS_LOCATE (-1) +#define _POSIX2_PBS_MESSAGE (-1) +#define _POSIX2_PBS_TRACK (-1) +#define _V6_ILP32_OFF32 1 +#define _V6_ILP32_OFFBIG 1 +#define _V6_LP64_OFF64 1 +#define _V6_LPBIG_OFFBIG 1 + +#define _POSIX_ADVISORY_INFO 200112L +#define _POSIX_BARRIERS 200112L +#define _POSIX_CLOCK_SELECTION 200112L +#define _POSIX_CPUTIME 200112L +#define _POSIX_MONOTONIC_CLOCK 200112L + +#ifdef _POSIX_RAW_SOCKETS +#undef _POSIX_RAW_SOCKETS +#endif + +#define _POSIX_SPAWN 200112L +#define _POSIX_SPIN_LOCKS 200112L +#define _POSIX_SPORADIC_SERVER (-1) +#define _POSIX_THREAD_CPUTIME 200112L +#define _POSIX_THREAD_SPORADIC_SERVER (-1) +#define _POSIX_TIMEOUTS 200112L +#define _POSIX_TRACE (-1) +#define _POSIX_TRACE_EVENT_FILTER (-1) +#define _POSIX_TRACE_INHERIT (-1) +#define 
_POSIX_TRACE_LOG (-1) +#define _POSIX_TYPED_MEMORY_OBJECTS (-1) + +#endif /* _POSIX_C_SOURCE >= 200112L */ + +#define _XOPEN_CRYPT 1 +#define _XOPEN_SHM 1 +#define _XOPEN_ENH_I18N 1 +#define _XOPEN_LEGACY (-1) +#ifndef __64BIT__ +#define _UNIX_ABI (-1) +#define _UNIX_ABI_IA64 (-1) +#define _UNIX_ABI_BIG_ENDIAN (-1) +#define _UNIX_ABI_LITTLE_ENDIAN (-1) +#endif /* __64BIT__ */ + +extern char *optarg; +extern int optind, opterr, optopt; + +#ifdef _NO_PROTO + extern size_t confstr(); + extern char *crypt(); + extern void encrypt(); + extern int fsync(); + extern int getopt(); + extern int nice(); + extern void swab(); +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern char *getpass(); + extern int chroot(); +#endif +#else + extern size_t confstr(int, char*, size_t); + extern char *crypt(const char *, const char *); + extern void encrypt(char *, int); + extern int fsync(int); + extern int getopt(int, char* const*, const char*); + extern int nice(int); + extern void swab(const void *, void *, ssize_t); + extern int fdatasync(int); +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern char *getpass(const char *); + extern int chroot(const char *); +#endif +#endif + +#endif /* _XOPEN _SOURCE */ + +/* Threads options for 1003.1c and XPG UNIX98 */ +#define _POSIX_THREADS 200112L +#define _POSIX_THREAD_ATTR_STACKADDR 200112L +#define _POSIX_THREAD_ATTR_STACKSIZE 200112L +#define _POSIX_THREAD_PROCESS_SHARED 200112L +#define _POSIX_THREAD_SAFE_FUNCTIONS 200112L +#ifdef _ALL_SOURCE +#define _POSIX_REENTRANT_FUNCTIONS _POSIX_THREAD_SAFE_FUNCTIONS +#endif + +/* Realtime threads options for 1003.1c and XPG UNIX98 */ +#define _POSIX_THREAD_PRIORITY_SCHEDULING (-1) +#define _POSIX_THREAD_PRIO_INHERIT (-1) +#define _POSIX_THREAD_PRIO_PROTECT (-1) + +#undef _POSIX_THREAD_FORKALL + +/* Realtime options for 1003.1c and XPG UNIX98 */ +#define _POSIX_ASYNCHRONOUS_IO 200112L +#define _POSIX_FSYNC 200112L 
+#define _POSIX_MAPPED_FILES 200112L +#define _POSIX_MEMLOCK 200112L +#define _POSIX_MEMLOCK_RANGE 200112L +#define _POSIX_MEMORY_PROTECTION 200112L +#define _POSIX_MESSAGE_PASSING 200112L +#define _POSIX_PRIORITIZED_IO 200112L +#define _POSIX_PRIORITY_SCHEDULING 200112L +#define _POSIX_REALTIME_SIGNALS 200112L +#define _POSIX_SEMAPHORES 200112L +#define _POSIX_SHARED_MEMORY_OBJECTS 200112L +#define _POSIX_SYNCHRONIZED_IO 200112L +#define _POSIX_TIMERS 200112L + +#define _POSIX_ASYNC_IO (-1) +#undef _POSIX_SYNC_IO +#define _POSIX_PRIO_IO (-1) + +#define _POSIX_CHOWN_RESTRICTED 0 +#define _POSIX_VDISABLE 0xFF +#define _POSIX_NO_TRUNC 0 + + /* UNIX03 and POSIX01 */ + /* Always enabled */ +#define _POSIX_IPV6 200112L +#define _POSIX_RAW_SOCKETS 200112L + + +#ifndef NULL +#define NULL 0 +#endif + +#if (_POSIX_C_SOURCE >= 200112L) +#define _POSIX_READER_WRITER_LOCKS 200112L +#endif + +/* arguments for the confstr() function */ + +#define _CS_PATH 1 + + /* compile,link,lib,lint flags for 32bit, no_LARGE_FILES system */ +#define _CS_XBS5_ILP32_OFF32_CFLAGS 2 +#define _CS_XBS5_ILP32_OFF32_LDFLAGS 3 +#define _CS_XBS5_ILP32_OFF32_LIBS 4 +#define _CS_XBS5_ILP32_OFF32_LINTFLAGS 5 + + /* compile,link,lib,lint flags for 32bit, _LARGE_FILES system */ +#define _CS_XBS5_ILP32_OFFBIG_CFLAGS 6 +#define _CS_XBS5_ILP32_OFFBIG_LDFLAGS 7 +#define _CS_XBS5_ILP32_OFFBIG_LIBS 8 +#define _CS_XBS5_ILP32_OFFBIG_LINTFLAGS 9 + + /* compile,link,lib,lint flags for LP64 64bit system */ +#define _CS_XBS5_LP64_OFF64_CFLAGS 10 +#define _CS_XBS5_LP64_OFF64_LDFLAGS 11 +#define _CS_XBS5_LP64_OFF64_LIBS 12 +#define _CS_XBS5_LP64_OFF64_LINTFLAGS 13 + + /* compile,link,lib,lint flags for ILP64 64bit system */ + /* AIX does not currently support this */ +#define _CS_XBS5_LPBIG_OFFBIG_CFLAGS 14 +#define _CS_XBS5_LPBIG_OFFBIG_LDFLAGS 15 +#define _CS_XBS5_LPBIG_OFFBIG_LIBS 16 +#define _CS_XBS5_LPBIG_OFFBIG_LINTFLAGS 17 + +#define _CS_AIX_BOOTDEV 24 +#define _CS_AIX_MODEL_CODE 25 +#define _CS_AIX_ARCHITECTURE 
26 +#define _CS_AIX_MODEL_CLASS 40 + +#if (_POSIX_C_SOURCE >= 200112L) +#define _CS_POSIX_V6_ILP32_OFF32_CFLAGS 27 +#define _CS_POSIX_V6_ILP32_OFF32_LDFLAGS 28 +#define _CS_POSIX_V6_ILP32_OFF32_LIBS 29 +#define _CS_POSIX_V6_ILP32_OFFBIG_CFLAGS 30 +#define _CS_POSIX_V6_ILP32_OFFBIG_LDFLAGS 31 +#define _CS_POSIX_V6_ILP32_OFFBIG_LIBS 32 +#define _CS_POSIX_V6_LP64_OFF64_CFLAGS 33 +#define _CS_POSIX_V6_LP64_OFF64_LDFLAGS 34 +#define _CS_POSIX_V6_LP64_OFF64_LIBS 35 +#define _CS_POSIX_V6_LPBIG_OFFBIG_CFLAGS 36 +#define _CS_POSIX_V6_LPBIG_OFFBIG_LDFLAGS 37 +#define _CS_POSIX_V6_LPBIG_OFFBIG_LIBS 38 +#define _CS_POSIX_V6_WIDTH_RESTRICTED_ENVS 39 +#endif + + /* Values for the above */ +#define _CSPATH "/usr/bin:/usr/vac/bin" + + /* ILP32_OFF32 */ +#define _CSPOSIX_V6_ILP32_OFF32_CFLAGS "-q32" +#define _CSXBS5_ILP32_OFF32_CFLAGS _CSPOSIX_V6_ILP32_OFF32_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_ILP32_OFF32_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_ILP32_OFF32_LDFLAGS "-b32" +#define _CSXBS5_ILP32_OFF32_LDFLAGS _CSPOSIX_V6_ILP32_OFF32_LDFLAGS +#endif + +#define _CSPOSIX_V6_ILP32_OFF32_LIBS "-lc -lpthread -lm" +#define _CSXBS5_ILP32_OFF32_LIBS _CSPOSIX_V6_ILP32_OFF32_LIBS + +#define _CSXBS5_ILP32_OFF32_LINTFLAGS "" + + /* ILP32_OFFOFFBIG */ +#define _CSPOSIX_V6_ILP32_OFFBIG_CFLAGS "-q32 -D_LARGE_FILES -qlonglong" +#define _CSXBS5_ILP32_OFFBIG_CFLAGS _CSPOSIX_V6_ILP32_OFFBIG_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_ILP32_OFFBIG_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_ILP32_OFFBIG_LDFLAGS "-b32" +#define _CSXBS5_ILP32_OFFBIG_LDFLAGS _CSPOSIX_V6_ILP32_OFFBIG_LDFLAGS +#endif + +#define _CSPOSIX_V6_ILP32_OFFBIG_LIBS "-lc -lpthread -lm" +#define _CSXBS5_ILP32_OFFBIG_LIBS _CSPOSIX_V6_ILP32_OFFBIG_LIBS + +#define _CSXBS5_ILP32_OFFBIG_LINTFLAGS "-D_LARGE_FILES -qlonglong" + + /* LP64_OFF64 */ +#define _CSPOSIX_V6_LP64_OFF64_CFLAGS "-q64" +#define _CSXBS5_LP64_OFF64_CFLAGS _CSPOSIX_V6_LP64_OFF64_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_LP64_OFF64_LDFLAGS "" +#else /* 
POWER */ +#define _CSPOSIX_V6_LP64_OFF64_LDFLAGS "-b64" +#define _CSXBS5_LP64_OFF64_LDFLAGS _CSPOSIX_V6_LP64_OFF64_LDFLAGS +#endif + +#define _CSPOSIX_V6_LP64_OFF64_LIBS "-lc -lpthread -lm" +#define _CSXBS5_LP64_OFF64_LIBS _CSPOSIX_V6_LP64_OFF64_LIBS + +#define _CSXBS5_LP64_OFF64_LINTFLAGS "-D__64BIT__" + + /* LPBIG_OFFBIG */ +#define _CSPOSIX_V6_LPBIG_OFFBIG_CFLAGS "-q64" +#define _CSXBS5_LPBIG_OFFBIG_CFLAGS _CSPOSIX_V6_LPBIG_OFFBIG_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_LPBIG_OFFBIG_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_LPBIG_OFFBIG_LDFLAGS "-b64" +#define _CSXBS5_LPBIG_OFFBIG_LDFLAGS _CSPOSIX_V6_LPBIG_OFFBIG_LDFLAGS +#endif + +#define _CSPOSIX_V6_LPBIG_OFFBIG_LIBS "-lc -lpthread -lm" +#define _CSXBS5_LPBIG_OFFBIG_LIBS _CSPOSIX_V6_LPBIG_OFFBIG_LIBS + +#define _CSXBS5_LPBIG_OFFBIG_LINTFLAGS "-D__64BIT__" + +#if (_POSIX_C_SOURCE >= 200112L) +#define _CSPOSIX_V6_WIDTH_RESTRICTED_ENVS \ + "POSIX_V6_ILP32_OFF32\n" \ + "POSIX_V6_ILP32_OFFBIG\n" \ + "POSIX_V6_LP64_OFF64\n" \ + "POSIX_V6_LPBIG_OFFBIG" +#endif + +/* arguments for the pathconf() function */ + +#define _PC_CHOWN_RESTRICTED 10 +#define _PC_LINK_MAX 11 +#define _PC_MAX_CANON 12 +#define _PC_MAX_INPUT 13 +#define _PC_NAME_MAX 14 +#define _PC_NO_TRUNC 15 +#define _PC_PATH_MAX 16 +#define _PC_PIPE_BUF 17 +#define _PC_VDISABLE 18 +#define _PC_ASYNC_IO 19 +#define _PC_SYNC_IO 20 +#define _PC_PRIO_IO 21 +#define _PC_FILESIZEBITS 22 /* # bits needed to hold offset */ +#define _PC_AIX_DISK_PARTITION 23 +#define _PC_AIX_DISK_SIZE 24 +#if (_POSIX_C_SOURCE >= 200112L) +#define _PC_SYMLINK_MAX 25 +#define _PC_ALLOC_SIZE_MIN 26 +#define _PC_REC_INCR_XFER_SIZE 27 +#define _PC_REC_MAX_XFER_SIZE 28 +#define _PC_REC_MIN_XFER_SIZE 29 +#define _PC_REC_XFER_ALIGN 30 +#define _PC_2_SYMLINKS 31 +#endif + +/* arguments for the sysconf() function, the defined numbers are used as + * array index in sysconf(). 
+ * + * POSIX.1(1990), Table 4-2 + */ +#define _SC_ARG_MAX 0 +#define _SC_CHILD_MAX 1 +#define _SC_CLK_TCK 2 +#define _SC_NGROUPS_MAX 3 +#define _SC_OPEN_MAX 4 +#define _SC_STREAM_MAX 5 +#define _SC_TZNAME_MAX 6 +#define _SC_JOB_CONTROL 7 +#define _SC_SAVED_IDS 8 +#define _SC_VERSION 9 + +/* POSIX.1(1990), Table 2-3, required by command getconf */ + +#define _SC_POSIX_ARG_MAX 10 +#define _SC_POSIX_CHILD_MAX 11 +#define _SC_POSIX_LINK_MAX 12 +#define _SC_POSIX_MAX_CANON 13 +#define _SC_POSIX_MAX_INPUT 14 +#define _SC_POSIX_NAME_MAX 15 +#define _SC_POSIX_NGROUPS_MAX 16 +#define _SC_POSIX_OPEN_MAX 17 +#define _SC_POSIX_PATH_MAX 18 +#define _SC_POSIX_PIPE_BUF 19 +#define _SC_POSIX_SSIZE_MAX 20 +#define _SC_POSIX_STREAM_MAX 21 +#define _SC_POSIX_TZNAME_MAX 22 + +/* POSIX.2 (Draft 10), Table 41) */ + +#define _SC_BC_BASE_MAX 23 +#define _SC_BC_DIM_MAX 24 +#define _SC_BC_SCALE_MAX 25 +#define _SC_BC_STRING_MAX 26 +#define _SC_EQUIV_CLASS_MAX 27 +#define _SC_EXPR_NEST_MAX 28 +#define _SC_LINE_MAX 29 +#define _SC_RE_DUP_MAX 30 +#define _SC_2_VERSION 31 +#define _SC_2_C_DEV 32 +#define _SC_2_FORT_DEV 33 +#define _SC_2_FORT_RUN 34 +#define _SC_2_LOCALEDEF 35 +#define _SC_2_SW_DEV 36 + +/* POSIX.2 (Draft 10), Table 13) */ + +#define _SC_POSIX2_BC_BASE_MAX 37 +#define _SC_POSIX2_BC_DIM_MAX 38 +#define _SC_POSIX2_BC_SCALE_MAX 39 +#define _SC_POSIX2_BC_STRING_MAX 40 +#define _SC_POSIX2_EQUIV_CLASS_MAX 41 +#define _SC_POSIX2_EXPR_NEST_MAX 42 +#define _SC_POSIX2_LINE_MAX 43 +#define _SC_POSIX2_RE_DUP_MAX 44 +#define _SC_PASS_MAX 45 +#define _SC_XOPEN_VERSION 46 +#define _SC_ATEXIT_MAX 47 +#if _XOPEN_SOURCE_EXTENDED==1 +#define _SC_PAGE_SIZE 48 +#endif /* _XOPEN_SOURCE_EXTENDED */ +#define _SC_AES_OS_VERSION 49 +#define _SC_COLL_WEIGHTS_MAX 50 +#define _SC_2_C_BIND 51 +#define _SC_2_C_VERSION 52 +#define _SC_2_UPE 53 +#define _SC_2_CHAR_TERM 54 +#define _SC_XOPEN_SHM 55 +#define _SC_XOPEN_CRYPT 56 +#define _SC_XOPEN_ENH_I18N 57 +#if _XOPEN_SOURCE_EXTENDED==1 +#define _SC_PAGESIZE 
_SC_PAGE_SIZE +#define _SC_IOV_MAX 58 +#endif /* _XOPEN_SOURCE_EXTENDED */ +#define _SC_THREAD_SAFE_FUNCTIONS 59 +#define _SC_THREADS 60 +#define _SC_THREAD_ATTR_STACKADDR 61 +#define _SC_THREAD_ATTR_STACKSIZE 62 +#define _SC_THREAD_FORKALL 63 +#define _SC_THREAD_PRIORITY_SCHEDULING 64 +#define _SC_THREAD_PRIO_INHERIT 65 +#define _SC_THREAD_PRIO_PROTECT 66 +#define _SC_THREAD_PROCESS_SHARED 67 +#define _SC_THREAD_KEYS_MAX 68 +#define _SC_THREAD_DATAKEYS_MAX _SC_THREAD_KEYS_MAX +#define _SC_THREAD_STACK_MIN 69 +#define _SC_THREAD_THREADS_MAX 70 +#ifdef _ALL_SOURCE +#define _SC_NPROCESSORS_CONF 71 +#define _SC_NPROCESSORS_ONLN 72 +#endif /* _ALL_SOURCE */ +#define _SC_XOPEN_UNIX 73 + +#if (_XOPEN_SOURCE >= 500) + +/* POSIX 1003.1c and XPG UNIX98 */ +/* look to defines above for meanings */ +#define _SC_AIO_LISTIO_MAX 75 +#define _SC_AIO_MAX 76 +#define _SC_AIO_PRIO_DELTA_MAX 77 +#define _SC_ASYNCHRONOUS_IO 78 +#define _SC_DELAYTIMER_MAX 79 +#define _SC_FSYNC 80 +#define _SC_GETGR_R_SIZE_MAX 81 +#define _SC_GETPW_R_SIZE_MAX 82 +#define _SC_LOGIN_NAME_MAX 83 +#define _SC_MAPPED_FILES 84 +#define _SC_MEMLOCK 85 +#define _SC_MEMLOCK_RANGE 86 +#define _SC_MEMORY_PROTECTION 87 +#define _SC_MESSAGE_PASSING 88 +#define _SC_MQ_OPEN_MAX 89 +#define _SC_MQ_PRIO_MAX 90 +#define _SC_PRIORITIZED_IO 91 +#define _SC_PRIORITY_SCHEDULING 92 +#define _SC_REALTIME_SIGNALS 93 +#define _SC_RTSIG_MAX 94 +#define _SC_SEMAPHORES 95 +#define _SC_SEM_NSEMS_MAX 96 +#define _SC_SEM_VALUE_MAX 97 +#define _SC_SHARED_MEMORY_OBJECTS 98 +#define _SC_SIGQUEUE_MAX 99 +#define _SC_SYNCHRONIZED_IO 100 +#define _SC_THREAD_DESTRUCTOR_ITERATIONS 101 +#define _SC_TIMERS 102 +#define _SC_TIMER_MAX 103 +#define _SC_TTY_NAME_MAX 104 +#define _SC_XBS5_ILP32_OFF32 105 +#define _SC_XBS5_ILP32_OFFBIG 106 +#define _SC_XBS5_LP64_OFF64 107 +#define _SC_XBS5_LPBIG_OFFBIG 108 +#define _SC_XOPEN_XCU_VERSION 109 +#define _SC_XOPEN_REALTIME 110 +#define _SC_XOPEN_REALTIME_THREADS 111 +#define _SC_XOPEN_LEGACY 112 +#endif 
/* _XOPEN_SOURCE >= 500 */ + +#ifdef _ALL_SOURCE +#define _SC_REENTRANT_FUNCTIONS _SC_THREAD_SAFE_FUNCTIONS +#define _SC_PHYS_PAGES 113 +#define _SC_AVPHYS_PAGES 114 +#define _SC_LPAR_ENABLED 115 +#define _SC_LARGE_PAGESIZE 116 +#endif /* _ALL_SOURCE */ + +#define _SC_AIX_KERNEL_BITMODE 117 +#define _SC_AIX_REALMEM 118 +#define _SC_AIX_HARDWARE_BITMODE 119 +#define _SC_AIX_MP_CAPABLE 120 + +#define _SC_V6_ILP32_OFF32 121 +#define _SC_V6_ILP32_OFFBIG 122 +#define _SC_V6_LP64_OFF64 123 +#define _SC_V6_LPBIG_OFFBIG 124 + +#define _SC_XOPEN_STREAMS 125 + +#if (_POSIX_C_SOURCE >= 200112L) +#define _SC_HOST_NAME_MAX 126 +#define _SC_REGEXP 127 +#define _SC_SHELL 128 +#define _SC_SYMLOOP_MAX 129 +#define _SC_ADVISORY_INFO 130 +#define _SC_FILE_LOCKING 131 +#define _SC_2_PBS 132 +#define _SC_2_PBS_ACCOUNTING 133 +#define _SC_2_PBS_CHECKPOINT 134 +#define _SC_2_PBS_LOCATE 135 +#define _SC_2_PBS_MESSAGE 136 +#define _SC_2_PBS_TRACK 137 +#define _SC_BARRIERS 138 +#define _SC_CLOCK_SELECTION 139 +#define _SC_CPUTIME 140 +#define _SC_MONOTONIC_CLOCK 141 +#define _SC_READER_WRITER_LOCKS 142 +#define _SC_SPAWN 143 +#define _SC_SPIN_LOCKS 144 +#define _SC_SPORADIC_SERVER 145 +#define _SC_THREAD_CPUTIME 146 +#define _SC_THREAD_SPORADIC_SERVER 147 +#define _SC_TIMEOUTS 148 +#define _SC_TRACE 149 +#define _SC_TRACE_EVENT_FILTER 150 +#define _SC_TRACE_INHERIT 151 +#define _SC_TRACE_LOG 152 +#define _SC_TYPED_MEMORY_OBJECTS 153 +#define _SC_IPV6 154 +#define _SC_RAW_SOCKETS 155 +#define _SC_SS_REPL_MAX 156 +#define _SC_TRACE_EVENT_NAME_MAX 157 +#define _SC_TRACE_NAME_MAX 158 +#define _SC_TRACE_SYS_MAX 159 +#define _SC_TRACE_USER_EVENT_MAX 160 +#endif /* _POSIX_C_SOURCE >= 200112L */ + +#ifdef _ALL_SOURCE +#define _SC_AIX_UKEYS 161 +#endif /* _ALL_SOURCE */ + +#endif /* _POSIX_SOURCE */ + + +#if _XOPEN_SOURCE_EXTENDED==1 +#ifdef _LARGE_FILES +#define ftruncate ftruncate64 +#define truncate truncate64 +#endif + +#ifndef _H_LOCKF +#include /* lockf definitions for portability */ +#endif + 
+#ifdef _NO_PROTO +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern int brk(); + extern int getpagesize(); +#ifndef _MSGQSUPPORT + extern int __fd_getdtablesize(); + static int getdtablesize() + { + return __fd_getdtablesize(); + } +#else + extern int getdtablesize(); +#endif /* _MSGQSUPPORT */ + + extern void *sbrk(); +#endif /* _POSIX_C_SOURCE<200112L */ + extern int fchdir(); + extern int fchown(); + extern int ftruncate(); + extern long gethostid(); + extern int gethostname(); + extern pid_t getpgid(); + extern pid_t getsid(); + extern char *getwd(); + extern int lchown(); + extern int readlink(); + extern pid_t setpgrp(); + extern int setregid(); + extern int setreuid(); + extern int symlink(); + extern void sync(); + extern int truncate(); + extern useconds_t ualarm(); + extern int usleep(); + extern pid_t vfork(); +#else /* _NO_PROTO */ +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern int brk(void *); + extern int getpagesize(void); +#ifndef _MSGQSUPPORT + extern int __fd_getdtablesize(void); + static int getdtablesize() + { + return __fd_getdtablesize(); + } +#else + extern int getdtablesize(void); +#endif /* _MSGQSUPPORT */ +#ifdef _LINUX_SOURCE_COMPAT + extern void *sbrk(ptrdiff_t); +#elif (_XOPEN_SOURCE >= 500) || defined(__64BIT__) + extern void *sbrk(intptr_t); +#else + extern void *sbrk(int); +#endif +#endif /* _POSIX_C_SOURCE<200112L */ + extern int fchdir(int); + extern int fchown(int, uid_t, gid_t); + extern int ftruncate(int, off_t); +#ifdef _LARGE_FILE_API + extern int ftruncate64(int, off64_t); +#endif + extern int gethostname(char *, size_t); + extern long gethostid(void); + extern pid_t getpgid(pid_t); + extern pid_t getsid(pid_t); + extern char *getwd(char *); + extern int lchown(const char *, uid_t, gid_t); + +#if (defined(_SUSV3_READLINK) || \ + (!defined(_ALL_SOURCE) && (_POSIX_C_SOURCE >= 200112L))) + /* If SUSV3 readlink specifically requested or if 
strict SUSv3 + * environment requested */ +#ifdef __64BIT__ +static ssize_t readlink(const char *__restrict__ __path, + char *__restrict__ __buf, size_t __bufsize) +{ + extern ssize_t __readlink64(const char *__restrict__, char *__restrict__, size_t); + return __readlink64(__path, __buf, __bufsize); +} +#else + extern ssize_t readlink(const char *__restrict__, char *__restrict__, size_t); +#endif /* __64BIT__ */ +#else + extern int readlink(const char *, char *, size_t); +#endif /* _SUSV3_READLINK || !_ALL_SOURCE && _POSIX_C_SOURCE >= 200112L */ + +#ifndef _BSD + extern pid_t setpgrp(void); +#endif /* _BSD */ + extern int setregid(gid_t, gid_t); + extern int setreuid(uid_t, uid_t); + extern int symlink(const char *, const char *); + extern void sync(void); + extern int truncate(const char *, off_t); +#ifdef _LARGE_FILE_API + extern int truncate64(const char *, off64_t); +#endif + extern useconds_t ualarm(useconds_t, useconds_t); + extern int usleep(useconds_t); + extern pid_t vfork(void); +#if _XOPEN_SOURCE>=500 + extern int getlogin_r(char *, size_t); + extern int ttyname_r(int, char *, size_t); + +#ifdef _LARGE_FILES +#define pread pread64 +#define pwrite pwrite64 +#endif /* _LARGE_FILES */ + + extern ssize_t pread(int, void *, size_t, off_t); + extern ssize_t pwrite(int, const void *, size_t, off_t); +#ifdef _LARGE_FILE_API + extern ssize_t pread64(int, void *, size_t, off64_t); + extern ssize_t pwrite64(int, const void *, size_t, off64_t); +#endif /* _LARGE_FILE_API */ +#endif /* _XOPEN_SOURCE>=500 */ + +#endif /* _NO_PROTO */ + +#endif /* _XOPEN_SOURCE_EXTENDED */ + +#ifdef _ALL_SOURCE + +extern char **environ; + +#ifndef _KERNEL +#ifdef _NO_PROTO + extern pid_t f_fork(); +#else /* _NO_PROTO */ + extern pid_t f_fork(void); +#endif /* _NO_PROTO */ +#endif /* _KERNEL */ + +#ifdef _NO_PROTO + extern char * cuserid(); + extern int ioctl(); +#ifdef __64BIT__ + extern int ioctlx(); + extern int ioctl32(); + extern int ioctl32x(); +#endif /* __64BIT__ */ + extern int 
readx(); + extern int setgroups(); + extern int writex(); + extern int setegid(); + extern int seteuid(); + extern int setrgid(); + extern int setruid(); + extern offset_t llseek(); + extern char * getusershell(); + extern void setusershell(); + extern void endusershell(); + extern char * get_current_dir_name(); + extern int sysfs(); +#else + extern char * cuserid(char *); + extern int setegid(gid_t); + extern int seteuid(uid_t); + extern int setrgid(gid_t); + extern int setruid(uid_t); +#ifndef _BSD + extern int ioctl(int, int, ...); +#endif /* _BSD */ +#ifdef __64BIT__ + extern int ioctlx(int, int, void *, long); + extern int ioctl32(int, int, ...); + extern int ioctl32x(int, int, unsigned int, unsigned int); +#endif /* __64BIT__ */ + extern int setgroups(int, gid_t []); +#ifndef _KERNEL + extern int readx(int, char*, unsigned, long); + extern int writex(int, char*, unsigned, long); + +#ifdef _LARGE_FILES +#define fclear fclear64 +#define fsync_range fsync_range64 +#endif + extern off_t fclear(int, off_t); + extern int fsync_range(int, int, off_t, off_t); +#ifdef _LARGE_FILE_API + extern off64_t fclear64(int, off64_t); + extern int fsync_range64(int, int, off64_t, off64_t); +#endif + extern offset_t llseek(int, offset_t, int); + extern char * getusershell(void); + extern void setusershell(void); + extern void endusershell(void); + extern char * get_current_dir_name(void); + extern int sysfs(int, ...); + extern int finfo(const char *, int, void *, int32long64_t); + extern int ffinfo(int, int, void *, int32long64_t); + +#endif /* ndef _KERNEL */ + +#endif /* _NO_PROTO */ + +#define _AES_OS_VERSION 1 /* OSF, AES version */ + +#endif /* _ALL_SOURCE */ + +#ifdef __cplusplus +} +#endif + +#endif /* _H_UNISTD */ diff --git a/workloads/realworld/uvm/knn/Makefile b/workloads/realworld/uvm/knn/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..6ebd397ea3d2a2082eb070de41b7f1eb452dbf53 --- /dev/null +++ b/workloads/realworld/uvm/knn/Makefile @@ 
-0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := knn +CUFILES := knn_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o knn + diff --git a/workloads/realworld/uvm/knn/knn_cuda.cu b/workloads/realworld/uvm/knn/knn_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..bcdaccecbca26f0b9a40fbab0ae19551fa8fd6ac --- /dev/null +++ b/workloads/realworld/uvm/knn/knn_cuda.cu @@ -0,0 +1,577 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +//-----------------------------------------------------------------------------------------------// +// KERNELS // +//-----------------------------------------------------------------------------------------------// +__global__ void extract_with_interpolation(int nthreads, float *data, + float *n_xy_coords, + float *extracted_data, + int n_max_coord, int channels, + int height, int width) { + + int x0, x1, y0, y1, nc; + float wx0, wx1, wy0, wy1; + int n, nd; + float x, y; + + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + n = (index / n_max_coord); + nd = n * n_max_coord * channels; + x = n_xy_coords[index * 2]; + y = n_xy_coords[index * 2 + 1]; + + x0 
= static_cast(floor(x)); + x1 = x0 + 1; + y0 = static_cast(floor(y)); + y1 = y0 + 1; + + x0 = x0 <= 0 ? 0 : (x0 >= (width - 1) ? (width - 1) : x0); + y0 = y0 <= 0 ? 0 : (y0 >= (height - 1) ? (height - 1) : y0); + x1 = x1 <= 0 ? 0 : (x1 >= (width - 1) ? (width - 1) : x1); + y1 = y1 <= 0 ? 0 : (y1 >= (height - 1) ? (height - 1) : y1); + + wx0 = static_cast(x1) - x; + wx1 = x - x0; + wy0 = static_cast(y1) - y; + wy1 = y - y0; + + if (x0 == x1) { + wx0 = 1; + wx1 = 0; + } + if (y0 == y1) { + wy0 = 1; + wy1 = 0; + } + for (int c = 0; c < channels; c++) { + nc = (n * channels + c) * height; + // extracted_data[index * channels + c] = wy0 * wx0 * data[(nc + y0) * + // width + x0] + // extracted_data[nd + index % n_max_coord + n_max_coord * c] = index; + extracted_data[nd + index % n_max_coord + n_max_coord * c] = + wy0 * wx0 * data[(nc + y0) * width + x0] + + wy1 * wx0 * data[(nc + y1) * width + x0] + + wy0 * wx1 * data[(nc + y0) * width + x1] + + wy1 * wx1 * data[(nc + y1) * width + x1]; + } + } +} + +/** + * Computes the distance between two matrix A (reference points) and + * B (query points) containing respectively wA and wB points. 
+ * + * @param A pointer on the matrix A + * @param wA width of the matrix A = number of points in A + * @param B pointer on the matrix B + * @param wB width of the matrix B = number of points in B + * @param dim dimension of points = height of matrices A and B + * @param AB pointer on the matrix containing the wA*wB distances computed + */ +__global__ void cuComputeDistanceGlobal(float *A, int wA, float *B, int wB, + int dim, float *AB) { + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // Declaration of the shared memory arrays As and Bs used to store the + // sub-matrix of A and B + __shared__ float shared_A[BLOCK_DIM][BLOCK_DIM]; + __shared__ float shared_B[BLOCK_DIM][BLOCK_DIM]; + + // Sub-matrix of A (begin, step, end) and Sub-matrix of B (begin, step) + __shared__ int begin_A; + __shared__ int begin_B; + __shared__ int step_A; + __shared__ int step_B; + __shared__ int end_A; + + // Thread index + int tx = threadIdx.x; + int ty = threadIdx.y; + + // Other variables + float tmp; + float ssd = 0; + + // Loop parameters + begin_A = BLOCK_DIM * blockIdx.y; + begin_B = BLOCK_DIM * blockIdx.x; + step_A = BLOCK_DIM * wA; + step_B = BLOCK_DIM * wB; + end_A = begin_A + (dim - 1) * wA; + + // if (blockIdx.x == 0 && blockIdx.y == 0 && tx == 0 && ty == 0) + // printf("begin_A is %d, end_A is %d, step_A is %d, begin_B is %d, step_B is %d\n", begin_A, end_A, step_A, begin_B, step_B); + + // Conditions + int cond0 = (begin_A + tx < wA); // used to write in shared memory + int cond1 = (begin_B + tx < wB); // used to write in shared memory & to + // computations and to write in output matrix + int cond2 = + (begin_A + ty < wA); // used to computations and to write in output matrix + // Loop over all the sub-matrices of A and B required to compute the block + // sub-matrix + for (int a = begin_A, b = begin_B; a <= end_A; a += step_A, b += step_B) { + // if (blockIdx.x == 0 && blockIdx.y == 0 && tx == 0 && ty == 0) + // printf("a is %d, 
end_A is %d, step_A is %d, b is %d, step_B is %d\n", begin_A, end_A, step_A, begin_B, step_B); + // Load the matrices from device memory to shared memory; each thread loads + // one element of each matrix + if (a / wA + ty < dim) { + shared_A[ty][tx] = (cond0) ? A[a + wA * ty + tx] : 0; + shared_B[ty][tx] = (cond1) ? B[b + wB * ty + tx] : 0; + } else { + shared_A[ty][tx] = 0; + shared_B[ty][tx] = 0; + } + + // Synchronize to make sure the matrices are loaded + block.sync(); + // Compute the difference between the two matrixes; each thread computes one + // element of the block sub-matrix + if (cond2 && cond1) { + for (int k = 0; k < BLOCK_DIM; ++k) { + tmp = shared_A[k][ty] - shared_B[k][tx]; + ssd += tmp * tmp; + } + } + + // Synchronize to make sure that the preceding computation is done before + // loading two new sub-matrices of A and B in the next iteration + block.sync(); + } + + // Write the block sub-matrix to device memory; each thread writes one element + if (cond2 && cond1) { + AB[(begin_A + ty) * wB + begin_B + tx] = ssd; + } +} + +/** + * Gathers k-th smallest distances for each column of the distance matrix in + * the top. 
+ * + * @param dist distance matrix + * @param ind index matrix + * @param width width of the distance matrix and of the index matrix + * @param height height of the distance matrix and of the index matrix + * @param k number of neighbors to consider + */ +__global__ void cuInsertionSort(float *dist, int *ind, int width, int height, + int k) { + // printf("test2\n"); + // Variables + int l, i, j; + float *p_dist; + int *p_ind; + float curr_dist, max_dist; + int curr_row, max_row; + unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x; + + if (xIndex < width) { + // Pointer shift, initialization, and max value + p_dist = dist + xIndex; + p_ind = ind + xIndex; + max_dist = p_dist[0]; + p_ind[0] = 0; + + // Part 1 : sort kth firt elementZ + for (l = 1; l < k; l++) { + curr_row = l * width; + curr_dist = p_dist[curr_row]; + if (curr_dist < max_dist) { + i = l - 1; + for (int a = 0; a < l - 1; a++) { + if (p_dist[a * width] > curr_dist) { + i = a; + break; + } + } + for (j = l; j > i; j--) { + p_dist[j * width] = p_dist[(j - 1) * width]; + p_ind[j * width] = p_ind[(j - 1) * width]; + } + p_dist[i * width] = curr_dist; + p_ind[i * width] = l; + } else { + p_ind[l * width] = l; + } + max_dist = p_dist[curr_row]; + } + + // Part 2 : insert element in the k-th first lines + max_row = (k - 1) * width; + for (l = k; l < height; l++) { + curr_dist = p_dist[l * width]; + if (curr_dist < max_dist) { + i = k - 1; + for (int a = 0; a < k - 1; a++) { + if (p_dist[a * width] > curr_dist) { + i = a; + break; + } + } + for (j = k - 1; j > i; j--) { + p_dist[j * width] = p_dist[(j - 1) * width]; + p_ind[j * width] = p_ind[(j - 1) * width]; + } + p_dist[i * width] = curr_dist; + p_ind[i * width] = l; + max_dist = p_dist[max_row]; + } + } + } +} + +/** + * Computes the square root of the first line (width-th first element) + * of the distance matrix. 
+ * + * @param dist distance matrix + * @param width width of the distance matrix + * @param k number of neighbors to consider + */ +__global__ void cuParallelSqrt(float *dist, int width, int k) { + unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int yIndex = blockIdx.y * blockDim.y + threadIdx.y; + // printf("test3\n"); + if (xIndex < width && yIndex < k) + dist[yIndex * width + xIndex] = sqrt(dist[yIndex * width + xIndex]); +} + +//-----------------------------------------------------------------------------------------------// +// K-th NEAREST NEIGHBORS // +//-----------------------------------------------------------------------------------------------// + +/** + * Prints the error message return during the memory allocation. + * + * @param error error value return by the memory allocation function + * @param memorySize size of memory tried to be allocated + */ +void printErrorMessage(cudaError_t error, int memorySize) { + printf("==================================================\n"); + printf("MEMORY ALLOCATION ERROR : %s\n", cudaGetErrorString(error)); + printf("Whished allocated memory : %d\n", memorySize); + printf("==================================================\n"); +} + +/** + * K nearest neighbor algorithm + * - Initialize CUDA + * - Allocate device memory + * - Copy point sets (reference and query points) from host to device memory + * - Compute the distances + indexes to the k nearest neighbors for each query + * point + * - Copy distances from device to host memory + * + * @param ref_host reference points ; pointer to linear matrix + * @param ref_width number of reference points ; width of the matrix + * @param query_host query points ; pointer to linear matrix + * @param query_width number of query points ; width of the matrix + * @param height dimension of points ; height of the matrices + * @param k number of neighbor to consider + * @param dist_host distances to k nearest neighbors ; pointer to linear + * matrix + * 
@param dist_host indexes of the k nearest neighbors ; pointer to linear + * matrix + * + */ +void knn_cuda(float *ref_host, int ref_width, float *query_host, + int query_width, int height, int k, float *dist_host, + int *ind_host) { + // Grids ans threads + dim3 g_16x16(query_width / 16, ref_width / 16, 1); + dim3 t_16x16(16, 16, 1); + if (query_width % 16 != 0) + g_16x16.x += 1; + if (ref_width % 16 != 0) + g_16x16.y += 1; + // + dim3 g_256x1(query_width / 256, 1, 1); + dim3 t_256x1(256, 1, 1); + if (query_width % 256 != 0) + g_256x1.x += 1; + + dim3 g_k_16x16(query_width / 16, k / 16, 1); + dim3 t_k_16x16(16, 16, 1); + if (query_width % 16 != 0) + g_k_16x16.x += 1; + if (k % 16 != 0) + g_k_16x16.y += 1; + + // printf("ref_width is %d, query_width is %d, height is %d\n", ref_width, query_width, height); + + // Kernel 1: Compute all the distances + cuComputeDistanceGlobal<<>>(ref_host, ref_width, query_host, + query_width, height, dist_host); + // Kernel 2: Sort each column + cuInsertionSort<<>>(dist_host, ind_host, query_width, + ref_width, k); + // Kernel 3: Compute square root of k first elements + cuParallelSqrt<<>>(dist_host, query_width, k); + cudaDeviceSynchronize(); +} + +float compute_distance(const float *ref, int ref_nb, const float *query, + int query_nb, int dim, int ref_index, int query_index) { + float sum = 0.f; + for (int d = 0; d < dim; ++d) { + const float diff = + ref[d * ref_nb + ref_index] - query[d * query_nb + query_index]; + sum += diff * diff; + } + return sqrtf(sum); +} + +void modified_insertion_sort(float *dist, int *index, int length, int k) { + + // Initialise the first index + index[0] = 0; + + // Go through all points + for (int i = 1; i < length; ++i) { + + // Store current distance and associated index + float curr_dist = dist[i]; + int curr_index = i; + + // Skip the current value if its index is >= k and if it's higher the k-th + // slready sorted mallest value + if (i >= k && curr_dist >= dist[k - 1]) { + continue; + } + + // 
Shift values (and indexes) higher that the current distance to the right + int j = min(i, k - 1); + while (j > 0 && dist[j - 1] > curr_dist) { + dist[j] = dist[j - 1]; + index[j] = index[j - 1]; + --j; + } + + // Write the current distance and index at their position + dist[j] = curr_dist; + index[j] = curr_index; + } +} + +bool knn_c(const float *ref, int ref_nb, const float *query, int query_nb, + int dim, int k, float *knn_dist, int *knn_index) { + // Allocate local array to store all the distances / indexes for a given query + // point + float *dist = (float *)malloc(ref_nb * sizeof(float)); + int *index = (int *)malloc(ref_nb * sizeof(int)); + + // Allocation checks + if (!dist || !index) { + printf("Memory allocation error\n"); + free(dist); + free(index); + return false; + } + + // Process one query point at the time + for (int i = 0; i < query_nb; ++i) { + + // Compute all distances / indexes + for (int j = 0; j < ref_nb; ++j) { + dist[j] = compute_distance(ref, ref_nb, query, query_nb, dim, j, i); + index[j] = j; + } + + // Sort distances / indexes + modified_insertion_sort(dist, index, ref_nb, k); + + // Copy k smallest distances and their associated index + for (int j = 0; j < k; ++j) { + knn_dist[j * query_nb + i] = dist[j]; + knn_index[j * query_nb + i] = index[j]; + } + } + + // Memory clean-up + free(dist); + free(index); + return true; +} + +/** + * Example of use of kNN search CUDA. 
+ */ +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + // Variables and parameters + float *ref; // Pointer to reference point array + float *query; // Pointer to query point array + float *dist, *dist_c; // Pointer to distance array + int *ind, *ind_c; // Pointer to index array + int ref_nb = 4096; // Reference point number, max=65535 + int query_nb = 4096; // Query point number, max=65535 + int dim = 128; // Dimension of points + int k = 20; // Nearest neighbors to consider + int iterations = 100; + + if (argc >= 4) { + ref_nb = atoi(argv[1]); + query_nb = atoi(argv[2]); + dim = atoi(argv[3]); + } + + int c_iterations = 10; + int i; + const float precision = 0.001f; // distance error max + int nb_correct_precisions = 0; + int nb_correct_indexes = 0; + float *knn_dist = (float *)malloc(query_nb * k * sizeof(float)); + int *knn_index = (int *)malloc(query_nb * k * sizeof(int)); + + // Memory allocation + ref = (float *)malloc(ref_nb * dim * sizeof(float)); + query = (float *)malloc(query_nb * dim * sizeof(float)); + dist = (float *)malloc(query_nb * ref_nb * sizeof(float)); + ind = (int *)malloc(query_nb * k * sizeof(int)); + + dist_c = (float *)malloc(query_nb * k * sizeof(float)); + ind_c = (int *)malloc(query_nb * k * sizeof(float)); + + // Init + srand(time(NULL)); + for (i = 0; i < ref_nb * dim; i++) + ref[i] = (float)rand() / (float)RAND_MAX; + for (i = 0; i < query_nb * dim; i++) + query[i] = (float)rand() / (float)RAND_MAX; + + // printf("Ground truth computation in progress...\n\n"); + // if (!knn_c(ref, ref_nb, query, query_nb, dim, k, knn_dist, knn_index)) { + // free(knn_dist); + // free(knn_index); + // return EXIT_FAILURE; + // } + + // Variables for duration evaluation + float elapsed_time; + + // Display informations + printf("Number of reference points : %6d\n", ref_nb); + printf("Number of query points : %6d\n", query_nb); + 
printf("Dimension of points : %4d\n", dim); + printf("Number of neighbors to consider : %4d\n", k); + printf("Processing kNN search :\n"); + + float precision_accuracy = 0.0f; + float index_accuracy = 0.0f; + /* + printf("On CPU: \n"); + struct timeval tic; + gettimeofday(&tic, NULL); + for (i = 0; i < c_iterations; i++) { + knn_c(ref, ref_nb, query, query_nb, dim, k, dist_c, ind_c); + } + + for (int i = 0; i < query_nb * k; ++i) { + if (fabs(dist_c[i] - knn_dist[i]) <= precision) { + nb_correct_precisions++; + } + if (ind_c[i] == knn_index[i]) { + nb_correct_indexes++; + } + } + + struct timeval toc; + gettimeofday(&toc, NULL); + elapsed_time = toc.tv_sec - tic.tv_sec; + elapsed_time += (toc.tv_usec - tic.tv_usec) / 1000000.; + precision_accuracy = nb_correct_precisions / ((float)query_nb * k); + index_accuracy = nb_correct_indexes / ((float)query_nb * k); + printf("%f, %f\n", precision_accuracy, index_accuracy); + printf(" done in %f s for %d iterations (%f s by iteration)\n", elapsed_time, + c_iterations, elapsed_time / (c_iterations)); + */ + printf("on GPU: \n"); + + // Call kNN search CUDA + GPU_argv_init(); + + initTrace(); + startCPU(); + + float *ref_device; + float *query_device; + float *dist_device; + int *ind_device; + + cudaMallocManaged(&ref_device, ref_nb * dim * sizeof(float)); + cudaMallocManaged(&query_device, query_nb * dim * sizeof(float)); + cudaMallocManaged(&dist_device, query_nb * ref_nb * sizeof(float)); + cudaMallocManaged(&ind_device, query_nb * k * sizeof(int)); + + memcpy(ref_device, ref, ref_nb * dim * sizeof(float)); + memcpy(query_device, query, query_nb * dim * sizeof(float)); + + for (i = 0; i < iterations; i++) { + // knn_cuda(ref, ref_nb, query, query_nb, dim, k, dist, ind); + knn_cuda(ref_device, ref_nb, query_device, query_nb, dim, k, dist_device, ind_device); + } + + memcpy(dist, dist_device, query_nb * ref_nb * sizeof(float)); + memcpy(ind, ind_device, query_nb * k * sizeof(int)); + + cudaFree(ind_device); + 
cudaFree(dist_device); + cudaFree(query_device); + cudaFree(ref_device); + + endCPU(); + finiTrace(); + + nb_correct_precisions = 0; + nb_correct_indexes = 0; + for (int i = 0; i < query_nb * k; ++i) { + if (fabs(dist[i] - knn_dist[i]) <= precision) { + nb_correct_precisions++; + } + if (ind[i] == knn_index[i]) { + nb_correct_indexes++; + } + } + + precision_accuracy = nb_correct_precisions / ((float)query_nb * k); + index_accuracy = nb_correct_indexes / ((float)query_nb * k); + printf("%f, %f\n", precision_accuracy, index_accuracy); + + + // Destroy cuda event object and free memory + free(ind); + free(dist); + free(query); + free(ref); + free(dist_c); + free(ind_c); +} \ No newline at end of file diff --git a/workloads/realworld/uvm/knn/run.sh b/workloads/realworld/uvm/knn/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..541db1387ce3ebe87b1338f079609b8b4a2736c6 --- /dev/null +++ b/workloads/realworld/uvm/knn/run.sh @@ -0,0 +1 @@ +./knn 4096 4096 128 \ No newline at end of file diff --git a/workloads/realworld/uvm/knn/run_super.sh b/workloads/realworld/uvm/knn/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..86ad9321b470072e5e84e706e1619ee200cf2b31 --- /dev/null +++ b/workloads/realworld/uvm/knn/run_super.sh @@ -0,0 +1 @@ +./knn 32768 32768 128 \ No newline at end of file diff --git a/workloads/realworld/uvm/lavaMD/README b/workloads/realworld/uvm/lavaMD/README new file mode 100755 index 0000000000000000000000000000000000000000..27b526ff669e9632b11193634307bfe778a2dfff --- /dev/null +++ b/workloads/realworld/uvm/lavaMD/README @@ -0,0 +1,50 @@ +//======================================================================================================================================================150 +// DESCRIPTION +//======================================================================================================================================================150 + +This is the CUDA version of the 
code. + +The code calculates particle potential and relocation due to mutual forces between particles within a large 3D space. This space is +divided into cubes, or large boxes, that are allocated to individual cluster nodes. The large box at each node is further divided into +cubes, called boxes. 26 neighbor boxes surround each box (the home box). Home boxes at the boundaries of the particle space have fewer neighbors. +Particles only interact with those other particles that are within a cutoff radius since ones at larger distances exert negligible forces. Thus the +box size is chosen so that the cutoff radius does not span beyond any neighbor box for any particle in a home box, thus limiting the reference space to +a finite number of boxes. + +This code [1] was derived from the ddcMD application [2] by rewriting the front end and structuring it for parallelization. This code represents an MPI +task that runs on a single cluster node. While the details of the code are somewhat different than the original, the code retains the structure of the +MPI task in the original code. Since the rest of the MPI code is not included here, the application first emulates MPI partitioning of the particle space +into boxes. Then, for every particle in the home box, the nested loop processes interactions first with other particles in the home box and then with +particles in all neighbor boxes. The processing of each particle consists of a single stage of calculation that is enclosed in the innermost loop. The +nested loops in the application were parallelized in such a way that at any point of time the GPU warp/wavefront accesses adjacent memory locations. The +speedup depends on the number of boxes, particles (fixed) and the actual calculation for each particle (fixed). The application is memory bound, and +GPU speedup seems to saturate at about 16x when compared to single-core CPU. + +More information about the parallel version of this code can be found in: +[1] L. G. Szafaryn, T. Gamblin, B. 
deSupinski and K. Skadron. "Experiences with Achieving Portability across Heterogeneous Architectures." Submitted to +WOLFHPC workshop at 25th International Conference on Supercomputing (ICS). Tucson, AZ. 2010. +More about the original ddcMD application can be found in: +[2] F. H. Streitz, J. N. Glosli, M. V. Patel, B. Chan, R. K. Yates, B. R. de Supinski, J. Sexton, J and A. Gunnels. "100+ TFlop Solidification Simulations +on BlueGene/L." In Proceedings of the 2005 Supercomputing Conference (SC 05). Seattle, WA. 2005. + +//======================================================================================================================================================150 +// USE +//======================================================================================================================================================150 + +The code takes the followint parameters: +-boxes1d (number of boxes in one dimension, the total number of boxes will be that^3) + +The code can be run as follows: +./lavaMD -boxes1d 10 + +******Adjustable work group size***** +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=128" + +######OUTPUT FOR VALIDATION######## +USAGE: +make clean +make OUTPUT=Y \ No newline at end of file diff --git a/workloads/realworld/uvm/lavaMD/kernel/kernel_gpu_cuda.cu b/workloads/realworld/uvm/lavaMD/kernel/kernel_gpu_cuda.cu new file mode 100755 index 0000000000000000000000000000000000000000..15164488f40349d583134da5d2a03a1ffc854c52 --- /dev/null +++ b/workloads/realworld/uvm/lavaMD/kernel/kernel_gpu_cuda.cu @@ -0,0 +1,199 @@ +//----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------200 +// plasmaKernel_gpu_2 
+//----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------200 + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +__global__ void kernel_gpu_cuda(par_str d_par_gpu, + dim_str d_dim_gpu, + box_str *d_box_gpu, + FOUR_VECTOR *d_rv_gpu, + fp *d_qv_gpu, + FOUR_VECTOR *d_fv_gpu, + int boxes_per_block) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + // THREAD PARAMETERS + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + int bx = blockIdx.x; // get current horizontal block index (0-n) + int tx = threadIdx.x; // get current horizontal thread index (0-n) + int wtx = tx; + + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + // Extract input parameters + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + + // parameters + fp a2 = 2.0 * d_par_gpu.alpha * d_par_gpu.alpha; + + // home box + int first_i; + FOUR_VECTOR *rA; + FOUR_VECTOR *fA; + __shared__ FOUR_VECTOR rA_shared[100]; + + // nei box + int pointer; + int k = 0; + int first_j; + FOUR_VECTOR *rB; + fp *qB; + int j = 0; + __shared__ FOUR_VECTOR rB_shared[100]; + __shared__ double qB_shared[100]; + + // common + fp r2; + fp u2; + fp vij; + fp fs; + fp fxij; + fp fyij; + fp fzij; + THREE_VECTOR d; + + + int box = bx * boxes_per_block; + int end_box = box + boxes_per_block; + + 
//--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + // DO FOR THE NUMBER OF BOXES + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + for (; box < end_box; box++) + { + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + // Home box + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Setup parameters + //----------------------------------------------------------------------------------------------------------------------------------140 + + // home box - box parameters + first_i = d_box_gpu[box].offset; + + // home box - distance, force, charge and type parameters + rA = &d_rv_gpu[first_i]; + fA = &d_fv_gpu[first_i]; + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Copy to shared memory + //----------------------------------------------------------------------------------------------------------------------------------140 + + // home box - shared memory + while (wtx < NUMBER_PAR_PER_BOX) + { + rA_shared[wtx] = rA[wtx]; + wtx = wtx + NUMBER_THREADS; + } + wtx = tx; + + // synchronize threads - not needed, but just to be safe + block.sync(); + + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + // nei box loop + 
//------------------------------------------------------------------------------------------------------------------------------------------------------160 + + // if (wtx == 0) + // printf("d_box_gpu[%d].nn is %d\n", bx, d_box_gpu[bx].nn); + + int tile = 0; + int end_tile = 1 + d_box_gpu[box].nn; + + // loop over neiing boxes of home box + for (; tile < end_tile; tile++) + { + + //----------------------------------------50 + // nei box - get pointer to the right box + //----------------------------------------50 + + if (tile == 0) + { + pointer = box; // set first box to be processed to home box + } + else + { + pointer = d_box_gpu[box].nei[tile - 1].number; // remaining boxes are nei boxes + } + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Setup parameters + //----------------------------------------------------------------------------------------------------------------------------------140 + + // nei box - box parameters + first_j = d_box_gpu[pointer].offset; + + // nei box - distance, (force), charge and (type) parameters + rB = &d_rv_gpu[first_j]; + qB = &d_qv_gpu[first_j]; + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Setup parameters + //----------------------------------------------------------------------------------------------------------------------------------140 + + // nei box - shared memory + while (wtx < NUMBER_PAR_PER_BOX) + { + rB_shared[wtx] = rB[wtx]; + qB_shared[wtx] = qB[wtx]; + wtx = wtx + NUMBER_THREADS; + } + wtx = tx; + + // synchronize threads because in next section each thread accesses data brought in by different threads here + block.sync(); + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Calculation + 
void
kernel_gpu_cuda_wrapper(par_str par_cpu,
                        dim_str dim_cpu,
                        box_str* box_cpu,
                        FOUR_VECTOR* rv_cpu,
                        fp* qv_cpu,
                        FOUR_VECTOR* fv_cpu,
                        int nblocks)
{
	// Host-side wrapper around the lavaMD GPU kernel: allocates unified
	// (managed) memory, stages the inputs, launches kernel_gpu_cuda,
	// copies the force results back into fv_cpu, and prints stage timings.
	//
	// Parameters:
	//   par_cpu  - simulation parameters (alpha)
	//   dim_cpu  - problem dimensions (box count, buffer byte sizes)
	//   box_cpu  - host box descriptors (dim_cpu.box_mem bytes)
	//   rv_cpu   - host particle positions/charinfo (dim_cpu.space_mem bytes)
	//   qv_cpu   - host particle charges (dim_cpu.space_mem2 bytes)
	//   fv_cpu   - OUT: forces, overwritten on return (dim_cpu.space_mem bytes)
	//   nblocks  - cube root of the grid size; grid.x = nblocks^3

	//======================================================================================================================================================150
	// CPU VARIABLES
	//======================================================================================================================================================150

	// stage timestamps (microseconds, per get_time())
	long long time0;	// entry
	long long time1;	// after device setup / launch geometry
	long long time2;	// after managed allocations
	long long time3;	// after host->managed staging copies
	long long time4;	// after kernel completion
	long long time5;	// after results copied out
	long long time6;	// after frees

	time0 = get_time();

	//======================================================================================================================================================150
	// GPU SETUP
	//======================================================================================================================================================150

	GPU_argv_init();

	// CUPTI trace + wall-clock markers bracket the whole GPU section
	initTrace();
	startCPU();

	// cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is the
	// supported equivalent and flushes any pending driver-init work.
	cudaDeviceSynchronize();

	//====================================================================================================100
	// VARIABLES
	//====================================================================================================100

	box_str* d_box_gpu;
	FOUR_VECTOR* d_rv_gpu;
	fp* d_qv_gpu;
	FOUR_VECTOR* d_fv_gpu;

	dim3 threads;
	dim3 blocks;

	//====================================================================================================100
	// EXECUTION PARAMETERS
	//====================================================================================================100

	// The grid is fixed at nblocks^3 rather than one block per box; each
	// block then walks boxes_per_block consecutive boxes.
	blocks.x = nblocks * nblocks * nblocks;
	blocks.y = 1;
	threads.x = NUMBER_THREADS;	// threads per block
	threads.y = 1;

	// Ceiling division so every box is covered when there are more boxes
	// than blocks; otherwise each block handles a single box.
	int boxes_per_block = 1;
	if (dim_cpu.number_boxes >= blocks.x)
	{
		boxes_per_block = (dim_cpu.number_boxes + blocks.x - 1) / blocks.x;
	}

	time1 = get_time();

	//======================================================================================================================================================150
	// GPU MEMORY (MALLOC) - unified memory, visible to both host and device
	//======================================================================================================================================================150

	cudaMallocManaged((void **)&d_box_gpu, dim_cpu.box_mem);	// boxes
	cudaMallocManaged((void **)&d_rv_gpu, dim_cpu.space_mem);	// rv
	cudaMallocManaged((void **)&d_qv_gpu, dim_cpu.space_mem2);	// qv
	cudaMallocManaged((void **)&d_fv_gpu, dim_cpu.space_mem);	// fv (output)

	time2 = get_time();

	//======================================================================================================================================================150
	// GPU MEMORY COPY IN - plain memcpy works because the buffers are managed
	//======================================================================================================================================================150

	memcpy(d_box_gpu, box_cpu, dim_cpu.box_mem);
	memcpy(d_rv_gpu, rv_cpu, dim_cpu.space_mem);
	memcpy(d_qv_gpu, qv_cpu, dim_cpu.space_mem2);
	memcpy(d_fv_gpu, fv_cpu, dim_cpu.space_mem);

	time3 = get_time();

	//======================================================================================================================================================150
	// KERNEL - all boxes
	//======================================================================================================================================================150

	kernel_gpu_cuda<<<blocks, threads>>>(par_cpu,
	                                     dim_cpu,
	                                     d_box_gpu,
	                                     d_rv_gpu,
	                                     d_qv_gpu,
	                                     d_fv_gpu,
	                                     boxes_per_block);

	checkCUDAError("Start");
	cudaDeviceSynchronize();

	time4 = get_time();

	//======================================================================================================================================================150
	// GPU MEMORY COPY OUT
	//======================================================================================================================================================150

	memcpy(fv_cpu, d_fv_gpu, dim_cpu.space_mem);

	time5 = get_time();

	//======================================================================================================================================================150
	// GPU MEMORY DEALLOCATION
	//======================================================================================================================================================150

	cudaFree(d_rv_gpu);
	cudaFree(d_qv_gpu);
	cudaFree(d_fv_gpu);
	cudaFree(d_box_gpu);

	endCPU();
	finiTrace();

	time6 = get_time();

	//======================================================================================================================================================150
	// DISPLAY TIMING
	//======================================================================================================================================================150

	// NOTE: a literal percent sign must be written as "%%" in a printf
	// format string; the original bare "% :" was undefined behavior.
	printf("Time spent in different stages of GPU_CUDA KERNEL:\n");

	printf("%15.12f s, %15.12f %% : GPU: SET DEVICE / DRIVER INIT\n", (float)(time1 - time0) / 1000000, (float)(time1 - time0) / (float)(time6 - time0) * 100);
	printf("%15.12f s, %15.12f %% : GPU MEM: ALO\n", (float)(time2 - time1) / 1000000, (float)(time2 - time1) / (float)(time6 - time0) * 100);
	printf("%15.12f s, %15.12f %% : GPU MEM: COPY IN\n", (float)(time3 - time2) / 1000000, (float)(time3 - time2) / (float)(time6 - time0) * 100);
	printf("%15.12f s, %15.12f %% : GPU: KERNEL\n", (float)(time4 - time3) / 1000000, (float)(time4 - time3) / (float)(time6 - time0) * 100);
	printf("%15.12f s, %15.12f %% : GPU MEM: COPY OUT\n", (float)(time5 - time4) / 1000000, (float)(time5 - time4) / (float)(time6 - time0) * 100);
	printf("%15.12f s, %15.12f %% : GPU MEM: FRE\n", (float)(time6 - time5) / 1000000, (float)(time6 - time5) / (float)(time6 - time0) * 100);

	printf("Total time:\n");
	printf("%.12f s\n", (float)(time6 - time0) / 1000000);
}
0000000000000000000000000000000000000000..a7c88472e3939414bbdc314e2bfb1c46bc345bea --- /dev/null +++ b/workloads/realworld/uvm/lavaMD/main.c @@ -0,0 +1,318 @@ +//========================================================================================================================================================================================================200 +//======================================================================================================================================================150 +//====================================================================================================100 +//==================================================50 + +//========================================================================================================================================================================================================200 +// UPDATE +//========================================================================================================================================================================================================200 + +// 14 APR 2011 Lukasz G. 
Szafaryn + +//========================================================================================================================================================================================================200 +// DEFINE/INCLUDE +//========================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// LIBRARIES +//======================================================================================================================================================150 + +#include // (in path known to compiler) needed by printf +#include // (in path known to compiler) needed by malloc +#include // (in path known to compiler) needed by true/false + +//======================================================================================================================================================150 +// UTILITIES +//======================================================================================================================================================150 + +#include "./util/timer/timer.h" // (in path specified here) +#include "./util/num/num.h" // (in path specified here) + +//======================================================================================================================================================150 +// MAIN FUNCTION HEADER +//======================================================================================================================================================150 + +#include "./main.h" // (in the current directory) + +//======================================================================================================================================================150 +// KERNEL 
+//======================================================================================================================================================150 + +#include "./kernel/kernel_gpu_cuda_wrapper.h" // (in library path specified here) + +//========================================================================================================================================================================================================200 +// MAIN FUNCTION +//========================================================================================================================================================================================================200 +#define _POSIX_C_SOURCE 200809L +#include +#include +#include +extern inline __attribute__((always_inline)) unsigned long rdtsc() +{ + unsigned long a, d; + + __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); + + return (a | (d << 32)); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsp() { + struct timespec tms; + if (clock_gettime(CLOCK_REALTIME, &tms)) { + return -1; + } + unsigned long ns = tms.tv_sec * 1000000000; + ns += tms.tv_nsec; + return ns; +} + + +int +main( int argc, + char *argv []) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + + printf("thread block size of kernel = %d \n", NUMBER_THREADS); + //======================================================================================================================================================150 + // CPU/MCPU VARIABLES + //======================================================================================================================================================150 + + // timer + long long time0; + + time0 = get_time(); + + // timer + long long time1; + long long time2; + long long time3; + long long time4; + long long time5; + long long time6; + long long time7; + + // counters + int i, j, k, l, m, n; + + // system 
memory + par_str par_cpu; + dim_str dim_cpu; + box_str* box_cpu; + FOUR_VECTOR* rv_cpu; + fp* qv_cpu; + FOUR_VECTOR* fv_cpu; + int nh; + + time1 = get_time(); + + //======================================================================================================================================================150 + // CHECK INPUT ARGUMENTS + //======================================================================================================================================================150 + + // assing default values + dim_cpu.boxes1d_arg = 1; + + // go through arguments + dim_cpu.boxes1d_arg = atoi(argv[1]); + int nblocks = atoi(argv[2]); + + // Print configuration + printf("Configuration used: boxes1d = %d\n", dim_cpu.boxes1d_arg); + + time2 = get_time(); + + //======================================================================================================================================================150 + // INPUTS + //======================================================================================================================================================150 + + par_cpu.alpha = 0.5; + + time3 = get_time(); + + //======================================================================================================================================================150 + // DIMENSIONS + //======================================================================================================================================================150 + + // total number of boxes + dim_cpu.number_boxes = dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg; + + // how many particles space has in each direction + dim_cpu.space_elem = dim_cpu.number_boxes * NUMBER_PAR_PER_BOX; + dim_cpu.space_mem = dim_cpu.space_elem * sizeof(FOUR_VECTOR); + dim_cpu.space_mem2 = dim_cpu.space_elem * sizeof(fp); + + // box array + dim_cpu.box_mem = dim_cpu.number_boxes * sizeof(box_str); + + time4 = get_time(); + + 
//======================================================================================================================================================150 + // SYSTEM MEMORY + //======================================================================================================================================================150 + + //====================================================================================================100 + // BOX + //====================================================================================================100 + + // allocate boxes + box_cpu = (box_str*)malloc(dim_cpu.box_mem); + + // initialize number of home boxes + nh = 0; + + // home boxes in z direction + for(i=0; i=0 && (j+m)>=0 && (k+n)>=0)==true && ((i+l) 1) { + + // variables + int max_multiprocessors; + int max_device; + cudaDeviceProp properties; + + // initialize variables + max_multiprocessors = 0; + max_device = 0; + + for (device = 0; device < num_devices; device++) { + cudaGetDeviceProperties(&properties, device); + if (max_multiprocessors < properties.multiProcessorCount) { + max_multiprocessors = properties.multiProcessorCount; + max_device = device; + } + } + cudaSetDevice(max_device); + } + +} + +//====================================================================================================100 +// GET LAST ERROR +//====================================================================================================100 + +void checkCUDAError(const char *msg) +{ + cudaError_t err = cudaGetLastError(); + if( cudaSuccess != err) { + // fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + printf("Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + fflush(NULL); + exit(EXIT_FAILURE); + } +} + +//===============================================================================================================================================================================================================200 +// END SET_DEVICE 
CODE +//===============================================================================================================================================================================================================200 diff --git a/workloads/realworld/uvm/lavaMD/util/device/device.h b/workloads/realworld/uvm/lavaMD/util/device/device.h new file mode 100755 index 0000000000000000000000000000000000000000..23bb31d26c1bc0e607c9b2faf7bddaa5a5c06d98 --- /dev/null +++ b/workloads/realworld/uvm/lavaMD/util/device/device.h @@ -0,0 +1,29 @@ +//===============================================================================================================================================================================================================200 +// SET_DEVICE HEADER +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// INCLUDE/DEFINE +//======================================================================================================================================================150 + +#include // (in library path known to compiler) needed by printf + +//======================================================================================================================================================150 +// FUNCTION PROTOTYPES +//======================================================================================================================================================150 + +//====================================================================================================100 +// SET DEVICE +//====================================================================================================100 + +void setdevice(void); + 
+//====================================================================================================100 +// GET LAST ERROR +//====================================================================================================100 + +void checkCUDAError(const char *msg); + +//===============================================================================================================================================================================================================200 +// END SET_DEVICE HEADER +//===============================================================================================================================================================================================================200 diff --git a/workloads/realworld/uvm/lavaMD/util/num/num.c b/workloads/realworld/uvm/lavaMD/util/num/num.c new file mode 100755 index 0000000000000000000000000000000000000000..980ff7498832c784eab718a8b886e82891047599 --- /dev/null +++ b/workloads/realworld/uvm/lavaMD/util/num/num.c @@ -0,0 +1,53 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// DESCRIPTION +//===============================================================================================================================================================================================================200 + +// Returns: 0 if string does not represent integer +// 1 if string represents integer + +//===============================================================================================================================================================================================================200 +// NUM CODE 
+//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// ISINTEGER FUNCTION +//======================================================================================================================================================150 + +int isInteger(char *str){ + + //====================================================================================================100 + // make sure it's not empty + //====================================================================================================100 + + if (*str == '\0'){ + return 0; + } + + //====================================================================================================100 + // if any digit is not a number, return false + //====================================================================================================100 + + for(; *str != '\0'; str++){ + if (*str < 48 || *str > 57){ // digit characters (need to include . 
if checking for float) + return 0; + } + } + + //====================================================================================================100 + // it got past all my checks so I think it's a number + //====================================================================================================100 + + return 1; +} + +//===============================================================================================================================================================================================================200 +// END NUM CODE +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/uvm/lavaMD/util/num/num.h b/workloads/realworld/uvm/lavaMD/util/num/num.h new file mode 100755 index 0000000000000000000000000000000000000000..27a5e42fe2819d9ecc2f569b5979fb451985976f --- /dev/null +++ b/workloads/realworld/uvm/lavaMD/util/num/num.h @@ -0,0 +1,21 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// FILE HEADER +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// ISINTEGER FUNCTION PROTOTYPE +//======================================================================================================================================================150 + +int isInteger(char *str); + 
+//===============================================================================================================================================================================================================200 +// END FILE HEADER +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/uvm/lavaMD/util/timer/timer.c b/workloads/realworld/uvm/lavaMD/util/timer/timer.c new file mode 100755 index 0000000000000000000000000000000000000000..c7cc252b4e67b3a868722b7b2c58f5b863ae0cfc --- /dev/null +++ b/workloads/realworld/uvm/lavaMD/util/timer/timer.c @@ -0,0 +1,36 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// TIMER CODE +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// INCLUDE/DEFINE +//======================================================================================================================================================150 + +#include + +//======================================================================================================================================================150 +// FUNCTIONS +//======================================================================================================================================================150 + 
+//====================================================================================================100 +// DISPLAY TIME +//====================================================================================================100 + + // Returns the current system time in microseconds +long long get_time() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000000) + tv.tv_usec; +} + +//===============================================================================================================================================================================================================200 +// END TIMER CODE +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/uvm/lavaMD/util/timer/timer.h b/workloads/realworld/uvm/lavaMD/util/timer/timer.h new file mode 100755 index 0000000000000000000000000000000000000000..1744df4b8607f95c057ac4db6e9ced5ff84c4ab7 --- /dev/null +++ b/workloads/realworld/uvm/lavaMD/util/timer/timer.h @@ -0,0 +1,21 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// TIMER HEADER +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// FUNCTION PROTOTYPES 
+//======================================================================================================================================================150 + +long long get_time(); + +//===============================================================================================================================================================================================================200 +// END TIMER HEADER +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/uvm/lud/Makefile b/workloads/realworld/uvm/lud/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1dfc37ac7fa0db46535d0583970dba1cb5cfb80e --- /dev/null +++ b/workloads/realworld/uvm/lud/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := lud +CUFILES := lud_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o lud + diff --git a/workloads/realworld/uvm/lud/lud_cuda.cu b/workloads/realworld/uvm/lud/lud_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..72c77e7a87995c70582744937e159e38137e974e --- /dev/null +++ b/workloads/realworld/uvm/lud/lud_cuda.cu @@ -0,0 +1,286 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. 
Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK 256 + +#ifndef SIZE +#define SIZE 4096 +#endif + +__global__ void add(float *a, float *b, float *c) +{ + int tid = blockIdx.x; // Handle the data at the index + + c[tid] = a[tid] + b[tid]; +} + +__global__ void scale(float *a, int size, int index) +{ + int i; + int start = (index * size + index); + int end = (index * size + size); + + for (i = start + 1; i < end; i++) + { + a[i] = (a[i] / a[start]); + } +} + +__global__ void reduce(float *a, int size, int index, int b_size) +{ + extern __shared__ float pivot[SIZE]; + int i; + + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = b_size; + + int pivot_start = (index * size + index); + int pivot_end = (index * size + size); + + int start; + int end; + int pivot_row; + int my_row; + + if (tid == 0) + { + for (i = index; i < size; i++) + pivot[i] = a[(index * size) + i]; + } + + __syncthreads(); + + pivot_row = (index * size); + my_row = (((block_size * bid) + tid) * size); + start = my_row + index; + end = my_row + size; + + if (my_row > pivot_row) + { + for (i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(i - my_row)]); + } + } +} + +void initCPU(float *a, int N) +{ + srand((unsigned)2); + // fill the arrays 'a' on the CPU + for (int i = 0; i < (N * N); i++) + { + a[i] = ((rand() % 10) + 1); + // a[i] = 1.0f; + } +} + +void initGPU(float *a_dev, float *a, int N) +{ + for (int i = 0; i < (N * N); i++) + { + a_dev[i] = a[i]; + // a_dev[i] = 1.0f; + } +} + +__global__ void lud_kernel(float *a, int N) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // extern __shared__ float pivot[]; + __shared__ float 
pivot[SIZE]; + + for (int tile = 0; tile < N; tile += 1) { + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = blockDim.x; + + if (tid == 0 && bid == 0) { + int start = (tile * N + tile); + int end = (tile * N + N); + + for (int i = start + 1; i < end; i++) + a[i] = (a[i] / a[start]); + } + block.sync(); + + if (tid == 0) + { + for (int i = tile; i < N; i++) + pivot[i] = a[(tile * N) + i]; + } + block.sync(); + + int pivot_row = (tile * N); + int my_row = (((block_size * bid) + tid) * N); + int start = my_row + tile; + int end = my_row + N; + + if (my_row > pivot_row) + { + for (int i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(i - my_row)]); + } + } + block.sync(); + } +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + float *a; + float *a_gpu; + float *c; + float error; + int N; + int flag = 0; + + float **result; + float **a_ref; + int blocks; + + int i; + int j; + int k; + float l1; + float u1; + + N = SIZE; + // allocate memory on CPU + a = (float *)malloc(sizeof(float) * N * N); + c = (float *)malloc(sizeof(float) * N * N); + + result = (float **)malloc(sizeof(float *) * N); + a_ref = (float **)malloc(sizeof(float *) * N); + + for (i = 0; i < N; i++) + { + result[i] = (float *)malloc(sizeof(float) * N); + a_ref[i] = (float *)malloc(sizeof(float) * N); + } + initCPU(a, N); + + GPU_argv_init(); + initTrace(); + startCPU(); + // allocate the memory on the GPU + // cudaMalloc((void **)&dev_a, N * N * sizeof(float)); + + cudaMallocManaged(&a_gpu, N * N * sizeof(float)); + memcpy(a_gpu, a, N * N * sizeof(float)); + + // cudaMemcpy(dev_a, a, N * N * sizeof(float), cudaMemcpyHostToDevice); // copy array to device memory + + /*Perform LU Decomposition*/ + // for (i = 0; i < N; i++) + // { + // scale<<<1, 1>>>(a_gpu, N, i); + // // blocks= ((N-i-1)/512)+1; + // blocks = ((N / DIM_THREAD_BLOCK)); + // // 
printf("Number of blocks rxd : %d \n",blocks); + // reduce<<>>(a_gpu, N, i, DIM_THREAD_BLOCK); + // cudaDeviceSynchronize(); + // } + blocks = ((N / DIM_THREAD_BLOCK)); + lud_kernel<<>>(a_gpu, N); + cudaDeviceSynchronize(); + /*LU decomposition ends here*/ + + // cudaMemcpy(c, dev_a, N * N * sizeof(float), cudaMemcpyDeviceToHost); // copy array back to host + memcpy(c, a_gpu, N * N * sizeof(float)); + // free the memory allocated on the GPU + cudaFree(a_gpu); + + endCPU(); + finiTrace(); + + /*copy the result matrix into explicit 2D matrix for verification*/ + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + // c[i * N + j] = a_gpu[i * N + j]; + result[i][j] = c[i * N + j]; + // printf("result %d %d Error is %lf \n ", i, j, result[i][j]); + } + } + + printf("======================================================="); + printf("\n Performing inplace verification \n"); + + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + a_ref[i][j] = 0; + for (k = 0; k < N; k++) + { + if (i >= k) + l1 = result[i][k]; + else + l1 = 0; + + if (k == j) + u1 = 1; + else if (k < j) + u1 = result[k][j]; // figured it out + else + u1 = 0.0; + + a_ref[i][j] = a_ref[i][j] + (l1 * u1); + } + } + } + + // for (i = 0; i < N; i++) + // { + // for (j = 0; j < N; j++) + // { + // error = abs(a[(i * N + j)] - a_ref[i][j]); + // if (error > 1) + // { + // // printf("No match occured at %d %d Error is %lf \n ", i, j, abs(a[(i * N + j)] - a_ref[i][j])); + // // printf("No match occured at %d %d Error is %lf, %lf \n ", i, j, a[(i * N + j)], a_ref[i][j]); + // flag = flag + 1; + // } + // } + // } + + // if (flag == 0) + // printf("Match \n"); + // else + // printf("No Matchs %d \n", flag); + + + + return 0; +} diff --git a/workloads/realworld/uvm/lud/run.sh b/workloads/realworld/uvm/lud/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..ea7db937489e328f5e923d2b18774e4256eef123 --- /dev/null +++ b/workloads/realworld/uvm/lud/run.sh @@ -0,0 +1 @@ +./lud 1024 
diff --git a/workloads/realworld/uvm/lud/run_super.sh b/workloads/realworld/uvm/lud/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2791fe07c43d75b40894206ba79ed441f207ee26 --- /dev/null +++ b/workloads/realworld/uvm/lud/run_super.sh @@ -0,0 +1 @@ +./lud 4096 diff --git a/workloads/realworld/uvm/lud_perf/Makefile b/workloads/realworld/uvm/lud_perf/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1dfc37ac7fa0db46535d0583970dba1cb5cfb80e --- /dev/null +++ b/workloads/realworld/uvm/lud_perf/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := lud +CUFILES := lud_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o lud + diff --git a/workloads/realworld/uvm/lud_perf/lud b/workloads/realworld/uvm/lud_perf/lud new file mode 100755 index 0000000000000000000000000000000000000000..f25d5742b34cb75e3a01e1e2648e71bf4372dcf6 Binary files /dev/null and b/workloads/realworld/uvm/lud_perf/lud differ diff --git a/workloads/realworld/uvm/lud_perf/lud_cuda.cu b/workloads/realworld/uvm/lud_perf/lud_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..e9ec5eba7412791b13b92eefcba5e9df16e533b2 --- /dev/null +++ b/workloads/realworld/uvm/lud_perf/lud_cuda.cu @@ -0,0 +1,286 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. 
Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK 256 + +#ifndef SIZE +#define SIZE 4096 +#endif + +__global__ void add(float *a, float *b, float *c) +{ + int tid = blockIdx.x; // Handle the data at the index + + c[tid] = a[tid] + b[tid]; +} + +__global__ void scale(float *a, int size, int index) +{ + int i; + int start = (index * size + index); + int end = (index * size + size); + + for (i = start + 1; i < end; i++) + { + a[i] = (a[i] / a[start]); + } +} + +__global__ void reduce(float *a, int size, int index, int b_size) +{ + extern __shared__ float pivot[SIZE]; + int i; + + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = b_size; + + int pivot_start = (index * size + index); + int pivot_end = (index * size + size); + + int start; + int end; + int pivot_row; + int my_row; + + if (tid == 0) + { + for (i = index; i < size; i++) + pivot[i] = a[(index * size) + i]; + } + + __syncthreads(); + + pivot_row = (index * size); + my_row = (((block_size * bid) + tid) * size); + start = my_row + index; + end = my_row + size; + + if (my_row > pivot_row) + { + for (i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(i - my_row)]); + } + } +} + +void initCPU(float *a, int N) +{ + srand((unsigned)2); + // fill the arrays 'a' on the CPU + for (int i = 0; i < (N * N); i++) + { + a[i] = ((rand() % 10) + 1); + // a[i] = 1.0f; + } +} + +void initGPU(float *a_dev, float *a, int N) +{ + for (int i = 0; i < (N * N); i++) + { + a_dev[i] = a[i]; + // a_dev[i] = 1.0f; + } +} + +__global__ void lud_kernel(float *a, int N) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // extern __shared__ float pivot[]; + __shared__ float 
pivot[SIZE]; + + for (int tile = 0; tile < N; tile += 1) { + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = blockDim.x; + + if (tid == 0 && bid == 0) { + int start = (tile * N + tile); + int end = (tile * N + N); + + for (int i = start + 1; i < end; i++) + a[i] = (a[i] / a[start]); + } + block.sync(); + + if (tid == 0) + { + for (int i = tile; i < N; i++) + pivot[i] = a[(tile * N) + i]; + } + block.sync(); + + int pivot_row = (tile * N); + int my_row = (((block_size * bid) + tid) * N); + int start = my_row + tile; + int end = my_row + N; + + if (my_row > pivot_row) + { + for (int i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(i - my_row)]); + } + } + block.sync(); + } +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + float *a; + float *a_gpu; + float *c; + float error; + int N; + int flag = 0; + + float **result; + float **a_ref; + int blocks; + + int i; + int j; + int k; + float l1; + float u1; + + N = SIZE; + // allocate memory on CPU + a = (float *)malloc(sizeof(float) * N * N); + c = (float *)malloc(sizeof(float) * N * N); + + result = (float **)malloc(sizeof(float *) * N); + a_ref = (float **)malloc(sizeof(float *) * N); + + for (i = 0; i < N; i++) + { + result[i] = (float *)malloc(sizeof(float) * N); + a_ref[i] = (float *)malloc(sizeof(float) * N); + } + initCPU(a, N); + + GPU_argv_init(); + // initTrace(); + startCPU(); + // allocate the memory on the GPU + // cudaMalloc((void **)&dev_a, N * N * sizeof(float)); + + cudaMallocManaged(&a_gpu, N * N * sizeof(float)); + memcpy(a_gpu, a, N * N * sizeof(float)); + + // cudaMemcpy(dev_a, a, N * N * sizeof(float), cudaMemcpyHostToDevice); // copy array to device memory + + /*Perform LU Decomposition*/ + // for (i = 0; i < N; i++) + // { + // scale<<<1, 1>>>(a_gpu, N, i); + // // blocks= ((N-i-1)/512)+1; + // blocks = ((N / DIM_THREAD_BLOCK)); + // // 
printf("Number of blocks rxd : %d \n",blocks); + // reduce<<>>(a_gpu, N, i, DIM_THREAD_BLOCK); + // cudaDeviceSynchronize(); + // } + blocks = ((N / DIM_THREAD_BLOCK)); + lud_kernel<<>>(a_gpu, N); + cudaDeviceSynchronize(); + /*LU decomposition ends here*/ + + // cudaMemcpy(c, dev_a, N * N * sizeof(float), cudaMemcpyDeviceToHost); // copy array back to host + memcpy(c, a_gpu, N * N * sizeof(float)); + // free the memory allocated on the GPU + cudaFree(a_gpu); + + endCPU(); + // finiTrace(); + + /*copy the result matrix into explicit 2D matrix for verification*/ + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + // c[i * N + j] = a_gpu[i * N + j]; + result[i][j] = c[i * N + j]; + // printf("result %d %d Error is %lf \n ", i, j, result[i][j]); + } + } + + printf("======================================================="); + printf("\n Performing inplace verification \n"); + + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + a_ref[i][j] = 0; + for (k = 0; k < N; k++) + { + if (i >= k) + l1 = result[i][k]; + else + l1 = 0; + + if (k == j) + u1 = 1; + else if (k < j) + u1 = result[k][j]; // figured it out + else + u1 = 0.0; + + a_ref[i][j] = a_ref[i][j] + (l1 * u1); + } + } + } + + // for (i = 0; i < N; i++) + // { + // for (j = 0; j < N; j++) + // { + // error = abs(a[(i * N + j)] - a_ref[i][j]); + // if (error > 1) + // { + // // printf("No match occured at %d %d Error is %lf \n ", i, j, abs(a[(i * N + j)] - a_ref[i][j])); + // // printf("No match occured at %d %d Error is %lf, %lf \n ", i, j, a[(i * N + j)], a_ref[i][j]); + // flag = flag + 1; + // } + // } + // } + + // if (flag == 0) + // printf("Match \n"); + // else + // printf("No Matchs %d \n", flag); + + + + return 0; +} diff --git a/workloads/realworld/uvm/lud_perf/run.sh b/workloads/realworld/uvm/lud_perf/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..ea7db937489e328f5e923d2b18774e4256eef123 --- /dev/null +++ b/workloads/realworld/uvm/lud_perf/run.sh @@ -0,0 
+1 @@ +./lud 1024 diff --git a/workloads/realworld/uvm/lud_perf/run_super.sh b/workloads/realworld/uvm/lud_perf/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2791fe07c43d75b40894206ba79ed441f207ee26 --- /dev/null +++ b/workloads/realworld/uvm/lud_perf/run_super.sh @@ -0,0 +1 @@ +./lud 4096 diff --git a/workloads/realworld/uvm/nw/Makefile b/workloads/realworld/uvm/nw/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..b33ae6462826357f9665bfd7fc9929ed176f9b35 --- /dev/null +++ b/workloads/realworld/uvm/nw/Makefile @@ -0,0 +1,15 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include + +SRC = needle.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = needle + +release: $(SRC) + $(CC) ${KERNEL_DIM} $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt diff --git a/workloads/realworld/uvm/nw/Makefile_nvidia b/workloads/realworld/uvm/nw/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..2fd0b98d07beea56ae69a96a0c8cb3af87d602f6 --- /dev/null +++ b/workloads/realworld/uvm/nw/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
+# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := needle +# CUDA source files (compiled with cudacc) +CUFILES := needle.cu +# CUDA dependency files +CU_DEPS := needle_kernel.cu +# C/C++ source files (compiled with gcc / c++) +# CCFILES := BlackScholes_gold.cpp + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/uvm/nw/README b/workloads/realworld/uvm/nw/README new file mode 100755 index 0000000000000000000000000000000000000000..683cbd53db81f0ece4f926fa01316582ea0d5fc9 --- /dev/null +++ b/workloads/realworld/uvm/nw/README @@ -0,0 +1,12 @@ +Note: This program generate two sequences randomly. Please specify your own sequences for different uses. + At the current stage, the program only supports two sequences with the same lengh, which can be divided by 16. 
+Usage: needle 32 10 + 32 //the length of both sequences + 10 //penalty value + +******Adjustable work group size***** +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" diff --git a/workloads/realworld/uvm/nw/needle.cu b/workloads/realworld/uvm/nw/needle.cu new file mode 100755 index 0000000000000000000000000000000000000000..75ec4aa8410d85e9bf9ddfe799732fa435ef1ac1 --- /dev/null +++ b/workloads/realworld/uvm/nw/needle.cu @@ -0,0 +1,283 @@ +#define LIMIT -999 +#include +#include +#include +#include +#include "needle.h" +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +// includes, kernels +#include "needle_kernel.cu" + +#ifdef TIMING +#include "timing.h" + +struct timeval tv; +struct timeval tv_total_start, tv_total_end; +struct timeval tv_h2d_start, tv_h2d_end; +struct timeval tv_d2h_start, tv_d2h_end; +struct timeval tv_kernel_start, tv_kernel_end; +struct timeval tv_mem_alloc_start, tv_mem_alloc_end; +struct timeval tv_close_start, tv_close_end; +float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, + d2h_time = 0, close_time = 0, total_time = 0; +#endif + +//////////////////////////////////////////////////////////////////////////////// +// declaration, forward +void runTest( int argc, char** argv); + + +int blosum62[24][24] = { +{ 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0, -4}, +{-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1, -4}, +{-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1, -4}, +{-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1, -4}, +{ 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4}, +{-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1, -4}, +{-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, 
-2, 1, 4, -1, -4}, +{ 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -4}, +{-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1, -4}, +{-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1, -4}, +{-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1, -4}, +{-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1, -4}, +{-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1, -4}, +{-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1, -4}, +{-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2, -4}, +{ 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0, -4}, +{ 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0, -4}, +{-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2, -4}, +{-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1, -4}, +{ 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1, -4}, +{-2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1, -4}, +{-1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4}, +{ 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1, -4}, +{-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1} +}; + +double gettime() { + struct timeval t; + gettimeofday(&t,NULL); + return t.tv_sec+t.tv_usec*1e-6; +} + +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int +main( int argc, char** argv) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = 
rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + + printf("WG size of kernel = %d \n", BLOCK_SIZE); + + runTest( argc, argv); + + return EXIT_SUCCESS; +} + +void usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "\t - x and y dimensions\n"); + fprintf(stderr, "\t - penalty(positive integer)\n"); + exit(1); +} + +void runTest( int argc, char** argv) +{ + int max_rows, max_cols, penalty, nblocks; + int *input_itemsets, *output_itemsets, *referrence; + int *matrix_cuda, *referrence_cuda; + int size; + + + // the lengths of the two sequences should be able to divided by 16. + // And at current stage max_rows needs to equal max_cols + if (argc == 4) + { + max_rows = atoi(argv[1]); + max_cols = atoi(argv[1]); + penalty = atoi(argv[2]); + nblocks = atoi(argv[3]); + } + else{ + usage(argc, argv); + } + + if(atoi(argv[1])%16!=0){ + fprintf(stderr,"The dimension values must be a multiple of 16\n"); + exit(1); + } + + + max_rows = max_rows + 1; + max_cols = max_cols + 1; + referrence = (int *)malloc( max_rows * max_cols * sizeof(int) ); + input_itemsets = (int *)malloc( max_rows * max_cols * sizeof(int) ); + output_itemsets = (int *)malloc( max_rows * max_cols * sizeof(int) ); + + + if (!input_itemsets) + fprintf(stderr, "error: can not allocate memory"); + + srand ( 7 ); + + + for (int i = 0 ; i < max_cols; i++){ + for (int j = 0 ; j < max_rows; j++){ + input_itemsets[i*max_cols+j] = 0; + } + } + + printf("Start Needleman-Wunsch\n"); + + for( int i=1; i< max_rows ; i++){ //please define your own sequence. + input_itemsets[i*max_cols] = rand() % 10 + 1; + } + for( int j=1; j< max_cols ; j++){ //please define your own sequence. 
+ input_itemsets[j] = rand() % 10 + 1; + } + + + for (int i = 1 ; i < max_cols; i++){ + for (int j = 1 ; j < max_rows; j++){ + referrence[i*max_cols+j] = blosum62[input_itemsets[i*max_cols]][input_itemsets[j]]; + } + } + + for( int i = 1; i< max_rows ; i++) + input_itemsets[i*max_cols] = -i * penalty; + for( int j = 1; j< max_cols ; j++) + input_itemsets[j] = -j * penalty; + + + size = max_cols * max_rows; + + GPU_argv_init(); + initTrace(); + startCPU(); + + cudaMallocManaged((void**)& referrence_cuda, sizeof(int)*size); + cudaMallocManaged((void **)&matrix_cuda, sizeof(int) * size); + + memcpy(referrence_cuda, referrence, sizeof(int) * size); + memcpy(matrix_cuda, input_itemsets, sizeof(int) * size); + + dim3 dimGrid; + dim3 dimBlock(BLOCK_SIZE, 1); + // int block_width = ( max_cols - 1 )/BLOCK_SIZE; + int block_width = nblocks - 1; + int block_size = (max_cols - 1) / (nblocks * BLOCK_SIZE); + +#ifdef TIMING + gettimeofday(&tv_kernel_start, NULL); +#endif + + //printf("Processing top-left matrix\n"); + //process top-left matrix + for( int i = 1 ; i <= block_width ; i++) { + dimGrid.x = i; + dimGrid.y = 1; + needle_cuda_shared_1<<>>(referrence_cuda, matrix_cuda + ,max_cols, penalty, i, block_width, block_size); + } + //printf("Processing bottom-right matrix\n"); + //process bottom-right matrix + for( int i = block_width - 1 ; i >= 1 ; i--){ + dimGrid.x = i; + dimGrid.y = 1; + needle_cuda_shared_2<<>>(referrence_cuda, matrix_cuda + ,max_cols, penalty, i, block_width, block_size); + } + cudaDeviceSynchronize(); + memcpy(output_itemsets, matrix_cuda, sizeof(int) * size); + + cudaFree(referrence_cuda); + cudaFree(matrix_cuda); + +#ifdef TIMING + gettimeofday(&tv_kernel_end, NULL); + tvsub(&tv_kernel_end, &tv_kernel_start, &tv); + kernel_time += tv.tv_sec * 1000.0 + (float) tv.tv_usec / 1000.0; +#endif + + // cudaMemcpy(output_itemsets, matrix_cuda, sizeof(int) * size, cudaMemcpyDeviceToHost); + +//#define TRACEBACK +#ifdef TRACEBACK + + FILE *fpo = 
fopen("result.txt","w"); + fprintf(fpo, "print traceback value GPU:\n"); + + for (int i = max_rows - 2, j = max_rows - 2; i>=0, j>=0;){ + int nw, n, w, traceback; + if ( i == max_rows - 2 && j == max_rows - 2 ) + fprintf(fpo, "%d ", output_itemsets[ i * max_cols + j]); //print the first element + if ( i == 0 && j == 0 ) + break; + if ( i > 0 && j > 0 ){ + nw = output_itemsets[(i - 1) * max_cols + j - 1]; + w = output_itemsets[ i * max_cols + j - 1 ]; + n = output_itemsets[(i - 1) * max_cols + j]; + } + else if ( i == 0 ){ + nw = n = LIMIT; + w = output_itemsets[ i * max_cols + j - 1 ]; + } + else if ( j == 0 ){ + nw = w = LIMIT; + n = output_itemsets[(i - 1) * max_cols + j]; + } + else{ + } + + //traceback = maximum(nw, w, n); + int new_nw, new_w, new_n; + new_nw = nw + referrence[i * max_cols + j]; + new_w = w - penalty; + new_n = n - penalty; + + traceback = maximum(new_nw, new_w, new_n); + if(traceback == new_nw) + traceback = nw; + if(traceback == new_w) + traceback = w; + if(traceback == new_n) + traceback = n; + + fprintf(fpo, "%d ", traceback); + + if(traceback == nw ) + {i--; j--; continue;} + + else if(traceback == w ) + {j--; continue;} + + else if(traceback == n ) + {i--; continue;} + + else + ; + } + + fclose(fpo); + +#endif + endCPU(); + finiTrace(); + free(referrence); + free(input_itemsets); + free(output_itemsets); + +#ifdef TIMING + printf("Exec: %f\n", kernel_time); +#endif +} + diff --git a/workloads/realworld/uvm/nw/needle.h b/workloads/realworld/uvm/nw/needle.h new file mode 100755 index 0000000000000000000000000000000000000000..e73320d6496262665592117d242e9bc383298b5b --- /dev/null +++ b/workloads/realworld/uvm/nw/needle.h @@ -0,0 +1,11 @@ +#ifdef RD_WG_SIZE_0_0 + #define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) + #define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) + #define BLOCK_SIZE RD_WG_SIZE +#else + #define BLOCK_SIZE 16 +#endif +//#define TRACE + diff --git a/workloads/realworld/uvm/nw/needle_kernel.cu 
b/workloads/realworld/uvm/nw/needle_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..d7b4a0a1984521fa4a3d0dab3e1a3b3645ad5a4c --- /dev/null +++ b/workloads/realworld/uvm/nw/needle_kernel.cu @@ -0,0 +1,197 @@ + +#include "needle.h" +#include + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SDATA( index) CUT_BANK_CHECKER(sdata, index) + +__device__ __host__ int +maximum( int a, + int b, + int c){ + +int k; +if( a <= b ) +k = b; +else +k = a; + +if( k <=c ) +return(c); +else +return(k); + +} + +__global__ void +needle_cuda_shared_1( int* referrence, + int* matrix_cuda, + int cols, + int penalty, + int i, + int block_width, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + int bx = blockIdx.x; + int tx = threadIdx.x; + + int b_index_x = bx; + int b_index_y = i - 1 - bx; + + __shared__ int temp[BLOCK_SIZE+1][BLOCK_SIZE+1]; + __shared__ int ref[BLOCK_SIZE][BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (b_index_y * gridDim.x + b_index_x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + int b_index_x = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int b_index_y = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + int index = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( cols + 1 ); + int index_n = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( 1 ); + int index_w = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + ( cols ); + int index_nw = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE 
* b_index_x; + + if (tx == 0) + temp[tx][0] = matrix_cuda[index_nw]; + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + ref[ty][tx] = referrence[index + cols * ty]; + block.sync(); + + temp[tx + 1][0] = matrix_cuda[index_w + cols * tx]; + block.sync(); + + temp[0][tx + 1] = matrix_cuda[index_n]; + block.sync(); + + for( int m = 0 ; m < BLOCK_SIZE ; m++){ + if ( tx <= m ){ + int t_index_x = tx + 1; + int t_index_y = m - tx + 1; + + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[t_index_y-1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + } + block.sync(); + } + + for( int m = BLOCK_SIZE - 2 ; m >=0 ; m--){ + if ( tx <= m){ + int t_index_x = tx + BLOCK_SIZE - m ; + int t_index_y = BLOCK_SIZE - tx; + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[t_index_y-1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + + } + block.sync(); + } + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + matrix_cuda[index + ty * cols] = temp[ty+1][tx+1]; + } +} + + +__global__ void +needle_cuda_shared_2( int* referrence, + int* matrix_cuda, + int cols, + int penalty, + int i, + int block_width, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + int bx = blockIdx.x; + int tx = threadIdx.x; + + int b_index_x = bx + block_width - i; + int b_index_y = block_width - bx -1; + + __shared__ int temp[BLOCK_SIZE+1][BLOCK_SIZE+1]; + __shared__ int ref[BLOCK_SIZE][BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (b_index_y * gridDim.x + b_index_x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + int offset = tile 
- base_tile; + int block_id = tile / tiles_this_block; + int b_index_x = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int b_index_y = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + int index = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( cols + 1 ); + int index_n = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( 1 ); + int index_w = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + ( cols ); + int index_nw = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x; + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + ref[ty][tx] = referrence[index + cols * ty]; + block.sync(); + + if (tx == 0) + temp[tx][0] = matrix_cuda[index_nw]; + temp[tx + 1][0] = matrix_cuda[index_w + cols * tx]; + block.sync(); + + temp[0][tx + 1] = matrix_cuda[index_n]; + block.sync(); + + for( int m = 0 ; m < BLOCK_SIZE ; m++){ + if ( tx <= m ){ + int t_index_x = tx + 1; + int t_index_y = m - tx + 1; + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[t_index_y-1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + + } + block.sync(); + } + + for( int m = BLOCK_SIZE - 2 ; m >=0 ; m--){ + if ( tx <= m){ + int t_index_x = tx + BLOCK_SIZE - m ; + int t_index_y = BLOCK_SIZE - tx; + + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[t_index_y-1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + } + block.sync(); + } + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + matrix_cuda[index + ty * cols] = temp[ty+1][tx+1]; + } +} + diff --git a/workloads/realworld/uvm/nw/run.sh b/workloads/realworld/uvm/nw/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..e3d20f9f4402de57c2a49c4db5c04d917907d741 --- /dev/null +++ b/workloads/realworld/uvm/nw/run.sh @@ -0,0 +1 @@ +./needle 32768 10 256 diff --git a/workloads/realworld/uvm/nw/run_super.sh 
b/workloads/realworld/uvm/nw/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..23b570be1ac96cce67094a9469de1c6d24c03b08 --- /dev/null +++ b/workloads/realworld/uvm/nw/run_super.sh @@ -0,0 +1 @@ +./needle 32768 10 64 diff --git a/workloads/realworld/uvm/pathfinder/Makefile b/workloads/realworld/uvm/pathfinder/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d740e79027f3651c458e229179bbbd46fb4fcbec --- /dev/null +++ b/workloads/realworld/uvm/pathfinder/Makefile @@ -0,0 +1,14 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc +INCLUDE := $(CUDA_DIR)/include + +SRC = pathfinder.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = pathfinder + +release: + $(CC) $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +clean: + rm -f pathfinder diff --git a/workloads/realworld/uvm/pathfinder/README b/workloads/realworld/uvm/pathfinder/README new file mode 100644 index 0000000000000000000000000000000000000000..9af75abe201eb95c5c89a038c6d79f54b276f94e --- /dev/null +++ b/workloads/realworld/uvm/pathfinder/README @@ -0,0 +1,6 @@ +To compile the program: + +nvcc -cuda dynproc.cu +nvcc -o dynproc dynproc.cu.cpp + +Usage: dynproc row_len col_len pyramid_height diff --git a/workloads/realworld/uvm/pathfinder/pathfinder.cu b/workloads/realworld/uvm/pathfinder/pathfinder.cu new file mode 100644 index 0000000000000000000000000000000000000000..789a2d4efe6a122f106bf35a3aa1645241c3dc9a --- /dev/null +++ b/workloads/realworld/uvm/pathfinder/pathfinder.cu @@ -0,0 +1,299 @@ +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#ifdef TIMING +#include "timing.h" + +struct timeval tv; +struct timeval tv_total_start, tv_total_end; +struct timeval tv_h2d_start, 
tv_h2d_end; +struct timeval tv_d2h_start, tv_d2h_end; +struct timeval tv_kernel_start, tv_kernel_end; +struct timeval tv_mem_alloc_start, tv_mem_alloc_end; +struct timeval tv_close_start, tv_close_end; +float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, + d2h_time = 0, close_time = 0, total_time = 0; +#endif + +#define BLOCK_SIZE 256 +#define STR_SIZE 256 +#define DEVICE 0 +#define HALO 1 // halo width along one direction when advancing to the next iteration + +// #define BENCH_PRINT + +void run(int argc, char **argv); + +int rows, cols; +int *data; +int **wall; +int *result; +#define M_SEED 9 +int pyramid_height; +int nblocks; + +void init(int argc, char **argv) +{ + if (argc == 5) + { + cols = atoi(argv[1]); + rows = atoi(argv[2]); + pyramid_height = atoi(argv[3]); + nblocks = atoi(argv[4]); + } + else + { + printf("Usage: dynproc row_len col_len pyramid_height\n"); + exit(0); + } + data = new int[rows * cols]; + wall = new int *[rows]; + for (int n = 0; n < rows; n++) + wall[n] = data + cols * n; + result = new int[cols]; + + int seed = M_SEED; + srand(seed); + + for (int i = 0; i < rows; i++) + { + for (int j = 0; j < cols; j++) + { + wall[i][j] = rand() % 10; + } + } +#ifdef BENCH_PRINT + for (int i = 0; i < rows; i++) + { + for (int j = 0; j < cols; j++) + { + printf("%d ", wall[i][j]); + } + printf("\n"); + } +#endif +} + +void fatal(char *s) +{ + fprintf(stderr, "error: %s\n", s); +} + +#define IN_RANGE(x, min, max) ((x) >= (min) && (x) <= (max)) +#define CLAMP_RANGE(x, min, max) x = (x < (min)) ? min : ((x > (max)) ? max : x) +#define MIN(a, b) ((a) <= (b) ? 
(a) : (b)) + +__global__ void dynproc_kernel( + int iteration, + int *gpuWall, + int *gpuSrc, + int *gpuResults, + int cols, + int rows, + int startStep, + int border, + int small_block_cols, + int tile_size, + int batches) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + __shared__ int prev[BLOCK_SIZE]; + __shared__ int result[BLOCK_SIZE]; + + int bx = blockIdx.x; + int tx = threadIdx.x; + + for (int b = 0; b < batches; b++) + { + // each block finally computes result for a small block + // after N iterations. + // it is the non-overlapping small blocks that cover + // all the input data + + // calculate the boundary for the block according to + // the boundary of its small block + int blkX = bx * tile_size + small_block_cols * b - border; + int blkXmax = blkX + BLOCK_SIZE - 1; + + // calculate the global thread coordination + int xidx = blkX + tx; + + // effective range within this block that falls within + // the valid range of the input data + // used to rule out computation outside the boundary. + int validXmin = (blkX < 0) ? -blkX : 0; + int validXmax = (blkXmax > cols - 1) ? BLOCK_SIZE - 1 - (blkXmax - cols + 1) : BLOCK_SIZE - 1; + + int W = tx - 1; + int E = tx + 1; + + W = (W < validXmin) ? validXmin : W; + E = (E > validXmax) ? validXmax : E; + + bool isValid = IN_RANGE(tx, validXmin, validXmax); + + if (IN_RANGE(xidx, 0, cols - 1)) + { + prev[tx] = gpuSrc[xidx]; + } + block.sync(); // [Ronny] Added sync to avoid race on prev Aug. 
14 2012 + bool computed; + for (int i = 0; i < iteration; i++) + { + computed = false; + if (IN_RANGE(tx, i + 1, BLOCK_SIZE - i - 2) && + isValid) + { + computed = true; + int left = prev[W]; + int up = prev[tx]; + int right = prev[E]; + int shortest = MIN(left, up); + shortest = MIN(shortest, right); + int index = cols * (startStep + i) + xidx; + result[tx] = shortest + gpuWall[index]; + } + block.sync(); + if (i == iteration - 1) + break; + if (computed) // Assign the computation range + prev[tx] = result[tx]; + block.sync(); // [Ronny] Added sync to avoid race on prev Aug. 14 2012 + } + + // update the global memory + // after the last iteration, only threads coordinated within the + // small block perform the calculation and switch on ``computed'' + if (computed) + { + gpuResults[xidx] = result[tx]; + } + } + + +} + +/* + compute N time steps +*/ +int calc_path(int *gpuWall, int *gpuResult[2], int rows, int cols, + int pyramid_height, int blockCols, int borderCols, int tile_size, int batches) +{ + dim3 dimBlock(BLOCK_SIZE); + dim3 dimGrid(nblocks); + + int src = 1, dst = 0; + for (int t = 0; t < rows - 1; t += pyramid_height) + { + int temp = src; + src = dst; + dst = temp; + + int iteration = MIN(pyramid_height, rows - t - 1); + int small_block_cols = BLOCK_SIZE - iteration * HALO * 2; + dynproc_kernel<<>>( + iteration, gpuWall, gpuResult[src], gpuResult[dst], + cols, rows, t, borderCols, small_block_cols, tile_size, batches); + + // for the measurement fairness + cudaDeviceSynchronize(); + } + return dst; +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + GPU_argv_init(); + + run(argc, argv); + + return EXIT_SUCCESS; +} + +void run(int argc, char **argv) +{ + init(argc, argv); + + /* --------------- pyramid parameters --------------- */ + int borderCols = (pyramid_height)*HALO; + int smallBlockCol = BLOCK_SIZE - (pyramid_height)*HALO * 2; 
+ int blockCols = cols / smallBlockCol + ((cols % smallBlockCol == 0) ? 0 : 1); + + //ruihao + int cols_per_block = cols / nblocks; + if (cols_per_block < BLOCK_SIZE) cols_per_block = BLOCK_SIZE; + int batches = cols_per_block / smallBlockCol + ((cols_per_block % smallBlockCol == 0) ? 0 : 1); + + // printf("pyramidHeight: %d\ngridSize: [%d]\nborder:[%d]\nblockSize: %d\nblockGrid:[%d]\ntargetBlock:[%d]\n", + // pyramid_height, cols, borderCols, BLOCK_SIZE, blockCols, smallBlockCol); + printf("pyramidHeight: %d\ngridSize: [%d]\nborder:[%d]\nblockSize: %d\nblockGrid:[%d]\ntargetBlock:[%d]\n", + pyramid_height, cols, borderCols, BLOCK_SIZE, nblocks, smallBlockCol); + + int *gpuWall, *gpuResult[2]; + int size = rows * cols; + + initTrace(); + startCPU(); + + cudaMallocManaged((void **)&gpuResult[0], sizeof(int) * cols); + cudaMallocManaged((void **)&gpuResult[1], sizeof(int) * cols); + memcpy(gpuResult[0], data, sizeof(int) * cols); + cudaMallocManaged((void **)&gpuWall, sizeof(int) * (size - cols)); + memcpy(gpuWall, data + cols, sizeof(int) * (size - cols)); + +#ifdef TIMING + gettimeofday(&tv_kernel_start, NULL); +#endif + + // int final_ret = calc_path(gpuWall, gpuResult, rows, cols, + // pyramid_height, blockCols, borderCols); + int final_ret = calc_path(gpuWall, gpuResult, rows, cols, + pyramid_height, blockCols, borderCols, cols_per_block, batches); + +#ifdef TIMING + gettimeofday(&tv_kernel_end, NULL); + tvsub(&tv_kernel_end, &tv_kernel_start, &tv); + kernel_time += tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; +#endif + + memcpy(result, gpuResult[final_ret], sizeof(int) * cols); + +#ifdef BENCH_PRINT + for (int i = 0; i < cols; i++) + printf("%d ", data[i]); + printf("\n"); + for (int i = 0; i < cols; i++) + printf("%d ", result[i]); + printf("\n"); +#endif + + cudaFree(gpuWall); + cudaFree(gpuResult[0]); + cudaFree(gpuResult[1]); + + endCPU(); + finiTrace(); + + delete[] data; + delete[] wall; + delete[] result; + +#ifdef TIMING + printf("Exec: %f\n", 
kernel_time); +#endif +} diff --git a/workloads/realworld/uvm/pathfinder/result.txt b/workloads/realworld/uvm/pathfinder/result.txt new file mode 100644 index 0000000000000000000000000000000000000000..fa67c591d071682e1842a455f4477b397825e250 --- /dev/null +++ b/workloads/realworld/uvm/pathfinder/result.txt @@ -0,0 +1,11 @@ +pyramidHeight: 20 +gridSize: [100000] +border:[20] +blockSize: 256 +blockGrid:[463] +targetBlock:[216] +CUPTI dynproc_kernel iter 0 start: 1679530155077329959 end: 1679530155078946951 +CUPTI dynproc_kernel iter 20 start: 1679530155078953603 end: 1679530155081441509 +CUPTI dynproc_kernel iter 40 start: 1679530155081441880 end: 1679530155083936228 +CUPTI dynproc_kernel iter 60 start: 1679530155083936508 end: 1679530155086433891 +CUPTI dynproc_kernel iter 80 start: 1679530155086434172 end: 1679530155088929332 diff --git a/workloads/realworld/uvm/pathfinder/run.sh b/workloads/realworld/uvm/pathfinder/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..11a9e6199ea4b2ff7fd3e0ebf893dc96fa89ff45 --- /dev/null +++ b/workloads/realworld/uvm/pathfinder/run.sh @@ -0,0 +1,2 @@ +#./pathfinder 100000 100 20 1024 > result.txt +./pathfinder 10000000 100 20 1024 diff --git a/workloads/realworld/uvm/pathfinder/run_super.sh b/workloads/realworld/uvm/pathfinder/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..b35cc0b44511def3912323c1e0d58c2daa280722 --- /dev/null +++ b/workloads/realworld/uvm/pathfinder/run_super.sh @@ -0,0 +1 @@ +./pathfinder 10000000 100 20 1024 diff --git a/workloads/realworld/uvm/srad/Makefile b/workloads/realworld/uvm/srad/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..47da520a663446e36c04461d54cfbb3d12cfa328 --- /dev/null +++ b/workloads/realworld/uvm/srad/Makefile @@ -0,0 +1,15 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -std=c++11 -arch=sm_80 -O3 + +SRC = srad.cu 
$(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = srad + +release: $(SRC) + $(CC) $(KERNEL_DIM) $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt diff --git a/workloads/realworld/uvm/srad/Makefile_nvidia b/workloads/realworld/uvm/srad/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..e1f345c41c0f838dcf159958f628276455ef4dd7 --- /dev/null +++ b/workloads/realworld/uvm/srad/Makefile_nvidia @@ -0,0 +1,22 @@ +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := srad +# CUDA source files (compiled with cudacc) +CUFILES := srad.cu +# CUDA dependency files +CU_DEPS := \ + srad_kernel.cu \ + +# C/C++ source files (compiled with gcc / c++) +CCFILES := \ + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/uvm/srad/README b/workloads/realworld/uvm/srad/README new file mode 100755 index 0000000000000000000000000000000000000000..91e803b576bdeebe232c00a5112dadd836ffc33f --- /dev/null +++ b/workloads/realworld/uvm/srad/README @@ -0,0 +1,24 @@ +In srad.h, define either GPU or CPU computation +Currently, the GPU implementation can only support x-, y-dimensions that can be divided by 16. 
+ +Usage: +srad 128 128 0 31 0 31 0.5 2 + +128 //number of rows in the domain +128 //number of cols in the domain +0 //y1 position of the speckle +31 //y2 position of the speckle +0 //x1 position of the speckle +31 //x2 position of the speckle +0.5 //Lambda value +2 //number of iterations + + +******Adjustable work group size***** +The kernel has square shape +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe one dimesion +The total thread number for one block is RD_WG_SIZE_0*RD_WG_SIZE_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" \ No newline at end of file diff --git a/workloads/realworld/uvm/srad/run.sh b/workloads/realworld/uvm/srad/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..982fd1345383490dd093950055b359dc475480cc --- /dev/null +++ b/workloads/realworld/uvm/srad/run.sh @@ -0,0 +1,3 @@ +# ./srad 2048 2048 0 127 0 127 0.5 2 32 + +./srad 16384 16384 0 127 0 127 0.5 2 32 \ No newline at end of file diff --git a/workloads/realworld/uvm/srad/run_super.sh b/workloads/realworld/uvm/srad/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2d0f1b8ebd049e33bbb74a15722fc7172167641f --- /dev/null +++ b/workloads/realworld/uvm/srad/run_super.sh @@ -0,0 +1 @@ +./srad 32768 32768 0 127 0 127 0.5 2 8 \ No newline at end of file diff --git a/workloads/realworld/uvm/srad/srad.cu b/workloads/realworld/uvm/srad/srad.cu new file mode 100755 index 0000000000000000000000000000000000000000..af68954b6f11720a273a161079530e62c5cd799c --- /dev/null +++ b/workloads/realworld/uvm/srad/srad.cu @@ -0,0 +1,298 @@ +// includes, system +#include +#include +#include +#include +#include "srad.h" +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +// includes, project +#include + +// includes, kernels +#include "srad_kernel.cu" + +void random_matrix(float *I, int rows, int cols); +void runTest( int argc, char** argv); +void usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", 
argv[0]); + fprintf(stderr, "\t - number of rows\n"); + fprintf(stderr, "\t - number of cols\n"); + fprintf(stderr, "\t - y1 value of the speckle\n"); + fprintf(stderr, "\t - y2 value of the speckle\n"); + fprintf(stderr, "\t - x1 value of the speckle\n"); + fprintf(stderr, "\t - x2 value of the speckle\n"); + fprintf(stderr, "\t - lambda (0,1)\n"); + fprintf(stderr, "\t - number of iterations\n"); + + exit(1); +} +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE); + runTest( argc, argv); + + return EXIT_SUCCESS; +} + +void +runTest( int argc, char** argv) +{ + int rows, cols, size_I, size_R, niter = 10, iter, nblocks; + float *I, *J, lambda, q0sqr, sum, sum2, tmp, meanROI,varROI ; + +#ifdef CPU + float Jc, G2, L, num, den, qsqr; + int *iN,*iS,*jE,*jW, k; + float *dN,*dS,*dW,*dE; + float cN,cS,cW,cE,D; +#endif + +#ifdef GPU + + float *J_cuda; + float *C_cuda; + float *E_C, *W_C, *N_C, *S_C; + +#endif + + unsigned int r1, r2, c1, c2; + float *c; + + + + if (argc == 10) + { + rows = atoi(argv[1]); //number of rows in the domain + cols = atoi(argv[2]); //number of cols in the domain + if ((rows%16!=0) || (cols%16!=0)){ + fprintf(stderr, "rows and cols must be multiples of 16\n"); + exit(1); + } + r1 = atoi(argv[3]); //y1 position of the speckle + r2 = atoi(argv[4]); //y2 position of the speckle + c1 = atoi(argv[5]); //x1 position of the speckle + c2 = atoi(argv[6]); //x2 position of the speckle + lambda = atof(argv[7]); //Lambda value + niter = atoi(argv[8]); //number of iterations + nblocks = atoi(argv[9]); // number of blocks + } + else{ + usage(argc, argv); + } + + size_I = cols * rows; + size_R = 
(r2-r1+1)*(c2-c1+1); + + I = (float *)malloc( size_I * sizeof(float) ); + J = (float *)malloc( size_I * sizeof(float) ); + c = (float *)malloc(sizeof(float)* size_I) ; + + +#ifdef CPU + + iN = (int *)malloc(sizeof(unsigned int*) * rows) ; + iS = (int *)malloc(sizeof(unsigned int*) * rows) ; + jW = (int *)malloc(sizeof(unsigned int*) * cols) ; + jE = (int *)malloc(sizeof(unsigned int*) * cols) ; + + + dN = (float *)malloc(sizeof(float)* size_I) ; + dS = (float *)malloc(sizeof(float)* size_I) ; + dW = (float *)malloc(sizeof(float)* size_I) ; + dE = (float *)malloc(sizeof(float)* size_I) ; + + + for (int i=0; i< rows; i++) { + iN[i] = i-1; + iS[i] = i+1; + } + for (int j=0; j< cols; j++) { + jW[j] = j-1; + jE[j] = j+1; + } + iN[0] = 0; + iS[rows-1] = rows-1; + jW[0] = 0; + jE[cols-1] = cols-1; + +#endif + GPU_argv_init(); + initTrace(); + startCPU(); + +#ifdef GPU + + //Allocate device memory + cudaMallocManaged((void**)& J_cuda, sizeof(float)* size_I); + cudaMallocManaged((void **)&C_cuda, sizeof(float) * size_I); + cudaMallocManaged((void **)&E_C, sizeof(float) * size_I); + cudaMallocManaged((void **)&W_C, sizeof(float) * size_I); + cudaMallocManaged((void **)&S_C, sizeof(float) * size_I); + cudaMallocManaged((void **)&N_C, sizeof(float) * size_I); + +#endif + + printf("Randomizing the input matrix\n"); + //Generate a random matrix + random_matrix(I, rows, cols); + + for (int k = 0; k < size_I; k++ ) { + J[k] = (float)exp(I[k]) ; + } + printf("Start the SRAD main loop\n"); + for (iter=0; iter< niter; iter++){ + sum=0; sum2=0; + for (int i=r1; i<=r2; i++) { + for (int j=c1; j<=c2; j++) { + tmp = J[i * cols + j]; + sum += tmp ; + sum2 += tmp*tmp; + } + } + meanROI = sum / size_R; + varROI = (sum2 / size_R) - meanROI*meanROI; + q0sqr = varROI / (meanROI*meanROI); + +#ifdef CPU + + for (int i = 0 ; i < rows ; i++) { + for (int j = 0; j < cols; j++) { + + k = i * cols + j; + Jc = J[k]; + + // directional derivates + dN[k] = J[iN[i] * cols + j] - Jc; + dS[k] = J[iS[i] * 
cols + j] - Jc; + dW[k] = J[i * cols + jW[j]] - Jc; + dE[k] = J[i * cols + jE[j]] - Jc; + + G2 = (dN[k]*dN[k] + dS[k]*dS[k] + + dW[k]*dW[k] + dE[k]*dE[k]) / (Jc*Jc); + + L = (dN[k] + dS[k] + dW[k] + dE[k]) / Jc; + + num = (0.5*G2) - ((1.0/16.0)*(L*L)) ; + den = 1 + (.25*L); + qsqr = num/(den*den); + + // diffusion coefficent (equ 33) + den = (qsqr-q0sqr) / (q0sqr * (1+q0sqr)) ; + c[k] = 1.0 / (1.0+den) ; + + // saturate diffusion coefficent + if (c[k] < 0) {c[k] = 0;} + else if (c[k] > 1) {c[k] = 1;} + } + } + + for (int i = 0; i < rows; i++) { + for (int j = 0; j < cols; j++) { + + // current index + k = i * cols + j; + + // diffusion coefficent + cN = c[k]; + cS = c[iS[i] * cols + j]; + cW = c[k]; + cE = c[i * cols + jE[j]]; + + // divergence (equ 58) + D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k]; + + // image update (equ 61) + J[k] = J[k] + 0.25*lambda*D; + } + } + +#endif // CPU + + +#ifdef GPU + + //Currently the input size must be divided by 16 - the block size + + // ruihao + int block_x = cols/BLOCK_SIZE ; + int block_y = rows/BLOCK_SIZE ; + + if (nblocks > block_x) nblocks = block_x; + + dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE); + // dim3 dimGrid(block_x, block_y); + dim3 dimGrid(nblocks, nblocks); + // ruihao + + //Copy data from main memory to device memory + memcpy(J_cuda, J, sizeof(float) * size_I); + + //Run kernels + // srad_cuda_1<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr); + // srad_cuda_2<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, lambda, q0sqr); + srad_cuda_1<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr, cols / nblocks); + srad_cuda_2<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, lambda, q0sqr, cols / nblocks); + //Copy data from device memory to main memory + cudaDeviceSynchronize(); + memcpy(J, J_cuda, sizeof(float) * size_I); + +#endif +} + + cudaThreadSynchronize(); +#ifdef GPU + cudaFree(C_cuda); + cudaFree(J_cuda); + cudaFree(E_C); + cudaFree(W_C); + cudaFree(N_C); + cudaFree(S_C); +#endif 
+ endCPU(); + finiTrace(); + +#ifdef OUTPUT + //Printing output + printf("Printing Output:\n"); + for( int i = 0 ; i < rows ; i++){ + for ( int j = 0 ; j < cols ; j++){ + printf("%.5f ", J[i * cols + j]); + } + printf("\n"); + } +#endif + + printf("Computation Done\n"); + + free(I); + free(J); +#ifdef CPU + free(iN); free(iS); free(jW); free(jE); + free(dN); free(dS); free(dW); free(dE); +#endif + free(c); + +} + + +void random_matrix(float *I, int rows, int cols){ + + srand(7); + + for( int i = 0 ; i < rows ; i++){ + for ( int j = 0 ; j < cols ; j++){ + I[i * cols + j] = rand()/(float)RAND_MAX ; + } + } + +} + diff --git a/workloads/realworld/uvm/srad/srad.h b/workloads/realworld/uvm/srad/srad.h new file mode 100755 index 0000000000000000000000000000000000000000..2b2adb6d956b697c5b0ace9bccb89162ef98be50 --- /dev/null +++ b/workloads/realworld/uvm/srad/srad.h @@ -0,0 +1,16 @@ +#define STR_SIZE 256 + +#ifdef RD_WG_SIZE_0_0 + #define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) + #define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) + #define BLOCK_SIZE RD_WG_SIZE +#else + #define BLOCK_SIZE 16 +#endif + +#define GPU +#define TIMER +//#define OUTPUT + diff --git a/workloads/realworld/uvm/srad/srad_kernel.cu b/workloads/realworld/uvm/srad/srad_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..a81386576462e82375b7826f9b203005cca803ab --- /dev/null +++ b/workloads/realworld/uvm/srad/srad_kernel.cu @@ -0,0 +1,316 @@ +#include "srad.h" +#include + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +__global__ void +srad_cuda_1( + float *E_C, + float *W_C, + float *N_C, + float *S_C, + float *J_cuda, + float *C_cuda, + int cols, + int rows, + float q0sqr, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // shared memory allocation + __shared__ float temp[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float temp_result[BLOCK_SIZE * 
// ---------------------------------------------------------------------------
// Pass 1 of SRAD: for every pixel, compute the four directional derivatives
// (N/S/W/E), the instantaneous coefficient of variation q^2, and the
// saturated diffusion coefficient c (equ 33 of the SRAD paper).
//
// Grid mapping: each thread block walks a square patch of
// (block_size / BLOCK_SIZE)^2 image tiles; each tile is
// BLOCK_SIZE x BLOCK_SIZE pixels, one pixel per thread.
// NOTE(review): the tile->(bx,by) arithmetic compares by/bx against
// tile_dim_x only, which assumes a square image (rows == cols) — confirm.
__global__ void
srad_cuda_1(
    float *E_C,    // out: east derivative per pixel
    float *W_C,    // out: west derivative per pixel
    float *N_C,    // out: north derivative per pixel
    float *S_C,    // out: south derivative per pixel
    float *J_cuda, // in:  current image J
    float *C_cuda, // out: saturated diffusion coefficient c
    int cols,
    int rows,
    float q0sqr,   // ROI-derived diffusion threshold q0^2
    int block_size) // per-block patch width in pixels (cols / nblocks)
{
    cooperative_groups::thread_block block = cooperative_groups::this_thread_block();

    // shared memory allocation: the tile itself plus its four halo rows/cols
    __shared__ float temp[BLOCK_SIZE * BLOCK_SIZE];
    __shared__ float temp_result[BLOCK_SIZE * BLOCK_SIZE];

    __shared__ float north[BLOCK_SIZE * BLOCK_SIZE];
    __shared__ float south[BLOCK_SIZE * BLOCK_SIZE];
    __shared__ float east[BLOCK_SIZE * BLOCK_SIZE];
    __shared__ float west[BLOCK_SIZE * BLOCK_SIZE];

    int tile_dim_x = cols / BLOCK_SIZE; // tiles per image row

    int total_tiles = tile_dim_x * tile_dim_x; // NOTE: unused; kept as-is
    int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE);
    int tiles_this_block_x = (block_size / BLOCK_SIZE);

    // Contiguous range of tile indices owned by this thread block.
    int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block;
    int tile = base_tile;
    int end_tile = tile + tiles_this_block;

    for (; tile < end_tile; tile += 1)
    {
        // block id: map the linear tile index to 2D tile coordinates
        // inside this block's square patch.
        int offset = tile - base_tile;
        int block_id = tile / tiles_this_block;
        int bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x;
        int by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x;

        // thread id
        int tx = threadIdx.x;
        int ty = threadIdx.y;

        // indices of this pixel and its halo neighbours in global memory
        int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx;
        int index_n = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + tx - cols;
        int index_s = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * BLOCK_SIZE + tx;
        int index_w = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty - 1;
        int index_e = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + BLOCK_SIZE;

        // clamp halo indices that fall outside the image
        if (index_n < 0) index_n = 0;
        if (index_s >= (cols * rows)) index_s = cols * rows - 1;
        if (index_w < 0) index_w = 0;
        if (index_e >= (cols * rows)) index_e = cols * rows - 1;

        float n, w, e, s, jc, g2, l, num, den, qsqr, c;

        // load data to shared memory (north/south halos first)
        north[ty * BLOCK_SIZE + tx] = J_cuda[index_n];
        south[ty * BLOCK_SIZE + tx] = J_cuda[index_s];
        if (by == 0)
        {
            // top image border: replicate the first row
            north[ty * BLOCK_SIZE + tx] = J_cuda[BLOCK_SIZE * bx + tx];
        }
        else if (by == tile_dim_x - 1)
        {
            // bottom image border: replicate the last row
            south[ty * BLOCK_SIZE + tx] = J_cuda[cols * BLOCK_SIZE * (tile_dim_x - 1) + BLOCK_SIZE * bx + cols * (BLOCK_SIZE - 1) + tx];
        }
        block.sync();

        // west/east halos
        west[ty * BLOCK_SIZE + tx] = J_cuda[index_w];
        east[ty * BLOCK_SIZE + tx] = J_cuda[index_e];

        if (bx == 0)
        {
            // left image border: replicate the first column
            west[ty * BLOCK_SIZE + tx] = J_cuda[cols * BLOCK_SIZE * by + cols * ty];
        }
        else if (bx == tile_dim_x - 1)
        {
            // right image border: replicate the last column
            east[ty * BLOCK_SIZE + tx] = J_cuda[cols * BLOCK_SIZE * by + BLOCK_SIZE * (tile_dim_x - 1) + cols * ty + BLOCK_SIZE - 1];
        }

        block.sync();

        // the tile itself
        temp[ty * BLOCK_SIZE + tx] = J_cuda[index];

        block.sync();

        jc = temp[ty * BLOCK_SIZE + tx];

        // Directional derivatives: corner/edge threads read from the halo
        // buffers, interior threads read only from the tile.
        if (ty == 0 && tx == 0)
        { // nw corner
            n = north[ty * BLOCK_SIZE + tx] - jc;
            s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc;
            w = west[ty * BLOCK_SIZE + tx] - jc;
            e = temp[ty * BLOCK_SIZE + tx + 1] - jc;
        }
        else if (ty == 0 && tx == BLOCK_SIZE - 1)
        { // ne corner
            n = north[ty * BLOCK_SIZE + tx] - jc;
            s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc;
            w = temp[ty * BLOCK_SIZE + tx - 1] - jc;
            e = east[ty * BLOCK_SIZE + tx] - jc;
        }
        else if (ty == BLOCK_SIZE - 1 && tx == BLOCK_SIZE - 1)
        { // se corner
            n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc;
            s = south[ty * BLOCK_SIZE + tx] - jc;
            w = temp[ty * BLOCK_SIZE + tx - 1] - jc;
            e = east[ty * BLOCK_SIZE + tx] - jc;
        }
        else if (ty == BLOCK_SIZE - 1 && tx == 0)
        { // sw corner
            n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc;
            s = south[ty * BLOCK_SIZE + tx] - jc;
            w = west[ty * BLOCK_SIZE + tx] - jc;
            e = temp[ty * BLOCK_SIZE + tx + 1] - jc;
        }
        else if (ty == 0)
        { // n edge
            n = north[ty * BLOCK_SIZE + tx] - jc;
            s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc;
            w = temp[ty * BLOCK_SIZE + tx - 1] - jc;
            e = temp[ty * BLOCK_SIZE + tx + 1] - jc;
        }
        else if (tx == BLOCK_SIZE - 1)
        { // e edge
            n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc;
            s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc;
            w = temp[ty * BLOCK_SIZE + tx - 1] - jc;
            e = east[ty * BLOCK_SIZE + tx] - jc;
        }
        else if (ty == BLOCK_SIZE - 1)
        { // s edge
            n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc;
            s = south[ty * BLOCK_SIZE + tx] - jc;
            w = temp[ty * BLOCK_SIZE + tx - 1] - jc;
            e = temp[ty * BLOCK_SIZE + tx + 1] - jc;
        }
        else if (tx == 0)
        { // w edge
            n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc;
            s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc;
            w = west[ty * BLOCK_SIZE + tx] - jc;
            e = temp[ty * BLOCK_SIZE + tx + 1] - jc;
        }
        else
        { // the data elements which are not on the borders
            n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc;
            s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc;
            w = temp[ty * BLOCK_SIZE + tx - 1] - jc;
            e = temp[ty * BLOCK_SIZE + tx + 1] - jc;
        }

        // normalized gradient magnitude squared
        g2 = (n * n + s * s + w * w + e * e) / (jc * jc);

        // normalized Laplacian
        l = (n + s + w + e) / jc;

        num = (0.5 * g2) - ((1.0 / 16.0) * (l * l));
        den = 1 + (.25 * l);
        qsqr = num / (den * den);

        // diffusion coefficient (equ 33)
        den = (qsqr - q0sqr) / (q0sqr * (1 + q0sqr));
        c = 1.0 / (1.0 + den);

        // saturate diffusion coefficient to [0, 1]
        if (c < 0)
        {
            temp_result[ty * BLOCK_SIZE + tx] = 0;
        }
        else if (c > 1)
        {
            temp_result[ty * BLOCK_SIZE + tx] = 1;
        }
        else
        {
            temp_result[ty * BLOCK_SIZE + tx] = c;
        }

        block.sync();

        // write results for this pixel back to global memory
        C_cuda[index] = temp_result[ty * BLOCK_SIZE + tx];
        E_C[index] = e;
        W_C[index] = w;
        S_C[index] = s;
        N_C[index] = n;
    }
}

// ---------------------------------------------------------------------------
// Pass 2 of SRAD: read the coefficients written by srad_cuda_1, compute the
// divergence (equ 58) and apply the image update (equ 61) in place on J_cuda.
// Only the south/east neighbours need halo loads because the scheme uses
// c[k] itself for the north/west coefficients.
// NOTE(review): unlike pass 1, the tile index here maps row-major across the
// whole image (bx = tile % tile_dim_x); both mappings enumerate every tile
// when the grid exactly covers the image, and correctness does not depend on
// the order since pass 1 has fully populated C_cuda before this launch.
__global__ void
srad_cuda_2(
    float *E_C,    // in: east derivative per pixel (from pass 1)
    float *W_C,    // in: west derivative per pixel
    float *N_C,    // in: north derivative per pixel
    float *S_C,    // in: south derivative per pixel
    float *J_cuda, // in/out: image, updated in place
    float *C_cuda, // in: diffusion coefficients (from pass 1)
    int cols,
    int rows,
    float lambda,  // update step size
    float q0sqr,   // NOTE: unused in this pass; kept as-is
    int block_size) // per-block patch width in pixels (cols / nblocks)
{
    cooperative_groups::thread_block block = cooperative_groups::this_thread_block();

    // shared memory allocation
    __shared__ float south_c[BLOCK_SIZE * BLOCK_SIZE];
    __shared__ float east_c[BLOCK_SIZE * BLOCK_SIZE];

    __shared__ float c_cuda_temp[BLOCK_SIZE * BLOCK_SIZE];
    __shared__ float c_cuda_result[BLOCK_SIZE * BLOCK_SIZE];
    __shared__ float temp[BLOCK_SIZE * BLOCK_SIZE];

    int tile_dim_x = cols / BLOCK_SIZE; // tiles per image row

    int total_tiles = tile_dim_x * tile_dim_x; // NOTE: unused; kept as-is
    int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE);

    // Contiguous range of tile indices owned by this thread block.
    int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block;
    int tile = base_tile;
    int end_tile = tile + tiles_this_block;

    for (; tile < end_tile; tile += 1)
    {
        // block id (row-major tile coordinates over the whole image)
        int bx = tile % tile_dim_x;
        int by = tile / tile_dim_x;

        // thread id
        int tx = threadIdx.x;
        int ty = threadIdx.y;

        // indices of this pixel and its south/east halo neighbours
        int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx;
        int index_s = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * BLOCK_SIZE + tx;
        int index_e = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + BLOCK_SIZE;

        // clamp halo indices that fall outside the image
        if (index_s >= (cols * rows)) index_s = cols * rows - 1;
        if (index_e >= (cols * rows)) index_e = cols * rows - 1;

        float cc, cn, cs, ce, cw, d_sum;

        // load the image tile to shared memory
        temp[ty * BLOCK_SIZE + tx] = J_cuda[index];
        block.sync();

        // south halo of the coefficient field
        south_c[ty * BLOCK_SIZE + tx] = C_cuda[index_s];
        if (by == tile_dim_x - 1)
        {
            // bottom image border: replicate the last row
            south_c[ty * BLOCK_SIZE + tx] = C_cuda[cols * BLOCK_SIZE * (tile_dim_x - 1) + BLOCK_SIZE * bx + cols * (BLOCK_SIZE - 1) + tx];
        }
        block.sync();

        // east halo of the coefficient field
        east_c[ty * BLOCK_SIZE + tx] = C_cuda[index_e];
        if (bx == tile_dim_x - 1)
        {
            // right image border: replicate the last column
            east_c[ty * BLOCK_SIZE + tx] = C_cuda[cols * BLOCK_SIZE * by + BLOCK_SIZE * (tile_dim_x - 1) + cols * ty + BLOCK_SIZE - 1];
        }
        block.sync();

        // the coefficient tile itself
        c_cuda_temp[ty * BLOCK_SIZE + tx] = C_cuda[index];
        block.sync();
        cc = c_cuda_temp[ty * BLOCK_SIZE + tx];

        // south/east coefficients: edge threads read the halo buffers
        if (ty == BLOCK_SIZE - 1 && tx == BLOCK_SIZE - 1)
        { // se corner
            cn = cc;
            cs = south_c[ty * BLOCK_SIZE + tx];
            cw = cc;
            ce = east_c[ty * BLOCK_SIZE + tx];
        }
        else if (tx == BLOCK_SIZE - 1)
        { // e edge
            cn = cc;
            cs = c_cuda_temp[(ty + 1) * BLOCK_SIZE + tx];
            cw = cc;
            ce = east_c[ty * BLOCK_SIZE + tx];
        }
        else if (ty == BLOCK_SIZE - 1)
        { // s edge
            cn = cc;
            cs = south_c[ty * BLOCK_SIZE + tx];
            cw = cc;
            ce = c_cuda_temp[ty * BLOCK_SIZE + tx + 1];
        }
        else
        { // the data elements which are not on the borders
            cn = cc;
            cs = c_cuda_temp[(ty + 1) * BLOCK_SIZE + tx];
            cw = cc;
            ce = c_cuda_temp[ty * BLOCK_SIZE + tx + 1];
        }

        // divergence (equ 58)
        d_sum = cn * N_C[index] + cs * S_C[index] + cw * W_C[index] + ce * E_C[index];

        // image update (equ 61)
        c_cuda_result[ty * BLOCK_SIZE + tx] = temp[ty * BLOCK_SIZE + tx] + 0.25 * lambda * d_sum;

        block.sync();

        // write the updated pixel back to global memory
        J_cuda[index] = c_cuda_result[ty * BLOCK_SIZE + tx];
    }
}
foreach, Q_FOREACH, BOOST_FOREACH ] +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IndentCaseLabels: false +IndentWidth: 2 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 8 +UseTab: Never +... + diff --git a/workloads/realworld/uvm_prefetch/BN/LICENSE b/workloads/realworld/uvm_prefetch/BN/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/BN/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/workloads/realworld/uvm_prefetch/BN/Makefile b/workloads/realworld/uvm_prefetch/BN/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..864b8e45401b0fe12162389c2e9d33fa86f4fc9f --- /dev/null +++ b/workloads/realworld/uvm_prefetch/BN/Makefile @@ -0,0 +1,169 @@ +################################################################################ +# +# Copyright 1993-2013 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. 
NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. 
+# +################################################################################ +# +# Makefile project only supported on Mac OS X and Linux Platforms) +# +################################################################################ + +include ../../../common/make.config +include ./findcudalib.mk + +# Location of the CUDA Toolkit +CUDA_PATH ?= $(CUDA_DIR) + +# internal flags +NVCCFLAGS := -m${OS_SIZE} +CCFLAGS := -Wno-narrowing +NVCCLDFLAGS := +LDFLAGS := + +# Extra user flags +EXTRA_NVCCFLAGS ?= +EXTRA_NVCCLDFLAGS ?= +EXTRA_LDFLAGS ?= +EXTRA_CCFLAGS ?= + +# OS-specific build flags +ifneq ($(DARWIN),) + LDFLAGS += -rpath $(CUDA_PATH)/lib + CCFLAGS += -arch $(OS_ARCH) $(STDLIB) +else + ifeq ($(OS_ARCH),armv7l) + ifeq ($(abi),gnueabi) + CCFLAGS += -mfloat-abi=softfp + else + # default to gnueabihf + override abi := gnueabihf + LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3 + CCFLAGS += -mfloat-abi=hard + endif + endif +endif + +ifeq ($(ARMv7),1) +NVCCFLAGS += -target-cpu-arch ARM +ifneq ($(TARGET_FS),) +CCFLAGS += --sysroot=$(TARGET_FS) +LDFLAGS += --sysroot=$(TARGET_FS) +LDFLAGS += -rpath-link=$(TARGET_FS)/lib +LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib +LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-$(abi) +endif +endif + +# Debug build flags +ifeq ($(dbg),1) + NVCCFLAGS += -g -G + TARGET := debug +else + TARGET := release +endif + +ALL_CCFLAGS := +ALL_CCFLAGS += $(NVCCFLAGS) +ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS)) +ALL_CCFLAGS += $(EXTRA_NVCCFLAGS) +ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS)) + +ALL_LDFLAGS := +ALL_LDFLAGS += $(ALL_CCFLAGS) +ALL_LDFLAGS += $(NVCCLDFLAGS) +ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS)) +ALL_LDFLAGS += $(EXTRA_NVCCLDFLAGS) +ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS)) + +# Common includes and paths for CUDA +INCLUDES := -I../../common/inc -I$(INCLUDE) -I$(CUPTI_INCLUDE) +LIBRARIES := -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + 
+################################################################################ + +# CUDA code generation flags +ifneq ($(OS_ARCH),armv7l) +GENCODE_SM10 := -gencode arch=compute_10,code=sm_10 +endif +GENCODE_SM20 := -gencode arch=compute_20,code=sm_20 +GENCODE_SM30 := -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=\"sm_35,compute_35\" +GENCODE_SM80 := -gencode arch=compute_80,code=sm_80 +GENCODE_FLAGS := $(GENCODE_SM80) + +################################################################################ + +# Target rules +all: build + +build: ordergraph_30 ordergraph_40 ordergraph_45 ordergraph_50 + +# ordergraph_25.o: ordergraph.cu ordergraph_kernel.cu +# $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_25 -o $@ -c $< + +ordergraph_30.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_30 -o $@ -c $< + +ordergraph_40.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_40 -o $@ -c $< + +ordergraph_45.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_45 -o $@ -c $< + +ordergraph_50.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_50 -o $@ -c $< + +# ordergraph_125.o: ordergraph.cu ordergraph_kernel.cu +# $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_125 -o $@ -c $< + + + +# ordergraph_25: ordergraph_25.o +# $(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES) + +ordergraph_30: ordergraph_30.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +ordergraph_40: ordergraph_40.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +ordergraph_45: ordergraph_45.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp 
$(LIBRARIES) + +ordergraph_50: ordergraph_50.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +# ordergraph_125: ordergraph_125.o +# $(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES) + +run: build + ./ordergraph + +clean: + rm -f ordergraph_30 ordergraph_40 ordergraph_45 ordergraph_50 *.o *.bin *.out + +clobber: clean diff --git a/workloads/realworld/uvm_prefetch/BN/README.md b/workloads/realworld/uvm_prefetch/BN/README.md new file mode 100644 index 0000000000000000000000000000000000000000..07158a0bd52af63032c860ff04e243e0d7c76ef1 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/BN/README.md @@ -0,0 +1,21 @@ +The code works with CUDA 6.0. +If you are using this code for your project, please cite [our paper](https://yuemmawang.github.io/publications/wang-tpds2016.pdf): + +``` +Wang Y, Qian W, Zhang S, et al. A Learning Algorithm for Bayesian Networks and +Its Efficient Implementation on GPUs[J]. Parallel and Distributed Systems, IEEE +Transactions on, 2016, 27(1): 17-30. +``` + +``` +@article{wang2015learning, + title={A learning algorithm for Bayesian networks and its efficient implementation on GPUs}, + author={Wang, Yu and Qian, Weikang and Zhang, Shuchang and Liang, Xiaoyao and Yuan, Bo}, + journal={IEEE Transactions on Parallel and Distributed Systems}, + volume={27}, + number={1}, + pages={17--30}, + year={2015}, + publisher={IEEE} +} +``` diff --git a/workloads/realworld/uvm_prefetch/BN/data125.cu b/workloads/realworld/uvm_prefetch/BN/data125.cu new file mode 100644 index 0000000000000000000000000000000000000000..6bb370a636a330992e083f0b52f1f67a9a86040e --- /dev/null +++ b/workloads/realworld/uvm_prefetch/BN/data125.cu @@ -0,0 +1,610 @@ +// The data are synthesized. 
+ +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=25; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0, 
+1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,1, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,1, 
+0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0, 
+1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,0,1,0, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, 
+0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, 
+1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,0,1,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0, 
+1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0, 
+0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0, 
+1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, 
+0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,0,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0, 
+0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, 
+1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,0,1,0,0,0,1, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0, 
+1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0, 
+0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0, 
+0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,1,0,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0, 
+0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, 
+1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1, 
+1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, 
+1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,1,0, 
+1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1, 
+1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1, 
+0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,0,0,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1, 
+1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,0,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,0, 
+1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0, 
+1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,0, 
+1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0, 
+1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,0, 
+1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,0, 
+1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,0, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,0,0,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1, 
+1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0, 
+1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,1,0, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0, 
+0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,1,1,0,0,0,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0, 
+1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,0,0,1,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,1,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0, 
+0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,1,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,1,1, 
+1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0, 
+1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,1, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,0, 
+1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,1,0,1, 
+0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,0,0, 
+1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,0,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,1,1, 
+1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,1,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,1,1, 
+0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, 
+1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0, 
+1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,1,0, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, 
+1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,0, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,1,0,0,0,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, 
+1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,1,1,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,1,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,1,0, 
+1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,0,1,1,0, 
+1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,1,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0, 
+0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0, 
+1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0, 
+1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,0,1,0,1,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0, 
+1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,1,1,1,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,1, +} +#endif diff --git a/workloads/realworld/uvm_prefetch/BN/data25.cu b/workloads/realworld/uvm_prefetch/BN/data25.cu new file mode 100644 index 0000000000000000000000000000000000000000..6af94f79766c6e36ee121f6c537f987f841bf7c0 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/BN/data25.cu @@ -0,0 +1,612 @@ +// The data are synthesized. 
+ +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=25; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1, 
+0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1, 
+1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1, 
+1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1, 
+1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0, 
+1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0, 
+1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0, 
+1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1, 
+1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1, 
+1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0, 
+0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0, 
+1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0, 
+1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +} + +#endif + diff --git a/workloads/realworld/uvm_prefetch/BN/data30.cu b/workloads/realworld/uvm_prefetch/BN/data30.cu new file mode 100644 index 0000000000000000000000000000000000000000..bf89729e319e920533f9d134c8a2dff9aa4bc022 --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch/BN/data30.cu @@ -0,0 +1,612 @@ +// The data are synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=30; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0, 
+1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, 
+1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0, 
+1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1, 
+1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0, 
+1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0, 
+1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1, 
+0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, 
+1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1, 
+1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1, 
+1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, 
+1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0, 
+1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0, 
+1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0, 
+0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, 
+1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0, 
+1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +} + +#endif + diff --git a/workloads/realworld/uvm_prefetch/BN/data40.cu b/workloads/realworld/uvm_prefetch/BN/data40.cu new file mode 100644 index 0000000000000000000000000000000000000000..16d34d9dc4860d1dd24d604e501bccb43cae8095 --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch/BN/data40.cu @@ -0,0 +1,612 @@ +// The data are synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=40; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1, 
+1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0, 
+1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0, 
+0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0, 
+1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0, 
+1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0, 
+0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0, 
+1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0, 
+1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1, 
+1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0, 
+1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0, 
+1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1, 
+1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1, 
+0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1, 
+1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1, 
+0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0, 
+1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1, 
+1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1, 
+1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0, 
+0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1, +} + +#endif + diff --git a/workloads/realworld/uvm_prefetch/BN/data45.cu b/workloads/realworld/uvm_prefetch/BN/data45.cu new file mode 100644 index 0000000000000000000000000000000000000000..b23e9a35e7c27948c0853710a06be462694df57d --- /dev/null +++ b/workloads/realworld/uvm_prefetch/BN/data45.cu @@ -0,0 +1,616 @@ +// The data are synthesized. +#include +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=45; +const int STATE_N=2; +const int DATA_N=600; + + + +int data[DATA_N*NODE_N]= { +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0, 
+1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0, 
+0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0, 
+1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0, 
+1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1, 
+0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0, 
+1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0, 
+1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1, 
+0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1, 
+1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1, 
+1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1, 
+1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0, 
+1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1, 
+1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1, 
+1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1, 
+1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1, 
+0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0, 
+1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0, +} + + +#endif + diff --git a/workloads/realworld/uvm_prefetch/BN/data50.cu b/workloads/realworld/uvm_prefetch/BN/data50.cu new file mode 100644 index 
0000000000000000000000000000000000000000..936a7aa4a67a1f949f1264477388a7eb5a93a1b4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/BN/data50.cu @@ -0,0 +1,612 @@ +// The data are synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=50; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,0,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0, 
+1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1, 
+0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1, 
+0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0, 
+1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0, 
+1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0, 
+1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,0,1,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,1, 
+1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,1,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,0,0,1,1,1, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0, 
+0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1,1,1,1,0,1, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1, 
+1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0, 
+1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0, 
+1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,1,0,0,0,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1, 
+1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1, 
+1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,1,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0, 
+1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0, 
+1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0, 
+1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0, 
+1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1, 
+1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1, 
+1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0, 
+0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,0,1,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,0, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1, 
+1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1, 
+1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0, 
+1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,0,1,0,0,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,0, 
+1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0, 
+1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0, +} + +#endif + diff --git a/workloads/realworld/uvm_prefetch/BN/file_process.py b/workloads/realworld/uvm_prefetch/BN/file_process.py new file mode 100644 index 0000000000000000000000000000000000000000..eeebbee70e59153ca0f1a960f4e8ffa0437693c3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/BN/file_process.py @@ -0,0 +1,15 @@ +import os +filename = "data125.cu" +file_write = "data45.cu" + +f_w = open(file_write,"w") +with open(filename) as f: + content = f.readlines() + + for i, line in enumerate(content): + if i < 8: + f_w.write(line) + elif i >= 8: + 
f_w.write(line[0:90]) + f_w.write("\n") +f_w.close() \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/BN/findcudalib.mk b/workloads/realworld/uvm_prefetch/BN/findcudalib.mk new file mode 100644 index 0000000000000000000000000000000000000000..f40c2c38e5510fdee2fdf59df00160b547c056c1 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/BN/findcudalib.mk @@ -0,0 +1,226 @@ +################################################################################ +# +# Copyright 1993-2013 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. 
+# +################################################################################ +# +# findcudalib.mk is used to find the locations for CUDA libraries and other +# Unix Platforms. This is supported Mac OS X and Linux. +# +################################################################################ + +# OS Name (Linux or Darwin) +OSUPPER = $(shell uname -s 2>/dev/null | tr "[:lower:]" "[:upper:]") +OSLOWER = $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]") + +# Flags to detect 32-bit or 64-bit OS platform +OS_SIZE = $(shell uname -m | sed -e "s/i.86/32/" -e "s/x86_64/64/" -e "s/armv7l/32/") +OS_ARCH = $(shell uname -m | sed -e "s/i386/i686/") + +# Determine OS platform and unix distribution +ifeq ("$(OSLOWER)","linux") + # first search lsb_release + DISTRO = $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]") + DISTVER = $(shell lsb_release -r -s 2>/dev/null) + ifeq ("$(DISTRO)",'') + # second search and parse /etc/issue + DISTRO = $(shell more /etc/issue | awk '{print $$1}' | sed '1!d' | sed -e "/^$$/d" 2>/dev/null | tr "[:upper:]" "[:lower:]") + DISTVER= $(shell more /etc/issue | awk '{print $$2}' | sed '1!d' 2>/dev/null + endif + ifeq ("$(DISTRO)",'') + # third, we can search in /etc/os-release or /etc/{distro}-release + DISTRO = $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB") + DISTVER= $(shell awk '/DISTRIB_RELEASE/' /etc/*-release | sed 's/DISTRIB_RELEASE=//' | grep -v "DISTRIB_RELEASE") + endif +endif + +# search at Darwin (unix based info) +DARWIN = $(strip $(findstring DARWIN, $(OSUPPER))) +ifneq ($(DARWIN),) + SNOWLEOPARD = $(strip $(findstring 10.6, $(shell egrep "10\.6" /System/Library/CoreServices/SystemVersion.plist))) + LION = $(strip $(findstring 10.7, $(shell egrep "10\.7" /System/Library/CoreServices/SystemVersion.plist))) + MOUNTAIN = $(strip $(findstring 10.8, $(shell egrep "10\.8" /System/Library/CoreServices/SystemVersion.plist))) + MAVERICKS = $(strip 
$(findstring 10.9, $(shell egrep "10\.9" /System/Library/CoreServices/SystemVersion.plist))) +endif + +# Common binaries +GCC ?= g++ +CLANG ?= /usr/bin/clang + +ifeq ("$(OSUPPER)","LINUX") + NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(GCC) +else + # for some newer versions of XCode, CLANG is the default compiler, so we need to include this + ifneq ($(MAVERICKS),) + NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(CLANG) + STDLIB ?= -stdlib=libstdc++ + else + NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(GCC) + endif +endif + +# Take command line flags that override any of these settings +ifeq ($(i386),1) + OS_SIZE = 32 + OS_ARCH = i686 +endif +ifeq ($(x86_64),1) + OS_SIZE = 64 + OS_ARCH = x86_64 +endif +ifeq ($(ARMv7),1) + OS_SIZE = 32 + OS_ARCH = armv7l +endif + +ifeq ("$(OSUPPER)","LINUX") + # Each Linux Distribuion has a set of different paths. This applies especially when using the Linux RPM/debian packages + ifeq ("$(DISTRO)","ubuntu") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","kubuntu") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","debian") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","suse") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","suse linux") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","opensuse") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","fedora") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = 
/usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","redhat") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","red") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","redhatenterpriseworkstation") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH ?= /usr/lib + endif + endif + ifeq ("$(DISTRO)","centos") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + + ifeq ($(ARMv7),1) + CUDAPATH := /usr/arm-linux-gnueabihf/lib + CUDALINK := -L/usr/arm-linux-gnueabihf/lib + ifneq ($(TARGET_FS),) + CUDAPATH += $(TARGET_FS)/usr/lib/nvidia-current + CUDALINK += -L$(TARGET_FS)/usr/lib/nvidia-current + endif + endif + + # Search for Linux distribution path for libcuda.so + CUDALIB ?= $(shell find $(CUDAPATH) $(DFLT_PATH) -name libcuda.so -print 2>/dev/null) + + ifeq ("$(CUDALIB)",'') + $(info >>> WARNING - CUDA Driver libcuda.so is not found. Please check and re-install the NVIDIA driver. 
<<<) + EXEC=@echo "[@]" + endif +else + # This would be the Mac OS X path if we had to do anything special +endif + diff --git a/workloads/realworld/uvm_prefetch/BN/ordergraph.cu b/workloads/realworld/uvm_prefetch/BN/ordergraph.cu new file mode 100644 index 0000000000000000000000000000000000000000..30061ec8f3c3395b03be36e159d455a531d77e86 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/BN/ordergraph.cu @@ -0,0 +1,757 @@ +#include +#include +#include +#include +#include +// #include +// includes CUDA +#include +// includes, kernels +#include "ordergraph_kernel.cu" +; + +#include "../../../common/cpu_timestamps.h" +#include "../../../common/cupti_add.h" + +const int HIGHEST = 3; +int taskperthr = 1; +int sizepernode; +int ITER = 100; + +// global var +float preScore = -99999999999.0f; +float score = 0.0; +float maxScore[HIGHEST] = {-999999999.0f}; +bool orders[NODE_N][NODE_N]; +bool preOrders[NODE_N][NODE_N]; +bool preGraph[NODE_N][NODE_N]; +bool bestGraph[HIGHEST][NODE_N][NODE_N]; +bool graph[NODE_N][NODE_N]; +// float prior[NODE_N][NODE_N]; +float *localscore, *D_localscore, *D_Score, *scores; +float *LG; +bool *D_parent; +int *D_resP, *parents; + +void initial(); // initial orders and data +int genOrders(); // swap +int ConCore(); // discard new order or not +bool getparent(int *bit, int *pre, int posN, int *parent, int *parN, + int time); // get every possible set of parents for a node +void incr(int *bit, int n); // binary code increases 1 each time +void incrS(int *bit, int n); // STATE_N code increases 1 each time +bool getState( + int parN, int *state, + int time); // get every possible combination of state for a parent set +float logGamma(int N); // log and gamma +float findBestGraph(); +void genScore(); +int convert(int *parent, int parN); +void sortGraph(); +void swap(int a, int b); +void Pre_logGamma(); +int findindex(int *arr, int size); +int C(int n, int a); + +FILE *fpout; + +int main(int argc, char *argv[]) { + /* + for(i=0;i maxScore[HIGHEST - 
1]) { + maxScore[HIGHEST - 1] = preScore; + for (a = 0; a < NODE_N; a++) { + for (b = 0; b < NODE_N; b++) { + bestGraph[HIGHEST - 1][a][b] = preGraph[a][b]; + } + } + b = HIGHEST - 1; + for (a = HIGHEST - 2; a >= 0; a--) { + if (maxScore[b] > maxScore[a]) { + swap(a, b); + tmpd = maxScore[a]; + maxScore[a] = maxScore[b]; + maxScore[b] = tmpd; + b = a; + } + } + } + } + + } // endwhile + + cudaFreeHost(localscore); + cudaFree(D_localscore); + cudaFree(D_parent); + + cudaFreeHost(scores); + cudaFreeHost(parents); + cudaFree(D_Score); + cudaFree(D_resP); + + /* + for(j=0;j max) { + max = maxScore[i]; + maxi = i; + } + } + + swap(j, maxi); + tmp = maxScore[j]; + maxScore[j] = max; + maxScore[maxi] = tmp; + } +} + +void swap(int a, int b) { + int i, j; + bool tmp; + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + + tmp = bestGraph[a][i][j]; + bestGraph[a][i][j] = bestGraph[b][i][j]; + bestGraph[b][i][j] = tmp; + } + } +} + +void initial() { + int i, j, tmp, a, b, r; + bool tmpd; + tmp = 1; + for (i = 1; i <= 4; i++) { + tmp += C(NODE_N - 1, i); + } + sizepernode = tmp; + tmp *= NODE_N; + + cudaMallocHost((void **)&localscore, tmp * sizeof(float)); + + for (i = 0; i < tmp; i++) + localscore[i] = 0; + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) + orders[i][j] = 0; + } + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < i; j++) + orders[i][j] = 1; + } + r = rand() % 10000; + for (i = 0; i < r; i++) { + a = rand() % NODE_N; + b = rand() % NODE_N; + for (j = 0; j < NODE_N; j++) { + tmpd = orders[j][a]; + orders[j][a] = orders[j][b]; + orders[j][b] = tmpd; + } + + for (j = 0; j < NODE_N; j++) { + tmpd = orders[a][j]; + orders[a][j] = orders[b][j]; + orders[b][j] = tmpd; + } + } + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + preOrders[i][j] = orders[i][j]; + } + } +} + +// generate ramdom order +int genOrders() { + + int a, b, j; + bool tmp; + a = rand() % NODE_N; + b = rand() % NODE_N; + + for (j = 0; j < NODE_N; 
j++) { + tmp = orders[a][j]; + orders[a][j] = orders[b][j]; + orders[b][j] = tmp; + } + for (j = 0; j < NODE_N; j++) { + tmp = orders[j][a]; + orders[j][a] = orders[j][b]; + orders[j][b] = tmp; + } + + return 1; +} + +// decide leave or discard an order +int ConCore() { + int i, j; + float tmp; + tmp = log((rand() % 100000) / 100000.0); + if (tmp < (score - preScore)) { + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + preOrders[i][j] = orders[i][j]; + preGraph[i][j] = graph[i][j]; + } + } + preScore = score; + + return 1; + } + + return 0; +} + +void genScore() { + int *D_data; + float *D_LG; + dim3 grid(sizepernode / 256 + 1, 1, 1); + dim3 threads(256, 1, 1); + + Pre_logGamma(); + // cudaPrintfInit(); + cudaMallocManaged((void **)&D_data, NODE_N * DATA_N * sizeof(int)); + cudaMallocManaged((void **)&D_localscore, NODE_N * sizepernode * sizeof(float)); + cudaMallocManaged((void **)&D_LG, (DATA_N + 2) * sizeof(float)); + cudaMemset(D_localscore, 0.0, NODE_N * sizepernode * sizeof(float)); + memcpy(D_data, data, NODE_N * DATA_N * sizeof(int)); + memcpy(D_LG, LG, (DATA_N + 2) * sizeof(float)); + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + cudaMemPrefetchAsync(D_data, NODE_N * DATA_N * sizeof(int), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(D_LG, (DATA_N + 2) * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(D_localscore,NODE_N * sizepernode * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + genScoreKernel<<>>(sizepernode, D_localscore, D_data, D_LG); + cudaDeviceSynchronize(); + + memcpy(localscore, D_localscore, NODE_N * sizepernode * sizeof(float)); + + // cudaPrintfDisplay(stdout, true); + // cudaPrintfEnd(); + + cudaFreeHost(LG); + cudaFree(D_LG); + cudaFree(D_data); + + cudaMallocHost((void **)&scores, + (sizepernode / 
(256 * taskperthr) + 1) * sizeof(float)); + cudaMallocHost((void **)&parents, + (sizepernode / (256 * taskperthr) + 1) * 4 * sizeof(int)); + cudaMallocManaged((void **)&D_Score, + (sizepernode / (256 * taskperthr) + 1) * sizeof(float)); + cudaMallocManaged((void **)&D_parent, NODE_N * sizeof(bool)); + cudaMallocManaged((void **)&D_resP, + (sizepernode / (256 * taskperthr) + 1) * 4 * sizeof(int)); +} + +int convert(int *parent, int parN) { + int i, j, w = 1, tmp = 0; + j = 0; + for (i = 0; parN > 0 && i <= parent[parN - 1]; i++) { + if (parent[j] == i) { + j++; + tmp += w; + } + w *= 2; + } + + return tmp; +} + +void Pre_logGamma() { + + cudaMallocHost((void **)&LG, (DATA_N + 2) * sizeof(float)); + + LG[1] = log(1.0); + float i; + for (i = 2; i <= DATA_N + 1; i++) { + LG[(int)i] = LG[(int)i - 1] + log((float)i); + } +} + +void incr(int *bit, int n) { + + bit[n]++; + if (bit[n] >= 2) { + bit[n] = 0; + incr(bit, n + 1); + } + + return; +} + +void incrS(int *bit, int n) { + + bit[n]++; + if (bit[n] >= STATE_N) { + bit[n] = 0; + incr(bit, n + 1); + } + + return; +} + +bool getState(int parN, int *state, int time) { + int j = 1; + + j = pow(STATE_N, (float)parN) - 1; + + if (time > j) + return false; + + if (time >= 1) + incrS(state, 0); + + return true; +} + +bool getparent(int *bit, int *pre, int posN, int *parent, int *parN, int time) { + int i, j = 1; + + *parN = 0; + if (time == 0) + return true; + + for (i = 0; i < posN; i++) { + j = j * 2; + } + j--; + + if (time > j) + return false; + + incr(bit, 0); + + for (i = 0; i < posN; i++) { + if (bit[i] == 1) { + parent[(*parN)++] = pre[i]; + } + } + + return true; +} + +float findBestGraph() { + float bestls = -99999999; + int bestparent[5]; + int bestpN, total; + int node, index; + int pre[NODE_N] = {0}; + int parent[NODE_N] = {0}; + int posN = 0, i, j, parN, tmp, k, l; + float ls = -99999999999, score = 0; + int blocknum; + + for (i = 0; i < NODE_N; i++) + for (j = 0; j < NODE_N; j++) + graph[i][j] = 0; + + for (node 
= 0; node < NODE_N; node++) { + + bestls = -99999999; + posN = 0; + + for (i = 0; i < NODE_N; i++) { + if (orders[node][i] == 1) { + pre[posN++] = i; + } + } + + if (posN >= 0) { + total = C(posN, 4) + C(posN, 3) + C(posN, 2) + posN + 1; + taskperthr = 1; + blocknum = total / (256 * taskperthr) + 1; + + int nbatches = MIN_NBATCHES; + + int blocknum_max = total / (BLOCK_SIZE * MIN_NBATCHES * taskperthr) + 1; + if (blocknum_max >= MAX_NBLOCKS) { + blocknum = MAX_NBLOCKS; + nbatches = (total + 1) / (BLOCK_SIZE * MAX_NBLOCKS * taskperthr); + } else { + blocknum = blocknum_max; + } + + cudaMemset(D_resP, 0, blocknum * 4 * sizeof(int)); + cudaMemset(D_Score, -999999.0, blocknum * nbatches * sizeof(float)); + memcpy(D_parent, orders[node], NODE_N * sizeof(bool)); + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + cudaMemPrefetchAsync(D_parent, NODE_N * sizeof(bool), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(D_resP, blocknum * 4 * sizeof(int), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(D_Score, blocknum * nbatches * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + computeKernel<<>>( + taskperthr, sizepernode, D_localscore, D_parent, node, total, D_Score, + D_resP, nbatches); + cudaDeviceSynchronize(); + + memcpy(parents, D_resP, blocknum * 4 * sizeof(int)); + memcpy(scores, D_Score, blocknum * sizeof(float)); + + for (i = 0; i < blocknum * nbatches; i++) { + + if (scores[i] > bestls) { + + bestls = scores[i]; + + parN = 0; + for (tmp = 0; tmp < 4; tmp++) { + if (parents[i * 4 + tmp] < 0) + break; + + bestparent[tmp] = parents[i * 4 + tmp]; + + parN++; + } + + bestpN = parN; + } + } + } else { + if (posN >= 4) { + for (i = 0; i < posN; i++) { + for (j = i + 1; j < posN; j++) { + for (k = j + 1; k < posN; k++) { + for (l = k + 1; l < posN; l++) { + parN = 4; + if 
(pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + if (pre[j] > node) + parent[2] = pre[j]; + else + parent[2] = pre[j] + 1; + if (pre[k] > node) + parent[3] = pre[k]; + else + parent[3] = pre[k] + 1; + if (pre[l] > node) + parent[4] = pre[l]; + else + parent[4] = pre[l] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + } + } + } + + if (posN >= 3) { + for (i = 0; i < posN; i++) { + for (j = i + 1; j < posN; j++) { + for (k = j + 1; k < posN; k++) { + + parN = 3; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + if (pre[j] > node) + parent[2] = pre[j]; + else + parent[2] = pre[j] + 1; + if (pre[k] > node) + parent[3] = pre[k]; + else + parent[3] = pre[k] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + } + } + + if (posN >= 2) { + for (i = 0; i < posN; i++) { + for (j = i + 1; j < posN; j++) { + + parN = 2; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + if (pre[j] > node) + parent[2] = pre[j]; + else + parent[2] = pre[j] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + } + + if (posN >= 1) { + for (i = 0; i < posN; i++) { + + parN = 1; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 
1]; + } + } + } + + parN = 0; + index = sizepernode * node; + + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = 0; + } + } + if (bestls > -99999999) { + + for (i = 0; i < bestpN; i++) { + if (bestparent[i] < node) + graph[node][bestparent[i] - 1] = 1; + else + graph[node][bestparent[i]] = 1; + } + score += bestls; + } + } + + return score; +} + +int findindex(int *arr, int size) { // reminder: arr[0] has to be 0 && size == + // array size-1 && index start from 0 + int i, j, index = 0; + + for (i = 1; i < size; i++) { + index += C(NODE_N - 1, i); + } + + for (i = 1; i <= size - 1; i++) { + for (j = arr[i - 1] + 1; j <= arr[i] - 1; j++) { + index += C(NODE_N - 1 - j, size - i); + } + } + + index += arr[size] - arr[size - 1]; + + return index; +} + +int C(int n, int a) { + int i, res = 1, atmp = a; + + for (i = 0; i < atmp; i++) { + res *= n; + n--; + } + + for (i = 0; i < atmp; i++) { + res /= a; + a--; + } + + return res; +} \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/BN/ordergraph_kernel.cu b/workloads/realworld/uvm_prefetch/BN/ordergraph_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..1c2c3693aacaf1f9749b6cf5b7dafcb076406326 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/BN/ordergraph_kernel.cu @@ -0,0 +1,325 @@ +#ifndef _ORDERGRAPH_KERNEL_H_ +#define _ORDERGRAPH_KERNEL_H_ + +#include + +#ifdef DATA_25 +#include "data25.cu" +#endif +#ifdef DATA_30 +#include "data30.cu" +#endif +#ifdef DATA_40 +#include "data40.cu" +#endif +#ifdef DATA_45 +#include "data45.cu" +#endif +#ifdef DATA_50 +#include "data50.cu" +#endif +#ifdef DATA_125 +#include "data125.cu" +#endif +; + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define BLOCK_SIZE 256 +#define MAX_NBLOCKS 1024 +#define MIN_NBATCHES 16 + + +__device__ void Dincr(int *bit, int n); +__device__ void DincrS(int *bit, int n); +__device__ bool D_getState(int parN, int *sta, int time); 
+__device__ void D_findComb(int *comb, int l, int n); +__device__ int D_findindex(int *arr, int size); +__device__ int D_C(int n, int a); + +__global__ void genScoreKernel(int sizepernode, float *D_localscore, + int *D_data, float *D_LG) { + int id = blockIdx.x * BLOCK_SIZE + threadIdx.x; + int node, index; + bool flag; + int parent[5] = {0}; + int pre[NODE_N] = {0}; + int state[5] = {0}; + int i, j, parN = 0, tmp, t; + int t1 = 0, t2 = 0; + float ls = 0; + int Nij[STATE_N] = {0}; + + if (id < sizepernode) { + + D_findComb(parent, id, NODE_N - 1); + + for (i = 0; i < 4; i++) { + if (parent[i] > 0) + parN++; + } + + for (node = 0; node < NODE_N; node++) { + + j = 1; + for (i = 0; i < NODE_N; i++) { + if (i != node) + pre[j++] = i; + } + + for (tmp = 0; tmp < parN; tmp++) + state[tmp] = 0; + + index = sizepernode * node + id; + + // priors + /* + for(tmp=1;tmp<=4;tmp++){ + localscore[index]+=100*(prior[node][pre[parent[tmp]]]-0.5)*(prior[node][pre[parent[tmp]]]-0.5)*(prior[node][pre[parent[tmp]]]-0.5); + } + */ + t = 0; + while (D_getState(parN, state, t++)) { // for get state + // printf("test %u\n",id); + ls = 0; + for (tmp = 0; tmp < STATE_N; tmp++) + Nij[tmp] = 0; + + for (t1 = 0; t1 < DATA_N; t1++) { + flag = true; + for (t2 = 0; t2 < parN; t2++) { + if (D_data[t1 * NODE_N + pre[parent[t2]]] != state[t2]) { + flag = false; + break; + } + } + if (!flag) + continue; + + Nij[D_data[t1 * NODE_N + node]]++; + } + + tmp = STATE_N - 1; + + for (t1 = 0; t1 < STATE_N; t1++) { + ls += D_LG[Nij[t1]]; + tmp += Nij[t1]; + } + + ls -= D_LG[tmp]; + ls += D_LG[STATE_N - 1]; + + D_localscore[index] += ls; + } + } + } +} + +__global__ void computeKernel(int taskperthr, int sizepernode, + float *D_localscore, bool *D_parent, int node, + int total, float *D_Score, int *D_resP, int nbatches) { + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + __shared__ float lsinblock[BLOCK_SIZE]; + + for (int b = 0; b < nbatches; b++) { + unsigned int bid = 
blockIdx.x * nbatches + b; + unsigned int tid = threadIdx.x; + unsigned int id = bid * (BLOCK_SIZE * nbatches) + tid; + + int posN = 1, i, index, t, tmp; + int pre[NODE_N] = {0}; + int parN = 0; + int bestparent[4] = {0}, parent[5] = {-1}; + float bestls = -999999999999999, ls; + + for (i = 0; i < NODE_N; i++) { + if (D_parent[i] == 1) { + pre[posN++] = i; + } + } + + for (i = 0; i < taskperthr && ((id * taskperthr + i) < total); i++) { + + D_findComb(parent, id * taskperthr + i, posN); + + for (parN = 0; parN < 4; parN++) { + if (parent[parN] < 0) + break; + if (pre[parent[parN]] > node) + parent[parN] = pre[parent[parN]]; + else + parent[parN] = pre[parent[parN]] + 1; + } + + for (tmp = parN; tmp > 0; tmp--) { + parent[tmp] = parent[tmp - 1]; + } + parent[0] = 0; + + index = D_findindex(parent, parN); + index += sizepernode * node; + + ls = D_localscore[index]; + + if (ls > bestls) { + bestls = ls; + for (tmp = 0; tmp < 4; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + + lsinblock[tid] = bestls; + block.sync(); + + for (i = BLOCK_SIZE / 2; i >= 1; i /= 2) { + + if (tid < i) { + if (lsinblock[tid + i] > lsinblock[tid] && lsinblock[tid + i] < 0) { + lsinblock[tid] = lsinblock[tid + i]; + lsinblock[tid + i] = (float)(tid + i); + } else if (lsinblock[tid + i] < lsinblock[tid] && lsinblock[tid] < 0) { + lsinblock[tid + i] = (float)tid; + } else if (lsinblock[tid] > 0 && lsinblock[tid + i] < 0) { + lsinblock[tid] = lsinblock[tid + i]; + lsinblock[tid + i] = (float)(tid + i); + } else if (lsinblock[tid] < 0 && lsinblock[tid + i] > 0) { + lsinblock[tid + i] = (float)tid; + } + } + block.sync(); + } + block.sync(); + + if (tid == 0) { + D_Score[bid] = lsinblock[0]; + t = 0; + for (i = 0; i < 7 && t < 128 && t >= 0; i++) { + t = (int)lsinblock[(int)powf(2.0, i) + t]; + } + lsinblock[0] = (float)t; + } + block.sync(); + + if (tid == (int)lsinblock[0]) { + for (i = 0; i < 4; i++) { + D_resP[bid * 4 + i] = bestparent[i]; + } + } + + } +} + +__device__ void Dincr(int 
*bit, int n) { + + while (n <= NODE_N) { + bit[n]++; + if (bit[n] >= 2) { + bit[n] = 0; + n++; + } else { + break; + } + } + + return; +} + +__device__ void DincrS(int *bit, int n) { + + bit[n]++; + if (bit[n] >= STATE_N) { + bit[n] = 0; + Dincr(bit, n + 1); + } + + return; +} + +__device__ bool D_getState(int parN, int *sta, int time) { + int i, j = 1; + + for (i = 0; i < parN; i++) { + j *= STATE_N; + } + j--; + if (time > j) + return false; + + if (time >= 1) + DincrS(sta, 0); + + return true; +} + +__device__ void D_findComb(int *comb, int l, int n) { + const int len = 4; + if (l == 0) { + for (int i = 0; i < len; i++) + comb[i] = -1; + return; + } + int sum = 0; + int k = 1; + + while (sum < l) + sum += D_C(n, k++); + l -= sum - D_C(n, --k); + int low = 0; + int pos = 0; + while (k > 1) { + sum = 0; + int s = 1; + while (sum < l) + sum += D_C(n - s++, k - 1); + l -= sum - D_C(n - (--s), --k); + low += s; + comb[pos++] = low; + n -= s; + } + comb[pos] = low + l; + for (int i = pos + 1; i < 4; i++) + comb[i] = -1; +} + +__device__ int D_findindex(int *arr, + int size) { // reminder: arr[0] has to be 0 && size + // == array size-1 && index start from 0 + int i, j, index = 0; + + for (i = 1; i < size; i++) { + index += D_C(NODE_N - 1, i); + } + + for (i = 1; i <= size - 1; i++) { + for (j = arr[i - 1] + 1; j <= arr[i] - 1; j++) { + index += D_C(NODE_N - 1 - j, size - i); + } + } + + index += arr[size] - arr[size - 1]; + + return index; +} + +__device__ int D_C(int n, int a) { + int i, res = 1, atmp = a; + + for (i = 0; i < atmp; i++) { + res *= n; + n--; + } + + for (i = 0; i < atmp; i++) { + res /= a; + a--; + } + + return res; +} + +#endif diff --git a/workloads/realworld/uvm_prefetch/BN/run.sh b/workloads/realworld/uvm_prefetch/BN/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..f87817975334cff247b1bdf91651d41062aa8320 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/BN/run.sh @@ -0,0 +1,5 @@ +# ./ordergraph_25 +# ./ordergraph_30 
+# ./ordergraph_40 +# ./ordergraph_45 +./ordergraph_50 \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/BN/run_super.sh b/workloads/realworld/uvm_prefetch/BN/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..5c45d88db0716b0b4b0828ba397cbd918d1612c0 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/BN/run_super.sh @@ -0,0 +1 @@ +./ordergraph_50 diff --git a/workloads/realworld/uvm_prefetch/backprop/Makefile b/workloads/realworld/uvm_prefetch/backprop/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..286cd40af79bbb80b6d86aad9bd0d2c0d1a846e0 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/backprop/Makefile @@ -0,0 +1,47 @@ +include ../../../common/make.config + +# C compiler +CC = gcc +CC_FLAGS = -g -O2 + +# CUDA compiler +NVCC = $(CUDA_DIR)/bin/nvcc +NVCC_FLAGS = -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -arch=sm_80 + +# 'make dbg=1' enables NVCC debugging +ifeq ($(dbg),1) + NVCC_FLAGS += -g -O0 +else + NVCC_FLAGS += -O2 +endif + +# 'make emu=1' compiles the CUDA kernels for emulation +ifeq ($(emu),1) + NVCC_FLAGS += -deviceemu +endif + + +backprop: backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + $(NVCC) $(NVCC_FLAGS) backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -o backprop $(NVCC_FLAGS) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++ + +# backprop: backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp +# $(CC) $(CC_FLAGS) backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -o backprop -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++ + +%.o: %.[ch] + $(CC) $(CC_FLAGS) $< -c + +facetrain.o: facetrain.c backprop.h + $(CC) $(CC_FLAGS) 
facetrain.c -c + +backprop.o: backprop.c backprop.h + $(CC) $(CC_FLAGS) backprop.c -c + +backprop_cuda.o: backprop_cuda.cu backprop.h $(CUPTI_ADD_COMMON)/cupti_add.h $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + $(NVCC) $(NVCC_FLAGS) -c backprop_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +imagenet.o: imagenet.c backprop.h + $(CC) $(CC_FLAGS) imagenet.c -c + + +clean: + rm -f *.o *~ backprop backprop_cuda.linkinfo diff --git a/workloads/realworld/uvm_prefetch/backprop/backprop.c b/workloads/realworld/uvm_prefetch/backprop/backprop.c new file mode 100644 index 0000000000000000000000000000000000000000..3a38f012b785f8cbaec7f9c33e9ae58b9ee92ae5 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/backprop/backprop.c @@ -0,0 +1,502 @@ +/* + ****************************************************************** + * HISTORY + * 15-Oct-94 Jeff Shufelt (js), Carnegie Mellon University + * Prepared for 15-681, Fall 1994. + * Modified by Shuai Che + ****************************************************************** + */ +#include +#include +#include +#include +#include "backprop.h" +#include +//#define OPEN + +#define ABS(x) (((x) > 0.0) ? (x) : (-(x))) + +#define fastcopy(to,from,len)\ +{\ + register char *_to,*_from;\ + register int _i,_l;\ + _to = (char *)(to);\ + _from = (char *)(from);\ + _l = (len);\ + for (_i = 0; _i < _l; _i++) *_to++ = *_from++;\ +} + +/*** Return random number between 0.0 and 1.0 ***/ +float drnd() +{ + return ((float) rand() / (float) BIGRND); +} + +/*** Return random number between -1.0 and 1.0 ***/ +float dpn1() +{ + return ((drnd() * 2.0) - 1.0); +} + +/*** The squashing function. Currently, it's a sigmoid. 
***/ + +float squash(x) +float x; +{ + float m; + //x = -x; + //m = 1 + x + x*x/2 + x*x*x/6 + x*x*x*x/24 + x*x*x*x*x/120; + //return(1.0 / (1.0 + m)); + return (1.0 / (1.0 + exp(-x))); +} + + +/*** Allocate 1d array of floats ***/ + +float *alloc_1d_dbl(n) +int n; +{ + float *new; + + new = (float *) malloc ((unsigned) (n * sizeof (float))); + if (new == NULL) { + printf("ALLOC_1D_DBL: Couldn't allocate array of floats\n"); + return (NULL); + } + return (new); +} + + +/*** Allocate 2d array of floats ***/ + +float **alloc_2d_dbl(m, n) +int m, n; +{ + int i; + float **new; + + new = (float **) malloc ((unsigned) (m * sizeof (float *))); + if (new == NULL) { + printf("ALLOC_2D_DBL: Couldn't allocate array of dbl ptrs\n"); + return (NULL); + } + + for (i = 0; i < m; i++) { + new[i] = alloc_1d_dbl(n); + } + + return (new); +} + + +bpnn_randomize_weights(w, m, n) +float **w; +int m, n; +{ + int i, j; + + for (i = 0; i <= m; i++) { + for (j = 0; j <= n; j++) { + w[i][j] = (float) rand()/RAND_MAX; + // w[i][j] = dpn1(); + } + } +} + +bpnn_randomize_row(w, m) +float *w; +int m; +{ + int i; + for (i = 0; i <= m; i++) { + //w[i] = (float) rand()/RAND_MAX; + w[i] = 0.1; + } +} + + +bpnn_zero_weights(w, m, n) +float **w; +int m, n; +{ + int i, j; + + for (i = 0; i <= m; i++) { + for (j = 0; j <= n; j++) { + w[i][j] = 0.0; + } + } +} + + +void bpnn_initialize(seed) +{ + printf("Random number generator seed: %d\n", seed); + srand(seed); +} + + +BPNN *bpnn_internal_create(n_in, n_hidden, n_out) +int n_in, n_hidden, n_out; +{ + BPNN *newnet; + + newnet = (BPNN *) malloc (sizeof (BPNN)); + if (newnet == NULL) { + printf("BPNN_CREATE: Couldn't allocate neural network\n"); + return (NULL); + } + + newnet->input_n = n_in; + newnet->hidden_n = n_hidden; + newnet->output_n = n_out; + newnet->input_units = alloc_1d_dbl(n_in + 1); + newnet->hidden_units = alloc_1d_dbl(n_hidden + 1); + newnet->output_units = alloc_1d_dbl(n_out + 1); + + newnet->hidden_delta = alloc_1d_dbl(n_hidden + 1); + 
newnet->output_delta = alloc_1d_dbl(n_out + 1); + newnet->target = alloc_1d_dbl(n_out + 1); + + newnet->input_weights = alloc_2d_dbl(n_in + 1, n_hidden + 1); + newnet->hidden_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); + + newnet->input_prev_weights = alloc_2d_dbl(n_in + 1, n_hidden + 1); + newnet->hidden_prev_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); + + return (newnet); +} + + +void bpnn_free(net) +BPNN *net; +{ + int n1, n2, i; + + n1 = net->input_n; + n2 = net->hidden_n; + + free((char *) net->input_units); + free((char *) net->hidden_units); + free((char *) net->output_units); + + free((char *) net->hidden_delta); + free((char *) net->output_delta); + free((char *) net->target); + + for (i = 0; i <= n1; i++) { + free((char *) net->input_weights[i]); + free((char *) net->input_prev_weights[i]); + } + free((char *) net->input_weights); + free((char *) net->input_prev_weights); + + for (i = 0; i <= n2; i++) { + free((char *) net->hidden_weights[i]); + free((char *) net->hidden_prev_weights[i]); + } + free((char *) net->hidden_weights); + free((char *) net->hidden_prev_weights); + + free((char *) net); +} + + +/*** Creates a new fully-connected network from scratch, + with the given numbers of input, hidden, and output units. + Threshold units are automatically included. All weights are + randomly initialized. + + Space is also allocated for temporary storage (momentum weights, + error computations, etc). 
+***/ + +BPNN *bpnn_create(n_in, n_hidden, n_out) +int n_in, n_hidden, n_out; +{ + + BPNN *newnet; + + newnet = bpnn_internal_create(n_in, n_hidden, n_out); + +#ifdef INITZERO + bpnn_zero_weights(newnet->input_weights, n_in, n_hidden); +#else + bpnn_randomize_weights(newnet->input_weights, n_in, n_hidden); +#endif + bpnn_randomize_weights(newnet->hidden_weights, n_hidden, n_out); + bpnn_zero_weights(newnet->input_prev_weights, n_in, n_hidden); + bpnn_zero_weights(newnet->hidden_prev_weights, n_hidden, n_out); + bpnn_randomize_row(newnet->target, n_out); + return (newnet); +} + + +void bpnn_layerforward(l1, l2, conn, n1, n2) +float *l1, *l2, **conn; +int n1, n2; +{ + float sum; + int j, k; + + /*** Set up thresholding unit ***/ + l1[0] = 1.0; +#ifdef OPEN + omp_set_num_threads(NUM_THREAD); + #pragma omp parallel for shared(conn, n1, n2, l1) private(k, j) reduction(+: sum) schedule(static) +#endif + /*** For each unit in second layer ***/ + for (j = 1; j <= n2; j++) { + + /*** Compute weighted sum of its inputs ***/ + sum = 0.0; + for (k = 0; k <= n1; k++) { + sum += conn[k][j] * l1[k]; + } + l2[j] = squash(sum); + } +} + +//extern "C" +void bpnn_output_error(delta, target, output, nj, err) +float *delta, *target, *output, *err; +int nj; +{ + int j; + float o, t, errsum; + errsum = 0.0; + for (j = 1; j <= nj; j++) { + o = output[j]; + t = target[j]; + delta[j] = o * (1.0 - o) * (t - o); + errsum += ABS(delta[j]); + } + *err = errsum; +} + + +void bpnn_hidden_error(delta_h, + nh, + delta_o, + no, + who, + hidden, + err) +float *delta_h, *delta_o, *hidden, **who, *err; +int nh, no; +{ + int j, k; + float h, sum, errsum; + + errsum = 0.0; + for (j = 1; j <= nh; j++) { + h = hidden[j]; + sum = 0.0; + for (k = 1; k <= no; k++) { + sum += delta_o[k] * who[j][k]; + } + delta_h[j] = h * (1.0 - h) * sum; + errsum += ABS(delta_h[j]); + } + *err = errsum; +} + + +void bpnn_adjust_weights(delta, ndelta, ly, nly, w, oldw) +float *delta, *ly, **w, **oldw; +{ + float new_dw; + int 
k, j; + ly[0] = 1.0; + //eta = 0.3; + //momentum = 0.3; + +#ifdef OPEN + omp_set_num_threads(NUM_THREAD); + #pragma omp parallel for \ + shared(oldw, w, delta) \ + private(j, k, new_dw) \ + firstprivate(ndelta, nly, momentum) +#endif + for (j = 1; j <= ndelta; j++) { + for (k = 0; k <= nly; k++) { + new_dw = ((ETA * delta[j] * ly[k]) + (MOMENTUM * oldw[k][j])); + w[k][j] += new_dw; + oldw[k][j] = new_dw; + } + } +} + + +void bpnn_feedforward(net) +BPNN *net; +{ + int in, hid, out; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + + /*** Feed forward input activations. ***/ + bpnn_layerforward(net->input_units, net->hidden_units, + net->input_weights, in, hid); + bpnn_layerforward(net->hidden_units, net->output_units, + net->hidden_weights, hid, out); + +} + + +void bpnn_train(net, eo, eh) +BPNN *net; +float *eo, *eh; +{ + int in, hid, out; + float out_err, hid_err; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + + /*** Feed forward input activations. ***/ + bpnn_layerforward(net->input_units, net->hidden_units, + net->input_weights, in, hid); + bpnn_layerforward(net->hidden_units, net->output_units, + net->hidden_weights, hid, out); + + /*** Compute error on output and hidden units. ***/ + bpnn_output_error(net->output_delta, net->target, net->output_units, + out, &out_err); + bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, + net->hidden_weights, net->hidden_units, &hid_err); + *eo = out_err; + *eh = hid_err; + + /*** Adjust input and hidden weights. 
***/ + bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, + net->hidden_weights, net->hidden_prev_weights); + bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in, + net->input_weights, net->input_prev_weights); + +} + + + + +void bpnn_save(net, filename) +BPNN *net; +char *filename; +{ + int n1, n2, n3, i, j, memcnt; + float dvalue, **w; + char *mem; + ///add// + FILE *pFile; + pFile = fopen( filename, "w+" ); + /////// + /* + if ((fd = creat(filename, 0644)) == -1) { + printf("BPNN_SAVE: Cannot create '%s'\n", filename); + return; + } + */ + + n1 = net->input_n; n2 = net->hidden_n; n3 = net->output_n; + printf("Saving %dx%dx%d network to '%s'\n", n1, n2, n3, filename); + //fflush(stdout); + + //write(fd, (char *) &n1, sizeof(int)); + //write(fd, (char *) &n2, sizeof(int)); + //write(fd, (char *) &n3, sizeof(int)); + + fwrite( (char *) &n1 , sizeof(char), sizeof(char), pFile); + fwrite( (char *) &n2 , sizeof(char), sizeof(char), pFile); + fwrite( (char *) &n3 , sizeof(char), sizeof(char), pFile); + + + + memcnt = 0; + w = net->input_weights; + mem = (char *) malloc ((unsigned) ((n1+1) * (n2+1) * sizeof(float))); + for (i = 0; i <= n1; i++) { + for (j = 0; j <= n2; j++) { + dvalue = w[i][j]; + fastcopy(&mem[memcnt], &dvalue, sizeof(float)); + memcnt += sizeof(float); + } + } + //write(fd, mem, (n1+1) * (n2+1) * sizeof(float)); + fwrite( mem , (unsigned)(sizeof(float)), (unsigned) ((n1+1) * (n2+1) * sizeof(float)) , pFile); + free(mem); + + memcnt = 0; + w = net->hidden_weights; + mem = (char *) malloc ((unsigned) ((n2+1) * (n3+1) * sizeof(float))); + for (i = 0; i <= n2; i++) { + for (j = 0; j <= n3; j++) { + dvalue = w[i][j]; + fastcopy(&mem[memcnt], &dvalue, sizeof(float)); + memcnt += sizeof(float); + } + } + //write(fd, mem, (n2+1) * (n3+1) * sizeof(float)); + fwrite( mem , sizeof(float), (unsigned) ((n2+1) * (n3+1) * sizeof(float)) , pFile); + free(mem); + + fclose(pFile); + return; +} + + +BPNN *bpnn_read(filename) +char 
*filename; +{ + char *mem; + BPNN *new; + int fd, n1, n2, n3, i, j, memcnt; + + if ((fd = open(filename, 0, 0644)) == -1) { + return (NULL); + } + + printf("Reading '%s'\n", filename); //fflush(stdout); + + read(fd, (char *) &n1, sizeof(int)); + read(fd, (char *) &n2, sizeof(int)); + read(fd, (char *) &n3, sizeof(int)); + new = bpnn_internal_create(n1, n2, n3); + + printf("'%s' contains a %dx%dx%d network\n", filename, n1, n2, n3); + printf("Reading input weights..."); //fflush(stdout); + + memcnt = 0; + mem = (char *) malloc ((unsigned) ((n1+1) * (n2+1) * sizeof(float))); + read(fd, mem, (n1+1) * (n2+1) * sizeof(float)); + for (i = 0; i <= n1; i++) { + for (j = 0; j <= n2; j++) { + fastcopy(&(new->input_weights[i][j]), &mem[memcnt], sizeof(float)); + memcnt += sizeof(float); + } + } + free(mem); + + printf("Done\nReading hidden weights..."); //fflush(stdout); + + memcnt = 0; + mem = (char *) malloc ((unsigned) ((n2+1) * (n3+1) * sizeof(float))); + read(fd, mem, (n2+1) * (n3+1) * sizeof(float)); + for (i = 0; i <= n2; i++) { + for (j = 0; j <= n3; j++) { + fastcopy(&(new->hidden_weights[i][j]), &mem[memcnt], sizeof(float)); + memcnt += sizeof(float); + } + } + free(mem); + close(fd); + + printf("Done\n"); //fflush(stdout); + + bpnn_zero_weights(new->input_prev_weights, n1, n2); + bpnn_zero_weights(new->hidden_prev_weights, n2, n3); + + return (new); +} diff --git a/workloads/realworld/uvm_prefetch/backprop/backprop.h b/workloads/realworld/uvm_prefetch/backprop/backprop.h new file mode 100644 index 0000000000000000000000000000000000000000..dfaafe39b76a1c9c1455e38bbe0a14d5461d5d2b --- /dev/null +++ b/workloads/realworld/uvm_prefetch/backprop/backprop.h @@ -0,0 +1,53 @@ +#ifndef _BACKPROP_H_ +#define _BACKPROP_H_ + +#define BIGRND 0x7fffffff + +#define GPU +#define THREADS 256 +#define WIDTH 16 // shared memory width +#define HEIGHT 16 // shared memory height + +#define ETA 0.3 //eta value +#define MOMENTUM 0.3 //momentum value +#define NUM_THREAD 4 //OpenMP threads + 
+ +typedef struct { + int input_n; /* number of input units */ + int hidden_n; /* number of hidden units */ + int output_n; /* number of output units */ + + float *input_units; /* the input units */ + float *hidden_units; /* the hidden units */ + float *output_units; /* the output units */ + + float *hidden_delta; /* storage for hidden unit error */ + float *output_delta; /* storage for output unit error */ + + float *target; /* storage for target vector */ + + float **input_weights; /* weights from input to hidden layer */ + float **hidden_weights; /* weights from hidden to output layer */ + + /*** The next two are for momentum ***/ + float **input_prev_weights; /* previous change on input to hidden wgt */ + float **hidden_prev_weights; /* previous change on hidden to output wgt */ +} BPNN; + + +/*** User-level functions ***/ + +void bpnn_initialize(); + +BPNN *bpnn_create(); +void bpnn_free(); + +void bpnn_train(); +void bpnn_feedforward(); + +void bpnn_save(); +BPNN *bpnn_read(); + + +#endif diff --git a/workloads/realworld/uvm_prefetch/backprop/backprop_cuda.cu b/workloads/realworld/uvm_prefetch/backprop/backprop_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..24126be67ec657a2b560227d15fb8a96bde19893 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/backprop/backprop_cuda.cu @@ -0,0 +1,279 @@ + + +// includes, system +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +double t_start, t_end; + +// includes, kernels +#include "backprop_cuda_kernel.cu" +#include "backprop.h" + +//////////////////////////////////////////////////////////////////////////////// + +extern "C" void bpnn_layerforward(float *l1, float 
*l2, float **conn, int n1, int n2); + +extern "C" void bpnn_output_error(float *delta, float *target, float *output, int nj, float *err); + +extern "C" void bpnn_hidden_error(float *delta_h, int nh, float *delta_o, int no, float **who, float *hidden, float *err); + +extern "C" void bpnn_adjust_weights(float *delta, int ndelta, float *ly, int nly, float **w, float **oldw); + +extern "C" int setup(int argc, char **argv); + +extern "C" float **alloc_2d_dbl(int m, int n); + +extern "C" float squash(float x); + +double gettime() +{ + struct timeval t; + gettimeofday(&t, NULL); + return t.tv_sec + t.tv_usec * 1e-6; +} + +unsigned int num_threads = 0; +unsigned int num_blocks = 0; + +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + GPU_argv_init(); + + initTrace(); + startCPU(); + + num_blocks = atoi(argv[2]); + setup(argc, argv); +} + +extern "C" void bpnn_train_cuda(BPNN *net, float *eo, float *eh) +{ + int in, hid, out; + float out_err, hid_err; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + +#ifdef GPU + int m = 0; + float *input_hidden_cuda; + float *input_cuda; + float *output_hidden_cuda; + float *partial_sum; + float *hidden_partial_sum; + float *hidden_delta_cuda; + float *input_prev_weights_cuda; + float sum; + float *input_weights_one_dim; + float *input_weights_prev_one_dim; + // ruihao + // num_blocks = in / 16; + // dim3 grid(1, num_blocks); + // dim3 threads(16, 16); + + int tile_size = in / num_blocks; + dim3 grid(1, num_blocks); + dim3 threads(16, 16); + // ruihao + + input_weights_one_dim = (float *)malloc((in + 1) * (hid + 1) * sizeof(float)); + input_weights_prev_one_dim = (float *)malloc((in + 1) * (hid + 1) * sizeof(float)); + // 
ruihao + // partial_sum = (float *) malloc(num_blocks * WIDTH * sizeof(float)); + partial_sum = (float *)malloc(in * sizeof(float)); + // ruihao + + // this preprocessing stage is added to correct the bugs of wrong memcopy using two-dimensional net->inputweights + for (int k = 0; k <= in; k++) + { + for (int j = 0; j <= hid; j++) + { + input_weights_one_dim[m] = net->input_weights[k][j]; + input_weights_prev_one_dim[m] = net->input_prev_weights[k][j]; + m++; + } + } + + // GPU_argv_init(); + + // initTrace(); + // startCPU(); + + cudaMallocManaged((void **)&input_cuda, (in + 1) * sizeof(float)); + cudaMallocManaged((void **)&output_hidden_cuda, (hid + 1) * sizeof(float)); + cudaMallocManaged((void **)&input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float)); + // ruihao + // cudaMalloc((void**) &hidden_partial_sum, num_blocks * WIDTH * sizeof(float)); + cudaMallocManaged((void **)&hidden_partial_sum, in * sizeof(float)); + // ruihao + +#endif + +#ifdef CPU + + printf("Performing CPU computation\n"); + bpnn_layerforward(net->input_units, net->hidden_units, net->input_weights, in, hid); + +#endif + +#ifdef GPU + + //printf("Performing GPU computation\n"); + + memcpy(input_cuda, net->input_units, (in + 1) * sizeof(float)); + memcpy(input_hidden_cuda, input_weights_one_dim, (in + 1) * (hid + 1) * sizeof(float)); + + // ruihao + //t_start = rtclock(); + // ruihao + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(input_cuda, (in + 1) * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(hidden_partial_sum, in * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + bpnn_layerforward_CUDA<<>>(input_cuda, + output_hidden_cuda, + 
input_hidden_cuda, + hidden_partial_sum, + in, + hid, + tile_size); + + cudaDeviceSynchronize(); + + // ruihao + //t_end = rtclock(); + //fprintf(stdout, "bpnn_layerforward_CUDA GPU Runtime: %0.6lfs\n", t_end - t_start); + memcpy(partial_sum, hidden_partial_sum, in * sizeof(float)); + // ruihao + + cudaError_t error = cudaGetLastError(); + if (error != cudaSuccess) + { + printf("bpnn kernel error: %s\n", cudaGetErrorString(error)); + exit(EXIT_FAILURE); + } + + for (int j = 1; j <= hid; j++) + { + sum = 0.0; + // ruihao + // for (int k = 0; k < num_blocks; k++) { + // sum += partial_sum[k * hid + j-1] ; + // } + for (int k = 0; k < in / WIDTH; k++) + { + sum += partial_sum[k * hid + j - 1]; + } + // ruihao + sum += net->input_weights[0][j]; + net->hidden_units[j] = float(1.0 / (1.0 + exp(-sum))); + } +#endif + + bpnn_layerforward(net->hidden_units, net->output_units, net->hidden_weights, hid, out); + bpnn_output_error(net->output_delta, net->target, net->output_units, out, &out_err); + bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, net->hidden_weights, net->hidden_units, &hid_err); + bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, net->hidden_weights, net->hidden_prev_weights); + +#ifdef CPU + + bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in, net->input_weights, net->input_prev_weights); + +#endif + +#ifdef GPU + + cudaMallocManaged((void **)&hidden_delta_cuda, (hid + 1) * sizeof(float)); + cudaMallocManaged((void **)&input_prev_weights_cuda, (in + 1) * (hid + 1) * sizeof(float)); + // ruihao + //t_start = rtclock(); + memcpy(hidden_delta_cuda, net->hidden_delta, (hid + 1) * sizeof(float)); + memcpy(input_prev_weights_cuda, input_weights_prev_one_dim, (in + 1) * (hid + 1) * sizeof(float)); + memcpy(input_hidden_cuda, input_weights_one_dim, (in + 1) * (hid + 1) * sizeof(float)); + + cudaStream_t stream4; + cudaStream_t stream5; + cudaStream_t stream6; + cudaStream_t stream7; + cudaStreamCreate(&stream4); + 
cudaStreamCreate(&stream5); + cudaStreamCreate(&stream6); + cudaStreamCreate(&stream7); + + cudaMemPrefetchAsync(hidden_delta_cuda, (hid + 1) * sizeof(float), GPU_DEVICE, stream4); + cudaStreamSynchronize(stream4); + cudaMemPrefetchAsync(input_prev_weights_cuda, (in + 1) * (hid + 1) * sizeof(float), GPU_DEVICE, stream5); + cudaStreamSynchronize(stream5); + cudaMemPrefetchAsync(input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float), GPU_DEVICE, stream6); + cudaStreamSynchronize(stream6); + cudaMemPrefetchAsync(input_cuda, (in + 1) * sizeof(float), GPU_DEVICE, stream7); + cudaStreamSynchronize(stream7); + + // ruihao + bpnn_adjust_weights_cuda<<>>(hidden_delta_cuda, + hid, + input_cuda, + in, + input_hidden_cuda, + input_prev_weights_cuda, + tile_size); + // ruihao + cudaDeviceSynchronize(); + //t_end = rtclock(); + memcpy(net->input_units, input_cuda, (in + 1) * sizeof(float)); + memcpy(input_weights_one_dim, input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float)); + //fprintf(stdout, "bpnn_adjust_weights_cuda GPU Runtime: %0.6lfs\n", t_end - t_start); + // ruihao + + cudaFree(input_cuda); + cudaFree(output_hidden_cuda); + cudaFree(input_hidden_cuda); + cudaFree(hidden_partial_sum); + cudaFree(input_prev_weights_cuda); + cudaFree(hidden_delta_cuda); + + endCPU(); + finiTrace(); + + free(partial_sum); + free(input_weights_one_dim); + free(input_weights_prev_one_dim); + +#endif +} diff --git a/workloads/realworld/uvm_prefetch/backprop/backprop_cuda_kernel.cu b/workloads/realworld/uvm_prefetch/backprop/backprop_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..27f07767e27b29a189b99a1a0c6010ad2ee032e6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/backprop/backprop_cuda_kernel.cu @@ -0,0 +1,110 @@ + + +#ifndef _BACKPROP_CUDA_KERNEL_H_ +#define _BACKPROP_CUDA_KERNEL_H_ + +#include +#include "backprop.h" +#include "math.h" +#include "cuda.h" + +#include +#include + +__global__ void +bpnn_layerforward_CUDA(float *input_cuda, + 
float *output_hidden_cuda, + float *input_hidden_cuda, + float *hidden_partial_sum, + int in, + int hid, + int tile_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + int by = blockIdx.y; + int tx = threadIdx.x; + int ty = threadIdx.y; + + int batches = tile_size / WIDTH; + + __shared__ float input_node[HEIGHT]; + __shared__ float weight_matrix[HEIGHT * WIDTH]; + + for (int b = 0; b < batches; b++) + { + int index = (hid + 1) * HEIGHT * (batches * by + b) + (hid + 1) * ty + tx + 1 + (hid + 1); + + int index_in = HEIGHT * (batches * by + b) + ty + 1; + + if (tx == 0) + input_node[ty] = input_cuda[index_in]; + + block.sync(); + + weight_matrix[ty * WIDTH + tx] = input_hidden_cuda[index]; + + block.sync(); + + weight_matrix[ty * WIDTH + tx] = weight_matrix[ty * WIDTH + tx] * input_node[ty]; + + block.sync(); + + for (int i = 1; i <= __log2f(HEIGHT); i++) + { + + int power_two = __powf(2, i); + + if (ty % power_two == 0) + weight_matrix[ty * WIDTH + tx] += weight_matrix[(ty + power_two / 2) * WIDTH + tx]; + + block.sync(); + } + + input_hidden_cuda[index] = weight_matrix[ty * WIDTH + tx]; + + block.sync(); + + if (tx == 0) + { + hidden_partial_sum[(batches * by + b) * hid + ty] = weight_matrix[tx * WIDTH + ty]; + } + } +} + +__global__ void bpnn_adjust_weights_cuda(float *delta, + int hid, + float *ly, + int in, + float *w, + float *oldw, + int tile_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + int by = blockIdx.y; + + int tx = threadIdx.x; + int ty = threadIdx.y; + + int batches = tile_size / WIDTH; + + for (int b = 0; b < batches; b++) + { + int index = (hid + 1) * HEIGHT * (batches * by + b) + (hid + 1) * ty + tx + 1 + (hid + 1); + int index_y = HEIGHT * (batches * by + b) + ty + 1; + int index_x = tx + 1; + // eta = 0.3; + // momentum = 0.3; + + w[index] += ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index])); + oldw[index] = ((ETA * delta[index_x] * 
ly[index_y]) + (MOMENTUM * oldw[index])); + + block.sync(); + + if (ty == 0 && by == 0 && b == 0) + { + w[index_x] += ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x])); + oldw[index_x] = ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x])); + } + } +} +#endif diff --git a/workloads/realworld/uvm_prefetch/backprop/facetrain.c b/workloads/realworld/uvm_prefetch/backprop/facetrain.c new file mode 100644 index 0000000000000000000000000000000000000000..cbf83810934b68551d7dd4b7b94fda5eb6837276 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/backprop/facetrain.c @@ -0,0 +1,54 @@ + +#include +#include +#include +#include +#include "backprop.h" +#include "omp.h" + +extern char *strcpy(); +extern void exit(); + +int layer_size = 0; + +backprop_face() +{ + BPNN *net; + int i; + float out_err, hid_err; + net = bpnn_create(layer_size, 16, 1); // (16, 1 can not be changed) + + printf("Input layer size : %d\n", layer_size); + load(net); + // entering the training kernel, only one iteration + printf("Starting training kernel\n"); + bpnn_train_cuda(net, &out_err, &hid_err); + bpnn_free(net); + printf("Training done\n"); +} + +int setup(argc, argv) +int argc; +char *argv[]; +{ + + int seed; + + if (argc != 3) + { + fprintf(stderr, "usage: backprop \n"); + exit(0); + } + layer_size = atoi(argv[1]); + if (layer_size % 16 != 0) + { + fprintf(stderr, "The number of input points must be divided by 16\n"); + exit(0); + } + + seed = 7; + bpnn_initialize(seed); + backprop_face(); + + exit(0); +} diff --git a/workloads/realworld/uvm_prefetch/backprop/imagenet.c b/workloads/realworld/uvm_prefetch/backprop/imagenet.c new file mode 100644 index 0000000000000000000000000000000000000000..255b0d5d8ca67508f6732e266299e7c58012906f --- /dev/null +++ b/workloads/realworld/uvm_prefetch/backprop/imagenet.c @@ -0,0 +1,24 @@ + +#include +#include +#include "backprop.h" + +extern layer_size; + +load(net) +BPNN *net; +{ + float *units; + int nr, nc, imgsize, i, j, k; + + nr = layer_size; + + 
imgsize = nr * nc; + units = net->input_units; + + k = 1; + for (i = 0; i < nr; i++) { + units[k] = (float) rand()/RAND_MAX ; + k++; + } +} diff --git a/workloads/realworld/uvm_prefetch/backprop/run.sh b/workloads/realworld/uvm_prefetch/backprop/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..874cbb88032622578f319cce3800a3793151cb92 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/backprop/run.sh @@ -0,0 +1,5 @@ +# ./backprop 524288 +# ./backprop 8388608 128 + +# ./backprop 66708864 128 +./backprop 66708864 1024 \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/backprop/run_super.sh b/workloads/realworld/uvm_prefetch/backprop/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..d9e8a3d42354f597bbcd153d180d9b23bed71192 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/backprop/run_super.sh @@ -0,0 +1,3 @@ +# ./backprop 1073741824 1024 +# ./backprop 134217728 1024 +./backprop 67108864 1024 \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/darknet/LICENSE b/workloads/realworld/uvm_prefetch/darknet/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..a50f7d700ba02bfacd50f59b315311cf4d0bbda2 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/LICENSE @@ -0,0 +1,12 @@ + YOLO LICENSE + Version 2, July 29 2016 + +THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER +SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN +TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES +LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY. NOW HERE'S +THE REAL LICENSE: + +0. Darknet is public domain. +1. Do whatever you want with it. +2. Stop emailing me about it! 
diff --git a/workloads/realworld/uvm_prefetch/darknet/LICENSE.fuck b/workloads/realworld/uvm_prefetch/darknet/LICENSE.fuck new file mode 100644 index 0000000000000000000000000000000000000000..8b1a9d8189b3b9f4479221d52882ce36fdc73a62 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/LICENSE.fuck @@ -0,0 +1,13 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + +Copyright (C) 2004 Sam Hocevar + +Everyone is permitted to copy and distribute verbatim or modified +copies of this license document, and changing it is allowed as long +as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. diff --git a/workloads/realworld/uvm_prefetch/darknet/LICENSE.gen b/workloads/realworld/uvm_prefetch/darknet/LICENSE.gen new file mode 100644 index 0000000000000000000000000000000000000000..c54113271e15057c4def6676693eb96fd6362b28 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/LICENSE.gen @@ -0,0 +1,91 @@ +RNN LICENSE Version 3, June 21 2017 + +Copyright (c) 1990, 1989, 1999 Free87337 May 48 THIRD PARTIES OR ANY OTHER THE +COMPLAIN OR CONSEQUENTIAL DAMAGES AND REGARDLESS OF WHETHER IN CONTRACT, TO THE +EXTENT REPAIR OR AGENTS (NOT THE IN ANY EVENT). 
THE SOFTWARE WILL BE +UNINTERRUPTED OR ERROR-FREE OR ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF ALL THE WORK (GOVERNED CODE) HIM RESPONSES, OR OF FINES, +SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR ANY OTHER OR OTHER HARL UNDER NO +CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), +PATENT PERMITTED BY THE INSTAGRAM PARENT STATE OR TORT (INCLUDING NEGLIGENCE), +PRODUCT LIABILITY OR OTHERWISE, ARISING OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR ANYTHING PROVIDED IN THIS PRODUCT, COMMIS AND SERVICES +ARE LICENSED SOFTWARE AND ANY RESULE OR ANY OTHER THE COPYRIGHT HOLDERS BE +LIABLE FOR ANY SPECIAL, INCIDENTAL, CASE, SUCH WARRANTIES, EXPRESS OR IMPLIED, +INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COPYRIGHT HOLDERS AND/OR ANY +PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY +EXPRESS OR DISTRIBUTE THAT ALL CLAIMS ARE SHALL CREATE DERAVE BE LIABLE TO YOU +WILL HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +6\. TERMINATION. TO THE EXTENT PERMITTED BY LAW, NO USE OF THE COVERED CODE IS +WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE +INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY +SERVICING, REPAIR OR COULT OR IN ANY WAY OUT OF THE USE OF THE WEBSITES OR +SERVICE WILL BE CONSEQUENTIAL DAMAGES OF ANY KIND HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + +This paragraph Agreement constitutes the entire agreement between the parties +with respect to the Work licensed here. However, if you place the name of the +fact that the arbitration was the consultation of the parties as a "patent is". 
+Subject to the terms and conditions of this License, Contributor has knowledge +that a license under a third party may also be used to endorse or promote +products derived from the Work, and there is no warranty on the Software and +Science Fees. For the purposes of this Agreement, attach the following +disclaimers (without liabilities of written notice to the Subject Software) in a +manner that a product is under common control with you. The Free Software +Foundation may publish revised and/or new versions of the License for the +Modifications made by the applicable terms. The Recipient shall promptly retain +the covered works for any reason be entered in any federal or state or login +Restricted Laws appearing in the United States or any of its own information +that is not disabled from a derivative work except as expressly permitted in +this License, to the extent that they are in receiving the Software and Source +Code or any exercise of the rights granted to You by this License or a +Contributor made by the Licensor or are authorized to make a reasonable +retirement by the courts of the courts located in Santa Clara County, California +printed and related to the Work or “Company” and Apache Software Foundation. If +the Licensor shall be entitled to reflect your rights to use the Software and +the Software to exercise the rights granted to the recipient without a +requirement to exercise the rights granted by the Agreement to the provision +will begin will appear in such cases, you will use such information without such +corporation shall be an officer with respect to any part of the Software or any +portion thereof. 
Capitalized terms are included in the Initial Contributor and +under no circumstances will license the Service at any time and for any direct, +indirect, special, incidental, or consequential damages of or assist in +connection with any Services or the registration purposes only to the extent +that it includes any or all means including the processing of which you download +any derivative work. Any of the purchases’ transmission purposes are made +available, if any, in other circumstances, we may review the copyright notice. +In the event that this Agreement is required to give us strict content. The +inclusion of the other party hereunder may also notify you Intellectual Property +Rights to any third party. This means that the Source Code exists of the Work +will not charge a program available to you at any time. You must include a +prominent statement that the Software is governed under a particular version of +this Agreement. You must include a provision to the extent that there is no +warranty for the content of others. You agree that the Recipient was appointed +as a Contributor, (c) are effective until terminated by hereunder, then the +registration are not disabled and not limited to, submit any Customer Data +without the updated use of the Software and that no fee is released. You grant +to Use Other Arbitration Rules for Diagnostic or Services may use or modify the +Apple Software and Consolidated Apple Software or Services. The Company may have +full risk as a product of the Compatible Source. A Contribution by the Licensor +or by the updated Software under the following conditions we can redistribute +any General Provision of this Agreement. If the Program is used in accordance +with the terms of this Agreement, Customer may provide advertisements from your +devices that clause you can your employer or a transaction or country that has +been controlled by the arbitrator, that they will be useful of this Agreement. 
+The term "Open Source Software is available in connection with the program, and +you may not protect the combination of the Covered Code. You should like to +select a user's rights to charge a copy of this License. I are Contributor's +confidentiality of the exercise of the rights granted herein. Such a covered +work is released as a consequence, the Licensor shall be eligible for a purpose +or subcontractor of the person or entity to the user of the user, then the word +"Application" means having the original fee for any reason; and that no patent +license to more than fifty stated close of the license term. The terms of this +License will the license terms and conditions set forth in Section 2.2 (OPEC) +and You will not use the Software or any set of responsibility for any resulting +information that the Original Code warrants that you have the right to disclose +these information (or in the notification; or (iii) late use of the software or +any third party to the three (50) days before such belief to the extent that it +includes a court court obtains the rights granted by this License. diff --git a/workloads/realworld/uvm_prefetch/darknet/LICENSE.gpl b/workloads/realworld/uvm_prefetch/darknet/LICENSE.gpl new file mode 100644 index 0000000000000000000000000000000000000000..9cecc1d4669ee8af2ca727a5d8cde10cd8b2d7cc --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/LICENSE.gpl @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. 
+ + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + {one line to give the program's name and a brief idea of what it does.} + Copyright (C) {year} {name of author} + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + {project} Copyright (C) {year} {fullname} + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/workloads/realworld/uvm_prefetch/darknet/LICENSE.meta b/workloads/realworld/uvm_prefetch/darknet/LICENSE.meta new file mode 100644 index 0000000000000000000000000000000000000000..6728bd28d319c68ae04944fb034118dcc4c9aa09 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/LICENSE.meta @@ -0,0 +1,8 @@ + META-LICENSE + Version 1, June 21 2017 + +Any and all licenses may be applied to the software either individually +or in concert. Any issues, ambiguities, paradoxes, or metaphysical quandries +arising from this combination should be discussed with a local faith leader, +hermit, or guru. The Oxford comma shall be used. + diff --git a/workloads/realworld/uvm_prefetch/darknet/LICENSE.mit b/workloads/realworld/uvm_prefetch/darknet/LICENSE.mit new file mode 100644 index 0000000000000000000000000000000000000000..5bd806ce16ea5053c8631793787362439375026e --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/LICENSE.mit @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2017 Joseph Redmon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/workloads/realworld/uvm_prefetch/darknet/LICENSE.v1 b/workloads/realworld/uvm_prefetch/darknet/LICENSE.v1 new file mode 100644 index 0000000000000000000000000000000000000000..5b8709acc43e7b76ed69758a52a9eaffaba775e6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/LICENSE.v1 @@ -0,0 +1,13 @@ + YOLO LICENSE + Version 1, July 10 2015 + +THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER +SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN +TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES +LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY SUBJECT TO +THE FOLLOWING CONDITIONS: + +1. #yolo +2. #swag +3. #blazeit + diff --git a/workloads/realworld/uvm_prefetch/darknet/Makefile b/workloads/realworld/uvm_prefetch/darknet/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..5022f68377d7626ab768f2501883d72b013dafba --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/Makefile @@ -0,0 +1,114 @@ +GPU=1 +CUDNN=0 +OPENCV=0 +OPENMP=0 +DEBUG=0 + +#ARCH= -gencode arch=compute_30,code=sm_30 \ +# -gencode arch=compute_35,code=sm_35 \ +# -gencode arch=compute_50,code=[sm_50,compute_50] \ +# -gencode arch=compute_52,code=[sm_52,compute_52] +# -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated? 
+ +# This is what I use, uncomment if you know your arch and want to specify +ARCH= -gencode arch=compute_80,code=sm_80 \ +#ARCH= -arch=sm_80 + +VPATH=./src/:./examples:$(CUPTI_ADD_COMMON) +SLIB=libdarknet.so +ALIB=libdarknet.a +EXEC=darknet +OBJDIR=./obj/ + +CC=gcc +CPP=g++ +# NVCC=nvcc --default-stream per-thread +NVCC=nvcc +AR=ar +ARFLAGS=rcs +OPTS=-Ofast +LDFLAGS= -lm -pthread +COMMON= -Iinclude/ -Isrc/ +CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC +ifeq ($(PROFILE), 1) +CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -DPROFILE +endif + + +ifeq ($(OPENMP), 1) +CFLAGS+= -fopenmp +endif + +ifeq ($(DEBUG), 1) +OPTS=-O0 -g +endif + +CFLAGS+=$(OPTS) + +ifeq ($(OPENCV), 1) +COMMON+= -DOPENCV +CFLAGS+= -DOPENCV +LDFLAGS+= `pkg-config --libs opencv` -lstdc++ +COMMON+= `pkg-config --cflags opencv` +endif + +ifeq ($(GPU), 1) +include ../../../common/make.config +COMMON+= -DGPU -I$(CUDA_DIR)/include/ -I$(CUDA_DIR)/extras/CUPTI/include/ +CFLAGS+= -DGPU +LDFLAGS+= -L$(CUDA_DIR)/lib64 -L$(CUDA_DIR)/extras/CUPTI/lib64/ -lcuda -lcudart -lcublas -lcurand -lcupti +endif + + +ifeq ($(CUDNN), 1) +COMMON+= -DCUDNN +CFLAGS+= -DCUDNN +LDFLAGS+= -lcudnn +endif + +OBJ=gemm.o utils.o cuda_dark.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o l2norm_layer.o yolo_layer.o iseg_layer.o image_opencv.o +EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o instance-segmenter.o darknet.o +ifeq 
($(GPU), 1) +LDFLAGS+= -lstdc++ +OBJ+=gemm_kernel.o convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o +endif + +# cpu_timestamps.o +# cupti_add.o + +EXECOBJ = $(addprefix $(OBJDIR), $(EXECOBJA)) +OBJS = $(addprefix $(OBJDIR), $(OBJ)) +DEPS = $(wildcard src/*.h) Makefile include/darknet.h + +all: obj backup results $(SLIB) $(ALIB) $(EXEC) + +$(EXEC): $(EXECOBJ) $(ALIB) + $(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS) $(ALIB) + +$(ALIB): $(OBJS) + $(AR) $(ARFLAGS) $@ $^ + +$(SLIB): $(OBJS) + $(CC) $(CFLAGS) -shared $^ -o $@ $(LDFLAGS) + +$(OBJDIR)%.o: %.cpp $(DEPS) + $(CPP) $(COMMON) $(CFLAGS) -c $< -o $@ + +$(OBJDIR)%.o: %.c $(DEPS) + $(CC) $(COMMON) $(CFLAGS) -c $< -o $@ + +$(OBJDIR)%.o: %.cu $(DEPS) + $(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@ + +obj: + mkdir -p obj +backup: + mkdir -p backup +results: + mkdir -p results + +.PHONY: clean + +clean: + rm -rf $(OBJS) $(SLIB) $(ALIB) $(EXEC) $(EXECOBJ) $(OBJDIR)/* + diff --git a/workloads/realworld/uvm_prefetch/darknet/README.md b/workloads/realworld/uvm_prefetch/darknet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fb58c2640038a963cd573d121e4fab59399f67dc --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/README.md @@ -0,0 +1,124 @@ +![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png) + +# Darknet # +Darknet is an open source neural network framework written in C and CUDA. It is fast, easy to install, and supports CPU and GPU computation. 
+ +**Discord** invite link for for communication and questions: https://discord.gg/zSq8rtW + +## YOLOv7: + +* **paper** - YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors: https://arxiv.org/abs/2207.02696 + +* **source code - Pytorch (use to reproduce results):** https://github.com/WongKinYiu/yolov7 + +---- + +Official YOLOv7 is more accurate and faster than YOLOv5 by **120%** FPS, than YOLOX by **180%** FPS, than Dual-Swin-T by **1200%** FPS, than ConvNext by **550%** FPS, than SWIN-L by **500%** FPS. + +YOLOv7 surpasses all known object detectors in both speed and accuracy in the range from 5 FPS to 160 FPS and has the highest accuracy 56.8% AP among all known real-time object detectors with 30 FPS or higher on GPU V100, batch=1. + +* YOLOv7-e6 (55.9% AP, 56 FPS V100 b=1) by `+500%` FPS faster than SWIN-L Cascade-Mask R-CNN (53.9% AP, 9.2 FPS A100 b=1) +* YOLOv7-e6 (55.9% AP, 56 FPS V100 b=1) by `+550%` FPS faster than ConvNeXt-XL C-M-RCNN (55.2% AP, 8.6 FPS A100 b=1) +* YOLOv7-w6 (54.6% AP, 84 FPS V100 b=1) by `+120%` FPS faster than YOLOv5-X6-r6.1 (55.0% AP, 38 FPS V100 b=1) +* YOLOv7-w6 (54.6% AP, 84 FPS V100 b=1) by `+1200%` FPS faster than Dual-Swin-T C-M-RCNN (53.6% AP, 6.5 FPS V100 b=1) +* YOLOv7x (52.9% AP, 114 FPS V100 b=1) by `+150%` FPS faster than PPYOLOE-X (51.9% AP, 45 FPS V100 b=1) +* YOLOv7 (51.2% AP, 161 FPS V100 b=1) by `+180%` FPS faster than YOLOX-X (51.1% AP, 58 FPS V100 b=1) + +---- + +![more5](https://user-images.githubusercontent.com/4096485/179425274-f55a36d4-8450-4471-816b-8c105841effd.jpg) + +---- + +![image](https://user-images.githubusercontent.com/4096485/177675030-a929ee00-0eba-4d93-95c2-225231d0fd61.png) + + +---- + +![yolov7_640_1280](https://user-images.githubusercontent.com/4096485/177688869-d75e0c36-63af-46ec-bdbd-81dbb281f257.png) + +---- + +## Scaled-YOLOv4: + +* **paper (CVPR 2021)**: 
https://openaccess.thecvf.com/content/CVPR2021/html/Wang_Scaled-YOLOv4_Scaling_Cross_Stage_Partial_Network_CVPR_2021_paper.html + +* **source code - Pytorch (use to reproduce results):** https://github.com/WongKinYiu/ScaledYOLOv4 + +* **source code - Darknet:** https://github.com/AlexeyAB/darknet + +* **Medium:** https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982?source=friends_link&sk=c8553bfed861b1a7932f739d26f487c8 + +## YOLOv4: + +* **paper:** https://arxiv.org/abs/2004.10934 + +* **source code:** https://github.com/AlexeyAB/darknet + +* **Wiki:** https://github.com/AlexeyAB/darknet/wiki + +* **useful links:** https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7 + +For more information see the [Darknet project website](http://pjreddie.com/darknet). + + +
Expand + +![yolo_progress](https://user-images.githubusercontent.com/4096485/146988929-1ed0cbec-1e01-4ad0-b42c-808dcef32994.png) https://paperswithcode.com/sota/object-detection-on-coco + +---- + +![scaled_yolov4](https://user-images.githubusercontent.com/4096485/112776361-281d8380-9048-11eb-8083-8728b12dcd55.png) AP50:95 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2011.08036 + +---- + +![YOLOv4Tiny](https://user-images.githubusercontent.com/4096485/101363015-e5c21200-38b1-11eb-986f-b3e516e05977.png) + +---- + +![YOLOv4](https://user-images.githubusercontent.com/4096485/90338826-06114c80-dff5-11ea-9ba2-8eb63a7409b3.png) + +
+ +---- + +![OpenCV_TRT](https://user-images.githubusercontent.com/4096485/90338805-e5e18d80-dff4-11ea-8a68-5710956256ff.png) + + +## Citation + + +``` +@misc{https://doi.org/10.48550/arxiv.2207.02696, + doi = {10.48550/ARXIV.2207.02696}, + url = {https://arxiv.org/abs/2207.02696}, + author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + publisher = {arXiv}, + year = {2022}, + copyright = {arXiv.org perpetual, non-exclusive license} +} +``` + +``` +@misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +``` +@InProceedings{Wang_2021_CVPR, + author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + title = {{Scaled-YOLOv4}: Scaling Cross Stage Partial Network}, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2021}, + pages = {13029-13038} +} +``` diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/alexnet.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/alexnet.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e2ed4bb8e7b1bad7859aef0d802cb4084753cb7a --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/alexnet.cfg @@ -0,0 +1,96 @@ +[net] +# Training +# batch=128 +# subdivisions=1 +# Testing +batch=1 +subdivisions=1 +height=227 +width=227 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=256 + +learning_rate=0.01 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue = .1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +filters=96 
+size=11 +stride=4 +pad=0 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[convolutional] +filters=256 +size=5 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[convolutional] +filters=384 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=384 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=1000 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/cifar.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/cifar.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b2f69f53903e55c24718ed12d9adaaa1557e3647 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/cifar.cfg @@ -0,0 +1,121 @@ +[net] +batch=128 +subdivisions=1 +height=28 +width=28 +channels=3 +max_crop=32 +min_crop=32 + +hue=.1 +saturation=.75 +exposure=.75 + +learning_rate=0.4 +policy=poly +power=4 +max_batches = 5000 +momentum=0.9 +decay=0.0005 + + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[dropout] +probability=.5 + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/cifar.data b/workloads/realworld/uvm_prefetch/darknet/cfg/cifar.data new file mode 100644 index 0000000000000000000000000000000000000000..a52208db1b203b5e1f24d5afaf8c7002adfd71a3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/cifar.data @@ -0,0 +1,7 @@ +classes=10 +train = data/cifar/train.list +valid = data/cifar/test.list +test = data/cifar/test.list +labels = data/cifar/labels.txt +backup = backup/ +top=2 diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/cifar.test.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/cifar.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..18b6c54c909152b1201d6320b85fafc5c36ba1ef --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/cifar.test.cfg @@ -0,0 +1,117 @@ +[net] +batch=128 +subdivisions=1 +height=32 +width=32 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.4 +policy=poly +power=4 +max_batches = 50000 + + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + 
+[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[dropout] +probability=.5 + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 +temperature=3 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/cifar_small.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/cifar_small.cfg new file mode 100644 index 0000000000000000000000000000000000000000..d48b1231f0131faaa187b18df6705411c3d16a76 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/cifar_small.cfg @@ -0,0 +1,86 @@ +[net] +batch=128 +subdivisions=1 +height=28 +width=28 +channels=3 +max_crop=32 +min_crop=32 + +hue=.1 +saturation=.75 +exposure=.75 + +learning_rate=0.1 +policy=poly +power=4 +max_batches = 5000 +momentum=0.9 +decay=0.0005 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/coco.data b/workloads/realworld/uvm_prefetch/darknet/cfg/coco.data new file mode 100644 index 0000000000000000000000000000000000000000..5951d5245a7895e8418bc3155de3b03049e76c42 --- 
/dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/coco.data @@ -0,0 +1,6 @@ +classes= 80 +train = /data/darknet/coco/valid.list +valid = /data/darknet/coco/valid.list +backup = /data/darknet/backup/ +names = /data/darknet/coco/coco.names +eval=coco diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/coco.names b/workloads/realworld/uvm_prefetch/darknet/cfg/coco.names new file mode 100644 index 0000000000000000000000000000000000000000..16315f2becec9705017bfaf1b9fb81ca2a83c0b0 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/coco.names @@ -0,0 +1,80 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +sofa +pottedplant +bed +diningtable +toilet +tvmonitor +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/combine9k.data b/workloads/realworld/uvm_prefetch/darknet/cfg/combine9k.data new file mode 100644 index 0000000000000000000000000000000000000000..06a3e76aefac9c1074c3dfe159bc115a92b0791e --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/combine9k.data @@ -0,0 +1,10 @@ +classes= 9418 +#train = /home/pjreddie/data/coco/trainvalno5k.txt +train = data/combine9k.train.list +valid = /home/pjreddie/data/imagenet/det.val.files +labels = data/9k.labels +names = data/9k.names +backup = backup/ +map = data/inet9k.map +eval = imagenet +results = results diff --git 
a/workloads/realworld/uvm_prefetch/darknet/cfg/darknet.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/darknet.cfg new file mode 100644 index 0000000000000000000000000000000000000000..375107f7c196baf7adf229a7cfffc84739875828 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/darknet.cfg @@ -0,0 +1,120 @@ +[net] +# Training +# batch=128 +# subdivisions=1 +# Testing +batch=1 +subdivisions=1 +height=256 +width=256 +min_crop=128 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/darknet19.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/darknet19.cfg new file mode 100644 index 0000000000000000000000000000000000000000..28ac9669ef686b4d638a5bf462451962fec45a4e --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/darknet19.cfg @@ -0,0 +1,205 @@ +[net] +# Training +#batch=128 +#subdivisions=2 + +# Testing + 
batch=1 + subdivisions=1 + +height=256 +width=256 +min_crop=128 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/darknet19_448.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/darknet19_448.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c6df7306d3ef0622e0a0e0cbd6a5603699344e56 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/darknet19_448.cfg @@ -0,0 +1,197 @@ +[net] +batch=128 +subdivisions=4 +height=448 +width=448 +max_crop=512 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 + +angle=7 +hue = .1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/darknet53.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/darknet53.cfg new file mode 100644 index 0000000000000000000000000000000000000000..7b6d5766e9ec48ee19a74321583b44621c1e07b3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/darknet53.cfg @@ -0,0 +1,566 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 
+size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/darknet53_448.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/darknet53_448.cfg new file mode 100644 index 0000000000000000000000000000000000000000..dedab1b97c7e5d4226f061e6c983046d7a278dd0 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/darknet53_448.cfg @@ -0,0 +1,559 @@ +[net] +# Training - start training with darknet53.weights +# batch=128 +# subdivisions=8 + +# Testing +batch=1 
+subdivisions=1 + +height=448 +width=448 +channels=3 +min_crop=448 +max_crop=512 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 +momentum=0.9 +decay=0.0005 + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/darknet9000.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/darknet9000.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9dd2dfbbf5a7137faada4e091b8e6d48233f09bf --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/darknet9000.cfg @@ -0,0 +1,205 @@ +[net] +# Training +# batch=128 +# subdivisions=4 +# Testing +batch = 1 +subdivisions = 1 +height=448 +width=448 +max_crop=512 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=9418 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 +tree=data/9k.tree + +[cost] +type=masked + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/densenet201.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/densenet201.cfg new file mode 100644 index 0000000000000000000000000000000000000000..65b4aecc52d45075f2913e3d63b9aec0527fa44c --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/densenet201.cfg @@ -0,0 +1,1951 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky 
+ +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 
+size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] 
+batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] 
+layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 
+pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/extraction.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/extraction.cfg new file mode 100644 index 0000000000000000000000000000000000000000..66cb15f80e9a5e811223299594882a3b5d9485dc --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/extraction.cfg @@ -0,0 +1,209 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=224 +width=224 +max_crop=320 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/extraction.conv.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/extraction.conv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..2a7d09ec80fa2f47e1ebb4134b7845d5cae2b828 --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch/darknet/cfg/extraction.conv.cfg @@ -0,0 +1,179 @@ +[net] +batch=1 +subdivisions=1 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.5 +policy=poly +power=6 +max_batches=500000 + +[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[connected] +output=1000 +activation=leaky + +[softmax] +groups=1 + diff --git 
a/workloads/realworld/uvm_prefetch/darknet/cfg/extraction22k.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/extraction22k.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b5f54090d00537fdca72f54bb2eed69dd78f5b00 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/extraction22k.cfg @@ -0,0 +1,206 @@ +[net] +batch=128 +subdivisions=1 +height=224 +width=224 +max_crop=320 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +max_batches = 0 +policy=steps +steps=444000,590000,970000 +scales=.5,.2,.1 + +#policy=sigmoid +#gamma=.00008 +#step=100000 +#max_batches=200000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[connected] +output=21842 +activation=leaky + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/go.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/go.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c730092ff3ffda0124baeace050bd382c442d88d --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/go.cfg @@ -0,0 +1,132 @@ +[net] +batch=512 +subdivisions=1 +height=19 +width=19 +channels=1 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=10000000 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 
+stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=1 +size=1 +stride=1 +pad=1 +activation=linear + +[reorg] +extra=1 +stride=1 + +[softmax] + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/go.test.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/go.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..1e4e43809bf3ede20a67b5020fcca0f61612e8f7 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/go.test.cfg @@ -0,0 +1,132 @@ +[net] +batch=1 +subdivisions=1 +height=19 +width=19 +channels=1 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +policy=poly +power=4 +max_batches=100000 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 
+size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=1 +size=1 +stride=1 +pad=1 +activation=linear + +[reorg] +extra=1 +stride=1 + +[softmax] + + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/gru.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/gru.cfg new file mode 100644 index 0000000000000000000000000000000000000000..6064221289d41dc3ee464a715ae05593a02f34f4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/gru.cfg @@ -0,0 +1,29 @@ +[net] +inputs=256 +momentum=0.9 +decay=0.0 +subdivisions=1 +batch = 1 +time_steps=1 +learning_rate=.002 +adam=1 + +policy=constant +power=4 +max_batches=1000000 + +[gru] +output = 256 + +[gru] +output = 256 + +[gru] +output = 256 + +[connected] +output=256 +activation=linear + +[softmax] + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/imagenet.labels.list b/workloads/realworld/uvm_prefetch/darknet/cfg/imagenet.labels.list new file mode 100644 index 0000000000000000000000000000000000000000..e73d41762d311df7f7eefec0f65ab12a7bacc023 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/imagenet.labels.list @@ -0,0 +1,21842 @@ +n02119789 +n02100735 +n02110185 +n02096294 +n02102040 +n02066245 +n02509815 +n02124075 +n02417914 +n02123394 +n02125311 +n02423022 +n02346627 +n02077923 +n02110063 +n02447366 +n02109047 +n02089867 +n02102177 +n02091134 +n02092002 +n02071294 +n02442845 +n02504458 +n02092339 +n02098105 +n02096437 +n02114712 +n02105641 +n02128925 +n02091635 +n02088466 +n02096051 +n02117135 +n02138441 +n02097130 +n02493509 +n02457408 +n02389026 +n02443484 +n02110341 +n02089078 +n02086910 +n02445715 +n02093256 +n02113978 +n02106382 +n02441942 +n02113712 +n02113186 +n02105162 +n02415577 +n02356798 +n02488702 +n02123159 +n02098413 +n02422699 +n02114855 +n02094433 
+n02111277 +n02132136 +n02119022 +n02091467 +n02106550 +n02422106 +n02091831 +n02120505 +n02104365 +n02086079 +n02112706 +n02098286 +n02095889 +n02484975 +n02137549 +n02500267 +n02129604 +n02090721 +n02396427 +n02108000 +n02391049 +n02412080 +n02108915 +n02480495 +n02110806 +n02128385 +n02107683 +n02085936 +n02094114 +n02087046 +n02100583 +n02096177 +n02494079 +n02105056 +n02101556 +n02123597 +n02481823 +n02105505 +n02088094 +n02085782 +n02489166 +n02364673 +n02114548 +n02134084 +n02480855 +n02090622 +n02113624 +n02093859 +n02403003 +n02097298 +n02108551 +n02493793 +n02107142 +n02096585 +n02107574 +n02107908 +n02086240 +n02102973 +n02112018 +n02093647 +n02397096 +n02437312 +n02483708 +n02097047 +n02106030 +n02099601 +n02093991 +n02110627 +n02106166 +n02326432 +n02108089 +n02097658 +n02088364 +n02111129 +n02100236 +n02486261 +n02115913 +n02486410 +n02487347 +n02099849 +n02108422 +n02104029 +n02492035 +n02110958 +n02099429 +n02094258 +n02099267 +n02395406 +n02112350 +n02109961 +n02101388 +n02113799 +n02095570 +n02128757 +n02101006 +n02115641 +n02097209 +n02342885 +n02097474 +n02120079 +n02095314 +n02088238 +n02408429 +n02133161 +n02328150 +n02410509 +n02492660 +n02398521 +n02112137 +n02510455 +n02093428 +n02105855 +n02111500 +n02085620 +n02123045 +n02490219 +n02099712 +n02109525 +n02454379 +n02111889 +n02088632 +n02090379 +n02443114 +n02361337 +n02105412 +n02483362 +n02437616 +n02107312 +n02325366 +n02091032 +n02129165 +n02102318 +n02100877 +n02074367 +n02504013 +n02363005 +n02102480 +n02113023 +n02086646 +n02497673 +n02087394 +n02127052 +n02116738 +n02488291 +n02091244 +n02114367 +n02130308 +n02089973 +n02105251 +n02134418 +n02093754 +n02106662 +n02444819 +n01882714 +n01871265 +n01872401 +n01877812 +n01873310 +n01883070 +n04086273 +n04507155 +n04147183 +n04254680 +n02672831 +n02219486 +n02317335 +n01968897 +n03452741 +n03642806 +n07745940 +n02690373 +n04552348 +n02692877 +n02782093 +n04266014 +n03344393 +n03447447 +n04273569 +n03662601 +n02951358 +n04612504 
+n02981792 +n04483307 +n03095699 +n03673027 +n03947888 +n02687172 +n04347754 +n04606251 +n03478589 +n04389033 +n03773504 +n02860847 +n03218198 +n02835271 +n03792782 +n03393912 +n03895866 +n02797295 +n04204347 +n03791053 +n03384352 +n03272562 +n04310018 +n02704792 +n02701002 +n02814533 +n02930766 +n03100240 +n03594945 +n03670208 +n03770679 +n03777568 +n04037443 +n04285008 +n03444034 +n03445924 +n03785016 +n04252225 +n03345487 +n03417042 +n03930630 +n04461696 +n04467665 +n03796401 +n03977966 +n04065272 +n04335435 +n04252077 +n04465501 +n03776460 +n04482393 +n04509417 +n03538406 +n03599486 +n03868242 +n02804414 +n03125729 +n03131574 +n03388549 +n02870880 +n03018349 +n03742115 +n03016953 +n04380533 +n03337140 +n03891251 +n02791124 +n04429376 +n03376595 +n04099969 +n04344873 +n04447861 +n03179701 +n03982430 +n03201208 +n03290653 +n04550184 +n07742313 +n07747607 +n07749582 +n07753113 +n07753275 +n07753592 +n07754684 +n07760859 +n07768694 +n12267677 +n12620546 +n13133613 +n11879895 +n12144580 +n12768682 +n03854065 +n04515003 +n03017168 +n03249569 +n03447721 +n03720891 +n03721384 +n04311174 +n02787622 +n02992211 +n04536866 +n03495258 +n02676566 +n03272010 +n03110669 +n03394916 +n04487394 +n03494278 +n03840681 +n03884397 +n02804610 +n03838899 +n04141076 +n03372029 +n11939491 +n12057211 +n09246464 +n09468604 +n09193705 +n09472597 +n09399592 +n09421951 +n09256479 +n09332890 +n09428293 +n09288635 +n03498962 +n03041632 +n03658185 +n03954731 +n03995372 +n03649909 +n03481172 +n03109150 +n02951585 +n03970156 +n04154565 +n04208210 +n03967562 +n03000684 +n01514668 +n01514859 +n01518878 +n01530575 +n01531178 +n01532829 +n01534433 +n01537544 +n01558993 +n01560419 +n01580077 +n01582220 +n01592084 +n01601694 +n01608432 +n01614925 +n01616318 +n01622779 +n01795545 +n01796340 +n01797886 +n01798484 +n01806143 +n01806567 +n01807496 +n01817953 +n01818515 +n01819313 +n01820546 +n01824575 +n01828970 +n01829413 +n01833805 +n01843065 +n01843383 +n01847000 +n01855032 +n01855672 +n01860187 
+n02002556 +n02002724 +n02006656 +n02007558 +n02009912 +n02009229 +n02011460 +n02012849 +n02013706 +n02018207 +n02018795 +n02025239 +n02027492 +n02028035 +n02033041 +n02037110 +n02017213 +n02051845 +n02056570 +n02058221 +n01484850 +n01491361 +n01494475 +n01496331 +n01498041 +n02514041 +n02536864 +n01440764 +n01443537 +n02526121 +n02606052 +n02607072 +n02643566 +n02655020 +n02640242 +n02641379 +n01664065 +n01665541 +n01667114 +n01667778 +n01669191 +n01675722 +n01677366 +n01682714 +n01685808 +n01687978 +n01688243 +n01689811 +n01692333 +n01693334 +n01694178 +n01695060 +n01704323 +n01697457 +n01698640 +n01728572 +n01728920 +n01729322 +n01729977 +n01734418 +n01735189 +n01737021 +n01739381 +n01740131 +n01742172 +n01744401 +n01748264 +n01749939 +n01751748 +n01753488 +n01755581 +n01756291 +n01629819 +n01630670 +n01631663 +n01632458 +n01632777 +n01641577 +n01644373 +n01644900 +n04579432 +n04592741 +n03876231 +n03483316 +n03868863 +n04251144 +n03691459 +n03759954 +n04152593 +n03793489 +n03271574 +n03843555 +n04332243 +n04265275 +n04330267 +n03467068 +n02794156 +n04118776 +n03841143 +n04141975 +n02708093 +n03196217 +n04548280 +n03544143 +n04355338 +n03891332 +n04328186 +n03197337 +n04317175 +n04376876 +n03706229 +n02841315 +n04009552 +n04356056 +n03692522 +n04044716 +n02879718 +n02950826 +n02749479 +n04090263 +n04008634 +n03085013 +n04505470 +n03126707 +n03666591 +n02666196 +n02977058 +n04238763 +n03180011 +n03485407 +n03832673 +n06359193 +n03496892 +n04428191 +n04004767 +n04243546 +n04525305 +n04179913 +n03602883 +n04372370 +n03532672 +n02974003 +n03874293 +n03944341 +n03992509 +n03425413 +n02966193 +n04371774 +n04067472 +n04040759 +n04019541 +n03492542 +n04355933 +n03929660 +n02965783 +n04258138 +n04074963 +n03208938 +n02910353 +n03476684 +n03627232 +n03075370 +n03874599 +n03804744 +n04127249 +n04153751 +n03803284 +n04162706 +n04228054 +n02948072 +n03590841 +n04286575 +n04456115 +n03814639 +n03933933 +n04485082 +n03733131 +n03794056 +n04275548 +n01768244 +n01770081 
+n01770393 +n01773157 +n01773549 +n01773797 +n01774384 +n01774750 +n01775062 +n01776313 +n01784675 +n01990800 +n01978287 +n01978455 +n01980166 +n01981276 +n01983481 +n01984695 +n01985128 +n01986214 +n02165105 +n02165456 +n02167151 +n02168699 +n02169497 +n02172182 +n02174001 +n02177972 +n02190166 +n02206856 +n02226429 +n02229544 +n02231487 +n02233338 +n02236044 +n02256656 +n02259212 +n02264363 +n02268443 +n02268853 +n02276258 +n02277742 +n02279972 +n02280649 +n02281406 +n02281787 +n01910747 +n01914609 +n01917289 +n01924916 +n01930112 +n01943899 +n01944390 +n01945685 +n01950731 +n01955084 +n02319095 +n02321529 +n03584829 +n03297495 +n03761084 +n03259280 +n04111531 +n04442312 +n04542943 +n04517823 +n03207941 +n04070727 +n04554684 +n03133878 +n03400231 +n04596742 +n02939185 +n03063689 +n04398044 +n04270147 +n02699494 +n04486054 +n03899768 +n04311004 +n04366367 +n04532670 +n02793495 +n03457902 +n03877845 +n03781244 +n03661043 +n02727426 +n02859443 +n03028079 +n03788195 +n04346328 +n03956157 +n04081281 +n03032252 +n03529860 +n03697007 +n03065424 +n03837869 +n04458633 +n02980441 +n04005630 +n03461385 +n02776631 +n02791270 +n02871525 +n02927161 +n03089624 +n04200800 +n04443257 +n04462240 +n03388043 +n03042490 +n04613696 +n03216828 +n02892201 +n03743016 +n02788148 +n02894605 +n03160309 +n03000134 +n03930313 +n04604644 +n04326547 +n03459775 +n04239074 +n04501370 +n03792972 +n04149813 +n03530642 +n03961711 +n03903868 +n02814860 +n07711569 +n07720875 +n07714571 +n07714990 +n07715103 +n07716358 +n07716906 +n07717410 +n07717556 +n07718472 +n07718747 +n07730033 +n07734744 +n04209239 +n03594734 +n02971356 +n03485794 +n04133789 +n02747177 +n04125021 +n07579787 +n03814906 +n03134739 +n03404251 +n04423845 +n03877472 +n04120489 +n03062245 +n03014705 +n03717622 +n03777754 +n04493381 +n04476259 +n02777292 +n07693725 +n03998194 +n03617480 +n07590611 +n04579145 +n03623198 +n07248320 +n04277352 +n04229816 +n02823428 +n03127747 +n02877765 +n04435653 +n03724870 +n03710637 +n03920288 
+n03379051 +n02807133 +n04399382 +n03527444 +n03983396 +n03924679 +n04532106 +n06785654 +n03445777 +n07613480 +n04350905 +n04562935 +n03325584 +n03045698 +n07892512 +n03250847 +n04192698 +n03026506 +n03534580 +n07565083 +n04296562 +n02869837 +n07871810 +n02799071 +n03314780 +n04141327 +n04357314 +n02823750 +n13052670 +n07583066 +n03637318 +n04599235 +n07802026 +n02883205 +n03709823 +n04560804 +n02909870 +n03207743 +n04263257 +n07932039 +n03786901 +n04479046 +n03873416 +n02999410 +n04367480 +n03775546 +n07875152 +n04591713 +n04201297 +n02916936 +n03240683 +n02840245 +n02963159 +n04370456 +n03991062 +n02843684 +n03482405 +n03942813 +n03908618 +n03902125 +n07584110 +n02730930 +n04023962 +n02769748 +n10148035 +n02817516 +n03908714 +n02906734 +n03788365 +n02667093 +n03787032 +n03980874 +n03141823 +n03976467 +n04264628 +n07930864 +n04039381 +n06874185 +n04033901 +n04041544 +n07860988 +n03146219 +n03763968 +n03676483 +n04209133 +n03782006 +n03857828 +n03775071 +n02892767 +n07684084 +n04522168 +n03764736 +n04118538 +n03887697 +n13044778 +n03291819 +n03770439 +n03124170 +n04487081 +n03916031 +n02808440 +n07697537 +n12985857 +n02917067 +n03938244 +n15075141 +n02978881 +n02966687 +n03633091 +n13040303 +n03690938 +n03476991 +n02669723 +n03220513 +n03127925 +n04584207 +n07880968 +n03937543 +n03000247 +n04418357 +n04590129 +n02795169 +n04553703 +n02783161 +n02802426 +n02808304 +n03124043 +n03450230 +n04589890 +n12998815 +n02992529 +n03825788 +n02790996 +n03710193 +n03630383 +n03347037 +n03769881 +n03871628 +n03733281 +n03976657 +n03535780 +n04259630 +n03929855 +n04049303 +n04548362 +n02979186 +n06596364 +n03935335 +n06794110 +n02825657 +n03388183 +n04591157 +n04540053 +n03866082 +n04136333 +n04026417 +n02865351 +n02834397 +n03888257 +n04235860 +n04404412 +n04371430 +n03733805 +n07920052 +n07873807 +n02895154 +n04204238 +n04597913 +n04131690 +n07836838 +n09835506 +n03443371 +n13037406 +n04336792 +n04557648 +n03187595 +n04254120 +n03595614 +n04146614 +n03598930 +n03958227 
+n04069434 +n03188531 +n02786058 +n07615774 +n04525038 +n04409515 +n03424325 +n03223299 +n03680355 +n07614500 +n07695742 +n04033995 +n03710721 +n04392985 +n03047690 +n03584254 +n13054560 +n10565667 +n03950228 +n03729826 +n02837789 +n04254777 +n02988304 +n03657121 +n04417672 +n04523525 +n02815834 +n09229709 +n07697313 +n03888605 +n03355925 +n03063599 +n04116512 +n04325704 +n07831146 +n03255030 +n00483313 +n02432291 +n02356381 +n02377388 +n04028764 +n04381587 +n02279257 +n04168199 +n00445055 +n02461128 +n03626760 +n04313503 +n00451635 +n02509515 +n04224842 +n09403734 +n02769290 +n13054073 +n03163222 +n00464478 +n03087069 +n04477219 +n03445617 +n00449054 +n00483705 +n04395106 +n03389611 +n04285965 +n04166281 +n04003856 +n03696301 +n00475787 +n04587404 +n09218641 +n02276355 +n03592669 +n04459909 +n04492375 +n09447666 +n00463543 +n04148703 +n04591517 +n03970546 +n04297750 +n02782778 +n02383231 +n03693474 +n02277094 +n03766044 +n02056228 +n03394272 +n03047052 +n00434075 +n04185946 +n02411999 +n03858418 +n12833149 +n02836035 +n03108853 +n04587559 +n04138261 +n02278024 +n03063485 +n02774921 +n09475044 +n02811204 +n03329302 +n04026813 +n03986562 +n03379204 +n03426134 +n02790669 +n03487090 +n03548402 +n08614632 +n04054361 +n03421485 +n03302671 +n03098959 +n02970408 +n03772584 +n03064935 +n09415584 +n11715430 +n12024445 +n02710201 +n03475581 +n13142504 +n03396074 +n03211789 +n03914337 +n03678558 +n03233123 +n00453396 +n00454395 +n00440382 +n04289027 +n00445226 +n11953610 +n04128413 +n00480211 +n00470966 +n12547503 +n03085219 +n02275773 +n02692086 +n04257790 +n00448748 +n02686379 +n12328567 +n03432129 +n03859000 +n12091377 +n02124313 +n00442847 +n04603399 +n03114379 +n02920369 +n03818343 +n02946127 +n02978055 +n12914923 +n02705429 +n00448232 +n12882945 +n04289690 +n07606669 +n02056728 +n11848479 +n03046921 +n12282933 +n02867966 +n12821505 +n02812949 +n04545305 +n02699770 +n04395651 +n02900160 +n04099003 +n02054711 +n12606545 +n03356858 +n01859190 +n03643737 +n02962200 
+n03123553 +n09361517 +n02793089 +n00449517 +n02783994 +n10117851 +n12038585 +n04383839 +n10142391 +n07719213 +n03536122 +n02472987 +n03454536 +n11728099 +n02392824 +n03795758 +n04282872 +n00448872 +n02404432 +n03797182 +n03029197 +n03665924 +n12477163 +n02769963 +n03863262 +n01532325 +n04165409 +n04593077 +n04473108 +n03577090 +n09988063 +n00446804 +n03931765 +n00475014 +n02700064 +n03240892 +n12475242 +n11735053 +n04053508 +n02852173 +n02382750 +n03823111 +n04543772 +n04112147 +n04433585 +n03175189 +n03596543 +n00445685 +n03307792 +n04589593 +n01814217 +n02993368 +n04303497 +n02811350 +n03355768 +n03699591 +n04590553 +n01893825 +n12726670 +n09916348 +n11544015 +n01318894 +n02133704 +n02367492 +n04506289 +n02069974 +n01900150 +n03207835 +n03363549 +n02831595 +n04970470 +n04160847 +n03767203 +n03928814 +n02302969 +n02918595 +n10401331 +n04231272 +n03717447 +n03063968 +n03380724 +n00825773 +n09988493 +n02740300 +n04539794 +n04121511 +n01323599 +n12937130 +n02428508 +n02980036 +n12061380 +n01887787 +n04214046 +n01787835 +n00466630 +n02979290 +n03927091 +n03231368 +n03904657 +n04469003 +n04196502 +n02122948 +n04544325 +n07868340 +n13876561 +n11925898 +n12158443 +n01595450 +n12454705 +n02069412 +n09618957 +n02357111 +n00451563 +n04197110 +n02276902 +n03111296 +n03909020 +n12303083 +n02082791 +n01956764 +n04269822 +n04207343 +n02433318 +n01888181 +n12682668 +n01592387 +n09793141 +n00466273 +n04026180 +n06255081 +n12172364 +n10145590 +n12311579 +n12173912 +n03822171 +n03140292 +n03027625 +n02739427 +n02060133 +n02431785 +n03219010 +n00447957 +n11910271 +n03620967 +n12547215 +n02409508 +n04290079 +n12329260 +n13901858 +n02008497 +n10304914 +n04524142 +n04279462 +n04233124 +n09733793 +n12822115 +n09475179 +n10151760 +n03418618 +n12858397 +n07735510 +n03549473 +n10098245 +n03653583 +n10604380 +n03375575 +n03885293 +n01527347 +n03237340 +n02760658 +n11953038 +n03187268 +n03004275 +n02393161 +n11965218 +n08580944 +n03938725 +n03900979 +n04144241 +n03760310 +n02376679 
+n03237992 +n09432283 +n02379908 +n09918554 +n04041747 +n12012111 +n10331167 +n01612122 +n10147935 +n07691539 +n11669786 +n09403427 +n01935395 +n09903501 +n04439585 +n04459018 +n02780704 +n03720163 +n12899752 +n04118635 +n03404149 +n02429456 +n00449168 +n04516354 +n04317833 +n12075299 +n07878647 +n09438940 +n03361550 +n02027357 +n04317976 +n03092883 +n04526964 +n03985069 +n03610682 +n04028581 +n02277268 +n09433839 +n03846431 +n03919289 +n10146104 +n10260706 +n02686227 +n03321103 +n00444846 +n01558307 +n01595168 +n03919096 +n11844892 +n04260364 +n02750070 +n03034244 +n03002096 +n04273972 +n11814584 +n04605321 +n07745466 +n02922798 +n03361380 +n12651229 +n08521623 +n04498389 +n00453313 +n04967882 +n12024690 +n03934656 +n02685082 +n04501550 +n09972458 +n03055418 +n07763629 +n03902756 +n09938449 +n09712696 +n02387346 +n03133415 +n07711080 +n03129753 +n03524150 +n02275560 +n03993053 +n03438661 +n11939180 +n00466524 +n11753355 +n03456024 +n03421324 +n07890540 +n11720643 +n02057035 +n00453126 +n04453037 +n01540832 +n03546235 +n03370387 +n02041875 +n02871439 +n03262072 +n01786646 +n02430830 +n02799175 +n05262422 +n03854722 +n12817694 +n04449966 +n01564773 +n02034971 +n03490119 +n02822579 +n07879953 +n04110178 +n04963588 +n04252653 +n01565078 +n02389128 +n02779435 +n10645017 +n04582205 +n08573842 +n10146002 +n03892178 +n03119396 +n03813078 +n07866868 +n03160740 +n03371875 +n02417387 +n03904782 +n03098688 +n02902687 +n01828556 +n04401680 +n04590933 +n01575401 +n07693048 +n02901114 +n03047941 +n04355511 +n11849871 +n10738111 +n03122073 +n12052787 +n01594004 +n01549886 +n02824058 +n03506184 +n11487732 +n12574866 +n12948053 +n10091450 +n00470554 +n00326094 +n12093329 +n04438897 +n07818995 +n12828791 +n13901321 +n10613996 +n10159533 +n02669295 +n02843158 +n06415688 +n14858292 +n09813219 +n12485653 +n03200231 +n02089468 +n03935234 +n01539925 +n12428076 +n10439373 +n01536644 +n02694662 +n02123242 +n03002711 +n03363749 +n02669534 +n03451798 +n11927215 +n02679257 +n09475925 
+n10015485 +n12422129 +n03946162 +n02377291 +n07871720 +n12622297 +n12782915 +n01579260 +n11838916 +n10267311 +n12824053 +n03340723 +n02276749 +n04439712 +n02126139 +n04188179 +n02386853 +n07942152 +n02499316 +n04324387 +n10635788 +n04234887 +n12237641 +n03713436 +n04960582 +n04076713 +n01646292 +n03947798 +n02840134 +n04476972 +n09822830 +n03551395 +n04533802 +n02918964 +n00474657 +n12932966 +n01615458 +n01806364 +n12458550 +n11784497 +n03557360 +n10638922 +n09889941 +n10689306 +n03358172 +n04295571 +n06596607 +n11853356 +n00482122 +n11760785 +n03150232 +n11778257 +n03059685 +n10105733 +n04104384 +n07691237 +n04326676 +n07684938 +n12666965 +n04177820 +n13918387 +n03398153 +n03914438 +n09932098 +n02988486 +n02977619 +n03317788 +n03484487 +n02988679 +n04062428 +n02568087 +n12866162 +n04227144 +n07875436 +n04082886 +n11753700 +n00470682 +n02122298 +n10145239 +n12755727 +n04214282 +n01852671 +n02378969 +n04108822 +n10382825 +n12392549 +n03973839 +n12258885 +n11782761 +n12389501 +n02940570 +n03405595 +n02969323 +n03207630 +n10169147 +n03805725 +n09847543 +n02415253 +n07880080 +n04305572 +n02042180 +n07565161 +n02871147 +n04438507 +n04445154 +n07842433 +n12029635 +n09750282 +n09621232 +n01858906 +n02761206 +n03986355 +n12591351 +n13916721 +n02905036 +n11894770 +n02377603 +n12924623 +n03950899 +n09454153 +n10247358 +n05261310 +n11943660 +n10804287 +n03560430 +n01756089 +n10618342 +n04283378 +n13926786 +n04238321 +n04393549 +n04461879 +n03502200 +n00440941 +n03494706 +n04148579 +n13902336 +n02780815 +n10726031 +n04124098 +n12344483 +n04384910 +n07681450 +n02030837 +n04059157 +n09247410 +n02714751 +n08633683 +n04520784 +n10141732 +n12371439 +n04499062 +n02931148 +n07609632 +n04536335 +n02874537 +n03013438 +n11786539 +n11690455 +n07600696 +n00478262 +n00466712 +n03399677 +n12441183 +n07895962 +n11966083 +n02990373 +n04241249 +n02068541 +n12513933 +n02356977 +n04252560 +n04087826 +n03455488 +n07619409 +n09787534 +n03680942 +n00446980 +n12384839 +n03416900 +n07821758 
+n11853813 +n01606522 +n11780148 +n04969242 +n12413880 +n04130257 +n01322604 +n03061211 +n01959492 +n02842573 +n04313628 +n03815149 +n02445394 +n08547544 +n03222176 +n04070003 +n03075768 +n09695979 +n02877266 +n08583292 +n02870676 +n03657511 +n01621635 +n04284341 +n04136161 +n02836174 +n10247880 +n01744100 +n02882894 +n03408444 +n03411079 +n02366959 +n04399158 +n04542715 +n02787435 +n04251701 +n13863020 +n07890226 +n12245319 +n12849952 +n11626826 +n00887544 +n03140431 +n03519387 +n03855604 +n07906111 +n02054036 +n11954161 +n03038281 +n00450998 +n12136392 +n02119477 +n04356925 +n02406647 +n04450133 +n12545635 +n01565599 +n02028900 +n07817024 +n02971167 +n04309049 +n02678897 +n12795555 +n11769803 +n01904886 +n02079851 +n12189987 +n04581829 +n12098403 +n01839330 +n12587803 +n03652932 +n08628141 +n03544238 +n04513827 +n01847806 +n03132076 +n10243137 +n03621377 +n10530959 +n14765422 +n04968139 +n12950314 +n02064816 +n02846511 +n10513823 +n11772408 +n03341297 +n03492922 +n03683606 +n02894337 +n02365480 +n09846755 +n03495039 +n01317813 +n12610328 +n02157206 +n01588002 +n03914831 +n03659686 +n10406765 +n09205509 +n02870526 +n07954211 +n10578471 +n11646694 +n03115762 +n07762913 +n12056758 +n12305986 +n11845913 +n02835915 +n02831237 +n07927512 +n12171098 +n02073831 +n07605040 +n02885462 +n02768114 +n04450994 +n11844371 +n03963645 +n02956699 +n02029378 +n01528396 +n10005934 +n04465666 +n04390977 +n11882074 +n03831382 +n04605163 +n06276501 +n02944075 +n05258051 +n07901457 +n12683571 +n02205219 +n13235503 +n02388735 +n03941231 +n14919819 +n12816508 +n11536673 +n13895262 +n02903204 +n10137825 +n07841345 +n07893253 +n01850192 +n07769731 +n11773987 +n03539678 +n12938193 +n10802507 +n03089879 +n00477392 +n01828096 +n09263912 +n13653902 +n04579667 +n01322983 +n08579352 +n07587023 +n07756951 +n07870167 +n10588357 +n01606809 +n13864035 +n02802544 +n07591961 +n02979399 +n04144539 +n02416820 +n11769176 +n09743792 +n09732170 +n04972451 +n13918274 +n01847089 +n01859689 +n04208065 
+n07617051 +n10674713 +n07914271 +n07887461 +n03736064 +n03644858 +n03878963 +n04040247 +n07891433 +n01611969 +n07587618 +n02689144 +n10049363 +n04059516 +n10313239 +n03115400 +n01519563 +n01533893 +n03850245 +n11733548 +n03372549 +n01884834 +n02839110 +n07887192 +n03617312 +n07886463 +n03103396 +n07764847 +n01855476 +n07808587 +n12858871 +n03632729 +n10209731 +n04141712 +n03978686 +n03225988 +n00475273 +n09224725 +n04966543 +n01322221 +n03649674 +n13154494 +n03948830 +n03320519 +n03723267 +n07869611 +n12342498 +n01827793 +n03145719 +n11821184 +n11956348 +n11857875 +n10339717 +n09450163 +n10756148 +n01591301 +n07915094 +n04422727 +n09719309 +n03349469 +n03389889 +n10718131 +n04298661 +n09747495 +n03676623 +n03547229 +n03062015 +n10734394 +n07817315 +n02852360 +n01850553 +n02952585 +n03587205 +n02009750 +n01540090 +n02947660 +n03656957 +n03378174 +n02508213 +n01572489 +n12008487 +n12185859 +n11691046 +n01323355 +n05262534 +n00448126 +n02432983 +n12038406 +n03883385 +n02411206 +n01643896 +n10159045 +n11675025 +n01803362 +n02009508 +n07920349 +n04098513 +n11617272 +n09913455 +n12390314 +n04171208 +n02995345 +n10634849 +n03173929 +n02749953 +n11845793 +n12796022 +n11955153 +n11816829 +n03032453 +n11984542 +n02992795 +n03712111 +n02873733 +n02759387 +n14915184 +n02381364 +n12686274 +n07857731 +n04518764 +n03010473 +n02418465 +n02359556 +n07894799 +n04104770 +n04335209 +n01848976 +n02006063 +n04454908 +n03002948 +n04220250 +n09923561 +n04102162 +n11958080 +n04598965 +n10173410 +n03067339 +n02003204 +n12686676 +n11986511 +n02311617 +n03367059 +n02761557 +n05578095 +n04041069 +n10575463 +n03325941 +n10082043 +n01806297 +n09691729 +n04593866 +n01813088 +n01625562 +n03906224 +n01652026 +n10236304 +n04102618 +n04321453 +n07820145 +n01575117 +n12788854 +n07823698 +n04206225 +n03216710 +n02421449 +n03343737 +n07560903 +n02872529 +n11989869 +n12071744 +n06278475 +n04492749 +n02920259 +n03798061 +n02420509 +n03316105 +n12052447 +n03974915 +n02904803 +n03430418 +n12291959 
+n06892775 +n03875806 +n07903841 +n10282482 +n02683323 +n07862348 +n01849157 +n04469813 +n09944022 +n03342127 +n07592481 +n02936402 +n02405929 +n10002760 +n02537716 +n05259914 +n01560280 +n12694486 +n07879350 +n02377063 +n03637181 +n03409297 +n01607812 +n02808185 +n09239302 +n12055516 +n09712448 +n02859184 +n12772908 +n02735538 +n10333838 +n12336092 +n02386968 +n04613939 +n00452864 +n04535524 +n03174731 +n04189816 +n07607605 +n12909917 +n02387722 +n02960690 +n07715221 +n02407071 +n10667477 +n09398076 +n04236809 +n01904806 +n01610552 +n12373100 +n12771390 +n04122685 +n07804771 +n15102455 +n03469175 +n03746005 +n02536456 +n03505667 +n11816336 +n09376198 +n10572706 +n03464053 +n02869155 +n07816164 +n04969798 +n02942349 +n14820180 +n01623615 +n12676703 +n03369276 +n03650551 +n02010272 +n02976123 +n01852400 +n02196119 +n04132158 +n03238586 +n07639069 +n03313333 +n10542761 +n12215022 +n00455173 +n10019406 +n12899537 +n04277826 +n09906449 +n04549629 +n11508382 +n15090065 +n10289462 +n04540255 +n02723165 +n04335693 +n01536334 +n03107488 +n12782530 +n14785065 +n02974348 +n09874862 +n04479939 +n03309465 +n09902954 +n12092417 +n03425595 +n12433081 +n07806774 +n12462805 +n01314781 +n10192839 +n01622120 +n07807171 +n03261019 +n02843553 +n04287747 +n02324587 +n09915434 +n01818299 +n01592694 +n03826186 +n03607659 +n01527917 +n03628511 +n02005399 +n04204081 +n02052775 +n04403413 +n03914106 +n12811027 +n01872772 +n04555700 +n02004855 +n04602762 +n02713003 +n04406817 +n11934807 +n03336282 +n09684901 +n03836976 +n11959862 +n03062336 +n03506028 +n04503413 +n07819896 +n03205669 +n11902200 +n07685218 +n03046133 +n10261624 +n10303814 +n03676087 +n04023695 +n07587111 +n07764155 +n01504179 +n03794136 +n03389761 +n13901211 +n02784124 +n04488530 +n02807731 +n07898443 +n04981658 +n04177755 +n03649161 +n04125257 +n10135129 +n03653110 +n10560106 +n07735687 +n03511333 +n11960245 +n03301568 +n03878066 +n10746931 +n04223299 +n04237423 +n07888229 +n01819734 +n12312728 +n09981939 +n03727465 
+n13882276 +n02993194 +n11971927 +n09713108 +n03581125 +n09718936 +n14698884 +n03005285 +n03540914 +n03359436 +n03934042 +n07569644 +n04964878 +n07890068 +n07580253 +n01538630 +n03132666 +n03259009 +n02796318 +n12703190 +n01464844 +n11792029 +n04270371 +n13102775 +n02933649 +n02387254 +n02890188 +n04335886 +n04358491 +n02786837 +n03885194 +n04001265 +n03438071 +n10375402 +n02997910 +n03326795 +n00470830 +n02734725 +n03494537 +n08376250 +n07743544 +n02991847 +n04246271 +n04156140 +n04381073 +n07732168 +n04951071 +n07977870 +n04334599 +n02838728 +n03326948 +n11723227 +n08182379 +n03686924 +n03821518 +n02382204 +n02080415 +n11788727 +n07732636 +n03860404 +n03898395 +n07867324 +n04392113 +n13237188 +n03263076 +n07843636 +n04968056 +n04397027 +n03320421 +n06267564 +n02880842 +n04115456 +n13862407 +n10289039 +n03128248 +n01457852 +n01536035 +n04579056 +n03937931 +n03036022 +n01804163 +n09913593 +n12841007 +n03115897 +n03256032 +n02475669 +n07924443 +n03061505 +n10001481 +n03600722 +n07842308 +n10696508 +n04215402 +n10588074 +n03614782 +n03995535 +n12091953 +n04113194 +n10092978 +n03011741 +n04381860 +n07819769 +n07905474 +n03288500 +n04225987 +n13223710 +n02879087 +n02920083 +n08640739 +n03362890 +n03996849 +n03849814 +n09694664 +n02407390 +n02910864 +n02388917 +n01668665 +n07616046 +n02932891 +n10553235 +n03652729 +n01615703 +n12801781 +n12164656 +n05302499 +n03801760 +n03332271 +n02901793 +n03941417 +n09833441 +n01623110 +n02807523 +n10598181 +n03725600 +n10368528 +n04116098 +n12719944 +n02045864 +n02173373 +n02811059 +n04479823 +n07816398 +n10572889 +n04142731 +n07687381 +n02799323 +n07865484 +n01858845 +n12684379 +n01842235 +n09242389 +n02028727 +n03527565 +n03438863 +n15019030 +n13907272 +n09659039 +n04251791 +n03683995 +n04137217 +n04389430 +n09785659 +n02016816 +n03124590 +n01859325 +n03138669 +n02999936 +n11926365 +n12686077 +n03517760 +n09734450 +n04563413 +n12074867 +n01564217 +n12521394 +n06267893 +n03594148 +n04139395 +n12369309 +n01544389 +n12048056 
+n04524941 +n03016868 +n03653740 +n02795528 +n03687137 +n03766935 +n03361297 +n04263502 +n10043491 +n03446268 +n01994910 +n03891538 +n10091564 +n10226413 +n02755140 +n03500389 +n10237196 +n03625646 +n06596474 +n03360300 +n09730824 +n10732010 +n04469514 +n02904927 +n04961331 +n02936570 +n03680858 +n07585758 +n09199101 +n04050933 +n03712337 +n03911513 +n01556182 +n03102371 +n07928887 +n12133462 +n03974070 +n03971218 +n03292475 +n03425241 +n03440216 +n11995092 +n02894158 +n02918112 +n10568358 +n11524451 +n03169176 +n04100519 +n07588193 +n06883725 +n02860640 +n07762114 +n04082710 +n07896893 +n10167152 +n03287351 +n02788021 +n08494231 +n01560935 +n03249342 +n04564581 +n09349648 +n07704205 +n03510244 +n12127460 +n09945745 +n11719286 +n11613459 +n12656369 +n03824381 +n07655263 +n09894143 +n04964001 +n02161457 +n07654298 +n07930433 +n02979074 +n02026948 +n13914608 +n07611267 +n02843276 +n09827363 +n10259780 +n04432662 +n11715678 +n12388858 +n03057920 +n10465451 +n03855214 +n07728181 +n09835348 +n03549732 +n04589325 +n03491032 +n00452034 +n03948242 +n01456756 +n07921615 +n02809105 +n12889713 +n07586894 +n07734879 +n07905979 +n12847374 +n12129134 +n02122580 +n04028074 +n02911332 +n09251407 +n07697825 +n04597309 +n02800213 +n03480579 +n07621618 +n04170933 +n03743279 +n01916481 +n04037220 +n10748620 +n02708433 +n12007196 +n02561381 +n04103769 +n03030880 +n04413969 +n03911658 +n04590746 +n00476389 +n04331639 +n07725789 +n01792429 +n02949542 +n07686720 +n04064862 +n04447028 +n01713764 +n09854218 +n04032603 +n04405907 +n15093298 +n04385536 +n11954345 +n01560793 +n09249034 +n03784270 +n03436549 +n01324610 +n02379183 +n07616487 +n04119478 +n03309356 +n12865037 +n12850168 +n04250850 +n03024064 +n04412097 +n02982515 +n00450070 +n10175248 +n11847169 +n12276872 +n12870891 +n10229883 +n10505613 +n03482252 +n09300905 +n02919890 +n07617611 +n10283170 +n01607962 +n01671125 +n07894551 +n04561287 +n00005787 +n10025635 +n02850732 +n03732020 +n02036711 +n07907429 +n03797896 +n03004824 
+n12011620 +n10300303 +n03105467 +n03767745 +n07868508 +n07868200 +n03788047 +n07886057 +n04559451 +n09845401 +n04373704 +n02676938 +n02565324 +n02667478 +n02122878 +n03244047 +n01747589 +n04320973 +n13205058 +n02379430 +n11959632 +n10183931 +n07683490 +n10055410 +n04370288 +n03273551 +n13900422 +n07899434 +n04053677 +n07740461 +n11879722 +n04282494 +n02981911 +n03449451 +n07581249 +n03965456 +n11808468 +n13881644 +n11725973 +n12091213 +n13193856 +n02873520 +n02754656 +n02431976 +n01324431 +n02385214 +n01888411 +n12680864 +n07731284 +n04337287 +n07631926 +n02549248 +n04395024 +n07585557 +n02776825 +n09460046 +n12023108 +n00475403 +n10098517 +n07902336 +n03683708 +n02412210 +n04397452 +n04583212 +n13869547 +n03632577 +n01616086 +n02763901 +n08256735 +n03015478 +n02084732 +n12178896 +n11966215 +n07605380 +n13869788 +n01847170 +n07744811 +n01854700 +n00444937 +n10422405 +n07801892 +n09688804 +n11879054 +n02802215 +n07908411 +n07822518 +n01558594 +n07935737 +n10730728 +n04436329 +n04294879 +n04972350 +n12911440 +n13886260 +n07578093 +n02537525 +n03703730 +n09607630 +n13865904 +n02360282 +n11731659 +n04126066 +n04212165 +n11618290 +n07588574 +n09269472 +n11896722 +n02892304 +n03487642 +n02028342 +n03321563 +n03135030 +n03522100 +n03253886 +n04095109 +n06470073 +n12603449 +n10644598 +n10260800 +n01535469 +n09696456 +n03553019 +n03963198 +n11918473 +n10314517 +n03002341 +n07574923 +n10421470 +n05716342 +n03244231 +n01730563 +n11691857 +n12807251 +n12345899 +n03142679 +n01531512 +n12307240 +n07835457 +n04535370 +n00451186 +n12481458 +n03434188 +n09734185 +n04578934 +n04167346 +n02747802 +n03459328 +n03301940 +n01562014 +n07690431 +n10642596 +n03696065 +n12781940 +n02759257 +n04392764 +n04218564 +n03499907 +n01536780 +n09751895 +n03235042 +n04570815 +n12070381 +n09448690 +n07625061 +n10178216 +n04560113 +n09457979 +n03858085 +n02421792 +n02944579 +n10085869 +n09718811 +n04103206 +n04239786 +n04501947 +n01321123 +n02390015 +n03964495 +n01554448 +n02925107 +n03028596 
+n12483625 +n03227317 +n10701644 +n11968704 +n03900393 +n01851038 +n02276078 +n03132776 +n07585906 +n04480033 +n07880458 +n12887293 +n07921239 +n03307037 +n04595028 +n04244379 +n13131028 +n10313724 +n09436708 +n02694045 +n09941787 +n00449796 +n01817346 +n07928696 +n03401279 +n12901724 +n11646167 +n07682477 +n09415671 +n07900225 +n03607029 +n02692232 +n11834654 +n07935379 +n12437930 +n03762434 +n07922764 +n03595523 +n04546340 +n10686885 +n03516844 +n03767112 +n09896685 +n03859608 +n03149686 +n07920872 +n12388143 +n10406391 +n04233715 +n04373089 +n02023992 +n01947396 +n12115180 +n00479616 +n03962852 +n02392434 +n12414035 +n14976871 +n03201776 +n10665587 +n03600285 +n04402449 +n08539072 +n03629231 +n12860365 +n03488438 +n03337383 +n12455950 +n10384392 +n02953455 +n03101796 +n07919572 +n03233744 +n01578180 +n01756508 +n04556533 +n02962843 +n02882190 +n03731483 +n01850873 +n05260240 +n03111177 +n09836519 +n03030557 +n11789066 +n02788572 +n07903101 +n04067818 +n07840804 +n01567678 +n12427184 +n03333610 +n02416964 +n10607291 +n07936548 +n05451384 +n02968074 +n07605597 +n02704949 +n07609215 +n01951274 +n07696977 +n03180384 +n04303357 +n03291741 +n02207805 +n10123844 +n03420345 +n12384227 +n02758863 +n02047975 +n03978966 +n03549199 +n04275175 +n09294877 +n09836343 +n11970586 +n02010728 +n10369317 +n12681893 +n03192543 +n12413165 +n12174521 +n11916696 +n10042845 +n07822197 +n04968749 +n10323634 +n12849416 +n02814774 +n05538625 +n03078802 +n12230794 +n07726095 +n03051249 +n12005656 +n11876432 +n12164881 +n09711435 +n01622483 +n09896170 +n07684289 +n03368352 +n07910048 +n03159535 +n00466377 +n01541386 +n11647703 +n09752023 +n07903731 +n12249542 +n03794798 +n11786131 +n02852043 +n10493685 +n09846894 +n01752585 +n01536186 +n07618432 +n09859152 +n02065026 +n02382635 +n07867616 +n03885788 +n04255586 +n03275681 +n11961100 +n12485981 +n04495698 +n03293741 +n13902048 +n03254862 +n07903962 +n01594787 +n11962272 +n03284886 +n07842202 +n10157128 +n02405302 +n04443766 +n06266633 
+n02519862 +n01487506 +n03373943 +n04247876 +n04327204 +n03349771 +n09260907 +n10092794 +n12223764 +n03504723 +n11926833 +n01820052 +n13032381 +n03889871 +n03209359 +n04608923 +n15093137 +n15091304 +n03688405 +n09905185 +n03543112 +n11611356 +n03885028 +n03234164 +n07594066 +n02396014 +n03456186 +n09874725 +n11601333 +n02917521 +n03055857 +n02804123 +n12352844 +n12866002 +n09858165 +n12037691 +n02565072 +n04477387 +n02008643 +n07867021 +n04119360 +n09893191 +n02944146 +n12435649 +n13197274 +n04974859 +n07751004 +n12003696 +n02762508 +n02680512 +n01743086 +n06998748 +n10607478 +n07613815 +n01559477 +n01859852 +n03239054 +n04466871 +n05263183 +n13173882 +n07897438 +n12427757 +n04400737 +n03291963 +n07682808 +n11692265 +n04130143 +n09445289 +n07696839 +n03835197 +n12821895 +n09734639 +n03365374 +n04305210 +n04962240 +n09871867 +n07897750 +n07616386 +n09443281 +n03641569 +n13882563 +n07680761 +n10498816 +n04034262 +n03533014 +n07928790 +n07690152 +n10060352 +n04124370 +n12453186 +n04509171 +n03013580 +n10604979 +n12515711 +n04971211 +n07693223 +n03786715 +n07894703 +n02761834 +n04232800 +n03437741 +n04045644 +n14976759 +n03042697 +n12557681 +n06275095 +n11678010 +n01586941 +n07684517 +n07822845 +n03483823 +n09951616 +n03180865 +n07861557 +n03644378 +n12848499 +n11962667 +n03886762 +n04238128 +n11979964 +n13915113 +n12791329 +n12457091 +n03341153 +n10267865 +n03484576 +n10186216 +n07612137 +n03843438 +n11807525 +n11931540 +n02027897 +n07614730 +n04116294 +n03469903 +n10017272 +n03688605 +n07860103 +n03981566 +n01888045 +n03345837 +n11998888 +n02071636 +n02726017 +n04310157 +n04607869 +n01622959 +n08524735 +n03119203 +n12031927 +n03610524 +n02807616 +n04056180 +n03233905 +n03374473 +n14810561 +n11944954 +n03121431 +n09750891 +n08505018 +n10727171 +n12357485 +n12571781 +n12067193 +n07586604 +n02086753 +n03548086 +n02560110 +n07804900 +n02880393 +n04208427 +n12931542 +n01594968 +n05218119 +n03520493 +n03727605 +n12687698 +n03612965 +n04135315 +n07730320 +n10540114 
+n07599911 +n01323493 +n02115096 +n04590263 +n12043836 +n02861387 +n09836786 +n04966941 +n02816768 +n13131618 +n10701962 +n02919792 +n03442597 +n04325041 +n03333129 +n04091693 +n04950952 +n10631309 +n04177931 +n13234678 +n01970667 +n07748416 +n07893642 +n07691650 +n03660909 +n04145863 +n11945514 +n10334009 +n12336973 +n03954393 +n04558478 +n09899929 +n03487533 +n07816575 +n07877187 +n07863547 +n01603812 +n02098906 +n04973585 +n03674440 +n04371050 +n12243109 +n07871234 +n02928049 +n07574504 +n07889274 +n12141167 +n04543996 +n03080633 +n03423479 +n07879659 +n04380916 +n10514429 +n07584423 +n04009801 +n12479537 +n07606538 +n07698543 +n12353754 +n10132035 +n03367545 +n04245508 +n09811852 +n02024763 +n04052442 +n10120330 +n12352639 +n12606438 +n07752966 +n09772930 +n02535759 +n11737534 +n10345015 +n12427566 +n09705784 +n04112654 +n02985963 +n03758089 +n12953484 +n07906572 +n02881757 +n12739332 +n03718458 +n03407865 +n07775050 +n03210552 +n09452395 +n09789566 +n10566072 +n10559996 +n07826930 +n12414932 +n01887474 +n03026907 +n07751148 +n10223177 +n03957420 +n03788601 +n12244819 +n12421137 +n04266162 +n10038409 +n02981024 +n03228967 +n11825351 +n12058822 +n11963932 +n03041449 +n03046029 +n07590502 +n02932523 +n02152881 +n04970398 +n07887967 +n12812478 +n12421917 +n02708711 +n11870747 +n04290507 +n07934282 +n01608265 +n12070583 +n03205574 +n02305085 +n07866015 +n02960903 +n10098624 +n00481803 +n07938007 +n02693246 +n03923379 +n04103665 +n11792742 +n12489815 +n04971313 +n01668892 +n01055165 +n03215508 +n12104501 +n07899292 +n12822955 +n07713074 +n03842012 +n02449350 +n07868955 +n02835829 +n12283542 +n04525584 +n07910656 +n11625003 +n03987266 +n02805983 +n15091846 +n09736945 +n04973816 +n02439398 +n01519873 +n07899003 +n03019938 +n07582152 +n01885498 +n12108871 +n02934451 +n04327682 +n07696625 +n09750770 +n12084890 +n03960374 +n07585107 +n01570839 +n11905392 +n06277135 +n07842044 +n03751269 +n04398951 +n12861892 +n12649539 +n07596967 +n07580592 +n12845413 +n07690739 
+n07804657 +n04334105 +n03779128 +n03268918 +n03066359 +n02744323 +n12596148 +n04272389 +n07832416 +n10210911 +n01548865 +n03221351 +n15091669 +n07878926 +n07607967 +n12171966 +n02846141 +n07576781 +n02922292 +n10092643 +n01732614 +n02578771 +n02864593 +n03537241 +n09635534 +n03268645 +n07852833 +n13873917 +n12640839 +n03506727 +n10536416 +n09976429 +n10692482 +n07600285 +n04156946 +n07818689 +n02605703 +n02710429 +n02890351 +n03408054 +n03121298 +n02731629 +n12450840 +n04061681 +n10153414 +n07648913 +n07891309 +n01562265 +n14973585 +n01610226 +n06267991 +n03302938 +n07822323 +n07826091 +n02764398 +n10406266 +n09282208 +n01734104 +n04283096 +n03530910 +n11542137 +n02610664 +n03856012 +n01531811 +n07862611 +n11625632 +n12643313 +n02469248 +n03333711 +n02907082 +n02122430 +n01559804 +n09744161 +n10187990 +n12015525 +n07844867 +n07887304 +n02878425 +n02009380 +n11448153 +n10655594 +n12566954 +n11901977 +n03999160 +n02389779 +n07928488 +n12785889 +n04281375 +n03745146 +n03224603 +n04594828 +n12835331 +n09715427 +n11615026 +n09972010 +n04038231 +n02379329 +n03445326 +n10753442 +n04249882 +n11727738 +n07866723 +n04282992 +n11621281 +n01566645 +n03919430 +n11980682 +n03480719 +n11625804 +n10467395 +n09436444 +n07867751 +n03684611 +n03788498 +n12062626 +n07808904 +n07690585 +n03865557 +n10711766 +n10465831 +n04380255 +n12166128 +n04432203 +n07892418 +n10432441 +n12991184 +n04209613 +n04459773 +n09666883 +n07807472 +n09873899 +n12939874 +n04545748 +n09637339 +n07919441 +n03987376 +n03645577 +n03437430 +n10671613 +n02964843 +n09707289 +n11700058 +n03877351 +n03518445 +n07643200 +n02140049 +n12683791 +n12418221 +n04154152 +n03397947 +n03238131 +n11851839 +n04545858 +n07744682 +n02995871 +n07593199 +n03543394 +n10293332 +n12658481 +n11599324 +n02705201 +n03920867 +n08249459 +n02876084 +n03937835 +n01397871 +n03849679 +n12016567 +n04208936 +n07696728 +n13148208 +n01904029 +n08659861 +n07878785 +n07827130 +n03390983 +n02624807 +n03319745 +n03994614 +n00446493 +n12477583 
+n02920658 +n04602956 +n02688273 +n07577538 +n04350581 +n09283405 +n04074185 +n04495843 +n03538179 +n03454885 +n03878211 +n10308168 +n08518171 +n02660208 +n07904760 +n07928367 +n10174445 +n02137015 +n02863426 +n07700003 +n04015908 +n03946076 +n11725821 +n01794344 +n04364160 +n01663782 +n04283255 +n02822064 +n04406239 +n02782681 +n11990313 +n03563460 +n02957008 +n07889814 +n07896060 +n03683079 +n04278447 +n13011595 +n11810358 +n03836451 +n12827537 +n03545470 +n03213538 +n07929351 +n03471190 +n02882301 +n03625943 +n03397087 +n11955896 +n04097373 +n03145522 +n03034405 +n02889646 +n02928299 +n09652149 +n01641391 +n04593524 +n07651025 +n03719343 +n03884778 +n03452594 +n02174659 +n12345280 +n03039827 +n03309687 +n11635433 +n02057330 +n01664990 +n09779790 +n02011016 +n09689958 +n07770763 +n03010915 +n03443912 +n02946509 +n13050397 +n03031012 +n04217546 +n04124202 +n12766869 +n04177041 +n12050533 +n03251932 +n03086580 +n03918737 +n04386792 +n03176594 +n01577035 +n01669654 +n01818832 +n10441962 +n03885904 +n03724756 +n02925666 +n03549589 +n03062122 +n02828427 +n12604228 +n03624400 +n07725888 +n03873699 +n01503976 +n02887079 +n03610098 +n02940385 +n04610013 +n03652100 +n04496872 +n04008385 +n02583890 +n10476467 +n03395514 +n03306385 +n04228581 +n02389261 +n12576323 +n01579149 +n01623425 +n02593019 +n03995265 +n02124484 +n12745386 +n04355267 +n02643836 +n01614343 +n03810952 +n04058594 +n12278650 +n03474779 +n02823510 +n00442437 +n12039317 +n04574067 +n03762602 +n02153109 +n03518943 +n04289827 +n02288268 +n07749969 +n04132985 +n03213826 +n04307986 +n03567066 +n02049088 +n04408871 +n03522003 +n09305898 +n04266375 +n08571898 +n03039259 +n01587526 +n03261603 +n00464277 +n02627532 +n02992368 +n03640850 +n03037404 +n04525191 +n02106854 +n07772147 +n04173511 +n12761284 +n03257210 +n02813544 +n07740342 +n04066270 +n03070059 +n03616428 +n02904233 +n03209910 +n04389854 +n03078995 +n03193260 +n01488038 +n01754533 +n12629305 +n02055107 +n11664418 +n04228693 +n03353951 +n03440682 
+n03025250 +n03300216 +n02042046 +n04226826 +n03342015 +n03090000 +n02050313 +n03492250 +n01535690 +n01572654 +n03465718 +n02879309 +n06278338 +n04113406 +n03695857 +n09720256 +n01860002 +n02851939 +n09828216 +n02564270 +n03528901 +n02542432 +n11978961 +n01670802 +n03956623 +n01612275 +n09376786 +n03222318 +n02813645 +n02213543 +n13898207 +n03616763 +n03616979 +n11904109 +n04212282 +n04608435 +n02042472 +n04198453 +n03216402 +n02015357 +n12282737 +n02699629 +n12866635 +n02048353 +n02933340 +n01793715 +n12001707 +n02878222 +n03187037 +n03105306 +n04080705 +n04254009 +n01623880 +n02839592 +n03436182 +n01591123 +n01318279 +n03002816 +n13155095 +n03141702 +n03775388 +n12165170 +n03322836 +n03259401 +n04471148 +n03911767 +n12585629 +n04317325 +n04257986 +n03133050 +n02035210 +n12891305 +n11882426 +n04491388 +n12948251 +n03498781 +n04262161 +n03775636 +n09915651 +n07584332 +n07852614 +n11626152 +n03901750 +n09723067 +n04265904 +n09920283 +n02397744 +n03253796 +n07712959 +n03898129 +n01743936 +n02075612 +n04560292 +n03479397 +n04334365 +n04357121 +n10145902 +n03844673 +n09854421 +n12687957 +n12598027 +n03944138 +n01839750 +n07722888 +n04258859 +n03088389 +n03351434 +n03509608 +n01677747 +n03145147 +n12046815 +n03505133 +n01629962 +n03333252 +n03993703 +n02962061 +n04529962 +n03463666 +n07681691 +n12160857 +n04187233 +n09331251 +n11614713 +n04376400 +n12301445 +n12633994 +n03883524 +n11614420 +n13062421 +n03645011 +n03293863 +n11640132 +n02579928 +n02854739 +n04461437 +n07729384 +n02977936 +n02836392 +n03593122 +n01666228 +n07820683 +n07568502 +n11910460 +n09348460 +n09712324 +n02403740 +n03482877 +n04370774 +n07750146 +n12992177 +n03152303 +n04134008 +n09805324 +n01611800 +n04374315 +n07586099 +n02032222 +n01979874 +n04350769 +n02907873 +n03016609 +n02543565 +n03256166 +n03016737 +n02419336 +n03268790 +n03559999 +n07765999 +n04607035 +n02416104 +n02123917 +n12484784 +n03225108 +n10739391 +n03506880 +n02918831 +n03045228 +n12516828 +n01314663 +n04172342 +n02768226 
+n12368028 +n01500476 +n01558149 +n03604156 +n04035912 +n02359915 +n12261571 +n03875955 +n01887623 +n03871371 +n03390786 +n12494794 +n03826039 +n04465358 +n03838298 +n03165466 +n04229737 +n01321770 +n04354026 +n02998003 +n04114844 +n10611613 +n03600475 +n01909906 +n00466880 +n04284869 +n07722485 +n04496614 +n03298716 +n02285801 +n04081699 +n07765208 +n12659539 +n11618525 +n11757653 +n07727048 +n03913343 +n12070016 +n02697675 +n04284572 +n02595702 +n04482297 +n03516996 +n03704549 +n02040266 +n04476116 +n01323261 +n03823216 +n07696403 +n03226880 +n09734535 +n03950537 +n01671479 +n03049924 +n12593994 +n04568841 +n03604400 +n01837072 +n01754370 +n03122202 +n12338454 +n04094720 +n04150980 +n03429682 +n03884926 +n03378005 +n02434954 +n03461288 +n02893692 +n04472563 +n10472129 +n04590021 +n07739344 +n04162433 +n03395859 +n12059314 +n03498662 +n03678729 +n02927764 +n02770211 +n11710393 +n07730207 +n04178190 +n07772935 +n03801880 +n04414675 +n12729521 +n12203529 +n04122578 +n04575824 +n06267655 +n03698360 +n02804515 +n02431337 +n08598568 +n02893608 +n02270623 +n00479440 +n11616662 +n02884994 +n04305323 +n02407625 +n04476831 +n04222307 +n03179910 +n11623967 +n00446311 +n00454983 +n02886434 +n12279458 +n03723781 +n11816121 +n02403231 +n11808299 +n07816296 +n03219483 +n02657694 +n00453478 +n02816656 +n02625851 +n04112752 +n03339529 +n12171316 +n02044517 +n04137773 +n01486838 +n03015149 +n12911673 +n03967270 +n03498441 +n11672269 +n03386870 +n11615967 +n02580679 +n01681653 +n02793199 +n02824319 +n10727458 +n02555863 +n01533000 +n02175916 +n12064389 +n04383015 +n02469472 +n03101664 +n03623338 +n12295796 +n02869249 +n01792042 +n03447075 +n04453390 +n04382438 +n04112252 +n03332393 +n12729729 +n01851207 +n04269270 +n12333771 +n06272612 +n03135532 +n02927887 +n11711537 +n12301180 +n04107743 +n01813948 +n03282295 +n09714694 +n00483409 +n01504344 +n04279353 +n04040373 +n12658308 +n04134523 +n10104064 +n12056601 +n04525417 +n07819166 +n12263038 +n02072798 +n03125057 +n03367410 
+n04000592 +n03549897 +n01877606 +n01564914 +n12307076 +n02855925 +n03176763 +n12271933 +n04121728 +n07690511 +n02825442 +n04442441 +n01630901 +n03088580 +n02499808 +n10675010 +n01531971 +n02273392 +n01526521 +n01531344 +n03667664 +n02888270 +n04412416 +n07733394 +n04559910 +n04105704 +n11792341 +n04201064 +n01693175 +n04555291 +n02908773 +n01976868 +n03529175 +n03365231 +n03622839 +n04258333 +n03327133 +n03425769 +n12477747 +n03718935 +n11727540 +n07933799 +n03030262 +n12043673 +n02619550 +n07937461 +n12198286 +n08560295 +n12402348 +n01733957 +n12344700 +n02763604 +n11925303 +n01557962 +n03927299 +n11611758 +n03035252 +n09454412 +n04004990 +n03456299 +n02175569 +n03668279 +n12352990 +n03507241 +n01534155 +n12278371 +n02499022 +n03822767 +n01318381 +n04024983 +n04277493 +n11934616 +n02027075 +n11611561 +n03454442 +n02236355 +n01732789 +n07722052 +n01489501 +n04409625 +n10563403 +n01817263 +n07757511 +n03770316 +n02977438 +n01840775 +n03607923 +n03322704 +n02375302 +n01614038 +n01646555 +n03952576 +n02946824 +n12847008 +n03016389 +n11809594 +n03165096 +n03839671 +n02687821 +n01689081 +n03822656 +n02597608 +n12336727 +n01579578 +n03631922 +n03904909 +n11658331 +n04224543 +n12621410 +n03870672 +n04252331 +n09720842 +n01396048 +n11988596 +n00483205 +n02871005 +n01597022 +n02382039 +n07743902 +n02358890 +n07877961 +n05263448 +n01862399 +n04136800 +n10624540 +n11990167 +n02731398 +n03366974 +n03490006 +n01561732 +n02626265 +n10627252 +n12402051 +n08517676 +n10488656 +n03099274 +n03718581 +n11806219 +n01830042 +n07728585 +n03732114 +n10755080 +n03359285 +n07720277 +n03354207 +n01596273 +n04416005 +n01847253 +n07733567 +n09725653 +n04274985 +n00449977 +n07772274 +n12063639 +n01530439 +n01322508 +n04397768 +n07273802 +n04261281 +n10524076 +n01678343 +n03410938 +n01797020 +n02388832 +n07719616 +n03639497 +n09787765 +n07721018 +n11818069 +n04185529 +n11644462 +n12074408 +n00483848 +n01583495 +n11891175 +n03347617 +n03308481 +n02535258 +n07750872 +n07748157 +n02855701 
+n04584373 +n02461830 +n02912557 +n12277578 +n03604311 +n03643253 +n03031152 +n04039742 +n03435743 +n13908201 +n04150153 +n03250405 +n01410457 +n02357401 +n12588780 +n12729315 +n01690149 +n02538216 +n03171228 +n02424909 +n06274760 +n03775747 +n04211857 +n12429352 +n12272239 +n11759853 +n03401129 +n12649317 +n02625258 +n12651611 +n03603442 +n02803934 +n03861271 +n02605936 +n02018368 +n12711984 +n02811936 +n04612026 +n01339471 +n02923682 +n09194227 +n04346157 +n03939178 +n12635532 +n01593028 +n01793249 +n02380464 +n12400720 +n07708398 +n12020941 +n12492106 +n12850336 +n12749679 +n02892948 +n12591017 +n03193423 +n01791463 +n11979527 +n12134025 +n12167075 +n09308743 +n13108545 +n01618503 +n07827284 +n07724492 +n02338145 +n04533946 +n01586020 +n07598256 +n01603953 +n12646740 +n03067518 +n04046277 +n01532511 +n07769584 +n11644046 +n12753573 +n02681392 +n08492461 +n07749446 +n04409384 +n01791954 +n12330891 +n04560882 +n10145480 +n04250473 +n02655848 +n02903126 +n11736851 +n11901294 +n12865824 +n03870105 +n00449892 +n04240752 +n11851258 +n04200537 +n12049562 +n01521399 +n03565830 +n07860447 +n03067212 +n01664674 +n07561590 +n02727141 +n02324514 +n02372952 +n01584853 +n07766173 +n11811706 +n03097362 +n04200258 +n02732572 +n01853195 +n12282527 +n09838621 +n02764505 +n04256891 +n12337617 +n12635955 +n07831267 +n11628793 +n12316572 +n07807834 +n02037869 +n01821869 +n02820556 +n04517211 +n01839086 +n03842986 +n07698401 +n02386224 +n07841800 +n01830915 +n11616486 +n11902389 +n03427202 +n12727101 +n01851573 +n02125494 +n07746186 +n11628087 +n07746551 +n03943115 +n11892029 +n02861022 +n11733312 +n01852329 +n09392402 +n12336224 +n07887099 +n03403643 +n04414199 +n07895100 +n02264232 +n02317781 +n07823460 +n07755929 +n02524202 +n04324297 +n11627512 +n01585715 +n02922578 +n00479887 +n02687423 +n02416880 +n11784126 +n12073991 +n01853870 +n01561452 +n04187970 +n10300154 +n02520147 +n12294124 +n07743224 +n12066018 +n11634736 +n02041678 +n11626585 +n02386141 +n03986949 +n07860331 
+n12356023 +n12072722 +n03082280 +n12083113 +n12979829 +n01448594 +n03007444 +n07858978 +n01641739 +n02043333 +n12020736 +n02751215 +n04528079 +n01538200 +n07925608 +n12091550 +n03742019 +n03518305 +n01642539 +n03414029 +n04363991 +n03767966 +n02596067 +n01586374 +n02885882 +n04080138 +n11617631 +n02033779 +n09451237 +n02310585 +n12648045 +n03955489 +n01752736 +n07899899 +n02299505 +n01579410 +n02156871 +n02998841 +n03759661 +n02050809 +n02683454 +n11621950 +n02910145 +n04967801 +n07896661 +n11906917 +n12275675 +n11611233 +n07736692 +n02312640 +n12588320 +n04399537 +n12757303 +n04197781 +n12717224 +n11635152 +n03122295 +n01792955 +n13133932 +n02518324 +n01584695 +n02915904 +n02967294 +n04345201 +n03019434 +n02470238 +n03049782 +n03101517 +n12709688 +n03716887 +n02422391 +n12638753 +n00288384 +n02162561 +n02053584 +n01317294 +n03334291 +n07814634 +n12273768 +n12406715 +n11644226 +n01646802 +n03460147 +n12338796 +n01972541 +n02147947 +n03890093 +n04127395 +n01581984 +n01681328 +n02213239 +n04582869 +n03254189 +n03274265 +n03186285 +n11839823 +n01624833 +n09792969 +n07891189 +n12023726 +n07619208 +n03466600 +n01849676 +n12190869 +n03079136 +n12317296 +n13001930 +n00477639 +n02944459 +n03903733 +n04131208 +n12710295 +n12180885 +n11612349 +n03443149 +n03982331 +n04264765 +n12642090 +n03237416 +n13868944 +n04046400 +n11705171 +n11979715 +n12597134 +n01609956 +n01568294 +n01469103 +n00443692 +n01606672 +n04556408 +n07690019 +n03977592 +n03358726 +n12696492 +n01573240 +n11632619 +n01772664 +n03453231 +n04179712 +n03646020 +n01812662 +n04306592 +n07724654 +n13908580 +n02903852 +n04284438 +n13132656 +n04317063 +n07829248 +n01589718 +n02654745 +n12294331 +n12515925 +n07900825 +n07721195 +n04189282 +n11907689 +n01624537 +n12333530 +n07762244 +n11757851 +n01599159 +n04038338 +n01568892 +n12691661 +n09744834 +n04307767 +n03120778 +n07920540 +n03781683 +n04185804 +n12080820 +n04354182 +n07574426 +n02579303 +n03046802 +n12078172 +n03210245 +n01614556 +n02304432 +n07713267 
+n09724656 +n02861147 +n12755387 +n01483830 +n12921868 +n12026018 +n07817871 +n12062781 +n04241573 +n11621727 +n03376159 +n11815721 +n13007034 +n03540090 +n00450866 +n11619455 +n01528845 +n01568720 +n12743352 +n02871314 +n03606251 +n01490670 +n04246060 +n02053425 +n10780284 +n01915700 +n04510706 +n00456465 +n01563945 +n11809094 +n09855433 +n04112579 +n03855333 +n09809925 +n03413684 +n02123478 +n12070712 +n03651843 +n02032355 +n01591005 +n01646648 +n02752615 +n02415829 +n03283221 +n04368496 +n01573360 +n02321170 +n10348526 +n04446844 +n07763792 +n12077944 +n04431025 +n02895438 +n10082687 +n07714188 +n02262449 +n03090172 +n12491017 +n01558461 +n12754781 +n04070415 +n04297098 +n03424862 +n01970164 +n09833536 +n01793435 +n01670535 +n09894445 +n09676247 +n01548492 +n12501202 +n03250089 +n03358380 +n02578928 +n12020184 +n02301935 +n03393017 +n12340755 +n01849863 +n01748906 +n03075946 +n01810268 +n01984245 +n04555400 +n12286988 +n04097760 +n02050586 +n12104238 +n01679962 +n02709101 +n01569060 +n12790430 +n01757901 +n13199717 +n11815918 +n07827410 +n02970534 +n12942572 +n07924276 +n04103918 +n11704093 +n07908647 +n07601686 +n12172906 +n04084889 +n02381261 +n02299157 +n11978713 +n12460957 +n02963503 +n03272810 +n12469517 +n03443005 +n01797307 +n02952237 +n11908549 +n13912540 +n03428226 +n10276477 +n01757343 +n01443243 +n01607600 +n03580518 +n12709103 +n07579688 +n04329834 +n12710415 +n11808932 +n10583790 +n02213788 +n11622184 +n12596709 +n02216211 +n07721942 +n07765361 +n01848453 +n11724109 +n02028451 +n02935017 +n12046028 +n10629939 +n00441073 +n07900958 +n12451399 +n02823964 +n04210120 +n01848840 +n10485883 +n07767709 +n02432704 +n11622591 +n03210372 +n07848196 +n11992806 +n02953197 +n07620689 +n01521756 +n03571625 +n03158186 +n12647560 +n02065407 +n01572782 +n09890749 +n05581932 +n07754451 +n03350204 +n13044375 +n12294723 +n12482893 +n04434531 +n12989938 +n12196336 +n01701859 +n07746334 +n11941924 +n02047411 +n12650379 +n10486166 +n01599556 +n01567879 +n12675876 
+n01682435 +n02043808 +n12362668 +n12306089 +n02999138 +n01679626 +n03557270 +n01546039 +n11901759 +n01549053 +n11883328 +n06596727 +n03193107 +n11612018 +n03300443 +n03612010 +n03668488 +n12648888 +n01448291 +n11632167 +n10262445 +n09742101 +n09717233 +n04299370 +n03094159 +n04536595 +n03514693 +n02029706 +n02886321 +n07816052 +n04045255 +n01851731 +n02627292 +n01841288 +n02739889 +n02932693 +n03784896 +n04569063 +n07902799 +n03863108 +n02607470 +n13200651 +n07916183 +n01573898 +n04347119 +n10076604 +n13033577 +n01824035 +n03630262 +n04426316 +n03064250 +n12262018 +n12048399 +n12279772 +n04143140 +n07829331 +n12891643 +n01826680 +n12646605 +n13103877 +n02023855 +n03086868 +n04163530 +n03736470 +n04358117 +n13872822 +n03159640 +n01680655 +n11611087 +n03980478 +n02978478 +n01555004 +n12402840 +n07763987 +n04387706 +n04979002 +n03258330 +n09856671 +n11624192 +n01538059 +n02003839 +n12552309 +n10469874 +n01576076 +n03643149 +n04419868 +n04586581 +n00483508 +n03131967 +n01847407 +n07929172 +n09683757 +n03786621 +n04369282 +n12733870 +n11612575 +n11619227 +n03301833 +n02176439 +n01569971 +n07935043 +n02563792 +n02051059 +n04482177 +n11859472 +n11710136 +n04115144 +n07864934 +n07691758 +n02620167 +n07748276 +n03415486 +n07835921 +n00452152 +n01848323 +n12906214 +n12075010 +n01563449 +n01499396 +n01570267 +n12047345 +n07920989 +n07601572 +n02683558 +n04428634 +n04345028 +n12161969 +n03460040 +n02561514 +n02006364 +n03582959 +n11812910 +n13185269 +n04297847 +n07896165 +n01552813 +n12361946 +n02031585 +n12766595 +n11622368 +n11695599 +n11615387 +n02509197 +n12409470 +n01314388 +n11758799 +n09846469 +n02675219 +n04253057 +n04041243 +n12276628 +n04381724 +n01855188 +n02203152 +n04403925 +n11895092 +n11924849 +n04172904 +n11888800 +n01546506 +n07906718 +n01489920 +n03436417 +n03615655 +n07765073 +n02434190 +n02004492 +n12282235 +n12406488 +n11981192 +n10373390 +n13183056 +n04332074 +n12818346 +n07731006 +n02598573 +n02438580 +n01957335 +n03356982 +n10288964 +n02629230 
+n02042759 +n12319414 +n01451426 +n03521675 +n02016066 +n01813532 +n13207335 +n11805544 +n04401828 +n02952109 +n03963294 +n10013811 +n12058630 +n01551711 +n01574560 +n01858780 +n10093818 +n03858183 +n01550172 +n03571280 +n02309242 +n10258786 +n01569423 +n10134178 +n08578517 +n04445327 +n03250279 +n02584449 +n03223553 +n04523831 +n04485423 +n02050442 +n04474035 +n04528968 +n02649546 +n01913166 +n09971273 +n04517408 +n02437482 +n03824713 +n03778817 +n07643026 +n01613177 +n12022054 +n07714448 +n07592768 +n00454493 +n03296328 +n02305929 +n03084834 +n03698815 +n12093600 +n08649711 +n03466493 +n04067658 +n03041114 +n03514451 +n01491006 +n04178329 +n03790953 +n03938401 +n02048115 +n07768858 +n03273740 +n10333601 +n05418717 +n12754003 +n02098806 +n03314608 +n01565930 +n12113195 +n12284821 +n12483427 +n04332580 +n10382710 +n03416094 +n02837887 +n03917198 +n14131950 +n04414476 +n11861641 +n11903671 +n01841441 +n09872066 +n01806467 +n04964799 +n00467320 +n01595974 +n03220692 +n01339083 +n01825278 +n11727358 +n04518343 +n11984144 +n07724269 +n02292692 +n02324850 +n01753032 +n01624115 +n11816649 +n07930062 +n02460451 +n12319204 +n04340521 +n12325234 +n01541102 +n02979836 +n00141669 +n01822300 +n11658544 +n12272883 +n03334382 +n11726707 +n03639077 +n07904934 +n03516367 +n03698723 +n03553248 +n11812094 +n03724417 +n01540566 +n02341974 +n11819912 +n07734555 +n02987379 +n03580845 +n12546962 +n02548247 +n12753245 +n07768423 +n12849279 +n11617090 +n02912894 +n07840027 +n12295033 +n12703383 +n02696165 +n10419785 +n04426427 +n03694639 +n11712282 +n04142999 +n01597737 +n03801533 +n01495493 +n07774719 +n03267113 +n01742821 +n03859170 +n03416640 +n03320959 +n12733218 +n02017725 +n13229543 +n09344324 +n04965451 +n01490112 +n10069296 +n12084555 +n04554406 +n04086446 +n02976249 +n02656032 +n02424486 +n02381609 +n09934337 +n04573937 +n07685399 +n02800497 +n02905152 +n02951703 +n07760153 +n03609397 +n00447463 +n03680512 +n02046939 +n03288886 +n11870418 +n03386544 +n07767171 +n07847453 
+n12687044 +n01664492 +n03099147 +n03463381 +n02125081 +n12920204 +n03517647 +n02603540 +n12267411 +n11933546 +n11947802 +n04387095 +n12975804 +n02973904 +n13195341 +n04048441 +n11753143 +n03212114 +n03298858 +n04366116 +n01424420 +n10450161 +n01442972 +n07877299 +n04503593 +n04349306 +n12969425 +n12597466 +n03092656 +n07914995 +n03487886 +n12223569 +n01756733 +n13919919 +n04175147 +n02029087 +n03530511 +n02425887 +n03572107 +n03927539 +n03383099 +n04130907 +n01632601 +n07823105 +n10378026 +n02382850 +n07613266 +n03235180 +n02810782 +n12708654 +n11636835 +n02823124 +n03402941 +n12121610 +n03715114 +n04052658 +n00480366 +n12493208 +n04255163 +n12145477 +n01489709 +n12402596 +n01598074 +n03837606 +n02628062 +n04103364 +n03247083 +n02032480 +n07736256 +n12578916 +n09218315 +n02218371 +n03730334 +n02080146 +n03836906 +n02868638 +n02198859 +n12744387 +n02942460 +n11754893 +n12274358 +n02725872 +n09218494 +n03942920 +n07574780 +n02921756 +n01757115 +n02763306 +n11758122 +n10508141 +n02303284 +n04083800 +n13879049 +n12765115 +n12075830 +n02666943 +n11980318 +n07907037 +n12794135 +n02333909 +n03870980 +n07586718 +n11923174 +n10782471 +n01493146 +n12294871 +n11726269 +n12932173 +n07825972 +n12732009 +n03572321 +n07682197 +n03423306 +n12495895 +n03545756 +n03557692 +n03785237 +n07902937 +n09899671 +n12061614 +n07902443 +n01449374 +n12632335 +n03474896 +n03539433 +n04310904 +n03902482 +n12006930 +n03285578 +n04200000 +n03912218 +n07821260 +n03548626 +n03223686 +n11826198 +n03165616 +n02104280 +n09981278 +n09382099 +n03732458 +n03987990 +n09946814 +n12270741 +n07737745 +n04172776 +n10189278 +n03543012 +n12629666 +n02180875 +n04087432 +n12961879 +n03321954 +n12528549 +n02424085 +n09843443 +n03846677 +n12304703 +n09873473 +n03410571 +n03041810 +n02425228 +n01562451 +n03615790 +n10081204 +n03985881 +n07842130 +n02890513 +n03649797 +n02381004 +n12560621 +n12523475 +n07687626 +n11905749 +n11759404 +n12905412 +n03542605 +n03983612 +n12573474 +n11972291 +n03767459 +n02698634 
+n12713866 +n13084834 +n02202006 +n13108323 +n02631475 +n10737103 +n03637898 +n03069752 +n12400489 +n09692915 +n10242328 +n02794664 +n12465557 +n12085267 +n03348868 +n12754981 +n02745611 +n10504206 +n12073554 +n02835724 +n04605572 +n02825961 +n03528523 +n12116429 +n02973805 +n12708941 +n01544704 +n04180229 +n09403211 +n08242223 +n02146371 +n12127768 +n09770359 +n03295246 +n01757677 +n04385799 +n02584145 +n07909593 +n12587132 +n13029326 +n04184316 +n07903643 +n01848555 +n10750031 +n02332156 +n12703557 +n03196990 +n12406902 +n02768973 +n12416073 +n02147591 +n09724533 +n09693982 +n12687462 +n01982068 +n03435991 +n03272125 +n07713763 +n03018712 +n03648431 +n03336575 +n07854184 +n12806015 +n07879174 +n03984643 +n03147280 +n02699915 +n07617708 +n01533651 +n12483841 +n01697611 +n02576906 +n03724066 +n03935116 +n09782397 +n01599269 +n10672371 +n12066630 +n03178674 +n15086247 +n03523987 +n02826068 +n12580654 +n02358390 +n01647640 +n10259997 +n03738066 +n13915023 +n02639605 +n03174450 +n12269406 +n09874428 +n03432061 +n04386051 +n03923918 +n04592465 +n12480456 +n10333439 +n04206790 +n01443831 +n02967626 +n07733712 +n03746155 +n12947313 +n11690254 +n12244650 +n12670758 +n08658309 +n12710693 +n11860555 +n03485198 +n03047799 +n04461570 +n07600177 +n02126640 +n12704343 +n02866386 +n03008976 +n04532831 +n03465426 +n12691428 +n01641206 +n04962062 +n03254046 +n04425804 +n02014524 +n03439348 +n02538010 +n11603246 +n12265600 +n12277800 +n04016240 +n12086192 +n09650729 +n01549641 +n03112719 +n04961062 +n02710324 +n12049282 +n12362274 +n11969607 +n12856680 +n02201000 +n07863802 +n03360622 +n07601809 +n04354487 +n12898774 +n12939282 +n03109693 +n12867826 +n12441390 +n12915811 +n12879527 +n04137355 +n04131368 +n03527149 +n10164492 +n09932508 +n12426623 +n12575812 +n02557318 +n10263790 +n04309548 +n00476235 +n04194127 +n11876634 +n10327987 +n03499354 +n02616851 +n04464615 +n03615406 +n02744844 +n11732567 +n10347446 +n09752519 +n04228215 +n10004718 +n07899533 +n12030908 +n15102894 
+n12044467 +n11711764 +n02610066 +n03415749 +n04562496 +n02034295 +n02297442 +n03566193 +n12506991 +n07774842 +n12827270 +n14908027 +n12242409 +n04072960 +n02829596 +n12496427 +n02266050 +n13108481 +n12473840 +n08677424 +n12076223 +n15091473 +n02815749 +n04549028 +n12558425 +n12023407 +n04179824 +n02378541 +n03188725 +n12517445 +n07573347 +n02004131 +n11921395 +n12570972 +n10602470 +n12095647 +n03854421 +n02450295 +n02792409 +n03543735 +n12836337 +n12204175 +n12152722 +n07900734 +n12517642 +n02775039 +n12607456 +n03376938 +n12179122 +n09873348 +n01847978 +n07888816 +n10453184 +n09675922 +n01851895 +n12865562 +n01797601 +n03711044 +n02738859 +n12064591 +n04033425 +n08551296 +n01650690 +n01537895 +n04207151 +n10087434 +n12261808 +n09438844 +n10364198 +n01814755 +n01583209 +n12270946 +n11892817 +n03344642 +n04117464 +n07847917 +n04003241 +n10362319 +n10477713 +n03495570 +n07560542 +n04363777 +n04534359 +n02404906 +n03349892 +n07712267 +n02960352 +n07866277 +n07857170 +n00324978 +n02755823 +n03150511 +n04211528 +n01899894 +n07588299 +n11874081 +n03425325 +n04506506 +n11949402 +n02952374 +n03309110 +n12159388 +n07591049 +n03068998 +n03228254 +n10279018 +n04173046 +n07728053 +n13052931 +n01597906 +n12368451 +n02767665 +n09435739 +n03915900 +n09728285 +n03292603 +n03331077 +n07817160 +n07917392 +n12540250 +n04153025 +n10209082 +n03968581 +n12676534 +n11824146 +n03521899 +n01853666 +n04292921 +n12332030 +n03984759 +n02863014 +n07801091 +n07723177 +n03289660 +n01533481 +n04488202 +n03468821 +n02382338 +n03543254 +n01961985 +n07915918 +n03703862 +n02771004 +n02047045 +n03877674 +n13141415 +n03529629 +n02240517 +n03675235 +n04491638 +n12384037 +n04419642 +n03019685 +n07591586 +n04496726 +n12985420 +n12927013 +n12196694 +n03473227 +n11621547 +n02988066 +n10451450 +n07729828 +n09618760 +n12196527 +n01555305 +n12830222 +n11950877 +n13190747 +n12160303 +n12390099 +n02818135 +n03163381 +n04554211 +n03244919 +n07897975 +n03386726 +n04290615 +n02011281 +n12407890 +n04123448 
+n07904865 +n03447358 +n02393940 +n07931870 +n02937958 +n04318787 +n04587327 +n12807409 +n04112430 +n07560193 +n12774299 +n02618827 +n07854982 +n03757604 +n03817191 +n12793494 +n02324431 +n03013850 +n04113641 +n01612476 +n03127408 +n02038466 +n03799876 +n04257684 +n03382292 +n10449664 +n04394630 +n10275395 +n07698250 +n12329473 +n07694659 +n07642742 +n02563648 +n08583455 +n02557182 +n02775178 +n09274152 +n03189083 +n12570703 +n04211219 +n12486574 +n03073694 +n11969166 +n02475078 +n02976350 +n08584914 +n07899660 +n10116702 +n01613807 +n12461109 +n04025508 +n12451240 +n12596849 +n12079963 +n03541269 +n04561422 +n11699442 +n07725255 +n03460297 +n07616748 +n12757458 +n03103563 +n02813752 +n07698782 +n12840362 +n01543632 +n01602832 +n01875313 +n12472024 +n02926591 +n02872333 +n10728624 +n12532564 +n03882960 +n12333053 +n03684224 +n13146583 +n03436075 +n04154340 +n03868643 +n02598878 +n04139140 +n03266371 +n04083309 +n12506341 +n12200143 +n03503477 +n12807773 +n03123917 +n13029760 +n10173771 +n03659809 +n12047884 +n12759273 +n04193377 +n04258438 +n04597400 +n04579986 +n03719743 +n04299963 +n02864504 +n10510245 +n03417970 +n09719794 +n03138344 +n02085272 +n07694516 +n12665857 +n01642257 +n03229244 +n10581890 +n10318293 +n03635108 +n10652605 +n12189429 +n09934774 +n11709205 +n04207903 +n10296176 +n10603851 +n03450734 +n13223588 +n12754648 +n09886403 +n07751280 +n11950686 +n07814390 +n12799776 +n01646902 +n09796809 +n12819728 +n01938454 +n02410011 +n07607138 +n02119634 +n10332861 +n09230202 +n02757061 +n02849885 +n15092227 +n12151615 +n03111041 +n02413050 +n03506560 +n07744057 +n04030518 +n12544539 +n04089836 +n02038993 +n13882201 +n12099342 +n01946630 +n10095769 +n02982416 +n12957924 +n13215586 +n07726525 +n12452836 +n03801671 +n04598318 +n01449712 +n12428747 +n04119751 +n10509063 +n07694839 +n02782602 +n11626409 +n02573704 +n12399384 +n12388989 +n01601068 +n11971406 +n04367011 +n07930315 +n12925179 +n04967674 +n03497352 +n03653833 +n01819465 +n03688192 +n02802990 
+n03393761 +n04430475 +n13107694 +n10384496 +n07867164 +n12449526 +n01515303 +n12574320 +n01444339 +n07919310 +n03453443 +n04173907 +n02887489 +n07772788 +n03629520 +n02580830 +n11705387 +n12069679 +n01956344 +n02406533 +n03973402 +n03938037 +n04969952 +n04103094 +n04393808 +n07715407 +n04172107 +n01917882 +n12085664 +n07608429 +n09835230 +n04135024 +n07842605 +n12568186 +n04339879 +n07691091 +n01801876 +n00474568 +n01807105 +n12128071 +n01673282 +n11948864 +n03991837 +n09659188 +n02070174 +n02670683 +n12454949 +n10385566 +n11631854 +n12305293 +n12002428 +n12948495 +n12757816 +n11852028 +n10690648 +n09283866 +n03214582 +n03423877 +n04127521 +n03006626 +n09283193 +n07712559 +n01447331 +n02981321 +n02658531 +n11947629 +n02419634 +n02420828 +n11923637 +n12570394 +n11968931 +n12731029 +n09749386 +n07736813 +n03967396 +n11908846 +n03029445 +n02426481 +n01964271 +n13198914 +n04484432 +n12656685 +n10806113 +n11849983 +n03236423 +n10649197 +n07688624 +n03057541 +n12015221 +n02094931 +n02014237 +n07560331 +n02801450 +n04206570 +n07556406 +n11627908 +n11889619 +n07852229 +n04063154 +n02713364 +n02783459 +n12877838 +n02930214 +n02125010 +n02407276 +n07815424 +n12855494 +n12530818 +n07750449 +n01963317 +n10082997 +n03245724 +n03012013 +n03555006 +n02421136 +n03332989 +n04375405 +n03746486 +n12636224 +n03278914 +n07917133 +n12504783 +n09416890 +n03896526 +n02258198 +n12983048 +n03837698 +n12869061 +n04541987 +n01637615 +n04401949 +n02241426 +n13220122 +n07876651 +n03729308 +n02364840 +n01339801 +n03418915 +n09257843 +n11614039 +n09731343 +n03809603 +n05399243 +n01569262 +n11901597 +n03124474 +n01566207 +n03796522 +n12595699 +n04573281 +n09689435 +n11859737 +n03201529 +n12902662 +n03374372 +n03760944 +n09189157 +n01517966 +n10431625 +n02898269 +n03693707 +n04369025 +n07834618 +n04095342 +n02786331 +n03822504 +n02284611 +n09862621 +n03436891 +n07688898 +n12435777 +n03949317 +n12443323 +n12273114 +n12623077 +n04333869 +n07907831 +n07774596 +n05450617 +n03320262 +n04190376 
+n12671651 +n11819509 +n07588111 +n09756049 +n07611046 +n04973291 +n11602873 +n00120010 +n03500699 +n03844815 +n03708843 +n04452528 +n04387261 +n09889065 +n10147121 +n03318294 +n12599435 +n04164406 +n01965529 +n11636204 +n11791569 +n12275131 +n02977330 +n07851443 +n04132603 +n07824191 +n09760609 +n12190410 +n07915491 +n12665271 +n10120671 +n02570164 +n10208950 +n02163297 +n02244797 +n09842528 +n08645104 +n01841679 +n11603835 +n04488857 +n07814487 +n01953762 +n04612373 +n11877193 +n03198500 +n03981924 +n01943087 +n11552806 +n04414909 +n03005033 +n02457945 +n10500217 +n10375314 +n04607242 +n07914777 +n09832456 +n12915568 +n12813189 +n10578021 +n03519081 +n07801779 +n12026476 +n03296081 +n03850492 +n07902121 +n09881265 +n12562785 +n03290195 +n10131151 +n10078719 +n01558765 +n03917814 +n02045596 +n07734183 +n03414676 +n07933154 +n02126787 +n12148757 +n12263987 +n07684164 +n03406966 +n01492569 +n02988963 +n12963628 +n09964202 +n03417749 +n01854838 +n02921029 +n02183096 +n11762433 +n11722466 +n02387093 +n02768655 +n12519089 +n09871229 +n07938313 +n10502329 +n11989393 +n03768916 +n13145040 +n11813077 +n04457910 +n03655720 +n03703945 +n11876803 +n01438581 +n07910379 +n07847827 +n02300797 +n09245515 +n10754189 +n04581102 +n12513172 +n02458135 +n03762332 +n11789589 +n09695620 +n03850053 +n07911249 +n12342852 +n12753007 +n07748574 +n07727458 +n03696568 +n04304680 +n07723039 +n07775197 +n07577144 +n03043693 +n04374735 +n01858281 +n09228055 +n09466678 +n01949085 +n02024479 +n11623815 +n02704645 +n07894451 +n01751472 +n01646388 +n01317916 +n13880994 +n10300500 +n11794024 +n03735963 +n04610274 +n11854479 +n07754894 +n02639087 +n02122510 +n02262803 +n12732966 +n04529108 +n13194036 +n09990777 +n10009276 +n12088223 +n12155009 +n07886176 +n04278247 +n04222723 +n11707229 +n01999186 +n07851641 +n12741792 +n01315213 +n10033412 +n04249582 +n03586631 +n03237839 +n12037499 +n12014085 +n07756325 +n01636352 +n03905947 +n08611339 +n07693590 +n03724538 +n09791816 +n01666585 +n10588965 
+n11613219 +n10542608 +n12913791 +n10528023 +n03171635 +n11923397 +n12854600 +n10410246 +n12698598 +n04135118 +n09844457 +n04441790 +n03882611 +n02337001 +n07907342 +n12561169 +n12027658 +n10719132 +n09851165 +n02801823 +n12330587 +n01683558 +n12162181 +n04387932 +n11704620 +n09679170 +n07601290 +n04028221 +n10277027 +n09877750 +n11758483 +n10027246 +n03819336 +n10205231 +n12478768 +n03451711 +n12973443 +n01923025 +n03262717 +n07807594 +n00475535 +n07744430 +n02341475 +n04614655 +n07924747 +n03388323 +n12680402 +n03202940 +n04534520 +n09661873 +n15092059 +n11832480 +n04198355 +n12529220 +n12389130 +n12304115 +n03234952 +n07610620 +n02868975 +n04442741 +n05282652 +n02820675 +n12795352 +n12675299 +n08547468 +n04189651 +n04141198 +n04513998 +n12273939 +n12482668 +n12858618 +n01958346 +n03172038 +n10280674 +n04301760 +n02631330 +n12433178 +n07763107 +n03068181 +n07565259 +n03605598 +n13177884 +n04005197 +n09751496 +n12737383 +n07648997 +n09839702 +n09442595 +n07925229 +n12150722 +n11898775 +n09904208 +n02207345 +n07642361 +n07685918 +n03205458 +n10574538 +n09742315 +n02599557 +n03585682 +n04273659 +n02200850 +n03410740 +n03391301 +n07726672 +n09782167 +n13155305 +n02067240 +n07561848 +n07728708 +n12463134 +n12228229 +n09743487 +n12225563 +n03421669 +n03226375 +n03973945 +n12498055 +n04483925 +n04564278 +n11890150 +n12519563 +n12754468 +n04353573 +n11615607 +n04430896 +n04585128 +n10395828 +n10773665 +n02772435 +n09881895 +n12663023 +n01615303 +n12803754 +n09445008 +n03955296 +n05245192 +n05486510 +n07899769 +n07575510 +n02307681 +n03814817 +n02670186 +n03598515 +n12797860 +n03518135 +n07587962 +n12630763 +n06273743 +n09843824 +n03226254 +n12407222 +n02961544 +n12951835 +n06417096 +n02016659 +n01441117 +n07735404 +n09411189 +n13896217 +n03262248 +n03451120 +n02525382 +n03375329 +n04155068 +n12916179 +n10297234 +n11907100 +n03423568 +n04360914 +n12027222 +n12199790 +n01744270 +n09896401 +n07925116 +n03693860 +n04414319 +n07767549 +n03555564 +n04043411 +n07872593 
+n03774461 +n03129471 +n04497801 +n11756870 +n09776346 +n04530283 +n01520576 +n12828220 +n01583828 +n04120842 +n09676021 +n04344734 +n01916388 +n12513613 +n09861863 +n02310334 +n03318983 +n04533499 +n02427576 +n12727518 +n04502059 +n11725480 +n11987126 +n11876204 +n03504205 +n09720595 +n12315999 +n12935609 +n04452757 +n12201331 +n01603152 +n10772092 +n03156279 +n12723610 +n02003037 +n03244775 +n07802963 +n11954642 +n07770034 +n09931165 +n10559508 +n01745902 +n07654148 +n10070108 +n01585287 +n13196003 +n04389718 +n10253122 +n03730893 +n02983357 +n02783900 +n01680813 +n03072440 +n03109253 +n03274435 +n11655974 +n10048612 +n07849733 +n07896994 +n03792334 +n03035832 +n03819448 +n03105088 +n11943992 +n01485479 +n01699675 +n11795049 +n12086778 +n01840120 +n07753980 +n10685398 +n04346428 +n04532398 +n07709172 +n02146700 +n09461069 +n03853924 +n01321456 +n12068432 +n09757449 +n03206282 +n03751757 +n13053608 +n11695974 +n12123741 +n03500209 +n04367371 +n02890940 +n01917611 +n07835331 +n02907656 +n04136045 +n12059625 +n03862862 +n12864160 +n00440039 +n03448590 +n12628986 +n04115802 +n03949145 +n12916511 +n12647893 +n09706255 +n13181811 +n07752109 +n04375615 +n01648620 +n04403524 +n09967967 +n12911079 +n03857687 +n02803539 +n01551080 +n10734891 +n13235159 +n04127633 +n07935878 +n12853482 +n10191001 +n03126385 +n10076224 +n01812866 +n12919403 +n03769610 +n09283767 +n03462110 +n11770256 +n12038898 +n09889170 +n11894558 +n10298647 +n02592055 +n02795670 +n11701066 +n12762049 +n02890662 +n07918193 +n02976455 +n03100897 +n13127843 +n12184912 +n00468299 +n12407079 +n12496949 +n03541537 +n05260127 +n01535140 +n01541760 +n11945783 +n07687053 +n07745046 +n12083847 +n02382132 +n12270027 +n10140597 +n03788914 +n01790711 +n02197689 +n03173270 +n10368624 +n04449290 +n01579729 +n07834872 +n07734417 +n02379630 +n01636829 +n12549192 +n12951146 +n13579829 +n03268142 +n11761202 +n02769669 +n09452760 +n04095577 +n12031139 +n02003577 +n12891469 +n03931885 +n01577941 +n04176295 +n12046428 
+n03418402 +n13145250 +n11865874 +n12473608 +n11797321 +n01798168 +n09923186 +n02786736 +n01698782 +n09976283 +n03975788 +n14685296 +n01682172 +n07838441 +n02771286 +n03429137 +n03948950 +n02512830 +n02298218 +n10141364 +n02823848 +n02077384 +n12584715 +n11748811 +n02214773 +n03667552 +n04121426 +n04135710 +n07579917 +n12275888 +n07826453 +n12167436 +n04586072 +n09877288 +n04248396 +n02761696 +n03038870 +n01490360 +n12353203 +n09785891 +n12057660 +n04146343 +n12557556 +n02081798 +n02917964 +n07898617 +n12597798 +n07574176 +n07764630 +n03008177 +n04255899 +n04434207 +n07897600 +n09929577 +n11811921 +n12415595 +n02893941 +n12276110 +n02821202 +n09690621 +n02508742 +n02077787 +n02390640 +n03764822 +n02257985 +n13033134 +n04559166 +n07865196 +n10506915 +n12051103 +n10473917 +n12775919 +n02971579 +n12880462 +n11837970 +n02063662 +n09840520 +n12019827 +n09208496 +n12836508 +n02982232 +n04219185 +n03332005 +n07914128 +n07862461 +n04250692 +n09267854 +n04561734 +n02076402 +n12344837 +n02919148 +n06592281 +n03668803 +n03062985 +n04246731 +n12112609 +n04012482 +n03558633 +n03982642 +n01998741 +n07665438 +n04209509 +n07913882 +n01749244 +n07801342 +n02611561 +n04488742 +n01897536 +n10624437 +n13128976 +n07931612 +n04300643 +n03727067 +n03360431 +n07593471 +n10253296 +n03297226 +n03854506 +n07879450 +n10562283 +n12557438 +n13154388 +n12862512 +n02126028 +n07752514 +n02387887 +n12066261 +n07666176 +n02806530 +n09988703 +n03721252 +n03221540 +n12195533 +n02682569 +n03622058 +n03943266 +n04207596 +n11721337 +n02427032 +n07910152 +n01551300 +n12861345 +n11660300 +n03786313 +n12966945 +n02046171 +n02797535 +n03546112 +n07711232 +n02044908 +n02998563 +n02652132 +n12634986 +n12187247 +n11645590 +n07582892 +n03065243 +n09911226 +n04396902 +n10763075 +n02359047 +n10400108 +n04294614 +n03991646 +n11728945 +n07766891 +n12277150 +n13141564 +n10563314 +n12426749 +n07827750 +n12403994 +n12627119 +n03420801 +n10203949 +n12830568 +n12280060 +n13180875 +n12659064 +n04239436 +n03823312 
+n04367746 +n12448700 +n01896844 +n07581931 +n09384106 +n11625223 +n04198722 +n01477875 +n09932336 +n03477512 +n12281974 +n10117739 +n07759194 +n12281788 +n01405007 +n03077616 +n02304036 +n12947544 +n03140126 +n12356960 +n07807002 +n07877849 +n02956795 +n04373795 +n07925500 +n10359546 +n09730077 +n01694955 +n10611267 +n04316498 +n07849912 +n12841354 +n07903543 +n10026976 +n04050313 +n03939844 +n03260849 +n07917507 +n12228387 +n03199775 +n01569566 +n02403920 +n04261638 +n02986160 +n03724623 +n01960177 +n03783430 +n07877675 +n10401639 +n04215153 +n03077741 +n02589623 +n12934985 +n03233624 +n04506688 +n12194147 +n09975425 +n07818825 +n12641007 +n10036692 +n02771750 +n12285900 +n01472303 +n10033663 +n10707134 +n03219966 +n11772879 +n10146416 +n10435169 +n10304086 +n12385566 +n03126580 +n12904314 +n03619196 +n02299846 +n03574243 +n12368257 +n03690473 +n01748686 +n09834378 +n07750736 +n02930645 +n01679307 +n03721047 +n02710044 +n07563207 +n02930080 +n09309168 +n03127203 +n02863536 +n02536165 +n01559639 +n09654518 +n02961035 +n12007406 +n12773651 +n04351699 +n03114504 +n06273414 +n02017475 +n01733466 +n02175014 +n07920663 +n03953901 +n09670521 +n09400987 +n11791341 +n02284884 +n12919646 +n07880325 +n03801353 +n01982347 +n07828642 +n01570421 +n03998333 +n03449309 +n10482220 +n12850906 +n12805561 +n12926689 +n03232543 +n04248851 +n03195959 +n04082562 +n03846100 +n07682952 +n07695652 +n11809271 +n09895561 +n04287898 +n09740724 +n02859955 +n09830400 +n03674731 +n02825153 +n04571686 +n13107891 +n10318607 +n07848093 +n13226871 +n08555710 +n03137473 +n02776978 +n03141455 +n12514138 +n01809371 +n09405078 +n01753180 +n02184473 +n11610215 +n03539546 +n12731835 +n04485884 +n03590588 +n10221312 +n04049753 +n03441345 +n02302244 +n12262185 +n15092650 +n11877646 +n10377185 +n01684578 +n03796605 +n07897116 +n03164344 +n12135049 +n10757050 +n01692523 +n04566756 +n07697699 +n07575392 +n10262655 +n04064747 +n07914006 +n12433769 +n07873348 +n04457767 +n10019072 +n02921195 +n03856465 
+n04041408 +n12639584 +n12920955 +n11781176 +n07864756 +n03941013 +n03646148 +n04401578 +n11692792 +n02757714 +n02286089 +n04253168 +n03890514 +n07855510 +n03507458 +n04123026 +n11661909 +n12435152 +n04330746 +n09481120 +n03731019 +n03717285 +n03271030 +n02772101 +n07740597 +n02847852 +n12825497 +n12263738 +n03342262 +n03603594 +n07804543 +n12932365 +n12695975 +n10297531 +n04054670 +n03175081 +n12703856 +n03832144 +n03966206 +n02414290 +n03619275 +n09738121 +n03290096 +n10585077 +n07731767 +n12409840 +n12026981 +n02278980 +n02752810 +n01654637 +n02654112 +n10314836 +n13023134 +n01823414 +n07461050 +n11902982 +n04543636 +n02204907 +n04049585 +n12304899 +n03073545 +n04272928 +n10315456 +n03975657 +n09899782 +n12288005 +n07005523 +n03795269 +n09823832 +n02242137 +n02907391 +n03643491 +n03245889 +n12285369 +n03061345 +n03797264 +n07838073 +n09219233 +n02859343 +n07608098 +n03920641 +n12578626 +n10688356 +n04542858 +n07834065 +n00443803 +n04181561 +n04570214 +n02047517 +n03295012 +n01633781 +n10610850 +n04035836 +n03001115 +n04593376 +n02393807 +n13061348 +n10123122 +n11800236 +n13207094 +n10140929 +n12167602 +n01809752 +n10421956 +n02764935 +n03424489 +n12889219 +n04046091 +n07714287 +n07708685 +n07736087 +n04142434 +n11961446 +n04521863 +n02414763 +n02901377 +n00467536 +n13085747 +n03855756 +n11846765 +n02530999 +n03063199 +n04258618 +n12204032 +n04424692 +n11758276 +n02653497 +n03766508 +n02026629 +n02572484 +n12339831 +n01635027 +n01668436 +n07821919 +n01543175 +n02689748 +n12528974 +n04024862 +n04184880 +n11720891 +n13869896 +n01678043 +n01647303 +n11532682 +n03236217 +n04963307 +n03012897 +n11682659 +n03191286 +n07643891 +n12737898 +n10680609 +n07924955 +n03879705 +n10461060 +n02523427 +n02013567 +n09893344 +n04124488 +n09863031 +n12454436 +n12305089 +n07709046 +n03805180 +n11940599 +n01691217 +n04198562 +n03978421 +n02357585 +n07818572 +n12870682 +n03798442 +n04154938 +n10550369 +n11957678 +n01958531 +n09936825 +n02334201 +n07910538 +n11978551 +n10562135 
+n12700088 +n12784889 +n04480853 +n03281673 +n07588419 +n02968333 +n11935469 +n13046669 +n11730602 +n09643799 +n11849467 +n01758757 +n09638454 +n03267468 +n07914586 +n12104734 +n02961225 +n09827246 +n09917214 +n13079073 +n12634734 +n04089376 +n13034062 +n11714382 +n12753762 +n07683039 +n11840067 +n07689842 +n12173069 +n12172481 +n04182152 +n07869522 +n10356877 +n02771166 +n03154895 +n07615289 +n12986227 +n12361135 +n03456447 +n12706410 +n12895811 +n02988156 +n03130761 +n10639359 +n03628215 +n02738741 +n01643507 +n07730708 +n03232309 +n02846733 +n04969540 +n03051041 +n12890928 +n03235327 +n04289576 +n07588817 +n10325774 +n03973285 +n09703485 +n02358584 +n03061674 +n03195332 +n02901259 +n07849619 +n04486934 +n07908812 +n01588725 +n03682877 +n11949015 +n04146504 +n04146862 +n07898247 +n03318865 +n04367950 +n07880213 +n04247011 +n01447658 +n12711817 +n03146687 +n02926426 +n12856091 +n11966896 +n02413593 +n09764900 +n03009794 +n03314227 +n10499232 +n10075693 +n04451318 +n12320806 +n11933728 +n07764315 +n12133682 +n09904837 +n12832538 +n03816530 +n07802863 +n04391445 +n09728137 +n03887330 +n04436012 +n03957991 +n07771731 +n06266973 +n10407310 +n10290919 +n07862244 +n01842504 +n10262561 +n12726159 +n07691954 +n07618119 +n03437829 +n11966617 +n03629100 +n04231905 +n04208760 +n03344305 +n03684143 +n12934174 +n08645212 +n03556679 +n12109365 +n03751458 +n02380875 +n02025389 +n02770721 +n09830629 +n02800675 +n04951186 +n04483073 +n12710577 +n12789054 +n12058192 +n11777080 +n07716203 +n09618880 +n04525821 +n04016846 +n02918330 +n10375052 +n13158512 +n13090871 +n02929582 +n02308735 +n10487182 +n02213663 +n07608339 +n04384593 +n12890490 +n03992436 +n02994573 +n13231078 +n12880244 +n01651059 +n02925009 +n09686401 +n13219976 +n09981540 +n04582771 +n06267758 +n09893502 +n13214340 +n03272940 +n12554911 +n02214341 +n04137089 +n03874487 +n04573513 +n12003167 +n12004547 +n13065089 +n01903346 +n04373428 +n02216365 +n02024185 +n12577895 +n11698042 +n07586318 +n11705776 +n03030353 
+n04486213 +n07885705 +n07928163 +n02356612 +n02767038 +n02897097 +n11662371 +n04128710 +n09842395 +n07683360 +n11533212 +n08495908 +n12841193 +n03669886 +n07768068 +n02381831 +n12081215 +n02757337 +n02811618 +n10144338 +n01379389 +n09698644 +n12779851 +n10400618 +n11801891 +n12322099 +n12408077 +n02767956 +n08640962 +n07816839 +n03021228 +n10346015 +n07868830 +n07917272 +n10076957 +n12865708 +n04290259 +n03595264 +n03986224 +n07825194 +n01610100 +n04417086 +n12995601 +n12734070 +n15091129 +n12428412 +n07587331 +n02405101 +n03108455 +n03594523 +n04489695 +n03892425 +n13032618 +n04409011 +n07590752 +n15092942 +n03914583 +n13066448 +n03532919 +n10639637 +n04566561 +n13223843 +n07904637 +n12347158 +n02720048 +n03901229 +n03936466 +n10574840 +n03782794 +n12397431 +n07908567 +n12580896 +n02697221 +n09791419 +n02577403 +n07870069 +n02136103 +n04318892 +n01462544 +n09747191 +n12287836 +n03067093 +n03934565 +n03543945 +n13126856 +n02240068 +n01585422 +n12413301 +n03246454 +n01876034 +n03635330 +n11680596 +n03228365 +n03082656 +n11609862 +n12859986 +n03934229 +n10233248 +n03166514 +n12166793 +n10115430 +n03327553 +n03373611 +n02967782 +n12338258 +n01604968 +n01323155 +n02590094 +n03044934 +n07866409 +n12291143 +n14900342 +n12094612 +n07845702 +n07926250 +n10750640 +n04359500 +n09797873 +n09953350 +n03561047 +n12122725 +n12725738 +n01453087 +n04977412 +n04575723 +n13219833 +n12161056 +n04273285 +n12482437 +n12863624 +n04953296 +n03390075 +n10188957 +n02874442 +n04236935 +n09990690 +n12866459 +n04075715 +n09725000 +n12794367 +n12461673 +n03050453 +n03677115 +n12427391 +n07736371 +n02973236 +n02406749 +n12322699 +n12815198 +n10680796 +n03268311 +n02405799 +n12302248 +n09791014 +n01545574 +n07740033 +n07862095 +n09901337 +n04390577 +n03597916 +n12110085 +n11802586 +n04205505 +n07696527 +n12076852 +n04344003 +n03326660 +n02823586 +n03042139 +n01565345 +n07905296 +n01454545 +n07650903 +n07905386 +n12530629 +n02841187 +n02943964 +n03329536 +n09681234 +n03479121 +n03770085 
+n04147793 +n11552133 +n03774327 +n13197507 +n07901355 +n10400437 +n07837912 +n02310941 +n07845087 +n02239774 +n04976319 +n03960490 +n05239437 +n06275471 +n01633406 +n04257223 +n12009420 +n10483138 +n02775897 +n07866151 +n07922512 +n02666624 +n03944024 +n03842377 +n01832493 +n07855907 +n03968728 +n04492060 +n07879072 +n11635830 +n11802800 +n02357911 +n02431628 +n03730494 +n13099999 +n07768230 +n13147270 +n12331655 +n10237676 +n11855553 +n09759501 +n10620586 +n13181055 +n12309277 +n13183489 +n04382695 +n07679034 +n10495756 +n02173113 +n12764202 +n03683457 +n10298912 +n07680313 +n10160280 +n02205673 +n12053690 +n11653904 +n02931294 +n04093775 +n12856479 +n02427470 +n07608866 +n09954639 +n11639445 +n03364599 +n09924106 +n09683924 +n10419472 +n03089753 +n12620969 +n07604956 +n12940609 +n12564083 +n03514894 +n10343355 +n13068255 +n03805280 +n12793284 +n03140652 +n02666501 +n11717577 +n04267435 +n04593185 +n12820853 +n03934311 +n02630615 +n07767002 +n07723968 +n01631354 +n07931452 +n12414818 +n03097673 +n09944430 +n04457474 +n11850521 +n12227658 +n10131815 +n12408717 +n03566730 +n12777680 +n06273555 +n04357531 +n03759243 +n09861599 +n03015851 +n04175039 +n03392741 +n07859796 +n07741138 +n04474187 +n02266864 +n04553561 +n02667244 +n12720200 +n12432356 +n07806120 +n10362557 +n11929743 +n07765862 +n02963987 +n02762371 +n02747672 +n04289195 +n04056413 +n03039493 +n03894677 +n12338655 +n04422409 +n12079120 +n10252222 +n10168837 +n12919847 +n10297841 +n01340014 +n11710827 +n10167838 +n12278107 +n01384164 +n10498986 +n02742468 +n02899439 +n11752937 +n12107710 +n12315598 +n03985441 +n07605804 +n07686202 +n12884100 +n13121349 +n11725311 +n10420507 +n11706761 +n01381044 +n03331599 +n12336333 +n10185483 +n07880880 +n01782516 +n12615232 +n03175457 +n12657082 +n01750437 +n07918879 +n13213066 +n12927494 +n02910542 +n06273986 +n02161338 +n10235024 +n12180168 +n03659950 +n02160947 +n11861853 +n09866817 +n09279986 +n12393269 +n01552034 +n05526957 +n02956883 +n12818966 +n09753792 
+n03114236 +n12273344 +n12546617 +n13177048 +n02129991 +n01731941 +n01628770 +n12774641 +n07685546 +n03253279 +n10678937 +n12579038 +n08673039 +n01392275 +n02379081 +n10530150 +n12851469 +n12414449 +n11694664 +n11877283 +n09708889 +n03585438 +n00483605 +n12332555 +n03323096 +n07851767 +n02417663 +n10667863 +n02856237 +n09269341 +n01596608 +n09720033 +n13160604 +n04443164 +n02814428 +n11622771 +n10328123 +n04338963 +n01794651 +n12069217 +n07762740 +n02935387 +n11897116 +n10569179 +n12749852 +n10745006 +n07823280 +n12162425 +n09801533 +n03772269 +n04518643 +n07916319 +n12771597 +n02147173 +n10342992 +n03795123 +n11646344 +n12847927 +n07686021 +n12383894 +n04465050 +n14564779 +n04212467 +n12274863 +n02380052 +n04329958 +n12034384 +n04213353 +n04366033 +n04955160 +n02778294 +n12890685 +n03028785 +n03097535 +n04533594 +n01750167 +n01415626 +n12276477 +n07729926 +n07711371 +n12843970 +n10500419 +n12891093 +n03840823 +n12509665 +n11878101 +n04315342 +n07685031 +n12305819 +n10039271 +n12264512 +n03911866 +n13919547 +n12413419 +n03785721 +n02599347 +n03786194 +n04018155 +n12856287 +n09607903 +n02396088 +n10212501 +n10313000 +n07683617 +n03586219 +n03890233 +n03156767 +n12033709 +n01648139 +n04399846 +n10671736 +n07698672 +n10791115 +n07708124 +n02709908 +n04266968 +n01758141 +n10058962 +n09444783 +n03668067 +n02838345 +n02388143 +n12893993 +n12590499 +n01462042 +n02689434 +n13209808 +n04075291 +n02412629 +n01953594 +n03906463 +n03043423 +n02200509 +n10152763 +n12504570 +n04396808 +n03382413 +n03618101 +n02767147 +n02390101 +n03450974 +n12778398 +n03625539 +n02574271 +n04113316 +n07572616 +n11809437 +n04119230 +n03829954 +n10500603 +n04258732 +n02731900 +n10174330 +n01574801 +n08663703 +n12558230 +n03981760 +n07732904 +n11875523 +n11823436 +n03238286 +n03079494 +n04281260 +n07873057 +n11686912 +n10568608 +n07593004 +n04271531 +n10037922 +n07838551 +n03615300 +n12624568 +n12940226 +n05242928 +n03680734 +n01589893 +n11652376 +n11893640 +n04119091 +n09696763 +n07851554 
+n02660640 +n12124818 +n10370955 +n02663211 +n02414209 +n13187367 +n03258577 +n04375241 +n07617932 +n12240477 +n03417202 +n07595649 +n03839424 +n03087245 +n02431441 +n04396335 +n03484809 +n03426285 +n03592931 +n02912319 +n03488887 +n12187891 +n07592400 +n12918609 +n07858114 +n07567980 +n01548694 +n02726210 +n02406859 +n10147262 +n05458576 +n02848921 +n03503233 +n02587618 +n03465151 +n03582508 +n11654293 +n03695452 +n02197185 +n04223170 +n10243273 +n03149135 +n02842809 +n03669534 +n03857291 +n02147328 +n12278865 +n12733428 +n03264906 +n09924195 +n10432189 +n12203896 +n03892728 +n12360958 +n10418735 +n01650901 +n12420722 +n03341606 +n02557909 +n07751858 +n03483971 +n12019035 +n03991202 +n02072040 +n03129848 +n04505345 +n02405440 +n03901974 +n11656123 +n11552976 +n10291822 +n10108018 +n09902731 +n03325691 +n12646072 +n04134170 +n12097396 +n07564008 +n01624305 +n03421117 +n02776007 +n10792856 +n07818133 +n03227184 +n10198437 +n04157099 +n12743009 +n07820960 +n12749456 +n13035925 +n05262698 +n03422771 +n02878628 +n12140903 +n07820297 +n03524745 +n09901921 +n03170872 +n10039946 +n12638964 +n11989087 +n03461988 +n04287451 +n04298053 +n07882420 +n04002262 +n02734835 +n11707827 +n07756641 +n12808007 +n10069981 +n12637123 +n12947895 +n04363082 +n04292080 +n11858077 +n04535252 +n12646397 +n12283147 +n12321077 +n02746595 +n02895328 +n07624924 +n12537253 +n11952541 +n02181477 +n01440160 +n03878828 +n12861541 +n02869563 +n04242084 +n03197201 +n09396608 +n04291992 +n07845863 +n04314522 +n12843557 +n04029647 +n12146654 +n13147386 +n12954799 +n11920133 +n03038480 +n03213715 +n02971473 +n04149374 +n04230387 +n00444340 +n11859275 +n07564796 +n02948403 +n10186068 +n04315713 +n02366002 +n02670935 +n13208302 +n10225931 +n07826340 +n04102872 +n02259708 +n11855842 +n09941089 +n08896327 +n10237464 +n12084158 +n03764995 +n03627954 +n12384375 +n10341343 +n07876189 +n04573379 +n07904293 +n07840520 +n12038038 +n03005147 +n10483799 +n02978367 +n01484285 +n13094273 +n04539053 +n01748389 
+n10146816 +n07815839 +n12991837 +n03294604 +n03588841 +n04055180 +n03209477 +n09917345 +n04393913 +n12337391 +n12126084 +n01882125 +n07688130 +n02814116 +n09640715 +n12679593 +n12596345 +n03029925 +n11761650 +n04457157 +n12683096 +n07709881 +n03841290 +n13157684 +n07927836 +n03523134 +n03690279 +n10187491 +n12451070 +n02682311 +n03978815 +n11806679 +n07808022 +n01386354 +n03622526 +n02369293 +n11885856 +n02289610 +n12663359 +n02624987 +n13173488 +n03027001 +n07896765 +n11935330 +n07814790 +n04242704 +n09959142 +n07589543 +n03551582 +n07843117 +n03556992 +n02060569 +n04000998 +n03825271 +n11946918 +n02874750 +n03479502 +n09919451 +n02176747 +n02080713 +n03400972 +n10222170 +n07926785 +n07852302 +n03012373 +n10438842 +n12868019 +n03634034 +n04210591 +n07853560 +n12374862 +n09248399 +n04355115 +n12908093 +n12906498 +n12875269 +n02791665 +n03146777 +n02854378 +n12414159 +n07821610 +n07595180 +n12238913 +n12141385 +n10761190 +n12165758 +n01653223 +n12956367 +n03695753 +n12416703 +n12346813 +n03405111 +n04304215 +n01624212 +n12674895 +n09850760 +n12407715 +n04156040 +n11610437 +n03395256 +n09970822 +n04229959 +n02530831 +n07870894 +n12098524 +n12828379 +n04057215 +n10751152 +n10053439 +n03674270 +n07869291 +n12256920 +n02535163 +n04282231 +n02136452 +n02365108 +n10328328 +n02315487 +n03325403 +n09231117 +n03342657 +n09980985 +n10702167 +n11961871 +n02065263 +n12857779 +n03219612 +n07805966 +n10699981 +n07691863 +n12831932 +n04179126 +n10208189 +n09765118 +n07922147 +n01631512 +n01947997 +n01405616 +n01892030 +n07827896 +n12964920 +n07749870 +n03276696 +n10020670 +n11828577 +n07624666 +n10590146 +n02407521 +n10253703 +n03270854 +n11610047 +n12981443 +n12413642 +n12302565 +n03177059 +n04594114 +n10227985 +n07728391 +n10395073 +n02810270 +n03569293 +n07812046 +n03843316 +n12477401 +n03802643 +n07618029 +n10755648 +n12837803 +n12454556 +n01636127 +n02809241 +n03270165 +n12035631 +n02962414 +n09750641 +n01793085 +n04346003 +n07922041 +n04164002 +n12499979 +n03301291 
+n07921834 +n09656077 +n07599161 +n13155611 +n10194231 +n10063635 +n03601442 +n10366276 +n00475661 +n03943714 +n10377291 +n02624551 +n02568447 +n07589458 +n09691858 +n02685995 +n11919975 +n01690466 +n13211020 +n04114069 +n10530383 +n04200908 +n12631932 +n07916437 +n03219859 +n07918309 +n10368291 +n10253479 +n03317889 +n13206178 +n02821415 +n10592811 +n12557064 +n12872458 +n10212231 +n07926346 +n09695514 +n09741816 +n03964611 +n07812913 +n09703708 +n02587479 +n10593521 +n03485309 +n03776877 +n12289433 +n07716504 +n10580030 +n03061893 +n03206158 +n09710041 +n04266849 +n07864065 +n12767648 +n02333190 +n12295429 +n02406432 +n01799679 +n07861983 +n02201626 +n03441582 +n03653975 +n02834506 +n12263204 +n10672662 +n03072682 +n03410423 +n11620389 +n04542095 +n07910970 +n03697913 +n02706806 +n09736798 +n12318965 +n07938594 +n12032429 +n03191776 +n04210288 +n01422335 +n03236093 +n11881189 +n02247216 +n12338146 +n03104512 +n00474881 +n04172230 +n01461315 +n04400109 +n10646140 +n02215621 +n10096126 +n03019806 +n11809754 +n02492948 +n10741367 +n10308504 +n07875560 +n02523110 +n07738224 +n02015797 +n10499631 +n03025165 +n03284308 +n03508881 +n10441037 +n10757492 +n07608721 +n09755241 +n04264361 +n04394421 +n03776997 +n03175843 +n04476526 +n02523877 +n13196369 +n10190122 +n03172738 +n02709763 +n02070624 +n04563560 +n04017807 +n03824589 +n07817758 +n03222722 +n01542433 +n13173259 +n04458201 +n12869668 +n12580786 +n02407763 +n09760913 +n10530571 +n11752798 +n09612700 +n07601175 +n11632376 +n10641223 +n03158668 +n03411208 +n01413457 +n03684740 +n10248008 +n12656528 +n11849271 +n07771891 +n12067433 +n12389727 +n11734698 +n04042204 +n07825399 +n12621945 +n07624757 +n03180732 +n09741331 +n10246317 +n04030414 +n07821107 +n04524716 +n03789603 +n12867449 +n10249869 +n02434415 +n07614103 +n03333349 +n04602840 +n09923996 +n02658811 +n13033879 +n03663433 +n02873623 +n07837545 +n12436907 +n02675077 +n01500854 +n04435552 +n01790304 +n11687789 +n03443543 +n09733459 +n01606177 +n12245885 
+n11721642 +n02201497 +n12010815 +n04594742 +n02755984 +n07927716 +n04245218 +n03134118 +n13214485 +n12294542 +n12713521 +n03556173 +n12650038 +n07719058 +n04319774 +n10443830 +n10019187 +n09720702 +n07926442 +n10402709 +n03989777 +n11699751 +n09613118 +n02965122 +n04221076 +n01861330 +n12837052 +n02975589 +n09668437 +n03012499 +n01418498 +n12451566 +n03585778 +n07692517 +n09672590 +n09741999 +n09748648 +n07621264 +n03482001 +n10185148 +n01542168 +n12536291 +n07846557 +n11840476 +n03130866 +n02631775 +n11730015 +n03715275 +n07680168 +n12175370 +n05427346 +n03665232 +n08611421 +n11730458 +n02413484 +n09783884 +n07888378 +n04611351 +n02247655 +n02136794 +n11649359 +n01382033 +n07889193 +n10405540 +n03510384 +n04420720 +n03585875 +n03812789 +n01835769 +n12139921 +n09762011 +n10103228 +n03477410 +n11930788 +n10064831 +n12311045 +n07681805 +n03136504 +n12887713 +n03886940 +n03130233 +n10197392 +n12333961 +n07672914 +n12723062 +n12599661 +n04268799 +n03696909 +n12809868 +n12452256 +n10710778 +n02571652 +n12117326 +n02450677 +n03041265 +n12544240 +n01966377 +n10252354 +n02378625 +n09814488 +n10569011 +n13067330 +n07928998 +n07890970 +n02187279 +n02592371 +n07846802 +n03475961 +n05448704 +n10410996 +n02851795 +n10093167 +n12468719 +n09876701 +n03057724 +n03469031 +n02344270 +n04248209 +n02687682 +n04467899 +n12897788 +n03436656 +n12539832 +n09906704 +n03190458 +n11843441 +n12130549 +n11823756 +n03153246 +n03684489 +n04160036 +n02908951 +n12855365 +n03518230 +n12225222 +n12933274 +n10432957 +n02921406 +n10156831 +n12239647 +n02826812 +n03411927 +n11602091 +n13200986 +n04244847 +n01330126 +n14938389 +n03001540 +n04387531 +n03423099 +n07608533 +n11723986 +n07600394 +n12529500 +n02403820 +n02587300 +n10333317 +n07935288 +n12680652 +n01449980 +n12153914 +n07803310 +n11741797 +n01881857 +n13081999 +n08644045 +n02061217 +n02173784 +n02660519 +n03104019 +n13137951 +n04538403 +n02621258 +n04515729 +n04165945 +n11919761 +n13078021 +n07861247 +n11959259 +n11801665 +n04070545 
+n13210597 +n10218043 +n10717337 +n01365885 +n10718952 +n11979187 +n03880032 +n03798610 +n03477303 +n01876667 +n11860208 +n03401721 +n03360133 +n13230843 +n13194758 +n13190060 +n02564935 +n13894154 +n12754311 +n07697408 +n13171210 +n02035402 +n03736147 +n10396337 +n04554998 +n02793930 +n04126852 +n03654826 +n09411295 +n06255613 +n01680983 +n10261862 +n01581874 +n10378780 +n10646641 +n03539103 +n03351151 +n04349913 +n03906106 +n02370525 +n03319576 +n04113968 +n09693244 +n02945964 +n03344509 +n04117216 +n03889626 +n03557840 +n09800469 +n04280487 +n07890890 +n12147835 +n12295237 +n03883664 +n04436992 +n02922877 +n10099002 +n01988203 +n10056719 +n11646517 +n03672521 +n04568713 +n10111358 +n03606347 +n04047733 +n12320627 +n10251612 +n10460033 +n01742447 +n11917835 +n10443032 +n13079567 +n04363671 +n10788852 +n10482587 +n03308614 +n12741586 +n12938667 +n04539407 +n01630148 +n02303777 +n13050940 +n04552551 +n02341288 +n04098169 +n04110439 +n11625391 +n12259316 +n02822762 +n10631131 +n04089152 +n03571439 +n04558199 +n12656909 +n03170292 +n02877642 +n12771890 +n03033267 +n12658603 +n13354021 +n12855886 +n11840246 +n03619050 +n07727252 +n12932706 +n13874073 +n01315805 +n02948942 +n12048928 +n03146449 +n10656969 +n09872557 +n03906590 +n04454792 +n12500309 +n04239333 +n01815036 +n09644657 +n10497645 +n02918455 +n07812662 +n04240434 +n10804636 +n11967878 +n04184095 +n11834272 +n05244755 +n02299039 +n12665659 +n12144987 +n07607492 +n11887750 +n13083461 +n04577139 +n09670909 +n07876893 +n02875948 +n04069582 +n10458111 +n10361194 +n09389867 +n01651778 +n11933387 +n13193143 +n12834190 +n03516266 +n02184589 +n10041373 +n02809605 +n04064213 +n04957589 +n12643113 +n02582721 +n07911061 +n07921360 +n10369417 +n10527147 +n04104925 +n03707372 +n01386182 +n10374849 +n09902851 +n08559155 +n02332447 +n11649150 +n11722036 +n01823740 +n04592356 +n10002257 +n10661732 +n07562379 +n07597263 +n04036776 +n13112201 +n09842288 +n07738105 +n04545984 +n09635973 +n02885233 +n02756854 +n07808479 
+n03029296 +n01543383 +n02884450 +n09843716 +n04224395 +n10576676 +n10140051 +n07919894 +n07806879 +n10212780 +n09478210 +n12017127 +n03770224 +n07606191 +n03555217 +n09715165 +n12270460 +n12129738 +n11739365 +n02303585 +n07818029 +n05314075 +n03019304 +n09859975 +n09454744 +n13151082 +n12586989 +n00455076 +n07741357 +n04957356 +n08659242 +n04577293 +n04126244 +n03131193 +n12428242 +n03569494 +n03781594 +n07743384 +n02892392 +n12576695 +n12199982 +n07693439 +n07719756 +n11884384 +n03043798 +n12351091 +n03690168 +n02214499 +n01839949 +n01831360 +n12642964 +n02957862 +n03125588 +n12883628 +n04002371 +n10747965 +n09744462 +n02853745 +n13030337 +n12156679 +n02761034 +n12587487 +n03374570 +n12728322 +n01731764 +n07918706 +n03696445 +n03185868 +n02805283 +n03868763 +n02202124 +n12369665 +n12449934 +n12650229 +n02656301 +n07743723 +n11702713 +n02927053 +n03916385 +n01486010 +n03986071 +n04188064 +n13897528 +n12414329 +n07718068 +n07837755 +n11735570 +n10464542 +n04091466 +n01315581 +n10374943 +n03989898 +n13220525 +n04076052 +n04062179 +n02414442 +n04414101 +n04446162 +n00480885 +n03536568 +n03773835 +n10728998 +n12643877 +n02255391 +n03799610 +n07847585 +n00446411 +n11910666 +n03139998 +n02296276 +n02889996 +n02786611 +n10363445 +n07854348 +n08583682 +n09912681 +n07896422 +n02368821 +n11935953 +n12185254 +n11738547 +n03809211 +n02448318 +n13066979 +n01987076 +n12009047 +n12839574 +n13174823 +n07902520 +n03369866 +n13209129 +n02593191 +n03853291 +n02620578 +n10071332 +n01813658 +n09895480 +n10134760 +n01316734 +n07845166 +n03175983 +n13132156 +n12814960 +n12883265 +n03637787 +n04310507 +n04133114 +n03900194 +n04129688 +n04449550 +n01805321 +n01717467 +n01573627 +n12271451 +n11722621 +n09976917 +n12232280 +n12905135 +n03451253 +n01655344 +n12346986 +n11987511 +n10517283 +n02941845 +n12730370 +n03121190 +n07917874 +n10023656 +n10151133 +n07695187 +n03258456 +n10639238 +n10682713 +n02085019 +n12343753 +n10749928 +n04595611 +n04410565 +n08500819 +n07719980 +n04016479 
+n03232417 +n03469832 +n09834885 +n07925327 +n10094782 +n03632100 +n12734215 +n09845849 +n04047139 +n10743124 +n02604954 +n12270278 +n03036244 +n11991777 +n10168012 +n02561803 +n10531109 +n10344319 +n03804211 +n10513938 +n10732967 +n09917481 +n02950482 +n03148808 +n07910245 +n07925423 +n07889990 +n04302988 +n07745357 +n04346511 +n07573563 +n02564403 +n12084400 +n10030277 +n09815455 +n04388473 +n12404729 +n10576316 +n12072210 +n11811059 +n01824344 +n03556811 +n03175301 +n07586485 +n13137010 +n11986729 +n04967561 +n03881404 +n07692114 +n07874995 +n02770585 +n07853345 +n02775689 +n04328580 +n01323781 +n07773428 +n02414043 +n02794474 +n02352932 +n07569873 +n12374705 +n03606106 +n04267246 +n04369485 +n11934239 +n12705698 +n11841247 +n07868045 +n03525693 +n12358293 +n02937010 +n09658398 +n12711182 +n03516647 +n04591631 +n10228712 +n11930353 +n03471779 +n12594324 +n02251593 +n04455579 +n02542017 +n03381450 +n03320845 +n12364940 +n09657748 +n12412987 +n01840412 +n10570704 +n10117267 +n03251280 +n10195261 +n12178129 +n12285049 +n02177775 +n10117415 +n03707766 +n04475309 +n05604434 +n03999064 +n12127575 +n01972131 +n09793946 +n01635176 +n02791532 +n07564101 +n07876460 +n02813981 +n10764719 +n03638743 +n12761702 +n02125689 +n11657585 +n09923003 +n13069773 +n02683183 +n04324515 +n11936946 +n12862828 +n02659808 +n02619861 +n13175682 +n11648039 +n07768139 +n12512674 +n12108613 +n02947977 +n12899971 +n03845107 +n07689490 +n02081927 +n07619508 +n10248377 +n10300041 +n10761326 +n09655213 +n02675522 +n04963111 +n01995686 +n03256631 +n10684630 +n04471912 +n12728864 +n03870546 +n02829246 +n09725546 +n03409920 +n13194918 +n10055297 +n02513248 +n01462803 +n11782266 +n13094145 +n07839478 +n13916363 +n07932454 +n09722817 +n07774479 +n10386874 +n12832822 +n01599388 +n02964295 +n04349189 +n07689313 +n11653126 +n02309841 +n02064000 +n04410663 +n04562122 +n02358712 +n09901786 +n10441124 +n12882158 +n12815668 +n10159289 +n01641930 +n03315990 +n12271187 +n10277638 +n07815163 +n12903014 
+n07915366 +n04412300 +n01324799 +n03408264 +n09452291 +n03019198 +n11890884 +n10355806 +n03186199 +n04013600 +n12541157 +n06259898 +n06273294 +n11946051 +n01671705 +n04415257 +n01905321 +n04050600 +n12604460 +n04051439 +n02929184 +n11765568 +n10025060 +n02396796 +n04033287 +n13027557 +n03127531 +n10308066 +n09729062 +n01593553 +n02476567 +n07609728 +n12970293 +n01419888 +n03215749 +n01684741 +n13067672 +n03870290 +n07846359 +n12961536 +n03356559 +n07727140 +n09843602 +n02378755 +n12044041 +n01977485 +n07718920 +n12060546 +n04265428 +n12237855 +n04006067 +n10227266 +n04361937 +n12134486 +n10097842 +n02264591 +n03912821 +n07594155 +n03116163 +n11771924 +n04155457 +n12394118 +n10507380 +n01844746 +n11901452 +n03024233 +n03383562 +n11806814 +n10062716 +n04204755 +n08613733 +n12907671 +n03533654 +n09826605 +n03109033 +n07606419 +n03537085 +n11615812 +n07695504 +n11694300 +n04520962 +n09971839 +n02664285 +n03402511 +n02061560 +n13133140 +n03548195 +n12877493 +n02425086 +n12845187 +n12488454 +n02975994 +n02071028 +n01457407 +n03685486 +n07605282 +n07771405 +n07827554 +n10538733 +n03438780 +n04379096 +n12686496 +n10001764 +n11848867 +n12125001 +n09886540 +n03275566 +n01442710 +n12789554 +n07858197 +n12722071 +n12868880 +n10441694 +n12409651 +n07727741 +n12289585 +n04069166 +n12686877 +n03723439 +n07815956 +n12543455 +n10778044 +n02200630 +n10074841 +n12640284 +n12589841 +n07592317 +n07866571 +n12712626 +n04228422 +n11711289 +n03590475 +n13081229 +n03045800 +n03639230 +n02874214 +n07615954 +n03204134 +n12053962 +n12769219 +n15006012 +n09873769 +n11818636 +n01959029 +n03349599 +n12227909 +n07576969 +n03638180 +n07742224 +n03390673 +n02344175 +n03770520 +n00447361 +n13235319 +n01983674 +n10061882 +n04267165 +n12493868 +n12713358 +n02930339 +n10493419 +n12918810 +n02582220 +n12248359 +n02644501 +n04596492 +n04538249 +n07905618 +n13230190 +n07808268 +n15005577 +n09351905 +n12730544 +n11937023 +n04024137 +n02238358 +n11646955 +n11618079 +n09849990 +n04060448 +n04220805 
+n12725940 +n12004120 +n01484562 +n02669442 +n12132956 +n01756916 +n03980986 +n02256172 +n07716750 +n12119390 +n04047834 +n11934041 +n12828977 +n03648219 +n11873612 +n12909614 +n04397860 +n03908111 +n03261395 +n03695616 +n11668117 +n12014355 +n02896074 +n03988758 +n04426184 +n10328696 +n02477028 +n04507326 +n04320871 +n03256472 +n01919385 +n03988926 +n13182164 +n07826250 +n03207548 +n01396617 +n04369618 +n07913774 +n13229951 +n03410022 +n12728508 +n01997119 +n03598783 +n01341090 +n03879456 +n01736796 +n02864122 +n13879816 +n02684962 +n12246037 +n02433729 +n04364397 +n09881358 +n02950120 +n03326371 +n02243878 +n01790812 +n12990597 +n03330947 +n07764486 +n03332173 +n10006177 +n03347472 +n07619301 +n10106509 +n12365285 +n01732989 +n07678586 +n04098795 +n07733847 +n03994297 +n12872914 +n02762909 +n07766530 +n13198482 +n02395855 +n12273515 +n04487894 +n07847047 +n12488709 +n02859557 +n04255768 +n02360933 +n03267696 +n03152951 +n10188715 +n10520544 +n13065514 +n02900594 +n03699754 +n01319187 +n01949499 +n10417424 +n01603000 +n12062105 +n09683180 +n09863339 +n01880716 +n10702615 +n03893935 +n10495555 +n04131499 +n02957252 +n02113892 +n07724078 +n12246941 +n04303095 +n01751215 +n04213530 +n12117695 +n12418507 +n01922948 +n12131405 +n13188767 +n01481498 +n03174079 +n02407172 +n11613867 +n10152616 +n10119609 +n04158250 +n11695085 +n07855105 +n02854630 +n03768683 +n12739966 +n12266984 +n12819141 +n12732605 +n13205249 +n11917407 +n01607429 +n02694279 +n07815294 +n06614901 +n07846471 +n12119717 +n02595339 +n12366186 +n10693235 +n12263410 +n12484244 +n10337488 +n04146976 +n01469723 +n07872748 +n03238879 +n12000191 +n07846938 +n03116008 +n12139196 +n04013176 +n10317963 +n12140511 +n02065726 +n01649556 +n10316862 +n01755952 +n04385079 +n12770529 +n02814338 +n01675352 +n11874423 +n01369484 +n10537708 +n07618281 +n07821404 +n02297819 +n03238762 +n03357081 +n02628600 +n07830986 +n12507823 +n04431925 +n11955532 +n03429771 +n10281896 +n12383737 +n12760875 +n09673091 +n12892013 
+n06625062 +n04503269 +n03674842 +n12338979 +n04268275 +n12033139 +n11767877 +n07812790 +n12676134 +n04037873 +n10097477 +n12310638 +n12258101 +n09391386 +n13196738 +n13866626 +n12720354 +n10106995 +n07843220 +n03878294 +n04101375 +n07733217 +n10220080 +n04601938 +n10778148 +n12973937 +n10556825 +n12256708 +n12583855 +n04259202 +n07628181 +n04226962 +n02777402 +n09674412 +n12188635 +n03776167 +n04504038 +n04156591 +n02270945 +n02264021 +n07826653 +n02980203 +n02059852 +n02102806 +n12921660 +n04477725 +n10107173 +n12837466 +n02697022 +n04350688 +n12110236 +n02177196 +n07899976 +n12639910 +n02368399 +n10009162 +n03950647 +n09248153 +n02425532 +n04044955 +n11933257 +n03460899 +n10147710 +n02379743 +n02413917 +n02890804 +n12915140 +n02146879 +n07915800 +n01787006 +n03646809 +n11677902 +n04065909 +n02088992 +n02887832 +n10115946 +n02306825 +n03719560 +n10456696 +n03758220 +n12625003 +n04021503 +n07563366 +n02531625 +n10304650 +n12855710 +n09735654 +n07853762 +n03512030 +n12898342 +n02297938 +n12618727 +n04082344 +n12953712 +n12617559 +n03035715 +n02532451 +n05399356 +n03602686 +n10082423 +n04607759 +n07581607 +n07594737 +n04030965 +n03464628 +n12103894 +n03039353 +n03522990 +n02964934 +n03169063 +n10153865 +n09653144 +n09941571 +n12907057 +n07768318 +n02600798 +n02187150 +n01811243 +n12252383 +n04495555 +n07678953 +n13181244 +n13069224 +n13184394 +n12765402 +n03471347 +n10208847 +n03697366 +n09840435 +n02506947 +n09709673 +n07928578 +n11935715 +n07848936 +n02757927 +n01999767 +n02245443 +n10260473 +n13898645 +n02701260 +n07840219 +n11785875 +n12385830 +n12017664 +n12145148 +n04530456 +n01929186 +n02384741 +n04113038 +n03296217 +n09723819 +n03766697 +n12143215 +n09929202 +n02684248 +n12119539 +n03566555 +n12941220 +n04124573 +n10750188 +n07733005 +n04230707 +n03829857 +n07756838 +n12244458 +n12543826 +n03514129 +n02762169 +n04435870 +n03342863 +n09745324 +n12369476 +n11652039 +n03915320 +n07746749 +n07608641 +n12642600 +n02389943 +n12137791 +n04111962 +n12493426 
+n12454793 +n01455317 +n10728117 +n03281524 +n12195734 +n12353431 +n02477329 +n02678010 +n04557522 +n10162354 +n14942411 +n07806043 +n12274151 +n09835153 +n03983499 +n04086663 +n07851926 +n07868684 +n11926976 +n03972146 +n04310604 +n09675799 +n13880704 +n13173132 +n07577918 +n10720964 +n11937102 +n03349020 +n12340581 +n03725506 +n03477143 +n10578162 +n01731137 +n03382104 +n11616852 +n01493829 +n09327077 +n03856335 +n03321843 +n02375757 +n02118643 +n08500989 +n03496486 +n04140777 +n12858987 +n02845293 +n04093157 +n07819682 +n10394786 +n12289310 +n02901620 +n01559160 +n07919165 +n12648196 +n11774972 +n11995396 +n10543937 +n10154013 +n03977158 +n01884476 +n12266528 +n11906127 +n12661538 +n04396650 +n12761905 +n04175574 +n10181878 +n12017326 +n12876899 +n09744346 +n07741706 +n04451636 +n07735981 +n03751590 +n03140546 +n03070396 +n03091223 +n12071477 +n07562017 +n09981092 +n09847344 +n12552893 +n12371202 +n02245111 +n01598271 +n04400499 +n02298095 +n15048888 +n02967170 +n04030161 +n10676434 +n01556514 +n13235766 +n02538562 +n12603672 +n03941586 +n02449183 +n07567611 +n12923257 +n02296021 +n11730933 +n12497669 +n02917742 +n07875926 +n02714535 +n13142182 +n02878107 +n07861334 +n02682811 +n03730655 +n03681813 +n12970733 +n02132320 +n12436090 +n07931280 +n04295353 +n12982590 +n01783017 +n13164501 +n02424589 +n01499732 +n12650805 +n04543509 +n10369699 +n03439631 +n13160116 +n07831663 +n05449196 +n13025854 +n10169241 +n02847461 +n10734963 +n13213397 +n03343234 +n12275317 +n02793414 +n04300509 +n01803893 +n11617878 +n02179192 +n03637480 +n04514648 +n03087521 +n10478827 +n11757190 +n12919195 +n04532504 +n01736375 +n04015786 +n04545471 +n12668131 +n04472961 +n14786943 +n07584938 +n02498743 +n07744559 +n10010062 +n10101308 +n07832099 +n02601767 +n10473453 +n02451575 +n02496052 +n03696746 +n12669803 +n07904072 +n04290762 +n11737125 +n07760755 +n12553742 +n12068138 +n12630999 +n02390938 +n02202678 +n02216740 +n02679961 +n13173697 +n11828973 +n02287987 +n04585318 +n10360366 
+n07745661 +n03474352 +n07934800 +n12677612 +n03692272 +n13092240 +n04230487 +n11846312 +n12433952 +n11793403 +n03056873 +n05454833 +n12517077 +n12682882 +n02649218 +n09425344 +n07878283 +n02795978 +n10064977 +n12754174 +n02945813 +n01750743 +n03150661 +n13880415 +n12337800 +n04017571 +n09754907 +n04456734 +n02967540 +n10621400 +n11744471 +n01971620 +n04148285 +n10781817 +n11991549 +n12305654 +n03943833 +n10330931 +n12918991 +n01783706 +n11933099 +n12931231 +n07589967 +n09666349 +n07853445 +n12714949 +n03548533 +n04158672 +n03809802 +n03080309 +n12800049 +n02578454 +n02834027 +n10067600 +n03044671 +n04198233 +n07930205 +n04357930 +n12221522 +n11957317 +n03085781 +n03723885 +n03614383 +n02661618 +n04292221 +n03426574 +n03838024 +n10442093 +n12399534 +n01450950 +n07876550 +n11937446 +n09870096 +n02631628 +n05460759 +n01710177 +n03660562 +n04283784 +n01497738 +n02232223 +n04209811 +n12837259 +n02864987 +n04499810 +n12654857 +n03493792 +n09688233 +n02312912 +n10057271 +n07606058 +n03258192 +n10507565 +n11930038 +n08679269 +n03812263 +n11662128 +n04085574 +n07643577 +n03981094 +n02796412 +n02513939 +n07686634 +n07936979 +n03168774 +n03816394 +n07625324 +n04138131 +n10383094 +n10222716 +n10381981 +n12254168 +n13223090 +n03056583 +n09910556 +n03277004 +n12649866 +n02089725 +n03688707 +n09665367 +n07849506 +n02843909 +n13141797 +n02477516 +n09710886 +n03835941 +n11734493 +n10778711 +n10007809 +n02038141 +n12766043 +n02353172 +n02030224 +n10762212 +n06274921 +n13033396 +n03560860 +n01961234 +n13868515 +n03216199 +n01553527 +n04429038 +n10211036 +n02150885 +n02435517 +n02755675 +n09699020 +n12566331 +n03909516 +n02903727 +n02594942 +n04173172 +n04125692 +n12251001 +n02412787 +n01649412 +n01411450 +n01774097 +n09912907 +n03162556 +n07566231 +n12267534 +n03928589 +n04142327 +n11771147 +n07832592 +n04155177 +n07937621 +n07839864 +n03201895 +n13095013 +n10298271 +n03059103 +n03784793 +n11925450 +n03288742 +n02809364 +n04108999 +n04449449 +n03726233 +n07854455 +n03692136 
+n12018447 +n03374282 +n06008896 +n07598928 +n03577312 +n04604806 +n09892513 +n04370600 +n08238463 +n01793159 +n07822687 +n03242390 +n07685303 +n03822361 +n01996280 +n10505942 +n06596845 +n04219580 +n12056990 +n10579062 +n10240082 +n10298202 +n07711907 +n03905730 +n12222900 +n07598622 +n04415815 +n12389932 +n12154114 +n04210012 +n12500751 +n03729402 +n12122918 +n04572121 +n12804352 +n02415130 +n12780325 +n11639084 +n12768933 +n02253494 +n13217005 +n03567788 +n12304286 +n10703480 +n07766723 +n05455113 +n07741804 +n12186839 +n01687128 +n01350701 +n03260206 +n07876026 +n12528382 +n04125541 +n10457444 +n01606097 +n11717399 +n04598416 +n12899166 +n09748101 +n12160125 +n07608980 +n07843348 +n02409038 +n02571167 +n09980805 +n09706029 +n02495242 +n12765846 +n10373525 +n12321873 +n03047171 +n12365462 +n03752398 +n02662993 +n10316527 +n10728233 +n06273207 +n01733214 +n12297846 +n12755876 +n02428842 +n02289307 +n04536465 +n03253187 +n02297294 +n05584746 +n03117642 +n12189779 +n10338231 +n07599649 +n04559994 +n12710917 +n09966470 +n12470907 +n04499300 +n12403075 +n11837743 +n02269657 +n12599185 +n07618587 +n03996004 +n12851094 +n03392648 +n01319001 +n12826143 +n12369845 +n01814549 +n10056103 +n12854193 +n02267483 +n04019881 +n03490649 +n04268142 +n10801802 +n12315060 +n10149436 +n04563790 +n09865068 +n03000530 +n10657556 +n07840672 +n12118414 +n02856013 +n02900459 +n04094859 +n12079523 +n11827541 +n12236160 +n02904505 +n02846619 +n09842823 +n12926039 +n02146201 +n03195799 +n12815838 +n09899289 +n01483021 +n02519340 +n05453815 +n10329035 +n02494383 +n09742927 +n13220355 +n03212406 +n11759609 +n10061431 +n12095281 +n04262530 +n03799240 +n02426176 +n04608809 +n12230540 +n13880551 +n11741175 +n11858814 +n11723452 +n07590841 +n12604845 +n10342543 +n12760539 +n09270657 +n02563079 +n10643937 +n12843316 +n01651641 +n07838811 +n04359034 +n07758260 +n02762725 +n11726433 +n03114743 +n01952029 +n12321395 +n11930571 +n12337922 +n12427946 +n12001294 +n12551457 +n13235011 +n02290340 
+n06419354 +n12408873 +n01741442 +n12308447 +n10243872 +n03658635 +n03694761 +n02570484 +n12912801 +n04158002 +n02417785 +n01332181 +n03703075 +n10283366 +n03142431 +n02779609 +n02300554 +n09868782 +n10323752 +n03166809 +n03394149 +n02827148 +n02186717 +n01350226 +n03344784 +n03555996 +n04498873 +n13157481 +n04519887 +n12028424 +n12349711 +n10471640 +n07741235 +n04032936 +n12357968 +n10228592 +n13178284 +n04168840 +n13239177 +n03561573 +n02566489 +n11807696 +n07681264 +n02566665 +n10456070 +n10063919 +n10492727 +n01788579 +n11977660 +n02036228 +n02738978 +n03989349 +n10332953 +n12949361 +n09901502 +n07839730 +n13146928 +n10152306 +n04170515 +n11602478 +n02522722 +n01333610 +n13030852 +n02143891 +n12807624 +n04542329 +n12243693 +n12036226 +n13917690 +n02553028 +n02752199 +n10594857 +n11627714 +n04348070 +n13171797 +n04612257 +n07934373 +n04536765 +n02244515 +n04526800 +n04546595 +n02551668 +n12143405 +n07871588 +n07858484 +n03628728 +n13179804 +n03242264 +n12089846 +n07588688 +n07620047 +n01647466 +n09685233 +n03467254 +n12666369 +n05449661 +n10694939 +n12886600 +n12256522 +n04006330 +n03317673 +n04316815 +n12222090 +n04022866 +n04088441 +n07617526 +n10782362 +n04355821 +n13901490 +n12508618 +n03849943 +n04503499 +n13193466 +n09754633 +n07583978 +n13911045 +n07643679 +n12054195 +n10692090 +n04032509 +n10146927 +n02031298 +n04002629 +n04035748 +n10712229 +n02866106 +n07909504 +n04540397 +n06266878 +n10219879 +n12567950 +n07853648 +n03191561 +n07856045 +n12646197 +n03317510 +n10515863 +n13198054 +n02808829 +n12889579 +n02698473 +n09924437 +n03595055 +n12306270 +n07857356 +n09715303 +n03024518 +n04323519 +n09629065 +n04178668 +n12748248 +n02308618 +n07873198 +n10564098 +n03007297 +n04036155 +n02143439 +n10507482 +n12267931 +n03956331 +n12888234 +n04066476 +n07813107 +n02736396 +n10306496 +n12324388 +n01744555 +n01649726 +n06596179 +n03616091 +n07754279 +n02072493 +n12408280 +n04314632 +n02412700 +n04030846 +n09833997 +n03599964 +n05258627 +n12572759 +n12136581 
+n02419056 +n12453714 +n11652217 +n03878511 +n03907908 +n12223160 +n10514121 +n04153330 +n12163279 +n12623818 +n03495671 +n13222985 +n10354754 +n04365112 +n12384680 +n12538209 +n03105214 +n12534862 +n13869045 +n03945928 +n11613692 +n11892181 +n13002209 +n02685253 +n07598529 +n02629716 +n13202355 +n07927070 +n02176916 +n04370955 +n11988132 +n03246197 +n01440467 +n07620145 +n03940894 +n01897667 +n03408340 +n12602612 +n02539424 +n03863657 +n04559620 +n02604480 +n11822300 +n03518829 +n11619845 +n10504090 +n03341035 +n02908123 +n04281998 +n03277602 +n03865288 +n10074578 +n13902793 +n03054605 +n04404200 +n12786836 +n12235051 +n04035231 +n12009792 +n12705458 +n04378489 +n02476870 +n11954798 +n03573848 +n02087314 +n03162460 +n04363412 +n02261063 +n09953615 +n01947139 +n03044801 +n04287351 +n04479287 +n03861596 +n12510343 +n07854066 +n03027505 +n12161577 +n04197878 +n01812187 +n10015792 +n08685188 +n11737009 +n10333044 +n02730568 +n10290813 +n13096779 +n05257476 +n07917951 +n12121187 +n03517509 +n07932762 +n02336275 +n12159942 +n12105981 +n02562971 +n13882961 +n12016777 +n02793684 +n12717644 +n01380754 +n07724173 +n04055861 +n11831297 +n03059934 +n03370646 +n10065758 +n09459979 +n07913644 +n04322531 +n03457451 +n02567633 +n04240867 +n10693334 +n10556704 +n04614844 +n07909362 +n12082131 +n09268007 +n04359217 +n09883807 +n02292085 +n04052346 +n03431570 +n02843465 +n04584056 +n04432043 +n09846142 +n07864317 +n04475749 +n04227050 +n04280845 +n03535284 +n07890617 +n03217889 +n02806762 +n11967315 +n11762927 +n02501923 +n03442487 +n09690083 +n02964634 +n02920164 +n07855317 +n10196725 +n03042829 +n11662937 +n12183816 +n12311224 +n13884261 +n02243209 +n03140771 +n02385002 +n03071288 +n12936826 +n04583022 +n07859142 +n04578112 +n04467506 +n12938081 +n09982152 +n12555255 +n03335333 +n10104888 +n12151170 +n12709349 +n10456138 +n02237868 +n07620327 +n12561309 +n12341931 +n12350032 +n01775730 +n12950796 +n01440242 +n04261767 +n10568915 +n12285195 +n07589872 +n13112035 +n07840395 
+n11750508 +n12286197 +n03336168 +n03325288 +n02551134 +n04293258 +n13130014 +n07733124 +n04451139 +n11985903 +n03602365 +n11722342 +n11944751 +n12897999 +n02277422 +n03101302 +n07608245 +n03531982 +n01997825 +n11713370 +n04442582 +n02833403 +n04427857 +n01648356 +n10645223 +n10414865 +n10696101 +n12885045 +n10037080 +n12218274 +n07570530 +n04493259 +n10659042 +n10577710 +n03141612 +n10582604 +n00446632 +n02834642 +n07568389 +n04583888 +n04096848 +n12879068 +n04495051 +n09837459 +n12216215 +n03702440 +n10174695 +n10559009 +n10577182 +n07686299 +n04269668 +n02404028 +n03720665 +n09885866 +n03082450 +n12492682 +n12780563 +n03703463 +n02644360 +n02307910 +n01374703 +n04402342 +n04264134 +n03158414 +n04443433 +n12522894 +n10803978 +n11706942 +n10751026 +n13143758 +n02972934 +n04174234 +n12718995 +n11994150 +n11545350 +n12526754 +n07753448 +n02870772 +n11942659 +n11744108 +n12735160 +n12229887 +n04970312 +n02874336 +n10721819 +n13193269 +n03330665 +n09865162 +n10306595 +n12161744 +n03303669 +n07846688 +n02168427 +n01961600 +n03559531 +n09826821 +n03413124 +n09695019 +n03783873 +n11863877 +n13874558 +n02283617 +n11895472 +n13182799 +n07854614 +n03283827 +n01397690 +n02650413 +n09809279 +n10290541 +n10383505 +n11724660 +n07689757 +n10181547 +n07620597 +n11979354 +n02771547 +n13061471 +n12631637 +n11966385 +n03969510 +n11735977 +n07621497 +n12956588 +n03217653 +n04546081 +n11696450 +n10300654 +n02032769 +n01654863 +n09779280 +n02390258 +n03887512 +n10489426 +n10745770 +n10713843 +n03602194 +n10710913 +n07864475 +n04486322 +n07915213 +n08663051 +n10236842 +n02390738 +n02388453 +n03598385 +n12228689 +n11771746 +n12803226 +n11242849 +n02378149 +n10427223 +n05448827 +n11870044 +n12477983 +n12311413 +n03500090 +n10280034 +n02685365 +n03652389 +n12728656 +n07695284 +n09961198 +n03780799 +n03935883 +n01612955 +n12475774 +n02701730 +n07833535 +n12584365 +n03902220 +n12727960 +n10619492 +n04450465 +n10646780 +n10110731 +n04142175 +n12296735 +n09337048 +n12681579 +n12819354 
+n12541403 +n04305016 +n12798910 +n10321126 +n08618831 +n09721244 +n02225798 +n01637338 +n12218868 +n05545879 +n12022382 +n03972372 +n02505063 +n01694311 +n10695450 +n10081842 +n12297507 +n07592922 +n12118661 +n01952712 +n10517137 +n01340522 +n07719330 +n03729482 +n04168541 +n03090710 +n07873679 +n07828378 +n07728284 +n10343088 +n07869937 +n14585392 +n01453475 +n12095412 +n04973020 +n12810007 +n07564515 +n01599741 +n11629047 +n09937802 +n12450607 +n12460146 +n02292401 +n03632963 +n09617696 +n12545232 +n02874642 +n09934488 +n10091349 +n01447946 +n05469861 +n11830400 +n03382533 +n02608547 +n12697152 +n03542727 +n10716576 +n03664159 +n07568625 +n02976815 +n13147532 +n02336826 +n12432574 +n07686461 +n04107598 +n02505998 +n09849167 +n03688066 +n02836513 +n01576358 +n01893021 +n12017511 +n12065649 +n01714231 +n11662585 +n12827907 +n12954353 +n11936199 +n01368672 +n03843883 +n12184095 +n10058411 +n11684654 +n08506347 +n10579549 +n01423302 +n11604046 +n07613158 +n03605504 +n02090129 +n02284224 +n01958435 +n12664469 +n04459122 +n09617161 +n09780828 +n11830252 +n12870048 +n04247544 +n09871095 +n02962938 +n09933020 +n13064457 +n10341243 +n07694169 +n13200193 +n07765728 +n01524761 +n07730562 +n07751737 +n07740855 +n04192521 +n12593122 +n07841037 +n02809736 +n10604275 +n12512095 +n01907287 +n04592596 +n09823153 +n03181667 +n12449784 +n07908923 +n12365900 +n03053976 +n15060688 +n04165675 +n02530637 +n09816654 +n12540966 +n07934152 +n09290350 +n03455802 +n10111779 +n01351315 +n10281770 +n13862552 +n12435486 +n12370174 +n12296045 +n03493219 +n12363301 +n11973749 +n03939565 +n02938321 +n13209270 +n12604639 +n12657755 +n03604536 +n10328941 +n04278932 +n10376890 +n01884203 +n02061853 +n04256318 +n07831821 +n10585217 +n07591813 +n10210648 +n07739035 +n01632308 +n10319313 +n02861777 +n03821145 +n13029610 +n04239900 +n10313441 +n04951716 +n10628097 +n02368116 +n08571275 +n04433377 +n10458596 +n12435965 +n12448136 +n12129986 +n04295777 +n07898895 +n07854266 +n12327846 +n12318782 
+n07825850 +n10414239 +n11731157 +n04409911 +n10655442 +n11829205 +n01738306 +n02840515 +n04150371 +n03369512 +n02645538 +n12773917 +n07818422 +n03227010 +n10303037 +n12942025 +n12406304 +n06616216 +n02435216 +n12981954 +n03683341 +n09703809 +n07722666 +n11817160 +n10110893 +n10228468 +n03572631 +n01378545 +n02130086 +n04388574 +n11960673 +n12956922 +n11924014 +n09895902 +n03426462 +n07759576 +n02563949 +n03466947 +n02522637 +n09480959 +n02033882 +n02451415 +n12677120 +n10580437 +n04425977 +n03057841 +n12285512 +n07614348 +n03144873 +n03391613 +n12366870 +n02304657 +n07863935 +n07909714 +n02413717 +n12591702 +n07838659 +n02967407 +n12016914 +n02735268 +n09470027 +n10222259 +n03899100 +n10513509 +n11620016 +n12600267 +n04368840 +n03016209 +n04085017 +n03215076 +n10238272 +n09782855 +n07586179 +n12434483 +n12452480 +n01990516 +n12030092 +n11739978 +n12714254 +n13036804 +n07727377 +n07879560 +n03710421 +n12128490 +n11968519 +n03250588 +n10173579 +n03114041 +n02942015 +n12729164 +n07871065 +n02591330 +n09353815 +n10138472 +n02712545 +n12866333 +n07835823 +n03508485 +n01758895 +n02925385 +n03321419 +n09931418 +n02846874 +n12500518 +n07587819 +n03160186 +n04974340 +n13067532 +n11940349 +n13027879 +n02878534 +n10055566 +n07925708 +n12628356 +n11958499 +n03472672 +n04233295 +n04563020 +n03426871 +n04330109 +n03677682 +n04129766 +n02884859 +n12692521 +n10188856 +n03500971 +n10355306 +n12407545 +n11955040 +n10028541 +n10345659 +n14720833 +n09641578 +n12613706 +n11718296 +n03380301 +n01334217 +n03890358 +n03583419 +n12447121 +n09660010 +n11826569 +n11837351 +n12096089 +n03871860 +n01821554 +n12834938 +n02738449 +n02644665 +n03316873 +n12548564 +n03605417 +n12094401 +n13152339 +n03004531 +n03080904 +n03535647 +n12349315 +n04213264 +n07860208 +n01526766 +n03710937 +n11806521 +n10618234 +n12306938 +n10473562 +n10050880 +n04596116 +n02577164 +n04479694 +n07936093 +n07834286 +n12175181 +n03986857 +n02919648 +n12055073 +n04567593 +n07585015 +n12771085 +n10551576 +n09778783 
+n01593282 +n02406952 +n12331263 +n10629329 +n12287195 +n07729225 +n07828041 +n01880473 +n12257725 +n02696246 +n07853232 +n11936864 +n09745229 +n03364156 +n04503155 +n03194297 +n04003359 +n07607361 +n10106387 +n10306890 +n10455619 +n01647180 +n07740115 +n12106323 +n03626272 +n11685621 +n11866706 +n04321121 +n01606978 +n12621619 +n11615259 +n07840304 +n02841847 +n05459769 +n03432360 +n04604276 +n12356395 +n12468545 +n03645168 +n00477827 +n03459591 +n04202142 +n12959074 +n07881625 +n12382233 +n02405692 +n12299640 +n12247202 +n12628705 +n12534625 +n09264803 +n12176953 +n09835017 +n10390807 +n04975739 +n12474418 +n11931135 +n07917791 +n10636488 +n09690496 +n11993675 +n03703203 +n11794139 +n13015688 +n04168084 +n01948446 +n10169419 +n04455048 +n04973669 +n12840502 +n12120578 +n10448455 +n01386007 +n02288122 +n01441910 +n02278463 +n03108759 +n02753710 +n03143400 +n13080866 +n13917785 +n13124358 +n13220663 +n02475358 +n01925916 +n02684649 +n10451590 +n03869976 +n03881305 +n07928264 +n01422185 +n04035634 +n11996677 +n04261369 +n12925583 +n12764008 +n09972587 +n03708962 +n01791388 +n02892626 +n04098399 +n07823369 +n07752874 +n13225244 +n03376771 +n01771766 +n13146403 +n12157179 +n13897198 +n07770869 +n13240362 +n07610502 +n03688504 +n02896856 +n12543186 +n09967063 +n05453412 +n12590600 +n02378870 +n07568241 +n01687290 +n00474769 +n11694866 +n02338722 +n02637977 +n04567746 +n10586444 +n11907405 +n03421960 +n07605693 +n10384214 +n12877637 +n12018363 +n10056611 +n13882487 +n12140759 +n04114301 +n11762018 +n12678794 +n11817501 +n02116450 +n12018530 +n03324629 +n12726528 +n03155502 +n10493199 +n04181083 +n10609198 +n04328703 +n03045074 +n07769886 +n01892385 +n12828520 +n03165211 +n11800565 +n07567139 +n13877547 +n12829582 +n02949084 +n07589724 +n01746191 +n12395463 +n05459457 +n10565502 +n11981475 +n09310616 +n12327022 +n02313709 +n12957803 +n11865276 +n12955414 +n12939479 +n13225365 +n07936459 +n03139089 +n07577772 +n12057895 +n03620353 +n12152031 +n01885158 +n04096733 
+n12626674 +n10464711 +n10675609 +n07752782 +n03709960 +n02540983 +n02285179 +n01903234 +n07835701 +n04421083 +n02352290 +n09421031 +n03349367 +n02539894 +n04052235 +n07922955 +n03941887 +n04234260 +n04423552 +n11975254 +n08501887 +n12489676 +n04574348 +n10602119 +n02163008 +n02748491 +n10024937 +n10033888 +n12605683 +n01790398 +n10128519 +n14977188 +n10293590 +n12077244 +n09741074 +n11694469 +n12692714 +n12159804 +n12533437 +n03831203 +n03692004 +n09462600 +n04537436 +n06618653 +n07913537 +n12783316 +n10038119 +n10236521 +n01486540 +n07875267 +n04345787 +n07681355 +n13028937 +n03607186 +n07863107 +n12387103 +n09830926 +n03574416 +n04478383 +n11685091 +n03197446 +n03225458 +n09741722 +n07736527 +n02857907 +n10177150 +n12711398 +n10308275 +n02418770 +n02577662 +n09935107 +n03362639 +n12446908 +n04329681 +n04114428 +n09624899 +n12913144 +n12338034 +n02341616 +n12360817 +n12907857 +n02414904 +n05482922 +n11974888 +n04127117 +n12581110 +n04368365 +n01699254 +n12525753 +n04254450 +n11951052 +n12458874 +n12721477 +n07562651 +n02239192 +n10533874 +n12006306 +n09537660 +n10008123 +n02788386 +n03248835 +n04491312 +n11795580 +n04025633 +n10166189 +n07703889 +n11824747 +n07605198 +n12134836 +n03591116 +n02946753 +n13212025 +n11742310 +n02328820 +n02985606 +n09955944 +n12679432 +n10020366 +n12013035 +n02942147 +n04172512 +n11802410 +n10789709 +n03385295 +n02039497 +n01416213 +n11940750 +n12178780 +n01967963 +n12662379 +n12217851 +n02812631 +n12432069 +n09991740 +n03089477 +n12458713 +n03876111 +n10311661 +n12286068 +n02838958 +n11936369 +n03716228 +n13228017 +n06276902 +n12677331 +n04330189 +n10488016 +n12011370 +n04343740 +n07893792 +n02171164 +n03963483 +n12080588 +n07577657 +n12936155 +n03809686 +n04223066 +n04086066 +n12776558 +n07813579 +n01841943 +n12285705 +n02581482 +n11653570 +n10010632 +n04305947 +n12228886 +n12797368 +n01404495 +n09697986 +n11882237 +n10077879 +n07607832 +n09779461 +n13212379 +n10769188 +n10715789 +n01480106 +n02145910 +n04275093 +n01983829 
+n01978010 +n09937903 +n11976314 +n11785276 +n12386945 +n04445782 +n10712374 +n10706812 +n10194775 +n12655062 +n10739135 +n02597972 +n02307176 +n04121342 +n02350670 +n12698027 +n02805845 +n02895008 +n13149970 +n03451365 +n04542595 +n07803895 +n07864198 +n09690864 +n03844550 +n12378249 +n10345422 +n13163553 +n10457903 +n10783539 +n10539015 +n11757017 +n10274173 +n08652376 +n10283546 +n04541777 +n02824152 +n12945177 +n02082056 +n03695957 +n07936015 +n07591162 +n03628071 +n02990758 +n07685118 +n04023422 +n04951875 +n03541393 +n10289176 +n04039209 +n07913180 +n07910799 +n12017853 +n03732543 +n10656120 +n10512859 +n04556664 +n12464649 +n12927758 +n12078451 +n07878145 +n10561320 +n12467592 +n07689217 +n07619881 +n11935187 +n09837720 +n03642144 +n12220019 +n02983507 +n03271260 +n02778588 +n10193650 +n01654083 +n02746978 +n10202763 +n02953552 +n07924366 +n08583554 +n02905886 +n07855603 +n09745834 +n12366053 +n04140539 +n03383211 +n11648268 +n03352961 +n12116734 +n07771539 +n07836077 +n03842754 +n11683838 +n03004409 +n11730750 +n13098962 +n12292463 +n02867592 +n01653026 +n07583865 +n12548804 +n12702124 +n03917048 +n12677841 +n12511488 +n04217387 +n12495670 +n03554375 +n12403513 +n08558770 +n02781764 +n12339526 +n12742290 +n01404365 +n03591798 +n12446737 +n10494195 +n12110352 +n01672611 +n10493922 +n03638623 +n09910840 +n02238594 +n02575325 +n13186546 +n11873182 +n10344774 +n04094060 +n10417682 +n02749169 +n02428089 +n04549721 +n03824284 +n12107002 +n12784371 +n09986904 +n01634227 +n07826544 +n12253487 +n01679005 +n12516165 +n09339810 +n03126090 +n07803408 +n11883945 +n03842276 +n03397412 +n03280216 +n12264786 +n02545841 +n11877860 +n01830479 +n13207923 +n12490490 +n02542958 +n04114719 +n12590715 +n13226320 +n11644872 +n04119630 +n10176913 +n04213105 +n11652966 +n12546420 +n12625823 +n11897466 +n02092173 +n10567613 +n04953678 +n10059067 +n12408466 +n03056288 +n13036116 +n04169597 +n12467197 +n02569905 +n02758490 +n12623211 +n04077889 +n04959061 +n04183957 +n11689815 
+n03777126 +n03306869 +n07720084 +n02659478 +n12947756 +n04341288 +n04448185 +n04037076 +n09828988 +n03346289 +n04174705 +n13126050 +n04255346 +n09764732 +n11773628 +n14891255 +n04314107 +n02184720 +n02646892 +n04320598 +n01979526 +n03191451 +n03662452 +n10290422 +n01739094 +n02305636 +n04202282 +n05459101 +n02766168 +n09994808 +n03528100 +n10475940 +n03005619 +n12639168 +n02144936 +n13202125 +n10703221 +n03770834 +n12324056 +n03474167 +n02609302 +n12166929 +n12852570 +n12920719 +n12508762 +n11983375 +n01422450 +n12616630 +n09681107 +n10486561 +n13038577 +n12266644 +n02478875 +n02547014 +n02249809 +n03336742 +n12038760 +n01672432 +n09861287 +n03678879 +n01949973 +n09928845 +n02310149 +n12648693 +n10533983 +n12812801 +n04550676 +n01800633 +n12128306 +n12744142 +n13140367 +n07803213 +n07688265 +n13068434 +n02030568 +n12955840 +n01625121 +n13215258 +n04270576 +n02680638 +n02817251 +n01539272 +n04066023 +n12969927 +n10280598 +n04001661 +n09774167 +n10358575 +n01836673 +n02290664 +n09940725 +n12447581 +n07803779 +n04561965 +n10151261 +n01538362 +n10170060 +n13160365 +n09823287 +n12554729 +n10620212 +n11935027 +n03465605 +n03227856 +n08519299 +n07785487 +n03522863 +n02861286 +n12200905 +n04269502 +n02104184 +n07612273 +n01390763 +n11872658 +n12981086 +n10244359 +n01738731 +n12117235 +n12846690 +n02861658 +n08782627 +n09832633 +n02531114 +n01394492 +n03269073 +n03077442 +n09794668 +n13884384 +n08659331 +n02556373 +n02587877 +n03523506 +n03723153 +n12024805 +n13061172 +n03978575 +n07914686 +n13134844 +n12183026 +n03573574 +n03765128 +n03319167 +n01920438 +n07852452 +n07680655 +n03017698 +n12959538 +n04261506 +n01793340 +n03292362 +n12817855 +n03593222 +n01962506 +n12453018 +n04027367 +n12518481 +n09223487 +n07871335 +n03779246 +n09668562 +n01889849 +n02492356 +n07830841 +n03277149 +n09968652 +n03092476 +n10400205 +n06263202 +n07595368 +n12767208 +n02196896 +n12580012 +n10265801 +n02103181 +n02922461 +n01731277 +n12422559 +n04278605 +n02250280 +n03283413 +n11829922 
+n10191613 +n02493224 +n04427559 +n12181352 +n12742878 +n10683675 +n04503705 +n03785142 +n12816942 +n10723230 +n11936707 +n12360534 +n12909759 +n03766218 +n02696843 +n11935877 +n07828156 +n10617397 +n12921499 +n13158714 +n10166394 +n12370549 +n03505015 +n12769065 +n02636550 +n10781236 +n09869317 +n10275249 +n04234763 +n10735173 +n13137225 +n02070776 +n04232312 +n07575226 +n03471030 +n07909954 +n02633677 +n01662060 +n07563642 +n04263950 +n11824344 +n13178707 +n02972714 +n10417288 +n12092930 +n11993203 +n10170681 +n03726116 +n03215337 +n12564613 +n14975598 +n07758125 +n03123666 +n07717714 +n01421333 +n02359667 +n09403086 +n03857026 +n12759668 +n02628259 +n02307515 +n12146488 +n09777870 +n07819303 +n12105353 +n10784113 +n11802995 +n12561594 +n02845130 +n12100187 +n03507658 +n02141611 +n01800195 +n03470005 +n12444898 +n02203592 +n09707061 +n00475142 +n12216628 +n01732093 +n02581642 +n03803780 +n12114590 +n04541662 +n12267133 +n11652753 +n07859951 +n04524594 +n12843144 +n04040540 +n10604880 +n12559044 +n03063834 +n12394328 +n12704513 +n10230216 +n10756641 +n02101670 +n12309630 +n03070587 +n11626010 +n04239639 +n01638329 +n01928517 +n13144084 +n10420649 +n03102516 +n12395289 +n09833111 +n01651285 +n11688069 +n12881913 +n12783730 +n07716649 +n03618678 +n10344203 +n03626502 +n10718665 +n03577474 +n01683201 +n03246653 +n12153224 +n02519472 +n02470709 +n15090238 +n03129636 +n07774295 +n04577567 +n09995829 +n09662038 +n10297367 +n03555862 +n12531727 +n09947127 +n12533190 +n04062807 +n00479734 +n12860978 +n01884104 +n09866559 +n12069009 +n04595501 +n12088495 +n02909053 +n12283790 +n02180427 +n10697282 +n07562881 +n13092078 +n11706325 +n01746952 +n01978136 +n07731436 +n02386746 +n12648424 +n12726357 +n10314182 +n07839172 +n11753562 +n12903503 +n12589687 +n02375438 +n03604763 +n11549895 +n13202602 +n12304420 +n10738215 +n12220829 +n10095420 +n12177455 +n11887476 +n04006411 +n09838370 +n02853218 +n12688372 +n03335461 +n02800940 +n03036701 +n09885059 +n10206629 +n11922926 
+n01678657 +n12192132 +n12248141 +n03108624 +n01936671 +n02417242 +n03222857 +n03768823 +n04343511 +n03538817 +n12655726 +n12521186 +n01330497 +n12767423 +n12965951 +n09695132 +n04410886 +n12599874 +n07865700 +n07596160 +n10227698 +n03224490 +n11598886 +n02948293 +n09906293 +n12247963 +n03301175 +n03895170 +n04259468 +n07808806 +n13147689 +n09856827 +n13882639 +n02241008 +n03842585 +n02883101 +n12182276 +n13918717 +n12728164 +n10634464 +n02477187 +n03107716 +n02342250 +n01479213 +n12793695 +n09808080 +n10707707 +n04161010 +n02836607 +n10076483 +n07726386 +n03872273 +n10250712 +n07688412 +n13884930 +n12301766 +n10196404 +n07591330 +n03814727 +n09610255 +n12757115 +n09814381 +n02397987 +n07886317 +n03959123 +n02185167 +n03533845 +n11838413 +n10227393 +n07704305 +n03580615 +n02663485 +n10101981 +n04346855 +n10067011 +n04464125 +n02829510 +n10007995 +n07845775 +n03004713 +n02450561 +n09905530 +n10361060 +n12394638 +n12095934 +n10479135 +n03145277 +n12246773 +n13194212 +n04475900 +n03252787 +n14867545 +n10485298 +n09961739 +n02149653 +n01553762 +n03931980 +n02344408 +n11676850 +n04034367 +n04235646 +n12867184 +n12625670 +n12763529 +n07593107 +n04351550 +n02571810 +n13899735 +n03652826 +n09495962 +n03421768 +n04205062 +n11918808 +n07745197 +n07752264 +n01892744 +n04609811 +n10278456 +n11790936 +n09754152 +n13234519 +n09820044 +n00440643 +n02350357 +n03779884 +n07803992 +n03305953 +n01836087 +n10068234 +n10690421 +n03134394 +n12380761 +n12801966 +n03134232 +n02596720 +n07591236 +n11882821 +n02312175 +n02387983 +n01912152 +n10805501 +n12718074 +n03188290 +n02776505 +n10528148 +n09971385 +n10524223 +n09958292 +n02721813 +n10300829 +n12007766 +n12107191 +n04449700 +n02987950 +n11878633 +n12328801 +n04551833 +n10567722 +n11654984 +n02808968 +n12066451 +n02964075 +n11633284 +n02434712 +n03070854 +n07926540 +n01543936 +n10091861 +n09938080 +n11976511 +n03342432 +n12886831 +n12509993 +n12958261 +n12730776 +n10066206 +n07846014 +n13176714 +n03332591 +n04607640 +n02513727 
+n12138248 +n11964848 +n01318053 +n10553140 +n07839055 +n02632039 +n11865429 +n02286654 +n02367812 +n12093885 +n10774329 +n02296912 +n01729672 +n10353928 +n12033504 +n11936113 +n03263338 +n07822053 +n09737050 +n13875884 +n13212559 +n11690088 +n05468739 +n09344724 +n02507148 +n01377694 +n04172607 +n10464870 +n07804152 +n02825872 +n03139640 +n11858703 +n10227490 +n12334153 +n03616225 +n12018188 +n12399656 +n10235269 +n11840764 +n01995514 +n03326475 +n12704041 +n10684827 +n03006788 +n13906484 +n02868240 +n03614887 +n03491724 +n12124172 +n03675907 +n13170840 +n03983712 +n03254737 +n07836269 +n01784293 +n02095212 +n12470512 +n12219668 +n12920521 +n04492157 +n02950018 +n01922717 +n11797981 +n12601805 +n02744961 +n07814925 +n09798096 +n03939062 +n13891547 +n07564292 +n01590220 +n09295210 +n03997875 +n03479266 +n01491661 +n03781055 +n12528768 +n10657306 +n12014923 +n10094320 +n02532272 +n02224023 +n04541136 +n12067672 +n02661473 +n04233027 +n12399899 +n12889412 +n01736032 +n12551173 +n01337734 +n10104487 +n02921592 +n02148512 +n10216403 +n03276839 +n01781570 +n03999621 +n02505238 +n12537569 +n10433452 +n02351343 +n12365158 +n08539276 +n01897257 +n12221801 +n10557246 +n10437698 +n01803641 +n11836327 +n07813833 +n03468570 +n06277025 +n10040240 +n03692842 +n03017835 +n01881564 +n10487363 +n07937069 +n10597505 +n01638722 +n10160412 +n09825096 +n12611640 +n03098515 +n10654211 +n13196234 +n03436990 +n04058486 +n09814567 +n10758337 +n03515934 +n07688757 +n10269199 +n12627347 +n04521571 +n01636510 +n03220095 +n09982525 +n12768809 +n02340930 +n02473857 +n12336586 +n12125584 +n02833040 +n02498153 +n01467804 +n12120347 +n11650430 +n11953339 +n12592058 +n05102764 +n10575594 +n09722064 +n01966586 +n10619888 +n07852376 +n12650915 +n10321882 +n11974557 +n09847267 +n13201423 +n12337131 +n13185658 +n02150134 +n10538853 +n10471732 +n07836600 +n03526062 +n02512752 +n04232437 +n03367321 +n04308915 +n07600895 +n11539289 +n03539293 +n12699922 +n07817599 +n02781213 +n03594010 +n12035907 
+n04075813 +n05233741 +n07863229 +n10735984 +n12095543 +n12272735 +n04229620 +n12240965 +n07768590 +n04420024 +n12111627 +n02861509 +n02595056 +n12183452 +n04607982 +n13213577 +n07741888 +n03750614 +n10043024 +n03372933 +n10051861 +n10199251 +n03249956 +n03984125 +n02956393 +n11619687 +n03356279 +n07833951 +n10715030 +n02340358 +n10768272 +n01494041 +n02592734 +n03323319 +n02136285 +n03995661 +n09945223 +n03547397 +n10044682 +n12878784 +n02803809 +n13160254 +n12726902 +n12196954 +n03161016 +n03105645 +n04218921 +n09493983 +n10719036 +n12263588 +n12565102 +n10684146 +n03148518 +n04287986 +n02340640 +n04331443 +n10727016 +n03369407 +n07824863 +n07844786 +n12467433 +n07582811 +n02964196 +n02197877 +n10758445 +n03271376 +n13212175 +n03260504 +n12777778 +n11973634 +n05467054 +n11946313 +n02462213 +n13906669 +n10520286 +n02074726 +n01771100 +n13880199 +n09811568 +n13883763 +n02334728 +n11831100 +n12025220 +n12751172 +n03858837 +n10127186 +n12831535 +n07823591 +n02513805 +n03662301 +n09913329 +n02749670 +n10655986 +n01787191 +n03199488 +n12732252 +n12253664 +n07735294 +n03440876 +n09650839 +n03844965 +n10341446 +n12688187 +n12961242 +n03423224 +n13157346 +n09802951 +n11948044 +n03489048 +n12279060 +n03664840 +n03731882 +n07742605 +n07870734 +n03949761 +n10759331 +n07739923 +n02737351 +n01788291 +n11780424 +n03722646 +n12297110 +n12363768 +n04495310 +n10008254 +n03934890 +n01318478 +n03609959 +n10070377 +n04123228 +n13068735 +n02909706 +n10671042 +n10491998 +n07650792 +n12664710 +n10213034 +n03455642 +n10411867 +n09903936 +n10121800 +n02622955 +n03647423 +n07596566 +n09654898 +n12248780 +n02684515 +n04255670 +n06273890 +n03495941 +n12960552 +n09724234 +n03861048 +n03293095 +n11835251 +n12852428 +n04084517 +n01814620 +n13159890 +n03147156 +n02311748 +n10237799 +n07584859 +n01946827 +n09651968 +n12241192 +n03669245 +n07858336 +n11932927 +n04444218 +n10526534 +n03642573 +n09470222 +n10731732 +n12001924 +n03786096 +n01359762 +n03824999 +n13877667 +n10591811 +n10574311 
+n03275125 +n11631985 +n10539160 +n10502950 +n12499757 +n12432707 +n12068615 +n07689624 +n02610373 +n03204436 +n13051346 +n13134531 +n07610890 +n04021164 +n03502897 +n02299378 +n10417843 +n10050043 +n07929940 +n02593453 +n10577820 +n12870225 +n03333851 +n09463226 +n11741575 +n09193551 +n12012510 +n11987349 +n09215023 +n07924655 +n10060075 +n11999278 +n03933391 +n02602059 +n11993444 +n02337902 +n10149867 +n04441093 +n02868429 +n10629647 +n04192361 +n12029039 +n02768433 +n12078747 +n12730143 +n03255167 +n12492900 +n01709876 +n09672725 +n07870620 +n02315821 +n12277334 +n12204730 +n07852712 +n01319685 +n07802246 +n13031193 +n00812526 +n09658815 +n11982939 +n04264485 +n07893425 +n04094438 +n03285730 +n13182338 +n10724570 +n07832741 +n13210350 +n10654015 +n04058721 +n07875086 +n03462747 +n03994417 +n02889856 +n11957514 +n10109443 +n10478462 +n03064562 +n02477782 +n11920998 +n02138169 +n04227787 +n11797508 +n10753339 +n12928307 +n11921792 +n12643688 +n01833112 +n03919808 +n09817386 +n01903498 +n03848033 +n12031547 +n01035504 +n12324906 +n01911063 +n02588794 +n03749634 +n03539754 +n02242455 +n03079616 +n03246312 +n09705671 +n07860629 +n10458356 +n10051761 +n09709531 +n02867401 +n12522678 +n13150378 +n04462576 +n03462315 +n03712981 +n07607027 +n10581648 +n02957427 +n04271793 +n02253913 +n12824735 +n11697802 +n02161588 +n12463975 +n02361090 +n09784564 +n09680908 +n03512452 +n13214217 +n10712690 +n04023119 +n07814007 +n09833751 +n12885265 +n02259987 +n11933903 +n03628831 +n11967142 +n02533545 +n03900301 +n07919787 +n12793886 +n10768148 +n03071552 +n02780315 +n12193665 +n03378442 +n04486616 +n07832307 +n03164192 +n12786273 +n04261868 +n12655351 +n12320414 +n04371979 +n10630093 +n13052014 +n01357328 +n07879821 +n09753348 +n03796974 +n11701302 +n11678299 +n04022434 +n11610823 +n07726009 +n04117639 +n10474343 +n11888061 +n01842788 +n10435251 +n03343047 +n03383378 +n12750767 +n09662661 +n05241485 +n10000459 +n12220496 +n02246941 +n12676370 +n02253264 +n07766409 +n02940289 
+n12089320 +n10363573 +n12922119 +n09783537 +n11695285 +n12331066 +n12573647 +n10218164 +n12509821 +n07862946 +n12818601 +n02589316 +n13191620 +n03758992 +n12112337 +n10733820 +n02898093 +n02645953 +n10150794 +n04595762 +n02344918 +n13132756 +n12859153 +n12138444 +n04211001 +n12935166 +n07830493 +n10142166 +n11951820 +n03018848 +n01453742 +n11985321 +n10000294 +n01362336 +n02328009 +n12639376 +n03090437 +n02204249 +n04312916 +n13127666 +n09684082 +n03432509 +n10274318 +n09704057 +n07593972 +n10074249 +n13157971 +n01638194 +n04036963 +n11708857 +n03418749 +n12589458 +n11899762 +n07683138 +n01601410 +n07854707 +n04279063 +n03239607 +n10302700 +n12520406 +n12576451 +n03881534 +n07565608 +n02349390 +n12569851 +n12249294 +n04059399 +n03530189 +n09357346 +n04325208 +n13159691 +n04045941 +n13898315 +n11992479 +n02353411 +n07825496 +n12922458 +n03115014 +n11761836 +n03323211 +n02793296 +n03492087 +n05241662 +n05491154 +n10419630 +n04506895 +n10546428 +n02907296 +n10769459 +n11647868 +n13188462 +n03825442 +n13209460 +n10742005 +n07599242 +n12361754 +n04570532 +n04131811 +n07756499 +n02598134 +n01910252 +n02910701 +n10129338 +n13871717 +n12673588 +n12565912 +n07562172 +n02711237 +n10775003 +n07695410 +n02637179 +n12930951 +n10261211 +n02906963 +n01366700 +n10642705 +n09846586 +n02779719 +n04978561 +n01369358 +n12114010 +n03521771 +n10667709 +n02296612 +n10722029 +n03500557 +n01365474 +n10472447 +n07585644 +n07609316 +n04013060 +n04505888 +n09726811 +n12692160 +n12378963 +n03585551 +n13139837 +n10167565 +n03799375 +n11990920 +n09640327 +n04502989 +n10108832 +n10561736 +n01897426 +n11766189 +n12462582 +n12913524 +n02684356 +n13200542 +n10466198 +n04331892 +n01478969 +n07837234 +n07692248 +n04552097 +n12382875 +n01484447 +n04120695 +n12681376 +n10293861 +n11965962 +n11788039 +n03959227 +n01832813 +n09918867 +n09942697 +n07587206 +n10459882 +n01347583 +n02267208 +n03951453 +n03006903 +n12126736 +n10286749 +n03395401 +n04605057 +n03467887 +n12755559 +n04020744 +n11629354 
+n01647033 +n02780445 +n10205714 +n09439032 +n03138128 +n02763083 +n07835547 +n12251278 +n11949857 +n01635480 +n10675142 +n07845335 +n07751977 +n10332110 +n11871496 +n11764814 +n12229651 +n07760297 +n09865672 +n02919308 +n12218490 +n03782929 +n12231709 +n11909864 +n03144982 +n11799331 +n10433610 +n10483395 +n03206023 +n05442594 +n03626418 +n07870478 +n10171456 +n11964446 +n12796849 +n02126317 +n03797062 +n01412694 +n07610746 +n03581897 +n04479526 +n12447891 +n11906514 +n09699642 +n12873984 +n10586903 +n13234114 +n02436353 +n11889205 +n01460303 +n04400899 +n11884967 +n02140491 +n12215824 +n03586911 +n01394040 +n10691937 +n12371704 +n09668988 +n04362624 +n01740885 +n01337191 +n09714120 +n02185481 +n08555333 +n10704238 +n12430471 +n12034594 +n10012484 +n12088909 +n03205903 +n04129490 +n13090018 +n10712474 +n12234669 +n13016076 +n00454855 +n13882713 +n02644817 +n03192907 +n03519226 +n01561181 +n04583967 +n11732052 +n10732854 +n04480303 +n07934908 +n03825673 +n10621294 +n04354387 +n03374102 +n02922159 +n13158815 +n04000716 +n09685806 +n04427216 +n12051514 +n09712967 +n12081649 +n09748889 +n03252231 +n10704886 +n12897118 +n12525168 +n11728769 +n02731251 +n02548884 +n12403276 +n09627807 +n08679167 +n09663999 +n04247440 +n07711683 +n09909929 +n03415868 +n05244421 +n07680416 +n12757668 +n11935794 +n03483086 +n01860864 +n10755164 +n03675076 +n12004987 +n07566092 +n04078955 +n03379719 +n01916588 +n10138369 +n09755893 +n03649003 +n03977430 +n02309120 +n10616578 +n12242850 +n12388293 +n03292085 +n09919061 +n10302576 +n01497413 +n01936858 +n01377278 +n04358256 +n02667693 +n12125183 +n07758582 +n07813324 +n09737453 +n12745564 +n03855464 +n03166685 +n01446152 +n09801102 +n10561222 +n10576818 +n13915209 +n10474446 +n03845990 +n04237174 +n12531328 +n07855812 +n10763245 +n04614505 +n07905770 +n12051792 +n12653633 +n03593862 +n10359659 +n10436334 +n07853125 +n12911264 +n12265083 +n03638014 +n04444121 +n02706221 +n10563711 +n07808166 +n11799732 +n04093915 +n10451858 +n04410760 
+n10075299 +n12740967 +n12635359 +n09611722 +n12902466 +n13915305 +n05542893 +n04440597 +n03675445 +n12315245 +n10646032 +n10047199 +n12775717 +n10365514 +n10590452 +n11616260 +n02812342 +n07856756 +n04570416 +n03565991 +n12215210 +n04330896 +n02388588 +n02266269 +n10760199 +n14714645 +n02742070 +n03565710 +n12609379 +n03420935 +n03441465 +n00453631 +n01963479 +n04362972 +n09863936 +n03961394 +n03009269 +n12297280 +n04561010 +n12192877 +n02981565 +n12134695 +n07855413 +n03232815 +n10180791 +n09932788 +n10571907 +n02109256 +n02660091 +n07865788 +n13228536 +n10306279 +n02635580 +n03634899 +n10262343 +n12296929 +n04393301 +n06281175 +n04485586 +n13103660 +n10510974 +n04166436 +n01634522 +n07596362 +n12700357 +n08597579 +n11744011 +n12238756 +n01790171 +n04571800 +n11867311 +n03464467 +n12241880 +n09961605 +n12592544 +n03170459 +n09938991 +n02692680 +n10295371 +n04331765 +n02612167 +n02520810 +n11977887 +n04094608 +n07722390 +n07832202 +n12448361 +n04612159 +n12186352 +n13161151 +n12654227 +n09868899 +n10104756 +n09920106 +n12981301 +n02610980 +n12545865 +n10673296 +n04110841 +n01704626 +n04055700 +n12117912 +n10519126 +n12443736 +n01697978 +n02148088 +n03012644 +n12091697 +n10395390 +n10509810 +n10462751 +n02896949 +n03836602 +n03928994 +n07718195 +n02473983 +n08571642 +n02648916 +n11970298 +n06274292 +n04613158 +n09856401 +n12811713 +n13111340 +n12122442 +n10095265 +n04445610 +n11631619 +n07863644 +n12022821 +n10315217 +n12549799 +n03386343 +n03121040 +n03558007 +n12272432 +n11798496 +n02522866 +n02952935 +n10741493 +n12143065 +n07883156 +n09616573 +n02289988 +n13161904 +n02588945 +n00451768 +n12375769 +n10777299 +n04495183 +n11930994 +n09970088 +n02254246 +n12276314 +n07857598 +n04428382 +n03789794 +n03383821 +n12980080 +n01447139 +n12880799 +n03501520 +n10764465 +n13143285 +n12727729 +n12444095 +n02354621 +n13174354 +n01691652 +n07732525 +n10437014 +n04368235 +n10371052 +n02611898 +n03597147 +n09912431 +n03135788 +n07888058 +n02409202 +n14582716 +n11934463 
+n04395332 +n12558680 +n05257967 +n11798978 +n10617024 +n04102760 +n12132092 +n12988572 +n10390698 +n11887310 +n12063211 +n12952717 +n13141972 +n12176453 +n10245863 +n10509161 +n10389976 +n10333165 +n01474864 +n09274305 +n11888424 +n10368711 +n13222877 +n10469611 +n07582970 +n09700125 +n12805762 +n07865575 +n07853852 +n03628421 +n04482975 +n03099622 +n01349735 +n11943133 +n12736603 +n12197601 +n10597745 +n04418644 +n12689305 +n07755262 +n10598459 +n04312020 +n03195485 +n09776642 +n10596517 +n10223606 +n01923890 +n12703716 +n03465040 +n12372233 +n12528109 +n03571853 +n10802621 +n10204177 +n02320465 +n03976105 +n02214096 +n02148991 +n10377542 +n10697135 +n03538542 +n07582027 +n04517999 +n12180456 +n02838014 +n03977266 +n03818001 +n12191240 +n11648776 +n10773800 +n04475496 +n03945817 +n04682018 +n02994743 +n02787269 +n11650160 +n03834472 +n03389983 +n09797742 +n06209940 +n12525513 +n12672289 +n01893164 +n10710259 +n01892145 +n11773408 +n10554024 +n09864968 +n10699752 +n11631405 +n10414768 +n04430605 +n10742546 +n10738871 +n12857204 +n09309046 +n01724840 +n04123317 +n07881525 +n03868044 +n02140268 +n10708292 +n09838295 +n09797998 +n10710171 +n11814996 +n11938556 +n03543511 +n02151230 +n01515217 +n03533392 +n02039780 +n12810151 +n02335231 +n12152251 +n13225617 +n09801275 +n01978587 +n14821852 +n11742878 +n12679023 +n03521431 +n09679028 +n02021281 +n10784544 +n04421258 +n12492460 +n03720005 +n02541257 +n03889397 +n02888898 +n10659762 +n12045157 +n12712320 +n10369095 +n09721444 +n12769318 +n01703161 +n12697514 +n07836456 +n03905361 +n10660883 +n07769306 +n11893916 +n07846274 +n04110281 +n03655470 +n07740744 +n01363719 +n12540647 +n09896311 +n12842642 +n07755619 +n07754155 +n11548870 +n02868546 +n04215588 +n04288165 +n13201566 +n07721118 +n12018271 +n11903333 +n02909165 +n02662559 +n11658709 +n13063514 +n07725663 +n10179069 +n10776887 +n12637485 +n03814528 +n12542043 +n07833333 +n07820036 +n02746683 +n07925808 +n10349750 +n03154316 +n04155625 +n03232923 +n02116185 
+n09998788 +n02821543 +n03410303 +n10656223 +n07916582 +n12880638 +n10408809 +n04612840 +n11805255 +n12044784 +n10497534 +n03458422 +n12873341 +n07808675 +n09476123 +n07611733 +n10598013 +n02214660 +n05469664 +n03952150 +n11855435 +n04375926 +n08523340 +n01642391 +n04007415 +n09756961 +n12891824 +n02894847 +n11698245 +n12906771 +n02894024 +n04131015 +n11882636 +n04386456 +n03291551 +n07837110 +n12462221 +n08540532 +n10299875 +n12705978 +n10448322 +n10487592 +n12175598 +n02272552 +n03833907 +n10383237 +n12758176 +n12729950 +n10061195 +n07816726 +n03241903 +n12239880 +n10380499 +n07855188 +n10207077 +n02770078 +n12961393 +n03778459 +n10734741 +n03485575 +n09958447 +n12337246 +n11830045 +n09866354 +n03209666 +n01470145 +n10395209 +n03872016 +n04267091 +n12888457 +n12104104 +n04088229 +n01964957 +n12002651 +n02503756 +n00481938 +n01908042 +n03378765 +n04193883 +n09862183 +n11861487 +n02520525 +n02081060 +n10386754 +n12693865 +n04514095 +n01325060 +n02460817 +n07568095 +n03651605 +n02561937 +n12844409 +n12888016 +n02974565 +n12439154 +n13018906 +n12071259 +n03897634 +n02863176 +n10603528 +n03493911 +n12887532 +n12944095 +n12794568 +n09980458 +n03503567 +n11783162 +n13123309 +n11729860 +n03702582 +n04280373 +n10086744 +n01790557 +n12627526 +n10552393 +n12092629 +n03888998 +n12751675 +n01442450 +n02479332 +n07726230 +n03642341 +n03142325 +n06263895 +n12088327 +n09703344 +n10528493 +n02820085 +n07737594 +n04090781 +n09901642 +n02328942 +n02724722 +n09866115 +n12658715 +n10481167 +n13135692 +n11850918 +n10205344 +n12361560 +n03698123 +n03284482 +n12106134 +n04441528 +n02591613 +n02581108 +n07856186 +n12197359 +n12900783 +n01725713 +n12012253 +n03907475 +n02170738 +n03694949 +n13238654 +n04611795 +n02782432 +n13191148 +n02741367 +n04170694 +n12770892 +n01973148 +n10080508 +n10161622 +n09808591 +n07912093 +n02059541 +n02779971 +n03857156 +n12945366 +n03055159 +n12758325 +n10067305 +n02597818 +n07808352 +n13147153 +n10679723 +n02271222 +n04012665 +n12942729 +n10349243 
+n01377510 +n07800636 +n10654321 +n10219453 +n09961469 +n10732521 +n04479405 +n11632929 +n03856728 +n08658918 +n10327143 +n10754281 +n02085118 +n09691604 +n09952163 +n10082299 +n03872167 +n03733465 +n04138869 +n01425223 +n12066821 +n02177506 +n09892262 +n02896694 +n12983654 +n13224922 +n09658921 +n12744850 +n03639880 +n02943686 +n10660621 +n11936539 +n03698226 +n04519536 +n12392765 +n09319604 +n07567039 +n04160261 +n01802159 +n02838178 +n07746910 +n02266421 +n10240417 +n12542240 +n12550408 +n01445857 +n04132465 +n03569014 +n12666050 +n12362514 +n10676569 +n09702673 +n12885510 +n04447156 +n04396226 +n12240150 +n11639306 +n02249134 +n01340785 +n02833140 +n10027590 +n02142407 +n11996251 +n07874531 +n04340019 +n03166120 +n10420277 +n04465203 +n12738259 +n12831141 +n03998673 +n01385017 +n12842519 +n02587051 +n10753061 +n12505253 +n13906936 +n01989516 +n12640435 +n07852532 +n04243142 +n10261511 +n12853287 +n12239240 +n03973003 +n09983889 +n10345302 +n14804958 +n02354162 +n03049326 +n10443659 +n01318660 +n12787364 +n04253304 +n11941094 +n09283514 +n09393524 +n11865574 +n01531639 +n04409279 +n02859729 +n10712835 +n03694196 +n04343630 +n10331098 +n12929600 +n02826259 +n10171219 +n07735179 +n07594840 +n03709644 +n09950728 +n09859285 +n07718329 +n01418620 +n09858299 +n12395068 +n10011360 +n07763290 +n02643316 +n03596099 +n04422566 +n11958888 +n09650989 +n10318686 +n01333082 +n12886402 +n03781467 +n12667582 +n02923535 +n09988311 +n08663860 +n02508346 +n13885011 +n03939281 +n10772937 +n04485750 +n09871952 +n10291942 +n07759324 +n10174971 +n03666238 +n01937579 +n02308033 +n07847706 +n10371330 +n04124887 +n11853079 +n11941478 +n12647231 +n04601041 +n12718483 +n02902816 +n01941340 +n04066767 +n07617839 +n02254901 +n03488784 +n07834774 +n02524659 +n03367969 +n10783734 +n03422484 +n09776807 +n03970363 +n10131590 +n03433247 +n02622712 +n10206506 +n12061104 +n11936287 +n07874674 +n10061043 +n07828275 +n03764606 +n12236768 +n01826844 +n09741904 +n05454978 +n03591592 +n01441272 
+n03736372 +n07585474 +n12762405 +n12943912 +n01894522 +n03218446 +n11846425 +n11689678 +n04147916 +n02375862 +n10409459 +n09287415 +n10113583 +n03261263 +n02817386 +n09869578 +n10550252 +n02532786 +n12031388 +n07937344 +n11612235 +n01571410 +n09402944 +n04234670 +n02603862 +n04196925 +n09999135 +n10468750 +n15093049 +n03003633 +n11650307 +n12312110 +n02525703 +n10501635 +n09751622 +n10114550 +n10103155 +n12829975 +n04004099 +n12419878 +n02082190 +n03328201 +n03093427 +n07845571 +n12655498 +n02558206 +n12563045 +n07573453 +n12324558 +n13016289 +n10601234 +n10310783 +n03531691 +n02135610 +n03168543 +n09985978 +n10615334 +n07839312 +n09985809 +n10142537 +n10417969 +n07869111 +n12514992 +n04327544 +n10326776 +n12583681 +n01476418 +n12840168 +n03852544 +n11713763 +n07824502 +n07858841 +n12256325 +n03036149 +n07883661 +n04500390 +n10170866 +n01835918 +n10760951 +n10720197 +n12330239 +n02135844 +n10210512 +n03217739 +n10802953 +n03136254 +n02161225 +n03961630 +n12927194 +n02251233 +n13891937 +n09945603 +n02695762 +n12181612 +n13234857 +n10175725 +n11346873 +n07934678 +n02318687 +n10251329 +n04112921 +n04001132 +n03042984 +n11704791 +n04246459 +n12193334 +n10718509 +n10371221 +n05278922 +n03265754 +n12186554 +n12481289 +n10521853 +n10748506 +n11729142 +n10143595 +n09422631 +n07562984 +n07850219 +n04193742 +n11997160 +n12002826 +n12820113 +n04132829 +n10272913 +n03358841 +n12610740 +n12384569 +n10725280 +n02746008 +n13148384 +n12635151 +n02337171 +n10350774 +n12308907 +n04542474 +n04339062 +n03549350 +n10240235 +n10556033 +n10214390 +n01791314 +n02801047 +n07817465 +n11610602 +n10315730 +n14592309 +n10249191 +n12453857 +n12579822 +n09833275 +n04051269 +n11552594 +n04088343 +n04565039 +n03930431 +n10679503 +n11899921 +n10295479 +n01357507 +n13036312 +n03404900 +n12523141 +n01816017 +n02020578 +n12661045 +n06262943 +n02775813 +n12921315 +n09751076 +n09834258 +n10585628 +n12885754 +n04411019 +n10342367 +n10368798 +n09672840 +n12729023 +n04578329 +n10325549 +n03680248 
+n11920663 +n10416567 +n10011486 +n01643255 +n03193754 +n07823814 +n04055447 +n10660128 +n07765612 +n07612530 +n04205613 +n09677427 +n03989199 +n11100798 +n12721122 +n10000787 +n10382157 +n07724819 +n12928819 +n11631159 +n02608996 +n10516527 +n09703101 +n12290975 +n03470222 +n03810412 +n03729131 +n03356038 +n12692024 +n12614625 +n10789415 +n02333819 +n01722670 +n03885410 +n12038208 +n02294097 +n02608860 +n02500596 +n07909231 +n03254625 +n09681973 +n12221368 +n01893399 +n10025295 +n03194812 +n13181406 +n12249122 +n03447894 +n09795010 +n02187900 +n10139651 +n10631654 +n01792530 +n02569631 +n07853946 +n09907804 +n03263758 +n04214649 +n02450829 +n02431542 +n11998492 +n02651060 +n04101860 +n01806061 +n13901423 +n12903964 +n03968479 +n04268565 +n12601494 +n02083780 +n04570118 +n12247407 +n03337822 +n09878921 +n02369935 +n10022908 +n09667358 +n13160938 +n11937360 +n07741623 +n03705808 +n12241426 +n10478118 +n03805933 +n10343869 +n09391774 +n03482128 +n10357737 +n10334461 +n09675045 +n09662951 +n10174253 +n01815270 +n13873361 +n04432785 +n09778927 +n10671898 +n05571341 +n10033572 +n09864632 +n10618465 +n03437184 +n12786464 +n01723579 +n11798270 +n07742415 +n02143142 +n10548419 +n03695122 +n02518622 +n04605446 +n10218292 +n11832671 +n12646950 +n03382708 +n09844898 +n09674786 +n01472502 +n07616906 +n09763272 +n03982767 +n10005006 +n03059236 +n01816474 +n03725869 +n01979269 +n04226322 +n13236100 +n03920384 +n11852148 +n04373563 +n04324120 +n11686652 +n03036341 +n02142898 +n09783776 +n13147918 +n03465320 +n07855721 +n10336411 +n10438619 +n07750299 +n12237152 +n03559373 +n10077106 +n10169796 +n09828403 +n09959658 +n12464128 +n12934685 +n04221673 +n02617537 +n11689367 +n10180580 +n07813717 +n12529905 +n02340186 +n01400247 +n11749112 +n04404072 +n03135656 +n12098827 +n12481150 +n10023506 +n03500838 +n01564101 +n04009923 +n10023264 +n03908456 +n03206405 +n07590068 +n09958133 +n10755394 +n01423617 +n11511327 +n10536274 +n01965252 +n11549245 +n11935627 +n09635635 +n03752071 
+n07585997 +n03147084 +n12666159 +n09748408 +n03796848 +n01501948 +n02345078 +n12430675 +n03103128 +n11710987 +n03393199 +n09233603 +n10465002 +n04298765 +n01351170 +n02720576 +n03966582 +n10643837 +n12420124 +n10793799 +n01652297 +n09281252 +n11983606 +n10222497 +n11832899 +n02391617 +n12434106 +n03987674 +n02140179 +n07896560 +n04325804 +n10647745 +n01924800 +n10156629 +n03545961 +n03906789 +n01890564 +n10699558 +n12332218 +n03247495 +n11839460 +n03527675 +n12586725 +n13208965 +n02714315 +n02750320 +n04615149 +n12679876 +n12863234 +n03304323 +n12139793 +n11922755 +n12321669 +n04979307 +n01921059 +n09657206 +n13042134 +n04045787 +n11700279 +n02337598 +n01415920 +n01400391 +n13207572 +n10785480 +n02515713 +n12018100 +n02634545 +n03292736 +n02881546 +n12655605 +n03105810 +n10545792 +n03894933 +n09796974 +n10320484 +n12308112 +n11549009 +n13047862 +n14941787 +n12379531 +n10540252 +n11696935 +n12184468 +n12851860 +n12908854 +n10586265 +n12369066 +n10426630 +n12523850 +n03916289 +n04538878 +n09908769 +n02828115 +n07560422 +n10266016 +n03569174 +n06423496 +n10495167 +n03617834 +n09327538 +n10195056 +n10508379 +n13031323 +n11659248 +n04242315 +n10742111 +n10700963 +n12032686 +n09877587 +n07825597 +n07568991 +n11736362 +n12169099 +n13103750 +n03263640 +n12248941 +n10665302 +n01920051 +n09704283 +n11533999 +n04503073 +n11645163 +n10639817 +n09920901 +n06340977 +n03251100 +n10378113 +n03226090 +n10131268 +n02877513 +n13191884 +n02787120 +n11709045 +n02740061 +n12323665 +n02831998 +n10342180 +n12716594 +n04498275 +n09905050 +n03745487 +n07642833 +n10294020 +n10211666 +n12205460 +n02981198 +n01642943 +n07679140 +n04390483 +n10432875 +n09214269 +n10792506 +n10243483 +n13099833 +n10221520 +n13177768 +n04091584 +n10672540 +n10200246 +n13889331 +n02345340 +n10237556 +n01833415 +n01335218 +n09804230 +n09957523 +n05235879 +n10070449 +n10308653 +n10721708 +n04312654 +n10394434 +n12201938 +n12434775 +n07601025 +n02672152 +n10157271 +n02635154 +n12572858 +n13182937 +n10160188 
+n03396997 +n10344656 +n02968210 +n10190516 +n07684422 +n03706939 +n07618871 +n02290870 +n03817331 +n03275311 +n12698774 +n04375080 +n07837630 +n04314216 +n11833373 +n07618684 +n03742238 +n12532886 +n03712444 +n11750989 +n10038620 +n09617577 +n03807334 +n10108089 +n01816140 +n10715347 +n02648035 +n13127303 +n02809491 +n02430748 +n12235479 +n01451863 +n01514926 +n10010864 +n01913440 +n09660240 +n11806369 +n01470479 +n12655245 +n07655067 +n03436772 +n11778092 +n03951800 +n10277815 +n07931733 +n01479820 +n03576955 +n07609549 +n12568649 +n05263316 +n02636405 +n01384084 +n03298352 +n07617344 +n09987045 +n10573957 +n07801709 +n02589062 +n02534165 +n02748359 +n09607782 +n07590974 +n02199170 +n02696569 +n09678747 +n12795209 +n13176363 +n10663315 +n10588724 +n09772330 +n10174589 +n12366313 +n11883628 +n07617447 +n01334690 +n03168663 +n11764478 +n08599174 +n03942028 +n12153033 +n03448696 +n12096674 +n10037588 +n03548320 +n09760290 +n10374541 +n09653438 +n10294139 +n10276942 +n12279293 +n12764507 +n12803958 +n10764622 +n02140858 +n07599068 +n10245507 +n12351790 +n12818004 +n10118301 +n03945459 +n09912995 +n12176709 +n03873996 +n10339179 +n10614507 +n10114662 +n10784922 +n03821424 +n04959230 +n13015509 +n12573911 +n11948469 +n09775907 +n12758014 +n01780142 +n09956578 +n12165384 +n10088200 +n10382480 +n04131113 +n09930628 +n09784160 +n11750173 +n13064111 +n03817522 +n12662074 +n03176238 +n12310021 +n11679378 +n09961331 +n02385580 +n11904274 +n03113505 +n10244913 +n02836900 +n09986700 +n11963572 +n13158605 +n10321632 +n02179891 +n02189670 +n10097995 +n10774756 +n10783240 +n10605737 +n02530052 +n10386196 +n10184505 +n09788237 +n03589672 +n12509109 +n10658304 +n12966804 +n12559518 +n03189311 +n01451295 +n12179632 +n12301613 +n10496489 +n03402785 +n10244108 +n02385676 +n03552001 +n03092053 +n02313360 +n02547733 +n02109391 +n01327909 +n04574606 +n03060728 +n07840124 +n10567848 +n10062176 +n02703124 +n10804732 +n12699301 +n04515890 +n07919665 +n10457214 +n09663248 +n03165955 
+n12988341 +n03987865 +n03031756 +n10277912 +n10172080 +n09325824 +n03198223 +n09605110 +n10113869 +n11603462 +n03352366 +n11930203 +n09769929 +n12979316 +n02579762 +n09953052 +n03105974 +n00476140 +n11598287 +n02830157 +n10512201 +n09746936 +n10668666 +n02919976 +n09993651 +n02149861 +n09705003 +n10389865 +n11655152 +n10010767 +n10070563 +n03688832 +n10590239 +n11936027 +n02939763 +n03163488 +n03171910 +n09955406 +n03266195 +n10217208 +n09338013 +n07594250 +n03215930 +n09725935 +n10592049 +n03732658 +n12498457 +n09966554 +n10668450 +n10361525 +n04060198 +n11936624 +n02602760 +n03942600 +n03708425 +n10020533 +n12067817 +n07590177 +n01891274 +n11837204 +n01419332 +n03860234 +n12616248 +n07834160 +n09867154 +n09788073 +n12222493 +n03388990 +n04245412 +n10182402 +n11675404 +n10450038 +n13045594 +n13158167 +n13082568 +n12052267 +n12707199 +n07810531 +n07914887 +n13127001 +n02573249 +n08619112 +n10471859 +n09919899 +n03635516 +n12067029 +n03352232 +n07765517 +n10519984 +n02742194 +n03062798 +n13124654 +n09958569 +n02370137 +n10121714 +n04019335 +n07732433 +n02559383 +n12585137 +n09729156 +n10744078 +n09954355 +n03078506 +n10062042 +n10688811 +n02668613 +n03142205 +n10347204 +n10518349 +n09898020 +n12563702 +n05468098 +n10116370 +n07838905 +n03127024 +n03545585 +n12801072 +n09940818 +n04480995 +n10466564 +n02606751 +n10032987 +n10771066 +n01587278 +n11852531 +n01455461 +n10397392 +n02349205 +n10180923 +n09778266 +n04366832 +n10051975 +n10538629 +n09865744 +n12554029 +n13118330 +n12952590 +n04187751 +n09924313 +n10062594 +n01980655 +n10028402 +n02567334 +n10590903 +n10265891 +n10739297 +n01457082 +n03437581 +n03713151 +n03475674 +n05464534 +n11863467 +n06592421 +n12491435 +n14914945 +n10279778 +n03388711 +n10483890 +n10612373 +n03332784 +n02332954 +n02952798 +n13041943 +n01607309 +n04356772 +n07711799 +n12670962 +n12229111 +n07878479 +n12401893 +n07772413 +n12138110 +n09781504 +n07902698 +n02750652 +n13042316 +n12400924 +n02304797 +n03066464 +n12852234 +n10155222 
+n05541509 +n10711483 +n04210858 +n02835551 +n12859679 +n02935490 +n03540476 +n05279953 +n09807075 +n09617435 +n03566860 +n10549510 +n10025391 +n10754449 +n11927740 +n03554645 +n01837526 +n02656969 +n08648917 +n07860548 +n01452345 +n04021704 +n07783827 +n10080117 +n02187554 +n03214966 +n10036444 +n04291069 +n12407396 +n02170599 +n09896826 +n12417836 +n07845495 +n02749292 +n03061819 +n03682380 +n10756261 +n10369955 +n09692125 +n09978442 +n04277669 +n10539278 +n09703932 +n01879837 +n02746225 +n13159357 +n11763874 +n10540656 +n07933530 +n12987535 +n02371344 +n10654827 +n09723944 +n12775393 +n11856573 +n12626878 +n12716400 +n09903639 +n09784043 +n03906894 +n10775128 +n03124313 +n10396727 +n02841641 +n10211830 +n12283395 +n03490784 +n14175579 +n04027935 +n12396091 +n02609823 +n01414216 +n09880741 +n11976933 +n03073384 +n09270160 +n11768816 +n12073217 +n11597657 +n09994878 +n11756329 +n12579404 +n03161893 +n01451115 +n07736971 +n02949356 +n03878418 +n12653436 +n10626630 +n12777892 +n13061704 +n10498699 +n03609786 +n03199358 +n10776339 +n10762480 +n13179056 +n10113249 +n04029913 +n12640081 +n10493835 +n11683216 +n03524287 +n04585626 +n02969527 +n12976554 +n08569482 +n10204833 +n12442548 +n02577952 +n09357447 +n10202225 +n02198129 +n11882972 +n10404426 +n01600341 +n12016434 +n09867069 +n10576223 +n09893600 +n01702479 +n04274686 +n04406552 +n02848118 +n02258629 +n03260733 +n03685640 +n11751974 +n09967555 +n06274546 +n09649067 +n10681557 +n07606933 +n03110202 +n11982545 +n10803031 +n02679142 +n04086937 +n10514255 +n04506402 +n03884554 +n09970192 +n10117017 +n12642435 +n10186686 +n02097967 +n03956531 +n11834890 +n02677436 +n10040698 +n11796188 +n03348142 +n04168472 +n02294407 +n12483282 +n09429630 +n04423687 +n09819477 +n09755555 +n10157016 +n03344935 +n07762373 +n12871859 +n09853541 +n09875979 +n13050705 +n02251067 +n10637483 +n03823673 +n10357012 +n03424204 +n04431648 +n01475940 +n02339282 +n10248198 +n07683265 +n13150592 +n10359117 +n10096508 +n03473078 +n13052248 
+n10743356 +n03710079 +n10634990 +n04507689 +n07921090 +n02352002 +n03924407 +n03609147 +n02837567 +n03406759 +n03909658 +n10286282 +n12135576 +n01912809 +n10801561 +n10717055 +n03473465 +n03761588 +n03144156 +n09474412 +n10253611 +n12549420 +n02499568 +n09910222 +n10431122 +n12699031 +n01697749 +n11786843 +n03888808 +n12089496 +n10066314 +n10302905 +n12696830 +n09965787 +n11969806 +n04066388 +n13080306 +n03913930 +n09968259 +n10490421 +n10714195 +n07570021 +n10343449 +n10401204 +n03472796 +n10779897 +n11787190 +n03503097 +n10439523 +n12123648 +n04279858 +n10511771 +n09755788 +n08253141 +n02616397 +n12248574 +n01645466 +n04334504 +n07729142 +n05451099 +n10503818 +n10354265 +n09707735 +n02633422 +n11999656 +n01324916 +n02088745 +n09354511 +n10705448 +n09756195 +n10136615 +n10427359 +n09702134 +n12600095 +n04122262 +n10791820 +n03330002 +n02713496 +n11710658 +n09664908 +n02550203 +n02349847 +n12835766 +n04098260 +n11536567 +n11686780 +n12875861 +n12758471 +n09806944 +n11810030 +n10400003 +n10098388 +n11663263 +n10559683 +n07833672 +n10753989 +n10643095 +n01988869 +n03112240 +n12911914 +n09979913 +n09785236 +n09790047 +n02676097 +n01653509 +n04601159 +n01938735 +n10748142 +n12978076 +n11990627 +n10437262 +n12972136 +n04077594 +n10148825 +n02269340 +n12886185 +n03608504 +n11677485 +n10612518 +n12267265 +n10649308 +n05458173 +n10650162 +n03213361 +n02747063 +n01611674 +n02322992 +n01554017 +n03512624 +n12773142 +n12747120 +n09902128 +n03162714 +n03924532 +n10299125 +n12378753 +n02778131 +n09976024 +n13093629 +n10778999 +n07721833 +n12232851 +n07876775 +n10097590 +n03194170 +n13029122 +n04573832 +n12859272 +n09639382 +n07688021 +n02878796 +n10751710 +n03633632 +n07762534 +n10779995 +n13914265 +n13093275 +n10729330 +n10433077 +n03663910 +n10499110 +n02272286 +n10371450 +n01967308 +n12633061 +n11659627 +n12982915 +n10344121 +n10268629 +n02697876 +n09879552 +n10167361 +n10719807 +n04042076 +n01632952 +n03243625 +n02125872 +n10105906 +n12194613 +n03149810 +n10721124 
+n03947343 +n02020219 +n10122531 +n01315330 +n08647264 +n00452734 +n03607527 +n10010243 +n09863749 +n04473275 +n11782878 +n03585337 +n09655466 +n12989007 +n11711971 +n10716864 +n10475835 +n10704712 +n01894956 +n10568443 +n12881105 +n10387836 +n10403633 +n08645318 +n03500457 +n10377633 +n10108464 +n09933972 +n02618094 +n11798688 +n04155735 +n09780395 +n12822466 +n04302200 +n11899223 +n10633298 +n02760298 +n12142450 +n10803282 +n10769321 +n10514051 +n10597889 +n11837562 +n02261757 +n01458746 +n09830759 +n10003476 +n09817174 +n10738670 +n10118743 +n12096563 +n03054491 +n12155773 +n10439727 +n04170384 +n03223923 +n12632733 +n07845421 +n10062905 +n11831521 +n04267985 +n12796385 +n04154854 +n00444142 +n09778537 +n03115663 +n04385157 +n10109826 +n02337332 +n09996304 +n09880189 +n12871696 +n11823305 +n02516776 +n12377494 +n08511017 +n04421417 +n10765305 +n09675673 +n03488111 +n03076623 +n11829672 +n10292316 +n10758949 +n13031474 +n02829353 +n10090745 +n09186592 +n12736999 +n12715195 +n11684499 +n03168933 +n09890192 +n10596899 +n12527081 +n10496393 +n10497135 +n02137302 +n03266620 +n12958615 +n12664187 +n02633977 +n04262869 +n04215800 +n13133233 +n02392555 +n09858733 +n10186350 +n01715888 +n03142099 +n08573674 +n11687071 +n02690715 +n03146342 +n12331788 +n08079613 +n10609092 +n12943049 +n12234318 +n02312325 +n12618146 +n10135197 +n11705573 +n02794368 +n02850358 +n09464486 +n01993525 +n03187153 +n10097262 +n02976641 +n12198793 +n12941717 +n10219240 +n12434634 +n03827420 +n10437137 +n10342893 +n04174026 +n10265281 +n07757874 +n10765885 +n01470895 +n02349557 +n11716698 +n03765467 +n10227793 +n07824268 +n12994892 +n10486236 +n02974454 +n10718349 +n11726145 +n09909660 +n03378593 +n07805006 +n09875025 +n02645691 +n10223069 +n03722944 +n04389999 +n02544274 +n10239928 +n04456011 +n10382302 +n01552333 +n10082562 +n12952469 +n09883047 +n10442573 +n01891013 +n10690268 +n13111504 +n02287352 +n03567635 +n10331347 +n09762385 +n09933842 +n02369555 +n12291459 +n09919200 +n01492860 
+n02067768 +n10713254 +n10550468 +n12846335 +n03835729 +n12467018 +n11676743 +n03629643 +n12987423 +n10655730 +n08678783 +n10349836 +n10087736 +n10246703 +n10338391 +n04585456 +n04158138 +n10500942 +n09850974 +n10791890 +n10020807 +n03315805 +n02752917 +n04033801 +n10492086 +n04427473 +n02940706 +n12110475 +n09832978 +n12515393 +n07800487 +n09848110 +n02659176 +n09967406 +n10536134 +n10760622 +n09736485 +n07830690 +n07835173 +n09814252 +n10311506 +n10341955 +n03869838 +n07760673 +n09970402 +n12526178 +n11687964 +n09968741 +n10719267 +n07851054 +n10116478 +n10599215 +n09951524 +n03855908 +n03997274 +n02986348 +n08599292 +n02474282 +n04155889 +n09983314 +n01987727 +n10280130 +n10404998 +n02294577 +n02998696 +n08586978 +n11652578 +n13867005 +n12663254 +n10524869 +n02287622 +n10220924 +n03279918 +n02626089 +n10291110 +n12820669 +n07861681 +n08643267 +n07720185 +n12555859 +n03225616 +n09769525 +n03295140 +n12489046 +n10615179 +n12150969 +n02888429 +n10753182 +n10267166 +n03675558 +n12693352 +n02378299 +n02788462 +n03622401 +n12236977 +n10730542 +n12758099 +n10502046 +n11937195 +n10366145 +n10307114 +n12984595 +n10128748 +n09362316 +n09789898 +n09654079 +n04260192 +n10114476 +n08623676 +n10331841 +n05265736 +n10269289 +n03090856 +n12764978 +n02825240 +n10358032 +n09825750 +n03062651 +n11196627 +n11825749 +n04148464 +n04439505 +n07572858 +n04561857 +n12904562 +n03643907 +n10723597 +n01492708 +n10071557 +n10140683 +n01739871 +n12984267 +n03072056 +n10772580 +n10462588 +n11936448 +n10494373 +n12845908 +n09793352 +n10717196 +n12577362 +n09779124 +n10663549 +n02286425 +n10380126 +n01890144 +n02751490 +n03361109 +n01781875 +n13128278 +n09994400 +n09883452 +n13881512 +n02833275 +n10362003 +n01376543 +n12366675 +n09984960 +n10173665 +n10673776 +n02057898 +n01934844 +n04057673 +n10018747 +n02916065 +n13024653 +n05539947 +n09648911 +n04150273 +n01393486 +n10411356 +n12232114 +n02436224 +n12757930 +n03095965 +n10555059 +n01577458 +n09666476 +n10598904 +n11656549 +n02591911 
+n13092385 +n10506336 +n13103023 +n09658254 +n04095938 +n11936782 +n07824383 +n09781650 +n10240821 +n01780426 +n02850060 +n02863340 +n13914141 +n12138578 +n13034555 +n12291671 +n12133151 +n04515444 +n04591359 +n02589196 +n02689819 +n11740414 +n07610295 +n10246395 +n09921034 +n12447346 +n12641180 +n01419573 +n04242587 +n07760395 +n03399579 +n09866661 +n02549376 +n11861238 +n01588996 +n04319545 +n09789150 +n03288643 +n10312491 +n03353281 +n02345997 +n09711132 +n03043173 +n02558860 +n03703590 +n03188871 +n12589142 +n12113323 +n09987161 +n05242239 +n09686262 +n09780984 +n09668199 +n09716933 +n11675738 +n04459243 +n11833749 +n10646942 +n07760070 +n10286539 +n04469684 +n13030616 +n03939440 +n01725086 +n09967816 +n10500824 +n13026015 +n03983928 +n02936921 +n04115542 +n10245029 +n12105828 +n12452673 +n10498046 +n10737264 +n11766046 +n04079603 +n10072054 +n12569037 +n10153155 +n09867311 +n02806992 +n10258602 +n10164025 +n10520964 +n02258508 +n12199399 +n05266096 +n08496334 +n10351064 +n12441552 +n12878325 +n13102648 +n02980625 +n03462972 +n12395906 +n13022903 +n11895714 +n03324814 +n11318824 +n01728266 +n07883510 +n10731013 +n10181799 +n12142357 +n09671089 +n11531334 +n01718414 +n04573625 +n10390600 +n11553522 +n01314910 +n04227519 +n10514784 +n02944256 +n12103680 +n03081859 +n11655592 +n12569616 +n10700105 +n09755086 +n03865820 +n01456137 +n10442232 +n02900987 +n04491934 +n07849026 +n04519728 +n09986450 +n03305300 +n10186143 +n02879422 +n03018614 +n10747548 +n10562509 +n10068425 +n12593341 +n11937692 +n08679562 +n09613690 +n10646433 +n12251740 +n10994097 +n13048447 +n03848537 +n12153741 +n12614096 +n11654438 +n09985470 +n10562968 +n02923915 +n10740594 +n07802767 +n12514592 +n10335801 +n03878674 +n12586499 +n10255459 +n02413824 +n10312600 +n02616128 +n12644283 +n04238953 +n04526520 +n01898593 +n09737161 +n03372822 +n09781398 +n10339251 +n02502807 +n10198832 +n10679610 +n13136781 +n11974373 +n11680457 +n10083677 +n04037298 +n09945021 +n09987239 +n02708885 +n13107807 
+n10130877 +n12507379 +n08651104 +n12116058 +n10135297 +n04269086 +n03858533 +n10477955 +n04394031 +n10442417 +n10074735 +n03618797 +n03460455 +n04374521 +n10756061 +n08517010 +n12923108 +n02362194 +n01704103 +n10062492 +n01394771 +n10473789 +n10330593 +n02748183 +n12562141 +n09745933 +n02505485 +n11922661 +n12018014 +n09866922 +n04067143 +n13161254 +n07813495 +n01374846 +n10213429 +n03253071 +n02546028 +n01642097 +n01475232 +n03212247 +n10155600 +n11689957 +n11738997 +n10525878 +n03301389 +n10589666 +n01908958 +n10289766 +n03900028 +n03437295 +n02987823 +n02739123 +n10505347 +n02546627 +n10381804 +n10132502 +n10336904 +n10189597 +n09786115 +n12875697 +n10761519 +n01470733 +n02875626 +n12111238 +n07862770 +n07856895 +n09996039 +n03368048 +n07913300 +n10062996 +n10555430 +n04302863 +n12758555 +n10740732 +n02385898 +n02385098 +n12162758 +n03887899 +n03976268 +n04234160 +n03641947 +n07857076 +n10578656 +n12135729 +n12675515 +n09032191 +n12969670 +n02600503 +n12518013 +n10227166 +n10121026 +n01801672 +n10661216 +n03244388 +n04147291 +n09664556 +n02539573 +n04480141 +n10601362 +n02613572 +n10537906 +n02613820 +n11656771 +n03841011 +n02845985 +n12534208 +n10241024 +n03645290 +n12743976 +n11922839 +n07709701 +n03066232 +n03467380 +n09266604 +n09663786 +n12775070 +n02427183 +n04083113 +n12896615 +n10501453 +n02345774 +n09965515 +n09704157 +n10666752 +n03846970 +n04167661 +n03991321 +n09556121 +n10686517 +n02586238 +n03594277 +n03591313 +n10391416 +n10756837 +n13163649 +n03971960 +n10245341 +n02577041 +n04481642 +n12373739 +n10214062 +n10091997 +n10275848 +n02090253 +n03514340 +n04593629 +n11795216 +n03126927 +n11871748 +n10272782 +n12056099 +n04484024 +n03101375 +n12255225 +n10724372 +n10531838 +n02354781 +n02389865 +n02853336 +n01477080 +n01779939 +n10776052 +n10724132 +n10284871 +n10554141 +n03898787 +n02366301 +n10721612 +n04421740 +n04256758 +n01445593 +n10103921 +n02729222 +n02530188 +n02387452 +n02601921 +n01711160 +n02474110 +n09869447 +n12789977 +n10158506 
+n10396908 +n07839593 +n02662825 +n02473720 +n13034788 +n07752602 +n03762238 +n10262880 +n07770180 +n04030054 +n10151367 +n03525252 +n10252075 +n10747424 +n10191388 +n04130566 +n03951068 +n13239921 +n03733547 +n10358124 +n11549779 +n09203827 +n04043168 +n10359422 +n04286960 +n04237287 +n10130686 +n02338449 +n12912274 +n10586998 +n02812785 +n10364502 +n03955941 +n12324222 +n09743601 +n03766600 +n01427399 +n12968309 +n11776234 +n01501777 +n10051026 +n10397001 +n01516212 +n02596252 +n02225081 +n10479328 +n02109687 +n10181445 +n02248062 +n03802973 +n01639187 +n02142734 +n02342534 +n02410141 +n02743426 +n03950359 +n12253835 +n07805478 +n03706415 +n03578981 +n04560619 +n09761753 +n03524425 +n01962788 +n04350235 +n10686694 +n13139321 +n10195155 +n12335937 +n12758399 +n03805374 +n12895298 +n03800371 +n11972959 +n11530008 +n03178538 +n02217839 +n10591072 +n04033557 +n01880813 +n12292877 +n02430643 +n07599383 +n01954516 +n09894909 +n02474605 +n03576443 +n07595051 +n03367875 +n12945549 +n02360480 +n14583400 +n04208582 +n02405577 +n02550655 +n02513355 +n04381450 +n00444490 +n03567912 +n09937688 +n07932323 +n04029416 +n01913346 +n13237508 +n04437276 +n12938445 +n03042384 +n12543639 +n03194992 +n04094250 +n12045514 +n03825913 +n03504293 +n12758250 +n03547861 +n03649288 +n04572235 +n07569423 +n03534695 +n03253714 +n01501641 +n13906767 +n12578255 +n11749603 +n07742513 +n07609083 +n04214413 +n07595751 +n12013701 +n12592839 +n12949160 +n04093223 +n02983072 +n03510072 +n02966068 +n03867854 +n01747285 +n10691318 +n13091982 +n12574470 +n02255023 +n03449217 +n03153585 +n04006227 +n13140049 +n02965024 +n03805503 +n03911406 +n13120958 +n12203699 +n01456454 +n10397142 +n12920043 +n02412977 +n08674344 +n07801007 +n03037590 +n10361296 +n13133316 +n03483637 +n04435759 +n12983873 +n02627037 +n03783304 +n07725158 +n02921292 +n01788864 +n01705010 +n12616996 +n03903290 +n08662427 +n03667060 +n07856992 +n03252422 +n02449699 +n12137954 +n10024025 +n07891095 +n04337157 +n04368109 +n03015631 
+n02363996 +n12824289 +n03206602 +n12799269 +n02333733 +n01793565 +n01721898 +n03178173 +n02844056 +n11688378 +n13889066 +n02637475 +n03750437 +n01403457 +n01717229 +n02677136 +n12512294 +n03736269 +n02838577 +n08661878 +n01993830 +n02777638 +n02900857 +n04023021 +n03843092 +n07770439 +n12928491 +n03697812 +n02639922 +n13139482 +n07771082 +n12487058 +n07774182 +n02122810 +n02856362 +n11686195 +n11687432 +n02853870 +n04239218 +n02665250 +n02938218 +n11746600 +n10183347 +n10681194 +n04164199 +n04407257 +n12549005 +n02331842 +n03862379 +n02863638 +n11962994 +n03091907 +n04177654 +n02252972 +n02403153 +n01376437 +n02848806 +n08579266 +n07616265 +n10331258 +n10765587 +n09433312 +n03412387 +n10178077 +n13123841 +n02532918 +n04144651 +n03296963 +n03450881 +n04348988 +n10425946 +n03257065 +n02354320 +n11689197 +n04084682 +n10140783 +n03637027 +n02346170 +n02559144 +n01705591 +n09400584 +n03840327 +n03918074 +n04053767 +n02406046 +n00288190 +n03160001 +n03366464 +n09249155 +n01324305 +n07556872 +n03381565 +n12705220 +n11874878 +n02632494 +n02502006 +n03146560 +n02179340 +n04312756 +n10162016 +n03800563 +n04140853 +n07933652 +n03075248 +n04421582 +n10652703 +n02218134 +n12233249 +n04578559 +n01781071 +n02615298 +n04436832 +n04054566 +n02608284 +n11674019 +n03505764 +n02662397 +n09422190 +n04382537 +n04355684 +n04383923 +n09888635 +n03783575 +n03228796 +n07772026 +n02381119 +n15060326 +n10586166 +n12647787 +n02458517 +n10281546 +n03498866 +n02485988 +n10121246 +n09391644 +n03103904 +n08676253 +n02203978 +n04092168 +n03213014 +n03138217 +n04135933 +n12612811 +n04478066 +n02157285 +n02543255 +n03863783 +n01502101 +n03930229 +n12439830 +n09425019 +n02618513 +n02910241 +n12261359 +n03648667 +n04365229 +n03461651 +n04388040 +n03295928 +n03581531 +n04203356 +n02622249 +n13142907 +n04497249 +n11678377 +n02366579 +n02931013 +n02837134 +n03132438 +n13092987 +n04196803 +n03056215 +n03255322 +n02130925 +n10291469 +n02971940 +n01718096 +n12510774 +n11766432 +n04271891 +n03366721 
+n03154616 +n03694356 +n10478293 +n11763142 +n07763483 +n03037228 +n09201998 +n01517389 +n00443517 +n12693244 +n03580990 +n03519848 +n10238375 +n10783646 +n03564849 +n03975926 +n02473554 +n02450426 +n03464952 +n04411835 +n04573045 +n10505732 +n04337650 +n10621514 +n10334782 +n12434985 +n07769102 +n10594523 +n05475397 +n01875610 +n03299406 +n10507692 +n02593679 +n03317233 +n13239736 +n03550420 +n03247351 +n03819047 +n03633341 +n03154745 +n04073425 +n04532022 +n02910964 +n04301242 +n04378651 +n13098515 +n11775626 +n14603798 +n10263146 +n01886045 +n03761731 +n02224713 +n04591249 +n02144251 +n03849412 +n11548728 +n04051705 +n12298165 +n03150795 +n03989447 +n02826459 +n07602650 +n03155915 +n09891730 +n02067603 +n01523105 +n03618339 +n03897130 +n02711780 +n05285623 +n03533486 +n04085873 +n01923404 +n10139077 +n01709484 +n02183507 +n03216562 +n01971850 +n03136051 +n02948834 +n03589313 +n03665851 +n02937336 +n02035656 +n07769465 +n07849186 +n12585373 +n12280364 +n02846260 +n02511730 +n02614653 +n04193179 +n11718681 +n09467696 +n01522450 +n03040836 +n03162297 +n11896141 +n04000480 +n10350220 +n07746038 +n02124157 +n10655169 +n03476542 +n03895038 +n00443917 +n07757753 +n01726203 +n02987706 +n12750076 +n03012734 +n02941228 +n04194009 +n04501127 +n09794550 +n03510487 +n08589670 +n03166951 +n03673270 +n09792125 +n08492354 +n02396157 +n01628331 +n03993878 +n07833816 +n04958865 +n13650447 +n04339191 +n02826683 +n02893269 +n02810139 +n02626471 +n02589796 +n08677801 +n04325968 +n03275864 +n02622547 +n04406687 +n04097085 +n02998107 +n07831450 +n03658102 +n02575590 +n03523398 +n02412909 +n02953850 +n04337503 +n03510987 +n12664005 +n03710294 +n13138155 +n10110093 +n07831955 +n03932080 +n12971804 +n03943623 +n03726371 +n10531445 +n12984489 +n07835051 +n12097556 +n02685701 +n03038041 +n02451125 +n04594919 +n02372140 +n02665985 +n03496183 +n03961828 +n03802800 +n01713170 +n03602790 +n04974145 +n02780588 +n04031884 +n03588216 +n02614140 +n04578708 +n04501281 +n03166600 +n03992975 
+n04206070 +n03227721 +n02582349 +n02664642 +n07805389 +n09226869 +n02459190 +n12216968 +n03628984 +n02524928 +n09209025 +n04078002 +n03167153 +n03562565 +n07599554 +n10252547 +n03279804 +n07692887 +n14909584 +n02529293 +n04444953 +n04156814 +n07616174 +n03415626 +n03331244 +n03868324 +n03644073 +n02818687 +n10085101 +n02953056 +n03202481 +n02118707 +n03591901 +n12602434 +n02943465 +n02818254 +n07922607 +n02597004 +n04212810 +n04056073 +n12327528 +n02207647 +n01792808 +n03002555 +n03951213 +n12242123 +n10062275 +n12325787 +n10048117 +n11937278 +n03624767 +n04039041 +n04059298 +n03707171 +n07758407 +n01333483 +n02219015 +n02436645 +n02478239 +n04457638 +n01781698 +n09474765 +n03686363 +n10769084 +n09456207 +n02385776 +n13555775 +n03962685 +n13129078 +n03463185 +n01429172 +n04243251 +n12177129 +n03143754 +n03958338 +n02791795 +n04560502 +n12776774 +n02745816 +n03009111 +n02976552 +n03008817 +n03211413 +n03537550 +n12200504 +n01909788 +n11790089 +n03480973 +n10507070 +n01707294 +n04374907 +n04281571 +n00006024 +n03823906 +n12603273 +n03503358 +n04027820 +n12645530 +n02535080 +n04143365 +n08385989 +n12661227 +n12814857 +n11871059 +n04268418 +n13128582 +n01928865 +n04359124 +n12670334 +n03610836 +n04543924 +n02252799 +n15102359 +n04437380 +n04316924 +n11872324 +n09330378 +n10122300 +n03784139 +n00443375 +n14993378 +n01721174 +n00004475 +n00006484 +n00007846 +n00015388 +n00017222 +n00021265 +n00021939 +n00288000 +n00433458 +n00433661 +n00433802 +n00439826 +n00440218 +n00440509 +n00440747 +n00441824 +n00442115 +n00442981 +n00443231 +n00444651 +n00445351 +n00445802 +n00447073 +n00447221 +n00447540 +n00448466 +n00448640 +n00448958 +n00449295 +n00449695 +n00450335 +n00450700 +n00451370 +n00451866 +n00452293 +n00453935 +n00454237 +n00454624 +n00463246 +n00464651 +n00464894 +n00467719 +n00467995 +n00468480 +n00469651 +n00471437 +n00471613 +n00479076 +n00480508 +n00480993 +n00482298 +n00523513 +n01035667 +n01316422 +n01316579 +n01316949 +n01317089 +n01317391 +n01317541 
+n01319467 +n01320872 +n01321230 +n01321579 +n01321854 +n01322343 +n01322685 +n01322898 +n01323068 +n01326291 +n01329186 +n01338685 +n01339336 +n01340935 +n01342269 +n01358572 +n01367772 +n01375204 +n01376237 +n01380610 +n01384687 +n01385330 +n01387065 +n01389507 +n01390123 +n01392380 +n01395254 +n01397114 +n01402600 +n01407798 +n01421807 +n01438208 +n01439121 +n01439514 +n01439808 +n01441425 +n01444783 +n01445429 +n01446589 +n01446760 +n01448951 +n01450661 +n01454856 +n01455778 +n01458842 +n01459791 +n01461646 +n01466257 +n01467336 +n01468238 +n01468712 +n01471682 +n01473806 +n01474283 +n01477525 +n01478511 +n01480516 +n01480880 +n01481331 +n01482071 +n01482330 +n01483522 +n01484097 +n01488918 +n01491874 +n01492357 +n01493541 +n01494757 +n01494882 +n01495006 +n01495701 +n01497118 +n01498406 +n01498699 +n01498989 +n01500091 +n01501160 +n01503061 +n01514752 +n01515078 +n01517565 +n01524359 +n01525720 +n01527194 +n01527617 +n01528654 +n01529672 +n01533339 +n01534582 +n01534762 +n01537134 +n01538955 +n01539573 +n01540233 +n01541922 +n01542786 +n01544208 +n01546921 +n01547832 +n01548301 +n01549430 +n01550761 +n01553142 +n01555809 +n01557185 +n01560105 +n01560636 +n01563128 +n01563746 +n01564394 +n01567133 +n01568132 +n01569836 +n01570676 +n01571904 +n01572328 +n01573074 +n01574045 +n01574390 +n01575745 +n01576695 +n01577659 +n01578575 +n01579028 +n01580379 +n01580490 +n01580772 +n01580870 +n01581166 +n01581434 +n01581730 +n01582398 +n01582498 +n01582856 +n01584225 +n01585121 +n01587834 +n01588431 +n01589286 +n01591697 +n01592257 +n01592540 +n01594372 +n01595624 +n01597336 +n01598588 +n01598988 +n01600085 +n01600657 +n01602080 +n01602209 +n01602630 +n01603600 +n01604330 +n01605630 +n01608814 +n01609062 +n01609391 +n01609751 +n01610955 +n01611472 +n01612628 +n01613294 +n01613615 +n01615121 +n01616551 +n01616764 +n01617095 +n01617443 +n01617766 +n01618082 +n01618922 +n01619310 +n01619536 +n01619835 +n01620135 +n01620414 +n01620735 +n01621127 +n01622352 +n01623706 
+n01627424 +n01629276 +n01630284 +n01631175 +n01632047 +n01637112 +n01637932 +n01639765 +n01640846 +n01645776 +n01649170 +n01650167 +n01651487 +n01653773 +n01661091 +n01661592 +n01661818 +n01662622 +n01662784 +n01663401 +n01664369 +n01665932 +n01667432 +n01668091 +n01669372 +n01670092 +n01672032 +n01674216 +n01674464 +n01674990 +n01676755 +n01680264 +n01680478 +n01681940 +n01684133 +n01685439 +n01686044 +n01686220 +n01686403 +n01686609 +n01686808 +n01687665 +n01688961 +n01689411 +n01691951 +n01692864 +n01693783 +n01694709 +n01696633 +n01697178 +n01698434 +n01699040 +n01701551 +n01702256 +n01703011 +n01703569 +n01705934 +n01708106 +n01708998 +n01712008 +n01712752 +n01717016 +n01719403 +n01722998 +n01724231 +n01726692 +n01727646 +n01730185 +n01730307 +n01730812 +n01730960 +n01731545 +n01732244 +n01733757 +n01734637 +n01734808 +n01735439 +n01735577 +n01735728 +n01737472 +n01737728 +n01737875 +n01738065 +n01738601 +n01739647 +n01740551 +n01741232 +n01741562 +n01741943 +n01743605 +n01745125 +n01745484 +n01746359 +n01747885 +n01749582 +n01749742 +n01751036 +n01752165 +n01753959 +n01754876 +n01755740 +n01767661 +n01769347 +n01770795 +n01771417 +n01772222 +n01775370 +n01776192 +n01776705 +n01777304 +n01777467 +n01777649 +n01777909 +n01778217 +n01778487 +n01778621 +n01778801 +n01779148 +n01779463 +n01779629 +n01780696 +n01782209 +n01785667 +n01789386 +n01789740 +n01791107 +n01791625 +n01792158 +n01792640 +n01794158 +n01795088 +n01795735 +n01795900 +n01796019 +n01796105 +n01796519 +n01796729 +n01798706 +n01798839 +n01798979 +n01799302 +n01800424 +n01801088 +n01801479 +n01802721 +n01803078 +n01804478 +n01804653 +n01804921 +n01805070 +n01805801 +n01806847 +n01807828 +n01808140 +n01808291 +n01808596 +n01809106 +n01810700 +n01811909 +n01812337 +n01813385 +n01814370 +n01814921 +n01815601 +n01816887 +n01819115 +n01820348 +n01820801 +n01821076 +n01821203 +n01822602 +n01823013 +n01824749 +n01825930 +n01826364 +n01827403 +n01829869 +n01831712 +n01832167 +n01834177 +n01834540 
+n01835276 +n01838038 +n01838598 +n01839598 +n01841102 +n01843719 +n01844231 +n01844551 +n01844917 +n01845132 +n01845477 +n01846331 +n01848123 +n01848648 +n01849466 +n01850373 +n01851375 +n01852142 +n01852861 +n01853498 +n01854415 +n01856072 +n01856155 +n01856380 +n01856553 +n01856890 +n01857079 +n01857325 +n01857512 +n01857632 +n01857851 +n01858441 +n01859496 +n01860497 +n01861148 +n01861778 +n01871543 +n01871875 +n01874434 +n01874928 +n01876326 +n01877134 +n01878061 +n01878335 +n01878639 +n01878929 +n01879217 +n01879509 +n01880152 +n01881171 +n01883513 +n01883920 +n01886756 +n01887896 +n01888264 +n01889074 +n01889520 +n01890860 +n01891633 +n01892551 +n01894207 +n01905661 +n01906749 +n01907738 +n01909422 +n01911403 +n01911839 +n01912454 +n01914163 +n01914830 +n01915811 +n01916187 +n01916925 +n01918744 +n01922303 +n01925270 +n01925695 +n01926379 +n01926689 +n01927159 +n01927456 +n01927928 +n01928215 +n01930852 +n01931140 +n01931520 +n01931714 +n01932151 +n01932936 +n01933151 +n01933478 +n01933988 +n01934440 +n01935176 +n01936391 +n01937909 +n01940736 +n01941223 +n01942177 +n01942869 +n01943541 +n01944118 +n01944812 +n01944955 +n01945143 +n01945340 +n01945845 +n01946277 +n01948573 +n01951613 +n01953361 +n01955933 +n01956481 +n01958038 +n01959985 +n01960459 +n01963571 +n01964049 +n01964441 +n01965889 +n01967094 +n01968315 +n01969726 +n01971094 +n01971280 +n01974773 +n01975687 +n01976146 +n01976957 +n01978930 +n01981702 +n01982650 +n01983048 +n01985493 +n01985797 +n01986806 +n01987545 +n01988701 +n01989869 +n01990007 +n01991028 +n01991520 +n01992262 +n01992423 +n01992773 +n01996585 +n01998183 +n02000954 +n02002075 +n02005790 +n02006985 +n02007284 +n02008041 +n02008796 +n02010453 +n02011805 +n02011943 +n02012185 +n02013177 +n02014941 +n02015554 +n02016358 +n02016956 +n02018027 +n02019190 +n02019438 +n02019929 +n02021050 +n02021795 +n02022684 +n02023341 +n02025043 +n02026059 +n02028175 +n02030035 +n02030287 +n02030996 +n02031934 +n02033208 +n02033324 +n02033561 
+n02034129 +n02034661 +n02036053 +n02037464 +n02039171 +n02040505 +n02041085 +n02041246 +n02043063 +n02044178 +n02044778 +n02045369 +n02046759 +n02047260 +n02047614 +n02048698 +n02049532 +n02050004 +n02051474 +n02052204 +n02052365 +n02053083 +n02054502 +n02055658 +n02055803 +n02057731 +n02058594 +n02058747 +n02059162 +n02060411 +n02060889 +n02062017 +n02062430 +n02062744 +n02063224 +n02064338 +n02066707 +n02068206 +n02068974 +n02069701 +n02070430 +n02073250 +n02075296 +n02075927 +n02076196 +n02076779 +n02077152 +n02077658 +n02078292 +n02078574 +n02078738 +n02079005 +n02079389 +n02081571 +n02083346 +n02083672 +n02084071 +n02084861 +n02085374 +n02086346 +n02086478 +n02087122 +n02087551 +n02088839 +n02089232 +n02089555 +n02090475 +n02090827 +n02092468 +n02093056 +n02094562 +n02094721 +n02095050 +n02095412 +n02095727 +n02096756 +n02097786 +n02098550 +n02099029 +n02099997 +n02100399 +n02101108 +n02101861 +n02102605 +n02103406 +n02103841 +n02104523 +n02104882 +n02106966 +n02107420 +n02108254 +n02108672 +n02109150 +n02109811 +n02110532 +n02111626 +n02112497 +n02112826 +n02113335 +n02114100 +n02115012 +n02115335 +n02117512 +n02117646 +n02117900 +n02118176 +n02118333 +n02119247 +n02119359 +n02120278 +n02120997 +n02121620 +n02121808 +n02122725 +n02123785 +n02124623 +n02127292 +n02127381 +n02127482 +n02127586 +n02127678 +n02127808 +n02128598 +n02128669 +n02129463 +n02129530 +n02129837 +n02129923 +n02130545 +n02131653 +n02132466 +n02132580 +n02132788 +n02133400 +n02134971 +n02135220 +n02137722 +n02137888 +n02138647 +n02138777 +n02139199 +n02139671 +n02141306 +n02141713 +n02144593 +n02145424 +n02148835 +n02149420 +n02150482 +n02152740 +n02152991 +n02153203 +n02153809 +n02156732 +n02159955 +n02164464 +n02165877 +n02166229 +n02166567 +n02166826 +n02167505 +n02167820 +n02167944 +n02168245 +n02169023 +n02169218 +n02169705 +n02169974 +n02170400 +n02170993 +n02171453 +n02171869 +n02172518 +n02172678 +n02172761 +n02172870 +n02174355 +n02176261 +n02178411 +n02178717 +n02179012 
+n02180233 +n02181235 +n02181724 +n02182045 +n02182355 +n02182642 +n02182930 +n02183857 +n02186153 +n02188699 +n02189363 +n02190790 +n02191273 +n02191773 +n02191979 +n02192252 +n02192513 +n02192814 +n02193009 +n02193163 +n02194249 +n02194750 +n02195091 +n02195526 +n02195819 +n02196344 +n02198532 +n02199502 +n02200198 +n02202287 +n02204722 +n02206270 +n02207179 +n02207449 +n02208280 +n02208498 +n02208848 +n02208979 +n02209111 +n02209354 +n02209624 +n02209964 +n02210427 +n02210921 +n02211444 +n02211627 +n02211896 +n02212062 +n02212602 +n02212958 +n02213107 +n02215161 +n02215770 +n02217563 +n02218713 +n02220055 +n02220225 +n02220518 +n02220804 +n02221083 +n02221414 +n02221571 +n02221715 +n02221820 +n02222035 +n02222321 +n02222582 +n02223266 +n02223520 +n02226183 +n02226821 +n02226970 +n02227247 +n02227604 +n02227966 +n02228341 +n02228697 +n02229156 +n02229765 +n02230023 +n02230187 +n02230480 +n02230634 +n02231052 +n02231803 +n02233943 +n02234355 +n02234570 +n02234848 +n02235205 +n02236241 +n02236896 +n02237424 +n02237581 +n02238235 +n02238887 +n02239528 +n02241569 +n02241799 +n02243562 +n02244173 +n02246011 +n02246628 +n02247511 +n02248368 +n02248510 +n02248887 +n02249515 +n02250822 +n02251775 +n02252226 +n02253127 +n02253715 +n02254697 +n02257003 +n02257284 +n02257715 +n02259377 +n02260421 +n02260863 +n02261419 +n02262178 +n02263378 +n02264885 +n02265330 +n02268148 +n02269196 +n02269522 +n02270011 +n02270200 +n02271570 +n02271897 +n02272871 +n02274024 +n02274259 +n02274822 +n02278210 +n02278839 +n02279637 +n02280458 +n02281015 +n02281136 +n02281267 +n02282257 +n02282385 +n02282553 +n02282903 +n02283077 +n02283201 +n02283951 +n02285548 +n02287004 +n02287799 +n02288789 +n02291220 +n02291572 +n02291748 +n02293352 +n02293868 +n02295064 +n02295390 +n02295870 +n02298541 +n02300173 +n02301452 +n02302459 +n02302620 +n02305407 +n02306433 +n02307325 +n02308139 +n02308471 +n02309337 +n02310000 +n02310717 +n02311060 +n02312006 +n02312427 +n02313008 +n02316707 +n02318167 
+n02319308 +n02319555 +n02319829 +n02320127 +n02322047 +n02323449 +n02323902 +n02324045 +n02325722 +n02325884 +n02326074 +n02326763 +n02326862 +n02327028 +n02327175 +n02327435 +n02327656 +n02327842 +n02328429 +n02329401 +n02330245 +n02331046 +n02331309 +n02332755 +n02333546 +n02334460 +n02335127 +n02336011 +n02336641 +n02338901 +n02339376 +n02339922 +n02343058 +n02343320 +n02343772 +n02344528 +n02345600 +n02346998 +n02347274 +n02347573 +n02347744 +n02348173 +n02348788 +n02350105 +n02350989 +n02351870 +n02352591 +n02353861 +n02355227 +n02355477 +n02358091 +n02359324 +n02360781 +n02361587 +n02361706 +n02361850 +n02363245 +n02363351 +n02364520 +n02369680 +n02370806 +n02372584 +n02373336 +n02374149 +n02374451 +n02376542 +n02376791 +n02376918 +n02377181 +n02377480 +n02377703 +n02378415 +n02380335 +n02380583 +n02380745 +n02381460 +n02382437 +n02382948 +n02384858 +n02386014 +n02386310 +n02386496 +n02388276 +n02389346 +n02389559 +n02390454 +n02390834 +n02391234 +n02391373 +n02391508 +n02391994 +n02393580 +n02394477 +n02395003 +n02395694 +n02395931 +n02397529 +n02399000 +n02401031 +n02402010 +n02402175 +n02402425 +n02403325 +n02403454 +n02404186 +n02404573 +n02406174 +n02407959 +n02408660 +n02408817 +n02409870 +n02410702 +n02410900 +n02411705 +n02412440 +n02413131 +n02414578 +n02415435 +n02416519 +n02417070 +n02417534 +n02418064 +n02419796 +n02423218 +n02423362 +n02423589 +n02424305 +n02424695 +n02426813 +n02427724 +n02428349 +n02430045 +n02430559 +n02431122 +n02432511 +n02433546 +n02433925 +n02435853 +n02437136 +n02437971 +n02438173 +n02438272 +n02439033 +n02441326 +n02442172 +n02442336 +n02442446 +n02442572 +n02442668 +n02443015 +n02443346 +n02443808 +n02443959 +n02444251 +n02445004 +n02445171 +n02446206 +n02446352 +n02446645 +n02447021 +n02447762 +n02448060 +n02448633 +n02448885 +n02450034 +n02453108 +n02453611 +n02454794 +n02455135 +n02455428 +n02455720 +n02456008 +n02456275 +n02456962 +n02460009 +n02469914 +n02470325 +n02470899 +n02471300 +n02471762 +n02472293 
+n02473307 +n02474777 +n02476219 +n02480153 +n02481103 +n02481235 +n02481366 +n02481500 +n02482060 +n02482286 +n02482474 +n02482650 +n02483092 +n02484322 +n02484473 +n02485225 +n02485371 +n02485536 +n02485688 +n02486657 +n02486908 +n02487079 +n02487547 +n02487675 +n02487847 +n02488003 +n02488415 +n02488894 +n02489589 +n02490597 +n02490811 +n02491107 +n02491329 +n02491474 +n02496913 +n02501583 +n02502514 +n02503127 +n02503517 +n02504770 +n02507649 +n02508021 +n02512053 +n02512938 +n02513560 +n02515214 +n02516188 +n02517442 +n02517938 +n02519148 +n02519686 +n02521646 +n02522399 +n02524524 +n02526425 +n02526818 +n02527057 +n02527271 +n02527622 +n02528163 +n02529772 +n02530421 +n02532028 +n02532602 +n02533209 +n02533834 +n02534559 +n02534734 +n02535537 +n02537085 +n02537319 +n02538406 +n02538985 +n02540412 +n02541687 +n02546331 +n02548689 +n02549989 +n02550460 +n02552171 +n02554730 +n02556846 +n02557591 +n02557749 +n02559862 +n02561108 +n02561661 +n02562315 +n02562796 +n02563182 +n02564720 +n02565573 +n02566109 +n02568959 +n02569484 +n02570838 +n02572196 +n02574910 +n02576223 +n02576575 +n02578233 +n02579557 +n02580336 +n02581957 +n02583567 +n02585872 +n02586543 +n02588286 +n02590495 +n02590702 +n02590987 +n02594250 +n02596381 +n02597367 +n02599052 +n02599958 +n02600298 +n02601344 +n02602405 +n02603317 +n02604157 +n02605316 +n02606384 +n02607201 +n02607862 +n02613181 +n02614482 +n02614978 +n02619165 +n02621908 +n02623445 +n02624167 +n02625612 +n02626762 +n02627835 +n02630281 +n02630739 +n02631041 +n02636170 +n02636854 +n02638596 +n02640626 +n02640857 +n02642107 +n02642644 +n02643112 +n02644113 +n02646667 +n02648625 +n02650050 +n02650541 +n02652668 +n02653145 +n02653786 +n02654425 +n02655523 +n02656670 +n02657368 +n02658079 +n02661017 +n02662239 +n02663849 +n02667379 +n02667576 +n02668393 +n02670382 +n02671780 +n02672371 +n02676261 +n02676670 +n02677028 +n02677718 +n02678384 +n02680110 +n02680754 +n02682407 +n02682922 +n02683791 +n02686121 +n02686568 +n02687992 
+n02688443 +n02689274 +n02691156 +n02692513 +n02693413 +n02693540 +n02694426 +n02694966 +n02695627 +n02697576 +n02698244 +n02700258 +n02700895 +n02702989 +n02703275 +n02705944 +n02708224 +n02708555 +n02709367 +n02709637 +n02710600 +n02712643 +n02713218 +n02715229 +n02715513 +n02715712 +n02716626 +n02726305 +n02726681 +n02727016 +n02727825 +n02728440 +n02729837 +n02729965 +n02730265 +n02732072 +n02732827 +n02733213 +n02733524 +n02735361 +n02735688 +n02736798 +n02737660 +n02738031 +n02738271 +n02738535 +n02739550 +n02739668 +n02740533 +n02740764 +n02741475 +n02742322 +n02742753 +n02745492 +n02746365 +n02749790 +n02750169 +n02751067 +n02751295 +n02752496 +n02753044 +n02753394 +n02754103 +n02755352 +n02755529 +n02756098 +n02756977 +n02757462 +n02757810 +n02758134 +n02758960 +n02759700 +n02759963 +n02760099 +n02760199 +n02760429 +n02760855 +n02761392 +n02763198 +n02763714 +n02764044 +n02764614 +n02764779 +n02765028 +n02766320 +n02766534 +n02766792 +n02767433 +n02769075 +n02770830 +n02772554 +n02772700 +n02773037 +n02773838 +n02774152 +n02774630 +n02775483 +n02776205 +n02777100 +n02777734 +n02777927 +n02778456 +n02778669 +n02781121 +n02781338 +n02781517 +n02783035 +n02783324 +n02784998 +n02785648 +n02786198 +n02786463 +n02788689 +n02789487 +n02790823 +n02792552 +n02792948 +n02793842 +n02794008 +n02794779 +n02794972 +n02795783 +n02796207 +n02796623 +n02796995 +n02797692 +n02797881 +n02799897 +n02801184 +n02801525 +n02801938 +n02802721 +n02803349 +n02803666 +n02804252 +n02806088 +n02806379 +n02806875 +n02810471 +n02811468 +n02811719 +n02812201 +n02813252 +n02813399 +n02815478 +n02815950 +n02816494 +n02817031 +n02817650 +n02817799 +n02818832 +n02819697 +n02820210 +n02821627 +n02821943 +n02822220 +n02822399 +n02822865 +n02823335 +n02824448 +n02826589 +n02826886 +n02827606 +n02828299 +n02828884 +n02831335 +n02831724 +n02831894 +n02833793 +n02834778 +n02835412 +n02836268 +n02839351 +n02839910 +n02840619 +n02841063 +n02841506 +n02842133 +n02843029 +n02843777 +n02844214 
+n02844307 +n02844714 +n02847631 +n02848216 +n02848523 +n02849154 +n02850950 +n02851099 +n02853016 +n02854532 +n02854926 +n02855089 +n02855390 +n02855793 +n02857365 +n02857477 +n02857644 +n02858304 +n02860415 +n02861886 +n02862048 +n02862916 +n02863750 +n02865665 +n02865931 +n02866578 +n02867715 +n02869737 +n02871631 +n02871824 +n02871963 +n02872752 +n02873839 +n02874086 +n02875436 +n02876326 +n02876457 +n02876657 +n02877962 +n02879517 +n02880189 +n02880546 +n02880940 +n02881193 +n02881906 +n02882483 +n02882647 +n02883004 +n02883344 +n02884225 +n02885108 +n02885338 +n02886599 +n02887209 +n02887970 +n02888569 +n02889425 +n02891188 +n02891788 +n02892499 +n02893418 +n02896294 +n02896442 +n02897389 +n02897820 +n02898173 +n02898369 +n02898585 +n02898711 +n02900705 +n02901481 +n02901901 +n02902079 +n02902916 +n02903006 +n02904109 +n02904640 +n02908217 +n02909285 +n02911485 +n02912065 +n02913152 +n02914991 +n02916179 +n02916350 +n02917377 +n02917607 +n02919414 +n02920503 +n02921884 +n02923129 +n02924116 +n02925519 +n02928413 +n02928608 +n02929289 +n02929462 +n02929923 +n02931417 +n02931836 +n02932019 +n02932400 +n02933112 +n02933462 +n02933750 +n02933990 +n02934168 +n02935658 +n02935891 +n02936176 +n02936281 +n02936714 +n02938886 +n02939866 +n02941095 +n02942699 +n02943241 +n02943871 +n02944826 +n02945161 +n02946270 +n02946348 +n02946921 +n02947212 +n02947818 +n02948557 +n02949202 +n02950186 +n02950256 +n02950632 +n02950943 +n02951843 +n02952485 +n02952674 +n02953673 +n02954163 +n02954340 +n02954938 +n02955065 +n02955247 +n02955540 +n02955767 +n02957135 +n02957755 +n02958343 +n02959942 +n02961451 +n02961947 +n02963302 +n02963692 +n02963821 +n02965216 +n02965300 +n02965529 +n02966545 +n02966786 +n02966942 +n02967081 +n02967991 +n02968473 +n02969010 +n02969163 +n02969634 +n02969886 +n02970685 +n02970849 +n02971691 +n02972397 +n02973017 +n02974697 +n02975212 +n02976939 +n02978205 +n02978753 +n02979516 +n02982599 +n02983189 +n02983904 +n02984061 +n02984203 +n02984469 
+n02984699 +n02985137 +n02985828 +n02986066 +n02987047 +n02987492 +n02989099 +n02991048 +n02991302 +n02992032 +n02993546 +n02995998 +n02997391 +n02997607 +n03001282 +n03001627 +n03002210 +n03003091 +n03004620 +n03005515 +n03007130 +n03007591 +n03010656 +n03010795 +n03011018 +n03011355 +n03012159 +n03013006 +n03014440 +n03015254 +n03017070 +n03018209 +n03020034 +n03020416 +n03020692 +n03024333 +n03025070 +n03025886 +n03027108 +n03027250 +n03029066 +n03031422 +n03032811 +n03033362 +n03033986 +n03034516 +n03034663 +n03035510 +n03036469 +n03036866 +n03037108 +n03037709 +n03038685 +n03039015 +n03039947 +n03040229 +n03040376 +n03043274 +n03043958 +n03045337 +n03046257 +n03048883 +n03049066 +n03049457 +n03050026 +n03050546 +n03050655 +n03050864 +n03051396 +n03051540 +n03052464 +n03052917 +n03053047 +n03054901 +n03055670 +n03056097 +n03056493 +n03057021 +n03057636 +n03058107 +n03058603 +n03058949 +n03059366 +n03061050 +n03063073 +n03063338 +n03064350 +n03064758 +n03065708 +n03066849 +n03070193 +n03071021 +n03071160 +n03072201 +n03073296 +n03073977 +n03074380 +n03074855 +n03075097 +n03075500 +n03075634 +n03076411 +n03076708 +n03078287 +n03078670 +n03079230 +n03079741 +n03080497 +n03080731 +n03081986 +n03082127 +n03082807 +n03082979 +n03084420 +n03085333 +n03085602 +n03085915 +n03086183 +n03086457 +n03086670 +n03087366 +n03087643 +n03087816 +n03088707 +n03091044 +n03091374 +n03092166 +n03092314 +n03093792 +n03094503 +n03096439 +n03096960 +n03098140 +n03098806 +n03099454 +n03099771 +n03099945 +n03100346 +n03100490 +n03101156 +n03101986 +n03102654 +n03102859 +n03106722 +n03106898 +n03107046 +n03109881 +n03111690 +n03112869 +n03113152 +n03113657 +n03113835 +n03114839 +n03115180 +n03116530 +n03116767 +n03117199 +n03118346 +n03118969 +n03119510 +n03120198 +n03120491 +n03121897 +n03122748 +n03123809 +n03125870 +n03128085 +n03128427 +n03128519 +n03129001 +n03130066 +n03130563 +n03131669 +n03132261 +n03134853 +n03135917 +n03136369 +n03137579 +n03139464 +n03140900 +n03141065 
+n03141327 +n03143572 +n03145384 +n03145843 +n03146846 +n03147509 +n03148324 +n03148727 +n03149401 +n03151077 +n03153948 +n03154073 +n03154446 +n03155178 +n03156071 +n03156405 +n03157348 +n03158796 +n03158885 +n03161450 +n03162818 +n03163798 +n03163973 +n03164605 +n03164722 +n03164929 +n03165823 +n03167978 +n03168107 +n03168217 +n03170635 +n03171356 +n03172965 +n03173387 +n03175604 +n03176386 +n03177165 +n03177708 +n03178000 +n03178430 +n03180504 +n03180969 +n03181293 +n03182140 +n03182232 +n03182912 +n03183080 +n03186818 +n03187751 +n03189818 +n03193597 +n03196062 +n03196324 +n03196598 +n03199647 +n03199901 +n03200357 +n03200539 +n03200701 +n03200906 +n03201035 +n03201638 +n03201996 +n03202354 +n03202760 +n03203089 +n03203806 +n03204306 +n03204558 +n03204955 +n03205143 +n03205304 +n03206718 +n03206908 +n03207305 +n03208556 +n03210683 +n03211117 +n03211616 +n03212811 +n03214253 +n03214450 +n03215191 +n03219135 +n03220237 +n03221059 +n03221720 +n03222516 +n03223162 +n03223441 +n03224753 +n03224893 +n03225777 +n03226538 +n03228016 +n03228533 +n03228692 +n03229115 +n03229526 +n03231160 +n03231819 +n03235796 +n03235979 +n03236580 +n03236735 +n03237212 +n03237639 +n03239259 +n03239726 +n03240140 +n03241093 +n03241335 +n03241496 +n03242120 +n03242506 +n03242995 +n03243218 +n03245271 +n03245421 +n03246933 +n03250952 +n03251533 +n03251766 +n03252324 +n03252637 +n03254374 +n03255488 +n03255899 +n03256788 +n03256928 +n03257586 +n03258905 +n03259505 +n03261776 +n03262519 +n03262809 +n03262932 +n03265032 +n03266749 +n03267821 +n03269203 +n03269401 +n03270695 +n03271765 +n03271865 +n03272239 +n03272383 +n03273061 +n03273913 +n03274561 +n03274796 +n03276179 +n03277459 +n03277771 +n03278248 +n03279153 +n03279364 +n03279508 +n03280394 +n03280644 +n03281145 +n03282060 +n03282401 +n03284743 +n03284981 +n03285912 +n03286572 +n03287733 +n03288003 +n03289985 +n03291413 +n03292960 +n03294048 +n03294833 +n03296478 +n03297103 +n03297644 +n03297735 +n03298089 +n03302790 +n03303217 
+n03303831 +n03304197 +n03304465 +n03305522 +n03307573 +n03308152 +n03309808 +n03314378 +n03314884 +n03315644 +n03316406 +n03318136 +n03319457 +n03320046 +n03322570 +n03322940 +n03323703 +n03324928 +n03325088 +n03326073 +n03327234 +n03327691 +n03327841 +n03329663 +n03330792 +n03334017 +n03334492 +n03334912 +n03335030 +n03335846 +n03336839 +n03337494 +n03338287 +n03338821 +n03339296 +n03339643 +n03340009 +n03340923 +n03342961 +n03343354 +n03343560 +n03343853 +n03346135 +n03346455 +n03349296 +n03350352 +n03350456 +n03350602 +n03351262 +n03351979 +n03352628 +n03354903 +n03355468 +n03356446 +n03357267 +n03357716 +n03359137 +n03359566 +n03360731 +n03361683 +n03362771 +n03363363 +n03364008 +n03364937 +n03365592 +n03365991 +n03366823 +n03373237 +n03374649 +n03374838 +n03375171 +n03376279 +n03378342 +n03379343 +n03379828 +n03379989 +n03380647 +n03380867 +n03381126 +n03381231 +n03381776 +n03382856 +n03382969 +n03383468 +n03384167 +n03384891 +n03385557 +n03386011 +n03387323 +n03387653 +n03390327 +n03391770 +n03393324 +n03394480 +n03394649 +n03396580 +n03396654 +n03397266 +n03397532 +n03398228 +n03399761 +n03399971 +n03402188 +n03402369 +n03404012 +n03404360 +n03404449 +n03405265 +n03405725 +n03407369 +n03409393 +n03409591 +n03410147 +n03411339 +n03412058 +n03412220 +n03412511 +n03412906 +n03413264 +n03413428 +n03413828 +n03414162 +n03415252 +n03416489 +n03416775 +n03417345 +n03418158 +n03418242 +n03419014 +n03422072 +n03422589 +n03423719 +n03424630 +n03427296 +n03428090 +n03428349 +n03429003 +n03429288 +n03429914 +n03430091 +n03430313 +n03430551 +n03430959 +n03431243 +n03431745 +n03433637 +n03433877 +n03434285 +n03434830 +n03435593 +n03437941 +n03438257 +n03439814 +n03441112 +n03442288 +n03442756 +n03446070 +n03446832 +n03448031 +n03448956 +n03449564 +n03449858 +n03450516 +n03452267 +n03452449 +n03453320 +n03454110 +n03454211 +n03454707 +n03455355 +n03456548 +n03456665 +n03457008 +n03457686 +n03458271 +n03459914 +n03461882 +n03465500 +n03465818 +n03466162 +n03466839 
+n03467517 +n03467796 +n03467984 +n03468696 +n03469493 +n03470387 +n03470629 +n03470948 +n03472232 +n03472535 +n03472937 +n03473817 +n03473966 +n03475823 +n03476083 +n03476313 +n03477773 +n03477902 +n03478756 +n03478907 +n03481521 +n03482523 +n03483230 +n03483531 +n03484083 +n03484931 +n03487331 +n03487444 +n03487774 +n03488188 +n03488603 +n03489162 +n03490324 +n03490449 +n03490884 +n03491988 +n03496296 +n03496612 +n03497100 +n03497657 +n03498536 +n03499468 +n03500295 +n03501152 +n03501288 +n03501614 +n03502331 +n03502509 +n03502777 +n03503718 +n03503997 +n03505383 +n03505504 +n03506370 +n03507963 +n03508101 +n03509394 +n03509843 +n03510583 +n03510866 +n03511175 +n03512147 +n03512911 +n03513137 +n03513376 +n03515338 +n03517899 +n03517982 +n03518631 +n03519674 +n03521076 +n03521544 +n03522634 +n03524574 +n03524976 +n03525074 +n03525454 +n03525827 +n03528263 +n03529444 +n03531281 +n03531447 +n03531546 +n03532342 +n03534776 +n03535024 +n03536761 +n03537412 +n03538037 +n03538300 +n03538634 +n03538957 +n03540267 +n03540595 +n03541091 +n03541696 +n03541923 +n03542333 +n03542860 +n03543603 +n03544360 +n03545150 +n03546340 +n03547054 +n03547530 +n03548930 +n03550153 +n03550289 +n03551084 +n03551790 +n03552449 +n03552749 +n03553486 +n03554460 +n03555426 +n03555662 +n03557590 +n03558176 +n03558404 +n03558739 +n03561169 +n03563200 +n03563710 +n03563967 +n03565288 +n03565565 +n03566329 +n03568117 +n03568818 +n03571942 +n03572205 +n03574555 +n03574816 +n03575958 +n03576215 +n03577672 +n03577818 +n03578055 +n03578251 +n03578656 +n03579538 +n03579982 +n03583621 +n03584400 +n03585073 +n03588951 +n03589513 +n03589791 +n03590306 +n03590932 +n03592245 +n03592773 +n03593526 +n03595409 +n03595860 +n03596285 +n03597317 +n03598151 +n03598299 +n03598646 +n03600977 +n03601638 +n03601840 +n03602081 +n03603722 +n03604629 +n03604843 +n03605722 +n03605915 +n03606465 +n03609235 +n03609542 +n03610418 +n03610992 +n03612814 +n03613294 +n03613592 +n03614007 +n03614532 +n03615563 +n03617095 
+n03617594 +n03618546 +n03618982 +n03619396 +n03619650 +n03619793 +n03619890 +n03620052 +n03621049 +n03621694 +n03622931 +n03623556 +n03624134 +n03625355 +n03626115 +n03631177 +n03631811 +n03632852 +n03633886 +n03635032 +n03635668 +n03635932 +n03636248 +n03636649 +n03638883 +n03639675 +n03640988 +n03642444 +n03646296 +n03646916 +n03647520 +n03651388 +n03653220 +n03653454 +n03654576 +n03655072 +n03656484 +n03657239 +n03658858 +n03659292 +n03660124 +n03661340 +n03662719 +n03662887 +n03663531 +n03664675 +n03664943 +n03665366 +n03666362 +n03666917 +n03667235 +n03667829 +n03671914 +n03672827 +n03673450 +n03673767 +n03676759 +n03677766 +n03679384 +n03679712 +n03681477 +n03682487 +n03684823 +n03685307 +n03685820 +n03686130 +n03686470 +n03687928 +n03688943 +n03689157 +n03689570 +n03690851 +n03691817 +n03692379 +n03693293 +n03697552 +n03698604 +n03699280 +n03699975 +n03700963 +n03701191 +n03701391 +n03701640 +n03701790 +n03702248 +n03704834 +n03705379 +n03706653 +n03707597 +n03708036 +n03709206 +n03709363 +n03709545 +n03710528 +n03711711 +n03711999 +n03712887 +n03713069 +n03714235 +n03715386 +n03715669 +n03715892 +n03716966 +n03717131 +n03718212 +n03718335 +n03718699 +n03718789 +n03719053 +n03721590 +n03722007 +n03722288 +n03724176 +n03725035 +n03725717 +n03726516 +n03726760 +n03726993 +n03727837 +n03727946 +n03728437 +n03728982 +n03729647 +n03729951 +n03730153 +n03730788 +n03731695 +n03733644 +n03733925 +n03735637 +n03736970 +n03738241 +n03738472 +n03739518 +n03739693 +n03743902 +n03744276 +n03744684 +n03744840 +n03745571 +n03746330 +n03748162 +n03749504 +n03749807 +n03750206 +n03751065 +n03752185 +n03752922 +n03753077 +n03753514 +n03758894 +n03759432 +n03760671 +n03762982 +n03763727 +n03764276 +n03765561 +n03765934 +n03766322 +n03768132 +n03769722 +n03770954 +n03772077 +n03772674 +n03773035 +n03775199 +n03775847 +n03779000 +n03779370 +n03780047 +n03781787 +n03782190 +n03785499 +n03787523 +n03789171 +n03789400 +n03789946 +n03790230 +n03790512 +n03790755 +n03791235 
+n03792048 +n03792526 +n03793850 +n03795976 +n03796181 +n03797390 +n03798982 +n03799113 +n03800485 +n03800772 +n03800933 +n03802007 +n03802228 +n03802393 +n03803116 +n03809312 +n03811295 +n03811444 +n03811847 +n03811965 +n03812382 +n03812924 +n03813176 +n03813946 +n03815278 +n03815482 +n03815615 +n03816005 +n03816136 +n03816849 +n03817647 +n03819595 +n03819994 +n03820154 +n03820318 +n03820728 +n03820950 +n03824197 +n03825080 +n03827536 +n03828020 +n03829340 +n03831757 +n03834040 +n03834604 +n03836062 +n03837422 +n03838748 +n03839172 +n03839276 +n03839795 +n03841666 +n03842156 +n03844045 +n03844233 +n03845190 +n03846234 +n03846772 +n03847471 +n03847823 +n03848168 +n03848348 +n03849275 +n03850613 +n03851341 +n03851787 +n03852280 +n03852688 +n03854815 +n03859280 +n03859495 +n03859958 +n03861430 +n03861842 +n03862676 +n03863923 +n03864139 +n03864356 +n03864692 +n03865371 +n03865949 +n03868406 +n03871083 +n03871524 +n03871724 +n03873848 +n03874138 +n03874823 +n03875218 +n03880129 +n03880323 +n03880531 +n03883054 +n03883773 +n03883944 +n03884639 +n03885535 +n03885669 +n03886053 +n03886641 +n03887185 +n03888022 +n03889503 +n03889726 +n03891051 +n03892557 +n03894051 +n03894379 +n03896103 +n03896233 +n03896419 +n03896628 +n03896984 +n03897943 +n03898271 +n03898633 +n03899612 +n03899933 +n03901338 +n03903133 +n03903424 +n03904060 +n03904183 +n03904433 +n03905540 +n03906997 +n03907654 +n03908204 +n03909160 +n03909406 +n03915118 +n03915437 +n03916470 +n03916720 +n03917327 +n03918480 +n03920737 +n03923564 +n03923692 +n03924069 +n03926148 +n03926412 +n03926876 +n03927792 +n03928116 +n03929091 +n03929202 +n03929443 +n03930515 +n03932670 +n03936269 +n03938522 +n03939677 +n03940256 +n03941684 +n03943920 +n03945615 +n03947111 +n03947466 +n03948459 +n03951971 +n03953020 +n03953416 +n03955809 +n03956785 +n03956922 +n03957315 +n03957762 +n03958630 +n03958752 +n03959014 +n03959701 +n03961939 +n03962525 +n03962932 +n03963028 +n03965907 +n03966325 +n03966751 +n03966976 +n03967942 
+n03968293 +n03971321 +n03972524 +n03973520 +n03973628 +n03975035 +n03979377 +n03979492 +n03980026 +n03981340 +n03982232 +n03982895 +n03984234 +n03984381 +n03985232 +n03986704 +n03988170 +n03989665 +n03990474 +n03991443 +n03992325 +n03992703 +n03993180 +n03993403 +n03994008 +n03994757 +n03995018 +n03995856 +n03996145 +n03996416 +n03997484 +n03999992 +n04000311 +n04001397 +n04001499 +n04001845 +n04004210 +n04004475 +n04005912 +n04007664 +n04010057 +n04010779 +n04010927 +n04011827 +n04012084 +n04013729 +n04014297 +n04015204 +n04016576 +n04016684 +n04018399 +n04018667 +n04019101 +n04019696 +n04020087 +n04020298 +n04020912 +n04021028 +n04021362 +n04021798 +n04022332 +n04022708 +n04023249 +n04024274 +n04026053 +n04026918 +n04027023 +n04027706 +n04028315 +n04029734 +n04030274 +n04036303 +n04037964 +n04038440 +n04038727 +n04039848 +n04042358 +n04042632 +n04042795 +n04042985 +n04043733 +n04044307 +n04044498 +n04045085 +n04045397 +n04046590 +n04046974 +n04047401 +n04049405 +n04050066 +n04051549 +n04051825 +n04052757 +n04056932 +n04057047 +n04057435 +n04057846 +n04057981 +n04058096 +n04058239 +n04059947 +n04060647 +n04060904 +n04061793 +n04061969 +n04062644 +n04063373 +n04063868 +n04064401 +n04065464 +n04065789 +n04067231 +n04067353 +n04067921 +n04068441 +n04068601 +n04069276 +n04069777 +n04070207 +n04070964 +n04071102 +n04071263 +n04071393 +n04072193 +n04072551 +n04073948 +n04075468 +n04075916 +n04076284 +n04077430 +n04077734 +n04078574 +n04079106 +n04079244 +n04079933 +n04080454 +n04080833 +n04081844 +n04083649 +n04086794 +n04087126 +n04087709 +n04088696 +n04088797 +n04089666 +n04089976 +n04090548 +n04091097 +n04093625 +n04095210 +n04096066 +n04097622 +n04097866 +n04099175 +n04099429 +n04100174 +n04101497 +n04101701 +n04102037 +n04102285 +n04102406 +n04102962 +n04104147 +n04104500 +n04105068 +n04105438 +n04105893 +n04107984 +n04108268 +n04110068 +n04110654 +n04110955 +n04111190 +n04111414 +n04111668 +n04113765 +n04114996 +n04115256 +n04115996 +n04116389 +n04118021 
+n04121228 +n04122349 +n04122492 +n04122825 +n04123123 +n04123567 +n04123740 +n04125116 +n04125853 +n04126541 +n04126659 +n04126980 +n04127904 +n04128499 +n04128837 +n04131929 +n04134632 +n04136510 +n04137444 +n04137897 +n04138977 +n04139859 +n04140064 +n04140631 +n04141838 +n04143897 +n04146050 +n04147495 +n04148054 +n04149083 +n04151108 +n04151581 +n04151940 +n04152387 +n04154753 +n04156297 +n04156411 +n04157320 +n04158807 +n04158956 +n04160372 +n04160586 +n04161358 +n04161981 +n04164757 +n04164868 +n04166111 +n04167489 +n04169437 +n04170037 +n04171459 +n04171629 +n04171831 +n04174101 +n04174500 +n04176068 +n04176190 +n04176528 +n04177329 +n04177545 +n04180063 +n04180888 +n04181228 +n04181718 +n04182322 +n04183217 +n04183329 +n04184435 +n04184600 +n04185071 +n04186051 +n04186268 +n04186455 +n04186624 +n04186848 +n04187061 +n04187547 +n04187885 +n04189092 +n04190052 +n04190464 +n04190747 +n04190997 +n04191150 +n04191595 +n04191943 +n04192238 +n04192858 +n04194289 +n04196080 +n04197391 +n04198015 +n04198797 +n04199027 +n04201733 +n04202417 +n04205318 +n04206356 +n04207763 +n04210390 +n04211356 +n04211970 +n04215910 +n04216634 +n04216860 +n04216963 +n04217718 +n04217882 +n04219424 +n04221823 +n04222210 +n04222470 +n04222847 +n04225031 +n04225222 +n04225729 +n04226464 +n04226537 +n04227900 +n04229007 +n04229107 +n04229480 +n04230603 +n04230808 +n04231693 +n04232153 +n04233832 +n04234455 +n04235291 +n04235771 +n04236001 +n04236377 +n04236702 +n04238617 +n04241042 +n04241394 +n04242408 +n04243003 +n04243941 +n04244997 +n04245847 +n04246855 +n04247630 +n04247736 +n04248507 +n04249415 +n04250224 +n04250599 +n04253931 +n04255499 +n04256520 +n04260589 +n04261116 +n04262678 +n04263336 +n04263760 +n04264233 +n04264914 +n04266486 +n04267577 +n04269944 +n04270891 +n04271148 +n04272054 +n04272782 +n04273064 +n04273796 +n04275283 +n04275661 +n04275904 +n04278353 +n04279172 +n04279987 +n04280259 +n04280970 +n04283585 +n04283905 +n04284002 +n04285146 +n04285622 +n04285803 
+n04286128 +n04288272 +n04288533 +n04288673 +n04289449 +n04291242 +n04291759 +n04292414 +n04292572 +n04293119 +n04293744 +n04294212 +n04294426 +n04295081 +n04295881 +n04299215 +n04300358 +n04301000 +n04301474 +n04303258 +n04304375 +n04305471 +n04306080 +n04306847 +n04307419 +n04307878 +n04308084 +n04308273 +n04308397 +n04308583 +n04308807 +n04309348 +n04309833 +n04310721 +n04311595 +n04312154 +n04312432 +n04313220 +n04314914 +n04315828 +n04315948 +n04317420 +n04318131 +n04318982 +n04319937 +n04320405 +n04322026 +n04322692 +n04322801 +n04323819 +n04326799 +n04326896 +n04328054 +n04328329 +n04328946 +n04329477 +n04330340 +n04330669 +n04330998 +n04331277 +n04332987 +n04333129 +n04338517 +n04339638 +n04340750 +n04340935 +n04341133 +n04341414 +n04341686 +n04346679 +n04347519 +n04348184 +n04348359 +n04349401 +n04350104 +n04350458 +n04354589 +n04356595 +n04358707 +n04358874 +n04359335 +n04359589 +n04360501 +n04360798 +n04361095 +n04361260 +n04362821 +n04363210 +n04363874 +n04364545 +n04364827 +n04364994 +n04365328 +n04365484 +n04365751 +n04368695 +n04370048 +n04371563 +n04373894 +n04375775 +n04377057 +n04378956 +n04379243 +n04379964 +n04380346 +n04381994 +n04382334 +n04382880 +n04383130 +n04383301 +n04386664 +n04387201 +n04387400 +n04388162 +n04388743 +n04389521 +n04390873 +n04391838 +n04392526 +n04393095 +n04394261 +n04395875 +n04397168 +n04397261 +n04397645 +n04398497 +n04398688 +n04398834 +n04399046 +n04400289 +n04401088 +n04402057 +n04402580 +n04402746 +n04402984 +n04403638 +n04404817 +n04404997 +n04405540 +n04405762 +n04407435 +n04407686 +n04409128 +n04409806 +n04410086 +n04410365 +n04410485 +n04411264 +n04411966 +n04413151 +n04413419 +n04415663 +n04416901 +n04417180 +n04417361 +n04417809 +n04419073 +n04421872 +n04422875 +n04427715 +n04428008 +n04431436 +n04431745 +n04434932 +n04435180 +n04436185 +n04436401 +n04436542 +n04437670 +n04437953 +n04438304 +n04438643 +n04440963 +n04441662 +n04444749 +n04445040 +n04445952 +n04446276 +n04447276 +n04447443 +n04448070 
+n04448361 +n04450243 +n04450640 +n04450749 +n04451818 +n04452615 +n04452848 +n04453156 +n04453666 +n04453910 +n04454654 +n04455250 +n04455652 +n04456472 +n04457326 +n04458843 +n04459362 +n04459610 +n04460130 +n04462011 +n04463679 +n04464852 +n04467099 +n04467307 +n04468005 +n04469251 +n04470741 +n04471315 +n04471632 +n04472243 +n04472726 +n04473884 +n04474466 +n04475411 +n04475631 +n04477548 +n04478512 +n04478657 +n04480527 +n04481524 +n04487724 +n04488427 +n04489008 +n04489817 +n04490091 +n04491769 +n04493109 +n04494204 +n04495450 +n04497442 +n04497570 +n04498523 +n04499446 +n04499554 +n04500060 +n04501837 +n04502197 +n04502502 +n04502670 +n04502851 +n04504141 +n04504770 +n04505036 +n04506994 +n04507453 +n04508163 +n04508489 +n04508949 +n04509260 +n04509592 +n04511002 +n04514241 +n04516116 +n04516214 +n04516672 +n04518132 +n04519153 +n04520170 +n04520382 +n04521987 +n04524313 +n04527648 +n04529681 +n04530566 +n04531098 +n04531873 +n04533042 +n04533199 +n04533700 +n04534127 +n04534895 +n04536153 +n04538552 +n04539203 +n04540761 +n04541320 +n04543158 +n04544450 +n04546194 +n04546855 +n04547592 +n04549122 +n04549919 +n04551055 +n04552696 +n04553389 +n04554871 +n04555600 +n04555897 +n04556948 +n04557308 +n04557751 +n04558059 +n04558804 +n04559023 +n04559730 +n04562262 +n04563204 +n04565375 +n04566257 +n04567098 +n04568069 +n04568557 +n04569520 +n04569822 +n04570958 +n04571292 +n04571566 +n04571958 +n04572935 +n04574471 +n04574999 +n04576002 +n04576211 +n04576971 +n04577426 +n04577769 +n04578801 +n04579230 +n04580493 +n04581595 +n04582349 +n04583620 +n04585745 +n04585980 +n04586932 +n04587648 +n04588739 +n04589190 +n04589434 +n04591056 +n04591887 +n04592005 +n04592099 +n04594218 +n04594489 +n04595285 +n04595855 +n04596852 +n04597066 +n04597804 +n04598136 +n04598582 +n04599124 +n04600312 +n04600486 +n04600912 +n04603729 +n04603872 +n04605726 +n04606574 +n04608329 +n04608567 +n04609531 +n04609651 +n04610176 +n04610503 +n04610676 +n04611916 +n04613015 +n04615226 
+n04615644 +n04950713 +n04951373 +n04958634 +n04959672 +n04960277 +n04961691 +n04963740 +n04965179 +n04965661 +n04967191 +n04968895 +n04970059 +n04970631 +n04970916 +n04972801 +n04973386 +n04976952 +n05238282 +n05241218 +n05242070 +n05244934 +n05266879 +n05399034 +n05447757 +n05449959 +n05453657 +n05467758 +n05586759 +n06254669 +n06262567 +n06263369 +n06263609 +n06263762 +n06266417 +n06266710 +n06267145 +n06271778 +n06272290 +n06272803 +n06274092 +n06275353 +n06276697 +n06277280 +n06281040 +n06359467 +n06359657 +n06418693 +n06591815 +n06592078 +n06595351 +n06613686 +n06793231 +n07556637 +n07556970 +n07557165 +n07557434 +n07560652 +n07561112 +n07562495 +n07563800 +n07564629 +n07564971 +n07565725 +n07565945 +n07566340 +n07566863 +n07567390 +n07567707 +n07568818 +n07569106 +n07569543 +n07570720 +n07572353 +n07572957 +n07573103 +n07573696 +n07574602 +n07575076 +n07575726 +n07575984 +n07576182 +n07576438 +n07576577 +n07577374 +n07579575 +n07580053 +n07580359 +n07580470 +n07581346 +n07581775 +n07582277 +n07582441 +n07582609 +n07583197 +n07584228 +n07584593 +n07585208 +n07587441 +n07587700 +n07588947 +n07590320 +n07591473 +n07592094 +n07592656 +n07593774 +n07595914 +n07596046 +n07596452 +n07596684 +n07597145 +n07597365 +n07598734 +n07599468 +n07599783 +n07599998 +n07600506 +n07601407 +n07605474 +n07605944 +n07606278 +n07606764 +n07607707 +n07609407 +n07609840 +n07611148 +n07611358 +n07611839 +n07611991 +n07612367 +n07612632 +n07612996 +n07613671 +n07614198 +n07614825 +n07615052 +n07615190 +n07615460 +n07615569 +n07615671 +n07616590 +n07617188 +n07619004 +n07623136 +n07624466 +n07627931 +n07628068 +n07641928 +n07642471 +n07642933 +n07643306 +n07643474 +n07643764 +n07643981 +n07644244 +n07663899 +n07678729 +n07679356 +n07680517 +n07680932 +n07681926 +n07682316 +n07682624 +n07683786 +n07684600 +n07685730 +n07686873 +n07687211 +n07687469 +n07687789 +n07689003 +n07690273 +n07690892 +n07692405 +n07692614 +n07693889 +n07693972 +n07694403 +n07695878 +n07695965 +n07697100 
+n07704054 +n07705931 +n07707451 +n07708512 +n07708798 +n07709333 +n07710007 +n07710283 +n07710616 +n07710952 +n07712063 +n07712382 +n07712748 +n07712856 +n07713395 +n07713895 +n07714078 +n07714802 +n07714895 +n07715561 +n07715721 +n07716034 +n07717070 +n07717858 +n07718671 +n07719437 +n07719839 +n07720442 +n07720615 +n07721325 +n07721456 +n07721678 +n07722217 +n07722763 +n07723330 +n07723559 +n07723753 +n07724943 +n07725376 +n07725531 +n07726796 +n07727578 +n07727868 +n07728804 +n07729000 +n07729485 +n07730406 +n07730855 +n07731122 +n07731587 +n07731952 +n07732302 +n07732747 +n07734017 +n07734292 +n07735052 +n07735803 +n07737081 +n07739125 +n07739506 +n07740220 +n07740954 +n07741461 +n07742012 +n07742704 +n07744246 +n07747055 +n07747811 +n07747951 +n07748753 +n07748912 +n07749095 +n07749192 +n07749312 +n07749731 +n07750586 +n07751451 +n07752377 +n07752664 +n07753743 +n07755089 +n07755411 +n07755707 +n07756096 +n07757132 +n07757312 +n07757602 +n07757990 +n07758680 +n07758950 +n07759424 +n07759691 +n07759816 +n07760501 +n07761141 +n07761309 +n07761611 +n07761777 +n07761954 +n07767344 +n07767847 +n07770571 +n07771212 +n07800091 +n07800740 +n07801508 +n07802152 +n07802417 +n07803093 +n07803545 +n07804323 +n07805254 +n07805594 +n07805731 +n07806221 +n07806633 +n07807317 +n07807710 +n07807922 +n07809096 +n07809368 +n07810907 +n07811416 +n07812184 +n07814203 +n07815588 +n07818277 +n07819480 +n07820497 +n07820814 +n07823951 +n07824702 +n07824988 +n07825717 +n07828987 +n07829412 +n07830593 +n07832902 +n07834507 +n07836731 +n07837002 +n07837362 +n07838233 +n07841495 +n07841639 +n07841907 +n07842753 +n07842972 +n07843464 +n07843775 +n07844042 +n07844604 +n07846143 +n07847198 +n07848338 +n07848771 +n07849336 +n07850083 +n07850329 +n07851298 +n07852045 +n07852919 +n07854813 +n07856270 +n07857959 +n07858595 +n07859284 +n07859583 +n07860805 +n07861158 +n07861813 +n07863374 +n07864638 +n07865105 +n07867421 +n07867883 +n07869391 +n07869775 +n07870313 +n07871436 +n07873464 
+n07874063 +n07874159 +n07874259 +n07874343 +n07874441 +n07874780 +n07875693 +n07875835 +n07876281 +n07880751 +n07881117 +n07881205 +n07881404 +n07881800 +n07882497 +n07882886 +n07883031 +n07883251 +n07883384 +n07884567 +n07886572 +n07886849 +n07887634 +n07888465 +n07888909 +n07889510 +n07890352 +n07890750 +n07891726 +n07892813 +n07893528 +n07893891 +n07894102 +n07894298 +n07894965 +n07895237 +n07895435 +n07895595 +n07895710 +n07895839 +n07896287 +n07897200 +n07897865 +n07898117 +n07898333 +n07898745 +n07899108 +n07900406 +n07900616 +n07901587 +n07903208 +n07904395 +n07905038 +n07906284 +n07906877 +n07907161 +n07907548 +n07907943 +n07909129 +n07909811 +n07911371 +n07911677 +n07912211 +n07913393 +n07914413 +n07915618 +n07916041 +n07917618 +n07918028 +n07920222 +n07921455 +n07921948 +n07923748 +n07924033 +n07924560 +n07924834 +n07925966 +n07926920 +n07927197 +n07927931 +n07929519 +n07930554 +n07931001 +n07931096 +n07932614 +n07932841 +n07933274 +n07933891 +n07934032 +n07934530 +n07935152 +n07935504 +n07936263 +n07936745 +n07938149 +n07951464 +n08554440 +n08558963 +n08596076 +n08598301 +n08616050 +n08640531 +n08659446 +n09191635 +n09206896 +n09206985 +n09210862 +n09213434 +n09213565 +n09214060 +n09214916 +n09215437 +n09217230 +n09230041 +n09233446 +n09238926 +n09255070 +n09259025 +n09259219 +n09262690 +n09265620 +n09269882 +n09270735 +n09287968 +n09289331 +n09289596 +n09290444 +n09295946 +n09300306 +n09302616 +n09303008 +n09303528 +n09304750 +n09305031 +n09308572 +n09309292 +n09315159 +n09326662 +n09335693 +n09335809 +n09336555 +n09337253 +n09344198 +n09352849 +n09359803 +n09362945 +n09366017 +n09366317 +n09375606 +n09376526 +n09381242 +n09393605 +n09396465 +n09398677 +n09405787 +n09406793 +n09409512 +n09409752 +n09410224 +n09416076 +n09421799 +n09428628 +n09432990 +n09433442 +n09437454 +n09439213 +n09443641 +n09453008 +n09458269 +n09472413 +n09474010 +n09505153 +n09606009 +n09606527 +n09608709 +n09610405 +n09613191 +n09615336 +n09616922 +n09619168 +n09619452 
+n09620078 +n09620794 +n09622049 +n09622302 +n09624168 +n09624559 +n09625401 +n09626238 +n09627906 +n09629246 +n09629752 +n09631129 +n09632274 +n09632518 +n09633969 +n09636339 +n09638875 +n09639919 +n09641002 +n09644152 +n09648743 +n09651123 +n09665545 +n09669631 +n09670280 +n09676884 +n09679925 +n09690208 +n09694771 +n09696585 +n09697401 +n09700964 +n09701148 +n09701833 +n09705124 +n09708750 +n09710164 +n09716047 +n09718217 +n09722658 +n09724785 +n09725229 +n09725772 +n09726621 +n09727440 +n09727826 +n09730204 +n09731436 +n09731571 +n09735258 +n09738400 +n09744679 +n09754217 +n09758173 +n09758885 +n09761068 +n09763784 +n09764201 +n09764598 +n09765278 +n09767197 +n09769076 +n09770179 +n09771435 +n09772746 +n09773962 +n09774783 +n09790482 +n09792555 +n09795124 +n09795334 +n09800964 +n09802445 +n09802641 +n09805151 +n09805475 +n09809538 +n09809749 +n09810166 +n09811712 +n09814660 +n09815790 +n09816771 +n09818022 +n09820263 +n09821831 +n09823502 +n09824135 +n09824609 +n09826204 +n09830194 +n09831962 +n09834699 +n09836160 +n09840217 +n09841188 +n09841515 +n09841696 +n09842047 +n09848489 +n09851575 +n09853645 +n09853881 +n09854915 +n09857007 +n09861946 +n09865398 +n09868270 +n09871681 +n09877951 +n09889691 +n09892693 +n09894654 +n09895222 +n09895701 +n09902353 +n09903153 +n09910374 +n09917593 +n09918248 +n09923418 +n09923673 +n09924996 +n09927089 +n09927451 +n09928136 +n09928451 +n09929298 +n09930257 +n09930876 +n09931640 +n09933098 +n09935434 +n09936892 +n09937056 +n09941964 +n09942970 +n09943239 +n09943811 +n09944160 +n09945319 +n09950457 +n09951070 +n09951274 +n09960688 +n09962966 +n09964411 +n09968845 +n09974648 +n09976728 +n09979321 +n09983572 +n09989502 +n09990415 +n09991867 +n09992538 +n09992837 +n09993252 +n09994673 +n09996481 +n09997622 +n10001217 +n10006748 +n10007684 +n10009484 +n10009671 +n10015215 +n10015897 +n10017422 +n10018861 +n10020890 +n10024362 +n10029068 +n10034201 +n10034614 +n10035952 +n10036266 +n10036929 +n10037385 +n10040945 +n10041887 
+n10042690 +n10043643 +n10044879 +n10047459 +n10048367 +n10048836 +n10052694 +n10053808 +n10054657 +n10055730 +n10055847 +n10060175 +n10067968 +n10070711 +n10077593 +n10078131 +n10078806 +n10079399 +n10079893 +n10080869 +n10083823 +n10084043 +n10084295 +n10086383 +n10091651 +n10092488 +n10093475 +n10094584 +n10095869 +n10098710 +n10098862 +n10099375 +n10101634 +n10102800 +n10105085 +n10107303 +n10109662 +n10111903 +n10112129 +n10118844 +n10126177 +n10126424 +n10126708 +n10127689 +n10129825 +n10134396 +n10134982 +n10136959 +n10142747 +n10142946 +n10143172 +n10143725 +n10145340 +n10145774 +n10148305 +n10150071 +n10150940 +n10151570 +n10153594 +n10154186 +n10154601 +n10155849 +n10162194 +n10164233 +n10165448 +n10168183 +n10168584 +n10171567 +n10182190 +n10185793 +n10186774 +n10187130 +n10195593 +n10200781 +n10202624 +n10205457 +n10206173 +n10207169 +n10210137 +n10215623 +n10216106 +n10224578 +n10225219 +n10228278 +n10235385 +n10237069 +n10241300 +n10243664 +n10245639 +n10249270 +n10249459 +n10249950 +n10257221 +n10259348 +n10263411 +n10266328 +n10266848 +n10271677 +n10273064 +n10274815 +n10276045 +n10282672 +n10284064 +n10284965 +n10296444 +n10299250 +n10299700 +n10305635 +n10305802 +n10306004 +n10308732 +n10312287 +n10314054 +n10315561 +n10316360 +n10317007 +n10317500 +n10320863 +n10321340 +n10322238 +n10323999 +n10324560 +n10328437 +n10332385 +n10335246 +n10335931 +n10340312 +n10341573 +n10343554 +n10345100 +n10353016 +n10353355 +n10355142 +n10355449 +n10355688 +n10356450 +n10357613 +n10360747 +n10366966 +n10369528 +n10370381 +n10376523 +n10377021 +n10379376 +n10380672 +n10383816 +n10386984 +n10387196 +n10387324 +n10393909 +n10396106 +n10399130 +n10400998 +n10402824 +n10403876 +n10405694 +n10407954 +n10409752 +n10411551 +n10415037 +n10417551 +n10418101 +n10419047 +n10420031 +n10421016 +n10426454 +n10427764 +n10428004 +n10433737 +n10435716 +n10435988 +n10438172 +n10439851 +n10444194 +n10450303 +n10462860 +n10464052 +n10466918 +n10467179 +n10470779 +n10474064 
+n10474645 +n10478960 +n10481268 +n10482054 +n10482921 +n10484858 +n10488309 +n10495421 +n10499355 +n10499857 +n10506544 +n10508710 +n10512372 +n10512708 +n10519494 +n10521100 +n10521662 +n10522035 +n10522324 +n10522759 +n10523341 +n10525134 +n10525436 +n10525617 +n10527334 +n10529231 +n10541833 +n10542888 +n10543161 +n10544232 +n10544748 +n10546633 +n10548537 +n10548681 +n10554846 +n10556518 +n10557854 +n10559288 +n10560637 +n10568200 +n10570019 +n10575787 +n10576962 +n10577284 +n10580535 +n10582746 +n10583387 +n10594147 +n10595164 +n10595647 +n10599806 +n10602985 +n10604634 +n10605253 +n10610465 +n10612210 +n10614629 +n10617193 +n10618685 +n10618848 +n10619642 +n10620758 +n10622053 +n10624074 +n10624310 +n10625860 +n10628644 +n10630188 +n10632576 +n10633450 +n10648237 +n10648696 +n10654932 +n10657835 +n10661002 +n10661563 +n10665698 +n10669991 +n10674130 +n10676018 +n10679174 +n10682953 +n10686073 +n10692883 +n10693824 +n10694258 +n10698368 +n10700201 +n10700640 +n10701180 +n10703336 +n10703692 +n10705615 +n10707233 +n10708454 +n10709529 +n10713686 +n10720453 +n10721321 +n10722575 +n10722965 +n10726786 +n10735298 +n10740868 +n10741152 +n10742997 +n10744164 +n10747119 +n10751265 +n10752480 +n10759151 +n10759982 +n10763383 +n10763620 +n10765679 +n10766260 +n10768903 +n10779610 +n10780632 +n10782791 +n10782940 +n10787470 +n10791221 +n10792335 +n10793570 +n10794014 +n11531193 +n11537327 +n11542640 +n11545524 +n11545714 +n11547562 +n11547855 +n11552386 +n11553240 +n11596108 +n11598686 +n11600372 +n11601177 +n11601918 +n11608250 +n11609475 +n11609684 +n11612923 +n11614250 +n11618861 +n11620673 +n11621029 +n11623105 +n11624531 +n11627168 +n11628456 +n11630017 +n11630489 +n11643835 +n11645914 +n11647306 +n11649878 +n11650558 +n11650759 +n11661372 +n11665372 +n11666854 +n11669921 +n11672400 +n11674332 +n11676500 +n11684264 +n11689483 +n11693981 +n11697560 +n11700864 +n11703669 +n11708658 +n11709674 +n11713164 +n11720353 +n11722982 +n11723770 +n11725015 +n11725623 
+n11727091 +n11729478 +n11733054 +n11736694 +n11741350 +n11745817 +n11747468 +n11748002 +n11751765 +n11752578 +n11756092 +n11756669 +n11759224 +n11763625 +n11767354 +n11769621 +n11771539 +n11774513 +n11775340 +n11779300 +n11782036 +n11783920 +n11785668 +n11789438 +n11789962 +n11790788 +n11793779 +n11794519 +n11796005 +n11801392 +n11805956 +n11807108 +n11807979 +n11808721 +n11811473 +n11815491 +n11817914 +n11820965 +n11823043 +n11830714 +n11830906 +n11832214 +n11836722 +n11839568 +n11845557 +n11851578 +n11855274 +n11857696 +n11862835 +n11865071 +n11866248 +n11868814 +n11869351 +n11869689 +n11872146 +n11875691 +n11875938 +n11877473 +n11878283 +n11887119 +n11890022 +n11892637 +n11894327 +n11898639 +n11900569 +n11902709 +n11915214 +n11915658 +n11915899 +n11916467 +n11918286 +n11919447 +n11920498 +n11924445 +n11928352 +n11928858 +n11931918 +n11932745 +n11939699 +n11940006 +n11943407 +n11944196 +n11945367 +n11946727 +n11947251 +n11948264 +n11950345 +n11951511 +n11952346 +n11953884 +n11954484 +n11956850 +n11965627 +n11967744 +n11970101 +n11971248 +n11971783 +n11972759 +n11973341 +n11976170 +n11977303 +n11978233 +n11982115 +n11985053 +n11985739 +n11988893 +n11991263 +n11997032 +n11997969 +n12006766 +n12008252 +n12008749 +n12010628 +n12013511 +n12015959 +n12018760 +n12020507 +n12024176 +n12030654 +n12034141 +n12036067 +n12036939 +n12041446 +n12043444 +n12045860 +n12050959 +n12053405 +n12056217 +n12057447 +n12062468 +n12065316 +n12065777 +n12075151 +n12076577 +n12080395 +n12083591 +n12086012 +n12086539 +n12087961 +n12090890 +n12092262 +n12094244 +n12095020 +n12096395 +n12101870 +n12102133 +n12105125 +n12107970 +n12108432 +n12109827 +n12110778 +n12112008 +n12112918 +n12113657 +n12117017 +n12119099 +n12119238 +n12121033 +n12124627 +n12126360 +n12131550 +n12135898 +n12136720 +n12137120 +n12137569 +n12139575 +n12141495 +n12142085 +n12143676 +n12144313 +n12146311 +n12147226 +n12152532 +n12153580 +n12154773 +n12155583 +n12156819 +n12157056 +n12157769 +n12158031 +n12158798 
+n12159055 +n12159555 +n12160490 +n12161285 +n12163035 +n12164363 +n12166424 +n12168565 +n12170585 +n12173664 +n12174311 +n12174926 +n12182049 +n12187663 +n12188289 +n12195391 +n12196129 +n12199266 +n12201580 +n12202936 +n12205694 +n12214789 +n12215579 +n12217453 +n12221191 +n12224978 +n12225349 +n12226932 +n12231192 +n12236546 +n12237486 +n12244153 +n12245695 +n12246232 +n12252168 +n12252866 +n12253229 +n12256112 +n12257570 +n12260799 +n12262553 +n12265394 +n12266217 +n12266796 +n12268246 +n12269241 +n12269652 +n12271643 +n12274630 +n12275489 +n12281241 +n12284262 +n12286826 +n12287642 +n12288823 +n12290748 +n12293723 +n12296432 +n12300840 +n12302071 +n12303462 +n12305475 +n12306717 +n12307756 +n12310349 +n12316444 +n12318378 +n12320010 +n12322501 +n12328398 +n12330469 +n12334293 +n12334891 +n12335483 +n12335664 +n12335800 +n12340383 +n12341542 +n12342299 +n12343480 +n12344283 +n12346578 +n12350758 +n12352287 +n12355760 +n12360108 +n12360684 +n12364604 +n12367611 +n12374418 +n12377198 +n12381511 +n12385429 +n12387633 +n12387839 +n12392070 +n12396924 +n12399132 +n12401335 +n12401684 +n12405714 +n12409231 +n12411461 +n12412355 +n12412606 +n12416423 +n12419037 +n12420535 +n12421467 +n12421683 +n12425281 +n12430198 +n12431434 +n12437513 +n12437769 +n12441958 +n12446200 +n12446519 +n12449296 +n12450344 +n12451915 +n12454159 +n12459629 +n12460697 +n12461466 +n12462032 +n12463743 +n12464476 +n12466727 +n12470092 +n12474167 +n12475035 +n12476510 +n12480895 +n12491826 +n12495146 +n12499163 +n12506181 +n12508309 +n12509476 +n12511856 +n12516584 +n12522188 +n12524188 +n12526516 +n12527738 +n12539074 +n12539306 +n12546183 +n12548280 +n12550210 +n12554526 +n12556656 +n12560282 +n12560775 +n12562577 +n12572546 +n12573256 +n12575322 +n12582231 +n12582665 +n12582846 +n12583126 +n12583401 +n12584191 +n12586298 +n12590232 +n12594989 +n12595964 +n12602262 +n12602980 +n12612170 +n12614477 +n12615710 +n12620196 +n12622875 +n12624381 +n12625383 +n12631331 +n12633638 +n12634211 
+n12634429 +n12635744 +n12636885 +n12638218 +n12638556 +n12639736 +n12640607 +n12641413 +n12641931 +n12642200 +n12643473 +n12644902 +n12645174 +n12647376 +n12649065 +n12650556 +n12651821 +n12653218 +n12655869 +n12658118 +n12658846 +n12659356 +n12660601 +n12662772 +n12663804 +n12665048 +n12667406 +n12667964 +n12674120 +n12674685 +n12682411 +n12683407 +n12685431 +n12685831 +n12688716 +n12690653 +n12695144 +n12698435 +n12705013 +n12707781 +n12708293 +n12709901 +n12711596 +n12713063 +n12714755 +n12715914 +n12717072 +n12719684 +n12724942 +n12725521 +n12727301 +n12731401 +n12732491 +n12732756 +n12733647 +n12741222 +n12742741 +n12743823 +n12746884 +n12749049 +n12752205 +n12755225 +n12756457 +n12762896 +n12768369 +n12771192 +n12772753 +n12777436 +n12778605 +n12779603 +n12785724 +n12791064 +n12793015 +n12794985 +n12798284 +n12800586 +n12801520 +n12805146 +n12806732 +n12810595 +n12812235 +n12814643 +n12817464 +n12822769 +n12823717 +n12823859 +n12832315 +n12833985 +n12834798 +n12836212 +n12836862 +n12839979 +n12840749 +n12842302 +n12842887 +n12844939 +n12849061 +n12853080 +n12854048 +n12858150 +n12866968 +n12869478 +n12870535 +n12871272 +n12877244 +n12878169 +n12879963 +n12882779 +n12884260 +n12890265 +n12893463 +n12903367 +n12904938 +n12908645 +n12909421 +n12912670 +n12917901 +n12922763 +n12926480 +n12928071 +n12929403 +n12930778 +n12931906 +n12934036 +n12934479 +n12939104 +n12941536 +n12942395 +n12943443 +n12946849 +n12950126 +n12952165 +n12953206 +n12956170 +n12957608 +n12960378 +n12960863 +n12965626 +n12968136 +n12969131 +n12970193 +n12971400 +n12973791 +n12974987 +n12976198 +n12980840 +n12982468 +n12983961 +n12985773 +n12987056 +n12988158 +n12992868 +n12997654 +n12997919 +n13000891 +n13001041 +n13001206 +n13001366 +n13001529 +n13002750 +n13002925 +n13003061 +n13003254 +n13003522 +n13003712 +n13004423 +n13004640 +n13004826 +n13004992 +n13005329 +n13005984 +n13006171 +n13006631 +n13006894 +n13007417 +n13007629 +n13008157 +n13008315 +n13008485 +n13008689 +n13008839 
+n13009085 +n13009244 +n13009429 +n13009656 +n13010694 +n13010951 +n13011221 +n13012253 +n13012469 +n13012973 +n13013534 +n13013764 +n13013965 +n13014097 +n13014265 +n13014409 +n13014581 +n13014741 +n13014879 +n13017102 +n13017240 +n13017439 +n13017610 +n13017789 +n13017979 +n13018088 +n13018232 +n13018407 +n13019496 +n13019643 +n13019835 +n13020191 +n13020481 +n13020964 +n13021166 +n13021332 +n13021543 +n13021689 +n13021867 +n13022210 +n13022709 +n13024012 +n13024500 +n13025647 +n13028611 +n13032115 +n13032923 +n13035241 +n13035389 +n13035707 +n13037585 +n13037805 +n13038068 +n13038376 +n13038744 +n13039349 +n13040629 +n13040796 +n13041312 +n13042982 +n13043926 +n13045210 +n13045975 +n13046130 +n13049953 +n13055423 +n13055577 +n13055792 +n13055949 +n13056135 +n13056349 +n13056607 +n13056799 +n13057054 +n13057242 +n13057422 +n13057639 +n13058037 +n13058272 +n13058608 +n13059298 +n13059657 +n13060017 +n13060190 +n13063269 +n13066129 +n13067191 +n13068917 +n13070308 +n13070875 +n13071371 +n13071553 +n13071815 +n13072031 +n13072209 +n13072350 +n13072528 +n13072706 +n13072863 +n13073055 +n13073703 +n13074619 +n13074814 +n13075020 +n13075272 +n13075441 +n13075684 +n13075847 +n13076041 +n13076405 +n13076643 +n13076831 +n13077033 +n13077295 +n13079419 +n13083023 +n13084184 +n13085113 +n13091620 +n13091774 +n13100156 +n13100677 +n13104059 +n13108131 +n13108662 +n13108841 +n13109733 +n13110915 +n13111174 +n13111881 +n13118707 +n13119870 +n13120211 +n13121104 +n13122364 +n13123431 +n13125117 +n13130161 +n13130726 +n13132034 +n13132338 +n13132486 +n13132940 +n13134302 +n13134947 +n13135832 +n13136316 +n13136556 +n13137409 +n13137672 +n13138308 +n13138658 +n13138842 +n13139055 +n13139647 +n13141141 +n13145444 +n13149296 +n13150894 +n13154841 +n13156986 +n13157137 +n13160831 +n13163991 +n13172923 +n13174670 +n13177529 +n13180534 +n13186388 +n13188096 +n13188268 +n13192625 +n13193642 +n13194572 +n13195761 +n13199970 +n13201969 +n13206817 +n13207736 +n13208705 +n13211790 
+n13219422 +n13221529 +n13224673 +n13230662 +n13231678 +n13231919 +n13232106 +n13232363 +n13232779 +n13233727 +n13238375 +n13238988 +n13252672 +n13862780 +n13863186 +n13863473 +n13863771 +n13864153 +n13864965 +n13865298 +n13865483 +n13866144 +n13866827 +n13867492 +n13868248 +n13868371 +n13872592 +n13873502 +n13875392 +n13875571 +n13878306 +n13879320 +n13883603 +n13888491 +n13893786 +n13894434 +n13896100 +n13897996 +n13900287 +n13903079 +n13905121 +n13905275 +n13905792 +n13912260 +n13915999 +n14633206 +n14696793 +n14844693 +n14853210 +n14899328 +n14900184 +n14974264 +n14977504 +n14992287 +n15062057 +n15067877 +n15089258 +n15089472 +n15089645 +n15089803 +n15090742 +n15092409 +n15092751 \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/imagenet.shortnames.list b/workloads/realworld/uvm_prefetch/darknet/cfg/imagenet.shortnames.list new file mode 100644 index 0000000000000000000000000000000000000000..e7a18d44b543086958eaf60e6dc0b7deb0df9400 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/imagenet.shortnames.list @@ -0,0 +1,21842 @@ +kit fox +English setter +Siberian husky +Australian terrier +English springer +grey whale +lesser panda +Egyptian cat +ibex +Persian cat +cougar +gazelle +porcupine +sea lion +malamute +badger +Great Dane +Walker hound +Welsh springer spaniel +whippet +Scottish deerhound +killer whale +mink +African elephant +Weimaraner +soft-coated wheaten terrier +Dandie Dinmont +red wolf +Old English sheepdog +jaguar +otterhound +bloodhound +Airedale +hyena +meerkat +giant schnauzer +titi +three-toed sloth +sorrel +black-footed ferret +dalmatian +black-and-tan coonhound +papillon +skunk +Staffordshire bullterrier +Mexican hairless +Bouvier des Flandres +weasel +miniature poodle +Cardigan +malinois +bighorn +fox squirrel +colobus +tiger cat +Lhasa +impala +coyote +Yorkshire terrier +Newfoundland +brown bear +red fox +Norwegian elkhound +Rottweiler +hartebeest +Saluki +grey fox +schipperke +Pekinese 
+Brabancon griffon +West Highland white terrier +Sealyham terrier +guenon +mongoose +indri +tiger +Irish wolfhound +wild boar +EntleBucher +zebra +ram +French bulldog +orangutan +basenji +leopard +Bernese mountain dog +Maltese dog +Norfolk terrier +toy terrier +vizsla +cairn +squirrel monkey +groenendael +clumber +Siamese cat +chimpanzee +komondor +Afghan hound +Japanese spaniel +proboscis monkey +guinea pig +white wolf +ice bear +gorilla +borzoi +toy poodle +Kerry blue terrier +ox +Scotch terrier +Tibetan mastiff +spider monkey +Doberman +Boston bull +Greater Swiss Mountain dog +Appenzeller +Shih-Tzu +Irish water spaniel +Pomeranian +Bedlington terrier +warthog +Arabian camel +siamang +miniature schnauzer +collie +golden retriever +Irish terrier +affenpinscher +Border collie +hare +boxer +silky terrier +beagle +Leonberg +German short-haired pointer +patas +dhole +baboon +macaque +Chesapeake Bay retriever +bull mastiff +kuvasz +capuchin +pug +curly-coated retriever +Norwich terrier +flat-coated retriever +hog +keeshond +Eskimo dog +Brittany spaniel +standard poodle +Lakeland terrier +snow leopard +Gordon setter +dingo +standard schnauzer +hamster +Tibetan terrier +Arctic fox +wire-haired fox terrier +basset +water buffalo +American black bear +Angora +bison +howler monkey +hippopotamus +chow +giant panda +American Staffordshire terrier +Shetland sheepdog +Great Pyrenees +Chihuahua +tabby +marmoset +Labrador retriever +Saint Bernard +armadillo +Samoyed +bluetick +redbone +polecat +marmot +kelpie +gibbon +llama +miniature pinscher +wood rabbit +Italian greyhound +lion +cocker spaniel +Irish setter +dugong +Indian elephant +beaver +Sussex spaniel +Pembroke +Blenheim spaniel +Madagascar cat +Rhodesian ridgeback +lynx +African hunting dog +langur +Ibizan hound +timber wolf +cheetah +English foxhound +briard +sloth bear +Border terrier +German shepherd +otter +koala +tusker +echidna +wallaby +platypus +wombat +revolver +umbrella +schooner +soccer ball +accordion +ant 
+starfish +chambered nautilus +grand piano +laptop +strawberry +airliner +warplane +airship +balloon +space shuttle +fireboat +gondola +speedboat +lifeboat +canoe +yawl +catamaran +trimaran +container ship +liner +pirate +aircraft carrier +submarine +wreck +half track +tank +missile +bobsled +dogsled +bicycle-built-for-two +mountain bike +freight car +passenger car +barrow +shopping cart +motor scooter +forklift +electric locomotive +steam locomotive +amphibian +ambulance +beach wagon +cab +convertible +jeep +limousine +minivan +Model T +racer +sports car +go-kart +golfcart +moped +snowplow +fire engine +garbage truck +pickup +tow truck +trailer truck +moving van +police van +recreational vehicle +streetcar +snowmobile +tractor +mobile home +tricycle +unicycle +horse cart +jinrikisha +oxcart +bassinet +cradle +crib +four-poster +bookcase +china cabinet +medicine chest +chiffonier +table lamp +file +park bench +barber chair +throne +folding chair +rocking chair +studio couch +toilet seat +desk +pool table +dining table +entertainment center +wardrobe +Granny Smith +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +acorn +hip +ear +rapeseed +corn +buckeye +organ +upright +chime +drum +gong +maraca +marimba +steel drum +banjo +cello +violin +harp +acoustic guitar +electric guitar +cornet +French horn +trombone +harmonica +ocarina +panpipe +bassoon +oboe +sax +flute +daisy +yellow lady's slipper +cliff +valley +alp +volcano +promontory +sandbar +coral reef +lakeside +seashore +geyser +hatchet +cleaver +letter opener +plane +power drill +lawn mower +hammer +corkscrew +can opener +plunger +screwdriver +shovel +plow +chain saw +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal 
+bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +white stork +black stork +spoonbill +flamingo +American egret +little blue heron +bittern +crane +limpkin +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +European gallinule +pelican +king penguin +albatross +great white shark +tiger shark +hammerhead +electric ray +stingray +barracouta +coho +tench +goldfish +eel +rock beauty +anemone fish +lionfish +puffer +sturgeon +gar +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +triceratops +African crocodile +American alligator +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +whistle +wing +paintbrush +hand blower +oxygen mask +snorkel +loudspeaker +microphone +screen +mouse +electric fan +oil filter +strainer +space heater +stove +guillotine +barometer +rule +odometer +scale +analog clock +digital clock +wall clock +hourglass +sundial +parking meter +stopwatch +digital watch +stethoscope +syringe +magnetic compass +binoculars +projector +sunglasses +loupe +radio telescope +bow +cannon +assault rifle +rifle +projectile +computer keyboard +typewriter keyboard +crane +lighter +abacus +cash machine +slide rule +desktop computer +hand-held computer +notebook +web site +harvester +thresher +printer +slot +vending machine +sewing machine +joystick +switch +hook +car wheel +paddlewheel +pinwheel +potter's wheel +gas pump +carousel +swing +reel +radiator +puck +hard disc +sunglass +pick +car mirror +solar dish +remote 
control +disk brake +buckle +hair slide +knot +combination lock +padlock +nail +safety pin +screw +muzzle +seat belt +ski +candle +jack-o'-lantern +spotlight +torch +neck brace +pier +tripod +maypole +mousetrap +spider web +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +isopod +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +sea urchin +sea cucumber +iron +espresso maker +microwave +Dutch oven +rotisserie +toaster +waffle iron +vacuum +dishwasher +refrigerator +washer +Crock Pot +frying pan +wok +caldron +coffeepot +teapot +spatula +altar +triumphal arch +patio +steel arch bridge +suspension bridge +viaduct +barn +greenhouse +palace +monastery +library +apiary +boathouse +church +mosque +stupa +planetarium +restaurant +cinema +home theater +lumbermill +coil +obelisk +totem pole +castle +prison +grocery store +bakery +barbershop +bookshop +butcher shop +confectionery +shoe shop +tobacco shop +toyshop +fountain +cliff dwelling +yurt +dock +brass +megalith +bannister +breakwater +dam +chainlink fence +picket fence +worm fence +stone wall +grille +sliding door +turnstile +mountain tent +scoreboard +honeycomb +plate rack +pedestal +beacon +mashed potato +bell pepper +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +cardoon +mushroom +shower curtain +jean +carton +handkerchief +sandal +ashcan +safe +plate +necklace +croquet ball +fur coat +thimble +pajama 
+running shoe +cocktail shaker +chest +manhole cover +modem +tub +tray +balance beam +bagel +prayer rug +kimono +hot pot +whiskey jug +knee pad +book jacket +spindle +ski mask +beer bottle +crash helmet +bottlecap +tile roof +mask +maillot +Petri dish +football helmet +bathing cap +teddy +holster +pop bottle +photocopier +vestment +crossword puzzle +golf ball +trifle +suit +water tower +feather boa +cloak +red wine +drumstick +shield +Christmas stocking +hoopskirt +menu +stage +bonnet +meat loaf +baseball +face powder +scabbard +sunscreen +beer glass +hen-of-the-woods +guacamole +lampshade +wool +hay +bow tie +mailbag +water jug +bucket +dishrag +soup bowl +eggnog +mortar +trench coat +paddle +chain +swab +mixing bowl +potpie +wine bottle +shoji +bulletproof vest +drilling platform +binder +cardigan +sweatshirt +pot +birdhouse +hamper +ping-pong ball +pencil box +pay-phone +consomme +apron +punching bag +backpack +groom +bearskin +pencil sharpener +broom +mosquito net +abaya +mortarboard +poncho +crutch +Polaroid camera +space bar +cup +racket +traffic light +quill +radio +dough +cuirass +military uniform +lipstick +shower cap +monitor +oscilloscope +mitten +brassiere +French loaf +vase +milk can +rugby ball +paper towel +earthstar +envelope +miniskirt +cowboy hat +trolleybus +perfume +bathtub +hotdog +coral fungus +bullet train +pillow +toilet tissue +cassette +carpenter's kit +ladle +stinkhorn +lotion +hair spray +academic gown +dome +crate +wig +burrito +pill bottle +chain mail +theater curtain +window shade +barrel +washbasin +ballpoint +basketball +bath towel +cowboy boot +gown +window screen +agaric +cellular telephone +nipple +barbell +mailbox +lab coat +fire screen +minibus +packet +maze +pole +horizontal bar +sombrero +pickelhaube +rain barrel +wallet +cassette player +comic book +piggy bank +street sign +bell cote +fountain pen +Windsor tie +volleyball +overskirt +sarong +purse +bolo tie +bib +parachute +sleeping bag +television +swimming trunks 
+measuring cup +espresso +pizza +breastplate +shopping basket +wooden spoon +saltshaker +chocolate sauce +ballplayer +goblet +gyromitra +stretcher +water bottle +dial telephone +soap dispenser +jersey +school bus +jigsaw puzzle +plastic bag +reflex camera +diaper +Band Aid +ice lolly +velvet +tennis ball +gasmask +doormat +Loafer +ice cream +pretzel +quilt +maillot +tape player +clog +iPod +bolete +scuba diver +pitcher +matchstick +bikini +sock +CD player +lens cap +thatch +vault +beaker +bubble +cheeseburger +parallel bars +flagpole +coffee mug +rubber eraser +stole +carbonara +dumbbell +singles +Virginia deer +eastern grey squirrel +gelding +pylon +table-tennis table +peacock +Segway +surfing +tamandua +knocker +steering wheel +motorcycling +coati +sitar +range +backhoe +agaric +dashboard +water polo +concrete mixer +treadmill +golf bag +skateboarding +royal tennis +tartan +four-wheel drive +sport utility +sedan +print +luggage rack +softball +windmill +ben +red admiral +jalousie +towel rail +truss +strand +ice hockey +sconce +wind turbine +plush +stained-glass window +ballpark +thoroughbred +love seat +red-spotted purple +miller +Adelie +freight liner +clock tower +acrobatics +shaving brush +ewe +ottoman +African violet +bicycle wheel +cork +windmill +satin +comma +coffee mill +baggage +wasp's nest +batting glove +Ferris wheel +push-bike +porthole +football stadium +gas tank +barbecue +handlebar +hula-hoop +fairground +rapier +garter stitch +exercise bike +control tower +carryall +minute hand +cog +riverbank +water nymph +common dandelion +android +hairbrush +redberry +fret +display window +pepper mill +litterbin +drapery +ducking +fly-fishing +broad jump +sprinkler +water-skiing +chicory +sail +volleyball +rugby +Texas bluebonnet +computer monitor +tortoiseshell +airplane propeller +solar array +figure skating +air conditioner +purple loosestrife +gearshift +outboard motor +cowslip +Abyssinian +dip +workstation +cosy +bunker +neon lamp +campanile +casket 
+verbena +amphora +sumo +common foxglove +sprocket +jelly bean +emperor penguin +night-blooming cereus +clock radio +black birch +bomber jacket +Virginia bluebell +bayonet +walker +altarpiece +tattoo +bridle +rocker arm +water turkey +spiderwort +flange +mute swan +laser printer +carburetor +coverlet +mountainside +baritone +auto racing +baluster +gal +peach bells +taffeta +grandfather +asparagus +horizontal stabilizer +world +grate +marsh marigold +white rhinoceros +movement +split rail +rollerblading +longhorn +muffler +church tower +light bulb +American agave +backpacking tent +overall +New World goldfinch +sectional +wing chair +transom +integrated circuit +dad +spar +picture frame +no-hit game +alternator +drill press +strawflower +hepatica +rangefinder +blinker +Welsh pony +nib +wagon wheel +rotor +tie +denim +jetliner +sculling +external drive +window frame +mourning dove +censer +stapler +batting helmet +flagon +machete +windshield +hedgehog +weeping willow +chief executive officer +hepatica +pet +Asiatic black bear +chinchilla +uke +Atlantic bottlenose dolphin +hair +dishtowel +flintlock +Bermuda shorts +lavender +searchlight +millwheel +piano keyboard +luna moth +bumper +parrot +skirt +manhole +coffee table +footstool +judo +Dalai Lama +armored personnel carrier +voile +saber +thoroughbred +wild carrot +gemsbok +caster +butterfly orchid +cow +sideboard +horseshoe crab +match play +cassette recorder +photomicrograph +drafting table +pediment +tramline +shipping +kitten +wainscoting +fried rice +helix +marguerite +pumpkin +white-bellied swallow +Tulipa gesneriana +common dolphin +face +red squirrel +bicycling +shipwreck +banded purple +cornice +pendant earring +forsythia +aardvark +seashell +spat +shoulder bag +fallow deer +yearling +common teasel +tufted titmouse +ancient +professional golf +purl +vehicle +okra +great grandmother +common lilac +rose mallow +newspaper +crucifix +chukka +armlet +fulmar +wapiti +doily +Greco-Roman wrestling +bleeding heart 
+kitchen table +bluebonnet +Cape buffalo +spun yarn +crape myrtle +dewdrop +great blue heron +medalist +vaulting horse +spinning wheel +skyscraper +Tahitian +forget-me-not +watercourse +guitarist +gargoyle +bee balm +pumpkin +hunting knife +flutist +lectern +skateboarder +foil +pant leg +hedge sparrow +dresser +automatic pistol +chicory +dialog box +chamberpot +black rhinoceros +fireweed +half-mast +pillow sham +pavilion +scarf joint +microprocessor +filly +dressing gown +shell +Arabian +child +radio antenna +butterweed +morris dancer +sparrow hawk +groom +brioche +floret +rainbow +earthworm +cellist +tine +toupee +balldress +map +angel's trumpet +ruin +fur +pronghorn +speed skating +used-car +stick +early spider orchid +stuffed peppers +snowdrift +flats +least sandpiper +stick +console table +ventilator +portable +kepi +pylon +viceroy +shoreline +Olympian Zeus +pestle +great-niece +life +air compressor +fanjet +scuba diving +fieldfare +tree swallow +personnel carrier +night-blooming cereus +sonogram +assembly hall +circuit breaker +chair +speed skate +soapwort +worsted +raspberry +burlap +flat panel display +Pyracantha +cemetery +turban +deer hunting +bottle green +dandelion green +pieta +aigrette +turntable +cover girl +clutch bag +kiwi +pea jacket +color guard +Malay +shire +crock +french fries +credenza +hockey stick +mourning cloak +potty seat +glass +balsamroot +medal play +red clover +gravy boat +garter belt +Guinness +meadow buttercup +jackass penguin +coursing +tooth +hawfinch +housetop +fluorescent lamp +black-backed gull +bookshelf +earplug +millipede +fawn +baseball bat +soup-strainer +organ loft +bugloss +tomahawk +blackcap +black-necked stilt +hand truck +bedstead +tempura +rose window +crimson +snow thrower +lesser whitethroat +palomino +ball +staff sergeant +wicker +garbage heap +great-nephew +parquet +coupe +nave +eggs Benedict +damask +flush toilet +Angora +pedometer +control room +bristle brush +kookaburra +telephone booth +Windsor chair 
+red-winged blackbird +cinnamon roll +briefs +cloister +sundress +mammillaria +unicyclist +covered bridge +coelogyne +fairy bluebird +phoebe +beer mug +headstock +parhelion +gorse +common European dogwood +fire-eater +professional football +rock climbing +cyclamen +tin +marjoram +Japanese morning glory +pipe +smasher +hang glider +abutment +birdbath +jotter +litter +artist's model +butterfly bush +dining area +sausage dog +piggery +English sparrow +Turk's-cap +platinum blond +song sparrow +alarm clock +tortoiseshell +chaise longue +flintlock +academic costume +graffito +Arnica montana +adding machine +waterside +director +jonquil +pipefitting +stud +Swedish meatball +musk rose +Venus's flytrap +raven +bougainvillea +little brother +field bindweed +finder +white admiral +tinfoil +serval +sheet +carthorse +people +potto +stockroom +sphinx +slate roof +mountain laurel +majolica +coal black +repository +bufo +pique +binder +tread +attorney general +hydraulic press +videocassette recorder +bumper car +professional baseball +cow parsley +ern +blue peafowl +common hyacinth +jack-in-the-pulpit +ice hockey rink +sport +camper +tailback +flash +stacks +pulp +Christmas cactus +netball +calliandra +curler +large periwinkle +cobweb +forward +Roman arch +cross bun +stoneware +banana bread +cape jasmine +settle +tongue +frock +pepper shaker +pitching coach +CD-R +casing +faience +hand cream +CD-ROM +recliner +striped bass +clary +sketch +risotto +reticle +white clover +touch football +kitty +great-aunt +Japanese maple +sidecar +muscovy duck +hack +rope bridge +organist +stinging nettle +pocket watch +Indian pipe +amorphophallus +bird's-foot violet +caller ID +furnishing +carriageway +dish rack +heiress +nail polish +beldam +Dall sheep +teriyaki +stateroom +laughing gull +chow +bookmark +timer +toga virilis +deviled egg +coltsfoot +Papuan +native +cygnet +automation +portfolio +cabbage palm +cube +broiler +radish +broodmare +castor-oil plant +pith hat +talus +lass +thatch +common 
marigold +young buck +igloo +prairie rattlesnake +soccer player +spoke +place +slide fastener +tapestry +toy +headboard +cross-country skiing +harness +sconce +rim +ballet skirt +transvestite +saddlebag +common evening primrose +taillight +challah +willet +ready-to-wear +cloud +answering machine +waterfront +vane +granddaughter +Chinese gooseberry +tureen +cab +truffle +viola +bootlace +chemise +taro +petal +candied apple +soccer +miniature golf +front porch +asparagus +Sauvignon blanc +daisy fleabane +ceiling +slip-on +bottle-nosed whale +redbud +black squirrel +snowsuit +ribbing +gravestone +creme brulee +ambassador +local +archery +love-in-a-mist +garbage +thyme +night-blooming cereus +goshawk +cuckoopint +azure +German iris +salad bowl +puppy +cockhorse +giant clam +biplane +stele +necklet +sea otter +crest +door +reformer +comforter +Byelorussian +bottle +hemline +book bag +leotard +owlet +spoon +sari +bidet +Latin +reticulated python +bowling shoe +futon +gaiter +coypu +tea urn +waders +bangle +snowbank +pencil +porter +azalea +English lavender +red spruce +team sport +cruet +high-rise +O ring +vodka +cormorant +Canada thistle +clasp +showjumping +rattan +red fox +sun parlor +Charolais +Tommy gun +bird's foot trefoil +sedge warbler +knot +chives +car tire +steam engine +adapter +spirea +common allamanda +oyster shell +harbor seal +baobab +wick +plumbago +downy woodpecker +coconut +leash +kasbah +hour hand +upholstery +mallard +cricket bat +lady +kitchenware +right-hander +leopard +olive green +common valerian +blue whale +blackboard +redhead +periwinkle +fingerboard +hard hat +locker +breakfast table +capybara +beekeeper +harness +feeder +water hyacinth +hexapod +brown thrasher +percale +lever +patriarch +arete +book +book +senator +bunya bunya +couch +durian +common lady's-slipper +mountain ash +golden barrel cactus +bicycle seat +beret +pop +musk mallow +manatee +cotton candy +boxing glove +backboard +tongue +saguaro +playground +capitol +sanderling 
+wagtail +deputy +tractor +tap +lady's smock +noseband +worsted +radiotelephone +camisole +forelock +muscat +sweet scabious +crane fly +butterfly weed +chestnut +pinata +inositol +borage +aquatic +belly +broadcaster +gondolier +egg yolk +blush wine +bufflehead +rambutan +oleander +horse-trail +sea holly +yard bird +conference room +lacrosse +belted kingfisher +defile +extremum +whistle +bear cub +grainfield +potage +watermelon +lasagna +sheik +Cooper's hawk +bulb +basketball court +paella +cassette tape +scatter rug +kid +impala lily +Minnesotan +Sudanese +chocolate +tail +quack-quack +whistling swan +shoulder patch +frozen custard +sumo wrestler +smoothie +bock +meat grinder +latch +palisade +radial +sake +kestrel +corn chowder +airframe +electrician +reamer +metropolitan +cotton flannel +cassowary +crossbill +operating room +winter aconite +flute +Tasmanian devil +billboard +suds +kilt +aperitif +cooling tower +avocado +hooded merganser +coleslaw +bee balm +ladder-back +insurance broker +scaffolding +polo mallet +double bed +two-hitter +bluff +gamboge +baby +lawn chair +frond +pistol grip +fancy dress +marquetry +jambalaya +fireweed +Eurasian kingfisher +cue ball +ice plant +horseweed +rose moss +musher +sun +viscount +white-breasted nuthatch +gin and tonic +thermos +Kenyan +first-aid kit +four-wheeler +tourist +stairwell +Gambian +liqueur glass +hovercraft +cocktail dress +twin +coriander +blister pack +Barrow's goldeneye +canteen +irrigation ditch +great white heron +tree sparrow +canal boat +lens +food processor +common raccoon +Baltimore oriole +black-eyed Susan +bush hibiscus +corolla +sire +mustachio +professional wrestling +elk +clustered bellflower +pannier +musk ox +crapaud +animal trainer +rosebud +ring-necked pheasant +little egret +cappuccino +rocker +bristlecone pine +cheerleader +hedge violet +semaphore +central processing unit +speedskater +delivery truck +assembly +hedgehog cactus +bergenia +bull thistle +bladder campion +cinquefoil +inula 
+cellulose tape +main rotor +bootee +autogiro +ice +grey +meadow cranesbill +hummus +valise +chassis +mountain goat +blacktail prairie dog +Chardonnay +romper +street +shoveler +wood ibis +topiary +chalice +silo +circus acrobat +Rollerblade +cosmos +woof +heroine +cold cream +marabou +herb robert +garden lettuce +nymph +floor lamp +automobile engine +heel +radiator +seeded player +fedora +father-in-law +peahen +Bahamian +wiper +wood pigeon +barn owl +pegboard +chorus frog +kin +roller skate +stob +rosemary +cowbird +hortensia +cranberry sauce +shot glass +Dixie cup +gnu +fire alarm +diet +booster +oxeye daisy +twayblade +high-definition television +truss bridge +bunk bed +mule +blackbuck +facsimile +frog orchid +point-and-shoot camera +brocade +gazebo +prairie gentian +concert +paintball +Cognac +maid +afghan +barbecued spareribs +pintail +tramway +commissioner +finger-painting +beef stew +caftan +Aberdeen Angus +demonstrator +sea trout +pigtail +thrush nightingale +barbados cherry +sashimi +ridgeling +lamppost +gabardine +red-shouldered hawk +bath salts +cavern +cymbid +Haitian +boater +southern buckthorn +arctic +motorcycle cop +red gum +Clydesdale +Zamboni +beagling +villa +demitasse +Sheetrock +lollipop +hybrid petunia +post horse +carabiner +brussels sprouts +Durham +stylist +pothole +sleigh bed +scallop shell +harrier eagle +papaya +Japanese persimmon +sachet +wild rice +chipboard +gun enclosure +menorah +chinook +headset +white campion +ocean +Secretary of State +G-string +bone china +basil +greenish blue +camcorder +concrete +screech owl +trumpet honeysuckle +flugelhorn +layette +cattle egret +case knife +mandarin duck +robber fly +salwar +dressing table +doughnut +facade +runner +honeypot +surf casting +diver +angel's trumpet +spin dryer +chameleon +wand +snow +vitamin A1 +manageress +volleyball net +antiperspirant +street clothes +tree sparrow +cords +sundew +bricks and mortar +caryatid +bridesmaid +trestle bridge +eyepiece +celebrant +scarlet pimpernel 
+gas range +onion +green salad +squill +creepy-crawly +hunk +little owl +salad nicoise +earflap +bird feeder +spray gun +bunny +Cheops +amazon +blue tit +Nissen hut +Kalashnikov +skylark +kremlin +shoebill +shopping bag +frigate bird +telephoto lens +peplum +moss pink +echidna +wastepaper basket +wood ibis +workroom +ankle brace +telpherage +Michaelmas daisy +figure skate +swami +nylons +cardoon +cocotte +headstall +twin bed +parsley +dirndl +corn poppy +nut bread +cloche +light heavyweight +mayor +lip-gloss +punch bowl +pottage +mango +fledgling +mousse +four-wheel drive +barrel +banana boat +trouser +bathroom +Sauterne +ring +settee +lavaliere +safe-deposit +godson +leatherette +schoolmate +radish +hedge trimmer +dahlia +euphonium +palace +vaulter +singlet +slicer +Pilsner +cockateel +kangaroo paw +Cub Scout +master bedroom +hexagon +cenotaph +Barberton daisy +Netherlander +intersection +Korean +gravel +chandelier +hospital bed +flash memory +pier +whole wheat flour +maroon +pale ale +special +snow bunting +crinoline +dustpan +barrette +common wood sorrel +yolk +pothos +speakerphone +tendril +cabinetwork +farm horse +brake disk +streetlight +superhighway +bandsaw +panting +pressure cooker +girdle +old man +cereal bowl +felt +hurling +architecture +harmonium +chain +blueberry +cellar +smocking +scrub brush +tablespoon +sweet corn +graining +library +street +bill +felt-tip pen +monkshood +crowd +log cabin +newel post +hack +elephant seal +golden pothos +popcorn +outhouse +patch pocket +fish and chips +tape +wax plant +eaves +fried egg +emerald +tea cart +fan blade +daily +Bowie knife +rowing boat +leaf shape +man +crayon +trumpetfish +chipping sparrow +whiskey bottle +pillion +city hall +golden pheasant +cheerleader +creeping bugle +couch +Dumpster +Homo sapiens sapiens +cranberry juice +cockpit +demagogue +joinery +scrambled eggs +technician +sidewalk +sheep +keyhole +power line +polyanthus +roulette +first lieutenant +checkout +tabletop +nasturtium +schnapps 
+engineering +skateboard +ground fir +bouquet +bunk +resort area +fleur-de-lis +power steering +opera +Bolivian +Friesian +buckskins +bay +slider +frozen yogurt +cabin cruiser +saunterer +lean-to +fishing eagle +bog star +cantaloupe +mouth +music stand +fiddlestick +brilliantine +pinball machine +bairn +barred owl +bath oil +signorina +Mason jar +nymph +rubber band +garden nasturtium +razorbill +Japanese beetle +batting cage +trestle +borage +Secretary of the Interior +scanner +baguet +baseball cap +chow mein +pen +jewelweed +barbet +chasm +pectoral sandpiper +holster +glasses case +sand +crevice +Kickapoo +snowboard +locket +satchel +tankard +alpinist +moorhen +cow pen +whooper +crown +chain +silversword +wild geranium +hi-fi +Tibetan +waterwheel +bee orchid +ruby-crowned kinglet +common broom +tabloid +javelin +sauna +klammath weed +zebra finch +spider orchid +velour +chiffon +lecture room +barrel +loggia +millstone +flatlet +soupspoon +econometrician +golf-club head +daphnia +parlor +fire-eater +juggler +attache case +hay bale +kisser +knitting needle +news magazine +flatbed +Senegalese +trumpeter +trampoline +brogan +bone +caftan +lobster pot +gazpacho +anthill +ramekin +mainsail +penitentiary +spotted flycatcher +cookstove +root beer +broom beard grass +pogo stick +plywood +epee +gas oven +Global Positioning System +sweet false chamomile +breakfast area +bullring +second cousin +wave +decolletage +rodeo +won ton +swastika +bobby pin +papaw +retaining wall +Muscadet +heavyweight +energizer +banner +amusement park +whinchat +drugstore +waxwork +meander +congee +heat sink +switch grass +commuter +peony +western white pine +wild raspberry +nightgown +saute +cardinal +claret +pollinator +biryani +pina colada +cassette deck +European sandpiper +block +flan +birdcage +baby +lieutenant colonel +ticking +European white lily +dog violet +coat hanger +premature baby +organza +string bean +balloonist +hurricane deck +window box +hang glider +bullfighting +piste +seahorse 
+hard cider +batik +common mullein +petite marmite +stuffed mushroom +tequila +ground ivy +fountain grass +stray +putter +buffer +comet +bomber +woodcarving +baseball glove +halter +garnish +selvage +megaphone +sea fan +rabbit hutch +very important person +analog watch +long-head coneflower +northern pike +roll-on +cigarette butt +terraced house +penknife +windshield wiper +cricket +straightener +snow pea +cockerel +canister +sour bread +recovery room +toilet bowl +tyrannosaur +big sister +quartz battery +television receiver +vitamin C +tailpipe +field thistle +stonechat +col +monstrance +gift wrapping +herbivore +quarter horse +ice-cream sundae +rumpus room +eyepatch +clary sage +French lavender +snorkel +choir +tent-fly +cat box +horse racing +high priest +barrel cactus +pin oak +wild thyme +keyboardist +raiser +hammock +hail +bungee +chocolate mousse +major +buzzard +gopher tortoise +Chablis +water meter +benthos +donna +blender +Mauser +avocet +rye +mulch +chancel +dusty miller +mate +corbel +minaret +frittata +French toast +mosaic +home brew +water faucet +beard +swivel chair +acropolis +largemouth +abbey +tabby +driver +copperhead +stirrup +Boston fern +Tennessee walker +artichoke +honor guard +chapatti +enchantress +sweat pants +electric organ +column +dry vermouth +range hood +Red Delicious +rape +splint +catapult +gourd +antipasto +plaza +carnation +star +wood anemone +English primrose +male fern +boot +atrium +Japanese deer +carnivore +yearling +doe +guelder rose +chicory +stretch pants +ice-cream cake +frogfish +tarpaulin +chicken soup +balaclava +tor +feverfew +three-hitter +flyweight +aqua vitae +locker room +wether +teacup +wide-angle lens +hook +ladder-back +osprey +awning +wedding +chest protector +pooch +rose mallow +orange daisy +fondant +envelope +duckling +blackberry +goosander +snorkeling +philatelist +broad bean +Frank +bok choy +basket +absinth +cayenne +blackbird +bottled water +trooper +timber +stable +chestnut +tomatillo +bell +banquet 
+rainbow trout +macrame +appointee +heart +chipmunk +purple clematis +safety bicycle +shuttle bus +Japanese black pine +lentil soup +downhill +field mustard +brass +hand-me-down +greater yellowlegs +fanny pack +croquet mallet +hip roof +duffel bag +Ritz +document +pie plant +staff member +lifeguard +white-throated sparrow +Cameroonian +hydrofoil +platter +common ageratum +middleweight +chairlift +brunch +pharmacist +lemon +driveshaft +green snake +lip +London plane +mangrove +crystal +siskin +common jasmine +hollandaise +villa +cross-country riding +mother-in-law's tongue +generator +Tanzanian +whisk +seeder +ashtray +griddle +evening bag +bluebird +bran muffin +square dancer +luggage compartment +tropical pitcher plant +autofocus +tape drive +silencer +Hawaiian guitar +swamp sparrow +Zimbabwean +drawing room +weekender +liparis +streambed +samosa +hitter +water heater +tidal basin +ossuary +dik-dik +camouflage +fiance +Jordanian +rolling pin +slingback +turret +hen +jennet +playpen +woodhewer +bushing +church bell +bear grass +double knit +tennis pro +Joe-Pye weed +pave +pochard +painted beauty +crinoline +gumbo +trestle table +schnitzel +balloon flower +Turkish coffee +extension cord +wireless local area network +sluice +umbel +microeconomist +sky +aisle +commander in chief +hydroplane racing +poll +Coca Cola +fuel injection +bird pepper +monkey puzzle +English muffin +riverbed +varietal +kachina +airport +saltwort +oolong +red-hot poker +mihrab +cocoa +jersey +Walkman +syndic +Hessian boot +millstone +carpenter +outfall +curbstone +mocha +field pansy +patriarch +slacks +switchblade +killdeer +whelk +pampas grass +racquetball +platform bed +Indian rhinoceros +Japanese iris +blacktop +dinner jacket +stud +jodhpurs +telephone pole +business district +kurta +basil +handset +file folder +gloriosa +orphan +cantle +cookie sheet +cafe au lait +drawbridge +hill myna +Western diamondback +watch case +cardcase +bowling alley +mattress cover +canvasback +pompadour +cornice 
+matador +cigar cutter +skunk cabbage +baptismal font +bitters +refectory +egg +parula warbler +tiger lily +field house +nanny +skin-diver +soda water +lymphocyte +carport +chocolate fudge +amphitheater +sugar candy +sea hare +open-face sandwich +dessert spoon +staple gun +envelope +worker bee +general +garment bag +maypop +autobahn +Atlantic puffin +polo shirt +Humvee +spice rack +grotto +banderillero +gaillardia +black-crowned night heron +oboist +weigela +Dictaphone +dwarf iris +marsh mallow +yarrow +eccentric +catsup +jade green +mistress +henbit +beachwear +head +commuter +strawberry tree +chickpea +clothespin +fleabane +brussels sprout +winter melon +Laconian +great horned owl +caricaturist +nan +flowerbed +triple sec +dairy +round of golf +cardinal +kauri +Zulu +Armagnac +cowberry +mouthpiece +wild calla +bling +puppeteer +beer drinker +adder +field sparrow +chocolate pudding +blacksmith +finback +Shetland pony +cheese fondue +panty girdle +soda can +electrolytic +florist's chrysanthemum +yellow jasmine +tudung +equalizer +ridge +dulcimer +grappa +barn swallow +coneflower +enamel +poached egg +halfback +yak +toby +Fleet Street +blue catfish +sand tiger +flying buttress +snaffle +stoop +first base +cultivated land +first lady +waratah +headquarters +arnica +lovebird +common morel +parasol +disk clutch +Xerox +vitamin P +vitamin B12 +long sleeve +certified public accountant +hot pants +pitch pine +pantie +drawers +cake mix +boar +grey +bride +false sago +bullion +coach house +bass guitar +Japanese banana +meadow clary +black belt +Canterbury bell +smallmouth +treadmill +great white heron +enchilada +rummer +captain +camisole +wild garlic +oak fern +ultramarine +peach +hawkweed +autostrada +adit +anaconda +artwork +skinhead +jello +hermit thrush +Bewick's swan +dress suit +trail bike +stubble +common polypody +Riesling +Easter lily +telegraph key +envelope +garlic bread +perianth +salad bar +steppe +club sandwich +nude +garden forget-me-not +Tuareg +flood 
+Statehouse +charcoal +boy scout +Rhone wine +parfait +spoor +lanyard +octagon +brown bread +quarterback +quilted bedspread +hookah +Pepsi +hamburger bun +entrepreneur +saddle oxford +snake's head fritillary +undies +chemise +skidder +chickpea +carnation +honey bun +mortar +Montrachet +automobile horn +skylight +gingham +rafter +pantile +climbing frame +scarlet runner +cable +cornstalk +mockingbird +raisin bread +chili sauce +hand calculator +concert-goer +detached house +coq au vin +lasso +hyssop +globe thistle +paper clip +slide +Jerusalem artichoke +tetrahedron +mock orange +lemon lily +finger +little sister +handcuff +horse wrangler +pavlova +oilcloth +snow-in-summer +common mugwort +greenshank +ice-cream cone +rubber boot +gunnysack +disk jockey +long trousers +sorghum +pontoon +calf +fire extinguisher +cotton thistle +pilot whale +ao dai +steamroller +wristwatch +tawny owl +city +country store +ironweed +kennel +bathrobe +rattan +drawer +fly tent +choline +musk thistle +courthouse +Yugoslav +bush +trawler +shellflower +jade vine +ragged orchid +pea soup +King Charles spaniel +hubcap +snook +paddy +bow and arrow +shovel +dill +cliff swallow +cadaver +hijab +masterpiece +fish geranium +kettle +sanitary napkin +carrot stick +Mountie +peanut brittle +dam +jackal +windowsill +butterfly orchid +bodice +picador +pale yellow +beanie +petiole +tenor saxophonist +bungalow +gnomon +stock saddle +field glass +rigging +wood grain +Speaker +settlement house +swamp milkweed +paper nautilus +tangerine +champagne +crescent roll +library +Schmidt telescope +stemless carline thistle +motorcyclist +alpine ash +planchet +water closet +casuist +hand luggage +hyssop +spaghetti and meatballs +cannelloni +cedar waxwing +water dog +brick red +linkage +sweep hand +purple heather +macaroni and cheese +butter knife +refreshment +malt +St. 
Augustine grass +wainscot +compass +gas heater +tamale +table saw +referee +borsch +projector +dracaena +peppermint +Reuben +Abyssinian banana +glassblower +floss +small stores +artilleryman +lapwing +ranch +garbage man +dwarf banana +commelina +currant +adulteress +landlocked salmon +pasqueflower +nan +tiger lily +Eritrean +rotunda +catsup bottle +mezzanine +royal fern +blended whiskey +bowler hat +mistletoe +manor +fusee drive +pistachio +dispensary +swamp +amputee +sculptor +schoolmaster +Chinese anise +dwarf iris +livestock +chronograph +nectarine +jockey +plaster +motel room +swamp azalea +hippeastrum +space station +duchess +catacomb +dovetail +cockscomb +common spotted orchid +brittlebush +cleats +cloche +hotchpotch +cabin car +prey +indigo +light beer +bear's breech +jonquil +analyzer +alyssum +spur gear +ice tea +honey buzzard +twayblade +dirndl +atlas moth +croquette +carafe +flyweight +professional basketball +multivitamin +air terminal +phial +roll-on +skunk cabbage +bird of paradise +rose +cooter +camping +divided highway +herbage +sweet vermouth +common comfrey +eggplant +office building +glutton +gefilte fish +bicycle rack +swamp birch +Venetian blind +Pernod +Norway spruce +portrait camera +bastion +vitamin Bc +Ugandan +Indian red +okapi +emu +vin ordinaire +chintz +shrimp cocktail +numbat +tall oat grass +cable car +stopcock +ham sandwich +Yemeni +stanhopea +plate +chicken broth +common yellowthroat +California poppy +radio +chocolate egg +mess jacket +tea table +physostegia +Japanese flowering cherry +confectionery +chicken cacciatore +painted nettle +popover +white rice +strapless +mohair +electrical cable +coil spring +arterial road +miniature fan palm +spectator pump +pesto +interlocutor +eastern kingbird +dongle +vitamin B6 +stuffed tomato +cough drop +okra +black +barbecue +burial mound +firstborn +corn snake +amberjack +bollard +horn +Black African +elbow pad +Camembert +circle +Japanese apricot +hearing aid +rock star +creature +taster 
+bubble gum +scull +lemon balm +chaetodon +anemometer +brake drum +fuselage +courthouse +aqualung +yellow adder's tongue +reception desk +guy +buffalo wing +ginger beer +robin +pantothenic acid +marsh hawk +yellow journalism +exhaust +cardamom +Tabasco +ax handle +patriarch +floor +pine snake +spoiler +hood +sphagnum +parrotfish +orphanage +redpoll +beef Wellington +white spruce +cherry plum +scapular +field lens +broomstick +mouser +wood thrush +Nebraskan +hotelier +milk thistle +soya milk +Munich beer +boucle +snowy egret +dust storm +steward +kudzu +oriental poppy +presbytery +burro +orange soda +stonecrop +splashboard +menagerie +dormer +wire cutter +yellow bells +Dubliner +shore pine +cousin +racing gig +Morgan +gold plate +villager +snifter +granny's bonnets +egg roll +Spode +amabilis fir +babbler +pestle +heliopsis +halter +black spruce +President of the United States +ski slope +chocolate fondue +lockstitch +motel +Epipactis helleborine +tabbouleh +Yorkshire pudding +overpass +Timorese +presbyter +tablefork +bottle gourd +tiara +vintage +pilgrim +reindeer moss +shower stall +towel rack +kachina +chef's salad +breeder +cow parsnip +walker +Black woman +Irish coffee +portrait lens +lateen +gilt +successor +cargo container +Lithuanian +mayapple +paisley +highchair +strawberry jam +flying fox +field scabious +blue-eyed grass +screw +Frisbee +dressing room +cholla +walkie-talkie +red currant +centrifugal pump +smorgasbord +hot rod +marcher +rowanberry +welwitschia +amphitheater +pew +concert band +bosom +pillbox +seagrass +openwork +meadow goldenrod +shower +chicken sandwich +Boston ivy +plastron +oilfield +stuffed tomato +juniper berries +frame +Spanish mackerel +family room +powder horn +fight +maguey +bunker +work-shirt +air filter +nosh +sugar bowl +foothill +reliquary +tugboat +horsebox +grater +palace +board member +campsite +halibut +geneva +ginger ale +high commissioner +genet +bodywork +spaghetti +protractor +pipe cutter +wood anemone +turkey cock 
+surge suppressor +green turtle +spoiler +bedsitting room +television room +ballot box +shasta daisy +impeller +capote +bitter +California wine +lock +spinnaker +gill fungus +baby's breath +nut and bolt +moonflower +houseboat +distributor cap +coffee bean +gusset +bowling ball +knitwear +frieze +mistflower +roadster +cue +circuitry +brake +butt hinge +Chickasaw +leopard frog +wing tip +puree +mantel +pantheon +grandfather clock +cockchafer +pomegranate +cleaners +eyeshadow +Oregon cedar +rock hopper +hawksbill turtle +agriculturist +yellow-crowned night heron +Albanian +pumpkin seed +chateau +goggles +camper trailer +bracket fungus +cigarette case +signal box +saddle blanket +poison ivy +set gun +cattleya +dry fly +concert hall +personal digital assistant +talcum +deodorant +common starling +painted turtle +kea +plenipotentiary +pantyhose +masjid +buskin +hurdle +cocktail lounge +belting +sour dock +knife blade +sugar snap pea +paddle +dickeybird +brace +keep +call center +yacht +lead pencil +tumbler +production line +tetra +private +French window +express +ski boot +pinto +broad bean +American crow +screech owl +snapper +power cord +Manx +rambutan +sun deck +stonefish +golden eagle +national monument +readout +cork oak +hacksaw +beer can +bathe +tussock bellflower +wet suit +mihrab +big game +highlighter +sprocket +measuring worm +grapefruit +samovar +distributor point +steak knife +incubator +loon +temporary hookup +hippodrome +hot spring +spacesuit +flea market +clay pigeon +catbird +earmuff +tetherball +yellowfin +cellophane +lanolin +clapperboard +velveteen +police dog +cashew +sequencer +mango +duplex house +bazaar +Golden Delicious +red carpet +collet +kickstand +broadloom +diskette +tank engine +compact +diesel-electric locomotive +whale shark +water moccasin +mountain avens +tropic bird +ginkgo +ski cap +fixative +glockenspiel +chopine +ethernet +herring gull +skeleton key +finger paint +conference table +great crested grebe +harbor +white-crowned sparrow 
+Bullock's oriole +guestroom +boutique +cable television +roulette wheel +Luger +Latin American +trumpeter +blindfold +baby +freshwater bass +home plate +bonefish +giant sunflower +giant tortoise +planking +pigeon hawk +oceanfront +door +bazaar +common wasp +conformation +kick starter +kid glove +corydalis +shuttlecock +writing desk +ivory gull +shirttail +diving suit +weka +downy birch +altar +wild sage +tufted puffin +cabinet +Orpington +cineraria +bottom +dial +coracle +resort hotel +soap dish +spotted owl +billiard room +ghetto blaster +red-breasted nuthatch +hatchling +chalet +bracteole +crusher +mixer +net melon +farmhouse +Dutch oven +transept +penlight +palmyra +stewing pan +solar cell +crochet needle +black-winged stilt +germander speedwell +crinkleroot +truncheon +bunchberry +hatchback +sounding board +mixing faucet +chess master +bisque +Brie +Sitka spruce +pawn +Mexican-American +space rocket +choreographer +collared peccary +duffel +nacho +patchcord +carpet snake +omnivore +watering can +hall of residence +streamer fly +sunroof +great grandson +oil refinery +billiard player +ivy geranium +key palm +pinwheel +yellow-shafted flicker +purple onion +soldering iron +condominium +fishing gear +heat pump +marine iguana +cuckoo clock +Bletilla striata +headrest +spotted salamander +field hockey ball +pound +carboy +vertical stabilizer +groundsheet +cinnamon bread +acorn squash +sheathing +lakefront +Jeffrey pine +synthesizer +olive +apple +pannier +ponderosa +Jew's-ear +latch +equatorial +metasequoia +permit +bloomers +town hall +fava bean +casino +bier +jampot +common snapping turtle +clary sage +oatmeal +Dutchman's breeches +massif +Guyanese +heifer +handball +sweat suit +pomelo +Iceland moss +customhouse +sandbag +archer +gyrfalcon +sword cane +marmite +whole snipe +blue crab +sugar spoon +brownstone +chicken wire +lizardfish +dump truck +chicken yard +chamois +electric +idle pulley +jujube +wrestling mat +aoudad +Burmese cat +water shamrock +dormitory 
+Unknown Soldier +hearse +bumper +clipper +desert pea +critter +semitrailer +backboard +common St John's wort +Atlantic manta +song thrush +jukebox +quoin +eastern chipmunk +copper beech +paintball gun +bull +package store +fraise +royal poinciana +niqab +traction engine +objective +day nursery +ski lodge +orphan +summer house +cereal box +router +sleuth +jodhpur +polyp +croquet +sport kite +green onion +tulle +etagere +tussock caterpillar +rest house +elderberry +bridal wreath +Torrey pine +silver wattle +kidney bean +pentode +laelia +Allen wrench +sporran +red drum +tricot +heterodyne receiver +magazine rack +stone curlew +trawler +suckling +niblick +sandwich plate +double door +Togolese +pitching wedge +desert tortoise +cloth cap +date palm +webbing +jumper +frogmouth +copperhead +covered couch +black mallee +riser +scraper +gauntlet +pantheon +food court +muntjac +grocery bag +bread-bin +transmission shaft +primigravida +window seat +crab apple +seat +Fresnel lens +dendrobium +hatchback +little theater +butter dish +back porch +umbrella tree +carrot +seventy-eight +coconut +music stool +Tesla coil +bay willow +American basswood +sabot +wheel and axle +gazette +lute +bassinet +hart +mecca +breadbasket +silverfish +handball +Scotch pine +box camera +stately home +Hereford +tread +single-breasted jacket +desk phone +deodar +professional boxing +fly casting +box wrench +black oak +martello tower +red campion +bullock +sweet William +bay leaf +dollhouse +flounder +fox hunting +beanbag +king mackerel +rouge +film advance +common mallow +parasitic jaeger +satellite receiver +nurse shark +chesterfield +tomatillo +plimsoll +hatbox +bloomer +foul-weather gear +longleaf pine +horse mackerel +tree lizard +bark +belfry +Treasury +perch +purple finch +stag beetle +fragrant orchid +tachymeter +tadpole +cookie jar +knee piece +agueweed +bones +chick +golf glove +toothpick +taboret +rotor blade +field artillery +purple willow +redhead +spark plug +guava +voice mail +cross 
+butterfly valve +star magnolia +olive +room light +Australian turtledove +embassy +Iraqi +singles +nestling +spinning rod +radial engine +rowan +sandbox +boss +moccasin flower +veneer +mint +American chestnut +white whale +CPU board +florist +press box +hurricane lamp +giant kangaroo +greater whitethroat +winter jasmine +blue +department store +southern red oak +saber saw +corn muffin +bellbottom trousers +toaster oven +red eft +condominium +galago +sunbather +redpoll +common European earwig +songbird +linnet +light meter +bracer +tepee +gumbo +water glass +roofing +spathiphyllum +shofar +sand lizard +washroom +Brussels carpet +brachyuran +home room +floatplane +knee brace +solar heater +felucca +gas ring +maguey +manse +blue columbine +cuppa +cigar band +male orchis +mudskipper +couscous +Chinese parasol tree +dude ranch +banyan +gopher snake +sundrops +aviary +African daisy +missel thrush +Photostat +stone pine +circus tent +tangle +printer cable +grease-gun +rose chafer +light pen +plantain +hearth +bullfinch +post oak +slow loris +Newtonian telescope +head +punt +spindle +New England aster +spotted sandpiper +pond pine +grass skirt +bug +black rat snake +tabasco +bull shark +tennis camp +scrambler +popinjay +bing cherry +ministry +cash register +redheaded woodpecker +kameez +farmer's market +roan +harpy +European toad +pizzeria +camshaft +hemp nettle +chicken coop +cottage pink +daybed +observatory +airdock +mountain devil +newsstand +kingfish +snow gum +jackdaw +lacquerware +peeler +miro +sister ship +damask rose +pack +snowshoe +Liberian +paramecium +tidytips +professional tennis +bookend +wood swallow +cayuse +cranberry +rock squirrel +steak au poivre +soul patch +female mammal +sash fastener +songwriter +oxeye daisy +apse +floor joist +hand towel +wheatear +cero +soul mate +golden fig +bus stop +psycholinguist +convenience store +manor hall +mountain sandwort +Euopean hoopoe +haricot vert +mausoleum +violist +flashlight battery +chard +fixer-upper +bank 
martin +testudo +diving duck +kohlrabi +Omani +sphygmomanometer +greyhound racing +chestnut +rattlesnake plantain +chaffinch +wolf pup +teakettle +cairn +souk +resident commissioner +chuckwalla +gaiter +capercaillie +liver chestnut +bean sprout +land line +ambassador +green pepper +common chickweed +Sharpie +Oriental arborvitae +oncidium +pallone +currawong +sweet alyssum +fire tower +eyebrow pencil +redfish +apricot +clementine +blucher +wigwam +pangolin +buggy +common oak +jumbojet +laser +cigarette holder +racquetball +georgette +cleft +scouring pad +drum printer +pond scum +American red squirrel +caranday +swamp willow +blindworm +brook trout +defense system +nyala +three-way calling +mizzen +shuttle +African lily +Oregon white oak +rain tree +fuel gauge +oriental cherry +wahoo +pear +jungle gym +bass fiddle +outrigger +angelfish +Old World coot +lime +battlement +yarmulke +herpes varicella zoster +burp gun +Alpine glacier +stun gun +pilot boat +Southern crab apple +bushtit +pullet +polo pony +jackfruit +raw vegetable +French marigold +golden shower tree +spike lavender +wahoo +brass knucks +cabbage palm +diesel-hydraulic locomotive +red jungle fowl +prairie sunflower +rye +loofa +icecap +shade tree +secretary bird +saffron +cos +muskrat +videodisk +Carolina wren +candy bar +Bohemian waxwing +flowering almond +cold frame +raglan +pine siskin +quince +western red cedar +red maple +adobe +agora +kumquat +tenement +bantam +bayberry +water jump +great granddaughter +snips +porcupinefish +brochette +love-in-a-mist +Iceland poppy +common sage +pace car +camel racing +slipcover +nopal +shoehorn +calypso +rhea +in-basket +maple syrup +cold chisel +Pacific ridley +dietary +aperture +lapin +rock hyrax +house wren +litchi +ragged robin +control center +shoebox +arabesque +eider +silver birch +bantamweight +ax head +softball +blue gum +Bechtel crab +tomato sauce +green douglas fir +sweet gum +macaroni salad +red phalarope +budgerigar +Bedford cord +Uzi +green woodpecker 
+ohmmeter +bacon-lettuce-tomato sandwich +hackney +Easter egg +motmot +red pine +opium poppy +gat +pussy willow +greater scaup +ocelot +persimmon +western hemlock +carambola +pinion +Malcolm stock +bobsled +larkspur +wood drake +pinetum +red gum +draft beer +funnel +terrarium +Pinot blanc +doodlebug +brittle star +salsa +cantaloup +pollack +stockpot +eastern hemlock +rock wren +burqa +squash +aircraft engine +billy +flamingo flower +odontoglossum +old squaw +redstart +sheepskin coat +mate +flathead catfish +gentianella +bilberry +bog rein orchid +incense cedar +mew +Colorado spruce +cob +portmanteau +grenadine +common ginger +masdevallia +compound microscope +sobralia +white fungus +guppy +chapterhouse +honey +green frog +sea swallow +African marigold +astrolabe +verdigris +yellowhammer +carrot juice +oxlip +medicine ball +highboy +grass frog +gamebag +surgery +mincer +mulloway +cactus wren +box office +resonator +table-mountain pine +European curlew +supernova +cabbageworm +peach +plane seat +asp +Yquem +tomato hornworm +rook +quadruped +chador +micrometer +dabchick +Afro-wig +balsam fir +bucket seat +sage green +macon +blue poppy +chinquapin oak +black pine +spinach +chrysalis +carnauba +tee +bearberry +shirt button +tree of heaven +southern white cedar +covered wagon +brood hen +spadix +European catfish +winter wren +bulldog clip +carpetbag +study hall +chino +simian +closeup lens +cookie cutter +grapefruit +mandola +sassaby +Allegheny plum +piaffe +scorpion fly +booby +draft animal +field tent +cumin +laurel oak +smooth-leaved elm +American arborvitae +American toad +grinding wheel +mountain ash +cuttlefish +pipistrelle +parer +safety rail +Clark's nutcracker +side-blotched lizard +giant hornet +wicket +dugout +electric toothbrush +dhow +common four-o'clock +long-eared owl +anchor +near beer +tansy +creme caramel +guided missile frigate +shelduck +durian +compact +iron tree +shiitake +polo +camouflage +pedal pusher +salon +tangerine +lacebark +Swiss mountain 
pine +goalpost +poolroom +space capsule +wild cherry +dress hat +wave +raglan sleeve +cassia +Jerusalem artichoke +cabbage palmetto +marsh harrier +American redstart +sea squirt +cliff diving +sparrow hawk +watch cap +frankfurter bun +police boat +flash camera +neem +eastern meadowlark +Italian cypress +orb-weaving spider +graniteware +sewing basket +latex paint +rock dove +stator +leaf lettuce +roulette +broadcloth +Spork +panicle +sternwheeler +cider vinegar +brown creeper +cowfish +closed gentian +chickpea +port +pimento +sheeting +matilija poppy +hawk owl +guava +papaya +huisache +European shrike +racing skiff +yellow warbler +gumbo-limbo +North Carolinian +staysail +court +iced coffee +money belt +shaver +Psychopsis papilio +sumo ring +refection +kingfish +clock pendulum +greater butterfly orchid +disk harrow +tawny eagle +polyphemus moth +pieplant +Nicaraguan +bocce ball +California box elder +porbeagle +crown of thorns +Mexican sunflower +fennel +stream orchid +slip ring +white fir +fold +moss campion +fairy ring +hose +pony-trekking +western larch +meadow pipit +Cape May warbler +longan +bookmobile +junk shop +lemon shark +smelling bottle +solan +widow +sea pen +universal joint +day game +goldcrest +maiden pink +biographer +rotunda +oriel +arranger +gambrel +Angora +fen orchid +leading rein +Wilson's snipe +European nuthatch +natterjack +athletic supporter +mouflon +emergency room +swallow-tailed coat +western meadowlark +feather star +Navy SEAL +toilet bag +loquat +lesser butterfly orchid +thumbhole +breathalyzer +featherweight +collards +mayfly +confessional +mountain ebony +redwing +Norway maple +refractometer +stagecoach +gasoline gauge +octopus +baker +Rhode Island red +European tortoise +cardiologist +Punjabi +Arkansas kingbird +tamarind +drum brake +flash +yellowtail +stokes' aster +emperor +free house +sour gum +ruddy duck +hamadryad +command module +tinamou +Norway lobster +washstand +European hornbeam +roaster +black-necked grebe +tallgrass 
+leopard lizard +anastigmat +Blackburn +deutzia +ground rattler +Christmas fern +wild pink +sesame seed +carrycot +Italian parsley +nectar +roll-on roll-off +true laurel +anisette +candy corn +flowering maple +revers +dun +tobacco hornworm +common sunflower +common grape hyacinth +cardiograph +electric meter +herb Paris +goalmouth +spruce grouse +canopy +wind poppy +stemma +gateleg table +lumper +speckled rattlesnake +gudgeon +rough-legged hawk +internal drive +pomelo +piece de resistance +storm door +clementine +Japanese pink +settler +yellow jacket +Fraser fir +royal palm +cicada killer +cayenne +guava +bluewing +red baneberry +lesser yellowlegs +cache +bog rose +sparring partner +ski jumping +sherry +glacier lily +beer mat +shredder +American widgeon +protectionist +green olive +black-tailed deer +Alpine fir +dispatch case +whipping cream +African daisy +cantilever bridge +maraschino +rhea +ink bottle +dacha +hagberry tree +lesser rorqual +orchard oriole +candidate +cuticle +breadfruit +fishbowl +giant puffball +closed gentian +Joshua tree +tie rod +beard lichen +flame tree +stegosaur +acerola +Swan River daisy +common murre +flowering almond +protegee +loggerhead shrike +Wilson's warbler +Japanese honeysuckle +basilisk +skimmer +hybrid tuberous begonia +pumpkin ash +chafing dish +collared lizard +iced-tea spoon +scrubbird +Iceland poppy +grey kingbird +wallflower +slick +diesel +Swiss pine +ethernet cable +ketch +lightship +black cherry +swordtail +Monterey cypress +lightweight +Floridian +Sabine +stall +contact +viola da gamba +hemstitch +upland sandpiper +box spring +sassafras +radome +lesser scaup +bluefin +yellow-bellied sapsucker +armored car +cabin class +Moorish arch +webcam +aquavit +overall +sergeant major +soft shield fern +gin and it +bobolink +subcompact +falconer +black morel +roadrunner +lab bench +thong +coffee urn +weeping beech +caladenia +southern live oak +scanner +wine vinegar +common speedwell +European roller +fuji +snag +piping plover 
+concertina +secateurs +meat thermometer +supercomputer +funnel +dais +western fence lizard +spruce pine +pommel horse +Cassegrainian telescope +pitta +India-rubber tree +mangosteen +tamp +aposematic coloration +dustcloth +birth +Atlas cedar +reed bunting +jabiru +sainfoin +press photographer +golden oriole +laryngoscope +thermal printer +winder +doubles +cricket ball +dabbling duck +tonic +Buddhist +Morris chair +swatter +quaking aspen +ancient pine +American larch +evaporative cooler +click beetle +yellow-breasted chat +souchong +bluegill +pied-billed grebe +tricorn +spring beauty +southern magnolia +rowel +chili +hard roll +flathead +satsuma +gangplank +bourguignon +cockfighting +greenwing +plum tomato +fly orchid +gnatcatcher +spotted eagle ray +ovenbird +brassavola +mocha +candy cane +afterburner +thriftshop +study +winter crookneck +grinder +muskellunge +sacred ibis +inverter +sandwort +deer fern +stair-carpet +Cotes de Provence +ovenbird +rex begonia +American woodcock +poison ash +lowland fir +pawpaw +loblolly pine +kinkajou +European hackberry +pest +coralwood +Bedouin +acetate rayon +snuffbox +radiator cap +basket oak +table-tennis racquet +smew +midge +telescopic sight +radish +great burdock +separate +damask violet +broadbill +bourbon +blacktip shark +gift shop +khimar +date +woodland caribou +policeman bird +grey birch +American elm +strawflower +officiant +hart's-tongue +straight razor +Spanish elm +radicchio +white croaker +vicuna +soft-shell clam +flannel +adonis +bonito +kittiwake +English walnut +soldierfish +hipflask +spotted crake +Streptopelia turtur +American maidenhair fern +corn cockle +telephone cord +canopy +playback +diocesan +marsh orchid +manakin +purple grackle +cob +fishmonger +otoscope +vermillion flycatcher +inhaler +instar +licentiate +myrtle warbler +goat herder +benthos +toggle +drumhead +piranha +doorplate +vault +triptych +red-necked grebe +transporter +vernier caliper +flathead +Portuguese man-of-war +countrywoman +vacation 
home +Bactrian camel +night-light +module +lemon curd +carancha +painted daisy +bok choy +ratatouille +troll +escarpment +cinnabar +computerized axial tomography scanner +lychgate +sowbread +bedside +guided missile cruiser +reel +cleat +hemostat +blue shark +Seven Wonders of the Ancient World +motorized wheelchair +pillow block +horned puffin +prickly pear +electric range +mother's daughter +vein +Oregon maple +bird dog +faceplate +wren warbler +feather reed grass +common alder +Adam's needle +straitjacket +organ-grinder +gantry +bikini pants +peristyle +herpes +terry +toad lily +celandine +red-breasted sapsucker +bragger +green peafowl +fuschia +quoits +house martin +dome +herpes simplex 1 +touraco +meeting house +vacuum gauge +cat's-ear +crisphead lettuce +carpet moth +European rabbit +puff adder +Old World scops owl +fire pink +fruit punch +ant bear +black walnut +stroboscope +white mangrove +pine grosbeak +cast +check-in +ring-necked parakeet +matai +shingle oak +fieldwork +rue anemone +landing net +ouzo +herringbone +lyceum +hydrogen bomb +mullein pink +masher +evening grosbeak +water vole +livingstone daisy +tomatillo +cavalier hat +interphone +wild lupine +goosefish +sugar maple +plantain +white dead nettle +Monterey pine +bugle +veloute +marsh gentian +Bermuda buttercup +alehouse +Peter Pan +thong +LP +tulip tree +scanner +scarlet tanager +music hall +angel shark +pecan +eight ball +rosy boa +outboard motorboat +garage +fanlight +black cottonwood +notornis +mountain fern +lunar crater +reddish orange +whitetip shark +executant +European ladies' tresses +washboard +revolving door +case knife +balloonfish +greater kudu +tarpan +cog +wet fly +Irish soda bread +basement +broken arch +canopic jar +muscat +kazoo +bobsledding +loaner +black guillemot +English saddle +garlic mustard +Foucault pendulum +mulberry +clotted cream +dove's foot geranium +Atlantic ridley +convector +ground floor +European wildcat +poinsettia +hideaway +great barracuda +black beech +bushy 
aster +cornflower +tam +true slime mold +carving knife +holly fern +railroad tunnel +crimson clover +disposal +etamine +suspension +plasmodium +political scientist +minnow +Spanish rice +twist bit +subway train +Scleroderma citrinum +saw palmetto +console +gimlet +hand pump +waratah +rock rattlesnake +keel +server +curlew sandpiper +hone +sable antelope +inkle +photostat +foresail +sallet +tiger salamander +chutney +onlooker +Exmoor +tiramisu +drawing room +battery +sour orange +juniper berry +beeper +funeral home +fescue +Maksutov telescope +ranch house +jai alai +carob +socket +popcorn +sandbar shark +pipal +summer tanager +oast +skipjack +rolling stock +dropper +great snipe +turnip greens +cowpea +honeycomb +ichneumon fly +maternity hospital +harp seal +nylon +bomb shelter +horse tick +litchi +camel's hair +mimosa +bur oak +anvil +belay +pinhead +continental breakfast +burglar alarm +Mojave rattlesnake +auxiliary storage +lightwood +ratepayer +cecropia +retractor +quadrate +pepper tree +Venus' slipper +abattoir +strawflower +firewater +purple saxifrage +black rat +pack +pepper pot +mayweed +winger +whitetip shark +great yellow gentian +snowdrop anemone +garden angelica +soy sauce +white poplar +inkwell +crouton +gas gun +honey locust +house of cards +ice maker +moquette +arrack +casualty +butterfly orchid +eau de vie +mosquitofish +prairie smoke +haft +horseshoe +steel +peach orchard +Mexican hat +encaustic +shoe +pennywhistle +sweet woodruff +hull +doorsill +globe amaranth +day school +housedog +crown princess +oxbow +maxi +positron emission tomography scanner +compere +European turkey oak +peanut +sentry box +house physician +hot line +loquat +rove beetle +riband +flowering fern +fan vaulting +ceibo +bongo +bat boy +omelet pan +European ash +breadwinner +gaff topsail +clerestory +bushbuck +bluethroat +khukuri +Father +portcullis +candy egg +brake lining +lawn furniture +buckskin +garden pea +Brazilian rosewood +Italian bread +horn poppy +silk tree 
+Christmasberry +hotel-casino +poplin +false lupine +desert sunflower +mimeograph +alpenstock +cork tree +cultivar +common mosquito +pollard +black marlin +understudy +lancet window +college +breadfruit +Herero +Labourite +bar printer +squaw grass +stelis +firing chamber +sycamore +artificial horizon +radiologist +pansy orchid +bicycle pump +wraparound +bell gable +home computer +orchard grass +carving fork +bergamot +honeycreeper +sewing room +radiator +core +brown bat +goose grass +adjutant general +Erlenmeyer flask +massasauga +tail rotor +cardinal tetra +Drambuie +wine palm +Sarcoscypha coccinea +shantung +Calvados +garganey +vicar +house mouse +creeping oxalis +digital subscriber line +cedar elm +backgammon board +blackberry-lily +pallid bat +New Zealander +Barbadian +rose geranium +European spider crab +gharry +electric hammer +mustard +Chinese lantern +laundry cart +filament +mozzarella +gooseberry +sukiyaki +porkpie +culvert +altazimuth +plum pudding +serin +Spanish dagger +Asian crocodile +crevalle jack +mascara +pig bed +alderman +northern shrike +Sufi +purple-fringed orchid +derringer +linseed +hockey skate +bell jar +Japanese wistaria +mantled ground squirrel +western toad +lieutenant commander +mechanical piano +ovoid +paddlefish +demijohn +coast live oak +brick +gearset +tailstock +phonograph needle +winery +tuberose +mother's boy +shot tower +crucian carp +carpet pad +lamb's-quarter +Menorah +common white dogwood +hypanthium +rosebay +wild medlar +soil horizon +sweet orange +bitterroot +hand glass +cloisonne +towpath +gum ball +margay +carambola +bolt cutter +charger +vibraphone +gueridon +elephant tree +wood-frog +ash grey +duffel coat +third base +chunga +glebe house +lake trout +encephalartos +Japanese oak +northern red oak +pruner +blue orchid +Biloxi +western wood pewee +corselet +alabaster +anechoic chamber +grass pink +wax begonia +blue daisy +pennyroyal +Asian tiger mosquito +cheese souffle +flat bench +caramel +sump pump +bush violet +common 
fennel +corner +skullcap +asparagus fern +white mangrove +calceolaria +sateen +saltbox +hollowware +head nurse +coal miner +mountain lily +tufted vetch +European perch +line officer +steamer +stickball +shin guard +cauliflower +Monegasque +hatpin +wolffish +trackball +khaki +arthrogram +rocket larkspur +naval commander +Gemini +ski binding +department head +Chenin blanc +wingstem +knothole +aerides +sweet bay +tautog +gangway +waterspout +Hudsonian godwit +armyworm +incinerator +kidney vetch +pine nut +cypress vine +hip tile +sorrel tree +relay +bench press +Kentucky coffee tree +dobson +sapling +false lily of the valley +veld +phaius +vitamin B2 +beaker +wall tent +sieva bean +dusty miller +sewing kit +cavalry horse +diaper +butterfly pea +Spam +saddlebill +pearly everlasting +kowhai +Sister +moneywort +organdy +pine marten +bareboat +hot-water bottle +baby blue-eyes +silver lime +common cotton grass +malmsey +blue pea +baggage car +pineapple +folding saw +cotton rose +brawler +black duck +Weizenbock +pool player +Gujarati +wild duck +purple sage +sage grouse +mail train +arm guard +short-spurred fragrant orchid +queen +eparchy +spring peeper +ortolan +shoulder +fighter pilot +American beech +snowcap +novitiate +roller +butcherbird +canyon oak +brompton stock +firebrick +rudder +light cream +Primus stove +nonsmoker +probationer +harp +kosher +surcoat +videotape +zebu +first class +yam +car +rissole +miso +funambulism +attic +curling iron +shutter +encolure +split-pea soup +yellow rocket +gas oven +ultracentrifuge +chamomile +canteen +eyeliner +yellow squash +Irish stew +collar +doublet +machinist +septic tank +snap bean +Polyporus squamosus +western tanager +creeping St John's wort +back +sinkhole +perforation +Romanian +epergne +fez +comfrey +sidecar +beach pea +screen door +instigator +plughole +woodbine +pigweed +hip pocket +common scoter +squeegee +Surinam cherry +porringer +body stocking +eatage +shallot +enlarger +common canary +trophy case +gun case +plow 
horse +hot plate +pearl oyster +margarita +madras +backspace key +pigeon guillemot +pajama +buckthorn berry +homestead +bedbug +Linotype +trundle bed +granadilla +theremin +chin rest +bouillabaisse +tumble-dryer +truffle +cassava +kurrajong +gyroscope +European silver fir +C-clamp +politician +green soybean +exponent +flame tree +scissortail +achimenes +crown daisy +soft tree fern +spaghetti squash +pale violet +beaver +dashiki +washboard +driving wheel +sack +foulard +sputnik +boatbill +English elm +sack coat +grog +golliwog +Malayan tapir +May wine +calash +stile +windjammer +American sycamore +rotor head +fast food +balata +dragonet +Emmenthal +metronome +negative +meadow saxifrage +rabbit ears +chenille +round +hobby +crankshaft +Wilson's phalarope +Murphy bed +soil pipe +forecourt +policyholder +tarmacadam +loyalist +gyro +Queen's crape myrtle +shortcake +apple butter +pumpkinseed +heronry +yellow perch +baggage claim +escarpment +diaphragm +mescal bean +shunter +flax +columbarium +Joe-Pye weed +Neandertal man +casement +hole-in-the-wall +Verdicchio +futurist +eaglet +tassel hyacinth +pup tent +fawn lily +cabbage palm +pogonia +hospital ship +water mill +Oregon grape +lentil +grindstone +banana split +inkberry +coonskin cap +bazooka +wrap +anise hyssop +Java sparrow +red-eyed vireo +common opossum +clintonia +bustle +booster +tribesman +soy +panhandle +jaboticaba +locking pliers +Sauvignon grape +ghat +screw +oximeter +white croaker +saucepot +eggbeater +reticule +cabbage bark +looking-glass plant +head gasket +California sycamore +cowbell +Aleuria aurantia +Herr +lever +spider orchid +cashew +shift key +solar house +wood chisel +white +mantilla +stamp +bolero +rear admiral +garden rake +Lao +crowbar +lapdog +buttermilk biscuit +yellow bedstraw +pickerel frog +dowel +serjeant-at-law +mill-hand +lambrequin +state treasurer +red silk-cotton tree +coiffeur +star anise +shoulder pad +marshal +sitar player +gown +ground cedar +hedge maple +caddie +pitahaya +corn 
marigold +stick cinnamon +woodland star +Eurasian green toad +anti +blueweed +medicinal leech +gaur +chocolate kiss +kit fox +mother +butte +audio CD +blast furnace +vitamin D +nutgrass +cornice +black sheep +hearing aid +lingonberry +quad +lentil +riding crop +pratincole +pentagon +sea lavender +nerita +flatmate +catboat +water clover +angiopteris +mushy peas +crown imperial +music school +woodshed +platy +Turk's-cap +rundle +reading teacher +hardtack +balloon sail +oriental spruce +bluefish +white mulberry +horned violet +satin bowerbird +treasure flower +sustaining pedal +mimosa +spurge nettle +sea green +hasp +lederhosen +pink cockatoo +long johns +basket weave +freewheel +thrust bearing +timber tree +orphan +falafel +common camas +bird of passage +bird's foot trefoil +electric eel +fizz +grape arbor +serape +brace +hazelnut +kylix +horse mackerel +cassia bark +lizard orchid +spat +Brown Swiss +pocket flap +pillory +purplish blue +rolling mill +tappet +broccoli rabe +semi-detached house +mushroom coral +fly orchid +nougat bar +ball hawk +sand wedge +shirred egg +black locust +strip lighting +drop scone +brush turkey +ball +tragopan +dallisgrass +tuatara +great knapweed +potentiometer +Kiliwa +Pacific bottlenose dolphin +accelerator +Darwin tulip +osteopath +Arizona cypress +manna ash +butterbur +cornelian cherry +American holly +nopal +tanker +foreshore +ditty bag +gas lamp +safety razor +chanter +fomite +chip +striped killifish +catalytic converter +plaice +dusty miller +takin +gerenuk +corn chamomile +Japanese pagoda tree +boneset +common osier +Guinean +taro +plotter +celandine poppy +churn +steenbok +edible mussel +sensitive fern +triode +black raspberry +zoo keeper +feather ball +dredger +starlet +cornpone +coat button +rosinweed +toy Manchester +crested cariama +finger food +basilisk +shotgun shell +comfort food +mountain hemlock +candytuft +Stilton +record changer +anklet +ball valve +Mediterranean snapdragon +BVD +sand cat +Galloway +nutmeg +water-mint 
+woodwaxen +citron +ark shell +federalist +drone +cheekpiece +hyperbaric chamber +addax +field-emission microscope +synchronous converter +men's room +medlar +electronic fetal monitor +Sazerac +false indigo +roof +passe-partout +meadow spittlebug +Phytophthora infestans +oast house +hedge nettle +voting booth +slender salamander +telephone jack +true bug +scouring rush +Scotch egg +matchbook +aperea +cytomegalovirus +garlic press +cove +whitebark pine +Slovene +narrow wale +mother's milk +Audubon's warbler +prickly poppy +cowl +tailorbird +mud brick +bamboo palm +welt +Afghan +Virginia spring beauty +dinner bell +night jasmine +fly rod +microtome +aerie +carinate +picker +brick trowel +loving cup +swathe +green mayonnaise +rivet +bandbox +newsroom +tea tortrix +bobby +gig +hush puppy +garlic chive +piston rod +aspidistra +bluejack oak +harvest-lice +strap hinge +sour mash +macadamia nut +histiocyte +fan belt +shelf bracket +abelia +Hottentot fig +fish chowder +abettor +compote +beige +dioon +hop +haymaker +oilskin +magnetometer +tool bag +tambour +call girl +gringo +fairy light +broad-leaved plantain +second base +zebra mussel +Japanese cedar +pistia +swamp chestnut oak +cashmere +double cream +samisen +lamb curry +companion +kapok +julep +sweet woodruff +gardener +jewfish +inspector general +collembolan +wheel bug +bass +scrubland +wryneck +macrozamia +trouser press +clove +tiger cowrie +yawl +collard +dildo +pony cart +ormer +annual +tessera +chancellery +two-toed sloth +queen +old lady +wringer +spritzer +baggage +black mangrove +black-eyed Susan +semifinalist +highlighter +alfalfa +Easter daisy +escapement +operating table +neutral spirits +bursar +roble +entablature +girl wonder +farm boy +ring ouzel +permanent press +auklet +beefsteak tomato +gaming table +tea bag +manul +giant bamboo +Ozark chinkapin +matzo +furrow +smoothhound +CD-ROM drive +powdery mildew +copilot +garden +American merganser +bunsen burner +Asian longhorned beetle +lead tree +creeping 
buttercup +Percheron +back brace +axseed +cub +soul food +rabbi +edelweiss +mineshaft +fox grape +sandwort +torque wrench +leisure wear +Mae West +broccoli +loach +maraschino +heavy cream +silkworm +cirque +vintner +whitewash +butterfly pea +two-toed sloth +midiron +ceriman +Bulgarian +operating microscope +sambuca +California fuchsia +silver maple +tangelo +black bean +lugsail +starting gate +leek +sunflower seed +fish fry +clinker +synagogue +coscoroba +brae +uphill +common limpet +golden plover +cedar of Lebanon +amphibian +Canary wine +taipan +agua +feeder +parallel +mater +pink calla +meat counter +yagi +crab cactus +cacao bean +bowfin +alley cat +stonefly +Eastern cottonwood +vernier scale +marginal wood fern +dancing-master +detective +yam +textile screw pine +hooch +spinet +single prop +sassafras +goose barnacle +triple cream +China tree +peeper +dressmaker +snatch block +ironmongery +dressing case +creeping bellflower +silver sage +honeydew +eastern red-backed salamander +peg +nombril +danish +mashie +anarchist +alligator snapping turtle +shepherd +American white pine +runner +chalice vine +rheumatologist +defibrillator +yellow chamomile +lemon balm +peacekeeper +native beech +sandwich board +Bavarian +titrator +paneling +deer mouse +poteen +sugar snap pea +meadow salsify +town crier +best +basinet +common myrtle +night lizard +cushaw +Tampax +camphor tree +gentile +orange peel +putty knife +pyromaniac +Brummie +fever tree +double +nest +inferior +cabbage tree +graduated cylinder +mucor +woodborer +earthwork +potato salad +four-hitter +gooseberry +water vole +ziggurat +grapefruit juice +four-in-hand +cranberry bush +diode +videotape +Mohican +niacin +beetroot +shirtsleeve +cork tree +two-eyed violet +white ash +drawing chalk +baked Alaska +bone-ash cup +toastrack +diastema +bed jacket +dwarf astilbe +yellow honeysuckle +cow pasture +sheet pile +saxhorn +upholstery material +California white oak +Spanish bayonet +horsemint +littleneck +deflector +magician 
+standard transmission +blue marlin +shallot +feijoa +collar +board +jump suit +common staghorn fern +priory +Xhosa +Loranthaceae +barbecued wing +barmaid +spit +lemon juice +umbrella plant +field pennycress +centenarian +queen bee +fish stick +black bread +dirk +secularist +German American +spotted weakfish +iron foundry +speed bump +yellow-fever mosquito +gag +frame +black-eyed pea +alcoholic +involucre +sperm whale +balanced diet +wax bean +butcher's broom +winter heath +Mainer +Australian pine +gas guzzler +double-breasted jacket +pod +palo verde +trimmer +wattmeter +dyer's woad +crotalaria +vine maple +sulky +jack pine +thumb +Wilton +Panchen Lama +welder +badminton court +business editor +Arabian coffee +Kamchatkan sea eagle +foamflower +steep +plane +freckle +cerebral cortex +Vouvray +tea +forest tent caterpillar +neckerchief +accelerator +jig +bridal wreath +highball glass +New England clam chowder +beach strawberry +call waiting +baton twirler +double boiler +Dutch elm +car bomb +filmy fern +breviary +Florida gallinule +dace +parsnip +riparian forest +crescent +earplug +grab bar +cusk +foglamp +screwtop +black mangrove +mascot +Welsh poppy +gas holder +support hose +salsify +red beech +Indian python +caroler +pineapple juice +lowboy +terra sigillata +black olive +hypodermic needle +radio-phonograph +moussaka +miter joint +creche +tuning fork +black wattle +affiliate +vertical tail +kiwi +red morning-glory +piping crow +runway +Kashmiri +studio apartment +sea feather +Judas tree +boatbuilder +corn earworm +fallboard +Victrola +lechwe +goat willow +turret clock +Canada anemone +leaf lettuce +savoy cabbage +headpiece +Lebanese +fothergilla +hemlock +toolshed +silver tree +blue-headed vireo +weatherman +cylinder +caltrop +adjutant bird +driving iron +millet +European woolly thistle +rose apple +clown +schoolfriend +eastern coral snake +barbecue +executive vice president +long-billed marsh wren +brittle bladder fern +tank destroyer +left-hander +matting 
+catchment +balsa raft +eastern fence lizard +color tube +corncrib +electric typewriter +westland pine +elder statesman +whey +plonk +mound +cittern +nest egg +copyholder +China aster +basking shark +gavial +common duckweed +vanilla orchid +red-shafted flicker +granadilla +sylph +sty +vest pocket +potherb +little brown bat +Trapezium +ordinary +adult +purple-fringed orchid +abseiler +disco +metal detector +beefsteak fungus +ilang-ilang +barley grass +hawser +suture +brake shoe +staghorn coral +barbecue sauce +Browning machine gun +sarcophagus +disa +oven thermometer +rosemary +track +gorget +quince +royal +piston ring +teak +pin cherry +Komi +walking fern +sloe +synchronous motor +fire-bellied toad +Teleprompter +co-star +cape gooseberry +oscillograph +bass clarinet +cock of the rock +Tyke +showy milkweed +safety valve +branch water +sweet marjoram +hugger +crampon +fairy godmother +band-tailed pigeon +snow-on-the-mountain +minibar +foreland +grosgrain +dita +rampion +calligrapher +jointed charlock +master +sheepshead +barrelhouse +Carolina allspice +mastic +brake pad +whiskey sour +casement window +conveyer belt +stolon +pavonia +shinny +witch elm +logwood +hostel +pageboy +vesper sparrow +pyrrhuloxia +common carline thistle +wafer +boysenberry +screw augur +hack +American white oak +governor general +Mother Hubbard +game fowl +drosophila +delft +nymphet +tollbooth +chough +Russian dressing +plum tomato +American saddle horse +dusky salamander +black medick +red valerian +cordage +Elastoplast +conacaste +backlighting +swell +riveting machine +cowpen daisy +openbill +water speedwell +picture hat +crested myna +servo +bletia +garden trowel +muscadine +common caper +false lily of the valley +aralia +sharp-tailed grouse +cigar smoker +bandoneon +Chinese alligator +crazy +point lace +charcoal +Texas horned lizard +marinara +backstay +Gatling gun +piston +game fish +fall armyworm +grammarian +beer hall +guadalupe fur seal +sugar palm +peanut +velvet ant +light machine 
gun +rya +cling film +adobo +myrtle oak +angelica +balsam apple +windbreak +brother-in-law +snap brim +automobile factory +clavichord +dusky shark +edible banana +altar boy +California lady's slipper +schoolbag +wax bean +Atlantic walrus +bullpen +straw wine +thatch palm +potluck +tamarind +charcuterie +sod house +tie rack +liebfraumilch +clinician +scarlet lychnis +Spanish iris +bread knife +water oak +bedpan +Angolan +bassarisk +Alaska fur seal +African wild ass +milk float +froghopper +Verpa bohemica +water cooler +chop suey +ranker +red helleborine +Prince of Wales +marmalade tree +car train +giant red paintbrush +desert sand verbena +right whale +baron +stevia +asterism +five-spot +catapult +Silex +fiberscope +refresher +beef Bourguignonne +snood +divot +waterproof +crabeater seal +Missouri primrose +bumper guard +rock opera +Lilo +coffee can +smokehouse +buffalo grass +propjet +ice tongs +poop deck +acorn barnacle +veal parmesan +shower room +collins +ringhals +silage +jawfish +trouser cuff +contour feather +songstress +rachis +White Russian +stanchion +mastaba +flatbed press +viand +legal representative +espalier +organic light-emitting diode +sushi +scorer +haricot +pinna +plectranthus +jungle cat +dried apricot +coach horse +white fringed orchis +veal cordon bleu +bath +dallier +marching order +donkey jacket +Panama tree +aerator +klaxon +pinnacle +shouldered arch +lesser celandine +common eland +Grand Marnier +cock of the rock +phlomis +Japanese umbrella pine +morning room +dead-man's-fingers +little auk +bascule +house paint +home fries +great skua +cesspool +flying gurnard +wild crab +checkerbloom +Wollemi pine +cheese dip +coif +charwoman +tea ball +waif +Arctic ground squirrel +parishioner +stabilizer bar +potentiometer +black cohosh +medlar +willow oak +cascara buckthorn +scoutmaster +Canada lily +poppy seed +paper mulberry +blackthorn +garrison cap +inductee +aeschynanthus +interior live oak +black spleenwort +wild service tree +sling +nicad +swab 
+sego lily +eiderdown +fruit cocktail +pallasite +weeping spruce +shiv +sea lamprey +coachman +half binding +American white birch +gainer +Concord grape +yellow birch +fucus +common room +io moth +red osier +crucible +galangal +salmagundi +pepper steak +cap opener +swizzle stick +tomato juice +Nobelist +Sarawakian +African monitor +sleeping beauty +stereoscope +curd +pyramid bugle +applejack +dosser +rake handle +pilot light +Eames chair +Scotch and soda +bell heather +dinette +blackpoll +dogie +sound camera +cattle guard +mashie niblick +edible cockle +monocle +steak tartare +partaker +sidesaddle +communications satellite +porkfish +water hemlock +drawbar +ultramicroscope +Jamaican cherry +craftsman +lovage +common apricot +drum majorette +backsword +smooth alder +Amniota +dribbler +theosophist +dolman +ivory tree +Green Beret +pipe smoker +mayoress +mignonette +crampon +henbane +kirtle +death's-head moth +instep +great St John's wort +lorry +black-necked cobra +ball carrier +Jordan almond +byway +earless lizard +marble +andiron +high-protein diet +buzzer +ice floe +crankcase +Bofors gun +sockeye +veery +Delaware +caravansary +prairie coneflower +star apple +suiting +cot +call forwarding +American gallinule +glossy snake +rose chafer +instant coffee +placket +Tarahumara +pulsar +philodendron +orange tortrix +cypress spurge +Welsh rarebit +music box +giant crab +vanilla bean +water thrush +prayer shawl +gouge +promoter +dagga +black currant +bitter cassava +drain basket +snare +digital audiotape +retainer +olive drab +gluten bread +graham cracker +cheddar pink +caregiver +spray paint +Anglo-American +boatyard +backbencher +Link trainer +bell arch +weir +arbor +millionairess +sour cream +earthtongue +crawlspace +crossjack +balalaika +crupper +western redbud +guinea hen +rangeland +gaboon viper +common louse +single-leaf +horseshoe +balsam poplar +triskelion +jack-in-the-box +jester +rain stick +glove compartment +imperial moth +Japanese beech +biotin +turnip 
+oligarch +western skink +mudguard +retsina +data system +green bristlegrass +visiting professor +beaded lizard +weathercock +Sloppy Joe +high tea +lightweight +record sleeve +cooler +nodding onion +pigs in blankets +torque converter +district attorney +bunting +orrery +radiator hose +common plum +wood spurge +calamus +chicken Kiev +pin +lath +telephone bell +thistledown +audiotape +gypsy moth +snuffer +pari-mutuel machine +peanut butter +hearthrug +sack +Old World yew +chives +stovepipe +xenolith +mattock +mangle +electric chair +backup system +Empire +blackwash +dodder +Allegheny chinkapin +finger plate +junk +brown rice +wild angelica +chinaberry +mason +rasp +den +violet wood sorrel +nosewheel +plenum +merino +kirtle +Igbo +ensign +sex symbol +Belgian endive +sugarberry +yellow salsify +purple emperor +atlas +African clawed frog +leatherjacket +midwife +sac fungus +European cuckoo +three-day event +Mexican poppy +wagon tire +armyworm +rain gauge +Oregon ash +columbarium +spectrophotometer +Milanese +pointing trowel +casualty +Eastern hop hornbeam +lobe +mouthpiece +au pair girl +giant water bug +Browning automatic rifle +laser-guided bomb +drone +white alder +cockleshell +mufti +gravy +berm +boat hook +marshmallow +pet shop +cowpea +tactician +wading pool +anchovy dressing +flip +shackle +Wedgwood +thick-billed murre +erecting prism +giant salamander +sleeper +quiver +chain store +wing tip +New World tapir +witches' butter +gendarme +ginseng +common maidenhair +graduate nurse +balsam pear +hoatzin +philanthropist +axle bar +gas meter +moth mullein +ragbag +Chinese cabbage +celery stick +rutabaga +scalpel +cape marigold +variometer +argali +brig +shuffleboard +wort +Orlon +epiphyllum +allice shad +coffee filter +solar telescope +Japanese linden +thinning shears +golden wattle +queen triggerfish +millinery +surfbird +flame fish +clove +dicamptodon +red-bellied terrapin +turmeric +baya +air horn +Indian coral tree +punnet +sharkskin +water crowfoot +bight +desert 
iguana +Texas toad +volva +dredge +Turkey red +chemical plant +gemma +dice cup +orange marmalade +mistletoe +surveyor +frozen orange juice +pallette +poultryman +burbot +courlan +captain +saddlery +bodyguard +dwarf tulip +black ash +pulse +nailbrush +tickseed sunflower +legless lizard +shirtwaist +polling booth +chickeree +garlic chive +common thyme +multichannel recorder +screw thread +sangoma +calliopsis +geoduck +colleen +bandicoot rat +pastis +swamp sunflower +scorekeeper +Honduras mahogany +Australian pitcher plant +triangle +elevator shaft +green pea soup +carrel +prairie aster +bird's-nest fungus +scarlet clematis +gook +mescal button +carcase +mulatto +ejection seat +strawberry daiquiri +goat grass +car battery +babu +chief of staff +monilia +Siberian crab +ridge rope +Morchella semilibera +nutmeg +moosewood +graham bread +California four o'clock +zwieback +velvetleaf +abelmosk +shadow box +corned beef hash +newsreader +backstairs +cutwork +sherbert +tooth fungus +angel-wing begonia +greasepaint +common milkwort +potato vine +CD drive +crepe de Chine +sporting man +koto +armet +barking frog +celeriac +drainage ditch +black box +steel blue +clotheshorse +corn speedwell +drawknife +spritsail +vichyssoise +modeler +pocketcomb +limey +suslik +cockpit +digester +brig +raita +troll +benedictine +rock wren +lock +Barnaby's thistle +school bell +school ship +Soave +falchion +swaddling clothes +terrine +smoke screen +rivulus +sweet lemon +cullis +bustier +peppermint +Philadelphia fleabane +Hampshire +active +charnel house +face guard +Quebecois +facilitator +tongue depressor +bitternut +heath aster +sapodilla +bluestem +centrist +Canterbury bell +needlenose pliers +groats +tapa +Qatari +paper feed +tilt-top table +plastering trowel +brazil nut +rotogravure +patriot +manicurist +bacon and eggs +puffbird +lightweight +golden willow +kaiser roll +duff +girandole +seaside daisy +Kurdistan +Skivvies +showboat +fire bell +lock-gate +greater masterwort +weald +ice ax 
+toetoe +mess kit +bucking bronco +black turnstone +backscratcher +backpacker +basement +marbleization +trigger +satsuma +fall-blooming hydrangea +mountain lady's slipper +yellow oleander +crookneck +ex-president +Venn diagram +psaltery +bulwark +old boy +linear leaf +aril +butt weld +fall webworm +pruner +bald-faced hornet +nougat +tailgate +field speedwell +potsherd +center punch +long beech fern +desert paintbrush +canyon treefrog +bushel basket +Eurasian +swamp horsetail +cryptanalyst +wicket +school newspaper +captive +spider brake +electric mixer +tumbleweed +mason wasp +sash window +paddock +wet bar +oxtongue +stevia +wheat rust +scute +switch engine +mud dauber +dotterel +snailflower +common barberry +mulligatawny +cinnamon bark +cigar box +trivet +proof spirit +cream soda +western grey squirrel +baby powder +Bren +Japanese yew +sailcloth +Basket Maker +bannock +basidiocarp +aphelion +erect bugle +limiter +bosc +Przewalski's horse +helmet orchid +audiometer +battle cruiser +grass widower +staphylococcus +Congolese +common pitcher plant +parliamentary agent +Virginia snakeroot +mockernut +Siberian elm +backbench +rough +chervil +chlamys +nationalist +galantine +screwdriver +falsifier +cancerweed +spur +jerkin +porte-cochere +dill pickle +Montagu's harrier +tetrode +true fungus +American quaking aspen +vitamin B1 +leopard lily +eggdrop soup +aurochs +core bit +Jaws of Life +trousseau +parquetry +Disciotis venosa +tender +beef goulash +vitamin K1 +pepper spray +covered smut +hook +sports announcer +weapons carrier +foxtail grass +sloe gin +mezereon +antifouling paint +pavior +pile driver +security consultant +monkey-wrench +Indian hemp +amaretto +American wistaria +A-line +market strategist +rainbow runner +souvlaki +binturong +stiletto +gastrula +Vietnamese +Old World hop hornbeam +cold cathode +pier table +houndstooth check +prop root +leaf-footed bug +sedge wren +Dutch iris +drop curtain +opossum rat +lame +pollen tube +doubletree +compression bandage 
+pinon pine +catmint +pier arch +kingmaker +deanery +loofah +fullback +fencing mask +flying boat +carpet sweeper +lemon-scented gum +Accipitriformes +kit +pigfish +clipper +dolmas +lesser centaury +blood agar +water violet +raw milk +lemonade +vicar-general +supply closet +Anzac +confectioner +ignition key +velvet grass +white willow +John Dory +ruddiness +wheel +common horsetail +hubbard squash +speculum +Spanish bayonet +mountain mint +glint +foxhole +housemate +bootjack +sleigh bell +clog dancer +Mexican mint +rendering +Hausa +star saxifrage +spring squill +clothesbrush +liquid metal reactor +Columbia tiger lily +sorrel +cartwheel +Jersey +Caucasian walnut +desert willow +surveyor +elbow +Santa Gertrudis +fringe bush +industry analyst +lyrebird +Cortland +arroz con pollo +catechist +tank top +jew's harp +cereal oat +heartleaf +short sleeve +butty +butterfly plant +stud finder +felloe +beer garden +clevis +wood warbler +demerara +cornetfish +mince +Jamaica rum +Spanish broom +binnacle +camise +ferrule +Copt +hall +minicar +scimitar +cryptogam +miter box +limestone fern +Marsala +Parliamentarian +gravy +woolly bear moth +formula +squash bug +pigmentation +plate +skin graft +radiotelegraph +hellbender +soft pedal +lavender cotton +propagator +Bailey bridge +cottage pie +rotgut +A battery +pintle +off-line equipment +European swift +shrimp butter +plumb bob +trunk lid +succotash +yellow cypress +heartleaf +antelope squirrel +sambar +maternity ward +deciduous plant +bartlett +Riesling +sour cherry +Klansman +poke +academician +sociolinguist +bird's nest fern +common privet +scale fern +tachograph +oyster stuffing +pusher +green June beetle +staghorn sumac +lockage +master +bap +harlequin +blackfly +spotted coral root +kahikatea +cabana +riot gun +apple mint +kob +praline +confidant +pahautea +float +city father +Zen Buddhist +pessimist +conference center +banksia rose +comfit +sweet cicely +winged bean +henroost +myope +bunt +nailfile +yellow mountain saxifrage 
+cruise control +abandoned ship +water chinquapin +spanker +wing nut +puccoon +pier glass +Atlantic sailfish +medlar +buttercrunch +rough-skinned newt +planter's punch +Dutch iris +control key +committeewoman +torpedo-boat destroyer +garambulla +tree heath +gladiator +September elm +inclinometer +snowbell +call-in +sunsuit +microfiche +bluestocking +cheval glass +server +franking machine +sugar syrup +Macoun +transport ship +alderfly +wash-and-wear +Abbe condenser +bush nasturtium +wild leek +canary seed +Northern Baptist +sweet wormwood +jaboticaba +cardroom +autoradiograph +ash-pan +sprinkler system +rattrap +claymore +parts bin +forest red gum +thermonuclear reactor +Indian crocus +lector +heir apparent +leafy spurge +masquerader +varicella zoster virus +cucumber tree +hedger +Shumard oak +zooplankton +quartermaster +arrester +bridge +hop clover +meadow foxtail +winter hazel +portable circular saw +penuche +limpa +blue toadflax +mesophyte +Alpine anemone +pet sitter +avocado +streptococcus +fiber optic cable +river red gum +hornist +chicken taco +red spider +tape grass +densitometer +salmonberry +tiger snake +hot toddy +silver fern +candlenut +buckram +local call +defoliator +king +mahoe +lever lock +social insect +winter purslane +bootblack +fireball +ramie +bellbird +prepuce +capote +Chinese forget-me-not +Pisces +costume +California black oak +tree lupine +golden polypody +liger +California whipsnake +urodele +sapodilla +skillet bread +duckpin +supremo +asparagus bean +kampong +endameba +cow pony +rider +motherwort +Persian iris +soursop +kohlrabi +Parisienne +irons +doubles +feijoa +farmplace +cottage cheese +bezoar goat +subcontractor +blunderbuss +down +purple martin +Lapp +crenate leaf +tobacco pouch +beach towel +Santa Lucia fir +monetarist +stringer +ocellated turkey +Texas purple spike +ackee +caddy +hedge mustard +second-rater +strawberry bush +valedictorian +steak sauce +prairie gourd +aspirant +mint +Valenciennes +vodka martini +American persimmon 
+big brown bat +Mycenaen +mouthpiece +norfolk island pine +pennyroyal +Jewish rye bread +granadilla +tract house +wall +shuttle helicopter +blackjack oak +Lippizan +storm window +white zinnia +sickle +sushi bar +polish +baldric +brooklime +church hat +control circuit +vicuna +death adder +eukaryote +durmast +field soybean +jacket potato +wild basil +queen consort +brooklime +octant +blue false indigo +broccoli raab +step-down transformer +date bread +blue ash +duffer +oak chestnut +pennant +wedge +Florentine iris +morion +weakfish +morning dress +public address system +spearmint +Ashkenazi +sow +interpreter +Metis +pita +iron lung +parfait glass +cylinder lock +immortelle +obstetrical toad +tee hinge +successor +western +working girl +julienne +AND circuit +spaghetti junction +fer-de-lance +enlisted woman +star +lightning rod +bilge pump +pacer +horse nettle +African oil palm +blastocyst +air hammer +bamboo fern +remote terminal +lambkin +money cowrie +Pelham +clinical thermometer +wiggler +guru +false indigo +tea bag +foredeck +king +baby shoe +mule +grab bag +silver-bell tree +knitting machine +cobia +roulette ball +larder +button pink +rumble seat +noria +queen mother +solar thermal system +aquaplane +highbrow +rusty blackbird +desktop +lima bean +pontoon bridge +watercress +wild cabbage +tumbleweed +dressing sack +compact-disk burner +spittoon +marrow +sporophyte +second fiddle +pot-au-feu +specialty store +dry +mole +khadi +japonica +lovage +squamous cell +lobe +European creeper +brown pine +bladderpod +rumble +French Canadian +mascarpone +Pacific halibut +perennial ryegrass +wine lover +turbot +longwool +silver tree fern +dust cover +synchromesh +corn pudding +alpine azalea +garboard +cane sugar +observation dome +condensation pump +hind +taximeter +hand drill +gas thermometer +jammer +buffing wheel +handstamp +prairie mallow +turkey stew +sun spurge +duck pate +kibble +Cassin's kingbird +apadana +Devon +grinner +oocyte +blank +header +schoolmaster +guard 
ship +intravenous pyelogram +rimu +luff +Mediterranean fruit fly +singlestick +lady-in-waiting +curb +birch +limekiln +orthoscope +serotine +Spanish oak +swamp cottonwood +edger +city man +picnicker +white basswood +Parsons table +Christmas begonia +perspirer +Pacific tree toad +Cape tulip +finger bowl +blue pike +greengage +handcar +milkweed +potbelly +river dolphin +creel +typewriter carriage +banteng +pawnbroker's shop +huon pine +biennial +man of action +foundress +caveman +featheredge +jordan almond +sandblaster +coralberry +low-calorie diet +hoot owl +garter +bain-marie +wrecker +fenugreek +double-hung window +idol +scullery +balloon vine +summer savory +winged spindle tree +Helvella crispa +walrus mustache +gas engine +boulle +rush grass +rue +hoe handle +cat fancier +deerstalker +dunker +American red plum +fall dandelion +groover +sprag +stair-rod +wish-wash +pricket +architrave +California laurel +net melon +Arizona sycamore +executive secretary +silverweed +silky cornel +surface ship +square sail +common purslane +villa +holly-leaved cherry +sweet birch +pecan +artillery shell +breast pocket +pirogi +scarlet runner +rabbit brush +mealworm +leather carp +palette knife +Jerusalem sage +boneshaker +slit lamp +digital voltmeter +polar glacier +square-rigger +homogenized milk +Sten gun +lesser calamint +pyrograph +Korean lawn grass +Zinfandel +crepe fern +western ragweed +clasp knife +distributor housing +cartouche +scooter +ski parka +jackknife +Carolina spring beauty +soft diet +candlesnuffer +horse trader +step stool +agouti +accelerometer +annual fern +judge advocate +angelica +roll film +treehopper +ombu +comer +sultanate +kitchen help +hooded ladies' tresses +milking machine +knuckle joint +Jamaica honeysuckle +music teacher +sauerkraut +Weston cell +slivovitz +Worcester sauce +tall bellflower +chancery +prophetess +casquet +shortfin mako +sorus +visual display unit +asp +grenadier +black pepper +crottle +erasable programmable read-only memory +jabot 
+ratchet +disk controller +chief petty officer +tap wrench +white mountain ash +cultivated rice +flying phalanger +skillet corn bread +BB gun +Elamite +European red elder +reed rhapis +ciderpress +inga +torpedo +wild teasel +bean curd +oeil de boeuf +acuminate leaf +bitter lemon +hitchrack +Lorraine cross +hostess +European dogtooth +adz +polonaise +rock sandwort +Waldorf salad +myrmecophile +klystron +mole rat +draba +corn borer +robusta coffee +chub mackerel +leatherleaf +chronometer +Moselle +sea aster +fennel +slop basin +constable +Brunswick stew +hydraulic pump +French omelet +icebreaker +Manx shearwater +press of sail +ninepin +blue succory +bootstrap +hallstand +chit +firefly +bearded seal +fuel filter +jezebel +mate +Roquefort +cheesecloth +plasterer +blue pimpernel +lake dwelling +shrink-wrap +goat cheese +common gum cistus +coastland +Sunday best +wild tobacco +mandrake +common unicorn plant +barbican +culotte +blockhouse +German iris +tarragon +caramel +wild rosemary +grain +voyager +squirting cucumber +eastern narrow-mouthed toad +creeping fern +luge +saffron +garland flower +furnace room +starship +Oriental scops owl +Italian honeysuckle +berserker +Chinese elm +scrubber +bishop pine +French polish +compromiser +skimmer +river shad +lobster thermidor +leadwort +man-of-the-earth +razorblade +vicegerent +empress +link +ham and eggs +wild lily of the valley +blackfish +splicer +fossa +mara +moneygrubber +brachiopod +fauteuil +caldera +finish coat +croupier +termer +leopard's-bane +sei whale +molucca balm +dolly +dog food +term infant +soft roll +episcia +sewer +inquiry agent +active citizen +perry +California newt +moon shell +bladderwrack +common shrew +dill +Dutch elm fungus +key lime +electrometer +divorce lawyer +lamb's-quarters +apple turnover +shipmate +Guernsey +legionnaire +electric blanket +Rocky mountain pinon +tobacco mildew +stinking iris +forestiera +departure lounge +wiper motor +jurist +scarlet runner +pallbearer +batter's box +inertial 
guidance system +fines herbes +oilcan +sisal +mustache cup +steamed pudding +Visayan +fiesta flower +lady tulip +lungless salamander +batiste +electrical system +blazing star +car carrier +Walloon +mother hen +stump +mulled cider +secondary coil +Alexandria senna +etui +scrumpy +Havasupai +jawbreaker +glume +ex-husband +Eskimo +Joint Direct Attack Munition +number theorist +five-hitter +pinstripe +Olympian +common mackerel +stone bass +bigos +Bahraini +airbrush +great ragweed +glass lizard +hand fern +roundel +riding master +shoetree +yellow avens +old fashioned +dolman +stinger +nursling +legate +faille +golden fern +bedpost +shop steward +kidney bean +bladderwort +internist +limeade +Bruneian +Coloradan +playsuit +wintergreen oil +Cantabrigian +mutton snapper +shot putter +hand grenade +moccasin +cobnut +marrow +separatist +cockscomb +discharge pipe +Gabonese +spade bit +chicken cordon bleu +varnish tree +European wood mouse +striped gentian +Ayrshire +curassow +moo goo gai pan +malarial mosquito +glow tube +ledger board +bib-and-tucker +European chestnut +suffragette +color wash +gaffsail +golden larch +voting machine +Kahlua +lungi +amusement arcade +Uzbek +butternut +mold +mule's ears +dickey +shrimper +trophozoite +dreadnought +shepherd's purse +greenhouse whitefly +spotted gum +copperware +perfect game +semigloss +spawn +telecom hotel +stakeholder +mason wasp +flibbertigibbet +chin strap +fringed pink +saki +urchin +memorizer +roulade +whiting +cling +corncrake +Queen of England +choo-choo +empty +heating pad +playmate +visualizer +popcorn ball +absconder +sou'wester +target acquisition system +mock-up +dental floss +tray cloth +haddock +bulblet fern +housing commissioner +delayed action +anchor light +harbor porpoise +water wings +PT boat +night latch +fennel +doorframe +green-tailed towhee +grey polypody +torture chamber +American germander +Chinese wistaria +cattalo +accompanist +rifleman +alpine clover +contrarian +lemon peel +Mexican cypress +sprog 
+dado +Galilean telescope +desmid +lockup +Latin +American raspberry +mescal +butternut +prairie orchid +downy yellow violet +green hellebore +radio compass +bread and butter pickle +Cherokee rose +knish +destroyer escort +Arkansan +langlaufer +pyxis +winter savory +velocipede +motley +winter savory +law student +barren ground caribou +apple dumpling +field hospital +works +city editor +European flatfish +Morchella crassipes +life office +boot camp +cream sauce +cape aloe +acetate disk +devil ray +tile cutter +Plymouth Rock +microspore +godown +Syrian +tiercel +American cranberry +lesser spearwort +anopheline +Spanish oyster plant +wire cloth +attic fan +birch beer +small computer system interface +crook +ribbon fern +explorer's gentian +nagami +I-beam +rosebud cherry +Jerusalem artichoke +Stillson wrench +pluralist +district manager +Levantine +orangeade +part-timer +post horn +Oregon grape +contadino +cargo helicopter +silverpoint +chaja +California bluebell +case +Shasta +cheese cutter +Leishmania +avalanche lily +iron horse +bialy +Yana +Delawarean +Prussian +nonpareil +hammer +hoper +chewink +anil +skim milk +desert four o'clock +crescent wrench +white marlin +blue jasmine +malacca +anadama bread +purple poppy mallow +ganglion cell +ligature +no-parking zone +golden clematis +Cotswold +aliterate +shebeen +yardarm +superbug +fanaloka +stinking cedar +spirochete +wort +pater +heaume +thermocouple +ironing +naval tactical data system +European goatsucker +prairie cordgrass +accused +foreign agent +halberd +western mugwort +esthetician +Persian lilac +cracked-wheat bread +crosscut saw +rock penstemon +paper cutter +crematory +ideologist +cattley guava +margarine +creosote bush +hoary plantain +spark gap +lumberjack +Greek valerian +mission bells +tight end +bigeye +large crabgrass +stone marten +cleat +lentil +bay scallop +lector +charger +assemblywoman +second lieutenant +boil smut +sarsaparilla +hydromel +cat flea +pinfish +whole milk +hairnet +myeloblast 
+peasant +blind curve +first offender +dwarf-white trillium +Brother +coatdress +gun emplacement +tamarisk gerbil +snap +air cushion +trailing edge +potato vine +gig +everlasting pea +champion +dibble +rattail cactus +timothy +prince's-feather +cutlas +lockring +sealing wax +Brussels lace +corn mint +highboard +she-oak +wild celery +pillar +Burberry +Hakka +leucothoe +bell tent +gallery +coontie +leather fern +smack +adenovirus +linoleum +chain wrench +tammy +gas fixture +nut bar +baneberry +butterscotch +goat's rue +bullock +grey snapper +mother-in-law +hyson +wayfaring tree +mollie +needle spike rush +buckwheat +bayberry +brush-tailed phalanger +dry rot +harborage +stormy petrel +Oriental beetle +Atlantic halibut +coping saw +simple fruit +viscose rayon +surgeonfish +upstairs +security system +common ragweed +verticillium +pancake batter +hawk's-beard +Dutchman's-pipe +refrigeration system +European parsley fern +Ivy Leaguer +totalitarian +gonococcus +towhead +showy sunflower +pallium +multiengine airplane +hair trigger +rabbit-eared bandicoot +siskiyou lewisia +fuel system +flat arch +broad beech fern +Alpine lady fern +bracken +Kentucky black bass +rut +mountain maple +tunaburger +umbrella fern +white-headed stilt +meat hook +panhandler +washhouse +barnyard +safety lamp +leg +ripple mark +paper +sagebrush lizard +light heavyweight +common nutcracker +operator +stalking-horse +horseless carriage +fishhook +suction cup +peg +Ungulata +false teeth +round-bottom flask +Luba +campaign hat +firebox +rudder +parapet +ice pack +appellant +spirit stove +metheglin +common bamboo +soapwort gentian +pannikin +time capsule +burn bag +folk poet +tropical prawn +end man +new caledonian pine +linen +web +free trader +jury box +railing +pignut +leaker +potboy +rubber boa +white snakeroot +plumber +Candida albicans +surfboat +woman +promulgator +eyecup +wild China tree +rattlesnake master +Viyella +alpine salamander +ailanthus silkworm +Albatrellus ovinus +war room +meadow vole 
+robotics equipment +rotary actuator +Engelmann spruce +pinesap +beefcake +native speaker +ridge +injector +water chute +salmonberry +decoupage +bottlebrush +date plum +circlet +American mountain ash +pocketbook +horsemint +sweet four o'clock +kirpan +pinto bean +chervil +equator +range animal +candy thermometer +calanthe +cul +stipendiary +brahman +pelican crossing +topgallant +wild senna +sliding window +carrier pigeon +Tatar +quadruplet +bumboat +spearmint oil +slip clutch +young Turk +golden yarrow +shank +glasswort +dental plaque +Manduca sexta +Northern bedstraw +dent corn +Life Saver +western wall flower +bedder +wherry +Tuscarora +scrapple +borstal +reflux condenser +problem solver +nondriver +perforation +eastern cricket frog +white wood aster +broad buckler-fern +Cape primrose +herringbone +head louse +earl +baton +recording system +primary color for light +cherry laurel +pomfret +ratafia +chocolate milk +obscurantist +revisionist +rood screen +magnetic needle +commensal +oil tycoon +celebrant +domicile +harvest mouse +California nutmeg +greater spearwort +black-billed cuckoo +winepress +demographer +straw boss +diabetic diet +sweetmeat +rabbet +ming tree +basketweaver +freestone +walk-in +Aryan +box coat +audio amplifier +chicken salad +churidars +whydah +box +batman +siren +selectman +gouger +drip coffee +Caesar salad +interpreter +whinstone +grey goldenrod +minicomputer +honey crisp +hypercoaster +Irishman +swamp white oak +reed canary grass +globeflower +cynthia moth +fennel seed +canthus +chino +blind date +tar pit +watermelon begonia +fishtail palm +overcast +Pearmain +primary color for pigments +coal seam +wherry +safety bolt +cretonne +Michigan lily +inflater +moneybag +huckleberry +brassard +bush vetch +looking glass tree +pinwheel roll +alfalfa sprout +sea kale +clinometer +achira +lorgnette +potter wasp +gilded flicker +tody +capulin +captain's chair +crackle +gerardia +prie-dieu +venture capitalist +New Jerseyan +block and tackle +elf cup +bur 
reed +automatic transmission +wax palm +flytrap +crack willow +coachwhip +swizzle +lugger +Dewar flask +baster +oxyacetylene torch +Culex quinquefasciatus +St Peter's wort +wild hyacinth +Russian almond +burrfish +wintergreen +katsura tree +butcher knife +perfumery +thresher +porte-cochere +sheepwalk +hypotenuse +Dalmatian iris +buttercup squash +demiglace +goldenseal +preceptor +rigger +poikilotherm +old-age pensioner +posthouse +wood horsetail +repeater +reciprocating engine +Rambouillet +terra cotta +togs +battledore +horizontal tail +missile defense system +trier +morello +woolly adelgid +munition +double creme +in-fighting +squirrel corn +crow's nest +antler moth +brake cylinder +bandoleer +noticer +Parmesan +hipline +cheapskate +Dubonnet +mole rat +bog aster +ribbon tree +meadow rue +nard +ratel +loose smut +snapping shrimp +golden glow +basil thyme +Florida strap fern +moonshine +flume +lace fern +black bream +orchestra pit +archerfish +exile +ringdove +career man +godfather +bottom-feeder +pasteurized milk +dental implant +pedicel +Catalpa speciosa +yellow foxglove +lancet arch +steam shovel +sampan +patrol boat +sailor cap +tollgate +monal +velociraptor +cacique +jack oak +cursed crowfoot +creep +Parry manzanita +common matrimony vine +grace cup +caecilian +spurge laurel +prickly lettuce +Regius professor +camail +Sitka willow +Courtelle +gin sling +dogmatist +guest +saltine +dust cover +sport +sweeper +feist +lady's-eardrop +vibist +wire stripper +tenpin +interplanetary space +beet green +pruning knife +drainage system +gunnery +ballet master +lime juice +flak catcher +lacrosse ball +Canadian aspen +beatnik +railhead +utilizer +spadefish +Arizona white oak +city university +dense blazing star +hedger +chain pickerel +right-hand man +namby-pamby +nacelle +redneck +tumbler +Chief Secretary +cannon +cupola +kummel +papaya juice +Burton +Stanley Steamer +loganberry +stylus +square meal +rock bass +western ladies' tresses +dramatist +assignee +tandoor 
+trumpetwood +segregator +green adder's mouth +coral necklace +ani +iceboat +densimeter +oxtail soup +kernel +cos lettuce +greenishness +panchromatic film +Parker House roll +oatmeal +backsaw +double Gloucester +bailey +storage cell +giant +coconut milk +broadtail +barouche +loir +soybean meal +white-leaved rockrose +junction barrier +spandrel +sweat bag +goldilocks +flowering wintergreen +cockspur +beef fondue +holding cell +cardamom +cagoule +Kamia +tangelo +Herschelian telescope +wine bar +kachina +sand sage +guy +ivory palm +citrus mealybug +topper +ladyfish +force pump +fanion +calaba +Iowa +orrisroot +ivorybill +Secretary of Agriculture +gagman +dry cell +hypnotist +kenaf +grey alder +deathwatch beetle +gagman +magnetic stripe +trap door +abdominal wall +prefab +broomcorn millet +architeuthis +angler +Pacific giant salamander +barbette carriage +low-fat diet +veal scallopini +B battery +wallah +landing flap +pistachio +jaguarundi +nagi +cicerone +felt fungus +Aertex +stocks +smooth aster +patchouli +lemon sole +sleeper +basket fern +dundathu pine +anjou +Moreton Bay chestnut +broom sedge +candid camera +red angel's trumpet +oilstone +cinnamon toast +Pacific walrus +fruit custard +Jehovah's Witness +mate +voyeur +Esselen +achromatic lens +sanguine +brine shrimp +dunce cap +swot +transit instrument +grey willow +pack +bench clamp +Nova Scotian +gadgetry +silvery spleenwort +enchantress +rough fish +morula +giant taro +sorus +roux +polyhedral angle +spruce beer +Chicano +cola extract +outfielder +kohleria +white-rumped shrike +car-ferry +subway token +spoon bread +totara +corn borer +bowhead +tensimeter +water scooter +flickertail +Catholicos +pleaser +blue-eyed Mary +calabash +handyman +cascades frog +facing +scarlet oak +lutist +ginger +tree tomato +Harvey Wallbanger +tent peg +insectivore +fusil +swale +chinning bar +bladderpod +New Dealer +dhoti +proscenium arch +common vetchling +channel +collect call +safflower +Texas tortoise +test equipment +theca +RAM 
disk +sheep sorrel +rammer +buttonhook +honey mesquite +dominus +babirusa +queen +Aspergillus fumigatus +crash barrier +nonmember +Muscovite +verdin +Australopithecus afarensis +Turkish Delight +stalked puffball +giardia +divider +mountain skink +head smut +pacemaker +evaporated milk +rattlesnake fern +flamethrower +navy bean +bather +steed +showy orchis +stone crab +artichoke heart +phantom orchid +space helmet +swamp laurel +privateer +junior +surcoat +bristlegrass +flower girl +aphid lion +penthouse +lemonade mix +coude telescope +natal plum +scriber +wood nettle +rape suspect +resplendent quetzel +western poppy +choir loft +fore-topsail +thyme-leaved sandwort +erotic +short circuit +outdoors +flowering tobacco +hookup +aviatrix +corker +horehound +horn +swamp pine +water biscuit +cherimoya +vaporizer +courtier +European sole +full skirt +Mother Carey's chicken +cymule +huck +white snapdragon +mountain nyala +country borage +bonduc +casein paint +grampus +shrimpfish +lodge +dragee +black walnut +caraway seed +roper +glass cutter +tab key +Richardson's geranium +demigod +chichipe +Italian ryegrass +cadet +electrograph +rudd +carpenteria +foie gras +lignum vitae +hedge nettle +pledger +American hackberry +flageolet +beaked hazelnut +reflectometer +sticky geranium +marriage bed +white pepper +japanese clover +whiteface +gnat +extrovert +Canada plum +talipot +chicken stew +egg foo yong +fraxinella +skibob +saucer magnolia +jacket +green smut fungus +cloakroom +landing skid +booth +ice milk +dipole +striped coral root +red buckeye +roughcast +breaststroker +cowherb +razor clam +first-aid station +briarroot +clambake +lander +Bramley's Seedling +frail +jird +minisub +luging +poison milkweed +European lobster +epidemiologist +spandex +paloverde +marumi +bypass condenser +punter +petty spurge +Coryphaena hippurus +bilberry +vermillion rockfish +witness box +viscometer +pulque +Massachusetts fern +herring salad +ridge tile +mesa +dwarf grey willow +southern aster +punch 
pliers +tarnished plant bug +hoop pine +Japanese red pine +benedick +rebozo +silver plate +silver willow +mouse-ear hawkweed +bonito shark +abutment arch +noble cane +tiger rattlesnake +pongee +jumping plant louse +pattypan squash +giant ryegrass +railroad bed +stiff aster +imperial Japanese morning glory +laundry +winter cress +large white petunia +tea maker +pen-and-ink +early warning system +lug +monocot +sea wormwood +breechblock +postage meter +third rail +Mongoloid +Australopithecus boisei +umbrella tent +stirrer +Dumpy level +beroe +post and lintel +green spleenwort +tomato paste +dishpan +stentor +sweatband +cobbler +New York fern +gaff +prairie willow +cyclops +jigsaw +rotavirus +pallet +eastern ground snake +boiling water reactor +acute triangle +agora +European cranberry +roebuck +surgical dressing +busboy +cannikin +feedlot +common pond-skater +cochin +horsehair lichen +fetter +sapote +fichu +dermatologist +fire tongs +creme anglais +foster-mother +laurelwood +chicken snake +mincemeat +rocker +wild spinach +powder and shot +butterwort +auxiliary engine +mamey +hart's-tongue +sucking pig +American turkey oak +troopship +buttermilk +divi-divi +boatswain's chair +soda fountain +southern flying squirrel +elastic +cutaway +housekeeper +renegade +apple rust +bridoon +machicolation +stunt +keyhole limpet +personality +solitary vireo +epidendron +Jihadist +boffin +bettong +terror +partial denture +pusher +saltcellar +capstan +large poodle +Bibb lettuce +low-bush blueberry +staple +banded krait +sickroom +barnyard grass +wandflower +woodworm +bluegrass +squirrel's-foot fern +rabbitfish +delta wing +milking shorthorn +limber pine +guru +gamine +scythe +sweetsop +Gruyere +bloodmobile +mine detector +American mistletoe +silver beech +hound's-tongue +Lombardy poplar +basket fern +pink-and-white everlasting +redtail +Aladdin's lamp +mace +outtake +condensed milk +Canada wild rye +silver perch +waxflower +taxer +Chinese chestnut +Our Lord's candle +mugwump +school 
system +salp +osso buco +dress shirt +butterweed +low-fat milk +couchette +broomcorn +proscenium +mill agent +smut grass +humpback +southern spadefoot +military leader +canebrake rattlesnake +tailor-made +ebony +beach house +flying gecko +hoary alison +typhoid bacillus +Romanov +vanilla pudding +sweet cicely +Spodoptera exigua +dress rack +flannel +skipjack +bolognese pasta sauce +rooibos +thunderer +blessed thistle +gauntlet +mahatma +granadilla +laurel sumac +Yuma +thyme-leaved speedwell +encyclical +twill +linocut +manna gum +spark arrester +cocklebur +Indian hemp +lemon oil +Hall's honeysuckle +raceway +flop +Himalayan lilac +one-flowered wintergreen +photosphere +silvery spleenwort +convex polygon +canarybird flower +foster-sister +fluffy omelet +palanquin +roll +dandelion green +Javanese +workpiece +Carmelite +bread mold +schlemiel +wild lily of the valley +grugru +solenoid +puff batter +skep +balance wheel +Gadaba +portia tree +mobcap +two-man tent +scuffle +firebrat +ant lion +anise +caster +giant petrel +American water spaniel +naboom +treasure ship +foster-son +fiddleneck +alidade +sugar refinery +wild oat +water beetle +generic +damson plum +abrocome +detainee +pitch pipe +coast +nilgai +radiotherapy equipment +heart-leaved aster +gristmill +grocer +Appaloosa +Cheviot +brake pedal +lantana +cave myotis +Rob Roy +sea spider +latrine +carpophore +recycling plant +coondog +brace and bit +funambulist +eggar +mantelet +postdoc +mezzanine +coco plum +pulse generator +high-vitamin diet +menhaden +mechanical engineer +bergamot mint +Chuvash +grated cheese +helicon +belladonna +beet armyworm +eelgrass +resuscitator +interrupted fern +arrow grass +cistern +Pacific herring +colostrum +journal bearing +Fauve +wrist pin +canape +choice morsel +quadraphony +guard boat +shortgrass +claymore mine +hitching post +cargo door +decoder +gym rat +Cocopa +commander +apple of Peru +seckel +yellow goatfish +dog flea +dodo +oconee bells +Tudor arch +turkey stuffing +ebony 
spleenwort +wheat flag smut +scolopendrium +Brazilian pepper tree +gusset +inspector +lunar excursion module +baron +plantigrade mammal +Creole +phosphate +aromatic aster +ghee +audiovisual +onychophoran +cotton stainer +lieutenant junior grade +spheroid +amen corner +caper sauce +Caladium bicolor +dyer's rocket +seaside goldenrod +flint corn +Very pistol +rotifer +steeplechaser +rouleau +escape wheel +Namibian +millivoltmeter +emmer +climatologist +agateware +sea lyme grass +inclinometer +water fennel +saddle seat +vicar +garden cress +ski rack +Norfolk jacket +casaba +coast rhododendron +sericea lespedeza +hematocrit +autopilot +tilter +finish coat +Pennsylvanian +shrubby St John's wort +podocarp +percussion cap +ceriman +peanut bar +gean +jack +durra +rotor +carob +cottage tulip +three-spined stickleback +trencher +elevator +kalumpang +abaca +Australopithecus robustus +active matrix screen +water bed +hatmaker +lodestone +cat food +overcup oak +balletomane +popgun +rheometer +process cheese +frog legs +heartleaf arnica +p-n-p transistor +steam turbine +Tulu +scalene triangle +licorice fern +coffee break +trade unionist +starved aster +firing pin +water gum +Masonite +hairspring +seminarian +blue racer +forecastle +scrub pine +Atlantic spiny dogfish +kopje +orphrey +fan tracery +gee-gee +vixen +interstellar space +Harris Tweed +sawmill +lemon mint +bitewing +ringlet +Chinese mustard +paleontologist +American hazel +brigantine +clay-colored robin +zombie +nectarine +West Indian jasmine +pineapple weed +rusher +gynecologist +pole +thylacine +myrtle beech +golden cup +woodruff +T-bar lift +terebinth +service club +homegirl +Blue Mountain tea +figwort +New Hampshirite +Stayman +tonometer +white turnip +messuage +cruet-stand +colliery +connecting room +lesser twayblade +bland diet +crown prince +beggarwoman +restharrow +bower actinidia +firebug +hepatic tanager +telegraph +Spodoptera frugiperda +spackle +carpenter's square +pyx +supermom +thickhead +whorled milkweed 
+Arctic char +Chinese rhubarb +pince-nez +wolverine +tomato concentrate +cascarilla bark +red underwing +leather flower +Jerusalem thorn +bullpen +Salisbury steak +anode +coffeeberry +bottling plant +fritter batter +aerial torpedo +matrix +local oscillator +stalked puffball +bruin +three-cornered leek +wassail +stabling +damping off fungus +myriapod +osier +lesser kudu +cownose ray +chokecherry +wagon +obstetrician +Glengarry +even-pinnate leaf +wine sauce +osteocyte +baker's yeast +heir presumptive +blackjack +tympanist +golden fern +fipple +Japanese oak +bar mask +stamping machine +argus +knobcone pine +oil beetle +lanai +upper berth +condenser +proctologist +catechu +wild spurge +vestry +ground snake +proton accelerator +walker +scarlet bush +transom +lagging +bouillon +slender loris +black currant +developer +football hero +plum sauce +striped mullet +prince charming +fictional animal +prosimian +lug wrench +lemonwood +kirsch +spy satellite +black caraway +Thompson Seedless +bead tree +purple fringeless orchid +Virginia strawberry +chigetai +punkie +gall wasp +addressing machine +rock polypody +good-king-henry +spring cankerworm +wimple +noncandidate +saskatoon +hacienda +Darjeeling +snowberry +lounging pajama +ascospore +ski-plane +hedgehog cereus +Welsh onion +yautia +coaster brake +sickle cell +parrot's beak +fuller's teasel +painted greenling +scablands +stuffed cabbage +barrel organ +etcher +dwarf maple +camp +Australian blacksnake +currycomb +obtuse triangle +rose gum +psychrometer +abridger +torpedo +carpet loom +sodalist +slender rush +loligo +sclerometer +wimp +dotted gayfeather +green ash +pinstripe +moralist +medusa's head +garden centipede +heath aster +fool's parsley +olla podrida +Potawatomi +Edam +toothache tree +hulk +seabag +narthex +compartment +prairie star +lookdown +B-flat clarinet +event planner +clip lead +shirting +milk punch +supercharger +macadamia nut +giant coreopsis +computer store +martingale +keyboard buffer +summer flounder 
+squash ball +gas turbine +object ball +plier +black mulberry +reef squirrelfish +scampi +willow aster +bowler +striped marlin +smooth muscle cell +diplodocus +Liberty ship +sponge cloth +guitarfish +walking leaf +showroom +California bluebell +bolo +turnbuckle +boysenberry +hardware +Gael +imago +endorser +jujube +dust bag +rapporteur +field wormwood +low-water mark +naval missile +Pacific yew +reversible +crabapple jelly +poniard +barricade +spawner +simnel +seltzer +deckle edge +needle +timbale +satellite transmitter +organization man +job candidate +orderly +native cranberry +fir clubmoss +coaming +chartered accountant +electron accelerator +Sierra plum +American foxhound +long underwear +Penobscot +blueberry yogurt +biretta +cascara +Paranthropus +Dorian +nun's habit +lenten rose +Augustinian +designer +northern phalarope +mombin +hazel mouse +reeve +waffler +telegraphy +Verpa conica +ignition coil +Japanese oyster +S-shape +divining rod +ant thrush +throat protector +interlocutor +Desmodus rotundus +pere david's deer +attenuator +Cypriot +red sandalwood +pendulum watch +broadcloth +striped drum +sequence +safety arch +diapensia +hog +western spadefoot +chlorella +comb-footed spider +Chechen +darning needle +C-ration +hard beech +piano action +scaling ladder +Nepal trumpet flower +ravigote +screw wrench +ramekin +Lyonnaise sauce +dinner napkin +partial veil +masseuse +coatrack +mooring tower +blue-eyed African daisy +English horn +baton +rope tow +toll bridge +massage parlor +quark cheese +lounging jacket +tall goldenrod +flying jib +coordinate axis +barley-sugar +integrator +worm gear +captain +sweatshop +class +layer +chili powder +dripping pan +oatcake +newsroom +tadpole shrimp +rake +trade magazine +silks +ram's-head +senior +knower +masseur +yam +peg +wheel tree +hardbake +test room +long-spurred violet +creeping spike rush +shrapnel +coffee senna +matchbox +creeping soft grass +welder's mask +pickaback plant +urial +hooded pitcher plant +incense cedar 
+Ohio buckeye +ant cow +skeleton fork fern +Indiaman +swamp ash +testatrix +marang +spherocyte +Winesap +Indian mallow +teju +Yersinia pestis +dye-works +sauerbraten +coral bean tree +safe house +postulator +eyas +lotus +wood vise +lady-of-the-night +East German +cymling +rock candy +western omelet +anoa +rainbow seaperch +crossover voter +Finn +tree shrew +hog plum +Federal +shagbark +clockwork +Alexandrian laurel +metal wood +brill +military chaplain +trend-setter +call-back +Indian rat snake +spurred gentian +Japanese maple +forest goat +bee moth +viola da braccio +duckboard +armyworm +hangnail +counterbore +cream-of-tartar tree +Mullah +bonbon +water hazard +temple orange +corporatist +rough bindweed +Turkish bath +mistletoe fig +beach sand verbena +caddisworm +English plantain +brown Betty +power pack +lion's-ear +Francis turbine +stayer +dichondra +marsh St-John's wort +squab +energizer +common horehound +mantispid +pullback +handwheel +spark arrester +yakuza +Virginian witch hazel +grunter +waterworks +bondwoman +chain printer +stockjobber +coconut milk +yardgrass +blue chip +bridle path +riser +pleurothallis +saltwort +salal +broadside +blackboard eraser +bastard +Para rubber tree +red bat +digital-analog converter +calabash +cashier +cow shark +horned pout +microphage +monologist +woolly monkey +Illinoisan +marsh horsetail +distaff +siris +eparch +gooseneck loosestrife +sounding rocket +multiprocessor +saiga +xerographic printer +madrona +right triangle +sweet gale +red maids +wolfsbane +pork-and-veal goulash +French sorrel +mutterer +Venetian sumac +drumlin +white crappie +squire +large-flowered calamint +northern cricket frog +mushroom sauce +supertanker +morello +auxiliary boiler +Virginia thimbleweed +cottage tent +bubble shell +big shellbark +wormwood sage +cider gum +coast lily +American feverfew +Peruvian balsam +purple silkweed +tobacco moth +desk dictionary +rock elm +eastern indigo snake +Japanese privet +lamb +levee +L-plate +soapfish +painted 
tongue +scuttle +markhor +Marburg virus +mackinaw +major +crypt +ball and chain +domestic silkworm moth +bottom feeder +mistress +death house +freight elevator +bellyband +Pulex irritans +Bacillus anthracis +fire control radar +hysterosalpingogram +turbogenerator +decompound leaf +vambrace +scentless camomile +Medinilla magnifica +prima ballerina +Northern Spy +quartz lamp +grains of paradise +justiciar +felt fern +seismograph +Madagascar jasmine +imaret +white perch +Alpine mouse-ear +tea bread +yellow bass +poseuse +espionage agent +punching bag +eurypterid +orange sneezeweed +banded stilt +armhole +postern +mother +kapuka +catechumen +Soubise +Sauvignon blanc +gunnery sergeant +self-starter +ceratozamia +Atlantic cod +Reoviridae +blood cup +horseshoe bat +oriental plane +voussoir +fetterbush +samara +truncated pyramid +lingcod +athenaeum +shyster +Carolina hemlock +submarine torpedo +floating fern +yataghan +sun tea +viola d'amore +conenose +ventilation shaft +walk-up apartment +saury +wild wheat +porcupine ball +tahini +kris +grass fern +drip pan +black bryony +Scotch broth +tapioca pudding +southwestern toad +Hare Krishna +guimpe +wild madder +megalocyte +teaching fellow +shrubby penstemon +lesser wintergreen +privet hedge +Fahrenheit thermometer +stern chaser +prickly ash +pump room +ricer +chicken mousse +wing commander +sun gear +bolus +alpine milk vetch +opera cloak +twinjet +Goldie's fern +abnegator +alphabet soup +node +grape jelly +early coral root +Tarzan +quarterstaff +greeter +Eurasian woodcock +primary coil +quirt +tinkerer +bolt +creme de fraise +voltage regulator +news photography +Jat +bristly locust +Gouda +dickey +lobster butter +dwarf flowering almond +fagot stitch +Reform Jew +ostrich fern +bathyscaphe +purple mullein +alpaca +civic leader +jellaba +Arizona ash +wasabi +Irishwoman +choke +stockinet +religionist +sewage disposal plant +bittersweet +Hyphantria cunea +pheasant under glass +screen actor +chapterhouse +quoit +horseshoe bat +rapper 
+cupule +planetary gear +cascade penstemon +redoubt +salt +areaway +megalomaniac +bush willow +amethystine python +plains spadefoot +colour supplement +kick pleat +bell apple +narwhal +slippery elm +stenograph +baa-lamb +quadrant +balker +jobcentre +spit curl +bastard indigo +malacca +serow +adobe lily +yacca +palestra +penalty box +scrub beefwood +reenactor +screening +white bryony +alderleaf Juneberry +harpoon +alpine clubmoss +neurosurgeon +surrey +sweet calabash +Scotch laburnum +coquille +French honeysuckle +extrados +pipe cleaner +southwestern white pine +Virginian stock +scaly lentinus +aileron +carob bar +swordfish +Alpine woodsia +negus +wireworm +sweep +goldfields +drop arch +European bream +roly-poly +pin +bastard wing +fustian +wild buckwheat +lake whitefish +overcoat +water filter +Bermuda chub +New Zealand spinach +high-hat cymbal +European larch +radiologic technologist +fine-tooth comb +brunch coat +splice +electronic converter +overmantel +extern +taper +cluster bomb +teletypewriter +pinwheel +trailing arbutus +quipu +creeping zinnia +orange milkwort +tabard +Australopithecus africanus +melancholy thistle +insole +courser +darkroom +surface-to-air missile system +bark-louse +Confederate +neritina +clip-on +spouter +trench knife +outside caliper +dhak +Limburger +chuck wagon +buttercup squash +shirtdress +pouter pigeon +dirty old man +zodiac +fennel flower +mother figure +appointment +Manichaean +lignum +bouffant +rum sling +Ravenna grass +hibachi +gin rickey +American harvest mouse +cocozelle +western wheatgrass +black crappie +rhombus +Missouri goldenrod +barndoor +wild mango +pneumococcus +Boston lettuce +ratline +desert holly +cobweb +fluoroscope +ethnologist +tor +bullshot +stockade +greave +rock sea bass +slip-joint pliers +taxi dancer +schizophrenic +zill +creme de menthe +orange-blossom orchid +divot +supplejack +busybody +casemaking clothes moth +ramrod +gearbox +birdcall +Wiffle +thwart +beauty consultant +chicken paprika +trawl +skep 
+spirometer +hopper +kvass +doggie bag +bath chair +showy daisy +wild tamarind +Tarsius syrichta +glyptics +Algerian +cargo area +bunk +Velveeta +iconoclast +clinch +New Caledonian yew +false mallow +Japanese tree lilac +convex polyhedron +water boatman +cruise missile +finisher +colonoscope +cumin +wickiup +saccharin +whipcord +trailer camp +eryngo +cuckold +yam bean +fighting chair +forewoman +galingale +citron +positivist +four-lined plant bug +suet pudding +field pea +Circaea lutetiana +deer grass +trap-door spider +common corn salad +mirror carp +sounder +second-in-command +seaside alder +burgoo +ming tree +curry sauce +courbaril +green alder +figure loom +fauld +halfbeak +squelch circuit +cladode +winter cress +tongue and groove joint +dwarf dandelion +joss house +western buttercup +welted thistle +potato tree +anglewing +cookfire +marzipan +hood latch +seed shrimp +common moonseed +toasting fork +bevel +three-quarter binding +midwife toad +stage director +Pentecostal +technical sergeant +golden-beard penstemon +drunk +silky oak +corn gluten feed +T-square +stoker +selling agent +cruse +server +rope-a-dope +bicorn +matzo meal +wide wale +roadblock +false foxglove +tuck box +bandsman +smoke bush +machinist's vise +Highlander +scholiast +self-starter +Swedish rye bread +spark transmitter +maverick +maquiladora +cabinetmaker +compress +rainbow shower +huntsman's horn +mackinaw +copper rockfish +lappet +nitrate bacterium +telephone plug +soutache +Dacron +toboggan +sissoo +yogi +laurel-tree +vice chancellor +Christ's-thorn +cartridge fuse +serial port +quassia +tarweed +pecopteris +beggarweed +anchovy pear +bookbindery +woodland oxeye +toad rush +sandalwood tree +marsh andromeda +Tyrian purple +boothose +tragedienne +fragrant cliff fern +festoon +bondwoman +melancholic +butternut squash +exhaust valve +semi-skimmed milk +glowworm +Virginia oyster +Identikit +ayah +gallows tree +Carioca +monoplane +jewels-of-opar +scallop +moth miller +marsh cress +lobed 
spleenwort +ricotta +emitter +arame +tub gurnard +army attache +maniac +organizer +pheasant's-eye +Melba toast +homeboy +Bavarian cream +Maximilian's sunflower +backstop +Tremella foliacea +yellow avens +spreading fleabane +plumb level +false rue anemone +zabaglione +climbing maidenhair +doeskin +walking shoe +lancewood +material +jacksnipe +South American poison toad +agonist +hinny +paper mill +psychophysicist +valley girl +toast mistress +jorum +tiler +chicken Tetrazzini +trivet +grasshopper +three-mile limit +kink +kiang +pole horse +jig +Cornish heath +hedge thorn +false alumroot +Popper +remount +photojournalist +sideroblast +stonecress +Agave tequilana +Japanese lilac +hawse +maenad +air bag +leaf spring +dwarf willow +soda cracker +contralto +moleskin +pilaster +Audubon's caracara +pia +American organ +bleu cheese dressing +betel palm +PC board +almond willow +socializer +tone arm +stammerer +free-liver +scaler +Gentianopsis crinita +leak +black haw +hound's-tongue +grass pea +Stassano furnace +coralbells +ministrant +perihelion +Luxemburger +powder-post termite +arboreal salamander +cushion flower +foramen magnum +pyrethrum +poacher +woolly mammoth +horned chameleon +tearaway +father-figure +tufted gentian +salmi +finger millet +physa +registrar +polyoma +bamboo shoot +matchlock +seine +congress boot +bulgur pilaf +monosodium glutamate +Kentucky wonder +mycologist +kedgeree +ragweed pollen +boarfish +yellow pimpernel +tan +northern Jacob's ladder +macrobiotic diet +migrant shrike +big-cone spruce +colonialist +white dogtooth violet +bath asparagus +webbing clothes moth +ladies' room +experimenter +prairie bird's-foot trefoil +bootleg +cognitive neuroscientist +fire chief +flagfish +dendrite +stinking goosefoot +fore edge +hogfish +Spanish cedar +hotel-casino +Tory +life-support system +pea flour +cash bar +Chenin blanc +white-footed mouse +Canada garlic +salt-rising bread +roomette +mastodon +bell founder +long iron +bi-fold door +fig-bird +European water 
shrew +dyer's weed +frog orchid +allosaur +Florida yew +wild potato vine +crape fern +flat-topped white aster +klebsiella +oil heater +waxmallow +enjoyer +mesocarp +semidesert +senior vice president +coccidium +burrawong +syllabub +jump suit +harrier +leaf roller +cherrystone +cinchona tree +touring car +eulogist +air force officer +red goosefoot +cat thyme +smoothbore +slugger +cardiac monitor +cobber +blister rust +musicologist +rolled biscuit +Braun's holly fern +hog plum +nonpasserine bird +pascal celery +damson +Jonathan +Sheraton +cohune palm +egg white +baton +sixth-former +Siberian pea tree +choanocyte +wineskin +auditor +detention home +Leichtlin's camas +Chartreuse +clusia +club car +wattle and daub +security blanket +common American shad +assistant professor +marsh pea +camomile tea +gopher hole +gravure +Freudian +spirillum +maharani +equilateral +crow garlic +mammee apple +felwort +hardtop +dillenia +curlycup gumweed +pilot engine +calcimine +wooly lip fern +bitter dock +wineberry +jumper +monolingual +spinning frame +old-timer +native cat +diving petrel +sodium-vapor lamp +marchand de vin +sexton +matelote +interior designer +windfall +mole salamander +minder +bodkin +neutron bomb +Caloscypha fulgens +slinger ring +mezzo-soprano +aura +Southern Baptist +viscacha +midfield +tie +prosthetist +round-headed leek +yellow mariposa tulip +canary grass +staddle +Tokay +Muenster +brazil nut +California black walnut +applesauce +penologist +virgin's bower +tenon +steward +Jerusalem oak +red-bellied snake +bindery +scow +fluid flywheel +bullhead +satinleaf +clove +double glazing +matron +wild parsnip +winged elm +shoot-'em-up +musk deer +white rust +lock +Cornishman +Vidalia onion +corn spurry +freeloader +justice of the peace +inlay +myxobacteria +tiglon +tangram +German ivy +scented fern +woolly daisy +caretaker +gastroscope +scuppernong +spotted sunfish +guilloche +codling +wormcast +Eskimo curlew +tayra +European fly honeysuckle +septuagenarian +third gear 
+coatee +red alder +water ice +cubitiere +frame buffer +gamboge tree +pernyi moth +chicken Marengo +Galliano +Lincoln +true sago palm +hunter's sauce +carpet beater +alpine goldenrod +arch support +vehicle-borne transmission +jilt +paternoster +redcap +Siberian larch +hoary plantain +swan's down +chicane +reverse +divan +kneeler +alexic +mock turtle soup +daffodil garlic +mission bells +squilla +ursinia +winter's bark +trifoliate orange +discina +frijole +Swiss steak +maildrop +knotgrass +dog fennel +drum sander +heroin addict +costume +camber arch +shining willow +lutefisk +red porgy +microfossil +good old boy +angle bracket +pitcher sage +bordelaise +heat exchanger +carrion +bush jacket +fanjet +coach +blackface +sicklepod +Manhattan clam chowder +daisywheel printer +olive +Sphacelotheca +Spanish needles +brown root rot fungus +boudoir +encyclopedist +V-8 juice +red haw +brass buttons +gym suit +skywalk +water wagon +gas-turbine ship +stoup +lisle +sailor suit +box beam +balm of gilead +housemaster +hayrack +neutralist +water elm +brook thistle +doyenne +nark +alpha-tocopheral +WASP +hydrilla +water-shield +footlocker +variola major +pargeting +ion engine +yellow globe lily +Malecite +bloodleaf +yellow sand verbena +whorled loosestrife +packinghouse +Carolina parakeet +Virginia waterleaf +armband +red rockfish +factory ship +moon trefoil +jump seat +water gillyflower +yerba mansa +chamfer bit +compass saw +hopsacking +Indian rhododendron +sickbed +treacle +honey eater +mailsorter +seabeach sandwort +sob sister +primrose jasmine +prince consort +elocutionist +wishing cap +runner +trestle +sugar water +half-and-half dressing +fringed poppy mallow +portiere +bung +swan orchid +weather satellite +beef broth +marblewood +sapper +agitator +wren-tit +grade +allspice tree +spacewalker +American hornbeam +sieva bean +dill seed +potoroo +love-in-winter +alembic +Cheshire cheese +small white aster +Oregonian +flipper +twill +differential gear +Prince Albert +licorice 
+foster-father +Melkite +portraitist +Yosemite toad +Cox's Orange Pippin +slender wheatgrass +knob +silique +Rocky Mountain bee plant +stirrup pump +chicken hawk +sweetbrier +Sierra lodgepole pine +poulette +biohazard suit +striated muscle cell +Geiger counter +World Wide Web +turmeric +prairie wake-robin +latchet +pushball +grill +shooting lodge +floating-moss +refried beans +boojum tree +red poll +toothbrush tree +rabbiteye blueberry +red haw +sweet vetch +delta +upland cotton +ballet mistress +padrone +complementary color +great Solomon's-seal +bud brush +brandy sling +spinster +Andorran +Mojave aster +mackinaw +golden calla +bottom rot fungus +segmental arch +periwinkle +hellion +topknot +copper +Mexican hyssop +weeping love grass +point woman +pathogen +fall cankerworm +common shiner +silverspot +corer +atomic pile +crystal detector +yellow spot fungus +truncated cone +saprobe +variegated horsetail +Cro-magnon +cercaria +aglet +pollster +oyster bed +pancake turner +egg cream +sporozoite +quirk molding +mutisia +sound bow +physic nut +sugar-bush +cow +magnetron +jungle hen +brassie +rock bit +taco sauce +seeded raisin +desert selaginella +folding door +vinegarroon +Pinot blanc +rye +ellipsoid +betel nut +tree of knowledge +ambrosia +long tom +breechloader +bicolor lespediza +cosmetician +monoblast +American oil palm +prancer +farina +caiman lizard +hardball +bullock's heart +cotton rat +whiting +weather ship +sharecropper +creamcups +gas bracket +divinity +ornithologist +yellow twining snapdragon +showy goldenrod +end man +heptagon +sand dropseed +round file +guama +blue elder +sand spurry +raccoon dog +zigzag goldenrod +fast reactor +arctic willow +cyclopean masonry +punter +sgraffito +slattern +storage ring +clipper +pulasan +short-tailed shrew +scammony +daybook +umbrella tree +coloring +element of a cone +gesneriad +cane +burgoo +western coral snake +friendship plant +Leydig cell +scrutineer +hairy golden aster +inclined fault +water milfoil +bryozoan 
+nardoo +native pomegranate +curly grass +Florence fennel +resurrection plant +ice water +crown +ploughman's lunch +clustered lady's slipper +kitchenette +sand sedge +pouched mouse +roadbed +parsley haw +predecessor +super heavyweight +seedless raisin +mailbag +sparling +codling moth +squama +Bercy +thermoelectric thermometer +Jaculus jaculus +saltpan +firmer chisel +round whitefish +ramrod +criollo +pinch bar +slash pocket +thigh pad +velvet plant +intergalactic space +brazilian ironwood +whaleboat +sirrah +hanging fly +aspirator +Dominican +dribbler +yellow-eyed grass +Cornish +geophysicist +tarmacadam +marchioness +rattlesnake orchid +Alaska Native +ilama +myrrh tree +zucchini +licorice root +nosebag +lounger +troposphere +virginal +spaghetti Western +Virgin Mary +waterwheel plant +dry nurse +enate +carpet shark +rijsttaffel +stuffing nut +caraway seed bread +Leotia lubrica +kaffiyeh +Boston baked beans +halophyte +backscratcher +instillator +trefoil arch +pip +digitizer +dosemeter +Carolinian +French sorrel +boards +historian +rangpur +clansman +goral +leatherjacket +coiner +fleece +white globe lily +storm cellar +roundhouse +mediatrix +butterfly flower +swamp gum +prairie vole +rhizomatous begonia +common tobacco +Marco Polo sheep +subarachnoid space +broomweed +safety net +silky wisteria +swagger stick +spectacled caiman +derris root +soap pad +chop-suey greens +summer hyacinth +palo santo +carbohydrate loading +chinch bug +roadman +sheep plant +messiah +desk officer +banquette +drugget +trumpet arch +great duckweed +purdah +heartbreaker +hasty pudding +alligator weed +dragee +yellow bristlegrass +Jacob's ladder +campstool +coffee fern +sweet fern +little chief hare +cat-o'-nine-tails +rep +American red elder +divorcee +black salsify +cambric +sennit +Canada ginger +wonderer +Formica +cream-colored courser +zooid +European beggar-ticks +sorrel tree +piddock +blolly +red-flowered silky oak +bay +Hooker's onion +dark horse +cone clutch +Roman hyacinth +paintbox 
+mestiza +green alder +bill +panicled aster +mammogram +snuffbox fern +Rediffusion +swamp fly honeysuckle +stoup +psychiatrist +nodding groundsel +student union +cold duck +bee beetle +playbox +Psychopsis krameriana +nosh-up +earthnut +narthex +single-rotor helicopter +revetment +sweetleaf +seasoned salt +piculet +speckled alder +mackerel scad +common yellowwood +devisee +static tube +Spanish heath +umbrella plant +fucoid +Chilean +coral-root bittercress +fanatic +cachou +agony aunt +bird's-foot fern +washwoman +torchbearer +placoderm +frosted bat +spicemill +Cape lobster +hard-shell crab +colonizer +camphor daisy +friar's-cowl +false tamarisk +toggle joint +tinsmith +theorist +hydrologist +loganberry +universal donor +northern whiting +tent-caterpillar moth +russet +kangaroo mouse +African scented mahogany +bastinado +breast implant +betel +grade separation +vox humana +stodge +Maryland chicken +Anguillan +oil pump +governor's plum +narcissist +deadwood +private citizen +winker +ropewalker +gidgee +Lothario +ski resort +major-domo +von Neumann machine +belaying pin +water parsnip +Fissipedia +luggage carrier +spring water +oyster stew +kohl +celesta +date-nut bread +punchboard +sunniness +hospital train +man +rack and pinion +mixer +pousse-cafe +narrow goldenrod +Maxim gun +stiff +recruiting-sergeant +watch glass +white hellebore +tung tree +prairie white-fringed orchid +beef Stroganoff +scoffer +grassy death camas +Shawnee cake +tapioca +Short's aster +banker +laparoscope +honeyflower +Caterpillar +electric clock +baling wire +huntress +Surinam toad +art school +incurable +Canton crepe +apple juice +hipline +bronchoscope +marshmallow fluff +Texan +wild fig +sawed-off shotgun +forestay +red kauri +fish slice +Egyptian grass +English walnut +brown sauce +ogee arch +nectary +chambray +leather flower +phloem +Persian violet +bomb calorimeter +western narrow-mouthed toad +soup du jour +sickle alfalfa +caracolito +periscope +coralberry +sword bean +sigmoidoscope +water 
locust +hygrodeik +sycamore +sheikdom +ballistocardiograph +clove +akee +fucoid +jacquard +cat's-ear +puritan +slender wild oat +smooth softshell +purchasing agent +landing craft +chartist +lace bug +sharksucker +Virginia chain fern +horseradish +namer +ripcord +personage +aspirin powder +puku +Wankel engine +nightcap +velvet bent +roridula +cytogeneticist +olm +almond extract +common heath +fringe-toed lizard +Kentucky yellowwood +lithosphere +cramp +bulgur +scurvy grass +officer's mess +frigate +electroscope +giant chinkapin +opah +rutabaga +wood hoopoe +Farley maidenhair +shingle tree +argentine +router +palm nut +quillwort +hiba arborvitae +runcible spoon +hireling +sickbay +alpine totara +white lupine +Cotoneaster horizontalis +desert plume +staghound +Sea Scout +opalescence +enophile +Jersey elm +coal house +Helvella acetabulum +selenium cell +white camas +creole-fish +auger +fragrant agrimony +research center +achromia +shank +cottonseed +mod con +extension +sugar beet +winter flounder +silky dogwood +strop +tokamak +rabbit ears +baby farmer +fireman's ax +serration +taproot +socket wrench +action officer +Chilean jasmine +Greek fire +stem-winder +body louse +lumpsucker +stink bomb +American lady crab +dicer +lie detector +maneuverer +black-headed snake +tiger moth +shooting stick +spermatid +babushka +deaconess +home +prior +chanfron +chickasaw plum +big-eared bat +rusty woodsia +tertigravida +miniver +combretum +habit +bluehead +angled loofah +gipsywort +fire-on-the-mountain +purple milk vetch +alpine gold +merozoite +loddon pondweed +Uniat +provost marshal +Gyromitra fastigiata +Coigue +proconsul +oarfish +San Jose scale +filature +chimney plant +spiny softshell +bluecoat +live axle +river limpet +clever Dick +pink bollworm +Japanese plum +roarer +caricature plant +wardroom +Texas chachalaca +Bahia grass +Moreton Bay tulipwood +accessory fruit +pearl barley +ashcake +bunt +Polynesian tattler +pine fern +laughing owl +potato fern +speaking trumpet 
+adjoining room +bearing rein +banana quit +redbrick university +Scleroderma bovista +magdalen +pressurized water reactor +advisee +NIMBY +poorwill +almond moth +comedian +star tulip +cracked wheat +water pump +guest of honor +yellow-breasted bunting +hire +pedate leaf +augur +purple locoweed +Socinian +upland white aster +guesthouse +double reed +detention basin +rollmops +hitch +bodega +mayeng +sparkplug wrench +attack dog +peach melba +heliozoan +tower mustard +blue mold fungus +lamplighter +banded sand snake +smooth crabgrass +elsholtzia +bodkin +Aegean island +bag lady +alewife +arcella +electrical contact +common ax +animist +concave polyhedron +coalface +climbing perch +yellowtail +hobble skirt +marquee +Russian dandelion +snow mushroom +polo ball +NADA daiquiri +cormous plant +chaparral mallow +inside caliper +milking stool +fallout shelter +sea gooseberry +Danish blue +grissino +chimney breast +mosquito fern +soundbox +spring chicken +epauliere +cape forget-me-not +japan +saddle oyster +white fritillary +push-button radio +bladder senna +bladder stone +macedoine +moire +Shawnee +starnose mole +douroucouli +horseradish sauce +electron gun +cotter +console +park commissioner +free press +lump sugar +western poison oak +apple maggot +keurboom +lisper +griffon +burin +horseshoe whipsnake +Jacobean lily +spinner +cochineal insect +emesis basin +sowbane +humanitarian +uakari +three-dimensional radar +wild hollyhock +heartseed +swinger +two-by-four +mop handle +common amsinckia +traitress +rush aster +fibrous-rooted begonia +violet-flowered petunia +milliammeter +alidade +azure aster +celery seed +snorer +scarlet plume +obtuse leaf +heathen +rose chestnut +headrace +dwarf buckeye +Pacific tripletail +wiggler +bounty hunter +Lowlander +slate pencil +typist +syconium +vaquita +skybox +business lunch +gusher +curacao +palometa +Diapsida +light diet +sourdine +thorny amaranth +potato fern +cartridge extractor +peshmerga +chaffweed +tahoka daisy +hematologist +massage 
parlor +diverging lens +breadroot +papyrus +amarelle +cover plate +hubbard squash +cryptomonad +whitetail prairie dog +rabbit burrow +orthochromatic film +goncalo alves +Chile bonito +tent-caterpillar moth +Manila grass +buck sergeant +mustard seed +crested wheatgrass +wise guy +asarabacca +field pea +bite plate +barbasco +heart-lung machine +mouse-eared bat +piping guan +gun pendulum +climbing onion +fungus gnat +Livonian +one-hitter +Chilean firebush +Sonoran whipsnake +round scad +myelogram +Rhodes grass +vomitory +roble beech +South-African yellowwood +molasses +Velcro +common calamint +radiation pyrometer +sketcher +chaparral pea +coffee stall +Australian nettle +bilimbi +Khedive +visionary +field spaniel +devilwood +collimator +Siberian spruce +sling +limestone salamander +ribbon worm +hazel +petter +coolant system +artillery plant +bailiff +chameleon tree frog +microsporophyll +maiden blue-eyed Mary +Drosophyllum lusitanicum +cocozelle +king post +nailer +knobkerrie +tovarich +Intelnet +worm lizard +drop forge +wool grass +brown bullhead +anthropoid +vitamin A2 +creche +hickory nut +whiffletree +deipnosophist +Muskhogean +masochist +hypsometer +gliricidia +complexifier +wild licorice +reconnaissance vehicle +fives +beefsteak plant +eastern dasyure +bookworm +crested coral root +wire recorder +cinnamon vine +bubble +Newfoundland dwarf birch +spruce bark beetle +teetotaler +fad diet +ascus +spicebush +African coral snake +soft-shell crab +Postum +packhorse +sand cherry +cricket-bat willow +middlebrow +Hungarian sauce +buffalo clover +jimsonweed +latanier +stablemate +jumper +zoospore +smooth woodsia +flowering ash +unilateralist +lomatia +flapper +wild cotton +Siberian wall flower +probe +bankrupt +blockade +lemon geranium +fig leaf +basic point defense missile system +clack valve +buttinsky +ingenue +mountain everlasting +zebra-tailed lizard +shaving-brush tree +evergreen huckleberry +core drill +lugworm +Cashmere goat +doorjamb +minelayer +student center 
+horsehair +European dewberry +white broom +arenavirus +eastern poison oak +rye ergot +Tupi +tensiometer +fleawort +coquille +icing sugar +junior lightweight +Doppler radar +mahuang +candlepin +chambermaid +evergreen blueberry +Eton jacket +parvis +solleret +molded salad +malvasia +birth-control campaigner +nonagon +backswimmer +ogee +bowstring +salt marsh mallow +trapezohedron +hoary willow +speech therapist +Zinjanthropus +core +red-backed mouse +eptatretus +mossy saxifrage +Aristotelian +Thessalonian +searing iron +bifocals +falangist +field pea +packsaddle +lay reader +hoecake +cuboid +white maire +iceman +lobscouse +neckcloth +color-blind person +Chinese holly +assemblyman +white-lipped peccary +kava +plastron +crab louse +hook wrench +trailing four o'clock +junior +skilly +internet +tonguefish +footman +sub-assembly +evangelist +track +bench lathe +desk clerk +scalded milk +chamois cloth +American marten +chachka +nondescript +pellitory-of-the-wall +swamp candles +procurator +cuddy +farkleberry +mountain male fern +trawl +dual scan display +fish meal +prospector +convener +guano bat +ant shrike +picture rail +sand rat +gynophore +quilting +sleeper +summer savory +Cotoneaster dammeri +smooth sumac +slumgullion +suite +catalufa +spherule +lean-to tent +gryphon +gas shell +short iron +sweet sultan +dewberry +Victoria plum +American water shrew +X-ray tube +macebearer +green arrow arum +abbe +poke milkweed +atheist +Fosbury flop +Ord kangaroo rat +moldboard +wheat germ +explosive trace detection +whippoorwill +examiner +tallyman +Crookes tube +wild peach +fringed grass of Parnassus +Crookes radiometer +Atlantic croaker +lobster stew +spring cress +maggot +pacer +hydra +Zionist +pepper tree +diamante +baize +Rhodesian man +county agent +respecter +Anglican +antimacassar +materialist +Swan River everlasting +cloud grass +toll line +C battery +chinese mustard +grass poly +warming pan +seasonal worker +common sickle pine +bathysphere +elegant Habenaria +card table 
+Chilean cedar +brocket +collimator +malted milk +avadavat +fire marshall +coloratura +yellow spiny daisy +fingerstall +narrow-leaf penstemon +indigo broom +pillwort +bearberry willow +Etonian +certified milk +climbing bird's nest fern +field coil +wrist pad +parr +kaoliang +engelmannia +stocker +satrap +Nantua +spearfish +caper tree +gold-tail moth +mountain chinchilla +sea milkwort +westerner +army cutworm +leaf-nosed snake +neurobiologist +xeranthemum +Eastern silvery aster +ecclesiastical attire +caper +Ukranian +bight +button fern +peach pit +oligodendrocyte +maar +digitigrade mammal +streptobacillus +sensitometer +preemptor +oat +bell foundry +crown lens +rock purslane +Junior +Brazilian guava +kicksorter +Ohio goldenrod +red mulberry +King's Counsel +mountain four o'clock +fairy shrimp +fell +oca +sycophant +chantry +dermatoglyphic +bomblet +keyhole saw +hangman's rope +little barley +lion-jaw forceps +giant scrambling fern +popper +dulcimer +Espagnole +tardigrade +smooth-haired fox terrier +bullbrier +rewa-rewa +Japanese poinsettia +trunk line +cannery +helminth +American spikenard +prince's-feather +arthroscope +ginger +aphakic +pilot bit +angle of refraction +low-sodium diet +wall creeper +growler +praetorium +Hall of Fame +soupfin shark +Molotov cocktail +kaffir boom +stitcher +sawwort +flagellant +Atlantic herring +Reticulitermes lucifugus +voltaic pile +snowy orchid +southern flounder +skysail +osage orange +white mullein +lined snake +tolu tree +poliovirus +foreman +burette +jackass bat +invigilator +electromyograph +acarus +presence chamber +columbian mammoth +hyacinth bean +pilot +meadow jumping mouse +Maria +outskirts +aftershaft +Queensland nut +schlockmeister +plainsman +afropavo +scarlet musk flower +five spice powder +gunboat +multiplex +Dutch uncle +louvered window +chimney corner +cuscus +psalmist +Vichy water +signer +amphiuma +harmonizer +authorizer +naiad +control rod +stentor +mountain bladder fern +gig +read-only memory chip +assenter 
+vixen +hermitage +corn dab +locksmith +cockspur thorn +variable-pitch propeller +western red-backed salamander +dolman sleeve +cultist +sweet buckeye +pine vole +Peking man +mountain swamp gum +nimblewill +bethel +aye-aye +lancelet +teff +Alpine celery pine +endive +nipa palm +center of curvature +seeder +Sabahan +sea scallop +social secretary +gorgonzola +western chokecherry +misanthrope +rabbitweed +beggarman +button fern +white mallee +doodia +mastiff bat +roper +prima donna +blanc +holding pen +fingerling +skyhook +flophouse +steam chest +crystallized ginger +acrocarp +horse pistol +true mahogany +costmary +ballistic galvanometer +jaunting car +bartonia +rep +mandibular notch +bubble and squeak +umpire +fringed loosestrife +bear oak +ski jump +staggerbush +plumcot +thermal reactor +field brome +bodkin +jackknife-fish +malope +writing arm +gold fern +Stayman Winesap +merlon +eclectic +fluxmeter +emeritus +imam +drum +pop tent +capital ship +subalpine larch +flail +Lorenzo dressing +tomboy +eastern woodrat +warrantee +Pacific spiny dogfish +sheepshead porgy +farthingale +Cryptoprocta +power loom +communicant +howdah +ectomorph +false foxglove +basset horn +odd-pinnate leaf +Wisconsin weeping willow +Queensland bottletree +dampener +corbel arch +silent butler +Circe +town clerk +Japanese chestnut +bloodwood tree +switcher +cup hook +spreader +rice rat +straightedge +traverser +fluid drive +Spanish paprika +sour milk +poison camas +bean dip +card table +vinegar fly +vizier +electric-discharge lamp +purple rock brake +dynamo +Japanese snowbell +Grindelia robusta +neuroglia +safflower seed +coronet +frown line +Renaissance man +Steller's sea cow +book scorpion +isosceles triangle +arthritic +spherical triangle +kangaroo mouse +garden orache +stemless hymenoxys +titi +out-basket +gent +columnea +mint sauce +mouthbreeder +Liebig condenser +cheerer +assegai +stickler +Merostomata +dimmer +grey poplar +common heath +scorzonera +glory hole +Blackfoot +oil slick 
+musketeer +apple geranium +daisyleaf grape fern +gas furnace +bijugate leaf +Arabist +star-thistle +hand throttle +huckleberry oak +lift pump +maulstick +Rome Beauty +Newburg sauce +pit +volunteer +Baldwin +ark +Asian horseshoe crab +black calla +marlinespike +Gentianopsid procera +guinea gold vine +tucker-bag +desk sergeant +piezometer +migrator +keelson +executrix +sackcloth +onion smut +buckboard +substitute +pudge +mess +cinchona +intervenor +gravimeter +pederast +censor +gastroenterologist +cutlassfish +launch +demerara +Diegueno +bog bilberry +aglet +soda fountain +crank call +harpoon gun +ribbon fern +Gurkha +output device +epilating wax +greasewood +water horehound +return key +fairy swallow +spatulate leaf +culverin +leptocephalus +kleptomaniac +barley water +bleeding tooth +Cheyenne +maleberry +limber +tapenade +whorled aster +toe +revenant +lap joint +vein +truant +florest's cineraria +morning dress +trichodesmium +nightshirt +element of a cylinder +shopaholic +section hand +electrodynamometer +Guadalupe cypress +rosebud +racist +avaram +keeled garlic +Alaska rein orchid +orange toast +cunner +dipstick +Neolentinus ponderosus +bulbil +charlotte +pull-through +header +Manduca quinquemaculata +persona grata +elegist +cafe royale +scup +semanticist +wood sage +field magnet +tundra +bay myrtle +alluvial flat +arrowleaf groundsel +celtuce +baryon +must +entrant +othonna +pied-a-terre +liza +sticky aster +grasshopper mouse +prison guard +tire iron +bomb rack +Spanish American +sheltered workshop +turfing daisy +backbone +tangle orchid +creeping willow +dumb bomb +horse cassia +barosaur +Yavapai +shrimp Newburg +peanut worm +dwarf chinkapin oak +corchorus +brick cheese +by-catch +stover +Urnula craterium +clasp +Kekchi +alpine coltsfoot +soybean future +altar wine +ripping chisel +encephalogram +mountain spleenwort +transferee +remoulade sauce +American rock brake +stenographer +read/write head +loblolly +ground +powdered mustard +brake band +sea dahlia +freak 
+proconsul +Coffey still +Sivapithecus +pellitory +palm cat +skew arch +American angelica tree +vigilante +candelilla +andryala +amarelle +swiftlet +petcock +associate professor +sclerite +open circuit +Virginia crownbeard +Last Supper +button tree +scyphozoan +margate +mercury cell +horsewhip +water scorpion +companionway +drop cloth +Amhara +miraculous food +pro-lifer +embryologist +Creole +bombazine +Indian blackwood +cubeb +trace detector +gros point +main-topsail +meringue kiss +spree killer +capstone +specimen bottle +woolly apple aphid +silverweed +American barberry +gallfly +European bog asphodel +northern flying squirrel +alliterator +Old Catholic +heliograph +Pteris cretica +tippler +pump well +allspice +balancer +scarlet bugler +lantern fly +white prairie aster +krummhorn +robin's plantain +Pacific sardine +patty-pan +decaffeinated coffee +western saxifrage +warrantee +colorimeter +ball bearing +makomako +foot +troika +apricot sauce +data multiplexer +rose-root +sound film +Northern dewberry +water hickory +swing door +spastic +Oligoporus leucospongia +botulinus +tamale pie +Sagittarius +muff +spicebush +petiolule +pump action +Parry's pinyon +split-pea +rudder blade +princess royal +wormseed mustard +honey guide +pip-squeak +fin keel +foretop +cyrilla +Navaho +melanocyte +deist +silver tree +citrus whitefly +Morrow's honeysuckle +green peach aphid +longanberry +call-board +wild yam +novelist +toothed spurge +alienee +pond apple +allspice +Carolina lupine +Jack of all trades +white false indigo +boiled dinner +princewood +sailor's-choice +false bracken +microbrewery +black grama +tutee +brickkiln +sea raven +guesser +wirework +European lemming +thyrse +plains lemon monarda +milo +shunt +spotted cowbane +anchovy sauce +grande dame +Maryland golden aster +Chinese puzzle +boarfish +burweed marsh elder +defense contractor +nitric bacteria +Belgian hare +beach plum +conformal projection +sand fly +steering linkage +quickset +Mahayanist +Geiger tube +loudmouth 
+Lancastrian +brownie mix +ex-spouse +deltoid leaf +Shasta salamander +rabbet joint +purple anise +garibaldi +gebang palm +bladderpod +Host +great bowerbird +string cheese +spinning jenny +drift net +matriarch +guar +bitter betch +panda car +mess +plains pocket mouse +scarlet wisteria tree +deerberry +reamer +homing torpedo +molehill +stockyard +reniform leaf +rag +symmetry +Texas star +lerot +pickle relish +three-seeded mercury +cotter pin +ice-cream bean +farmyard +bar magnet +hansom +prickle cell +renal cortex +pest +Ultrasuede +sailing master +brougham +wastrel +amboina pine +Canary Island hare's foot fern +ninepin ball +southwestern lip fern +usherette +lemon drop +star begonia +weeds +saltworks +Persian melon +corbina +medusa +bucksaw +Gibson girl +diameter +American twinflower +kino +clear liquid diet +angiocardiogram +wetter +oyster cracker +yellowfin mojarra +wild parsley +life tenant +broom closet +Corynebacterium diphtheriae +square shooter +bedwetter +ball-and-socket joint +nonsolid color +Salmonella typhimurium +buffel grass +hip pad +subaltern +heliothis moth +trail boss +hayloft +Francisella +primordial dwarf +cock-a-leekie +sugarplum +propulsion system +tyrolean +Carib +salai +ketembilla +ironclad +cornhusk +heckler +multistage rocket +north island edelweiss +Chaldean +twenty-two pistol +Francophobe +scofflaw +sickle feather +screw bean +sea squill +Scopolia carniolica +agglomerator +western holly fern +presenter +straight pin +Myxine glutinosa +Colbert +clover-leaf roll +war paint +bird's-eye bush +longfin mako +running suit +arrow wood +margrave +blue fleabane +dracontium +plastron +chimney swift +child prodigy +commissar +turtle soup +postulant +archaebacteria +snakefly +Pitot tube +chap +smilo +Malthusian +French roof +worm wheel +gulag +pointed-leaf maple +pull-off +Cathaya +American green toad +ball cartridge +infiltrator +snowfield +crotchet +auxiliary pump +bearnaise +galax +chaenactis +olympic salamander +sundowner +cows' milk +beach plum 
+moss-trooper +Arabidopsis thaliana +cat's-claw +bog rosemary +ribier +book agent +bumper jack +beefwood +monk's cloth +alpine bearberry +climbing fumitory +cucking stool +puka +Piltdown man +property man +discharge lamp +X chromosome +knobble +lobster Newburg +herbalist +sunray +golden saxifrage +leopard cat +muffle +stonewort +blancmange +intraocular lens +trepan +desert mariposa tulip +plume poppy +Dane +martynia +shaver +white milkweed +napu +tansy-leaved rocket +abortus +telemeter +tansy mustard +harpy +honeysuckle +ironworks +testacean +Tartuffe +silvervine +Sihasapa +surface gauge +western blind snake +paramyxovirus +Icelander +bird louse +stockbroker belt +test-tube baby +ague root +little golden zinnia +dietician +elephant's-foot +dirty bomb +sailing warship +brier +tinter +Connemara heath +potato fungus +bait casting +decagon +rosefish +die +high-pass filter +solitaire +widow's walk +goldthread +Tudor +trews +orange pekoe +ninon +soda jerk +sump +flying carpet +burial garment +oblanceolate leaf +press gallery +Shintoist +three-centered arch +spreading pogonia +Moro +foxtail orchid +Ghanian +dry kiln +thane +naranjilla +bitter pea +American bugbane +apron string +oyster fish +Port Jackson fig +prize winner +high-water mark +Oneida +smoking room +potato skin +charge d'affaires +gantlet +amyloid plaque +barmbrack +mate +arrow leaved aster +handbarrow +horned screamer +virago +linoleum knife +rattlesnake root +K ration +reset +foot brake +red coral +good guy +aberrant +lavalava +poleax +garden webworm +sneezer +mountain heath +American dog violet +eolith +chimneysweeper +matriarch +smalltooth sawfish +sea mouse +tubercle bacillus +superconducting supercollider +Abney level +darnel +gherkin +celery salt +Tungus +pulasan +oriflamme +death camp +redhorse +apprehender +scion +selectwoman +pentahedron +principal +old school tie +slice bar +chanar +pimento butter +wailer +zero +mescal +rosebud orchid +stone bramble +Jarvik heart +NOC +pitchman +rat cheese 
+strawberry tomato +dwarf golden chinkapin +landau +tocsin +ampulla +scratcher +crab Louis +ginseng +ripcord +polluter +tensiometer +eyewitness +aalii +Oregon crab apple +conservator +day jessamine +hexahedron +suture +tippet +linsey-woolsey +vernal witch hazel +stainer +egocentric +canistel +nudger +shipping agent +shortleaf pine +battle sight +cheese spread +weeder +incendiary bomb +honeyflower +stovepipe iron +stepper +hellgrammiate +votary +aflatoxin +arquebus +impulse turbine +pipewort +garrote +glow lamp +pigsticking +blood clam +surface search radar +Bolshevik +platen +chariot +Gentianopsis thermalis +water level +quandong +catalytic cracker +giant foxtail +nut butter +drainplug +holdover +coastguardsman +Secretary of Health and Human Services +Seeing Eye dog +American plaice +coquilles Saint-Jacques +christella +medium +clingfish +lally +light-o'-love +Gentianopsis detonsa +taper file +signal detection +trip wire +lignosae +receiver +sedan +mud puppy +corn sugar +Philippine mahogany +magnetic pole +jointed rush +trapper's tea +Dorking +welcome wagon +clammyweed +guard +false azalea +convalescent +babassu +dedicated file server +colossus +air search radar +marquess +straight flute +sand stargazer +sea catfish +rosilla +ripsaw +Bermuda onion +peach sauce +sagebrush mariposa tulip +yashmak +Virginia mallow +erose leaf +sand blackberry +boulevardier +forester +choragus +onion mildew +threadfin +winged pea +sugar daddy +rotary press +styracosaur +rathskeller +Japanese millet +anchorite +coral drops +false gavial +eastern pipistrel +cheese press +Chinese primrose +pamperer +real estate broker +power worker +breeder reactor +nutcracker +piano wire +cushaw +Sinanthropus +firebreak +kelp greenling +herba impia +toll call +yoke +bird fancier +evening-snow +fever tree +reed meadow grass +flanker back +toggle bolt +Santa Cruz cypress +carbonnade flamande +northern dune tansy +mikado +millettia +forty-five +court +icepick +holm oak +Japanese angelica tree +Pacific cod 
+cant hook +urologist +spelt +lekvar +enologist +Mediterranean flour moth +prickly-edged leaf +Spanish grunt +dune cycling +frostweed +whisperer +tucker +Roman wormwood +counterterrorist +woolly alder aphid +Nuttall oak +snail butter +threshing floor +motley +forge +water mold +mummichog +sulfur paintbrush +head +walking delegate +jujube +peachleaf willow +Christmas bells +valley pocket gopher +bear's-paw fern +Lanthanotus borneensis +pearl hominy +placeman +swage block +offerer +stargazer +jeweler's glass +male chauvinist +crossbar +Oktoberfest +tamarau +micronutrient +large-leaved aster +tasset +tepary bean +sausage curl +ivy +snob +roller towel +wood meadowgrass +archil +padrone +prairie rocket +tongueflower +kidney fern +Carolina buckthorn +sea island cotton +landscape architect +realist +oyabun +mother hen +ostracoderm +esker +heliophila +nympholept +shining clubmoss +press agent +clam dip +Djiboutian +white currant +codfish ball +hand cheese +kraal +trident +conventicle +bacteroid +Indian plantain +quandong +kola nut +signor +theater light +musk clover +canistel +silent partner +steel-wool pad +diggings +affluent +sightreader +John Doe +arrowworm +goatsfoot +guardroom +wild cinnamon +kaffir boom +ink eraser +yardie +industrialist +sea lily +polarimeter +Polistes annularis +western big-eared bat +omnivore +Ted +horsecloth +crab cocktail +vacuum chamber +flower-of-an-hour +bilge +poleax +neolith +Montezuma +plum-yew +welfare case +trave +pipe bomb +shading +Centigrade thermometer +bangalore torpedo +celery top pine +nuclear rocket +fowling piece +anti-Semite +landscape +derris +bush honeysuckle +Mediterranean water shrew +ticket collector +masked shrew +white dipladenia +Savoyard +bondman +tempter +pygmy cypress +pentathlete +thruster +usurper +Arminian +yerba buena +ice field +ichthyosaurus +sackcloth +bean tostada +Oxbridge +Pteropus hypomelanus +thinker +bank robber +ape-man +thurifer +knawel +mule fat +hot spot +hairy-legged vampire bat +night raven +hook 
and eye +crocodile bird +skunkweed +beaver rat +cypress sedge +florida selaginella +April fool +Jonah crab +glass wool +corkwood +dwarf elder +hinging post +gentile +Brazilian trumpeter +witch doctor +thermograph +pink shower +Mao jacket +capelin +parang +bradawl +stooper +jewel orchid +citrange +oarswoman +Macedonian +particolored buckeye +pachycephalosaur +satinwood +Chinese brown sauce +peep sight +straight man +quandong +chamois cress +nonfat dry milk +rosin bag +Leiden jar +Grimes' golden +spirillum +grass vetch +carillonneur +downy wood mint +melon ball +sweet calabash +chlamydospore +bombshell +sidewall +sprig +Indian button fern +globe pepper +rough-stemmed goldenrod +bocconia +bubble chamber +sand dab +plum-fruited yew +aecium +marrowfat pea +hobbyist +whipper-in +salad burnet +neckband +Tangier pea +sauce Louis +salad burnet +artist's loft +koumiss +Nazarene +cutter +scrim +drape +crab-eating dog +deckhand +bedroll +gaff +stifler +pink lady +great plains paintbrush +patternmaker +yoke +caryophyllaceous plant +angrecum +quadriplegic +grid +genlisea +aspic +water table +junket +signore +Mutillidae +proprioceptor +pivoting window +Indian poke +synchroscope +trichion +tarahumara frog +proctoscope +abomination +purslane speedwell +breast drill +Japanese barberry +mandrake root +breakable +salon +American watercress +take-up +entrenchment +cocktail sauce +Scotch asphodel +borough +matchmaker +Seneca snakeroot +pointsman +psephologist +clustered poppy mallow +onion thrips +nuclear-powered ship +organizer +deciduous holly +balsam willow +enzymologist +caraway +drip loop +dog laurel +Orangeman +sapsago +polymath +backplate +leathery grape fern +modillion +two-timer +handhold +consignee +white stringybark +nettle-leaved goosefoot +bookmaker +disk drive +doliolum +palmist +packinghouse +Spandau +Whipple's penstemon +sword grass +ribbon development +pearly-shelled mussel +winter heliotrope +rogue elephant +deck tennis +Venus's flower basket +football +shim +boatswain 
+blinks +armored catfish +hooded seal +outdoorswoman +water starwort +upholstery needle +pleurodont +silky anteater +cornmeal +lead-in +redfin pickerel +horse balm +Rydberg's penstemon +cascade transformer +fly poison +Volvaria bombycina +broad-leaved twayblade +pastry cart +body plethysmograph +waverer +hardware store +Parry's penstemon +European sanicle +strawberry geranium +cross-examiner +head gate +devil's tongue +hemiepiphyte +pine hyacinth +machmeter +spirit lamp +field judge +Rock Cornish +mayhaw +Sassenach +bog pimpernel +parallel interface +crowberry +roach +Aegyptopithecus +cajan pea +lapboard +cryostat +magnetic storage medium +white yam +Lombard +rhymer +bed and breakfast +bunya bunya +rifle grenade +caterer +collared pika +anti-submarine rocket +bookkeeper +Western mountain ash +profit taker +fruitlet +Knowlton's cactus +infernal +beefsteak begonia +lunula +emulsion +intermediate wheatgrass +titfer +European sea bream +bigeye scad +yak butter +kola +cone pepper +plesiosaur +ragwort +penal colony +black carpet beetle +lubber's hole +Stapelias asterias +yard marker +balloon bomb +Scythian lamb +armory +selsyn +marblewood +spirula +fatalist +hash head +armiger +Dom Pedro +white-chinned petrel +ballast +orthopter +greater water parsnip +clutch +largeleaf holly +Evangelist +king whiting +tuna fish salad +Muscadet +surpriser +jumping bristletail +proportional counter tube +Hamburg parsley +obstructionist +pus-forming bacteria +creep feed +stepbrother +janissary +control freak +trusty +trepan +King William pine +orthicon +geological horizon +molecular biologist +violator +pariah dog +Austrian +conciliator +Fauntleroy +packing needle +mazer +Saturday night special +leucocytozoan +coastal rein orchid +whirligig beetle +capitalist +breeches buoy +clubroot fungus +meadow spikemoss +Kichai +Spanish lime +land office +camera obscura +strafer +purple-stemmed aster +lusterware +valve +Roman nettle +isthmus +breadstuff +sealskin +maleo +bilge keel +carissa plum +fish 
fly +kolkhoznik +heath pea +cowage +hog sucker +Sam Browne belt +inductor +wild licorice +Socotra begonia +supernumerary +Angle +red shrubby penstemon +toilet kit +tawse +sweet bells +kawaka +brown soft scale +lyssavirus +betting shop +double-crosser +macrotus +climbing hempweed +poi +strip mall +deadhead +petit juror +tract housing +American mistletoe +lace-flower vine +precipitator +endoparasite +hairy wood mint +red snapper +Victorian +hog peanut +line of heart +opossum shrimp +plumcot +Bavarian blue +slops +light flyweight +oregano +sand myrtle +pocket battleship +curator +narc +hydraulic cement +plains pocket gopher +closed loop +pluralist +molter +Christmas bush +snuffers +slender knapweed +footwall +plage +caper tree +red siskin +tender +boat train +tipster +low-pass filter +student lamp +morosoph +japonica +bellows +herald +oyster plant +savory +mail +computational linguist +blade +winter crookneck squash +zoomastigote +blackmailer +richweed +dialectician +genip +plumed scorpionfish +jet bridge +thermopile +billy buttons +Brule +millwright +Arenaviridae +Jones' penstemon +monastic habit +genipap fruit +burnous +dairyman +top +crab-eating raccoon +quadrangular prism +pilot burner +weeder +trireme +boy wonder +man of letters +Catawba +high-muck-a-muck +light circuit +bloodworm +lappet caterpillar +half-and-half +office boy +saddle stitch +mistletoe cactus +false chamomile +Catalina cherry +workhouse +Jamaica quassia +britches +tooth shell +reduction gear +carrot pudding +balsam woolly aphid +handspike +aioli +silver hake +flour bin +wireman +gas-cooled reactor +aficionado +plus fours +gitano +gene chip +oilfish +ingenue +tulip orchid +late purple aster +pork and beans +envoy +lemon extract +milk bar +black huckleberry +ground roller +Connecticuter +siderocyte +Jacquard loom +chub +meat safe +stock cube +Australian sumac +purple sanicle +tailless tenrec +dog wrench +rainbow cactus +castor bean +scintillation counter +eohippus +pawnbroker +gauge boson +front 
man +early warning radar +bearing wall +Bourbon +sandwichman +sild +gravelweed +perishable +cembra nut +riflebird +quicksand +slate +sweeper +ship-towed long-range acoustic detection system +defamer +president +vitamin K3 +challis +tanekaha +bloodwort +grenadier +quietist +Zairese +fucker +foremother +gesneria +print buffer +salsilla +fissiped mammal +fender +consulate +acidophilus milk +Southern dewberry +snail darter +Panama redwood tree +dehydrated food +bush willow +coffee fungus +Sinologist +Mesoamerican +hood +large civet +deck-house +cyborg +smuggler +pepper sauce +cyberpunk +Grand Inquisitor +persona non grata +haggis +weeping tree broom +stop bath +modifier +coyol +conodont +yellow giant hyssop +optical pyrometer +Carolina moonseed +marinade +aspartame +false wintergreen +cityscape +philter +turnery +hemiplegic +chuck-will's-widow +vower +track star +myrtaceous tree +small civet +intelligence analyst +dogcart +yardman +cross bit +holometabola +platen +sweet cassava +Comstock mealybug +acute angle +Communist +alcohol thermometer +mountain hollyhock +Mead's milkweed +highjacker +Townes +congou +Astrophyton muricatum +lazybones +roughcast +pressure cabin +clinch +cinnamon +smoke bomb +quandong +tout +office-bearer +punctum +efficiency apartment +Queensland hemp +Ceylon bowstring hemp +newswoman +vermin +fetid bugbane +grantee +sanitary landfill +gluten-free diet +clabber +shillelagh +white lettuce +sweet coltsfoot +beggar's lice +samite +loser +flasher +water star grass +banana passion fruit +translator +artificial kidney +Virginia creeper +American crab apple +cactus mouse +nebbish +Ligustrum obtusifolium +vox angelica +stringer +hunter +know-it-all +scene painter +invalidator +jungle cock +basilica +coriander +California single-leaf pinyon +miles gloriosus +pina cloth +law agent +scarlet fritillary +keurboom +bailor +ramjet +seedling +rib joint pliers +ways +picket ship +Surgeon General +wasabi +marquis +clostridium perfringens +Helvella sulcata +furnace 
lining +kingwood +painted sandgrouse +plain wanderer +Indian madder +silver screen +bailey +dwarf spurge +Serbian +ball-buster +shaheed +Platte River penstemon +tensiometer +mute +nymphomaniac +Yokuts +arroyo willow +whipping post +class act +load +winged everlasting +periodontist +diarist +robber frog +diestock +curry powder +ratchet wheel +store detective +hog plum +prune whip +shortwave diathermy machine +Anabaptist +post chaise +Kennan +bean caper +delegate +orderly sergeant +celtuce +jumping bean +gowen cypress +puddingwife +registered nurse +West Saxon +rosita +gun room +nasotracheal tube +matchboard +flagship +Boswellia carteri +Canadian pondweed +wonder boy +sewer rat +dimetrodon +pantograph +marsh bellflower +angoumois moth +slippery dick +woolly indris +creme de cacao +dulciana +Jewess +Macadamia integrifolia +least shrew +don +diffuser +black-stem spleenwort +grouseberry +goniometer +annotator +sticktight +gossip columnist +speechwriter +capon +rock hind +Liederkranz +chandler +echocardiograph +sidelight +fisher +brocket +New Zealand daisybush +northern sea robin +roller bandage +peachick +pellet +pichi +plug fuse +spark coil +buckwheat +brood bitch +wedgie +dwarf bilberry +filigree +bull +queen +dodo +Salish +denticulate leaf +Western silvery aster +Prima +magnetic bottle +fetterbush +process-server +nainsook +mythologist +Piedmont glacier +hammerhead +niggard +Mound Builder +Kui +Nootka +highbinder +passenger pigeon +oblong +tickler coil +agnostic +succorer +esophagogastric junction +dressmaker's model +bombshell +social anthropologist +gildhall +orpine +pterodactyl +bristly sarsaparilla +Lane's Prince Albert +hognose bat +salesgirl +lubricating system +electric catfish +wrap +Jacksonian +chard +cherry laurel +foreground +beadsman +Kolam +amniote +frozen pudding +acid head +poor box +depositor +coattail +pallas's sandgrouse +mason's level +English lady crab +skeg +cruel plant +petrolatum gauze +tuna +swivel +stock-in-trade +perisperm +civies 
+Phyllostomus hastatus +alienor +Verdicchio +guard's van +onion butter +moviegoer +planter +citrange +box huckleberry +iconoscope +familiar +helmsman +baby boomer +constructivist +American bog asphodel +whorled caraway +simple pendulum +viviparous eelpout +Job's tears +holdout +sour salt +poison bush +dusky-footed woodrat +golden algae +granadilla tree +telethermometer +crossbar +thrift +African bowstring hemp +dog in the manger +hayrack +gold-crowned kinglet +prolonge +doge +pencil +discount house +mulligan stew +Nonconformist +virologist +gregarine +facula +rocket scientist +thin-shelled mussel +oospore +annual salt-marsh aster +Afrikaner +metallic +julienne +culverin +cleavers +Berliner +mudhif +thorny skate +brown lemming +yellow colicroot +cooling system +large-leaved magnolia +free-reed +canyonside +preemptor +stake +Brucella +anti-G suit +pleximeter +squire +salsilla +write-in candidate +lowland burrowing treefrog +flare star +dwarf hulsea +jobber +mangel-wurzel +quagga +red-skinned onion +positive pole +Pteropus capestratus +jug wine +stomacher +standee +bladder worm +hakim +house of correction +pelisse +golden mole +temporizer +rose apple +drove +umbrellawort +holy of holies +lawyer cane +smooth lip fern +anode +astatic coils +zip gun +feverroot +self-heal +expansion bit +salt reed grass +field pussytoes +nutmeg hickory +cryptic coloration +Venus's girdle +Hunkpapa +Calostoma cinnabarina +raft foundation +May apple +pygmy mouse +prokaryote +yellow-green algae +Bermuda maidenhair +withdrawer +coelacanth +Elliott's goldenrod +driftfish +epicyclic train +bowl +swamp dewberry +corbel step +sadist +party line +anti-American +mining engineer +Amur privet +conidium +Gastrocybe lateritia +lithia water +chaulmoogra +Rough Rider +Guinea pepper +glade mallow +pitcher sage +whitecup +shanghaier +low St Andrew's cross +phonologist +cocobolo +perfumery +visor +prison chaplain +belt +ingesta +literary critic +industrial watercourse +reckoner +pursuer +Kinetoscope +Kuiper 
belt +hyperope +raw recruit +Galiella rufa +Prince Albert yew +slit trench +usher +tenderfoot +white-rayed mule's ears +browser +piccalilli +bran +giant buttercup +water lobelia +arborescent plant +echinus +dryland blueberry +struggler +platyctenean +Geordie +domatium +twenty-two rifle +keteleeria +sports editor +chorus girl +Hakham +dry-bulb thermometer +onomancer +double-bitted ax +Girondist +bottle bank +thyrsopteris +bandwagon +star anise +armored car +dhawa +Bessemer converter +mutineer +paradise tree +tupik +centurion +mending +chowchow +margrave +International Grandmaster +African hemp +catafalque +leptodactylid frog +forcemeat +tank shell +pill +barbecue pit +worthy +lady's maid +evergreen +Jesuit +South American staghorn +rigger +suffragan +imperialist +spherical angle +grey lemming +kitchen police +tree swift +coliphage +archaist +Conservative +rib +exegete +Mendelian +tragedian +steerage +Paleo-American +obeche +garlic +grapefruit peel +accommodating lens implant +half blood +barrelfish +catgut +lanceolate spleenwort +hardliner +frieze +name dropper +carrack +huckster +onion bread +magnetic head +pease pudding +raisin moth +negative magnetic pole +electroencephalograph +bunji-bunji +synchroflash +Mornay sauce +stencil +winged pigweed +Nesselrode +MEDLINE +licorice +mainspring +melilotus +duke +experimenter +Napier's bones +four-minute man +pin-tailed sandgrouse +toolmaker +pogge +rootstock +baton +pricket +creeping snowberry +anomalops +nester +devourer +apolemia +Maricopa +pine-barren sandwort +larvacean +American dewberry +escalope de veau Orloff +gig +myrtle +pitsaw +Lutheran +fish house punch +gnathostome +intake valve +molasses taffy +clammy locust +vandyke beard +Atlantic tripletail +planktonic algae +estradiol patch +flummery +cytologist +sectarian +oil meal +tomtate +mediterranean anchovy +aspersorium +argonaut +porkholt +sheep ked +algometer +Adventist +false goatsbeard +snake polypody +streetwalker +shelver +adoptee +highflier +pitch apple 
+prairie rocket +fish mousse +viroid +deckle +manila tamarind +observer's meridian +pincurl clip +hardstem bulrush +gossamer +brookweed +Druze +hug-me-tight +accessory before the fact +oilman +Comanche +Marine +bedlamite +Chinese cork oak +squawbush +false miterwort +walk-on +Cynopterus sphinx +brandyball +landlubber +arrowroot +cape forget-me-not +galoot +tabor pipe +checker +Levant cotton +paddle box +murderess +smirker +fuddy-duddy +withdrawer +newel +shade +pink disease fungus +tipu +sweet sultan +aeronautical engineer +tall gallberry holly +acarid +conqueror +cucumber +film director +ordinary +salon +closet queen +allegorizer +tonka bean +flax rust +negative pole +dagame +dentist's drill +mock privet +micropyle +contributor +dark horse +climbing corydalis +cosmotron +land agent +Big Blue +Cynic +tassel flower +lyrate leaf +Minuteman +Dutch-elm beetle +Hessian fly +flower girl +West-sider +window dresser +skinny-dipper +whitebait +out-and-outer +hooker +amicus curiae +jack +camwood +stockist +black root rot fungus +Jamaica dogwood +diaphragm +Holocentrus ascensionis +roselle +black maire +Pygmy +fumigator +lame duck +mudder +hydraulic transmission +conning tower +phoronid +batfish +hearing dog +monohybrid +whaling gun +Cockcroft and Walton accelerator +allemande +seasoner +epileptic +ammonia clock +Young Turk +lanseh tree +urceole +cafe noir +poster girl +Oglala +deadeye +manna lichen +positive pole +cinch +lyricist +hermaphrodite +kidney stone +dilator +number one +frotteur +kaffir bread +fish knife +tarragon +adjuster +potato wart fungus +Florida pompano +conductor +corbie gable +rounders +Catha edulis +bender +recruit +Uruguayan +subject +bunghole +day boarder +pocketed bat +Oxonian +owner-occupier +yellow-leaf sickle pine +devisor +exhibitor +looking glass +shipowner +crooked-stemmed aster +calico +dash-pot +defilade +Confucian +egg-and-dart +irreligionist +lepton +self-rising flour +diving bell +Brahui +shop girl +maximum and minimum thermometer +Dalmatian 
laburnum +correspondent +subduer +nonperson +Reaumur thermometer +rough-leaved aster +jacksmelt +pinfold +magneto +ex-wife +round-leaved rein orchid +purloo +American shrew mole +sweet sand verbena +polymastigote +outfitter +curled leaf pondweed +Italian dressing +borderer +ambusher +geebung +four-stroke engine +small ship +homeopath +gynostegium +political prisoner +Radiigera fuscogleba +ensiform leaf +rhizoctinia +satyr orchid +rue +bouillon cube +flip +prophyll +tilefish +periselene +prima donna +choker +laminar flow clean room +Hooker's orchid +fish joint +mombin +remover +array +coelostat +autophyte +consigner +Damaraland mole rat +gasman +public works +lye hominy +pearlfish +piassava palm +Georgian +uxoricide +confessor +community center +epigone +tagger +abrading stone +cryoscope +nautch girl +reliever +Cartesian +Indian beech +protoplasmic astrocyte +fundamentalist +mustard sauce +crank +houselights +five-point bishop's cap +comedienne +triangle +presentist +beaugregory +dreamer +Wave +blue mockingbird +Barbados gooseberry +ten-spined stickleback +papoose +silky pocket mouse +holdup man +agent-in-place +suspensory +emigrant +ropemaker +bookbinder +jumby bead +undershrub +Killarney fern +sheep bell +city slicker +equerry +pea crab +down-and-out +blackmouth bass +shirtmaker +lister +UNIX guru +snipefish +gimbal +maisonette +haircloth +Ranvier's nodes +pigmy talinum +tribute album +msasa +hydroxide ion +madame +four-pounder +prophet +sloganeer +field-effect transistor +nude mouse +canteen +Calostoma lutescens +buteonine +sunlamp +Uruguay potato +Spanish tamarind +Prince-of-Wales'-heath +kishke +caprifig +chincapin +hegari +alarmist +bathtub gin +astatic galvanometer +Calostoma ravenelii +marang +tussah +coin box +bugleweed +hacker +frontal eminence +timekeeper +shunt +bicycle clip +mustang mint +caesium clock +hospice +glenoid fossa +archpriest +ex-gambler +incrustation +salvager +Donatist +violator +lamb succory +hygroscope +oilbird +sharptail mola +showplace 
+corn syrup +flashlight fish +pulse timing circuit +anchovy paste +fascista +chigoe +divan +Druid +squad room +Huntingdon elm +buffalo carpet beetle +carper +corn lily +goats' milk +assault gun +cockpit +Lochaber ax +Visigoth +occupier +Basotho +criminologist +spindle +Rosicrucian +Cornishwoman +musk kangaroo +artificial skin +pandurate leaf +Parkia javanica +roundhead +tea-like drink +basidiolichen +unguiculate +stepmother +Nauruan +gutta-percha tree +bloodberry +scarlet haw +marupa +censor +algebraist +pelvimeter +whaler +cowhide +paparazzo +biochip +internationalist +Yukon white birch +hangar queen +chlamydia +puttee +Pipturus albidus +pearly razorfish +sea moss +burglar +hoary golden bush +colter +drey +bushman's poison +maxillaria +gnetum +deadeye +shittah +swamp oak +damper block +deepwater squirrelfish +truffle +cangue +paleolith +lawyerbush +sorehead +Texas snowbell +Tremella reticulata +quarter +keelboat +dimity +whiner +Wagnerian +myrmecophyte +frontierswoman +pyrometric cone +big-tree plum +puppy +galbulus +hod +winceyette +carriage wrench +dictostylium +farmland +infanticide +Jacob's rod +threadfish +monocline +inamorato +leaf miner +purple cress +passer +black-fronted bush shrike +silverrod +bootmaker +segregate +captive +Edmontonia +spherometer +television transmitter +bladder +Saratoga spittlebug +dynamometer +lodge +smooth darling pea +Cossack +wake-up call +Olmec +sutler +molasses kiss +corner post +rattlesnake weed +yardmaster +adder +rhinoscope +referral +ulster +pantaloon +counterspy +gadgeteer +heart cherry +hospital chaplain +Clydesdale terrier +plank-bed +Russian thistle +actinometer +dyspeptic +common wolffia +firewall +seidel +potato moth +soapweed +seif dune +thill +cosmographer +absolver +halberdier +fire control system +kai apple +bastard pennyroyal +Big Brother +broadcast journalist +Albatrellus dispansus +citrophilous mealybug +split end +nickel-iron battery +Newtonian +gas maser +thumbstall +anaspid +dusky-footed wood rat 
+latitudinarian +flatbrod +schizocarp +niqaabi +flight surgeon +gyrocompass +Polyporus tenuiculus +Utopian +mailboat +spellbinder +undercoat +cassareep +typical jerboa +photocathode +katharometer +bight +fur-piece +penetration bomb +malik +Siberian millet +nanomia +Wykehamist +tosser +gyrostabilizer +microwave diathermy machine +crystal set +wall +legatee +alfalfa +angwantibo +charioteer +piano maker +African mahogany +Morlett's crocodile +taro +parallel circuit +cush-cush +etymologist +matriculate +neem seed +cornerback +kingfisher daisy +redoubt +blastomycete +peplos +costumier +publican +tobogganist +semolina +myrmidon +parricide +gymslip +whoremaster +cryptocoryne +header +platitudinarian +barleycorn +spiral bandage +reciter +abecedarian +dance +wrymouth +bilberry +Liopelma hamiltoni +streamliner +Fordhooks +fixed phagocyte +radiobiologist +neurologist +Selkup +dollarfish +cascade everlasting +acrodont +boarhound +midstream +theatrical producer +abhorrer +goldsmith +photometrist +Anglo-Saxon +rugel's plantain +sable +workmate +ferule +ankus +earleaved umbrella tree +Passamaquody +timucu +Mexican pocket mouse +yerba santa +Rochon prism +apomict +monocarp +sweet unicorn plant +common winterberry holly +archivist +drypis +paretic +fly-by-night +white-berry yew +Schoolman +blue cheese dressing +vintager +squatter +Euphausia pacifica +corrugated fastener +yellow henbane +Croesus +almoner +analphabet +acoustic delay line +sheep frog +workhouse +horseleech +venturer +pond-scum parasite +Pyrenees daisy +plagiarist +Truncocolumella citrina +rerebrace +group captain +caddis fly +hot-rock penstemon +kanzu +stylopodium +slopseller +rauli beech +starter +ootid +statesman +distributor cam +ascot +falcon-gentle +Duplicidentata +spotted antbird +heliometer +false buckthorn +Allegheny spurge +Cavalier +dart +photocoagulator +master-at-arms +kei apple +baldachin +crapshooter +gametangium +white hope +chipotle +spike heath +Scotch woodcock +Florentine +differential analyzer 
+Mitrula elegans +wet cell +basil balm +Circassian +corn cake +bouncing betty +vice-regent +lagerphone +ketembilla +whoremaster +fork +tetrasporangium +trifler +pill head +life-support system +quartermaster general +tobacco thrips +officeholder +teredo +toyon +Sundacarpus amara +Phytophthora citrophthora +naif +lobbyist +alligator wrench +bully +heavy +toxicologist +radio chassis +waterdog +drive line +kaffir cat +foster-brother +breakax +curette +traditionalist +pipe vise +striped button quail +gawker +homeotherm +schoolyard +battue +kalansuwa +deviationist +Bolshevik +transponder +pungapung +iron +Eyeish +roccella +manglietia +Tory +print seller +Texas Ranger +otter shrew +seconder +shellflower +outlier +party man +wold +hayfork +oncologist +framer +co-beneficiary +ocean pout +Chinese angelica +scrimshaw +air attache +false gromwell +standing press +fringepod +specifier +automatic choke +durum +yenta +wassailer +reeler +signora +beach pancake +common booklouse +pellicle +backroom boy +den mother +associate +Unitarian +gambist +brookweed +clubroom +cat's-tail +playboy +self-registering thermometer +doorstop +bennet +yak's milk +escapee +quail bush +sparge pipe +coast boykinia +screw key +half gainer +aggravator +cotton mill +tailor's chalk +free agent +cotton mouse +deadhead +bunny +turpentine camphor weed +amaranth +ceratodus +red lauan +beam-ends +thermograph +wally +Toda +handrest +commissary +oak-leaved goosefoot +manufacturer +voicer +Jafnea semitosta +bench hook +finder +abyssal zone +rabbitwood +Hercules'-club +epicarp +declinometer +camp follower +signaler +Australian pea +putz +qadi +banded palm civet +egg timer +regnellidium +calisaya +harvestfish +sound spectrograph +side-wheeler +glomerule +woolly rhinoceros +Black Muslim +horticulturist +ornithomimid +cryometer +battlefront +gametophyte +airmailer +cuisse +nakedwood +baseball club +slasher +anise +leatherleaf +leatherjacket +horned pondweed +gofer +Saigon cinnamon +barong +blazer +twinkler +skeleton 
shrimp +dial +floorwalker +case shot +flannelbush +cultivated parsnip +Jane Doe +few-flowered leek +nogging +placer miner +muzzler +serge +lion-hunter +capulin +Wandering Jew +ascidian tadpole +hispid pocket mouse +southern spatterdock +milk wagon +junior middleweight +duck sauce +promycelium +protozoologist +cascade liquefier +tout +longheaded thimbleweed +charcoal burner +footage +slop +bridge agent +miller's-thumb +Job's comforter +marocain +tanker plane +lancetfish +knocker +toque +ordinand +umbrella bird +favorite son +hare's-foot bristle fern +business traveler +plotter +Asiatic shrew mole +tallyman +stump +Paleacrita vernata +index register +mortgagee +accuser +codger +sand rat +seaside centaury +chiropractor +Florida smoothhound +dwarf sperm whale +T-man +sannup +dragonhead +numdah +alkali grass +gynobase +kymograph +ascolichen +steward +waterline +Nazarene +filer +lapidary +muncher +wincey +scyphus +question master +besieger +worldling +docent +facing +atmometer +quern +puerpera +three-decker +calliope +wild red oat +bailee +flame pea +cattle cake +theist +yellowtail flounder +cosmopolitan +rocket engineer +vouchee +Turkoman +hard sauce +Thousand Island dressing +assayer +messmate +mutilator +oyster bar +flame tokay +countess +prairie mimosa +microsporangium +cotter +townsman +paring +fundraiser +simperer +Comrade +orlop deck +power takeoff +cattleship +prime meridian +Javanthropus +scriptorium +curandera +long-clawed prawn +maestro +paster +potato tuberworm +chachka +junkyard +cape yellowwood +reentrant polygon +Liberian coffee +restaurateur +Alsophila pometaria +Jekyll and Hyde +electrophorus +Scomberomorus maculatus +manipulator +gromwell +chicken provencale +ashram +mangel-wurzel +shamrock pea +dossal +adducer +erection +Mysore thorn +smoothie +chufa +brace wrench +victualer +litterer +linstock +Protium guianense +palfrey +banyan +klieg light +dangleberry +trooper +yaupon holly +quitter +tradescant's aster +nullipara +melter +devil's urn +ghostwriter 
+mouth +analogist +Creek +sonic depth finder +fucker +locus of infection +mortician +esophageal smear +locum tenens +conic projection +aroeira blanca +bellarmine +night porter +automobile mechanic +codpiece +Munro +cottonweed +scoinson arch +tinderbox +frozen food +waterproofing +Egyptian henbane +lash +transactor +American smooth dogfish +existentialist +grabber +Sonoran lyre snake +Rufous rubber cup +colors +weekend warrior +power user +perennial salt marsh aster +Puritan +Apalachicola rosemary +anecdotist +tosser +moth bean +agnostic +stretcher-bearer +browntail +optimist +brewer's mole +astronomy satellite +flat file +rust mite +tuberous plant +day laborer +buster +trapezoid +bevatron +nonresident +Streptomyces griseus +mangosteen +customer agent +hero worshiper +suicide bomber +procellariiform seabird +archiannelid +reaction turbine +distortionist +bulldog wrench +grainy club +scalp +Aztec +scow +globigerina +pedant +heartleaf manzanita +kanchil +low gallberry holly +containment +scandalmonger +rose-colored starling +Powhatan +addle-head +Chilean rimu +Atlantic sea bream +arthrospore +ramrod +root climber +Kalapooia +roach clip +Schreiber's aster +horseradish +albino +Kshatriya +trombidiid +blasting cap +body pad +brachium +shallu +Wynnea americana +slender centaury +munj +upset +wind tunnel +cottonwick +airing cupboard +pepper shrub +ambrosia +languisher +chosen +rose globe lily +purple apricot +costia +sloop of war +sultana +frontlet +booster +sargassum fish +broad-leaved montia +rifleman bird +stillroom +amoralist +enginery +meter maid +fitment +southern bog lemming +Athenian +clincher +cusk-eel +mackintosh +diaphone +corozo +Australian reed grass +czar +spongioblast +Eurafrican +airhead +Shahaptian +Roman +pollinium +tourist class +halogeton +stamper +emperor +malingerer +tramp steamer +Peziza domicilina +pilot cloth +stenopterygius +cost accountant +Queen's Counsel +wine-maker's yeast +poppet +cage +rowlock arch +landgrave +bearded wheatgrass +stink bell 
+quaker +undesirable +algarroba +resistance pyrometer +exorcist +carib wood +guvnor +border patrolman +bathhouse +licenser +headman +rentier +pine spittlebug +nut-leaved screw tree +paraduodenal smear +apron +necker +smilax +Alpine besseya +creeper +castle +ground bait +Queensland grass-cloth plant +sclerotium +great yellowcress +fat farm +Stoker +hoop snake +elixir of life +Trotskyite +home buyer +wheat berry +Tutelo +semi-climber +utahraptor +wet-bulb thermometer +packrat +hygrophyte +darter +sketcher +refiner +camlet +midgrass +compound +tarwood +Colorado River hemp +toiler +abstractor +override +dwarf pipefish +plodder +briefcase computer +trunk hose +brown butter +valve-in-head engine +cymbalist +explosive detection system +horsewoman +boutonniere +chinchilla +venerator +scourer +exarch +cohune nut +ayapana +continental divide +cosigner +stalker +pyxie +Genet +Macowanites americanus +open-hearth furnace +water chestnut +American frogbit +tarwood +cutter +scout +burr +upsetter +grist +tagasaste +mouthpiece +palette +rattan +letterman +Exmoor +Methodist +eelblenny +marasca +slide valve +ventilation +saddle hackle +Yakut +flux applicator +air traveler +murder suspect +Cynocephalus variegatus +idolizer +Surgeon General +nutlet +little-head snakeweed +germ tube +fellow traveler +raceabout +commodore +czar +anamorphosis +treelet +girlfriend +groundnut +sideline +giant star grass +goffer +spark lever +oubliette +processor +tare +plodder +extremist +Kipp's apparatus +gripsack +S wrench +viscountess +bridgehead +cascarilla +Asiatic flying squirrel +protoceratops +equerry +difflugia +princeling +moonlighter +aspergill +common flat pea +Utahan +imperial mammoth +plantain-leaved pussytoes +Boott's goldenrod +bootlegger +reed pipe +runcinate leaf +onion salt +nitrite bacterium +introvert +duck +New World opah +goliath frog +heterostracan +disrupting explosive +haggler +candlenut +false bugbane +returning officer +eudiometer +ship-breaker +metazoan +mandarin +patka +gill 
net +cavity wall +armilla +rainmaker +dealfish +orderly +gleaner +muffin man +house sitter +alto +sand devil's claw +vulcanizer +appendicularia +boron chamber +chess +bitok +anchovy butter +dropout +flour mill +bishop +escapist +scapegrace +stanhope +smooth winterberry holly +upstager +stalking-horse +pony +prairie gourd +parabolic mirror +Polaroid +slasher +lap +garlic butter +sendee +German millet +hairy honeysuckle +Swiss canton +Scleroderma flavidium +red goatfish +telegraph plant +Jungian +garment cutter +mallee hen +stranger +driveway +schooner +Paiute +cisco +trestlework +sipper +shanny +romanticist +Molly Miller +mountain rimu +odd-leg caliper +bitumastic +Western Australia coral pea +labor coach +latchkey +harpulla +solitary pussytoes +chop-suey greens +coil +guimpe +diapir +Osage +gutta-percha tree +giant eland +reticulation +garden huckleberry +quick study +Hudson bay collared lemming +coreligionist +Lancastrian +stumblebum +omnirange +seersucker +Potemkin village +Rhea Silvia +symphonist +bolti +jaw +jaconet +page +visiting fireman +haulm +p-n junction +landlubber +yellow jack +triclinium +souari +invader +fire walker +Luddite +Plott hound +hemming-stitch +winker +star-duckweed +craniometer +Arabidopsis lyrata +loser +cypripedia +trimmer arch +cookhouse +pink fivecorner +transfer +ringleader +northern pocket gopher +moke +blockade-runner +cyclostome +web-spinning mite +Whig +transcriber +malahini +sawyer +patent log +paca +tragedian +thermojunction +soffit +black buffalo +foreigner +applecart +brit +pole horse +white mullet +argentinosaur +Homo soloensis +bounty hunter +decumary +hand +paperboy +Smitane +windowpane +Java man +Wynnea sparassoides +prune +middy +lilliputian +sorb +pyrostat +guest worker +hold +leaseholder +vegan +humanist +salinometer +piton +zygospore +means +night rider +tetraspore +archipelago +radiomicrometer +nitpicker +spot weld +slicer +girlfriend +round-tailed muskrat +cock's eggs +Shavian +bay +nuclear chemist +planetarium 
+hiccup nut +Marylander +milling +microsporidian +brown cup +Strophanthus kombe +little skate +emancipator +paperhanger +archaeopteryx +maigre +Mastotermes electrodominicus +procurer +seizure-alert dog +homeboy +cotton strain +mute +siren +spearnose bat +phenacomys +gayal +arsenal +pitchfork +Port Jackson heath +cud +magnetic core memory +interferometer +water jacket +account executive +hodoscope +window oyster +sudatorium +syncopator +loment +hypertensive +smoothbark +Geogia holly +nailhead +African holly +musette +chafeweed +microflora +derrick +strawworm +shogun +queen post +jerboa kangaroo +columbo +royal +sourball +solenogaster +cardsharp +Homo habilis +intaglio +calf's-foot jelly +flotsam +skirret +baronduki +chyme +shovel hat +Welsh +monoplane flying fish +groundfish +tablet-armed chair +swan dive +Indian club +colonial +cassiri +pyramidal tent +praya +silk vine +time clock +button snakeroot +clews +Korean lespedeza +diffuser +ripping bar +puttyroot +nipple shield +headpin +juneberry holly +hub-and-spoke +laver +weldment +plain flour +hoosegow +dudeen +grey skate +line of life +mung +arariba +Newtown Wonder +rock candy +side chapel +castor sugar +narrow-leaved white-topped aster +babassu nut +puka +rings +catchall +heat shield +caroche +oxbow +Australian coral snake +tapper +sporangiophore +fenugreek +spruce gall aphid +gouache +cutoff +private line +pod +cargo hatch +nailhead +penile implant +geophyte +small-leaved linden +deepwater pipefish +paperhanger +hairy spurge +Persian lamb +subtropics +feed grain +clarence +nonparticipant +scorpioid cyme +hand brake +tiller +Geglossaceae +albacore +monochrome +goa bean +bur +tongue worm +psittacosaur +frog's lettuce +pectoral +terreplein +light filter +fishpaste +dry point +grison +feterita +dolichocephalic +oenomel +stretcher +swag +cheval-de-frise +mountain beaver +scammony +discus +leatherleaf saxifrage +wharf rat +Dominique +pelycosaur +depth gauge +bishop +archespore +true anomaly +silver jenny +mercy seat 
+kelp +oviraptorid +acrylic +Chinese pea tree +meat house +bilge well +Temperate Zone +whale louse +balbriggan +briefcase bomb +pump-type pliers +oil +sour gourd +Jewbush +lunette +Chinese paddlefish +pyxidium +beechnut +calabar bean +grugru nut +gib +blunt file +cataphyll +megasporangium +blockbuster +sliding seat +hogchoker +calceus +Connarus guianensis +honest woman +survivor +second balcony +tempera +Calvary clover +murine +outwork +bogy +elephant's-foot +conning tower +set square +blackfly +stirk +Streptomyces erythreus +blade +goldfield +snowball +mortal enemy +waltzer +shoal +galley +hitchhiker +lithophyte +brisling +scauper +esophagoscope +grab +subtracter +philosopher +duplex apartment +southeastern pocket gopher +bonduc nut +reverberatory furnace +grader +lamp house +northern bog lemming +brotula +ornithopod +ptyalith +obturator +perpetual motion machine +range pole +Africander +curvet +daisy print wheel +floor +collector +mutant +tuck +fore-and-after +senega +buckler mustard +louvar +Tarsius glis +culdoscope +Spanish fly +steering gear +hatchet man +museum +saw set +cambric tea +comber +thermohydrometer +stationer +chalcis fly +bryanthus +whipstitch +harvest mite +rock gunnel +time bomb +rariora +pigfish +apetalous flower +head shop +horned whiff +sandpit +tachistoscope +sundries +taffrail +caller +monofocal lens implant +Dover's powder +souari nut +crowbait +render +Shakespearian +hagberry +megatherian +magus +hatchel +mangabey +garroter +piedmont +cope +barrio +psychodid +rigout +distributor +croupier's rake +sarcenet +narrow-leaved water plantain +treenail +biped +lanternfish +overdrive +barndoor skate +picket boat +amber lily +sawpit +sand lance +bucket shop +common beech +laundry truck +surtout +grogram +tampion +escape hatch +interstice +shop bell +snake mackerel +nakedwood +tumbrel +mericarp +mountain paca +cab +big board +cringle +eusporangium +shipping room +coal chute +dumbwaiter +Smiledon californicus +man-at-arms +cartridge +deinonychus 
+pigeon pea +screw bean +spectacle +floorboard +cutting room +low-warp-loom +proconsul +sabicu +genipap +clapper +aquifer +archaeornis +belly flop +Protium heptaphyllum +interrupter +high-warp loom +knight +wiper +impression +poker +Pithecanthropus +sable +guardroom +tenter +wellhead +raja +strickle +sodomite +mountebank +sand leek +Barbados gooseberry +shuffler +sensory fiber +crab-eating opossum +etching +rare bird +scup +fagot +negro vine +hutment +droshky +nephoscope +lady chapel +cutty stool +release +vestiture +buff +standard +Tabernacle +vascular ray +snakewood +chlorobenzylidenemalononitrile +limnologist +pouched mole +microwave linear accelerator +Mastotermes darwiniensis +wind tee +orange bat +open sight +carpospore +rampant arch +sabbatia +cursor +post exchange +bellpull +center +cyclostyle +canonist +pygmy sperm whale +moa +king +pass-through +angioscope +marrow +hookup +revetment +acanthocephalan +good Samaritan +apatosaur +web spinner +dixie +ommastrephes +crossbench +candlewick +jack +light arm +caisson +kaki +quandong nut +Meuniere butter +coquilla nut +mast +black +twitterer +bluethroat pikeblenny +shielding +water-shield +urolith +elephant bird +clearway +dark lantern +schizopetalon +press +Nazi +sugarberry +Maltese +stevedore +hair shirt +party wall +gainer +blackheart +nothosaur +cavetto +evergreen bittersweet +chemical bomb +calpac +shingle +turnpike +animator +heaver +isoclinic line +death knell +liner +anathema +aerie +razorback +Ichyostega +pound net +French dressing +mottle +yard +string tie +bell seat +brattice +battering ram +sierra +pompon +vertex +stomach pump +electrolytic cell +escolar +telpher +roadhouse +cerecloth +tartare sauce +letter case +whale sucker +hob +teg +canvas +strickle +hectograph +Cartagena bark +mail car +acinus +freedom rider +bread sauce +picture window +Rhizopogon idahoensis +pinprick +mass spectrograph +ringer +devil's cigar +salad cream +marlberry +airbrake +Clark cell +yellow-throated marten +wire gauge 
+dinoceras +aba +harpoon log +plate rail +mustard plaster +coelophysis +journal box +puce +ballcock +quartering +izar +clinid +whirler +turnspit +deathbed +pottle +shot +doubler +Coryphaena equisetis +English sole +chicken feed +borrow pit +mylodontid +Chilean nut +Kundt's tube +ling +asthenosphere +reseau +death seat +immovable bandage +peppermint patty +lecturer +electron multiplier +bear claw +hyacinth +beaked salmon +toehold +scull +snowball +gangsaw +fiber +oxeye +lashing +Beckman thermometer +fence +cantilever +dinner theater +Reynard +jag +umbrella plant +camera lucida +beaver +slug +yellowfin croaker +Sibley tent +rat-tail file +anchovy pear +soldier +cackler +chaise +Pitot-static tube +minniebush +Episcopalian +oleaster +ejaculator +wavy-leaved aster +knight +rack +real storage +magnetic mine +cocoa plum +vesiculovirus +birch leaf miner +water chevrotain +rudapithecus +torpedo tube +itch mite +warren +loft +washerman +terrace +nonstarter +shit +platform +caudex +ground control +Ostariophysi +slopshop +Peruvian cotton +crystal oscillator +plastic bomb +bar bit +watering cart +Asiatic sweetleaf +artificial joint +chariot +casern +charge-exchange accelerator +display adapter +hornpipe +honey bell +planula +Nephthytis afzelii +hame +ranter +trachodon +synchrocyclotron +splasher +heterotroph +Nicol prism +Himalayan rhubarb +headfast +put-put +bitter almond +parr +scantling +power breakfast +madder +Catalpa bignioides +rose of Jericho +spark chamber +rhizome +beard worm +supper club +negro peach +keratoscope +wain +apple aphid +planking +time-delay measuring instrument +sternpost +sicklepod +lake bed +gatherer +monotype +dead-man's float +poison gas +dicynodont +organism +cell +person +animal +plant +food +artifact +dressage +contact sport +outdoor sport +gymnastics +track and field +jumping +high jump +skiing +water sport +swimming +dive +floating +skin diving +rowing +boxing +sledding +tobogganing +wrestling +skating +ice skating +roller skating +racing +boat 
racing +riding +equestrian sport +cycling +blood sport +hunt +fishing +angling +casting +athletic game +outdoor game +golf +field game +field hockey +football +American football +ball game +baseball +court game +badminton +basketball +tennis +sport +Seder +scavenger +bottom-feeder +work animal +beast of burden +pack animal +domestic animal +marine animal +female +male +young +young mammal +pup +cub +lion cub +tiger cub +microorganism +arbovirus +herpes +herpes zoster +reovirus +moneran +cyanobacteria +enteric bacteria +actinomycete +streptomyces +diplococcus +parasite +ectoparasite +protoctist +protozoan +sarcodinian +ameba +ciliate +alga +brown algae +green algae +sporozoan +cypriniform fish +cyprinid +carp +domestic carp +shiner +catostomid +buffalo fish +cyprinodont +killifish +topminnow +squirrelfish +stickleback +pipefish +embryo +fetus +blastula +chordate +cephalochordate +tunicate +ascidian +vertebrate +aquatic vertebrate +jawless vertebrate +lamprey +hagfish +cartilaginous fish +holocephalan +chimaera +elasmobranch +shark +mackerel shark +mako +requiem shark +dogfish +smooth dogfish +spiny dogfish +smooth hammerhead +smalleye hammerhead +shovelhead +ray +sawfish +roughtail stingray +butterfly ray +eagle ray +manta +skate +bird +gamecock +night bird +ratite +passerine +oscine +accentor +lark +pipit +finch +canary +dark-eyed junco +New World sparrow +bunting +honeycreeper +sparrow +grosbeak +towhee +weaver +grassfinch +tyrannid +New World flycatcher +kingbird +pewee +cotinga +antbird +Old World flycatcher +thrush +nightingale +Old World chat +warbler +kinglet +Old World warbler +New World warbler +flycatching warbler +New World chat +yellowthroat +New World oriole +northern oriole +meadowlark +New World blackbird +grackle +Old World oriole +starling +myna +corvine bird +crow +Old World jay +common European jay +New World jay +blue jay +Canada jay +Rocky Mountain jay +nutcracker +European magpie +American magpie +Australian magpie +wren +marsh wren +thrasher 
+New Zealand wren +creeper +titmouse +black-capped chickadee +Carolina chickadee +swallow +martin +tanager +shrike +butcherbird +bush shrike +bowerbird +European water ouzel +American water ouzel +vireo +waxwing +bird of prey +hawk +black kite +swallow-tailed kite +white-tailed kite +harrier +falcon +peregrine +caracara +eagle +young bird +sea eagle +Aegypiidae +Old World vulture +griffon vulture +bearded vulture +Egyptian vulture +black vulture +New World vulture +buzzard +condor +Andean condor +California condor +black vulture +king vulture +owl +horned owl +scops owl +amphibian +salamander +newt +Pacific newt +ambystomid +climbing salamander +web-toed salamander +frog +true frog +true toad +spadefoot +tree toad +cricket frog +tongueless frog +reptile +anapsid +diapsid +chelonian +turtle +sea turtle +ridley +snapping turtle +musk turtle +diamondback terrapin +Western box turtle +tortoise +soft-shelled turtle +saurian +lizard +gecko +iguanid +spiny lizard +fence lizard +horned lizard +skink +teiid lizard +racerunner +plateau striped whiptail +Chihuahuan spotted whiptail +western whiptail +checkered whiptail +agamid +moloch +anguid lizard +venomous lizard +lacertid lizard +chameleon +monitor +crocodilian reptile +crocodile +alligator +caiman +armored dinosaur +ankylosaur +bone-headed dinosaur +ceratopsian +hadrosaur +saurischian +sauropod +theropod +ceratosaur +maniraptor +synapsid +pterosaur +ichthyosaur +snake +colubrid snake +smooth green snake +rough green snake +racer +blacksnake +whip-snake +rat snake +bull snake +common kingsnake +milk snake +common garter snake +ribbon snake +Western ribbon snake +common water snake +water moccasin +grass snake +viperine grass snake +sand snake +lyre snake +blind snake +indigo snake +constrictor +boa +python +elapid +coral snake +coral snake +cobra +mamba +black mamba +krait +viper +pit viper +rattlesnake +timber rattlesnake +arthropod +arachnid +false scorpion +whip-scorpion +spider +European wolf spider +acarine +hard 
tick +Ixodes dammini +Ixodes neotomae +Ixodes pacificus +Ixodes scapularis +sheep-tick +Ixodes persulcatus +Ixodes dentatus +Ixodes spinipalpis +wood tick +soft tick +mite +trombiculid +spider mite +house centipede +gallinaceous bird +domestic fowl +jungle fowl +chicken +cock +hen +turkey +grouse +European black grouse +Asian black grouse +blackcock +greyhen +red grouse +moorhen +greater prairie chicken +lesser prairie chicken +heath hen +guan +chachalaca +megapode +mallee fowl +phasianid +pheasant +bobwhite +northern bobwhite +Old World quail +migratory quail +peafowl +California quail +Hungarian partridge +red-legged partridge +Greek partridge +mountain quail +guinea fowl +columbiform bird +pigeon +dove +turtledove +domestic pigeon +homing pigeon +sandgrouse +parrot +cockatoo +lory +varied Lorikeet +rainbow lorikeet +parakeet +cuculiform bird +cuckoo +crow pheasant +coraciiform bird +roller +kingfisher +hoopoe +apodiform bird +swift +Archilochus colubris +thornbill +goatsucker +piciform bird +woodpecker +flicker +sapsucker +toucanet +trogon +quetzal +aquatic bird +waterfowl +anseriform bird +duck +teal +widgeon +sheldrake +goldeneye +scaup +wood duck +sea duck +scoter +merganser +gosling +gander +Chinese goose +greylag +blue goose +snow goose +brant +common brant goose +honker +barnacle goose +swan +tundra swan +screamer +crested screamer +mammal +prototherian +monotreme +marsupial +opossum +bandicoot +kangaroo +common wallaby +hare wallaby +nail-tailed wallaby +rock wallaby +pademelon +tree wallaby +rat kangaroo +phalanger +dasyurid marsupial +dasyure +placental +calf +buck +insectivore +mole +shrew mole +shrew +water shrew +tenrec +invertebrate +sponge +glass sponge +coelenterate +Chrysaora quinquecirrha +hydrozoan +siphonophore +anthozoan +actinia +coral +gorgonian +stony coral +ctenophore +worm +planarian +fluke +liver fluke +Fasciolopsis buski +schistosome +tapeworm +echinococcus +taenia +common roundworm +chicken roundworm +pinworm +eelworm +vinegar eel 
+trichina +hookworm +filaria +Guinea worm +annelid +oligochaete +polychaete +leech +mollusk +scaphopod +gastropod +abalone +scorpion shell +giant conch +edible snail +garden snail +brown snail +Helix hortensis +seasnail +neritid +limpet +Hermissenda crassicornis +cowrie +bivalve +clam +quahog +cockle +oyster +mussel +marine mussel +freshwater mussel +scallop +shipworm +cephalopod +octopod +decapod +squid +crustacean +malacostracan crustacean +decapod crustacean +crab +swimming crab +spider crab +lobster +true lobster +Old World crayfish +American crayfish +shrimp +prawn +krill +stomatopod +mantis shrimp +woodlouse +pill bug +sow bug +sea louse +amphipod +copepod +barnacle +wading bird +stork +ibis +common spoonbill +roseate spoonbill +heron +egret +night heron +American bittern +European bittern +least bittern +whooping crane +rail +crake +gallinule +purple gallinule +coot +great bustard +plain turkey +button quail +trumpeter +seabird +shorebird +plover +turnstone +sandpiper +yellowlegs +ruff +tattler +woodcock +snipe +greyback +red-breasted snipe +curlew +godwit +stilt +stilt +phalarope +courser +coastal diving bird +larid +gull +tern +jaeger +skua +auk +guillemot +murre +puffin +gaviiform seabird +podicipitiform seabird +grebe +pelecaniform seabird +white pelican +Old world white pelican +gannet +snakebird +sphenisciform seabird +penguin +pelagic bird +wandering albatross +black-footed albatross +petrel +shearwater +storm petrel +aquatic mammal +cetacean +whale +baleen whale +rorqual +toothed whale +beaked whale +dolphin +bottlenose dolphin +porpoise +sea cow +carnivore +pinniped mammal +seal +eared seal +fur seal +fur seal +South American sea lion +California sea lion +Australian sea lion +Steller sea lion +earless seal +walrus +canine +bitch +dog +cur +toy dog +toy spaniel +English toy spaniel +hunting dog +hound +coonhound +dachshund +foxhound +wolfhound +greyhound +terrier +bullterrier +rat terrier +Manchester terrier +fox terrier +wirehair +Welsh terrier 
+schnauzer +Skye terrier +sporting dog +retriever +pointer +setter +spaniel +springer spaniel +water spaniel +working dog +watchdog +shepherd dog +Belgian sheepdog +pinscher +Sennenhunde +mastiff +bulldog +guide dog +sled dog +liver-spotted dalmatian +spitz +griffon +corgi +poodle +wolf +coydog +wild dog +striped hyena +brown hyena +spotted hyena +aardwolf +fox +black fox +silver fox +blue fox +feline +cat +domestic cat +tom +blue point Siamese +wildcat +common lynx +Canada lynx +bobcat +spotted lynx +caracal +big cat +leopardess +panther +lioness +lionet +Bengal tiger +tigress +saber-toothed tiger +bear +Syrian bear +grizzly +Alaskan brown bear +cinnamon bear +viverrine +civet +Indian mongoose +ichneumon +slender-tailed meerkat +suricate +bat +fruit bat +carnivorous bat +leafnose bat +false vampire +vespertilian bat +long-eared bat +freetail +vampire bat +predator +game +game bird +fossorial mammal +tetrapod +insect +beetle +two-spotted ladybug +Mexican bean beetle +Hippodamia convergens +vedalia +bombardier beetle +calosoma +searcher +firefly +sawyer +pine sawyer +flea beetle +Colorado potato beetle +carpet beetle +clerid beetle +lamellicorn beetle +scarabaeid beetle +scarab +tumblebug +dorbeetle +June beetle +melolonthid beetle +elaterid beetle +snout beetle +boll weevil +blister beetle +bark beetle +darkling beetle +flour beetle +seed beetle +pea weevil +bean weevil +rice weevil +louse +flea +dipterous insect +gall midge +housefly +tsetse fly +blowfly +bluebottle +greenbottle +flesh fly +tachina fly +gadfly +botfly +human botfly +sheep botfly +warble fly +horsefly +bee fly +fruit fly +louse fly +horn fly +mosquito +gnat +fungus gnat +hymenopterous insect +drone +worker +honeybee +Africanized bee +black bee +Carniolan bee +Italian bee +carpenter bee +bumblebee +cuckoo-bumblebee +andrena +Nomia melanderi +leaf-cutting bee +mason bee +potter bee +wasp +vespid +paper wasp +hornet +sphecoid wasp +digger wasp +chalcid fly +sawfly +pharaoh ant +little black ant +army 
ant +carpenter ant +fire ant +wood ant +slave ant +Formica fusca +slave-making ant +sanguinary ant +bulldog ant +Amazon ant +termite +dry-wood termite +orthopterous insect +short-horned grasshopper +locust +migratory locust +migratory grasshopper +long-horned grasshopper +katydid +mormon cricket +sand cricket +mole cricket +European house cricket +field cricket +tree cricket +snowy tree cricket +phasmid +diapheromera +oriental cockroach +American cockroach +Australian cockroach +German cockroach +giant cockroach +praying mantis +hemipterous insect +leaf bug +mirid bug +lygus bug +lygaeid +coreid bug +heteropterous insect +water bug +water strider +assassin bug +homopterous insect +whitefly +sweet-potato whitefly +coccid insect +scale insect +soft scale +armored scale +mealybug +plant louse +aphid +greenfly +woolly aphid +adelgid +dog-day cicada +seventeen-year locust +spittle insect +plant hopper +psocopterous insect +psocid +booklouse +ephemerid +neuropteron +green lacewing +brown lacewing +odonate +trichopterous insect +caseworm +thysanuran insect +bristletail +thysanopter +thrips +earwig +lepidopterous insect +butterfly +nymphalid +fritillary +emperor butterfly +danaid +pierid +small white +large white +southern cabbage butterfly +blue +copper +American copper +hairstreak +Strymon melinus +moth +tortricid +lymantriid +geometrid +cankerworm +pyralid +tineoid +tineid +clothes moth +gelechiid +grain moth +noctuid moth +cutworm +underwing +hawkmoth +bombycid +saturniid +giant silkworm moth +silkworm +arctiid +lasiocampid +tent caterpillar +webworm +webworm moth +caterpillar +bollworm +woolly bear +larva +grub +pupa +queen +echinoderm +basket star +edible sea urchin +sand dollar +heart urchin +crinoid +trepang +lagomorph +leporid +rabbit +eastern cottontail +swamp rabbit +marsh hare +leveret +European hare +jackrabbit +white-tailed jackrabbit +blacktail jackrabbit +polar hare +snowshoe hare +pika +rodent +mouse +rat +pocket rat +field mouse +brown rat +jerboa rat 
+water rat +New World mouse +wood mouse +wood rat +vole +packrat +Eurasian hamster +golden hamster +gerbil +lemming +pied lemming +Old World porcupine +brush-tailed porcupine +long-tailed porcupine +New World porcupine +Canada porcupine +pocket mouse +kangaroo rat +jumping mouse +jerboa +dormouse +gopher +squirrel +tree squirrel +ground squirrel +prairie dog +American flying squirrel +groundhog +hoary marmot +yellowbelly marmot +Old World beaver +New World beaver +cavy +naked mole rat +ungulate +hyrax +odd-toed ungulate +equine +horse +foal +colt +male horse +stallion +mare +saddle horse +warhorse +pony +mustang +bronco +wild horse +pony +racehorse +racer +harness horse +workhorse +draft horse +trotting horse +ass +domestic ass +wild ass +onager +common zebra +mountain zebra +grevy's zebra +rhinoceros +tapir +even-toed ungulate +swine +piglet +porker +peccary +ruminant +bovid +bovine +ox +cattle +bull +cow +beef +Brahman +dairy cattle +Old World buffalo +Indian buffalo +carabao +Asian wild ox +American bison +wisent +sheep +lamb +domestic sheep +wild sheep +mountain sheep +goat +domestic goat +wild goat +goat antelope +antelope +Thomson's gazelle +Gazella subgutturosa +springbok +kudu +harnessed antelope +eland +waterbuck +oryx +deer +stag +red deer +mule deer +roe deer +caribou +chevrotain +camel +domestic llama +guanaco +alpaca +giraffe +musteline mammal +ermine +stoat +New World least weasel +Old World least weasel +longtail weasel +American mink +ferret +muishond +snake muishond +striped muishond +river otter +Eurasian otter +striped skunk +hooded skunk +hog-nosed skunk +spotted skunk +American badger +Eurasian badger +ferret badger +hog badger +marten +pachyderm +edentate +peba +apar +tatouay +peludo +giant armadillo +pichiciago +sloth +anteater +primate +ape +anthropoid ape +hominoid +hominid +homo +Homo erectus +Homo sapiens +australopithecine +great ape +western lowland gorilla +eastern lowland gorilla +mountain gorilla +silverback +western chimpanzee 
+eastern chimpanzee +central chimpanzee +pygmy chimpanzee +lesser ape +monkey +Old World monkey +talapoin +grivet +vervet +green monkey +chacma +mandrill +drill +rhesus +bonnet macaque +Barbary ape +crab-eating macaque +entellus +guereza +New World monkey +true marmoset +pygmy marmoset +tamarin +silky tamarin +pinche +lemur +tarsier +flying lemur +proboscidean +elephant +mammoth +procyonid +raccoon +fish +food fish +young fish +crossopterygian +lungfish +catfish +silurid +bullhead +channel catfish +gadoid +cod +hake +elver +common eel +tuna +moray +conger +teleost fish +clupeid fish +shad +herring +sardine +pilchard +anchovy +salmonid +salmon +Atlantic salmon +trout +brown trout +char +whitefish +smelt +tarpon +ribbonfish +toadfish +needlefish +flying fish +spiny-finned fish +percoid fish +perch +pike-perch +walleye +robalo +pike +pickerel +sunfish +crappie +freshwater bream +black bass +bass +serranid fish +grouper +hind +surfperch +cardinalfish +remora +carangid fish +jack +moonfish +pompano +scad +dolphinfish +characin +cichlid +snapper +grunt +sparid +sea bream +porgy +sciaenid fish +croaker +whiting +sea trout +mullet +goatfish +mullet +silversides +barracuda +sea chub +butterfly fish +damselfish +clown anemone fish +wrasse +blenny +pikeblenny +gunnel +goby +gempylid +scombroid +mackerel +Spanish mackerel +tuna +bonito +sailfish +billfish +marlin +tripletail +mojarra +ganoid +Pacific sturgeon +beluga +scorpaenoid +scorpaenid +scorpionfish +rockfish +lumpfish +greenling +gurnard +sea robin +plectognath +triggerfish +filefish +boxfish +spiny puffer +ocean sunfish +flatfish +righteye flounder +lefteye flounder +whiff +sole +abbey +abbey +abrader +accelerator +accessory +accommodation +acoustic device +acoustic modem +acrylic +action +actuator +adhesive bandage +adjustable wrench +aeolian harp +aerosol +after-shave +airbus +aircraft +airfield +airfoil +air gun +airplane +air pump +air-to-air missile +air-to-ground missile +alarm +alb +alcazar +Allen screw +alms 
dish +altimeter +Amati +ammeter +ammunition +amplifier +analog computer +analytical balance +anchor +anchor chain +aneroid barometer +angledozer +anklet +antenna +anteroom +antiaircraft +antiballistic missile +apartment +apartment building +aperture +apparatus +apparel +appliance +appliance +applicator +aquarium +arbor +arcade +arch +arc lamp +area +argyle +arm +armament +armature +armchair +armoire +armor +armored vehicle +armor plate +armrest +array +arrow +artificial heart +artillery +assembly +assembly plant +astrodome +astronomical telescope +athletic sock +atom bomb +atomic clock +atomizer +attachment +attack submarine +attire +audiocassette +audio system +audiotape +auditorium +autoclave +autoinjector +autoloader +automat +automat +automatic firearm +automatic rifle +automaton +auxiliary research submarine +awl +ax +axis +axle +axletree +baby bed +baby buggy +baby grand +back +background +backseat +badminton equipment +badminton racket +bag +bag +bag +baggage +bagpipe +bait +balance +balcony +balcony +bale +ball +ball gown +ballistic missile +ballistic pendulum +ball-peen hammer +ballroom +band +bandage +bandanna +banderilla +bar +bar +barbed wire +barge +barge pole +barn door +barograph +barrack +barrage balloon +barrel knot +barrel vault +barrier +barroom +base +base +baseball equipment +basilica +basin +basket +basketball equipment +bass +bass drum +bass horn +bastion +bat +bathhouse +battery +battle-ax +battle dress +battleship +bay rum +bay window +beading plane +beam +beam balance +bearing +beater +beating-reed instrument +bed +bed +bedclothes +bedroom +bedroom furniture +bedspread +bedspring +beehive +beer barrel +bell +bell push +bell tower +belt +belt buckle +bench +berlin +berth +besom +bevel gear +bicycle +bicycle chain +bier +billiard ball +bin +binding +bin liner +binocular microscope +bioscope +birchbark canoe +bird shot +bistro +bit +bit +black tie +blade +blade +blanket +blimp +blind +block +block plane +blouse +blower +blowtorch +bludgeon 
+boarding +boarding house +boardroom +boat +bobbin +body +body armor +body lotion +boiler +bolt +bolt +bomb +bomber +bongo +boom +boom +boomerang +boot +booth +booth +bore bit +Boston rocker +bota +bottle +bottle opener +bow +bow +bowed stringed instrument +bowl +bowl +bowline +bowling equipment +bowling pin +bowsprit +box +box +boxcar +boxing equipment +brace +brace +bracelet +bracket +brake +brake system +brass +brasserie +brazier +breechcloth +breeches +brewpub +brick +bricklayer's hammer +brickwork +bridal gown +bridge +briefcase +brigandine +brilliant pebble +brim +broad arrow +broadax +broad hatchet +broadsword +brush +bubble jet printer +buffer +buffet +building +building complex +bulldozer +bullet +bullhorn +bullnose +bundle +bunker +burial chamber +burner +bus +business suit +butt joint +button +buttress +butt shaft +buzz bomb +cabaret +caber +cabin +cabin +cabinet +cabinet +cabin liner +cable +cable +cafe +cafeteria +cafeteria tray +caff +cage +calculator +caliper +calorimeter +camera +camera lens +camera tripod +camp +camp +camp chair +camper +can +canal +candelabrum +candlestick +cane +cannikin +cannon +cannon +cannonball +canopy +canteen +canteen +canvas +canvas tent +cap +cap +cap +capacitor +caparison +cape +cap screw +capsule +car +car +carbine +carbon arc lamp +card index +cardioid microphone +car door +cargo liner +cargo ship +carillon +carpenter's hammer +carpenter's level +carpenter's mallet +carpenter's rule +carpet tack +carriage +carriage +carriage bolt +carrick bend +carrier +car seat +cart +cartridge +cartridge belt +cartridge holder +case +case +cashbox +casque +casserole +cassock +catch +catcher's mask +cathedra +cathedral +cathedral +catheter +cathode +cathode-ray tube +cat's-paw +cattle car +cautery +cavalry sword +cedar chest +cell +cell +cellblock +center +centrifuge +ceramic +ceramic ware +chain tongs +chair +chair of state +chalk +chamfer plane +chandlery +chapel +character printer +chassis +chasuble +chatelaine +checker 
+cheeseboard +chemical reactor +chessman +chest of drawers +child's room +china +chip +chip +chisel +choke +chokey +chordophone +chronoscope +chuck +church key +cigar lighter +circle +circuit +circuit board +circular plane +circular saw +cistern +civilian clothing +clamp +clamshell +clarinet +classroom +clavier +cleaning implement +cleaning pad +clean room +clinic +clip +cloak +clock +closed circuit +closed-circuit television +closet +cloth covering +clothes closet +clothes dryer +clothes hamper +clothes tree +clothing +clothing store +clout nail +clove hitch +clutch +coach +coal car +coal shovel +coat +coat closet +coating +coating +coat of paint +coaxial cable +cocked hat +coffee cup +coffee maker +coffer +coffin +coil +colander +collider +cologne +colonnade +color television +Colt +column +column +comb +comb +combination plane +combine +commissary +commodity +communication system +commutator +compact disk +compartment +compass +compass card +compound lens +compound lever +compressor +computer +computer circuit +computer network +computer screen +computer system +concentration camp +concert grand +concertina +condenser +condenser +condenser microphone +conductor +connecting rod +connection +conservatory +conservatory +contact +container +contrabassoon +control +control panel +control system +convent +converging lens +converter +convertible +conveyance +cooker +cooking utensil +cooler +cooling system +cord +cord +cordage +corner +correctional institution +corset +cosmetic +costume +costume +cotter +cotton +counter +counter +counter +counter tube +country house +coupling +court +court +coverall +covering +cowbarn +craft +cravat +crazy quilt +cream +cream pitcher +crematory +crepe +crib +cricket equipment +croquet equipment +crossbar +crossbow +crosspiece +crown jewels +cruiser +cruiser +cruise ship +crystal microphone +cudgel +cuff +cultivator +cup +cupboard +cupola +curb roof +curtain +cutout +cutter +cutting implement +cybercafe +cyclotron +cylinder +cymbal +dado 
plane +dagger +damper +dart +data converter +data input device +davenport +davenport +davit +dead axle +deck +deck +deck chair +deep-freeze +defensive structure +delay line +delicatessen +dental appliance +denture +depilatory +depressor +depth finder +derrick +destroyer +detector +detector +detonating fuse +detonator +developer +device +dial +dialyzer +diathermy machine +diesel locomotive +digital camera +digital computer +digital display +diner +dinghy +dining car +dining-hall +dining room +dining-room furniture +dining-room table +dinner dress +dinner pail +dinner table +diode +dip +diplomatic building +dipper +DIP switch +directional antenna +directional microphone +direction finder +disguise +dish +dish +disk +dispenser +display +display panel +distillery +ditch +ditch spade +dive bomber +doll +dolmen +domino +door +doorbell +doorlock +doornail +dormer window +dormitory +dot matrix printer +double-breasted suit +double-reed instrument +douche +dovecote +dovetail plane +downstage +drafting instrument +Dragunov +drawstring bag +dray +dredging bucket +dress +dress blues +dressing +dress uniform +drill +electric drill +drill rig +drinking fountain +drinking vessel +drip mat +drip pot +drive +drive +drogue +drogue parachute +drop-leaf table +dry battery +dry dock +dryer +dry masonry +dry wall +dugout canoe +dumdum +dumpcart +dune buggy +dungeon +duplicator +dustmop +dwelling +earphone +earthenware +easel +easy chair +edge tool +eiderdown +elastic bandage +electrical converter +electrical device +electric bell +electric frying pan +electric furnace +electric heater +electric lamp +electric motor +electric refrigerator +electro-acoustic transducer +electrode +electromagnet +electronic balance +electronic device +electronic equipment +electronic instrument +electronic voltmeter +electron microscope +electrostatic generator +electrostatic printer +elevator +embankment +embellishment +enamel +enamelware +enclosure +endoscope +engine +engine +ensemble +entrenching tool 
+epidiascope +equipment +eraser +escutcheon +espadrille +espresso shop +establishment +estaminet +exercise device +exhaust fan +exhibition hall +Exocet +expansion bolt +explosive device +external-combustion engine +extractor +fabric +face mask +face veil +facing +factory +fairlead +false face +fan +farm building +farm machine +fastener +fatigues +faucet +feedback circuit +fence +fencing sword +fender +ferry +fetoscope +field-sequential color television +fife +fifth wheel +fighter +figure eight +file +file server +filling +film +film +filter +filter +finery +finisher +fipple flute +fire +firearm +fire iron +fireplace +firkin +fisherman's bend +fisherman's knot +fisherman's lure +fishing boat +fishing rod +fishnet +flag +flageolet +flambeau +flannelette +flap +flashlight +flask +flatcar +flat tip screwdriver +fleet ballistic missile submarine +flight simulator +flip-flop +floating dock +floor +floor +floor cover +fly +flywheel +fob +foghorn +folder +food hamper +footbath +footbridge +foothold +foot rule +footwear +footwear +forceps +fore-and-aft sail +foremast +fore plane +fore-topmast +fork +formalwear +fortification +fortress +foundation garment +foundry +fragmentation bomb +framework +free-reed instrument +freight train +French door +friary +friction clutch +frigate +frill +frock coat +front projector +fruit machine +full-dress uniform +full metal jacket +funny wagon +fur hat +furnace +furnishing +furniture +fuse +gable +gable roof +gaff +galleon +gallery +galley +galley +gallows +galvanometer +gambling house +game +game equipment +gamp +garage +Garand rifle +garden +garden spade +garden tool +garment +gas burner +gas-discharge tube +gasket +gasoline engine +gate +gatehouse +gatepost +gathered skirt +gauge +gauze +gauze +gavel +gear +gear +gear +gearing +general-purpose bomb +generator +generator +Geneva gown +geodesic dome +girder +glass +glider +glove +glyptic art +goal +golf club +golf equipment +Gordian knot +Gothic arch +government building +government office 
+gown +gramophone +granary +granny knot +grapnel +grapnel +grate +graver +greasy spoon +greatcoat +great hall +greengrocery +grenade +grillroom +groined vault +Guarnerius +guidance system +guided missile +guildhall +guitar +guitar pick +gun +gun carriage +gunlock +gunsight +gun trigger +gurney +gymnastic apparatus +gym shoe +gypsy cab +habergeon +habit +hairdressing +hairpiece +hairpin +half hatchet +half hitch +hall +hall +hammer +hand +handbell +handbow +handcart +hand glass +handloom +hand lotion +hand mower +handsaw +hand shovel +hand tool +handwear +handwheel +hanger +hank +harpsichord +harrow +hash house +hat +hatch +hauberk +hawser bend +hazard +head +head +head covering +headdress +header +headgear +headlight +headsail +headscarf +health spa +heat engine +heater +heat lamp +heat-seeking missile +heavier-than-air craft +heckelphone +hedge +helicopter +helm +helmet +helmet +heraldry +high altar +high-angle gun +high gear +high table +hinge +hip boot +hitch +hoe +hogshead +hoist +holder +holding device +home appliance +homespun +hood +hood +hood +hook +Hoover +hope chest +horn +horn button +horse +horsecar +horse-drawn vehicle +horsehair wig +hosiery +hospital +hospital room +hostel +hot-air balloon +hotel +hotel room +hot tub +house +house +housing +hovel +huarache +humeral veil +hut +hutch +hydraulic brake +hydraulic system +hydroelectric turbine +hydrofoil +hydrometer +hygrometer +hypermarket +hypodermic syringe +ice machine +ice rink +ice skate +icetray +ignition switch +impact printer +implant +implement +imprint +improvised explosive device +inclined plane +indicator +induction coil +ink-jet printer +inkstand +institution +instrument +instrument of punishment +instrument of torture +interceptor +interchange +intercommunication system +intercontinental ballistic missile +interface +interior door +internal-combustion engine +ionization chamber +video iPod +iron +jack +jack +jacket +jacket +jack plane +jail +jamb +jar +jeroboam +jet +jet engine +jewelled 
headdress +jib +jibboom +jiggermast +joint +jointer +joist +jolly boat +jug +jumper +jumper cable +junction +junction +jury mast +kayak +keel +keg +kerchief +kettle +key +key +keyboard +keyboard instrument +khakis +kiln +kinescope +kingbolt +kirk +kit +kit +kitbag +kitchen +kitchen appliance +kitchen utensil +kite balloon +knee-high +knife +knife +knit +knob +lace +lacquer +ladder truck +lag screw +lamasery +laminate +lamination +lamp +lamp +landing gear +land mine +lantern +lapel +lathe +lattice +launcher +lead-acid battery +leather strip +Leclanche cell +leg +legging +lens +lens implant +level +lever +Levi's +lid +life buoy +life jacket +life preserver +lifting device +ligament +light +light-emitting diode +lighter-than-air craft +lighting +light microscope +linear accelerator +line printer +lingerie +lining +liquid crystal display +lister +living quarters +living room +local area network +lock +locomotive +lodge +lodging house +loft +loft +longbow +lookout +loom +loop knot +lota +lounge +loungewear +love knot +lunchroom +luxury liner +lyre +machine +machine +machine bolt +machine gun +machinery +machine screw +machine tool +magic lantern +magnet +magnetic disk +magnetic recorder +magnetic tape +magnifier +magnum +magnus hitch +mailer +mainframe +mainmast +main-topmast +main yard +makeup +mallet +mallet +mallet +mandolin +manger +man-of-war +manometer +MANPAD +mansard +mansion +marina +marker +marketplace +maser +mask +masonry +mass spectrometer +mast +mast +mat +mat +match +match +match plane +material +materiel +Matthew Walker +maul +measure +measuring instrument +measuring stick +mechanical device +mechanical system +mechanism +medical building +medical instrument +memorial +memory +memory chip +memory device +menhir +man's clothing +mercantile establishment +mercury barometer +mercury thermometer +mercury-vapor lamp +mess +metal screw +meteorological balloon +meter +meterstick +microbalance +microfilm +microscope +military hospital +military quarters 
+military vehicle +mill +milldam +millinery +mine +minibike +mink +minster +Minuteman +mirror +mixer +mizzenmast +module +mold +moldboard plow +monitor +monitor +morgue +mortise joint +motion-picture camera +motion-picture film +motor +motorboat +motorcycle +motor hotel +motor vehicle +mound +mount +mouse button +movie projector +moving-coil galvanometer +mug +multiplex +multiplexer +musette pipe +mushroom anchor +musical instrument +musket +musket ball +muslin +muzzle loader +narrowbody aircraft +nautilus +navigational system +naval equipment +naval gun +naval radar +naval weaponry +navigational instrument +nebuchadnezzar +neckline +neckpiece +necktie +neckwear +needle +needlework +negligee +net +net +net +net +network +network +night bell +nightwear +noisemaker +nonsmoker +non-volatile storage +nose flute +nuclear reactor +nuclear weapon +nursery +oar +oblique bandage +oboe da caccia +oboe d'amore +obstacle +office +office furniture +oil lamp +oil paint +oil tanker +olive drab +omnidirectional antenna +onion dome +open-air market +open-end wrench +opener +openside plane +ophthalmoscope +optical device +optical disk +optical instrument +optical telescope +organ pipe +outbuilding +outerwear +outfit +outrigger canoe +outside mirror +oven +overgarment +overhand knot +overhang +overhead projector +overnighter +overshoe +oxford +package +packaging +packing box +paddle +paddle steamer +page printer +paint +pallium +pan +pan +panic button +panopticon +panopticon +pantechnicon +pantry +pants suit +panzer +paper chain +paper fastener +parabolic reflector +parapet +parasail +parka +parsonage +particle detector +partition +passenger ship +passenger train +passenger van +passive matrix display +passkey +patch +patchouli +patchwork +patina +patisserie +pavis +peavey +pedal +pedestal table +pedestrian crossing +pedicab +peg +pen +penal institution +pencil +pendulum +pendulum clock +percolator +percussion instrument +perfumery +peripheral +periwig +personal computer +petticoat 
+Phillips screw +Phillips screwdriver +phonograph record +photographic equipment +photographic paper +photometer +physical pendulum +piano +piccolo +pick +pick +pickle barrel +piece of cloth +pile +pillow lace +pilothouse +pin +pincer +pinstripe +pipe +pipet +pipe wrench +pistol +pivot +place of business +place of worship +planetarium +planner +plant +planter +plasterboard +plastic laminate +plastic wrap +plastron +plate +platform +platform +platform rocker +plating +pleat +plethysmograph +plexor +pliers +plug +plug +pneumatic drill +pocket +pocket-handkerchief +pocketknife +pointed arch +polyester +polygraph +pomade +pontifical +pool ball +poorhouse +porcelain +porch +portable computer +portico +post +posthole digger +pot +potential divider +potpourri +pottery +pouch +poultice +powder +powder keg +power brake +power mower +power saw +power shovel +power tool +press +press +pressure dome +pressure gauge +pressure suit +printed circuit +printer +prison camp +prod +prolonge knot +prompter +prong +propeller +propeller plane +prosthesis +protective covering +protective garment +pruning saw +pruning shears +public house +public toilet +public transport +pull +pull chain +pulley +Pullman +pullover +pulse counter +pump +pump +pump house +punch +punch press +purifier +push broom +push button +pusher +puzzle +pyrometer +pyx +QWERTY keyboard +racing boat +rack +rack +radar +radiogram +radio interferometer +radio link +radiometer +radio receiver +radiotelegraph +radiotelephone +radio transmitter +raft +rail +rail fence +railing +raincoat +rake +ramp +rampart +random-access memory +rayon +razor +reaction-propulsion engine +reactor +reading lamp +reading room +read-only memory +rearview mirror +receiver +receptacle +reception room +recess +reconnaissance plane +recorder +recording +record player +recreation room +recycling bin +reed stop +reef knot +refectory table +refinery +reflecting telescope +reflector +reformatory +refracting telescope +refrigerator car +refuge +regalia 
+regimentals +regulator +rein +religious residence +removable disk +repair shop +repeating firearm +reproducer +rescue equipment +reservoir +reset button +residence +resistor +resonator +respirator +restraint +retort +rheostat +rib +ribbed vault +riddle +ride +riding boot +riding mower +rifle ball +rig +rink +river boat +road +roadway +robe +rocket +rocket +rod +roller +roller +in-line skate +roller blind +roller coaster +rolling hitch +Rolodex +Roman building +roof +roof +room +roost +rope +rose water +rotary engine +rotating mechanism +rotating shaft +rotisserie +rotor +round arch +router plane +row house +royal mast +rubber bullet +rug +rushlight +sable +sable coat +sack +sackbut +sacking +saddle +safe +safety belt +safety curtain +safety fuse +safety match +sail +sailboat +sailing vessel +salver +sandglass +sash +satellite +satellite television +saucepan +savings bank +saw +sawhorse +scale +scarf +school +scientific instrument +scissors +scoop +scratcher +screen +screen +screen +screw eye +scrub plane +scuffer +sculpture +sea boat +sea chest +seam +seaplane +seat +seat +second hand +secretary +security system +seeker +selector +self-propelled vehicle +semiautomatic firearm +semiautomatic pistol +semiconductor device +serger +serpent +serving cart +serving dish +set +setscrew +setscrew +sewing needle +sextant +shackle +shade +shaft +shag rug +shaker +shaper +shaping tool +sharpener +shaving cream +shaving foam +shawl +shawm +shears +sheath +shed +sheepshank +sheet bend +shelf +shell +shell +shell +shellac +shelter +shelter +shelter +shield +ship +shipboard system +shirt +shirtfront +shock absorber +shoe +shooting brake +shop +short pants +shotgun +shoulder holster +shrine +shutter +shuttle +sidewinder +sieve +sifter +sights +signaling device +signboard +silk +simulator +single bed +single-breasted suit +single-reed instrument +sitz bath +six-pack +skate +skein +skeleton +skewer +skidder +skid lid +skiff +ski pole +skirt +ski tow +skullcap +slack suit +slat +sled 
+sleeper +sleeping car +sleeve +sleeve +slide projector +slipknot +slipper +sloop +slop pail +slot machine +small boat +smart bomb +smoker +smooth plane +snack bar +snap-brim hat +snare drum +sniper rifle +Sno-cat +soapbox +socle +sofa +sonograph +sorter +sound recording +soup ladle +source of illumination +soutane +spacecraft +spade +spar +spatula +spear +spear +spectacles +spectrograph +spectroscope +speedometer +spider +spike +spike +spinet +spinning machine +spiral ratchet screwdriver +spiral spring +spit +spokeshave +sponge mop +spoon +sports equipment +sports implement +sportswear +spot +spring +spring balance +springboard +sprit +square +square knot +squash racket +squawk box +squeezer +squinch +stabilizer +stabilizer +stable gear +stadium +stall +stamp mill +stand +standard cell +staple +starter +state prison +station +statue +stay +steakhouse +stealth aircraft +stealth bomber +stealth fighter +steam bath +steamboat +steamer +steam iron +steam whistle +steel mill +steelyard +steeple +steering system +step +step-up transformer +stereo +stick +stick +still +stilt +Stinger +stock +stockcar +stock car +stocking +stonework +stool +stopper knot +storage battery +storage space +storeroom +stove +stove bolt +Stradavarius +straight chair +strap +strap +stringed instrument +strip +strongbox +stronghold +strongroom +structural member +structure +stylus +submachine gun +submersible +submersible +subwoofer +suction pump +suede cloth +sunbonnet +sunhat +supermarket +superstructure +supply chamber +support +support +support column +supporting structure +supporting tower +surface lift +surface-to-air missile +surgeon's knot +surgical instrument +surgical knife +surplice +surveillance system +surveying instrument +surveyor's level +swamp buggy +sweater +swimsuit +sword +synchrotron +system +tabi +table +table +table knife +tableware +tabor +tachometer +tack +tack hammer +talaria +tambour +tambourine +tampon +tank +tank car +tannoy +tape +tape deck +tape recorder +target 
+tavern +tea chest +teaching aid +tea gown +teashop +teaspoon +tea-strainer +tea tray +telecommunication system +telephone +telephone line +telephone receiver +telephone system +telephone wire +telescope +television antenna +television camera +television equipment +television monitor +temple +temple +tender +tennis racket +tenor drum +tenoroon +tenpenny nail +tent +tenterhook +terminal +terminal +test rocket +tetraskelion +textile machine +textile mill +theater +theodolite +thermometer +thermostat +three-piece suit +three-way switch +thumbscrew +thumbtack +tights +tile +timber +timber hitch +timbrel +time-fuse +timepiece +timer +time-switch +tire chain +tithe barn +toecap +toga +toggle switch +toilet +toilet powder +toiletry +toilet water +token +tomograph +toner +tongs +tool +toolbox +tooth +toothbrush +top +top +topgallant +topmast +topsail +torpedo +torpedo boat +touch screen +towel +toweling +tower +toy box +track +tracked vehicle +trailer +trailer +train +trammel +transdermal patch +transformer +transistor +transmission +transmitter +transporter +trap +trapeze +travel iron +treasure chest +trellis +trench +trial balloon +triclinium +troop carrier +trough +trouser +trowel +truck +trunk +try square +tube +tuck shop +tun +tunic +turbine +Turkish towel +Turk's head +turner +turntable +turtleneck +tweed +tweeter +twenty-two +two-piece +typesetting machine +typewriter +ultraviolet lamp +undercarriage +undergarment +underpants +underwear +uneven parallel bars +uniform +university +uplift +urn +urn +utensil +vacuum flask +valve +van +van +varnish +vehicle +veranda +vertical file +vessel +vessel +vest +vibrator +vibrator +videocassette +video recording +vigil light +viol +vise +vivarium +voltaic cell +voltmeter +wagon +waist pack +walking stick +wall +wall +wall unit +ward +warehouse +warship +wash +washer +washtub +watch +watchtower +water-base paint +water butt +water cart +watercolor +water-cooled reactor +water gauge +water ski +waterwheel +weapon +weaponry 
+weatherglass +weathervane +web +wedge +wedge +weighbridge +weight +weir +weld +well +whaler +wheel +wheelchair +wheeled vehicle +wheelwork +whetstone +whip +whisk +whispering gallery +white goods +whorehouse +wicker basket +widebody aircraft +winch +Winchester +wind instrument +window +window +window blind +window envelope +Windsor knot +wine bucket +wine cask +wineglass +wire +wire +wire matrix printer +wiring +woman's clothing +wood +woodenware +woodscrew +woodwind +woofer +workbasket +workbench +work-clothing +worktable +workwear +wrapping +wrench +writing desk +writing implement +X-ray film +X-ray machine +yacht chair +yard +yard +yardstick +yoke +zither +zoot suit +grain +light +colorlessness +chromatic color +black +gray +dark red +orange +yellow +green +blue +purple +reddish purple +pink +light brown +reddish brown +complexion +skin +epidermal cell +columnar cell +macule +specimen +milk +embryonic cell +leukocyte +neutrophil +astrocyte +exoskeleton +medium +film +press +print media +storage medium +journalism +photojournalism +newspaper +telecommunication +telephone +call +long distance +wireless +broadcasting +television +reception +chat room +portal site +wordbook +album +concept album +magazine +movie +sign +comestible +course +dainty +dish +fare +diet +dietary supplement +liquid diet +reducing diet +vegetarianism +ration +field ration +foodstuff +starches +concentrate +meal +roughage +flour +wheat flour +nutriment +commissariat +canned food +canned meat +meal +breakfast +lunch +dinner +supper +buffet +picnic +cookout +bite +entree +side dish +casserole +chicken casserole +appetizer +cocktail +hors d'oeuvre +relish +dip +soup +madrilene +broth +broth +chowder +clam chowder +stew +goulash +fish stew +fricassee +ragout +ready-mix +powdered sugar +granulated sugar +brown sugar +sweet +confiture +candy +hard candy +patty +brittle +chewing gum +candied fruit +candied citrus peel +fudge +gumdrop +mint +kiss +lozenge +taffy +dessert +dumpling +frozen dessert 
+mousse +mousse +whip +pudding +pudding +tipsy cake +ice +chocolate ice cream +Neapolitan ice cream +peach ice cream +strawberry ice cream +tutti-frutti +vanilla ice cream +split +pudding +custard +pastry +turnover +puff paste +phyllo +fish cake +conserve +jam +jelly +apple jelly +marmalade +gelatin +gelatin dessert +patty +stuffing +bread +breadstick +bun +cracker +dark bread +flatbread +loaf of bread +quick bread +rye bread +toast +white bread +French bread +cornbread +johnnycake +muffin +scone +onion roll +sweet roll +onion bagel +biscuit +baking-powder biscuit +soft pretzel +sandwich +hamburger +gruel +edible fruit +vegetable +crudites +legume +greens +solanaceous vegetable +root vegetable +potato +baked potato +sweet potato +snack food +corn chip +tortilla chip +cruciferous vegetable +cabbage +kale +red cabbage +savoy cabbage +squash +summer squash +yellow squash +winter squash +turban squash +gherkin +sprout +beet +pepper +sweet pepper +hot pepper +chili +jalapeno +onion +Spanish onion +salad green +lettuce +butterhead lettuce +bean +pea +green pea +common bean +fresh bean +green bean +shell bean +lima bean +soy +celery +chicory +coffee substitute +chicory escarole +corn +hominy +cress +tomato +cherry tomato +salsify +turnip +edible nut +apple +eating apple +Delicious +McIntosh +Pippin +cooking apple +berry +currant +citrus +temple orange +mandarin +bitter orange +sweet orange +Jaffa orange +navel orange +Valencia orange +lime +almond +plum +dried fruit +raisin +passion fruit +cocoa +melon +muskmelon +winter melon +cherry +sweet cherry +heart cherry +sour cherry +grape +fox grape +muscadine +slipskin grape +vinifera grape +Tokay +cherimoya +soursop +sweetsop +ilama +pond apple +olive +pear +edible seed +walnut +feed +fodder +oil cake +timothy +grain +barley +wheat +rice +mash +bird feed +petfood +salad +tossed salad +combination salad +pasta salad +fruit salad +ingredient +flavorer +condiment +herb +spice +cinnamon +pepper +garlic +mustard +sage +savory 
+curry +paprika +pickle +sweet pickle +vinegar +sauce +hot sauce +dressing +mayonnaise +cheese sauce +hot-fudge sauce +white sauce +spaghetti sauce +boiled egg +hard-boiled egg +Easter egg +omelet +firm omelet +souffle +dairy product +milk +milk +powdered milk +cream +butter +clarified butter +yogurt +curd +cheese +cream cheese +bleu +cheddar +Swiss cheese +spread +pate +sweetening +sugar +syrup +batter +bread dough +chicken and rice +pasta +Tetrazzini +chili dog +fondue +fondue +hash +kabob +seafood Newburg +meatball +pilaf +sausage pizza +pepperoni pizza +cheese pizza +anchovy pizza +Sicilian pizza +porridge +fish loaf +salmon loaf +scallopine +taco +beef burrito +quesadilla +tostada +beverage +concoction +mix +filling +potion +elixir +alcohol +brew +beer +lager +Weissbier +malt +ale +stout +mead +wine +white wine +sparkling wine +Burgundy +Beaujolais +Medoc +Pinot noir +Bordeaux +claret +Chianti +Cabernet +Merlot +dessert wine +Rhine wine +Rioja +Saint Emilion +zinfandel +table wine +vermouth +fortified wine +Madeira +liquor +brandy +gin +rum +whiskey +corn whiskey +Irish +Scotch +liqueur +coffee liqueur +orange liqueur +mixed drink +cocktail +highball +Bloody Mary +daiquiri +manhattan +martini +sling +sour +caffe latte +cider +sweet cider +juice +fruit juice +grape juice +orange juice +fruit drink +mulled wine +soft drink +cola +coffee +punch +champagne cup +claret cup +rickey +tea +tea +herb tea +tisane +black tea +green tea +water +drinking water +mineral water +vitamin pill +collection +suburb +residence +littoral +grassland +pasture +resort +field +air bubble +arroyo +ascent +atoll +bank +bank +bar +barrier reef +basin +beach +burrow +canyon +cave +continental glacier +crag +crater +dale +descent +draw +dune +geological formation +glacier +glen +gorge +gulch +gully +highland +hill +hillside +hole +hollow +iceberg +ice mass +ion +knoll +landfall +landfill +lather +ledge +lowland +meteorite +mountain +mull +natural depression +natural elevation +nullah +ocean 
floor +outcrop +plain +point +precipice +ravine +reef +ridge +ridge +rift valley +rock +sandbank +seaside +shiner +shore +slope +soapsuds +spume +tableland +tideland +volcanic crater +wadi +spiritual leader +adventurer +anomaly +benefactor +commoner +contestant +discussant +entertainer +female +finisher +inhabitant +native +juvenile +lover +male +mediator +national +peer +recipient +sensualist +traveler +unwelcome person +unskilled person +worker +wrongdoer +Black +White +Semite +white man +Mongol +Nahuatl +Caddo +Penutian +Teton +Taracahitian +Slav +Catholic +Altaic +Bornean +Canadian +Central American +Britisher +English person +Englishwoman +Ethiopian +Parisian +Greek +Italian +Japanese +Mexican +Nigerian +North American +Pakistani +South American Indian +Filipino +Polynesian +Scandinavian +South African +South American +Turki +American +New Yorker +abbess +abstainer +academic administrator +accomplice +acquaintance +acquirer +aerialist +actor +actor +addict +adjutant +admirer +adulterer +advertiser +advocate +analyst +ancestor +announcer +announcer +appointee +appreciator +appropriator +archbishop +architect +army engineer +army officer +arrival +articulator +asserter +assistant +associate +astronaut +athlete +attendant +aunt +authoritarian +authority +aviator +back +bad person +ballet dancer +bullfighter +baron +bartender +baseball coach +base runner +basketball player +believer +betrothed +bigot +big shot +biochemist +bisexual +boatman +bond servant +botanist +Boy Scout +buddy +campaigner +captain +card player +careerist +caretaker +cavalryman +celebrity +charmer +child +child +cipher +citizen +civil rights leader +cleaner +clergyman +cleric +clerk +climber +closer +clown +coach +cobbler +collaborator +college student +collegian +commanding officer +commissioned officer +commissioned military officer +commissioner +committee member +communist +compulsive +computer scientist +computer user +contractor +convict +copycat +counselor +craftsman +creditor +critic 
+curate +dancer +dancer +darling +date +daughter +dawdler +deacon +deaf person +debtor +deliveryman +descender +designated hitter +detective +detractor +director +disbeliever +dispatcher +distributor +doctor +domestic partner +draftsman +drinker +drinker +drug addict +drug user +drummer +drunkard +eager beaver +earner +eavesdropper +economist +editor +egotist +elder +elected official +emissary +employee +employer +endomorph +enemy +entrant +examiner +exhibitionist +fan +fancier +farmer +farmhand +fascist +father +female aristocrat +female offspring +female child +fielder +fireman +first baseman +first sergeant +flag officer +flatterer +foe +folk dancer +follower +football player +forefather +forger +founder +free agent +friar +monk +gambler +generator +geneticist +genitor +geologist +girl +godchild +godparent +golfer +grandma +grandmaster +grandparent +granter +great grandchild +great grandparent +grouch +guard +guest +guide +gymnast +Gypsy +hack +hairdresser +hater +headmaster +hearer +hedonist +heir +herder +homeless +horseman +host +host +hypocrite +important person +incumbent +infielder +informer +in-law +insurgent +investigator +investor +journalist +judge +juror +Counsel to the Crown +kinswoman +laborer +lama +landowner +lawgiver +lawman +lawyer +liberator +lieutenant +lineman +literate +litigant +Lord +failure +lowerclassman +lumberman +maid +maker +malcontent +martinet +master of ceremonies +masturbator +medical officer +medical practitioner +medical scientist +mender +meteorologist +middle-aged man +miler +military attache +military officer +military policeman +minister +minor leaguer +misfit +mixed-blood +model +moneymaker +mother +mourner +mover +musician +Muslimah +mystic +nanny +neonate +nephew +neutral +newcomer +newcomer +newspaper editor +niece +noncommissioned officer +nurse +observer +occultist +oldster +old woman +opportunist +orator +originator +outfielder +right fielder +right-handed pitcher +painter +panelist +pardoner +parodist +party 
+passenger +patient +patron +payer +peddler +percussionist +personal representative +personification +pervert +petitioner +Pharaoh +phonetician +physical therapist +physicist +pimp +pisser +pitcher +planner +player +poet +politician +practitioner +prayer +preserver +president +priest +princess +principal +proctor +programmer +promiser +propagandist +prosecutor +psychic +pusher +queen +queen +ranch hand +reader +recruit +recruiter +religious leader +repairman +reporter +representative +reprobate +rescuer +reservist +restrainer +retailer +retiree +revolutionist +rich person +civil authority +runner +running back +rustic +saboteur +sailor +salesman +salesperson +scalper +schemer +scholar +schoolchild +scientist +second baseman +secretary +seeker +selfish person +seller +serf +serviceman +settler +shrew +sibling +sick person +singer +sister +skeptic +skier +sleeper +slob +smith +snoop +social climber +socialist +social scientist +sociologist +soldier +son +songster +sorcerer +sovereign +speaker +specialist +spectator +stand-in +star +stepparent +stock trader +stranger +strategist +student +subordinate +suitor +superior +surgeon +sweetheart +sympathizer +tax assessor +taxonomist +teacher +television reporter +tenant +tenant +tennis player +testator +testee +theologian +therapist +thinker +thrower +toastmaster +trader +traffic cop +trainer +traitor +traveling salesman +tyrant +upstart +upstart +utility man +vacationer +vegetarian +vice president +victim +volunteer +votary +waiter +waitress +wanderer +wanton +washer +white supremacist +wife +winner +winner +woman +workman +worshiper +wright +writer +wilding +bryophyte +liverwort +pteridophyte +fern +fern ally +spore +spermatophyte +perennial +gymnosperm +ephedra +cycad +sago palm +zamia +pine +pinon +nut pine +white pine +yellow pine +larch +fir +silver fir +cedar +spruce +hemlock +douglas fir +cedar +cypress +arborvitae +araucaria +kauri pine +celery pine +yellowwood +gymnospermous yellowwood +yew +angiosperm +dicot 
+flower +wildflower +inflorescence +pistil +pericarp +oilseed +custard apple +barberry +allspice +laurel +anise tree +magnolia +moonseed +buttercup +aconite +baneberry +anemone +thimbleweed +columbine +clematis +delphinium +nigella +wax myrtle +zebrawood +legume +legume +darling pea +clover +acacia +wattle +albizzia +nitta tree +dogbane +allamanda +carissa +frangipani +rauwolfia +arum +alocasia +anthurium +caladium +monstera +nephthytis +arrow arum +calla lily +duckweed +watermeal +birthwort +sandwort +mouse-ear chickweed +pink +china pink +lychnis +silene +chickweed +fig marigold +amaranth +orach +saltbush +beet +sand verbena +four o'clock +echinocactus +prickly pear +pokeweed +portulaca +flame flower +caper +spiderflower +crucifer +cress +watercress +rock cress +cabbage +head cabbage +turnip plant +mustard +wallflower +woad +stock +radish plant +pennycress +poppy +prickly poppy +composite +compass plant +everlasting +achillea +ageratum +ragweed +ammobium +burdock +artemisia +mugwort +aster +wood aster +common daisy +bur marigold +calendula +thistle +carline thistle +catananche +centaury +knapweed +chrysanthemum +golden aster +goldenbush +plume thistle +woolly thistle +coreopsis +fleabane +woolly sunflower +cotton rose +gazania +African daisy +cudweed +gumweed +goldenbush +sneezeweed +sunflower +hawkweed +marsh elder +krigia +hawkbit +blazing star +rattlesnake root +daisybush +coneflower +coneflower +cutleaved coneflower +golden thistle +white-topped aster +goldenrod +sow thistle +marigold +dandelion +crownbeard +zinnia +achene +campanula +orchid +orchis +arethusa +helleborine +coral root +lady's slipper +large yellow lady's slipper +helleborine +fringed orchis +rein orchid +spider orchid +moth orchid +butterfly orchid +ladies' tresses +vanda +vanilla +yam +primrose +pimpernel +featherfoil +loosestrife +water pimpernel +gramineous plant +grass +wheatgrass +foxtail +broom grass +oat +brome +grama +reed grass +burgrass +crabgrass +lyme grass +wild rye +plume grass 
+rye grass +ricegrass +meadowgrass +millet +reed +sorghum +grain sorghum +cordgrass +cereal +wheat +corn +mealie +zoysia +bamboo +cotton grass +spike rush +pandanus +cattail +grain +kernel +gourd +gourd +squash +summer squash +marrow +winter squash +turban squash +bryony +sweet melon +luffa +lobelia +mallow +hollyhock +althea +poppy mallow +seashore mallow +globe mallow +tulipwood tree +sterculia +bottle-tree +screw tree +cacao +linden +herb +protea +banksia +grevillea +macadamia +casuarina +beefwood +heath +bearberry +huckleberry +kalmia +rhododendron +cranberry +blueberry +shortia +Australian heath +epacris +wintergreen +pipsissewa +beech +chestnut +tanbark oak +southern beech +New Zealand beech +oak +live oak +white oak +red oak +scrub oak +chestnut oak +birch +alder +hornbeam +hop hornbeam +hazelnut +centaury +gentian +fringed gentian +olive tree +fringe tree +ash +red ash +jasmine +privet +lilac +liquidambar +walnut +hickory +wing nut +loosestrife +myrtle +gum tree +eucalyptus +flooded gum +mallee +stringybark +tupelo +enchanter's nightshade +willowherb +fuchsia +evening primrose +daphne +canna +banana +ginger +begonia +tuberous begonia +poon +St John's wort +rockrose +dipterocarp +candlewood +reseda +viola +violet +nettle +cannabis +mulberry +fig tree +fig +elm +hackberry +iridaceous plant +bearded iris +beardless iris +crocus +amaryllis +blood lily +narcissus +daffodil +liliaceous plant +colicroot +alliaceous plant +kniphofia +poker plant +asphodel +mariposa +globe lily +camas +dogtooth violet +fritillary +tulip +star-of-Bethlehem +grape hyacinth +scilla +false asphodel +bog asphodel +hellebore +death camas +sarsaparilla +Solomon's-seal +bellwort +agave +sansevieria +cassia +locust tree +senna +angelim +milk vetch +wild indigo +pea tree +glory pea +rosewood +blackwood +tick trefoil +coral tree +vetchling +wild pea +lupine +medic +mucuna +locoweed +pole bean +pea +edible-pod pea +quira +hoary pea +bush pea +vetch +palm +sago palm +feather palm +fan palm 
+palmetto +areca +calamus +oil palm +raffia palm +lady palm +eriogonum +rhubarb +water plantain +waterweed +pondweed +rose +agrimonia +flowering quince +cotoneaster +avens +apple tree +wild apple +crab apple +Iowa crab +cinquefoil +plum +wild plum +bullace +apricot +cherry +wild cherry +sweet cherry +sour cherry +almond tree +almond +bird cherry +flowering cherry +chokecherry +fruit tree +bramble bush +raspberry +mountain ash +service tree +spirea +madderwort +coffee +cinchona +bedstraw +genipa +hamelia +honeysuckle +American fly honeysuckle +teasel +scabious +geranium +cranesbill +storksbill +incense tree +mahogany +silver ash +milkwort +citrus +orange +mandarin +lemon +kumquat +prickly ash +bitterwood tree +ailanthus +nasturtium +willow +osier +sallow +poplar +black poplar +cottonwood +aspen +soapberry +soapberry vine +harpullia +pachysandra +spindle tree +maple +box elder +holly +sumac +horse chestnut +persimmon +buckthorn +styrax +carnivorous plant +pitcher plant +sedum +philadelphus +saxifrage +astilbe +alumroot +miterwort +parnassia +currant +plane tree +phlox +acanthus +catalpa +anchusa +comfrey +convolvulus +bindweed +gloxinia +streptocarpus +waterleaf +nemophila +scorpionweed +giant hyssop +bugle +wood mint +calamint +coleus +dead nettle +origanum +horehound +monarda +savory +germander +thyme +blue curls +snapdragon +kitten-tails +Indian paintbrush +foxglove +toadflax +veronica +nightshade +thorn apple +matrimony vine +cupflower +petunia +salpiglossis +spurge +croton +cassava +slipper spurge +camellia +umbellifer +angelica +astrantia +caraway +fennel +parsnip +parsley +sanicle +dogwood +valerian +bristle fern +flowering fern +climbing fern +clover fern +adder's tongue +grape fern +ergot +sclerotinia +earthball +Podaxaceae +false truffle +rhizopus +slime mold +cellular slime mold +downy mildew +pythium +Sarcosomataceae +club fungus +lichen +lecanora +fungus +basidiomycete +mushroom +mushroom +mushroom +toadstool +horse mushroom +meadow mushroom +royal 
agaric +false deathcap +fly agaric +death cap +blushing mushroom +destroying angel +chanterelle +floccose chanterelle +pig's ears +cinnabar chanterelle +jack-o-lantern fungus +inky cap +shaggymane +milkcap +fairy-ring mushroom +oyster mushroom +olive-tree agaric +Pholiota astragalina +Pholiota aurea +Pholiota destruens +Pholiota flammans +Pholiota flavida +nameko +Pholiota squarrosa-adiposa +Pholiota squarrosa +Pholiota squarrosoides +Stropharia ambigua +Stropharia hornemannii +Stropharia rugoso-annulata +Entoloma lividum +Entoloma aprile +Chlorophyllum molybdites +lepiota +parasol mushroom +poisonous parasol +Lepiota naucina +Lepiota rhacodes +American parasol +Lepiota rubrotincta +Lepiota clypeolaria +onion stem +blewits +sandy mushroom +Tricholoma pessundatum +Tricholoma sejunctum +man-on-a-horse +Tricholoma venenata +Tricholoma pardinum +Tricholoma vaccinum +Tricholoma aurantium +Pluteus aurantiorugosus +Pluteus magnus +deer mushroom +straw mushroom +Volvariella bombycina +Clitocybe clavipes +Clitocybe dealbata +Clitocybe inornata +Clitocybe robusta +Clitocybe irina +Clitocybe subconnexa +winter mushroom +mycelium +ascomycete +Clavicipitaceae +yeast +discomycete +morel +Verpa +false morel +lorchel +helvella +Gyromitra californica +Gyromitra sphaerospora +Gyromitra esculenta +Gyromitra infula +Gyromitra gigas +gasteromycete +common stinkhorn +Phallus ravenelii +dog stinkhorn +stinky squid +puffball +Geastrum coronatum +Astreus pteridis +Astreus hygrometricus +polypore +Boletus chrysenteron +Boletus edulis +Frost's bolete +Boletus luridus +Boletus mirabilis +Boletus pallidus +Boletus pulcherrimus +Boletus pulverulentus +Boletus roxanae +Boletus subvelutipes +Boletus variipes +Boletus zelleri +Fuscoboletinus paluster +Fuscoboletinus serotinus +Leccinum fibrillosum +Suillus albivelatus +old-man-of-the-woods +Boletellus russellii +jelly fungus +rust +smut +cornsmut +flag smut fungus +waxycap +Hygrocybe acutoconica +Hygrophorus borealis +Hygrophorus caeruleus 
+Hygrophorus inocybiformis +Hygrophorus kauffmanii +Hygrophorus marzuolus +Hygrophorus purpurascens +Hygrophorus russula +Hygrophorus sordidus +Hygrophorus tennesseensis +Hygrophorus turundus +Neohygrophorus angelesianus +Cortinarius armillatus +Cortinarius atkinsonianus +Cortinarius corrugatus +Cortinarius gentilis +Cortinarius mutabilis +Cortinarius semisanguineus +Cortinarius subfoetidus +Cortinarius violaceus +Gymnopilus spectabilis +Gymnopilus validipes +Gymnopilus ventricosus +mold +mildew +candida +houseplant +succulent +weed +sporophyll +sporangium +poisonous plant +vine +tree +bean tree +gymnospermous tree +conifer +angiospermous tree +nut tree +spice tree +bonsai +subshrub +bramble +liana +desert plant +marsh plant +strangler +root +receptacle +scape +peduncle +flower cluster +raceme +cyme +bulbous plant +fruit +seed +bean +nut +berry +aggregate fruit +drupe +drupelet +pome +pod +husk +buckthorn +vinifera +true pepper +peperomia +bract +palmate leaf +pinnate leaf +dentate leaf +branchlet +polypody +strap fern +staghorn fern +spleenwort +chain fern +davallia +hare's-foot fern +shield fern +wood fern +lady fern +bladder fern +holly fern +woodsia +maidenhair +brittle maidenhair +lip fern +cliff brake +horsetail +club moss +spikemoss +beech fern +shoestring fungus +Armillaria caligata +Armillaria ponderosa +Armillaria zelleri +honey mushroom +milkweed +stapelia +stephanotis +orangery +figure +plane figure +solid figure +line +convex shape +concave shape +cylinder +round shape +polygon +concave polygon +amorphous shape +closed curve +simple closed curve +cone +circle +ring +loop +ellipse +triangle +spherical polygon +angular distance +groove +bulge +bow +balance +toroid +boundary +incisure +notch +wrinkle +tree +regular polyhedron +carbon +rock +soil +high explosive +culture medium +agar +paper +paving +plaster +stucco +tear gas +vitamin +fat-soluble vitamin +water-soluble vitamin +vitamin A +B-complex vitamin +vitamin E +vitamin K \ No newline at end of file 
diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/imagenet1k.data b/workloads/realworld/uvm_prefetch/darknet/cfg/imagenet1k.data new file mode 100644 index 0000000000000000000000000000000000000000..daf120a3c020003e8ed08096c51304272ca3ba27 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/imagenet1k.data @@ -0,0 +1,9 @@ +classes=1000 +train = /data/darknet/imagenet_mini/valid.list +valid = /data/darknet/imagenet_mini/valid.list +test = /data/darknet/imagenet_mini/valid.list +backup = /data/darknet/backup/ +labels = /data/darknet/imagenet_mini/imagenet.labels.list +names = /data/darknet/imagenet_mini/imagenet.shortnames.list +top=5 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/imagenet22k.dataset b/workloads/realworld/uvm_prefetch/darknet/cfg/imagenet22k.dataset new file mode 100644 index 0000000000000000000000000000000000000000..e25ef007ecceb096e5846ee7cacd1fd54fb8f9e4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/imagenet22k.dataset @@ -0,0 +1,9 @@ +classes=21842 +train = /data/imagenet/imagenet22k.train.list +valid = /data/imagenet/imagenet22k.valid.list +#valid = /data/imagenet/imagenet1k.valid.list +backup = /home/pjreddie/backup/ +labels = data/imagenet.labels.list +names = data/imagenet.shortnames.list +top = 5 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/imagenet9k.hierarchy.dataset b/workloads/realworld/uvm_prefetch/darknet/cfg/imagenet9k.hierarchy.dataset new file mode 100644 index 0000000000000000000000000000000000000000..41fb71b065544b919bc8ed7d723afb5d04ad85ac --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/imagenet9k.hierarchy.dataset @@ -0,0 +1,9 @@ +classes=9418 +train = data/9k.train.list +valid = /data/imagenet/imagenet1k.valid.list +leaves = data/imagenet1k.labels +backup = /home/pjreddie/backup/ +labels = data/9k.labels +names = data/9k.names +top=5 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/jnet-conv.cfg 
b/workloads/realworld/uvm_prefetch/darknet/cfg/jnet-conv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..056f82aa6e2a0710a664c4740ab763961e4de33d --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/jnet-conv.cfg @@ -0,0 +1,118 @@ +[net] +batch=1 +subdivisions=1 +height=10 +width=10 +channels=3 +learning_rate=0.01 +momentum=0.9 +decay=0.0005 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/openimages.data b/workloads/realworld/uvm_prefetch/darknet/cfg/openimages.data new file mode 100644 index 0000000000000000000000000000000000000000..fa80e5ab7d8576d391c7cac9dfc8367aab704139 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/openimages.data @@ -0,0 +1,8 @@ +classes= 601 +train = /home/pjreddie/data/openimsv4/openimages.train.list +#valid = coco_testdev +valid = data/coco_val_5k.list +names = data/openimages.names +backup = 
/home/pjreddie/backup/ +eval=coco + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/resnet101.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet101.cfg new file mode 100644 index 0000000000000000000000000000000000000000..de458820bcd35f5e65d858f9f661e42653ed0184 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet101.cfg @@ -0,0 +1,990 @@ +[net] +# Training +# batch=128 +# subdivisions=2 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 
+stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] 
+from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + +[cost] +type=sse + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/resnet152.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet152.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e8e3297ac2364b95f28fa0a0bdd4ca71f14ac82c --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet152.cfg @@ -0,0 +1,1460 @@ +[net] +# Training +# batch=128 +# subdivisions=8 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 
+size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 
+activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/resnet18.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet18.cfg new file mode 100644 index 0000000000000000000000000000000000000000..275f4bdb5962d77c16f353cd3d2751e189b9344c --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet18.cfg @@ -0,0 +1,228 @@ +[net] +# Training +# batch=128 +# subdivisions=1 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block 
+[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/resnet18_b.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet18_b.cfg 
new file mode 100644 index 0000000000000000000000000000000000000000..c0712e9cdfaf1f28dbe3a358355405d2758e6d2b --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet18_b.cfg @@ -0,0 +1,228 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/resnet18_t.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet18_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c0712e9cdfaf1f28dbe3a358355405d2758e6d2b --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet18_t.cfg @@ -0,0 +1,228 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 
+activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/resnet34.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet34.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9f68f096741ae3b4898f40b76af7569d4697729f --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet34.cfg @@ -0,0 +1,392 @@ +[net] +# Training +# batch=128 +# subdivisions=2 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] 
+activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] 
+activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/resnet50.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet50.cfg new file mode 100644 index 0000000000000000000000000000000000000000..d0d7c511516e997a392bb5ba77682740c0494972 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet50.cfg @@ -0,0 +1,510 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/resnet50_b.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet50_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..462479918fd608c4a0761b84c7299e51081193c6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet50_b.cfg @@ -0,0 +1,510 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 
+stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/resnet50_t.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet50_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..462479918fd608c4a0761b84c7299e51081193c6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/resnet50_t.cfg @@ -0,0 +1,510 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/resnext101-32x4d.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/resnext101-32x4d.cfg new file mode 100644 index 0000000000000000000000000000000000000000..8538ccc3daee2e3de589eb4e2edf868340d4924b --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/resnext101-32x4d.cfg @@ -0,0 +1,1053 @@ +[net] +# Training +# batch=128 +# subdivisions=8 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 
+filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 
+size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 
+size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/resnext152-32x4d.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/resnext152-32x4d.cfg new file mode 100644 index 0000000000000000000000000000000000000000..48279fd28eb0dbe23c7b0c593f67051cb6a62374 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/resnext152-32x4d.cfg @@ -0,0 +1,1562 @@ +[net] +# Training +# batch=128 +# subdivisions=16 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 
+activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 
+size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 
+size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] 
+from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 
+pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/resnext50.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/resnext50.cfg new file mode 100644 index 0000000000000000000000000000000000000000..12aebdf6fbd48bde40ee22c4257e06f2e0cf46eb --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/resnext50.cfg @@ -0,0 +1,523 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 
+ +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 
+activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/rnn.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/rnn.cfg new file mode 100644 index 0000000000000000000000000000000000000000..61b202f3a441b701f76d9b007c6276467c639e11 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/rnn.cfg @@ -0,0 +1,38 @@ +[net] +subdivisions=1 +inputs=256 +batch = 1 +momentum=0.9 +decay=0.001 +max_batches = 2000 +time_steps=1 +learning_rate=0.1 +policy=steps 
+steps=1000,1500 +scales=.1,.1 + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[connected] +output=256 +activation=leaky + +[softmax] + + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/rnn.train.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/rnn.train.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b9748990aceaa85cc2e43358073114606725dcbd --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/rnn.train.cfg @@ -0,0 +1,38 @@ +[net] +subdivisions=1 +inputs=256 +batch = 128 +momentum=0.9 +decay=0.001 +max_batches = 2000 +time_steps=576 +learning_rate=0.1 +policy=steps +steps=1000,1500 +scales=.1,.1 + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[connected] +output=256 +activation=leaky + +[softmax] + + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/strided.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/strided.cfg new file mode 100644 index 0000000000000000000000000000000000000000..2f745085adc268a3e99bd7895bd4dda28227bffd --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/strided.cfg @@ -0,0 +1,182 @@ +[net] +batch=128 +subdivisions=4 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +policy=steps +scales=.1,.1,.1 +steps=200000,300000,400000 +max_batches=800000 + + +[crop] +crop_height=224 +crop_width=224 +flip=1 +angle=0 +saturation=1 +exposure=1 +shift=.2 + +[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=192 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 
+size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=1024 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=ramp + +[maxpool] +size=3 +stride=2 + +[connected] +output=4096 +activation=ramp + +[dropout] +probability=0.5 + +[connected] +output=1000 +activation=ramp + +[softmax] + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/t1.test.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/t1.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b3628114e048dc78f4797342afd95a757c81c977 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/t1.test.cfg @@ -0,0 +1,117 @@ +[net] +batch=1 +subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,20000,30000 +scales=2.5,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +filters=16 +size=3 +stride=1 +pad=1 
+activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[connected] +output= 1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/tiny.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..f97327cfceebf868998bf2bb16224bd0994ebd82 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/tiny.cfg @@ -0,0 +1,174 @@ +[net] +# Train +batch=128 +subdivisions=1 +# Test +# batch=1 +# subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=320 + +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/vgg-16.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/vgg-16.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c73b17b6ccfdcc9cae9b67591b662571463569ab --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/vgg-16.cfg @@ -0,0 +1,157 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +learning_rate=0.00001 +momentum=0.9 +decay=0.0005 + +[crop] +crop_height=224 +crop_width=224 +flip=1 +exposure=1 +saturation=1 +angle=0 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 
+stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=1000 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/vgg-conv.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/vgg-conv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..21e1d724c9418107f9cf82f9bffb9ae64d3e2084 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/vgg-conv.cfg @@ -0,0 +1,121 @@ +[net] +batch=1 +subdivisions=1 +width=224 +height=224 +channels=3 +learning_rate=0.00001 +momentum=0.9 +decay=0.0005 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] 
+filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/voc.data b/workloads/realworld/uvm_prefetch/darknet/cfg/voc.data new file mode 100644 index 0000000000000000000000000000000000000000..7807b5d2a8fd0f855a8c68e82c064dc320551da1 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/voc.data @@ -0,0 +1,6 @@ +classes= 20 +train = /home/pjreddie/data/voc/train.txt +valid = /home/pjreddie/data/voc/2007_test.txt +names = data/voc.names +backup = backup + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/writing.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/writing.cfg new file mode 100644 index 0000000000000000000000000000000000000000..1ed899bcd63d6354e8320ace7e7f513ba1174886 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/writing.cfg @@ -0,0 +1,41 @@ +[net] +batch=128 +subdivisions=2 +height=256 +width=256 +channels=3 +learning_rate=0.00000001 +momentum=0.9 +decay=0.0005 +seen=0 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1 +size=3 +stride=1 +pad=1 +activation=logistic + +[cost] + diff --git 
a/workloads/realworld/uvm_prefetch/darknet/cfg/yolo9000.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/yolo9000.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e745f78a6e37611fb0f13c2d848c292cea1a89d3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/yolo9000.cfg @@ -0,0 +1,218 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +batch=1 +subdivisions=1 +height=544 +width=544 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +hue=.1 +saturation=.75 +exposure=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=28269 +size=1 +stride=1 +pad=1 +activation=linear + +[region] +anchors = 0.77871, 1.14074, 3.00525, 4.31277, 9.22725, 9.61974 +bias_match=1 +classes=9418 +coords=4 +num=3 +softmax=1 +jitter=.2 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +thresh = .6 +absolute=1 +random=1 + +tree=data/9k.tree +map = data/coco9k.map diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/yolov1-tiny.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov1-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a5e7b4920289ccb507a3a0356a33362bc7633581 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov1-tiny.cfg @@ -0,0 +1,130 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 + +saturation=.75 +exposure=.75 +hue = .1 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,800,20000,30000 +scales=2.5,2,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 
+activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[connected] +output= 1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/yolov1.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov1.cfg new file mode 100644 index 0000000000000000000000000000000000000000..06cf6e676170e41d24e63ec08d7b27a31c411718 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov1.cfg @@ -0,0 +1,261 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 +saturation=1.5 +exposure=1.5 +hue=.1 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,20000,30000 +scales=2.5,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[local] +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[dropout] +probability=.5 + +[connected] +output= 1715 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=3 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 
+noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/yolov2-tiny-voc.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov2-tiny-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c4c127cdd352bd98b3b7a3336d5c3b2efc6fadcd --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov2-tiny-voc.cfg @@ -0,0 +1,138 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +max_batches = 40200 +policy=steps +steps=-1,100,20000,30000 +scales=.1,10,.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=125 +activation=linear + +[region] +anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 +bias_match=1 +classes=20 +coords=4 +num=5 +softmax=1 +jitter=.2 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 
diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/yolov2-tiny.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov2-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..81d0ac45d6dca10f50875bfe85f7496ded8e0f63 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov2-tiny.cfg @@ -0,0 +1,139 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=425 +activation=linear + +[region] +anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 +bias_match=1 +classes=80 +coords=4 +num=5 +softmax=1 +jitter=.2 +rescore=0 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git 
a/workloads/realworld/uvm_prefetch/darknet/cfg/yolov2-voc.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov2-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..dbf2de281c1200cb4889409c616e775080823268 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov2-voc.cfg @@ -0,0 +1,258 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=416 +width=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 80200 +policy=steps +steps=40000,60000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 
+stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[route] +layers=-9 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=64 +activation=leaky + +[reorg] +stride=2 + +[route] +layers=-1,-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=125 +activation=linear + + +[region] +anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 +bias_match=1 +classes=20 +coords=4 +num=5 +softmax=1 +jitter=.3 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/yolov2.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov2.cfg new file mode 100644 index 0000000000000000000000000000000000000000..088edf81573e83c59edd7137cbc07b6fe1433591 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov2.cfg @@ -0,0 +1,258 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 
+size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky 
+ +[route] +layers=-9 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=64 +activation=leaky + +[reorg] +stride=2 + +[route] +layers=-1,-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=425 +activation=linear + + +[region] +anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 +bias_match=1 +classes=80 +coords=4 +num=5 +softmax=1 +jitter=.3 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-openimages.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-openimages.cfg new file mode 100644 index 0000000000000000000000000000000000000000..65d241a74c4c4995dbd997b1750575a83b0a17d4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-openimages.cfg @@ -0,0 +1,789 @@ +[net] +# Testing + batch=1 + subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=5000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 
+pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 
116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 
59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-spp.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-spp.cfg new file mode 100644 index 0000000000000000000000000000000000000000..4ad2a052d88328a79cff5686ff4dd1df6993a2fd --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-spp.cfg @@ -0,0 +1,822 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 + +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-tiny.cfg 
b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..cfca3cfa6415b7b61eae238aa71107dedbe5d607 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-tiny.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] 
+stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-tiny_b.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-tiny_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..0d8ded69bcf909f4cca02ab202cc99b1800a1562 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-tiny_b.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-tiny_t.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-tiny_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..0d8ded69bcf909f4cca02ab202cc99b1800a1562 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-tiny_t.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 
+filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-voc.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..3f3e8dfb31b7103cf7ca00cd0bef83d6d426bb8d --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3-voc.cfg @@ -0,0 +1,785 @@ +[net] +# Testing + batch=1 + subdivisions=1 +# Training +# batch=64 +# subdivisions=16 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 50200 +policy=steps +steps=40000,45000 +scales=.1,.1 + + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# 
Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3.cfg new file mode 100644 index 0000000000000000000000000000000000000000..938ffff23f106d65290faae217f6a9b0a715c023 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 
+size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 
+activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + 
+[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + 
+[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3_b.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a86d73b42225ef65d8ff8692b0d72827ba3a8484 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3_b.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 
+size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3_t.cfg b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a86d73b42225ef65d8ff8692b0d72827ba3a8484 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/cfg/yolov3_t.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# 
batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 
+size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky 
+ +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 
116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/art.c b/workloads/realworld/uvm_prefetch/darknet/examples/art.c new file mode 100644 index 0000000000000000000000000000000000000000..932688e7b9ecbfd1a359a5d373dddf52815da9bb --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/art.c @@ -0,0 +1,59 @@ +#include "darknet.h" + +#include + +void demo_art(char *cfgfile, char *weightfile, int cam_index) +{ +#ifdef OPENCV + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + + void * cap = open_video_stream(0, cam_index, 0,0,0); + + char *window = "ArtJudgementBot9000!!!"; + if(!cap) error("Couldn't connect to webcam.\n"); + int i; + int idx[] = {37, 401, 434}; + int n = sizeof(idx)/sizeof(idx[0]); + + while(1){ + image in = get_image_from_stream(cap); + image in_s = resize_image(in, net->w, net->h); + + 
float *p = network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + + float score = 0; + for(i = 0; i < n; ++i){ + float s = p[idx[i]]; + if (s > score) score = s; + } + score = score; + printf("I APPRECIATE THIS ARTWORK: %10.7f%%\n", score*100); + printf("["); + int upper = 30; + for(i = 0; i < upper; ++i){ + printf("%c", ((i+.5) < score*upper) ? 219 : ' '); + } + printf("]\n"); + + show_image(in, window, 1); + free_image(in_s); + free_image(in); + } +#endif +} + + +void run_art(int argc, char **argv) +{ + int cam_index = find_int_arg(argc, argv, "-c", 0); + char *cfg = argv[2]; + char *weights = argv[3]; + demo_art(cfg, weights, cam_index); +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/attention.c b/workloads/realworld/uvm_prefetch/darknet/examples/attention.c new file mode 100644 index 0000000000000000000000000000000000000000..cd1e579d375be8ffed5620c70180f0a59a927159 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/attention.c @@ -0,0 +1,459 @@ +#include "darknet.h" + +#include +#include + +void extend_data_truth(data *d, int n, float val) +{ + int i, j; + for(i = 0; i < d->y.rows; ++i){ + d->y.vals[i] = realloc(d->y.vals[i], (d->y.cols+n)*sizeof(float)); + for(j = 0; j < n; ++j){ + d->y.vals[i][d->y.cols + j] = val; + } + } + d->y.cols += n; +} + +matrix network_loss_data(network *net, data test) +{ + int i,b; + int k = 1; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.cols, sizeof(float)); + float *y = calloc(net->batch*test.y.cols, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + memcpy(y+b*test.y.cols, test.y.vals[i+b], test.y.cols*sizeof(float)); + } + + network orig = *net; + net->input = X; + net->truth = y; + net->train = 0; + net->delta = 0; + forward_network(net); + *net = orig; + + float *delta = 
net->layers[net->n-1].output; + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + int t = max_index(y + b*test.y.cols, 1000); + float err = sum_array(delta + b*net->outputs, net->outputs); + pred.vals[i+b][0] = -err; + //pred.vals[i+b][0] = 1-delta[b*net->outputs + t]; + } + } + free(X); + free(y); + return pred; +} + +void train_attention(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i, j; + + float avg_cls_loss = -1; + float avg_att_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *train_list = option_find_str(options, "train", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + + char **labels = get_labels(label_list); + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + double time; + + int divs=3; + int size=2; + + load_args args = {0}; + args.w = divs*net->w/size; + args.h = divs*net->h/size; + args.size = divs*net->w/size; + args.threads = 32; + args.hierarchy = net->hierarchy; + + args.min = net->min_ratio*args.w; + args.max = net->max_ratio*args.w; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = 
net->saturation; + args.hue = net->hue; + + args.paths = paths; + args.classes = classes; + args.n = imgs; + args.m = N; + args.labels = labels; + args.type = CLASSIFICATION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + data resized = resize_data(train, net->w, net->h); + extend_data_truth(&resized, divs*divs, 0); + data *tiles = tile_data(train, divs, size); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float aloss = 0; + float closs = 0; + int z; + for (i = 0; i < divs*divs/ngpus; ++i) { +#pragma omp parallel for + for(j = 0; j < ngpus; ++j){ + int index = i*ngpus + j; + extend_data_truth(tiles+index, divs*divs, SECRET_NUM); + matrix deltas = network_loss_data(nets[j], tiles[index]); + for(z = 0; z < resized.y.rows; ++z){ + resized.y.vals[z][train.y.cols + index] = deltas.vals[z][0]; + } + free_matrix(deltas); + } + } + int *inds = calloc(resized.y.rows, sizeof(int)); + for(z = 0; z < resized.y.rows; ++z){ + int index = max_index(resized.y.vals[z] + train.y.cols, divs*divs); + inds[z] = index; + for(i = 0; i < divs*divs; ++i){ + resized.y.vals[z][train.y.cols + i] = (i == index)? 
1 : 0; + } + } + data best = select_data(tiles, inds); + free(inds); + #ifdef GPU + if (ngpus == 1) { + closs = train_network(net, best); + } else { + closs = train_networks(nets, ngpus, best, 4); + } + #endif + for (i = 0; i < divs*divs; ++i) { + printf("%.2f ", resized.y.vals[0][train.y.cols + i]); + if((i+1)%divs == 0) printf("\n"); + free_data(tiles[i]); + } + free_data(best); + printf("\n"); + image im = float_to_image(64,64,3,resized.X.vals[0]); + //show_image(im, "orig"); + //cvWaitKey(100); + /* + image im1 = float_to_image(64,64,3,tiles[i].X.vals[0]); + image im2 = float_to_image(64,64,3,resized.X.vals[0]); + show_image(im1, "tile"); + show_image(im2, "res"); + */ +#ifdef GPU + if (ngpus == 1) { + aloss = train_network(net, resized); + } else { + aloss = train_networks(nets, ngpus, resized, 4); + } +#endif + for(i = 0; i < divs*divs; ++i){ + printf("%f ", nets[0]->output[1000 + i]); + if ((i+1) % divs == 0) printf("\n"); + } + printf("\n"); + + free_data(resized); + free_data(train); + if(avg_cls_loss == -1) avg_cls_loss = closs; + if(avg_att_loss == -1) avg_att_loss = aloss; + avg_cls_loss = avg_cls_loss*.9 + closs*.1; + avg_att_loss = avg_att_loss*.9 + aloss*.1; + + printf("%ld, %.3f: Att: %f, %f avg, Class: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, aloss, avg_att_loss, closs, avg_cls_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + pthread_join(load_thread, 0); + + free_network(net); + free_ptrs((void**)labels, classes); + free_ptrs((void**)paths, plist->size); + 
free_list(plist); + free(base); +} + +void validate_attention_single(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *leaf_list = option_find_str(options, "leaves", 0); + if(leaf_list) change_leaves(net->hierarchy, leaf_list); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + int divs = 4; + int size = 2; + int extra = 0; + float *avgs = calloc(classes, sizeof(float)); + int *inds = calloc(divs*divs, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w*divs/size); + image crop = crop_image(resized, (resized.w - net->w*divs/size)/2, (resized.h - net->h*divs/size)/2, net->w*divs/size, net->h*divs/size); + image rcrop = resize_image(crop, net->w, net->h); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, rcrop.data); + //pred[classes + 56] = 0; + for(j = 0; j < divs*divs; ++j){ + printf("%.2f ", pred[classes + j]); + if((j+1)%divs == 0) printf("\n"); + } + printf("\n"); + copy_cpu(classes, pred, 1, avgs, 1); + top_k(pred + classes, divs*divs, divs*divs, inds); + show_image(crop, "crop"); + for(j = 0; j < extra; ++j){ + int index = inds[j]; + int row = index / 
divs; + int col = index % divs; + int y = row * crop.h / divs - (net->h - crop.h/divs)/2; + int x = col * crop.w / divs - (net->w - crop.w/divs)/2; + printf("%d %d %d %d\n", row, col, y, x); + image tile = crop_image(crop, x, y, net->w, net->h); + float *pred = network_predict(net, tile.data); + axpy_cpu(classes, 1., pred, 1, avgs, 1); + show_image(tile, "tile"); + //cvWaitKey(10); + } + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + if(rcrop.data != resized.data) free_image(rcrop); + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_attention_multi(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + int scales[] = {224, 288, 320, 352, 384}; + int nscales = sizeof(scales)/sizeof(scales[0]); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + float *pred = calloc(classes, sizeof(float)); + image im = load_image_color(paths[i], 0, 0); + for(j = 0; j 
< nscales; ++j){ + image r = resize_min(im, scales[j]); + resize_network(net, r.w, r.h); + float *p = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + flip_image(r); + p = network_predict(net, r.data); + axpy_cpu(classes, 1, p, 1, pred, 1); + if(r.data != im.data) free_image(r); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void predict_attention(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + if(top == 0) top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = letterbox_image(im, net->w, net->h); + //resize_network(&net, r.w, r.h); + //printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + float *predictions = network_predict(net, X); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + 
//if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? names[net->hierarchy->parent[index]] : "Root"); + //else printf("%s: %f\n",names[index], predictions[index]); + printf("%5.2f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void run_attention(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int ngpus; + int *gpus = read_intlist(gpu_list, &ngpus, gpu_index); + + + int top = find_int_arg(argc, argv, "-t", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + char *layer_s = (argc > 7) ? argv[7]: 0; + if(0==strcmp(argv[2], "predict")) predict_attention(data, cfg, weights, filename, top); + else if(0==strcmp(argv[2], "train")) train_attention(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) validate_attention_single(data, cfg, weights); + else if(0==strcmp(argv[2], "validmulti")) validate_attention_multi(data, cfg, weights); +} + + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/captcha.c b/workloads/realworld/uvm_prefetch/darknet/examples/captcha.c new file mode 100644 index 0000000000000000000000000000000000000000..41d6d07c30801b35da34c05984be488e6f6767e9 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/captcha.c @@ -0,0 +1,353 @@ +#include "darknet.h" + +void fix_data_captcha(data d, int mask) +{ + matrix labels = d.y; + int i, j; + for(i = 0; i < d.y.rows; ++i){ + for(j = 0; j < d.y.cols; j += 2){ + if (mask){ + if(!labels.vals[i][j]){ + labels.vals[i][j] = SECRET_NUM; + labels.vals[i][j+1] = SECRET_NUM; + }else 
if(labels.vals[i][j+1]){ + labels.vals[i][j] = 0; + } + } else{ + if (labels.vals[i][j]) { + labels.vals[i][j+1] = 0; + } else { + labels.vals[i][j+1] = 1; + } + } + } + } +} + +void train_captcha(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + int i = *net->seen/imgs; + int solved = 1; + list *plist; + char **labels = get_labels("/data/captcha/reimgs.labels.list"); + if (solved){ + plist = get_paths("/data/captcha/reimgs.solved.list"); + }else{ + plist = get_paths("/data/captcha/reimgs.raw.list"); + } + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = 26; + args.n = imgs; + args.m = plist->size; + args.labels = labels; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + + load_thread = load_data_in_thread(args); + while(1){ + ++i; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + fix_data_captcha(train, solved); + + /* + image im = float_to_image(256, 256, 3, train.X.vals[114]); + show_image(im, "training"); + cvWaitKey(0); + */ + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net->seen); + free_data(train); + if(i%100==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } + } +} + +void test_captcha(char *cfgfile, char *weightfile, char 
*filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + int i = 0; + char **names = get_labels("/data/captcha/reimgs.labels.list"); + char buff[256]; + char *input = buff; + int indexes[26]; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + //printf("Enter Image Path: "); + //fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, net->w, net->h); + float *X = im.data; + float *predictions = network_predict(net, X); + top_predictions(net, 26, indexes); + //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < 26; ++i){ + int index = indexes[i]; + if(i != 0) printf(", "); + printf("%s %f", names[index], predictions[index]); + } + printf("\n"); + fflush(stdout); + free_image(im); + if (filename) break; + } +} + +void valid_captcha(char *cfgfile, char *weightfile, char *filename) +{ + char **labels = get_labels("/data/captcha/reimgs.labels.list"); + network *net = load_network(cfgfile, weightfile, 0); + list *plist = get_paths("/data/captcha/reimgs.fg.list"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + int outputs = net->outputs; + + set_batch_network(net, 1); + srand(2222222); + int i, j; + for(i = 0; i < N; ++i){ + if (i%100 == 0) fprintf(stderr, "%d\n", i); + image im = load_image_color(paths[i], net->w, net->h); + float *X = im.data; + float *predictions = network_predict(net, X); + //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + int truth = -1; + for(j = 0; j < 13; ++j){ + if (strstr(paths[i], labels[j])) truth = j; + } + if (truth == -1){ + fprintf(stderr, "bad: %s\n", paths[i]); + return; + } + printf("%d, ", truth); + for(j = 0; j < outputs; ++j){ + if (j != 0) printf(", "); + printf("%f", predictions[j]); + } + printf("\n"); + fflush(stdout); + free_image(im); + if (filename) break; + } +} + +/* + void 
train_captcha(char *cfgfile, char *weightfile) + { + float avg_loss = -1; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + int i = net->seen/imgs; + list *plist = get_paths("/data/captcha/train.auto5"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + while(1){ + ++i; + time=clock(); + data train = load_data_captcha(paths, imgs, plist->size, 10, 200, 60); + translate_data_rows(train, -128); + scale_data_rows(train, 1./128); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + net->seen += imgs; + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net->seen); + free_data(train); + if(i%10==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } + } + } + + void decode_captcha(char *cfgfile, char *weightfile) + { + setbuf(stdout, NULL); + srand(time(0)); + network net = parse_network_cfg(cfgfile); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + char filename[256]; + while(1){ + printf("Enter filename: "); + fgets(filename, 256, stdin); + strtok(filename, "\n"); + image im = load_image_color(filename, 300, 57); + scale_image(im, 1./255.); + float *X = im.data; + float *predictions = network_predict(net, X); + image out = float_to_image(300, 57, 1, predictions); + show_image(out, "decoded"); +#ifdef OPENCV +cvWaitKey(0); +#endif +free_image(im); +} +} + +void encode_captcha(char *cfgfile, char *weightfile) +{ +float avg_loss = -1; +srand(time(0)); +char *base = basecfg(cfgfile); +printf("%s\n", 
base); +network net = parse_network_cfg(cfgfile); +if(weightfile){ + load_weights(&net, weightfile); +} +printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); +int imgs = 1024; +int i = net->seen/imgs; +list *plist = get_paths("/data/captcha/encode.list"); +char **paths = (char **)list_to_array(plist); +printf("%d\n", plist->size); +clock_t time; +while(1){ + ++i; + time=clock(); + data train = load_data_captcha_encode(paths, imgs, plist->size, 300, 57); + scale_data_rows(train, 1./255); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + net->seen += imgs; + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net->seen); + free_matrix(train.X); + if(i%100==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } +} +} + +void validate_captcha(char *cfgfile, char *weightfile) +{ + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + int numchars = 37; + list *plist = get_paths("/data/captcha/solved.hard"); + char **paths = (char **)list_to_array(plist); + int imgs = plist->size; + data valid = load_data_captcha(paths, imgs, 0, 10, 200, 60); + translate_data_rows(valid, -128); + scale_data_rows(valid, 1./128); + matrix pred = network_predict_data(net, valid); + int i, k; + int correct = 0; + int total = 0; + int accuracy = 0; + for(i = 0; i < imgs; ++i){ + int allcorrect = 1; + for(k = 0; k < 10; ++k){ + char truth = int_to_alphanum(max_index(valid.y.vals[i]+k*numchars, numchars)); + char prediction = int_to_alphanum(max_index(pred.vals[i]+k*numchars, numchars)); + if (truth != prediction) allcorrect=0; + if (truth != '.' 
&& truth == prediction) ++correct; + if (truth != '.' || truth != prediction) ++total; + } + accuracy += allcorrect; + } + printf("Word Accuracy: %f, Char Accuracy %f\n", (float)accuracy/imgs, (float)correct/total); + free_data(valid); +} + +void test_captcha(char *cfgfile, char *weightfile) +{ + setbuf(stdout, NULL); + srand(time(0)); + //char *base = basecfg(cfgfile); + //printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + char filename[256]; + while(1){ + //printf("Enter filename: "); + fgets(filename, 256, stdin); + strtok(filename, "\n"); + image im = load_image_color(filename, 200, 60); + translate_image(im, -128); + scale_image(im, 1/128.); + float *X = im.data; + float *predictions = network_predict(net, X); + print_letters(predictions, 10); + free_image(im); + } +} + */ +void run_captcha(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_captcha(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights, filename); + else if(0==strcmp(argv[2], "valid")) valid_captcha(cfg, weights, filename); + //if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "encode")) encode_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "decode")) decode_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "valid")) validate_captcha(cfg, weights); +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/cifar.c b/workloads/realworld/uvm_prefetch/darknet/examples/cifar.c new file mode 100644 index 0000000000000000000000000000000000000000..a5f5f240b9f680acd9b5890042300d3b683e0f82 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/cifar.c @@ -0,0 +1,251 @@ +#include "darknet.h" + +void train_cifar(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + int classes = 10; + int N = 50000; + + char **labels = get_labels("data/cifar/labels.txt"); + int epoch = (*net->seen)/N; + data train = load_all_cifar10(); + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + clock_t time=clock(); + + float loss = train_network_sgd(net, train, 1); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + 
if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)labels, classes); + free(base); + free_data(train); +} + +void train_cifar_distill(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + int classes = 10; + int N = 50000; + + char **labels = get_labels("data/cifar/labels.txt"); + int epoch = (*net->seen)/N; + + data train = load_all_cifar10(); + matrix soft = csv_to_matrix("results/ensemble.csv"); + + float weight = .9; + scale_matrix(soft, weight); + scale_matrix(train.y, 1. 
- weight); + matrix_add_matrix(soft, train.y); + + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + clock_t time=clock(); + + float loss = train_network_sgd(net, train, 1); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)labels, classes); + free(base); + free_data(train); +} + +void test_cifar_multi(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + float avg_acc = 0; + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + + float pred[10] = {0}; + + float *p = network_predict(net, im.data); + axpy_cpu(10, 1, p, 1, pred, 1); + flip_image(im); + p = network_predict(net, im.data); + axpy_cpu(10, 1, p, 1, pred, 1); + + int index = max_index(pred, 10); + int class = max_index(test.y.vals[i], 10); + if(index == class) avg_acc += 1; + free_image(im); + printf("%4d: %.2f%%\n", i, 100.*avg_acc/(i+1)); + } +} + +void test_cifar(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + clock_t time; + float avg_acc = 0; + float avg_top5 = 0; + data test = 
load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + time=clock(); + + float *acc = network_accuracies(net, test, 2); + avg_acc += acc[0]; + avg_top5 += acc[1]; + printf("top1: %f, %lf seconds, %d images\n", avg_acc, sec(clock()-time), test.X.rows); + free_data(test); +} + +void extract_cifar() +{ +char *labels[] = {"airplane","automobile","bird","cat","deer","dog","frog","horse","ship","truck"}; + int i; + data train = load_all_cifar10(); + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + for(i = 0; i < train.X.rows; ++i){ + image im = float_to_image(32, 32, 3, train.X.vals[i]); + int class = max_index(train.y.vals[i], 10); + char buff[256]; + sprintf(buff, "data/cifar/train/%d_%s",i,labels[class]); + save_image_options(im, buff, PNG, 0); + } + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + int class = max_index(test.y.vals[i], 10); + char buff[256]; + sprintf(buff, "data/cifar/test/%d_%s",i,labels[class]); + save_image_options(im, buff, PNG, 0); + } +} + +void test_cifar_csv(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + matrix pred = network_predict_data(net, test); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + flip_image(im); + } + matrix pred2 = network_predict_data(net, test); + scale_matrix(pred, .5); + scale_matrix(pred2, .5); + matrix_add_matrix(pred2, pred); + + matrix_to_csv(pred); + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); +} + +void test_cifar_csvtrain(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + srand(time(0)); + + data test = load_all_cifar10(); + + matrix pred = network_predict_data(net, test); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = 
float_to_image(32, 32, 3, test.X.vals[i]); + flip_image(im); + } + matrix pred2 = network_predict_data(net, test); + scale_matrix(pred, .5); + scale_matrix(pred2, .5); + matrix_add_matrix(pred2, pred); + + matrix_to_csv(pred); + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); +} + +void eval_cifar_csv() +{ + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + matrix pred = csv_to_matrix("results/combined.csv"); + fprintf(stderr, "%d %d\n", pred.rows, pred.cols); + + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); + free_matrix(pred); +} + + +void run_cifar(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_cifar(cfg, weights); + else if(0==strcmp(argv[2], "extract")) extract_cifar(); + else if(0==strcmp(argv[2], "distill")) train_cifar_distill(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_cifar(cfg, weights); + else if(0==strcmp(argv[2], "multi")) test_cifar_multi(cfg, weights); + else if(0==strcmp(argv[2], "csv")) test_cifar_csv(cfg, weights); + else if(0==strcmp(argv[2], "csvtrain")) test_cifar_csvtrain(cfg, weights); + else if(0==strcmp(argv[2], "eval")) eval_cifar_csv(); +} + + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/classifier.c b/workloads/realworld/uvm_prefetch/darknet/examples/classifier.c new file mode 100644 index 0000000000000000000000000000000000000000..932f5a4652022288029617d0722c3bfa3073e536 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/classifier.c @@ -0,0 +1,1122 @@ +#include "darknet.h" + +#include +#include + +float *get_regression_values(char **labels, int n) +{ + float *v = calloc(n, sizeof(float)); + int i; + for(i = 0; i < n; ++i){ + char *p = 
strchr(labels[i], ' '); + *p = 0; + v[i] = atof(p+1); + } + return v; +} + +void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + // Ruihao + int tag = option_find_int_quiet(options, "tag", 0); + + char *backup_directory_cfg = option_find_str(options, "backup", "/backup/"); + char *label_list_cfg = option_find_str(options, "labels", "data/labels.list"); + char *train_list_cfg = option_find_str(options, "train", "data/train.list"); + + char *env = getenv("UVMAsyncBench_BASE"); + char backup_directory[256]; + char label_list[256]; + char train_list[256]; + sprintf(backup_directory, "%s/%s", env, backup_directory_cfg); + sprintf(label_list, "%s/%s", env, label_list_cfg); + sprintf(train_list, "%s/%s", env, train_list_cfg); + // Ruihao + char *tree = option_find_str(options, "tree", 0); + if (tree) net->hierarchy = read_tree(tree); + int classes = option_find_int(options, "classes", 2); + + char **labels = 0; + if(!tag){ + labels = get_labels(label_list); + } + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + double time; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.hierarchy = net->hierarchy; + + args.min = net->min_ratio*net->w; + args.max = 
net->max_ratio*net->w; + printf("%d %d\n", args.min, args.max); + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + + args.paths = paths; + args.classes = classes; + args.n = imgs; + args.m = N; + args.labels = labels; + if (tag){ + args.type = TAG_DATA; + } else { + args.type = CLASSIFICATION_DATA; + } + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int count = 0; + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + if(net->random && count++%40 == 0){ + printf("Resizing\n"); + int dim = (rand() % 11 + 4) * 32; + //if (get_current_batch(net)+200 > net->max_batches) dim = 608; + //int dim = (rand() % 4 + 16) * 32; + printf("%d\n", dim); + args.w = dim; + args.h = dim; + args.size = dim; + args.min = net->min_ratio*dim; + args.max = net->max_ratio*dim; + printf("%d %d\n", args.min, args.max); + + pthread_join(load_thread, 0); + train = buffer; + free_data(train); + load_thread = load_data(args); + + for(i = 0; i < ngpus; ++i){ + resize_network(nets[i], dim, dim); + } + net = nets[0]; + } + time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; 
+ char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + pthread_join(load_thread, 0); + + // free_network(net); + if(labels) free_ptrs((void**)labels, classes); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void validate_classifier_crop(char *datacfg, char *filename, char *weightfile) +{ + int i = 0; + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + clock_t time; + float avg_acc = 0; + float avg_topk = 0; + int splits = m/1000; + int num = (i+1)*m/splits - i*m/splits; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + + args.paths = paths; + args.classes = classes; + args.n = num; + args.m = 0; + args.labels = labels; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(i = 1; i <= splits; ++i){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + num = (i+1)*m/splits - i*m/splits; + char **part = paths+(i*m/splits); + if(i != splits){ + args.paths = part; + load_thread = load_data_in_thread(args); + } + printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + float 
*acc = network_accuracies(net, val, topk); + avg_acc += acc[0]; + avg_topk += acc[1]; + printf("%d: top 1: %f, top %d: %f, %lf seconds, %d images\n", i, avg_acc/i, topk, avg_topk/i, sec(clock()-time), val.X.rows); + free_data(val); + } +} + +void validate_classifier_10(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + int w = net->w; + int h = net->h; + int shift = 32; + image im = load_image_color(paths[i], w+shift, h+shift); + image images[10]; + images[0] = crop_image(im, -shift, -shift, w, h); + images[1] = crop_image(im, shift, -shift, w, h); + images[2] = crop_image(im, 0, 0, w, h); + images[3] = crop_image(im, -shift, shift, w, h); + images[4] = crop_image(im, shift, shift, w, h); + flip_image(im); + images[5] = crop_image(im, -shift, -shift, w, h); + images[6] = crop_image(im, shift, -shift, w, h); + images[7] = crop_image(im, 0, 0, w, h); + images[8] = crop_image(im, -shift, shift, w, h); + images[9] = crop_image(im, shift, shift, w, h); + float *pred = calloc(classes, sizeof(float)); + for(j = 0; j < 10; ++j){ + float *p = network_predict(net, images[j].data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, 
net->hierarchy, 1, 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + free_image(images[j]); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_classifier_full(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + int size = net->w; + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, size); + resize_network(net, resized.w, resized.h); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, resized.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + free_image(im); + free_image(resized); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + + +void validate_classifier_single(char 
*datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *leaf_list = option_find_str(options, "leaves", 0); + if(leaf_list) change_leaves(net->hierarchy, leaf_list); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image crop = center_crop_image(im, net->w, net->h); + //grayscale_image_3c(crop); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, crop.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + free_image(im); + free_image(crop); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%s, %d, %f, %f, \n", paths[i], class, pred[0], pred[1]); + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_classifier_multi(char *datacfg, char *cfg, char *weights) +{ + int i, j; + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", 
"data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + //int scales[] = {224, 288, 320, 352, 384}; + int scales[] = {224, 256, 288, 320}; + int nscales = sizeof(scales)/sizeof(scales[0]); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + float *pred = calloc(classes, sizeof(float)); + image im = load_image_color(paths[i], 0, 0); + for(j = 0; j < nscales; ++j){ + image r = resize_max(im, scales[j]); + resize_network(net, r.w, r.h); + float *p = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + flip_image(r); + p = network_predict(net, r.data); + axpy_cpu(classes, 1, p, 1, pred, 1); + if(r.data != im.data) free_image(r); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + int top = option_find_int(options, "top", 1); + + int i = 0; 
+ char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image orig = load_image_color(input, 0, 0); + image r = resize_min(orig, 256); + image im = crop_image(r, (r.w - 224 - 1)/2 + 1, (r.h - 224 - 1)/2 + 1, 224, 224); + float mean[] = {0.48263312050943, 0.45230225481413, 0.40099074308742}; + float std[] = {0.22590347483426, 0.22120921437787, 0.22103996251583}; + float var[3]; + var[0] = std[0]*std[0]; + var[1] = std[1]*std[1]; + var[2] = std[2]*std[2]; + + normalize_cpu(im.data, mean, var, 1, 3, im.w*im.h); + + float *X = im.data; + time=clock(); + float *predictions = network_predict(net, X); + + layer l = net->layers[layer_num]; + for(i = 0; i < l.c; ++i){ + if(l.rolling_mean) printf("%f %f %f\n", l.rolling_mean[i], l.rolling_variance[i], l.scales[i]); + } +#ifdef GPU + cuda_pull_array(l.output_gpu, l.output, l.outputs); +#endif + for(i = 0; i < l.outputs; ++i){ + printf("%f\n", l.output[i]); + } + /* + + printf("\n\nWeights\n"); + for(i = 0; i < l.n*l.size*l.size*l.c; ++i){ + printf("%f\n", l.filters[i]); + } + + printf("\n\nBiases\n"); + for(i = 0; i < l.n; ++i){ + printf("%f\n", l.biases[i]); + } + */ + + top_predictions(net, top, indexes); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + printf("%s: %f\n", names[index], predictions[index]); + } + free_image(im); + if (filename) break; + } +} + +void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *name_list_cfg = option_find_str(options, 
"names", 0); + char *env = getenv("UVMAsyncBench_BASE"); + char name_list[256]; + sprintf(name_list, "%s/%s", env, name_list_cfg); + // if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + // Ruihao + if(top == 0) top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = letterbox_image(im, net->w, net->h); + //image r = resize_min(im, 320); + //printf("%d %d\n", r.w, r.h); + //resize_network(net, r.w, r.h); + //printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + startCPU(); + float *predictions = network_predict(net, X); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + endCPU(); + fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + //if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? 
names[net->hierarchy->parent[index]] : "Root"); + //else printf("%s: %f\n",names[index], predictions[index]); + printf("%5.2f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void label_classifier(char *datacfg, char *filename, char *weightfile) +{ + int i; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "names", "data/labels.list"); + char *test_list = option_find_str(options, "test", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + + char **labels = get_labels(label_list); + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + for(i = 0; i < m; ++i){ + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + float *pred = network_predict(net, crop.data); + + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + int ind = max_index(pred, classes); + + printf("%s\n", labels[ind]); + } +} + +void csv_classifier(char *datacfg, char *cfgfile, char *weightfile) +{ + int i,j; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *test_list = option_find_str(options, "test", "data/test.list"); + int top = option_find_int(options, "top", 1); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + int *indexes = calloc(top, sizeof(int)); + + for(i = 0; i < m; ++i){ + double time = what_time_is_it_now(); + char *path = paths[i]; + image im = load_image_color(path, 0, 0); + image r = letterbox_image(im, net->w, 
net->h); + float *predictions = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + + printf("%s", path); + for(j = 0; j < top; ++j){ + printf("\t%d", indexes[j]); + } + printf("\n"); + + free_image(im); + free_image(r); + + fprintf(stderr, "%lf seconds, %d images, %d total\n", what_time_is_it_now() - time, i+1, m); + } +} + +void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_layer) +{ + int curr = 0; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *test_list_cfg = option_find_str(options, "test", "data/test.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char test_list[256]; + sprintf(test_list, "%s/%s", env, test_list_cfg); + // Ruihao + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + m = net->max_batches * net->batch; + free_list(plist); + + clock_t time; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = classes; + args.n = net->batch; + args.m = 0; + args.labels = 0; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(curr = net->batch; curr <= m; curr += net->batch){ + // while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + if(curr < m){ + args.paths = paths + curr; + if (curr + net->batch > m) args.n = m - curr; + load_thread = load_data_in_thread(args); + } + fprintf(stderr, "Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + + int i, j; + if (target_layer >= 0){ + //layer l = 
net->layers[target_layer]; + } + + // for(i = 0; i < pred.rows; ++i){ + // printf("%s", paths[curr-net->batch+i]); + // for(j = 0; j < pred.cols; ++j){ + // printf("\t%g", pred.vals[i][j]); + // } + // printf("\n"); + // } + + fprintf(stderr, "%lf seconds, %d images, %d total\n", sec(clock()-time), val.X.rows, curr); + free_matrix(pred); + free_data(val); + } +} + +void file_output_classifier(char *datacfg, char *filename, char *weightfile, char *listfile) +{ + int i,j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + //char *label_list = option_find_str(options, "names", "data/labels.list"); + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(listfile); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + for(i = 0; i < m; ++i){ + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + + float *pred = network_predict(net, crop.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 0, 1); + + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + + printf("%s", paths[i]); + for(j = 0; j < classes; ++j){ + printf("\t%g", pred[j]); + } + printf("\n"); + } +} + + +void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + float threat = 0; + float roll = .2; + + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + int top = option_find_int(options, "top", 1); + + char *name_list = option_find_str(options, "names", 0); + char **names = 
get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + //cvNamedWindow("Threat", CV_WINDOW_NORMAL); + //cvResizeWindow("Threat", 512, 512); + float fps = 0; + int i; + + int count = 0; + + while(1){ + ++count; + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + if(!in.data) break; + image in_s = resize_image(in, net->w, net->h); + + image out = in; + int x1 = out.w / 20; + int y1 = out.h / 20; + int x2 = 2*x1; + int y2 = out.h - out.h/20; + + int border = .01*out.h; + int h = y2 - y1 - 2*border; + int w = x2 - x1 - 2*border; + + float *predictions = network_predict(net, in_s.data); + float curr_threat = 0; + if(1){ + curr_threat = predictions[0] * 0 + + predictions[1] * .6 + + predictions[2]; + } else { + curr_threat = predictions[218] + + predictions[539] + + predictions[540] + + predictions[368] + + predictions[369] + + predictions[370]; + } + threat = roll * curr_threat + (1-roll) * threat; + + draw_box_width(out, x2 + border, y1 + .02*h, x2 + .5 * w, y1 + .02*h + border, border, 0,0,0); + if(threat > .97) { + draw_box_width(out, x2 + .5 * w + border, + y1 + .02*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .02*h + 3*border, 3*border, 1,0,0); + } + draw_box_width(out, x2 + .5 * w + border, + y1 + .02*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .02*h + 3*border, .5*border, 0,0,0); + draw_box_width(out, x2 + border, y1 + .42*h, x2 + .5 * w, y1 + .42*h + border, border, 0,0,0); + if(threat > .57) { + draw_box_width(out, x2 + .5 * w + border, + y1 + .42*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .42*h + 3*border, 3*border, 1,1,0); + } + draw_box_width(out, x2 + .5 * w + border, + y1 + .42*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .42*h + 3*border, .5*border, 0,0,0); + + draw_box_width(out, x1, y1, x2, y2, border, 0,0,0); + for(i = 0; i < threat * h ; ++i){ + float ratio = (float) i / h; + float r = 
(ratio < .5) ? (2*(ratio)) : 1; + float g = (ratio < .5) ? 1 : 1 - 2*(ratio - .5); + draw_box_width(out, x1 + border, y2 - border - i, x2 - border, y2 - border - i, 1, r, g, 0); + } + top_predictions(net, top, indexes); + char buff[256]; + sprintf(buff, "/home/pjreddie/tmp/threat_%06d", count); + //save_image(out, buff); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + for(i = 0; i < top; ++i){ + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + } + + if(1){ + show_image(out, "Threat", 10); + } + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + int bad_cats[] = {218, 539, 540, 1213, 1501, 1742, 1911, 2415, 4348, 19223, 368, 369, 370, 1133, 1200, 1306, 2122, 2301, 2537, 2823, 3179, 3596, 3639, 4489, 5107, 5140, 5289, 6240, 6631, 6762, 7048, 7171, 7969, 7984, 7989, 8824, 8927, 9915, 10270, 10448, 13401, 15205, 18358, 18894, 18895, 19249, 19697}; + + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + int top = option_find_int(options, "top", 1); + + char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + int i; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = resize_image(in, net->w, net->h); + + float *predictions = network_predict(net, 
in_s.data); + top_predictions(net, top, indexes); + + printf("\033[2J"); + printf("\033[1;1H"); + + int threat = 0; + for(i = 0; i < sizeof(bad_cats)/sizeof(bad_cats[0]); ++i){ + int index = bad_cats[i]; + if(predictions[index] > .01){ + printf("Threat Detected!\n"); + threat = 1; + break; + } + } + if(!threat) printf("Scanning...\n"); + for(i = 0; i < sizeof(bad_cats)/sizeof(bad_cats[0]); ++i){ + int index = bad_cats[i]; + if(predictions[index] > .01){ + printf("%s\n", names[index]); + } + } + + show_image(in, "Threat Detection", 10); + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + +void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + image **alphabet = load_alphabet(); + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + + int w = 1280; + int h = 720; + void * cap = open_video_stream(filename, cam_index, w, h, 0); + + int top = option_find_int(options, "top", 1); + + char *label_list = option_find_str(options, "labels", 0); + char *name_list = option_find_str(options, "names", label_list); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + int i; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + + float *predictions = network_predict(net, in_s.data); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + 
top_predictions(net, top, indexes); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + int lh = in.h*.03; + int toph = 3*lh; + + float rgb[3] = {1,1,1}; + for(i = 0; i < top; ++i){ + printf("%d\n", toph); + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + + char buff[1024]; + sprintf(buff, "%3.1f%%: %s\n", predictions[index]*100, names[index]); + image label = get_label(alphabet, buff, lh); + draw_label(in, toph, lh, label, rgb); + toph += 2*lh; + free_image(label); + } + + show_image(in, base, 10); + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_classifier(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int ngpus; + int *gpus = read_intlist(gpu_list, &ngpus, gpu_index); + + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int top = find_int_arg(argc, argv, "-t", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + char *layer_s = (argc > 7) ? argv[7]: 0; + int layer = layer_s ? 
atoi(layer_s) : -1; + if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top); + else if(0==strcmp(argv[2], "fout")) file_output_classifier(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s)); + else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer); + else if(0==strcmp(argv[2], "csv")) csv_classifier(data, cfg, weights); + else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights); + else if(0==strcmp(argv[2], "validmulti")) validate_classifier_multi(data, cfg, weights); + else if(0==strcmp(argv[2], "valid10")) validate_classifier_10(data, cfg, weights); + else if(0==strcmp(argv[2], "validcrop")) validate_classifier_crop(data, cfg, weights); + else if(0==strcmp(argv[2], "validfull")) validate_classifier_full(data, cfg, weights); +} + + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/coco.c b/workloads/realworld/uvm_prefetch/darknet/examples/coco.c new file mode 100644 index 0000000000000000000000000000000000000000..6a50b89abd2abc7fb217b5118034a746f790f690 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/coco.c @@ -0,0 +1,357 @@ +#include "darknet.h" + +#include + +char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking 
meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"}; + +int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; + +void train_coco(char *cfgfile, char *weightfile) +{ + //char *train_images = "/home/pjreddie/data/voc/test/train.txt"; + //char *train_images = "/home/pjreddie/data/coco/train.txt"; + char *train_images = "data/coco.trainval.txt"; + //char *train_images = "data/bags.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + layer l = net->layers[net->n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = 
jitter; + args.num_boxes = side; + args.d = &buffer; + args.type = REGION_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + /* + image im = float_to_image(net->w, net->h, 3, train.X.vals[113]); + image copy = copy_image(im); + draw_coco(copy, train.y.vals[113], 7, "truth"); + cvWaitKey(0); + free_image(copy); + */ + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || (i < 1000 && i%100 == 0)){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +static void print_cocos(FILE *fp, int image_id, detection *dets, int num_boxes, int classes, int w, int h) +{ + int i, j; + for(i = 0; i < num_boxes; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + float bx = xmin; + float by = ymin; + float bw = xmax - xmin; + float bh = ymax - ymin; + + for(j = 0; j < classes; 
++j){ + if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]); + } + } +} + +int get_coco_image_id(char *filename) +{ + char *p = strrchr(filename, '_'); + return atoi(p+1); +} + +void validate_coco(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/"; + list *plist = get_paths("data/coco_val_5k.list"); + //list *plist = get_paths("/home/pjreddie/data/people-art/test.txt"); + //list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + snprintf(buff, 1024, "%s/coco_results.json", base); + FILE *fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + + int m = plist->size; + int i=0; + int t; + + float thresh = .01; + int nms = 1; + float iou_thresh = .5; + + int nthreads = 8; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.type = IMAGE_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + time_t start = time(0); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = 
&buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + int image_id = get_coco_image_id(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh); + print_cocos(fp, image_id, dets, l.side*l.side*l.n, classes, w, h); + free_detections(dets, nboxes); + free_image(val[t]); + free_image(val_resized[t]); + } + } + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); +} + +void validate_coco_recall(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + int side = l.side; + + int j, k; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + + float thresh = .001; + int nms = 0; + float iou_thresh = .5; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + + int nboxes = 0; + detection *dets = get_network_boxes(net, orig.w, 
orig.h, thresh, 0, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, side*side*l.n, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < side*side*l.n; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < side*side*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + free_detections(dets, nboxes); + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free(id); + free_image(orig); + free_image(sized); + } +} + +void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) +{ + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + layer l = net->layers[net->n-1]; + set_batch_network(net, 1); + srand(2222222); + float nms = .4; + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = resize_image(im, net->w, net->h); + float *X = sized.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + + int nboxes = 0; + detection *dets = get_network_boxes(net, 1, 1, thresh, 0, 0, 0, &nboxes); + if 
(nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms); + + draw_detections(im, dets, l.side*l.side*l.n, thresh, coco_classes, alphabet, 80); + save_image(im, "prediction"); + show_image(im, "predictions", 0); + free_detections(dets, nboxes); + free_image(im); + free_image(sized); + if (filename) break; + } +} + +void run_coco(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .2); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5]: 0; + int avg = find_int_arg(argc, argv, "-avg", 1); + if(0==strcmp(argv[2], "test")) test_coco(cfg, weights, filename, thresh); + else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights); + else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix, avg, .5, 0,0,0,0); +} diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/darknet.c b/workloads/realworld/uvm_prefetch/darknet/examples/darknet.c new file mode 100644 index 0000000000000000000000000000000000000000..f1c5e43b66391a9674c13a193e329cf3dfc26439 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/darknet.c @@ -0,0 +1,559 @@ +#include "darknet.h" + +// #include "../../../../common/cupti_add.h" +// #include "../../../../common/cpu_timestamps.h" +// #include "cpu_timestamps.h" + +static uint64_t startCPUTime; +static uint64_t endCPUTime; + +void startCPU() +{ + struct timespec tv; + if (clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + 
startCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); +} + +void endCPU() +{ + struct timespec tv; + if (clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + + endCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); + // endCPUTimestamp1 = std::chrono::system_clock::now(); + printf("CPU_Times,%lu,%lu,%lu\n", startCPUTime, endCPUTime, endCPUTime - startCPUTime); +} + +#ifdef __cplusplus +extern "C" { +#endif +extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top); +extern void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen); +extern void run_yolo(int argc, char **argv); +extern void run_detector(int argc, char **argv); +extern void run_coco(int argc, char **argv); +extern void run_nightmare(int argc, char **argv); +extern void run_classifier(int argc, char **argv); +extern void run_regressor(int argc, char **argv); +extern void run_segmenter(int argc, char **argv); +extern void run_isegmenter(int argc, char **argv); +extern void run_char_rnn(int argc, char **argv); +extern void run_tag(int argc, char **argv); +extern void run_cifar(int argc, char **argv); +extern void run_go(int argc, char **argv); +extern void run_art(int argc, char **argv); +extern void run_super(int argc, char **argv); +extern void run_lsd(int argc, char **argv); +#ifdef __cplusplus +} +#endif + +void average(int argc, char *argv[]) +{ + char *cfgfile = argv[2]; + char *outfile = argv[3]; + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + network *sum = parse_network_cfg(cfgfile); + + char *weightfile = argv[4]; + load_weights(sum, weightfile); + + int i, j; + int n = argc - 5; + for(i = 0; i < n; ++i){ + weightfile = argv[i+5]; + load_weights(net, weightfile); + for(j = 0; j < net->n; ++j){ + layer l = net->layers[j]; + layer out = sum->layers[j]; + if(l.type == CONVOLUTIONAL){ + int num = l.n*l.c*l.size*l.size; + axpy_cpu(l.n, 1, 
l.biases, 1, out.biases, 1); + axpy_cpu(num, 1, l.weights, 1, out.weights, 1); + if(l.batch_normalize){ + axpy_cpu(l.n, 1, l.scales, 1, out.scales, 1); + axpy_cpu(l.n, 1, l.rolling_mean, 1, out.rolling_mean, 1); + axpy_cpu(l.n, 1, l.rolling_variance, 1, out.rolling_variance, 1); + } + } + if(l.type == CONNECTED){ + axpy_cpu(l.outputs, 1, l.biases, 1, out.biases, 1); + axpy_cpu(l.outputs*l.inputs, 1, l.weights, 1, out.weights, 1); + } + } + } + n = n+1; + for(j = 0; j < net->n; ++j){ + layer l = sum->layers[j]; + if(l.type == CONVOLUTIONAL){ + int num = l.n*l.c*l.size*l.size; + scal_cpu(l.n, 1./n, l.biases, 1); + scal_cpu(num, 1./n, l.weights, 1); + if(l.batch_normalize){ + scal_cpu(l.n, 1./n, l.scales, 1); + scal_cpu(l.n, 1./n, l.rolling_mean, 1); + scal_cpu(l.n, 1./n, l.rolling_variance, 1); + } + } + if(l.type == CONNECTED){ + scal_cpu(l.outputs, 1./n, l.biases, 1); + scal_cpu(l.outputs*l.inputs, 1./n, l.weights, 1); + } + } + save_weights(sum, outfile); +} + +long numops(network *net) +{ + int i; + long ops = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + ops += 2l * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w; + } else if(l.type == CONNECTED){ + ops += 2l * l.inputs * l.outputs; + } else if (l.type == RNN){ + ops += 2l * l.input_layer->inputs * l.input_layer->outputs; + ops += 2l * l.self_layer->inputs * l.self_layer->outputs; + ops += 2l * l.output_layer->inputs * l.output_layer->outputs; + } else if (l.type == GRU){ + ops += 2l * l.uz->inputs * l.uz->outputs; + ops += 2l * l.uh->inputs * l.uh->outputs; + ops += 2l * l.ur->inputs * l.ur->outputs; + ops += 2l * l.wz->inputs * l.wz->outputs; + ops += 2l * l.wh->inputs * l.wh->outputs; + ops += 2l * l.wr->inputs * l.wr->outputs; + } else if (l.type == LSTM){ + ops += 2l * l.uf->inputs * l.uf->outputs; + ops += 2l * l.ui->inputs * l.ui->outputs; + ops += 2l * l.ug->inputs * l.ug->outputs; + ops += 2l * l.uo->inputs * l.uo->outputs; + ops += 2l * l.wf->inputs * 
l.wf->outputs; + ops += 2l * l.wi->inputs * l.wi->outputs; + ops += 2l * l.wg->inputs * l.wg->outputs; + ops += 2l * l.wo->inputs * l.wo->outputs; + } + } + return ops; +} + +void speed(char *cfgfile, int tics) +{ + if (tics == 0) tics = 1000; + network *net = parse_network_cfg(cfgfile); + set_batch_network(net, 1); + int i; + double time=what_time_is_it_now(); + image im = make_image(net->w, net->h, net->c*net->batch); + for(i = 0; i < tics; ++i){ + network_predict(net, im.data); + } + double t = what_time_is_it_now() - time; + long ops = numops(net); + printf("\n%d evals, %f Seconds\n", tics, t); + printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); + printf("FLOPS: %.2f Bn\n", (float)ops/1000000000.*tics/t); + printf("Speed: %f sec/eval\n", t/tics); + printf("Speed: %f Hz\n", tics/t); +} + +void operations(char *cfgfile) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + long ops = numops(net); + printf("Floating Point Operations: %ld\n", ops); + printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); +} + +void oneoff(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + int oldn = net->layers[net->n - 2].n; + int c = net->layers[net->n - 2].c; + scal_cpu(oldn*c, .1, net->layers[net->n - 2].weights, 1); + scal_cpu(oldn, 0, net->layers[net->n - 2].biases, 1); + net->layers[net->n - 2].n = 11921; + net->layers[net->n - 2].biases += 5; + net->layers[net->n - 2].weights += 5*c; + if(weightfile){ + load_weights(net, weightfile); + } + net->layers[net->n - 2].biases -= 5; + net->layers[net->n - 2].weights -= 5*c; + net->layers[net->n - 2].n = oldn; + printf("%d\n", oldn); + layer l = net->layers[net->n - 2]; + copy_cpu(l.n/3, l.biases, 1, l.biases + l.n/3, 1); + copy_cpu(l.n/3, l.biases, 1, l.biases + 2*l.n/3, 1); + copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + l.n/3*l.c, 1); + copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + 2*l.n/3*l.c, 1); + *net->seen = 
0; + save_weights(net, outfile); +} + +void oneoff2(char *cfgfile, char *weightfile, char *outfile, int l) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights_upto(net, weightfile, 0, net->n); + load_weights_upto(net, weightfile, l, net->n); + } + *net->seen = 0; + save_weights_upto(net, outfile, net->n); +} + +void partial(char *cfgfile, char *weightfile, char *outfile, int max) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 1); + save_weights_upto(net, outfile, max); +} + +void print_weights(char *cfgfile, char *weightfile, int n) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 1); + layer l = net->layers[n]; + int i, j; + //printf("["); + for(i = 0; i < l.n; ++i){ + //printf("["); + for(j = 0; j < l.size*l.size*l.c; ++j){ + //if(j > 0) printf(","); + printf("%g ", l.weights[i*l.size*l.size*l.c + j]); + } + printf("\n"); + //printf("]%s\n", (i == l.n-1)?"":","); + } + //printf("]"); +} + +void rescale_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + rescale_weights(l, 2, -.5); + break; + } + } + save_weights(net, outfile); +} + +void rgbgr_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + rgbgr_weights(l); + break; + } + } + save_weights(net, outfile); +} + +void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for (i = 0; i < net->n; ++i) { + layer l = net->layers[i]; + if (l.type == CONVOLUTIONAL && l.batch_normalize) { + denormalize_convolutional_layer(l); + } + if (l.type == CONNECTED && l.batch_normalize) { 
+ denormalize_connected_layer(l); + } + if (l.type == GRU && l.batch_normalize) { + denormalize_connected_layer(*l.input_z_layer); + denormalize_connected_layer(*l.input_r_layer); + denormalize_connected_layer(*l.input_h_layer); + denormalize_connected_layer(*l.state_z_layer); + denormalize_connected_layer(*l.state_r_layer); + denormalize_connected_layer(*l.state_h_layer); + } + } + save_weights(net, outfile); +} + +layer normalize_layer(layer l, int n) +{ + int j; + l.batch_normalize=1; + l.scales = (float *) calloc(n, sizeof(float)); + for(j = 0; j < n; ++j){ + l.scales[j] = 1; + } + l.rolling_mean = (float *) calloc(n, sizeof(float)); + l.rolling_variance = (float *) calloc(n, sizeof(float)); + return l; +} + +void normalize_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL && !l.batch_normalize){ + net->layers[i] = normalize_layer(l, l.n); + } + if (l.type == CONNECTED && !l.batch_normalize) { + net->layers[i] = normalize_layer(l, l.outputs); + } + if (l.type == GRU && l.batch_normalize) { + *l.input_z_layer = normalize_layer(*l.input_z_layer, l.input_z_layer->outputs); + *l.input_r_layer = normalize_layer(*l.input_r_layer, l.input_r_layer->outputs); + *l.input_h_layer = normalize_layer(*l.input_h_layer, l.input_h_layer->outputs); + *l.state_z_layer = normalize_layer(*l.state_z_layer, l.state_z_layer->outputs); + *l.state_r_layer = normalize_layer(*l.state_r_layer, l.state_r_layer->outputs); + *l.state_h_layer = normalize_layer(*l.state_h_layer, l.state_h_layer->outputs); + net->layers[i].batch_normalize=1; + } + } + save_weights(net, outfile); +} + +void statistics_net(char *cfgfile, char *weightfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for (i = 0; i < net->n; ++i) { + layer l = net->layers[i]; + if (l.type == CONNECTED && 
l.batch_normalize) { + printf("Connected Layer %d\n", i); + statistics_connected_layer(l); + } + if (l.type == GRU && l.batch_normalize) { + printf("GRU Layer %d\n", i); + printf("Input Z\n"); + statistics_connected_layer(*l.input_z_layer); + printf("Input R\n"); + statistics_connected_layer(*l.input_r_layer); + printf("Input H\n"); + statistics_connected_layer(*l.input_h_layer); + printf("State Z\n"); + statistics_connected_layer(*l.state_z_layer); + printf("State R\n"); + statistics_connected_layer(*l.state_r_layer); + printf("State H\n"); + statistics_connected_layer(*l.state_h_layer); + } + printf("\n"); + } +} + +void denormalize_net(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = load_network(cfgfile, weightfile, 0); + int i; + for (i = 0; i < net->n; ++i) { + layer l = net->layers[i]; + if ((l.type == DECONVOLUTIONAL || l.type == CONVOLUTIONAL) && l.batch_normalize) { + denormalize_convolutional_layer(l); + net->layers[i].batch_normalize=0; + } + if (l.type == CONNECTED && l.batch_normalize) { + denormalize_connected_layer(l); + net->layers[i].batch_normalize=0; + } + if (l.type == GRU && l.batch_normalize) { + denormalize_connected_layer(*l.input_z_layer); + denormalize_connected_layer(*l.input_r_layer); + denormalize_connected_layer(*l.input_h_layer); + denormalize_connected_layer(*l.state_z_layer); + denormalize_connected_layer(*l.state_r_layer); + denormalize_connected_layer(*l.state_h_layer); + l.input_z_layer->batch_normalize = 0; + l.input_r_layer->batch_normalize = 0; + l.input_h_layer->batch_normalize = 0; + l.state_z_layer->batch_normalize = 0; + l.state_r_layer->batch_normalize = 0; + l.state_h_layer->batch_normalize = 0; + net->layers[i].batch_normalize=0; + } + } + save_weights(net, outfile); +} + +void mkimg(char *cfgfile, char *weightfile, int h, int w, int num, char *prefix) +{ + network *net = load_network(cfgfile, weightfile, 0); + image *ims = get_weights(net->layers[0]); + int n = net->layers[0].n; + 
int z; + for(z = 0; z < num; ++z){ + image im = make_image(h, w, 3); + fill_image(im, .5); + int i; + for(i = 0; i < 100; ++i){ + image r = copy_image(ims[rand()%n]); + rotate_image_cw(r, rand()%4); + random_distort_image(r, 1, 1.5, 1.5); + int dx = rand()%(w-r.w); + int dy = rand()%(h-r.h); + ghost_image(r, im, dx, dy); + free_image(r); + } + char buff[256]; + sprintf(buff, "%s/gen_%d", prefix, z); + save_image(im, buff); + free_image(im); + } +} + +void visualize(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + visualize_network(net); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsc() +{ + unsigned long a, d; + + __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); + + return (a | (d << 32)); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsp() { + struct timespec tms; + if (clock_gettime(CLOCK_REALTIME, &tms)) { + return -1; + } + unsigned long ns = tms.tv_sec * 1000000000; + ns += tms.tv_nsec; + return ns; +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + //test_resize("data/bad.jpg"); + //test_box(); + //test_convolutional_layer(); + if(argc < 2){ + fprintf(stderr, "usage: %s \n", argv[0]); + return 0; + } + gpu_index = find_int_arg(argc, argv, "-i", GPU_DEVICE); + if(find_arg(argc, argv, "-nogpu")) { + gpu_index = -1; + } + +#ifndef GPU + gpu_index = -1; +#else + if(gpu_index >= 0){ + cuda_set_device(gpu_index); + } + initTrace(); +#endif + + if (0 == strcmp(argv[1], "average")){ + average(argc, argv); + } else if (0 == strcmp(argv[1], "yolo")){ + run_yolo(argc, argv); + } else if (0 == strcmp(argv[1], "super")){ + run_super(argc, argv); + } else if (0 == strcmp(argv[1], "lsd")){ + run_lsd(argc, argv); + } else if (0 == strcmp(argv[1], "detector")){ + run_detector(argc, argv); + } else if (0 == strcmp(argv[1], "detect")){ + float thresh = 
find_float_arg(argc, argv, "-thresh", .5); + char *filename = (argc > 4) ? argv[4]: 0; + char *outfile = find_char_arg(argc, argv, "-out", 0); + int fullscreen = find_arg(argc, argv, "-fullscreen"); + char *value = getenv("UVMAsyncBench_BASE"); + char buff[256]; + sprintf(buff, "%s/workloads/realworld/standard/darknet/cfg/coco.data", value); + test_detector(buff, argv[2], argv[3], filename, thresh, .5, outfile, fullscreen); + } else if (0 == strcmp(argv[1], "cifar")){ + run_cifar(argc, argv); + } else if (0 == strcmp(argv[1], "go")){ + run_go(argc, argv); + } else if (0 == strcmp(argv[1], "rnn")){ + run_char_rnn(argc, argv); + } else if (0 == strcmp(argv[1], "coco")){ + run_coco(argc, argv); + } else if (0 == strcmp(argv[1], "classify")){ + predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5); + } else if (0 == strcmp(argv[1], "classifier")){ + run_classifier(argc, argv); + } else if (0 == strcmp(argv[1], "regressor")){ + run_regressor(argc, argv); + } else if (0 == strcmp(argv[1], "isegmenter")){ + run_isegmenter(argc, argv); + } else if (0 == strcmp(argv[1], "segmenter")){ + run_segmenter(argc, argv); + } else if (0 == strcmp(argv[1], "art")){ + run_art(argc, argv); + } else if (0 == strcmp(argv[1], "tag")){ + run_tag(argc, argv); + } else if (0 == strcmp(argv[1], "3d")){ + composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? 
atof(argv[5]) : 0); + } else if (0 == strcmp(argv[1], "test")){ + test_resize(argv[2]); + } else if (0 == strcmp(argv[1], "nightmare")){ + run_nightmare(argc, argv); + } else if (0 == strcmp(argv[1], "rgbgr")){ + rgbgr_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "reset")){ + reset_normalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "denormalize")){ + denormalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "statistics")){ + statistics_net(argv[2], argv[3]); + } else if (0 == strcmp(argv[1], "normalize")){ + normalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "rescale")){ + rescale_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "ops")){ + operations(argv[2]); + } else if (0 == strcmp(argv[1], "speed")){ + speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0); + } else if (0 == strcmp(argv[1], "oneoff")){ + oneoff(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "oneoff2")){ + oneoff2(argv[2], argv[3], argv[4], atoi(argv[5])); + } else if (0 == strcmp(argv[1], "print")){ + print_weights(argv[2], argv[3], atoi(argv[4])); + } else if (0 == strcmp(argv[1], "partial")){ + partial(argv[2], argv[3], argv[4], atoi(argv[5])); + } else if (0 == strcmp(argv[1], "average")){ + average(argc, argv); + } else if (0 == strcmp(argv[1], "visualize")){ + visualize(argv[2], (argc > 3) ? 
argv[3] : 0); + } else if (0 == strcmp(argv[1], "mkimg")){ + mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]); + } else if (0 == strcmp(argv[1], "imtest")){ + test_resize(argv[2]); + } else { + fprintf(stderr, "Not an option: %s\n", argv[1]); + } + finiTrace(); + return 0; +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/detector-scipy-opencv.py b/workloads/realworld/uvm_prefetch/darknet/examples/detector-scipy-opencv.py new file mode 100644 index 0000000000000000000000000000000000000000..3bfc591312ad89ff2b026ffac0daecd461c80447 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/detector-scipy-opencv.py @@ -0,0 +1,56 @@ +# Stupid python path shit. +# Instead just add darknet.py to somewhere in your python path +# OK actually that might not be a great idea, idk, work in progress +# Use at your own risk. or don't, i don't care + +from scipy.misc import imread +import cv2 + +def array_to_image(arr): + arr = arr.transpose(2,0,1) + c = arr.shape[0] + h = arr.shape[1] + w = arr.shape[2] + arr = (arr/255.0).flatten() + data = dn.c_array(dn.c_float, arr) + im = dn.IMAGE(w,h,c,data) + return im + +def detect2(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): + boxes = dn.make_boxes(net) + probs = dn.make_probs(net) + num = dn.num_boxes(net) + dn.network_detect(net, image, thresh, hier_thresh, nms, boxes, probs) + res = [] + for j in range(num): + for i in range(meta.classes): + if probs[j][i] > 0: + res.append((meta.names[i], probs[j][i], (boxes[j].x, boxes[j].y, boxes[j].w, boxes[j].h))) + res = sorted(res, key=lambda x: -x[1]) + dn.free_ptrs(dn.cast(probs, dn.POINTER(dn.c_void_p)), num) + return res + +import sys, os +sys.path.append(os.path.join(os.getcwd(),'python/')) + +import darknet as dn + +# Darknet +net = dn.load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0) +meta = dn.load_meta("cfg/coco.data") +r = dn.detect(net, meta, "data/dog.jpg") +print r + +# scipy +arr= imread('data/dog.jpg') +im = 
array_to_image(arr) +r = detect2(net, meta, im) +print r + +# OpenCV +arr = cv2.imread('data/dog.jpg') +im = array_to_image(arr) +dn.rgbgr_image(im) +r = detect2(net, meta, im) +print r + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/detector.c b/workloads/realworld/uvm_prefetch/darknet/examples/detector.c new file mode 100644 index 0000000000000000000000000000000000000000..6ff1fcdff3d3d81abb458e001091cf2757b8d837 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/detector.c @@ -0,0 +1,931 @@ +#include "darknet.h" + +static int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; + + +void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + list *options = read_data_cfg(datacfg); + // Ruihao + char *train_images_cfg = option_find_str(options, "train", "data/train.list"); + char *backup_directory_cfg = option_find_str(options, "backup", "/backup/"); + + char *env = getenv("UVMAsyncBench_BASE"); + char train_images[256]; + char backup_directory[256]; + sprintf(train_images, "%s/%s", env, train_images_cfg); + sprintf(backup_directory, "%s/%s", env, backup_directory_cfg); + // Ruihao + + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network **nets = calloc(ngpus, sizeof(network)); + + srand(time(0)); + int seed = rand(); + int i; + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + data train, buffer; + + layer 
l = net->layers[net->n - 1]; + + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = get_base_args(net); + args.coords = l.coords; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes = l.max_boxes; + args.d = &buffer; + args.type = DETECTION_DATA; + //args.type = INSTANCE_DATA; + args.threads = 64; + + pthread_t load_thread = load_data(args); + double time; + int count = 0; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + if(l.random && count++%10 == 0){ + printf("Resizing\n"); + int dim = (rand() % 10 + 10) * 32; + if (get_current_batch(net)+200 > net->max_batches) dim = 608; + //int dim = (rand() % 4 + 16) * 32; + printf("%d\n", dim); + args.w = dim; + args.h = dim; + + pthread_join(load_thread, 0); + train = buffer; + free_data(train); + load_thread = load_data(args); + + #pragma omp parallel for + for(i = 0; i < ngpus; ++i){ + resize_network(nets[i], dim, dim); + } + net = nets[0]; + } + time=what_time_is_it_now(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + /* + int k; + for(k = 0; k < l.max_boxes; ++k){ + box b = float_to_box(train.y.vals[10] + 1 + k*5); + if(!b.x) break; + printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h); + } + */ + /* + int zz; + for(zz = 0; zz < train.X.cols; ++zz){ + image im = float_to_image(net->w, net->h, 3, train.X.vals[zz]); + int k; + for(k = 0; k < l.max_boxes; ++k){ + box b = float_to_box(train.y.vals[zz] + k*5, 1); + printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + draw_bbox(im, b, 1, 1,0,0); + } + show_image(im, "truth11"); + cvWaitKey(0); + save_image(im, "truth11"); + } + */ + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + + time=what_time_is_it_now(); + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = 
train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + i = get_current_batch(net); + printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, i*imgs); + if(i%100==0){ +#ifdef GPU + if(ngpus != 1) sync_nets(nets, ngpus, 0); +#endif + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + if(i%10000==0 || (i < 1000 && i%100 == 0)){ +#ifdef GPU + if(ngpus != 1) sync_nets(nets, ngpus, 0); +#endif + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } +#ifdef GPU + if(ngpus != 1) sync_nets(nets, ngpus, 0); +#endif + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + + +static int get_coco_image_id(char *filename) +{ + char *p = strrchr(filename, '/'); + char *c = strrchr(filename, '_'); + if(c) p = c; + return atoi(p+1); +} + +static void print_cocos(FILE *fp, char *image_path, detection *dets, int num_boxes, int classes, int w, int h) +{ + int i, j; + int image_id = get_coco_image_id(image_path); + for(i = 0; i < num_boxes; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + float bx = xmin; + float by = ymin; + float bw = xmax - xmin; + float bh = ymax - ymin; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, 
dets[i].prob[j]); + } + } +} + +void print_detector_detections(FILE **fps, char *id, detection *dets, int total, int classes, int w, int h) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2. + 1; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2. + 1; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2. + 1; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2. + 1; + + if (xmin < 1) xmin = 1; + if (ymin < 1) ymin = 1; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], + xmin, ymin, xmax, ymax); + } + } +} + +void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int classes, int w, int h) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + int class = j; + if (dets[i].prob[class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j+1, dets[i].prob[class], + xmin, ymin, xmax, ymax); + } + } +} + +void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char *outfile) +{ + int j; + list *options = read_data_cfg(datacfg); + char *valid_images = option_find_str(options, "valid", "data/train.list"); + char *name_list = option_find_str(options, "names", "data/names.list"); + char *prefix = option_find_str(options, "results", "results"); + char **names = get_labels(name_list); + char *mapf = option_find_str(options, "map", 0); + int *map = 0; + if (mapf) map = read_map(mapf); + + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 2); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, 
net->momentum, net->decay); + srand(time(0)); + + list *plist = get_paths(valid_images); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + char *type = option_find_str(options, "eval", "voc"); + FILE *fp = 0; + FILE **fps = 0; + int coco = 0; + int imagenet = 0; + if(0==strcmp(type, "coco")){ + if(!outfile) outfile = "coco_results"; + snprintf(buff, 1024, "%s/%s.json", prefix, outfile); + fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + coco = 1; + } else if(0==strcmp(type, "imagenet")){ + if(!outfile) outfile = "imagenet-detection"; + snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); + fp = fopen(buff, "w"); + imagenet = 1; + classes = 200; + } else { + if(!outfile) outfile = "comp4_det_test_"; + fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); + fps[j] = fopen(buff, "w"); + } + } + + int m = plist->size; + int i=0; + int t; + + float thresh = .005; + float nms = .45; + + int nthreads = 4; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + image input = make_image(net->w, net->h, net->c*2); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + //args.type = IMAGE_DATA; + args.type = LETTERBOX_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + double start = what_time_is_it_now(); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = 
paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id = basecfg(path); + copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data, 1); + flip_image(val_resized[t]); + copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data + net->w*net->h*net->c, 1); + + network_predict(net, input.data); + int w = val[t].w; + int h = val[t].h; + int num = 0; + detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &num); + if (nms) do_nms_sort(dets, num, classes, nms); + if (coco){ + print_cocos(fp, path, dets, num, classes, w, h); + } else if (imagenet){ + print_imagenet_detections(fp, i+t-nthreads+1, dets, num, classes, w, h); + } else { + print_detector_detections(fps, id, dets, num, classes, w, h); + } + free_detections(dets, num); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + for(j = 0; j < classes; ++j){ + if(fps) fclose(fps[j]); + } + if(coco){ + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start); +} + + +void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile) +{ + int j; + list *options = read_data_cfg(datacfg); + char *valid_images = option_find_str(options, "valid", "data/train.list"); + char *name_list = option_find_str(options, "names", "data/names.list"); + char *prefix = option_find_str(options, "results", "results"); + char **names = get_labels(name_list); + char *mapf = option_find_str(options, "map", 0); + int *map = 0; + if (mapf) map = read_map(mapf); + + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + list *plist = get_paths(valid_images); + char **paths 
= (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + char *type = option_find_str(options, "eval", "voc"); + FILE *fp = 0; + FILE **fps = 0; + int coco = 0; + int imagenet = 0; + if(0==strcmp(type, "coco")){ + if(!outfile) outfile = "coco_results"; + snprintf(buff, 1024, "%s/%s.json", prefix, outfile); + fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + coco = 1; + } else if(0==strcmp(type, "imagenet")){ + if(!outfile) outfile = "imagenet-detection"; + snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); + fp = fopen(buff, "w"); + imagenet = 1; + classes = 200; + } else { + if(!outfile) outfile = "comp4_det_test_"; + fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); + fps[j] = fopen(buff, "w"); + } + } + + + int m = plist->size; + int i=0; + int t; + + float thresh = .005; + float nms = .45; + + int nthreads = 4; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + //args.type = IMAGE_DATA; + args.type = LETTERBOX_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + double start = what_time_is_it_now(); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ 
+ char *path = paths[i+t-nthreads]; + char *id = basecfg(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &nboxes); + if (nms) do_nms_sort(dets, nboxes, classes, nms); + if (coco){ + print_cocos(fp, path, dets, nboxes, classes, w, h); + } else if (imagenet){ + print_imagenet_detections(fp, i+t-nthreads+1, dets, nboxes, classes, w, h); + } else { + print_detector_detections(fps, id, dets, nboxes, classes, w, h); + } + free_detections(dets, nboxes); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + for(j = 0; j < classes; ++j){ + if(fps) fclose(fps[j]); + } + if(coco){ + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start); +} + +void validate_detector_recall(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + list *plist = get_paths("data/coco_val_5k.list"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + + int j, k; + + int m = plist->size; + int i=0; + + float thresh = .001; + float iou_thresh = .5; + float nms = .4; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + int nboxes = 0; + detection *dets = get_network_boxes(net, sized.w, sized.h, thresh, .5, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, nboxes, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, 
"JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < nboxes; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < l.w*l.h*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free(id); + free_image(orig); + free_image(sized); + } +} + + +void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen) +{ + list *options = read_data_cfg(datacfg); + // Ruihao + char *name_list_cfg = option_find_str(options, "names", "data/names.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char name_list[256]; + sprintf(name_list, "%s/%s", env, name_list_cfg); + // Ruihao + char **names = get_labels(name_list); + + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + double time; + char buff[256]; + char *input = buff; + float nms=.45; + while(1){ + printf("fine name is %s\n", filename); + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = letterbox_image(im, net->w, net->h); + //image sized = resize_image(im, net->w, net->h); + //image sized2 = resize_max(im, 
net->w); + //image sized = crop_image(sized2, -((net->w - sized2.w)/2), -((net->h - sized2.h)/2), net->w, net->h); + //resize_network(net, sized.w, sized.h); + layer l = net->layers[net->n-1]; + + + float *X = sized.data; + time=what_time_is_it_now(); + startCPU(); + network_predict(net, X); + endCPU(); + printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time); + int nboxes = 0; + detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes); + //printf("%d\n", nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes); + free_detections(dets, nboxes); + if(outfile){ + save_image(im, outfile); + } + else{ + save_image(im, "predictions"); +#ifdef OPENCV + make_window("predictions", 512, 512, 0); + show_image(im, "predictions", 0); +#endif + } + + free_image(im); + free_image(sized); + if (filename) break; + } +} + +/* +void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + CvCapture * cap; + + int w = 1280; + int h = 720; + + if(filename){ + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + } + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + cvNamedWindow(base, CV_WINDOW_NORMAL); + cvResizeWindow(base, 512, 512); + float fps = 0; + int i; + float nms = .45; + + while(1){ + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + layer l = net->layers[net->n-1]; + + float *X = 
in_s.data; + network_predict(net, X); + int nboxes = 0; + detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 0, &nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + + for(i = 0; i < nboxes; ++i){ + if(dets[i].prob[class] > thresh){ + box b = dets[i].bbox; + int left = b.x-b.w/2.; + int top = b.y-b.h/2.; + censor_image(in, left, top, b.w, b.h); + } + } + show_image(in, base); + cvWaitKey(10); + free_detections(dets, nboxes); + + + free_image(in_s); + free_image(in); + + + float curr = 0; + fps = .9*fps + .1*curr; + for(i = 0; i < skip; ++i){ + image in = get_image_from_stream(cap); + free_image(in); + } + } + #endif +} + +void extract_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + CvCapture * cap; + + int w = 1280; + int h = 720; + + if(filename){ + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + } + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + cvNamedWindow(base, CV_WINDOW_NORMAL); + cvResizeWindow(base, 512, 512); + float fps = 0; + int i; + int count = 0; + float nms = .45; + + while(1){ + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + layer l = net->layers[net->n-1]; + + show_image(in, base); + + int nboxes = 0; + float *X = in_s.data; + network_predict(net, X); + detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 1, &nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, 
l.classes, nms); + + for(i = 0; i < nboxes; ++i){ + if(dets[i].prob[class] > thresh){ + box b = dets[i].bbox; + int size = b.w*in.w > b.h*in.h ? b.w*in.w : b.h*in.h; + int dx = b.x*in.w-size/2.; + int dy = b.y*in.h-size/2.; + image bim = crop_image(in, dx, dy, size, size); + char buff[2048]; + sprintf(buff, "results/extract/%07d", count); + ++count; + save_image(bim, buff); + free_image(bim); + } + } + free_detections(dets, nboxes); + + + free_image(in_s); + free_image(in); + + + float curr = 0; + fps = .9*fps + .1*curr; + for(i = 0; i < skip; ++i){ + image in = get_image_from_stream(cap); + free_image(in); + } + } + #endif +} +*/ + +/* +void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets) +{ + network_predict_image(net, im); + layer l = net->layers[net->n-1]; + int nboxes = num_boxes(net); + fill_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 0, dets); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); +} +*/ + +void infer_detector(char *datacfg, char *cfgfile, char *weightfile) +{ + int curr = 0; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *test_list_cfg = option_find_str(options, "valid", "data/valid.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char test_list[256]; + sprintf(test_list, "%s/%s", env, test_list_cfg); + // Ruihao + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + m = net->max_batches * net->batch; + free_list(plist); + + clock_t time; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = classes; + args.n = net->batch; + args.m = 0; + args.labels = 0; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(curr = net->batch; curr <= m; 
curr += net->batch){ + // while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + if(curr < m){ + args.paths = paths + curr; + if (curr + net->batch > m) args.n = m - curr; + load_thread = load_data_in_thread(args); + } + fprintf(stderr, "Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + + fprintf(stderr, "%lf seconds, %d images, %d total\n", sec(clock()-time), val.X.rows, curr); + free_matrix(pred); + free_data(val); + } +} + +void run_detector(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .5); + float hier_thresh = find_float_arg(argc, argv, "-hier", .5); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + int avg = find_int_arg(argc, argv, "-avg", 3); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + char *outfile = find_char_arg(argc, argv, "-out", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int clear = find_arg(argc, argv, "-clear"); + int fullscreen = find_arg(argc, argv, "-fullscreen"); + int width = find_int_arg(argc, argv, "-w", 0); + int height = find_int_arg(argc, argv, "-h", 0); + int fps = find_int_arg(argc, argv, "-fps", 0); + //int class = find_int_arg(argc, argv, "-class", 0); + + char *datacfg = argv[3]; + char *cfg = 
argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen); + else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile); + // Ruihao + else if(0==strcmp(argv[2], "infer")) infer_detector(datacfg, cfg, weights); + // Ruihao + else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile); + else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) { + list *options = read_data_cfg(datacfg); + int classes = option_find_int(options, "classes", 20); + char *name_list = option_find_str(options, "names", "data/names.list"); + char **names = get_labels(name_list); + demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen); + } + //else if(0==strcmp(argv[2], "extract")) extract_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip); + //else if(0==strcmp(argv[2], "censor")) censor_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip); +} diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/detector.py b/workloads/realworld/uvm_prefetch/darknet/examples/detector.py new file mode 100644 index 0000000000000000000000000000000000000000..40bb365e68211c513db9d63847ac95070f5eab98 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/detector.py @@ -0,0 +1,27 @@ +# Stupid python path shit. +# Instead just add darknet.py to somewhere in your python path +# OK actually that might not be a great idea, idk, work in progress +# Use at your own risk. 
or don't, i don't care + +import sys, os +sys.path.append(os.path.join(os.getcwd(),'python/')) + +import darknet as dn +import pdb + +dn.set_gpu(0) +net = dn.load_net("cfg/yolo-thor.cfg", "/home/pjreddie/backup/yolo-thor_final.weights", 0) +meta = dn.load_meta("cfg/thor.data") +r = dn.detect(net, meta, "data/bedroom.jpg") +print r + +# And then down here you could detect a lot more images like: +r = dn.detect(net, meta, "data/eagle.jpg") +print r +r = dn.detect(net, meta, "data/giraffe.jpg") +print r +r = dn.detect(net, meta, "data/horses.jpg") +print r +r = dn.detect(net, meta, "data/person.jpg") +print r + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/dice.c b/workloads/realworld/uvm_prefetch/darknet/examples/dice.c new file mode 100644 index 0000000000000000000000000000000000000000..f56d76c0bb66c7f630ba1c4d1dc9195398b87cfb --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/dice.c @@ -0,0 +1,116 @@ +#include "darknet.h" + +char *dice_labels[] = {"face1","face2","face3","face4","face5","face6"}; + +void train_dice(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + int i = *net.seen/imgs; + char **labels = dice_labels; + list *plist = get_paths("data/dice/dice.train.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + while(1){ + ++i; + time=clock(); + data train = load_data_old(paths, imgs, plist->size, labels, 6, net.w, net.h); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + 
printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); + free_data(train); + if((i % 100) == 0) net.learning_rate *= .1; + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, i); + save_weights(net, buff); + } + } +} + +void validate_dice(char *filename, char *weightfile) +{ + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + char **labels = dice_labels; + list *plist = get_paths("data/dice/dice.val.list"); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + data val = load_data_old(paths, m, 0, labels, 6, net.w, net.h); + float *acc = network_accuracies(net, val, 2); + printf("Validation Accuracy: %f, %d images\n", acc[0], m); + free_data(val); +} + +void test_dice(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + int i = 0; + char **names = dice_labels; + char buff[256]; + char *input = buff; + int indexes[6]; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, net.w, net.h); + float *X = im.data; + float *predictions = network_predict(net, X); + top_predictions(net, 6, indexes); + for(i = 0; i < 6; ++i){ + int index = indexes[i]; + printf("%s: %f\n", names[index], predictions[index]); + } + free_image(im); + if (filename) break; + } +} + +void run_dice(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "test")) test_dice(cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_dice(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_dice(cfg, weights); +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/go.c b/workloads/realworld/uvm_prefetch/darknet/examples/go.c new file mode 100644 index 0000000000000000000000000000000000000000..688579dcb3a3e35e9a79b8fb8aa684f28f44290d --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/go.c @@ -0,0 +1,1370 @@ +#include "darknet.h" + +#include +#include +#include + +int inverted = 1; +int noi = 1; +static const int nind = 10; +int legal_go(float *b, float *ko, int p, int r, int c); +int check_ko(float *x, float *ko); + +typedef struct { + char **data; + int n; +} moves; + +char *fgetgo(FILE *fp) +{ + if(feof(fp)) return 0; + size_t size = 96; + char *line = malloc(size*sizeof(char)); + if(size != fread(line, sizeof(char), size, fp)){ + free(line); + return 0; + } + + return line; +} + +moves load_go_moves(char *filename) +{ + moves m; + m.n = 128; + m.data = calloc(128, sizeof(char*)); + FILE *fp = fopen(filename, "rb"); + int count = 0; + char *line = 0; + while ((line = fgetgo(fp))) { + if (count >= m.n) { + m.n *= 2; + m.data = realloc(m.data, m.n*sizeof(char*)); + } + m.data[count] = line; + ++count; + } + printf("%d\n", count); + m.n = count; + m.data = realloc(m.data, count*sizeof(char*)); + return m; +} + +void string_to_board(char *s, float *board) +{ + int i, j; + memset(board, 0, 2*19*19*sizeof(float)); + int count = 0; + for(i = 0; i < 91; ++i){ + char c = s[i]; + for(j = 0; j < 4; ++j){ + int me = (c >> (2*j)) & 1; + int you = (c >> (2*j + 1)) & 1; + if (me) board[count] = 1; + else if (you) board[count + 19*19] = 1; + ++count; + if(count >= 19*19) break; + } + } +} + +void board_to_string(char *s, float *board) +{ + int i, j; + memset(s, 0, (19*19/4+1)*sizeof(char)); + int count = 0; + for(i = 0; i < 91; ++i){ + for(j = 
0; j < 4; ++j){ + int me = (board[count] == 1); + int you = (board[count + 19*19] == 1); + if (me) s[i] = s[i] | (1<<(2*j)); + if (you) s[i] = s[i] | (1<<(2*j + 1)); + ++count; + if(count >= 19*19) break; + } + } +} + +static int occupied(float *b, int i) +{ + if (b[i]) return 1; + if (b[i+19*19]) return -1; + return 0; +} + +data random_go_moves(moves m, int n) +{ + data d = {0}; + d.X = make_matrix(n, 19*19*3); + d.y = make_matrix(n, 19*19+2); + int i, j; + for(i = 0; i < n; ++i){ + float *board = d.X.vals[i]; + float *label = d.y.vals[i]; + char *b = m.data[rand()%m.n]; + int player = b[0] - '0'; + int result = b[1] - '0'; + int row = b[2]; + int col = b[3]; + string_to_board(b+4, board); + if(player > 0) for(j = 0; j < 19*19; ++j) board[19*19*2 + j] = 1; + label[19*19+1] = (player==result); + if(row >= 19 || col >= 19){ + label[19*19] = 1; + } else { + label[col + 19*row] = 1; + if(occupied(board, col + 19*row)) printf("hey\n"); + } + + int flip = rand()%2; + int rotate = rand()%4; + image in = float_to_image(19, 19, 3, board); + image out = float_to_image(19, 19, 1, label); + if(flip){ + flip_image(in); + flip_image(out); + } + rotate_image_cw(in, rotate); + rotate_image_cw(out, rotate); + } + return d; +} + + +void train_go(char *cfgfile, char *weightfile, char *filename, int *gpus, int ngpus, int clear) +{ + int i; + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + network *net = nets[0]; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + + char buff[256]; + moves m = load_go_moves(filename); + //moves m = 
load_go_moves("games.txt"); + + int N = m.n; + printf("Moves: %d\n", N); + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time=what_time_is_it_now(); + + data train = random_go_moves(m, net->batch*net->subdivisions*ngpus); + printf("Loaded: %lf seconds\n", what_time_is_it_now() - time); + time=what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 10); + } +#else + loss = train_network(net, train); +#endif + free_data(train); + + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory,base, epoch); + save_weights(net, buff); + + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + if(get_current_batch(net)%10000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_%ld.backup",backup_directory,base,get_current_batch(net)); + save_weights(net, buff); + } + } + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free(base); +} + +static void propagate_liberty(float *board, int *lib, int *visited, int row, int col, int side) +{ + if (row < 0 || row > 18 || col < 0 || col > 18) return; + int index = row*19 + col; + if (occupied(board,index) != side) return; + if (visited[index]) return; + visited[index] = 1; + lib[index] += 1; + propagate_liberty(board, lib, visited, row+1, col, side); + propagate_liberty(board, lib, visited, row-1, col, side); + propagate_liberty(board, lib, visited, row, col+1, side); + 
propagate_liberty(board, lib, visited, row, col-1, side); +} + + +static int *calculate_liberties(float *board) +{ + int *lib = calloc(19*19, sizeof(int)); + int visited[19*19]; + int i, j; + for(j = 0; j < 19; ++j){ + for(i = 0; i < 19; ++i){ + memset(visited, 0, 19*19*sizeof(int)); + int index = j*19 + i; + if(!occupied(board,index)){ + if ((i > 0) && occupied(board,index - 1)) propagate_liberty(board, lib, visited, j, i-1, occupied(board,index-1)); + if ((i < 18) && occupied(board,index + 1)) propagate_liberty(board, lib, visited, j, i+1, occupied(board,index+1)); + if ((j > 0) && occupied(board,index - 19)) propagate_liberty(board, lib, visited, j-1, i, occupied(board,index-19)); + if ((j < 18) && occupied(board,index + 19)) propagate_liberty(board, lib, visited, j+1, i, occupied(board,index+19)); + } + } + } + return lib; +} + +void print_board(FILE *stream, float *board, int player, int *indexes) +{ + int i,j,n; + fprintf(stream, " "); + for(i = 0; i < 19; ++i){ + fprintf(stream, "%c ", 'A' + i + 1*(i > 7 && noi)); + } + fprintf(stream, "\n"); + for(j = 0; j < 19; ++j){ + fprintf(stream, "%2d", (inverted) ? 
19-j : j+1); + for(i = 0; i < 19; ++i){ + int index = j*19 + i; + if(indexes){ + int found = 0; + for(n = 0; n < nind; ++n){ + if(index == indexes[n]){ + found = 1; + /* + if(n == 0) fprintf(stream, "\uff11"); + else if(n == 1) fprintf(stream, "\uff12"); + else if(n == 2) fprintf(stream, "\uff13"); + else if(n == 3) fprintf(stream, "\uff14"); + else if(n == 4) fprintf(stream, "\uff15"); + */ + fprintf(stream, " %d", n+1); + } + } + if(found) continue; + } + //if(board[index]*-swap > 0) fprintf(stream, "\u25C9 "); + //else if(board[index]*-swap < 0) fprintf(stream, "\u25EF "); + if (occupied(board, index) == player) fprintf(stream, " X"); + else if (occupied(board, index) ==-player) fprintf(stream, " O"); + else fprintf(stream, " ."); + } + fprintf(stream, "\n"); + } +} + +void flip_board(float *board) +{ + int i; + for(i = 0; i < 19*19; ++i){ + float swap = board[i]; + board[i] = board[i+19*19]; + board[i+19*19] = swap; + board[i+19*19*2] = 1-board[i+19*19*2]; + } +} + +float predict_move2(network *net, float *board, float *move, int multi) +{ + float *output = network_predict(net, board); + copy_cpu(19*19+1, output, 1, move, 1); + float result = output[19*19 + 1]; + int i; + if(multi){ + image bim = float_to_image(19, 19, 3, board); + for(i = 1; i < 8; ++i){ + rotate_image_cw(bim, i); + if(i >= 4) flip_image(bim); + + float *output = network_predict(net, board); + image oim = float_to_image(19, 19, 1, output); + result += output[19*19 + 1]; + + if(i >= 4) flip_image(oim); + rotate_image_cw(oim, -i); + + axpy_cpu(19*19+1, 1, output, 1, move, 1); + + if(i >= 4) flip_image(bim); + rotate_image_cw(bim, -i); + } + result = result/8; + scal_cpu(19*19+1, 1./8., move, 1); + } + for(i = 0; i < 19*19; ++i){ + if(board[i] || board[i+19*19]) move[i] = 0; + } + return result; +} + +static void remove_connected(float *b, int *lib, int p, int r, int c) +{ + if (r < 0 || r >= 19 || c < 0 || c >= 19) return; + if (occupied(b, r*19 + c) != p) return; + if (lib[r*19 + c] != 1) 
return; + b[r*19 + c] = 0; + b[19*19 + r*19 + c] = 0; + remove_connected(b, lib, p, r+1, c); + remove_connected(b, lib, p, r-1, c); + remove_connected(b, lib, p, r, c+1); + remove_connected(b, lib, p, r, c-1); +} + + +void move_go(float *b, int p, int r, int c) +{ + int *l = calculate_liberties(b); + if(p > 0) b[r*19 + c] = 1; + else b[19*19 + r*19 + c] = 1; + remove_connected(b, l, -p, r+1, c); + remove_connected(b, l, -p, r-1, c); + remove_connected(b, l, -p, r, c+1); + remove_connected(b, l, -p, r, c-1); + free(l); +} + +int compare_board(float *a, float *b) +{ + if(memcmp(a, b, 19*19*3*sizeof(float)) == 0) return 1; + return 0; +} + +typedef struct mcts_tree{ + float *board; + struct mcts_tree **children; + float *prior; + int *visit_count; + float *value; + float *mean; + float *prob; + int total_count; + float result; + int done; + int pass; +} mcts_tree; + +void free_mcts(mcts_tree *root) +{ + if(!root) return; + int i; + free(root->board); + for(i = 0; i < 19*19+1; ++i){ + if(root->children[i]) free_mcts(root->children[i]); + } + free(root->children); + free(root->prior); + free(root->visit_count); + free(root->value); + free(root->mean); + free(root->prob); + free(root); +} + +float *network_predict_rotations(network *net, float *next) +{ + int n = net->batch; + float *in = calloc(19*19*3*n, sizeof(float)); + image im = float_to_image(19, 19, 3, next); + int i,j; + int *inds = random_index_order(0, 8); + for(j = 0; j < n; ++j){ + i = inds[j]; + rotate_image_cw(im, i); + if(i >= 4) flip_image(im); + memcpy(in + 19*19*3*j, im.data, 19*19*3*sizeof(float)); + if(i >= 4) flip_image(im); + rotate_image_cw(im, -i); + } + float *pred = network_predict(net, in); + for(j = 0; j < n; ++j){ + i = inds[j]; + image im = float_to_image(19, 19, 1, pred + j*(19*19 + 2)); + if(i >= 4) flip_image(im); + rotate_image_cw(im, -i); + if(j > 0){ + axpy_cpu(19*19+2, 1, im.data, 1, pred, 1); + } + } + free(in); + free(inds); + scal_cpu(19*19+2, 1./n, pred, 1); + return pred; +} + 
+mcts_tree *expand(float *next, float *ko, network *net) +{ + mcts_tree *root = calloc(1, sizeof(mcts_tree)); + root->board = next; + root->children = calloc(19*19+1, sizeof(mcts_tree*)); + root->prior = calloc(19*19 + 1, sizeof(float)); + root->prob = calloc(19*19 + 1, sizeof(float)); + root->mean = calloc(19*19 + 1, sizeof(float)); + root->value = calloc(19*19 + 1, sizeof(float)); + root->visit_count = calloc(19*19 + 1, sizeof(int)); + root->total_count = 1; + int i; + float *pred = network_predict_rotations(net, next); + copy_cpu(19*19+1, pred, 1, root->prior, 1); + float val = 2*pred[19*19 + 1] - 1; + root->result = val; + for(i = 0; i < 19*19+1; ++i) { + root->visit_count[i] = 0; + root->value[i] = 0; + root->mean[i] = val; + if(i < 19*19 && occupied(next, i)){ + root->value[i] = -1; + root->mean[i] = -1; + root->prior[i] = 0; + } + } + //print_board(stderr, next, flip?-1:1, 0); + return root; +} + +float *copy_board(float *board) +{ + float *next = calloc(19*19*3, sizeof(float)); + copy_cpu(19*19*3, board, 1, next, 1); + return next; +} + +float select_mcts(mcts_tree *root, network *net, float *prev, float cpuct) +{ + if(root->done) return -root->result; + int i; + float max = -1000; + int max_i = 0; + for(i = 0; i < 19*19+1; ++i){ + root->prob[i] = root->mean[i] + cpuct*root->prior[i] * sqrt(root->total_count) / (1. 
+ root->visit_count[i]); + if(root->prob[i] > max){ + max = root->prob[i]; + max_i = i; + } + } + float val; + i = max_i; + root->visit_count[i]++; + root->total_count++; + if (root->children[i]) { + val = select_mcts(root->children[i], net, root->board, cpuct); + } else { + if(max_i < 19*19 && !legal_go(root->board, prev, 1, max_i/19, max_i%19)) { + root->mean[i] = -1; + root->value[i] = -1; + root->prior[i] = 0; + --root->total_count; + return select_mcts(root, net, prev, cpuct); + //printf("Detected ko\n"); + //getchar(); + } else { + float *next = copy_board(root->board); + if (max_i < 19*19) { + move_go(next, 1, max_i / 19, max_i % 19); + } + flip_board(next); + root->children[i] = expand(next, root->board, net); + val = -root->children[i]->result; + if(max_i == 19*19){ + root->children[i]->pass = 1; + if (root->pass){ + root->children[i]->done = 1; + } + } + } + } + root->value[i] += val; + root->mean[i] = root->value[i]/root->visit_count[i]; + return -val; +} + +mcts_tree *run_mcts(mcts_tree *tree, network *net, float *board, float *ko, int player, int n, float cpuct, float secs) +{ + int i; + double t = what_time_is_it_now(); + if(player < 0) flip_board(board); + if(!tree) tree = expand(copy_board(board), ko, net); + assert(compare_board(tree->board, board)); + for(i = 0; i < n; ++i){ + if (secs > 0 && (what_time_is_it_now() - t) > secs) break; + int max_i = max_int_index(tree->visit_count, 19*19+1); + if (tree->visit_count[max_i] >= n) break; + select_mcts(tree, net, ko, cpuct); + } + if(player < 0) flip_board(board); + //fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + return tree; +} + +mcts_tree *move_mcts(mcts_tree *tree, int index) +{ + if(index < 0 || index > 19*19 || !tree || !tree->children[index]) { + free_mcts(tree); + tree = 0; + } else { + mcts_tree *swap = tree; + tree = tree->children[index]; + swap->children[index] = 0; + free_mcts(swap); + } + return tree; +} + +typedef struct { + float value; + float mcts; + int row; + int 
col; +} move; + +move pick_move(mcts_tree *tree, float temp, int player) +{ + int i; + float probs[19*19+1] = {0}; + move m = {0}; + double sum = 0; + /* + for(i = 0; i < 19*19+1; ++i){ + probs[i] = tree->visit_count[i]; + } + */ + //softmax(probs, 19*19+1, temp, 1, probs); + for(i = 0; i < 19*19+1; ++i){ + sum += pow(tree->visit_count[i], 1./temp); + } + for(i = 0; i < 19*19+1; ++i){ + probs[i] = pow(tree->visit_count[i], 1./temp) / sum; + } + + int index = sample_array(probs, 19*19+1); + m.row = index / 19; + m.col = index % 19; + m.value = (tree->result+1.)/2.; + m.mcts = (tree->mean[index]+1.)/2.; + + int indexes[nind]; + top_k(probs, 19*19+1, nind, indexes); + print_board(stderr, tree->board, player, indexes); + + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", index/19, index%19, tree->result, tree->prior[index], probs[index], tree->mean[index], (tree->children[index])?tree->children[index]->result:0, tree->visit_count[index]); + int ind = max_index(probs, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, tree->result, tree->prior[ind], probs[ind], tree->mean[ind], (tree->children[ind])?tree->children[ind]->result:0, tree->visit_count[ind]); + ind = max_index(tree->prior, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, tree->result, tree->prior[ind], probs[ind], tree->mean[ind], (tree->children[ind])?tree->children[ind]->result:0, tree->visit_count[ind]); + return m; +} + +/* + float predict_move(network *net, float *board, float *move, int multi, float *ko, float temp) + { + + int i; + + int max_v = 0; + int max_i = 0; + for(i = 0; i < 19*19+1; ++i){ + if(root->visit_count[i] > max_v){ + max_v = root->visit_count[i]; + max_i = i; + } + } + fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + int ind = max_index(root->mean, 
19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", max_i/19, max_i%19, root->result, root->prior[max_i], root->prob[max_i], root->mean[max_i], (root->children[max_i])?root->children[max_i]->result:0, root->visit_count[max_i]); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, root->result, root->prior[ind], root->prob[ind], root->mean[ind], (root->children[ind])?root->children[ind]->result:0, root->visit_count[ind]); + ind = max_index(root->prior, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, root->result, root->prior[ind], root->prob[ind], root->mean[ind], (root->children[ind])?root->children[ind]->result:0, root->visit_count[ind]); + if(root->result < -.9 && root->mean[max_i] < -.9) return -1000.f; + + float val = root->result; + free_mcts(root); + return val; + } + */ + +static int makes_safe_go(float *b, int *lib, int p, int r, int c){ + if (r < 0 || r >= 19 || c < 0 || c >= 19) return 0; + if (occupied(b,r*19 + c) == -p){ + if (lib[r*19 + c] > 1) return 0; + else return 1; + } + if (!occupied(b,r*19 + c)) return 1; + if (lib[r*19 + c] > 1) return 1; + return 0; +} + +int suicide_go(float *b, int p, int r, int c) +{ + int *l = calculate_liberties(b); + int safe = 0; + safe = safe || makes_safe_go(b, l, p, r+1, c); + safe = safe || makes_safe_go(b, l, p, r-1, c); + safe = safe || makes_safe_go(b, l, p, r, c+1); + safe = safe || makes_safe_go(b, l, p, r, c-1); + free(l); + return !safe; +} + +int check_ko(float *x, float *ko) +{ + if(!ko) return 0; + float curr[19*19*3]; + copy_cpu(19*19*3, x, 1, curr, 1); + if(curr[19*19*2] != ko[19*19*2]) flip_board(curr); + if(compare_board(curr, ko)) return 1; + return 0; +} + +int legal_go(float *b, float *ko, int p, int r, int c) +{ + if (occupied(b, r*19+c)) return 0; + float curr[19*19*3]; + 
copy_cpu(19*19*3, b, 1, curr, 1); + move_go(curr, p, r, c); + if(check_ko(curr, ko)) return 0; + if(suicide_go(b, p, r, c)) return 0; + return 1; +} + +/* + move generate_move(mcts_tree *root, network *net, int player, float *board, int multi, float temp, float *ko, int print) + { + move m = {0}; +//root = run_mcts(tree, network *net, float *board, float *ko, int n, float cpuct) +int i, j; +int empty = 1; +for(i = 0; i < 19*19; ++i){ +if (occupied(board, i)) { +empty = 0; +break; +} +} +if(empty) { +m.value = .5; +m.mcts = .5; +m.row = 3; +m.col = 15; +return m; +} + +float move[362]; +if (player < 0) flip_board(board); +float result = predict_move(net, board, move, multi, ko, temp); +if (player < 0) flip_board(board); +if(result == -1000.f) return -2; + +for(i = 0; i < 19; ++i){ +for(j = 0; j < 19; ++j){ +if (!legal_go(board, ko, player, i, j)) move[i*19 + j] = 0; +} +} + +int indexes[nind]; +top_k(move, 19*19+1, nind, indexes); + + +int max = max_index(move, 19*19+1); +int row = max / 19; +int col = max % 19; +int index = sample_array(move, 19*19+1); + +if(print){ +top_k(move, 19*19+1, nind, indexes); +for(i = 0; i < nind; ++i){ +if (!move[indexes[i]]) indexes[i] = -1; +} +print_board(stderr, board, 1, indexes); +fprintf(stderr, "%s To Move\n", player > 0 ? 
"X" : "O"); +fprintf(stderr, "%.2f%% Win Chance\n", (result+1)/2*100); +for(i = 0; i < nind; ++i){ +int index = indexes[i]; +int row = index / 19; +int col = index % 19; +if(row == 19){ +fprintf(stderr, "%d: Pass, %.2f%%\n", i+1, move[index]*100); +} else { +fprintf(stderr, "%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100); +} +} +} +if (row == 19) return -1; + +if (suicide_go(board, player, row, col)){ +return -1; +} + +if (suicide_go(board, player, index/19, index%19)){ +index = max; +} +if (index == 19*19) return -1; +return index; +} +*/ + +void valid_go(char *cfgfile, char *weightfile, int multi, char *filename) +{ + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + float *board = calloc(19*19*3, sizeof(float)); + float *move = calloc(19*19+2, sizeof(float)); + // moves m = load_go_moves("/home/pjreddie/backup/go.test"); + moves m = load_go_moves(filename); + + int N = m.n; + int i,j; + int correct = 0; + for (i = 0; i 0) for(j = 0; j < 19*19; ++j) board[19*19*2 + j] = 1; + predict_move2(net, board, move, multi); + int index = max_index(move, 19*19+1); + if(index == truth) ++correct; + printf("%d Accuracy %f\n", i, (float) correct/(i+1)); + } +} + +int print_game(float *board, FILE *fp) +{ + int i, j; + int count = 3; + fprintf(fp, "komi 6.5\n"); + fprintf(fp, "boardsize 19\n"); + fprintf(fp, "clear_board\n"); + for(j = 0; j < 19; ++j){ + for(i = 0; i < 19; ++i){ + if(occupied(board,j*19 + i) == 1) fprintf(fp, "play black %c%d\n", 'A'+i+(i>=8), 19-j); + if(occupied(board,j*19 + i) == -1) fprintf(fp, "play white %c%d\n", 'A'+i+(i>=8), 19-j); + if(occupied(board,j*19 + i)) ++count; + } + } + return count; +} + + +int stdin_ready() +{ + fd_set readfds; + FD_ZERO(&readfds); + + struct timeval 
timeout; + timeout.tv_sec = 0; + timeout.tv_usec = 0; + FD_SET(STDIN_FILENO, &readfds); + + if (select(1, &readfds, NULL, NULL, &timeout)){ + return 1; + } + return 0; +} + +mcts_tree *ponder(mcts_tree *tree, network *net, float *b, float *ko, int player, float cpuct) +{ + double t = what_time_is_it_now(); + int count = 0; + if (tree) count = tree->total_count; + while(!stdin_ready()){ + if (what_time_is_it_now() - t > 120) break; + tree = run_mcts(tree, net, b, ko, player, 100000, cpuct, .1); + } + fprintf(stderr, "Pondered %d moves...\n", tree->total_count - count); + return tree; +} + +void engine_go(char *filename, char *weightfile, int mcts_iters, float secs, float temp, float cpuct, int anon, int resign) +{ + mcts_tree *root = 0; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *one = calloc(19*19*3, sizeof(float)); + float *two = calloc(19*19*3, sizeof(float)); + int ponder_player = 0; + int passed = 0; + int move_num = 0; + int main_time = 0; + int byo_yomi_time = 0; + int byo_yomi_stones = 0; + int black_time_left = 0; + int black_stones_left = 0; + int white_time_left = 0; + int white_stones_left = 0; + float orig_time = secs; + int old_ponder = 0; + while(1){ + if(ponder_player){ + root = ponder(root, net, board, two, ponder_player, cpuct); + } + old_ponder = ponder_player; + ponder_player = 0; + char buff[256]; + int id = 0; + int has_id = (scanf("%d", &id) == 1); + scanf("%s", buff); + if (feof(stdin)) break; + fprintf(stderr, "%s\n", buff); + char ids[256]; + sprintf(ids, "%d", id); + //fprintf(stderr, "%s\n", buff); + if (!has_id) ids[0] = 0; + if (!strcmp(buff, "protocol_version")){ + printf("=%s 2\n\n", ids); + } else if (!strcmp(buff, "name")){ + if(anon){ + printf("=%s The Fool!\n\n", ids); + }else{ + printf("=%s DarkGo\n\n", ids); + } + } else if (!strcmp(buff, "time_settings")){ + ponder_player = old_ponder; + 
scanf("%d %d %d", &main_time, &byo_yomi_time, &byo_yomi_stones); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "time_left")){ + ponder_player = old_ponder; + char color[256]; + int time = 0, stones = 0; + scanf("%s %d %d", color, &time, &stones); + if (color[0] == 'b' || color[0] == 'B'){ + black_time_left = time; + black_stones_left = stones; + } else { + white_time_left = time; + white_stones_left = stones; + } + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "version")){ + if(anon){ + printf("=%s :-DDDD\n\n", ids); + }else { + printf("=%s 1.0. Want more DarkGo? You can find me on OGS, unlimited games, no waiting! https://online-go.com/user/view/434218\n\n", ids); + } + } else if (!strcmp(buff, "known_command")){ + char comm[256]; + scanf("%s", comm); + int known = (!strcmp(comm, "protocol_version") || + !strcmp(comm, "name") || + !strcmp(comm, "version") || + !strcmp(comm, "known_command") || + !strcmp(comm, "list_commands") || + !strcmp(comm, "quit") || + !strcmp(comm, "boardsize") || + !strcmp(comm, "clear_board") || + !strcmp(comm, "komi") || + !strcmp(comm, "final_status_list") || + !strcmp(comm, "play") || + !strcmp(comm, "genmove_white") || + !strcmp(comm, "genmove_black") || + !strcmp(comm, "fixed_handicap") || + !strcmp(comm, "genmove")); + if(known) printf("=%s true\n\n", ids); + else printf("=%s false\n\n", ids); + } else if (!strcmp(buff, "list_commands")){ + printf("=%s protocol_version\nshowboard\nname\nversion\nknown_command\nlist_commands\nquit\nboardsize\nclear_board\nkomi\nplay\ngenmove_black\ngenmove_white\ngenmove\nfinal_status_list\nfixed_handicap\n\n", ids); + } else if (!strcmp(buff, "quit")){ + break; + } else if (!strcmp(buff, "boardsize")){ + int boardsize = 0; + scanf("%d", &boardsize); + //fprintf(stderr, "%d\n", boardsize); + if(boardsize != 19){ + printf("?%s unacceptable size\n\n", ids); + } else { + root = move_mcts(root, -1); + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + move_num = 0; + printf("=%s 
\n\n", ids); + } + } else if (!strcmp(buff, "fixed_handicap")){ + int handicap = 0; + scanf("%d", &handicap); + int indexes[] = {72, 288, 300, 60, 180, 174, 186, 66, 294}; + int i; + for(i = 0; i < handicap; ++i){ + board[indexes[i]] = 1; + ++move_num; + } + root = move_mcts(root, -1); + } else if (!strcmp(buff, "clear_board")){ + passed = 0; + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + move_num = 0; + root = move_mcts(root, -1); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "komi")){ + float komi = 0; + scanf("%f", &komi); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "showboard")){ + printf("=%s \n", ids); + print_board(stdout, board, 1, 0); + printf("\n"); + } else if (!strcmp(buff, "play") || !strcmp(buff, "black") || !strcmp(buff, "white")){ + ++move_num; + char color[256]; + if(!strcmp(buff, "play")) + { + scanf("%s ", color); + } else { + scanf(" "); + color[0] = buff[0]; + } + char c; + int r; + int count = scanf("%c%d", &c, &r); + int player = (color[0] == 'b' || color[0] == 'B') ? 
1 : -1; + if((c == 'p' || c == 'P') && count < 2) { + passed = 1; + printf("=%s \n\n", ids); + char *line = fgetl(stdin); + free(line); + fflush(stdout); + fflush(stderr); + root = move_mcts(root, 19*19); + continue; + } else { + passed = 0; + } + if(c >= 'A' && c <= 'Z') c = c - 'A'; + if(c >= 'a' && c <= 'z') c = c - 'a'; + if(c >= 8) --c; + r = 19 - r; + fprintf(stderr, "move: %d %d\n", r, c); + + float *swap = two; + two = one; + one = swap; + move_go(board, player, r, c); + copy_cpu(19*19*3, board, 1, one, 1); + if(root) fprintf(stderr, "Prior: %f\n", root->prior[r*19 + c]); + if(root) fprintf(stderr, "Mean: %f\n", root->mean[r*19 + c]); + if(root) fprintf(stderr, "Result: %f\n", root->result); + root = move_mcts(root, r*19 + c); + if(root) fprintf(stderr, "Visited: %d\n", root->total_count); + else fprintf(stderr, "NOT VISITED\n"); + + printf("=%s \n\n", ids); + //print_board(stderr, board, 1, 0); + } else if (!strcmp(buff, "genmove") || !strcmp(buff, "genmove_black") || !strcmp(buff, "genmove_white")){ + ++move_num; + int player = 0; + if(!strcmp(buff, "genmove")){ + char color[256]; + scanf("%s", color); + player = (color[0] == 'b' || color[0] == 'B') ? 
1 : -1; + } else if (!strcmp(buff, "genmove_black")){ + player = 1; + } else { + player = -1; + } + if(player > 0){ + if(black_time_left <= 30) secs = 2.5; + else secs = orig_time; + } else { + if(white_time_left <= 30) secs = 2.5; + else secs = orig_time; + } + ponder_player = -player; + + //tree = generate_move(net, player, board, multi, .1, two, 1); + double t = what_time_is_it_now(); + root = run_mcts(root, net, board, two, player, mcts_iters, cpuct, secs); + fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + move m = pick_move(root, temp, player); + root = move_mcts(root, m.row*19 + m.col); + + + if(move_num > resign && m.value < .1 && m.mcts < .1){ + printf("=%s resign\n\n", ids); + } else if(m.row == 19){ + printf("=%s pass\n\n", ids); + passed = 0; + } else { + int row = m.row; + int col = m.col; + + float *swap = two; + two = one; + one = swap; + + move_go(board, player, row, col); + copy_cpu(19*19*3, board, 1, one, 1); + row = 19 - row; + if (col >= 8) ++col; + printf("=%s %c%d\n\n", ids, 'A' + col, row); + } + + } else if (!strcmp(buff, "p")){ + //print_board(board, 1, 0); + } else if (!strcmp(buff, "final_status_list")){ + char type[256]; + scanf("%s", type); + fprintf(stderr, "final_status\n"); + char *line = fgetl(stdin); + free(line); + if(type[0] == 'd' || type[0] == 'D'){ + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, "%s final_status_list dead\n", ids); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + while((l = fgetl(p))){ + printf("%s\n", l); + free(l); + } + } else { + printf("?%s unknown command\n\n", ids); + } + } else if (!strcmp(buff, "kgs-genmove_cleanup")){ + char type[256]; + scanf("%s", type); + fprintf(stderr, "kgs-genmove_cleanup\n"); + char *line = fgetl(stdin); + free(line); + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, 
"%s kgs-genmove_cleanup %s\n", ids, type); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + while((l = fgetl(p))){ + printf("%s\n", l); + free(l); + } + } else { + char *line = fgetl(stdin); + free(line); + printf("?%s unknown command\n\n", ids); + } + fflush(stdout); + fflush(stderr); + } + printf("%d %d %d\n",passed, black_stones_left, white_stones_left); +} + +void test_go(char *cfg, char *weights, int multi) +{ + int i; + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(time(0)); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *move = calloc(19*19+1, sizeof(float)); + int color = 1; + while(1){ + float result = predict_move2(net, board, move, multi); + printf("%.2f%% Win Chance\n", (result+1)/2*100); + + int indexes[nind]; + int row, col; + top_k(move, 19*19+1, nind, indexes); + print_board(stderr, board, color, indexes); + for(i = 0; i < nind; ++i){ + int index = indexes[i]; + row = index / 19; + col = index % 19; + if(row == 19){ + printf("%d: Pass, %.2f%%\n", i+1, move[index]*100); + } else { + printf("%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100); + } + } + //if(color == 1) printf("\u25EF Enter move: "); + //else printf("\u25C9 Enter move: "); + if(color == 1) printf("X Enter move: "); + else printf("O Enter move: "); + + char c; + char *line = fgetl(stdin); + int picked = 1; + int dnum = sscanf(line, "%d", &picked); + int cnum = sscanf(line, "%c", &c); + if (strlen(line) == 0 || dnum) { + --picked; + if (picked < nind){ + int index = indexes[picked]; + row = index / 19; + col = index % 19; + if(row < 19){ + move_go(board, 1, row, col); + } + } + } else if (cnum){ + if (c <= 'T' && c >= 'A'){ + int num = sscanf(line, "%c %d", &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num 
== 2) move_go(board, 1, row, col); + } else if (c == 'p') { + // Pass + } else if(c=='b' || c == 'w'){ + char g; + int num = sscanf(line, "%c %c %d", &g, &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num == 3) { + int mc = (g == 'b') ? 1 : -1; + if (mc == color) { + board[row*19 + col] = 1; + } else { + board[19*19 + row*19 + col] = 1; + } + } + } else if(c == 'c'){ + char g; + int num = sscanf(line, "%c %c %d", &g, &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num == 3) { + board[row*19 + col] = 0; + board[19*19 + row*19 + col] = 0; + } + } + } + free(line); + flip_board(board); + color = -color; + } +} + +float score_game(float *board) +{ + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, "final_score\n"); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + float score = 0; + char player = 0; + while((l = fgetl(p))){ + fprintf(stderr, "%s \t", l); + int n = sscanf(l, "= %c+%f", &player, &score); + free(l); + if (n == 2) break; + } + if(player == 'W') score = -score; + pclose(p); + return score; +} + +void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi) +{ + mcts_tree *tree1 = 0; + mcts_tree *tree2 = 0; + network *net = load_network(filename, weightfile, 0); + //set_batch_network(net, 1); + + network *net2; + if (f2) { + net2 = parse_network_cfg(f2); + if(w2){ + load_weights(net2, w2); + } + } else { + net2 = calloc(1, sizeof(network)); + *net2 = *net; + } + srand(time(0)); + char boards[600][93]; + int count = 0; + //set_batch_network(net, 1); + //set_batch_network(net2, 1); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *one = calloc(19*19*3, sizeof(float)); + float *two = calloc(19*19*3, sizeof(float)); + int done = 0; + int player = 1; + int p1 = 
0; + int p2 = 0; + int total = 0; + float temp = .1; + int mcts_iters = 500; + float cpuct = 5; + while(1){ + if (done){ + tree1 = move_mcts(tree1, -1); + tree2 = move_mcts(tree2, -1); + float score = score_game(board); + if((score > 0) == (total%2==0)) ++p1; + else ++p2; + ++total; + fprintf(stderr, "Total: %d, Player 1: %f, Player 2: %f\n", total, (float)p1/total, (float)p2/total); + sleep(1); + /* + int i = (score > 0)? 0 : 1; + int j; + for(; i < count; i += 2){ + for(j = 0; j < 93; ++j){ + printf("%c", boards[i][j]); + } + printf("\n"); + } + */ + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + player = 1; + done = 0; + count = 0; + fflush(stdout); + fflush(stderr); + } + //print_board(stderr, board, 1, 0); + //sleep(1); + + if ((total%2==0) == (player==1)){ + //mcts_iters = 4500; + cpuct = 5; + } else { + //mcts_iters = 500; + cpuct = 1; + } + network *use = ((total%2==0) == (player==1)) ? net : net2; + mcts_tree *t = ((total%2==0) == (player==1)) ? tree1 : tree2; + t = run_mcts(t, use, board, two, player, mcts_iters, cpuct, 0); + move m = pick_move(t, temp, player); + if(((total%2==0) == (player==1))) tree1 = t; + else tree2 = t; + + tree1 = move_mcts(tree1, m.row*19 + m.col); + tree2 = move_mcts(tree2, m.row*19 + m.col); + + if(m.row == 19){ + done = 1; + continue; + } + int row = m.row; + int col = m.col; + + float *swap = two; + two = one; + one = swap; + + if(player < 0) flip_board(board); + boards[count][0] = row; + boards[count][1] = col; + board_to_string(boards[count] + 2, board); + if(player < 0) flip_board(board); + ++count; + + move_go(board, player, row, col); + copy_cpu(19*19*3, board, 1, one, 1); + + player = -player; + } +} + +void run_go(int argc, char **argv) +{ + //boards_go(); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + 
if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + int clear = find_arg(argc, argv, "-clear"); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *c2 = (argc > 5) ? argv[5] : 0; + char *w2 = (argc > 6) ? argv[6] : 0; + int multi = find_arg(argc, argv, "-multi"); + int anon = find_arg(argc, argv, "-anon"); + int iters = find_int_arg(argc, argv, "-iters", 500); + int resign = find_int_arg(argc, argv, "-resign", 175); + float cpuct = find_float_arg(argc, argv, "-cpuct", 5); + float temp = find_float_arg(argc, argv, "-temp", .1); + float time = find_float_arg(argc, argv, "-time", 0); + if(0==strcmp(argv[2], "train")) train_go(cfg, weights, c2, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) valid_go(cfg, weights, multi, c2); + else if(0==strcmp(argv[2], "self")) self_go(cfg, weights, c2, w2, multi); + else if(0==strcmp(argv[2], "test")) test_go(cfg, weights, multi); + else if(0==strcmp(argv[2], "engine")) engine_go(cfg, weights, iters, time, temp, cpuct, anon, resign); +} + + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/instance-segmenter.c b/workloads/realworld/uvm_prefetch/darknet/examples/instance-segmenter.c new file mode 100644 index 0000000000000000000000000000000000000000..664e71426d58e19f758bab198783eac178a3cdc4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/instance-segmenter.c @@ -0,0 +1,267 @@ +#include "darknet.h" +#include +#include + +void normalize_image2(image p); +void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); 
+ printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + image pred = get_network_image(net); + + image embed = pred; + embed.c = 3; + embed.data += embed.w*embed.h*80; + + int div = net->w/pred.w; + assert(pred.w * div == net->w); + assert(pred.h * div == net->h); + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.scale = div; + args.num_boxes = 90; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.classes = 80; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = ISEG_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus 
== 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(display){ + image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]); + image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]); + pred.c = 80; + image mask = mask_to_rgb(tr); + image prmask = mask_to_rgb(pred); + image ecopy = copy_image(embed); + normalize_image2(ecopy); + show_image(ecopy, "embed", 1); + free_image(ecopy); + + show_image(im, "input", 1); + show_image(prmask, "pred", 1); + show_image(mask, "truth", 100); + free_image(mask); + free_image(prmask); + } + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_isegmenter(char *datafile, char *cfg, char *weights, char *filename) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = 
load_image_color(input, 0, 0); + image sized = letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + show_image(sized, "orig", 1); + show_image(prmask, "pred", 0); + free_image(im); + free_image(sized); + free_image(prmask); + if (filename) break; + } +} + + +void demo_isegmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Classifier Demo\n"); + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = letterbox_image(in, net->w, net->h); + + network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + show_image(prmask, "Segmenter", 10); + + free_image(in_s); + free_image(in); + free_image(prmask); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_isegmenter(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + 
if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_isegmenter(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_isegmenter(data, cfg, weights, gpus, ngpus, clear, display); + else if(0==strcmp(argv[2], "demo")) demo_isegmenter(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/lsd.c b/workloads/realworld/uvm_prefetch/darknet/examples/lsd.c new file mode 100644 index 0000000000000000000000000000000000000000..4ab944c884b9df422cd2b273b1faee128f2ab112 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/lsd.c @@ -0,0 +1,1378 @@ +#include +#include "darknet.h" + +/* +void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg, char *aweight, int clear) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + //char *style_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *style_images = "/home/pjreddie/zelda.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + network fnet = load_network(fcfg, fweight, clear); + network gnet = load_network(gcfg, gweight, clear); + network anet = load_network(acfg, aweight, clear); + char *gbase = basecfg(gcfg); + char *abase = basecfg(acfg); + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = 
gnet->batch*gnet->subdivisions; + int i = *gnet->seen/imgs; + data train, tbuffer; + data style, sbuffer; + + + list *slist = get_paths(style_images); + char **spaths = (char **)list_to_array(slist); + + list *tlist = get_paths(train_images); + char **tpaths = (char **)list_to_array(tlist); + + load_args targs= get_base_args(gnet); + targs.paths = tpaths; + targs.n = imgs; + targs.m = tlist->size; + targs.d = &tbuffer; + targs.type = CLASSIFICATION_DATA; + targs.classes = 1; + char *ls[1] = {"zelda"}; + targs.labels = ls; + + load_args sargs = get_base_args(gnet); + sargs.paths = spaths; + sargs.n = imgs; + sargs.m = slist->size; + sargs.d = &sbuffer; + sargs.type = CLASSIFICATION_DATA; + sargs.classes = 1; + sargs.labels = ls; + + pthread_t tload_thread = load_data_in_thread(targs); + pthread_t sload_thread = load_data_in_thread(sargs); + clock_t time; + + float aloss_avg = -1; + float floss_avg = -1; + + fnet->train=1; + int x_size = fnet->inputs*fnet->batch; + int y_size = fnet->truths*fnet->batch; + float *X = calloc(x_size, sizeof(float)); + float *y = calloc(y_size, sizeof(float)); + + + int ax_size = anet->inputs*anet->batch; + int ay_size = anet->truths*anet->batch; + fill_gpu(ay_size, .9, anet->truth_gpu, 1); + anet->delta_gpu = cuda_make_array(0, ax_size); + anet->train = 1; + + int gx_size = gnet->inputs*gnet->batch; + int gy_size = gnet->truths*gnet->batch; + gstate.input = cuda_make_array(0, gx_size); + gstate.truth = 0; + gstate.delta = 0; + gstate.train = 1; + + while (get_current_batch(gnet) < gnet->max_batches) { + i += 1; + time=clock(); + pthread_join(tload_thread, 0); + pthread_join(sload_thread, 0); + train = tbuffer; + style = sbuffer; + tload_thread = load_data_in_thread(targs); + sload_thread = load_data_in_thread(sargs); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data generated = copy_data(train); + time=clock(); + + int j, k; + float floss = 0; + for(j = 0; j < fnet->subdivisions; ++j){ + layer imlayer = 
gnet->layers[gnet->n - 1]; + get_next_batch(train, fnet->batch, j*fnet->batch, X, y); + + cuda_push_array(fstate.input, X, x_size); + cuda_push_array(gstate.input, X, gx_size); + *gnet->seen += gnet->batch; + + forward_network_gpu(fnet, fstate); + float *feats = fnet->layers[fnet->n - 2].output_gpu; + copy_gpu(y_size, feats, 1, fstate.truth, 1); + + forward_network_gpu(gnet, gstate); + float *gen = gnet->layers[gnet->n-1].output_gpu; + copy_gpu(x_size, gen, 1, fstate.input, 1); + + fill_gpu(x_size, 0, fstate.delta, 1); + forward_network_gpu(fnet, fstate); + backward_network_gpu(fnet, fstate); + //HERE + + astate.input = gen; + fill_gpu(ax_size, 0, astate.delta, 1); + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + float *delta = imlayer.delta_gpu; + fill_gpu(x_size, 0, delta, 1); + scal_gpu(x_size, 100, astate.delta, 1); + scal_gpu(x_size, .001, fstate.delta, 1); + axpy_gpu(x_size, 1, fstate.delta, 1, delta, 1); + axpy_gpu(x_size, 1, astate.delta, 1, delta, 1); + + //fill_gpu(x_size, 0, delta, 1); + //cuda_push_array(delta, X, x_size); + //axpy_gpu(x_size, -1, imlayer.output_gpu, 1, delta, 1); + //printf("pix error: %f\n", cuda_mag_array(delta, x_size)); + printf("fea error: %f\n", cuda_mag_array(fstate.delta, x_size)); + printf("adv error: %f\n", cuda_mag_array(astate.delta, x_size)); + //axpy_gpu(x_size, 1, astate.delta, 1, delta, 1); + + backward_network_gpu(gnet, gstate); + + floss += get_network_cost(fnet) /(fnet->subdivisions*fnet->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1); + generated.y.vals[index][0] = .1; + style.y.vals[index][0] = .9; + } + } + +*/ +/* + image sim = float_to_image(anet->w, anet->h, anet->c, style.X.vals[j]); + show_image(sim, "style"); + cvWaitKey(0); + */ + /* + + 
harmless_update_network_gpu(anet); + + data merge = concat_data(style, generated); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(generated); + free_data(style); + if (aloss_avg < 0) aloss_avg = aloss; + if (floss_avg < 0) floss_avg = floss; + aloss_avg = aloss_avg*.9 + aloss*.1; + floss_avg = floss_avg*.9 + floss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, floss, aloss, floss_avg, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, gbase, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, gbase); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } +#endif +} +*/ + +/* +void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/train1.txt"; + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network net = load_network(cfg, weight, clear); + network anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = net->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + i = *net->seen/imgs; + data train, buffer; + + + list 
*plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.type = CLASSIFICATION_DATA; + args.classes = 1; + char *ls[1] = {"coco"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + network_state gstate = {0}; + gstate.index = 0; + gstate.net = net; + int x_size = get_network_input_size(net)*net->batch; + int y_size = x_size; + gstate.input = cuda_make_array(0, x_size); + gstate.truth = cuda_make_array(0, y_size); + gstate.delta = 0; + gstate.train = 1; + float *pixs = calloc(x_size, sizeof(float)); + float *graypixs = calloc(x_size, sizeof(float)); + float *y = calloc(y_size, sizeof(float)); + + network_state astate = {0}; + astate.index = 0; + astate.net = anet; + int ay_size = get_network_output_size(anet)*anet->batch; + astate.input = 0; + astate.truth = 0; + astate.delta = 0; + astate.train = 1; + + float *imerror = cuda_make_array(0, imlayer.outputs); + float *ones_gpu = cuda_make_array(0, ay_size); + fill_gpu(ay_size, .9, ones_gpu, 1); + + float aloss_avg = -1; + float gloss_avg = -1; + + //data generated = copy_data(train); + + while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gray = copy_data(train); + for(j = 0; j < imgs; ++j){ + image gim = float_to_image(net->w, net->h, net->c, gray.X.vals[j]); + grayscale_image_3c(gim); + train.y.vals[j][0] = .9; + + image yim = float_to_image(net->w, net->h, net->c, 
train.X.vals[j]); + //rgb_to_yuv(yim); + } + time=clock(); + float gloss = 0; + + for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, pixs, y); + get_next_batch(gray, net->batch, j*net->batch, graypixs, y); + cuda_push_array(gstate.input, graypixs, x_size); + cuda_push_array(gstate.truth, pixs, y_size); + */ + /* + image origi = float_to_image(net->w, net->h, 3, pixs); + image grayi = float_to_image(net->w, net->h, 3, graypixs); + show_image(grayi, "gray"); + show_image(origi, "orig"); + cvWaitKey(0); + */ + /* + *net->seen += net->batch; + forward_network_gpu(net, gstate); + + fill_gpu(imlayer.outputs, 0, imerror, 1); + astate.input = imlayer.output_gpu; + astate.delta = imerror; + astate.truth = ones_gpu; + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + scal_gpu(imlayer.outputs, .1, net->layers[net->n-1].delta_gpu, 1); + + backward_network_gpu(net, gstate); + + scal_gpu(imlayer.outputs, 1000, imerror, 1); + + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs)); + printf("features %f\n", cuda_mag_array(net->layers[net->n-1].delta_gpu, imlayer.outputs)); + + axpy_gpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1); + + gloss += get_network_cost(net) /(net->subdivisions*net->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1); + gray.y.vals[index][0] = .1; + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gray); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + update_network_gpu(anet); + free_data(merge); + free_data(train); + free_data(gray); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: 
%f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +#endif +} +*/ + +void slerp(float *start, float *end, float s, int n, float *out) +{ + float omega = acos(dot_cpu(n, start, 1, end, 1)); + float so = sin(omega); + fill_cpu(n, 0, out, 1); + axpy_cpu(n, sin((1-s)*omega)/so, start, 1, out, 1); + axpy_cpu(n, sin(s*omega)/so, end, 1, out, 1); + + float mag = mag_array(out, n); + scale_array(out, n, 1./mag); +} + +image random_unit_vector_image(int w, int h, int c) +{ + image im = make_image(w, h, c); + int i; + for(i = 0; i < im.w*im.h*im.c; ++i){ + im.data[i] = rand_normal(); + } + float mag = mag_array(im.data, im.w*im.h*im.c); + scale_array(im.data, im.w*im.h*im.c, 1./mag); + return im; +} + +void inter_dcgan(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int i, imlayer = 0; + + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = i; + printf("%d\n", i); + break; + } + } + image start = random_unit_vector_image(net->w, net->h, net->c); + image end = random_unit_vector_image(net->w, net->h, net->c); + image im = make_image(net->w, net->h, net->c); + image orig = copy_image(start); + + int c = 0; + int count = 0; + int max_count = 15; + while(1){ + ++c; + + if(count == 
max_count){ + count = 0; + free_image(start); + start = end; + end = random_unit_vector_image(net->w, net->h, net->c); + if(c > 300){ + end = orig; + } + if(c>300 + max_count) return; + } + ++count; + + slerp(start.data, end.data, (float)count / max_count, im.w*im.h*im.c, im.data); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + normalize_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + //char buff[256]; + sprintf(buff, "out%05d", c); + save_image(out, "out"); + save_image(out, buff); + show_image(out, "out", 0); + } +} + +void test_dcgan(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int imlayer = 0; + + imlayer = net->n-1; + + while(1){ + image im = make_image(net->w, net->h, net->c); + int i; + for(i = 0; i < im.w*im.h*im.c; ++i){ + im.data[i] = rand_normal(); + } + //float mag = mag_array(im.data, im.w*im.h*im.c); + //scale_array(im.data, im.w*im.h*im.c, 1./mag); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + normalize_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 0); + + free_image(im); + } +} + +void set_network_alpha_beta(network *net, float alpha, float beta) +{ + int i; + for(i = 0; i < net->n; ++i){ + if(net->layers[i].type == SHORTCUT){ + net->layers[i].alpha = alpha; + net->layers[i].beta = beta; + } + } +} + +void train_prog(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) +{ +#ifdef GPU + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", 
base); + network *gnet = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = gnet->layers[gnet->n-1]; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + i = *gnet->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(anet); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + args.threads=16; + args.classes = 1; + char *ls[2] = {"imagenet", "zzzzzzzz"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + gnet->train = 1; + anet->train = 1; + + int x_size = gnet->inputs*gnet->batch; + int y_size = gnet->truths*gnet->batch; + float *imerror = cuda_make_array(0, y_size); + + float aloss_avg = -1; + + if (maxbatch == 0) maxbatch = gnet->max_batches; + while (get_current_batch(gnet) < maxbatch) { + { + int cb = get_current_batch(gnet); + float alpha = (float) cb / (maxbatch/2); + if(alpha > 1) alpha = 1; + float beta = 1 - alpha; + printf("%f %f\n", alpha, beta); + set_network_alpha_beta(gnet, alpha, beta); + set_network_alpha_beta(anet, beta, alpha); + } + + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gen = copy_data(train); + for (j = 0; j < imgs; ++j) { + train.y.vals[j][0] = 1; + gen.y.vals[j][0] = 0; + } + time=clock(); + + for (j = 0; j < gnet->subdivisions; ++j) { + get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0); + int z; + for(z = 0; z < x_size; ++z){ + gnet->input[z] = rand_normal(); + } + /* + for(z = 0; z < gnet->batch; ++z){ + float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs); + 
scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag); + } + */ + *gnet->seen += gnet->batch; + forward_network(gnet); + + fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); + fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1); + copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1); + anet->delta_gpu = imerror; + forward_network(anet); + backward_network(anet); + + //float genaloss = *anet->cost / anet->batch; + + scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); + scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1); + + axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1); + + backward_network(gnet); + + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gen); + float aloss = train_network(anet, merge); + +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + show_image(im, "gen", 1); + show_image(im2, "train", 1); + save_image(im, "gen"); + save_image(im2, "train"); + } +#endif + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(gen); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + + printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", 
backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(gnet, buff); +#endif +} + +void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) +{ +#ifdef GPU + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network *gnet = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + //float orig_rate = anet->learning_rate; + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < gnet->n; ++i) { + if (gnet->layers[i].out_c == 3) { + imlayer = gnet->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + i = *gnet->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(anet); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + args.threads=16; + args.classes = 1; + char *ls[2] = {"imagenet", "zzzzzzzz"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + gnet->train = 1; + anet->train = 1; + + int x_size = gnet->inputs*gnet->batch; + int y_size = gnet->truths*gnet->batch; + float *imerror = cuda_make_array(0, y_size); + + //int ay_size = anet->truths*anet->batch; + + float aloss_avg = -1; + + //data generated = copy_data(train); + + if (maxbatch == 0) maxbatch = gnet->max_batches; + while (get_current_batch(gnet) < maxbatch) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + //translate_data_rows(train, -.5); + //scale_data_rows(train, 2); + + load_thread = 
load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gen = copy_data(train); + for (j = 0; j < imgs; ++j) { + train.y.vals[j][0] = 1; + gen.y.vals[j][0] = 0; + } + time=clock(); + + for(j = 0; j < gnet->subdivisions; ++j){ + get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0); + int z; + for(z = 0; z < x_size; ++z){ + gnet->input[z] = rand_normal(); + } + for(z = 0; z < gnet->batch; ++z){ + float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs); + scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag); + } + /* + for(z = 0; z < 100; ++z){ + printf("%f, ", gnet->input[z]); + } + printf("\n"); + printf("input: %f %f\n", mean_array(gnet->input, x_size), variance_array(gnet->input, x_size)); + */ + + //cuda_push_array(gnet->input_gpu, gnet->input, x_size); + //cuda_push_array(gnet->truth_gpu, gnet->truth, y_size); + *gnet->seen += gnet->batch; + forward_network(gnet); + + fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); + fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1); + copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1); + anet->delta_gpu = imerror; + forward_network(anet); + backward_network(anet); + + //float genaloss = *anet->cost / anet->batch; + //printf("%f\n", genaloss); + + scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); + scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1); + + //printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch)); + //printf("features %f\n", cuda_mag_array(gnet->layers[gnet->n-1].delta_gpu, imlayer.outputs*imlayer.batch)); + + axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1); + + backward_network(gnet); + + /* + for(k = 0; k < gnet->n; ++k){ + layer l = gnet->layers[k]; + cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); + printf("%d: %f %f\n", k, mean_array(l.output, l.outputs*l.batch), variance_array(l.output, 
l.outputs*l.batch)); + } + */ + + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gen); + //randomize_data(merge); + float aloss = train_network(anet, merge); + + //translate_image(im, 1); + //scale_image(im, .5); + //translate_image(im2, 1); + //scale_image(im2, .5); +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + show_image(im, "gen", 1); + show_image(im2, "train", 1); + save_image(im, "gen"); + save_image(im2, "train"); + } +#endif + + /* + if(aloss < .1){ + anet->learning_rate = 0; + } else if (aloss > .3){ + anet->learning_rate = orig_rate; + } + */ + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(gen); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + + printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(gnet, buff); +#endif +} + +void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/train1.txt"; + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; 
+ char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network *net = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = net->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(net); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + + args.type = CLASSIFICATION_DATA; + args.classes = 1; + char *ls[2] = {"imagenet"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + int x_size = net->inputs*net->batch; + //int y_size = x_size; + net->delta = 0; + net->train = 1; + float *pixs = calloc(x_size, sizeof(float)); + float *graypixs = calloc(x_size, sizeof(float)); + //float *y = calloc(y_size, sizeof(float)); + + //int ay_size = anet->outputs*anet->batch; + anet->delta = 0; + anet->train = 1; + + float *imerror = cuda_make_array(0, imlayer.outputs*imlayer.batch); + + float aloss_avg = -1; + float gloss_avg = -1; + + //data generated = copy_data(train); + + while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gray = copy_data(train); + for(j = 0; j < imgs; ++j){ + image gim = float_to_image(net->w, net->h, net->c, gray.X.vals[j]); + grayscale_image_3c(gim); + 
train.y.vals[j][0] = .95; + gray.y.vals[j][0] = .05; + } + time=clock(); + float gloss = 0; + + for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, pixs, 0); + get_next_batch(gray, net->batch, j*net->batch, graypixs, 0); + cuda_push_array(net->input_gpu, graypixs, net->inputs*net->batch); + cuda_push_array(net->truth_gpu, pixs, net->truths*net->batch); + /* + image origi = float_to_image(net->w, net->h, 3, pixs); + image grayi = float_to_image(net->w, net->h, 3, graypixs); + show_image(grayi, "gray"); + show_image(origi, "orig"); + cvWaitKey(0); + */ + *net->seen += net->batch; + forward_network_gpu(net); + + fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); + copy_gpu(anet->inputs*anet->batch, imlayer.output_gpu, 1, anet->input_gpu, 1); + fill_gpu(anet->inputs*anet->batch, .95, anet->truth_gpu, 1); + anet->delta_gpu = imerror; + forward_network_gpu(anet); + backward_network_gpu(anet); + + scal_gpu(imlayer.outputs*imlayer.batch, 1./100., net->layers[net->n-1].delta_gpu, 1); + + scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); + + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch)); + printf("features %f\n", cuda_mag_array(net->layers[net->n-1].delta_gpu, imlayer.outputs*imlayer.batch)); + + axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, net->layers[net->n-1].delta_gpu, 1); + + backward_network_gpu(net); + + + gloss += *net->cost /(net->subdivisions*net->batch); + + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gray); + //randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gray.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + 
show_image(im, "gen", 1); + show_image(im2, "train", 1); + } +#endif + free_data(merge); + free_data(train); + free_data(gray); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +#endif +} + +/* + void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfile, int clear) + { +#ifdef GPU +char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; +char *backup_directory = "/home/pjreddie/backup/"; +srand(time(0)); +char *base = basecfg(cfgfile); +printf("%s\n", base); +network net = parse_network_cfg(cfgfile); +if(weightfile){ +load_weights(&net, weightfile); +} +if(clear) *net->seen = 0; + +char *abase = basecfg(acfgfile); +network anet = parse_network_cfg(acfgfile); +if(aweightfile){ +load_weights(&anet, aweightfile); +} +if(clear) *anet->seen = 0; + +int i, j, k; +layer imlayer = {0}; +for (i = 0; i < net->n; ++i) { +if (net->layers[i].out_c == 3) { +imlayer = net->layers[i]; +break; +} +} + +printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); +int imgs = net->batch*net->subdivisions; +i = *net->seen/imgs; +data train, buffer; + + +list *plist = get_paths(train_images); +//int N = plist->size; +char 
**paths = (char **)list_to_array(plist); + +load_args args = {0}; +args.w = net->w; +args.h = net->h; +args.paths = paths; +args.n = imgs; +args.m = plist->size; +args.d = &buffer; + +args.min = net->min_crop; +args.max = net->max_crop; +args.angle = net->angle; +args.aspect = net->aspect; +args.exposure = net->exposure; +args.saturation = net->saturation; +args.hue = net->hue; +args.size = net->w; +args.type = CLASSIFICATION_DATA; +args.classes = 1; +char *ls[1] = {"coco"}; +args.labels = ls; + +pthread_t load_thread = load_data_in_thread(args); +clock_t time; + +network_state gstate = {0}; +gstate.index = 0; +gstate.net = net; +int x_size = get_network_input_size(net)*net->batch; +int y_size = 1*net->batch; +gstate.input = cuda_make_array(0, x_size); +gstate.truth = 0; +gstate.delta = 0; +gstate.train = 1; +float *X = calloc(x_size, sizeof(float)); +float *y = calloc(y_size, sizeof(float)); + +network_state astate = {0}; +astate.index = 0; +astate.net = anet; +int ay_size = get_network_output_size(anet)*anet->batch; +astate.input = 0; +astate.truth = 0; +astate.delta = 0; +astate.train = 1; + +float *imerror = cuda_make_array(0, imlayer.outputs); +float *ones_gpu = cuda_make_array(0, ay_size); +fill_gpu(ay_size, 1, ones_gpu, 1); + +float aloss_avg = -1; +float gloss_avg = -1; + +//data generated = copy_data(train); + +while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data generated = copy_data(train); + time=clock(); + float gloss = 0; + + for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, X, y); + cuda_push_array(gstate.input, X, x_size); + *net->seen += net->batch; + forward_network_gpu(net, gstate); + + fill_gpu(imlayer.outputs, 0, imerror, 1); + astate.input = imlayer.output_gpu; + astate.delta = imerror; + astate.truth = ones_gpu; + 
forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + scal_gpu(imlayer.outputs, 1, imerror, 1); + axpy_gpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1); + + backward_network_gpu(net, gstate); + + printf("features %f\n", cuda_mag_array(imlayer.delta_gpu, imlayer.outputs)); + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs)); + + gloss += get_network_cost(net) /(net->subdivisions*net->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1); + generated.y.vals[index][0] = 0; + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, generated); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + update_network_gpu(anet); + free_data(merge); + free_data(train); + free_data(generated); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } +} +char buff[256]; +sprintf(buff, "%s/%s_final.weights", backup_directory, base); +save_weights(net, buff); +#endif +} +*/ + +/* + void train_lsd(char *cfgfile, char *weightfile, int clear) + { + char *train_images = 
"/home/pjreddie/data/coco/trainvalno5k.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + if(clear) *net->seen = 0; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); +//int N = plist->size; +char **paths = (char **)list_to_array(plist); + +load_args args = {0}; +args.w = net->w; +args.h = net->h; +args.paths = paths; +args.n = imgs; +args.m = plist->size; +args.d = &buffer; + +args.min = net->min_crop; +args.max = net->max_crop; +args.angle = net->angle; +args.aspect = net->aspect; +args.exposure = net->exposure; +args.saturation = net->saturation; +args.hue = net->hue; +args.size = net->w; +args.type = CLASSIFICATION_DATA; +args.classes = 1; +char *ls[1] = {"coco"}; +args.labels = ls; + +pthread_t load_thread = load_data_in_thread(args); +clock_t time; +//while(i*imgs < N*120){ +while(get_current_batch(net) < net->max_batches){ +i += 1; +time=clock(); +pthread_join(load_thread, 0); +train = buffer; +load_thread = load_data_in_thread(args); + +printf("Loaded: %lf seconds\n", sec(clock()-time)); + +time=clock(); +float loss = train_network(net, train); +if (avg_loss < 0) avg_loss = loss; +avg_loss = avg_loss*.9 + loss*.1; + +printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); +if(i%1000==0){ +char buff[256]; +sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); +save_weights(net, buff); +} +if(i%100==0){ +char buff[256]; +sprintf(buff, "%s/%s.backup", backup_directory, base); +save_weights(net, buff); +} +free_data(train); +} +char buff[256]; +sprintf(buff, "%s/%s_final.weights", 
backup_directory, base); +save_weights(net, buff); +} +*/ + +void test_lsd(char *cfg, char *weights, char *filename, int gray) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int i, imlayer = 0; + + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = i; + printf("%d\n", i); + break; + } + } + + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + if(gray) grayscale_image_3c(crop); + + float *X = crop.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + constrain_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 1); + show_image(crop, "crop", 0); + + free_image(im); + free_image(resized); + free_image(crop); + if (filename) break; + } +} + + +void run_lsd(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + int batches = find_int_arg(argc, argv, "-b", 0); + char *file = find_char_arg(argc, argv, "-file", "/home/pjreddie/data/imagenet/imagenet1k.train.list"); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + char *acfg = argv[5]; + char *aweights = (argc > 6) ? 
argv[6] : 0; + //if(0==strcmp(argv[2], "train")) train_lsd(cfg, weights, clear); + //else if(0==strcmp(argv[2], "train2")) train_lsd2(cfg, weights, acfg, aweights, clear); + //else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear); + //else if(0==strcmp(argv[2], "train3")) train_lsd3(argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], clear); + if(0==strcmp(argv[2], "traingan")) train_dcgan(cfg, weights, acfg, aweights, clear, display, file, batches); + else if(0==strcmp(argv[2], "trainprog")) train_prog(cfg, weights, acfg, aweights, clear, display, file, batches); + else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear, display); + else if(0==strcmp(argv[2], "gan")) test_dcgan(cfg, weights); + else if(0==strcmp(argv[2], "inter")) inter_dcgan(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_lsd(cfg, weights, filename, 0); + else if(0==strcmp(argv[2], "color")) test_lsd(cfg, weights, filename, 1); + /* + else if(0==strcmp(argv[2], "valid")) validate_lsd(cfg, weights); + */ +} diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/nightmare.c b/workloads/realworld/uvm_prefetch/darknet/examples/nightmare.c new file mode 100644 index 0000000000000000000000000000000000000000..2978eb61193e96325441c5b830a786eccb203569 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/nightmare.c @@ -0,0 +1,414 @@ +#include "darknet.h" + +#include + +// ./darknet nightmare cfg/extractor.recon.cfg ~/trained/yolo-coco.conv frame6.png -reconstruct -iters 500 -i 3 -lambda .1 -rate .01 -smooth 2 + +float abs_mean(float *x, int n) +{ + int i; + float sum = 0; + for (i = 0; i < n; ++i){ + sum += fabs(x[i]); + } + return sum/n; +} + +void calculate_loss(float *output, float *delta, int n, float thresh) +{ + int i; + float mean = mean_array(output, n); + float var = variance_array(output, n); + for(i = 0; i < n; ++i){ + if(delta[i] > mean + thresh*sqrt(var)) delta[i] = output[i]; + 
else delta[i] = 0; + } +} + +void optimize_picture(network *net, image orig, int max_layer, float scale, float rate, float thresh, int norm) +{ + //scale_image(orig, 2); + //translate_image(orig, -1); + net->n = max_layer + 1; + + int dx = rand()%16 - 8; + int dy = rand()%16 - 8; + int flip = rand()%2; + + image crop = crop_image(orig, dx, dy, orig.w, orig.h); + image im = resize_image(crop, (int)(orig.w * scale), (int)(orig.h * scale)); + if(flip) flip_image(im); + + resize_network(net, im.w, im.h); + layer last = net->layers[net->n-1]; + //net->layers[net->n - 1].activation = LINEAR; + + image delta = make_image(im.w, im.h, im.c); + +#ifdef GPU + net->delta_gpu = cuda_make_array(delta.data, im.w*im.h*im.c); + copy_cpu(net->inputs, im.data, 1, net->input, 1); + + forward_network_gpu(net); + copy_gpu(last.outputs, last.output_gpu, 1, last.delta_gpu, 1); + + cuda_pull_array(last.delta_gpu, last.delta, last.outputs); + calculate_loss(last.delta, last.delta, last.outputs, thresh); + cuda_push_array(last.delta_gpu, last.delta, last.outputs); + + backward_network_gpu(net); + + cuda_pull_array(net->delta_gpu, delta.data, im.w*im.h*im.c); + cuda_free(net->delta_gpu); + net->delta_gpu = 0; +#else + printf("\nnet: %d %d %d im: %d %d %d\n", net->w, net->h, net->inputs, im.w, im.h, im.c); + copy_cpu(net->inputs, im.data, 1, net->input, 1); + net->delta = delta.data; + forward_network(net); + copy_cpu(last.outputs, last.output, 1, last.delta, 1); + calculate_loss(last.output, last.delta, last.outputs, thresh); + backward_network(net); +#endif + + if(flip) flip_image(delta); + //normalize_array(delta.data, delta.w*delta.h*delta.c); + image resized = resize_image(delta, orig.w, orig.h); + image out = crop_image(resized, -dx, -dy, orig.w, orig.h); + + /* + image g = grayscale_image(out); + free_image(out); + out = g; + */ + + //rate = rate / abs_mean(out.data, out.w*out.h*out.c); + image gray = make_image(out.w, out.h, out.c); + fill_image(gray, .5); + 
axpy_cpu(orig.w*orig.h*orig.c, -1, orig.data, 1, gray.data, 1); + axpy_cpu(orig.w*orig.h*orig.c, .1, gray.data, 1, out.data, 1); + + if(norm) normalize_array(out.data, out.w*out.h*out.c); + axpy_cpu(orig.w*orig.h*orig.c, rate, out.data, 1, orig.data, 1); + + /* + normalize_array(orig.data, orig.w*orig.h*orig.c); + scale_image(orig, sqrt(var)); + translate_image(orig, mean); + */ + + //translate_image(orig, 1); + //scale_image(orig, .5); + //normalize_image(orig); + + constrain_image(orig); + + free_image(crop); + free_image(im); + free_image(delta); + free_image(resized); + free_image(out); + +} + +void smooth(image recon, image update, float lambda, int num) +{ + int i, j, k; + int ii, jj; + for(k = 0; k < recon.c; ++k){ + for(j = 0; j < recon.h; ++j){ + for(i = 0; i < recon.w; ++i){ + int out_index = i + recon.w*(j + recon.h*k); + for(jj = j-num; jj <= j + num && jj < recon.h; ++jj){ + if (jj < 0) continue; + for(ii = i-num; ii <= i + num && ii < recon.w; ++ii){ + if (ii < 0) continue; + int in_index = ii + recon.w*(jj + recon.h*k); + update.data[out_index] += lambda * (recon.data[in_index] - recon.data[out_index]); + } + } + } + } + } +} + +void reconstruct_picture(network *net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters) +{ + int iter = 0; + for (iter = 0; iter < iters; ++iter) { + image delta = make_image(recon.w, recon.h, recon.c); + +#ifdef GPU + layer l = get_network_output_layer(net); + cuda_push_array(net->input_gpu, recon.data, recon.w*recon.h*recon.c); + //cuda_push_array(net->truth_gpu, features, net->truths); + net->delta_gpu = cuda_make_array(delta.data, delta.w*delta.h*delta.c); + + forward_network_gpu(net); + cuda_push_array(l.delta_gpu, features, l.outputs); + axpy_gpu(l.outputs, -1, l.output_gpu, 1, l.delta_gpu, 1); + backward_network_gpu(net); + + cuda_pull_array(net->delta_gpu, delta.data, delta.w*delta.h*delta.c); + + cuda_free(net->delta_gpu); +#else + net->input = 
recon.data; + net->delta = delta.data; + net->truth = features; + + forward_network(net); + backward_network(net); +#endif + + //normalize_array(delta.data, delta.w*delta.h*delta.c); + axpy_cpu(recon.w*recon.h*recon.c, 1, delta.data, 1, update.data, 1); + //smooth(recon, update, lambda, smooth_size); + + axpy_cpu(recon.w*recon.h*recon.c, rate, update.data, 1, recon.data, 1); + scal_cpu(recon.w*recon.h*recon.c, momentum, update.data, 1); + + float mag = mag_array(delta.data, recon.w*recon.h*recon.c); + printf("mag: %f\n", mag); + //scal_cpu(recon.w*recon.h*recon.c, 600/mag, recon.data, 1); + + constrain_image(recon); + free_image(delta); + } +} + +/* +void run_lsd(int argc, char **argv) +{ + srand(0); + if(argc < 3){ + fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [options! (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[2]; + char *weights = argv[3]; + char *input = argv[4]; + + int norm = find_int_arg(argc, argv, "-norm", 1); + int rounds = find_int_arg(argc, argv, "-rounds", 1); + int iters = find_int_arg(argc, argv, "-iters", 10); + float rate = find_float_arg(argc, argv, "-rate", .04); + float momentum = find_float_arg(argc, argv, "-momentum", .9); + float lambda = find_float_arg(argc, argv, "-lambda", .01); + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + int reconstruct = find_arg(argc, argv, "-reconstruct"); + int smooth_size = find_int_arg(argc, argv, "-smooth", 1); + + network net = parse_network_cfg(cfg); + load_weights(&net, weights); + char *cfgbase = basecfg(cfg); + char *imbase = basecfg(input); + + set_batch_network(&net, 1); + image im = load_image_color(input, 0, 0); + + float *features = 0; + image update; + if (reconstruct){ + im = letterbox_image(im, net->w, net->h); + + int zz = 0; + network_predict(net, im.data); + image out_im = get_network_image(net); + image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz); + //flip_image(crop); + image f_im = resize_image(crop, out_im.w, out_im.h); + 
free_image(crop); + printf("%d features\n", out_im.w*out_im.h*out_im.c); + + + im = resize_image(im, im.w, im.h); + f_im = resize_image(f_im, f_im.w, f_im.h); + features = f_im.data; + + int i; + for(i = 0; i < 14*14*512; ++i){ + features[i] += rand_uniform(-.19, .19); + } + + free_image(im); + im = make_random_image(im.w, im.h, im.c); + update = make_image(im.w, im.h, im.c); + + } + + int e; + int n; + for(e = 0; e < rounds; ++e){ + fprintf(stderr, "Iteration: "); + fflush(stderr); + for(n = 0; n < iters; ++n){ + fprintf(stderr, "%d, ", n); + fflush(stderr); + if(reconstruct){ + reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1); + //if ((n+1)%30 == 0) rate *= .5; + show_image(im, "reconstruction"); +#ifdef OPENCV + cvWaitKey(10); +#endif + }else{ + int layer = max_layer + rand()%range - range/2; + int octave = rand()%octaves; + optimize_picture(&net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm); + } + } + fprintf(stderr, "done\n"); + char buff[256]; + if (prefix){ + sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e); + }else{ + sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e); + } + printf("%d %s\n", e, buff); + save_image(im, buff); + //show_image(im, buff); + //cvWaitKey(0); + + if(rotate){ + image rot = rotate_image(im, rotate); + free_image(im); + im = rot; + } + image crop = crop_image(im, im.w * (1. - zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom); + image resized = resize_image(crop, im.w, im.h); + free_image(im); + free_image(crop); + im = resized; + } +} +*/ + +void run_nightmare(int argc, char **argv) +{ + srand(0); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [layer] [options! 
(optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[2]; + char *weights = argv[3]; + char *input = argv[4]; + int max_layer = atoi(argv[5]); + + int range = find_int_arg(argc, argv, "-range", 1); + int norm = find_int_arg(argc, argv, "-norm", 1); + int rounds = find_int_arg(argc, argv, "-rounds", 1); + int iters = find_int_arg(argc, argv, "-iters", 10); + int octaves = find_int_arg(argc, argv, "-octaves", 4); + float zoom = find_float_arg(argc, argv, "-zoom", 1.); + float rate = find_float_arg(argc, argv, "-rate", .04); + float thresh = find_float_arg(argc, argv, "-thresh", 1.); + float rotate = find_float_arg(argc, argv, "-rotate", 0); + float momentum = find_float_arg(argc, argv, "-momentum", .9); + float lambda = find_float_arg(argc, argv, "-lambda", .01); + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + int reconstruct = find_arg(argc, argv, "-reconstruct"); + int smooth_size = find_int_arg(argc, argv, "-smooth", 1); + + network *net = load_network(cfg, weights, 0); + char *cfgbase = basecfg(cfg); + char *imbase = basecfg(input); + + set_batch_network(net, 1); + image im = load_image_color(input, 0, 0); + if(0){ + float scale = 1; + if(im.w > 512 || im.h > 512){ + if(im.w > im.h) scale = 512.0/im.w; + else scale = 512.0/im.h; + } + image resized = resize_image(im, scale*im.w, scale*im.h); + free_image(im); + im = resized; + } + //im = letterbox_image(im, net->w, net->h); + + float *features = 0; + image update; + if (reconstruct){ + net->n = max_layer; + im = letterbox_image(im, net->w, net->h); + //resize_network(&net, im.w, im.h); + + network_predict(net, im.data); + if(net->layers[net->n-1].type == REGION){ + printf("region!\n"); + zero_objectness(net->layers[net->n-1]); + } + image out_im = copy_image(get_network_image(net)); + /* + image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz); + //flip_image(crop); + image f_im = resize_image(crop, out_im.w, out_im.h); + free_image(crop); + */ + printf("%d features\n", 
out_im.w*out_im.h*out_im.c); + + features = out_im.data; + + /* + int i; + for(i = 0; i < 14*14*512; ++i){ + //features[i] += rand_uniform(-.19, .19); + } + free_image(im); + im = make_random_image(im.w, im.h, im.c); + */ + update = make_image(im.w, im.h, im.c); + } + + int e; + int n; + for(e = 0; e < rounds; ++e){ + fprintf(stderr, "Iteration: "); + fflush(stderr); + for(n = 0; n < iters; ++n){ + fprintf(stderr, "%d, ", n); + fflush(stderr); + if(reconstruct){ + reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1); + //if ((n+1)%30 == 0) rate *= .5; + show_image(im, "reconstruction", 10); + }else{ + int layer = max_layer + rand()%range - range/2; + int octave = rand()%octaves; + optimize_picture(net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm); + } + } + fprintf(stderr, "done\n"); + if(0){ + image g = grayscale_image(im); + free_image(im); + im = g; + } + char buff[256]; + if (prefix){ + sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e); + }else{ + sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e); + } + printf("%d %s\n", e, buff); + save_image(im, buff); + //show_image(im, buff, 0); + + if(rotate){ + image rot = rotate_image(im, rotate); + free_image(im); + im = rot; + } + image crop = crop_image(im, im.w * (1. 
- zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom); + image resized = resize_image(crop, im.w, im.h); + free_image(im); + free_image(crop); + im = resized; + } +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/regressor.c b/workloads/realworld/uvm_prefetch/darknet/examples/regressor.c new file mode 100644 index 0000000000000000000000000000000000000000..20cec0fad9f0a2ccb2c46a30d0a01793119b43ce --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/regressor.c @@ -0,0 +1,240 @@ +#include "darknet.h" +#include +#include + +void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + int classes = option_find_int(options, "classes", 1); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + clock_t time; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.classes = classes; + + args.min = net->min_ratio*net->w; + args.max = net->max_ratio*net->w; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue 
= net->hue; + args.size = net->w; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = REGRESSION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_regressor(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + 
image sized = letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + free_image(im); + free_image(sized); + if (filename) break; + } +} + + +void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Regressor Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + list *options = read_data_cfg(datacfg); + int classes = option_find_int(options, "classes", 1); + char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); + + void * cap = open_video_stream(filename, cam_index, 0,0,0); + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image crop = center_crop_image(in, net->w, net->h); + grayscale_image_3c(crop); + + float *predictions = network_predict(net, crop.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + int i; + for(i = 0; i < classes; ++i){ + printf("%s: %f\n", names[i], predictions[i]); + } + + show_image(crop, "Regressor", 10); + free_image(in); + free_image(crop); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_regressor(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = 
strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_regressor(data, cfg, weights); + else if(0==strcmp(argv[2], "train")) train_regressor(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "demo")) demo_regressor(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/rnn.c b/workloads/realworld/uvm_prefetch/darknet/examples/rnn.c new file mode 100644 index 0000000000000000000000000000000000000000..5d49eaae7070eb1dc9a87b5627b7ec6f7cb09e46 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/rnn.c @@ -0,0 +1,542 @@ +#include "darknet.h" + +#include + +typedef struct { + float *x; + float *y; +} float_pair; + +unsigned char **load_files(char *filename, int *n) +{ + list *paths = get_paths(filename); + *n = paths->size; + unsigned char **contents = calloc(*n, sizeof(char *)); + int i; + node *x = paths->front; + for(i = 0; i < *n; ++i){ + contents[i] = read_file((char *)x->val); + x = x->next; + } + return contents; +} + +int *read_tokenized_data(char *filename, size_t *read) +{ + size_t size = 512; + size_t count = 0; + FILE *fp = fopen(filename, "r"); + int *d = calloc(size, sizeof(int)); + int n, one; + one = fscanf(fp, "%d", &n); + while(one == 1){ + ++count; + if(count > size){ + size = size*2; + d = realloc(d, size*sizeof(int)); + } + d[count-1] = n; + one = fscanf(fp, "%d", &n); + } + fclose(fp); + d = realloc(d, count*sizeof(int)); + *read = count; 
+ return d; +} + +char **read_tokens(char *filename, size_t *read) +{ + size_t size = 512; + size_t count = 0; + FILE *fp = fopen(filename, "r"); + char **d = calloc(size, sizeof(char *)); + char *line; + while((line=fgetl(fp)) != 0){ + ++count; + if(count > size){ + size = size*2; + d = realloc(d, size*sizeof(char *)); + } + if(0==strcmp(line, "")) line = "\n"; + d[count-1] = line; + } + fclose(fp); + d = realloc(d, count*sizeof(char *)); + *read = count; + return d; +} + + +float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size_t len, int batch, int steps) +{ + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + int i,j; + for(i = 0; i < batch; ++i){ + for(j = 0; j < steps; ++j){ + int curr = tokens[(offsets[i])%len]; + int next = tokens[(offsets[i] + 1)%len]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + offsets[i] = (offsets[i] + 1) % len; + + if(curr >= characters || curr < 0 || next >= characters || next < 0){ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} + +float_pair get_seq2seq_data(char **source, char **dest, int n, int characters, size_t len, int batch, int steps) +{ + int i,j; + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + for(i = 0; i < batch; ++i){ + int index = rand()%n; + //int slen = strlen(source[index]); + //int dlen = strlen(dest[index]); + for(j = 0; j < steps; ++j){ + unsigned char curr = source[index][j]; + unsigned char next = dest[index][j]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + if(curr > 255 || curr <= 0 || next > 255 || next <= 0){ + /*text[(index+j+2)%len] = 0; + printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]); + printf("%s", text+index); + */ + error("Bad char"); + } + } + } + 
float_pair p; + p.x = x; + p.y = y; + return p; +} + +float_pair get_rnn_data(unsigned char *text, size_t *offsets, int characters, size_t len, int batch, int steps) +{ + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + int i,j; + for(i = 0; i < batch; ++i){ + for(j = 0; j < steps; ++j){ + unsigned char curr = text[(offsets[i])%len]; + unsigned char next = text[(offsets[i] + 1)%len]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + offsets[i] = (offsets[i] + 1) % len; + + if(curr > 255 || curr <= 0 || next > 255 || next <= 0){ + /*text[(index+j+2)%len] = 0; + printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]); + printf("%s", text+index); + */ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} + +void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear, int tokenized) +{ + srand(time(0)); + unsigned char *text = 0; + int *tokens = 0; + size_t size; + if(tokenized){ + tokens = read_tokenized_data(filename, &size); + } else { + text = read_file(filename); + size = strlen((const char*)text); + } + + char *backup_directory = "/home/pjreddie/backup/"; + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, clear); + + int inputs = net->inputs; + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g, Inputs: %d %d %d\n", net->learning_rate, net->momentum, net->decay, inputs, net->batch, net->time_steps); + int batch = net->batch; + int steps = net->time_steps; + if(clear) *net->seen = 0; + int i = (*net->seen)/net->batch; + + int streams = batch/steps; + size_t *offsets = calloc(streams, sizeof(size_t)); + int j; + for(j = 0; j < streams; ++j){ + offsets[j] = rand_size_t()%size; + } + + clock_t time; + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); 
+ float_pair p; + if(tokenized){ + p = get_rnn_token_data(tokens, offsets, inputs, size, streams, steps); + }else{ + p = get_rnn_data(text, offsets, inputs, size, streams, steps); + } + + copy_cpu(net->inputs*net->batch, p.x, 1, net->input, 1); + copy_cpu(net->truths*net->batch, p.y, 1, net->truth, 1); + float loss = train_network_datum(net) / (batch); + free(p.x); + free(p.y); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + size_t chars = get_current_batch(net)*batch; + fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds, %f epochs\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), (float) chars/size); + + for(j = 0; j < streams; ++j){ + //printf("%d\n", j); + if(rand()%64 == 0){ + //fprintf(stderr, "Reset\n"); + offsets[j] = rand_size_t()%size; + reset_network_state(net, j); + } + } + + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void print_symbol(int n, char **tokens){ + if(tokens){ + printf("%s ", tokens[n]); + } else { + printf("%c", n); + } +} + +void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float temp, int rseed, char *token_file) +{ + char **tokens = 0; + if(token_file){ + size_t n; + tokens = read_tokens(token_file, &n); + } + + srand(rseed); + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int i, j; + for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp; + int c = 0; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + + /* + fill_cpu(inputs, 0, input, 1); + for(i = 0; i < 10; ++i){ + network_predict(net, input); + } + 
fill_cpu(inputs, 0, input, 1); + */ + + for(i = 0; i < len-1; ++i){ + c = seed[i]; + input[c] = 1; + network_predict(net, input); + input[c] = 0; + print_symbol(c, tokens); + } + if(len) c = seed[len-1]; + print_symbol(c, tokens); + for(i = 0; i < num; ++i){ + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + for(j = 32; j < 127; ++j){ + //printf("%d %c %f\n",j, j, out[j]); + } + for(j = 0; j < inputs; ++j){ + if (out[j] < .0001) out[j] = 0; + } + c = sample_array(out, inputs); + print_symbol(c, tokens); + } + printf("\n"); +} + +void test_tactic_rnn_multi(char *cfgfile, char *weightfile, int num, float temp, int rseed, char *token_file) +{ + char **tokens = 0; + if(token_file){ + size_t n; + tokens = read_tokens(token_file, &n); + } + + srand(rseed); + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int i, j; + for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp; + int c = 0; + float *input = calloc(inputs, sizeof(float)); + float *out = 0; + + while(1){ + reset_network_state(net, 0); + while((c = getc(stdin)) != EOF && c != 0){ + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + for(i = 0; i < num; ++i){ + for(j = 0; j < inputs; ++j){ + if (out[j] < .0001) out[j] = 0; + } + int next = sample_array(out, inputs); + if(c == '.' 
&& next == '\n') break; + c = next; + print_symbol(c, tokens); + + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + printf("\n"); + } +} + +void test_tactic_rnn(char *cfgfile, char *weightfile, int num, float temp, int rseed, char *token_file) +{ + char **tokens = 0; + if(token_file){ + size_t n; + tokens = read_tokens(token_file, &n); + } + + srand(rseed); + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int i, j; + for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp; + int c = 0; + float *input = calloc(inputs, sizeof(float)); + float *out = 0; + + while((c = getc(stdin)) != EOF){ + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + for(i = 0; i < num; ++i){ + for(j = 0; j < inputs; ++j){ + if (out[j] < .0001) out[j] = 0; + } + int next = sample_array(out, inputs); + if(c == '.' && next == '\n') break; + c = next; + print_symbol(c, tokens); + + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + printf("\n"); +} + +void valid_tactic_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int count = 0; + int words = 1; + int c; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + for(i = 0; i < len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + float sum = 0; + c = getc(stdin); + float log2 = log(2); + int in = 0; + while(c != EOF){ + int next = getc(stdin); + if(next == EOF) break; + if(next < 0 || next >= 255) error("Out of range character"); + + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + + if(c == '.' 
&& next == '\n') in = 0; + if(!in) { + if(c == '>' && next == '>'){ + in = 1; + ++words; + } + c = next; + continue; + } + ++count; + sum += log(out[next])/log2; + c = next; + printf("%d %d Perplexity: %4.4f Word Perplexity: %4.4f\n", count, words, pow(2, -sum/count), pow(2, -sum/words)); + } +} + +void valid_char_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int count = 0; + int words = 1; + int c; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + for(i = 0; i < len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + float sum = 0; + c = getc(stdin); + float log2 = log(2); + while(c != EOF){ + int next = getc(stdin); + if(next == EOF) break; + if(next < 0 || next >= 255) error("Out of range character"); + ++count; + if(next == ' ' || next == '\n' || next == '\t') ++words; + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + sum += log(out[next])/log2; + c = next; + printf("%d BPC: %4.4f Perplexity: %4.4f Word Perplexity: %4.4f\n", count, -sum/count, pow(2, -sum/count), pow(2, -sum/words)); + } +} + +void vec_char_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int c; + int seed_len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + char *line; + while((line=fgetl(stdin)) != 0){ + reset_network_state(net, 0); + for(i = 0; i < seed_len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + strip(line); + int str_len = strlen(line); + for(i = 0; i < str_len; ++i){ + c = line[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + c = ' '; + 
input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + + layer l = net->layers[0]; + #ifdef GPU + cuda_pull_array(l.output_gpu, l.output, l.outputs); + #endif + printf("%s", line); + for(i = 0; i < l.outputs; ++i){ + printf(",%g", l.output[i]); + } + printf("\n"); + } +} + +void run_char_rnn(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + char *filename = find_char_arg(argc, argv, "-file", "data/shakespeare.txt"); + char *seed = find_char_arg(argc, argv, "-seed", "\n\n"); + int len = find_int_arg(argc, argv, "-len", 1000); + float temp = find_float_arg(argc, argv, "-temp", .7); + int rseed = find_int_arg(argc, argv, "-srand", time(0)); + int clear = find_arg(argc, argv, "-clear"); + int tokenized = find_arg(argc, argv, "-tokenized"); + char *tokens = find_char_arg(argc, argv, "-tokens", 0); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_char_rnn(cfg, weights, filename, clear, tokenized); + else if(0==strcmp(argv[2], "valid")) valid_char_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "validtactic")) valid_tactic_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "vec")) vec_char_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "generate")) test_char_rnn(cfg, weights, len, seed, temp, rseed, tokens); + else if(0==strcmp(argv[2], "generatetactic")) test_tactic_rnn(cfg, weights, len, temp, rseed, tokens); +} diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/rnn_vid.c b/workloads/realworld/uvm_prefetch/darknet/examples/rnn_vid.c new file mode 100644 index 0000000000000000000000000000000000000000..e88792352311438d0fcb25bb7befd0677f70bae5 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/rnn_vid.c @@ -0,0 +1,208 @@ +#include "darknet.h" + +#ifdef OPENCV +image get_image_from_stream(CvCapture *cap); +image ipl_to_image(IplImage* src); + 
+void reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters); + + +typedef struct { + float *x; + float *y; +} float_pair; + +float_pair get_rnn_vid_data(network net, char **files, int n, int batch, int steps) +{ + int b; + assert(net.batch == steps + 1); + image out_im = get_network_image(net); + int output_size = out_im.w*out_im.h*out_im.c; + printf("%d %d %d\n", out_im.w, out_im.h, out_im.c); + float *feats = calloc(net.batch*batch*output_size, sizeof(float)); + for(b = 0; b < batch; ++b){ + int input_size = net.w*net.h*net.c; + float *input = calloc(input_size*net.batch, sizeof(float)); + char *filename = files[rand()%n]; + CvCapture *cap = cvCaptureFromFile(filename); + int frames = cvGetCaptureProperty(cap, CV_CAP_PROP_FRAME_COUNT); + int index = rand() % (frames - steps - 2); + if (frames < (steps + 4)){ + --b; + free(input); + continue; + } + + printf("frames: %d, index: %d\n", frames, index); + cvSetCaptureProperty(cap, CV_CAP_PROP_POS_FRAMES, index); + + int i; + for(i = 0; i < net.batch; ++i){ + IplImage* src = cvQueryFrame(cap); + image im = ipl_to_image(src); + rgbgr_image(im); + image re = resize_image(im, net.w, net.h); + //show_image(re, "loaded"); + //cvWaitKey(10); + memcpy(input + i*input_size, re.data, input_size*sizeof(float)); + free_image(im); + free_image(re); + } + float *output = network_predict(net, input); + + free(input); + + for(i = 0; i < net.batch; ++i){ + memcpy(feats + (b + i*batch)*output_size, output + i*output_size, output_size*sizeof(float)); + } + + cvReleaseCapture(&cap); + } + + //printf("%d %d %d\n", out_im.w, out_im.h, out_im.c); + float_pair p = {0}; + p.x = feats; + p.y = feats + output_size*batch; //+ out_im.w*out_im.h*out_im.c; + + return p; +} + + +void train_vid_rnn(char *cfgfile, char *weightfile) +{ + char *train_videos = "data/vid/train.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char 
*base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + + list *plist = get_paths(train_videos); + int N = plist->size; + char **paths = (char **)list_to_array(plist); + clock_t time; + int steps = net.time_steps; + int batch = net.batch / net.time_steps; + + network extractor = parse_network_cfg("cfg/extractor.cfg"); + load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv"); + + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + float_pair p = get_rnn_vid_data(extractor, paths, N, batch, steps); + + copy_cpu(net.inputs*net.batch, p.x, 1, net.input, 1); + copy_cpu(net.truths*net.batch, p.y, 1, net.truth, 1); + float loss = train_network_datum(net) / (net.batch); + + + free(p.x); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time)); + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%10==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + + +image save_reconstruction(network net, image *init, float *feat, char *name, int i) +{ + image recon; + if (init) { + recon = copy_image(*init); + } else { + recon = make_random_image(net.w, net.h, 3); + } + + image update = make_image(net.w, net.h, 3); + reconstruct_picture(net, feat, recon, update, .01, .9, .1, 2, 50); + char buff[256]; + sprintf(buff, "%s%d", name, i); + save_image(recon, buff); + 
free_image(update); + return recon; +} + +void generate_vid_rnn(char *cfgfile, char *weightfile) +{ + network extractor = parse_network_cfg("cfg/extractor.recon.cfg"); + load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv"); + + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&extractor, 1); + set_batch_network(&net, 1); + + int i; + CvCapture *cap = cvCaptureFromFile("/extra/vid/ILSVRC2015/Data/VID/snippets/val/ILSVRC2015_val_00007030.mp4"); + float *feat; + float *next; + image last; + for(i = 0; i < 25; ++i){ + image im = get_image_from_stream(cap); + image re = resize_image(im, extractor.w, extractor.h); + feat = network_predict(extractor, re.data); + if(i > 0){ + printf("%f %f\n", mean_array(feat, 14*14*512), variance_array(feat, 14*14*512)); + printf("%f %f\n", mean_array(next, 14*14*512), variance_array(next, 14*14*512)); + printf("%f\n", mse_array(feat, 14*14*512)); + axpy_cpu(14*14*512, -1, feat, 1, next, 1); + printf("%f\n", mse_array(next, 14*14*512)); + } + next = network_predict(net, feat); + + free_image(im); + + free_image(save_reconstruction(extractor, 0, feat, "feat", i)); + free_image(save_reconstruction(extractor, 0, next, "next", i)); + if (i==24) last = copy_image(re); + free_image(re); + } + for(i = 0; i < 30; ++i){ + next = network_predict(net, next); + image new = save_reconstruction(extractor, &last, next, "new", i); + free_image(last); + last = new; + } +} + +void run_vid_rnn(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + //char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_vid_rnn(cfg, weights); + else if(0==strcmp(argv[2], "generate")) generate_vid_rnn(cfg, weights); +} +#else +void run_vid_rnn(int argc, char **argv){} +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/segmenter.c b/workloads/realworld/uvm_prefetch/darknet/examples/segmenter.c new file mode 100644 index 0000000000000000000000000000000000000000..2e7cea0b730754b74a125bcd865aa12f0bdd3be0 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/segmenter.c @@ -0,0 +1,255 @@ +#include "darknet.h" +#include +#include + +void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + image pred = get_network_image(net); + + int div = net->w/pred.w; + assert(pred.w * div == net->w); + assert(pred.h * div == net->h); + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.scale = div; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + 
args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.classes = 80; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = SEGMENTATION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(display){ + image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]); + image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]); + image mask = mask_to_rgb(tr); + image prmask = mask_to_rgb(pred); + show_image(im, "input", 1); + show_image(prmask, "pred", 1); + show_image(mask, "truth", 100); + free_image(mask); + free_image(prmask); + } + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, 
base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_segmenter(char *datafile, char *cfg, char *weights, char *filename) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image sized = letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + show_image(sized, "orig", 1); + show_image(prmask, "pred", 0); + free_image(im); + free_image(sized); + free_image(prmask); + if (filename) break; + } +} + + +void demo_segmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Classifier Demo\n"); + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = letterbox_image(in, net->w, net->h); + + network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + show_image(prmask, "Segmenter", 10); + + free_image(in_s); + free_image(in); + free_image(prmask); + + 
gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_segmenter(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? 
argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_segmenter(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_segmenter(data, cfg, weights, gpus, ngpus, clear, display); + else if(0==strcmp(argv[2], "demo")) demo_segmenter(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/super.c b/workloads/realworld/uvm_prefetch/darknet/examples/super.c new file mode 100644 index 0000000000000000000000000000000000000000..d34406b1f2ce70cd36eecb8298bf1ca3e736f01b --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/super.c @@ -0,0 +1,120 @@ +#include "darknet.h" + +void train_super(char *cfgfile, char *weightfile, int clear) +{ + char *train_images = "/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, clear); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.scale = 4; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = SUPER_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f 
rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void test_super(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + resize_network(net, im.w, im.h); + printf("%d %d\n", im.w, im.h); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image(net); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 0); + + free_image(im); + if (filename) break; + } +} + + +void run_super(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5] : 0; + int clear = find_arg(argc, argv, "-clear"); + if(0==strcmp(argv[2], "train")) train_super(cfg, weights, clear); + else if(0==strcmp(argv[2], "test")) test_super(cfg, weights, filename); + /* + else if(0==strcmp(argv[2], "valid")) validate_super(cfg, weights); + */ +} diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/swag.c b/workloads/realworld/uvm_prefetch/darknet/examples/swag.c new file mode 100644 index 0000000000000000000000000000000000000000..c22d7855c46a975ecd1e94a60f9b7059bc288fee --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/swag.c @@ -0,0 +1,83 @@ +#include "darknet.h" +#include + +void train_swag(char *cfgfile, char *weightfile) +{ + char *train_images = "data/voc.0712.trainval"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + data train, buffer; + + layer l = net.layers[net.n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes = side; + args.d = &buffer; + args.type = REGION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + 
time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || i == 600){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void run_swag(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_swag(cfg, weights); +} diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/tag.c b/workloads/realworld/uvm_prefetch/darknet/examples/tag.c new file mode 100644 index 0000000000000000000000000000000000000000..4caf8cba18f39f62deb54ea913fd40c194b3e33c --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/tag.c @@ -0,0 +1,140 @@ +#include "darknet.h" + +void train_tag(char *cfgfile, char *weightfile, int clear) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, clear); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + list *plist = get_paths("/home/pjreddie/tag/train.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + + args.min = net->w; + args.max = net->max_crop; + args.size = net->w; + + args.paths = paths; + 
args.classes = net->outputs; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = TAG_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + fprintf(stderr, "%d classes\n", net->outputs); + + load_thread = load_data_in_thread(args); + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + pthread_join(load_thread, 0); + free_data(buffer); + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void test_tag(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + int i = 0; + char **names = get_labels("data/tags.txt"); + clock_t time; + int indexes[10]; + char buff[256]; + char *input = buff; + int size = net->w; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) 
return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = resize_min(im, size); + resize_network(net, r.w, r.h); + printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + float *predictions = network_predict(net, X); + top_predictions(net, 10, indexes); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < 10; ++i){ + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void run_tag(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int clear = find_arg(argc, argv, "-clear"); + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_tag(cfg, weights, clear); + else if(0==strcmp(argv[2], "test")) test_tag(cfg, weights, filename); +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/voxel.c b/workloads/realworld/uvm_prefetch/darknet/examples/voxel.c new file mode 100644 index 0000000000000000000000000000000000000000..01ea9bb98987590227758364bbfff50996cf9a2d --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/voxel.c @@ -0,0 +1,161 @@ +#include "darknet.h" + +void extract_voxel(char *lfile, char *rfile, char *prefix) +{ +#ifdef OPENCV + int w = 1920; + int h = 1080; + int shift = 0; + int count = 0; + CvCapture *lcap = cvCaptureFromFile(lfile); + CvCapture *rcap = cvCaptureFromFile(rfile); + while(1){ + image l = get_image_from_stream(lcap); + image r = get_image_from_stream(rcap); + if(!l.w || !r.w) break; + if(count%100 == 0) { + shift = best_3d_shift_r(l, r, -l.h/100, l.h/100); + printf("%d\n", shift); + } + image ls = crop_image(l, (l.w - w)/2, (l.h - h)/2, w, h); + image rs = crop_image(r, 105 + (r.w - w)/2, (r.h - h)/2 + 
shift, w, h); + char buff[256]; + sprintf(buff, "%s_%05d_l", prefix, count); + save_image(ls, buff); + sprintf(buff, "%s_%05d_r", prefix, count); + save_image(rs, buff); + free_image(l); + free_image(r); + free_image(ls); + free_image(rs); + ++count; + } + +#else + printf("need OpenCV for extraction\n"); +#endif +} + +void train_voxel(char *cfgfile, char *weightfile) +{ + char *train_images = "/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.scale = 4; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = SUPER_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + 
save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void test_voxel(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + resize_network(&net, im.w, im.h); + printf("%d %d\n", im.w, im.h); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image(net); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + + free_image(im); + if (filename) break; + } +} + + +void run_voxel(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_voxel(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_voxel(cfg, weights, filename); + else if(0==strcmp(argv[2], "extract")) extract_voxel(argv[3], argv[4], argv[5]); + /* + else if(0==strcmp(argv[2], "valid")) validate_voxel(cfg, weights); + */ +} diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/writing.c b/workloads/realworld/uvm_prefetch/darknet/examples/writing.c new file mode 100644 index 0000000000000000000000000000000000000000..1b6ff83b5838b654e0fd1b6664156daf6d7a889b --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/writing.c @@ -0,0 +1,144 @@ +#include "darknet.h" + +void train_writing(char *cfgfile, char *weightfile) +{ + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + list *plist = get_paths("figures.list"); + char **paths = (char **)list_to_array(plist); + clock_t time; + int N = plist->size; + printf("N: %d\n", N); + image out = get_network_image(net); + + data train, buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.out_w = out.w; + args.out_h = out.h; + args.paths = paths; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = WRITING_DATA; + + pthread_t load_thread = load_data_in_thread(args); + int epoch = (*net.seen)/N; + while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + printf("Loaded %lf seconds\n",sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + + /* + image pred = float_to_image(64, 64, 1, out); + 
print_image(pred); + */ + + /* + image im = float_to_image(256, 256, 3, train.X.vals[0]); + image lab = float_to_image(64, 64, 1, train.y.vals[0]); + image pred = float_to_image(64, 64, 1, out); + show_image(im, "image"); + show_image(lab, "label"); + print_image(lab); + show_image(pred, "pred"); + cvWaitKey(0); + */ + + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + free_data(train); + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_batch_%ld.weights", backup_directory, base, get_current_batch(net)); + save_weights(net, buff); + } + if(*net.seen/N > epoch){ + epoch = *net.seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + } +} + +void test_writing(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + + image im = load_image_color(input, 0, 0); + resize_network(&net, im.w, im.h); + printf("%d %d %d\n", im.h, im.w, im.c); + float *X = im.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + image pred = get_network_image(net); + + image upsampled = resize_image(pred, im.w, im.h); + image thresh = threshold_image(upsampled, .5); + pred = thresh; + + show_image(pred, "prediction"); + show_image(im, "orig"); +#ifdef OPENCV + cvWaitKey(0); + cvDestroyAllWindows(); +#endif + + 
free_image(upsampled); + free_image(thresh); + free_image(im); + if (filename) break; + } +} + +void run_writing(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_writing(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_writing(cfg, weights, filename); +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/examples/yolo.c b/workloads/realworld/uvm_prefetch/darknet/examples/yolo.c new file mode 100644 index 0000000000000000000000000000000000000000..4ddb69a3e53b2123ccb89026645a66c044047faa --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/examples/yolo.c @@ -0,0 +1,327 @@ +#include "darknet.h" + +char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; + +void train_yolo(char *cfgfile, char *weightfile) +{ + char *train_images = "/data/voc/train.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + layer l = net->layers[net->n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + 
args.jitter = jitter; + args.num_boxes = side; + args.d = &buffer; + args.type = REGION_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || (i < 1000 && i%100 == 0)){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void print_yolo_detections(FILE **fps, char *id, int total, int classes, int w, int h, detection *dets) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], + xmin, ymin, xmax, ymax); + } + } +} + +void validate_yolo(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = 
"results/comp4_det_test_"; + //list *plist = get_paths("data/voc.2007.test"); + list *plist = get_paths("/home/pjreddie/data/voc/2007_test.txt"); + //list *plist = get_paths("data/voc.2012.test"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + int j; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + int t; + + float thresh = .001; + int nms = 1; + float iou_thresh = .5; + + int nthreads = 8; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.type = IMAGE_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + time_t start = time(0); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id = basecfg(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh); + print_yolo_detections(fps, id, l.side*l.side*l.n, 
classes, w, h, dets); + free_detections(dets, nboxes); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); +} + +void validate_yolo_recall(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + list *plist = get_paths("data/voc.2007.test"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + int side = l.side; + + int j, k; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + + float thresh = .001; + float iou_thresh = .5; + float nms = 0; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + + int nboxes = 0; + detection *dets = get_network_boxes(net, orig.w, orig.h, thresh, 0, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, side*side*l.n, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < side*side*l.n; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; 
+ float best_iou = 0; + for(k = 0; k < side*side*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free_detections(dets, nboxes); + free(id); + free_image(orig); + free_image(sized); + } +} + +void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) +{ + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + layer l = net->layers[net->n-1]; + set_batch_network(net, 1); + srand(2222222); + clock_t time; + char buff[256]; + char *input = buff; + float nms=.4; + while(1){ + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = resize_image(im, net->w, net->h); + float *X = sized.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + + int nboxes = 0; + detection *dets = get_network_boxes(net, 1, 1, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms); + + draw_detections(im, dets, l.side*l.side*l.n, thresh, voc_names, alphabet, 20); + save_image(im, "predictions"); + show_image(im, "predictions", 0); + free_detections(dets, nboxes); + free_image(im); + free_image(sized); + if (filename) break; + } +} + +void run_yolo(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .2); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + if(argc < 4){ + fprintf(stderr, "usage: 
%s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int avg = find_int_arg(argc, argv, "-avg", 1); + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5]: 0; + if(0==strcmp(argv[2], "test")) test_yolo(cfg, weights, filename, thresh); + else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights); + else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix, avg, .5, 0,0,0,0); +} diff --git a/workloads/realworld/uvm_prefetch/darknet/include/darknet.h b/workloads/realworld/uvm_prefetch/darknet/include/darknet.h new file mode 100644 index 0000000000000000000000000000000000000000..7be8225e2d39f079ca0a15da6980b42f8966af40 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/include/darknet.h @@ -0,0 +1,810 @@ +#ifndef DARKNET_API +#define DARKNET_API +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef GPU + #define GPU_DEVICE 7 + #define BLOCK 512 + + #include "cuda_runtime.h" + #include "curand.h" + #include "cublas_v2.h" + + #ifdef CUDNN + #include "cudnn.h" + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define SECRET_NUM -1234 +extern int gpu_index; + +typedef struct{ + int classes; + char **names; +} metadata; + +metadata get_metadata(char *file); + +typedef struct{ + int *leaf; + int n; + int *parent; + int *child; + int *group; + char **name; + + int groups; + int *group_size; + int *group_offset; +} tree; +tree *read_tree(char *filename); + +typedef enum{ + LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU +} ACTIVATION; + +typedef enum{ + PNG, BMP, TGA, JPG +} IMTYPE; + +typedef enum{ + MULT, ADD, SUB, DIV +} BINARY_ACTIVATION; + +typedef enum { + CONVOLUTIONAL, 
+ DECONVOLUTIONAL, + CONNECTED, + MAXPOOL, + SOFTMAX, + DETECTION, + DROPOUT, + CROP, + ROUTE, + COST, + NORMALIZATION, + AVGPOOL, + LOCAL, + SHORTCUT, + ACTIVE, + RNN, + GRU, + LSTM, + CRNN, + BATCHNORM, + NETWORK, + XNOR, + REGION, + YOLO, + ISEG, + REORG, + UPSAMPLE, + LOGXENT, + L2NORM, + BLANK +} LAYER_TYPE; + +typedef enum{ + SSE, MASKED, L1, SEG, SMOOTH,WGAN +} COST_TYPE; + +typedef struct{ + int batch; + float learning_rate; + float momentum; + float decay; + int adam; + float B1; + float B2; + float eps; + int t; +} update_args; + +struct network; +typedef struct network network; + +struct layer; +typedef struct layer layer; + +struct layer{ + LAYER_TYPE type; + ACTIVATION activation; + COST_TYPE cost_type; + void (*forward) (struct layer, struct network); + void (*backward) (struct layer, struct network); + void (*update) (struct layer, update_args); + void (*forward_gpu) (struct layer, struct network); + void (*backward_gpu) (struct layer, struct network); + void (*update_gpu) (struct layer, update_args); + int batch_normalize; + int shortcut; + int batch; + int forced; + int flipped; + int inputs; + int outputs; + int nweights; + int nbiases; + int extra; + int truths; + int h,w,c; + int out_h, out_w, out_c; + int n; + int max_boxes; + int groups; + int size; + int side; + int stride; + int reverse; + int flatten; + int spatial; + int pad; + int sqrt; + int flip; + int index; + int binary; + int xnor; + int steps; + int hidden; + int truth; + float smooth; + float dot; + float angle; + float jitter; + float saturation; + float exposure; + float shift; + float ratio; + float learning_rate_scale; + float clip; + int noloss; + int softmax; + int classes; + int coords; + int background; + int rescore; + int objectness; + int joint; + int noadjust; + int reorg; + int log; + int tanh; + int *mask; + int total; + + float alpha; + float beta; + float kappa; + + float coord_scale; + float object_scale; + float noobject_scale; + float mask_scale; + float 
class_scale; + int bias_match; + int random; + float ignore_thresh; + float truth_thresh; + float thresh; + float focus; + int classfix; + int absolute; + + int onlyforward; + int stopbackward; + int dontload; + int dontsave; + int dontloadscales; + int numload; + + float temperature; + float probability; + float scale; + + char * cweights; + int * indexes; + int * input_layers; + int * input_sizes; + int * map; + int * counts; + float ** sums; + float * rand; + float * cost; + float * state; + float * prev_state; + float * forgot_state; + float * forgot_delta; + float * state_delta; + float * combine_cpu; + float * combine_delta_cpu; + + float * concat; + float * concat_delta; + + float * binary_weights; + + float * biases; + float * bias_updates; + + float * scales; + float * scale_updates; + + float * weights; + float * weight_updates; + + float * delta; + float * output; + float * loss; + float * squared; + float * norms; + + float * spatial_mean; + float * mean; + float * variance; + + float * mean_delta; + float * variance_delta; + + float * rolling_mean; + float * rolling_variance; + + float * x; + float * x_norm; + + float * m; + float * v; + + float * bias_m; + float * bias_v; + float * scale_m; + float * scale_v; + + + float *z_cpu; + float *r_cpu; + float *h_cpu; + float * prev_state_cpu; + + float *temp_cpu; + float *temp2_cpu; + float *temp3_cpu; + + float *dh_cpu; + float *hh_cpu; + float *prev_cell_cpu; + float *cell_cpu; + float *f_cpu; + float *i_cpu; + float *g_cpu; + float *o_cpu; + float *c_cpu; + float *dc_cpu; + + float * binary_input; + + struct layer *input_layer; + struct layer *self_layer; + struct layer *output_layer; + + struct layer *reset_layer; + struct layer *update_layer; + struct layer *state_layer; + + struct layer *input_gate_layer; + struct layer *state_gate_layer; + struct layer *input_save_layer; + struct layer *state_save_layer; + struct layer *input_state_layer; + struct layer *state_state_layer; + + struct layer 
*input_z_layer; + struct layer *state_z_layer; + + struct layer *input_r_layer; + struct layer *state_r_layer; + + struct layer *input_h_layer; + struct layer *state_h_layer; + + struct layer *wz; + struct layer *uz; + struct layer *wr; + struct layer *ur; + struct layer *wh; + struct layer *uh; + struct layer *uo; + struct layer *wo; + struct layer *uf; + struct layer *wf; + struct layer *ui; + struct layer *wi; + struct layer *ug; + struct layer *wg; + + tree *softmax_tree; + + size_t workspace_size; + +#ifdef GPU + int *indexes_gpu; + + float *z_gpu; + float *r_gpu; + float *h_gpu; + + float *temp_gpu; + float *temp2_gpu; + float *temp3_gpu; + + float *dh_gpu; + float *hh_gpu; + float *prev_cell_gpu; + float *cell_gpu; + float *f_gpu; + float *i_gpu; + float *g_gpu; + float *o_gpu; + float *c_gpu; + float *dc_gpu; + + float *m_gpu; + float *v_gpu; + float *bias_m_gpu; + float *scale_m_gpu; + float *bias_v_gpu; + float *scale_v_gpu; + + float * combine_gpu; + float * combine_delta_gpu; + + float * prev_state_gpu; + float * forgot_state_gpu; + float * forgot_delta_gpu; + float * state_gpu; + float * state_delta_gpu; + float * gate_gpu; + float * gate_delta_gpu; + float * save_gpu; + float * save_delta_gpu; + float * concat_gpu; + float * concat_delta_gpu; + + float * binary_input_gpu; + float * binary_weights_gpu; + + float * mean_gpu; + float * variance_gpu; + + float * rolling_mean_gpu; + float * rolling_variance_gpu; + + float * variance_delta_gpu; + float * mean_delta_gpu; + + float * x_gpu; + float * x_norm_gpu; + float * weights_gpu; + float * weight_updates_gpu; + float * weight_change_gpu; + + float * biases_gpu; + float * bias_updates_gpu; + float * bias_change_gpu; + + float * scales_gpu; + float * scale_updates_gpu; + float * scale_change_gpu; + + float * output_gpu; + float * loss_gpu; + float * delta_gpu; + float * rand_gpu; + float * squared_gpu; + float * norms_gpu; +#ifdef CUDNN + cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc; + 
cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc; + cudnnTensorDescriptor_t normTensorDesc; + cudnnFilterDescriptor_t weightDesc; + cudnnFilterDescriptor_t dweightDesc; + cudnnConvolutionDescriptor_t convDesc; + cudnnConvolutionFwdAlgo_t fw_algo; + cudnnConvolutionBwdDataAlgo_t bd_algo; + cudnnConvolutionBwdFilterAlgo_t bf_algo; +#endif +#endif +}; + +void free_layer(layer); + +typedef enum { + CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM +} learning_rate_policy; + +typedef struct network{ + int n; + int batch; + size_t *seen; + int *t; + float epoch; + int subdivisions; + layer *layers; + float *output; + learning_rate_policy policy; + + float learning_rate; + float momentum; + float decay; + float gamma; + float scale; + float power; + int time_steps; + int step; + int max_batches; + float *scales; + int *steps; + int num_steps; + int burn_in; + + int adam; + float B1; + float B2; + float eps; + + int inputs; + int outputs; + int truths; + int notruth; + int h, w, c; + int max_crop; + int min_crop; + float max_ratio; + float min_ratio; + int center; + float angle; + float aspect; + float exposure; + float saturation; + float hue; + int random; + + int gpu_index; + tree *hierarchy; + + float *input; + float *truth; + float *delta; + float *workspace; + int train; + int index; + float *cost; + float clip; + +#ifdef GPU + float *input_gpu; + float *truth_gpu; + float *delta_gpu; + float *output_gpu; +#endif + +} network; + +typedef struct { + int w; + int h; + float scale; + float rad; + float dx; + float dy; + float aspect; +} augment_args; + +typedef struct { + int w; + int h; + int c; + float *data; +} image; + +typedef struct{ + float x, y, w, h; +} box; + +typedef struct detection{ + box bbox; + int classes; + float *prob; + float *mask; + float objectness; + int sort_class; +} detection; + +typedef struct matrix{ + int rows, cols; + float **vals; +} matrix; + + +typedef struct{ + int w, h; + matrix X; + matrix y; + int shallow; + int *num_boxes; + box 
**boxes; +} data; + +typedef enum { + CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA, ISEG_DATA +} data_type; + +typedef struct load_args{ + int threads; + char **paths; + char *path; + int n; + int m; + char **labels; + int h; + int w; + int out_w; + int out_h; + int nh; + int nw; + int num_boxes; + int min, max, size; + int classes; + int background; + int scale; + int center; + int coords; + float jitter; + float angle; + float aspect; + float saturation; + float exposure; + float hue; + data *d; + image *im; + image *resized; + data_type type; + tree *hierarchy; +} load_args; + +typedef struct{ + int id; + float x,y,w,h; + float left, right, top, bottom; +} box_label; + + +network *load_network(char *cfg, char *weights, int clear); +load_args get_base_args(network *net); + +void free_data(data d); + +typedef struct node{ + void *val; + struct node *next; + struct node *prev; +} node; + +typedef struct list{ + int size; + node *front; + node *back; +} list; + +pthread_t load_data(load_args args); +list *read_data_cfg(char *filename); +list *read_cfg(char *filename); +unsigned char *read_file(char *filename); +data resize_data(data orig, int w, int h); +data *tile_data(data orig, int divs, int size); +data select_data(data *orig, int *inds); + +void forward_network(network *net); +void backward_network(network *net); +void update_network(network *net); + + +float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); +void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); +void scal_cpu(int N, float ALPHA, float *X, int INCX); +void fill_cpu(int N, float ALPHA, float * X, int INCX); +void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); 
+void softmax(float *input, int n, float temp, int stride, float *output); + +int best_3d_shift_r(image a, image b, int min, int max); +#ifdef GPU +void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); +void fill_gpu(int N, float ALPHA, float * X, int INCX); +void scal_gpu(int N, float ALPHA, float * X, int INCX); +void copy_gpu(int N, float * X, int INCX, float * Y, int INCY); + +void cuda_set_device(int n); +void cuda_free(float *x_gpu); +float *cuda_make_array(float *x, size_t n); +void cuda_pull_array(float *x_gpu, float *x, size_t n); +float cuda_mag_array(float *x_gpu, size_t n); +void cuda_push_array(float *x_gpu, float *x, size_t n); + +void forward_network_gpu(network *net); +void backward_network_gpu(network *net); +void update_network_gpu(network *net); + +float train_networks(network **nets, int n, data d, int interval); +void sync_nets(network **nets, int n, int interval); +void harmless_update_network_gpu(network *net); +#endif +image get_label(image **characters, char *string, int size); +void draw_label(image a, int r, int c, image label, const float *rgb); +void save_image(image im, const char *name); +void save_image_options(image im, const char *name, IMTYPE f, int quality); +void get_next_batch(data d, int n, int offset, float *X, float *y); +void grayscale_image_3c(image im); +void normalize_image(image p); +void matrix_to_csv(matrix m); +float train_network_sgd(network *net, data d, int n); +void rgbgr_image(image im); +data copy_data(data d); +data concat_data(data d1, data d2); +data load_cifar10_data(char *filename); +float matrix_topk_accuracy(matrix truth, matrix guess, int k); +void matrix_add_matrix(matrix from, matrix to); +void scale_matrix(matrix m, float scale); +matrix csv_to_matrix(char *filename); +float *network_accuracies(network *net, data d, int n); +float train_network_datum(network *net); +image make_random_image(int w, int h, int c); + +void denormalize_connected_layer(layer l); +void 
denormalize_convolutional_layer(layer l); +void statistics_connected_layer(layer l); +void rescale_weights(layer l, float scale, float trans); +void rgbgr_weights(layer l); +image *get_weights(layer l); + +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, int avg, float hier_thresh, int w, int h, int fps, int fullscreen); +void get_detection_detections(layer l, int w, int h, float thresh, detection *dets); + +char *option_find_str(list *l, char *key, char *def); +int option_find_int(list *l, char *key, int def); +int option_find_int_quiet(list *l, char *key, int def); + +network *parse_network_cfg(char *filename); +void save_weights(network *net, char *filename); +void load_weights(network *net, char *filename); +void save_weights_upto(network *net, char *filename, int cutoff); +void load_weights_upto(network *net, char *filename, int start, int cutoff); + +void zero_objectness(layer l); +void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets); +int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets); +void free_network(network *net); +void set_batch_network(network *net, int b); +void set_temp_network(network *net, float t); +image load_image(char *filename, int w, int h, int c); +image load_image_color(char *filename, int w, int h); +image make_image(int w, int h, int c); +image resize_image(image im, int w, int h); +void censor_image(image im, int dx, int dy, int w, int h); +image letterbox_image(image im, int w, int h); +image crop_image(image im, int dx, int dy, int w, int h); +image center_crop_image(image im, int w, int h); +image resize_min(image im, int min); +image resize_max(image im, int max); +image threshold_image(image im, float thresh); +image mask_to_rgb(image mask); +int resize_network(network 
*net, int w, int h); +void free_matrix(matrix m); +void test_resize(char *filename); +int show_image(image p, const char *name, int ms); +image copy_image(image p); +void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b); +float get_current_rate(network *net); +void composite_3d(char *f1, char *f2, char *out, int delta); +data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h); +size_t get_current_batch(network *net); +void constrain_image(image im); +image get_network_image_layer(network *net, int i); +layer get_network_output_layer(network *net); +void top_predictions(network *net, int n, int *index); +void flip_image(image a); +image float_to_image(int w, int h, int c, float *data); +void ghost_image(image source, image dest, int dx, int dy); +float network_accuracy(network *net, data d); +void random_distort_image(image im, float hue, float saturation, float exposure); +void fill_image(image m, float s); +image grayscale_image(image im); +void rotate_image_cw(image im, int times); +double what_time_is_it_now(); +image rotate_image(image m, float rad); +void visualize_network(network *net); +float box_iou(box a, box b); +data load_all_cifar10(); +box_label *read_boxes(char *filename, int *n); +box float_to_box(float *f, int stride); +void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes); + +matrix network_predict_data(network *net, data test); +image **load_alphabet(); +image get_network_image(network *net); +float *network_predict(network *net, float *input); + +int network_width(network *net); +int network_height(network *net); +float *network_predict_image(network *net, image im); +void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets); +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num); +void 
free_detections(detection *dets, int n); + +void reset_network_state(network *net, int b); + +char **get_labels(char *filename); +void do_nms_obj(detection *dets, int total, int classes, float thresh); +void do_nms_sort(detection *dets, int total, int classes, float thresh); + +matrix make_matrix(int rows, int cols); + +#ifdef OPENCV +void *open_video_stream(const char *f, int c, int w, int h, int fps); +image get_image_from_stream(void *p); +void make_window(char *name, int w, int h, int fullscreen); +#endif + +void free_image(image m); +float train_network(network *net, data d); +pthread_t load_data_in_thread(load_args args); +void load_data_blocking(load_args args); +list *get_paths(char *filename); +void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride); +void change_leaves(tree *t, char *leaf_list); + +int find_int_arg(int argc, char **argv, char *arg, int def); +float find_float_arg(int argc, char **argv, char *arg, float def); +int find_arg(int argc, char* argv[], char *arg); +char *find_char_arg(int argc, char **argv, char *arg, char *def); +char *basecfg(char *cfgfile); +void find_replace(char *str, char *orig, char *rep, char *output); +void free_ptrs(void **ptrs, int n); +char *fgetl(FILE *fp); +void strip(char *s); +float sec(clock_t clocks); +void **list_to_array(list *l); +void top_k(float *a, int n, int k, int *index); +int *read_map(char *filename); +void error(const char *s); +int max_index(float *a, int n); +int max_int_index(int *a, int n); +int sample_array(float *a, int n); +int *random_index_order(int min, int max); +void free_list(list *l); +float mse_array(float *a, int n); +float variance_array(float *a, int n); +float mag_array(float *a, int n); +void scale_array(float *a, int n, float s); +float mean_array(float *a, int n); +float sum_array(float *a, int n); +void normalize_array(float *a, int n); +int *read_intlist(char *s, int *n, int d); +size_t rand_size_t(); +float rand_normal(); +float 
rand_uniform(float min, float max); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/predictions.jpg b/workloads/realworld/uvm_prefetch/darknet/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c92d70d77e70e11853e9838ca90b46eb71a18ffa Binary files /dev/null and b/workloads/realworld/uvm_prefetch/darknet/predictions.jpg differ diff --git a/workloads/realworld/uvm_prefetch/darknet/python/darknet.py b/workloads/realworld/uvm_prefetch/darknet/python/darknet.py new file mode 100644 index 0000000000000000000000000000000000000000..b14d24485d86aa69f3991be79ec4f25c2b8e5a59 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/python/darknet.py @@ -0,0 +1,156 @@ +from ctypes import * +import math +import random + +def sample(probs): + s = sum(probs) + probs = [a/s for a in probs] + r = random.uniform(0, 1) + for i in range(len(probs)): + r = r - probs[i] + if r <= 0: + return i + return len(probs)-1 + +def c_array(ctype, values): + arr = (ctype*len(values))() + arr[:] = values + return arr + +class BOX(Structure): + _fields_ = [("x", c_float), + ("y", c_float), + ("w", c_float), + ("h", c_float)] + +class DETECTION(Structure): + _fields_ = [("bbox", BOX), + ("classes", c_int), + ("prob", POINTER(c_float)), + ("mask", POINTER(c_float)), + ("objectness", c_float), + ("sort_class", c_int)] + + +class IMAGE(Structure): + _fields_ = [("w", c_int), + ("h", c_int), + ("c", c_int), + ("data", POINTER(c_float))] + +class METADATA(Structure): + _fields_ = [("classes", c_int), + ("names", POINTER(c_char_p))] + + + +#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL) +lib = CDLL("libdarknet.so", RTLD_GLOBAL) +lib.network_width.argtypes = [c_void_p] +lib.network_width.restype = c_int +lib.network_height.argtypes = [c_void_p] +lib.network_height.restype = c_int + +predict = lib.network_predict +predict.argtypes = [c_void_p, POINTER(c_float)] +predict.restype = POINTER(c_float) + 
+set_gpu = lib.cuda_set_device +set_gpu.argtypes = [c_int] + +make_image = lib.make_image +make_image.argtypes = [c_int, c_int, c_int] +make_image.restype = IMAGE + +get_network_boxes = lib.get_network_boxes +get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)] +get_network_boxes.restype = POINTER(DETECTION) + +make_network_boxes = lib.make_network_boxes +make_network_boxes.argtypes = [c_void_p] +make_network_boxes.restype = POINTER(DETECTION) + +free_detections = lib.free_detections +free_detections.argtypes = [POINTER(DETECTION), c_int] + +free_ptrs = lib.free_ptrs +free_ptrs.argtypes = [POINTER(c_void_p), c_int] + +network_predict = lib.network_predict +network_predict.argtypes = [c_void_p, POINTER(c_float)] + +reset_rnn = lib.reset_rnn +reset_rnn.argtypes = [c_void_p] + +load_net = lib.load_network +load_net.argtypes = [c_char_p, c_char_p, c_int] +load_net.restype = c_void_p + +do_nms_obj = lib.do_nms_obj +do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +do_nms_sort = lib.do_nms_sort +do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +free_image = lib.free_image +free_image.argtypes = [IMAGE] + +letterbox_image = lib.letterbox_image +letterbox_image.argtypes = [IMAGE, c_int, c_int] +letterbox_image.restype = IMAGE + +load_meta = lib.get_metadata +lib.get_metadata.argtypes = [c_char_p] +lib.get_metadata.restype = METADATA + +load_image = lib.load_image_color +load_image.argtypes = [c_char_p, c_int, c_int] +load_image.restype = IMAGE + +rgbgr_image = lib.rgbgr_image +rgbgr_image.argtypes = [IMAGE] + +predict_image = lib.network_predict_image +predict_image.argtypes = [c_void_p, IMAGE] +predict_image.restype = POINTER(c_float) + +def classify(net, meta, im): + out = predict_image(net, im) + res = [] + for i in range(meta.classes): + res.append((meta.names[i], out[i])) + res = sorted(res, key=lambda x: -x[1]) + return res + +def detect(net, meta, image, thresh=.5, 
hier_thresh=.5, nms=.45): + im = load_image(image, 0, 0) + num = c_int(0) + pnum = pointer(num) + predict_image(net, im) + dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum) + num = pnum[0] + if (nms): do_nms_obj(dets, num, meta.classes, nms); + + res = [] + for j in range(num): + for i in range(meta.classes): + if dets[j].prob[i] > 0: + b = dets[j].bbox + res.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h))) + res = sorted(res, key=lambda x: -x[1]) + free_image(im) + free_detections(dets, num) + return res + +if __name__ == "__main__": + #net = load_net("cfg/densenet201.cfg", "/home/pjreddie/trained/densenet201.weights", 0) + #im = load_image("data/wolf.jpg", 0, 0) + #meta = load_meta("cfg/imagenet1k.data") + #r = classify(net, meta, im) + #print r[:10] + net = load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0) + meta = load_meta("cfg/coco.data") + r = detect(net, meta, "data/dog.jpg") + print(r) + + diff --git a/workloads/realworld/uvm_prefetch/darknet/python/proverbot.py b/workloads/realworld/uvm_prefetch/darknet/python/proverbot.py new file mode 100644 index 0000000000000000000000000000000000000000..095aae8f8bf8bbe47ea1768a6e2c948bb0ff8f85 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/python/proverbot.py @@ -0,0 +1,37 @@ +from darknet import * + +def predict_tactic(net, s): + prob = 0 + d = c_array(c_float, [0.0]*256) + tac = '' + if not len(s): + s = '\n' + for c in s[:-1]: + d[ord(c)] = 1 + pred = predict(net, d) + d[ord(c)] = 0 + c = s[-1] + while 1: + d[ord(c)] = 1 + pred = predict(net, d) + d[ord(c)] = 0 + pred = [pred[i] for i in range(256)] + ind = sample(pred) + c = chr(ind) + prob += math.log(pred[ind]) + if len(tac) and tac[-1] == '.': + break + tac = tac + c + return (tac, prob) + +def predict_tactics(net, s, n): + tacs = [] + for i in range(n): + reset_rnn(net) + tacs.append(predict_tactic(net, s)) + tacs = sorted(tacs, key=lambda x: -x[1]) + return tacs + +net = load_net("cfg/coq.test.cfg", 
"/home/pjreddie/backup/coq.backup", 0) +t = predict_tactics(net, "+++++\n", 10) +print t diff --git a/workloads/realworld/uvm_prefetch/darknet/resnet18/run_resnet18.sh b/workloads/realworld/uvm_prefetch/darknet/resnet18/run_resnet18.sh new file mode 100755 index 0000000000000000000000000000000000000000..10257ad02affc17f8287ea42917813fe320f5f23 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/resnet18/run_resnet18.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm_prefetch/darknet/resnet18/run_super.sh b/workloads/realworld/uvm_prefetch/darknet/resnet18/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..10257ad02affc17f8287ea42917813fe320f5f23 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/resnet18/run_super.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm_prefetch/darknet/resnet18_b/run_super.sh b/workloads/realworld/uvm_prefetch/darknet/resnet18_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..012635a1ce64ecda462e50097be554185989ae7a --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/resnet18_b/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier test ../cfg/imagenet1k.data ../cfg/resnet18_b.cfg diff --git a/workloads/realworld/uvm_prefetch/darknet/resnet18_t/run_super.sh b/workloads/realworld/uvm_prefetch/darknet/resnet18_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..0eb59b3bd65cf0186c5ed5f36eff5ec34d54298c --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/resnet18_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet 
classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier train ../cfg/imagenet1k.data ../cfg/resnet18_t.cfg \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/darknet/resnet50/run_resnet50.sh b/workloads/realworld/uvm_prefetch/darknet/resnet50/run_resnet50.sh new file mode 100755 index 0000000000000000000000000000000000000000..ce88111af06600203c1ae94421134eb3404f51a4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/resnet50/run_resnet50.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet50.cfg ../../../../../data/darknet/resnet50.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm_prefetch/darknet/resnet50/run_super.sh b/workloads/realworld/uvm_prefetch/darknet/resnet50/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ce88111af06600203c1ae94421134eb3404f51a4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/resnet50/run_super.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet50.cfg ../../../../../data/darknet/resnet50.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm_prefetch/darknet/resnet50_b/run_super.sh b/workloads/realworld/uvm_prefetch/darknet/resnet50_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..e6f1b1d59b612bef36d04af547bf61808261eb12 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/resnet50_b/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier test ../cfg/imagenet1k.data ../cfg/resnet50_b.cfg diff --git a/workloads/realworld/uvm_prefetch/darknet/resnet50_t/run_super.sh b/workloads/realworld/uvm_prefetch/darknet/resnet50_t/run_super.sh new file mode 100755 index 
0000000000000000000000000000000000000000..4d4c7feebd1bd5bdcded72e3d4cf58045949ac90 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/resnet50_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier train ../cfg/imagenet1k.data ../cfg/resnet50_t.cfg diff --git a/workloads/realworld/uvm_prefetch/darknet/scripts/dice_label.sh b/workloads/realworld/uvm_prefetch/darknet/scripts/dice_label.sh new file mode 100644 index 0000000000000000000000000000000000000000..f19f8a49481b46d5a04dd18b1b05af8928b21957 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/scripts/dice_label.sh @@ -0,0 +1,20 @@ +mkdir -p images +mkdir -p images/orig +mkdir -p images/train +mkdir -p images/val + +ffmpeg -i Face1.mp4 images/orig/face1_%6d.jpg +ffmpeg -i Face2.mp4 images/orig/face2_%6d.jpg +ffmpeg -i Face3.mp4 images/orig/face3_%6d.jpg +ffmpeg -i Face4.mp4 images/orig/face4_%6d.jpg +ffmpeg -i Face5.mp4 images/orig/face5_%6d.jpg +ffmpeg -i Face6.mp4 images/orig/face6_%6d.jpg + +mogrify -resize 100x100^ -gravity center -crop 100x100+0+0 +repage images/orig/* + +ls images/orig/* | shuf | head -n 1000 | xargs mv -t images/val +mv images/orig/* images/train + +find `pwd`/images/train > dice.train.list -name \*.jpg +find `pwd`/images/val > dice.val.list -name \*.jpg + diff --git a/workloads/realworld/uvm_prefetch/darknet/scripts/gen_tactic.sh b/workloads/realworld/uvm_prefetch/darknet/scripts/gen_tactic.sh new file mode 100755 index 0000000000000000000000000000000000000000..ffa30d27754dacdd03bd5996d41cbfab14db0f39 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/scripts/gen_tactic.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Usage: +# wget http://pjreddie.com/media/files/peek.weights +# scripts/gen_tactic.sh < data/goal.txt +./darknet rnn generatetactic cfg/gru.cfg peek.weights 2>/dev/null diff --git 
a/workloads/realworld/uvm_prefetch/darknet/scripts/get_coco_dataset.sh b/workloads/realworld/uvm_prefetch/darknet/scripts/get_coco_dataset.sh new file mode 100644 index 0000000000000000000000000000000000000000..28463015d1748fd331e071a0a778c6d4500b29ef --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/scripts/get_coco_dataset.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Clone COCO API +git clone https://github.com/pdollar/coco +cd coco + +mkdir images +cd images + +# Download Images +wget -c https://pjreddie.com/media/files/train2014.zip +wget -c https://pjreddie.com/media/files/val2014.zip + +# Unzip +unzip -q train2014.zip +unzip -q val2014.zip + +cd .. + +# Download COCO Metadata +wget -c https://pjreddie.com/media/files/instances_train-val2014.zip +wget -c https://pjreddie.com/media/files/coco/5k.part +wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part +wget -c https://pjreddie.com/media/files/coco/labels.tgz +tar xzf labels.tgz +unzip -q instances_train-val2014.zip + +# Set Up Image Lists +paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt +paste <(awk "{print \"$PWD\"}" trainvalno5k.txt + diff --git a/workloads/realworld/uvm_prefetch/darknet/scripts/imagenet_label.sh b/workloads/realworld/uvm_prefetch/darknet/scripts/imagenet_label.sh new file mode 100644 index 0000000000000000000000000000000000000000..01e4306ee3cf7322427374f01c766bcdef970922 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/scripts/imagenet_label.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +mkdir -p labelled +wd=`pwd` + +for f in val/*.xml; +do +label=`grep -m1 "" $f | grep -oP '\K[^<]*'` +im=`echo $f | sed 's/val/imgs/; s/xml/JPEG/'` +out=`echo $im | sed 's/JPEG/'${label}'.JPEG/; s/imgs/labelled/'` +ln -s ${wd}/$im ${wd}/$out +done + +find ${wd}/labelled -name \*.JPEG > inet.val.list + diff --git a/workloads/realworld/uvm_prefetch/darknet/scripts/voc_label.py b/workloads/realworld/uvm_prefetch/darknet/scripts/voc_label.py new file mode 100644 index 
0000000000000000000000000000000000000000..679fc366890d9eccf15124f950a274d8ad24fc83 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/scripts/voc_label.py @@ -0,0 +1,59 @@ +import xml.etree.ElementTree as ET +import pickle +import os +from os import listdir, getcwd +from os.path import join + +sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')] + +classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] + + +def convert(size, box): + dw = 1./(size[0]) + dh = 1./(size[1]) + x = (box[0] + box[1])/2.0 - 1 + y = (box[2] + box[3])/2.0 - 1 + w = box[1] - box[0] + h = box[3] - box[2] + x = x*dw + w = w*dw + y = y*dh + h = h*dh + return (x,y,w,h) + +def convert_annotation(year, image_id): + in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id)) + out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w') + tree=ET.parse(in_file) + root = tree.getroot() + size = root.find('size') + w = int(size.find('width').text) + h = int(size.find('height').text) + + for obj in root.iter('object'): + difficult = obj.find('difficult').text + cls = obj.find('name').text + if cls not in classes or int(difficult)==1: + continue + cls_id = classes.index(cls) + xmlbox = obj.find('bndbox') + b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) + bb = convert((w,h), b) + out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') + +wd = getcwd() + +for year, image_set in sets: + if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)): + os.makedirs('VOCdevkit/VOC%s/labels/'%(year)) + image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split() + list_file = open('%s_%s.txt'%(year, image_set), 'w') + for image_id in image_ids: + 
list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id)) + convert_annotation(year, image_id) + list_file.close() + +os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt") +os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt") + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/activation_kernels.cu b/workloads/realworld/uvm_prefetch/darknet/src/activation_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..ee4d511541d8a5f5fd6f6ba9b538e26bede124e3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/activation_kernels.cu @@ -0,0 +1,242 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "activations.h" +#include "cuda_dark.h" +} + + +__device__ float lhtan_activate_kernel(float x) +{ + if(x < 0) return .001f*x; + if(x > 1) return .001f*(x-1.f) + 1.f; + return x; +} +__device__ float lhtan_gradient_kernel(float x) +{ + if(x > 0 && x < 1) return 1; + return .001; +} + +__device__ float hardtan_activate_kernel(float x) +{ + if (x < -1) return -1; + if (x > 1) return 1; + return x; +} +__device__ float linear_activate_kernel(float x){return x;} +__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));} +__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;} +__device__ float relu_activate_kernel(float x){return x*(x>0);} +__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);} +__device__ float selu_activate_kernel(float x){return (x >= 0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x)-1);} +__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;} +__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;} +__device__ float leaky_activate_kernel(float x){return (x>0) ? 
x : .1f*x;} +__device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);} +__device__ float plse_activate_kernel(float x) +{ + if(x < -4) return .01f * (x + 4); + if(x > 4) return .01f * (x - 4) + 1; + return .125f*x + .5f; +} +__device__ float stair_activate_kernel(float x) +{ + int n = floorf(x); + if (n%2 == 0) return floorf(x/2); + else return (x - n) + floorf(x/2); +} + + +__device__ float hardtan_gradient_kernel(float x) +{ + if (x > -1 && x < 1) return 1; + return 0; +} +__device__ float linear_gradient_kernel(float x){return 1;} +__device__ float logistic_gradient_kernel(float x){return (1-x)*x;} +__device__ float loggy_gradient_kernel(float x) +{ + float y = (x+1)/2; + return 2*(1-y)*y; +} +__device__ float relu_gradient_kernel(float x){return (x>0);} +__device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);} +__device__ float selu_gradient_kernel(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);} +__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;} +__device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;} +__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;} +__device__ float tanh_gradient_kernel(float x){return 1-x*x;} +__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? 
.01f : .125f;} +__device__ float stair_gradient_kernel(float x) +{ + if (floorf(x) == x) return 0; + return 1; +} + +__device__ float activate_kernel(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_activate_kernel(x); + case LOGISTIC: + return logistic_activate_kernel(x); + case LOGGY: + return loggy_activate_kernel(x); + case RELU: + return relu_activate_kernel(x); + case ELU: + return elu_activate_kernel(x); + case SELU: + return selu_activate_kernel(x); + case RELIE: + return relie_activate_kernel(x); + case RAMP: + return ramp_activate_kernel(x); + case LEAKY: + return leaky_activate_kernel(x); + case TANH: + return tanh_activate_kernel(x); + case PLSE: + return plse_activate_kernel(x); + case STAIR: + return stair_activate_kernel(x); + case HARDTAN: + return hardtan_activate_kernel(x); + case LHTAN: + return lhtan_activate_kernel(x); + } + return 0; +} + +__device__ float gradient_kernel(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_gradient_kernel(x); + case LOGISTIC: + return logistic_gradient_kernel(x); + case LOGGY: + return loggy_gradient_kernel(x); + case RELU: + return relu_gradient_kernel(x); + case ELU: + return elu_gradient_kernel(x); + case SELU: + return selu_gradient_kernel(x); + case RELIE: + return relie_gradient_kernel(x); + case RAMP: + return ramp_gradient_kernel(x); + case LEAKY: + return leaky_gradient_kernel(x); + case TANH: + return tanh_gradient_kernel(x); + case PLSE: + return plse_gradient_kernel(x); + case STAIR: + return stair_gradient_kernel(x); + case HARDTAN: + return hardtan_gradient_kernel(x); + case LHTAN: + return lhtan_gradient_kernel(x); + } + return 0; +} + +__global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int i = id % s; + int b = id / s; + float x1 = x[b*s + i]; + float x2 = x[b*s + s/2 + i]; + if(id < n) { + float de = dy[id]; + dx[b*s 
+ i] = x2*de; + dx[b*s + s/2 + i] = x1*de; + } +} + +extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y) +{ + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(x, n / 2 * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(dx, n / 2 * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(y, n / 2 * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + binary_gradient_array_kernel<<>>(x, dx, n/2, size, a, y); + check_error(cudaPeekAtLastError()); +} +__global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int i = id % s; + int b = id / s; + float x1 = x[b*s + i]; + float x2 = x[b*s + s/2 + i]; + if(id < n) y[id] = x1*x2; +} + +extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y) +{ + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(x, n / 2 * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(y, n / 2 * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + binary_activate_array_kernel<<>>(x, n / 2, size, a, y); + check_error(cudaPeekAtLastError()); +} + +__global__ void activate_array_kernel(float *x, int n, ACTIVATION a) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n) x[i] = activate_kernel(x[i], a); +} + +__global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n) delta[i] *= gradient_kernel(x[i], a); +} + 
+extern "C" void activate_array_gpu(float *x, int n, ACTIVATION a) +{ + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(x, n * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + activate_array_kernel<<>>(x, n, a); + check_error(cudaPeekAtLastError()); +} + +extern "C" void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta) +{ + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(x, n * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + gradient_array_kernel<<>>(x, n, a, delta); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/uvm_prefetch/darknet/src/activation_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/activation_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..0791772336e4d1b001ed1b76bbbf21ee8d6fa24f --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/activation_layer.c @@ -0,0 +1,63 @@ +#include "activation_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +layer make_activation_layer(int batch, int inputs, ACTIVATION activation) +{ + layer l = {0}; + l.type = ACTIVE; + + l.inputs = inputs; + l.outputs = inputs; + l.batch=batch; + + l.output = calloc(batch*inputs, sizeof(float*)); + l.delta = calloc(batch*inputs, sizeof(float*)); + + l.forward = forward_activation_layer; + l.backward = backward_activation_layer; +#ifdef GPU + l.forward_gpu = forward_activation_layer_gpu; + l.backward_gpu = backward_activation_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); +#endif + l.activation = activation; + fprintf(stderr, "Activation Layer: %d inputs\n", inputs); + return l; +} + +void forward_activation_layer(layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + activate_array(l.output, 
l.outputs*l.batch, l.activation); +} + +void backward_activation_layer(layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_activation_layer_gpu(layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_activation_layer_gpu(layer l, network net) +{ + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/activation_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/activation_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..42118a84e83f59a8997e354959404d1283a3004c --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/activation_layer.h @@ -0,0 +1,19 @@ +#ifndef ACTIVATION_LAYER_H +#define ACTIVATION_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_activation_layer(int batch, int inputs, ACTIVATION activation); + +void forward_activation_layer(layer l, network net); +void backward_activation_layer(layer l, network net); + +#ifdef GPU +void forward_activation_layer_gpu(layer l, network net); +void backward_activation_layer_gpu(layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/activations.c b/workloads/realworld/uvm_prefetch/darknet/src/activations.c new file mode 100644 index 0000000000000000000000000000000000000000..da1a17a89b46b6c41fa80b5dd113e1b30c910712 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/activations.c @@ -0,0 +1,150 @@ +#include "activations.h" + +#include +#include +#include +#include + +char *get_activation_string(ACTIVATION a) +{ + switch(a){ + case LOGISTIC: + return "logistic"; + case 
LOGGY: + return "loggy"; + case RELU: + return "relu"; + case ELU: + return "elu"; + case SELU: + return "selu"; + case RELIE: + return "relie"; + case RAMP: + return "ramp"; + case LINEAR: + return "linear"; + case TANH: + return "tanh"; + case PLSE: + return "plse"; + case LEAKY: + return "leaky"; + case STAIR: + return "stair"; + case HARDTAN: + return "hardtan"; + case LHTAN: + return "lhtan"; + default: + break; + } + return "relu"; +} + +ACTIVATION get_activation(char *s) +{ + if (strcmp(s, "logistic")==0) return LOGISTIC; + if (strcmp(s, "loggy")==0) return LOGGY; + if (strcmp(s, "relu")==0) return RELU; + if (strcmp(s, "elu")==0) return ELU; + if (strcmp(s, "selu")==0) return SELU; + if (strcmp(s, "relie")==0) return RELIE; + if (strcmp(s, "plse")==0) return PLSE; + if (strcmp(s, "hardtan")==0) return HARDTAN; + if (strcmp(s, "lhtan")==0) return LHTAN; + if (strcmp(s, "linear")==0) return LINEAR; + if (strcmp(s, "ramp")==0) return RAMP; + if (strcmp(s, "leaky")==0) return LEAKY; + if (strcmp(s, "tanh")==0) return TANH; + if (strcmp(s, "stair")==0) return STAIR; + fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s); + return RELU; +} + +float activate(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_activate(x); + case LOGISTIC: + return logistic_activate(x); + case LOGGY: + return loggy_activate(x); + case RELU: + return relu_activate(x); + case ELU: + return elu_activate(x); + case SELU: + return selu_activate(x); + case RELIE: + return relie_activate(x); + case RAMP: + return ramp_activate(x); + case LEAKY: + return leaky_activate(x); + case TANH: + return tanh_activate(x); + case PLSE: + return plse_activate(x); + case STAIR: + return stair_activate(x); + case HARDTAN: + return hardtan_activate(x); + case LHTAN: + return lhtan_activate(x); + } + return 0; +} + +void activate_array(float *x, const int n, const ACTIVATION a) +{ + int i; + for(i = 0; i < n; ++i){ + x[i] = activate(x[i], a); + } +} + +float 
gradient(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_gradient(x); + case LOGISTIC: + return logistic_gradient(x); + case LOGGY: + return loggy_gradient(x); + case RELU: + return relu_gradient(x); + case ELU: + return elu_gradient(x); + case SELU: + return selu_gradient(x); + case RELIE: + return relie_gradient(x); + case RAMP: + return ramp_gradient(x); + case LEAKY: + return leaky_gradient(x); + case TANH: + return tanh_gradient(x); + case PLSE: + return plse_gradient(x); + case STAIR: + return stair_gradient(x); + case HARDTAN: + return hardtan_gradient(x); + case LHTAN: + return lhtan_gradient(x); + } + return 0; +} + +void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta) +{ + int i; + for(i = 0; i < n; ++i){ + delta[i] *= gradient(x[i], a); + } +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/activations.h b/workloads/realworld/uvm_prefetch/darknet/src/activations.h new file mode 100644 index 0000000000000000000000000000000000000000..eec28d5b692ede3975e01a4d454ace20e8a9fdd8 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/activations.h @@ -0,0 +1,87 @@ +#ifndef ACTIVATIONS_H +#define ACTIVATIONS_H +#include "darknet.h" +#include "cuda_dark.h" +#include "math.h" + +ACTIVATION get_activation(char *s); + +char *get_activation_string(ACTIVATION a); +float activate(float x, ACTIVATION a); +float gradient(float x, ACTIVATION a); +void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta); +void activate_array(float *x, const int n, const ACTIVATION a); +#ifdef GPU +void activate_array_gpu(float *x, int n, ACTIVATION a); +void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta); +#endif + +static inline float stair_activate(float x) +{ + int n = floor(x); + if (n%2 == 0) return floor(x/2.); + else return (x - n) + floor(x/2.); +} +static inline float hardtan_activate(float x) +{ + if (x < -1) return -1; + if (x > 1) return 1; + return x; +} +static 
inline float linear_activate(float x){return x;} +static inline float logistic_activate(float x){return 1./(1. + exp(-x));} +static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;} +static inline float relu_activate(float x){return x*(x>0);} +static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);} +static inline float selu_activate(float x){return (x >= 0)*1.0507*x + (x < 0)*1.0507*1.6732*(exp(x)-1);} +static inline float relie_activate(float x){return (x>0) ? x : .01*x;} +static inline float ramp_activate(float x){return x*(x>0)+.1*x;} +static inline float leaky_activate(float x){return (x>0) ? x : .1*x;} +static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);} +static inline float plse_activate(float x) +{ + if(x < -4) return .01 * (x + 4); + if(x > 4) return .01 * (x - 4) + 1; + return .125*x + .5; +} + +static inline float lhtan_activate(float x) +{ + if(x < 0) return .001*x; + if(x > 1) return .001*(x-1) + 1; + return x; +} +static inline float lhtan_gradient(float x) +{ + if(x > 0 && x < 1) return 1; + return .001; +} + +static inline float hardtan_gradient(float x) +{ + if (x > -1 && x < 1) return 1; + return 0; +} +static inline float linear_gradient(float x){return 1;} +static inline float logistic_gradient(float x){return (1-x)*x;} +static inline float loggy_gradient(float x) +{ + float y = (x+1.)/2.; + return 2*(1-y)*y; +} +static inline float stair_gradient(float x) +{ + if (floor(x) == x) return 0; + return 1; +} +static inline float relu_gradient(float x){return (x>0);} +static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);} +static inline float selu_gradient(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);} +static inline float relie_gradient(float x){return (x>0) ? 1 : .01;} +static inline float ramp_gradient(float x){return (x>0)+.1;} +static inline float leaky_gradient(float x){return (x>0) ? 
1 : .1;} +static inline float tanh_gradient(float x){return 1-x*x;} +static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01 : .125;} + +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/avgpool_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/avgpool_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..7d17fa8f829aba43652117c141fb8b54ef4cf5dc --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/avgpool_layer.c @@ -0,0 +1,71 @@ +#include "avgpool_layer.h" +#include "cuda_dark.h" +#include + +avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) +{ + fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c); + avgpool_layer l = {0}; + l.type = AVGPOOL; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.out_w = 1; + l.out_h = 1; + l.out_c = c; + l.outputs = l.out_c; + l.inputs = h*w*c; + int output_size = l.outputs * batch; + l.output = calloc(output_size, sizeof(float)); + l.delta = calloc(output_size, sizeof(float)); + l.forward = forward_avgpool_layer; + l.backward = backward_avgpool_layer; + #ifdef GPU + l.forward_gpu = forward_avgpool_layer_gpu; + l.backward_gpu = backward_avgpool_layer_gpu; + l.output_gpu = cuda_make_array(l.output, output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); + #endif + return l; +} + +void resize_avgpool_layer(avgpool_layer *l, int w, int h) +{ + l->w = w; + l->h = h; + l->inputs = h*w*l->c; +} + +void forward_avgpool_layer(const avgpool_layer l, network net) +{ + int b,i,k; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < l.c; ++k){ + int out_index = k + b*l.c; + l.output[out_index] = 0; + for(i = 0; i < l.h*l.w; ++i){ + int in_index = i + l.h*l.w*(k + b*l.c); + l.output[out_index] += net.input[in_index]; + } + l.output[out_index] /= l.h*l.w; + } + } +} + +void backward_avgpool_layer(const avgpool_layer l, network net) +{ + int b,i,k; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < l.c; ++k){ + int out_index = k + b*l.c; + 
for(i = 0; i < l.h*l.w; ++i){ + int in_index = i + l.h*l.w*(k + b*l.c); + net.delta[in_index] += l.delta[out_index] / (l.h*l.w); + } + } + } +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/avgpool_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/avgpool_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..576ad1db9e9cb87640b0c3f764e2bbfbaae4b2b3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/avgpool_layer.h @@ -0,0 +1,23 @@ +#ifndef AVGPOOL_LAYER_H +#define AVGPOOL_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +typedef layer avgpool_layer; + +image get_avgpool_image(avgpool_layer l); +avgpool_layer make_avgpool_layer(int batch, int w, int h, int c); +void resize_avgpool_layer(avgpool_layer *l, int w, int h); +void forward_avgpool_layer(const avgpool_layer l, network net); +void backward_avgpool_layer(const avgpool_layer l, network net); + +#ifdef GPU +void forward_avgpool_layer_gpu(avgpool_layer l, network net); +void backward_avgpool_layer_gpu(avgpool_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/avgpool_layer_kernels.cu b/workloads/realworld/uvm_prefetch/darknet/src/avgpool_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..bd5afbf394bb702332188fa279af7c2e67583b93 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/avgpool_layer_kernels.cu @@ -0,0 +1,81 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "avgpool_layer.h" +#include "cuda_dark.h" +} + +__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int k = id % c; + id /= c; + int b = id; + + int i; + int out_index = (k + c*b); + output[out_index] = 0; + for(i = 0; i < w*h; ++i){ + int in_index = 
i + h*w*(k + b*c); + output[out_index] += input[in_index]; + } + output[out_index] /= w*h; +} + +__global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int k = id % c; + id /= c; + int b = id; + + int i; + int out_index = (k + c*b); + for(i = 0; i < w*h; ++i){ + int in_index = i + h*w*(k + b*c); + in_delta[in_index] += out_delta[out_index] / (w*h); + } +} + +extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network net) +{ + size_t n = layer.c*layer.batch; + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(net.input_gpu, n * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(layer.output_gpu, n * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + forward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, net.input_gpu, layer.output_gpu); + check_error(cudaPeekAtLastError()); +} + +extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network net) +{ + size_t n = layer.c*layer.batch; + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(net.delta_gpu, n * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(layer.delta_gpu, n * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + backward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, net.delta_gpu, layer.delta_gpu); + check_error(cudaPeekAtLastError()); +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/batchnorm_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/batchnorm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..ebff387cc4b0365173fb6727efd80ebc80bfbd41 --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch/darknet/src/batchnorm_layer.c @@ -0,0 +1,279 @@ +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "blas.h" +#include + +layer make_batchnorm_layer(int batch, int w, int h, int c) +{ + fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c); + layer l = {0}; + l.type = BATCHNORM; + l.batch = batch; + l.h = l.out_h = h; + l.w = l.out_w = w; + l.c = l.out_c = c; + l.output = calloc(h * w * c * batch, sizeof(float)); + l.delta = calloc(h * w * c * batch, sizeof(float)); + l.inputs = w*h*c; + l.outputs = l.inputs; + + l.scales = calloc(c, sizeof(float)); + l.scale_updates = calloc(c, sizeof(float)); + l.biases = calloc(c, sizeof(float)); + l.bias_updates = calloc(c, sizeof(float)); + int i; + for(i = 0; i < c; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(c, sizeof(float)); + l.variance = calloc(c, sizeof(float)); + + l.rolling_mean = calloc(c, sizeof(float)); + l.rolling_variance = calloc(c, sizeof(float)); + + l.forward = forward_batchnorm_layer; + l.backward = backward_batchnorm_layer; +#ifdef GPU + l.forward_gpu = forward_batchnorm_layer_gpu; + l.backward_gpu = backward_batchnorm_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, h * w * c * batch); + l.delta_gpu = cuda_make_array(l.delta, h * w * c * batch); + + l.biases_gpu = cuda_make_array(l.biases, c); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, c); + + l.scales_gpu = cuda_make_array(l.scales, c); + l.scale_updates_gpu = cuda_make_array(l.scale_updates, c); + + l.mean_gpu = cuda_make_array(l.mean, c); + l.variance_gpu = cuda_make_array(l.variance, c); + + l.rolling_mean_gpu = cuda_make_array(l.mean, c); + l.rolling_variance_gpu = cuda_make_array(l.variance, c); + + l.mean_delta_gpu = cuda_make_array(l.mean, c); + l.variance_delta_gpu = cuda_make_array(l.variance, c); + + l.x_gpu = cuda_make_array(l.output, l.batch*l.outputs); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*l.outputs); + #ifdef CUDNN + 
cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); + + #endif +#endif + return l; +} + +void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + int i,b,f; + for(f = 0; f < n; ++f){ + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int index = i + size*(f + n*b); + sum += delta[index] * x_norm[index]; + } + } + scale_updates[f] += sum; + } +} + +void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + + int i,j,k; + for(i = 0; i < filters; ++i){ + mean_delta[i] = 0; + for (j = 0; j < batch; ++j) { + for (k = 0; k < spatial; ++k) { + int index = j*filters*spatial + i*spatial + k; + mean_delta[i] += delta[index]; + } + } + mean_delta[i] *= (-1./sqrt(variance[i] + .00001f)); + } +} +void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + + int i,j,k; + for(i = 0; i < filters; ++i){ + variance_delta[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance_delta[i] += delta[index]*(x[index] - mean[i]); + } + } + variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.)); + } +} +void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + int f, j, k; + for(j = 0; j < batch; ++j){ + for(f = 0; f < filters; ++f){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + f*spatial + k; + delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f)) + variance_delta[f] * 2. 
* (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); + } + } + } +} + +void resize_batchnorm_layer(layer *layer, int w, int h) +{ + fprintf(stderr, "Not implemented\n"); +} + +void forward_batchnorm_layer(layer l, network net) +{ + if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1); + if(net.train){ + mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean); + variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance); + + scal_cpu(l.out_c, .99, l.rolling_mean, 1); + axpy_cpu(l.out_c, .01, l.mean, 1, l.rolling_mean, 1); + scal_cpu(l.out_c, .99, l.rolling_variance, 1); + axpy_cpu(l.out_c, .01, l.variance, 1, l.rolling_variance, 1); + + normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w); + copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1); + } else { + normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h*l.out_w); + } + scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h*l.out_w); + add_bias(l.output, l.biases, l.batch, l.out_c, l.out_h*l.out_w); +} + +void backward_batchnorm_layer(layer l, network net) +{ + if(!net.train){ + l.mean = l.rolling_mean; + l.variance = l.rolling_variance; + } + backward_bias(l.bias_updates, l.delta, l.batch, l.out_c, l.out_w*l.out_h); + backward_scale_cpu(l.x_norm, l.delta, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates); + + scale_bias(l.delta, l.scales, l.batch, l.out_c, l.out_h*l.out_w); + + mean_delta_cpu(l.delta, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta); + variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta); + normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.out_c, l.out_w*l.out_h, l.delta); + if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_batchnorm_layer(layer l) 
+{ + cuda_pull_array(l.scales_gpu, l.scales, l.c); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.c); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.c); +} +void push_batchnorm_layer(layer l) +{ + cuda_push_array(l.scales_gpu, l.scales, l.c); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.c); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.c); +} + +void forward_batchnorm_layer_gpu(layer l, network net) +{ + if(l.type == BATCHNORM) copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); + if (net.train) { +#ifdef CUDNN + float one = 1; + float zero = 0; + cudnnBatchNormalizationForwardTraining(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + l.dstTensorDesc, + l.x_gpu, + l.dstTensorDesc, + l.output_gpu, + l.normTensorDesc, + l.scales_gpu, + l.biases_gpu, + .01, + l.rolling_mean_gpu, + l.rolling_variance_gpu, + .00001, + l.mean_gpu, + l.variance_gpu); +#else + fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu); + fast_variance_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.variance_gpu); + + scal_gpu(l.out_c, .99, l.rolling_mean_gpu, 1); + axpy_gpu(l.out_c, .01, l.mean_gpu, 1, l.rolling_mean_gpu, 1); + scal_gpu(l.out_c, .99, l.rolling_variance_gpu, 1); + axpy_gpu(l.out_c, .01, l.variance_gpu, 1, l.rolling_variance_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); + normalize_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_norm_gpu, 1); + + scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); +#endif + } else { + normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); + scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, 
l.out_h*l.out_w); + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); + } + +} + +void backward_batchnorm_layer_gpu(layer l, network net) +{ + if(!net.train){ + l.mean_gpu = l.rolling_mean_gpu; + l.variance_gpu = l.rolling_variance_gpu; + } +#ifdef CUDNN + float one = 1; + float zero = 0; + cudnnBatchNormalizationBackward(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + &one, + &one, + l.dstTensorDesc, + l.x_gpu, + l.dstTensorDesc, + l.delta_gpu, + l.dstTensorDesc, + l.x_norm_gpu, + l.normTensorDesc, + l.scales_gpu, + l.scale_updates_gpu, + l.bias_updates_gpu, + .00001, + l.mean_gpu, + l.variance_gpu); + copy_gpu(l.outputs*l.batch, l.x_norm_gpu, 1, l.delta_gpu, 1); +#else + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h); + backward_scale_gpu(l.x_norm_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates_gpu); + + scale_bias_gpu(l.delta_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + + fast_mean_delta_gpu(l.delta_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta_gpu); + fast_variance_delta_gpu(l.x_gpu, l.delta_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta_gpu); + normalize_delta_gpu(l.x_gpu, l.mean_gpu, l.variance_gpu, l.mean_delta_gpu, l.variance_delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu); +#endif + if(l.type == BATCHNORM) copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/batchnorm_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/batchnorm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..25a18a3c8f2569bab135b088501248159e1cae11 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/batchnorm_layer.h @@ -0,0 +1,19 @@ +#ifndef BATCHNORM_LAYER_H +#define BATCHNORM_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +layer make_batchnorm_layer(int 
batch, int w, int h, int c); +void forward_batchnorm_layer(layer l, network net); +void backward_batchnorm_layer(layer l, network net); + +#ifdef GPU +void forward_batchnorm_layer_gpu(layer l, network net); +void backward_batchnorm_layer_gpu(layer l, network net); +void pull_batchnorm_layer(layer l); +void push_batchnorm_layer(layer l); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/blas.c b/workloads/realworld/uvm_prefetch/darknet/src/blas.c new file mode 100644 index 0000000000000000000000000000000000000000..9e1604449ba9aeb9decdc7f0395a38bd3b478671 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/blas.c @@ -0,0 +1,351 @@ +#include "blas.h" + +#include +#include +#include +#include +#include +#include +void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int b,i,j,k; + int out_c = c/(stride*stride); + + for(b = 0; b < batch; ++b){ + for(k = 0; k < c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int in_index = i + w*(j + h*(k + c*b)); + int c2 = k % out_c; + int offset = k / out_c; + int w2 = i*stride + offset % stride; + int h2 = j*stride + offset / stride; + int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); + if(forward) out[out_index] = x[in_index]; + else out[in_index] = x[out_index]; + } + } + } + } +} + +void flatten(float *x, int size, int layers, int batch, int forward) +{ + float *swap = calloc(size*layers*batch, sizeof(float)); + int i,c,b; + for(b = 0; b < batch; ++b){ + for(c = 0; c < layers; ++c){ + for(i = 0; i < size; ++i){ + int i1 = b*layers*size + c*size + i; + int i2 = b*layers*size + i*layers + c; + if (forward) swap[i2] = x[i1]; + else swap[i1] = x[i2]; + } + } + } + memcpy(x, swap, size*layers*batch*sizeof(float)); + free(swap); +} + +void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c) +{ + int i; + for(i = 0; i < n; ++i){ + c[i] = s[i]*a[i] + (1-s[i])*(b ? 
/* fragment: tail of weighted_sum_cpu ("b[i] : 0); } }") whose head sits on the
 * previous fused line. */

/// Backprop for weighted_sum_cpu; da/db may be NULL when not needed.
void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc)
{
    int i;
    for(i = 0; i < n; ++i){
        if(da) da[i] += dc[i] * s[i];
        if(db) db[i] += dc[i] * (1-s[i]);
        ds[i] += dc[i] * (a[i] - b[i]);
    }
}

/// Shortcut/residual add between two feature maps of possibly different
/// geometry: out = s1*out + s2*add over their overlapping region.
void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out)
{
    int stride = w1/w2;
    int sample = w2/w1;
    assert(stride == h1/h2);
    assert(sample == h2/h1);
    if(stride < 1) stride = 1;
    if(sample < 1) sample = 1;
    int minw = (w1 < w2) ? w1 : w2;
    int minh = (h1 < h2) ? h1 : h2;
    int minc = (c1 < c2) ? c1 : c2;

    int i,j,k,b;
    for(b = 0; b < batch; ++b){
        for(k = 0; k < minc; ++k){
            for(j = 0; j < minh; ++j){
                for(i = 0; i < minw; ++i){
                    int out_index = i*sample + w2*(j*sample + h2*(k + c2*b));
                    int add_index = i*stride + w1*(j*stride + h1*(k + c1*b));
                    out[out_index] = s1*out[out_index] + s2*add[add_index];
                }
            }
        }
    }
}

/// Per-filter mean over batch and spatial dims.
void mean_cpu(float *x, int batch, int filters, int spatial, float *mean)
{
    float scale = 1./(batch * spatial);
    int i,j,k;
    for(i = 0; i < filters; ++i){
        mean[i] = 0;
        for(j = 0; j < batch; ++j){
            for(k = 0; k < spatial; ++k){
                int index = j*filters*spatial + i*spatial + k;
                mean[i] += x[index];
            }
        }
        mean[i] *= scale;
    }
}

/// Per-filter sample variance (Bessel's correction: divides by N-1).
void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
{
    float scale = 1./(batch * spatial - 1);
    int i,j,k;
    for(i = 0; i < filters; ++i){
        variance[i] = 0;
        for(j = 0; j < batch; ++j){
            for(k = 0; k < spatial; ++k){
                int index = j*filters*spatial + i*spatial + k;
                variance[i] += pow((x[index] - mean[i]), 2);
            }
        }
        variance[i] *= scale;
    }
}

/// L2-normalize across the filter axis at every (batch, spatial) location;
/// also writes a gradient-like term into dx.
void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial)
{
    int b,f,i;
    for(b = 0; b < batch; ++b){
        for(i = 0; i < spatial; ++i){
            float sum = 0;
            for(f = 0; f < filters; ++f){
                int index = b*filters*spatial + f*spatial + i;
                sum += powf(x[index], 2);
            }
            sum = sqrtf(sum);
            for(f = 0; f < filters; ++f){
                int index = b*filters*spatial + f*spatial + i;
                x[index] /= sum;
                dx[index] = (1 - x[index]) / sum;
            }
        }
    }
}

/// x = (x - mean) / (sqrt(variance) + eps), per filter.
void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial)
{
    int b, f, i;
    for(b = 0; b < batch; ++b){
        for(f = 0; f < filters; ++f){
            for(i = 0; i < spatial; ++i){
                int index = b*filters*spatial + f*spatial + i;
                x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + .000001f);
            }
        }
    }
}

/* BLAS-style strided vector primitives. */

void const_cpu(int N, float ALPHA, float *X, int INCX)
{
    int i;
    for(i = 0; i < N; ++i) X[i*INCX] = ALPHA;
}

void mul_cpu(int N, float *X, int INCX, float *Y, int INCY)
{
    int i;
    for(i = 0; i < N; ++i) Y[i*INCY] *= X[i*INCX];
}

void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
{
    int i;
    for(i = 0; i < N; ++i) Y[i*INCY] = pow(X[i*INCX], ALPHA);
}

/// Y += ALPHA * X (saxpy).
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
{
    int i;
    for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX];
}

/// X *= ALPHA (sscal).
void scal_cpu(int N, float ALPHA, float *X, int INCX)
{
    int i;
    for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA;
}

void fill_cpu(int N, float ALPHA, float *X, int INCX)
{
    int i;
    for(i = 0; i < N; ++i) X[i*INCX] = ALPHA;
}

/// Scatter-add an interleaved buffer OUT back into X and Y (either NULLable).
void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
    int i, j;
    int index = 0;
    for(j = 0; j < B; ++j) {
        for(i = 0; i < NX; ++i){
            if(X) X[j*NX + i] += OUT[index];
            ++index;
        }
        for(i = 0; i < NY; ++i){
            if(Y) Y[j*NY + i] += OUT[index];
            ++index;
        }
    }
}

/// Interleave X and Y per batch into OUT.
void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
    int i, j;
    int index = 0;
    for(j = 0; j < B; ++j) {
        for(i = 0; i < NX; ++i){
            OUT[index++] = X[j*NX + i];
        }
        for(i = 0; i < NY; ++i){
            OUT[index++] = Y[j*NY + i];
        }
    }
}

/// Y = X (scopy). Closing brace reconstructed — it sits at the start of the
/// next fused line in the dump.
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
{
    int i;
    for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX];
}
/* fragment: stray "}" at the start of this fused line is the closing brace of
 * copy_cpu, whose body sits on the previous fused line. */

/// Z[i] += X[i]*Y[i].
void mult_add_into_cpu(int N, float *X, float *Y, float *Z)
{
    int i;
    for(i = 0; i < N; ++i) Z[i] += X[i]*Y[i];
}

/// Smooth-L1 (Huber-like) loss: quadratic for |diff| < 1, linear outside.
void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
{
    int i;
    for(i = 0; i < n; ++i){
        float diff = truth[i] - pred[i];
        float abs_val = fabs(diff);
        if(abs_val < 1) {
            error[i] = diff * diff;
            delta[i] = diff;
        }
        else {
            error[i] = 2*abs_val - 1;
            delta[i] = (diff < 0) ? 1 : -1;
        }
    }
}

/// L1 loss and its (sign) gradient.
void l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
{
    int i;
    for(i = 0; i < n; ++i){
        float diff = truth[i] - pred[i];
        error[i] = fabs(diff);
        delta[i] = diff > 0 ? 1 : -1;
    }
}

/// Cross entropy vs softmax output; error only where truth is non-zero.
void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error)
{
    int i;
    for(i = 0; i < n; ++i){
        float t = truth[i];
        float p = pred[i];
        error[i] = (t) ? -log(p) : 0;
        delta[i] = t-p;
    }
}

/// Binary cross entropy vs logistic output.
void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error)
{
    int i;
    for(i = 0; i < n; ++i){
        float t = truth[i];
        float p = pred[i];
        error[i] = -t*log(p) - (1-t)*log(1-p);
        delta[i] = t-p;
    }
}

/// Squared error and its gradient.
void l2_cpu(int n, float *pred, float *truth, float *delta, float *error)
{
    int i;
    for(i = 0; i < n; ++i){
        float diff = truth[i] - pred[i];
        error[i] = diff * diff;
        delta[i] = diff;
    }
}

/// Strided dot product (sdot).
float dot_cpu(int N, float *X, int INCX, float *Y, int INCY)
{
    int i;
    float dot = 0;
    for(i = 0; i < N; ++i) dot += X[i*INCX] * Y[i*INCY];
    return dot;
}

/// Numerically-stable softmax with temperature over a strided vector.
void softmax(float *input, int n, float temp, int stride, float *output)
{
    int i;
    float sum = 0;
    float largest = -FLT_MAX;
    for(i = 0; i < n; ++i){
        if(input[i*stride] > largest) largest = input[i*stride];
    }
    for(i = 0; i < n; ++i){
        float e = exp(input[i*stride]/temp - largest/temp);
        sum += e;
        output[i*stride] = e;
    }
    for(i = 0; i < n; ++i){
        output[i*stride] /= sum;
    }
}

/// Batched/grouped softmax. Signature is cut at the chunk-line boundary;
/// the tail (visible on the next fused line of the dump) is reconstructed
/// here so the function is complete.
void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
{
    int g, b;
    for(b = 0; b < batch; ++b){
        for(g = 0; g < groups; ++g){
            softmax(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset);
        }
    }
}
group_offset, int stride, float temp, float *output) +{ + int g, b; + for(b = 0; b < batch; ++b){ + for(g = 0; g < groups; ++g){ + softmax(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset); + } + } +} + +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + int i, j, k, b; + for(b = 0; b < batch; ++b){ + for(k = 0; k < c; ++k){ + for(j = 0; j < h*stride; ++j){ + for(i = 0; i < w*stride; ++i){ + int in_index = b*w*h*c + k*w*h + (j/stride)*w + i/stride; + int out_index = b*w*h*c*stride*stride + k*w*h*stride*stride + j*w*stride + i; + if(forward) out[out_index] = scale*in[in_index]; + else in[in_index] += scale*out[out_index]; + } + } + } + } +} + + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/blas.h b/workloads/realworld/uvm_prefetch/darknet/src/blas.h new file mode 100644 index 0000000000000000000000000000000000000000..5d24a9aea70d8050b05098aa7a5634576444a32c --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/blas.h @@ -0,0 +1,105 @@ +#ifndef BLAS_H +#define BLAS_H +#include "darknet.h" + +void flatten(float *x, int size, int layers, int batch, int forward); +void pm(int M, int N, float *A); +float *random_matrix(int rows, int cols); +void time_random_matrix(int TA, int TB, int m, int k, int n); +void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); + +void test_blas(); + +void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void mult_add_into_cpu(int N, float *X, float *Y, float *Z); + +void const_cpu(int N, float ALPHA, float *X, int INCX); +void constrain_gpu(int N, float ALPHA, float * X, int INCX); +void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void mul_cpu(int N, float *X, int INCX, float *Y, int INCY); + +int test_gpu_blas(); +void shortcut_cpu(int batch, int w1, 
int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out); + +void mean_cpu(float *x, int batch, int filters, int spatial, float *mean); +void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); + +void scale_bias(float *output, float *scales, int batch, int n, int size); +void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); +void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); +void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); +void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); +void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial); + +void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error); +void l2_cpu(int n, float *pred, float *truth, float *delta, float *error); +void l1_cpu(int n, float *pred, float *truth, float *delta, float *error); +void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); +void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); +void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c); +void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc); + +void softmax(float *input, int n, float temp, int stride, float *output); +void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); + +#ifdef GPU +#include "cuda_dark.h" +#include "tree.h" + +void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); +void 
axpy_gpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); +void copy_gpu(int N, float * X, int INCX, float * Y, int INCY); +void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); +void add_gpu(int N, float ALPHA, float * X, int INCX); +void supp_gpu(int N, float ALPHA, float * X, int INCX); +void mask_gpu(int N, float * X, float mask_num, float * mask, float val); +void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale); +void const_gpu(int N, float ALPHA, float *X, int INCX); +void pow_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void mul_gpu(int N, float *X, int INCX, float *Y, int INCY); + +void mean_gpu(float *x, int batch, int filters, int spatial, float *mean); +void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); +void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); +void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial); + +void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); + +void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); +void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); + +void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); +void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean); +void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out); +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); +void 
scale_bias_gpu(float *output, float *biases, int batch, int n, int size); +void add_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); + +void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); +void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); +void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error); +void l2_gpu(int n, float *pred, float *truth, float *delta, float *error); +void l1_gpu(int n, float *pred, float *truth, float *delta, float *error); +void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error); +void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc); +void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c); +void mult_add_into_gpu(int num, float *a, float *b, float *c); +void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT); + +void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); + +void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); +void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t); +void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t); + +void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out); +void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier); +void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); + +#endif +#endif diff --git 
a/workloads/realworld/uvm_prefetch/darknet/src/blas_kernels.cu b/workloads/realworld/uvm_prefetch/darknet/src/blas_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..c8175cc74a61d5f21f1bcbba355507285322eb4a --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/blas_kernels.cu @@ -0,0 +1,1077 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" +#include + +extern "C" { +#include "blas.h" +#include "cuda_dark.h" +#include "utils.h" +} + +__global__ void scale_bias_kernel(float *output, float *biases, int n, int size) +{ + int offset = blockIdx.x * blockDim.x + threadIdx.x; + int filter = blockIdx.y; + int batch = blockIdx.z; + + if(offset < size) output[(batch*n+filter)*size + offset] *= biases[filter]; +} + +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size) +{ + dim3 dimGrid((size-1)/BLOCK + 1, n, batch); + dim3 dimBlock(BLOCK, 1, 1); + + scale_bias_kernel<<>>(output, biases, n, size); + check_error(cudaPeekAtLastError()); +} + +__global__ void backward_scale_kernel(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + __shared__ float part[BLOCK]; + int i,b; + int filter = blockIdx.x; + int p = threadIdx.x; + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; i += BLOCK){ + int index = p + i + size*(filter + n*b); + sum += (p+i < size) ? 
delta[index]*x_norm[index] : 0; + } + } + part[p] = sum; + __syncthreads(); + if (p == 0) { + for(i = 0; i < BLOCK; ++i) scale_updates[filter] += part[i]; + } +} + +void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + backward_scale_kernel<<>>(x_norm, delta, batch, n, size, scale_updates); + check_error(cudaPeekAtLastError()); +} + +__global__ void add_bias_kernel(float *output, float *biases, int batch, int n, int size) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= n*size*batch) return; + int i = index % size; + index /= size; + int j = index % n; + index /= n; + int k = index; + + output[(k*n+j)*size + i] += biases[j]; +} + +void add_bias_gpu(float *output, float *biases, int batch, int n, int size) +{ + int num = n*size*batch; + + add_bias_kernel<<>>(output, biases, batch, n, size); + check_error(cudaPeekAtLastError()); +} + +__global__ void backward_bias_conn_kernel(float *bias_updates, float *delta, int batch, int n) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= n) return; + int b; + float sum = 0; + for(b = 0; b < batch; ++b){ + int i = b*n + index; + sum += delta[i]; + } + bias_updates[index] += sum; +} + +__global__ void backward_bias_kernel(float *bias_updates, float *delta, int batch, int n, int size) +{ + __shared__ float part[BLOCK]; + int i,b; + int filter = blockIdx.x; + int p = threadIdx.x; + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; i += BLOCK){ + int index = p + i + size*(filter + n*b); + sum += (p+i < size) ? 
delta[index] : 0; + } + } + part[p] = sum; + __syncthreads(); + if (p == 0) { + for(i = 0; i < BLOCK; ++i) bias_updates[filter] += part[i]; + } +} + +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size) +{ + if(size == 1){ + backward_bias_conn_kernel<<>>(bias_updates, delta, batch, n); + }else{ + backward_bias_kernel<<>>(bias_updates, delta, batch, n, size); + } + check_error(cudaPeekAtLastError()); +} + +/* +__global__ void dot_kernel(float *output, float scale, int batch, int n, int size, float *delta) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int f1 = index / n; + int f2 = index % n; + if (f2 <= f1) return; + + float sum = 0; + float norm1 = 0; + float norm2 = 0; + int b, i; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int i1 = b * size * n + f1 * size + i; + int i2 = b * size * n + f2 * size + i; + sum += output[i1] * output[i2]; + norm1 += output[i1] * output[i1]; + norm2 += output[i2] * output[i2]; + } + } + norm1 = sqrt(norm1); + norm2 = sqrt(norm2); + float norm = norm1 * norm2; + sum = sum / norm; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int i1 = b * size * n + f1 * size + i; + int i2 = b * size * n + f2 * size + i; + delta[i1] += - scale * sum * output[i2] / norm; + delta[i2] += - scale * sum * output[i1] / norm; + } + } +} + +void dot_error_gpu(layer l) +{ + dot_kernel<<>>(l.output_gpu, l.dot, l.batch, l.n, l.out_w * l.out_h, l.delta_gpu); + check_error(cudaPeekAtLastError()); +} +*/ + + +__global__ void adam_kernel(int N, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + + float mhat = m[index] / (1.f - powf(B1, t)); + float vhat = v[index] / (1.f - powf(B2, t)); + + x[index] = x[index] + rate * mhat / (sqrtf(vhat) + eps); +} + +extern "C" void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, 
float rate, float eps, int t) +{ + adam_kernel<<>>(n, x, m, v, B1, B2, rate, eps, t); + check_error(cudaPeekAtLastError()); +} + +extern "C" void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t) +{ + scal_gpu(n, B1, m, 1); + scal_gpu(n, B2, v, 1); + axpy_gpu(n, -decay*batch, w, 1, d, 1); + + axpy_gpu(n, (1-B1), d, 1, m, 1); + mul_gpu(n, d, 1, d, 1); + axpy_gpu(n, (1-B2), d, 1, v, 1); + + adam_gpu(n, w, m, v, B1, B2, rate, eps, t); + fill_gpu(n, 0, d, 1); +} + +__global__ void normalize_kernel(int N, float *x, float *mean, float *variance, int batch, int filters, int spatial) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int f = (index/spatial)%filters; + + x[index] = (x[index] - mean[f])/(sqrtf(variance[f] + .00001f)); +} + +__global__ void normalize_delta_kernel(int N, float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int f = (index/spatial)%filters; + + delta[index] = delta[index] * 1.f/(sqrtf(variance[f] + .00001f)) + variance_delta[f] * 2.f * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); +} + +extern "C" void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + size_t N = batch*filters*spatial; + normalize_delta_kernel<<>>(N, x, mean, variance, mean_delta, variance_delta, batch, filters, spatial, delta); + check_error(cudaPeekAtLastError()); +} + +__global__ void variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) 
return; + int j,k; + variance_delta[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance_delta[i] += delta[index]*(x[index] - mean[i]); + } + } + variance_delta[i] *= -.5f * powf(variance[i] + .00001f, (float)(-3.f/2.f)); +} + +__global__ void accumulate_kernel(float *x, int n, int groups, float *sum) +{ + int k; + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= groups) return; + sum[i] = 0; + for(k = 0; k < n; ++k){ + sum[i] += x[k*groups + i]; + } +} + +__global__ void fast_mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + local[id] += (i+id < spatial) ? delta[index] : 0; + } + } + + __syncthreads(); + + if(id == 0){ + mean_delta[filter] = 0; + for(i = 0; i < threads; ++i){ + mean_delta[filter] += local[i]; + } + mean_delta[filter] *= (-1.f/sqrtf(variance[filter] + .00001f)); + } +} + +__global__ void fast_variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + + local[id] += (i+id < spatial) ? 
delta[index]*(x[index] - mean[filter]) : 0; + } + } + + __syncthreads(); + + if(id == 0){ + variance_delta[filter] = 0; + for(i = 0; i < threads; ++i){ + variance_delta[filter] += local[i]; + } + variance_delta[filter] *= -.5f * powf(variance[filter] + .00001f, (float)(-3.f/2.f)); + } +} + + +__global__ void mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + int j,k; + mean_delta[i] = 0; + for (j = 0; j < batch; ++j) { + for (k = 0; k < spatial; ++k) { + int index = j*filters*spatial + i*spatial + k; + mean_delta[i] += delta[index]; + } + } + mean_delta[i] *= (-1.f/sqrtf(variance[i] + .00001f)); +} + +extern "C" void mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + mean_delta_kernel<<>>(delta, variance, batch, filters, spatial, mean_delta); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + fast_mean_delta_kernel<<>>(delta, variance, batch, filters, spatial, mean_delta); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + fast_variance_delta_kernel<<>>(x, delta, mean, variance, batch, filters, spatial, variance_delta); + check_error(cudaPeekAtLastError()); +} + +__global__ void mean_kernel(float *x, int batch, int filters, int spatial, float *mean) +{ + float scale = 1.f/(batch * spatial); + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + int j,k; + mean[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + mean[i] += x[index]; + } + } + mean[i] *= scale; +} + 
+__global__ void variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + float scale = 1.f/(batch * spatial - 1); + int j,k; + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + variance[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance[i] += powf((x[index] - mean[i]), 2); + } + } + variance[i] *= scale; +} + +__global__ void reorg_kernel(int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int in_index = i; + int in_w = i%w; + i = i/w; + int in_h = i%h; + i = i/h; + int in_c = i%c; + i = i/c; + int b = i%batch; + + int out_c = c/(stride*stride); + + int c2 = in_c % out_c; + int offset = in_c / out_c; + int w2 = in_w*stride + offset % stride; + int h2 = in_h*stride + offset / stride; + //printf("%d\n", offset); + int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); + + // printf("%d %d %d\n", w2, h2, c2); + //printf("%d %d\n", in_index, out_index); + //if(out_index >= N || out_index < 0) printf("bad bad bad \n"); + + if(forward) out[out_index] = x[in_index]; + else out[in_index] = x[out_index]; + //if(forward) out[1] = x[1]; + //else out[0] = x[0]; +} + +__global__ void axpy_kernel(int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[OFFY+i*INCY] += ALPHA*X[OFFX+i*INCX]; +} + +__global__ void pow_kernel(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[i*INCY] = pow(X[i*INCX], ALPHA); +} + +__global__ void const_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i 
< N) X[i*INCX] = ALPHA; +} + +__global__ void constrain_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] = fminf(ALPHA, fmaxf(-ALPHA, X[i*INCX])); +} + +__global__ void supp_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) { + if((X[i*INCX] * X[i*INCX]) < (ALPHA * ALPHA)) X[i*INCX] = 0; + } +} + +__global__ void add_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] += ALPHA; +} + +__global__ void scal_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] *= ALPHA; +} + +__global__ void fill_kernel(int N, float ALPHA, float *X, int INCX) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) X[i*INCX] = ALPHA; +} + +__global__ void copy_kernel(int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[i*INCY + OFFY] = X[i*INCX + OFFX]; +} + +__global__ void mul_kernel(int N, float *X, int INCX, float *Y, int INCY) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < N) Y[i*INCY] *= X[i*INCX]; +} + + +extern "C" void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) +{ + size_t N = batch*filters*spatial; + normalize_kernel<<>>(N, x, mean, variance, batch, filters, spatial); + check_error(cudaPeekAtLastError()); +} + +__global__ void l2norm_kernel(int N, float *x, float *dx, int batch, int filters, int spatial) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int b = index / spatial; + int i = index % spatial; + int f; + float sum = 0; + 
for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + sum += powf(x[index], 2); + } + sum = sqrtf(sum); + if(sum == 0) sum = 1; + //printf("%f\n", sum); + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + x[index] /= sum; + dx[index] = (1 - x[index]) / sum; + } +} + +extern "C" void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial) +{ + size_t N = batch*spatial; + l2norm_kernel<<>>(N, x, dx, batch, filters, spatial); + check_error(cudaPeekAtLastError()); +} + +__global__ void fast_mean_kernel(float *x, int batch, int filters, int spatial, float *mean) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + local[id] += (i+id < spatial) ? x[index] : 0; + } + } + + __syncthreads(); + + if(id == 0){ + mean[filter] = 0; + for(i = 0; i < threads; ++i){ + mean[filter] += local[i]; + } + mean[filter] /= spatial * batch; + } +} + +__global__ void fast_variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + + local[id] += (i+id < spatial) ? 
powf((x[index] - mean[filter]), 2) : 0; + } + } + + __syncthreads(); + + if(id == 0){ + variance[filter] = 0; + for(i = 0; i < threads; ++i){ + variance[filter] += local[i]; + } + variance[filter] /= (spatial * batch - 1); + } +} + +extern "C" void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean) +{ + fast_mean_kernel<<>>(x, batch, filters, spatial, mean); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + fast_variance_kernel<<>>(x, mean, batch, filters, spatial, variance); + check_error(cudaPeekAtLastError()); +} + + +extern "C" void mean_gpu(float *x, int batch, int filters, int spatial, float *mean) +{ + mean_kernel<<>>(x, batch, filters, spatial, mean); + check_error(cudaPeekAtLastError()); +} + +extern "C" void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + variance_kernel<<>>(x, mean, batch, filters, spatial, variance); + check_error(cudaPeekAtLastError()); +} + +extern "C" void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY) +{ + axpy_gpu_offset(N, ALPHA, X, 0, INCX, Y, 0, INCY); +} + +extern "C" void pow_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY) +{ + pow_kernel<<>>(N, ALPHA, X, INCX, Y, INCY); + check_error(cudaPeekAtLastError()); +} + +extern "C" void axpy_gpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY) +{ + axpy_kernel<<>>(N, ALPHA, X, OFFX, INCX, Y, OFFY, INCY); + check_error(cudaPeekAtLastError()); +} + +extern "C" void copy_gpu(int N, float * X, int INCX, float * Y, int INCY) +{ + copy_gpu_offset(N, X, 0, INCX, Y, 0, INCY); +} + +extern "C" void mul_gpu(int N, float * X, int INCX, float * Y, int INCY) +{ + mul_kernel<<>>(N, X, INCX, Y, INCY); + check_error(cudaPeekAtLastError()); +} + +extern "C" void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int 
INCY) +{ + copy_kernel<<>>(N, X, OFFX, INCX, Y, OFFY, INCY); + check_error(cudaPeekAtLastError()); +} + +__global__ void flatten_kernel(int N, float *x, int spatial, int layers, int batch, int forward, float *out) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int in_s = i%spatial; + i = i/spatial; + int in_c = i%layers; + i = i/layers; + int b = i; + + int i1 = b*layers*spatial + in_c*spatial + in_s; + int i2 = b*layers*spatial + in_s*layers + in_c; + + if (forward) out[i2] = x[i1]; + else out[i1] = x[i2]; +} + +extern "C" void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out) +{ + int size = spatial*batch*layers; + flatten_kernel<<>>(size, x, spatial, layers, batch, forward, out); + check_error(cudaPeekAtLastError()); +} + +extern "C" void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int size = w*h*c*batch; + reorg_kernel<<>>(size, x, w, h, c, batch, stride, forward, out); + check_error(cudaPeekAtLastError()); +} + +__global__ void mask_kernel(int n, float *x, float mask_num, float *mask, float val) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n && mask[i] == mask_num) x[i] = val; +} + +extern "C" void mask_gpu(int N, float * X, float mask_num, float * mask, float val) +{ + mask_kernel<<>>(N, X, mask_num, mask, val); + check_error(cudaPeekAtLastError()); +} + +__global__ void scale_mask_kernel(int n, float *x, float mask_num, float *mask, float scale) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n && mask[i] == mask_num) x[i] *= scale; +} + +extern "C" void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale) +{ + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(X, N * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + scale_mask_kernel<<>>(N, X, mask_num, mask, scale); + 
check_error(cudaPeekAtLastError()); +} + +extern "C" void const_gpu(int N, float ALPHA, float * X, int INCX) +{ + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(X, N * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + const_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void constrain_gpu(int N, float ALPHA, float * X, int INCX) +{ + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(X, N * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + constrain_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + + +extern "C" void add_gpu(int N, float ALPHA, float * X, int INCX) +{ + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(X, N * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + add_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void scal_gpu(int N, float ALPHA, float * X, int INCX) +{ + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(X, N * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + scal_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void supp_gpu(int N, float ALPHA, float * X, int INCX) +{ + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(X, N * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + supp_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fill_gpu(int N, float ALPHA, float * X, int INCX) +{ + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(X, N * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + fill_kernel<<>>(N, ALPHA, X, INCX); + check_error(cudaPeekAtLastError()); +} + +__global__ void shortcut_kernel(int size, int minw, int minh, int minc, int stride, 
int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= size) return; + int i = id % minw; + id /= minw; + int j = id % minh; + id /= minh; + int k = id % minc; + id /= minc; + int b = id % batch; + + int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); + int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); + out[out_index] = s1*out[out_index] + s2*add[add_index]; + //out[out_index] += add[add_index]; +} + +extern "C" void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) +{ + int minw = (w1 < w2) ? w1 : w2; + int minh = (h1 < h2) ? h1 : h2; + int minc = (c1 < c2) ? c1 : c2; + + int stride = w1/w2; + int sample = w2/w1; + assert(stride == h1/h2); + assert(sample == h2/h1); + if(stride < 1) stride = 1; + if(sample < 1) sample = 1; + + int size = batch * minw * minh * minc; + shortcut_kernel<<>>(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, s1, s2, out); + check_error(cudaPeekAtLastError()); +} + +__global__ void smooth_l1_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float diff = truth[i] - pred[i]; + float abs_val = fabsf(diff); + if(abs_val < 1) { + error[i] = diff * diff; + delta[i] = diff; + } + else { + error[i] = 2*abs_val - 1; + delta[i] = (diff > 0) ? 
1 : -1; + } + } +} + +extern "C" void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + smooth_l1_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void softmax_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float t = truth[i]; + float p = pred[i]; + error[i] = (t) ? -log(p) : 0; + delta[i] = t-p; + } +} + +extern "C" void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + softmax_x_ent_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void logistic_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float t = truth[i]; + float p = pred[i]; + error[i] = -t*log(p+.0000001) - (1-t)*log(1-p+.0000001); + delta[i] = t-p; + } +} + +extern "C" void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + logistic_x_ent_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void l2_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float diff = truth[i] - pred[i]; + error[i] = diff * diff; //I know this is technically wrong, deal with it. + delta[i] = diff; + } +} + +extern "C" void l2_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + l2_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void l1_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + float diff = truth[i] - pred[i]; + error[i] = abs(diff); + delta[i] = (diff > 0) ? 
1 : -1; + } +} + +extern "C" void l1_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + l1_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + +__global__ void wgan_kernel(int n, float *pred, float *truth, float *delta, float *error) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + error[i] = truth[i] ? -pred[i] : pred[i]; + delta[i] = (truth[i] > 0) ? 1 : -1; + } +} + +extern "C" void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error) +{ + wgan_kernel<<>>(n, pred, truth, delta, error); + check_error(cudaPeekAtLastError()); +} + + + + +__global__ void weighted_sum_kernel(int n, float *a, float *b, float *s, float *c) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + c[i] = s[i]*a[i] + (1-s[i])*(b ? b[i] : 0); + } +} + +__global__ void deinter_kernel(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < (NX+NY)*B){ + int b = i / (NX+NY); + int j = i % (NX+NY); + if (j < NX){ + if(X) X[b*NX + j] += OUT[i]; + } else { + if(Y) Y[b*NY + j - NX] += OUT[i]; + } + } +} + +extern "C" void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + deinter_kernel<<>>(NX, X, NY, Y, B, OUT); + check_error(cudaPeekAtLastError()); +} + +__global__ void inter_kernel(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < (NX+NY)*B){ + int b = i / (NX+NY); + int j = i % (NX+NY); + if (j < NX){ + OUT[i] = X[b*NX + j]; + } else { + OUT[i] = Y[b*NY + j - NX]; + } + } +} + +extern "C" void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + inter_kernel<<>>(NX, X, NY, Y, B, OUT); + check_error(cudaPeekAtLastError()); +} + +extern "C" void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c) +{ + 
weighted_sum_kernel<<>>(num, a, b, s, c); + check_error(cudaPeekAtLastError()); +} + +__global__ void weighted_delta_kernel(int n, float *a, float *b, float *s, float *da, float *db, float *ds, float *dc) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + if(da) da[i] += dc[i] * s[i]; + if(db) db[i] += dc[i] * (1-s[i]); + ds[i] += dc[i] * (a[i] - b[i]); + } +} + +extern "C" void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc) +{ + weighted_delta_kernel<<>>(num, a, b, s, da, db, ds, dc); + check_error(cudaPeekAtLastError()); +} + +__global__ void mult_add_into_kernel(int n, float *a, float *b, float *c) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i < n){ + c[i] += a[i]*b[i]; + } +} + +extern "C" void mult_add_into_gpu(int num, float *a, float *b, float *c) +{ + mult_add_into_kernel<<>>(num, a, b, c); + check_error(cudaPeekAtLastError()); +} + + +__device__ void softmax_device(float *input, int n, float temp, int stride, float *output) +{ + int i; + float sum = 0; + float largest = -INFINITY; + for(i = 0; i < n; ++i){ + int val = input[i*stride]; + largest = (val>largest) ? 
val : largest; + } + for(i = 0; i < n; ++i){ + float e = expf(input[i*stride]/temp - largest/temp); + sum += e; + output[i*stride] = e; + } + for(i = 0; i < n; ++i){ + output[i*stride] /= sum; + } +} + + +__global__ void softmax_tree_kernel(float *input, int spatial, int batch, int stride, float temp, float *output, int groups, int *group_size, int *group_offset) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= spatial*batch*groups) return; + int s = id % spatial; + id = id / spatial; + int g = id % groups; + int b = id / groups; + int goff = group_offset[g]*spatial; + int boff = b*stride; + softmax_device(input + goff + boff + s, group_size[g], temp, spatial, output + goff + boff + s); +} + +extern "C" void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier) +{ + int *tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups); + int *tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups); + /* + static int *tree_groups_size = 0; + static int *tree_groups_offset = 0; + if(!tree_groups_size){ + tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups); + tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups); + } + */ + int num = spatial*batch*hier.groups; + softmax_tree_kernel<<>>(input, spatial, batch, stride, temp, output, hier.groups, tree_groups_size, tree_groups_offset); + check_error(cudaPeekAtLastError()); + cuda_free((float *)tree_groups_size); + cuda_free((float *)tree_groups_offset); +} + +__global__ void softmax_kernel(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= batch*groups) return; + int b = id / groups; + int g = id % groups; + softmax_device(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset); +} 
+ +extern "C" void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) +{ + softmax_kernel<<>>(input, n, batch, batch_offset, groups, group_offset, stride, temp, output); + check_error(cudaPeekAtLastError()); +} + + +__global__ void upsample_kernel(size_t N, float *x, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + size_t i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int out_index = i; + int out_w = i%(w*stride); + i = i/(w*stride); + int out_h = i%(h*stride); + i = i/(h*stride); + int out_c = i%c; + i = i/c; + int b = i%batch; + + int in_w = out_w / stride; + int in_h = out_h / stride; + int in_c = out_c; + + int in_index = b*w*h*c + in_c*w*h + in_h*w + in_w; + + + if(forward) out[out_index] += scale * x[in_index]; + else atomicAdd(x+in_index, scale * out[out_index]); +} +extern "C" void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + size_t size = w*h*c*batch*stride*stride; + upsample_kernel<<>>(size, in, w, h, c, batch, stride, forward, scale, out); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/uvm_prefetch/darknet/src/box.c b/workloads/realworld/uvm_prefetch/darknet/src/box.c new file mode 100644 index 0000000000000000000000000000000000000000..8a1772c9ae05dede6ddc83d9b6465f64cf974ae8 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/box.c @@ -0,0 +1,357 @@ +#include "box.h" +#include +#include +#include + +int nms_comparator(const void *pa, const void *pb) +{ + detection a = *(detection *)pa; + detection b = *(detection *)pb; + float diff = 0; + if(b.sort_class >= 0){ + diff = a.prob[b.sort_class] - b.prob[b.sort_class]; + } else { + diff = a.objectness - b.objectness; + } + if(diff < 0) return 1; + else if(diff > 0) return -1; + return 0; +} + +void do_nms_obj(detection *dets, int total, int 
classes, float thresh) +{ + int i, j, k; + k = total-1; + for(i = 0; i <= k; ++i){ + if(dets[i].objectness == 0){ + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k+1; + + for(i = 0; i < total; ++i){ + dets[i].sort_class = -1; + } + + qsort(dets, total, sizeof(detection), nms_comparator); + for(i = 0; i < total; ++i){ + if(dets[i].objectness == 0) continue; + box a = dets[i].bbox; + for(j = i+1; j < total; ++j){ + if(dets[j].objectness == 0) continue; + box b = dets[j].bbox; + if (box_iou(a, b) > thresh){ + dets[j].objectness = 0; + for(k = 0; k < classes; ++k){ + dets[j].prob[k] = 0; + } + } + } + } +} + + +void do_nms_sort(detection *dets, int total, int classes, float thresh) +{ + int i, j, k; + k = total-1; + for(i = 0; i <= k; ++i){ + if(dets[i].objectness == 0){ + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k+1; + + for(k = 0; k < classes; ++k){ + for(i = 0; i < total; ++i){ + dets[i].sort_class = k; + } + qsort(dets, total, sizeof(detection), nms_comparator); + for(i = 0; i < total; ++i){ + if(dets[i].prob[k] == 0) continue; + box a = dets[i].bbox; + for(j = i+1; j < total; ++j){ + box b = dets[j].bbox; + if (box_iou(a, b) > thresh){ + dets[j].prob[k] = 0; + } + } + } + } +} + +box float_to_box(float *f, int stride) +{ + box b = {0}; + b.x = f[0]; + b.y = f[1*stride]; + b.w = f[2*stride]; + b.h = f[3*stride]; + return b; +} + +dbox derivative(box a, box b) +{ + dbox d; + d.dx = 0; + d.dw = 0; + float l1 = a.x - a.w/2; + float l2 = b.x - b.w/2; + if (l1 > l2){ + d.dx -= 1; + d.dw += .5; + } + float r1 = a.x + a.w/2; + float r2 = b.x + b.w/2; + if(r1 < r2){ + d.dx += 1; + d.dw += .5; + } + if (l1 > r2) { + d.dx = -1; + d.dw = 0; + } + if (r1 < l2){ + d.dx = 1; + d.dw = 0; + } + + d.dy = 0; + d.dh = 0; + float t1 = a.y - a.h/2; + float t2 = b.y - b.h/2; + if (t1 > t2){ + d.dy -= 1; + d.dh += .5; + } + float b1 = a.y + a.h/2; + float b2 = b.y + b.h/2; + if(b1 < 
b2){ + d.dy += 1; + d.dh += .5; + } + if (t1 > b2) { + d.dy = -1; + d.dh = 0; + } + if (b1 < t2){ + d.dy = 1; + d.dh = 0; + } + return d; +} + +float overlap(float x1, float w1, float x2, float w2) +{ + float l1 = x1 - w1/2; + float l2 = x2 - w2/2; + float left = l1 > l2 ? l1 : l2; + float r1 = x1 + w1/2; + float r2 = x2 + w2/2; + float right = r1 < r2 ? r1 : r2; + return right - left; +} + +float box_intersection(box a, box b) +{ + float w = overlap(a.x, a.w, b.x, b.w); + float h = overlap(a.y, a.h, b.y, b.h); + if(w < 0 || h < 0) return 0; + float area = w*h; + return area; +} + +float box_union(box a, box b) +{ + float i = box_intersection(a, b); + float u = a.w*a.h + b.w*b.h - i; + return u; +} + +float box_iou(box a, box b) +{ + return box_intersection(a, b)/box_union(a, b); +} + +float box_rmse(box a, box b) +{ + return sqrt(pow(a.x-b.x, 2) + + pow(a.y-b.y, 2) + + pow(a.w-b.w, 2) + + pow(a.h-b.h, 2)); +} + +dbox dintersect(box a, box b) +{ + float w = overlap(a.x, a.w, b.x, b.w); + float h = overlap(a.y, a.h, b.y, b.h); + dbox dover = derivative(a, b); + dbox di; + + di.dw = dover.dw*h; + di.dx = dover.dx*h; + di.dh = dover.dh*w; + di.dy = dover.dy*w; + + return di; +} + +dbox dunion(box a, box b) +{ + dbox du; + + dbox di = dintersect(a, b); + du.dw = a.h - di.dw; + du.dh = a.w - di.dh; + du.dx = -di.dx; + du.dy = -di.dy; + + return du; +} + + +void test_dunion() +{ + box a = {0, 0, 1, 1}; + box dxa= {0+.0001, 0, 1, 1}; + box dya= {0, 0+.0001, 1, 1}; + box dwa= {0, 0, 1+.0001, 1}; + box dha= {0, 0, 1, 1+.0001}; + + box b = {.5, .5, .2, .2}; + dbox di = dunion(a,b); + printf("Union: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); + float inter = box_union(a, b); + float xinter = box_union(dxa, b); + float yinter = box_union(dya, b); + float winter = box_union(dwa, b); + float hinter = box_union(dha, b); + xinter = (xinter - inter)/(.0001); + yinter = (yinter - inter)/(.0001); + winter = (winter - inter)/(.0001); + hinter = (hinter - inter)/(.0001); + 
printf("Union Manual %f %f %f %f\n", xinter, yinter, winter, hinter); +} +void test_dintersect() +{ + box a = {0, 0, 1, 1}; + box dxa= {0+.0001, 0, 1, 1}; + box dya= {0, 0+.0001, 1, 1}; + box dwa= {0, 0, 1+.0001, 1}; + box dha= {0, 0, 1, 1+.0001}; + + box b = {.5, .5, .2, .2}; + dbox di = dintersect(a,b); + printf("Inter: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); + float inter = box_intersection(a, b); + float xinter = box_intersection(dxa, b); + float yinter = box_intersection(dya, b); + float winter = box_intersection(dwa, b); + float hinter = box_intersection(dha, b); + xinter = (xinter - inter)/(.0001); + yinter = (yinter - inter)/(.0001); + winter = (winter - inter)/(.0001); + hinter = (hinter - inter)/(.0001); + printf("Inter Manual %f %f %f %f\n", xinter, yinter, winter, hinter); +} + +void test_box() +{ + test_dintersect(); + test_dunion(); + box a = {0, 0, 1, 1}; + box dxa= {0+.00001, 0, 1, 1}; + box dya= {0, 0+.00001, 1, 1}; + box dwa= {0, 0, 1+.00001, 1}; + box dha= {0, 0, 1, 1+.00001}; + + box b = {.5, 0, .2, .2}; + + float iou = box_iou(a,b); + iou = (1-iou)*(1-iou); + printf("%f\n", iou); + dbox d = diou(a, b); + printf("%f %f %f %f\n", d.dx, d.dy, d.dw, d.dh); + + float xiou = box_iou(dxa, b); + float yiou = box_iou(dya, b); + float wiou = box_iou(dwa, b); + float hiou = box_iou(dha, b); + xiou = ((1-xiou)*(1-xiou) - iou)/(.00001); + yiou = ((1-yiou)*(1-yiou) - iou)/(.00001); + wiou = ((1-wiou)*(1-wiou) - iou)/(.00001); + hiou = ((1-hiou)*(1-hiou) - iou)/(.00001); + printf("manual %f %f %f %f\n", xiou, yiou, wiou, hiou); +} + +dbox diou(box a, box b) +{ + float u = box_union(a,b); + float i = box_intersection(a,b); + dbox di = dintersect(a,b); + dbox du = dunion(a,b); + dbox dd = {0,0,0,0}; + + if(i <= 0 || 1) { + dd.dx = b.x - a.x; + dd.dy = b.y - a.y; + dd.dw = b.w - a.w; + dd.dh = b.h - a.h; + return dd; + } + + dd.dx = 2*pow((1-(i/u)),1)*(di.dx*u - du.dx*i)/(u*u); + dd.dy = 2*pow((1-(i/u)),1)*(di.dy*u - du.dy*i)/(u*u); + dd.dw = 
2*pow((1-(i/u)),1)*(di.dw*u - du.dw*i)/(u*u); + dd.dh = 2*pow((1-(i/u)),1)*(di.dh*u - du.dh*i)/(u*u); + return dd; +} + + +void do_nms(box *boxes, float **probs, int total, int classes, float thresh) +{ + int i, j, k; + for(i = 0; i < total; ++i){ + int any = 0; + for(k = 0; k < classes; ++k) any = any || (probs[i][k] > 0); + if(!any) { + continue; + } + for(j = i+1; j < total; ++j){ + if (box_iou(boxes[i], boxes[j]) > thresh){ + for(k = 0; k < classes; ++k){ + if (probs[i][k] < probs[j][k]) probs[i][k] = 0; + else probs[j][k] = 0; + } + } + } + } +} + +box encode_box(box b, box anchor) +{ + box encode; + encode.x = (b.x - anchor.x) / anchor.w; + encode.y = (b.y - anchor.y) / anchor.h; + encode.w = log2(b.w / anchor.w); + encode.h = log2(b.h / anchor.h); + return encode; +} + +box decode_box(box b, box anchor) +{ + box decode; + decode.x = b.x * anchor.w + anchor.x; + decode.y = b.y * anchor.h + anchor.y; + decode.w = pow(2., b.w) * anchor.w; + decode.h = pow(2., b.h) * anchor.h; + return decode; +} diff --git a/workloads/realworld/uvm_prefetch/darknet/src/box.h b/workloads/realworld/uvm_prefetch/darknet/src/box.h new file mode 100644 index 0000000000000000000000000000000000000000..dda3e59100c3d9e0a6bb05a80070155d9fcbc876 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/box.h @@ -0,0 +1,14 @@ +#ifndef BOX_H +#define BOX_H +#include "darknet.h" + +typedef struct{ + float dx, dy, dw, dh; +} dbox; + +float box_rmse(box a, box b); +dbox diou(box a, box b); +box decode_box(box b, box anchor); +box encode_box(box b, box anchor); + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/classifier.h b/workloads/realworld/uvm_prefetch/darknet/src/classifier.h new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/classifier.h @@ -0,0 +1 @@ + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/col2im.c 
b/workloads/realworld/uvm_prefetch/darknet/src/col2im.c new file mode 100644 index 0000000000000000000000000000000000000000..5c4605e197439f79fe05c41337a5f2b8103f63ba --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/col2im.c @@ -0,0 +1,39 @@ +#include +#include +void col2im_add_pixel(float *im, int height, int width, int channels, + int row, int col, int channel, int pad, float val) +{ + row -= pad; + col -= pad; + + if (row < 0 || col < 0 || + row >= height || col >= width) return; + im[col + width*(row + height*channel)] += val; +} +//This one might be too, can't remember. +void col2im_cpu(float* data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_im) +{ + int c,h,w; + int height_col = (height + 2*pad - ksize) / stride + 1; + int width_col = (width + 2*pad - ksize) / stride + 1; + + int channels_col = channels * ksize * ksize; + for (c = 0; c < channels_col; ++c) { + int w_offset = c % ksize; + int h_offset = (c / ksize) % ksize; + int c_im = c / ksize / ksize; + for (h = 0; h < height_col; ++h) { + for (w = 0; w < width_col; ++w) { + int im_row = h_offset + h * stride; + int im_col = w_offset + w * stride; + int col_index = (c * height_col + h) * width_col + w; + double val = data_col[col_index]; + col2im_add_pixel(data_im, height, width, channels, + im_row, im_col, c_im, pad, val); + } + } + } +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/col2im.h b/workloads/realworld/uvm_prefetch/darknet/src/col2im.h new file mode 100644 index 0000000000000000000000000000000000000000..3fbe05307db65a1f511f801670a23734e21b7dff --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/col2im.h @@ -0,0 +1,13 @@ +#ifndef COL2IM_H +#define COL2IM_H + +void col2im_cpu(float* data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_im); + +#ifdef GPU +void col2im_gpu(float *data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float 
*data_im); +#endif +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/col2im_kernels.cu b/workloads/realworld/uvm_prefetch/darknet/src/col2im_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..3543ec940a807a2dc42eb11fb31bc370c750a0f5 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/col2im_kernels.cu @@ -0,0 +1,70 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "col2im.h" +#include "cuda_dark.h" +} + +// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu +// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE + +__global__ void col2im_gpu_kernel(const int n, const float* data_col, + const int height, const int width, const int ksize, + const int pad, + const int stride, + const int height_col, const int width_col, + float *data_im) { + int index = blockIdx.x*blockDim.x+threadIdx.x; + for(; index < n; index += blockDim.x*gridDim.x){ + float val = 0; + int w = index % width + pad; + int h = (index / width) % height + pad; + int c = index / (width * height); + // compute the start and end of the output + int w_col_start = (w < ksize) ? 0 : (w - ksize) / stride + 1; + int w_col_end = min(w / stride + 1, width_col); + int h_col_start = (h < ksize) ? 
0 : (h - ksize) / stride + 1; + int h_col_end = min(h / stride + 1, height_col); + // equivalent implementation + int offset = + (c * ksize * ksize + h * ksize + w) * height_col * width_col; + int coeff_h_col = (1 - stride * ksize * height_col) * width_col; + int coeff_w_col = (1 - stride * height_col * width_col); + for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { + for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { + val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col]; + } + } + data_im[index] += val; + } +} + +void col2im_gpu(float *data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_im){ + // We are going to launch channels * height_col * width_col kernels, each + // kernel responsible for copying a single-channel grid. + int height_col = (height + 2 * pad - ksize) / stride + 1; + int width_col = (width + 2 * pad - ksize) / stride + 1; + int num_kernels = channels * height * width; + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(data_col, num_kernels * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(data_im, num_kernels * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + col2im_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, + BLOCK, 0, stream2>>>( + num_kernels, data_col, height, width, ksize, pad, + stride, height_col, + width_col, data_im); + check_error(cudaPeekAtLastError()); +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/compare.c b/workloads/realworld/uvm_prefetch/darknet/src/compare.c new file mode 100644 index 0000000000000000000000000000000000000000..d2d2b3bdc675cf808f483d1607550e072e245396 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/compare.c @@ -0,0 +1,352 @@ +#include + +#include "network.h" +#include "detection_layer.h" +#include "cost_layer.h" +#include "utils.h" +#include "parser.h" 
+#include "box.h" + +void train_compare(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + list *plist = get_paths("data/compare.train.list"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + printf("%d\n", N); + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.classes = 20; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = COMPARE_DATA; + + load_thread = load_data_in_thread(args); + int epoch = *net.seen/N; + int i = 0; + while(1){ + ++i; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%.3f: %f, %f avg, %lf seconds, %ld images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen); + free_data(train); + if(i%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_%d_minor_%d.weights",backup_directory,base, epoch, i); + save_weights(net, buff); + } + if(*net.seen/N > epoch){ + epoch = *net.seen/N; + i = 0; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + if(epoch%22 == 0) net.learning_rate *= .1; + } + } + pthread_join(load_thread, 0); + free_data(buffer); + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void validate_compare(char *filename, char *weightfile) +{ + int i = 0; + network 
net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + list *plist = get_paths("data/compare.val.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size/2; + free_list(plist); + + clock_t time; + int correct = 0; + int total = 0; + int splits = 10; + int num = (i+1)*N/splits - i*N/splits; + + data val, buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.classes = 20; + args.n = num; + args.m = 0; + args.d = &buffer; + args.type = COMPARE_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(i = 1; i <= splits; ++i){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + num = (i+1)*N/splits - i*N/splits; + char **part = paths+(i*N/splits); + if(i != splits){ + args.paths = part; + load_thread = load_data_in_thread(args); + } + printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + int j,k; + for(j = 0; j < val.y.rows; ++j){ + for(k = 0; k < 20; ++k){ + if(val.y.vals[j][k*2] != val.y.vals[j][k*2+1]){ + ++total; + if((val.y.vals[j][k*2] < val.y.vals[j][k*2+1]) == (pred.vals[j][k*2] < pred.vals[j][k*2+1])){ + ++correct; + } + } + } + } + free_matrix(pred); + printf("%d: Acc: %f, %lf seconds, %d images\n", i, (float)correct/total, sec(clock()-time), val.X.rows); + free_data(val); + } +} + +typedef struct { + network net; + char *filename; + int class; + int classes; + float elo; + float *elos; +} sortable_bbox; + +int total_compares = 0; +int current_class = 0; + +int elo_comparator(const void*a, const void *b) +{ + sortable_bbox box1 = *(sortable_bbox*)a; + sortable_bbox box2 = *(sortable_bbox*)b; + if(box1.elos[current_class] == box2.elos[current_class]) return 0; + if(box1.elos[current_class] > box2.elos[current_class]) return -1; + return 1; +} + +int bbox_comparator(const 
void *a, const void *b) +{ + ++total_compares; + sortable_bbox box1 = *(sortable_bbox*)a; + sortable_bbox box2 = *(sortable_bbox*)b; + network net = box1.net; + int class = box1.class; + + image im1 = load_image_color(box1.filename, net.w, net.h); + image im2 = load_image_color(box2.filename, net.w, net.h); + float *X = calloc(net.w*net.h*net.c, sizeof(float)); + memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); + memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); + float *predictions = network_predict(net, X); + + free_image(im1); + free_image(im2); + free(X); + if (predictions[class*2] > predictions[class*2+1]){ + return 1; + } + return -1; +} + +void bbox_update(sortable_bbox *a, sortable_bbox *b, int class, int result) +{ + int k = 32; + float EA = 1./(1+pow(10, (b->elos[class] - a->elos[class])/400.)); + float EB = 1./(1+pow(10, (a->elos[class] - b->elos[class])/400.)); + float SA = result ? 1 : 0; + float SB = result ? 0 : 1; + a->elos[class] += k*(SA - EA); + b->elos[class] += k*(SB - EB); +} + +void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, int class) +{ + image im1 = load_image_color(a->filename, net.w, net.h); + image im2 = load_image_color(b->filename, net.w, net.h); + float *X = calloc(net.w*net.h*net.c, sizeof(float)); + memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); + memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); + float *predictions = network_predict(net, X); + ++total_compares; + + int i; + for(i = 0; i < classes; ++i){ + if(class < 0 || class == i){ + int result = predictions[i*2] > predictions[i*2+1]; + bbox_update(a, b, i, result); + } + } + + free_image(im1); + free_image(im2); + free(X); +} + +void SortMaster3000(char *filename, char *weightfile) +{ + int i = 0; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + set_batch_network(&net, 1); + + list *plist = 
get_paths("data/compare.sort.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + free_list(plist); + sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); + printf("Sorting %d boxes...\n", N); + for(i = 0; i < N; ++i){ + boxes[i].filename = paths[i]; + boxes[i].net = net; + boxes[i].class = 7; + boxes[i].elo = 1500; + } + clock_t time=clock(); + qsort(boxes, N, sizeof(sortable_bbox), bbox_comparator); + for(i = 0; i < N; ++i){ + printf("%s\n", boxes[i].filename); + } + printf("Sorted in %d compares, %f secs\n", total_compares, sec(clock()-time)); +} + +void BattleRoyaleWithCheese(char *filename, char *weightfile) +{ + int classes = 20; + int i,j; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + set_batch_network(&net, 1); + + list *plist = get_paths("data/compare.sort.list"); + //list *plist = get_paths("data/compare.small.list"); + //list *plist = get_paths("data/compare.cat.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + int total = N; + free_list(plist); + sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); + printf("Battling %d boxes...\n", N); + for(i = 0; i < N; ++i){ + boxes[i].filename = paths[i]; + boxes[i].net = net; + boxes[i].classes = classes; + boxes[i].elos = calloc(classes, sizeof(float));; + for(j = 0; j < classes; ++j){ + boxes[i].elos[j] = 1500; + } + } + int round; + clock_t time=clock(); + for(round = 1; round <= 4; ++round){ + clock_t round_time=clock(); + printf("Round: %d\n", round); + shuffle(boxes, N, sizeof(sortable_bbox)); + for(i = 0; i < N/2; ++i){ + bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, -1); + } + printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); + } + + int class; + + for (class = 0; class < classes; ++class){ + + N = total; + current_class = class; + 
qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + N /= 2; + + for(round = 1; round <= 100; ++round){ + clock_t round_time=clock(); + printf("Round: %d\n", round); + + sorta_shuffle(boxes, N, sizeof(sortable_bbox), 10); + for(i = 0; i < N/2; ++i){ + bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, class); + } + qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + if(round <= 20) N = (N*9/10)/2*2; + + printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); + } + char buff[256]; + sprintf(buff, "results/battle_%d.log", class); + FILE *outfp = fopen(buff, "w"); + for(i = 0; i < N; ++i){ + fprintf(outfp, "%s %f\n", boxes[i].filename, boxes[i].elos[class]); + } + fclose(outfp); + } + printf("Tournament in %d compares, %f secs\n", total_compares, sec(clock()-time)); +} + +void run_compare(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + //char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_compare(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_compare(cfg, weights); + else if(0==strcmp(argv[2], "sort")) SortMaster3000(cfg, weights); + else if(0==strcmp(argv[2], "battle")) BattleRoyaleWithCheese(cfg, weights); + /* + else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); + else if(0==strcmp(argv[2], "extract")) extract_boxes(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_recall(cfg, weights); + */ +} diff --git a/workloads/realworld/uvm_prefetch/darknet/src/connected_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/connected_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..ec25b44d998661c4735cd9a8a86f2355a0ae0080 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/connected_layer.c @@ -0,0 +1,336 @@ +#include "connected_layer.h" +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam) +{ + int i; + layer l = {0}; + l.learning_rate_scale = 1; + l.type = CONNECTED; + + l.inputs = inputs; + l.outputs = outputs; + l.batch=batch; + l.batch_normalize = batch_normalize; + l.h = 1; + l.w = 1; + l.c = inputs; + l.out_h = 1; + l.out_w = 1; + l.out_c = outputs; + + l.output = calloc(batch*outputs, sizeof(float)); + l.delta = calloc(batch*outputs, sizeof(float)); + + l.weight_updates = calloc(inputs*outputs, sizeof(float)); + l.bias_updates = calloc(outputs, sizeof(float)); + + l.weights = calloc(outputs*inputs, sizeof(float)); + l.biases = calloc(outputs, sizeof(float)); + + l.forward = forward_connected_layer; + l.backward = backward_connected_layer; + l.update = update_connected_layer; + + //float scale = 1./sqrt(inputs); + float scale = sqrt(2./inputs); + for(i = 0; i < 
outputs*inputs; ++i){ + l.weights[i] = scale*rand_uniform(-1, 1); + } + + for(i = 0; i < outputs; ++i){ + l.biases[i] = 0; + } + + if(adam){ + l.m = calloc(l.inputs*l.outputs, sizeof(float)); + l.v = calloc(l.inputs*l.outputs, sizeof(float)); + l.bias_m = calloc(l.outputs, sizeof(float)); + l.scale_m = calloc(l.outputs, sizeof(float)); + l.bias_v = calloc(l.outputs, sizeof(float)); + l.scale_v = calloc(l.outputs, sizeof(float)); + } + if(batch_normalize){ + l.scales = calloc(outputs, sizeof(float)); + l.scale_updates = calloc(outputs, sizeof(float)); + for(i = 0; i < outputs; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(outputs, sizeof(float)); + l.mean_delta = calloc(outputs, sizeof(float)); + l.variance = calloc(outputs, sizeof(float)); + l.variance_delta = calloc(outputs, sizeof(float)); + + l.rolling_mean = calloc(outputs, sizeof(float)); + l.rolling_variance = calloc(outputs, sizeof(float)); + + l.x = calloc(batch*outputs, sizeof(float)); + l.x_norm = calloc(batch*outputs, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_connected_layer_gpu; + l.backward_gpu = backward_connected_layer_gpu; + l.update_gpu = update_connected_layer_gpu; + + l.weights_gpu = cuda_make_array(l.weights, outputs*inputs); + l.biases_gpu = cuda_make_array(l.biases, outputs); + + l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs); + + l.output_gpu = cuda_make_array(l.output, outputs*batch); + l.delta_gpu = cuda_make_array(l.delta, outputs*batch); + if (adam) { + l.m_gpu = cuda_make_array(0, inputs*outputs); + l.v_gpu = cuda_make_array(0, inputs*outputs); + l.bias_m_gpu = cuda_make_array(0, outputs); + l.bias_v_gpu = cuda_make_array(0, outputs); + l.scale_m_gpu = cuda_make_array(0, outputs); + l.scale_v_gpu = cuda_make_array(0, outputs); + } + + if(batch_normalize){ + l.mean_gpu = cuda_make_array(l.mean, outputs); + l.variance_gpu = cuda_make_array(l.variance, outputs); + + 
l.rolling_mean_gpu = cuda_make_array(l.mean, outputs); + l.rolling_variance_gpu = cuda_make_array(l.variance, outputs); + + l.mean_delta_gpu = cuda_make_array(l.mean, outputs); + l.variance_delta_gpu = cuda_make_array(l.variance, outputs); + + l.scales_gpu = cuda_make_array(l.scales, outputs); + l.scale_updates_gpu = cuda_make_array(l.scale_updates, outputs); + + l.x_gpu = cuda_make_array(l.output, l.batch*outputs); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*outputs); +#ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); +#endif + } +#endif + l.activation = activation; + fprintf(stderr, "connected %4d -> %4d\n", inputs, outputs); + return l; +} + +void update_connected_layer(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.outputs, momentum, l.bias_updates, 1); + + if(l.batch_normalize){ + axpy_cpu(l.outputs, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.outputs, momentum, l.scale_updates, 1); + } + + axpy_cpu(l.inputs*l.outputs, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(l.inputs*l.outputs, momentum, l.weight_updates, 1); +} + +void forward_connected_layer(layer l, network net) +{ + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + int m = l.batch; + int k = l.inputs; + int n = l.outputs; + float *a = net.input; + float *b = l.weights; + float *c = l.output; + gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); + if(l.batch_normalize){ + forward_batchnorm_layer(l, net); 
+ } else { + add_bias(l.output, l.biases, l.batch, l.outputs, 1); + } + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_connected_layer(layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l.bias_updates, l.delta, l.batch, l.outputs, 1); + } + + int m = l.outputs; + int k = l.batch; + int n = l.inputs; + float *a = l.delta; + float *b = net.input; + float *c = l.weight_updates; + gemm(1,0,m,n,k,1,a,m,b,n,1,c,n); + + m = l.batch; + k = l.outputs; + n = l.inputs; + + a = l.delta; + b = l.weights; + c = net.delta; + + if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); +} + + +void denormalize_connected_layer(layer l) +{ + int i, j; + for(i = 0; i < l.outputs; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .000001); + for(j = 0; j < l.inputs; ++j){ + l.weights[i*l.inputs + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + + +void statistics_connected_layer(layer l) +{ + if(l.batch_normalize){ + printf("Scales "); + print_statistics(l.scales, l.outputs); + /* + printf("Rolling Mean "); + print_statistics(l.rolling_mean, l.outputs); + printf("Rolling Variance "); + print_statistics(l.rolling_variance, l.outputs); + */ + } + printf("Biases "); + print_statistics(l.biases, l.outputs); + printf("Weights "); + print_statistics(l.weights, l.outputs); +} + +#ifdef GPU + +void pull_connected_layer(layer l) +{ + cuda_pull_array(l.weights_gpu, l.weights, l.inputs*l.outputs); + cuda_pull_array(l.biases_gpu, l.biases, l.outputs); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs); + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + if (l.batch_normalize){ + cuda_pull_array(l.scales_gpu, l.scales, l.outputs); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs); + 
cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs); + } +} + +void push_connected_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.inputs*l.outputs); + cuda_push_array(l.biases_gpu, l.biases, l.outputs); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs); + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + if (l.batch_normalize){ + cuda_push_array(l.scales_gpu, l.scales, l.outputs); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs); + } +} + +void update_connected_layer_gpu(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + if(a.adam){ + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.inputs*l.outputs, batch, a.t); + adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t); + if(l.scales_gpu){ + adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t); + } + }else{ + axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1); + + if(l.batch_normalize){ + axpy_gpu(l.outputs, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.outputs, momentum, l.scale_updates_gpu, 1); + } + + axpy_gpu(l.inputs*l.outputs, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.inputs*l.outputs, momentum, l.weight_updates_gpu, 1); + } +} + +void forward_connected_layer_gpu(layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + + int m = 
l.batch; + int k = l.inputs; + int n = l.outputs; + float * a = net.input_gpu; + float * b = l.weights_gpu; + float * c = l.output_gpu; + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (l.batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.outputs, 1); + } + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_connected_layer_gpu(layer l, network net) +{ + constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + if(l.batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.outputs, 1); + } + + int m = l.outputs; + int k = l.batch; + int n = l.inputs; + float * a = l.delta_gpu; + float * b = net.input_gpu; + float * c = l.weight_updates_gpu; + gemm_gpu(1,0,m,n,k,1,a,m,b,n,1,c,n); + + m = l.batch; + k = l.outputs; + n = l.inputs; + + a = l.delta_gpu; + b = l.weights_gpu; + c = net.delta_gpu; + + if(c) gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); +} +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/connected_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/connected_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..6727a964eaa923906b202ff337aa69ad91817117 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/connected_layer.h @@ -0,0 +1,23 @@ +#ifndef CONNECTED_LAYER_H +#define CONNECTED_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam); + +void forward_connected_layer(layer l, network net); +void backward_connected_layer(layer l, network net); +void update_connected_layer(layer l, update_args a); + +#ifdef GPU +void forward_connected_layer_gpu(layer l, network net); +void backward_connected_layer_gpu(layer l, network 
net); +void update_connected_layer_gpu(layer l, update_args a); +void push_connected_layer(layer l); +void pull_connected_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/convolutional_kernels.cu b/workloads/realworld/uvm_prefetch/darknet/src/convolutional_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..3677bc8597d0e37f4d45848a75bf34165cafa729 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/convolutional_kernels.cu @@ -0,0 +1,370 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "gemm.h" +#include "blas.h" +#include "im2col.h" +#include "col2im.h" +#include "utils.h" +#include "cuda_dark.h" +} + +__global__ void binarize_kernel(float *x, int n, float *binary) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= n) return; + binary[i] = (x[i] >= 0) ? 1 : -1; +} + +void binarize_gpu(float *x, int n, float *binary) +{ + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(x, n * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(binary, n * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + binarize_kernel<<>>(x, n, binary); + check_error(cudaPeekAtLastError()); +} + +__global__ void binarize_input_kernel(float *input, int n, int size, float *binary) +{ + int s = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (s >= size) return; + int i = 0; + float mean = 0; + for(i = 0; i < n; ++i){ + mean += fabsf(input[i*size + s]); + } + mean = mean / n; + for(i = 0; i < n; ++i){ + binary[i*size + s] = (input[i*size + s] > 0) ? 
mean : -mean; + } +} + +void binarize_input_gpu(float *input, int n, int size, float *binary) +{ + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(input, n * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(binary, n * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + binarize_input_kernel<<>>(input, n, size, binary); + check_error(cudaPeekAtLastError()); +} + + +__global__ void binarize_weights_kernel(float *weights, int n, int size, float *binary) +{ + int f = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (f >= n) return; + int i = 0; + float mean = 0; + for(i = 0; i < size; ++i){ + mean += fabsf(weights[f*size + i]); + } + mean = mean / size; + for(i = 0; i < size; ++i){ + binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean; + //binary[f*size + i] = weights[f*size + i]; + } +} + +void binarize_weights_gpu(float *weights, int n, int size, float *binary) +{ + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(weights, n * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(binary, n * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + binarize_weights_kernel<<>>(weights, n, size, binary); + check_error(cudaPeekAtLastError()); +} + +void forward_convolutional_layer_gpu(convolutional_layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + if(l.binary){ + binarize_weights_gpu(l.weights_gpu, l.n, l.c/l.groups*l.size*l.size, l.binary_weights_gpu); + swap_binary(&l); + } + + if(l.xnor){ + binarize_weights_gpu(l.weights_gpu, l.n, l.c/l.groups*l.size*l.size, l.binary_weights_gpu); + swap_binary(&l); + binarize_gpu(net.input_gpu, l.c*l.h*l.w*l.batch, l.binary_input_gpu); + net.input_gpu = l.binary_input_gpu; + } + +#ifdef 
CUDNN + float one = 1; + cudnnConvolutionForward(cudnn_handle(), + &one, + l.srcTensorDesc, + net.input_gpu, + l.weightDesc, + l.weights_gpu, + l.convDesc, + l.fw_algo, + net.workspace, + l.workspace_size, + &one, + l.dstTensorDesc, + l.output_gpu); + +#else + int i, j; + int m = l.n/l.groups; + int k = l.size*l.size*l.c/l.groups; + int n = l.out_w*l.out_h; + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.weights_gpu + j*l.nweights/l.groups; + float *b = net.workspace; + float *c = l.output_gpu + (i*l.groups + j)*n*m; + float *im = net.input_gpu + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + + if (l.size == 1){ + b = im; + } else { + im2col_gpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + } + gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } +#endif + + if (l.batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); + } + + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); + //if(l.dot > 0) dot_error_gpu(l); + if(l.binary || l.xnor) swap_binary(&l); +} + +__global__ void smooth_kernel(float *x, int n, int w, int h, int c, int size, float rate, float *delta) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int j = id % w; + id /= w; + int i = id % h; + id /= h; + int k = id % c; + id /= c; + int b = id; + + int w_offset = -(size/2.f); + int h_offset = -(size/2.f); + + int out_index = j + w*(i + h*(k + c*b)); + int l, m; + for(l = 0; l < size; ++l){ + for(m = 0; m < size; ++m){ + int cur_h = h_offset + i + l; + int cur_w = w_offset + j + m; + int index = cur_w + w*(cur_h + h*(k + b*c)); + int valid = (cur_h >= 0 && cur_h < h && + cur_w >= 0 && cur_w < w); + delta[out_index] += valid ? 
rate*(x[index] - x[out_index]) : 0; + } + } +} + +extern "C" void smooth_layer(layer l, int size, float rate) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.out_c; + + size_t n = h*w*c*l.batch; + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(l.output_gpu, n * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(l.delta_gpu, n * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + smooth_kernel<<>>(l.output_gpu, n, l.w, l.h, l.c, size, rate, l.delta_gpu); + check_error(cudaPeekAtLastError()); +} + +void backward_convolutional_layer_gpu(convolutional_layer l, network net) +{ + if(l.smooth){ + smooth_layer(l, 5, l.smooth); + } + //constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + + + if(l.batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); + } + float *original_input = net.input_gpu; + + if(l.xnor) net.input_gpu = l.binary_input_gpu; +#ifdef CUDNN + float one = 1; + cudnnConvolutionBackwardFilter(cudnn_handle(), + &one, + l.srcTensorDesc, + net.input_gpu, + l.ddstTensorDesc, + l.delta_gpu, + l.convDesc, + l.bf_algo, + net.workspace, + l.workspace_size, + &one, + l.dweightDesc, + l.weight_updates_gpu); + + if(net.delta_gpu){ + if(l.binary || l.xnor) swap_binary(&l); + cudnnConvolutionBackwardData(cudnn_handle(), + &one, + l.weightDesc, + l.weights_gpu, + l.ddstTensorDesc, + l.delta_gpu, + l.convDesc, + l.bd_algo, + net.workspace, + l.workspace_size, + &one, + l.dsrcTensorDesc, + net.delta_gpu); + if(l.binary || l.xnor) swap_binary(&l); + if(l.xnor) gradient_array_gpu(original_input, l.batch*l.c*l.h*l.w, HARDTAN, net.delta_gpu); + } + +#else + int m = l.n/l.groups; + int n = l.size*l.size*l.c/l.groups; + int k = l.out_w*l.out_h; + 
+ int i, j; + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.delta_gpu + (i*l.groups + j)*m*k; + float *b = net.workspace; + float *c = l.weight_updates_gpu + j*l.nweights/l.groups; + + float *im = net.input_gpu+(i*l.groups + j)*l.c/l.groups*l.h*l.w; + float *imd = net.delta_gpu+(i*l.groups + j)*l.c/l.groups*l.h*l.w; + + im2col_gpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (net.delta_gpu) { + if (l.binary || l.xnor) swap_binary(&l); + a = l.weights_gpu + j*l.nweights/l.groups; + b = l.delta_gpu + (i*l.groups + j)*m*k; + c = net.workspace; + if (l.size == 1) { + c = imd; + } + + gemm_gpu(1,0,n,k,m,1,a,n,b,k,0,c,k); + + if (l.size != 1) { + col2im_gpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd); + } + if(l.binary || l.xnor) { + swap_binary(&l); + } + } + if(l.xnor) gradient_array_gpu(original_input + i*l.c*l.h*l.w, l.c*l.h*l.w, HARDTAN, net.delta_gpu + i*l.c*l.h*l.w); + } + } +#endif +} + +void pull_convolutional_layer(layer l) +{ + cuda_pull_array(l.weights_gpu, l.weights, l.nweights); + cuda_pull_array(l.biases_gpu, l.biases, l.n); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_pull_array(l.scales_gpu, l.scales, l.n); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + +void push_convolutional_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.nweights); + cuda_push_array(l.biases_gpu, l.biases, l.n); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_push_array(l.scales_gpu, l.scales, l.n); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + 
+void update_convolutional_layer_gpu(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + if(a.adam){ + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t); + adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + if(l.scales_gpu){ + adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + } + }else{ + axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1); + + axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.n, momentum, l.bias_updates_gpu, 1); + + if(l.scales_gpu){ + axpy_gpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.n, momentum, l.scale_updates_gpu, 1); + } + } + if(l.clip){ + constrain_gpu(l.nweights, l.clip, l.weights_gpu, 1); + } +} + + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/convolutional_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/convolutional_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..1fb58b0933b06f2b27ec89f9f7c05f0b2b8a87eb --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/convolutional_layer.c @@ -0,0 +1,622 @@ +#include "convolutional_layer.h" +#include "utils.h" +#include "batchnorm_layer.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" +#include +#include + +#ifdef AI2 +#include "xnor_layer.h" +#endif + +void swap_binary(convolutional_layer *l) +{ + float *swap = l->weights; + l->weights = l->binary_weights; + l->binary_weights = 
swap; + +#ifdef GPU + swap = l->weights_gpu; + l->weights_gpu = l->binary_weights_gpu; + l->binary_weights_gpu = swap; +#endif +} + +void binarize_weights(float *weights, int n, int size, float *binary) +{ + int i, f; + for(f = 0; f < n; ++f){ + float mean = 0; + for(i = 0; i < size; ++i){ + mean += fabs(weights[f*size + i]); + } + mean = mean / size; + for(i = 0; i < size; ++i){ + binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean; + } + } +} + +void binarize_cpu(float *input, int n, float *binary) +{ + int i; + for(i = 0; i < n; ++i){ + binary[i] = (input[i] > 0) ? 1 : -1; + } +} + +void binarize_input(float *input, int n, int size, float *binary) +{ + int i, s; + for(s = 0; s < size; ++s){ + float mean = 0; + for(i = 0; i < n; ++i){ + mean += fabs(input[i*size + s]); + } + mean = mean / n; + for(i = 0; i < n; ++i){ + binary[i*size + s] = (input[i*size + s] > 0) ? mean : -mean; + } + } +} + +int convolutional_out_height(convolutional_layer l) +{ + return (l.h + 2*l.pad - l.size) / l.stride + 1; +} + +int convolutional_out_width(convolutional_layer l) +{ + return (l.w + 2*l.pad - l.size) / l.stride + 1; +} + +image get_convolutional_image(convolutional_layer l) +{ + return float_to_image(l.out_w,l.out_h,l.out_c,l.output); +} + +image get_convolutional_delta(convolutional_layer l) +{ + return float_to_image(l.out_w,l.out_h,l.out_c,l.delta); +} + +static size_t get_workspace_size(layer l){ +#ifdef CUDNN + if(gpu_index >= 0){ + size_t most = 0; + size_t s = 0; + cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(), + l.srcTensorDesc, + l.weightDesc, + l.convDesc, + l.dstTensorDesc, + l.fw_algo, + &s); + if (s > most) most = s; + cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnn_handle(), + l.srcTensorDesc, + l.ddstTensorDesc, + l.convDesc, + l.dweightDesc, + l.bf_algo, + &s); + if (s > most) most = s; + cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(), + l.weightDesc, + l.ddstTensorDesc, + l.convDesc, + l.dsrcTensorDesc, + l.bd_algo, + &s); 
+ if (s > most) most = s; + return most; + } +#endif + return (size_t)l.out_h*l.out_w*l.size*l.size*l.c/l.groups*sizeof(float); +} + +#ifdef GPU +#ifdef CUDNN +void cudnn_convolutional_setup(layer *l) +{ + cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); + cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + + cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + + cudnnSetFilter4dDescriptor(l->dweightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size); + cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size); + #if CUDNN_MAJOR >= 6 + cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT); + #else + cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION); + #endif + + #if CUDNN_MAJOR >= 7 + cudnnSetConvolutionGroupCount(l->convDesc, l->groups); + #else + if(l->groups > 1){ + error("CUDNN < 7 doesn't support groups, please upgrade!"); + } + #endif + + cudnnGetConvolutionForwardAlgorithm(cudnn_handle(), + l->srcTensorDesc, + l->weightDesc, + l->convDesc, + l->dstTensorDesc, + CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->fw_algo); + cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(), + l->weightDesc, + l->ddstTensorDesc, + l->convDesc, + l->dsrcTensorDesc, + CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->bd_algo); + cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(), 
+ l->srcTensorDesc, + l->ddstTensorDesc, + l->convDesc, + l->dweightDesc, + CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->bf_algo); +} +#endif +#endif + +convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam) +{ + int i; + convolutional_layer l = {0}; + l.type = CONVOLUTIONAL; + + l.groups = groups; + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.binary = binary; + l.xnor = xnor; + l.batch = batch; + l.stride = stride; + l.size = size; + l.pad = padding; + l.batch_normalize = batch_normalize; + + l.weights = calloc(c/groups*n*size*size, sizeof(float)); + l.weight_updates = calloc(c/groups*n*size*size, sizeof(float)); + + l.biases = calloc(n, sizeof(float)); + l.bias_updates = calloc(n, sizeof(float)); + + l.nweights = c/groups*n*size*size; + l.nbiases = n; + + // float scale = 1./sqrt(size*size*c); + float scale = sqrt(2./(size*size*c/l.groups)); + //printf("convscale %f\n", scale); + //scale = .02; + //for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1); + for(i = 0; i < l.nweights; ++i) l.weights[i] = scale*rand_normal(); + int out_w = convolutional_out_width(l); + int out_h = convolutional_out_height(l); + l.out_h = out_h; + l.out_w = out_w; + l.out_c = n; + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = l.w * l.h * l.c; + + l.output = calloc(l.batch*l.outputs, sizeof(float)); + l.delta = calloc(l.batch*l.outputs, sizeof(float)); + + l.forward = forward_convolutional_layer; + l.backward = backward_convolutional_layer; + l.update = update_convolutional_layer; + if(binary){ + l.binary_weights = calloc(l.nweights, sizeof(float)); + l.cweights = calloc(l.nweights, sizeof(char)); + l.scales = calloc(n, sizeof(float)); + } + if(xnor){ + l.binary_weights = calloc(l.nweights, sizeof(float)); + l.binary_input = calloc(l.inputs*l.batch, sizeof(float)); + } + + 
if(batch_normalize){ + l.scales = calloc(n, sizeof(float)); + l.scale_updates = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(n, sizeof(float)); + l.variance = calloc(n, sizeof(float)); + + l.mean_delta = calloc(n, sizeof(float)); + l.variance_delta = calloc(n, sizeof(float)); + + l.rolling_mean = calloc(n, sizeof(float)); + l.rolling_variance = calloc(n, sizeof(float)); + l.x = calloc(l.batch*l.outputs, sizeof(float)); + l.x_norm = calloc(l.batch*l.outputs, sizeof(float)); + } + if(adam){ + l.m = calloc(l.nweights, sizeof(float)); + l.v = calloc(l.nweights, sizeof(float)); + l.bias_m = calloc(n, sizeof(float)); + l.scale_m = calloc(n, sizeof(float)); + l.bias_v = calloc(n, sizeof(float)); + l.scale_v = calloc(n, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_convolutional_layer_gpu; + l.backward_gpu = backward_convolutional_layer_gpu; + l.update_gpu = update_convolutional_layer_gpu; + + if(gpu_index >= 0){ + if (adam) { + l.m_gpu = cuda_make_array(l.m, l.nweights); + l.v_gpu = cuda_make_array(l.v, l.nweights); + l.bias_m_gpu = cuda_make_array(l.bias_m, n); + l.bias_v_gpu = cuda_make_array(l.bias_v, n); + l.scale_m_gpu = cuda_make_array(l.scale_m, n); + l.scale_v_gpu = cuda_make_array(l.scale_v, n); + } + + l.weights_gpu = cuda_make_array(l.weights, l.nweights); + l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights); + + l.biases_gpu = cuda_make_array(l.biases, n); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + + if(binary){ + l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); + } + if(xnor){ + l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); + l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch); + } + + if(batch_normalize){ + l.mean_gpu = cuda_make_array(l.mean, n); + l.variance_gpu = 
cuda_make_array(l.variance, n); + + l.rolling_mean_gpu = cuda_make_array(l.mean, n); + l.rolling_variance_gpu = cuda_make_array(l.variance, n); + + l.mean_delta_gpu = cuda_make_array(l.mean, n); + l.variance_delta_gpu = cuda_make_array(l.variance, n); + + l.scales_gpu = cuda_make_array(l.scales, n); + l.scale_updates_gpu = cuda_make_array(l.scale_updates, n); + + l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + } +#ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.srcTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnCreateFilterDescriptor(&l.weightDesc); + cudnnCreateTensorDescriptor(&l.dsrcTensorDesc); + cudnnCreateTensorDescriptor(&l.ddstTensorDesc); + cudnnCreateFilterDescriptor(&l.dweightDesc); + cudnnCreateConvolutionDescriptor(&l.convDesc); + cudnn_convolutional_setup(&l); +#endif + } +#endif + l.workspace_size = get_workspace_size(l); + l.activation = activation; + + fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BFLOPs\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, (2.0 * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w)/1000000000.); + + return l; +} + +void denormalize_convolutional_layer(convolutional_layer l) +{ + int i, j; + for(i = 0; i < l.n; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001); + for(j = 0; j < l.c/l.groups*l.size*l.size; ++j){ + l.weights[i*l.c/l.groups*l.size*l.size + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + +/* +void test_convolutional_layer() +{ + convolutional_layer l = make_convolutional_layer(1, 5, 5, 3, 2, 5, 2, 1, LEAKY, 1, 0, 0, 0); + l.batch_normalize = 1; + float data[] = {1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 3,3,3,3,3, + 
3,3,3,3,3,
            3,3,3,3,3,
            3,3,3,3,3,
            3,3,3,3,3};
    //net.input = data;
    //forward_convolutional_layer(l);
}
*/

/* Re-dimension an existing convolutional layer for a new input width/height.
** Recomputes the output spatial size, grows (or shrinks) the host output/delta
** buffers with realloc, and rebuilds the GPU-side buffers from scratch
** (cuda_free + cuda_make_array) since device memory cannot be realloc'd.
** NOTE(review): realloc return values are assigned without a NULL check, so an
** allocation failure loses the old pointer — upstream darknet behaves the same. */
void resize_convolutional_layer(convolutional_layer *l, int w, int h)
{
    l->w = w;
    l->h = h;
    int out_w = convolutional_out_width(*l);
    int out_h = convolutional_out_height(*l);

    l->out_w = out_w;
    l->out_h = out_h;

    l->outputs = l->out_h * l->out_w * l->out_c;
    l->inputs = l->w * l->h * l->c;

    l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
    l->delta  = realloc(l->delta, l->batch*l->outputs*sizeof(float));
    if(l->batch_normalize){
        /* x / x_norm cache pre- and post-normalization activations for backprop. */
        l->x = realloc(l->x, l->batch*l->outputs*sizeof(float));
        l->x_norm  = realloc(l->x_norm, l->batch*l->outputs*sizeof(float));
    }

#ifdef GPU
    cuda_free(l->delta_gpu);
    cuda_free(l->output_gpu);

    l->delta_gpu =  cuda_make_array(l->delta,  l->batch*l->outputs);
    l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);

    if(l->batch_normalize){
        cuda_free(l->x_gpu);
        cuda_free(l->x_norm_gpu);

        l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs);
        l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs);
    }
#ifdef CUDNN
    /* Descriptors depend on h/w, so cuDNN must be re-configured after a resize. */
    cudnn_convolutional_setup(l);
#endif
#endif
    /* Workspace (im2col / cuDNN scratch) also depends on the new geometry. */
    l->workspace_size = get_workspace_size(*l);
}

/* Add biases[i] to every spatial position of feature map i, for every batch item.
** output is laid out as [batch][n][size] with size = out_h*out_w. */
void add_bias(float *output, float *biases, int batch, int n, int size)
{
    int i,j,b;
    for(b = 0; b < batch; ++b){
        for(i = 0; i < n; ++i){
            for(j = 0; j < size; ++j){
                output[(b*n + i)*size + j] += biases[i];
            }
        }
    }
}

/* Multiply every spatial position of feature map i by scales[i] (batch-norm gamma). */
void scale_bias(float *output, float *scales, int batch, int n, int size)
{
    int i,j,b;
    for(b = 0; b < batch; ++b){
        for(i = 0; i < n; ++i){
            for(j = 0; j < size; ++j){
                output[(b*n + i)*size + j] *= scales[i];
            }
        }
    }
}

/* Accumulate the bias gradient: for each filter i, sum delta over all spatial
** positions and batch items. Accumulates (+=) so updates carry across calls
** until the optimizer consumes them. */
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size)
{
    int i,b;
    for(b = 0; b < batch; ++b){
        for(i = 0; i < n; ++i){
            bias_updates[i] += sum_array(delta+size*(i+b*n), size);
        }
    }
}

void
forward_convolutional_layer(convolutional_layer l, network net)
{
    int i, j;

    /* Start from a zeroed output buffer; gemm below accumulates into it. */
    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    if(l.xnor){
        /* XNOR-net path: binarize weights and input, then run the same gemm. */
        binarize_weights(l.weights, l.n, l.c/l.groups*l.size*l.size, l.binary_weights);
        swap_binary(&l);
        binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input);
        net.input = l.binary_input;
    }

    /* gemm dimensions per group: m = filters, k = patch size, n = output pixels. */
    int m = l.n/l.groups;
    int k = l.size*l.size*l.c/l.groups;
    int n = l.out_w*l.out_h;
    for(i = 0; i < l.batch; ++i){
        for(j = 0; j < l.groups; ++j){
            float *a = l.weights + j*l.nweights/l.groups;
            float *b = net.workspace;
            float *c = l.output + (i*l.groups + j)*n*m;
            float *im =  net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w;

            if (l.size == 1) {
                /* 1x1 convolution: im2col is the identity, use the image directly. */
                b = im;
            } else {
                /* Unroll patches into columns so convolution becomes one matrix multiply. */
                im2col_cpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b);
            }
            /* Presumably BLAS-style: C += A(mxk) * B(kxn) — TODO confirm gemm signature. */
            gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        }
    }

    if(l.batch_normalize){
        forward_batchnorm_layer(l, net);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
    }

    activate_array(l.output, l.outputs*l.batch, l.activation);
    /* Restore the real weights if they were swapped for binary ones above. */
    if(l.binary || l.xnor) swap_binary(&l);
}

/* Backward pass: computes weight/bias gradients and, when net.delta is set,
** the gradient with respect to the layer input (via a transposed gemm + col2im). */
void backward_convolutional_layer(convolutional_layer l, network net)
{
    int i, j;
    int m = l.n/l.groups;
    int n = l.size*l.size*l.c/l.groups;
    int k = l.out_w*l.out_h;

    /* Chain the activation derivative into l.delta first. */
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    if(l.batch_normalize){
        backward_batchnorm_layer(l, net);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);
    }

    for(i = 0; i < l.batch; ++i){
        for(j = 0; j < l.groups; ++j){
            float *a = l.delta + (i*l.groups + j)*m*k;
            float *b = net.workspace;
            float *c = l.weight_updates + j*l.nweights/l.groups;

            float *im  = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w;
            float *imd = net.delta + (i*l.groups + j)*l.c/l.groups*l.h*l.w;

            if(l.size == 1){
                b = im;
            } else {
                im2col_cpu(im, l.c/l.groups, l.h, l.w,
                        l.size, l.stride, l.pad, b);
            }

            /* Weight gradient: delta x input-columns^T accumulated into weight_updates. */
            gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);

            if (net.delta) {
                /* Input gradient: weights^T x delta, then fold columns back to image. */
                a = l.weights + j*l.nweights/l.groups;
                b = l.delta + (i*l.groups + j)*m*k;
                c = net.workspace;
                if (l.size == 1) {
                    /* 1x1: write straight into the input-delta image, no col2im needed. */
                    c = imd;
                }

                gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);

                if (l.size != 1) {
                    col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd);
                }
            }
        }
    }
}

/* SGD update with momentum and weight decay; learning rate is normalized by
** the batch size since gradients were accumulated over the whole batch. */
void update_convolutional_layer(convolutional_layer l, update_args a)
{
    float learning_rate = a.learning_rate*l.learning_rate_scale;
    float momentum = a.momentum;
    float decay = a.decay;
    int batch = a.batch;

    axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.n, momentum, l.bias_updates, 1);

    if(l.scales){
        axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
        scal_cpu(l.n, momentum, l.scale_updates, 1);
    }

    /* Decay is applied to the gradient buffer, then the (decayed) gradient steps
    ** the weights, then momentum scales what remains for the next iteration. */
    axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(l.nweights, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
    scal_cpu(l.nweights, momentum, l.weight_updates, 1);
}


/* View (no copy) of filter i's weights as a size x size x (c/groups) image. */
image get_convolutional_weight(convolutional_layer l, int i)
{
    int h = l.size;
    int w = l.size;
    int c = l.c/l.groups;
    return float_to_image(w,h,c,l.weights+i*h*w*c);
}

/* Swap R and B channels of every 3-channel filter in place (RGB <-> BGR input). */
void rgbgr_weights(convolutional_layer l)
{
    int i;
    for(i = 0; i < l.n; ++i){
        image im = get_convolutional_weight(l, i);
        if (im.c == 3) {
            rgbgr_image(im);
        }
    }
}

/* Rescale 3-channel filters in place and shift each bias so the layer output
** is preserved when the input range changes (e.g. [0,255] -> [0,1]). */
void rescale_weights(convolutional_layer l, float scale, float trans)
{
    int i;
    for(i = 0; i < l.n; ++i){
        image im = get_convolutional_weight(l, i);
        if (im.c == 3) {
            scale_image(im, scale);
            float sum = sum_array(im.data, im.w*im.h*im.c);
            l.biases[i] += sum*trans;
        }
    }
}

/* Return a freshly allocated array of l.n normalized copies of the filter images.
** Caller owns the array and each image. */
image *get_weights(convolutional_layer l)
{
    image *weights = calloc(l.n, sizeof(image));
    int i;
    for(i = 0; i < l.n; ++i){
        weights[i] = copy_image(get_convolutional_weight(l, i));
        normalize_image(weights[i]);
        /*
        char buff[256];
        sprintf(buff, "filter%d", i);
        save_image(weights[i], buff);
        */
    }
    //error("hey");
    return weights;
}

image
*visualize_convolutional_layer(convolutional_layer l, char *window, image *prev_weights) +{ + image *single_weights = get_weights(l); + show_images(single_weights, l.n, window); + + image delta = get_convolutional_image(l); + image dc = collapse_image_layers(delta, 1); + char buff[256]; + sprintf(buff, "%s: Output", window); + //show_image(dc, buff); + //save_image(dc, buff); + free_image(dc); + return single_weights; +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/convolutional_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/convolutional_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..baacf38f4127a42abe009ef8aa3b59543433a286 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/convolutional_layer.h @@ -0,0 +1,50 @@ +#ifndef CONVOLUTIONAL_LAYER_H +#define CONVOLUTIONAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +typedef layer convolutional_layer; + +#ifdef GPU +void forward_convolutional_layer_gpu(convolutional_layer layer, network net); +void backward_convolutional_layer_gpu(convolutional_layer layer, network net); +void update_convolutional_layer_gpu(convolutional_layer layer, update_args a); + +void push_convolutional_layer(convolutional_layer layer); +void pull_convolutional_layer(convolutional_layer layer); + +void add_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); +void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t); +#ifdef CUDNN +void cudnn_convolutional_setup(layer *l); +#endif +#endif + +convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam); +void 
resize_convolutional_layer(convolutional_layer *layer, int w, int h); +void forward_convolutional_layer(const convolutional_layer layer, network net); +void update_convolutional_layer(convolutional_layer layer, update_args a); +image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_weights); +void binarize_weights(float *weights, int n, int size, float *binary); +void swap_binary(convolutional_layer *l); +void binarize_weights2(float *weights, int n, int size, char *binary, float *scales); + +void backward_convolutional_layer(convolutional_layer layer, network net); + +void add_bias(float *output, float *biases, int batch, int n, int size); +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); + +image get_convolutional_image(convolutional_layer layer); +image get_convolutional_delta(convolutional_layer layer); +image get_convolutional_weight(convolutional_layer layer, int i); + +int convolutional_out_height(convolutional_layer layer); +int convolutional_out_width(convolutional_layer layer); + +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/cost_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/cost_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..85fa85daf306dda03c113a6bbdc2d92b25d0b00d --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/cost_layer.c @@ -0,0 +1,176 @@ +#include "cost_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include +#include +#include +#include + +COST_TYPE get_cost_type(char *s) +{ + if (strcmp(s, "seg")==0) return SEG; + if (strcmp(s, "sse")==0) return SSE; + if (strcmp(s, "masked")==0) return MASKED; + if (strcmp(s, "smooth")==0) return SMOOTH; + if (strcmp(s, "L1")==0) return L1; + if (strcmp(s, "wgan")==0) return WGAN; + fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s); + return SSE; +} + +char *get_cost_string(COST_TYPE a) +{ + switch(a){ + case SEG: + return 
"seg"; + case SSE: + return "sse"; + case MASKED: + return "masked"; + case SMOOTH: + return "smooth"; + case L1: + return "L1"; + case WGAN: + return "wgan"; + } + return "sse"; +} + +cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) +{ + fprintf(stderr, "cost %4d\n", inputs); + cost_layer l = {0}; + l.type = COST; + + l.scale = scale; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.cost_type = cost_type; + l.delta = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_cost_layer; + l.backward = backward_cost_layer; + #ifdef GPU + l.forward_gpu = forward_cost_layer_gpu; + l.backward_gpu = backward_cost_layer_gpu; + + l.delta_gpu = cuda_make_array(l.output, inputs*batch); + l.output_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void resize_cost_layer(cost_layer *l, int inputs) +{ + l->inputs = inputs; + l->outputs = inputs; + l->delta = realloc(l->delta, inputs*l->batch*sizeof(float)); + l->output = realloc(l->output, inputs*l->batch*sizeof(float)); +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch); + l->output_gpu = cuda_make_array(l->output, inputs*l->batch); +#endif +} + +void forward_cost_layer(cost_layer l, network net) +{ + if (!net.truth) return; + if(l.cost_type == MASKED){ + int i; + for(i = 0; i < l.batch*l.inputs; ++i){ + if(net.truth[i] == SECRET_NUM) net.input[i] = SECRET_NUM; + } + } + if(l.cost_type == SMOOTH){ + smooth_l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + }else if(l.cost_type == L1){ + l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + } else { + l2_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + } + l.cost[0] = sum_array(l.output, l.batch*l.inputs); +} + +void backward_cost_layer(const cost_layer l, network net) +{ + 
axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, net.delta, 1);
}

#ifdef GPU

/* Copy the loss gradient from device to host. */
void pull_cost_layer(cost_layer l)
{
    cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs);
}

/* Copy the (host-side) loss gradient back to the device. */
void push_cost_layer(cost_layer l)
{
    cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs);
}

/* qsort comparator ordering floats by ascending absolute value.
** (fa > fb) - (fa < fb) yields -1/0/1 without risking overflow. */
int float_abs_compare (const void * a, const void * b)
{
    float fa = *(const float*) a;
    if(fa < 0) fa = -fa;
    float fb = *(const float*) b;
    if(fb < 0) fb = -fb;
    return (fa > fb) - (fa < fb);
}

/* GPU forward pass: per-element loss into l.output_gpu, gradient into
** l.delta_gpu, then pulls the losses to the host to sum the total cost.
** No-op when no ground truth is set. */
void forward_cost_layer_gpu(cost_layer l, network net)
{
    if (!net.truth) return;
    if(l.smooth){
        /* Label smoothing: truth = (1-smooth)*truth + smooth/inputs (mutates truth_gpu). */
        scal_gpu(l.batch*l.inputs, (1-l.smooth), net.truth_gpu, 1);
        add_gpu(l.batch*l.inputs, l.smooth * 1./l.inputs, net.truth_gpu, 1);
    }

    if(l.cost_type == SMOOTH){
        smooth_l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu);
    } else if (l.cost_type == L1){
        l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu);
    } else if (l.cost_type == WGAN){
        wgan_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu);
    } else {
        /* SSE, SEG, MASKED and any unrecognized type use L2 on the GPU. */
        l2_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu);
    }

    if (l.cost_type == SEG && l.noobject_scale != 1) {
        /* Down-weight positions whose truth is 0 (background) for segmentation. */
        scale_mask_gpu(l.batch*l.inputs, l.delta_gpu, 0, net.truth_gpu, l.noobject_scale);
        scale_mask_gpu(l.batch*l.inputs, l.output_gpu, 0, net.truth_gpu, l.noobject_scale);
    }
    if (l.cost_type == MASKED) {
        mask_gpu(l.batch*l.inputs, net.delta_gpu, SECRET_NUM, net.truth_gpu, 0);
    }

    if(l.ratio){
        /* Hard-example mining: keep only the largest-|delta| fraction l.ratio. */
        cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs);
        qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare);
        int n = (1-l.ratio) * l.batch*l.inputs;
        float thresh = l.delta[n];
        /* NOTE(review): debug leftover — the computed threshold is immediately
        ** discarded, so supp_gpu suppresses nothing. Intentional? */
        thresh = 0;
        printf("%f\n", thresh);
        supp_gpu(l.batch*l.inputs, thresh, l.delta_gpu, 1);
    }

    if(l.thresh){
        /* Zero out gradient components below thresh/inputs. */
        supp_gpu(l.batch*l.inputs, l.thresh*1./l.inputs, l.delta_gpu, 1);
    }

    cuda_pull_array(l.output_gpu, l.output,
l.batch*l.inputs); + l.cost[0] = sum_array(l.output, l.batch*l.inputs); +} + +void backward_cost_layer_gpu(const cost_layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/cost_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/cost_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..ceb64de00bf66839c2f34852a05ea71114608a35 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/cost_layer.h @@ -0,0 +1,20 @@ +#ifndef COST_LAYER_H +#define COST_LAYER_H +#include "layer.h" +#include "network.h" + +typedef layer cost_layer; + +COST_TYPE get_cost_type(char *s); +char *get_cost_string(COST_TYPE a); +cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale); +void forward_cost_layer(const cost_layer l, network net); +void backward_cost_layer(const cost_layer l, network net); +void resize_cost_layer(cost_layer *l, int inputs); + +#ifdef GPU +void forward_cost_layer_gpu(cost_layer l, network net); +void backward_cost_layer_gpu(const cost_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/cpu_timestamps.c b/workloads/realworld/uvm_prefetch/darknet/src/cpu_timestamps.c new file mode 100644 index 0000000000000000000000000000000000000000..35114479c7a9cce3debe2204b6886ad5528041d5 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/cpu_timestamps.c @@ -0,0 +1,20 @@ +#include "cpu_timestamps.h" + +void startCPU() { + struct timespec tv; + if(clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + startCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); +} + + + +void endCPU() { + struct timespec tv; + if(clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + + endCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); + //endCPUTimestamp1 = std::chrono::system_clock::now(); + printf("CPU_Times,%lu,%lu,%lu\n", startCPUTime, endCPUTime, 
endCPUTime-startCPUTime); +} diff --git a/workloads/realworld/uvm_prefetch/darknet/src/cpu_timestamps.h b/workloads/realworld/uvm_prefetch/darknet/src/cpu_timestamps.h new file mode 100644 index 0000000000000000000000000000000000000000..e53e995a5603b4610759c02a4a179eb9f0124e48 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/cpu_timestamps.h @@ -0,0 +1,21 @@ +#ifndef CPU_TIMESTAMP_ +#define CPU_TIMESTAMP_ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +static uint64_t startCPUTime; +static uint64_t endCPUTime; + +void startCPU(); +void endCPU(); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/crnn_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/crnn_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..159e17f92d45693461c92d482bf3aa7354a148d8 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/crnn_layer.c @@ -0,0 +1,283 @@ +#include "crnn_layer.h" +#include "convolutional_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize) +{ + fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = CRNN; + l.steps = steps; + l.h = h; + l.w = w; + l.c = c; + l.out_h = h; + l.out_w = w; + l.out_c = output_filters; + l.inputs = h*w*c; + l.hidden = h * w * hidden_filters; + l.outputs = l.out_h * l.out_w * l.out_c; + + l.state = 
calloc(l.hidden*batch*(steps+1), sizeof(float)); + + l.input_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.input_layer) = make_convolutional_layer(batch*steps, h, w, c, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.input_layer->batch = batch; + + l.self_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.self_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.self_layer->batch = batch; + + l.output_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.output_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, output_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.output_layer->batch = batch; + + l.output = l.output_layer->output; + l.delta = l.output_layer->delta; + + l.forward = forward_crnn_layer; + l.backward = backward_crnn_layer; + l.update = update_crnn_layer; + +#ifdef GPU + l.forward_gpu = forward_crnn_layer_gpu; + l.backward_gpu = backward_crnn_layer_gpu; + l.update_gpu = update_crnn_layer_gpu; + + l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1)); + l.output_gpu = l.output_layer->output_gpu; + l.delta_gpu = l.output_layer->delta_gpu; +#endif + + return l; +} + +void update_crnn_layer(layer l, update_args a) +{ + update_convolutional_layer(*(l.input_layer), a); + update_convolutional_layer(*(l.self_layer), a); + update_convolutional_layer(*(l.output_layer), a); +} + +void forward_crnn_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1); + fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1); + fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1); + if(net.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1); + + for (i = 0; 
i < l.steps; ++i) { + s.input = net.input; + forward_convolutional_layer(input_layer, s); + + s.input = l.state; + forward_convolutional_layer(self_layer, s); + + float *old_state = l.state; + if(net.train) l.state += l.hidden*l.batch; + if(l.shortcut){ + copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1); + }else{ + fill_cpu(l.hidden * l.batch, 0, l.state, 1); + } + axpy_cpu(l.hidden * l.batch, 1, input_layer.output, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + forward_convolutional_layer(output_layer, s); + + net.input += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_crnn_layer(layer l, network net) +{ + network s = net; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + increment_layer(&input_layer, l.steps-1); + increment_layer(&self_layer, l.steps-1); + increment_layer(&output_layer, l.steps-1); + + l.state += l.hidden*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + s.delta = self_layer.delta; + backward_convolutional_layer(output_layer, s); + + l.state -= l.hidden*l.batch; + /* + if(i > 0){ + copy_cpu(l.hidden * l.batch, input_layer.output - l.hidden*l.batch, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output - l.hidden*l.batch, 1, l.state, 1); + }else{ + fill_cpu(l.hidden * l.batch, 0, l.state, 1); + } + */ + + s.input = l.state; + s.delta = self_layer.delta - l.hidden*l.batch; + if (i == 0) s.delta = 0; + backward_convolutional_layer(self_layer, s); + + copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1); + if (i > 0 && l.shortcut) axpy_cpu(l.hidden*l.batch, 1, self_layer.delta, 1, self_layer.delta - 
l.hidden*l.batch, 1); + s.input = net.input + i*l.inputs*l.batch; + if(net.delta) s.delta = net.delta + i*l.inputs*l.batch; + else s.delta = 0; + backward_convolutional_layer(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} + +#ifdef GPU + +void pull_crnn_layer(layer l) +{ + pull_convolutional_layer(*(l.input_layer)); + pull_convolutional_layer(*(l.self_layer)); + pull_convolutional_layer(*(l.output_layer)); +} + +void push_crnn_layer(layer l) +{ + push_convolutional_layer(*(l.input_layer)); + push_convolutional_layer(*(l.self_layer)); + push_convolutional_layer(*(l.output_layer)); +} + +void update_crnn_layer_gpu(layer l, update_args a) +{ + update_convolutional_layer_gpu(*(l.input_layer), a); + update_convolutional_layer_gpu(*(l.self_layer), a); + update_convolutional_layer_gpu(*(l.output_layer), a); +} + +void forward_crnn_layer_gpu(layer l, network net) +{ + network s = net; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_gpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); + fill_gpu(l.hidden * l.batch * l.steps, 0, self_layer.delta_gpu, 1); + fill_gpu(l.hidden * l.batch * l.steps, 0, input_layer.delta_gpu, 1); + if(net.train) fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1); + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = net.input_gpu; + forward_convolutional_layer_gpu(input_layer, s); + + s.input_gpu = l.state_gpu; + forward_convolutional_layer_gpu(self_layer, s); + + float *old_state = l.state_gpu; + if(net.train) l.state_gpu += l.hidden*l.batch; + if(l.shortcut){ + copy_gpu(l.hidden * l.batch, old_state, 1, l.state_gpu, 1); + }else{ + fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1); + } + axpy_gpu(l.hidden * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = 
l.state_gpu; + forward_convolutional_layer_gpu(output_layer, s); + + net.input_gpu += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_crnn_layer_gpu(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + increment_layer(&input_layer, l.steps - 1); + increment_layer(&self_layer, l.steps - 1); + increment_layer(&output_layer, l.steps - 1); + l.state_gpu += l.hidden*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_gpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu; + backward_convolutional_layer_gpu(output_layer, s); + + l.state_gpu -= l.hidden*l.batch; + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu - l.hidden*l.batch; + if (i == 0) s.delta_gpu = 0; + backward_convolutional_layer_gpu(self_layer, s); + + copy_gpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); + if (i > 0 && l.shortcut) axpy_gpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1); + s.input_gpu = net.input_gpu + i*l.inputs*l.batch; + if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch; + else s.delta_gpu = 0; + backward_convolutional_layer_gpu(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/crnn_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/crnn_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..515f378354e9cc6149e7a1ac60ffc86ace112991 --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch/darknet/src/crnn_layer.h @@ -0,0 +1,24 @@ + +#ifndef CRNN_LAYER_H +#define CRNN_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize); + +void forward_crnn_layer(layer l, network net); +void backward_crnn_layer(layer l, network net); +void update_crnn_layer(layer l, update_args a); + +#ifdef GPU +void forward_crnn_layer_gpu(layer l, network net); +void backward_crnn_layer_gpu(layer l, network net); +void update_crnn_layer_gpu(layer l, update_args a); +void push_crnn_layer(layer l); +void pull_crnn_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/crop_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/crop_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..3c8c9650bda6dcf4485ce8da8e2fa1984f2b244d --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/crop_layer.c @@ -0,0 +1,103 @@ +#include "crop_layer.h" +#include "cuda_dark.h" +#include + +image get_crop_image(crop_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.out_c; + return float_to_image(w,h,c,l.output); +} + +void backward_crop_layer(const crop_layer l, network net){} +void backward_crop_layer_gpu(const crop_layer l, network net){} + +crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure) +{ + fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c); + crop_layer l = {0}; + l.type = CROP; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.scale = (float)crop_height / h; + l.flip = flip; + l.angle = angle; + l.saturation = saturation; + l.exposure = exposure; + l.out_w = crop_width; + l.out_h = crop_height; + l.out_c = c; + l.inputs = l.w * l.h * l.c; + l.outputs = l.out_w 
* l.out_h * l.out_c; + l.output = calloc(l.outputs*batch, sizeof(float)); + l.forward = forward_crop_layer; + l.backward = backward_crop_layer; + + #ifdef GPU + l.forward_gpu = forward_crop_layer_gpu; + l.backward_gpu = backward_crop_layer_gpu; + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + l.rand_gpu = cuda_make_array(0, l.batch*8); + #endif + return l; +} + +void resize_crop_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->out_w = l->scale*w; + l->out_h = l->scale*h; + + l->inputs = l->w * l->h * l->c; + l->outputs = l->out_h * l->out_w * l->out_c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + #ifdef GPU + cuda_free(l->output_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + #endif +} + + +void forward_crop_layer(const crop_layer l, network net) +{ + int i,j,c,b,row,col; + int index; + int count = 0; + int flip = (l.flip && rand()%2); + int dh = rand()%(l.h - l.out_h + 1); + int dw = rand()%(l.w - l.out_w + 1); + float scale = 2; + float trans = -1; + if(l.noadjust){ + scale = 1; + trans = 0; + } + if(!net.train){ + flip = 0; + dh = (l.h - l.out_h)/2; + dw = (l.w - l.out_w)/2; + } + for(b = 0; b < l.batch; ++b){ + for(c = 0; c < l.c; ++c){ + for(i = 0; i < l.out_h; ++i){ + for(j = 0; j < l.out_w; ++j){ + if(flip){ + col = l.w - dw - j - 1; + }else{ + col = j + dw; + } + row = i + dh; + index = col+l.w*(row+l.h*(c + l.c*b)); + l.output[count++] = net.input[index]*scale + trans; + } + } + } + } +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/crop_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/crop_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..3b5883c47d6df0987700e1b0434010eebd6312af --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/crop_layer.h @@ -0,0 +1,20 @@ +#ifndef CROP_LAYER_H +#define CROP_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +typedef layer crop_layer; + +image 
get_crop_image(crop_layer l); +crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure); +void forward_crop_layer(const crop_layer l, network net); +void resize_crop_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_crop_layer_gpu(crop_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/crop_layer_kernels.cu b/workloads/realworld/uvm_prefetch/darknet/src/crop_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..1b355fe6f9924ef8d53ff323f1f542a223fd4e78 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/crop_layer_kernels.cu @@ -0,0 +1,239 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "crop_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "image.h" +} + +__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c) +{ + if(x < 0 || x >= w || y < 0 || y >= h) return 0; + return image[x + w*(y + c*h)]; +} + +__device__ float3 rgb_to_hsv_kernel(float3 rgb) +{ + float r = rgb.x; + float g = rgb.y; + float b = rgb.z; + + float h, s, v; + float max = (r > g) ? ( (r > b) ? r : b) : ( (g > b) ? g : b); + float min = (r < g) ? ( (r < b) ? r : b) : ( (g < b) ? 
g : b); + float delta = max - min; + v = max; + if(max == 0){ + s = 0; + h = -1; + }else{ + s = delta/max; + if(r == max){ + h = (g - b) / delta; + } else if (g == max) { + h = 2 + (b - r) / delta; + } else { + h = 4 + (r - g) / delta; + } + if (h < 0) h += 6; + } + return make_float3(h, s, v); +} + +__device__ float3 hsv_to_rgb_kernel(float3 hsv) +{ + float h = hsv.x; + float s = hsv.y; + float v = hsv.z; + + float r, g, b; + float f, p, q, t; + + if (s == 0) { + r = g = b = v; + } else { + int index = (int) floorf(h); + f = h - index; + p = v*(1-s); + q = v*(1-s*f); + t = v*(1-s*(1-f)); + if(index == 0){ + r = v; g = t; b = p; + } else if(index == 1){ + r = q; g = v; b = p; + } else if(index == 2){ + r = p; g = v; b = t; + } else if(index == 3){ + r = p; g = q; b = v; + } else if(index == 4){ + r = t; g = p; b = v; + } else { + r = v; g = p; b = q; + } + } + r = (r < 0) ? 0 : ((r > 1) ? 1 : r); + g = (g < 0) ? 0 : ((g > 1) ? 1 : g); + b = (b < 0) ? 0 : ((b > 1) ? 1 : b); + return make_float3(r, g, b); +} + +__device__ float bilinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c) +{ + int ix = (int) floorf(x); + int iy = (int) floorf(y); + + float dx = x - ix; + float dy = y - iy; + + float val = (1-dy) * (1-dx) * get_pixel_kernel(image, w, h, ix, iy, c) + + dy * (1-dx) * get_pixel_kernel(image, w, h, ix, iy+1, c) + + (1-dy) * dx * get_pixel_kernel(image, w, h, ix+1, iy, c) + + dy * dx * get_pixel_kernel(image, w, h, ix+1, iy+1, c); + return val; +} + +__global__ void levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure, float translate, float scale, float shift) +{ + int size = batch * w * h; + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= size) return; + int x = id % w; + id /= w; + int y = id % h; + id /= h; + float rshift = rand[0]; + float gshift = rand[1]; + float bshift = rand[2]; + float r0 = rand[8*id + 0]; + float r1 = rand[8*id + 
1]; + float r2 = rand[8*id + 2]; + float r3 = rand[8*id + 3]; + + saturation = r0*(saturation - 1) + 1; + saturation = (r1 > .5f) ? 1.f/saturation : saturation; + exposure = r2*(exposure - 1) + 1; + exposure = (r3 > .5f) ? 1.f/exposure : exposure; + + size_t offset = id * h * w * 3; + image += offset; + float r = image[x + w*(y + h*0)]; + float g = image[x + w*(y + h*1)]; + float b = image[x + w*(y + h*2)]; + float3 rgb = make_float3(r,g,b); + if(train){ + float3 hsv = rgb_to_hsv_kernel(rgb); + hsv.y *= saturation; + hsv.z *= exposure; + rgb = hsv_to_rgb_kernel(hsv); + } else { + shift = 0; + } + image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5f)*shift; + image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5f)*shift; + image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5f)*shift; +} + +__global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= size) return; + + float cx = w/2.f; + float cy = h/2.f; + + int count = id; + int j = id % crop_width; + id /= crop_width; + int i = id % crop_height; + id /= crop_height; + int k = id % c; + id /= c; + int b = id; + + float r4 = rand[8*b + 4]; + float r5 = rand[8*b + 5]; + float r6 = rand[8*b + 6]; + float r7 = rand[8*b + 7]; + + float dw = (w - crop_width)*r4; + float dh = (h - crop_height)*r5; + flip = (flip && (r6 > .5f)); + angle = 2*angle*r7 - angle; + if(!train){ + dw = (w - crop_width)/2.f; + dh = (h - crop_height)/2.f; + flip = 0; + angle = 0; + } + + input += w*h*c*b; + + float x = (flip) ? 
w - dw - j - 1 : j + dw; + float y = i + dh; + + float rx = cosf(angle)*(x-cx) - sinf(angle)*(y-cy) + cx; + float ry = sinf(angle)*(x-cx) + cosf(angle)*(y-cy) + cy; + + output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k); +} + +extern "C" void forward_crop_layer_gpu(crop_layer layer, network net) +{ + cuda_random(layer.rand_gpu, layer.batch*8); + + float radians = layer.angle*3.14159265f/180.f; + + float scale = 2; + float translate = -1; + if(layer.noadjust){ + scale = 1; + translate = 0; + } + + int size = layer.batch * layer.w * layer.h; + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(net.input_gpu, size * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(layer.rand_gpu, size * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(layer.output_gpu, size * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + levels_image_kernel<<>>(net.input_gpu, layer.rand_gpu, layer.batch, layer.w, layer.h, net.train, layer.saturation, layer.exposure, translate, scale, layer.shift); + check_error(cudaPeekAtLastError()); + + size = layer.batch*layer.c*layer.out_w*layer.out_h; + + forward_crop_layer_kernel<<>>(net.input_gpu, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, net.train, layer.flip, radians, layer.output_gpu); + check_error(cudaPeekAtLastError()); + +/* + cuda_pull_array(layer.output_gpu, layer.output, size); + image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 0*(size/layer.batch)); + image im2 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 1*(size/layer.batch)); + image im3 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 2*(size/layer.batch)); + + translate_image(im, -translate); + 
scale_image(im, 1/scale); + translate_image(im2, -translate); + scale_image(im2, 1/scale); + translate_image(im3, -translate); + scale_image(im3, 1/scale); + + show_image(im, "cropped"); + show_image(im2, "cropped2"); + show_image(im3, "cropped3"); + cvWaitKey(0); + */ +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/cuda_dark.cu b/workloads/realworld/uvm_prefetch/darknet/src/cuda_dark.cu new file mode 100644 index 0000000000000000000000000000000000000000..ce415292e8f25ef96de7aa3d4954592fea771195 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/cuda_dark.cu @@ -0,0 +1,435 @@ +int gpu_index = 0; + +#ifdef GPU + +#include "cuda.h" +#include "utils.h" +#include "blas.h" +#include +#include +#include + + +#include + +void cuda_set_device(int n) +{ + gpu_index = n; + cudaError_t status = cudaSetDevice(n); + check_error(status); +} + +int cuda_get_device() +{ + int n = 0; + cudaError_t status = cudaGetDevice(&n); + check_error(status); + return n; +} + +void check_error(cudaError_t status) +{ + cudaDeviceSynchronize(); + cudaError_t status2 = cudaGetLastError(); + if (status != cudaSuccess) + { + const char *s = cudaGetErrorString(status); + char buffer[256]; + printf("CUDA Error: %s\n", s); + assert(0); + snprintf(buffer, 256, "CUDA Error: %s", s); + error(buffer); + } + if (status2 != cudaSuccess) + { + const char *s = cudaGetErrorString(status2); + char buffer[256]; + printf("CUDA Error Prev: %s\n", s); + assert(0); + snprintf(buffer, 256, "CUDA Error Prev: %s", s); + error(buffer); + } +} + +dim3 cuda_gridsize(size_t n){ + size_t k = (n-1) / BLOCK + 1; + size_t x = k; + size_t y = 1; + if(x > 65535){ + x = ceil(sqrt(k)); + y = (n-1)/(x*BLOCK) + 1; + } + dim3 d = {x, y, 1}; + //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); + return d; +} + +#ifdef CUDNN +cudnnHandle_t cudnn_handle() +{ + static int init[16] = {0}; + static cudnnHandle_t handle[16]; + int i = cuda_get_device(); + if(!init[i]) { + cudnnCreate(&handle[i]); + init[i] = 1; 
+ } + return handle[i]; +} +#endif + +cublasHandle_t blas_handle() +{ + static int init[16] = {0}; + static cublasHandle_t handle[16]; + int i = cuda_get_device(); + if(!init[i]) { + cublasCreate(&handle[i]); + init[i] = 1; + } + return handle[i]; +} + +float *cuda_make_array(float *x, size_t n) +{ + float *x_gpu; + size_t size = sizeof(float)*n; + // cudaError_t status = cudaMalloc((void **)&x_gpu, size); + cudaError_t status = cudaMallocManaged((void **)&x_gpu, size); + check_error(status); + if(x){ + // status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + // check_error(status); + // for (int i = 0; i < n; i++) { + // x_gpu[i] = x[i]; + // } + memcpy(x_gpu, x, size); + } else { + fill_gpu(n, 0, x_gpu, 1); + } + if(!x_gpu) error("Cuda malloc failed\n"); + return x_gpu; +} + +void cuda_random(float *x_gpu, size_t n) +{ + static curandGenerator_t gen[16]; + static int init[16] = {0}; + int i = cuda_get_device(); + if(!init[i]){ + curandCreateGenerator(&gen[i], CURAND_RNG_PSEUDO_DEFAULT); + curandSetPseudoRandomGeneratorSeed(gen[i], time(0)); + init[i] = 1; + } + curandGenerateUniform(gen[i], x_gpu, n); + check_error(cudaPeekAtLastError()); +} + +float cuda_compare(float *x_gpu, float *x, size_t n, char *s) +{ + float *tmp = (float *) calloc(n, sizeof(float)); + cuda_pull_array(x_gpu, tmp, n); + //int i; + //for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]); + axpy_cpu(n, -1, x, 1, tmp, 1); + float err = dot_cpu(n, tmp, 1, tmp, 1); + printf("Error %s: %f\n", s, sqrt(err/n)); + free(tmp); + return err; +} + +int *cuda_make_int_array(int *x, size_t n) +{ + int *x_gpu; + size_t size = sizeof(int)*n; + // cudaError_t status = cudaMalloc((void **)&x_gpu, size); + cudaError_t status = cudaMallocManaged((void **)&x_gpu, size); + check_error(status); + if(x){ + // status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + // check_error(status); + memcpy(x_gpu, x, size); + // for (int i = 0; i < n; i++) + // x_gpu[i] = x[i]; + } + if(!x_gpu) error("Cuda 
malloc failed\n"); + return x_gpu; +} + +void cuda_free(float *x_gpu) +{ + cudaError_t status = cudaFree(x_gpu); + check_error(status); +} + +void cuda_push_array(float *x_gpu, float *x, size_t n) +{ + size_t size = sizeof(float)*n; + // cudaError_t status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + // check_error(status); + memcpy(x_gpu, x, size); + // for (int i = 0; i < n; i++) + // x_gpu[i] = x[i]; +} + +void cuda_pull_array(float *x_gpu, float *x, size_t n) +{ + size_t size = sizeof(float)*n; + // cudaError_t status = cudaMemcpy(x, x_gpu, size, cudaMemcpyDeviceToHost); + // check_error(status); + memcpy(x, x_gpu, size); + // for (int i = 0; i < n; i++) + // x[i] = x_gpu[i]; +} + +float cuda_mag_array(float *x_gpu, size_t n) +{ + float *temp = (float *) calloc(n, sizeof(float)); + cuda_pull_array(x_gpu, temp, n); + float m = mag_array(temp, n); + free(temp); + return m; +} + +static const char * +getMemcpyKindString(CUpti_ActivityMemcpyKind kind) +{ + switch (kind) + { + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOD: + return "HtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOH: + return "DtoH"; + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOA: + return "HtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOH: + return "AtoH"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOA: + return "AtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOD: + return "AtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOA: + return "DtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOD: + return "DtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOH: + return "HtoH"; + default: + break; + } + + return ""; +} + +static const char * +getUvmCounterKindString(CUpti_ActivityUnifiedMemoryCounterKind kind) +{ + switch (kind) + { + case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD: + return "BYTES_TRANSFER_HTOD"; + case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH: + return "BYTES_TRANSFER_DTOH"; + default: + break; + } + return ""; +} + +static void +printActivity(CUpti_Activity *record) +{ + switch (record->kind) + 
{ + case CUPTI_ACTIVITY_KIND_KERNEL: + { + int status; + CUpti_ActivityKernel4 *kernel = (CUpti_ActivityKernel4 *)record; + printf("KERNEL %s, %llu, %llu, %llu\n", + abi::__cxa_demangle(kernel->name, 0, 0, &status), + (unsigned long long)(kernel->start), + (unsigned long long)(kernel->end), + (unsigned long long)(kernel->end) - (kernel->start)); + break; + } + case CUPTI_ACTIVITY_KIND_RUNTIME: + { + CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record; + const char *callback_name; + cuptiGetCallbackName(CUPTI_CB_DOMAIN_RUNTIME_API, api->cbid, &callback_name); + // printf("RUNTIME %s (cbid=%u) [ %llu - %llu ] process %u, thread %u, correlation %u\n", + // callback_name, api->cbid, + // (unsigned long long)(api->start - startTimestamp), + // (unsigned long long)(api->end - startTimestamp), + // api->processId, api->threadId, api->correlationId); + printf("RUNTIME %s (cbid=%u), %llu,%llu,%llu, process %u, thread %u, correlation %u\n", + callback_name, api->cbid, + (unsigned long long)(api->start), + (unsigned long long)(api->end), + (unsigned long long)(api->end - api->start), + api->processId, api->threadId, api->correlationId); + break; + } + case CUPTI_ACTIVITY_KIND_MEMCPY: + { + CUpti_ActivityMemcpy4 *memcpy = (CUpti_ActivityMemcpy4 *)record; + printf("MEMCPY %s, size %llu, %llu, %llu, %llu\n", + getMemcpyKindString((CUpti_ActivityMemcpyKind)memcpy->copyKind), + (unsigned long long)memcpy->bytes, + (unsigned long long)(memcpy->start), + (unsigned long long)(memcpy->end), + (unsigned long long)(memcpy->end) - (memcpy->start)); + break; + } + case CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER: + { + CUpti_ActivityUnifiedMemoryCounter2 *uvm = (CUpti_ActivityUnifiedMemoryCounter2 *)record; + printf("UVM MEMCPY %s, size %llu, %llu, %llu, %llu \n", + getUvmCounterKindString(uvm->counterKind), + (unsigned long long)uvm->value, + (unsigned long long)(uvm->start), + (unsigned long long)(uvm->end), + (unsigned long long)(uvm->end - uvm->start)); + break; + } + } +} + +void 
CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) +{ + uint8_t *bfr = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE); + if (bfr == NULL) + { + printf("Error: out of memory\n"); + exit(-1); + } + + *size = BUF_SIZE; + *buffer = ALIGN_BUFFER(bfr, ALIGN_SIZE); + *maxNumRecords = 0; +} + +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) +{ + CUptiResult status; + CUpti_Activity *record = NULL; + if (validSize > 0) + { + do + { + status = cuptiActivityGetNextRecord(buffer, validSize, &record); + if (status == CUPTI_SUCCESS) + { + printActivity(record); + } + else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) + break; + else + { + CUPTI_CALL(status); + } + } while (1); + + // report any records dropped from the queue + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if (dropped != 0) + { + printf("Dropped %u activity records\n", (unsigned int)dropped); + } + } + + free(buffer); +} + +#ifndef PROFILE +void initTrace() { + printf("not Profile initTrace()\n"); + return; +} + +void finiTrace() { + return; +} + +#else +void initTrace() +{ + printf("Profile initTrace()\n"); + size_t attrValue = 0, attrValueSize = sizeof(size_t); + + CUpti_ActivityUnifiedMemoryCounterConfig config[2]; + + // configure unified memory counters + config[0].scope = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE; + config[0].kind = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD; + config[0].deviceId = 0; + config[0].enable = 1; + + config[1].scope = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE; + config[1].kind = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH; + config[1].deviceId = 0; + config[1].enable = 1; + + CUptiResult res = cuptiActivityConfigureUnifiedMemoryCounter(config, 2); + if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED) + { + printf("Test is waived, unified memory is not supported on 
the underlying platform.\n"); + } + else if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE) + { + printf("Test is waived, unified memory is not supported on the device.\n"); + } + else if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES) + { + printf("Test is waived, unified memory is not supported on the non-P2P multi-gpu setup.\n"); + } + else + { + CUPTI_CALL(res); + } + + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER)); + + // Register callbacks for buffer requests and for buffers completed by CUPTI. + CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + + // Optionally get and set activity attributes. + // Attributes can be set by the CUPTI client to change behavior of the activity API. + // Some attributes require to be set before any CUDA context is created to be effective, + // e.g. to be applied to all device buffer allocations (see documentation). 
+ CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + printf("%s = %llu B\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +} + +void finiTrace() +{ + // Force flush any remaining activity buffers before termination of the application + CUPTI_CALL(cuptiActivityFlushAll(1)); +} +#endif + + +void GPU_argv_init() +{ + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, GPU_DEVICE); + printf("setting device %d with name %s\n", GPU_DEVICE, deviceProp.name); + cudaSetDevice(GPU_DEVICE); +} +#else +void cuda_set_device(int n){} + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/cuda_dark.h b/workloads/realworld/uvm_prefetch/darknet/src/cuda_dark.h new file mode 100644 index 0000000000000000000000000000000000000000..ac6b60bc3d27ec1ebc8190463648b946f6c809ef --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/cuda_dark.h @@ -0,0 +1,63 @@ +#ifndef CUDA_H +#define CUDA_H + +#include "darknet.h" + +#ifdef GPU + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) (((uintptr_t)(buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t)(buffer) & ((align)-1))) : (buffer)) + +static uint64_t startTimestamp; +// Timestamp at trace initialization time. 
Used to normalized other +// timestamps + +#define CUPTI_CALL(call) \ + do \ + { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) \ + { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + if (_status == CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED) \ + exit(0); \ + else \ + exit(-1); \ + } \ + } while (0) + +#include + +#ifdef __cplusplus +extern "C" { +#endif +void check_error(cudaError_t status); +cublasHandle_t blas_handle(); +int *cuda_make_int_array(int *x, size_t n); +void cuda_random(float *x_gpu, size_t n); +float cuda_compare(float *x_gpu, float *x, size_t n, char *s); +dim3 cuda_gridsize(size_t n); + +void GPU_argv_init(); +void initTrace(); +void finiTrace(); +void startCPU(); +void endCPU(); +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords); +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); +static void printActivity(CUpti_Activity *record); + +#ifdef __cplusplus +} +#endif + +#ifdef CUDNN +cudnnHandle_t cudnn_handle(); +#endif + +#endif +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/cupti_add.cpp b/workloads/realworld/uvm_prefetch/darknet/src/cupti_add.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a0d16eb72f41f8e858a59354d2de9d6b470c0e76 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/cupti_add.cpp @@ -0,0 +1,112 @@ +#include "cupti_add.h" + +static void +printActivity(CUpti_Activity *record) +{ + switch (record->kind) + { + case CUPTI_ACTIVITY_KIND_KERNEL: + { + int status; + CUpti_ActivityKernel4 *kernel = (CUpti_ActivityKernel4 *)record; + printf("CUPTI,%s,%llu,%llu,%llu\n", + abi::__cxa_demangle(kernel->name, 0, 0, &status), + (unsigned long long)(kernel->start), + (unsigned long long)(kernel->end), + (unsigned long long)(kernel->end - 
startTimestamp) - (kernel->start - startTimestamp)); + break; + } + case CUPTI_ACTIVITY_KIND_RUNTIME: + { + CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record; + const char *callback_name; + cuptiGetCallbackName(CUPTI_CB_DOMAIN_RUNTIME_API, api->cbid, &callback_name); + printf("RUNTIME %s (cbid=%u) [ %llu - %llu ] process %u, thread %u, correlation %u\n", + callback_name, api->cbid, + (unsigned long long)(api->start - startTimestamp), + (unsigned long long)(api->end - startTimestamp), + api->processId, api->threadId, api->correlationId); + break; + } + } +} + +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) +{ + uint8_t *bfr = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE); + if (bfr == NULL) + { + printf("Error: out of memory\n"); + exit(-1); + } + + *size = BUF_SIZE; + *buffer = ALIGN_BUFFER(bfr, ALIGN_SIZE); + *maxNumRecords = 0; +} + +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) +{ + CUptiResult status; + CUpti_Activity *record = NULL; + if (validSize > 0) + { + do + { + status = cuptiActivityGetNextRecord(buffer, validSize, &record); + if (status == CUPTI_SUCCESS) + { + printActivity(record); + } + else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) + break; + else + { + CUPTI_CALL(status); + } + } while (1); + + // report any records dropped from the queue + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if (dropped != 0) + { + printf("Dropped %u activity records\n", (unsigned int)dropped); + } + } + + free(buffer); +} + +void initTrace() +{ + size_t attrValue = 0, attrValueSize = sizeof(size_t); + + // CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); + + // Register callbacks for buffer requests and for buffers completed by CUPTI. 
+ CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + + // Optionally get and set activity attributes. + // Attributes can be set by the CUPTI client to change behavior of the activity API. + // Some attributes require to be set before any CUDA context is created to be effective, + // e.g. to be applied to all device buffer allocations (see documentation). + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + printf("%s = %llu B\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +} + +void finiTrace() +{ + // Force flush any remaining activity buffers before termination of the application + CUPTI_CALL(cuptiActivityFlushAll(1)); +} \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/darknet/src/cupti_add.h b/workloads/realworld/uvm_prefetch/darknet/src/cupti_add.h new file mode 100644 index 0000000000000000000000000000000000000000..a30b7b847ad13381032d2f60eac2955d30146485 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/cupti_add.h @@ -0,0 +1,36 @@ +#include +#include +#include + + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) \ + (((uintptr_t) (buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer)) + +static uint64_t startTimestamp; +// Timestamp at trace initialization time. 
Used to normalized other +// timestamps + +#define CUPTI_CALL(call) \ + do { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + if(_status == CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED) \ + exit(0); \ + else \ + exit(-1); \ + } \ + } while (0) + +void initTrace(); +void finiTrace(); +void startCPU(); +void endCPU(); +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords); +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); +static void printActivity(CUpti_Activity *record); diff --git a/workloads/realworld/uvm_prefetch/darknet/src/data.c b/workloads/realworld/uvm_prefetch/darknet/src/data.c new file mode 100644 index 0000000000000000000000000000000000000000..d50f1346c5cdcfe1dbeb2d0f70ec408fb4f33960 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/data.c @@ -0,0 +1,1685 @@ +#include "data.h" +#include "utils.h" +#include "image.h" +#include "cuda_dark.h" + +#include +#include +#include + +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + +list *get_paths(char *filename) +{ + char *path; + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + list *lines = make_list(); + while((path=fgetl(file))){ + list_insert(lines, path); + } + fclose(file); + return lines; +} + +/* +char **get_random_paths_indexes(char **paths, int n, int m, int *indexes) +{ + char **random_paths = calloc(n, sizeof(char*)); + int i; + pthread_mutex_lock(&mutex); + for(i = 0; i < n; ++i){ + int index = rand()%m; + indexes[i] = index; + random_paths[i] = paths[index]; + if(i == 0) printf("%s\n", paths[index]); + } + pthread_mutex_unlock(&mutex); + return random_paths; +} +*/ + +char **get_random_paths(char **paths, int n, int m) +{ + char **random_paths = calloc(n, 
sizeof(char*)); + int i; + pthread_mutex_lock(&mutex); + for(i = 0; i < n; ++i){ + int index = rand()%m; + random_paths[i] = paths[index]; + //if(i == 0) printf("%s\n", paths[index]); + } + pthread_mutex_unlock(&mutex); + return random_paths; +} + +char **find_replace_paths(char **paths, int n, char *find, char *replace) +{ + char **replace_paths = calloc(n, sizeof(char*)); + int i; + for(i = 0; i < n; ++i){ + char replaced[4096]; + find_replace(paths[i], find, replace, replaced); + replace_paths[i] = copy_string(replaced); + } + return replace_paths; +} + +matrix load_image_paths_gray(char **paths, int n, int w, int h) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image(paths[i], w, h, 3); + + image gray = grayscale_image(im); + free_image(im); + im = gray; + + X.vals[i] = im.data; + X.cols = im.h*im.w*im.c; + } + return X; +} + +matrix load_image_paths(char **paths, int n, int w, int h) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], w, h); + X.vals[i] = im.data; + X.cols = im.h*im.w*im.c; + } + return X; +} + +matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], 0, 0); + image crop; + if(center){ + crop = center_crop_image(im, size, size); + } else { + crop = random_augment_image(im, angle, aspect, min, max, size, size); + } + int flip = rand()%2; + if (flip) flip_image(crop); + random_distort_image(crop, hue, saturation, exposure); + + /* + show_image(im, "orig"); + show_image(crop, "crop"); + cvWaitKey(0); + */ + //grayscale_image_3c(crop); + free_image(im); + X.vals[i] = crop.data; + 
X.cols = crop.h*crop.w*crop.c; + } + return X; +} + + +box_label *read_boxes(char *filename, int *n) +{ + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + float x, y, h, w; + int id; + int count = 0; + int size = 64; + box_label *boxes = calloc(size, sizeof(box_label)); + while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){ + if(count == size) { + size = size * 2; + boxes = realloc(boxes, size*sizeof(box_label)); + } + boxes[count].id = id; + boxes[count].x = x; + boxes[count].y = y; + boxes[count].h = h; + boxes[count].w = w; + boxes[count].left = x - w/2; + boxes[count].right = x + w/2; + boxes[count].top = y - h/2; + boxes[count].bottom = y + h/2; + ++count; + } + fclose(file); + *n = count; + return boxes; +} + +void randomize_boxes(box_label *b, int n) +{ + int i; + for(i = 0; i < n; ++i){ + box_label swap = b[i]; + int index = rand()%n; + b[i] = b[index]; + b[index] = swap; + } +} + +void correct_boxes(box_label *boxes, int n, float dx, float dy, float sx, float sy, int flip) +{ + int i; + for(i = 0; i < n; ++i){ + if(boxes[i].x == 0 && boxes[i].y == 0) { + boxes[i].x = 999999; + boxes[i].y = 999999; + boxes[i].w = 999999; + boxes[i].h = 999999; + continue; + } + boxes[i].left = boxes[i].left * sx - dx; + boxes[i].right = boxes[i].right * sx - dx; + boxes[i].top = boxes[i].top * sy - dy; + boxes[i].bottom = boxes[i].bottom* sy - dy; + + if(flip){ + float swap = boxes[i].left; + boxes[i].left = 1. - boxes[i].right; + boxes[i].right = 1. 
- swap; + } + + boxes[i].left = constrain(0, 1, boxes[i].left); + boxes[i].right = constrain(0, 1, boxes[i].right); + boxes[i].top = constrain(0, 1, boxes[i].top); + boxes[i].bottom = constrain(0, 1, boxes[i].bottom); + + boxes[i].x = (boxes[i].left+boxes[i].right)/2; + boxes[i].y = (boxes[i].top+boxes[i].bottom)/2; + boxes[i].w = (boxes[i].right - boxes[i].left); + boxes[i].h = (boxes[i].bottom - boxes[i].top); + + boxes[i].w = constrain(0, 1, boxes[i].w); + boxes[i].h = constrain(0, 1, boxes[i].h); + } +} + +void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int count = 0; + box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + float x,y,w,h; + int id; + int i; + + for (i = 0; i < count && i < 90; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if (w < .0 || h < .0) continue; + + int index = (4+classes) * i; + + truth[index++] = x; + truth[index++] = y; + truth[index++] = w; + truth[index++] = h; + + if (id < classes) truth[index+id] = 1; + } + free(boxes); +} + +void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + int count = 0; + 
box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + float x,y,w,h; + int id; + int i; + + for (i = 0; i < count; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if (w < .005 || h < .005) continue; + + int col = (int)(x*num_boxes); + int row = (int)(y*num_boxes); + + x = x*num_boxes - col; + y = y*num_boxes - row; + + int index = (col+row*num_boxes)*(5+classes); + if (truth[index]) continue; + truth[index++] = 1; + + if (id < classes) truth[index+id] = 1; + index += classes; + + truth[index++] = x; + truth[index++] = y; + truth[index++] = w; + truth[index++] = h; + } + free(boxes); +} + +void load_rle(image im, int *rle, int n) +{ + int count = 0; + int curr = 0; + int i,j; + for(i = 0; i < n; ++i){ + for(j = 0; j < rle[i]; ++j){ + im.data[count++] = curr; + } + curr = 1 - curr; + } + for(; count < im.h*im.w*im.c; ++count){ + im.data[count] = curr; + } +} + +void or_image(image src, image dest, int c) +{ + int i; + for(i = 0; i < src.w*src.h; ++i){ + if(src.data[i]) dest.data[dest.w*dest.h*c + i] = 1; + } +} + +void exclusive_image(image src) +{ + int k, j, i; + int s = src.w*src.h; + for(k = 0; k < src.c-1; ++k){ + for(i = 0; i < s; ++i){ + if (src.data[k*s + i]){ + for(j = k+1; j < src.c; ++j){ + src.data[j*s + i] = 0; + } + } + } + } +} + +box bound_image(image im) +{ + int x,y; + int minx = im.w; + int miny = im.h; + int maxx = 0; + int maxy = 0; + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + if(im.data[y*im.w + x]){ + minx = (x < minx) ? x : minx; + miny = (y < miny) ? y : miny; + maxx = (x > maxx) ? x : maxx; + maxy = (y > maxy) ? 
y : maxy; + } + } + } + box b = {minx, miny, maxx-minx + 1, maxy-miny + 1}; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + return b; +} + +void fill_truth_iseg(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + int i = 0; + int j; + image part = make_image(w, h, 1); + while((fscanf(file, "%d %s", &id, buff) == 2) && i < num_boxes){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect); + if(flip) flip_image(sized); + + image mask = resize_image(sized, mw, mh); + truth[i*(mw*mh+1)] = id; + for(j = 0; j < mw*mh; ++j){ + truth[i*(mw*mh + 1) + 1 + j] = mask.data[j]; + } + ++i; + + free_image(mask); + free_image(sized); + free(rle); + } + if(i < num_boxes) truth[i*(mw*mh+1)] = -1; + fclose(file); + free_image(part); +} + +void fill_truth_mask(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + int i = 0; + image part = make_image(w, h, 1); + while((fscanf(file, "%d %s", &id, buff) == 2) && i < num_boxes){ + int n = 0; + int 
*rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect); + if(flip) flip_image(sized); + box b = bound_image(sized); + if(b.w > 0){ + image crop = crop_image(sized, b.x, b.y, b.w, b.h); + image mask = resize_image(crop, mw, mh); + truth[i*(4 + mw*mh + 1) + 0] = (b.x + b.w/2.)/sized.w; + truth[i*(4 + mw*mh + 1) + 1] = (b.y + b.h/2.)/sized.h; + truth[i*(4 + mw*mh + 1) + 2] = b.w/sized.w; + truth[i*(4 + mw*mh + 1) + 3] = b.h/sized.h; + int j; + for(j = 0; j < mw*mh; ++j){ + truth[i*(4 + mw*mh + 1) + 4 + j] = mask.data[j]; + } + truth[i*(4 + mw*mh + 1) + 4 + mw*mh] = id; + free_image(crop); + free_image(mask); + ++i; + } + free_image(sized); + free(rle); + } + fclose(file); + free_image(part); +} + + +void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + + find_replace(labelpath, "raw", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + int count = 0; + box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + if(count > num_boxes) count = num_boxes; + float x,y,w,h; + int id; + int i; + int sub = 0; + + for (i = 0; i < count; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if ((w < .001 || h < .001)) { + ++sub; + continue; + } + + truth[(i-sub)*5+0] = x; + truth[(i-sub)*5+1] = y; + truth[(i-sub)*5+2] = w; + truth[(i-sub)*5+3] = h; + truth[(i-sub)*5+4] = id; + } + free(boxes); +} + +#define NUMCHARS 37 + +void print_letters(float 
*pred, int n) +{ + int i; + for(i = 0; i < n; ++i){ + int index = max_index(pred+i*NUMCHARS, NUMCHARS); + printf("%c", int_to_alphanum(index)); + } + printf("\n"); +} + +void fill_truth_captcha(char *path, int n, float *truth) +{ + char *begin = strrchr(path, '/'); + ++begin; + int i; + for(i = 0; i < strlen(begin) && i < n && begin[i] != '.'; ++i){ + int index = alphanum_to_int(begin[i]); + if(index > 35) printf("Bad %c\n", begin[i]); + truth[i*NUMCHARS+index] = 1; + } + for(;i < n; ++i){ + truth[i*NUMCHARS + NUMCHARS-1] = 1; + } +} + +data load_data_captcha(char **paths, int n, int m, int k, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = make_matrix(n, k*NUMCHARS); + int i; + for(i = 0; i < n; ++i){ + fill_truth_captcha(paths[i], k, d.y.vals[i]); + } + if(m) free(paths); + return d; +} + +data load_data_captcha_encode(char **paths, int n, int m, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.X.cols = 17100; + d.y = d.X; + if(m) free(paths); + return d; +} + +void fill_truth(char *path, char **labels, int k, float *truth) +{ + int i; + memset(truth, 0, k*sizeof(float)); + int count = 0; + for(i = 0; i < k; ++i){ + if(strstr(path, labels[i])){ + truth[i] = 1; + ++count; + //printf("%s %s %d\n", path, labels[i], i); + } + } + if(count != 1 && (k != 1 || count != 0)) printf("Too many or too few labels: %d, %s\n", count, path); +} + +void fill_hierarchy(float *truth, int k, tree *hierarchy) +{ + int j; + for(j = 0; j < k; ++j){ + if(truth[j]){ + int parent = hierarchy->parent[j]; + while(parent >= 0){ + truth[parent] = 1; + parent = hierarchy->parent[parent]; + } + } + } + int i; + int count = 0; + for(j = 0; j < hierarchy->groups; ++j){ + //printf("%d\n", count); + int mask = 1; + for(i = 0; i < hierarchy->group_size[j]; ++i){ + if(truth[count + i]){ + mask = 0; + break; + } 
+ } + if (mask) { + for(i = 0; i < hierarchy->group_size[j]; ++i){ + truth[count + i] = SECRET_NUM; + } + } + count += hierarchy->group_size[j]; + } +} + +matrix load_regression_labels_paths(char **paths, int n, int k) +{ + matrix y = make_matrix(n, k); + int i,j; + for(i = 0; i < n; ++i){ + char labelpath[4096]; + find_replace(paths[i], "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".BMP", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPeG", ".txt", labelpath); + find_replace(labelpath, ".Jpeg", ".txt", labelpath); + find_replace(labelpath, ".PNG", ".txt", labelpath); + find_replace(labelpath, ".TIF", ".txt", labelpath); + find_replace(labelpath, ".bmp", ".txt", labelpath); + find_replace(labelpath, ".jpeg", ".txt", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".tif", ".txt", labelpath); + + FILE *file = fopen(labelpath, "r"); + for(j = 0; j < k; ++j){ + fscanf(file, "%f", &(y.vals[i][j])); + } + fclose(file); + } + return y; +} + +matrix load_labels_paths(char **paths, int n, char **labels, int k, tree *hierarchy) +{ + matrix y = make_matrix(n, k); + int i; + for(i = 0; i < n && labels; ++i){ + fill_truth(paths[i], labels, k, y.vals[i]); + if(hierarchy){ + fill_hierarchy(y.vals[i], k, hierarchy); + } + } + return y; +} + +matrix load_tags_paths(char **paths, int n, int k) +{ + matrix y = make_matrix(n, k); + int i; + //int count = 0; + for(i = 0; i < n; ++i){ + char label[4096]; + find_replace(paths[i], "images", "labels", label); + find_replace(label, ".jpg", ".txt", label); + FILE *file = fopen(label, "r"); + if (!file) continue; + //++count; + int tag; + while(fscanf(file, "%d", &tag) == 1){ + if(tag < k){ + y.vals[i][tag] = 1; + } + } + fclose(file); + } + //printf("%d/%d\n", 
count, n); + return y; +} + +char **get_labels(char *filename) +{ + list *plist = get_paths(filename); + char **labels = (char **)list_to_array(plist); + free_list(plist); + return labels; +} + +void free_data(data d) +{ + if(!d.shallow){ + free_matrix(d.X); + free_matrix(d.y); + }else{ + free(d.X.vals); + free(d.y.vals); + } +} + +image get_segmentation_image(char *path, int w, int h, int classes) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + image mask = make_image(w, h, classes); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + image part = make_image(w, h, 1); + while(fscanf(file, "%d %s", &id, buff) == 2){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + or_image(part, mask, id); + free(rle); + } + //exclusive_image(mask); + fclose(file); + free_image(part); + return mask; +} + +image get_segmentation_image2(char *path, int w, int h, int classes) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + image mask = make_image(w, h, classes+1); + int i; + for(i = 0; i < w*h; ++i){ + mask.data[w*h*classes + i] = 1; + } + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + image part = make_image(w, h, 1); + while(fscanf(file, "%d %s", &id, buff) == 2){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + or_image(part, mask, id); + for(i = 0; i < w*h; ++i){ + if(part.data[i]) mask.data[w*h*classes + i] = 
0; + } + free(rle); + } + //exclusive_image(mask); + fclose(file); + free_image(part); + return mask; +} + +data load_data_seg(int n, char **paths, int m, int w, int h, int classes, int min, int max, float angle, float aspect, float hue, float saturation, float exposure, int div) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + + d.y.rows = n; + d.y.cols = h*w*classes/div/div; + d.y.vals = calloc(d.X.rows, sizeof(float*)); + + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + + image mask = get_segmentation_image(random_paths[i], orig.w, orig.h, classes); + //image mask = make_image(orig.w, orig.h, classes+1); + image sized_m = rotate_crop_image(mask, a.rad, a.scale/div, a.w/div, a.h/div, a.dx/div, a.dy/div, a.aspect); + + if(flip) flip_image(sized_m); + d.y.vals[i] = sized_m.data; + + free_image(orig); + free_image(mask); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_iseg(int n, char **paths, int m, int w, int h, int classes, int boxes, int div, int min, int max, float angle, float aspect, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, (((w/div)*(h/div))+1)*boxes); + + for(i = 0; i < n; ++i){ + image orig = 
load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + //show_image(sized, "image"); + + fill_truth_iseg(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, w/div, h/div); + + free_image(orig); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_mask(int n, char **paths, int m, int w, int h, int classes, int boxes, int coords, int min, int max, float angle, float aspect, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, (coords+1)*boxes); + + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h); + image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + + int flip = rand()%2; + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + //show_image(sized, "image"); + + fill_truth_mask(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, 14, 14); + + free_image(orig); + + /* + image rgb = mask_to_rgb(sized_m, classes); + show_image(rgb, "part"); + show_image(sized, "orig"); + cvWaitKey(0); + free_image(rgb); + */ + } + free(random_paths); + return d; +} + +data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter, float hue, float saturation, float exposure) 
+{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + + int k = size*size*(5+classes); + d.y = make_matrix(n, k); + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + + int oh = orig.h; + int ow = orig.w; + + int dw = (ow*jitter); + int dh = (oh*jitter); + + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); + + int swidth = ow - pleft - pright; + int sheight = oh - ptop - pbot; + + float sx = (float)swidth / ow; + float sy = (float)sheight / oh; + + int flip = rand()%2; + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + + float dx = ((float)pleft/ow)/sx; + float dy = ((float)ptop /oh)/sy; + + image sized = resize_image(cropped, w, h); + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + + fill_truth_region(random_paths[i], d.y.vals[i], classes, size, flip, dx, dy, 1./sx, 1./sy); + + free_image(orig); + free_image(cropped); + } + free(random_paths); + return d; +} + +data load_data_compare(int n, char **paths, int m, int classes, int w, int h) +{ + if(m) paths = get_random_paths(paths, 2*n, m); + int i,j; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*6; + + int k = 2*(classes); + d.y = make_matrix(n, k); + for(i = 0; i < n; ++i){ + image im1 = load_image_color(paths[i*2], w, h); + image im2 = load_image_color(paths[i*2+1], w, h); + + d.X.vals[i] = calloc(d.X.cols, sizeof(float)); + memcpy(d.X.vals[i], im1.data, h*w*3*sizeof(float)); + memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float)); + + int id; + float iou; + + char imlabel1[4096]; + char imlabel2[4096]; + find_replace(paths[i*2], "imgs", "labels", imlabel1); + find_replace(imlabel1, "jpg", "txt", 
imlabel1); + FILE *fp1 = fopen(imlabel1, "r"); + + while(fscanf(fp1, "%d %f", &id, &iou) == 2){ + if (d.y.vals[i][2*id] < iou) d.y.vals[i][2*id] = iou; + } + + find_replace(paths[i*2+1], "imgs", "labels", imlabel2); + find_replace(imlabel2, "jpg", "txt", imlabel2); + FILE *fp2 = fopen(imlabel2, "r"); + + while(fscanf(fp2, "%d %f", &id, &iou) == 2){ + if (d.y.vals[i][2*id + 1] < iou) d.y.vals[i][2*id + 1] = iou; + } + + for (j = 0; j < classes; ++j){ + if (d.y.vals[i][2*j] > .5 && d.y.vals[i][2*j+1] < .5){ + d.y.vals[i][2*j] = 1; + d.y.vals[i][2*j+1] = 0; + } else if (d.y.vals[i][2*j] < .5 && d.y.vals[i][2*j+1] > .5){ + d.y.vals[i][2*j] = 0; + d.y.vals[i][2*j+1] = 1; + } else { + d.y.vals[i][2*j] = SECRET_NUM; + d.y.vals[i][2*j+1] = SECRET_NUM; + } + } + fclose(fp1); + fclose(fp2); + + free_image(im1); + free_image(im2); + } + if(m) free(paths); + return d; +} + +data load_data_swag(char **paths, int n, int classes, float jitter) +{ + int index = rand()%n; + char *random_path = paths[index]; + + image orig = load_image_color(random_path, 0, 0); + int h = orig.h; + int w = orig.w; + + data d = {0}; + d.shallow = 0; + d.w = w; + d.h = h; + + d.X.rows = 1; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + int k = (4+classes)*90; + d.y = make_matrix(1, k); + + int dw = w*jitter; + int dh = h*jitter; + + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); + + int swidth = w - pleft - pright; + int sheight = h - ptop - pbot; + + float sx = (float)swidth / w; + float sy = (float)sheight / h; + + int flip = rand()%2; + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + + float dx = ((float)pleft/w)/sx; + float dy = ((float)ptop /h)/sy; + + image sized = resize_image(cropped, w, h); + if(flip) flip_image(sized); + d.X.vals[0] = sized.data; + + fill_truth_swag(random_path, d.y.vals[0], classes, flip, dx, dy, 1./sx, 1./sy); + + free_image(orig); + 
free_image(cropped); + + return d; +} + +data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure) +{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + d.y = make_matrix(n, 5*boxes); + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + image sized = make_image(w, h, orig.c); + fill_image(sized, .5); + + float dw = jitter * orig.w; + float dh = jitter * orig.h; + + float new_ar = (orig.w + rand_uniform(-dw, dw)) / (orig.h + rand_uniform(-dh, dh)); + //float scale = rand_uniform(.25, 2); + float scale = 1; + + float nw, nh; + + if(new_ar < 1){ + nh = scale * h; + nw = nh * new_ar; + } else { + nw = scale * w; + nh = nw / new_ar; + } + + float dx = rand_uniform(0, w - nw); + float dy = rand_uniform(0, h - nh); + + place_image(orig, nw, nh, dx, dy, sized); + + random_distort_image(sized, hue, saturation, exposure); + + int flip = rand()%2; + if(flip) flip_image(sized); + d.X.vals[i] = sized.data; + + + fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, -dx/w, -dy/h, nw/w, nh/h); + + free_image(orig); + } + free(random_paths); + return d; +} + +void *load_thread(void *ptr) +{ + //printf("Loading data: %d\n", rand()); + load_args a = *(struct load_args*)ptr; + if(a.exposure == 0) a.exposure = 1; + if(a.saturation == 0) a.saturation = 1; + if(a.aspect == 0) a.aspect = 1; + + if (a.type == OLD_CLASSIFICATION_DATA){ + *a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h); + } else if (a.type == REGRESSION_DATA){ + *a.d = load_data_regression(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == CLASSIFICATION_DATA){ + *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.min, a.max, 
a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.center); + } else if (a.type == SUPER_DATA){ + *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale); + } else if (a.type == WRITING_DATA){ + *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h); + } else if (a.type == ISEG_DATA){ + *a.d = load_data_iseg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.scale, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == INSTANCE_DATA){ + *a.d = load_data_mask(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.coords, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } else if (a.type == SEGMENTATION_DATA){ + *a.d = load_data_seg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.scale); + } else if (a.type == REGION_DATA){ + *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); + } else if (a.type == DETECTION_DATA){ + *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); + } else if (a.type == SWAG_DATA){ + *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter); + } else if (a.type == COMPARE_DATA){ + *a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h); + } else if (a.type == IMAGE_DATA){ + *(a.im) = load_image_color(a.path, 0, 0); + *(a.resized) = resize_image(*(a.im), a.w, a.h); + } else if (a.type == LETTERBOX_DATA){ + *(a.im) = load_image_color(a.path, 0, 0); + *(a.resized) = letterbox_image(*(a.im), a.w, a.h); + } else if (a.type == TAG_DATA){ + *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + } + free(ptr); + return 0; +} + +pthread_t load_data_in_thread(load_args args) +{ + pthread_t thread; + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + 
if(pthread_create(&thread, 0, load_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void *load_threads(void *ptr) +{ + int i; + load_args args = *(load_args *)ptr; + if (args.threads == 0) args.threads = 1; + data *out = args.d; + int total = args.n; + free(ptr); + data *buffers = calloc(args.threads, sizeof(data)); + pthread_t *threads = calloc(args.threads, sizeof(pthread_t)); + for(i = 0; i < args.threads; ++i){ + args.d = buffers + i; + args.n = (i+1) * total/args.threads - i * total/args.threads; + threads[i] = load_data_in_thread(args); + } + for(i = 0; i < args.threads; ++i){ + pthread_join(threads[i], 0); + } + *out = concat_datas(buffers, args.threads); + out->shallow = 0; + for(i = 0; i < args.threads; ++i){ + buffers[i].shallow = 1; + free_data(buffers[i]); + } + free(buffers); + free(threads); + return 0; +} + +void load_data_blocking(load_args args) +{ + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + load_thread(ptr); +} + +pthread_t load_data(load_args args) +{ + pthread_t thread; + struct load_args *ptr = calloc(1, sizeof(struct load_args)); + *ptr = args; + if(pthread_create(&thread, 0, load_threads, ptr)) error("Thread creation failed"); + return thread; +} + +data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h) +{ + if(m) paths = get_random_paths(paths, n, m); + char **replace_paths = find_replace_paths(paths, n, ".png", "-label.png"); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = load_image_paths_gray(replace_paths, n, out_w, out_h); + if(m) free(paths); + int i; + for(i = 0; i < n; ++i) free(replace_paths[i]); + free(replace_paths); + return d; +} + +data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = load_labels_paths(paths, n, labels, k, 0); + if(m) free(paths); 
+ return d; +} + +/* + data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) + { + data d = {0}; + d.indexes = calloc(n, sizeof(int)); + if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes); + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure); + d.y = load_labels_paths(paths, n, labels, k); + if(m) free(paths); + return d; + } + */ + +data load_data_super(char **paths, int n, int m, int w, int h, int scale) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + + int i; + d.X.rows = n; + d.X.vals = calloc(n, sizeof(float*)); + d.X.cols = w*h*3; + + d.y.rows = n; + d.y.vals = calloc(n, sizeof(float*)); + d.y.cols = w*scale * h*scale * 3; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], 0, 0); + image crop = random_crop_image(im, w*scale, h*scale); + int flip = rand()%2; + if (flip) flip_image(crop); + image resize = resize_image(crop, w, h); + d.X.vals[i] = resize.data; + d.y.vals[i] = crop.data; + free_image(im); + } + + if(m) free(paths); + return d; +} + +data load_data_regression(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0); + d.y = load_regression_labels_paths(paths, n, k); + if(m) free(paths); + return d; +} + +data select_data(data *orig, int *inds) +{ + data d = {0}; + d.shallow = 1; + d.w = orig[0].w; + d.h = orig[0].h; + + d.X.rows = orig[0].X.rows; + d.y.rows = orig[0].X.rows; + + d.X.cols = orig[0].X.cols; + d.y.cols = orig[0].y.cols; + + d.X.vals = calloc(orig[0].X.rows, sizeof(float *)); + d.y.vals = calloc(orig[0].y.rows, sizeof(float *)); + 
int i; + for(i = 0; i < d.X.rows; ++i){ + d.X.vals[i] = orig[inds[i]].X.vals[i]; + d.y.vals[i] = orig[inds[i]].y.vals[i]; + } + return d; +} + +data *tile_data(data orig, int divs, int size) +{ + data *ds = calloc(divs*divs, sizeof(data)); + int i, j; +#pragma omp parallel for + for(i = 0; i < divs*divs; ++i){ + data d; + d.shallow = 0; + d.w = orig.w/divs * size; + d.h = orig.h/divs * size; + d.X.rows = orig.X.rows; + d.X.cols = d.w*d.h*3; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + + d.y = copy_matrix(orig.y); +#pragma omp parallel for + for(j = 0; j < orig.X.rows; ++j){ + int x = (i%divs) * orig.w / divs - (d.w - orig.w/divs)/2; + int y = (i/divs) * orig.h / divs - (d.h - orig.h/divs)/2; + image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[j]); + d.X.vals[j] = crop_image(im, x, y, d.w, d.h).data; + } + ds[i] = d; + } + return ds; +} + +data resize_data(data orig, int w, int h) +{ + data d = {0}; + d.shallow = 0; + d.w = w; + d.h = h; + int i; + d.X.rows = orig.X.rows; + d.X.cols = w*h*3; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + + d.y = copy_matrix(orig.y); +#pragma omp parallel for + for(i = 0; i < orig.X.rows; ++i){ + image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[i]); + d.X.vals[i] = resize_image(im, w, h).data; + } + return d; +} + +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.w=size; + d.h=size; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, center); + d.y = load_labels_paths(paths, n, labels, k, hierarchy); + if(m) free(paths); + return d; +} + +data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +{ + if(m) paths = 
get_random_paths(paths, n, m); + data d = {0}; + d.w = size; + d.h = size; + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0); + d.y = load_tags_paths(paths, n, k); + if(m) free(paths); + return d; +} + +matrix concat_matrix(matrix m1, matrix m2) +{ + int i, count = 0; + matrix m; + m.cols = m1.cols; + m.rows = m1.rows+m2.rows; + m.vals = calloc(m1.rows + m2.rows, sizeof(float*)); + for(i = 0; i < m1.rows; ++i){ + m.vals[count++] = m1.vals[i]; + } + for(i = 0; i < m2.rows; ++i){ + m.vals[count++] = m2.vals[i]; + } + return m; +} + +data concat_data(data d1, data d2) +{ + data d = {0}; + d.shallow = 1; + d.X = concat_matrix(d1.X, d2.X); + d.y = concat_matrix(d1.y, d2.y); + d.w = d1.w; + d.h = d1.h; + return d; +} + +data concat_datas(data *d, int n) +{ + int i; + data out = {0}; + for(i = 0; i < n; ++i){ + data new = concat_data(d[i], out); + free_data(out); + out = new; + } + return out; +} + +data load_categorical_data_csv(char *filename, int target, int k) +{ + data d = {0}; + d.shallow = 0; + matrix X = csv_to_matrix(filename); + float *truth_1d = pop_column(&X, target); + float **truth = one_hot_encode(truth_1d, X.rows, k); + matrix y; + y.rows = X.rows; + y.cols = k; + y.vals = truth; + d.X = X; + d.y = y; + free(truth_1d); + return d; +} + +data load_cifar10_data(char *filename) +{ + data d = {0}; + d.shallow = 0; + long i,j; + matrix X = make_matrix(10000, 3072); + matrix y = make_matrix(10000, 10); + d.X = X; + d.y = y; + + FILE *fp = fopen(filename, "rb"); + if(!fp) file_error(filename); + for(i = 0; i < 10000; ++i){ + unsigned char bytes[3073]; + fread(bytes, 1, 3073, fp); + int class = bytes[0]; + y.vals[i][class] = 1; + for(j = 0; j < X.cols; ++j){ + X.vals[i][j] = (double)bytes[j+1]; + } + } + scale_data_rows(d, 1./255); + //normalize_data_rows(d); + fclose(fp); + return d; +} + +void get_random_batch(data d, int n, float *X, float *y) +{ + int j; + for(j = 0; j < n; ++j){ + int 
index = rand()%d.X.rows; + memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); + memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); + } +} + +void get_next_batch(data d, int n, int offset, float *X, float *y) +{ + int j; + for(j = 0; j < n; ++j){ + int index = offset + j; + memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); + if(y) memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); + } +} + +void smooth_data(data d) +{ + int i, j; + float scale = 1. / d.y.cols; + float eps = .1; + for(i = 0; i < d.y.rows; ++i){ + for(j = 0; j < d.y.cols; ++j){ + d.y.vals[i][j] = eps * scale + (1-eps) * d.y.vals[i][j]; + } + } +} + +data load_all_cifar10() +{ + data d = {0}; + d.shallow = 0; + int i,j,b; + matrix X = make_matrix(50000, 3072); + matrix y = make_matrix(50000, 10); + d.X = X; + d.y = y; + + + for(b = 0; b < 5; ++b){ + char buff[256]; + sprintf(buff, "data/cifar/cifar-10-batches-bin/data_batch_%d.bin", b+1); + FILE *fp = fopen(buff, "rb"); + if(!fp) file_error(buff); + for(i = 0; i < 10000; ++i){ + unsigned char bytes[3073]; + fread(bytes, 1, 3073, fp); + int class = bytes[0]; + y.vals[i+b*10000][class] = 1; + for(j = 0; j < X.cols; ++j){ + X.vals[i+b*10000][j] = (double)bytes[j+1]; + } + } + fclose(fp); + } + //normalize_data_rows(d); + scale_data_rows(d, 1./255); + smooth_data(d); + return d; +} + +data load_go(char *filename) +{ + FILE *fp = fopen(filename, "rb"); + matrix X = make_matrix(3363059, 361); + matrix y = make_matrix(3363059, 361); + int row, col; + + if(!fp) file_error(filename); + char *label; + int count = 0; + while((label = fgetl(fp))){ + int i; + if(count == X.rows){ + X = resize_matrix(X, count*2); + y = resize_matrix(y, count*2); + } + sscanf(label, "%d %d", &row, &col); + char *board = fgetl(fp); + + int index = row*19 + col; + y.vals[count][index] = 1; + + for(i = 0; i < 19*19; ++i){ + float val = 0; + if(board[i] == '1') val = 1; + else if(board[i] == '2') val = -1; + X.vals[count][i] = val; + } + 
++count; + free(label); + free(board); + } + X = resize_matrix(X, count); + y = resize_matrix(y, count); + + data d = {0}; + d.shallow = 0; + d.X = X; + d.y = y; + + + fclose(fp); + + return d; +} + + +void randomize_data(data d) +{ + int i; + for(i = d.X.rows-1; i > 0; --i){ + int index = rand()%i; + float *swap = d.X.vals[index]; + d.X.vals[index] = d.X.vals[i]; + d.X.vals[i] = swap; + + swap = d.y.vals[index]; + d.y.vals[index] = d.y.vals[i]; + d.y.vals[i] = swap; + } +} + +void scale_data_rows(data d, float s) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + scale_array(d.X.vals[i], d.X.cols, s); + } +} + +void translate_data_rows(data d, float s) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + translate_array(d.X.vals[i], d.X.cols, s); + } +} + +data copy_data(data d) +{ + data c = {0}; + c.w = d.w; + c.h = d.h; + c.shallow = 0; + c.num_boxes = d.num_boxes; + c.boxes = d.boxes; + c.X = copy_matrix(d.X); + c.y = copy_matrix(d.y); + return c; +} + +void normalize_data_rows(data d) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + normalize_array(d.X.vals[i], d.X.cols); + } +} + +data get_data_part(data d, int part, int total) +{ + data p = {0}; + p.shallow = 1; + p.X.rows = d.X.rows * (part + 1) / total - d.X.rows * part / total; + p.y.rows = d.y.rows * (part + 1) / total - d.y.rows * part / total; + p.X.cols = d.X.cols; + p.y.cols = d.y.cols; + p.X.vals = d.X.vals + d.X.rows * part / total; + p.y.vals = d.y.vals + d.y.rows * part / total; + return p; +} + +data get_random_data(data d, int num) +{ + data r = {0}; + r.shallow = 1; + + r.X.rows = num; + r.y.rows = num; + + r.X.cols = d.X.cols; + r.y.cols = d.y.cols; + + r.X.vals = calloc(num, sizeof(float *)); + r.y.vals = calloc(num, sizeof(float *)); + + int i; + for(i = 0; i < num; ++i){ + int index = rand()%d.X.rows; + r.X.vals[i] = d.X.vals[index]; + r.y.vals[i] = d.y.vals[index]; + } + return r; +} + +data *split_data(data d, int part, int total) +{ + data *split = calloc(2, sizeof(data)); + int i; + int start = 
part*d.X.rows/total; + int end = (part+1)*d.X.rows/total; + data train; + data test; + train.shallow = test.shallow = 1; + + test.X.rows = test.y.rows = end-start; + train.X.rows = train.y.rows = d.X.rows - (end-start); + train.X.cols = test.X.cols = d.X.cols; + train.y.cols = test.y.cols = d.y.cols; + + train.X.vals = calloc(train.X.rows, sizeof(float*)); + test.X.vals = calloc(test.X.rows, sizeof(float*)); + train.y.vals = calloc(train.y.rows, sizeof(float*)); + test.y.vals = calloc(test.y.rows, sizeof(float*)); + + for(i = 0; i < start; ++i){ + train.X.vals[i] = d.X.vals[i]; + train.y.vals[i] = d.y.vals[i]; + } + for(i = start; i < end; ++i){ + test.X.vals[i-start] = d.X.vals[i]; + test.y.vals[i-start] = d.y.vals[i]; + } + for(i = end; i < d.X.rows; ++i){ + train.X.vals[i-(end-start)] = d.X.vals[i]; + train.y.vals[i-(end-start)] = d.y.vals[i]; + } + split[0] = train; + split[1] = test; + return split; +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/data.h b/workloads/realworld/uvm_prefetch/darknet/src/data.h new file mode 100644 index 0000000000000000000000000000000000000000..781906f8743c7d88c0fa134403d0ae020b544053 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/data.h @@ -0,0 +1,50 @@ +#ifndef DATA_H +#define DATA_H +#include + +#include "darknet.h" +#include "matrix.h" +#include "list.h" +#include "image.h" +#include "tree.h" + +static inline float distance_from_edge(int x, int max) +{ + int dx = (max/2) - x; + if (dx < 0) dx = -dx; + dx = (max/2) + 1 - dx; + dx *= 2; + float dist = (float)dx/max; + if (dist > 1) dist = 1; + return dist; +} +void load_data_blocking(load_args args); + + +void print_letters(float *pred, int n); +data load_data_captcha(char **paths, int n, int m, int k, int w, int h); +data load_data_captcha_encode(char **paths, int n, int m, int w, int h); +data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure); +data 
load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); +matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); +data load_data_super(char **paths, int n, int m, int w, int h, int scale); +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); +data load_data_regression(char **paths, int n, int m, int classes, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); +data load_go(char *filename); + + +data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h); + +void get_random_batch(data d, int n, float *X, float *y); +data get_data_part(data d, int part, int total); +data get_random_data(data d, int num); +data load_categorical_data_csv(char *filename, int target, int k); +void normalize_data_rows(data d); +void scale_data_rows(data d, float s); +void translate_data_rows(data d, float s); +void randomize_data(data d); +data *split_data(data d, int part, int total); +data concat_datas(data *d, int n); +void fill_truth(char *path, char **labels, int k, float *truth); + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/deconvolutional_kernels.cu b/workloads/realworld/uvm_prefetch/darknet/src/deconvolutional_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..ed12e7a3dc5148b1cbff746f13901a9653bc0f6d --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/deconvolutional_kernels.cu @@ -0,0 +1,139 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "convolutional_layer.h" +#include "deconvolutional_layer.h" +#include "batchnorm_layer.h" +#include 
"gemm.h" +#include "blas.h" +#include "im2col.h" +#include "col2im.h" +#include "utils.h" +#include "cuda_dark.h" +} + +extern "C" void forward_deconvolutional_layer_gpu(layer l, network net) +{ + int i; + + int m = l.size*l.size*l.n; + int n = l.h*l.w; + int k = l.c; + + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + + for(i = 0; i < l.batch; ++i){ + float *a = l.weights_gpu; + float *b = net.input_gpu + i*l.c*l.h*l.w; + float *c = net.workspace; + + gemm_gpu(1,0,m,n,k,1,a,m,b,n,0,c,n); + + col2im_gpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output_gpu+i*l.outputs); + } + if (l.batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); + } + activate_array_gpu(l.output_gpu, l.batch*l.n*l.out_w*l.out_h, l.activation); +} + +extern "C" void backward_deconvolutional_layer_gpu(layer l, network net) +{ + int i; + + //constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + + if(l.batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); + } + + //if(net.delta_gpu) memset(net.delta_gpu, 0, l.batch*l.h*l.w*l.c*sizeof(float)); + + for(i = 0; i < l.batch; ++i){ + int m = l.c; + int n = l.size*l.size*l.n; + int k = l.h*l.w; + + float *a = net.input_gpu + i*m*k; + float *b = net.workspace; + float *c = l.weight_updates_gpu; + + im2col_gpu(l.delta_gpu + i*l.outputs, l.out_c, l.out_h, l.out_w, + l.size, l.stride, l.pad, b); + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if(net.delta_gpu){ + int m = l.c; + int n = l.h*l.w; + int k = l.size*l.size*l.n; + + float *a = l.weights_gpu; + float *b = net.workspace; + float *c = net.delta_gpu + i*n*m; + + gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } +} + +extern "C" void pull_deconvolutional_layer(layer l) +{ + cuda_pull_array(l.weights_gpu, l.weights, 
l.c*l.n*l.size*l.size); + cuda_pull_array(l.biases_gpu, l.biases, l.n); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size); + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_pull_array(l.scales_gpu, l.scales, l.n); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + +extern "C" void push_deconvolutional_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size); + cuda_push_array(l.biases_gpu, l.biases, l.n); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size); + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_push_array(l.scales_gpu, l.scales, l.n); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + +void update_deconvolutional_layer_gpu(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + if(a.adam){ + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t); + adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + if(l.scales_gpu){ + adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + } + }else{ + axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1); + + axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.n, momentum, l.bias_updates_gpu, 1); + + 
if(l.scales_gpu){ + axpy_gpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.n, momentum, l.scale_updates_gpu, 1); + } + } +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/deconvolutional_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/deconvolutional_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..00c0e85771d42f99de969f9fd03e5f0f359d405c --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/deconvolutional_layer.c @@ -0,0 +1,312 @@ +#include "deconvolutional_layer.h" +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "utils.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" + +#include +#include + + +static size_t get_workspace_size(layer l){ + return (size_t)l.h*l.w*l.size*l.size*l.n*sizeof(float); +} + +void bilinear_init(layer l) +{ + int i,j,f; + float center = (l.size-1) / 2.; + for(f = 0; f < l.n; ++f){ + for(j = 0; j < l.size; ++j){ + for(i = 0; i < l.size; ++i){ + float val = (1 - fabs(i - center)) * (1 - fabs(j - center)); + int c = f%l.c; + int ind = f*l.size*l.size*l.c + c*l.size*l.size + j*l.size + i; + l.weights[ind] = val; + } + } + } +} + + +layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam) +{ + int i; + layer l = {0}; + l.type = DECONVOLUTIONAL; + + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.batch = batch; + l.stride = stride; + l.size = size; + + l.nweights = c*n*size*size; + l.nbiases = n; + + l.weights = calloc(c*n*size*size, sizeof(float)); + l.weight_updates = calloc(c*n*size*size, sizeof(float)); + + l.biases = calloc(n, sizeof(float)); + l.bias_updates = calloc(n, sizeof(float)); + //float scale = n/(size*size*c); + //printf("scale: %f\n", scale); + float scale = .02; + for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal(); + //bilinear_init(l); + for(i = 0; i < n; 
++i){ + l.biases[i] = 0; + } + l.pad = padding; + + l.out_h = (l.h - 1) * l.stride + l.size - 2*l.pad; + l.out_w = (l.w - 1) * l.stride + l.size - 2*l.pad; + l.out_c = n; + l.outputs = l.out_w * l.out_h * l.out_c; + l.inputs = l.w * l.h * l.c; + + scal_cpu(l.nweights, (float)l.out_w*l.out_h/(l.w*l.h), l.weights, 1); + + l.output = calloc(l.batch*l.outputs, sizeof(float)); + l.delta = calloc(l.batch*l.outputs, sizeof(float)); + + l.forward = forward_deconvolutional_layer; + l.backward = backward_deconvolutional_layer; + l.update = update_deconvolutional_layer; + + l.batch_normalize = batch_normalize; + + if(batch_normalize){ + l.scales = calloc(n, sizeof(float)); + l.scale_updates = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(n, sizeof(float)); + l.variance = calloc(n, sizeof(float)); + + l.mean_delta = calloc(n, sizeof(float)); + l.variance_delta = calloc(n, sizeof(float)); + + l.rolling_mean = calloc(n, sizeof(float)); + l.rolling_variance = calloc(n, sizeof(float)); + l.x = calloc(l.batch*l.outputs, sizeof(float)); + l.x_norm = calloc(l.batch*l.outputs, sizeof(float)); + } + if(adam){ + l.m = calloc(c*n*size*size, sizeof(float)); + l.v = calloc(c*n*size*size, sizeof(float)); + l.bias_m = calloc(n, sizeof(float)); + l.scale_m = calloc(n, sizeof(float)); + l.bias_v = calloc(n, sizeof(float)); + l.scale_v = calloc(n, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_deconvolutional_layer_gpu; + l.backward_gpu = backward_deconvolutional_layer_gpu; + l.update_gpu = update_deconvolutional_layer_gpu; + + if(gpu_index >= 0){ + + if (adam) { + l.m_gpu = cuda_make_array(l.m, c*n*size*size); + l.v_gpu = cuda_make_array(l.v, c*n*size*size); + l.bias_m_gpu = cuda_make_array(l.bias_m, n); + l.bias_v_gpu = cuda_make_array(l.bias_v, n); + l.scale_m_gpu = cuda_make_array(l.scale_m, n); + l.scale_v_gpu = cuda_make_array(l.scale_v, n); + } + l.weights_gpu = cuda_make_array(l.weights, c*n*size*size); + l.weight_updates_gpu 
= cuda_make_array(l.weight_updates, c*n*size*size); + + l.biases_gpu = cuda_make_array(l.biases, n); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*l.out_h*l.out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n); + + if(batch_normalize){ + l.mean_gpu = cuda_make_array(0, n); + l.variance_gpu = cuda_make_array(0, n); + + l.rolling_mean_gpu = cuda_make_array(0, n); + l.rolling_variance_gpu = cuda_make_array(0, n); + + l.mean_delta_gpu = cuda_make_array(0, n); + l.variance_delta_gpu = cuda_make_array(0, n); + + l.scales_gpu = cuda_make_array(l.scales, n); + l.scale_updates_gpu = cuda_make_array(0, n); + + l.x_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n); + l.x_norm_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n); + } + } + #ifdef CUDNN + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); + #endif +#endif + + l.activation = activation; + l.workspace_size = get_workspace_size(l); + + fprintf(stderr, "deconv%5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); + + return l; +} + +void denormalize_deconvolutional_layer(layer l) +{ + int i, j; + for(i = 0; i < l.n; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001); + for(j = 0; j < l.c*l.size*l.size; ++j){ + l.weights[i*l.c*l.size*l.size + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + +void resize_deconvolutional_layer(layer *l, int h, int w) +{ + l->h = h; + l->w = w; + l->out_h = (l->h - 1) * l->stride + l->size - 2*l->pad; + l->out_w = (l->w - 1) * l->stride + l->size - 2*l->pad; + + 
l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->w * l->h * l->c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + if(l->batch_normalize){ + l->x = realloc(l->x, l->batch*l->outputs*sizeof(float)); + l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float)); + } + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); + + if(l->batch_normalize){ + cuda_free(l->x_gpu); + cuda_free(l->x_norm_gpu); + + l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); + l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); + } + #ifdef CUDNN + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + #endif +#endif + l->workspace_size = get_workspace_size(*l); +} + +void forward_deconvolutional_layer(const layer l, network net) +{ + int i; + + int m = l.size*l.size*l.n; + int n = l.h*l.w; + int k = l.c; + + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + + for(i = 0; i < l.batch; ++i){ + float *a = l.weights; + float *b = net.input + i*l.c*l.h*l.w; + float *c = net.workspace; + + gemm_cpu(1,0,m,n,k,1,a,m,b,n,0,c,n); + + col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output+i*l.outputs); + } + if (l.batch_normalize) { + forward_batchnorm_layer(l, net); + } else { + add_bias(l.output, l.biases, l.batch, l.n, l.out_w*l.out_h); + } + activate_array(l.output, l.batch*l.n*l.out_w*l.out_h, l.activation); +} + +void backward_deconvolutional_layer(layer l, network net) +{ + int i; + + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } 
else { + backward_bias(l.bias_updates, l.delta, l.batch, l.n, l.out_w*l.out_h); + } + + //if(net.delta) memset(net.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float)); + + for(i = 0; i < l.batch; ++i){ + int m = l.c; + int n = l.size*l.size*l.n; + int k = l.h*l.w; + + float *a = net.input + i*m*k; + float *b = net.workspace; + float *c = l.weight_updates; + + im2col_cpu(l.delta + i*l.outputs, l.out_c, l.out_h, l.out_w, + l.size, l.stride, l.pad, b); + gemm_cpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if(net.delta){ + int m = l.c; + int n = l.h*l.w; + int k = l.size*l.size*l.n; + + float *a = l.weights; + float *b = net.workspace; + float *c = net.delta + i*n*m; + + gemm_cpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } +} + +void update_deconvolutional_layer(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int size = l.size*l.size*l.c*l.n; + axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.n, momentum, l.bias_updates, 1); + + if(l.scales){ + axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.n, momentum, l.scale_updates, 1); + } + + axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(size, momentum, l.weight_updates, 1); +} + + + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/deconvolutional_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/deconvolutional_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..722a1a58feec4ef13dac2b811df98e3f9960d4ef --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/deconvolutional_layer.h @@ -0,0 +1,25 @@ +#ifndef DECONVOLUTIONAL_LAYER_H +#define DECONVOLUTIONAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +#ifdef GPU +void 
forward_deconvolutional_layer_gpu(layer l, network net); +void backward_deconvolutional_layer_gpu(layer l, network net); +void update_deconvolutional_layer_gpu(layer l, update_args a); +void push_deconvolutional_layer(layer l); +void pull_deconvolutional_layer(layer l); +#endif + +layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam); +void resize_deconvolutional_layer(layer *l, int h, int w); +void forward_deconvolutional_layer(const layer l, network net); +void update_deconvolutional_layer(layer l, update_args a); +void backward_deconvolutional_layer(layer l, network net); + +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/demo.c b/workloads/realworld/uvm_prefetch/darknet/src/demo.c new file mode 100644 index 0000000000000000000000000000000000000000..b89efb8dc4c044c0240b7442e39222405409a676 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/demo.c @@ -0,0 +1,349 @@ +#include "network.h" +#include "detection_layer.h" +#include "region_layer.h" +#include "cost_layer.h" +#include "utils.h" +#include "parser.h" +#include "box.h" +#include "image.h" +#include "demo.h" +#include + +#define DEMO 1 + +#ifdef OPENCV + +static char **demo_names; +static image **demo_alphabet; +static int demo_classes; + +static network *net; +static image buff [3]; +static image buff_letter[3]; +static int buff_index = 0; +static void * cap; +static float fps = 0; +static float demo_thresh = 0; +static float demo_hier = .5; +static int running = 0; + +static int demo_frame = 3; +static int demo_index = 0; +static float **predictions; +static float *avg; +static int demo_done = 0; +static int demo_total = 0; +double demo_time; + +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num); + +int size_network(network *net) +{ + int i; + int count = 0; + for(i = 0; i < net->n; ++i){ + layer l = 
net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + count += l.outputs; + } + } + return count; +} + +void remember_network(network *net) +{ + int i; + int count = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + memcpy(predictions[demo_index] + count, net->layers[i].output, sizeof(float) * l.outputs); + count += l.outputs; + } + } +} + +detection *avg_predictions(network *net, int *nboxes) +{ + int i, j; + int count = 0; + fill_cpu(demo_total, 0, avg, 1); + for(j = 0; j < demo_frame; ++j){ + axpy_cpu(demo_total, 1./demo_frame, predictions[j], 1, avg, 1); + } + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + memcpy(l.output, avg + count, sizeof(float) * l.outputs); + count += l.outputs; + } + } + detection *dets = get_network_boxes(net, buff[0].w, buff[0].h, demo_thresh, demo_hier, 0, 1, nboxes); + return dets; +} + +void *detect_in_thread(void *ptr) +{ + running = 1; + float nms = .4; + + layer l = net->layers[net->n-1]; + float *X = buff_letter[(buff_index+2)%3].data; + network_predict(net, X); + + /* + if(l.type == DETECTION){ + get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0); + } else */ + remember_network(net); + detection *dets = 0; + int nboxes = 0; + dets = avg_predictions(net, &nboxes); + + + /* + int i,j; + box zero = {0}; + int classes = l.classes; + for(i = 0; i < demo_detections; ++i){ + avg[i].objectness = 0; + avg[i].bbox = zero; + memset(avg[i].prob, 0, classes*sizeof(float)); + for(j = 0; j < demo_frame; ++j){ + axpy_cpu(classes, 1./demo_frame, dets[j][i].prob, 1, avg[i].prob, 1); + avg[i].objectness += dets[j][i].objectness * 1./demo_frame; + avg[i].bbox.x += dets[j][i].bbox.x * 1./demo_frame; + avg[i].bbox.y += dets[j][i].bbox.y * 1./demo_frame; + avg[i].bbox.w += dets[j][i].bbox.w * 1./demo_frame; + avg[i].bbox.h += dets[j][i].bbox.h * 1./demo_frame; 
+ } + //copy_cpu(classes, dets[0][i].prob, 1, avg[i].prob, 1); + //avg[i].objectness = dets[0][i].objectness; + } + */ + + if (nms > 0) do_nms_obj(dets, nboxes, l.classes, nms); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.1f\n",fps); + printf("Objects:\n\n"); + image display = buff[(buff_index+2) % 3]; + draw_detections(display, dets, nboxes, demo_thresh, demo_names, demo_alphabet, demo_classes); + free_detections(dets, nboxes); + + demo_index = (demo_index + 1)%demo_frame; + running = 0; + return 0; +} + +void *fetch_in_thread(void *ptr) +{ + free_image(buff[buff_index]); + buff[buff_index] = get_image_from_stream(cap); + if(buff[buff_index].data == 0) { + demo_done = 1; + return 0; + } + letterbox_image_into(buff[buff_index], net->w, net->h, buff_letter[buff_index]); + return 0; +} + +void *display_in_thread(void *ptr) +{ + int c = show_image(buff[(buff_index + 1)%3], "Demo", 1); + if (c != -1) c = c%256; + if (c == 27) { + demo_done = 1; + return 0; + } else if (c == 82) { + demo_thresh += .02; + } else if (c == 84) { + demo_thresh -= .02; + if(demo_thresh <= .02) demo_thresh = .02; + } else if (c == 83) { + demo_hier += .02; + } else if (c == 81) { + demo_hier -= .02; + if(demo_hier <= .0) demo_hier = .0; + } + return 0; +} + +void *display_loop(void *ptr) +{ + while(1){ + display_in_thread(0); + } +} + +void *detect_loop(void *ptr) +{ + while(1){ + detect_in_thread(0); + } +} + +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) +{ + //demo_frame = avg_frames; + image **alphabet = load_alphabet(); + demo_names = names; + demo_alphabet = alphabet; + demo_classes = classes; + demo_thresh = thresh; + demo_hier = hier; + printf("Demo\n"); + net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + pthread_t detect_thread; + pthread_t fetch_thread; + + 
srand(2222222); + + int i; + demo_total = size_network(net); + predictions = calloc(demo_frame, sizeof(float*)); + for (i = 0; i < demo_frame; ++i){ + predictions[i] = calloc(demo_total, sizeof(float)); + } + avg = calloc(demo_total, sizeof(float)); + + if(filename){ + printf("video file: %s\n", filename); + cap = open_video_stream(filename, 0, 0, 0, 0); + }else{ + cap = open_video_stream(0, cam_index, w, h, frames); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + + buff[0] = get_image_from_stream(cap); + buff[1] = copy_image(buff[0]); + buff[2] = copy_image(buff[0]); + buff_letter[0] = letterbox_image(buff[0], net->w, net->h); + buff_letter[1] = letterbox_image(buff[0], net->w, net->h); + buff_letter[2] = letterbox_image(buff[0], net->w, net->h); + + int count = 0; + if(!prefix){ + make_window("Demo", 1352, 1013, fullscreen); + } + + demo_time = what_time_is_it_now(); + + while(!demo_done){ + buff_index = (buff_index + 1) %3; + if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); + if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); + if(!prefix){ + fps = 1./(what_time_is_it_now() - demo_time); + demo_time = what_time_is_it_now(); + display_in_thread(0); + }else{ + char name[256]; + sprintf(name, "%s_%08d", prefix, count); + save_image(buff[(buff_index + 1)%3], name); + } + pthread_join(fetch_thread, 0); + pthread_join(detect_thread, 0); + ++count; + } +} + +/* + void demo_compare(char *cfg1, char *weight1, char *cfg2, char *weight2, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) + { + demo_frame = avg_frames; + predictions = calloc(demo_frame, sizeof(float*)); + image **alphabet = load_alphabet(); + demo_names = names; + demo_alphabet = alphabet; + demo_classes = classes; + demo_thresh = thresh; + demo_hier = hier; + printf("Demo\n"); + net = 
load_network(cfg1, weight1, 0); + set_batch_network(net, 1); + pthread_t detect_thread; + pthread_t fetch_thread; + + srand(2222222); + + if(filename){ + printf("video file: %s\n", filename); + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + if(frames){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FPS, frames); + } + } + + if(!cap) error("Couldn't connect to webcam.\n"); + + layer l = net->layers[net->n-1]; + demo_detections = l.n*l.w*l.h; + int j; + + avg = (float *) calloc(l.outputs, sizeof(float)); + for(j = 0; j < demo_frame; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float)); + + boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box)); + probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *)); + for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes+1, sizeof(float)); + + buff[0] = get_image_from_stream(cap); + buff[1] = copy_image(buff[0]); + buff[2] = copy_image(buff[0]); + buff_letter[0] = letterbox_image(buff[0], net->w, net->h); + buff_letter[1] = letterbox_image(buff[0], net->w, net->h); + buff_letter[2] = letterbox_image(buff[0], net->w, net->h); + ipl = cvCreateImage(cvSize(buff[0].w,buff[0].h), IPL_DEPTH_8U, buff[0].c); + + int count = 0; + if(!prefix){ + cvNamedWindow("Demo", CV_WINDOW_NORMAL); + if(fullscreen){ + cvSetWindowProperty("Demo", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); + } else { + cvMoveWindow("Demo", 0, 0); + cvResizeWindow("Demo", 1352, 1013); + } + } + + demo_time = what_time_is_it_now(); + + while(!demo_done){ +buff_index = (buff_index + 1) %3; +if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); +if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); +if(!prefix){ + fps = 1./(what_time_is_it_now() - demo_time); + demo_time = what_time_is_it_now(); + 
display_in_thread(0); +}else{ + char name[256]; + sprintf(name, "%s_%08d", prefix, count); + save_image(buff[(buff_index + 1)%3], name); +} +pthread_join(fetch_thread, 0); +pthread_join(detect_thread, 0); +++count; +} +} +*/ +#else +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg, float hier, int w, int h, int frames, int fullscreen) +{ + fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); +} +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/demo.h b/workloads/realworld/uvm_prefetch/darknet/src/demo.h new file mode 100644 index 0000000000000000000000000000000000000000..86e46541d1a7473b22373b29bc6ff9cc281d4939 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/demo.h @@ -0,0 +1,6 @@ +#ifndef DEMO_H +#define DEMO_H + +#include "image.h" + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/detection_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/detection_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..565fa3c3f7d123736d65661d3be8ea91e26b3d5c --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/detection_layer.c @@ -0,0 +1,275 @@ +#include "detection_layer.h" +#include "activations.h" +#include "softmax_layer.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore) +{ + detection_layer l = {0}; + l.type = DETECTION; + + l.n = n; + l.batch = batch; + l.inputs = inputs; + l.classes = classes; + l.coords = coords; + l.rescore = rescore; + l.side = side; + l.w = side; + l.h = side; + assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs); + l.cost = calloc(1, sizeof(float)); + l.outputs = l.inputs; + l.truths = l.side*l.side*(1+l.coords+l.classes); + l.output = 
calloc(batch*l.outputs, sizeof(float)); + l.delta = calloc(batch*l.outputs, sizeof(float)); + + l.forward = forward_detection_layer; + l.backward = backward_detection_layer; +#ifdef GPU + l.forward_gpu = forward_detection_layer_gpu; + l.backward_gpu = backward_detection_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "Detection Layer\n"); + srand(0); + + return l; +} + +void forward_detection_layer(const detection_layer l, network net) +{ + int locations = l.side*l.side; + int i,j; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + //if(l.reorg) reorg(l.output, l.w*l.h, size*l.n, l.batch, 1); + int b; + if (l.softmax){ + for(b = 0; b < l.batch; ++b){ + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + int offset = i*l.classes; + softmax(l.output + index + offset, l.classes, 1, 1, + l.output + index + offset); + } + } + } + if(net.train){ + float avg_iou = 0; + float avg_cat = 0; + float avg_allcat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + *(l.cost) = 0; + int size = l.inputs * l.batch; + memset(l.delta, 0, size * sizeof(float)); + for (b = 0; b < l.batch; ++b){ + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + int truth_index = (b*locations + i)*(1+l.coords+l.classes); + int is_obj = net.truth[truth_index]; + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + l.delta[p_index] = l.noobject_scale*(0 - l.output[p_index]); + *(l.cost) += l.noobject_scale*pow(l.output[p_index], 2); + avg_anyobj += l.output[p_index]; + } + + int best_index = -1; + float best_iou = 0; + float best_rmse = 20; + + if (!is_obj){ + continue; + } + + int class_index = index + i*l.classes; + for(j = 0; j < l.classes; ++j) { + l.delta[class_index+j] = l.class_scale * (net.truth[truth_index+1+j] - l.output[class_index+j]); + *(l.cost) += l.class_scale * pow(net.truth[truth_index+1+j] - 
l.output[class_index+j], 2); + if(net.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j]; + avg_allcat += l.output[class_index+j]; + } + + box truth = float_to_box(net.truth + truth_index + 1 + l.classes, 1); + truth.x /= l.side; + truth.y /= l.side; + + for(j = 0; j < l.n; ++j){ + int box_index = index + locations*(l.classes + l.n) + (i*l.n + j) * l.coords; + box out = float_to_box(l.output + box_index, 1); + out.x /= l.side; + out.y /= l.side; + + if (l.sqrt){ + out.w = out.w*out.w; + out.h = out.h*out.h; + } + + float iou = box_iou(out, truth); + //iou = 0; + float rmse = box_rmse(out, truth); + if(best_iou > 0 || iou > 0){ + if(iou > best_iou){ + best_iou = iou; + best_index = j; + } + }else{ + if(rmse < best_rmse){ + best_rmse = rmse; + best_index = j; + } + } + } + + if(l.forced){ + if(truth.w*truth.h < .1){ + best_index = 1; + }else{ + best_index = 0; + } + } + if(l.random && *(net.seen) < 64000){ + best_index = rand()%l.n; + } + + int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords; + int tbox_index = truth_index + 1 + l.classes; + + box out = float_to_box(l.output + box_index, 1); + out.x /= l.side; + out.y /= l.side; + if (l.sqrt) { + out.w = out.w*out.w; + out.h = out.h*out.h; + } + float iou = box_iou(out, truth); + + //printf("%d,", best_index); + int p_index = index + locations*l.classes + i*l.n + best_index; + *(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2); + *(l.cost) += l.object_scale * pow(1-l.output[p_index], 2); + avg_obj += l.output[p_index]; + l.delta[p_index] = l.object_scale * (1.-l.output[p_index]); + + if(l.rescore){ + l.delta[p_index] = l.object_scale * (iou - l.output[p_index]); + } + + l.delta[box_index+0] = l.coord_scale*(net.truth[tbox_index + 0] - l.output[box_index + 0]); + l.delta[box_index+1] = l.coord_scale*(net.truth[tbox_index + 1] - l.output[box_index + 1]); + l.delta[box_index+2] = l.coord_scale*(net.truth[tbox_index + 2] - l.output[box_index + 2]); + 
l.delta[box_index+3] = l.coord_scale*(net.truth[tbox_index + 3] - l.output[box_index + 3]); + if(l.sqrt){ + l.delta[box_index+2] = l.coord_scale*(sqrt(net.truth[tbox_index + 2]) - l.output[box_index + 2]); + l.delta[box_index+3] = l.coord_scale*(sqrt(net.truth[tbox_index + 3]) - l.output[box_index + 3]); + } + + *(l.cost) += pow(1-iou, 2); + avg_iou += iou; + ++count; + } + } + + if(0){ + float *costs = calloc(l.batch*locations*l.n, sizeof(float)); + for (b = 0; b < l.batch; ++b) { + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + costs[b*locations*l.n + i*l.n + j] = l.delta[p_index]*l.delta[p_index]; + } + } + } + int indexes[100]; + top_k(costs, l.batch*locations*l.n, 100, indexes); + float cutoff = costs[indexes[99]]; + for (b = 0; b < l.batch; ++b) { + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + if (l.delta[p_index]*l.delta[p_index] < cutoff) l.delta[p_index] = 0; + } + } + } + free(costs); + } + + + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + + + printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count); + //if(l.reorg) reorg(l.delta, l.w*l.h, size*l.n, l.batch, 0); + } +} + +void backward_detection_layer(const detection_layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +void get_detection_detections(layer l, int w, int h, float thresh, detection *dets) +{ + int i,j,n; + float *predictions = l.output; + //int per_cell = 5*num+classes; + for (i = 0; i < l.side*l.side; ++i){ + int row = i / l.side; + int col = i % l.side; + for(n = 0; n < l.n; ++n){ + int index = i*l.n + n; + int p_index = l.side*l.side*l.classes + i*l.n + n; + float scale = 
predictions[p_index]; + int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n)*4; + box b; + b.x = (predictions[box_index + 0] + col) / l.side * w; + b.y = (predictions[box_index + 1] + row) / l.side * h; + b.w = pow(predictions[box_index + 2], (l.sqrt?2:1)) * w; + b.h = pow(predictions[box_index + 3], (l.sqrt?2:1)) * h; + dets[index].bbox = b; + dets[index].objectness = scale; + for(j = 0; j < l.classes; ++j){ + int class_index = i*l.classes; + float prob = scale*predictions[class_index+j]; + dets[index].prob[j] = (prob > thresh) ? prob : 0; + } + } + } +} + +#ifdef GPU + +void forward_detection_layer_gpu(const detection_layer l, network net) +{ + if(!net.train){ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + return; + } + + cuda_pull_array(net.input_gpu, net.input, l.batch*l.inputs); + forward_detection_layer(l, net); + cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +void backward_detection_layer_gpu(detection_layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); + //copy_gpu(l.batch*l.inputs, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/detection_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/detection_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1c818535700c770c7a5d9387534b199b58876198 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/detection_layer.h @@ -0,0 +1,18 @@ +#ifndef DETECTION_LAYER_H +#define DETECTION_LAYER_H + +#include "layer.h" +#include "network.h" + +typedef layer detection_layer; + +detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore); +void forward_detection_layer(const detection_layer l, network net); +void backward_detection_layer(const detection_layer l, network net); + +#ifdef GPU +void forward_detection_layer_gpu(const 
detection_layer l, network net); +void backward_detection_layer_gpu(detection_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/dropout_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/dropout_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..8fefa22caeddd174b2a7010274fadae854c742c1 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/dropout_layer.c @@ -0,0 +1,60 @@ +#include "dropout_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include +#include + +dropout_layer make_dropout_layer(int batch, int inputs, float probability) +{ + dropout_layer l = {0}; + l.type = DROPOUT; + l.probability = probability; + l.inputs = inputs; + l.outputs = inputs; + l.batch = batch; + l.rand = calloc(inputs*batch, sizeof(float)); + l.scale = 1./(1.-probability); + l.forward = forward_dropout_layer; + l.backward = backward_dropout_layer; + #ifdef GPU + l.forward_gpu = forward_dropout_layer_gpu; + l.backward_gpu = backward_dropout_layer_gpu; + l.rand_gpu = cuda_make_array(l.rand, inputs*batch); + #endif + fprintf(stderr, "dropout p = %.2f %4d -> %4d\n", probability, inputs, inputs); + return l; +} + +void resize_dropout_layer(dropout_layer *l, int inputs) +{ + l->rand = realloc(l->rand, l->inputs*l->batch*sizeof(float)); + #ifdef GPU + cuda_free(l->rand_gpu); + + l->rand_gpu = cuda_make_array(l->rand, inputs*l->batch); + #endif +} + +void forward_dropout_layer(dropout_layer l, network net) +{ + int i; + if (!net.train) return; + for(i = 0; i < l.batch * l.inputs; ++i){ + float r = rand_uniform(0, 1); + l.rand[i] = r; + if(r < l.probability) net.input[i] = 0; + else net.input[i] *= l.scale; + } +} + +void backward_dropout_layer(dropout_layer l, network net) +{ + int i; + if(!net.delta) return; + for(i = 0; i < l.batch * l.inputs; ++i){ + float r = l.rand[i]; + if(r < l.probability) net.delta[i] = 0; + else net.delta[i] *= l.scale; + } +} + diff --git 
a/workloads/realworld/uvm_prefetch/darknet/src/dropout_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/dropout_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..01f94d4d7d10b732fb0e558089579e95128a70bd --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/dropout_layer.h @@ -0,0 +1,20 @@ +#ifndef DROPOUT_LAYER_H +#define DROPOUT_LAYER_H + +#include "layer.h" +#include "network.h" + +typedef layer dropout_layer; + +dropout_layer make_dropout_layer(int batch, int inputs, float probability); + +void forward_dropout_layer(dropout_layer l, network net); +void backward_dropout_layer(dropout_layer l, network net); +void resize_dropout_layer(dropout_layer *l, int inputs); + +#ifdef GPU +void forward_dropout_layer_gpu(dropout_layer l, network net); +void backward_dropout_layer_gpu(dropout_layer l, network net); + +#endif +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/dropout_layer_kernels.cu b/workloads/realworld/uvm_prefetch/darknet/src/dropout_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..30cd67a34f06e5b398e198f186a2ecefcf6df3dc --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/dropout_layer_kernels.cu @@ -0,0 +1,60 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "dropout_layer.h" +#include "cuda_dark.h" +#include "utils.h" +} + +__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id < size) input[id] = (rand[id] < prob) ? 
0 : input[id]*scale; +} + +void forward_dropout_layer_gpu(dropout_layer layer, network net) +{ + if (!net.train) return; + int size = layer.inputs*layer.batch; + cuda_random(layer.rand_gpu, size); + /* + int i; + for(i = 0; i < size; ++i){ + layer.rand[i] = rand_uniform(); + } + cuda_push_array(layer.rand_gpu, layer.rand, size); + */ + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(net.input_gpu, size * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(layer.rand_gpu, size * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + yoloswag420blazeit360noscope<<>>(net.input_gpu, size, layer.rand_gpu, layer.probability, layer.scale); + check_error(cudaPeekAtLastError()); +} + +void backward_dropout_layer_gpu(dropout_layer layer, network net) +{ + if(!net.delta_gpu) return; + int size = layer.inputs*layer.batch; + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(net.delta_gpu, size * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(layer.rand_gpu, size * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + yoloswag420blazeit360noscope<<>>(net.delta_gpu, size, layer.rand_gpu, layer.probability, layer.scale); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/uvm_prefetch/darknet/src/gemm.c b/workloads/realworld/uvm_prefetch/darknet/src/gemm.c new file mode 100644 index 0000000000000000000000000000000000000000..756ae595d7348fc2d343a48715b05ea882d6aa7c --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/gemm.c @@ -0,0 +1,324 @@ +#include "gemm.h" +#include "utils.h" +#include "cuda_dark.h" +#include +#include +#include + +void gemm_bin(int M, int N, int K, float ALPHA, + char *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int 
i,j,k; + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + char A_PART = A[i*lda+k]; + if(A_PART){ + for(j = 0; j < N; ++j){ + C[i*ldc+j] += B[k*ldb+j]; + } + } else { + for(j = 0; j < N; ++j){ + C[i*ldc+j] -= B[k*ldb+j]; + } + } + } + } +} + +float *random_matrix(int rows, int cols) +{ + int i; + float *m = calloc(rows*cols, sizeof(float)); + for(i = 0; i < rows*cols; ++i){ + m[i] = (float)rand()/RAND_MAX; + } + return m; +} + +void time_random_matrix(int TA, int TB, int m, int k, int n) +{ + float *a; + if(!TA) a = random_matrix(m,k); + else a = random_matrix(k,m); + int lda = (!TA)?k:m; + float *b; + if(!TB) b = random_matrix(k,n); + else b = random_matrix(n,k); + int ldb = (!TB)?n:k; + + float *c = random_matrix(m,n); + int i; + clock_t start = clock(), end; + for(i = 0; i<10; ++i){ + gemm_cpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); + } + end = clock(); + printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf ms\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); + free(a); + free(b); + free(c); +} + + +void gemm(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + gemm_cpu( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); +} + +void gemm_nn(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + register float A_PART = ALPHA*A[i*lda+k]; + for(j = 0; j < N; ++j){ + C[i*ldc+j] += A_PART*B[k*ldb+j]; + } + } + } +} + +void gemm_nt(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + register float sum = 0; + for(k = 0; k < K; ++k){ + sum += ALPHA*A[i*lda+k]*B[j*ldb + k]; + } + C[i*ldc+j] += sum; + } + } +} + +void gemm_tn(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float 
*C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + register float A_PART = ALPHA*A[k*lda+i]; + for(j = 0; j < N; ++j){ + C[i*ldc+j] += A_PART*B[k*ldb+j]; + } + } + } +} + +void gemm_tt(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + register float sum = 0; + for(k = 0; k < K; ++k){ + sum += ALPHA*A[i+k*lda]*B[k+j*ldb]; + } + C[i*ldc+j] += sum; + } + } +} + + +void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + //printf("cpu: %d %d %d %d %d %f %d %d %f %d\n",TA, TB, M, N, K, ALPHA, lda, ldb, BETA, ldc); + int i, j; + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + C[i*ldc + j] *= BETA; + } + } + if(!TA && !TB) + gemm_nn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else if(TA && !TB) + gemm_tn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else if(!TA && TB) + gemm_nt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else + gemm_tt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); +} + + +// #ifdef GPU + +// #include + +// void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, +// float *A_gpu, int lda, +// float *B_gpu, int ldb, +// float BETA, +// float *C_gpu, int ldc) +// { +// cublasHandle_t handle = blas_handle(); +// cudaError_t status = (cudaError_t) cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), +// (TA ? 
CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); +// check_error(status); +// } + +// #include +// #include +// #include +// #include + +// void time_gpu_random_matrix(int TA, int TB, int m, int k, int n) +// { +// float *a; +// if(!TA) a = random_matrix(m,k); +// else a = random_matrix(k,m); +// int lda = (!TA)?k:m; +// float *b; +// if(!TB) b = random_matrix(k,n); +// else b = random_matrix(n,k); +// int ldb = (!TB)?n:k; + +// float *c = random_matrix(m,n); +// int i; +// clock_t start = clock(), end; +// for(i = 0; i<32; ++i){ +// gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); +// } +// end = clock(); +// printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); +// free(a); +// free(b); +// free(c); +// } + +// void time_gpu(int TA, int TB, int m, int k, int n) +// { +// int iter = 10; +// float *a = random_matrix(m,k); +// float *b = random_matrix(k,n); + +// int lda = (!TA)?k:m; +// int ldb = (!TB)?n:k; + +// float *c = random_matrix(m,n); + +// float *a_cl = cuda_make_array(a, m*k); +// float *b_cl = cuda_make_array(b, k*n); +// float *c_cl = cuda_make_array(c, m*n); + +// int i; +// clock_t start = clock(), end; +// for(i = 0; i +#include +#include +#include +#include + +#include "gemm.h" +#include "utils.h" +#include "cuda_dark.h" + +#include +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK_X 16 +#define DIM_THREAD_BLOCK_Y 16 + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +__global__ void gemm_kernel(float *a, float *b, float *c, int M, int K, int N, float alpha, float beta) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // Compute each thread's global row and column 
index + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + + + // Statically allocated shared memory + __shared__ float s_a[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + __shared__ float s_b[DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + // Accumulate in temporary variable + + float tmp = 0.0f; + if (row < M && col < N) { + + tmp = beta * c[row * N + col]; + + // Sweep tile across matrix + for (int i = 0; i < K; i += blockDim.x) { + int left = K - i; + + if ((i + threadIdx.x) < K) + s_a[threadIdx.y * blockDim.x + threadIdx.x] = a[row * K + i + threadIdx.x]; + + if ((i + threadIdx.y) < K) + s_b[threadIdx.y * blockDim.x + threadIdx.x] = b[(i + threadIdx.y) * N + col]; + + block.sync(); + + for (int k = 0; k < blockDim.x && k < left ; k++) { + tmp += alpha * s_a[threadIdx.y * blockDim.x + k] * s_b[k * blockDim.x + threadIdx.x]; + } + block.sync(); + } + c[row * N + col] = tmp; + } + block.sync(); +} + +void gemmCuda(float *A, float *B, float *C, int M, int N, int K, float alpha, float beta) +{ + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 grid((size_t)(ceil(((float)N) / ((float)block.y))), (size_t)(ceil(((float)M) / ((float)block.x)))); + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(A, M * K * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(B, K * N * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(C, M * N * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + gemm_kernel<<>>(A, B, C, M, K, N, alpha, beta); + check_error(cudaPeekAtLastError()); +} + +void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, + float *A_gpu, int lda, + float *B_gpu, int ldb, + float BETA, + float *C_gpu, int ldc) +{ + // printf("TA is %d, TB is %d, M is %d, N is %d, K 
is %d, lda is %d, ldb is %d, ldc is %d.\n", TA, TB, M, N, K, lda, ldb, ldc); + if (TA == 0 && TB == 0) { + gemmCuda(A_gpu, B_gpu, C_gpu, M, N, K, ALPHA, BETA); + } else { + cublasHandle_t handle = blas_handle(); + cudaError_t status = (cudaError_t) cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), + (TA ? CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); + check_error(status); + } +} + + +void time_gpu_random_matrix(int TA, int TB, int m, int k, int n) +{ + float *a; + if(!TA) a = random_matrix(m,k); + else a = random_matrix(k,m); + int lda = (!TA)?k:m; + float *b; + if(!TB) b = random_matrix(k,n); + else b = random_matrix(n,k); + int ldb = (!TB)?n:k; + + float *c = random_matrix(m,n); + int i; + clock_t start = clock(), end; + for(i = 0; i<32; ++i){ + gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); + } + end = clock(); + printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); + free(a); + free(b); + free(c); +} + +void time_gpu(int TA, int TB, int m, int k, int n) +{ + int iter = 10; + float *a = random_matrix(m,k); + float *b = random_matrix(k,n); + + int lda = (!TA)?k:m; + int ldb = (!TB)?n:k; + + float *c = random_matrix(m,n); + + float *a_cl = cuda_make_array(a, m*k); + float *b_cl = cuda_make_array(b, k*n); + float *c_cl = cuda_make_array(c, m*n); + + int i; + clock_t start = clock(), end; + for(i = 0; i +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam) +{ + fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = 
GRU; + l.steps = steps; + l.inputs = inputs; + + l.uz = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uz) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uz->batch = batch; + + l.wz = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wz) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wz->batch = batch; + + l.ur = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.ur) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.ur->batch = batch; + + l.wr = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wr) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wr->batch = batch; + + + + l.uh = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uh) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uh->batch = batch; + + l.wh = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wh) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wh->batch = batch; + + l.batch_normalize = batch_normalize; + + + l.outputs = outputs; + l.output = calloc(outputs*batch*steps, sizeof(float)); + l.delta = calloc(outputs*batch*steps, sizeof(float)); + l.state = calloc(outputs*batch, sizeof(float)); + l.prev_state = calloc(outputs*batch, sizeof(float)); + l.forgot_state = calloc(outputs*batch, sizeof(float)); + l.forgot_delta = calloc(outputs*batch, sizeof(float)); + + l.r_cpu = calloc(outputs*batch, sizeof(float)); + l.z_cpu = calloc(outputs*batch, sizeof(float)); + l.h_cpu = calloc(outputs*batch, sizeof(float)); + + l.forward = forward_gru_layer; + l.backward = backward_gru_layer; + l.update = update_gru_layer; + +#ifdef GPU + l.forward_gpu = forward_gru_layer_gpu; + l.backward_gpu = backward_gru_layer_gpu; + l.update_gpu = update_gru_layer_gpu; + + l.forgot_state_gpu = cuda_make_array(0, batch*outputs); + l.forgot_delta_gpu = 
cuda_make_array(0, batch*outputs); + l.prev_state_gpu = cuda_make_array(0, batch*outputs); + l.state_gpu = cuda_make_array(0, batch*outputs); + l.output_gpu = cuda_make_array(0, batch*outputs*steps); + l.delta_gpu = cuda_make_array(0, batch*outputs*steps); + l.r_gpu = cuda_make_array(0, batch*outputs); + l.z_gpu = cuda_make_array(0, batch*outputs); + l.h_gpu = cuda_make_array(0, batch*outputs); + +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l.uz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uz->out_c, l.uz->out_h, l.uz->out_w); + cudnnSetTensor4dDescriptor(l.uh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uh->out_c, l.uh->out_h, l.uh->out_w); + cudnnSetTensor4dDescriptor(l.ur->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ur->out_c, l.ur->out_h, l.ur->out_w); + cudnnSetTensor4dDescriptor(l.wz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wz->out_c, l.wz->out_h, l.wz->out_w); + cudnnSetTensor4dDescriptor(l.wh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wh->out_c, l.wh->out_h, l.wh->out_w); + cudnnSetTensor4dDescriptor(l.wr->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wr->out_c, l.wr->out_h, l.wr->out_w); +#endif +#endif + + return l; +} + +void update_gru_layer(layer l, update_args a) +{ + update_connected_layer(*(l.ur), a); + update_connected_layer(*(l.uz), a); + update_connected_layer(*(l.uh), a); + update_connected_layer(*(l.wr), a); + update_connected_layer(*(l.wz), a); + update_connected_layer(*(l.wh), a); +} + +void forward_gru_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + fill_cpu(l.outputs * l.batch * l.steps, 0, uz.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, ur.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, uh.delta, 1); + + fill_cpu(l.outputs * l.batch * l.steps, 0, 
wz.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wr.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wh.delta, 1); + if(net.train) { + fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1); + copy_cpu(l.outputs*l.batch, l.state, 1, l.prev_state, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input = l.state; + forward_connected_layer(wz, s); + forward_connected_layer(wr, s); + + s.input = net.input; + forward_connected_layer(uz, s); + forward_connected_layer(ur, s); + forward_connected_layer(uh, s); + + + copy_cpu(l.outputs*l.batch, uz.output, 1, l.z_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wz.output, 1, l.z_cpu, 1); + + copy_cpu(l.outputs*l.batch, ur.output, 1, l.r_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wr.output, 1, l.r_cpu, 1); + + activate_array(l.z_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.r_cpu, l.outputs*l.batch, LOGISTIC); + + copy_cpu(l.outputs*l.batch, l.state, 1, l.forgot_state, 1); + mul_cpu(l.outputs*l.batch, l.r_cpu, 1, l.forgot_state, 1); + + s.input = l.forgot_state; + forward_connected_layer(wh, s); + + copy_cpu(l.outputs*l.batch, uh.output, 1, l.h_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wh.output, 1, l.h_cpu, 1); + + if(l.tanh){ + activate_array(l.h_cpu, l.outputs*l.batch, TANH); + } else { + activate_array(l.h_cpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_sum_cpu(l.state, l.h_cpu, l.z_cpu, l.outputs*l.batch, l.output); + + copy_cpu(l.outputs*l.batch, l.output, 1, l.state, 1); + + net.input += l.inputs*l.batch; + l.output += l.outputs*l.batch; + increment_layer(&uz, 1); + increment_layer(&ur, 1); + increment_layer(&uh, 1); + + increment_layer(&wz, 1); + increment_layer(&wr, 1); + increment_layer(&wh, 1); + } +} + +void backward_gru_layer(layer l, network net) +{ +} + +#ifdef GPU + +void pull_gru_layer(layer l) +{ +} + +void push_gru_layer(layer l) +{ +} + +void update_gru_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.ur), a); + update_connected_layer_gpu(*(l.uz), a); + 
update_connected_layer_gpu(*(l.uh), a); + update_connected_layer_gpu(*(l.wr), a); + update_connected_layer_gpu(*(l.wz), a); + update_connected_layer_gpu(*(l.wh), a); +} + +void forward_gru_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + fill_gpu(l.outputs * l.batch * l.steps, 0, uz.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, ur.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, uh.delta_gpu, 1); + + fill_gpu(l.outputs * l.batch * l.steps, 0, wz.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wr.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wh.delta_gpu, 1); + if(net.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.prev_state_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(wz, s); + forward_connected_layer_gpu(wr, s); + + s.input_gpu = net.input_gpu; + forward_connected_layer_gpu(uz, s); + forward_connected_layer_gpu(ur, s); + forward_connected_layer_gpu(uh, s); + + copy_gpu(l.outputs*l.batch, uz.output_gpu, 1, l.z_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wz.output_gpu, 1, l.z_gpu, 1); + + copy_gpu(l.outputs*l.batch, ur.output_gpu, 1, l.r_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wr.output_gpu, 1, l.r_gpu, 1); + + activate_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.forgot_state_gpu, 1); + mul_gpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1); + + s.input_gpu = l.forgot_state_gpu; + forward_connected_layer_gpu(wh, s); + + copy_gpu(l.outputs*l.batch, uh.output_gpu, 1, l.h_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wh.output_gpu, 1, l.h_gpu, 1); + + if(l.tanh){ + activate_array_gpu(l.h_gpu, 
l.outputs*l.batch, TANH); + } else { + activate_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_sum_gpu(l.state_gpu, l.h_gpu, l.z_gpu, l.outputs*l.batch, l.output_gpu); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.state_gpu, 1); + + net.input_gpu += l.inputs*l.batch; + l.output_gpu += l.outputs*l.batch; + increment_layer(&uz, 1); + increment_layer(&ur, 1); + increment_layer(&uh, 1); + + increment_layer(&wz, 1); + increment_layer(&wr, 1); + increment_layer(&wh, 1); + } +} + +void backward_gru_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + increment_layer(&uz, l.steps - 1); + increment_layer(&ur, l.steps - 1); + increment_layer(&uh, l.steps - 1); + + increment_layer(&wz, l.steps - 1); + increment_layer(&wr, l.steps - 1); + increment_layer(&wh, l.steps - 1); + + net.input_gpu += l.inputs*l.batch*(l.steps-1); + if(net.delta_gpu) net.delta_gpu += l.inputs*l.batch*(l.steps-1); + l.output_gpu += l.outputs*l.batch*(l.steps-1); + l.delta_gpu += l.outputs*l.batch*(l.steps-1); + float *end_state = l.output_gpu; + for (i = l.steps-1; i >= 0; --i) { + if(i != 0) copy_gpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + else copy_gpu(l.outputs*l.batch, l.prev_state_gpu, 1, l.state_gpu, 1); + float *prev_delta_gpu = (i == 0) ? 
0 : l.delta_gpu - l.outputs*l.batch; + + copy_gpu(l.outputs*l.batch, uz.output_gpu, 1, l.z_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wz.output_gpu, 1, l.z_gpu, 1); + + copy_gpu(l.outputs*l.batch, ur.output_gpu, 1, l.r_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wr.output_gpu, 1, l.r_gpu, 1); + + activate_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, uh.output_gpu, 1, l.h_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wh.output_gpu, 1, l.h_gpu, 1); + + if(l.tanh){ + activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH); + } else { + activate_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_delta_gpu(l.state_gpu, l.h_gpu, l.z_gpu, prev_delta_gpu, uh.delta_gpu, uz.delta_gpu, l.outputs*l.batch, l.delta_gpu); + + if(l.tanh){ + gradient_array_gpu(l.h_gpu, l.outputs*l.batch, TANH, uh.delta_gpu); + } else { + gradient_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC, uh.delta_gpu); + } + + copy_gpu(l.outputs*l.batch, uh.delta_gpu, 1, wh.delta_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.forgot_state_gpu, 1); + mul_gpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1); + fill_gpu(l.outputs*l.batch, 0, l.forgot_delta_gpu, 1); + + s.input_gpu = l.forgot_state_gpu; + s.delta_gpu = l.forgot_delta_gpu; + + backward_connected_layer_gpu(wh, s); + if(prev_delta_gpu) mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.r_gpu, prev_delta_gpu); + mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.state_gpu, ur.delta_gpu); + + gradient_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC, ur.delta_gpu); + copy_gpu(l.outputs*l.batch, ur.delta_gpu, 1, wr.delta_gpu, 1); + + gradient_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC, uz.delta_gpu); + copy_gpu(l.outputs*l.batch, uz.delta_gpu, 1, wz.delta_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = prev_delta_gpu; + + backward_connected_layer_gpu(wr, s); + backward_connected_layer_gpu(wz, s); + + 
s.input_gpu = net.input_gpu; + s.delta_gpu = net.delta_gpu; + + backward_connected_layer_gpu(uh, s); + backward_connected_layer_gpu(ur, s); + backward_connected_layer_gpu(uz, s); + + + net.input_gpu -= l.inputs*l.batch; + if(net.delta_gpu) net.delta_gpu -= l.inputs*l.batch; + l.output_gpu -= l.outputs*l.batch; + l.delta_gpu -= l.outputs*l.batch; + increment_layer(&uz, -1); + increment_layer(&ur, -1); + increment_layer(&uh, -1); + + increment_layer(&wz, -1); + increment_layer(&wr, -1); + increment_layer(&wh, -1); + } + copy_gpu(l.outputs*l.batch, end_state, 1, l.state_gpu, 1); +} +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/gru_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/gru_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9067942e9499d53c8d54f7728d64a5030200f4de --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/gru_layer.h @@ -0,0 +1,24 @@ + +#ifndef GRU_LAYER_H +#define GRU_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam); + +void forward_gru_layer(layer l, network state); +void backward_gru_layer(layer l, network state); +void update_gru_layer(layer l, update_args a); + +#ifdef GPU +void forward_gru_layer_gpu(layer l, network state); +void backward_gru_layer_gpu(layer l, network state); +void update_gru_layer_gpu(layer l, update_args a); +void push_gru_layer(layer l); +void pull_gru_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/im2col.c b/workloads/realworld/uvm_prefetch/darknet/src/im2col.c new file mode 100644 index 0000000000000000000000000000000000000000..69ec98a9d12b2e21a3859611ad709d62fc80dcf3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/im2col.c @@ -0,0 +1,40 @@ +#include "im2col.h" +#include +float im2col_get_pixel(float *im, int height, int width, int channels, + int row, int col, int 
channel, int pad) +{ + row -= pad; + col -= pad; + + if (row < 0 || col < 0 || + row >= height || col >= width) return 0; + return im[col + width*(row + height*channel)]; +} + +//From Berkeley Vision's Caffe! +//https://github.com/BVLC/caffe/blob/master/LICENSE +void im2col_cpu(float* data_im, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_col) +{ + int c,h,w; + int height_col = (height + 2*pad - ksize) / stride + 1; + int width_col = (width + 2*pad - ksize) / stride + 1; + + int channels_col = channels * ksize * ksize; + for (c = 0; c < channels_col; ++c) { + int w_offset = c % ksize; + int h_offset = (c / ksize) % ksize; + int c_im = c / ksize / ksize; + for (h = 0; h < height_col; ++h) { + for (w = 0; w < width_col; ++w) { + int im_row = h_offset + h * stride; + int im_col = w_offset + w * stride; + int col_index = (c * height_col + h) * width_col + w; + data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, + im_row, im_col, c_im, pad); + } + } + } +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/im2col.h b/workloads/realworld/uvm_prefetch/darknet/src/im2col.h new file mode 100644 index 0000000000000000000000000000000000000000..02c4247fad9b8428a8e89fc8caec0b5b6ba5b36a --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/im2col.h @@ -0,0 +1,15 @@ +#ifndef IM2COL_H +#define IM2COL_H + +void im2col_cpu(float* data_im, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_col); + +#ifdef GPU + +void im2col_gpu(float *im, + int channels, int height, int width, + int ksize, int stride, int pad,float *data_col); + +#endif +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/im2col_kernels.cu b/workloads/realworld/uvm_prefetch/darknet/src/im2col_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..660806208adf57bac0afe8b026de3e97e57cd250 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/im2col_kernels.cu 
@@ -0,0 +1,62 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "im2col.h" +#include "cuda_dark.h" +} + +// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu +// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE + +__global__ void im2col_gpu_kernel(const int n, const float* data_im, + const int height, const int width, const int ksize, + const int pad, + const int stride, + const int height_col, const int width_col, + float *data_col) { + int index = blockIdx.x*blockDim.x+threadIdx.x; + for(; index < n; index += blockDim.x*gridDim.x){ + int w_out = index % width_col; + int h_index = index / width_col; + int h_out = h_index % height_col; + int channel_in = h_index / height_col; + int channel_out = channel_in * ksize * ksize; + int h_in = h_out * stride - pad; + int w_in = w_out * stride - pad; + float* data_col_ptr = data_col; + data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; + const float* data_im_ptr = data_im; + data_im_ptr += (channel_in * height + h_in) * width + w_in; + for (int i = 0; i < ksize; ++i) { + for (int j = 0; j < ksize; ++j) { + int h = h_in + i; + int w = w_in + j; + + *data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ? + data_im_ptr[i * width + j] : 0; + + //*data_col_ptr = data_im_ptr[ii * width + jj]; + + data_col_ptr += height_col * width_col; + } + } + } +} + +void im2col_gpu(float *im, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_col){ + // We are going to launch channels * height_col * width_col kernels, each + // kernel responsible for copying a single-channel grid. 
+ int height_col = (height + 2 * pad - ksize) / stride + 1; + int width_col = (width + 2 * pad - ksize) / stride + 1; + int num_kernels = channels * height_col * width_col; + im2col_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, + BLOCK>>>( + num_kernels, im, height, width, ksize, pad, + stride, height_col, + width_col, data_col); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/uvm_prefetch/darknet/src/image.c b/workloads/realworld/uvm_prefetch/darknet/src/image.c new file mode 100644 index 0000000000000000000000000000000000000000..3edf6d1045f4637d7bb108440302fd36d5ef9a18 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/image.c @@ -0,0 +1,1467 @@ +#include "image.h" +#include "utils.h" +#include "blas.h" +#include "cuda_dark.h" +#include +#include + +#define STB_IMAGE_IMPLEMENTATION +#include "stb_image.h" +#define STB_IMAGE_WRITE_IMPLEMENTATION +#include "stb_image_write.h" + +int windows = 0; + +float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} }; + +float get_color(int c, int x, int max) +{ + float ratio = ((float)x/max)*5; + int i = floor(ratio); + int j = ceil(ratio); + ratio -= i; + float r = (1-ratio) * colors[i][c] + ratio*colors[j][c]; + //printf("%f\n", r); + return r; +} + +image mask_to_rgb(image mask) +{ + int n = mask.c; + image im = make_image(mask.w, mask.h, 3); + int i, j; + for(j = 0; j < n; ++j){ + int offset = j*123457 % n; + float red = get_color(2,offset,n); + float green = get_color(1,offset,n); + float blue = get_color(0,offset,n); + for(i = 0; i < im.w*im.h; ++i){ + im.data[i + 0*im.w*im.h] += mask.data[j*im.h*im.w + i]*red; + im.data[i + 1*im.w*im.h] += mask.data[j*im.h*im.w + i]*green; + im.data[i + 2*im.w*im.h] += mask.data[j*im.h*im.w + i]*blue; + } + } + return im; +} + +static float get_pixel(image m, int x, int y, int c) +{ + assert(x < m.w && y < m.h && c < m.c); + return m.data[c*m.h*m.w + y*m.w + x]; +} +static float get_pixel_extend(image m, int x, int y, int c) +{ + if(x 
< 0 || x >= m.w || y < 0 || y >= m.h) return 0; + /* + if(x < 0) x = 0; + if(x >= m.w) x = m.w-1; + if(y < 0) y = 0; + if(y >= m.h) y = m.h-1; + */ + if(c < 0 || c >= m.c) return 0; + return get_pixel(m, x, y, c); +} +static void set_pixel(image m, int x, int y, int c, float val) +{ + if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return; + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] = val; +} +static void add_pixel(image m, int x, int y, int c, float val) +{ + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] += val; +} + +static float bilinear_interpolate(image im, float x, float y, int c) +{ + int ix = (int) floorf(x); + int iy = (int) floorf(y); + + float dx = x - ix; + float dy = y - iy; + + float val = (1-dy) * (1-dx) * get_pixel_extend(im, ix, iy, c) + + dy * (1-dx) * get_pixel_extend(im, ix, iy+1, c) + + (1-dy) * dx * get_pixel_extend(im, ix+1, iy, c) + + dy * dx * get_pixel_extend(im, ix+1, iy+1, c); + return val; +} + + +void composite_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float val = get_pixel(source, x, y, k); + float val2 = get_pixel_extend(dest, dx+x, dy+y, k); + set_pixel(dest, dx+x, dy+y, k, val * val2); + } + } + } +} + +image border_image(image a, int border) +{ + image b = make_image(a.w + 2*border, a.h + 2*border, a.c); + int x,y,k; + for(k = 0; k < b.c; ++k){ + for(y = 0; y < b.h; ++y){ + for(x = 0; x < b.w; ++x){ + float val = get_pixel_extend(a, x - border, y - border, k); + if(x - border < 0 || x - border >= a.w || y - border < 0 || y - border >= a.h) val = 1; + set_pixel(b, x, y, k, val); + } + } + } + return b; +} + +image tile_images(image a, image b, int dx) +{ + if(a.w == 0) return copy_image(b); + image c = make_image(a.w + b.w + dx, (a.h > b.h) ? a.h : b.h, (a.c > b.c) ? 
a.c : b.c); + fill_cpu(c.w*c.h*c.c, 1, c.data, 1); + embed_image(a, c, 0, 0); + composite_image(b, c, a.w + dx, 0); + return c; +} + +image get_label(image **characters, char *string, int size) +{ + size = size/10; + if(size > 7) size = 7; + image label = make_empty_image(0,0,0); + while(*string){ + image l = characters[size][(int)*string]; + image n = tile_images(label, l, -size - 1 + (size+1)/2); + free_image(label); + label = n; + ++string; + } + image b = border_image(label, label.h*.25); + free_image(label); + return b; +} + +void draw_label(image a, int r, int c, image label, const float *rgb) +{ + int w = label.w; + int h = label.h; + if (r - h >= 0) r = r - h; + + int i, j, k; + for(j = 0; j < h && j + r < a.h; ++j){ + for(i = 0; i < w && i + c < a.w; ++i){ + for(k = 0; k < label.c; ++k){ + float val = get_pixel(label, i, j, k); + set_pixel(a, i+c, j+r, k, rgb[k] * val); + } + } + } +} + +void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b) +{ + //normalize_image(a); + int i; + if(x1 < 0) x1 = 0; + if(x1 >= a.w) x1 = a.w-1; + if(x2 < 0) x2 = 0; + if(x2 >= a.w) x2 = a.w-1; + + if(y1 < 0) y1 = 0; + if(y1 >= a.h) y1 = a.h-1; + if(y2 < 0) y2 = 0; + if(y2 >= a.h) y2 = a.h-1; + + for(i = x1; i <= x2; ++i){ + a.data[i + y1*a.w + 0*a.w*a.h] = r; + a.data[i + y2*a.w + 0*a.w*a.h] = r; + + a.data[i + y1*a.w + 1*a.w*a.h] = g; + a.data[i + y2*a.w + 1*a.w*a.h] = g; + + a.data[i + y1*a.w + 2*a.w*a.h] = b; + a.data[i + y2*a.w + 2*a.w*a.h] = b; + } + for(i = y1; i <= y2; ++i){ + a.data[x1 + i*a.w + 0*a.w*a.h] = r; + a.data[x2 + i*a.w + 0*a.w*a.h] = r; + + a.data[x1 + i*a.w + 1*a.w*a.h] = g; + a.data[x2 + i*a.w + 1*a.w*a.h] = g; + + a.data[x1 + i*a.w + 2*a.w*a.h] = b; + a.data[x2 + i*a.w + 2*a.w*a.h] = b; + } +} + +void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b) +{ + int i; + for(i = 0; i < w; ++i){ + draw_box(a, x1+i, y1+i, x2-i, y2-i, r, g, b); + } +} + +void draw_bbox(image a, box bbox, int w, 
float r, float g, float b) +{ + int left = (bbox.x-bbox.w/2)*a.w; + int right = (bbox.x+bbox.w/2)*a.w; + int top = (bbox.y-bbox.h/2)*a.h; + int bot = (bbox.y+bbox.h/2)*a.h; + + int i; + for(i = 0; i < w; ++i){ + draw_box(a, left+i, top+i, right-i, bot-i, r, g, b); + } +} + +image **load_alphabet() +{ + char *value = getenv("UVMAsyncBench_BASE"); + int i, j; + const int nsize = 8; + image **alphabets = calloc(nsize, sizeof(image)); + for(j = 0; j < nsize; ++j){ + alphabets[j] = calloc(128, sizeof(image)); + for(i = 32; i < 127; ++i){ + char buff[256]; + sprintf(buff, "%s/workloads/realworld/standard/darknet/data/labels/%d_%d.png", value, i, j); + alphabets[j][i] = load_image_color(buff, 0, 0); + } + } + return alphabets; +} + +void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes) +{ + int i,j; + + for(i = 0; i < num; ++i){ + char labelstr[4096] = {0}; + int class = -1; + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j] > thresh){ + if (class < 0) { + strcat(labelstr, names[j]); + class = j; + } else { + strcat(labelstr, ", "); + strcat(labelstr, names[j]); + } + printf("%s: %.0f%%\n", names[j], dets[i].prob[j]*100); + } + } + if(class >= 0){ + int width = im.h * .006; + + /* + if(0){ + width = pow(prob, 1./2.)*10+1; + alphabet = 0; + } + */ + + //printf("%d %s: %.0f%%\n", i, names[class], prob*100); + int offset = class*123457 % classes; + float red = get_color(2,offset,classes); + float green = get_color(1,offset,classes); + float blue = get_color(0,offset,classes); + float rgb[3]; + + //width = prob*20+2; + + rgb[0] = red; + rgb[1] = green; + rgb[2] = blue; + box b = dets[i].bbox; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + + int left = (b.x-b.w/2.)*im.w; + int right = (b.x+b.w/2.)*im.w; + int top = (b.y-b.h/2.)*im.h; + int bot = (b.y+b.h/2.)*im.h; + + if(left < 0) left = 0; + if(right > im.w-1) right = im.w-1; + if(top < 0) top = 0; + if(bot > im.h-1) bot = im.h-1; + + draw_box_width(im, 
left, top, right, bot, width, red, green, blue); + if (alphabet) { + image label = get_label(alphabet, labelstr, (im.h*.03)); + draw_label(im, top + width, left, label, rgb); + free_image(label); + } + if (dets[i].mask){ + image mask = float_to_image(14, 14, 1, dets[i].mask); + image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h); + image tmask = threshold_image(resized_mask, .5); + embed_image(tmask, im, left, top); + free_image(mask); + free_image(resized_mask); + free_image(tmask); + } + } + } +} + +void transpose_image(image im) +{ + assert(im.w == im.h); + int n, m; + int c; + for(c = 0; c < im.c; ++c){ + for(n = 0; n < im.w-1; ++n){ + for(m = n + 1; m < im.w; ++m){ + float swap = im.data[m + im.w*(n + im.h*c)]; + im.data[m + im.w*(n + im.h*c)] = im.data[n + im.w*(m + im.h*c)]; + im.data[n + im.w*(m + im.h*c)] = swap; + } + } + } +} + +void rotate_image_cw(image im, int times) +{ + assert(im.w == im.h); + times = (times + 400) % 4; + int i, x, y, c; + int n = im.w; + for(i = 0; i < times; ++i){ + for(c = 0; c < im.c; ++c){ + for(x = 0; x < n/2; ++x){ + for(y = 0; y < (n-1)/2 + 1; ++y){ + float temp = im.data[y + im.w*(x + im.h*c)]; + im.data[y + im.w*(x + im.h*c)] = im.data[n-1-x + im.w*(y + im.h*c)]; + im.data[n-1-x + im.w*(y + im.h*c)] = im.data[n-1-y + im.w*(n-1-x + im.h*c)]; + im.data[n-1-y + im.w*(n-1-x + im.h*c)] = im.data[x + im.w*(n-1-y + im.h*c)]; + im.data[x + im.w*(n-1-y + im.h*c)] = temp; + } + } + } + } +} + +void flip_image(image a) +{ + int i,j,k; + for(k = 0; k < a.c; ++k){ + for(i = 0; i < a.h; ++i){ + for(j = 0; j < a.w/2; ++j){ + int index = j + a.w*(i + a.h*(k)); + int flip = (a.w - j - 1) + a.w*(i + a.h*(k)); + float swap = a.data[flip]; + a.data[flip] = a.data[index]; + a.data[index] = swap; + } + } + } +} + +image image_distance(image a, image b) +{ + int i,j; + image dist = make_image(a.w, a.h, 1); + for(i = 0; i < a.c; ++i){ + for(j = 0; j < a.h*a.w; ++j){ + dist.data[j] += pow(a.data[i*a.h*a.w+j]-b.data[i*a.h*a.w+j],2); + } + } 
+ for(j = 0; j < a.h*a.w; ++j){ + dist.data[j] = sqrt(dist.data[j]); + } + return dist; +} + +void ghost_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + float max_dist = sqrt((-source.w/2. + .5)*(-source.w/2. + .5)); + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float dist = sqrt((x - source.w/2. + .5)*(x - source.w/2. + .5) + (y - source.h/2. + .5)*(y - source.h/2. + .5)); + float alpha = (1 - dist/max_dist); + if(alpha < 0) alpha = 0; + float v1 = get_pixel(source, x,y,k); + float v2 = get_pixel(dest, dx+x,dy+y,k); + float val = alpha*v1 + (1-alpha)*v2; + set_pixel(dest, dx+x, dy+y, k, val); + } + } + } +} + +void blocky_image(image im, int s) +{ + int i,j,k; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + im.data[i + im.w*(j + im.h*k)] = im.data[i/s*s + im.w*(j/s*s + im.h*k)]; + } + } + } +} + +void censor_image(image im, int dx, int dy, int w, int h) +{ + int i,j,k; + int s = 32; + if(dx < 0) dx = 0; + if(dy < 0) dy = 0; + + for(k = 0; k < im.c; ++k){ + for(j = dy; j < dy + h && j < im.h; ++j){ + for(i = dx; i < dx + w && i < im.w; ++i){ + im.data[i + im.w*(j + im.h*k)] = im.data[i/s*s + im.w*(j/s*s + im.h*k)]; + //im.data[i + j*im.w + k*im.w*im.h] = 0; + } + } + } +} + +void embed_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float val = get_pixel(source, x,y,k); + set_pixel(dest, dx+x, dy+y, k, val); + } + } + } +} + +image collapse_image_layers(image source, int border) +{ + int h = source.h; + h = (h+border)*source.c - border; + image dest = make_image(source.w, h, 1); + int i; + for(i = 0; i < source.c; ++i){ + image layer = get_image_layer(source, i); + int h_offset = i*(source.h+border); + embed_image(layer, dest, 0, h_offset); + free_image(layer); + } + return dest; +} + +void constrain_image(image im) +{ + int i; + for(i = 
0; i < im.w*im.h*im.c; ++i){ + if(im.data[i] < 0) im.data[i] = 0; + if(im.data[i] > 1) im.data[i] = 1; + } +} + +void normalize_image(image p) +{ + int i; + float min = 9999999; + float max = -999999; + + for(i = 0; i < p.h*p.w*p.c; ++i){ + float v = p.data[i]; + if(v < min) min = v; + if(v > max) max = v; + } + if(max - min < .000000001){ + min = 0; + max = 1; + } + for(i = 0; i < p.c*p.w*p.h; ++i){ + p.data[i] = (p.data[i] - min)/(max-min); + } +} + +void normalize_image2(image p) +{ + float *min = calloc(p.c, sizeof(float)); + float *max = calloc(p.c, sizeof(float)); + int i,j; + for(i = 0; i < p.c; ++i) min[i] = max[i] = p.data[i*p.h*p.w]; + + for(j = 0; j < p.c; ++j){ + for(i = 0; i < p.h*p.w; ++i){ + float v = p.data[i+j*p.h*p.w]; + if(v < min[j]) min[j] = v; + if(v > max[j]) max[j] = v; + } + } + for(i = 0; i < p.c; ++i){ + if(max[i] - min[i] < .000000001){ + min[i] = 0; + max[i] = 1; + } + } + for(j = 0; j < p.c; ++j){ + for(i = 0; i < p.w*p.h; ++i){ + p.data[i+j*p.h*p.w] = (p.data[i+j*p.h*p.w] - min[j])/(max[j]-min[j]); + } + } + free(min); + free(max); +} + +void copy_image_into(image src, image dest) +{ + memcpy(dest.data, src.data, src.h*src.w*src.c*sizeof(float)); +} + +image copy_image(image p) +{ + image copy = p; + copy.data = calloc(p.h*p.w*p.c, sizeof(float)); + memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float)); + return copy; +} + +void rgbgr_image(image im) +{ + int i; + for(i = 0; i < im.w*im.h; ++i){ + float swap = im.data[i]; + im.data[i] = im.data[i+im.w*im.h*2]; + im.data[i+im.w*im.h*2] = swap; + } +} + +int show_image(image p, const char *name, int ms) +{ +#ifdef OPENCV + int c = show_image_cv(p, name, ms); + return c; +#else + fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name); + save_image(p, name); + return -1; +#endif +} + +void save_image_options(image im, const char *name, IMTYPE f, int quality) +{ + char buff[256]; + //sprintf(buff, "%s (%d)", name, windows); + if(f == PNG) sprintf(buff, "%s.png", 
name); + else if (f == BMP) sprintf(buff, "%s.bmp", name); + else if (f == TGA) sprintf(buff, "%s.tga", name); + else if (f == JPG) sprintf(buff, "%s.jpg", name); + else sprintf(buff, "%s.png", name); + unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char)); + int i,k; + for(k = 0; k < im.c; ++k){ + for(i = 0; i < im.w*im.h; ++i){ + data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]); + } + } + int success = 0; + if(f == PNG) success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c); + else if (f == BMP) success = stbi_write_bmp(buff, im.w, im.h, im.c, data); + else if (f == TGA) success = stbi_write_tga(buff, im.w, im.h, im.c, data); + else if (f == JPG) success = stbi_write_jpg(buff, im.w, im.h, im.c, data, quality); + free(data); + if(!success) fprintf(stderr, "Failed to write image %s\n", buff); +} + +void save_image(image im, const char *name) +{ + save_image_options(im, name, JPG, 80); +} + +void show_image_layers(image p, char *name) +{ + int i; + char buff[256]; + for(i = 0; i < p.c; ++i){ + sprintf(buff, "%s - Layer %d", name, i); + image layer = get_image_layer(p, i); + show_image(layer, buff, 1); + free_image(layer); + } +} + +void show_image_collapsed(image p, char *name) +{ + image c = collapse_image_layers(p, 1); + show_image(c, name, 1); + free_image(c); +} + +image make_empty_image(int w, int h, int c) +{ + image out; + out.data = 0; + out.h = h; + out.w = w; + out.c = c; + return out; +} + +image make_image(int w, int h, int c) +{ + image out = make_empty_image(w,h,c); + out.data = calloc(h*w*c, sizeof(float)); + return out; +} + +image make_random_image(int w, int h, int c) +{ + image out = make_empty_image(w,h,c); + out.data = calloc(h*w*c, sizeof(float)); + int i; + for(i = 0; i < w*h*c; ++i){ + out.data[i] = (rand_normal() * .25) + .5; + } + return out; +} + +image float_to_image(int w, int h, int c, float *data) +{ + image out = make_empty_image(w,h,c); + out.data = data; + return out; +} + +void place_image(image im, 
int w, int h, int dx, int dy, image canvas) +{ + int x, y, c; + for(c = 0; c < im.c; ++c){ + for(y = 0; y < h; ++y){ + for(x = 0; x < w; ++x){ + float rx = ((float)x / w) * im.w; + float ry = ((float)y / h) * im.h; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(canvas, x + dx, y + dy, c, val); + } + } + } +} + +image center_crop_image(image im, int w, int h) +{ + int m = (im.w < im.h) ? im.w : im.h; + image c = crop_image(im, (im.w - m) / 2, (im.h - m)/2, m, m); + image r = resize_image(c, w, h); + free_image(c); + return r; +} + +image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect) +{ + int x, y, c; + float cx = im.w/2.; + float cy = im.h/2.; + image rot = make_image(w, h, im.c); + for(c = 0; c < im.c; ++c){ + for(y = 0; y < h; ++y){ + for(x = 0; x < w; ++x){ + float rx = cos(rad)*((x - w/2.)/s*aspect + dx/s*aspect) - sin(rad)*((y - h/2.)/s + dy/s) + cx; + float ry = sin(rad)*((x - w/2.)/s*aspect + dx/s*aspect) + cos(rad)*((y - h/2.)/s + dy/s) + cy; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(rot, x, y, c, val); + } + } + } + return rot; +} + +image rotate_image(image im, float rad) +{ + int x, y, c; + float cx = im.w/2.; + float cy = im.h/2.; + image rot = make_image(im.w, im.h, im.c); + for(c = 0; c < im.c; ++c){ + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx; + float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(rot, x, y, c, val); + } + } + } + return rot; +} + +void fill_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] = s; +} + +void translate_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s; +} + +void scale_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s; +} + +image crop_image(image im, int dx, int dy, int w, int h) +{ + image cropped = 
make_image(w, h, im.c); + int i, j, k; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int r = j + dy; + int c = i + dx; + float val = 0; + r = constrain_int(r, 0, im.h-1); + c = constrain_int(c, 0, im.w-1); + val = get_pixel(im, c, r, k); + set_pixel(cropped, i, j, k, val); + } + } + } + return cropped; +} + +int best_3d_shift_r(image a, image b, int min, int max) +{ + if(min == max) return min; + int mid = floor((min + max) / 2.); + image c1 = crop_image(b, 0, mid, b.w, b.h); + image c2 = crop_image(b, 0, mid+1, b.w, b.h); + float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 10); + float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 10); + free_image(c1); + free_image(c2); + if(d1 < d2) return best_3d_shift_r(a, b, min, mid); + else return best_3d_shift_r(a, b, mid+1, max); +} + +int best_3d_shift(image a, image b, int min, int max) +{ + int i; + int best = 0; + float best_distance = FLT_MAX; + for(i = min; i <= max; i += 2){ + image c = crop_image(b, 0, i, b.w, b.h); + float d = dist_array(c.data, a.data, a.w*a.h*a.c, 100); + if(d < best_distance){ + best_distance = d; + best = i; + } + printf("%d %f\n", i, d); + free_image(c); + } + return best; +} + +void composite_3d(char *f1, char *f2, char *out, int delta) +{ + if(!out) out = "out"; + image a = load_image(f1, 0,0,0); + image b = load_image(f2, 0,0,0); + int shift = best_3d_shift_r(a, b, -a.h/100, a.h/100); + + image c1 = crop_image(b, 10, shift, b.w, b.h); + float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 100); + image c2 = crop_image(b, -10, shift, b.w, b.h); + float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 100); + + if(d2 < d1 && 0){ + image swap = a; + a = b; + b = swap; + shift = -shift; + printf("swapped, %d\n", shift); + } + else{ + printf("%d\n", shift); + } + + image c = crop_image(b, delta, shift, a.w, a.h); + int i; + for(i = 0; i < c.w*c.h; ++i){ + c.data[i] = a.data[i]; + } + save_image(c, out); +} + +void letterbox_image_into(image im, int w, int h, 
image boxed) +{ + int new_w = im.w; + int new_h = im.h; + if (((float)w/im.w) < ((float)h/im.h)) { + new_w = w; + new_h = (im.h * w)/im.w; + } else { + new_h = h; + new_w = (im.w * h)/im.h; + } + image resized = resize_image(im, new_w, new_h); + embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2); + free_image(resized); +} + +image letterbox_image(image im, int w, int h) +{ + int new_w = im.w; + int new_h = im.h; + if (((float)w/im.w) < ((float)h/im.h)) { + new_w = w; + new_h = (im.h * w)/im.w; + } else { + new_h = h; + new_w = (im.w * h)/im.h; + } + image resized = resize_image(im, new_w, new_h); + image boxed = make_image(w, h, im.c); + fill_image(boxed, .5); + //int i; + //for(i = 0; i < boxed.w*boxed.h*boxed.c; ++i) boxed.data[i] = 0; + embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2); + free_image(resized); + return boxed; +} + +image resize_max(image im, int max) +{ + int w = im.w; + int h = im.h; + if(w > h){ + h = (h * max) / w; + w = max; + } else { + w = (w * max) / h; + h = max; + } + if(w == im.w && h == im.h) return im; + image resized = resize_image(im, w, h); + return resized; +} + +image resize_min(image im, int min) +{ + int w = im.w; + int h = im.h; + if(w < h){ + h = (h * min) / w; + w = min; + } else { + w = (w * min) / h; + h = min; + } + if(w == im.w && h == im.h) return im; + image resized = resize_image(im, w, h); + return resized; +} + +image random_crop_image(image im, int w, int h) +{ + int dx = rand_int(0, im.w - w); + int dy = rand_int(0, im.h - h); + image crop = crop_image(im, dx, dy, w, h); + return crop; +} + +augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h) +{ + augment_args a = {0}; + aspect = rand_scale(aspect); + int r = rand_int(low, high); + int min = (im.h < im.w*aspect) ? 
im.h : im.w*aspect; + float scale = (float)r / min; + + float rad = rand_uniform(-angle, angle) * TWO_PI / 360.; + + float dx = (im.w*scale/aspect - w) / 2.; + float dy = (im.h*scale - w) / 2.; + //if(dx < 0) dx = 0; + //if(dy < 0) dy = 0; + dx = rand_uniform(-dx, dx); + dy = rand_uniform(-dy, dy); + + a.rad = rad; + a.scale = scale; + a.w = w; + a.h = h; + a.dx = dx; + a.dy = dy; + a.aspect = aspect; + return a; +} + +image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h) +{ + augment_args a = random_augment_args(im, angle, aspect, low, high, w, h); + image crop = rotate_crop_image(im, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + return crop; +} + +float three_way_max(float a, float b, float c) +{ + return (a > b) ? ( (a > c) ? a : c) : ( (b > c) ? b : c) ; +} + +float three_way_min(float a, float b, float c) +{ + return (a < b) ? ( (a < c) ? a : c) : ( (b < c) ? b : c) ; +} + +void yuv_to_rgb(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float y, u, v; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + y = get_pixel(im, i , j, 0); + u = get_pixel(im, i , j, 1); + v = get_pixel(im, i , j, 2); + + r = y + 1.13983*v; + g = y + -.39465*u + -.58060*v; + b = y + 2.03211*u; + + set_pixel(im, i, j, 0, r); + set_pixel(im, i, j, 1, g); + set_pixel(im, i, j, 2, b); + } + } +} + +void rgb_to_yuv(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float y, u, v; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + r = get_pixel(im, i , j, 0); + g = get_pixel(im, i , j, 1); + b = get_pixel(im, i , j, 2); + + y = .299*r + .587*g + .114*b; + u = -.14713*r + -.28886*g + .436*b; + v = .615*r + -.51499*g + -.10001*b; + + set_pixel(im, i, j, 0, y); + set_pixel(im, i, j, 1, u); + set_pixel(im, i, j, 2, v); + } + } +} + +// http://www.cs.rit.edu/~ncs/color/t_convert.html +void rgb_to_hsv(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float h, s, v; + for(j = 0; j < im.h; 
++j){ + for(i = 0; i < im.w; ++i){ + r = get_pixel(im, i , j, 0); + g = get_pixel(im, i , j, 1); + b = get_pixel(im, i , j, 2); + float max = three_way_max(r,g,b); + float min = three_way_min(r,g,b); + float delta = max - min; + v = max; + if(max == 0){ + s = 0; + h = 0; + }else{ + s = delta/max; + if(r == max){ + h = (g - b) / delta; + } else if (g == max) { + h = 2 + (b - r) / delta; + } else { + h = 4 + (r - g) / delta; + } + if (h < 0) h += 6; + h = h/6.; + } + set_pixel(im, i, j, 0, h); + set_pixel(im, i, j, 1, s); + set_pixel(im, i, j, 2, v); + } + } +} + +void hsv_to_rgb(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float h, s, v; + float f, p, q, t; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + h = 6 * get_pixel(im, i , j, 0); + s = get_pixel(im, i , j, 1); + v = get_pixel(im, i , j, 2); + if (s == 0) { + r = g = b = v; + } else { + int index = floor(h); + f = h - index; + p = v*(1-s); + q = v*(1-s*f); + t = v*(1-s*(1-f)); + if(index == 0){ + r = v; g = t; b = p; + } else if(index == 1){ + r = q; g = v; b = p; + } else if(index == 2){ + r = p; g = v; b = t; + } else if(index == 3){ + r = p; g = q; b = v; + } else if(index == 4){ + r = t; g = p; b = v; + } else { + r = v; g = p; b = q; + } + } + set_pixel(im, i, j, 0, r); + set_pixel(im, i, j, 1, g); + set_pixel(im, i, j, 2, b); + } + } +} + +void grayscale_image_3c(image im) +{ + assert(im.c == 3); + int i, j, k; + float scale[] = {0.299, 0.587, 0.114}; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float val = 0; + for(k = 0; k < 3; ++k){ + val += scale[k]*get_pixel(im, i, j, k); + } + im.data[0*im.h*im.w + im.w*j + i] = val; + im.data[1*im.h*im.w + im.w*j + i] = val; + im.data[2*im.h*im.w + im.w*j + i] = val; + } + } +} + +image grayscale_image(image im) +{ + assert(im.c == 3); + int i, j, k; + image gray = make_image(im.w, im.h, 1); + float scale[] = {0.299, 0.587, 0.114}; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ 
+ gray.data[i+im.w*j] += scale[k]*get_pixel(im, i, j, k); + } + } + } + return gray; +} + +image threshold_image(image im, float thresh) +{ + int i; + image t = make_image(im.w, im.h, im.c); + for(i = 0; i < im.w*im.h*im.c; ++i){ + t.data[i] = im.data[i]>thresh ? 1 : 0; + } + return t; +} + +image blend_image(image fore, image back, float alpha) +{ + assert(fore.w == back.w && fore.h == back.h && fore.c == back.c); + image blend = make_image(fore.w, fore.h, fore.c); + int i, j, k; + for(k = 0; k < fore.c; ++k){ + for(j = 0; j < fore.h; ++j){ + for(i = 0; i < fore.w; ++i){ + float val = alpha * get_pixel(fore, i, j, k) + + (1 - alpha)* get_pixel(back, i, j, k); + set_pixel(blend, i, j, k, val); + } + } + } + return blend; +} + +void scale_image_channel(image im, int c, float v) +{ + int i, j; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float pix = get_pixel(im, i, j, c); + pix = pix*v; + set_pixel(im, i, j, c, pix); + } + } +} + +void translate_image_channel(image im, int c, float v) +{ + int i, j; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float pix = get_pixel(im, i, j, c); + pix = pix+v; + set_pixel(im, i, j, c, pix); + } + } +} + +image binarize_image(image im) +{ + image c = copy_image(im); + int i; + for(i = 0; i < im.w * im.h * im.c; ++i){ + if(c.data[i] > .5) c.data[i] = 1; + else c.data[i] = 0; + } + return c; +} + +void saturate_image(image im, float sat) +{ + rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + hsv_to_rgb(im); + constrain_image(im); +} + +void hue_image(image im, float hue) +{ + rgb_to_hsv(im); + int i; + for(i = 0; i < im.w*im.h; ++i){ + im.data[i] = im.data[i] + hue; + if (im.data[i] > 1) im.data[i] -= 1; + if (im.data[i] < 0) im.data[i] += 1; + } + hsv_to_rgb(im); + constrain_image(im); +} + +void exposure_image(image im, float sat) +{ + rgb_to_hsv(im); + scale_image_channel(im, 2, sat); + hsv_to_rgb(im); + constrain_image(im); +} + +void distort_image(image im, float hue, float sat, float val) +{ + 
rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + scale_image_channel(im, 2, val); + int i; + for(i = 0; i < im.w*im.h; ++i){ + im.data[i] = im.data[i] + hue; + if (im.data[i] > 1) im.data[i] -= 1; + if (im.data[i] < 0) im.data[i] += 1; + } + hsv_to_rgb(im); + constrain_image(im); +} + +void random_distort_image(image im, float hue, float saturation, float exposure) +{ + float dhue = rand_uniform(-hue, hue); + float dsat = rand_scale(saturation); + float dexp = rand_scale(exposure); + distort_image(im, dhue, dsat, dexp); +} + +void saturate_exposure_image(image im, float sat, float exposure) +{ + rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + scale_image_channel(im, 2, exposure); + hsv_to_rgb(im); + constrain_image(im); +} + +image resize_image(image im, int w, int h) +{ + image resized = make_image(w, h, im.c); + image part = make_image(w, im.h, im.c); + int r, c, k; + float w_scale = (float)(im.w - 1) / (w - 1); + float h_scale = (float)(im.h - 1) / (h - 1); + for(k = 0; k < im.c; ++k){ + for(r = 0; r < im.h; ++r){ + for(c = 0; c < w; ++c){ + float val = 0; + if(c == w-1 || im.w == 1){ + val = get_pixel(im, im.w-1, r, k); + } else { + float sx = c*w_scale; + int ix = (int) sx; + float dx = sx - ix; + val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k); + } + set_pixel(part, c, r, k, val); + } + } + } + for(k = 0; k < im.c; ++k){ + for(r = 0; r < h; ++r){ + float sy = r*h_scale; + int iy = (int) sy; + float dy = sy - iy; + for(c = 0; c < w; ++c){ + float val = (1-dy) * get_pixel(part, c, iy, k); + set_pixel(resized, c, r, k, val); + } + if(r == h-1 || im.h == 1) continue; + for(c = 0; c < w; ++c){ + float val = dy * get_pixel(part, c, iy+1, k); + add_pixel(resized, c, r, k, val); + } + } + } + + free_image(part); + return resized; +} + + +void test_resize(char *filename) +{ + image im = load_image(filename, 0,0, 3); + float mag = mag_array(im.data, im.w*im.h*im.c); + printf("L2 Norm: %f\n", mag); + image gray = grayscale_image(im); + 
+ image c1 = copy_image(im); + image c2 = copy_image(im); + image c3 = copy_image(im); + image c4 = copy_image(im); + distort_image(c1, .1, 1.5, 1.5); + distort_image(c2, -.1, .66666, .66666); + distort_image(c3, .1, 1.5, .66666); + distort_image(c4, .1, .66666, 1.5); + + + show_image(im, "Original", 1); + show_image(gray, "Gray", 1); + show_image(c1, "C1", 1); + show_image(c2, "C2", 1); + show_image(c3, "C3", 1); + show_image(c4, "C4", 1); +#ifdef OPENCV + while(1){ + image aug = random_augment_image(im, 0, .75, 320, 448, 320, 320); + show_image(aug, "aug", 1); + free_image(aug); + + + float exposure = 1.15; + float saturation = 1.15; + float hue = .05; + + image c = copy_image(im); + + float dexp = rand_scale(exposure); + float dsat = rand_scale(saturation); + float dhue = rand_uniform(-hue, hue); + + distort_image(c, dhue, dsat, dexp); + show_image(c, "rand", 1); + printf("%f %f %f\n", dhue, dsat, dexp); + free_image(c); + } +#endif +} + + +image load_image_stb(char *filename, int channels) +{ + int w, h, c; + unsigned char *data = stbi_load(filename, &w, &h, &c, channels); + if (!data) { + fprintf(stderr, "Cannot load image \"%s\"\nSTB Reason: %s\n", filename, stbi_failure_reason()); + exit(0); + } + if(channels) c = channels; + int i,j,k; + image im = make_image(w, h, c); + for(k = 0; k < c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int dst_index = i + w*j + w*h*k; + int src_index = k + c*i + c*w*j; + im.data[dst_index] = (float)data[src_index]/255.; + } + } + } + free(data); + return im; +} + +image load_image(char *filename, int w, int h, int c) +{ +#ifdef OPENCV + image out = load_image_cv(filename, c); +#else + image out = load_image_stb(filename, c); +#endif + + if((h && w) && (h != out.h || w != out.w)){ + image resized = resize_image(out, w, h); + free_image(out); + out = resized; + } + return out; +} + +image load_image_color(char *filename, int w, int h) +{ + return load_image(filename, w, h, 3); +} + +image get_image_layer(image m, 
int l) +{ + image out = make_image(m.w, m.h, 1); + int i; + for(i = 0; i < m.h*m.w; ++i){ + out.data[i] = m.data[i+l*m.h*m.w]; + } + return out; +} +void print_image(image m) +{ + int i, j, k; + for(i =0 ; i < m.c; ++i){ + for(j =0 ; j < m.h; ++j){ + for(k = 0; k < m.w; ++k){ + printf("%.2lf, ", m.data[i*m.h*m.w + j*m.w + k]); + if(k > 30) break; + } + printf("\n"); + if(j > 30) break; + } + printf("\n"); + } + printf("\n"); +} + +image collapse_images_vert(image *ims, int n) +{ + int color = 1; + int border = 1; + int h,w,c; + w = ims[0].w; + h = (ims[0].h + border) * n - border; + c = ims[0].c; + if(c != 3 || !color){ + w = (w+border)*c - border; + c = 1; + } + + image filters = make_image(w, h, c); + int i,j; + for(i = 0; i < n; ++i){ + int h_offset = i*(ims[0].h+border); + image copy = copy_image(ims[i]); + //normalize_image(copy); + if(c == 3 && color){ + embed_image(copy, filters, 0, h_offset); + } + else{ + for(j = 0; j < copy.c; ++j){ + int w_offset = j*(ims[0].w+border); + image layer = get_image_layer(copy, j); + embed_image(layer, filters, w_offset, h_offset); + free_image(layer); + } + } + free_image(copy); + } + return filters; +} + +image collapse_images_horz(image *ims, int n) +{ + int color = 1; + int border = 1; + int h,w,c; + int size = ims[0].h; + h = size; + w = (ims[0].w + border) * n - border; + c = ims[0].c; + if(c != 3 || !color){ + h = (h+border)*c - border; + c = 1; + } + + image filters = make_image(w, h, c); + int i,j; + for(i = 0; i < n; ++i){ + int w_offset = i*(size+border); + image copy = copy_image(ims[i]); + //normalize_image(copy); + if(c == 3 && color){ + embed_image(copy, filters, w_offset, 0); + } + else{ + for(j = 0; j < copy.c; ++j){ + int h_offset = j*(size+border); + image layer = get_image_layer(copy, j); + embed_image(layer, filters, w_offset, h_offset); + free_image(layer); + } + } + free_image(copy); + } + return filters; +} + +void show_image_normalized(image im, const char *name) +{ + image c = copy_image(im); + 
normalize_image(c); + show_image(c, name, 1); + free_image(c); +} + +void show_images(image *ims, int n, char *window) +{ + image m = collapse_images_vert(ims, n); + /* + int w = 448; + int h = ((float)m.h/m.w) * 448; + if(h > 896){ + h = 896; + w = ((float)m.w/m.h) * 896; + } + image sized = resize_image(m, w, h); + */ + normalize_image(m); + save_image(m, window); + show_image(m, window, 1); + free_image(m); +} + +void free_image(image m) +{ + if(m.data){ + free(m.data); + } +} diff --git a/workloads/realworld/uvm_prefetch/darknet/src/image.h b/workloads/realworld/uvm_prefetch/darknet/src/image.h new file mode 100644 index 0000000000000000000000000000000000000000..3392bb9787fc542929cda064bcefa0f3f893b76c --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/image.h @@ -0,0 +1,69 @@ +#ifndef IMAGE_H +#define IMAGE_H + +#include +#include +#include +#include +#include +#include "box.h" +#include "darknet.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef OPENCV +void *open_video_stream(const char *f, int c, int w, int h, int fps); +image get_image_from_stream(void *p); +image load_image_cv(char *filename, int channels); +int show_image_cv(image im, const char* name, int ms); +#endif + +float get_color(int c, int x, int max); +void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); +void draw_bbox(image a, box bbox, int w, float r, float g, float b); +void write_label(image a, int r, int c, image *characters, char *string, float *rgb); +image image_distance(image a, image b); +void scale_image(image m, float s); +image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect); +image random_crop_image(image im, int w, int h); +image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h); +augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h); +void letterbox_image_into(image im, int w, int h, image 
boxed); +image resize_max(image im, int max); +void translate_image(image m, float s); +void embed_image(image source, image dest, int dx, int dy); +void place_image(image im, int w, int h, int dx, int dy, image canvas); +void saturate_image(image im, float sat); +void exposure_image(image im, float sat); +void distort_image(image im, float hue, float sat, float val); +void saturate_exposure_image(image im, float sat, float exposure); +void rgb_to_hsv(image im); +void hsv_to_rgb(image im); +void yuv_to_rgb(image im); +void rgb_to_yuv(image im); + + +image collapse_image_layers(image source, int border); +image collapse_images_horz(image *ims, int n); +image collapse_images_vert(image *ims, int n); + +void show_image_normalized(image im, const char *name); +void show_images(image *ims, int n, char *window); +void show_image_layers(image p, char *name); +void show_image_collapsed(image p, char *name); + +void print_image(image m); + +image make_empty_image(int w, int h, int c); +void copy_image_into(image src, image dest); + +image get_image_layer(image m, int l); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/image_opencv.cpp b/workloads/realworld/uvm_prefetch/darknet/src/image_opencv.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7511280be07ca987fd51fa54aea55910cd34a706 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/image_opencv.cpp @@ -0,0 +1,135 @@ +#ifdef OPENCV + +#include "stdio.h" +#include "stdlib.h" +#include "opencv2/opencv.hpp" +#include "image.h" + +using namespace cv; + +extern "C" { + +IplImage *image_to_ipl(image im) +{ + int x,y,c; + IplImage *disp = cvCreateImage(cvSize(im.w,im.h), IPL_DEPTH_8U, im.c); + int step = disp->widthStep; + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + for(c= 0; c < im.c; ++c){ + float val = im.data[c*im.h*im.w + y*im.w + x]; + disp->imageData[y*step + x*im.c + c] = (unsigned char)(val*255); + } + } + } + return 
disp; +} + +image ipl_to_image(IplImage* src) +{ + int h = src->height; + int w = src->width; + int c = src->nChannels; + image im = make_image(w, h, c); + unsigned char *data = (unsigned char *)src->imageData; + int step = src->widthStep; + int i, j, k; + + for(i = 0; i < h; ++i){ + for(k= 0; k < c; ++k){ + for(j = 0; j < w; ++j){ + im.data[k*w*h + i*w + j] = data[i*step + j*c + k]/255.; + } + } + } + return im; +} + +Mat image_to_mat(image im) +{ + image copy = copy_image(im); + constrain_image(copy); + if(im.c == 3) rgbgr_image(copy); + + IplImage *ipl = image_to_ipl(copy); + Mat m = cvarrToMat(ipl, true); + cvReleaseImage(&ipl); + free_image(copy); + return m; +} + +image mat_to_image(Mat m) +{ + IplImage ipl = m; + image im = ipl_to_image(&ipl); + rgbgr_image(im); + return im; +} + +void *open_video_stream(const char *f, int c, int w, int h, int fps) +{ + VideoCapture *cap; + if(f) cap = new VideoCapture(f); + else cap = new VideoCapture(c); + if(!cap->isOpened()) return 0; + if(w) cap->set(CV_CAP_PROP_FRAME_WIDTH, w); + if(h) cap->set(CV_CAP_PROP_FRAME_HEIGHT, w); + if(fps) cap->set(CV_CAP_PROP_FPS, w); + return (void *) cap; +} + +image get_image_from_stream(void *p) +{ + VideoCapture *cap = (VideoCapture *)p; + Mat m; + *cap >> m; + if(m.empty()) return make_empty_image(0,0,0); + return mat_to_image(m); +} + +image load_image_cv(char *filename, int channels) +{ + int flag = -1; + if (channels == 0) flag = -1; + else if (channels == 1) flag = 0; + else if (channels == 3) flag = 1; + else { + fprintf(stderr, "OpenCV can't force load with %d channels\n", channels); + } + Mat m; + m = imread(filename, flag); + if(!m.data){ + fprintf(stderr, "Cannot load image \"%s\"\n", filename); + char buff[256]; + sprintf(buff, "echo %s >> bad.list", filename); + system(buff); + return make_image(10,10,3); + //exit(0); + } + image im = mat_to_image(m); + return im; +} + +int show_image_cv(image im, const char* name, int ms) +{ + Mat m = image_to_mat(im); + imshow(name, m); + 
int c = waitKey(ms); + if (c != -1) c = c%256; + return c; +} + +void make_window(char *name, int w, int h, int fullscreen) +{ + namedWindow(name, WINDOW_NORMAL); + if (fullscreen) { + setWindowProperty(name, CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); + } else { + resizeWindow(name, w, h); + if(strcmp(name, "Demo") == 0) moveWindow(name, 0, 0); + } +} + +} + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/iseg_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/iseg_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..a1b822a5797a6d04b0f3756f106cb2b20ba31a5b --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/iseg_layer.c @@ -0,0 +1,225 @@ +#include "iseg_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_iseg_layer(int batch, int w, int h, int classes, int ids) +{ + layer l = {0}; + l.type = ISEG; + + l.h = h; + l.w = w; + l.c = classes + ids; + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.batch = batch; + l.extra = ids; + l.cost = calloc(1, sizeof(float)); + l.outputs = h*w*l.c; + l.inputs = l.outputs; + l.truths = 90*(l.w*l.h+1); + l.delta = calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + + l.counts = calloc(90, sizeof(int)); + l.sums = calloc(90, sizeof(float*)); + if(ids){ + int i; + for(i = 0; i < 90; ++i){ + l.sums[i] = calloc(ids, sizeof(float)); + } + } + + l.forward = forward_iseg_layer; + l.backward = backward_iseg_layer; +#ifdef GPU + l.forward_gpu = forward_iseg_layer_gpu; + l.backward_gpu = backward_iseg_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "iseg\n"); + srand(0); + + return l; +} + +void resize_iseg_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = 
h*w*l->c; + l->inputs = l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +void forward_iseg_layer(const layer l, network net) +{ + + double time = what_time_is_it_now(); + int i,b,j,k; + int ids = l.extra; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + int index = b*l.outputs; + activate_array(l.output + index, l.classes*l.w*l.h, LOGISTIC); + } +#endif + + for (b = 0; b < l.batch; ++b){ + // a priori, each pixel has no class + for(i = 0; i < l.classes; ++i){ + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + i*l.w*l.h + k; + l.delta[index] = 0 - l.output[index]; + } + } + + // a priori, embedding should be small magnitude + for(i = 0; i < ids; ++i){ + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + (i+l.classes)*l.w*l.h + k; + l.delta[index] = .1 * (0 - l.output[index]); + } + } + + + memset(l.counts, 0, 90*sizeof(int)); + for(i = 0; i < 90; ++i){ + fill_cpu(ids, 0, l.sums[i], 1); + + int c = net.truth[b*l.truths + i*(l.w*l.h+1)]; + if(c < 0) break; + // add up metric embeddings for each instance + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + c*l.w*l.h + k; + float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k]; + if(v){ + l.delta[index] = v - l.output[index]; + axpy_cpu(ids, 1, l.output + b*l.outputs + l.classes*l.w*l.h + k, l.w*l.h, l.sums[i], 1); + ++l.counts[i]; + } + } + } + + float *mse = calloc(90, sizeof(float)); + for(i = 0; i < 90; ++i){ + int c = net.truth[b*l.truths + i*(l.w*l.h+1)]; + if(c < 0) break; + for(k = 0; k < l.w*l.h; ++k){ + float v = net.truth[b*l.truths + i*(l.w*l.h + 
1) + 1 + k]; + if(v){ + int z; + float sum = 0; + for(z = 0; z < ids; ++z){ + int index = b*l.outputs + (l.classes + z)*l.w*l.h + k; + sum += pow(l.sums[i][z]/l.counts[i] - l.output[index], 2); + } + mse[i] += sum; + } + } + mse[i] /= l.counts[i]; + } + + // Calculate average embedding + for(i = 0; i < 90; ++i){ + if(!l.counts[i]) continue; + scal_cpu(ids, 1.f/l.counts[i], l.sums[i], 1); + if(b == 0 && net.gpu_index == 0){ + printf("%4d, %6.3f, ", l.counts[i], mse[i]); + for(j = 0; j < ids; ++j){ + printf("%6.3f,", l.sums[i][j]); + } + printf("\n"); + } + } + free(mse); + + // Calculate embedding loss + for(i = 0; i < 90; ++i){ + if(!l.counts[i]) continue; + for(k = 0; k < l.w*l.h; ++k){ + float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k]; + if(v){ + for(j = 0; j < 90; ++j){ + if(!l.counts[j])continue; + int z; + for(z = 0; z < ids; ++z){ + int index = b*l.outputs + (l.classes + z)*l.w*l.h + k; + float diff = l.sums[j][z] - l.output[index]; + if (j == i) l.delta[index] += diff < 0? -.1 : .1; + else l.delta[index] += -(diff < 0? 
-.1 : .1); + } + } + } + } + } + + for(i = 0; i < ids; ++i){ + for(k = 0; k < l.w*l.h; ++k){ + int index = b*l.outputs + (i+l.classes)*l.w*l.h + k; + l.delta[index] *= .01; + } + } + } + + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("took %lf sec\n", what_time_is_it_now() - time); +} + +void backward_iseg_layer(const layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_iseg_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b; + for (b = 0; b < l.batch; ++b){ + activate_array_gpu(l.output_gpu + b*l.outputs, l.classes*l.w*l.h, LOGISTIC); + //if(l.extra) activate_array_gpu(l.output_gpu + b*l.outputs + l.classes*l.w*l.h, l.extra*l.w*l.h, LOGISTIC); + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_iseg_layer(l, net); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_iseg_layer_gpu(const layer l, network net) +{ + int b; + for (b = 0; b < l.batch; ++b){ + //if(l.extra) gradient_array_gpu(l.output_gpu + b*l.outputs + l.classes*l.w*l.h, l.extra*l.w*l.h, LOGISTIC, l.delta_gpu + b*l.outputs + l.classes*l.w*l.h); + } + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/iseg_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/iseg_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..dd8e64e023caf1e1fd0c30af57f9983f24ddd691 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/iseg_layer.h @@ -0,0 +1,19 @@ +#ifndef ISEG_LAYER_H +#define ISEG_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_iseg_layer(int batch, int w, int h, int classes, int ids); +void forward_iseg_layer(const layer l, network net); +void backward_iseg_layer(const layer l, network net); +void resize_iseg_layer(layer *l, int w, int h); +int 
iseg_num_detections(layer l, float thresh); + +#ifdef GPU +void forward_iseg_layer_gpu(const layer l, network net); +void backward_iseg_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/l2norm_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/l2norm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..0cf7f844170cb2c3dba15be94d4a435aaa63067c --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/l2norm_layer.c @@ -0,0 +1,63 @@ +#include "l2norm_layer.h" +#include "activations.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +layer make_l2norm_layer(int batch, int inputs) +{ + fprintf(stderr, "l2norm %4d\n", inputs); + layer l = {0}; + l.type = L2NORM; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.output = calloc(inputs*batch, sizeof(float)); + l.scales = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + + l.forward = forward_l2norm_layer; + l.backward = backward_l2norm_layer; + #ifdef GPU + l.forward_gpu = forward_l2norm_layer_gpu; + l.backward_gpu = backward_l2norm_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.scales_gpu = cuda_make_array(l.output, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_l2norm_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + l2normalize_cpu(l.output, l.scales, l.batch, l.out_c, l.out_w*l.out_h); +} + +void backward_l2norm_layer(const layer l, network net) +{ + //axpy_cpu(l.inputs*l.batch, 1, l.scales, 1, l.delta, 1); + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_l2norm_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + l2normalize_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, 
l.out_w*l.out_h); +} + +void backward_l2norm_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.scales_gpu, 1, l.delta_gpu, 1); + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/l2norm_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/l2norm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1ca6f710f017f2857f566eaed90634698d72b26d --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/l2norm_layer.h @@ -0,0 +1,15 @@ +#ifndef L2NORM_LAYER_H +#define L2NORM_LAYER_H +#include "layer.h" +#include "network.h" + +layer make_l2norm_layer(int batch, int inputs); +void forward_l2norm_layer(const layer l, network net); +void backward_l2norm_layer(const layer l, network net); + +#ifdef GPU +void forward_l2norm_layer_gpu(const layer l, network net); +void backward_l2norm_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/layer.c b/workloads/realworld/uvm_prefetch/darknet/src/layer.c new file mode 100644 index 0000000000000000000000000000000000000000..3bffe436f06a455e2d1043158ff6da9b07bbb61f --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/layer.c @@ -0,0 +1,97 @@ +#include "layer.h" +#include "cuda_dark.h" + +#include + +void free_layer(layer l) +{ + if(l.type == DROPOUT){ + if(l.rand) free(l.rand); +#ifdef GPU + if(l.rand_gpu) cuda_free(l.rand_gpu); +#endif + return; + } + if(l.cweights) free(l.cweights); + if(l.indexes) free(l.indexes); + if(l.input_layers) free(l.input_layers); + if(l.input_sizes) free(l.input_sizes); + if(l.map) free(l.map); + if(l.rand) free(l.rand); + if(l.cost) free(l.cost); + if(l.state) free(l.state); + if(l.prev_state) free(l.prev_state); + if(l.forgot_state) free(l.forgot_state); + if(l.forgot_delta) free(l.forgot_delta); + if(l.state_delta) free(l.state_delta); + if(l.concat) free(l.concat); + if(l.concat_delta) 
free(l.concat_delta); + if(l.binary_weights) free(l.binary_weights); + if(l.biases) free(l.biases); + if(l.bias_updates) free(l.bias_updates); + if(l.scales) free(l.scales); + if(l.scale_updates) free(l.scale_updates); + if(l.weights) free(l.weights); + if(l.weight_updates) free(l.weight_updates); + if(l.delta) free(l.delta); + if(l.output) free(l.output); + if(l.squared) free(l.squared); + if(l.norms) free(l.norms); + if(l.spatial_mean) free(l.spatial_mean); + if(l.mean) free(l.mean); + if(l.variance) free(l.variance); + if(l.mean_delta) free(l.mean_delta); + if(l.variance_delta) free(l.variance_delta); + if(l.rolling_mean) free(l.rolling_mean); + if(l.rolling_variance) free(l.rolling_variance); + if(l.x) free(l.x); + if(l.x_norm) free(l.x_norm); + if(l.m) free(l.m); + if(l.v) free(l.v); + if(l.z_cpu) free(l.z_cpu); + if(l.r_cpu) free(l.r_cpu); + if(l.h_cpu) free(l.h_cpu); + if(l.binary_input) free(l.binary_input); + +#ifdef GPU + if(l.indexes_gpu) cuda_free((float *)l.indexes_gpu); + + if(l.z_gpu) cuda_free(l.z_gpu); + if(l.r_gpu) cuda_free(l.r_gpu); + if(l.h_gpu) cuda_free(l.h_gpu); + if(l.m_gpu) cuda_free(l.m_gpu); + if(l.v_gpu) cuda_free(l.v_gpu); + if(l.prev_state_gpu) cuda_free(l.prev_state_gpu); + if(l.forgot_state_gpu) cuda_free(l.forgot_state_gpu); + if(l.forgot_delta_gpu) cuda_free(l.forgot_delta_gpu); + if(l.state_gpu) cuda_free(l.state_gpu); + if(l.state_delta_gpu) cuda_free(l.state_delta_gpu); + if(l.gate_gpu) cuda_free(l.gate_gpu); + if(l.gate_delta_gpu) cuda_free(l.gate_delta_gpu); + if(l.save_gpu) cuda_free(l.save_gpu); + if(l.save_delta_gpu) cuda_free(l.save_delta_gpu); + if(l.concat_gpu) cuda_free(l.concat_gpu); + if(l.concat_delta_gpu) cuda_free(l.concat_delta_gpu); + if(l.binary_input_gpu) cuda_free(l.binary_input_gpu); + if(l.binary_weights_gpu) cuda_free(l.binary_weights_gpu); + if(l.mean_gpu) cuda_free(l.mean_gpu); + if(l.variance_gpu) cuda_free(l.variance_gpu); + if(l.rolling_mean_gpu) cuda_free(l.rolling_mean_gpu); + 
if(l.rolling_variance_gpu) cuda_free(l.rolling_variance_gpu); + if(l.variance_delta_gpu) cuda_free(l.variance_delta_gpu); + if(l.mean_delta_gpu) cuda_free(l.mean_delta_gpu); + if(l.x_gpu) cuda_free(l.x_gpu); + if(l.x_norm_gpu) cuda_free(l.x_norm_gpu); + if(l.weights_gpu) cuda_free(l.weights_gpu); + if(l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); + if(l.biases_gpu) cuda_free(l.biases_gpu); + if(l.bias_updates_gpu) cuda_free(l.bias_updates_gpu); + if(l.scales_gpu) cuda_free(l.scales_gpu); + if(l.scale_updates_gpu) cuda_free(l.scale_updates_gpu); + if(l.output_gpu) cuda_free(l.output_gpu); + if(l.delta_gpu) cuda_free(l.delta_gpu); + if(l.rand_gpu) cuda_free(l.rand_gpu); + if(l.squared_gpu) cuda_free(l.squared_gpu); + if(l.norms_gpu) cuda_free(l.norms_gpu); +#endif +} diff --git a/workloads/realworld/uvm_prefetch/darknet/src/layer.h b/workloads/realworld/uvm_prefetch/darknet/src/layer.h new file mode 100644 index 0000000000000000000000000000000000000000..af6cd2ab5054f5ef3bbdfca2da45f08d710a7bd0 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/layer.h @@ -0,0 +1 @@ +#include "darknet.h" diff --git a/workloads/realworld/uvm_prefetch/darknet/src/list.c b/workloads/realworld/uvm_prefetch/darknet/src/list.c new file mode 100644 index 0000000000000000000000000000000000000000..0e4165d37800e1b4c7c33992cd64a6634fe4688c --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/list.c @@ -0,0 +1,92 @@ +#include +#include +#include "list.h" + +list *make_list() +{ + list *l = malloc(sizeof(list)); + l->size = 0; + l->front = 0; + l->back = 0; + return l; +} + +/* +void transfer_node(list *s, list *d, node *n) +{ + node *prev, *next; + prev = n->prev; + next = n->next; + if(prev) prev->next = next; + if(next) next->prev = prev; + --s->size; + if(s->front == n) s->front = next; + if(s->back == n) s->back = prev; +} +*/ + +void *list_pop(list *l){ + if(!l->back) return 0; + node *b = l->back; + void *val = b->val; + l->back = b->prev; + if(l->back) 
l->back->next = 0; + free(b); + --l->size; + + return val; +} + +void list_insert(list *l, void *val) +{ + node *new = malloc(sizeof(node)); + new->val = val; + new->next = 0; + + if(!l->back){ + l->front = new; + new->prev = 0; + }else{ + l->back->next = new; + new->prev = l->back; + } + l->back = new; + ++l->size; +} + +void free_node(node *n) +{ + node *next; + while(n) { + next = n->next; + free(n); + n = next; + } +} + +void free_list(list *l) +{ + free_node(l->front); + free(l); +} + +void free_list_contents(list *l) +{ + node *n = l->front; + while(n){ + free(n->val); + n = n->next; + } +} + +void **list_to_array(list *l) +{ + void **a = calloc(l->size, sizeof(void*)); + int count = 0; + node *n = l->front; + while(n){ + a[count++] = n->val; + n = n->next; + } + return a; +} diff --git a/workloads/realworld/uvm_prefetch/darknet/src/list.h b/workloads/realworld/uvm_prefetch/darknet/src/list.h new file mode 100644 index 0000000000000000000000000000000000000000..6b445c717c2937b9c90536654ba82a33e14bb4ec --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/list.h @@ -0,0 +1,13 @@ +#ifndef LIST_H +#define LIST_H +#include "darknet.h" + +list *make_list(); +int list_find(list *l, void *val); + +void list_insert(list *, void *); + + +void free_list_contents(list *l); + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/local_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/local_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..74f6910a8fd751ad9f3b41fc67be737399a151d0 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/local_layer.c @@ -0,0 +1,293 @@ +#include "local_layer.h" +#include "utils.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" +#include +#include + +int local_out_height(local_layer l) +{ + int h = l.h; + if (!l.pad) h -= l.size; + else h -= 1; + return h/l.stride + 1; +} + +int local_out_width(local_layer l) +{ + int w = l.w; + if (!l.pad) w -= 
l.size; + else w -= 1; + return w/l.stride + 1; +} + +local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation) +{ + int i; + local_layer l = {0}; + l.type = LOCAL; + + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.batch = batch; + l.stride = stride; + l.size = size; + l.pad = pad; + + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int locations = out_h*out_w; + l.out_h = out_h; + l.out_w = out_w; + l.out_c = n; + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = l.w * l.h * l.c; + + l.weights = calloc(c*n*size*size*locations, sizeof(float)); + l.weight_updates = calloc(c*n*size*size*locations, sizeof(float)); + + l.biases = calloc(l.outputs, sizeof(float)); + l.bias_updates = calloc(l.outputs, sizeof(float)); + + // float scale = 1./sqrt(size*size*c); + float scale = sqrt(2./(size*size*c)); + for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1,1); + + l.output = calloc(l.batch*out_h * out_w * n, sizeof(float)); + l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float)); + + l.workspace_size = out_h*out_w*size*size*c; + + l.forward = forward_local_layer; + l.backward = backward_local_layer; + l.update = update_local_layer; + +#ifdef GPU + l.forward_gpu = forward_local_layer_gpu; + l.backward_gpu = backward_local_layer_gpu; + l.update_gpu = update_local_layer_gpu; + + l.weights_gpu = cuda_make_array(l.weights, c*n*size*size*locations); + l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size*locations); + + l.biases_gpu = cuda_make_array(l.biases, l.outputs); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, l.outputs); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + +#endif + l.activation = activation; + + fprintf(stderr, "Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n); + + return l; +} + +void 
forward_local_layer(const local_layer l, network net) +{ + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int i, j; + int locations = out_h * out_w; + + for(i = 0; i < l.batch; ++i){ + copy_cpu(l.outputs, l.biases, 1, l.output + i*l.outputs, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input + i*l.w*l.h*l.c; + im2col_cpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + float *output = l.output + i*l.outputs; + for(j = 0; j < locations; ++j){ + float *a = l.weights + j*l.size*l.size*l.c*l.n; + float *b = net.workspace + j; + float *c = output + j; + + int m = l.n; + int n = 1; + int k = l.size*l.size*l.c; + + gemm(0,0,m,n,k,1,a,k,b,locations,1,c,locations); + } + } + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_local_layer(local_layer l, network net) +{ + int i, j; + int locations = l.out_w*l.out_h; + + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + for(i = 0; i < l.batch; ++i){ + axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input + i*l.w*l.h*l.c; + im2col_cpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + + for(j = 0; j < locations; ++j){ + float *a = l.delta + i*l.outputs + j; + float *b = net.workspace + j; + float *c = l.weight_updates + j*l.size*l.size*l.c*l.n; + int m = l.n; + int n = l.size*l.size*l.c; + int k = 1; + + gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n); + } + + if(net.delta){ + for(j = 0; j < locations; ++j){ + float *a = l.weights + j*l.size*l.size*l.c*l.n; + float *b = l.delta + i*l.outputs + j; + float *c = net.workspace + j; + + int m = l.size*l.size*l.c; + int n = 1; + int k = l.n; + + gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations); + } + + col2im_cpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta+i*l.c*l.h*l.w); + } + } +} + +void update_local_layer(local_layer l, update_args a) +{ + float learning_rate = 
a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.outputs, momentum, l.bias_updates, 1); + + axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(size, momentum, l.weight_updates, 1); +} + +#ifdef GPU + +void forward_local_layer_gpu(const local_layer l, network net) +{ + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int i, j; + int locations = out_h * out_w; + + for(i = 0; i < l.batch; ++i){ + copy_gpu(l.outputs, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input_gpu + i*l.w*l.h*l.c; + im2col_gpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + float *output = l.output_gpu + i*l.outputs; + for(j = 0; j < locations; ++j){ + float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n; + float *b = net.workspace + j; + float *c = output + j; + + int m = l.n; + int n = 1; + int k = l.size*l.size*l.c; + + gemm_gpu(0,0,m,n,k,1,a,k,b,locations,1,c,locations); + } + } + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_local_layer_gpu(local_layer l, network net) +{ + int i, j; + int locations = l.out_w*l.out_h; + + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + for(i = 0; i < l.batch; ++i){ + axpy_gpu(l.outputs, 1, l.delta_gpu + i*l.outputs, 1, l.bias_updates_gpu, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input_gpu + i*l.w*l.h*l.c; + im2col_gpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + + for(j = 0; j < locations; ++j){ + float *a = l.delta_gpu + i*l.outputs + j; + float *b = net.workspace + j; + float *c = l.weight_updates_gpu + 
j*l.size*l.size*l.c*l.n; + int m = l.n; + int n = l.size*l.size*l.c; + int k = 1; + + gemm_gpu(0,1,m,n,k,1,a,locations,b,locations,1,c,n); + } + + if(net.delta_gpu){ + for(j = 0; j < locations; ++j){ + float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n; + float *b = l.delta_gpu + i*l.outputs + j; + float *c = net.workspace + j; + + int m = l.size*l.size*l.c; + int n = 1; + int k = l.n; + + gemm_gpu(1,0,m,n,k,1,a,m,b,locations,0,c,locations); + } + + col2im_gpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta_gpu+i*l.c*l.h*l.w); + } + } +} + +void update_local_layer_gpu(local_layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1); + + axpy_gpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(size, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(size, momentum, l.weight_updates_gpu, 1); +} + +void pull_local_layer(local_layer l) +{ + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + cuda_pull_array(l.weights_gpu, l.weights, size); + cuda_pull_array(l.biases_gpu, l.biases, l.outputs); +} + +void push_local_layer(local_layer l) +{ + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + cuda_push_array(l.weights_gpu, l.weights, size); + cuda_push_array(l.biases_gpu, l.biases, l.outputs); +} +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/local_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/local_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..56805c4f1cb51fed9ef0e771d2befb430df60df5 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/local_layer.h 
@@ -0,0 +1,31 @@ +#ifndef LOCAL_LAYER_H +#define LOCAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +typedef layer local_layer; + +#ifdef GPU +void forward_local_layer_gpu(local_layer layer, network net); +void backward_local_layer_gpu(local_layer layer, network net); +void update_local_layer_gpu(local_layer layer, update_args a); + +void push_local_layer(local_layer layer); +void pull_local_layer(local_layer layer); +#endif + +local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation); + +void forward_local_layer(const local_layer layer, network net); +void backward_local_layer(local_layer layer, network net); +void update_local_layer(local_layer layer, update_args a); + +void bias_output(float *output, float *biases, int batch, int n, int size); +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); + +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/logistic_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/logistic_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..8d98986f67a17da70df75e3d56a46615cfc8eaf1 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/logistic_layer.c @@ -0,0 +1,71 @@ +#include "logistic_layer.h" +#include "activations.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +layer make_logistic_layer(int batch, int inputs) +{ + fprintf(stderr, "logistic x entropy %4d\n", inputs); + layer l = {0}; + l.type = LOGXENT; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.loss = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_logistic_layer; + l.backward = backward_logistic_layer; + #ifdef GPU + l.forward_gpu = 
forward_logistic_layer_gpu; + l.backward_gpu = backward_logistic_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.loss_gpu = cuda_make_array(l.loss, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_logistic_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + activate_array(l.output, l.outputs*l.batch, LOGISTIC); + if(net.truth){ + logistic_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_logistic_layer(const layer l, network net) +{ + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_logistic_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, LOGISTIC); + if(net.truth){ + logistic_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); + cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_logistic_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/logistic_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/logistic_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9c25bee3c2a6eb1013ed43ce0c4aeaa63b7a293f --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/logistic_layer.h @@ -0,0 +1,15 @@ +#ifndef LOGISTIC_LAYER_H +#define LOGISTIC_LAYER_H +#include "layer.h" +#include "network.h" + +layer make_logistic_layer(int batch, int inputs); +void forward_logistic_layer(const layer l, network net); +void backward_logistic_layer(const layer l, network net); + +#ifdef GPU +void forward_logistic_layer_gpu(const layer l, network net); 
+void backward_logistic_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/lstm_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/lstm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..16f43914df8f35fb7f7b16bd93ff1d83f513dda0 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/lstm_layer.c @@ -0,0 +1,626 @@ +#include "lstm_layer.h" +#include "connected_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam) +{ + fprintf(stderr, "LSTM Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer l = { 0 }; + l.batch = batch; + l.type = LSTM; + l.steps = steps; + l.inputs = inputs; + + l.uf = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uf) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uf->batch = batch; + + l.ui = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.ui) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.ui->batch = batch; + + l.ug = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.ug) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.ug->batch = batch; + + l.uo = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uo) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uo->batch = batch; + + l.wf = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wf) = 
make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wf->batch = batch; + + l.wi = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wi) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wi->batch = batch; + + l.wg = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wg) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wg->batch = batch; + + l.wo = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wo) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wo->batch = batch; + + l.batch_normalize = batch_normalize; + l.outputs = outputs; + + l.output = calloc(outputs*batch*steps, sizeof(float)); + l.state = calloc(outputs*batch, sizeof(float)); + + l.forward = forward_lstm_layer; + l.update = update_lstm_layer; + + l.prev_state_cpu = calloc(batch*outputs, sizeof(float)); + l.prev_cell_cpu = calloc(batch*outputs, sizeof(float)); + l.cell_cpu = calloc(batch*outputs*steps, sizeof(float)); + + l.f_cpu = calloc(batch*outputs, sizeof(float)); + l.i_cpu = calloc(batch*outputs, sizeof(float)); + l.g_cpu = calloc(batch*outputs, sizeof(float)); + l.o_cpu = calloc(batch*outputs, sizeof(float)); + l.c_cpu = calloc(batch*outputs, sizeof(float)); + l.h_cpu = calloc(batch*outputs, sizeof(float)); + l.temp_cpu = calloc(batch*outputs, sizeof(float)); + l.temp2_cpu = calloc(batch*outputs, sizeof(float)); + l.temp3_cpu = calloc(batch*outputs, sizeof(float)); + l.dc_cpu = calloc(batch*outputs, sizeof(float)); + l.dh_cpu = calloc(batch*outputs, sizeof(float)); + +#ifdef GPU + l.forward_gpu = forward_lstm_layer_gpu; + l.backward_gpu = backward_lstm_layer_gpu; + l.update_gpu = update_lstm_layer_gpu; + + l.output_gpu = cuda_make_array(0, batch*outputs*steps); + l.delta_gpu = cuda_make_array(0, batch*l.outputs*steps); + + l.prev_state_gpu = cuda_make_array(0, batch*outputs); + l.prev_cell_gpu = cuda_make_array(0, 
batch*outputs); + l.cell_gpu = cuda_make_array(0, batch*outputs*steps); + + l.f_gpu = cuda_make_array(0, batch*outputs); + l.i_gpu = cuda_make_array(0, batch*outputs); + l.g_gpu = cuda_make_array(0, batch*outputs); + l.o_gpu = cuda_make_array(0, batch*outputs); + l.c_gpu = cuda_make_array(0, batch*outputs); + l.h_gpu = cuda_make_array(0, batch*outputs); + l.temp_gpu = cuda_make_array(0, batch*outputs); + l.temp2_gpu = cuda_make_array(0, batch*outputs); + l.temp3_gpu = cuda_make_array(0, batch*outputs); + l.dc_gpu = cuda_make_array(0, batch*outputs); + l.dh_gpu = cuda_make_array(0, batch*outputs); +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l.wf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wf->out_c, l.wf->out_h, l.wf->out_w); + cudnnSetTensor4dDescriptor(l.wi->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wi->out_c, l.wi->out_h, l.wi->out_w); + cudnnSetTensor4dDescriptor(l.wg->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wg->out_c, l.wg->out_h, l.wg->out_w); + cudnnSetTensor4dDescriptor(l.wo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wo->out_c, l.wo->out_h, l.wo->out_w); + + cudnnSetTensor4dDescriptor(l.uf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uf->out_c, l.uf->out_h, l.uf->out_w); + cudnnSetTensor4dDescriptor(l.ui->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ui->out_c, l.ui->out_h, l.ui->out_w); + cudnnSetTensor4dDescriptor(l.ug->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ug->out_c, l.ug->out_h, l.ug->out_w); + cudnnSetTensor4dDescriptor(l.uo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uo->out_c, l.uo->out_h, l.uo->out_w); +#endif + +#endif + + return l; +} + +void update_lstm_layer(layer l, update_args a) +{ + update_connected_layer(*(l.wf), a); + update_connected_layer(*(l.wi), a); + update_connected_layer(*(l.wg), a); + update_connected_layer(*(l.wo), a); + update_connected_layer(*(l.uf), a); + 
update_connected_layer(*(l.ui), a); + update_connected_layer(*(l.ug), a); + update_connected_layer(*(l.uo), a); +} + +void forward_lstm_layer(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + fill_cpu(l.outputs * l.batch * l.steps, 0, wf.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wi.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wg.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wo.delta, 1); + + fill_cpu(l.outputs * l.batch * l.steps, 0, uf.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, ui.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, ug.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, uo.delta, 1); + if (state.train) { + fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input = l.h_cpu; + forward_connected_layer(wf, s); + forward_connected_layer(wi, s); + forward_connected_layer(wg, s); + forward_connected_layer(wo, s); + + s.input = state.input; + forward_connected_layer(uf, s); + forward_connected_layer(ui, s); + forward_connected_layer(ug, s); + forward_connected_layer(uo, s); + + copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); + + copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); + + copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); + + copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); + + activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.g_cpu, l.outputs*l.batch, TANH); + activate_array(l.o_cpu, 
l.outputs*l.batch, LOGISTIC); + + copy_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.c_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, l.temp_cpu, 1, l.c_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.h_cpu, 1); + activate_array(l.h_cpu, l.outputs*l.batch, TANH); + mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.h_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.cell_cpu, 1); + copy_cpu(l.outputs*l.batch, l.h_cpu, 1, l.output, 1); + + state.input += l.inputs*l.batch; + l.output += l.outputs*l.batch; + l.cell_cpu += l.outputs*l.batch; + + increment_layer(&wf, 1); + increment_layer(&wi, 1); + increment_layer(&wg, 1); + increment_layer(&wo, 1); + + increment_layer(&uf, 1); + increment_layer(&ui, 1); + increment_layer(&ug, 1); + increment_layer(&uo, 1); + } +} + +void backward_lstm_layer(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + increment_layer(&wf, l.steps - 1); + increment_layer(&wi, l.steps - 1); + increment_layer(&wg, l.steps - 1); + increment_layer(&wo, l.steps - 1); + + increment_layer(&uf, l.steps - 1); + increment_layer(&ui, l.steps - 1); + increment_layer(&ug, l.steps - 1); + increment_layer(&uo, l.steps - 1); + + state.input += l.inputs*l.batch*(l.steps - 1); + if (state.delta) state.delta += l.inputs*l.batch*(l.steps - 1); + + l.output += l.outputs*l.batch*(l.steps - 1); + l.cell_cpu += l.outputs*l.batch*(l.steps - 1); + l.delta += l.outputs*l.batch*(l.steps - 1); + + for (i = l.steps - 1; i >= 0; --i) { + if (i != 0) copy_cpu(l.outputs*l.batch, l.cell_cpu - l.outputs*l.batch, 1, l.prev_cell_cpu, 1); + copy_cpu(l.outputs*l.batch, l.cell_cpu, 1, l.c_cpu, 1); + if (i != 0) copy_cpu(l.outputs*l.batch, l.output - l.outputs*l.batch, 1, 
l.prev_state_cpu, 1); + copy_cpu(l.outputs*l.batch, l.output, 1, l.h_cpu, 1); + + l.dh_cpu = (i == 0) ? 0 : l.delta - l.outputs*l.batch; + + copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); + + copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); + + copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); + + copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); + + activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.g_cpu, l.outputs*l.batch, TANH); + activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); + + copy_cpu(l.outputs*l.batch, l.delta, 1, l.temp3_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); + activate_array(l.temp_cpu, l.outputs*l.batch, TANH); + + copy_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp2_cpu, 1); + mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.temp2_cpu, 1); + + gradient_array(l.temp_cpu, l.outputs*l.batch, TANH, l.temp2_cpu); + axpy_cpu(l.outputs*l.batch, 1, l.dc_cpu, 1, l.temp2_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); + activate_array(l.temp_cpu, l.outputs*l.batch, TANH); + mul_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp_cpu, 1); + gradient_array(l.o_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wo.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wo, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uo.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(uo, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); + gradient_array(l.g_cpu, l.outputs*l.batch, TANH, l.temp_cpu); + 
copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wg.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wg, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ug.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(ug, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); + gradient_array(l.i_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wi.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wi, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ui.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(ui, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.prev_cell_cpu, 1, l.temp_cpu, 1); + gradient_array(l.f_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wf.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wf, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uf.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(uf, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.temp_cpu, 1); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, l.dc_cpu, 1); + + state.input -= l.inputs*l.batch; + if (state.delta) state.delta -= l.inputs*l.batch; + l.output -= l.outputs*l.batch; + l.cell_cpu -= l.outputs*l.batch; + l.delta -= l.outputs*l.batch; + + increment_layer(&wf, -1); + increment_layer(&wi, -1); + increment_layer(&wg, -1); + increment_layer(&wo, -1); + + increment_layer(&uf, -1); + increment_layer(&ui, -1); + increment_layer(&ug, -1); + increment_layer(&uo, -1); + } +} + +#ifdef GPU +void update_lstm_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.wf), a); + 
update_connected_layer_gpu(*(l.wi), a); + update_connected_layer_gpu(*(l.wg), a); + update_connected_layer_gpu(*(l.wo), a); + update_connected_layer_gpu(*(l.uf), a); + update_connected_layer_gpu(*(l.ui), a); + update_connected_layer_gpu(*(l.ug), a); + update_connected_layer_gpu(*(l.uo), a); +} + +void forward_lstm_layer_gpu(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + fill_gpu(l.outputs * l.batch * l.steps, 0, wf.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wi.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wg.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wo.delta_gpu, 1); + + fill_gpu(l.outputs * l.batch * l.steps, 0, uf.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, ui.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, ug.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, uo.delta_gpu, 1); + if (state.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = l.h_gpu; + forward_connected_layer_gpu(wf, s); + forward_connected_layer_gpu(wi, s); + forward_connected_layer_gpu(wg, s); + forward_connected_layer_gpu(wo, s); + + s.input_gpu = state.input_gpu; + forward_connected_layer_gpu(uf, s); + forward_connected_layer_gpu(ui, s); + forward_connected_layer_gpu(ug, s); + forward_connected_layer_gpu(uo, s); + + copy_gpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1); + + copy_gpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1); + + copy_gpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1); + + copy_gpu(l.outputs*l.batch, 
wo.output_gpu, 1, l.o_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1); + + activate_array_gpu(l.f_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.g_gpu, l.outputs*l.batch, TANH); + activate_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.f_gpu, 1, l.c_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, l.temp_gpu, 1, l.c_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.h_gpu, 1); + activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH); + mul_gpu(l.outputs*l.batch, l.o_gpu, 1, l.h_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.cell_gpu, 1); + copy_gpu(l.outputs*l.batch, l.h_gpu, 1, l.output_gpu, 1); + + state.input_gpu += l.inputs*l.batch; + l.output_gpu += l.outputs*l.batch; + l.cell_gpu += l.outputs*l.batch; + + increment_layer(&wf, 1); + increment_layer(&wi, 1); + increment_layer(&wg, 1); + increment_layer(&wo, 1); + + increment_layer(&uf, 1); + increment_layer(&ui, 1); + increment_layer(&ug, 1); + increment_layer(&uo, 1); + } +} + +void backward_lstm_layer_gpu(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + increment_layer(&wf, l.steps - 1); + increment_layer(&wi, l.steps - 1); + increment_layer(&wg, l.steps - 1); + increment_layer(&wo, l.steps - 1); + + increment_layer(&uf, l.steps - 1); + increment_layer(&ui, l.steps - 1); + increment_layer(&ug, l.steps - 1); + increment_layer(&uo, l.steps - 1); + + state.input_gpu += l.inputs*l.batch*(l.steps - 1); + if (state.delta_gpu) state.delta_gpu += l.inputs*l.batch*(l.steps - 1); + + l.output_gpu += l.outputs*l.batch*(l.steps - 1); + l.cell_gpu += 
l.outputs*l.batch*(l.steps - 1); + l.delta_gpu += l.outputs*l.batch*(l.steps - 1); + + for (i = l.steps - 1; i >= 0; --i) { + if (i != 0) copy_gpu(l.outputs*l.batch, l.cell_gpu - l.outputs*l.batch, 1, l.prev_cell_gpu, 1); + copy_gpu(l.outputs*l.batch, l.cell_gpu, 1, l.c_gpu, 1); + if (i != 0) copy_gpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, 1, l.prev_state_gpu, 1); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.h_gpu, 1); + + l.dh_gpu = (i == 0) ? 0 : l.delta_gpu - l.outputs*l.batch; + + copy_gpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1); + + copy_gpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1); + + copy_gpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1); + + copy_gpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1); + + activate_array_gpu(l.f_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.g_gpu, l.outputs*l.batch, TANH); + activate_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, l.temp3_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.temp_gpu, 1); + activate_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH); + + copy_gpu(l.outputs*l.batch, l.temp3_gpu, 1, l.temp2_gpu, 1); + mul_gpu(l.outputs*l.batch, l.o_gpu, 1, l.temp2_gpu, 1); + + gradient_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH, l.temp2_gpu); + axpy_gpu(l.outputs*l.batch, 1, l.dc_gpu, 1, l.temp2_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.temp_gpu, 1); + activate_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH); + mul_gpu(l.outputs*l.batch, l.temp3_gpu, 1, l.temp_gpu, 1); + gradient_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, 
wo.delta_gpu, 1); + s.input_gpu = l.prev_state_gpu; + s.delta_gpu = l.dh_gpu; + backward_connected_layer_gpu(wo, s); + + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, uo.delta_gpu, 1); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(uo, s); + + copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1); + gradient_array_gpu(l.g_gpu, l.outputs*l.batch, TANH, l.temp_gpu); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wg.delta_gpu, 1); + s.input_gpu = l.prev_state_gpu; + s.delta_gpu = l.dh_gpu; + backward_connected_layer_gpu(wg, s); + + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, ug.delta_gpu, 1); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(ug, s); + + copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1); + gradient_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wi.delta_gpu, 1); + s.input_gpu = l.prev_state_gpu; + s.delta_gpu = l.dh_gpu; + backward_connected_layer_gpu(wi, s); + + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, ui.delta_gpu, 1); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(ui, s); + + copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.prev_cell_gpu, 1, l.temp_gpu, 1); + gradient_array_gpu(l.f_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wf.delta_gpu, 1); + s.input_gpu = l.prev_state_gpu; + s.delta_gpu = l.dh_gpu; + backward_connected_layer_gpu(wf, s); + + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, uf.delta_gpu, 1); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(uf, s); + + copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.f_gpu, 1, l.temp_gpu, 1); + 
copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, l.dc_gpu, 1); + + state.input_gpu -= l.inputs*l.batch; + if (state.delta_gpu) state.delta_gpu -= l.inputs*l.batch; + l.output_gpu -= l.outputs*l.batch; + l.cell_gpu -= l.outputs*l.batch; + l.delta_gpu -= l.outputs*l.batch; + + increment_layer(&wf, -1); + increment_layer(&wi, -1); + increment_layer(&wg, -1); + increment_layer(&wo, -1); + + increment_layer(&uf, -1); + increment_layer(&ui, -1); + increment_layer(&ug, -1); + increment_layer(&uo, -1); + } +} +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/lstm_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/lstm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..b9f07e6424b55c336e692aa6f1028d0bc1cae0b3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/lstm_layer.h @@ -0,0 +1,20 @@ +#ifndef LSTM_LAYER_H +#define LSTM_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" +#define USET + +layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam); + +void forward_lstm_layer(layer l, network net); +void update_lstm_layer(layer l, update_args a); + +#ifdef GPU +void forward_lstm_layer_gpu(layer l, network net); +void backward_lstm_layer_gpu(layer l, network net); +void update_lstm_layer_gpu(layer l, update_args a); + +#endif +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/matrix.c b/workloads/realworld/uvm_prefetch/darknet/src/matrix.c new file mode 100644 index 0000000000000000000000000000000000000000..799916bff017180e220ae48748f495007793d168 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/matrix.c @@ -0,0 +1,196 @@ +#include "matrix.h" +#include "utils.h" +#include "blas.h" +#include +#include +#include +#include +#include + +void free_matrix(matrix m) +{ + int i; + for(i = 0; i < m.rows; ++i) free(m.vals[i]); + free(m.vals); +} + +float matrix_topk_accuracy(matrix truth, matrix guess, int k) +{ + int *indexes = 
calloc(k, sizeof(int)); + int n = truth.cols; + int i,j; + int correct = 0; + for(i = 0; i < truth.rows; ++i){ + top_k(guess.vals[i], n, k, indexes); + for(j = 0; j < k; ++j){ + int class = indexes[j]; + if(truth.vals[i][class]){ + ++correct; + break; + } + } + } + free(indexes); + return (float)correct/truth.rows; +} + +void scale_matrix(matrix m, float scale) +{ + int i,j; + for(i = 0; i < m.rows; ++i){ + for(j = 0; j < m.cols; ++j){ + m.vals[i][j] *= scale; + } + } +} + +matrix resize_matrix(matrix m, int size) +{ + int i; + if (m.rows == size) return m; + if (m.rows < size) { + m.vals = realloc(m.vals, size*sizeof(float*)); + for (i = m.rows; i < size; ++i) { + m.vals[i] = calloc(m.cols, sizeof(float)); + } + } else if (m.rows > size) { + for (i = size; i < m.rows; ++i) { + free(m.vals[i]); + } + m.vals = realloc(m.vals, size*sizeof(float*)); + } + m.rows = size; + return m; +} + +void matrix_add_matrix(matrix from, matrix to) +{ + assert(from.rows == to.rows && from.cols == to.cols); + int i,j; + for(i = 0; i < from.rows; ++i){ + for(j = 0; j < from.cols; ++j){ + to.vals[i][j] += from.vals[i][j]; + } + } +} + +matrix copy_matrix(matrix m) +{ + matrix c = {0}; + c.rows = m.rows; + c.cols = m.cols; + c.vals = calloc(c.rows, sizeof(float *)); + int i; + for(i = 0; i < c.rows; ++i){ + c.vals[i] = calloc(c.cols, sizeof(float)); + copy_cpu(c.cols, m.vals[i], 1, c.vals[i], 1); + } + return c; +} + +matrix make_matrix(int rows, int cols) +{ + int i; + matrix m; + m.rows = rows; + m.cols = cols; + m.vals = calloc(m.rows, sizeof(float *)); + for(i = 0; i < m.rows; ++i){ + m.vals[i] = calloc(m.cols, sizeof(float)); + } + return m; +} + +matrix hold_out_matrix(matrix *m, int n) +{ + int i; + matrix h; + h.rows = n; + h.cols = m->cols; + h.vals = calloc(h.rows, sizeof(float *)); + for(i = 0; i < n; ++i){ + int index = rand()%m->rows; + h.vals[i] = m->vals[index]; + m->vals[index] = m->vals[--(m->rows)]; + } + return h; +} + +float *pop_column(matrix *m, int c) +{ + float 
*col = calloc(m->rows, sizeof(float)); + int i, j; + for(i = 0; i < m->rows; ++i){ + col[i] = m->vals[i][c]; + for(j = c; j < m->cols-1; ++j){ + m->vals[i][j] = m->vals[i][j+1]; + } + } + --m->cols; + return col; +} + +matrix csv_to_matrix(char *filename) +{ + FILE *fp = fopen(filename, "r"); + if(!fp) file_error(filename); + + matrix m; + m.cols = -1; + + char *line; + + int n = 0; + int size = 1024; + m.vals = calloc(size, sizeof(float*)); + while((line = fgetl(fp))){ + if(m.cols == -1) m.cols = count_fields(line); + if(n == size){ + size *= 2; + m.vals = realloc(m.vals, size*sizeof(float*)); + } + m.vals[n] = parse_fields(line, m.cols); + free(line); + ++n; + } + m.vals = realloc(m.vals, n*sizeof(float*)); + m.rows = n; + return m; +} + +void matrix_to_csv(matrix m) +{ + int i, j; + + for(i = 0; i < m.rows; ++i){ + for(j = 0; j < m.cols; ++j){ + if(j > 0) printf(","); + printf("%.17g", m.vals[i][j]); + } + printf("\n"); + } +} + +void print_matrix(matrix m) +{ + int i, j; + printf("%d X %d Matrix:\n",m.rows, m.cols); + printf(" __"); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf("__ \n"); + + printf("| "); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf(" |\n"); + + for(i = 0; i < m.rows; ++i){ + printf("| "); + for(j = 0; j < m.cols; ++j){ + printf("%15.7f ", m.vals[i][j]); + } + printf(" |\n"); + } + printf("|__"); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf("__|\n"); +} diff --git a/workloads/realworld/uvm_prefetch/darknet/src/matrix.h b/workloads/realworld/uvm_prefetch/darknet/src/matrix.h new file mode 100644 index 0000000000000000000000000000000000000000..879acd70d26c084931b30067ddcc77057068e58c --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/matrix.h @@ -0,0 +1,13 @@ +#ifndef MATRIX_H +#define MATRIX_H +#include "darknet.h" + +matrix copy_matrix(matrix m); +void print_matrix(matrix m); + +matrix hold_out_matrix(matrix *m, int n); +matrix resize_matrix(matrix m, int size); + +float *pop_column(matrix *m, 
int c); + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/maxpool_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/maxpool_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..b54be838662ebfc53abc539da22413becc1805a3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/maxpool_layer.c @@ -0,0 +1,127 @@ +#include "maxpool_layer.h" +#include "cuda_dark.h" +#include + +image get_maxpool_image(maxpool_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.c; + return float_to_image(w,h,c,l.output); +} + +image get_maxpool_delta(maxpool_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.c; + return float_to_image(w,h,c,l.delta); +} + +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding) +{ + maxpool_layer l = {0}; + l.type = MAXPOOL; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.pad = padding; + l.out_w = (w + padding - size)/stride + 1; + l.out_h = (h + padding - size)/stride + 1; + l.out_c = c; + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = h*w*c; + l.size = size; + l.stride = stride; + int output_size = l.out_h * l.out_w * l.out_c * batch; + l.indexes = calloc(output_size, sizeof(int)); + l.output = calloc(output_size, sizeof(float)); + l.delta = calloc(output_size, sizeof(float)); + l.forward = forward_maxpool_layer; + l.backward = backward_maxpool_layer; + #ifdef GPU + l.forward_gpu = forward_maxpool_layer_gpu; + l.backward_gpu = backward_maxpool_layer_gpu; + l.indexes_gpu = cuda_make_int_array(0, output_size); + l.output_gpu = cuda_make_array(l.output, output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); + #endif + fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); + return l; +} + +void resize_maxpool_layer(maxpool_layer *l, int w, int h) +{ + l->h = h; + l->w = w; + l->inputs = h*w*l->c; + + l->out_w = (w + l->pad - 
l->size)/l->stride + 1; + l->out_h = (h + l->pad - l->size)/l->stride + 1; + l->outputs = l->out_w * l->out_h * l->c; + int output_size = l->outputs * l->batch; + + l->indexes = realloc(l->indexes, output_size * sizeof(int)); + l->output = realloc(l->output, output_size * sizeof(float)); + l->delta = realloc(l->delta, output_size * sizeof(float)); + + #ifdef GPU + cuda_free((float *)l->indexes_gpu); + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->indexes_gpu = cuda_make_int_array(0, output_size); + l->output_gpu = cuda_make_array(l->output, output_size); + l->delta_gpu = cuda_make_array(l->delta, output_size); + #endif +} + +void forward_maxpool_layer(const maxpool_layer l, network net) +{ + int b,i,j,k,m,n; + int w_offset = -l.pad/2; + int h_offset = -l.pad/2; + + int h = l.out_h; + int w = l.out_w; + int c = l.c; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < c; ++k){ + for(i = 0; i < h; ++i){ + for(j = 0; j < w; ++j){ + int out_index = j + w*(i + h*(k + c*b)); + float max = -FLT_MAX; + int max_i = -1; + for(n = 0; n < l.size; ++n){ + for(m = 0; m < l.size; ++m){ + int cur_h = h_offset + i*l.stride + n; + int cur_w = w_offset + j*l.stride + m; + int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c)); + int valid = (cur_h >= 0 && cur_h < l.h && + cur_w >= 0 && cur_w < l.w); + float val = (valid != 0) ? net.input[index] : -FLT_MAX; + max_i = (val > max) ? index : max_i; + max = (val > max) ? 
val : max; + } + } + l.output[out_index] = max; + l.indexes[out_index] = max_i; + } + } + } + } +} + +void backward_maxpool_layer(const maxpool_layer l, network net) +{ + int i; + int h = l.out_h; + int w = l.out_w; + int c = l.c; + for(i = 0; i < h*w*c*l.batch; ++i){ + int index = l.indexes[i]; + net.delta[index] += l.delta[i]; + } +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/maxpool_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/maxpool_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..f01adb957e8bd8ce01a06e5a1ff14a988ae07149 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/maxpool_layer.h @@ -0,0 +1,23 @@ +#ifndef MAXPOOL_LAYER_H +#define MAXPOOL_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +typedef layer maxpool_layer; + +image get_maxpool_image(maxpool_layer l); +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding); +void resize_maxpool_layer(maxpool_layer *l, int w, int h); +void forward_maxpool_layer(const maxpool_layer l, network net); +void backward_maxpool_layer(const maxpool_layer l, network net); + +#ifdef GPU +void forward_maxpool_layer_gpu(maxpool_layer l, network net); +void backward_maxpool_layer_gpu(maxpool_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/maxpool_layer_kernels.cu b/workloads/realworld/uvm_prefetch/darknet/src/maxpool_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..89348b6d9b65c2456592e8d2c2755fec6fc1a9db --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/maxpool_layer_kernels.cu @@ -0,0 +1,135 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "maxpool_layer.h" +#include "cuda_dark.h" +} + +__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, 
float *output, int *indexes) +{ + int h = (in_h + pad - size)/stride + 1; + int w = (in_w + pad - size)/stride + 1; + int c = in_c; + + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int j = id % w; + id /= w; + int i = id % h; + id /= h; + int k = id % c; + id /= c; + int b = id; + + int w_offset = -pad/2; + int h_offset = -pad/2; + + int out_index = j + w*(i + h*(k + c*b)); + float max = -INFINITY; + int max_i = -1; + int l, m; + for(l = 0; l < size; ++l){ + for(m = 0; m < size; ++m){ + int cur_h = h_offset + i*stride + l; + int cur_w = w_offset + j*stride + m; + int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c)); + int valid = (cur_h >= 0 && cur_h < in_h && + cur_w >= 0 && cur_w < in_w); + float val = (valid != 0) ? input[index] : -INFINITY; + max_i = (val > max) ? index : max_i; + max = (val > max) ? val : max; + } + } + output[out_index] = max; + indexes[out_index] = max_i; +} + +__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes) +{ + int h = (in_h + pad - size)/stride + 1; + int w = (in_w + pad - size)/stride + 1; + int c = in_c; + int area = (size-1)/stride; + + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int index = id; + int j = id % in_w; + id /= in_w; + int i = id % in_h; + id /= in_h; + int k = id % in_c; + id /= in_c; + int b = id; + + int w_offset = -pad/2; + int h_offset = -pad/2; + + float d = 0; + int l, m; + for(l = -area; l < area+1; ++l){ + for(m = -area; m < area+1; ++m){ + int out_w = (j-w_offset)/stride + m; + int out_h = (i-h_offset)/stride + l; + int out_index = out_w + w*(out_h + h*(k + c*b)); + int valid = (out_w >= 0 && out_w < w && + out_h >= 0 && out_h < h); + d += (valid && indexes[out_index] == index) ? 
delta[out_index] : 0; + } + } + prev_delta[index] += d; +} + +extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network net) +{ + int h = layer.out_h; + int w = layer.out_w; + int c = layer.c; + + size_t n = h*w*c*layer.batch; + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(net.input_gpu, n * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(layer.indexes_gpu, n * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(layer.output_gpu, n * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + forward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, net.input_gpu, layer.output_gpu, layer.indexes_gpu); + check_error(cudaPeekAtLastError()); +} + +extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network net) +{ + size_t n = layer.h*layer.w*layer.c*layer.batch; + size_t in_n = layer.out_h*layer.out_w*layer.c*layer.batch; + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(layer.delta_gpu, in_n * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(layer.indexes_gpu, in_n * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(net.delta_gpu, n * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + backward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, net.delta_gpu, layer.indexes_gpu); + check_error(cudaPeekAtLastError()); +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/network.c b/workloads/realworld/uvm_prefetch/darknet/src/network.c new file mode 
100644 index 0000000000000000000000000000000000000000..a90310a6fb581ffe9090b6ab758d63c4f0e728df --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/network.c @@ -0,0 +1,1135 @@ +#include +#include +#include +#include "network.h" +#include "image.h" +#include "data.h" +#include "utils.h" +#include "blas.h" + +#include "crop_layer.h" +#include "connected_layer.h" +#include "gru_layer.h" +#include "rnn_layer.h" +#include "crnn_layer.h" +#include "local_layer.h" +#include "convolutional_layer.h" +#include "activation_layer.h" +#include "detection_layer.h" +#include "region_layer.h" +#include "yolo_layer.h" +#include "normalization_layer.h" +#include "batchnorm_layer.h" +#include "maxpool_layer.h" +#include "reorg_layer.h" +#include "avgpool_layer.h" +#include "cost_layer.h" +#include "softmax_layer.h" +#include "dropout_layer.h" +#include "route_layer.h" +#include "upsample_layer.h" +#include "shortcut_layer.h" +#include "parser.h" +#include "data.h" + +load_args get_base_args(network *net) +{ + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.size = net->w; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.center = net->center; + args.saturation = net->saturation; + args.hue = net->hue; + return args; +} + +network *load_network(char *cfg, char *weights, int clear) +{ + network *net = parse_network_cfg(cfg); + if(weights && weights[0] != 0){ + load_weights(net, weights); + } + if(clear) (*net->seen) = 0; + return net; +} + +size_t get_current_batch(network *net) +{ + size_t batch_num = (*net->seen)/(net->batch*net->subdivisions); + return batch_num; +} + +void reset_network_state(network *net, int b) +{ + int i; + for (i = 0; i < net->n; ++i) { + #ifdef GPU + layer l = net->layers[i]; + if(l.state_gpu){ + fill_gpu(l.outputs, 0, l.state_gpu + l.outputs*b, 1); + } + if(l.h_gpu){ + fill_gpu(l.outputs, 0, l.h_gpu + l.outputs*b, 1); + } + 
#endif + } +} + +void reset_rnn(network *net) +{ + reset_network_state(net, 0); +} + +float get_current_rate(network *net) +{ + size_t batch_num = get_current_batch(net); + int i; + float rate; + if (batch_num < net->burn_in) return net->learning_rate * pow((float)batch_num / net->burn_in, net->power); + switch (net->policy) { + case CONSTANT: + return net->learning_rate; + case STEP: + return net->learning_rate * pow(net->scale, batch_num/net->step); + case STEPS: + rate = net->learning_rate; + for(i = 0; i < net->num_steps; ++i){ + if(net->steps[i] > batch_num) return rate; + rate *= net->scales[i]; + } + return rate; + case EXP: + return net->learning_rate * pow(net->gamma, batch_num); + case POLY: + return net->learning_rate * pow(1 - (float)batch_num / net->max_batches, net->power); + case RANDOM: + return net->learning_rate * pow(rand_uniform(0,1), net->power); + case SIG: + return net->learning_rate * (1./(1.+exp(net->gamma*(batch_num - net->step)))); + default: + fprintf(stderr, "Policy is weird!\n"); + return net->learning_rate; + } +} + +char *get_layer_string(LAYER_TYPE a) +{ + switch(a){ + case CONVOLUTIONAL: + return "convolutional"; + case ACTIVE: + return "activation"; + case LOCAL: + return "local"; + case DECONVOLUTIONAL: + return "deconvolutional"; + case CONNECTED: + return "connected"; + case RNN: + return "rnn"; + case GRU: + return "gru"; + case LSTM: + return "lstm"; + case CRNN: + return "crnn"; + case MAXPOOL: + return "maxpool"; + case REORG: + return "reorg"; + case AVGPOOL: + return "avgpool"; + case SOFTMAX: + return "softmax"; + case DETECTION: + return "detection"; + case REGION: + return "region"; + case YOLO: + return "yolo"; + case DROPOUT: + return "dropout"; + case CROP: + return "crop"; + case COST: + return "cost"; + case ROUTE: + return "route"; + case SHORTCUT: + return "shortcut"; + case NORMALIZATION: + return "normalization"; + case BATCHNORM: + return "batchnorm"; + default: + break; + } + return "none"; +} + +network 
*make_network(int n) +{ + network *net = calloc(1, sizeof(network)); + net->n = n; + net->layers = calloc(net->n, sizeof(layer)); + net->seen = calloc(1, sizeof(size_t)); + net->t = calloc(1, sizeof(int)); + net->cost = calloc(1, sizeof(float)); + return net; +} + +void forward_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + forward_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + for(i = 0; i < net.n; ++i){ + net.index = i; + layer l = net.layers[i]; + if(l.delta){ + fill_cpu(l.outputs * l.batch, 0, l.delta, 1); + } + l.forward(l, net); + net.input = l.output; + if(l.truth) { + net.truth = l.output; + } + } + calc_network_cost(netp); +} + +void update_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + update_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + update_args a = {0}; + a.batch = net.batch*net.subdivisions; + a.learning_rate = get_current_rate(netp); + a.momentum = net.momentum; + a.decay = net.decay; + a.adam = net.adam; + a.B1 = net.B1; + a.B2 = net.B2; + a.eps = net.eps; + ++*net.t; + a.t = *net.t; + + for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.update){ + l.update(l, a); + } + } +} + +void calc_network_cost(network *netp) +{ + network net = *netp; + int i; + float sum = 0; + int count = 0; + for(i = 0; i < net.n; ++i){ + if(net.layers[i].cost){ + sum += net.layers[i].cost[0]; + ++count; + } + } + *net.cost = sum/count; +} + +int get_predicted_class_network(network *net) +{ + return max_index(net->output, net->outputs); +} + +void backward_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + backward_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + network orig = net; + for(i = net.n-1; i >= 0; --i){ + layer l = net.layers[i]; + if(l.stopbackward) break; + if(i == 0){ + net = orig; + }else{ + layer prev = net.layers[i-1]; + net.input = prev.output; + net.delta = prev.delta; + } + net.index = i; + 
/* NOTE(review): chunk begins mid-function — tail of a backward-pass loop whose
 * head lies outside this chunk; left untouched. */
        l.backward(l, net);
    }
}

/* Runs one forward+backward pass on the batch already staged in net->input /
 * net->truth; applies a weight update only once every net->subdivisions
 * batches (gradient accumulation). Returns the cost of this batch. */
float train_network_datum(network *net)
{
    *net->seen += net->batch;
    net->train = 1;
    forward_network(net);
    backward_network(net);
    float error = *net->cost;
    /* seen/batch counts processed batches; update every `subdivisions`-th */
    if(((*net->seen)/net->batch)%net->subdivisions == 0) update_network(net);
    return error;
}

/* Trains on n randomly sampled batches from d; returns mean per-example cost. */
float train_network_sgd(network *net, data d, int n)
{
    int batch = net->batch;

    int i;
    float sum = 0;
    for(i = 0; i < n; ++i){
        get_random_batch(d, batch, net->input, net->truth);
        float err = train_network_datum(net);
        sum += err;
    }
    return (float)sum/(n*batch);
}

/* Trains on every batch of d in order (d.X.rows must be a multiple of the
 * batch size); returns mean per-example cost. */
float train_network(network *net, data d)
{
    assert(d.X.rows % net->batch == 0);
    int batch = net->batch;
    int n = d.X.rows / batch;

    int i;
    float sum = 0;
    for(i = 0; i < n; ++i){
        get_next_batch(d, batch, i*batch, net->input, net->truth);
        float err = train_network_datum(net);
        sum += err;
    }
    return (float)sum/(n*batch);
}

/* Sets the softmax temperature on every layer. */
void set_temp_network(network *net, float t)
{
    int i;
    for(i = 0; i < net->n; ++i){
        net->layers[i].temperature = t;
    }
}


/* Changes the batch size on the net and every layer. Under CUDNN the conv /
 * deconv descriptors are batch-dependent and must be rebuilt. */
void set_batch_network(network *net, int b)
{
    net->batch = b;
    int i;
    for(i = 0; i < net->n; ++i){
        net->layers[i].batch = b;
#ifdef CUDNN
        if(net->layers[i].type == CONVOLUTIONAL){
            cudnn_convolutional_setup(net->layers + i);
        }
        if(net->layers[i].type == DECONVOLUTIONAL){
            layer *l = net->layers + i;
            cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, l->out_h, l->out_w);
            cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1);
        }
#endif
    }
}

/* Resizes every layer to a new input resolution w x h, reallocating the
 * host (and, under GPU, device) input/truth/workspace buffers. Each layer's
 * output size feeds the next layer's input size. Always returns 0. */
int resize_network(network *net, int w, int h)
{
#ifdef GPU
    cuda_set_device(net->gpu_index);
    cuda_free(net->workspace);
#endif
    int i;
    //if(w == net->w && h == net->h) return 0;
    net->w = w;
    net->h = h;
    int inputs = 0;
    size_t workspace_size = 0;
    //fprintf(stderr, "Resizing to %d x %d...\n", w, h);
    //fflush(stderr);
    for (i = 0; i < net->n; ++i){
        /* l is a by-value copy; the resized copy is written back below */
        layer l = net->layers[i];
        if(l.type == CONVOLUTIONAL){
            resize_convolutional_layer(&l, w, h);
        }else if(l.type == CROP){
            resize_crop_layer(&l, w, h);
        }else if(l.type == MAXPOOL){
            resize_maxpool_layer(&l, w, h);
        }else if(l.type == REGION){
            resize_region_layer(&l, w, h);
        }else if(l.type == YOLO){
            resize_yolo_layer(&l, w, h);
        }else if(l.type == ROUTE){
            resize_route_layer(&l, net);
        }else if(l.type == SHORTCUT){
            resize_shortcut_layer(&l, w, h);
        }else if(l.type == UPSAMPLE){
            resize_upsample_layer(&l, w, h);
        }else if(l.type == REORG){
            resize_reorg_layer(&l, w, h);
        }else if(l.type == AVGPOOL){
            resize_avgpool_layer(&l, w, h);
        }else if(l.type == NORMALIZATION){
            resize_normalization_layer(&l, w, h);
        }else if(l.type == COST){
            resize_cost_layer(&l, inputs);
        }else{
            error("Cannot resize this type of layer");
        }
        if(l.workspace_size > workspace_size) workspace_size = l.workspace_size;
        /* hard sanity cap on per-layer workspace (~2 GB) */
        if(l.workspace_size > 2000000000) assert(0);
        inputs = l.outputs;
        net->layers[i] = l;
        /* next layer sees this layer's output resolution */
        w = l.out_w;
        h = l.out_h;
        if(l.type == AVGPOOL) break;
    }
    layer out = get_network_output_layer(net);
    net->inputs = net->layers[0].inputs;
    net->outputs = out.outputs;
    net->truths = out.outputs;
    if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths;
    net->output = out.output;
    free(net->input);
    free(net->truth);
    net->input = calloc(net->inputs*net->batch, sizeof(float));
    net->truth = calloc(net->truths*net->batch, sizeof(float));
#ifdef GPU
    if(gpu_index >= 0){
        cuda_free(net->input_gpu);
        cuda_free(net->truth_gpu);
        net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch);
        net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch);
        if(workspace_size){
            /* workspace_size is in bytes; cuda_make_array takes float counts */
            net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1);
        }
    }else {
        free(net->workspace);
        net->workspace = calloc(1, workspace_size);
    }
#else
    free(net->workspace);
    net->workspace = calloc(1, workspace_size);
#endif
    //fprintf(stderr, " Done!\n");
    return 0;
}

/* Returns the first DETECTION layer, or a zeroed layer if none exists. */
layer get_network_detection_layer(network *net)
{
    int i;
    for(i = 0; i < net->n; ++i){
        if(net->layers[i].type == DETECTION){
            return net->layers[i];
        }
    }
    fprintf(stderr, "Detection layer not found!!\n");
    layer l = {0};
    return l;
}

/* Wraps layer i's output as an image (no copy); zeroed image if the layer
 * has no spatial output dimensions. */
image get_network_image_layer(network *net, int i)
{
    layer l = net->layers[i];
#ifdef GPU
    //cuda_pull_array(l.output_gpu, l.output, l.outputs);
#endif
    if (l.out_w && l.out_h && l.out_c){
        return float_to_image(l.out_w, l.out_h, l.out_c, l.output);
    }
    image def = {0};
    return def;
}

/* Returns the last layer output that is image-shaped; zeroed image if none. */
image get_network_image(network *net)
{
    int i;
    for(i = net->n-1; i >= 0; --i){
        image m = get_network_image_layer(net, i);
        if(m.h != 0) return m;
    }
    image def = {0};
    return def;
}

/* Opens a visualization window per convolutional layer, chaining each
 * layer's filters onto the previous view. */
void visualize_network(network *net)
{
    image *prev = 0;
    int i;
    char buff[256];
    for(i = 0; i < net->n; ++i){
        sprintf(buff, "Layer %d", i);
        layer l = net->layers[i];
        if(l.type == CONVOLUTIONAL){
            prev = visualize_convolutional_layer(l, buff, prev);
        }
    }
}

/* Writes the indices of the top-k outputs into index. */
void top_predictions(network *net, int k, int *index)
{
    top_k(net->output, net->outputs, k, index);
}


/* Inference entry point: runs a forward pass on `input` in eval mode and
 * returns a pointer to the network's output buffer. The net's input/truth/
 * train fields are saved and restored around the pass. */
float *network_predict(network *net, float *input)
{
    network orig = *net;
    net->input = input;
    net->truth = 0;
    net->train = 0;
    net->delta = 0;
    forward_network(net);
    float *out = net->output;
    *net = orig;
    return out;
}

/* Counts detection boxes above `thresh` across YOLO / DETECTION / REGION
 * layers (YOLO filters by threshold; the others count all anchors). */
int num_detections(network *net, float thresh)
{
    int i;
    int s = 0;
    for(i = 0; i < net->n; ++i){
        layer l = net->layers[i];
        if(l.type == YOLO){
            s += yolo_num_detections(l, thresh);
        }
        if(l.type == DETECTION || l.type == REGION){
            s += l.w*l.h*l.n;
        }
    }
    return s;
}

/* Allocates the detection array (and per-box prob/mask buffers) sized by
 * num_detections; writes the count to *num if non-NULL. Caller frees with
 * free_detections(). */
detection *make_network_boxes(network *net, float thresh, int *num)
{
    layer l = net->layers[net->n - 1];
    int i;
    int nboxes = num_detections(net, thresh);
    if(num) *num = nboxes;
    detection *dets = calloc(nboxes, sizeof(detection));
    for(i = 0; i < nboxes; ++i){
        dets[i].prob = calloc(l.classes, sizeof(float));
        if(l.coords > 4){
            dets[i].mask = calloc(l.coords-4, sizeof(float));
        }
    }
    return dets;
}

/* Fills a pre-allocated detection array from each output layer, advancing
 * the write cursor by the number of boxes each layer produced. */
void fill_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets)
{
    int j;
    for(j = 0; j < net->n; ++j){
        layer l = net->layers[j];
        if(l.type == YOLO){
            int count = get_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets);
            dets += count;
        }
        if(l.type == REGION){
            get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets);
            dets += l.w*l.h*l.n;
        }
        if(l.type == DETECTION){
            get_detection_detections(l, w, h, thresh, dets);
            dets += l.w*l.h*l.n;
        }
    }
}

/* Convenience: allocate + fill detections in one call. */
detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num)
{
    detection *dets = make_network_boxes(net, thresh, num);
    fill_network_boxes(net, w, h, thresh, hier, map, relative, dets);
    return dets;
}

/* Frees an array returned by make_network_boxes/get_network_boxes. */
void free_detections(detection *dets, int n)
{
    int i;
    for(i = 0; i < n; ++i){
        free(dets[i].prob);
        if(dets[i].mask) free(dets[i].mask);
    }
    free(dets);
}

/* Letterboxes im to the net's resolution and predicts on it.
 * NOTE(review): forces batch size to 1 as a side effect. */
float *network_predict_image(network *net, image im)
{
    image imr = letterbox_image(im, net->w, net->h);
    set_batch_network(net, 1);
    float *p = network_predict(net, imr.data);
    free_image(imr);
    return p;
}

int network_width(network *net){return net->w;}
int network_height(network *net){return net->h;}

/* Predicts every row of test n times and averages the outputs (e.g. for
 * test-time augmentation via dropout); returns a rows x outputs matrix. */
matrix network_predict_data_multi(network *net, data test, int n)
{
    int i,j,b,m;
    int k = net->outputs;
    matrix pred = make_matrix(test.X.rows, k);
    float *X = calloc(net->batch*test.X.rows, sizeof(float));
    for(i = 0; i < test.X.rows; i += net->batch){
        for(b = 0; b < net->batch; ++b){
            if(i+b == test.X.rows) break;
            memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float));
        }
        for(m = 0; m < n; ++m){
            float *out = network_predict(net, X);
            for(b = 0; b < net->batch; ++b){
                if(i+b == test.X.rows) break;
                for(j = 0; j < k; ++j){
                    pred.vals[i+b][j] += out[j+b*k]/n;
                }
            }
        }
    }
    free(X);
    return pred;
}

/* Predicts every row of test once; returns a rows x outputs matrix.
 * NOTE(review): the staging buffer here is batch*cols floats, while
 * network_predict_data_multi above allocates batch*rows — worth confirming
 * against upstream; cols is what memcpy actually consumes. */
matrix network_predict_data(network *net, data test)
{
    int i,j,b;
    int k = net->outputs;
    matrix pred = make_matrix(test.X.rows, k);
    float *X = calloc(net->batch*test.X.cols, sizeof(float));
    for(i = 0; i < test.X.rows; i += net->batch){
        for(b = 0; b < net->batch; ++b){
            if(i+b == test.X.rows) break;
            memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float));
        }
        float *out = network_predict(net, X);
        for(b = 0; b < net->batch; ++b){
            if(i+b == test.X.rows) break;
            for(j = 0; j < k; ++j){
                pred.vals[i+b][j] = out[j+b*k];
            }
        }
    }
    free(X);
    return pred;
}

/* Debug dump: per-layer output mean/variance plus up to 100 values. */
void print_network(network *net)
{
    int i,j;
    for(i = 0; i < net->n; ++i){
        layer l = net->layers[i];
        float *output = l.output;
        int n = l.outputs;
        float mean = mean_array(output, n);
        float vari = variance_array(output, n);
        fprintf(stderr, "Layer %d - Mean: %f, Variance: %f\n",i,mean, vari);
        if(n > 100) n = 100;
        for(j = 0; j < n; ++j) fprintf(stderr, "%f, ", output[j]);
        if(n == 100)fprintf(stderr,".....\n");
        fprintf(stderr, "\n");
    }
}

/* Prints a 2x2 agreement table for two networks on the same data and a
 * McNemar-style statistic (|b-c|-1)^2 / (b+c). */
void compare_networks(network *n1, network *n2, data test)
{
    matrix g1 = network_predict_data(n1, test);
    matrix g2 = network_predict_data(n2, test);
    int i;
    int a,b,c,d;
    a = b = c = d = 0;
    for(i = 0; i < g1.rows; ++i){
        int truth = max_index(test.y.vals[i], test.y.cols);
        int p1 = max_index(g1.vals[i], g1.cols);
        int p2 = max_index(g2.vals[i], g2.cols);
        if(p1 == truth){
            if(p2 == truth) ++d;
            else ++c;
        }else{
            if(p2 == truth) ++b;
            else ++a;
        }
    }
    printf("%5d %5d\n%5d %5d\n", a, b, c, d);
    float num = pow((abs(b - c) - 1.), 2.);
    float den = b + c;
    printf("%f\n", num/den);
}

/* Top-1 accuracy of net on d. */
float network_accuracy(network *net, data d)
{
    matrix guess = network_predict_data(net, d);
    float acc = matrix_topk_accuracy(d.y, guess,1);
    free_matrix(guess);
    return acc;
}

+float *network_accuracies(network *net, data d, int n) +{ + static float acc[2]; + matrix guess = network_predict_data(net, d); + acc[0] = matrix_topk_accuracy(d.y, guess, 1); + acc[1] = matrix_topk_accuracy(d.y, guess, n); + free_matrix(guess); + return acc; +} + +layer get_network_output_layer(network *net) +{ + int i; + for(i = net->n - 1; i >= 0; --i){ + if(net->layers[i].type != COST) break; + } + return net->layers[i]; +} + +float network_accuracy_multi(network *net, data d, int n) +{ + matrix guess = network_predict_data_multi(net, d, n); + float acc = matrix_topk_accuracy(d.y, guess,1); + free_matrix(guess); + return acc; +} + +void free_network(network *net) +{ + int i; + for(i = 0; i < net->n; ++i){ + free_layer(net->layers[i]); + } + free(net->layers); + if(net->input) free(net->input); + if(net->truth) free(net->truth); +#ifdef GPU + if(net->input_gpu) cuda_free(net->input_gpu); + if(net->truth_gpu) cuda_free(net->truth_gpu); +#endif + free(net); +} + +// Some day... +// ^ What the hell is this comment for? 


/* Same scan as get_network_output_layer: last non-COST layer from the back.
 * NOTE(review): duplicates get_network_output_layer — candidate for dedup. */
layer network_output_layer(network *net)
{
    int i;
    for(i = net->n - 1; i >= 0; --i){
        if(net->layers[i].type != COST) break;
    }
    return net->layers[i];
}

/* Input count of the first layer. */
int network_inputs(network *net)
{
    return net->layers[0].inputs;
}

/* Output count of the output layer. */
int network_outputs(network *net)
{
    return network_output_layer(net).outputs;
}

/* Host-side output buffer of the output layer. */
float *network_output(network *net)
{
    return network_output_layer(net).output;
}

#ifdef GPU

/* GPU forward pass: pushes input (and truth, when training) to the device,
 * runs each layer's forward_gpu with the previous layer's output chained in
 * as input, then pulls the output and computes the cost on the host. */
void forward_network_gpu(network *netp)
{
    network net = *netp;
    cuda_set_device(net.gpu_index);
    cuda_push_array(net.input_gpu, net.input, net.inputs*net.batch);
    if(net.truth){
        cuda_push_array(net.truth_gpu, net.truth, net.truths*net.batch);
    }

    int i;
    for(i = 0; i < net.n; ++i){
        net.index = i;
        layer l = net.layers[i];
        if(l.delta_gpu){
            /* zero the gradient buffer before this layer runs */
            fill_gpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
        }
        l.forward_gpu(l, net);
        net.input_gpu = l.output_gpu;
        net.input = l.output;
        if(l.truth) {
            net.truth_gpu = l.output_gpu;
            net.truth = l.output;
        }
    }
    pull_network_output(netp);
    calc_network_cost(netp);
}

/* GPU backward pass from the last layer down; layer i sees layer i-1's
 * output/delta as its input/delta. A layer with stopbackward set halts
 * propagation. */
void backward_network_gpu(network *netp)
{
    int i;
    network net = *netp;
    network orig = net;
    cuda_set_device(net.gpu_index);
    for(i = net.n-1; i >= 0; --i){
        // Ruihao
        // fprintf(stderr, "backward_network_gpu Layer %d\n", i);
        // Ruihao
        layer l = net.layers[i];
        if(l.stopbackward) break;
        if(i == 0){
            /* first layer backprops against the original network input */
            net = orig;
        }else{
            layer prev = net.layers[i-1];
            net.input = prev.output;
            net.delta = prev.delta;
            net.input_gpu = prev.output_gpu;
            net.delta_gpu = prev.delta_gpu;
        }
        net.index = i;
        l.backward_gpu(l, net);
    }
    // Ruihao
    // fprintf(stderr, "backward_network_gpu succeed\n");
    // Ruihao
}

/* Applies each layer's GPU optimizer step with the net's current
 * hyperparameters; increments the shared step counter *net.t. */
void update_network_gpu(network *netp)
{
    network net = *netp;
    cuda_set_device(net.gpu_index);
    int i;
    update_args a = {0};
    a.batch = net.batch*net.subdivisions;
    a.learning_rate = get_current_rate(netp);
    a.momentum = net.momentum;
    a.decay = net.decay;
    a.adam = net.adam;
    a.B1 = net.B1;
    a.B2 = net.B2;
    a.eps = net.eps;
    ++*net.t;
    a.t = (*net.t);

    for(i = 0; i < net.n; ++i){
        layer l = net.layers[i];
        if(l.update_gpu){
            l.update_gpu(l, a);
        }
    }
}

/* Zeroes all accumulated GPU gradients without applying them. */
void harmless_update_network_gpu(network *netp)
{
    network net = *netp;
    cuda_set_device(net.gpu_index);
    int i;
    for(i = 0; i < net.n; ++i){
        layer l = net.layers[i];
        if(l.weight_updates_gpu) fill_gpu(l.nweights, 0, l.weight_updates_gpu, 1);
        if(l.bias_updates_gpu) fill_gpu(l.nbiases, 0, l.bias_updates_gpu, 1);
        if(l.scale_updates_gpu) fill_gpu(l.nbiases, 0, l.scale_updates_gpu, 1);
    }
}

/* Argument bundle handed (heap-allocated) to a training thread. */
typedef struct {
    network *net;
    data d;
    float *err;
} train_args;

/* Thread body: copies args, frees the heap bundle, binds the thread to the
 * net's GPU and trains; writes the error through args.err. */
void *train_thread(void *ptr)
{
    train_args args = *(train_args*)ptr;
    free(ptr);
    cuda_set_device(args.net->gpu_index);
    *args.err = train_network(args.net, args.d);
    return 0;
}

/* Launches train_thread; the thread owns (and frees) the args bundle. */
pthread_t train_network_in_thread(network *net, data d, float *err)
{
    pthread_t thread;
    train_args *ptr = (train_args *)calloc(1, sizeof(train_args));
    ptr->net = net;
    ptr->d = d;
    ptr->err = err;
    if(pthread_create(&thread, 0, train_thread, ptr)) error("Thread creation failed");
    return thread;
}

/* Accumulates l's pulled values (staged in its *_updates buffers — see
 * pull_weights below) into base's weights/biases/scales. */
void merge_weights(layer l, layer base)
{
    if (l.type == CONVOLUTIONAL) {
        axpy_cpu(l.n, 1, l.bias_updates, 1, base.biases, 1);
        axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weights, 1);
        if (l.scales) {
            axpy_cpu(l.n, 1, l.scale_updates, 1, base.scales, 1);
        }
    } else if(l.type == CONNECTED) {
        axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.biases, 1);
        axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weights, 1);
    }
}

/* Scales l's weights/biases/scales by s (s=0 zeroes them for accumulation). */
void scale_weights(layer l, float s)
{
    if (l.type == CONVOLUTIONAL) {
        scal_cpu(l.n, s, l.biases, 1);
        scal_cpu(l.nweights, s, l.weights, 1);
        if (l.scales) {
            scal_cpu(l.n, s, l.scales, 1);
        }
    } else if(l.type == CONNECTED) {
        scal_cpu(l.outputs, s, l.biases, 1);
        scal_cpu(l.outputs*l.inputs, s, l.weights, 1);
    }
}


/* Pulls GPU weights/biases to the host. NOTE(review): intentionally lands
 * them in the *_updates host buffers, reusing them as scratch so the host
 * weight arrays are not clobbered — merge_weights reads them from there. */
void pull_weights(layer l)
{
    if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
        cuda_pull_array(l.biases_gpu, l.bias_updates, l.n);
        cuda_pull_array(l.weights_gpu, l.weight_updates, l.nweights);
        if(l.scales) cuda_pull_array(l.scales_gpu, l.scale_updates, l.n);
    } else if(l.type == CONNECTED){
        cuda_pull_array(l.biases_gpu, l.bias_updates, l.outputs);
        cuda_pull_array(l.weights_gpu, l.weight_updates, l.outputs*l.inputs);
    }
}

/* Pushes l's own host weights/biases/scales to the GPU. */
void push_weights(layer l)
{
    if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
        cuda_push_array(l.biases_gpu, l.biases, l.n);
        cuda_push_array(l.weights_gpu, l.weights, l.nweights);
        if(l.scales) cuda_push_array(l.scales_gpu, l.scales, l.n);
    } else if(l.type == CONNECTED){
        cuda_push_array(l.biases_gpu, l.biases, l.outputs);
        cuda_push_array(l.weights_gpu, l.weights, l.outputs*l.inputs);
    }
}

/* Pushes base's (averaged) host weights into l's GPU buffers. */
void distribute_weights(layer l, layer base)
{
    if (l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL) {
        cuda_push_array(l.biases_gpu, base.biases, l.n);
        cuda_push_array(l.weights_gpu, base.weights, l.nweights);
        if (base.scales) cuda_push_array(l.scales_gpu, base.scales, l.n);
    } else if (l.type == CONNECTED) {
        cuda_push_array(l.biases_gpu, base.biases, l.outputs);
        cuda_push_array(l.weights_gpu, base.weights, l.outputs*l.inputs);
    }
}


/* The two comment blocks below are dead upstream code (gradient-sync and
 * single-network sync_layer variants) retained verbatim for reference. */
/*

   void pull_updates(layer l)
   {
   if(l.type == CONVOLUTIONAL){
   cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n);
   cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights);
   if(l.scale_updates) cuda_pull_array(l.scale_updates_gpu, l.scale_updates, l.n);
   } else if(l.type == CONNECTED){
   cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
   cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs);
   }
   }

   void push_updates(layer l)
   {
   if(l.type == CONVOLUTIONAL){
   cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n);
   cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights);
   if(l.scale_updates) cuda_push_array(l.scale_updates_gpu, l.scale_updates, l.n);
   } else if(l.type == CONNECTED){
   cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
   cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs);
   }
   }

   void update_layer(layer l, network net)
   {
   int update_batch = net.batch*net.subdivisions;
   float rate = get_current_rate(net);
   l.t = get_current_batch(net);
   if(l.update_gpu){
   l.update_gpu(l, update_batch, rate*l.learning_rate_scale, net.momentum, net.decay);
   }
   }
   void merge_updates(layer l, layer base)
   {
   if (l.type == CONVOLUTIONAL) {
   axpy_cpu(l.n, 1, l.bias_updates, 1, base.bias_updates, 1);
   axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weight_updates, 1);
   if (l.scale_updates) {
   axpy_cpu(l.n, 1, l.scale_updates, 1, base.scale_updates, 1);
   }
   } else if(l.type == CONNECTED) {
   axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.bias_updates, 1);
   axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weight_updates, 1);
   }
   }

   void distribute_updates(layer l, layer base)
   {
   if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
   cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.n);
   cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.nweights);
   if(base.scale_updates) cuda_push_array(l.scale_updates_gpu, base.scale_updates, l.n);
   } else if(l.type == CONNECTED){
   cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.outputs);
   cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.outputs*l.inputs);
   }
   }
 */

/*
   void sync_layer(network *nets, int n, int j)
   {
   int i;
   network net = nets[0];
   layer base = net.layers[j];
   scale_weights(base, 0);
   for (i = 0; i < n; ++i) {
   cuda_set_device(nets[i].gpu_index);
   layer l = nets[i].layers[j];
   pull_weights(l);
   merge_weights(l, base);
   }
   scale_weights(base, 1./n);
   for (i = 0; i < n; ++i) {
   cuda_set_device(nets[i].gpu_index);
   layer l = nets[i].layers[j];
   distribute_weights(l, base);
   }
   }
 */

/* Averages layer j's weights across n GPU replicas: zero the base copy,
 * pull+accumulate each replica, divide by n, push the average back out.
 * base aliases nets[0]'s layer, so replica 0's host weights become the avg. */
void sync_layer(network **nets, int n, int j)
{
    int i;
    network *net = nets[0];
    layer base = net->layers[j];
    scale_weights(base, 0);
    for (i = 0; i < n; ++i) {
        cuda_set_device(nets[i]->gpu_index);
        layer l = nets[i]->layers[j];
        pull_weights(l);
        merge_weights(l, base);
    }
    scale_weights(base, 1./n);
    for (i = 0; i < n; ++i) {
        cuda_set_device(nets[i]->gpu_index);
        layer l = nets[i]->layers[j];
        distribute_weights(l, base);
    }
}

/* Argument bundle for a per-layer sync thread. */
typedef struct{
    network **nets;
    int n;
    int j;
} sync_args;

/* Thread body: syncs one layer across replicas, then frees the bundle. */
void *sync_layer_thread(void *ptr)
{
    sync_args args = *(sync_args*)ptr;
    sync_layer(args.nets, args.n, args.j);
    free(ptr);
    return 0;
}

/* Launches sync_layer_thread for layer j; the thread frees the bundle. */
pthread_t sync_layer_in_thread(network **nets, int n, int j)
{
    pthread_t thread;
    sync_args *ptr = (sync_args *)calloc(1, sizeof(sync_args));
    ptr->nets = nets;
    ptr->n = n;
    ptr->j = j;
    if(pthread_create(&thread, 0, sync_layer_thread, ptr)) error("Thread creation failed");
    return thread;
}

/* Synchronizes all layers of all replicas (one thread per layer) and
 * equalizes the `seen` counters, crediting the other replicas' progress
 * since the last sync. */
void sync_nets(network **nets, int n, int interval)
{
    int j;
    int layers = nets[0]->n;
    pthread_t *threads = (pthread_t *) calloc(layers, sizeof(pthread_t));

    *(nets[0]->seen) += interval * (n-1) * nets[0]->batch * nets[0]->subdivisions;
    for (j = 0; j < n; ++j){
        *(nets[j]->seen) = *(nets[0]->seen);
    }
    for (j = 0; j < layers; ++j) {
        threads[j] = sync_layer_in_thread(nets, n, j);
    }
    for (j = 0; j < layers; ++j) {
        pthread_join(threads[j], 0);
    }
    free(threads);
}

/* Multi-GPU training step: splits d across n replicas, trains each in its
 * own thread, and periodically averages weights. Returns mean error. */
float train_networks(network **nets, int n, data d, int interval)
{
    int i;
    int batch = nets[0]->batch;
    int subdivisions = nets[0]->subdivisions;
    assert(batch * subdivisions * n == d.X.rows);
    pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t));
    float *errors = (float *) calloc(n, sizeof(float));

    float sum = 0;
    for(i = 0; i < n; ++i){
        data p = get_data_part(d, i, n);
        threads[i] = train_network_in_thread(nets[i], p, errors + i);
    }
    for(i = 0; i < n; ++i){
        pthread_join(threads[i], 0);
        //printf("%f\n", errors[i]);
        sum += errors[i];
    }
    //cudaDeviceSynchronize();
    if (get_current_batch(nets[0]) % interval == 0) {
        printf("Syncing... ");
        fflush(stdout);
        sync_nets(nets, n, interval);
        printf("Done!\n");
    }
    //cudaDeviceSynchronize();
    free(threads);
    free(errors);
    return (float)sum/(n);
}

/* Copies the output layer's GPU buffer back to its host buffer. */
void pull_network_output(network *net)
{
    layer l = get_network_output_layer(net);
    cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch);
}

#endif
diff --git a/workloads/realworld/uvm_prefetch/darknet/src/network.h b/workloads/realworld/uvm_prefetch/darknet/src/network.h
new file mode 100644
index 0000000000000000000000000000000000000000..1b0dfd1aaa3e090c6ce276d26f24d127de2cb66d
--- /dev/null
+++ b/workloads/realworld/uvm_prefetch/darknet/src/network.h
@@ -0,0 +1,29 @@
// Oh boy, why am I about to do this....
#ifndef NETWORK_H
#define NETWORK_H
#include "darknet.h"

#include "image.h"
#include "layer.h"
#include "data.h"
#include "tree.h"


#ifdef GPU
void pull_network_output(network *net);
#endif

void compare_networks(network *n1, network *n2, data d);
char *get_layer_string(LAYER_TYPE a);

network *make_network(int n);


float network_accuracy_multi(network *net, data d, int n);
int get_predicted_class_network(network *net);
void print_network(network *net);
int resize_network(network *net, int w, int h);
void calc_network_cost(network *net);

#endif

diff --git a/workloads/realworld/uvm_prefetch/darknet/src/normalization_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/normalization_layer.c
new file mode 100644
index 0000000000000000000000000000000000000000..424714fe8653f79b57fd4cde625997749d8eff83
--- /dev/null
+++ b/workloads/realworld/uvm_prefetch/darknet/src/normalization_layer.c
@@ -0,0 +1,151 @@
#include "normalization_layer.h"
#include "blas.h"

/* NOTE(review): the system-header name was lost in extraction; presumably
 * <stdio.h> for fprintf — confirm against upstream. */
#include

/* Builds a Local Response Normalization (LRN) layer: output keeps the input
 * shape; `squared` and `norms` are per-element scratch buffers. */
layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)
{
    fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size);
    layer layer = {0};
    layer.type = NORMALIZATION;
    layer.batch = batch;
    layer.h = layer.out_h = h;
    layer.w = layer.out_w = w;
    layer.c = layer.out_c = c;
    layer.kappa = kappa;
    layer.size = size;
    layer.alpha = alpha;
    layer.beta = beta;
    layer.output = calloc(h * w * c * batch, sizeof(float));
    layer.delta = calloc(h * w * c * batch, sizeof(float));
    layer.squared = calloc(h * w * c * batch, sizeof(float));
    layer.norms = calloc(h * w * c * batch, sizeof(float));
    layer.inputs = w*h*c;
    layer.outputs = layer.inputs;

    layer.forward = forward_normalization_layer;
    layer.backward = backward_normalization_layer;
    #ifdef GPU
    layer.forward_gpu = forward_normalization_layer_gpu;
    layer.backward_gpu = backward_normalization_layer_gpu;

    layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch);
    layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch);
    layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch);
    layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch);
    #endif
    return layer;
}

/* Reallocates all LRN buffers (host via realloc, device via free+remake)
 * for a new spatial resolution. */
void resize_normalization_layer(layer *layer, int w, int h)
{
    int c = layer->c;
    int batch = layer->batch;
    layer->h = h;
    layer->w = w;
    layer->out_h = h;
    layer->out_w = w;
    layer->inputs = w*h*c;
    layer->outputs = layer->inputs;
    layer->output = realloc(layer->output, h * w * c * batch * sizeof(float));
    layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float));
    layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float));
    layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float));
#ifdef GPU
    cuda_free(layer->output_gpu);
    cuda_free(layer->delta_gpu);
    cuda_free(layer->squared_gpu);
    cuda_free(layer->norms_gpu);
    layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch);
    layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch);
    layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch);
    layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch);
#endif
}

/* CPU LRN forward: norms[k] = kappa + alpha * sum of squared inputs over a
 * `size`-channel window around k (built incrementally with a sliding sum);
 * output = input * norms^-beta. */
void forward_normalization_layer(const layer layer, network net)
{
    int k,b;
    int w = layer.w;
    int h = layer.h;
    int c = layer.c;
    scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1);

    for(b = 0; b < layer.batch; ++b){
        float *squared = layer.squared + w*h*c*b;
        float *norms = layer.norms + w*h*c*b;
        float *input = net.input + w*h*c*b;
        pow_cpu(w*h*c, 2, input, 1, squared, 1);

        /* seed channel 0's window, then slide it channel by channel */
        const_cpu(w*h, layer.kappa, norms, 1);
        for(k = 0; k < layer.size/2; ++k){
            axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
        }

        for(k = 1; k < layer.c; ++k){
            copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
            int prev = k - ((layer.size-1)/2) - 1;
            int next = k + (layer.size/2);
            if(prev >= 0)      axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
            if(next < layer.c) axpy_cpu(w*h,  layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
        }
    }
    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1);
    mul_cpu(w*h*c*layer.batch, net.input, 1, layer.output, 1);
}

/* CPU LRN backward. Upstream's own caveats retained below. */
void backward_normalization_layer(const layer layer, network net)
{
    // TODO This is approximate ;-)
    // Also this should add in to delta instead of overwritting.

    int w = layer.w;
    int h = layer.h;
    int c = layer.c;
    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, net.delta, 1);
    mul_cpu(w*h*c*layer.batch, layer.delta, 1, net.delta, 1);
}

#ifdef GPU
/* GPU mirror of forward_normalization_layer (same sliding-window scheme). */
void forward_normalization_layer_gpu(const layer layer, network net)
{
    int k,b;
    int w = layer.w;
    int h = layer.h;
    int c = layer.c;
    scal_gpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1);

    for(b = 0; b < layer.batch; ++b){
        float *squared = layer.squared_gpu + w*h*c*b;
        float *norms = layer.norms_gpu + w*h*c*b;
        float *input = net.input_gpu + w*h*c*b;
        pow_gpu(w*h*c, 2, input, 1, squared, 1);

        const_gpu(w*h, layer.kappa, norms, 1);
        for(k = 0; k < layer.size/2; ++k){
            axpy_gpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
        }

        for(k = 1; k < layer.c; ++k){
            copy_gpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
            int prev = k - ((layer.size-1)/2) - 1;
            int next = k + (layer.size/2);
            if(prev >= 0)      axpy_gpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
            if(next < layer.c) axpy_gpu(w*h,  layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
        }
    }
    pow_gpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1);
    mul_gpu(w*h*c*layer.batch, net.input_gpu, 1, layer.output_gpu, 1);
}

/* GPU mirror of backward_normalization_layer. */
void backward_normalization_layer_gpu(const layer layer, network net)
{
    // TODO This is approximate ;-)

    int w = layer.w;
    int h = layer.h;
    int c = layer.c;
    pow_gpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, net.delta_gpu, 1);
    mul_gpu(w*h*c*layer.batch, layer.delta_gpu, 1, net.delta_gpu, 1);
}
#endif
diff --git a/workloads/realworld/uvm_prefetch/darknet/src/normalization_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/normalization_layer.h
new file mode 100644
index 0000000000000000000000000000000000000000..665baa5066282335b6625618ce07c2fcc833d952
--- /dev/null
+++ b/workloads/realworld/uvm_prefetch/darknet/src/normalization_layer.h
@@ -0,0 +1,19 @@
#ifndef NORMALIZATION_LAYER_H
#define NORMALIZATION_LAYER_H

#include "image.h"
#include "layer.h"
#include "network.h"

layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa);
/* NOTE(review): parameter order here is (h, w) but the .c definition takes
 * (w, h); C prototypes match by type only, so this compiles — fix the names
 * for clarity upstream. */
void resize_normalization_layer(layer *layer, int h, int w);
void forward_normalization_layer(const layer layer, network net);
void backward_normalization_layer(const layer layer, network net);
void visualize_normalization_layer(layer layer, char *window);

#ifdef GPU
void forward_normalization_layer_gpu(const layer layer, network net);
void backward_normalization_layer_gpu(const layer layer, network net);
#endif

#endif
diff --git a/workloads/realworld/uvm_prefetch/darknet/src/option_list.c b/workloads/realworld/uvm_prefetch/darknet/src/option_list.c
new file mode 100644
index 0000000000000000000000000000000000000000..2f52781f8096fecc5e9d1db3cfbfa10685506b93
--- /dev/null
+++ b/workloads/realworld/uvm_prefetch/darknet/src/option_list.c
@@ -0,0 +1,140 @@
/* NOTE(review): system-header names lost in extraction; presumably
 * <stdio.h>, <string.h>, <stdlib.h> — confirm against upstream. */
#include
#include
#include
#include "option_list.h"
#include "utils.h"

/* Parses a key=value config file into a list of kvp entries, skipping blank
 * lines and '#'/';' comments. Lines that keep their '=' stay owned by the
 * list (read_option stores pointers into the line buffer). */
list *read_data_cfg(char *filename)
{
    FILE *file = fopen(filename, "r");
    if(file == 0) file_error(filename);
    char *line;
    int nu = 0;
    list *options = make_list();
    while((line=fgetl(file)) != 0){
        ++ nu;
        strip(line);
        switch(line[0]){
            case '\0':
            case '#':
            case ';':
                free(line);
                break;
            default:
                if(!read_option(line, options)){
                    fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line);
                    free(line);
                }
                break;
        }
    }
    fclose(file);
    return options;
}

/* Loads class names (from "names" or "labels") and class count from a data
 * config file. */
metadata get_metadata(char *file)
{
    metadata m = {0};
    list *options = read_data_cfg(file);

    char *name_list = option_find_str(options, "names", 0);
    if(!name_list) name_list = option_find_str(options, "labels", 0);
    if(!name_list) {
        fprintf(stderr, "No names or labels found\n");
    } else {
        m.names = get_labels(name_list);
    }
    m.classes = option_find_int(options, "classes", 2);
    free_list(options);
    return m;
}

/* Splits "key=value" in place (the '=' becomes '\0') and inserts it.
 * Returns 0 if '=' is the last character.
 * NOTE(review): when no '=' exists at all, i == len (not len-1), so the line
 * is inserted with val == NULL instead of being rejected — looks like a
 * latent upstream bug; confirm before changing. */
int read_option(char *s, list *options)
{
    size_t i;
    size_t len = strlen(s);
    char *val = 0;
    for(i = 0; i < len; ++i){
        if(s[i] == '='){
            s[i] = '\0';
            val = s+i+1;
            break;
        }
    }
    if(i == len-1) return 0;
    char *key = s;
    option_insert(options, key, val);
    return 1;
}

/* Appends a key/value pair; key and val are stored by pointer, not copied. */
void option_insert(list *l, char *key, char *val)
{
    kvp *p = malloc(sizeof(kvp));
    p->key = key;
    p->val = val;
    p->used = 0;
    list_insert(l, p);
}

/* Warns about options that were never looked up. */
void option_unused(list *l)
{
    node *n = l->front;
    while(n){
        kvp *p = (kvp *)n->val;
        if(!p->used){
            fprintf(stderr, "Unused field: '%s = %s'\n", p->key, p->val);
        }
        n = n->next;
    }
}

/* Linear lookup by key; marks the entry used. Returns NULL if absent. */
char *option_find(list *l, char *key)
{
    node *n = l->front;
    while(n){
        kvp *p = (kvp *)n->val;
        if(strcmp(p->key, key) == 0){
            p->used = 1;
            return p->val;
        }
        n = n->next;
    }
    return 0;
}
/* String lookup with default; logs when the default is used (if non-NULL). */
char *option_find_str(list *l, char *key, char *def)
{
    char *v = option_find(l, key);
    if(v) return v;
    if(def) fprintf(stderr, "%s: Using default '%s'\n", key, def);
    return def;
}

/* Integer lookup with default; logs when the default is used. */
int option_find_int(list *l, char *key, int def)
{
    char *v = option_find(l, key);
    if(v) return atoi(v);
    fprintf(stderr, "%s: Using default '%d'\n", key, def);
    return def;
}

/* Integer lookup with default; silent. */
int option_find_int_quiet(list *l, char *key, int def)
{
    char *v = option_find(l, key);
    if(v) return atoi(v);
    return def;
}

/* Float lookup with default; silent. */
float option_find_float_quiet(list *l, char *key, float def)
{
    char *v = option_find(l, key);
    if(v) return atof(v);
    return def;
}

/* Float lookup with default; logs when the default is used. */
float option_find_float(list *l, char *key, float def)
{
    char *v = option_find(l, key);
    if(v) return atof(v);
    fprintf(stderr, "%s: Using default '%lf'\n", key, def);
    return def;
}
diff --git a/workloads/realworld/uvm_prefetch/darknet/src/option_list.h b/workloads/realworld/uvm_prefetch/darknet/src/option_list.h
new file mode 100644
index 0000000000000000000000000000000000000000..844bd8724b77889d9ab6e6e70f62305e3339048c
--- /dev/null
+++
b/workloads/realworld/uvm_prefetch/darknet/src/option_list.h
@@ -0,0 +1,19 @@
#ifndef OPTION_LIST_H
#define OPTION_LIST_H
#include "list.h"

/* One key=value config entry; `used` tracks whether it was ever looked up. */
typedef struct{
    char *key;
    char *val;
    int used;
} kvp;


/* NOTE(review): option_find_str / option_find_int / option_find_int_quiet
 * are defined in option_list.c but not declared here — presumably declared
 * elsewhere (e.g. darknet.h); confirm. */
int read_option(char *s, list *options);
void option_insert(list *l, char *key, char *val);
char *option_find(list *l, char *key);
float option_find_float(list *l, char *key, float def);
float option_find_float_quiet(list *l, char *key, float def);
void option_unused(list *l);

#endif
diff --git a/workloads/realworld/uvm_prefetch/darknet/src/parser.c b/workloads/realworld/uvm_prefetch/darknet/src/parser.c
new file mode 100644
index 0000000000000000000000000000000000000000..c8141c9f2ddc95941900d11006ff583fadf22290
--- /dev/null
+++ b/workloads/realworld/uvm_prefetch/darknet/src/parser.c
@@ -0,0 +1,1312 @@
/* NOTE(review): system-header names lost in extraction; presumably
 * <stdio.h>, <string.h>, <stdlib.h>, <stdint.h> — confirm against upstream. */
#include
#include
#include
#include

#include "activation_layer.h"
#include "logistic_layer.h"
#include "l2norm_layer.h"
#include "activations.h"
#include "avgpool_layer.h"
#include "batchnorm_layer.h"
#include "blas.h"
#include "connected_layer.h"
#include "deconvolutional_layer.h"
#include "convolutional_layer.h"
#include "cost_layer.h"
#include "crnn_layer.h"
#include "crop_layer.h"
#include "detection_layer.h"
#include "dropout_layer.h"
#include "gru_layer.h"
#include "list.h"
#include "local_layer.h"
#include "maxpool_layer.h"
#include "normalization_layer.h"
#include "option_list.h"
#include "parser.h"
#include "region_layer.h"
#include "yolo_layer.h"
#include "iseg_layer.h"
#include "reorg_layer.h"
#include "rnn_layer.h"
#include "route_layer.h"
#include "upsample_layer.h"
#include "shortcut_layer.h"
#include "softmax_layer.h"
#include "lstm_layer.h"
#include "utils.h"

/* One "[type]" section of a .cfg file with its key=value options. */
typedef struct{
    char *type;
    list *options;
}section;

list *read_cfg(char *filename);

/* Maps a "[section]" header string to its LAYER_TYPE; BLANK if unknown. */
LAYER_TYPE string_to_layer_type(char * type)
{

    if (strcmp(type, "[shortcut]")==0) return SHORTCUT;
    if (strcmp(type, "[crop]")==0) return CROP;
    if (strcmp(type, "[cost]")==0) return COST;
    if (strcmp(type, "[detection]")==0) return DETECTION;
    if (strcmp(type, "[region]")==0) return REGION;
    if (strcmp(type, "[yolo]")==0) return YOLO;
    if (strcmp(type, "[iseg]")==0) return ISEG;
    if (strcmp(type, "[local]")==0) return LOCAL;
    if (strcmp(type, "[conv]")==0
            || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
    if (strcmp(type, "[deconv]")==0
            || strcmp(type, "[deconvolutional]")==0) return DECONVOLUTIONAL;
    if (strcmp(type, "[activation]")==0) return ACTIVE;
    if (strcmp(type, "[logistic]")==0) return LOGXENT;
    if (strcmp(type, "[l2norm]")==0) return L2NORM;
    if (strcmp(type, "[net]")==0
            || strcmp(type, "[network]")==0) return NETWORK;
    if (strcmp(type, "[crnn]")==0) return CRNN;
    if (strcmp(type, "[gru]")==0) return GRU;
    if (strcmp(type, "[lstm]") == 0) return LSTM;
    if (strcmp(type, "[rnn]")==0) return RNN;
    if (strcmp(type, "[conn]")==0
            || strcmp(type, "[connected]")==0) return CONNECTED;
    if (strcmp(type, "[max]")==0
            || strcmp(type, "[maxpool]")==0) return MAXPOOL;
    if (strcmp(type, "[reorg]")==0) return REORG;
    if (strcmp(type, "[avg]")==0
            || strcmp(type, "[avgpool]")==0) return AVGPOOL;
    if (strcmp(type, "[dropout]")==0) return DROPOUT;
    if (strcmp(type, "[lrn]")==0
            || strcmp(type, "[normalization]")==0) return NORMALIZATION;
    if (strcmp(type, "[batchnorm]")==0) return BATCHNORM;
    if (strcmp(type, "[soft]")==0
            || strcmp(type, "[softmax]")==0) return SOFTMAX;
    if (strcmp(type, "[route]")==0) return ROUTE;
    if (strcmp(type, "[upsample]")==0) return UPSAMPLE;
    return BLANK;
}

/* Frees a section, its option nodes, and each kvp's key buffer.
 * NOTE(review): pair->val is not freed separately — presumably it points
 * into the same allocation as pair->key (the line split at '=' by
 * read_option), so freeing key releases both; confirm. */
void free_section(section *s)
{
    free(s->type);
    node *n = s->options->front;
    while(n){
        kvp *pair = (kvp *)n->val;
        free(pair->key);
        free(pair);
        node *next = n->next;
        free(n);
        n = next;
    }
    free(s->options);
    free(s);
}

/* Parses up to n comma-separated floats from `data` into a, mutating the
 * string in place (commas become '\0'). */
void parse_data(char *data, float *a, int n)
{
    int i;
    if(!data) return;
    char *curr = data;
    char *next = data;
    int done = 0;
    for(i = 0; i < n && !done; ++i){
        while(*++next !='\0' && *next != ',');
        if(*next == '\0') done = 1;
        *next = '\0';
        sscanf(curr, "%g", &a[i]);
        curr = next+1;
    }
}

/* Running shape/state threaded through the per-layer parse_* functions. */
typedef struct size_params{
    int batch;
    int inputs;
    int h;
    int w;
    int c;
    int index;
    int time_steps;
    network *net;
} size_params;

/* Builds a locally-connected layer from its config section. */
local_layer parse_local(list *options, size_params params)
{
    int n = option_find_int(options, "filters",1);
    int size = option_find_int(options, "size",1);
    int stride = option_find_int(options, "stride",1);
    int pad = option_find_int(options, "pad",0);
    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation = get_activation(activation_s);

    int batch,h,w,c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch=params.batch;
    if(!(h && w && c)) error("Layer before local layer must output image.");

    local_layer layer = make_local_layer(batch,h,w,c,n,size,stride,pad,activation);

    return layer;
}

/* Builds a deconvolutional layer from its config section. */
layer parse_deconvolutional(list *options, size_params params)
{
    int n = option_find_int(options, "filters",1);
    int size = option_find_int(options, "size",1);
    int stride = option_find_int(options, "stride",1);

    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation = get_activation(activation_s);

    int batch,h,w,c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch=params.batch;
    if(!(h && w && c)) error("Layer before deconvolutional layer must output image.");
    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
    int pad = option_find_int_quiet(options, "pad",0);
    int padding = option_find_int_quiet(options, "padding",0);
    /* pad=1 means "same" padding: size/2 overrides any explicit padding */
    if(pad) padding = size/2;

    layer l = make_deconvolutional_layer(batch,h,w,c,n,size,stride,padding, activation, batch_normalize, params.net->adam);

    return l;
}


/* Builds a convolutional layer from its config section. */
convolutional_layer parse_convolutional(list *options, size_params params)
{
    int n = option_find_int(options, "filters",1);
    int size = option_find_int(options, "size",1);
    int stride = option_find_int(options, "stride",1);
    int pad = option_find_int_quiet(options, "pad",0);
    int padding = option_find_int_quiet(options, "padding",0);
    int groups = option_find_int_quiet(options, "groups", 1);
    if(pad) padding = size/2;

    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation = get_activation(activation_s);

    int batch,h,w,c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch=params.batch;
    if(!(h && w && c)) error("Layer before convolutional layer must output image.");
    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
    int binary = option_find_int_quiet(options, "binary", 0);
    int xnor = option_find_int_quiet(options, "xnor", 0);

    convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,groups,size,stride,padding,activation, batch_normalize, binary, xnor, params.net->adam);
    layer.flipped = option_find_int_quiet(options, "flipped", 0);
    layer.dot = option_find_float_quiet(options, "dot", 0);

    return layer;
}

/* Builds a convolutional-RNN layer from its config section. */
layer parse_crnn(list *options, size_params params)
{
    int output_filters = option_find_int(options, "output_filters",1);
    int hidden_filters = option_find_int(options, "hidden_filters",1);
    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation = get_activation(activation_s);
    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);

    layer l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, activation, batch_normalize);

    l.shortcut = option_find_int_quiet(options, "shortcut", 0);

    return l;
}

/* NOTE(review): chunk ends mid-function — parse_rnn continues outside this
 * chunk; left untouched. */
layer parse_rnn(list *options, size_params params)
{
    int output = option_find_int(options, "output",1);
    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation =
get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_rnn_layer(params.batch, params.inputs, output, params.time_steps, activation, batch_normalize, params.net->adam); + + l.shortcut = option_find_int_quiet(options, "shortcut", 0); + + return l; +} + +layer parse_gru(list *options, size_params params) +{ + int output = option_find_int(options, "output",1); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_gru_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net->adam); + l.tanh = option_find_int_quiet(options, "tanh", 0); + + return l; +} + +layer parse_lstm(list *options, size_params params) +{ + int output = option_find_int(options, "output", 1); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_lstm_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net->adam); + + return l; +} + +layer parse_connected(list *options, size_params params) +{ + int output = option_find_int(options, "output",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_connected_layer(params.batch, params.inputs, output, activation, batch_normalize, params.net->adam); + return l; +} + +layer parse_softmax(list *options, size_params params) +{ + int groups = option_find_int_quiet(options, "groups",1); + layer l = make_softmax_layer(params.batch, params.inputs, groups); + l.temperature = option_find_float_quiet(options, "temperature", 1); + char *tree_file = option_find_str(options, "tree", 0); + if (tree_file) l.softmax_tree = read_tree(tree_file); + l.w = params.w; + l.h = params.h; + l.c = params.c; + l.spatial = option_find_float_quiet(options, "spatial", 0); + l.noloss = 
option_find_int_quiet(options, "noloss", 0); + return l; +} + +int *parse_yolo_mask(char *a, int *num) +{ + int *mask = 0; + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + mask = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + int val = atoi(a); + mask[i] = val; + a = strchr(a, ',')+1; + } + *num = n; + } + return mask; +} + +layer parse_yolo(list *options, size_params params) +{ + int classes = option_find_int(options, "classes", 20); + int total = option_find_int(options, "num", 1); + int num = total; + + char *a = option_find_str(options, "mask", 0); + int *mask = parse_yolo_mask(a, &num); + layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes); + assert(l.outputs == params.inputs); + + l.max_boxes = option_find_int_quiet(options, "max",90); + l.jitter = option_find_float(options, "jitter", .2); + + l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); + l.truth_thresh = option_find_float(options, "truth_thresh", 1); + l.random = option_find_int_quiet(options, "random", 0); + + char *map_file = option_find_str(options, "map", 0); + if (map_file) l.map = read_map(map_file); + + a = option_find_str(options, "anchors", 0); + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + for(i = 0; i < n; ++i){ + float bias = atof(a); + l.biases[i] = bias; + a = strchr(a, ',')+1; + } + } + return l; +} + +layer parse_iseg(list *options, size_params params) +{ + int classes = option_find_int(options, "classes", 20); + int ids = option_find_int(options, "ids", 32); + layer l = make_iseg_layer(params.batch, params.w, params.h, classes, ids); + assert(l.outputs == params.inputs); + return l; +} + +layer parse_region(list *options, size_params params) +{ + int coords = option_find_int(options, "coords", 4); + int classes = option_find_int(options, "classes", 20); + int num = option_find_int(options, "num", 1); + + 
layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords); + assert(l.outputs == params.inputs); + + l.log = option_find_int_quiet(options, "log", 0); + l.sqrt = option_find_int_quiet(options, "sqrt", 0); + + l.softmax = option_find_int(options, "softmax", 0); + l.background = option_find_int_quiet(options, "background", 0); + l.max_boxes = option_find_int_quiet(options, "max",30); + l.jitter = option_find_float(options, "jitter", .2); + l.rescore = option_find_int_quiet(options, "rescore",0); + + l.thresh = option_find_float(options, "thresh", .5); + l.classfix = option_find_int_quiet(options, "classfix", 0); + l.absolute = option_find_int_quiet(options, "absolute", 0); + l.random = option_find_int_quiet(options, "random", 0); + + l.coord_scale = option_find_float(options, "coord_scale", 1); + l.object_scale = option_find_float(options, "object_scale", 1); + l.noobject_scale = option_find_float(options, "noobject_scale", 1); + l.mask_scale = option_find_float(options, "mask_scale", 1); + l.class_scale = option_find_float(options, "class_scale", 1); + l.bias_match = option_find_int_quiet(options, "bias_match",0); + + char *tree_file = option_find_str(options, "tree", 0); + if (tree_file) l.softmax_tree = read_tree(tree_file); + char *map_file = option_find_str(options, "map", 0); + if (map_file) l.map = read_map(map_file); + + char *a = option_find_str(options, "anchors", 0); + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + for(i = 0; i < n; ++i){ + float bias = atof(a); + l.biases[i] = bias; + a = strchr(a, ',')+1; + } + } + return l; +} + +detection_layer parse_detection(list *options, size_params params) +{ + int coords = option_find_int(options, "coords", 1); + int classes = option_find_int(options, "classes", 1); + int rescore = option_find_int(options, "rescore", 0); + int num = option_find_int(options, "num", 1); + int side = option_find_int(options, "side", 7); + 
detection_layer layer = make_detection_layer(params.batch, params.inputs, num, side, classes, coords, rescore); + + layer.softmax = option_find_int(options, "softmax", 0); + layer.sqrt = option_find_int(options, "sqrt", 0); + + layer.max_boxes = option_find_int_quiet(options, "max",90); + layer.coord_scale = option_find_float(options, "coord_scale", 1); + layer.forced = option_find_int(options, "forced", 0); + layer.object_scale = option_find_float(options, "object_scale", 1); + layer.noobject_scale = option_find_float(options, "noobject_scale", 1); + layer.class_scale = option_find_float(options, "class_scale", 1); + layer.jitter = option_find_float(options, "jitter", .2); + layer.random = option_find_int_quiet(options, "random", 0); + layer.reorg = option_find_int_quiet(options, "reorg", 0); + return layer; +} + +cost_layer parse_cost(list *options, size_params params) +{ + char *type_s = option_find_str(options, "type", "sse"); + COST_TYPE type = get_cost_type(type_s); + float scale = option_find_float_quiet(options, "scale",1); + cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale); + layer.ratio = option_find_float_quiet(options, "ratio",0); + layer.noobject_scale = option_find_float_quiet(options, "noobj", 1); + layer.thresh = option_find_float_quiet(options, "thresh",0); + return layer; +} + +crop_layer parse_crop(list *options, size_params params) +{ + int crop_height = option_find_int(options, "crop_height",1); + int crop_width = option_find_int(options, "crop_width",1); + int flip = option_find_int(options, "flip",0); + float angle = option_find_float(options, "angle",0); + float saturation = option_find_float(options, "saturation",1); + float exposure = option_find_float(options, "exposure",1); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before crop layer must output image."); + + int noadjust = option_find_int_quiet(options, "noadjust",0); + + 
crop_layer l = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip, angle, saturation, exposure); + l.shift = option_find_float(options, "shift", 0); + l.noadjust = noadjust; + return l; +} + +layer parse_reorg(list *options, size_params params) +{ + int stride = option_find_int(options, "stride",1); + int reverse = option_find_int_quiet(options, "reverse",0); + int flatten = option_find_int_quiet(options, "flatten",0); + int extra = option_find_int_quiet(options, "extra",0); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before reorg layer must output image."); + + layer layer = make_reorg_layer(batch,w,h,c,stride,reverse, flatten, extra); + return layer; +} + +maxpool_layer parse_maxpool(list *options, size_params params) +{ + int stride = option_find_int(options, "stride",1); + int size = option_find_int(options, "size",stride); + int padding = option_find_int_quiet(options, "padding", size-1); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before maxpool layer must output image."); + + maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding); + return layer; +} + +avgpool_layer parse_avgpool(list *options, size_params params) +{ + int batch,w,h,c; + w = params.w; + h = params.h; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before avgpool layer must output image."); + + avgpool_layer layer = make_avgpool_layer(batch,w,h,c); + return layer; +} + +dropout_layer parse_dropout(list *options, size_params params) +{ + float probability = option_find_float(options, "probability", .5); + dropout_layer layer = make_dropout_layer(params.batch, params.inputs, probability); + layer.out_w = params.w; + layer.out_h = params.h; + layer.out_c = params.c; + return layer; +} + +layer parse_normalization(list *options, size_params params) +{ + float alpha = 
option_find_float(options, "alpha", .0001); + float beta = option_find_float(options, "beta" , .75); + float kappa = option_find_float(options, "kappa", 1); + int size = option_find_int(options, "size", 5); + layer l = make_normalization_layer(params.batch, params.w, params.h, params.c, size, alpha, beta, kappa); + return l; +} + +layer parse_batchnorm(list *options, size_params params) +{ + layer l = make_batchnorm_layer(params.batch, params.w, params.h, params.c); + return l; +} + +layer parse_shortcut(list *options, size_params params, network *net) +{ + char *l = option_find(options, "from"); + int index = atoi(l); + if(index < 0) index = params.index + index; + + int batch = params.batch; + layer from = net->layers[index]; + + layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c); + + char *activation_s = option_find_str(options, "activation", "linear"); + ACTIVATION activation = get_activation(activation_s); + s.activation = activation; + s.alpha = option_find_float_quiet(options, "alpha", 1); + s.beta = option_find_float_quiet(options, "beta", 1); + return s; +} + + +layer parse_l2norm(list *options, size_params params) +{ + layer l = make_l2norm_layer(params.batch, params.inputs); + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; + return l; +} + + +layer parse_logistic(list *options, size_params params) +{ + layer l = make_logistic_layer(params.batch, params.inputs); + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; + return l; +} + +layer parse_activation(list *options, size_params params) +{ + char *activation_s = option_find_str(options, "activation", "linear"); + ACTIVATION activation = get_activation(activation_s); + + layer l = make_activation_layer(params.batch, params.inputs, activation); + + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; + + return l; +} + +layer parse_upsample(list 
*options, size_params params, network *net) +{ + + int stride = option_find_int(options, "stride",2); + layer l = make_upsample_layer(params.batch, params.w, params.h, params.c, stride); + l.scale = option_find_float_quiet(options, "scale", 1); + return l; +} + +route_layer parse_route(list *options, size_params params, network *net) +{ + char *l = option_find(options, "layers"); + int len = strlen(l); + if(!l) error("Route Layer must specify input layers"); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (l[i] == ',') ++n; + } + + int *layers = calloc(n, sizeof(int)); + int *sizes = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + int index = atoi(l); + l = strchr(l, ',')+1; + if(index < 0) index = params.index + index; + layers[i] = index; + sizes[i] = net->layers[index].outputs; + } + int batch = params.batch; + + route_layer layer = make_route_layer(batch, n, layers, sizes); + + convolutional_layer first = net->layers[layers[0]]; + layer.out_w = first.out_w; + layer.out_h = first.out_h; + layer.out_c = first.out_c; + for(i = 1; i < n; ++i){ + int index = layers[i]; + convolutional_layer next = net->layers[index]; + if(next.out_w == first.out_w && next.out_h == first.out_h){ + layer.out_c += next.out_c; + }else{ + layer.out_h = layer.out_w = layer.out_c = 0; + } + } + + return layer; +} + +learning_rate_policy get_policy(char *s) +{ + if (strcmp(s, "random")==0) return RANDOM; + if (strcmp(s, "poly")==0) return POLY; + if (strcmp(s, "constant")==0) return CONSTANT; + if (strcmp(s, "step")==0) return STEP; + if (strcmp(s, "exp")==0) return EXP; + if (strcmp(s, "sigmoid")==0) return SIG; + if (strcmp(s, "steps")==0) return STEPS; + fprintf(stderr, "Couldn't find policy %s, going with constant\n", s); + return CONSTANT; +} + +void parse_net_options(list *options, network *net) +{ + net->batch = option_find_int(options, "batch",1); + net->learning_rate = option_find_float(options, "learning_rate", .001); + net->momentum = option_find_float(options, 
"momentum", .9); + net->decay = option_find_float(options, "decay", .0001); + int subdivs = option_find_int(options, "subdivisions",1); + net->time_steps = option_find_int_quiet(options, "time_steps",1); + net->notruth = option_find_int_quiet(options, "notruth",0); + net->batch /= subdivs; + net->batch *= net->time_steps; + net->subdivisions = subdivs; + net->random = option_find_int_quiet(options, "random", 0); + + net->adam = option_find_int_quiet(options, "adam", 0); + if(net->adam){ + net->B1 = option_find_float(options, "B1", .9); + net->B2 = option_find_float(options, "B2", .999); + net->eps = option_find_float(options, "eps", .0000001); + } + + net->h = option_find_int_quiet(options, "height",0); + net->w = option_find_int_quiet(options, "width",0); + net->c = option_find_int_quiet(options, "channels",0); + net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c); + net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2); + net->min_crop = option_find_int_quiet(options, "min_crop",net->w); + net->max_ratio = option_find_float_quiet(options, "max_ratio", (float) net->max_crop / net->w); + net->min_ratio = option_find_float_quiet(options, "min_ratio", (float) net->min_crop / net->w); + net->center = option_find_int_quiet(options, "center",0); + net->clip = option_find_float_quiet(options, "clip", 0); + + net->angle = option_find_float_quiet(options, "angle", 0); + net->aspect = option_find_float_quiet(options, "aspect", 1); + net->saturation = option_find_float_quiet(options, "saturation", 1); + net->exposure = option_find_float_quiet(options, "exposure", 1); + net->hue = option_find_float_quiet(options, "hue", 0); + + if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied"); + + char *policy_s = option_find_str(options, "policy", "constant"); + net->policy = get_policy(policy_s); + net->burn_in = option_find_int_quiet(options, "burn_in", 0); + net->power = option_find_float_quiet(options, 
"power", 4); + if(net->policy == STEP){ + net->step = option_find_int(options, "step", 1); + net->scale = option_find_float(options, "scale", 1); + } else if (net->policy == STEPS){ + char *l = option_find(options, "steps"); + char *p = option_find(options, "scales"); + if(!l || !p) error("STEPS policy must have steps and scales in cfg file"); + + int len = strlen(l); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (l[i] == ',') ++n; + } + int *steps = calloc(n, sizeof(int)); + float *scales = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + int step = atoi(l); + float scale = atof(p); + l = strchr(l, ',')+1; + p = strchr(p, ',')+1; + steps[i] = step; + scales[i] = scale; + } + net->scales = scales; + net->steps = steps; + net->num_steps = n; + } else if (net->policy == EXP){ + net->gamma = option_find_float(options, "gamma", 1); + } else if (net->policy == SIG){ + net->gamma = option_find_float(options, "gamma", 1); + net->step = option_find_int(options, "step", 1); + } else if (net->policy == POLY || net->policy == RANDOM){ + } + net->max_batches = option_find_int(options, "max_batches", 0); +} + +int is_network(section *s) +{ + return (strcmp(s->type, "[net]")==0 + || strcmp(s->type, "[network]")==0); +} + +network *parse_network_cfg(char *filename) +{ + list *sections = read_cfg(filename); + node *n = sections->front; + if(!n) error("Config file has no sections"); + network *net = make_network(sections->size - 1); + net->gpu_index = gpu_index; + size_params params; + + section *s = (section *)n->val; + list *options = s->options; + if(!is_network(s)) error("First section must be [net] or [network]"); + parse_net_options(options, net); + + params.h = net->h; + params.w = net->w; + params.c = net->c; + params.inputs = net->inputs; + params.batch = net->batch; + params.time_steps = net->time_steps; + params.net = net; + + size_t workspace_size = 0; + n = n->next; + int count = 0; + free_section(s); + fprintf(stderr, "layer filters size input 
output\n"); + while(n){ + params.index = count; + fprintf(stderr, "%5d ", count); + s = (section *)n->val; + options = s->options; + layer l = {0}; + LAYER_TYPE lt = string_to_layer_type(s->type); + if(lt == CONVOLUTIONAL){ + l = parse_convolutional(options, params); + }else if(lt == DECONVOLUTIONAL){ + l = parse_deconvolutional(options, params); + }else if(lt == LOCAL){ + l = parse_local(options, params); + }else if(lt == ACTIVE){ + l = parse_activation(options, params); + }else if(lt == LOGXENT){ + l = parse_logistic(options, params); + }else if(lt == L2NORM){ + l = parse_l2norm(options, params); + }else if(lt == RNN){ + l = parse_rnn(options, params); + }else if(lt == GRU){ + l = parse_gru(options, params); + }else if (lt == LSTM) { + l = parse_lstm(options, params); + }else if(lt == CRNN){ + l = parse_crnn(options, params); + }else if(lt == CONNECTED){ + l = parse_connected(options, params); + }else if(lt == CROP){ + l = parse_crop(options, params); + }else if(lt == COST){ + l = parse_cost(options, params); + }else if(lt == REGION){ + l = parse_region(options, params); + }else if(lt == YOLO){ + l = parse_yolo(options, params); + }else if(lt == ISEG){ + l = parse_iseg(options, params); + }else if(lt == DETECTION){ + l = parse_detection(options, params); + }else if(lt == SOFTMAX){ + l = parse_softmax(options, params); + net->hierarchy = l.softmax_tree; + }else if(lt == NORMALIZATION){ + l = parse_normalization(options, params); + }else if(lt == BATCHNORM){ + l = parse_batchnorm(options, params); + }else if(lt == MAXPOOL){ + l = parse_maxpool(options, params); + }else if(lt == REORG){ + l = parse_reorg(options, params); + }else if(lt == AVGPOOL){ + l = parse_avgpool(options, params); + }else if(lt == ROUTE){ + l = parse_route(options, params, net); + }else if(lt == UPSAMPLE){ + l = parse_upsample(options, params, net); + }else if(lt == SHORTCUT){ + l = parse_shortcut(options, params, net); + }else if(lt == DROPOUT){ + l = parse_dropout(options, params); + l.output 
= net->layers[count-1].output; + l.delta = net->layers[count-1].delta; +#ifdef GPU + l.output_gpu = net->layers[count-1].output_gpu; + l.delta_gpu = net->layers[count-1].delta_gpu; +#endif + }else{ + fprintf(stderr, "Type not recognized: %s\n", s->type); + } + l.clip = net->clip; + l.truth = option_find_int_quiet(options, "truth", 0); + l.onlyforward = option_find_int_quiet(options, "onlyforward", 0); + l.stopbackward = option_find_int_quiet(options, "stopbackward", 0); + l.dontsave = option_find_int_quiet(options, "dontsave", 0); + l.dontload = option_find_int_quiet(options, "dontload", 0); + l.numload = option_find_int_quiet(options, "numload", 0); + l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0); + l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1); + l.smooth = option_find_float_quiet(options, "smooth", 0); + option_unused(options); + net->layers[count] = l; + if (l.workspace_size > workspace_size) workspace_size = l.workspace_size; + free_section(s); + n = n->next; + ++count; + if(n){ + params.h = l.out_h; + params.w = l.out_w; + params.c = l.out_c; + params.inputs = l.outputs; + } + } + free_list(sections); + layer out = get_network_output_layer(net); + net->outputs = out.outputs; + net->truths = out.outputs; + if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths; + net->output = out.output; + net->input = calloc(net->inputs*net->batch, sizeof(float)); + net->truth = calloc(net->truths*net->batch, sizeof(float)); +#ifdef GPU + net->output_gpu = out.output_gpu; + net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch); + net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch); +#endif + if(workspace_size){ + //printf("%ld\n", workspace_size); +#ifdef GPU + if(gpu_index >= 0){ + net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1); + }else { + net->workspace = calloc(1, workspace_size); + } +#else + net->workspace = calloc(1, workspace_size); 
+#endif + } + return net; +} + +list *read_cfg(char *filename) +{ + FILE *file = fopen(filename, "r"); + if(file == 0) file_error(filename); + char *line; + int nu = 0; + list *options = make_list(); + section *current = 0; + while((line=fgetl(file)) != 0){ + ++ nu; + strip(line); + switch(line[0]){ + case '[': + current = malloc(sizeof(section)); + list_insert(options, current); + current->options = make_list(); + current->type = line; + break; + case '\0': + case '#': + case ';': + free(line); + break; + default: + if(!read_option(line, current->options)){ + fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); + free(line); + } + break; + } + } + fclose(file); + return options; +} + +void save_convolutional_weights_binary(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_convolutional_layer(l); + } +#endif + binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.binary_weights); + int size = l.c*l.size*l.size; + int i, j, k; + fwrite(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.n, fp); + fwrite(l.rolling_mean, sizeof(float), l.n, fp); + fwrite(l.rolling_variance, sizeof(float), l.n, fp); + } + for(i = 0; i < l.n; ++i){ + float mean = l.binary_weights[i*size]; + if(mean < 0) mean = -mean; + fwrite(&mean, sizeof(float), 1, fp); + for(j = 0; j < size/8; ++j){ + int index = i*size + j*8; + unsigned char c = 0; + for(k = 0; k < 8; ++k){ + if (j*8 + k >= size) break; + if (l.binary_weights[index + k] > 0) c = (c | 1<= 0){ + pull_convolutional_layer(l); + } +#endif + int num = l.nweights; + fwrite(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.n, fp); + fwrite(l.rolling_mean, sizeof(float), l.n, fp); + fwrite(l.rolling_variance, sizeof(float), l.n, fp); + } + fwrite(l.weights, sizeof(float), num, fp); +} + +void save_batchnorm_weights(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_batchnorm_layer(l); + } +#endif + 
fwrite(l.scales, sizeof(float), l.c, fp); + fwrite(l.rolling_mean, sizeof(float), l.c, fp); + fwrite(l.rolling_variance, sizeof(float), l.c, fp); +} + +void save_connected_weights(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_connected_layer(l); + } +#endif + fwrite(l.biases, sizeof(float), l.outputs, fp); + fwrite(l.weights, sizeof(float), l.outputs*l.inputs, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.outputs, fp); + fwrite(l.rolling_mean, sizeof(float), l.outputs, fp); + fwrite(l.rolling_variance, sizeof(float), l.outputs, fp); + } +} + +void save_weights_upto(network *net, char *filename, int cutoff) +{ +#ifdef GPU + if(net->gpu_index >= 0){ + cuda_set_device(net->gpu_index); + } +#endif + fprintf(stderr, "Saving weights to %s\n", filename); + FILE *fp = fopen(filename, "wb"); + if(!fp) file_error(filename); + + int major = 0; + int minor = 2; + int revision = 0; + fwrite(&major, sizeof(int), 1, fp); + fwrite(&minor, sizeof(int), 1, fp); + fwrite(&revision, sizeof(int), 1, fp); + fwrite(net->seen, sizeof(size_t), 1, fp); + + int i; + for(i = 0; i < net->n && i < cutoff; ++i){ + layer l = net->layers[i]; + if (l.dontsave) continue; + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + save_convolutional_weights(l, fp); + } if(l.type == CONNECTED){ + save_connected_weights(l, fp); + } if(l.type == BATCHNORM){ + save_batchnorm_weights(l, fp); + } if(l.type == RNN){ + save_connected_weights(*(l.input_layer), fp); + save_connected_weights(*(l.self_layer), fp); + save_connected_weights(*(l.output_layer), fp); + } if (l.type == LSTM) { + save_connected_weights(*(l.wi), fp); + save_connected_weights(*(l.wf), fp); + save_connected_weights(*(l.wo), fp); + save_connected_weights(*(l.wg), fp); + save_connected_weights(*(l.ui), fp); + save_connected_weights(*(l.uf), fp); + save_connected_weights(*(l.uo), fp); + save_connected_weights(*(l.ug), fp); + } if (l.type == GRU) { + if(1){ + save_connected_weights(*(l.wz), fp); + 
save_connected_weights(*(l.wr), fp); + save_connected_weights(*(l.wh), fp); + save_connected_weights(*(l.uz), fp); + save_connected_weights(*(l.ur), fp); + save_connected_weights(*(l.uh), fp); + }else{ + save_connected_weights(*(l.reset_layer), fp); + save_connected_weights(*(l.update_layer), fp); + save_connected_weights(*(l.state_layer), fp); + } + } if(l.type == CRNN){ + save_convolutional_weights(*(l.input_layer), fp); + save_convolutional_weights(*(l.self_layer), fp); + save_convolutional_weights(*(l.output_layer), fp); + } if(l.type == LOCAL){ +#ifdef GPU + if(gpu_index >= 0){ + pull_local_layer(l); + } +#endif + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + fwrite(l.biases, sizeof(float), l.outputs, fp); + fwrite(l.weights, sizeof(float), size, fp); + } + } + fclose(fp); +} +void save_weights(network *net, char *filename) +{ + save_weights_upto(net, filename, net->n); +} + +void transpose_matrix(float *a, int rows, int cols) +{ + float *transpose = calloc(rows*cols, sizeof(float)); + int x, y; + for(x = 0; x < rows; ++x){ + for(y = 0; y < cols; ++y){ + transpose[y*rows + x] = a[x*cols + y]; + } + } + memcpy(a, transpose, rows*cols*sizeof(float)); + free(transpose); +} + +void load_connected_weights(layer l, FILE *fp, int transpose) +{ + fread(l.biases, sizeof(float), l.outputs, fp); + fread(l.weights, sizeof(float), l.outputs*l.inputs, fp); + if(transpose){ + transpose_matrix(l.weights, l.inputs, l.outputs); + } + //printf("Biases: %f mean %f variance\n", mean_array(l.biases, l.outputs), variance_array(l.biases, l.outputs)); + //printf("Weights: %f mean %f variance\n", mean_array(l.weights, l.outputs*l.inputs), variance_array(l.weights, l.outputs*l.inputs)); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.outputs, fp); + fread(l.rolling_mean, sizeof(float), l.outputs, fp); + fread(l.rolling_variance, sizeof(float), l.outputs, fp); + //printf("Scales: %f mean %f variance\n", 
mean_array(l.scales, l.outputs), variance_array(l.scales, l.outputs)); + //printf("rolling_mean: %f mean %f variance\n", mean_array(l.rolling_mean, l.outputs), variance_array(l.rolling_mean, l.outputs)); + //printf("rolling_variance: %f mean %f variance\n", mean_array(l.rolling_variance, l.outputs), variance_array(l.rolling_variance, l.outputs)); + } +#ifdef GPU + if(gpu_index >= 0){ + push_connected_layer(l); + } +#endif +} + +void load_batchnorm_weights(layer l, FILE *fp) +{ + fread(l.scales, sizeof(float), l.c, fp); + fread(l.rolling_mean, sizeof(float), l.c, fp); + fread(l.rolling_variance, sizeof(float), l.c, fp); +#ifdef GPU + if(gpu_index >= 0){ + push_batchnorm_layer(l); + } +#endif +} + +void load_convolutional_weights_binary(layer l, FILE *fp) +{ + fread(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.n, fp); + fread(l.rolling_mean, sizeof(float), l.n, fp); + fread(l.rolling_variance, sizeof(float), l.n, fp); + } + int size = l.c*l.size*l.size; + int i, j, k; + for(i = 0; i < l.n; ++i){ + float mean = 0; + fread(&mean, sizeof(float), 1, fp); + for(j = 0; j < size/8; ++j){ + int index = i*size + j*8; + unsigned char c = 0; + fread(&c, sizeof(char), 1, fp); + for(k = 0; k < 8; ++k){ + if (j*8 + k >= size) break; + l.weights[index + k] = (c & 1<= 0){ + push_convolutional_layer(l); + } +#endif +} + +void load_convolutional_weights(layer l, FILE *fp) +{ + if(l.binary){ + //load_convolutional_weights_binary(l, fp); + //return; + } + if(l.numload) l.n = l.numload; + int num = l.c/l.groups*l.n*l.size*l.size; + fread(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.n, fp); + fread(l.rolling_mean, sizeof(float), l.n, fp); + fread(l.rolling_variance, sizeof(float), l.n, fp); + if(0){ + int i; + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_mean[i]); + } + printf("\n"); + for(i = 0; i < l.n; ++i){ + printf("%g, ", 
/*
 * Load saved weights from `filename` into layers [start, min(net->n, cutoff))
 * of `net`.  File layout: 3-int version header (major, minor, revision),
 * the "images seen" counter, then per-layer weight payloads in network
 * order.  Layers flagged `dontload` are skipped — note their bytes are NOT
 * consumed from the file, so the file must match the dontload configuration
 * it was saved with.
 *
 * Fix vs. original: the progress message is printed to stderr, so flush
 * stderr (the original called fflush(stdout), which flushes a stream the
 * message never touched).
 */
void load_weights_upto(network *net, char *filename, int start, int cutoff)
{
#ifdef GPU
    if(net->gpu_index >= 0){
        cuda_set_device(net->gpu_index);
    }
#endif
    fprintf(stderr, "Loading weights from %s...", filename);
    fflush(stderr);
    FILE *fp = fopen(filename, "rb");
    if(!fp) file_error(filename);

    /* Version header decides how the "seen" counter was serialized. */
    int major;
    int minor;
    int revision;
    fread(&major, sizeof(int), 1, fp);
    fread(&minor, sizeof(int), 1, fp);
    fread(&revision, sizeof(int), 1, fp);
    if ((major*10 + minor) >= 2 && major < 1000 && minor < 1000){
        /* newer files: seen stored as size_t */
        fread(net->seen, sizeof(size_t), 1, fp);
    } else {
        /* older files: seen stored as a plain int */
        int iseen = 0;
        fread(&iseen, sizeof(int), 1, fp);
        *net->seen = iseen;
    }
    /* Version numbers above 1000 mark files whose connected-layer weights
     * were written transposed. */
    int transpose = (major > 1000) || (minor > 1000);

    int i;
    for(i = start; i < net->n && i < cutoff; ++i){
        layer l = net->layers[i];
        if (l.dontload) continue;
        if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
            load_convolutional_weights(l, fp);
        }
        if(l.type == CONNECTED){
            load_connected_weights(l, fp, transpose);
        }
        if(l.type == BATCHNORM){
            load_batchnorm_weights(l, fp);
        }
        /* Recurrent layer types bundle inner layers; each sub-layer is read
         * in the same order save_weights_upto wrote them. */
        if(l.type == CRNN){
            load_convolutional_weights(*(l.input_layer), fp);
            load_convolutional_weights(*(l.self_layer), fp);
            load_convolutional_weights(*(l.output_layer), fp);
        }
        if(l.type == RNN){
            load_connected_weights(*(l.input_layer), fp, transpose);
            load_connected_weights(*(l.self_layer), fp, transpose);
            load_connected_weights(*(l.output_layer), fp, transpose);
        }
        if (l.type == LSTM) {
            load_connected_weights(*(l.wi), fp, transpose);
            load_connected_weights(*(l.wf), fp, transpose);
            load_connected_weights(*(l.wo), fp, transpose);
            load_connected_weights(*(l.wg), fp, transpose);
            load_connected_weights(*(l.ui), fp, transpose);
            load_connected_weights(*(l.uf), fp, transpose);
            load_connected_weights(*(l.uo), fp, transpose);
            load_connected_weights(*(l.ug), fp, transpose);
        }
        if (l.type == GRU) {
            /* if(1): the 6-matrix formulation is the one in use; the
             * reset/update/state branch is kept for older checkpoints. */
            if(1){
                load_connected_weights(*(l.wz), fp, transpose);
                load_connected_weights(*(l.wr), fp, transpose);
                load_connected_weights(*(l.wh), fp, transpose);
                load_connected_weights(*(l.uz), fp, transpose);
                load_connected_weights(*(l.ur), fp, transpose);
                load_connected_weights(*(l.uh), fp, transpose);
            }else{
                load_connected_weights(*(l.reset_layer), fp, transpose);
                load_connected_weights(*(l.update_layer), fp, transpose);
                load_connected_weights(*(l.state_layer), fp, transpose);
            }
        }
        if(l.type == LOCAL){
            /* Locally-connected layer: one independent filter per output
             * location, hence the `locations` factor in the weight count. */
            int locations = l.out_w*l.out_h;
            int size = l.size*l.size*l.c*l.n*locations;
            fread(l.biases, sizeof(float), l.outputs, fp);
            fread(l.weights, sizeof(float), size, fp);
#ifdef GPU
            if(gpu_index >= 0){
                push_local_layer(l);
            }
#endif
        }
    }
    fprintf(stderr, "Done!\n");
    fclose(fp);
}
char *filename); +void save_weights_double(network net, char *filename); + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/region_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/region_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..9df1b8bc252239ca520fa3dabeff55e5eb5959b8 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/region_layer.c @@ -0,0 +1,507 @@ +#include "region_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_region_layer(int batch, int w, int h, int n, int classes, int coords) +{ + layer l = {0}; + l.type = REGION; + + l.n = n; + l.batch = batch; + l.h = h; + l.w = w; + l.c = n*(classes + coords + 1); + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.coords = coords; + l.cost = calloc(1, sizeof(float)); + l.biases = calloc(n*2, sizeof(float)); + l.bias_updates = calloc(n*2, sizeof(float)); + l.outputs = h*w*n*(classes + coords + 1); + l.inputs = l.outputs; + l.truths = 30*(l.coords + 1); + l.delta = calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + int i; + for(i = 0; i < n*2; ++i){ + l.biases[i] = .5; + } + + l.forward = forward_region_layer; + l.backward = backward_region_layer; +#ifdef GPU + l.forward_gpu = forward_region_layer_gpu; + l.backward_gpu = backward_region_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "detection\n"); + srand(0); + + return l; +} + +void resize_region_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->n*(l->classes + l->coords + 1); + l->inputs = l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + 
/*
 * Accumulate gradients for the n mask coordinates of one predicted box:
 * delta = scale * (truth - prediction), strided by `stride` (the w*h plane
 * size) starting at `index`.
 *
 * Fix vs. original: `scale` was declared `int`, but the caller passes the
 * float l.mask_scale (and the sibling delta_region_box/delta_region_class
 * take float scales) — any fractional mask scale was silently truncated,
 * e.g. 0.5 became 0 and zeroed the mask gradient.  Widening to float is
 * source-compatible for all call sites.
 */
void delta_region_mask(float *truth, float *x, int n, int index, float *delta, int stride, float scale)
{
    int i;
    for(i = 0; i < n; ++i){
        delta[index + i*stride] = scale*(truth[i] - x[index + i*stride]);
    }
}
*avg_cat += pred; + } else { + if (delta[index] && tag){ + delta[index + stride*class] = scale * (1 - output[index + stride*class]); + return; + } + for(n = 0; n < classes; ++n){ + delta[index + stride*n] = scale * (((n == class)?1 : 0) - output[index + stride*n]); + if(n == class) *avg_cat += output[index + stride*n]; + } + } +} + +float logit(float x) +{ + return log(x/(1.-x)); +} + +float tisnan(float x) +{ + return (x != x); +} + +int entry_index(layer l, int batch, int location, int entry) +{ + int n = location / (l.w*l.h); + int loc = location % (l.w*l.h); + return batch*l.outputs + n*l.w*l.h*(l.coords+l.classes+1) + entry*l.w*l.h + loc; +} + +void forward_region_layer(const layer l, network net) +{ + int i,j,b,t,n; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) activate_array(l.output + index, l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords + 1); + if(!l.softmax && !l.softmax_tree) activate_array(l.output + index, l.classes*l.w*l.h, LOGISTIC); + } + } + if (l.softmax_tree){ + int i; + int count = l.coords + 1; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_cpu(net.input + count, group_size, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + count); + count += group_size; + } + } else if (l.softmax){ + int index = entry_index(l, 0, 0, l.coords + !l.background); + softmax_cpu(net.input + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output + index); + } +#endif + + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + if(!net.train) return; + float avg_iou = 0; + float recall = 0; + float avg_cat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int 
count = 0; + int class_count = 0; + *(l.cost) = 0; + for (b = 0; b < l.batch; ++b) { + if(l.softmax_tree){ + int onlyclass = 0; + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + if(!truth.x) break; + int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; + float maxp = 0; + int maxi = 0; + if(truth.x > 100000 && truth.y > 100000){ + for(n = 0; n < l.n*l.w*l.h; ++n){ + int class_index = entry_index(l, b, n, l.coords + 1); + int obj_index = entry_index(l, b, n, l.coords); + float scale = l.output[obj_index]; + l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]); + float p = scale*get_hierarchy_probability(l.output + class_index, l.softmax_tree, class, l.w*l.h); + if(p > maxp){ + maxp = p; + maxi = n; + } + } + int class_index = entry_index(l, b, maxi, l.coords + 1); + int obj_index = entry_index(l, b, maxi, l.coords); + delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax); + if(l.output[obj_index] < .3) l.delta[obj_index] = l.object_scale * (.3 - l.output[obj_index]); + else l.delta[obj_index] = 0; + l.delta[obj_index] = 0; + ++class_count; + onlyclass = 1; + break; + } + } + if(onlyclass) continue; + } + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h); + float best_iou = 0; + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + if(!truth.x) break; + float iou = box_iou(pred, truth); + if (iou > best_iou) { + best_iou = iou; + } + } + int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, l.coords); + avg_anyobj += l.output[obj_index]; + l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]); + if(l.background) l.delta[obj_index] = l.noobject_scale * (1 - 
l.output[obj_index]); + if (best_iou > l.thresh) { + l.delta[obj_index] = 0; + } + + if(*(net.seen) < 12800){ + box truth = {0}; + truth.x = (i + .5)/l.w; + truth.y = (j + .5)/l.h; + truth.w = l.biases[2*n]/l.w; + truth.h = l.biases[2*n+1]/l.h; + delta_region_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, l.delta, .01, l.w*l.h); + } + } + } + } + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + + if(!truth.x) break; + float best_iou = 0; + int best_n = 0; + i = (truth.x * l.w); + j = (truth.y * l.h); + box truth_shift = truth; + truth_shift.x = 0; + truth_shift.y = 0; + for(n = 0; n < l.n; ++n){ + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h); + if(l.bias_match){ + pred.w = l.biases[2*n]/l.w; + pred.h = l.biases[2*n+1]/l.h; + } + pred.x = 0; + pred.y = 0; + float iou = box_iou(pred, truth_shift); + if (iou > best_iou){ + best_iou = iou; + best_n = n; + } + } + + int box_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 0); + float iou = delta_region_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, l.delta, l.coord_scale * (2 - truth.w*truth.h), l.w*l.h); + if(l.coords > 4){ + int mask_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 4); + delta_region_mask(net.truth + t*(l.coords + 1) + b*l.truths + 5, l.output, l.coords - 4, mask_index, l.delta, l.w*l.h, l.mask_scale); + } + if(iou > .5) recall += 1; + avg_iou += iou; + + int obj_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords); + avg_obj += l.output[obj_index]; + l.delta[obj_index] = l.object_scale * (1 - l.output[obj_index]); + if (l.rescore) { + l.delta[obj_index] = l.object_scale * (iou - l.output[obj_index]); + } + if(l.background){ + l.delta[obj_index] = l.object_scale * (0 - l.output[obj_index]); + } + + int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; + if (l.map) class = l.map[class]; 
+ int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords + 1); + delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax); + ++count; + ++class_count; + } + } + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count); +} + +void backward_region_layer(const layer l, network net) +{ + /* + int b; + int size = l.coords + l.classes + 1; + for (b = 0; b < l.batch*l.n; ++b){ + int index = (b*size + 4)*l.w*l.h; + gradient_array(l.output + index, l.w*l.h, LOGISTIC, l.delta + index); + } + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); + */ +} + +void correct_region_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +{ + int i; + int new_w=0; + int new_h=0; + if (((float)netw/w) < ((float)neth/h)) { + new_w = netw; + new_h = (h * netw)/w; + } else { + new_h = neth; + new_w = (w * neth)/h; + } + for (i = 0; i < n; ++i){ + box b = dets[i].bbox; + b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); + b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); + b.w *= (float)netw/new_w; + b.h *= (float)neth/new_h; + if(!relative){ + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + dets[i].bbox = b; + } +} + +void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets) +{ + int i,j,n,z; + float *predictions = l.output; + if (l.batch == 2) { + float *flip = l.output + l.outputs; + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w/2; ++i) { + for (n = 0; n < l.n; ++n) { + for(z = 0; z < l.classes + l.coords + 1; ++z){ + int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; + int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); + float swap = flip[i1]; + 
flip[i1] = flip[i2]; + flip[i2] = swap; + if(z == 0){ + flip[i1] = -flip[i1]; + flip[i2] = -flip[i2]; + } + } + } + } + } + for(i = 0; i < l.outputs; ++i){ + l.output[i] = (l.output[i] + flip[i])/2.; + } + } + for (i = 0; i < l.w*l.h; ++i){ + int row = i / l.w; + int col = i % l.w; + for(n = 0; n < l.n; ++n){ + int index = n*l.w*l.h + i; + for(j = 0; j < l.classes; ++j){ + dets[index].prob[j] = 0; + } + int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); + int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); + int mask_index = entry_index(l, 0, n*l.w*l.h + i, 4); + float scale = l.background ? 1 : predictions[obj_index]; + dets[index].bbox = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h); + dets[index].objectness = scale > thresh ? scale : 0; + if(dets[index].mask){ + for(j = 0; j < l.coords - 4; ++j){ + dets[index].mask[j] = l.output[mask_index + j*l.w*l.h]; + } + } + + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + !l.background); + if(l.softmax_tree){ + + hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0, l.w*l.h); + if(map){ + for(j = 0; j < 200; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + map[j]); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? prob : 0; + } + } else { + int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh, l.w*l.h); + dets[index].prob[j] = (scale > thresh) ? scale : 0; + } + } else { + if(dets[index].objectness){ + for(j = 0; j < l.classes; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + j); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? 
prob : 0; + } + } + } + } + } + correct_region_boxes(dets, l.w*l.h*l.n, w, h, netw, neth, relative); +} + +#ifdef GPU + +void forward_region_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + if(l.coords > 4){ + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC); + } + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) activate_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords + 1); + if(!l.softmax && !l.softmax_tree) activate_array_gpu(l.output_gpu + index, l.classes*l.w*l.h, LOGISTIC); + } + } + if (l.softmax_tree){ + int index = entry_index(l, 0, 0, l.coords + 1); + softmax_tree(net.input_gpu + index, l.w*l.h, l.batch*l.n, l.inputs/l.n, 1, l.output_gpu + index, *l.softmax_tree); + } else if (l.softmax) { + int index = entry_index(l, 0, 0, l.coords + !l.background); + softmax_gpu(net.input_gpu + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index); + } + if(!net.train || l.onlyforward){ + cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + return; + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_region_layer(l, net); + //cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs); + if(!net.train) return; + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_region_layer_gpu(const layer l, network net) +{ + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + gradient_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC, l.delta_gpu + index); + if(l.coords > 4){ + index = entry_index(l, b, n*l.w*l.h, 4); + 
gradient_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC, l.delta_gpu + index); + } + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) gradient_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC, l.delta_gpu + index); + } + } + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + +void zero_objectness(layer l) +{ + int i, n; + for (i = 0; i < l.w*l.h; ++i){ + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); + l.output[obj_index] = 0; + } + } +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/region_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/region_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9f12fd187fd490d10cbc21af8251e0e2a870b7cb --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/region_layer.h @@ -0,0 +1,18 @@ +#ifndef REGION_LAYER_H +#define REGION_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_region_layer(int batch, int w, int h, int n, int classes, int coords); +void forward_region_layer(const layer l, network net); +void backward_region_layer(const layer l, network net); +void resize_region_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_region_layer_gpu(const layer l, network net); +void backward_region_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/reorg_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/reorg_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..b3634d53a5e01a8bbcf00e62a90f70f40108e1d7 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/reorg_layer.c @@ -0,0 +1,173 @@ +#include "reorg_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + + +layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra) +{ + layer l = {0}; + l.type = REORG; + l.batch = batch; + l.stride 
/*
 * Resize a reorg layer for a new input width/height, recomputing the output
 * geometry and regrowing the host (and device) buffers.
 *
 * NOTE(review): unlike make_reorg_layer, this does not handle the
 * l->extra / l->flatten variants, and it sets l->inputs = l->outputs rather
 * than h*w*c — presumably equivalent when c is divisible by stride*stride;
 * confirm for the extra/flatten configurations.
 */
void resize_reorg_layer(layer *l, int w, int h)
{
    int stride = l->stride;
    int c = l->c;

    l->h = h;
    l->w = w;

    /* reverse: upsample spatially, shrink channels; forward: the opposite */
    if(l->reverse){
        l->out_w = w*stride;
        l->out_h = h*stride;
        l->out_c = c/(stride*stride);
    }else{
        l->out_w = w/stride;
        l->out_h = h/stride;
        l->out_c = c*(stride*stride);
    }

    l->outputs = l->out_h * l->out_w * l->out_c;
    l->inputs = l->outputs;
    int output_size = l->outputs * l->batch;

    /* NOTE(review): realloc results are used unchecked — on failure the old
     * pointer is overwritten with NULL (leak + later NULL deref).  This
     * matches the pattern used throughout the file. */
    l->output = realloc(l->output, output_size * sizeof(float));
    l->delta = realloc(l->delta, output_size * sizeof(float));

#ifdef GPU
    /* Device buffers cannot be resized in place: free and re-create. */
    cuda_free(l->output_gpu);
    cuda_free(l->delta_gpu);
    l->output_gpu = cuda_make_array(l->output, output_size);
    l->delta_gpu = cuda_make_array(l->delta, output_size);
#endif
}
/*
 * Backward pass of the reorg layer: route l.delta back into net.delta by
 * applying the inverse of whichever rearrangement forward_reorg_layer
 * performed (flatten, pass-through "extra", or the reorg shuffle with the
 * reverse flag flipped).
 *
 * NOTE(review): the branch order differs from forward_reorg_layer, which
 * tests l.extra before l.reverse; if both flags were ever set the two
 * passes would disagree.  Left as-is — confirm the flags are exclusive.
 */
void backward_reorg_layer(const layer l, network net)
{
    int i;
    if(l.flatten){
        memcpy(net.delta, l.delta, l.outputs*l.batch*sizeof(float));
        /* invert the forward flatten by flipping its direction flag */
        if(l.reverse){
            flatten(net.delta, l.w*l.h, l.c, l.batch, 1);
        }else{
            flatten(net.delta, l.w*l.h, l.c, l.batch, 0);
        }
    } else if(l.reverse){
        reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta);
    } else if (l.extra) {
        /* "extra" forward was a plain copy of the first l.inputs values;
         * backward copies the matching slice of the gradient back. */
        for(i = 0; i < l.batch; ++i){
            copy_cpu(l.inputs, l.delta + i*l.outputs, 1, net.delta + i*l.inputs, 1);
        }
    }else{
        reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta);
    }
}
/*
 * Advance a (by-value copy of a) recurrent sub-layer's data pointers by
 * `steps` timesteps, so the same layer struct can be reused to process
 * successive timesteps of a sequence.  Negative `steps` rewinds.
 *
 * NOTE(review): x and x_norm are bumped unconditionally; this assumes they
 * were allocated (batch-normalized sub-layers) or that offsetting them is
 * harmless because they are never dereferenced otherwise — confirm callers.
 */
static void increment_layer(layer *l, int steps)
{
    int num = l->outputs*l->batch*steps;
    l->output += num;
    l->delta += num;
    l->x += num;
    l->x_norm += num;

#ifdef GPU
    /* Device-side mirrors advance in lockstep with the host pointers. */
    l->output_gpu += num;
    l->delta_gpu += num;
    l->x_gpu += num;
    l->x_norm_gpu += num;
#endif
}
ACTIVATION activation, int batch_normalize, int adam) +{ + fprintf(stderr, "RNN Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = RNN; + l.steps = steps; + l.inputs = inputs; + + l.state = calloc(batch*outputs, sizeof(float)); + l.prev_state = calloc(batch*outputs, sizeof(float)); + + l.input_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.input_layer) = make_connected_layer(batch*steps, inputs, outputs, activation, batch_normalize, adam); + l.input_layer->batch = batch; + + l.self_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.self_layer) = make_connected_layer(batch*steps, outputs, outputs, activation, batch_normalize, adam); + l.self_layer->batch = batch; + + l.output_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.output_layer) = make_connected_layer(batch*steps, outputs, outputs, activation, batch_normalize, adam); + l.output_layer->batch = batch; + + l.outputs = outputs; + l.output = l.output_layer->output; + l.delta = l.output_layer->delta; + + l.forward = forward_rnn_layer; + l.backward = backward_rnn_layer; + l.update = update_rnn_layer; +#ifdef GPU + l.forward_gpu = forward_rnn_layer_gpu; + l.backward_gpu = backward_rnn_layer_gpu; + l.update_gpu = update_rnn_layer_gpu; + l.state_gpu = cuda_make_array(0, batch*outputs); + l.prev_state_gpu = cuda_make_array(0, batch*outputs); + l.output_gpu = l.output_layer->output_gpu; + l.delta_gpu = l.output_layer->delta_gpu; +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l.input_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.input_layer->out_c, l.input_layer->out_h, l.input_layer->out_w); + cudnnSetTensor4dDescriptor(l.self_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.self_layer->out_c, l.self_layer->out_h, l.self_layer->out_w); + cudnnSetTensor4dDescriptor(l.output_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.output_layer->out_c, 
/*
 * CPU forward pass of the vanilla RNN layer.  Per timestep:
 *   state = input_layer(x_t) + self_layer(state_prev)   (+ state_prev if shortcut)
 *   output_t = output_layer(state)
 * During training, l.state points into a buffer holding one state per
 * timestep, and is advanced each step so backward_rnn_layer can revisit
 * them; at inference a single state slot is updated in place.
 */
void forward_rnn_layer(layer l, network net)
{
    network s = net;
    s.train = net.train;
    int i;
    /* By-value copies: their data pointers get bumped per timestep by
     * increment_layer without touching the originals. */
    layer input_layer = *(l.input_layer);
    layer self_layer = *(l.self_layer);
    layer output_layer = *(l.output_layer);

    /* Clear gradients for all timesteps up front. */
    fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1);
    fill_cpu(l.outputs * l.batch * l.steps, 0, self_layer.delta, 1);
    fill_cpu(l.outputs * l.batch * l.steps, 0, input_layer.delta, 1);
    if(net.train) fill_cpu(l.outputs * l.batch, 0, l.state, 1);

    for (i = 0; i < l.steps; ++i) {
        s.input = net.input;
        forward_connected_layer(input_layer, s);

        /* self-recurrence reads the state produced by the previous step */
        s.input = l.state;
        forward_connected_layer(self_layer, s);

        float *old_state = l.state;
        /* training keeps every step's state; inference reuses one slot */
        if(net.train) l.state += l.outputs*l.batch;
        if(l.shortcut){
            /* residual connection: seed new state with the previous state */
            copy_cpu(l.outputs * l.batch, old_state, 1, l.state, 1);
        }else{
            fill_cpu(l.outputs * l.batch, 0, l.state, 1);
        }
        axpy_cpu(l.outputs * l.batch, 1, input_layer.output, 1, l.state, 1);
        axpy_cpu(l.outputs * l.batch, 1, self_layer.output, 1, l.state, 1);

        s.input = l.state;
        forward_connected_layer(output_layer, s);

        /* advance to the next timestep's input and output slots */
        net.input += l.inputs*l.batch;
        increment_layer(&input_layer, 1);
        increment_layer(&self_layer, 1);
        increment_layer(&output_layer, 1);
    }
}
l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + s.delta = self_layer.delta; + backward_connected_layer(output_layer, s); + + l.state -= l.outputs*l.batch; + /* + if(i > 0){ + copy_cpu(l.outputs * l.batch, input_layer.output - l.outputs*l.batch, 1, l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, self_layer.output - l.outputs*l.batch, 1, l.state, 1); + }else{ + fill_cpu(l.outputs * l.batch, 0, l.state, 1); + } + */ + + s.input = l.state; + s.delta = self_layer.delta - l.outputs*l.batch; + if (i == 0) s.delta = 0; + backward_connected_layer(self_layer, s); + + copy_cpu(l.outputs*l.batch, self_layer.delta, 1, input_layer.delta, 1); + if (i > 0 && l.shortcut) axpy_cpu(l.outputs*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.outputs*l.batch, 1); + s.input = net.input + i*l.inputs*l.batch; + if(net.delta) s.delta = net.delta + i*l.inputs*l.batch; + else s.delta = 0; + backward_connected_layer(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} + +#ifdef GPU + +void pull_rnn_layer(layer l) +{ + pull_connected_layer(*(l.input_layer)); + pull_connected_layer(*(l.self_layer)); + pull_connected_layer(*(l.output_layer)); +} + +void push_rnn_layer(layer l) +{ + push_connected_layer(*(l.input_layer)); + push_connected_layer(*(l.self_layer)); + push_connected_layer(*(l.output_layer)); +} + +void update_rnn_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.input_layer), a); + update_connected_layer_gpu(*(l.self_layer), a); + update_connected_layer_gpu(*(l.output_layer), a); +} + +void forward_rnn_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_gpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 
0, self_layer.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, input_layer.delta_gpu, 1); + + if(net.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.prev_state_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = net.input_gpu; + forward_connected_layer_gpu(input_layer, s); + + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(self_layer, s); + + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(output_layer, s); + + net.input_gpu += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_rnn_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + increment_layer(&input_layer, l.steps - 1); + increment_layer(&self_layer, l.steps - 1); + increment_layer(&output_layer, l.steps - 1); + float *last_input = input_layer.output_gpu; + float *last_self = self_layer.output_gpu; + for (i = l.steps-1; i >= 0; --i) { + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu; + backward_connected_layer_gpu(output_layer, s); + + if(i != 0) { + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + }else { + 
copy_gpu(l.outputs*l.batch, l.prev_state_gpu, 1, l.state_gpu, 1); + } + + copy_gpu(l.outputs*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = (i > 0) ? self_layer.delta_gpu - l.outputs*l.batch : 0; + if (i == 0) s.delta_gpu = 0; + backward_connected_layer_gpu(self_layer, s); + + s.input_gpu = net.input_gpu + i*l.inputs*l.batch; + if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch; + else s.delta_gpu = 0; + backward_connected_layer_gpu(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, last_input, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, last_self, 1, l.state_gpu, 1); +} +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/rnn_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/rnn_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..270a63ffafca9a9adb7b995ed674f93c70bdeb51 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/rnn_layer.h @@ -0,0 +1,25 @@ + +#ifndef RNN_LAYER_H +#define RNN_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" +#define USET + +layer make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTIVATION activation, int batch_normalize, int adam); + +void forward_rnn_layer(layer l, network net); +void backward_rnn_layer(layer l, network net); +void update_rnn_layer(layer l, update_args a); + +#ifdef GPU +void forward_rnn_layer_gpu(layer l, network net); +void backward_rnn_layer_gpu(layer l, network net); +void update_rnn_layer_gpu(layer l, update_args a); +void push_rnn_layer(layer l); +void pull_rnn_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/route_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/route_layer.c new file mode 100644 index 
0000000000000000000000000000000000000000..608abe9a1c729eb6bdfd5e0d65c58196b51da496 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/route_layer.c @@ -0,0 +1,134 @@ +#include "route_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + +route_layer make_route_layer(int batch, int n, int *input_layers, int *input_sizes) +{ + fprintf(stderr,"route "); + route_layer l = {0}; + l.type = ROUTE; + l.batch = batch; + l.n = n; + l.input_layers = input_layers; + l.input_sizes = input_sizes; + int i; + int outputs = 0; + for(i = 0; i < n; ++i){ + fprintf(stderr," %d", input_layers[i]); + outputs += input_sizes[i]; + } + fprintf(stderr, "\n"); + l.outputs = outputs; + l.inputs = outputs; + l.delta = calloc(outputs*batch, sizeof(float)); + l.output = calloc(outputs*batch, sizeof(float));; + + l.forward = forward_route_layer; + l.backward = backward_route_layer; + #ifdef GPU + l.forward_gpu = forward_route_layer_gpu; + l.backward_gpu = backward_route_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, outputs*batch); + l.output_gpu = cuda_make_array(l.output, outputs*batch); + #endif + return l; +} + +void resize_route_layer(route_layer *l, network *net) +{ + int i; + layer first = net->layers[l->input_layers[0]]; + l->out_w = first.out_w; + l->out_h = first.out_h; + l->out_c = first.out_c; + l->outputs = first.outputs; + l->input_sizes[0] = first.outputs; + for(i = 1; i < l->n; ++i){ + int index = l->input_layers[i]; + layer next = net->layers[index]; + l->outputs += next.outputs; + l->input_sizes[i] = next.outputs; + if(next.out_w == first.out_w && next.out_h == first.out_h){ + l->out_c += next.out_c; + }else{ + printf("%d %d, %d %d\n", next.out_w, next.out_h, first.out_w, first.out_h); + l->out_h = l->out_w = l->out_c = 0; + } + } + l->inputs = l->outputs; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + 
cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + +void forward_route_layer(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *input = net.layers[index].output; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1); + } + offset += input_size; + } +} + +void backward_route_layer(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *delta = net.layers[index].delta; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1); + } + offset += input_size; + } +} + +#ifdef GPU +void forward_route_layer_gpu(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *input = net.layers[index].output_gpu; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + copy_gpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1); + } + offset += input_size; + } +} + +void backward_route_layer_gpu(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *delta = net.layers[index].delta_gpu; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + axpy_gpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1); + } + offset += input_size; + } +} +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/route_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/route_layer.h new file mode 100644 index 
0000000000000000000000000000000000000000..1d40330ff30c9c93a2180a696d5f67f628ea481c --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/route_layer.h @@ -0,0 +1,18 @@ +#ifndef ROUTE_LAYER_H +#define ROUTE_LAYER_H +#include "network.h" +#include "layer.h" + +typedef layer route_layer; + +route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size); +void forward_route_layer(const route_layer l, network net); +void backward_route_layer(const route_layer l, network net); +void resize_route_layer(route_layer *l, network *net); + +#ifdef GPU +void forward_route_layer_gpu(const route_layer l, network net); +void backward_route_layer_gpu(const route_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/shortcut_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/shortcut_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..e5b9e14301c0a6b7e03b270824352f1ba40163cd --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/shortcut_layer.c @@ -0,0 +1,90 @@ +#include "shortcut_layer.h" +#include "cuda_dark.h" +#include "blas.h" +#include "activations.h" + +#include +#include + +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) +{ + fprintf(stderr, "res %3d %4d x%4d x%4d -> %4d x%4d x%4d\n",index, w2,h2,c2, w,h,c); + layer l = {0}; + l.type = SHORTCUT; + l.batch = batch; + l.w = w2; + l.h = h2; + l.c = c2; + l.out_w = w; + l.out_h = h; + l.out_c = c; + l.outputs = w*h*c; + l.inputs = l.outputs; + + l.index = index; + + l.delta = calloc(l.outputs*batch, sizeof(float)); + l.output = calloc(l.outputs*batch, sizeof(float));; + + l.forward = forward_shortcut_layer; + l.backward = backward_shortcut_layer; + #ifdef GPU + l.forward_gpu = forward_shortcut_layer_gpu; + l.backward_gpu = backward_shortcut_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); 
+ #endif + return l; +} + +void resize_shortcut_layer(layer *l, int w, int h) +{ + assert(l->w == l->out_w); + assert(l->h == l->out_h); + l->w = l->out_w = w; + l->h = l->out_h = h; + l->outputs = w*h*l->out_c; + l->inputs = l->outputs; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + + +void forward_shortcut_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + shortcut_cpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output); + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_shortcut_layer(const layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + axpy_cpu(l.outputs*l.batch, l.alpha, l.delta, 1, net.delta, 1); + shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta); +} + +#ifdef GPU +void forward_shortcut_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + shortcut_gpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output_gpu); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_shortcut_layer_gpu(const layer l, network net) +{ + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + axpy_gpu(l.outputs*l.batch, l.alpha, l.delta_gpu, 1, net.delta_gpu, 1); + shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta_gpu); +} +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/shortcut_layer.h 
b/workloads/realworld/uvm_prefetch/darknet/src/shortcut_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..5f684fc1eadea2c6902be96bf4a4bf9a3b533da9 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/shortcut_layer.h @@ -0,0 +1,17 @@ +#ifndef SHORTCUT_LAYER_H +#define SHORTCUT_LAYER_H + +#include "layer.h" +#include "network.h" + +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); +void forward_shortcut_layer(const layer l, network net); +void backward_shortcut_layer(const layer l, network net); +void resize_shortcut_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_shortcut_layer_gpu(const layer l, network net); +void backward_shortcut_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/softmax_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/softmax_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..569b62b14097ed226d9939d8e1f1fd2899083ee6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/softmax_layer.c @@ -0,0 +1,107 @@ +#include "softmax_layer.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +softmax_layer make_softmax_layer(int batch, int inputs, int groups) +{ + assert(inputs%groups == 0); + fprintf(stderr, "softmax %4d\n", inputs); + softmax_layer l = {0}; + l.type = SOFTMAX; + l.batch = batch; + l.groups = groups; + l.inputs = inputs; + l.outputs = inputs; + l.loss = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_softmax_layer; + l.backward = backward_softmax_layer; + #ifdef GPU + l.forward_gpu = forward_softmax_layer_gpu; + l.backward_gpu = backward_softmax_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.loss_gpu = 
cuda_make_array(l.loss, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_softmax_layer(const softmax_layer l, network net) +{ + if(l.softmax_tree){ + int i; + int count = 0; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_cpu(net.input + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output + count); + count += group_size; + } + } else { + softmax_cpu(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output); + } + + if(net.truth && !l.noloss){ + softmax_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_softmax_layer(const softmax_layer l, network net) +{ + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_softmax_layer_output(const softmax_layer layer) +{ + cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch); +} + +void forward_softmax_layer_gpu(const softmax_layer l, network net) +{ + if(l.softmax_tree){ + softmax_tree(net.input_gpu, 1, l.batch, l.inputs, l.temperature, l.output_gpu, *l.softmax_tree); + /* + int i; + int count = 0; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_gpu(net.input_gpu + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count); + count += group_size; + } + */ + } else { + if(l.spatial){ + softmax_gpu(net.input_gpu, l.c, l.batch*l.c, l.inputs/l.c, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu); + }else{ + softmax_gpu(net.input_gpu, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu); + } + } + if(net.truth && !l.noloss){ + softmax_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); + if(l.softmax_tree){ + mask_gpu(l.batch*l.inputs, l.delta_gpu, 
SECRET_NUM, net.truth_gpu, 0); + mask_gpu(l.batch*l.inputs, l.loss_gpu, SECRET_NUM, net.truth_gpu, 0); + } + cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_softmax_layer_gpu(const softmax_layer layer, network net) +{ + axpy_gpu(layer.batch*layer.inputs, 1, layer.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/softmax_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/softmax_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..2e3ffe01a6c5d273a9f6139bc9f265cd7e2bc860 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/softmax_layer.h @@ -0,0 +1,19 @@ +#ifndef SOFTMAX_LAYER_H +#define SOFTMAX_LAYER_H +#include "layer.h" +#include "network.h" + +typedef layer softmax_layer; + +void softmax_array(float *input, int n, float temp, float *output); +softmax_layer make_softmax_layer(int batch, int inputs, int groups); +void forward_softmax_layer(const softmax_layer l, network net); +void backward_softmax_layer(const softmax_layer l, network net); + +#ifdef GPU +void pull_softmax_layer_output(const softmax_layer l); +void forward_softmax_layer_gpu(const softmax_layer l, network net); +void backward_softmax_layer_gpu(const softmax_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/stb_image.h b/workloads/realworld/uvm_prefetch/darknet/src/stb_image.h new file mode 100644 index 0000000000000000000000000000000000000000..d9c21bc813f1f24de2a25ee3cc82bdce9413eaa5 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/stb_image.h @@ -0,0 +1,7462 @@ +/* stb_image - v2.19 - public domain image loader - http://nothings.org/stb + no warranty implied; use at your own risk + + Do this: + #define STB_IMAGE_IMPLEMENTATION + before you include this file in *one* C or C++ file to create the implementation. + + // i.e. 
it should look like this: + #include ... + #include ... + #include ... + #define STB_IMAGE_IMPLEMENTATION + #include "stb_image.h" + + You can #define STBI_ASSERT(x) before the #include to avoid using assert.h. + And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free + + + QUICK NOTES: + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface + + JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) + PNG 1/2/4/8/16-bit-per-channel + + TGA (not sure what subset, if a subset) + BMP non-1bpp, non-RLE + PSD (composited view only, no extra channels, 8/16 bit-per-channel) + + GIF (*comp always reports as 4-channel) + HDR (radiance rgbE format) + PIC (Softimage PIC) + PNM (PPM and PGM binary only) + + Animated GIF still needs a proper API, but here's one way to do it: + http://gist.github.com/urraka/685d9a6340b26b830d49 + + - decode from memory or through FILE (define STBI_NO_STDIO to remove code) + - decode from arbitrary I/O callbacks + - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) + + Full documentation under "DOCUMENTATION" below. + + +LICENSE + + See end of file for license information. 
+ +RECENT REVISION HISTORY: + + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings + 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes + 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 + RGB-format JPEG; remove white matting in PSD; + allocate large structures on the stack; + correct channel count for PNG & BMP + 2.10 (2016-01-22) avoid warning introduced in 2.09 + 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED + + See end of file for full revision history. + + + ============================ Contributors ========================= + + Image formats Extensions, features + Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) + Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) + Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) + Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) + Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) + Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) + Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) + github:urraka (animated gif) Junggon Kim (PNM comments) + Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) + socks-the-fox (16-bit PNG) + Jeremy Sawicki (handle all ImageNet JPGs) + Optimizations & bugfixes Mikhail Morozov (1-bit BMP) + Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query) + Arseny Kapoulkine + John-Mark Allen + + Bug & warning fixes + Marc LeBlanc David Woo Guillaume George Martins Mozeiko + Christpher Lloyd Jerry Jansson Joseph Thomson Phil Jordan + Dave Moore Roy Eltham Hayaki Saito Nathan Reed + Won Chun Luke Graham Johan Duparc Nick 
Verigakis + the Horde3D community Thomas Ruf Ronny Chevalier github:rlyeh + Janez Zemva John Bartholomew Michal Cichon github:romigrou + Jonathan Blow Ken Hamada Tero Hanninen github:svdijk + Laurent Gomila Cort Stratton Sergio Gonzalez github:snagar + Aruelien Pocheville Thibault Reuille Cass Everitt github:Zelex + Ryamond Barbiero Paul Du Bois Engin Manap github:grim210 + Aldo Culquicondor Philipp Wiesemann Dale Weiler github:sammyhw + Oriol Ferrer Mesia Josh Tobin Matthew Gregan github:phprus + Julian Raschke Gregory Mullen Baldur Karlsson github:poppolopoppo + Christian Floisand Kevin Schmidt github:darealshinji + Blazej Dariusz Roszkowski github:Michaelangel007 +*/ + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +// DOCUMENTATION +// +// Limitations: +// - no 12-bit-per-channel JPEG +// - no JPEGs with arithmetic coding +// - GIF always returns *comp=4 +// +// Basic usage (see HDR discussion below for HDR usage): +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// // ... but 'n' will always be the number that it would have been if you said 0 +// stbi_image_free(data) +// +// Standard parameters: +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *channels_in_file -- outputs # of image components in image file +// int desired_channels -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data, or NULL on an allocation failure or if the image is +// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. 
There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'desired_channels' if desired_channels is non-zero, or +// *channels_in_file otherwise. If desired_channels is non-zero, +// *channels_in_file has the number of components that _would_ have been +// output otherwise. E.g. if you set desired_channels to 4, you will always +// get RGBA output, but you can check *channels_in_file to see if it's trivially +// opaque because e.g. there were only 3 channels in the source image. +// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *channels_in_file will be unchanged. The function +// stbi_failure_reason() can be queried for an extremely brief, end-user +// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS +// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. +// +// =========================================================================== +// +// Philosophy +// +// stb libraries are designed with the following priorities: +// +// 1. easy to use +// 2. easy to maintain +// 3. good performance +// +// Sometimes I let "good performance" creep up in priority over "easy to maintain", +// and for best performance I may provide less-easy-to-use APIs that give higher +// performance, in addition to the easy to use ones. Nevertheless, it's important +// to keep in mind that from the standpoint of you, a client of this library, +// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. 
+// +// Some secondary priorities arise directly from the first two, some of which +// make more explicit reasons why performance can't be emphasized. +// +// - Portable ("ease of use") +// - Small source code footprint ("easy to maintain") +// - No dependencies ("ease of use") +// +// =========================================================================== +// +// I/O callbacks +// +// I/O callbacks allow you to read from arbitrary sources, like packaged +// files or some other source. Data read from callbacks are processed +// through a small internal buffer (currently 128 bytes) to try to reduce +// overhead. +// +// The three functions you must define are "read" (reads some bytes of data), +// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end). +// +// =========================================================================== +// +// SIMD support +// +// The JPEG decoder will try to automatically use SIMD kernels on x86 when +// supported by the compiler. For ARM Neon support, you must explicitly +// request it. +// +// (The old do-it-yourself SIMD API is no longer supported in the current +// code.) +// +// On x86, SSE2 will automatically be used when available based on a run-time +// test; if not, the generic C versions are used as a fall-back. On ARM targets, +// the typical path is to have separate builds for NEON and non-NEON devices +// (at least this is true for iOS and Android). Therefore, the NEON support is +// toggled by a build flag: define STBI_NEON to get NEON loops. +// +// If for some reason you do not want to use any of SIMD code, or if +// you have issues compiling it, you can disable it entirely by +// defining STBI_NO_SIMD. 
+// +// =========================================================================== +// +// HDR image support (disable by defining STBI_NO_HDR) +// +// stb_image now supports loading HDR images in general, and currently +// the Radiance .HDR file format, although the support is provided +// generically. You can still load any file through the existing interface; +// if you attempt to load an HDR file, it will be automatically remapped to +// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). +// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +// float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +// stbi_ldr_to_hdr_scale(1.0f); +// stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +// stbi_is_hdr(char *filename); +// +// =========================================================================== +// +// iPhone PNG support: +// +// By default we convert iphone-formatted PNGs back to RGB, even though +// they are internally encoded differently. You can disable this conversion +// by by calling stbi_convert_iphone_png_to_rgb(0), in which case +// you will always just get the native iphone "format" through (which +// is BGR stored in RGB). 
+// +// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per +// pixel to remove any premultiplied alpha *only* if the image file explicitly +// says there's premultiplied data (currently only happens in iPhone images, +// and only if iPhone convert-to-rgb processing is on). +// +// =========================================================================== +// +// ADDITIONAL CONFIGURATION +// +// - You can suppress implementation of any of the decoders to reduce +// your code footprint by #defining one or more of the following +// symbols before creating the implementation. +// +// STBI_NO_JPEG +// STBI_NO_PNG +// STBI_NO_BMP +// STBI_NO_PSD +// STBI_NO_TGA +// STBI_NO_GIF +// STBI_NO_HDR +// STBI_NO_PIC +// STBI_NO_PNM (.ppm and .pgm) +// +// - You can request *only* certain decoders and suppress all other ones +// (this will be more forward-compatible, as addition of new decoders +// doesn't require you to disable them explicitly): +// +// STBI_ONLY_JPEG +// STBI_ONLY_PNG +// STBI_ONLY_BMP +// STBI_ONLY_PSD +// STBI_ONLY_TGA +// STBI_ONLY_GIF +// STBI_ONLY_HDR +// STBI_ONLY_PIC +// STBI_ONLY_PNM (.ppm and .pgm) +// +// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still +// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB +// + + +#ifndef STBI_NO_STDIO +#include +#endif // STBI_NO_STDIO + +#define STBI_VERSION 1 + +enum +{ + STBI_default = 0, // only used for desired_channels + + STBI_grey = 1, + STBI_grey_alpha = 2, + STBI_rgb = 3, + STBI_rgb_alpha = 4 +}; + +typedef unsigned char stbi_uc; +typedef unsigned short stbi_us; + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef STB_IMAGE_STATIC +#define STBIDEF static +#else +#define STBIDEF extern +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// PRIMARY API - works on images of any type +// + +// +// load image by filename, open file, or memory buffer +// + +typedef struct +{ + int (*read) (void *user,char *data,int 
size); // fill 'data' with 'size' bytes. return number of bytes actually read + void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative + int (*eof) (void *user); // returns nonzero if we are at end of file/data +} stbi_io_callbacks; + +//////////////////////////////////// +// +// 8-bits-per-channel interface +// + +STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels); +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +#endif + + +#ifndef STBI_NO_STDIO +STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +// for stbi_load_from_file, file pointer is left pointing immediately after image +#endif + +//////////////////////////////////// +// +// 16-bits-per-channel interface +// + +STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +#endif + +//////////////////////////////////// +// +// float-per-channel interface +// +#ifndef STBI_NO_LINEAR + STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int 
*channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + + #ifndef STBI_NO_STDIO + STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); + #endif +#endif + +#ifndef STBI_NO_HDR + STBIDEF void stbi_hdr_to_ldr_gamma(float gamma); + STBIDEF void stbi_hdr_to_ldr_scale(float scale); +#endif // STBI_NO_HDR + +#ifndef STBI_NO_LINEAR + STBIDEF void stbi_ldr_to_hdr_gamma(float gamma); + STBIDEF void stbi_ldr_to_hdr_scale(float scale); +#endif // STBI_NO_LINEAR + +// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename); +STBIDEF int stbi_is_hdr_from_file(FILE *f); +#endif // STBI_NO_STDIO + + +// get a VERY brief reason for failure +// NOT THREADSAFE +STBIDEF const char *stbi_failure_reason (void); + +// free the loaded image -- this is just free() +STBIDEF void stbi_image_free (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit (char const 
*filename); +STBIDEF int stbi_is_16_bit_from_file(FILE *f); +#endif + + + +// for image formats that explicitly notate that they have premultiplied alpha, +// we just return the colors as stored in the file. set this flag to force +// unpremultiplication. results are undefined if the unpremultiply overflow. +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); + +// indicate whether we should process iphone images back to canonical format, +// or just pass them through "as-is" +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); + +// flip the image vertically, so the first pixel in the output array is the bottom left +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); + +// ZLIB client - used by PNG, available for other purposes + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); +STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + + +#ifdef __cplusplus +} +#endif + +// +// +//// end header file ///////////////////////////////////////////////////// +#endif // STBI_INCLUDE_STB_IMAGE_H + +#ifdef STB_IMAGE_IMPLEMENTATION + +#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \ + || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \ + || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \ + || defined(STBI_ONLY_ZLIB) + #ifndef STBI_ONLY_JPEG + #define STBI_NO_JPEG + #endif + #ifndef STBI_ONLY_PNG + 
#define STBI_NO_PNG + #endif + #ifndef STBI_ONLY_BMP + #define STBI_NO_BMP + #endif + #ifndef STBI_ONLY_PSD + #define STBI_NO_PSD + #endif + #ifndef STBI_ONLY_TGA + #define STBI_NO_TGA + #endif + #ifndef STBI_ONLY_GIF + #define STBI_NO_GIF + #endif + #ifndef STBI_ONLY_HDR + #define STBI_NO_HDR + #endif + #ifndef STBI_ONLY_PIC + #define STBI_NO_PIC + #endif + #ifndef STBI_ONLY_PNM + #define STBI_NO_PNM + #endif +#endif + +#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB) +#define STBI_NO_ZLIB +#endif + + +#include +#include // ptrdiff_t on osx +#include +#include +#include + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +#include // ldexp, pow +#endif + +#ifndef STBI_NO_STDIO +#include +#endif + +#ifndef STBI_ASSERT +#include +#define STBI_ASSERT(x) assert(x) +#endif + + +#ifndef _MSC_VER + #ifdef __cplusplus + #define stbi_inline inline + #else + #define stbi_inline + #endif +#else + #define stbi_inline __forceinline +#endif + + +#ifdef _MSC_VER +typedef unsigned short stbi__uint16; +typedef signed short stbi__int16; +typedef unsigned int stbi__uint32; +typedef signed int stbi__int32; +#else +#include +typedef uint16_t stbi__uint16; +typedef int16_t stbi__int16; +typedef uint32_t stbi__uint32; +typedef int32_t stbi__int32; +#endif + +// should produce compiler error if size is wrong +typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 
1 : -1]; + +#ifdef _MSC_VER +#define STBI_NOTUSED(v) (void)(v) +#else +#define STBI_NOTUSED(v) (void)sizeof(v) +#endif + +#ifdef _MSC_VER +#define STBI_HAS_LROTL +#endif + +#ifdef STBI_HAS_LROTL + #define stbi_lrot(x,y) _lrotl(x,y) +#else + #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y)))) +#endif + +#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) +// ok +#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)." +#endif + +#ifndef STBI_MALLOC +#define STBI_MALLOC(sz) malloc(sz) +#define STBI_REALLOC(p,newsz) realloc(p,newsz) +#define STBI_FREE(p) free(p) +#endif + +#ifndef STBI_REALLOC_SIZED +#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz) +#endif + +// x86/x64 detection +#if defined(__x86_64__) || defined(_M_X64) +#define STBI__X64_TARGET +#elif defined(__i386) || defined(_M_IX86) +#define STBI__X86_TARGET +#endif + +#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) +// gcc doesn't support sse2 intrinsics unless you compile with -msse2, +// which in turn means it gets to use SSE2 everywhere. This is unfortunate, +// but previous attempts to provide the SSE2 functions with runtime +// detection caused numerous issues. The way architecture extensions are +// exposed in GCC/Clang is, sadly, not really suited for one-file libs. +// New behavior: if compiled with -msse2, we use SSE2 without any +// detection; if not, we don't use it at all. 
+#define STBI_NO_SIMD +#endif + +#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD) +// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET +// +// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the +// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant. +// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not +// simultaneously enabling "-mstackrealign". +// +// See https://github.com/nothings/stb/issues/81 for more information. +// +// So default to no SSE2 on 32-bit MinGW. If you've read this far and added +// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2. +#define STBI_NO_SIMD +#endif + +#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) +#define STBI_SSE2 +#include + +#ifdef _MSC_VER + +#if _MSC_VER >= 1400 // not VC6 +#include // __cpuid +static int stbi__cpuid3(void) +{ + int info[4]; + __cpuid(info,1); + return info[3]; +} +#else +static int stbi__cpuid3(void) +{ + int res; + __asm { + mov eax,1 + cpuid + mov res,edx + } + return res; +} +#endif + +#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name + +static int stbi__sse2_available(void) +{ + int info3 = stbi__cpuid3(); + return ((info3 >> 26) & 1) != 0; +} +#else // assume GCC-style if not VC++ +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) + +static int stbi__sse2_available(void) +{ + // If we're even attempting to compile this on GCC/Clang, that means + // -msse2 is on, which means the compiler is allowed to use SSE2 + // instructions at will, and so are we. 
+ return 1; +} +#endif +#endif + +// ARM NEON +#if defined(STBI_NO_SIMD) && defined(STBI_NEON) +#undef STBI_NEON +#endif + +#ifdef STBI_NEON +#include +// assume GCC or Clang on ARM targets +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) +#endif + +#ifndef STBI_SIMD_ALIGN +#define STBI_SIMD_ALIGN(type, name) type name +#endif + +/////////////////////////////////////////////// +// +// stbi__context struct and start_xxx functions + +// stbi__context structure is our basic context used by all images, so it +// contains all the IO context, plus some basic image information +typedef struct +{ + stbi__uint32 img_x, img_y; + int img_n, img_out_n; + + stbi_io_callbacks io; + void *io_user_data; + + int read_from_callbacks; + int buflen; + stbi_uc buffer_start[128]; + + stbi_uc *img_buffer, *img_buffer_end; + stbi_uc *img_buffer_original, *img_buffer_original_end; +} stbi__context; + + +static void stbi__refill_buffer(stbi__context *s); + +// initialize a memory-decode context +static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len) +{ + s->io.read = NULL; + s->read_from_callbacks = 0; + s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer; + s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len; +} + +// initialize a callback-based context +static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user) +{ + s->io = *c; + s->io_user_data = user; + s->buflen = sizeof(s->buffer_start); + s->read_from_callbacks = 1; + s->img_buffer_original = s->buffer_start; + stbi__refill_buffer(s); + s->img_buffer_original_end = s->img_buffer_end; +} + +#ifndef STBI_NO_STDIO + +static int stbi__stdio_read(void *user, char *data, int size) +{ + return (int) fread(data,1,size,(FILE*) user); +} + +static void stbi__stdio_skip(void *user, int n) +{ + fseek((FILE*) user, n, SEEK_CUR); +} + +static int stbi__stdio_eof(void *user) +{ + return feof((FILE*) user); +} + +static stbi_io_callbacks 
stbi__stdio_callbacks = +{ + stbi__stdio_read, + stbi__stdio_skip, + stbi__stdio_eof, +}; + +static void stbi__start_file(stbi__context *s, FILE *f) +{ + stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); +} + +//static void stop_file(stbi__context *s) { } + +#endif // !STBI_NO_STDIO + +static void stbi__rewind(stbi__context *s) +{ + // conceptually rewind SHOULD rewind to the beginning of the stream, + // but we just rewind to the beginning of the initial buffer, because + // we only use it after doing 'test', which only ever looks at at most 92 bytes + s->img_buffer = s->img_buffer_original; + s->img_buffer_end = s->img_buffer_original_end; +} + +enum +{ + STBI_ORDER_RGB, + STBI_ORDER_BGR +}; + +typedef struct +{ + int bits_per_channel; + int num_channels; + int channel_order; +} stbi__result_info; + +#ifndef STBI_NO_JPEG +static int stbi__jpeg_test(stbi__context *s); +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNG +static int stbi__png_test(stbi__context *s); +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__png_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_BMP +static int stbi__bmp_test(stbi__context *s); +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_TGA +static int stbi__tga_test(stbi__context *s); +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s); +static 
void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__psd_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_HDR +static int stbi__hdr_test(stbi__context *s); +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_test(stbi__context *s); +static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_GIF +static int stbi__gif_test(stbi__context *s); +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNM +static int stbi__pnm_test(stbi__context *s); +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +// this is not threadsafe +static const char *stbi__g_failure_reason; + +STBIDEF const char *stbi_failure_reason(void) +{ + return stbi__g_failure_reason; +} + +static int stbi__err(const char *str) +{ + stbi__g_failure_reason = str; + return 0; +} + +static void *stbi__malloc(size_t size) +{ + return STBI_MALLOC(size); +} + +// stb_image uses ints pervasively, including for offset calculations. +// therefore the largest decoded image size we can support with the +// current code, even on 64-bit targets, is INT_MAX. this is not a +// significant limitation for the intended use case. 
+// +// we do, however, need to make sure our size calculations don't +// overflow. hence a few helper functions for size calculations that +// multiply integers together, making sure that they're non-negative +// and no overflow occurs. + +// return 1 if the sum is valid, 0 on overflow. +// negative terms are considered invalid. +static int stbi__addsizes_valid(int a, int b) +{ + if (b < 0) return 0; + // now 0 <= b <= INT_MAX, hence also + // 0 <= INT_MAX - b <= INTMAX. + // And "a + b <= INT_MAX" (which might overflow) is the + // same as a <= INT_MAX - b (no overflow) + return a <= INT_MAX - b; +} + +// returns 1 if the product is valid, 0 on overflow. +// negative factors are considered invalid. +static int stbi__mul2sizes_valid(int a, int b) +{ + if (a < 0 || b < 0) return 0; + if (b == 0) return 1; // mul-by-0 is always safe + // portable way to check for no overflows in a*b + return a <= INT_MAX/b; +} + +// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow +static int stbi__mad2sizes_valid(int a, int b, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add); +} + +// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow +static int stbi__mad3sizes_valid(int a, int b, int c, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__addsizes_valid(a*b*c, add); +} + +// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add); +} +#endif + +// mallocs with size overflow checking +static void *stbi__malloc_mad2(int a, int b, int add) +{ + if (!stbi__mad2sizes_valid(a, b, add)) return NULL; + return stbi__malloc(a*b + add); +} + +static void 
*stbi__malloc_mad3(int a, int b, int c, int add) +{ + if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL; + return stbi__malloc(a*b*c + add); +} + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +static void *stbi__malloc_mad4(int a, int b, int c, int d, int add) +{ + if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; + return stbi__malloc(a*b*c*d + add); +} +#endif + +// stbi__err - error +// stbi__errpf - error returning pointer to float +// stbi__errpuc - error returning pointer to unsigned char + +#ifdef STBI_NO_FAILURE_STRINGS + #define stbi__err(x,y) 0 +#elif defined(STBI_FAILURE_USERMSG) + #define stbi__err(x,y) stbi__err(y) +#else + #define stbi__err(x,y) stbi__err(x) +#endif + +#define stbi__errpf(x,y) ((float *)(size_t) (stbi__err(x,y)?NULL:NULL)) +#define stbi__errpuc(x,y) ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL)) + +STBIDEF void stbi_image_free(void *retval_from_stbi_load) +{ + STBI_FREE(retval_from_stbi_load); +} + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); +#endif + +#ifndef STBI_NO_HDR +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp); +#endif + +static int stbi__vertically_flip_on_load = 0; + +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) +{ + stbi__vertically_flip_on_load = flag_true_if_should_flip; +} + +static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields + ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed + ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order + ri->num_channels = 0; + + #ifndef STBI_NO_JPEG + if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PNG + if (stbi__png_test(s)) return 
stbi__png_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_BMP + if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_GIF + if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PSD + if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc); + #endif + #ifndef STBI_NO_PIC + if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PNM + if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri); + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); + return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + + #ifndef STBI_NO_TGA + // test tga last because it's a crappy test! + if (stbi__tga_test(s)) + return stbi__tga_load(s,x,y,comp,req_comp, ri); + #endif + + return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); +} + +static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi_uc *reduced; + + reduced = (stbi_uc *) stbi__malloc(img_len); + if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling + + STBI_FREE(orig); + return reduced; +} + +static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi__uint16 *enlarged; + + enlarged = (stbi__uint16 *) stbi__malloc(img_len*2); + if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff + + STBI_FREE(orig); + return enlarged; +} + +static void 
stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) +{ + int row; + size_t bytes_per_row = (size_t)w * bytes_per_pixel; + stbi_uc temp[2048]; + stbi_uc *bytes = (stbi_uc *)image; + + for (row = 0; row < (h>>1); row++) { + stbi_uc *row0 = bytes + row*bytes_per_row; + stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; + // swap row0 with row1 + size_t bytes_left = bytes_per_row; + while (bytes_left) { + size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp); + memcpy(temp, row0, bytes_copy); + memcpy(row0, row1, bytes_copy); + memcpy(row1, temp, bytes_copy); + row0 += bytes_copy; + row1 += bytes_copy; + bytes_left -= bytes_copy; + } + } +} + +static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel) +{ + int slice; + int slice_size = w * h * bytes_per_pixel; + + stbi_uc *bytes = (stbi_uc *)image; + for (slice = 0; slice < z; ++slice) { + stbi__vertical_flip(bytes, w, h, bytes_per_pixel); + bytes += slice_size; + } +} + +static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); + + if (result == NULL) + return NULL; + + if (ri.bits_per_channel != 8) { + STBI_ASSERT(ri.bits_per_channel == 16); + result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 8; + } + + // @TODO: move stbi__convert_format to here + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? 
req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); + } + + return (unsigned char *) result; +} + +static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); + + if (result == NULL) + return NULL; + + if (ri.bits_per_channel != 16) { + STBI_ASSERT(ri.bits_per_channel == 8); + result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 16; + } + + // @TODO: move stbi__convert_format16 to here + // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); + } + + return (stbi__uint16 *) result; +} + +#if !defined(STBI_NO_HDR) || !defined(STBI_NO_LINEAR) +static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) +{ + if (stbi__vertically_flip_on_load && result != NULL) { + int channels = req_comp ? 
req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); + } +} +#endif + +#ifndef STBI_NO_STDIO + +static FILE *stbi__fopen(char const *filename, char const *mode) +{ + FILE *f; +#if defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != fopen_s(&f, filename, mode)) + f=0; +#else + f = fopen(filename, mode); +#endif + return f; +} + + +STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + unsigned char *result; + if (!f) return stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__uint16 *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + stbi__uint16 *result; + if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file_16(f,x,y,comp,req_comp); + fclose(f); + return result; +} + + +#endif //!STBI_NO_STDIO + +STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + 
stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_mem(&s,buffer,len); + + result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp); + if (stbi__vertically_flip_on_load) { + stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); + } + + return result; +} +#endif + +#ifndef STBI_NO_LINEAR +static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + stbi__result_info ri; + float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); + if (hdr_data) + stbi__float_postprocess(hdr_data,x,y,comp,req_comp); + return hdr_data; + } + #endif + data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); + if (data) + return stbi__ldr_to_hdr(data, *x, *y, req_comp ? 
req_comp : *comp); + return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); +} + +STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + float *result; + FILE *f = stbi__fopen(filename, "rb"); + if (!f) return stbi__errpf("can't fopen", "Unable to open file"); + result = stbi_loadf_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_file(&s,f); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} +#endif // !STBI_NO_STDIO + +#endif // !STBI_NO_LINEAR + +// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is +// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always +// reports false! 
+ +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(buffer); + STBI_NOTUSED(len); + return 0; + #endif +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result=0; + if (f) { + result = stbi_is_hdr_from_file(f); + fclose(f); + } + return result; +} + +STBIDEF int stbi_is_hdr_from_file(FILE *f) +{ + #ifndef STBI_NO_HDR + long pos = ftell(f); + int res; + stbi__context s; + stbi__start_file(&s,f); + res = stbi__hdr_test(&s); + fseek(f, pos, SEEK_SET); + return res; + #else + STBI_NOTUSED(f); + return 0; + #endif +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(clbk); + STBI_NOTUSED(user); + return 0; + #endif +} + +#ifndef STBI_NO_LINEAR +static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f; + +STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } +STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } +#endif + +static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f; + +STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; } +STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; } + + +////////////////////////////////////////////////////////////////////////////// +// +// Common code used by all image loaders +// + +enum +{ + STBI__SCAN_load=0, + STBI__SCAN_type, + STBI__SCAN_header +}; + +static void stbi__refill_buffer(stbi__context *s) +{ + int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); + if (n == 0) { + // at end of file, treat same as if from memory, but need to handle case + // where 
s->img_buffer isn't pointing to safe memory, e.g. 0-byte file + s->read_from_callbacks = 0; + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start+1; + *s->img_buffer = 0; + } else { + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start + n; + } +} + +stbi_inline static stbi_uc stbi__get8(stbi__context *s) +{ + if (s->img_buffer < s->img_buffer_end) + return *s->img_buffer++; + if (s->read_from_callbacks) { + stbi__refill_buffer(s); + return *s->img_buffer++; + } + return 0; +} + +stbi_inline static int stbi__at_eof(stbi__context *s) +{ + if (s->io.read) { + if (!(s->io.eof)(s->io_user_data)) return 0; + // if feof() is true, check if buffer = end + // special case: we've only got the special 0 character at the end + if (s->read_from_callbacks == 0) return 1; + } + + return s->img_buffer >= s->img_buffer_end; +} + +static void stbi__skip(stbi__context *s, int n) +{ + if (n < 0) { + s->img_buffer = s->img_buffer_end; + return; + } + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + s->img_buffer = s->img_buffer_end; + (s->io.skip)(s->io_user_data, n - blen); + return; + } + } + s->img_buffer += n; +} + +static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) +{ + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + int res, count; + + memcpy(buffer, s->img_buffer, blen); + + count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); + res = (count == (n-blen)); + s->img_buffer = s->img_buffer_end; + return res; + } + } + + if (s->img_buffer+n <= s->img_buffer_end) { + memcpy(buffer, s->img_buffer, n); + s->img_buffer += n; + return 1; + } else + return 0; +} + +static int stbi__get16be(stbi__context *s) +{ + int z = stbi__get8(s); + return (z << 8) + stbi__get8(s); +} + +static stbi__uint32 stbi__get32be(stbi__context *s) +{ + stbi__uint32 z = stbi__get16be(s); + return (z << 16) + stbi__get16be(s); +} + +#if 
defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) +// nothing +#else +static int stbi__get16le(stbi__context *s) +{ + int z = stbi__get8(s); + return z + (stbi__get8(s) << 8); +} +#endif + +#ifndef STBI_NO_BMP +static stbi__uint32 stbi__get32le(stbi__context *s) +{ + stbi__uint32 z = stbi__get16le(s); + return z + (stbi__get16le(s) << 16); +} +#endif + +#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings + + +////////////////////////////////////////////////////////////////////////////// +// +// generic converter from built-in img_n to req_comp +// individual types do this automatically as much as possible (e.g. jpeg +// does all cases internally since it needs to colorspace convert anyway, +// and it never has alpha, so very few cases ). png can automatically +// interleave an alpha=255 channel, but falls back to this for other cases +// +// assume data buffer is malloced, so malloc a new one and free that one +// only failure mode is malloc failing + +static stbi_uc stbi__compute_y(int r, int g, int b) +{ + return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + unsigned char *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); + if (good == NULL) { + STBI_FREE(data); + return stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, 
req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; + default: STBI_ASSERT(0); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} + +static stbi__uint16 stbi__compute_y_16(int r, int g, int b) +{ + return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + stbi__uint16 *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); + if (good == NULL) { + STBI_FREE(data); + return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + stbi__uint16 *src = data + j * x * img_n ; + stbi__uint16 *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + 
STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; + default: STBI_ASSERT(0); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) +{ + int i,k,n; + float *output; + if (!data) return NULL; + output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); + } + if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f; + } + STBI_FREE(data); + return output; +} +#endif + +#ifndef STBI_NO_HDR +#define stbi__float2int(x) ((int) (x)) +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) +{ + int i,k,n; + stbi_uc *output; + if (!data) return NULL; + output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of 
memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + if (k < comp) { + float z = data[i*comp+k] * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + } + STBI_FREE(data); + return output; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// "baseline" JPEG/JFIF decoder +// +// simple implementation +// - doesn't support delayed output of y-dimension +// - simple interface (only one output format: 8-bit interleaved RGB) +// - doesn't try to recover corrupt jpegs +// - doesn't allow partial loading, loading multiple at once +// - still fast on x86 (copying globals into locals doesn't help x86) +// - allocates lots of intermediate memory (full size of all components) +// - non-interleaved case requires this anyway +// - allows good upsampling (see next) +// high-quality +// - upsampled channels are bilinearly interpolated, even across blocks +// - quality integer IDCT derived from IJG's 'slow' +// performance +// - fast huffman; reasonable integer IDCT +// - some SIMD kernels for common paths on targets with SSE2/NEON +// - uses a lot of intermediate memory, could cache poorly + +#ifndef STBI_NO_JPEG + +// huffman decoding acceleration +#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache + +typedef struct +{ + stbi_uc fast[1 << FAST_BITS]; + // weirdly, repacking this into AoS is a 10% speed loss, instead of a win + stbi__uint16 code[256]; + stbi_uc values[256]; + stbi_uc size[257]; + unsigned int maxcode[18]; + int delta[17]; // old 'firstsymbol' - old 'firstcode' +} stbi__huffman; + +typedef struct +{ + stbi__context *s; + stbi__huffman 
huff_dc[4]; + stbi__huffman huff_ac[4]; + stbi__uint16 dequant[4][64]; + stbi__int16 fast_ac[4][1 << FAST_BITS]; + +// sizes for components, interleaved MCUs + int img_h_max, img_v_max; + int img_mcu_x, img_mcu_y; + int img_mcu_w, img_mcu_h; + +// definition of jpeg image component + struct + { + int id; + int h,v; + int tq; + int hd,ha; + int dc_pred; + + int x,y,w2,h2; + stbi_uc *data; + void *raw_data, *raw_coeff; + stbi_uc *linebuf; + short *coeff; // progressive only + int coeff_w, coeff_h; // number of 8x8 coefficient blocks + } img_comp[4]; + + stbi__uint32 code_buffer; // jpeg entropy-coded buffer + int code_bits; // number of valid bits + unsigned char marker; // marker seen while filling entropy buffer + int nomore; // flag if we saw a marker so must stop + + int progressive; + int spec_start; + int spec_end; + int succ_high; + int succ_low; + int eob_run; + int jfif; + int app14_color_transform; // Adobe APP14 tag + int rgb; + + int scan_n, order[4]; + int restart_interval, todo; + +// kernels + void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]); + void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step); + stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs); +} stbi__jpeg; + +static int stbi__build_huffman(stbi__huffman *h, int *count) +{ + int i,j,k=0; + unsigned int code; + // build size list for each symbol (from JPEG spec) + for (i=0; i < 16; ++i) + for (j=0; j < count[i]; ++j) + h->size[k++] = (stbi_uc) (i+1); + h->size[k] = 0; + + // compute actual symbols (from jpeg spec) + code = 0; + k = 0; + for(j=1; j <= 16; ++j) { + // compute delta to add to code to compute symbol id + h->delta[j] = k - code; + if (h->size[k] == j) { + while (h->size[k] == j) + h->code[k++] = (stbi__uint16) (code++); + if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); + } + // compute largest code + 1 for this 
size, preshifted as needed later + h->maxcode[j] = code << (16-j); + code <<= 1; + } + h->maxcode[j] = 0xffffffff; + + // build non-spec acceleration table; 255 is flag for not-accelerated + memset(h->fast, 255, 1 << FAST_BITS); + for (i=0; i < k; ++i) { + int s = h->size[i]; + if (s <= FAST_BITS) { + int c = h->code[i] << (FAST_BITS-s); + int m = 1 << (FAST_BITS-s); + for (j=0; j < m; ++j) { + h->fast[c+j] = (stbi_uc) i; + } + } + } + return 1; +} + +// build a table that decodes both magnitude and value of small ACs in +// one go. +static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) +{ + int i; + for (i=0; i < (1 << FAST_BITS); ++i) { + stbi_uc fast = h->fast[i]; + fast_ac[i] = 0; + if (fast < 255) { + int rs = h->values[fast]; + int run = (rs >> 4) & 15; + int magbits = rs & 15; + int len = h->size[fast]; + + if (magbits && len + magbits <= FAST_BITS) { + // magnitude code followed by receive_extend code + int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); + int m = 1 << (magbits - 1); + if (k < m) k += (~0U << magbits) + 1; + // if the result is small enough, we can fit it in fast_ac table + if (k >= -128 && k <= 127) + fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); + } + } + } +} + +static void stbi__grow_buffer_unsafe(stbi__jpeg *j) +{ + do { + unsigned int b = j->nomore ? 
0 : stbi__get8(j->s); + if (b == 0xff) { + int c = stbi__get8(j->s); + while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes + if (c != 0) { + j->marker = (unsigned char) c; + j->nomore = 1; + return; + } + } + j->code_buffer |= b << (24 - j->code_bits); + j->code_bits += 8; + } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) +{ + unsigned int temp; + int c,k; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + // look at the top FAST_BITS and determine what symbol ID it is, + // if the code is <= FAST_BITS + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + k = h->fast[c]; + if (k < 255) { + int s = h->size[k]; + if (s > j->code_bits) + return -1; + j->code_buffer <<= s; + j->code_bits -= s; + return h->values[k]; + } + + // naive test is to shift the code_buffer down so k bits are + // valid, then test against maxcode. To speed this up, we've + // preshifted maxcode left so that it has (16-k) 0s at the + // end; in other words, regardless of the number of bits, it + // wants to be compared against something shifted to have 16; + // that way we don't need to shift inside the loop. + temp = j->code_buffer >> 16; + for (k=FAST_BITS+1 ; ; ++k) + if (temp < h->maxcode[k]) + break; + if (k == 17) { + // error! 
code not found + j->code_bits -= 16; + return -1; + } + + if (k > j->code_bits) + return -1; + + // convert the huffman code to the symbol id + c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; + STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]); + + // convert the id to a symbol + j->code_bits -= k; + j->code_buffer <<= k; + return h->values[c]; +} + +// bias[n] = (-1<<n)+1 +static const int stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767}; + +// combined JPEG 'receive' and JPEG 'extend', since baseline +// requires extend in huffman decode & hence in jpeg huffman decode +stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n) +{ + unsigned int k; + int sgn; + if (j->code_bits < n) stbi__grow_buffer_unsafe(j); + + sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB + k = stbi_lrot(j->code_buffer, n); + STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask))); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k + (stbi__jbias[n] & ~sgn); +} + +// get some unsigned bits +stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n) +{ + unsigned int k; + if (j->code_bits < n) stbi__grow_buffer_unsafe(j); + k = stbi_lrot(j->code_buffer, n); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k; +} + +stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) +{ + unsigned int k; + if (j->code_bits < 1) stbi__grow_buffer_unsafe(j); + k = j->code_buffer; + j->code_buffer <<= 1; + --j->code_bits; + return k & 0x80000000; +} + +// given a value that's at position X in the zigzag stream, +// where does it appear in the 8x8 matrix coded as row-major?
+static const stbi_uc stbi__jpeg_dezigzag[64+15] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // let corrupt input sample past end + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63 +}; + +// decode one 64-entry block-- +static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant) +{ + int diff,dc,k; + int t; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + t = stbi__jpeg_huff_decode(j, hdc); + if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + + // 0 all the ac values now so we can do it 32-bits at a time + memset(data,0,64*sizeof(data[0])); + + diff = t ? stbi__extend_receive(j, t) : 0; + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc * dequant[0]); + + // decode AC components, see JPEG spec + k = 1; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) * dequant[zig]); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (rs != 0xf0) break; // end block + k += 16; + } else { + k += r; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); + } + } + } while (k < 64); + return 1; +} + +static int 
stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) +{ + int diff,dc; + int t; + if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + if (j->succ_high == 0) { + // first scan for DC coefficient, must be first + memset(data,0,64*sizeof(data[0])); // 0 all the ac values now + t = stbi__jpeg_huff_decode(j, hdc); + diff = t ? stbi__extend_receive(j, t) : 0; + + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc << j->succ_low); + } else { + // refinement scan for DC coefficient + if (stbi__jpeg_get_bit(j)) + data[0] += (short) (1 << j->succ_low); + } + return 1; +} + +// @OPTIMIZE: store non-zigzagged during the decode passes, +// and only de-zigzag when dequantizing +static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac) +{ + int k; + if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->succ_high == 0) { + int shift = j->succ_low; + + if (j->eob_run) { + --j->eob_run; + return 1; + } + + k = j->spec_start; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) << shift); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r); + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + --j->eob_run; + break; + } + k += 16; + } else { + k += r; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) << shift); + } + } + } while 
(k <= j->spec_end); + } else { + // refinement scan for these AC coefficients + + short bit = (short) (1 << j->succ_low); + + if (j->eob_run) { + --j->eob_run; + for (k = j->spec_start; k <= j->spec_end; ++k) { + short *p = &data[stbi__jpeg_dezigzag[k]]; + if (*p != 0) + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } + } else { + k = j->spec_start; + do { + int r,s; + int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r) - 1; + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + r = 64; // force end of block + } else { + // r=15 s=0 should write 16 0s, so we just do + // a run of 15 0s and then write s (which is 0), + // so we don't have to do anything special here + } + } else { + if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); + // sign bit + if (stbi__jpeg_get_bit(j)) + s = bit; + else + s = -bit; + } + + // advance by r + while (k <= j->spec_end) { + short *p = &data[stbi__jpeg_dezigzag[k++]]; + if (*p != 0) { + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } else { + if (r == 0) { + *p = (short) s; + break; + } + --r; + } + } + } while (k <= j->spec_end); + } + } + return 1; +} + +// take a -128..127 value and stbi__clamp it and convert to 0..255 +stbi_inline static stbi_uc stbi__clamp(int x) +{ + // trick to use a single test to catch both cases + if ((unsigned int) x > 255) { + if (x < 0) return 0; + if (x > 255) return 255; + } + return (stbi_uc) x; +} + +#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5))) +#define stbi__fsh(x) ((x) * 4096) + +// derived from jidctint -- DCT_ISLOW +#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ + int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = 
(p2+p3) * stbi__f2f(0.5411961f); \ + t2 = p1 + p3*stbi__f2f(-1.847759065f); \ + t3 = p1 + p2*stbi__f2f( 0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = stbi__fsh(p2+p3); \ + t1 = stbi__fsh(p2-p3); \ + x0 = t0+t3; \ + x3 = t0-t3; \ + x1 = t1+t2; \ + x2 = t1-t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0+t2; \ + p4 = t1+t3; \ + p1 = t0+t3; \ + p2 = t1+t2; \ + p5 = (p3+p4)*stbi__f2f( 1.175875602f); \ + t0 = t0*stbi__f2f( 0.298631336f); \ + t1 = t1*stbi__f2f( 2.053119869f); \ + t2 = t2*stbi__f2f( 3.072711026f); \ + t3 = t3*stbi__f2f( 1.501321110f); \ + p1 = p5 + p1*stbi__f2f(-0.899976223f); \ + p2 = p5 + p2*stbi__f2f(-2.562915447f); \ + p3 = p3*stbi__f2f(-1.961570560f); \ + p4 = p4*stbi__f2f(-0.390180644f); \ + t3 += p1+p4; \ + t2 += p2+p3; \ + t1 += p2+p4; \ + t0 += p1+p3; + +static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) +{ + int i,val[64],*v=val; + stbi_uc *o; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0]*4; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + 
STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + x0 += 65536 + (128<<17); + x1 += 65536 + (128<<17); + x2 += 65536 + (128<<17); + x3 += 65536 + (128<<17); + // tried computing the shifts into temps, or'ing the temps to see + // if any were out of range, but that was slower + o[0] = stbi__clamp((x0+t3) >> 17); + o[7] = stbi__clamp((x0-t3) >> 17); + o[1] = stbi__clamp((x1+t2) >> 17); + o[6] = stbi__clamp((x1-t2) >> 17); + o[2] = stbi__clamp((x2+t1) >> 17); + o[5] = stbi__clamp((x2-t1) >> 17); + o[3] = stbi__clamp((x3+t0) >> 17); + o[4] = stbi__clamp((x3-t0) >> 17); + } +} + +#ifdef STBI_SSE2 +// sse2 integer IDCT. not the fastest possible implementation but it +// produces bit-identical results to the generic C version so it's +// fully "transparent". +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + // This is constructed to match our regular (generic) integer IDCT exactly. 
+ __m128i row0, row1, row2, row3, row4, row5, row6, row7; + __m128i tmp; + + // dot product constant: even elems=x, odd elems=y + #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y)) + + // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit) + // out(1) = c1[even]*x + c1[odd]*y + #define dct_rot(out0,out1, x,y,c0,c1) \ + __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \ + __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \ + __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ + __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ + __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ + __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) + + // out = in << 12 (in 16-bit, out 32-bit) + #define dct_widen(out, in) \ + __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ + __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) + + // wide add + #define dct_wadd(out, a, b) \ + __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_add_epi32(a##_h, b##_h) + + // wide sub + #define dct_wsub(out, a, b) \ + __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) + + // butterfly a/b, add bias, then shift by "s" and pack + #define dct_bfly32o(out0, out1, a,b,bias,s) \ + { \ + __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ + __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ + dct_wadd(sum, abiased, b); \ + dct_wsub(dif, abiased, b); \ + out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ + out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ + } + + // 8-bit interleave step (for transposes) + #define dct_interleave8(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi8(a, b); \ + b = _mm_unpackhi_epi8(tmp, b) + + // 16-bit interleave step (for transposes) + #define dct_interleave16(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi16(a, b); \ + b = _mm_unpackhi_epi16(tmp, b) + + #define dct_pass(bias,shift) \ + { \ + 
/* even part */ \ + dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \ + __m128i sum04 = _mm_add_epi16(row0, row4); \ + __m128i dif04 = _mm_sub_epi16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ + dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \ + __m128i sum17 = _mm_add_epi16(row1, row7); \ + __m128i sum35 = _mm_add_epi16(row3, row5); \ + dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \ + dct_wadd(x4, y0o, y4o); \ + dct_wadd(x5, y1o, y5o); \ + dct_wadd(x6, y2o, y5o); \ + dct_wadd(x7, y3o, y4o); \ + dct_bfly32o(row0,row7, x0,x7,bias,shift); \ + dct_bfly32o(row1,row6, x1,x6,bias,shift); \ + dct_bfly32o(row2,row5, x2,x5,bias,shift); \ + dct_bfly32o(row3,row4, x3,x4,bias,shift); \ + } + + __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); + __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f)); + __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f)); + __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f)); + __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f)); + __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f)); + __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f)); + __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f)); + + // rounding biases in column/row passes, see stbi__idct_block for explanation. 
+ __m128i bias_0 = _mm_set1_epi32(512); + __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17)); + + // load + row0 = _mm_load_si128((const __m128i *) (data + 0*8)); + row1 = _mm_load_si128((const __m128i *) (data + 1*8)); + row2 = _mm_load_si128((const __m128i *) (data + 2*8)); + row3 = _mm_load_si128((const __m128i *) (data + 3*8)); + row4 = _mm_load_si128((const __m128i *) (data + 4*8)); + row5 = _mm_load_si128((const __m128i *) (data + 5*8)); + row6 = _mm_load_si128((const __m128i *) (data + 6*8)); + row7 = _mm_load_si128((const __m128i *) (data + 7*8)); + + // column pass + dct_pass(bias_0, 10); + + { + // 16bit 8x8 transpose pass 1 + dct_interleave16(row0, row4); + dct_interleave16(row1, row5); + dct_interleave16(row2, row6); + dct_interleave16(row3, row7); + + // transpose pass 2 + dct_interleave16(row0, row2); + dct_interleave16(row1, row3); + dct_interleave16(row4, row6); + dct_interleave16(row5, row7); + + // transpose pass 3 + dct_interleave16(row0, row1); + dct_interleave16(row2, row3); + dct_interleave16(row4, row5); + dct_interleave16(row6, row7); + } + + // row pass + dct_pass(bias_1, 17); + + { + // pack + __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 + __m128i p1 = _mm_packus_epi16(row2, row3); + __m128i p2 = _mm_packus_epi16(row4, row5); + __m128i p3 = _mm_packus_epi16(row6, row7); + + // 8bit 8x8 transpose pass 1 + dct_interleave8(p0, p2); // a0e0a1e1... + dct_interleave8(p1, p3); // c0g0c1g1... + + // transpose pass 2 + dct_interleave8(p0, p1); // a0c0e0g0... + dct_interleave8(p2, p3); // b0d0f0h0... + + // transpose pass 3 + dct_interleave8(p0, p2); // a0b0c0d0... + dct_interleave8(p1, p3); // a4b4c4d4... 
+ + // store + _mm_storel_epi64((__m128i *) out, p0); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p2); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p1); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p3); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e)); + } + +#undef dct_const +#undef dct_rot +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_interleave8 +#undef dct_interleave16 +#undef dct_pass +} + +#endif // STBI_SSE2 + +#ifdef STBI_NEON + +// NEON integer IDCT. should produce bit-identical +// results to the generic C version. +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + int16x8_t row0, row1, row2, row3, row4, row5, row6, row7; + + int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f)); + int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f)); + int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f)); + int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f)); + int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f)); + int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f)); + int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f)); + int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f)); + int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f)); + int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f)); + int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f)); + int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f)); + +#define dct_long_mul(out, inq, coeff) \ + int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff) + +#define dct_long_mac(out, acc, inq, coeff) \ + int32x4_t out##_l = vmlal_s16(acc##_l, 
vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff) + +#define dct_widen(out, inq) \ + int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \ + int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12) + +// wide add +#define dct_wadd(out, a, b) \ + int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vaddq_s32(a##_h, b##_h) + +// wide sub +#define dct_wsub(out, a, b) \ + int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vsubq_s32(a##_h, b##_h) + +// butterfly a/b, then shift using "shiftop" by "s" and pack +#define dct_bfly32o(out0,out1, a,b,shiftop,s) \ + { \ + dct_wadd(sum, a, b); \ + dct_wsub(dif, a, b); \ + out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ + out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ + } + +#define dct_pass(shiftop, shift) \ + { \ + /* even part */ \ + int16x8_t sum26 = vaddq_s16(row2, row6); \ + dct_long_mul(p1e, sum26, rot0_0); \ + dct_long_mac(t2e, p1e, row6, rot0_1); \ + dct_long_mac(t3e, p1e, row2, rot0_2); \ + int16x8_t sum04 = vaddq_s16(row0, row4); \ + int16x8_t dif04 = vsubq_s16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + int16x8_t sum15 = vaddq_s16(row1, row5); \ + int16x8_t sum17 = vaddq_s16(row1, row7); \ + int16x8_t sum35 = vaddq_s16(row3, row5); \ + int16x8_t sum37 = vaddq_s16(row3, row7); \ + int16x8_t sumodd = vaddq_s16(sum17, sum35); \ + dct_long_mul(p5o, sumodd, rot1_0); \ + dct_long_mac(p1o, p5o, sum17, rot1_1); \ + dct_long_mac(p2o, p5o, sum35, rot1_2); \ + dct_long_mul(p3o, sum37, rot2_0); \ + dct_long_mul(p4o, sum15, rot2_1); \ + dct_wadd(sump13o, p1o, p3o); \ + dct_wadd(sump24o, p2o, p4o); \ + dct_wadd(sump23o, p2o, p3o); \ + dct_wadd(sump14o, p1o, p4o); \ + dct_long_mac(x4, sump13o, row7, rot3_0); \ + dct_long_mac(x5, sump24o, row5, rot3_1); \ + 
dct_long_mac(x6, sump23o, row3, rot3_2); \ + dct_long_mac(x7, sump14o, row1, rot3_3); \ + dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \ + dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \ + dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \ + dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \ + } + + // load + row0 = vld1q_s16(data + 0*8); + row1 = vld1q_s16(data + 1*8); + row2 = vld1q_s16(data + 2*8); + row3 = vld1q_s16(data + 3*8); + row4 = vld1q_s16(data + 4*8); + row5 = vld1q_s16(data + 5*8); + row6 = vld1q_s16(data + 6*8); + row7 = vld1q_s16(data + 7*8); + + // add DC bias + row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0)); + + // column pass + dct_pass(vrshrn_n_s32, 10); + + // 16bit 8x8 transpose + { +// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively. +// whether compilers actually get this is another story, sadly. +#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); } +#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); } + + // pass 1 + dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 + dct_trn16(row2, row3); + dct_trn16(row4, row5); + dct_trn16(row6, row7); + + // pass 2 + dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 + dct_trn32(row1, row3); + dct_trn32(row4, row6); + dct_trn32(row5, row7); + + // pass 3 + dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 + dct_trn64(row1, row5); + dct_trn64(row2, row6); + dct_trn64(row3, row7); + +#undef dct_trn16 +#undef dct_trn32 +#undef dct_trn64 + } + + // row pass + // vrshrn_n_s32 only supports shifts up to 16, we need + // 17. so do a non-rounding shift of 16 first then follow + // up with a rounding shift by 1. 
+ dct_pass(vshrn_n_s32, 16); + + { + // pack and round + uint8x8_t p0 = vqrshrun_n_s16(row0, 1); + uint8x8_t p1 = vqrshrun_n_s16(row1, 1); + uint8x8_t p2 = vqrshrun_n_s16(row2, 1); + uint8x8_t p3 = vqrshrun_n_s16(row3, 1); + uint8x8_t p4 = vqrshrun_n_s16(row4, 1); + uint8x8_t p5 = vqrshrun_n_s16(row5, 1); + uint8x8_t p6 = vqrshrun_n_s16(row6, 1); + uint8x8_t p7 = vqrshrun_n_s16(row7, 1); + + // again, these can translate into one instruction, but often don't. +#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); } +#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); } + + // sadly can't use interleaved stores here since we only write + // 8 bytes to each scan line! 
+ + // 8x8 8-bit transpose pass 1 + dct_trn8_8(p0, p1); + dct_trn8_8(p2, p3); + dct_trn8_8(p4, p5); + dct_trn8_8(p6, p7); + + // pass 2 + dct_trn8_16(p0, p2); + dct_trn8_16(p1, p3); + dct_trn8_16(p4, p6); + dct_trn8_16(p5, p7); + + // pass 3 + dct_trn8_32(p0, p4); + dct_trn8_32(p1, p5); + dct_trn8_32(p2, p6); + dct_trn8_32(p3, p7); + + // store + vst1_u8(out, p0); out += out_stride; + vst1_u8(out, p1); out += out_stride; + vst1_u8(out, p2); out += out_stride; + vst1_u8(out, p3); out += out_stride; + vst1_u8(out, p4); out += out_stride; + vst1_u8(out, p5); out += out_stride; + vst1_u8(out, p6); out += out_stride; + vst1_u8(out, p7); + +#undef dct_trn8_8 +#undef dct_trn8_16 +#undef dct_trn8_32 + } + +#undef dct_long_mul +#undef dct_long_mac +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_pass +} + +#endif // STBI_NEON + +#define STBI__MARKER_none 0xff +// if there's a pending marker from the entropy stream, return that +// otherwise, fetch from the stream and get a marker. if there's no +// marker, return 0xff, which is never a valid marker value +static stbi_uc stbi__get_marker(stbi__jpeg *j) +{ + stbi_uc x; + if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; } + x = stbi__get8(j->s); + if (x != 0xff) return STBI__MARKER_none; + while (x == 0xff) + x = stbi__get8(j->s); // consume repeated 0xff fill bytes + return x; +} + +// in each scan, we'll have scan_n components, and the order +// of the components is specified by order[] +#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) + +// after a restart interval, stbi__jpeg_reset the entropy decoder and +// the dc prediction +static void stbi__jpeg_reset(stbi__jpeg *j) +{ + j->code_bits = 0; + j->code_buffer = 0; + j->nomore = 0; + j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0; + j->marker = STBI__MARKER_none; + j->todo = j->restart_interval ? 
j->restart_interval : 0x7fffffff; + j->eob_run = 0; + // no more than 1<<31 MCUs if no restart_interal? that's plenty safe, + // since we don't even allow 1<<30 pixels +} + +static int stbi__parse_entropy_coded_data(stbi__jpeg *z) +{ + stbi__jpeg_reset(z); + if (!z->progressive) { + if (z->scan_n == 1) { + int i,j; + STBI_SIMD_ALIGN(short, data[64]); + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int i,j,k,x,y; + STBI_SIMD_ALIGN(short, data[64]); + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... 
process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x)*8; + int y2 = (j*z->img_comp[n].v + y)*8; + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data); + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } else { + if (z->scan_n == 1) { + int i,j; + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + if (z->spec_start == 0) { + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } else { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha])) + return 0; + } + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int 
i,j,k,x,y; + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x); + int y2 = (j*z->img_comp[n].v + y); + short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } +} + +static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) +{ + int i; + for (i=0; i < 64; ++i) + data[i] *= dequant[i]; +} + +static void stbi__jpeg_finish(stbi__jpeg *z) +{ + if (z->progressive) { + // dequantize and idct the data + int i,j,n; + for (n=0; n < z->s->img_n; ++n) { + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + } + } + } + } +} + +static int stbi__process_marker(stbi__jpeg *z, int m) +{ + int L; + switch (m) { + case STBI__MARKER_none: // no marker found + return stbi__err("expected marker","Corrupt JPEG"); + + case 0xDD: // DRI - specify restart interval + if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); + z->restart_interval = 
stbi__get16be(z->s); + return 1; + + case 0xDB: // DQT - define quantization table + L = stbi__get16be(z->s)-2; + while (L > 0) { + int q = stbi__get8(z->s); + int p = q >> 4, sixteen = (p != 0); + int t = q & 15,i; + if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); + if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); + + for (i=0; i < 64; ++i) + z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); + L -= (sixteen ? 129 : 65); + } + return L==0; + + case 0xC4: // DHT - define huffman table + L = stbi__get16be(z->s)-2; + while (L > 0) { + stbi_uc *v; + int sizes[16],i,n=0; + int q = stbi__get8(z->s); + int tc = q >> 4; + int th = q & 15; + if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG"); + for (i=0; i < 16; ++i) { + sizes[i] = stbi__get8(z->s); + n += sizes[i]; + } + L -= 17; + if (tc == 0) { + if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; + v = z->huff_dc[th].values; + } else { + if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0; + v = z->huff_ac[th].values; + } + for (i=0; i < n; ++i) + v[i] = stbi__get8(z->s); + if (tc != 0) + stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); + L -= n; + } + return L==0; + } + + // check for comment block or APP blocks + if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { + L = stbi__get16be(z->s); + if (L < 2) { + if (m == 0xFE) + return stbi__err("bad COM len","Corrupt JPEG"); + else + return stbi__err("bad APP len","Corrupt JPEG"); + } + L -= 2; + + if (m == 0xE0 && L >= 5) { // JFIF APP0 segment + static const unsigned char tag[5] = {'J','F','I','F','\0'}; + int ok = 1; + int i; + for (i=0; i < 5; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 5; + if (ok) + z->jfif = 1; + } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment + static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; + int ok = 1; + int i; + for (i=0; i < 6; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 6; 
+ if (ok) { + stbi__get8(z->s); // version + stbi__get16be(z->s); // flags0 + stbi__get16be(z->s); // flags1 + z->app14_color_transform = stbi__get8(z->s); // color transform + L -= 6; + } + } + + stbi__skip(z->s, L); + return 1; + } + + return stbi__err("unknown marker","Corrupt JPEG"); +} + +// after we see SOS +static int stbi__process_scan_header(stbi__jpeg *z) +{ + int i; + int Ls = stbi__get16be(z->s); + z->scan_n = stbi__get8(z->s); + if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG"); + if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG"); + for (i=0; i < z->scan_n; ++i) { + int id = stbi__get8(z->s), which; + int q = stbi__get8(z->s); + for (which = 0; which < z->s->img_n; ++which) + if (z->img_comp[which].id == id) + break; + if (which == z->s->img_n) return 0; // no match + z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG"); + z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG"); + z->order[i] = which; + } + + { + int aa; + z->spec_start = stbi__get8(z->s); + z->spec_end = stbi__get8(z->s); // should be 63, but might be 0 + aa = stbi__get8(z->s); + z->succ_high = (aa >> 4); + z->succ_low = (aa & 15); + if (z->progressive) { + if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13) + return stbi__err("bad SOS", "Corrupt JPEG"); + } else { + if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG"); + if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG"); + z->spec_end = 63; + } + } + + return 1; +} + +static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why) +{ + int i; + for (i=0; i < ncomp; ++i) { + if (z->img_comp[i].raw_data) { + STBI_FREE(z->img_comp[i].raw_data); + z->img_comp[i].raw_data = NULL; + z->img_comp[i].data = NULL; + 
} + if (z->img_comp[i].raw_coeff) { + STBI_FREE(z->img_comp[i].raw_coeff); + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].coeff = 0; + } + if (z->img_comp[i].linebuf) { + STBI_FREE(z->img_comp[i].linebuf); + z->img_comp[i].linebuf = NULL; + } + } + return why; +} + +static int stbi__process_frame_header(stbi__jpeg *z, int scan) +{ + stbi__context *s = z->s; + int Lf,p,i,q, h_max=1,v_max=1,c; + Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG + p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline + s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires + c = stbi__get8(s); + if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); + s->img_n = c; + for (i=0; i < c; ++i) { + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; + } + + if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); + + z->rgb = 0; + for (i=0; i < s->img_n; ++i) { + static const unsigned char rgb[3] = { 'R', 'G', 'B' }; + z->img_comp[i].id = stbi__get8(s); + if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) + ++z->rgb; + q = stbi__get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); + z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); + } + + if (scan != STBI__SCAN_load) return 1; + + if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); + + for (i=0; i < s->img_n; ++i) { + if 
(z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; + if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; + } + + // compute interleaved mcu info + z->img_h_max = h_max; + z->img_v_max = v_max; + z->img_mcu_w = h_max * 8; + z->img_mcu_h = v_max * 8; + // these sizes can't be more than 17 bits + z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; + z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; + + for (i=0; i < s->img_n; ++i) { + // number of effective pixels (e.g. for non-interleaved MCU) + z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; + z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; + // to simplify generation, we'll allocate enough memory to decode + // the bogus oversized data from using interleaved MCUs and their + // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't + // discard the extra data until colorspace conversion + // + // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) + // so these muls can't overflow with 32-bit ints (which we require) + z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; + z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; + z->img_comp[i].coeff = 0; + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].linebuf = NULL; + z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); + if (z->img_comp[i].raw_data == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); + // align blocks for idct using mmx/sse + z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); + if (z->progressive) { + // w2, h2 are multiples of 8 (see above) + z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; + z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; + z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); + if (z->img_comp[i].raw_coeff == NULL) + return stbi__free_jpeg_components(z, i+1, 
stbi__err("outofmem", "Out of memory")); + z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); + } + } + + return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. SOF) +#define stbi__DNL(x) ((x) == 0xdc) +#define stbi__SOI(x) ((x) == 0xd8) +#define stbi__EOI(x) ((x) == 0xd9) +#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) +#define stbi__SOS(x) ((x) == 0xda) + +#define stbi__SOF_progressive(x) ((x) == 0xc2) + +static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) +{ + int m; + z->jfif = 0; + z->app14_color_transform = -1; // valid values are 0,1,2 + z->marker = STBI__MARKER_none; // initialize cached marker to empty + m = stbi__get_marker(z); + if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG"); + if (scan == STBI__SCAN_type) return 1; + m = stbi__get_marker(z); + while (!stbi__SOF(m)) { + if (!stbi__process_marker(z,m)) return 0; + m = stbi__get_marker(z); + while (m == STBI__MARKER_none) { + // some files have extra padding after their blocks, so ok, we'll scan + if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); + m = stbi__get_marker(z); + } + } + z->progressive = stbi__SOF_progressive(m); + if (!stbi__process_frame_header(z, scan)) return 0; + return 1; +} + +// decode image to YCbCr format +static int stbi__decode_jpeg_image(stbi__jpeg *j) +{ + int m; + for (m = 0; m < 4; m++) { + j->img_comp[m].raw_data = NULL; + j->img_comp[m].raw_coeff = NULL; + } + j->restart_interval = 0; + if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0; + m = stbi__get_marker(j); + while (!stbi__EOI(m)) { + if (stbi__SOS(m)) { + if (!stbi__process_scan_header(j)) return 0; + if (!stbi__parse_entropy_coded_data(j)) return 0; + if (j->marker == STBI__MARKER_none ) { + // handle 0s at the end of image data from IP Kamera 9060 + while (!stbi__at_eof(j->s)) { + int x = stbi__get8(j->s); + if (x == 255) { + j->marker = stbi__get8(j->s); + break; + } + } + // if we 
reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 + } + } else if (stbi__DNL(m)) { + int Ld = stbi__get16be(j->s); + stbi__uint32 NL = stbi__get16be(j->s); + if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); + if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); + } else { + if (!stbi__process_marker(j, m)) return 0; + } + m = stbi__get_marker(j); + } + if (j->progressive) + stbi__jpeg_finish(j); + return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1, + int w, int hs); + +#define stbi__div4(x) ((stbi_uc) ((x) >> 2)) + +static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + STBI_NOTUSED(out); + STBI_NOTUSED(in_far); + STBI_NOTUSED(w); + STBI_NOTUSED(hs); + return in_near; +} + +static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples vertically for every one in input + int i; + STBI_NOTUSED(hs); + for (i=0; i < w; ++i) + out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); + return out; +} + +static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples horizontally for every one in input + int i; + stbi_uc *input = in_near; + + if (w == 1) { + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + return out; + } + + out[0] = input[0]; + out[1] = stbi__div4(input[0]*3 + input[1] + 2); + for (i=1; i < w-1; ++i) { + int n = 3*input[i]+2; + out[i*2+0] = stbi__div4(n+input[i-1]); + out[i*2+1] = stbi__div4(n+input[i+1]); + } + out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2); + out[i*2+1] = input[w-1]; + + STBI_NOTUSED(in_far); + STBI_NOTUSED(hs); + + return out; +} + +#define stbi__div16(x) ((stbi_uc) ((x) >> 4)) + +static stbi_uc 
*stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i,t0,t1; + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + out[0] = stbi__div4(t1+2); + for (i=1; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i=0,t0,t1; + + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + // process groups of 8 pixels for as long as we can. + // note we can't handle the last pixel in a row in this loop + // because we need to handle the filter boundary conditions. + for (; i < ((w-1) & ~7); i += 8) { +#if defined(STBI_SSE2) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + __m128i zero = _mm_setzero_si128(); + __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i)); + __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i)); + __m128i farw = _mm_unpacklo_epi8(farb, zero); + __m128i nearw = _mm_unpacklo_epi8(nearb, zero); + __m128i diff = _mm_sub_epi16(farw, nearw); + __m128i nears = _mm_slli_epi16(nearw, 2); + __m128i curr = _mm_add_epi16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. 
+ __m128i prv0 = _mm_slli_si128(curr, 2); + __m128i nxt0 = _mm_srli_si128(curr, 2); + __m128i prev = _mm_insert_epi16(prv0, t1, 0); + __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + __m128i bias = _mm_set1_epi16(8); + __m128i curs = _mm_slli_epi16(curr, 2); + __m128i prvd = _mm_sub_epi16(prev, curr); + __m128i nxtd = _mm_sub_epi16(next, curr); + __m128i curb = _mm_add_epi16(curs, bias); + __m128i even = _mm_add_epi16(prvd, curb); + __m128i odd = _mm_add_epi16(nxtd, curb); + + // interleave even and odd pixels, then undo scaling. + __m128i int0 = _mm_unpacklo_epi16(even, odd); + __m128i int1 = _mm_unpackhi_epi16(even, odd); + __m128i de0 = _mm_srli_epi16(int0, 4); + __m128i de1 = _mm_srli_epi16(int1, 4); + + // pack and write output + __m128i outv = _mm_packus_epi16(de0, de1); + _mm_storeu_si128((__m128i *) (out + i*2), outv); +#elif defined(STBI_NEON) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + uint8x8_t farb = vld1_u8(in_far + i); + uint8x8_t nearb = vld1_u8(in_near + i); + int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); + int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); + int16x8_t curr = vaddq_s16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. 
+ int16x8_t prv0 = vextq_s16(curr, curr, 7); + int16x8_t nxt0 = vextq_s16(curr, curr, 1); + int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); + int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + int16x8_t curs = vshlq_n_s16(curr, 2); + int16x8_t prvd = vsubq_s16(prev, curr); + int16x8_t nxtd = vsubq_s16(next, curr); + int16x8_t even = vaddq_s16(curs, prvd); + int16x8_t odd = vaddq_s16(curs, nxtd); + + // undo scaling and round, then store with even/odd phases interleaved + uint8x8x2_t o; + o.val[0] = vqrshrun_n_s16(even, 4); + o.val[1] = vqrshrun_n_s16(odd, 4); + vst2_u8(out + i*2, o); +#endif + + // "previous" value for next iter + t1 = 3*in_near[i+7] + in_far[i+7]; + } + + t0 = t1; + t1 = 3*in_near[i] + in_far[i]; + out[i*2] = stbi__div16(3*t1 + t0 + 8); + + for (++i; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} +#endif + +static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // resample with nearest-neighbor + int i,j; + STBI_NOTUSED(in_far); + for (i=0; i < w; ++i) + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; + return out; +} + +// this is a reduced-precision calculation of YCbCr-to-RGB introduced +// to make sure the code produces the same results in both SIMD and scalar +#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8) +static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) +{ + int i; + for (i=0; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int 
cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) +{ + int i = 0; + +#ifdef STBI_SSE2 + // step == 3 is pretty ugly on the final interleave, and i'm not convinced + // it's useful in practice (you wouldn't use it for textures, for example). + // so just accelerate step == 4 case. + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. 
+ __m128i signflip = _mm_set1_epi8(-0x80); + __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f)); + __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); + __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); + __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f)); + __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128); + __m128i xw = _mm_set1_epi16(255); // alpha channel + + for (; i+7 < count; i += 8) { + // load + __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i)); + __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i)); + __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i)); + __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 + __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 + + // unpack to short (and left-shift cr, cb by 8) + __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes); + __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); + __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); + + // color transform + __m128i yws = _mm_srli_epi16(yw, 4); + __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); + __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); + __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); + __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); + __m128i rws = _mm_add_epi16(cr0, yws); + __m128i gwt = _mm_add_epi16(cb0, yws); + __m128i bws = _mm_add_epi16(yws, cb1); + __m128i gws = _mm_add_epi16(gwt, cr1); + + // descale + __m128i rw = _mm_srai_epi16(rws, 4); + __m128i bw = _mm_srai_epi16(bws, 4); + __m128i gw = _mm_srai_epi16(gws, 4); + + // back to byte, set up for transpose + __m128i brb = _mm_packus_epi16(rw, bw); + __m128i gxb = _mm_packus_epi16(gw, xw); + + // transpose to interleave channels + __m128i t0 = _mm_unpacklo_epi8(brb, gxb); + __m128i t1 = _mm_unpackhi_epi8(brb, gxb); + __m128i o0 = _mm_unpacklo_epi16(t0, t1); + __m128i o1 = _mm_unpackhi_epi16(t0, t1); + + // store + _mm_storeu_si128((__m128i *) (out + 0), o0); + 
_mm_storeu_si128((__m128i *) (out + 16), o1); + out += 32; + } + } +#endif + +#ifdef STBI_NEON + // in this version, step=3 support would be easy to add. but is there demand? + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. + uint8x8_t signflip = vdup_n_u8(0x80); + int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f)); + int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f)); + int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f)); + int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f)); + + for (; i+7 < count; i += 8) { + // load + uint8x8_t y_bytes = vld1_u8(y + i); + uint8x8_t cr_bytes = vld1_u8(pcr + i); + uint8x8_t cb_bytes = vld1_u8(pcb + i); + int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip)); + int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip)); + + // expand to s16 + int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4)); + int16x8_t crw = vshll_n_s8(cr_biased, 7); + int16x8_t cbw = vshll_n_s8(cb_biased, 7); + + // color transform + int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0); + int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0); + int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1); + int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1); + int16x8_t rws = vaddq_s16(yws, cr0); + int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1); + int16x8_t bws = vaddq_s16(yws, cb1); + + // undo scaling, round, convert to byte + uint8x8x4_t o; + o.val[0] = vqrshrun_n_s16(rws, 4); + o.val[1] = vqrshrun_n_s16(gws, 4); + o.val[2] = vqrshrun_n_s16(bws, 4); + o.val[3] = vdup_n_u8(255); + + // store, interleaving r/g/b/a + vst4_u8(out, o); + out += 8*4; + } + } +#endif + + for (; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 
0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} +#endif + +// set up the kernels +static void stbi__setup_jpeg(stbi__jpeg *j) +{ + j->idct_block_kernel = stbi__idct_block; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2; + +#ifdef STBI_SSE2 + if (stbi__sse2_available()) { + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; + } +#endif + +#ifdef STBI_NEON + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; +#endif +} + +// clean up the temporary component buffers +static void stbi__cleanup_jpeg(stbi__jpeg *j) +{ + stbi__free_jpeg_components(j, j->s->img_n, 0); +} + +typedef struct +{ + resample_row_func resample; + stbi_uc *line0,*line1; + int hs,vs; // expansion factor in each axis + int w_lores; // horizontal pixels pre-expansion + int ystep; // how far through vertical expansion we are + int ypos; // which pre-expansion row we're on +} stbi__resample; + +// fast 0..255 * 0..255 => 0..255 rounded multiplication +static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) +{ + unsigned int t = x*y + 128; + return (stbi_uc) ((t + (t >>8)) >> 8); +} + +static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ + int n, decode_n, is_rgb; + z->s->img_n = 0; // make stbi__cleanup_jpeg safe + + // validate req_comp + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + + // load a jpeg image from 
whichever source, but leave in YCbCr format + if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; } + + // determine actual number of components to generate + n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1; + + is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif)); + + if (z->s->img_n == 3 && n < 3 && !is_rgb) + decode_n = 1; + else + decode_n = z->s->img_n; + + // resample and color-convert + { + int k; + unsigned int i,j; + stbi_uc *output; + stbi_uc *coutput[4]; + + stbi__resample res_comp[4]; + + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + + // allocate line buffer big enough for upsampling off the edges + // with upsample factor of 4 + z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3); + if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + r->hs = z->img_h_max / z->img_comp[k].h; + r->vs = z->img_v_max / z->img_comp[k].v; + r->ystep = r->vs >> 1; + r->w_lores = (z->s->img_x + r->hs-1) / r->hs; + r->ypos = 0; + r->line0 = r->line1 = z->img_comp[k].data; + + if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; + else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2; + else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2; + else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel; + else r->resample = stbi__resample_row_generic; + } + + // can't error after this so, this is safe + output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1); + if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + // now go ahead and resample + for (j=0; j < z->s->img_y; ++j) { + stbi_uc *out = output + n * z->s->img_x * j; + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + int y_bot = r->ystep >= (r->vs >> 1); + coutput[k] = r->resample(z->img_comp[k].linebuf, + y_bot ? 
r->line1 : r->line0, + y_bot ? r->line0 : r->line1, + r->w_lores, r->hs); + if (++r->ystep >= r->vs) { + r->ystep = 0; + r->line0 = r->line1; + if (++r->ypos < z->img_comp[k].y) + r->line1 += z->img_comp[k].w2; + } + } + if (n >= 3) { + stbi_uc *y = coutput[0]; + if (z->s->img_n == 3) { + if (is_rgb) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = y[i]; + out[1] = coutput[1][i]; + out[2] = coutput[2][i]; + out[3] = 255; + out += n; + } + } else { + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else if (z->s->img_n == 4) { + if (z->app14_color_transform == 0) { // CMYK + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(coutput[0][i], m); + out[1] = stbi__blinn_8x8(coutput[1][i], m); + out[2] = stbi__blinn_8x8(coutput[2][i], m); + out[3] = 255; + out += n; + } + } else if (z->app14_color_transform == 2) { // YCCK + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(255 - out[0], m); + out[1] = stbi__blinn_8x8(255 - out[1], m); + out[2] = stbi__blinn_8x8(255 - out[2], m); + out += n; + } + } else { // YCbCr + alpha? 
Ignore the fourth channel for now + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else + for (i=0; i < z->s->img_x; ++i) { + out[0] = out[1] = out[2] = y[i]; + out[3] = 255; // not used if n==3 + out += n; + } + } else { + if (is_rgb) { + if (n == 1) + for (i=0; i < z->s->img_x; ++i) + *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + else { + for (i=0; i < z->s->img_x; ++i, out += 2) { + out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + out[1] = 255; + } + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 0) { + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + stbi_uc r = stbi__blinn_8x8(coutput[0][i], m); + stbi_uc g = stbi__blinn_8x8(coutput[1][i], m); + stbi_uc b = stbi__blinn_8x8(coutput[2][i], m); + out[0] = stbi__compute_y(r, g, b); + out[1] = 255; + out += n; + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 2) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]); + out[1] = 255; + out += n; + } + } else { + stbi_uc *y = coutput[0]; + if (n == 1) + for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; + else + for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255; + } + } + } + stbi__cleanup_jpeg(z); + *out_x = z->s->img_x; + *out_y = z->s->img_y; + if (comp) *comp = z->s->img_n >= 3 ? 
3 : 1; // report original components, not output + return output; + } +} + +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + unsigned char* result; + stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg)); + STBI_NOTUSED(ri); + j->s = s; + stbi__setup_jpeg(j); + result = load_jpeg_image(j, x,y,comp,req_comp); + STBI_FREE(j); + return result; +} + +static int stbi__jpeg_test(stbi__context *s) +{ + int r; + stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg)); + j->s = s; + stbi__setup_jpeg(j); + r = stbi__decode_jpeg_header(j, STBI__SCAN_type); + stbi__rewind(s); + STBI_FREE(j); + return r; +} + +static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) +{ + if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) { + stbi__rewind( j->s ); + return 0; + } + if (x) *x = j->s->img_x; + if (y) *y = j->s->img_y; + if (comp) *comp = j->s->img_n >= 3 ? 3 : 1; + return 1; +} + +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) +{ + int result; + stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg))); + j->s = s; + result = stbi__jpeg_info_raw(j, x, y, comp); + STBI_FREE(j); + return result; +} +#endif + +// public domain zlib decode v0.2 Sean Barrett 2006-11-18 +// simple implementation +// - all input must be provided in an upfront buffer +// - all output is written to a single output buffer (can malloc/realloc) +// performance +// - fast huffman + +#ifndef STBI_NO_ZLIB + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables +#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1) + +// zlib-style huffman encoding +// (jpegs packs from left, zlib from right, so can't share code) +typedef struct +{ + stbi__uint16 fast[1 << STBI__ZFAST_BITS]; + stbi__uint16 firstcode[16]; + int maxcode[17]; + stbi__uint16 firstsymbol[16]; + stbi_uc size[288]; + stbi__uint16 
value[288]; +} stbi__zhuffman; + +stbi_inline static int stbi__bitreverse16(int n) +{ + n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); + n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); + n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); + n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); + return n; +} + +stbi_inline static int stbi__bit_reverse(int v, int bits) +{ + STBI_ASSERT(bits <= 16); + // to bit reverse n bits, reverse 16 and shift + // e.g. 11 bits, bit reverse and shift away 5 + return stbi__bitreverse16(v) >> (16-bits); +} + +static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num) +{ + int i,k=0; + int code, next_code[16], sizes[17]; + + // DEFLATE spec for generating codes + memset(sizes, 0, sizeof(sizes)); + memset(z->fast, 0, sizeof(z->fast)); + for (i=0; i < num; ++i) + ++sizes[sizelist[i]]; + sizes[0] = 0; + for (i=1; i < 16; ++i) + if (sizes[i] > (1 << i)) + return stbi__err("bad sizes", "Corrupt PNG"); + code = 0; + for (i=1; i < 16; ++i) { + next_code[i] = code; + z->firstcode[i] = (stbi__uint16) code; + z->firstsymbol[i] = (stbi__uint16) k; + code = (code + sizes[i]); + if (sizes[i]) + if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG"); + z->maxcode[i] = code << (16-i); // preshift for inner loop + code <<= 1; + k += sizes[i]; + } + z->maxcode[16] = 0x10000; // sentinel + for (i=0; i < num; ++i) { + int s = sizelist[i]; + if (s) { + int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; + stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); + z->size [c] = (stbi_uc ) s; + z->value[c] = (stbi__uint16) i; + if (s <= STBI__ZFAST_BITS) { + int j = stbi__bit_reverse(next_code[s],s); + while (j < (1 << STBI__ZFAST_BITS)) { + z->fast[j] = fastv; + j += (1 << s); + } + } + ++next_code[s]; + } + } + return 1; +} + +// zlib-from-memory implementation for PNG reading +// because PNG allows splitting the zlib stream arbitrarily, +// and it's annoying structurally to have PNG call ZLIB call PNG, +// we require 
PNG read all the IDATs and combine them into a single +// memory buffer + +typedef struct +{ + stbi_uc *zbuffer, *zbuffer_end; + int num_bits; + stbi__uint32 code_buffer; + + char *zout; + char *zout_start; + char *zout_end; + int z_expandable; + + stbi__zhuffman z_length, z_distance; +} stbi__zbuf; + +stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z) +{ + if (z->zbuffer >= z->zbuffer_end) return 0; + return *z->zbuffer++; +} + +static void stbi__fill_bits(stbi__zbuf *z) +{ + do { + STBI_ASSERT(z->code_buffer < (1U << z->num_bits)); + z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; + z->num_bits += 8; + } while (z->num_bits <= 24); +} + +stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n) +{ + unsigned int k; + if (z->num_bits < n) stbi__fill_bits(z); + k = z->code_buffer & ((1 << n) - 1); + z->code_buffer >>= n; + z->num_bits -= n; + return k; +} + +static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s,k; + // not resolved by fast table, so compute it the slow way + // use jpeg approach, which requires MSbits at top + k = stbi__bit_reverse(a->code_buffer, 16); + for (s=STBI__ZFAST_BITS+1; ; ++s) + if (k < z->maxcode[s]) + break; + if (s == 16) return -1; // invalid code! 
+ // code size is s, so: + b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; + STBI_ASSERT(z->size[b] == s); + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; +} + +stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s; + if (a->num_bits < 16) stbi__fill_bits(a); + b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; + if (b) { + s = b >> 9; + a->code_buffer >>= s; + a->num_bits -= s; + return b & 511; + } + return stbi__zhuffman_decode_slowpath(a, z); +} + +static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes +{ + char *q; + int cur, limit, old_limit; + z->zout = zout; + if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); + cur = (int) (z->zout - z->zout_start); + limit = old_limit = (int) (z->zout_end - z->zout_start); + while (cur + n > limit) + limit *= 2; + q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); + STBI_NOTUSED(old_limit); + if (q == NULL) return stbi__err("outofmem", "Out of memory"); + z->zout_start = q; + z->zout = q + cur; + z->zout_end = q + limit; + return 1; +} + +static const int stbi__zlength_base[31] = { + 3,4,5,6,7,8,9,10,11,13, + 15,17,19,23,27,31,35,43,51,59, + 67,83,99,115,131,163,195,227,258,0,0 }; + +static const int stbi__zlength_extra[31]= +{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + +static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + +static const int stbi__zdist_extra[32] = +{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int stbi__parse_huffman_block(stbi__zbuf *a) +{ + char *zout = a->zout; + for(;;) { + int z = stbi__zhuffman_decode(a, &a->z_length); + if (z < 256) { + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes + if (zout >= a->zout_end) { + if (!stbi__zexpand(a, zout, 1)) return 
0; + zout = a->zout; + } + *zout++ = (char) z; + } else { + stbi_uc *p; + int len,dist; + if (z == 256) { + a->zout = zout; + return 1; + } + z -= 257; + len = stbi__zlength_base[z]; + if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); + z = stbi__zhuffman_decode(a, &a->z_distance); + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); + dist = stbi__zdist_base[z]; + if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); + if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); + if (zout + len > a->zout_end) { + if (!stbi__zexpand(a, zout, len)) return 0; + zout = a->zout; + } + p = (stbi_uc *) (zout - dist); + if (dist == 1) { // run of one byte; common in images. + stbi_uc v = *p; + if (len) { do *zout++ = v; while (--len); } + } else { + if (len) { do *zout++ = *p++; while (--len); } + } + } + } +} + +static int stbi__compute_huffman_codes(stbi__zbuf *a) +{ + static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + stbi__zhuffman z_codelength; + stbi_uc lencodes[286+32+137];//padding for maximum single op + stbi_uc codelength_sizes[19]; + int i,n; + + int hlit = stbi__zreceive(a,5) + 257; + int hdist = stbi__zreceive(a,5) + 1; + int hclen = stbi__zreceive(a,4) + 4; + int ntot = hlit + hdist; + + memset(codelength_sizes, 0, sizeof(codelength_sizes)); + for (i=0; i < hclen; ++i) { + int s = stbi__zreceive(a,3); + codelength_sizes[length_dezigzag[i]] = (stbi_uc) s; + } + if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; + + n = 0; + while (n < ntot) { + int c = stbi__zhuffman_decode(a, &z_codelength); + if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG"); + if (c < 16) + lencodes[n++] = (stbi_uc) c; + else { + stbi_uc fill = 0; + if (c == 16) { + c = stbi__zreceive(a,2)+3; + if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); + fill = lencodes[n-1]; + } else if (c == 17) + c = 
stbi__zreceive(a,3)+3; + else { + STBI_ASSERT(c == 18); + c = stbi__zreceive(a,7)+11; + } + if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); + memset(lencodes+n, fill, c); + n += c; + } + } + if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG"); + if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; + return 1; +} + +static int stbi__parse_uncompressed_block(stbi__zbuf *a) +{ + stbi_uc header[4]; + int len,nlen,k; + if (a->num_bits & 7) + stbi__zreceive(a, a->num_bits & 7); // discard + // drain the bit-packed data into header + k = 0; + while (a->num_bits > 0) { + header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check + a->code_buffer >>= 8; + a->num_bits -= 8; + } + STBI_ASSERT(a->num_bits == 0); + // now fill header the normal way + while (k < 4) + header[k++] = stbi__zget8(a); + len = header[1] * 256 + header[0]; + nlen = header[3] * 256 + header[2]; + if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); + if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG"); + if (a->zout + len > a->zout_end) + if (!stbi__zexpand(a, a->zout, len)) return 0; + memcpy(a->zout, a->zbuffer, len); + a->zbuffer += len; + a->zout += len; + return 1; +} + +static int stbi__parse_zlib_header(stbi__zbuf *a) +{ + int cmf = stbi__zget8(a); + int cm = cmf & 15; + /* int cinfo = cmf >> 4; */ + int flg = stbi__zget8(a); + if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec + if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png + if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png + // window = 1 << (8 + cinfo)... 
but who cares, we fully buffer output + return 1; +} + +static const stbi_uc stbi__zdefault_length[288] = +{ + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8 +}; +static const stbi_uc stbi__zdefault_distance[32] = +{ + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 +}; +/* +Init algorithm: +{ + int i; // use <= to match clearly with spec + for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8; + for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9; + for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7; + for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8; + + for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5; +} +*/ + +static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) +{ + int final, type; + if (parse_header) + if (!stbi__parse_zlib_header(a)) return 0; + a->num_bits = 0; + a->code_buffer = 0; + do { + final = stbi__zreceive(a,1); + type = stbi__zreceive(a,2); + if (type == 0) { + if (!stbi__parse_uncompressed_block(a)) return 0; + } else if (type == 3) { + return 0; + } else { + if (type == 1) { + // use fixed code lengths + if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0; + } else { + if (!stbi__compute_huffman_codes(a)) return 0; + } + if (!stbi__parse_huffman_block(a)) return 0; + } + } while (!final); + return 1; +} + +static int stbi__do_zlib(stbi__zbuf *a, char 
*obuf, int olen, int exp, int parse_header) +{ + a->zout_start = obuf; + a->zout = obuf; + a->zout_end = obuf + olen; + a->z_expandable = exp; + + return stbi__parse_zlib(a, parse_header); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, 1)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) +{ + return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 1)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(16384); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer+len; + if (stbi__do_zlib(&a, p, 16384, 1, 0)) { + if (outlen) *outlen = (int) (a.zout - 
a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 0)) + return (int) (a.zout - a.zout_start); + else + return -1; +} +#endif + +// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 +// simple implementation +// - only 8-bit samples +// - no CRC checking +// - allocates lots of intermediate memory +// - avoids problem of streaming data between subsystems +// - avoids explicit window management +// performance +// - uses stb_zlib, a PD zlib implementation with fast huffman decoding + +#ifndef STBI_NO_PNG +typedef struct +{ + stbi__uint32 length; + stbi__uint32 type; +} stbi__pngchunk; + +static stbi__pngchunk stbi__get_chunk_header(stbi__context *s) +{ + stbi__pngchunk c; + c.length = stbi__get32be(s); + c.type = stbi__get32be(s); + return c; +} + +static int stbi__check_png_header(stbi__context *s) +{ + static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; + int i; + for (i=0; i < 8; ++i) + if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); + return 1; +} + +typedef struct +{ + stbi__context *s; + stbi_uc *idata, *expanded, *out; + int depth; +} stbi__png; + + +enum { + STBI__F_none=0, + STBI__F_sub=1, + STBI__F_up=2, + STBI__F_avg=3, + STBI__F_paeth=4, + // synthetic filters used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first, + STBI__F_paeth_first +}; + +static stbi_uc first_row_filter[5] = +{ + STBI__F_none, + STBI__F_sub, + STBI__F_none, + STBI__F_avg_first, + STBI__F_paeth_first +}; + +static int stbi__paeth(int a, int b, int c) +{ + int p = a + b - c; + int pa = abs(p-a); + int pb = abs(p-b); + int pc = abs(p-c); + if (pa <= pb && pa <= pc) return a; + if (pb <= pc) return b; + return c; +} + 
+static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; + +// create the png data from post-deflated data +static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) +{ + int bytes = (depth == 16? 2 : 1); + stbi__context *s = a->s; + stbi__uint32 i,j,stride = x*out_n*bytes; + stbi__uint32 img_len, img_width_bytes; + int k; + int img_n = s->img_n; // copy it into a local for later + + int output_bytes = out_n*bytes; + int filter_bytes = img_n*bytes; + int width = x; + + STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1); + a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into + if (!a->out) return stbi__err("outofmem", "Out of memory"); + + if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); + img_width_bytes = (((img_n * x * depth) + 7) >> 3); + img_len = (img_width_bytes + 1) * y; + + // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, + // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros), + // so just check for raw_len < img_len always. 
+ if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *prior; + int filter = *raw++; + + if (filter > 4) + return stbi__err("invalid filter","Corrupt PNG"); + + if (depth < 8) { + STBI_ASSERT(img_width_bytes <= x); + cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place + filter_bytes = 1; + width = img_width_bytes; + } + prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above + + // if first row, use special filter that doesn't sample previous row + if (j == 0) filter = first_row_filter[filter]; + + // handle first byte explicitly + for (k=0; k < filter_bytes; ++k) { + switch (filter) { + case STBI__F_none : cur[k] = raw[k]; break; + case STBI__F_sub : cur[k] = raw[k]; break; + case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; + case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; + case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; + case STBI__F_avg_first : cur[k] = raw[k]; break; + case STBI__F_paeth_first: cur[k] = raw[k]; break; + } + } + + if (depth == 8) { + if (img_n != out_n) + cur[img_n] = 255; // first pixel + raw += img_n; + cur += out_n; + prior += out_n; + } else if (depth == 16) { + if (img_n != out_n) { + cur[filter_bytes] = 255; // first pixel top byte + cur[filter_bytes+1] = 255; // first pixel bottom byte + } + raw += filter_bytes; + cur += output_bytes; + prior += output_bytes; + } else { + raw += 1; + cur += 1; + prior += 1; + } + + // this is a little gross, so that we don't switch per-pixel or per-component + if (depth < 8 || img_n == out_n) { + int nk = (width - 1)*filter_bytes; + #define STBI__CASE(f) \ + case f: \ + for (k=0; k < nk; ++k) + switch (filter) { + // "none" filter turns into a memcpy here; make that explicit. 
+ case STBI__F_none: memcpy(cur, raw, nk); break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; + } + #undef STBI__CASE + raw += nk; + } else { + STBI_ASSERT(img_n+1 == out_n); + #define STBI__CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ + for (k=0; k < filter_bytes; ++k) + switch (filter) { + STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; + } + #undef STBI__CASE + + // the loop above sets the high byte of the pixels' alpha, but for + // 16 bit png files we also need the low byte set. we'll do that here. 
+ if (depth == 16) { + cur = a->out + stride*j; // start at the beginning of the row again + for (i=0; i < x; ++i,cur+=output_bytes) { + cur[filter_bytes+1] = 255; + } + } + } + } + + // we make a separate pass to expand bits to pixels; for performance, + // this could run two scanlines behind the above code, so it won't + // intefere with filtering but will still be in the cache. + if (depth < 8) { + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; + // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit + // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop + stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range + + // note that the final byte might overshoot and write more data than desired. + // we can allocate enough data that this never writes out of memory, but it + // could also overwrite the next scanline. can it overwrite non-empty data + // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. 
+ // so we need to explicitly clamp the final ones + + if (depth == 4) { + for (k=x*img_n; k >= 2; k-=2, ++in) { + *cur++ = scale * ((*in >> 4) ); + *cur++ = scale * ((*in ) & 0x0f); + } + if (k > 0) *cur++ = scale * ((*in >> 4) ); + } else if (depth == 2) { + for (k=x*img_n; k >= 4; k-=4, ++in) { + *cur++ = scale * ((*in >> 6) ); + *cur++ = scale * ((*in >> 4) & 0x03); + *cur++ = scale * ((*in >> 2) & 0x03); + *cur++ = scale * ((*in ) & 0x03); + } + if (k > 0) *cur++ = scale * ((*in >> 6) ); + if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); + if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); + } else if (depth == 1) { + for (k=x*img_n; k >= 8; k-=8, ++in) { + *cur++ = scale * ((*in >> 7) ); + *cur++ = scale * ((*in >> 6) & 0x01); + *cur++ = scale * ((*in >> 5) & 0x01); + *cur++ = scale * ((*in >> 4) & 0x01); + *cur++ = scale * ((*in >> 3) & 0x01); + *cur++ = scale * ((*in >> 2) & 0x01); + *cur++ = scale * ((*in >> 1) & 0x01); + *cur++ = scale * ((*in ) & 0x01); + } + if (k > 0) *cur++ = scale * ((*in >> 7) ); + if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); + if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); + if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); + if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); + if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); + if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); + } + if (img_n != out_n) { + int q; + // insert alpha = 255 + cur = a->out + stride*j; + if (img_n == 1) { + for (q=x-1; q >= 0; --q) { + cur[q*2+1] = 255; + cur[q*2+0] = cur[q]; + } + } else { + STBI_ASSERT(img_n == 3); + for (q=x-1; q >= 0; --q) { + cur[q*4+3] = 255; + cur[q*4+2] = cur[q*3+2]; + cur[q*4+1] = cur[q*3+1]; + cur[q*4+0] = cur[q*3+0]; + } + } + } + } + } else if (depth == 16) { + // force the image data from big-endian to platform-native. + // this is done in a separate pass due to the decoding relying + // on the data being untouched, but could probably be done + // per-line during decode if care is taken. 
+ stbi_uc *cur = a->out; + stbi__uint16 *cur16 = (stbi__uint16*)cur; + + for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { + *cur16 = (cur[0] << 8) | cur[1]; + } + } + + return 1; +} + +static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) +{ + int bytes = (depth == 16 ? 2 : 1); + int out_bytes = out_n * bytes; + stbi_uc *final; + int p; + if (!interlaced) + return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); + + // de-interlacing + final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); + for (p=0; p < 7; ++p) { + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; + if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { + STBI_FREE(final); + return 0; + } + for (j=0; j < y; ++j) { + for (i=0; i < x; ++i) { + int out_y = j*yspc[p]+yorig[p]; + int out_x = i*xspc[p]+xorig[p]; + memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, + a->out + (j*x+i)*out_bytes, out_bytes); + } + } + STBI_FREE(a->out); + image_data += img_len; + image_data_len -= img_len; + } + } + a->out = final; + + return 1; +} + +static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + // compute color-based transparency, assuming we've + // already got 255 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i=0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] 
? 0 : 255); + p += 2; + } + } else { + for (i=0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi__uint16 *p = (stbi__uint16*) z->out; + + // compute color-based transparency, assuming we've + // already got 65535 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i = 0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 65535); + p += 2; + } + } else { + for (i = 0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n) +{ + stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y; + stbi_uc *p, *temp_out, *orig = a->out; + + p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0); + if (p == NULL) return stbi__err("outofmem", "Out of memory"); + + // between here and free(out) below, exitting would leak + temp_out = p; + + if (pal_img_n == 3) { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p += 3; + } + } else { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p[3] = palette[n+3]; + p += 4; + } + } + STBI_FREE(a->out); + a->out = temp_out; + + STBI_NOTUSED(len); + + return 1; +} + +static int stbi__unpremultiply_on_load = 0; +static int stbi__de_iphone_flag = 0; + +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) +{ + stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply; +} + +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) +{ + stbi__de_iphone_flag = 
flag_true_if_should_convert;
}

// Undo the iPhone CgBI transforms on the decoded image: swap BGR back to RGB
// and, when stbi__unpremultiply_on_load is set, un-premultiply alpha.
static void stbi__de_iphone(stbi__png *z)
{
   stbi__context *s = z->s;
   stbi__uint32 i, pixel_count = s->img_x * s->img_y;
   stbi_uc *p = z->out;

   if (s->img_out_n == 3) {  // convert bgr to rgb
      for (i=0; i < pixel_count; ++i) {
         stbi_uc t = p[0];
         p[0] = p[2];
         p[2] = t;
         p += 3;
      }
   } else {
      STBI_ASSERT(s->img_out_n == 4);
      if (stbi__unpremultiply_on_load) {
         // convert bgr to rgb and unpremultiply
         for (i=0; i < pixel_count; ++i) {
            stbi_uc a = p[3];
            stbi_uc t = p[0];
            if (a) {
               stbi_uc half = a / 2;   // for round-to-nearest division
               p[0] = (p[2] * 255 + half) / a;
               p[1] = (p[1] * 255 + half) / a;
               p[2] = ( t * 255 + half) / a;
            } else {
               p[0] = p[2];
               p[2] = t;
            }
            p += 4;
         }
      } else {
         // convert bgr to rgb
         for (i=0; i < pixel_count; ++i) {
            stbi_uc t = p[0];
            p[0] = p[2];
            p[2] = t;
            p += 4;
         }
      }
   }
}

// Build the big-endian 32-bit chunk-type tag from its four ASCII letters.
#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))

// Walk the PNG chunk stream. Behavior depends on 'scan': STBI__SCAN_type
// stops after the signature, STBI__SCAN_header stops once dimensions and
// component count are known, STBI__SCAN_load decodes the whole image into
// z->out. Returns 1 on success, 0 (with the error string set) on failure.
static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
{
   stbi_uc palette[1024], pal_img_n=0;       // up to 256 RGBA palette entries
   stbi_uc has_trans=0, tc[3];               // 8-bit tRNS transparent color
   stbi__uint16 tc16[3];                     // 16-bit tRNS transparent color
   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
   int first=1,k,interlace=0, color=0, is_iphone=0;
   stbi__context *s = z->s;

   z->expanded = NULL;
   z->idata = NULL;
   z->out = NULL;

   if (!stbi__check_png_header(s)) return 0;

   if (scan == STBI__SCAN_type) return 1;

   for (;;) {
      stbi__pngchunk c = stbi__get_chunk_header(s);
      switch (c.type) {
         case STBI__PNG_TYPE('C','g','B','I'):
            // Apple's proprietary chunk: marks an iPhone-style PNG
            is_iphone = 1;
            stbi__skip(s, c.length);
            break;
         case STBI__PNG_TYPE('I','H','D','R'): {
            int comp,filter;
            if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
            first = 0;
            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
            s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
            color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG");
            if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG");
            if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
            comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG");
            filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG");
            interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
            if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
            if (!pal_img_n) {
               // components: 1 or 3 (color bit 1) plus optional alpha (bit 2)
               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
               if (scan == STBI__SCAN_header) return 1;
            } else {
               // if paletted, then pal_n is our final components, and
               // img_n is # components to decompress/filter.
               s->img_n = 1;
               if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
               // if SCAN_header, have to scan to see if we have a tRNS
            }
            break;
         }

         case STBI__PNG_TYPE('P','L','T','E'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
            pal_len = c.length / 3;
            if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
            for (i=0; i < pal_len; ++i) {
               palette[i*4+0] = stbi__get8(s);
               palette[i*4+1] = stbi__get8(s);
               palette[i*4+2] = stbi__get8(s);
               palette[i*4+3] = 255;   // opaque until a tRNS chunk says otherwise
            }
            break;
         }

         case STBI__PNG_TYPE('t','R','N','S'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
            if (pal_img_n) {
               // per-palette-entry alpha values
               if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
               if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
               if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
               pal_img_n = 4;
               for (i=0; i < c.length; ++i)
                  palette[i*4+3] = stbi__get8(s);
            } else {
               // single transparent color key; only valid without an alpha channel
               if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
               if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
               has_trans = 1;
               if (z->depth == 16) {
                  for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
               } else {
                  for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
               }
            }
            break;
         }

         case STBI__PNG_TYPE('I','D','A','T'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
            if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
            if ((int)(ioff + c.length) < (int)ioff) return 0;   // overflow guard
            if (ioff + c.length > idata_limit) {
               // grow the accumulated-IDAT buffer geometrically
               stbi__uint32 idata_limit_old = idata_limit;
               stbi_uc *p;
               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
               while (ioff + c.length > idata_limit)
                  idata_limit *= 2;
               STBI_NOTUSED(idata_limit_old);
               p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
               z->idata = p;
            }
            if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
            ioff += c.length;
            break;
         }

         case STBI__PNG_TYPE('I','E','N','D'): {
            stbi__uint32 raw_len, bpl;
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (scan != STBI__SCAN_load) return 1;
            if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
            // initial guess for decoded data size to avoid unnecessary reallocs
            bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
            raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
            // iPhone PNGs omit the zlib header, hence !is_iphone for parse_header
            z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
            if (z->expanded == NULL) return 0; // zlib should set error
            STBI_FREE(z->idata); z->idata = NULL;
            // decode straight to n+1 components when alpha will be added anyway
            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
               s->img_out_n = s->img_n+1;
            else
               s->img_out_n = s->img_n;
            if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
            if (has_trans) {
               if (z->depth == 16) {
                  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
               } else {
                  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
               }
            }
            if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
               stbi__de_iphone(z);
            if (pal_img_n) {
               // pal_img_n == 3 or 4
               s->img_n = pal_img_n; // record the actual colors we had
               s->img_out_n = pal_img_n;
               if (req_comp >= 3) s->img_out_n = req_comp;
               if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
                  return 0;
            } else if (has_trans) {
               // non-paletted image with tRNS -> source image has (constant) alpha
               ++s->img_n;
            }
            STBI_FREE(z->expanded); z->expanded = NULL;
            return 1;
         }

         default:
            // if critical, fail
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            // bit 29 of the type is the "ancillary" flag; 0 means critical
            if ((c.type & (1 << 29)) == 0) {
               #ifndef STBI_NO_FAILURE_STRINGS
               // not threadsafe
               static char invalid_chunk[] = "XXXX PNG chunk not known";
               invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
               invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
               invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
               invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
               #endif
               return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
            }
            stbi__skip(s, c.length);
            break;
      }
      // end of PNG chunk, read and skip CRC
      stbi__get32be(s);
   }
}

// Drive a full PNG decode and convert to the requested component count.
// Always frees the scratch buffers; returns NULL on failure.
static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
{
   void *result=NULL;
   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
      if (p->depth < 8)
         ri->bits_per_channel = 8;   // sub-byte depths were expanded to 8-bit
      else
         ri->bits_per_channel = p->depth;
      result = p->out;
      p->out = NULL;   // ownership transfers to 'result'
      if (req_comp && req_comp != p->s->img_out_n) {
         if (ri->bits_per_channel == 8)
            result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
         else
            result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
         p->s->img_out_n = req_comp;
         if (result == NULL) return result;
      }
      *x = p->s->img_x;
      *y = p->s->img_y;
      if (n) *n = p->s->img_n;
   }
   STBI_FREE(p->out); p->out = NULL;
   STBI_FREE(p->expanded); p->expanded = NULL;
   STBI_FREE(p->idata); p->idata = NULL;

   return result;
}

static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi__png p;
   p.s = s;
   return stbi__do_png(&p,
x,y,comp,req_comp, ri); +} + +static int stbi__png_test(stbi__context *s) +{ + int r; + r = stbi__check_png_header(s); + stbi__rewind(s); + return r; +} + +static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp) +{ + if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) { + stbi__rewind( p->s ); + return 0; + } + if (x) *x = p->s->img_x; + if (y) *y = p->s->img_y; + if (comp) *comp = p->s->img_n; + return 1; +} + +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__png p; + p.s = s; + return stbi__png_info_raw(&p, x, y, comp); +} + +static int stbi__png_is16(stbi__context *s) +{ + stbi__png p; + p.s = s; + if (!stbi__png_info_raw(&p, NULL, NULL, NULL)) + return 0; + if (p.depth != 16) { + stbi__rewind(p.s); + return 0; + } + return 1; +} +#endif + +// Microsoft/Windows BMP image + +#ifndef STBI_NO_BMP +static int stbi__bmp_test_raw(stbi__context *s) +{ + int r; + int sz; + if (stbi__get8(s) != 'B') return 0; + if (stbi__get8(s) != 'M') return 0; + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + stbi__get32le(s); // discard data offset + sz = stbi__get32le(s); + r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124); + return r; +} + +static int stbi__bmp_test(stbi__context *s) +{ + int r = stbi__bmp_test_raw(s); + stbi__rewind(s); + return r; +} + + +// returns 0..31 for the highest set bit +static int stbi__high_bit(unsigned int z) +{ + int n=0; + if (z == 0) return -1; + if (z >= 0x10000) n += 16, z >>= 16; + if (z >= 0x00100) n += 8, z >>= 8; + if (z >= 0x00010) n += 4, z >>= 4; + if (z >= 0x00004) n += 2, z >>= 2; + if (z >= 0x00002) n += 1, z >>= 1; + return n; +} + +static int stbi__bitcount(unsigned int a) +{ + a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 + a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4 + a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits + a = (a + (a >> 8)); // max 16 per 8 bits + a = 
(a + (a >> 16)); // max 32 per 8 bits + return a & 0xff; +} + +// extract an arbitrarily-aligned N-bit value (N=bits) +// from v, and then make it 8-bits long and fractionally +// extend it to full full range. +static int stbi__shiftsigned(int v, int shift, int bits) +{ + static unsigned int mul_table[9] = { + 0, + 0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/, + 0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/, + }; + static unsigned int shift_table[9] = { + 0, 0,0,1,0,2,4,6,0, + }; + if (shift < 0) + v <<= -shift; + else + v >>= shift; + STBI_ASSERT(v >= 0 && v < 256); + v >>= (8-bits); + STBI_ASSERT(bits >= 0 && bits <= 8); + return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits]; +} + +typedef struct +{ + int bpp, offset, hsz; + unsigned int mr,mg,mb,ma, all_a; +} stbi__bmp_data; + +static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) +{ + int hsz; + if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP"); + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + info->offset = stbi__get32le(s); + info->hsz = hsz = stbi__get32le(s); + info->mr = info->mg = info->mb = info->ma = 0; + + if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown"); + if (hsz == 12) { + s->img_x = stbi__get16le(s); + s->img_y = stbi__get16le(s); + } else { + s->img_x = stbi__get32le(s); + s->img_y = stbi__get32le(s); + } + if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); + info->bpp = stbi__get16le(s); + if (hsz != 12) { + int compress = stbi__get32le(s); + if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); + stbi__get32le(s); // discard sizeof + stbi__get32le(s); // discard hres + stbi__get32le(s); // discard vres + stbi__get32le(s); // 
discard colorsused
      stbi__get32le(s); // discard max important
      if (hsz == 40 || hsz == 56) {
         if (hsz == 56) {
            // four extra reserved fields in the 56-byte variant
            stbi__get32le(s);
            stbi__get32le(s);
            stbi__get32le(s);
            stbi__get32le(s);
         }
         if (info->bpp == 16 || info->bpp == 32) {
            if (compress == 0) {
               // no BITFIELDS: assume the standard fixed masks
               if (info->bpp == 32) {
                  info->mr = 0xffu << 16;
                  info->mg = 0xffu << 8;
                  info->mb = 0xffu << 0;
                  info->ma = 0xffu << 24;
                  info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
               } else {
                  // 16bpp default is 5-5-5
                  info->mr = 31u << 10;
                  info->mg = 31u << 5;
                  info->mb = 31u << 0;
               }
            } else if (compress == 3) {
               // BI_BITFIELDS: masks are stored explicitly
               info->mr = stbi__get32le(s);
               info->mg = stbi__get32le(s);
               info->mb = stbi__get32le(s);
               // not documented, but generated by photoshop and handled by mspaint
               if (info->mr == info->mg && info->mg == info->mb) {
                  // ?!?!?
                  return stbi__errpuc("bad BMP", "bad BMP");
               }
            } else
               return stbi__errpuc("bad BMP", "bad BMP");
         }
      } else {
         // V4/V5 headers carry explicit masks plus color-space data we ignore
         int i;
         if (hsz != 108 && hsz != 124)
            return stbi__errpuc("bad BMP", "bad BMP");
         info->mr = stbi__get32le(s);
         info->mg = stbi__get32le(s);
         info->mb = stbi__get32le(s);
         info->ma = stbi__get32le(s);
         stbi__get32le(s); // discard color space
         for (i=0; i < 12; ++i)
            stbi__get32le(s); // discard color space parameters
         if (hsz == 124) {
            stbi__get32le(s); // discard rendering intent
            stbi__get32le(s); // discard offset of profile data
            stbi__get32le(s); // discard size of profile data
            stbi__get32le(s); // discard reserved
         }
      }
   }
   return (void *) 1;
}


// Decode a BMP into an 8-bit interleaved buffer. Handles 1/4/8-bit
// paletted data and 16/24/32-bit masked data; output is 'target'
// components per pixel, converted to req_comp at the end if needed.
static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *out;
   unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
   stbi_uc pal[256][4];
   int psize=0,i,j,width;
   int flip_vertically, pad, target;
   stbi__bmp_data info;
   STBI_NOTUSED(ri);

   info.all_a = 255;
   if (stbi__bmp_parse_header(s, &info) == NULL)
      return NULL; // error code already set

   // positive height means bottom-up row order in the file
   flip_vertically = ((int) s->img_y) > 0;
   s->img_y = abs((int) s->img_y);

   mr = info.mr;
   mg = info.mg;
   mb = info.mb;
   ma = info.ma;
   all_a = info.all_a;

   // infer the palette entry count from the gap between header and pixel data
   if (info.hsz == 12) {
      if (info.bpp < 24)
         psize = (info.offset - 14 - 24) / 3;
   } else {
      if (info.bpp < 16)
         psize = (info.offset - 14 - info.hsz) >> 2;
   }

   s->img_n = ma ? 4 : 3;
   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
      target = req_comp;
   else
      target = s->img_n; // if they want monochrome, we'll post-convert

   // sanity-check size
   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
      return stbi__errpuc("too large", "Corrupt BMP");

   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
   if (!out) return stbi__errpuc("outofmem", "Out of memory");
   if (info.bpp < 16) {
      // paletted path
      int z=0;
      if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
      for (i=0; i < psize; ++i) {
         // palette entries are stored BGR(A)
         pal[i][2] = stbi__get8(s);
         pal[i][1] = stbi__get8(s);
         pal[i][0] = stbi__get8(s);
         if (info.hsz != 12) stbi__get8(s);
         pal[i][3] = 255;
      }
      stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
      if (info.bpp == 1) width = (s->img_x + 7) >> 3;
      else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
      else if (info.bpp == 8) width = s->img_x;
      else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
      pad = (-width)&3;   // rows are padded to 4-byte multiples
      if (info.bpp == 1) {
         for (j=0; j < (int) s->img_y; ++j) {
            int bit_offset = 7, v = stbi__get8(s);
            for (i=0; i < (int) s->img_x; ++i) {
               int color = (v>>bit_offset)&0x1;
               out[z++] = pal[color][0];
               out[z++] = pal[color][1];
               out[z++] = pal[color][2];
               if((--bit_offset) < 0) {
                  bit_offset = 7;
                  v = stbi__get8(s);
               }
            }
            stbi__skip(s, pad);
         }
      } else {
         // 4- or 8-bit indexes, two pixels per loop iteration
         for (j=0; j < (int) s->img_y; ++j) {
            for (i=0; i < (int) s->img_x; i += 2) {
               int v=stbi__get8(s),v2=0;
               if (info.bpp == 4) {
                  v2 = v & 15;
                  v >>= 4;
               }
               out[z++] = pal[v][0];
               out[z++] = pal[v][1];
               out[z++] = pal[v][2];
               if (target == 4) out[z++] = 255;
               if (i+1 == (int) s->img_x) break;
               v = (info.bpp == 8) ? stbi__get8(s) : v2;
               out[z++] = pal[v][0];
               out[z++] = pal[v][1];
               out[z++] = pal[v][2];
               if (target == 4) out[z++] = 255;
            }
            stbi__skip(s, pad);
         }
      }
   } else {
      // 16/24/32-bit path: decode via the channel bit masks
      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
      int z = 0;
      int easy=0;
      stbi__skip(s, info.offset - 14 - info.hsz);
      if (info.bpp == 24) width = 3 * s->img_x;
      else if (info.bpp == 16) width = 2*s->img_x;
      else /* bpp = 32 and pad = 0 */ width=0;
      pad = (-width) & 3;
      // "easy" = byte-aligned BGR(A) layout, no mask arithmetic needed
      if (info.bpp == 24) {
         easy = 1;
      } else if (info.bpp == 32) {
         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
            easy = 2;
      }
      if (!easy) {
         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
         // right shift amt to put high bit in position #7
         rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
         gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
         bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
         ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
      }
      for (j=0; j < (int) s->img_y; ++j) {
         if (easy) {
            for (i=0; i < (int) s->img_x; ++i) {
               unsigned char a;
               out[z+2] = stbi__get8(s);   // file order is B,G,R
               out[z+1] = stbi__get8(s);
               out[z+0] = stbi__get8(s);
               z += 3;
               a = (easy == 2 ? stbi__get8(s) : 255);
               all_a |= a;
               if (target == 4) out[z++] = a;
            }
         } else {
            int bpp = info.bpp;
            for (i=0; i < (int) s->img_x; ++i) {
               stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
               unsigned int a;
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
               a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
               all_a |= a;
               if (target == 4) out[z++] = STBI__BYTECAST(a);
            }
         }
         stbi__skip(s, pad);
      }
   }

   // if alpha channel is all 0s, replace with all 255s
   if (target == 4 && all_a == 0)
      for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
         out[i] = 255;

   if (flip_vertically) {
      stbi_uc t;
      for (j=0; j < (int) s->img_y>>1; ++j) {
         stbi_uc *p1 = out + j *s->img_x*target;
         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
         for (i=0; i < (int) s->img_x*target; ++i) {
            t = p1[i], p1[i] = p2[i], p2[i] = t;
         }
      }
   }

   if (req_comp && req_comp != target) {
      out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }

   *x = s->img_x;
   *y = s->img_y;
   if (comp) *comp = s->img_n;
   return out;
}
#endif

// Targa Truevision - TGA
// by Jonathan Dummer
#ifndef STBI_NO_TGA
// returns STBI_rgb or whatever, 0 on error
static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
{
   // only RGB or RGBA (incl.
16bit) or grey allowed
   if (is_rgb16) *is_rgb16 = 0;
   switch(bits_per_pixel) {
      case 8: return STBI_grey;
      case 16: if(is_grey) return STBI_grey_alpha;
               // fallthrough
      case 15: if(is_rgb16) *is_rgb16 = 1;
               return STBI_rgb;
      case 24: // fallthrough
      case 32: return bits_per_pixel/8;
      default: return 0;
   }
}

// Validate the TGA header and report width/height/components without
// decoding any pixels; rewinds and returns 0 on any inconsistency.
static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
{
   int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
   int sz, tga_colormap_type;
   stbi__get8(s); // discard Offset
   tga_colormap_type = stbi__get8(s); // colormap type
   if( tga_colormap_type > 1 ) {
      stbi__rewind(s);
      return 0; // only RGB or indexed allowed
   }
   tga_image_type = stbi__get8(s); // image type
   if ( tga_colormap_type == 1 ) { // colormapped (paletted) image
      if (tga_image_type != 1 && tga_image_type != 9) {
         stbi__rewind(s);
         return 0;
      }
      stbi__skip(s,4); // skip index of first colormap entry and number of entries
      sz = stbi__get8(s); // check bits per palette color entry
      if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) {
         stbi__rewind(s);
         return 0;
      }
      stbi__skip(s,4); // skip image x and y origin
      tga_colormap_bpp = sz;
   } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
      if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) {
         stbi__rewind(s);
         return 0; // only RGB or grey allowed, +/- RLE
      }
      stbi__skip(s,9); // skip colormap specification and image x/y origin
      tga_colormap_bpp = 0;
   }
   tga_w = stbi__get16le(s);
   if( tga_w < 1 ) {
      stbi__rewind(s);
      return 0; // test width
   }
   tga_h = stbi__get16le(s);
   if( tga_h < 1 ) {
      stbi__rewind(s);
      return 0; // test height
   }
   tga_bits_per_pixel = stbi__get8(s); // bits per pixel
   stbi__get8(s); // ignore alpha bits
   if (tga_colormap_bpp != 0) {
      if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
         // when using a colormap, tga_bits_per_pixel is the size of the indexes
         // I don't think anything but 8 or 16bit indexes makes sense
         stbi__rewind(s);
         return 0;
      }
      // component count comes from the palette entries, not the indexes
      tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
   } else {
      // image types 3 and 11 are greyscale (11 = RLE grey)
      tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL);
   }
   if(!tga_comp) {
      stbi__rewind(s);
      return 0;
   }
   if (x) *x = tga_w;
   if (y) *y = tga_h;
   if (comp) *comp = tga_comp;
   return 1; // seems to have passed everything
}

// Quick header sanity check used by format auto-detection; always rewinds.
static int stbi__tga_test(stbi__context *s)
{
   int res = 0;
   int sz, tga_color_type;
   stbi__get8(s); // discard Offset
   tga_color_type = stbi__get8(s); // color type
   if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed
   sz = stbi__get8(s); // image type
   if ( tga_color_type == 1 ) { // colormapped (paletted) image
      if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9
      stbi__skip(s,4); // skip index of first colormap entry and number of entries
      sz = stbi__get8(s); // check bits per palette color entry
      if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
      stbi__skip(s,4); // skip image x and y origin
   } else { // "normal" image w/o colormap
      if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE
      stbi__skip(s,9); // skip colormap specification and image x/y origin
   }
   if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width
   if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height
   sz = stbi__get8(s); // bits per pixel
   if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index
   if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;

   res = 1; // if we got this far, everything's good and we can return 1 instead of 0

errorEnd:
   stbi__rewind(s);
   return res;
}

// read 16bit value and convert to 24bit RGB
+static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) +{ + stbi__uint16 px = (stbi__uint16)stbi__get16le(s); + stbi__uint16 fiveBitMask = 31; + // we have 3 channels with 5bits each + int r = (px >> 10) & fiveBitMask; + int g = (px >> 5) & fiveBitMask; + int b = px & fiveBitMask; + // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later + out[0] = (stbi_uc)((r * 255)/31); + out[1] = (stbi_uc)((g * 255)/31); + out[2] = (stbi_uc)((b * 255)/31); + + // some people claim that the most significant bit might be used for alpha + // (possibly if an alpha-bit is set in the "image descriptor byte") + // but that only made 16bit test images completely translucent.. + // so let's treat all 15 and 16bit TGAs as RGB with no alpha. +} + +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + // read in the TGA header stuff + int tga_offset = stbi__get8(s); + int tga_indexed = stbi__get8(s); + int tga_image_type = stbi__get8(s); + int tga_is_RLE = 0; + int tga_palette_start = stbi__get16le(s); + int tga_palette_len = stbi__get16le(s); + int tga_palette_bits = stbi__get8(s); + int tga_x_origin = stbi__get16le(s); + int tga_y_origin = stbi__get16le(s); + int tga_width = stbi__get16le(s); + int tga_height = stbi__get16le(s); + int tga_bits_per_pixel = stbi__get8(s); + int tga_comp, tga_rgb16=0; + int tga_inverted = stbi__get8(s); + // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?) 
+ // image data + unsigned char *tga_data; + unsigned char *tga_palette = NULL; + int i, j; + unsigned char raw_data[4] = {0}; + int RLE_count = 0; + int RLE_repeating = 0; + int read_next_pixel = 1; + STBI_NOTUSED(ri); + + // do a tiny bit of precessing + if ( tga_image_type >= 8 ) + { + tga_image_type -= 8; + tga_is_RLE = 1; + } + tga_inverted = 1 - ((tga_inverted >> 5) & 1); + + // If I'm paletted, then I'll use the number of bits from the palette + if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16); + else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16); + + if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency + return stbi__errpuc("bad format", "Can't find out TGA pixelformat"); + + // tga info + *x = tga_width; + *y = tga_height; + if (comp) *comp = tga_comp; + + if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0)) + return stbi__errpuc("too large", "Corrupt TGA"); + + tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0); + if (!tga_data) return stbi__errpuc("outofmem", "Out of memory"); + + // skip to the data's starting position (offset usually = 0) + stbi__skip(s, tga_offset ); + + if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) { + for (i=0; i < tga_height; ++i) { + int row = tga_inverted ? tga_height -i - 1 : i; + stbi_uc *tga_row = tga_data + row*tga_width*tga_comp; + stbi__getn(s, tga_row, tga_width * tga_comp); + } + } else { + // do I need to load a palette? + if ( tga_indexed) + { + // any data to skip? 
(offset usually = 0) + stbi__skip(s, tga_palette_start ); + // load the palette + tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0); + if (!tga_palette) { + STBI_FREE(tga_data); + return stbi__errpuc("outofmem", "Out of memory"); + } + if (tga_rgb16) { + stbi_uc *pal_entry = tga_palette; + STBI_ASSERT(tga_comp == STBI_rgb); + for (i=0; i < tga_palette_len; ++i) { + stbi__tga_read_rgb16(s, pal_entry); + pal_entry += tga_comp; + } + } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) { + STBI_FREE(tga_data); + STBI_FREE(tga_palette); + return stbi__errpuc("bad palette", "Corrupt TGA"); + } + } + // load the data + for (i=0; i < tga_width * tga_height; ++i) + { + // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk? + if ( tga_is_RLE ) + { + if ( RLE_count == 0 ) + { + // yep, get the next byte as a RLE command + int RLE_cmd = stbi__get8(s); + RLE_count = 1 + (RLE_cmd & 127); + RLE_repeating = RLE_cmd >> 7; + read_next_pixel = 1; + } else if ( !RLE_repeating ) + { + read_next_pixel = 1; + } + } else + { + read_next_pixel = 1; + } + // OK, if I need to read a pixel, do it now + if ( read_next_pixel ) + { + // load however much data we did have + if ( tga_indexed ) + { + // read in index, then perform the lookup + int pal_idx = (tga_bits_per_pixel == 8) ? 
stbi__get8(s) : stbi__get16le(s); + if ( pal_idx >= tga_palette_len ) { + // invalid index + pal_idx = 0; + } + pal_idx *= tga_comp; + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = tga_palette[pal_idx+j]; + } + } else if(tga_rgb16) { + STBI_ASSERT(tga_comp == STBI_rgb); + stbi__tga_read_rgb16(s, raw_data); + } else { + // read in the data raw + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = stbi__get8(s); + } + } + // clear the reading flag for the next pixel + read_next_pixel = 0; + } // end of reading a pixel + + // copy data + for (j = 0; j < tga_comp; ++j) + tga_data[i*tga_comp+j] = raw_data[j]; + + // in case we're in RLE mode, keep counting down + --RLE_count; + } + // do I need to invert the image? + if ( tga_inverted ) + { + for (j = 0; j*2 < tga_height; ++j) + { + int index1 = j * tga_width * tga_comp; + int index2 = (tga_height - 1 - j) * tga_width * tga_comp; + for (i = tga_width * tga_comp; i > 0; --i) + { + unsigned char temp = tga_data[index1]; + tga_data[index1] = tga_data[index2]; + tga_data[index2] = temp; + ++index1; + ++index2; + } + } + } + // clear my palette, if I had one + if ( tga_palette != NULL ) + { + STBI_FREE( tga_palette ); + } + } + + // swap RGB - if the source data was RGB16, it already is in the right order + if (tga_comp >= 3 && !tga_rgb16) + { + unsigned char* tga_pixel = tga_data; + for (i=0; i < tga_width * tga_height; ++i) + { + unsigned char temp = tga_pixel[0]; + tga_pixel[0] = tga_pixel[2]; + tga_pixel[2] = temp; + tga_pixel += tga_comp; + } + } + + // convert to target component count + if (req_comp && req_comp != tga_comp) + tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); + + // the things I do to get rid of an error message, and yet keep + // Microsoft's C compilers happy... 
[8^( + tga_palette_start = tga_palette_len = tga_palette_bits = + tga_x_origin = tga_y_origin = 0; + // OK, done + return tga_data; +} +#endif + +// ************************************************************************************************* +// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s) +{ + int r = (stbi__get32be(s) == 0x38425053); + stbi__rewind(s); + return r; +} + +static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount) +{ + int count, nleft, len; + + count = 0; + while ((nleft = pixelCount - count) > 0) { + len = stbi__get8(s); + if (len == 128) { + // No-op. + } else if (len < 128) { + // Copy next len+1 bytes literally. + len++; + if (len > nleft) return 0; // corrupt data + count += len; + while (len) { + *p = stbi__get8(s); + p += 4; + len--; + } + } else if (len > 128) { + stbi_uc val; + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) + len = 257 - len; + if (len > nleft) return 0; // corrupt data + val = stbi__get8(s); + count += len; + while (len) { + *p = val; + p += 4; + len--; + } + } + } + + return 1; +} + +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + int pixelCount; + int channelCount, compression; + int channel, i; + int bitdepth; + int w,h; + stbi_uc *out; + STBI_NOTUSED(ri); + + // Check identifier + if (stbi__get32be(s) != 0x38425053) // "8BPS" + return stbi__errpuc("not PSD", "Corrupt PSD image"); + + // Check file type version. + if (stbi__get16be(s) != 1) + return stbi__errpuc("wrong version", "Unsupported version of PSD image"); + + // Skip 6 reserved bytes. + stbi__skip(s, 6 ); + + // Read the number of channels (R, G, B, A, etc). 
+ channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) + return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image"); + + // Read the rows and columns of the image. + h = stbi__get32be(s); + w = stbi__get32be(s); + + // Make sure the depth is 8 bits. + bitdepth = stbi__get16be(s); + if (bitdepth != 8 && bitdepth != 16) + return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit"); + + // Make sure the color mode is RGB. + // Valid options are: + // 0: Bitmap + // 1: Grayscale + // 2: Indexed color + // 3: RGB color + // 4: CMYK color + // 7: Multichannel + // 8: Duotone + // 9: Lab color + if (stbi__get16be(s) != 3) + return stbi__errpuc("wrong color format", "PSD is not in RGB color format"); + + // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) + stbi__skip(s,stbi__get32be(s) ); + + // Skip the image resources. (resolution, pen tool paths, etc) + stbi__skip(s, stbi__get32be(s) ); + + // Skip the reserved data. + stbi__skip(s, stbi__get32be(s) ); + + // Find out if the data is compressed. + // Known values: + // 0: no compression + // 1: RLE compressed + compression = stbi__get16be(s); + if (compression > 1) + return stbi__errpuc("bad compression", "PSD has an unknown compression format"); + + // Check size + if (!stbi__mad3sizes_valid(4, w, h, 0)) + return stbi__errpuc("too large", "Corrupt PSD"); + + // Create the destination image. + + if (!compression && bitdepth == 16 && bpc == 16) { + out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); + ri->bits_per_channel = 16; + } else + out = (stbi_uc *) stbi__malloc(4 * w*h); + + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + pixelCount = w*h; + + // Initialize the data to zero. + //memset( out, 0, pixelCount * 4 ); + + // Finally, the image data. 
+ if (compression) { + // RLE as used by .PSD and .TIFF + // Loop until you get the number of unpacked bytes you are expecting: + // Read the next source byte into n. + // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. + // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. + // Else if n is 128, noop. + // Endloop + + // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data, + // which we're going to just skip. + stbi__skip(s, h * channelCount * 2 ); + + // Read the RLE data by channel. + for (channel = 0; channel < 4; channel++) { + stbi_uc *p; + + p = out+channel; + if (channel >= channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++, p += 4) + *p = (channel == 3 ? 255 : 0); + } else { + // Read the RLE data. + if (!stbi__psd_decode_rle(s, p, pixelCount)) { + STBI_FREE(out); + return stbi__errpuc("corrupt", "bad RLE data"); + } + } + } + + } else { + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. + + // Read the data by channel. + for (channel = 0; channel < 4; channel++) { + if (channel >= channelCount) { + // Fill this channel with default data. + if (bitdepth == 16 && bpc == 16) { + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + stbi__uint16 val = channel == 3 ? 65535 : 0; + for (i = 0; i < pixelCount; i++, q += 4) + *q = val; + } else { + stbi_uc *p = out+channel; + stbi_uc val = channel == 3 ? 
255 : 0; + for (i = 0; i < pixelCount; i++, p += 4) + *p = val; + } + } else { + if (ri->bits_per_channel == 16) { // output bpc + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + for (i = 0; i < pixelCount; i++, q += 4) + *q = (stbi__uint16) stbi__get16be(s); + } else { + stbi_uc *p = out+channel; + if (bitdepth == 16) { // input bpc + for (i = 0; i < pixelCount; i++, p += 4) + *p = (stbi_uc) (stbi__get16be(s) >> 8); + } else { + for (i = 0; i < pixelCount; i++, p += 4) + *p = stbi__get8(s); + } + } + } + } + } + + // remove weird white matte from PSD + if (channelCount >= 4) { + if (ri->bits_per_channel == 16) { + for (i=0; i < w*h; ++i) { + stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; + if (pixel[3] != 0 && pixel[3] != 65535) { + float a = pixel[3] / 65535.0f; + float ra = 1.0f / a; + float inv_a = 65535.0f * (1 - ra); + pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); + pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); + pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); + } + } + } else { + for (i=0; i < w*h; ++i) { + unsigned char *pixel = out + 4*i; + if (pixel[3] != 0 && pixel[3] != 255) { + float a = pixel[3] / 255.0f; + float ra = 1.0f / a; + float inv_a = 255.0f * (1 - ra); + pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); + pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); + pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); + } + } + } + } + + // convert to desired output format + if (req_comp && req_comp != 4) { + if (ri->bits_per_channel == 16) + out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h); + else + out = stbi__convert_format(out, 4, req_comp, w, h); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + if (comp) *comp = 4; + *y = h; + *x = w; + + return out; +} +#endif + +// ************************************************************************************************* +// Softimage PIC loader +// by Tom Seddon +// +// See 
http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format +// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/ + +#ifndef STBI_NO_PIC +static int stbi__pic_is4(stbi__context *s,const char *str) +{ + int i; + for (i=0; i<4; ++i) + if (stbi__get8(s) != (stbi_uc)str[i]) + return 0; + + return 1; +} + +static int stbi__pic_test_core(stbi__context *s) +{ + int i; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) + return 0; + + for(i=0;i<84;++i) + stbi__get8(s); + + if (!stbi__pic_is4(s,"PICT")) + return 0; + + return 1; +} + +typedef struct +{ + stbi_uc size,type,channel; +} stbi__pic_packet; + +static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest) +{ + int mask=0x80, i; + + for (i=0; i<4; ++i, mask>>=1) { + if (channel & mask) { + if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short"); + dest[i]=stbi__get8(s); + } + } + + return dest; +} + +static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src) +{ + int mask=0x80,i; + + for (i=0;i<4; ++i, mask>>=1) + if (channel&mask) + dest[i]=src[i]; +} + +static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result) +{ + int act_comp=0,num_packets=0,y,chained; + stbi__pic_packet packets[10]; + + // this will (should...) cater for even some bizarre stuff like having data + // for the same channel in multiple packets. + do { + stbi__pic_packet *packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return stbi__errpuc("bad format","too many packets"); + + packet = &packets[num_packets++]; + + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + + act_comp |= packet->channel; + + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)"); + if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp"); + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel? 
+ + for(y=0; ytype) { + default: + return stbi__errpuc("bad format","packet has bad compression type"); + + case 0: {//uncompressed + int x; + + for(x=0;xchannel,dest)) + return 0; + break; + } + + case 1://Pure RLE + { + int left=width, i; + + while (left>0) { + stbi_uc count,value[4]; + + count=stbi__get8(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)"); + + if (count > left) + count = (stbi_uc) left; + + if (!stbi__readval(s,packet->channel,value)) return 0; + + for(i=0; ichannel,dest,value); + left -= count; + } + } + break; + + case 2: {//Mixed RLE + int left=width; + while (left>0) { + int count = stbi__get8(s), i; + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)"); + + if (count >= 128) { // Repeated + stbi_uc value[4]; + + if (count==128) + count = stbi__get16be(s); + else + count -= 127; + if (count > left) + return stbi__errpuc("bad file","scanline overrun"); + + if (!stbi__readval(s,packet->channel,value)) + return 0; + + for(i=0;ichannel,dest,value); + } else { // Raw + ++count; + if (count>left) return stbi__errpuc("bad file","scanline overrun"); + + for(i=0;ichannel,dest)) + return 0; + } + left-=count; + } + break; + } + } + } + } + + return result; +} + +static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri) +{ + stbi_uc *result; + int i, x,y, internal_comp; + STBI_NOTUSED(ri); + + if (!comp) comp = &internal_comp; + + for (i=0; i<92; ++i) + stbi__get8(s); + + x = stbi__get16be(s); + y = stbi__get16be(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)"); + if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode"); + + stbi__get32be(s); //skip `ratio' + stbi__get16be(s); //skip `fields' + stbi__get16be(s); //skip `pad' + + // intermediate buffer is RGBA + result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0); + memset(result, 0xff, 
x*y*4); + + if (!stbi__pic_load_core(s,x,y,comp, result)) { + STBI_FREE(result); + result=0; + } + *px = x; + *py = y; + if (req_comp == 0) req_comp = *comp; + result=stbi__convert_format(result,4,req_comp,x,y); + + return result; +} + +static int stbi__pic_test(stbi__context *s) +{ + int r = stbi__pic_test_core(s); + stbi__rewind(s); + return r; +} +#endif + +// ************************************************************************************************* +// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb + +#ifndef STBI_NO_GIF +typedef struct +{ + stbi__int16 prefix; + stbi_uc first; + stbi_uc suffix; +} stbi__gif_lzw; + +typedef struct +{ + int w,h; + stbi_uc *out; // output buffer (always 4 components) + stbi_uc *background; // The current "background" as far as a gif is concerned + stbi_uc *history; + int flags, bgindex, ratio, transparent, eflags; + stbi_uc pal[256][4]; + stbi_uc lpal[256][4]; + stbi__gif_lzw codes[8192]; + stbi_uc *color_table; + int parse, step; + int lflags; + int start_x, start_y; + int max_x, max_y; + int cur_x, cur_y; + int line_size; + int delay; +} stbi__gif; + +static int stbi__gif_test_raw(stbi__context *s) +{ + int sz; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0; + sz = stbi__get8(s); + if (sz != '9' && sz != '7') return 0; + if (stbi__get8(s) != 'a') return 0; + return 1; +} + +static int stbi__gif_test(stbi__context *s) +{ + int r = stbi__gif_test_raw(s); + stbi__rewind(s); + return r; +} + +static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp) +{ + int i; + for (i=0; i < num_entries; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + pal[i][3] = transp == i ? 
0 : 255; + } +} + +static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info) +{ + stbi_uc version; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') + return stbi__err("not GIF", "Corrupt GIF"); + + version = stbi__get8(s); + if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF"); + if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF"); + + stbi__g_failure_reason = ""; + g->w = stbi__get16le(s); + g->h = stbi__get16le(s); + g->flags = stbi__get8(s); + g->bgindex = stbi__get8(s); + g->ratio = stbi__get8(s); + g->transparent = -1; + + if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments + + if (is_info) return 1; + + if (g->flags & 0x80) + stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); + + return 1; +} + +static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); + if (!stbi__gif_header(s, g, comp, 1)) { + STBI_FREE(g); + stbi__rewind( s ); + return 0; + } + if (x) *x = g->w; + if (y) *y = g->h; + STBI_FREE(g); + return 1; +} + +static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) +{ + stbi_uc *p, *c; + int idx; + + // recurse to decode the prefixes, since the linked-list is backwards, + // and working backwards through an interleaved image would be nasty + if (g->codes[code].prefix >= 0) + stbi__out_gif_code(g, g->codes[code].prefix); + + if (g->cur_y >= g->max_y) return; + + idx = g->cur_x + g->cur_y; + p = &g->out[idx]; + g->history[idx / 4] = 1; + + c = &g->color_table[g->codes[code].suffix * 4]; + if (c[3] > 128) { // don't render transparent pixels; + p[0] = c[2]; + p[1] = c[1]; + p[2] = c[0]; + p[3] = c[3]; + } + g->cur_x += 4; + + if (g->cur_x >= g->max_x) { + g->cur_x = g->start_x; + g->cur_y += g->step; + + while (g->cur_y >= g->max_y && g->parse > 0) { + g->step = (1 << g->parse) * 
g->line_size; + g->cur_y = g->start_y + (g->step >> 1); + --g->parse; + } + } +} + +static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) +{ + stbi_uc lzw_cs; + stbi__int32 len, init_code; + stbi__uint32 first; + stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear; + stbi__gif_lzw *p; + + lzw_cs = stbi__get8(s); + if (lzw_cs > 12) return NULL; + clear = 1 << lzw_cs; + first = 1; + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + bits = 0; + valid_bits = 0; + for (init_code = 0; init_code < clear; init_code++) { + g->codes[init_code].prefix = -1; + g->codes[init_code].first = (stbi_uc) init_code; + g->codes[init_code].suffix = (stbi_uc) init_code; + } + + // support no starting clear code + avail = clear+2; + oldcode = -1; + + len = 0; + for(;;) { + if (valid_bits < codesize) { + if (len == 0) { + len = stbi__get8(s); // start new block + if (len == 0) + return g->out; + } + --len; + bits |= (stbi__int32) stbi__get8(s) << valid_bits; + valid_bits += 8; + } else { + stbi__int32 code = bits & codemask; + bits >>= codesize; + valid_bits -= codesize; + // @OPTIMIZE: is there some way we can accelerate the non-clear path? + if (code == clear) { // clear code + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + avail = clear + 2; + oldcode = -1; + first = 0; + } else if (code == clear + 1) { // end of stream code + stbi__skip(s, len); + while ((len = stbi__get8(s)) > 0) + stbi__skip(s,len); + return g->out; + } else if (code <= avail) { + if (first) { + return stbi__errpuc("no clear code", "Corrupt GIF"); + } + + if (oldcode >= 0) { + p = &g->codes[avail++]; + if (avail > 8192) { + return stbi__errpuc("too many codes", "Corrupt GIF"); + } + + p->prefix = (stbi__int16) oldcode; + p->first = g->codes[oldcode].first; + p->suffix = (code == avail) ? 
p->first : g->codes[code].first; + } else if (code == avail) + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + + stbi__out_gif_code(g, (stbi__uint16) code); + + if ((avail & codemask) == 0 && avail <= 0x0FFF) { + codesize++; + codemask = (1 << codesize) - 1; + } + + oldcode = code; + } else { + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + } + } + } +} + +// this function is designed to support animated gifs, although stb_image doesn't support it +// two back is the image from two frames ago, used for a very specific disposal format +static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back) +{ + int dispose; + int first_frame; + int pi; + int pcount; + + // on first frame, any non-written pixels get the background colour (non-transparent) + first_frame = 0; + if (g->out == 0) { + if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header + g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h); + g->background = (stbi_uc *) stbi__malloc(4 * g->w * g->h); + g->history = (stbi_uc *) stbi__malloc(g->w * g->h); + if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory"); + + // image is treated as "tranparent" at the start - ie, nothing overwrites the current background; + // background colour is only used for pixels that are not rendered first frame, after that "background" + // color refers to teh color that was there the previous frame. + memset( g->out, 0x00, 4 * g->w * g->h ); + memset( g->background, 0x00, 4 * g->w * g->h ); // state of the background (starts transparent) + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + first_frame = 1; + } else { + // second frame - how do we dispoase of the previous one? 
+ dispose = (g->eflags & 0x1C) >> 2; + pcount = g->w * g->h; + + if ((dispose == 3) && (two_back == 0)) { + dispose = 2; // if I don't have an image to revert back to, default to the old background + } + + if (dispose == 3) { // use previous graphic + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); + } + } + } else if (dispose == 2) { + // restore what was changed last frame to background before that frame; + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); + } + } + } else { + // This is a non-disposal case eithe way, so just + // leave the pixels as is, and they will become the new background + // 1: do not dispose + // 0: not specified. + } + + // background is what out is after the undoing of the previou frame; + memcpy( g->background, g->out, 4 * g->w * g->h ); + } + + // clear my history; + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + + for (;;) { + int tag = stbi__get8(s); + switch (tag) { + case 0x2C: /* Image Descriptor */ + { + stbi__int32 x, y, w, h; + stbi_uc *o; + + x = stbi__get16le(s); + y = stbi__get16le(s); + w = stbi__get16le(s); + h = stbi__get16le(s); + if (((x + w) > (g->w)) || ((y + h) > (g->h))) + return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); + + g->line_size = g->w * 4; + g->start_x = x * 4; + g->start_y = y * g->line_size; + g->max_x = g->start_x + w * 4; + g->max_y = g->start_y + h * g->line_size; + g->cur_x = g->start_x; + g->cur_y = g->start_y; + + g->lflags = stbi__get8(s); + + if (g->lflags & 0x40) { + g->step = 8 * g->line_size; // first interlaced spacing + g->parse = 3; + } else { + g->step = g->line_size; + g->parse = 0; + } + + if (g->lflags & 0x80) { + stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? 
g->transparent : -1); + g->color_table = (stbi_uc *) g->lpal; + } else if (g->flags & 0x80) { + g->color_table = (stbi_uc *) g->pal; + } else + return stbi__errpuc("missing color table", "Corrupt GIF"); + + o = stbi__process_gif_raster(s, g); + if (o == NULL) return NULL; + + // if this was the first frame, + pcount = g->w * g->h; + if (first_frame && (g->bgindex > 0)) { + // if first frame, any pixel not drawn to gets the background color + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi] == 0) { + g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; + memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); + } + } + } + + return o; + } + + case 0x21: // Comment Extension. + { + int len; + int ext = stbi__get8(s); + if (ext == 0xF9) { // Graphic Control Extension. + len = stbi__get8(s); + if (len == 4) { + g->eflags = stbi__get8(s); + g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths. 
+ + // unset old transparent + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 255; + } + if (g->eflags & 0x01) { + g->transparent = stbi__get8(s); + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 0; + } + } else { + // don't need transparent + stbi__skip(s, 1); + g->transparent = -1; + } + } else { + stbi__skip(s, len); + break; + } + } + while ((len = stbi__get8(s)) != 0) { + stbi__skip(s, len); + } + break; + } + + case 0x3B: // gif stream termination code + return (stbi_uc *) s; // using '1' causes warning on some compilers + + default: + return stbi__errpuc("unknown code", "Corrupt GIF"); + } + } +} + +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + if (stbi__gif_test(s)) { + int layers = 0; + stbi_uc *u = 0; + stbi_uc *out = 0; + stbi_uc *two_back = 0; + stbi__gif g; + int stride; + memset(&g, 0, sizeof(g)); + if (delays) { + *delays = 0; + } + + do { + u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + + if (u) { + *x = g.w; + *y = g.h; + ++layers; + stride = g.w * g.h * 4; + + if (out) { + out = (stbi_uc*) STBI_REALLOC( out, layers * stride ); + if (delays) { + *delays = (int*) STBI_REALLOC( *delays, sizeof(int) * layers ); + } + } else { + out = (stbi_uc*)stbi__malloc( layers * stride ); + if (delays) { + *delays = (int*) stbi__malloc( layers * sizeof(int) ); + } + } + memcpy( out + ((layers - 1) * stride), u, stride ); + if (layers >= 2) { + two_back = out - 2 * stride; + } + + if (delays) { + (*delays)[layers - 1U] = g.delay; + } + } + } while (u != 0); + + // free temp buffer; + STBI_FREE(g.out); + STBI_FREE(g.history); + STBI_FREE(g.background); + + // do the final conversion after loading everything; + if (req_comp && req_comp != 4) + out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); + + *z = layers; + return out; + } else { + return stbi__errpuc("not GIF", "Image was not as a 
gif type."); + } +} + +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *u = 0; + stbi__gif g; + memset(&g, 0, sizeof(g)); + + u = stbi__gif_load_next(s, &g, comp, req_comp, 0); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + if (u) { + *x = g.w; + *y = g.h; + + // moved conversion to after successful load so that the same + // can be done for multiple frames. + if (req_comp && req_comp != 4) + u = stbi__convert_format(u, 4, req_comp, g.w, g.h); + } + + // free buffers needed for multiple frame loading; + STBI_FREE(g.history); + STBI_FREE(g.background); + + return u; +} + +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp) +{ + return stbi__gif_info_raw(s,x,y,comp); +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int stbi__hdr_test_core(stbi__context *s, const char *signature) +{ + int i; + for (i=0; signature[i]; ++i) + if (stbi__get8(s) != signature[i]) + return 0; + stbi__rewind(s); + return 1; +} + +static int stbi__hdr_test(stbi__context* s) +{ + int r = stbi__hdr_test_core(s, "#?RADIANCE\n"); + stbi__rewind(s); + if(!r) { + r = stbi__hdr_test_core(s, "#?RGBE\n"); + stbi__rewind(s); + } + return r; +} + +#define STBI__HDR_BUFLEN 1024 +static char *stbi__hdr_gettoken(stbi__context *z, char *buffer) +{ + int len=0; + char c = '\0'; + + c = (char) stbi__get8(z); + + while (!stbi__at_eof(z) && c != '\n') { + buffer[len++] = c; + if (len == STBI__HDR_BUFLEN-1) { + // flush to end of line + while (!stbi__at_eof(z) && stbi__get8(z) != '\n') + ; + break; + } + c = (char) stbi__get8(z); + } + + buffer[len] = 0; + return buffer; +} + +static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) +{ + if ( input[3] != 0 ) { + float f1; + // Exponent + f1 = (float) ldexp(1.0f, input[3] - 
(int)(128 + 8)); + if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; + } else { + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } + } +} + +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + float *hdr_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + const char *headerToken; + STBI_NOTUSED(ri); + + // Check identifier + headerToken = stbi__hdr_gettoken(s,buffer); + if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) + return stbi__errpf("not HDR", "Corrupt HDR image"); + + // Parse header + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! 
+ token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + height = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + width = (int) strtol(token, NULL, 10); + + *x = width; + *y = height; + + if (comp) *comp = 3; + if (req_comp == 0) req_comp = 3; + + if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0)) + return stbi__errpf("too large", "HDR image is too large"); + + // Read data + hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0); + if (!hdr_data) + return stbi__errpf("outofmem", "Out of memory"); + + // Load image data + // image data is stored as some number of sca + if ( width < 8 || width >= 32768) { + // Read flat data + for (j=0; j < height; ++j) { + for (i=0; i < width; ++i) { + stbi_uc rgbe[4]; + main_decode_loop: + stbi__getn(s, rgbe, 4); + stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); + } + } + } else { + // Read RLE-encoded data + scanline = NULL; + + for (j = 0; j < height; ++j) { + c1 = stbi__get8(s); + c2 = stbi__get8(s); + len = stbi__get8(s); + if (c1 != 2 || c2 != 2 || (len & 0x80)) { + // not run-length encoded, so we have to actually use THIS data as a decoded + // pixel (note this can't be a valid pixel--one of RGB must be >= 128) + stbi_uc rgbe[4]; + rgbe[0] = (stbi_uc) c1; + rgbe[1] = (stbi_uc) c2; + rgbe[2] = (stbi_uc) len; + rgbe[3] = (stbi_uc) stbi__get8(s); + stbi__hdr_convert(hdr_data, rgbe, req_comp); + i = 1; + j = 0; + STBI_FREE(scanline); + goto main_decode_loop; // yes, this makes no sense + } + len <<= 8; + len |= stbi__get8(s); + if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); } + if (scanline == NULL) { + scanline = (stbi_uc *) 
stbi__malloc_mad2(width, 4, 0); + if (!scanline) { + STBI_FREE(hdr_data); + return stbi__errpf("outofmem", "Out of memory"); + } + } + + for (k = 0; k < 4; ++k) { + int nleft; + i = 0; + while ((nleft = width - i) > 0) { + count = stbi__get8(s); + if (count > 128) { + // Run + value = stbi__get8(s); + count -= 128; + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = value; + } else { + // Dump + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = stbi__get8(s); + } + } + } + for (i=0; i < width; ++i) + stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); + } + if (scanline) + STBI_FREE(scanline); + } + + return hdr_data; +} + +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int dummy; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (stbi__hdr_test(s) == 0) { + stbi__rewind( s ); + return 0; + } + + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) { + stbi__rewind( s ); + return 0; + } + token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *y = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *x = (int) strtol(token, NULL, 10); + *comp = 3; + return 1; +} +#endif // STBI_NO_HDR + +#ifndef STBI_NO_BMP +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) +{ + void *p; + stbi__bmp_data info; + + info.all_a = 255; + p = stbi__bmp_parse_header(s, &info); + stbi__rewind( s ); + 
if (p == NULL) + return 0; + if (x) *x = s->img_x; + if (y) *y = s->img_y; + if (comp) *comp = info.ma ? 4 : 3; + return 1; +} +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) +{ + int channelCount, dummy, depth; + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + *y = stbi__get32be(s); + *x = stbi__get32be(s); + depth = stbi__get16be(s); + if (depth != 8 && depth != 16) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 3) { + stbi__rewind( s ); + return 0; + } + *comp = 4; + return 1; +} + +static int stbi__psd_is16(stbi__context *s) +{ + int channelCount, depth; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + (void) stbi__get32be(s); + (void) stbi__get32be(s); + depth = stbi__get16be(s); + if (depth != 16) { + stbi__rewind( s ); + return 0; + } + return 1; +} +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) +{ + int act_comp=0,num_packets=0,chained,dummy; + stbi__pic_packet packets[10]; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) { + stbi__rewind(s); + return 0; + } + + stbi__skip(s, 88); + + *x = stbi__get16be(s); + *y = stbi__get16be(s); + if (stbi__at_eof(s)) { + stbi__rewind( s); + return 0; + } + if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) { + stbi__rewind( s ); + return 0; + } + + stbi__skip(s, 8); + + do { + stbi__pic_packet 
*packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return 0; + + packet = &packets[num_packets++]; + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + act_comp |= packet->channel; + + if (stbi__at_eof(s)) { + stbi__rewind( s ); + return 0; + } + if (packet->size != 8) { + stbi__rewind( s ); + return 0; + } + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); + + return 1; +} +#endif + +// ************************************************************************************************* +// Portable Gray Map and Portable Pixel Map loader +// by Ken Miller +// +// PGM: http://netpbm.sourceforge.net/doc/pgm.html +// PPM: http://netpbm.sourceforge.net/doc/ppm.html +// +// Known limitations: +// Does not support comments in the header section +// Does not support ASCII image data (formats P2 and P3) +// Does not support 16-bit-per-channel + +#ifndef STBI_NO_PNM + +static int stbi__pnm_test(stbi__context *s) +{ + char p, t; + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind( s ); + return 0; + } + return 1; +} + +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + STBI_NOTUSED(ri); + + if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n)) + return 0; + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + + if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "PNM too large"); + + out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + stbi__getn(s, out, s->img_n * s->img_x * s->img_y); + + if (req_comp && req_comp != s->img_n) { + out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure 
+ } + return out; +} + +static int stbi__pnm_isspace(char c) +{ + return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; +} + +static void stbi__pnm_skip_whitespace(stbi__context *s, char *c) +{ + for (;;) { + while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) + *c = (char) stbi__get8(s); + + if (stbi__at_eof(s) || *c != '#') + break; + + while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) + *c = (char) stbi__get8(s); + } +} + +static int stbi__pnm_isdigit(char c) +{ + return c >= '0' && c <= '9'; +} + +static int stbi__pnm_getinteger(stbi__context *s, char *c) +{ + int value = 0; + + while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { + value = value*10 + (*c - '0'); + *c = (char) stbi__get8(s); + } + + return value; +} + +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) +{ + int maxv, dummy; + char c, p, t; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + stbi__rewind(s); + + // Get identifier + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind(s); + return 0; + } + + *comp = (t == '6') ? 
3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm + + c = (char) stbi__get8(s); + stbi__pnm_skip_whitespace(s, &c); + + *x = stbi__pnm_getinteger(s, &c); // read width + stbi__pnm_skip_whitespace(s, &c); + + *y = stbi__pnm_getinteger(s, &c); // read height + stbi__pnm_skip_whitespace(s, &c); + + maxv = stbi__pnm_getinteger(s, &c); // read max value + + if (maxv > 255) + return stbi__err("max value > 255", "PPM image not 8-bit"); + else + return 1; +} +#endif + +static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) +{ + #ifndef STBI_NO_JPEG + if (stbi__jpeg_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNG + if (stbi__png_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_GIF + if (stbi__gif_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_BMP + if (stbi__bmp_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PIC + if (stbi__pic_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNM + if (stbi__pnm_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_info(s, x, y, comp)) return 1; + #endif + + // test tga last because it's a crappy test! 
+ #ifndef STBI_NO_TGA + if (stbi__tga_info(s, x, y, comp)) + return 1; + #endif + return stbi__err("unknown image type", "Image not of any known type, or corrupt"); +} + +static int stbi__is_16_main(stbi__context *s) +{ + #ifndef STBI_NO_PNG + if (stbi__png_is16(s)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_is16(s)) return 1; + #endif + + return 0; +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_info_from_file(f, x, y, comp); + fclose(f); + return result; +} + +STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__info_main(&s,x,y,comp); + fseek(f,pos,SEEK_SET); + return r; +} + +STBIDEF int stbi_is_16_bit(char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_is_16_bit_from_file(f); + fclose(f); + return result; +} + +STBIDEF int stbi_is_16_bit_from_file(FILE *f) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__is_16_main(&s); + fseek(f,pos,SEEK_SET); + return r; +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__is_16_main(&s); +} + +STBIDEF int 
stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__is_16_main(&s); +} + +#endif // STB_IMAGE_IMPLEMENTATION + +/* + revision history: + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug + 1-bit BMP + *_is_16_bit api + avoid warnings + 2.16 (2017-07-23) all functions have 16-bit variants; + STBI_NO_STDIO works again; + compilation fixes; + fix rounding in unpremultiply; + optimize vertical flip; + disable raw_len validation; + documentation fixes + 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; + warning fixes; disable run-time SSE detection on gcc; + uniform handling of optional "return" values; + thread-safe initialization of zlib tables + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) allocate large structures on the stack + remove white matting for transparent PSD + fix reported channel count for PNG & BMP + re-enable SSE2 in non-gcc 64-bit + support RGB-formatted JPEG + read 16-bit PNGs (only as 8-bit) + 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED + 2.09 (2016-01-16) allow comments in PNM files + 16-bit-per-pixel TGA (not bit-per-component) + info() for TGA could break due to .hdr handling + info() for BMP to shares code instead of sloppy parse + can use STBI_REALLOC_SIZED if allocator doesn't support realloc + code cleanup + 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA + 2.07 (2015-09-13) fix compiler warnings + partial animated GIF support + limited 16-bpc PSD support + #ifdef unused functions + bug with < 92 byte PIC,PNM,HDR,TGA + 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value + 2.05 (2015-04-19) fix bug 
in progressive JPEG handling, fix warning + 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit + 2.03 (2015-04-12) extra corruption checking (mmozeiko) + stbi_set_flip_vertically_on_load (nguillemot) + fix NEON support; fix mingw support + 2.02 (2015-01-19) fix incorrect assert, fix warning + 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 + 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG + 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) + progressive JPEG (stb) + PGM/PPM support (Ken Miller) + STBI_MALLOC,STBI_REALLOC,STBI_FREE + GIF bugfix -- seemingly never worked + STBI_NO_*, STBI_ONLY_* + 1.48 (2014-12-14) fix incorrectly-named assert() + 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) + optimize PNG (ryg) + fix bug in interlaced PNG with user-specified channel count (stb) + 1.46 (2014-08-26) + fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG + 1.45 (2014-08-16) + fix MSVC-ARM internal compiler error by wrapping malloc + 1.44 (2014-08-07) + various warning fixes from Ronny Chevalier + 1.43 (2014-07-15) + fix MSVC-only compiler problem in code changed in 1.42 + 1.42 (2014-07-09) + don't define _CRT_SECURE_NO_WARNINGS (affects user code) + fixes to stbi__cleanup_jpeg path + added STBI_ASSERT to avoid requiring assert.h + 1.41 (2014-06-25) + fix search&replace from 1.36 that messed up comments/error messages + 1.40 (2014-06-22) + fix gcc struct-initialization warning + 1.39 (2014-06-15) + fix to TGA optimization when req_comp != number of components in TGA; + fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite) + add support for BMP version 5 (more ignored fields) + 1.38 (2014-06-06) + suppress MSVC warnings on integer casts truncating values + fix accidental rename of 'skip' field of I/O + 1.37 (2014-06-04) + remove duplicate typedef + 1.36 (2014-06-03) + convert to header file single-file library + if 
de-iphone isn't set, load iphone images color-swapped instead of returning NULL + 1.35 (2014-05-27) + various warnings + fix broken STBI_SIMD path + fix bug where stbi_load_from_file no longer left file pointer in correct place + fix broken non-easy path for 32-bit BMP (possibly never used) + TGA optimization by Arseny Kapoulkine + 1.34 (unknown) + use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case + 1.33 (2011-07-14) + make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements + 1.32 (2011-07-13) + support for "info" function for all supported filetypes (SpartanJ) + 1.31 (2011-06-20) + a few more leak fixes, bug in PNG handling (SpartanJ) + 1.30 (2011-06-11) + added ability to load files via callbacks to accomidate custom input streams (Ben Wenger) + removed deprecated format-specific test/load functions + removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway + error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) + fix inefficiency in decoding 32-bit BMP (David Woo) + 1.29 (2010-08-16) + various warning fixes from Aurelien Pocheville + 1.28 (2010-08-01) + fix bug in GIF palette transparency (SpartanJ) + 1.27 (2010-08-01) + cast-to-stbi_uc to fix warnings + 1.26 (2010-07-24) + fix bug in file buffering for PNG reported by SpartanJ + 1.25 (2010-07-17) + refix trans_data warning (Won Chun) + 1.24 (2010-07-12) + perf improvements reading from files on platforms with lock-heavy fgetc() + minor perf improvements for jpeg + deprecated type-specific functions so we'll get feedback if they're needed + attempt to fix trans_data warning (Won Chun) + 1.23 fixed bug in iPhone support + 1.22 (2010-07-10) + removed image *writing* support + stbi_info support from Jetro Lauha + GIF support from Jean-Marc Lienher + iPhone PNG-extensions from James Brown + warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. 
Janez (U+017D)emva) + 1.21 fix use of 'stbi_uc' in header (reported by jon blow) + 1.20 added support for Softimage PIC, by Tom Seddon + 1.19 bug in interlaced PNG corruption check (found by ryg) + 1.18 (2008-08-02) + fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - stbi__convert_format converted one too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug + header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) + 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... 
not sure + fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available + 0.56 fix bug: zlib uncompressed mode len vs. nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant + 0.50 (2006-11-19) + first released version +*/ + + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/workloads/realworld/uvm_prefetch/darknet/src/stb_image_write.h b/workloads/realworld/uvm_prefetch/darknet/src/stb_image_write.h new file mode 100644 index 0000000000000000000000000000000000000000..c05e95812b96232abd3617f98255832cc3fe4716 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/stb_image_write.h @@ -0,0 +1,1568 @@ +/* stb_image_write - v1.09 - public domain - http://nothings.org/stb/stb_image_write.h + writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 + no warranty implied; use at your own risk + + Before #including, + + #define STB_IMAGE_WRITE_IMPLEMENTATION + + in the file that you want to have the implementation. + + Will probably not work correctly with strict-aliasing optimizations. + + If using a modern Microsoft Compiler, non-safe versions of CRT calls may cause + compilation warnings or even errors. To avoid this, also before #including, + + #define STBI_MSC_SECURE_CRT + +ABOUT: + + This header file is a library for writing images to C stdio. It could be + adapted to write to memory or a general streaming interface; let me know. 
+ + The PNG output is not optimal; it is 20-50% larger than the file + written by a decent optimizing implementation; though providing a custom + zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that. + This library is designed for source code compactness and simplicity, + not optimal image file size or run-time performance. + +BUILDING: + + You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h. + You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace + malloc,realloc,free. + You can #define STBIW_MEMMOVE() to replace memmove() + You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function + for PNG compression (instead of the builtin one), it must have the following signature: + unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality); + The returned data will be freed with STBIW_FREE() (free() by default), + so it must be heap allocated with STBIW_MALLOC() (malloc() by default), + +USAGE: + + There are five functions, one for each image file format: + + int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality); + int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); + + void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically + + There are also five equivalent functions that use an arbitrary write function. 
You are + expected to open/close your file-equivalent before and after calling these: + + int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); + int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + + where the callback is: + void stbi_write_func(void *context, void *data, int size); + + You can configure it with these global variables: + int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE + int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression + int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode + + + You can define STBI_WRITE_NO_STDIO to disable the file variant of these + functions, so the library will not use stdio.h at all. However, this will + also disable HDR writing, because it requires stdio for formatted output. + + Each function returns 0 on failure and non-0 on success. + + The functions create an image file defined by the parameters. The image + is a rectangle of pixels stored from left-to-right, top-to-bottom. + Each pixel contains 'comp' channels of data stored interleaved with 8-bits + per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is + monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall. + The *data pointer points to the first byte of the top-left-most pixel. + For PNG, "stride_in_bytes" is the distance in bytes from the first byte of + a row of pixels to the first byte of the next row of pixels. 
+ + PNG creates output files with the same number of components as the input. + The BMP format expands Y to RGB in the file format and does not + output alpha. + + PNG supports writing rectangles of data even when the bytes storing rows of + data are not consecutive in memory (e.g. sub-rectangles of a larger image), + by supplying the stride between the beginning of adjacent rows. The other + formats do not. (Thus you cannot write a native-format BMP through the BMP + writer, both because it is in BGR order and because it may have padding + at the end of the line.) + + PNG allows you to set the deflate compression level by setting the global + variable 'stbi_write_png_compression_level' (it defaults to 8). + + HDR expects linear float data. Since the format is always 32-bit rgb(e) + data, alpha (if provided) is discarded, and for monochrome data it is + replicated across all three channels. + + TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed + data, set the global variable 'stbi_write_tga_with_rle' to 0. + + JPEG does ignore alpha channels in input data; quality is between 1 and 100. + Higher quality looks better but results in a bigger image. + JPEG baseline (no JPEG progressive). + +CREDITS: + + + Sean Barrett - PNG/BMP/TGA + Baldur Karlsson - HDR + Jean-Sebastien Guay - TGA monochrome + Tim Kelsey - misc enhancements + Alan Hickman - TGA RLE + Emmanuel Julien - initial file IO callback implementation + Jon Olick - original jo_jpeg.cpp code + Daniel Gibson - integrate JPEG, allow external zlib + Aarni Koskela - allow choosing PNG filter + + bugfixes: + github:Chribba + Guillaume Chereau + github:jry2 + github:romigrou + Sergio Gonzalez + Jonas Karlsson + Filip Wasil + Thatcher Ulrich + github:poppolopoppo + Patrick Boettcher + github:xeekworx + Cap Petschulat + Simon Rodriguez + Ivan Tikhonov + github:ignotion + Adam Schackart + +LICENSE + + See end of file for license information. 
+ +*/ + +#ifndef INCLUDE_STB_IMAGE_WRITE_H +#define INCLUDE_STB_IMAGE_WRITE_H + +// if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline' +#ifndef STBIWDEF +#ifdef STB_IMAGE_WRITE_STATIC +#define STBIWDEF static +#else +#ifdef __cplusplus +#define STBIWDEF extern "C" +#else +#define STBIWDEF extern +#endif +#endif +#endif + +#ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations +extern int stbi_write_tga_with_rle; +extern int stbi_write_png_compression_level; +extern int stbi_write_force_png_filter; +#endif + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); +#endif + +typedef void stbi_write_func(void *context, void *data, int size); + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + +STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); + +#endif//INCLUDE_STB_IMAGE_WRITE_H + +#ifdef STB_IMAGE_WRITE_IMPLEMENTATION + +#ifdef _WIN32 + #ifndef 
_CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS + #endif + #ifndef _CRT_NONSTDC_NO_DEPRECATE + #define _CRT_NONSTDC_NO_DEPRECATE + #endif +#endif + +#ifndef STBI_WRITE_NO_STDIO +#include +#endif // STBI_WRITE_NO_STDIO + +#include +#include +#include +#include + +#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED)) +// ok +#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)." +#endif + +#ifndef STBIW_MALLOC +#define STBIW_MALLOC(sz) malloc(sz) +#define STBIW_REALLOC(p,newsz) realloc(p,newsz) +#define STBIW_FREE(p) free(p) +#endif + +#ifndef STBIW_REALLOC_SIZED +#define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz) +#endif + + +#ifndef STBIW_MEMMOVE +#define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz) +#endif + + +#ifndef STBIW_ASSERT +#include +#define STBIW_ASSERT(x) assert(x) +#endif + +#define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff) + +#ifdef STB_IMAGE_WRITE_STATIC +static int stbi__flip_vertically_on_write=0; +static int stbi_write_png_compression_level = 8; +static int stbi_write_tga_with_rle = 1; +static int stbi_write_force_png_filter = -1; +#else +int stbi_write_png_compression_level = 8; +int stbi__flip_vertically_on_write=0; +int stbi_write_tga_with_rle = 1; +int stbi_write_force_png_filter = -1; +#endif + +STBIWDEF void stbi_flip_vertically_on_write(int flag) +{ + stbi__flip_vertically_on_write = flag; +} + +typedef struct +{ + stbi_write_func *func; + void *context; +} stbi__write_context; + +// initialize a callback-based context +static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context) +{ + s->func = c; + s->context = context; +} + +#ifndef STBI_WRITE_NO_STDIO + +static void stbi__stdio_write(void *context, void *data, int size) +{ + fwrite(data,1,size,(FILE*) 
context); +} + +static int stbi__start_write_file(stbi__write_context *s, const char *filename) +{ + FILE *f; +#ifdef STBI_MSC_SECURE_CRT + if (fopen_s(&f, filename, "wb")) + f = NULL; +#else + f = fopen(filename, "wb"); +#endif + stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f); + return f != NULL; +} + +static void stbi__end_write_file(stbi__write_context *s) +{ + fclose((FILE *)s->context); +} + +#endif // !STBI_WRITE_NO_STDIO + +typedef unsigned int stbiw_uint32; +typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1]; + +static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) +{ + while (*fmt) { + switch (*fmt++) { + case ' ': break; + case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); + s->func(s->context,&x,1); + break; } + case '2': { int x = va_arg(v,int); + unsigned char b[2]; + b[0] = STBIW_UCHAR(x); + b[1] = STBIW_UCHAR(x>>8); + s->func(s->context,b,2); + break; } + case '4': { stbiw_uint32 x = va_arg(v,int); + unsigned char b[4]; + b[0]=STBIW_UCHAR(x); + b[1]=STBIW_UCHAR(x>>8); + b[2]=STBIW_UCHAR(x>>16); + b[3]=STBIW_UCHAR(x>>24); + s->func(s->context,b,4); + break; } + default: + STBIW_ASSERT(0); + return; + } + } +} + +static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) 
+{ + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); +} + +static void stbiw__putc(stbi__write_context *s, unsigned char c) +{ + s->func(s->context, &c, 1); +} + +static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) +{ + unsigned char arr[3]; + arr[0] = a, arr[1] = b, arr[2] = c; + s->func(s->context, arr, 3); +} + +static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d) +{ + unsigned char bg[3] = { 255, 0, 255}, px[3]; + int k; + + if (write_alpha < 0) + s->func(s->context, &d[comp - 1], 1); + + switch (comp) { + case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case + case 1: + if (expand_mono) + stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp + else + s->func(s->context, d, 1); // monochrome TGA + break; + case 4: + if (!write_alpha) { + // composite against pink background + for (k = 0; k < 3; ++k) + px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255; + stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]); + break; + } + /* FALLTHROUGH */ + case 3: + stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); + break; + } + if (write_alpha > 0) + s->func(s->context, &d[comp - 1], 1); +} + +static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) +{ + stbiw_uint32 zero = 0; + int i,j, j_end; + + if (y <= 0) + return; + + if (stbi__flip_vertically_on_write) + vdir *= -1; + + if (vdir < 0) + j_end = -1, j = y-1; + else + j_end = y, j = 0; + + for (; j != j_end; j += vdir) { + for (i=0; i < x; ++i) { + unsigned char *d = (unsigned char *) data + (j*x+i)*comp; + stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); + } + s->func(s->context, &zero, scanline_pad); + } +} + +static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int 
comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) +{ + if (y < 0 || x < 0) { + return 0; + } else { + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); + stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono); + return 1; + } +} + +static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data) +{ + int pad = (-x*3) & 3; + return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad, + "11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header +} + +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_bmp_core(&s, x, y, comp, data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_bmp_core(&s, x, y, comp, data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif //!STBI_WRITE_NO_STDIO + +static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data) +{ + int has_alpha = (comp == 2 || comp == 4); + int colorbytes = has_alpha ? comp-1 : comp; + int format = colorbytes < 2 ? 
3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3 + + if (y < 0 || x < 0) + return 0; + + if (!stbi_write_tga_with_rle) { + return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0, + "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); + } else { + int i,j,k; + int jend, jdir; + + stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); + + if (stbi__flip_vertically_on_write) { + j = 0; + jend = y; + jdir = 1; + } else { + j = y-1; + jend = -1; + jdir = -1; + } + for (; j != jend; j += jdir) { + unsigned char *row = (unsigned char *) data + j * x * comp; + int len; + + for (i = 0; i < x; i += len) { + unsigned char *begin = row + i * comp; + int diff = 1; + len = 1; + + if (i < x - 1) { + ++len; + diff = memcmp(begin, row + (i + 1) * comp, comp); + if (diff) { + const unsigned char *prev = begin; + for (k = i + 2; k < x && len < 128; ++k) { + if (memcmp(prev, row + k * comp, comp)) { + prev += comp; + ++len; + } else { + --len; + break; + } + } + } else { + for (k = i + 2; k < x && len < 128; ++k) { + if (!memcmp(begin, row + k * comp, comp)) { + ++len; + } else { + break; + } + } + } + } + + if (diff) { + unsigned char header = STBIW_UCHAR(len - 1); + s->func(s->context, &header, 1); + for (k = 0; k < len; ++k) { + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); + } + } else { + unsigned char header = STBIW_UCHAR(len - 129); + s->func(s->context, &header, 1); + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); + } + } + } + } + return 1; +} + +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_tga_core(&s, x, y, comp, (void *) data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, 
const void *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_tga_core(&s, x, y, comp, (void *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR writer +// by Baldur Karlsson + +#define stbiw__max(a, b) ((a) > (b) ? (a) : (b)) + +void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear) +{ + int exponent; + float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2])); + + if (maxcomp < 1e-32f) { + rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0; + } else { + float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp; + + rgbe[0] = (unsigned char)(linear[0] * normalize); + rgbe[1] = (unsigned char)(linear[1] * normalize); + rgbe[2] = (unsigned char)(linear[2] * normalize); + rgbe[3] = (unsigned char)(exponent + 128); + } +} + +void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte) +{ + unsigned char lengthbyte = STBIW_UCHAR(length+128); + STBIW_ASSERT(length+128 <= 255); + s->func(s->context, &lengthbyte, 1); + s->func(s->context, &databyte, 1); +} + +void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data) +{ + unsigned char lengthbyte = STBIW_UCHAR(length); + STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code + s->func(s->context, &lengthbyte, 1); + s->func(s->context, data, length); +} + +void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline) +{ + unsigned char scanlineheader[4] = { 2, 2, 0, 0 }; + unsigned char rgbe[4]; + float linear[3]; + int x; + + scanlineheader[2] = (width&0xff00)>>8; + scanlineheader[3] = (width&0x00ff); + + /* skip RLE for images too small or large */ + if (width < 8 || width >= 32768) { + for (x=0; x < width; x++) { + switch (ncomp) { + case 4: /* 
fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + s->func(s->context, rgbe, 4); + } + } else { + int c,r; + /* encode into scratch buffer */ + for (x=0; x < width; x++) { + switch(ncomp) { + case 4: /* fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + scratch[x + width*0] = rgbe[0]; + scratch[x + width*1] = rgbe[1]; + scratch[x + width*2] = rgbe[2]; + scratch[x + width*3] = rgbe[3]; + } + + s->func(s->context, scanlineheader, 4); + + /* RLE each component separately */ + for (c=0; c < 4; c++) { + unsigned char *comp = &scratch[width*c]; + + x = 0; + while (x < width) { + // find first run + r = x; + while (r+2 < width) { + if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) + break; + ++r; + } + if (r+2 >= width) + r = width; + // dump up to first run + while (x < r) { + int len = r-x; + if (len > 128) len = 128; + stbiw__write_dump_data(s, len, &comp[x]); + x += len; + } + // if there's a run, output it + if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd + // find next byte after run + while (r < width && comp[r] == comp[x]) + ++r; + // output run up to r + while (x < r) { + int len = r-x; + if (len > 127) len = 127; + stbiw__write_run_data(s, len, comp[x]); + x += len; + } + } + } + } + } +} + +static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data) +{ + if (y <= 0 || x <= 0 || data == NULL) + return 0; + else { + // Each component is stored separately. Allocate scratch space for full output scanline. 
+ unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); + int i, len; + char buffer[128]; + char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; + s->func(s->context, header, sizeof(header)-1); + +#ifdef STBI_MSC_SECURE_CRT + len = sprintf_s(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#else + len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#endif + s->func(s->context, buffer, len); + + for(i=0; i < y; i++) + stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i)*x); + STBIW_FREE(scratch); + return 1; + } +} + +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_hdr_core(&s, x, y, comp, (float *) data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif // STBI_WRITE_NO_STDIO + + +////////////////////////////////////////////////////////////////////////////// +// +// PNG writer +// + +#ifndef STBIW_ZLIB_COMPRESS +// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() +#define stbiw__sbraw(a) ((int *) (a) - 2) +#define stbiw__sbm(a) stbiw__sbraw(a)[0] +#define stbiw__sbn(a) stbiw__sbraw(a)[1] + +#define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) +#define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0) +#define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) + +#define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) +#define stbiw__sbcount(a) ((a) ? 
stbiw__sbn(a) : 0) +#define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0) + +static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) +{ + int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1; + void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2); + STBIW_ASSERT(p); + if (p) { + if (!*arr) ((int *) p)[1] = 0; + *arr = (void *) ((int *) p + 2); + stbiw__sbm(*arr) = m; + } + return *arr; +} + +static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount) +{ + while (*bitcount >= 8) { + stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer)); + *bitbuffer >>= 8; + *bitcount -= 8; + } + return data; +} + +static int stbiw__zlib_bitrev(int code, int codebits) +{ + int res=0; + while (codebits--) { + res = (res << 1) | (code & 1); + code >>= 1; + } + return res; +} + +static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit) +{ + int i; + for (i=0; i < limit && i < 258; ++i) + if (a[i] != b[i]) break; + return i; +} + +static unsigned int stbiw__zhash(unsigned char *data) +{ + stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16); + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + return hash; +} + +#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount)) +#define stbiw__zlib_add(code,codebits) \ + (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) +#define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) +// default huffman tables +#define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8) +#define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9) +#define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7) +#define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8) +#define stbiw__zlib_huff(n) ((n) <= 143 ? 
stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) +#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) + +#define stbiw__ZHASH 16384 + +#endif // STBIW_ZLIB_COMPRESS + +unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality) +{ +#ifdef STBIW_ZLIB_COMPRESS + // user provided a zlib compress implementation, use that + return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality); +#else // use builtin + static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; + static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; + static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; + static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; + unsigned int bitbuf=0; + int i,j, bitcount=0; + unsigned char *out = NULL; + unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(char**)); + if (hash_table == NULL) + return NULL; + if (quality < 5) quality = 5; + + stbiw__sbpush(out, 0x78); // DEFLATE 32K window + stbiw__sbpush(out, 0x5e); // FLEVEL = 1 + stbiw__zlib_add(1,1); // BFINAL = 1 + stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman + + for (i=0; i < stbiw__ZHASH; ++i) + hash_table[i] = NULL; + + i=0; + while (i < data_len-3) { + // hash next 3 bytes of data to be compressed + int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3; + unsigned char *bestloc = 0; + unsigned char **hlist = hash_table[h]; + int n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32768) { // if entry lies within window + int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i); + if (d >= best) best=d,bestloc=hlist[j]; + } + 
} + // when hash table entry is too long, delete half the entries + if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) { + STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality); + stbiw__sbn(hash_table[h]) = quality; + } + stbiw__sbpush(hash_table[h],data+i); + + if (bestloc) { + // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal + h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1); + hlist = hash_table[h]; + n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32767) { + int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1); + if (e > best) { // if next match is better, bail on current match + bestloc = NULL; + break; + } + } + } + } + + if (bestloc) { + int d = (int) (data+i - bestloc); // distance back + STBIW_ASSERT(d <= 32767 && best <= 258); + for (j=0; best > lengthc[j+1]-1; ++j); + stbiw__zlib_huff(j+257); + if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); + for (j=0; d > distc[j+1]-1; ++j); + stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); + if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]); + i += best; + } else { + stbiw__zlib_huffb(data[i]); + ++i; + } + } + // write out final bytes + for (;i < data_len; ++i) + stbiw__zlib_huffb(data[i]); + stbiw__zlib_huff(256); // end of block + // pad with 0 bits to byte boundary + while (bitcount) + stbiw__zlib_add(0,1); + + for (i=0; i < stbiw__ZHASH; ++i) + (void) stbiw__sbfree(hash_table[i]); + STBIW_FREE(hash_table); + + { + // compute adler32 on input + unsigned int s1=1, s2=0; + int blocklen = (int) (data_len % 5552); + j=0; + while (j < data_len) { + for (i=0; i < blocklen; ++i) s1 += data[j+i], s2 += s1; + s1 %= 65521, s2 %= 65521; + j += blocklen; + blocklen = 5552; + } + stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s2)); + stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s1)); + } + *out_len = stbiw__sbn(out); + // make 
returned pointer freeable + STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len); + return (unsigned char *) stbiw__sbraw(out); +#endif // STBIW_ZLIB_COMPRESS +} + +static unsigned int stbiw__crc32(unsigned char *buffer, int len) +{ + static unsigned int crc_table[256] = + { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 
0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + }; + + unsigned int crc = ~0u; + int i; + for (i=0; i < len; ++i) + crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; + return ~crc; +} + +#define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4) +#define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v)); +#define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3]) + +static void stbiw__wpcrc(unsigned char **data, int len) +{ + unsigned int crc = stbiw__crc32(*data - len - 4, len+4); + stbiw__wp32(*data, crc); +} + +static unsigned 
char stbiw__paeth(int a, int b, int c) +{ + int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c); + if (pa <= pb && pa <= pc) return STBIW_UCHAR(a); + if (pb <= pc) return STBIW_UCHAR(b); + return STBIW_UCHAR(c); +} + +// @OPTIMIZE: provide an option that always forces left-predict or paeth predict +static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer) +{ + static int mapping[] = { 0,1,2,3,4 }; + static int firstmap[] = { 0,1,0,5,6 }; + int *mymap = (y != 0) ? mapping : firstmap; + int i; + int type = mymap[filter_type]; + unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y); + int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes; + for (i = 0; i < n; ++i) { + switch (type) { + case 0: line_buffer[i] = z[i]; break; + case 1: line_buffer[i] = z[i]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break; + case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break; + case 5: line_buffer[i] = z[i]; break; + case 6: line_buffer[i] = z[i]; break; + } + } + for (i=n; i < width*n; ++i) { + switch (type) { + case 0: line_buffer[i] = z[i]; break; + case 1: line_buffer[i] = z[i] - z[i-n]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break; + case 4: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break; + case 5: line_buffer[i] = z[i] - (z[i-n]>>1); break; + case 6: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; + } + } +} + +unsigned char *stbi_write_png_to_mem(unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len) +{ + int force_filter = stbi_write_force_png_filter; + int ctype[5] = { -1, 0, 4, 2, 6 }; + unsigned char sig[8] 
= { 137,80,78,71,13,10,26,10 }; + unsigned char *out,*o, *filt, *zlib; + signed char *line_buffer; + int j,zlen; + + if (stride_bytes == 0) + stride_bytes = x * n; + + if (force_filter >= 5) { + force_filter = -1; + } + + filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; + line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; } + for (j=0; j < y; ++j) { + int filter_type; + if (force_filter > -1) { + filter_type = force_filter; + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, force_filter, line_buffer); + } else { // Estimate the best filter by running through all of them: + int best_filter = 0, best_filter_val = 0x7fffffff, est, i; + for (filter_type = 0; filter_type < 5; filter_type++) { + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, filter_type, line_buffer); + + // Estimate the entropy of the line using this filter; the less, the better. + est = 0; + for (i = 0; i < x*n; ++i) { + est += abs((signed char) line_buffer[i]); + } + if (est < best_filter_val) { + best_filter_val = est; + best_filter = filter_type; + } + } + if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, best_filter, line_buffer); + filter_type = best_filter; + } + } + // when we get here, filter_type contains the filter type, and line_buffer contains the data + filt[j*(x*n+1)] = (unsigned char) filter_type; + STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); + } + STBIW_FREE(line_buffer); + zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level); + STBIW_FREE(filt); + if (!zlib) return 0; + + // each tag requires 12 bytes of overhead + out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12); + if (!out) return 0; + *out_len = 8 + 12+13 + 12+zlen + 12; + + o=out; + STBIW_MEMMOVE(o,sig,8); o+= 8; + stbiw__wp32(o, 13); // header length + stbiw__wptag(o, "IHDR"); + 
stbiw__wp32(o, x); + stbiw__wp32(o, y); + *o++ = 8; + *o++ = STBIW_UCHAR(ctype[n]); + *o++ = 0; + *o++ = 0; + *o++ = 0; + stbiw__wpcrc(&o,13); + + stbiw__wp32(o, zlen); + stbiw__wptag(o, "IDAT"); + STBIW_MEMMOVE(o, zlib, zlen); + o += zlen; + STBIW_FREE(zlib); + stbiw__wpcrc(&o, zlen); + + stbiw__wp32(o,0); + stbiw__wptag(o, "IEND"); + stbiw__wpcrc(&o,0); + + STBIW_ASSERT(o == out + *out_len); + + return out; +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes) +{ + FILE *f; + int len; + unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; +#ifdef STBI_MSC_SECURE_CRT + if (fopen_s(&f, filename, "wb")) + f = NULL; +#else + f = fopen(filename, "wb"); +#endif + if (!f) { STBIW_FREE(png); return 0; } + fwrite(png, 1, len, f); + fclose(f); + STBIW_FREE(png); + return 1; +} +#endif + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes) +{ + int len; + unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; + func(context, png, len); + STBIW_FREE(png); + return 1; +} + + +/* *************************************************************************** + * + * JPEG writer + * + * This is based on Jon Olick's jo_jpeg.cpp: + * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html + */ + +static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18, + 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; + +static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) { + int bitBuf = *bitBufP, bitCnt = *bitCntP; + bitCnt += bs[1]; + bitBuf |= bs[0] << (24 - 
bitCnt); + while(bitCnt >= 8) { + unsigned char c = (bitBuf >> 16) & 255; + stbiw__putc(s, c); + if(c == 255) { + stbiw__putc(s, 0); + } + bitBuf <<= 8; + bitCnt -= 8; + } + *bitBufP = bitBuf; + *bitCntP = bitCnt; +} + +static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) { + float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p; + float z1, z2, z3, z4, z5, z11, z13; + + float tmp0 = d0 + d7; + float tmp7 = d0 - d7; + float tmp1 = d1 + d6; + float tmp6 = d1 - d6; + float tmp2 = d2 + d5; + float tmp5 = d2 - d5; + float tmp3 = d3 + d4; + float tmp4 = d3 - d4; + + // Even part + float tmp10 = tmp0 + tmp3; // phase 2 + float tmp13 = tmp0 - tmp3; + float tmp11 = tmp1 + tmp2; + float tmp12 = tmp1 - tmp2; + + d0 = tmp10 + tmp11; // phase 3 + d4 = tmp10 - tmp11; + + z1 = (tmp12 + tmp13) * 0.707106781f; // c4 + d2 = tmp13 + z1; // phase 5 + d6 = tmp13 - z1; + + // Odd part + tmp10 = tmp4 + tmp5; // phase 2 + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + // The rotator is modified from fig 4-8 to avoid extra negations. + z5 = (tmp10 - tmp12) * 0.382683433f; // c6 + z2 = tmp10 * 0.541196100f + z5; // c2-c6 + z4 = tmp12 * 1.306562965f + z5; // c2+c6 + z3 = tmp11 * 0.707106781f; // c4 + + z11 = tmp7 + z3; // phase 5 + z13 = tmp7 - z3; + + *d5p = z13 + z2; // phase 6 + *d3p = z13 - z2; + *d1p = z11 + z4; + *d7p = z11 - z4; + + *d0p = d0; *d2p = d2; *d4p = d4; *d6p = d6; +} + +static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) { + int tmp1 = val < 0 ? -val : val; + val = val < 0 ? 
val-1 : val; + bits[1] = 1; + while(tmp1 >>= 1) { + ++bits[1]; + } + bits[0] = val & ((1<0)&&(DU[end0pos]==0); --end0pos) { + } + // end0pos = first element in reverse order !=0 + if(end0pos == 0) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + return DU[0]; + } + for(i = 1; i <= end0pos; ++i) { + int startpos = i; + int nrzeroes; + unsigned short bits[2]; + for (; DU[i]==0 && i<=end0pos; ++i) { + } + nrzeroes = i-startpos; + if ( nrzeroes >= 16 ) { + int lng = nrzeroes>>4; + int nrmarker; + for (nrmarker=1; nrmarker <= lng; ++nrmarker) + stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); + nrzeroes &= 15; + } + stbiw__jpg_calcBits(DU[i], bits); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); + } + if(end0pos != 63) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + } + return DU[0]; +} + +static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) { + // Constants that don't pollute global namespace + static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0}; + static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d}; + static const unsigned char std_ac_luminance_values[] = { + 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, + 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, + 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, + 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, + 
0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, + 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, + 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0}; + static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77}; + static const unsigned char std_ac_chrominance_values[] = { + 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, + 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26, + 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, + 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, + 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, + 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, + 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + // Huffman tables + static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}}; + static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}}; + static const unsigned short YAC_HT[256][2] = { + {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const unsigned short UVAC_HT[256][2] = { + {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22, + 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99}; + static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99, + 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99}; + static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, + 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; + + int row, col, i, k; + float fdtbl_Y[64], fdtbl_UV[64]; + unsigned char YTable[64], UVTable[64]; + + if(!data || !width || !height || comp > 4 || comp < 1) { + return 0; + } + + quality = quality ? quality : 90; + quality = quality < 1 ? 1 : quality > 100 ? 100 : quality; + quality = quality < 50 ? 5000 / quality : 200 - quality * 2; + + for(i = 0; i < 64; ++i) { + int uvti, yti = (YQT[i]*quality+50)/100; + YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 
255 : yti); + uvti = (UVQT[i]*quality+50)/100; + UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti); + } + + for(row = 0, k = 0; row < 8; ++row) { + for(col = 0; col < 8; ++col, ++k) { + fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + } + } + + // Write Headers + { + static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; + static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; + const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width), + 3,1,0x11,0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; + s->func(s->context, (void*)head0, sizeof(head0)); + s->func(s->context, (void*)YTable, sizeof(YTable)); + stbiw__putc(s, 1); + s->func(s->context, UVTable, sizeof(UVTable)); + s->func(s->context, (void*)head1, sizeof(head1)); + s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1); + s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); + stbiw__putc(s, 0x10); // HTYACinfo + s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1); + s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); + stbiw__putc(s, 1); // HTUDCinfo + s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values)); + stbiw__putc(s, 0x11); // HTUACinfo + s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); + s->func(s->context, (void*)head2, sizeof(head2)); + } + + // Encode 8x8 macroblocks 
+ { + static const unsigned short fillBits[] = {0x7F, 7}; + const unsigned char *imageData = (const unsigned char *)data; + int DCY=0, DCU=0, DCV=0; + int bitBuf=0, bitCnt=0; + // comp == 2 is grey+alpha (alpha is ignored) + int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; + int x, y, pos; + for(y = 0; y < height; y += 8) { + for(x = 0; x < width; x += 8) { + float YDU[64], UDU[64], VDU[64]; + for(row = y, pos = 0; row < y+8; ++row) { + for(col = x; col < x+8; ++col, ++pos) { + int p = (stbi__flip_vertically_on_write ? height-1-row : row)*width*comp + col*comp; + float r, g, b; + if(row >= height) { + p -= width*comp*(row+1 - height); + } + if(col >= width) { + p -= comp*(col+1 - width); + } + + r = imageData[p+0]; + g = imageData[p+ofsG]; + b = imageData[p+ofsB]; + YDU[pos]=+0.29900f*r+0.58700f*g+0.11400f*b-128; + UDU[pos]=-0.16874f*r-0.33126f*g+0.50000f*b; + VDU[pos]=+0.50000f*r-0.41869f*g-0.08131f*b; + } + } + + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, YDU, fdtbl_Y, DCY, YDC_HT, YAC_HT); + DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, UDU, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); + DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, VDU, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); + } + } + + // Do the bit alignment of the EOI marker + stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); + } + + // EOI + stbiw__putc(s, 0xFF); + stbiw__putc(s, 0xD9); + + return 1; +} + +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality); +} + + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); + stbi__end_write_file(&s); + return r; + } else + return 
0; +} +#endif + +#endif // STB_IMAGE_WRITE_IMPLEMENTATION + +/* Revision history + 1.09 (2018-02-11) + fix typo in zlib quality API, improve STB_I_W_STATIC in C++ + 1.08 (2018-01-29) + add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter + 1.07 (2017-07-24) + doc fix + 1.06 (2017-07-23) + writing JPEG (using Jon Olick's code) + 1.05 ??? + 1.04 (2017-03-03) + monochrome BMP expansion + 1.03 ??? + 1.02 (2016-04-02) + avoid allocating large structures on the stack + 1.01 (2016-01-16) + STBIW_REALLOC_SIZED: support allocators with no realloc support + avoid race-condition in crc initialization + minor compile issues + 1.00 (2015-09-14) + installable file IO function + 0.99 (2015-09-13) + warning fixes; TGA rle support + 0.98 (2015-04-08) + added STBIW_MALLOC, STBIW_ASSERT etc + 0.97 (2015-01-18) + fixed HDR asserts, rewrote HDR rle logic + 0.96 (2015-01-17) + add HDR output + fix monochrome BMP + 0.95 (2014-08-17) + add monochrome TGA output + 0.94 (2014-05-31) + rename private functions to avoid conflicts with stb_image.h + 0.93 (2014-05-27) + warning fixes + 0.92 (2010-08-01) + casts to unsigned char to fix warnings + 0.91 (2010-07-17) + first public release + 0.90 first internal release +*/ + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/workloads/realworld/uvm_prefetch/darknet/src/tree.c b/workloads/realworld/uvm_prefetch/darknet/src/tree.c new file mode 100644 index 0000000000000000000000000000000000000000..67b6d431f6f7e92ede234c71ecae9bd9146dc71f --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/tree.c @@ -0,0 +1,139 @@ +#include +#include +#include "tree.h" +#include "utils.h" +#include "data.h" + +void change_leaves(tree *t, char *leaf_list) +{ + list *llist = get_paths(leaf_list); + char **leaves = (char **)list_to_array(llist); + int n = llist->size; + int i,j; + int found = 0; + for(i = 0; i < t->n; ++i){ + t->leaf[i] = 0; + for(j = 0; j < n; ++j){ + if (0==strcmp(t->name[i], leaves[j])){ + t->leaf[i] = 1; + ++found; + break; + } + } + } + fprintf(stderr, "Found %d leaves.\n", found); +} + +float get_hierarchy_probability(float *x, tree *hier, int c, int stride) +{ + float p = 1; + while(c >= 0){ + p = p * x[c*stride]; + c = hier->parent[c]; + } + return p; +} + +void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride) +{ + int j; + for(j = 0; j < n; ++j){ + int parent = hier->parent[j]; + if(parent >= 0){ + predictions[j*stride] *= predictions[parent*stride]; + } + } + if(only_leaves){ + for(j = 0; j < n; ++j){ + if(!hier->leaf[j]) predictions[j*stride] = 0; 
+ } + } +} + +int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride) +{ + float p = 1; + int group = 0; + int i; + while(1){ + float max = 0; + int max_i = 0; + + for(i = 0; i < hier->group_size[group]; ++i){ + int index = i + hier->group_offset[group]; + float val = predictions[(i + hier->group_offset[group])*stride]; + if(val > max){ + max_i = index; + max = val; + } + } + if(p*max > thresh){ + p = p*max; + group = hier->child[max_i]; + if(hier->child[max_i] < 0) return max_i; + } else if (group == 0){ + return max_i; + } else { + return hier->parent[hier->group_offset[group]]; + } + } + return 0; +} + +tree *read_tree(char *filename) +{ + tree t = {0}; + FILE *fp = fopen(filename, "r"); + + char *line; + int last_parent = -1; + int group_size = 0; + int groups = 0; + int n = 0; + while((line=fgetl(fp)) != 0){ + char *id = calloc(256, sizeof(char)); + int parent = -1; + sscanf(line, "%s %d", id, &parent); + t.parent = realloc(t.parent, (n+1)*sizeof(int)); + t.parent[n] = parent; + + t.child = realloc(t.child, (n+1)*sizeof(int)); + t.child[n] = -1; + + t.name = realloc(t.name, (n+1)*sizeof(char *)); + t.name[n] = id; + if(parent != last_parent){ + ++groups; + t.group_offset = realloc(t.group_offset, groups * sizeof(int)); + t.group_offset[groups - 1] = n - group_size; + t.group_size = realloc(t.group_size, groups * sizeof(int)); + t.group_size[groups - 1] = group_size; + group_size = 0; + last_parent = parent; + } + t.group = realloc(t.group, (n+1)*sizeof(int)); + t.group[n] = groups; + if (parent >= 0) { + t.child[parent] = groups; + } + ++n; + ++group_size; + } + ++groups; + t.group_offset = realloc(t.group_offset, groups * sizeof(int)); + t.group_offset[groups - 1] = n - group_size; + t.group_size = realloc(t.group_size, groups * sizeof(int)); + t.group_size[groups - 1] = group_size; + t.n = n; + t.groups = groups; + t.leaf = calloc(n, sizeof(int)); + int i; + for(i = 0; i < n; ++i) t.leaf[i] = 1; + for(i = 0; i < n; ++i) 
if(t.parent[i] >= 0) t.leaf[t.parent[i]] = 0; + + fclose(fp); + tree *tree_ptr = calloc(1, sizeof(tree)); + *tree_ptr = t; + //error(0); + return tree_ptr; +} diff --git a/workloads/realworld/uvm_prefetch/darknet/src/tree.h b/workloads/realworld/uvm_prefetch/darknet/src/tree.h new file mode 100644 index 0000000000000000000000000000000000000000..3802b8ead806266edd291de5407b08c2d7ed5dd1 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/tree.h @@ -0,0 +1,8 @@ +#ifndef TREE_H +#define TREE_H +#include "darknet.h" + +int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride); +float get_hierarchy_probability(float *x, tree *hier, int c, int stride); + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/upsample_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/upsample_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..83f32ea5f41b4c787c38e5324e3e7dd4909ca928 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/upsample_layer.c @@ -0,0 +1,106 @@ +#include "upsample_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + +layer make_upsample_layer(int batch, int w, int h, int c, int stride) +{ + layer l = {0}; + l.type = UPSAMPLE; + l.batch = batch; + l.w = w; + l.h = h; + l.c = c; + l.out_w = w*stride; + l.out_h = h*stride; + l.out_c = c; + if(stride < 0){ + stride = -stride; + l.reverse=1; + l.out_w = w/stride; + l.out_h = h/stride; + } + l.stride = stride; + l.outputs = l.out_w*l.out_h*l.out_c; + l.inputs = l.w*l.h*l.c; + l.delta = calloc(l.outputs*batch, sizeof(float)); + l.output = calloc(l.outputs*batch, sizeof(float));; + + l.forward = forward_upsample_layer; + l.backward = backward_upsample_layer; + #ifdef GPU + l.forward_gpu = forward_upsample_layer_gpu; + l.backward_gpu = backward_upsample_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + #endif + if(l.reverse) 
fprintf(stderr, "downsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); + else fprintf(stderr, "upsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); + return l; +} + +void resize_upsample_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + l->out_w = w*l->stride; + l->out_h = h*l->stride; + if(l->reverse){ + l->out_w = w/l->stride; + l->out_h = h/l->stride; + } + l->outputs = l->out_w*l->out_h*l->out_c; + l->inputs = l->h*l->w*l->c; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + +void forward_upsample_layer(const layer l, network net) +{ + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + if(l.reverse){ + upsample_cpu(l.output, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input); + }else{ + upsample_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output); + } +} + +void backward_upsample_layer(const layer l, network net) +{ + if(l.reverse){ + upsample_cpu(l.delta, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, net.delta); + }else{ + upsample_cpu(net.delta, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta); + } +} + +#ifdef GPU +void forward_upsample_layer_gpu(const layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + if(l.reverse){ + upsample_gpu(l.output_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input_gpu); + }else{ + upsample_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output_gpu); + } +} + +void backward_upsample_layer_gpu(const layer l, network net) +{ + if(l.reverse){ + upsample_gpu(l.delta_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, net.delta_gpu); + 
}else{ + upsample_gpu(net.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta_gpu); + } +} +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/upsample_layer.h b/workloads/realworld/uvm_prefetch/darknet/src/upsample_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..86790d1088354ea9c46a4b20fbe1dacf36925ca8 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/upsample_layer.h @@ -0,0 +1,15 @@ +#ifndef UPSAMPLE_LAYER_H +#define UPSAMPLE_LAYER_H +#include "darknet.h" + +layer make_upsample_layer(int batch, int w, int h, int c, int stride); +void forward_upsample_layer(const layer l, network net); +void backward_upsample_layer(const layer l, network net); +void resize_upsample_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_upsample_layer_gpu(const layer l, network net); +void backward_upsample_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/src/utils.c b/workloads/realworld/uvm_prefetch/darknet/src/utils.c new file mode 100644 index 0000000000000000000000000000000000000000..626b4678c1e2779552ed9d34f19ce4b0f57d9ded --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/utils.c @@ -0,0 +1,726 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + + +/* +// old timing. is it better? who knows!! 
+double get_wall_time() +{ + struct timeval time; + if (gettimeofday(&time,NULL)){ + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * .000001; +} +*/ + +double what_time_is_it_now() +{ + struct timeval time; + if (gettimeofday(&time,NULL)){ + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * .000001; +} + +int *read_intlist(char *gpu_list, int *ngpus, int d) +{ + int *gpus = 0; + if(gpu_list){ + int len = strlen(gpu_list); + *ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++*ngpus; + } + gpus = calloc(*ngpus, sizeof(int)); + for(i = 0; i < *ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpus = calloc(1, sizeof(float)); + *gpus = d; + *ngpus = 1; + } + return gpus; +} + +int *read_map(char *filename) +{ + int n = 0; + int *map = 0; + char *str; + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + while((str=fgetl(file))){ + ++n; + map = realloc(map, n*sizeof(int)); + map[n-1] = atoi(str); + } + return map; +} + +void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections) +{ + size_t i; + for(i = 0; i < sections; ++i){ + size_t start = n*i/sections; + size_t end = n*(i+1)/sections; + size_t num = end-start; + shuffle(arr+(start*size), num, size); + } +} + +void shuffle(void *arr, size_t n, size_t size) +{ + size_t i; + void *swp = calloc(1, size); + for(i = 0; i < n-1; ++i){ + size_t j = i + rand()/(RAND_MAX / (n-i)+1); + memcpy(swp, arr+(j*size), size); + memcpy(arr+(j*size), arr+(i*size), size); + memcpy(arr+(i*size), swp, size); + } +} + +int *random_index_order(int min, int max) +{ + int *inds = calloc(max-min, sizeof(int)); + int i; + for(i = min; i < max; ++i){ + inds[i] = i; + } + for(i = min; i < max-1; ++i){ + int swap = inds[i]; + int index = i + rand()%(max-i); + inds[i] = inds[index]; + inds[index] = swap; + } + return inds; +} + +void del_arg(int argc, char **argv, int index) +{ + int i; + for(i = index; i < 
argc-1; ++i) argv[i] = argv[i+1]; + argv[i] = 0; +} + +int find_arg(int argc, char* argv[], char *arg) +{ + int i; + for(i = 0; i < argc; ++i) { + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)) { + del_arg(argc, argv, i); + return 1; + } + } + return 0; +} + +int find_int_arg(int argc, char **argv, char *arg, int def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = atoi(argv[i+1]); + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + +float find_float_arg(int argc, char **argv, char *arg, float def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = atof(argv[i+1]); + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + +char *find_char_arg(int argc, char **argv, char *arg, char *def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = argv[i+1]; + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + + +char *basecfg(char *cfgfile) +{ + char *c = cfgfile; + char *next; + while((next = strchr(c, '/'))) + { + c = next+1; + } + c = copy_string(c); + next = strchr(c, '.'); + if (next) *next = 0; + return c; +} + +int alphanum_to_int(char c) +{ + return (c < 58) ? c - 48 : c-87; +} +char int_to_alphanum(int i) +{ + if (i == 36) return '.'; + return (i < 10) ? i + 48 : i + 87; +} + +void pm(int M, int N, float *A) +{ + int i,j; + for(i =0 ; i < M; ++i){ + printf("%d ", i+1); + for(j = 0; j < N; ++j){ + printf("%2.4f, ", A[i*N+j]); + } + printf("\n"); + } + printf("\n"); +} + +void find_replace(char *str, char *orig, char *rep, char *output) +{ + char buffer[4096] = {0}; + char *p; + + sprintf(buffer, "%s", str); + if(!(p = strstr(buffer, orig))){ // Is 'orig' even in 'str'? 
+ sprintf(output, "%s", str); + return; + } + + *p = '\0'; + + sprintf(output, "%s%s%s", buffer, rep, p+strlen(orig)); +} + +float sec(clock_t clocks) +{ + return (float)clocks/CLOCKS_PER_SEC; +} + +void top_k(float *a, int n, int k, int *index) +{ + int i,j; + for(j = 0; j < k; ++j) index[j] = -1; + for(i = 0; i < n; ++i){ + int curr = i; + for(j = 0; j < k; ++j){ + if((index[j] < 0) || a[curr] > a[index[j]]){ + int swap = curr; + curr = index[j]; + index[j] = swap; + } + } + } +} + +void error(const char *s) +{ + perror(s); + assert(0); + exit(-1); +} + +unsigned char *read_file(char *filename) +{ + FILE *fp = fopen(filename, "rb"); + size_t size; + + fseek(fp, 0, SEEK_END); + size = ftell(fp); + fseek(fp, 0, SEEK_SET); + + unsigned char *text = calloc(size+1, sizeof(char)); + fread(text, 1, size, fp); + fclose(fp); + return text; +} + +void malloc_error() +{ + fprintf(stderr, "Malloc error\n"); + exit(-1); +} + +void file_error(char *s) +{ + fprintf(stderr, "Couldn't open file: %s\n", s); + exit(0); +} + +list *split_str(char *s, char delim) +{ + size_t i; + size_t len = strlen(s); + list *l = make_list(); + list_insert(l, s); + for(i = 0; i < len; ++i){ + if(s[i] == delim){ + s[i] = '\0'; + list_insert(l, &(s[i+1])); + } + } + return l; +} + +void strip(char *s) +{ + size_t i; + size_t len = strlen(s); + size_t offset = 0; + for(i = 0; i < len; ++i){ + char c = s[i]; + if(c==' '||c=='\t'||c=='\n') ++offset; + else s[i-offset] = c; + } + s[len-offset] = '\0'; +} + +void strip_char(char *s, char bad) +{ + size_t i; + size_t len = strlen(s); + size_t offset = 0; + for(i = 0; i < len; ++i){ + char c = s[i]; + if(c==bad) ++offset; + else s[i-offset] = c; + } + s[len-offset] = '\0'; +} + +void free_ptrs(void **ptrs, int n) +{ + int i; + for(i = 0; i < n; ++i) free(ptrs[i]); + free(ptrs); +} + +char *fgetl(FILE *fp) +{ + if(feof(fp)) return 0; + size_t size = 512; + char *line = malloc(size*sizeof(char)); + if(!fgets(line, size, fp)){ + free(line); + return 0; + } + + 
size_t curr = strlen(line); + + while((line[curr-1] != '\n') && !feof(fp)){ + if(curr == size-1){ + size *= 2; + line = realloc(line, size*sizeof(char)); + if(!line) { + printf("%ld\n", size); + malloc_error(); + } + } + size_t readsize = size-curr; + if(readsize > INT_MAX) readsize = INT_MAX-1; + fgets(&line[curr], readsize, fp); + curr = strlen(line); + } + if(line[curr-1] == '\n') line[curr-1] = '\0'; + + return line; +} + +int read_int(int fd) +{ + int n = 0; + int next = read(fd, &n, sizeof(int)); + if(next <= 0) return -1; + return n; +} + +void write_int(int fd, int n) +{ + int next = write(fd, &n, sizeof(int)); + if(next <= 0) error("read failed"); +} + +int read_all_fail(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + int next = read(fd, buffer + n, bytes-n); + if(next <= 0) return 1; + n += next; + } + return 0; +} + +int write_all_fail(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + size_t next = write(fd, buffer + n, bytes-n); + if(next <= 0) return 1; + n += next; + } + return 0; +} + +void read_all(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + int next = read(fd, buffer + n, bytes-n); + if(next <= 0) error("read failed"); + n += next; + } +} + +void write_all(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + size_t next = write(fd, buffer + n, bytes-n); + if(next <= 0) error("write failed"); + n += next; + } +} + + +char *copy_string(char *s) +{ + char *copy = malloc(strlen(s)+1); + strncpy(copy, s, strlen(s)+1); + return copy; +} + +list *parse_csv_line(char *line) +{ + list *l = make_list(); + char *c, *p; + int in = 0; + for(c = line, p = line; *c != '\0'; ++c){ + if(*c == '"') in = !in; + else if(*c == ',' && !in){ + *c = '\0'; + list_insert(l, copy_string(p)); + p = c+1; + } + } + list_insert(l, copy_string(p)); + return l; +} + +int count_fields(char *line) +{ + int count = 0; + int done = 0; + char *c; + for(c = line; !done; ++c){ 
+ done = (*c == '\0'); + if(*c == ',' || done) ++count; + } + return count; +} + +float *parse_fields(char *line, int n) +{ + float *field = calloc(n, sizeof(float)); + char *c, *p, *end; + int count = 0; + int done = 0; + for(c = line, p = line; !done; ++c){ + done = (*c == '\0'); + if(*c == ',' || done){ + *c = '\0'; + field[count] = strtod(p, &end); + if(p == c) field[count] = nan(""); + if(end != c && (end != c-1 || *end != '\r')) field[count] = nan(""); //DOS file formats! + p = c+1; + ++count; + } + } + return field; +} + +float sum_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i) sum += a[i]; + return sum; +} + +float mean_array(float *a, int n) +{ + return sum_array(a,n)/n; +} + +void mean_arrays(float **a, int n, int els, float *avg) +{ + int i; + int j; + memset(avg, 0, els*sizeof(float)); + for(j = 0; j < n; ++j){ + for(i = 0; i < els; ++i){ + avg[i] += a[j][i]; + } + } + for(i = 0; i < els; ++i){ + avg[i] /= n; + } +} + +void print_statistics(float *a, int n) +{ + float m = mean_array(a, n); + float v = variance_array(a, n); + printf("MSE: %.6f, Mean: %.6f, Variance: %.6f\n", mse_array(a, n), m, v); +} + +float variance_array(float *a, int n) +{ + int i; + float sum = 0; + float mean = mean_array(a, n); + for(i = 0; i < n; ++i) sum += (a[i] - mean)*(a[i]-mean); + float variance = sum/n; + return variance; +} + +int constrain_int(int a, int min, int max) +{ + if (a < min) return min; + if (a > max) return max; + return a; +} + +float constrain(float min, float max, float a) +{ + if (a < min) return min; + if (a > max) return max; + return a; +} + +float dist_array(float *a, float *b, int n, int sub) +{ + int i; + float sum = 0; + for(i = 0; i < n; i += sub) sum += pow(a[i]-b[i], 2); + return sqrt(sum); +} + +float mse_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i) sum += a[i]*a[i]; + return sqrt(sum/n); +} + +void normalize_array(float *a, int n) +{ + int i; + float mu = mean_array(a,n); + float 
sigma = sqrt(variance_array(a,n)); + for(i = 0; i < n; ++i){ + a[i] = (a[i] - mu)/sigma; + } + mu = mean_array(a,n); + sigma = sqrt(variance_array(a,n)); +} + +void translate_array(float *a, int n, float s) +{ + int i; + for(i = 0; i < n; ++i){ + a[i] += s; + } +} + +float mag_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + sum += a[i]*a[i]; + } + return sqrt(sum); +} + +void scale_array(float *a, int n, float s) +{ + int i; + for(i = 0; i < n; ++i){ + a[i] *= s; + } +} + +int sample_array(float *a, int n) +{ + float sum = sum_array(a, n); + scale_array(a, n, 1./sum); + float r = rand_uniform(0, 1); + int i; + for(i = 0; i < n; ++i){ + r = r - a[i]; + if (r <= 0) return i; + } + return n-1; +} + +int max_int_index(int *a, int n) +{ + if(n <= 0) return -1; + int i, max_i = 0; + int max = a[0]; + for(i = 1; i < n; ++i){ + if(a[i] > max){ + max = a[i]; + max_i = i; + } + } + return max_i; +} + +int max_index(float *a, int n) +{ + if(n <= 0) return -1; + int i, max_i = 0; + float max = a[0]; + for(i = 1; i < n; ++i){ + if(a[i] > max){ + max = a[i]; + max_i = i; + } + } + return max_i; +} + +int int_index(int *a, int val, int n) +{ + int i; + for(i = 0; i < n; ++i){ + if(a[i] == val) return i; + } + return -1; +} + +int rand_int(int min, int max) +{ + if (max < min){ + int s = min; + min = max; + max = s; + } + int r = (rand()%(max - min + 1)) + min; + return r; +} + +// From http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform +float rand_normal() +{ + static int haveSpare = 0; + static double rand1, rand2; + + if(haveSpare) + { + haveSpare = 0; + return sqrt(rand1) * sin(rand2); + } + + haveSpare = 1; + + rand1 = rand() / ((double) RAND_MAX); + if(rand1 < 1e-100) rand1 = 1e-100; + rand1 = -2 * log(rand1); + rand2 = (rand() / ((double) RAND_MAX)) * TWO_PI; + + return sqrt(rand1) * cos(rand2); +} + +/* + float rand_normal() + { + int n = 12; + int i; + float sum= 0; + for(i = 0; i < n; ++i) sum += (float)rand()/RAND_MAX; + return 
sum-n/2.; + } + */ + +size_t rand_size_t() +{ + return ((size_t)(rand()&0xff) << 56) | + ((size_t)(rand()&0xff) << 48) | + ((size_t)(rand()&0xff) << 40) | + ((size_t)(rand()&0xff) << 32) | + ((size_t)(rand()&0xff) << 24) | + ((size_t)(rand()&0xff) << 16) | + ((size_t)(rand()&0xff) << 8) | + ((size_t)(rand()&0xff) << 0); +} + +float rand_uniform(float min, float max) +{ + if(max < min){ + float swap = min; + min = max; + max = swap; + } + return ((float)rand()/RAND_MAX * (max - min)) + min; +} + +float rand_scale(float s) +{ + float scale = rand_uniform(1, s); + if(rand()%2) return scale; + return 1./scale; +} + +float **one_hot_encode(float *a, int n, int k) +{ + int i; + float **t = calloc(n, sizeof(float*)); + for(i = 0; i < n; ++i){ + t[i] = calloc(k, sizeof(float)); + int index = (int)a[i]; + t[i][index] = 1; + } + return t; +} + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/utils.h b/workloads/realworld/uvm_prefetch/darknet/src/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..ef24da79888612f5b48fbb4dc233c483590e0c34 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/utils.h @@ -0,0 +1,53 @@ +#ifndef UTILS_H +#define UTILS_H +#include +#include +#include "darknet.h" +#include "list.h" + +#define TIME(a) \ + do { \ + double start = what_time_is_it_now(); \ + a; \ + printf("%s took: %f seconds\n", #a, what_time_is_it_now() - start); \ + } while (0) + +#define TWO_PI 6.2831853071795864769252866f + +double what_time_is_it_now(); +void shuffle(void *arr, size_t n, size_t size); +void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections); +void free_ptrs(void **ptrs, int n); +int alphanum_to_int(char c); +char int_to_alphanum(int i); +int read_int(int fd); +void write_int(int fd, int n); +void read_all(int fd, char *buffer, size_t bytes); +void write_all(int fd, char *buffer, size_t bytes); +int read_all_fail(int fd, char *buffer, size_t bytes); +int write_all_fail(int fd, char *buffer, size_t bytes); 
+void find_replace(char *str, char *orig, char *rep, char *output); +void malloc_error(); +void file_error(char *s); +void strip(char *s); +void strip_char(char *s, char bad); +list *split_str(char *s, char delim); +char *fgetl(FILE *fp); +list *parse_csv_line(char *line); +char *copy_string(char *s); +int count_fields(char *line); +float *parse_fields(char *line, int n); +void translate_array(float *a, int n, float s); +float constrain(float min, float max, float a); +int constrain_int(int a, int min, int max); +float rand_scale(float s); +int rand_int(int min, int max); +void mean_arrays(float **a, int n, int els, float *avg); +float dist_array(float *a, float *b, int n, int sub); +float **one_hot_encode(float *a, int n, int k); +float sec(clock_t clocks); +void print_statistics(float *a, int n); +int int_index(int *a, int val, int n); + +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/yolo_layer.c b/workloads/realworld/uvm_prefetch/darknet/src/yolo_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..049a4d6a92cf7fea667b8de2340822834408bb05 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/yolo_layer.c @@ -0,0 +1,374 @@ +#include "yolo_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes) +{ + int i; + layer l = {0}; + l.type = YOLO; + + l.n = n; + l.total = total; + l.batch = batch; + l.h = h; + l.w = w; + l.c = n*(classes + 4 + 1); + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.cost = calloc(1, sizeof(float)); + l.biases = calloc(total*2, sizeof(float)); + if(mask) l.mask = mask; + else{ + l.mask = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + l.mask[i] = i; + } + } + l.bias_updates = calloc(n*2, sizeof(float)); + l.outputs = h*w*n*(classes + 4 + 1); + l.inputs = l.outputs; 
+ l.truths = 90*(4 + 1); + l.delta = calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + for(i = 0; i < total*2; ++i){ + l.biases[i] = .5; + } + + l.forward = forward_yolo_layer; + l.backward = backward_yolo_layer; +#ifdef GPU + l.forward_gpu = forward_yolo_layer_gpu; + l.backward_gpu = backward_yolo_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "yolo\n"); + srand(0); + + return l; +} + +void resize_yolo_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->n*(l->classes + 4 + 1); + l->inputs = l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride) +{ + box b; + b.x = (i + x[index + 0*stride]) / lw; + b.y = (j + x[index + 1*stride]) / lh; + b.w = exp(x[index + 2*stride]) * biases[2*n] / w; + b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h; + return b; +} + +float delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride) +{ + box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); + float iou = box_iou(pred, truth); + + float tx = (truth.x*lw - i); + float ty = (truth.y*lh - j); + float tw = log(truth.w*w / biases[2*n]); + float th = log(truth.h*h / biases[2*n + 1]); + + delta[index + 0*stride] = scale * (tx - x[index + 0*stride]); + delta[index + 1*stride] = scale * (ty - x[index + 1*stride]); + delta[index + 2*stride] = scale * (tw - x[index + 
2*stride]); + delta[index + 3*stride] = scale * (th - x[index + 3*stride]); + return iou; +} + + +void delta_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat) +{ + int n; + if (delta[index]){ + delta[index + stride*class] = 1 - output[index + stride*class]; + if(avg_cat) *avg_cat += output[index + stride*class]; + return; + } + for(n = 0; n < classes; ++n){ + delta[index + stride*n] = ((n == class)?1 : 0) - output[index + stride*n]; + if(n == class && avg_cat) *avg_cat += output[index + stride*n]; + } +} + +static int entry_index(layer l, int batch, int location, int entry) +{ + int n = location / (l.w*l.h); + int loc = location % (l.w*l.h); + return batch*l.outputs + n*l.w*l.h*(4+l.classes+1) + entry*l.w*l.h + loc; +} + +void forward_yolo_layer(const layer l, network net) +{ + int i,j,b,t,n; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array(l.output + index, (1+l.classes)*l.w*l.h, LOGISTIC); + } + } +#endif + + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + if(!net.train) return; + float avg_iou = 0; + float recall = 0; + float recall75 = 0; + float avg_cat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + int class_count = 0; + *(l.cost) = 0; + for (b = 0; b < l.batch; ++b) { + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.w*l.h); + float best_iou = 0; + int best_t = 0; + for(t = 0; t < l.max_boxes; ++t){ + box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1); + if(!truth.x) break; + float iou = box_iou(pred, truth); + 
if (iou > best_iou) { + best_iou = iou; + best_t = t; + } + } + int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4); + avg_anyobj += l.output[obj_index]; + l.delta[obj_index] = 0 - l.output[obj_index]; + if (best_iou > l.ignore_thresh) { + l.delta[obj_index] = 0; + } + if (best_iou > l.truth_thresh) { + l.delta[obj_index] = 1 - l.output[obj_index]; + + int class = net.truth[best_t*(4 + 1) + b*l.truths + 4]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); + delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, 0); + box truth = float_to_box(net.truth + best_t*(4 + 1) + b*l.truths, 1); + delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + } + } + } + } + for(t = 0; t < l.max_boxes; ++t){ + box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1); + + if(!truth.x) break; + float best_iou = 0; + int best_n = 0; + i = (truth.x * l.w); + j = (truth.y * l.h); + box truth_shift = truth; + truth_shift.x = truth_shift.y = 0; + for(n = 0; n < l.total; ++n){ + box pred = {0}; + pred.w = l.biases[2*n]/net.w; + pred.h = l.biases[2*n+1]/net.h; + float iou = box_iou(pred, truth_shift); + if (iou > best_iou){ + best_iou = iou; + best_n = n; + } + } + + int mask_n = int_index(l.mask, best_n, l.n); + if(mask_n >= 0){ + int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); + float iou = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + + int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4); + avg_obj += l.output[obj_index]; + l.delta[obj_index] = 1 - l.output[obj_index]; + + int class = net.truth[t*(4 + 1) + b*l.truths + 4]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4 + 1); + delta_yolo_class(l.output, l.delta, class_index, class, l.classes, 
l.w*l.h, &avg_cat); + + ++count; + ++class_count; + if(iou > .5) recall += 1; + if(iou > .75) recall75 += 1; + avg_iou += iou; + } + } + } + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", net.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count); +} + +void backward_yolo_layer(const layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +{ + int i; + int new_w=0; + int new_h=0; + if (((float)netw/w) < ((float)neth/h)) { + new_w = netw; + new_h = (h * netw)/w; + } else { + new_h = neth; + new_w = (w * neth)/h; + } + for (i = 0; i < n; ++i){ + box b = dets[i].bbox; + b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); + b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); + b.w *= (float)netw/new_w; + b.h *= (float)neth/new_h; + if(!relative){ + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + dets[i].bbox = b; + } +} + +int yolo_num_detections(layer l, float thresh) +{ + int i, n; + int count = 0; + for (i = 0; i < l.w*l.h; ++i){ + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4); + if(l.output[obj_index] > thresh){ + ++count; + } + } + } + return count; +} + +void avg_flipped_yolo(layer l) +{ + int i,j,n,z; + float *flip = l.output + l.outputs; + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w/2; ++i) { + for (n = 0; n < l.n; ++n) { + for(z = 0; z < l.classes + 4 + 1; ++z){ + int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; + int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); + float swap = flip[i1]; + flip[i1] = flip[i2]; + flip[i2] = swap; + if(z == 0){ + flip[i1] = -flip[i1]; + flip[i2] = -flip[i2]; + } + } + } + } + } + for(i = 0; i < l.outputs; ++i){ + l.output[i] = (l.output[i] + 
flip[i])/2.; + } +} + +int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets) +{ + int i,j,n; + float *predictions = l.output; + if (l.batch == 2) avg_flipped_yolo(l); + int count = 0; + for (i = 0; i < l.w*l.h; ++i){ + int row = i / l.w; + int col = i % l.w; + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4); + float objectness = predictions[obj_index]; + if(objectness <= thresh) continue; + int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); + dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); + dets[count].objectness = objectness; + dets[count].classes = l.classes; + for(j = 0; j < l.classes; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, 4 + 1 + j); + float prob = objectness*predictions[class_index]; + dets[count].prob[j] = (prob > thresh) ? prob : 0; + } + ++count; + } + } + correct_yolo_boxes(dets, count, w, h, netw, neth, relative); + return count; +} + +#ifdef GPU + +void forward_yolo_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array_gpu(l.output_gpu + index, (1+l.classes)*l.w*l.h, LOGISTIC); + } + } + if(!net.train || l.onlyforward){ + cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + return; + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_yolo_layer(l, net); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_yolo_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/uvm_prefetch/darknet/src/yolo_layer.h 
b/workloads/realworld/uvm_prefetch/darknet/src/yolo_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..d2a0243268146e00ebff2b4b11bce23f830689d1 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/src/yolo_layer.h @@ -0,0 +1,19 @@ +#ifndef YOLO_LAYER_H +#define YOLO_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes); +void forward_yolo_layer(const layer l, network net); +void backward_yolo_layer(const layer l, network net); +void resize_yolo_layer(layer *l, int w, int h); +int yolo_num_detections(layer l, float thresh); + +#ifdef GPU +void forward_yolo_layer_gpu(const layer l, network net); +void backward_yolo_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch/darknet/yolov3-tiny/predictions.jpg b/workloads/realworld/uvm_prefetch/darknet/yolov3-tiny/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e76dabc8b31ad049905fe65ca8aeee298ae22f2e Binary files /dev/null and b/workloads/realworld/uvm_prefetch/darknet/yolov3-tiny/predictions.jpg differ diff --git a/workloads/realworld/uvm_prefetch/darknet/yolov3-tiny/run_super.sh b/workloads/realworld/uvm_prefetch/darknet/yolov3-tiny/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..d890a7c581dc59167fab1b93778d143fc7e679a9 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/yolov3-tiny/run_super.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm_prefetch/darknet/yolov3-tiny/run_yolov3-tiny.sh b/workloads/realworld/uvm_prefetch/darknet/yolov3-tiny/run_yolov3-tiny.sh new file mode 100755 index 0000000000000000000000000000000000000000..d890a7c581dc59167fab1b93778d143fc7e679a9 --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch/darknet/yolov3-tiny/run_yolov3-tiny.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm_prefetch/darknet/yolov3-tiny_b/run_super.sh b/workloads/realworld/uvm_prefetch/darknet/yolov3-tiny_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..31669e62fb94142e7dc24b3f905c8f1d25950367 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/yolov3-tiny_b/run_super.sh @@ -0,0 +1,2 @@ +#../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg +../darknet detector infer ../cfg/coco.data ../cfg/yolov3-tiny_b.cfg diff --git a/workloads/realworld/uvm_prefetch/darknet/yolov3-tiny_t/run_super.sh b/workloads/realworld/uvm_prefetch/darknet/yolov3-tiny_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..6cc56bc601476fa212f615b5aec964f12e044473 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/yolov3-tiny_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg +../darknet detector train ../cfg/coco.data ../cfg/yolov3-tiny_t.cfg diff --git a/workloads/realworld/uvm_prefetch/darknet/yolov3/predictions.jpg b/workloads/realworld/uvm_prefetch/darknet/yolov3/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ab9d6f14fb2743a6fc5ac59a712348a2383edf7f Binary files /dev/null and b/workloads/realworld/uvm_prefetch/darknet/yolov3/predictions.jpg differ diff --git a/workloads/realworld/uvm_prefetch/darknet/yolov3/run_super.sh b/workloads/realworld/uvm_prefetch/darknet/yolov3/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..440a4b456a80a434243dffa9614d5a501790990f --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/yolov3/run_super.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3.cfg 
../../../../../data/darknet/yolov3.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm_prefetch/darknet/yolov3/run_yolov3.sh b/workloads/realworld/uvm_prefetch/darknet/yolov3/run_yolov3.sh new file mode 100755 index 0000000000000000000000000000000000000000..440a4b456a80a434243dffa9614d5a501790990f --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/yolov3/run_yolov3.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm_prefetch/darknet/yolov3_b/run_super.sh b/workloads/realworld/uvm_prefetch/darknet/yolov3_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..913790c1ee53a0d442a89306fbd8bda93faa2581 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/yolov3_b/run_super.sh @@ -0,0 +1,2 @@ +#../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg +../darknet detector infer ../cfg/coco.data ../cfg/yolov3_b.cfg diff --git a/workloads/realworld/uvm_prefetch/darknet/yolov3_t/run_super.sh b/workloads/realworld/uvm_prefetch/darknet/yolov3_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ee7df07c1a4172f25c1129d8027095d2e3861e28 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/darknet/yolov3_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg +../darknet detector train ../cfg/coco.data ../cfg/yolov3_t.cfg diff --git a/workloads/realworld/uvm_prefetch/hotspot/Makefile b/workloads/realworld/uvm_prefetch/hotspot/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..ca348f25bcda1ae4de926a3b112cd792a7848251 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/hotspot/Makefile @@ -0,0 +1,25 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include + +SRC = hotspot.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp 
$(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = hotspot + +release: $(SRC) + $(CC) $(KERNEL_DIM) $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +enum: $(SRC) + $(CC) $(KERNEL_DIM) -deviceemu $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +debug: $(SRC) + $(CC) $(KERNEL_DIM) -g $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +debugenum: $(SRC) + $(CC) $(KERNEL_DIM) -g -deviceemu $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt + diff --git a/workloads/realworld/uvm_prefetch/hotspot/Makefile_nvidia b/workloads/realworld/uvm_prefetch/hotspot/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..1f7ae25a90c968563e96208c693b927b6765490a --- /dev/null +++ b/workloads/realworld/uvm_prefetch/hotspot/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 
2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := hotspot +# CUDA source files (compiled with cudacc) +CUFILES := hotspot.cu +# CUDA dependency files +# CU_DEPS := needle_kernel.cu +# C/C++ source files (compiled with gcc / c++) +# CCFILES := + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/uvm_prefetch/hotspot/README b/workloads/realworld/uvm_prefetch/hotspot/README new file mode 100644 index 0000000000000000000000000000000000000000..f24239abebe938fe3e8d3c1a7a97f915bd09a90b --- /dev/null +++ b/workloads/realworld/uvm_prefetch/hotspot/README @@ -0,0 +1,8 @@ +******Adjustable work group size***** +The kernel has square shape +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe one dimension +The actually dimension = RD_WG_SIZE_0 * RD_WG_SIZE_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/hotspot/hotspot.cu b/workloads/realworld/uvm_prefetch/hotspot/hotspot.cu new file mode 100644 index 0000000000000000000000000000000000000000..c174c0ee25ebfdcec04657ce595ed4cf44287e59 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/hotspot/hotspot.cu @@ -0,0 +1,400 @@ +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" 
+#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#ifdef RD_WG_SIZE_0_0 +#define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) +#define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) +#define BLOCK_SIZE RD_WG_SIZE +#else +#define BLOCK_SIZE 16 +#endif + +#define STR_SIZE 256 + +/* maximum power density possible (say 300W for a 10mm x 10mm chip) */ +#define MAX_PD (3.0e6) +/* required precision in degrees */ +#define PRECISION 0.001 +#define SPEC_HEAT_SI 1.75e6 +#define K_SI 100 +/* capacitance fitting factor */ +#define FACTOR_CHIP 0.5 + +/* chip parameters */ +float t_chip = 0.0005; +float chip_height = 0.016; +float chip_width = 0.016; +/* ambient temperature, assuming no package at all */ +float amb_temp = 80.0; + +void run(int argc, char **argv); + +/* define timer macros */ +#define pin_stats_reset() startCycle() +#define pin_stats_pause(cycles) stopCycle(cycles) +#define pin_stats_dump(cycles) printf("timer: %Lu\n", cycles) + +void fatal(char *s) +{ + fprintf(stderr, "error: %s\n", s); +} + +void writeoutput(float *vect, int grid_rows, int grid_cols, char *file) +{ + + int i, j, index = 0; + FILE *fp; + char str[STR_SIZE]; + + if ((fp = fopen(file, "w")) == 0) + printf("The file was not opened\n"); + + for (i = 0; i < grid_rows; i++) + for (j = 0; j < grid_cols; j++) + { + + sprintf(str, "%d\t%g\n", index, vect[i * grid_cols + j]); + fputs(str, fp); + index++; + } + + fclose(fp); +} + +void readinput(float *vect, int grid_rows, int grid_cols, char *file) +{ + + int i, j; + FILE *fp; + char str[STR_SIZE]; + float val; + + if ((fp = fopen(file, "r")) == 0) + printf("The file was not opened\n"); + + for (i = 0; i <= grid_rows - 1; i++) + for (j = 0; j <= grid_cols - 1; j++) + { + fgets(str, STR_SIZE, fp); + if (feof(fp)) + fatal("not enough lines in file"); + // if ((sscanf(str, "%d%f", &index, &val) != 2) || (index != ((i-1)*(grid_cols-2)+j-1))) + if 
((sscanf(str, "%f", &val) != 1)) + fatal("invalid file format"); + vect[i * grid_cols + j] = val; + } + + fclose(fp); +} + +#define IN_RANGE(x, min, max) ((x) >= (min) && (x) <= (max)) +#define CLAMP_RANGE(x, min, max) x = (x < (min)) ? min : ((x > (max)) ? max : x) +#define MIN(a, b) ((a) <= (b) ? (a) : (b)) + +__global__ void calculate_temp(int iteration, // number of iteration + float *power, // power input + float *temp_src, // temperature input/output + float *temp_dst, // temperature input/output + int grid_cols, // Col of grid + int grid_rows, // Row of grid + int border_cols, // border offset + int border_rows, // border offset + float Cap, // Capacitance + float Rx, + float Ry, + float Rz, + float step, + int batch_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + __shared__ float temp_on_cuda[BLOCK_SIZE][BLOCK_SIZE]; + __shared__ float power_on_cuda[BLOCK_SIZE][BLOCK_SIZE]; + __shared__ float temp_t[BLOCK_SIZE][BLOCK_SIZE]; // saving temparary temperature result + + float amb_temp = 80.0; + float step_div_Cap; + float Rx_1, Ry_1, Rz_1; + + // int bx = blockIdx.x; + // int by = blockIdx.y; + + int tx = threadIdx.x; + int ty = threadIdx.y; + + step_div_Cap = step / Cap; + + Rx_1 = 1 / Rx; + Ry_1 = 1 / Ry; + Rz_1 = 1 / Rz; + + // each block finally computes result for a small block + // after N iterations. 
+ // it is the non-overlapping small blocks that cover + // all the input data + + // calculate the small block size + int small_block_rows = BLOCK_SIZE - iteration * 2; // EXPAND_RATE + int small_block_cols = BLOCK_SIZE - iteration * 2; // EXPAND_RATE + + // if (bx == 0 && by == 0 && tx == 0 && ty == 0) + // printf("iteration is %d, small_block_rows is %d\n", iteration, small_block_rows); + + int tile_dim_x = gridDim.x * batch_size; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = batch_size * batch_size; + int tiles_this_block_x = batch_size; + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + // block id + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + int bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + // calculate the boundary for the block according to + // the boundary of its small block + int blkY = small_block_rows * by - border_rows; + int blkX = small_block_cols * bx - border_cols; + int blkYmax = blkY + BLOCK_SIZE - 1; + int blkXmax = blkX + BLOCK_SIZE - 1; + + // calculate the global thread coordination + int yidx = blkY + ty; + int xidx = blkX + tx; + + // load data if it is within the valid input range + int loadYidx = yidx, loadXidx = xidx; + int index = grid_cols * loadYidx + loadXidx; + + if (IN_RANGE(loadYidx, 0, grid_rows - 1) && IN_RANGE(loadXidx, 0, grid_cols - 1)) + { + temp_on_cuda[ty][tx] = temp_src[index]; // Load the temperature data from global memory to shared memory + power_on_cuda[ty][tx] = power[index]; // Load the power data from global memory to shared memory + } + block.sync(); + + // effective range within this block that falls within + // the valid range of the input data + // used to rule out computation outside the boundary. 
+ int validYmin = (blkY < 0) ? -blkY : 0; + int validYmax = (blkYmax > grid_rows - 1) ? BLOCK_SIZE - 1 - (blkYmax - grid_rows + 1) : BLOCK_SIZE - 1; + int validXmin = (blkX < 0) ? -blkX : 0; + int validXmax = (blkXmax > grid_cols - 1) ? BLOCK_SIZE - 1 - (blkXmax - grid_cols + 1) : BLOCK_SIZE - 1; + + int N = ty - 1; + int S = ty + 1; + int W = tx - 1; + int E = tx + 1; + + N = (N < validYmin) ? validYmin : N; + S = (S > validYmax) ? validYmax : S; + W = (W < validXmin) ? validXmin : W; + E = (E > validXmax) ? validXmax : E; + + bool computed; + for (int i = 0; i < iteration; i++) + { + computed = false; + if (IN_RANGE(tx, i + 1, BLOCK_SIZE - i - 2) && + IN_RANGE(ty, i + 1, BLOCK_SIZE - i - 2) && + IN_RANGE(tx, validXmin, validXmax) && + IN_RANGE(ty, validYmin, validYmax)) + { + computed = true; + temp_t[ty][tx] = temp_on_cuda[ty][tx] + step_div_Cap * (power_on_cuda[ty][tx] + + (temp_on_cuda[S][tx] + temp_on_cuda[N][tx] - 2.0 * temp_on_cuda[ty][tx]) * Ry_1 + + (temp_on_cuda[ty][E] + temp_on_cuda[ty][W] - 2.0 * temp_on_cuda[ty][tx]) * Rx_1 + + (amb_temp - temp_on_cuda[ty][tx]) * Rz_1); + } + block.sync(); + if (i == iteration - 1) + break; + if (computed) // Assign the computation range + temp_on_cuda[ty][tx] = temp_t[ty][tx]; + block.sync(); + } + + // update the global memory + // after the last iteration, only threads coordinated within the + // small block perform the calculation and switch on ``computed'' + if (computed) + { + temp_dst[index] = temp_t[ty][tx]; + } + } +} + +/* + compute N time steps +*/ + +int compute_tran_temp(float *MatrixPower, float *MatrixTemp[2], int col, int row, + int total_iterations, int num_iterations, int blockCols, int blockRows, int borderCols, int borderRows, int batch_size) +{ + dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE); + dim3 dimGrid(blockCols, blockRows); + + float grid_height = chip_height / row; + float grid_width = chip_width / col; + + float Cap = FACTOR_CHIP * SPEC_HEAT_SI * t_chip * grid_width * grid_height; + float Rx = 
grid_width / (2.0 * K_SI * t_chip * grid_height); + float Ry = grid_height / (2.0 * K_SI * t_chip * grid_width); + float Rz = t_chip / (K_SI * grid_height * grid_width); + + float max_slope = MAX_PD / (FACTOR_CHIP * t_chip * SPEC_HEAT_SI); + float step = PRECISION / max_slope; + float t; + + int src = 1, dst = 0; + + for (t = 0; t < total_iterations; t += num_iterations) + { + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(MatrixPower, col * row * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(MatrixTemp[src], col * row * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(MatrixTemp[dst], col * row * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + int temp = src; + src = dst; + dst = temp; + calculate_temp<<>>(MIN(num_iterations, total_iterations - t), MatrixPower, MatrixTemp[src], MatrixTemp[dst], + col, row, borderCols, borderRows, Cap, Rx, Ry, Rz, step, batch_size); + } + return dst; +} + +void usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "\t - number of rows/cols in the grid (positive integer)\n"); + fprintf(stderr, "\t - pyramid heigh(positive integer)\n"); + fprintf(stderr, "\t - number of iterations\n"); + fprintf(stderr, "\t - name of the file containing the initial temperature values of each cell\n"); + fprintf(stderr, "\t - name of the file containing the dissipated power values of each cell\n"); + fprintf(stderr, "\t - name of the output file\n"); + fprintf(stderr, "\t - batch_size * batch_size per block\n"); + exit(1); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE); + 
+ run(argc, argv); + + return EXIT_SUCCESS; +} + +void run(int argc, char **argv) +{ + int size; + int grid_rows, grid_cols; + float *FilesavingTemp, *FilesavingPower, *MatrixOut; + char *tfile, *pfile, *ofile; + + int total_iterations = 60; + int pyramid_height = 1; // number of iterations + + if (argc != 8) + usage(argc, argv); + if ((grid_rows = atoi(argv[1])) <= 0 || + (grid_cols = atoi(argv[1])) <= 0 || + (pyramid_height = atoi(argv[2])) <= 0 || + (total_iterations = atoi(argv[3])) <= 0) + usage(argc, argv); + + tfile = argv[4]; + pfile = argv[5]; + ofile = argv[6]; + + int batch_size = atoi(argv[7]); + + size = grid_rows * grid_cols; + +/* --------------- pyramid parameters --------------- */ +#define EXPAND_RATE 2 // add one iteration will extend the pyramid base by 2 per each borderline + int borderCols = (pyramid_height)*EXPAND_RATE / 2; + int borderRows = (pyramid_height)*EXPAND_RATE / 2; + int smallBlockCol = BLOCK_SIZE - (pyramid_height)*EXPAND_RATE; + int smallBlockRow = BLOCK_SIZE - (pyramid_height)*EXPAND_RATE; + // int blockCols = grid_cols / smallBlockCol + ((grid_cols % smallBlockCol == 0) ? 0 : 1); + // int blockRows = grid_rows / smallBlockRow + ((grid_rows % smallBlockRow == 0) ? 
0 : 1); + + int blockCols = (grid_cols + smallBlockCol * batch_size - 1) / (smallBlockCol * batch_size); + int blockRows = (grid_rows + smallBlockRow * batch_size - 1) / (smallBlockRow * batch_size); + + // printf("borderCols is %d, smallBlockCol is %d, blockCols is %d, grid_cols is %d \n", borderCols, smallBlockCol, blockCols, grid_cols); + + FilesavingTemp = (float *)malloc(size * sizeof(float)); + FilesavingPower = (float *)malloc(size * sizeof(float)); + MatrixOut = (float *)calloc(size, sizeof(float)); + + if (!FilesavingPower || !FilesavingTemp || !MatrixOut) + fatal("unable to allocate memory"); + + printf("pyramidHeight: %d\ngridSize: [%d, %d]\nborder:[%d, %d]\nblockGrid:[%d, %d]\ntargetBlock:[%d, %d]\n", + pyramid_height, grid_cols, grid_rows, borderCols, borderRows, blockCols, blockRows, smallBlockCol, smallBlockRow); + + readinput(FilesavingTemp, grid_rows, grid_cols, tfile); + readinput(FilesavingPower, grid_rows, grid_cols, pfile); + + GPU_argv_init(); + + initTrace(); + startCPU(); + float *MatrixTemp[2], *MatrixPower; + cudaMallocManaged((void **)&MatrixTemp[0], sizeof(float) * size); + cudaMallocManaged((void **)&MatrixTemp[1], sizeof(float) * size); + memcpy(MatrixTemp[0], FilesavingTemp, sizeof(float) * size); + + cudaMallocManaged((void **)&MatrixPower, sizeof(float) * size); + memcpy(MatrixPower, FilesavingPower, sizeof(float) * size); + // printf("Start computing the transient temperature\n"); + int ret = compute_tran_temp(MatrixPower, MatrixTemp, grid_cols, grid_rows, + total_iterations, pyramid_height, blockCols, blockRows, borderCols, borderRows, batch_size); + // printf("Ending simulation\n"); + memcpy(MatrixOut, MatrixTemp[ret], sizeof(float) * size); + + cudaFree(MatrixPower); + cudaFree(MatrixTemp[0]); + cudaFree(MatrixTemp[1]); + + endCPU(); + finiTrace(); + + writeoutput(MatrixOut, grid_rows, grid_cols, ofile); + free(MatrixOut); +} diff --git a/workloads/realworld/uvm_prefetch/hotspot/run.sh 
b/workloads/realworld/uvm_prefetch/hotspot/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..66b814725286fb6699d2b893740518ad43dc307a --- /dev/null +++ b/workloads/realworld/uvm_prefetch/hotspot/run.sh @@ -0,0 +1 @@ +./hotspot 512 2 2 ../../../../data/hotspot/temp_512 ../../../../data/hotspot/power_512 output.out 4 diff --git a/workloads/realworld/uvm_prefetch/hotspot/run_mega.sh b/workloads/realworld/uvm_prefetch/hotspot/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..68ff2e14db1e57289f0c85c6472049c5b99c62f4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/hotspot/run_mega.sh @@ -0,0 +1 @@ +./hotspot 16384 2 2 ../../../../data/hotspot/temp_16384 ../../../../data/hotspot/power_16384 output.out 8 diff --git a/workloads/realworld/uvm_prefetch/hotspot/run_super.sh b/workloads/realworld/uvm_prefetch/hotspot/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ad31b9dcd0ce3e1d53df5aaf445082ab86cf2366 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/hotspot/run_super.sh @@ -0,0 +1 @@ +./hotspot 8192 2 2 ../../../../data/hotspot/temp_8192.txt ../../../../data/hotspot/power_8192.txt output.out 8 diff --git a/workloads/realworld/uvm_prefetch/kmeans/Makefile b/workloads/realworld/uvm_prefetch/kmeans/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..e473b45be17e5805399c15b04dda57742451d913 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/kmeans/Makefile @@ -0,0 +1,33 @@ +include ../../../common/make.config + +# C compiler +CC = gcc +CC_FLAGS = -g -fopenmp -O2 + +# CUDA compiler +NVCC = $(CUDA_DIR)/bin/nvcc +NVCC_FLAGS = -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) + +# 'make dbg=1' enables NVCC debugging + + +# 'make emu=1' compiles the CUDA kernels for emulation +ifeq ($(emu),1) + NVCC_FLAGS += -deviceemu +endif + + +kmeans: cluster.o getopt.o kmeans.o kmeans_clustering.o kmeans_cuda.o rmse.o $(CUPTI_ADD_COMMON)/cpu_timestamps.o + $(CC) $(CC_FLAGS) 
cluster.o getopt.o kmeans.o kmeans_clustering.o kmeans_cuda.o rmse.o $(CUPTI_ADD_COMMON)/cpu_timestamps.o $(CUPTI_ADD_COMMON)/cupti_add.cpp -o kmeans $(NVCC_FLAGS) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++ + +kmeans.o: kmeans.c + $(CC) $(CC_FLAGS) $< -c $(NVCC_FLAGS) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +%.o: %.[ch] + $(CC) $(CC_FLAGS) $< -c -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lstdc++ + +kmeans_cuda.o: kmeans_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp + $(NVCC) -O2 -c kmeans_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(NVCC_FLAGS) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +clean: + rm -f *.o *~ kmeans kmeans_cuda.linkinfo diff --git a/workloads/realworld/uvm_prefetch/kmeans/Makefile_nvidia b/workloads/realworld/uvm_prefetch/kmeans/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..5d612b9dc8b5c19310162e1f721e1d2d4e33fb72 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/kmeans/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
+# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := kmeans +# CUDA source files (compiled with cudacc) +CUFILES := kmeans_cuda.cu +# CUDA dependency files +CU_DEPS := kmeans_cuda_kernel.cu +# C/C++ source files (compiled with gcc / c++) +CFILES := cluster.c getopt.c kmeans.c kmeans_clustering.c rmse.c + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/uvm_prefetch/kmeans/README b/workloads/realworld/uvm_prefetch/kmeans/README new file mode 100755 index 0000000000000000000000000000000000000000..bebae52d716986889b30a435214e095346424190 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/kmeans/README @@ -0,0 +1,10 @@ +Usage: ./kmeans [switches] -i filename + + -i filename :file containing data to be clustered + -m max_nclusters :maximum number of clusters allowed [default=5] + -n 
min_nclusters :minimum number of clusters allowed [default=5] + -t threshold :threshold value [default=0.001] + -l nloops :iteration for each number of clusters [default=1] + -b :input file is in binary format + -r :calculate RMSE [default=off] + -o :output cluster center coordinates [default=off] \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/kmeans/cluster.c b/workloads/realworld/uvm_prefetch/kmeans/cluster.c new file mode 100755 index 0000000000000000000000000000000000000000..c4010b11c0306acae331279b89b1a63f3ad5637d --- /dev/null +++ b/workloads/realworld/uvm_prefetch/kmeans/cluster.c @@ -0,0 +1,165 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. 
*/ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: cluster.c **/ +/** Description: Takes as input a file, containing 1 data point per **/ +/** per line, and performs a fuzzy c-means clustering **/ +/** on the data. Fuzzy clustering is performed using **/ +/** min to max clusters and the clustering that gets **/ +/** the best score according to a compactness and **/ +/** separation criterion are returned. **/ +/** Author: Brendan McCane **/ +/** James Cook University of North Queensland. **/ +/** Australia. email: mccane@cs.jcu.edu.au **/ +/** **/ +/** Edited by: Jay Pisharath, Wei-keng Liao **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. 
**/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation criteria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include "kmeans.h" +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) \ + (((uintptr_t) (buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer)) + +// Timestamp at trace initialization time. Used to normalize other +// timestamps + +extern double wtime(void); +float min_rmse_ref = FLT_MAX; /* reference min_rmse value */ + +/*---< cluster() >-----------------------------------------------------------*/ +int cluster(int npoints, /* number of data points */ + int nfeatures, /* number of attributes for each point */ + float **features, /* array: [npoints][nfeatures] */ + int min_nclusters, /* range of min to max number of clusters */ + int max_nclusters, + float threshold, /* loop terminating factor */ + int *best_nclusters, /* out: number between min and max with lowest RMSE */ + float ***cluster_centres, /* out: [best_nclusters][nfeatures] */ + float *min_rmse, /* out: minimum RMSE */ + int isRMSE, /* calculate RMSE */ + int nloops /* number of iteration for each number of clusters */ + ) +{ + + + int nclusters; /* number of clusters k */ + int index =0; /* number of iteration to reach the best RMSE */ + int rmse; /* RMSE for each clustering */ + int *membership; /* which cluster a data point belongs to */ + float **tmp_cluster_centres; /* hold coordinates of cluster centers */ + int i; + + /* allocate memory for membership */ + membership = (int*) malloc(npoints * sizeof(int)); + + /* sweep k from min to max_nclusters to find the best number of clusters */ + for(nclusters = min_nclusters; nclusters <= max_nclusters; nclusters++) + { + if (nclusters > npoints) 
break; /* cannot have more clusters than points */ + + /* allocate device memory, invert data array (@ kmeans_cuda.cu) */ + allocateMemory(npoints, nfeatures, nclusters, features); + + /* iterate nloops times for each number of clusters */ + for(i = 0; i < nloops; i++) + { + /* initialize initial cluster centers, CUDA calls (@ kmeans_cuda.cu) */ + tmp_cluster_centres = kmeans_clustering(features, + nfeatures, + npoints, + nclusters, + threshold, + membership); + + if (*cluster_centres) { + free((*cluster_centres)[0]); + free(*cluster_centres); + } + *cluster_centres = tmp_cluster_centres; + + + /* find the number of clusters with the best RMSE */ + if(isRMSE) + { + rmse = rms_err(features, + nfeatures, + npoints, + tmp_cluster_centres, + nclusters); + + if(rmse < min_rmse_ref){ + min_rmse_ref = rmse; //update reference min RMSE + *min_rmse = min_rmse_ref; //update return min RMSE + *best_nclusters = nclusters; //update optimum number of clusters + index = i; //update number of iteration to reach best RMSE + } + } + } + + deallocateMemory(); /* free device memory (@ kmeans_cuda.cu) */ + } + + free(membership); + + return index; +} + diff --git a/workloads/realworld/uvm_prefetch/kmeans/cp.sh b/workloads/realworld/uvm_prefetch/kmeans/cp.sh new file mode 100755 index 0000000000000000000000000000000000000000..243885047459789eb8eae5d6c5b2b4822eb3aabf --- /dev/null +++ b/workloads/realworld/uvm_prefetch/kmeans/cp.sh @@ -0,0 +1,27 @@ +cp super_0.log super_3.log +cp super_0.log super_4.log +cp super_0.log super_5.log +cp super_0.log super_6.log +cp super_0.log super_7.log +cp super_0.log super_8.log +cp super_0.log super_9.log +cp super_0.log super_10.log +cp super_0.log super_11.log +cp super_0.log super_12.log +cp super_0.log super_13.log +cp super_0.log super_14.log +cp super_0.log super_15.log +cp super_0.log super_16.log +cp super_0.log super_17.log +cp super_0.log super_18.log +cp super_0.log super_19.log +cp super_0.log super_20.log +cp super_0.log super_21.log +cp 
super_0.log super_22.log +cp super_0.log super_23.log +cp super_0.log super_24.log +cp super_0.log super_25.log +cp super_0.log super_26.log +cp super_0.log super_27.log +cp super_0.log super_28.log +cp super_0.log super_29.log diff --git a/workloads/realworld/uvm_prefetch/kmeans/getopt.c b/workloads/realworld/uvm_prefetch/kmeans/getopt.c new file mode 100755 index 0000000000000000000000000000000000000000..fa2f31378fb2978f65267ba2e810aae3ff1ee016 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/kmeans/getopt.c @@ -0,0 +1,1184 @@ +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to drepper@gnu.org + before changing it! + Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . */ +#ifndef _NO_PROTO +# define _NO_PROTO +#endif + +#ifdef HAVE_CONFIG_H +# include +#endif + +#if !defined __STDC__ || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. 
*/ +# ifndef const +# define const +# endif +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +# include +# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +# define ELIDE_CODE +# endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +# include +# include +#endif /* GNU C library. */ + +#ifdef VMS +# include +# if HAVE_STRING_H - 0 +# include +# endif +#endif + +#ifndef _ +/* This is for other GNU distributions with internationalized messages. */ +# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include +# ifndef _ +# define _(msgid) gettext (msgid) +# endif +# else +# define _(msgid) (msgid) +# endif +# if defined _LIBC && defined USE_IN_LIBIO +# include +# endif +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. 
+ Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. */ +int optind = 1; + +/* Formerly, initialization of getopt depended on optind==0, which + causes problems with re-calling getopt as programs generally don't + know that. */ + +int __getopt_initialized; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. 
+ + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +/* Value of POSIXLY_CORRECT environment variable. */ +static char *posixly_correct; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +# include +# define my_index strchr +#else + +//# if HAVE_STRING_H || WIN32 /* Pete Wilson mod 7/28/02 */ +# include +//# else +//# include +//# endif + +/* Avoid depending on library functions or files + whose names are inconsistent. 
*/ + +#ifndef getenv +extern char *getenv (); +#endif + +static char * +my_index (str, chr) + const char *str; + int chr; +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. */ +#ifdef __GNUC__ +/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. + That was relevant to code that was here before. */ +# if (!defined __STDC__ || !__STDC__) && !defined strlen +/* gcc with -traditional declares the built-in strlen to return int, + and has done so at least since version 2.4.5. -- rms. */ +extern int strlen (const char *); +# endif /* not __STDC__ */ +#endif /* __GNUC__ */ + +#endif /* not __GNU_LIBRARY__ */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +#ifdef _LIBC +/* Stored original parameters. + XXX This is no good solution. We should rather copy the args so + that we can compare them later. But we must not use malloc(3). */ +extern int __libc_argc; +extern char **__libc_argv; + +/* Bash 2.0 gives us an environment variable containing flags + indicating ARGV elements that should not be considered arguments. 
*/ + +# ifdef USE_NONOPTION_FLAGS +/* Defined in getopt_init.c */ +extern char *__getopt_nonoption_flags; + +static int nonoption_flags_max_len; +static int nonoption_flags_len; +# endif + +# ifdef USE_NONOPTION_FLAGS +# define SWAP_FLAGS(ch1, ch2) \ + if (nonoption_flags_len > 0) \ + { \ + char __tmp = __getopt_nonoption_flags[ch1]; \ + __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ + __getopt_nonoption_flags[ch2] = __tmp; \ + } +# else +# define SWAP_FLAGS(ch1, ch2) +# endif +#else /* !_LIBC */ +# define SWAP_FLAGS(ch1, ch2) +#endif /* _LIBC */ + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +#if defined __STDC__ && __STDC__ +static void exchange (char **); +#endif + +static void +exchange (argv) + char **argv; +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + /* First make sure the handling of the `__getopt_nonoption_flags' + string can work normally. Our top argument must be in the range + of the string. */ + if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) + { + /* We must extend the array. The user plays games with us and + presents new arguments. 
*/ + char *new_str = malloc (top + 1); + if (new_str == NULL) + nonoption_flags_len = nonoption_flags_max_len = 0; + else + { + memset (__mempcpy (new_str, __getopt_nonoption_flags, + nonoption_flags_max_len), + '\0', top + 1 - nonoption_flags_max_len); + nonoption_flags_max_len = top + 1; + __getopt_nonoption_flags = new_str; + } + } +#endif + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + SWAP_FLAGS (bottom + i, middle + i); + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Initialize the internal data when the first call is made. */ + +#if defined __STDC__ && __STDC__ +static const char *_getopt_initialize (int, char *const *, const char *); +#endif +static const char * +_getopt_initialize (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. 
*/ + + first_nonopt = last_nonopt = optind; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + if (posixly_correct == NULL + && argc == __libc_argc && argv == __libc_argv) + { + if (nonoption_flags_max_len == 0) + { + if (__getopt_nonoption_flags == NULL + || __getopt_nonoption_flags[0] == '\0') + nonoption_flags_max_len = -1; + else + { + const char *orig_str = __getopt_nonoption_flags; + int len = nonoption_flags_max_len = strlen (orig_str); + if (nonoption_flags_max_len < argc) + nonoption_flags_max_len = argc; + __getopt_nonoption_flags = + (char *) malloc (nonoption_flags_max_len); + if (__getopt_nonoption_flags == NULL) + nonoption_flags_max_len = -1; + else + memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), + '\0', nonoption_flags_max_len - len); + } + } + nonoption_flags_len = nonoption_flags_max_len; + } + else + nonoption_flags_len = 0; +#endif + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. 
+ + If there are no more option characters, `getopt' returns -1. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. 
+ + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + int print_errors = opterr; + if (optstring[0] == ':') + print_errors = 0; + + if (argc < 1) + return -1; + + optarg = NULL; + + if (optind == 0 || !__getopt_initialized) + { + if (optind == 0) + optind = 1; /* Don't scan ARGV[0], the program name. */ + optstring = _getopt_initialize (argc, argv, optstring); + __getopt_initialized = 1; + } + + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The later information + is only used when the used in the GNU libc. */ +#if defined _LIBC && defined USE_NONOPTION_FLAGS +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ + || (optind < nonoption_flags_len \ + && __getopt_nonoption_flags[optind] == '1')) +#else +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') +#endif + + if (nextchar == NULL || *nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT and LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (last_nonopt > optind) + last_nonopt = optind; + if (first_nonopt > optind) + first_nonopt = optind; + + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. 
*/ + + while (optind < argc && NONOPTION_P) + optind++; + last_nonopt = optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (ordering == REQUIRE_ORDER) + return -1; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. 
*/ + + if (longopts != NULL + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) + == (unsigned int) strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else if (long_only + || pfound->has_arg != p->has_arg + || pfound->flag != p->flag + || pfound->val != p->val) + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + optopt = 0; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind - 1][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#else + fprintf (stderr, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], + pfound->name); +#else + fprintf (stderr, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + + nextchar += strlen (nextchar); + + optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. 
If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' + || my_index (optstring, *nextchar) == NULL) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + nextchar = (char *) ""; + optind++; + optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (posixly_correct) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: illegal option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); +#endif + } + else + { +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: invalid option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + optopt = c; + return '?'; + } + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';') + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) + /* Do nothing. 
*/ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = NULL; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#endif /* Not ELIDE_CODE. */ + + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/kmeans/getopt.h b/workloads/realworld/uvm_prefetch/kmeans/getopt.h new file mode 100755 index 0000000000000000000000000000000000000000..bae04bf7d418206d73892a94ff94923a36549362 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/kmeans/getopt.h @@ -0,0 +1,191 @@ + + +/* getopt.h */ +/* Declarations for getopt. + Copyright (C) 1989-1994, 1996-1999, 2001 Free Software + Foundation, Inc. This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute + it and/or modify it under the terms of the GNU Lesser + General Public License as published by the Free Software + Foundation; either version 2.1 of the License, or + (at your option) any later version. + + The GNU C Library is distributed in the hope that it will + be useful, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A + PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. 
+ + You should have received a copy of the GNU Lesser General + Public License along with the GNU C Library; if not, write + to the Free Software Foundation, Inc., 59 Temple Place, + Suite 330, Boston, MA 02111-1307 USA. */ + + + + + +#ifndef _GETOPT_H + +#ifndef __need_getopt +# define _GETOPT_H 1 +#endif + +/* If __GNU_LIBRARY__ is not already defined, either we are being used + standalone, or this is the first header included in the source file. + If we are being used with glibc, we need to include , but + that does not exist if we are standalone. So: if __GNU_LIBRARY__ is + not defined, include , which will pull in for us + if it's from glibc. (Why ctype.h? It's guaranteed to exist and it + doesn't flood the namespace with stuff the way some other headers do.) */ +#if !defined __GNU_LIBRARY__ +# include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +#ifndef __need_getopt +/* Describe the long-named options requested by the application. 
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +# if (defined __STDC__ && __STDC__) || defined __cplusplus + const char *name; +# else + char *name; +# endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +# define no_argument 0 +# define required_argument 1 +# define optional_argument 2 +#endif /* need getopt */ + + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, `optopt' is set to the option letter, and '?' is + returned. 
+ + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in `optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU `getopt'. + + The argument `--' causes premature termination of argument + scanning, explicitly telling `getopt' that there are no more + options. + + If OPTS begins with `--', then non-option arguments are treated as + arguments to the option '\0'. This behavior is specific to the GNU + `getopt'. */ + +#if (defined __STDC__ && __STDC__) || defined __cplusplus +# ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int ___argc, char *const *___argv, const char *__shortopts); +# else /* not __GNU_LIBRARY__ */ +extern int getopt (); +# endif /* __GNU_LIBRARY__ */ + +# ifndef __need_getopt +extern int getopt_long (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); +extern int getopt_long_only (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); + +/* Internal only. Users should not call this directly. */ +extern int _getopt_internal (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only); +# endif +#else /* not __STDC__ */ +extern int getopt (); +# ifndef __need_getopt +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +# endif +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +/* Make sure we later can get all the definitions and declarations. 
*/ +#undef __need_getopt + +#endif /* getopt.h */ + diff --git a/workloads/realworld/uvm_prefetch/kmeans/kmeans.c b/workloads/realworld/uvm_prefetch/kmeans/kmeans.c new file mode 100755 index 0000000000000000000000000000000000000000..b2668674074c4b29ede04ec09a06d29486a1ec6a --- /dev/null +++ b/workloads/realworld/uvm_prefetch/kmeans/kmeans.c @@ -0,0 +1,309 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: example.c **/ +/** Description: Takes as input a file: **/ +/** ascii file: containing 1 data point per line **/ +/** binary file: first int is the number of objects **/ +/** 2nd int is the no. of features of each **/ +/** object **/ +/** This example performs a fuzzy c-means clustering **/ +/** on the data. Fuzzy clustering is performed using **/ +/** min to max clusters and the clustering that gets **/ +/** the best score according to a compactness and **/ +/** separation criterion are returned. **/ +/** Author: Wei-keng Liao **/ +/** ECE Department Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. 
**/ +/** **/ +/*************************************************************************/ +#define _CRT_SECURE_NO_DEPRECATE 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#define _POSIX_C_SOURCE 200809L +#include +#include +#include "kmeans.h" + +extern double wtime(void); + + + +/*---< usage() >------------------------------------------------------------*/ +void usage(char *argv0) { + char *help = + "\nUsage: %s [switches] -i filename\n\n" + " -i filename :file containing data to be clustered\n" + " -m max_nclusters :maximum number of clusters allowed [default=5]\n" + " -n min_nclusters :minimum number of clusters allowed [default=5]\n" + " -t threshold :threshold value [default=0.001]\n" + " -l nloops :iteration for each number of clusters [default=1]\n" + " -b :input file is in binary format\n" + " -r :calculate RMSE [default=off]\n" + " -o :output cluster center coordinates [default=off]\n"; + fprintf(stderr, help, argv0); + exit(-1); +} + +/*---< main() >-------------------------------------------------------------*/ +int setup(int argc, char **argv) { + int opt; + extern char *optarg; + char *filename = 0; + float *buf; + char line[1024]; + int isBinaryFile = 0; + + float threshold = 0.001; /* default value */ + int max_nclusters=5; /* default value */ + int min_nclusters=5; /* default value */ + int best_nclusters = 0; + int nfeatures = 0; + int npoints = 0; + float len; + + float **features; + float **cluster_centres=NULL; + int i, j, index; + int nloops = 1; /* default value */ + + int isRMSE = 0; + float rmse; + + int isOutput = 0; + //float cluster_timing, io_timing; + + /* obtain command line arguments and change appropriate options */ + while ( (opt=getopt(argc,argv,"i:t:m:n:l:bro"))!= EOF) { + switch (opt) { + case 'i': filename=optarg; + break; + case 'b': isBinaryFile = 1; + break; + case 't': threshold=atof(optarg); + break; + case 'm': max_nclusters = atoi(optarg); + break; + case 'n': min_nclusters = 
atoi(optarg); + break; + case 'r': isRMSE = 1; + break; + case 'o': isOutput = 1; + break; + case 'l': nloops = atoi(optarg); + break; + case '?': usage(argv[0]); + break; + default: usage(argv[0]); + break; + } + } + + if (filename == 0) usage(argv[0]); + + /* ============== I/O begin ==============*/ + /* get nfeatures and npoints */ + //io_timing = omp_get_wtime(); + if (isBinaryFile) { //Binary file input + int infile; + if ((infile = open(filename, O_RDONLY, "0600")) == -1) { + fprintf(stderr, "Error: no such file (%s)\n", filename); + exit(1); + } + read(infile, &npoints, sizeof(int)); + read(infile, &nfeatures, sizeof(int)); + + /* allocate space for features[][] and read attributes of all objects */ + buf = (float*) malloc(npoints*nfeatures*sizeof(float)); + features = (float**)malloc(npoints* sizeof(float*)); + features[0] = (float*) malloc(npoints*nfeatures*sizeof(float)); + for (i=1; i npoints(%d) -- cannot proceed\n", min_nclusters, npoints); + exit(0); + } + + srand(7); /* seed for future random number generator */ + memcpy(features[0], buf, npoints*nfeatures*sizeof(float)); /* now features holds 2-dimensional array of features */ + free(buf); + + /* ======================= core of the clustering ===================*/ + + //cluster_timing = omp_get_wtime(); /* Total clustering time */ + cluster_centres = NULL; + index = cluster(npoints, /* number of data points */ + nfeatures, /* number of features for each point */ + features, /* array: [npoints][nfeatures] */ + min_nclusters, /* range of min to max number of clusters */ + max_nclusters, + threshold, /* loop termination factor */ + &best_nclusters, /* return: number between min and max */ + &cluster_centres, /* return: [best_nclusters][nfeatures] */ + &rmse, /* Root Mean Squared Error */ + isRMSE, /* calculate RMSE */ + nloops); /* number of iteration for each number of clusters */ + + //cluster_timing = omp_get_wtime() - cluster_timing; + + + /* =============== Command Line Output =============== */ 
+ + /* cluster center coordinates + :displayed only for when k=1*/ + if((min_nclusters == max_nclusters) && (isOutput == 1)) { + printf("\n================= Centroid Coordinates =================\n"); + for(i = 0; i < max_nclusters; i++){ + printf("%d:", i); + for(j = 0; j < nfeatures; j++){ + printf(" %.2f", cluster_centres[i][j]); + } + printf("\n\n"); + } + } + + len = (float) ((max_nclusters - min_nclusters + 1)*nloops); + + printf("Number of Iteration: %d\n", nloops); + //printf("Time for I/O: %.5fsec\n", io_timing); + //printf("Time for Entire Clustering: %.5fsec\n", cluster_timing); + + if(min_nclusters != max_nclusters){ + if(nloops != 1){ //range of k, multiple iteration + //printf("Average Clustering Time: %fsec\n", + // cluster_timing / len); + printf("Best number of clusters is %d\n", best_nclusters); + } + else{ //range of k, single iteration + //printf("Average Clustering Time: %fsec\n", + // cluster_timing / len); + printf("Best number of clusters is %d\n", best_nclusters); + } + } + else{ + if(nloops != 1){ // single k, multiple iteration + //printf("Average Clustering Time: %.5fsec\n", + // cluster_timing / nloops); + if(isRMSE) // if calculated RMSE + printf("Number of trials to approach the best RMSE of %.3f is %d\n", rmse, index + 1); + } + else{ // single k, single iteration + if(isRMSE) // if calculated RMSE + printf("Root Mean Squared Error: %.3f\n", rmse); + } + } + + + /* free up memory */ + free(features[0]); + free(features); + return(0); +} + diff --git a/workloads/realworld/uvm_prefetch/kmeans/kmeans.h b/workloads/realworld/uvm_prefetch/kmeans/kmeans.h new file mode 100755 index 0000000000000000000000000000000000000000..28b6c34732313f04c02b59b095361bc8142d4b05 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/kmeans/kmeans.h @@ -0,0 +1,60 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
*/ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. 
*/ +/******************************************************************************/ + +#ifndef _H_FUZZY_KMEANS +#define _H_FUZZY_KMEANS + +#ifndef FLT_MAX +#define FLT_MAX 3.40282347e+38 +#endif + +#include + +/* rmse.c */ +float euclid_dist_2 (float*, float*, int); +int find_nearest_point (float* , int, float**, int); +float rms_err(float**, int, int, float**, int); + +/* cluster.c */ +int cluster(int, int, float**, int, int, float, int*, float***, float*, int, int); + +/* kmeans_clustering.c */ +float **kmeans_clustering(float**, int, int, int, float, int*); + +#endif diff --git a/workloads/realworld/uvm_prefetch/kmeans/kmeans_clustering.c b/workloads/realworld/uvm_prefetch/kmeans/kmeans_clustering.c new file mode 100755 index 0000000000000000000000000000000000000000..54ddcd6d8ff6f0d075ff16e6b6aef84fda4716d2 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/kmeans/kmeans_clustering.c @@ -0,0 +1,178 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. 
*/ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: kmeans_clustering.c **/ +/** Description: Implementation of regular k-means clustering **/ +/** algorithm **/ +/** Author: Wei-keng Liao **/ +/** ECE Department, Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. 
**/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include + +#include "kmeans.h" + +#define RANDOM_MAX 2147483647 + +extern double wtime(void); + +/*----< kmeans_clustering() >---------------------------------------------*/ +float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */ + int nfeatures, + int npoints, + int nclusters, + float threshold, + int *membership) /* out: [npoints] */ +{ + int i, j, n = 0; /* counters */ + int loop=0, temp; + int *new_centers_len; /* [nclusters]: no. of points in each cluster */ + float delta; /* if the point moved */ + float **clusters; /* out: [nclusters][nfeatures] */ + float **new_centers; /* [nclusters][nfeatures] */ + + int *initial; /* used to hold the index of points not yet selected + prevents the "birthday problem" of dual selection (?) 
+ considered holding initial cluster indices, but changed due to + possible, though unlikely, infinite loops */ + int initial_points; + int c = 0; + + /* nclusters should never be > npoints + that would guarantee a cluster without points */ + if (nclusters > npoints) + nclusters = npoints; + + /* allocate space for and initialize returning variable clusters[] */ + clusters = (float**) malloc(nclusters * sizeof(float*)); + clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float)); + for (i=1; i= 0; i++) { + //n = (int)rand() % initial_points; + + for (j=0; j 0) + clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */ + new_centers[i][j] = 0.0; /* set back to 0 */ + } + new_centers_len[i] = 0; /* set back to 0 */ + } + c++; + } while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */ + printf("iterated %d times\n", c); + free(new_centers[0]); + free(new_centers); + free(new_centers_len); + + return clusters; +} + diff --git a/workloads/realworld/uvm_prefetch/kmeans/kmeans_cuda.cu b/workloads/realworld/uvm_prefetch/kmeans/kmeans_cuda.cu new file mode 100755 index 0000000000000000000000000000000000000000..493d1b58293293a9c58cc5414db1e96044547ca6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/kmeans/kmeans_cuda.cu @@ -0,0 +1,323 @@ +#include +#include +#include +#include +#include +#include + +#include + +#define THREADS_PER_DIM 16 +#define BLOCKS_PER_DIM 64 +#define THREADS_PER_BLOCK THREADS_PER_DIM*THREADS_PER_DIM +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" +#include "kmeans_cuda_kernel.cu" + + +//#define BLOCK_DELTA_REDUCE +//#define BLOCK_CENTER_REDUCE + +#define CPU_DELTA_REDUCE +#define CPU_CENTER_REDUCE + +extern "C" +int setup(int argc, char** argv); /* function prototype */ + +// GLOBAL!!!!! 
+unsigned int num_threads_perdim = THREADS_PER_DIM; /* sqrt(256) -- see references for this choice */ +unsigned int num_blocks_perdim = BLOCKS_PER_DIM; /* temporary */ +unsigned int num_threads = num_threads_perdim*num_threads_perdim; /* number of threads */ +unsigned int num_blocks = num_blocks_perdim*num_blocks_perdim; /* number of blocks */ + +/* _d denotes it resides on the device */ +int *membership_new; /* newly assignment membership */ +float *feature_d; /* inverted data array */ +float *feature_flipped_d; /* original (not inverted) data array */ +int *membership_d; /* membership on the device */ +float *block_new_centers; /* sum of points in a cluster (per block) */ +float *clusters_d; /* cluster centers on the device */ +float *block_clusters_d; /* per block calculation of cluster centers */ +int *block_deltas_d; /* per block calculation of deltas */ + + +/* -------------- allocateMemory() ------------------- */ +/* allocate device memory, calculate number of blocks and threads, and invert the data array */ +extern "C" +void allocateMemory(int npoints, int nfeatures, int nclusters, float **features) +{ + // printf("npoints is %d, num_threads is %d\n", npoints, num_threads); + // num_blocks = npoints / num_threads; + // if (npoints % num_threads > 0) /* defeat truncation */ + // num_blocks++; + + // num_blocks_perdim = sqrt((double) num_blocks); + // while (num_blocks_perdim * num_blocks_perdim < num_blocks) // defeat truncation (should run once) + // num_blocks_perdim++; + + num_blocks = num_blocks_perdim*num_blocks_perdim; + + /* allocate memory for memory_new[] and initialize to -1 (host) */ + membership_new = (int*) malloc(npoints * sizeof(int)); + for(int i=0;i>>(feature_flipped_d, feature_d, npoints, (num_blocks_perdim * num_blocks_perdim * num_threads_perdim * num_threads_perdim), nfeatures); + + /* allocate memory for membership_d[] and clusters_d[][] (device) */ + cudaMallocManaged((void **)&membership_d, npoints * sizeof(int)); + 
cudaMallocManaged((void **)&clusters_d, nclusters * nfeatures * sizeof(float)); + +#ifdef BLOCK_DELTA_REDUCE + // allocate array to hold the per block deltas on the gpu side + + cudaMallocManaged((void**) &block_deltas_d, num_blocks_perdim * num_blocks_perdim * sizeof(int)); +#endif + +#ifdef BLOCK_CENTER_REDUCE + // allocate memory and copy to card cluster array in which to accumulate center points for the next iteration + cudaMallocManaged((void**) &block_clusters_d, + num_blocks_perdim * num_blocks_perdim * + nclusters * nfeatures * sizeof(float)); +#endif + +} +/* -------------- allocateMemory() end ------------------- */ + +/* -------------- deallocateMemory() ------------------- */ +/* free host and device memory */ +extern "C" +void deallocateMemory() +{ + free(membership_new); + free(block_new_centers); + cudaFree(feature_d); + cudaFree(feature_flipped_d); + cudaFree(membership_d); + + cudaFree(clusters_d); +#ifdef BLOCK_CENTER_REDUCE + cudaFree(block_clusters_d); +#endif +#ifdef BLOCK_DELTA_REDUCE + cudaFree(block_deltas_d); +#endif +endCPU(); +} +/* -------------- deallocateMemory() end ------------------- */ + +//////////////////////////////////////////////////////////////////////////////// +// Program main // + +int main(int argc, char **argv) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + // make sure we're running on the big card + GPU_argv_init(); + // as done in the CUDA start/help document provided + initTrace(); + setup(argc, argv); + finiTrace(); +} + +// // +//////////////////////////////////////////////////////////////////////////////// +/* ------------------- kmeansCuda() ------------------------ */ +extern "C" +int // delta -- had problems when return value was of float type +kmeansCuda(float **feature, /* in: [npoints][nfeatures] */ + int nfeatures, /* number of attributes for each point */ + int npoints, /* number of data points */ + int nclusters, /* 
number of clusters */ + int *membership, /* which cluster the point belongs to */ + float **clusters, /* coordinates of cluster centers */ + int *new_centers_len, /* number of elements in each cluster */ + float **new_centers /* sum of elements in each cluster */ + ) +{ + int delta = 0; /* if point has moved */ + int i,j; /* counters */ + + // cudaSetDevice(1); + + + /* copy membership (host to device) */ + memcpy(membership_d, membership_new, npoints*sizeof(int)); + + // /* copy clusters (host to device) */ + // memcpy(clusters_d, clusters[0], nclusters * nfeatures * sizeof(float)); + + // /* set up texture */ + // cudaChannelFormatDesc chDesc0 = cudaCreateChannelDesc(); + // t_features.filterMode = cudaFilterModePoint; + // t_features.normalized = false; + // t_features.channelDesc = chDesc0; + + // if(cudaBindTexture(NULL, &t_features, feature_d, &chDesc0, npoints*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind features array to texture!\n"); + + // cudaChannelFormatDesc chDesc1 = cudaCreateChannelDesc(); + // t_features_flipped.filterMode = cudaFilterModePoint; + // t_features_flipped.normalized = false; + // t_features_flipped.channelDesc = chDesc1; + + // if(cudaBindTexture(NULL, &t_features_flipped, feature_flipped_d, &chDesc1, npoints*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind features_flipped array to texture!\n"); + + // cudaChannelFormatDesc chDesc2 = cudaCreateChannelDesc(); + // t_clusters.filterMode = cudaFilterModePoint; + // t_clusters.normalized = false; + // t_clusters.channelDesc = chDesc2; + + // if(cudaBindTexture(NULL, &t_clusters, clusters_d, &chDesc2, nclusters*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind clusters array to texture!\n"); + + // /* copy clusters to constant memory */ + // cudaMemcpyToSymbol("c_clusters",clusters[0],nclusters*nfeatures*sizeof(float),0,cudaMemcpyHostToDevice); + + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + 
cudaMemPrefetchAsync(membership_d, npoints * sizeof(int), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + /* setup execution parameters. + changed to 2d (source code on NVIDIA CUDA Programming Guide) */ + dim3 grid( num_blocks_perdim, num_blocks_perdim ); + dim3 threads( num_threads_perdim*num_threads_perdim ); + static uint64_t startKernel2; + CUPTI_CALL(cuptiGetTimestamp(&startKernel2)); + /* execute the kernel */ + kmeansPoint<<>>(feature_d, + nfeatures, + npoints, + (num_blocks_perdim * num_blocks_perdim * num_threads_perdim * num_threads_perdim), + nclusters, + membership_d); + static uint64_t endKernel2; + CUPTI_CALL(cuptiGetTimestamp(&endKernel2)); + // cudaThreadSynchronize(); + cudaDeviceSynchronize(); + printf("CUPTI,kmeansPoint,%lu,%lu\n", startKernel2, endKernel2); + + /* copy back membership (device to host) */ + memcpy(membership_new, membership_d, npoints * sizeof(int)); + +#ifdef BLOCK_CENTER_REDUCE + /*** Copy back arrays of per block sums ***/ + float * block_clusters_h = (float *) malloc( + num_blocks_perdim * num_blocks_perdim * + nclusters * nfeatures * sizeof(float)); + + // cudaMemcpy(block_clusters_h, block_clusters_d, + // num_blocks_perdim * num_blocks_perdim * + // nclusters * nfeatures * sizeof(float), + // cudaMemcpyDeviceToHost); +#endif +#ifdef BLOCK_DELTA_REDUCE + int * block_deltas_h = (int *) malloc( + num_blocks_perdim * num_blocks_perdim * sizeof(int)); + + // cudaMemcpy(block_deltas_h, block_deltas_d, + // num_blocks_perdim * num_blocks_perdim * sizeof(int), + // cudaMemcpyDeviceToHost); +#endif + + /* for each point, sum data points in each cluster + and see if membership has changed: + if so, increase delta and change old membership, and update new_centers; + otherwise, update new_centers */ + delta = 0; + for (i = 0; i < npoints; i++) + { + int cluster_id = membership_d[i]; + new_centers_len[cluster_id]++; + if (membership_d[i] != membership[i]) + { +#ifdef CPU_DELTA_REDUCE + delta++; +#endif + membership[i] = 
membership_d[i]; + } +#ifdef CPU_CENTER_REDUCE + for (j = 0; j < nfeatures; j++) + { + new_centers[cluster_id][j] += feature[i][j]; + } +#endif + } + + +#ifdef BLOCK_DELTA_REDUCE + /*** calculate global sums from per block sums for delta and the new centers ***/ + + //debug + //printf("\t \t reducing %d block sums to global sum \n",num_blocks_perdim * num_blocks_perdim); + for(i = 0; i < num_blocks_perdim * num_blocks_perdim; i++) { + //printf("block %d delta is %d \n",i,block_deltas_h[i]); + delta += block_deltas_h[i]; + } + +#endif +#ifdef BLOCK_CENTER_REDUCE + + for(int j = 0; j < nclusters;j++) { + for(int k = 0; k < nfeatures;k++) { + block_new_centers[j*nfeatures + k] = 0.f; + } + } + + for(i = 0; i < num_blocks_perdim * num_blocks_perdim; i++) { + for(int j = 0; j < nclusters;j++) { + for(int k = 0; k < nfeatures;k++) { + block_new_centers[j*nfeatures + k] += block_clusters_h[i * nclusters*nfeatures + j * nfeatures + k]; + } + } + } + + +#ifdef CPU_CENTER_REDUCE + //debug + /*for(int j = 0; j < nclusters;j++) { + for(int k = 0; k < nfeatures;k++) { + if(new_centers[j][k] > 1.001 * block_new_centers[j*nfeatures + k] || new_centers[j][k] < 0.999 * block_new_centers[j*nfeatures + k]) { + printf("\t \t for %d:%d, normal value is %e and gpu reduced value id %e \n",j,k,new_centers[j][k],block_new_centers[j*nfeatures + k]); + } + } + }*/ +#endif + +#ifdef BLOCK_CENTER_REDUCE + for(int j = 0; j < nclusters;j++) { + for(int k = 0; k < nfeatures;k++) + new_centers[j][k]= block_new_centers[j*nfeatures + k]; + } +#endif + +#endif + finiTrace(); + return delta; + +} +/* ------------------- kmeansCuda() end ------------------------ */ + diff --git a/workloads/realworld/uvm_prefetch/kmeans/kmeans_cuda_kernel.cu b/workloads/realworld/uvm_prefetch/kmeans/kmeans_cuda_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..d5d94f15588c4097081c1fa9f2dab392ffe57dc9 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/kmeans/kmeans_cuda_kernel.cu @@ -0,0 
+1,136 @@ +#ifndef _KMEANS_CUDA_KERNEL_H_ +#define _KMEANS_CUDA_KERNEL_H_ + +#include +#include + +#include "kmeans.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +// FIXME: Make this a runtime selectable variable! +#define ASSUMED_NR_CLUSTERS 32 + +#define SDATA(index) CUT_BANK_CHECKER(sdata, index) + +// t_features has the layout dim0[points 0-m-1]dim1[ points 0-m-1]... +texture t_features; +// t_features_flipped has the layout point0[dim 0-n-1]point1[dim 0-n-1] +texture t_features_flipped; +texture t_clusters; + +__constant__ float c_clusters[ASSUMED_NR_CLUSTERS * 34]; /* constant memory for cluster centers */ + +/* ----------------- invert_mapping() --------------------- */ +/* inverts data array from row-major to column-major. + + [p0,dim0][p0,dim1][p0,dim2] ... + [p1,dim0][p1,dim1][p1,dim2] ... + [p2,dim0][p2,dim1][p2,dim2] ... + to + [dim0,p0][dim0,p1][dim0,p2] ... + [dim1,p0][dim1,p1][dim1,p2] ... + [dim2,p0][dim2,p1][dim2,p2] ... 
+*/ +__global__ void invert_mapping(float *input, /* original */ + float *output, /* inverted */ + int npoints, /* npoints */ + int batch_size, + int nfeatures) /* nfeatures */ +{ + int point_id = threadIdx.x + blockDim.x * blockIdx.x; /* id of thread */ + + int batches = npoints / batch_size; + + for (int b = 0; b < batches; b++) + { + for (int i = 0; i < nfeatures; i++) + { + output[b * batch_size + point_id + npoints * i] = input[(b * batch_size + point_id) * nfeatures + i]; + } + } + + + + return; +} +/* ----------------- invert_mapping() end --------------------- */ + +/* to turn on the GPU delta and center reduction */ +// #define GPU_DELTA_REDUCTION +// #define GPU_NEW_CENTER_REDUCTION + +/* ----------------- kmeansPoint() --------------------- */ +/* find the index of nearest cluster centers and change membership*/ +__global__ void +kmeansPoint(float *features, /* in: [npoints*nfeatures] */ + int nfeatures, + int npoints, + int batch_size, + int nclusters, + int *membership) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // block ID + const unsigned int block_id = gridDim.x * blockIdx.y + blockIdx.x; + // point/thread ID + const unsigned int point_id = block_id * blockDim.x * blockDim.y + threadIdx.x; + + __shared__ float tmp_features[THREADS_PER_DIM][THREADS_PER_DIM][16]; + + int batches = npoints / batch_size; + int tile = 0; + int end_tile = tile + batches; + + for (; tile < end_tile; tile += 1) + { + for (int i = 0; i < 16; i++) + { + int addr = tile * batch_size + point_id + i * npoints; + tmp_features[threadIdx.y][threadIdx.x][i] = features[addr]; + } + block.sync(); + + int index = -1; + + float min_dist = FLT_MAX; + float dist; /* distance square between a point to cluster center */ + + /* find the cluster center id with min distance to pt */ + for (int i = 0; i < nclusters; i++) + { + int cluster_base_index = i * nfeatures; /* base index of cluster centers for inverted array */ + float ans = 0.0; /* 
Euclidean distance sqaure */ + + for (int j = 0; j < nfeatures; j++) + { + // int addr = point_id + j * npoints; /* appropriate index of data point */ + // float diff = (tex1Dfetch(t_features,addr) - c_clusters[cluster_base_index + j]); /* distance between a data point to cluster centers */ + + // int addr = point_id + j * npoints; /* appropriate index of data point */ + // float diff = features[addr] - c_clusters[cluster_base_index + j]; /* distance between a data point to cluster centers */ + float diff = tmp_features[threadIdx.y][threadIdx.x][j] - c_clusters[cluster_base_index + j]; /* distance between a data point to cluster centers */ + ans += diff * diff; /* sum of squares */ + } + dist = ans; + block.sync(); + + /* see if distance is smaller than previous ones: + if so, change minimum distance and save index of cluster center */ + if (dist < min_dist) + { + min_dist = dist; + index = i; + } + } + membership[tile * batch_size + point_id] = index; + block.sync(); + } + +} +#endif // #ifndef _KMEANS_CUDA_KERNEL_H_ diff --git a/workloads/realworld/uvm_prefetch/kmeans/kmeans_cuda_kernel.cu.old b/workloads/realworld/uvm_prefetch/kmeans/kmeans_cuda_kernel.cu.old new file mode 100755 index 0000000000000000000000000000000000000000..dd0dec27eebf197d811a58063b13a86c8f72e1ed --- /dev/null +++ b/workloads/realworld/uvm_prefetch/kmeans/kmeans_cuda_kernel.cu.old @@ -0,0 +1,185 @@ +#ifndef _KMEANS_CUDA_KERNEL_H_ +#define _KMEANS_CUDA_KERNEL_H_ + +#include +#include + +#include "kmeans.h" + +// FIXME: Make this a runtime selectable variable! +#define ASSUMED_NR_CLUSTERS 32 + +#define SDATA( index) CUT_BANK_CHECKER(sdata, index) + +// t_features has the layout dim0[points 0-m-1]dim1[ points 0-m-1]... 
+texture t_features; +// t_features_flipped has the layout point0[dim 0-n-1]point1[dim 0-n-1] +texture t_features_flipped; +texture t_clusters; + + +__constant__ float c_clusters[ASSUMED_NR_CLUSTERS*34]; /* constant memory for cluster centers */ + +/* ----------------- invert_mapping() --------------------- */ +/* inverts data array from row-major to column-major. + + [p0,dim0][p0,dim1][p0,dim2] ... + [p1,dim0][p1,dim1][p1,dim2] ... + [p2,dim0][p2,dim1][p2,dim2] ... + to + [dim0,p0][dim0,p1][dim0,p2] ... + [dim1,p0][dim1,p1][dim1,p2] ... + [dim2,p0][dim2,p1][dim2,p2] ... +*/ +__global__ void invert_mapping(float *input, /* original */ + float *output, /* inverted */ + int npoints, /* npoints */ + int nfeatures) /* nfeatures */ +{ + int point_id = threadIdx.x + blockDim.x*blockIdx.x; /* id of thread */ + int i; + + if(point_id < npoints){ + for(i=0;i 1; threadids_participating /= 2) { + if(threadIdx.x < threadids_participating) { + deltas[threadIdx.x] += deltas[threadIdx.x + threadids_participating]; + } + __syncthreads(); + } + if(threadIdx.x < 1) {deltas[threadIdx.x] += deltas[threadIdx.x + 1];} + __syncthreads(); + // propagate number of changes to global counter + if(threadIdx.x == 0) { + block_deltas[blockIdx.y * gridDim.x + blockIdx.x] = deltas[0]; + //printf("original id: %d, modified: %d\n", blockIdx.y*gridDim.x+blockIdx.x, blockIdx.x); + + } + +#endif + + +#ifdef GPU_NEW_CENTER_REDUCTION + int center_id = threadIdx.x / nfeatures; + int dim_id = threadIdx.x - nfeatures*center_id; + + __shared__ int new_center_ids[THREADS_PER_BLOCK]; + + new_center_ids[threadIdx.x] = index; + __syncthreads(); + + /*** + determine which dimension calculte the sum for + mapping of threads is + center0[dim0,dim1,dim2,...]center1[dim0,dim1,dim2,...]... 
+ ***/ + + int new_base_index = (point_id - threadIdx.x)*nfeatures + dim_id; + float accumulator = 0.f; + + if(threadIdx.x < nfeatures * nclusters) { + // accumulate over all the elements of this threadblock + for(int i = 0; i< (THREADS_PER_BLOCK); i++) { + float val = tex1Dfetch(t_features_flipped,new_base_index+i*nfeatures); + if(new_center_ids[i] == center_id) + accumulator += val; + } + + // now store the sum for this threadblock + /*** + mapping to global array is + block0[center0[dim0,dim1,dim2,...]center1[dim0,dim1,dim2,...]...]block1[...]... + ***/ + block_clusters[(blockIdx.y*gridDim.x + blockIdx.x) * nclusters * nfeatures + threadIdx.x] = accumulator; + } +#endif + +} +#endif // #ifndef _KMEANS_CUDA_KERNEL_H_ diff --git a/workloads/realworld/uvm_prefetch/kmeans/rmse.c b/workloads/realworld/uvm_prefetch/kmeans/rmse.c new file mode 100755 index 0000000000000000000000000000000000000000..fe7786342bf77cab12958e630cb8d99834312f0d --- /dev/null +++ b/workloads/realworld/uvm_prefetch/kmeans/rmse.c @@ -0,0 +1,95 @@ +/*************************************************************************/ +/** File: rmse.c **/ +/** Description: calculate root mean squared error of particular **/ +/** clustering. **/ +/** Author: Sang-Ha Lee **/ +/** University of Virginia. **/ +/** **/ +/** Note: euclid_dist_2() and find_nearest_point() adopted from **/ +/** Minebench code. 
**/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include + +#include "kmeans.h" + +extern double wtime(void); + +/*----< euclid_dist_2() >----------------------------------------------------*/ +/* multi-dimensional spatial Euclid distance square */ +__inline +float euclid_dist_2(float *pt1, + float *pt2, + int numdims) +{ + int i; + float ans=0.0; + + for (i=0; i-----------------------------------------------*/ +__inline +int find_nearest_point(float *pt, /* [nfeatures] */ + int nfeatures, + float **pts, /* [npts][nfeatures] */ + int npts) +{ + int index, i; + float max_dist=FLT_MAX; + + /* find the cluster center id with min distance to pt */ + for (i=0; i-------------------------------------*/ +float rms_err (float **feature, /* [npoints][nfeatures] */ + int nfeatures, + int npoints, + float **cluster_centres, /* [nclusters][nfeatures] */ + int nclusters) +{ + int i; + int nearest_cluster_index; /* cluster center id with min distance to pt */ + float sum_euclid = 0.0; /* sum of Euclidean distance squares */ + float ret; /* return value */ + + /* calculate and sum the sqaure of euclidean distance*/ + #pragma omp parallel for \ + shared(feature,cluster_centres) \ + firstprivate(npoints,nfeatures,nclusters) \ + private(i, nearest_cluster_index) \ + schedule (static) + for (i=0; i +#endif + +#include + +#ifndef _H_TYPES +#include +#endif + +#include + +#ifndef _H_ACCESS +#include /* for the "access" function */ +#endif + +/* + * POSIX requires that certain values be included in unistd.h. It also + * requires that when _POSIX_SOURCE is defined only those standard + * specific values are present. This header includes all the POSIX + * required entries. 
+ */ + +#ifdef _POSIX_SOURCE +#ifdef _LARGE_FILES +#define lseek lseek64 +#endif + + +/* Symbolic constants for the "lseek" function: */ +#ifndef SEEK_SET +#define SEEK_SET 0 /* Set file pointer to "offset" */ +#define SEEK_CUR 1 /* Set file pointer to current plus "offset" */ +#define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif /* SEEK_SET */ + +#ifdef _NO_PROTO + +#ifndef _KERNEL +extern int access(); +extern unsigned int alarm(); +extern int chdir(); +extern int chown(); +extern int close(); +extern char *ctermid(); +extern int dup(); +extern int dup2(); +extern int execl(); +extern int execv(); +extern int execle(); +extern int execve(); +extern int execlp(); +extern int execvp(); +extern void _exit(); +extern pid_t fork(); +extern long fpathconf(); +extern char *getcwd(); +extern gid_t getegid(); +extern uid_t geteuid(); +extern gid_t getgid(); +extern int getgroups(); +extern char *getlogin(); +extern pid_t getpgrp(); +extern pid_t getpid(); +extern pid_t getppid(); +extern uid_t getuid(); +extern int isatty(); +extern int link(); +extern off_t lseek(); +extern long pathconf(); +extern int pause(); +extern int pipe(); +#if defined(_XOPEN_SOURCE) && ( _XOPEN_SOURCE >= 500 ) +extern int pthread_atfork(); +#endif +extern int read(); +extern int rmdir(); +extern int setgid(); +extern int setpgid(); +extern int setsid(); +extern int setuid(); +extern unsigned int sleep(); +extern long sysconf(); +extern pid_t tcgetpgrp(); +extern int tcsetpgrp(); +extern char *ttyname(); +extern int unlink(); +extern int write(); +#endif /* !_KERNEL */ + +#else /* POSIX required prototypes */ + +#ifndef _KERNEL +extern int access(const char *, int); +extern unsigned int alarm(unsigned int); +extern int chdir(const char *); +extern int chown(const char *, uid_t, gid_t); +extern int close(int); +extern char *ctermid(char *); +extern int dup(int); +extern int dup2(int, int); +extern int execl(const char *, const char *, ...); +extern int execv(const char *, char 
*const []); +extern int execle(const char *, const char *, ...); +extern int execve(const char *, char *const [], char *const []); +extern int execlp(const char *, const char *, ...); +extern int execvp(const char *, char *const []); +extern void _exit(int); +extern pid_t fork(void); +extern long fpathconf(int, int); +extern char *getcwd(char *, size_t); +extern gid_t getegid(void); +extern uid_t geteuid(void); +extern gid_t getgid(void); +extern int getgroups(int, gid_t []); +extern char *getlogin(void); +#ifndef _BSD +extern pid_t getpgrp(void); +#endif /* _BSD */ +extern pid_t getpid(void); +extern pid_t getppid(void); +extern uid_t getuid(void); +extern int isatty(int); +extern int link(const char *, const char *); +extern off_t lseek(int, off_t, int); +#ifdef _LARGE_FILE_API +extern off64_t lseek64(int, off64_t, int); +#endif +extern long pathconf(const char *, int); +extern int pause(void); +extern int pipe(int []); +#if defined(_XOPEN_SOURCE) && ( _XOPEN_SOURCE >= 500 ) +extern int pthread_atfork(void (*)(void), void (*)(void), void (*)(void)); +#endif +extern ssize_t read(int, void *, size_t); +extern int rmdir(const char *); +extern int setgid(gid_t); +extern int setpgid(pid_t, pid_t); +extern pid_t setsid(void); +extern int setuid(uid_t); +extern unsigned int sleep(unsigned int); +extern long sysconf(int); +extern pid_t tcgetpgrp(int); +extern int tcsetpgrp(int, pid_t); +extern char *ttyname(int); +extern int unlink(const char *); +extern ssize_t write(int, const void *, size_t); +#endif /* !_KERNEL */ +#endif /* !_NO_PROTO */ + +#define STDIN_FILENO 0 +#define STDOUT_FILENO 1 +#define STDERR_FILENO 2 + +#define _POSIX_JOB_CONTROL 1 +#define _POSIX_SAVED_IDS 1 + +#define _POSIX_VERSION 200112L +#define _POSIX2_VERSION 200112L +#define _POSIX2_C_VERSION 200112L + + +#ifdef _XOPEN_SOURCE + +#define _XOPEN_VERSION 600 +#define _XOPEN_XCU_VERSION 4 +#define _XOPEN_XPG3 1 +#define _XOPEN_XPG4 1 +#define _XOPEN_UNIX 1 + +#define _XOPEN_REALTIME (-1) +#define 
_XOPEN_REALTIME_THREADS (-1) + +#if (_XOPEN_SOURCE >= 600) +#define _XOPEN_STREAMS 1 +#endif + +#define _XBS5_ILP32_OFF32 1 +#define _XBS5_ILP32_OFFBIG 1 +#define _XBS5_LP64_OFF64 1 +#define _XBS5_LPBIG_OFFBIG 1 + +#define _POSIX2_C_BIND 200112L +#define _POSIX2_C_DEV 200112L +#define _POSIX2_CHAR_TERM 1 +#define _POSIX2_LOCALEDEF 200112L +#define _POSIX2_UPE 200112L +#define _POSIX2_FORT_DEV (-1) +#define _POSIX2_FORT_RUN (-1) +#define _POSIX2_SW_DEV (-1) + +#if (_POSIX_C_SOURCE >= 200112L) +#define _POSIX_REGEXP 1 +#define _POSIX_SHELL 1 +#define _POSIX2_PBS (-1) +#define _POSIX2_PBS_ACCOUNTING (-1) +#define _POSIX2_PBS_CHECKPOINT (-1) +#define _POSIX2_PBS_LOCATE (-1) +#define _POSIX2_PBS_MESSAGE (-1) +#define _POSIX2_PBS_TRACK (-1) +#define _V6_ILP32_OFF32 1 +#define _V6_ILP32_OFFBIG 1 +#define _V6_LP64_OFF64 1 +#define _V6_LPBIG_OFFBIG 1 + +#define _POSIX_ADVISORY_INFO 200112L +#define _POSIX_BARRIERS 200112L +#define _POSIX_CLOCK_SELECTION 200112L +#define _POSIX_CPUTIME 200112L +#define _POSIX_MONOTONIC_CLOCK 200112L + +#ifdef _POSIX_RAW_SOCKETS +#undef _POSIX_RAW_SOCKETS +#endif + +#define _POSIX_SPAWN 200112L +#define _POSIX_SPIN_LOCKS 200112L +#define _POSIX_SPORADIC_SERVER (-1) +#define _POSIX_THREAD_CPUTIME 200112L +#define _POSIX_THREAD_SPORADIC_SERVER (-1) +#define _POSIX_TIMEOUTS 200112L +#define _POSIX_TRACE (-1) +#define _POSIX_TRACE_EVENT_FILTER (-1) +#define _POSIX_TRACE_INHERIT (-1) +#define _POSIX_TRACE_LOG (-1) +#define _POSIX_TYPED_MEMORY_OBJECTS (-1) + +#endif /* _POSIX_C_SOURCE >= 200112L */ + +#define _XOPEN_CRYPT 1 +#define _XOPEN_SHM 1 +#define _XOPEN_ENH_I18N 1 +#define _XOPEN_LEGACY (-1) +#ifndef __64BIT__ +#define _UNIX_ABI (-1) +#define _UNIX_ABI_IA64 (-1) +#define _UNIX_ABI_BIG_ENDIAN (-1) +#define _UNIX_ABI_LITTLE_ENDIAN (-1) +#endif /* __64BIT__ */ + +extern char *optarg; +extern int optind, opterr, optopt; + +#ifdef _NO_PROTO + extern size_t confstr(); + extern char *crypt(); + extern void encrypt(); + extern int fsync(); + extern 
int getopt(); + extern int nice(); + extern void swab(); +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern char *getpass(); + extern int chroot(); +#endif +#else + extern size_t confstr(int, char*, size_t); + extern char *crypt(const char *, const char *); + extern void encrypt(char *, int); + extern int fsync(int); + extern int getopt(int, char* const*, const char*); + extern int nice(int); + extern void swab(const void *, void *, ssize_t); + extern int fdatasync(int); +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern char *getpass(const char *); + extern int chroot(const char *); +#endif +#endif + +#endif /* _XOPEN _SOURCE */ + +/* Threads options for 1003.1c and XPG UNIX98 */ +#define _POSIX_THREADS 200112L +#define _POSIX_THREAD_ATTR_STACKADDR 200112L +#define _POSIX_THREAD_ATTR_STACKSIZE 200112L +#define _POSIX_THREAD_PROCESS_SHARED 200112L +#define _POSIX_THREAD_SAFE_FUNCTIONS 200112L +#ifdef _ALL_SOURCE +#define _POSIX_REENTRANT_FUNCTIONS _POSIX_THREAD_SAFE_FUNCTIONS +#endif + +/* Realtime threads options for 1003.1c and XPG UNIX98 */ +#define _POSIX_THREAD_PRIORITY_SCHEDULING (-1) +#define _POSIX_THREAD_PRIO_INHERIT (-1) +#define _POSIX_THREAD_PRIO_PROTECT (-1) + +#undef _POSIX_THREAD_FORKALL + +/* Realtime options for 1003.1c and XPG UNIX98 */ +#define _POSIX_ASYNCHRONOUS_IO 200112L +#define _POSIX_FSYNC 200112L +#define _POSIX_MAPPED_FILES 200112L +#define _POSIX_MEMLOCK 200112L +#define _POSIX_MEMLOCK_RANGE 200112L +#define _POSIX_MEMORY_PROTECTION 200112L +#define _POSIX_MESSAGE_PASSING 200112L +#define _POSIX_PRIORITIZED_IO 200112L +#define _POSIX_PRIORITY_SCHEDULING 200112L +#define _POSIX_REALTIME_SIGNALS 200112L +#define _POSIX_SEMAPHORES 200112L +#define _POSIX_SHARED_MEMORY_OBJECTS 200112L +#define _POSIX_SYNCHRONIZED_IO 200112L +#define _POSIX_TIMERS 200112L + +#define _POSIX_ASYNC_IO (-1) +#undef _POSIX_SYNC_IO +#define _POSIX_PRIO_IO (-1) + +#define 
_POSIX_CHOWN_RESTRICTED 0 +#define _POSIX_VDISABLE 0xFF +#define _POSIX_NO_TRUNC 0 + + /* UNIX03 and POSIX01 */ + /* Always enabled */ +#define _POSIX_IPV6 200112L +#define _POSIX_RAW_SOCKETS 200112L + + +#ifndef NULL +#define NULL 0 +#endif + +#if (_POSIX_C_SOURCE >= 200112L) +#define _POSIX_READER_WRITER_LOCKS 200112L +#endif + +/* arguments for the confstr() function */ + +#define _CS_PATH 1 + + /* compile,link,lib,lint flags for 32bit, no_LARGE_FILES system */ +#define _CS_XBS5_ILP32_OFF32_CFLAGS 2 +#define _CS_XBS5_ILP32_OFF32_LDFLAGS 3 +#define _CS_XBS5_ILP32_OFF32_LIBS 4 +#define _CS_XBS5_ILP32_OFF32_LINTFLAGS 5 + + /* compile,link,lib,lint flags for 32bit, _LARGE_FILES system */ +#define _CS_XBS5_ILP32_OFFBIG_CFLAGS 6 +#define _CS_XBS5_ILP32_OFFBIG_LDFLAGS 7 +#define _CS_XBS5_ILP32_OFFBIG_LIBS 8 +#define _CS_XBS5_ILP32_OFFBIG_LINTFLAGS 9 + + /* compile,link,lib,lint flags for LP64 64bit system */ +#define _CS_XBS5_LP64_OFF64_CFLAGS 10 +#define _CS_XBS5_LP64_OFF64_LDFLAGS 11 +#define _CS_XBS5_LP64_OFF64_LIBS 12 +#define _CS_XBS5_LP64_OFF64_LINTFLAGS 13 + + /* compile,link,lib,lint flags for ILP64 64bit system */ + /* AIX does not currently support this */ +#define _CS_XBS5_LPBIG_OFFBIG_CFLAGS 14 +#define _CS_XBS5_LPBIG_OFFBIG_LDFLAGS 15 +#define _CS_XBS5_LPBIG_OFFBIG_LIBS 16 +#define _CS_XBS5_LPBIG_OFFBIG_LINTFLAGS 17 + +#define _CS_AIX_BOOTDEV 24 +#define _CS_AIX_MODEL_CODE 25 +#define _CS_AIX_ARCHITECTURE 26 +#define _CS_AIX_MODEL_CLASS 40 + +#if (_POSIX_C_SOURCE >= 200112L) +#define _CS_POSIX_V6_ILP32_OFF32_CFLAGS 27 +#define _CS_POSIX_V6_ILP32_OFF32_LDFLAGS 28 +#define _CS_POSIX_V6_ILP32_OFF32_LIBS 29 +#define _CS_POSIX_V6_ILP32_OFFBIG_CFLAGS 30 +#define _CS_POSIX_V6_ILP32_OFFBIG_LDFLAGS 31 +#define _CS_POSIX_V6_ILP32_OFFBIG_LIBS 32 +#define _CS_POSIX_V6_LP64_OFF64_CFLAGS 33 +#define _CS_POSIX_V6_LP64_OFF64_LDFLAGS 34 +#define _CS_POSIX_V6_LP64_OFF64_LIBS 35 +#define _CS_POSIX_V6_LPBIG_OFFBIG_CFLAGS 36 +#define _CS_POSIX_V6_LPBIG_OFFBIG_LDFLAGS 37 
+#define _CS_POSIX_V6_LPBIG_OFFBIG_LIBS 38 +#define _CS_POSIX_V6_WIDTH_RESTRICTED_ENVS 39 +#endif + + /* Values for the above */ +#define _CSPATH "/usr/bin:/usr/vac/bin" + + /* ILP32_OFF32 */ +#define _CSPOSIX_V6_ILP32_OFF32_CFLAGS "-q32" +#define _CSXBS5_ILP32_OFF32_CFLAGS _CSPOSIX_V6_ILP32_OFF32_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_ILP32_OFF32_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_ILP32_OFF32_LDFLAGS "-b32" +#define _CSXBS5_ILP32_OFF32_LDFLAGS _CSPOSIX_V6_ILP32_OFF32_LDFLAGS +#endif + +#define _CSPOSIX_V6_ILP32_OFF32_LIBS "-lc -lpthread -lm" +#define _CSXBS5_ILP32_OFF32_LIBS _CSPOSIX_V6_ILP32_OFF32_LIBS + +#define _CSXBS5_ILP32_OFF32_LINTFLAGS "" + + /* ILP32_OFFOFFBIG */ +#define _CSPOSIX_V6_ILP32_OFFBIG_CFLAGS "-q32 -D_LARGE_FILES -qlonglong" +#define _CSXBS5_ILP32_OFFBIG_CFLAGS _CSPOSIX_V6_ILP32_OFFBIG_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_ILP32_OFFBIG_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_ILP32_OFFBIG_LDFLAGS "-b32" +#define _CSXBS5_ILP32_OFFBIG_LDFLAGS _CSPOSIX_V6_ILP32_OFFBIG_LDFLAGS +#endif + +#define _CSPOSIX_V6_ILP32_OFFBIG_LIBS "-lc -lpthread -lm" +#define _CSXBS5_ILP32_OFFBIG_LIBS _CSPOSIX_V6_ILP32_OFFBIG_LIBS + +#define _CSXBS5_ILP32_OFFBIG_LINTFLAGS "-D_LARGE_FILES -qlonglong" + + /* LP64_OFF64 */ +#define _CSPOSIX_V6_LP64_OFF64_CFLAGS "-q64" +#define _CSXBS5_LP64_OFF64_CFLAGS _CSPOSIX_V6_LP64_OFF64_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_LP64_OFF64_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_LP64_OFF64_LDFLAGS "-b64" +#define _CSXBS5_LP64_OFF64_LDFLAGS _CSPOSIX_V6_LP64_OFF64_LDFLAGS +#endif + +#define _CSPOSIX_V6_LP64_OFF64_LIBS "-lc -lpthread -lm" +#define _CSXBS5_LP64_OFF64_LIBS _CSPOSIX_V6_LP64_OFF64_LIBS + +#define _CSXBS5_LP64_OFF64_LINTFLAGS "-D__64BIT__" + + /* LPBIG_OFFBIG */ +#define _CSPOSIX_V6_LPBIG_OFFBIG_CFLAGS "-q64" +#define _CSXBS5_LPBIG_OFFBIG_CFLAGS _CSPOSIX_V6_LPBIG_OFFBIG_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_LPBIG_OFFBIG_LDFLAGS "" +#else /* POWER */ +#define 
_CSPOSIX_V6_LPBIG_OFFBIG_LDFLAGS "-b64" +#define _CSXBS5_LPBIG_OFFBIG_LDFLAGS _CSPOSIX_V6_LPBIG_OFFBIG_LDFLAGS +#endif + +#define _CSPOSIX_V6_LPBIG_OFFBIG_LIBS "-lc -lpthread -lm" +#define _CSXBS5_LPBIG_OFFBIG_LIBS _CSPOSIX_V6_LPBIG_OFFBIG_LIBS + +#define _CSXBS5_LPBIG_OFFBIG_LINTFLAGS "-D__64BIT__" + +#if (_POSIX_C_SOURCE >= 200112L) +#define _CSPOSIX_V6_WIDTH_RESTRICTED_ENVS \ + "POSIX_V6_ILP32_OFF32\n" \ + "POSIX_V6_ILP32_OFFBIG\n" \ + "POSIX_V6_LP64_OFF64\n" \ + "POSIX_V6_LPBIG_OFFBIG" +#endif + +/* arguments for the pathconf() function */ + +#define _PC_CHOWN_RESTRICTED 10 +#define _PC_LINK_MAX 11 +#define _PC_MAX_CANON 12 +#define _PC_MAX_INPUT 13 +#define _PC_NAME_MAX 14 +#define _PC_NO_TRUNC 15 +#define _PC_PATH_MAX 16 +#define _PC_PIPE_BUF 17 +#define _PC_VDISABLE 18 +#define _PC_ASYNC_IO 19 +#define _PC_SYNC_IO 20 +#define _PC_PRIO_IO 21 +#define _PC_FILESIZEBITS 22 /* # bits needed to hold offset */ +#define _PC_AIX_DISK_PARTITION 23 +#define _PC_AIX_DISK_SIZE 24 +#if (_POSIX_C_SOURCE >= 200112L) +#define _PC_SYMLINK_MAX 25 +#define _PC_ALLOC_SIZE_MIN 26 +#define _PC_REC_INCR_XFER_SIZE 27 +#define _PC_REC_MAX_XFER_SIZE 28 +#define _PC_REC_MIN_XFER_SIZE 29 +#define _PC_REC_XFER_ALIGN 30 +#define _PC_2_SYMLINKS 31 +#endif + +/* arguments for the sysconf() function, the defined numbers are used as + * array index in sysconf(). 
+ * + * POSIX.1(1990), Table 4-2 + */ +#define _SC_ARG_MAX 0 +#define _SC_CHILD_MAX 1 +#define _SC_CLK_TCK 2 +#define _SC_NGROUPS_MAX 3 +#define _SC_OPEN_MAX 4 +#define _SC_STREAM_MAX 5 +#define _SC_TZNAME_MAX 6 +#define _SC_JOB_CONTROL 7 +#define _SC_SAVED_IDS 8 +#define _SC_VERSION 9 + +/* POSIX.1(1990), Table 2-3, required by command getconf */ + +#define _SC_POSIX_ARG_MAX 10 +#define _SC_POSIX_CHILD_MAX 11 +#define _SC_POSIX_LINK_MAX 12 +#define _SC_POSIX_MAX_CANON 13 +#define _SC_POSIX_MAX_INPUT 14 +#define _SC_POSIX_NAME_MAX 15 +#define _SC_POSIX_NGROUPS_MAX 16 +#define _SC_POSIX_OPEN_MAX 17 +#define _SC_POSIX_PATH_MAX 18 +#define _SC_POSIX_PIPE_BUF 19 +#define _SC_POSIX_SSIZE_MAX 20 +#define _SC_POSIX_STREAM_MAX 21 +#define _SC_POSIX_TZNAME_MAX 22 + +/* POSIX.2 (Draft 10), Table 41) */ + +#define _SC_BC_BASE_MAX 23 +#define _SC_BC_DIM_MAX 24 +#define _SC_BC_SCALE_MAX 25 +#define _SC_BC_STRING_MAX 26 +#define _SC_EQUIV_CLASS_MAX 27 +#define _SC_EXPR_NEST_MAX 28 +#define _SC_LINE_MAX 29 +#define _SC_RE_DUP_MAX 30 +#define _SC_2_VERSION 31 +#define _SC_2_C_DEV 32 +#define _SC_2_FORT_DEV 33 +#define _SC_2_FORT_RUN 34 +#define _SC_2_LOCALEDEF 35 +#define _SC_2_SW_DEV 36 + +/* POSIX.2 (Draft 10), Table 13) */ + +#define _SC_POSIX2_BC_BASE_MAX 37 +#define _SC_POSIX2_BC_DIM_MAX 38 +#define _SC_POSIX2_BC_SCALE_MAX 39 +#define _SC_POSIX2_BC_STRING_MAX 40 +#define _SC_POSIX2_EQUIV_CLASS_MAX 41 +#define _SC_POSIX2_EXPR_NEST_MAX 42 +#define _SC_POSIX2_LINE_MAX 43 +#define _SC_POSIX2_RE_DUP_MAX 44 +#define _SC_PASS_MAX 45 +#define _SC_XOPEN_VERSION 46 +#define _SC_ATEXIT_MAX 47 +#if _XOPEN_SOURCE_EXTENDED==1 +#define _SC_PAGE_SIZE 48 +#endif /* _XOPEN_SOURCE_EXTENDED */ +#define _SC_AES_OS_VERSION 49 +#define _SC_COLL_WEIGHTS_MAX 50 +#define _SC_2_C_BIND 51 +#define _SC_2_C_VERSION 52 +#define _SC_2_UPE 53 +#define _SC_2_CHAR_TERM 54 +#define _SC_XOPEN_SHM 55 +#define _SC_XOPEN_CRYPT 56 +#define _SC_XOPEN_ENH_I18N 57 +#if _XOPEN_SOURCE_EXTENDED==1 +#define _SC_PAGESIZE 
_SC_PAGE_SIZE +#define _SC_IOV_MAX 58 +#endif /* _XOPEN_SOURCE_EXTENDED */ +#define _SC_THREAD_SAFE_FUNCTIONS 59 +#define _SC_THREADS 60 +#define _SC_THREAD_ATTR_STACKADDR 61 +#define _SC_THREAD_ATTR_STACKSIZE 62 +#define _SC_THREAD_FORKALL 63 +#define _SC_THREAD_PRIORITY_SCHEDULING 64 +#define _SC_THREAD_PRIO_INHERIT 65 +#define _SC_THREAD_PRIO_PROTECT 66 +#define _SC_THREAD_PROCESS_SHARED 67 +#define _SC_THREAD_KEYS_MAX 68 +#define _SC_THREAD_DATAKEYS_MAX _SC_THREAD_KEYS_MAX +#define _SC_THREAD_STACK_MIN 69 +#define _SC_THREAD_THREADS_MAX 70 +#ifdef _ALL_SOURCE +#define _SC_NPROCESSORS_CONF 71 +#define _SC_NPROCESSORS_ONLN 72 +#endif /* _ALL_SOURCE */ +#define _SC_XOPEN_UNIX 73 + +#if (_XOPEN_SOURCE >= 500) + +/* POSIX 1003.1c and XPG UNIX98 */ +/* look to defines above for meanings */ +#define _SC_AIO_LISTIO_MAX 75 +#define _SC_AIO_MAX 76 +#define _SC_AIO_PRIO_DELTA_MAX 77 +#define _SC_ASYNCHRONOUS_IO 78 +#define _SC_DELAYTIMER_MAX 79 +#define _SC_FSYNC 80 +#define _SC_GETGR_R_SIZE_MAX 81 +#define _SC_GETPW_R_SIZE_MAX 82 +#define _SC_LOGIN_NAME_MAX 83 +#define _SC_MAPPED_FILES 84 +#define _SC_MEMLOCK 85 +#define _SC_MEMLOCK_RANGE 86 +#define _SC_MEMORY_PROTECTION 87 +#define _SC_MESSAGE_PASSING 88 +#define _SC_MQ_OPEN_MAX 89 +#define _SC_MQ_PRIO_MAX 90 +#define _SC_PRIORITIZED_IO 91 +#define _SC_PRIORITY_SCHEDULING 92 +#define _SC_REALTIME_SIGNALS 93 +#define _SC_RTSIG_MAX 94 +#define _SC_SEMAPHORES 95 +#define _SC_SEM_NSEMS_MAX 96 +#define _SC_SEM_VALUE_MAX 97 +#define _SC_SHARED_MEMORY_OBJECTS 98 +#define _SC_SIGQUEUE_MAX 99 +#define _SC_SYNCHRONIZED_IO 100 +#define _SC_THREAD_DESTRUCTOR_ITERATIONS 101 +#define _SC_TIMERS 102 +#define _SC_TIMER_MAX 103 +#define _SC_TTY_NAME_MAX 104 +#define _SC_XBS5_ILP32_OFF32 105 +#define _SC_XBS5_ILP32_OFFBIG 106 +#define _SC_XBS5_LP64_OFF64 107 +#define _SC_XBS5_LPBIG_OFFBIG 108 +#define _SC_XOPEN_XCU_VERSION 109 +#define _SC_XOPEN_REALTIME 110 +#define _SC_XOPEN_REALTIME_THREADS 111 +#define _SC_XOPEN_LEGACY 112 +#endif 
/* _XOPEN_SOURCE >= 500 */ + +#ifdef _ALL_SOURCE +#define _SC_REENTRANT_FUNCTIONS _SC_THREAD_SAFE_FUNCTIONS +#define _SC_PHYS_PAGES 113 +#define _SC_AVPHYS_PAGES 114 +#define _SC_LPAR_ENABLED 115 +#define _SC_LARGE_PAGESIZE 116 +#endif /* _ALL_SOURCE */ + +#define _SC_AIX_KERNEL_BITMODE 117 +#define _SC_AIX_REALMEM 118 +#define _SC_AIX_HARDWARE_BITMODE 119 +#define _SC_AIX_MP_CAPABLE 120 + +#define _SC_V6_ILP32_OFF32 121 +#define _SC_V6_ILP32_OFFBIG 122 +#define _SC_V6_LP64_OFF64 123 +#define _SC_V6_LPBIG_OFFBIG 124 + +#define _SC_XOPEN_STREAMS 125 + +#if (_POSIX_C_SOURCE >= 200112L) +#define _SC_HOST_NAME_MAX 126 +#define _SC_REGEXP 127 +#define _SC_SHELL 128 +#define _SC_SYMLOOP_MAX 129 +#define _SC_ADVISORY_INFO 130 +#define _SC_FILE_LOCKING 131 +#define _SC_2_PBS 132 +#define _SC_2_PBS_ACCOUNTING 133 +#define _SC_2_PBS_CHECKPOINT 134 +#define _SC_2_PBS_LOCATE 135 +#define _SC_2_PBS_MESSAGE 136 +#define _SC_2_PBS_TRACK 137 +#define _SC_BARRIERS 138 +#define _SC_CLOCK_SELECTION 139 +#define _SC_CPUTIME 140 +#define _SC_MONOTONIC_CLOCK 141 +#define _SC_READER_WRITER_LOCKS 142 +#define _SC_SPAWN 143 +#define _SC_SPIN_LOCKS 144 +#define _SC_SPORADIC_SERVER 145 +#define _SC_THREAD_CPUTIME 146 +#define _SC_THREAD_SPORADIC_SERVER 147 +#define _SC_TIMEOUTS 148 +#define _SC_TRACE 149 +#define _SC_TRACE_EVENT_FILTER 150 +#define _SC_TRACE_INHERIT 151 +#define _SC_TRACE_LOG 152 +#define _SC_TYPED_MEMORY_OBJECTS 153 +#define _SC_IPV6 154 +#define _SC_RAW_SOCKETS 155 +#define _SC_SS_REPL_MAX 156 +#define _SC_TRACE_EVENT_NAME_MAX 157 +#define _SC_TRACE_NAME_MAX 158 +#define _SC_TRACE_SYS_MAX 159 +#define _SC_TRACE_USER_EVENT_MAX 160 +#endif /* _POSIX_C_SOURCE >= 200112L */ + +#ifdef _ALL_SOURCE +#define _SC_AIX_UKEYS 161 +#endif /* _ALL_SOURCE */ + +#endif /* _POSIX_SOURCE */ + + +#if _XOPEN_SOURCE_EXTENDED==1 +#ifdef _LARGE_FILES +#define ftruncate ftruncate64 +#define truncate truncate64 +#endif + +#ifndef _H_LOCKF +#include /* lockf definitions for portability */ +#endif + 
+#ifdef _NO_PROTO +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern int brk(); + extern int getpagesize(); +#ifndef _MSGQSUPPORT + extern int __fd_getdtablesize(); + static int getdtablesize() + { + return __fd_getdtablesize(); + } +#else + extern int getdtablesize(); +#endif /* _MSGQSUPPORT */ + + extern void *sbrk(); +#endif /* _POSIX_C_SOURCE<200112L */ + extern int fchdir(); + extern int fchown(); + extern int ftruncate(); + extern long gethostid(); + extern int gethostname(); + extern pid_t getpgid(); + extern pid_t getsid(); + extern char *getwd(); + extern int lchown(); + extern int readlink(); + extern pid_t setpgrp(); + extern int setregid(); + extern int setreuid(); + extern int symlink(); + extern void sync(); + extern int truncate(); + extern useconds_t ualarm(); + extern int usleep(); + extern pid_t vfork(); +#else /* _NO_PROTO */ +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern int brk(void *); + extern int getpagesize(void); +#ifndef _MSGQSUPPORT + extern int __fd_getdtablesize(void); + static int getdtablesize() + { + return __fd_getdtablesize(); + } +#else + extern int getdtablesize(void); +#endif /* _MSGQSUPPORT */ +#ifdef _LINUX_SOURCE_COMPAT + extern void *sbrk(ptrdiff_t); +#elif (_XOPEN_SOURCE >= 500) || defined(__64BIT__) + extern void *sbrk(intptr_t); +#else + extern void *sbrk(int); +#endif +#endif /* _POSIX_C_SOURCE<200112L */ + extern int fchdir(int); + extern int fchown(int, uid_t, gid_t); + extern int ftruncate(int, off_t); +#ifdef _LARGE_FILE_API + extern int ftruncate64(int, off64_t); +#endif + extern int gethostname(char *, size_t); + extern long gethostid(void); + extern pid_t getpgid(pid_t); + extern pid_t getsid(pid_t); + extern char *getwd(char *); + extern int lchown(const char *, uid_t, gid_t); + +#if (defined(_SUSV3_READLINK) || \ + (!defined(_ALL_SOURCE) && (_POSIX_C_SOURCE >= 200112L))) + /* If SUSV3 readlink specifically requested or if 
strict SUSv3 + * environment requested */ +#ifdef __64BIT__ +static ssize_t readlink(const char *__restrict__ __path, + char *__restrict__ __buf, size_t __bufsize) +{ + extern ssize_t __readlink64(const char *__restrict__, char *__restrict__, size_t); + return __readlink64(__path, __buf, __bufsize); +} +#else + extern ssize_t readlink(const char *__restrict__, char *__restrict__, size_t); +#endif /* __64BIT__ */ +#else + extern int readlink(const char *, char *, size_t); +#endif /* _SUSV3_READLINK || !_ALL_SOURCE && _POSIX_C_SOURCE >= 200112L */ + +#ifndef _BSD + extern pid_t setpgrp(void); +#endif /* _BSD */ + extern int setregid(gid_t, gid_t); + extern int setreuid(uid_t, uid_t); + extern int symlink(const char *, const char *); + extern void sync(void); + extern int truncate(const char *, off_t); +#ifdef _LARGE_FILE_API + extern int truncate64(const char *, off64_t); +#endif + extern useconds_t ualarm(useconds_t, useconds_t); + extern int usleep(useconds_t); + extern pid_t vfork(void); +#if _XOPEN_SOURCE>=500 + extern int getlogin_r(char *, size_t); + extern int ttyname_r(int, char *, size_t); + +#ifdef _LARGE_FILES +#define pread pread64 +#define pwrite pwrite64 +#endif /* _LARGE_FILES */ + + extern ssize_t pread(int, void *, size_t, off_t); + extern ssize_t pwrite(int, const void *, size_t, off_t); +#ifdef _LARGE_FILE_API + extern ssize_t pread64(int, void *, size_t, off64_t); + extern ssize_t pwrite64(int, const void *, size_t, off64_t); +#endif /* _LARGE_FILE_API */ +#endif /* _XOPEN_SOURCE>=500 */ + +#endif /* _NO_PROTO */ + +#endif /* _XOPEN_SOURCE_EXTENDED */ + +#ifdef _ALL_SOURCE + +extern char **environ; + +#ifndef _KERNEL +#ifdef _NO_PROTO + extern pid_t f_fork(); +#else /* _NO_PROTO */ + extern pid_t f_fork(void); +#endif /* _NO_PROTO */ +#endif /* _KERNEL */ + +#ifdef _NO_PROTO + extern char * cuserid(); + extern int ioctl(); +#ifdef __64BIT__ + extern int ioctlx(); + extern int ioctl32(); + extern int ioctl32x(); +#endif /* __64BIT__ */ + extern int 
readx(); + extern int setgroups(); + extern int writex(); + extern int setegid(); + extern int seteuid(); + extern int setrgid(); + extern int setruid(); + extern offset_t llseek(); + extern char * getusershell(); + extern void setusershell(); + extern void endusershell(); + extern char * get_current_dir_name(); + extern int sysfs(); +#else + extern char * cuserid(char *); + extern int setegid(gid_t); + extern int seteuid(uid_t); + extern int setrgid(gid_t); + extern int setruid(uid_t); +#ifndef _BSD + extern int ioctl(int, int, ...); +#endif /* _BSD */ +#ifdef __64BIT__ + extern int ioctlx(int, int, void *, long); + extern int ioctl32(int, int, ...); + extern int ioctl32x(int, int, unsigned int, unsigned int); +#endif /* __64BIT__ */ + extern int setgroups(int, gid_t []); +#ifndef _KERNEL + extern int readx(int, char*, unsigned, long); + extern int writex(int, char*, unsigned, long); + +#ifdef _LARGE_FILES +#define fclear fclear64 +#define fsync_range fsync_range64 +#endif + extern off_t fclear(int, off_t); + extern int fsync_range(int, int, off_t, off_t); +#ifdef _LARGE_FILE_API + extern off64_t fclear64(int, off64_t); + extern int fsync_range64(int, int, off64_t, off64_t); +#endif + extern offset_t llseek(int, offset_t, int); + extern char * getusershell(void); + extern void setusershell(void); + extern void endusershell(void); + extern char * get_current_dir_name(void); + extern int sysfs(int, ...); + extern int finfo(const char *, int, void *, int32long64_t); + extern int ffinfo(int, int, void *, int32long64_t); + +#endif /* ndef _KERNEL */ + +#endif /* _NO_PROTO */ + +#define _AES_OS_VERSION 1 /* OSF, AES version */ + +#endif /* _ALL_SOURCE */ + +#ifdef __cplusplus +} +#endif + +#endif /* _H_UNISTD */ diff --git a/workloads/realworld/uvm_prefetch/knn/Makefile b/workloads/realworld/uvm_prefetch/knn/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..6ebd397ea3d2a2082eb070de41b7f1eb452dbf53 --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch/knn/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := knn +CUFILES := knn_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o knn + diff --git a/workloads/realworld/uvm_prefetch/knn/knn_cuda.cu b/workloads/realworld/uvm_prefetch/knn/knn_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..98e6d443c69169c4d1bd839d56fb6014bf5e0993 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/knn/knn_cuda.cu @@ -0,0 +1,601 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. 
Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +//-----------------------------------------------------------------------------------------------// +// KERNELS // +//-----------------------------------------------------------------------------------------------// +__global__ void extract_with_interpolation(int nthreads, float *data, + float *n_xy_coords, + float *extracted_data, + int n_max_coord, int channels, + int height, int width) { + + int x0, x1, y0, y1, nc; + float wx0, wx1, wy0, wy1; + int n, nd; + float x, y; + + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + n = (index / n_max_coord); + nd = n * n_max_coord * channels; + x = n_xy_coords[index * 2]; + y = n_xy_coords[index * 2 + 1]; + + x0 = static_cast(floor(x)); + x1 = x0 + 1; + y0 = static_cast(floor(y)); + y1 = y0 + 1; + + x0 = x0 <= 0 ? 0 : (x0 >= (width - 1) ? (width - 1) : x0); + y0 = y0 <= 0 ? 0 : (y0 >= (height - 1) ? (height - 1) : y0); + x1 = x1 <= 0 ? 0 : (x1 >= (width - 1) ? (width - 1) : x1); + y1 = y1 <= 0 ? 0 : (y1 >= (height - 1) ? 
(height - 1) : y1); + + wx0 = static_cast(x1) - x; + wx1 = x - x0; + wy0 = static_cast(y1) - y; + wy1 = y - y0; + + if (x0 == x1) { + wx0 = 1; + wx1 = 0; + } + if (y0 == y1) { + wy0 = 1; + wy1 = 0; + } + for (int c = 0; c < channels; c++) { + nc = (n * channels + c) * height; + // extracted_data[index * channels + c] = wy0 * wx0 * data[(nc + y0) * + // width + x0] + // extracted_data[nd + index % n_max_coord + n_max_coord * c] = index; + extracted_data[nd + index % n_max_coord + n_max_coord * c] = + wy0 * wx0 * data[(nc + y0) * width + x0] + + wy1 * wx0 * data[(nc + y1) * width + x0] + + wy0 * wx1 * data[(nc + y0) * width + x1] + + wy1 * wx1 * data[(nc + y1) * width + x1]; + } + } +} + +/** + * Computes the distance between two matrix A (reference points) and + * B (query points) containing respectively wA and wB points. + * + * @param A pointer on the matrix A + * @param wA width of the matrix A = number of points in A + * @param B pointer on the matrix B + * @param wB width of the matrix B = number of points in B + * @param dim dimension of points = height of matrices A and B + * @param AB pointer on the matrix containing the wA*wB distances computed + */ +__global__ void cuComputeDistanceGlobal(float *A, int wA, float *B, int wB, + int dim, float *AB) { + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // Declaration of the shared memory arrays As and Bs used to store the + // sub-matrix of A and B + __shared__ float shared_A[BLOCK_DIM][BLOCK_DIM]; + __shared__ float shared_B[BLOCK_DIM][BLOCK_DIM]; + + // Sub-matrix of A (begin, step, end) and Sub-matrix of B (begin, step) + __shared__ int begin_A; + __shared__ int begin_B; + __shared__ int step_A; + __shared__ int step_B; + __shared__ int end_A; + + // Thread index + int tx = threadIdx.x; + int ty = threadIdx.y; + + // Other variables + float tmp; + float ssd = 0; + + // Loop parameters + begin_A = BLOCK_DIM * blockIdx.y; + begin_B = BLOCK_DIM * blockIdx.x; + step_A = 
BLOCK_DIM * wA; + step_B = BLOCK_DIM * wB; + end_A = begin_A + (dim - 1) * wA; + + // if (blockIdx.x == 0 && blockIdx.y == 0 && tx == 0 && ty == 0) + // printf("begin_A is %d, end_A is %d, step_A is %d, begin_B is %d, step_B is %d\n", begin_A, end_A, step_A, begin_B, step_B); + + // Conditions + int cond0 = (begin_A + tx < wA); // used to write in shared memory + int cond1 = (begin_B + tx < wB); // used to write in shared memory & to + // computations and to write in output matrix + int cond2 = + (begin_A + ty < wA); // used to computations and to write in output matrix + // Loop over all the sub-matrices of A and B required to compute the block + // sub-matrix + for (int a = begin_A, b = begin_B; a <= end_A; a += step_A, b += step_B) { + // if (blockIdx.x == 0 && blockIdx.y == 0 && tx == 0 && ty == 0) + // printf("a is %d, end_A is %d, step_A is %d, b is %d, step_B is %d\n", begin_A, end_A, step_A, begin_B, step_B); + // Load the matrices from device memory to shared memory; each thread loads + // one element of each matrix + if (a / wA + ty < dim) { + shared_A[ty][tx] = (cond0) ? A[a + wA * ty + tx] : 0; + shared_B[ty][tx] = (cond1) ? B[b + wB * ty + tx] : 0; + } else { + shared_A[ty][tx] = 0; + shared_B[ty][tx] = 0; + } + + // Synchronize to make sure the matrices are loaded + block.sync(); + // Compute the difference between the two matrixes; each thread computes one + // element of the block sub-matrix + if (cond2 && cond1) { + for (int k = 0; k < BLOCK_DIM; ++k) { + tmp = shared_A[k][ty] - shared_B[k][tx]; + ssd += tmp * tmp; + } + } + + // Synchronize to make sure that the preceding computation is done before + // loading two new sub-matrices of A and B in the next iteration + block.sync(); + } + + // Write the block sub-matrix to device memory; each thread writes one element + if (cond2 && cond1) { + AB[(begin_A + ty) * wB + begin_B + tx] = ssd; + } +} + +/** + * Gathers k-th smallest distances for each column of the distance matrix in + * the top. 
+ * + * @param dist distance matrix + * @param ind index matrix + * @param width width of the distance matrix and of the index matrix + * @param height height of the distance matrix and of the index matrix + * @param k number of neighbors to consider + */ +__global__ void cuInsertionSort(float *dist, int *ind, int width, int height, + int k) { + // printf("test2\n"); + // Variables + int l, i, j; + float *p_dist; + int *p_ind; + float curr_dist, max_dist; + int curr_row, max_row; + unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x; + + if (xIndex < width) { + // Pointer shift, initialization, and max value + p_dist = dist + xIndex; + p_ind = ind + xIndex; + max_dist = p_dist[0]; + p_ind[0] = 0; + + // Part 1 : sort kth firt elementZ + for (l = 1; l < k; l++) { + curr_row = l * width; + curr_dist = p_dist[curr_row]; + if (curr_dist < max_dist) { + i = l - 1; + for (int a = 0; a < l - 1; a++) { + if (p_dist[a * width] > curr_dist) { + i = a; + break; + } + } + for (j = l; j > i; j--) { + p_dist[j * width] = p_dist[(j - 1) * width]; + p_ind[j * width] = p_ind[(j - 1) * width]; + } + p_dist[i * width] = curr_dist; + p_ind[i * width] = l; + } else { + p_ind[l * width] = l; + } + max_dist = p_dist[curr_row]; + } + + // Part 2 : insert element in the k-th first lines + max_row = (k - 1) * width; + for (l = k; l < height; l++) { + curr_dist = p_dist[l * width]; + if (curr_dist < max_dist) { + i = k - 1; + for (int a = 0; a < k - 1; a++) { + if (p_dist[a * width] > curr_dist) { + i = a; + break; + } + } + for (j = k - 1; j > i; j--) { + p_dist[j * width] = p_dist[(j - 1) * width]; + p_ind[j * width] = p_ind[(j - 1) * width]; + } + p_dist[i * width] = curr_dist; + p_ind[i * width] = l; + max_dist = p_dist[max_row]; + } + } + } +} + +/** + * Computes the square root of the first line (width-th first element) + * of the distance matrix. 
+ * + * @param dist distance matrix + * @param width width of the distance matrix + * @param k number of neighbors to consider + */ +__global__ void cuParallelSqrt(float *dist, int width, int k) { + unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int yIndex = blockIdx.y * blockDim.y + threadIdx.y; + // printf("test3\n"); + if (xIndex < width && yIndex < k) + dist[yIndex * width + xIndex] = sqrt(dist[yIndex * width + xIndex]); +} + +//-----------------------------------------------------------------------------------------------// +// K-th NEAREST NEIGHBORS // +//-----------------------------------------------------------------------------------------------// + +/** + * Prints the error message return during the memory allocation. + * + * @param error error value return by the memory allocation function + * @param memorySize size of memory tried to be allocated + */ +void printErrorMessage(cudaError_t error, int memorySize) { + printf("==================================================\n"); + printf("MEMORY ALLOCATION ERROR : %s\n", cudaGetErrorString(error)); + printf("Whished allocated memory : %d\n", memorySize); + printf("==================================================\n"); +} + +/** + * K nearest neighbor algorithm + * - Initialize CUDA + * - Allocate device memory + * - Copy point sets (reference and query points) from host to device memory + * - Compute the distances + indexes to the k nearest neighbors for each query + * point + * - Copy distances from device to host memory + * + * @param ref_host reference points ; pointer to linear matrix + * @param ref_width number of reference points ; width of the matrix + * @param query_host query points ; pointer to linear matrix + * @param query_width number of query points ; width of the matrix + * @param height dimension of points ; height of the matrices + * @param k number of neighbor to consider + * @param dist_host distances to k nearest neighbors ; pointer to linear + * matrix + * 
@param dist_host indexes of the k nearest neighbors ; pointer to linear + * matrix + * + */ +void knn_cuda(float *ref_host, int ref_width, float *query_host, + int query_width, int height, int k, float *dist_host, + int *ind_host) { + // Grids ans threads + dim3 g_16x16(query_width / 16, ref_width / 16, 1); + dim3 t_16x16(16, 16, 1); + if (query_width % 16 != 0) + g_16x16.x += 1; + if (ref_width % 16 != 0) + g_16x16.y += 1; + // + dim3 g_256x1(query_width / 256, 1, 1); + dim3 t_256x1(256, 1, 1); + if (query_width % 256 != 0) + g_256x1.x += 1; + + dim3 g_k_16x16(query_width / 16, k / 16, 1); + dim3 t_k_16x16(16, 16, 1); + if (query_width % 16 != 0) + g_k_16x16.x += 1; + if (k % 16 != 0) + g_k_16x16.y += 1; + + // printf("ref_width is %d, query_width is %d, height is %d\n", ref_width, query_width, height); + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStream_t stream4; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + cudaStreamCreate(&stream4); + + // knn_cuda(ref_device, ref_nb, query_device, query_nb, dim, k, dist_device, ind_device); + // memcpy(ref_device, ref, ref_nb * dim * sizeof(float)); + // memcpy(query_device, query, query_nb * dim * sizeof(float)); + // memcpy(dist, dist_device, query_nb * ref_nb * sizeof(float)); + // memcpy(ind, ind_device, query_nb * k * sizeof(int)); + + cudaMemPrefetchAsync(ref_host, ref_width * height * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(query_host, query_width * height * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(dist_host, query_width * ref_width * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + cudaMemPrefetchAsync(ind_host, query_width * k * sizeof(int), GPU_DEVICE, stream4); + cudaStreamSynchronize(stream4); + + // Kernel 1: Compute all the distances + cuComputeDistanceGlobal<<>>(ref_host, ref_width, query_host, + 
query_width, height, dist_host); + // Kernel 2: Sort each column + cuInsertionSort<<>>(dist_host, ind_host, query_width, + ref_width, k); + // Kernel 3: Compute square root of k first elements + cuParallelSqrt<<>>(dist_host, query_width, k); + cudaDeviceSynchronize(); +} + +float compute_distance(const float *ref, int ref_nb, const float *query, + int query_nb, int dim, int ref_index, int query_index) { + float sum = 0.f; + for (int d = 0; d < dim; ++d) { + const float diff = + ref[d * ref_nb + ref_index] - query[d * query_nb + query_index]; + sum += diff * diff; + } + return sqrtf(sum); +} + +void modified_insertion_sort(float *dist, int *index, int length, int k) { + + // Initialise the first index + index[0] = 0; + + // Go through all points + for (int i = 1; i < length; ++i) { + + // Store current distance and associated index + float curr_dist = dist[i]; + int curr_index = i; + + // Skip the current value if its index is >= k and if it's higher the k-th + // slready sorted mallest value + if (i >= k && curr_dist >= dist[k - 1]) { + continue; + } + + // Shift values (and indexes) higher that the current distance to the right + int j = min(i, k - 1); + while (j > 0 && dist[j - 1] > curr_dist) { + dist[j] = dist[j - 1]; + index[j] = index[j - 1]; + --j; + } + + // Write the current distance and index at their position + dist[j] = curr_dist; + index[j] = curr_index; + } +} + +bool knn_c(const float *ref, int ref_nb, const float *query, int query_nb, + int dim, int k, float *knn_dist, int *knn_index) { + // Allocate local array to store all the distances / indexes for a given query + // point + float *dist = (float *)malloc(ref_nb * sizeof(float)); + int *index = (int *)malloc(ref_nb * sizeof(int)); + + // Allocation checks + if (!dist || !index) { + printf("Memory allocation error\n"); + free(dist); + free(index); + return false; + } + + // Process one query point at the time + for (int i = 0; i < query_nb; ++i) { + + // Compute all distances / indexes + for (int 
j = 0; j < ref_nb; ++j) { + dist[j] = compute_distance(ref, ref_nb, query, query_nb, dim, j, i); + index[j] = j; + } + + // Sort distances / indexes + modified_insertion_sort(dist, index, ref_nb, k); + + // Copy k smallest distances and their associated index + for (int j = 0; j < k; ++j) { + knn_dist[j * query_nb + i] = dist[j]; + knn_index[j * query_nb + i] = index[j]; + } + } + + // Memory clean-up + free(dist); + free(index); + return true; +} + +/** + * Example of use of kNN search CUDA. + */ +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + // Variables and parameters + float *ref; // Pointer to reference point array + float *query; // Pointer to query point array + float *dist, *dist_c; // Pointer to distance array + int *ind, *ind_c; // Pointer to index array + int ref_nb = 4096; // Reference point number, max=65535 + int query_nb = 4096; // Query point number, max=65535 + int dim = 128; // Dimension of points + int k = 20; // Nearest neighbors to consider + int iterations = 100; + + if (argc >= 4) { + ref_nb = atoi(argv[1]); + query_nb = atoi(argv[2]); + dim = atoi(argv[3]); + } + + int c_iterations = 10; + int i; + const float precision = 0.001f; // distance error max + int nb_correct_precisions = 0; + int nb_correct_indexes = 0; + float *knn_dist = (float *)malloc(query_nb * k * sizeof(float)); + int *knn_index = (int *)malloc(query_nb * k * sizeof(int)); + + // Memory allocation + ref = (float *)malloc(ref_nb * dim * sizeof(float)); + query = (float *)malloc(query_nb * dim * sizeof(float)); + dist = (float *)malloc(query_nb * ref_nb * sizeof(float)); + ind = (int *)malloc(query_nb * k * sizeof(int)); + + dist_c = (float *)malloc(query_nb * k * sizeof(float)); + ind_c = (int *)malloc(query_nb * k * sizeof(float)); + + // Init + srand(time(NULL)); + for (i = 0; i < ref_nb * dim; i++) + ref[i] = (float)rand() / (float)RAND_MAX; + for (i = 
0; i < query_nb * dim; i++)
    query[i] = (float)rand() / (float)RAND_MAX;

  // Ground-truth CPU computation is disabled; see NOTE below about the
  // accuracy check that still reads knn_dist / knn_index.
  // printf("Ground truth computation in progress...\n\n");
  // if (!knn_c(ref, ref_nb, query, query_nb, dim, k, knn_dist, knn_index)) {
  //   free(knn_dist);
  //   free(knn_index);
  //   return EXIT_FAILURE;
  // }

  // Variables for duration evaluation
  float elapsed_time;

  // Display informations
  printf("Number of reference points      : %6d\n", ref_nb);
  printf("Number of query points          : %6d\n", query_nb);
  printf("Dimension of points             : %4d\n", dim);
  printf("Number of neighbors to consider : %4d\n", k);
  printf("Processing kNN search           :\n");

  float precision_accuracy = 0.0f;
  float index_accuracy = 0.0f;
  /*
  printf("On CPU: \n");
  struct timeval tic;
  gettimeofday(&tic, NULL);
  for (i = 0; i < c_iterations; i++) {
    knn_c(ref, ref_nb, query, query_nb, dim, k, dist_c, ind_c);
  }

  for (int i = 0; i < query_nb * k; ++i) {
    if (fabs(dist_c[i] - knn_dist[i]) <= precision) {
      nb_correct_precisions++;
    }
    if (ind_c[i] == knn_index[i]) {
      nb_correct_indexes++;
    }
  }

  struct timeval toc;
  gettimeofday(&toc, NULL);
  elapsed_time = toc.tv_sec - tic.tv_sec;
  elapsed_time += (toc.tv_usec - tic.tv_usec) / 1000000.;
  precision_accuracy = nb_correct_precisions / ((float)query_nb * k);
  index_accuracy = nb_correct_indexes / ((float)query_nb * k);
  printf("%f, %f\n", precision_accuracy, index_accuracy);
  printf(" done in %f s for %d iterations (%f s by iteration)\n", elapsed_time,
         c_iterations, elapsed_time / (c_iterations));
  */
  printf("on GPU: \n");

  // Call kNN search CUDA
  GPU_argv_init();

  // Begin CUPTI tracing + CPU wall-clock measurement (see common/ helpers).
  initTrace();
  startCPU();

  float *ref_device;
  float *query_device;
  float *dist_device;
  int *ind_device;

  // Managed (unified-memory) buffers: written by host memcpy below, then
  // prefetched/consumed by the kernels inside knn_cuda.
  cudaMallocManaged(&ref_device, ref_nb * dim * sizeof(float));
  cudaMallocManaged(&query_device, query_nb * dim * sizeof(float));
  cudaMallocManaged(&dist_device, query_nb * ref_nb * sizeof(float));
  cudaMallocManaged(&ind_device, query_nb * k *
sizeof(int));

  memcpy(ref_device, ref, ref_nb * dim * sizeof(float));
  memcpy(query_device, query, query_nb * dim * sizeof(float));

  for (i = 0; i < iterations; i++) {
    // knn_cuda(ref, ref_nb, query, query_nb, dim, k, dist, ind);
    knn_cuda(ref_device, ref_nb, query_device, query_nb, dim, k, dist_device, ind_device);
  }

  memcpy(dist, dist_device, query_nb * ref_nb * sizeof(float));
  memcpy(ind, ind_device, query_nb * k * sizeof(int));

  cudaFree(ind_device);
  cudaFree(dist_device);
  cudaFree(query_device);
  cudaFree(ref_device);

  endCPU();
  finiTrace();

  // NOTE(review): the ground-truth knn_c computation above is commented out,
  // so knn_dist / knn_index are uninitialized here and these accuracy numbers
  // are not meaningful — confirm whether the check should be re-enabled.
  nb_correct_precisions = 0;
  nb_correct_indexes = 0;
  for (int i = 0; i < query_nb * k; ++i) {
    if (fabs(dist[i] - knn_dist[i]) <= precision) {
      nb_correct_precisions++;
    }
    if (ind[i] == knn_index[i]) {
      nb_correct_indexes++;
    }
  }

  precision_accuracy = nb_correct_precisions / ((float)query_nb * k);
  index_accuracy = nb_correct_indexes / ((float)query_nb * k);
  printf("%f, %f\n", precision_accuracy, index_accuracy);


  // Destroy cuda event object and free memory
  free(ind);
  free(dist);
  free(query);
  free(ref);
  free(dist_c);
  free(ind_c);
}
\ No newline at end of file
diff --git a/workloads/realworld/uvm_prefetch/knn/run.sh b/workloads/realworld/uvm_prefetch/knn/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..541db1387ce3ebe87b1338f079609b8b4a2736c6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/knn/run.sh @@ -0,0 +1 @@ +./knn 4096 4096 128 \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/knn/run_super.sh b/workloads/realworld/uvm_prefetch/knn/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..86ad9321b470072e5e84e706e1619ee200cf2b31 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/knn/run_super.sh @@ -0-0,0 +1 @@ +./knn 32768 32768 128 \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/lavaMD/README b/workloads/realworld/uvm_prefetch/lavaMD/README
new file mode 100755 index 0000000000000000000000000000000000000000..27b526ff669e9632b11193634307bfe778a2dfff --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lavaMD/README @@ -0,0 +1,50 @@ +//======================================================================================================================================================150 +// DESCRIPTION +//======================================================================================================================================================150 + +This is the CUDA version of the code. + +The code calculates particle potential and relocation due to mutual forces between particles within a large 3D space. This space is +divided into cubes, or large boxes, that are allocated to individual cluster nodes. The large box at each node is further divided into +cubes, called boxes. 26 neighbor boxes surround each box (the home box). Home boxes at the boundaries of the particle space have fewer neighbors. +Particles only interact with those other particles that are within a cutoff radius since ones at larger distances exert negligible forces. Thus the +box size s chosen so that cutoff radius does not span beyond any neighbor box for any particle in a home box, thus limiting the reference space to +a finite number of boxes. + +This code [1] was derived from the ddcMD application [2] by rewriting the front end and structuring it for parallelization. This code represents MPI +task that runs on a single cluster node. While the details of the code are somewhat different than the original, the code retains the structure of the +MPI task in the original code. Since the rest of MPI code is not included here, the application first emulates MPI partitioning of the particle space +into boxes. Then, for every particle in the home box, the nested loop processes interactions first with other particles in the home box and then with +particles in all neighbor boxes. 
The processing of each particle consists of a single stage of calculation that is enclosed in the innermost loop. The +nested loops in the application were parallelized in such a way that at any point of time GPU warp/wavefront accesses adjacent memory locations. The +speedup depends on the number of boxes, particles (fixed) and the actualcal culation for each particle (fixed). The application is memory bound, and +GPU speedup seems to saturate at about 16x when compared to single-core CPU. + +More information about the parallel version of this code can be found in: +[1] L. G. Szafaryn, T. Gamblin, B. deSupinski and K. Skadron. "Experiences with Achieving Portability across Heterogeneous Architectures." Submitted to +WOLFHPC workshop at 25th International Conference on Supercomputing (ICS). Tucson, AZ. 2010. +More about the original ddcMD application can be found in: +[2] F. H. Streitz, J. N. Glosli, M. V. Patel, B. Chan, R. K. Yates, B. R. de Supinski, J. Sexton, J and A. Gunnels. "100+ TFlop Solidification Simulations +on BlueGene/L." In Proceedings of the 2005 Supercomputing Conference (SC 05). Seattle, WA. 2005. 

//======================================================================================================================================================150
// USE
//======================================================================================================================================================150

The code takes the followint parameters:
-boxes1d (number of boxes in one dimension, the total number of boxes will be that^3)

The code can be run as follows:
./lavaMD -boxes1d 10

******Adjustable work group size*****
RD_WG_SIZE_0 or RD_WG_SIZE_0_0

USAGE:
make clean
make KERNEL_DIM="-DRD_WG_SIZE_0=128"

######OUTPUT FOR VALIDATION########
USAGE:
make clean
make OUTPUT=Y
\ No newline at end of file
diff --git a/workloads/realworld/uvm_prefetch/lavaMD/kernel/kernel_gpu_cuda.cu b/workloads/realworld/uvm_prefetch/lavaMD/kernel/kernel_gpu_cuda.cu new file mode 100755 index 0000000000000000000000000000000000000000..15164488f40349d583134da5d2a03a1ffc854c52 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lavaMD/kernel/kernel_gpu_cuda.cu @@ -0,0 +1,199 @@
//----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------200
// plasmaKernel_gpu_2
//----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------200

// NOTE(review): the two header names were stripped by the diff sanitizer
// (angle-bracketed text lost). Given the use of cooperative_groups below and
// nvcuda::experimental, they are presumably <cooperative_groups.h> and
// <cuda_pipeline.h> — confirm against the original file.
#include
#include

using namespace nvcuda::experimental;

#define PREFETCH_COUNT 2

// Per-block computation of particle potential/relocation for a range of boxes.
__global__ void kernel_gpu_cuda(par_str d_par_gpu,
                                dim_str d_dim_gpu,
                                box_str *d_box_gpu,
                                FOUR_VECTOR *d_rv_gpu,
                                fp *d_qv_gpu,
                                FOUR_VECTOR *d_fv_gpu,
                                int boxes_per_block)
{
  cooperative_groups::thread_block block = cooperative_groups::this_thread_block();

  //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180
  // THREAD PARAMETERS
  //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180
  int bx = blockIdx.x;  // get current horizontal block index (0-n)
  int tx = threadIdx.x; // get current horizontal thread index (0-n)
  int wtx = tx;         // working thread index, strided by NUMBER_THREADS in copy loops

  //------------------------------------------------------------------------------------------------------------------------------------------------------160
  // Extract input parameters
  //------------------------------------------------------------------------------------------------------------------------------------------------------160

  // parameters
  fp a2 = 2.0 * d_par_gpu.alpha * d_par_gpu.alpha;

  // home box
  int first_i;
  FOUR_VECTOR *rA;
  FOUR_VECTOR *fA;
  __shared__ FOUR_VECTOR rA_shared[100];

  // nei box
  int pointer;
  int k = 0;
  int first_j;
  FOUR_VECTOR *rB;
  fp *qB;
  int j = 0;
  __shared__ FOUR_VECTOR rB_shared[100];
  __shared__ double qB_shared[100];

  // common
  fp r2;
  fp u2;
  fp vij;
  fp fs;
  fp fxij;
  fp fyij;
  fp fzij;
  THREE_VECTOR d;

  // Each block processes a contiguous range of boxes.
  int box = bx * boxes_per_block;
  int end_box = box + boxes_per_block;

  //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180
  // DO FOR THE NUMBER OF BOXES
  //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180
  for (; box < end_box; box++)
  {
    //------------------------------------------------------------------------------------------------------------------------------------------------------160
    // Home box
    //------------------------------------------------------------------------------------------------------------------------------------------------------160

    //----------------------------------------------------------------------------------------------------------------------------------140
    // Setup parameters
    //----------------------------------------------------------------------------------------------------------------------------------140

    // home box - box parameters
    first_i = d_box_gpu[box].offset;

    // home box - distance, force, charge and type parameters
    rA = &d_rv_gpu[first_i];
    fA = &d_fv_gpu[first_i];

    //----------------------------------------------------------------------------------------------------------------------------------140
    // Copy to shared memory
    //----------------------------------------------------------------------------------------------------------------------------------140

    // home box - shared memory (cooperative strided copy by all threads)
    while (wtx < NUMBER_PAR_PER_BOX)
    {
      rA_shared[wtx] = rA[wtx];
      wtx = wtx + NUMBER_THREADS;
    }
    wtx = tx;

    // synchronize threads - not needed, but just to be safe
    block.sync();

    //------------------------------------------------------------------------------------------------------------------------------------------------------160
    // nei box loop
    //------------------------------------------------------------------------------------------------------------------------------------------------------160

    // if (wtx == 0)
    //   printf("d_box_gpu[%d].nn is %d\n", bx, d_box_gpu[bx].nn);

    // Tile 0 is the home box itself; tiles 1..nn are its neighbor boxes.
    int tile = 0;
    int end_tile = 1 + d_box_gpu[box].nn;

    // loop over neiing boxes of home box
    for (; tile < end_tile; tile++)
    {

      //----------------------------------------50
      // nei box - get pointer to the right box
      //----------------------------------------50

      if (tile == 0)
      {
        pointer = box; // set first box to be processed to home box
      }
      else
      {
        pointer =
d_box_gpu[box].nei[tile - 1].number; // remaining boxes are nei boxes
      }

      //----------------------------------------------------------------------------------------------------------------------------------140
      // Setup parameters
      //----------------------------------------------------------------------------------------------------------------------------------140

      // nei box - box parameters
      first_j = d_box_gpu[pointer].offset;

      // nei box - distance, (force), charge and (type) parameters
      rB = &d_rv_gpu[first_j];
      qB = &d_qv_gpu[first_j];

      //----------------------------------------------------------------------------------------------------------------------------------140
      // Setup parameters
      //----------------------------------------------------------------------------------------------------------------------------------140

      // nei box - shared memory
      while (wtx < NUMBER_PAR_PER_BOX)
      {
        rB_shared[wtx] = rB[wtx];
        qB_shared[wtx] = qB[wtx];
        wtx = wtx + NUMBER_THREADS;
      }
      wtx = tx;

      // synchronize threads because in next section each thread accesses data brought in by different threads here
      block.sync();

      //----------------------------------------------------------------------------------------------------------------------------------140
      // Calculation
      //----------------------------------------------------------------------------------------------------------------------------------140

      // loop for the number of particles in the home box
      // NOTE(review): the remainder of this kernel was lost by the diff
      // sanitizer (truncated at "for (int i=0; i"); restore from the original
      // file before building.
      // for (int i=0; i
#include "../../../../common/cupti_add.h"
#include "../../../../common/cpu_timestamps.h"

// Host-side wrapper: allocates managed memory, copies inputs, prefetches to
// the GPU, launches kernel_gpu_cuda, copies results back, and reports timing.
void
kernel_gpu_cuda_wrapper(par_str par_cpu,
                        dim_str dim_cpu,
                        box_str* box_cpu,
                        FOUR_VECTOR* rv_cpu,
                        fp* qv_cpu,
                        FOUR_VECTOR* fv_cpu,
                        int nblocks)
{

  //======================================================================================================================================================150
  // CPU VARIABLES
  //======================================================================================================================================================150

  // timer checkpoints for the per-stage breakdown printed at the end
  long long time0;
  long long time1;
  long long time2;
  long long time3;
  long long time4;
  long long time5;
  long long time6;

  time0 = get_time();

  //======================================================================================================================================================150
  // GPU SETUP
  //======================================================================================================================================================150

  //====================================================================================================100
  // INITIAL DRIVER OVERHEAD
  //====================================================================================================100
  GPU_argv_init();

  // Begin CUPTI tracing + CPU wall-clock measurement (see common/ helpers).
  initTrace();
  startCPU();

  // NOTE(review): cudaThreadSynchronize() is deprecated in favor of
  // cudaDeviceSynchronize() — consider updating.
  cudaThreadSynchronize();

  //====================================================================================================100
  // VARIABLES
  //====================================================================================================100

  box_str* d_box_gpu;
  FOUR_VECTOR* d_rv_gpu;
  fp* d_qv_gpu;
  FOUR_VECTOR* d_fv_gpu;

  dim3 threads;
  dim3 blocks;

  //====================================================================================================100
  // EXECUTION PARAMETERS
  //====================================================================================================100

  // blocks.x = dim_cpu.number_boxes;
  blocks.x = nblocks * nblocks * nblocks;
  blocks.y = 1;
  threads.x = NUMBER_THREADS; // define the number of threads in the block
  threads.y = 1;

  // When there are more boxes than blocks, each block handles several boxes.
  int boxes_per_block = 1;
  if (dim_cpu.number_boxes >= blocks.x)
  {
    boxes_per_block = (dim_cpu.number_boxes + blocks.x - 1) / blocks.x;
  }

  time1 = get_time();

  //======================================================================================================================================================150
  // GPU MEMORY (MALLOC)
  //======================================================================================================================================================150

  //====================================================================================================100
  // GPU MEMORY (MALLOC) COPY IN
  //====================================================================================================100

  //==================================================50
  // boxes
  //==================================================50

  cudaMallocManaged( (void **)&d_box_gpu,
                     dim_cpu.box_mem);

  //==================================================50
  // rv
  //==================================================50

  cudaMallocManaged((void **)&d_rv_gpu,
                    dim_cpu.space_mem);

  //==================================================50
  // qv
  //==================================================50

  cudaMallocManaged((void **)&d_qv_gpu,
                    dim_cpu.space_mem2);

  //====================================================================================================100
  // GPU MEMORY (MALLOC) COPY
  //====================================================================================================100

  //==================================================50
  // fv
  //==================================================50

  cudaMallocManaged((void **)&d_fv_gpu,
                    dim_cpu.space_mem);

  time2 = get_time();

  //======================================================================================================================================================150
  // GPU MEMORY COPY
  //======================================================================================================================================================150

  //====================================================================================================100
  // GPU MEMORY (MALLOC) COPY IN
  //====================================================================================================100

  //==================================================50
  // boxes
  //==================================================50

  memcpy(d_box_gpu,
         box_cpu,
         dim_cpu.box_mem);

  //==================================================50
  // rv
  //==================================================50

  memcpy(d_rv_gpu,
         rv_cpu,
         dim_cpu.space_mem);

  //==================================================50
  // qv
  //==================================================50

  memcpy(d_qv_gpu,
         qv_cpu,
         dim_cpu.space_mem2);

  //====================================================================================================100
  // GPU MEMORY (MALLOC) COPY
  //====================================================================================================100

  //==================================================50
  // fv
  //==================================================50

  memcpy(d_fv_gpu,
         fv_cpu,
         dim_cpu.space_mem);

  time3 = get_time();

  cudaStream_t stream1;
  cudaStream_t stream2;
  cudaStream_t stream3;
  cudaStream_t stream4;
  cudaStreamCreate(&stream1);
  cudaStreamCreate(&stream2);
  cudaStreamCreate(&stream3);
  cudaStreamCreate(&stream4);

  // Prefetch each managed buffer to the GPU on its own stream before launch.
  cudaMemPrefetchAsync(d_box_gpu, dim_cpu.box_mem, GPU_DEVICE, stream1);
  cudaStreamSynchronize(stream1);
  cudaMemPrefetchAsync(d_rv_gpu, dim_cpu.space_mem, GPU_DEVICE, stream2);
  cudaStreamSynchronize(stream2);
  cudaMemPrefetchAsync(d_qv_gpu, dim_cpu.space_mem2, GPU_DEVICE, stream3);
  cudaStreamSynchronize(stream3);
  cudaMemPrefetchAsync(d_fv_gpu, dim_cpu.space_mem, GPU_DEVICE, stream4);
  cudaStreamSynchronize(stream4);

//======================================================================================================================================================150 + // KERNEL + //======================================================================================================================================================150 + // launch kernel - all boxes + kernel_gpu_cuda<<>>(par_cpu, + dim_cpu, + d_box_gpu, + d_rv_gpu, + d_qv_gpu, + d_fv_gpu, + boxes_per_block); + + checkCUDAError("Start"); + cudaDeviceSynchronize(); + + time4 = get_time(); + + //======================================================================================================================================================150 + // GPU MEMORY COPY (CONTD.) + //======================================================================================================================================================150 + + memcpy(fv_cpu, + d_fv_gpu, + dim_cpu.space_mem); + + time5 = get_time(); + + //======================================================================================================================================================150 + // GPU MEMORY DEALLOCATION + //======================================================================================================================================================150 + + cudaFree(d_rv_gpu); + cudaFree(d_qv_gpu); + cudaFree(d_fv_gpu); + cudaFree(d_box_gpu); + + endCPU(); + finiTrace(); + + time6 = get_time(); + + //======================================================================================================================================================150 + // DISPLAY TIMING + //======================================================================================================================================================150 + + printf("Time spent in different stages of GPU_CUDA KERNEL:\n"); + + printf("%15.12f s, %15.12f % : GPU: SET DEVICE / DRIVER INIT\n", (float)(time1 - time0) / 1000000, (float)(time1 - time0) / 
(float)(time6 - time0) * 100); + printf("%15.12f s, %15.12f % : GPU MEM: ALO\n", (float)(time2 - time1) / 1000000, (float)(time2 - time1) / (float)(time6 - time0) * 100); + printf("%15.12f s, %15.12f % : GPU MEM: COPY IN\n", (float)(time3 - time2) / 1000000, (float)(time3 - time2) / (float)(time6 - time0) * 100); + + printf("%15.12f s, %15.12f % : GPU: KERNEL\n", (float)(time4 - time3) / 1000000, (float)(time4 - time3) / (float)(time6 - time0) * 100); + + printf("%15.12f s, %15.12f % : GPU MEM: COPY OUT\n", (float)(time5 - time4) / 1000000, (float)(time5 - time4) / (float)(time6 - time0) * 100); + printf("%15.12f s, %15.12f % : GPU MEM: FRE\n", (float)(time6 - time5) / 1000000, (float)(time6 - time5) / (float)(time6 - time0) * 100); + + printf("Total time:\n"); + printf("%.12f s\n", (float)(time6 - time0) / 1000000); +} diff --git a/workloads/realworld/uvm_prefetch/lavaMD/kernel/kernel_gpu_cuda_wrapper.h b/workloads/realworld/uvm_prefetch/lavaMD/kernel/kernel_gpu_cuda_wrapper.h new file mode 100755 index 0000000000000000000000000000000000000000..cf499f1480469569c649eccf174cc8ba0655ddbd --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lavaMD/kernel/kernel_gpu_cuda_wrapper.h @@ -0,0 +1,19 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//========================================================================================================================================================================================================200 +// KERNEL_GPU_CUDA_WRAPPER HEADER +//========================================================================================================================================================================================================200 + +void kernel_gpu_cuda_wrapper( par_str parms_cpu, + dim_str dim_cpu, + box_str* box_cpu, + FOUR_VECTOR* rv_cpu, + fp* qv_cpu, + FOUR_VECTOR* fv_cpu, + int nblocks); + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/uvm_prefetch/lavaMD/main.c 
b/workloads/realworld/uvm_prefetch/lavaMD/main.c new file mode 100755 index 0000000000000000000000000000000000000000..a7c88472e3939414bbdc314e2bfb1c46bc345bea --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lavaMD/main.c @@ -0,0 +1,318 @@ +//========================================================================================================================================================================================================200 +//======================================================================================================================================================150 +//====================================================================================================100 +//==================================================50 + +//========================================================================================================================================================================================================200 +// UPDATE +//========================================================================================================================================================================================================200 + +// 14 APR 2011 Lukasz G. 
Szafaryn + +//========================================================================================================================================================================================================200 +// DEFINE/INCLUDE +//========================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// LIBRARIES +//======================================================================================================================================================150 + +#include // (in path known to compiler) needed by printf +#include // (in path known to compiler) needed by malloc +#include // (in path known to compiler) needed by true/false + +//======================================================================================================================================================150 +// UTILITIES +//======================================================================================================================================================150 + +#include "./util/timer/timer.h" // (in path specified here) +#include "./util/num/num.h" // (in path specified here) + +//======================================================================================================================================================150 +// MAIN FUNCTION HEADER +//======================================================================================================================================================150 + +#include "./main.h" // (in the current directory) + +//======================================================================================================================================================150 +// KERNEL 
+//======================================================================================================================================================150 + +#include "./kernel/kernel_gpu_cuda_wrapper.h" // (in library path specified here) + +//========================================================================================================================================================================================================200 +// MAIN FUNCTION +//========================================================================================================================================================================================================200 +#define _POSIX_C_SOURCE 200809L +#include +#include +#include +extern inline __attribute__((always_inline)) unsigned long rdtsc() +{ + unsigned long a, d; + + __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); + + return (a | (d << 32)); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsp() { + struct timespec tms; + if (clock_gettime(CLOCK_REALTIME, &tms)) { + return -1; + } + unsigned long ns = tms.tv_sec * 1000000000; + ns += tms.tv_nsec; + return ns; +} + + +int +main( int argc, + char *argv []) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + + printf("thread block size of kernel = %d \n", NUMBER_THREADS); + //======================================================================================================================================================150 + // CPU/MCPU VARIABLES + //======================================================================================================================================================150 + + // timer + long long time0; + + time0 = get_time(); + + // timer + long long time1; + long long time2; + long long time3; + long long time4; + long long time5; + long long time6; + long long time7; + + // counters + int i, j, k, l, m, n; + + // system 
memory + par_str par_cpu; + dim_str dim_cpu; + box_str* box_cpu; + FOUR_VECTOR* rv_cpu; + fp* qv_cpu; + FOUR_VECTOR* fv_cpu; + int nh; + + time1 = get_time(); + + //======================================================================================================================================================150 + // CHECK INPUT ARGUMENTS + //======================================================================================================================================================150 + + // assing default values + dim_cpu.boxes1d_arg = 1; + + // go through arguments + dim_cpu.boxes1d_arg = atoi(argv[1]); + int nblocks = atoi(argv[2]); + + // Print configuration + printf("Configuration used: boxes1d = %d\n", dim_cpu.boxes1d_arg); + + time2 = get_time(); + + //======================================================================================================================================================150 + // INPUTS + //======================================================================================================================================================150 + + par_cpu.alpha = 0.5; + + time3 = get_time(); + + //======================================================================================================================================================150 + // DIMENSIONS + //======================================================================================================================================================150 + + // total number of boxes + dim_cpu.number_boxes = dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg; + + // how many particles space has in each direction + dim_cpu.space_elem = dim_cpu.number_boxes * NUMBER_PAR_PER_BOX; + dim_cpu.space_mem = dim_cpu.space_elem * sizeof(FOUR_VECTOR); + dim_cpu.space_mem2 = dim_cpu.space_elem * sizeof(fp); + + // box array + dim_cpu.box_mem = dim_cpu.number_boxes * sizeof(box_str); + + time4 = get_time(); + + 
//======================================================================================================================================================150 + // SYSTEM MEMORY + //======================================================================================================================================================150 + + //====================================================================================================100 + // BOX + //====================================================================================================100 + + // allocate boxes + box_cpu = (box_str*)malloc(dim_cpu.box_mem); + + // initialize number of home boxes + nh = 0; + + // home boxes in z direction + for(i=0; i=0 && (j+m)>=0 && (k+n)>=0)==true && ((i+l) 1) { + + // variables + int max_multiprocessors; + int max_device; + cudaDeviceProp properties; + + // initialize variables + max_multiprocessors = 0; + max_device = 0; + + for (device = 0; device < num_devices; device++) { + cudaGetDeviceProperties(&properties, device); + if (max_multiprocessors < properties.multiProcessorCount) { + max_multiprocessors = properties.multiProcessorCount; + max_device = device; + } + } + cudaSetDevice(max_device); + } + +} + +//====================================================================================================100 +// GET LAST ERROR +//====================================================================================================100 + +void checkCUDAError(const char *msg) +{ + cudaError_t err = cudaGetLastError(); + if( cudaSuccess != err) { + // fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + printf("Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + fflush(NULL); + exit(EXIT_FAILURE); + } +} + +//===============================================================================================================================================================================================================200 +// END SET_DEVICE 
CODE +//===============================================================================================================================================================================================================200 diff --git a/workloads/realworld/uvm_prefetch/lavaMD/util/device/device.h b/workloads/realworld/uvm_prefetch/lavaMD/util/device/device.h new file mode 100755 index 0000000000000000000000000000000000000000..23bb31d26c1bc0e607c9b2faf7bddaa5a5c06d98 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lavaMD/util/device/device.h @@ -0,0 +1,29 @@ +//===============================================================================================================================================================================================================200 +// SET_DEVICE HEADER +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// INCLUDE/DEFINE +//======================================================================================================================================================150 + +#include // (in library path known to compiler) needed by printf + +//======================================================================================================================================================150 +// FUNCTION PROTOTYPES +//======================================================================================================================================================150 + +//====================================================================================================100 +// SET DEVICE +//====================================================================================================100 + +void 
setdevice(void); + +//====================================================================================================100 +// GET LAST ERROR +//====================================================================================================100 + +void checkCUDAError(const char *msg); + +//===============================================================================================================================================================================================================200 +// END SET_DEVICE HEADER +//===============================================================================================================================================================================================================200 diff --git a/workloads/realworld/uvm_prefetch/lavaMD/util/num/num.c b/workloads/realworld/uvm_prefetch/lavaMD/util/num/num.c new file mode 100755 index 0000000000000000000000000000000000000000..980ff7498832c784eab718a8b886e82891047599 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lavaMD/util/num/num.c @@ -0,0 +1,53 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// DESCRIPTION +//===============================================================================================================================================================================================================200 + +// Returns: 0 if string does not represent integer +// 1 if string represents integer + +//===============================================================================================================================================================================================================200 +// NUM CODE 
+//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// ISINTEGER FUNCTION +//======================================================================================================================================================150 + +int isInteger(char *str){ + + //====================================================================================================100 + // make sure it's not empty + //====================================================================================================100 + + if (*str == '\0'){ + return 0; + } + + //====================================================================================================100 + // if any digit is not a number, return false + //====================================================================================================100 + + for(; *str != '\0'; str++){ + if (*str < 48 || *str > 57){ // digit characters (need to include . 
if checking for float) + return 0; + } + } + + //====================================================================================================100 + // it got past all my checks so I think it's a number + //====================================================================================================100 + + return 1; +} + +//===============================================================================================================================================================================================================200 +// END NUM CODE +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/uvm_prefetch/lavaMD/util/num/num.h b/workloads/realworld/uvm_prefetch/lavaMD/util/num/num.h new file mode 100755 index 0000000000000000000000000000000000000000..27a5e42fe2819d9ecc2f569b5979fb451985976f --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lavaMD/util/num/num.h @@ -0,0 +1,21 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// FILE HEADER +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// ISINTEGER FUNCTION PROTOTYPE +//======================================================================================================================================================150 + +int isInteger(char 
*str); + +//===============================================================================================================================================================================================================200 +// END FILE HEADER +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/uvm_prefetch/lavaMD/util/timer/timer.c b/workloads/realworld/uvm_prefetch/lavaMD/util/timer/timer.c new file mode 100755 index 0000000000000000000000000000000000000000..c7cc252b4e67b3a868722b7b2c58f5b863ae0cfc --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lavaMD/util/timer/timer.c @@ -0,0 +1,36 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// TIMER CODE +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// INCLUDE/DEFINE +//======================================================================================================================================================150 + +#include + +//======================================================================================================================================================150 +// FUNCTIONS +//======================================================================================================================================================150 + 
+//====================================================================================================100 +// DISPLAY TIME +//====================================================================================================100 + + // Returns the current system time in microseconds +long long get_time() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000000) + tv.tv_usec; +} + +//===============================================================================================================================================================================================================200 +// END TIMER CODE +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/uvm_prefetch/lavaMD/util/timer/timer.h b/workloads/realworld/uvm_prefetch/lavaMD/util/timer/timer.h new file mode 100755 index 0000000000000000000000000000000000000000..1744df4b8607f95c057ac4db6e9ced5ff84c4ab7 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lavaMD/util/timer/timer.h @@ -0,0 +1,21 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// TIMER HEADER +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// FUNCTION PROTOTYPES 
+//======================================================================================================================================================150 + +long long get_time(); + +//===============================================================================================================================================================================================================200 +// END TIMER HEADER +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/uvm_prefetch/lud/Makefile b/workloads/realworld/uvm_prefetch/lud/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1dfc37ac7fa0db46535d0583970dba1cb5cfb80e --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lud/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := lud +CUFILES := lud_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o lud + diff --git a/workloads/realworld/uvm_prefetch/lud/lud_cuda.cu b/workloads/realworld/uvm_prefetch/lud/lud_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..48398c2678207053a78fc07f7965be41aee83450 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lud/lud_cuda.cu @@ -0,0 +1,293 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. 
Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK 256 + +#ifndef SIZE +#define SIZE 4096 +#endif + +__global__ void add(float *a, float *b, float *c) +{ + int tid = blockIdx.x; // Handle the data at the index + + c[tid] = a[tid] + b[tid]; +} + +__global__ void scale(float *a, int size, int index) +{ + int i; + int start = (index * size + index); + int end = (index * size + size); + + for (i = start + 1; i < end; i++) + { + a[i] = (a[i] / a[start]); + } +} + +__global__ void reduce(float *a, int size, int index, int b_size) +{ + extern __shared__ float pivot[SIZE]; + int i; + + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = b_size; + + int pivot_start = (index * size + index); + int pivot_end = (index * size + size); + + int start; + int end; + int pivot_row; + int my_row; + + if (tid == 0) + { + for (i = index; i < size; i++) + pivot[i] = a[(index * size) + i]; + } + + __syncthreads(); + + pivot_row = (index * size); + my_row = (((block_size * bid) + tid) * size); + start = my_row + index; + end = my_row + size; + + if (my_row > pivot_row) + { + for (i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(i - my_row)]); + } + } +} + +void initCPU(float *a, int N) +{ + srand((unsigned)2); + // fill the arrays 'a' on the CPU + for (int i = 0; i < (N * N); i++) + { + a[i] = ((rand() % 10) + 1); + // a[i] = 1.0f; + } +} + +void initGPU(float *a_dev, float *a, int N) +{ + for (int i = 0; i < (N * N); i++) + { + a_dev[i] = a[i]; + // a_dev[i] = 1.0f; + } +} + +__global__ void lud_kernel(float *a, int N) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // extern __shared__ float pivot[]; + __shared__ float 
pivot[SIZE]; + + for (int tile = 0; tile < N; tile += 1) { + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = blockDim.x; + + if (tid == 0 && bid == 0) { + int start = (tile * N + tile); + int end = (tile * N + N); + + for (int i = start + 1; i < end; i++) + a[i] = (a[i] / a[start]); + } + block.sync(); + + if (tid == 0) + { + for (int i = tile; i < N; i++) + pivot[i] = a[(tile * N) + i]; + } + block.sync(); + + int pivot_row = (tile * N); + int my_row = (((block_size * bid) + tid) * N); + int start = my_row + tile; + int end = my_row + N; + + if (my_row > pivot_row) + { + for (int i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(i - my_row)]); + } + } + block.sync(); + } +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + float *a; + float *a_gpu; + float *c; + float error; + int N; + int flag = 0; + + float **result; + float **a_ref; + int blocks; + + int i; + int j; + int k; + float l1; + float u1; + + N = SIZE; + // allocate memory on CPU + a = (float *)malloc(sizeof(float) * N * N); + c = (float *)malloc(sizeof(float) * N * N); + + result = (float **)malloc(sizeof(float *) * N); + a_ref = (float **)malloc(sizeof(float *) * N); + + for (i = 0; i < N; i++) + { + result[i] = (float *)malloc(sizeof(float) * N); + a_ref[i] = (float *)malloc(sizeof(float) * N); + } + initCPU(a, N); + + GPU_argv_init(); + initTrace(); + startCPU(); + // allocate the memory on the GPU + // cudaMalloc((void **)&dev_a, N * N * sizeof(float)); + + cudaMallocManaged(&a_gpu, N * N * sizeof(float)); + memcpy(a_gpu, a, N * N * sizeof(float)); + + // cudaMemcpy(dev_a, a, N * N * sizeof(float), cudaMemcpyHostToDevice); // copy array to device memory + + /*Perform LU Decomposition*/ + // for (i = 0; i < N; i++) + // { + // scale<<<1, 1>>>(a_gpu, N, i); + // // blocks= ((N-i-1)/512)+1; + // blocks = ((N / DIM_THREAD_BLOCK)); + // // 
printf("Number of blocks rxd : %d \n",blocks); + // reduce<<>>(a_gpu, N, i, DIM_THREAD_BLOCK); + // cudaDeviceSynchronize(); + // } + + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(a_gpu, N * N * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + blocks = ((N / DIM_THREAD_BLOCK)); + lud_kernel<<>>(a_gpu, N); + cudaDeviceSynchronize(); + /*LU decomposition ends here*/ + + // cudaMemcpy(c, dev_a, N * N * sizeof(float), cudaMemcpyDeviceToHost); // copy array back to host + memcpy(c, a_gpu, N * N * sizeof(float)); + // free the memory allocated on the GPU + cudaFree(a_gpu); + + endCPU(); + finiTrace(); + + /*copy the result matrix into explicit 2D matrix for verification*/ + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + // c[i * N + j] = a_gpu[i * N + j]; + result[i][j] = c[i * N + j]; + // printf("result %d %d Error is %lf \n ", i, j, result[i][j]); + } + } + + printf("======================================================="); + printf("\n Performing inplace verification \n"); + + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + a_ref[i][j] = 0; + for (k = 0; k < N; k++) + { + if (i >= k) + l1 = result[i][k]; + else + l1 = 0; + + if (k == j) + u1 = 1; + else if (k < j) + u1 = result[k][j]; // figured it out + else + u1 = 0.0; + + a_ref[i][j] = a_ref[i][j] + (l1 * u1); + } + } + } + + // for (i = 0; i < N; i++) + // { + // for (j = 0; j < N; j++) + // { + // error = abs(a[(i * N + j)] - a_ref[i][j]); + // if (error > 1) + // { + // // printf("No match occured at %d %d Error is %lf \n ", i, j, abs(a[(i * N + j)] - a_ref[i][j])); + // // printf("No match occured at %d %d Error is %lf, %lf \n ", i, j, a[(i * N + j)], a_ref[i][j]); + // flag = flag + 1; + // } + // } + // } + + // if (flag == 0) + // printf("Match \n"); + // else + // printf("No Matchs %d \n", flag); + + + + return 0; +} diff --git a/workloads/realworld/uvm_prefetch/lud/run.sh b/workloads/realworld/uvm_prefetch/lud/run.sh new 
file mode 100755 index 0000000000000000000000000000000000000000..ea7db937489e328f5e923d2b18774e4256eef123 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lud/run.sh @@ -0,0 +1 @@ +./lud 1024 diff --git a/workloads/realworld/uvm_prefetch/lud/run_super.sh b/workloads/realworld/uvm_prefetch/lud/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2791fe07c43d75b40894206ba79ed441f207ee26 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lud/run_super.sh @@ -0,0 +1 @@ +./lud 4096 diff --git a/workloads/realworld/uvm_prefetch/lud_perf/Makefile b/workloads/realworld/uvm_prefetch/lud_perf/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1dfc37ac7fa0db46535d0583970dba1cb5cfb80e --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lud_perf/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := lud +CUFILES := lud_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o lud + diff --git a/workloads/realworld/uvm_prefetch/lud_perf/lud b/workloads/realworld/uvm_prefetch/lud_perf/lud new file mode 100755 index 0000000000000000000000000000000000000000..a7ac6150ed8ac3f086c43d5adf1d08dc2a7fd135 Binary files /dev/null and b/workloads/realworld/uvm_prefetch/lud_perf/lud differ diff --git a/workloads/realworld/uvm_prefetch/lud_perf/lud_cuda.cu b/workloads/realworld/uvm_prefetch/lud_perf/lud_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..f7ece3c1f9f0f95db0467df5f616193272b4357e --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lud_perf/lud_cuda.cu @@ -0,0 +1,293 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN 
computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK 256 + +#ifndef SIZE +#define SIZE 4096 +#endif + +__global__ void add(float *a, float *b, float *c) +{ + int tid = blockIdx.x; // Handle the data at the index + + c[tid] = a[tid] + b[tid]; +} + +__global__ void scale(float *a, int size, int index) +{ + int i; + int start = (index * size + index); + int end = (index * size + size); + + for (i = start + 1; i < end; i++) + { + a[i] = (a[i] / a[start]); + } +} + +__global__ void reduce(float *a, int size, int index, int b_size) +{ + extern __shared__ float pivot[SIZE]; + int i; + + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = b_size; + + int pivot_start = (index * size + index); + int pivot_end = (index * size + size); + + int start; + int end; + int pivot_row; + int my_row; + + if (tid == 0) + { + for (i = index; i < size; i++) + pivot[i] = a[(index * size) + i]; + } + + __syncthreads(); + + pivot_row = (index * size); + my_row = (((block_size * bid) + tid) * size); + start = my_row + index; + end = my_row + size; + + if (my_row > pivot_row) + { + for (i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(i - my_row)]); + } + } +} + +void initCPU(float *a, int N) +{ + srand((unsigned)2); + // fill the arrays 'a' on the CPU + for (int i = 0; i < (N * N); i++) + { + a[i] = ((rand() % 10) + 1); + // a[i] = 1.0f; + } +} + +void initGPU(float *a_dev, float *a, int N) +{ + for (int i = 0; i < (N * N); i++) + { + a_dev[i] = a[i]; + // a_dev[i] = 1.0f; + } +} + +__global__ void lud_kernel(float *a, int N) +{ + cooperative_groups::thread_block 
block = cooperative_groups::this_thread_block(); + // extern __shared__ float pivot[]; + __shared__ float pivot[SIZE]; + + for (int tile = 0; tile < N; tile += 1) { + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = blockDim.x; + + if (tid == 0 && bid == 0) { + int start = (tile * N + tile); + int end = (tile * N + N); + + for (int i = start + 1; i < end; i++) + a[i] = (a[i] / a[start]); + } + block.sync(); + + if (tid == 0) + { + for (int i = tile; i < N; i++) + pivot[i] = a[(tile * N) + i]; + } + block.sync(); + + int pivot_row = (tile * N); + int my_row = (((block_size * bid) + tid) * N); + int start = my_row + tile; + int end = my_row + N; + + if (my_row > pivot_row) + { + for (int i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(i - my_row)]); + } + } + block.sync(); + } +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + float *a; + float *a_gpu; + float *c; + float error; + int N; + int flag = 0; + + float **result; + float **a_ref; + int blocks; + + int i; + int j; + int k; + float l1; + float u1; + + N = SIZE; + // allocate memory on CPU + a = (float *)malloc(sizeof(float) * N * N); + c = (float *)malloc(sizeof(float) * N * N); + + result = (float **)malloc(sizeof(float *) * N); + a_ref = (float **)malloc(sizeof(float *) * N); + + for (i = 0; i < N; i++) + { + result[i] = (float *)malloc(sizeof(float) * N); + a_ref[i] = (float *)malloc(sizeof(float) * N); + } + initCPU(a, N); + + GPU_argv_init(); + // initTrace(); + startCPU(); + // allocate the memory on the GPU + // cudaMalloc((void **)&dev_a, N * N * sizeof(float)); + + cudaMallocManaged(&a_gpu, N * N * sizeof(float)); + memcpy(a_gpu, a, N * N * sizeof(float)); + + // cudaMemcpy(dev_a, a, N * N * sizeof(float), cudaMemcpyHostToDevice); // copy array to device memory + + /*Perform LU Decomposition*/ + // for (i = 0; i < N; i++) + // { + // scale<<<1, 
1>>>(a_gpu, N, i); + // // blocks= ((N-i-1)/512)+1; + // blocks = ((N / DIM_THREAD_BLOCK)); + // // printf("Number of blocks rxd : %d \n",blocks); + // reduce<<>>(a_gpu, N, i, DIM_THREAD_BLOCK); + // cudaDeviceSynchronize(); + // } + + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(a_gpu, N * N * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + blocks = ((N / DIM_THREAD_BLOCK)); + lud_kernel<<>>(a_gpu, N); + cudaDeviceSynchronize(); + /*LU decomposition ends here*/ + + // cudaMemcpy(c, dev_a, N * N * sizeof(float), cudaMemcpyDeviceToHost); // copy array back to host + memcpy(c, a_gpu, N * N * sizeof(float)); + // free the memory allocated on the GPU + cudaFree(a_gpu); + + endCPU(); + // finiTrace(); + + /*copy the result matrix into explicit 2D matrix for verification*/ + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + // c[i * N + j] = a_gpu[i * N + j]; + result[i][j] = c[i * N + j]; + // printf("result %d %d Error is %lf \n ", i, j, result[i][j]); + } + } + + printf("======================================================="); + printf("\n Performing inplace verification \n"); + + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + a_ref[i][j] = 0; + for (k = 0; k < N; k++) + { + if (i >= k) + l1 = result[i][k]; + else + l1 = 0; + + if (k == j) + u1 = 1; + else if (k < j) + u1 = result[k][j]; // figured it out + else + u1 = 0.0; + + a_ref[i][j] = a_ref[i][j] + (l1 * u1); + } + } + } + + // for (i = 0; i < N; i++) + // { + // for (j = 0; j < N; j++) + // { + // error = abs(a[(i * N + j)] - a_ref[i][j]); + // if (error > 1) + // { + // // printf("No match occured at %d %d Error is %lf \n ", i, j, abs(a[(i * N + j)] - a_ref[i][j])); + // // printf("No match occured at %d %d Error is %lf, %lf \n ", i, j, a[(i * N + j)], a_ref[i][j]); + // flag = flag + 1; + // } + // } + // } + + // if (flag == 0) + // printf("Match \n"); + // else + // printf("No Matchs %d \n", flag); + + + + return 0; +} diff 
--git a/workloads/realworld/uvm_prefetch/lud_perf/run.sh b/workloads/realworld/uvm_prefetch/lud_perf/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..ea7db937489e328f5e923d2b18774e4256eef123 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lud_perf/run.sh @@ -0,0 +1 @@ +./lud 1024 diff --git a/workloads/realworld/uvm_prefetch/lud_perf/run_super.sh b/workloads/realworld/uvm_prefetch/lud_perf/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2791fe07c43d75b40894206ba79ed441f207ee26 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/lud_perf/run_super.sh @@ -0,0 +1 @@ +./lud 4096 diff --git a/workloads/realworld/uvm_prefetch/nw/Makefile b/workloads/realworld/uvm_prefetch/nw/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..b33ae6462826357f9665bfd7fc9929ed176f9b35 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/nw/Makefile @@ -0,0 +1,15 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include + +SRC = needle.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = needle + +release: $(SRC) + $(CC) ${KERNEL_DIM} $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt diff --git a/workloads/realworld/uvm_prefetch/nw/Makefile_nvidia b/workloads/realworld/uvm_prefetch/nw/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..2fd0b98d07beea56ae69a96a0c8cb3af87d602f6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/nw/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. 
+# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. 
+# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := needle +# CUDA source files (compiled with cudacc) +CUFILES := needle.cu +# CUDA dependency files +CU_DEPS := needle_kernel.cu +# C/C++ source files (compiled with gcc / c++) +# CCFILES := BlackScholes_gold.cpp + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/uvm_prefetch/nw/README b/workloads/realworld/uvm_prefetch/nw/README new file mode 100755 index 0000000000000000000000000000000000000000..683cbd53db81f0ece4f926fa01316582ea0d5fc9 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/nw/README @@ -0,0 +1,12 @@ +Note: This program generates two sequences randomly. Please specify your own sequences for different uses. + At the current stage, the program only supports two sequences with the same length, which can be divided by 16. 
+Usage: needle 32 10 + 32 //the length of both sequences + 10 //penalty value + +******Adjustable work group size***** +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" diff --git a/workloads/realworld/uvm_prefetch/nw/needle.cu b/workloads/realworld/uvm_prefetch/nw/needle.cu new file mode 100755 index 0000000000000000000000000000000000000000..6ca18de92670def6bc2bac66dacbe2f3b8d99db5 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/nw/needle.cu @@ -0,0 +1,293 @@ +#define LIMIT -999 +#include +#include +#include +#include +#include "needle.h" +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +// includes, kernels +#include "needle_kernel.cu" + +#ifdef TIMING +#include "timing.h" + +struct timeval tv; +struct timeval tv_total_start, tv_total_end; +struct timeval tv_h2d_start, tv_h2d_end; +struct timeval tv_d2h_start, tv_d2h_end; +struct timeval tv_kernel_start, tv_kernel_end; +struct timeval tv_mem_alloc_start, tv_mem_alloc_end; +struct timeval tv_close_start, tv_close_end; +float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, + d2h_time = 0, close_time = 0, total_time = 0; +#endif + +//////////////////////////////////////////////////////////////////////////////// +// declaration, forward +void runTest( int argc, char** argv); + + +int blosum62[24][24] = { +{ 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0, -4}, +{-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1, -4}, +{-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1, -4}, +{-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1, -4}, +{ 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4}, +{-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1, -4}, +{-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, 
-2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4}, +{ 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -4}, +{-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1, -4}, +{-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1, -4}, +{-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1, -4}, +{-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1, -4}, +{-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1, -4}, +{-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1, -4}, +{-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2, -4}, +{ 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0, -4}, +{ 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0, -4}, +{-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2, -4}, +{-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1, -4}, +{ 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1, -4}, +{-2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1, -4}, +{-1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4}, +{ 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1, -4}, +{-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1} +}; + +double gettime() { + struct timeval t; + gettimeofday(&t,NULL); + return t.tv_sec+t.tv_usec*1e-6; +} + +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int +main( int argc, char** argv) +{ + uint64_t start_tsc = rdtsc(); + 
uint64_t start_tsp = rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + + printf("WG size of kernel = %d \n", BLOCK_SIZE); + + runTest( argc, argv); + + return EXIT_SUCCESS; +} + +void usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "\t - x and y dimensions\n"); + fprintf(stderr, "\t - penalty(positive integer)\n"); + exit(1); +} + +void runTest( int argc, char** argv) +{ + int max_rows, max_cols, penalty, nblocks; + int *input_itemsets, *output_itemsets, *referrence; + int *matrix_cuda, *referrence_cuda; + int size; + + + // the lengths of the two sequences should be able to divided by 16. + // And at current stage max_rows needs to equal max_cols + if (argc == 4) + { + max_rows = atoi(argv[1]); + max_cols = atoi(argv[1]); + penalty = atoi(argv[2]); + nblocks = atoi(argv[3]); + } + else{ + usage(argc, argv); + } + + if(atoi(argv[1])%16!=0){ + fprintf(stderr,"The dimension values must be a multiple of 16\n"); + exit(1); + } + + + max_rows = max_rows + 1; + max_cols = max_cols + 1; + referrence = (int *)malloc( max_rows * max_cols * sizeof(int) ); + input_itemsets = (int *)malloc( max_rows * max_cols * sizeof(int) ); + output_itemsets = (int *)malloc( max_rows * max_cols * sizeof(int) ); + + + if (!input_itemsets) + fprintf(stderr, "error: can not allocate memory"); + + srand ( 7 ); + + + for (int i = 0 ; i < max_cols; i++){ + for (int j = 0 ; j < max_rows; j++){ + input_itemsets[i*max_cols+j] = 0; + } + } + + printf("Start Needleman-Wunsch\n"); + + for( int i=1; i< max_rows ; i++){ //please define your own sequence. + input_itemsets[i*max_cols] = rand() % 10 + 1; + } + for( int j=1; j< max_cols ; j++){ //please define your own sequence. 
+ input_itemsets[j] = rand() % 10 + 1; + } + + + for (int i = 1 ; i < max_cols; i++){ + for (int j = 1 ; j < max_rows; j++){ + referrence[i*max_cols+j] = blosum62[input_itemsets[i*max_cols]][input_itemsets[j]]; + } + } + + for( int i = 1; i< max_rows ; i++) + input_itemsets[i*max_cols] = -i * penalty; + for( int j = 1; j< max_cols ; j++) + input_itemsets[j] = -j * penalty; + + + size = max_cols * max_rows; + + GPU_argv_init(); + initTrace(); + startCPU(); + + cudaMallocManaged((void**)& referrence_cuda, sizeof(int)*size); + cudaMallocManaged((void **)&matrix_cuda, sizeof(int) * size); + + memcpy(referrence_cuda, referrence, sizeof(int) * size); + memcpy(matrix_cuda, input_itemsets, sizeof(int) * size); + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(referrence_cuda, sizeof(int) * size, GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(matrix_cuda, sizeof(int) * size, GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + dim3 dimGrid; + dim3 dimBlock(BLOCK_SIZE, 1); + // int block_width = ( max_cols - 1 )/BLOCK_SIZE; + int block_width = nblocks - 1; + int block_size = (max_cols - 1) / (nblocks * BLOCK_SIZE); + +#ifdef TIMING + gettimeofday(&tv_kernel_start, NULL); +#endif + + //printf("Processing top-left matrix\n"); + //process top-left matrix + for( int i = 1 ; i <= block_width ; i++) { + dimGrid.x = i; + dimGrid.y = 1; + needle_cuda_shared_1<<>>(referrence_cuda, matrix_cuda + ,max_cols, penalty, i, block_width, block_size); + } + //printf("Processing bottom-right matrix\n"); + //process bottom-right matrix + for( int i = block_width - 1 ; i >= 1 ; i--){ + dimGrid.x = i; + dimGrid.y = 1; + needle_cuda_shared_2<<>>(referrence_cuda, matrix_cuda + ,max_cols, penalty, i, block_width, block_size); + } + cudaDeviceSynchronize(); + memcpy(output_itemsets, matrix_cuda, sizeof(int) * size); + + cudaFree(referrence_cuda); + cudaFree(matrix_cuda); + 
+#ifdef TIMING + gettimeofday(&tv_kernel_end, NULL); + tvsub(&tv_kernel_end, &tv_kernel_start, &tv); + kernel_time += tv.tv_sec * 1000.0 + (float) tv.tv_usec / 1000.0; +#endif + + // cudaMemcpy(output_itemsets, matrix_cuda, sizeof(int) * size, cudaMemcpyDeviceToHost); + +//#define TRACEBACK +#ifdef TRACEBACK + + FILE *fpo = fopen("result.txt","w"); + fprintf(fpo, "print traceback value GPU:\n"); + + for (int i = max_rows - 2, j = max_rows - 2; i>=0, j>=0;){ + int nw, n, w, traceback; + if ( i == max_rows - 2 && j == max_rows - 2 ) + fprintf(fpo, "%d ", output_itemsets[ i * max_cols + j]); //print the first element + if ( i == 0 && j == 0 ) + break; + if ( i > 0 && j > 0 ){ + nw = output_itemsets[(i - 1) * max_cols + j - 1]; + w = output_itemsets[ i * max_cols + j - 1 ]; + n = output_itemsets[(i - 1) * max_cols + j]; + } + else if ( i == 0 ){ + nw = n = LIMIT; + w = output_itemsets[ i * max_cols + j - 1 ]; + } + else if ( j == 0 ){ + nw = w = LIMIT; + n = output_itemsets[(i - 1) * max_cols + j]; + } + else{ + } + + //traceback = maximum(nw, w, n); + int new_nw, new_w, new_n; + new_nw = nw + referrence[i * max_cols + j]; + new_w = w - penalty; + new_n = n - penalty; + + traceback = maximum(new_nw, new_w, new_n); + if(traceback == new_nw) + traceback = nw; + if(traceback == new_w) + traceback = w; + if(traceback == new_n) + traceback = n; + + fprintf(fpo, "%d ", traceback); + + if(traceback == nw ) + {i--; j--; continue;} + + else if(traceback == w ) + {j--; continue;} + + else if(traceback == n ) + {i--; continue;} + + else + ; + } + + fclose(fpo); + +#endif + endCPU(); + finiTrace(); + free(referrence); + free(input_itemsets); + free(output_itemsets); + +#ifdef TIMING + printf("Exec: %f\n", kernel_time); +#endif +} + diff --git a/workloads/realworld/uvm_prefetch/nw/needle.h b/workloads/realworld/uvm_prefetch/nw/needle.h new file mode 100755 index 0000000000000000000000000000000000000000..e73320d6496262665592117d242e9bc383298b5b --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch/nw/needle.h @@ -0,0 +1,11 @@ +#ifdef RD_WG_SIZE_0_0 + #define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) + #define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) + #define BLOCK_SIZE RD_WG_SIZE +#else + #define BLOCK_SIZE 16 +#endif +//#define TRACE + diff --git a/workloads/realworld/uvm_prefetch/nw/needle_kernel.cu b/workloads/realworld/uvm_prefetch/nw/needle_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..d7b4a0a1984521fa4a3d0dab3e1a3b3645ad5a4c --- /dev/null +++ b/workloads/realworld/uvm_prefetch/nw/needle_kernel.cu @@ -0,0 +1,197 @@ + +#include "needle.h" +#include + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SDATA( index) CUT_BANK_CHECKER(sdata, index) + +__device__ __host__ int +maximum( int a, + int b, + int c){ + +int k; +if( a <= b ) +k = b; +else +k = a; + +if( k <=c ) +return(c); +else +return(k); + +} + +__global__ void +needle_cuda_shared_1( int* referrence, + int* matrix_cuda, + int cols, + int penalty, + int i, + int block_width, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + int bx = blockIdx.x; + int tx = threadIdx.x; + + int b_index_x = bx; + int b_index_y = i - 1 - bx; + + __shared__ int temp[BLOCK_SIZE+1][BLOCK_SIZE+1]; + __shared__ int ref[BLOCK_SIZE][BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (b_index_y * gridDim.x + b_index_x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + int b_index_x = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int b_index_y = 
block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + int index = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( cols + 1 ); + int index_n = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( 1 ); + int index_w = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + ( cols ); + int index_nw = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x; + + if (tx == 0) + temp[tx][0] = matrix_cuda[index_nw]; + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + ref[ty][tx] = referrence[index + cols * ty]; + block.sync(); + + temp[tx + 1][0] = matrix_cuda[index_w + cols * tx]; + block.sync(); + + temp[0][tx + 1] = matrix_cuda[index_n]; + block.sync(); + + for( int m = 0 ; m < BLOCK_SIZE ; m++){ + if ( tx <= m ){ + int t_index_x = tx + 1; + int t_index_y = m - tx + 1; + + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[t_index_y-1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + } + block.sync(); + } + + for( int m = BLOCK_SIZE - 2 ; m >=0 ; m--){ + if ( tx <= m){ + int t_index_x = tx + BLOCK_SIZE - m ; + int t_index_y = BLOCK_SIZE - tx; + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[t_index_y-1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + + } + block.sync(); + } + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + matrix_cuda[index + ty * cols] = temp[ty+1][tx+1]; + } +} + + +__global__ void +needle_cuda_shared_2( int* referrence, + int* matrix_cuda, + int cols, + int penalty, + int i, + int block_width, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + int bx = blockIdx.x; + int tx = threadIdx.x; + + int b_index_x = bx + block_width - i; + int b_index_y = block_width - bx -1; + + __shared__ int temp[BLOCK_SIZE+1][BLOCK_SIZE+1]; + __shared__ int ref[BLOCK_SIZE][BLOCK_SIZE]; + + int tile_dim_x = cols / 
BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (b_index_y * gridDim.x + b_index_x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + int b_index_x = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int b_index_y = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + int index = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( cols + 1 ); + int index_n = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( 1 ); + int index_w = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + ( cols ); + int index_nw = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x; + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) + ref[ty][tx] = referrence[index + cols * ty]; + block.sync(); + + if (tx == 0) + temp[tx][0] = matrix_cuda[index_nw]; + temp[tx + 1][0] = matrix_cuda[index_w + cols * tx]; + block.sync(); + + temp[0][tx + 1] = matrix_cuda[index_n]; + block.sync(); + + for( int m = 0 ; m < BLOCK_SIZE ; m++){ + if ( tx <= m ){ + int t_index_x = tx + 1; + int t_index_y = m - tx + 1; + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[t_index_y-1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + + } + block.sync(); + } + + for( int m = BLOCK_SIZE - 2 ; m >=0 ; m--){ + if ( tx <= m){ + int t_index_x = tx + BLOCK_SIZE - m ; + int t_index_y = BLOCK_SIZE - tx; + + temp[t_index_y][t_index_x] = maximum( temp[t_index_y-1][t_index_x-1] + ref[t_index_y-1][t_index_x-1], + temp[t_index_y][t_index_x-1] - penalty, + temp[t_index_y-1][t_index_x] - penalty); + } + block.sync(); + } + + for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++) 
+ matrix_cuda[index + ty * cols] = temp[ty+1][tx+1]; + } +} + diff --git a/workloads/realworld/uvm_prefetch/nw/run.sh b/workloads/realworld/uvm_prefetch/nw/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..e3d20f9f4402de57c2a49c4db5c04d917907d741 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/nw/run.sh @@ -0,0 +1 @@ +./needle 32768 10 256 diff --git a/workloads/realworld/uvm_prefetch/nw/run_super.sh b/workloads/realworld/uvm_prefetch/nw/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..23b570be1ac96cce67094a9469de1c6d24c03b08 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/nw/run_super.sh @@ -0,0 +1 @@ +./needle 32768 10 64 diff --git a/workloads/realworld/uvm_prefetch/pathfinder/Makefile b/workloads/realworld/uvm_prefetch/pathfinder/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d740e79027f3651c458e229179bbbd46fb4fcbec --- /dev/null +++ b/workloads/realworld/uvm_prefetch/pathfinder/Makefile @@ -0,0 +1,14 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc +INCLUDE := $(CUDA_DIR)/include + +SRC = pathfinder.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = pathfinder + +release: + $(CC) $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +clean: + rm -f pathfinder diff --git a/workloads/realworld/uvm_prefetch/pathfinder/README b/workloads/realworld/uvm_prefetch/pathfinder/README new file mode 100644 index 0000000000000000000000000000000000000000..9af75abe201eb95c5c89a038c6d79f54b276f94e --- /dev/null +++ b/workloads/realworld/uvm_prefetch/pathfinder/README @@ -0,0 +1,6 @@ +To compile the program: + +nvcc -cuda dynproc.cu +nvcc -o dynproc dynproc.cu.cpp + +Usage: dynproc row_len col_len pyramid_height diff --git a/workloads/realworld/uvm_prefetch/pathfinder/pathfinder.cu b/workloads/realworld/uvm_prefetch/pathfinder/pathfinder.cu new file mode 100644 index 
0000000000000000000000000000000000000000..fba7997d5b345e779070329447be01c04adcb2cf --- /dev/null +++ b/workloads/realworld/uvm_prefetch/pathfinder/pathfinder.cu @@ -0,0 +1,315 @@ +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define GPU_DEVICE 6 + +#ifdef TIMING +#include "timing.h" + +struct timeval tv; +struct timeval tv_total_start, tv_total_end; +struct timeval tv_h2d_start, tv_h2d_end; +struct timeval tv_d2h_start, tv_d2h_end; +struct timeval tv_kernel_start, tv_kernel_end; +struct timeval tv_mem_alloc_start, tv_mem_alloc_end; +struct timeval tv_close_start, tv_close_end; +float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, + d2h_time = 0, close_time = 0, total_time = 0; +#endif + +#define BLOCK_SIZE 256 +#define STR_SIZE 256 +#define DEVICE 0 +#define HALO 1 // halo width along one direction when advancing to the next iteration + +// #define BENCH_PRINT + +void run(int argc, char **argv); + +int rows, cols; +int *data; +int **wall; +int *result; +#define M_SEED 9 +int pyramid_height; +int nblocks; + +void init(int argc, char **argv) +{ + if (argc == 5) + { + cols = atoi(argv[1]); + rows = atoi(argv[2]); + pyramid_height = atoi(argv[3]); + nblocks = atoi(argv[4]); + } + else + { + printf("Usage: dynproc row_len col_len pyramid_height\n"); + exit(0); + } + data = new int[rows * cols]; + wall = new int *[rows]; + for (int n = 0; n < rows; n++) + wall[n] = data + cols * n; + result = new int[cols]; + + int seed = M_SEED; + srand(seed); + + for (int i = 0; i < rows; i++) + { + for (int j = 0; j < cols; j++) + { + wall[i][j] = rand() % 10; + } + } +#ifdef BENCH_PRINT + for (int i = 0; i < rows; i++) + { + for (int j = 0; j < cols; j++) + { + printf("%d ", wall[i][j]); + } + printf("\n"); + } +#endif +} + +void fatal(char *s) +{ + fprintf(stderr, "error: %s\n", s); 
+} + +#define IN_RANGE(x, min, max) ((x) >= (min) && (x) <= (max)) +#define CLAMP_RANGE(x, min, max) x = (x < (min)) ? min : ((x > (max)) ? max : x) +#define MIN(a, b) ((a) <= (b) ? (a) : (b)) + +__global__ void dynproc_kernel( + int iteration, + int *gpuWall, + int *gpuSrc, + int *gpuResults, + int cols, + int rows, + int startStep, + int border, + int small_block_cols, + int tile_size, + int batches) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + __shared__ int prev[BLOCK_SIZE]; + __shared__ int result[BLOCK_SIZE]; + + int bx = blockIdx.x; + int tx = threadIdx.x; + + for (int b = 0; b < batches; b++) + { + // each block finally computes result for a small block + // after N iterations. + // it is the non-overlapping small blocks that cover + // all the input data + + // calculate the boundary for the block according to + // the boundary of its small block + int blkX = bx * tile_size + small_block_cols * b - border; + int blkXmax = blkX + BLOCK_SIZE - 1; + + // calculate the global thread coordination + int xidx = blkX + tx; + + // effective range within this block that falls within + // the valid range of the input data + // used to rule out computation outside the boundary. + int validXmin = (blkX < 0) ? -blkX : 0; + int validXmax = (blkXmax > cols - 1) ? BLOCK_SIZE - 1 - (blkXmax - cols + 1) : BLOCK_SIZE - 1; + + int W = tx - 1; + int E = tx + 1; + + W = (W < validXmin) ? validXmin : W; + E = (E > validXmax) ? validXmax : E; + + bool isValid = IN_RANGE(tx, validXmin, validXmax); + + if (IN_RANGE(xidx, 0, cols - 1)) + { + prev[tx] = gpuSrc[xidx]; + } + block.sync(); // [Ronny] Added sync to avoid race on prev Aug. 
14 2012 + bool computed; + for (int i = 0; i < iteration; i++) + { + computed = false; + if (IN_RANGE(tx, i + 1, BLOCK_SIZE - i - 2) && + isValid) + { + computed = true; + int left = prev[W]; + int up = prev[tx]; + int right = prev[E]; + int shortest = MIN(left, up); + shortest = MIN(shortest, right); + int index = cols * (startStep + i) + xidx; + result[tx] = shortest + gpuWall[index]; + } + block.sync(); + if (i == iteration - 1) + break; + if (computed) // Assign the computation range + prev[tx] = result[tx]; + block.sync(); // [Ronny] Added sync to avoid race on prev Aug. 14 2012 + } + + // update the global memory + // after the last iteration, only threads coordinated within the + // small block perform the calculation and switch on ``computed'' + if (computed) + { + gpuResults[xidx] = result[tx]; + } + } + + +} + +/* + compute N time steps +*/ +int calc_path(int *gpuWall, int *gpuResult[2], int rows, int cols, + int pyramid_height, int blockCols, int borderCols, int tile_size, int batches) +{ + dim3 dimBlock(BLOCK_SIZE); + dim3 dimGrid(nblocks); + + int src = 1, dst = 0; + for (int t = 0; t < rows - 1; t += pyramid_height) + { + int temp = src; + src = dst; + dst = temp; + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(gpuWall, sizeof(int) * (rows * cols - cols), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(gpuResult[src], sizeof(int) * cols, GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(gpuResult[dst], sizeof(int) * cols, GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + int iteration = MIN(pyramid_height, rows - t - 1); + int small_block_cols = BLOCK_SIZE - iteration * HALO * 2; + dynproc_kernel<<>>( + iteration, gpuWall, gpuResult[src], gpuResult[dst], + cols, rows, t, borderCols, small_block_cols, tile_size, batches); + + // for the 
measurement fairness + cudaDeviceSynchronize(); + } + return dst; +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + GPU_argv_init(); + + run(argc, argv); + + return EXIT_SUCCESS; +} + +void run(int argc, char **argv) +{ + init(argc, argv); + + /* --------------- pyramid parameters --------------- */ + int borderCols = (pyramid_height)*HALO; + int smallBlockCol = BLOCK_SIZE - (pyramid_height)*HALO * 2; + int blockCols = cols / smallBlockCol + ((cols % smallBlockCol == 0) ? 0 : 1); + + //ruihao + int cols_per_block = cols / nblocks; + if (cols_per_block < BLOCK_SIZE) cols_per_block = BLOCK_SIZE; + int batches = cols_per_block / smallBlockCol + ((cols_per_block % smallBlockCol == 0) ? 0 : 1); + + // printf("pyramidHeight: %d\ngridSize: [%d]\nborder:[%d]\nblockSize: %d\nblockGrid:[%d]\ntargetBlock:[%d]\n", + // pyramid_height, cols, borderCols, BLOCK_SIZE, blockCols, smallBlockCol); + printf("pyramidHeight: %d\ngridSize: [%d]\nborder:[%d]\nblockSize: %d\nblockGrid:[%d]\ntargetBlock:[%d]\n", + pyramid_height, cols, borderCols, BLOCK_SIZE, nblocks, smallBlockCol); + + int *gpuWall, *gpuResult[2]; + int size = rows * cols; + + initTrace(); + startCPU(); + + cudaMallocManaged((void **)&gpuResult[0], sizeof(int) * cols); + cudaMallocManaged((void **)&gpuResult[1], sizeof(int) * cols); + memcpy(gpuResult[0], data, sizeof(int) * cols); + cudaMallocManaged((void **)&gpuWall, sizeof(int) * (size - cols)); + memcpy(gpuWall, data + cols, sizeof(int) * (size - cols)); + +#ifdef TIMING + gettimeofday(&tv_kernel_start, NULL); +#endif + + // int final_ret = calc_path(gpuWall, gpuResult, rows, cols, + // pyramid_height, blockCols, borderCols); + int final_ret = calc_path(gpuWall, gpuResult, rows, cols, + pyramid_height, blockCols, borderCols, cols_per_block, batches); + +#ifdef TIMING + gettimeofday(&tv_kernel_end, NULL); + tvsub(&tv_kernel_end, &tv_kernel_start, 
&tv); + kernel_time += tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; +#endif + + memcpy(result, gpuResult[final_ret], sizeof(int) * cols); + +#ifdef BENCH_PRINT + for (int i = 0; i < cols; i++) + printf("%d ", data[i]); + printf("\n"); + for (int i = 0; i < cols; i++) + printf("%d ", result[i]); + printf("\n"); +#endif + + cudaFree(gpuWall); + cudaFree(gpuResult[0]); + cudaFree(gpuResult[1]); + + endCPU(); + finiTrace(); + + delete[] data; + delete[] wall; + delete[] result; + +#ifdef TIMING + printf("Exec: %f\n", kernel_time); +#endif +} diff --git a/workloads/realworld/uvm_prefetch/pathfinder/result.txt b/workloads/realworld/uvm_prefetch/pathfinder/result.txt new file mode 100644 index 0000000000000000000000000000000000000000..fa67c591d071682e1842a455f4477b397825e250 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/pathfinder/result.txt @@ -0,0 +1,11 @@ +pyramidHeight: 20 +gridSize: [100000] +border:[20] +blockSize: 256 +blockGrid:[463] +targetBlock:[216] +CUPTI dynproc_kernel iter 0 start: 1679530155077329959 end: 1679530155078946951 +CUPTI dynproc_kernel iter 20 start: 1679530155078953603 end: 1679530155081441509 +CUPTI dynproc_kernel iter 40 start: 1679530155081441880 end: 1679530155083936228 +CUPTI dynproc_kernel iter 60 start: 1679530155083936508 end: 1679530155086433891 +CUPTI dynproc_kernel iter 80 start: 1679530155086434172 end: 1679530155088929332 diff --git a/workloads/realworld/uvm_prefetch/pathfinder/run.sh b/workloads/realworld/uvm_prefetch/pathfinder/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..11a9e6199ea4b2ff7fd3e0ebf893dc96fa89ff45 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/pathfinder/run.sh @@ -0,0 +1,2 @@ +#./pathfinder 100000 100 20 1024 > result.txt +./pathfinder 10000000 100 20 1024 diff --git a/workloads/realworld/uvm_prefetch/pathfinder/run_super.sh b/workloads/realworld/uvm_prefetch/pathfinder/run_super.sh new file mode 100755 index 
0000000000000000000000000000000000000000..b35cc0b44511def3912323c1e0d58c2daa280722 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/pathfinder/run_super.sh @@ -0,0 +1 @@ +./pathfinder 10000000 100 20 1024 diff --git a/workloads/realworld/uvm_prefetch/srad/Makefile b/workloads/realworld/uvm_prefetch/srad/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..47da520a663446e36c04461d54cfbb3d12cfa328 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/srad/Makefile @@ -0,0 +1,15 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -std=c++11 -arch=sm_80 -O3 + +SRC = srad.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = srad + +release: $(SRC) + $(CC) $(KERNEL_DIM) $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt diff --git a/workloads/realworld/uvm_prefetch/srad/Makefile_nvidia b/workloads/realworld/uvm_prefetch/srad/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..e1f345c41c0f838dcf159958f628276455ef4dd7 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/srad/Makefile_nvidia @@ -0,0 +1,22 @@ +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := srad +# CUDA source files (compiled with cudacc) +CUFILES := srad.cu +# CUDA dependency files +CU_DEPS := \ + srad_kernel.cu \ + +# C/C++ source files (compiled with gcc / c++) +CCFILES := \ + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/uvm_prefetch/srad/README b/workloads/realworld/uvm_prefetch/srad/README new file mode 100755 index 
0000000000000000000000000000000000000000..91e803b576bdeebe232c00a5112dadd836ffc33f --- /dev/null +++ b/workloads/realworld/uvm_prefetch/srad/README @@ -0,0 +1,24 @@ +In srad.h, define either GPU or CPU computation +Currently, the GPU implementation can only support x-, y-dimensions that can be divided by 16. + +Usage: +srad 128 128 0 31 0 31 0.5 2 + +128 //number of rows in the domain +128 //number of cols in the domain +0 //y1 position of the speckle +31 //y2 position of the speckle +0 //x1 position of the speckle +31 //x2 position of the speckle +0.5 //Lambda value +2 //number of iterations + + +******Adjustable work group size***** +The kernel has square shape +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe one dimesion +The total thread number for one block is RD_WG_SIZE_0*RD_WG_SIZE_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/srad/run.sh b/workloads/realworld/uvm_prefetch/srad/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..982fd1345383490dd093950055b359dc475480cc --- /dev/null +++ b/workloads/realworld/uvm_prefetch/srad/run.sh @@ -0,0 +1,3 @@ +# ./srad 2048 2048 0 127 0 127 0.5 2 32 + +./srad 16384 16384 0 127 0 127 0.5 2 32 \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/srad/run_super.sh b/workloads/realworld/uvm_prefetch/srad/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2d0f1b8ebd049e33bbb74a15722fc7172167641f --- /dev/null +++ b/workloads/realworld/uvm_prefetch/srad/run_super.sh @@ -0,0 +1 @@ +./srad 32768 32768 0 127 0 127 0.5 2 8 \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch/srad/srad.cu b/workloads/realworld/uvm_prefetch/srad/srad.cu new file mode 100755 index 0000000000000000000000000000000000000000..f7755e15ea422b5424c22ea3a2b18b3d956bf84c --- /dev/null +++ b/workloads/realworld/uvm_prefetch/srad/srad.cu @@ -0,0 +1,304 @@ +// includes, system 
+#include +#include +#include +#include +#include "srad.h" +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +// includes, project +#include + +// includes, kernels +#include "srad_kernel.cu" + +void random_matrix(float *I, int rows, int cols); +void runTest( int argc, char** argv); +void usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "\t - number of rows\n"); + fprintf(stderr, "\t - number of cols\n"); + fprintf(stderr, "\t - y1 value of the speckle\n"); + fprintf(stderr, "\t - y2 value of the speckle\n"); + fprintf(stderr, "\t - x1 value of the speckle\n"); + fprintf(stderr, "\t - x2 value of the speckle\n"); + fprintf(stderr, "\t - lambda (0,1)\n"); + fprintf(stderr, "\t - number of iterations\n"); + + exit(1); +} +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE); + runTest( argc, argv); + + return EXIT_SUCCESS; +} + +void +runTest( int argc, char** argv) +{ + int rows, cols, size_I, size_R, niter = 10, iter, nblocks; + float *I, *J, lambda, q0sqr, sum, sum2, tmp, meanROI,varROI ; + +#ifdef CPU + float Jc, G2, L, num, den, qsqr; + int *iN,*iS,*jE,*jW, k; + float *dN,*dS,*dW,*dE; + float cN,cS,cW,cE,D; +#endif + +#ifdef GPU + + float *J_cuda; + float *C_cuda; + float *E_C, *W_C, *N_C, *S_C; + +#endif + + unsigned int r1, r2, c1, c2; + float *c; + + + + if (argc == 10) + { + rows = atoi(argv[1]); //number of rows in the domain + cols = atoi(argv[2]); //number of cols in the domain + if ((rows%16!=0) || (cols%16!=0)){ + fprintf(stderr, "rows and cols must be multiples of 16\n"); + exit(1); + } + r1 = atoi(argv[3]); //y1 
position of the speckle + r2 = atoi(argv[4]); //y2 position of the speckle + c1 = atoi(argv[5]); //x1 position of the speckle + c2 = atoi(argv[6]); //x2 position of the speckle + lambda = atof(argv[7]); //Lambda value + niter = atoi(argv[8]); //number of iterations + nblocks = atoi(argv[9]); // number of blocks + } + else{ + usage(argc, argv); + } + + size_I = cols * rows; + size_R = (r2-r1+1)*(c2-c1+1); + + I = (float *)malloc( size_I * sizeof(float) ); + J = (float *)malloc( size_I * sizeof(float) ); + c = (float *)malloc(sizeof(float)* size_I) ; + + +#ifdef CPU + + iN = (int *)malloc(sizeof(unsigned int*) * rows) ; + iS = (int *)malloc(sizeof(unsigned int*) * rows) ; + jW = (int *)malloc(sizeof(unsigned int*) * cols) ; + jE = (int *)malloc(sizeof(unsigned int*) * cols) ; + + + dN = (float *)malloc(sizeof(float)* size_I) ; + dS = (float *)malloc(sizeof(float)* size_I) ; + dW = (float *)malloc(sizeof(float)* size_I) ; + dE = (float *)malloc(sizeof(float)* size_I) ; + + + for (int i=0; i< rows; i++) { + iN[i] = i-1; + iS[i] = i+1; + } + for (int j=0; j< cols; j++) { + jW[j] = j-1; + jE[j] = j+1; + } + iN[0] = 0; + iS[rows-1] = rows-1; + jW[0] = 0; + jE[cols-1] = cols-1; + +#endif + GPU_argv_init(); + initTrace(); + startCPU(); + +#ifdef GPU + + //Allocate device memory + cudaMallocManaged((void**)& J_cuda, sizeof(float)* size_I); + cudaMallocManaged((void **)&C_cuda, sizeof(float) * size_I); + cudaMallocManaged((void **)&E_C, sizeof(float) * size_I); + cudaMallocManaged((void **)&W_C, sizeof(float) * size_I); + cudaMallocManaged((void **)&S_C, sizeof(float) * size_I); + cudaMallocManaged((void **)&N_C, sizeof(float) * size_I); + +#endif + + printf("Randomizing the input matrix\n"); + //Generate a random matrix + random_matrix(I, rows, cols); + + for (int k = 0; k < size_I; k++ ) { + J[k] = (float)exp(I[k]) ; + } + printf("Start the SRAD main loop\n"); + for (iter=0; iter< niter; iter++){ + sum=0; sum2=0; + for (int i=r1; i<=r2; i++) { + for (int j=c1; j<=c2; j++) { 
+ tmp = J[i * cols + j]; + sum += tmp ; + sum2 += tmp*tmp; + } + } + meanROI = sum / size_R; + varROI = (sum2 / size_R) - meanROI*meanROI; + q0sqr = varROI / (meanROI*meanROI); + +#ifdef CPU + + for (int i = 0 ; i < rows ; i++) { + for (int j = 0; j < cols; j++) { + + k = i * cols + j; + Jc = J[k]; + + // directional derivates + dN[k] = J[iN[i] * cols + j] - Jc; + dS[k] = J[iS[i] * cols + j] - Jc; + dW[k] = J[i * cols + jW[j]] - Jc; + dE[k] = J[i * cols + jE[j]] - Jc; + + G2 = (dN[k]*dN[k] + dS[k]*dS[k] + + dW[k]*dW[k] + dE[k]*dE[k]) / (Jc*Jc); + + L = (dN[k] + dS[k] + dW[k] + dE[k]) / Jc; + + num = (0.5*G2) - ((1.0/16.0)*(L*L)) ; + den = 1 + (.25*L); + qsqr = num/(den*den); + + // diffusion coefficent (equ 33) + den = (qsqr-q0sqr) / (q0sqr * (1+q0sqr)) ; + c[k] = 1.0 / (1.0+den) ; + + // saturate diffusion coefficent + if (c[k] < 0) {c[k] = 0;} + else if (c[k] > 1) {c[k] = 1;} + } + } + + for (int i = 0; i < rows; i++) { + for (int j = 0; j < cols; j++) { + + // current index + k = i * cols + j; + + // diffusion coefficent + cN = c[k]; + cS = c[iS[i] * cols + j]; + cW = c[k]; + cE = c[i * cols + jE[j]]; + + // divergence (equ 58) + D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k]; + + // image update (equ 61) + J[k] = J[k] + 0.25*lambda*D; + } + } + +#endif // CPU + + +#ifdef GPU + + //Currently the input size must be divided by 16 - the block size + + // ruihao + int block_x = cols/BLOCK_SIZE ; + int block_y = rows/BLOCK_SIZE ; + + if (nblocks > block_x) nblocks = block_x; + + dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE); + // dim3 dimGrid(block_x, block_y); + dim3 dimGrid(nblocks, nblocks); + // ruihao + + //Copy data from main memory to device memory + memcpy(J_cuda, J, sizeof(float) * size_I); + + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(J_cuda, sizeof(float) * size_I, GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + //Run kernels + // srad_cuda_1<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr); + // 
srad_cuda_2<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, lambda, q0sqr); + srad_cuda_1<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr, cols / nblocks); + srad_cuda_2<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, lambda, q0sqr, cols / nblocks); + //Copy data from device memory to main memory + cudaDeviceSynchronize(); + memcpy(J, J_cuda, sizeof(float) * size_I); + +#endif +} + + cudaThreadSynchronize(); +#ifdef GPU + cudaFree(C_cuda); + cudaFree(J_cuda); + cudaFree(E_C); + cudaFree(W_C); + cudaFree(N_C); + cudaFree(S_C); +#endif + endCPU(); + finiTrace(); + +#ifdef OUTPUT + //Printing output + printf("Printing Output:\n"); + for( int i = 0 ; i < rows ; i++){ + for ( int j = 0 ; j < cols ; j++){ + printf("%.5f ", J[i * cols + j]); + } + printf("\n"); + } +#endif + + printf("Computation Done\n"); + + free(I); + free(J); +#ifdef CPU + free(iN); free(iS); free(jW); free(jE); + free(dN); free(dS); free(dW); free(dE); +#endif + free(c); + +} + + +void random_matrix(float *I, int rows, int cols){ + + srand(7); + + for( int i = 0 ; i < rows ; i++){ + for ( int j = 0 ; j < cols ; j++){ + I[i * cols + j] = rand()/(float)RAND_MAX ; + } + } + +} + diff --git a/workloads/realworld/uvm_prefetch/srad/srad.h b/workloads/realworld/uvm_prefetch/srad/srad.h new file mode 100755 index 0000000000000000000000000000000000000000..2b2adb6d956b697c5b0ace9bccb89162ef98be50 --- /dev/null +++ b/workloads/realworld/uvm_prefetch/srad/srad.h @@ -0,0 +1,16 @@ +#define STR_SIZE 256 + +#ifdef RD_WG_SIZE_0_0 + #define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) + #define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) + #define BLOCK_SIZE RD_WG_SIZE +#else + #define BLOCK_SIZE 16 +#endif + +#define GPU +#define TIMER +//#define OUTPUT + diff --git a/workloads/realworld/uvm_prefetch/srad/srad_kernel.cu b/workloads/realworld/uvm_prefetch/srad/srad_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..a81386576462e82375b7826f9b203005cca803ab 
--- /dev/null +++ b/workloads/realworld/uvm_prefetch/srad/srad_kernel.cu @@ -0,0 +1,316 @@ +#include "srad.h" +#include + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +__global__ void +srad_cuda_1( + float *E_C, + float *W_C, + float *N_C, + float *S_C, + float *J_cuda, + float *C_cuda, + int cols, + int rows, + float q0sqr, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + // shared memory allocation + __shared__ float temp[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float temp_result[BLOCK_SIZE * BLOCK_SIZE]; + + __shared__ float north[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float south[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float east[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float west[BLOCK_SIZE * BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + // block id + int offset = tile - base_tile; + int block_id = tile / tiles_this_block; + int bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + // thread id + int tx = threadIdx.x; + int ty = threadIdx.y; + + // indices + int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx; + int index_n = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + tx - cols; + int index_s = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * BLOCK_SIZE + tx; + int index_w = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty - 1; + int index_e = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + BLOCK_SIZE; + + if (index_n < 0) index_n = 0; + if (index_s >= 
(cols * rows)) index_s = cols * rows - 1; + if (index_w < 0) index_w = 0; + if (index_e >= (cols * rows)) index_e = cols * rows - 1; + + float n, w, e, s, jc, g2, l, num, den, qsqr, c; + + // load data to shared memory + north[ty * BLOCK_SIZE + tx] = J_cuda[index_n]; + south[ty * BLOCK_SIZE + tx] = J_cuda[index_s]; + if (by == 0) + { + north[ty * BLOCK_SIZE + tx] = J_cuda[BLOCK_SIZE * bx + tx]; + } + else if (by == tile_dim_x - 1) + { + south[ty * BLOCK_SIZE + tx] = J_cuda[cols * BLOCK_SIZE * (tile_dim_x - 1) + BLOCK_SIZE * bx + cols * (BLOCK_SIZE - 1) + tx]; + } + block.sync(); + + west[ty * BLOCK_SIZE + tx] = J_cuda[index_w]; + east[ty * BLOCK_SIZE + tx] = J_cuda[index_e]; + + if (bx == 0) + { + west[ty * BLOCK_SIZE + tx] = J_cuda[cols * BLOCK_SIZE * by + cols * ty]; + } + else if (bx == tile_dim_x - 1) + { + east[ty * BLOCK_SIZE + tx] = J_cuda[cols * BLOCK_SIZE * by + BLOCK_SIZE * (tile_dim_x - 1) + cols * ty + BLOCK_SIZE - 1]; + } + + block.sync(); + temp[ty * BLOCK_SIZE + tx] = J_cuda[index]; + + block.sync(); + + jc = temp[ty * BLOCK_SIZE + tx]; + + if (ty == 0 && tx == 0) + { // nw + n = north[ty * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = west[ty * BLOCK_SIZE + tx] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + else if (ty == 0 && tx == BLOCK_SIZE - 1) + { // ne + n = north[ty * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = east[ty * BLOCK_SIZE + tx] - jc; + } + else if (ty == BLOCK_SIZE - 1 && tx == BLOCK_SIZE - 1) + { // se + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = south[ty * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = east[ty * BLOCK_SIZE + tx] - jc; + } + else if (ty == BLOCK_SIZE - 1 && tx == 0) + { // sw + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = south[ty * BLOCK_SIZE + tx] - jc; + w = west[ty * BLOCK_SIZE + tx] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + + else if (ty == 0) + { // n + n 
= north[ty * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + else if (tx == BLOCK_SIZE - 1) + { // e + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = east[ty * BLOCK_SIZE + tx] - jc; + } + else if (ty == BLOCK_SIZE - 1) + { // s + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = south[ty * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + else if (tx == 0) + { // w + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = west[ty * BLOCK_SIZE + tx] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + else + { // the data elements which are not on the borders + n = temp[(ty - 1) * BLOCK_SIZE + tx] - jc; + s = temp[(ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[ty * BLOCK_SIZE + tx - 1] - jc; + e = temp[ty * BLOCK_SIZE + tx + 1] - jc; + } + + g2 = (n * n + s * s + w * w + e * e) / (jc * jc); + + l = (n + s + w + e) / jc; + + num = (0.5 * g2) - ((1.0 / 16.0) * (l * l)); + den = 1 + (.25 * l); + qsqr = num / (den * den); + + // diffusion coefficent (equ 33) + den = (qsqr - q0sqr) / (q0sqr * (1 + q0sqr)); + c = 1.0 / (1.0 + den); + + // saturate diffusion coefficent + if (c < 0) + { + temp_result[ty * BLOCK_SIZE + tx] = 0; + } + else if (c > 1) + { + temp_result[ty * BLOCK_SIZE + tx] = 1; + } + else + { + temp_result[ty * BLOCK_SIZE + tx] = c; + } + + block.sync(); + + C_cuda[index] = temp_result[ty * BLOCK_SIZE + tx]; + E_C[index] = e; + W_C[index] = w; + S_C[index] = s; + N_C[index] = n; + } +} + +__global__ void +srad_cuda_2( + float *E_C, + float *W_C, + float *N_C, + float *S_C, + float *J_cuda, + float *C_cuda, + int cols, + int rows, + float lambda, + float q0sqr, + int block_size) +{ + cooperative_groups::thread_block block = 
cooperative_groups::this_thread_block(); + // shared memory allocation + __shared__ float south_c[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float east_c[BLOCK_SIZE * BLOCK_SIZE]; + + __shared__ float c_cuda_temp[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float c_cuda_result[BLOCK_SIZE * BLOCK_SIZE]; + __shared__ float temp[BLOCK_SIZE * BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int tile = base_tile; + int end_tile = tile + tiles_this_block; + + for (; tile < end_tile; tile += 1) + { + //block id + int bx = tile % tile_dim_x; + int by = tile / tile_dim_x; + + //thread id + int tx = threadIdx.x; + int ty = threadIdx.y; + + // indices + int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx; + int index_s = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * BLOCK_SIZE + tx; + int index_e = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + BLOCK_SIZE; + + if (index_s >= (cols * rows)) index_s = cols * rows - 1; + if (index_e >= (cols * rows)) index_e = cols * rows - 1; + + float cc, cn, cs, ce, cw, d_sum; + + // load data to shared memory + temp[ty * BLOCK_SIZE + tx] = J_cuda[index]; + block.sync(); + + south_c[ty * BLOCK_SIZE + tx] = C_cuda[index_s]; + if (by == tile_dim_x - 1) + { + south_c[ty * BLOCK_SIZE + tx] = C_cuda[cols * BLOCK_SIZE * (tile_dim_x - 1) + BLOCK_SIZE * bx + cols * (BLOCK_SIZE - 1) + tx]; + } + block.sync(); + + east_c[ty * BLOCK_SIZE + tx] = C_cuda[index_e]; + if (bx == tile_dim_x - 1) + { + east_c[ty * BLOCK_SIZE + tx] = C_cuda[cols * BLOCK_SIZE * by + BLOCK_SIZE * (tile_dim_x - 1) + cols * ty + BLOCK_SIZE - 1]; + } + block.sync(); + + c_cuda_temp[ty * BLOCK_SIZE + tx] = C_cuda[index]; + block.sync(); + cc = c_cuda_temp[ty * BLOCK_SIZE + tx]; + + if (ty == BLOCK_SIZE - 1 && tx == BLOCK_SIZE - 1) + { // se + cn = cc; + cs 
= south_c[ty * BLOCK_SIZE + tx]; + cw = cc; + ce = east_c[ty * BLOCK_SIZE + tx]; + } + else if (tx == BLOCK_SIZE - 1) + { // e + cn = cc; + cs = c_cuda_temp[(ty + 1) * BLOCK_SIZE + tx]; + cw = cc; + ce = east_c[ty * BLOCK_SIZE + tx]; + } + else if (ty == BLOCK_SIZE - 1) + { // s + cn = cc; + cs = south_c[ty * BLOCK_SIZE + tx]; + cw = cc; + ce = c_cuda_temp[ty * BLOCK_SIZE + tx + 1]; + } + else + { // the data elements which are not on the borders + cn = cc; + cs = c_cuda_temp[(ty + 1) * BLOCK_SIZE + tx]; + cw = cc; + ce = c_cuda_temp[ty * BLOCK_SIZE + tx + 1]; + } + + // divergence (equ 58) + d_sum = cn * N_C[index] + cs * S_C[index] + cw * W_C[index] + ce * E_C[index]; + + // image update (equ 61) + c_cuda_result[ty * BLOCK_SIZE + tx] = temp[ty * BLOCK_SIZE + tx] + 0.25 * lambda * d_sum; + + block.sync(); + + J_cuda[index] = c_cuda_result[ty * BLOCK_SIZE + tx]; + } +} diff --git a/workloads/realworld/uvm_prefetch_async/BN/.clang-format b/workloads/realworld/uvm_prefetch_async/BN/.clang-format new file mode 100644 index 0000000000000000000000000000000000000000..3a5940ef65bf1e40df9511da805a7a0440184e84 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/BN/.clang-format @@ -0,0 +1,90 @@ +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: false +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: false +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: false + AfterControlStatement: false + 
AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IndentCaseLabels: false +IndentWidth: 2 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 8 +UseTab: Never +... 
+ diff --git a/workloads/realworld/uvm_prefetch_async/BN/LICENSE b/workloads/realworld/uvm_prefetch_async/BN/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/BN/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/workloads/realworld/uvm_prefetch_async/BN/Makefile b/workloads/realworld/uvm_prefetch_async/BN/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..864b8e45401b0fe12162389c2e9d33fa86f4fc9f --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/BN/Makefile @@ -0,0 +1,169 @@ +################################################################################ +# +# Copyright 1993-2013 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. 
NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. 
+# +################################################################################ +# +# Makefile project only supported on Mac OS X and Linux Platforms) +# +################################################################################ + +include ../../../common/make.config +include ./findcudalib.mk + +# Location of the CUDA Toolkit +CUDA_PATH ?= $(CUDA_DIR) + +# internal flags +NVCCFLAGS := -m${OS_SIZE} +CCFLAGS := -Wno-narrowing +NVCCLDFLAGS := +LDFLAGS := + +# Extra user flags +EXTRA_NVCCFLAGS ?= +EXTRA_NVCCLDFLAGS ?= +EXTRA_LDFLAGS ?= +EXTRA_CCFLAGS ?= + +# OS-specific build flags +ifneq ($(DARWIN),) + LDFLAGS += -rpath $(CUDA_PATH)/lib + CCFLAGS += -arch $(OS_ARCH) $(STDLIB) +else + ifeq ($(OS_ARCH),armv7l) + ifeq ($(abi),gnueabi) + CCFLAGS += -mfloat-abi=softfp + else + # default to gnueabihf + override abi := gnueabihf + LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3 + CCFLAGS += -mfloat-abi=hard + endif + endif +endif + +ifeq ($(ARMv7),1) +NVCCFLAGS += -target-cpu-arch ARM +ifneq ($(TARGET_FS),) +CCFLAGS += --sysroot=$(TARGET_FS) +LDFLAGS += --sysroot=$(TARGET_FS) +LDFLAGS += -rpath-link=$(TARGET_FS)/lib +LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib +LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-$(abi) +endif +endif + +# Debug build flags +ifeq ($(dbg),1) + NVCCFLAGS += -g -G + TARGET := debug +else + TARGET := release +endif + +ALL_CCFLAGS := +ALL_CCFLAGS += $(NVCCFLAGS) +ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS)) +ALL_CCFLAGS += $(EXTRA_NVCCFLAGS) +ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS)) + +ALL_LDFLAGS := +ALL_LDFLAGS += $(ALL_CCFLAGS) +ALL_LDFLAGS += $(NVCCLDFLAGS) +ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS)) +ALL_LDFLAGS += $(EXTRA_NVCCLDFLAGS) +ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS)) + +# Common includes and paths for CUDA +INCLUDES := -I../../common/inc -I$(INCLUDE) -I$(CUPTI_INCLUDE) +LIBRARIES := -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + 
+################################################################################ + +# CUDA code generation flags +ifneq ($(OS_ARCH),armv7l) +GENCODE_SM10 := -gencode arch=compute_10,code=sm_10 +endif +GENCODE_SM20 := -gencode arch=compute_20,code=sm_20 +GENCODE_SM30 := -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=\"sm_35,compute_35\" +GENCODE_SM80 := -gencode arch=compute_80,code=sm_80 +GENCODE_FLAGS := $(GENCODE_SM80) + +################################################################################ + +# Target rules +all: build + +build: ordergraph_30 ordergraph_40 ordergraph_45 ordergraph_50 + +# ordergraph_25.o: ordergraph.cu ordergraph_kernel.cu +# $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_25 -o $@ -c $< + +ordergraph_30.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_30 -o $@ -c $< + +ordergraph_40.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_40 -o $@ -c $< + +ordergraph_45.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_45 -o $@ -c $< + +ordergraph_50.o: ordergraph.cu ordergraph_kernel.cu + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_50 -o $@ -c $< + +# ordergraph_125.o: ordergraph.cu ordergraph_kernel.cu +# $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -DDATA_125 -o $@ -c $< + + + +# ordergraph_25: ordergraph_25.o +# $(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES) + +ordergraph_30: ordergraph_30.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +ordergraph_40: ordergraph_40.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +ordergraph_45: ordergraph_45.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp 
$(LIBRARIES) + +ordergraph_50: ordergraph_50.o + $(NVCC) $(INCLUDES) $(ALL_LDFLAGS) -o $@ $+ $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp $(CUPTI_ADD_COMMON)/cupti_add.cpp $(LIBRARIES) + +# ordergraph_125: ordergraph_125.o +# $(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES) + +run: build + ./ordergraph + +clean: + rm -f ordergraph_30 ordergraph_40 ordergraph_45 ordergraph_50 *.o *.bin *.out + +clobber: clean diff --git a/workloads/realworld/uvm_prefetch_async/BN/README.md b/workloads/realworld/uvm_prefetch_async/BN/README.md new file mode 100644 index 0000000000000000000000000000000000000000..07158a0bd52af63032c860ff04e243e0d7c76ef1 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/BN/README.md @@ -0,0 +1,21 @@ +The code works with CUDA 6.0. +If you are using this code for your project, please cite [our paper](https://yuemmawang.github.io/publications/wang-tpds2016.pdf): + +``` +Wang Y, Qian W, Zhang S, et al. A Learning Algorithm for Bayesian Networks and +Its Efficient Implementation on GPUs[J]. Parallel and Distributed Systems, IEEE +Transactions on, 2016, 27(1): 17-30. +``` + +``` +@article{wang2015learning, + title={A learning algorithm for Bayesian networks and its efficient implementation on GPUs}, + author={Wang, Yu and Qian, Weikang and Zhang, Shuchang and Liang, Xiaoyao and Yuan, Bo}, + journal={IEEE Transactions on Parallel and Distributed Systems}, + volume={27}, + number={1}, + pages={17--30}, + year={2015}, + publisher={IEEE} +} +``` diff --git a/workloads/realworld/uvm_prefetch_async/BN/data125.cu b/workloads/realworld/uvm_prefetch_async/BN/data125.cu new file mode 100644 index 0000000000000000000000000000000000000000..6bb370a636a330992e083f0b52f1f67a9a86040e --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/BN/data125.cu @@ -0,0 +1,610 @@ +// The data are synthesized. 
+ +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=25; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0, 
+1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,1, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,1, 
+0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0, 
+1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,0,1,0, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, 
+0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, 
+1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,0,1,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0, 
+1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0, 
+0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0, 
+1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, 
+0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,0,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0, 
+0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, 
+1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,0,1,0,0,0,1, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0, 
+1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0, 
+0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0, 
+0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,1,0,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0, 
+0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, 
+1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1, 
+1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0, 
+1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,1,0, 
+1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1, 
+1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1, 
+0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,0,0,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1, 
+1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,0,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,1,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,0, 
+1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0, 
+1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,0, 
+1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0, 
+1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,0, 
+1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,0, 
+1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,1,1,0, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, 
+1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,0,0,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, 
+1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1, 
+1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0, 
+1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,1,0,1,1,1,1,0,1,1,0,1,0, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0, 
+0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,1,1,0,0,0,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,0,0, 
+1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,0,0,1,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,1,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0, 
+0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,1,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,1,1, 
+1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0, 
+1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,1, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,0, 
+1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,0,1,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,1,0,1, 
+0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, 
+1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,0,0, 
+1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,1,1,0,1,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,1,1,0,0,1,1,1,1,1,0,0,0,0,1,0,1,0,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,1,1, 
+1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,1,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,0,0, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,1,1, 
+0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0, 
+1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0, 
+1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,1,0, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, 
+1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,0, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,1,0,0,0,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, 
+1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,1,1,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,1,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,1,0, 
+1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,0,1,1,0, 
+1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,1,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0, 
+0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0, 
+1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,1,1,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0, 
+1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,0,1,0,1,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,0,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0, 
+1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,1,1,1,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,1, +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/BN/data25.cu b/workloads/realworld/uvm_prefetch_async/BN/data25.cu new file mode 100644 index 0000000000000000000000000000000000000000..6af94f79766c6e36ee121f6c537f987f841bf7c0 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/BN/data25.cu @@ -0,0 +1,612 @@ +// The data are synthesized. 
+ +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=25; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1, 
+0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1, 
+1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1, 
+1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1, 
+1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0, 
+1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0, 
+1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0, 
+1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1, 
+1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1, 
+1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0, 
+0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0, 
+1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0, 
+1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0, +} + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/BN/data30.cu b/workloads/realworld/uvm_prefetch_async/BN/data30.cu new file mode 100644 index 0000000000000000000000000000000000000000..bf89729e319e920533f9d134c8a2dff9aa4bc022 --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch_async/BN/data30.cu @@ -0,0 +1,612 @@ +// The data are synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=30; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0, 
+1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, 
+1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0, 
+1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1, 
+1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0, 
+1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0, 
+1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1, 
+0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, 
+1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1, 
+1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1, 
+1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, 
+1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0, 
+1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0, 
+1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0, 
+0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, 
+1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0, 
+1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0, +} + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/BN/data40.cu b/workloads/realworld/uvm_prefetch_async/BN/data40.cu new file mode 100644 index 0000000000000000000000000000000000000000..16d34d9dc4860d1dd24d604e501bccb43cae8095 --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch_async/BN/data40.cu @@ -0,0 +1,612 @@ +// The data are synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=40; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1, 
+1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0, 
+1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0, 
+0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0, 
+1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0, 
+1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0, 
+0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0, 
+1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0, 
+1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1, 
+1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0, 
+1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0, 
+1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1, 
+1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1, 
+0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1, 
+1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1, 
+0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1, 
+1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0, 
+1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1, 
+1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1, 
+1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1, 
+1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0, 
+0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1, +} + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/BN/data45.cu b/workloads/realworld/uvm_prefetch_async/BN/data45.cu new file mode 100644 index 0000000000000000000000000000000000000000..b23e9a35e7c27948c0853710a06be462694df57d --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/BN/data45.cu @@ -0,0 +1,616 @@ +// The data are synthesized. +#include +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=45; +const int STATE_N=2; +const int DATA_N=600; + + + +int data[DATA_N*NODE_N]= { +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1, 
+1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0, 
+1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0, +0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0, +0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0, +1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0, 
+1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0, +1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0, 
+0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0, 
+1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0, +0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0, 
+1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1, 
+1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1, 
+0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0, 
+1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1, +1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0, 
+1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1, 
+0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1, +1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1, 
+1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1, 
+1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0, +1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1, 
+1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0, +1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0, 
+1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0, +1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1, 
+1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0, +1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1, +0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1, 
+1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1, 
+1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1, +1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0, +1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1, 
+0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0, 
+1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0, +} + + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/BN/data50.cu b/workloads/realworld/uvm_prefetch_async/BN/data50.cu new file mode 100644 
index 0000000000000000000000000000000000000000..936a7aa4a67a1f949f1264477388a7eb5a93a1b4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/BN/data50.cu @@ -0,0 +1,612 @@ +// The data are synthesized. + +#ifndef _DATA_H_ +#define _DATA_H_ +const int NODE_N=50; +const int STATE_N=2; +const int DATA_N=600; +int data[DATA_N*NODE_N]={ +1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,0,0, +1,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0,1,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,0,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0, 
+1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1, +1,1,1,1,0,1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1, +0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1, +1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1, +1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,1, +0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,1,0,0,0,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,1,1, +1,0,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,1,0,1,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0, +1,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,0,1, +1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0, +0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1, 
+0,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0, +1,1,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0, +1,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0, +1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,1,0, +1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1, +1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,1,1,0, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,1,0,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,1,1, +1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1, 
+0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,1,0,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1, +0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1, +1,0,1,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1, +0,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,1,1,0,0, +1,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0, 
+1,0,0,1,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0, +0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0, +1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0, +1,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,1,0,1, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,0,1, +1,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0, +0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1, +1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1, +1,1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0, +0,1,1,1,0,1,1,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0, 
+1,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,1,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0,1,0,1, +1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0, +0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1, +1,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,1,0, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1, +0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0, 
+1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,1,0,1,0,1,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,1, +1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1, +1,1,1,0,1,1,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,1,1, +1,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,0,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,1,0,1,0, +0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,0, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0, +0,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1, +1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1, +1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,1, 
+1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,0, +1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,1,0,1, +1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1, +1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,1,0, +1,0,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1, +1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0, +0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,1, +0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1, +1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1, +1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1, 
+1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0, +0,0,0,0,1,1,1,1,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,1,0, +0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,1,0,0,0,0,1,1,1, +0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,1,1,0,0, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1, +1,0,1,0,0,0,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0, +0,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,1, +1,1,1,1,1,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0, 
+0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,0, +1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1, +0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0, +1,0,1,0,1,0,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0, +1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1, +1,1,1,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1,1,1,1,0,1, 
+1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,0,1,1,0,1,0,1,0,1,1, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,1, +1,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0, +1,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1, +1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,1,1,0,1,0,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,0, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,0,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,0,1, +1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0, +0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1, 
+1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1, +1,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,1,1,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,1,1,0,0,1, +1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,1,0,1, +0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1, +1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0,1, +1,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,0, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1, +0,1,0,1,1,1,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,1,1,0, +0,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,1,1,0, 
+1,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1, +1,1,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0, +1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,1,1,0,0,1, +1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1,1,1,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,0,0,1, +0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0, +1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,1,1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0, 
+1,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,1, +0,1,1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,1,0,0,0,1, +1,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1, +1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,0,1,0, +0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,1,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0, +1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,0,1, +1,1,0,1,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,1,1,0,0,1,1,1,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0, +1,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0, +1,0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1, 
+1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0, +1,1,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,0,0,0,1, +1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1, +1,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0, +1,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0, +1,0,1,1,0,1,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,1,1,1,0,1,0,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0, +1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1, +0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,0,1,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,1,1,1,0, +1,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1, +1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1, +1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0, +1,1,0,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0, +1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,1,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1, +1,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,0,0,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1, 
+1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,0,0,1,0,0,1,1, +1,1,0,1,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1,1,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,0,1,0,1,0,0,1, +1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0, +1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0, +1,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0, +0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,1,1,1,1,1,0,1, +1,1,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,0, +1,1,1,1,0,1,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,1, +1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,1,0, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,0,1,1, +0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1, 
+1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,0,0,0,1,0,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,0,1, +1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,0,1,1,1,1,0,0,1, +1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,0,1, +0,1,1,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,1,1,0,1,1,0,0, +1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,1,1,0, +1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0, 
+1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,1,1,1,0,0,1, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,1,0, +1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0, +1,1,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,1,0,1,0,1,1,0, +0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,1,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,0,0, +0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1,0,1,1,0,0,0,1,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,1,0, +0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0, +0,1,1,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1, +1,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0, 
+1,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0, +1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,1,0,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,1, +1,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0, +1,0,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1, +1,1,1,1,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,1, +1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0, +1,0,1,0,1,0,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1, +1,0,1,1,0,0,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0,0, +1,0,0,0,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0, +0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1, +0,0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0, +1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1,1, +1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0, +1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,0, 
+1,1,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0, +1,1,1,1,1,1,0,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1, +1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,0,0,1,1,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,1,0,1, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,1,1,0, +1,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,1,0,1,1,1, +1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0, +0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,1,1,1,1,0,0,0,0,1,0,0,1,0, +1,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,0,1,0,1,0, +1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,1,1,0,0,1, +1,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,1,1,1,0, +1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,1, +1,1,1,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0, +1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,0,1,0,1,0,0,0, +0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,1, +1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0, 
+1,0,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,1,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0, +0,1,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0, +1,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1, +0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,1, +1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,0,1,0,0, +1,1,1,1,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0,0,0,1,1,0, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0, +1,1,1,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1, +1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1, +1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,1,0,0,0,0,1,0,1, 
+1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,1, +1,0,1,1,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,1,0,1, +1,0,1,0,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1, +0,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,0,0,1,1,0, +1,1,1,1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,0, +1,0,1,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1,0,0,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,1,1,0, +1,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,1, +1,1,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1, +0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,0,1, +1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,1,1,1,1,0,1,1,0,1,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1, +1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1, +0,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1, 
+1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,1,1,0,0,0,0,0, +1,1,0,0,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,1,1,0,1,0,1,0,1,1,1,0,0,1,1,0,0,1,1,0, +1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,1,0,0,1, +1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,1,1,0,1,0,1, +0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0, +1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,1,1, +1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1, +0,1,1,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,0,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,1,0,0,1, +1,0,1,1,0,1,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,0, +1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1, +1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0, 
+0,0,1,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0, +1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,0,1,1,1,0,1,0, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,1, +1,0,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1, +1,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1, +1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0, +1,1,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1, +0,1,1,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,0,0,0,1,1,1,1, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,1,1,0,1, +1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,1,1,0,0,1,0,0, +1,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,1,0,1,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1, +1,0,1,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0, +1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,0, 
+1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0, +1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1, +1,1,0,0,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0, +1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,1,1,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0, +1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1, +1,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1, +1,0,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,0,1,1, +0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,0,1,0, +0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,1, +1,1,0,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,1, +1,0,0,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,1,1,0,1,1,0,1,1,1,0,1,1,0,0,1,1,0, +1,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0,0,1,1,1,0,0,1,1,1,1,1,1,0,1, +1,1,1,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,1,1,1,1,0,1,1,1,0,1,0,0, +1,1,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,1,1,0,0,1, +1,0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1, 
+1,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,0, +0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0, +0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0, +0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0,0,1,1,0,1,1,0,0,1,0,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,1,0,1, +1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,1,1,0,1, +1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1, +1,1,1,0,0,1,0,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1, +1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,0,0,1,1,0,1,0,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1, +0,1,1,1,0,0,1,1,1,1,0,1,0,1,0,1,1,0,1,0,1,1,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,1,0,1,1,0,0,0, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0,0,0,1, +1,0,1,1,1,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,1,1,1,0,0,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0, +1,0,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,1,1, +1,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1, +1,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,0, +1,0,0,1,0,0,0,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1, 
+1,0,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1, +1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,1,1,1,0, +1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0, +1,0,0,1,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,0, +1,0,0,0,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0,0, +1,0,0,1,0,0,1,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,1,1,0, +0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,0,1,1,0,0,0,0,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,1, +0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,0,1,0, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,0, +1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,1,1,0,1,0,1, +1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,1,1,1,0,0,1,1,0,1,0,0,0, +1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,1, +1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, +1,1,0,1,0,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,1,0, +1,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0, 
+1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,1,1,1,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,0,1,0,0,1, +1,0,1,0,0,1,0,1,1,1,0,0,1,1,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,1,0,0,0, +1,1,1,0,1,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,1,1,0,1,0,1,1,1,0,1,0,1,0,0, +1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,0,1,0, +1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,1, +1,0,1,1,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1,0, +0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,1,1, +1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,0, +1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,1, +0,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0, +0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1, +1,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0, +1,1,1,0,0,0,1,0,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0, +1,1,1,0,0,1,0,1,1,0,0,0,0,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,0,0, 
+1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1, +1,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1,0,0, +1,0,0,1,0,0,0,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,0,0,0,0,0,1, +1,0,1,0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,1,0, +1,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,1,0, +1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0, +1,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1, +1,1,0,1,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1, +1,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,1,0,0,0,0,0,1, +1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,0,1,1,0,1,1,1, +1,1,0,1,0,1,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0, +1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1, +1,0,0,1,0,0,0,0,1,1,1,0,1,1,1,0,1,1,0,0,1,0,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,0,1,0,1, +0,1,0,1,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,1,1,1,1,1,1,0,0,0,0, +1,1,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1, +1,1,0,1,1,0,1,0,0,0,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,1,1,0,0,1,0,1,0,1,0,1,1, +1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,1, 
+1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,1, +1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,1, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1, +1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,0,1,0, +1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,1,1,1,0,1,0, +1,1,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,0,1, +1,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,0,1,0,0, +1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,0, +1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1, +0,0,0,1,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0, +1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1, +1,1,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0, +1,0,0,0,1,1,0,0,0,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,0,1,1,0,1,1,1,0,1,1, +1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0, +1,1,1,0,1,0,1,0,0,0,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,1,1,0, +0,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0, 
+1,1,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0, +1,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,0,0,1, +1,1,0,1,0,1,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,0,1,0,1,1,1,1,0,0,1,0,1, +1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0, +1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,1,1,1,1,0,1,0,1,0,1,1, +1,1,0,1,1,1,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,0,0, +1,1,1,1,0,1,1,0,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,0,1, +1,0,0,1,0,0,1,1,0,0,0,1,1,1,1,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,1,0,0, +0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0, +1,1,0,1,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1, +1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,1,0, +0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,1,1,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,1, +1,1,1,1,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1, +1,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0, +} + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/BN/file_process.py b/workloads/realworld/uvm_prefetch_async/BN/file_process.py new file mode 100644 index 0000000000000000000000000000000000000000..eeebbee70e59153ca0f1a960f4e8ffa0437693c3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/BN/file_process.py @@ -0,0 +1,15 @@ +import os +filename = "data125.cu" +file_write = "data45.cu" + +f_w = open(file_write,"w") +with open(filename) as f: + content = f.readlines() + + for i, line in enumerate(content): + if i < 8: + f_w.write(line) + elif 
i >= 8: + f_w.write(line[0:90]) + f_w.write("\n") +f_w.close() \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/BN/findcudalib.mk b/workloads/realworld/uvm_prefetch_async/BN/findcudalib.mk new file mode 100644 index 0000000000000000000000000000000000000000..f40c2c38e5510fdee2fdf59df00160b547c056c1 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/BN/findcudalib.mk @@ -0,0 +1,226 @@ +################################################################################ +# +# Copyright 1993-2013 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. 
+# +################################################################################ +# +# findcudalib.mk is used to find the locations for CUDA libraries and other +# Unix Platforms. This is supported Mac OS X and Linux. +# +################################################################################ + +# OS Name (Linux or Darwin) +OSUPPER = $(shell uname -s 2>/dev/null | tr "[:lower:]" "[:upper:]") +OSLOWER = $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]") + +# Flags to detect 32-bit or 64-bit OS platform +OS_SIZE = $(shell uname -m | sed -e "s/i.86/32/" -e "s/x86_64/64/" -e "s/armv7l/32/") +OS_ARCH = $(shell uname -m | sed -e "s/i386/i686/") + +# Determine OS platform and unix distribution +ifeq ("$(OSLOWER)","linux") + # first search lsb_release + DISTRO = $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]") + DISTVER = $(shell lsb_release -r -s 2>/dev/null) + ifeq ("$(DISTRO)",'') + # second search and parse /etc/issue + DISTRO = $(shell more /etc/issue | awk '{print $$1}' | sed '1!d' | sed -e "/^$$/d" 2>/dev/null | tr "[:upper:]" "[:lower:]") + DISTVER= $(shell more /etc/issue | awk '{print $$2}' | sed '1!d' 2>/dev/null + endif + ifeq ("$(DISTRO)",'') + # third, we can search in /etc/os-release or /etc/{distro}-release + DISTRO = $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB") + DISTVER= $(shell awk '/DISTRIB_RELEASE/' /etc/*-release | sed 's/DISTRIB_RELEASE=//' | grep -v "DISTRIB_RELEASE") + endif +endif + +# search at Darwin (unix based info) +DARWIN = $(strip $(findstring DARWIN, $(OSUPPER))) +ifneq ($(DARWIN),) + SNOWLEOPARD = $(strip $(findstring 10.6, $(shell egrep "10\.6" /System/Library/CoreServices/SystemVersion.plist))) + LION = $(strip $(findstring 10.7, $(shell egrep "10\.7" /System/Library/CoreServices/SystemVersion.plist))) + MOUNTAIN = $(strip $(findstring 10.8, $(shell egrep "10\.8" /System/Library/CoreServices/SystemVersion.plist))) + MAVERICKS = $(strip 
$(findstring 10.9, $(shell egrep "10\.9" /System/Library/CoreServices/SystemVersion.plist))) +endif + +# Common binaries +GCC ?= g++ +CLANG ?= /usr/bin/clang + +ifeq ("$(OSUPPER)","LINUX") + NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(GCC) +else + # for some newer versions of XCode, CLANG is the default compiler, so we need to include this + ifneq ($(MAVERICKS),) + NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(CLANG) + STDLIB ?= -stdlib=libstdc++ + else + NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(GCC) + endif +endif + +# Take command line flags that override any of these settings +ifeq ($(i386),1) + OS_SIZE = 32 + OS_ARCH = i686 +endif +ifeq ($(x86_64),1) + OS_SIZE = 64 + OS_ARCH = x86_64 +endif +ifeq ($(ARMv7),1) + OS_SIZE = 32 + OS_ARCH = armv7l +endif + +ifeq ("$(OSUPPER)","LINUX") + # Each Linux Distribuion has a set of different paths. This applies especially when using the Linux RPM/debian packages + ifeq ("$(DISTRO)","ubuntu") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","kubuntu") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","debian") + CUDAPATH ?= /usr/lib/nvidia-current + CUDALINK ?= -L/usr/lib/nvidia-current + DFLT_PATH = /usr/lib + endif + ifeq ("$(DISTRO)","suse") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","suse linux") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","opensuse") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","fedora") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = 
/usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","redhat") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","red") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + ifeq ("$(DISTRO)","redhatenterpriseworkstation") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH ?= /usr/lib + endif + endif + ifeq ("$(DISTRO)","centos") + ifeq ($(OS_SIZE),64) + CUDAPATH ?= /usr/lib64/nvidia + CUDALINK ?= -L/usr/lib64/nvidia + DFLT_PATH = /usr/lib64 + else + CUDAPATH ?= + CUDALINK ?= + DFLT_PATH = /usr/lib + endif + endif + + ifeq ($(ARMv7),1) + CUDAPATH := /usr/arm-linux-gnueabihf/lib + CUDALINK := -L/usr/arm-linux-gnueabihf/lib + ifneq ($(TARGET_FS),) + CUDAPATH += $(TARGET_FS)/usr/lib/nvidia-current + CUDALINK += -L$(TARGET_FS)/usr/lib/nvidia-current + endif + endif + + # Search for Linux distribution path for libcuda.so + CUDALIB ?= $(shell find $(CUDAPATH) $(DFLT_PATH) -name libcuda.so -print 2>/dev/null) + + ifeq ("$(CUDALIB)",'') + $(info >>> WARNING - CUDA Driver libcuda.so is not found. Please check and re-install the NVIDIA driver. 
<<<) + EXEC=@echo "[@]" + endif +else + # This would be the Mac OS X path if we had to do anything special +endif + diff --git a/workloads/realworld/uvm_prefetch_async/BN/ordergraph.cu b/workloads/realworld/uvm_prefetch_async/BN/ordergraph.cu new file mode 100644 index 0000000000000000000000000000000000000000..30061ec8f3c3395b03be36e159d455a531d77e86 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/BN/ordergraph.cu @@ -0,0 +1,757 @@ +#include +#include +#include +#include +#include +// #include +// includes CUDA +#include +// includes, kernels +#include "ordergraph_kernel.cu" +; + +#include "../../../common/cpu_timestamps.h" +#include "../../../common/cupti_add.h" + +const int HIGHEST = 3; +int taskperthr = 1; +int sizepernode; +int ITER = 100; + +// global var +float preScore = -99999999999.0f; +float score = 0.0; +float maxScore[HIGHEST] = {-999999999.0f}; +bool orders[NODE_N][NODE_N]; +bool preOrders[NODE_N][NODE_N]; +bool preGraph[NODE_N][NODE_N]; +bool bestGraph[HIGHEST][NODE_N][NODE_N]; +bool graph[NODE_N][NODE_N]; +// float prior[NODE_N][NODE_N]; +float *localscore, *D_localscore, *D_Score, *scores; +float *LG; +bool *D_parent; +int *D_resP, *parents; + +void initial(); // initial orders and data +int genOrders(); // swap +int ConCore(); // discard new order or not +bool getparent(int *bit, int *pre, int posN, int *parent, int *parN, + int time); // get every possible set of parents for a node +void incr(int *bit, int n); // binary code increases 1 each time +void incrS(int *bit, int n); // STATE_N code increases 1 each time +bool getState( + int parN, int *state, + int time); // get every possible combination of state for a parent set +float logGamma(int N); // log and gamma +float findBestGraph(); +void genScore(); +int convert(int *parent, int parN); +void sortGraph(); +void swap(int a, int b); +void Pre_logGamma(); +int findindex(int *arr, int size); +int C(int n, int a); + +FILE *fpout; + +int main(int argc, char *argv[]) { + /* + for(i=0;i 
maxScore[HIGHEST - 1]) { + maxScore[HIGHEST - 1] = preScore; + for (a = 0; a < NODE_N; a++) { + for (b = 0; b < NODE_N; b++) { + bestGraph[HIGHEST - 1][a][b] = preGraph[a][b]; + } + } + b = HIGHEST - 1; + for (a = HIGHEST - 2; a >= 0; a--) { + if (maxScore[b] > maxScore[a]) { + swap(a, b); + tmpd = maxScore[a]; + maxScore[a] = maxScore[b]; + maxScore[b] = tmpd; + b = a; + } + } + } + } + + } // endwhile + + cudaFreeHost(localscore); + cudaFree(D_localscore); + cudaFree(D_parent); + + cudaFreeHost(scores); + cudaFreeHost(parents); + cudaFree(D_Score); + cudaFree(D_resP); + + /* + for(j=0;j max) { + max = maxScore[i]; + maxi = i; + } + } + + swap(j, maxi); + tmp = maxScore[j]; + maxScore[j] = max; + maxScore[maxi] = tmp; + } +} + +void swap(int a, int b) { + int i, j; + bool tmp; + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + + tmp = bestGraph[a][i][j]; + bestGraph[a][i][j] = bestGraph[b][i][j]; + bestGraph[b][i][j] = tmp; + } + } +} + +void initial() { + int i, j, tmp, a, b, r; + bool tmpd; + tmp = 1; + for (i = 1; i <= 4; i++) { + tmp += C(NODE_N - 1, i); + } + sizepernode = tmp; + tmp *= NODE_N; + + cudaMallocHost((void **)&localscore, tmp * sizeof(float)); + + for (i = 0; i < tmp; i++) + localscore[i] = 0; + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) + orders[i][j] = 0; + } + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < i; j++) + orders[i][j] = 1; + } + r = rand() % 10000; + for (i = 0; i < r; i++) { + a = rand() % NODE_N; + b = rand() % NODE_N; + for (j = 0; j < NODE_N; j++) { + tmpd = orders[j][a]; + orders[j][a] = orders[j][b]; + orders[j][b] = tmpd; + } + + for (j = 0; j < NODE_N; j++) { + tmpd = orders[a][j]; + orders[a][j] = orders[b][j]; + orders[b][j] = tmpd; + } + } + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + preOrders[i][j] = orders[i][j]; + } + } +} + +// generate ramdom order +int genOrders() { + + int a, b, j; + bool tmp; + a = rand() % NODE_N; + b = rand() % NODE_N; + + for (j 
= 0; j < NODE_N; j++) { + tmp = orders[a][j]; + orders[a][j] = orders[b][j]; + orders[b][j] = tmp; + } + for (j = 0; j < NODE_N; j++) { + tmp = orders[j][a]; + orders[j][a] = orders[j][b]; + orders[j][b] = tmp; + } + + return 1; +} + +// decide leave or discard an order +int ConCore() { + int i, j; + float tmp; + tmp = log((rand() % 100000) / 100000.0); + if (tmp < (score - preScore)) { + + for (i = 0; i < NODE_N; i++) { + for (j = 0; j < NODE_N; j++) { + preOrders[i][j] = orders[i][j]; + preGraph[i][j] = graph[i][j]; + } + } + preScore = score; + + return 1; + } + + return 0; +} + +void genScore() { + int *D_data; + float *D_LG; + dim3 grid(sizepernode / 256 + 1, 1, 1); + dim3 threads(256, 1, 1); + + Pre_logGamma(); + // cudaPrintfInit(); + cudaMallocManaged((void **)&D_data, NODE_N * DATA_N * sizeof(int)); + cudaMallocManaged((void **)&D_localscore, NODE_N * sizepernode * sizeof(float)); + cudaMallocManaged((void **)&D_LG, (DATA_N + 2) * sizeof(float)); + cudaMemset(D_localscore, 0.0, NODE_N * sizepernode * sizeof(float)); + memcpy(D_data, data, NODE_N * DATA_N * sizeof(int)); + memcpy(D_LG, LG, (DATA_N + 2) * sizeof(float)); + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + cudaMemPrefetchAsync(D_data, NODE_N * DATA_N * sizeof(int), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(D_LG, (DATA_N + 2) * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(D_localscore,NODE_N * sizepernode * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + genScoreKernel<<>>(sizepernode, D_localscore, D_data, D_LG); + cudaDeviceSynchronize(); + + memcpy(localscore, D_localscore, NODE_N * sizepernode * sizeof(float)); + + // cudaPrintfDisplay(stdout, true); + // cudaPrintfEnd(); + + cudaFreeHost(LG); + cudaFree(D_LG); + cudaFree(D_data); + + cudaMallocHost((void **)&scores, 
+ (sizepernode / (256 * taskperthr) + 1) * sizeof(float)); + cudaMallocHost((void **)&parents, + (sizepernode / (256 * taskperthr) + 1) * 4 * sizeof(int)); + cudaMallocManaged((void **)&D_Score, + (sizepernode / (256 * taskperthr) + 1) * sizeof(float)); + cudaMallocManaged((void **)&D_parent, NODE_N * sizeof(bool)); + cudaMallocManaged((void **)&D_resP, + (sizepernode / (256 * taskperthr) + 1) * 4 * sizeof(int)); +} + +int convert(int *parent, int parN) { + int i, j, w = 1, tmp = 0; + j = 0; + for (i = 0; parN > 0 && i <= parent[parN - 1]; i++) { + if (parent[j] == i) { + j++; + tmp += w; + } + w *= 2; + } + + return tmp; +} + +void Pre_logGamma() { + + cudaMallocHost((void **)&LG, (DATA_N + 2) * sizeof(float)); + + LG[1] = log(1.0); + float i; + for (i = 2; i <= DATA_N + 1; i++) { + LG[(int)i] = LG[(int)i - 1] + log((float)i); + } +} + +void incr(int *bit, int n) { + + bit[n]++; + if (bit[n] >= 2) { + bit[n] = 0; + incr(bit, n + 1); + } + + return; +} + +void incrS(int *bit, int n) { + + bit[n]++; + if (bit[n] >= STATE_N) { + bit[n] = 0; + incr(bit, n + 1); + } + + return; +} + +bool getState(int parN, int *state, int time) { + int j = 1; + + j = pow(STATE_N, (float)parN) - 1; + + if (time > j) + return false; + + if (time >= 1) + incrS(state, 0); + + return true; +} + +bool getparent(int *bit, int *pre, int posN, int *parent, int *parN, int time) { + int i, j = 1; + + *parN = 0; + if (time == 0) + return true; + + for (i = 0; i < posN; i++) { + j = j * 2; + } + j--; + + if (time > j) + return false; + + incr(bit, 0); + + for (i = 0; i < posN; i++) { + if (bit[i] == 1) { + parent[(*parN)++] = pre[i]; + } + } + + return true; +} + +float findBestGraph() { + float bestls = -99999999; + int bestparent[5]; + int bestpN, total; + int node, index; + int pre[NODE_N] = {0}; + int parent[NODE_N] = {0}; + int posN = 0, i, j, parN, tmp, k, l; + float ls = -99999999999, score = 0; + int blocknum; + + for (i = 0; i < NODE_N; i++) + for (j = 0; j < NODE_N; j++) + graph[i][j] = 
0; + + for (node = 0; node < NODE_N; node++) { + + bestls = -99999999; + posN = 0; + + for (i = 0; i < NODE_N; i++) { + if (orders[node][i] == 1) { + pre[posN++] = i; + } + } + + if (posN >= 0) { + total = C(posN, 4) + C(posN, 3) + C(posN, 2) + posN + 1; + taskperthr = 1; + blocknum = total / (256 * taskperthr) + 1; + + int nbatches = MIN_NBATCHES; + + int blocknum_max = total / (BLOCK_SIZE * MIN_NBATCHES * taskperthr) + 1; + if (blocknum_max >= MAX_NBLOCKS) { + blocknum = MAX_NBLOCKS; + nbatches = (total + 1) / (BLOCK_SIZE * MAX_NBLOCKS * taskperthr); + } else { + blocknum = blocknum_max; + } + + cudaMemset(D_resP, 0, blocknum * 4 * sizeof(int)); + cudaMemset(D_Score, -999999.0, blocknum * nbatches * sizeof(float)); + memcpy(D_parent, orders[node], NODE_N * sizeof(bool)); + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + cudaMemPrefetchAsync(D_parent, NODE_N * sizeof(bool), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(D_resP, blocknum * 4 * sizeof(int), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(D_Score, blocknum * nbatches * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + computeKernel<<>>( + taskperthr, sizepernode, D_localscore, D_parent, node, total, D_Score, + D_resP, nbatches); + cudaDeviceSynchronize(); + + memcpy(parents, D_resP, blocknum * 4 * sizeof(int)); + memcpy(scores, D_Score, blocknum * sizeof(float)); + + for (i = 0; i < blocknum * nbatches; i++) { + + if (scores[i] > bestls) { + + bestls = scores[i]; + + parN = 0; + for (tmp = 0; tmp < 4; tmp++) { + if (parents[i * 4 + tmp] < 0) + break; + + bestparent[tmp] = parents[i * 4 + tmp]; + + parN++; + } + + bestpN = parN; + } + } + } else { + if (posN >= 4) { + for (i = 0; i < posN; i++) { + for (j = i + 1; j < posN; j++) { + for (k = j + 1; k < posN; k++) { + for (l = k + 1; l < posN; l++) { + 
parN = 4; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + if (pre[j] > node) + parent[2] = pre[j]; + else + parent[2] = pre[j] + 1; + if (pre[k] > node) + parent[3] = pre[k]; + else + parent[3] = pre[k] + 1; + if (pre[l] > node) + parent[4] = pre[l]; + else + parent[4] = pre[l] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + } + } + } + + if (posN >= 3) { + for (i = 0; i < posN; i++) { + for (j = i + 1; j < posN; j++) { + for (k = j + 1; k < posN; k++) { + + parN = 3; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + if (pre[j] > node) + parent[2] = pre[j]; + else + parent[2] = pre[j] + 1; + if (pre[k] > node) + parent[3] = pre[k]; + else + parent[3] = pre[k] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + } + } + + if (posN >= 2) { + for (i = 0; i < posN; i++) { + for (j = i + 1; j < posN; j++) { + + parN = 2; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + if (pre[j] > node) + parent[2] = pre[j]; + else + parent[2] = pre[j] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + } + } + + if (posN >= 1) { + for (i = 0; i < posN; i++) { + + parN = 1; + if (pre[i] > node) + parent[1] = pre[i]; + else + parent[1] = pre[i] + 1; + + index = findindex(parent, parN); + index += sizepernode * node; + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = parN; + for (tmp = 0; tmp < parN; tmp++) + bestparent[tmp] = 
parent[tmp + 1]; + } + } + } + + parN = 0; + index = sizepernode * node; + + ls = localscore[index]; + + if (ls > bestls) { + bestls = ls; + bestpN = 0; + } + } + if (bestls > -99999999) { + + for (i = 0; i < bestpN; i++) { + if (bestparent[i] < node) + graph[node][bestparent[i] - 1] = 1; + else + graph[node][bestparent[i]] = 1; + } + score += bestls; + } + } + + return score; +} + +int findindex(int *arr, int size) { // reminder: arr[0] has to be 0 && size == + // array size-1 && index start from 0 + int i, j, index = 0; + + for (i = 1; i < size; i++) { + index += C(NODE_N - 1, i); + } + + for (i = 1; i <= size - 1; i++) { + for (j = arr[i - 1] + 1; j <= arr[i] - 1; j++) { + index += C(NODE_N - 1 - j, size - i); + } + } + + index += arr[size] - arr[size - 1]; + + return index; +} + +int C(int n, int a) { + int i, res = 1, atmp = a; + + for (i = 0; i < atmp; i++) { + res *= n; + n--; + } + + for (i = 0; i < atmp; i++) { + res /= a; + a--; + } + + return res; +} \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/BN/ordergraph_kernel.cu b/workloads/realworld/uvm_prefetch_async/BN/ordergraph_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..f6cbe5f9465edf7aa0f1d3f3a04bddc4d4f72def --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/BN/ordergraph_kernel.cu @@ -0,0 +1,352 @@ +#ifndef _ORDERGRAPH_KERNEL_H_ +#define _ORDERGRAPH_KERNEL_H_ + +#include + +#ifdef DATA_25 +#include "data25.cu" +#endif +#ifdef DATA_30 +#include "data30.cu" +#endif +#ifdef DATA_40 +#include "data40.cu" +#endif +#ifdef DATA_45 +#include "data45.cu" +#endif +#ifdef DATA_50 +#include "data50.cu" +#endif +#ifdef DATA_125 +#include "data125.cu" +#endif +; + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define BLOCK_SIZE 256 +#define MAX_NBLOCKS 1024 +#define MIN_NBATCHES 16 + + +__device__ void Dincr(int *bit, int n); +__device__ void DincrS(int *bit, int n); +__device__ bool D_getState(int 
parN, int *sta, int time); +__device__ void D_findComb(int *comb, int l, int n); +__device__ int D_findindex(int *arr, int size); +__device__ int D_C(int n, int a); + +__global__ void genScoreKernel(int sizepernode, float *D_localscore, + int *D_data, float *D_LG) { + int id = blockIdx.x * BLOCK_SIZE + threadIdx.x; + int node, index; + bool flag; + int parent[5] = {0}; + int pre[NODE_N] = {0}; + int state[5] = {0}; + int i, j, parN = 0, tmp, t; + int t1 = 0, t2 = 0; + float ls = 0; + int Nij[STATE_N] = {0}; + + if (id < sizepernode) { + + D_findComb(parent, id, NODE_N - 1); + + for (i = 0; i < 4; i++) { + if (parent[i] > 0) + parN++; + } + + for (node = 0; node < NODE_N; node++) { + + j = 1; + for (i = 0; i < NODE_N; i++) { + if (i != node) + pre[j++] = i; + } + + for (tmp = 0; tmp < parN; tmp++) + state[tmp] = 0; + + index = sizepernode * node + id; + + // priors + /* + for(tmp=1;tmp<=4;tmp++){ + localscore[index]+=100*(prior[node][pre[parent[tmp]]]-0.5)*(prior[node][pre[parent[tmp]]]-0.5)*(prior[node][pre[parent[tmp]]]-0.5); + } + */ + t = 0; + while (D_getState(parN, state, t++)) { // for get state + // printf("test %u\n",id); + ls = 0; + for (tmp = 0; tmp < STATE_N; tmp++) + Nij[tmp] = 0; + + for (t1 = 0; t1 < DATA_N; t1++) { + flag = true; + for (t2 = 0; t2 < parN; t2++) { + if (D_data[t1 * NODE_N + pre[parent[t2]]] != state[t2]) { + flag = false; + break; + } + } + if (!flag) + continue; + + Nij[D_data[t1 * NODE_N + node]]++; + } + + tmp = STATE_N - 1; + + for (t1 = 0; t1 < STATE_N; t1++) { + ls += D_LG[Nij[t1]]; + tmp += Nij[t1]; + } + + ls -= D_LG[tmp]; + ls += D_LG[STATE_N - 1]; + + D_localscore[index] += ls; + } + } + } +} + +__global__ void computeKernel(int taskperthr, int sizepernode, + float *D_localscore, bool *D_parent, int node, + int total, float *D_Score, int *D_resP, + int nbatches) { + cooperative_groups::thread_block block = + cooperative_groups::this_thread_block(); + pipeline pipe; + __shared__ float lsinblock[PREFETCH_COUNT][BLOCK_SIZE]; + 
+ int fetch = 0; + int end_tile = fetch + nbatches; + int bestparent[4] = {0}, parent[5] = {-1}; + + for (int compute = fetch; compute < end_tile; compute++) { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) { + unsigned int bid = blockIdx.x * nbatches + fetch; + unsigned int tid = threadIdx.x; + unsigned int id = bid * (BLOCK_SIZE * nbatches) + tid; + + int posN = 1, i, index, tmp; + int pre[NODE_N] = {0}; + int parN = 0; + + float bestls = -999999999999999, ls; + + for (i = 0; i < NODE_N; i++) { + if (D_parent[i] == 1) { + pre[posN++] = i; + } + } + + for (i = 0; i < taskperthr && ((id * taskperthr + i) < total); i++) { + + D_findComb(parent, id * taskperthr + i, posN); + + for (parN = 0; parN < 4; parN++) { + if (parent[parN] < 0) + break; + if (pre[parent[parN]] > node) + parent[parN] = pre[parent[parN]]; + else + parent[parN] = pre[parent[parN]] + 1; + } + + for (tmp = parN; tmp > 0; tmp--) { + parent[tmp] = parent[tmp - 1]; + } + parent[0] = 0; + + index = D_findindex(parent, parN); + index += sizepernode * node; + + ls = D_localscore[index]; + + if (ls > bestls) { + bestls = ls; + for (tmp = 0; tmp < 4; tmp++) + bestparent[tmp] = parent[tmp + 1]; + } + } + + memcpy_async(lsinblock[fetch % PREFETCH_COUNT][tid], bestls, pipe); + pipe.commit(); + } + if (fetch == end_tile) { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + int i, t; + unsigned int bid = blockIdx.x * nbatches + compute; + unsigned int tid = threadIdx.x; + // unsigned int id = bid * (BLOCK_SIZE * nbatches) + tid; + + for (i = BLOCK_SIZE / 2; i >= 1; i /= 2) { + if (tid < i) { + if (lsinblock[compute % PREFETCH_COUNT][tid + i] > + lsinblock[compute % PREFETCH_COUNT][tid] && + lsinblock[compute % PREFETCH_COUNT][tid + i] < 0) { + lsinblock[compute % PREFETCH_COUNT][tid] = + lsinblock[compute % PREFETCH_COUNT][tid + i]; + lsinblock[compute % PREFETCH_COUNT][tid + i] = (float)(tid + i); + } else if 
(lsinblock[compute % PREFETCH_COUNT][tid + i] < + lsinblock[compute % PREFETCH_COUNT][tid] && + lsinblock[compute % PREFETCH_COUNT][tid] < 0) { + lsinblock[compute % PREFETCH_COUNT][tid + i] = (float)tid; + } else if (lsinblock[tid] > 0 && lsinblock[tid + i] < 0) { + lsinblock[compute % PREFETCH_COUNT][tid] = + lsinblock[compute % PREFETCH_COUNT][tid + i]; + lsinblock[compute % PREFETCH_COUNT][tid + i] = (float)(tid + i); + } else if (lsinblock[compute % PREFETCH_COUNT][tid] < 0 && + lsinblock[compute % PREFETCH_COUNT][tid + i] > 0) { + lsinblock[compute % PREFETCH_COUNT][tid + i] = (float)tid; + } + } + block.sync(); + } + block.sync(); + + if (tid == 0) { + D_Score[bid] = lsinblock[compute % PREFETCH_COUNT][0]; + t = 0; + for (i = 0; i < 7 && t < 128 && t >= 0; i++) { + t = (int)lsinblock[compute % PREFETCH_COUNT][(int)powf(2.0, i) + t]; + } + lsinblock[compute % PREFETCH_COUNT][0] = (float)t; + } + block.sync(); + + if (tid == (int)lsinblock[compute % PREFETCH_COUNT][0]) { + for (i = 0; i < 4; i++) { + D_resP[bid * 4 + i] = bestparent[i]; + } + } + } +} + +__device__ void Dincr(int *bit, int n) { + + while (n <= NODE_N) { + bit[n]++; + if (bit[n] >= 2) { + bit[n] = 0; + n++; + } else { + break; + } + } + + return; +} + +__device__ void DincrS(int *bit, int n) { + + bit[n]++; + if (bit[n] >= STATE_N) { + bit[n] = 0; + Dincr(bit, n + 1); + } + + return; +} + +__device__ bool D_getState(int parN, int *sta, int time) { + int i, j = 1; + + for (i = 0; i < parN; i++) { + j *= STATE_N; + } + j--; + if (time > j) + return false; + + if (time >= 1) + DincrS(sta, 0); + + return true; +} + +__device__ void D_findComb(int *comb, int l, int n) { + const int len = 4; + if (l == 0) { + for (int i = 0; i < len; i++) + comb[i] = -1; + return; + } + int sum = 0; + int k = 1; + + while (sum < l) + sum += D_C(n, k++); + l -= sum - D_C(n, --k); + int low = 0; + int pos = 0; + while (k > 1) { + sum = 0; + int s = 1; + while (sum < l) + sum += D_C(n - s++, k - 1); + l -= sum - D_C(n - 
(--s), --k); + low += s; + comb[pos++] = low; + n -= s; + } + comb[pos] = low + l; + for (int i = pos + 1; i < 4; i++) + comb[i] = -1; +} + +__device__ int D_findindex(int *arr, + int size) { // reminder: arr[0] has to be 0 && size + // == array size-1 && index start from 0 + int i, j, index = 0; + + for (i = 1; i < size; i++) { + index += D_C(NODE_N - 1, i); + } + + for (i = 1; i <= size - 1; i++) { + for (j = arr[i - 1] + 1; j <= arr[i] - 1; j++) { + index += D_C(NODE_N - 1 - j, size - i); + } + } + + index += arr[size] - arr[size - 1]; + + return index; +} + +__device__ int D_C(int n, int a) { + int i, res = 1, atmp = a; + + for (i = 0; i < atmp; i++) { + res *= n; + n--; + } + + for (i = 0; i < atmp; i++) { + res /= a; + a--; + } + + return res; +} + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/BN/run.sh b/workloads/realworld/uvm_prefetch_async/BN/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..f87817975334cff247b1bdf91651d41062aa8320 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/BN/run.sh @@ -0,0 +1,5 @@ +# ./ordergraph_25 +# ./ordergraph_30 +# ./ordergraph_40 +# ./ordergraph_45 +./ordergraph_50 \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/BN/run_super.sh b/workloads/realworld/uvm_prefetch_async/BN/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..5c45d88db0716b0b4b0828ba397cbd918d1612c0 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/BN/run_super.sh @@ -0,0 +1 @@ +./ordergraph_50 diff --git a/workloads/realworld/uvm_prefetch_async/backprop/Makefile b/workloads/realworld/uvm_prefetch_async/backprop/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..286cd40af79bbb80b6d86aad9bd0d2c0d1a846e0 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/backprop/Makefile @@ -0,0 +1,47 @@ +include ../../../common/make.config + +# C compiler +CC = gcc +CC_FLAGS = -g -O2 + +# CUDA compiler +NVCC = 
$(CUDA_DIR)/bin/nvcc +NVCC_FLAGS = -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -arch=sm_80 + +# 'make dbg=1' enables NVCC debugging +ifeq ($(dbg),1) + NVCC_FLAGS += -g -O0 +else + NVCC_FLAGS += -O2 +endif + +# 'make emu=1' compiles the CUDA kernels for emulation +ifeq ($(emu),1) + NVCC_FLAGS += -deviceemu +endif + + +backprop: backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + $(NVCC) $(NVCC_FLAGS) backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -o backprop $(NVCC_FLAGS) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++ + +# backprop: backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp +# $(CC) $(CC_FLAGS) backprop.o facetrain.o imagenet.o backprop_cuda.o $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp -o backprop -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++ + +%.o: %.[ch] + $(CC) $(CC_FLAGS) $< -c + +facetrain.o: facetrain.c backprop.h + $(CC) $(CC_FLAGS) facetrain.c -c + +backprop.o: backprop.c backprop.h + $(CC) $(CC_FLAGS) backprop.c -c + +backprop_cuda.o: backprop_cuda.cu backprop.h $(CUPTI_ADD_COMMON)/cupti_add.h $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + $(NVCC) $(NVCC_FLAGS) -c backprop_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +imagenet.o: imagenet.c backprop.h + $(CC) $(CC_FLAGS) imagenet.c -c + + +clean: + rm -f *.o *~ backprop backprop_cuda.linkinfo diff --git a/workloads/realworld/uvm_prefetch_async/backprop/backprop.c b/workloads/realworld/uvm_prefetch_async/backprop/backprop.c new file mode 100644 index 0000000000000000000000000000000000000000..3a38f012b785f8cbaec7f9c33e9ae58b9ee92ae5 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/backprop/backprop.c @@ -0,0 +1,502 @@ +/* + 
****************************************************************** + * HISTORY + * 15-Oct-94 Jeff Shufelt (js), Carnegie Mellon University + * Prepared for 15-681, Fall 1994. + * Modified by Shuai Che + ****************************************************************** + */ +#include +#include +#include +#include +#include "backprop.h" +#include +//#define OPEN + +#define ABS(x) (((x) > 0.0) ? (x) : (-(x))) + +#define fastcopy(to,from,len)\ +{\ + register char *_to,*_from;\ + register int _i,_l;\ + _to = (char *)(to);\ + _from = (char *)(from);\ + _l = (len);\ + for (_i = 0; _i < _l; _i++) *_to++ = *_from++;\ +} + +/*** Return random number between 0.0 and 1.0 ***/ +float drnd() +{ + return ((float) rand() / (float) BIGRND); +} + +/*** Return random number between -1.0 and 1.0 ***/ +float dpn1() +{ + return ((drnd() * 2.0) - 1.0); +} + +/*** The squashing function. Currently, it's a sigmoid. ***/ + +float squash(x) +float x; +{ + float m; + //x = -x; + //m = 1 + x + x*x/2 + x*x*x/6 + x*x*x*x/24 + x*x*x*x*x/120; + //return(1.0 / (1.0 + m)); + return (1.0 / (1.0 + exp(-x))); +} + + +/*** Allocate 1d array of floats ***/ + +float *alloc_1d_dbl(n) +int n; +{ + float *new; + + new = (float *) malloc ((unsigned) (n * sizeof (float))); + if (new == NULL) { + printf("ALLOC_1D_DBL: Couldn't allocate array of floats\n"); + return (NULL); + } + return (new); +} + + +/*** Allocate 2d array of floats ***/ + +float **alloc_2d_dbl(m, n) +int m, n; +{ + int i; + float **new; + + new = (float **) malloc ((unsigned) (m * sizeof (float *))); + if (new == NULL) { + printf("ALLOC_2D_DBL: Couldn't allocate array of dbl ptrs\n"); + return (NULL); + } + + for (i = 0; i < m; i++) { + new[i] = alloc_1d_dbl(n); + } + + return (new); +} + + +bpnn_randomize_weights(w, m, n) +float **w; +int m, n; +{ + int i, j; + + for (i = 0; i <= m; i++) { + for (j = 0; j <= n; j++) { + w[i][j] = (float) rand()/RAND_MAX; + // w[i][j] = dpn1(); + } + } +} + +bpnn_randomize_row(w, m) +float *w; +int m; +{ + 
int i; + for (i = 0; i <= m; i++) { + //w[i] = (float) rand()/RAND_MAX; + w[i] = 0.1; + } +} + + +bpnn_zero_weights(w, m, n) +float **w; +int m, n; +{ + int i, j; + + for (i = 0; i <= m; i++) { + for (j = 0; j <= n; j++) { + w[i][j] = 0.0; + } + } +} + + +void bpnn_initialize(seed) +{ + printf("Random number generator seed: %d\n", seed); + srand(seed); +} + + +BPNN *bpnn_internal_create(n_in, n_hidden, n_out) +int n_in, n_hidden, n_out; +{ + BPNN *newnet; + + newnet = (BPNN *) malloc (sizeof (BPNN)); + if (newnet == NULL) { + printf("BPNN_CREATE: Couldn't allocate neural network\n"); + return (NULL); + } + + newnet->input_n = n_in; + newnet->hidden_n = n_hidden; + newnet->output_n = n_out; + newnet->input_units = alloc_1d_dbl(n_in + 1); + newnet->hidden_units = alloc_1d_dbl(n_hidden + 1); + newnet->output_units = alloc_1d_dbl(n_out + 1); + + newnet->hidden_delta = alloc_1d_dbl(n_hidden + 1); + newnet->output_delta = alloc_1d_dbl(n_out + 1); + newnet->target = alloc_1d_dbl(n_out + 1); + + newnet->input_weights = alloc_2d_dbl(n_in + 1, n_hidden + 1); + newnet->hidden_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); + + newnet->input_prev_weights = alloc_2d_dbl(n_in + 1, n_hidden + 1); + newnet->hidden_prev_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); + + return (newnet); +} + + +void bpnn_free(net) +BPNN *net; +{ + int n1, n2, i; + + n1 = net->input_n; + n2 = net->hidden_n; + + free((char *) net->input_units); + free((char *) net->hidden_units); + free((char *) net->output_units); + + free((char *) net->hidden_delta); + free((char *) net->output_delta); + free((char *) net->target); + + for (i = 0; i <= n1; i++) { + free((char *) net->input_weights[i]); + free((char *) net->input_prev_weights[i]); + } + free((char *) net->input_weights); + free((char *) net->input_prev_weights); + + for (i = 0; i <= n2; i++) { + free((char *) net->hidden_weights[i]); + free((char *) net->hidden_prev_weights[i]); + } + free((char *) net->hidden_weights); + free((char *) 
net->hidden_prev_weights); + + free((char *) net); +} + + +/*** Creates a new fully-connected network from scratch, + with the given numbers of input, hidden, and output units. + Threshold units are automatically included. All weights are + randomly initialized. + + Space is also allocated for temporary storage (momentum weights, + error computations, etc). +***/ + +BPNN *bpnn_create(n_in, n_hidden, n_out) +int n_in, n_hidden, n_out; +{ + + BPNN *newnet; + + newnet = bpnn_internal_create(n_in, n_hidden, n_out); + +#ifdef INITZERO + bpnn_zero_weights(newnet->input_weights, n_in, n_hidden); +#else + bpnn_randomize_weights(newnet->input_weights, n_in, n_hidden); +#endif + bpnn_randomize_weights(newnet->hidden_weights, n_hidden, n_out); + bpnn_zero_weights(newnet->input_prev_weights, n_in, n_hidden); + bpnn_zero_weights(newnet->hidden_prev_weights, n_hidden, n_out); + bpnn_randomize_row(newnet->target, n_out); + return (newnet); +} + + +void bpnn_layerforward(l1, l2, conn, n1, n2) +float *l1, *l2, **conn; +int n1, n2; +{ + float sum; + int j, k; + + /*** Set up thresholding unit ***/ + l1[0] = 1.0; +#ifdef OPEN + omp_set_num_threads(NUM_THREAD); + #pragma omp parallel for shared(conn, n1, n2, l1) private(k, j) reduction(+: sum) schedule(static) +#endif + /*** For each unit in second layer ***/ + for (j = 1; j <= n2; j++) { + + /*** Compute weighted sum of its inputs ***/ + sum = 0.0; + for (k = 0; k <= n1; k++) { + sum += conn[k][j] * l1[k]; + } + l2[j] = squash(sum); + } +} + +//extern "C" +void bpnn_output_error(delta, target, output, nj, err) +float *delta, *target, *output, *err; +int nj; +{ + int j; + float o, t, errsum; + errsum = 0.0; + for (j = 1; j <= nj; j++) { + o = output[j]; + t = target[j]; + delta[j] = o * (1.0 - o) * (t - o); + errsum += ABS(delta[j]); + } + *err = errsum; +} + + +void bpnn_hidden_error(delta_h, + nh, + delta_o, + no, + who, + hidden, + err) +float *delta_h, *delta_o, *hidden, **who, *err; +int nh, no; +{ + int j, k; + float h, sum, 
errsum; + + errsum = 0.0; + for (j = 1; j <= nh; j++) { + h = hidden[j]; + sum = 0.0; + for (k = 1; k <= no; k++) { + sum += delta_o[k] * who[j][k]; + } + delta_h[j] = h * (1.0 - h) * sum; + errsum += ABS(delta_h[j]); + } + *err = errsum; +} + + +void bpnn_adjust_weights(delta, ndelta, ly, nly, w, oldw) +float *delta, *ly, **w, **oldw; +{ + float new_dw; + int k, j; + ly[0] = 1.0; + //eta = 0.3; + //momentum = 0.3; + +#ifdef OPEN + omp_set_num_threads(NUM_THREAD); + #pragma omp parallel for \ + shared(oldw, w, delta) \ + private(j, k, new_dw) \ + firstprivate(ndelta, nly, momentum) +#endif + for (j = 1; j <= ndelta; j++) { + for (k = 0; k <= nly; k++) { + new_dw = ((ETA * delta[j] * ly[k]) + (MOMENTUM * oldw[k][j])); + w[k][j] += new_dw; + oldw[k][j] = new_dw; + } + } +} + + +void bpnn_feedforward(net) +BPNN *net; +{ + int in, hid, out; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + + /*** Feed forward input activations. ***/ + bpnn_layerforward(net->input_units, net->hidden_units, + net->input_weights, in, hid); + bpnn_layerforward(net->hidden_units, net->output_units, + net->hidden_weights, hid, out); + +} + + +void bpnn_train(net, eo, eh) +BPNN *net; +float *eo, *eh; +{ + int in, hid, out; + float out_err, hid_err; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + + /*** Feed forward input activations. ***/ + bpnn_layerforward(net->input_units, net->hidden_units, + net->input_weights, in, hid); + bpnn_layerforward(net->hidden_units, net->output_units, + net->hidden_weights, hid, out); + + /*** Compute error on output and hidden units. ***/ + bpnn_output_error(net->output_delta, net->target, net->output_units, + out, &out_err); + bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, + net->hidden_weights, net->hidden_units, &hid_err); + *eo = out_err; + *eh = hid_err; + + /*** Adjust input and hidden weights. 
***/ + bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, + net->hidden_weights, net->hidden_prev_weights); + bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in, + net->input_weights, net->input_prev_weights); + +} + + + + +void bpnn_save(net, filename) +BPNN *net; +char *filename; +{ + int n1, n2, n3, i, j, memcnt; + float dvalue, **w; + char *mem; + ///add// + FILE *pFile; + pFile = fopen( filename, "w+" ); + /////// + /* + if ((fd = creat(filename, 0644)) == -1) { + printf("BPNN_SAVE: Cannot create '%s'\n", filename); + return; + } + */ + + n1 = net->input_n; n2 = net->hidden_n; n3 = net->output_n; + printf("Saving %dx%dx%d network to '%s'\n", n1, n2, n3, filename); + //fflush(stdout); + + //write(fd, (char *) &n1, sizeof(int)); + //write(fd, (char *) &n2, sizeof(int)); + //write(fd, (char *) &n3, sizeof(int)); + + fwrite( (char *) &n1 , sizeof(char), sizeof(char), pFile); + fwrite( (char *) &n2 , sizeof(char), sizeof(char), pFile); + fwrite( (char *) &n3 , sizeof(char), sizeof(char), pFile); + + + + memcnt = 0; + w = net->input_weights; + mem = (char *) malloc ((unsigned) ((n1+1) * (n2+1) * sizeof(float))); + for (i = 0; i <= n1; i++) { + for (j = 0; j <= n2; j++) { + dvalue = w[i][j]; + fastcopy(&mem[memcnt], &dvalue, sizeof(float)); + memcnt += sizeof(float); + } + } + //write(fd, mem, (n1+1) * (n2+1) * sizeof(float)); + fwrite( mem , (unsigned)(sizeof(float)), (unsigned) ((n1+1) * (n2+1) * sizeof(float)) , pFile); + free(mem); + + memcnt = 0; + w = net->hidden_weights; + mem = (char *) malloc ((unsigned) ((n2+1) * (n3+1) * sizeof(float))); + for (i = 0; i <= n2; i++) { + for (j = 0; j <= n3; j++) { + dvalue = w[i][j]; + fastcopy(&mem[memcnt], &dvalue, sizeof(float)); + memcnt += sizeof(float); + } + } + //write(fd, mem, (n2+1) * (n3+1) * sizeof(float)); + fwrite( mem , sizeof(float), (unsigned) ((n2+1) * (n3+1) * sizeof(float)) , pFile); + free(mem); + + fclose(pFile); + return; +} + + +BPNN *bpnn_read(filename) +char 
*filename; +{ + char *mem; + BPNN *new; + int fd, n1, n2, n3, i, j, memcnt; + + if ((fd = open(filename, 0, 0644)) == -1) { + return (NULL); + } + + printf("Reading '%s'\n", filename); //fflush(stdout); + + read(fd, (char *) &n1, sizeof(int)); + read(fd, (char *) &n2, sizeof(int)); + read(fd, (char *) &n3, sizeof(int)); + new = bpnn_internal_create(n1, n2, n3); + + printf("'%s' contains a %dx%dx%d network\n", filename, n1, n2, n3); + printf("Reading input weights..."); //fflush(stdout); + + memcnt = 0; + mem = (char *) malloc ((unsigned) ((n1+1) * (n2+1) * sizeof(float))); + read(fd, mem, (n1+1) * (n2+1) * sizeof(float)); + for (i = 0; i <= n1; i++) { + for (j = 0; j <= n2; j++) { + fastcopy(&(new->input_weights[i][j]), &mem[memcnt], sizeof(float)); + memcnt += sizeof(float); + } + } + free(mem); + + printf("Done\nReading hidden weights..."); //fflush(stdout); + + memcnt = 0; + mem = (char *) malloc ((unsigned) ((n2+1) * (n3+1) * sizeof(float))); + read(fd, mem, (n2+1) * (n3+1) * sizeof(float)); + for (i = 0; i <= n2; i++) { + for (j = 0; j <= n3; j++) { + fastcopy(&(new->hidden_weights[i][j]), &mem[memcnt], sizeof(float)); + memcnt += sizeof(float); + } + } + free(mem); + close(fd); + + printf("Done\n"); //fflush(stdout); + + bpnn_zero_weights(new->input_prev_weights, n1, n2); + bpnn_zero_weights(new->hidden_prev_weights, n2, n3); + + return (new); +} diff --git a/workloads/realworld/uvm_prefetch_async/backprop/backprop.h b/workloads/realworld/uvm_prefetch_async/backprop/backprop.h new file mode 100644 index 0000000000000000000000000000000000000000..dfaafe39b76a1c9c1455e38bbe0a14d5461d5d2b --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/backprop/backprop.h @@ -0,0 +1,53 @@ +#ifndef _BACKPROP_H_ +#define _BACKPROP_H_ + +#define BIGRND 0x7fffffff + +#define GPU +#define THREADS 256 +#define WIDTH 16 // shared memory width +#define HEIGHT 16 // shared memory height + +#define ETA 0.3 //eta value +#define MOMENTUM 0.3 //momentum value +#define NUM_THREAD 4 
//OpenMP threads + + +typedef struct { + int input_n; /* number of input units */ + int hidden_n; /* number of hidden units */ + int output_n; /* number of output units */ + + float *input_units; /* the input units */ + float *hidden_units; /* the hidden units */ + float *output_units; /* the output units */ + + float *hidden_delta; /* storage for hidden unit error */ + float *output_delta; /* storage for output unit error */ + + float *target; /* storage for target vector */ + + float **input_weights; /* weights from input to hidden layer */ + float **hidden_weights; /* weights from hidden to output layer */ + + /*** The next two are for momentum ***/ + float **input_prev_weights; /* previous change on input to hidden wgt */ + float **hidden_prev_weights; /* previous change on hidden to output wgt */ +} BPNN; + + +/*** User-level functions ***/ + +void bpnn_initialize(); + +BPNN *bpnn_create(); +void bpnn_free(); + +void bpnn_train(); +void bpnn_feedforward(); + +void bpnn_save(); +BPNN *bpnn_read(); + + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/backprop/backprop_cuda.cu b/workloads/realworld/uvm_prefetch_async/backprop/backprop_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..24126be67ec657a2b560227d15fb8a96bde19893 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/backprop/backprop_cuda.cu @@ -0,0 +1,279 @@ + + +// includes, system +#include +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +double t_start, t_end; + +// includes, kernels +#include "backprop_cuda_kernel.cu" +#include "backprop.h" + +//////////////////////////////////////////////////////////////////////////////// + +extern "C" void 
bpnn_layerforward(float *l1, float *l2, float **conn, int n1, int n2); + +extern "C" void bpnn_output_error(float *delta, float *target, float *output, int nj, float *err); + +extern "C" void bpnn_hidden_error(float *delta_h, int nh, float *delta_o, int no, float **who, float *hidden, float *err); + +extern "C" void bpnn_adjust_weights(float *delta, int ndelta, float *ly, int nly, float **w, float **oldw); + +extern "C" int setup(int argc, char **argv); + +extern "C" float **alloc_2d_dbl(int m, int n); + +extern "C" float squash(float x); + +double gettime() +{ + struct timeval t; + gettimeofday(&t, NULL); + return t.tv_sec + t.tv_usec * 1e-6; +} + +unsigned int num_threads = 0; +unsigned int num_blocks = 0; + +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + GPU_argv_init(); + + initTrace(); + startCPU(); + + num_blocks = atoi(argv[2]); + setup(argc, argv); +} + +extern "C" void bpnn_train_cuda(BPNN *net, float *eo, float *eh) +{ + int in, hid, out; + float out_err, hid_err; + + in = net->input_n; + hid = net->hidden_n; + out = net->output_n; + +#ifdef GPU + int m = 0; + float *input_hidden_cuda; + float *input_cuda; + float *output_hidden_cuda; + float *partial_sum; + float *hidden_partial_sum; + float *hidden_delta_cuda; + float *input_prev_weights_cuda; + float sum; + float *input_weights_one_dim; + float *input_weights_prev_one_dim; + // ruihao + // num_blocks = in / 16; + // dim3 grid(1, num_blocks); + // dim3 threads(16, 16); + + int tile_size = in / num_blocks; + dim3 grid(1, num_blocks); + dim3 threads(16, 16); + // ruihao + + input_weights_one_dim = (float *)malloc((in + 1) * (hid + 1) * sizeof(float)); + input_weights_prev_one_dim = (float *)malloc((in + 1) * 
(hid + 1) * sizeof(float)); + // ruihao + // partial_sum = (float *) malloc(num_blocks * WIDTH * sizeof(float)); + partial_sum = (float *)malloc(in * sizeof(float)); + // ruihao + + // this preprocessing stage is added to correct the bugs of wrong memcopy using two-dimensional net->inputweights + for (int k = 0; k <= in; k++) + { + for (int j = 0; j <= hid; j++) + { + input_weights_one_dim[m] = net->input_weights[k][j]; + input_weights_prev_one_dim[m] = net->input_prev_weights[k][j]; + m++; + } + } + + // GPU_argv_init(); + + // initTrace(); + // startCPU(); + + cudaMallocManaged((void **)&input_cuda, (in + 1) * sizeof(float)); + cudaMallocManaged((void **)&output_hidden_cuda, (hid + 1) * sizeof(float)); + cudaMallocManaged((void **)&input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float)); + // ruihao + // cudaMalloc((void**) &hidden_partial_sum, num_blocks * WIDTH * sizeof(float)); + cudaMallocManaged((void **)&hidden_partial_sum, in * sizeof(float)); + // ruihao + +#endif + +#ifdef CPU + + printf("Performing CPU computation\n"); + bpnn_layerforward(net->input_units, net->hidden_units, net->input_weights, in, hid); + +#endif + +#ifdef GPU + + //printf("Performing GPU computation\n"); + + memcpy(input_cuda, net->input_units, (in + 1) * sizeof(float)); + memcpy(input_hidden_cuda, input_weights_one_dim, (in + 1) * (hid + 1) * sizeof(float)); + + // ruihao + //t_start = rtclock(); + // ruihao + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(input_cuda, (in + 1) * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(hidden_partial_sum, in * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + bpnn_layerforward_CUDA<<>>(input_cuda, + 
output_hidden_cuda, + input_hidden_cuda, + hidden_partial_sum, + in, + hid, + tile_size); + + cudaDeviceSynchronize(); + + // ruihao + //t_end = rtclock(); + //fprintf(stdout, "bpnn_layerforward_CUDA GPU Runtime: %0.6lfs\n", t_end - t_start); + memcpy(partial_sum, hidden_partial_sum, in * sizeof(float)); + // ruihao + + cudaError_t error = cudaGetLastError(); + if (error != cudaSuccess) + { + printf("bpnn kernel error: %s\n", cudaGetErrorString(error)); + exit(EXIT_FAILURE); + } + + for (int j = 1; j <= hid; j++) + { + sum = 0.0; + // ruihao + // for (int k = 0; k < num_blocks; k++) { + // sum += partial_sum[k * hid + j-1] ; + // } + for (int k = 0; k < in / WIDTH; k++) + { + sum += partial_sum[k * hid + j - 1]; + } + // ruihao + sum += net->input_weights[0][j]; + net->hidden_units[j] = float(1.0 / (1.0 + exp(-sum))); + } +#endif + + bpnn_layerforward(net->hidden_units, net->output_units, net->hidden_weights, hid, out); + bpnn_output_error(net->output_delta, net->target, net->output_units, out, &out_err); + bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, net->hidden_weights, net->hidden_units, &hid_err); + bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, net->hidden_weights, net->hidden_prev_weights); + +#ifdef CPU + + bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in, net->input_weights, net->input_prev_weights); + +#endif + +#ifdef GPU + + cudaMallocManaged((void **)&hidden_delta_cuda, (hid + 1) * sizeof(float)); + cudaMallocManaged((void **)&input_prev_weights_cuda, (in + 1) * (hid + 1) * sizeof(float)); + // ruihao + //t_start = rtclock(); + memcpy(hidden_delta_cuda, net->hidden_delta, (hid + 1) * sizeof(float)); + memcpy(input_prev_weights_cuda, input_weights_prev_one_dim, (in + 1) * (hid + 1) * sizeof(float)); + memcpy(input_hidden_cuda, input_weights_one_dim, (in + 1) * (hid + 1) * sizeof(float)); + + cudaStream_t stream4; + cudaStream_t stream5; + cudaStream_t stream6; + cudaStream_t stream7; + 
cudaStreamCreate(&stream4); + cudaStreamCreate(&stream5); + cudaStreamCreate(&stream6); + cudaStreamCreate(&stream7); + + cudaMemPrefetchAsync(hidden_delta_cuda, (hid + 1) * sizeof(float), GPU_DEVICE, stream4); + cudaStreamSynchronize(stream4); + cudaMemPrefetchAsync(input_prev_weights_cuda, (in + 1) * (hid + 1) * sizeof(float), GPU_DEVICE, stream5); + cudaStreamSynchronize(stream5); + cudaMemPrefetchAsync(input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float), GPU_DEVICE, stream6); + cudaStreamSynchronize(stream6); + cudaMemPrefetchAsync(input_cuda, (in + 1) * sizeof(float), GPU_DEVICE, stream7); + cudaStreamSynchronize(stream7); + + // ruihao + bpnn_adjust_weights_cuda<<>>(hidden_delta_cuda, + hid, + input_cuda, + in, + input_hidden_cuda, + input_prev_weights_cuda, + tile_size); + // ruihao + cudaDeviceSynchronize(); + //t_end = rtclock(); + memcpy(net->input_units, input_cuda, (in + 1) * sizeof(float)); + memcpy(input_weights_one_dim, input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float)); + //fprintf(stdout, "bpnn_adjust_weights_cuda GPU Runtime: %0.6lfs\n", t_end - t_start); + // ruihao + + cudaFree(input_cuda); + cudaFree(output_hidden_cuda); + cudaFree(input_hidden_cuda); + cudaFree(hidden_partial_sum); + cudaFree(input_prev_weights_cuda); + cudaFree(hidden_delta_cuda); + + endCPU(); + finiTrace(); + + free(partial_sum); + free(input_weights_one_dim); + free(input_weights_prev_one_dim); + +#endif +} diff --git a/workloads/realworld/uvm_prefetch_async/backprop/backprop_cuda_kernel.cu b/workloads/realworld/uvm_prefetch_async/backprop/backprop_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..27c3be5bba4df2464e86a0f9586e1fb6d1e41a88 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/backprop/backprop_cuda_kernel.cu @@ -0,0 +1,182 @@ + + +#ifndef _BACKPROP_CUDA_KERNEL_H_ +#define _BACKPROP_CUDA_KERNEL_H_ + +#include +#include "backprop.h" +#include "math.h" +#include "cuda.h" + +#include +#include + +using namespace 
nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +__global__ void +bpnn_layerforward_CUDA(float *input_cuda, + float *output_hidden_cuda, + float *input_hidden_cuda, + float *hidden_partial_sum, + int in, + int hid, + int tile_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + + int by = blockIdx.y; + int tx = threadIdx.x; + int ty = threadIdx.y; + + __shared__ float input_node[HEIGHT * PREFETCH_COUNT]; + __shared__ float weight_matrix[HEIGHT * WIDTH * PREFETCH_COUNT]; + + int batches = tile_size / WIDTH; + + int fetch = batches * by; + int end_tile = fetch + batches; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + int fetch_index = (hid + 1) * HEIGHT * fetch + (hid + 1) * ty + tx + 1 + (hid + 1); + int index_in = HEIGHT * fetch + ty + 1; + + if (tx == 0) + memcpy_async(input_node[(fetch % PREFETCH_COUNT) * HEIGHT + ty], input_cuda[index_in], pipe); + + memcpy_async(weight_matrix[(fetch % PREFETCH_COUNT) * HEIGHT * WIDTH + ty * WIDTH + tx], input_hidden_cuda[fetch_index], pipe); + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + int compute_index = (hid + 1) * HEIGHT * compute + (hid + 1) * ty + tx + 1 + (hid + 1); + weight_matrix[(compute % PREFETCH_COUNT) * HEIGHT * WIDTH + ty * WIDTH + tx] *= input_node[(compute % PREFETCH_COUNT) * HEIGHT + ty]; + block.sync(); + + for (int i = 1; i <= __log2f(HEIGHT); i++) + { + int power_two = __powf(2, i); + if (ty % power_two == 0) + weight_matrix[(compute % PREFETCH_COUNT) * HEIGHT * WIDTH + ty * WIDTH + tx] += weight_matrix[(compute % PREFETCH_COUNT) * HEIGHT * WIDTH + (ty + power_two / 2) * WIDTH + tx]; + block.sync(); + } + + input_hidden_cuda[compute_index] = weight_matrix[(compute % PREFETCH_COUNT) * HEIGHT * WIDTH + ty * WIDTH + tx]; + 
block.sync(); + + if (tx == 0) + { + hidden_partial_sum[compute * hid + ty] = weight_matrix[(compute % PREFETCH_COUNT) * HEIGHT * WIDTH + tx * WIDTH + ty]; + } + } +} + +// __global__ void +// bpnn_layerforward_CUDA(float *input_cuda, +// float *output_hidden_cuda, +// float *input_hidden_cuda, +// float *hidden_partial_sum, +// int in, +// int hid, +// int tile_size) +// { +// int by = blockIdx.y; +// int tx = threadIdx.x; +// int ty = threadIdx.y; + +// int batches = tile_size / WIDTH; + +// __shared__ float input_node[HEIGHT]; +// __shared__ float weight_matrix[HEIGHT * WIDTH]; + +// for (int b = 0; b < batches; b++) +// { +// int index = (hid + 1) * HEIGHT * (batches * by + b) + (hid + 1) * ty + tx + 1 + (hid + 1); + +// int index_in = HEIGHT * (batches * by + b) + ty + 1; + +// if (tx == 0) +// input_node[ty] = input_cuda[index_in]; + +// __syncthreads(); + +// weight_matrix[ty * WIDTH + tx] = input_hidden_cuda[index]; + +// __syncthreads(); + +// weight_matrix[ty * WIDTH + tx] = weight_matrix[ty * WIDTH + tx] * input_node[ty]; + +// __syncthreads(); + +// for (int i = 1; i <= __log2f(HEIGHT); i++) +// { + +// int power_two = __powf(2, i); + +// if (ty % power_two == 0) +// weight_matrix[ty * WIDTH + tx] += weight_matrix[(ty + power_two / 2) * WIDTH + tx]; + +// __syncthreads(); +// } + +// input_hidden_cuda[index] = weight_matrix[ty * WIDTH + tx]; + +// __syncthreads(); + +// if (tx == 0) +// { +// hidden_partial_sum[(batches * by + b) * hid + ty] = weight_matrix[tx * WIDTH + ty]; +// } +// } +// } + +__global__ void bpnn_adjust_weights_cuda(float *delta, + int hid, + float *ly, + int in, + float *w, + float *oldw, + int tile_size) +{ + int by = blockIdx.y; + + int tx = threadIdx.x; + int ty = threadIdx.y; + + int batches = tile_size / WIDTH; + + for (int b = 0; b < batches; b++) + { + int index = (hid + 1) * HEIGHT * (batches * by + b) + (hid + 1) * ty + tx + 1 + (hid + 1); + int index_y = HEIGHT * (batches * by + b) + ty + 1; + int index_x = tx + 1; + // 
eta = 0.3; + // momentum = 0.3; + + w[index] += ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index])); + oldw[index] = ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index])); + + __syncthreads(); + + if (ty == 0 && by == 0 && b == 0) + { + w[index_x] += ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x])); + oldw[index_x] = ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x])); + } + } +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/backprop/facetrain.c b/workloads/realworld/uvm_prefetch_async/backprop/facetrain.c new file mode 100644 index 0000000000000000000000000000000000000000..cbf83810934b68551d7dd4b7b94fda5eb6837276 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/backprop/facetrain.c @@ -0,0 +1,54 @@ + +#include +#include +#include +#include +#include "backprop.h" +#include "omp.h" + +extern char *strcpy(); +extern void exit(); + +int layer_size = 0; + +backprop_face() +{ + BPNN *net; + int i; + float out_err, hid_err; + net = bpnn_create(layer_size, 16, 1); // (16, 1 can not be changed) + + printf("Input layer size : %d\n", layer_size); + load(net); + // entering the training kernel, only one iteration + printf("Starting training kernel\n"); + bpnn_train_cuda(net, &out_err, &hid_err); + bpnn_free(net); + printf("Training done\n"); +} + +int setup(argc, argv) +int argc; +char *argv[]; +{ + + int seed; + + if (argc != 3) + { + fprintf(stderr, "usage: backprop \n"); + exit(0); + } + layer_size = atoi(argv[1]); + if (layer_size % 16 != 0) + { + fprintf(stderr, "The number of input points must be divided by 16\n"); + exit(0); + } + + seed = 7; + bpnn_initialize(seed); + backprop_face(); + + exit(0); +} diff --git a/workloads/realworld/uvm_prefetch_async/backprop/imagenet.c b/workloads/realworld/uvm_prefetch_async/backprop/imagenet.c new file mode 100644 index 0000000000000000000000000000000000000000..255b0d5d8ca67508f6732e266299e7c58012906f --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch_async/backprop/imagenet.c @@ -0,0 +1,24 @@ + +#include +#include +#include "backprop.h" + +extern layer_size; + +load(net) +BPNN *net; +{ + float *units; + int nr, nc, imgsize, i, j, k; + + nr = layer_size; + + imgsize = nr * nc; + units = net->input_units; + + k = 1; + for (i = 0; i < nr; i++) { + units[k] = (float) rand()/RAND_MAX ; + k++; + } +} diff --git a/workloads/realworld/uvm_prefetch_async/backprop/run.sh b/workloads/realworld/uvm_prefetch_async/backprop/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..874cbb88032622578f319cce3800a3793151cb92 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/backprop/run.sh @@ -0,0 +1,5 @@ +# ./backprop 524288 +# ./backprop 8388608 128 + +# ./backprop 66708864 128 +./backprop 66708864 1024 \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/backprop/run_super.sh b/workloads/realworld/uvm_prefetch_async/backprop/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..d9e8a3d42354f597bbcd153d180d9b23bed71192 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/backprop/run_super.sh @@ -0,0 +1,3 @@ +# ./backprop 1073741824 1024 +# ./backprop 134217728 1024 +./backprop 67108864 1024 \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/darknet/LICENSE b/workloads/realworld/uvm_prefetch_async/darknet/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..a50f7d700ba02bfacd50f59b315311cf4d0bbda2 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/LICENSE @@ -0,0 +1,12 @@ + YOLO LICENSE + Version 2, July 29 2016 + +THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER +SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN +TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES +LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY. 
NOW HERE'S +THE REAL LICENSE: + +0. Darknet is public domain. +1. Do whatever you want with it. +2. Stop emailing me about it! diff --git a/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.fuck b/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.fuck new file mode 100644 index 0000000000000000000000000000000000000000..8b1a9d8189b3b9f4479221d52882ce36fdc73a62 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.fuck @@ -0,0 +1,13 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + +Copyright (C) 2004 Sam Hocevar + +Everyone is permitted to copy and distribute verbatim or modified +copies of this license document, and changing it is allowed as long +as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. diff --git a/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.gen b/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.gen new file mode 100644 index 0000000000000000000000000000000000000000..c54113271e15057c4def6676693eb96fd6362b28 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.gen @@ -0,0 +1,91 @@ +RNN LICENSE Version 3, June 21 2017 + +Copyright (c) 1990, 1989, 1999 Free87337 May 48 THIRD PARTIES OR ANY OTHER THE +COMPLAIN OR CONSEQUENTIAL DAMAGES AND REGARDLESS OF WHETHER IN CONTRACT, TO THE +EXTENT REPAIR OR AGENTS (NOT THE IN ANY EVENT). 
THE SOFTWARE WILL BE +UNINTERRUPTED OR ERROR-FREE OR ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF ALL THE WORK (GOVERNED CODE) HIM RESPONSES, OR OF FINES, +SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR ANY OTHER OR OTHER HARL UNDER NO +CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), +PATENT PERMITTED BY THE INSTAGRAM PARENT STATE OR TORT (INCLUDING NEGLIGENCE), +PRODUCT LIABILITY OR OTHERWISE, ARISING OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR ANYTHING PROVIDED IN THIS PRODUCT, COMMIS AND SERVICES +ARE LICENSED SOFTWARE AND ANY RESULE OR ANY OTHER THE COPYRIGHT HOLDERS BE +LIABLE FOR ANY SPECIAL, INCIDENTAL, CASE, SUCH WARRANTIES, EXPRESS OR IMPLIED, +INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COPYRIGHT HOLDERS AND/OR ANY +PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY +EXPRESS OR DISTRIBUTE THAT ALL CLAIMS ARE SHALL CREATE DERAVE BE LIABLE TO YOU +WILL HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +6\. TERMINATION. TO THE EXTENT PERMITTED BY LAW, NO USE OF THE COVERED CODE IS +WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE +INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY +SERVICING, REPAIR OR COULT OR IN ANY WAY OUT OF THE USE OF THE WEBSITES OR +SERVICE WILL BE CONSEQUENTIAL DAMAGES OF ANY KIND HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + +This paragraph Agreement constitutes the entire agreement between the parties +with respect to the Work licensed here. However, if you place the name of the +fact that the arbitration was the consultation of the parties as a "patent is". 
+Subject to the terms and conditions of this License, Contributor has knowledge +that a license under a third party may also be used to endorse or promote +products derived from the Work, and there is no warranty on the Software and +Science Fees. For the purposes of this Agreement, attach the following +disclaimers (without liabilities of written notice to the Subject Software) in a +manner that a product is under common control with you. The Free Software +Foundation may publish revised and/or new versions of the License for the +Modifications made by the applicable terms. The Recipient shall promptly retain +the covered works for any reason be entered in any federal or state or login +Restricted Laws appearing in the United States or any of its own information +that is not disabled from a derivative work except as expressly permitted in +this License, to the extent that they are in receiving the Software and Source +Code or any exercise of the rights granted to You by this License or a +Contributor made by the Licensor or are authorized to make a reasonable +retirement by the courts of the courts located in Santa Clara County, California +printed and related to the Work or “Company” and Apache Software Foundation. If +the Licensor shall be entitled to reflect your rights to use the Software and +the Software to exercise the rights granted to the recipient without a +requirement to exercise the rights granted by the Agreement to the provision +will begin will appear in such cases, you will use such information without such +corporation shall be an officer with respect to any part of the Software or any +portion thereof. 
Capitalized terms are included in the Initial Contributor and +under no circumstances will license the Service at any time and for any direct, +indirect, special, incidental, or consequential damages of or assist in +connection with any Services or the registration purposes only to the extent +that it includes any or all means including the processing of which you download +any derivative work. Any of the purchases’ transmission purposes are made +available, if any, in other circumstances, we may review the copyright notice. +In the event that this Agreement is required to give us strict content. The +inclusion of the other party hereunder may also notify you Intellectual Property +Rights to any third party. This means that the Source Code exists of the Work +will not charge a program available to you at any time. You must include a +prominent statement that the Software is governed under a particular version of +this Agreement. You must include a provision to the extent that there is no +warranty for the content of others. You agree that the Recipient was appointed +as a Contributor, (c) are effective until terminated by hereunder, then the +registration are not disabled and not limited to, submit any Customer Data +without the updated use of the Software and that no fee is released. You grant +to Use Other Arbitration Rules for Diagnostic or Services may use or modify the +Apple Software and Consolidated Apple Software or Services. The Company may have +full risk as a product of the Compatible Source. A Contribution by the Licensor +or by the updated Software under the following conditions we can redistribute +any General Provision of this Agreement. If the Program is used in accordance +with the terms of this Agreement, Customer may provide advertisements from your +devices that clause you can your employer or a transaction or country that has +been controlled by the arbitrator, that they will be useful of this Agreement. 
+The term "Open Source Software is available in connection with the program, and +you may not protect the combination of the Covered Code. You should like to +select a user's rights to charge a copy of this License. I are Contributor's +confidentiality of the exercise of the rights granted herein. Such a covered +work is released as a consequence, the Licensor shall be eligible for a purpose +or subcontractor of the person or entity to the user of the user, then the word +"Application" means having the original fee for any reason; and that no patent +license to more than fifty stated close of the license term. The terms of this +License will the license terms and conditions set forth in Section 2.2 (OPEC) +and You will not use the Software or any set of responsibility for any resulting +information that the Original Code warrants that you have the right to disclose +these information (or in the notification; or (iii) late use of the software or +any third party to the three (50) days before such belief to the extent that it +includes a court court obtains the rights granted by this License. diff --git a/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.gpl b/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.gpl new file mode 100644 index 0000000000000000000000000000000000000000..9cecc1d4669ee8af2ca727a5d8cde10cd8b2d7cc --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.gpl @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. 
+ + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + {one line to give the program's name and a brief idea of what it does.} + Copyright (C) {year} {name of author} + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + {project} Copyright (C) {year} {fullname} + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>. 
diff --git a/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.meta b/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.meta new file mode 100644 index 0000000000000000000000000000000000000000..6728bd28d319c68ae04944fb034118dcc4c9aa09 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.meta @@ -0,0 +1,8 @@ + META-LICENSE + Version 1, June 21 2017 + +Any and all licenses may be applied to the software either individually +or in concert. Any issues, ambiguities, paradoxes, or metaphysical quandries +arising from this combination should be discussed with a local faith leader, +hermit, or guru. The Oxford comma shall be used. + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.mit b/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.mit new file mode 100644 index 0000000000000000000000000000000000000000..5bd806ce16ea5053c8631793787362439375026e --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.mit @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2017 Joseph Redmon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.v1 b/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.v1 new file mode 100644 index 0000000000000000000000000000000000000000..5b8709acc43e7b76ed69758a52a9eaffaba775e6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/LICENSE.v1 @@ -0,0 +1,13 @@ + YOLO LICENSE + Version 1, July 10 2015 + +THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER +SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN +TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES +LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY SUBJECT TO +THE FOLLOWING CONDITIONS: + +1. #yolo +2. #swag +3. #blazeit + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/Makefile b/workloads/realworld/uvm_prefetch_async/darknet/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..5022f68377d7626ab768f2501883d72b013dafba --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/Makefile @@ -0,0 +1,114 @@ +GPU=1 +CUDNN=0 +OPENCV=0 +OPENMP=0 +DEBUG=0 + +#ARCH= -gencode arch=compute_30,code=sm_30 \ +# -gencode arch=compute_35,code=sm_35 \ +# -gencode arch=compute_50,code=[sm_50,compute_50] \ +# -gencode arch=compute_52,code=[sm_52,compute_52] +# -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated? 
+ +# This is what I use, uncomment if you know your arch and want to specify +ARCH= -gencode arch=compute_80,code=sm_80 \ +#ARCH= -arch=sm_80 + +VPATH=./src/:./examples:$(CUPTI_ADD_COMMON) +SLIB=libdarknet.so +ALIB=libdarknet.a +EXEC=darknet +OBJDIR=./obj/ + +CC=gcc +CPP=g++ +# NVCC=nvcc --default-stream per-thread +NVCC=nvcc +AR=ar +ARFLAGS=rcs +OPTS=-Ofast +LDFLAGS= -lm -pthread +COMMON= -Iinclude/ -Isrc/ +CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC +ifeq ($(PROFILE), 1) +CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -DPROFILE +endif + + +ifeq ($(OPENMP), 1) +CFLAGS+= -fopenmp +endif + +ifeq ($(DEBUG), 1) +OPTS=-O0 -g +endif + +CFLAGS+=$(OPTS) + +ifeq ($(OPENCV), 1) +COMMON+= -DOPENCV +CFLAGS+= -DOPENCV +LDFLAGS+= `pkg-config --libs opencv` -lstdc++ +COMMON+= `pkg-config --cflags opencv` +endif + +ifeq ($(GPU), 1) +include ../../../common/make.config +COMMON+= -DGPU -I$(CUDA_DIR)/include/ -I$(CUDA_DIR)/extras/CUPTI/include/ +CFLAGS+= -DGPU +LDFLAGS+= -L$(CUDA_DIR)/lib64 -L$(CUDA_DIR)/extras/CUPTI/lib64/ -lcuda -lcudart -lcublas -lcurand -lcupti +endif + + +ifeq ($(CUDNN), 1) +COMMON+= -DCUDNN +CFLAGS+= -DCUDNN +LDFLAGS+= -lcudnn +endif + +OBJ=gemm.o utils.o cuda_dark.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o l2norm_layer.o yolo_layer.o iseg_layer.o image_opencv.o +EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o instance-segmenter.o darknet.o +ifeq 
($(GPU), 1) +LDFLAGS+= -lstdc++ +OBJ+=gemm_kernel.o convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o +endif + +# cpu_timestamps.o +# cupti_add.o + +EXECOBJ = $(addprefix $(OBJDIR), $(EXECOBJA)) +OBJS = $(addprefix $(OBJDIR), $(OBJ)) +DEPS = $(wildcard src/*.h) Makefile include/darknet.h + +all: obj backup results $(SLIB) $(ALIB) $(EXEC) + +$(EXEC): $(EXECOBJ) $(ALIB) + $(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS) $(ALIB) + +$(ALIB): $(OBJS) + $(AR) $(ARFLAGS) $@ $^ + +$(SLIB): $(OBJS) + $(CC) $(CFLAGS) -shared $^ -o $@ $(LDFLAGS) + +$(OBJDIR)%.o: %.cpp $(DEPS) + $(CPP) $(COMMON) $(CFLAGS) -c $< -o $@ + +$(OBJDIR)%.o: %.c $(DEPS) + $(CC) $(COMMON) $(CFLAGS) -c $< -o $@ + +$(OBJDIR)%.o: %.cu $(DEPS) + $(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@ + +obj: + mkdir -p obj +backup: + mkdir -p backup +results: + mkdir -p results + +.PHONY: clean + +clean: + rm -rf $(OBJS) $(SLIB) $(ALIB) $(EXEC) $(EXECOBJ) $(OBJDIR)/* + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/README.md b/workloads/realworld/uvm_prefetch_async/darknet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fb58c2640038a963cd573d121e4fab59399f67dc --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/README.md @@ -0,0 +1,124 @@ +![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png) + +# Darknet # +Darknet is an open source neural network framework written in C and CUDA. It is fast, easy to install, and supports CPU and GPU computation. 
+ +**Discord** invite link for communication and questions: https://discord.gg/zSq8rtW + +## YOLOv7: + +* **paper** - YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors: https://arxiv.org/abs/2207.02696 + +* **source code - Pytorch (use to reproduce results):** https://github.com/WongKinYiu/yolov7 + +---- + +Official YOLOv7 is more accurate and faster than YOLOv5 by **120%** FPS, than YOLOX by **180%** FPS, than Dual-Swin-T by **1200%** FPS, than ConvNext by **550%** FPS, than SWIN-L by **500%** FPS. + +YOLOv7 surpasses all known object detectors in both speed and accuracy in the range from 5 FPS to 160 FPS and has the highest accuracy 56.8% AP among all known real-time object detectors with 30 FPS or higher on GPU V100, batch=1. + +* YOLOv7-e6 (55.9% AP, 56 FPS V100 b=1) by `+500%` FPS faster than SWIN-L Cascade-Mask R-CNN (53.9% AP, 9.2 FPS A100 b=1) +* YOLOv7-e6 (55.9% AP, 56 FPS V100 b=1) by `+550%` FPS faster than ConvNeXt-XL C-M-RCNN (55.2% AP, 8.6 FPS A100 b=1) +* YOLOv7-w6 (54.6% AP, 84 FPS V100 b=1) by `+120%` FPS faster than YOLOv5-X6-r6.1 (55.0% AP, 38 FPS V100 b=1) +* YOLOv7-w6 (54.6% AP, 84 FPS V100 b=1) by `+1200%` FPS faster than Dual-Swin-T C-M-RCNN (53.6% AP, 6.5 FPS V100 b=1) +* YOLOv7x (52.9% AP, 114 FPS V100 b=1) by `+150%` FPS faster than PPYOLOE-X (51.9% AP, 45 FPS V100 b=1) +* YOLOv7 (51.2% AP, 161 FPS V100 b=1) by `+180%` FPS faster than YOLOX-X (51.1% AP, 58 FPS V100 b=1) + +---- + +![more5](https://user-images.githubusercontent.com/4096485/179425274-f55a36d4-8450-4471-816b-8c105841effd.jpg) + +---- + +![image](https://user-images.githubusercontent.com/4096485/177675030-a929ee00-0eba-4d93-95c2-225231d0fd61.png) + + +---- + +![yolov7_640_1280](https://user-images.githubusercontent.com/4096485/177688869-d75e0c36-63af-46ec-bdbd-81dbb281f257.png) + +---- + +## Scaled-YOLOv4: + +* **paper (CVPR 2021)**: 
https://openaccess.thecvf.com/content/CVPR2021/html/Wang_Scaled-YOLOv4_Scaling_Cross_Stage_Partial_Network_CVPR_2021_paper.html + +* **source code - Pytorch (use to reproduce results):** https://github.com/WongKinYiu/ScaledYOLOv4 + +* **source code - Darknet:** https://github.com/AlexeyAB/darknet + +* **Medium:** https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982?source=friends_link&sk=c8553bfed861b1a7932f739d26f487c8 + +## YOLOv4: + +* **paper:** https://arxiv.org/abs/2004.10934 + +* **source code:** https://github.com/AlexeyAB/darknet + +* **Wiki:** https://github.com/AlexeyAB/darknet/wiki + +* **useful links:** https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7 + +For more information see the [Darknet project website](http://pjreddie.com/darknet). + + +
Expand + +![yolo_progress](https://user-images.githubusercontent.com/4096485/146988929-1ed0cbec-1e01-4ad0-b42c-808dcef32994.png) https://paperswithcode.com/sota/object-detection-on-coco + +---- + +![scaled_yolov4](https://user-images.githubusercontent.com/4096485/112776361-281d8380-9048-11eb-8083-8728b12dcd55.png) AP50:95 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2011.08036 + +---- + +![YOLOv4Tiny](https://user-images.githubusercontent.com/4096485/101363015-e5c21200-38b1-11eb-986f-b3e516e05977.png) + +---- + +![YOLOv4](https://user-images.githubusercontent.com/4096485/90338826-06114c80-dff5-11ea-9ba2-8eb63a7409b3.png) + +
+ +---- + +![OpenCV_TRT](https://user-images.githubusercontent.com/4096485/90338805-e5e18d80-dff4-11ea-8a68-5710956256ff.png) + + +## Citation + + +``` +@misc{https://doi.org/10.48550/arxiv.2207.02696, + doi = {10.48550/ARXIV.2207.02696}, + url = {https://arxiv.org/abs/2207.02696}, + author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + publisher = {arXiv}, + year = {2022}, + copyright = {arXiv.org perpetual, non-exclusive license} +} +``` + +``` +@misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +``` +@InProceedings{Wang_2021_CVPR, + author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + title = {{Scaled-YOLOv4}: Scaling Cross Stage Partial Network}, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2021}, + pages = {13029-13038} +} +``` diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/alexnet.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/alexnet.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e2ed4bb8e7b1bad7859aef0d802cb4084753cb7a --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/alexnet.cfg @@ -0,0 +1,96 @@ +[net] +# Training +# batch=128 +# subdivisions=1 +# Testing +batch=1 +subdivisions=1 +height=227 +width=227 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=256 + +learning_rate=0.01 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue = .1 +saturation=.75 +exposure=.75 +aspect=.75 + 
+[convolutional] +filters=96 +size=11 +stride=4 +pad=0 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[convolutional] +filters=256 +size=5 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[convolutional] +filters=384 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=384 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=3 +stride=2 +padding=0 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=1000 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/cifar.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/cifar.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b2f69f53903e55c24718ed12d9adaaa1557e3647 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/cifar.cfg @@ -0,0 +1,121 @@ +[net] +batch=128 +subdivisions=1 +height=28 +width=28 +channels=3 +max_crop=32 +min_crop=32 + +hue=.1 +saturation=.75 +exposure=.75 + +learning_rate=0.4 +policy=poly +power=4 +max_batches = 5000 +momentum=0.9 +decay=0.0005 + + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[dropout] +probability=.5 + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/cifar.data b/workloads/realworld/uvm_prefetch_async/darknet/cfg/cifar.data new file mode 100644 index 0000000000000000000000000000000000000000..a52208db1b203b5e1f24d5afaf8c7002adfd71a3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/cifar.data @@ -0,0 +1,7 @@ +classes=10 +train = data/cifar/train.list +valid = data/cifar/test.list +test = data/cifar/test.list +labels = data/cifar/labels.txt +backup = backup/ +top=2 diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/cifar.test.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/cifar.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..18b6c54c909152b1201d6320b85fafc5c36ba1ef --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/cifar.test.cfg @@ -0,0 +1,117 @@ +[net] +batch=128 +subdivisions=1 +height=32 +width=32 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.4 +policy=poly +power=4 +max_batches = 50000 + + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[dropout] +probability=.5 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[dropout] +probability=.5 + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 +temperature=3 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/cifar_small.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/cifar_small.cfg new file mode 100644 index 0000000000000000000000000000000000000000..d48b1231f0131faaa187b18df6705411c3d16a76 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/cifar_small.cfg @@ -0,0 +1,86 @@ +[net] +batch=128 +subdivisions=1 +height=28 +width=28 +channels=3 +max_crop=32 +min_crop=32 + +hue=.1 +saturation=.75 +exposure=.75 + +learning_rate=0.1 +policy=poly +power=4 +max_batches = 5000 +momentum=0.9 +decay=0.0005 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=10 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/coco.data b/workloads/realworld/uvm_prefetch_async/darknet/cfg/coco.data 
new file mode 100644 index 0000000000000000000000000000000000000000..5951d5245a7895e8418bc3155de3b03049e76c42 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/coco.data @@ -0,0 +1,6 @@ +classes= 80 +train = /data/darknet/coco/valid.list +valid = /data/darknet/coco/valid.list +backup = /data/darknet/backup/ +names = /data/darknet/coco/coco.names +eval=coco diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/coco.names b/workloads/realworld/uvm_prefetch_async/darknet/cfg/coco.names new file mode 100644 index 0000000000000000000000000000000000000000..16315f2becec9705017bfaf1b9fb81ca2a83c0b0 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/coco.names @@ -0,0 +1,80 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +sofa +pottedplant +bed +diningtable +toilet +tvmonitor +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/combine9k.data b/workloads/realworld/uvm_prefetch_async/darknet/cfg/combine9k.data new file mode 100644 index 0000000000000000000000000000000000000000..06a3e76aefac9c1074c3dfe159bc115a92b0791e --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/combine9k.data @@ -0,0 +1,10 @@ +classes= 9418 +#train = /home/pjreddie/data/coco/trainvalno5k.txt +train = data/combine9k.train.list +valid = /home/pjreddie/data/imagenet/det.val.files +labels = data/9k.labels 
+names = data/9k.names +backup = backup/ +map = data/inet9k.map +eval = imagenet +results = results diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet.cfg new file mode 100644 index 0000000000000000000000000000000000000000..375107f7c196baf7adf229a7cfffc84739875828 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet.cfg @@ -0,0 +1,120 @@ +[net] +# Training +# batch=128 +# subdivisions=1 +# Testing +batch=1 +subdivisions=1 +height=256 +width=256 +min_crop=128 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet19.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet19.cfg new file mode 100644 index 0000000000000000000000000000000000000000..28ac9669ef686b4d638a5bf462451962fec45a4e --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet19.cfg @@ -0,0 +1,205 @@ +[net] +# Training +#batch=128 +#subdivisions=2 + +# Testing + batch=1 + subdivisions=1 + +height=256 +width=256 +min_crop=128 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet19_448.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet19_448.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c6df7306d3ef0622e0a0e0cbd6a5603699344e56 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet19_448.cfg @@ -0,0 +1,197 @@ +[net] +batch=128 +subdivisions=4 +height=448 +width=448 +max_crop=512 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 + +angle=7 +hue = .1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet53.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet53.cfg new file mode 100644 index 0000000000000000000000000000000000000000..7b6d5766e9ec48ee19a74321583b44621c1e07b3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet53.cfg @@ -0,0 +1,566 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 
+pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet53_448.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet53_448.cfg new file mode 100644 index 0000000000000000000000000000000000000000..dedab1b97c7e5d4226f061e6c983046d7a278dd0 --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet53_448.cfg @@ -0,0 +1,559 @@ +[net] +# Training - start training with darknet53.weights +# batch=128 +# subdivisions=8 + +# Testing +batch=1 +subdivisions=1 + +height=448 +width=448 +channels=3 +min_crop=448 +max_crop=512 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 +momentum=0.9 +decay=0.0005 + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 
+size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky 
+ +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet9000.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet9000.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9dd2dfbbf5a7137faada4e091b8e6d48233f09bf --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/darknet9000.cfg @@ -0,0 +1,205 @@ +[net] +# Training +# batch=128 +# subdivisions=4 +# Testing +batch = 1 +subdivisions = 1 +height=448 +width=448 +max_crop=512 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=9418 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 +tree=data/9k.tree + +[cost] +type=masked + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/densenet201.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/densenet201.cfg new file mode 100644 index 0000000000000000000000000000000000000000..65b4aecc52d45075f2913e3d63b9aec0527fa44c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/densenet201.cfg @@ -0,0 +1,1951 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 
+filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky 
+ +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 
+activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 
+filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] 
+layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 
+pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 
+filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1,-3 + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/extraction.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/extraction.cfg new file mode 100644 index 0000000000000000000000000000000000000000..66cb15f80e9a5e811223299594882a3b5d9485dc --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/extraction.cfg @@ -0,0 +1,209 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=224 +width=224 +max_crop=320 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + 
+[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[softmax] +groups=1 + diff 
--git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/extraction.conv.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/extraction.conv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..2a7d09ec80fa2f47e1ebb4134b7845d5cae2b828 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/extraction.conv.cfg @@ -0,0 +1,179 @@ +[net] +batch=1 +subdivisions=1 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.5 +policy=poly +power=6 +max_batches=500000 + +[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[connected] +output=1000 +activation=leaky + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/extraction22k.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/extraction22k.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b5f54090d00537fdca72f54bb2eed69dd78f5b00 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/extraction22k.cfg @@ -0,0 +1,206 @@ +[net] +batch=128 +subdivisions=1 +height=224 +width=224 +max_crop=320 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +max_batches = 0 +policy=steps +steps=444000,590000,970000 +scales=.5,.2,.1 + +#policy=sigmoid +#gamma=.00008 +#step=100000 +#max_batches=200000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=3 +stride=1 +pad=1 +activation=leaky + +[avgpool] + +[connected] +output=21842 +activation=leaky + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/go.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/go.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c730092ff3ffda0124baeace050bd382c442d88d --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/go.cfg @@ -0,0 +1,132 @@ +[net] +batch=512 +subdivisions=1 +height=19 +width=19 +channels=1 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=10000000 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] 
+filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=1 +size=1 +stride=1 +pad=1 +activation=linear + +[reorg] +extra=1 +stride=1 + +[softmax] + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/go.test.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/go.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..1e4e43809bf3ede20a67b5020fcca0f61612e8f7 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/go.test.cfg @@ -0,0 +1,132 @@ +[net] +batch=1 +subdivisions=1 +height=19 +width=19 +channels=1 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +policy=poly +power=4 +max_batches=100000 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu 
+batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu +batch_normalize=1 + +[convolutional] +filters=1 +size=1 +stride=1 +pad=1 +activation=linear + +[reorg] +extra=1 +stride=1 + +[softmax] + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/gru.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/gru.cfg new file mode 100644 index 0000000000000000000000000000000000000000..6064221289d41dc3ee464a715ae05593a02f34f4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/gru.cfg @@ -0,0 +1,29 @@ +[net] +inputs=256 +momentum=0.9 +decay=0.0 +subdivisions=1 +batch = 1 +time_steps=1 +learning_rate=.002 +adam=1 + +policy=constant +power=4 +max_batches=1000000 + +[gru] +output = 256 + +[gru] +output = 256 + +[gru] +output = 256 + +[connected] +output=256 +activation=linear + +[softmax] + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/imagenet.labels.list b/workloads/realworld/uvm_prefetch_async/darknet/cfg/imagenet.labels.list new file mode 100644 index 0000000000000000000000000000000000000000..e73d41762d311df7f7eefec0f65ab12a7bacc023 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/imagenet.labels.list @@ -0,0 +1,21842 @@ +n02119789 +n02100735 +n02110185 +n02096294 +n02102040 +n02066245 +n02509815 +n02124075 +n02417914 +n02123394 +n02125311 +n02423022 +n02346627 +n02077923 +n02110063 +n02447366 +n02109047 +n02089867 +n02102177 +n02091134 +n02092002 +n02071294 +n02442845 +n02504458 +n02092339 +n02098105 
+n02096437 +n02114712 +n02105641 +n02128925 +n02091635 +n02088466 +n02096051 +n02117135 +n02138441 +n02097130 +n02493509 +n02457408 +n02389026 +n02443484 +n02110341 +n02089078 +n02086910 +n02445715 +n02093256 +n02113978 +n02106382 +n02441942 +n02113712 +n02113186 +n02105162 +n02415577 +n02356798 +n02488702 +n02123159 +n02098413 +n02422699 +n02114855 +n02094433 +n02111277 +n02132136 +n02119022 +n02091467 +n02106550 +n02422106 +n02091831 +n02120505 +n02104365 +n02086079 +n02112706 +n02098286 +n02095889 +n02484975 +n02137549 +n02500267 +n02129604 +n02090721 +n02396427 +n02108000 +n02391049 +n02412080 +n02108915 +n02480495 +n02110806 +n02128385 +n02107683 +n02085936 +n02094114 +n02087046 +n02100583 +n02096177 +n02494079 +n02105056 +n02101556 +n02123597 +n02481823 +n02105505 +n02088094 +n02085782 +n02489166 +n02364673 +n02114548 +n02134084 +n02480855 +n02090622 +n02113624 +n02093859 +n02403003 +n02097298 +n02108551 +n02493793 +n02107142 +n02096585 +n02107574 +n02107908 +n02086240 +n02102973 +n02112018 +n02093647 +n02397096 +n02437312 +n02483708 +n02097047 +n02106030 +n02099601 +n02093991 +n02110627 +n02106166 +n02326432 +n02108089 +n02097658 +n02088364 +n02111129 +n02100236 +n02486261 +n02115913 +n02486410 +n02487347 +n02099849 +n02108422 +n02104029 +n02492035 +n02110958 +n02099429 +n02094258 +n02099267 +n02395406 +n02112350 +n02109961 +n02101388 +n02113799 +n02095570 +n02128757 +n02101006 +n02115641 +n02097209 +n02342885 +n02097474 +n02120079 +n02095314 +n02088238 +n02408429 +n02133161 +n02328150 +n02410509 +n02492660 +n02398521 +n02112137 +n02510455 +n02093428 +n02105855 +n02111500 +n02085620 +n02123045 +n02490219 +n02099712 +n02109525 +n02454379 +n02111889 +n02088632 +n02090379 +n02443114 +n02361337 +n02105412 +n02483362 +n02437616 +n02107312 +n02325366 +n02091032 +n02129165 +n02102318 +n02100877 +n02074367 +n02504013 +n02363005 +n02102480 +n02113023 +n02086646 +n02497673 +n02087394 +n02127052 +n02116738 +n02488291 +n02091244 +n02114367 +n02130308 +n02089973 
+n02105251 +n02134418 +n02093754 +n02106662 +n02444819 +n01882714 +n01871265 +n01872401 +n01877812 +n01873310 +n01883070 +n04086273 +n04507155 +n04147183 +n04254680 +n02672831 +n02219486 +n02317335 +n01968897 +n03452741 +n03642806 +n07745940 +n02690373 +n04552348 +n02692877 +n02782093 +n04266014 +n03344393 +n03447447 +n04273569 +n03662601 +n02951358 +n04612504 +n02981792 +n04483307 +n03095699 +n03673027 +n03947888 +n02687172 +n04347754 +n04606251 +n03478589 +n04389033 +n03773504 +n02860847 +n03218198 +n02835271 +n03792782 +n03393912 +n03895866 +n02797295 +n04204347 +n03791053 +n03384352 +n03272562 +n04310018 +n02704792 +n02701002 +n02814533 +n02930766 +n03100240 +n03594945 +n03670208 +n03770679 +n03777568 +n04037443 +n04285008 +n03444034 +n03445924 +n03785016 +n04252225 +n03345487 +n03417042 +n03930630 +n04461696 +n04467665 +n03796401 +n03977966 +n04065272 +n04335435 +n04252077 +n04465501 +n03776460 +n04482393 +n04509417 +n03538406 +n03599486 +n03868242 +n02804414 +n03125729 +n03131574 +n03388549 +n02870880 +n03018349 +n03742115 +n03016953 +n04380533 +n03337140 +n03891251 +n02791124 +n04429376 +n03376595 +n04099969 +n04344873 +n04447861 +n03179701 +n03982430 +n03201208 +n03290653 +n04550184 +n07742313 +n07747607 +n07749582 +n07753113 +n07753275 +n07753592 +n07754684 +n07760859 +n07768694 +n12267677 +n12620546 +n13133613 +n11879895 +n12144580 +n12768682 +n03854065 +n04515003 +n03017168 +n03249569 +n03447721 +n03720891 +n03721384 +n04311174 +n02787622 +n02992211 +n04536866 +n03495258 +n02676566 +n03272010 +n03110669 +n03394916 +n04487394 +n03494278 +n03840681 +n03884397 +n02804610 +n03838899 +n04141076 +n03372029 +n11939491 +n12057211 +n09246464 +n09468604 +n09193705 +n09472597 +n09399592 +n09421951 +n09256479 +n09332890 +n09428293 +n09288635 +n03498962 +n03041632 +n03658185 +n03954731 +n03995372 +n03649909 +n03481172 +n03109150 +n02951585 +n03970156 +n04154565 +n04208210 +n03967562 +n03000684 +n01514668 +n01514859 +n01518878 +n01530575 +n01531178 +n01532829 
+n01534433 +n01537544 +n01558993 +n01560419 +n01580077 +n01582220 +n01592084 +n01601694 +n01608432 +n01614925 +n01616318 +n01622779 +n01795545 +n01796340 +n01797886 +n01798484 +n01806143 +n01806567 +n01807496 +n01817953 +n01818515 +n01819313 +n01820546 +n01824575 +n01828970 +n01829413 +n01833805 +n01843065 +n01843383 +n01847000 +n01855032 +n01855672 +n01860187 +n02002556 +n02002724 +n02006656 +n02007558 +n02009912 +n02009229 +n02011460 +n02012849 +n02013706 +n02018207 +n02018795 +n02025239 +n02027492 +n02028035 +n02033041 +n02037110 +n02017213 +n02051845 +n02056570 +n02058221 +n01484850 +n01491361 +n01494475 +n01496331 +n01498041 +n02514041 +n02536864 +n01440764 +n01443537 +n02526121 +n02606052 +n02607072 +n02643566 +n02655020 +n02640242 +n02641379 +n01664065 +n01665541 +n01667114 +n01667778 +n01669191 +n01675722 +n01677366 +n01682714 +n01685808 +n01687978 +n01688243 +n01689811 +n01692333 +n01693334 +n01694178 +n01695060 +n01704323 +n01697457 +n01698640 +n01728572 +n01728920 +n01729322 +n01729977 +n01734418 +n01735189 +n01737021 +n01739381 +n01740131 +n01742172 +n01744401 +n01748264 +n01749939 +n01751748 +n01753488 +n01755581 +n01756291 +n01629819 +n01630670 +n01631663 +n01632458 +n01632777 +n01641577 +n01644373 +n01644900 +n04579432 +n04592741 +n03876231 +n03483316 +n03868863 +n04251144 +n03691459 +n03759954 +n04152593 +n03793489 +n03271574 +n03843555 +n04332243 +n04265275 +n04330267 +n03467068 +n02794156 +n04118776 +n03841143 +n04141975 +n02708093 +n03196217 +n04548280 +n03544143 +n04355338 +n03891332 +n04328186 +n03197337 +n04317175 +n04376876 +n03706229 +n02841315 +n04009552 +n04356056 +n03692522 +n04044716 +n02879718 +n02950826 +n02749479 +n04090263 +n04008634 +n03085013 +n04505470 +n03126707 +n03666591 +n02666196 +n02977058 +n04238763 +n03180011 +n03485407 +n03832673 +n06359193 +n03496892 +n04428191 +n04004767 +n04243546 +n04525305 +n04179913 +n03602883 +n04372370 +n03532672 +n02974003 +n03874293 +n03944341 +n03992509 +n03425413 +n02966193 +n04371774 
+n04067472 +n04040759 +n04019541 +n03492542 +n04355933 +n03929660 +n02965783 +n04258138 +n04074963 +n03208938 +n02910353 +n03476684 +n03627232 +n03075370 +n03874599 +n03804744 +n04127249 +n04153751 +n03803284 +n04162706 +n04228054 +n02948072 +n03590841 +n04286575 +n04456115 +n03814639 +n03933933 +n04485082 +n03733131 +n03794056 +n04275548 +n01768244 +n01770081 +n01770393 +n01773157 +n01773549 +n01773797 +n01774384 +n01774750 +n01775062 +n01776313 +n01784675 +n01990800 +n01978287 +n01978455 +n01980166 +n01981276 +n01983481 +n01984695 +n01985128 +n01986214 +n02165105 +n02165456 +n02167151 +n02168699 +n02169497 +n02172182 +n02174001 +n02177972 +n02190166 +n02206856 +n02226429 +n02229544 +n02231487 +n02233338 +n02236044 +n02256656 +n02259212 +n02264363 +n02268443 +n02268853 +n02276258 +n02277742 +n02279972 +n02280649 +n02281406 +n02281787 +n01910747 +n01914609 +n01917289 +n01924916 +n01930112 +n01943899 +n01944390 +n01945685 +n01950731 +n01955084 +n02319095 +n02321529 +n03584829 +n03297495 +n03761084 +n03259280 +n04111531 +n04442312 +n04542943 +n04517823 +n03207941 +n04070727 +n04554684 +n03133878 +n03400231 +n04596742 +n02939185 +n03063689 +n04398044 +n04270147 +n02699494 +n04486054 +n03899768 +n04311004 +n04366367 +n04532670 +n02793495 +n03457902 +n03877845 +n03781244 +n03661043 +n02727426 +n02859443 +n03028079 +n03788195 +n04346328 +n03956157 +n04081281 +n03032252 +n03529860 +n03697007 +n03065424 +n03837869 +n04458633 +n02980441 +n04005630 +n03461385 +n02776631 +n02791270 +n02871525 +n02927161 +n03089624 +n04200800 +n04443257 +n04462240 +n03388043 +n03042490 +n04613696 +n03216828 +n02892201 +n03743016 +n02788148 +n02894605 +n03160309 +n03000134 +n03930313 +n04604644 +n04326547 +n03459775 +n04239074 +n04501370 +n03792972 +n04149813 +n03530642 +n03961711 +n03903868 +n02814860 +n07711569 +n07720875 +n07714571 +n07714990 +n07715103 +n07716358 +n07716906 +n07717410 +n07717556 +n07718472 +n07718747 +n07730033 +n07734744 +n04209239 +n03594734 +n02971356 +n03485794 
+n04133789 +n02747177 +n04125021 +n07579787 +n03814906 +n03134739 +n03404251 +n04423845 +n03877472 +n04120489 +n03062245 +n03014705 +n03717622 +n03777754 +n04493381 +n04476259 +n02777292 +n07693725 +n03998194 +n03617480 +n07590611 +n04579145 +n03623198 +n07248320 +n04277352 +n04229816 +n02823428 +n03127747 +n02877765 +n04435653 +n03724870 +n03710637 +n03920288 +n03379051 +n02807133 +n04399382 +n03527444 +n03983396 +n03924679 +n04532106 +n06785654 +n03445777 +n07613480 +n04350905 +n04562935 +n03325584 +n03045698 +n07892512 +n03250847 +n04192698 +n03026506 +n03534580 +n07565083 +n04296562 +n02869837 +n07871810 +n02799071 +n03314780 +n04141327 +n04357314 +n02823750 +n13052670 +n07583066 +n03637318 +n04599235 +n07802026 +n02883205 +n03709823 +n04560804 +n02909870 +n03207743 +n04263257 +n07932039 +n03786901 +n04479046 +n03873416 +n02999410 +n04367480 +n03775546 +n07875152 +n04591713 +n04201297 +n02916936 +n03240683 +n02840245 +n02963159 +n04370456 +n03991062 +n02843684 +n03482405 +n03942813 +n03908618 +n03902125 +n07584110 +n02730930 +n04023962 +n02769748 +n10148035 +n02817516 +n03908714 +n02906734 +n03788365 +n02667093 +n03787032 +n03980874 +n03141823 +n03976467 +n04264628 +n07930864 +n04039381 +n06874185 +n04033901 +n04041544 +n07860988 +n03146219 +n03763968 +n03676483 +n04209133 +n03782006 +n03857828 +n03775071 +n02892767 +n07684084 +n04522168 +n03764736 +n04118538 +n03887697 +n13044778 +n03291819 +n03770439 +n03124170 +n04487081 +n03916031 +n02808440 +n07697537 +n12985857 +n02917067 +n03938244 +n15075141 +n02978881 +n02966687 +n03633091 +n13040303 +n03690938 +n03476991 +n02669723 +n03220513 +n03127925 +n04584207 +n07880968 +n03937543 +n03000247 +n04418357 +n04590129 +n02795169 +n04553703 +n02783161 +n02802426 +n02808304 +n03124043 +n03450230 +n04589890 +n12998815 +n02992529 +n03825788 +n02790996 +n03710193 +n03630383 +n03347037 +n03769881 +n03871628 +n03733281 +n03976657 +n03535780 +n04259630 +n03929855 +n04049303 +n04548362 +n02979186 +n06596364 +n03935335 
+n06794110 +n02825657 +n03388183 +n04591157 +n04540053 +n03866082 +n04136333 +n04026417 +n02865351 +n02834397 +n03888257 +n04235860 +n04404412 +n04371430 +n03733805 +n07920052 +n07873807 +n02895154 +n04204238 +n04597913 +n04131690 +n07836838 +n09835506 +n03443371 +n13037406 +n04336792 +n04557648 +n03187595 +n04254120 +n03595614 +n04146614 +n03598930 +n03958227 +n04069434 +n03188531 +n02786058 +n07615774 +n04525038 +n04409515 +n03424325 +n03223299 +n03680355 +n07614500 +n07695742 +n04033995 +n03710721 +n04392985 +n03047690 +n03584254 +n13054560 +n10565667 +n03950228 +n03729826 +n02837789 +n04254777 +n02988304 +n03657121 +n04417672 +n04523525 +n02815834 +n09229709 +n07697313 +n03888605 +n03355925 +n03063599 +n04116512 +n04325704 +n07831146 +n03255030 +n00483313 +n02432291 +n02356381 +n02377388 +n04028764 +n04381587 +n02279257 +n04168199 +n00445055 +n02461128 +n03626760 +n04313503 +n00451635 +n02509515 +n04224842 +n09403734 +n02769290 +n13054073 +n03163222 +n00464478 +n03087069 +n04477219 +n03445617 +n00449054 +n00483705 +n04395106 +n03389611 +n04285965 +n04166281 +n04003856 +n03696301 +n00475787 +n04587404 +n09218641 +n02276355 +n03592669 +n04459909 +n04492375 +n09447666 +n00463543 +n04148703 +n04591517 +n03970546 +n04297750 +n02782778 +n02383231 +n03693474 +n02277094 +n03766044 +n02056228 +n03394272 +n03047052 +n00434075 +n04185946 +n02411999 +n03858418 +n12833149 +n02836035 +n03108853 +n04587559 +n04138261 +n02278024 +n03063485 +n02774921 +n09475044 +n02811204 +n03329302 +n04026813 +n03986562 +n03379204 +n03426134 +n02790669 +n03487090 +n03548402 +n08614632 +n04054361 +n03421485 +n03302671 +n03098959 +n02970408 +n03772584 +n03064935 +n09415584 +n11715430 +n12024445 +n02710201 +n03475581 +n13142504 +n03396074 +n03211789 +n03914337 +n03678558 +n03233123 +n00453396 +n00454395 +n00440382 +n04289027 +n00445226 +n11953610 +n04128413 +n00480211 +n00470966 +n12547503 +n03085219 +n02275773 +n02692086 +n04257790 +n00448748 +n02686379 +n12328567 +n03432129 +n03859000 
+n12091377 +n02124313 +n00442847 +n04603399 +n03114379 +n02920369 +n03818343 +n02946127 +n02978055 +n12914923 +n02705429 +n00448232 +n12882945 +n04289690 +n07606669 +n02056728 +n11848479 +n03046921 +n12282933 +n02867966 +n12821505 +n02812949 +n04545305 +n02699770 +n04395651 +n02900160 +n04099003 +n02054711 +n12606545 +n03356858 +n01859190 +n03643737 +n02962200 +n03123553 +n09361517 +n02793089 +n00449517 +n02783994 +n10117851 +n12038585 +n04383839 +n10142391 +n07719213 +n03536122 +n02472987 +n03454536 +n11728099 +n02392824 +n03795758 +n04282872 +n00448872 +n02404432 +n03797182 +n03029197 +n03665924 +n12477163 +n02769963 +n03863262 +n01532325 +n04165409 +n04593077 +n04473108 +n03577090 +n09988063 +n00446804 +n03931765 +n00475014 +n02700064 +n03240892 +n12475242 +n11735053 +n04053508 +n02852173 +n02382750 +n03823111 +n04543772 +n04112147 +n04433585 +n03175189 +n03596543 +n00445685 +n03307792 +n04589593 +n01814217 +n02993368 +n04303497 +n02811350 +n03355768 +n03699591 +n04590553 +n01893825 +n12726670 +n09916348 +n11544015 +n01318894 +n02133704 +n02367492 +n04506289 +n02069974 +n01900150 +n03207835 +n03363549 +n02831595 +n04970470 +n04160847 +n03767203 +n03928814 +n02302969 +n02918595 +n10401331 +n04231272 +n03717447 +n03063968 +n03380724 +n00825773 +n09988493 +n02740300 +n04539794 +n04121511 +n01323599 +n12937130 +n02428508 +n02980036 +n12061380 +n01887787 +n04214046 +n01787835 +n00466630 +n02979290 +n03927091 +n03231368 +n03904657 +n04469003 +n04196502 +n02122948 +n04544325 +n07868340 +n13876561 +n11925898 +n12158443 +n01595450 +n12454705 +n02069412 +n09618957 +n02357111 +n00451563 +n04197110 +n02276902 +n03111296 +n03909020 +n12303083 +n02082791 +n01956764 +n04269822 +n04207343 +n02433318 +n01888181 +n12682668 +n01592387 +n09793141 +n00466273 +n04026180 +n06255081 +n12172364 +n10145590 +n12311579 +n12173912 +n03822171 +n03140292 +n03027625 +n02739427 +n02060133 +n02431785 +n03219010 +n00447957 +n11910271 +n03620967 +n12547215 +n02409508 +n04290079 +n12329260 
+n13901858 +n02008497 +n10304914 +n04524142 +n04279462 +n04233124 +n09733793 +n12822115 +n09475179 +n10151760 +n03418618 +n12858397 +n07735510 +n03549473 +n10098245 +n03653583 +n10604380 +n03375575 +n03885293 +n01527347 +n03237340 +n02760658 +n11953038 +n03187268 +n03004275 +n02393161 +n11965218 +n08580944 +n03938725 +n03900979 +n04144241 +n03760310 +n02376679 +n03237992 +n09432283 +n02379908 +n09918554 +n04041747 +n12012111 +n10331167 +n01612122 +n10147935 +n07691539 +n11669786 +n09403427 +n01935395 +n09903501 +n04439585 +n04459018 +n02780704 +n03720163 +n12899752 +n04118635 +n03404149 +n02429456 +n00449168 +n04516354 +n04317833 +n12075299 +n07878647 +n09438940 +n03361550 +n02027357 +n04317976 +n03092883 +n04526964 +n03985069 +n03610682 +n04028581 +n02277268 +n09433839 +n03846431 +n03919289 +n10146104 +n10260706 +n02686227 +n03321103 +n00444846 +n01558307 +n01595168 +n03919096 +n11844892 +n04260364 +n02750070 +n03034244 +n03002096 +n04273972 +n11814584 +n04605321 +n07745466 +n02922798 +n03361380 +n12651229 +n08521623 +n04498389 +n00453313 +n04967882 +n12024690 +n03934656 +n02685082 +n04501550 +n09972458 +n03055418 +n07763629 +n03902756 +n09938449 +n09712696 +n02387346 +n03133415 +n07711080 +n03129753 +n03524150 +n02275560 +n03993053 +n03438661 +n11939180 +n00466524 +n11753355 +n03456024 +n03421324 +n07890540 +n11720643 +n02057035 +n00453126 +n04453037 +n01540832 +n03546235 +n03370387 +n02041875 +n02871439 +n03262072 +n01786646 +n02430830 +n02799175 +n05262422 +n03854722 +n12817694 +n04449966 +n01564773 +n02034971 +n03490119 +n02822579 +n07879953 +n04110178 +n04963588 +n04252653 +n01565078 +n02389128 +n02779435 +n10645017 +n04582205 +n08573842 +n10146002 +n03892178 +n03119396 +n03813078 +n07866868 +n03160740 +n03371875 +n02417387 +n03904782 +n03098688 +n02902687 +n01828556 +n04401680 +n04590933 +n01575401 +n07693048 +n02901114 +n03047941 +n04355511 +n11849871 +n10738111 +n03122073 +n12052787 +n01594004 +n01549886 +n02824058 +n03506184 +n11487732 +n12574866 
+n12948053 +n10091450 +n00470554 +n00326094 +n12093329 +n04438897 +n07818995 +n12828791 +n13901321 +n10613996 +n10159533 +n02669295 +n02843158 +n06415688 +n14858292 +n09813219 +n12485653 +n03200231 +n02089468 +n03935234 +n01539925 +n12428076 +n10439373 +n01536644 +n02694662 +n02123242 +n03002711 +n03363749 +n02669534 +n03451798 +n11927215 +n02679257 +n09475925 +n10015485 +n12422129 +n03946162 +n02377291 +n07871720 +n12622297 +n12782915 +n01579260 +n11838916 +n10267311 +n12824053 +n03340723 +n02276749 +n04439712 +n02126139 +n04188179 +n02386853 +n07942152 +n02499316 +n04324387 +n10635788 +n04234887 +n12237641 +n03713436 +n04960582 +n04076713 +n01646292 +n03947798 +n02840134 +n04476972 +n09822830 +n03551395 +n04533802 +n02918964 +n00474657 +n12932966 +n01615458 +n01806364 +n12458550 +n11784497 +n03557360 +n10638922 +n09889941 +n10689306 +n03358172 +n04295571 +n06596607 +n11853356 +n00482122 +n11760785 +n03150232 +n11778257 +n03059685 +n10105733 +n04104384 +n07691237 +n04326676 +n07684938 +n12666965 +n04177820 +n13918387 +n03398153 +n03914438 +n09932098 +n02988486 +n02977619 +n03317788 +n03484487 +n02988679 +n04062428 +n02568087 +n12866162 +n04227144 +n07875436 +n04082886 +n11753700 +n00470682 +n02122298 +n10145239 +n12755727 +n04214282 +n01852671 +n02378969 +n04108822 +n10382825 +n12392549 +n03973839 +n12258885 +n11782761 +n12389501 +n02940570 +n03405595 +n02969323 +n03207630 +n10169147 +n03805725 +n09847543 +n02415253 +n07880080 +n04305572 +n02042180 +n07565161 +n02871147 +n04438507 +n04445154 +n07842433 +n12029635 +n09750282 +n09621232 +n01858906 +n02761206 +n03986355 +n12591351 +n13916721 +n02905036 +n11894770 +n02377603 +n12924623 +n03950899 +n09454153 +n10247358 +n05261310 +n11943660 +n10804287 +n03560430 +n01756089 +n10618342 +n04283378 +n13926786 +n04238321 +n04393549 +n04461879 +n03502200 +n00440941 +n03494706 +n04148579 +n13902336 +n02780815 +n10726031 +n04124098 +n12344483 +n04384910 +n07681450 +n02030837 +n04059157 +n09247410 +n02714751 +n08633683 
+n04520784 +n10141732 +n12371439 +n04499062 +n02931148 +n07609632 +n04536335 +n02874537 +n03013438 +n11786539 +n11690455 +n07600696 +n00478262 +n00466712 +n03399677 +n12441183 +n07895962 +n11966083 +n02990373 +n04241249 +n02068541 +n12513933 +n02356977 +n04252560 +n04087826 +n03455488 +n07619409 +n09787534 +n03680942 +n00446980 +n12384839 +n03416900 +n07821758 +n11853813 +n01606522 +n11780148 +n04969242 +n12413880 +n04130257 +n01322604 +n03061211 +n01959492 +n02842573 +n04313628 +n03815149 +n02445394 +n08547544 +n03222176 +n04070003 +n03075768 +n09695979 +n02877266 +n08583292 +n02870676 +n03657511 +n01621635 +n04284341 +n04136161 +n02836174 +n10247880 +n01744100 +n02882894 +n03408444 +n03411079 +n02366959 +n04399158 +n04542715 +n02787435 +n04251701 +n13863020 +n07890226 +n12245319 +n12849952 +n11626826 +n00887544 +n03140431 +n03519387 +n03855604 +n07906111 +n02054036 +n11954161 +n03038281 +n00450998 +n12136392 +n02119477 +n04356925 +n02406647 +n04450133 +n12545635 +n01565599 +n02028900 +n07817024 +n02971167 +n04309049 +n02678897 +n12795555 +n11769803 +n01904886 +n02079851 +n12189987 +n04581829 +n12098403 +n01839330 +n12587803 +n03652932 +n08628141 +n03544238 +n04513827 +n01847806 +n03132076 +n10243137 +n03621377 +n10530959 +n14765422 +n04968139 +n12950314 +n02064816 +n02846511 +n10513823 +n11772408 +n03341297 +n03492922 +n03683606 +n02894337 +n02365480 +n09846755 +n03495039 +n01317813 +n12610328 +n02157206 +n01588002 +n03914831 +n03659686 +n10406765 +n09205509 +n02870526 +n07954211 +n10578471 +n11646694 +n03115762 +n07762913 +n12056758 +n12305986 +n11845913 +n02835915 +n02831237 +n07927512 +n12171098 +n02073831 +n07605040 +n02885462 +n02768114 +n04450994 +n11844371 +n03963645 +n02956699 +n02029378 +n01528396 +n10005934 +n04465666 +n04390977 +n11882074 +n03831382 +n04605163 +n06276501 +n02944075 +n05258051 +n07901457 +n12683571 +n02205219 +n13235503 +n02388735 +n03941231 +n14919819 +n12816508 +n11536673 +n13895262 +n02903204 +n10137825 +n07841345 +n07893253 
+n01850192 +n07769731 +n11773987 +n03539678 +n12938193 +n10802507 +n03089879 +n00477392 +n01828096 +n09263912 +n13653902 +n04579667 +n01322983 +n08579352 +n07587023 +n07756951 +n07870167 +n10588357 +n01606809 +n13864035 +n02802544 +n07591961 +n02979399 +n04144539 +n02416820 +n11769176 +n09743792 +n09732170 +n04972451 +n13918274 +n01847089 +n01859689 +n04208065 +n07617051 +n10674713 +n07914271 +n07887461 +n03736064 +n03644858 +n03878963 +n04040247 +n07891433 +n01611969 +n07587618 +n02689144 +n10049363 +n04059516 +n10313239 +n03115400 +n01519563 +n01533893 +n03850245 +n11733548 +n03372549 +n01884834 +n02839110 +n07887192 +n03617312 +n07886463 +n03103396 +n07764847 +n01855476 +n07808587 +n12858871 +n03632729 +n10209731 +n04141712 +n03978686 +n03225988 +n00475273 +n09224725 +n04966543 +n01322221 +n03649674 +n13154494 +n03948830 +n03320519 +n03723267 +n07869611 +n12342498 +n01827793 +n03145719 +n11821184 +n11956348 +n11857875 +n10339717 +n09450163 +n10756148 +n01591301 +n07915094 +n04422727 +n09719309 +n03349469 +n03389889 +n10718131 +n04298661 +n09747495 +n03676623 +n03547229 +n03062015 +n10734394 +n07817315 +n02852360 +n01850553 +n02952585 +n03587205 +n02009750 +n01540090 +n02947660 +n03656957 +n03378174 +n02508213 +n01572489 +n12008487 +n12185859 +n11691046 +n01323355 +n05262534 +n00448126 +n02432983 +n12038406 +n03883385 +n02411206 +n01643896 +n10159045 +n11675025 +n01803362 +n02009508 +n07920349 +n04098513 +n11617272 +n09913455 +n12390314 +n04171208 +n02995345 +n10634849 +n03173929 +n02749953 +n11845793 +n12796022 +n11955153 +n11816829 +n03032453 +n11984542 +n02992795 +n03712111 +n02873733 +n02759387 +n14915184 +n02381364 +n12686274 +n07857731 +n04518764 +n03010473 +n02418465 +n02359556 +n07894799 +n04104770 +n04335209 +n01848976 +n02006063 +n04454908 +n03002948 +n04220250 +n09923561 +n04102162 +n11958080 +n04598965 +n10173410 +n03067339 +n02003204 +n12686676 +n11986511 +n02311617 +n03367059 +n02761557 +n05578095 +n04041069 +n10575463 +n03325941 +n10082043 
+n01806297 +n09691729 +n04593866 +n01813088 +n01625562 +n03906224 +n01652026 +n10236304 +n04102618 +n04321453 +n07820145 +n01575117 +n12788854 +n07823698 +n04206225 +n03216710 +n02421449 +n03343737 +n07560903 +n02872529 +n11989869 +n12071744 +n06278475 +n04492749 +n02920259 +n03798061 +n02420509 +n03316105 +n12052447 +n03974915 +n02904803 +n03430418 +n12291959 +n06892775 +n03875806 +n07903841 +n10282482 +n02683323 +n07862348 +n01849157 +n04469813 +n09944022 +n03342127 +n07592481 +n02936402 +n02405929 +n10002760 +n02537716 +n05259914 +n01560280 +n12694486 +n07879350 +n02377063 +n03637181 +n03409297 +n01607812 +n02808185 +n09239302 +n12055516 +n09712448 +n02859184 +n12772908 +n02735538 +n10333838 +n12336092 +n02386968 +n04613939 +n00452864 +n04535524 +n03174731 +n04189816 +n07607605 +n12909917 +n02387722 +n02960690 +n07715221 +n02407071 +n10667477 +n09398076 +n04236809 +n01904806 +n01610552 +n12373100 +n12771390 +n04122685 +n07804771 +n15102455 +n03469175 +n03746005 +n02536456 +n03505667 +n11816336 +n09376198 +n10572706 +n03464053 +n02869155 +n07816164 +n04969798 +n02942349 +n14820180 +n01623615 +n12676703 +n03369276 +n03650551 +n02010272 +n02976123 +n01852400 +n02196119 +n04132158 +n03238586 +n07639069 +n03313333 +n10542761 +n12215022 +n00455173 +n10019406 +n12899537 +n04277826 +n09906449 +n04549629 +n11508382 +n15090065 +n10289462 +n04540255 +n02723165 +n04335693 +n01536334 +n03107488 +n12782530 +n14785065 +n02974348 +n09874862 +n04479939 +n03309465 +n09902954 +n12092417 +n03425595 +n12433081 +n07806774 +n12462805 +n01314781 +n10192839 +n01622120 +n07807171 +n03261019 +n02843553 +n04287747 +n02324587 +n09915434 +n01818299 +n01592694 +n03826186 +n03607659 +n01527917 +n03628511 +n02005399 +n04204081 +n02052775 +n04403413 +n03914106 +n12811027 +n01872772 +n04555700 +n02004855 +n04602762 +n02713003 +n04406817 +n11934807 +n03336282 +n09684901 +n03836976 +n11959862 +n03062336 +n03506028 +n04503413 +n07819896 +n03205669 +n11902200 +n07685218 +n03046133 +n10261624 
+n10303814 +n03676087 +n04023695 +n07587111 +n07764155 +n01504179 +n03794136 +n03389761 +n13901211 +n02784124 +n04488530 +n02807731 +n07898443 +n04981658 +n04177755 +n03649161 +n04125257 +n10135129 +n03653110 +n10560106 +n07735687 +n03511333 +n11960245 +n03301568 +n03878066 +n10746931 +n04223299 +n04237423 +n07888229 +n01819734 +n12312728 +n09981939 +n03727465 +n13882276 +n02993194 +n11971927 +n09713108 +n03581125 +n09718936 +n14698884 +n03005285 +n03540914 +n03359436 +n03934042 +n07569644 +n04964878 +n07890068 +n07580253 +n01538630 +n03132666 +n03259009 +n02796318 +n12703190 +n01464844 +n11792029 +n04270371 +n13102775 +n02933649 +n02387254 +n02890188 +n04335886 +n04358491 +n02786837 +n03885194 +n04001265 +n03438071 +n10375402 +n02997910 +n03326795 +n00470830 +n02734725 +n03494537 +n08376250 +n07743544 +n02991847 +n04246271 +n04156140 +n04381073 +n07732168 +n04951071 +n07977870 +n04334599 +n02838728 +n03326948 +n11723227 +n08182379 +n03686924 +n03821518 +n02382204 +n02080415 +n11788727 +n07732636 +n03860404 +n03898395 +n07867324 +n04392113 +n13237188 +n03263076 +n07843636 +n04968056 +n04397027 +n03320421 +n06267564 +n02880842 +n04115456 +n13862407 +n10289039 +n03128248 +n01457852 +n01536035 +n04579056 +n03937931 +n03036022 +n01804163 +n09913593 +n12841007 +n03115897 +n03256032 +n02475669 +n07924443 +n03061505 +n10001481 +n03600722 +n07842308 +n10696508 +n04215402 +n10588074 +n03614782 +n03995535 +n12091953 +n04113194 +n10092978 +n03011741 +n04381860 +n07819769 +n07905474 +n03288500 +n04225987 +n13223710 +n02879087 +n02920083 +n08640739 +n03362890 +n03996849 +n03849814 +n09694664 +n02407390 +n02910864 +n02388917 +n01668665 +n07616046 +n02932891 +n10553235 +n03652729 +n01615703 +n12801781 +n12164656 +n05302499 +n03801760 +n03332271 +n02901793 +n03941417 +n09833441 +n01623110 +n02807523 +n10598181 +n03725600 +n10368528 +n04116098 +n12719944 +n02045864 +n02173373 +n02811059 +n04479823 +n07816398 +n10572889 +n04142731 +n07687381 +n02799323 +n07865484 +n01858845 
+n12684379 +n01842235 +n09242389 +n02028727 +n03527565 +n03438863 +n15019030 +n13907272 +n09659039 +n04251791 +n03683995 +n04137217 +n04389430 +n09785659 +n02016816 +n03124590 +n01859325 +n03138669 +n02999936 +n11926365 +n12686077 +n03517760 +n09734450 +n04563413 +n12074867 +n01564217 +n12521394 +n06267893 +n03594148 +n04139395 +n12369309 +n01544389 +n12048056 +n04524941 +n03016868 +n03653740 +n02795528 +n03687137 +n03766935 +n03361297 +n04263502 +n10043491 +n03446268 +n01994910 +n03891538 +n10091564 +n10226413 +n02755140 +n03500389 +n10237196 +n03625646 +n06596474 +n03360300 +n09730824 +n10732010 +n04469514 +n02904927 +n04961331 +n02936570 +n03680858 +n07585758 +n09199101 +n04050933 +n03712337 +n03911513 +n01556182 +n03102371 +n07928887 +n12133462 +n03974070 +n03971218 +n03292475 +n03425241 +n03440216 +n11995092 +n02894158 +n02918112 +n10568358 +n11524451 +n03169176 +n04100519 +n07588193 +n06883725 +n02860640 +n07762114 +n04082710 +n07896893 +n10167152 +n03287351 +n02788021 +n08494231 +n01560935 +n03249342 +n04564581 +n09349648 +n07704205 +n03510244 +n12127460 +n09945745 +n11719286 +n11613459 +n12656369 +n03824381 +n07655263 +n09894143 +n04964001 +n02161457 +n07654298 +n07930433 +n02979074 +n02026948 +n13914608 +n07611267 +n02843276 +n09827363 +n10259780 +n04432662 +n11715678 +n12388858 +n03057920 +n10465451 +n03855214 +n07728181 +n09835348 +n03549732 +n04589325 +n03491032 +n00452034 +n03948242 +n01456756 +n07921615 +n02809105 +n12889713 +n07586894 +n07734879 +n07905979 +n12847374 +n12129134 +n02122580 +n04028074 +n02911332 +n09251407 +n07697825 +n04597309 +n02800213 +n03480579 +n07621618 +n04170933 +n03743279 +n01916481 +n04037220 +n10748620 +n02708433 +n12007196 +n02561381 +n04103769 +n03030880 +n04413969 +n03911658 +n04590746 +n00476389 +n04331639 +n07725789 +n01792429 +n02949542 +n07686720 +n04064862 +n04447028 +n01713764 +n09854218 +n04032603 +n04405907 +n15093298 +n04385536 +n11954345 +n01560793 +n09249034 +n03784270 +n03436549 +n01324610 +n02379183 
+n07616487 +n04119478 +n03309356 +n12865037 +n12850168 +n04250850 +n03024064 +n04412097 +n02982515 +n00450070 +n10175248 +n11847169 +n12276872 +n12870891 +n10229883 +n10505613 +n03482252 +n09300905 +n02919890 +n07617611 +n10283170 +n01607962 +n01671125 +n07894551 +n04561287 +n00005787 +n10025635 +n02850732 +n03732020 +n02036711 +n07907429 +n03797896 +n03004824 +n12011620 +n10300303 +n03105467 +n03767745 +n07868508 +n07868200 +n03788047 +n07886057 +n04559451 +n09845401 +n04373704 +n02676938 +n02565324 +n02667478 +n02122878 +n03244047 +n01747589 +n04320973 +n13205058 +n02379430 +n11959632 +n10183931 +n07683490 +n10055410 +n04370288 +n03273551 +n13900422 +n07899434 +n04053677 +n07740461 +n11879722 +n04282494 +n02981911 +n03449451 +n07581249 +n03965456 +n11808468 +n13881644 +n11725973 +n12091213 +n13193856 +n02873520 +n02754656 +n02431976 +n01324431 +n02385214 +n01888411 +n12680864 +n07731284 +n04337287 +n07631926 +n02549248 +n04395024 +n07585557 +n02776825 +n09460046 +n12023108 +n00475403 +n10098517 +n07902336 +n03683708 +n02412210 +n04397452 +n04583212 +n13869547 +n03632577 +n01616086 +n02763901 +n08256735 +n03015478 +n02084732 +n12178896 +n11966215 +n07605380 +n13869788 +n01847170 +n07744811 +n01854700 +n00444937 +n10422405 +n07801892 +n09688804 +n11879054 +n02802215 +n07908411 +n07822518 +n01558594 +n07935737 +n10730728 +n04436329 +n04294879 +n04972350 +n12911440 +n13886260 +n07578093 +n02537525 +n03703730 +n09607630 +n13865904 +n02360282 +n11731659 +n04126066 +n04212165 +n11618290 +n07588574 +n09269472 +n11896722 +n02892304 +n03487642 +n02028342 +n03321563 +n03135030 +n03522100 +n03253886 +n04095109 +n06470073 +n12603449 +n10644598 +n10260800 +n01535469 +n09696456 +n03553019 +n03963198 +n11918473 +n10314517 +n03002341 +n07574923 +n10421470 +n05716342 +n03244231 +n01730563 +n11691857 +n12807251 +n12345899 +n03142679 +n01531512 +n12307240 +n07835457 +n04535370 +n00451186 +n12481458 +n03434188 +n09734185 +n04578934 +n04167346 +n02747802 +n03459328 +n03301940 
+n01562014 +n07690431 +n10642596 +n03696065 +n12781940 +n02759257 +n04392764 +n04218564 +n03499907 +n01536780 +n09751895 +n03235042 +n04570815 +n12070381 +n09448690 +n07625061 +n10178216 +n04560113 +n09457979 +n03858085 +n02421792 +n02944579 +n10085869 +n09718811 +n04103206 +n04239786 +n04501947 +n01321123 +n02390015 +n03964495 +n01554448 +n02925107 +n03028596 +n12483625 +n03227317 +n10701644 +n11968704 +n03900393 +n01851038 +n02276078 +n03132776 +n07585906 +n04480033 +n07880458 +n12887293 +n07921239 +n03307037 +n04595028 +n04244379 +n13131028 +n10313724 +n09436708 +n02694045 +n09941787 +n00449796 +n01817346 +n07928696 +n03401279 +n12901724 +n11646167 +n07682477 +n09415671 +n07900225 +n03607029 +n02692232 +n11834654 +n07935379 +n12437930 +n03762434 +n07922764 +n03595523 +n04546340 +n10686885 +n03516844 +n03767112 +n09896685 +n03859608 +n03149686 +n07920872 +n12388143 +n10406391 +n04233715 +n04373089 +n02023992 +n01947396 +n12115180 +n00479616 +n03962852 +n02392434 +n12414035 +n14976871 +n03201776 +n10665587 +n03600285 +n04402449 +n08539072 +n03629231 +n12860365 +n03488438 +n03337383 +n12455950 +n10384392 +n02953455 +n03101796 +n07919572 +n03233744 +n01578180 +n01756508 +n04556533 +n02962843 +n02882190 +n03731483 +n01850873 +n05260240 +n03111177 +n09836519 +n03030557 +n11789066 +n02788572 +n07903101 +n04067818 +n07840804 +n01567678 +n12427184 +n03333610 +n02416964 +n10607291 +n07936548 +n05451384 +n02968074 +n07605597 +n02704949 +n07609215 +n01951274 +n07696977 +n03180384 +n04303357 +n03291741 +n02207805 +n10123844 +n03420345 +n12384227 +n02758863 +n02047975 +n03978966 +n03549199 +n04275175 +n09294877 +n09836343 +n11970586 +n02010728 +n10369317 +n12681893 +n03192543 +n12413165 +n12174521 +n11916696 +n10042845 +n07822197 +n04968749 +n10323634 +n12849416 +n02814774 +n05538625 +n03078802 +n12230794 +n07726095 +n03051249 +n12005656 +n11876432 +n12164881 +n09711435 +n01622483 +n09896170 +n07684289 +n03368352 +n07910048 +n03159535 +n00466377 +n01541386 +n11647703 
+n09752023 +n07903731 +n12249542 +n03794798 +n11786131 +n02852043 +n10493685 +n09846894 +n01752585 +n01536186 +n07618432 +n09859152 +n02065026 +n02382635 +n07867616 +n03885788 +n04255586 +n03275681 +n11961100 +n12485981 +n04495698 +n03293741 +n13902048 +n03254862 +n07903962 +n01594787 +n11962272 +n03284886 +n07842202 +n10157128 +n02405302 +n04443766 +n06266633 +n02519862 +n01487506 +n03373943 +n04247876 +n04327204 +n03349771 +n09260907 +n10092794 +n12223764 +n03504723 +n11926833 +n01820052 +n13032381 +n03889871 +n03209359 +n04608923 +n15093137 +n15091304 +n03688405 +n09905185 +n03543112 +n11611356 +n03885028 +n03234164 +n07594066 +n02396014 +n03456186 +n09874725 +n11601333 +n02917521 +n03055857 +n02804123 +n12352844 +n12866002 +n09858165 +n12037691 +n02565072 +n04477387 +n02008643 +n07867021 +n04119360 +n09893191 +n02944146 +n12435649 +n13197274 +n04974859 +n07751004 +n12003696 +n02762508 +n02680512 +n01743086 +n06998748 +n10607478 +n07613815 +n01559477 +n01859852 +n03239054 +n04466871 +n05263183 +n13173882 +n07897438 +n12427757 +n04400737 +n03291963 +n07682808 +n11692265 +n04130143 +n09445289 +n07696839 +n03835197 +n12821895 +n09734639 +n03365374 +n04305210 +n04962240 +n09871867 +n07897750 +n07616386 +n09443281 +n03641569 +n13882563 +n07680761 +n10498816 +n04034262 +n03533014 +n07928790 +n07690152 +n10060352 +n04124370 +n12453186 +n04509171 +n03013580 +n10604979 +n12515711 +n04971211 +n07693223 +n03786715 +n07894703 +n02761834 +n04232800 +n03437741 +n04045644 +n14976759 +n03042697 +n12557681 +n06275095 +n11678010 +n01586941 +n07684517 +n07822845 +n03483823 +n09951616 +n03180865 +n07861557 +n03644378 +n12848499 +n11962667 +n03886762 +n04238128 +n11979964 +n13915113 +n12791329 +n12457091 +n03341153 +n10267865 +n03484576 +n10186216 +n07612137 +n03843438 +n11807525 +n11931540 +n02027897 +n07614730 +n04116294 +n03469903 +n10017272 +n03688605 +n07860103 +n03981566 +n01888045 +n03345837 +n11998888 +n02071636 +n02726017 +n04310157 +n04607869 +n01622959 +n08524735 
+n03119203 +n12031927 +n03610524 +n02807616 +n04056180 +n03233905 +n03374473 +n14810561 +n11944954 +n03121431 +n09750891 +n08505018 +n10727171 +n12357485 +n12571781 +n12067193 +n07586604 +n02086753 +n03548086 +n02560110 +n07804900 +n02880393 +n04208427 +n12931542 +n01594968 +n05218119 +n03520493 +n03727605 +n12687698 +n03612965 +n04135315 +n07730320 +n10540114 +n07599911 +n01323493 +n02115096 +n04590263 +n12043836 +n02861387 +n09836786 +n04966941 +n02816768 +n13131618 +n10701962 +n02919792 +n03442597 +n04325041 +n03333129 +n04091693 +n04950952 +n10631309 +n04177931 +n13234678 +n01970667 +n07748416 +n07893642 +n07691650 +n03660909 +n04145863 +n11945514 +n10334009 +n12336973 +n03954393 +n04558478 +n09899929 +n03487533 +n07816575 +n07877187 +n07863547 +n01603812 +n02098906 +n04973585 +n03674440 +n04371050 +n12243109 +n07871234 +n02928049 +n07574504 +n07889274 +n12141167 +n04543996 +n03080633 +n03423479 +n07879659 +n04380916 +n10514429 +n07584423 +n04009801 +n12479537 +n07606538 +n07698543 +n12353754 +n10132035 +n03367545 +n04245508 +n09811852 +n02024763 +n04052442 +n10120330 +n12352639 +n12606438 +n07752966 +n09772930 +n02535759 +n11737534 +n10345015 +n12427566 +n09705784 +n04112654 +n02985963 +n03758089 +n12953484 +n07906572 +n02881757 +n12739332 +n03718458 +n03407865 +n07775050 +n03210552 +n09452395 +n09789566 +n10566072 +n10559996 +n07826930 +n12414932 +n01887474 +n03026907 +n07751148 +n10223177 +n03957420 +n03788601 +n12244819 +n12421137 +n04266162 +n10038409 +n02981024 +n03228967 +n11825351 +n12058822 +n11963932 +n03041449 +n03046029 +n07590502 +n02932523 +n02152881 +n04970398 +n07887967 +n12812478 +n12421917 +n02708711 +n11870747 +n04290507 +n07934282 +n01608265 +n12070583 +n03205574 +n02305085 +n07866015 +n02960903 +n10098624 +n00481803 +n07938007 +n02693246 +n03923379 +n04103665 +n11792742 +n12489815 +n04971313 +n01668892 +n01055165 +n03215508 +n12104501 +n07899292 +n12822955 +n07713074 +n03842012 +n02449350 +n07868955 +n02835829 +n12283542 +n04525584 
+n07910656 +n11625003 +n03987266 +n02805983 +n15091846 +n09736945 +n04973816 +n02439398 +n01519873 +n07899003 +n03019938 +n07582152 +n01885498 +n12108871 +n02934451 +n04327682 +n07696625 +n09750770 +n12084890 +n03960374 +n07585107 +n01570839 +n11905392 +n06277135 +n07842044 +n03751269 +n04398951 +n12861892 +n12649539 +n07596967 +n07580592 +n12845413 +n07690739 +n07804657 +n04334105 +n03779128 +n03268918 +n03066359 +n02744323 +n12596148 +n04272389 +n07832416 +n10210911 +n01548865 +n03221351 +n15091669 +n07878926 +n07607967 +n12171966 +n02846141 +n07576781 +n02922292 +n10092643 +n01732614 +n02578771 +n02864593 +n03537241 +n09635534 +n03268645 +n07852833 +n13873917 +n12640839 +n03506727 +n10536416 +n09976429 +n10692482 +n07600285 +n04156946 +n07818689 +n02605703 +n02710429 +n02890351 +n03408054 +n03121298 +n02731629 +n12450840 +n04061681 +n10153414 +n07648913 +n07891309 +n01562265 +n14973585 +n01610226 +n06267991 +n03302938 +n07822323 +n07826091 +n02764398 +n10406266 +n09282208 +n01734104 +n04283096 +n03530910 +n11542137 +n02610664 +n03856012 +n01531811 +n07862611 +n11625632 +n12643313 +n02469248 +n03333711 +n02907082 +n02122430 +n01559804 +n09744161 +n10187990 +n12015525 +n07844867 +n07887304 +n02878425 +n02009380 +n11448153 +n10655594 +n12566954 +n11901977 +n03999160 +n02389779 +n07928488 +n12785889 +n04281375 +n03745146 +n03224603 +n04594828 +n12835331 +n09715427 +n11615026 +n09972010 +n04038231 +n02379329 +n03445326 +n10753442 +n04249882 +n11727738 +n07866723 +n04282992 +n11621281 +n01566645 +n03919430 +n11980682 +n03480719 +n11625804 +n10467395 +n09436444 +n07867751 +n03684611 +n03788498 +n12062626 +n07808904 +n07690585 +n03865557 +n10711766 +n10465831 +n04380255 +n12166128 +n04432203 +n07892418 +n10432441 +n12991184 +n04209613 +n04459773 +n09666883 +n07807472 +n09873899 +n12939874 +n04545748 +n09637339 +n07919441 +n03987376 +n03645577 +n03437430 +n10671613 +n02964843 +n09707289 +n11700058 +n03877351 +n03518445 +n07643200 +n02140049 +n12683791 +n12418221 
+n04154152 +n03397947 +n03238131 +n11851839 +n04545858 +n07744682 +n02995871 +n07593199 +n03543394 +n10293332 +n12658481 +n11599324 +n02705201 +n03920867 +n08249459 +n02876084 +n03937835 +n01397871 +n03849679 +n12016567 +n04208936 +n07696728 +n13148208 +n01904029 +n08659861 +n07878785 +n07827130 +n03390983 +n02624807 +n03319745 +n03994614 +n00446493 +n12477583 +n02920658 +n04602956 +n02688273 +n07577538 +n04350581 +n09283405 +n04074185 +n04495843 +n03538179 +n03454885 +n03878211 +n10308168 +n08518171 +n02660208 +n07904760 +n07928367 +n10174445 +n02137015 +n02863426 +n07700003 +n04015908 +n03946076 +n11725821 +n01794344 +n04364160 +n01663782 +n04283255 +n02822064 +n04406239 +n02782681 +n11990313 +n03563460 +n02957008 +n07889814 +n07896060 +n03683079 +n04278447 +n13011595 +n11810358 +n03836451 +n12827537 +n03545470 +n03213538 +n07929351 +n03471190 +n02882301 +n03625943 +n03397087 +n11955896 +n04097373 +n03145522 +n03034405 +n02889646 +n02928299 +n09652149 +n01641391 +n04593524 +n07651025 +n03719343 +n03884778 +n03452594 +n02174659 +n12345280 +n03039827 +n03309687 +n11635433 +n02057330 +n01664990 +n09779790 +n02011016 +n09689958 +n07770763 +n03010915 +n03443912 +n02946509 +n13050397 +n03031012 +n04217546 +n04124202 +n12766869 +n04177041 +n12050533 +n03251932 +n03086580 +n03918737 +n04386792 +n03176594 +n01577035 +n01669654 +n01818832 +n10441962 +n03885904 +n03724756 +n02925666 +n03549589 +n03062122 +n02828427 +n12604228 +n03624400 +n07725888 +n03873699 +n01503976 +n02887079 +n03610098 +n02940385 +n04610013 +n03652100 +n04496872 +n04008385 +n02583890 +n10476467 +n03395514 +n03306385 +n04228581 +n02389261 +n12576323 +n01579149 +n01623425 +n02593019 +n03995265 +n02124484 +n12745386 +n04355267 +n02643836 +n01614343 +n03810952 +n04058594 +n12278650 +n03474779 +n02823510 +n00442437 +n12039317 +n04574067 +n03762602 +n02153109 +n03518943 +n04289827 +n02288268 +n07749969 +n04132985 +n03213826 +n04307986 +n03567066 +n02049088 +n04408871 +n03522003 +n09305898 +n04266375 
+n08571898 +n03039259 +n01587526 +n03261603 +n00464277 +n02627532 +n02992368 +n03640850 +n03037404 +n04525191 +n02106854 +n07772147 +n04173511 +n12761284 +n03257210 +n02813544 +n07740342 +n04066270 +n03070059 +n03616428 +n02904233 +n03209910 +n04389854 +n03078995 +n03193260 +n01488038 +n01754533 +n12629305 +n02055107 +n11664418 +n04228693 +n03353951 +n03440682 +n03025250 +n03300216 +n02042046 +n04226826 +n03342015 +n03090000 +n02050313 +n03492250 +n01535690 +n01572654 +n03465718 +n02879309 +n06278338 +n04113406 +n03695857 +n09720256 +n01860002 +n02851939 +n09828216 +n02564270 +n03528901 +n02542432 +n11978961 +n01670802 +n03956623 +n01612275 +n09376786 +n03222318 +n02813645 +n02213543 +n13898207 +n03616763 +n03616979 +n11904109 +n04212282 +n04608435 +n02042472 +n04198453 +n03216402 +n02015357 +n12282737 +n02699629 +n12866635 +n02048353 +n02933340 +n01793715 +n12001707 +n02878222 +n03187037 +n03105306 +n04080705 +n04254009 +n01623880 +n02839592 +n03436182 +n01591123 +n01318279 +n03002816 +n13155095 +n03141702 +n03775388 +n12165170 +n03322836 +n03259401 +n04471148 +n03911767 +n12585629 +n04317325 +n04257986 +n03133050 +n02035210 +n12891305 +n11882426 +n04491388 +n12948251 +n03498781 +n04262161 +n03775636 +n09915651 +n07584332 +n07852614 +n11626152 +n03901750 +n09723067 +n04265904 +n09920283 +n02397744 +n03253796 +n07712959 +n03898129 +n01743936 +n02075612 +n04560292 +n03479397 +n04334365 +n04357121 +n10145902 +n03844673 +n09854421 +n12687957 +n12598027 +n03944138 +n01839750 +n07722888 +n04258859 +n03088389 +n03351434 +n03509608 +n01677747 +n03145147 +n12046815 +n03505133 +n01629962 +n03333252 +n03993703 +n02962061 +n04529962 +n03463666 +n07681691 +n12160857 +n04187233 +n09331251 +n11614713 +n04376400 +n12301445 +n12633994 +n03883524 +n11614420 +n13062421 +n03645011 +n03293863 +n11640132 +n02579928 +n02854739 +n04461437 +n07729384 +n02977936 +n02836392 +n03593122 +n01666228 +n07820683 +n07568502 +n11910460 +n09348460 +n09712324 +n02403740 +n03482877 +n04370774 
+n07750146 +n12992177 +n03152303 +n04134008 +n09805324 +n01611800 +n04374315 +n07586099 +n02032222 +n01979874 +n04350769 +n02907873 +n03016609 +n02543565 +n03256166 +n03016737 +n02419336 +n03268790 +n03559999 +n07765999 +n04607035 +n02416104 +n02123917 +n12484784 +n03225108 +n10739391 +n03506880 +n02918831 +n03045228 +n12516828 +n01314663 +n04172342 +n02768226 +n12368028 +n01500476 +n01558149 +n03604156 +n04035912 +n02359915 +n12261571 +n03875955 +n01887623 +n03871371 +n03390786 +n12494794 +n03826039 +n04465358 +n03838298 +n03165466 +n04229737 +n01321770 +n04354026 +n02998003 +n04114844 +n10611613 +n03600475 +n01909906 +n00466880 +n04284869 +n07722485 +n04496614 +n03298716 +n02285801 +n04081699 +n07765208 +n12659539 +n11618525 +n11757653 +n07727048 +n03913343 +n12070016 +n02697675 +n04284572 +n02595702 +n04482297 +n03516996 +n03704549 +n02040266 +n04476116 +n01323261 +n03823216 +n07696403 +n03226880 +n09734535 +n03950537 +n01671479 +n03049924 +n12593994 +n04568841 +n03604400 +n01837072 +n01754370 +n03122202 +n12338454 +n04094720 +n04150980 +n03429682 +n03884926 +n03378005 +n02434954 +n03461288 +n02893692 +n04472563 +n10472129 +n04590021 +n07739344 +n04162433 +n03395859 +n12059314 +n03498662 +n03678729 +n02927764 +n02770211 +n11710393 +n07730207 +n04178190 +n07772935 +n03801880 +n04414675 +n12729521 +n12203529 +n04122578 +n04575824 +n06267655 +n03698360 +n02804515 +n02431337 +n08598568 +n02893608 +n02270623 +n00479440 +n11616662 +n02884994 +n04305323 +n02407625 +n04476831 +n04222307 +n03179910 +n11623967 +n00446311 +n00454983 +n02886434 +n12279458 +n03723781 +n11816121 +n02403231 +n11808299 +n07816296 +n03219483 +n02657694 +n00453478 +n02816656 +n02625851 +n04112752 +n03339529 +n12171316 +n02044517 +n04137773 +n01486838 +n03015149 +n12911673 +n03967270 +n03498441 +n11672269 +n03386870 +n11615967 +n02580679 +n01681653 +n02793199 +n02824319 +n10727458 +n02555863 +n01533000 +n02175916 +n12064389 +n04383015 +n02469472 +n03101664 +n03623338 +n12295796 +n02869249 
+n01792042 +n03447075 +n04453390 +n04382438 +n04112252 +n03332393 +n12729729 +n01851207 +n04269270 +n12333771 +n06272612 +n03135532 +n02927887 +n11711537 +n12301180 +n04107743 +n01813948 +n03282295 +n09714694 +n00483409 +n01504344 +n04279353 +n04040373 +n12658308 +n04134523 +n10104064 +n12056601 +n04525417 +n07819166 +n12263038 +n02072798 +n03125057 +n03367410 +n04000592 +n03549897 +n01877606 +n01564914 +n12307076 +n02855925 +n03176763 +n12271933 +n04121728 +n07690511 +n02825442 +n04442441 +n01630901 +n03088580 +n02499808 +n10675010 +n01531971 +n02273392 +n01526521 +n01531344 +n03667664 +n02888270 +n04412416 +n07733394 +n04559910 +n04105704 +n11792341 +n04201064 +n01693175 +n04555291 +n02908773 +n01976868 +n03529175 +n03365231 +n03622839 +n04258333 +n03327133 +n03425769 +n12477747 +n03718935 +n11727540 +n07933799 +n03030262 +n12043673 +n02619550 +n07937461 +n12198286 +n08560295 +n12402348 +n01733957 +n12344700 +n02763604 +n11925303 +n01557962 +n03927299 +n11611758 +n03035252 +n09454412 +n04004990 +n03456299 +n02175569 +n03668279 +n12352990 +n03507241 +n01534155 +n12278371 +n02499022 +n03822767 +n01318381 +n04024983 +n04277493 +n11934616 +n02027075 +n11611561 +n03454442 +n02236355 +n01732789 +n07722052 +n01489501 +n04409625 +n10563403 +n01817263 +n07757511 +n03770316 +n02977438 +n01840775 +n03607923 +n03322704 +n02375302 +n01614038 +n01646555 +n03952576 +n02946824 +n12847008 +n03016389 +n11809594 +n03165096 +n03839671 +n02687821 +n01689081 +n03822656 +n02597608 +n12336727 +n01579578 +n03631922 +n03904909 +n11658331 +n04224543 +n12621410 +n03870672 +n04252331 +n09720842 +n01396048 +n11988596 +n00483205 +n02871005 +n01597022 +n02382039 +n07743902 +n02358890 +n07877961 +n05263448 +n01862399 +n04136800 +n10624540 +n11990167 +n02731398 +n03366974 +n03490006 +n01561732 +n02626265 +n10627252 +n12402051 +n08517676 +n10488656 +n03099274 +n03718581 +n11806219 +n01830042 +n07728585 +n03732114 +n10755080 +n03359285 +n07720277 +n03354207 +n01596273 +n04416005 +n01847253 
+n07733567 +n09725653 +n04274985 +n00449977 +n07772274 +n12063639 +n01530439 +n01322508 +n04397768 +n07273802 +n04261281 +n10524076 +n01678343 +n03410938 +n01797020 +n02388832 +n07719616 +n03639497 +n09787765 +n07721018 +n11818069 +n04185529 +n11644462 +n12074408 +n00483848 +n01583495 +n11891175 +n03347617 +n03308481 +n02535258 +n07750872 +n07748157 +n02855701 +n04584373 +n02461830 +n02912557 +n12277578 +n03604311 +n03643253 +n03031152 +n04039742 +n03435743 +n13908201 +n04150153 +n03250405 +n01410457 +n02357401 +n12588780 +n12729315 +n01690149 +n02538216 +n03171228 +n02424909 +n06274760 +n03775747 +n04211857 +n12429352 +n12272239 +n11759853 +n03401129 +n12649317 +n02625258 +n12651611 +n03603442 +n02803934 +n03861271 +n02605936 +n02018368 +n12711984 +n02811936 +n04612026 +n01339471 +n02923682 +n09194227 +n04346157 +n03939178 +n12635532 +n01593028 +n01793249 +n02380464 +n12400720 +n07708398 +n12020941 +n12492106 +n12850336 +n12749679 +n02892948 +n12591017 +n03193423 +n01791463 +n11979527 +n12134025 +n12167075 +n09308743 +n13108545 +n01618503 +n07827284 +n07724492 +n02338145 +n04533946 +n01586020 +n07598256 +n01603953 +n12646740 +n03067518 +n04046277 +n01532511 +n07769584 +n11644046 +n12753573 +n02681392 +n08492461 +n07749446 +n04409384 +n01791954 +n12330891 +n04560882 +n10145480 +n04250473 +n02655848 +n02903126 +n11736851 +n11901294 +n12865824 +n03870105 +n00449892 +n04240752 +n11851258 +n04200537 +n12049562 +n01521399 +n03565830 +n07860447 +n03067212 +n01664674 +n07561590 +n02727141 +n02324514 +n02372952 +n01584853 +n07766173 +n11811706 +n03097362 +n04200258 +n02732572 +n01853195 +n12282527 +n09838621 +n02764505 +n04256891 +n12337617 +n12635955 +n07831267 +n11628793 +n12316572 +n07807834 +n02037869 +n01821869 +n02820556 +n04517211 +n01839086 +n03842986 +n07698401 +n02386224 +n07841800 +n01830915 +n11616486 +n11902389 +n03427202 +n12727101 +n01851573 +n02125494 +n07746186 +n11628087 +n07746551 +n03943115 +n11892029 +n02861022 +n11733312 +n01852329 +n09392402 
+n12336224 +n07887099 +n03403643 +n04414199 +n07895100 +n02264232 +n02317781 +n07823460 +n07755929 +n02524202 +n04324297 +n11627512 +n01585715 +n02922578 +n00479887 +n02687423 +n02416880 +n11784126 +n12073991 +n01853870 +n01561452 +n04187970 +n10300154 +n02520147 +n12294124 +n07743224 +n12066018 +n11634736 +n02041678 +n11626585 +n02386141 +n03986949 +n07860331 +n12356023 +n12072722 +n03082280 +n12083113 +n12979829 +n01448594 +n03007444 +n07858978 +n01641739 +n02043333 +n12020736 +n02751215 +n04528079 +n01538200 +n07925608 +n12091550 +n03742019 +n03518305 +n01642539 +n03414029 +n04363991 +n03767966 +n02596067 +n01586374 +n02885882 +n04080138 +n11617631 +n02033779 +n09451237 +n02310585 +n12648045 +n03955489 +n01752736 +n07899899 +n02299505 +n01579410 +n02156871 +n02998841 +n03759661 +n02050809 +n02683454 +n11621950 +n02910145 +n04967801 +n07896661 +n11906917 +n12275675 +n11611233 +n07736692 +n02312640 +n12588320 +n04399537 +n12757303 +n04197781 +n12717224 +n11635152 +n03122295 +n01792955 +n13133932 +n02518324 +n01584695 +n02915904 +n02967294 +n04345201 +n03019434 +n02470238 +n03049782 +n03101517 +n12709688 +n03716887 +n02422391 +n12638753 +n00288384 +n02162561 +n02053584 +n01317294 +n03334291 +n07814634 +n12273768 +n12406715 +n11644226 +n01646802 +n03460147 +n12338796 +n01972541 +n02147947 +n03890093 +n04127395 +n01581984 +n01681328 +n02213239 +n04582869 +n03254189 +n03274265 +n03186285 +n11839823 +n01624833 +n09792969 +n07891189 +n12023726 +n07619208 +n03466600 +n01849676 +n12190869 +n03079136 +n12317296 +n13001930 +n00477639 +n02944459 +n03903733 +n04131208 +n12710295 +n12180885 +n11612349 +n03443149 +n03982331 +n04264765 +n12642090 +n03237416 +n13868944 +n04046400 +n11705171 +n11979715 +n12597134 +n01609956 +n01568294 +n01469103 +n00443692 +n01606672 +n04556408 +n07690019 +n03977592 +n03358726 +n12696492 +n01573240 +n11632619 +n01772664 +n03453231 +n04179712 +n03646020 +n01812662 +n04306592 +n07724654 +n13908580 +n02903852 +n04284438 +n13132656 +n04317063 
+n07829248 +n01589718 +n02654745 +n12294331 +n12515925 +n07900825 +n07721195 +n04189282 +n11907689 +n01624537 +n12333530 +n07762244 +n11757851 +n01599159 +n04038338 +n01568892 +n12691661 +n09744834 +n04307767 +n03120778 +n07920540 +n03781683 +n04185804 +n12080820 +n04354182 +n07574426 +n02579303 +n03046802 +n12078172 +n03210245 +n01614556 +n02304432 +n07713267 +n09724656 +n02861147 +n12755387 +n01483830 +n12921868 +n12026018 +n07817871 +n12062781 +n04241573 +n11621727 +n03376159 +n11815721 +n13007034 +n03540090 +n00450866 +n11619455 +n01528845 +n01568720 +n12743352 +n02871314 +n03606251 +n01490670 +n04246060 +n02053425 +n10780284 +n01915700 +n04510706 +n00456465 +n01563945 +n11809094 +n09855433 +n04112579 +n03855333 +n09809925 +n03413684 +n02123478 +n12070712 +n03651843 +n02032355 +n01591005 +n01646648 +n02752615 +n02415829 +n03283221 +n04368496 +n01573360 +n02321170 +n10348526 +n04446844 +n07763792 +n12077944 +n04431025 +n02895438 +n10082687 +n07714188 +n02262449 +n03090172 +n12491017 +n01558461 +n12754781 +n04070415 +n04297098 +n03424862 +n01970164 +n09833536 +n01793435 +n01670535 +n09894445 +n09676247 +n01548492 +n12501202 +n03250089 +n03358380 +n02578928 +n12020184 +n02301935 +n03393017 +n12340755 +n01849863 +n01748906 +n03075946 +n01810268 +n01984245 +n04555400 +n12286988 +n04097760 +n02050586 +n12104238 +n01679962 +n02709101 +n01569060 +n12790430 +n01757901 +n13199717 +n11815918 +n07827410 +n02970534 +n12942572 +n07924276 +n04103918 +n11704093 +n07908647 +n07601686 +n12172906 +n04084889 +n02381261 +n02299157 +n11978713 +n12460957 +n02963503 +n03272810 +n12469517 +n03443005 +n01797307 +n02952237 +n11908549 +n13912540 +n03428226 +n10276477 +n01757343 +n01443243 +n01607600 +n03580518 +n12709103 +n07579688 +n04329834 +n12710415 +n11808932 +n10583790 +n02213788 +n11622184 +n12596709 +n02216211 +n07721942 +n07765361 +n01848453 +n11724109 +n02028451 +n02935017 +n12046028 +n10629939 +n00441073 +n07900958 +n12451399 +n02823964 +n04210120 +n01848840 +n10485883 
+n07767709 +n02432704 +n11622591 +n03210372 +n07848196 +n11992806 +n02953197 +n07620689 +n01521756 +n03571625 +n03158186 +n12647560 +n02065407 +n01572782 +n09890749 +n05581932 +n07754451 +n03350204 +n13044375 +n12294723 +n12482893 +n04434531 +n12989938 +n12196336 +n01701859 +n07746334 +n11941924 +n02047411 +n12650379 +n10486166 +n01599556 +n01567879 +n12675876 +n01682435 +n02043808 +n12362668 +n12306089 +n02999138 +n01679626 +n03557270 +n01546039 +n11901759 +n01549053 +n11883328 +n06596727 +n03193107 +n11612018 +n03300443 +n03612010 +n03668488 +n12648888 +n01448291 +n11632167 +n10262445 +n09742101 +n09717233 +n04299370 +n03094159 +n04536595 +n03514693 +n02029706 +n02886321 +n07816052 +n04045255 +n01851731 +n02627292 +n01841288 +n02739889 +n02932693 +n03784896 +n04569063 +n07902799 +n03863108 +n02607470 +n13200651 +n07916183 +n01573898 +n04347119 +n10076604 +n13033577 +n01824035 +n03630262 +n04426316 +n03064250 +n12262018 +n12048399 +n12279772 +n04143140 +n07829331 +n12891643 +n01826680 +n12646605 +n13103877 +n02023855 +n03086868 +n04163530 +n03736470 +n04358117 +n13872822 +n03159640 +n01680655 +n11611087 +n03980478 +n02978478 +n01555004 +n12402840 +n07763987 +n04387706 +n04979002 +n03258330 +n09856671 +n11624192 +n01538059 +n02003839 +n12552309 +n10469874 +n01576076 +n03643149 +n04419868 +n04586581 +n00483508 +n03131967 +n01847407 +n07929172 +n09683757 +n03786621 +n04369282 +n12733870 +n11612575 +n11619227 +n03301833 +n02176439 +n01569971 +n07935043 +n02563792 +n02051059 +n04482177 +n11859472 +n11710136 +n04115144 +n07864934 +n07691758 +n02620167 +n07748276 +n03415486 +n07835921 +n00452152 +n01848323 +n12906214 +n12075010 +n01563449 +n01499396 +n01570267 +n12047345 +n07920989 +n07601572 +n02683558 +n04428634 +n04345028 +n12161969 +n03460040 +n02561514 +n02006364 +n03582959 +n11812910 +n13185269 +n04297847 +n07896165 +n01552813 +n12361946 +n02031585 +n12766595 +n11622368 +n11695599 +n11615387 +n02509197 +n12409470 +n01314388 +n11758799 +n09846469 +n02675219 
+n04253057 +n04041243 +n12276628 +n04381724 +n01855188 +n02203152 +n04403925 +n11895092 +n11924849 +n04172904 +n11888800 +n01546506 +n07906718 +n01489920 +n03436417 +n03615655 +n07765073 +n02434190 +n02004492 +n12282235 +n12406488 +n11981192 +n10373390 +n13183056 +n04332074 +n12818346 +n07731006 +n02598573 +n02438580 +n01957335 +n03356982 +n10288964 +n02629230 +n02042759 +n12319414 +n01451426 +n03521675 +n02016066 +n01813532 +n13207335 +n11805544 +n04401828 +n02952109 +n03963294 +n10013811 +n12058630 +n01551711 +n01574560 +n01858780 +n10093818 +n03858183 +n01550172 +n03571280 +n02309242 +n10258786 +n01569423 +n10134178 +n08578517 +n04445327 +n03250279 +n02584449 +n03223553 +n04523831 +n04485423 +n02050442 +n04474035 +n04528968 +n02649546 +n01913166 +n09971273 +n04517408 +n02437482 +n03824713 +n03778817 +n07643026 +n01613177 +n12022054 +n07714448 +n07592768 +n00454493 +n03296328 +n02305929 +n03084834 +n03698815 +n12093600 +n08649711 +n03466493 +n04067658 +n03041114 +n03514451 +n01491006 +n04178329 +n03790953 +n03938401 +n02048115 +n07768858 +n03273740 +n10333601 +n05418717 +n12754003 +n02098806 +n03314608 +n01565930 +n12113195 +n12284821 +n12483427 +n04332580 +n10382710 +n03416094 +n02837887 +n03917198 +n14131950 +n04414476 +n11861641 +n11903671 +n01841441 +n09872066 +n01806467 +n04964799 +n00467320 +n01595974 +n03220692 +n01339083 +n01825278 +n11727358 +n04518343 +n11984144 +n07724269 +n02292692 +n02324850 +n01753032 +n01624115 +n11816649 +n07930062 +n02460451 +n12319204 +n04340521 +n12325234 +n01541102 +n02979836 +n00141669 +n01822300 +n11658544 +n12272883 +n03334382 +n11726707 +n03639077 +n07904934 +n03516367 +n03698723 +n03553248 +n11812094 +n03724417 +n01540566 +n02341974 +n11819912 +n07734555 +n02987379 +n03580845 +n12546962 +n02548247 +n12753245 +n07768423 +n12849279 +n11617090 +n02912894 +n07840027 +n12295033 +n12703383 +n02696165 +n10419785 +n04426427 +n03694639 +n11712282 +n04142999 +n01597737 +n03801533 +n01495493 +n07774719 +n03267113 +n01742821 
+n03859170 +n03416640 +n03320959 +n12733218 +n02017725 +n13229543 +n09344324 +n04965451 +n01490112 +n10069296 +n12084555 +n04554406 +n04086446 +n02976249 +n02656032 +n02424486 +n02381609 +n09934337 +n04573937 +n07685399 +n02800497 +n02905152 +n02951703 +n07760153 +n03609397 +n00447463 +n03680512 +n02046939 +n03288886 +n11870418 +n03386544 +n07767171 +n07847453 +n12687044 +n01664492 +n03099147 +n03463381 +n02125081 +n12920204 +n03517647 +n02603540 +n12267411 +n11933546 +n11947802 +n04387095 +n12975804 +n02973904 +n13195341 +n04048441 +n11753143 +n03212114 +n03298858 +n04366116 +n01424420 +n10450161 +n01442972 +n07877299 +n04503593 +n04349306 +n12969425 +n12597466 +n03092656 +n07914995 +n03487886 +n12223569 +n01756733 +n13919919 +n04175147 +n02029087 +n03530511 +n02425887 +n03572107 +n03927539 +n03383099 +n04130907 +n01632601 +n07823105 +n10378026 +n02382850 +n07613266 +n03235180 +n02810782 +n12708654 +n11636835 +n02823124 +n03402941 +n12121610 +n03715114 +n04052658 +n00480366 +n12493208 +n04255163 +n12145477 +n01489709 +n12402596 +n01598074 +n03837606 +n02628062 +n04103364 +n03247083 +n02032480 +n07736256 +n12578916 +n09218315 +n02218371 +n03730334 +n02080146 +n03836906 +n02868638 +n02198859 +n12744387 +n02942460 +n11754893 +n12274358 +n02725872 +n09218494 +n03942920 +n07574780 +n02921756 +n01757115 +n02763306 +n11758122 +n10508141 +n02303284 +n04083800 +n13879049 +n12765115 +n12075830 +n02666943 +n11980318 +n07907037 +n12794135 +n02333909 +n03870980 +n07586718 +n11923174 +n10782471 +n01493146 +n12294871 +n11726269 +n12932173 +n07825972 +n12732009 +n03572321 +n07682197 +n03423306 +n12495895 +n03545756 +n03557692 +n03785237 +n07902937 +n09899671 +n12061614 +n07902443 +n01449374 +n12632335 +n03474896 +n03539433 +n04310904 +n03902482 +n12006930 +n03285578 +n04200000 +n03912218 +n07821260 +n03548626 +n03223686 +n11826198 +n03165616 +n02104280 +n09981278 +n09382099 +n03732458 +n03987990 +n09946814 +n12270741 +n07737745 +n04172776 +n10189278 +n03543012 +n12629666 
+n02180875 +n04087432 +n12961879 +n03321954 +n12528549 +n02424085 +n09843443 +n03846677 +n12304703 +n09873473 +n03410571 +n03041810 +n02425228 +n01562451 +n03615790 +n10081204 +n03985881 +n07842130 +n02890513 +n03649797 +n02381004 +n12560621 +n12523475 +n07687626 +n11905749 +n11759404 +n12905412 +n03542605 +n03983612 +n12573474 +n11972291 +n03767459 +n02698634 +n12713866 +n13084834 +n02202006 +n13108323 +n02631475 +n10737103 +n03637898 +n03069752 +n12400489 +n09692915 +n10242328 +n02794664 +n12465557 +n12085267 +n03348868 +n12754981 +n02745611 +n10504206 +n12073554 +n02835724 +n04605572 +n02825961 +n03528523 +n12116429 +n02973805 +n12708941 +n01544704 +n04180229 +n09403211 +n08242223 +n02146371 +n12127768 +n09770359 +n03295246 +n01757677 +n04385799 +n02584145 +n07909593 +n12587132 +n13029326 +n04184316 +n07903643 +n01848555 +n10750031 +n02332156 +n12703557 +n03196990 +n12406902 +n02768973 +n12416073 +n02147591 +n09724533 +n09693982 +n12687462 +n01982068 +n03435991 +n03272125 +n07713763 +n03018712 +n03648431 +n03336575 +n07854184 +n12806015 +n07879174 +n03984643 +n03147280 +n02699915 +n07617708 +n01533651 +n12483841 +n01697611 +n02576906 +n03724066 +n03935116 +n09782397 +n01599269 +n10672371 +n12066630 +n03178674 +n15086247 +n03523987 +n02826068 +n12580654 +n02358390 +n01647640 +n10259997 +n03738066 +n13915023 +n02639605 +n03174450 +n12269406 +n09874428 +n03432061 +n04386051 +n03923918 +n04592465 +n12480456 +n10333439 +n04206790 +n01443831 +n02967626 +n07733712 +n03746155 +n12947313 +n11690254 +n12244650 +n12670758 +n08658309 +n12710693 +n11860555 +n03485198 +n03047799 +n04461570 +n07600177 +n02126640 +n12704343 +n02866386 +n03008976 +n04532831 +n03465426 +n12691428 +n01641206 +n04962062 +n03254046 +n04425804 +n02014524 +n03439348 +n02538010 +n11603246 +n12265600 +n12277800 +n04016240 +n12086192 +n09650729 +n01549641 +n03112719 +n04961062 +n02710324 +n12049282 +n12362274 +n11969607 +n12856680 +n02201000 +n07863802 +n03360622 +n07601809 +n04354487 +n12898774 
+n12939282 +n03109693 +n12867826 +n12441390 +n12915811 +n12879527 +n04137355 +n04131368 +n03527149 +n10164492 +n09932508 +n12426623 +n12575812 +n02557318 +n10263790 +n04309548 +n00476235 +n04194127 +n11876634 +n10327987 +n03499354 +n02616851 +n04464615 +n03615406 +n02744844 +n11732567 +n10347446 +n09752519 +n04228215 +n10004718 +n07899533 +n12030908 +n15102894 +n12044467 +n11711764 +n02610066 +n03415749 +n04562496 +n02034295 +n02297442 +n03566193 +n12506991 +n07774842 +n12827270 +n14908027 +n12242409 +n04072960 +n02829596 +n12496427 +n02266050 +n13108481 +n12473840 +n08677424 +n12076223 +n15091473 +n02815749 +n04549028 +n12558425 +n12023407 +n04179824 +n02378541 +n03188725 +n12517445 +n07573347 +n02004131 +n11921395 +n12570972 +n10602470 +n12095647 +n03854421 +n02450295 +n02792409 +n03543735 +n12836337 +n12204175 +n12152722 +n07900734 +n12517642 +n02775039 +n12607456 +n03376938 +n12179122 +n09873348 +n01847978 +n07888816 +n10453184 +n09675922 +n01851895 +n12865562 +n01797601 +n03711044 +n02738859 +n12064591 +n04033425 +n08551296 +n01650690 +n01537895 +n04207151 +n10087434 +n12261808 +n09438844 +n10364198 +n01814755 +n01583209 +n12270946 +n11892817 +n03344642 +n04117464 +n07847917 +n04003241 +n10362319 +n10477713 +n03495570 +n07560542 +n04363777 +n04534359 +n02404906 +n03349892 +n07712267 +n02960352 +n07866277 +n07857170 +n00324978 +n02755823 +n03150511 +n04211528 +n01899894 +n07588299 +n11874081 +n03425325 +n04506506 +n11949402 +n02952374 +n03309110 +n12159388 +n07591049 +n03068998 +n03228254 +n10279018 +n04173046 +n07728053 +n13052931 +n01597906 +n12368451 +n02767665 +n09435739 +n03915900 +n09728285 +n03292603 +n03331077 +n07817160 +n07917392 +n12540250 +n04153025 +n10209082 +n03968581 +n12676534 +n11824146 +n03521899 +n01853666 +n04292921 +n12332030 +n03984759 +n02863014 +n07801091 +n07723177 +n03289660 +n01533481 +n04488202 +n03468821 +n02382338 +n03543254 +n01961985 +n07915918 +n03703862 +n02771004 +n02047045 +n03877674 +n13141415 +n03529629 +n02240517 
+n03675235 +n04491638 +n12384037 +n04419642 +n03019685 +n07591586 +n04496726 +n12985420 +n12927013 +n12196694 +n03473227 +n11621547 +n02988066 +n10451450 +n07729828 +n09618760 +n12196527 +n01555305 +n12830222 +n11950877 +n13190747 +n12160303 +n12390099 +n02818135 +n03163381 +n04554211 +n03244919 +n07897975 +n03386726 +n04290615 +n02011281 +n12407890 +n04123448 +n07904865 +n03447358 +n02393940 +n07931870 +n02937958 +n04318787 +n04587327 +n12807409 +n04112430 +n07560193 +n12774299 +n02618827 +n07854982 +n03757604 +n03817191 +n12793494 +n02324431 +n03013850 +n04113641 +n01612476 +n03127408 +n02038466 +n03799876 +n04257684 +n03382292 +n10449664 +n04394630 +n10275395 +n07698250 +n12329473 +n07694659 +n07642742 +n02563648 +n08583455 +n02557182 +n02775178 +n09274152 +n03189083 +n12570703 +n04211219 +n12486574 +n03073694 +n11969166 +n02475078 +n02976350 +n08584914 +n07899660 +n10116702 +n01613807 +n12461109 +n04025508 +n12451240 +n12596849 +n12079963 +n03541269 +n04561422 +n11699442 +n07725255 +n03460297 +n07616748 +n12757458 +n03103563 +n02813752 +n07698782 +n12840362 +n01543632 +n01602832 +n01875313 +n12472024 +n02926591 +n02872333 +n10728624 +n12532564 +n03882960 +n12333053 +n03684224 +n13146583 +n03436075 +n04154340 +n03868643 +n02598878 +n04139140 +n03266371 +n04083309 +n12506341 +n12200143 +n03503477 +n12807773 +n03123917 +n13029760 +n10173771 +n03659809 +n12047884 +n12759273 +n04193377 +n04258438 +n04597400 +n04579986 +n03719743 +n04299963 +n02864504 +n10510245 +n03417970 +n09719794 +n03138344 +n02085272 +n07694516 +n12665857 +n01642257 +n03229244 +n10581890 +n10318293 +n03635108 +n10652605 +n12189429 +n09934774 +n11709205 +n04207903 +n10296176 +n10603851 +n03450734 +n13223588 +n12754648 +n09886403 +n07751280 +n11950686 +n07814390 +n12799776 +n01646902 +n09796809 +n12819728 +n01938454 +n02410011 +n07607138 +n02119634 +n10332861 +n09230202 +n02757061 +n02849885 +n15092227 +n12151615 +n03111041 +n02413050 +n03506560 +n07744057 +n04030518 +n12544539 +n04089836 
+n02038993 +n13882201 +n12099342 +n01946630 +n10095769 +n02982416 +n12957924 +n13215586 +n07726525 +n12452836 +n03801671 +n04598318 +n01449712 +n12428747 +n04119751 +n10509063 +n07694839 +n02782602 +n11626409 +n02573704 +n12399384 +n12388989 +n01601068 +n11971406 +n04367011 +n07930315 +n12925179 +n04967674 +n03497352 +n03653833 +n01819465 +n03688192 +n02802990 +n03393761 +n04430475 +n13107694 +n10384496 +n07867164 +n12449526 +n01515303 +n12574320 +n01444339 +n07919310 +n03453443 +n04173907 +n02887489 +n07772788 +n03629520 +n02580830 +n11705387 +n12069679 +n01956344 +n02406533 +n03973402 +n03938037 +n04969952 +n04103094 +n04393808 +n07715407 +n04172107 +n01917882 +n12085664 +n07608429 +n09835230 +n04135024 +n07842605 +n12568186 +n04339879 +n07691091 +n01801876 +n00474568 +n01807105 +n12128071 +n01673282 +n11948864 +n03991837 +n09659188 +n02070174 +n02670683 +n12454949 +n10385566 +n11631854 +n12305293 +n12002428 +n12948495 +n12757816 +n11852028 +n10690648 +n09283866 +n03214582 +n03423877 +n04127521 +n03006626 +n09283193 +n07712559 +n01447331 +n02981321 +n02658531 +n11947629 +n02419634 +n02420828 +n11923637 +n12570394 +n11968931 +n12731029 +n09749386 +n07736813 +n03967396 +n11908846 +n03029445 +n02426481 +n01964271 +n13198914 +n04484432 +n12656685 +n10806113 +n11849983 +n03236423 +n10649197 +n07688624 +n03057541 +n12015221 +n02094931 +n02014237 +n07560331 +n02801450 +n04206570 +n07556406 +n11627908 +n11889619 +n07852229 +n04063154 +n02713364 +n02783459 +n12877838 +n02930214 +n02125010 +n02407276 +n07815424 +n12855494 +n12530818 +n07750449 +n01963317 +n10082997 +n03245724 +n03012013 +n03555006 +n02421136 +n03332989 +n04375405 +n03746486 +n12636224 +n03278914 +n07917133 +n12504783 +n09416890 +n03896526 +n02258198 +n12983048 +n03837698 +n12869061 +n04541987 +n01637615 +n04401949 +n02241426 +n13220122 +n07876651 +n03729308 +n02364840 +n01339801 +n03418915 +n09257843 +n11614039 +n09731343 +n03809603 +n05399243 +n01569262 +n11901597 +n03124474 +n01566207 +n03796522 
+n12595699 +n04573281 +n09689435 +n11859737 +n03201529 +n12902662 +n03374372 +n03760944 +n09189157 +n01517966 +n10431625 +n02898269 +n03693707 +n04369025 +n07834618 +n04095342 +n02786331 +n03822504 +n02284611 +n09862621 +n03436891 +n07688898 +n12435777 +n03949317 +n12443323 +n12273114 +n12623077 +n04333869 +n07907831 +n07774596 +n05450617 +n03320262 +n04190376 +n12671651 +n11819509 +n07588111 +n09756049 +n07611046 +n04973291 +n11602873 +n00120010 +n03500699 +n03844815 +n03708843 +n04452528 +n04387261 +n09889065 +n10147121 +n03318294 +n12599435 +n04164406 +n01965529 +n11636204 +n11791569 +n12275131 +n02977330 +n07851443 +n04132603 +n07824191 +n09760609 +n12190410 +n07915491 +n12665271 +n10120671 +n02570164 +n10208950 +n02163297 +n02244797 +n09842528 +n08645104 +n01841679 +n11603835 +n04488857 +n07814487 +n01953762 +n04612373 +n11877193 +n03198500 +n03981924 +n01943087 +n11552806 +n04414909 +n03005033 +n02457945 +n10500217 +n10375314 +n04607242 +n07914777 +n09832456 +n12915568 +n12813189 +n10578021 +n03519081 +n07801779 +n12026476 +n03296081 +n03850492 +n07902121 +n09881265 +n12562785 +n03290195 +n10131151 +n10078719 +n01558765 +n03917814 +n02045596 +n07734183 +n03414676 +n07933154 +n02126787 +n12148757 +n12263987 +n07684164 +n03406966 +n01492569 +n02988963 +n12963628 +n09964202 +n03417749 +n01854838 +n02921029 +n02183096 +n11762433 +n11722466 +n02387093 +n02768655 +n12519089 +n09871229 +n07938313 +n10502329 +n11989393 +n03768916 +n13145040 +n11813077 +n04457910 +n03655720 +n03703945 +n11876803 +n01438581 +n07910379 +n07847827 +n02300797 +n09245515 +n10754189 +n04581102 +n12513172 +n02458135 +n03762332 +n11789589 +n09695620 +n03850053 +n07911249 +n12342852 +n12753007 +n07748574 +n07727458 +n03696568 +n04304680 +n07723039 +n07775197 +n07577144 +n03043693 +n04374735 +n01858281 +n09228055 +n09466678 +n01949085 +n02024479 +n11623815 +n02704645 +n07894451 +n01751472 +n01646388 +n01317916 +n13880994 +n10300500 +n11794024 +n03735963 +n04610274 +n11854479 +n07754894 
+n02639087 +n02122510 +n02262803 +n12732966 +n04529108 +n13194036 +n09990777 +n10009276 +n12088223 +n12155009 +n07886176 +n04278247 +n04222723 +n11707229 +n01999186 +n07851641 +n12741792 +n01315213 +n10033412 +n04249582 +n03586631 +n03237839 +n12037499 +n12014085 +n07756325 +n01636352 +n03905947 +n08611339 +n07693590 +n03724538 +n09791816 +n01666585 +n10588965 +n11613219 +n10542608 +n12913791 +n10528023 +n03171635 +n11923397 +n12854600 +n10410246 +n12698598 +n04135118 +n09844457 +n04441790 +n03882611 +n02337001 +n07907342 +n12561169 +n12027658 +n10719132 +n09851165 +n02801823 +n12330587 +n01683558 +n12162181 +n04387932 +n11704620 +n09679170 +n07601290 +n04028221 +n10277027 +n09877750 +n11758483 +n10027246 +n03819336 +n10205231 +n12478768 +n03451711 +n12973443 +n01923025 +n03262717 +n07807594 +n00475535 +n07744430 +n02341475 +n04614655 +n07924747 +n03388323 +n12680402 +n03202940 +n04534520 +n09661873 +n15092059 +n11832480 +n04198355 +n12529220 +n12389130 +n12304115 +n03234952 +n07610620 +n02868975 +n04442741 +n05282652 +n02820675 +n12795352 +n12675299 +n08547468 +n04189651 +n04141198 +n04513998 +n12273939 +n12482668 +n12858618 +n01958346 +n03172038 +n10280674 +n04301760 +n02631330 +n12433178 +n07763107 +n03068181 +n07565259 +n03605598 +n13177884 +n04005197 +n09751496 +n12737383 +n07648997 +n09839702 +n09442595 +n07925229 +n12150722 +n11898775 +n09904208 +n02207345 +n07642361 +n07685918 +n03205458 +n10574538 +n09742315 +n02599557 +n03585682 +n04273659 +n02200850 +n03410740 +n03391301 +n07726672 +n09782167 +n13155305 +n02067240 +n07561848 +n07728708 +n12463134 +n12228229 +n09743487 +n12225563 +n03421669 +n03226375 +n03973945 +n12498055 +n04483925 +n04564278 +n11890150 +n12519563 +n12754468 +n04353573 +n11615607 +n04430896 +n04585128 +n10395828 +n10773665 +n02772435 +n09881895 +n12663023 +n01615303 +n12803754 +n09445008 +n03955296 +n05245192 +n05486510 +n07899769 +n07575510 +n02307681 +n03814817 +n02670186 +n03598515 +n12797860 +n03518135 +n07587962 +n12630763 
+n06273743 +n09843824 +n03226254 +n12407222 +n02961544 +n12951835 +n06417096 +n02016659 +n01441117 +n07735404 +n09411189 +n13896217 +n03262248 +n03451120 +n02525382 +n03375329 +n04155068 +n12916179 +n10297234 +n11907100 +n03423568 +n04360914 +n12027222 +n12199790 +n01744270 +n09896401 +n07925116 +n03693860 +n04414319 +n07767549 +n03555564 +n04043411 +n07872593 +n03774461 +n03129471 +n04497801 +n11756870 +n09776346 +n04530283 +n01520576 +n12828220 +n01583828 +n04120842 +n09676021 +n04344734 +n01916388 +n12513613 +n09861863 +n02310334 +n03318983 +n04533499 +n02427576 +n12727518 +n04502059 +n11725480 +n11987126 +n11876204 +n03504205 +n09720595 +n12315999 +n12935609 +n04452757 +n12201331 +n01603152 +n10772092 +n03156279 +n12723610 +n02003037 +n03244775 +n07802963 +n11954642 +n07770034 +n09931165 +n10559508 +n01745902 +n07654148 +n10070108 +n01585287 +n13196003 +n04389718 +n10253122 +n03730893 +n02983357 +n02783900 +n01680813 +n03072440 +n03109253 +n03274435 +n11655974 +n10048612 +n07849733 +n07896994 +n03792334 +n03035832 +n03819448 +n03105088 +n11943992 +n01485479 +n01699675 +n11795049 +n12086778 +n01840120 +n07753980 +n10685398 +n04346428 +n04532398 +n07709172 +n02146700 +n09461069 +n03853924 +n01321456 +n12068432 +n09757449 +n03206282 +n03751757 +n13053608 +n11695974 +n12123741 +n03500209 +n04367371 +n02890940 +n01917611 +n07835331 +n02907656 +n04136045 +n12059625 +n03862862 +n12864160 +n00440039 +n03448590 +n12628986 +n04115802 +n03949145 +n12916511 +n12647893 +n09706255 +n13181811 +n07752109 +n04375615 +n01648620 +n04403524 +n09967967 +n12911079 +n03857687 +n02803539 +n01551080 +n10734891 +n13235159 +n04127633 +n07935878 +n12853482 +n10191001 +n03126385 +n10076224 +n01812866 +n12919403 +n03769610 +n09283767 +n03462110 +n11770256 +n12038898 +n09889170 +n11894558 +n10298647 +n02592055 +n02795670 +n11701066 +n12762049 +n02890662 +n07918193 +n02976455 +n03100897 +n13127843 +n12184912 +n00468299 +n12407079 +n12496949 +n03541537 +n05260127 +n01535140 +n01541760 
+n11945783 +n07687053 +n07745046 +n12083847 +n02382132 +n12270027 +n10140597 +n03788914 +n01790711 +n02197689 +n03173270 +n10368624 +n04449290 +n01579729 +n07834872 +n07734417 +n02379630 +n01636829 +n12549192 +n12951146 +n13579829 +n03268142 +n11761202 +n02769669 +n09452760 +n04095577 +n12031139 +n02003577 +n12891469 +n03931885 +n01577941 +n04176295 +n12046428 +n03418402 +n13145250 +n11865874 +n12473608 +n11797321 +n01798168 +n09923186 +n02786736 +n01698782 +n09976283 +n03975788 +n14685296 +n01682172 +n07838441 +n02771286 +n03429137 +n03948950 +n02512830 +n02298218 +n10141364 +n02823848 +n02077384 +n12584715 +n11748811 +n02214773 +n03667552 +n04121426 +n04135710 +n07579917 +n12275888 +n07826453 +n12167436 +n04586072 +n09877288 +n04248396 +n02761696 +n03038870 +n01490360 +n12353203 +n09785891 +n12057660 +n04146343 +n12557556 +n02081798 +n02917964 +n07898617 +n12597798 +n07574176 +n07764630 +n03008177 +n04255899 +n04434207 +n07897600 +n09929577 +n11811921 +n12415595 +n02893941 +n12276110 +n02821202 +n09690621 +n02508742 +n02077787 +n02390640 +n03764822 +n02257985 +n13033134 +n04559166 +n07865196 +n10506915 +n12051103 +n10473917 +n12775919 +n02971579 +n12880462 +n11837970 +n02063662 +n09840520 +n12019827 +n09208496 +n12836508 +n02982232 +n04219185 +n03332005 +n07914128 +n07862461 +n04250692 +n09267854 +n04561734 +n02076402 +n12344837 +n02919148 +n06592281 +n03668803 +n03062985 +n04246731 +n12112609 +n04012482 +n03558633 +n03982642 +n01998741 +n07665438 +n04209509 +n07913882 +n01749244 +n07801342 +n02611561 +n04488742 +n01897536 +n10624437 +n13128976 +n07931612 +n04300643 +n03727067 +n03360431 +n07593471 +n10253296 +n03297226 +n03854506 +n07879450 +n10562283 +n12557438 +n13154388 +n12862512 +n02126028 +n07752514 +n02387887 +n12066261 +n07666176 +n02806530 +n09988703 +n03721252 +n03221540 +n12195533 +n02682569 +n03622058 +n03943266 +n04207596 +n11721337 +n02427032 +n07910152 +n01551300 +n12861345 +n11660300 +n03786313 +n12966945 +n02046171 +n02797535 +n03546112 
+n07711232 +n02044908 +n02998563 +n02652132 +n12634986 +n12187247 +n11645590 +n07582892 +n03065243 +n09911226 +n04396902 +n10763075 +n02359047 +n10400108 +n04294614 +n03991646 +n11728945 +n07766891 +n12277150 +n13141564 +n10563314 +n12426749 +n07827750 +n12403994 +n12627119 +n03420801 +n10203949 +n12830568 +n12280060 +n13180875 +n12659064 +n04239436 +n03823312 +n04367746 +n12448700 +n01896844 +n07581931 +n09384106 +n11625223 +n04198722 +n01477875 +n09932336 +n03477512 +n12281974 +n10117739 +n07759194 +n12281788 +n01405007 +n03077616 +n02304036 +n12947544 +n03140126 +n12356960 +n07807002 +n07877849 +n02956795 +n04373795 +n07925500 +n10359546 +n09730077 +n01694955 +n10611267 +n04316498 +n07849912 +n12841354 +n07903543 +n10026976 +n04050313 +n03939844 +n03260849 +n07917507 +n12228387 +n03199775 +n01569566 +n02403920 +n04261638 +n02986160 +n03724623 +n01960177 +n03783430 +n07877675 +n10401639 +n04215153 +n03077741 +n02589623 +n12934985 +n03233624 +n04506688 +n12194147 +n09975425 +n07818825 +n12641007 +n10036692 +n02771750 +n12285900 +n01472303 +n10033663 +n10707134 +n03219966 +n11772879 +n10146416 +n10435169 +n10304086 +n12385566 +n03126580 +n12904314 +n03619196 +n02299846 +n03574243 +n12368257 +n03690473 +n01748686 +n09834378 +n07750736 +n02930645 +n01679307 +n03721047 +n02710044 +n07563207 +n02930080 +n09309168 +n03127203 +n02863536 +n02536165 +n01559639 +n09654518 +n02961035 +n12007406 +n12773651 +n04351699 +n03114504 +n06273414 +n02017475 +n01733466 +n02175014 +n07920663 +n03953901 +n09670521 +n09400987 +n11791341 +n02284884 +n12919646 +n07880325 +n03801353 +n01982347 +n07828642 +n01570421 +n03998333 +n03449309 +n10482220 +n12850906 +n12805561 +n12926689 +n03232543 +n04248851 +n03195959 +n04082562 +n03846100 +n07682952 +n07695652 +n11809271 +n09895561 +n04287898 +n09740724 +n02859955 +n09830400 +n03674731 +n02825153 +n04571686 +n13107891 +n10318607 +n07848093 +n13226871 +n08555710 +n03137473 +n02776978 +n03141455 +n12514138 +n01809371 +n09405078 +n01753180 
+n02184473 +n11610215 +n03539546 +n12731835 +n04485884 +n03590588 +n10221312 +n04049753 +n03441345 +n02302244 +n12262185 +n15092650 +n11877646 +n10377185 +n01684578 +n03796605 +n07897116 +n03164344 +n12135049 +n10757050 +n01692523 +n04566756 +n07697699 +n07575392 +n10262655 +n04064747 +n07914006 +n12433769 +n07873348 +n04457767 +n10019072 +n02921195 +n03856465 +n04041408 +n12639584 +n12920955 +n11781176 +n07864756 +n03941013 +n03646148 +n04401578 +n11692792 +n02757714 +n02286089 +n04253168 +n03890514 +n07855510 +n03507458 +n04123026 +n11661909 +n12435152 +n04330746 +n09481120 +n03731019 +n03717285 +n03271030 +n02772101 +n07740597 +n02847852 +n12825497 +n12263738 +n03342262 +n03603594 +n07804543 +n12932365 +n12695975 +n10297531 +n04054670 +n03175081 +n12703856 +n03832144 +n03966206 +n02414290 +n03619275 +n09738121 +n03290096 +n10585077 +n07731767 +n12409840 +n12026981 +n02278980 +n02752810 +n01654637 +n02654112 +n10314836 +n13023134 +n01823414 +n07461050 +n11902982 +n04543636 +n02204907 +n04049585 +n12304899 +n03073545 +n04272928 +n10315456 +n03975657 +n09899782 +n12288005 +n07005523 +n03795269 +n09823832 +n02242137 +n02907391 +n03643491 +n03245889 +n12285369 +n03061345 +n03797264 +n07838073 +n09219233 +n02859343 +n07608098 +n03920641 +n12578626 +n10688356 +n04542858 +n07834065 +n00443803 +n04181561 +n04570214 +n02047517 +n03295012 +n01633781 +n10610850 +n04035836 +n03001115 +n04593376 +n02393807 +n13061348 +n10123122 +n11800236 +n13207094 +n10140929 +n12167602 +n01809752 +n10421956 +n02764935 +n03424489 +n12889219 +n04046091 +n07714287 +n07708685 +n07736087 +n04142434 +n11961446 +n04521863 +n02414763 +n02901377 +n00467536 +n13085747 +n03855756 +n11846765 +n02530999 +n03063199 +n04258618 +n12204032 +n04424692 +n11758276 +n02653497 +n03766508 +n02026629 +n02572484 +n12339831 +n01635027 +n01668436 +n07821919 +n01543175 +n02689748 +n12528974 +n04024862 +n04184880 +n11720891 +n13869896 +n01678043 +n01647303 +n11532682 +n03236217 +n04963307 +n03012897 +n11682659 
+n03191286 +n07643891 +n12737898 +n10680609 +n07924955 +n03879705 +n10461060 +n02523427 +n02013567 +n09893344 +n04124488 +n09863031 +n12454436 +n12305089 +n07709046 +n03805180 +n11940599 +n01691217 +n04198562 +n03978421 +n02357585 +n07818572 +n12870682 +n03798442 +n04154938 +n10550369 +n11957678 +n01958531 +n09936825 +n02334201 +n07910538 +n11978551 +n10562135 +n12700088 +n12784889 +n04480853 +n03281673 +n07588419 +n02968333 +n11935469 +n13046669 +n11730602 +n09643799 +n11849467 +n01758757 +n09638454 +n03267468 +n07914586 +n12104734 +n02961225 +n09827246 +n09917214 +n13079073 +n12634734 +n04089376 +n13034062 +n11714382 +n12753762 +n07683039 +n11840067 +n07689842 +n12173069 +n12172481 +n04182152 +n07869522 +n10356877 +n02771166 +n03154895 +n07615289 +n12986227 +n12361135 +n03456447 +n12706410 +n12895811 +n02988156 +n03130761 +n10639359 +n03628215 +n02738741 +n01643507 +n07730708 +n03232309 +n02846733 +n04969540 +n03051041 +n12890928 +n03235327 +n04289576 +n07588817 +n10325774 +n03973285 +n09703485 +n02358584 +n03061674 +n03195332 +n02901259 +n07849619 +n04486934 +n07908812 +n01588725 +n03682877 +n11949015 +n04146504 +n04146862 +n07898247 +n03318865 +n04367950 +n07880213 +n04247011 +n01447658 +n12711817 +n03146687 +n02926426 +n12856091 +n11966896 +n02413593 +n09764900 +n03009794 +n03314227 +n10499232 +n10075693 +n04451318 +n12320806 +n11933728 +n07764315 +n12133682 +n09904837 +n12832538 +n03816530 +n07802863 +n04391445 +n09728137 +n03887330 +n04436012 +n03957991 +n07771731 +n06266973 +n10407310 +n10290919 +n07862244 +n01842504 +n10262561 +n12726159 +n07691954 +n07618119 +n03437829 +n11966617 +n03629100 +n04231905 +n04208760 +n03344305 +n03684143 +n12934174 +n08645212 +n03556679 +n12109365 +n03751458 +n02380875 +n02025389 +n02770721 +n09830629 +n02800675 +n04951186 +n04483073 +n12710577 +n12789054 +n12058192 +n11777080 +n07716203 +n09618880 +n04525821 +n04016846 +n02918330 +n10375052 +n13158512 +n13090871 +n02929582 +n02308735 +n10487182 +n02213663 +n07608339 
+n04384593 +n12890490 +n03992436 +n02994573 +n13231078 +n12880244 +n01651059 +n02925009 +n09686401 +n13219976 +n09981540 +n04582771 +n06267758 +n09893502 +n13214340 +n03272940 +n12554911 +n02214341 +n04137089 +n03874487 +n04573513 +n12003167 +n12004547 +n13065089 +n01903346 +n04373428 +n02216365 +n02024185 +n12577895 +n11698042 +n07586318 +n11705776 +n03030353 +n04486213 +n07885705 +n07928163 +n02356612 +n02767038 +n02897097 +n11662371 +n04128710 +n09842395 +n07683360 +n11533212 +n08495908 +n12841193 +n03669886 +n07768068 +n02381831 +n12081215 +n02757337 +n02811618 +n10144338 +n01379389 +n09698644 +n12779851 +n10400618 +n11801891 +n12322099 +n12408077 +n02767956 +n08640962 +n07816839 +n03021228 +n10346015 +n07868830 +n07917272 +n10076957 +n12865708 +n04290259 +n03595264 +n03986224 +n07825194 +n01610100 +n04417086 +n12995601 +n12734070 +n15091129 +n12428412 +n07587331 +n02405101 +n03108455 +n03594523 +n04489695 +n03892425 +n13032618 +n04409011 +n07590752 +n15092942 +n03914583 +n13066448 +n03532919 +n10639637 +n04566561 +n13223843 +n07904637 +n12347158 +n02720048 +n03901229 +n03936466 +n10574840 +n03782794 +n12397431 +n07908567 +n12580896 +n02697221 +n09791419 +n02577403 +n07870069 +n02136103 +n04318892 +n01462544 +n09747191 +n12287836 +n03067093 +n03934565 +n03543945 +n13126856 +n02240068 +n01585422 +n12413301 +n03246454 +n01876034 +n03635330 +n11680596 +n03228365 +n03082656 +n11609862 +n12859986 +n03934229 +n10233248 +n03166514 +n12166793 +n10115430 +n03327553 +n03373611 +n02967782 +n12338258 +n01604968 +n01323155 +n02590094 +n03044934 +n07866409 +n12291143 +n14900342 +n12094612 +n07845702 +n07926250 +n10750640 +n04359500 +n09797873 +n09953350 +n03561047 +n12122725 +n12725738 +n01453087 +n04977412 +n04575723 +n13219833 +n12161056 +n04273285 +n12482437 +n12863624 +n04953296 +n03390075 +n10188957 +n02874442 +n04236935 +n09990690 +n12866459 +n04075715 +n09725000 +n12794367 +n12461673 +n03050453 +n03677115 +n12427391 +n07736371 +n02973236 +n02406749 +n12322699 
+n12815198 +n10680796 +n03268311 +n02405799 +n12302248 +n09791014 +n01545574 +n07740033 +n07862095 +n09901337 +n04390577 +n03597916 +n12110085 +n11802586 +n04205505 +n07696527 +n12076852 +n04344003 +n03326660 +n02823586 +n03042139 +n01565345 +n07905296 +n01454545 +n07650903 +n07905386 +n12530629 +n02841187 +n02943964 +n03329536 +n09681234 +n03479121 +n03770085 +n04147793 +n11552133 +n03774327 +n13197507 +n07901355 +n10400437 +n07837912 +n02310941 +n07845087 +n02239774 +n04976319 +n03960490 +n05239437 +n06275471 +n01633406 +n04257223 +n12009420 +n10483138 +n02775897 +n07866151 +n07922512 +n02666624 +n03944024 +n03842377 +n01832493 +n07855907 +n03968728 +n04492060 +n07879072 +n11635830 +n11802800 +n02357911 +n02431628 +n03730494 +n13099999 +n07768230 +n13147270 +n12331655 +n10237676 +n11855553 +n09759501 +n10620586 +n13181055 +n12309277 +n13183489 +n04382695 +n07679034 +n10495756 +n02173113 +n12764202 +n03683457 +n10298912 +n07680313 +n10160280 +n02205673 +n12053690 +n11653904 +n02931294 +n04093775 +n12856479 +n02427470 +n07608866 +n09954639 +n11639445 +n03364599 +n09924106 +n09683924 +n10419472 +n03089753 +n12620969 +n07604956 +n12940609 +n12564083 +n03514894 +n10343355 +n13068255 +n03805280 +n12793284 +n03140652 +n02666501 +n11717577 +n04267435 +n04593185 +n12820853 +n03934311 +n02630615 +n07767002 +n07723968 +n01631354 +n07931452 +n12414818 +n03097673 +n09944430 +n04457474 +n11850521 +n12227658 +n10131815 +n12408717 +n03566730 +n12777680 +n06273555 +n04357531 +n03759243 +n09861599 +n03015851 +n04175039 +n03392741 +n07859796 +n07741138 +n04474187 +n02266864 +n04553561 +n02667244 +n12720200 +n12432356 +n07806120 +n10362557 +n11929743 +n07765862 +n02963987 +n02762371 +n02747672 +n04289195 +n04056413 +n03039493 +n03894677 +n12338655 +n04422409 +n12079120 +n10252222 +n10168837 +n12919847 +n10297841 +n01340014 +n11710827 +n10167838 +n12278107 +n01384164 +n10498986 +n02742468 +n02899439 +n11752937 +n12107710 +n12315598 +n03985441 +n07605804 +n07686202 +n12884100 
+n13121349 +n11725311 +n10420507 +n11706761 +n01381044 +n03331599 +n12336333 +n10185483 +n07880880 +n01782516 +n12615232 +n03175457 +n12657082 +n01750437 +n07918879 +n13213066 +n12927494 +n02910542 +n06273986 +n02161338 +n10235024 +n12180168 +n03659950 +n02160947 +n11861853 +n09866817 +n09279986 +n12393269 +n01552034 +n05526957 +n02956883 +n12818966 +n09753792 +n03114236 +n12273344 +n12546617 +n13177048 +n02129991 +n01731941 +n01628770 +n12774641 +n07685546 +n03253279 +n10678937 +n12579038 +n08673039 +n01392275 +n02379081 +n10530150 +n12851469 +n12414449 +n11694664 +n11877283 +n09708889 +n03585438 +n00483605 +n12332555 +n03323096 +n07851767 +n02417663 +n10667863 +n02856237 +n09269341 +n01596608 +n09720033 +n13160604 +n04443164 +n02814428 +n11622771 +n10328123 +n04338963 +n01794651 +n12069217 +n07762740 +n02935387 +n11897116 +n10569179 +n12749852 +n10745006 +n07823280 +n12162425 +n09801533 +n03772269 +n04518643 +n07916319 +n12771597 +n02147173 +n10342992 +n03795123 +n11646344 +n12847927 +n07686021 +n12383894 +n04465050 +n14564779 +n04212467 +n12274863 +n02380052 +n04329958 +n12034384 +n04213353 +n04366033 +n04955160 +n02778294 +n12890685 +n03028785 +n03097535 +n04533594 +n01750167 +n01415626 +n12276477 +n07729926 +n07711371 +n12843970 +n10500419 +n12891093 +n03840823 +n12509665 +n11878101 +n04315342 +n07685031 +n12305819 +n10039271 +n12264512 +n03911866 +n13919547 +n12413419 +n03785721 +n02599347 +n03786194 +n04018155 +n12856287 +n09607903 +n02396088 +n10212501 +n10313000 +n07683617 +n03586219 +n03890233 +n03156767 +n12033709 +n01648139 +n04399846 +n10671736 +n07698672 +n10791115 +n07708124 +n02709908 +n04266968 +n01758141 +n10058962 +n09444783 +n03668067 +n02838345 +n02388143 +n12893993 +n12590499 +n01462042 +n02689434 +n13209808 +n04075291 +n02412629 +n01953594 +n03906463 +n03043423 +n02200509 +n10152763 +n12504570 +n04396808 +n03382413 +n03618101 +n02767147 +n02390101 +n03450974 +n12778398 +n03625539 +n02574271 +n04113316 +n07572616 +n11809437 +n04119230 
+n03829954 +n10500603 +n04258732 +n02731900 +n10174330 +n01574801 +n08663703 +n12558230 +n03981760 +n07732904 +n11875523 +n11823436 +n03238286 +n03079494 +n04281260 +n07873057 +n11686912 +n10568608 +n07593004 +n04271531 +n10037922 +n07838551 +n03615300 +n12624568 +n12940226 +n05242928 +n03680734 +n01589893 +n11652376 +n11893640 +n04119091 +n09696763 +n07851554 +n02660640 +n12124818 +n10370955 +n02663211 +n02414209 +n13187367 +n03258577 +n04375241 +n07617932 +n12240477 +n03417202 +n07595649 +n03839424 +n03087245 +n02431441 +n04396335 +n03484809 +n03426285 +n03592931 +n02912319 +n03488887 +n12187891 +n07592400 +n12918609 +n07858114 +n07567980 +n01548694 +n02726210 +n02406859 +n10147262 +n05458576 +n02848921 +n03503233 +n02587618 +n03465151 +n03582508 +n11654293 +n03695452 +n02197185 +n04223170 +n10243273 +n03149135 +n02842809 +n03669534 +n03857291 +n02147328 +n12278865 +n12733428 +n03264906 +n09924195 +n10432189 +n12203896 +n03892728 +n12360958 +n10418735 +n01650901 +n12420722 +n03341606 +n02557909 +n07751858 +n03483971 +n12019035 +n03991202 +n02072040 +n03129848 +n04505345 +n02405440 +n03901974 +n11656123 +n11552976 +n10291822 +n10108018 +n09902731 +n03325691 +n12646072 +n04134170 +n12097396 +n07564008 +n01624305 +n03421117 +n02776007 +n10792856 +n07818133 +n03227184 +n10198437 +n04157099 +n12743009 +n07820960 +n12749456 +n13035925 +n05262698 +n03422771 +n02878628 +n12140903 +n07820297 +n03524745 +n09901921 +n03170872 +n10039946 +n12638964 +n11989087 +n03461988 +n04287451 +n04298053 +n07882420 +n04002262 +n02734835 +n11707827 +n07756641 +n12808007 +n10069981 +n12637123 +n12947895 +n04363082 +n04292080 +n11858077 +n04535252 +n12646397 +n12283147 +n12321077 +n02746595 +n02895328 +n07624924 +n12537253 +n11952541 +n02181477 +n01440160 +n03878828 +n12861541 +n02869563 +n04242084 +n03197201 +n09396608 +n04291992 +n07845863 +n04314522 +n12843557 +n04029647 +n12146654 +n13147386 +n12954799 +n11920133 +n03038480 +n03213715 +n02971473 +n04149374 +n04230387 +n00444340 
+n11859275 +n07564796 +n02948403 +n10186068 +n04315713 +n02366002 +n02670935 +n13208302 +n10225931 +n07826340 +n04102872 +n02259708 +n11855842 +n09941089 +n08896327 +n10237464 +n12084158 +n03764995 +n03627954 +n12384375 +n10341343 +n07876189 +n04573379 +n07904293 +n07840520 +n12038038 +n03005147 +n10483799 +n02978367 +n01484285 +n13094273 +n04539053 +n01748389 +n10146816 +n07815839 +n12991837 +n03294604 +n03588841 +n04055180 +n03209477 +n09917345 +n04393913 +n12337391 +n12126084 +n01882125 +n07688130 +n02814116 +n09640715 +n12679593 +n12596345 +n03029925 +n11761650 +n04457157 +n12683096 +n07709881 +n03841290 +n13157684 +n07927836 +n03523134 +n03690279 +n10187491 +n12451070 +n02682311 +n03978815 +n11806679 +n07808022 +n01386354 +n03622526 +n02369293 +n11885856 +n02289610 +n12663359 +n02624987 +n13173488 +n03027001 +n07896765 +n11935330 +n07814790 +n04242704 +n09959142 +n07589543 +n03551582 +n07843117 +n03556992 +n02060569 +n04000998 +n03825271 +n11946918 +n02874750 +n03479502 +n09919451 +n02176747 +n02080713 +n03400972 +n10222170 +n07926785 +n07852302 +n03012373 +n10438842 +n12868019 +n03634034 +n04210591 +n07853560 +n12374862 +n09248399 +n04355115 +n12908093 +n12906498 +n12875269 +n02791665 +n03146777 +n02854378 +n12414159 +n07821610 +n07595180 +n12238913 +n12141385 +n10761190 +n12165758 +n01653223 +n12956367 +n03695753 +n12416703 +n12346813 +n03405111 +n04304215 +n01624212 +n12674895 +n09850760 +n12407715 +n04156040 +n11610437 +n03395256 +n09970822 +n04229959 +n02530831 +n07870894 +n12098524 +n12828379 +n04057215 +n10751152 +n10053439 +n03674270 +n07869291 +n12256920 +n02535163 +n04282231 +n02136452 +n02365108 +n10328328 +n02315487 +n03325403 +n09231117 +n03342657 +n09980985 +n10702167 +n11961871 +n02065263 +n12857779 +n03219612 +n07805966 +n10699981 +n07691863 +n12831932 +n04179126 +n10208189 +n09765118 +n07922147 +n01631512 +n01947997 +n01405616 +n01892030 +n07827896 +n12964920 +n07749870 +n03276696 +n10020670 +n11828577 +n07624666 +n10590146 +n02407521 
+n10253703 +n03270854 +n11610047 +n12981443 +n12413642 +n12302565 +n03177059 +n04594114 +n10227985 +n07728391 +n10395073 +n02810270 +n03569293 +n07812046 +n03843316 +n12477401 +n03802643 +n07618029 +n10755648 +n12837803 +n12454556 +n01636127 +n02809241 +n03270165 +n12035631 +n02962414 +n09750641 +n01793085 +n04346003 +n07922041 +n04164002 +n12499979 +n03301291 +n07921834 +n09656077 +n07599161 +n13155611 +n10194231 +n10063635 +n03601442 +n10366276 +n00475661 +n03943714 +n10377291 +n02624551 +n02568447 +n07589458 +n09691858 +n02685995 +n11919975 +n01690466 +n13211020 +n04114069 +n10530383 +n04200908 +n12631932 +n07916437 +n03219859 +n07918309 +n10368291 +n10253479 +n03317889 +n13206178 +n02821415 +n10592811 +n12557064 +n12872458 +n10212231 +n07926346 +n09695514 +n09741816 +n03964611 +n07812913 +n09703708 +n02587479 +n10593521 +n03485309 +n03776877 +n12289433 +n07716504 +n10580030 +n03061893 +n03206158 +n09710041 +n04266849 +n07864065 +n12767648 +n02333190 +n12295429 +n02406432 +n01799679 +n07861983 +n02201626 +n03441582 +n03653975 +n02834506 +n12263204 +n10672662 +n03072682 +n03410423 +n11620389 +n04542095 +n07910970 +n03697913 +n02706806 +n09736798 +n12318965 +n07938594 +n12032429 +n03191776 +n04210288 +n01422335 +n03236093 +n11881189 +n02247216 +n12338146 +n03104512 +n00474881 +n04172230 +n01461315 +n04400109 +n10646140 +n02215621 +n10096126 +n03019806 +n11809754 +n02492948 +n10741367 +n10308504 +n07875560 +n02523110 +n07738224 +n02015797 +n10499631 +n03025165 +n03284308 +n03508881 +n10441037 +n10757492 +n07608721 +n09755241 +n04264361 +n04394421 +n03776997 +n03175843 +n04476526 +n02523877 +n13196369 +n10190122 +n03172738 +n02709763 +n02070624 +n04563560 +n04017807 +n03824589 +n07817758 +n03222722 +n01542433 +n13173259 +n04458201 +n12869668 +n12580786 +n02407763 +n09760913 +n10530571 +n11752798 +n09612700 +n07601175 +n11632376 +n10641223 +n03158668 +n03411208 +n01413457 +n03684740 +n10248008 +n12656528 +n11849271 +n07771891 +n12067433 +n12389727 +n11734698 
+n04042204 +n07825399 +n12621945 +n07624757 +n03180732 +n09741331 +n10246317 +n04030414 +n07821107 +n04524716 +n03789603 +n12867449 +n10249869 +n02434415 +n07614103 +n03333349 +n04602840 +n09923996 +n02658811 +n13033879 +n03663433 +n02873623 +n07837545 +n12436907 +n02675077 +n01500854 +n04435552 +n01790304 +n11687789 +n03443543 +n09733459 +n01606177 +n12245885 +n11721642 +n02201497 +n12010815 +n04594742 +n02755984 +n07927716 +n04245218 +n03134118 +n13214485 +n12294542 +n12713521 +n03556173 +n12650038 +n07719058 +n04319774 +n10443830 +n10019187 +n09720702 +n07926442 +n10402709 +n03989777 +n11699751 +n09613118 +n02965122 +n04221076 +n01861330 +n12837052 +n02975589 +n09668437 +n03012499 +n01418498 +n12451566 +n03585778 +n07692517 +n09672590 +n09741999 +n09748648 +n07621264 +n03482001 +n10185148 +n01542168 +n12536291 +n07846557 +n11840476 +n03130866 +n02631775 +n11730015 +n03715275 +n07680168 +n12175370 +n05427346 +n03665232 +n08611421 +n11730458 +n02413484 +n09783884 +n07888378 +n04611351 +n02247655 +n02136794 +n11649359 +n01382033 +n07889193 +n10405540 +n03510384 +n04420720 +n03585875 +n03812789 +n01835769 +n12139921 +n09762011 +n10103228 +n03477410 +n11930788 +n10064831 +n12311045 +n07681805 +n03136504 +n12887713 +n03886940 +n03130233 +n10197392 +n12333961 +n07672914 +n12723062 +n12599661 +n04268799 +n03696909 +n12809868 +n12452256 +n10710778 +n02571652 +n12117326 +n02450677 +n03041265 +n12544240 +n01966377 +n10252354 +n02378625 +n09814488 +n10569011 +n13067330 +n07928998 +n07890970 +n02187279 +n02592371 +n07846802 +n03475961 +n05448704 +n10410996 +n02851795 +n10093167 +n12468719 +n09876701 +n03057724 +n03469031 +n02344270 +n04248209 +n02687682 +n04467899 +n12897788 +n03436656 +n12539832 +n09906704 +n03190458 +n11843441 +n12130549 +n11823756 +n03153246 +n03684489 +n04160036 +n02908951 +n12855365 +n03518230 +n12225222 +n12933274 +n10432957 +n02921406 +n10156831 +n12239647 +n02826812 +n03411927 +n11602091 +n13200986 +n04244847 +n01330126 +n14938389 +n03001540 
+n04387531 +n03423099 +n07608533 +n11723986 +n07600394 +n12529500 +n02403820 +n02587300 +n10333317 +n07935288 +n12680652 +n01449980 +n12153914 +n07803310 +n11741797 +n01881857 +n13081999 +n08644045 +n02061217 +n02173784 +n02660519 +n03104019 +n13137951 +n04538403 +n02621258 +n04515729 +n04165945 +n11919761 +n13078021 +n07861247 +n11959259 +n11801665 +n04070545 +n13210597 +n10218043 +n10717337 +n01365885 +n10718952 +n11979187 +n03880032 +n03798610 +n03477303 +n01876667 +n11860208 +n03401721 +n03360133 +n13230843 +n13194758 +n13190060 +n02564935 +n13894154 +n12754311 +n07697408 +n13171210 +n02035402 +n03736147 +n10396337 +n04554998 +n02793930 +n04126852 +n03654826 +n09411295 +n06255613 +n01680983 +n10261862 +n01581874 +n10378780 +n10646641 +n03539103 +n03351151 +n04349913 +n03906106 +n02370525 +n03319576 +n04113968 +n09693244 +n02945964 +n03344509 +n04117216 +n03889626 +n03557840 +n09800469 +n04280487 +n07890890 +n12147835 +n12295237 +n03883664 +n04436992 +n02922877 +n10099002 +n01988203 +n10056719 +n11646517 +n03672521 +n04568713 +n10111358 +n03606347 +n04047733 +n12320627 +n10251612 +n10460033 +n01742447 +n11917835 +n10443032 +n13079567 +n04363671 +n10788852 +n10482587 +n03308614 +n12741586 +n12938667 +n04539407 +n01630148 +n02303777 +n13050940 +n04552551 +n02341288 +n04098169 +n04110439 +n11625391 +n12259316 +n02822762 +n10631131 +n04089152 +n03571439 +n04558199 +n12656909 +n03170292 +n02877642 +n12771890 +n03033267 +n12658603 +n13354021 +n12855886 +n11840246 +n03619050 +n07727252 +n12932706 +n13874073 +n01315805 +n02948942 +n12048928 +n03146449 +n10656969 +n09872557 +n03906590 +n04454792 +n12500309 +n04239333 +n01815036 +n09644657 +n10497645 +n02918455 +n07812662 +n04240434 +n10804636 +n11967878 +n04184095 +n11834272 +n05244755 +n02299039 +n12665659 +n12144987 +n07607492 +n11887750 +n13083461 +n04577139 +n09670909 +n07876893 +n02875948 +n04069582 +n10458111 +n10361194 +n09389867 +n01651778 +n11933387 +n13193143 +n12834190 +n03516266 +n02184589 +n10041373 
+n02809605 +n04064213 +n04957589 +n12643113 +n02582721 +n07911061 +n07921360 +n10369417 +n10527147 +n04104925 +n03707372 +n01386182 +n10374849 +n09902851 +n08559155 +n02332447 +n11649150 +n11722036 +n01823740 +n04592356 +n10002257 +n10661732 +n07562379 +n07597263 +n04036776 +n13112201 +n09842288 +n07738105 +n04545984 +n09635973 +n02885233 +n02756854 +n07808479 +n03029296 +n01543383 +n02884450 +n09843716 +n04224395 +n10576676 +n10140051 +n07919894 +n07806879 +n10212780 +n09478210 +n12017127 +n03770224 +n07606191 +n03555217 +n09715165 +n12270460 +n12129738 +n11739365 +n02303585 +n07818029 +n05314075 +n03019304 +n09859975 +n09454744 +n13151082 +n12586989 +n00455076 +n07741357 +n04957356 +n08659242 +n04577293 +n04126244 +n03131193 +n12428242 +n03569494 +n03781594 +n07743384 +n02892392 +n12576695 +n12199982 +n07693439 +n07719756 +n11884384 +n03043798 +n12351091 +n03690168 +n02214499 +n01839949 +n01831360 +n12642964 +n02957862 +n03125588 +n12883628 +n04002371 +n10747965 +n09744462 +n02853745 +n13030337 +n12156679 +n02761034 +n12587487 +n03374570 +n12728322 +n01731764 +n07918706 +n03696445 +n03185868 +n02805283 +n03868763 +n02202124 +n12369665 +n12449934 +n12650229 +n02656301 +n07743723 +n11702713 +n02927053 +n03916385 +n01486010 +n03986071 +n04188064 +n13897528 +n12414329 +n07718068 +n07837755 +n11735570 +n10464542 +n04091466 +n01315581 +n10374943 +n03989898 +n13220525 +n04076052 +n04062179 +n02414442 +n04414101 +n04446162 +n00480885 +n03536568 +n03773835 +n10728998 +n12643877 +n02255391 +n03799610 +n07847585 +n00446411 +n11910666 +n03139998 +n02296276 +n02889996 +n02786611 +n10363445 +n07854348 +n08583682 +n09912681 +n07896422 +n02368821 +n11935953 +n12185254 +n11738547 +n03809211 +n02448318 +n13066979 +n01987076 +n12009047 +n12839574 +n13174823 +n07902520 +n03369866 +n13209129 +n02593191 +n03853291 +n02620578 +n10071332 +n01813658 +n09895480 +n10134760 +n01316734 +n07845166 +n03175983 +n13132156 +n12814960 +n12883265 +n03637787 +n04310507 +n04133114 +n03900194 
+n04129688 +n04449550 +n01805321 +n01717467 +n01573627 +n12271451 +n11722621 +n09976917 +n12232280 +n12905135 +n03451253 +n01655344 +n12346986 +n11987511 +n10517283 +n02941845 +n12730370 +n03121190 +n07917874 +n10023656 +n10151133 +n07695187 +n03258456 +n10639238 +n10682713 +n02085019 +n12343753 +n10749928 +n04595611 +n04410565 +n08500819 +n07719980 +n04016479 +n03232417 +n03469832 +n09834885 +n07925327 +n10094782 +n03632100 +n12734215 +n09845849 +n04047139 +n10743124 +n02604954 +n12270278 +n03036244 +n11991777 +n10168012 +n02561803 +n10531109 +n10344319 +n03804211 +n10513938 +n10732967 +n09917481 +n02950482 +n03148808 +n07910245 +n07925423 +n07889990 +n04302988 +n07745357 +n04346511 +n07573563 +n02564403 +n12084400 +n10030277 +n09815455 +n04388473 +n12404729 +n10576316 +n12072210 +n11811059 +n01824344 +n03556811 +n03175301 +n07586485 +n13137010 +n11986729 +n04967561 +n03881404 +n07692114 +n07874995 +n02770585 +n07853345 +n02775689 +n04328580 +n01323781 +n07773428 +n02414043 +n02794474 +n02352932 +n07569873 +n12374705 +n03606106 +n04267246 +n04369485 +n11934239 +n12705698 +n11841247 +n07868045 +n03525693 +n12358293 +n02937010 +n09658398 +n12711182 +n03516647 +n04591631 +n10228712 +n11930353 +n03471779 +n12594324 +n02251593 +n04455579 +n02542017 +n03381450 +n03320845 +n12364940 +n09657748 +n12412987 +n01840412 +n10570704 +n10117267 +n03251280 +n10195261 +n12178129 +n12285049 +n02177775 +n10117415 +n03707766 +n04475309 +n05604434 +n03999064 +n12127575 +n01972131 +n09793946 +n01635176 +n02791532 +n07564101 +n07876460 +n02813981 +n10764719 +n03638743 +n12761702 +n02125689 +n11657585 +n09923003 +n13069773 +n02683183 +n04324515 +n11936946 +n12862828 +n02659808 +n02619861 +n13175682 +n11648039 +n07768139 +n12512674 +n12108613 +n02947977 +n12899971 +n03845107 +n07689490 +n02081927 +n07619508 +n10248377 +n10300041 +n10761326 +n09655213 +n02675522 +n04963111 +n01995686 +n03256631 +n10684630 +n04471912 +n12728864 +n03870546 +n02829246 +n09725546 +n03409920 +n13194918 
+n10055297 +n02513248 +n01462803 +n11782266 +n13094145 +n07839478 +n13916363 +n07932454 +n09722817 +n07774479 +n10386874 +n12832822 +n01599388 +n02964295 +n04349189 +n07689313 +n11653126 +n02309841 +n02064000 +n04410663 +n04562122 +n02358712 +n09901786 +n10441124 +n12882158 +n12815668 +n10159289 +n01641930 +n03315990 +n12271187 +n10277638 +n07815163 +n12903014 +n07915366 +n04412300 +n01324799 +n03408264 +n09452291 +n03019198 +n11890884 +n10355806 +n03186199 +n04013600 +n12541157 +n06259898 +n06273294 +n11946051 +n01671705 +n04415257 +n01905321 +n04050600 +n12604460 +n04051439 +n02929184 +n11765568 +n10025060 +n02396796 +n04033287 +n13027557 +n03127531 +n10308066 +n09729062 +n01593553 +n02476567 +n07609728 +n12970293 +n01419888 +n03215749 +n01684741 +n13067672 +n03870290 +n07846359 +n12961536 +n03356559 +n07727140 +n09843602 +n02378755 +n12044041 +n01977485 +n07718920 +n12060546 +n04265428 +n12237855 +n04006067 +n10227266 +n04361937 +n12134486 +n10097842 +n02264591 +n03912821 +n07594155 +n03116163 +n11771924 +n04155457 +n12394118 +n10507380 +n01844746 +n11901452 +n03024233 +n03383562 +n11806814 +n10062716 +n04204755 +n08613733 +n12907671 +n03533654 +n09826605 +n03109033 +n07606419 +n03537085 +n11615812 +n07695504 +n11694300 +n04520962 +n09971839 +n02664285 +n03402511 +n02061560 +n13133140 +n03548195 +n12877493 +n02425086 +n12845187 +n12488454 +n02975994 +n02071028 +n01457407 +n03685486 +n07605282 +n07771405 +n07827554 +n10538733 +n03438780 +n04379096 +n12686496 +n10001764 +n11848867 +n12125001 +n09886540 +n03275566 +n01442710 +n12789554 +n07858197 +n12722071 +n12868880 +n10441694 +n12409651 +n07727741 +n12289585 +n04069166 +n12686877 +n03723439 +n07815956 +n12543455 +n10778044 +n02200630 +n10074841 +n12640284 +n12589841 +n07592317 +n07866571 +n12712626 +n04228422 +n11711289 +n03590475 +n13081229 +n03045800 +n03639230 +n02874214 +n07615954 +n03204134 +n12053962 +n12769219 +n15006012 +n09873769 +n11818636 +n01959029 +n03349599 +n12227909 +n07576969 +n03638180 
+n07742224 +n03390673 +n02344175 +n03770520 +n00447361 +n13235319 +n01983674 +n10061882 +n04267165 +n12493868 +n12713358 +n02930339 +n10493419 +n12918810 +n02582220 +n12248359 +n02644501 +n04596492 +n04538249 +n07905618 +n13230190 +n07808268 +n15005577 +n09351905 +n12730544 +n11937023 +n04024137 +n02238358 +n11646955 +n11618079 +n09849990 +n04060448 +n04220805 +n12725940 +n12004120 +n01484562 +n02669442 +n12132956 +n01756916 +n03980986 +n02256172 +n07716750 +n12119390 +n04047834 +n11934041 +n12828977 +n03648219 +n11873612 +n12909614 +n04397860 +n03908111 +n03261395 +n03695616 +n11668117 +n12014355 +n02896074 +n03988758 +n04426184 +n10328696 +n02477028 +n04507326 +n04320871 +n03256472 +n01919385 +n03988926 +n13182164 +n07826250 +n03207548 +n01396617 +n04369618 +n07913774 +n13229951 +n03410022 +n12728508 +n01997119 +n03598783 +n01341090 +n03879456 +n01736796 +n02864122 +n13879816 +n02684962 +n12246037 +n02433729 +n04364397 +n09881358 +n02950120 +n03326371 +n02243878 +n01790812 +n12990597 +n03330947 +n07764486 +n03332173 +n10006177 +n03347472 +n07619301 +n10106509 +n12365285 +n01732989 +n07678586 +n04098795 +n07733847 +n03994297 +n12872914 +n02762909 +n07766530 +n13198482 +n02395855 +n12273515 +n04487894 +n07847047 +n12488709 +n02859557 +n04255768 +n02360933 +n03267696 +n03152951 +n10188715 +n10520544 +n13065514 +n02900594 +n03699754 +n01319187 +n01949499 +n10417424 +n01603000 +n12062105 +n09683180 +n09863339 +n01880716 +n10702615 +n03893935 +n10495555 +n04131499 +n02957252 +n02113892 +n07724078 +n12246941 +n04303095 +n01751215 +n04213530 +n12117695 +n12418507 +n01922948 +n12131405 +n13188767 +n01481498 +n03174079 +n02407172 +n11613867 +n10152616 +n10119609 +n04158250 +n11695085 +n07855105 +n02854630 +n03768683 +n12739966 +n12266984 +n12819141 +n12732605 +n13205249 +n11917407 +n01607429 +n02694279 +n07815294 +n06614901 +n07846471 +n12119717 +n02595339 +n12366186 +n10693235 +n12263410 +n12484244 +n10337488 +n04146976 +n01469723 +n07872748 +n03238879 +n12000191 
+n07846938 +n03116008 +n12139196 +n04013176 +n10317963 +n12140511 +n02065726 +n01649556 +n10316862 +n01755952 +n04385079 +n12770529 +n02814338 +n01675352 +n11874423 +n01369484 +n10537708 +n07618281 +n07821404 +n02297819 +n03238762 +n03357081 +n02628600 +n07830986 +n12507823 +n04431925 +n11955532 +n03429771 +n10281896 +n12383737 +n12760875 +n09673091 +n12892013 +n06625062 +n04503269 +n03674842 +n12338979 +n04268275 +n12033139 +n11767877 +n07812790 +n12676134 +n04037873 +n10097477 +n12310638 +n12258101 +n09391386 +n13196738 +n13866626 +n12720354 +n10106995 +n07843220 +n03878294 +n04101375 +n07733217 +n10220080 +n04601938 +n10778148 +n12973937 +n10556825 +n12256708 +n12583855 +n04259202 +n07628181 +n04226962 +n02777402 +n09674412 +n12188635 +n03776167 +n04504038 +n04156591 +n02270945 +n02264021 +n07826653 +n02980203 +n02059852 +n02102806 +n12921660 +n04477725 +n10107173 +n12837466 +n02697022 +n04350688 +n12110236 +n02177196 +n07899976 +n12639910 +n02368399 +n10009162 +n03950647 +n09248153 +n02425532 +n04044955 +n11933257 +n03460899 +n10147710 +n02379743 +n02413917 +n02890804 +n12915140 +n02146879 +n07915800 +n01787006 +n03646809 +n11677902 +n04065909 +n02088992 +n02887832 +n10115946 +n02306825 +n03719560 +n10456696 +n03758220 +n12625003 +n04021503 +n07563366 +n02531625 +n10304650 +n12855710 +n09735654 +n07853762 +n03512030 +n12898342 +n02297938 +n12618727 +n04082344 +n12953712 +n12617559 +n03035715 +n02532451 +n05399356 +n03602686 +n10082423 +n04607759 +n07581607 +n07594737 +n04030965 +n03464628 +n12103894 +n03039353 +n03522990 +n02964934 +n03169063 +n10153865 +n09653144 +n09941571 +n12907057 +n07768318 +n02600798 +n02187150 +n01811243 +n12252383 +n04495555 +n07678953 +n13181244 +n13069224 +n13184394 +n12765402 +n03471347 +n10208847 +n03697366 +n09840435 +n02506947 +n09709673 +n07928578 +n11935715 +n07848936 +n02757927 +n01999767 +n02245443 +n10260473 +n13898645 +n02701260 +n07840219 +n11785875 +n12385830 +n12017664 +n12145148 +n04530456 +n01929186 +n02384741 
+n04113038 +n03296217 +n09723819 +n03766697 +n12143215 +n09929202 +n02684248 +n12119539 +n03566555 +n12941220 +n04124573 +n10750188 +n07733005 +n04230707 +n03829857 +n07756838 +n12244458 +n12543826 +n03514129 +n02762169 +n04435870 +n03342863 +n09745324 +n12369476 +n11652039 +n03915320 +n07746749 +n07608641 +n12642600 +n02389943 +n12137791 +n04111962 +n12493426 +n12454793 +n01455317 +n10728117 +n03281524 +n12195734 +n12353431 +n02477329 +n02678010 +n04557522 +n10162354 +n14942411 +n07806043 +n12274151 +n09835153 +n03983499 +n04086663 +n07851926 +n07868684 +n11926976 +n03972146 +n04310604 +n09675799 +n13880704 +n13173132 +n07577918 +n10720964 +n11937102 +n03349020 +n12340581 +n03725506 +n03477143 +n10578162 +n01731137 +n03382104 +n11616852 +n01493829 +n09327077 +n03856335 +n03321843 +n02375757 +n02118643 +n08500989 +n03496486 +n04140777 +n12858987 +n02845293 +n04093157 +n07819682 +n10394786 +n12289310 +n02901620 +n01559160 +n07919165 +n12648196 +n11774972 +n11995396 +n10543937 +n10154013 +n03977158 +n01884476 +n12266528 +n11906127 +n12661538 +n04396650 +n12761905 +n04175574 +n10181878 +n12017326 +n12876899 +n09744346 +n07741706 +n04451636 +n07735981 +n03751590 +n03140546 +n03070396 +n03091223 +n12071477 +n07562017 +n09981092 +n09847344 +n12552893 +n12371202 +n02245111 +n01598271 +n04400499 +n02298095 +n15048888 +n02967170 +n04030161 +n10676434 +n01556514 +n13235766 +n02538562 +n12603672 +n03941586 +n02449183 +n07567611 +n12923257 +n02296021 +n11730933 +n12497669 +n02917742 +n07875926 +n02714535 +n13142182 +n02878107 +n07861334 +n02682811 +n03730655 +n03681813 +n12970733 +n02132320 +n12436090 +n07931280 +n04295353 +n12982590 +n01783017 +n13164501 +n02424589 +n01499732 +n12650805 +n04543509 +n10369699 +n03439631 +n13160116 +n07831663 +n05449196 +n13025854 +n10169241 +n02847461 +n10734963 +n13213397 +n03343234 +n12275317 +n02793414 +n04300509 +n01803893 +n11617878 +n02179192 +n03637480 +n04514648 +n03087521 +n10478827 +n11757190 +n12919195 +n04532504 +n01736375 
+n04015786 +n04545471 +n12668131 +n04472961 +n14786943 +n07584938 +n02498743 +n07744559 +n10010062 +n10101308 +n07832099 +n02601767 +n10473453 +n02451575 +n02496052 +n03696746 +n12669803 +n07904072 +n04290762 +n11737125 +n07760755 +n12553742 +n12068138 +n12630999 +n02390938 +n02202678 +n02216740 +n02679961 +n13173697 +n11828973 +n02287987 +n04585318 +n10360366 +n07745661 +n03474352 +n07934800 +n12677612 +n03692272 +n13092240 +n04230487 +n11846312 +n12433952 +n11793403 +n03056873 +n05454833 +n12517077 +n12682882 +n02649218 +n09425344 +n07878283 +n02795978 +n10064977 +n12754174 +n02945813 +n01750743 +n03150661 +n13880415 +n12337800 +n04017571 +n09754907 +n04456734 +n02967540 +n10621400 +n11744471 +n01971620 +n04148285 +n10781817 +n11991549 +n12305654 +n03943833 +n10330931 +n12918991 +n01783706 +n11933099 +n12931231 +n07589967 +n09666349 +n07853445 +n12714949 +n03548533 +n04158672 +n03809802 +n03080309 +n12800049 +n02578454 +n02834027 +n10067600 +n03044671 +n04198233 +n07930205 +n04357930 +n12221522 +n11957317 +n03085781 +n03723885 +n03614383 +n02661618 +n04292221 +n03426574 +n03838024 +n10442093 +n12399534 +n01450950 +n07876550 +n11937446 +n09870096 +n02631628 +n05460759 +n01710177 +n03660562 +n04283784 +n01497738 +n02232223 +n04209811 +n12837259 +n02864987 +n04499810 +n12654857 +n03493792 +n09688233 +n02312912 +n10057271 +n07606058 +n03258192 +n10507565 +n11930038 +n08679269 +n03812263 +n11662128 +n04085574 +n07643577 +n03981094 +n02796412 +n02513939 +n07686634 +n07936979 +n03168774 +n03816394 +n07625324 +n04138131 +n10383094 +n10222716 +n10381981 +n12254168 +n13223090 +n03056583 +n09910556 +n03277004 +n12649866 +n02089725 +n03688707 +n09665367 +n07849506 +n02843909 +n13141797 +n02477516 +n09710886 +n03835941 +n11734493 +n10778711 +n10007809 +n02038141 +n12766043 +n02353172 +n02030224 +n10762212 +n06274921 +n13033396 +n03560860 +n01961234 +n13868515 +n03216199 +n01553527 +n04429038 +n10211036 +n02150885 +n02435517 +n02755675 +n09699020 +n12566331 +n03909516 
+n02903727 +n02594942 +n04173172 +n04125692 +n12251001 +n02412787 +n01649412 +n01411450 +n01774097 +n09912907 +n03162556 +n07566231 +n12267534 +n03928589 +n04142327 +n11771147 +n07832592 +n04155177 +n07937621 +n07839864 +n03201895 +n13095013 +n10298271 +n03059103 +n03784793 +n11925450 +n03288742 +n02809364 +n04108999 +n04449449 +n03726233 +n07854455 +n03692136 +n12018447 +n03374282 +n06008896 +n07598928 +n03577312 +n04604806 +n09892513 +n04370600 +n08238463 +n01793159 +n07822687 +n03242390 +n07685303 +n03822361 +n01996280 +n10505942 +n06596845 +n04219580 +n12056990 +n10579062 +n10240082 +n10298202 +n07711907 +n03905730 +n12222900 +n07598622 +n04415815 +n12389932 +n12154114 +n04210012 +n12500751 +n03729402 +n12122918 +n04572121 +n12804352 +n02415130 +n12780325 +n11639084 +n12768933 +n02253494 +n13217005 +n03567788 +n12304286 +n10703480 +n07766723 +n05455113 +n07741804 +n12186839 +n01687128 +n01350701 +n03260206 +n07876026 +n12528382 +n04125541 +n10457444 +n01606097 +n11717399 +n04598416 +n12899166 +n09748101 +n12160125 +n07608980 +n07843348 +n02409038 +n02571167 +n09980805 +n09706029 +n02495242 +n12765846 +n10373525 +n12321873 +n03047171 +n12365462 +n03752398 +n02662993 +n10316527 +n10728233 +n06273207 +n01733214 +n12297846 +n12755876 +n02428842 +n02289307 +n04536465 +n03253187 +n02297294 +n05584746 +n03117642 +n12189779 +n10338231 +n07599649 +n04559994 +n12710917 +n09966470 +n12470907 +n04499300 +n12403075 +n11837743 +n02269657 +n12599185 +n07618587 +n03996004 +n12851094 +n03392648 +n01319001 +n12826143 +n12369845 +n01814549 +n10056103 +n12854193 +n02267483 +n04019881 +n03490649 +n04268142 +n10801802 +n12315060 +n10149436 +n04563790 +n09865068 +n03000530 +n10657556 +n07840672 +n12118414 +n02856013 +n02900459 +n04094859 +n12079523 +n11827541 +n12236160 +n02904505 +n02846619 +n09842823 +n12926039 +n02146201 +n03195799 +n12815838 +n09899289 +n01483021 +n02519340 +n05453815 +n10329035 +n02494383 +n09742927 +n13220355 +n03212406 +n11759609 +n10061431 +n12095281 
+n04262530 +n03799240 +n02426176 +n04608809 +n12230540 +n13880551 +n11741175 +n11858814 +n11723452 +n07590841 +n12604845 +n10342543 +n12760539 +n09270657 +n02563079 +n10643937 +n12843316 +n01651641 +n07838811 +n04359034 +n07758260 +n02762725 +n11726433 +n03114743 +n01952029 +n12321395 +n11930571 +n12337922 +n12427946 +n12001294 +n12551457 +n13235011 +n02290340 +n06419354 +n12408873 +n01741442 +n12308447 +n10243872 +n03658635 +n03694761 +n02570484 +n12912801 +n04158002 +n02417785 +n01332181 +n03703075 +n10283366 +n03142431 +n02779609 +n02300554 +n09868782 +n10323752 +n03166809 +n03394149 +n02827148 +n02186717 +n01350226 +n03344784 +n03555996 +n04498873 +n13157481 +n04519887 +n12028424 +n12349711 +n10471640 +n07741235 +n04032936 +n12357968 +n10228592 +n13178284 +n04168840 +n13239177 +n03561573 +n02566489 +n11807696 +n07681264 +n02566665 +n10456070 +n10063919 +n10492727 +n01788579 +n11977660 +n02036228 +n02738978 +n03989349 +n10332953 +n12949361 +n09901502 +n07839730 +n13146928 +n10152306 +n04170515 +n11602478 +n02522722 +n01333610 +n13030852 +n02143891 +n12807624 +n04542329 +n12243693 +n12036226 +n13917690 +n02553028 +n02752199 +n10594857 +n11627714 +n04348070 +n13171797 +n04612257 +n07934373 +n04536765 +n02244515 +n04526800 +n04546595 +n02551668 +n12143405 +n07871588 +n07858484 +n03628728 +n13179804 +n03242264 +n12089846 +n07588688 +n07620047 +n01647466 +n09685233 +n03467254 +n12666369 +n05449661 +n10694939 +n12886600 +n12256522 +n04006330 +n03317673 +n04316815 +n12222090 +n04022866 +n04088441 +n07617526 +n10782362 +n04355821 +n13901490 +n12508618 +n03849943 +n04503499 +n13193466 +n09754633 +n07583978 +n13911045 +n07643679 +n12054195 +n10692090 +n04032509 +n10146927 +n02031298 +n04002629 +n04035748 +n10712229 +n02866106 +n07909504 +n04540397 +n06266878 +n10219879 +n12567950 +n07853648 +n03191561 +n07856045 +n12646197 +n03317510 +n10515863 +n13198054 +n02808829 +n12889579 +n02698473 +n09924437 +n03595055 +n12306270 +n07857356 +n09715303 +n03024518 +n04323519 
+n09629065 +n04178668 +n12748248 +n02308618 +n07873198 +n10564098 +n03007297 +n04036155 +n02143439 +n10507482 +n12267931 +n03956331 +n12888234 +n04066476 +n07813107 +n02736396 +n10306496 +n12324388 +n01744555 +n01649726 +n06596179 +n03616091 +n07754279 +n02072493 +n12408280 +n04314632 +n02412700 +n04030846 +n09833997 +n03599964 +n05258627 +n12572759 +n12136581 +n02419056 +n12453714 +n11652217 +n03878511 +n03907908 +n12223160 +n10514121 +n04153330 +n12163279 +n12623818 +n03495671 +n13222985 +n10354754 +n04365112 +n12384680 +n12538209 +n03105214 +n12534862 +n13869045 +n03945928 +n11613692 +n11892181 +n13002209 +n02685253 +n07598529 +n02629716 +n13202355 +n07927070 +n02176916 +n04370955 +n11988132 +n03246197 +n01440467 +n07620145 +n03940894 +n01897667 +n03408340 +n12602612 +n02539424 +n03863657 +n04559620 +n02604480 +n11822300 +n03518829 +n11619845 +n10504090 +n03341035 +n02908123 +n04281998 +n03277602 +n03865288 +n10074578 +n13902793 +n03054605 +n04404200 +n12786836 +n12235051 +n04035231 +n12009792 +n12705458 +n04378489 +n02476870 +n11954798 +n03573848 +n02087314 +n03162460 +n04363412 +n02261063 +n09953615 +n01947139 +n03044801 +n04287351 +n04479287 +n03861596 +n12510343 +n07854066 +n03027505 +n12161577 +n04197878 +n01812187 +n10015792 +n08685188 +n11737009 +n10333044 +n02730568 +n10290813 +n13096779 +n05257476 +n07917951 +n12121187 +n03517509 +n07932762 +n02336275 +n12159942 +n12105981 +n02562971 +n13882961 +n12016777 +n02793684 +n12717644 +n01380754 +n07724173 +n04055861 +n11831297 +n03059934 +n03370646 +n10065758 +n09459979 +n07913644 +n04322531 +n03457451 +n02567633 +n04240867 +n10693334 +n10556704 +n04614844 +n07909362 +n12082131 +n09268007 +n04359217 +n09883807 +n02292085 +n04052346 +n03431570 +n02843465 +n04584056 +n04432043 +n09846142 +n07864317 +n04475749 +n04227050 +n04280845 +n03535284 +n07890617 +n03217889 +n02806762 +n11967315 +n11762927 +n02501923 +n03442487 +n09690083 +n02964634 +n02920164 +n07855317 +n10196725 +n03042829 +n11662937 +n12183816 
+n12311224 +n13884261 +n02243209 +n03140771 +n02385002 +n03071288 +n12936826 +n04583022 +n07859142 +n04578112 +n04467506 +n12938081 +n09982152 +n12555255 +n03335333 +n10104888 +n12151170 +n12709349 +n10456138 +n02237868 +n07620327 +n12561309 +n12341931 +n12350032 +n01775730 +n12950796 +n01440242 +n04261767 +n10568915 +n12285195 +n07589872 +n13112035 +n07840395 +n11750508 +n12286197 +n03336168 +n03325288 +n02551134 +n04293258 +n13130014 +n07733124 +n04451139 +n11985903 +n03602365 +n11722342 +n11944751 +n12897999 +n02277422 +n03101302 +n07608245 +n03531982 +n01997825 +n11713370 +n04442582 +n02833403 +n04427857 +n01648356 +n10645223 +n10414865 +n10696101 +n12885045 +n10037080 +n12218274 +n07570530 +n04493259 +n10659042 +n10577710 +n03141612 +n10582604 +n00446632 +n02834642 +n07568389 +n04583888 +n04096848 +n12879068 +n04495051 +n09837459 +n12216215 +n03702440 +n10174695 +n10559009 +n10577182 +n07686299 +n04269668 +n02404028 +n03720665 +n09885866 +n03082450 +n12492682 +n12780563 +n03703463 +n02644360 +n02307910 +n01374703 +n04402342 +n04264134 +n03158414 +n04443433 +n12522894 +n10803978 +n11706942 +n10751026 +n13143758 +n02972934 +n04174234 +n12718995 +n11994150 +n11545350 +n12526754 +n07753448 +n02870772 +n11942659 +n11744108 +n12735160 +n12229887 +n04970312 +n02874336 +n10721819 +n13193269 +n03330665 +n09865162 +n10306595 +n12161744 +n03303669 +n07846688 +n02168427 +n01961600 +n03559531 +n09826821 +n03413124 +n09695019 +n03783873 +n11863877 +n13874558 +n02283617 +n11895472 +n13182799 +n07854614 +n03283827 +n01397690 +n02650413 +n09809279 +n10290541 +n10383505 +n11724660 +n07689757 +n10181547 +n07620597 +n11979354 +n02771547 +n13061471 +n12631637 +n11966385 +n03969510 +n11735977 +n07621497 +n12956588 +n03217653 +n04546081 +n11696450 +n10300654 +n02032769 +n01654863 +n09779280 +n02390258 +n03887512 +n10489426 +n10745770 +n10713843 +n03602194 +n10710913 +n07864475 +n04486322 +n07915213 +n08663051 +n10236842 +n02390738 +n02388453 +n03598385 +n12228689 +n11771746 
+n12803226 +n11242849 +n02378149 +n10427223 +n05448827 +n11870044 +n12477983 +n12311413 +n03500090 +n10280034 +n02685365 +n03652389 +n12728656 +n07695284 +n09961198 +n03780799 +n03935883 +n01612955 +n12475774 +n02701730 +n07833535 +n12584365 +n03902220 +n12727960 +n10619492 +n04450465 +n10646780 +n10110731 +n04142175 +n12296735 +n09337048 +n12681579 +n12819354 +n12541403 +n04305016 +n12798910 +n10321126 +n08618831 +n09721244 +n02225798 +n01637338 +n12218868 +n05545879 +n12022382 +n03972372 +n02505063 +n01694311 +n10695450 +n10081842 +n12297507 +n07592922 +n12118661 +n01952712 +n10517137 +n01340522 +n07719330 +n03729482 +n04168541 +n03090710 +n07873679 +n07828378 +n07728284 +n10343088 +n07869937 +n14585392 +n01453475 +n12095412 +n04973020 +n12810007 +n07564515 +n01599741 +n11629047 +n09937802 +n12450607 +n12460146 +n02292401 +n03632963 +n09617696 +n12545232 +n02874642 +n09934488 +n10091349 +n01447946 +n05469861 +n11830400 +n03382533 +n02608547 +n12697152 +n03542727 +n10716576 +n03664159 +n07568625 +n02976815 +n13147532 +n02336826 +n12432574 +n07686461 +n04107598 +n02505998 +n09849167 +n03688066 +n02836513 +n01576358 +n01893021 +n12017511 +n12065649 +n01714231 +n11662585 +n12827907 +n12954353 +n11936199 +n01368672 +n03843883 +n12184095 +n10058411 +n11684654 +n08506347 +n10579549 +n01423302 +n11604046 +n07613158 +n03605504 +n02090129 +n02284224 +n01958435 +n12664469 +n04459122 +n09617161 +n09780828 +n11830252 +n12870048 +n04247544 +n09871095 +n02962938 +n09933020 +n13064457 +n10341243 +n07694169 +n13200193 +n07765728 +n01524761 +n07730562 +n07751737 +n07740855 +n04192521 +n12593122 +n07841037 +n02809736 +n10604275 +n12512095 +n01907287 +n04592596 +n09823153 +n03181667 +n12449784 +n07908923 +n12365900 +n03053976 +n15060688 +n04165675 +n02530637 +n09816654 +n12540966 +n07934152 +n09290350 +n03455802 +n10111779 +n01351315 +n10281770 +n13862552 +n12435486 +n12370174 +n12296045 +n03493219 +n12363301 +n11973749 +n03939565 +n02938321 +n13209270 +n12604639 +n12657755 
+n03604536 +n10328941 +n04278932 +n10376890 +n01884203 +n02061853 +n04256318 +n07831821 +n10585217 +n07591813 +n10210648 +n07739035 +n01632308 +n10319313 +n02861777 +n03821145 +n13029610 +n04239900 +n10313441 +n04951716 +n10628097 +n02368116 +n08571275 +n04433377 +n10458596 +n12435965 +n12448136 +n12129986 +n04295777 +n07898895 +n07854266 +n12327846 +n12318782 +n07825850 +n10414239 +n11731157 +n04409911 +n10655442 +n11829205 +n01738306 +n02840515 +n04150371 +n03369512 +n02645538 +n12773917 +n07818422 +n03227010 +n10303037 +n12942025 +n12406304 +n06616216 +n02435216 +n12981954 +n03683341 +n09703809 +n07722666 +n11817160 +n10110893 +n10228468 +n03572631 +n01378545 +n02130086 +n04388574 +n11960673 +n12956922 +n11924014 +n09895902 +n03426462 +n07759576 +n02563949 +n03466947 +n02522637 +n09480959 +n02033882 +n02451415 +n12677120 +n10580437 +n04425977 +n03057841 +n12285512 +n07614348 +n03144873 +n03391613 +n12366870 +n02304657 +n07863935 +n07909714 +n02413717 +n12591702 +n07838659 +n02967407 +n12016914 +n02735268 +n09470027 +n10222259 +n03899100 +n10513509 +n11620016 +n12600267 +n04368840 +n03016209 +n04085017 +n03215076 +n10238272 +n09782855 +n07586179 +n12434483 +n12452480 +n01990516 +n12030092 +n11739978 +n12714254 +n13036804 +n07727377 +n07879560 +n03710421 +n12128490 +n11968519 +n03250588 +n10173579 +n03114041 +n02942015 +n12729164 +n07871065 +n02591330 +n09353815 +n10138472 +n02712545 +n12866333 +n07835823 +n03508485 +n01758895 +n02925385 +n03321419 +n09931418 +n02846874 +n12500518 +n07587819 +n03160186 +n04974340 +n13067532 +n11940349 +n13027879 +n02878534 +n10055566 +n07925708 +n12628356 +n11958499 +n03472672 +n04233295 +n04563020 +n03426871 +n04330109 +n03677682 +n04129766 +n02884859 +n12692521 +n10188856 +n03500971 +n10355306 +n12407545 +n11955040 +n10028541 +n10345659 +n14720833 +n09641578 +n12613706 +n11718296 +n03380301 +n01334217 +n03890358 +n03583419 +n12447121 +n09660010 +n11826569 +n11837351 +n12096089 +n03871860 +n01821554 +n12834938 +n02738449 
+n02644665 +n03316873 +n12548564 +n03605417 +n12094401 +n13152339 +n03004531 +n03080904 +n03535647 +n12349315 +n04213264 +n07860208 +n01526766 +n03710937 +n11806521 +n10618234 +n12306938 +n10473562 +n10050880 +n04596116 +n02577164 +n04479694 +n07936093 +n07834286 +n12175181 +n03986857 +n02919648 +n12055073 +n04567593 +n07585015 +n12771085 +n10551576 +n09778783 +n01593282 +n02406952 +n12331263 +n10629329 +n12287195 +n07729225 +n07828041 +n01880473 +n12257725 +n02696246 +n07853232 +n11936864 +n09745229 +n03364156 +n04503155 +n03194297 +n04003359 +n07607361 +n10106387 +n10306890 +n10455619 +n01647180 +n07740115 +n12106323 +n03626272 +n11685621 +n11866706 +n04321121 +n01606978 +n12621619 +n11615259 +n07840304 +n02841847 +n05459769 +n03432360 +n04604276 +n12356395 +n12468545 +n03645168 +n00477827 +n03459591 +n04202142 +n12959074 +n07881625 +n12382233 +n02405692 +n12299640 +n12247202 +n12628705 +n12534625 +n09264803 +n12176953 +n09835017 +n10390807 +n04975739 +n12474418 +n11931135 +n07917791 +n10636488 +n09690496 +n11993675 +n03703203 +n11794139 +n13015688 +n04168084 +n01948446 +n10169419 +n04455048 +n04973669 +n12840502 +n12120578 +n10448455 +n01386007 +n02288122 +n01441910 +n02278463 +n03108759 +n02753710 +n03143400 +n13080866 +n13917785 +n13124358 +n13220663 +n02475358 +n01925916 +n02684649 +n10451590 +n03869976 +n03881305 +n07928264 +n01422185 +n04035634 +n11996677 +n04261369 +n12925583 +n12764008 +n09972587 +n03708962 +n01791388 +n02892626 +n04098399 +n07823369 +n07752874 +n13225244 +n03376771 +n01771766 +n13146403 +n12157179 +n13897198 +n07770869 +n13240362 +n07610502 +n03688504 +n02896856 +n12543186 +n09967063 +n05453412 +n12590600 +n02378870 +n07568241 +n01687290 +n00474769 +n11694866 +n02338722 +n02637977 +n04567746 +n10586444 +n11907405 +n03421960 +n07605693 +n10384214 +n12877637 +n12018363 +n10056611 +n13882487 +n12140759 +n04114301 +n11762018 +n12678794 +n11817501 +n02116450 +n12018530 +n03324629 +n12726528 +n03155502 +n10493199 +n04181083 +n10609198 
+n04328703 +n03045074 +n07769886 +n01892385 +n12828520 +n03165211 +n11800565 +n07567139 +n13877547 +n12829582 +n02949084 +n07589724 +n01746191 +n12395463 +n05459457 +n10565502 +n11981475 +n09310616 +n12327022 +n02313709 +n12957803 +n11865276 +n12955414 +n12939479 +n13225365 +n07936459 +n03139089 +n07577772 +n12057895 +n03620353 +n12152031 +n01885158 +n04096733 +n12626674 +n10464711 +n10675609 +n07752782 +n03709960 +n02540983 +n02285179 +n01903234 +n07835701 +n04421083 +n02352290 +n09421031 +n03349367 +n02539894 +n04052235 +n07922955 +n03941887 +n04234260 +n04423552 +n11975254 +n08501887 +n12489676 +n04574348 +n10602119 +n02163008 +n02748491 +n10024937 +n10033888 +n12605683 +n01790398 +n10128519 +n14977188 +n10293590 +n12077244 +n09741074 +n11694469 +n12692714 +n12159804 +n12533437 +n03831203 +n03692004 +n09462600 +n04537436 +n06618653 +n07913537 +n12783316 +n10038119 +n10236521 +n01486540 +n07875267 +n04345787 +n07681355 +n13028937 +n03607186 +n07863107 +n12387103 +n09830926 +n03574416 +n04478383 +n11685091 +n03197446 +n03225458 +n09741722 +n07736527 +n02857907 +n10177150 +n12711398 +n10308275 +n02418770 +n02577662 +n09935107 +n03362639 +n12446908 +n04329681 +n04114428 +n09624899 +n12913144 +n12338034 +n02341616 +n12360817 +n12907857 +n02414904 +n05482922 +n11974888 +n04127117 +n12581110 +n04368365 +n01699254 +n12525753 +n04254450 +n11951052 +n12458874 +n12721477 +n07562651 +n02239192 +n10533874 +n12006306 +n09537660 +n10008123 +n02788386 +n03248835 +n04491312 +n11795580 +n04025633 +n10166189 +n07703889 +n11824747 +n07605198 +n12134836 +n03591116 +n02946753 +n13212025 +n11742310 +n02328820 +n02985606 +n09955944 +n12679432 +n10020366 +n12013035 +n02942147 +n04172512 +n11802410 +n10789709 +n03385295 +n02039497 +n01416213 +n11940750 +n12178780 +n01967963 +n12662379 +n12217851 +n02812631 +n12432069 +n09991740 +n03089477 +n12458713 +n03876111 +n10311661 +n12286068 +n02838958 +n11936369 +n03716228 +n13228017 +n06276902 +n12677331 +n04330189 +n10488016 +n12011370 
+n04343740 +n07893792 +n02171164 +n03963483 +n12080588 +n07577657 +n12936155 +n03809686 +n04223066 +n04086066 +n12776558 +n07813579 +n01841943 +n12285705 +n02581482 +n11653570 +n10010632 +n04305947 +n12228886 +n12797368 +n01404495 +n09697986 +n11882237 +n10077879 +n07607832 +n09779461 +n13212379 +n10769188 +n10715789 +n01480106 +n02145910 +n04275093 +n01983829 +n01978010 +n09937903 +n11976314 +n11785276 +n12386945 +n04445782 +n10712374 +n10706812 +n10194775 +n12655062 +n10739135 +n02597972 +n02307176 +n04121342 +n02350670 +n12698027 +n02805845 +n02895008 +n13149970 +n03451365 +n04542595 +n07803895 +n07864198 +n09690864 +n03844550 +n12378249 +n10345422 +n13163553 +n10457903 +n10783539 +n10539015 +n11757017 +n10274173 +n08652376 +n10283546 +n04541777 +n02824152 +n12945177 +n02082056 +n03695957 +n07936015 +n07591162 +n03628071 +n02990758 +n07685118 +n04023422 +n04951875 +n03541393 +n10289176 +n04039209 +n07913180 +n07910799 +n12017853 +n03732543 +n10656120 +n10512859 +n04556664 +n12464649 +n12927758 +n12078451 +n07878145 +n10561320 +n12467592 +n07689217 +n07619881 +n11935187 +n09837720 +n03642144 +n12220019 +n02983507 +n03271260 +n02778588 +n10193650 +n01654083 +n02746978 +n10202763 +n02953552 +n07924366 +n08583554 +n02905886 +n07855603 +n09745834 +n12366053 +n04140539 +n03383211 +n11648268 +n03352961 +n12116734 +n07771539 +n07836077 +n03842754 +n11683838 +n03004409 +n11730750 +n13098962 +n12292463 +n02867592 +n01653026 +n07583865 +n12548804 +n12702124 +n03917048 +n12677841 +n12511488 +n04217387 +n12495670 +n03554375 +n12403513 +n08558770 +n02781764 +n12339526 +n12742290 +n01404365 +n03591798 +n12446737 +n10494195 +n12110352 +n01672611 +n10493922 +n03638623 +n09910840 +n02238594 +n02575325 +n13186546 +n11873182 +n10344774 +n04094060 +n10417682 +n02749169 +n02428089 +n04549721 +n03824284 +n12107002 +n12784371 +n09986904 +n01634227 +n07826544 +n12253487 +n01679005 +n12516165 +n09339810 +n03126090 +n07803408 +n11883945 +n03842276 +n03397412 +n03280216 +n12264786 
+n02545841 +n11877860 +n01830479 +n13207923 +n12490490 +n02542958 +n04114719 +n12590715 +n13226320 +n11644872 +n04119630 +n10176913 +n04213105 +n11652966 +n12546420 +n12625823 +n11897466 +n02092173 +n10567613 +n04953678 +n10059067 +n12408466 +n03056288 +n13036116 +n04169597 +n12467197 +n02569905 +n02758490 +n12623211 +n04077889 +n04959061 +n04183957 +n11689815 +n03777126 +n03306869 +n07720084 +n02659478 +n12947756 +n04341288 +n04448185 +n04037076 +n09828988 +n03346289 +n04174705 +n13126050 +n04255346 +n09764732 +n11773628 +n14891255 +n04314107 +n02184720 +n02646892 +n04320598 +n01979526 +n03191451 +n03662452 +n10290422 +n01739094 +n02305636 +n04202282 +n05459101 +n02766168 +n09994808 +n03528100 +n10475940 +n03005619 +n12639168 +n02144936 +n13202125 +n10703221 +n03770834 +n12324056 +n03474167 +n02609302 +n12166929 +n12852570 +n12920719 +n12508762 +n11983375 +n01422450 +n12616630 +n09681107 +n10486561 +n13038577 +n12266644 +n02478875 +n02547014 +n02249809 +n03336742 +n12038760 +n01672432 +n09861287 +n03678879 +n01949973 +n09928845 +n02310149 +n12648693 +n10533983 +n12812801 +n04550676 +n01800633 +n12128306 +n12744142 +n13140367 +n07803213 +n07688265 +n13068434 +n02030568 +n12955840 +n01625121 +n13215258 +n04270576 +n02680638 +n02817251 +n01539272 +n04066023 +n12969927 +n10280598 +n04001661 +n09774167 +n10358575 +n01836673 +n02290664 +n09940725 +n12447581 +n07803779 +n04561965 +n10151261 +n01538362 +n10170060 +n13160365 +n09823287 +n12554729 +n10620212 +n11935027 +n03465605 +n03227856 +n08519299 +n07785487 +n03522863 +n02861286 +n12200905 +n04269502 +n02104184 +n07612273 +n01390763 +n11872658 +n12981086 +n10244359 +n01738731 +n12117235 +n12846690 +n02861658 +n08782627 +n09832633 +n02531114 +n01394492 +n03269073 +n03077442 +n09794668 +n13884384 +n08659331 +n02556373 +n02587877 +n03523506 +n03723153 +n12024805 +n13061172 +n03978575 +n07914686 +n13134844 +n12183026 +n03573574 +n03765128 +n03319167 +n01920438 +n07852452 +n07680655 +n03017698 +n12959538 +n04261506 
+n01793340 +n03292362 +n12817855 +n03593222 +n01962506 +n12453018 +n04027367 +n12518481 +n09223487 +n07871335 +n03779246 +n09668562 +n01889849 +n02492356 +n07830841 +n03277149 +n09968652 +n03092476 +n10400205 +n06263202 +n07595368 +n12767208 +n02196896 +n12580012 +n10265801 +n02103181 +n02922461 +n01731277 +n12422559 +n04278605 +n02250280 +n03283413 +n11829922 +n10191613 +n02493224 +n04427559 +n12181352 +n12742878 +n10683675 +n04503705 +n03785142 +n12816942 +n10723230 +n11936707 +n12360534 +n12909759 +n03766218 +n02696843 +n11935877 +n07828156 +n10617397 +n12921499 +n13158714 +n10166394 +n12370549 +n03505015 +n12769065 +n02636550 +n10781236 +n09869317 +n10275249 +n04234763 +n10735173 +n13137225 +n02070776 +n04232312 +n07575226 +n03471030 +n07909954 +n02633677 +n01662060 +n07563642 +n04263950 +n11824344 +n13178707 +n02972714 +n10417288 +n12092930 +n11993203 +n10170681 +n03726116 +n03215337 +n12564613 +n14975598 +n07758125 +n03123666 +n07717714 +n01421333 +n02359667 +n09403086 +n03857026 +n12759668 +n02628259 +n02307515 +n12146488 +n09777870 +n07819303 +n12105353 +n10784113 +n11802995 +n12561594 +n02845130 +n12100187 +n03507658 +n02141611 +n01800195 +n03470005 +n12444898 +n02203592 +n09707061 +n00475142 +n12216628 +n01732093 +n02581642 +n03803780 +n12114590 +n04541662 +n12267133 +n11652753 +n07859951 +n04524594 +n12843144 +n04040540 +n10604880 +n12559044 +n03063834 +n12394328 +n12704513 +n10230216 +n10756641 +n02101670 +n12309630 +n03070587 +n11626010 +n04239639 +n01638329 +n01928517 +n13144084 +n10420649 +n03102516 +n12395289 +n09833111 +n01651285 +n11688069 +n12881913 +n12783730 +n07716649 +n03618678 +n10344203 +n03626502 +n10718665 +n03577474 +n01683201 +n03246653 +n12153224 +n02519472 +n02470709 +n15090238 +n03129636 +n07774295 +n04577567 +n09995829 +n09662038 +n10297367 +n03555862 +n12531727 +n09947127 +n12533190 +n04062807 +n00479734 +n12860978 +n01884104 +n09866559 +n12069009 +n04595501 +n12088495 +n02909053 +n12283790 +n02180427 +n10697282 +n07562881 
+n13092078 +n11706325 +n01746952 +n01978136 +n07731436 +n02386746 +n12648424 +n12726357 +n10314182 +n07839172 +n11753562 +n12903503 +n12589687 +n02375438 +n03604763 +n11549895 +n13202602 +n12304420 +n10738215 +n12220829 +n10095420 +n12177455 +n11887476 +n04006411 +n09838370 +n02853218 +n12688372 +n03335461 +n02800940 +n03036701 +n09885059 +n10206629 +n11922926 +n01678657 +n12192132 +n12248141 +n03108624 +n01936671 +n02417242 +n03222857 +n03768823 +n04343511 +n03538817 +n12655726 +n12521186 +n01330497 +n12767423 +n12965951 +n09695132 +n04410886 +n12599874 +n07865700 +n07596160 +n10227698 +n03224490 +n11598886 +n02948293 +n09906293 +n12247963 +n03301175 +n03895170 +n04259468 +n07808806 +n13147689 +n09856827 +n13882639 +n02241008 +n03842585 +n02883101 +n12182276 +n13918717 +n12728164 +n10634464 +n02477187 +n03107716 +n02342250 +n01479213 +n12793695 +n09808080 +n10707707 +n04161010 +n02836607 +n10076483 +n07726386 +n03872273 +n10250712 +n07688412 +n13884930 +n12301766 +n10196404 +n07591330 +n03814727 +n09610255 +n12757115 +n09814381 +n02397987 +n07886317 +n03959123 +n02185167 +n03533845 +n11838413 +n10227393 +n07704305 +n03580615 +n02663485 +n10101981 +n04346855 +n10067011 +n04464125 +n02829510 +n10007995 +n07845775 +n03004713 +n02450561 +n09905530 +n10361060 +n12394638 +n12095934 +n10479135 +n03145277 +n12246773 +n13194212 +n04475900 +n03252787 +n14867545 +n10485298 +n09961739 +n02149653 +n01553762 +n03931980 +n02344408 +n11676850 +n04034367 +n04235646 +n12867184 +n12625670 +n12763529 +n07593107 +n04351550 +n02571810 +n13899735 +n03652826 +n09495962 +n03421768 +n04205062 +n11918808 +n07745197 +n07752264 +n01892744 +n04609811 +n10278456 +n11790936 +n09754152 +n13234519 +n09820044 +n00440643 +n02350357 +n03779884 +n07803992 +n03305953 +n01836087 +n10068234 +n10690421 +n03134394 +n12380761 +n12801966 +n03134232 +n02596720 +n07591236 +n11882821 +n02312175 +n02387983 +n01912152 +n10805501 +n12718074 +n03188290 +n02776505 +n10528148 +n09971385 +n10524223 +n09958292 
+n02721813 +n10300829 +n12007766 +n12107191 +n04449700 +n02987950 +n11878633 +n12328801 +n04551833 +n10567722 +n11654984 +n02808968 +n12066451 +n02964075 +n11633284 +n02434712 +n03070854 +n07926540 +n01543936 +n10091861 +n09938080 +n11976511 +n03342432 +n12886831 +n12509993 +n12958261 +n12730776 +n10066206 +n07846014 +n13176714 +n03332591 +n04607640 +n02513727 +n12138248 +n11964848 +n01318053 +n10553140 +n07839055 +n02632039 +n11865429 +n02286654 +n02367812 +n12093885 +n10774329 +n02296912 +n01729672 +n10353928 +n12033504 +n11936113 +n03263338 +n07822053 +n09737050 +n13875884 +n13212559 +n11690088 +n05468739 +n09344724 +n02507148 +n01377694 +n04172607 +n10464870 +n07804152 +n02825872 +n03139640 +n11858703 +n10227490 +n12334153 +n03616225 +n12018188 +n12399656 +n10235269 +n11840764 +n01995514 +n03326475 +n12704041 +n10684827 +n03006788 +n13906484 +n02868240 +n03614887 +n03491724 +n12124172 +n03675907 +n13170840 +n03983712 +n03254737 +n07836269 +n01784293 +n02095212 +n12470512 +n12219668 +n12920521 +n04492157 +n02950018 +n01922717 +n11797981 +n12601805 +n02744961 +n07814925 +n09798096 +n03939062 +n13891547 +n07564292 +n01590220 +n09295210 +n03997875 +n03479266 +n01491661 +n03781055 +n12528768 +n10657306 +n12014923 +n10094320 +n02532272 +n02224023 +n04541136 +n12067672 +n02661473 +n04233027 +n12399899 +n12889412 +n01736032 +n12551173 +n01337734 +n10104487 +n02921592 +n02148512 +n10216403 +n03276839 +n01781570 +n03999621 +n02505238 +n12537569 +n10433452 +n02351343 +n12365158 +n08539276 +n01897257 +n12221801 +n10557246 +n10437698 +n01803641 +n11836327 +n07813833 +n03468570 +n06277025 +n10040240 +n03692842 +n03017835 +n01881564 +n10487363 +n07937069 +n10597505 +n01638722 +n10160412 +n09825096 +n12611640 +n03098515 +n10654211 +n13196234 +n03436990 +n04058486 +n09814567 +n10758337 +n03515934 +n07688757 +n10269199 +n12627347 +n04521571 +n01636510 +n03220095 +n09982525 +n12768809 +n02340930 +n02473857 +n12336586 +n12125584 +n02833040 +n02498153 +n01467804 +n12120347 
+n11650430 +n11953339 +n12592058 +n05102764 +n10575594 +n09722064 +n01966586 +n10619888 +n07852376 +n12650915 +n10321882 +n11974557 +n09847267 +n13201423 +n12337131 +n13185658 +n02150134 +n10538853 +n10471732 +n07836600 +n03526062 +n02512752 +n04232437 +n03367321 +n04308915 +n07600895 +n11539289 +n03539293 +n12699922 +n07817599 +n02781213 +n03594010 +n12035907 +n04075813 +n05233741 +n07863229 +n10735984 +n12095543 +n12272735 +n04229620 +n12240965 +n07768590 +n04420024 +n12111627 +n02861509 +n02595056 +n12183452 +n04607982 +n13213577 +n07741888 +n03750614 +n10043024 +n03372933 +n10051861 +n10199251 +n03249956 +n03984125 +n02956393 +n11619687 +n03356279 +n07833951 +n10715030 +n02340358 +n10768272 +n01494041 +n02592734 +n03323319 +n02136285 +n03995661 +n09945223 +n03547397 +n10044682 +n12878784 +n02803809 +n13160254 +n12726902 +n12196954 +n03161016 +n03105645 +n04218921 +n09493983 +n10719036 +n12263588 +n12565102 +n10684146 +n03148518 +n04287986 +n02340640 +n04331443 +n10727016 +n03369407 +n07824863 +n07844786 +n12467433 +n07582811 +n02964196 +n02197877 +n10758445 +n03271376 +n13212175 +n03260504 +n12777778 +n11973634 +n05467054 +n11946313 +n02462213 +n13906669 +n10520286 +n02074726 +n01771100 +n13880199 +n09811568 +n13883763 +n02334728 +n11831100 +n12025220 +n12751172 +n03858837 +n10127186 +n12831535 +n07823591 +n02513805 +n03662301 +n09913329 +n02749670 +n10655986 +n01787191 +n03199488 +n12732252 +n12253664 +n07735294 +n03440876 +n09650839 +n03844965 +n10341446 +n12688187 +n12961242 +n03423224 +n13157346 +n09802951 +n11948044 +n03489048 +n12279060 +n03664840 +n03731882 +n07742605 +n07870734 +n03949761 +n10759331 +n07739923 +n02737351 +n01788291 +n11780424 +n03722646 +n12297110 +n12363768 +n04495310 +n10008254 +n03934890 +n01318478 +n03609959 +n10070377 +n04123228 +n13068735 +n02909706 +n10671042 +n10491998 +n07650792 +n12664710 +n10213034 +n03455642 +n10411867 +n09903936 +n10121800 +n02622955 +n03647423 +n07596566 +n09654898 +n12248780 +n02684515 +n04255670 
+n06273890 +n03495941 +n12960552 +n09724234 +n03861048 +n03293095 +n11835251 +n12852428 +n04084517 +n01814620 +n13159890 +n03147156 +n02311748 +n10237799 +n07584859 +n01946827 +n09651968 +n12241192 +n03669245 +n07858336 +n11932927 +n04444218 +n10526534 +n03642573 +n09470222 +n10731732 +n12001924 +n03786096 +n01359762 +n03824999 +n13877667 +n10591811 +n10574311 +n03275125 +n11631985 +n10539160 +n10502950 +n12499757 +n12432707 +n12068615 +n07689624 +n02610373 +n03204436 +n13051346 +n13134531 +n07610890 +n04021164 +n03502897 +n02299378 +n10417843 +n10050043 +n07929940 +n02593453 +n10577820 +n12870225 +n03333851 +n09463226 +n11741575 +n09193551 +n12012510 +n11987349 +n09215023 +n07924655 +n10060075 +n11999278 +n03933391 +n02602059 +n11993444 +n02337902 +n10149867 +n04441093 +n02868429 +n10629647 +n04192361 +n12029039 +n02768433 +n12078747 +n12730143 +n03255167 +n12492900 +n01709876 +n09672725 +n07870620 +n02315821 +n12277334 +n12204730 +n07852712 +n01319685 +n07802246 +n13031193 +n00812526 +n09658815 +n11982939 +n04264485 +n07893425 +n04094438 +n03285730 +n13182338 +n10724570 +n07832741 +n13210350 +n10654015 +n04058721 +n07875086 +n03462747 +n03994417 +n02889856 +n11957514 +n10109443 +n10478462 +n03064562 +n02477782 +n11920998 +n02138169 +n04227787 +n11797508 +n10753339 +n12928307 +n11921792 +n12643688 +n01833112 +n03919808 +n09817386 +n01903498 +n03848033 +n12031547 +n01035504 +n12324906 +n01911063 +n02588794 +n03749634 +n03539754 +n02242455 +n03079616 +n03246312 +n09705671 +n07860629 +n10458356 +n10051761 +n09709531 +n02867401 +n12522678 +n13150378 +n04462576 +n03462315 +n03712981 +n07607027 +n10581648 +n02957427 +n04271793 +n02253913 +n12824735 +n11697802 +n02161588 +n12463975 +n02361090 +n09784564 +n09680908 +n03512452 +n13214217 +n10712690 +n04023119 +n07814007 +n09833751 +n12885265 +n02259987 +n11933903 +n03628831 +n11967142 +n02533545 +n03900301 +n07919787 +n12793886 +n10768148 +n03071552 +n02780315 +n12193665 +n03378442 +n04486616 +n07832307 +n03164192 
+n12786273 +n04261868 +n12655351 +n12320414 +n04371979 +n10630093 +n13052014 +n01357328 +n07879821 +n09753348 +n03796974 +n11701302 +n11678299 +n04022434 +n11610823 +n07726009 +n04117639 +n10474343 +n11888061 +n01842788 +n10435251 +n03343047 +n03383378 +n12750767 +n09662661 +n05241485 +n10000459 +n12220496 +n02246941 +n12676370 +n02253264 +n07766409 +n02940289 +n12089320 +n10363573 +n12922119 +n09783537 +n11695285 +n12331066 +n12573647 +n10218164 +n12509821 +n07862946 +n12818601 +n02589316 +n13191620 +n03758992 +n12112337 +n10733820 +n02898093 +n02645953 +n10150794 +n04595762 +n02344918 +n13132756 +n12859153 +n12138444 +n04211001 +n12935166 +n07830493 +n10142166 +n11951820 +n03018848 +n01453742 +n11985321 +n10000294 +n01362336 +n02328009 +n12639376 +n03090437 +n02204249 +n04312916 +n13127666 +n09684082 +n03432509 +n10274318 +n09704057 +n07593972 +n10074249 +n13157971 +n01638194 +n04036963 +n11708857 +n03418749 +n12589458 +n11899762 +n07683138 +n01601410 +n07854707 +n04279063 +n03239607 +n10302700 +n12520406 +n12576451 +n03881534 +n07565608 +n02349390 +n12569851 +n12249294 +n04059399 +n03530189 +n09357346 +n04325208 +n13159691 +n04045941 +n13898315 +n11992479 +n02353411 +n07825496 +n12922458 +n03115014 +n11761836 +n03323211 +n02793296 +n03492087 +n05241662 +n05491154 +n10419630 +n04506895 +n10546428 +n02907296 +n10769459 +n11647868 +n13188462 +n03825442 +n13209460 +n10742005 +n07599242 +n12361754 +n04570532 +n04131811 +n07756499 +n02598134 +n01910252 +n02910701 +n10129338 +n13871717 +n12673588 +n12565912 +n07562172 +n02711237 +n10775003 +n07695410 +n02637179 +n12930951 +n10261211 +n02906963 +n01366700 +n10642705 +n09846586 +n02779719 +n04978561 +n01369358 +n12114010 +n03521771 +n10667709 +n02296612 +n10722029 +n03500557 +n01365474 +n10472447 +n07585644 +n07609316 +n04013060 +n04505888 +n09726811 +n12692160 +n12378963 +n03585551 +n13139837 +n10167565 +n03799375 +n11990920 +n09640327 +n04502989 +n10108832 +n10561736 +n01897426 +n11766189 +n12462582 +n12913524 
+n02684356 +n13200542 +n10466198 +n04331892 +n01478969 +n07837234 +n07692248 +n04552097 +n12382875 +n01484447 +n04120695 +n12681376 +n10293861 +n11965962 +n11788039 +n03959227 +n01832813 +n09918867 +n09942697 +n07587206 +n10459882 +n01347583 +n02267208 +n03951453 +n03006903 +n12126736 +n10286749 +n03395401 +n04605057 +n03467887 +n12755559 +n04020744 +n11629354 +n01647033 +n02780445 +n10205714 +n09439032 +n03138128 +n02763083 +n07835547 +n12251278 +n11949857 +n01635480 +n10675142 +n07845335 +n07751977 +n10332110 +n11871496 +n11764814 +n12229651 +n07760297 +n09865672 +n02919308 +n12218490 +n03782929 +n12231709 +n11909864 +n03144982 +n11799331 +n10433610 +n10483395 +n03206023 +n05442594 +n03626418 +n07870478 +n10171456 +n11964446 +n12796849 +n02126317 +n03797062 +n01412694 +n07610746 +n03581897 +n04479526 +n12447891 +n11906514 +n09699642 +n12873984 +n10586903 +n13234114 +n02436353 +n11889205 +n01460303 +n04400899 +n11884967 +n02140491 +n12215824 +n03586911 +n01394040 +n10691937 +n12371704 +n09668988 +n04362624 +n01740885 +n01337191 +n09714120 +n02185481 +n08555333 +n10704238 +n12430471 +n12034594 +n10012484 +n12088909 +n03205903 +n04129490 +n13090018 +n10712474 +n12234669 +n13016076 +n00454855 +n13882713 +n02644817 +n03192907 +n03519226 +n01561181 +n04583967 +n11732052 +n10732854 +n04480303 +n07934908 +n03825673 +n10621294 +n04354387 +n03374102 +n02922159 +n13158815 +n04000716 +n09685806 +n04427216 +n12051514 +n09712967 +n12081649 +n09748889 +n03252231 +n10704886 +n12897118 +n12525168 +n11728769 +n02731251 +n02548884 +n12403276 +n09627807 +n08679167 +n09663999 +n04247440 +n07711683 +n09909929 +n03415868 +n05244421 +n07680416 +n12757668 +n11935794 +n03483086 +n01860864 +n10755164 +n03675076 +n12004987 +n07566092 +n04078955 +n03379719 +n01916588 +n10138369 +n09755893 +n03649003 +n03977430 +n02309120 +n10616578 +n12242850 +n12388293 +n03292085 +n09919061 +n10302576 +n01497413 +n01936858 +n01377278 +n04358256 +n02667693 +n12125183 +n07758582 +n07813324 +n09737453 
+n12745564 +n03855464 +n03166685 +n01446152 +n09801102 +n10561222 +n10576818 +n13915209 +n10474446 +n03845990 +n04237174 +n12531328 +n07855812 +n10763245 +n04614505 +n07905770 +n12051792 +n12653633 +n03593862 +n10359659 +n10436334 +n07853125 +n12911264 +n12265083 +n03638014 +n04444121 +n02706221 +n10563711 +n07808166 +n11799732 +n04093915 +n10451858 +n04410760 +n10075299 +n12740967 +n12635359 +n09611722 +n12902466 +n13915305 +n05542893 +n04440597 +n03675445 +n12315245 +n10646032 +n10047199 +n12775717 +n10365514 +n10590452 +n11616260 +n02812342 +n07856756 +n04570416 +n03565991 +n12215210 +n04330896 +n02388588 +n02266269 +n10760199 +n14714645 +n02742070 +n03565710 +n12609379 +n03420935 +n03441465 +n00453631 +n01963479 +n04362972 +n09863936 +n03961394 +n03009269 +n12297280 +n04561010 +n12192877 +n02981565 +n12134695 +n07855413 +n03232815 +n10180791 +n09932788 +n10571907 +n02109256 +n02660091 +n07865788 +n13228536 +n10306279 +n02635580 +n03634899 +n10262343 +n12296929 +n04393301 +n06281175 +n04485586 +n13103660 +n10510974 +n04166436 +n01634522 +n07596362 +n12700357 +n08597579 +n11744011 +n12238756 +n01790171 +n04571800 +n11867311 +n03464467 +n12241880 +n09961605 +n12592544 +n03170459 +n09938991 +n02692680 +n10295371 +n04331765 +n02612167 +n02520810 +n11977887 +n04094608 +n07722390 +n07832202 +n12448361 +n04612159 +n12186352 +n13161151 +n12654227 +n09868899 +n10104756 +n09920106 +n12981301 +n02610980 +n12545865 +n10673296 +n04110841 +n01704626 +n04055700 +n12117912 +n10519126 +n12443736 +n01697978 +n02148088 +n03012644 +n12091697 +n10395390 +n10509810 +n10462751 +n02896949 +n03836602 +n03928994 +n07718195 +n02473983 +n08571642 +n02648916 +n11970298 +n06274292 +n04613158 +n09856401 +n12811713 +n13111340 +n12122442 +n10095265 +n04445610 +n11631619 +n07863644 +n12022821 +n10315217 +n12549799 +n03386343 +n03121040 +n03558007 +n12272432 +n11798496 +n02522866 +n02952935 +n10741493 +n12143065 +n07883156 +n09616573 +n02289988 +n13161904 +n02588945 +n00451768 +n12375769 
+n10777299 +n04495183 +n11930994 +n09970088 +n02254246 +n12276314 +n07857598 +n04428382 +n03789794 +n03383821 +n12980080 +n01447139 +n12880799 +n03501520 +n10764465 +n13143285 +n12727729 +n12444095 +n02354621 +n13174354 +n01691652 +n07732525 +n10437014 +n04368235 +n10371052 +n02611898 +n03597147 +n09912431 +n03135788 +n07888058 +n02409202 +n14582716 +n11934463 +n04395332 +n12558680 +n05257967 +n11798978 +n10617024 +n04102760 +n12132092 +n12988572 +n10390698 +n11887310 +n12063211 +n12952717 +n13141972 +n12176453 +n10245863 +n10509161 +n10389976 +n10333165 +n01474864 +n09274305 +n11888424 +n10368711 +n13222877 +n10469611 +n07582970 +n09700125 +n12805762 +n07865575 +n07853852 +n03628421 +n04482975 +n03099622 +n01349735 +n11943133 +n12736603 +n12197601 +n10597745 +n04418644 +n12689305 +n07755262 +n10598459 +n04312020 +n03195485 +n09776642 +n10596517 +n10223606 +n01923890 +n12703716 +n03465040 +n12372233 +n12528109 +n03571853 +n10802621 +n10204177 +n02320465 +n03976105 +n02214096 +n02148991 +n10377542 +n10697135 +n03538542 +n07582027 +n04517999 +n12180456 +n02838014 +n03977266 +n03818001 +n12191240 +n11648776 +n10773800 +n04475496 +n03945817 +n04682018 +n02994743 +n02787269 +n11650160 +n03834472 +n03389983 +n09797742 +n06209940 +n12525513 +n12672289 +n01893164 +n10710259 +n01892145 +n11773408 +n10554024 +n09864968 +n10699752 +n11631405 +n10414768 +n04430605 +n10742546 +n10738871 +n12857204 +n09309046 +n01724840 +n04123317 +n07881525 +n03868044 +n02140268 +n10708292 +n09838295 +n09797998 +n10710171 +n11814996 +n11938556 +n03543511 +n02151230 +n01515217 +n03533392 +n02039780 +n12810151 +n02335231 +n12152251 +n13225617 +n09801275 +n01978587 +n14821852 +n11742878 +n12679023 +n03521431 +n09679028 +n02021281 +n10784544 +n04421258 +n12492460 +n03720005 +n02541257 +n03889397 +n02888898 +n10659762 +n12045157 +n12712320 +n10369095 +n09721444 +n12769318 +n01703161 +n12697514 +n07836456 +n03905361 +n10660883 +n07769306 +n11893916 +n07846274 +n04110281 +n03655470 +n07740744 
+n01363719 +n12540647 +n09896311 +n12842642 +n07755619 +n07754155 +n11548870 +n02868546 +n04215588 +n04288165 +n13201566 +n07721118 +n12018271 +n11903333 +n02909165 +n02662559 +n11658709 +n13063514 +n07725663 +n10179069 +n10776887 +n12637485 +n03814528 +n12542043 +n07833333 +n07820036 +n02746683 +n07925808 +n10349750 +n03154316 +n04155625 +n03232923 +n02116185 +n09998788 +n02821543 +n03410303 +n10656223 +n07916582 +n12880638 +n10408809 +n04612840 +n11805255 +n12044784 +n10497534 +n03458422 +n12873341 +n07808675 +n09476123 +n07611733 +n10598013 +n02214660 +n05469664 +n03952150 +n11855435 +n04375926 +n08523340 +n01642391 +n04007415 +n09756961 +n12891824 +n02894847 +n11698245 +n12906771 +n02894024 +n04131015 +n11882636 +n04386456 +n03291551 +n07837110 +n12462221 +n08540532 +n10299875 +n12705978 +n10448322 +n10487592 +n12175598 +n02272552 +n03833907 +n10383237 +n12758176 +n12729950 +n10061195 +n07816726 +n03241903 +n12239880 +n10380499 +n07855188 +n10207077 +n02770078 +n12961393 +n03778459 +n10734741 +n03485575 +n09958447 +n12337246 +n11830045 +n09866354 +n03209666 +n01470145 +n10395209 +n03872016 +n04267091 +n12888457 +n12104104 +n04088229 +n01964957 +n12002651 +n02503756 +n00481938 +n01908042 +n03378765 +n04193883 +n09862183 +n11861487 +n02520525 +n02081060 +n10386754 +n12693865 +n04514095 +n01325060 +n02460817 +n07568095 +n03651605 +n02561937 +n12844409 +n12888016 +n02974565 +n12439154 +n13018906 +n12071259 +n03897634 +n02863176 +n10603528 +n03493911 +n12887532 +n12944095 +n12794568 +n09980458 +n03503567 +n11783162 +n13123309 +n11729860 +n03702582 +n04280373 +n10086744 +n01790557 +n12627526 +n10552393 +n12092629 +n03888998 +n12751675 +n01442450 +n02479332 +n07726230 +n03642341 +n03142325 +n06263895 +n12088327 +n09703344 +n10528493 +n02820085 +n07737594 +n04090781 +n09901642 +n02328942 +n02724722 +n09866115 +n12658715 +n10481167 +n13135692 +n11850918 +n10205344 +n12361560 +n03698123 +n03284482 +n12106134 +n04441528 +n02591613 +n02581108 +n07856186 +n12197359 
+n12900783 +n01725713 +n12012253 +n03907475 +n02170738 +n03694949 +n13238654 +n04611795 +n02782432 +n13191148 +n02741367 +n04170694 +n12770892 +n01973148 +n10080508 +n10161622 +n09808591 +n07912093 +n02059541 +n02779971 +n03857156 +n12945366 +n03055159 +n12758325 +n10067305 +n02597818 +n07808352 +n13147153 +n10679723 +n02271222 +n04012665 +n12942729 +n10349243 +n01377510 +n07800636 +n10654321 +n10219453 +n09961469 +n10732521 +n04479405 +n11632929 +n03856728 +n08658918 +n10327143 +n10754281 +n02085118 +n09691604 +n09952163 +n10082299 +n03872167 +n03733465 +n04138869 +n01425223 +n12066821 +n02177506 +n09892262 +n02896694 +n12983654 +n13224922 +n09658921 +n12744850 +n03639880 +n02943686 +n10660621 +n11936539 +n03698226 +n04519536 +n12392765 +n09319604 +n07567039 +n04160261 +n01802159 +n02838178 +n07746910 +n02266421 +n10240417 +n12542240 +n12550408 +n01445857 +n04132465 +n03569014 +n12666050 +n12362514 +n10676569 +n09702673 +n12885510 +n04447156 +n04396226 +n12240150 +n11639306 +n02249134 +n01340785 +n02833140 +n10027590 +n02142407 +n11996251 +n07874531 +n04340019 +n03166120 +n10420277 +n04465203 +n12738259 +n12831141 +n03998673 +n01385017 +n12842519 +n02587051 +n10753061 +n12505253 +n13906936 +n01989516 +n12640435 +n07852532 +n04243142 +n10261511 +n12853287 +n12239240 +n03973003 +n09983889 +n10345302 +n14804958 +n02354162 +n03049326 +n10443659 +n01318660 +n12787364 +n04253304 +n11941094 +n09283514 +n09393524 +n11865574 +n01531639 +n04409279 +n02859729 +n10712835 +n03694196 +n04343630 +n10331098 +n12929600 +n02826259 +n10171219 +n07735179 +n07594840 +n03709644 +n09950728 +n09859285 +n07718329 +n01418620 +n09858299 +n12395068 +n10011360 +n07763290 +n02643316 +n03596099 +n04422566 +n11958888 +n09650989 +n10318686 +n01333082 +n12886402 +n03781467 +n12667582 +n02923535 +n09988311 +n08663860 +n02508346 +n13885011 +n03939281 +n10772937 +n04485750 +n09871952 +n10291942 +n07759324 +n10174971 +n03666238 +n01937579 +n02308033 +n07847706 +n10371330 +n04124887 +n11853079 
+n11941478 +n12647231 +n04601041 +n12718483 +n02902816 +n01941340 +n04066767 +n07617839 +n02254901 +n03488784 +n07834774 +n02524659 +n03367969 +n10783734 +n03422484 +n09776807 +n03970363 +n10131590 +n03433247 +n02622712 +n10206506 +n12061104 +n11936287 +n07874674 +n10061043 +n07828275 +n03764606 +n12236768 +n01826844 +n09741904 +n05454978 +n03591592 +n01441272 +n03736372 +n07585474 +n12762405 +n12943912 +n01894522 +n03218446 +n11846425 +n11689678 +n04147916 +n02375862 +n10409459 +n09287415 +n10113583 +n03261263 +n02817386 +n09869578 +n10550252 +n02532786 +n12031388 +n07937344 +n11612235 +n01571410 +n09402944 +n04234670 +n02603862 +n04196925 +n09999135 +n10468750 +n15093049 +n03003633 +n11650307 +n12312110 +n02525703 +n10501635 +n09751622 +n10114550 +n10103155 +n12829975 +n04004099 +n12419878 +n02082190 +n03328201 +n03093427 +n07845571 +n12655498 +n02558206 +n12563045 +n07573453 +n12324558 +n13016289 +n10601234 +n10310783 +n03531691 +n02135610 +n03168543 +n09985978 +n10615334 +n07839312 +n09985809 +n10142537 +n10417969 +n07869111 +n12514992 +n04327544 +n10326776 +n12583681 +n01476418 +n12840168 +n03852544 +n11713763 +n07824502 +n07858841 +n12256325 +n03036149 +n07883661 +n04500390 +n10170866 +n01835918 +n10760951 +n10720197 +n12330239 +n02135844 +n10210512 +n03217739 +n10802953 +n03136254 +n02161225 +n03961630 +n12927194 +n02251233 +n13891937 +n09945603 +n02695762 +n12181612 +n13234857 +n10175725 +n11346873 +n07934678 +n02318687 +n10251329 +n04112921 +n04001132 +n03042984 +n11704791 +n04246459 +n12193334 +n10718509 +n10371221 +n05278922 +n03265754 +n12186554 +n12481289 +n10521853 +n10748506 +n11729142 +n10143595 +n09422631 +n07562984 +n07850219 +n04193742 +n11997160 +n12002826 +n12820113 +n04132829 +n10272913 +n03358841 +n12610740 +n12384569 +n10725280 +n02746008 +n13148384 +n12635151 +n02337171 +n10350774 +n12308907 +n04542474 +n04339062 +n03549350 +n10240235 +n10556033 +n10214390 +n01791314 +n02801047 +n07817465 +n11610602 +n10315730 +n14592309 +n10249191 
+n12453857 +n12579822 +n09833275 +n04051269 +n11552594 +n04088343 +n04565039 +n03930431 +n10679503 +n11899921 +n10295479 +n01357507 +n13036312 +n03404900 +n12523141 +n01816017 +n02020578 +n12661045 +n06262943 +n02775813 +n12921315 +n09751076 +n09834258 +n10585628 +n12885754 +n04411019 +n10342367 +n10368798 +n09672840 +n12729023 +n04578329 +n10325549 +n03680248 +n11920663 +n10416567 +n10011486 +n01643255 +n03193754 +n07823814 +n04055447 +n10660128 +n07765612 +n07612530 +n04205613 +n09677427 +n03989199 +n11100798 +n12721122 +n10000787 +n10382157 +n07724819 +n12928819 +n11631159 +n02608996 +n10516527 +n09703101 +n12290975 +n03470222 +n03810412 +n03729131 +n03356038 +n12692024 +n12614625 +n10789415 +n02333819 +n01722670 +n03885410 +n12038208 +n02294097 +n02608860 +n02500596 +n07909231 +n03254625 +n09681973 +n12221368 +n01893399 +n10025295 +n03194812 +n13181406 +n12249122 +n03447894 +n09795010 +n02187900 +n10139651 +n10631654 +n01792530 +n02569631 +n07853946 +n09907804 +n03263758 +n04214649 +n02450829 +n02431542 +n11998492 +n02651060 +n04101860 +n01806061 +n13901423 +n12903964 +n03968479 +n04268565 +n12601494 +n02083780 +n04570118 +n12247407 +n03337822 +n09878921 +n02369935 +n10022908 +n09667358 +n13160938 +n11937360 +n07741623 +n03705808 +n12241426 +n10478118 +n03805933 +n10343869 +n09391774 +n03482128 +n10357737 +n10334461 +n09675045 +n09662951 +n10174253 +n01815270 +n13873361 +n04432785 +n09778927 +n10671898 +n05571341 +n10033572 +n09864632 +n10618465 +n03437184 +n12786464 +n01723579 +n11798270 +n07742415 +n02143142 +n10548419 +n03695122 +n02518622 +n04605446 +n10218292 +n11832671 +n12646950 +n03382708 +n09844898 +n09674786 +n01472502 +n07616906 +n09763272 +n03982767 +n10005006 +n03059236 +n01816474 +n03725869 +n01979269 +n04226322 +n13236100 +n03920384 +n11852148 +n04373563 +n04324120 +n11686652 +n03036341 +n02142898 +n09783776 +n13147918 +n03465320 +n07855721 +n10336411 +n10438619 +n07750299 +n12237152 +n03559373 +n10077106 +n10169796 +n09828403 +n09959658 
+n12464128 +n12934685 +n04221673 +n02617537 +n11689367 +n10180580 +n07813717 +n12529905 +n02340186 +n01400247 +n11749112 +n04404072 +n03135656 +n12098827 +n12481150 +n10023506 +n03500838 +n01564101 +n04009923 +n10023264 +n03908456 +n03206405 +n07590068 +n09958133 +n10755394 +n01423617 +n11511327 +n10536274 +n01965252 +n11549245 +n11935627 +n09635635 +n03752071 +n07585997 +n03147084 +n12666159 +n09748408 +n03796848 +n01501948 +n02345078 +n12430675 +n03103128 +n11710987 +n03393199 +n09233603 +n10465002 +n04298765 +n01351170 +n02720576 +n03966582 +n10643837 +n12420124 +n10793799 +n01652297 +n09281252 +n11983606 +n10222497 +n11832899 +n02391617 +n12434106 +n03987674 +n02140179 +n07896560 +n04325804 +n10647745 +n01924800 +n10156629 +n03545961 +n03906789 +n01890564 +n10699558 +n12332218 +n03247495 +n11839460 +n03527675 +n12586725 +n13208965 +n02714315 +n02750320 +n04615149 +n12679876 +n12863234 +n03304323 +n12139793 +n11922755 +n12321669 +n04979307 +n01921059 +n09657206 +n13042134 +n04045787 +n11700279 +n02337598 +n01415920 +n01400391 +n13207572 +n10785480 +n02515713 +n12018100 +n02634545 +n03292736 +n02881546 +n12655605 +n03105810 +n10545792 +n03894933 +n09796974 +n10320484 +n12308112 +n11549009 +n13047862 +n14941787 +n12379531 +n10540252 +n11696935 +n12184468 +n12851860 +n12908854 +n10586265 +n12369066 +n10426630 +n12523850 +n03916289 +n04538878 +n09908769 +n02828115 +n07560422 +n10266016 +n03569174 +n06423496 +n10495167 +n03617834 +n09327538 +n10195056 +n10508379 +n13031323 +n11659248 +n04242315 +n10742111 +n10700963 +n12032686 +n09877587 +n07825597 +n07568991 +n11736362 +n12169099 +n13103750 +n03263640 +n12248941 +n10665302 +n01920051 +n09704283 +n11533999 +n04503073 +n11645163 +n10639817 +n09920901 +n06340977 +n03251100 +n10378113 +n03226090 +n10131268 +n02877513 +n13191884 +n02787120 +n11709045 +n02740061 +n12323665 +n02831998 +n10342180 +n12716594 +n04498275 +n09905050 +n03745487 +n07642833 +n10294020 +n10211666 +n12205460 +n02981198 +n01642943 +n07679140 
+n04390483 +n10432875 +n09214269 +n10792506 +n10243483 +n13099833 +n10221520 +n13177768 +n04091584 +n10672540 +n10200246 +n13889331 +n02345340 +n10237556 +n01833415 +n01335218 +n09804230 +n09957523 +n05235879 +n10070449 +n10308653 +n10721708 +n04312654 +n10394434 +n12201938 +n12434775 +n07601025 +n02672152 +n10157271 +n02635154 +n12572858 +n13182937 +n10160188 +n03396997 +n10344656 +n02968210 +n10190516 +n07684422 +n03706939 +n07618871 +n02290870 +n03817331 +n03275311 +n12698774 +n04375080 +n07837630 +n04314216 +n11833373 +n07618684 +n03742238 +n12532886 +n03712444 +n11750989 +n10038620 +n09617577 +n03807334 +n10108089 +n01816140 +n10715347 +n02648035 +n13127303 +n02809491 +n02430748 +n12235479 +n01451863 +n01514926 +n10010864 +n01913440 +n09660240 +n11806369 +n01470479 +n12655245 +n07655067 +n03436772 +n11778092 +n03951800 +n10277815 +n07931733 +n01479820 +n03576955 +n07609549 +n12568649 +n05263316 +n02636405 +n01384084 +n03298352 +n07617344 +n09987045 +n10573957 +n07801709 +n02589062 +n02534165 +n02748359 +n09607782 +n07590974 +n02199170 +n02696569 +n09678747 +n12795209 +n13176363 +n10663315 +n10588724 +n09772330 +n10174589 +n12366313 +n11883628 +n07617447 +n01334690 +n03168663 +n11764478 +n08599174 +n03942028 +n12153033 +n03448696 +n12096674 +n10037588 +n03548320 +n09760290 +n10374541 +n09653438 +n10294139 +n10276942 +n12279293 +n12764507 +n12803958 +n10764622 +n02140858 +n07599068 +n10245507 +n12351790 +n12818004 +n10118301 +n03945459 +n09912995 +n12176709 +n03873996 +n10339179 +n10614507 +n10114662 +n10784922 +n03821424 +n04959230 +n13015509 +n12573911 +n11948469 +n09775907 +n12758014 +n01780142 +n09956578 +n12165384 +n10088200 +n10382480 +n04131113 +n09930628 +n09784160 +n11750173 +n13064111 +n03817522 +n12662074 +n03176238 +n12310021 +n11679378 +n09961331 +n02385580 +n11904274 +n03113505 +n10244913 +n02836900 +n09986700 +n11963572 +n13158605 +n10321632 +n02179891 +n02189670 +n10097995 +n10774756 +n10783240 +n10605737 +n02530052 +n10386196 +n10184505 
+n09788237 +n03589672 +n12509109 +n10658304 +n12966804 +n12559518 +n03189311 +n01451295 +n12179632 +n12301613 +n10496489 +n03402785 +n10244108 +n02385676 +n03552001 +n03092053 +n02313360 +n02547733 +n02109391 +n01327909 +n04574606 +n03060728 +n07840124 +n10567848 +n10062176 +n02703124 +n10804732 +n12699301 +n04515890 +n07919665 +n10457214 +n09663248 +n03165955 +n12988341 +n03987865 +n03031756 +n10277912 +n10172080 +n09325824 +n03198223 +n09605110 +n10113869 +n11603462 +n03352366 +n11930203 +n09769929 +n12979316 +n02579762 +n09953052 +n03105974 +n00476140 +n11598287 +n02830157 +n10512201 +n09746936 +n10668666 +n02919976 +n09993651 +n02149861 +n09705003 +n10389865 +n11655152 +n10010767 +n10070563 +n03688832 +n10590239 +n11936027 +n02939763 +n03163488 +n03171910 +n09955406 +n03266195 +n10217208 +n09338013 +n07594250 +n03215930 +n09725935 +n10592049 +n03732658 +n12498457 +n09966554 +n10668450 +n10361525 +n04060198 +n11936624 +n02602760 +n03942600 +n03708425 +n10020533 +n12067817 +n07590177 +n01891274 +n11837204 +n01419332 +n03860234 +n12616248 +n07834160 +n09867154 +n09788073 +n12222493 +n03388990 +n04245412 +n10182402 +n11675404 +n10450038 +n13045594 +n13158167 +n13082568 +n12052267 +n12707199 +n07810531 +n07914887 +n13127001 +n02573249 +n08619112 +n10471859 +n09919899 +n03635516 +n12067029 +n03352232 +n07765517 +n10519984 +n02742194 +n03062798 +n13124654 +n09958569 +n02370137 +n10121714 +n04019335 +n07732433 +n02559383 +n12585137 +n09729156 +n10744078 +n09954355 +n03078506 +n10062042 +n10688811 +n02668613 +n03142205 +n10347204 +n10518349 +n09898020 +n12563702 +n05468098 +n10116370 +n07838905 +n03127024 +n03545585 +n12801072 +n09940818 +n04480995 +n10466564 +n02606751 +n10032987 +n10771066 +n01587278 +n11852531 +n01455461 +n10397392 +n02349205 +n10180923 +n09778266 +n04366832 +n10051975 +n10538629 +n09865744 +n12554029 +n13118330 +n12952590 +n04187751 +n09924313 +n10062594 +n01980655 +n10028402 +n02567334 +n10590903 +n10265891 +n10739297 +n01457082 +n03437581 
+n03713151 +n03475674 +n05464534 +n11863467 +n06592421 +n12491435 +n14914945 +n10279778 +n03388711 +n10483890 +n10612373 +n03332784 +n02332954 +n02952798 +n13041943 +n01607309 +n04356772 +n07711799 +n12670962 +n12229111 +n07878479 +n12401893 +n07772413 +n12138110 +n09781504 +n07902698 +n02750652 +n13042316 +n12400924 +n02304797 +n03066464 +n12852234 +n10155222 +n05541509 +n10711483 +n04210858 +n02835551 +n12859679 +n02935490 +n03540476 +n05279953 +n09807075 +n09617435 +n03566860 +n10549510 +n10025391 +n10754449 +n11927740 +n03554645 +n01837526 +n02656969 +n08648917 +n07860548 +n01452345 +n04021704 +n07783827 +n10080117 +n02187554 +n03214966 +n10036444 +n04291069 +n12407396 +n02170599 +n09896826 +n12417836 +n07845495 +n02749292 +n03061819 +n03682380 +n10756261 +n10369955 +n09692125 +n09978442 +n04277669 +n10539278 +n09703932 +n01879837 +n02746225 +n13159357 +n11763874 +n10540656 +n07933530 +n12987535 +n02371344 +n10654827 +n09723944 +n12775393 +n11856573 +n12626878 +n12716400 +n09903639 +n09784043 +n03906894 +n10775128 +n03124313 +n10396727 +n02841641 +n10211830 +n12283395 +n03490784 +n14175579 +n04027935 +n12396091 +n02609823 +n01414216 +n09880741 +n11976933 +n03073384 +n09270160 +n11768816 +n12073217 +n11597657 +n09994878 +n11756329 +n12579404 +n03161893 +n01451115 +n07736971 +n02949356 +n03878418 +n12653436 +n10626630 +n12777892 +n13061704 +n10498699 +n03609786 +n03199358 +n10776339 +n10762480 +n13179056 +n10113249 +n04029913 +n12640081 +n10493835 +n11683216 +n03524287 +n04585626 +n02969527 +n12976554 +n08569482 +n10204833 +n12442548 +n02577952 +n09357447 +n10202225 +n02198129 +n11882972 +n10404426 +n01600341 +n12016434 +n09867069 +n10576223 +n09893600 +n01702479 +n04274686 +n04406552 +n02848118 +n02258629 +n03260733 +n03685640 +n11751974 +n09967555 +n06274546 +n09649067 +n10681557 +n07606933 +n03110202 +n11982545 +n10803031 +n02679142 +n04086937 +n10514255 +n04506402 +n03884554 +n09970192 +n10117017 +n12642435 +n10186686 +n02097967 +n03956531 +n11834890 
+n02677436 +n10040698 +n11796188 +n03348142 +n04168472 +n02294407 +n12483282 +n09429630 +n04423687 +n09819477 +n09755555 +n10157016 +n03344935 +n07762373 +n12871859 +n09853541 +n09875979 +n13050705 +n02251067 +n10637483 +n03823673 +n10357012 +n03424204 +n04431648 +n01475940 +n02339282 +n10248198 +n07683265 +n13150592 +n10359117 +n10096508 +n03473078 +n13052248 +n10743356 +n03710079 +n10634990 +n04507689 +n07921090 +n02352002 +n03924407 +n03609147 +n02837567 +n03406759 +n03909658 +n10286282 +n12135576 +n01912809 +n10801561 +n10717055 +n03473465 +n03761588 +n03144156 +n09474412 +n10253611 +n12549420 +n02499568 +n09910222 +n10431122 +n12699031 +n01697749 +n11786843 +n03888808 +n12089496 +n10066314 +n10302905 +n12696830 +n09965787 +n11969806 +n04066388 +n13080306 +n03913930 +n09968259 +n10490421 +n10714195 +n07570021 +n10343449 +n10401204 +n03472796 +n10779897 +n11787190 +n03503097 +n10439523 +n12123648 +n04279858 +n10511771 +n09755788 +n08253141 +n02616397 +n12248574 +n01645466 +n04334504 +n07729142 +n05451099 +n10503818 +n10354265 +n09707735 +n02633422 +n11999656 +n01324916 +n02088745 +n09354511 +n10705448 +n09756195 +n10136615 +n10427359 +n09702134 +n12600095 +n04122262 +n10791820 +n03330002 +n02713496 +n11710658 +n09664908 +n02550203 +n02349847 +n12835766 +n04098260 +n11536567 +n11686780 +n12875861 +n12758471 +n09806944 +n11810030 +n10400003 +n10098388 +n11663263 +n10559683 +n07833672 +n10753989 +n10643095 +n01988869 +n03112240 +n12911914 +n09979913 +n09785236 +n09790047 +n02676097 +n01653509 +n04601159 +n01938735 +n10748142 +n12978076 +n11990627 +n10437262 +n12972136 +n04077594 +n10148825 +n02269340 +n12886185 +n03608504 +n11677485 +n10612518 +n12267265 +n10649308 +n05458173 +n10650162 +n03213361 +n02747063 +n01611674 +n02322992 +n01554017 +n03512624 +n12773142 +n12747120 +n09902128 +n03162714 +n03924532 +n10299125 +n12378753 +n02778131 +n09976024 +n13093629 +n10778999 +n07721833 +n12232851 +n07876775 +n10097590 +n03194170 +n13029122 +n04573832 +n12859272 
+n09639382 +n07688021 +n02878796 +n10751710 +n03633632 +n07762534 +n10779995 +n13914265 +n13093275 +n10729330 +n10433077 +n03663910 +n10499110 +n02272286 +n10371450 +n01967308 +n12633061 +n11659627 +n12982915 +n10344121 +n10268629 +n02697876 +n09879552 +n10167361 +n10719807 +n04042076 +n01632952 +n03243625 +n02125872 +n10105906 +n12194613 +n03149810 +n10721124 +n03947343 +n02020219 +n10122531 +n01315330 +n08647264 +n00452734 +n03607527 +n10010243 +n09863749 +n04473275 +n11782878 +n03585337 +n09655466 +n12989007 +n11711971 +n10716864 +n10475835 +n10704712 +n01894956 +n10568443 +n12881105 +n10387836 +n10403633 +n08645318 +n03500457 +n10377633 +n10108464 +n09933972 +n02618094 +n11798688 +n04155735 +n09780395 +n12822466 +n04302200 +n11899223 +n10633298 +n02760298 +n12142450 +n10803282 +n10769321 +n10514051 +n10597889 +n11837562 +n02261757 +n01458746 +n09830759 +n10003476 +n09817174 +n10738670 +n10118743 +n12096563 +n03054491 +n12155773 +n10439727 +n04170384 +n03223923 +n12632733 +n07845421 +n10062905 +n11831521 +n04267985 +n12796385 +n04154854 +n00444142 +n09778537 +n03115663 +n04385157 +n10109826 +n02337332 +n09996304 +n09880189 +n12871696 +n11823305 +n02516776 +n12377494 +n08511017 +n04421417 +n10765305 +n09675673 +n03488111 +n03076623 +n11829672 +n10292316 +n10758949 +n13031474 +n02829353 +n10090745 +n09186592 +n12736999 +n12715195 +n11684499 +n03168933 +n09890192 +n10596899 +n12527081 +n10496393 +n10497135 +n02137302 +n03266620 +n12958615 +n12664187 +n02633977 +n04262869 +n04215800 +n13133233 +n02392555 +n09858733 +n10186350 +n01715888 +n03142099 +n08573674 +n11687071 +n02690715 +n03146342 +n12331788 +n08079613 +n10609092 +n12943049 +n12234318 +n02312325 +n12618146 +n10135197 +n11705573 +n02794368 +n02850358 +n09464486 +n01993525 +n03187153 +n10097262 +n02976641 +n12198793 +n12941717 +n10219240 +n12434634 +n03827420 +n10437137 +n10342893 +n04174026 +n10265281 +n07757874 +n10765885 +n01470895 +n02349557 +n11716698 +n03765467 +n10227793 +n07824268 +n12994892 
+n10486236 +n02974454 +n10718349 +n11726145 +n09909660 +n03378593 +n07805006 +n09875025 +n02645691 +n10223069 +n03722944 +n04389999 +n02544274 +n10239928 +n04456011 +n10382302 +n01552333 +n10082562 +n12952469 +n09883047 +n10442573 +n01891013 +n10690268 +n13111504 +n02287352 +n03567635 +n10331347 +n09762385 +n09933842 +n02369555 +n12291459 +n09919200 +n01492860 +n02067768 +n10713254 +n10550468 +n12846335 +n03835729 +n12467018 +n11676743 +n03629643 +n12987423 +n10655730 +n08678783 +n10349836 +n10087736 +n10246703 +n10338391 +n04585456 +n04158138 +n10500942 +n09850974 +n10791890 +n10020807 +n03315805 +n02752917 +n04033801 +n10492086 +n04427473 +n02940706 +n12110475 +n09832978 +n12515393 +n07800487 +n09848110 +n02659176 +n09967406 +n10536134 +n10760622 +n09736485 +n07830690 +n07835173 +n09814252 +n10311506 +n10341955 +n03869838 +n07760673 +n09970402 +n12526178 +n11687964 +n09968741 +n10719267 +n07851054 +n10116478 +n10599215 +n09951524 +n03855908 +n03997274 +n02986348 +n08599292 +n02474282 +n04155889 +n09983314 +n01987727 +n10280130 +n10404998 +n02294577 +n02998696 +n08586978 +n11652578 +n13867005 +n12663254 +n10524869 +n02287622 +n10220924 +n03279918 +n02626089 +n10291110 +n12820669 +n07861681 +n08643267 +n07720185 +n12555859 +n03225616 +n09769525 +n03295140 +n12489046 +n10615179 +n12150969 +n02888429 +n10753182 +n10267166 +n03675558 +n12693352 +n02378299 +n02788462 +n03622401 +n12236977 +n10730542 +n12758099 +n10502046 +n11937195 +n10366145 +n10307114 +n12984595 +n10128748 +n09362316 +n09789898 +n09654079 +n04260192 +n10114476 +n08623676 +n10331841 +n05265736 +n10269289 +n03090856 +n12764978 +n02825240 +n10358032 +n09825750 +n03062651 +n11196627 +n11825749 +n04148464 +n04439505 +n07572858 +n04561857 +n12904562 +n03643907 +n10723597 +n01492708 +n10071557 +n10140683 +n01739871 +n12984267 +n03072056 +n10772580 +n10462588 +n11936448 +n10494373 +n12845908 +n09793352 +n10717196 +n12577362 +n09779124 +n10663549 +n02286425 +n10380126 +n01890144 +n02751490 +n03361109 
+n01781875 +n13128278 +n09994400 +n09883452 +n13881512 +n02833275 +n10362003 +n01376543 +n12366675 +n09984960 +n10173665 +n10673776 +n02057898 +n01934844 +n04057673 +n10018747 +n02916065 +n13024653 +n05539947 +n09648911 +n04150273 +n01393486 +n10411356 +n12232114 +n02436224 +n12757930 +n03095965 +n10555059 +n01577458 +n09666476 +n10598904 +n11656549 +n02591911 +n13092385 +n10506336 +n13103023 +n09658254 +n04095938 +n11936782 +n07824383 +n09781650 +n10240821 +n01780426 +n02850060 +n02863340 +n13914141 +n12138578 +n13034555 +n12291671 +n12133151 +n04515444 +n04591359 +n02589196 +n02689819 +n11740414 +n07610295 +n10246395 +n09921034 +n12447346 +n12641180 +n01419573 +n04242587 +n07760395 +n03399579 +n09866661 +n02549376 +n11861238 +n01588996 +n04319545 +n09789150 +n03288643 +n10312491 +n03353281 +n02345997 +n09711132 +n03043173 +n02558860 +n03703590 +n03188871 +n12589142 +n12113323 +n09987161 +n05242239 +n09686262 +n09780984 +n09668199 +n09716933 +n11675738 +n04459243 +n11833749 +n10646942 +n07760070 +n10286539 +n04469684 +n13030616 +n03939440 +n01725086 +n09967816 +n10500824 +n13026015 +n03983928 +n02936921 +n04115542 +n10245029 +n12105828 +n12452673 +n10498046 +n10737264 +n11766046 +n04079603 +n10072054 +n12569037 +n10153155 +n09867311 +n02806992 +n10258602 +n10164025 +n10520964 +n02258508 +n12199399 +n05266096 +n08496334 +n10351064 +n12441552 +n12878325 +n13102648 +n02980625 +n03462972 +n12395906 +n13022903 +n11895714 +n03324814 +n11318824 +n01728266 +n07883510 +n10731013 +n10181799 +n12142357 +n09671089 +n11531334 +n01718414 +n04573625 +n10390600 +n11553522 +n01314910 +n04227519 +n10514784 +n02944256 +n12103680 +n03081859 +n11655592 +n12569616 +n10700105 +n09755086 +n03865820 +n01456137 +n10442232 +n02900987 +n04491934 +n07849026 +n04519728 +n09986450 +n03305300 +n10186143 +n02879422 +n03018614 +n10747548 +n10562509 +n10068425 +n12593341 +n11937692 +n08679562 +n09613690 +n10646433 +n12251740 +n10994097 +n13048447 +n03848537 +n12153741 +n12614096 +n11654438 
+n09985470 +n10562968 +n02923915 +n10740594 +n07802767 +n12514592 +n10335801 +n03878674 +n12586499 +n10255459 +n02413824 +n10312600 +n02616128 +n12644283 +n04238953 +n04526520 +n01898593 +n09737161 +n03372822 +n09781398 +n10339251 +n02502807 +n10198832 +n10679610 +n13136781 +n11974373 +n11680457 +n10083677 +n04037298 +n09945021 +n09987239 +n02708885 +n13107807 +n10130877 +n12507379 +n08651104 +n12116058 +n10135297 +n04269086 +n03858533 +n10477955 +n04394031 +n10442417 +n10074735 +n03618797 +n03460455 +n04374521 +n10756061 +n08517010 +n12923108 +n02362194 +n01704103 +n10062492 +n01394771 +n10473789 +n10330593 +n02748183 +n12562141 +n09745933 +n02505485 +n11922661 +n12018014 +n09866922 +n04067143 +n13161254 +n07813495 +n01374846 +n10213429 +n03253071 +n02546028 +n01642097 +n01475232 +n03212247 +n10155600 +n11689957 +n11738997 +n10525878 +n03301389 +n10589666 +n01908958 +n10289766 +n03900028 +n03437295 +n02987823 +n02739123 +n10505347 +n02546627 +n10381804 +n10132502 +n10336904 +n10189597 +n09786115 +n12875697 +n10761519 +n01470733 +n02875626 +n12111238 +n07862770 +n07856895 +n09996039 +n03368048 +n07913300 +n10062996 +n10555430 +n04302863 +n12758555 +n10740732 +n02385898 +n02385098 +n12162758 +n03887899 +n03976268 +n04234160 +n03641947 +n07857076 +n10578656 +n12135729 +n12675515 +n09032191 +n12969670 +n02600503 +n12518013 +n10227166 +n10121026 +n01801672 +n10661216 +n03244388 +n04147291 +n09664556 +n02539573 +n04480141 +n10601362 +n02613572 +n10537906 +n02613820 +n11656771 +n03841011 +n02845985 +n12534208 +n10241024 +n03645290 +n12743976 +n11922839 +n07709701 +n03066232 +n03467380 +n09266604 +n09663786 +n12775070 +n02427183 +n04083113 +n12896615 +n10501453 +n02345774 +n09965515 +n09704157 +n10666752 +n03846970 +n04167661 +n03991321 +n09556121 +n10686517 +n02586238 +n03594277 +n03591313 +n10391416 +n10756837 +n13163649 +n03971960 +n10245341 +n02577041 +n04481642 +n12373739 +n10214062 +n10091997 +n10275848 +n02090253 +n03514340 +n04593629 +n11795216 +n03126927 
+n11871748 +n10272782 +n12056099 +n04484024 +n03101375 +n12255225 +n10724372 +n10531838 +n02354781 +n02389865 +n02853336 +n01477080 +n01779939 +n10776052 +n10724132 +n10284871 +n10554141 +n03898787 +n02366301 +n10721612 +n04421740 +n04256758 +n01445593 +n10103921 +n02729222 +n02530188 +n02387452 +n02601921 +n01711160 +n02474110 +n09869447 +n12789977 +n10158506 +n10396908 +n07839593 +n02662825 +n02473720 +n13034788 +n07752602 +n03762238 +n10262880 +n07770180 +n04030054 +n10151367 +n03525252 +n10252075 +n10747424 +n10191388 +n04130566 +n03951068 +n13239921 +n03733547 +n10358124 +n11549779 +n09203827 +n04043168 +n10359422 +n04286960 +n04237287 +n10130686 +n02338449 +n12912274 +n10586998 +n02812785 +n10364502 +n03955941 +n12324222 +n09743601 +n03766600 +n01427399 +n12968309 +n11776234 +n01501777 +n10051026 +n10397001 +n01516212 +n02596252 +n02225081 +n10479328 +n02109687 +n10181445 +n02248062 +n03802973 +n01639187 +n02142734 +n02342534 +n02410141 +n02743426 +n03950359 +n12253835 +n07805478 +n03706415 +n03578981 +n04560619 +n09761753 +n03524425 +n01962788 +n04350235 +n10686694 +n13139321 +n10195155 +n12335937 +n12758399 +n03805374 +n12895298 +n03800371 +n11972959 +n11530008 +n03178538 +n02217839 +n10591072 +n04033557 +n01880813 +n12292877 +n02430643 +n07599383 +n01954516 +n09894909 +n02474605 +n03576443 +n07595051 +n03367875 +n12945549 +n02360480 +n14583400 +n04208582 +n02405577 +n02550655 +n02513355 +n04381450 +n00444490 +n03567912 +n09937688 +n07932323 +n04029416 +n01913346 +n13237508 +n04437276 +n12938445 +n03042384 +n12543639 +n03194992 +n04094250 +n12045514 +n03825913 +n03504293 +n12758250 +n03547861 +n03649288 +n04572235 +n07569423 +n03534695 +n03253714 +n01501641 +n13906767 +n12578255 +n11749603 +n07742513 +n07609083 +n04214413 +n07595751 +n12013701 +n12592839 +n12949160 +n04093223 +n02983072 +n03510072 +n02966068 +n03867854 +n01747285 +n10691318 +n13091982 +n12574470 +n02255023 +n03449217 +n03153585 +n04006227 +n13140049 +n02965024 +n03805503 +n03911406 
+n13120958 +n12203699 +n01456454 +n10397142 +n12920043 +n02412977 +n08674344 +n07801007 +n03037590 +n10361296 +n13133316 +n03483637 +n04435759 +n12983873 +n02627037 +n03783304 +n07725158 +n02921292 +n01788864 +n01705010 +n12616996 +n03903290 +n08662427 +n03667060 +n07856992 +n03252422 +n02449699 +n12137954 +n10024025 +n07891095 +n04337157 +n04368109 +n03015631 +n02363996 +n12824289 +n03206602 +n12799269 +n02333733 +n01793565 +n01721898 +n03178173 +n02844056 +n11688378 +n13889066 +n02637475 +n03750437 +n01403457 +n01717229 +n02677136 +n12512294 +n03736269 +n02838577 +n08661878 +n01993830 +n02777638 +n02900857 +n04023021 +n03843092 +n07770439 +n12928491 +n03697812 +n02639922 +n13139482 +n07771082 +n12487058 +n07774182 +n02122810 +n02856362 +n11686195 +n11687432 +n02853870 +n04239218 +n02665250 +n02938218 +n11746600 +n10183347 +n10681194 +n04164199 +n04407257 +n12549005 +n02331842 +n03862379 +n02863638 +n11962994 +n03091907 +n04177654 +n02252972 +n02403153 +n01376437 +n02848806 +n08579266 +n07616265 +n10331258 +n10765587 +n09433312 +n03412387 +n10178077 +n13123841 +n02532918 +n04144651 +n03296963 +n03450881 +n04348988 +n10425946 +n03257065 +n02354320 +n11689197 +n04084682 +n10140783 +n03637027 +n02346170 +n02559144 +n01705591 +n09400584 +n03840327 +n03918074 +n04053767 +n02406046 +n00288190 +n03160001 +n03366464 +n09249155 +n01324305 +n07556872 +n03381565 +n12705220 +n11874878 +n02632494 +n02502006 +n03146560 +n02179340 +n04312756 +n10162016 +n03800563 +n04140853 +n07933652 +n03075248 +n04421582 +n10652703 +n02218134 +n12233249 +n04578559 +n01781071 +n02615298 +n04436832 +n04054566 +n02608284 +n11674019 +n03505764 +n02662397 +n09422190 +n04382537 +n04355684 +n04383923 +n09888635 +n03783575 +n03228796 +n07772026 +n02381119 +n15060326 +n10586166 +n12647787 +n02458517 +n10281546 +n03498866 +n02485988 +n10121246 +n09391644 +n03103904 +n08676253 +n02203978 +n04092168 +n03213014 +n03138217 +n04135933 +n12612811 +n04478066 +n02157285 +n02543255 +n03863783 +n01502101 
+n03930229 +n12439830 +n09425019 +n02618513 +n02910241 +n12261359 +n03648667 +n04365229 +n03461651 +n04388040 +n03295928 +n03581531 +n04203356 +n02622249 +n13142907 +n04497249 +n11678377 +n02366579 +n02931013 +n02837134 +n03132438 +n13092987 +n04196803 +n03056215 +n03255322 +n02130925 +n10291469 +n02971940 +n01718096 +n12510774 +n11766432 +n04271891 +n03366721 +n03154616 +n03694356 +n10478293 +n11763142 +n07763483 +n03037228 +n09201998 +n01517389 +n00443517 +n12693244 +n03580990 +n03519848 +n10238375 +n10783646 +n03564849 +n03975926 +n02473554 +n02450426 +n03464952 +n04411835 +n04573045 +n10505732 +n04337650 +n10621514 +n10334782 +n12434985 +n07769102 +n10594523 +n05475397 +n01875610 +n03299406 +n10507692 +n02593679 +n03317233 +n13239736 +n03550420 +n03247351 +n03819047 +n03633341 +n03154745 +n04073425 +n04532022 +n02910964 +n04301242 +n04378651 +n13098515 +n11775626 +n14603798 +n10263146 +n01886045 +n03761731 +n02224713 +n04591249 +n02144251 +n03849412 +n11548728 +n04051705 +n12298165 +n03150795 +n03989447 +n02826459 +n07602650 +n03155915 +n09891730 +n02067603 +n01523105 +n03618339 +n03897130 +n02711780 +n05285623 +n03533486 +n04085873 +n01923404 +n10139077 +n01709484 +n02183507 +n03216562 +n01971850 +n03136051 +n02948834 +n03589313 +n03665851 +n02937336 +n02035656 +n07769465 +n07849186 +n12585373 +n12280364 +n02846260 +n02511730 +n02614653 +n04193179 +n11718681 +n09467696 +n01522450 +n03040836 +n03162297 +n11896141 +n04000480 +n10350220 +n07746038 +n02124157 +n10655169 +n03476542 +n03895038 +n00443917 +n07757753 +n01726203 +n02987706 +n12750076 +n03012734 +n02941228 +n04194009 +n04501127 +n09794550 +n03510487 +n08589670 +n03166951 +n03673270 +n09792125 +n08492354 +n02396157 +n01628331 +n03993878 +n07833816 +n04958865 +n13650447 +n04339191 +n02826683 +n02893269 +n02810139 +n02626471 +n02589796 +n08677801 +n04325968 +n03275864 +n02622547 +n04406687 +n04097085 +n02998107 +n07831450 +n03658102 +n02575590 +n03523398 +n02412909 +n02953850 +n04337503 +n03510987 
+n12664005 +n03710294 +n13138155 +n10110093 +n07831955 +n03932080 +n12971804 +n03943623 +n03726371 +n10531445 +n12984489 +n07835051 +n12097556 +n02685701 +n03038041 +n02451125 +n04594919 +n02372140 +n02665985 +n03496183 +n03961828 +n03802800 +n01713170 +n03602790 +n04974145 +n02780588 +n04031884 +n03588216 +n02614140 +n04578708 +n04501281 +n03166600 +n03992975 +n04206070 +n03227721 +n02582349 +n02664642 +n07805389 +n09226869 +n02459190 +n12216968 +n03628984 +n02524928 +n09209025 +n04078002 +n03167153 +n03562565 +n07599554 +n10252547 +n03279804 +n07692887 +n14909584 +n02529293 +n04444953 +n04156814 +n07616174 +n03415626 +n03331244 +n03868324 +n03644073 +n02818687 +n10085101 +n02953056 +n03202481 +n02118707 +n03591901 +n12602434 +n02943465 +n02818254 +n07922607 +n02597004 +n04212810 +n04056073 +n12327528 +n02207647 +n01792808 +n03002555 +n03951213 +n12242123 +n10062275 +n12325787 +n10048117 +n11937278 +n03624767 +n04039041 +n04059298 +n03707171 +n07758407 +n01333483 +n02219015 +n02436645 +n02478239 +n04457638 +n01781698 +n09474765 +n03686363 +n10769084 +n09456207 +n02385776 +n13555775 +n03962685 +n13129078 +n03463185 +n01429172 +n04243251 +n12177129 +n03143754 +n03958338 +n02791795 +n04560502 +n12776774 +n02745816 +n03009111 +n02976552 +n03008817 +n03211413 +n03537550 +n12200504 +n01909788 +n11790089 +n03480973 +n10507070 +n01707294 +n04374907 +n04281571 +n00006024 +n03823906 +n12603273 +n03503358 +n04027820 +n12645530 +n02535080 +n04143365 +n08385989 +n12661227 +n12814857 +n11871059 +n04268418 +n13128582 +n01928865 +n04359124 +n12670334 +n03610836 +n04543924 +n02252799 +n15102359 +n04437380 +n04316924 +n11872324 +n09330378 +n10122300 +n03784139 +n00443375 +n14993378 +n01721174 +n00004475 +n00006484 +n00007846 +n00015388 +n00017222 +n00021265 +n00021939 +n00288000 +n00433458 +n00433661 +n00433802 +n00439826 +n00440218 +n00440509 +n00440747 +n00441824 +n00442115 +n00442981 +n00443231 +n00444651 +n00445351 +n00445802 +n00447073 +n00447221 +n00447540 +n00448466 
+n00448640 +n00448958 +n00449295 +n00449695 +n00450335 +n00450700 +n00451370 +n00451866 +n00452293 +n00453935 +n00454237 +n00454624 +n00463246 +n00464651 +n00464894 +n00467719 +n00467995 +n00468480 +n00469651 +n00471437 +n00471613 +n00479076 +n00480508 +n00480993 +n00482298 +n00523513 +n01035667 +n01316422 +n01316579 +n01316949 +n01317089 +n01317391 +n01317541 +n01319467 +n01320872 +n01321230 +n01321579 +n01321854 +n01322343 +n01322685 +n01322898 +n01323068 +n01326291 +n01329186 +n01338685 +n01339336 +n01340935 +n01342269 +n01358572 +n01367772 +n01375204 +n01376237 +n01380610 +n01384687 +n01385330 +n01387065 +n01389507 +n01390123 +n01392380 +n01395254 +n01397114 +n01402600 +n01407798 +n01421807 +n01438208 +n01439121 +n01439514 +n01439808 +n01441425 +n01444783 +n01445429 +n01446589 +n01446760 +n01448951 +n01450661 +n01454856 +n01455778 +n01458842 +n01459791 +n01461646 +n01466257 +n01467336 +n01468238 +n01468712 +n01471682 +n01473806 +n01474283 +n01477525 +n01478511 +n01480516 +n01480880 +n01481331 +n01482071 +n01482330 +n01483522 +n01484097 +n01488918 +n01491874 +n01492357 +n01493541 +n01494757 +n01494882 +n01495006 +n01495701 +n01497118 +n01498406 +n01498699 +n01498989 +n01500091 +n01501160 +n01503061 +n01514752 +n01515078 +n01517565 +n01524359 +n01525720 +n01527194 +n01527617 +n01528654 +n01529672 +n01533339 +n01534582 +n01534762 +n01537134 +n01538955 +n01539573 +n01540233 +n01541922 +n01542786 +n01544208 +n01546921 +n01547832 +n01548301 +n01549430 +n01550761 +n01553142 +n01555809 +n01557185 +n01560105 +n01560636 +n01563128 +n01563746 +n01564394 +n01567133 +n01568132 +n01569836 +n01570676 +n01571904 +n01572328 +n01573074 +n01574045 +n01574390 +n01575745 +n01576695 +n01577659 +n01578575 +n01579028 +n01580379 +n01580490 +n01580772 +n01580870 +n01581166 +n01581434 +n01581730 +n01582398 +n01582498 +n01582856 +n01584225 +n01585121 +n01587834 +n01588431 +n01589286 +n01591697 +n01592257 +n01592540 +n01594372 +n01595624 +n01597336 +n01598588 +n01598988 +n01600085 
+n01600657 +n01602080 +n01602209 +n01602630 +n01603600 +n01604330 +n01605630 +n01608814 +n01609062 +n01609391 +n01609751 +n01610955 +n01611472 +n01612628 +n01613294 +n01613615 +n01615121 +n01616551 +n01616764 +n01617095 +n01617443 +n01617766 +n01618082 +n01618922 +n01619310 +n01619536 +n01619835 +n01620135 +n01620414 +n01620735 +n01621127 +n01622352 +n01623706 +n01627424 +n01629276 +n01630284 +n01631175 +n01632047 +n01637112 +n01637932 +n01639765 +n01640846 +n01645776 +n01649170 +n01650167 +n01651487 +n01653773 +n01661091 +n01661592 +n01661818 +n01662622 +n01662784 +n01663401 +n01664369 +n01665932 +n01667432 +n01668091 +n01669372 +n01670092 +n01672032 +n01674216 +n01674464 +n01674990 +n01676755 +n01680264 +n01680478 +n01681940 +n01684133 +n01685439 +n01686044 +n01686220 +n01686403 +n01686609 +n01686808 +n01687665 +n01688961 +n01689411 +n01691951 +n01692864 +n01693783 +n01694709 +n01696633 +n01697178 +n01698434 +n01699040 +n01701551 +n01702256 +n01703011 +n01703569 +n01705934 +n01708106 +n01708998 +n01712008 +n01712752 +n01717016 +n01719403 +n01722998 +n01724231 +n01726692 +n01727646 +n01730185 +n01730307 +n01730812 +n01730960 +n01731545 +n01732244 +n01733757 +n01734637 +n01734808 +n01735439 +n01735577 +n01735728 +n01737472 +n01737728 +n01737875 +n01738065 +n01738601 +n01739647 +n01740551 +n01741232 +n01741562 +n01741943 +n01743605 +n01745125 +n01745484 +n01746359 +n01747885 +n01749582 +n01749742 +n01751036 +n01752165 +n01753959 +n01754876 +n01755740 +n01767661 +n01769347 +n01770795 +n01771417 +n01772222 +n01775370 +n01776192 +n01776705 +n01777304 +n01777467 +n01777649 +n01777909 +n01778217 +n01778487 +n01778621 +n01778801 +n01779148 +n01779463 +n01779629 +n01780696 +n01782209 +n01785667 +n01789386 +n01789740 +n01791107 +n01791625 +n01792158 +n01792640 +n01794158 +n01795088 +n01795735 +n01795900 +n01796019 +n01796105 +n01796519 +n01796729 +n01798706 +n01798839 +n01798979 +n01799302 +n01800424 +n01801088 +n01801479 +n01802721 +n01803078 +n01804478 +n01804653 
+n01804921 +n01805070 +n01805801 +n01806847 +n01807828 +n01808140 +n01808291 +n01808596 +n01809106 +n01810700 +n01811909 +n01812337 +n01813385 +n01814370 +n01814921 +n01815601 +n01816887 +n01819115 +n01820348 +n01820801 +n01821076 +n01821203 +n01822602 +n01823013 +n01824749 +n01825930 +n01826364 +n01827403 +n01829869 +n01831712 +n01832167 +n01834177 +n01834540 +n01835276 +n01838038 +n01838598 +n01839598 +n01841102 +n01843719 +n01844231 +n01844551 +n01844917 +n01845132 +n01845477 +n01846331 +n01848123 +n01848648 +n01849466 +n01850373 +n01851375 +n01852142 +n01852861 +n01853498 +n01854415 +n01856072 +n01856155 +n01856380 +n01856553 +n01856890 +n01857079 +n01857325 +n01857512 +n01857632 +n01857851 +n01858441 +n01859496 +n01860497 +n01861148 +n01861778 +n01871543 +n01871875 +n01874434 +n01874928 +n01876326 +n01877134 +n01878061 +n01878335 +n01878639 +n01878929 +n01879217 +n01879509 +n01880152 +n01881171 +n01883513 +n01883920 +n01886756 +n01887896 +n01888264 +n01889074 +n01889520 +n01890860 +n01891633 +n01892551 +n01894207 +n01905661 +n01906749 +n01907738 +n01909422 +n01911403 +n01911839 +n01912454 +n01914163 +n01914830 +n01915811 +n01916187 +n01916925 +n01918744 +n01922303 +n01925270 +n01925695 +n01926379 +n01926689 +n01927159 +n01927456 +n01927928 +n01928215 +n01930852 +n01931140 +n01931520 +n01931714 +n01932151 +n01932936 +n01933151 +n01933478 +n01933988 +n01934440 +n01935176 +n01936391 +n01937909 +n01940736 +n01941223 +n01942177 +n01942869 +n01943541 +n01944118 +n01944812 +n01944955 +n01945143 +n01945340 +n01945845 +n01946277 +n01948573 +n01951613 +n01953361 +n01955933 +n01956481 +n01958038 +n01959985 +n01960459 +n01963571 +n01964049 +n01964441 +n01965889 +n01967094 +n01968315 +n01969726 +n01971094 +n01971280 +n01974773 +n01975687 +n01976146 +n01976957 +n01978930 +n01981702 +n01982650 +n01983048 +n01985493 +n01985797 +n01986806 +n01987545 +n01988701 +n01989869 +n01990007 +n01991028 +n01991520 +n01992262 +n01992423 +n01992773 +n01996585 +n01998183 +n02000954 
+n02002075 +n02005790 +n02006985 +n02007284 +n02008041 +n02008796 +n02010453 +n02011805 +n02011943 +n02012185 +n02013177 +n02014941 +n02015554 +n02016358 +n02016956 +n02018027 +n02019190 +n02019438 +n02019929 +n02021050 +n02021795 +n02022684 +n02023341 +n02025043 +n02026059 +n02028175 +n02030035 +n02030287 +n02030996 +n02031934 +n02033208 +n02033324 +n02033561 +n02034129 +n02034661 +n02036053 +n02037464 +n02039171 +n02040505 +n02041085 +n02041246 +n02043063 +n02044178 +n02044778 +n02045369 +n02046759 +n02047260 +n02047614 +n02048698 +n02049532 +n02050004 +n02051474 +n02052204 +n02052365 +n02053083 +n02054502 +n02055658 +n02055803 +n02057731 +n02058594 +n02058747 +n02059162 +n02060411 +n02060889 +n02062017 +n02062430 +n02062744 +n02063224 +n02064338 +n02066707 +n02068206 +n02068974 +n02069701 +n02070430 +n02073250 +n02075296 +n02075927 +n02076196 +n02076779 +n02077152 +n02077658 +n02078292 +n02078574 +n02078738 +n02079005 +n02079389 +n02081571 +n02083346 +n02083672 +n02084071 +n02084861 +n02085374 +n02086346 +n02086478 +n02087122 +n02087551 +n02088839 +n02089232 +n02089555 +n02090475 +n02090827 +n02092468 +n02093056 +n02094562 +n02094721 +n02095050 +n02095412 +n02095727 +n02096756 +n02097786 +n02098550 +n02099029 +n02099997 +n02100399 +n02101108 +n02101861 +n02102605 +n02103406 +n02103841 +n02104523 +n02104882 +n02106966 +n02107420 +n02108254 +n02108672 +n02109150 +n02109811 +n02110532 +n02111626 +n02112497 +n02112826 +n02113335 +n02114100 +n02115012 +n02115335 +n02117512 +n02117646 +n02117900 +n02118176 +n02118333 +n02119247 +n02119359 +n02120278 +n02120997 +n02121620 +n02121808 +n02122725 +n02123785 +n02124623 +n02127292 +n02127381 +n02127482 +n02127586 +n02127678 +n02127808 +n02128598 +n02128669 +n02129463 +n02129530 +n02129837 +n02129923 +n02130545 +n02131653 +n02132466 +n02132580 +n02132788 +n02133400 +n02134971 +n02135220 +n02137722 +n02137888 +n02138647 +n02138777 +n02139199 +n02139671 +n02141306 +n02141713 +n02144593 +n02145424 +n02148835 +n02149420 
+n02150482 +n02152740 +n02152991 +n02153203 +n02153809 +n02156732 +n02159955 +n02164464 +n02165877 +n02166229 +n02166567 +n02166826 +n02167505 +n02167820 +n02167944 +n02168245 +n02169023 +n02169218 +n02169705 +n02169974 +n02170400 +n02170993 +n02171453 +n02171869 +n02172518 +n02172678 +n02172761 +n02172870 +n02174355 +n02176261 +n02178411 +n02178717 +n02179012 +n02180233 +n02181235 +n02181724 +n02182045 +n02182355 +n02182642 +n02182930 +n02183857 +n02186153 +n02188699 +n02189363 +n02190790 +n02191273 +n02191773 +n02191979 +n02192252 +n02192513 +n02192814 +n02193009 +n02193163 +n02194249 +n02194750 +n02195091 +n02195526 +n02195819 +n02196344 +n02198532 +n02199502 +n02200198 +n02202287 +n02204722 +n02206270 +n02207179 +n02207449 +n02208280 +n02208498 +n02208848 +n02208979 +n02209111 +n02209354 +n02209624 +n02209964 +n02210427 +n02210921 +n02211444 +n02211627 +n02211896 +n02212062 +n02212602 +n02212958 +n02213107 +n02215161 +n02215770 +n02217563 +n02218713 +n02220055 +n02220225 +n02220518 +n02220804 +n02221083 +n02221414 +n02221571 +n02221715 +n02221820 +n02222035 +n02222321 +n02222582 +n02223266 +n02223520 +n02226183 +n02226821 +n02226970 +n02227247 +n02227604 +n02227966 +n02228341 +n02228697 +n02229156 +n02229765 +n02230023 +n02230187 +n02230480 +n02230634 +n02231052 +n02231803 +n02233943 +n02234355 +n02234570 +n02234848 +n02235205 +n02236241 +n02236896 +n02237424 +n02237581 +n02238235 +n02238887 +n02239528 +n02241569 +n02241799 +n02243562 +n02244173 +n02246011 +n02246628 +n02247511 +n02248368 +n02248510 +n02248887 +n02249515 +n02250822 +n02251775 +n02252226 +n02253127 +n02253715 +n02254697 +n02257003 +n02257284 +n02257715 +n02259377 +n02260421 +n02260863 +n02261419 +n02262178 +n02263378 +n02264885 +n02265330 +n02268148 +n02269196 +n02269522 +n02270011 +n02270200 +n02271570 +n02271897 +n02272871 +n02274024 +n02274259 +n02274822 +n02278210 +n02278839 +n02279637 +n02280458 +n02281015 +n02281136 +n02281267 +n02282257 +n02282385 +n02282553 +n02282903 +n02283077 
+n02283201 +n02283951 +n02285548 +n02287004 +n02287799 +n02288789 +n02291220 +n02291572 +n02291748 +n02293352 +n02293868 +n02295064 +n02295390 +n02295870 +n02298541 +n02300173 +n02301452 +n02302459 +n02302620 +n02305407 +n02306433 +n02307325 +n02308139 +n02308471 +n02309337 +n02310000 +n02310717 +n02311060 +n02312006 +n02312427 +n02313008 +n02316707 +n02318167 +n02319308 +n02319555 +n02319829 +n02320127 +n02322047 +n02323449 +n02323902 +n02324045 +n02325722 +n02325884 +n02326074 +n02326763 +n02326862 +n02327028 +n02327175 +n02327435 +n02327656 +n02327842 +n02328429 +n02329401 +n02330245 +n02331046 +n02331309 +n02332755 +n02333546 +n02334460 +n02335127 +n02336011 +n02336641 +n02338901 +n02339376 +n02339922 +n02343058 +n02343320 +n02343772 +n02344528 +n02345600 +n02346998 +n02347274 +n02347573 +n02347744 +n02348173 +n02348788 +n02350105 +n02350989 +n02351870 +n02352591 +n02353861 +n02355227 +n02355477 +n02358091 +n02359324 +n02360781 +n02361587 +n02361706 +n02361850 +n02363245 +n02363351 +n02364520 +n02369680 +n02370806 +n02372584 +n02373336 +n02374149 +n02374451 +n02376542 +n02376791 +n02376918 +n02377181 +n02377480 +n02377703 +n02378415 +n02380335 +n02380583 +n02380745 +n02381460 +n02382437 +n02382948 +n02384858 +n02386014 +n02386310 +n02386496 +n02388276 +n02389346 +n02389559 +n02390454 +n02390834 +n02391234 +n02391373 +n02391508 +n02391994 +n02393580 +n02394477 +n02395003 +n02395694 +n02395931 +n02397529 +n02399000 +n02401031 +n02402010 +n02402175 +n02402425 +n02403325 +n02403454 +n02404186 +n02404573 +n02406174 +n02407959 +n02408660 +n02408817 +n02409870 +n02410702 +n02410900 +n02411705 +n02412440 +n02413131 +n02414578 +n02415435 +n02416519 +n02417070 +n02417534 +n02418064 +n02419796 +n02423218 +n02423362 +n02423589 +n02424305 +n02424695 +n02426813 +n02427724 +n02428349 +n02430045 +n02430559 +n02431122 +n02432511 +n02433546 +n02433925 +n02435853 +n02437136 +n02437971 +n02438173 +n02438272 +n02439033 +n02441326 +n02442172 +n02442336 +n02442446 +n02442572 
+n02442668 +n02443015 +n02443346 +n02443808 +n02443959 +n02444251 +n02445004 +n02445171 +n02446206 +n02446352 +n02446645 +n02447021 +n02447762 +n02448060 +n02448633 +n02448885 +n02450034 +n02453108 +n02453611 +n02454794 +n02455135 +n02455428 +n02455720 +n02456008 +n02456275 +n02456962 +n02460009 +n02469914 +n02470325 +n02470899 +n02471300 +n02471762 +n02472293 +n02473307 +n02474777 +n02476219 +n02480153 +n02481103 +n02481235 +n02481366 +n02481500 +n02482060 +n02482286 +n02482474 +n02482650 +n02483092 +n02484322 +n02484473 +n02485225 +n02485371 +n02485536 +n02485688 +n02486657 +n02486908 +n02487079 +n02487547 +n02487675 +n02487847 +n02488003 +n02488415 +n02488894 +n02489589 +n02490597 +n02490811 +n02491107 +n02491329 +n02491474 +n02496913 +n02501583 +n02502514 +n02503127 +n02503517 +n02504770 +n02507649 +n02508021 +n02512053 +n02512938 +n02513560 +n02515214 +n02516188 +n02517442 +n02517938 +n02519148 +n02519686 +n02521646 +n02522399 +n02524524 +n02526425 +n02526818 +n02527057 +n02527271 +n02527622 +n02528163 +n02529772 +n02530421 +n02532028 +n02532602 +n02533209 +n02533834 +n02534559 +n02534734 +n02535537 +n02537085 +n02537319 +n02538406 +n02538985 +n02540412 +n02541687 +n02546331 +n02548689 +n02549989 +n02550460 +n02552171 +n02554730 +n02556846 +n02557591 +n02557749 +n02559862 +n02561108 +n02561661 +n02562315 +n02562796 +n02563182 +n02564720 +n02565573 +n02566109 +n02568959 +n02569484 +n02570838 +n02572196 +n02574910 +n02576223 +n02576575 +n02578233 +n02579557 +n02580336 +n02581957 +n02583567 +n02585872 +n02586543 +n02588286 +n02590495 +n02590702 +n02590987 +n02594250 +n02596381 +n02597367 +n02599052 +n02599958 +n02600298 +n02601344 +n02602405 +n02603317 +n02604157 +n02605316 +n02606384 +n02607201 +n02607862 +n02613181 +n02614482 +n02614978 +n02619165 +n02621908 +n02623445 +n02624167 +n02625612 +n02626762 +n02627835 +n02630281 +n02630739 +n02631041 +n02636170 +n02636854 +n02638596 +n02640626 +n02640857 +n02642107 +n02642644 +n02643112 +n02644113 +n02646667 
+n02648625 +n02650050 +n02650541 +n02652668 +n02653145 +n02653786 +n02654425 +n02655523 +n02656670 +n02657368 +n02658079 +n02661017 +n02662239 +n02663849 +n02667379 +n02667576 +n02668393 +n02670382 +n02671780 +n02672371 +n02676261 +n02676670 +n02677028 +n02677718 +n02678384 +n02680110 +n02680754 +n02682407 +n02682922 +n02683791 +n02686121 +n02686568 +n02687992 +n02688443 +n02689274 +n02691156 +n02692513 +n02693413 +n02693540 +n02694426 +n02694966 +n02695627 +n02697576 +n02698244 +n02700258 +n02700895 +n02702989 +n02703275 +n02705944 +n02708224 +n02708555 +n02709367 +n02709637 +n02710600 +n02712643 +n02713218 +n02715229 +n02715513 +n02715712 +n02716626 +n02726305 +n02726681 +n02727016 +n02727825 +n02728440 +n02729837 +n02729965 +n02730265 +n02732072 +n02732827 +n02733213 +n02733524 +n02735361 +n02735688 +n02736798 +n02737660 +n02738031 +n02738271 +n02738535 +n02739550 +n02739668 +n02740533 +n02740764 +n02741475 +n02742322 +n02742753 +n02745492 +n02746365 +n02749790 +n02750169 +n02751067 +n02751295 +n02752496 +n02753044 +n02753394 +n02754103 +n02755352 +n02755529 +n02756098 +n02756977 +n02757462 +n02757810 +n02758134 +n02758960 +n02759700 +n02759963 +n02760099 +n02760199 +n02760429 +n02760855 +n02761392 +n02763198 +n02763714 +n02764044 +n02764614 +n02764779 +n02765028 +n02766320 +n02766534 +n02766792 +n02767433 +n02769075 +n02770830 +n02772554 +n02772700 +n02773037 +n02773838 +n02774152 +n02774630 +n02775483 +n02776205 +n02777100 +n02777734 +n02777927 +n02778456 +n02778669 +n02781121 +n02781338 +n02781517 +n02783035 +n02783324 +n02784998 +n02785648 +n02786198 +n02786463 +n02788689 +n02789487 +n02790823 +n02792552 +n02792948 +n02793842 +n02794008 +n02794779 +n02794972 +n02795783 +n02796207 +n02796623 +n02796995 +n02797692 +n02797881 +n02799897 +n02801184 +n02801525 +n02801938 +n02802721 +n02803349 +n02803666 +n02804252 +n02806088 +n02806379 +n02806875 +n02810471 +n02811468 +n02811719 +n02812201 +n02813252 +n02813399 +n02815478 +n02815950 +n02816494 +n02817031 
+n02817650 +n02817799 +n02818832 +n02819697 +n02820210 +n02821627 +n02821943 +n02822220 +n02822399 +n02822865 +n02823335 +n02824448 +n02826589 +n02826886 +n02827606 +n02828299 +n02828884 +n02831335 +n02831724 +n02831894 +n02833793 +n02834778 +n02835412 +n02836268 +n02839351 +n02839910 +n02840619 +n02841063 +n02841506 +n02842133 +n02843029 +n02843777 +n02844214 +n02844307 +n02844714 +n02847631 +n02848216 +n02848523 +n02849154 +n02850950 +n02851099 +n02853016 +n02854532 +n02854926 +n02855089 +n02855390 +n02855793 +n02857365 +n02857477 +n02857644 +n02858304 +n02860415 +n02861886 +n02862048 +n02862916 +n02863750 +n02865665 +n02865931 +n02866578 +n02867715 +n02869737 +n02871631 +n02871824 +n02871963 +n02872752 +n02873839 +n02874086 +n02875436 +n02876326 +n02876457 +n02876657 +n02877962 +n02879517 +n02880189 +n02880546 +n02880940 +n02881193 +n02881906 +n02882483 +n02882647 +n02883004 +n02883344 +n02884225 +n02885108 +n02885338 +n02886599 +n02887209 +n02887970 +n02888569 +n02889425 +n02891188 +n02891788 +n02892499 +n02893418 +n02896294 +n02896442 +n02897389 +n02897820 +n02898173 +n02898369 +n02898585 +n02898711 +n02900705 +n02901481 +n02901901 +n02902079 +n02902916 +n02903006 +n02904109 +n02904640 +n02908217 +n02909285 +n02911485 +n02912065 +n02913152 +n02914991 +n02916179 +n02916350 +n02917377 +n02917607 +n02919414 +n02920503 +n02921884 +n02923129 +n02924116 +n02925519 +n02928413 +n02928608 +n02929289 +n02929462 +n02929923 +n02931417 +n02931836 +n02932019 +n02932400 +n02933112 +n02933462 +n02933750 +n02933990 +n02934168 +n02935658 +n02935891 +n02936176 +n02936281 +n02936714 +n02938886 +n02939866 +n02941095 +n02942699 +n02943241 +n02943871 +n02944826 +n02945161 +n02946270 +n02946348 +n02946921 +n02947212 +n02947818 +n02948557 +n02949202 +n02950186 +n02950256 +n02950632 +n02950943 +n02951843 +n02952485 +n02952674 +n02953673 +n02954163 +n02954340 +n02954938 +n02955065 +n02955247 +n02955540 +n02955767 +n02957135 +n02957755 +n02958343 +n02959942 +n02961451 +n02961947 
+n02963302 +n02963692 +n02963821 +n02965216 +n02965300 +n02965529 +n02966545 +n02966786 +n02966942 +n02967081 +n02967991 +n02968473 +n02969010 +n02969163 +n02969634 +n02969886 +n02970685 +n02970849 +n02971691 +n02972397 +n02973017 +n02974697 +n02975212 +n02976939 +n02978205 +n02978753 +n02979516 +n02982599 +n02983189 +n02983904 +n02984061 +n02984203 +n02984469 +n02984699 +n02985137 +n02985828 +n02986066 +n02987047 +n02987492 +n02989099 +n02991048 +n02991302 +n02992032 +n02993546 +n02995998 +n02997391 +n02997607 +n03001282 +n03001627 +n03002210 +n03003091 +n03004620 +n03005515 +n03007130 +n03007591 +n03010656 +n03010795 +n03011018 +n03011355 +n03012159 +n03013006 +n03014440 +n03015254 +n03017070 +n03018209 +n03020034 +n03020416 +n03020692 +n03024333 +n03025070 +n03025886 +n03027108 +n03027250 +n03029066 +n03031422 +n03032811 +n03033362 +n03033986 +n03034516 +n03034663 +n03035510 +n03036469 +n03036866 +n03037108 +n03037709 +n03038685 +n03039015 +n03039947 +n03040229 +n03040376 +n03043274 +n03043958 +n03045337 +n03046257 +n03048883 +n03049066 +n03049457 +n03050026 +n03050546 +n03050655 +n03050864 +n03051396 +n03051540 +n03052464 +n03052917 +n03053047 +n03054901 +n03055670 +n03056097 +n03056493 +n03057021 +n03057636 +n03058107 +n03058603 +n03058949 +n03059366 +n03061050 +n03063073 +n03063338 +n03064350 +n03064758 +n03065708 +n03066849 +n03070193 +n03071021 +n03071160 +n03072201 +n03073296 +n03073977 +n03074380 +n03074855 +n03075097 +n03075500 +n03075634 +n03076411 +n03076708 +n03078287 +n03078670 +n03079230 +n03079741 +n03080497 +n03080731 +n03081986 +n03082127 +n03082807 +n03082979 +n03084420 +n03085333 +n03085602 +n03085915 +n03086183 +n03086457 +n03086670 +n03087366 +n03087643 +n03087816 +n03088707 +n03091044 +n03091374 +n03092166 +n03092314 +n03093792 +n03094503 +n03096439 +n03096960 +n03098140 +n03098806 +n03099454 +n03099771 +n03099945 +n03100346 +n03100490 +n03101156 +n03101986 +n03102654 +n03102859 +n03106722 +n03106898 +n03107046 +n03109881 +n03111690 
+n03112869 +n03113152 +n03113657 +n03113835 +n03114839 +n03115180 +n03116530 +n03116767 +n03117199 +n03118346 +n03118969 +n03119510 +n03120198 +n03120491 +n03121897 +n03122748 +n03123809 +n03125870 +n03128085 +n03128427 +n03128519 +n03129001 +n03130066 +n03130563 +n03131669 +n03132261 +n03134853 +n03135917 +n03136369 +n03137579 +n03139464 +n03140900 +n03141065 +n03141327 +n03143572 +n03145384 +n03145843 +n03146846 +n03147509 +n03148324 +n03148727 +n03149401 +n03151077 +n03153948 +n03154073 +n03154446 +n03155178 +n03156071 +n03156405 +n03157348 +n03158796 +n03158885 +n03161450 +n03162818 +n03163798 +n03163973 +n03164605 +n03164722 +n03164929 +n03165823 +n03167978 +n03168107 +n03168217 +n03170635 +n03171356 +n03172965 +n03173387 +n03175604 +n03176386 +n03177165 +n03177708 +n03178000 +n03178430 +n03180504 +n03180969 +n03181293 +n03182140 +n03182232 +n03182912 +n03183080 +n03186818 +n03187751 +n03189818 +n03193597 +n03196062 +n03196324 +n03196598 +n03199647 +n03199901 +n03200357 +n03200539 +n03200701 +n03200906 +n03201035 +n03201638 +n03201996 +n03202354 +n03202760 +n03203089 +n03203806 +n03204306 +n03204558 +n03204955 +n03205143 +n03205304 +n03206718 +n03206908 +n03207305 +n03208556 +n03210683 +n03211117 +n03211616 +n03212811 +n03214253 +n03214450 +n03215191 +n03219135 +n03220237 +n03221059 +n03221720 +n03222516 +n03223162 +n03223441 +n03224753 +n03224893 +n03225777 +n03226538 +n03228016 +n03228533 +n03228692 +n03229115 +n03229526 +n03231160 +n03231819 +n03235796 +n03235979 +n03236580 +n03236735 +n03237212 +n03237639 +n03239259 +n03239726 +n03240140 +n03241093 +n03241335 +n03241496 +n03242120 +n03242506 +n03242995 +n03243218 +n03245271 +n03245421 +n03246933 +n03250952 +n03251533 +n03251766 +n03252324 +n03252637 +n03254374 +n03255488 +n03255899 +n03256788 +n03256928 +n03257586 +n03258905 +n03259505 +n03261776 +n03262519 +n03262809 +n03262932 +n03265032 +n03266749 +n03267821 +n03269203 +n03269401 +n03270695 +n03271765 +n03271865 +n03272239 +n03272383 +n03273061 
+n03273913 +n03274561 +n03274796 +n03276179 +n03277459 +n03277771 +n03278248 +n03279153 +n03279364 +n03279508 +n03280394 +n03280644 +n03281145 +n03282060 +n03282401 +n03284743 +n03284981 +n03285912 +n03286572 +n03287733 +n03288003 +n03289985 +n03291413 +n03292960 +n03294048 +n03294833 +n03296478 +n03297103 +n03297644 +n03297735 +n03298089 +n03302790 +n03303217 +n03303831 +n03304197 +n03304465 +n03305522 +n03307573 +n03308152 +n03309808 +n03314378 +n03314884 +n03315644 +n03316406 +n03318136 +n03319457 +n03320046 +n03322570 +n03322940 +n03323703 +n03324928 +n03325088 +n03326073 +n03327234 +n03327691 +n03327841 +n03329663 +n03330792 +n03334017 +n03334492 +n03334912 +n03335030 +n03335846 +n03336839 +n03337494 +n03338287 +n03338821 +n03339296 +n03339643 +n03340009 +n03340923 +n03342961 +n03343354 +n03343560 +n03343853 +n03346135 +n03346455 +n03349296 +n03350352 +n03350456 +n03350602 +n03351262 +n03351979 +n03352628 +n03354903 +n03355468 +n03356446 +n03357267 +n03357716 +n03359137 +n03359566 +n03360731 +n03361683 +n03362771 +n03363363 +n03364008 +n03364937 +n03365592 +n03365991 +n03366823 +n03373237 +n03374649 +n03374838 +n03375171 +n03376279 +n03378342 +n03379343 +n03379828 +n03379989 +n03380647 +n03380867 +n03381126 +n03381231 +n03381776 +n03382856 +n03382969 +n03383468 +n03384167 +n03384891 +n03385557 +n03386011 +n03387323 +n03387653 +n03390327 +n03391770 +n03393324 +n03394480 +n03394649 +n03396580 +n03396654 +n03397266 +n03397532 +n03398228 +n03399761 +n03399971 +n03402188 +n03402369 +n03404012 +n03404360 +n03404449 +n03405265 +n03405725 +n03407369 +n03409393 +n03409591 +n03410147 +n03411339 +n03412058 +n03412220 +n03412511 +n03412906 +n03413264 +n03413428 +n03413828 +n03414162 +n03415252 +n03416489 +n03416775 +n03417345 +n03418158 +n03418242 +n03419014 +n03422072 +n03422589 +n03423719 +n03424630 +n03427296 +n03428090 +n03428349 +n03429003 +n03429288 +n03429914 +n03430091 +n03430313 +n03430551 +n03430959 +n03431243 +n03431745 +n03433637 +n03433877 +n03434285 
+n03434830 +n03435593 +n03437941 +n03438257 +n03439814 +n03441112 +n03442288 +n03442756 +n03446070 +n03446832 +n03448031 +n03448956 +n03449564 +n03449858 +n03450516 +n03452267 +n03452449 +n03453320 +n03454110 +n03454211 +n03454707 +n03455355 +n03456548 +n03456665 +n03457008 +n03457686 +n03458271 +n03459914 +n03461882 +n03465500 +n03465818 +n03466162 +n03466839 +n03467517 +n03467796 +n03467984 +n03468696 +n03469493 +n03470387 +n03470629 +n03470948 +n03472232 +n03472535 +n03472937 +n03473817 +n03473966 +n03475823 +n03476083 +n03476313 +n03477773 +n03477902 +n03478756 +n03478907 +n03481521 +n03482523 +n03483230 +n03483531 +n03484083 +n03484931 +n03487331 +n03487444 +n03487774 +n03488188 +n03488603 +n03489162 +n03490324 +n03490449 +n03490884 +n03491988 +n03496296 +n03496612 +n03497100 +n03497657 +n03498536 +n03499468 +n03500295 +n03501152 +n03501288 +n03501614 +n03502331 +n03502509 +n03502777 +n03503718 +n03503997 +n03505383 +n03505504 +n03506370 +n03507963 +n03508101 +n03509394 +n03509843 +n03510583 +n03510866 +n03511175 +n03512147 +n03512911 +n03513137 +n03513376 +n03515338 +n03517899 +n03517982 +n03518631 +n03519674 +n03521076 +n03521544 +n03522634 +n03524574 +n03524976 +n03525074 +n03525454 +n03525827 +n03528263 +n03529444 +n03531281 +n03531447 +n03531546 +n03532342 +n03534776 +n03535024 +n03536761 +n03537412 +n03538037 +n03538300 +n03538634 +n03538957 +n03540267 +n03540595 +n03541091 +n03541696 +n03541923 +n03542333 +n03542860 +n03543603 +n03544360 +n03545150 +n03546340 +n03547054 +n03547530 +n03548930 +n03550153 +n03550289 +n03551084 +n03551790 +n03552449 +n03552749 +n03553486 +n03554460 +n03555426 +n03555662 +n03557590 +n03558176 +n03558404 +n03558739 +n03561169 +n03563200 +n03563710 +n03563967 +n03565288 +n03565565 +n03566329 +n03568117 +n03568818 +n03571942 +n03572205 +n03574555 +n03574816 +n03575958 +n03576215 +n03577672 +n03577818 +n03578055 +n03578251 +n03578656 +n03579538 +n03579982 +n03583621 +n03584400 +n03585073 +n03588951 +n03589513 +n03589791 
+n03590306 +n03590932 +n03592245 +n03592773 +n03593526 +n03595409 +n03595860 +n03596285 +n03597317 +n03598151 +n03598299 +n03598646 +n03600977 +n03601638 +n03601840 +n03602081 +n03603722 +n03604629 +n03604843 +n03605722 +n03605915 +n03606465 +n03609235 +n03609542 +n03610418 +n03610992 +n03612814 +n03613294 +n03613592 +n03614007 +n03614532 +n03615563 +n03617095 +n03617594 +n03618546 +n03618982 +n03619396 +n03619650 +n03619793 +n03619890 +n03620052 +n03621049 +n03621694 +n03622931 +n03623556 +n03624134 +n03625355 +n03626115 +n03631177 +n03631811 +n03632852 +n03633886 +n03635032 +n03635668 +n03635932 +n03636248 +n03636649 +n03638883 +n03639675 +n03640988 +n03642444 +n03646296 +n03646916 +n03647520 +n03651388 +n03653220 +n03653454 +n03654576 +n03655072 +n03656484 +n03657239 +n03658858 +n03659292 +n03660124 +n03661340 +n03662719 +n03662887 +n03663531 +n03664675 +n03664943 +n03665366 +n03666362 +n03666917 +n03667235 +n03667829 +n03671914 +n03672827 +n03673450 +n03673767 +n03676759 +n03677766 +n03679384 +n03679712 +n03681477 +n03682487 +n03684823 +n03685307 +n03685820 +n03686130 +n03686470 +n03687928 +n03688943 +n03689157 +n03689570 +n03690851 +n03691817 +n03692379 +n03693293 +n03697552 +n03698604 +n03699280 +n03699975 +n03700963 +n03701191 +n03701391 +n03701640 +n03701790 +n03702248 +n03704834 +n03705379 +n03706653 +n03707597 +n03708036 +n03709206 +n03709363 +n03709545 +n03710528 +n03711711 +n03711999 +n03712887 +n03713069 +n03714235 +n03715386 +n03715669 +n03715892 +n03716966 +n03717131 +n03718212 +n03718335 +n03718699 +n03718789 +n03719053 +n03721590 +n03722007 +n03722288 +n03724176 +n03725035 +n03725717 +n03726516 +n03726760 +n03726993 +n03727837 +n03727946 +n03728437 +n03728982 +n03729647 +n03729951 +n03730153 +n03730788 +n03731695 +n03733644 +n03733925 +n03735637 +n03736970 +n03738241 +n03738472 +n03739518 +n03739693 +n03743902 +n03744276 +n03744684 +n03744840 +n03745571 +n03746330 +n03748162 +n03749504 +n03749807 +n03750206 +n03751065 +n03752185 +n03752922 
+n03753077 +n03753514 +n03758894 +n03759432 +n03760671 +n03762982 +n03763727 +n03764276 +n03765561 +n03765934 +n03766322 +n03768132 +n03769722 +n03770954 +n03772077 +n03772674 +n03773035 +n03775199 +n03775847 +n03779000 +n03779370 +n03780047 +n03781787 +n03782190 +n03785499 +n03787523 +n03789171 +n03789400 +n03789946 +n03790230 +n03790512 +n03790755 +n03791235 +n03792048 +n03792526 +n03793850 +n03795976 +n03796181 +n03797390 +n03798982 +n03799113 +n03800485 +n03800772 +n03800933 +n03802007 +n03802228 +n03802393 +n03803116 +n03809312 +n03811295 +n03811444 +n03811847 +n03811965 +n03812382 +n03812924 +n03813176 +n03813946 +n03815278 +n03815482 +n03815615 +n03816005 +n03816136 +n03816849 +n03817647 +n03819595 +n03819994 +n03820154 +n03820318 +n03820728 +n03820950 +n03824197 +n03825080 +n03827536 +n03828020 +n03829340 +n03831757 +n03834040 +n03834604 +n03836062 +n03837422 +n03838748 +n03839172 +n03839276 +n03839795 +n03841666 +n03842156 +n03844045 +n03844233 +n03845190 +n03846234 +n03846772 +n03847471 +n03847823 +n03848168 +n03848348 +n03849275 +n03850613 +n03851341 +n03851787 +n03852280 +n03852688 +n03854815 +n03859280 +n03859495 +n03859958 +n03861430 +n03861842 +n03862676 +n03863923 +n03864139 +n03864356 +n03864692 +n03865371 +n03865949 +n03868406 +n03871083 +n03871524 +n03871724 +n03873848 +n03874138 +n03874823 +n03875218 +n03880129 +n03880323 +n03880531 +n03883054 +n03883773 +n03883944 +n03884639 +n03885535 +n03885669 +n03886053 +n03886641 +n03887185 +n03888022 +n03889503 +n03889726 +n03891051 +n03892557 +n03894051 +n03894379 +n03896103 +n03896233 +n03896419 +n03896628 +n03896984 +n03897943 +n03898271 +n03898633 +n03899612 +n03899933 +n03901338 +n03903133 +n03903424 +n03904060 +n03904183 +n03904433 +n03905540 +n03906997 +n03907654 +n03908204 +n03909160 +n03909406 +n03915118 +n03915437 +n03916470 +n03916720 +n03917327 +n03918480 +n03920737 +n03923564 +n03923692 +n03924069 +n03926148 +n03926412 +n03926876 +n03927792 +n03928116 +n03929091 +n03929202 +n03929443 
+n03930515 +n03932670 +n03936269 +n03938522 +n03939677 +n03940256 +n03941684 +n03943920 +n03945615 +n03947111 +n03947466 +n03948459 +n03951971 +n03953020 +n03953416 +n03955809 +n03956785 +n03956922 +n03957315 +n03957762 +n03958630 +n03958752 +n03959014 +n03959701 +n03961939 +n03962525 +n03962932 +n03963028 +n03965907 +n03966325 +n03966751 +n03966976 +n03967942 +n03968293 +n03971321 +n03972524 +n03973520 +n03973628 +n03975035 +n03979377 +n03979492 +n03980026 +n03981340 +n03982232 +n03982895 +n03984234 +n03984381 +n03985232 +n03986704 +n03988170 +n03989665 +n03990474 +n03991443 +n03992325 +n03992703 +n03993180 +n03993403 +n03994008 +n03994757 +n03995018 +n03995856 +n03996145 +n03996416 +n03997484 +n03999992 +n04000311 +n04001397 +n04001499 +n04001845 +n04004210 +n04004475 +n04005912 +n04007664 +n04010057 +n04010779 +n04010927 +n04011827 +n04012084 +n04013729 +n04014297 +n04015204 +n04016576 +n04016684 +n04018399 +n04018667 +n04019101 +n04019696 +n04020087 +n04020298 +n04020912 +n04021028 +n04021362 +n04021798 +n04022332 +n04022708 +n04023249 +n04024274 +n04026053 +n04026918 +n04027023 +n04027706 +n04028315 +n04029734 +n04030274 +n04036303 +n04037964 +n04038440 +n04038727 +n04039848 +n04042358 +n04042632 +n04042795 +n04042985 +n04043733 +n04044307 +n04044498 +n04045085 +n04045397 +n04046590 +n04046974 +n04047401 +n04049405 +n04050066 +n04051549 +n04051825 +n04052757 +n04056932 +n04057047 +n04057435 +n04057846 +n04057981 +n04058096 +n04058239 +n04059947 +n04060647 +n04060904 +n04061793 +n04061969 +n04062644 +n04063373 +n04063868 +n04064401 +n04065464 +n04065789 +n04067231 +n04067353 +n04067921 +n04068441 +n04068601 +n04069276 +n04069777 +n04070207 +n04070964 +n04071102 +n04071263 +n04071393 +n04072193 +n04072551 +n04073948 +n04075468 +n04075916 +n04076284 +n04077430 +n04077734 +n04078574 +n04079106 +n04079244 +n04079933 +n04080454 +n04080833 +n04081844 +n04083649 +n04086794 +n04087126 +n04087709 +n04088696 +n04088797 +n04089666 +n04089976 +n04090548 +n04091097 
+n04093625 +n04095210 +n04096066 +n04097622 +n04097866 +n04099175 +n04099429 +n04100174 +n04101497 +n04101701 +n04102037 +n04102285 +n04102406 +n04102962 +n04104147 +n04104500 +n04105068 +n04105438 +n04105893 +n04107984 +n04108268 +n04110068 +n04110654 +n04110955 +n04111190 +n04111414 +n04111668 +n04113765 +n04114996 +n04115256 +n04115996 +n04116389 +n04118021 +n04121228 +n04122349 +n04122492 +n04122825 +n04123123 +n04123567 +n04123740 +n04125116 +n04125853 +n04126541 +n04126659 +n04126980 +n04127904 +n04128499 +n04128837 +n04131929 +n04134632 +n04136510 +n04137444 +n04137897 +n04138977 +n04139859 +n04140064 +n04140631 +n04141838 +n04143897 +n04146050 +n04147495 +n04148054 +n04149083 +n04151108 +n04151581 +n04151940 +n04152387 +n04154753 +n04156297 +n04156411 +n04157320 +n04158807 +n04158956 +n04160372 +n04160586 +n04161358 +n04161981 +n04164757 +n04164868 +n04166111 +n04167489 +n04169437 +n04170037 +n04171459 +n04171629 +n04171831 +n04174101 +n04174500 +n04176068 +n04176190 +n04176528 +n04177329 +n04177545 +n04180063 +n04180888 +n04181228 +n04181718 +n04182322 +n04183217 +n04183329 +n04184435 +n04184600 +n04185071 +n04186051 +n04186268 +n04186455 +n04186624 +n04186848 +n04187061 +n04187547 +n04187885 +n04189092 +n04190052 +n04190464 +n04190747 +n04190997 +n04191150 +n04191595 +n04191943 +n04192238 +n04192858 +n04194289 +n04196080 +n04197391 +n04198015 +n04198797 +n04199027 +n04201733 +n04202417 +n04205318 +n04206356 +n04207763 +n04210390 +n04211356 +n04211970 +n04215910 +n04216634 +n04216860 +n04216963 +n04217718 +n04217882 +n04219424 +n04221823 +n04222210 +n04222470 +n04222847 +n04225031 +n04225222 +n04225729 +n04226464 +n04226537 +n04227900 +n04229007 +n04229107 +n04229480 +n04230603 +n04230808 +n04231693 +n04232153 +n04233832 +n04234455 +n04235291 +n04235771 +n04236001 +n04236377 +n04236702 +n04238617 +n04241042 +n04241394 +n04242408 +n04243003 +n04243941 +n04244997 +n04245847 +n04246855 +n04247630 +n04247736 +n04248507 +n04249415 +n04250224 +n04250599 
+n04253931 +n04255499 +n04256520 +n04260589 +n04261116 +n04262678 +n04263336 +n04263760 +n04264233 +n04264914 +n04266486 +n04267577 +n04269944 +n04270891 +n04271148 +n04272054 +n04272782 +n04273064 +n04273796 +n04275283 +n04275661 +n04275904 +n04278353 +n04279172 +n04279987 +n04280259 +n04280970 +n04283585 +n04283905 +n04284002 +n04285146 +n04285622 +n04285803 +n04286128 +n04288272 +n04288533 +n04288673 +n04289449 +n04291242 +n04291759 +n04292414 +n04292572 +n04293119 +n04293744 +n04294212 +n04294426 +n04295081 +n04295881 +n04299215 +n04300358 +n04301000 +n04301474 +n04303258 +n04304375 +n04305471 +n04306080 +n04306847 +n04307419 +n04307878 +n04308084 +n04308273 +n04308397 +n04308583 +n04308807 +n04309348 +n04309833 +n04310721 +n04311595 +n04312154 +n04312432 +n04313220 +n04314914 +n04315828 +n04315948 +n04317420 +n04318131 +n04318982 +n04319937 +n04320405 +n04322026 +n04322692 +n04322801 +n04323819 +n04326799 +n04326896 +n04328054 +n04328329 +n04328946 +n04329477 +n04330340 +n04330669 +n04330998 +n04331277 +n04332987 +n04333129 +n04338517 +n04339638 +n04340750 +n04340935 +n04341133 +n04341414 +n04341686 +n04346679 +n04347519 +n04348184 +n04348359 +n04349401 +n04350104 +n04350458 +n04354589 +n04356595 +n04358707 +n04358874 +n04359335 +n04359589 +n04360501 +n04360798 +n04361095 +n04361260 +n04362821 +n04363210 +n04363874 +n04364545 +n04364827 +n04364994 +n04365328 +n04365484 +n04365751 +n04368695 +n04370048 +n04371563 +n04373894 +n04375775 +n04377057 +n04378956 +n04379243 +n04379964 +n04380346 +n04381994 +n04382334 +n04382880 +n04383130 +n04383301 +n04386664 +n04387201 +n04387400 +n04388162 +n04388743 +n04389521 +n04390873 +n04391838 +n04392526 +n04393095 +n04394261 +n04395875 +n04397168 +n04397261 +n04397645 +n04398497 +n04398688 +n04398834 +n04399046 +n04400289 +n04401088 +n04402057 +n04402580 +n04402746 +n04402984 +n04403638 +n04404817 +n04404997 +n04405540 +n04405762 +n04407435 +n04407686 +n04409128 +n04409806 +n04410086 +n04410365 +n04410485 +n04411264 
+n04411966 +n04413151 +n04413419 +n04415663 +n04416901 +n04417180 +n04417361 +n04417809 +n04419073 +n04421872 +n04422875 +n04427715 +n04428008 +n04431436 +n04431745 +n04434932 +n04435180 +n04436185 +n04436401 +n04436542 +n04437670 +n04437953 +n04438304 +n04438643 +n04440963 +n04441662 +n04444749 +n04445040 +n04445952 +n04446276 +n04447276 +n04447443 +n04448070 +n04448361 +n04450243 +n04450640 +n04450749 +n04451818 +n04452615 +n04452848 +n04453156 +n04453666 +n04453910 +n04454654 +n04455250 +n04455652 +n04456472 +n04457326 +n04458843 +n04459362 +n04459610 +n04460130 +n04462011 +n04463679 +n04464852 +n04467099 +n04467307 +n04468005 +n04469251 +n04470741 +n04471315 +n04471632 +n04472243 +n04472726 +n04473884 +n04474466 +n04475411 +n04475631 +n04477548 +n04478512 +n04478657 +n04480527 +n04481524 +n04487724 +n04488427 +n04489008 +n04489817 +n04490091 +n04491769 +n04493109 +n04494204 +n04495450 +n04497442 +n04497570 +n04498523 +n04499446 +n04499554 +n04500060 +n04501837 +n04502197 +n04502502 +n04502670 +n04502851 +n04504141 +n04504770 +n04505036 +n04506994 +n04507453 +n04508163 +n04508489 +n04508949 +n04509260 +n04509592 +n04511002 +n04514241 +n04516116 +n04516214 +n04516672 +n04518132 +n04519153 +n04520170 +n04520382 +n04521987 +n04524313 +n04527648 +n04529681 +n04530566 +n04531098 +n04531873 +n04533042 +n04533199 +n04533700 +n04534127 +n04534895 +n04536153 +n04538552 +n04539203 +n04540761 +n04541320 +n04543158 +n04544450 +n04546194 +n04546855 +n04547592 +n04549122 +n04549919 +n04551055 +n04552696 +n04553389 +n04554871 +n04555600 +n04555897 +n04556948 +n04557308 +n04557751 +n04558059 +n04558804 +n04559023 +n04559730 +n04562262 +n04563204 +n04565375 +n04566257 +n04567098 +n04568069 +n04568557 +n04569520 +n04569822 +n04570958 +n04571292 +n04571566 +n04571958 +n04572935 +n04574471 +n04574999 +n04576002 +n04576211 +n04576971 +n04577426 +n04577769 +n04578801 +n04579230 +n04580493 +n04581595 +n04582349 +n04583620 +n04585745 +n04585980 +n04586932 +n04587648 +n04588739 
+n04589190 +n04589434 +n04591056 +n04591887 +n04592005 +n04592099 +n04594218 +n04594489 +n04595285 +n04595855 +n04596852 +n04597066 +n04597804 +n04598136 +n04598582 +n04599124 +n04600312 +n04600486 +n04600912 +n04603729 +n04603872 +n04605726 +n04606574 +n04608329 +n04608567 +n04609531 +n04609651 +n04610176 +n04610503 +n04610676 +n04611916 +n04613015 +n04615226 +n04615644 +n04950713 +n04951373 +n04958634 +n04959672 +n04960277 +n04961691 +n04963740 +n04965179 +n04965661 +n04967191 +n04968895 +n04970059 +n04970631 +n04970916 +n04972801 +n04973386 +n04976952 +n05238282 +n05241218 +n05242070 +n05244934 +n05266879 +n05399034 +n05447757 +n05449959 +n05453657 +n05467758 +n05586759 +n06254669 +n06262567 +n06263369 +n06263609 +n06263762 +n06266417 +n06266710 +n06267145 +n06271778 +n06272290 +n06272803 +n06274092 +n06275353 +n06276697 +n06277280 +n06281040 +n06359467 +n06359657 +n06418693 +n06591815 +n06592078 +n06595351 +n06613686 +n06793231 +n07556637 +n07556970 +n07557165 +n07557434 +n07560652 +n07561112 +n07562495 +n07563800 +n07564629 +n07564971 +n07565725 +n07565945 +n07566340 +n07566863 +n07567390 +n07567707 +n07568818 +n07569106 +n07569543 +n07570720 +n07572353 +n07572957 +n07573103 +n07573696 +n07574602 +n07575076 +n07575726 +n07575984 +n07576182 +n07576438 +n07576577 +n07577374 +n07579575 +n07580053 +n07580359 +n07580470 +n07581346 +n07581775 +n07582277 +n07582441 +n07582609 +n07583197 +n07584228 +n07584593 +n07585208 +n07587441 +n07587700 +n07588947 +n07590320 +n07591473 +n07592094 +n07592656 +n07593774 +n07595914 +n07596046 +n07596452 +n07596684 +n07597145 +n07597365 +n07598734 +n07599468 +n07599783 +n07599998 +n07600506 +n07601407 +n07605474 +n07605944 +n07606278 +n07606764 +n07607707 +n07609407 +n07609840 +n07611148 +n07611358 +n07611839 +n07611991 +n07612367 +n07612632 +n07612996 +n07613671 +n07614198 +n07614825 +n07615052 +n07615190 +n07615460 +n07615569 +n07615671 +n07616590 +n07617188 +n07619004 +n07623136 +n07624466 +n07627931 +n07628068 +n07641928 
+n07642471 +n07642933 +n07643306 +n07643474 +n07643764 +n07643981 +n07644244 +n07663899 +n07678729 +n07679356 +n07680517 +n07680932 +n07681926 +n07682316 +n07682624 +n07683786 +n07684600 +n07685730 +n07686873 +n07687211 +n07687469 +n07687789 +n07689003 +n07690273 +n07690892 +n07692405 +n07692614 +n07693889 +n07693972 +n07694403 +n07695878 +n07695965 +n07697100 +n07704054 +n07705931 +n07707451 +n07708512 +n07708798 +n07709333 +n07710007 +n07710283 +n07710616 +n07710952 +n07712063 +n07712382 +n07712748 +n07712856 +n07713395 +n07713895 +n07714078 +n07714802 +n07714895 +n07715561 +n07715721 +n07716034 +n07717070 +n07717858 +n07718671 +n07719437 +n07719839 +n07720442 +n07720615 +n07721325 +n07721456 +n07721678 +n07722217 +n07722763 +n07723330 +n07723559 +n07723753 +n07724943 +n07725376 +n07725531 +n07726796 +n07727578 +n07727868 +n07728804 +n07729000 +n07729485 +n07730406 +n07730855 +n07731122 +n07731587 +n07731952 +n07732302 +n07732747 +n07734017 +n07734292 +n07735052 +n07735803 +n07737081 +n07739125 +n07739506 +n07740220 +n07740954 +n07741461 +n07742012 +n07742704 +n07744246 +n07747055 +n07747811 +n07747951 +n07748753 +n07748912 +n07749095 +n07749192 +n07749312 +n07749731 +n07750586 +n07751451 +n07752377 +n07752664 +n07753743 +n07755089 +n07755411 +n07755707 +n07756096 +n07757132 +n07757312 +n07757602 +n07757990 +n07758680 +n07758950 +n07759424 +n07759691 +n07759816 +n07760501 +n07761141 +n07761309 +n07761611 +n07761777 +n07761954 +n07767344 +n07767847 +n07770571 +n07771212 +n07800091 +n07800740 +n07801508 +n07802152 +n07802417 +n07803093 +n07803545 +n07804323 +n07805254 +n07805594 +n07805731 +n07806221 +n07806633 +n07807317 +n07807710 +n07807922 +n07809096 +n07809368 +n07810907 +n07811416 +n07812184 +n07814203 +n07815588 +n07818277 +n07819480 +n07820497 +n07820814 +n07823951 +n07824702 +n07824988 +n07825717 +n07828987 +n07829412 +n07830593 +n07832902 +n07834507 +n07836731 +n07837002 +n07837362 +n07838233 +n07841495 +n07841639 +n07841907 +n07842753 +n07842972 
+n07843464 +n07843775 +n07844042 +n07844604 +n07846143 +n07847198 +n07848338 +n07848771 +n07849336 +n07850083 +n07850329 +n07851298 +n07852045 +n07852919 +n07854813 +n07856270 +n07857959 +n07858595 +n07859284 +n07859583 +n07860805 +n07861158 +n07861813 +n07863374 +n07864638 +n07865105 +n07867421 +n07867883 +n07869391 +n07869775 +n07870313 +n07871436 +n07873464 +n07874063 +n07874159 +n07874259 +n07874343 +n07874441 +n07874780 +n07875693 +n07875835 +n07876281 +n07880751 +n07881117 +n07881205 +n07881404 +n07881800 +n07882497 +n07882886 +n07883031 +n07883251 +n07883384 +n07884567 +n07886572 +n07886849 +n07887634 +n07888465 +n07888909 +n07889510 +n07890352 +n07890750 +n07891726 +n07892813 +n07893528 +n07893891 +n07894102 +n07894298 +n07894965 +n07895237 +n07895435 +n07895595 +n07895710 +n07895839 +n07896287 +n07897200 +n07897865 +n07898117 +n07898333 +n07898745 +n07899108 +n07900406 +n07900616 +n07901587 +n07903208 +n07904395 +n07905038 +n07906284 +n07906877 +n07907161 +n07907548 +n07907943 +n07909129 +n07909811 +n07911371 +n07911677 +n07912211 +n07913393 +n07914413 +n07915618 +n07916041 +n07917618 +n07918028 +n07920222 +n07921455 +n07921948 +n07923748 +n07924033 +n07924560 +n07924834 +n07925966 +n07926920 +n07927197 +n07927931 +n07929519 +n07930554 +n07931001 +n07931096 +n07932614 +n07932841 +n07933274 +n07933891 +n07934032 +n07934530 +n07935152 +n07935504 +n07936263 +n07936745 +n07938149 +n07951464 +n08554440 +n08558963 +n08596076 +n08598301 +n08616050 +n08640531 +n08659446 +n09191635 +n09206896 +n09206985 +n09210862 +n09213434 +n09213565 +n09214060 +n09214916 +n09215437 +n09217230 +n09230041 +n09233446 +n09238926 +n09255070 +n09259025 +n09259219 +n09262690 +n09265620 +n09269882 +n09270735 +n09287968 +n09289331 +n09289596 +n09290444 +n09295946 +n09300306 +n09302616 +n09303008 +n09303528 +n09304750 +n09305031 +n09308572 +n09309292 +n09315159 +n09326662 +n09335693 +n09335809 +n09336555 +n09337253 +n09344198 +n09352849 +n09359803 +n09362945 +n09366017 +n09366317 
+n09375606 +n09376526 +n09381242 +n09393605 +n09396465 +n09398677 +n09405787 +n09406793 +n09409512 +n09409752 +n09410224 +n09416076 +n09421799 +n09428628 +n09432990 +n09433442 +n09437454 +n09439213 +n09443641 +n09453008 +n09458269 +n09472413 +n09474010 +n09505153 +n09606009 +n09606527 +n09608709 +n09610405 +n09613191 +n09615336 +n09616922 +n09619168 +n09619452 +n09620078 +n09620794 +n09622049 +n09622302 +n09624168 +n09624559 +n09625401 +n09626238 +n09627906 +n09629246 +n09629752 +n09631129 +n09632274 +n09632518 +n09633969 +n09636339 +n09638875 +n09639919 +n09641002 +n09644152 +n09648743 +n09651123 +n09665545 +n09669631 +n09670280 +n09676884 +n09679925 +n09690208 +n09694771 +n09696585 +n09697401 +n09700964 +n09701148 +n09701833 +n09705124 +n09708750 +n09710164 +n09716047 +n09718217 +n09722658 +n09724785 +n09725229 +n09725772 +n09726621 +n09727440 +n09727826 +n09730204 +n09731436 +n09731571 +n09735258 +n09738400 +n09744679 +n09754217 +n09758173 +n09758885 +n09761068 +n09763784 +n09764201 +n09764598 +n09765278 +n09767197 +n09769076 +n09770179 +n09771435 +n09772746 +n09773962 +n09774783 +n09790482 +n09792555 +n09795124 +n09795334 +n09800964 +n09802445 +n09802641 +n09805151 +n09805475 +n09809538 +n09809749 +n09810166 +n09811712 +n09814660 +n09815790 +n09816771 +n09818022 +n09820263 +n09821831 +n09823502 +n09824135 +n09824609 +n09826204 +n09830194 +n09831962 +n09834699 +n09836160 +n09840217 +n09841188 +n09841515 +n09841696 +n09842047 +n09848489 +n09851575 +n09853645 +n09853881 +n09854915 +n09857007 +n09861946 +n09865398 +n09868270 +n09871681 +n09877951 +n09889691 +n09892693 +n09894654 +n09895222 +n09895701 +n09902353 +n09903153 +n09910374 +n09917593 +n09918248 +n09923418 +n09923673 +n09924996 +n09927089 +n09927451 +n09928136 +n09928451 +n09929298 +n09930257 +n09930876 +n09931640 +n09933098 +n09935434 +n09936892 +n09937056 +n09941964 +n09942970 +n09943239 +n09943811 +n09944160 +n09945319 +n09950457 +n09951070 +n09951274 +n09960688 +n09962966 +n09964411 +n09968845 
+n09974648 +n09976728 +n09979321 +n09983572 +n09989502 +n09990415 +n09991867 +n09992538 +n09992837 +n09993252 +n09994673 +n09996481 +n09997622 +n10001217 +n10006748 +n10007684 +n10009484 +n10009671 +n10015215 +n10015897 +n10017422 +n10018861 +n10020890 +n10024362 +n10029068 +n10034201 +n10034614 +n10035952 +n10036266 +n10036929 +n10037385 +n10040945 +n10041887 +n10042690 +n10043643 +n10044879 +n10047459 +n10048367 +n10048836 +n10052694 +n10053808 +n10054657 +n10055730 +n10055847 +n10060175 +n10067968 +n10070711 +n10077593 +n10078131 +n10078806 +n10079399 +n10079893 +n10080869 +n10083823 +n10084043 +n10084295 +n10086383 +n10091651 +n10092488 +n10093475 +n10094584 +n10095869 +n10098710 +n10098862 +n10099375 +n10101634 +n10102800 +n10105085 +n10107303 +n10109662 +n10111903 +n10112129 +n10118844 +n10126177 +n10126424 +n10126708 +n10127689 +n10129825 +n10134396 +n10134982 +n10136959 +n10142747 +n10142946 +n10143172 +n10143725 +n10145340 +n10145774 +n10148305 +n10150071 +n10150940 +n10151570 +n10153594 +n10154186 +n10154601 +n10155849 +n10162194 +n10164233 +n10165448 +n10168183 +n10168584 +n10171567 +n10182190 +n10185793 +n10186774 +n10187130 +n10195593 +n10200781 +n10202624 +n10205457 +n10206173 +n10207169 +n10210137 +n10215623 +n10216106 +n10224578 +n10225219 +n10228278 +n10235385 +n10237069 +n10241300 +n10243664 +n10245639 +n10249270 +n10249459 +n10249950 +n10257221 +n10259348 +n10263411 +n10266328 +n10266848 +n10271677 +n10273064 +n10274815 +n10276045 +n10282672 +n10284064 +n10284965 +n10296444 +n10299250 +n10299700 +n10305635 +n10305802 +n10306004 +n10308732 +n10312287 +n10314054 +n10315561 +n10316360 +n10317007 +n10317500 +n10320863 +n10321340 +n10322238 +n10323999 +n10324560 +n10328437 +n10332385 +n10335246 +n10335931 +n10340312 +n10341573 +n10343554 +n10345100 +n10353016 +n10353355 +n10355142 +n10355449 +n10355688 +n10356450 +n10357613 +n10360747 +n10366966 +n10369528 +n10370381 +n10376523 +n10377021 +n10379376 +n10380672 +n10383816 +n10386984 +n10387196 
+n10387324 +n10393909 +n10396106 +n10399130 +n10400998 +n10402824 +n10403876 +n10405694 +n10407954 +n10409752 +n10411551 +n10415037 +n10417551 +n10418101 +n10419047 +n10420031 +n10421016 +n10426454 +n10427764 +n10428004 +n10433737 +n10435716 +n10435988 +n10438172 +n10439851 +n10444194 +n10450303 +n10462860 +n10464052 +n10466918 +n10467179 +n10470779 +n10474064 +n10474645 +n10478960 +n10481268 +n10482054 +n10482921 +n10484858 +n10488309 +n10495421 +n10499355 +n10499857 +n10506544 +n10508710 +n10512372 +n10512708 +n10519494 +n10521100 +n10521662 +n10522035 +n10522324 +n10522759 +n10523341 +n10525134 +n10525436 +n10525617 +n10527334 +n10529231 +n10541833 +n10542888 +n10543161 +n10544232 +n10544748 +n10546633 +n10548537 +n10548681 +n10554846 +n10556518 +n10557854 +n10559288 +n10560637 +n10568200 +n10570019 +n10575787 +n10576962 +n10577284 +n10580535 +n10582746 +n10583387 +n10594147 +n10595164 +n10595647 +n10599806 +n10602985 +n10604634 +n10605253 +n10610465 +n10612210 +n10614629 +n10617193 +n10618685 +n10618848 +n10619642 +n10620758 +n10622053 +n10624074 +n10624310 +n10625860 +n10628644 +n10630188 +n10632576 +n10633450 +n10648237 +n10648696 +n10654932 +n10657835 +n10661002 +n10661563 +n10665698 +n10669991 +n10674130 +n10676018 +n10679174 +n10682953 +n10686073 +n10692883 +n10693824 +n10694258 +n10698368 +n10700201 +n10700640 +n10701180 +n10703336 +n10703692 +n10705615 +n10707233 +n10708454 +n10709529 +n10713686 +n10720453 +n10721321 +n10722575 +n10722965 +n10726786 +n10735298 +n10740868 +n10741152 +n10742997 +n10744164 +n10747119 +n10751265 +n10752480 +n10759151 +n10759982 +n10763383 +n10763620 +n10765679 +n10766260 +n10768903 +n10779610 +n10780632 +n10782791 +n10782940 +n10787470 +n10791221 +n10792335 +n10793570 +n10794014 +n11531193 +n11537327 +n11542640 +n11545524 +n11545714 +n11547562 +n11547855 +n11552386 +n11553240 +n11596108 +n11598686 +n11600372 +n11601177 +n11601918 +n11608250 +n11609475 +n11609684 +n11612923 +n11614250 +n11618861 +n11620673 +n11621029 
+n11623105 +n11624531 +n11627168 +n11628456 +n11630017 +n11630489 +n11643835 +n11645914 +n11647306 +n11649878 +n11650558 +n11650759 +n11661372 +n11665372 +n11666854 +n11669921 +n11672400 +n11674332 +n11676500 +n11684264 +n11689483 +n11693981 +n11697560 +n11700864 +n11703669 +n11708658 +n11709674 +n11713164 +n11720353 +n11722982 +n11723770 +n11725015 +n11725623 +n11727091 +n11729478 +n11733054 +n11736694 +n11741350 +n11745817 +n11747468 +n11748002 +n11751765 +n11752578 +n11756092 +n11756669 +n11759224 +n11763625 +n11767354 +n11769621 +n11771539 +n11774513 +n11775340 +n11779300 +n11782036 +n11783920 +n11785668 +n11789438 +n11789962 +n11790788 +n11793779 +n11794519 +n11796005 +n11801392 +n11805956 +n11807108 +n11807979 +n11808721 +n11811473 +n11815491 +n11817914 +n11820965 +n11823043 +n11830714 +n11830906 +n11832214 +n11836722 +n11839568 +n11845557 +n11851578 +n11855274 +n11857696 +n11862835 +n11865071 +n11866248 +n11868814 +n11869351 +n11869689 +n11872146 +n11875691 +n11875938 +n11877473 +n11878283 +n11887119 +n11890022 +n11892637 +n11894327 +n11898639 +n11900569 +n11902709 +n11915214 +n11915658 +n11915899 +n11916467 +n11918286 +n11919447 +n11920498 +n11924445 +n11928352 +n11928858 +n11931918 +n11932745 +n11939699 +n11940006 +n11943407 +n11944196 +n11945367 +n11946727 +n11947251 +n11948264 +n11950345 +n11951511 +n11952346 +n11953884 +n11954484 +n11956850 +n11965627 +n11967744 +n11970101 +n11971248 +n11971783 +n11972759 +n11973341 +n11976170 +n11977303 +n11978233 +n11982115 +n11985053 +n11985739 +n11988893 +n11991263 +n11997032 +n11997969 +n12006766 +n12008252 +n12008749 +n12010628 +n12013511 +n12015959 +n12018760 +n12020507 +n12024176 +n12030654 +n12034141 +n12036067 +n12036939 +n12041446 +n12043444 +n12045860 +n12050959 +n12053405 +n12056217 +n12057447 +n12062468 +n12065316 +n12065777 +n12075151 +n12076577 +n12080395 +n12083591 +n12086012 +n12086539 +n12087961 +n12090890 +n12092262 +n12094244 +n12095020 +n12096395 +n12101870 +n12102133 +n12105125 +n12107970 
+n12108432 +n12109827 +n12110778 +n12112008 +n12112918 +n12113657 +n12117017 +n12119099 +n12119238 +n12121033 +n12124627 +n12126360 +n12131550 +n12135898 +n12136720 +n12137120 +n12137569 +n12139575 +n12141495 +n12142085 +n12143676 +n12144313 +n12146311 +n12147226 +n12152532 +n12153580 +n12154773 +n12155583 +n12156819 +n12157056 +n12157769 +n12158031 +n12158798 +n12159055 +n12159555 +n12160490 +n12161285 +n12163035 +n12164363 +n12166424 +n12168565 +n12170585 +n12173664 +n12174311 +n12174926 +n12182049 +n12187663 +n12188289 +n12195391 +n12196129 +n12199266 +n12201580 +n12202936 +n12205694 +n12214789 +n12215579 +n12217453 +n12221191 +n12224978 +n12225349 +n12226932 +n12231192 +n12236546 +n12237486 +n12244153 +n12245695 +n12246232 +n12252168 +n12252866 +n12253229 +n12256112 +n12257570 +n12260799 +n12262553 +n12265394 +n12266217 +n12266796 +n12268246 +n12269241 +n12269652 +n12271643 +n12274630 +n12275489 +n12281241 +n12284262 +n12286826 +n12287642 +n12288823 +n12290748 +n12293723 +n12296432 +n12300840 +n12302071 +n12303462 +n12305475 +n12306717 +n12307756 +n12310349 +n12316444 +n12318378 +n12320010 +n12322501 +n12328398 +n12330469 +n12334293 +n12334891 +n12335483 +n12335664 +n12335800 +n12340383 +n12341542 +n12342299 +n12343480 +n12344283 +n12346578 +n12350758 +n12352287 +n12355760 +n12360108 +n12360684 +n12364604 +n12367611 +n12374418 +n12377198 +n12381511 +n12385429 +n12387633 +n12387839 +n12392070 +n12396924 +n12399132 +n12401335 +n12401684 +n12405714 +n12409231 +n12411461 +n12412355 +n12412606 +n12416423 +n12419037 +n12420535 +n12421467 +n12421683 +n12425281 +n12430198 +n12431434 +n12437513 +n12437769 +n12441958 +n12446200 +n12446519 +n12449296 +n12450344 +n12451915 +n12454159 +n12459629 +n12460697 +n12461466 +n12462032 +n12463743 +n12464476 +n12466727 +n12470092 +n12474167 +n12475035 +n12476510 +n12480895 +n12491826 +n12495146 +n12499163 +n12506181 +n12508309 +n12509476 +n12511856 +n12516584 +n12522188 +n12524188 +n12526516 +n12527738 +n12539074 +n12539306 
+n12546183 +n12548280 +n12550210 +n12554526 +n12556656 +n12560282 +n12560775 +n12562577 +n12572546 +n12573256 +n12575322 +n12582231 +n12582665 +n12582846 +n12583126 +n12583401 +n12584191 +n12586298 +n12590232 +n12594989 +n12595964 +n12602262 +n12602980 +n12612170 +n12614477 +n12615710 +n12620196 +n12622875 +n12624381 +n12625383 +n12631331 +n12633638 +n12634211 +n12634429 +n12635744 +n12636885 +n12638218 +n12638556 +n12639736 +n12640607 +n12641413 +n12641931 +n12642200 +n12643473 +n12644902 +n12645174 +n12647376 +n12649065 +n12650556 +n12651821 +n12653218 +n12655869 +n12658118 +n12658846 +n12659356 +n12660601 +n12662772 +n12663804 +n12665048 +n12667406 +n12667964 +n12674120 +n12674685 +n12682411 +n12683407 +n12685431 +n12685831 +n12688716 +n12690653 +n12695144 +n12698435 +n12705013 +n12707781 +n12708293 +n12709901 +n12711596 +n12713063 +n12714755 +n12715914 +n12717072 +n12719684 +n12724942 +n12725521 +n12727301 +n12731401 +n12732491 +n12732756 +n12733647 +n12741222 +n12742741 +n12743823 +n12746884 +n12749049 +n12752205 +n12755225 +n12756457 +n12762896 +n12768369 +n12771192 +n12772753 +n12777436 +n12778605 +n12779603 +n12785724 +n12791064 +n12793015 +n12794985 +n12798284 +n12800586 +n12801520 +n12805146 +n12806732 +n12810595 +n12812235 +n12814643 +n12817464 +n12822769 +n12823717 +n12823859 +n12832315 +n12833985 +n12834798 +n12836212 +n12836862 +n12839979 +n12840749 +n12842302 +n12842887 +n12844939 +n12849061 +n12853080 +n12854048 +n12858150 +n12866968 +n12869478 +n12870535 +n12871272 +n12877244 +n12878169 +n12879963 +n12882779 +n12884260 +n12890265 +n12893463 +n12903367 +n12904938 +n12908645 +n12909421 +n12912670 +n12917901 +n12922763 +n12926480 +n12928071 +n12929403 +n12930778 +n12931906 +n12934036 +n12934479 +n12939104 +n12941536 +n12942395 +n12943443 +n12946849 +n12950126 +n12952165 +n12953206 +n12956170 +n12957608 +n12960378 +n12960863 +n12965626 +n12968136 +n12969131 +n12970193 +n12971400 +n12973791 +n12974987 +n12976198 +n12980840 +n12982468 +n12983961 
+n12985773 +n12987056 +n12988158 +n12992868 +n12997654 +n12997919 +n13000891 +n13001041 +n13001206 +n13001366 +n13001529 +n13002750 +n13002925 +n13003061 +n13003254 +n13003522 +n13003712 +n13004423 +n13004640 +n13004826 +n13004992 +n13005329 +n13005984 +n13006171 +n13006631 +n13006894 +n13007417 +n13007629 +n13008157 +n13008315 +n13008485 +n13008689 +n13008839 +n13009085 +n13009244 +n13009429 +n13009656 +n13010694 +n13010951 +n13011221 +n13012253 +n13012469 +n13012973 +n13013534 +n13013764 +n13013965 +n13014097 +n13014265 +n13014409 +n13014581 +n13014741 +n13014879 +n13017102 +n13017240 +n13017439 +n13017610 +n13017789 +n13017979 +n13018088 +n13018232 +n13018407 +n13019496 +n13019643 +n13019835 +n13020191 +n13020481 +n13020964 +n13021166 +n13021332 +n13021543 +n13021689 +n13021867 +n13022210 +n13022709 +n13024012 +n13024500 +n13025647 +n13028611 +n13032115 +n13032923 +n13035241 +n13035389 +n13035707 +n13037585 +n13037805 +n13038068 +n13038376 +n13038744 +n13039349 +n13040629 +n13040796 +n13041312 +n13042982 +n13043926 +n13045210 +n13045975 +n13046130 +n13049953 +n13055423 +n13055577 +n13055792 +n13055949 +n13056135 +n13056349 +n13056607 +n13056799 +n13057054 +n13057242 +n13057422 +n13057639 +n13058037 +n13058272 +n13058608 +n13059298 +n13059657 +n13060017 +n13060190 +n13063269 +n13066129 +n13067191 +n13068917 +n13070308 +n13070875 +n13071371 +n13071553 +n13071815 +n13072031 +n13072209 +n13072350 +n13072528 +n13072706 +n13072863 +n13073055 +n13073703 +n13074619 +n13074814 +n13075020 +n13075272 +n13075441 +n13075684 +n13075847 +n13076041 +n13076405 +n13076643 +n13076831 +n13077033 +n13077295 +n13079419 +n13083023 +n13084184 +n13085113 +n13091620 +n13091774 +n13100156 +n13100677 +n13104059 +n13108131 +n13108662 +n13108841 +n13109733 +n13110915 +n13111174 +n13111881 +n13118707 +n13119870 +n13120211 +n13121104 +n13122364 +n13123431 +n13125117 +n13130161 +n13130726 +n13132034 +n13132338 +n13132486 +n13132940 +n13134302 +n13134947 +n13135832 +n13136316 +n13136556 
+n13137409 +n13137672 +n13138308 +n13138658 +n13138842 +n13139055 +n13139647 +n13141141 +n13145444 +n13149296 +n13150894 +n13154841 +n13156986 +n13157137 +n13160831 +n13163991 +n13172923 +n13174670 +n13177529 +n13180534 +n13186388 +n13188096 +n13188268 +n13192625 +n13193642 +n13194572 +n13195761 +n13199970 +n13201969 +n13206817 +n13207736 +n13208705 +n13211790 +n13219422 +n13221529 +n13224673 +n13230662 +n13231678 +n13231919 +n13232106 +n13232363 +n13232779 +n13233727 +n13238375 +n13238988 +n13252672 +n13862780 +n13863186 +n13863473 +n13863771 +n13864153 +n13864965 +n13865298 +n13865483 +n13866144 +n13866827 +n13867492 +n13868248 +n13868371 +n13872592 +n13873502 +n13875392 +n13875571 +n13878306 +n13879320 +n13883603 +n13888491 +n13893786 +n13894434 +n13896100 +n13897996 +n13900287 +n13903079 +n13905121 +n13905275 +n13905792 +n13912260 +n13915999 +n14633206 +n14696793 +n14844693 +n14853210 +n14899328 +n14900184 +n14974264 +n14977504 +n14992287 +n15062057 +n15067877 +n15089258 +n15089472 +n15089645 +n15089803 +n15090742 +n15092409 +n15092751 \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/imagenet.shortnames.list b/workloads/realworld/uvm_prefetch_async/darknet/cfg/imagenet.shortnames.list new file mode 100644 index 0000000000000000000000000000000000000000..e7a18d44b543086958eaf60e6dc0b7deb0df9400 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/imagenet.shortnames.list @@ -0,0 +1,21842 @@ +kit fox +English setter +Siberian husky +Australian terrier +English springer +grey whale +lesser panda +Egyptian cat +ibex +Persian cat +cougar +gazelle +porcupine +sea lion +malamute +badger +Great Dane +Walker hound +Welsh springer spaniel +whippet +Scottish deerhound +killer whale +mink +African elephant +Weimaraner +soft-coated wheaten terrier +Dandie Dinmont +red wolf +Old English sheepdog +jaguar +otterhound +bloodhound +Airedale +hyena +meerkat +giant schnauzer +titi +three-toed sloth +sorrel +black-footed 
ferret +dalmatian +black-and-tan coonhound +papillon +skunk +Staffordshire bullterrier +Mexican hairless +Bouvier des Flandres +weasel +miniature poodle +Cardigan +malinois +bighorn +fox squirrel +colobus +tiger cat +Lhasa +impala +coyote +Yorkshire terrier +Newfoundland +brown bear +red fox +Norwegian elkhound +Rottweiler +hartebeest +Saluki +grey fox +schipperke +Pekinese +Brabancon griffon +West Highland white terrier +Sealyham terrier +guenon +mongoose +indri +tiger +Irish wolfhound +wild boar +EntleBucher +zebra +ram +French bulldog +orangutan +basenji +leopard +Bernese mountain dog +Maltese dog +Norfolk terrier +toy terrier +vizsla +cairn +squirrel monkey +groenendael +clumber +Siamese cat +chimpanzee +komondor +Afghan hound +Japanese spaniel +proboscis monkey +guinea pig +white wolf +ice bear +gorilla +borzoi +toy poodle +Kerry blue terrier +ox +Scotch terrier +Tibetan mastiff +spider monkey +Doberman +Boston bull +Greater Swiss Mountain dog +Appenzeller +Shih-Tzu +Irish water spaniel +Pomeranian +Bedlington terrier +warthog +Arabian camel +siamang +miniature schnauzer +collie +golden retriever +Irish terrier +affenpinscher +Border collie +hare +boxer +silky terrier +beagle +Leonberg +German short-haired pointer +patas +dhole +baboon +macaque +Chesapeake Bay retriever +bull mastiff +kuvasz +capuchin +pug +curly-coated retriever +Norwich terrier +flat-coated retriever +hog +keeshond +Eskimo dog +Brittany spaniel +standard poodle +Lakeland terrier +snow leopard +Gordon setter +dingo +standard schnauzer +hamster +Tibetan terrier +Arctic fox +wire-haired fox terrier +basset +water buffalo +American black bear +Angora +bison +howler monkey +hippopotamus +chow +giant panda +American Staffordshire terrier +Shetland sheepdog +Great Pyrenees +Chihuahua +tabby +marmoset +Labrador retriever +Saint Bernard +armadillo +Samoyed +bluetick +redbone +polecat +marmot +kelpie +gibbon +llama +miniature pinscher +wood rabbit +Italian greyhound +lion +cocker spaniel +Irish setter 
+dugong +Indian elephant +beaver +Sussex spaniel +Pembroke +Blenheim spaniel +Madagascar cat +Rhodesian ridgeback +lynx +African hunting dog +langur +Ibizan hound +timber wolf +cheetah +English foxhound +briard +sloth bear +Border terrier +German shepherd +otter +koala +tusker +echidna +wallaby +platypus +wombat +revolver +umbrella +schooner +soccer ball +accordion +ant +starfish +chambered nautilus +grand piano +laptop +strawberry +airliner +warplane +airship +balloon +space shuttle +fireboat +gondola +speedboat +lifeboat +canoe +yawl +catamaran +trimaran +container ship +liner +pirate +aircraft carrier +submarine +wreck +half track +tank +missile +bobsled +dogsled +bicycle-built-for-two +mountain bike +freight car +passenger car +barrow +shopping cart +motor scooter +forklift +electric locomotive +steam locomotive +amphibian +ambulance +beach wagon +cab +convertible +jeep +limousine +minivan +Model T +racer +sports car +go-kart +golfcart +moped +snowplow +fire engine +garbage truck +pickup +tow truck +trailer truck +moving van +police van +recreational vehicle +streetcar +snowmobile +tractor +mobile home +tricycle +unicycle +horse cart +jinrikisha +oxcart +bassinet +cradle +crib +four-poster +bookcase +china cabinet +medicine chest +chiffonier +table lamp +file +park bench +barber chair +throne +folding chair +rocking chair +studio couch +toilet seat +desk +pool table +dining table +entertainment center +wardrobe +Granny Smith +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +acorn +hip +ear +rapeseed +corn +buckeye +organ +upright +chime +drum +gong +maraca +marimba +steel drum +banjo +cello +violin +harp +acoustic guitar +electric guitar +cornet +French horn +trombone +harmonica +ocarina +panpipe +bassoon +oboe +sax +flute +daisy +yellow lady's slipper +cliff +valley +alp +volcano +promontory +sandbar +coral reef +lakeside +seashore +geyser +hatchet +cleaver +letter opener +plane +power drill +lawn mower +hammer +corkscrew +can 
opener +plunger +screwdriver +shovel +plow +chain saw +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +white stork +black stork +spoonbill +flamingo +American egret +little blue heron +bittern +crane +limpkin +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +European gallinule +pelican +king penguin +albatross +great white shark +tiger shark +hammerhead +electric ray +stingray +barracouta +coho +tench +goldfish +eel +rock beauty +anemone fish +lionfish +puffer +sturgeon +gar +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +triceratops +African crocodile +American alligator +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +whistle +wing +paintbrush +hand blower +oxygen mask +snorkel +loudspeaker +microphone +screen +mouse +electric fan +oil filter +strainer +space heater +stove +guillotine +barometer +rule +odometer +scale +analog clock +digital clock +wall clock +hourglass +sundial +parking meter +stopwatch +digital watch +stethoscope +syringe +magnetic compass +binoculars +projector +sunglasses +loupe +radio telescope +bow +cannon +assault rifle +rifle +projectile +computer keyboard 
+typewriter keyboard +crane +lighter +abacus +cash machine +slide rule +desktop computer +hand-held computer +notebook +web site +harvester +thresher +printer +slot +vending machine +sewing machine +joystick +switch +hook +car wheel +paddlewheel +pinwheel +potter's wheel +gas pump +carousel +swing +reel +radiator +puck +hard disc +sunglass +pick +car mirror +solar dish +remote control +disk brake +buckle +hair slide +knot +combination lock +padlock +nail +safety pin +screw +muzzle +seat belt +ski +candle +jack-o'-lantern +spotlight +torch +neck brace +pier +tripod +maypole +mousetrap +spider web +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +isopod +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +sea urchin +sea cucumber +iron +espresso maker +microwave +Dutch oven +rotisserie +toaster +waffle iron +vacuum +dishwasher +refrigerator +washer +Crock Pot +frying pan +wok +caldron +coffeepot +teapot +spatula +altar +triumphal arch +patio +steel arch bridge +suspension bridge +viaduct +barn +greenhouse +palace +monastery +library +apiary +boathouse +church +mosque +stupa +planetarium +restaurant +cinema +home theater +lumbermill +coil +obelisk +totem pole +castle +prison +grocery store +bakery +barbershop +bookshop +butcher shop +confectionery +shoe shop +tobacco shop +toyshop +fountain +cliff dwelling +yurt +dock +brass +megalith +bannister +breakwater +dam +chainlink fence +picket fence +worm fence +stone wall +grille 
+sliding door +turnstile +mountain tent +scoreboard +honeycomb +plate rack +pedestal +beacon +mashed potato +bell pepper +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +cardoon +mushroom +shower curtain +jean +carton +handkerchief +sandal +ashcan +safe +plate +necklace +croquet ball +fur coat +thimble +pajama +running shoe +cocktail shaker +chest +manhole cover +modem +tub +tray +balance beam +bagel +prayer rug +kimono +hot pot +whiskey jug +knee pad +book jacket +spindle +ski mask +beer bottle +crash helmet +bottlecap +tile roof +mask +maillot +Petri dish +football helmet +bathing cap +teddy +holster +pop bottle +photocopier +vestment +crossword puzzle +golf ball +trifle +suit +water tower +feather boa +cloak +red wine +drumstick +shield +Christmas stocking +hoopskirt +menu +stage +bonnet +meat loaf +baseball +face powder +scabbard +sunscreen +beer glass +hen-of-the-woods +guacamole +lampshade +wool +hay +bow tie +mailbag +water jug +bucket +dishrag +soup bowl +eggnog +mortar +trench coat +paddle +chain +swab +mixing bowl +potpie +wine bottle +shoji +bulletproof vest +drilling platform +binder +cardigan +sweatshirt +pot +birdhouse +hamper +ping-pong ball +pencil box +pay-phone +consomme +apron +punching bag +backpack +groom +bearskin +pencil sharpener +broom +mosquito net +abaya +mortarboard +poncho +crutch +Polaroid camera +space bar +cup +racket +traffic light +quill +radio +dough +cuirass +military uniform +lipstick +shower cap +monitor +oscilloscope +mitten +brassiere +French loaf +vase +milk can +rugby ball +paper towel +earthstar +envelope +miniskirt +cowboy hat +trolleybus +perfume +bathtub +hotdog +coral fungus +bullet train +pillow +toilet tissue +cassette +carpenter's kit +ladle +stinkhorn +lotion +hair spray +academic gown +dome +crate +wig +burrito +pill bottle +chain mail +theater curtain +window shade +barrel +washbasin +ballpoint +basketball +bath towel +cowboy boot +gown +window 
screen +agaric +cellular telephone +nipple +barbell +mailbox +lab coat +fire screen +minibus +packet +maze +pole +horizontal bar +sombrero +pickelhaube +rain barrel +wallet +cassette player +comic book +piggy bank +street sign +bell cote +fountain pen +Windsor tie +volleyball +overskirt +sarong +purse +bolo tie +bib +parachute +sleeping bag +television +swimming trunks +measuring cup +espresso +pizza +breastplate +shopping basket +wooden spoon +saltshaker +chocolate sauce +ballplayer +goblet +gyromitra +stretcher +water bottle +dial telephone +soap dispenser +jersey +school bus +jigsaw puzzle +plastic bag +reflex camera +diaper +Band Aid +ice lolly +velvet +tennis ball +gasmask +doormat +Loafer +ice cream +pretzel +quilt +maillot +tape player +clog +iPod +bolete +scuba diver +pitcher +matchstick +bikini +sock +CD player +lens cap +thatch +vault +beaker +bubble +cheeseburger +parallel bars +flagpole +coffee mug +rubber eraser +stole +carbonara +dumbbell +singles +Virginia deer +eastern grey squirrel +gelding +pylon +table-tennis table +peacock +Segway +surfing +tamandua +knocker +steering wheel +motorcycling +coati +sitar +range +backhoe +agaric +dashboard +water polo +concrete mixer +treadmill +golf bag +skateboarding +royal tennis +tartan +four-wheel drive +sport utility +sedan +print +luggage rack +softball +windmill +ben +red admiral +jalousie +towel rail +truss +strand +ice hockey +sconce +wind turbine +plush +stained-glass window +ballpark +thoroughbred +love seat +red-spotted purple +miller +Adelie +freight liner +clock tower +acrobatics +shaving brush +ewe +ottoman +African violet +bicycle wheel +cork +windmill +satin +comma +coffee mill +baggage +wasp's nest +batting glove +Ferris wheel +push-bike +porthole +football stadium +gas tank +barbecue +handlebar +hula-hoop +fairground +rapier +garter stitch +exercise bike +control tower +carryall +minute hand +cog +riverbank +water nymph +common dandelion +android +hairbrush +redberry +fret +display window +pepper 
mill +litterbin +drapery +ducking +fly-fishing +broad jump +sprinkler +water-skiing +chicory +sail +volleyball +rugby +Texas bluebonnet +computer monitor +tortoiseshell +airplane propeller +solar array +figure skating +air conditioner +purple loosestrife +gearshift +outboard motor +cowslip +Abyssinian +dip +workstation +cosy +bunker +neon lamp +campanile +casket +verbena +amphora +sumo +common foxglove +sprocket +jelly bean +emperor penguin +night-blooming cereus +clock radio +black birch +bomber jacket +Virginia bluebell +bayonet +walker +altarpiece +tattoo +bridle +rocker arm +water turkey +spiderwort +flange +mute swan +laser printer +carburetor +coverlet +mountainside +baritone +auto racing +baluster +gal +peach bells +taffeta +grandfather +asparagus +horizontal stabilizer +world +grate +marsh marigold +white rhinoceros +movement +split rail +rollerblading +longhorn +muffler +church tower +light bulb +American agave +backpacking tent +overall +New World goldfinch +sectional +wing chair +transom +integrated circuit +dad +spar +picture frame +no-hit game +alternator +drill press +strawflower +hepatica +rangefinder +blinker +Welsh pony +nib +wagon wheel +rotor +tie +denim +jetliner +sculling +external drive +window frame +mourning dove +censer +stapler +batting helmet +flagon +machete +windshield +hedgehog +weeping willow +chief executive officer +hepatica +pet +Asiatic black bear +chinchilla +uke +Atlantic bottlenose dolphin +hair +dishtowel +flintlock +Bermuda shorts +lavender +searchlight +millwheel +piano keyboard +luna moth +bumper +parrot +skirt +manhole +coffee table +footstool +judo +Dalai Lama +armored personnel carrier +voile +saber +thoroughbred +wild carrot +gemsbok +caster +butterfly orchid +cow +sideboard +horseshoe crab +match play +cassette recorder +photomicrograph +drafting table +pediment +tramline +shipping +kitten +wainscoting +fried rice +helix +marguerite +pumpkin +white-bellied swallow +Tulipa gesneriana +common dolphin +face +red squirrel 
+bicycling +shipwreck +banded purple +cornice +pendant earring +forsythia +aardvark +seashell +spat +shoulder bag +fallow deer +yearling +common teasel +tufted titmouse +ancient +professional golf +purl +vehicle +okra +great grandmother +common lilac +rose mallow +newspaper +crucifix +chukka +armlet +fulmar +wapiti +doily +Greco-Roman wrestling +bleeding heart +kitchen table +bluebonnet +Cape buffalo +spun yarn +crape myrtle +dewdrop +great blue heron +medalist +vaulting horse +spinning wheel +skyscraper +Tahitian +forget-me-not +watercourse +guitarist +gargoyle +bee balm +pumpkin +hunting knife +flutist +lectern +skateboarder +foil +pant leg +hedge sparrow +dresser +automatic pistol +chicory +dialog box +chamberpot +black rhinoceros +fireweed +half-mast +pillow sham +pavilion +scarf joint +microprocessor +filly +dressing gown +shell +Arabian +child +radio antenna +butterweed +morris dancer +sparrow hawk +groom +brioche +floret +rainbow +earthworm +cellist +tine +toupee +balldress +map +angel's trumpet +ruin +fur +pronghorn +speed skating +used-car +stick +early spider orchid +stuffed peppers +snowdrift +flats +least sandpiper +stick +console table +ventilator +portable +kepi +pylon +viceroy +shoreline +Olympian Zeus +pestle +great-niece +life +air compressor +fanjet +scuba diving +fieldfare +tree swallow +personnel carrier +night-blooming cereus +sonogram +assembly hall +circuit breaker +chair +speed skate +soapwort +worsted +raspberry +burlap +flat panel display +Pyracantha +cemetery +turban +deer hunting +bottle green +dandelion green +pieta +aigrette +turntable +cover girl +clutch bag +kiwi +pea jacket +color guard +Malay +shire +crock +french fries +credenza +hockey stick +mourning cloak +potty seat +glass +balsamroot +medal play +red clover +gravy boat +garter belt +Guinness +meadow buttercup +jackass penguin +coursing +tooth +hawfinch +housetop +fluorescent lamp +black-backed gull +bookshelf +earplug +millipede +fawn +baseball bat +soup-strainer +organ loft 
+bugloss +tomahawk +blackcap +black-necked stilt +hand truck +bedstead +tempura +rose window +crimson +snow thrower +lesser whitethroat +palomino +ball +staff sergeant +wicker +garbage heap +great-nephew +parquet +coupe +nave +eggs Benedict +damask +flush toilet +Angora +pedometer +control room +bristle brush +kookaburra +telephone booth +Windsor chair +red-winged blackbird +cinnamon roll +briefs +cloister +sundress +mammillaria +unicyclist +covered bridge +coelogyne +fairy bluebird +phoebe +beer mug +headstock +parhelion +gorse +common European dogwood +fire-eater +professional football +rock climbing +cyclamen +tin +marjoram +Japanese morning glory +pipe +smasher +hang glider +abutment +birdbath +jotter +litter +artist's model +butterfly bush +dining area +sausage dog +piggery +English sparrow +Turk's-cap +platinum blond +song sparrow +alarm clock +tortoiseshell +chaise longue +flintlock +academic costume +graffito +Arnica montana +adding machine +waterside +director +jonquil +pipefitting +stud +Swedish meatball +musk rose +Venus's flytrap +raven +bougainvillea +little brother +field bindweed +finder +white admiral +tinfoil +serval +sheet +carthorse +people +potto +stockroom +sphinx +slate roof +mountain laurel +majolica +coal black +repository +bufo +pique +binder +tread +attorney general +hydraulic press +videocassette recorder +bumper car +professional baseball +cow parsley +ern +blue peafowl +common hyacinth +jack-in-the-pulpit +ice hockey rink +sport +camper +tailback +flash +stacks +pulp +Christmas cactus +netball +calliandra +curler +large periwinkle +cobweb +forward +Roman arch +cross bun +stoneware +banana bread +cape jasmine +settle +tongue +frock +pepper shaker +pitching coach +CD-R +casing +faience +hand cream +CD-ROM +recliner +striped bass +clary +sketch +risotto +reticle +white clover +touch football +kitty +great-aunt +Japanese maple +sidecar +muscovy duck +hack +rope bridge +organist +stinging nettle +pocket watch +Indian pipe +amorphophallus 
+bird's-foot violet +caller ID +furnishing +carriageway +dish rack +heiress +nail polish +beldam +Dall sheep +teriyaki +stateroom +laughing gull +chow +bookmark +timer +toga virilis +deviled egg +coltsfoot +Papuan +native +cygnet +automation +portfolio +cabbage palm +cube +broiler +radish +broodmare +castor-oil plant +pith hat +talus +lass +thatch +common marigold +young buck +igloo +prairie rattlesnake +soccer player +spoke +place +slide fastener +tapestry +toy +headboard +cross-country skiing +harness +sconce +rim +ballet skirt +transvestite +saddlebag +common evening primrose +taillight +challah +willet +ready-to-wear +cloud +answering machine +waterfront +vane +granddaughter +Chinese gooseberry +tureen +cab +truffle +viola +bootlace +chemise +taro +petal +candied apple +soccer +miniature golf +front porch +asparagus +Sauvignon blanc +daisy fleabane +ceiling +slip-on +bottle-nosed whale +redbud +black squirrel +snowsuit +ribbing +gravestone +creme brulee +ambassador +local +archery +love-in-a-mist +garbage +thyme +night-blooming cereus +goshawk +cuckoopint +azure +German iris +salad bowl +puppy +cockhorse +giant clam +biplane +stele +necklet +sea otter +crest +door +reformer +comforter +Byelorussian +bottle +hemline +book bag +leotard +owlet +spoon +sari +bidet +Latin +reticulated python +bowling shoe +futon +gaiter +coypu +tea urn +waders +bangle +snowbank +pencil +porter +azalea +English lavender +red spruce +team sport +cruet +high-rise +O ring +vodka +cormorant +Canada thistle +clasp +showjumping +rattan +red fox +sun parlor +Charolais +Tommy gun +bird's foot trefoil +sedge warbler +knot +chives +car tire +steam engine +adapter +spirea +common allamanda +oyster shell +harbor seal +baobab +wick +plumbago +downy woodpecker +coconut +leash +kasbah +hour hand +upholstery +mallard +cricket bat +lady +kitchenware +right-hander +leopard +olive green +common valerian +blue whale +blackboard +redhead +periwinkle +fingerboard +hard hat +locker +breakfast table 
+capybara +beekeeper +harness +feeder +water hyacinth +hexapod +brown thrasher +percale +lever +patriarch +arete +book +book +senator +bunya bunya +couch +durian +common lady's-slipper +mountain ash +golden barrel cactus +bicycle seat +beret +pop +musk mallow +manatee +cotton candy +boxing glove +backboard +tongue +saguaro +playground +capitol +sanderling +wagtail +deputy +tractor +tap +lady's smock +noseband +worsted +radiotelephone +camisole +forelock +muscat +sweet scabious +crane fly +butterfly weed +chestnut +pinata +inositol +borage +aquatic +belly +broadcaster +gondolier +egg yolk +blush wine +bufflehead +rambutan +oleander +horse-trail +sea holly +yard bird +conference room +lacrosse +belted kingfisher +defile +extremum +whistle +bear cub +grainfield +potage +watermelon +lasagna +sheik +Cooper's hawk +bulb +basketball court +paella +cassette tape +scatter rug +kid +impala lily +Minnesotan +Sudanese +chocolate +tail +quack-quack +whistling swan +shoulder patch +frozen custard +sumo wrestler +smoothie +bock +meat grinder +latch +palisade +radial +sake +kestrel +corn chowder +airframe +electrician +reamer +metropolitan +cotton flannel +cassowary +crossbill +operating room +winter aconite +flute +Tasmanian devil +billboard +suds +kilt +aperitif +cooling tower +avocado +hooded merganser +coleslaw +bee balm +ladder-back +insurance broker +scaffolding +polo mallet +double bed +two-hitter +bluff +gamboge +baby +lawn chair +frond +pistol grip +fancy dress +marquetry +jambalaya +fireweed +Eurasian kingfisher +cue ball +ice plant +horseweed +rose moss +musher +sun +viscount +white-breasted nuthatch +gin and tonic +thermos +Kenyan +first-aid kit +four-wheeler +tourist +stairwell +Gambian +liqueur glass +hovercraft +cocktail dress +twin +coriander +blister pack +Barrow's goldeneye +canteen +irrigation ditch +great white heron +tree sparrow +canal boat +lens +food processor +common raccoon +Baltimore oriole +black-eyed Susan +bush hibiscus +corolla +sire +mustachio 
+professional wrestling +elk +clustered bellflower +pannier +musk ox +crapaud +animal trainer +rosebud +ring-necked pheasant +little egret +cappuccino +rocker +bristlecone pine +cheerleader +hedge violet +semaphore +central processing unit +speedskater +delivery truck +assembly +hedgehog cactus +bergenia +bull thistle +bladder campion +cinquefoil +inula +cellulose tape +main rotor +bootee +autogiro +ice +grey +meadow cranesbill +hummus +valise +chassis +mountain goat +blacktail prairie dog +Chardonnay +romper +street +shoveler +wood ibis +topiary +chalice +silo +circus acrobat +Rollerblade +cosmos +woof +heroine +cold cream +marabou +herb robert +garden lettuce +nymph +floor lamp +automobile engine +heel +radiator +seeded player +fedora +father-in-law +peahen +Bahamian +wiper +wood pigeon +barn owl +pegboard +chorus frog +kin +roller skate +stob +rosemary +cowbird +hortensia +cranberry sauce +shot glass +Dixie cup +gnu +fire alarm +diet +booster +oxeye daisy +twayblade +high-definition television +truss bridge +bunk bed +mule +blackbuck +facsimile +frog orchid +point-and-shoot camera +brocade +gazebo +prairie gentian +concert +paintball +Cognac +maid +afghan +barbecued spareribs +pintail +tramway +commissioner +finger-painting +beef stew +caftan +Aberdeen Angus +demonstrator +sea trout +pigtail +thrush nightingale +barbados cherry +sashimi +ridgeling +lamppost +gabardine +red-shouldered hawk +bath salts +cavern +cymbid +Haitian +boater +southern buckthorn +arctic +motorcycle cop +red gum +Clydesdale +Zamboni +beagling +villa +demitasse +Sheetrock +lollipop +hybrid petunia +post horse +carabiner +brussels sprouts +Durham +stylist +pothole +sleigh bed +scallop shell +harrier eagle +papaya +Japanese persimmon +sachet +wild rice +chipboard +gun enclosure +menorah +chinook +headset +white campion +ocean +Secretary of State +G-string +bone china +basil +greenish blue +camcorder +concrete +screech owl +trumpet honeysuckle +flugelhorn +layette +cattle egret +case knife 
+mandarin duck +robber fly +salwar +dressing table +doughnut +facade +runner +honeypot +surf casting +diver +angel's trumpet +spin dryer +chameleon +wand +snow +vitamin A1 +manageress +volleyball net +antiperspirant +street clothes +tree sparrow +cords +sundew +bricks and mortar +caryatid +bridesmaid +trestle bridge +eyepiece +celebrant +scarlet pimpernel +gas range +onion +green salad +squill +creepy-crawly +hunk +little owl +salad nicoise +earflap +bird feeder +spray gun +bunny +Cheops +amazon +blue tit +Nissen hut +Kalashnikov +skylark +kremlin +shoebill +shopping bag +frigate bird +telephoto lens +peplum +moss pink +echidna +wastepaper basket +wood ibis +workroom +ankle brace +telpherage +Michaelmas daisy +figure skate +swami +nylons +cardoon +cocotte +headstall +twin bed +parsley +dirndl +corn poppy +nut bread +cloche +light heavyweight +mayor +lip-gloss +punch bowl +pottage +mango +fledgling +mousse +four-wheel drive +barrel +banana boat +trouser +bathroom +Sauterne +ring +settee +lavaliere +safe-deposit +godson +leatherette +schoolmate +radish +hedge trimmer +dahlia +euphonium +palace +vaulter +singlet +slicer +Pilsner +cockateel +kangaroo paw +Cub Scout +master bedroom +hexagon +cenotaph +Barberton daisy +Netherlander +intersection +Korean +gravel +chandelier +hospital bed +flash memory +pier +whole wheat flour +maroon +pale ale +special +snow bunting +crinoline +dustpan +barrette +common wood sorrel +yolk +pothos +speakerphone +tendril +cabinetwork +farm horse +brake disk +streetlight +superhighway +bandsaw +panting +pressure cooker +girdle +old man +cereal bowl +felt +hurling +architecture +harmonium +chain +blueberry +cellar +smocking +scrub brush +tablespoon +sweet corn +graining +library +street +bill +felt-tip pen +monkshood +crowd +log cabin +newel post +hack +elephant seal +golden pothos +popcorn +outhouse +patch pocket +fish and chips +tape +wax plant +eaves +fried egg +emerald +tea cart +fan blade +daily +Bowie knife +rowing boat +leaf shape +man 
+crayon +trumpetfish +chipping sparrow +whiskey bottle +pillion +city hall +golden pheasant +cheerleader +creeping bugle +couch +Dumpster +Homo sapiens sapiens +cranberry juice +cockpit +demagogue +joinery +scrambled eggs +technician +sidewalk +sheep +keyhole +power line +polyanthus +roulette +first lieutenant +checkout +tabletop +nasturtium +schnapps +engineering +skateboard +ground fir +bouquet +bunk +resort area +fleur-de-lis +power steering +opera +Bolivian +Friesian +buckskins +bay +slider +frozen yogurt +cabin cruiser +saunterer +lean-to +fishing eagle +bog star +cantaloupe +mouth +music stand +fiddlestick +brilliantine +pinball machine +bairn +barred owl +bath oil +signorina +Mason jar +nymph +rubber band +garden nasturtium +razorbill +Japanese beetle +batting cage +trestle +borage +Secretary of the Interior +scanner +baguet +baseball cap +chow mein +pen +jewelweed +barbet +chasm +pectoral sandpiper +holster +glasses case +sand +crevice +Kickapoo +snowboard +locket +satchel +tankard +alpinist +moorhen +cow pen +whooper +crown +chain +silversword +wild geranium +hi-fi +Tibetan +waterwheel +bee orchid +ruby-crowned kinglet +common broom +tabloid +javelin +sauna +klammath weed +zebra finch +spider orchid +velour +chiffon +lecture room +barrel +loggia +millstone +flatlet +soupspoon +econometrician +golf-club head +daphnia +parlor +fire-eater +juggler +attache case +hay bale +kisser +knitting needle +news magazine +flatbed +Senegalese +trumpeter +trampoline +brogan +bone +caftan +lobster pot +gazpacho +anthill +ramekin +mainsail +penitentiary +spotted flycatcher +cookstove +root beer +broom beard grass +pogo stick +plywood +epee +gas oven +Global Positioning System +sweet false chamomile +breakfast area +bullring +second cousin +wave +decolletage +rodeo +won ton +swastika +bobby pin +papaw +retaining wall +Muscadet +heavyweight +energizer +banner +amusement park +whinchat +drugstore +waxwork +meander +congee +heat sink +switch grass +commuter +peony +western 
white pine +wild raspberry +nightgown +saute +cardinal +claret +pollinator +biryani +pina colada +cassette deck +European sandpiper +block +flan +birdcage +baby +lieutenant colonel +ticking +European white lily +dog violet +coat hanger +premature baby +organza +string bean +balloonist +hurricane deck +window box +hang glider +bullfighting +piste +seahorse +hard cider +batik +common mullein +petite marmite +stuffed mushroom +tequila +ground ivy +fountain grass +stray +putter +buffer +comet +bomber +woodcarving +baseball glove +halter +garnish +selvage +megaphone +sea fan +rabbit hutch +very important person +analog watch +long-head coneflower +northern pike +roll-on +cigarette butt +terraced house +penknife +windshield wiper +cricket +straightener +snow pea +cockerel +canister +sour bread +recovery room +toilet bowl +tyrannosaur +big sister +quartz battery +television receiver +vitamin C +tailpipe +field thistle +stonechat +col +monstrance +gift wrapping +herbivore +quarter horse +ice-cream sundae +rumpus room +eyepatch +clary sage +French lavender +snorkel +choir +tent-fly +cat box +horse racing +high priest +barrel cactus +pin oak +wild thyme +keyboardist +raiser +hammock +hail +bungee +chocolate mousse +major +buzzard +gopher tortoise +Chablis +water meter +benthos +donna +blender +Mauser +avocet +rye +mulch +chancel +dusty miller +mate +corbel +minaret +frittata +French toast +mosaic +home brew +water faucet +beard +swivel chair +acropolis +largemouth +abbey +tabby +driver +copperhead +stirrup +Boston fern +Tennessee walker +artichoke +honor guard +chapatti +enchantress +sweat pants +electric organ +column +dry vermouth +range hood +Red Delicious +rape +splint +catapult +gourd +antipasto +plaza +carnation +star +wood anemone +English primrose +male fern +boot +atrium +Japanese deer +carnivore +yearling +doe +guelder rose +chicory +stretch pants +ice-cream cake +frogfish +tarpaulin +chicken soup +balaclava +tor +feverfew +three-hitter +flyweight +aqua vitae 
+locker room +wether +teacup +wide-angle lens +hook +ladder-back +osprey +awning +wedding +chest protector +pooch +rose mallow +orange daisy +fondant +envelope +duckling +blackberry +goosander +snorkeling +philatelist +broad bean +Frank +bok choy +basket +absinth +cayenne +blackbird +bottled water +trooper +timber +stable +chestnut +tomatillo +bell +banquet +rainbow trout +macrame +appointee +heart +chipmunk +purple clematis +safety bicycle +shuttle bus +Japanese black pine +lentil soup +downhill +field mustard +brass +hand-me-down +greater yellowlegs +fanny pack +croquet mallet +hip roof +duffel bag +Ritz +document +pie plant +staff member +lifeguard +white-throated sparrow +Cameroonian +hydrofoil +platter +common ageratum +middleweight +chairlift +brunch +pharmacist +lemon +driveshaft +green snake +lip +London plane +mangrove +crystal +siskin +common jasmine +hollandaise +villa +cross-country riding +mother-in-law's tongue +generator +Tanzanian +whisk +seeder +ashtray +griddle +evening bag +bluebird +bran muffin +square dancer +luggage compartment +tropical pitcher plant +autofocus +tape drive +silencer +Hawaiian guitar +swamp sparrow +Zimbabwean +drawing room +weekender +liparis +streambed +samosa +hitter +water heater +tidal basin +ossuary +dik-dik +camouflage +fiance +Jordanian +rolling pin +slingback +turret +hen +jennet +playpen +woodhewer +bushing +church bell +bear grass +double knit +tennis pro +Joe-Pye weed +pave +pochard +painted beauty +crinoline +gumbo +trestle table +schnitzel +balloon flower +Turkish coffee +extension cord +wireless local area network +sluice +umbel +microeconomist +sky +aisle +commander in chief +hydroplane racing +poll +Coca Cola +fuel injection +bird pepper +monkey puzzle +English muffin +riverbed +varietal +kachina +airport +saltwort +oolong +red-hot poker +mihrab +cocoa +jersey +Walkman +syndic +Hessian boot +millstone +carpenter +outfall +curbstone +mocha +field pansy +patriarch +slacks +switchblade +killdeer +whelk +pampas 
grass +racquetball +platform bed +Indian rhinoceros +Japanese iris +blacktop +dinner jacket +stud +jodhpurs +telephone pole +business district +kurta +basil +handset +file folder +gloriosa +orphan +cantle +cookie sheet +cafe au lait +drawbridge +hill myna +Western diamondback +watch case +cardcase +bowling alley +mattress cover +canvasback +pompadour +cornice +matador +cigar cutter +skunk cabbage +baptismal font +bitters +refectory +egg +parula warbler +tiger lily +field house +nanny +skin-diver +soda water +lymphocyte +carport +chocolate fudge +amphitheater +sugar candy +sea hare +open-face sandwich +dessert spoon +staple gun +envelope +worker bee +general +garment bag +maypop +autobahn +Atlantic puffin +polo shirt +Humvee +spice rack +grotto +banderillero +gaillardia +black-crowned night heron +oboist +weigela +Dictaphone +dwarf iris +marsh mallow +yarrow +eccentric +catsup +jade green +mistress +henbit +beachwear +head +commuter +strawberry tree +chickpea +clothespin +fleabane +brussels sprout +winter melon +Laconian +great horned owl +caricaturist +nan +flowerbed +triple sec +dairy +round of golf +cardinal +kauri +Zulu +Armagnac +cowberry +mouthpiece +wild calla +bling +puppeteer +beer drinker +adder +field sparrow +chocolate pudding +blacksmith +finback +Shetland pony +cheese fondue +panty girdle +soda can +electrolytic +florist's chrysanthemum +yellow jasmine +tudung +equalizer +ridge +dulcimer +grappa +barn swallow +coneflower +enamel +poached egg +halfback +yak +toby +Fleet Street +blue catfish +sand tiger +flying buttress +snaffle +stoop +first base +cultivated land +first lady +waratah +headquarters +arnica +lovebird +common morel +parasol +disk clutch +Xerox +vitamin P +vitamin B12 +long sleeve +certified public accountant +hot pants +pitch pine +pantie +drawers +cake mix +boar +grey +bride +false sago +bullion +coach house +bass guitar +Japanese banana +meadow clary +black belt +Canterbury bell +smallmouth +treadmill +great white heron +enchilada 
+rummer +captain +camisole +wild garlic +oak fern +ultramarine +peach +hawkweed +autostrada +adit +anaconda +artwork +skinhead +jello +hermit thrush +Bewick's swan +dress suit +trail bike +stubble +common polypody +Riesling +Easter lily +telegraph key +envelope +garlic bread +perianth +salad bar +steppe +club sandwich +nude +garden forget-me-not +Tuareg +flood +Statehouse +charcoal +boy scout +Rhone wine +parfait +spoor +lanyard +octagon +brown bread +quarterback +quilted bedspread +hookah +Pepsi +hamburger bun +entrepreneur +saddle oxford +snake's head fritillary +undies +chemise +skidder +chickpea +carnation +honey bun +mortar +Montrachet +automobile horn +skylight +gingham +rafter +pantile +climbing frame +scarlet runner +cable +cornstalk +mockingbird +raisin bread +chili sauce +hand calculator +concert-goer +detached house +coq au vin +lasso +hyssop +globe thistle +paper clip +slide +Jerusalem artichoke +tetrahedron +mock orange +lemon lily +finger +little sister +handcuff +horse wrangler +pavlova +oilcloth +snow-in-summer +common mugwort +greenshank +ice-cream cone +rubber boot +gunnysack +disk jockey +long trousers +sorghum +pontoon +calf +fire extinguisher +cotton thistle +pilot whale +ao dai +steamroller +wristwatch +tawny owl +city +country store +ironweed +kennel +bathrobe +rattan +drawer +fly tent +choline +musk thistle +courthouse +Yugoslav +bush +trawler +shellflower +jade vine +ragged orchid +pea soup +King Charles spaniel +hubcap +snook +paddy +bow and arrow +shovel +dill +cliff swallow +cadaver +hijab +masterpiece +fish geranium +kettle +sanitary napkin +carrot stick +Mountie +peanut brittle +dam +jackal +windowsill +butterfly orchid +bodice +picador +pale yellow +beanie +petiole +tenor saxophonist +bungalow +gnomon +stock saddle +field glass +rigging +wood grain +Speaker +settlement house +swamp milkweed +paper nautilus +tangerine +champagne +crescent roll +library +Schmidt telescope +stemless carline thistle +motorcyclist +alpine ash +planchet 
+water closet +casuist +hand luggage +hyssop +spaghetti and meatballs +cannelloni +cedar waxwing +water dog +brick red +linkage +sweep hand +purple heather +macaroni and cheese +butter knife +refreshment +malt +St. Augustine grass +wainscot +compass +gas heater +tamale +table saw +referee +borsch +projector +dracaena +peppermint +Reuben +Abyssinian banana +glassblower +floss +small stores +artilleryman +lapwing +ranch +garbage man +dwarf banana +commelina +currant +adulteress +landlocked salmon +pasqueflower +nan +tiger lily +Eritrean +rotunda +catsup bottle +mezzanine +royal fern +blended whiskey +bowler hat +mistletoe +manor +fusee drive +pistachio +dispensary +swamp +amputee +sculptor +schoolmaster +Chinese anise +dwarf iris +livestock +chronograph +nectarine +jockey +plaster +motel room +swamp azalea +hippeastrum +space station +duchess +catacomb +dovetail +cockscomb +common spotted orchid +brittlebush +cleats +cloche +hotchpotch +cabin car +prey +indigo +light beer +bear's breech +jonquil +analyzer +alyssum +spur gear +ice tea +honey buzzard +twayblade +dirndl +atlas moth +croquette +carafe +flyweight +professional basketball +multivitamin +air terminal +phial +roll-on +skunk cabbage +bird of paradise +rose +cooter +camping +divided highway +herbage +sweet vermouth +common comfrey +eggplant +office building +glutton +gefilte fish +bicycle rack +swamp birch +Venetian blind +Pernod +Norway spruce +portrait camera +bastion +vitamin Bc +Ugandan +Indian red +okapi +emu +vin ordinaire +chintz +shrimp cocktail +numbat +tall oat grass +cable car +stopcock +ham sandwich +Yemeni +stanhopea +plate +chicken broth +common yellowthroat +California poppy +radio +chocolate egg +mess jacket +tea table +physostegia +Japanese flowering cherry +confectionery +chicken cacciatore +painted nettle +popover +white rice +strapless +mohair +electrical cable +coil spring +arterial road +miniature fan palm +spectator pump +pesto +interlocutor +eastern kingbird +dongle +vitamin B6 +stuffed 
tomato +cough drop +okra +black +barbecue +burial mound +firstborn +corn snake +amberjack +bollard +horn +Black African +elbow pad +Camembert +circle +Japanese apricot +hearing aid +rock star +creature +taster +bubble gum +scull +lemon balm +chaetodon +anemometer +brake drum +fuselage +courthouse +aqualung +yellow adder's tongue +reception desk +guy +buffalo wing +ginger beer +robin +pantothenic acid +marsh hawk +yellow journalism +exhaust +cardamom +Tabasco +ax handle +patriarch +floor +pine snake +spoiler +hood +sphagnum +parrotfish +orphanage +redpoll +beef Wellington +white spruce +cherry plum +scapular +field lens +broomstick +mouser +wood thrush +Nebraskan +hotelier +milk thistle +soya milk +Munich beer +boucle +snowy egret +dust storm +steward +kudzu +oriental poppy +presbytery +burro +orange soda +stonecrop +splashboard +menagerie +dormer +wire cutter +yellow bells +Dubliner +shore pine +cousin +racing gig +Morgan +gold plate +villager +snifter +granny's bonnets +egg roll +Spode +amabilis fir +babbler +pestle +heliopsis +halter +black spruce +President of the United States +ski slope +chocolate fondue +lockstitch +motel +Epipactis helleborine +tabbouleh +Yorkshire pudding +overpass +Timorese +presbyter +tablefork +bottle gourd +tiara +vintage +pilgrim +reindeer moss +shower stall +towel rack +kachina +chef's salad +breeder +cow parsnip +walker +Black woman +Irish coffee +portrait lens +lateen +gilt +successor +cargo container +Lithuanian +mayapple +paisley +highchair +strawberry jam +flying fox +field scabious +blue-eyed grass +screw +Frisbee +dressing room +cholla +walkie-talkie +red currant +centrifugal pump +smorgasbord +hot rod +marcher +rowanberry +welwitschia +amphitheater +pew +concert band +bosom +pillbox +seagrass +openwork +meadow goldenrod +shower +chicken sandwich +Boston ivy +plastron +oilfield +stuffed tomato +juniper berries +frame +Spanish mackerel +family room +powder horn +fight +maguey +bunker +work-shirt +air filter +nosh +sugar bowl 
+foothill +reliquary +tugboat +horsebox +grater +palace +board member +campsite +halibut +geneva +ginger ale +high commissioner +genet +bodywork +spaghetti +protractor +pipe cutter +wood anemone +turkey cock +surge suppressor +green turtle +spoiler +bedsitting room +television room +ballot box +shasta daisy +impeller +capote +bitter +California wine +lock +spinnaker +gill fungus +baby's breath +nut and bolt +moonflower +houseboat +distributor cap +coffee bean +gusset +bowling ball +knitwear +frieze +mistflower +roadster +cue +circuitry +brake +butt hinge +Chickasaw +leopard frog +wing tip +puree +mantel +pantheon +grandfather clock +cockchafer +pomegranate +cleaners +eyeshadow +Oregon cedar +rock hopper +hawksbill turtle +agriculturist +yellow-crowned night heron +Albanian +pumpkin seed +chateau +goggles +camper trailer +bracket fungus +cigarette case +signal box +saddle blanket +poison ivy +set gun +cattleya +dry fly +concert hall +personal digital assistant +talcum +deodorant +common starling +painted turtle +kea +plenipotentiary +pantyhose +masjid +buskin +hurdle +cocktail lounge +belting +sour dock +knife blade +sugar snap pea +paddle +dickeybird +brace +keep +call center +yacht +lead pencil +tumbler +production line +tetra +private +French window +express +ski boot +pinto +broad bean +American crow +screech owl +snapper +power cord +Manx +rambutan +sun deck +stonefish +golden eagle +national monument +readout +cork oak +hacksaw +beer can +bathe +tussock bellflower +wet suit +mihrab +big game +highlighter +sprocket +measuring worm +grapefruit +samovar +distributor point +steak knife +incubator +loon +temporary hookup +hippodrome +hot spring +spacesuit +flea market +clay pigeon +catbird +earmuff +tetherball +yellowfin +cellophane +lanolin +clapperboard +velveteen +police dog +cashew +sequencer +mango +duplex house +bazaar +Golden Delicious +red carpet +collet +kickstand +broadloom +diskette +tank engine +compact +diesel-electric locomotive +whale shark +water 
moccasin +mountain avens +tropic bird +ginkgo +ski cap +fixative +glockenspiel +chopine +ethernet +herring gull +skeleton key +finger paint +conference table +great crested grebe +harbor +white-crowned sparrow +Bullock's oriole +guestroom +boutique +cable television +roulette wheel +Luger +Latin American +trumpeter +blindfold +baby +freshwater bass +home plate +bonefish +giant sunflower +giant tortoise +planking +pigeon hawk +oceanfront +door +bazaar +common wasp +conformation +kick starter +kid glove +corydalis +shuttlecock +writing desk +ivory gull +shirttail +diving suit +weka +downy birch +altar +wild sage +tufted puffin +cabinet +Orpington +cineraria +bottom +dial +coracle +resort hotel +soap dish +spotted owl +billiard room +ghetto blaster +red-breasted nuthatch +hatchling +chalet +bracteole +crusher +mixer +net melon +farmhouse +Dutch oven +transept +penlight +palmyra +stewing pan +solar cell +crochet needle +black-winged stilt +germander speedwell +crinkleroot +truncheon +bunchberry +hatchback +sounding board +mixing faucet +chess master +bisque +Brie +Sitka spruce +pawn +Mexican-American +space rocket +choreographer +collared peccary +duffel +nacho +patchcord +carpet snake +omnivore +watering can +hall of residence +streamer fly +sunroof +great grandson +oil refinery +billiard player +ivy geranium +key palm +pinwheel +yellow-shafted flicker +purple onion +soldering iron +condominium +fishing gear +heat pump +marine iguana +cuckoo clock +Bletilla striata +headrest +spotted salamander +field hockey ball +pound +carboy +vertical stabilizer +groundsheet +cinnamon bread +acorn squash +sheathing +lakefront +Jeffrey pine +synthesizer +olive +apple +pannier +ponderosa +Jew's-ear +latch +equatorial +metasequoia +permit +bloomers +town hall +fava bean +casino +bier +jampot +common snapping turtle +clary sage +oatmeal +Dutchman's breeches +massif +Guyanese +heifer +handball +sweat suit +pomelo +Iceland moss +customhouse +sandbag +archer +gyrfalcon +sword cane 
+marmite +whole snipe +blue crab +sugar spoon +brownstone +chicken wire +lizardfish +dump truck +chicken yard +chamois +electric +idle pulley +jujube +wrestling mat +aoudad +Burmese cat +water shamrock +dormitory +Unknown Soldier +hearse +bumper +clipper +desert pea +critter +semitrailer +backboard +common St John's wort +Atlantic manta +song thrush +jukebox +quoin +eastern chipmunk +copper beech +paintball gun +bull +package store +fraise +royal poinciana +niqab +traction engine +objective +day nursery +ski lodge +orphan +summer house +cereal box +router +sleuth +jodhpur +polyp +croquet +sport kite +green onion +tulle +etagere +tussock caterpillar +rest house +elderberry +bridal wreath +Torrey pine +silver wattle +kidney bean +pentode +laelia +Allen wrench +sporran +red drum +tricot +heterodyne receiver +magazine rack +stone curlew +trawler +suckling +niblick +sandwich plate +double door +Togolese +pitching wedge +desert tortoise +cloth cap +date palm +webbing +jumper +frogmouth +copperhead +covered couch +black mallee +riser +scraper +gauntlet +pantheon +food court +muntjac +grocery bag +bread-bin +transmission shaft +primigravida +window seat +crab apple +seat +Fresnel lens +dendrobium +hatchback +little theater +butter dish +back porch +umbrella tree +carrot +seventy-eight +coconut +music stool +Tesla coil +bay willow +American basswood +sabot +wheel and axle +gazette +lute +bassinet +hart +mecca +breadbasket +silverfish +handball +Scotch pine +box camera +stately home +Hereford +tread +single-breasted jacket +desk phone +deodar +professional boxing +fly casting +box wrench +black oak +martello tower +red campion +bullock +sweet William +bay leaf +dollhouse +flounder +fox hunting +beanbag +king mackerel +rouge +film advance +common mallow +parasitic jaeger +satellite receiver +nurse shark +chesterfield +tomatillo +plimsoll +hatbox +bloomer +foul-weather gear +longleaf pine +horse mackerel +tree lizard +bark +belfry +Treasury +perch +purple finch +stag beetle 
+fragrant orchid +tachymeter +tadpole +cookie jar +knee piece +agueweed +bones +chick +golf glove +toothpick +taboret +rotor blade +field artillery +purple willow +redhead +spark plug +guava +voice mail +cross +butterfly valve +star magnolia +olive +room light +Australian turtledove +embassy +Iraqi +singles +nestling +spinning rod +radial engine +rowan +sandbox +boss +moccasin flower +veneer +mint +American chestnut +white whale +CPU board +florist +press box +hurricane lamp +giant kangaroo +greater whitethroat +winter jasmine +blue +department store +southern red oak +saber saw +corn muffin +bellbottom trousers +toaster oven +red eft +condominium +galago +sunbather +redpoll +common European earwig +songbird +linnet +light meter +bracer +tepee +gumbo +water glass +roofing +spathiphyllum +shofar +sand lizard +washroom +Brussels carpet +brachyuran +home room +floatplane +knee brace +solar heater +felucca +gas ring +maguey +manse +blue columbine +cuppa +cigar band +male orchis +mudskipper +couscous +Chinese parasol tree +dude ranch +banyan +gopher snake +sundrops +aviary +African daisy +missel thrush +Photostat +stone pine +circus tent +tangle +printer cable +grease-gun +rose chafer +light pen +plantain +hearth +bullfinch +post oak +slow loris +Newtonian telescope +head +punt +spindle +New England aster +spotted sandpiper +pond pine +grass skirt +bug +black rat snake +tabasco +bull shark +tennis camp +scrambler +popinjay +bing cherry +ministry +cash register +redheaded woodpecker +kameez +farmer's market +roan +harpy +European toad +pizzeria +camshaft +hemp nettle +chicken coop +cottage pink +daybed +observatory +airdock +mountain devil +newsstand +kingfish +snow gum +jackdaw +lacquerware +peeler +miro +sister ship +damask rose +pack +snowshoe +Liberian +paramecium +tidytips +professional tennis +bookend +wood swallow +cayuse +cranberry +rock squirrel +steak au poivre +soul patch +female mammal +sash fastener +songwriter +oxeye daisy +apse +floor joist +hand towel 
+wheatear +cero +soul mate +golden fig +bus stop +psycholinguist +convenience store +manor hall +mountain sandwort +Euopean hoopoe +haricot vert +mausoleum +violist +flashlight battery +chard +fixer-upper +bank martin +testudo +diving duck +kohlrabi +Omani +sphygmomanometer +greyhound racing +chestnut +rattlesnake plantain +chaffinch +wolf pup +teakettle +cairn +souk +resident commissioner +chuckwalla +gaiter +capercaillie +liver chestnut +bean sprout +land line +ambassador +green pepper +common chickweed +Sharpie +Oriental arborvitae +oncidium +pallone +currawong +sweet alyssum +fire tower +eyebrow pencil +redfish +apricot +clementine +blucher +wigwam +pangolin +buggy +common oak +jumbojet +laser +cigarette holder +racquetball +georgette +cleft +scouring pad +drum printer +pond scum +American red squirrel +caranday +swamp willow +blindworm +brook trout +defense system +nyala +three-way calling +mizzen +shuttle +African lily +Oregon white oak +rain tree +fuel gauge +oriental cherry +wahoo +pear +jungle gym +bass fiddle +outrigger +angelfish +Old World coot +lime +battlement +yarmulke +herpes varicella zoster +burp gun +Alpine glacier +stun gun +pilot boat +Southern crab apple +bushtit +pullet +polo pony +jackfruit +raw vegetable +French marigold +golden shower tree +spike lavender +wahoo +brass knucks +cabbage palm +diesel-hydraulic locomotive +red jungle fowl +prairie sunflower +rye +loofa +icecap +shade tree +secretary bird +saffron +cos +muskrat +videodisk +Carolina wren +candy bar +Bohemian waxwing +flowering almond +cold frame +raglan +pine siskin +quince +western red cedar +red maple +adobe +agora +kumquat +tenement +bantam +bayberry +water jump +great granddaughter +snips +porcupinefish +brochette +love-in-a-mist +Iceland poppy +common sage +pace car +camel racing +slipcover +nopal +shoehorn +calypso +rhea +in-basket +maple syrup +cold chisel +Pacific ridley +dietary +aperture +lapin +rock hyrax +house wren +litchi +ragged robin +control center +shoebox 
+arabesque +eider +silver birch +bantamweight +ax head +softball +blue gum +Bechtel crab +tomato sauce +green douglas fir +sweet gum +macaroni salad +red phalarope +budgerigar +Bedford cord +Uzi +green woodpecker +ohmmeter +bacon-lettuce-tomato sandwich +hackney +Easter egg +motmot +red pine +opium poppy +gat +pussy willow +greater scaup +ocelot +persimmon +western hemlock +carambola +pinion +Malcolm stock +bobsled +larkspur +wood drake +pinetum +red gum +draft beer +funnel +terrarium +Pinot blanc +doodlebug +brittle star +salsa +cantaloup +pollack +stockpot +eastern hemlock +rock wren +burqa +squash +aircraft engine +billy +flamingo flower +odontoglossum +old squaw +redstart +sheepskin coat +mate +flathead catfish +gentianella +bilberry +bog rein orchid +incense cedar +mew +Colorado spruce +cob +portmanteau +grenadine +common ginger +masdevallia +compound microscope +sobralia +white fungus +guppy +chapterhouse +honey +green frog +sea swallow +African marigold +astrolabe +verdigris +yellowhammer +carrot juice +oxlip +medicine ball +highboy +grass frog +gamebag +surgery +mincer +mulloway +cactus wren +box office +resonator +table-mountain pine +European curlew +supernova +cabbageworm +peach +plane seat +asp +Yquem +tomato hornworm +rook +quadruped +chador +micrometer +dabchick +Afro-wig +balsam fir +bucket seat +sage green +macon +blue poppy +chinquapin oak +black pine +spinach +chrysalis +carnauba +tee +bearberry +shirt button +tree of heaven +southern white cedar +covered wagon +brood hen +spadix +European catfish +winter wren +bulldog clip +carpetbag +study hall +chino +simian +closeup lens +cookie cutter +grapefruit +mandola +sassaby +Allegheny plum +piaffe +scorpion fly +booby +draft animal +field tent +cumin +laurel oak +smooth-leaved elm +American arborvitae +American toad +grinding wheel +mountain ash +cuttlefish +pipistrelle +parer +safety rail +Clark's nutcracker +side-blotched lizard +giant hornet +wicket +dugout +electric toothbrush +dhow +common 
four-o'clock +long-eared owl +anchor +near beer +tansy +creme caramel +guided missile frigate +shelduck +durian +compact +iron tree +shiitake +polo +camouflage +pedal pusher +salon +tangerine +lacebark +Swiss mountain pine +goalpost +poolroom +space capsule +wild cherry +dress hat +wave +raglan sleeve +cassia +Jerusalem artichoke +cabbage palmetto +marsh harrier +American redstart +sea squirt +cliff diving +sparrow hawk +watch cap +frankfurter bun +police boat +flash camera +neem +eastern meadowlark +Italian cypress +orb-weaving spider +graniteware +sewing basket +latex paint +rock dove +stator +leaf lettuce +roulette +broadcloth +Spork +panicle +sternwheeler +cider vinegar +brown creeper +cowfish +closed gentian +chickpea +port +pimento +sheeting +matilija poppy +hawk owl +guava +papaya +huisache +European shrike +racing skiff +yellow warbler +gumbo-limbo +North Carolinian +staysail +court +iced coffee +money belt +shaver +Psychopsis papilio +sumo ring +refection +kingfish +clock pendulum +greater butterfly orchid +disk harrow +tawny eagle +polyphemus moth +pieplant +Nicaraguan +bocce ball +California box elder +porbeagle +crown of thorns +Mexican sunflower +fennel +stream orchid +slip ring +white fir +fold +moss campion +fairy ring +hose +pony-trekking +western larch +meadow pipit +Cape May warbler +longan +bookmobile +junk shop +lemon shark +smelling bottle +solan +widow +sea pen +universal joint +day game +goldcrest +maiden pink +biographer +rotunda +oriel +arranger +gambrel +Angora +fen orchid +leading rein +Wilson's snipe +European nuthatch +natterjack +athletic supporter +mouflon +emergency room +swallow-tailed coat +western meadowlark +feather star +Navy SEAL +toilet bag +loquat +lesser butterfly orchid +thumbhole +breathalyzer +featherweight +collards +mayfly +confessional +mountain ebony +redwing +Norway maple +refractometer +stagecoach +gasoline gauge +octopus +baker +Rhode Island red +European tortoise +cardiologist +Punjabi +Arkansas kingbird +tamarind 
+drum brake +flash +yellowtail +stokes' aster +emperor +free house +sour gum +ruddy duck +hamadryad +command module +tinamou +Norway lobster +washstand +European hornbeam +roaster +black-necked grebe +tallgrass +leopard lizard +anastigmat +Blackburn +deutzia +ground rattler +Christmas fern +wild pink +sesame seed +carrycot +Italian parsley +nectar +roll-on roll-off +true laurel +anisette +candy corn +flowering maple +revers +dun +tobacco hornworm +common sunflower +common grape hyacinth +cardiograph +electric meter +herb Paris +goalmouth +spruce grouse +canopy +wind poppy +stemma +gateleg table +lumper +speckled rattlesnake +gudgeon +rough-legged hawk +internal drive +pomelo +piece de resistance +storm door +clementine +Japanese pink +settler +yellow jacket +Fraser fir +royal palm +cicada killer +cayenne +guava +bluewing +red baneberry +lesser yellowlegs +cache +bog rose +sparring partner +ski jumping +sherry +glacier lily +beer mat +shredder +American widgeon +protectionist +green olive +black-tailed deer +Alpine fir +dispatch case +whipping cream +African daisy +cantilever bridge +maraschino +rhea +ink bottle +dacha +hagberry tree +lesser rorqual +orchard oriole +candidate +cuticle +breadfruit +fishbowl +giant puffball +closed gentian +Joshua tree +tie rod +beard lichen +flame tree +stegosaur +acerola +Swan River daisy +common murre +flowering almond +protegee +loggerhead shrike +Wilson's warbler +Japanese honeysuckle +basilisk +skimmer +hybrid tuberous begonia +pumpkin ash +chafing dish +collared lizard +iced-tea spoon +scrubbird +Iceland poppy +grey kingbird +wallflower +slick +diesel +Swiss pine +ethernet cable +ketch +lightship +black cherry +swordtail +Monterey cypress +lightweight +Floridian +Sabine +stall +contact +viola da gamba +hemstitch +upland sandpiper +box spring +sassafras +radome +lesser scaup +bluefin +yellow-bellied sapsucker +armored car +cabin class +Moorish arch +webcam +aquavit +overall +sergeant major +soft shield fern +gin and it +bobolink 
+subcompact +falconer +black morel +roadrunner +lab bench +thong +coffee urn +weeping beech +caladenia +southern live oak +scanner +wine vinegar +common speedwell +European roller +fuji +snag +piping plover +concertina +secateurs +meat thermometer +supercomputer +funnel +dais +western fence lizard +spruce pine +pommel horse +Cassegrainian telescope +pitta +India-rubber tree +mangosteen +tamp +aposematic coloration +dustcloth +birth +Atlas cedar +reed bunting +jabiru +sainfoin +press photographer +golden oriole +laryngoscope +thermal printer +winder +doubles +cricket ball +dabbling duck +tonic +Buddhist +Morris chair +swatter +quaking aspen +ancient pine +American larch +evaporative cooler +click beetle +yellow-breasted chat +souchong +bluegill +pied-billed grebe +tricorn +spring beauty +southern magnolia +rowel +chili +hard roll +flathead +satsuma +gangplank +bourguignon +cockfighting +greenwing +plum tomato +fly orchid +gnatcatcher +spotted eagle ray +ovenbird +brassavola +mocha +candy cane +afterburner +thriftshop +study +winter crookneck +grinder +muskellunge +sacred ibis +inverter +sandwort +deer fern +stair-carpet +Cotes de Provence +ovenbird +rex begonia +American woodcock +poison ash +lowland fir +pawpaw +loblolly pine +kinkajou +European hackberry +pest +coralwood +Bedouin +acetate rayon +snuffbox +radiator cap +basket oak +table-tennis racquet +smew +midge +telescopic sight +radish +great burdock +separate +damask violet +broadbill +bourbon +blacktip shark +gift shop +khimar +date +woodland caribou +policeman bird +grey birch +American elm +strawflower +officiant +hart's-tongue +straight razor +Spanish elm +radicchio +white croaker +vicuna +soft-shell clam +flannel +adonis +bonito +kittiwake +English walnut +soldierfish +hipflask +spotted crake +Streptopelia turtur +American maidenhair fern +corn cockle +telephone cord +canopy +playback +diocesan +marsh orchid +manakin +purple grackle +cob +fishmonger +otoscope +vermillion flycatcher +inhaler +instar 
+licentiate +myrtle warbler +goat herder +benthos +toggle +drumhead +piranha +doorplate +vault +triptych +red-necked grebe +transporter +vernier caliper +flathead +Portuguese man-of-war +countrywoman +vacation home +Bactrian camel +night-light +module +lemon curd +carancha +painted daisy +bok choy +ratatouille +troll +escarpment +cinnabar +computerized axial tomography scanner +lychgate +sowbread +bedside +guided missile cruiser +reel +cleat +hemostat +blue shark +Seven Wonders of the Ancient World +motorized wheelchair +pillow block +horned puffin +prickly pear +electric range +mother's daughter +vein +Oregon maple +bird dog +faceplate +wren warbler +feather reed grass +common alder +Adam's needle +straitjacket +organ-grinder +gantry +bikini pants +peristyle +herpes +terry +toad lily +celandine +red-breasted sapsucker +bragger +green peafowl +fuschia +quoits +house martin +dome +herpes simplex 1 +touraco +meeting house +vacuum gauge +cat's-ear +crisphead lettuce +carpet moth +European rabbit +puff adder +Old World scops owl +fire pink +fruit punch +ant bear +black walnut +stroboscope +white mangrove +pine grosbeak +cast +check-in +ring-necked parakeet +matai +shingle oak +fieldwork +rue anemone +landing net +ouzo +herringbone +lyceum +hydrogen bomb +mullein pink +masher +evening grosbeak +water vole +livingstone daisy +tomatillo +cavalier hat +interphone +wild lupine +goosefish +sugar maple +plantain +white dead nettle +Monterey pine +bugle +veloute +marsh gentian +Bermuda buttercup +alehouse +Peter Pan +thong +LP +tulip tree +scanner +scarlet tanager +music hall +angel shark +pecan +eight ball +rosy boa +outboard motorboat +garage +fanlight +black cottonwood +notornis +mountain fern +lunar crater +reddish orange +whitetip shark +executant +European ladies' tresses +washboard +revolving door +case knife +balloonfish +greater kudu +tarpan +cog +wet fly +Irish soda bread +basement +broken arch +canopic jar +muscat +kazoo +bobsledding +loaner +black guillemot 
+English saddle +garlic mustard +Foucault pendulum +mulberry +clotted cream +dove's foot geranium +Atlantic ridley +convector +ground floor +European wildcat +poinsettia +hideaway +great barracuda +black beech +bushy aster +cornflower +tam +true slime mold +carving knife +holly fern +railroad tunnel +crimson clover +disposal +etamine +suspension +plasmodium +political scientist +minnow +Spanish rice +twist bit +subway train +Scleroderma citrinum +saw palmetto +console +gimlet +hand pump +waratah +rock rattlesnake +keel +server +curlew sandpiper +hone +sable antelope +inkle +photostat +foresail +sallet +tiger salamander +chutney +onlooker +Exmoor +tiramisu +drawing room +battery +sour orange +juniper berry +beeper +funeral home +fescue +Maksutov telescope +ranch house +jai alai +carob +socket +popcorn +sandbar shark +pipal +summer tanager +oast +skipjack +rolling stock +dropper +great snipe +turnip greens +cowpea +honeycomb +ichneumon fly +maternity hospital +harp seal +nylon +bomb shelter +horse tick +litchi +camel's hair +mimosa +bur oak +anvil +belay +pinhead +continental breakfast +burglar alarm +Mojave rattlesnake +auxiliary storage +lightwood +ratepayer +cecropia +retractor +quadrate +pepper tree +Venus' slipper +abattoir +strawflower +firewater +purple saxifrage +black rat +pack +pepper pot +mayweed +winger +whitetip shark +great yellow gentian +snowdrop anemone +garden angelica +soy sauce +white poplar +inkwell +crouton +gas gun +honey locust +house of cards +ice maker +moquette +arrack +casualty +butterfly orchid +eau de vie +mosquitofish +prairie smoke +haft +horseshoe +steel +peach orchard +Mexican hat +encaustic +shoe +pennywhistle +sweet woodruff +hull +doorsill +globe amaranth +day school +housedog +crown princess +oxbow +maxi +positron emission tomography scanner +compere +European turkey oak +peanut +sentry box +house physician +hot line +loquat +rove beetle +riband +flowering fern +fan vaulting +ceibo +bongo +bat boy +omelet pan +European ash 
+breadwinner +gaff topsail +clerestory +bushbuck +bluethroat +khukuri +Father +portcullis +candy egg +brake lining +lawn furniture +buckskin +garden pea +Brazilian rosewood +Italian bread +horn poppy +silk tree +Christmasberry +hotel-casino +poplin +false lupine +desert sunflower +mimeograph +alpenstock +cork tree +cultivar +common mosquito +pollard +black marlin +understudy +lancet window +college +breadfruit +Herero +Labourite +bar printer +squaw grass +stelis +firing chamber +sycamore +artificial horizon +radiologist +pansy orchid +bicycle pump +wraparound +bell gable +home computer +orchard grass +carving fork +bergamot +honeycreeper +sewing room +radiator +core +brown bat +goose grass +adjutant general +Erlenmeyer flask +massasauga +tail rotor +cardinal tetra +Drambuie +wine palm +Sarcoscypha coccinea +shantung +Calvados +garganey +vicar +house mouse +creeping oxalis +digital subscriber line +cedar elm +backgammon board +blackberry-lily +pallid bat +New Zealander +Barbadian +rose geranium +European spider crab +gharry +electric hammer +mustard +Chinese lantern +laundry cart +filament +mozzarella +gooseberry +sukiyaki +porkpie +culvert +altazimuth +plum pudding +serin +Spanish dagger +Asian crocodile +crevalle jack +mascara +pig bed +alderman +northern shrike +Sufi +purple-fringed orchid +derringer +linseed +hockey skate +bell jar +Japanese wistaria +mantled ground squirrel +western toad +lieutenant commander +mechanical piano +ovoid +paddlefish +demijohn +coast live oak +brick +gearset +tailstock +phonograph needle +winery +tuberose +mother's boy +shot tower +crucian carp +carpet pad +lamb's-quarter +Menorah +common white dogwood +hypanthium +rosebay +wild medlar +soil horizon +sweet orange +bitterroot +hand glass +cloisonne +towpath +gum ball +margay +carambola +bolt cutter +charger +vibraphone +gueridon +elephant tree +wood-frog +ash grey +duffel coat +third base +chunga +glebe house +lake trout +encephalartos +Japanese oak +northern red oak +pruner +blue 
orchid +Biloxi +western wood pewee +corselet +alabaster +anechoic chamber +grass pink +wax begonia +blue daisy +pennyroyal +Asian tiger mosquito +cheese souffle +flat bench +caramel +sump pump +bush violet +common fennel +corner +skullcap +asparagus fern +white mangrove +calceolaria +sateen +saltbox +hollowware +head nurse +coal miner +mountain lily +tufted vetch +European perch +line officer +steamer +stickball +shin guard +cauliflower +Monegasque +hatpin +wolffish +trackball +khaki +arthrogram +rocket larkspur +naval commander +Gemini +ski binding +department head +Chenin blanc +wingstem +knothole +aerides +sweet bay +tautog +gangway +waterspout +Hudsonian godwit +armyworm +incinerator +kidney vetch +pine nut +cypress vine +hip tile +sorrel tree +relay +bench press +Kentucky coffee tree +dobson +sapling +false lily of the valley +veld +phaius +vitamin B2 +beaker +wall tent +sieva bean +dusty miller +sewing kit +cavalry horse +diaper +butterfly pea +Spam +saddlebill +pearly everlasting +kowhai +Sister +moneywort +organdy +pine marten +bareboat +hot-water bottle +baby blue-eyes +silver lime +common cotton grass +malmsey +blue pea +baggage car +pineapple +folding saw +cotton rose +brawler +black duck +Weizenbock +pool player +Gujarati +wild duck +purple sage +sage grouse +mail train +arm guard +short-spurred fragrant orchid +queen +eparchy +spring peeper +ortolan +shoulder +fighter pilot +American beech +snowcap +novitiate +roller +butcherbird +canyon oak +brompton stock +firebrick +rudder +light cream +Primus stove +nonsmoker +probationer +harp +kosher +surcoat +videotape +zebu +first class +yam +car +rissole +miso +funambulism +attic +curling iron +shutter +encolure +split-pea soup +yellow rocket +gas oven +ultracentrifuge +chamomile +canteen +eyeliner +yellow squash +Irish stew +collar +doublet +machinist +septic tank +snap bean +Polyporus squamosus +western tanager +creeping St John's wort +back +sinkhole +perforation +Romanian +epergne +fez +comfrey +sidecar 
+beach pea +screen door +instigator +plughole +woodbine +pigweed +hip pocket +common scoter +squeegee +Surinam cherry +porringer +body stocking +eatage +shallot +enlarger +common canary +trophy case +gun case +plow horse +hot plate +pearl oyster +margarita +madras +backspace key +pigeon guillemot +pajama +buckthorn berry +homestead +bedbug +Linotype +trundle bed +granadilla +theremin +chin rest +bouillabaisse +tumble-dryer +truffle +cassava +kurrajong +gyroscope +European silver fir +C-clamp +politician +green soybean +exponent +flame tree +scissortail +achimenes +crown daisy +soft tree fern +spaghetti squash +pale violet +beaver +dashiki +washboard +driving wheel +sack +foulard +sputnik +boatbill +English elm +sack coat +grog +golliwog +Malayan tapir +May wine +calash +stile +windjammer +American sycamore +rotor head +fast food +balata +dragonet +Emmenthal +metronome +negative +meadow saxifrage +rabbit ears +chenille +round +hobby +crankshaft +Wilson's phalarope +Murphy bed +soil pipe +forecourt +policyholder +tarmacadam +loyalist +gyro +Queen's crape myrtle +shortcake +apple butter +pumpkinseed +heronry +yellow perch +baggage claim +escarpment +diaphragm +mescal bean +shunter +flax +columbarium +Joe-Pye weed +Neandertal man +casement +hole-in-the-wall +Verdicchio +futurist +eaglet +tassel hyacinth +pup tent +fawn lily +cabbage palm +pogonia +hospital ship +water mill +Oregon grape +lentil +grindstone +banana split +inkberry +coonskin cap +bazooka +wrap +anise hyssop +Java sparrow +red-eyed vireo +common opossum +clintonia +bustle +booster +tribesman +soy +panhandle +jaboticaba +locking pliers +Sauvignon grape +ghat +screw +oximeter +white croaker +saucepot +eggbeater +reticule +cabbage bark +looking-glass plant +head gasket +California sycamore +cowbell +Aleuria aurantia +Herr +lever +spider orchid +cashew +shift key +solar house +wood chisel +white +mantilla +stamp +bolero +rear admiral +garden rake +Lao +crowbar +lapdog +buttermilk biscuit +yellow bedstraw 
+pickerel frog +dowel +serjeant-at-law +mill-hand +lambrequin +state treasurer +red silk-cotton tree +coiffeur +star anise +shoulder pad +marshal +sitar player +gown +ground cedar +hedge maple +caddie +pitahaya +corn marigold +stick cinnamon +woodland star +Eurasian green toad +anti +blueweed +medicinal leech +gaur +chocolate kiss +kit fox +mother +butte +audio CD +blast furnace +vitamin D +nutgrass +cornice +black sheep +hearing aid +lingonberry +quad +lentil +riding crop +pratincole +pentagon +sea lavender +nerita +flatmate +catboat +water clover +angiopteris +mushy peas +crown imperial +music school +woodshed +platy +Turk's-cap +rundle +reading teacher +hardtack +balloon sail +oriental spruce +bluefish +white mulberry +horned violet +satin bowerbird +treasure flower +sustaining pedal +mimosa +spurge nettle +sea green +hasp +lederhosen +pink cockatoo +long johns +basket weave +freewheel +thrust bearing +timber tree +orphan +falafel +common camas +bird of passage +bird's foot trefoil +electric eel +fizz +grape arbor +serape +brace +hazelnut +kylix +horse mackerel +cassia bark +lizard orchid +spat +Brown Swiss +pocket flap +pillory +purplish blue +rolling mill +tappet +broccoli rabe +semi-detached house +mushroom coral +fly orchid +nougat bar +ball hawk +sand wedge +shirred egg +black locust +strip lighting +drop scone +brush turkey +ball +tragopan +dallisgrass +tuatara +great knapweed +potentiometer +Kiliwa +Pacific bottlenose dolphin +accelerator +Darwin tulip +osteopath +Arizona cypress +manna ash +butterbur +cornelian cherry +American holly +nopal +tanker +foreshore +ditty bag +gas lamp +safety razor +chanter +fomite +chip +striped killifish +catalytic converter +plaice +dusty miller +takin +gerenuk +corn chamomile +Japanese pagoda tree +boneset +common osier +Guinean +taro +plotter +celandine poppy +churn +steenbok +edible mussel +sensitive fern +triode +black raspberry +zoo keeper +feather ball +dredger +starlet +cornpone +coat button +rosinweed +toy 
Manchester +crested cariama +finger food +basilisk +shotgun shell +comfort food +mountain hemlock +candytuft +Stilton +record changer +anklet +ball valve +Mediterranean snapdragon +BVD +sand cat +Galloway +nutmeg +water-mint +woodwaxen +citron +ark shell +federalist +drone +cheekpiece +hyperbaric chamber +addax +field-emission microscope +synchronous converter +men's room +medlar +electronic fetal monitor +Sazerac +false indigo +roof +passe-partout +meadow spittlebug +Phytophthora infestans +oast house +hedge nettle +voting booth +slender salamander +telephone jack +true bug +scouring rush +Scotch egg +matchbook +aperea +cytomegalovirus +garlic press +cove +whitebark pine +Slovene +narrow wale +mother's milk +Audubon's warbler +prickly poppy +cowl +tailorbird +mud brick +bamboo palm +welt +Afghan +Virginia spring beauty +dinner bell +night jasmine +fly rod +microtome +aerie +carinate +picker +brick trowel +loving cup +swathe +green mayonnaise +rivet +bandbox +newsroom +tea tortrix +bobby +gig +hush puppy +garlic chive +piston rod +aspidistra +bluejack oak +harvest-lice +strap hinge +sour mash +macadamia nut +histiocyte +fan belt +shelf bracket +abelia +Hottentot fig +fish chowder +abettor +compote +beige +dioon +hop +haymaker +oilskin +magnetometer +tool bag +tambour +call girl +gringo +fairy light +broad-leaved plantain +second base +zebra mussel +Japanese cedar +pistia +swamp chestnut oak +cashmere +double cream +samisen +lamb curry +companion +kapok +julep +sweet woodruff +gardener +jewfish +inspector general +collembolan +wheel bug +bass +scrubland +wryneck +macrozamia +trouser press +clove +tiger cowrie +yawl +collard +dildo +pony cart +ormer +annual +tessera +chancellery +two-toed sloth +queen +old lady +wringer +spritzer +baggage +black mangrove +black-eyed Susan +semifinalist +highlighter +alfalfa +Easter daisy +escapement +operating table +neutral spirits +bursar +roble +entablature +girl wonder +farm boy +ring ouzel +permanent press +auklet +beefsteak 
tomato +gaming table +tea bag +manul +giant bamboo +Ozark chinkapin +matzo +furrow +smoothhound +CD-ROM drive +powdery mildew +copilot +garden +American merganser +bunsen burner +Asian longhorned beetle +lead tree +creeping buttercup +Percheron +back brace +axseed +cub +soul food +rabbi +edelweiss +mineshaft +fox grape +sandwort +torque wrench +leisure wear +Mae West +broccoli +loach +maraschino +heavy cream +silkworm +cirque +vintner +whitewash +butterfly pea +two-toed sloth +midiron +ceriman +Bulgarian +operating microscope +sambuca +California fuchsia +silver maple +tangelo +black bean +lugsail +starting gate +leek +sunflower seed +fish fry +clinker +synagogue +coscoroba +brae +uphill +common limpet +golden plover +cedar of Lebanon +amphibian +Canary wine +taipan +agua +feeder +parallel +mater +pink calla +meat counter +yagi +crab cactus +cacao bean +bowfin +alley cat +stonefly +Eastern cottonwood +vernier scale +marginal wood fern +dancing-master +detective +yam +textile screw pine +hooch +spinet +single prop +sassafras +goose barnacle +triple cream +China tree +peeper +dressmaker +snatch block +ironmongery +dressing case +creeping bellflower +silver sage +honeydew +eastern red-backed salamander +peg +nombril +danish +mashie +anarchist +alligator snapping turtle +shepherd +American white pine +runner +chalice vine +rheumatologist +defibrillator +yellow chamomile +lemon balm +peacekeeper +native beech +sandwich board +Bavarian +titrator +paneling +deer mouse +poteen +sugar snap pea +meadow salsify +town crier +best +basinet +common myrtle +night lizard +cushaw +Tampax +camphor tree +gentile +orange peel +putty knife +pyromaniac +Brummie +fever tree +double +nest +inferior +cabbage tree +graduated cylinder +mucor +woodborer +earthwork +potato salad +four-hitter +gooseberry +water vole +ziggurat +grapefruit juice +four-in-hand +cranberry bush +diode +videotape +Mohican +niacin +beetroot +shirtsleeve +cork tree +two-eyed violet +white ash +drawing chalk +baked 
Alaska +bone-ash cup +toastrack +diastema +bed jacket +dwarf astilbe +yellow honeysuckle +cow pasture +sheet pile +saxhorn +upholstery material +California white oak +Spanish bayonet +horsemint +littleneck +deflector +magician +standard transmission +blue marlin +shallot +feijoa +collar +board +jump suit +common staghorn fern +priory +Xhosa +Loranthaceae +barbecued wing +barmaid +spit +lemon juice +umbrella plant +field pennycress +centenarian +queen bee +fish stick +black bread +dirk +secularist +German American +spotted weakfish +iron foundry +speed bump +yellow-fever mosquito +gag +frame +black-eyed pea +alcoholic +involucre +sperm whale +balanced diet +wax bean +butcher's broom +winter heath +Mainer +Australian pine +gas guzzler +double-breasted jacket +pod +palo verde +trimmer +wattmeter +dyer's woad +crotalaria +vine maple +sulky +jack pine +thumb +Wilton +Panchen Lama +welder +badminton court +business editor +Arabian coffee +Kamchatkan sea eagle +foamflower +steep +plane +freckle +cerebral cortex +Vouvray +tea +forest tent caterpillar +neckerchief +accelerator +jig +bridal wreath +highball glass +New England clam chowder +beach strawberry +call waiting +baton twirler +double boiler +Dutch elm +car bomb +filmy fern +breviary +Florida gallinule +dace +parsnip +riparian forest +crescent +earplug +grab bar +cusk +foglamp +screwtop +black mangrove +mascot +Welsh poppy +gas holder +support hose +salsify +red beech +Indian python +caroler +pineapple juice +lowboy +terra sigillata +black olive +hypodermic needle +radio-phonograph +moussaka +miter joint +creche +tuning fork +black wattle +affiliate +vertical tail +kiwi +red morning-glory +piping crow +runway +Kashmiri +studio apartment +sea feather +Judas tree +boatbuilder +corn earworm +fallboard +Victrola +lechwe +goat willow +turret clock +Canada anemone +leaf lettuce +savoy cabbage +headpiece +Lebanese +fothergilla +hemlock +toolshed +silver tree +blue-headed vireo +weatherman +cylinder +caltrop +adjutant bird 
+driving iron +millet +European woolly thistle +rose apple +clown +schoolfriend +eastern coral snake +barbecue +executive vice president +long-billed marsh wren +brittle bladder fern +tank destroyer +left-hander +matting +catchment +balsa raft +eastern fence lizard +color tube +corncrib +electric typewriter +westland pine +elder statesman +whey +plonk +mound +cittern +nest egg +copyholder +China aster +basking shark +gavial +common duckweed +vanilla orchid +red-shafted flicker +granadilla +sylph +sty +vest pocket +potherb +little brown bat +Trapezium +ordinary +adult +purple-fringed orchid +abseiler +disco +metal detector +beefsteak fungus +ilang-ilang +barley grass +hawser +suture +brake shoe +staghorn coral +barbecue sauce +Browning machine gun +sarcophagus +disa +oven thermometer +rosemary +track +gorget +quince +royal +piston ring +teak +pin cherry +Komi +walking fern +sloe +synchronous motor +fire-bellied toad +Teleprompter +co-star +cape gooseberry +oscillograph +bass clarinet +cock of the rock +Tyke +showy milkweed +safety valve +branch water +sweet marjoram +hugger +crampon +fairy godmother +band-tailed pigeon +snow-on-the-mountain +minibar +foreland +grosgrain +dita +rampion +calligrapher +jointed charlock +master +sheepshead +barrelhouse +Carolina allspice +mastic +brake pad +whiskey sour +casement window +conveyer belt +stolon +pavonia +shinny +witch elm +logwood +hostel +pageboy +vesper sparrow +pyrrhuloxia +common carline thistle +wafer +boysenberry +screw augur +hack +American white oak +governor general +Mother Hubbard +game fowl +drosophila +delft +nymphet +tollbooth +chough +Russian dressing +plum tomato +American saddle horse +dusky salamander +black medick +red valerian +cordage +Elastoplast +conacaste +backlighting +swell +riveting machine +cowpen daisy +openbill +water speedwell +picture hat +crested myna +servo +bletia +garden trowel +muscadine +common caper +false lily of the valley +aralia +sharp-tailed grouse +cigar smoker +bandoneon 
+Chinese alligator +crazy +point lace +charcoal +Texas horned lizard +marinara +backstay +Gatling gun +piston +game fish +fall armyworm +grammarian +beer hall +guadalupe fur seal +sugar palm +peanut +velvet ant +light machine gun +rya +cling film +adobo +myrtle oak +angelica +balsam apple +windbreak +brother-in-law +snap brim +automobile factory +clavichord +dusky shark +edible banana +altar boy +California lady's slipper +schoolbag +wax bean +Atlantic walrus +bullpen +straw wine +thatch palm +potluck +tamarind +charcuterie +sod house +tie rack +liebfraumilch +clinician +scarlet lychnis +Spanish iris +bread knife +water oak +bedpan +Angolan +bassarisk +Alaska fur seal +African wild ass +milk float +froghopper +Verpa bohemica +water cooler +chop suey +ranker +red helleborine +Prince of Wales +marmalade tree +car train +giant red paintbrush +desert sand verbena +right whale +baron +stevia +asterism +five-spot +catapult +Silex +fiberscope +refresher +beef Bourguignonne +snood +divot +waterproof +crabeater seal +Missouri primrose +bumper guard +rock opera +Lilo +coffee can +smokehouse +buffalo grass +propjet +ice tongs +poop deck +acorn barnacle +veal parmesan +shower room +collins +ringhals +silage +jawfish +trouser cuff +contour feather +songstress +rachis +White Russian +stanchion +mastaba +flatbed press +viand +legal representative +espalier +organic light-emitting diode +sushi +scorer +haricot +pinna +plectranthus +jungle cat +dried apricot +coach horse +white fringed orchis +veal cordon bleu +bath +dallier +marching order +donkey jacket +Panama tree +aerator +klaxon +pinnacle +shouldered arch +lesser celandine +common eland +Grand Marnier +cock of the rock +phlomis +Japanese umbrella pine +morning room +dead-man's-fingers +little auk +bascule +house paint +home fries +great skua +cesspool +flying gurnard +wild crab +checkerbloom +Wollemi pine +cheese dip +coif +charwoman +tea ball +waif +Arctic ground squirrel +parishioner +stabilizer bar +potentiometer +black 
cohosh +medlar +willow oak +cascara buckthorn +scoutmaster +Canada lily +poppy seed +paper mulberry +blackthorn +garrison cap +inductee +aeschynanthus +interior live oak +black spleenwort +wild service tree +sling +nicad +swab +sego lily +eiderdown +fruit cocktail +pallasite +weeping spruce +shiv +sea lamprey +coachman +half binding +American white birch +gainer +Concord grape +yellow birch +fucus +common room +io moth +red osier +crucible +galangal +salmagundi +pepper steak +cap opener +swizzle stick +tomato juice +Nobelist +Sarawakian +African monitor +sleeping beauty +stereoscope +curd +pyramid bugle +applejack +dosser +rake handle +pilot light +Eames chair +Scotch and soda +bell heather +dinette +blackpoll +dogie +sound camera +cattle guard +mashie niblick +edible cockle +monocle +steak tartare +partaker +sidesaddle +communications satellite +porkfish +water hemlock +drawbar +ultramicroscope +Jamaican cherry +craftsman +lovage +common apricot +drum majorette +backsword +smooth alder +Amniota +dribbler +theosophist +dolman +ivory tree +Green Beret +pipe smoker +mayoress +mignonette +crampon +henbane +kirtle +death's-head moth +instep +great St John's wort +lorry +black-necked cobra +ball carrier +Jordan almond +byway +earless lizard +marble +andiron +high-protein diet +buzzer +ice floe +crankcase +Bofors gun +sockeye +veery +Delaware +caravansary +prairie coneflower +star apple +suiting +cot +call forwarding +American gallinule +glossy snake +rose chafer +instant coffee +placket +Tarahumara +pulsar +philodendron +orange tortrix +cypress spurge +Welsh rarebit +music box +giant crab +vanilla bean +water thrush +prayer shawl +gouge +promoter +dagga +black currant +bitter cassava +drain basket +snare +digital audiotape +retainer +olive drab +gluten bread +graham cracker +cheddar pink +caregiver +spray paint +Anglo-American +boatyard +backbencher +Link trainer +bell arch +weir +arbor +millionairess +sour cream +earthtongue +crawlspace +crossjack +balalaika +crupper 
+western redbud +guinea hen +rangeland +gaboon viper +common louse +single-leaf +horseshoe +balsam poplar +triskelion +jack-in-the-box +jester +rain stick +glove compartment +imperial moth +Japanese beech +biotin +turnip +oligarch +western skink +mudguard +retsina +data system +green bristlegrass +visiting professor +beaded lizard +weathercock +Sloppy Joe +high tea +lightweight +record sleeve +cooler +nodding onion +pigs in blankets +torque converter +district attorney +bunting +orrery +radiator hose +common plum +wood spurge +calamus +chicken Kiev +pin +lath +telephone bell +thistledown +audiotape +gypsy moth +snuffer +pari-mutuel machine +peanut butter +hearthrug +sack +Old World yew +chives +stovepipe +xenolith +mattock +mangle +electric chair +backup system +Empire +blackwash +dodder +Allegheny chinkapin +finger plate +junk +brown rice +wild angelica +chinaberry +mason +rasp +den +violet wood sorrel +nosewheel +plenum +merino +kirtle +Igbo +ensign +sex symbol +Belgian endive +sugarberry +yellow salsify +purple emperor +atlas +African clawed frog +leatherjacket +midwife +sac fungus +European cuckoo +three-day event +Mexican poppy +wagon tire +armyworm +rain gauge +Oregon ash +columbarium +spectrophotometer +Milanese +pointing trowel +casualty +Eastern hop hornbeam +lobe +mouthpiece +au pair girl +giant water bug +Browning automatic rifle +laser-guided bomb +drone +white alder +cockleshell +mufti +gravy +berm +boat hook +marshmallow +pet shop +cowpea +tactician +wading pool +anchovy dressing +flip +shackle +Wedgwood +thick-billed murre +erecting prism +giant salamander +sleeper +quiver +chain store +wing tip +New World tapir +witches' butter +gendarme +ginseng +common maidenhair +graduate nurse +balsam pear +hoatzin +philanthropist +axle bar +gas meter +moth mullein +ragbag +Chinese cabbage +celery stick +rutabaga +scalpel +cape marigold +variometer +argali +brig +shuffleboard +wort +Orlon +epiphyllum +allice shad +coffee filter +solar telescope +Japanese linden 
+thinning shears +golden wattle +queen triggerfish +millinery +surfbird +flame fish +clove +dicamptodon +red-bellied terrapin +turmeric +baya +air horn +Indian coral tree +punnet +sharkskin +water crowfoot +bight +desert iguana +Texas toad +volva +dredge +Turkey red +chemical plant +gemma +dice cup +orange marmalade +mistletoe +surveyor +frozen orange juice +pallette +poultryman +burbot +courlan +captain +saddlery +bodyguard +dwarf tulip +black ash +pulse +nailbrush +tickseed sunflower +legless lizard +shirtwaist +polling booth +chickeree +garlic chive +common thyme +multichannel recorder +screw thread +sangoma +calliopsis +geoduck +colleen +bandicoot rat +pastis +swamp sunflower +scorekeeper +Honduras mahogany +Australian pitcher plant +triangle +elevator shaft +green pea soup +carrel +prairie aster +bird's-nest fungus +scarlet clematis +gook +mescal button +carcase +mulatto +ejection seat +strawberry daiquiri +goat grass +car battery +babu +chief of staff +monilia +Siberian crab +ridge rope +Morchella semilibera +nutmeg +moosewood +graham bread +California four o'clock +zwieback +velvetleaf +abelmosk +shadow box +corned beef hash +newsreader +backstairs +cutwork +sherbert +tooth fungus +angel-wing begonia +greasepaint +common milkwort +potato vine +CD drive +crepe de Chine +sporting man +koto +armet +barking frog +celeriac +drainage ditch +black box +steel blue +clotheshorse +corn speedwell +drawknife +spritsail +vichyssoise +modeler +pocketcomb +limey +suslik +cockpit +digester +brig +raita +troll +benedictine +rock wren +lock +Barnaby's thistle +school bell +school ship +Soave +falchion +swaddling clothes +terrine +smoke screen +rivulus +sweet lemon +cullis +bustier +peppermint +Philadelphia fleabane +Hampshire +active +charnel house +face guard +Quebecois +facilitator +tongue depressor +bitternut +heath aster +sapodilla +bluestem +centrist +Canterbury bell +needlenose pliers +groats +tapa +Qatari +paper feed +tilt-top table +plastering trowel +brazil nut 
+rotogravure +patriot +manicurist +bacon and eggs +puffbird +lightweight +golden willow +kaiser roll +duff +girandole +seaside daisy +Kurdistan +Skivvies +showboat +fire bell +lock-gate +greater masterwort +weald +ice ax +toetoe +mess kit +bucking bronco +black turnstone +backscratcher +backpacker +basement +marbleization +trigger +satsuma +fall-blooming hydrangea +mountain lady's slipper +yellow oleander +crookneck +ex-president +Venn diagram +psaltery +bulwark +old boy +linear leaf +aril +butt weld +fall webworm +pruner +bald-faced hornet +nougat +tailgate +field speedwell +potsherd +center punch +long beech fern +desert paintbrush +canyon treefrog +bushel basket +Eurasian +swamp horsetail +cryptanalyst +wicket +school newspaper +captive +spider brake +electric mixer +tumbleweed +mason wasp +sash window +paddock +wet bar +oxtongue +stevia +wheat rust +scute +switch engine +mud dauber +dotterel +snailflower +common barberry +mulligatawny +cinnamon bark +cigar box +trivet +proof spirit +cream soda +western grey squirrel +baby powder +Bren +Japanese yew +sailcloth +Basket Maker +bannock +basidiocarp +aphelion +erect bugle +limiter +bosc +Przewalski's horse +helmet orchid +audiometer +battle cruiser +grass widower +staphylococcus +Congolese +common pitcher plant +parliamentary agent +Virginia snakeroot +mockernut +Siberian elm +backbench +rough +chervil +chlamys +nationalist +galantine +screwdriver +falsifier +cancerweed +spur +jerkin +porte-cochere +dill pickle +Montagu's harrier +tetrode +true fungus +American quaking aspen +vitamin B1 +leopard lily +eggdrop soup +aurochs +core bit +Jaws of Life +trousseau +parquetry +Disciotis venosa +tender +beef goulash +vitamin K1 +pepper spray +covered smut +hook +sports announcer +weapons carrier +foxtail grass +sloe gin +mezereon +antifouling paint +pavior +pile driver +security consultant +monkey-wrench +Indian hemp +amaretto +American wistaria +A-line +market strategist +rainbow runner +souvlaki +binturong +stiletto 
+gastrula +Vietnamese +Old World hop hornbeam +cold cathode +pier table +houndstooth check +prop root +leaf-footed bug +sedge wren +Dutch iris +drop curtain +opossum rat +lame +pollen tube +doubletree +compression bandage +pinon pine +catmint +pier arch +kingmaker +deanery +loofah +fullback +fencing mask +flying boat +carpet sweeper +lemon-scented gum +Accipitriformes +kit +pigfish +clipper +dolmas +lesser centaury +blood agar +water violet +raw milk +lemonade +vicar-general +supply closet +Anzac +confectioner +ignition key +velvet grass +white willow +John Dory +ruddiness +wheel +common horsetail +hubbard squash +speculum +Spanish bayonet +mountain mint +glint +foxhole +housemate +bootjack +sleigh bell +clog dancer +Mexican mint +rendering +Hausa +star saxifrage +spring squill +clothesbrush +liquid metal reactor +Columbia tiger lily +sorrel +cartwheel +Jersey +Caucasian walnut +desert willow +surveyor +elbow +Santa Gertrudis +fringe bush +industry analyst +lyrebird +Cortland +arroz con pollo +catechist +tank top +jew's harp +cereal oat +heartleaf +short sleeve +butty +butterfly plant +stud finder +felloe +beer garden +clevis +wood warbler +demerara +cornetfish +mince +Jamaica rum +Spanish broom +binnacle +camise +ferrule +Copt +hall +minicar +scimitar +cryptogam +miter box +limestone fern +Marsala +Parliamentarian +gravy +woolly bear moth +formula +squash bug +pigmentation +plate +skin graft +radiotelegraph +hellbender +soft pedal +lavender cotton +propagator +Bailey bridge +cottage pie +rotgut +A battery +pintle +off-line equipment +European swift +shrimp butter +plumb bob +trunk lid +succotash +yellow cypress +heartleaf +antelope squirrel +sambar +maternity ward +deciduous plant +bartlett +Riesling +sour cherry +Klansman +poke +academician +sociolinguist +bird's nest fern +common privet +scale fern +tachograph +oyster stuffing +pusher +green June beetle +staghorn sumac +lockage +master +bap +harlequin +blackfly +spotted coral root +kahikatea +cabana +riot gun 
+apple mint +kob +praline +confidant +pahautea +float +city father +Zen Buddhist +pessimist +conference center +banksia rose +comfit +sweet cicely +winged bean +henroost +myope +bunt +nailfile +yellow mountain saxifrage +cruise control +abandoned ship +water chinquapin +spanker +wing nut +puccoon +pier glass +Atlantic sailfish +medlar +buttercrunch +rough-skinned newt +planter's punch +Dutch iris +control key +committeewoman +torpedo-boat destroyer +garambulla +tree heath +gladiator +September elm +inclinometer +snowbell +call-in +sunsuit +microfiche +bluestocking +cheval glass +server +franking machine +sugar syrup +Macoun +transport ship +alderfly +wash-and-wear +Abbe condenser +bush nasturtium +wild leek +canary seed +Northern Baptist +sweet wormwood +jaboticaba +cardroom +autoradiograph +ash-pan +sprinkler system +rattrap +claymore +parts bin +forest red gum +thermonuclear reactor +Indian crocus +lector +heir apparent +leafy spurge +masquerader +varicella zoster virus +cucumber tree +hedger +Shumard oak +zooplankton +quartermaster +arrester +bridge +hop clover +meadow foxtail +winter hazel +portable circular saw +penuche +limpa +blue toadflax +mesophyte +Alpine anemone +pet sitter +avocado +streptococcus +fiber optic cable +river red gum +hornist +chicken taco +red spider +tape grass +densitometer +salmonberry +tiger snake +hot toddy +silver fern +candlenut +buckram +local call +defoliator +king +mahoe +lever lock +social insect +winter purslane +bootblack +fireball +ramie +bellbird +prepuce +capote +Chinese forget-me-not +Pisces +costume +California black oak +tree lupine +golden polypody +liger +California whipsnake +urodele +sapodilla +skillet bread +duckpin +supremo +asparagus bean +kampong +endameba +cow pony +rider +motherwort +Persian iris +soursop +kohlrabi +Parisienne +irons +doubles +feijoa +farmplace +cottage cheese +bezoar goat +subcontractor +blunderbuss +down +purple martin +Lapp +crenate leaf +tobacco pouch +beach towel +Santa Lucia fir 
+monetarist +stringer +ocellated turkey +Texas purple spike +ackee +caddy +hedge mustard +second-rater +strawberry bush +valedictorian +steak sauce +prairie gourd +aspirant +mint +Valenciennes +vodka martini +American persimmon +big brown bat +Mycenaen +mouthpiece +norfolk island pine +pennyroyal +Jewish rye bread +granadilla +tract house +wall +shuttle helicopter +blackjack oak +Lippizan +storm window +white zinnia +sickle +sushi bar +polish +baldric +brooklime +church hat +control circuit +vicuna +death adder +eukaryote +durmast +field soybean +jacket potato +wild basil +queen consort +brooklime +octant +blue false indigo +broccoli raab +step-down transformer +date bread +blue ash +duffer +oak chestnut +pennant +wedge +Florentine iris +morion +weakfish +morning dress +public address system +spearmint +Ashkenazi +sow +interpreter +Metis +pita +iron lung +parfait glass +cylinder lock +immortelle +obstetrical toad +tee hinge +successor +western +working girl +julienne +AND circuit +spaghetti junction +fer-de-lance +enlisted woman +star +lightning rod +bilge pump +pacer +horse nettle +African oil palm +blastocyst +air hammer +bamboo fern +remote terminal +lambkin +money cowrie +Pelham +clinical thermometer +wiggler +guru +false indigo +tea bag +foredeck +king +baby shoe +mule +grab bag +silver-bell tree +knitting machine +cobia +roulette ball +larder +button pink +rumble seat +noria +queen mother +solar thermal system +aquaplane +highbrow +rusty blackbird +desktop +lima bean +pontoon bridge +watercress +wild cabbage +tumbleweed +dressing sack +compact-disk burner +spittoon +marrow +sporophyte +second fiddle +pot-au-feu +specialty store +dry +mole +khadi +japonica +lovage +squamous cell +lobe +European creeper +brown pine +bladderpod +rumble +French Canadian +mascarpone +Pacific halibut +perennial ryegrass +wine lover +turbot +longwool +silver tree fern +dust cover +synchromesh +corn pudding +alpine azalea +garboard +cane sugar +observation dome +condensation pump 
+hind +taximeter +hand drill +gas thermometer +jammer +buffing wheel +handstamp +prairie mallow +turkey stew +sun spurge +duck pate +kibble +Cassin's kingbird +apadana +Devon +grinner +oocyte +blank +header +schoolmaster +guard ship +intravenous pyelogram +rimu +luff +Mediterranean fruit fly +singlestick +lady-in-waiting +curb +birch +limekiln +orthoscope +serotine +Spanish oak +swamp cottonwood +edger +city man +picnicker +white basswood +Parsons table +Christmas begonia +perspirer +Pacific tree toad +Cape tulip +finger bowl +blue pike +greengage +handcar +milkweed +potbelly +river dolphin +creel +typewriter carriage +banteng +pawnbroker's shop +huon pine +biennial +man of action +foundress +caveman +featheredge +jordan almond +sandblaster +coralberry +low-calorie diet +hoot owl +garter +bain-marie +wrecker +fenugreek +double-hung window +idol +scullery +balloon vine +summer savory +winged spindle tree +Helvella crispa +walrus mustache +gas engine +boulle +rush grass +rue +hoe handle +cat fancier +deerstalker +dunker +American red plum +fall dandelion +groover +sprag +stair-rod +wish-wash +pricket +architrave +California laurel +net melon +Arizona sycamore +executive secretary +silverweed +silky cornel +surface ship +square sail +common purslane +villa +holly-leaved cherry +sweet birch +pecan +artillery shell +breast pocket +pirogi +scarlet runner +rabbit brush +mealworm +leather carp +palette knife +Jerusalem sage +boneshaker +slit lamp +digital voltmeter +polar glacier +square-rigger +homogenized milk +Sten gun +lesser calamint +pyrograph +Korean lawn grass +Zinfandel +crepe fern +western ragweed +clasp knife +distributor housing +cartouche +scooter +ski parka +jackknife +Carolina spring beauty +soft diet +candlesnuffer +horse trader +step stool +agouti +accelerometer +annual fern +judge advocate +angelica +roll film +treehopper +ombu +comer +sultanate +kitchen help +hooded ladies' tresses +milking machine +knuckle joint +Jamaica honeysuckle +music teacher 
+sauerkraut +Weston cell +slivovitz +Worcester sauce +tall bellflower +chancery +prophetess +casquet +shortfin mako +sorus +visual display unit +asp +grenadier +black pepper +crottle +erasable programmable read-only memory +jabot +ratchet +disk controller +chief petty officer +tap wrench +white mountain ash +cultivated rice +flying phalanger +skillet corn bread +BB gun +Elamite +European red elder +reed rhapis +ciderpress +inga +torpedo +wild teasel +bean curd +oeil de boeuf +acuminate leaf +bitter lemon +hitchrack +Lorraine cross +hostess +European dogtooth +adz +polonaise +rock sandwort +Waldorf salad +myrmecophile +klystron +mole rat +draba +corn borer +robusta coffee +chub mackerel +leatherleaf +chronometer +Moselle +sea aster +fennel +slop basin +constable +Brunswick stew +hydraulic pump +French omelet +icebreaker +Manx shearwater +press of sail +ninepin +blue succory +bootstrap +hallstand +chit +firefly +bearded seal +fuel filter +jezebel +mate +Roquefort +cheesecloth +plasterer +blue pimpernel +lake dwelling +shrink-wrap +goat cheese +common gum cistus +coastland +Sunday best +wild tobacco +mandrake +common unicorn plant +barbican +culotte +blockhouse +German iris +tarragon +caramel +wild rosemary +grain +voyager +squirting cucumber +eastern narrow-mouthed toad +creeping fern +luge +saffron +garland flower +furnace room +starship +Oriental scops owl +Italian honeysuckle +berserker +Chinese elm +scrubber +bishop pine +French polish +compromiser +skimmer +river shad +lobster thermidor +leadwort +man-of-the-earth +razorblade +vicegerent +empress +link +ham and eggs +wild lily of the valley +blackfish +splicer +fossa +mara +moneygrubber +brachiopod +fauteuil +caldera +finish coat +croupier +termer +leopard's-bane +sei whale +molucca balm +dolly +dog food +term infant +soft roll +episcia +sewer +inquiry agent +active citizen +perry +California newt +moon shell +bladderwrack +common shrew +dill +Dutch elm fungus +key lime +electrometer +divorce lawyer 
+lamb's-quarters +apple turnover +shipmate +Guernsey +legionnaire +electric blanket +Rocky mountain pinon +tobacco mildew +stinking iris +forestiera +departure lounge +wiper motor +jurist +scarlet runner +pallbearer +batter's box +inertial guidance system +fines herbes +oilcan +sisal +mustache cup +steamed pudding +Visayan +fiesta flower +lady tulip +lungless salamander +batiste +electrical system +blazing star +car carrier +Walloon +mother hen +stump +mulled cider +secondary coil +Alexandria senna +etui +scrumpy +Havasupai +jawbreaker +glume +ex-husband +Eskimo +Joint Direct Attack Munition +number theorist +five-hitter +pinstripe +Olympian +common mackerel +stone bass +bigos +Bahraini +airbrush +great ragweed +glass lizard +hand fern +roundel +riding master +shoetree +yellow avens +old fashioned +dolman +stinger +nursling +legate +faille +golden fern +bedpost +shop steward +kidney bean +bladderwort +internist +limeade +Bruneian +Coloradan +playsuit +wintergreen oil +Cantabrigian +mutton snapper +shot putter +hand grenade +moccasin +cobnut +marrow +separatist +cockscomb +discharge pipe +Gabonese +spade bit +chicken cordon bleu +varnish tree +European wood mouse +striped gentian +Ayrshire +curassow +moo goo gai pan +malarial mosquito +glow tube +ledger board +bib-and-tucker +European chestnut +suffragette +color wash +gaffsail +golden larch +voting machine +Kahlua +lungi +amusement arcade +Uzbek +butternut +mold +mule's ears +dickey +shrimper +trophozoite +dreadnought +shepherd's purse +greenhouse whitefly +spotted gum +copperware +perfect game +semigloss +spawn +telecom hotel +stakeholder +mason wasp +flibbertigibbet +chin strap +fringed pink +saki +urchin +memorizer +roulade +whiting +cling +corncrake +Queen of England +choo-choo +empty +heating pad +playmate +visualizer +popcorn ball +absconder +sou'wester +target acquisition system +mock-up +dental floss +tray cloth +haddock +bulblet fern +housing commissioner +delayed action +anchor light +harbor porpoise 
+water wings +PT boat +night latch +fennel +doorframe +green-tailed towhee +grey polypody +torture chamber +American germander +Chinese wistaria +cattalo +accompanist +rifleman +alpine clover +contrarian +lemon peel +Mexican cypress +sprog +dado +Galilean telescope +desmid +lockup +Latin +American raspberry +mescal +butternut +prairie orchid +downy yellow violet +green hellebore +radio compass +bread and butter pickle +Cherokee rose +knish +destroyer escort +Arkansan +langlaufer +pyxis +winter savory +velocipede +motley +winter savory +law student +barren ground caribou +apple dumpling +field hospital +works +city editor +European flatfish +Morchella crassipes +life office +boot camp +cream sauce +cape aloe +acetate disk +devil ray +tile cutter +Plymouth Rock +microspore +godown +Syrian +tiercel +American cranberry +lesser spearwort +anopheline +Spanish oyster plant +wire cloth +attic fan +birch beer +small computer system interface +crook +ribbon fern +explorer's gentian +nagami +I-beam +rosebud cherry +Jerusalem artichoke +Stillson wrench +pluralist +district manager +Levantine +orangeade +part-timer +post horn +Oregon grape +contadino +cargo helicopter +silverpoint +chaja +California bluebell +case +Shasta +cheese cutter +Leishmania +avalanche lily +iron horse +bialy +Yana +Delawarean +Prussian +nonpareil +hammer +hoper +chewink +anil +skim milk +desert four o'clock +crescent wrench +white marlin +blue jasmine +malacca +anadama bread +purple poppy mallow +ganglion cell +ligature +no-parking zone +golden clematis +Cotswold +aliterate +shebeen +yardarm +superbug +fanaloka +stinking cedar +spirochete +wort +pater +heaume +thermocouple +ironing +naval tactical data system +European goatsucker +prairie cordgrass +accused +foreign agent +halberd +western mugwort +esthetician +Persian lilac +cracked-wheat bread +crosscut saw +rock penstemon +paper cutter +crematory +ideologist +cattley guava +margarine +creosote bush +hoary plantain +spark gap +lumberjack +Greek 
valerian +mission bells +tight end +bigeye +large crabgrass +stone marten +cleat +lentil +bay scallop +lector +charger +assemblywoman +second lieutenant +boil smut +sarsaparilla +hydromel +cat flea +pinfish +whole milk +hairnet +myeloblast +peasant +blind curve +first offender +dwarf-white trillium +Brother +coatdress +gun emplacement +tamarisk gerbil +snap +air cushion +trailing edge +potato vine +gig +everlasting pea +champion +dibble +rattail cactus +timothy +prince's-feather +cutlas +lockring +sealing wax +Brussels lace +corn mint +highboard +she-oak +wild celery +pillar +Burberry +Hakka +leucothoe +bell tent +gallery +coontie +leather fern +smack +adenovirus +linoleum +chain wrench +tammy +gas fixture +nut bar +baneberry +butterscotch +goat's rue +bullock +grey snapper +mother-in-law +hyson +wayfaring tree +mollie +needle spike rush +buckwheat +bayberry +brush-tailed phalanger +dry rot +harborage +stormy petrel +Oriental beetle +Atlantic halibut +coping saw +simple fruit +viscose rayon +surgeonfish +upstairs +security system +common ragweed +verticillium +pancake batter +hawk's-beard +Dutchman's-pipe +refrigeration system +European parsley fern +Ivy Leaguer +totalitarian +gonococcus +towhead +showy sunflower +pallium +multiengine airplane +hair trigger +rabbit-eared bandicoot +siskiyou lewisia +fuel system +flat arch +broad beech fern +Alpine lady fern +bracken +Kentucky black bass +rut +mountain maple +tunaburger +umbrella fern +white-headed stilt +meat hook +panhandler +washhouse +barnyard +safety lamp +leg +ripple mark +paper +sagebrush lizard +light heavyweight +common nutcracker +operator +stalking-horse +horseless carriage +fishhook +suction cup +peg +Ungulata +false teeth +round-bottom flask +Luba +campaign hat +firebox +rudder +parapet +ice pack +appellant +spirit stove +metheglin +common bamboo +soapwort gentian +pannikin +time capsule +burn bag +folk poet +tropical prawn +end man +new caledonian pine +linen +web +free trader +jury box +railing 
+pignut +leaker +potboy +rubber boa +white snakeroot +plumber +Candida albicans +surfboat +woman +promulgator +eyecup +wild China tree +rattlesnake master +Viyella +alpine salamander +ailanthus silkworm +Albatrellus ovinus +war room +meadow vole +robotics equipment +rotary actuator +Engelmann spruce +pinesap +beefcake +native speaker +ridge +injector +water chute +salmonberry +decoupage +bottlebrush +date plum +circlet +American mountain ash +pocketbook +horsemint +sweet four o'clock +kirpan +pinto bean +chervil +equator +range animal +candy thermometer +calanthe +cul +stipendiary +brahman +pelican crossing +topgallant +wild senna +sliding window +carrier pigeon +Tatar +quadruplet +bumboat +spearmint oil +slip clutch +young Turk +golden yarrow +shank +glasswort +dental plaque +Manduca sexta +Northern bedstraw +dent corn +Life Saver +western wall flower +bedder +wherry +Tuscarora +scrapple +borstal +reflux condenser +problem solver +nondriver +perforation +eastern cricket frog +white wood aster +broad buckler-fern +Cape primrose +herringbone +head louse +earl +baton +recording system +primary color for light +cherry laurel +pomfret +ratafia +chocolate milk +obscurantist +revisionist +rood screen +magnetic needle +commensal +oil tycoon +celebrant +domicile +harvest mouse +California nutmeg +greater spearwort +black-billed cuckoo +winepress +demographer +straw boss +diabetic diet +sweetmeat +rabbet +ming tree +basketweaver +freestone +walk-in +Aryan +box coat +audio amplifier +chicken salad +churidars +whydah +box +batman +siren +selectman +gouger +drip coffee +Caesar salad +interpreter +whinstone +grey goldenrod +minicomputer +honey crisp +hypercoaster +Irishman +swamp white oak +reed canary grass +globeflower +cynthia moth +fennel seed +canthus +chino +blind date +tar pit +watermelon begonia +fishtail palm +overcast +Pearmain +primary color for pigments +coal seam +wherry +safety bolt +cretonne +Michigan lily +inflater +moneybag +huckleberry +brassard +bush vetch 
+looking glass tree +pinwheel roll +alfalfa sprout +sea kale +clinometer +achira +lorgnette +potter wasp +gilded flicker +tody +capulin +captain's chair +crackle +gerardia +prie-dieu +venture capitalist +New Jerseyan +block and tackle +elf cup +bur reed +automatic transmission +wax palm +flytrap +crack willow +coachwhip +swizzle +lugger +Dewar flask +baster +oxyacetylene torch +Culex quinquefasciatus +St Peter's wort +wild hyacinth +Russian almond +burrfish +wintergreen +katsura tree +butcher knife +perfumery +thresher +porte-cochere +sheepwalk +hypotenuse +Dalmatian iris +buttercup squash +demiglace +goldenseal +preceptor +rigger +poikilotherm +old-age pensioner +posthouse +wood horsetail +repeater +reciprocating engine +Rambouillet +terra cotta +togs +battledore +horizontal tail +missile defense system +trier +morello +woolly adelgid +munition +double creme +in-fighting +squirrel corn +crow's nest +antler moth +brake cylinder +bandoleer +noticer +Parmesan +hipline +cheapskate +Dubonnet +mole rat +bog aster +ribbon tree +meadow rue +nard +ratel +loose smut +snapping shrimp +golden glow +basil thyme +Florida strap fern +moonshine +flume +lace fern +black bream +orchestra pit +archerfish +exile +ringdove +career man +godfather +bottom-feeder +pasteurized milk +dental implant +pedicel +Catalpa speciosa +yellow foxglove +lancet arch +steam shovel +sampan +patrol boat +sailor cap +tollgate +monal +velociraptor +cacique +jack oak +cursed crowfoot +creep +Parry manzanita +common matrimony vine +grace cup +caecilian +spurge laurel +prickly lettuce +Regius professor +camail +Sitka willow +Courtelle +gin sling +dogmatist +guest +saltine +dust cover +sport +sweeper +feist +lady's-eardrop +vibist +wire stripper +tenpin +interplanetary space +beet green +pruning knife +drainage system +gunnery +ballet master +lime juice +flak catcher +lacrosse ball +Canadian aspen +beatnik +railhead +utilizer +spadefish +Arizona white oak +city university +dense blazing star +hedger +chain 
pickerel +right-hand man +namby-pamby +nacelle +redneck +tumbler +Chief Secretary +cannon +cupola +kummel +papaya juice +Burton +Stanley Steamer +loganberry +stylus +square meal +rock bass +western ladies' tresses +dramatist +assignee +tandoor +trumpetwood +segregator +green adder's mouth +coral necklace +ani +iceboat +densimeter +oxtail soup +kernel +cos lettuce +greenishness +panchromatic film +Parker House roll +oatmeal +backsaw +double Gloucester +bailey +storage cell +giant +coconut milk +broadtail +barouche +loir +soybean meal +white-leaved rockrose +junction barrier +spandrel +sweat bag +goldilocks +flowering wintergreen +cockspur +beef fondue +holding cell +cardamom +cagoule +Kamia +tangelo +Herschelian telescope +wine bar +kachina +sand sage +guy +ivory palm +citrus mealybug +topper +ladyfish +force pump +fanion +calaba +Iowa +orrisroot +ivorybill +Secretary of Agriculture +gagman +dry cell +hypnotist +kenaf +grey alder +deathwatch beetle +gagman +magnetic stripe +trap door +abdominal wall +prefab +broomcorn millet +architeuthis +angler +Pacific giant salamander +barbette carriage +low-fat diet +veal scallopini +B battery +wallah +landing flap +pistachio +jaguarundi +nagi +cicerone +felt fungus +Aertex +stocks +smooth aster +patchouli +lemon sole +sleeper +basket fern +dundathu pine +anjou +Moreton Bay chestnut +broom sedge +candid camera +red angel's trumpet +oilstone +cinnamon toast +Pacific walrus +fruit custard +Jehovah's Witness +mate +voyeur +Esselen +achromatic lens +sanguine +brine shrimp +dunce cap +swot +transit instrument +grey willow +pack +bench clamp +Nova Scotian +gadgetry +silvery spleenwort +enchantress +rough fish +morula +giant taro +sorus +roux +polyhedral angle +spruce beer +Chicano +cola extract +outfielder +kohleria +white-rumped shrike +car-ferry +subway token +spoon bread +totara +corn borer +bowhead +tensimeter +water scooter +flickertail +Catholicos +pleaser +blue-eyed Mary +calabash +handyman +cascades frog +facing +scarlet oak 
+lutist +ginger +tree tomato +Harvey Wallbanger +tent peg +insectivore +fusil +swale +chinning bar +bladderpod +New Dealer +dhoti +proscenium arch +common vetchling +channel +collect call +safflower +Texas tortoise +test equipment +theca +RAM disk +sheep sorrel +rammer +buttonhook +honey mesquite +dominus +babirusa +queen +Aspergillus fumigatus +crash barrier +nonmember +Muscovite +verdin +Australopithecus afarensis +Turkish Delight +stalked puffball +giardia +divider +mountain skink +head smut +pacemaker +evaporated milk +rattlesnake fern +flamethrower +navy bean +bather +steed +showy orchis +stone crab +artichoke heart +phantom orchid +space helmet +swamp laurel +privateer +junior +surcoat +bristlegrass +flower girl +aphid lion +penthouse +lemonade mix +coude telescope +natal plum +scriber +wood nettle +rape suspect +resplendent quetzel +western poppy +choir loft +fore-topsail +thyme-leaved sandwort +erotic +short circuit +outdoors +flowering tobacco +hookup +aviatrix +corker +horehound +horn +swamp pine +water biscuit +cherimoya +vaporizer +courtier +European sole +full skirt +Mother Carey's chicken +cymule +huck +white snapdragon +mountain nyala +country borage +bonduc +casein paint +grampus +shrimpfish +lodge +dragee +black walnut +caraway seed +roper +glass cutter +tab key +Richardson's geranium +demigod +chichipe +Italian ryegrass +cadet +electrograph +rudd +carpenteria +foie gras +lignum vitae +hedge nettle +pledger +American hackberry +flageolet +beaked hazelnut +reflectometer +sticky geranium +marriage bed +white pepper +japanese clover +whiteface +gnat +extrovert +Canada plum +talipot +chicken stew +egg foo yong +fraxinella +skibob +saucer magnolia +jacket +green smut fungus +cloakroom +landing skid +booth +ice milk +dipole +striped coral root +red buckeye +roughcast +breaststroker +cowherb +razor clam +first-aid station +briarroot +clambake +lander +Bramley's Seedling +frail +jird +minisub +luging +poison milkweed +European lobster +epidemiologist 
+spandex +paloverde +marumi +bypass condenser +punter +petty spurge +Coryphaena hippurus +bilberry +vermillion rockfish +witness box +viscometer +pulque +Massachusetts fern +herring salad +ridge tile +mesa +dwarf grey willow +southern aster +punch pliers +tarnished plant bug +hoop pine +Japanese red pine +benedick +rebozo +silver plate +silver willow +mouse-ear hawkweed +bonito shark +abutment arch +noble cane +tiger rattlesnake +pongee +jumping plant louse +pattypan squash +giant ryegrass +railroad bed +stiff aster +imperial Japanese morning glory +laundry +winter cress +large white petunia +tea maker +pen-and-ink +early warning system +lug +monocot +sea wormwood +breechblock +postage meter +third rail +Mongoloid +Australopithecus boisei +umbrella tent +stirrer +Dumpy level +beroe +post and lintel +green spleenwort +tomato paste +dishpan +stentor +sweatband +cobbler +New York fern +gaff +prairie willow +cyclops +jigsaw +rotavirus +pallet +eastern ground snake +boiling water reactor +acute triangle +agora +European cranberry +roebuck +surgical dressing +busboy +cannikin +feedlot +common pond-skater +cochin +horsehair lichen +fetter +sapote +fichu +dermatologist +fire tongs +creme anglais +foster-mother +laurelwood +chicken snake +mincemeat +rocker +wild spinach +powder and shot +butterwort +auxiliary engine +mamey +hart's-tongue +sucking pig +American turkey oak +troopship +buttermilk +divi-divi +boatswain's chair +soda fountain +southern flying squirrel +elastic +cutaway +housekeeper +renegade +apple rust +bridoon +machicolation +stunt +keyhole limpet +personality +solitary vireo +epidendron +Jihadist +boffin +bettong +terror +partial denture +pusher +saltcellar +capstan +large poodle +Bibb lettuce +low-bush blueberry +staple +banded krait +sickroom +barnyard grass +wandflower +woodworm +bluegrass +squirrel's-foot fern +rabbitfish +delta wing +milking shorthorn +limber pine +guru +gamine +scythe +sweetsop +Gruyere +bloodmobile +mine detector +American mistletoe 
+silver beech +hound's-tongue +Lombardy poplar +basket fern +pink-and-white everlasting +redtail +Aladdin's lamp +mace +outtake +condensed milk +Canada wild rye +silver perch +waxflower +taxer +Chinese chestnut +Our Lord's candle +mugwump +school system +salp +osso buco +dress shirt +butterweed +low-fat milk +couchette +broomcorn +proscenium +mill agent +smut grass +humpback +southern spadefoot +military leader +canebrake rattlesnake +tailor-made +ebony +beach house +flying gecko +hoary alison +typhoid bacillus +Romanov +vanilla pudding +sweet cicely +Spodoptera exigua +dress rack +flannel +skipjack +bolognese pasta sauce +rooibos +thunderer +blessed thistle +gauntlet +mahatma +granadilla +laurel sumac +Yuma +thyme-leaved speedwell +encyclical +twill +linocut +manna gum +spark arrester +cocklebur +Indian hemp +lemon oil +Hall's honeysuckle +raceway +flop +Himalayan lilac +one-flowered wintergreen +photosphere +silvery spleenwort +convex polygon +canarybird flower +foster-sister +fluffy omelet +palanquin +roll +dandelion green +Javanese +workpiece +Carmelite +bread mold +schlemiel +wild lily of the valley +grugru +solenoid +puff batter +skep +balance wheel +Gadaba +portia tree +mobcap +two-man tent +scuffle +firebrat +ant lion +anise +caster +giant petrel +American water spaniel +naboom +treasure ship +foster-son +fiddleneck +alidade +sugar refinery +wild oat +water beetle +generic +damson plum +abrocome +detainee +pitch pipe +coast +nilgai +radiotherapy equipment +heart-leaved aster +gristmill +grocer +Appaloosa +Cheviot +brake pedal +lantana +cave myotis +Rob Roy +sea spider +latrine +carpophore +recycling plant +coondog +brace and bit +funambulist +eggar +mantelet +postdoc +mezzanine +coco plum +pulse generator +high-vitamin diet +menhaden +mechanical engineer +bergamot mint +Chuvash +grated cheese +helicon +belladonna +beet armyworm +eelgrass +resuscitator +interrupted fern +arrow grass +cistern +Pacific herring +colostrum +journal bearing +Fauve +wrist pin 
+canape +choice morsel +quadraphony +guard boat +shortgrass +claymore mine +hitching post +cargo door +decoder +gym rat +Cocopa +commander +apple of Peru +seckel +yellow goatfish +dog flea +dodo +oconee bells +Tudor arch +turkey stuffing +ebony spleenwort +wheat flag smut +scolopendrium +Brazilian pepper tree +gusset +inspector +lunar excursion module +baron +plantigrade mammal +Creole +phosphate +aromatic aster +ghee +audiovisual +onychophoran +cotton stainer +lieutenant junior grade +spheroid +amen corner +caper sauce +Caladium bicolor +dyer's rocket +seaside goldenrod +flint corn +Very pistol +rotifer +steeplechaser +rouleau +escape wheel +Namibian +millivoltmeter +emmer +climatologist +agateware +sea lyme grass +inclinometer +water fennel +saddle seat +vicar +garden cress +ski rack +Norfolk jacket +casaba +coast rhododendron +sericea lespedeza +hematocrit +autopilot +tilter +finish coat +Pennsylvanian +shrubby St John's wort +podocarp +percussion cap +ceriman +peanut bar +gean +jack +durra +rotor +carob +cottage tulip +three-spined stickleback +trencher +elevator +kalumpang +abaca +Australopithecus robustus +active matrix screen +water bed +hatmaker +lodestone +cat food +overcup oak +balletomane +popgun +rheometer +process cheese +frog legs +heartleaf arnica +p-n-p transistor +steam turbine +Tulu +scalene triangle +licorice fern +coffee break +trade unionist +starved aster +firing pin +water gum +Masonite +hairspring +seminarian +blue racer +forecastle +scrub pine +Atlantic spiny dogfish +kopje +orphrey +fan tracery +gee-gee +vixen +interstellar space +Harris Tweed +sawmill +lemon mint +bitewing +ringlet +Chinese mustard +paleontologist +American hazel +brigantine +clay-colored robin +zombie +nectarine +West Indian jasmine +pineapple weed +rusher +gynecologist +pole +thylacine +myrtle beech +golden cup +woodruff +T-bar lift +terebinth +service club +homegirl +Blue Mountain tea +figwort +New Hampshirite +Stayman +tonometer +white turnip +messuage +cruet-stand 
+colliery +connecting room +lesser twayblade +bland diet +crown prince +beggarwoman +restharrow +bower actinidia +firebug +hepatic tanager +telegraph +Spodoptera frugiperda +spackle +carpenter's square +pyx +supermom +thickhead +whorled milkweed +Arctic char +Chinese rhubarb +pince-nez +wolverine +tomato concentrate +cascarilla bark +red underwing +leather flower +Jerusalem thorn +bullpen +Salisbury steak +anode +coffeeberry +bottling plant +fritter batter +aerial torpedo +matrix +local oscillator +stalked puffball +bruin +three-cornered leek +wassail +stabling +damping off fungus +myriapod +osier +lesser kudu +cownose ray +chokecherry +wagon +obstetrician +Glengarry +even-pinnate leaf +wine sauce +osteocyte +baker's yeast +heir presumptive +blackjack +tympanist +golden fern +fipple +Japanese oak +bar mask +stamping machine +argus +knobcone pine +oil beetle +lanai +upper berth +condenser +proctologist +catechu +wild spurge +vestry +ground snake +proton accelerator +walker +scarlet bush +transom +lagging +bouillon +slender loris +black currant +developer +football hero +plum sauce +striped mullet +prince charming +fictional animal +prosimian +lug wrench +lemonwood +kirsch +spy satellite +black caraway +Thompson Seedless +bead tree +purple fringeless orchid +Virginia strawberry +chigetai +punkie +gall wasp +addressing machine +rock polypody +good-king-henry +spring cankerworm +wimple +noncandidate +saskatoon +hacienda +Darjeeling +snowberry +lounging pajama +ascospore +ski-plane +hedgehog cereus +Welsh onion +yautia +coaster brake +sickle cell +parrot's beak +fuller's teasel +painted greenling +scablands +stuffed cabbage +barrel organ +etcher +dwarf maple +camp +Australian blacksnake +currycomb +obtuse triangle +rose gum +psychrometer +abridger +torpedo +carpet loom +sodalist +slender rush +loligo +sclerometer +wimp +dotted gayfeather +green ash +pinstripe +moralist +medusa's head +garden centipede +heath aster +fool's parsley +olla podrida +Potawatomi +Edam 
+toothache tree +hulk +seabag +narthex +compartment +prairie star +lookdown +B-flat clarinet +event planner +clip lead +shirting +milk punch +supercharger +macadamia nut +giant coreopsis +computer store +martingale +keyboard buffer +summer flounder +squash ball +gas turbine +object ball +plier +black mulberry +reef squirrelfish +scampi +willow aster +bowler +striped marlin +smooth muscle cell +diplodocus +Liberty ship +sponge cloth +guitarfish +walking leaf +showroom +California bluebell +bolo +turnbuckle +boysenberry +hardware +Gael +imago +endorser +jujube +dust bag +rapporteur +field wormwood +low-water mark +naval missile +Pacific yew +reversible +crabapple jelly +poniard +barricade +spawner +simnel +seltzer +deckle edge +needle +timbale +satellite transmitter +organization man +job candidate +orderly +native cranberry +fir clubmoss +coaming +chartered accountant +electron accelerator +Sierra plum +American foxhound +long underwear +Penobscot +blueberry yogurt +biretta +cascara +Paranthropus +Dorian +nun's habit +lenten rose +Augustinian +designer +northern phalarope +mombin +hazel mouse +reeve +waffler +telegraphy +Verpa conica +ignition coil +Japanese oyster +S-shape +divining rod +ant thrush +throat protector +interlocutor +Desmodus rotundus +pere david's deer +attenuator +Cypriot +red sandalwood +pendulum watch +broadcloth +striped drum +sequence +safety arch +diapensia +hog +western spadefoot +chlorella +comb-footed spider +Chechen +darning needle +C-ration +hard beech +piano action +scaling ladder +Nepal trumpet flower +ravigote +screw wrench +ramekin +Lyonnaise sauce +dinner napkin +partial veil +masseuse +coatrack +mooring tower +blue-eyed African daisy +English horn +baton +rope tow +toll bridge +massage parlor +quark cheese +lounging jacket +tall goldenrod +flying jib +coordinate axis +barley-sugar +integrator +worm gear +captain +sweatshop +class +layer +chili powder +dripping pan +oatcake +newsroom +tadpole shrimp +rake +trade magazine +silks 
+ram's-head +senior +knower +masseur +yam +peg +wheel tree +hardbake +test room +long-spurred violet +creeping spike rush +shrapnel +coffee senna +matchbox +creeping soft grass +welder's mask +pickaback plant +urial +hooded pitcher plant +incense cedar +Ohio buckeye +ant cow +skeleton fork fern +Indiaman +swamp ash +testatrix +marang +spherocyte +Winesap +Indian mallow +teju +Yersinia pestis +dye-works +sauerbraten +coral bean tree +safe house +postulator +eyas +lotus +wood vise +lady-of-the-night +East German +cymling +rock candy +western omelet +anoa +rainbow seaperch +crossover voter +Finn +tree shrew +hog plum +Federal +shagbark +clockwork +Alexandrian laurel +metal wood +brill +military chaplain +trend-setter +call-back +Indian rat snake +spurred gentian +Japanese maple +forest goat +bee moth +viola da braccio +duckboard +armyworm +hangnail +counterbore +cream-of-tartar tree +Mullah +bonbon +water hazard +temple orange +corporatist +rough bindweed +Turkish bath +mistletoe fig +beach sand verbena +caddisworm +English plantain +brown Betty +power pack +lion's-ear +Francis turbine +stayer +dichondra +marsh St-John's wort +squab +energizer +common horehound +mantispid +pullback +handwheel +spark arrester +yakuza +Virginian witch hazel +grunter +waterworks +bondwoman +chain printer +stockjobber +coconut milk +yardgrass +blue chip +bridle path +riser +pleurothallis +saltwort +salal +broadside +blackboard eraser +bastard +Para rubber tree +red bat +digital-analog converter +calabash +cashier +cow shark +horned pout +microphage +monologist +woolly monkey +Illinoisan +marsh horsetail +distaff +siris +eparch +gooseneck loosestrife +sounding rocket +multiprocessor +saiga +xerographic printer +madrona +right triangle +sweet gale +red maids +wolfsbane +pork-and-veal goulash +French sorrel +mutterer +Venetian sumac +drumlin +white crappie +squire +large-flowered calamint +northern cricket frog +mushroom sauce +supertanker +morello +auxiliary boiler +Virginia thimbleweed 
+cottage tent +bubble shell +big shellbark +wormwood sage +cider gum +coast lily +American feverfew +Peruvian balsam +purple silkweed +tobacco moth +desk dictionary +rock elm +eastern indigo snake +Japanese privet +lamb +levee +L-plate +soapfish +painted tongue +scuttle +markhor +Marburg virus +mackinaw +major +crypt +ball and chain +domestic silkworm moth +bottom feeder +mistress +death house +freight elevator +bellyband +Pulex irritans +Bacillus anthracis +fire control radar +hysterosalpingogram +turbogenerator +decompound leaf +vambrace +scentless camomile +Medinilla magnifica +prima ballerina +Northern Spy +quartz lamp +grains of paradise +justiciar +felt fern +seismograph +Madagascar jasmine +imaret +white perch +Alpine mouse-ear +tea bread +yellow bass +poseuse +espionage agent +punching bag +eurypterid +orange sneezeweed +banded stilt +armhole +postern +mother +kapuka +catechumen +Soubise +Sauvignon blanc +gunnery sergeant +self-starter +ceratozamia +Atlantic cod +Reoviridae +blood cup +horseshoe bat +oriental plane +voussoir +fetterbush +samara +truncated pyramid +lingcod +athenaeum +shyster +Carolina hemlock +submarine torpedo +floating fern +yataghan +sun tea +viola d'amore +conenose +ventilation shaft +walk-up apartment +saury +wild wheat +porcupine ball +tahini +kris +grass fern +drip pan +black bryony +Scotch broth +tapioca pudding +southwestern toad +Hare Krishna +guimpe +wild madder +megalocyte +teaching fellow +shrubby penstemon +lesser wintergreen +privet hedge +Fahrenheit thermometer +stern chaser +prickly ash +pump room +ricer +chicken mousse +wing commander +sun gear +bolus +alpine milk vetch +opera cloak +twinjet +Goldie's fern +abnegator +alphabet soup +node +grape jelly +early coral root +Tarzan +quarterstaff +greeter +Eurasian woodcock +primary coil +quirt +tinkerer +bolt +creme de fraise +voltage regulator +news photography +Jat +bristly locust +Gouda +dickey +lobster butter +dwarf flowering almond +fagot stitch +Reform Jew +ostrich fern 
+bathyscaphe +purple mullein +alpaca +civic leader +jellaba +Arizona ash +wasabi +Irishwoman +choke +stockinet +religionist +sewage disposal plant +bittersweet +Hyphantria cunea +pheasant under glass +screen actor +chapterhouse +quoit +horseshoe bat +rapper +cupule +planetary gear +cascade penstemon +redoubt +salt +areaway +megalomaniac +bush willow +amethystine python +plains spadefoot +colour supplement +kick pleat +bell apple +narwhal +slippery elm +stenograph +baa-lamb +quadrant +balker +jobcentre +spit curl +bastard indigo +malacca +serow +adobe lily +yacca +palestra +penalty box +scrub beefwood +reenactor +screening +white bryony +alderleaf Juneberry +harpoon +alpine clubmoss +neurosurgeon +surrey +sweet calabash +Scotch laburnum +coquille +French honeysuckle +extrados +pipe cleaner +southwestern white pine +Virginian stock +scaly lentinus +aileron +carob bar +swordfish +Alpine woodsia +negus +wireworm +sweep +goldfields +drop arch +European bream +roly-poly +pin +bastard wing +fustian +wild buckwheat +lake whitefish +overcoat +water filter +Bermuda chub +New Zealand spinach +high-hat cymbal +European larch +radiologic technologist +fine-tooth comb +brunch coat +splice +electronic converter +overmantel +extern +taper +cluster bomb +teletypewriter +pinwheel +trailing arbutus +quipu +creeping zinnia +orange milkwort +tabard +Australopithecus africanus +melancholy thistle +insole +courser +darkroom +surface-to-air missile system +bark-louse +Confederate +neritina +clip-on +spouter +trench knife +outside caliper +dhak +Limburger +chuck wagon +buttercup squash +shirtdress +pouter pigeon +dirty old man +zodiac +fennel flower +mother figure +appointment +Manichaean +lignum +bouffant +rum sling +Ravenna grass +hibachi +gin rickey +American harvest mouse +cocozelle +western wheatgrass +black crappie +rhombus +Missouri goldenrod +barndoor +wild mango +pneumococcus +Boston lettuce +ratline +desert holly +cobweb +fluoroscope +ethnologist +tor +bullshot +stockade +greave 
+rock sea bass +slip-joint pliers +taxi dancer +schizophrenic +zill +creme de menthe +orange-blossom orchid +divot +supplejack +busybody +casemaking clothes moth +ramrod +gearbox +birdcall +Wiffle +thwart +beauty consultant +chicken paprika +trawl +skep +spirometer +hopper +kvass +doggie bag +bath chair +showy daisy +wild tamarind +Tarsius syrichta +glyptics +Algerian +cargo area +bunk +Velveeta +iconoclast +clinch +New Caledonian yew +false mallow +Japanese tree lilac +convex polyhedron +water boatman +cruise missile +finisher +colonoscope +cumin +wickiup +saccharin +whipcord +trailer camp +eryngo +cuckold +yam bean +fighting chair +forewoman +galingale +citron +positivist +four-lined plant bug +suet pudding +field pea +Circaea lutetiana +deer grass +trap-door spider +common corn salad +mirror carp +sounder +second-in-command +seaside alder +burgoo +ming tree +curry sauce +courbaril +green alder +figure loom +fauld +halfbeak +squelch circuit +cladode +winter cress +tongue and groove joint +dwarf dandelion +joss house +western buttercup +welted thistle +potato tree +anglewing +cookfire +marzipan +hood latch +seed shrimp +common moonseed +toasting fork +bevel +three-quarter binding +midwife toad +stage director +Pentecostal +technical sergeant +golden-beard penstemon +drunk +silky oak +corn gluten feed +T-square +stoker +selling agent +cruse +server +rope-a-dope +bicorn +matzo meal +wide wale +roadblock +false foxglove +tuck box +bandsman +smoke bush +machinist's vise +Highlander +scholiast +self-starter +Swedish rye bread +spark transmitter +maverick +maquiladora +cabinetmaker +compress +rainbow shower +huntsman's horn +mackinaw +copper rockfish +lappet +nitrate bacterium +telephone plug +soutache +Dacron +toboggan +sissoo +yogi +laurel-tree +vice chancellor +Christ's-thorn +cartridge fuse +serial port +quassia +tarweed +pecopteris +beggarweed +anchovy pear +bookbindery +woodland oxeye +toad rush +sandalwood tree +marsh andromeda +Tyrian purple +boothose 
+tragedienne +fragrant cliff fern +festoon +bondwoman +melancholic +butternut squash +exhaust valve +semi-skimmed milk +glowworm +Virginia oyster +Identikit +ayah +gallows tree +Carioca +monoplane +jewels-of-opar +scallop +moth miller +marsh cress +lobed spleenwort +ricotta +emitter +arame +tub gurnard +army attache +maniac +organizer +pheasant's-eye +Melba toast +homeboy +Bavarian cream +Maximilian's sunflower +backstop +Tremella foliacea +yellow avens +spreading fleabane +plumb level +false rue anemone +zabaglione +climbing maidenhair +doeskin +walking shoe +lancewood +material +jacksnipe +South American poison toad +agonist +hinny +paper mill +psychophysicist +valley girl +toast mistress +jorum +tiler +chicken Tetrazzini +trivet +grasshopper +three-mile limit +kink +kiang +pole horse +jig +Cornish heath +hedge thorn +false alumroot +Popper +remount +photojournalist +sideroblast +stonecress +Agave tequilana +Japanese lilac +hawse +maenad +air bag +leaf spring +dwarf willow +soda cracker +contralto +moleskin +pilaster +Audubon's caracara +pia +American organ +bleu cheese dressing +betel palm +PC board +almond willow +socializer +tone arm +stammerer +free-liver +scaler +Gentianopsis crinita +leak +black haw +hound's-tongue +grass pea +Stassano furnace +coralbells +ministrant +perihelion +Luxemburger +powder-post termite +arboreal salamander +cushion flower +foramen magnum +pyrethrum +poacher +woolly mammoth +horned chameleon +tearaway +father-figure +tufted gentian +salmi +finger millet +physa +registrar +polyoma +bamboo shoot +matchlock +seine +congress boot +bulgur pilaf +monosodium glutamate +Kentucky wonder +mycologist +kedgeree +ragweed pollen +boarfish +yellow pimpernel +tan +northern Jacob's ladder +macrobiotic diet +migrant shrike +big-cone spruce +colonialist +white dogtooth violet +bath asparagus +webbing clothes moth +ladies' room +experimenter +prairie bird's-foot trefoil +bootleg +cognitive neuroscientist +fire chief +flagfish +dendrite +stinking 
goosefoot +fore edge +hogfish +Spanish cedar +hotel-casino +Tory +life-support system +pea flour +cash bar +Chenin blanc +white-footed mouse +Canada garlic +salt-rising bread +roomette +mastodon +bell founder +long iron +bi-fold door +fig-bird +European water shrew +dyer's weed +frog orchid +allosaur +Florida yew +wild potato vine +crape fern +flat-topped white aster +klebsiella +oil heater +waxmallow +enjoyer +mesocarp +semidesert +senior vice president +coccidium +burrawong +syllabub +jump suit +harrier +leaf roller +cherrystone +cinchona tree +touring car +eulogist +air force officer +red goosefoot +cat thyme +smoothbore +slugger +cardiac monitor +cobber +blister rust +musicologist +rolled biscuit +Braun's holly fern +hog plum +nonpasserine bird +pascal celery +damson +Jonathan +Sheraton +cohune palm +egg white +baton +sixth-former +Siberian pea tree +choanocyte +wineskin +auditor +detention home +Leichtlin's camas +Chartreuse +clusia +club car +wattle and daub +security blanket +common American shad +assistant professor +marsh pea +camomile tea +gopher hole +gravure +Freudian +spirillum +maharani +equilateral +crow garlic +mammee apple +felwort +hardtop +dillenia +curlycup gumweed +pilot engine +calcimine +wooly lip fern +bitter dock +wineberry +jumper +monolingual +spinning frame +old-timer +native cat +diving petrel +sodium-vapor lamp +marchand de vin +sexton +matelote +interior designer +windfall +mole salamander +minder +bodkin +neutron bomb +Caloscypha fulgens +slinger ring +mezzo-soprano +aura +Southern Baptist +viscacha +midfield +tie +prosthetist +round-headed leek +yellow mariposa tulip +canary grass +staddle +Tokay +Muenster +brazil nut +California black walnut +applesauce +penologist +virgin's bower +tenon +steward +Jerusalem oak +red-bellied snake +bindery +scow +fluid flywheel +bullhead +satinleaf +clove +double glazing +matron +wild parsnip +winged elm +shoot-'em-up +musk deer +white rust +lock +Cornishman +Vidalia onion +corn spurry +freeloader 
+justice of the peace +inlay +myxobacteria +tiglon +tangram +German ivy +scented fern +woolly daisy +caretaker +gastroscope +scuppernong +spotted sunfish +guilloche +codling +wormcast +Eskimo curlew +tayra +European fly honeysuckle +septuagenarian +third gear +coatee +red alder +water ice +cubitiere +frame buffer +gamboge tree +pernyi moth +chicken Marengo +Galliano +Lincoln +true sago palm +hunter's sauce +carpet beater +alpine goldenrod +arch support +vehicle-borne transmission +jilt +paternoster +redcap +Siberian larch +hoary plantain +swan's down +chicane +reverse +divan +kneeler +alexic +mock turtle soup +daffodil garlic +mission bells +squilla +ursinia +winter's bark +trifoliate orange +discina +frijole +Swiss steak +maildrop +knotgrass +dog fennel +drum sander +heroin addict +costume +camber arch +shining willow +lutefisk +red porgy +microfossil +good old boy +angle bracket +pitcher sage +bordelaise +heat exchanger +carrion +bush jacket +fanjet +coach +blackface +sicklepod +Manhattan clam chowder +daisywheel printer +olive +Sphacelotheca +Spanish needles +brown root rot fungus +boudoir +encyclopedist +V-8 juice +red haw +brass buttons +gym suit +skywalk +water wagon +gas-turbine ship +stoup +lisle +sailor suit +box beam +balm of gilead +housemaster +hayrack +neutralist +water elm +brook thistle +doyenne +nark +alpha-tocopheral +WASP +hydrilla +water-shield +footlocker +variola major +pargeting +ion engine +yellow globe lily +Malecite +bloodleaf +yellow sand verbena +whorled loosestrife +packinghouse +Carolina parakeet +Virginia waterleaf +armband +red rockfish +factory ship +moon trefoil +jump seat +water gillyflower +yerba mansa +chamfer bit +compass saw +hopsacking +Indian rhododendron +sickbed +treacle +honey eater +mailsorter +seabeach sandwort +sob sister +primrose jasmine +prince consort +elocutionist +wishing cap +runner +trestle +sugar water +half-and-half dressing +fringed poppy mallow +portiere +bung +swan orchid +weather satellite +beef broth 
+marblewood +sapper +agitator +wren-tit +grade +allspice tree +spacewalker +American hornbeam +sieva bean +dill seed +potoroo +love-in-winter +alembic +Cheshire cheese +small white aster +Oregonian +flipper +twill +differential gear +Prince Albert +licorice +foster-father +Melkite +portraitist +Yosemite toad +Cox's Orange Pippin +slender wheatgrass +knob +silique +Rocky Mountain bee plant +stirrup pump +chicken hawk +sweetbrier +Sierra lodgepole pine +poulette +biohazard suit +striated muscle cell +Geiger counter +World Wide Web +turmeric +prairie wake-robin +latchet +pushball +grill +shooting lodge +floating-moss +refried beans +boojum tree +red poll +toothbrush tree +rabbiteye blueberry +red haw +sweet vetch +delta +upland cotton +ballet mistress +padrone +complementary color +great Solomon's-seal +bud brush +brandy sling +spinster +Andorran +Mojave aster +mackinaw +golden calla +bottom rot fungus +segmental arch +periwinkle +hellion +topknot +copper +Mexican hyssop +weeping love grass +point woman +pathogen +fall cankerworm +common shiner +silverspot +corer +atomic pile +crystal detector +yellow spot fungus +truncated cone +saprobe +variegated horsetail +Cro-magnon +cercaria +aglet +pollster +oyster bed +pancake turner +egg cream +sporozoite +quirk molding +mutisia +sound bow +physic nut +sugar-bush +cow +magnetron +jungle hen +brassie +rock bit +taco sauce +seeded raisin +desert selaginella +folding door +vinegarroon +Pinot blanc +rye +ellipsoid +betel nut +tree of knowledge +ambrosia +long tom +breechloader +bicolor lespediza +cosmetician +monoblast +American oil palm +prancer +farina +caiman lizard +hardball +bullock's heart +cotton rat +whiting +weather ship +sharecropper +creamcups +gas bracket +divinity +ornithologist +yellow twining snapdragon +showy goldenrod +end man +heptagon +sand dropseed +round file +guama +blue elder +sand spurry +raccoon dog +zigzag goldenrod +fast reactor +arctic willow +cyclopean masonry +punter +sgraffito +slattern +storage 
ring +clipper +pulasan +short-tailed shrew +scammony +daybook +umbrella tree +coloring +element of a cone +gesneriad +cane +burgoo +western coral snake +friendship plant +Leydig cell +scrutineer +hairy golden aster +inclined fault +water milfoil +bryozoan +nardoo +native pomegranate +curly grass +Florence fennel +resurrection plant +ice water +crown +ploughman's lunch +clustered lady's slipper +kitchenette +sand sedge +pouched mouse +roadbed +parsley haw +predecessor +super heavyweight +seedless raisin +mailbag +sparling +codling moth +squama +Bercy +thermoelectric thermometer +Jaculus jaculus +saltpan +firmer chisel +round whitefish +ramrod +criollo +pinch bar +slash pocket +thigh pad +velvet plant +intergalactic space +brazilian ironwood +whaleboat +sirrah +hanging fly +aspirator +Dominican +dribbler +yellow-eyed grass +Cornish +geophysicist +tarmacadam +marchioness +rattlesnake orchid +Alaska Native +ilama +myrrh tree +zucchini +licorice root +nosebag +lounger +troposphere +virginal +spaghetti Western +Virgin Mary +waterwheel plant +dry nurse +enate +carpet shark +rijsttaffel +stuffing nut +caraway seed bread +Leotia lubrica +kaffiyeh +Boston baked beans +halophyte +backscratcher +instillator +trefoil arch +pip +digitizer +dosemeter +Carolinian +French sorrel +boards +historian +rangpur +clansman +goral +leatherjacket +coiner +fleece +white globe lily +storm cellar +roundhouse +mediatrix +butterfly flower +swamp gum +prairie vole +rhizomatous begonia +common tobacco +Marco Polo sheep +subarachnoid space +broomweed +safety net +silky wisteria +swagger stick +spectacled caiman +derris root +soap pad +chop-suey greens +summer hyacinth +palo santo +carbohydrate loading +chinch bug +roadman +sheep plant +messiah +desk officer +banquette +drugget +trumpet arch +great duckweed +purdah +heartbreaker +hasty pudding +alligator weed +dragee +yellow bristlegrass +Jacob's ladder +campstool +coffee fern +sweet fern +little chief hare +cat-o'-nine-tails +rep +American red 
elder +divorcee +black salsify +cambric +sennit +Canada ginger +wonderer +Formica +cream-colored courser +zooid +European beggar-ticks +sorrel tree +piddock +blolly +red-flowered silky oak +bay +Hooker's onion +dark horse +cone clutch +Roman hyacinth +paintbox +mestiza +green alder +bill +panicled aster +mammogram +snuffbox fern +Rediffusion +swamp fly honeysuckle +stoup +psychiatrist +nodding groundsel +student union +cold duck +bee beetle +playbox +Psychopsis krameriana +nosh-up +earthnut +narthex +single-rotor helicopter +revetment +sweetleaf +seasoned salt +piculet +speckled alder +mackerel scad +common yellowwood +devisee +static tube +Spanish heath +umbrella plant +fucoid +Chilean +coral-root bittercress +fanatic +cachou +agony aunt +bird's-foot fern +washwoman +torchbearer +placoderm +frosted bat +spicemill +Cape lobster +hard-shell crab +colonizer +camphor daisy +friar's-cowl +false tamarisk +toggle joint +tinsmith +theorist +hydrologist +loganberry +universal donor +northern whiting +tent-caterpillar moth +russet +kangaroo mouse +African scented mahogany +bastinado +breast implant +betel +grade separation +vox humana +stodge +Maryland chicken +Anguillan +oil pump +governor's plum +narcissist +deadwood +private citizen +winker +ropewalker +gidgee +Lothario +ski resort +major-domo +von Neumann machine +belaying pin +water parsnip +Fissipedia +luggage carrier +spring water +oyster stew +kohl +celesta +date-nut bread +punchboard +sunniness +hospital train +man +rack and pinion +mixer +pousse-cafe +narrow goldenrod +Maxim gun +stiff +recruiting-sergeant +watch glass +white hellebore +tung tree +prairie white-fringed orchid +beef Stroganoff +scoffer +grassy death camas +Shawnee cake +tapioca +Short's aster +banker +laparoscope +honeyflower +Caterpillar +electric clock +baling wire +huntress +Surinam toad +art school +incurable +Canton crepe +apple juice +hipline +bronchoscope +marshmallow fluff +Texan +wild fig +sawed-off shotgun +forestay +red kauri +fish slice 
+Egyptian grass +English walnut +brown sauce +ogee arch +nectary +chambray +leather flower +phloem +Persian violet +bomb calorimeter +western narrow-mouthed toad +soup du jour +sickle alfalfa +caracolito +periscope +coralberry +sword bean +sigmoidoscope +water locust +hygrodeik +sycamore +sheikdom +ballistocardiograph +clove +akee +fucoid +jacquard +cat's-ear +puritan +slender wild oat +smooth softshell +purchasing agent +landing craft +chartist +lace bug +sharksucker +Virginia chain fern +horseradish +namer +ripcord +personage +aspirin powder +puku +Wankel engine +nightcap +velvet bent +roridula +cytogeneticist +olm +almond extract +common heath +fringe-toed lizard +Kentucky yellowwood +lithosphere +cramp +bulgur +scurvy grass +officer's mess +frigate +electroscope +giant chinkapin +opah +rutabaga +wood hoopoe +Farley maidenhair +shingle tree +argentine +router +palm nut +quillwort +hiba arborvitae +runcible spoon +hireling +sickbay +alpine totara +white lupine +Cotoneaster horizontalis +desert plume +staghound +Sea Scout +opalescence +enophile +Jersey elm +coal house +Helvella acetabulum +selenium cell +white camas +creole-fish +auger +fragrant agrimony +research center +achromia +shank +cottonseed +mod con +extension +sugar beet +winter flounder +silky dogwood +strop +tokamak +rabbit ears +baby farmer +fireman's ax +serration +taproot +socket wrench +action officer +Chilean jasmine +Greek fire +stem-winder +body louse +lumpsucker +stink bomb +American lady crab +dicer +lie detector +maneuverer +black-headed snake +tiger moth +shooting stick +spermatid +babushka +deaconess +home +prior +chanfron +chickasaw plum +big-eared bat +rusty woodsia +tertigravida +miniver +combretum +habit +bluehead +angled loofah +gipsywort +fire-on-the-mountain +purple milk vetch +alpine gold +merozoite +loddon pondweed +Uniat +provost marshal +Gyromitra fastigiata +Coigue +proconsul +oarfish +San Jose scale +filature +chimney plant +spiny softshell +bluecoat +live axle +river limpet 
+clever Dick +pink bollworm +Japanese plum +roarer +caricature plant +wardroom +Texas chachalaca +Bahia grass +Moreton Bay tulipwood +accessory fruit +pearl barley +ashcake +bunt +Polynesian tattler +pine fern +laughing owl +potato fern +speaking trumpet +adjoining room +bearing rein +banana quit +redbrick university +Scleroderma bovista +magdalen +pressurized water reactor +advisee +NIMBY +poorwill +almond moth +comedian +star tulip +cracked wheat +water pump +guest of honor +yellow-breasted bunting +hire +pedate leaf +augur +purple locoweed +Socinian +upland white aster +guesthouse +double reed +detention basin +rollmops +hitch +bodega +mayeng +sparkplug wrench +attack dog +peach melba +heliozoan +tower mustard +blue mold fungus +lamplighter +banded sand snake +smooth crabgrass +elsholtzia +bodkin +Aegean island +bag lady +alewife +arcella +electrical contact +common ax +animist +concave polyhedron +coalface +climbing perch +yellowtail +hobble skirt +marquee +Russian dandelion +snow mushroom +polo ball +NADA daiquiri +cormous plant +chaparral mallow +inside caliper +milking stool +fallout shelter +sea gooseberry +Danish blue +grissino +chimney breast +mosquito fern +soundbox +spring chicken +epauliere +cape forget-me-not +japan +saddle oyster +white fritillary +push-button radio +bladder senna +bladder stone +macedoine +moire +Shawnee +starnose mole +douroucouli +horseradish sauce +electron gun +cotter +console +park commissioner +free press +lump sugar +western poison oak +apple maggot +keurboom +lisper +griffon +burin +horseshoe whipsnake +Jacobean lily +spinner +cochineal insect +emesis basin +sowbane +humanitarian +uakari +three-dimensional radar +wild hollyhock +heartseed +swinger +two-by-four +mop handle +common amsinckia +traitress +rush aster +fibrous-rooted begonia +violet-flowered petunia +milliammeter +alidade +azure aster +celery seed +snorer +scarlet plume +obtuse leaf +heathen +rose chestnut +headrace +dwarf buckeye +Pacific tripletail +wiggler 
+bounty hunter +Lowlander +slate pencil +typist +syconium +vaquita +skybox +business lunch +gusher +curacao +palometa +Diapsida +light diet +sourdine +thorny amaranth +potato fern +cartridge extractor +peshmerga +chaffweed +tahoka daisy +hematologist +massage parlor +diverging lens +breadroot +papyrus +amarelle +cover plate +hubbard squash +cryptomonad +whitetail prairie dog +rabbit burrow +orthochromatic film +goncalo alves +Chile bonito +tent-caterpillar moth +Manila grass +buck sergeant +mustard seed +crested wheatgrass +wise guy +asarabacca +field pea +bite plate +barbasco +heart-lung machine +mouse-eared bat +piping guan +gun pendulum +climbing onion +fungus gnat +Livonian +one-hitter +Chilean firebush +Sonoran whipsnake +round scad +myelogram +Rhodes grass +vomitory +roble beech +South-African yellowwood +molasses +Velcro +common calamint +radiation pyrometer +sketcher +chaparral pea +coffee stall +Australian nettle +bilimbi +Khedive +visionary +field spaniel +devilwood +collimator +Siberian spruce +sling +limestone salamander +ribbon worm +hazel +petter +coolant system +artillery plant +bailiff +chameleon tree frog +microsporophyll +maiden blue-eyed Mary +Drosophyllum lusitanicum +cocozelle +king post +nailer +knobkerrie +tovarich +Intelnet +worm lizard +drop forge +wool grass +brown bullhead +anthropoid +vitamin A2 +creche +hickory nut +whiffletree +deipnosophist +Muskhogean +masochist +hypsometer +gliricidia +complexifier +wild licorice +reconnaissance vehicle +fives +beefsteak plant +eastern dasyure +bookworm +crested coral root +wire recorder +cinnamon vine +bubble +Newfoundland dwarf birch +spruce bark beetle +teetotaler +fad diet +ascus +spicebush +African coral snake +soft-shell crab +Postum +packhorse +sand cherry +cricket-bat willow +middlebrow +Hungarian sauce +buffalo clover +jimsonweed +latanier +stablemate +jumper +zoospore +smooth woodsia +flowering ash +unilateralist +lomatia +flapper +wild cotton +Siberian wall flower +probe +bankrupt 
+blockade +lemon geranium +fig leaf +basic point defense missile system +clack valve +buttinsky +ingenue +mountain everlasting +zebra-tailed lizard +shaving-brush tree +evergreen huckleberry +core drill +lugworm +Cashmere goat +doorjamb +minelayer +student center +horsehair +European dewberry +white broom +arenavirus +eastern poison oak +rye ergot +Tupi +tensiometer +fleawort +coquille +icing sugar +junior lightweight +Doppler radar +mahuang +candlepin +chambermaid +evergreen blueberry +Eton jacket +parvis +solleret +molded salad +malvasia +birth-control campaigner +nonagon +backswimmer +ogee +bowstring +salt marsh mallow +trapezohedron +hoary willow +speech therapist +Zinjanthropus +core +red-backed mouse +eptatretus +mossy saxifrage +Aristotelian +Thessalonian +searing iron +bifocals +falangist +field pea +packsaddle +lay reader +hoecake +cuboid +white maire +iceman +lobscouse +neckcloth +color-blind person +Chinese holly +assemblyman +white-lipped peccary +kava +plastron +crab louse +hook wrench +trailing four o'clock +junior +skilly +internet +tonguefish +footman +sub-assembly +evangelist +track +bench lathe +desk clerk +scalded milk +chamois cloth +American marten +chachka +nondescript +pellitory-of-the-wall +swamp candles +procurator +cuddy +farkleberry +mountain male fern +trawl +dual scan display +fish meal +prospector +convener +guano bat +ant shrike +picture rail +sand rat +gynophore +quilting +sleeper +summer savory +Cotoneaster dammeri +smooth sumac +slumgullion +suite +catalufa +spherule +lean-to tent +gryphon +gas shell +short iron +sweet sultan +dewberry +Victoria plum +American water shrew +X-ray tube +macebearer +green arrow arum +abbe +poke milkweed +atheist +Fosbury flop +Ord kangaroo rat +moldboard +wheat germ +explosive trace detection +whippoorwill +examiner +tallyman +Crookes tube +wild peach +fringed grass of Parnassus +Crookes radiometer +Atlantic croaker +lobster stew +spring cress +maggot +pacer +hydra +Zionist +pepper tree +diamante 
+baize +Rhodesian man +county agent +respecter +Anglican +antimacassar +materialist +Swan River everlasting +cloud grass +toll line +C battery +chinese mustard +grass poly +warming pan +seasonal worker +common sickle pine +bathysphere +elegant Habenaria +card table +Chilean cedar +brocket +collimator +malted milk +avadavat +fire marshall +coloratura +yellow spiny daisy +fingerstall +narrow-leaf penstemon +indigo broom +pillwort +bearberry willow +Etonian +certified milk +climbing bird's nest fern +field coil +wrist pad +parr +kaoliang +engelmannia +stocker +satrap +Nantua +spearfish +caper tree +gold-tail moth +mountain chinchilla +sea milkwort +westerner +army cutworm +leaf-nosed snake +neurobiologist +xeranthemum +Eastern silvery aster +ecclesiastical attire +caper +Ukranian +bight +button fern +peach pit +oligodendrocyte +maar +digitigrade mammal +streptobacillus +sensitometer +preemptor +oat +bell foundry +crown lens +rock purslane +Junior +Brazilian guava +kicksorter +Ohio goldenrod +red mulberry +King's Counsel +mountain four o'clock +fairy shrimp +fell +oca +sycophant +chantry +dermatoglyphic +bomblet +keyhole saw +hangman's rope +little barley +lion-jaw forceps +giant scrambling fern +popper +dulcimer +Espagnole +tardigrade +smooth-haired fox terrier +bullbrier +rewa-rewa +Japanese poinsettia +trunk line +cannery +helminth +American spikenard +prince's-feather +arthroscope +ginger +aphakic +pilot bit +angle of refraction +low-sodium diet +wall creeper +growler +praetorium +Hall of Fame +soupfin shark +Molotov cocktail +kaffir boom +stitcher +sawwort +flagellant +Atlantic herring +Reticulitermes lucifugus +voltaic pile +snowy orchid +southern flounder +skysail +osage orange +white mullein +lined snake +tolu tree +poliovirus +foreman +burette +jackass bat +invigilator +electromyograph +acarus +presence chamber +columbian mammoth +hyacinth bean +pilot +meadow jumping mouse +Maria +outskirts +aftershaft +Queensland nut +schlockmeister +plainsman +afropavo 
+scarlet musk flower +five spice powder +gunboat +multiplex +Dutch uncle +louvered window +chimney corner +cuscus +psalmist +Vichy water +signer +amphiuma +harmonizer +authorizer +naiad +control rod +stentor +mountain bladder fern +gig +read-only memory chip +assenter +vixen +hermitage +corn dab +locksmith +cockspur thorn +variable-pitch propeller +western red-backed salamander +dolman sleeve +cultist +sweet buckeye +pine vole +Peking man +mountain swamp gum +nimblewill +bethel +aye-aye +lancelet +teff +Alpine celery pine +endive +nipa palm +center of curvature +seeder +Sabahan +sea scallop +social secretary +gorgonzola +western chokecherry +misanthrope +rabbitweed +beggarman +button fern +white mallee +doodia +mastiff bat +roper +prima donna +blanc +holding pen +fingerling +skyhook +flophouse +steam chest +crystallized ginger +acrocarp +horse pistol +true mahogany +costmary +ballistic galvanometer +jaunting car +bartonia +rep +mandibular notch +bubble and squeak +umpire +fringed loosestrife +bear oak +ski jump +staggerbush +plumcot +thermal reactor +field brome +bodkin +jackknife-fish +malope +writing arm +gold fern +Stayman Winesap +merlon +eclectic +fluxmeter +emeritus +imam +drum +pop tent +capital ship +subalpine larch +flail +Lorenzo dressing +tomboy +eastern woodrat +warrantee +Pacific spiny dogfish +sheepshead porgy +farthingale +Cryptoprocta +power loom +communicant +howdah +ectomorph +false foxglove +basset horn +odd-pinnate leaf +Wisconsin weeping willow +Queensland bottletree +dampener +corbel arch +silent butler +Circe +town clerk +Japanese chestnut +bloodwood tree +switcher +cup hook +spreader +rice rat +straightedge +traverser +fluid drive +Spanish paprika +sour milk +poison camas +bean dip +card table +vinegar fly +vizier +electric-discharge lamp +purple rock brake +dynamo +Japanese snowbell +Grindelia robusta +neuroglia +safflower seed +coronet +frown line +Renaissance man +Steller's sea cow +book scorpion +isosceles triangle +arthritic +spherical 
triangle +kangaroo mouse +garden orache +stemless hymenoxys +titi +out-basket +gent +columnea +mint sauce +mouthbreeder +Liebig condenser +cheerer +assegai +stickler +Merostomata +dimmer +grey poplar +common heath +scorzonera +glory hole +Blackfoot +oil slick +musketeer +apple geranium +daisyleaf grape fern +gas furnace +bijugate leaf +Arabist +star-thistle +hand throttle +huckleberry oak +lift pump +maulstick +Rome Beauty +Newburg sauce +pit +volunteer +Baldwin +ark +Asian horseshoe crab +black calla +marlinespike +Gentianopsid procera +guinea gold vine +tucker-bag +desk sergeant +piezometer +migrator +keelson +executrix +sackcloth +onion smut +buckboard +substitute +pudge +mess +cinchona +intervenor +gravimeter +pederast +censor +gastroenterologist +cutlassfish +launch +demerara +Diegueno +bog bilberry +aglet +soda fountain +crank call +harpoon gun +ribbon fern +Gurkha +output device +epilating wax +greasewood +water horehound +return key +fairy swallow +spatulate leaf +culverin +leptocephalus +kleptomaniac +barley water +bleeding tooth +Cheyenne +maleberry +limber +tapenade +whorled aster +toe +revenant +lap joint +vein +truant +florest's cineraria +morning dress +trichodesmium +nightshirt +element of a cylinder +shopaholic +section hand +electrodynamometer +Guadalupe cypress +rosebud +racist +avaram +keeled garlic +Alaska rein orchid +orange toast +cunner +dipstick +Neolentinus ponderosus +bulbil +charlotte +pull-through +header +Manduca quinquemaculata +persona grata +elegist +cafe royale +scup +semanticist +wood sage +field magnet +tundra +bay myrtle +alluvial flat +arrowleaf groundsel +celtuce +baryon +must +entrant +othonna +pied-a-terre +liza +sticky aster +grasshopper mouse +prison guard +tire iron +bomb rack +Spanish American +sheltered workshop +turfing daisy +backbone +tangle orchid +creeping willow +dumb bomb +horse cassia +barosaur +Yavapai +shrimp Newburg +peanut worm +dwarf chinkapin oak +corchorus +brick cheese +by-catch +stover +Urnula craterium 
+clasp +Kekchi +alpine coltsfoot +soybean future +altar wine +ripping chisel +encephalogram +mountain spleenwort +transferee +remoulade sauce +American rock brake +stenographer +read/write head +loblolly +ground +powdered mustard +brake band +sea dahlia +freak +proconsul +Coffey still +Sivapithecus +pellitory +palm cat +skew arch +American angelica tree +vigilante +candelilla +andryala +amarelle +swiftlet +petcock +associate professor +sclerite +open circuit +Virginia crownbeard +Last Supper +button tree +scyphozoan +margate +mercury cell +horsewhip +water scorpion +companionway +drop cloth +Amhara +miraculous food +pro-lifer +embryologist +Creole +bombazine +Indian blackwood +cubeb +trace detector +gros point +main-topsail +meringue kiss +spree killer +capstone +specimen bottle +woolly apple aphid +silverweed +American barberry +gallfly +European bog asphodel +northern flying squirrel +alliterator +Old Catholic +heliograph +Pteris cretica +tippler +pump well +allspice +balancer +scarlet bugler +lantern fly +white prairie aster +krummhorn +robin's plantain +Pacific sardine +patty-pan +decaffeinated coffee +western saxifrage +warrantee +colorimeter +ball bearing +makomako +foot +troika +apricot sauce +data multiplexer +rose-root +sound film +Northern dewberry +water hickory +swing door +spastic +Oligoporus leucospongia +botulinus +tamale pie +Sagittarius +muff +spicebush +petiolule +pump action +Parry's pinyon +split-pea +rudder blade +princess royal +wormseed mustard +honey guide +pip-squeak +fin keel +foretop +cyrilla +Navaho +melanocyte +deist +silver tree +citrus whitefly +Morrow's honeysuckle +green peach aphid +longanberry +call-board +wild yam +novelist +toothed spurge +alienee +pond apple +allspice +Carolina lupine +Jack of all trades +white false indigo +boiled dinner +princewood +sailor's-choice +false bracken +microbrewery +black grama +tutee +brickkiln +sea raven +guesser +wirework +European lemming +thyrse +plains lemon monarda +milo +shunt +spotted 
cowbane +anchovy sauce +grande dame +Maryland golden aster +Chinese puzzle +boarfish +burweed marsh elder +defense contractor +nitric bacteria +Belgian hare +beach plum +conformal projection +sand fly +steering linkage +quickset +Mahayanist +Geiger tube +loudmouth +Lancastrian +brownie mix +ex-spouse +deltoid leaf +Shasta salamander +rabbet joint +purple anise +garibaldi +gebang palm +bladderpod +Host +great bowerbird +string cheese +spinning jenny +drift net +matriarch +guar +bitter betch +panda car +mess +plains pocket mouse +scarlet wisteria tree +deerberry +reamer +homing torpedo +molehill +stockyard +reniform leaf +rag +symmetry +Texas star +lerot +pickle relish +three-seeded mercury +cotter pin +ice-cream bean +farmyard +bar magnet +hansom +prickle cell +renal cortex +pest +Ultrasuede +sailing master +brougham +wastrel +amboina pine +Canary Island hare's foot fern +ninepin ball +southwestern lip fern +usherette +lemon drop +star begonia +weeds +saltworks +Persian melon +corbina +medusa +bucksaw +Gibson girl +diameter +American twinflower +kino +clear liquid diet +angiocardiogram +wetter +oyster cracker +yellowfin mojarra +wild parsley +life tenant +broom closet +Corynebacterium diphtheriae +square shooter +bedwetter +ball-and-socket joint +nonsolid color +Salmonella typhimurium +buffel grass +hip pad +subaltern +heliothis moth +trail boss +hayloft +Francisella +primordial dwarf +cock-a-leekie +sugarplum +propulsion system +tyrolean +Carib +salai +ketembilla +ironclad +cornhusk +heckler +multistage rocket +north island edelweiss +Chaldean +twenty-two pistol +Francophobe +scofflaw +sickle feather +screw bean +sea squill +Scopolia carniolica +agglomerator +western holly fern +presenter +straight pin +Myxine glutinosa +Colbert +clover-leaf roll +war paint +bird's-eye bush +longfin mako +running suit +arrow wood +margrave +blue fleabane +dracontium +plastron +chimney swift +child prodigy +commissar +turtle soup +postulant +archaebacteria +snakefly +Pitot tube 
+chap +smilo +Malthusian +French roof +worm wheel +gulag +pointed-leaf maple +pull-off +Cathaya +American green toad +ball cartridge +infiltrator +snowfield +crotchet +auxiliary pump +bearnaise +galax +chaenactis +olympic salamander +sundowner +cows' milk +beach plum +moss-trooper +Arabidopsis thaliana +cat's-claw +bog rosemary +ribier +book agent +bumper jack +beefwood +monk's cloth +alpine bearberry +climbing fumitory +cucking stool +puka +Piltdown man +property man +discharge lamp +X chromosome +knobble +lobster Newburg +herbalist +sunray +golden saxifrage +leopard cat +muffle +stonewort +blancmange +intraocular lens +trepan +desert mariposa tulip +plume poppy +Dane +martynia +shaver +white milkweed +napu +tansy-leaved rocket +abortus +telemeter +tansy mustard +harpy +honeysuckle +ironworks +testacean +Tartuffe +silvervine +Sihasapa +surface gauge +western blind snake +paramyxovirus +Icelander +bird louse +stockbroker belt +test-tube baby +ague root +little golden zinnia +dietician +elephant's-foot +dirty bomb +sailing warship +brier +tinter +Connemara heath +potato fungus +bait casting +decagon +rosefish +die +high-pass filter +solitaire +widow's walk +goldthread +Tudor +trews +orange pekoe +ninon +soda jerk +sump +flying carpet +burial garment +oblanceolate leaf +press gallery +Shintoist +three-centered arch +spreading pogonia +Moro +foxtail orchid +Ghanian +dry kiln +thane +naranjilla +bitter pea +American bugbane +apron string +oyster fish +Port Jackson fig +prize winner +high-water mark +Oneida +smoking room +potato skin +charge d'affaires +gantlet +amyloid plaque +barmbrack +mate +arrow leaved aster +handbarrow +horned screamer +virago +linoleum knife +rattlesnake root +K ration +reset +foot brake +red coral +good guy +aberrant +lavalava +poleax +garden webworm +sneezer +mountain heath +American dog violet +eolith +chimneysweeper +matriarch +smalltooth sawfish +sea mouse +tubercle bacillus +superconducting supercollider +Abney level +darnel +gherkin 
+celery salt +Tungus +pulasan +oriflamme +death camp +redhorse +apprehender +scion +selectwoman +pentahedron +principal +old school tie +slice bar +chanar +pimento butter +wailer +zero +mescal +rosebud orchid +stone bramble +Jarvik heart +NOC +pitchman +rat cheese +strawberry tomato +dwarf golden chinkapin +landau +tocsin +ampulla +scratcher +crab Louis +ginseng +ripcord +polluter +tensiometer +eyewitness +aalii +Oregon crab apple +conservator +day jessamine +hexahedron +suture +tippet +linsey-woolsey +vernal witch hazel +stainer +egocentric +canistel +nudger +shipping agent +shortleaf pine +battle sight +cheese spread +weeder +incendiary bomb +honeyflower +stovepipe iron +stepper +hellgrammiate +votary +aflatoxin +arquebus +impulse turbine +pipewort +garrote +glow lamp +pigsticking +blood clam +surface search radar +Bolshevik +platen +chariot +Gentianopsis thermalis +water level +quandong +catalytic cracker +giant foxtail +nut butter +drainplug +holdover +coastguardsman +Secretary of Health and Human Services +Seeing Eye dog +American plaice +coquilles Saint-Jacques +christella +medium +clingfish +lally +light-o'-love +Gentianopsis detonsa +taper file +signal detection +trip wire +lignosae +receiver +sedan +mud puppy +corn sugar +Philippine mahogany +magnetic pole +jointed rush +trapper's tea +Dorking +welcome wagon +clammyweed +guard +false azalea +convalescent +babassu +dedicated file server +colossus +air search radar +marquess +straight flute +sand stargazer +sea catfish +rosilla +ripsaw +Bermuda onion +peach sauce +sagebrush mariposa tulip +yashmak +Virginia mallow +erose leaf +sand blackberry +boulevardier +forester +choragus +onion mildew +threadfin +winged pea +sugar daddy +rotary press +styracosaur +rathskeller +Japanese millet +anchorite +coral drops +false gavial +eastern pipistrel +cheese press +Chinese primrose +pamperer +real estate broker +power worker +breeder reactor +nutcracker +piano wire +cushaw +Sinanthropus +firebreak +kelp greenling +herba 
impia +toll call +yoke +bird fancier +evening-snow +fever tree +reed meadow grass +flanker back +toggle bolt +Santa Cruz cypress +carbonnade flamande +northern dune tansy +mikado +millettia +forty-five +court +icepick +holm oak +Japanese angelica tree +Pacific cod +cant hook +urologist +spelt +lekvar +enologist +Mediterranean flour moth +prickly-edged leaf +Spanish grunt +dune cycling +frostweed +whisperer +tucker +Roman wormwood +counterterrorist +woolly alder aphid +Nuttall oak +snail butter +threshing floor +motley +forge +water mold +mummichog +sulfur paintbrush +head +walking delegate +jujube +peachleaf willow +Christmas bells +valley pocket gopher +bear's-paw fern +Lanthanotus borneensis +pearl hominy +placeman +swage block +offerer +stargazer +jeweler's glass +male chauvinist +crossbar +Oktoberfest +tamarau +micronutrient +large-leaved aster +tasset +tepary bean +sausage curl +ivy +snob +roller towel +wood meadowgrass +archil +padrone +prairie rocket +tongueflower +kidney fern +Carolina buckthorn +sea island cotton +landscape architect +realist +oyabun +mother hen +ostracoderm +esker +heliophila +nympholept +shining clubmoss +press agent +clam dip +Djiboutian +white currant +codfish ball +hand cheese +kraal +trident +conventicle +bacteroid +Indian plantain +quandong +kola nut +signor +theater light +musk clover +canistel +silent partner +steel-wool pad +diggings +affluent +sightreader +John Doe +arrowworm +goatsfoot +guardroom +wild cinnamon +kaffir boom +ink eraser +yardie +industrialist +sea lily +polarimeter +Polistes annularis +western big-eared bat +omnivore +Ted +horsecloth +crab cocktail +vacuum chamber +flower-of-an-hour +bilge +poleax +neolith +Montezuma +plum-yew +welfare case +trave +pipe bomb +shading +Centigrade thermometer +bangalore torpedo +celery top pine +nuclear rocket +fowling piece +anti-Semite +landscape +derris +bush honeysuckle +Mediterranean water shrew +ticket collector +masked shrew +white dipladenia +Savoyard +bondman +tempter 
+pygmy cypress +pentathlete +thruster +usurper +Arminian +yerba buena +ice field +ichthyosaurus +sackcloth +bean tostada +Oxbridge +Pteropus hypomelanus +thinker +bank robber +ape-man +thurifer +knawel +mule fat +hot spot +hairy-legged vampire bat +night raven +hook and eye +crocodile bird +skunkweed +beaver rat +cypress sedge +florida selaginella +April fool +Jonah crab +glass wool +corkwood +dwarf elder +hinging post +gentile +Brazilian trumpeter +witch doctor +thermograph +pink shower +Mao jacket +capelin +parang +bradawl +stooper +jewel orchid +citrange +oarswoman +Macedonian +particolored buckeye +pachycephalosaur +satinwood +Chinese brown sauce +peep sight +straight man +quandong +chamois cress +nonfat dry milk +rosin bag +Leiden jar +Grimes' golden +spirillum +grass vetch +carillonneur +downy wood mint +melon ball +sweet calabash +chlamydospore +bombshell +sidewall +sprig +Indian button fern +globe pepper +rough-stemmed goldenrod +bocconia +bubble chamber +sand dab +plum-fruited yew +aecium +marrowfat pea +hobbyist +whipper-in +salad burnet +neckband +Tangier pea +sauce Louis +salad burnet +artist's loft +koumiss +Nazarene +cutter +scrim +drape +crab-eating dog +deckhand +bedroll +gaff +stifler +pink lady +great plains paintbrush +patternmaker +yoke +caryophyllaceous plant +angrecum +quadriplegic +grid +genlisea +aspic +water table +junket +signore +Mutillidae +proprioceptor +pivoting window +Indian poke +synchroscope +trichion +tarahumara frog +proctoscope +abomination +purslane speedwell +breast drill +Japanese barberry +mandrake root +breakable +salon +American watercress +take-up +entrenchment +cocktail sauce +Scotch asphodel +borough +matchmaker +Seneca snakeroot +pointsman +psephologist +clustered poppy mallow +onion thrips +nuclear-powered ship +organizer +deciduous holly +balsam willow +enzymologist +caraway +drip loop +dog laurel +Orangeman +sapsago +polymath +backplate +leathery grape fern +modillion +two-timer +handhold +consignee +white 
stringybark +nettle-leaved goosefoot +bookmaker +disk drive +doliolum +palmist +packinghouse +Spandau +Whipple's penstemon +sword grass +ribbon development +pearly-shelled mussel +winter heliotrope +rogue elephant +deck tennis +Venus's flower basket +football +shim +boatswain +blinks +armored catfish +hooded seal +outdoorswoman +water starwort +upholstery needle +pleurodont +silky anteater +cornmeal +lead-in +redfin pickerel +horse balm +Rydberg's penstemon +cascade transformer +fly poison +Volvaria bombycina +broad-leaved twayblade +pastry cart +body plethysmograph +waverer +hardware store +Parry's penstemon +European sanicle +strawberry geranium +cross-examiner +head gate +devil's tongue +hemiepiphyte +pine hyacinth +machmeter +spirit lamp +field judge +Rock Cornish +mayhaw +Sassenach +bog pimpernel +parallel interface +crowberry +roach +Aegyptopithecus +cajan pea +lapboard +cryostat +magnetic storage medium +white yam +Lombard +rhymer +bed and breakfast +bunya bunya +rifle grenade +caterer +collared pika +anti-submarine rocket +bookkeeper +Western mountain ash +profit taker +fruitlet +Knowlton's cactus +infernal +beefsteak begonia +lunula +emulsion +intermediate wheatgrass +titfer +European sea bream +bigeye scad +yak butter +kola +cone pepper +plesiosaur +ragwort +penal colony +black carpet beetle +lubber's hole +Stapelias asterias +yard marker +balloon bomb +Scythian lamb +armory +selsyn +marblewood +spirula +fatalist +hash head +armiger +Dom Pedro +white-chinned petrel +ballast +orthopter +greater water parsnip +clutch +largeleaf holly +Evangelist +king whiting +tuna fish salad +Muscadet +surpriser +jumping bristletail +proportional counter tube +Hamburg parsley +obstructionist +pus-forming bacteria +creep feed +stepbrother +janissary +control freak +trusty +trepan +King William pine +orthicon +geological horizon +molecular biologist +violator +pariah dog +Austrian +conciliator +Fauntleroy +packing needle +mazer +Saturday night special +leucocytozoan +coastal 
rein orchid +whirligig beetle +capitalist +breeches buoy +clubroot fungus +meadow spikemoss +Kichai +Spanish lime +land office +camera obscura +strafer +purple-stemmed aster +lusterware +valve +Roman nettle +isthmus +breadstuff +sealskin +maleo +bilge keel +carissa plum +fish fly +kolkhoznik +heath pea +cowage +hog sucker +Sam Browne belt +inductor +wild licorice +Socotra begonia +supernumerary +Angle +red shrubby penstemon +toilet kit +tawse +sweet bells +kawaka +brown soft scale +lyssavirus +betting shop +double-crosser +macrotus +climbing hempweed +poi +strip mall +deadhead +petit juror +tract housing +American mistletoe +lace-flower vine +precipitator +endoparasite +hairy wood mint +red snapper +Victorian +hog peanut +line of heart +opossum shrimp +plumcot +Bavarian blue +slops +light flyweight +oregano +sand myrtle +pocket battleship +curator +narc +hydraulic cement +plains pocket gopher +closed loop +pluralist +molter +Christmas bush +snuffers +slender knapweed +footwall +plage +caper tree +red siskin +tender +boat train +tipster +low-pass filter +student lamp +morosoph +japonica +bellows +herald +oyster plant +savory +mail +computational linguist +blade +winter crookneck squash +zoomastigote +blackmailer +richweed +dialectician +genip +plumed scorpionfish +jet bridge +thermopile +billy buttons +Brule +millwright +Arenaviridae +Jones' penstemon +monastic habit +genipap fruit +burnous +dairyman +top +crab-eating raccoon +quadrangular prism +pilot burner +weeder +trireme +boy wonder +man of letters +Catawba +high-muck-a-muck +light circuit +bloodworm +lappet caterpillar +half-and-half +office boy +saddle stitch +mistletoe cactus +false chamomile +Catalina cherry +workhouse +Jamaica quassia +britches +tooth shell +reduction gear +carrot pudding +balsam woolly aphid +handspike +aioli +silver hake +flour bin +wireman +gas-cooled reactor +aficionado +plus fours +gitano +gene chip +oilfish +ingenue +tulip orchid +late purple aster +pork and beans +envoy +lemon 
extract +milk bar +black huckleberry +ground roller +Connecticuter +siderocyte +Jacquard loom +chub +meat safe +stock cube +Australian sumac +purple sanicle +tailless tenrec +dog wrench +rainbow cactus +castor bean +scintillation counter +eohippus +pawnbroker +gauge boson +front man +early warning radar +bearing wall +Bourbon +sandwichman +sild +gravelweed +perishable +cembra nut +riflebird +quicksand +slate +sweeper +ship-towed long-range acoustic detection system +defamer +president +vitamin K3 +challis +tanekaha +bloodwort +grenadier +quietist +Zairese +fucker +foremother +gesneria +print buffer +salsilla +fissiped mammal +fender +consulate +acidophilus milk +Southern dewberry +snail darter +Panama redwood tree +dehydrated food +bush willow +coffee fungus +Sinologist +Mesoamerican +hood +large civet +deck-house +cyborg +smuggler +pepper sauce +cyberpunk +Grand Inquisitor +persona non grata +haggis +weeping tree broom +stop bath +modifier +coyol +conodont +yellow giant hyssop +optical pyrometer +Carolina moonseed +marinade +aspartame +false wintergreen +cityscape +philter +turnery +hemiplegic +chuck-will's-widow +vower +track star +myrtaceous tree +small civet +intelligence analyst +dogcart +yardman +cross bit +holometabola +platen +sweet cassava +Comstock mealybug +acute angle +Communist +alcohol thermometer +mountain hollyhock +Mead's milkweed +highjacker +Townes +congou +Astrophyton muricatum +lazybones +roughcast +pressure cabin +clinch +cinnamon +smoke bomb +quandong +tout +office-bearer +punctum +efficiency apartment +Queensland hemp +Ceylon bowstring hemp +newswoman +vermin +fetid bugbane +grantee +sanitary landfill +gluten-free diet +clabber +shillelagh +white lettuce +sweet coltsfoot +beggar's lice +samite +loser +flasher +water star grass +banana passion fruit +translator +artificial kidney +Virginia creeper +American crab apple +cactus mouse +nebbish +Ligustrum obtusifolium +vox angelica +stringer +hunter +know-it-all +scene painter +invalidator 
+jungle cock +basilica +coriander +California single-leaf pinyon +miles gloriosus +pina cloth +law agent +scarlet fritillary +keurboom +bailor +ramjet +seedling +rib joint pliers +ways +picket ship +Surgeon General +wasabi +marquis +clostridium perfringens +Helvella sulcata +furnace lining +kingwood +painted sandgrouse +plain wanderer +Indian madder +silver screen +bailey +dwarf spurge +Serbian +ball-buster +shaheed +Platte River penstemon +tensiometer +mute +nymphomaniac +Yokuts +arroyo willow +whipping post +class act +load +winged everlasting +periodontist +diarist +robber frog +diestock +curry powder +ratchet wheel +store detective +hog plum +prune whip +shortwave diathermy machine +Anabaptist +post chaise +Kennan +bean caper +delegate +orderly sergeant +celtuce +jumping bean +gowen cypress +puddingwife +registered nurse +West Saxon +rosita +gun room +nasotracheal tube +matchboard +flagship +Boswellia carteri +Canadian pondweed +wonder boy +sewer rat +dimetrodon +pantograph +marsh bellflower +angoumois moth +slippery dick +woolly indris +creme de cacao +dulciana +Jewess +Macadamia integrifolia +least shrew +don +diffuser +black-stem spleenwort +grouseberry +goniometer +annotator +sticktight +gossip columnist +speechwriter +capon +rock hind +Liederkranz +chandler +echocardiograph +sidelight +fisher +brocket +New Zealand daisybush +northern sea robin +roller bandage +peachick +pellet +pichi +plug fuse +spark coil +buckwheat +brood bitch +wedgie +dwarf bilberry +filigree +bull +queen +dodo +Salish +denticulate leaf +Western silvery aster +Prima +magnetic bottle +fetterbush +process-server +nainsook +mythologist +Piedmont glacier +hammerhead +niggard +Mound Builder +Kui +Nootka +highbinder +passenger pigeon +oblong +tickler coil +agnostic +succorer +esophagogastric junction +dressmaker's model +bombshell +social anthropologist +gildhall +orpine +pterodactyl +bristly sarsaparilla +Lane's Prince Albert +hognose bat +salesgirl +lubricating system +electric catfish 
+wrap +Jacksonian +chard +cherry laurel +foreground +beadsman +Kolam +amniote +frozen pudding +acid head +poor box +depositor +coattail +pallas's sandgrouse +mason's level +English lady crab +skeg +cruel plant +petrolatum gauze +tuna +swivel +stock-in-trade +perisperm +civies +Phyllostomus hastatus +alienor +Verdicchio +guard's van +onion butter +moviegoer +planter +citrange +box huckleberry +iconoscope +familiar +helmsman +baby boomer +constructivist +American bog asphodel +whorled caraway +simple pendulum +viviparous eelpout +Job's tears +holdout +sour salt +poison bush +dusky-footed woodrat +golden algae +granadilla tree +telethermometer +crossbar +thrift +African bowstring hemp +dog in the manger +hayrack +gold-crowned kinglet +prolonge +doge +pencil +discount house +mulligan stew +Nonconformist +virologist +gregarine +facula +rocket scientist +thin-shelled mussel +oospore +annual salt-marsh aster +Afrikaner +metallic +julienne +culverin +cleavers +Berliner +mudhif +thorny skate +brown lemming +yellow colicroot +cooling system +large-leaved magnolia +free-reed +canyonside +preemptor +stake +Brucella +anti-G suit +pleximeter +squire +salsilla +write-in candidate +lowland burrowing treefrog +flare star +dwarf hulsea +jobber +mangel-wurzel +quagga +red-skinned onion +positive pole +Pteropus capestratus +jug wine +stomacher +standee +bladder worm +hakim +house of correction +pelisse +golden mole +temporizer +rose apple +drove +umbrellawort +holy of holies +lawyer cane +smooth lip fern +anode +astatic coils +zip gun +feverroot +self-heal +expansion bit +salt reed grass +field pussytoes +nutmeg hickory +cryptic coloration +Venus's girdle +Hunkpapa +Calostoma cinnabarina +raft foundation +May apple +pygmy mouse +prokaryote +yellow-green algae +Bermuda maidenhair +withdrawer +coelacanth +Elliott's goldenrod +driftfish +epicyclic train +bowl +swamp dewberry +corbel step +sadist +party line +anti-American +mining engineer +Amur privet +conidium +Gastrocybe lateritia 
+lithia water +chaulmoogra +Rough Rider +Guinea pepper +glade mallow +pitcher sage +whitecup +shanghaier +low St Andrew's cross +phonologist +cocobolo +perfumery +visor +prison chaplain +belt +ingesta +literary critic +industrial watercourse +reckoner +pursuer +Kinetoscope +Kuiper belt +hyperope +raw recruit +Galiella rufa +Prince Albert yew +slit trench +usher +tenderfoot +white-rayed mule's ears +browser +piccalilli +bran +giant buttercup +water lobelia +arborescent plant +echinus +dryland blueberry +struggler +platyctenean +Geordie +domatium +twenty-two rifle +keteleeria +sports editor +chorus girl +Hakham +dry-bulb thermometer +onomancer +double-bitted ax +Girondist +bottle bank +thyrsopteris +bandwagon +star anise +armored car +dhawa +Bessemer converter +mutineer +paradise tree +tupik +centurion +mending +chowchow +margrave +International Grandmaster +African hemp +catafalque +leptodactylid frog +forcemeat +tank shell +pill +barbecue pit +worthy +lady's maid +evergreen +Jesuit +South American staghorn +rigger +suffragan +imperialist +spherical angle +grey lemming +kitchen police +tree swift +coliphage +archaist +Conservative +rib +exegete +Mendelian +tragedian +steerage +Paleo-American +obeche +garlic +grapefruit peel +accommodating lens implant +half blood +barrelfish +catgut +lanceolate spleenwort +hardliner +frieze +name dropper +carrack +huckster +onion bread +magnetic head +pease pudding +raisin moth +negative magnetic pole +electroencephalograph +bunji-bunji +synchroflash +Mornay sauce +stencil +winged pigweed +Nesselrode +MEDLINE +licorice +mainspring +melilotus +duke +experimenter +Napier's bones +four-minute man +pin-tailed sandgrouse +toolmaker +pogge +rootstock +baton +pricket +creeping snowberry +anomalops +nester +devourer +apolemia +Maricopa +pine-barren sandwort +larvacean +American dewberry +escalope de veau Orloff +gig +myrtle +pitsaw +Lutheran +fish house punch +gnathostome +intake valve +molasses taffy +clammy locust +vandyke beard +Atlantic 
tripletail +planktonic algae +estradiol patch +flummery +cytologist +sectarian +oil meal +tomtate +mediterranean anchovy +aspersorium +argonaut +porkholt +sheep ked +algometer +Adventist +false goatsbeard +snake polypody +streetwalker +shelver +adoptee +highflier +pitch apple +prairie rocket +fish mousse +viroid +deckle +manila tamarind +observer's meridian +pincurl clip +hardstem bulrush +gossamer +brookweed +Druze +hug-me-tight +accessory before the fact +oilman +Comanche +Marine +bedlamite +Chinese cork oak +squawbush +false miterwort +walk-on +Cynopterus sphinx +brandyball +landlubber +arrowroot +cape forget-me-not +galoot +tabor pipe +checker +Levant cotton +paddle box +murderess +smirker +fuddy-duddy +withdrawer +newel +shade +pink disease fungus +tipu +sweet sultan +aeronautical engineer +tall gallberry holly +acarid +conqueror +cucumber +film director +ordinary +salon +closet queen +allegorizer +tonka bean +flax rust +negative pole +dagame +dentist's drill +mock privet +micropyle +contributor +dark horse +climbing corydalis +cosmotron +land agent +Big Blue +Cynic +tassel flower +lyrate leaf +Minuteman +Dutch-elm beetle +Hessian fly +flower girl +West-sider +window dresser +skinny-dipper +whitebait +out-and-outer +hooker +amicus curiae +jack +camwood +stockist +black root rot fungus +Jamaica dogwood +diaphragm +Holocentrus ascensionis +roselle +black maire +Pygmy +fumigator +lame duck +mudder +hydraulic transmission +conning tower +phoronid +batfish +hearing dog +monohybrid +whaling gun +Cockcroft and Walton accelerator +allemande +seasoner +epileptic +ammonia clock +Young Turk +lanseh tree +urceole +cafe noir +poster girl +Oglala +deadeye +manna lichen +positive pole +cinch +lyricist +hermaphrodite +kidney stone +dilator +number one +frotteur +kaffir bread +fish knife +tarragon +adjuster +potato wart fungus +Florida pompano +conductor +corbie gable +rounders +Catha edulis +bender +recruit +Uruguayan +subject +bunghole +day boarder +pocketed bat +Oxonian 
+owner-occupier +yellow-leaf sickle pine +devisor +exhibitor +looking glass +shipowner +crooked-stemmed aster +calico +dash-pot +defilade +Confucian +egg-and-dart +irreligionist +lepton +self-rising flour +diving bell +Brahui +shop girl +maximum and minimum thermometer +Dalmatian laburnum +correspondent +subduer +nonperson +Reaumur thermometer +rough-leaved aster +jacksmelt +pinfold +magneto +ex-wife +round-leaved rein orchid +purloo +American shrew mole +sweet sand verbena +polymastigote +outfitter +curled leaf pondweed +Italian dressing +borderer +ambusher +geebung +four-stroke engine +small ship +homeopath +gynostegium +political prisoner +Radiigera fuscogleba +ensiform leaf +rhizoctinia +satyr orchid +rue +bouillon cube +flip +prophyll +tilefish +periselene +prima donna +choker +laminar flow clean room +Hooker's orchid +fish joint +mombin +remover +array +coelostat +autophyte +consigner +Damaraland mole rat +gasman +public works +lye hominy +pearlfish +piassava palm +Georgian +uxoricide +confessor +community center +epigone +tagger +abrading stone +cryoscope +nautch girl +reliever +Cartesian +Indian beech +protoplasmic astrocyte +fundamentalist +mustard sauce +crank +houselights +five-point bishop's cap +comedienne +triangle +presentist +beaugregory +dreamer +Wave +blue mockingbird +Barbados gooseberry +ten-spined stickleback +papoose +silky pocket mouse +holdup man +agent-in-place +suspensory +emigrant +ropemaker +bookbinder +jumby bead +undershrub +Killarney fern +sheep bell +city slicker +equerry +pea crab +down-and-out +blackmouth bass +shirtmaker +lister +UNIX guru +snipefish +gimbal +maisonette +haircloth +Ranvier's nodes +pigmy talinum +tribute album +msasa +hydroxide ion +madame +four-pounder +prophet +sloganeer +field-effect transistor +nude mouse +canteen +Calostoma lutescens +buteonine +sunlamp +Uruguay potato +Spanish tamarind +Prince-of-Wales'-heath +kishke +caprifig +chincapin +hegari +alarmist +bathtub gin +astatic galvanometer +Calostoma 
ravenelii +marang +tussah +coin box +bugleweed +hacker +frontal eminence +timekeeper +shunt +bicycle clip +mustang mint +caesium clock +hospice +glenoid fossa +archpriest +ex-gambler +incrustation +salvager +Donatist +violator +lamb succory +hygroscope +oilbird +sharptail mola +showplace +corn syrup +flashlight fish +pulse timing circuit +anchovy paste +fascista +chigoe +divan +Druid +squad room +Huntingdon elm +buffalo carpet beetle +carper +corn lily +goats' milk +assault gun +cockpit +Lochaber ax +Visigoth +occupier +Basotho +criminologist +spindle +Rosicrucian +Cornishwoman +musk kangaroo +artificial skin +pandurate leaf +Parkia javanica +roundhead +tea-like drink +basidiolichen +unguiculate +stepmother +Nauruan +gutta-percha tree +bloodberry +scarlet haw +marupa +censor +algebraist +pelvimeter +whaler +cowhide +paparazzo +biochip +internationalist +Yukon white birch +hangar queen +chlamydia +puttee +Pipturus albidus +pearly razorfish +sea moss +burglar +hoary golden bush +colter +drey +bushman's poison +maxillaria +gnetum +deadeye +shittah +swamp oak +damper block +deepwater squirrelfish +truffle +cangue +paleolith +lawyerbush +sorehead +Texas snowbell +Tremella reticulata +quarter +keelboat +dimity +whiner +Wagnerian +myrmecophyte +frontierswoman +pyrometric cone +big-tree plum +puppy +galbulus +hod +winceyette +carriage wrench +dictostylium +farmland +infanticide +Jacob's rod +threadfish +monocline +inamorato +leaf miner +purple cress +passer +black-fronted bush shrike +silverrod +bootmaker +segregate +captive +Edmontonia +spherometer +television transmitter +bladder +Saratoga spittlebug +dynamometer +lodge +smooth darling pea +Cossack +wake-up call +Olmec +sutler +molasses kiss +corner post +rattlesnake weed +yardmaster +adder +rhinoscope +referral +ulster +pantaloon +counterspy +gadgeteer +heart cherry +hospital chaplain +Clydesdale terrier +plank-bed +Russian thistle +actinometer +dyspeptic +common wolffia +firewall +seidel +potato moth +soapweed +seif 
dune +thill +cosmographer +absolver +halberdier +fire control system +kai apple +bastard pennyroyal +Big Brother +broadcast journalist +Albatrellus dispansus +citrophilous mealybug +split end +nickel-iron battery +Newtonian +gas maser +thumbstall +anaspid +dusky-footed wood rat +latitudinarian +flatbrod +schizocarp +niqaabi +flight surgeon +gyrocompass +Polyporus tenuiculus +Utopian +mailboat +spellbinder +undercoat +cassareep +typical jerboa +photocathode +katharometer +bight +fur-piece +penetration bomb +malik +Siberian millet +nanomia +Wykehamist +tosser +gyrostabilizer +microwave diathermy machine +crystal set +wall +legatee +alfalfa +angwantibo +charioteer +piano maker +African mahogany +Morlett's crocodile +taro +parallel circuit +cush-cush +etymologist +matriculate +neem seed +cornerback +kingfisher daisy +redoubt +blastomycete +peplos +costumier +publican +tobogganist +semolina +myrmidon +parricide +gymslip +whoremaster +cryptocoryne +header +platitudinarian +barleycorn +spiral bandage +reciter +abecedarian +dance +wrymouth +bilberry +Liopelma hamiltoni +streamliner +Fordhooks +fixed phagocyte +radiobiologist +neurologist +Selkup +dollarfish +cascade everlasting +acrodont +boarhound +midstream +theatrical producer +abhorrer +goldsmith +photometrist +Anglo-Saxon +rugel's plantain +sable +workmate +ferule +ankus +earleaved umbrella tree +Passamaquody +timucu +Mexican pocket mouse +yerba santa +Rochon prism +apomict +monocarp +sweet unicorn plant +common winterberry holly +archivist +drypis +paretic +fly-by-night +white-berry yew +Schoolman +blue cheese dressing +vintager +squatter +Euphausia pacifica +corrugated fastener +yellow henbane +Croesus +almoner +analphabet +acoustic delay line +sheep frog +workhouse +horseleech +venturer +pond-scum parasite +Pyrenees daisy +plagiarist +Truncocolumella citrina +rerebrace +group captain +caddis fly +hot-rock penstemon +kanzu +stylopodium +slopseller +rauli beech +starter +ootid +statesman +distributor cam +ascot 
+falcon-gentle +Duplicidentata +spotted antbird +heliometer +false buckthorn +Allegheny spurge +Cavalier +dart +photocoagulator +master-at-arms +kei apple +baldachin +crapshooter +gametangium +white hope +chipotle +spike heath +Scotch woodcock +Florentine +differential analyzer +Mitrula elegans +wet cell +basil balm +Circassian +corn cake +bouncing betty +vice-regent +lagerphone +ketembilla +whoremaster +fork +tetrasporangium +trifler +pill head +life-support system +quartermaster general +tobacco thrips +officeholder +teredo +toyon +Sundacarpus amara +Phytophthora citrophthora +naif +lobbyist +alligator wrench +bully +heavy +toxicologist +radio chassis +waterdog +drive line +kaffir cat +foster-brother +breakax +curette +traditionalist +pipe vise +striped button quail +gawker +homeotherm +schoolyard +battue +kalansuwa +deviationist +Bolshevik +transponder +pungapung +iron +Eyeish +roccella +manglietia +Tory +print seller +Texas Ranger +otter shrew +seconder +shellflower +outlier +party man +wold +hayfork +oncologist +framer +co-beneficiary +ocean pout +Chinese angelica +scrimshaw +air attache +false gromwell +standing press +fringepod +specifier +automatic choke +durum +yenta +wassailer +reeler +signora +beach pancake +common booklouse +pellicle +backroom boy +den mother +associate +Unitarian +gambist +brookweed +clubroom +cat's-tail +playboy +self-registering thermometer +doorstop +bennet +yak's milk +escapee +quail bush +sparge pipe +coast boykinia +screw key +half gainer +aggravator +cotton mill +tailor's chalk +free agent +cotton mouse +deadhead +bunny +turpentine camphor weed +amaranth +ceratodus +red lauan +beam-ends +thermograph +wally +Toda +handrest +commissary +oak-leaved goosefoot +manufacturer +voicer +Jafnea semitosta +bench hook +finder +abyssal zone +rabbitwood +Hercules'-club +epicarp +declinometer +camp follower +signaler +Australian pea +putz +qadi +banded palm civet +egg timer +regnellidium +calisaya +harvestfish +sound spectrograph +side-wheeler 
+glomerule +woolly rhinoceros +Black Muslim +horticulturist +ornithomimid +cryometer +battlefront +gametophyte +airmailer +cuisse +nakedwood +baseball club +slasher +anise +leatherleaf +leatherjacket +horned pondweed +gofer +Saigon cinnamon +barong +blazer +twinkler +skeleton shrimp +dial +floorwalker +case shot +flannelbush +cultivated parsnip +Jane Doe +few-flowered leek +nogging +placer miner +muzzler +serge +lion-hunter +capulin +Wandering Jew +ascidian tadpole +hispid pocket mouse +southern spatterdock +milk wagon +junior middleweight +duck sauce +promycelium +protozoologist +cascade liquefier +tout +longheaded thimbleweed +charcoal burner +footage +slop +bridge agent +miller's-thumb +Job's comforter +marocain +tanker plane +lancetfish +knocker +toque +ordinand +umbrella bird +favorite son +hare's-foot bristle fern +business traveler +plotter +Asiatic shrew mole +tallyman +stump +Paleacrita vernata +index register +mortgagee +accuser +codger +sand rat +seaside centaury +chiropractor +Florida smoothhound +dwarf sperm whale +T-man +sannup +dragonhead +numdah +alkali grass +gynobase +kymograph +ascolichen +steward +waterline +Nazarene +filer +lapidary +muncher +wincey +scyphus +question master +besieger +worldling +docent +facing +atmometer +quern +puerpera +three-decker +calliope +wild red oat +bailee +flame pea +cattle cake +theist +yellowtail flounder +cosmopolitan +rocket engineer +vouchee +Turkoman +hard sauce +Thousand Island dressing +assayer +messmate +mutilator +oyster bar +flame tokay +countess +prairie mimosa +microsporangium +cotter +townsman +paring +fundraiser +simperer +Comrade +orlop deck +power takeoff +cattleship +prime meridian +Javanthropus +scriptorium +curandera +long-clawed prawn +maestro +paster +potato tuberworm +chachka +junkyard +cape yellowwood +reentrant polygon +Liberian coffee +restaurateur +Alsophila pometaria +Jekyll and Hyde +electrophorus +Scomberomorus maculatus +manipulator +gromwell +chicken provencale +ashram +mangel-wurzel 
+shamrock pea +dossal +adducer +erection +Mysore thorn +smoothie +chufa +brace wrench +victualer +litterer +linstock +Protium guianense +palfrey +banyan +klieg light +dangleberry +trooper +yaupon holly +quitter +tradescant's aster +nullipara +melter +devil's urn +ghostwriter +mouth +analogist +Creek +sonic depth finder +fucker +locus of infection +mortician +esophageal smear +locum tenens +conic projection +aroeira blanca +bellarmine +night porter +automobile mechanic +codpiece +Munro +cottonweed +scoinson arch +tinderbox +frozen food +waterproofing +Egyptian henbane +lash +transactor +American smooth dogfish +existentialist +grabber +Sonoran lyre snake +Rufous rubber cup +colors +weekend warrior +power user +perennial salt marsh aster +Puritan +Apalachicola rosemary +anecdotist +tosser +moth bean +agnostic +stretcher-bearer +browntail +optimist +brewer's mole +astronomy satellite +flat file +rust mite +tuberous plant +day laborer +buster +trapezoid +bevatron +nonresident +Streptomyces griseus +mangosteen +customer agent +hero worshiper +suicide bomber +procellariiform seabird +archiannelid +reaction turbine +distortionist +bulldog wrench +grainy club +scalp +Aztec +scow +globigerina +pedant +heartleaf manzanita +kanchil +low gallberry holly +containment +scandalmonger +rose-colored starling +Powhatan +addle-head +Chilean rimu +Atlantic sea bream +arthrospore +ramrod +root climber +Kalapooia +roach clip +Schreiber's aster +horseradish +albino +Kshatriya +trombidiid +blasting cap +body pad +brachium +shallu +Wynnea americana +slender centaury +munj +upset +wind tunnel +cottonwick +airing cupboard +pepper shrub +ambrosia +languisher +chosen +rose globe lily +purple apricot +costia +sloop of war +sultana +frontlet +booster +sargassum fish +broad-leaved montia +rifleman bird +stillroom +amoralist +enginery +meter maid +fitment +southern bog lemming +Athenian +clincher +cusk-eel +mackintosh +diaphone +corozo +Australian reed grass +czar +spongioblast +Eurafrican 
+airhead +Shahaptian +Roman +pollinium +tourist class +halogeton +stamper +emperor +malingerer +tramp steamer +Peziza domicilina +pilot cloth +stenopterygius +cost accountant +Queen's Counsel +wine-maker's yeast +poppet +cage +rowlock arch +landgrave +bearded wheatgrass +stink bell +quaker +undesirable +algarroba +resistance pyrometer +exorcist +carib wood +guvnor +border patrolman +bathhouse +licenser +headman +rentier +pine spittlebug +nut-leaved screw tree +paraduodenal smear +apron +necker +smilax +Alpine besseya +creeper +castle +ground bait +Queensland grass-cloth plant +sclerotium +great yellowcress +fat farm +Stoker +hoop snake +elixir of life +Trotskyite +home buyer +wheat berry +Tutelo +semi-climber +utahraptor +wet-bulb thermometer +packrat +hygrophyte +darter +sketcher +refiner +camlet +midgrass +compound +tarwood +Colorado River hemp +toiler +abstractor +override +dwarf pipefish +plodder +briefcase computer +trunk hose +brown butter +valve-in-head engine +cymbalist +explosive detection system +horsewoman +boutonniere +chinchilla +venerator +scourer +exarch +cohune nut +ayapana +continental divide +cosigner +stalker +pyxie +Genet +Macowanites americanus +open-hearth furnace +water chestnut +American frogbit +tarwood +cutter +scout +burr +upsetter +grist +tagasaste +mouthpiece +palette +rattan +letterman +Exmoor +Methodist +eelblenny +marasca +slide valve +ventilation +saddle hackle +Yakut +flux applicator +air traveler +murder suspect +Cynocephalus variegatus +idolizer +Surgeon General +nutlet +little-head snakeweed +germ tube +fellow traveler +raceabout +commodore +czar +anamorphosis +treelet +girlfriend +groundnut +sideline +giant star grass +goffer +spark lever +oubliette +processor +tare +plodder +extremist +Kipp's apparatus +gripsack +S wrench +viscountess +bridgehead +cascarilla +Asiatic flying squirrel +protoceratops +equerry +difflugia +princeling +moonlighter +aspergill +common flat pea +Utahan +imperial mammoth +plantain-leaved pussytoes 
+Boott's goldenrod +bootlegger +reed pipe +runcinate leaf +onion salt +nitrite bacterium +introvert +duck +New World opah +goliath frog +heterostracan +disrupting explosive +haggler +candlenut +false bugbane +returning officer +eudiometer +ship-breaker +metazoan +mandarin +patka +gill net +cavity wall +armilla +rainmaker +dealfish +orderly +gleaner +muffin man +house sitter +alto +sand devil's claw +vulcanizer +appendicularia +boron chamber +chess +bitok +anchovy butter +dropout +flour mill +bishop +escapist +scapegrace +stanhope +smooth winterberry holly +upstager +stalking-horse +pony +prairie gourd +parabolic mirror +Polaroid +slasher +lap +garlic butter +sendee +German millet +hairy honeysuckle +Swiss canton +Scleroderma flavidium +red goatfish +telegraph plant +Jungian +garment cutter +mallee hen +stranger +driveway +schooner +Paiute +cisco +trestlework +sipper +shanny +romanticist +Molly Miller +mountain rimu +odd-leg caliper +bitumastic +Western Australia coral pea +labor coach +latchkey +harpulla +solitary pussytoes +chop-suey greens +coil +guimpe +diapir +Osage +gutta-percha tree +giant eland +reticulation +garden huckleberry +quick study +Hudson bay collared lemming +coreligionist +Lancastrian +stumblebum +omnirange +seersucker +Potemkin village +Rhea Silvia +symphonist +bolti +jaw +jaconet +page +visiting fireman +haulm +p-n junction +landlubber +yellow jack +triclinium +souari +invader +fire walker +Luddite +Plott hound +hemming-stitch +winker +star-duckweed +craniometer +Arabidopsis lyrata +loser +cypripedia +trimmer arch +cookhouse +pink fivecorner +transfer +ringleader +northern pocket gopher +moke +blockade-runner +cyclostome +web-spinning mite +Whig +transcriber +malahini +sawyer +patent log +paca +tragedian +thermojunction +soffit +black buffalo +foreigner +applecart +brit +pole horse +white mullet +argentinosaur +Homo soloensis +bounty hunter +decumary +hand +paperboy +Smitane +windowpane +Java man +Wynnea sparassoides +prune +middy +lilliputian 
+sorb +pyrostat +guest worker +hold +leaseholder +vegan +humanist +salinometer +piton +zygospore +means +night rider +tetraspore +archipelago +radiomicrometer +nitpicker +spot weld +slicer +girlfriend +round-tailed muskrat +cock's eggs +Shavian +bay +nuclear chemist +planetarium +hiccup nut +Marylander +milling +microsporidian +brown cup +Strophanthus kombe +little skate +emancipator +paperhanger +archaeopteryx +maigre +Mastotermes electrodominicus +procurer +seizure-alert dog +homeboy +cotton strain +mute +siren +spearnose bat +phenacomys +gayal +arsenal +pitchfork +Port Jackson heath +cud +magnetic core memory +interferometer +water jacket +account executive +hodoscope +window oyster +sudatorium +syncopator +loment +hypertensive +smoothbark +Geogia holly +nailhead +African holly +musette +chafeweed +microflora +derrick +strawworm +shogun +queen post +jerboa kangaroo +columbo +royal +sourball +solenogaster +cardsharp +Homo habilis +intaglio +calf's-foot jelly +flotsam +skirret +baronduki +chyme +shovel hat +Welsh +monoplane flying fish +groundfish +tablet-armed chair +swan dive +Indian club +colonial +cassiri +pyramidal tent +praya +silk vine +time clock +button snakeroot +clews +Korean lespedeza +diffuser +ripping bar +puttyroot +nipple shield +headpin +juneberry holly +hub-and-spoke +laver +weldment +plain flour +hoosegow +dudeen +grey skate +line of life +mung +arariba +Newtown Wonder +rock candy +side chapel +castor sugar +narrow-leaved white-topped aster +babassu nut +puka +rings +catchall +heat shield +caroche +oxbow +Australian coral snake +tapper +sporangiophore +fenugreek +spruce gall aphid +gouache +cutoff +private line +pod +cargo hatch +nailhead +penile implant +geophyte +small-leaved linden +deepwater pipefish +paperhanger +hairy spurge +Persian lamb +subtropics +feed grain +clarence +nonparticipant +scorpioid cyme +hand brake +tiller +Geglossaceae +albacore +monochrome +goa bean +bur +tongue worm +psittacosaur +frog's lettuce +pectoral +terreplein 
+light filter +fishpaste +dry point +grison +feterita +dolichocephalic +oenomel +stretcher +swag +cheval-de-frise +mountain beaver +scammony +discus +leatherleaf saxifrage +wharf rat +Dominique +pelycosaur +depth gauge +bishop +archespore +true anomaly +silver jenny +mercy seat +kelp +oviraptorid +acrylic +Chinese pea tree +meat house +bilge well +Temperate Zone +whale louse +balbriggan +briefcase bomb +pump-type pliers +oil +sour gourd +Jewbush +lunette +Chinese paddlefish +pyxidium +beechnut +calabar bean +grugru nut +gib +blunt file +cataphyll +megasporangium +blockbuster +sliding seat +hogchoker +calceus +Connarus guianensis +honest woman +survivor +second balcony +tempera +Calvary clover +murine +outwork +bogy +elephant's-foot +conning tower +set square +blackfly +stirk +Streptomyces erythreus +blade +goldfield +snowball +mortal enemy +waltzer +shoal +galley +hitchhiker +lithophyte +brisling +scauper +esophagoscope +grab +subtracter +philosopher +duplex apartment +southeastern pocket gopher +bonduc nut +reverberatory furnace +grader +lamp house +northern bog lemming +brotula +ornithopod +ptyalith +obturator +perpetual motion machine +range pole +Africander +curvet +daisy print wheel +floor +collector +mutant +tuck +fore-and-after +senega +buckler mustard +louvar +Tarsius glis +culdoscope +Spanish fly +steering gear +hatchet man +museum +saw set +cambric tea +comber +thermohydrometer +stationer +chalcis fly +bryanthus +whipstitch +harvest mite +rock gunnel +time bomb +rariora +pigfish +apetalous flower +head shop +horned whiff +sandpit +tachistoscope +sundries +taffrail +caller +monofocal lens implant +Dover's powder +souari nut +crowbait +render +Shakespearian +hagberry +megatherian +magus +hatchel +mangabey +garroter +piedmont +cope +barrio +psychodid +rigout +distributor +croupier's rake +sarcenet +narrow-leaved water plantain +treenail +biped +lanternfish +overdrive +barndoor skate +picket boat +amber lily +sawpit +sand lance +bucket shop +common beech 
+laundry truck +surtout +grogram +tampion +escape hatch +interstice +shop bell +snake mackerel +nakedwood +tumbrel +mericarp +mountain paca +cab +big board +cringle +eusporangium +shipping room +coal chute +dumbwaiter +Smiledon californicus +man-at-arms +cartridge +deinonychus +pigeon pea +screw bean +spectacle +floorboard +cutting room +low-warp-loom +proconsul +sabicu +genipap +clapper +aquifer +archaeornis +belly flop +Protium heptaphyllum +interrupter +high-warp loom +knight +wiper +impression +poker +Pithecanthropus +sable +guardroom +tenter +wellhead +raja +strickle +sodomite +mountebank +sand leek +Barbados gooseberry +shuffler +sensory fiber +crab-eating opossum +etching +rare bird +scup +fagot +negro vine +hutment +droshky +nephoscope +lady chapel +cutty stool +release +vestiture +buff +standard +Tabernacle +vascular ray +snakewood +chlorobenzylidenemalononitrile +limnologist +pouched mole +microwave linear accelerator +Mastotermes darwiniensis +wind tee +orange bat +open sight +carpospore +rampant arch +sabbatia +cursor +post exchange +bellpull +center +cyclostyle +canonist +pygmy sperm whale +moa +king +pass-through +angioscope +marrow +hookup +revetment +acanthocephalan +good Samaritan +apatosaur +web spinner +dixie +ommastrephes +crossbench +candlewick +jack +light arm +caisson +kaki +quandong nut +Meuniere butter +coquilla nut +mast +black +twitterer +bluethroat pikeblenny +shielding +water-shield +urolith +elephant bird +clearway +dark lantern +schizopetalon +press +Nazi +sugarberry +Maltese +stevedore +hair shirt +party wall +gainer +blackheart +nothosaur +cavetto +evergreen bittersweet +chemical bomb +calpac +shingle +turnpike +animator +heaver +isoclinic line +death knell +liner +anathema +aerie +razorback +Ichyostega +pound net +French dressing +mottle +yard +string tie +bell seat +brattice +battering ram +sierra +pompon +vertex +stomach pump +electrolytic cell +escolar +telpher +roadhouse +cerecloth +tartare sauce +letter case +whale sucker +hob 
+teg +canvas +strickle +hectograph +Cartagena bark +mail car +acinus +freedom rider +bread sauce +picture window +Rhizopogon idahoensis +pinprick +mass spectrograph +ringer +devil's cigar +salad cream +marlberry +airbrake +Clark cell +yellow-throated marten +wire gauge +dinoceras +aba +harpoon log +plate rail +mustard plaster +coelophysis +journal box +puce +ballcock +quartering +izar +clinid +whirler +turnspit +deathbed +pottle +shot +doubler +Coryphaena equisetis +English sole +chicken feed +borrow pit +mylodontid +Chilean nut +Kundt's tube +ling +asthenosphere +reseau +death seat +immovable bandage +peppermint patty +lecturer +electron multiplier +bear claw +hyacinth +beaked salmon +toehold +scull +snowball +gangsaw +fiber +oxeye +lashing +Beckman thermometer +fence +cantilever +dinner theater +Reynard +jag +umbrella plant +camera lucida +beaver +slug +yellowfin croaker +Sibley tent +rat-tail file +anchovy pear +soldier +cackler +chaise +Pitot-static tube +minniebush +Episcopalian +oleaster +ejaculator +wavy-leaved aster +knight +rack +real storage +magnetic mine +cocoa plum +vesiculovirus +birch leaf miner +water chevrotain +rudapithecus +torpedo tube +itch mite +warren +loft +washerman +terrace +nonstarter +shit +platform +caudex +ground control +Ostariophysi +slopshop +Peruvian cotton +crystal oscillator +plastic bomb +bar bit +watering cart +Asiatic sweetleaf +artificial joint +chariot +casern +charge-exchange accelerator +display adapter +hornpipe +honey bell +planula +Nephthytis afzelii +hame +ranter +trachodon +synchrocyclotron +splasher +heterotroph +Nicol prism +Himalayan rhubarb +headfast +put-put +bitter almond +parr +scantling +power breakfast +madder +Catalpa bignioides +rose of Jericho +spark chamber +rhizome +beard worm +supper club +negro peach +keratoscope +wain +apple aphid +planking +time-delay measuring instrument +sternpost +sicklepod +lake bed +gatherer +monotype +dead-man's float +poison gas +dicynodont +organism +cell +person +animal 
+plant +food +artifact +dressage +contact sport +outdoor sport +gymnastics +track and field +jumping +high jump +skiing +water sport +swimming +dive +floating +skin diving +rowing +boxing +sledding +tobogganing +wrestling +skating +ice skating +roller skating +racing +boat racing +riding +equestrian sport +cycling +blood sport +hunt +fishing +angling +casting +athletic game +outdoor game +golf +field game +field hockey +football +American football +ball game +baseball +court game +badminton +basketball +tennis +sport +Seder +scavenger +bottom-feeder +work animal +beast of burden +pack animal +domestic animal +marine animal +female +male +young +young mammal +pup +cub +lion cub +tiger cub +microorganism +arbovirus +herpes +herpes zoster +reovirus +moneran +cyanobacteria +enteric bacteria +actinomycete +streptomyces +diplococcus +parasite +ectoparasite +protoctist +protozoan +sarcodinian +ameba +ciliate +alga +brown algae +green algae +sporozoan +cypriniform fish +cyprinid +carp +domestic carp +shiner +catostomid +buffalo fish +cyprinodont +killifish +topminnow +squirrelfish +stickleback +pipefish +embryo +fetus +blastula +chordate +cephalochordate +tunicate +ascidian +vertebrate +aquatic vertebrate +jawless vertebrate +lamprey +hagfish +cartilaginous fish +holocephalan +chimaera +elasmobranch +shark +mackerel shark +mako +requiem shark +dogfish +smooth dogfish +spiny dogfish +smooth hammerhead +smalleye hammerhead +shovelhead +ray +sawfish +roughtail stingray +butterfly ray +eagle ray +manta +skate +bird +gamecock +night bird +ratite +passerine +oscine +accentor +lark +pipit +finch +canary +dark-eyed junco +New World sparrow +bunting +honeycreeper +sparrow +grosbeak +towhee +weaver +grassfinch +tyrannid +New World flycatcher +kingbird +pewee +cotinga +antbird +Old World flycatcher +thrush +nightingale +Old World chat +warbler +kinglet +Old World warbler +New World warbler +flycatching warbler +New World chat +yellowthroat +New World oriole +northern oriole 
+meadowlark +New World blackbird +grackle +Old World oriole +starling +myna +corvine bird +crow +Old World jay +common European jay +New World jay +blue jay +Canada jay +Rocky Mountain jay +nutcracker +European magpie +American magpie +Australian magpie +wren +marsh wren +thrasher +New Zealand wren +creeper +titmouse +black-capped chickadee +Carolina chickadee +swallow +martin +tanager +shrike +butcherbird +bush shrike +bowerbird +European water ouzel +American water ouzel +vireo +waxwing +bird of prey +hawk +black kite +swallow-tailed kite +white-tailed kite +harrier +falcon +peregrine +caracara +eagle +young bird +sea eagle +Aegypiidae +Old World vulture +griffon vulture +bearded vulture +Egyptian vulture +black vulture +New World vulture +buzzard +condor +Andean condor +California condor +black vulture +king vulture +owl +horned owl +scops owl +amphibian +salamander +newt +Pacific newt +ambystomid +climbing salamander +web-toed salamander +frog +true frog +true toad +spadefoot +tree toad +cricket frog +tongueless frog +reptile +anapsid +diapsid +chelonian +turtle +sea turtle +ridley +snapping turtle +musk turtle +diamondback terrapin +Western box turtle +tortoise +soft-shelled turtle +saurian +lizard +gecko +iguanid +spiny lizard +fence lizard +horned lizard +skink +teiid lizard +racerunner +plateau striped whiptail +Chihuahuan spotted whiptail +western whiptail +checkered whiptail +agamid +moloch +anguid lizard +venomous lizard +lacertid lizard +chameleon +monitor +crocodilian reptile +crocodile +alligator +caiman +armored dinosaur +ankylosaur +bone-headed dinosaur +ceratopsian +hadrosaur +saurischian +sauropod +theropod +ceratosaur +maniraptor +synapsid +pterosaur +ichthyosaur +snake +colubrid snake +smooth green snake +rough green snake +racer +blacksnake +whip-snake +rat snake +bull snake +common kingsnake +milk snake +common garter snake +ribbon snake +Western ribbon snake +common water snake +water moccasin +grass snake +viperine grass snake +sand snake 
+lyre snake +blind snake +indigo snake +constrictor +boa +python +elapid +coral snake +coral snake +cobra +mamba +black mamba +krait +viper +pit viper +rattlesnake +timber rattlesnake +arthropod +arachnid +false scorpion +whip-scorpion +spider +European wolf spider +acarine +hard tick +Ixodes dammini +Ixodes neotomae +Ixodes pacificus +Ixodes scapularis +sheep-tick +Ixodes persulcatus +Ixodes dentatus +Ixodes spinipalpis +wood tick +soft tick +mite +trombiculid +spider mite +house centipede +gallinaceous bird +domestic fowl +jungle fowl +chicken +cock +hen +turkey +grouse +European black grouse +Asian black grouse +blackcock +greyhen +red grouse +moorhen +greater prairie chicken +lesser prairie chicken +heath hen +guan +chachalaca +megapode +mallee fowl +phasianid +pheasant +bobwhite +northern bobwhite +Old World quail +migratory quail +peafowl +California quail +Hungarian partridge +red-legged partridge +Greek partridge +mountain quail +guinea fowl +columbiform bird +pigeon +dove +turtledove +domestic pigeon +homing pigeon +sandgrouse +parrot +cockatoo +lory +varied Lorikeet +rainbow lorikeet +parakeet +cuculiform bird +cuckoo +crow pheasant +coraciiform bird +roller +kingfisher +hoopoe +apodiform bird +swift +Archilochus colubris +thornbill +goatsucker +piciform bird +woodpecker +flicker +sapsucker +toucanet +trogon +quetzal +aquatic bird +waterfowl +anseriform bird +duck +teal +widgeon +sheldrake +goldeneye +scaup +wood duck +sea duck +scoter +merganser +gosling +gander +Chinese goose +greylag +blue goose +snow goose +brant +common brant goose +honker +barnacle goose +swan +tundra swan +screamer +crested screamer +mammal +prototherian +monotreme +marsupial +opossum +bandicoot +kangaroo +common wallaby +hare wallaby +nail-tailed wallaby +rock wallaby +pademelon +tree wallaby +rat kangaroo +phalanger +dasyurid marsupial +dasyure +placental +calf +buck +insectivore +mole +shrew mole +shrew +water shrew +tenrec +invertebrate +sponge +glass sponge +coelenterate 
+Chrysaora quinquecirrha +hydrozoan +siphonophore +anthozoan +actinia +coral +gorgonian +stony coral +ctenophore +worm +planarian +fluke +liver fluke +Fasciolopsis buski +schistosome +tapeworm +echinococcus +taenia +common roundworm +chicken roundworm +pinworm +eelworm +vinegar eel +trichina +hookworm +filaria +Guinea worm +annelid +oligochaete +polychaete +leech +mollusk +scaphopod +gastropod +abalone +scorpion shell +giant conch +edible snail +garden snail +brown snail +Helix hortensis +seasnail +neritid +limpet +Hermissenda crassicornis +cowrie +bivalve +clam +quahog +cockle +oyster +mussel +marine mussel +freshwater mussel +scallop +shipworm +cephalopod +octopod +decapod +squid +crustacean +malacostracan crustacean +decapod crustacean +crab +swimming crab +spider crab +lobster +true lobster +Old World crayfish +American crayfish +shrimp +prawn +krill +stomatopod +mantis shrimp +woodlouse +pill bug +sow bug +sea louse +amphipod +copepod +barnacle +wading bird +stork +ibis +common spoonbill +roseate spoonbill +heron +egret +night heron +American bittern +European bittern +least bittern +whooping crane +rail +crake +gallinule +purple gallinule +coot +great bustard +plain turkey +button quail +trumpeter +seabird +shorebird +plover +turnstone +sandpiper +yellowlegs +ruff +tattler +woodcock +snipe +greyback +red-breasted snipe +curlew +godwit +stilt +stilt +phalarope +courser +coastal diving bird +larid +gull +tern +jaeger +skua +auk +guillemot +murre +puffin +gaviiform seabird +podicipitiform seabird +grebe +pelecaniform seabird +white pelican +Old world white pelican +gannet +snakebird +sphenisciform seabird +penguin +pelagic bird +wandering albatross +black-footed albatross +petrel +shearwater +storm petrel +aquatic mammal +cetacean +whale +baleen whale +rorqual +toothed whale +beaked whale +dolphin +bottlenose dolphin +porpoise +sea cow +carnivore +pinniped mammal +seal +eared seal +fur seal +fur seal +South American sea lion +California sea lion +Australian sea 
lion +Steller sea lion +earless seal +walrus +canine +bitch +dog +cur +toy dog +toy spaniel +English toy spaniel +hunting dog +hound +coonhound +dachshund +foxhound +wolfhound +greyhound +terrier +bullterrier +rat terrier +Manchester terrier +fox terrier +wirehair +Welsh terrier +schnauzer +Skye terrier +sporting dog +retriever +pointer +setter +spaniel +springer spaniel +water spaniel +working dog +watchdog +shepherd dog +Belgian sheepdog +pinscher +Sennenhunde +mastiff +bulldog +guide dog +sled dog +liver-spotted dalmatian +spitz +griffon +corgi +poodle +wolf +coydog +wild dog +striped hyena +brown hyena +spotted hyena +aardwolf +fox +black fox +silver fox +blue fox +feline +cat +domestic cat +tom +blue point Siamese +wildcat +common lynx +Canada lynx +bobcat +spotted lynx +caracal +big cat +leopardess +panther +lioness +lionet +Bengal tiger +tigress +saber-toothed tiger +bear +Syrian bear +grizzly +Alaskan brown bear +cinnamon bear +viverrine +civet +Indian mongoose +ichneumon +slender-tailed meerkat +suricate +bat +fruit bat +carnivorous bat +leafnose bat +false vampire +vespertilian bat +long-eared bat +freetail +vampire bat +predator +game +game bird +fossorial mammal +tetrapod +insect +beetle +two-spotted ladybug +Mexican bean beetle +Hippodamia convergens +vedalia +bombardier beetle +calosoma +searcher +firefly +sawyer +pine sawyer +flea beetle +Colorado potato beetle +carpet beetle +clerid beetle +lamellicorn beetle +scarabaeid beetle +scarab +tumblebug +dorbeetle +June beetle +melolonthid beetle +elaterid beetle +snout beetle +boll weevil +blister beetle +bark beetle +darkling beetle +flour beetle +seed beetle +pea weevil +bean weevil +rice weevil +louse +flea +dipterous insect +gall midge +housefly +tsetse fly +blowfly +bluebottle +greenbottle +flesh fly +tachina fly +gadfly +botfly +human botfly +sheep botfly +warble fly +horsefly +bee fly +fruit fly +louse fly +horn fly +mosquito +gnat +fungus gnat +hymenopterous insect +drone +worker +honeybee 
+Africanized bee +black bee +Carniolan bee +Italian bee +carpenter bee +bumblebee +cuckoo-bumblebee +andrena +Nomia melanderi +leaf-cutting bee +mason bee +potter bee +wasp +vespid +paper wasp +hornet +sphecoid wasp +digger wasp +chalcid fly +sawfly +pharaoh ant +little black ant +army ant +carpenter ant +fire ant +wood ant +slave ant +Formica fusca +slave-making ant +sanguinary ant +bulldog ant +Amazon ant +termite +dry-wood termite +orthopterous insect +short-horned grasshopper +locust +migratory locust +migratory grasshopper +long-horned grasshopper +katydid +mormon cricket +sand cricket +mole cricket +European house cricket +field cricket +tree cricket +snowy tree cricket +phasmid +diapheromera +oriental cockroach +American cockroach +Australian cockroach +German cockroach +giant cockroach +praying mantis +hemipterous insect +leaf bug +mirid bug +lygus bug +lygaeid +coreid bug +heteropterous insect +water bug +water strider +assassin bug +homopterous insect +whitefly +sweet-potato whitefly +coccid insect +scale insect +soft scale +armored scale +mealybug +plant louse +aphid +greenfly +woolly aphid +adelgid +dog-day cicada +seventeen-year locust +spittle insect +plant hopper +psocopterous insect +psocid +booklouse +ephemerid +neuropteron +green lacewing +brown lacewing +odonate +trichopterous insect +caseworm +thysanuran insect +bristletail +thysanopter +thrips +earwig +lepidopterous insect +butterfly +nymphalid +fritillary +emperor butterfly +danaid +pierid +small white +large white +southern cabbage butterfly +blue +copper +American copper +hairstreak +Strymon melinus +moth +tortricid +lymantriid +geometrid +cankerworm +pyralid +tineoid +tineid +clothes moth +gelechiid +grain moth +noctuid moth +cutworm +underwing +hawkmoth +bombycid +saturniid +giant silkworm moth +silkworm +arctiid +lasiocampid +tent caterpillar +webworm +webworm moth +caterpillar +bollworm +woolly bear +larva +grub +pupa +queen +echinoderm +basket star +edible sea urchin +sand dollar +heart 
urchin +crinoid +trepang +lagomorph +leporid +rabbit +eastern cottontail +swamp rabbit +marsh hare +leveret +European hare +jackrabbit +white-tailed jackrabbit +blacktail jackrabbit +polar hare +snowshoe hare +pika +rodent +mouse +rat +pocket rat +field mouse +brown rat +jerboa rat +water rat +New World mouse +wood mouse +wood rat +vole +packrat +Eurasian hamster +golden hamster +gerbil +lemming +pied lemming +Old World porcupine +brush-tailed porcupine +long-tailed porcupine +New World porcupine +Canada porcupine +pocket mouse +kangaroo rat +jumping mouse +jerboa +dormouse +gopher +squirrel +tree squirrel +ground squirrel +prairie dog +American flying squirrel +groundhog +hoary marmot +yellowbelly marmot +Old World beaver +New World beaver +cavy +naked mole rat +ungulate +hyrax +odd-toed ungulate +equine +horse +foal +colt +male horse +stallion +mare +saddle horse +warhorse +pony +mustang +bronco +wild horse +pony +racehorse +racer +harness horse +workhorse +draft horse +trotting horse +ass +domestic ass +wild ass +onager +common zebra +mountain zebra +grevy's zebra +rhinoceros +tapir +even-toed ungulate +swine +piglet +porker +peccary +ruminant +bovid +bovine +ox +cattle +bull +cow +beef +Brahman +dairy cattle +Old World buffalo +Indian buffalo +carabao +Asian wild ox +American bison +wisent +sheep +lamb +domestic sheep +wild sheep +mountain sheep +goat +domestic goat +wild goat +goat antelope +antelope +Thomson's gazelle +Gazella subgutturosa +springbok +kudu +harnessed antelope +eland +waterbuck +oryx +deer +stag +red deer +mule deer +roe deer +caribou +chevrotain +camel +domestic llama +guanaco +alpaca +giraffe +musteline mammal +ermine +stoat +New World least weasel +Old World least weasel +longtail weasel +American mink +ferret +muishond +snake muishond +striped muishond +river otter +Eurasian otter +striped skunk +hooded skunk +hog-nosed skunk +spotted skunk +American badger +Eurasian badger +ferret badger +hog badger +marten +pachyderm +edentate +peba 
+apar +tatouay +peludo +giant armadillo +pichiciago +sloth +anteater +primate +ape +anthropoid ape +hominoid +hominid +homo +Homo erectus +Homo sapiens +australopithecine +great ape +western lowland gorilla +eastern lowland gorilla +mountain gorilla +silverback +western chimpanzee +eastern chimpanzee +central chimpanzee +pygmy chimpanzee +lesser ape +monkey +Old World monkey +talapoin +grivet +vervet +green monkey +chacma +mandrill +drill +rhesus +bonnet macaque +Barbary ape +crab-eating macaque +entellus +guereza +New World monkey +true marmoset +pygmy marmoset +tamarin +silky tamarin +pinche +lemur +tarsier +flying lemur +proboscidean +elephant +mammoth +procyonid +raccoon +fish +food fish +young fish +crossopterygian +lungfish +catfish +silurid +bullhead +channel catfish +gadoid +cod +hake +elver +common eel +tuna +moray +conger +teleost fish +clupeid fish +shad +herring +sardine +pilchard +anchovy +salmonid +salmon +Atlantic salmon +trout +brown trout +char +whitefish +smelt +tarpon +ribbonfish +toadfish +needlefish +flying fish +spiny-finned fish +percoid fish +perch +pike-perch +walleye +robalo +pike +pickerel +sunfish +crappie +freshwater bream +black bass +bass +serranid fish +grouper +hind +surfperch +cardinalfish +remora +carangid fish +jack +moonfish +pompano +scad +dolphinfish +characin +cichlid +snapper +grunt +sparid +sea bream +porgy +sciaenid fish +croaker +whiting +sea trout +mullet +goatfish +mullet +silversides +barracuda +sea chub +butterfly fish +damselfish +clown anemone fish +wrasse +blenny +pikeblenny +gunnel +goby +gempylid +scombroid +mackerel +Spanish mackerel +tuna +bonito +sailfish +billfish +marlin +tripletail +mojarra +ganoid +Pacific sturgeon +beluga +scorpaenoid +scorpaenid +scorpionfish +rockfish +lumpfish +greenling +gurnard +sea robin +plectognath +triggerfish +filefish +boxfish +spiny puffer +ocean sunfish +flatfish +righteye flounder +lefteye flounder +whiff +sole +abbey +abbey +abrader +accelerator +accessory +accommodation 
+acoustic device +acoustic modem +acrylic +action +actuator +adhesive bandage +adjustable wrench +aeolian harp +aerosol +after-shave +airbus +aircraft +airfield +airfoil +air gun +airplane +air pump +air-to-air missile +air-to-ground missile +alarm +alb +alcazar +Allen screw +alms dish +altimeter +Amati +ammeter +ammunition +amplifier +analog computer +analytical balance +anchor +anchor chain +aneroid barometer +angledozer +anklet +antenna +anteroom +antiaircraft +antiballistic missile +apartment +apartment building +aperture +apparatus +apparel +appliance +appliance +applicator +aquarium +arbor +arcade +arch +arc lamp +area +argyle +arm +armament +armature +armchair +armoire +armor +armored vehicle +armor plate +armrest +array +arrow +artificial heart +artillery +assembly +assembly plant +astrodome +astronomical telescope +athletic sock +atom bomb +atomic clock +atomizer +attachment +attack submarine +attire +audiocassette +audio system +audiotape +auditorium +autoclave +autoinjector +autoloader +automat +automat +automatic firearm +automatic rifle +automaton +auxiliary research submarine +awl +ax +axis +axle +axletree +baby bed +baby buggy +baby grand +back +background +backseat +badminton equipment +badminton racket +bag +bag +bag +baggage +bagpipe +bait +balance +balcony +balcony +bale +ball +ball gown +ballistic missile +ballistic pendulum +ball-peen hammer +ballroom +band +bandage +bandanna +banderilla +bar +bar +barbed wire +barge +barge pole +barn door +barograph +barrack +barrage balloon +barrel knot +barrel vault +barrier +barroom +base +base +baseball equipment +basilica +basin +basket +basketball equipment +bass +bass drum +bass horn +bastion +bat +bathhouse +battery +battle-ax +battle dress +battleship +bay rum +bay window +beading plane +beam +beam balance +bearing +beater +beating-reed instrument +bed +bed +bedclothes +bedroom +bedroom furniture +bedspread +bedspring +beehive +beer barrel +bell +bell push +bell tower +belt +belt buckle +bench +berlin 
+berth +besom +bevel gear +bicycle +bicycle chain +bier +billiard ball +bin +binding +bin liner +binocular microscope +bioscope +birchbark canoe +bird shot +bistro +bit +bit +black tie +blade +blade +blanket +blimp +blind +block +block plane +blouse +blower +blowtorch +bludgeon +boarding +boarding house +boardroom +boat +bobbin +body +body armor +body lotion +boiler +bolt +bolt +bomb +bomber +bongo +boom +boom +boomerang +boot +booth +booth +bore bit +Boston rocker +bota +bottle +bottle opener +bow +bow +bowed stringed instrument +bowl +bowl +bowline +bowling equipment +bowling pin +bowsprit +box +box +boxcar +boxing equipment +brace +brace +bracelet +bracket +brake +brake system +brass +brasserie +brazier +breechcloth +breeches +brewpub +brick +bricklayer's hammer +brickwork +bridal gown +bridge +briefcase +brigandine +brilliant pebble +brim +broad arrow +broadax +broad hatchet +broadsword +brush +bubble jet printer +buffer +buffet +building +building complex +bulldozer +bullet +bullhorn +bullnose +bundle +bunker +burial chamber +burner +bus +business suit +butt joint +button +buttress +butt shaft +buzz bomb +cabaret +caber +cabin +cabin +cabinet +cabinet +cabin liner +cable +cable +cafe +cafeteria +cafeteria tray +caff +cage +calculator +caliper +calorimeter +camera +camera lens +camera tripod +camp +camp +camp chair +camper +can +canal +candelabrum +candlestick +cane +cannikin +cannon +cannon +cannonball +canopy +canteen +canteen +canvas +canvas tent +cap +cap +cap +capacitor +caparison +cape +cap screw +capsule +car +car +carbine +carbon arc lamp +card index +cardioid microphone +car door +cargo liner +cargo ship +carillon +carpenter's hammer +carpenter's level +carpenter's mallet +carpenter's rule +carpet tack +carriage +carriage +carriage bolt +carrick bend +carrier +car seat +cart +cartridge +cartridge belt +cartridge holder +case +case +cashbox +casque +casserole +cassock +catch +catcher's mask +cathedra +cathedral +cathedral +catheter +cathode +cathode-ray 
tube +cat's-paw +cattle car +cautery +cavalry sword +cedar chest +cell +cell +cellblock +center +centrifuge +ceramic +ceramic ware +chain tongs +chair +chair of state +chalk +chamfer plane +chandlery +chapel +character printer +chassis +chasuble +chatelaine +checker +cheeseboard +chemical reactor +chessman +chest of drawers +child's room +china +chip +chip +chisel +choke +chokey +chordophone +chronoscope +chuck +church key +cigar lighter +circle +circuit +circuit board +circular plane +circular saw +cistern +civilian clothing +clamp +clamshell +clarinet +classroom +clavier +cleaning implement +cleaning pad +clean room +clinic +clip +cloak +clock +closed circuit +closed-circuit television +closet +cloth covering +clothes closet +clothes dryer +clothes hamper +clothes tree +clothing +clothing store +clout nail +clove hitch +clutch +coach +coal car +coal shovel +coat +coat closet +coating +coating +coat of paint +coaxial cable +cocked hat +coffee cup +coffee maker +coffer +coffin +coil +colander +collider +cologne +colonnade +color television +Colt +column +column +comb +comb +combination plane +combine +commissary +commodity +communication system +commutator +compact disk +compartment +compass +compass card +compound lens +compound lever +compressor +computer +computer circuit +computer network +computer screen +computer system +concentration camp +concert grand +concertina +condenser +condenser +condenser microphone +conductor +connecting rod +connection +conservatory +conservatory +contact +container +contrabassoon +control +control panel +control system +convent +converging lens +converter +convertible +conveyance +cooker +cooking utensil +cooler +cooling system +cord +cord +cordage +corner +correctional institution +corset +cosmetic +costume +costume +cotter +cotton +counter +counter +counter +counter tube +country house +coupling +court +court +coverall +covering +cowbarn +craft +cravat +crazy quilt +cream +cream pitcher +crematory +crepe +crib +cricket 
equipment +croquet equipment +crossbar +crossbow +crosspiece +crown jewels +cruiser +cruiser +cruise ship +crystal microphone +cudgel +cuff +cultivator +cup +cupboard +cupola +curb roof +curtain +cutout +cutter +cutting implement +cybercafe +cyclotron +cylinder +cymbal +dado plane +dagger +damper +dart +data converter +data input device +davenport +davenport +davit +dead axle +deck +deck +deck chair +deep-freeze +defensive structure +delay line +delicatessen +dental appliance +denture +depilatory +depressor +depth finder +derrick +destroyer +detector +detector +detonating fuse +detonator +developer +device +dial +dialyzer +diathermy machine +diesel locomotive +digital camera +digital computer +digital display +diner +dinghy +dining car +dining-hall +dining room +dining-room furniture +dining-room table +dinner dress +dinner pail +dinner table +diode +dip +diplomatic building +dipper +DIP switch +directional antenna +directional microphone +direction finder +disguise +dish +dish +disk +dispenser +display +display panel +distillery +ditch +ditch spade +dive bomber +doll +dolmen +domino +door +doorbell +doorlock +doornail +dormer window +dormitory +dot matrix printer +double-breasted suit +double-reed instrument +douche +dovecote +dovetail plane +downstage +drafting instrument +Dragunov +drawstring bag +dray +dredging bucket +dress +dress blues +dressing +dress uniform +drill +electric drill +drill rig +drinking fountain +drinking vessel +drip mat +drip pot +drive +drive +drogue +drogue parachute +drop-leaf table +dry battery +dry dock +dryer +dry masonry +dry wall +dugout canoe +dumdum +dumpcart +dune buggy +dungeon +duplicator +dustmop +dwelling +earphone +earthenware +easel +easy chair +edge tool +eiderdown +elastic bandage +electrical converter +electrical device +electric bell +electric frying pan +electric furnace +electric heater +electric lamp +electric motor +electric refrigerator +electro-acoustic transducer +electrode +electromagnet +electronic balance 
+electronic device +electronic equipment +electronic instrument +electronic voltmeter +electron microscope +electrostatic generator +electrostatic printer +elevator +embankment +embellishment +enamel +enamelware +enclosure +endoscope +engine +engine +ensemble +entrenching tool +epidiascope +equipment +eraser +escutcheon +espadrille +espresso shop +establishment +estaminet +exercise device +exhaust fan +exhibition hall +Exocet +expansion bolt +explosive device +external-combustion engine +extractor +fabric +face mask +face veil +facing +factory +fairlead +false face +fan +farm building +farm machine +fastener +fatigues +faucet +feedback circuit +fence +fencing sword +fender +ferry +fetoscope +field-sequential color television +fife +fifth wheel +fighter +figure eight +file +file server +filling +film +film +filter +filter +finery +finisher +fipple flute +fire +firearm +fire iron +fireplace +firkin +fisherman's bend +fisherman's knot +fisherman's lure +fishing boat +fishing rod +fishnet +flag +flageolet +flambeau +flannelette +flap +flashlight +flask +flatcar +flat tip screwdriver +fleet ballistic missile submarine +flight simulator +flip-flop +floating dock +floor +floor +floor cover +fly +flywheel +fob +foghorn +folder +food hamper +footbath +footbridge +foothold +foot rule +footwear +footwear +forceps +fore-and-aft sail +foremast +fore plane +fore-topmast +fork +formalwear +fortification +fortress +foundation garment +foundry +fragmentation bomb +framework +free-reed instrument +freight train +French door +friary +friction clutch +frigate +frill +frock coat +front projector +fruit machine +full-dress uniform +full metal jacket +funny wagon +fur hat +furnace +furnishing +furniture +fuse +gable +gable roof +gaff +galleon +gallery +galley +galley +gallows +galvanometer +gambling house +game +game equipment +gamp +garage +Garand rifle +garden +garden spade +garden tool +garment +gas burner +gas-discharge tube +gasket +gasoline engine +gate +gatehouse +gatepost 
+gathered skirt +gauge +gauze +gauze +gavel +gear +gear +gear +gearing +general-purpose bomb +generator +generator +Geneva gown +geodesic dome +girder +glass +glider +glove +glyptic art +goal +golf club +golf equipment +Gordian knot +Gothic arch +government building +government office +gown +gramophone +granary +granny knot +grapnel +grapnel +grate +graver +greasy spoon +greatcoat +great hall +greengrocery +grenade +grillroom +groined vault +Guarnerius +guidance system +guided missile +guildhall +guitar +guitar pick +gun +gun carriage +gunlock +gunsight +gun trigger +gurney +gymnastic apparatus +gym shoe +gypsy cab +habergeon +habit +hairdressing +hairpiece +hairpin +half hatchet +half hitch +hall +hall +hammer +hand +handbell +handbow +handcart +hand glass +handloom +hand lotion +hand mower +handsaw +hand shovel +hand tool +handwear +handwheel +hanger +hank +harpsichord +harrow +hash house +hat +hatch +hauberk +hawser bend +hazard +head +head +head covering +headdress +header +headgear +headlight +headsail +headscarf +health spa +heat engine +heater +heat lamp +heat-seeking missile +heavier-than-air craft +heckelphone +hedge +helicopter +helm +helmet +helmet +heraldry +high altar +high-angle gun +high gear +high table +hinge +hip boot +hitch +hoe +hogshead +hoist +holder +holding device +home appliance +homespun +hood +hood +hood +hook +Hoover +hope chest +horn +horn button +horse +horsecar +horse-drawn vehicle +horsehair wig +hosiery +hospital +hospital room +hostel +hot-air balloon +hotel +hotel room +hot tub +house +house +housing +hovel +huarache +humeral veil +hut +hutch +hydraulic brake +hydraulic system +hydroelectric turbine +hydrofoil +hydrometer +hygrometer +hypermarket +hypodermic syringe +ice machine +ice rink +ice skate +icetray +ignition switch +impact printer +implant +implement +imprint +improvised explosive device +inclined plane +indicator +induction coil +ink-jet printer +inkstand +institution +instrument +instrument of punishment +instrument of 
torture +interceptor +interchange +intercommunication system +intercontinental ballistic missile +interface +interior door +internal-combustion engine +ionization chamber +video iPod +iron +jack +jack +jacket +jacket +jack plane +jail +jamb +jar +jeroboam +jet +jet engine +jewelled headdress +jib +jibboom +jiggermast +joint +jointer +joist +jolly boat +jug +jumper +jumper cable +junction +junction +jury mast +kayak +keel +keg +kerchief +kettle +key +key +keyboard +keyboard instrument +khakis +kiln +kinescope +kingbolt +kirk +kit +kit +kitbag +kitchen +kitchen appliance +kitchen utensil +kite balloon +knee-high +knife +knife +knit +knob +lace +lacquer +ladder truck +lag screw +lamasery +laminate +lamination +lamp +lamp +landing gear +land mine +lantern +lapel +lathe +lattice +launcher +lead-acid battery +leather strip +Leclanche cell +leg +legging +lens +lens implant +level +lever +Levi's +lid +life buoy +life jacket +life preserver +lifting device +ligament +light +light-emitting diode +lighter-than-air craft +lighting +light microscope +linear accelerator +line printer +lingerie +lining +liquid crystal display +lister +living quarters +living room +local area network +lock +locomotive +lodge +lodging house +loft +loft +longbow +lookout +loom +loop knot +lota +lounge +loungewear +love knot +lunchroom +luxury liner +lyre +machine +machine +machine bolt +machine gun +machinery +machine screw +machine tool +magic lantern +magnet +magnetic disk +magnetic recorder +magnetic tape +magnifier +magnum +magnus hitch +mailer +mainframe +mainmast +main-topmast +main yard +makeup +mallet +mallet +mallet +mandolin +manger +man-of-war +manometer +MANPAD +mansard +mansion +marina +marker +marketplace +maser +mask +masonry +mass spectrometer +mast +mast +mat +mat +match +match +match plane +material +materiel +Matthew Walker +maul +measure +measuring instrument +measuring stick +mechanical device +mechanical system +mechanism +medical building +medical instrument +memorial +memory 
+memory chip +memory device +menhir +man's clothing +mercantile establishment +mercury barometer +mercury thermometer +mercury-vapor lamp +mess +metal screw +meteorological balloon +meter +meterstick +microbalance +microfilm +microscope +military hospital +military quarters +military vehicle +mill +milldam +millinery +mine +minibike +mink +minster +Minuteman +mirror +mixer +mizzenmast +module +mold +moldboard plow +monitor +monitor +morgue +mortise joint +motion-picture camera +motion-picture film +motor +motorboat +motorcycle +motor hotel +motor vehicle +mound +mount +mouse button +movie projector +moving-coil galvanometer +mug +multiplex +multiplexer +musette pipe +mushroom anchor +musical instrument +musket +musket ball +muslin +muzzle loader +narrowbody aircraft +nautilus +navigational system +naval equipment +naval gun +naval radar +naval weaponry +navigational instrument +nebuchadnezzar +neckline +neckpiece +necktie +neckwear +needle +needlework +negligee +net +net +net +net +network +network +night bell +nightwear +noisemaker +nonsmoker +non-volatile storage +nose flute +nuclear reactor +nuclear weapon +nursery +oar +oblique bandage +oboe da caccia +oboe d'amore +obstacle +office +office furniture +oil lamp +oil paint +oil tanker +olive drab +omnidirectional antenna +onion dome +open-air market +open-end wrench +opener +openside plane +ophthalmoscope +optical device +optical disk +optical instrument +optical telescope +organ pipe +outbuilding +outerwear +outfit +outrigger canoe +outside mirror +oven +overgarment +overhand knot +overhang +overhead projector +overnighter +overshoe +oxford +package +packaging +packing box +paddle +paddle steamer +page printer +paint +pallium +pan +pan +panic button +panopticon +panopticon +pantechnicon +pantry +pants suit +panzer +paper chain +paper fastener +parabolic reflector +parapet +parasail +parka +parsonage +particle detector +partition +passenger ship +passenger train +passenger van +passive matrix display +passkey 
+patch +patchouli +patchwork +patina +patisserie +pavis +peavey +pedal +pedestal table +pedestrian crossing +pedicab +peg +pen +penal institution +pencil +pendulum +pendulum clock +percolator +percussion instrument +perfumery +peripheral +periwig +personal computer +petticoat +Phillips screw +Phillips screwdriver +phonograph record +photographic equipment +photographic paper +photometer +physical pendulum +piano +piccolo +pick +pick +pickle barrel +piece of cloth +pile +pillow lace +pilothouse +pin +pincer +pinstripe +pipe +pipet +pipe wrench +pistol +pivot +place of business +place of worship +planetarium +planner +plant +planter +plasterboard +plastic laminate +plastic wrap +plastron +plate +platform +platform +platform rocker +plating +pleat +plethysmograph +plexor +pliers +plug +plug +pneumatic drill +pocket +pocket-handkerchief +pocketknife +pointed arch +polyester +polygraph +pomade +pontifical +pool ball +poorhouse +porcelain +porch +portable computer +portico +post +posthole digger +pot +potential divider +potpourri +pottery +pouch +poultice +powder +powder keg +power brake +power mower +power saw +power shovel +power tool +press +press +pressure dome +pressure gauge +pressure suit +printed circuit +printer +prison camp +prod +prolonge knot +prompter +prong +propeller +propeller plane +prosthesis +protective covering +protective garment +pruning saw +pruning shears +public house +public toilet +public transport +pull +pull chain +pulley +Pullman +pullover +pulse counter +pump +pump +pump house +punch +punch press +purifier +push broom +push button +pusher +puzzle +pyrometer +pyx +QWERTY keyboard +racing boat +rack +rack +radar +radiogram +radio interferometer +radio link +radiometer +radio receiver +radiotelegraph +radiotelephone +radio transmitter +raft +rail +rail fence +railing +raincoat +rake +ramp +rampart +random-access memory +rayon +razor +reaction-propulsion engine +reactor +reading lamp +reading room +read-only memory +rearview mirror +receiver 
+receptacle +reception room +recess +reconnaissance plane +recorder +recording +record player +recreation room +recycling bin +reed stop +reef knot +refectory table +refinery +reflecting telescope +reflector +reformatory +refracting telescope +refrigerator car +refuge +regalia +regimentals +regulator +rein +religious residence +removable disk +repair shop +repeating firearm +reproducer +rescue equipment +reservoir +reset button +residence +resistor +resonator +respirator +restraint +retort +rheostat +rib +ribbed vault +riddle +ride +riding boot +riding mower +rifle ball +rig +rink +river boat +road +roadway +robe +rocket +rocket +rod +roller +roller +in-line skate +roller blind +roller coaster +rolling hitch +Rolodex +Roman building +roof +roof +room +roost +rope +rose water +rotary engine +rotating mechanism +rotating shaft +rotisserie +rotor +round arch +router plane +row house +royal mast +rubber bullet +rug +rushlight +sable +sable coat +sack +sackbut +sacking +saddle +safe +safety belt +safety curtain +safety fuse +safety match +sail +sailboat +sailing vessel +salver +sandglass +sash +satellite +satellite television +saucepan +savings bank +saw +sawhorse +scale +scarf +school +scientific instrument +scissors +scoop +scratcher +screen +screen +screen +screw eye +scrub plane +scuffer +sculpture +sea boat +sea chest +seam +seaplane +seat +seat +second hand +secretary +security system +seeker +selector +self-propelled vehicle +semiautomatic firearm +semiautomatic pistol +semiconductor device +serger +serpent +serving cart +serving dish +set +setscrew +setscrew +sewing needle +sextant +shackle +shade +shaft +shag rug +shaker +shaper +shaping tool +sharpener +shaving cream +shaving foam +shawl +shawm +shears +sheath +shed +sheepshank +sheet bend +shelf +shell +shell +shell +shellac +shelter +shelter +shelter +shield +ship +shipboard system +shirt +shirtfront +shock absorber +shoe +shooting brake +shop +short pants +shotgun +shoulder holster +shrine +shutter +shuttle 
+sidewinder +sieve +sifter +sights +signaling device +signboard +silk +simulator +single bed +single-breasted suit +single-reed instrument +sitz bath +six-pack +skate +skein +skeleton +skewer +skidder +skid lid +skiff +ski pole +skirt +ski tow +skullcap +slack suit +slat +sled +sleeper +sleeping car +sleeve +sleeve +slide projector +slipknot +slipper +sloop +slop pail +slot machine +small boat +smart bomb +smoker +smooth plane +snack bar +snap-brim hat +snare drum +sniper rifle +Sno-cat +soapbox +socle +sofa +sonograph +sorter +sound recording +soup ladle +source of illumination +soutane +spacecraft +spade +spar +spatula +spear +spear +spectacles +spectrograph +spectroscope +speedometer +spider +spike +spike +spinet +spinning machine +spiral ratchet screwdriver +spiral spring +spit +spokeshave +sponge mop +spoon +sports equipment +sports implement +sportswear +spot +spring +spring balance +springboard +sprit +square +square knot +squash racket +squawk box +squeezer +squinch +stabilizer +stabilizer +stable gear +stadium +stall +stamp mill +stand +standard cell +staple +starter +state prison +station +statue +stay +steakhouse +stealth aircraft +stealth bomber +stealth fighter +steam bath +steamboat +steamer +steam iron +steam whistle +steel mill +steelyard +steeple +steering system +step +step-up transformer +stereo +stick +stick +still +stilt +Stinger +stock +stockcar +stock car +stocking +stonework +stool +stopper knot +storage battery +storage space +storeroom +stove +stove bolt +Stradavarius +straight chair +strap +strap +stringed instrument +strip +strongbox +stronghold +strongroom +structural member +structure +stylus +submachine gun +submersible +submersible +subwoofer +suction pump +suede cloth +sunbonnet +sunhat +supermarket +superstructure +supply chamber +support +support +support column +supporting structure +supporting tower +surface lift +surface-to-air missile +surgeon's knot +surgical instrument +surgical knife +surplice +surveillance system 
+surveying instrument +surveyor's level +swamp buggy +sweater +swimsuit +sword +synchrotron +system +tabi +table +table +table knife +tableware +tabor +tachometer +tack +tack hammer +talaria +tambour +tambourine +tampon +tank +tank car +tannoy +tape +tape deck +tape recorder +target +tavern +tea chest +teaching aid +tea gown +teashop +teaspoon +tea-strainer +tea tray +telecommunication system +telephone +telephone line +telephone receiver +telephone system +telephone wire +telescope +television antenna +television camera +television equipment +television monitor +temple +temple +tender +tennis racket +tenor drum +tenoroon +tenpenny nail +tent +tenterhook +terminal +terminal +test rocket +tetraskelion +textile machine +textile mill +theater +theodolite +thermometer +thermostat +three-piece suit +three-way switch +thumbscrew +thumbtack +tights +tile +timber +timber hitch +timbrel +time-fuse +timepiece +timer +time-switch +tire chain +tithe barn +toecap +toga +toggle switch +toilet +toilet powder +toiletry +toilet water +token +tomograph +toner +tongs +tool +toolbox +tooth +toothbrush +top +top +topgallant +topmast +topsail +torpedo +torpedo boat +touch screen +towel +toweling +tower +toy box +track +tracked vehicle +trailer +trailer +train +trammel +transdermal patch +transformer +transistor +transmission +transmitter +transporter +trap +trapeze +travel iron +treasure chest +trellis +trench +trial balloon +triclinium +troop carrier +trough +trouser +trowel +truck +trunk +try square +tube +tuck shop +tun +tunic +turbine +Turkish towel +Turk's head +turner +turntable +turtleneck +tweed +tweeter +twenty-two +two-piece +typesetting machine +typewriter +ultraviolet lamp +undercarriage +undergarment +underpants +underwear +uneven parallel bars +uniform +university +uplift +urn +urn +utensil +vacuum flask +valve +van +van +varnish +vehicle +veranda +vertical file +vessel +vessel +vest +vibrator +vibrator +videocassette +video recording +vigil light +viol +vise +vivarium 
+voltaic cell +voltmeter +wagon +waist pack +walking stick +wall +wall +wall unit +ward +warehouse +warship +wash +washer +washtub +watch +watchtower +water-base paint +water butt +water cart +watercolor +water-cooled reactor +water gauge +water ski +waterwheel +weapon +weaponry +weatherglass +weathervane +web +wedge +wedge +weighbridge +weight +weir +weld +well +whaler +wheel +wheelchair +wheeled vehicle +wheelwork +whetstone +whip +whisk +whispering gallery +white goods +whorehouse +wicker basket +widebody aircraft +winch +Winchester +wind instrument +window +window +window blind +window envelope +Windsor knot +wine bucket +wine cask +wineglass +wire +wire +wire matrix printer +wiring +woman's clothing +wood +woodenware +woodscrew +woodwind +woofer +workbasket +workbench +work-clothing +worktable +workwear +wrapping +wrench +writing desk +writing implement +X-ray film +X-ray machine +yacht chair +yard +yard +yardstick +yoke +zither +zoot suit +grain +light +colorlessness +chromatic color +black +gray +dark red +orange +yellow +green +blue +purple +reddish purple +pink +light brown +reddish brown +complexion +skin +epidermal cell +columnar cell +macule +specimen +milk +embryonic cell +leukocyte +neutrophil +astrocyte +exoskeleton +medium +film +press +print media +storage medium +journalism +photojournalism +newspaper +telecommunication +telephone +call +long distance +wireless +broadcasting +television +reception +chat room +portal site +wordbook +album +concept album +magazine +movie +sign +comestible +course +dainty +dish +fare +diet +dietary supplement +liquid diet +reducing diet +vegetarianism +ration +field ration +foodstuff +starches +concentrate +meal +roughage +flour +wheat flour +nutriment +commissariat +canned food +canned meat +meal +breakfast +lunch +dinner +supper +buffet +picnic +cookout +bite +entree +side dish +casserole +chicken casserole +appetizer +cocktail +hors d'oeuvre +relish +dip +soup +madrilene +broth +broth +chowder +clam chowder +stew 
+goulash +fish stew +fricassee +ragout +ready-mix +powdered sugar +granulated sugar +brown sugar +sweet +confiture +candy +hard candy +patty +brittle +chewing gum +candied fruit +candied citrus peel +fudge +gumdrop +mint +kiss +lozenge +taffy +dessert +dumpling +frozen dessert +mousse +mousse +whip +pudding +pudding +tipsy cake +ice +chocolate ice cream +Neapolitan ice cream +peach ice cream +strawberry ice cream +tutti-frutti +vanilla ice cream +split +pudding +custard +pastry +turnover +puff paste +phyllo +fish cake +conserve +jam +jelly +apple jelly +marmalade +gelatin +gelatin dessert +patty +stuffing +bread +breadstick +bun +cracker +dark bread +flatbread +loaf of bread +quick bread +rye bread +toast +white bread +French bread +cornbread +johnnycake +muffin +scone +onion roll +sweet roll +onion bagel +biscuit +baking-powder biscuit +soft pretzel +sandwich +hamburger +gruel +edible fruit +vegetable +crudites +legume +greens +solanaceous vegetable +root vegetable +potato +baked potato +sweet potato +snack food +corn chip +tortilla chip +cruciferous vegetable +cabbage +kale +red cabbage +savoy cabbage +squash +summer squash +yellow squash +winter squash +turban squash +gherkin +sprout +beet +pepper +sweet pepper +hot pepper +chili +jalapeno +onion +Spanish onion +salad green +lettuce +butterhead lettuce +bean +pea +green pea +common bean +fresh bean +green bean +shell bean +lima bean +soy +celery +chicory +coffee substitute +chicory escarole +corn +hominy +cress +tomato +cherry tomato +salsify +turnip +edible nut +apple +eating apple +Delicious +McIntosh +Pippin +cooking apple +berry +currant +citrus +temple orange +mandarin +bitter orange +sweet orange +Jaffa orange +navel orange +Valencia orange +lime +almond +plum +dried fruit +raisin +passion fruit +cocoa +melon +muskmelon +winter melon +cherry +sweet cherry +heart cherry +sour cherry +grape +fox grape +muscadine +slipskin grape +vinifera grape +Tokay +cherimoya +soursop +sweetsop +ilama +pond apple +olive 
+pear +edible seed +walnut +feed +fodder +oil cake +timothy +grain +barley +wheat +rice +mash +bird feed +petfood +salad +tossed salad +combination salad +pasta salad +fruit salad +ingredient +flavorer +condiment +herb +spice +cinnamon +pepper +garlic +mustard +sage +savory +curry +paprika +pickle +sweet pickle +vinegar +sauce +hot sauce +dressing +mayonnaise +cheese sauce +hot-fudge sauce +white sauce +spaghetti sauce +boiled egg +hard-boiled egg +Easter egg +omelet +firm omelet +souffle +dairy product +milk +milk +powdered milk +cream +butter +clarified butter +yogurt +curd +cheese +cream cheese +bleu +cheddar +Swiss cheese +spread +pate +sweetening +sugar +syrup +batter +bread dough +chicken and rice +pasta +Tetrazzini +chili dog +fondue +fondue +hash +kabob +seafood Newburg +meatball +pilaf +sausage pizza +pepperoni pizza +cheese pizza +anchovy pizza +Sicilian pizza +porridge +fish loaf +salmon loaf +scallopine +taco +beef burrito +quesadilla +tostada +beverage +concoction +mix +filling +potion +elixir +alcohol +brew +beer +lager +Weissbier +malt +ale +stout +mead +wine +white wine +sparkling wine +Burgundy +Beaujolais +Medoc +Pinot noir +Bordeaux +claret +Chianti +Cabernet +Merlot +dessert wine +Rhine wine +Rioja +Saint Emilion +zinfandel +table wine +vermouth +fortified wine +Madeira +liquor +brandy +gin +rum +whiskey +corn whiskey +Irish +Scotch +liqueur +coffee liqueur +orange liqueur +mixed drink +cocktail +highball +Bloody Mary +daiquiri +manhattan +martini +sling +sour +caffe latte +cider +sweet cider +juice +fruit juice +grape juice +orange juice +fruit drink +mulled wine +soft drink +cola +coffee +punch +champagne cup +claret cup +rickey +tea +tea +herb tea +tisane +black tea +green tea +water +drinking water +mineral water +vitamin pill +collection +suburb +residence +littoral +grassland +pasture +resort +field +air bubble +arroyo +ascent +atoll +bank +bank +bar +barrier reef +basin +beach +burrow +canyon +cave +continental glacier +crag +crater +dale 
+descent +draw +dune +geological formation +glacier +glen +gorge +gulch +gully +highland +hill +hillside +hole +hollow +iceberg +ice mass +ion +knoll +landfall +landfill +lather +ledge +lowland +meteorite +mountain +mull +natural depression +natural elevation +nullah +ocean floor +outcrop +plain +point +precipice +ravine +reef +ridge +ridge +rift valley +rock +sandbank +seaside +shiner +shore +slope +soapsuds +spume +tableland +tideland +volcanic crater +wadi +spiritual leader +adventurer +anomaly +benefactor +commoner +contestant +discussant +entertainer +female +finisher +inhabitant +native +juvenile +lover +male +mediator +national +peer +recipient +sensualist +traveler +unwelcome person +unskilled person +worker +wrongdoer +Black +White +Semite +white man +Mongol +Nahuatl +Caddo +Penutian +Teton +Taracahitian +Slav +Catholic +Altaic +Bornean +Canadian +Central American +Britisher +English person +Englishwoman +Ethiopian +Parisian +Greek +Italian +Japanese +Mexican +Nigerian +North American +Pakistani +South American Indian +Filipino +Polynesian +Scandinavian +South African +South American +Turki +American +New Yorker +abbess +abstainer +academic administrator +accomplice +acquaintance +acquirer +aerialist +actor +actor +addict +adjutant +admirer +adulterer +advertiser +advocate +analyst +ancestor +announcer +announcer +appointee +appreciator +appropriator +archbishop +architect +army engineer +army officer +arrival +articulator +asserter +assistant +associate +astronaut +athlete +attendant +aunt +authoritarian +authority +aviator +back +bad person +ballet dancer +bullfighter +baron +bartender +baseball coach +base runner +basketball player +believer +betrothed +bigot +big shot +biochemist +bisexual +boatman +bond servant +botanist +Boy Scout +buddy +campaigner +captain +card player +careerist +caretaker +cavalryman +celebrity +charmer +child +child +cipher +citizen +civil rights leader +cleaner +clergyman +cleric +clerk +climber +closer +clown +coach +cobbler 
+collaborator +college student +collegian +commanding officer +commissioned officer +commissioned military officer +commissioner +committee member +communist +compulsive +computer scientist +computer user +contractor +convict +copycat +counselor +craftsman +creditor +critic +curate +dancer +dancer +darling +date +daughter +dawdler +deacon +deaf person +debtor +deliveryman +descender +designated hitter +detective +detractor +director +disbeliever +dispatcher +distributor +doctor +domestic partner +draftsman +drinker +drinker +drug addict +drug user +drummer +drunkard +eager beaver +earner +eavesdropper +economist +editor +egotist +elder +elected official +emissary +employee +employer +endomorph +enemy +entrant +examiner +exhibitionist +fan +fancier +farmer +farmhand +fascist +father +female aristocrat +female offspring +female child +fielder +fireman +first baseman +first sergeant +flag officer +flatterer +foe +folk dancer +follower +football player +forefather +forger +founder +free agent +friar +monk +gambler +generator +geneticist +genitor +geologist +girl +godchild +godparent +golfer +grandma +grandmaster +grandparent +granter +great grandchild +great grandparent +grouch +guard +guest +guide +gymnast +Gypsy +hack +hairdresser +hater +headmaster +hearer +hedonist +heir +herder +homeless +horseman +host +host +hypocrite +important person +incumbent +infielder +informer +in-law +insurgent +investigator +investor +journalist +judge +juror +Counsel to the Crown +kinswoman +laborer +lama +landowner +lawgiver +lawman +lawyer +liberator +lieutenant +lineman +literate +litigant +Lord +failure +lowerclassman +lumberman +maid +maker +malcontent +martinet +master of ceremonies +masturbator +medical officer +medical practitioner +medical scientist +mender +meteorologist +middle-aged man +miler +military attache +military officer +military policeman +minister +minor leaguer +misfit +mixed-blood +model +moneymaker +mother +mourner +mover +musician +Muslimah +mystic +nanny 
+neonate +nephew +neutral +newcomer +newcomer +newspaper editor +niece +noncommissioned officer +nurse +observer +occultist +oldster +old woman +opportunist +orator +originator +outfielder +right fielder +right-handed pitcher +painter +panelist +pardoner +parodist +party +passenger +patient +patron +payer +peddler +percussionist +personal representative +personification +pervert +petitioner +Pharaoh +phonetician +physical therapist +physicist +pimp +pisser +pitcher +planner +player +poet +politician +practitioner +prayer +preserver +president +priest +princess +principal +proctor +programmer +promiser +propagandist +prosecutor +psychic +pusher +queen +queen +ranch hand +reader +recruit +recruiter +religious leader +repairman +reporter +representative +reprobate +rescuer +reservist +restrainer +retailer +retiree +revolutionist +rich person +civil authority +runner +running back +rustic +saboteur +sailor +salesman +salesperson +scalper +schemer +scholar +schoolchild +scientist +second baseman +secretary +seeker +selfish person +seller +serf +serviceman +settler +shrew +sibling +sick person +singer +sister +skeptic +skier +sleeper +slob +smith +snoop +social climber +socialist +social scientist +sociologist +soldier +son +songster +sorcerer +sovereign +speaker +specialist +spectator +stand-in +star +stepparent +stock trader +stranger +strategist +student +subordinate +suitor +superior +surgeon +sweetheart +sympathizer +tax assessor +taxonomist +teacher +television reporter +tenant +tenant +tennis player +testator +testee +theologian +therapist +thinker +thrower +toastmaster +trader +traffic cop +trainer +traitor +traveling salesman +tyrant +upstart +upstart +utility man +vacationer +vegetarian +vice president +victim +volunteer +votary +waiter +waitress +wanderer +wanton +washer +white supremacist +wife +winner +winner +woman +workman +worshiper +wright +writer +wilding +bryophyte +liverwort +pteridophyte +fern +fern ally +spore +spermatophyte +perennial +gymnosperm 
+ephedra +cycad +sago palm +zamia +pine +pinon +nut pine +white pine +yellow pine +larch +fir +silver fir +cedar +spruce +hemlock +douglas fir +cedar +cypress +arborvitae +araucaria +kauri pine +celery pine +yellowwood +gymnospermous yellowwood +yew +angiosperm +dicot +flower +wildflower +inflorescence +pistil +pericarp +oilseed +custard apple +barberry +allspice +laurel +anise tree +magnolia +moonseed +buttercup +aconite +baneberry +anemone +thimbleweed +columbine +clematis +delphinium +nigella +wax myrtle +zebrawood +legume +legume +darling pea +clover +acacia +wattle +albizzia +nitta tree +dogbane +allamanda +carissa +frangipani +rauwolfia +arum +alocasia +anthurium +caladium +monstera +nephthytis +arrow arum +calla lily +duckweed +watermeal +birthwort +sandwort +mouse-ear chickweed +pink +china pink +lychnis +silene +chickweed +fig marigold +amaranth +orach +saltbush +beet +sand verbena +four o'clock +echinocactus +prickly pear +pokeweed +portulaca +flame flower +caper +spiderflower +crucifer +cress +watercress +rock cress +cabbage +head cabbage +turnip plant +mustard +wallflower +woad +stock +radish plant +pennycress +poppy +prickly poppy +composite +compass plant +everlasting +achillea +ageratum +ragweed +ammobium +burdock +artemisia +mugwort +aster +wood aster +common daisy +bur marigold +calendula +thistle +carline thistle +catananche +centaury +knapweed +chrysanthemum +golden aster +goldenbush +plume thistle +woolly thistle +coreopsis +fleabane +woolly sunflower +cotton rose +gazania +African daisy +cudweed +gumweed +goldenbush +sneezeweed +sunflower +hawkweed +marsh elder +krigia +hawkbit +blazing star +rattlesnake root +daisybush +coneflower +coneflower +cutleaved coneflower +golden thistle +white-topped aster +goldenrod +sow thistle +marigold +dandelion +crownbeard +zinnia +achene +campanula +orchid +orchis +arethusa +helleborine +coral root +lady's slipper +large yellow lady's slipper +helleborine +fringed orchis +rein orchid +spider orchid +moth 
orchid +butterfly orchid +ladies' tresses +vanda +vanilla +yam +primrose +pimpernel +featherfoil +loosestrife +water pimpernel +gramineous plant +grass +wheatgrass +foxtail +broom grass +oat +brome +grama +reed grass +burgrass +crabgrass +lyme grass +wild rye +plume grass +rye grass +ricegrass +meadowgrass +millet +reed +sorghum +grain sorghum +cordgrass +cereal +wheat +corn +mealie +zoysia +bamboo +cotton grass +spike rush +pandanus +cattail +grain +kernel +gourd +gourd +squash +summer squash +marrow +winter squash +turban squash +bryony +sweet melon +luffa +lobelia +mallow +hollyhock +althea +poppy mallow +seashore mallow +globe mallow +tulipwood tree +sterculia +bottle-tree +screw tree +cacao +linden +herb +protea +banksia +grevillea +macadamia +casuarina +beefwood +heath +bearberry +huckleberry +kalmia +rhododendron +cranberry +blueberry +shortia +Australian heath +epacris +wintergreen +pipsissewa +beech +chestnut +tanbark oak +southern beech +New Zealand beech +oak +live oak +white oak +red oak +scrub oak +chestnut oak +birch +alder +hornbeam +hop hornbeam +hazelnut +centaury +gentian +fringed gentian +olive tree +fringe tree +ash +red ash +jasmine +privet +lilac +liquidambar +walnut +hickory +wing nut +loosestrife +myrtle +gum tree +eucalyptus +flooded gum +mallee +stringybark +tupelo +enchanter's nightshade +willowherb +fuchsia +evening primrose +daphne +canna +banana +ginger +begonia +tuberous begonia +poon +St John's wort +rockrose +dipterocarp +candlewood +reseda +viola +violet +nettle +cannabis +mulberry +fig tree +fig +elm +hackberry +iridaceous plant +bearded iris +beardless iris +crocus +amaryllis +blood lily +narcissus +daffodil +liliaceous plant +colicroot +alliaceous plant +kniphofia +poker plant +asphodel +mariposa +globe lily +camas +dogtooth violet +fritillary +tulip +star-of-Bethlehem +grape hyacinth +scilla +false asphodel +bog asphodel +hellebore +death camas +sarsaparilla +Solomon's-seal +bellwort +agave +sansevieria +cassia +locust tree 
+senna +angelim +milk vetch +wild indigo +pea tree +glory pea +rosewood +blackwood +tick trefoil +coral tree +vetchling +wild pea +lupine +medic +mucuna +locoweed +pole bean +pea +edible-pod pea +quira +hoary pea +bush pea +vetch +palm +sago palm +feather palm +fan palm +palmetto +areca +calamus +oil palm +raffia palm +lady palm +eriogonum +rhubarb +water plantain +waterweed +pondweed +rose +agrimonia +flowering quince +cotoneaster +avens +apple tree +wild apple +crab apple +Iowa crab +cinquefoil +plum +wild plum +bullace +apricot +cherry +wild cherry +sweet cherry +sour cherry +almond tree +almond +bird cherry +flowering cherry +chokecherry +fruit tree +bramble bush +raspberry +mountain ash +service tree +spirea +madderwort +coffee +cinchona +bedstraw +genipa +hamelia +honeysuckle +American fly honeysuckle +teasel +scabious +geranium +cranesbill +storksbill +incense tree +mahogany +silver ash +milkwort +citrus +orange +mandarin +lemon +kumquat +prickly ash +bitterwood tree +ailanthus +nasturtium +willow +osier +sallow +poplar +black poplar +cottonwood +aspen +soapberry +soapberry vine +harpullia +pachysandra +spindle tree +maple +box elder +holly +sumac +horse chestnut +persimmon +buckthorn +styrax +carnivorous plant +pitcher plant +sedum +philadelphus +saxifrage +astilbe +alumroot +miterwort +parnassia +currant +plane tree +phlox +acanthus +catalpa +anchusa +comfrey +convolvulus +bindweed +gloxinia +streptocarpus +waterleaf +nemophila +scorpionweed +giant hyssop +bugle +wood mint +calamint +coleus +dead nettle +origanum +horehound +monarda +savory +germander +thyme +blue curls +snapdragon +kitten-tails +Indian paintbrush +foxglove +toadflax +veronica +nightshade +thorn apple +matrimony vine +cupflower +petunia +salpiglossis +spurge +croton +cassava +slipper spurge +camellia +umbellifer +angelica +astrantia +caraway +fennel +parsnip +parsley +sanicle +dogwood +valerian +bristle fern +flowering fern +climbing fern +clover fern +adder's tongue +grape fern +ergot 
+sclerotinia +earthball +Podaxaceae +false truffle +rhizopus +slime mold +cellular slime mold +downy mildew +pythium +Sarcosomataceae +club fungus +lichen +lecanora +fungus +basidiomycete +mushroom +mushroom +mushroom +toadstool +horse mushroom +meadow mushroom +royal agaric +false deathcap +fly agaric +death cap +blushing mushroom +destroying angel +chanterelle +floccose chanterelle +pig's ears +cinnabar chanterelle +jack-o-lantern fungus +inky cap +shaggymane +milkcap +fairy-ring mushroom +oyster mushroom +olive-tree agaric +Pholiota astragalina +Pholiota aurea +Pholiota destruens +Pholiota flammans +Pholiota flavida +nameko +Pholiota squarrosa-adiposa +Pholiota squarrosa +Pholiota squarrosoides +Stropharia ambigua +Stropharia hornemannii +Stropharia rugoso-annulata +Entoloma lividum +Entoloma aprile +Chlorophyllum molybdites +lepiota +parasol mushroom +poisonous parasol +Lepiota naucina +Lepiota rhacodes +American parasol +Lepiota rubrotincta +Lepiota clypeolaria +onion stem +blewits +sandy mushroom +Tricholoma pessundatum +Tricholoma sejunctum +man-on-a-horse +Tricholoma venenata +Tricholoma pardinum +Tricholoma vaccinum +Tricholoma aurantium +Pluteus aurantiorugosus +Pluteus magnus +deer mushroom +straw mushroom +Volvariella bombycina +Clitocybe clavipes +Clitocybe dealbata +Clitocybe inornata +Clitocybe robusta +Clitocybe irina +Clitocybe subconnexa +winter mushroom +mycelium +ascomycete +Clavicipitaceae +yeast +discomycete +morel +Verpa +false morel +lorchel +helvella +Gyromitra californica +Gyromitra sphaerospora +Gyromitra esculenta +Gyromitra infula +Gyromitra gigas +gasteromycete +common stinkhorn +Phallus ravenelii +dog stinkhorn +stinky squid +puffball +Geastrum coronatum +Astreus pteridis +Astreus hygrometricus +polypore +Boletus chrysenteron +Boletus edulis +Frost's bolete +Boletus luridus +Boletus mirabilis +Boletus pallidus +Boletus pulcherrimus +Boletus pulverulentus +Boletus roxanae +Boletus subvelutipes +Boletus variipes +Boletus zelleri 
+Fuscoboletinus paluster +Fuscoboletinus serotinus +Leccinum fibrillosum +Suillus albivelatus +old-man-of-the-woods +Boletellus russellii +jelly fungus +rust +smut +cornsmut +flag smut fungus +waxycap +Hygrocybe acutoconica +Hygrophorus borealis +Hygrophorus caeruleus +Hygrophorus inocybiformis +Hygrophorus kauffmanii +Hygrophorus marzuolus +Hygrophorus purpurascens +Hygrophorus russula +Hygrophorus sordidus +Hygrophorus tennesseensis +Hygrophorus turundus +Neohygrophorus angelesianus +Cortinarius armillatus +Cortinarius atkinsonianus +Cortinarius corrugatus +Cortinarius gentilis +Cortinarius mutabilis +Cortinarius semisanguineus +Cortinarius subfoetidus +Cortinarius violaceus +Gymnopilus spectabilis +Gymnopilus validipes +Gymnopilus ventricosus +mold +mildew +candida +houseplant +succulent +weed +sporophyll +sporangium +poisonous plant +vine +tree +bean tree +gymnospermous tree +conifer +angiospermous tree +nut tree +spice tree +bonsai +subshrub +bramble +liana +desert plant +marsh plant +strangler +root +receptacle +scape +peduncle +flower cluster +raceme +cyme +bulbous plant +fruit +seed +bean +nut +berry +aggregate fruit +drupe +drupelet +pome +pod +husk +buckthorn +vinifera +true pepper +peperomia +bract +palmate leaf +pinnate leaf +dentate leaf +branchlet +polypody +strap fern +staghorn fern +spleenwort +chain fern +davallia +hare's-foot fern +shield fern +wood fern +lady fern +bladder fern +holly fern +woodsia +maidenhair +brittle maidenhair +lip fern +cliff brake +horsetail +club moss +spikemoss +beech fern +shoestring fungus +Armillaria caligata +Armillaria ponderosa +Armillaria zelleri +honey mushroom +milkweed +stapelia +stephanotis +orangery +figure +plane figure +solid figure +line +convex shape +concave shape +cylinder +round shape +polygon +concave polygon +amorphous shape +closed curve +simple closed curve +cone +circle +ring +loop +ellipse +triangle +spherical polygon +angular distance +groove +bulge +bow +balance +toroid +boundary +incisure +notch 
+wrinkle +tree +regular polyhedron +carbon +rock +soil +high explosive +culture medium +agar +paper +paving +plaster +stucco +tear gas +vitamin +fat-soluble vitamin +water-soluble vitamin +vitamin A +B-complex vitamin +vitamin E +vitamin K \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/imagenet1k.data b/workloads/realworld/uvm_prefetch_async/darknet/cfg/imagenet1k.data new file mode 100644 index 0000000000000000000000000000000000000000..daf120a3c020003e8ed08096c51304272ca3ba27 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/imagenet1k.data @@ -0,0 +1,9 @@ +classes=1000 +train = /data/darknet/imagenet_mini/valid.list +valid = /data/darknet/imagenet_mini/valid.list +test = /data/darknet/imagenet_mini/valid.list +backup = /data/darknet/backup/ +labels = /data/darknet/imagenet_mini/imagenet.labels.list +names = /data/darknet/imagenet_mini/imagenet.shortnames.list +top=5 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/imagenet22k.dataset b/workloads/realworld/uvm_prefetch_async/darknet/cfg/imagenet22k.dataset new file mode 100644 index 0000000000000000000000000000000000000000..e25ef007ecceb096e5846ee7cacd1fd54fb8f9e4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/imagenet22k.dataset @@ -0,0 +1,9 @@ +classes=21842 +train = /data/imagenet/imagenet22k.train.list +valid = /data/imagenet/imagenet22k.valid.list +#valid = /data/imagenet/imagenet1k.valid.list +backup = /home/pjreddie/backup/ +labels = data/imagenet.labels.list +names = data/imagenet.shortnames.list +top = 5 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/imagenet9k.hierarchy.dataset b/workloads/realworld/uvm_prefetch_async/darknet/cfg/imagenet9k.hierarchy.dataset new file mode 100644 index 0000000000000000000000000000000000000000..41fb71b065544b919bc8ed7d723afb5d04ad85ac --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/imagenet9k.hierarchy.dataset @@ -0,0 +1,9 @@ 
+classes=9418 +train = data/9k.train.list +valid = /data/imagenet/imagenet1k.valid.list +leaves = data/imagenet1k.labels +backup = /home/pjreddie/backup/ +labels = data/9k.labels +names = data/9k.names +top=5 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/jnet-conv.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/jnet-conv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..056f82aa6e2a0710a664c4740ab763961e4de33d --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/jnet-conv.cfg @@ -0,0 +1,118 @@ +[net] +batch=1 +subdivisions=1 +height=10 +width=10 +channels=3 +learning_rate=0.01 +momentum=0.9 +decay=0.0005 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +stride=2 +size=2 + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/openimages.data b/workloads/realworld/uvm_prefetch_async/darknet/cfg/openimages.data new file mode 100644 index 
0000000000000000000000000000000000000000..fa80e5ab7d8576d391c7cac9dfc8367aab704139 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/openimages.data @@ -0,0 +1,8 @@ +classes= 601 +train = /home/pjreddie/data/openimsv4/openimages.train.list +#valid = coco_testdev +valid = data/coco_val_5k.list +names = data/openimages.names +backup = /home/pjreddie/backup/ +eval=coco + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet101.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet101.cfg new file mode 100644 index 0000000000000000000000000000000000000000..de458820bcd35f5e65d858f9f661e42653ed0184 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet101.cfg @@ -0,0 +1,990 @@ +[net] +# Training +# batch=128 +# subdivisions=2 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 
+size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + + +[convolutional] +filters=1000 
+size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + +[cost] +type=sse + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet152.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet152.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e8e3297ac2364b95f28fa0a0bdd4ca71f14ac82c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet152.cfg @@ -0,0 +1,1460 @@ +[net] +# Training +# batch=128 +# subdivisions=8 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +max_crop=448 +channels=3 +momentum=0.9 +decay=0.0005 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky 
+ +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 
+activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet18.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet18.cfg new file mode 100644 index 0000000000000000000000000000000000000000..275f4bdb5962d77c16f353cd3d2751e189b9344c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet18.cfg @@ -0,0 +1,228 @@ +[net] +# Training +# batch=128 +# subdivisions=1 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + 
+[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet18_b.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet18_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c0712e9cdfaf1f28dbe3a358355405d2758e6d2b --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet18_b.cfg @@ -0,0 +1,228 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 
+activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet18_t.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet18_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c0712e9cdfaf1f28dbe3a358355405d2758e6d2b --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet18_t.cfg @@ -0,0 +1,228 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + + +# 
Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky 
+from=-3 + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet34.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet34.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9f68f096741ae3b4898f40b76af7569d4697729f --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet34.cfg @@ -0,0 +1,392 @@ +[net] +# Training +# batch=128 +# subdivisions=2 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 
+size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Strided Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=256 +size=3 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + +# Residual Block +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=linear + +[shortcut] +activation=leaky +from=-3 + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet50.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet50.cfg new file mode 100644 index 0000000000000000000000000000000000000000..d0d7c511516e997a392bb5ba77682740c0494972 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet50.cfg @@ -0,0 +1,510 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 
+size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet50_b.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet50_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..462479918fd608c4a0761b84c7299e51081193c6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet50_b.cfg @@ -0,0 +1,510 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + 
+[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet50_t.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet50_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..462479918fd608c4a0761b84c7299e51081193c6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnet50_t.cfg @@ -0,0 +1,510 @@ +[net] +# Training +batch=32 +subdivisions=1 + +# Testing +# batch=1 +# subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=8 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 
+aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 
+stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnext101-32x4d.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnext101-32x4d.cfg new file mode 100644 index 0000000000000000000000000000000000000000..8538ccc3daee2e3de589eb4e2edf868340d4924b --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnext101-32x4d.cfg @@ -0,0 +1,1053 @@ +[net] +# Training +# batch=128 +# subdivisions=8 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 
+max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 
+activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 
+pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 
+pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnext152-32x4d.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnext152-32x4d.cfg new file mode 100644 index 0000000000000000000000000000000000000000..48279fd28eb0dbe23c7b0c593f67051cb6a62374 --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnext152-32x4d.cfg @@ -0,0 +1,1562 @@ +[net] +# Training +# batch=128 +# subdivisions=16 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=128 
+size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + 
+[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 
+size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 
+size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +groups = 32 +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=4096 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + + + +[avgpool] + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnext50.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnext50.cfg new file mode 100644 index 0000000000000000000000000000000000000000..12aebdf6fbd48bde40ee22c4257e06f2e0cf46eb --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch_async/darknet/cfg/resnext50.cfg @@ -0,0 +1,523 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +min_crop=128 +max_crop=448 + +burn_in=1000 +learning_rate=0.1 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.0005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + + +# Conv 4 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +#Conv 5 +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +groups=32 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=2048 +size=1 +stride=1 +pad=1 +activation=linear + +[shortcut] +from=-4 +activation=leaky + +[avgpool] + +[convolutional] 
+filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/rnn.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/rnn.cfg new file mode 100644 index 0000000000000000000000000000000000000000..61b202f3a441b701f76d9b007c6276467c639e11 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/rnn.cfg @@ -0,0 +1,38 @@ +[net] +subdivisions=1 +inputs=256 +batch = 1 +momentum=0.9 +decay=0.001 +max_batches = 2000 +time_steps=1 +learning_rate=0.1 +policy=steps +steps=1000,1500 +scales=.1,.1 + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[connected] +output=256 +activation=leaky + +[softmax] + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/rnn.train.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/rnn.train.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b9748990aceaa85cc2e43358073114606725dcbd --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/rnn.train.cfg @@ -0,0 +1,38 @@ +[net] +subdivisions=1 +inputs=256 +batch = 128 +momentum=0.9 +decay=0.001 +max_batches = 2000 +time_steps=576 +learning_rate=0.1 +policy=steps +steps=1000,1500 +scales=.1,.1 + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[rnn] +batch_normalize=1 +output = 1024 +hidden=1024 +activation=leaky + +[connected] +output=256 +activation=leaky + +[softmax] + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/strided.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/strided.cfg new file mode 100644 index 0000000000000000000000000000000000000000..2f745085adc268a3e99bd7895bd4dda28227bffd --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch_async/darknet/cfg/strided.cfg @@ -0,0 +1,182 @@ +[net] +batch=128 +subdivisions=4 +height=256 +width=256 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.01 +policy=steps +scales=.1,.1,.1 +steps=200000,300000,400000 +max_batches=800000 + + +[crop] +crop_height=224 +crop_width=224 +flip=1 +angle=0 +saturation=1 +exposure=1 +shift=.2 + +[convolutional] +filters=64 +size=7 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=192 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=128 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=256 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=1024 +size=3 +stride=2 +pad=1 +activation=ramp + +[convolutional] +filters=512 +size=1 +stride=1 +pad=1 +activation=ramp + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=ramp + +[maxpool] +size=3 +stride=2 + +[connected] +output=4096 +activation=ramp + +[dropout] +probability=0.5 + +[connected] +output=1000 
+activation=ramp + +[softmax] + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/t1.test.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/t1.test.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b3628114e048dc78f4797342afd95a757c81c977 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/t1.test.cfg @@ -0,0 +1,117 @@ +[net] +batch=1 +subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,20000,30000 +scales=2.5,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[connected] +output= 1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/tiny.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..f97327cfceebf868998bf2bb16224bd0994ebd82 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/tiny.cfg @@ -0,0 +1,174 @@ +[net] +# Train +batch=128 
+subdivisions=1 +# Test +# batch=1 +# subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=320 + +learning_rate=0.1 +policy=poly +power=4 +max_batches=1600000 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=16 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/vgg-16.cfg 
b/workloads/realworld/uvm_prefetch_async/darknet/cfg/vgg-16.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c73b17b6ccfdcc9cae9b67591b662571463569ab --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/vgg-16.cfg @@ -0,0 +1,157 @@ +[net] +# Training +# batch=128 +# subdivisions=4 + +# Testing +batch=1 +subdivisions=1 + +height=256 +width=256 +channels=3 +learning_rate=0.00001 +momentum=0.9 +decay=0.0005 + +[crop] +crop_height=224 +crop_width=224 +flip=1 +exposure=1 +saturation=1 +angle=0 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=4096 +activation=relu + +[dropout] +probability=.5 + +[connected] +output=1000 +activation=linear + +[softmax] +groups=1 + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/vgg-conv.cfg 
b/workloads/realworld/uvm_prefetch_async/darknet/cfg/vgg-conv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..21e1d724c9418107f9cf82f9bffb9ae64d3e2084 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/vgg-conv.cfg @@ -0,0 +1,121 @@ +[net] +batch=1 +subdivisions=1 +width=224 +height=224 +channels=3 +learning_rate=0.00001 +momentum=0.9 +decay=0.0005 + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=64 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=128 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=256 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[convolutional] +filters=512 +size=3 +stride=1 +pad=1 +activation=relu + +[maxpool] +size=2 +stride=2 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/voc.data b/workloads/realworld/uvm_prefetch_async/darknet/cfg/voc.data new file mode 100644 index 0000000000000000000000000000000000000000..7807b5d2a8fd0f855a8c68e82c064dc320551da1 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/voc.data @@ -0,0 +1,6 @@ +classes= 20 +train = /home/pjreddie/data/voc/train.txt +valid = /home/pjreddie/data/voc/2007_test.txt +names = data/voc.names +backup = backup + 
diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/writing.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/writing.cfg new file mode 100644 index 0000000000000000000000000000000000000000..1ed899bcd63d6354e8320ace7e7f513ba1174886 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/writing.cfg @@ -0,0 +1,41 @@ +[net] +batch=128 +subdivisions=2 +height=256 +width=256 +channels=3 +learning_rate=0.00000001 +momentum=0.9 +decay=0.0005 +seen=0 + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1 +size=3 +stride=1 +pad=1 +activation=logistic + +[cost] + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolo9000.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolo9000.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e745f78a6e37611fb0f13c2d848c292cea1a89d3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolo9000.cfg @@ -0,0 +1,218 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +batch=1 +subdivisions=1 +height=544 +width=544 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +hue=.1 +saturation=.75 +exposure=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + 
+[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=28269 +size=1 +stride=1 +pad=1 +activation=linear + +[region] +anchors = 0.77871, 1.14074, 3.00525, 4.31277, 9.22725, 9.61974 +bias_match=1 +classes=9418 +coords=4 +num=3 +softmax=1 +jitter=.2 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +thresh = .6 +absolute=1 +random=1 + +tree=data/9k.tree +map = data/coco9k.map diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov1-tiny.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov1-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a5e7b4920289ccb507a3a0356a33362bc7633581 --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov1-tiny.cfg @@ -0,0 +1,130 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=448 +width=448 +channels=3 +momentum=0.9 +decay=0.0005 + +saturation=.75 +exposure=.75 +hue = .1 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,800,20000,30000 +scales=2.5,2,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[connected] +output= 1470 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=2 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov1.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov1.cfg new file mode 100644 index 0000000000000000000000000000000000000000..06cf6e676170e41d24e63ec08d7b27a31c411718 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov1.cfg @@ -0,0 +1,261 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=448 +width=448 
+channels=3 +momentum=0.9 +decay=0.0005 +saturation=1.5 +exposure=1.5 +hue=.1 + +learning_rate=0.0005 +policy=steps +steps=200,400,600,20000,30000 +scales=2.5,2,2,.1,.1 +max_batches = 40000 + +[convolutional] +batch_normalize=1 +filters=64 +size=7 +stride=2 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[local] +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[dropout] +probability=.5 + +[connected] +output= 1715 +activation=linear + +[detection] +classes=20 +coords=4 +rescore=1 +side=7 +num=3 +softmax=0 +sqrt=1 +jitter=.2 + +object_scale=1 +noobject_scale=.5 +class_scale=1 +coord_scale=5 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov2-tiny-voc.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov2-tiny-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c4c127cdd352bd98b3b7a3336d5c3b2efc6fadcd --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov2-tiny-voc.cfg @@ -0,0 +1,138 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +max_batches = 40200 +policy=steps +steps=-1,100,20000,30000 +scales=.1,10,.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 
+filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=125 +activation=linear + +[region] +anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 +bias_match=1 +classes=20 +coords=4 +num=5 +softmax=1 +jitter=.2 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov2-tiny.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov2-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..81d0ac45d6dca10f50875bfe85f7496ded8e0f63 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov2-tiny.cfg @@ -0,0 +1,139 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 
+activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=425 +activation=linear + +[region] +anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 +bias_match=1 +classes=80 +coords=4 +num=5 +softmax=1 +jitter=.2 +rescore=0 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov2-voc.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov2-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..dbf2de281c1200cb4889409c616e775080823268 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov2-voc.cfg @@ -0,0 +1,258 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +height=416 +width=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 80200 +policy=steps +steps=40000,60000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[route] +layers=-9 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=64 +activation=leaky + +[reorg] +stride=2 + +[route] +layers=-1,-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=125 +activation=linear + + +[region] +anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 
8.09892, 9.47112, 4.84053, 11.2364, 10.0071 +bias_match=1 +classes=20 +coords=4 +num=5 +softmax=1 +jitter=.3 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov2.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov2.cfg new file mode 100644 index 0000000000000000000000000000000000000000..088edf81573e83c59edd7137cbc07b6fe1433591 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov2.cfg @@ -0,0 +1,258 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[route] +layers=-9 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=64 +activation=leaky + +[reorg] +stride=2 + +[route] +layers=-1,-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=425 +activation=linear + + +[region] +anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 +bias_match=1 +classes=80 +coords=4 +num=5 +softmax=1 +jitter=.3 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-openimages.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-openimages.cfg new file mode 100644 index 0000000000000000000000000000000000000000..65d241a74c4c4995dbd997b1750575a83b0a17d4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-openimages.cfg @@ -0,0 +1,789 @@ +[net] +# Testing + batch=1 + subdivisions=1 +# Training 
+batch=64 +subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=5000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 
+activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 
+activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 
+classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=1818 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=601 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-spp.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-spp.cfg new file mode 100644 index 0000000000000000000000000000000000000000..4ad2a052d88328a79cff5686ff4dd1df6993a2fd --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-spp.cfg @@ -0,0 +1,822 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] 
+from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 
+stride=1 +pad=1 +activation=leaky + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 + +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + 
+[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-tiny.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-tiny.cfg new file mode 100644 index 0000000000000000000000000000000000000000..cfca3cfa6415b7b61eae238aa71107dedbe5d607 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-tiny.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 
+activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-tiny_b.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-tiny_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..0d8ded69bcf909f4cca02ab202cc99b1800a1562 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-tiny_b.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 
+scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-tiny_t.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-tiny_t.cfg new file mode 100644 index 
0000000000000000000000000000000000000000..0d8ded69bcf909f4cca02ab202cc99b1800a1562 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-tiny_t.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 
+size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-voc.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-voc.cfg new file mode 100644 index 0000000000000000000000000000000000000000..3f3e8dfb31b7103cf7ca00cd0bef83d6d426bb8d --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3-voc.cfg @@ -0,0 +1,785 @@ +[net] +# Testing + batch=1 + subdivisions=1 +# Training +# batch=64 +# subdivisions=16 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 50200 +policy=steps +steps=40000,45000 +scales=.1,.1 + + + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 
+stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 
+size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=75 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=20 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3.cfg new file mode 100644 index 
0000000000000000000000000000000000000000..938ffff23f106d65290faae217f6a9b0a715c023 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 
+activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 
+activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 
+pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3_b.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3_b.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a86d73b42225ef65d8ff8692b0d72827ba3a8484 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3_b.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 
+filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 
+size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 
+filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3_t.cfg b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3_t.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a86d73b42225ef65d8ff8692b0d72827ba3a8484 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/cfg/yolov3_t.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=4 +subdivisions=1 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 8 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 
+size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 
+activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 
+activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/art.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/art.c new file mode 100644 index 0000000000000000000000000000000000000000..932688e7b9ecbfd1a359a5d373dddf52815da9bb --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/art.c @@ -0,0 +1,59 @@ +#include "darknet.h" + +#include + +void demo_art(char *cfgfile, char *weightfile, int cam_index) +{ +#ifdef OPENCV + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + + void * cap = open_video_stream(0, cam_index, 0,0,0); + + char *window = "ArtJudgementBot9000!!!"; + if(!cap) error("Couldn't connect to webcam.\n"); + int i; + int idx[] = {37, 401, 434}; + int n = sizeof(idx)/sizeof(idx[0]); + + while(1){ + image in = get_image_from_stream(cap); + image in_s = resize_image(in, net->w, net->h); + + float *p = network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + + float score = 0; + for(i = 0; i < n; ++i){ + float s = p[idx[i]]; + if (s > score) score = s; + } + score = score; + printf("I APPRECIATE THIS ARTWORK: %10.7f%%\n", score*100); + printf("["); + int upper = 30; + for(i = 0; i < upper; ++i){ + printf("%c", ((i+.5) < score*upper) ? 
219 : ' '); + } + printf("]\n"); + + show_image(in, window, 1); + free_image(in_s); + free_image(in); + } +#endif +} + + +void run_art(int argc, char **argv) +{ + int cam_index = find_int_arg(argc, argv, "-c", 0); + char *cfg = argv[2]; + char *weights = argv[3]; + demo_art(cfg, weights, cam_index); +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/attention.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/attention.c new file mode 100644 index 0000000000000000000000000000000000000000..cd1e579d375be8ffed5620c70180f0a59a927159 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/attention.c @@ -0,0 +1,459 @@ +#include "darknet.h" + +#include +#include + +void extend_data_truth(data *d, int n, float val) +{ + int i, j; + for(i = 0; i < d->y.rows; ++i){ + d->y.vals[i] = realloc(d->y.vals[i], (d->y.cols+n)*sizeof(float)); + for(j = 0; j < n; ++j){ + d->y.vals[i][d->y.cols + j] = val; + } + } + d->y.cols += n; +} + +matrix network_loss_data(network *net, data test) +{ + int i,b; + int k = 1; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.cols, sizeof(float)); + float *y = calloc(net->batch*test.y.cols, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + memcpy(y+b*test.y.cols, test.y.vals[i+b], test.y.cols*sizeof(float)); + } + + network orig = *net; + net->input = X; + net->truth = y; + net->train = 0; + net->delta = 0; + forward_network(net); + *net = orig; + + float *delta = net->layers[net->n-1].output; + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + int t = max_index(y + b*test.y.cols, 1000); + float err = sum_array(delta + b*net->outputs, net->outputs); + pred.vals[i+b][0] = -err; + //pred.vals[i+b][0] = 1-delta[b*net->outputs + t]; + } + } + free(X); + free(y); + return pred; +} + +void 
train_attention(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i, j; + + float avg_cls_loss = -1; + float avg_att_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *train_list = option_find_str(options, "train", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + + char **labels = get_labels(label_list); + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + double time; + + int divs=3; + int size=2; + + load_args args = {0}; + args.w = divs*net->w/size; + args.h = divs*net->h/size; + args.size = divs*net->w/size; + args.threads = 32; + args.hierarchy = net->hierarchy; + + args.min = net->min_ratio*args.w; + args.max = net->max_ratio*args.w; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + args.paths = paths; + args.classes = classes; + args.n = imgs; + args.m = N; + args.labels = labels; + args.type = CLASSIFICATION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) 
< net->max_batches || net->max_batches == 0){ + time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + data resized = resize_data(train, net->w, net->h); + extend_data_truth(&resized, divs*divs, 0); + data *tiles = tile_data(train, divs, size); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float aloss = 0; + float closs = 0; + int z; + for (i = 0; i < divs*divs/ngpus; ++i) { +#pragma omp parallel for + for(j = 0; j < ngpus; ++j){ + int index = i*ngpus + j; + extend_data_truth(tiles+index, divs*divs, SECRET_NUM); + matrix deltas = network_loss_data(nets[j], tiles[index]); + for(z = 0; z < resized.y.rows; ++z){ + resized.y.vals[z][train.y.cols + index] = deltas.vals[z][0]; + } + free_matrix(deltas); + } + } + int *inds = calloc(resized.y.rows, sizeof(int)); + for(z = 0; z < resized.y.rows; ++z){ + int index = max_index(resized.y.vals[z] + train.y.cols, divs*divs); + inds[z] = index; + for(i = 0; i < divs*divs; ++i){ + resized.y.vals[z][train.y.cols + i] = (i == index)? 
1 : 0; + } + } + data best = select_data(tiles, inds); + free(inds); + #ifdef GPU + if (ngpus == 1) { + closs = train_network(net, best); + } else { + closs = train_networks(nets, ngpus, best, 4); + } + #endif + for (i = 0; i < divs*divs; ++i) { + printf("%.2f ", resized.y.vals[0][train.y.cols + i]); + if((i+1)%divs == 0) printf("\n"); + free_data(tiles[i]); + } + free_data(best); + printf("\n"); + image im = float_to_image(64,64,3,resized.X.vals[0]); + //show_image(im, "orig"); + //cvWaitKey(100); + /* + image im1 = float_to_image(64,64,3,tiles[i].X.vals[0]); + image im2 = float_to_image(64,64,3,resized.X.vals[0]); + show_image(im1, "tile"); + show_image(im2, "res"); + */ +#ifdef GPU + if (ngpus == 1) { + aloss = train_network(net, resized); + } else { + aloss = train_networks(nets, ngpus, resized, 4); + } +#endif + for(i = 0; i < divs*divs; ++i){ + printf("%f ", nets[0]->output[1000 + i]); + if ((i+1) % divs == 0) printf("\n"); + } + printf("\n"); + + free_data(resized); + free_data(train); + if(avg_cls_loss == -1) avg_cls_loss = closs; + if(avg_att_loss == -1) avg_att_loss = aloss; + avg_cls_loss = avg_cls_loss*.9 + closs*.1; + avg_att_loss = avg_att_loss*.9 + aloss*.1; + + printf("%ld, %.3f: Att: %f, %f avg, Class: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, aloss, avg_att_loss, closs, avg_cls_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + pthread_join(load_thread, 0); + + free_network(net); + free_ptrs((void**)labels, classes); + free_ptrs((void**)paths, plist->size); + 
free_list(plist); + free(base); +} + +void validate_attention_single(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *leaf_list = option_find_str(options, "leaves", 0); + if(leaf_list) change_leaves(net->hierarchy, leaf_list); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + int divs = 4; + int size = 2; + int extra = 0; + float *avgs = calloc(classes, sizeof(float)); + int *inds = calloc(divs*divs, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w*divs/size); + image crop = crop_image(resized, (resized.w - net->w*divs/size)/2, (resized.h - net->h*divs/size)/2, net->w*divs/size, net->h*divs/size); + image rcrop = resize_image(crop, net->w, net->h); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, rcrop.data); + //pred[classes + 56] = 0; + for(j = 0; j < divs*divs; ++j){ + printf("%.2f ", pred[classes + j]); + if((j+1)%divs == 0) printf("\n"); + } + printf("\n"); + copy_cpu(classes, pred, 1, avgs, 1); + top_k(pred + classes, divs*divs, divs*divs, inds); + show_image(crop, "crop"); + for(j = 0; j < extra; ++j){ + int index = inds[j]; + int row = index / 
divs; + int col = index % divs; + int y = row * crop.h / divs - (net->h - crop.h/divs)/2; + int x = col * crop.w / divs - (net->w - crop.w/divs)/2; + printf("%d %d %d %d\n", row, col, y, x); + image tile = crop_image(crop, x, y, net->w, net->h); + float *pred = network_predict(net, tile.data); + axpy_cpu(classes, 1., pred, 1, avgs, 1); + show_image(tile, "tile"); + //cvWaitKey(10); + } + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + if(rcrop.data != resized.data) free_image(rcrop); + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_attention_multi(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + int scales[] = {224, 288, 320, 352, 384}; + int nscales = sizeof(scales)/sizeof(scales[0]); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + float *pred = calloc(classes, sizeof(float)); + image im = load_image_color(paths[i], 0, 0); + for(j = 0; j 
< nscales; ++j){ + image r = resize_min(im, scales[j]); + resize_network(net, r.w, r.h); + float *p = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + flip_image(r); + p = network_predict(net, r.data); + axpy_cpu(classes, 1, p, 1, pred, 1); + if(r.data != im.data) free_image(r); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void predict_attention(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + if(top == 0) top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = letterbox_image(im, net->w, net->h); + //resize_network(&net, r.w, r.h); + //printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + float *predictions = network_predict(net, X); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + 
//if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? names[net->hierarchy->parent[index]] : "Root"); + //else printf("%s: %f\n",names[index], predictions[index]); + printf("%5.2f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void run_attention(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int ngpus; + int *gpus = read_intlist(gpu_list, &ngpus, gpu_index); + + + int top = find_int_arg(argc, argv, "-t", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + char *layer_s = (argc > 7) ? argv[7]: 0; + if(0==strcmp(argv[2], "predict")) predict_attention(data, cfg, weights, filename, top); + else if(0==strcmp(argv[2], "train")) train_attention(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) validate_attention_single(data, cfg, weights); + else if(0==strcmp(argv[2], "validmulti")) validate_attention_multi(data, cfg, weights); +} + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/captcha.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/captcha.c new file mode 100644 index 0000000000000000000000000000000000000000..41d6d07c30801b35da34c05984be488e6f6767e9 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/captcha.c @@ -0,0 +1,353 @@ +#include "darknet.h" + +void fix_data_captcha(data d, int mask) +{ + matrix labels = d.y; + int i, j; + for(i = 0; i < d.y.rows; ++i){ + for(j = 0; j < d.y.cols; j += 2){ + if (mask){ + if(!labels.vals[i][j]){ + labels.vals[i][j] = SECRET_NUM; + labels.vals[i][j+1] = SECRET_NUM; 
+ }else if(labels.vals[i][j+1]){ + labels.vals[i][j] = 0; + } + } else{ + if (labels.vals[i][j]) { + labels.vals[i][j+1] = 0; + } else { + labels.vals[i][j+1] = 1; + } + } + } + } +} + +void train_captcha(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + int i = *net->seen/imgs; + int solved = 1; + list *plist; + char **labels = get_labels("/data/captcha/reimgs.labels.list"); + if (solved){ + plist = get_paths("/data/captcha/reimgs.solved.list"); + }else{ + plist = get_paths("/data/captcha/reimgs.raw.list"); + } + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = 26; + args.n = imgs; + args.m = plist->size; + args.labels = labels; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + + load_thread = load_data_in_thread(args); + while(1){ + ++i; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + fix_data_captcha(train, solved); + + /* + image im = float_to_image(256, 256, 3, train.X.vals[114]); + show_image(im, "training"); + cvWaitKey(0); + */ + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net->seen); + free_data(train); + if(i%100==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } + } +} + +void test_captcha(char *cfgfile, char *weightfile, 
char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + int i = 0; + char **names = get_labels("/data/captcha/reimgs.labels.list"); + char buff[256]; + char *input = buff; + int indexes[26]; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + //printf("Enter Image Path: "); + //fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, net->w, net->h); + float *X = im.data; + float *predictions = network_predict(net, X); + top_predictions(net, 26, indexes); + //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < 26; ++i){ + int index = indexes[i]; + if(i != 0) printf(", "); + printf("%s %f", names[index], predictions[index]); + } + printf("\n"); + fflush(stdout); + free_image(im); + if (filename) break; + } +} + +void valid_captcha(char *cfgfile, char *weightfile, char *filename) +{ + char **labels = get_labels("/data/captcha/reimgs.labels.list"); + network *net = load_network(cfgfile, weightfile, 0); + list *plist = get_paths("/data/captcha/reimgs.fg.list"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + int outputs = net->outputs; + + set_batch_network(net, 1); + srand(2222222); + int i, j; + for(i = 0; i < N; ++i){ + if (i%100 == 0) fprintf(stderr, "%d\n", i); + image im = load_image_color(paths[i], net->w, net->h); + float *X = im.data; + float *predictions = network_predict(net, X); + //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + int truth = -1; + for(j = 0; j < 13; ++j){ + if (strstr(paths[i], labels[j])) truth = j; + } + if (truth == -1){ + fprintf(stderr, "bad: %s\n", paths[i]); + return; + } + printf("%d, ", truth); + for(j = 0; j < outputs; ++j){ + if (j != 0) printf(", "); + printf("%f", predictions[j]); + } + printf("\n"); + fflush(stdout); + free_image(im); + if (filename) break; + } +} + +/* + void 
train_captcha(char *cfgfile, char *weightfile) + { + float avg_loss = -1; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + int i = net->seen/imgs; + list *plist = get_paths("/data/captcha/train.auto5"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + while(1){ + ++i; + time=clock(); + data train = load_data_captcha(paths, imgs, plist->size, 10, 200, 60); + translate_data_rows(train, -128); + scale_data_rows(train, 1./128); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + net->seen += imgs; + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net->seen); + free_data(train); + if(i%10==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } + } + } + + void decode_captcha(char *cfgfile, char *weightfile) + { + setbuf(stdout, NULL); + srand(time(0)); + network net = parse_network_cfg(cfgfile); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + char filename[256]; + while(1){ + printf("Enter filename: "); + fgets(filename, 256, stdin); + strtok(filename, "\n"); + image im = load_image_color(filename, 300, 57); + scale_image(im, 1./255.); + float *X = im.data; + float *predictions = network_predict(net, X); + image out = float_to_image(300, 57, 1, predictions); + show_image(out, "decoded"); +#ifdef OPENCV +cvWaitKey(0); +#endif +free_image(im); +} +} + +void encode_captcha(char *cfgfile, char *weightfile) +{ +float avg_loss = -1; +srand(time(0)); +char *base = basecfg(cfgfile); +printf("%s\n", 
base); +network net = parse_network_cfg(cfgfile); +if(weightfile){ + load_weights(&net, weightfile); +} +printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); +int imgs = 1024; +int i = net->seen/imgs; +list *plist = get_paths("/data/captcha/encode.list"); +char **paths = (char **)list_to_array(plist); +printf("%d\n", plist->size); +clock_t time; +while(1){ + ++i; + time=clock(); + data train = load_data_captcha_encode(paths, imgs, plist->size, 300, 57); + scale_data_rows(train, 1./255); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + net->seen += imgs; + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net->seen); + free_matrix(train.X); + if(i%100==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } +} +} + +void validate_captcha(char *cfgfile, char *weightfile) +{ + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + int numchars = 37; + list *plist = get_paths("/data/captcha/solved.hard"); + char **paths = (char **)list_to_array(plist); + int imgs = plist->size; + data valid = load_data_captcha(paths, imgs, 0, 10, 200, 60); + translate_data_rows(valid, -128); + scale_data_rows(valid, 1./128); + matrix pred = network_predict_data(net, valid); + int i, k; + int correct = 0; + int total = 0; + int accuracy = 0; + for(i = 0; i < imgs; ++i){ + int allcorrect = 1; + for(k = 0; k < 10; ++k){ + char truth = int_to_alphanum(max_index(valid.y.vals[i]+k*numchars, numchars)); + char prediction = int_to_alphanum(max_index(pred.vals[i]+k*numchars, numchars)); + if (truth != prediction) allcorrect=0; + if (truth != '.' 
&& truth == prediction) ++correct; + if (truth != '.' || truth != prediction) ++total; + } + accuracy += allcorrect; + } + printf("Word Accuracy: %f, Char Accuracy %f\n", (float)accuracy/imgs, (float)correct/total); + free_data(valid); +} + +void test_captcha(char *cfgfile, char *weightfile) +{ + setbuf(stdout, NULL); + srand(time(0)); + //char *base = basecfg(cfgfile); + //printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + set_batch_network(&net, 1); + if(weightfile){ + load_weights(&net, weightfile); + } + char filename[256]; + while(1){ + //printf("Enter filename: "); + fgets(filename, 256, stdin); + strtok(filename, "\n"); + image im = load_image_color(filename, 200, 60); + translate_image(im, -128); + scale_image(im, 1/128.); + float *X = im.data; + float *predictions = network_predict(net, X); + print_letters(predictions, 10); + free_image(im); + } +} + */ +void run_captcha(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_captcha(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights, filename); + else if(0==strcmp(argv[2], "valid")) valid_captcha(cfg, weights, filename); + //if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "encode")) encode_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "decode")) decode_captcha(cfg, weights); + //else if(0==strcmp(argv[2], "valid")) validate_captcha(cfg, weights); +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/cifar.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/cifar.c new file mode 100644 index 0000000000000000000000000000000000000000..a5f5f240b9f680acd9b5890042300d3b683e0f82 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/cifar.c @@ -0,0 +1,251 @@ +#include "darknet.h" + +void train_cifar(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + int classes = 10; + int N = 50000; + + char **labels = get_labels("data/cifar/labels.txt"); + int epoch = (*net->seen)/N; + data train = load_all_cifar10(); + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + clock_t time=clock(); + + float loss = train_network_sgd(net, train, 1); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + 
} + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)labels, classes); + free(base); + free_data(train); +} + +void train_cifar_distill(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + char *backup_directory = "/home/pjreddie/backup/"; + int classes = 10; + int N = 50000; + + char **labels = get_labels("data/cifar/labels.txt"); + int epoch = (*net->seen)/N; + + data train = load_all_cifar10(); + matrix soft = csv_to_matrix("results/ensemble.csv"); + + float weight = .9; + scale_matrix(soft, weight); + scale_matrix(train.y, 1. 
- weight); + matrix_add_matrix(soft, train.y); + + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + clock_t time=clock(); + + float loss = train_network_sgd(net, train, 1); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)labels, classes); + free(base); + free_data(train); +} + +void test_cifar_multi(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + float avg_acc = 0; + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + + float pred[10] = {0}; + + float *p = network_predict(net, im.data); + axpy_cpu(10, 1, p, 1, pred, 1); + flip_image(im); + p = network_predict(net, im.data); + axpy_cpu(10, 1, p, 1, pred, 1); + + int index = max_index(pred, 10); + int class = max_index(test.y.vals[i], 10); + if(index == class) avg_acc += 1; + free_image(im); + printf("%4d: %.2f%%\n", i, 100.*avg_acc/(i+1)); + } +} + +void test_cifar(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + clock_t time; + float avg_acc = 0; + float avg_top5 = 0; + data test = 
load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + time=clock(); + + float *acc = network_accuracies(net, test, 2); + avg_acc += acc[0]; + avg_top5 += acc[1]; + printf("top1: %f, %lf seconds, %d images\n", avg_acc, sec(clock()-time), test.X.rows); + free_data(test); +} + +void extract_cifar() +{ +char *labels[] = {"airplane","automobile","bird","cat","deer","dog","frog","horse","ship","truck"}; + int i; + data train = load_all_cifar10(); + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + for(i = 0; i < train.X.rows; ++i){ + image im = float_to_image(32, 32, 3, train.X.vals[i]); + int class = max_index(train.y.vals[i], 10); + char buff[256]; + sprintf(buff, "data/cifar/train/%d_%s",i,labels[class]); + save_image_options(im, buff, PNG, 0); + } + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + int class = max_index(test.y.vals[i], 10); + char buff[256]; + sprintf(buff, "data/cifar/test/%d_%s",i,labels[class]); + save_image_options(im, buff, PNG, 0); + } +} + +void test_cifar_csv(char *filename, char *weightfile) +{ + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + matrix pred = network_predict_data(net, test); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = float_to_image(32, 32, 3, test.X.vals[i]); + flip_image(im); + } + matrix pred2 = network_predict_data(net, test); + scale_matrix(pred, .5); + scale_matrix(pred2, .5); + matrix_add_matrix(pred2, pred); + + matrix_to_csv(pred); + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); +} + +void test_cifar_csvtrain(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + srand(time(0)); + + data test = load_all_cifar10(); + + matrix pred = network_predict_data(net, test); + + int i; + for(i = 0; i < test.X.rows; ++i){ + image im = 
float_to_image(32, 32, 3, test.X.vals[i]); + flip_image(im); + } + matrix pred2 = network_predict_data(net, test); + scale_matrix(pred, .5); + scale_matrix(pred2, .5); + matrix_add_matrix(pred2, pred); + + matrix_to_csv(pred); + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); +} + +void eval_cifar_csv() +{ + data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); + + matrix pred = csv_to_matrix("results/combined.csv"); + fprintf(stderr, "%d %d\n", pred.rows, pred.cols); + + fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); + free_data(test); + free_matrix(pred); +} + + +void run_cifar(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_cifar(cfg, weights); + else if(0==strcmp(argv[2], "extract")) extract_cifar(); + else if(0==strcmp(argv[2], "distill")) train_cifar_distill(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_cifar(cfg, weights); + else if(0==strcmp(argv[2], "multi")) test_cifar_multi(cfg, weights); + else if(0==strcmp(argv[2], "csv")) test_cifar_csv(cfg, weights); + else if(0==strcmp(argv[2], "csvtrain")) test_cifar_csvtrain(cfg, weights); + else if(0==strcmp(argv[2], "eval")) eval_cifar_csv(); +} + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/classifier.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/classifier.c new file mode 100644 index 0000000000000000000000000000000000000000..e8779836dc01a2e476104132acd2dbfdd6ed29aa --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/classifier.c @@ -0,0 +1,1123 @@ +#include "darknet.h" + +#include +#include + +float *get_regression_values(char **labels, int n) +{ + float *v = calloc(n, sizeof(float)); + int i; + for(i = 0; i < n; ++i){ + 
char *p = strchr(labels[i], ' '); + *p = 0; + v[i] = atof(p+1); + } + return v; +} + +void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + // Ruihao + int tag = option_find_int_quiet(options, "tag", 0); + + char *backup_directory_cfg = option_find_str(options, "backup", "/backup/"); + char *label_list_cfg = option_find_str(options, "labels", "data/labels.list"); + char *train_list_cfg = option_find_str(options, "train", "data/train.list"); + + char *env = getenv("UVMAsyncBench_BASE"); + char backup_directory[256]; + char label_list[256]; + char train_list[256]; + sprintf(backup_directory, "%s/%s", env, backup_directory_cfg); + sprintf(label_list, "%s/%s", env, label_list_cfg); + sprintf(train_list, "%s/%s", env, train_list_cfg); + // Ruihao + char *tree = option_find_str(options, "tree", 0); + if (tree) net->hierarchy = read_tree(tree); + int classes = option_find_int(options, "classes", 2); + + char **labels = 0; + if(!tag){ + labels = get_labels(label_list); + } + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + double time; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.hierarchy = net->hierarchy; + + args.min = net->min_ratio*net->w; + 
args.max = net->max_ratio*net->w; + printf("%d %d\n", args.min, args.max); + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + + args.paths = paths; + args.classes = classes; + args.n = imgs; + args.m = N; + args.labels = labels; + if (tag){ + args.type = TAG_DATA; + } else { + args.type = CLASSIFICATION_DATA; + } + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int count = 0; + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + if(net->random && count++%40 == 0){ + printf("Resizing\n"); + int dim = (rand() % 11 + 4) * 32; + //if (get_current_batch(net)+200 > net->max_batches) dim = 608; + //int dim = (rand() % 4 + 16) * 32; + printf("%d\n", dim); + args.w = dim; + args.h = dim; + args.size = dim; + args.min = net->min_ratio*dim; + args.max = net->max_ratio*dim; + printf("%d %d\n", args.min, args.max); + + pthread_join(load_thread, 0); + train = buffer; + free_data(train); + load_thread = load_data(args); + + for(i = 0; i < ngpus; ++i){ + resize_network(nets[i], dim, dim); + } + net = nets[0]; + } + time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = 
*net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + pthread_join(load_thread, 0); + + // free_network(net); + if(labels) free_ptrs((void**)labels, classes); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void validate_classifier_crop(char *datacfg, char *filename, char *weightfile) +{ + int i = 0; + network *net = load_network(filename, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + clock_t time; + float avg_acc = 0; + float avg_topk = 0; + int splits = m/1000; + int num = (i+1)*m/splits - i*m/splits; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + + args.paths = paths; + args.classes = classes; + args.n = num; + args.m = 0; + args.labels = labels; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(i = 1; i <= splits; ++i){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + num = (i+1)*m/splits - i*m/splits; + char **part = paths+(i*m/splits); + if(i != splits){ + args.paths = part; + load_thread = load_data_in_thread(args); + } + printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + 
time=clock(); + float *acc = network_accuracies(net, val, topk); + avg_acc += acc[0]; + avg_topk += acc[1]; + printf("%d: top 1: %f, top %d: %f, %lf seconds, %d images\n", i, avg_acc/i, topk, avg_topk/i, sec(clock()-time), val.X.rows); + free_data(val); + } +} + +void validate_classifier_10(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + int w = net->w; + int h = net->h; + int shift = 32; + image im = load_image_color(paths[i], w+shift, h+shift); + image images[10]; + images[0] = crop_image(im, -shift, -shift, w, h); + images[1] = crop_image(im, shift, -shift, w, h); + images[2] = crop_image(im, 0, 0, w, h); + images[3] = crop_image(im, -shift, shift, w, h); + images[4] = crop_image(im, shift, shift, w, h); + flip_image(im); + images[5] = crop_image(im, -shift, -shift, w, h); + images[6] = crop_image(im, shift, -shift, w, h); + images[7] = crop_image(im, 0, 0, w, h); + images[8] = crop_image(im, -shift, shift, w, h); + images[9] = crop_image(im, shift, shift, w, h); + float *pred = calloc(classes, sizeof(float)); + for(j = 0; j < 10; ++j){ + float *p = network_predict(net, images[j].data); + if(net->hierarchy) 
hierarchy_predictions(p, net->outputs, net->hierarchy, 1, 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + free_image(images[j]); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_classifier_full(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + int size = net->w; + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, size); + resize_network(net, resized.w, resized.h); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, resized.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + free_image(im); + free_image(resized); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + + 
+void validate_classifier_single(char *datacfg, char *filename, char *weightfile) +{ + int i, j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "labels", "data/labels.list"); + char *leaf_list = option_find_str(options, "leaves", 0); + if(leaf_list) change_leaves(net->hierarchy, leaf_list); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + image im = load_image_color(paths[i], 0, 0); + image crop = center_crop_image(im, net->w, net->h); + //grayscale_image_3c(crop); + //show_image(im, "orig"); + //show_image(crop, "cropped"); + //cvWaitKey(0); + float *pred = network_predict(net, crop.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1); + + free_image(im); + free_image(crop); + top_k(pred, classes, topk, indexes); + + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%s, %d, %f, %f, \n", paths[i], class, pred[0], pred[1]); + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void validate_classifier_multi(char *datacfg, char *cfg, char *weights) +{ + int i, j; + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = 
option_find_str(options, "labels", "data/labels.list"); + char *valid_list = option_find_str(options, "valid", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + int topk = option_find_int(options, "top", 1); + + char **labels = get_labels(label_list); + list *plist = get_paths(valid_list); + //int scales[] = {224, 288, 320, 352, 384}; + int scales[] = {224, 256, 288, 320}; + int nscales = sizeof(scales)/sizeof(scales[0]); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + float avg_acc = 0; + float avg_topk = 0; + int *indexes = calloc(topk, sizeof(int)); + + for(i = 0; i < m; ++i){ + int class = -1; + char *path = paths[i]; + for(j = 0; j < classes; ++j){ + if(strstr(path, labels[j])){ + class = j; + break; + } + } + float *pred = calloc(classes, sizeof(float)); + image im = load_image_color(paths[i], 0, 0); + for(j = 0; j < nscales; ++j){ + image r = resize_max(im, scales[j]); + resize_network(net, r.w, r.h); + float *p = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1); + axpy_cpu(classes, 1, p, 1, pred, 1); + flip_image(r); + p = network_predict(net, r.data); + axpy_cpu(classes, 1, p, 1, pred, 1); + if(r.data != im.data) free_image(r); + } + free_image(im); + top_k(pred, classes, topk, indexes); + free(pred); + if(indexes[0] == class) avg_acc += 1; + for(j = 0; j < topk; ++j){ + if(indexes[j] == class) avg_topk += 1; + } + + printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); + } +} + +void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + int top = 
option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image orig = load_image_color(input, 0, 0); + image r = resize_min(orig, 256); + image im = crop_image(r, (r.w - 224 - 1)/2 + 1, (r.h - 224 - 1)/2 + 1, 224, 224); + float mean[] = {0.48263312050943, 0.45230225481413, 0.40099074308742}; + float std[] = {0.22590347483426, 0.22120921437787, 0.22103996251583}; + float var[3]; + var[0] = std[0]*std[0]; + var[1] = std[1]*std[1]; + var[2] = std[2]*std[2]; + + normalize_cpu(im.data, mean, var, 1, 3, im.w*im.h); + + float *X = im.data; + time=clock(); + float *predictions = network_predict(net, X); + + layer l = net->layers[layer_num]; + for(i = 0; i < l.c; ++i){ + if(l.rolling_mean) printf("%f %f %f\n", l.rolling_mean[i], l.rolling_variance[i], l.scales[i]); + } +#ifdef GPU + cuda_pull_array(l.output_gpu, l.output, l.outputs); +#endif + for(i = 0; i < l.outputs; ++i){ + printf("%f\n", l.output[i]); + } + /* + + printf("\n\nWeights\n"); + for(i = 0; i < l.n*l.size*l.size*l.c; ++i){ + printf("%f\n", l.filters[i]); + } + + printf("\n\nBiases\n"); + for(i = 0; i < l.n; ++i){ + printf("%f\n", l.biases[i]); + } + */ + + top_predictions(net, top, indexes); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + printf("%s: %f\n", names[index], predictions[index]); + } + free_image(im); + if (filename) break; + } +} + +void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + list *options = read_data_cfg(datacfg); + + // Ruihao 
+ char *name_list_cfg = option_find_str(options, "names", 0); + char *env = getenv("UVMAsyncBench_BASE"); + char name_list[256]; + sprintf(name_list, "%s/%s", env, name_list_cfg); + // if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); + // Ruihao + if(top == 0) top = option_find_int(options, "top", 1); + + int i = 0; + char **names = get_labels(name_list); + clock_t time; + int *indexes = calloc(top, sizeof(int)); + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = letterbox_image(im, net->w, net->h); + //image r = resize_min(im, 320); + //printf("%d %d\n", r.w, r.h); + //resize_network(net, r.w, r.h); + //printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + startCPU(); + float *predictions = network_predict(net, X); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + endCPU(); + fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < top; ++i){ + int index = indexes[i]; + //if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? 
names[net->hierarchy->parent[index]] : "Root"); + //else printf("%s: %f\n",names[index], predictions[index]); + printf("%5.2f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void label_classifier(char *datacfg, char *filename, char *weightfile) +{ + int i; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *label_list = option_find_str(options, "names", "data/labels.list"); + char *test_list = option_find_str(options, "test", "data/train.list"); + int classes = option_find_int(options, "classes", 2); + + char **labels = get_labels(label_list); + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + for(i = 0; i < m; ++i){ + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + float *pred = network_predict(net, crop.data); + + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + int ind = max_index(pred, classes); + + printf("%s\n", labels[ind]); + } +} + +void csv_classifier(char *datacfg, char *cfgfile, char *weightfile) +{ + int i,j; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + char *test_list = option_find_str(options, "test", "data/test.list"); + int top = option_find_int(options, "top", 1); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + int *indexes = calloc(top, sizeof(int)); + + for(i = 0; i < m; ++i){ + double time = what_time_is_it_now(); + char *path = paths[i]; + image im = load_image_color(path, 0, 0); + image r = letterbox_image(im, net->w, 
net->h); + float *predictions = network_predict(net, r.data); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + top_k(predictions, net->outputs, top, indexes); + + printf("%s", path); + for(j = 0; j < top; ++j){ + printf("\t%d", indexes[j]); + } + printf("\n"); + + free_image(im); + free_image(r); + + fprintf(stderr, "%lf seconds, %d images, %d total\n", what_time_is_it_now() - time, i+1, m); + } +} + +void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_layer) +{ + int curr = 0; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *test_list_cfg = option_find_str(options, "test", "data/test.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char test_list[256]; + sprintf(test_list, "%s/%s", env, test_list_cfg); + // Ruihao + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + m = net->max_batches * net->batch; + free_list(plist); + + clock_t time; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = classes; + args.n = net->batch; + args.m = 0; + args.labels = 0; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(curr = net->batch; curr <= m; curr += net->batch){ + // while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + if(curr < m){ + args.paths = paths + curr; + if (curr + net->batch > m) args.n = m - curr; + load_thread = load_data_in_thread(args); + } + fprintf(stderr, "Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + + int i, j; + if (target_layer >= 0){ + //layer l = 
net->layers[target_layer]; + } + + // for(i = 0; i < pred.rows; ++i){ + // printf("%s", paths[curr-net->batch+i]); + // for(j = 0; j < pred.cols; ++j){ + // printf("\t%g", pred.vals[i][j]); + // } + // printf("\n"); + // } + + fprintf(stderr, "%lf seconds, %d images, %d total\n", sec(clock()-time), val.X.rows, curr); + free_matrix(pred); + free_data(val); + } +} + + +void file_output_classifier(char *datacfg, char *filename, char *weightfile, char *listfile) +{ + int i,j; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + //char *label_list = option_find_str(options, "names", "data/labels.list"); + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(listfile); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + for(i = 0; i < m; ++i){ + image im = load_image_color(paths[i], 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + + float *pred = network_predict(net, crop.data); + if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 0, 1); + + if(resized.data != im.data) free_image(resized); + free_image(im); + free_image(crop); + + printf("%s", paths[i]); + for(j = 0; j < classes; ++j){ + printf("\t%g", pred[j]); + } + printf("\n"); + } +} + + +void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + float threat = 0; + float roll = .2; + + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + int top = option_find_int(options, "top", 1); + + char *name_list = option_find_str(options, "names", 0); + char **names = 
get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + //cvNamedWindow("Threat", CV_WINDOW_NORMAL); + //cvResizeWindow("Threat", 512, 512); + float fps = 0; + int i; + + int count = 0; + + while(1){ + ++count; + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + if(!in.data) break; + image in_s = resize_image(in, net->w, net->h); + + image out = in; + int x1 = out.w / 20; + int y1 = out.h / 20; + int x2 = 2*x1; + int y2 = out.h - out.h/20; + + int border = .01*out.h; + int h = y2 - y1 - 2*border; + int w = x2 - x1 - 2*border; + + float *predictions = network_predict(net, in_s.data); + float curr_threat = 0; + if(1){ + curr_threat = predictions[0] * 0 + + predictions[1] * .6 + + predictions[2]; + } else { + curr_threat = predictions[218] + + predictions[539] + + predictions[540] + + predictions[368] + + predictions[369] + + predictions[370]; + } + threat = roll * curr_threat + (1-roll) * threat; + + draw_box_width(out, x2 + border, y1 + .02*h, x2 + .5 * w, y1 + .02*h + border, border, 0,0,0); + if(threat > .97) { + draw_box_width(out, x2 + .5 * w + border, + y1 + .02*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .02*h + 3*border, 3*border, 1,0,0); + } + draw_box_width(out, x2 + .5 * w + border, + y1 + .02*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .02*h + 3*border, .5*border, 0,0,0); + draw_box_width(out, x2 + border, y1 + .42*h, x2 + .5 * w, y1 + .42*h + border, border, 0,0,0); + if(threat > .57) { + draw_box_width(out, x2 + .5 * w + border, + y1 + .42*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .42*h + 3*border, 3*border, 1,1,0); + } + draw_box_width(out, x2 + .5 * w + border, + y1 + .42*h - 2*border, + x2 + .5 * w + 6*border, + y1 + .42*h + 3*border, .5*border, 0,0,0); + + draw_box_width(out, x1, y1, x2, y2, border, 0,0,0); + for(i = 0; i < threat * h ; ++i){ + float ratio = (float) i / h; + float r = 
(ratio < .5) ? (2*(ratio)) : 1; + float g = (ratio < .5) ? 1 : 1 - 2*(ratio - .5); + draw_box_width(out, x1 + border, y2 - border - i, x2 - border, y2 - border - i, 1, r, g, 0); + } + top_predictions(net, top, indexes); + char buff[256]; + sprintf(buff, "/home/pjreddie/tmp/threat_%06d", count); + //save_image(out, buff); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + for(i = 0; i < top; ++i){ + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + } + + if(1){ + show_image(out, "Threat", 10); + } + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + int bad_cats[] = {218, 539, 540, 1213, 1501, 1742, 1911, 2415, 4348, 19223, 368, 369, 370, 1133, 1200, 1306, 2122, 2301, 2537, 2823, 3179, 3596, 3639, 4489, 5107, 5140, 5289, 6240, 6631, 6762, 7048, 7171, 7969, 7984, 7989, 8824, 8927, 9915, 10270, 10448, 13401, 15205, 18358, 18894, 18895, 19249, 19697}; + + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + int top = option_find_int(options, "top", 1); + + char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + int i; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = resize_image(in, net->w, net->h); + + float *predictions = network_predict(net, 
in_s.data); + top_predictions(net, top, indexes); + + printf("\033[2J"); + printf("\033[1;1H"); + + int threat = 0; + for(i = 0; i < sizeof(bad_cats)/sizeof(bad_cats[0]); ++i){ + int index = bad_cats[i]; + if(predictions[index] > .01){ + printf("Threat Detected!\n"); + threat = 1; + break; + } + } + if(!threat) printf("Scanning...\n"); + for(i = 0; i < sizeof(bad_cats)/sizeof(bad_cats[0]); ++i){ + int index = bad_cats[i]; + if(predictions[index] > .01){ + printf("%s\n", names[index]); + } + } + + show_image(in, "Threat Detection", 10); + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + +void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + image **alphabet = load_alphabet(); + printf("Classifier Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + list *options = read_data_cfg(datacfg); + + srand(2222222); + + int w = 1280; + int h = 720; + void * cap = open_video_stream(filename, cam_index, w, h, 0); + + int top = option_find_int(options, "top", 1); + + char *label_list = option_find_str(options, "labels", 0); + char *name_list = option_find_str(options, "names", label_list); + char **names = get_labels(name_list); + + int *indexes = calloc(top, sizeof(int)); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + int i; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + + float *predictions = network_predict(net, in_s.data); + if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1); + 
top_predictions(net, top, indexes); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + int lh = in.h*.03; + int toph = 3*lh; + + float rgb[3] = {1,1,1}; + for(i = 0; i < top; ++i){ + printf("%d\n", toph); + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + + char buff[1024]; + sprintf(buff, "%3.1f%%: %s\n", predictions[index]*100, names[index]); + image label = get_label(alphabet, buff, lh); + draw_label(in, toph, lh, label, rgb); + toph += 2*lh; + free_image(label); + } + + show_image(in, base, 10); + free_image(in_s); + free_image(in); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_classifier(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int ngpus; + int *gpus = read_intlist(gpu_list, &ngpus, gpu_index); + + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int top = find_int_arg(argc, argv, "-t", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + char *layer_s = (argc > 7) ? argv[7]: 0; + int layer = layer_s ? 
atoi(layer_s) : -1; + if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top); + else if(0==strcmp(argv[2], "fout")) file_output_classifier(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s)); + else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename); + else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer); + else if(0==strcmp(argv[2], "csv")) csv_classifier(data, cfg, weights); + else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights); + else if(0==strcmp(argv[2], "validmulti")) validate_classifier_multi(data, cfg, weights); + else if(0==strcmp(argv[2], "valid10")) validate_classifier_10(data, cfg, weights); + else if(0==strcmp(argv[2], "validcrop")) validate_classifier_crop(data, cfg, weights); + else if(0==strcmp(argv[2], "validfull")) validate_classifier_full(data, cfg, weights); +} + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/coco.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/coco.c new file mode 100644 index 0000000000000000000000000000000000000000..6a50b89abd2abc7fb217b5118034a746f790f690 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/coco.c @@ -0,0 +1,357 @@ +#include "darknet.h" + +#include + +char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking 
meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"}; + +int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; + +void train_coco(char *cfgfile, char *weightfile) +{ + //char *train_images = "/home/pjreddie/data/voc/test/train.txt"; + //char *train_images = "/home/pjreddie/data/coco/train.txt"; + char *train_images = "data/coco.trainval.txt"; + //char *train_images = "data/bags.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + layer l = net->layers[net->n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = 
jitter; + args.num_boxes = side; + args.d = &buffer; + args.type = REGION_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + /* + image im = float_to_image(net->w, net->h, 3, train.X.vals[113]); + image copy = copy_image(im); + draw_coco(copy, train.y.vals[113], 7, "truth"); + cvWaitKey(0); + free_image(copy); + */ + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || (i < 1000 && i%100 == 0)){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +static void print_cocos(FILE *fp, int image_id, detection *dets, int num_boxes, int classes, int w, int h) +{ + int i, j; + for(i = 0; i < num_boxes; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + float bx = xmin; + float by = ymin; + float bw = xmax - xmin; + float bh = ymax - ymin; + + for(j = 0; j < classes; 
++j){ + if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]); + } + } +} + +int get_coco_image_id(char *filename) +{ + char *p = strrchr(filename, '_'); + return atoi(p+1); +} + +void validate_coco(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/"; + list *plist = get_paths("data/coco_val_5k.list"); + //list *plist = get_paths("/home/pjreddie/data/people-art/test.txt"); + //list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + snprintf(buff, 1024, "%s/coco_results.json", base); + FILE *fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + + int m = plist->size; + int i=0; + int t; + + float thresh = .01; + int nms = 1; + float iou_thresh = .5; + + int nthreads = 8; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.type = IMAGE_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + time_t start = time(0); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = 
&buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + int image_id = get_coco_image_id(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh); + print_cocos(fp, image_id, dets, l.side*l.side*l.n, classes, w, h); + free_detections(dets, nboxes); + free_image(val[t]); + free_image(val_resized[t]); + } + } + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); +} + +void validate_coco_recall(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + int side = l.side; + + int j, k; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + + float thresh = .001; + int nms = 0; + float iou_thresh = .5; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + + int nboxes = 0; + detection *dets = get_network_boxes(net, orig.w, 
orig.h, thresh, 0, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, side*side*l.n, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < side*side*l.n; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < side*side*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + free_detections(dets, nboxes); + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free(id); + free_image(orig); + free_image(sized); + } +} + +void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) +{ + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + layer l = net->layers[net->n-1]; + set_batch_network(net, 1); + srand(2222222); + float nms = .4; + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = resize_image(im, net->w, net->h); + float *X = sized.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + + int nboxes = 0; + detection *dets = get_network_boxes(net, 1, 1, thresh, 0, 0, 0, &nboxes); + if 
(nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms); + + draw_detections(im, dets, l.side*l.side*l.n, thresh, coco_classes, alphabet, 80); + save_image(im, "prediction"); + show_image(im, "predictions", 0); + free_detections(dets, nboxes); + free_image(im); + free_image(sized); + if (filename) break; + } +} + +void run_coco(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .2); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5]: 0; + int avg = find_int_arg(argc, argv, "-avg", 1); + if(0==strcmp(argv[2], "test")) test_coco(cfg, weights, filename, thresh); + else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights); + else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix, avg, .5, 0,0,0,0); +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/darknet.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/darknet.c new file mode 100644 index 0000000000000000000000000000000000000000..f1c5e43b66391a9674c13a193e329cf3dfc26439 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/darknet.c @@ -0,0 +1,559 @@ +#include "darknet.h" + +// #include "../../../../common/cupti_add.h" +// #include "../../../../common/cpu_timestamps.h" +// #include "cpu_timestamps.h" + +static uint64_t startCPUTime; +static uint64_t endCPUTime; + +void startCPU() +{ + struct timespec tv; + if (clock_gettime(CLOCK_REALTIME, &tv)) + printf("error 
clock_gettime\n"); + startCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); +} + +void endCPU() +{ + struct timespec tv; + if (clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + + endCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); + // endCPUTimestamp1 = std::chrono::system_clock::now(); + printf("CPU_Times,%lu,%lu,%lu\n", startCPUTime, endCPUTime, endCPUTime - startCPUTime); +} + +#ifdef __cplusplus +extern "C" { +#endif +extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top); +extern void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen); +extern void run_yolo(int argc, char **argv); +extern void run_detector(int argc, char **argv); +extern void run_coco(int argc, char **argv); +extern void run_nightmare(int argc, char **argv); +extern void run_classifier(int argc, char **argv); +extern void run_regressor(int argc, char **argv); +extern void run_segmenter(int argc, char **argv); +extern void run_isegmenter(int argc, char **argv); +extern void run_char_rnn(int argc, char **argv); +extern void run_tag(int argc, char **argv); +extern void run_cifar(int argc, char **argv); +extern void run_go(int argc, char **argv); +extern void run_art(int argc, char **argv); +extern void run_super(int argc, char **argv); +extern void run_lsd(int argc, char **argv); +#ifdef __cplusplus +} +#endif + +void average(int argc, char *argv[]) +{ + char *cfgfile = argv[2]; + char *outfile = argv[3]; + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + network *sum = parse_network_cfg(cfgfile); + + char *weightfile = argv[4]; + load_weights(sum, weightfile); + + int i, j; + int n = argc - 5; + for(i = 0; i < n; ++i){ + weightfile = argv[i+5]; + load_weights(net, weightfile); + for(j = 0; j < net->n; ++j){ + layer l = net->layers[j]; + layer out = sum->layers[j]; + if(l.type == CONVOLUTIONAL){ + int num = l.n*l.c*l.size*l.size; + 
axpy_cpu(l.n, 1, l.biases, 1, out.biases, 1); + axpy_cpu(num, 1, l.weights, 1, out.weights, 1); + if(l.batch_normalize){ + axpy_cpu(l.n, 1, l.scales, 1, out.scales, 1); + axpy_cpu(l.n, 1, l.rolling_mean, 1, out.rolling_mean, 1); + axpy_cpu(l.n, 1, l.rolling_variance, 1, out.rolling_variance, 1); + } + } + if(l.type == CONNECTED){ + axpy_cpu(l.outputs, 1, l.biases, 1, out.biases, 1); + axpy_cpu(l.outputs*l.inputs, 1, l.weights, 1, out.weights, 1); + } + } + } + n = n+1; + for(j = 0; j < net->n; ++j){ + layer l = sum->layers[j]; + if(l.type == CONVOLUTIONAL){ + int num = l.n*l.c*l.size*l.size; + scal_cpu(l.n, 1./n, l.biases, 1); + scal_cpu(num, 1./n, l.weights, 1); + if(l.batch_normalize){ + scal_cpu(l.n, 1./n, l.scales, 1); + scal_cpu(l.n, 1./n, l.rolling_mean, 1); + scal_cpu(l.n, 1./n, l.rolling_variance, 1); + } + } + if(l.type == CONNECTED){ + scal_cpu(l.outputs, 1./n, l.biases, 1); + scal_cpu(l.outputs*l.inputs, 1./n, l.weights, 1); + } + } + save_weights(sum, outfile); +} + +long numops(network *net) +{ + int i; + long ops = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + ops += 2l * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w; + } else if(l.type == CONNECTED){ + ops += 2l * l.inputs * l.outputs; + } else if (l.type == RNN){ + ops += 2l * l.input_layer->inputs * l.input_layer->outputs; + ops += 2l * l.self_layer->inputs * l.self_layer->outputs; + ops += 2l * l.output_layer->inputs * l.output_layer->outputs; + } else if (l.type == GRU){ + ops += 2l * l.uz->inputs * l.uz->outputs; + ops += 2l * l.uh->inputs * l.uh->outputs; + ops += 2l * l.ur->inputs * l.ur->outputs; + ops += 2l * l.wz->inputs * l.wz->outputs; + ops += 2l * l.wh->inputs * l.wh->outputs; + ops += 2l * l.wr->inputs * l.wr->outputs; + } else if (l.type == LSTM){ + ops += 2l * l.uf->inputs * l.uf->outputs; + ops += 2l * l.ui->inputs * l.ui->outputs; + ops += 2l * l.ug->inputs * l.ug->outputs; + ops += 2l * l.uo->inputs * l.uo->outputs; + ops += 2l 
* l.wf->inputs * l.wf->outputs; + ops += 2l * l.wi->inputs * l.wi->outputs; + ops += 2l * l.wg->inputs * l.wg->outputs; + ops += 2l * l.wo->inputs * l.wo->outputs; + } + } + return ops; +} + +void speed(char *cfgfile, int tics) +{ + if (tics == 0) tics = 1000; + network *net = parse_network_cfg(cfgfile); + set_batch_network(net, 1); + int i; + double time=what_time_is_it_now(); + image im = make_image(net->w, net->h, net->c*net->batch); + for(i = 0; i < tics; ++i){ + network_predict(net, im.data); + } + double t = what_time_is_it_now() - time; + long ops = numops(net); + printf("\n%d evals, %f Seconds\n", tics, t); + printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); + printf("FLOPS: %.2f Bn\n", (float)ops/1000000000.*tics/t); + printf("Speed: %f sec/eval\n", t/tics); + printf("Speed: %f Hz\n", tics/t); +} + +void operations(char *cfgfile) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + long ops = numops(net); + printf("Floating Point Operations: %ld\n", ops); + printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); +} + +void oneoff(char *cfgfile, char *weightfile, char *outfile) +{ + gpu_index = -1; + network *net = parse_network_cfg(cfgfile); + int oldn = net->layers[net->n - 2].n; + int c = net->layers[net->n - 2].c; + scal_cpu(oldn*c, .1, net->layers[net->n - 2].weights, 1); + scal_cpu(oldn, 0, net->layers[net->n - 2].biases, 1); + net->layers[net->n - 2].n = 11921; + net->layers[net->n - 2].biases += 5; + net->layers[net->n - 2].weights += 5*c; + if(weightfile){ + load_weights(net, weightfile); + } + net->layers[net->n - 2].biases -= 5; + net->layers[net->n - 2].weights -= 5*c; + net->layers[net->n - 2].n = oldn; + printf("%d\n", oldn); + layer l = net->layers[net->n - 2]; + copy_cpu(l.n/3, l.biases, 1, l.biases + l.n/3, 1); + copy_cpu(l.n/3, l.biases, 1, l.biases + 2*l.n/3, 1); + copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + l.n/3*l.c, 1); + copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + 2*l.n/3*l.c, 
1);  /* completes the copy_cpu() call begun on the previous line */
    *net->seen = 0;  /* reset the seen-images counter before saving */
    save_weights(net, outfile);
}

/* oneoff2: load weights normally up to the end, then re-read the weight
 * file starting at layer `l`, and save with a reset image counter.
 * The double load is intentional (matches upstream darknet). */
void oneoff2(char *cfgfile, char *weightfile, char *outfile, int l)
{
    gpu_index = -1;
    network *net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights_upto(net, weightfile, 0, net->n);
        load_weights_upto(net, weightfile, l, net->n);
    }
    *net->seen = 0;
    save_weights_upto(net, outfile, net->n);
}

/* partial: save only the first `max` layers' weights (for transfer
 * learning / truncated checkpoints). */
void partial(char *cfgfile, char *weightfile, char *outfile, int max)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 1);
    save_weights_upto(net, outfile, max);
}

/* print_weights: dump the weights of layer `n` to stdout, one filter per
 * line. NOTE(review): `n` is not bounds-checked against net->n — confirm
 * callers pass a valid layer index. */
void print_weights(char *cfgfile, char *weightfile, int n)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 1);
    layer l = net->layers[n];
    int i, j;
    //printf("[");
    for(i = 0; i < l.n; ++i){
        //printf("[");
        for(j = 0; j < l.size*l.size*l.c; ++j){
            //if(j > 0) printf(",");
            printf("%g ", l.weights[i*l.size*l.size*l.c + j]);
        }
        printf("\n");
        //printf("]%s\n", (i == l.n-1)?"":",");
    }
    //printf("]");
}

/* rescale_net: rescale the FIRST convolutional layer only (note the
 * `break`) by factor 2 with a -.5 shift, then save. */
void rescale_net(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 0);
    int i;
    for(i = 0; i < net->n; ++i){
        layer l = net->layers[i];
        if(l.type == CONVOLUTIONAL){
            rescale_weights(l, 2, -.5);
            break;
        }
    }
    save_weights(net, outfile);
}

/* rgbgr_net: swap R/B channel order in the FIRST convolutional layer only
 * (note the `break`), converting a model between RGB and BGR input. */
void rgbgr_net(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 0);
    int i;
    for(i = 0; i < net->n; ++i){
        layer l = net->layers[i];
        if(l.type == CONVOLUTIONAL){
            rgbgr_weights(l);
            break;
        }
    }
    save_weights(net, outfile);
}

/* reset_normalize_net: fold batch-norm statistics into the raw weights of
 * every batch-normalized CONV / CONNECTED / GRU layer, then save. Unlike
 * denormalize_net below, the batch_normalize flags are left set. */
void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 0);
    int i;
    for (i = 0; i < net->n; ++i) {
        layer l = net->layers[i];
        if (l.type == CONVOLUTIONAL && l.batch_normalize) {
            denormalize_convolutional_layer(l);
        }
        if (l.type == CONNECTED && 
l.batch_normalize) {  /* continues the CONNECTED test from the previous line */
            denormalize_connected_layer(l);
        }
        if (l.type == GRU && l.batch_normalize) {
            /* a GRU carries six internal connected sub-layers */
            denormalize_connected_layer(*l.input_z_layer);
            denormalize_connected_layer(*l.input_r_layer);
            denormalize_connected_layer(*l.input_h_layer);
            denormalize_connected_layer(*l.state_z_layer);
            denormalize_connected_layer(*l.state_r_layer);
            denormalize_connected_layer(*l.state_h_layer);
        }
    }
    save_weights(net, outfile);
}

/* normalize_layer: turn batch normalization on for a layer by allocating
 * `n` scales (initialized to 1) plus zeroed rolling mean/variance buffers.
 * Takes the layer by value and returns the modified copy; the caller must
 * store it back. NOTE(review): calloc results are unchecked — matches the
 * file's prevailing style. */
layer normalize_layer(layer l, int n)
{
    int j;
    l.batch_normalize=1;
    l.scales = (float *) calloc(n, sizeof(float));
    for(j = 0; j < n; ++j){
        l.scales[j] = 1;  /* identity scale; mean/variance stay zeroed */
    }
    l.rolling_mean = (float *) calloc(n, sizeof(float));
    l.rolling_variance = (float *) calloc(n, sizeof(float));
    return l;
}

/* normalize_net: add batch-norm buffers to every CONV/CONNECTED layer that
 * lacks them, and to the sub-layers of GRU layers, then save.
 * NOTE(review): the GRU branch tests `l.batch_normalize` (not the negation
 * used for CONV/CONNECTED); this matches upstream darknet but looks
 * inverted — confirm before relying on it. */
void normalize_net(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 0);
    int i;
    for(i = 0; i < net->n; ++i){
        layer l = net->layers[i];
        if(l.type == CONVOLUTIONAL && !l.batch_normalize){
            net->layers[i] = normalize_layer(l, l.n);
        }
        if (l.type == CONNECTED && !l.batch_normalize) {
            net->layers[i] = normalize_layer(l, l.outputs);
        }
        if (l.type == GRU && l.batch_normalize) {
            *l.input_z_layer = normalize_layer(*l.input_z_layer, l.input_z_layer->outputs);
            *l.input_r_layer = normalize_layer(*l.input_r_layer, l.input_r_layer->outputs);
            *l.input_h_layer = normalize_layer(*l.input_h_layer, l.input_h_layer->outputs);
            *l.state_z_layer = normalize_layer(*l.state_z_layer, l.state_z_layer->outputs);
            *l.state_r_layer = normalize_layer(*l.state_r_layer, l.state_r_layer->outputs);
            *l.state_h_layer = normalize_layer(*l.state_h_layer, l.state_h_layer->outputs);
            net->layers[i].batch_normalize=1;
        }
    }
    save_weights(net, outfile);
}

/* statistics_net: print batch-norm statistics for every batch-normalized
 * CONNECTED layer and each connected sub-layer of batch-normalized GRUs. */
void statistics_net(char *cfgfile, char *weightfile)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 0);
    int i;
    for (i = 0; i < net->n; ++i) {
        layer l = net->layers[i];
        if (l.type == 
CONNECTED && l.batch_normalize) {  /* continues the test from the previous line */
            printf("Connected Layer %d\n", i);
            statistics_connected_layer(l);
        }
        if (l.type == GRU && l.batch_normalize) {
            printf("GRU Layer %d\n", i);
            printf("Input Z\n");
            statistics_connected_layer(*l.input_z_layer);
            printf("Input R\n");
            statistics_connected_layer(*l.input_r_layer);
            printf("Input H\n");
            statistics_connected_layer(*l.input_h_layer);
            printf("State Z\n");
            statistics_connected_layer(*l.state_z_layer);
            printf("State R\n");
            statistics_connected_layer(*l.state_r_layer);
            printf("State H\n");
            statistics_connected_layer(*l.state_h_layer);
        }
        printf("\n");
    }
}

/* denormalize_net: fold batch-norm statistics into the raw weights of every
 * batch-normalized (DE)CONV / CONNECTED / GRU layer AND clear the
 * batch_normalize flags, producing a plain un-normalized model. Compare
 * reset_normalize_net, which folds but keeps the flags set. */
void denormalize_net(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
    network *net = load_network(cfgfile, weightfile, 0);
    int i;
    for (i = 0; i < net->n; ++i) {
        layer l = net->layers[i];
        if ((l.type == DECONVOLUTIONAL || l.type == CONVOLUTIONAL) && l.batch_normalize) {
            denormalize_convolutional_layer(l);
            net->layers[i].batch_normalize=0;
        }
        if (l.type == CONNECTED && l.batch_normalize) {
            denormalize_connected_layer(l);
            net->layers[i].batch_normalize=0;
        }
        if (l.type == GRU && l.batch_normalize) {
            denormalize_connected_layer(*l.input_z_layer);
            denormalize_connected_layer(*l.input_r_layer);
            denormalize_connected_layer(*l.input_h_layer);
            denormalize_connected_layer(*l.state_z_layer);
            denormalize_connected_layer(*l.state_r_layer);
            denormalize_connected_layer(*l.state_h_layer);
            /* sub-layers are reached through pointers, so clear their flags
             * individually as well as the enclosing layer's */
            l.input_z_layer->batch_normalize = 0;
            l.input_r_layer->batch_normalize = 0;
            l.input_h_layer->batch_normalize = 0;
            l.state_z_layer->batch_normalize = 0;
            l.state_r_layer->batch_normalize = 0;
            l.state_h_layer->batch_normalize = 0;
            net->layers[i].batch_normalize=0;
        }
    }
    save_weights(net, outfile);
}

/* mkimg: generate `num` synthetic h-by-w collage images under `prefix`,
 * each built by pasting 100 randomly distorted first-layer filter
 * visualizations onto a gray canvas. */
void mkimg(char *cfgfile, char *weightfile, int h, int w, int num, char *prefix)
{
    network *net = load_network(cfgfile, weightfile, 0);
    image *ims = get_weights(net->layers[0]);
    int n = 
net->layers[0].n; + int z; + for(z = 0; z < num; ++z){ + image im = make_image(h, w, 3); + fill_image(im, .5); + int i; + for(i = 0; i < 100; ++i){ + image r = copy_image(ims[rand()%n]); + rotate_image_cw(r, rand()%4); + random_distort_image(r, 1, 1.5, 1.5); + int dx = rand()%(w-r.w); + int dy = rand()%(h-r.h); + ghost_image(r, im, dx, dy); + free_image(r); + } + char buff[256]; + sprintf(buff, "%s/gen_%d", prefix, z); + save_image(im, buff); + free_image(im); + } +} + +void visualize(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + visualize_network(net); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsc() +{ + unsigned long a, d; + + __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); + + return (a | (d << 32)); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsp() { + struct timespec tms; + if (clock_gettime(CLOCK_REALTIME, &tms)) { + return -1; + } + unsigned long ns = tms.tv_sec * 1000000000; + ns += tms.tv_nsec; + return ns; +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + + //test_resize("data/bad.jpg"); + //test_box(); + //test_convolutional_layer(); + if(argc < 2){ + fprintf(stderr, "usage: %s \n", argv[0]); + return 0; + } + gpu_index = find_int_arg(argc, argv, "-i", GPU_DEVICE); + if(find_arg(argc, argv, "-nogpu")) { + gpu_index = -1; + } + +#ifndef GPU + gpu_index = -1; +#else + if(gpu_index >= 0){ + cuda_set_device(gpu_index); + } + initTrace(); +#endif + + if (0 == strcmp(argv[1], "average")){ + average(argc, argv); + } else if (0 == strcmp(argv[1], "yolo")){ + run_yolo(argc, argv); + } else if (0 == strcmp(argv[1], "super")){ + run_super(argc, argv); + } else if (0 == strcmp(argv[1], "lsd")){ + run_lsd(argc, argv); + } else if (0 == strcmp(argv[1], "detector")){ + run_detector(argc, argv); + } else if (0 == strcmp(argv[1], "detect")){ + float thresh 
= find_float_arg(argc, argv, "-thresh", .5); + char *filename = (argc > 4) ? argv[4]: 0; + char *outfile = find_char_arg(argc, argv, "-out", 0); + int fullscreen = find_arg(argc, argv, "-fullscreen"); + char *value = getenv("UVMAsyncBench_BASE"); + char buff[256]; + sprintf(buff, "%s/workloads/realworld/standard/darknet/cfg/coco.data", value); + test_detector(buff, argv[2], argv[3], filename, thresh, .5, outfile, fullscreen); + } else if (0 == strcmp(argv[1], "cifar")){ + run_cifar(argc, argv); + } else if (0 == strcmp(argv[1], "go")){ + run_go(argc, argv); + } else if (0 == strcmp(argv[1], "rnn")){ + run_char_rnn(argc, argv); + } else if (0 == strcmp(argv[1], "coco")){ + run_coco(argc, argv); + } else if (0 == strcmp(argv[1], "classify")){ + predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5); + } else if (0 == strcmp(argv[1], "classifier")){ + run_classifier(argc, argv); + } else if (0 == strcmp(argv[1], "regressor")){ + run_regressor(argc, argv); + } else if (0 == strcmp(argv[1], "isegmenter")){ + run_isegmenter(argc, argv); + } else if (0 == strcmp(argv[1], "segmenter")){ + run_segmenter(argc, argv); + } else if (0 == strcmp(argv[1], "art")){ + run_art(argc, argv); + } else if (0 == strcmp(argv[1], "tag")){ + run_tag(argc, argv); + } else if (0 == strcmp(argv[1], "3d")){ + composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? 
atof(argv[5]) : 0); + } else if (0 == strcmp(argv[1], "test")){ + test_resize(argv[2]); + } else if (0 == strcmp(argv[1], "nightmare")){ + run_nightmare(argc, argv); + } else if (0 == strcmp(argv[1], "rgbgr")){ + rgbgr_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "reset")){ + reset_normalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "denormalize")){ + denormalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "statistics")){ + statistics_net(argv[2], argv[3]); + } else if (0 == strcmp(argv[1], "normalize")){ + normalize_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "rescale")){ + rescale_net(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "ops")){ + operations(argv[2]); + } else if (0 == strcmp(argv[1], "speed")){ + speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0); + } else if (0 == strcmp(argv[1], "oneoff")){ + oneoff(argv[2], argv[3], argv[4]); + } else if (0 == strcmp(argv[1], "oneoff2")){ + oneoff2(argv[2], argv[3], argv[4], atoi(argv[5])); + } else if (0 == strcmp(argv[1], "print")){ + print_weights(argv[2], argv[3], atoi(argv[4])); + } else if (0 == strcmp(argv[1], "partial")){ + partial(argv[2], argv[3], argv[4], atoi(argv[5])); + } else if (0 == strcmp(argv[1], "average")){ + average(argc, argv); + } else if (0 == strcmp(argv[1], "visualize")){ + visualize(argv[2], (argc > 3) ? 
argv[3] : 0); + } else if (0 == strcmp(argv[1], "mkimg")){ + mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]); + } else if (0 == strcmp(argv[1], "imtest")){ + test_resize(argv[2]); + } else { + fprintf(stderr, "Not an option: %s\n", argv[1]); + } + finiTrace(); + return 0; +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/detector-scipy-opencv.py b/workloads/realworld/uvm_prefetch_async/darknet/examples/detector-scipy-opencv.py new file mode 100644 index 0000000000000000000000000000000000000000..3bfc591312ad89ff2b026ffac0daecd461c80447 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/detector-scipy-opencv.py @@ -0,0 +1,56 @@ +# Stupid python path shit. +# Instead just add darknet.py to somewhere in your python path +# OK actually that might not be a great idea, idk, work in progress +# Use at your own risk. or don't, i don't care + +from scipy.misc import imread +import cv2 + +def array_to_image(arr): + arr = arr.transpose(2,0,1) + c = arr.shape[0] + h = arr.shape[1] + w = arr.shape[2] + arr = (arr/255.0).flatten() + data = dn.c_array(dn.c_float, arr) + im = dn.IMAGE(w,h,c,data) + return im + +def detect2(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): + boxes = dn.make_boxes(net) + probs = dn.make_probs(net) + num = dn.num_boxes(net) + dn.network_detect(net, image, thresh, hier_thresh, nms, boxes, probs) + res = [] + for j in range(num): + for i in range(meta.classes): + if probs[j][i] > 0: + res.append((meta.names[i], probs[j][i], (boxes[j].x, boxes[j].y, boxes[j].w, boxes[j].h))) + res = sorted(res, key=lambda x: -x[1]) + dn.free_ptrs(dn.cast(probs, dn.POINTER(dn.c_void_p)), num) + return res + +import sys, os +sys.path.append(os.path.join(os.getcwd(),'python/')) + +import darknet as dn + +# Darknet +net = dn.load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0) +meta = dn.load_meta("cfg/coco.data") +r = dn.detect(net, meta, "data/dog.jpg") +print r + +# scipy +arr= 
imread('data/dog.jpg') +im = array_to_image(arr) +r = detect2(net, meta, im) +print r + +# OpenCV +arr = cv2.imread('data/dog.jpg') +im = array_to_image(arr) +dn.rgbgr_image(im) +r = detect2(net, meta, im) +print r + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/detector.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/detector.c new file mode 100644 index 0000000000000000000000000000000000000000..6ff1fcdff3d3d81abb458e001091cf2757b8d837 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/detector.c @@ -0,0 +1,931 @@ +#include "darknet.h" + +static int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; + + +void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + list *options = read_data_cfg(datacfg); + // Ruihao + char *train_images_cfg = option_find_str(options, "train", "data/train.list"); + char *backup_directory_cfg = option_find_str(options, "backup", "/backup/"); + + char *env = getenv("UVMAsyncBench_BASE"); + char train_images[256]; + char backup_directory[256]; + sprintf(train_images, "%s/%s", env, train_images_cfg); + sprintf(backup_directory, "%s/%s", env, backup_directory_cfg); + // Ruihao + + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network **nets = calloc(ngpus, sizeof(network)); + + srand(time(0)); + int seed = rand(); + int i; + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, 
net->momentum, net->decay); + data train, buffer; + + layer l = net->layers[net->n - 1]; + + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = get_base_args(net); + args.coords = l.coords; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes = l.max_boxes; + args.d = &buffer; + args.type = DETECTION_DATA; + //args.type = INSTANCE_DATA; + args.threads = 64; + + pthread_t load_thread = load_data(args); + double time; + int count = 0; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + if(l.random && count++%10 == 0){ + printf("Resizing\n"); + int dim = (rand() % 10 + 10) * 32; + if (get_current_batch(net)+200 > net->max_batches) dim = 608; + //int dim = (rand() % 4 + 16) * 32; + printf("%d\n", dim); + args.w = dim; + args.h = dim; + + pthread_join(load_thread, 0); + train = buffer; + free_data(train); + load_thread = load_data(args); + + #pragma omp parallel for + for(i = 0; i < ngpus; ++i){ + resize_network(nets[i], dim, dim); + } + net = nets[0]; + } + time=what_time_is_it_now(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + /* + int k; + for(k = 0; k < l.max_boxes; ++k){ + box b = float_to_box(train.y.vals[10] + 1 + k*5); + if(!b.x) break; + printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h); + } + */ + /* + int zz; + for(zz = 0; zz < train.X.cols; ++zz){ + image im = float_to_image(net->w, net->h, 3, train.X.vals[zz]); + int k; + for(k = 0; k < l.max_boxes; ++k){ + box b = float_to_box(train.y.vals[zz] + k*5, 1); + printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + draw_bbox(im, b, 1, 1,0,0); + } + show_image(im, "truth11"); + cvWaitKey(0); + save_image(im, "truth11"); + } + */ + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + + time=what_time_is_it_now(); + float loss 
= 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + i = get_current_batch(net); + printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, i*imgs); + if(i%100==0){ +#ifdef GPU + if(ngpus != 1) sync_nets(nets, ngpus, 0); +#endif + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + if(i%10000==0 || (i < 1000 && i%100 == 0)){ +#ifdef GPU + if(ngpus != 1) sync_nets(nets, ngpus, 0); +#endif + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } +#ifdef GPU + if(ngpus != 1) sync_nets(nets, ngpus, 0); +#endif + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + + +static int get_coco_image_id(char *filename) +{ + char *p = strrchr(filename, '/'); + char *c = strrchr(filename, '_'); + if(c) p = c; + return atoi(p+1); +} + +static void print_cocos(FILE *fp, char *image_path, detection *dets, int num_boxes, int classes, int w, int h) +{ + int i, j; + int image_id = get_coco_image_id(image_path); + for(i = 0; i < num_boxes; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + float bx = xmin; + float by = ymin; + float bw = xmax - xmin; + float bh = ymax - ymin; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", 
image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]); + } + } +} + +void print_detector_detections(FILE **fps, char *id, detection *dets, int total, int classes, int w, int h) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2. + 1; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2. + 1; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2. + 1; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2. + 1; + + if (xmin < 1) xmin = 1; + if (ymin < 1) ymin = 1; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], + xmin, ymin, xmax, ymax); + } + } +} + +void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int classes, int w, int h) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + int class = j; + if (dets[i].prob[class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j+1, dets[i].prob[class], + xmin, ymin, xmax, ymax); + } + } +} + +void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char *outfile) +{ + int j; + list *options = read_data_cfg(datacfg); + char *valid_images = option_find_str(options, "valid", "data/train.list"); + char *name_list = option_find_str(options, "names", "data/names.list"); + char *prefix = option_find_str(options, "results", "results"); + char **names = get_labels(name_list); + char *mapf = option_find_str(options, "map", 0); + int *map = 0; + if (mapf) map = read_map(mapf); + + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 2); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, 
Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + list *plist = get_paths(valid_images); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + char *type = option_find_str(options, "eval", "voc"); + FILE *fp = 0; + FILE **fps = 0; + int coco = 0; + int imagenet = 0; + if(0==strcmp(type, "coco")){ + if(!outfile) outfile = "coco_results"; + snprintf(buff, 1024, "%s/%s.json", prefix, outfile); + fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + coco = 1; + } else if(0==strcmp(type, "imagenet")){ + if(!outfile) outfile = "imagenet-detection"; + snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); + fp = fopen(buff, "w"); + imagenet = 1; + classes = 200; + } else { + if(!outfile) outfile = "comp4_det_test_"; + fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); + fps[j] = fopen(buff, "w"); + } + } + + int m = plist->size; + int i=0; + int t; + + float thresh = .005; + float nms = .45; + + int nthreads = 4; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + image input = make_image(net->w, net->h, net->c*2); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + //args.type = IMAGE_DATA; + args.type = LETTERBOX_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + double start = what_time_is_it_now(); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && 
i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id = basecfg(path); + copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data, 1); + flip_image(val_resized[t]); + copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data + net->w*net->h*net->c, 1); + + network_predict(net, input.data); + int w = val[t].w; + int h = val[t].h; + int num = 0; + detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &num); + if (nms) do_nms_sort(dets, num, classes, nms); + if (coco){ + print_cocos(fp, path, dets, num, classes, w, h); + } else if (imagenet){ + print_imagenet_detections(fp, i+t-nthreads+1, dets, num, classes, w, h); + } else { + print_detector_detections(fps, id, dets, num, classes, w, h); + } + free_detections(dets, num); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + for(j = 0; j < classes; ++j){ + if(fps) fclose(fps[j]); + } + if(coco){ + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start); +} + + +void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile) +{ + int j; + list *options = read_data_cfg(datacfg); + char *valid_images = option_find_str(options, "valid", "data/train.list"); + char *name_list = option_find_str(options, "names", "data/names.list"); + char *prefix = option_find_str(options, "results", "results"); + char **names = get_labels(name_list); + char *mapf = option_find_str(options, "map", 0); + int *map = 0; + if (mapf) map = read_map(mapf); + + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + list *plist = 
get_paths(valid_images); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + char buff[1024]; + char *type = option_find_str(options, "eval", "voc"); + FILE *fp = 0; + FILE **fps = 0; + int coco = 0; + int imagenet = 0; + if(0==strcmp(type, "coco")){ + if(!outfile) outfile = "coco_results"; + snprintf(buff, 1024, "%s/%s.json", prefix, outfile); + fp = fopen(buff, "w"); + fprintf(fp, "[\n"); + coco = 1; + } else if(0==strcmp(type, "imagenet")){ + if(!outfile) outfile = "imagenet-detection"; + snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); + fp = fopen(buff, "w"); + imagenet = 1; + classes = 200; + } else { + if(!outfile) outfile = "comp4_det_test_"; + fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); + fps[j] = fopen(buff, "w"); + } + } + + + int m = plist->size; + int i=0; + int t; + + float thresh = .005; + float nms = .45; + + int nthreads = 4; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + //args.type = IMAGE_DATA; + args.type = LETTERBOX_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + double start = what_time_is_it_now(); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; 
t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id = basecfg(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &nboxes); + if (nms) do_nms_sort(dets, nboxes, classes, nms); + if (coco){ + print_cocos(fp, path, dets, nboxes, classes, w, h); + } else if (imagenet){ + print_imagenet_detections(fp, i+t-nthreads+1, dets, nboxes, classes, w, h); + } else { + print_detector_detections(fps, id, dets, nboxes, classes, w, h); + } + free_detections(dets, nboxes); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + for(j = 0; j < classes; ++j){ + if(fps) fclose(fps[j]); + } + if(coco){ + fseek(fp, -2, SEEK_CUR); + fprintf(fp, "\n]\n"); + fclose(fp); + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start); +} + +void validate_detector_recall(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + list *plist = get_paths("data/coco_val_5k.list"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + + int j, k; + + int m = plist->size; + int i=0; + + float thresh = .001; + float iou_thresh = .5; + float nms = .4; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + int nboxes = 0; + detection *dets = get_network_boxes(net, sized.w, sized.h, thresh, .5, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, nboxes, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", 
labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < nboxes; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < l.w*l.h*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free(id); + free_image(orig); + free_image(sized); + } +} + + +void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen) +{ + list *options = read_data_cfg(datacfg); + // Ruihao + char *name_list_cfg = option_find_str(options, "names", "data/names.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char name_list[256]; + sprintf(name_list, "%s/%s", env, name_list_cfg); + // Ruihao + char **names = get_labels(name_list); + + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + double time; + char buff[256]; + char *input = buff; + float nms=.45; + while(1){ + printf("fine name is %s\n", filename); + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = letterbox_image(im, net->w, net->h); + //image sized = resize_image(im, net->w, net->h); 
+ //image sized2 = resize_max(im, net->w); + //image sized = crop_image(sized2, -((net->w - sized2.w)/2), -((net->h - sized2.h)/2), net->w, net->h); + //resize_network(net, sized.w, sized.h); + layer l = net->layers[net->n-1]; + + + float *X = sized.data; + time=what_time_is_it_now(); + startCPU(); + network_predict(net, X); + endCPU(); + printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time); + int nboxes = 0; + detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes); + //printf("%d\n", nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes); + free_detections(dets, nboxes); + if(outfile){ + save_image(im, outfile); + } + else{ + save_image(im, "predictions"); +#ifdef OPENCV + make_window("predictions", 512, 512, 0); + show_image(im, "predictions", 0); +#endif + } + + free_image(im); + free_image(sized); + if (filename) break; + } +} + +/* +void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + CvCapture * cap; + + int w = 1280; + int h = 720; + + if(filename){ + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + } + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + cvNamedWindow(base, CV_WINDOW_NORMAL); + cvResizeWindow(base, 512, 512); + float fps = 0; + int i; + float nms = .45; + + while(1){ + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + layer l = 
net->layers[net->n-1]; + + float *X = in_s.data; + network_predict(net, X); + int nboxes = 0; + detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 0, &nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + + for(i = 0; i < nboxes; ++i){ + if(dets[i].prob[class] > thresh){ + box b = dets[i].bbox; + int left = b.x-b.w/2.; + int top = b.y-b.h/2.; + censor_image(in, left, top, b.w, b.h); + } + } + show_image(in, base); + cvWaitKey(10); + free_detections(dets, nboxes); + + + free_image(in_s); + free_image(in); + + + float curr = 0; + fps = .9*fps + .1*curr; + for(i = 0; i < skip; ++i){ + image in = get_image_from_stream(cap); + free_image(in); + } + } + #endif +} + +void extract_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip) +{ +#ifdef OPENCV + char *base = basecfg(cfgfile); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + CvCapture * cap; + + int w = 1280; + int h = 720; + + if(filename){ + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + } + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + cvNamedWindow(base, CV_WINDOW_NORMAL); + cvResizeWindow(base, 512, 512); + float fps = 0; + int i; + int count = 0; + float nms = .45; + + while(1){ + image in = get_image_from_stream(cap); + //image in_s = resize_image(in, net->w, net->h); + image in_s = letterbox_image(in, net->w, net->h); + layer l = net->layers[net->n-1]; + + show_image(in, base); + + int nboxes = 0; + float *X = in_s.data; + network_predict(net, X); + detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 1, &nboxes); + //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); + if 
(nms) do_nms_sort(dets, nboxes, l.classes, nms); + + for(i = 0; i < nboxes; ++i){ + if(dets[i].prob[class] > thresh){ + box b = dets[i].bbox; + int size = b.w*in.w > b.h*in.h ? b.w*in.w : b.h*in.h; + int dx = b.x*in.w-size/2.; + int dy = b.y*in.h-size/2.; + image bim = crop_image(in, dx, dy, size, size); + char buff[2048]; + sprintf(buff, "results/extract/%07d", count); + ++count; + save_image(bim, buff); + free_image(bim); + } + } + free_detections(dets, nboxes); + + + free_image(in_s); + free_image(in); + + + float curr = 0; + fps = .9*fps + .1*curr; + for(i = 0; i < skip; ++i){ + image in = get_image_from_stream(cap); + free_image(in); + } + } + #endif +} +*/ + +/* +void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets) +{ + network_predict_image(net, im); + layer l = net->layers[net->n-1]; + int nboxes = num_boxes(net); + fill_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 0, dets); + if (nms) do_nms_sort(dets, nboxes, l.classes, nms); +} +*/ + +void infer_detector(char *datacfg, char *cfgfile, char *weightfile) +{ + int curr = 0; + network *net = load_network(cfgfile, weightfile, 0); + srand(time(0)); + + list *options = read_data_cfg(datacfg); + + // Ruihao + char *test_list_cfg = option_find_str(options, "valid", "data/valid.list"); + char *env = getenv("UVMAsyncBench_BASE"); + char test_list[256]; + sprintf(test_list, "%s/%s", env, test_list_cfg); + // Ruihao + int classes = option_find_int(options, "classes", 2); + + list *plist = get_paths(test_list); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + m = net->max_batches * net->batch; + free_list(plist); + + clock_t time; + + data val, buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.classes = classes; + args.n = net->batch; + args.m = 0; + args.labels = 0; + args.d = &buffer; + args.type = OLD_CLASSIFICATION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + 
for(curr = net->batch; curr <= m; curr += net->batch){ + // while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + if(curr < m){ + args.paths = paths + curr; + if (curr + net->batch > m) args.n = m - curr; + load_thread = load_data_in_thread(args); + } + fprintf(stderr, "Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + + fprintf(stderr, "%lf seconds, %d images, %d total\n", sec(clock()-time), val.X.rows, curr); + free_matrix(pred); + free_data(val); + } +} + +void run_detector(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .5); + float hier_thresh = find_float_arg(argc, argv, "-hier", .5); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, "-s", 0); + int avg = find_int_arg(argc, argv, "-avg", 3); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + char *outfile = find_char_arg(argc, argv, "-out", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int clear = find_arg(argc, argv, "-clear"); + int fullscreen = find_arg(argc, argv, "-fullscreen"); + int width = find_int_arg(argc, argv, "-w", 0); + int height = find_int_arg(argc, argv, "-h", 0); + int fps = find_int_arg(argc, argv, "-fps", 0); + //int class = find_int_arg(argc, argv, "-class", 0); + + char 
*datacfg = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen); + else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile); + // Ruihao + else if(0==strcmp(argv[2], "infer")) infer_detector(datacfg, cfg, weights); + // Ruihao + else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile); + else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) { + list *options = read_data_cfg(datacfg); + int classes = option_find_int(options, "classes", 20); + char *name_list = option_find_str(options, "names", "data/names.list"); + char **names = get_labels(name_list); + demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen); + } + //else if(0==strcmp(argv[2], "extract")) extract_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip); + //else if(0==strcmp(argv[2], "censor")) censor_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip); +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/detector.py b/workloads/realworld/uvm_prefetch_async/darknet/examples/detector.py new file mode 100644 index 0000000000000000000000000000000000000000..40bb365e68211c513db9d63847ac95070f5eab98 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/detector.py @@ -0,0 +1,27 @@ +# Stupid python path shit. +# Instead just add darknet.py to somewhere in your python path +# OK actually that might not be a great idea, idk, work in progress +# Use at your own risk. 
or don't, i don't care + +import sys, os +sys.path.append(os.path.join(os.getcwd(),'python/')) + +import darknet as dn +import pdb + +dn.set_gpu(0) +net = dn.load_net("cfg/yolo-thor.cfg", "/home/pjreddie/backup/yolo-thor_final.weights", 0) +meta = dn.load_meta("cfg/thor.data") +r = dn.detect(net, meta, "data/bedroom.jpg") +print r + +# And then down here you could detect a lot more images like: +r = dn.detect(net, meta, "data/eagle.jpg") +print r +r = dn.detect(net, meta, "data/giraffe.jpg") +print r +r = dn.detect(net, meta, "data/horses.jpg") +print r +r = dn.detect(net, meta, "data/person.jpg") +print r + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/dice.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/dice.c new file mode 100644 index 0000000000000000000000000000000000000000..f56d76c0bb66c7f630ba1c4d1dc9195398b87cfb --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/dice.c @@ -0,0 +1,116 @@ +#include "darknet.h" + +char *dice_labels[] = {"face1","face2","face3","face4","face5","face6"}; + +void train_dice(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + int i = *net.seen/imgs; + char **labels = dice_labels; + list *plist = get_paths("data/dice/dice.train.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + while(1){ + ++i; + time=clock(); + data train = load_data_old(paths, imgs, plist->size, labels, 6, net.w, net.h); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + 
loss*.1; + printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); + free_data(train); + if((i % 100) == 0) net.learning_rate *= .1; + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, i); + save_weights(net, buff); + } + } +} + +void validate_dice(char *filename, char *weightfile) +{ + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + char **labels = dice_labels; + list *plist = get_paths("data/dice/dice.val.list"); + + char **paths = (char **)list_to_array(plist); + int m = plist->size; + free_list(plist); + + data val = load_data_old(paths, m, 0, labels, 6, net.w, net.h); + float *acc = network_accuracies(net, val, 2); + printf("Validation Accuracy: %f, %d images\n", acc[0], m); + free_data(val); +} + +void test_dice(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + int i = 0; + char **names = dice_labels; + char buff[256]; + char *input = buff; + int indexes[6]; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, net.w, net.h); + float *X = im.data; + float *predictions = network_predict(net, X); + top_predictions(net, 6, indexes); + for(i = 0; i < 6; ++i){ + int index = indexes[i]; + printf("%s: %f\n", names[index], predictions[index]); + } + free_image(im); + if (filename) break; + } +} + +void run_dice(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "test")) test_dice(cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_dice(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_dice(cfg, weights); +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/go.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/go.c new file mode 100644 index 0000000000000000000000000000000000000000..688579dcb3a3e35e9a79b8fb8aa684f28f44290d --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/go.c @@ -0,0 +1,1370 @@ +#include "darknet.h" + +#include +#include +#include + +int inverted = 1; +int noi = 1; +static const int nind = 10; +int legal_go(float *b, float *ko, int p, int r, int c); +int check_ko(float *x, float *ko); + +typedef struct { + char **data; + int n; +} moves; + +char *fgetgo(FILE *fp) +{ + if(feof(fp)) return 0; + size_t size = 96; + char *line = malloc(size*sizeof(char)); + if(size != fread(line, sizeof(char), size, fp)){ + free(line); + return 0; + } + + return line; +} + +moves load_go_moves(char *filename) +{ + moves m; + m.n = 128; + m.data = calloc(128, sizeof(char*)); + FILE *fp = fopen(filename, "rb"); + int count = 0; + char *line = 0; + while ((line = fgetgo(fp))) { + if (count >= m.n) { + m.n *= 2; + m.data = realloc(m.data, m.n*sizeof(char*)); + } + m.data[count] = line; + ++count; + } + printf("%d\n", count); + m.n = count; + m.data = realloc(m.data, count*sizeof(char*)); + return m; +} + +void string_to_board(char *s, float *board) +{ + int i, j; + memset(board, 0, 2*19*19*sizeof(float)); + int count = 0; + for(i = 0; i < 91; ++i){ + char c = s[i]; + for(j = 0; j < 4; ++j){ + int me = (c >> (2*j)) & 1; + int you = (c >> (2*j + 1)) & 1; + if (me) board[count] = 1; + else if (you) board[count + 19*19] = 1; + ++count; + if(count >= 19*19) break; + } + } +} + +void board_to_string(char *s, float *board) +{ + int i, j; + memset(s, 0, (19*19/4+1)*sizeof(char)); + int count = 0; + for(i = 0; i < 91; 
/*
** Report who occupies board point i: +1 for a stone on the first plane
** ("mine"), -1 for a stone on the second plane ("theirs"), 0 if empty.
** b is the two-plane 19x19 board laid out plane-major.
*/
static int occupied(float *b, int i)
{
    if (b[i] != 0) return 1;
    return (b[i + 19*19] != 0) ? -1 : 0;
}
= load_go_moves("games.txt"); + + int N = m.n; + printf("Moves: %d\n", N); + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time=what_time_is_it_now(); + + data train = random_go_moves(m, net->batch*net->subdivisions*ngpus); + printf("Loaded: %lf seconds\n", what_time_is_it_now() - time); + time=what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 10); + } +#else + loss = train_network(net, train); +#endif + free_data(train); + + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.95 + loss*.05; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory,base, epoch); + save_weights(net, buff); + + } + if(get_current_batch(net)%1000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + if(get_current_batch(net)%10000 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_%ld.backup",backup_directory,base,get_current_batch(net)); + save_weights(net, buff); + } + } + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free(base); +} + +static void propagate_liberty(float *board, int *lib, int *visited, int row, int col, int side) +{ + if (row < 0 || row > 18 || col < 0 || col > 18) return; + int index = row*19 + col; + if (occupied(board,index) != side) return; + if (visited[index]) return; + visited[index] = 1; + lib[index] += 1; + propagate_liberty(board, lib, visited, row+1, col, side); + propagate_liberty(board, lib, visited, row-1, col, side); + propagate_liberty(board, lib, visited, row, col+1, side); + 
/*
** For every empty point, credit one liberty to each adjacent stone group.
** Returns a freshly calloc'd 19x19 int array (caller frees) in which
** lib[p] is the liberty count of the group containing point p.
*/
static int *calculate_liberties(float *board)
{
    int *lib = calloc(19*19, sizeof(int));
    int visited[19*19];
    int row, col;
    for (row = 0; row < 19; ++row) {
        for (col = 0; col < 19; ++col) {
            memset(visited, 0, 19*19*sizeof(int));
            int p = row*19 + col;
            if (occupied(board, p)) continue;
            /* visited is shared across the four flood-fills below so one
             * empty point grants at most one liberty per adjacent group. */
            if (col > 0 && occupied(board, p - 1))
                propagate_liberty(board, lib, visited, row, col-1, occupied(board, p - 1));
            if (col < 18 && occupied(board, p + 1))
                propagate_liberty(board, lib, visited, row, col+1, occupied(board, p + 1));
            if (row > 0 && occupied(board, p - 19))
                propagate_liberty(board, lib, visited, row-1, col, occupied(board, p - 19));
            if (row < 18 && occupied(board, p + 19))
                propagate_liberty(board, lib, visited, row+1, col, occupied(board, p + 19));
        }
    }
    return lib;
}
/*
** Switch the board's perspective in place: swap the two stone planes
** (mine <-> theirs) and invert every entry of the third, side-to-move
** plane.
*/
void flip_board(float *board)
{
    int p;
    for (p = 0; p < 19*19; ++p) {
        float mine = board[p];
        board[p] = board[p + 19*19];
        board[p + 19*19] = mine;
        board[p + 2*19*19] = 1 - board[p + 2*19*19];
    }
}
/* Return 1 when the two 3-plane boards (19*19*3 floats) are bitwise
 * identical, 0 otherwise. */
int compare_board(float *a, float *b)
{
    return memcmp(a, b, 19*19*3*sizeof(float)) == 0;
}
/* Heap-allocate and return a duplicate of the full 3-plane board
 * (19*19*3 floats).  The caller owns the returned buffer. */
float *copy_board(float *board)
{
    const int n = 19*19*3;
    float *dup = calloc(n, sizeof(float));
    copy_cpu(n, board, 1, dup, 1);
    return dup;
}
+ root->visit_count[i]); + if(root->prob[i] > max){ + max = root->prob[i]; + max_i = i; + } + } + float val; + i = max_i; + root->visit_count[i]++; + root->total_count++; + if (root->children[i]) { + val = select_mcts(root->children[i], net, root->board, cpuct); + } else { + if(max_i < 19*19 && !legal_go(root->board, prev, 1, max_i/19, max_i%19)) { + root->mean[i] = -1; + root->value[i] = -1; + root->prior[i] = 0; + --root->total_count; + return select_mcts(root, net, prev, cpuct); + //printf("Detected ko\n"); + //getchar(); + } else { + float *next = copy_board(root->board); + if (max_i < 19*19) { + move_go(next, 1, max_i / 19, max_i % 19); + } + flip_board(next); + root->children[i] = expand(next, root->board, net); + val = -root->children[i]->result; + if(max_i == 19*19){ + root->children[i]->pass = 1; + if (root->pass){ + root->children[i]->done = 1; + } + } + } + } + root->value[i] += val; + root->mean[i] = root->value[i]/root->visit_count[i]; + return -val; +} + +mcts_tree *run_mcts(mcts_tree *tree, network *net, float *board, float *ko, int player, int n, float cpuct, float secs) +{ + int i; + double t = what_time_is_it_now(); + if(player < 0) flip_board(board); + if(!tree) tree = expand(copy_board(board), ko, net); + assert(compare_board(tree->board, board)); + for(i = 0; i < n; ++i){ + if (secs > 0 && (what_time_is_it_now() - t) > secs) break; + int max_i = max_int_index(tree->visit_count, 19*19+1); + if (tree->visit_count[max_i] >= n) break; + select_mcts(tree, net, ko, cpuct); + } + if(player < 0) flip_board(board); + //fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + return tree; +} + +mcts_tree *move_mcts(mcts_tree *tree, int index) +{ + if(index < 0 || index > 19*19 || !tree || !tree->children[index]) { + free_mcts(tree); + tree = 0; + } else { + mcts_tree *swap = tree; + tree = tree->children[index]; + swap->children[index] = 0; + free_mcts(swap); + } + return tree; +} + +typedef struct { + float value; + float mcts; + int row; + int 
col; +} move; + +move pick_move(mcts_tree *tree, float temp, int player) +{ + int i; + float probs[19*19+1] = {0}; + move m = {0}; + double sum = 0; + /* + for(i = 0; i < 19*19+1; ++i){ + probs[i] = tree->visit_count[i]; + } + */ + //softmax(probs, 19*19+1, temp, 1, probs); + for(i = 0; i < 19*19+1; ++i){ + sum += pow(tree->visit_count[i], 1./temp); + } + for(i = 0; i < 19*19+1; ++i){ + probs[i] = pow(tree->visit_count[i], 1./temp) / sum; + } + + int index = sample_array(probs, 19*19+1); + m.row = index / 19; + m.col = index % 19; + m.value = (tree->result+1.)/2.; + m.mcts = (tree->mean[index]+1.)/2.; + + int indexes[nind]; + top_k(probs, 19*19+1, nind, indexes); + print_board(stderr, tree->board, player, indexes); + + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", index/19, index%19, tree->result, tree->prior[index], probs[index], tree->mean[index], (tree->children[index])?tree->children[index]->result:0, tree->visit_count[index]); + int ind = max_index(probs, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, tree->result, tree->prior[ind], probs[ind], tree->mean[ind], (tree->children[ind])?tree->children[ind]->result:0, tree->visit_count[ind]); + ind = max_index(tree->prior, 19*19+1); + fprintf(stderr, "%d %d, Result: %f, Prior: %f, Prob: %f, Mean Value: %f, Child Result: %f, Visited: %d\n", ind/19, ind%19, tree->result, tree->prior[ind], probs[ind], tree->mean[ind], (tree->children[ind])?tree->children[ind]->result:0, tree->visit_count[ind]); + return m; +} + +/* + float predict_move(network *net, float *board, float *move, int multi, float *ko, float temp) + { + + int i; + + int max_v = 0; + int max_i = 0; + for(i = 0; i < 19*19+1; ++i){ + if(root->visit_count[i] > max_v){ + max_v = root->visit_count[i]; + max_i = i; + } + } + fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + int ind = max_index(root->mean, 
/*
** Would a stone of color p played at (r, c) be suicide?  Per
** makes_safe_go, the move is safe if any neighbor is empty, is an
** opponent group on its last liberty (a capture), or is a friendly
** group that retains a liberty.  Returns 1 for suicide, 0 otherwise.
*/
int suicide_go(float *b, int p, int r, int c)
{
    int *lib = calculate_liberties(b);
    int safe = makes_safe_go(b, lib, p, r+1, c)
            || makes_safe_go(b, lib, p, r-1, c)
            || makes_safe_go(b, lib, p, r, c+1)
            || makes_safe_go(b, lib, p, r, c-1);
    free(lib);
    return !safe;
}
copy_cpu(19*19*3, b, 1, curr, 1); + move_go(curr, p, r, c); + if(check_ko(curr, ko)) return 0; + if(suicide_go(b, p, r, c)) return 0; + return 1; +} + +/* + move generate_move(mcts_tree *root, network *net, int player, float *board, int multi, float temp, float *ko, int print) + { + move m = {0}; +//root = run_mcts(tree, network *net, float *board, float *ko, int n, float cpuct) +int i, j; +int empty = 1; +for(i = 0; i < 19*19; ++i){ +if (occupied(board, i)) { +empty = 0; +break; +} +} +if(empty) { +m.value = .5; +m.mcts = .5; +m.row = 3; +m.col = 15; +return m; +} + +float move[362]; +if (player < 0) flip_board(board); +float result = predict_move(net, board, move, multi, ko, temp); +if (player < 0) flip_board(board); +if(result == -1000.f) return -2; + +for(i = 0; i < 19; ++i){ +for(j = 0; j < 19; ++j){ +if (!legal_go(board, ko, player, i, j)) move[i*19 + j] = 0; +} +} + +int indexes[nind]; +top_k(move, 19*19+1, nind, indexes); + + +int max = max_index(move, 19*19+1); +int row = max / 19; +int col = max % 19; +int index = sample_array(move, 19*19+1); + +if(print){ +top_k(move, 19*19+1, nind, indexes); +for(i = 0; i < nind; ++i){ +if (!move[indexes[i]]) indexes[i] = -1; +} +print_board(stderr, board, 1, indexes); +fprintf(stderr, "%s To Move\n", player > 0 ? 
"X" : "O"); +fprintf(stderr, "%.2f%% Win Chance\n", (result+1)/2*100); +for(i = 0; i < nind; ++i){ +int index = indexes[i]; +int row = index / 19; +int col = index % 19; +if(row == 19){ +fprintf(stderr, "%d: Pass, %.2f%%\n", i+1, move[index]*100); +} else { +fprintf(stderr, "%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100); +} +} +} +if (row == 19) return -1; + +if (suicide_go(board, player, row, col)){ +return -1; +} + +if (suicide_go(board, player, index/19, index%19)){ +index = max; +} +if (index == 19*19) return -1; +return index; +} +*/ + +void valid_go(char *cfgfile, char *weightfile, int multi, char *filename) +{ + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + + float *board = calloc(19*19*3, sizeof(float)); + float *move = calloc(19*19+2, sizeof(float)); + // moves m = load_go_moves("/home/pjreddie/backup/go.test"); + moves m = load_go_moves(filename); + + int N = m.n; + int i,j; + int correct = 0; + for (i = 0; i 0) for(j = 0; j < 19*19; ++j) board[19*19*2 + j] = 1; + predict_move2(net, board, move, multi); + int index = max_index(move, 19*19+1); + if(index == truth) ++correct; + printf("%d Accuracy %f\n", i, (float) correct/(i+1)); + } +} + +int print_game(float *board, FILE *fp) +{ + int i, j; + int count = 3; + fprintf(fp, "komi 6.5\n"); + fprintf(fp, "boardsize 19\n"); + fprintf(fp, "clear_board\n"); + for(j = 0; j < 19; ++j){ + for(i = 0; i < 19; ++i){ + if(occupied(board,j*19 + i) == 1) fprintf(fp, "play black %c%d\n", 'A'+i+(i>=8), 19-j); + if(occupied(board,j*19 + i) == -1) fprintf(fp, "play white %c%d\n", 'A'+i+(i>=8), 19-j); + if(occupied(board,j*19 + i)) ++count; + } + } + return count; +} + + +int stdin_ready() +{ + fd_set readfds; + FD_ZERO(&readfds); + + struct timeval 
timeout; + timeout.tv_sec = 0; + timeout.tv_usec = 0; + FD_SET(STDIN_FILENO, &readfds); + + if (select(1, &readfds, NULL, NULL, &timeout)){ + return 1; + } + return 0; +} + +mcts_tree *ponder(mcts_tree *tree, network *net, float *b, float *ko, int player, float cpuct) +{ + double t = what_time_is_it_now(); + int count = 0; + if (tree) count = tree->total_count; + while(!stdin_ready()){ + if (what_time_is_it_now() - t > 120) break; + tree = run_mcts(tree, net, b, ko, player, 100000, cpuct, .1); + } + fprintf(stderr, "Pondered %d moves...\n", tree->total_count - count); + return tree; +} + +void engine_go(char *filename, char *weightfile, int mcts_iters, float secs, float temp, float cpuct, int anon, int resign) +{ + mcts_tree *root = 0; + network *net = load_network(filename, weightfile, 0); + set_batch_network(net, 1); + srand(time(0)); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *one = calloc(19*19*3, sizeof(float)); + float *two = calloc(19*19*3, sizeof(float)); + int ponder_player = 0; + int passed = 0; + int move_num = 0; + int main_time = 0; + int byo_yomi_time = 0; + int byo_yomi_stones = 0; + int black_time_left = 0; + int black_stones_left = 0; + int white_time_left = 0; + int white_stones_left = 0; + float orig_time = secs; + int old_ponder = 0; + while(1){ + if(ponder_player){ + root = ponder(root, net, board, two, ponder_player, cpuct); + } + old_ponder = ponder_player; + ponder_player = 0; + char buff[256]; + int id = 0; + int has_id = (scanf("%d", &id) == 1); + scanf("%s", buff); + if (feof(stdin)) break; + fprintf(stderr, "%s\n", buff); + char ids[256]; + sprintf(ids, "%d", id); + //fprintf(stderr, "%s\n", buff); + if (!has_id) ids[0] = 0; + if (!strcmp(buff, "protocol_version")){ + printf("=%s 2\n\n", ids); + } else if (!strcmp(buff, "name")){ + if(anon){ + printf("=%s The Fool!\n\n", ids); + }else{ + printf("=%s DarkGo\n\n", ids); + } + } else if (!strcmp(buff, "time_settings")){ + ponder_player = old_ponder; + 
scanf("%d %d %d", &main_time, &byo_yomi_time, &byo_yomi_stones); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "time_left")){ + ponder_player = old_ponder; + char color[256]; + int time = 0, stones = 0; + scanf("%s %d %d", color, &time, &stones); + if (color[0] == 'b' || color[0] == 'B'){ + black_time_left = time; + black_stones_left = stones; + } else { + white_time_left = time; + white_stones_left = stones; + } + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "version")){ + if(anon){ + printf("=%s :-DDDD\n\n", ids); + }else { + printf("=%s 1.0. Want more DarkGo? You can find me on OGS, unlimited games, no waiting! https://online-go.com/user/view/434218\n\n", ids); + } + } else if (!strcmp(buff, "known_command")){ + char comm[256]; + scanf("%s", comm); + int known = (!strcmp(comm, "protocol_version") || + !strcmp(comm, "name") || + !strcmp(comm, "version") || + !strcmp(comm, "known_command") || + !strcmp(comm, "list_commands") || + !strcmp(comm, "quit") || + !strcmp(comm, "boardsize") || + !strcmp(comm, "clear_board") || + !strcmp(comm, "komi") || + !strcmp(comm, "final_status_list") || + !strcmp(comm, "play") || + !strcmp(comm, "genmove_white") || + !strcmp(comm, "genmove_black") || + !strcmp(comm, "fixed_handicap") || + !strcmp(comm, "genmove")); + if(known) printf("=%s true\n\n", ids); + else printf("=%s false\n\n", ids); + } else if (!strcmp(buff, "list_commands")){ + printf("=%s protocol_version\nshowboard\nname\nversion\nknown_command\nlist_commands\nquit\nboardsize\nclear_board\nkomi\nplay\ngenmove_black\ngenmove_white\ngenmove\nfinal_status_list\nfixed_handicap\n\n", ids); + } else if (!strcmp(buff, "quit")){ + break; + } else if (!strcmp(buff, "boardsize")){ + int boardsize = 0; + scanf("%d", &boardsize); + //fprintf(stderr, "%d\n", boardsize); + if(boardsize != 19){ + printf("?%s unacceptable size\n\n", ids); + } else { + root = move_mcts(root, -1); + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + move_num = 0; + printf("=%s 
\n\n", ids); + } + } else if (!strcmp(buff, "fixed_handicap")){ + int handicap = 0; + scanf("%d", &handicap); + int indexes[] = {72, 288, 300, 60, 180, 174, 186, 66, 294}; + int i; + for(i = 0; i < handicap; ++i){ + board[indexes[i]] = 1; + ++move_num; + } + root = move_mcts(root, -1); + } else if (!strcmp(buff, "clear_board")){ + passed = 0; + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + move_num = 0; + root = move_mcts(root, -1); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "komi")){ + float komi = 0; + scanf("%f", &komi); + printf("=%s \n\n", ids); + } else if (!strcmp(buff, "showboard")){ + printf("=%s \n", ids); + print_board(stdout, board, 1, 0); + printf("\n"); + } else if (!strcmp(buff, "play") || !strcmp(buff, "black") || !strcmp(buff, "white")){ + ++move_num; + char color[256]; + if(!strcmp(buff, "play")) + { + scanf("%s ", color); + } else { + scanf(" "); + color[0] = buff[0]; + } + char c; + int r; + int count = scanf("%c%d", &c, &r); + int player = (color[0] == 'b' || color[0] == 'B') ? 
1 : -1; + if((c == 'p' || c == 'P') && count < 2) { + passed = 1; + printf("=%s \n\n", ids); + char *line = fgetl(stdin); + free(line); + fflush(stdout); + fflush(stderr); + root = move_mcts(root, 19*19); + continue; + } else { + passed = 0; + } + if(c >= 'A' && c <= 'Z') c = c - 'A'; + if(c >= 'a' && c <= 'z') c = c - 'a'; + if(c >= 8) --c; + r = 19 - r; + fprintf(stderr, "move: %d %d\n", r, c); + + float *swap = two; + two = one; + one = swap; + move_go(board, player, r, c); + copy_cpu(19*19*3, board, 1, one, 1); + if(root) fprintf(stderr, "Prior: %f\n", root->prior[r*19 + c]); + if(root) fprintf(stderr, "Mean: %f\n", root->mean[r*19 + c]); + if(root) fprintf(stderr, "Result: %f\n", root->result); + root = move_mcts(root, r*19 + c); + if(root) fprintf(stderr, "Visited: %d\n", root->total_count); + else fprintf(stderr, "NOT VISITED\n"); + + printf("=%s \n\n", ids); + //print_board(stderr, board, 1, 0); + } else if (!strcmp(buff, "genmove") || !strcmp(buff, "genmove_black") || !strcmp(buff, "genmove_white")){ + ++move_num; + int player = 0; + if(!strcmp(buff, "genmove")){ + char color[256]; + scanf("%s", color); + player = (color[0] == 'b' || color[0] == 'B') ? 
1 : -1; + } else if (!strcmp(buff, "genmove_black")){ + player = 1; + } else { + player = -1; + } + if(player > 0){ + if(black_time_left <= 30) secs = 2.5; + else secs = orig_time; + } else { + if(white_time_left <= 30) secs = 2.5; + else secs = orig_time; + } + ponder_player = -player; + + //tree = generate_move(net, player, board, multi, .1, two, 1); + double t = what_time_is_it_now(); + root = run_mcts(root, net, board, two, player, mcts_iters, cpuct, secs); + fprintf(stderr, "%f Seconds\n", what_time_is_it_now() - t); + move m = pick_move(root, temp, player); + root = move_mcts(root, m.row*19 + m.col); + + + if(move_num > resign && m.value < .1 && m.mcts < .1){ + printf("=%s resign\n\n", ids); + } else if(m.row == 19){ + printf("=%s pass\n\n", ids); + passed = 0; + } else { + int row = m.row; + int col = m.col; + + float *swap = two; + two = one; + one = swap; + + move_go(board, player, row, col); + copy_cpu(19*19*3, board, 1, one, 1); + row = 19 - row; + if (col >= 8) ++col; + printf("=%s %c%d\n\n", ids, 'A' + col, row); + } + + } else if (!strcmp(buff, "p")){ + //print_board(board, 1, 0); + } else if (!strcmp(buff, "final_status_list")){ + char type[256]; + scanf("%s", type); + fprintf(stderr, "final_status\n"); + char *line = fgetl(stdin); + free(line); + if(type[0] == 'd' || type[0] == 'D'){ + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, "%s final_status_list dead\n", ids); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + while((l = fgetl(p))){ + printf("%s\n", l); + free(l); + } + } else { + printf("?%s unknown command\n\n", ids); + } + } else if (!strcmp(buff, "kgs-genmove_cleanup")){ + char type[256]; + scanf("%s", type); + fprintf(stderr, "kgs-genmove_cleanup\n"); + char *line = fgetl(stdin); + free(line); + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, 
"%s kgs-genmove_cleanup %s\n", ids, type); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + while((l = fgetl(p))){ + printf("%s\n", l); + free(l); + } + } else { + char *line = fgetl(stdin); + free(line); + printf("?%s unknown command\n\n", ids); + } + fflush(stdout); + fflush(stderr); + } + printf("%d %d %d\n",passed, black_stones_left, white_stones_left); +} + +void test_go(char *cfg, char *weights, int multi) +{ + int i; + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(time(0)); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *move = calloc(19*19+1, sizeof(float)); + int color = 1; + while(1){ + float result = predict_move2(net, board, move, multi); + printf("%.2f%% Win Chance\n", (result+1)/2*100); + + int indexes[nind]; + int row, col; + top_k(move, 19*19+1, nind, indexes); + print_board(stderr, board, color, indexes); + for(i = 0; i < nind; ++i){ + int index = indexes[i]; + row = index / 19; + col = index % 19; + if(row == 19){ + printf("%d: Pass, %.2f%%\n", i+1, move[index]*100); + } else { + printf("%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100); + } + } + //if(color == 1) printf("\u25EF Enter move: "); + //else printf("\u25C9 Enter move: "); + if(color == 1) printf("X Enter move: "); + else printf("O Enter move: "); + + char c; + char *line = fgetl(stdin); + int picked = 1; + int dnum = sscanf(line, "%d", &picked); + int cnum = sscanf(line, "%c", &c); + if (strlen(line) == 0 || dnum) { + --picked; + if (picked < nind){ + int index = indexes[picked]; + row = index / 19; + col = index % 19; + if(row < 19){ + move_go(board, 1, row, col); + } + } + } else if (cnum){ + if (c <= 'T' && c >= 'A'){ + int num = sscanf(line, "%c %d", &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num 
== 2) move_go(board, 1, row, col); + } else if (c == 'p') { + // Pass + } else if(c=='b' || c == 'w'){ + char g; + int num = sscanf(line, "%c %c %d", &g, &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num == 3) { + int mc = (g == 'b') ? 1 : -1; + if (mc == color) { + board[row*19 + col] = 1; + } else { + board[19*19 + row*19 + col] = 1; + } + } + } else if(c == 'c'){ + char g; + int num = sscanf(line, "%c %c %d", &g, &c, &row); + row = (inverted)?19 - row : row-1; + col = c - 'A'; + if (col > 7 && noi) col -= 1; + if (num == 3) { + board[row*19 + col] = 0; + board[19*19 + row*19 + col] = 0; + } + } + } + free(line); + flip_board(board); + color = -color; + } +} + +float score_game(float *board) +{ + int i; + FILE *f = fopen("game.txt", "w"); + int count = print_game(board, f); + fprintf(f, "final_score\n"); + fclose(f); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + for(i = 0; i < count; ++i){ + free(fgetl(p)); + free(fgetl(p)); + } + char *l = 0; + float score = 0; + char player = 0; + while((l = fgetl(p))){ + fprintf(stderr, "%s \t", l); + int n = sscanf(l, "= %c+%f", &player, &score); + free(l); + if (n == 2) break; + } + if(player == 'W') score = -score; + pclose(p); + return score; +} + +void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi) +{ + mcts_tree *tree1 = 0; + mcts_tree *tree2 = 0; + network *net = load_network(filename, weightfile, 0); + //set_batch_network(net, 1); + + network *net2; + if (f2) { + net2 = parse_network_cfg(f2); + if(w2){ + load_weights(net2, w2); + } + } else { + net2 = calloc(1, sizeof(network)); + *net2 = *net; + } + srand(time(0)); + char boards[600][93]; + int count = 0; + //set_batch_network(net, 1); + //set_batch_network(net2, 1); + float *board = calloc(19*19*3, sizeof(float)); + flip_board(board); + float *one = calloc(19*19*3, sizeof(float)); + float *two = calloc(19*19*3, sizeof(float)); + int done = 0; + int player = 1; + int p1 = 
0; + int p2 = 0; + int total = 0; + float temp = .1; + int mcts_iters = 500; + float cpuct = 5; + while(1){ + if (done){ + tree1 = move_mcts(tree1, -1); + tree2 = move_mcts(tree2, -1); + float score = score_game(board); + if((score > 0) == (total%2==0)) ++p1; + else ++p2; + ++total; + fprintf(stderr, "Total: %d, Player 1: %f, Player 2: %f\n", total, (float)p1/total, (float)p2/total); + sleep(1); + /* + int i = (score > 0)? 0 : 1; + int j; + for(; i < count; i += 2){ + for(j = 0; j < 93; ++j){ + printf("%c", boards[i][j]); + } + printf("\n"); + } + */ + memset(board, 0, 3*19*19*sizeof(float)); + flip_board(board); + player = 1; + done = 0; + count = 0; + fflush(stdout); + fflush(stderr); + } + //print_board(stderr, board, 1, 0); + //sleep(1); + + if ((total%2==0) == (player==1)){ + //mcts_iters = 4500; + cpuct = 5; + } else { + //mcts_iters = 500; + cpuct = 1; + } + network *use = ((total%2==0) == (player==1)) ? net : net2; + mcts_tree *t = ((total%2==0) == (player==1)) ? tree1 : tree2; + t = run_mcts(t, use, board, two, player, mcts_iters, cpuct, 0); + move m = pick_move(t, temp, player); + if(((total%2==0) == (player==1))) tree1 = t; + else tree2 = t; + + tree1 = move_mcts(tree1, m.row*19 + m.col); + tree2 = move_mcts(tree2, m.row*19 + m.col); + + if(m.row == 19){ + done = 1; + continue; + } + int row = m.row; + int col = m.col; + + float *swap = two; + two = one; + one = swap; + + if(player < 0) flip_board(board); + boards[count][0] = row; + boards[count][1] = col; + board_to_string(boards[count] + 2, board); + if(player < 0) flip_board(board); + ++count; + + move_go(board, player, row, col); + copy_cpu(19*19*3, board, 1, one, 1); + + player = -player; + } +} + +void run_go(int argc, char **argv) +{ + //boards_go(); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + 
if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + int clear = find_arg(argc, argv, "-clear"); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *c2 = (argc > 5) ? argv[5] : 0; + char *w2 = (argc > 6) ? argv[6] : 0; + int multi = find_arg(argc, argv, "-multi"); + int anon = find_arg(argc, argv, "-anon"); + int iters = find_int_arg(argc, argv, "-iters", 500); + int resign = find_int_arg(argc, argv, "-resign", 175); + float cpuct = find_float_arg(argc, argv, "-cpuct", 5); + float temp = find_float_arg(argc, argv, "-temp", .1); + float time = find_float_arg(argc, argv, "-time", 0); + if(0==strcmp(argv[2], "train")) train_go(cfg, weights, c2, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "valid")) valid_go(cfg, weights, multi, c2); + else if(0==strcmp(argv[2], "self")) self_go(cfg, weights, c2, w2, multi); + else if(0==strcmp(argv[2], "test")) test_go(cfg, weights, multi); + else if(0==strcmp(argv[2], "engine")) engine_go(cfg, weights, iters, time, temp, cpuct, anon, resign); +} + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/instance-segmenter.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/instance-segmenter.c new file mode 100644 index 0000000000000000000000000000000000000000..664e71426d58e19f758bab198783eac178a3cdc4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/instance-segmenter.c @@ -0,0 +1,267 @@ +#include "darknet.h" +#include +#include + +void normalize_image2(image p); +void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + 
printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + image pred = get_network_image(net); + + image embed = pred; + embed.c = 3; + embed.data += embed.w*embed.h*80; + + int div = net->w/pred.w; + assert(pred.w * div == net->w); + assert(pred.h * div == net->h); + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.scale = div; + args.num_boxes = 90; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.classes = 80; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = ISEG_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; 
+#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(display){ + image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]); + image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]); + pred.c = 80; + image mask = mask_to_rgb(tr); + image prmask = mask_to_rgb(pred); + image ecopy = copy_image(embed); + normalize_image2(ecopy); + show_image(ecopy, "embed", 1); + free_image(ecopy); + + show_image(im, "input", 1); + show_image(prmask, "pred", 1); + show_image(mask, "truth", 100); + free_image(mask); + free_image(prmask); + } + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_isegmenter(char *datafile, char *cfg, char *weights, char *filename) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } 
+ image im = load_image_color(input, 0, 0); + image sized = letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + show_image(sized, "orig", 1); + show_image(prmask, "pred", 0); + free_image(im); + free_image(sized); + free_image(prmask); + if (filename) break; + } +} + + +void demo_isegmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Classifier Demo\n"); + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = letterbox_image(in, net->w, net->h); + + network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + show_image(prmask, "Segmenter", 10); + + free_image(in_s); + free_image(in); + free_image(prmask); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_isegmenter(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < 
len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_isegmenter(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_isegmenter(data, cfg, weights, gpus, ngpus, clear, display); + else if(0==strcmp(argv[2], "demo")) demo_isegmenter(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/lsd.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/lsd.c new file mode 100644 index 0000000000000000000000000000000000000000..4ab944c884b9df422cd2b273b1faee128f2ab112 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/lsd.c @@ -0,0 +1,1378 @@ +#include +#include "darknet.h" + +/* +void train_lsd3(char *fcfg, char *fweight, char *gcfg, char *gweight, char *acfg, char *aweight, int clear) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + //char *style_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *style_images = "/home/pjreddie/zelda.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + network fnet = load_network(fcfg, fweight, clear); + network gnet = load_network(gcfg, gweight, clear); + network anet = load_network(acfg, aweight, clear); + char *gbase = basecfg(gcfg); + char *abase = basecfg(acfg); + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, 
gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + int i = *gnet->seen/imgs; + data train, tbuffer; + data style, sbuffer; + + + list *slist = get_paths(style_images); + char **spaths = (char **)list_to_array(slist); + + list *tlist = get_paths(train_images); + char **tpaths = (char **)list_to_array(tlist); + + load_args targs= get_base_args(gnet); + targs.paths = tpaths; + targs.n = imgs; + targs.m = tlist->size; + targs.d = &tbuffer; + targs.type = CLASSIFICATION_DATA; + targs.classes = 1; + char *ls[1] = {"zelda"}; + targs.labels = ls; + + load_args sargs = get_base_args(gnet); + sargs.paths = spaths; + sargs.n = imgs; + sargs.m = slist->size; + sargs.d = &sbuffer; + sargs.type = CLASSIFICATION_DATA; + sargs.classes = 1; + sargs.labels = ls; + + pthread_t tload_thread = load_data_in_thread(targs); + pthread_t sload_thread = load_data_in_thread(sargs); + clock_t time; + + float aloss_avg = -1; + float floss_avg = -1; + + fnet->train=1; + int x_size = fnet->inputs*fnet->batch; + int y_size = fnet->truths*fnet->batch; + float *X = calloc(x_size, sizeof(float)); + float *y = calloc(y_size, sizeof(float)); + + + int ax_size = anet->inputs*anet->batch; + int ay_size = anet->truths*anet->batch; + fill_gpu(ay_size, .9, anet->truth_gpu, 1); + anet->delta_gpu = cuda_make_array(0, ax_size); + anet->train = 1; + + int gx_size = gnet->inputs*gnet->batch; + int gy_size = gnet->truths*gnet->batch; + gstate.input = cuda_make_array(0, gx_size); + gstate.truth = 0; + gstate.delta = 0; + gstate.train = 1; + + while (get_current_batch(gnet) < gnet->max_batches) { + i += 1; + time=clock(); + pthread_join(tload_thread, 0); + pthread_join(sload_thread, 0); + train = tbuffer; + style = sbuffer; + tload_thread = load_data_in_thread(targs); + sload_thread = load_data_in_thread(sargs); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data generated = copy_data(train); + time=clock(); + + int j, k; + float floss = 0; + for(j = 0; j < fnet->subdivisions; ++j){ + layer 
imlayer = gnet->layers[gnet->n - 1]; + get_next_batch(train, fnet->batch, j*fnet->batch, X, y); + + cuda_push_array(fstate.input, X, x_size); + cuda_push_array(gstate.input, X, gx_size); + *gnet->seen += gnet->batch; + + forward_network_gpu(fnet, fstate); + float *feats = fnet->layers[fnet->n - 2].output_gpu; + copy_gpu(y_size, feats, 1, fstate.truth, 1); + + forward_network_gpu(gnet, gstate); + float *gen = gnet->layers[gnet->n-1].output_gpu; + copy_gpu(x_size, gen, 1, fstate.input, 1); + + fill_gpu(x_size, 0, fstate.delta, 1); + forward_network_gpu(fnet, fstate); + backward_network_gpu(fnet, fstate); + //HERE + + astate.input = gen; + fill_gpu(ax_size, 0, astate.delta, 1); + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + float *delta = imlayer.delta_gpu; + fill_gpu(x_size, 0, delta, 1); + scal_gpu(x_size, 100, astate.delta, 1); + scal_gpu(x_size, .001, fstate.delta, 1); + axpy_gpu(x_size, 1, fstate.delta, 1, delta, 1); + axpy_gpu(x_size, 1, astate.delta, 1, delta, 1); + + //fill_gpu(x_size, 0, delta, 1); + //cuda_push_array(delta, X, x_size); + //axpy_gpu(x_size, -1, imlayer.output_gpu, 1, delta, 1); + //printf("pix error: %f\n", cuda_mag_array(delta, x_size)); + printf("fea error: %f\n", cuda_mag_array(fstate.delta, x_size)); + printf("adv error: %f\n", cuda_mag_array(astate.delta, x_size)); + //axpy_gpu(x_size, 1, astate.delta, 1, delta, 1); + + backward_network_gpu(gnet, gstate); + + floss += get_network_cost(fnet) /(fnet->subdivisions*fnet->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1); + generated.y.vals[index][0] = .1; + style.y.vals[index][0] = .9; + } + } + +*/ +/* + image sim = float_to_image(anet->w, anet->h, anet->c, style.X.vals[j]); + show_image(sim, "style"); + cvWaitKey(0); + */ + /* + + 
harmless_update_network_gpu(anet); + + data merge = concat_data(style, generated); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(generated); + free_data(style); + if (aloss_avg < 0) aloss_avg = aloss; + if (floss_avg < 0) floss_avg = floss; + aloss_avg = aloss_avg*.9 + aloss*.1; + floss_avg = floss_avg*.9 + floss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, floss, aloss, floss_avg, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, gbase, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, gbase); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } +#endif +} +*/ + +/* +void train_pix2pix(char *cfg, char *weight, char *acfg, char *aweight, int clear) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/train1.txt"; + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; + char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network net = load_network(cfg, weight, clear); + network anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = net->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + i = *net->seen/imgs; + data train, buffer; + + + list 
*plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.type = CLASSIFICATION_DATA; + args.classes = 1; + char *ls[1] = {"coco"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + network_state gstate = {0}; + gstate.index = 0; + gstate.net = net; + int x_size = get_network_input_size(net)*net->batch; + int y_size = x_size; + gstate.input = cuda_make_array(0, x_size); + gstate.truth = cuda_make_array(0, y_size); + gstate.delta = 0; + gstate.train = 1; + float *pixs = calloc(x_size, sizeof(float)); + float *graypixs = calloc(x_size, sizeof(float)); + float *y = calloc(y_size, sizeof(float)); + + network_state astate = {0}; + astate.index = 0; + astate.net = anet; + int ay_size = get_network_output_size(anet)*anet->batch; + astate.input = 0; + astate.truth = 0; + astate.delta = 0; + astate.train = 1; + + float *imerror = cuda_make_array(0, imlayer.outputs); + float *ones_gpu = cuda_make_array(0, ay_size); + fill_gpu(ay_size, .9, ones_gpu, 1); + + float aloss_avg = -1; + float gloss_avg = -1; + + //data generated = copy_data(train); + + while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gray = copy_data(train); + for(j = 0; j < imgs; ++j){ + image gim = float_to_image(net->w, net->h, net->c, gray.X.vals[j]); + grayscale_image_3c(gim); + train.y.vals[j][0] = .9; + + image yim = float_to_image(net->w, net->h, net->c, 
train.X.vals[j]); + //rgb_to_yuv(yim); + } + time=clock(); + float gloss = 0; + + for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, pixs, y); + get_next_batch(gray, net->batch, j*net->batch, graypixs, y); + cuda_push_array(gstate.input, graypixs, x_size); + cuda_push_array(gstate.truth, pixs, y_size); + */ + /* + image origi = float_to_image(net->w, net->h, 3, pixs); + image grayi = float_to_image(net->w, net->h, 3, graypixs); + show_image(grayi, "gray"); + show_image(origi, "orig"); + cvWaitKey(0); + */ + /* + *net->seen += net->batch; + forward_network_gpu(net, gstate); + + fill_gpu(imlayer.outputs, 0, imerror, 1); + astate.input = imlayer.output_gpu; + astate.delta = imerror; + astate.truth = ones_gpu; + forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + scal_gpu(imlayer.outputs, .1, net->layers[net->n-1].delta_gpu, 1); + + backward_network_gpu(net, gstate); + + scal_gpu(imlayer.outputs, 1000, imerror, 1); + + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs)); + printf("features %f\n", cuda_mag_array(net->layers[net->n-1].delta_gpu, imlayer.outputs)); + + axpy_gpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1); + + gloss += get_network_cost(net) /(net->subdivisions*net->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1); + gray.y.vals[index][0] = .1; + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gray); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + update_network_gpu(anet); + free_data(merge); + free_data(train); + free_data(gray); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: 
%f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +#endif +} +*/ + +void slerp(float *start, float *end, float s, int n, float *out) +{ + float omega = acos(dot_cpu(n, start, 1, end, 1)); + float so = sin(omega); + fill_cpu(n, 0, out, 1); + axpy_cpu(n, sin((1-s)*omega)/so, start, 1, out, 1); + axpy_cpu(n, sin(s*omega)/so, end, 1, out, 1); + + float mag = mag_array(out, n); + scale_array(out, n, 1./mag); +} + +image random_unit_vector_image(int w, int h, int c) +{ + image im = make_image(w, h, c); + int i; + for(i = 0; i < im.w*im.h*im.c; ++i){ + im.data[i] = rand_normal(); + } + float mag = mag_array(im.data, im.w*im.h*im.c); + scale_array(im.data, im.w*im.h*im.c, 1./mag); + return im; +} + +void inter_dcgan(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int i, imlayer = 0; + + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = i; + printf("%d\n", i); + break; + } + } + image start = random_unit_vector_image(net->w, net->h, net->c); + image end = random_unit_vector_image(net->w, net->h, net->c); + image im = make_image(net->w, net->h, net->c); + image orig = copy_image(start); + + int c = 0; + int count = 0; + int max_count = 15; + while(1){ + ++c; + + if(count == 
max_count){ + count = 0; + free_image(start); + start = end; + end = random_unit_vector_image(net->w, net->h, net->c); + if(c > 300){ + end = orig; + } + if(c>300 + max_count) return; + } + ++count; + + slerp(start.data, end.data, (float)count / max_count, im.w*im.h*im.c, im.data); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + normalize_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + //char buff[256]; + sprintf(buff, "out%05d", c); + save_image(out, "out"); + save_image(out, buff); + show_image(out, "out", 0); + } +} + +void test_dcgan(char *cfgfile, char *weightfile) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int imlayer = 0; + + imlayer = net->n-1; + + while(1){ + image im = make_image(net->w, net->h, net->c); + int i; + for(i = 0; i < im.w*im.h*im.c; ++i){ + im.data[i] = rand_normal(); + } + //float mag = mag_array(im.data, im.w*im.h*im.c); + //scale_array(im.data, im.w*im.h*im.c, 1./mag); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + normalize_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 0); + + free_image(im); + } +} + +void set_network_alpha_beta(network *net, float alpha, float beta) +{ + int i; + for(i = 0; i < net->n; ++i){ + if(net->layers[i].type == SHORTCUT){ + net->layers[i].alpha = alpha; + net->layers[i].beta = beta; + } + } +} + +void train_prog(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) +{ +#ifdef GPU + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", 
base); + network *gnet = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = gnet->layers[gnet->n-1]; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + i = *gnet->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(anet); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + args.threads=16; + args.classes = 1; + char *ls[2] = {"imagenet", "zzzzzzzz"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + gnet->train = 1; + anet->train = 1; + + int x_size = gnet->inputs*gnet->batch; + int y_size = gnet->truths*gnet->batch; + float *imerror = cuda_make_array(0, y_size); + + float aloss_avg = -1; + + if (maxbatch == 0) maxbatch = gnet->max_batches; + while (get_current_batch(gnet) < maxbatch) { + { + int cb = get_current_batch(gnet); + float alpha = (float) cb / (maxbatch/2); + if(alpha > 1) alpha = 1; + float beta = 1 - alpha; + printf("%f %f\n", alpha, beta); + set_network_alpha_beta(gnet, alpha, beta); + set_network_alpha_beta(anet, beta, alpha); + } + + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gen = copy_data(train); + for (j = 0; j < imgs; ++j) { + train.y.vals[j][0] = 1; + gen.y.vals[j][0] = 0; + } + time=clock(); + + for (j = 0; j < gnet->subdivisions; ++j) { + get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0); + int z; + for(z = 0; z < x_size; ++z){ + gnet->input[z] = rand_normal(); + } + /* + for(z = 0; z < gnet->batch; ++z){ + float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs); + 
scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag); + } + */ + *gnet->seen += gnet->batch; + forward_network(gnet); + + fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); + fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1); + copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1); + anet->delta_gpu = imerror; + forward_network(anet); + backward_network(anet); + + //float genaloss = *anet->cost / anet->batch; + + scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); + scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1); + + axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1); + + backward_network(gnet); + + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gen); + float aloss = train_network(anet, merge); + +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + show_image(im, "gen", 1); + show_image(im2, "train", 1); + save_image(im, "gen"); + save_image(im2, "train"); + } +#endif + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(gen); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + + printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", 
backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(gnet, buff); +#endif +} + +void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display, char *train_images, int maxbatch) +{ +#ifdef GPU + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network *gnet = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + //float orig_rate = anet->learning_rate; + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < gnet->n; ++i) { + if (gnet->layers[i].out_c == 3) { + imlayer = gnet->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", gnet->learning_rate, gnet->momentum, gnet->decay); + int imgs = gnet->batch*gnet->subdivisions; + i = *gnet->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(anet); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = CLASSIFICATION_DATA; + args.threads=16; + args.classes = 1; + char *ls[2] = {"imagenet", "zzzzzzzz"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + gnet->train = 1; + anet->train = 1; + + int x_size = gnet->inputs*gnet->batch; + int y_size = gnet->truths*gnet->batch; + float *imerror = cuda_make_array(0, y_size); + + //int ay_size = anet->truths*anet->batch; + + float aloss_avg = -1; + + //data generated = copy_data(train); + + if (maxbatch == 0) maxbatch = gnet->max_batches; + while (get_current_batch(gnet) < maxbatch) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + //translate_data_rows(train, -.5); + //scale_data_rows(train, 2); + + load_thread = 
load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gen = copy_data(train); + for (j = 0; j < imgs; ++j) { + train.y.vals[j][0] = 1; + gen.y.vals[j][0] = 0; + } + time=clock(); + + for(j = 0; j < gnet->subdivisions; ++j){ + get_next_batch(train, gnet->batch, j*gnet->batch, gnet->truth, 0); + int z; + for(z = 0; z < x_size; ++z){ + gnet->input[z] = rand_normal(); + } + for(z = 0; z < gnet->batch; ++z){ + float mag = mag_array(gnet->input + z*gnet->inputs, gnet->inputs); + scale_array(gnet->input + z*gnet->inputs, gnet->inputs, 1./mag); + } + /* + for(z = 0; z < 100; ++z){ + printf("%f, ", gnet->input[z]); + } + printf("\n"); + printf("input: %f %f\n", mean_array(gnet->input, x_size), variance_array(gnet->input, x_size)); + */ + + //cuda_push_array(gnet->input_gpu, gnet->input, x_size); + //cuda_push_array(gnet->truth_gpu, gnet->truth, y_size); + *gnet->seen += gnet->batch; + forward_network(gnet); + + fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); + fill_cpu(anet->truths*anet->batch, 1, anet->truth, 1); + copy_cpu(anet->inputs*anet->batch, imlayer.output, 1, anet->input, 1); + anet->delta_gpu = imerror; + forward_network(anet); + backward_network(anet); + + //float genaloss = *anet->cost / anet->batch; + //printf("%f\n", genaloss); + + scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); + scal_gpu(imlayer.outputs*imlayer.batch, 0, gnet->layers[gnet->n-1].delta_gpu, 1); + + //printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch)); + //printf("features %f\n", cuda_mag_array(gnet->layers[gnet->n-1].delta_gpu, imlayer.outputs*imlayer.batch)); + + axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, gnet->layers[gnet->n-1].delta_gpu, 1); + + backward_network(gnet); + + /* + for(k = 0; k < gnet->n; ++k){ + layer l = gnet->layers[k]; + cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); + printf("%d: %f %f\n", k, mean_array(l.output, l.outputs*l.batch), variance_array(l.output, 
l.outputs*l.batch)); + } + */ + + for(k = 0; k < gnet->batch; ++k){ + int index = j*gnet->batch + k; + copy_cpu(gnet->outputs, gnet->output + k*gnet->outputs, 1, gen.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gen); + //randomize_data(merge); + float aloss = train_network(anet, merge); + + //translate_image(im, 1); + //scale_image(im, .5); + //translate_image(im2, 1); + //scale_image(im2, .5); +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gen.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + show_image(im, "gen", 1); + show_image(im2, "train", 1); + save_image(im, "gen"); + save_image(im2, "train"); + } +#endif + + /* + if(aloss < .1){ + anet->learning_rate = 0; + } else if (aloss > .3){ + anet->learning_rate = orig_rate; + } + */ + + update_network_gpu(gnet); + + free_data(merge); + free_data(train); + free_data(gen); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + + printf("%d: adv: %f | adv_avg: %f, %f rate, %lf seconds, %d images\n", i, aloss, aloss_avg, get_current_rate(gnet), sec(clock()-time), i*imgs); + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(gnet, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(gnet, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(gnet, buff); +#endif +} + +void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int clear, int display) +{ +#ifdef GPU + //char *train_images = "/home/pjreddie/data/coco/train1.txt"; + //char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; 
+ char *train_images = "/home/pjreddie/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfg); + char *abase = basecfg(acfg); + printf("%s\n", base); + network *net = load_network(cfg, weight, clear); + network *anet = load_network(acfg, aweight, clear); + + int i, j, k; + layer imlayer = {0}; + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = net->layers[i]; + break; + } + } + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args= get_base_args(net); + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + + args.type = CLASSIFICATION_DATA; + args.classes = 1; + char *ls[2] = {"imagenet"}; + args.labels = ls; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + + int x_size = net->inputs*net->batch; + //int y_size = x_size; + net->delta = 0; + net->train = 1; + float *pixs = calloc(x_size, sizeof(float)); + float *graypixs = calloc(x_size, sizeof(float)); + //float *y = calloc(y_size, sizeof(float)); + + //int ay_size = anet->outputs*anet->batch; + anet->delta = 0; + anet->train = 1; + + float *imerror = cuda_make_array(0, imlayer.outputs*imlayer.batch); + + float aloss_avg = -1; + float gloss_avg = -1; + + //data generated = copy_data(train); + + while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data gray = copy_data(train); + for(j = 0; j < imgs; ++j){ + image gim = float_to_image(net->w, net->h, net->c, gray.X.vals[j]); + grayscale_image_3c(gim); + 
train.y.vals[j][0] = .95; + gray.y.vals[j][0] = .05; + } + time=clock(); + float gloss = 0; + + for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, pixs, 0); + get_next_batch(gray, net->batch, j*net->batch, graypixs, 0); + cuda_push_array(net->input_gpu, graypixs, net->inputs*net->batch); + cuda_push_array(net->truth_gpu, pixs, net->truths*net->batch); + /* + image origi = float_to_image(net->w, net->h, 3, pixs); + image grayi = float_to_image(net->w, net->h, 3, graypixs); + show_image(grayi, "gray"); + show_image(origi, "orig"); + cvWaitKey(0); + */ + *net->seen += net->batch; + forward_network_gpu(net); + + fill_gpu(imlayer.outputs*imlayer.batch, 0, imerror, 1); + copy_gpu(anet->inputs*anet->batch, imlayer.output_gpu, 1, anet->input_gpu, 1); + fill_gpu(anet->inputs*anet->batch, .95, anet->truth_gpu, 1); + anet->delta_gpu = imerror; + forward_network_gpu(anet); + backward_network_gpu(anet); + + scal_gpu(imlayer.outputs*imlayer.batch, 1./100., net->layers[net->n-1].delta_gpu, 1); + + scal_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1); + + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs*imlayer.batch)); + printf("features %f\n", cuda_mag_array(net->layers[net->n-1].delta_gpu, imlayer.outputs*imlayer.batch)); + + axpy_gpu(imlayer.outputs*imlayer.batch, 1, imerror, 1, net->layers[net->n-1].delta_gpu, 1); + + backward_network_gpu(net); + + + gloss += *net->cost /(net->subdivisions*net->batch); + + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, gray.X.vals[index], 1); + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, gray); + //randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + +#ifdef OPENCV + if(display){ + image im = float_to_image(anet->w, anet->h, anet->c, gray.X.vals[0]); + image im2 = float_to_image(anet->w, anet->h, anet->c, train.X.vals[0]); + 
show_image(im, "gen", 1); + show_image(im2, "train", 1); + } +#endif + free_data(merge); + free_data(train); + free_data(gray); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +#endif +} + +/* + void train_lsd2(char *cfgfile, char *weightfile, char *acfgfile, char *aweightfile, int clear) + { +#ifdef GPU +char *train_images = "/home/pjreddie/data/coco/trainvalno5k.txt"; +char *backup_directory = "/home/pjreddie/backup/"; +srand(time(0)); +char *base = basecfg(cfgfile); +printf("%s\n", base); +network net = parse_network_cfg(cfgfile); +if(weightfile){ +load_weights(&net, weightfile); +} +if(clear) *net->seen = 0; + +char *abase = basecfg(acfgfile); +network anet = parse_network_cfg(acfgfile); +if(aweightfile){ +load_weights(&anet, aweightfile); +} +if(clear) *anet->seen = 0; + +int i, j, k; +layer imlayer = {0}; +for (i = 0; i < net->n; ++i) { +if (net->layers[i].out_c == 3) { +imlayer = net->layers[i]; +break; +} +} + +printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); +int imgs = net->batch*net->subdivisions; +i = *net->seen/imgs; +data train, buffer; + + +list *plist = get_paths(train_images); +//int N = plist->size; +char 
**paths = (char **)list_to_array(plist); + +load_args args = {0}; +args.w = net->w; +args.h = net->h; +args.paths = paths; +args.n = imgs; +args.m = plist->size; +args.d = &buffer; + +args.min = net->min_crop; +args.max = net->max_crop; +args.angle = net->angle; +args.aspect = net->aspect; +args.exposure = net->exposure; +args.saturation = net->saturation; +args.hue = net->hue; +args.size = net->w; +args.type = CLASSIFICATION_DATA; +args.classes = 1; +char *ls[1] = {"coco"}; +args.labels = ls; + +pthread_t load_thread = load_data_in_thread(args); +clock_t time; + +network_state gstate = {0}; +gstate.index = 0; +gstate.net = net; +int x_size = get_network_input_size(net)*net->batch; +int y_size = 1*net->batch; +gstate.input = cuda_make_array(0, x_size); +gstate.truth = 0; +gstate.delta = 0; +gstate.train = 1; +float *X = calloc(x_size, sizeof(float)); +float *y = calloc(y_size, sizeof(float)); + +network_state astate = {0}; +astate.index = 0; +astate.net = anet; +int ay_size = get_network_output_size(anet)*anet->batch; +astate.input = 0; +astate.truth = 0; +astate.delta = 0; +astate.train = 1; + +float *imerror = cuda_make_array(0, imlayer.outputs); +float *ones_gpu = cuda_make_array(0, ay_size); +fill_gpu(ay_size, 1, ones_gpu, 1); + +float aloss_avg = -1; +float gloss_avg = -1; + +//data generated = copy_data(train); + +while (get_current_batch(net) < net->max_batches) { + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + data generated = copy_data(train); + time=clock(); + float gloss = 0; + + for(j = 0; j < net->subdivisions; ++j){ + get_next_batch(train, net->batch, j*net->batch, X, y); + cuda_push_array(gstate.input, X, x_size); + *net->seen += net->batch; + forward_network_gpu(net, gstate); + + fill_gpu(imlayer.outputs, 0, imerror, 1); + astate.input = imlayer.output_gpu; + astate.delta = imerror; + astate.truth = ones_gpu; + 
forward_network_gpu(anet, astate); + backward_network_gpu(anet, astate); + + scal_gpu(imlayer.outputs, 1, imerror, 1); + axpy_gpu(imlayer.outputs, 1, imerror, 1, imlayer.delta_gpu, 1); + + backward_network_gpu(net, gstate); + + printf("features %f\n", cuda_mag_array(imlayer.delta_gpu, imlayer.outputs)); + printf("realness %f\n", cuda_mag_array(imerror, imlayer.outputs)); + + gloss += get_network_cost(net) /(net->subdivisions*net->batch); + + cuda_pull_array(imlayer.output_gpu, imlayer.output, imlayer.outputs*imlayer.batch); + for(k = 0; k < net->batch; ++k){ + int index = j*net->batch + k; + copy_cpu(imlayer.outputs, imlayer.output + k*imlayer.outputs, 1, generated.X.vals[index], 1); + generated.y.vals[index][0] = 0; + } + } + harmless_update_network_gpu(anet); + + data merge = concat_data(train, generated); + randomize_data(merge); + float aloss = train_network(anet, merge); + + update_network_gpu(net); + update_network_gpu(anet); + free_data(merge); + free_data(train); + free_data(generated); + if (aloss_avg < 0) aloss_avg = aloss; + aloss_avg = aloss_avg*.9 + aloss*.1; + gloss_avg = gloss_avg*.9 + gloss*.1; + + printf("%d: gen: %f, adv: %f | gen_avg: %f, adv_avg: %f, %f rate, %lf seconds, %d images\n", i, gloss, aloss, gloss_avg, aloss_avg, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + sprintf(buff, "%s/%s_%d.weights", backup_directory, abase, i); + save_weights(anet, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + sprintf(buff, "%s/%s.backup", backup_directory, abase); + save_weights(anet, buff); + } +} +char buff[256]; +sprintf(buff, "%s/%s_final.weights", backup_directory, base); +save_weights(net, buff); +#endif +} +*/ + +/* + void train_lsd(char *cfgfile, char *weightfile, int clear) + { + char *train_images = 
"/home/pjreddie/data/coco/trainvalno5k.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + if(clear) *net->seen = 0; + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); +//int N = plist->size; +char **paths = (char **)list_to_array(plist); + +load_args args = {0}; +args.w = net->w; +args.h = net->h; +args.paths = paths; +args.n = imgs; +args.m = plist->size; +args.d = &buffer; + +args.min = net->min_crop; +args.max = net->max_crop; +args.angle = net->angle; +args.aspect = net->aspect; +args.exposure = net->exposure; +args.saturation = net->saturation; +args.hue = net->hue; +args.size = net->w; +args.type = CLASSIFICATION_DATA; +args.classes = 1; +char *ls[1] = {"coco"}; +args.labels = ls; + +pthread_t load_thread = load_data_in_thread(args); +clock_t time; +//while(i*imgs < N*120){ +while(get_current_batch(net) < net->max_batches){ +i += 1; +time=clock(); +pthread_join(load_thread, 0); +train = buffer; +load_thread = load_data_in_thread(args); + +printf("Loaded: %lf seconds\n", sec(clock()-time)); + +time=clock(); +float loss = train_network(net, train); +if (avg_loss < 0) avg_loss = loss; +avg_loss = avg_loss*.9 + loss*.1; + +printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); +if(i%1000==0){ +char buff[256]; +sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); +save_weights(net, buff); +} +if(i%100==0){ +char buff[256]; +sprintf(buff, "%s/%s.backup", backup_directory, base); +save_weights(net, buff); +} +free_data(train); +} +char buff[256]; +sprintf(buff, "%s/%s_final.weights", 
backup_directory, base); +save_weights(net, buff); +} +*/ + +void test_lsd(char *cfg, char *weights, char *filename, int gray) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + int i, imlayer = 0; + + for (i = 0; i < net->n; ++i) { + if (net->layers[i].out_c == 3) { + imlayer = i; + printf("%d\n", i); + break; + } + } + + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image resized = resize_min(im, net->w); + image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h); + if(gray) grayscale_image_3c(crop); + + float *X = crop.data; + time=clock(); + network_predict(net, X); + image out = get_network_image_layer(net, imlayer); + //yuv_to_rgb(out); + constrain_image(out); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 1); + show_image(crop, "crop", 0); + + free_image(im); + free_image(resized); + free_image(crop); + if (filename) break; + } +} + + +void run_lsd(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + int batches = find_int_arg(argc, argv, "-b", 0); + char *file = find_char_arg(argc, argv, "-file", "/home/pjreddie/data/imagenet/imagenet1k.train.list"); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + char *acfg = argv[5]; + char *aweights = (argc > 6) ? 
argv[6] : 0; + //if(0==strcmp(argv[2], "train")) train_lsd(cfg, weights, clear); + //else if(0==strcmp(argv[2], "train2")) train_lsd2(cfg, weights, acfg, aweights, clear); + //else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear); + //else if(0==strcmp(argv[2], "train3")) train_lsd3(argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], clear); + if(0==strcmp(argv[2], "traingan")) train_dcgan(cfg, weights, acfg, aweights, clear, display, file, batches); + else if(0==strcmp(argv[2], "trainprog")) train_prog(cfg, weights, acfg, aweights, clear, display, file, batches); + else if(0==strcmp(argv[2], "traincolor")) train_colorizer(cfg, weights, acfg, aweights, clear, display); + else if(0==strcmp(argv[2], "gan")) test_dcgan(cfg, weights); + else if(0==strcmp(argv[2], "inter")) inter_dcgan(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_lsd(cfg, weights, filename, 0); + else if(0==strcmp(argv[2], "color")) test_lsd(cfg, weights, filename, 1); + /* + else if(0==strcmp(argv[2], "valid")) validate_lsd(cfg, weights); + */ +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/nightmare.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/nightmare.c new file mode 100644 index 0000000000000000000000000000000000000000..2978eb61193e96325441c5b830a786eccb203569 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/nightmare.c @@ -0,0 +1,414 @@ +#include "darknet.h" + +#include + +// ./darknet nightmare cfg/extractor.recon.cfg ~/trained/yolo-coco.conv frame6.png -reconstruct -iters 500 -i 3 -lambda .1 -rate .01 -smooth 2 + +float abs_mean(float *x, int n) +{ + int i; + float sum = 0; + for (i = 0; i < n; ++i){ + sum += fabs(x[i]); + } + return sum/n; +} + +void calculate_loss(float *output, float *delta, int n, float thresh) +{ + int i; + float mean = mean_array(output, n); + float var = variance_array(output, n); + for(i = 0; i < n; ++i){ + if(delta[i] > mean + thresh*sqrt(var)) 
delta[i] = output[i]; + else delta[i] = 0; + } +} + +void optimize_picture(network *net, image orig, int max_layer, float scale, float rate, float thresh, int norm) +{ + //scale_image(orig, 2); + //translate_image(orig, -1); + net->n = max_layer + 1; + + int dx = rand()%16 - 8; + int dy = rand()%16 - 8; + int flip = rand()%2; + + image crop = crop_image(orig, dx, dy, orig.w, orig.h); + image im = resize_image(crop, (int)(orig.w * scale), (int)(orig.h * scale)); + if(flip) flip_image(im); + + resize_network(net, im.w, im.h); + layer last = net->layers[net->n-1]; + //net->layers[net->n - 1].activation = LINEAR; + + image delta = make_image(im.w, im.h, im.c); + +#ifdef GPU + net->delta_gpu = cuda_make_array(delta.data, im.w*im.h*im.c); + copy_cpu(net->inputs, im.data, 1, net->input, 1); + + forward_network_gpu(net); + copy_gpu(last.outputs, last.output_gpu, 1, last.delta_gpu, 1); + + cuda_pull_array(last.delta_gpu, last.delta, last.outputs); + calculate_loss(last.delta, last.delta, last.outputs, thresh); + cuda_push_array(last.delta_gpu, last.delta, last.outputs); + + backward_network_gpu(net); + + cuda_pull_array(net->delta_gpu, delta.data, im.w*im.h*im.c); + cuda_free(net->delta_gpu); + net->delta_gpu = 0; +#else + printf("\nnet: %d %d %d im: %d %d %d\n", net->w, net->h, net->inputs, im.w, im.h, im.c); + copy_cpu(net->inputs, im.data, 1, net->input, 1); + net->delta = delta.data; + forward_network(net); + copy_cpu(last.outputs, last.output, 1, last.delta, 1); + calculate_loss(last.output, last.delta, last.outputs, thresh); + backward_network(net); +#endif + + if(flip) flip_image(delta); + //normalize_array(delta.data, delta.w*delta.h*delta.c); + image resized = resize_image(delta, orig.w, orig.h); + image out = crop_image(resized, -dx, -dy, orig.w, orig.h); + + /* + image g = grayscale_image(out); + free_image(out); + out = g; + */ + + //rate = rate / abs_mean(out.data, out.w*out.h*out.c); + image gray = make_image(out.w, out.h, out.c); + fill_image(gray, .5); + 
axpy_cpu(orig.w*orig.h*orig.c, -1, orig.data, 1, gray.data, 1); + axpy_cpu(orig.w*orig.h*orig.c, .1, gray.data, 1, out.data, 1); + + if(norm) normalize_array(out.data, out.w*out.h*out.c); + axpy_cpu(orig.w*orig.h*orig.c, rate, out.data, 1, orig.data, 1); + + /* + normalize_array(orig.data, orig.w*orig.h*orig.c); + scale_image(orig, sqrt(var)); + translate_image(orig, mean); + */ + + //translate_image(orig, 1); + //scale_image(orig, .5); + //normalize_image(orig); + + constrain_image(orig); + + free_image(crop); + free_image(im); + free_image(delta); + free_image(resized); + free_image(out); + +} + +void smooth(image recon, image update, float lambda, int num) +{ + int i, j, k; + int ii, jj; + for(k = 0; k < recon.c; ++k){ + for(j = 0; j < recon.h; ++j){ + for(i = 0; i < recon.w; ++i){ + int out_index = i + recon.w*(j + recon.h*k); + for(jj = j-num; jj <= j + num && jj < recon.h; ++jj){ + if (jj < 0) continue; + for(ii = i-num; ii <= i + num && ii < recon.w; ++ii){ + if (ii < 0) continue; + int in_index = ii + recon.w*(jj + recon.h*k); + update.data[out_index] += lambda * (recon.data[in_index] - recon.data[out_index]); + } + } + } + } + } +} + +void reconstruct_picture(network *net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters) +{ + int iter = 0; + for (iter = 0; iter < iters; ++iter) { + image delta = make_image(recon.w, recon.h, recon.c); + +#ifdef GPU + layer l = get_network_output_layer(net); + cuda_push_array(net->input_gpu, recon.data, recon.w*recon.h*recon.c); + //cuda_push_array(net->truth_gpu, features, net->truths); + net->delta_gpu = cuda_make_array(delta.data, delta.w*delta.h*delta.c); + + forward_network_gpu(net); + cuda_push_array(l.delta_gpu, features, l.outputs); + axpy_gpu(l.outputs, -1, l.output_gpu, 1, l.delta_gpu, 1); + backward_network_gpu(net); + + cuda_pull_array(net->delta_gpu, delta.data, delta.w*delta.h*delta.c); + + cuda_free(net->delta_gpu); +#else + net->input = 
recon.data; + net->delta = delta.data; + net->truth = features; + + forward_network(net); + backward_network(net); +#endif + + //normalize_array(delta.data, delta.w*delta.h*delta.c); + axpy_cpu(recon.w*recon.h*recon.c, 1, delta.data, 1, update.data, 1); + //smooth(recon, update, lambda, smooth_size); + + axpy_cpu(recon.w*recon.h*recon.c, rate, update.data, 1, recon.data, 1); + scal_cpu(recon.w*recon.h*recon.c, momentum, update.data, 1); + + float mag = mag_array(delta.data, recon.w*recon.h*recon.c); + printf("mag: %f\n", mag); + //scal_cpu(recon.w*recon.h*recon.c, 600/mag, recon.data, 1); + + constrain_image(recon); + free_image(delta); + } +} + +/* +void run_lsd(int argc, char **argv) +{ + srand(0); + if(argc < 3){ + fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [options! (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[2]; + char *weights = argv[3]; + char *input = argv[4]; + + int norm = find_int_arg(argc, argv, "-norm", 1); + int rounds = find_int_arg(argc, argv, "-rounds", 1); + int iters = find_int_arg(argc, argv, "-iters", 10); + float rate = find_float_arg(argc, argv, "-rate", .04); + float momentum = find_float_arg(argc, argv, "-momentum", .9); + float lambda = find_float_arg(argc, argv, "-lambda", .01); + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + int reconstruct = find_arg(argc, argv, "-reconstruct"); + int smooth_size = find_int_arg(argc, argv, "-smooth", 1); + + network net = parse_network_cfg(cfg); + load_weights(&net, weights); + char *cfgbase = basecfg(cfg); + char *imbase = basecfg(input); + + set_batch_network(&net, 1); + image im = load_image_color(input, 0, 0); + + float *features = 0; + image update; + if (reconstruct){ + im = letterbox_image(im, net->w, net->h); + + int zz = 0; + network_predict(net, im.data); + image out_im = get_network_image(net); + image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz); + //flip_image(crop); + image f_im = resize_image(crop, out_im.w, out_im.h); + 
free_image(crop); + printf("%d features\n", out_im.w*out_im.h*out_im.c); + + + im = resize_image(im, im.w, im.h); + f_im = resize_image(f_im, f_im.w, f_im.h); + features = f_im.data; + + int i; + for(i = 0; i < 14*14*512; ++i){ + features[i] += rand_uniform(-.19, .19); + } + + free_image(im); + im = make_random_image(im.w, im.h, im.c); + update = make_image(im.w, im.h, im.c); + + } + + int e; + int n; + for(e = 0; e < rounds; ++e){ + fprintf(stderr, "Iteration: "); + fflush(stderr); + for(n = 0; n < iters; ++n){ + fprintf(stderr, "%d, ", n); + fflush(stderr); + if(reconstruct){ + reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1); + //if ((n+1)%30 == 0) rate *= .5; + show_image(im, "reconstruction"); +#ifdef OPENCV + cvWaitKey(10); +#endif + }else{ + int layer = max_layer + rand()%range - range/2; + int octave = rand()%octaves; + optimize_picture(&net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm); + } + } + fprintf(stderr, "done\n"); + char buff[256]; + if (prefix){ + sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e); + }else{ + sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e); + } + printf("%d %s\n", e, buff); + save_image(im, buff); + //show_image(im, buff); + //cvWaitKey(0); + + if(rotate){ + image rot = rotate_image(im, rotate); + free_image(im); + im = rot; + } + image crop = crop_image(im, im.w * (1. - zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom); + image resized = resize_image(crop, im.w, im.h); + free_image(im); + free_image(crop); + im = resized; + } +} +*/ + +void run_nightmare(int argc, char **argv) +{ + srand(0); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [layer] [options! 
(optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[2]; + char *weights = argv[3]; + char *input = argv[4]; + int max_layer = atoi(argv[5]); + + int range = find_int_arg(argc, argv, "-range", 1); + int norm = find_int_arg(argc, argv, "-norm", 1); + int rounds = find_int_arg(argc, argv, "-rounds", 1); + int iters = find_int_arg(argc, argv, "-iters", 10); + int octaves = find_int_arg(argc, argv, "-octaves", 4); + float zoom = find_float_arg(argc, argv, "-zoom", 1.); + float rate = find_float_arg(argc, argv, "-rate", .04); + float thresh = find_float_arg(argc, argv, "-thresh", 1.); + float rotate = find_float_arg(argc, argv, "-rotate", 0); + float momentum = find_float_arg(argc, argv, "-momentum", .9); + float lambda = find_float_arg(argc, argv, "-lambda", .01); + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + int reconstruct = find_arg(argc, argv, "-reconstruct"); + int smooth_size = find_int_arg(argc, argv, "-smooth", 1); + + network *net = load_network(cfg, weights, 0); + char *cfgbase = basecfg(cfg); + char *imbase = basecfg(input); + + set_batch_network(net, 1); + image im = load_image_color(input, 0, 0); + if(0){ + float scale = 1; + if(im.w > 512 || im.h > 512){ + if(im.w > im.h) scale = 512.0/im.w; + else scale = 512.0/im.h; + } + image resized = resize_image(im, scale*im.w, scale*im.h); + free_image(im); + im = resized; + } + //im = letterbox_image(im, net->w, net->h); + + float *features = 0; + image update; + if (reconstruct){ + net->n = max_layer; + im = letterbox_image(im, net->w, net->h); + //resize_network(&net, im.w, im.h); + + network_predict(net, im.data); + if(net->layers[net->n-1].type == REGION){ + printf("region!\n"); + zero_objectness(net->layers[net->n-1]); + } + image out_im = copy_image(get_network_image(net)); + /* + image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz); + //flip_image(crop); + image f_im = resize_image(crop, out_im.w, out_im.h); + free_image(crop); + */ + printf("%d features\n", 
out_im.w*out_im.h*out_im.c); + + features = out_im.data; + + /* + int i; + for(i = 0; i < 14*14*512; ++i){ + //features[i] += rand_uniform(-.19, .19); + } + free_image(im); + im = make_random_image(im.w, im.h, im.c); + */ + update = make_image(im.w, im.h, im.c); + } + + int e; + int n; + for(e = 0; e < rounds; ++e){ + fprintf(stderr, "Iteration: "); + fflush(stderr); + for(n = 0; n < iters; ++n){ + fprintf(stderr, "%d, ", n); + fflush(stderr); + if(reconstruct){ + reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1); + //if ((n+1)%30 == 0) rate *= .5; + show_image(im, "reconstruction", 10); + }else{ + int layer = max_layer + rand()%range - range/2; + int octave = rand()%octaves; + optimize_picture(net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm); + } + } + fprintf(stderr, "done\n"); + if(0){ + image g = grayscale_image(im); + free_image(im); + im = g; + } + char buff[256]; + if (prefix){ + sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e); + }else{ + sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e); + } + printf("%d %s\n", e, buff); + save_image(im, buff); + //show_image(im, buff, 0); + + if(rotate){ + image rot = rotate_image(im, rotate); + free_image(im); + im = rot; + } + image crop = crop_image(im, im.w * (1. 
- zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom); + image resized = resize_image(crop, im.w, im.h); + free_image(im); + free_image(crop); + im = resized; + } +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/regressor.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/regressor.c new file mode 100644 index 0000000000000000000000000000000000000000..20cec0fad9f0a2ccb2c46a30d0a01793119b43ce --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/regressor.c @@ -0,0 +1,240 @@ +#include "darknet.h" +#include +#include + +void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + int classes = option_find_int(options, "classes", 1); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + clock_t time; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.classes = classes; + + args.min = net->min_ratio*net->w; + args.max = net->max_ratio*net->w; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = 
net->saturation; + args.hue = net->hue; + args.size = net->w; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = REGRESSION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_regressor(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = 
load_image_color(input, 0, 0); + image sized = letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + free_image(im); + free_image(sized); + if (filename) break; + } +} + + +void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Regressor Demo\n"); + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + + srand(2222222); + list *options = read_data_cfg(datacfg); + int classes = option_find_int(options, "classes", 1); + char *name_list = option_find_str(options, "names", 0); + char **names = get_labels(name_list); + + void * cap = open_video_stream(filename, cam_index, 0,0,0); + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image crop = center_crop_image(in, net->w, net->h); + grayscale_image_3c(crop); + + float *predictions = network_predict(net, crop.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + int i; + for(i = 0; i < classes; ++i){ + printf("%s: %f\n", names[i], predictions[i]); + } + + show_image(crop, "Regressor", 10); + free_image(in); + free_image(crop); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_regressor(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", 
gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_regressor(data, cfg, weights); + else if(0==strcmp(argv[2], "train")) train_regressor(data, cfg, weights, gpus, ngpus, clear); + else if(0==strcmp(argv[2], "demo")) demo_regressor(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/rnn.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/rnn.c new file mode 100644 index 0000000000000000000000000000000000000000..5d49eaae7070eb1dc9a87b5627b7ec6f7cb09e46 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/rnn.c @@ -0,0 +1,542 @@ +#include "darknet.h" + +#include + +typedef struct { + float *x; + float *y; +} float_pair; + +unsigned char **load_files(char *filename, int *n) +{ + list *paths = get_paths(filename); + *n = paths->size; + unsigned char **contents = calloc(*n, sizeof(char *)); + int i; + node *x = paths->front; + for(i = 0; i < *n; ++i){ + contents[i] = read_file((char *)x->val); + x = x->next; + } + return contents; +} + +int *read_tokenized_data(char *filename, size_t *read) +{ + size_t size = 512; + size_t count = 0; + FILE *fp = fopen(filename, "r"); + int *d = calloc(size, sizeof(int)); + int n, one; + one = fscanf(fp, "%d", &n); + while(one == 1){ + ++count; + if(count > size){ + size = size*2; + d = realloc(d, size*sizeof(int)); + } + d[count-1] = n; + one = fscanf(fp, "%d", &n); + } + fclose(fp); + d = 
realloc(d, count*sizeof(int)); + *read = count; + return d; +} + +char **read_tokens(char *filename, size_t *read) +{ + size_t size = 512; + size_t count = 0; + FILE *fp = fopen(filename, "r"); + char **d = calloc(size, sizeof(char *)); + char *line; + while((line=fgetl(fp)) != 0){ + ++count; + if(count > size){ + size = size*2; + d = realloc(d, size*sizeof(char *)); + } + if(0==strcmp(line, "")) line = "\n"; + d[count-1] = line; + } + fclose(fp); + d = realloc(d, count*sizeof(char *)); + *read = count; + return d; +} + + +float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size_t len, int batch, int steps) +{ + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + int i,j; + for(i = 0; i < batch; ++i){ + for(j = 0; j < steps; ++j){ + int curr = tokens[(offsets[i])%len]; + int next = tokens[(offsets[i] + 1)%len]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + offsets[i] = (offsets[i] + 1) % len; + + if(curr >= characters || curr < 0 || next >= characters || next < 0){ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} + +float_pair get_seq2seq_data(char **source, char **dest, int n, int characters, size_t len, int batch, int steps) +{ + int i,j; + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + for(i = 0; i < batch; ++i){ + int index = rand()%n; + //int slen = strlen(source[index]); + //int dlen = strlen(dest[index]); + for(j = 0; j < steps; ++j){ + unsigned char curr = source[index][j]; + unsigned char next = dest[index][j]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + if(curr > 255 || curr <= 0 || next > 255 || next <= 0){ + /*text[(index+j+2)%len] = 0; + printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]); + printf("%s", text+index); 
+ */ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} + +float_pair get_rnn_data(unsigned char *text, size_t *offsets, int characters, size_t len, int batch, int steps) +{ + float *x = calloc(batch * steps * characters, sizeof(float)); + float *y = calloc(batch * steps * characters, sizeof(float)); + int i,j; + for(i = 0; i < batch; ++i){ + for(j = 0; j < steps; ++j){ + unsigned char curr = text[(offsets[i])%len]; + unsigned char next = text[(offsets[i] + 1)%len]; + + x[(j*batch + i)*characters + curr] = 1; + y[(j*batch + i)*characters + next] = 1; + + offsets[i] = (offsets[i] + 1) % len; + + if(curr > 255 || curr <= 0 || next > 255 || next <= 0){ + /*text[(index+j+2)%len] = 0; + printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]); + printf("%s", text+index); + */ + error("Bad char"); + } + } + } + float_pair p; + p.x = x; + p.y = y; + return p; +} + +void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear, int tokenized) +{ + srand(time(0)); + unsigned char *text = 0; + int *tokens = 0; + size_t size; + if(tokenized){ + tokens = read_tokenized_data(filename, &size); + } else { + text = read_file(filename); + size = strlen((const char*)text); + } + + char *backup_directory = "/home/pjreddie/backup/"; + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, clear); + + int inputs = net->inputs; + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g, Inputs: %d %d %d\n", net->learning_rate, net->momentum, net->decay, inputs, net->batch, net->time_steps); + int batch = net->batch; + int steps = net->time_steps; + if(clear) *net->seen = 0; + int i = (*net->seen)/net->batch; + + int streams = batch/steps; + size_t *offsets = calloc(streams, sizeof(size_t)); + int j; + for(j = 0; j < streams; ++j){ + offsets[j] = rand_size_t()%size; + } + + clock_t time; + while(get_current_batch(net) < 
net->max_batches){ + i += 1; + time=clock(); + float_pair p; + if(tokenized){ + p = get_rnn_token_data(tokens, offsets, inputs, size, streams, steps); + }else{ + p = get_rnn_data(text, offsets, inputs, size, streams, steps); + } + + copy_cpu(net->inputs*net->batch, p.x, 1, net->input, 1); + copy_cpu(net->truths*net->batch, p.y, 1, net->truth, 1); + float loss = train_network_datum(net) / (batch); + free(p.x); + free(p.y); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + size_t chars = get_current_batch(net)*batch; + fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds, %f epochs\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), (float) chars/size); + + for(j = 0; j < streams; ++j){ + //printf("%d\n", j); + if(rand()%64 == 0){ + //fprintf(stderr, "Reset\n"); + offsets[j] = rand_size_t()%size; + reset_network_state(net, j); + } + } + + if(i%10000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void print_symbol(int n, char **tokens){ + if(tokens){ + printf("%s ", tokens[n]); + } else { + printf("%c", n); + } +} + +void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float temp, int rseed, char *token_file) +{ + char **tokens = 0; + if(token_file){ + size_t n; + tokens = read_tokens(token_file, &n); + } + + srand(rseed); + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int i, j; + for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp; + int c = 0; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + + /* + fill_cpu(inputs, 0, input, 1); + for(i = 0; i < 10; ++i){ + 
network_predict(net, input); + } + fill_cpu(inputs, 0, input, 1); + */ + + for(i = 0; i < len-1; ++i){ + c = seed[i]; + input[c] = 1; + network_predict(net, input); + input[c] = 0; + print_symbol(c, tokens); + } + if(len) c = seed[len-1]; + print_symbol(c, tokens); + for(i = 0; i < num; ++i){ + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + for(j = 32; j < 127; ++j){ + //printf("%d %c %f\n",j, j, out[j]); + } + for(j = 0; j < inputs; ++j){ + if (out[j] < .0001) out[j] = 0; + } + c = sample_array(out, inputs); + print_symbol(c, tokens); + } + printf("\n"); +} + +void test_tactic_rnn_multi(char *cfgfile, char *weightfile, int num, float temp, int rseed, char *token_file) +{ + char **tokens = 0; + if(token_file){ + size_t n; + tokens = read_tokens(token_file, &n); + } + + srand(rseed); + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int i, j; + for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp; + int c = 0; + float *input = calloc(inputs, sizeof(float)); + float *out = 0; + + while(1){ + reset_network_state(net, 0); + while((c = getc(stdin)) != EOF && c != 0){ + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + for(i = 0; i < num; ++i){ + for(j = 0; j < inputs; ++j){ + if (out[j] < .0001) out[j] = 0; + } + int next = sample_array(out, inputs); + if(c == '.' 
&& next == '\n') break; + c = next; + print_symbol(c, tokens); + + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + printf("\n"); + } +} + +void test_tactic_rnn(char *cfgfile, char *weightfile, int num, float temp, int rseed, char *token_file) +{ + char **tokens = 0; + if(token_file){ + size_t n; + tokens = read_tokens(token_file, &n); + } + + srand(rseed); + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int i, j; + for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp; + int c = 0; + float *input = calloc(inputs, sizeof(float)); + float *out = 0; + + while((c = getc(stdin)) != EOF){ + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + for(i = 0; i < num; ++i){ + for(j = 0; j < inputs; ++j){ + if (out[j] < .0001) out[j] = 0; + } + int next = sample_array(out, inputs); + if(c == '.' && next == '\n') break; + c = next; + print_symbol(c, tokens); + + input[c] = 1; + out = network_predict(net, input); + input[c] = 0; + } + printf("\n"); +} + +void valid_tactic_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int count = 0; + int words = 1; + int c; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + for(i = 0; i < len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + float sum = 0; + c = getc(stdin); + float log2 = log(2); + int in = 0; + while(c != EOF){ + int next = getc(stdin); + if(next == EOF) break; + if(next < 0 || next >= 255) error("Out of range character"); + + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + + if(c == '.' 
&& next == '\n') in = 0; + if(!in) { + if(c == '>' && next == '>'){ + in = 1; + ++words; + } + c = next; + continue; + } + ++count; + sum += log(out[next])/log2; + c = next; + printf("%d %d Perplexity: %4.4f Word Perplexity: %4.4f\n", count, words, pow(2, -sum/count), pow(2, -sum/words)); + } +} + +void valid_char_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int count = 0; + int words = 1; + int c; + int len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + for(i = 0; i < len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + float sum = 0; + c = getc(stdin); + float log2 = log(2); + while(c != EOF){ + int next = getc(stdin); + if(next == EOF) break; + if(next < 0 || next >= 255) error("Out of range character"); + ++count; + if(next == ' ' || next == '\n' || next == '\t') ++words; + input[c] = 1; + float *out = network_predict(net, input); + input[c] = 0; + sum += log(out[next])/log2; + c = next; + printf("%d BPC: %4.4f Perplexity: %4.4f Word Perplexity: %4.4f\n", count, -sum/count, pow(2, -sum/count), pow(2, -sum/words)); + } +} + +void vec_char_rnn(char *cfgfile, char *weightfile, char *seed) +{ + char *base = basecfg(cfgfile); + fprintf(stderr, "%s\n", base); + + network *net = load_network(cfgfile, weightfile, 0); + int inputs = net->inputs; + + int c; + int seed_len = strlen(seed); + float *input = calloc(inputs, sizeof(float)); + int i; + char *line; + while((line=fgetl(stdin)) != 0){ + reset_network_state(net, 0); + for(i = 0; i < seed_len; ++i){ + c = seed[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + strip(line); + int str_len = strlen(line); + for(i = 0; i < str_len; ++i){ + c = line[i]; + input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + } + c = ' '; + 
input[(int)c] = 1; + network_predict(net, input); + input[(int)c] = 0; + + layer l = net->layers[0]; + #ifdef GPU + cuda_pull_array(l.output_gpu, l.output, l.outputs); + #endif + printf("%s", line); + for(i = 0; i < l.outputs; ++i){ + printf(",%g", l.output[i]); + } + printf("\n"); + } +} + +void run_char_rnn(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + char *filename = find_char_arg(argc, argv, "-file", "data/shakespeare.txt"); + char *seed = find_char_arg(argc, argv, "-seed", "\n\n"); + int len = find_int_arg(argc, argv, "-len", 1000); + float temp = find_float_arg(argc, argv, "-temp", .7); + int rseed = find_int_arg(argc, argv, "-srand", time(0)); + int clear = find_arg(argc, argv, "-clear"); + int tokenized = find_arg(argc, argv, "-tokenized"); + char *tokens = find_char_arg(argc, argv, "-tokens", 0); + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_char_rnn(cfg, weights, filename, clear, tokenized); + else if(0==strcmp(argv[2], "valid")) valid_char_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "validtactic")) valid_tactic_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "vec")) vec_char_rnn(cfg, weights, seed); + else if(0==strcmp(argv[2], "generate")) test_char_rnn(cfg, weights, len, seed, temp, rseed, tokens); + else if(0==strcmp(argv[2], "generatetactic")) test_tactic_rnn(cfg, weights, len, temp, rseed, tokens); +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/rnn_vid.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/rnn_vid.c new file mode 100644 index 0000000000000000000000000000000000000000..e88792352311438d0fcb25bb7befd0677f70bae5 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/rnn_vid.c @@ -0,0 +1,208 @@ +#include "darknet.h" + +#ifdef OPENCV +image get_image_from_stream(CvCapture *cap); +image 
ipl_to_image(IplImage* src); + +void reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters); + + +typedef struct { + float *x; + float *y; +} float_pair; + +float_pair get_rnn_vid_data(network net, char **files, int n, int batch, int steps) +{ + int b; + assert(net.batch == steps + 1); + image out_im = get_network_image(net); + int output_size = out_im.w*out_im.h*out_im.c; + printf("%d %d %d\n", out_im.w, out_im.h, out_im.c); + float *feats = calloc(net.batch*batch*output_size, sizeof(float)); + for(b = 0; b < batch; ++b){ + int input_size = net.w*net.h*net.c; + float *input = calloc(input_size*net.batch, sizeof(float)); + char *filename = files[rand()%n]; + CvCapture *cap = cvCaptureFromFile(filename); + int frames = cvGetCaptureProperty(cap, CV_CAP_PROP_FRAME_COUNT); + int index = rand() % (frames - steps - 2); + if (frames < (steps + 4)){ + --b; + free(input); + continue; + } + + printf("frames: %d, index: %d\n", frames, index); + cvSetCaptureProperty(cap, CV_CAP_PROP_POS_FRAMES, index); + + int i; + for(i = 0; i < net.batch; ++i){ + IplImage* src = cvQueryFrame(cap); + image im = ipl_to_image(src); + rgbgr_image(im); + image re = resize_image(im, net.w, net.h); + //show_image(re, "loaded"); + //cvWaitKey(10); + memcpy(input + i*input_size, re.data, input_size*sizeof(float)); + free_image(im); + free_image(re); + } + float *output = network_predict(net, input); + + free(input); + + for(i = 0; i < net.batch; ++i){ + memcpy(feats + (b + i*batch)*output_size, output + i*output_size, output_size*sizeof(float)); + } + + cvReleaseCapture(&cap); + } + + //printf("%d %d %d\n", out_im.w, out_im.h, out_im.c); + float_pair p = {0}; + p.x = feats; + p.y = feats + output_size*batch; //+ out_im.w*out_im.h*out_im.c; + + return p; +} + + +void train_vid_rnn(char *cfgfile, char *weightfile) +{ + char *train_videos = "data/vid/train.txt"; + char *backup_directory = 
"/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + + list *plist = get_paths(train_videos); + int N = plist->size; + char **paths = (char **)list_to_array(plist); + clock_t time; + int steps = net.time_steps; + int batch = net.batch / net.time_steps; + + network extractor = parse_network_cfg("cfg/extractor.cfg"); + load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv"); + + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + float_pair p = get_rnn_vid_data(extractor, paths, N, batch, steps); + + copy_cpu(net.inputs*net.batch, p.x, 1, net.input, 1); + copy_cpu(net.truths*net.batch, p.y, 1, net.truth, 1); + float loss = train_network_datum(net) / (net.batch); + + + free(p.x); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time)); + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%10==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + + +image save_reconstruction(network net, image *init, float *feat, char *name, int i) +{ + image recon; + if (init) { + recon = copy_image(*init); + } else { + recon = make_random_image(net.w, net.h, 3); + } + + image update = make_image(net.w, net.h, 3); + reconstruct_picture(net, feat, recon, update, .01, .9, .1, 2, 50); + char buff[256]; + sprintf(buff, "%s%d", 
name, i); + save_image(recon, buff); + free_image(update); + return recon; +} + +void generate_vid_rnn(char *cfgfile, char *weightfile) +{ + network extractor = parse_network_cfg("cfg/extractor.recon.cfg"); + load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv"); + + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&extractor, 1); + set_batch_network(&net, 1); + + int i; + CvCapture *cap = cvCaptureFromFile("/extra/vid/ILSVRC2015/Data/VID/snippets/val/ILSVRC2015_val_00007030.mp4"); + float *feat; + float *next; + image last; + for(i = 0; i < 25; ++i){ + image im = get_image_from_stream(cap); + image re = resize_image(im, extractor.w, extractor.h); + feat = network_predict(extractor, re.data); + if(i > 0){ + printf("%f %f\n", mean_array(feat, 14*14*512), variance_array(feat, 14*14*512)); + printf("%f %f\n", mean_array(next, 14*14*512), variance_array(next, 14*14*512)); + printf("%f\n", mse_array(feat, 14*14*512)); + axpy_cpu(14*14*512, -1, feat, 1, next, 1); + printf("%f\n", mse_array(next, 14*14*512)); + } + next = network_predict(net, feat); + + free_image(im); + + free_image(save_reconstruction(extractor, 0, feat, "feat", i)); + free_image(save_reconstruction(extractor, 0, next, "next", i)); + if (i==24) last = copy_image(re); + free_image(re); + } + for(i = 0; i < 30; ++i){ + next = network_predict(net, next); + image new = save_reconstruction(extractor, &last, next, "new", i); + free_image(last); + last = new; + } +} + +void run_vid_rnn(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + //char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_vid_rnn(cfg, weights); + else if(0==strcmp(argv[2], "generate")) generate_vid_rnn(cfg, weights); +} +#else +void run_vid_rnn(int argc, char **argv){} +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/segmenter.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/segmenter.c new file mode 100644 index 0000000000000000000000000000000000000000..2e7cea0b730754b74a125bcd865aa12f0bdd3be0 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/segmenter.c @@ -0,0 +1,255 @@ +#include "darknet.h" +#include +#include + +void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display) +{ + int i; + + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + printf("%d\n", ngpus); + network **nets = calloc(ngpus, sizeof(network*)); + + srand(time(0)); + int seed = rand(); + for(i = 0; i < ngpus; ++i){ + srand(seed); +#ifdef GPU + cuda_set_device(gpus[i]); +#endif + nets[i] = load_network(cfgfile, weightfile, clear); + nets[i]->learning_rate *= ngpus; + } + srand(time(0)); + network *net = nets[0]; + image pred = get_network_image(net); + + int div = net->w/pred.w; + assert(pred.w * div == net->w); + assert(pred.h * div == net->h); + + int imgs = net->batch * net->subdivisions * ngpus; + + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + list *options = read_data_cfg(datacfg); + + char *backup_directory = option_find_str(options, "backup", "/backup/"); + char *train_list = option_find_str(options, "train", "data/train.list"); + + list *plist = get_paths(train_list); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.threads = 32; + args.scale = div; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = 
net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + args.size = net->w; + args.classes = 80; + + args.paths = paths; + args.n = imgs; + args.m = N; + args.type = SEGMENTATION_DATA; + + data train; + data buffer; + pthread_t load_thread; + args.d = &buffer; + load_thread = load_data(args); + + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + double time = what_time_is_it_now(); + + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data(args); + + printf("Loaded: %lf seconds\n", what_time_is_it_now()-time); + time = what_time_is_it_now(); + + float loss = 0; +#ifdef GPU + if(ngpus == 1){ + loss = train_network(net, train); + } else { + loss = train_networks(nets, ngpus, train, 4); + } +#else + loss = train_network(net, train); +#endif + if(display){ + image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]); + image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]); + image mask = mask_to_rgb(tr); + image prmask = mask_to_rgb(pred); + show_image(im, "input", 1); + show_image(prmask, "pred", 1); + show_image(mask, "truth", 100); + free_image(mask); + free_image(prmask); + } + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", 
backup_directory, base); + save_weights(net, buff); + + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void predict_segmenter(char *datafile, char *cfg, char *weights, char *filename) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image sized = letterbox_image(im, net->w, net->h); + + float *X = sized.data; + time=clock(); + float *predictions = network_predict(net, X); + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + printf("Predicted: %f\n", predictions[0]); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + show_image(sized, "orig", 1); + show_image(prmask, "pred", 0); + free_image(im); + free_image(sized); + free_image(prmask); + if (filename) break; + } +} + + +void demo_segmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename) +{ +#ifdef OPENCV + printf("Classifier Demo\n"); + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + + srand(2222222); + void * cap = open_video_stream(filename, cam_index, 0,0,0); + + if(!cap) error("Couldn't connect to webcam.\n"); + float fps = 0; + + while(1){ + struct timeval tval_before, tval_after, tval_result; + gettimeofday(&tval_before, NULL); + + image in = get_image_from_stream(cap); + image in_s = letterbox_image(in, net->w, net->h); + + network_predict(net, in_s.data); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.0f\n",fps); + + image pred = get_network_image(net); + image prmask = mask_to_rgb(pred); + show_image(prmask, "Segmenter", 10); + + free_image(in_s); + free_image(in); + 
free_image(prmask); + + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float curr = 1000000.f/((long int)tval_result.tv_usec); + fps = .9*fps + .1*curr; + } +#endif +} + + +void run_segmenter(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); + int *gpus = 0; + int gpu = 0; + int ngpus = 0; + if(gpu_list){ + printf("%s\n", gpu_list); + int len = strlen(gpu_list); + ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++ngpus; + } + gpus = calloc(ngpus, sizeof(int)); + for(i = 0; i < ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpu = gpu_index; + gpus = &gpu; + ngpus = 1; + } + + int cam_index = find_int_arg(argc, argv, "-c", 0); + int clear = find_arg(argc, argv, "-clear"); + int display = find_arg(argc, argv, "-display"); + char *data = argv[3]; + char *cfg = argv[4]; + char *weights = (argc > 5) ? argv[5] : 0; + char *filename = (argc > 6) ? 
argv[6]: 0; + if(0==strcmp(argv[2], "test")) predict_segmenter(data, cfg, weights, filename); + else if(0==strcmp(argv[2], "train")) train_segmenter(data, cfg, weights, gpus, ngpus, clear, display); + else if(0==strcmp(argv[2], "demo")) demo_segmenter(data, cfg, weights, cam_index, filename); +} + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/super.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/super.c new file mode 100644 index 0000000000000000000000000000000000000000..d34406b1f2ce70cd36eecb8298bf1ca3e736f01b --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/super.c @@ -0,0 +1,120 @@ +#include "darknet.h" + +void train_super(char *cfgfile, char *weightfile, int clear) +{ + char *train_images = "/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, clear); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.scale = 4; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = SUPER_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: 
%f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void test_super(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + resize_network(net, im.w, im.h); + printf("%d %d\n", im.w, im.h); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image(net); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + show_image(out, "out", 0); + + free_image(im); + if (filename) break; + } +} + + +void run_super(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5] : 0; + int clear = find_arg(argc, argv, "-clear"); + if(0==strcmp(argv[2], "train")) train_super(cfg, weights, clear); + else if(0==strcmp(argv[2], "test")) test_super(cfg, weights, filename); + /* + else if(0==strcmp(argv[2], "valid")) validate_super(cfg, weights); + */ +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/swag.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/swag.c new file mode 100644 index 0000000000000000000000000000000000000000..c22d7855c46a975ecd1e94a60f9b7059bc288fee --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/swag.c @@ -0,0 +1,83 @@ +#include "darknet.h" +#include + +void train_swag(char *cfgfile, char *weightfile) +{ + char *train_images = "data/voc.0712.trainval"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + data train, buffer; + + layer l = net.layers[net.n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.classes = classes; + args.jitter = jitter; + args.num_boxes = side; + args.d = &buffer; + args.type = REGION_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", 
sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || i == 600){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void run_swag(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_swag(cfg, weights); +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/tag.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/tag.c new file mode 100644 index 0000000000000000000000000000000000000000..4caf8cba18f39f62deb54ea913fd40c194b3e33c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/tag.c @@ -0,0 +1,140 @@ +#include "darknet.h" + +void train_tag(char *cfgfile, char *weightfile, int clear) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network *net = load_network(cfgfile, weightfile, clear); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = 1024; + list *plist = get_paths("/home/pjreddie/tag/train.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + int N = plist->size; + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + + args.min = net->w; + args.max = net->max_crop; + args.size 
= net->w; + + args.paths = paths; + args.classes = net->outputs; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = TAG_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + fprintf(stderr, "%d classes\n", net->outputs); + + load_thread = load_data_in_thread(args); + int epoch = (*net->seen)/N; + while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); + free_data(train); + if(*net->seen/N > epoch){ + epoch = *net->seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s.backup",backup_directory,base); + save_weights(net, buff); + } + } + char buff[256]; + sprintf(buff, "%s/%s.weights", backup_directory, base); + save_weights(net, buff); + + pthread_join(load_thread, 0); + free_data(buffer); + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void test_tag(char *cfgfile, char *weightfile, char *filename) +{ + network *net = load_network(cfgfile, weightfile, 0); + set_batch_network(net, 1); + srand(2222222); + int i = 0; + char **names = get_labels("data/tags.txt"); + clock_t time; + int indexes[10]; + char buff[256]; + char *input = buff; + int size = net->w; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = 
fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + image r = resize_min(im, size); + resize_network(net, r.w, r.h); + printf("%d %d\n", r.w, r.h); + + float *X = r.data; + time=clock(); + float *predictions = network_predict(net, X); + top_predictions(net, 10, indexes); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + for(i = 0; i < 10; ++i){ + int index = indexes[i]; + printf("%.1f%%: %s\n", predictions[index]*100, names[index]); + } + if(r.data != im.data) free_image(r); + free_image(im); + if (filename) break; + } +} + + +void run_tag(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int clear = find_arg(argc, argv, "-clear"); + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_tag(cfg, weights, clear); + else if(0==strcmp(argv[2], "test")) test_tag(cfg, weights, filename); +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/voxel.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/voxel.c new file mode 100644 index 0000000000000000000000000000000000000000..01ea9bb98987590227758364bbfff50996cf9a2d --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/voxel.c @@ -0,0 +1,161 @@ +#include "darknet.h" + +void extract_voxel(char *lfile, char *rfile, char *prefix) +{ +#ifdef OPENCV + int w = 1920; + int h = 1080; + int shift = 0; + int count = 0; + CvCapture *lcap = cvCaptureFromFile(lfile); + CvCapture *rcap = cvCaptureFromFile(rfile); + while(1){ + image l = get_image_from_stream(lcap); + image r = get_image_from_stream(rcap); + if(!l.w || !r.w) break; + if(count%100 == 0) { + shift = best_3d_shift_r(l, r, -l.h/100, l.h/100); + printf("%d\n", shift); + } + image ls = crop_image(l, (l.w - w)/2, (l.h - h)/2, w, h); + 
image rs = crop_image(r, 105 + (r.w - w)/2, (r.h - h)/2 + shift, w, h); + char buff[256]; + sprintf(buff, "%s_%05d_l", prefix, count); + save_image(ls, buff); + sprintf(buff, "%s_%05d_r", prefix, count); + save_image(rs, buff); + free_image(l); + free_image(r); + free_image(ls); + free_image(rs); + ++count; + } + +#else + printf("need OpenCV for extraction\n"); +#endif +} + +void train_voxel(char *cfgfile, char *weightfile) +{ + char *train_images = "/data/imagenet/imagenet1k.train.list"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + int i = *net.seen/imgs; + data train, buffer; + + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.scale = 4; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + args.d = &buffer; + args.type = SUPER_DATA; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net.max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + if(i%100==0){ + char buff[256]; + 
sprintf(buff, "%s/%s.backup", backup_directory, base); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void test_voxel(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input, 0, 0); + resize_network(&net, im.w, im.h); + printf("%d %d\n", im.w, im.h); + + float *X = im.data; + time=clock(); + network_predict(net, X); + image out = get_network_image(net); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + save_image(out, "out"); + + free_image(im); + if (filename) break; + } +} + + +void run_voxel(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? 
argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_voxel(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_voxel(cfg, weights, filename); + else if(0==strcmp(argv[2], "extract")) extract_voxel(argv[3], argv[4], argv[5]); + /* + else if(0==strcmp(argv[2], "valid")) validate_voxel(cfg, weights); + */ +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/writing.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/writing.c new file mode 100644 index 0000000000000000000000000000000000000000..1b6ff83b5838b654e0fd1b6664156daf6d7a889b --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/writing.c @@ -0,0 +1,144 @@ +#include "darknet.h" + +void train_writing(char *cfgfile, char *weightfile) +{ + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = net.batch*net.subdivisions; + list *plist = get_paths("figures.list"); + char **paths = (char **)list_to_array(plist); + clock_t time; + int N = plist->size; + printf("N: %d\n", N); + image out = get_network_image(net); + + data train, buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.out_w = out.w; + args.out_h = out.h; + args.paths = paths; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = WRITING_DATA; + + pthread_t load_thread = load_data_in_thread(args); + int epoch = (*net.seen)/N; + while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + printf("Loaded %lf seconds\n",sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + + /* + image pred = float_to_image(64, 64, 1, 
out); + print_image(pred); + */ + + /* + image im = float_to_image(256, 256, 3, train.X.vals[0]); + image lab = float_to_image(64, 64, 1, train.y.vals[0]); + image pred = float_to_image(64, 64, 1, out); + show_image(im, "image"); + show_image(lab, "label"); + print_image(lab); + show_image(pred, "pred"); + cvWaitKey(0); + */ + + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + free_data(train); + if(get_current_batch(net)%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_batch_%ld.weights", backup_directory, base, get_current_batch(net)); + save_weights(net, buff); + } + if(*net.seen/N > epoch){ + epoch = *net.seen/N; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + } + } +} + +void test_writing(char *cfgfile, char *weightfile, char *filename) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + set_batch_network(&net, 1); + srand(2222222); + clock_t time; + char buff[256]; + char *input = buff; + while(1){ + if(filename){ + strncpy(input, filename, 256); + }else{ + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + + image im = load_image_color(input, 0, 0); + resize_network(&net, im.w, im.h); + printf("%d %d %d\n", im.h, im.w, im.c); + float *X = im.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + image pred = get_network_image(net); + + image upsampled = resize_image(pred, im.w, im.h); + image thresh = threshold_image(upsampled, .5); + pred = thresh; + + show_image(pred, "prediction"); + show_image(im, "orig"); +#ifdef OPENCV + cvWaitKey(0); + cvDestroyAllWindows(); +#endif + + 
free_image(upsampled); + free_image(thresh); + free_image(im); + if (filename) break; + } +} + +void run_writing(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5] : 0; + if(0==strcmp(argv[2], "train")) train_writing(cfg, weights); + else if(0==strcmp(argv[2], "test")) test_writing(cfg, weights, filename); +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/examples/yolo.c b/workloads/realworld/uvm_prefetch_async/darknet/examples/yolo.c new file mode 100644 index 0000000000000000000000000000000000000000..4ddb69a3e53b2123ccb89026645a66c044047faa --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/examples/yolo.c @@ -0,0 +1,327 @@ +#include "darknet.h" + +char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; + +void train_yolo(char *cfgfile, char *weightfile) +{ + char *train_images = "/data/voc/train.txt"; + char *backup_directory = "/home/pjreddie/backup/"; + srand(time(0)); + char *base = basecfg(cfgfile); + printf("%s\n", base); + float avg_loss = -1; + network *net = load_network(cfgfile, weightfile, 0); + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + int imgs = net->batch*net->subdivisions; + int i = *net->seen/imgs; + data train, buffer; + + + layer l = net->layers[net->n - 1]; + + int side = l.side; + int classes = l.classes; + float jitter = l.jitter; + + list *plist = get_paths(train_images); + //int N = plist->size; + char **paths = (char **)list_to_array(plist); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.paths = paths; + args.n = imgs; + args.m = plist->size; + 
args.classes = classes; + args.jitter = jitter; + args.num_boxes = side; + args.d = &buffer; + args.type = REGION_DATA; + + args.angle = net->angle; + args.exposure = net->exposure; + args.saturation = net->saturation; + args.hue = net->hue; + + pthread_t load_thread = load_data_in_thread(args); + clock_t time; + //while(i*imgs < N*120){ + while(get_current_batch(net) < net->max_batches){ + i += 1; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + load_thread = load_data_in_thread(args); + + printf("Loaded: %lf seconds\n", sec(clock()-time)); + + time=clock(); + float loss = train_network(net, train); + if (avg_loss < 0) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + + printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); + if(i%1000==0 || (i < 1000 && i%100 == 0)){ + char buff[256]; + sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + save_weights(net, buff); + } + free_data(train); + } + char buff[256]; + sprintf(buff, "%s/%s_final.weights", backup_directory, base); + save_weights(net, buff); +} + +void print_yolo_detections(FILE **fps, char *id, int total, int classes, int w, int h, detection *dets) +{ + int i, j; + for(i = 0; i < total; ++i){ + float xmin = dets[i].bbox.x - dets[i].bbox.w/2.; + float xmax = dets[i].bbox.x + dets[i].bbox.w/2.; + float ymin = dets[i].bbox.y - dets[i].bbox.h/2.; + float ymax = dets[i].bbox.y + dets[i].bbox.h/2.; + + if (xmin < 0) xmin = 0; + if (ymin < 0) ymin = 0; + if (xmax > w) xmax = w; + if (ymax > h) ymax = h; + + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], + xmin, ymin, xmax, ymax); + } + } +} + +void validate_yolo(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + 
srand(time(0)); + + char *base = "results/comp4_det_test_"; + //list *plist = get_paths("data/voc.2007.test"); + list *plist = get_paths("/home/pjreddie/data/voc/2007_test.txt"); + //list *plist = get_paths("data/voc.2012.test"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + + int j; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + int t; + + float thresh = .001; + int nms = 1; + float iou_thresh = .5; + + int nthreads = 8; + image *val = calloc(nthreads, sizeof(image)); + image *val_resized = calloc(nthreads, sizeof(image)); + image *buf = calloc(nthreads, sizeof(image)); + image *buf_resized = calloc(nthreads, sizeof(image)); + pthread_t *thr = calloc(nthreads, sizeof(pthread_t)); + + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.type = IMAGE_DATA; + + for(t = 0; t < nthreads; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + time_t start = time(0); + for(i = nthreads; i < m+nthreads; i += nthreads){ + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + val_resized[t] = buf_resized[t]; + } + for(t = 0; t < nthreads && i+t < m; ++t){ + args.path = paths[i+t]; + args.im = &buf[t]; + args.resized = &buf_resized[t]; + thr[t] = load_data_in_thread(args); + } + for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ + char *path = paths[i+t-nthreads]; + char *id = basecfg(path); + float *X = val_resized[t].data; + network_predict(net, X); + int w = val[t].w; + int h = val[t].h; + int nboxes = 0; + detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh); + 
print_yolo_detections(fps, id, l.side*l.side*l.n, classes, w, h, dets); + free_detections(dets, nboxes); + free(id); + free_image(val[t]); + free_image(val_resized[t]); + } + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); +} + +void validate_yolo_recall(char *cfg, char *weights) +{ + network *net = load_network(cfg, weights, 0); + set_batch_network(net, 1); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); + srand(time(0)); + + char *base = "results/comp4_det_test_"; + list *plist = get_paths("data/voc.2007.test"); + char **paths = (char **)list_to_array(plist); + + layer l = net->layers[net->n-1]; + int classes = l.classes; + int side = l.side; + + int j, k; + FILE **fps = calloc(classes, sizeof(FILE *)); + for(j = 0; j < classes; ++j){ + char buff[1024]; + snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); + fps[j] = fopen(buff, "w"); + } + + int m = plist->size; + int i=0; + + float thresh = .001; + float iou_thresh = .5; + float nms = 0; + + int total = 0; + int correct = 0; + int proposals = 0; + float avg_iou = 0; + + for(i = 0; i < m; ++i){ + char *path = paths[i]; + image orig = load_image_color(path, 0, 0); + image sized = resize_image(orig, net->w, net->h); + char *id = basecfg(path); + network_predict(net, sized.data); + + int nboxes = 0; + detection *dets = get_network_boxes(net, orig.w, orig.h, thresh, 0, 0, 1, &nboxes); + if (nms) do_nms_obj(dets, side*side*l.n, 1, nms); + + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int num_labels = 0; + box_label *truth = read_boxes(labelpath, &num_labels); + for(k = 0; k < side*side*l.n; ++k){ + if(dets[k].objectness > thresh){ + ++proposals; + } + } + for (j = 0; j < num_labels; ++j) { + ++total; + box t = 
{truth[j].x, truth[j].y, truth[j].w, truth[j].h}; + float best_iou = 0; + for(k = 0; k < side*side*l.n; ++k){ + float iou = box_iou(dets[k].bbox, t); + if(dets[k].objectness > thresh && iou > best_iou){ + best_iou = iou; + } + } + avg_iou += best_iou; + if(best_iou > iou_thresh){ + ++correct; + } + } + + fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); + free_detections(dets, nboxes); + free(id); + free_image(orig); + free_image(sized); + } +} + +void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) +{ + image **alphabet = load_alphabet(); + network *net = load_network(cfgfile, weightfile, 0); + layer l = net->layers[net->n-1]; + set_batch_network(net, 1); + srand(2222222); + clock_t time; + char buff[256]; + char *input = buff; + float nms=.4; + while(1){ + if(filename){ + strncpy(input, filename, 256); + } else { + printf("Enter Image Path: "); + fflush(stdout); + input = fgets(input, 256, stdin); + if(!input) return; + strtok(input, "\n"); + } + image im = load_image_color(input,0,0); + image sized = resize_image(im, net->w, net->h); + float *X = sized.data; + time=clock(); + network_predict(net, X); + printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); + + int nboxes = 0; + detection *dets = get_network_boxes(net, 1, 1, thresh, 0, 0, 0, &nboxes); + if (nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms); + + draw_detections(im, dets, l.side*l.side*l.n, thresh, voc_names, alphabet, 20); + save_image(im, "predictions"); + show_image(im, "predictions", 0); + free_detections(dets, nboxes); + free_image(im); + free_image(sized); + if (filename) break; + } +} + +void run_yolo(int argc, char **argv) +{ + char *prefix = find_char_arg(argc, argv, "-prefix", 0); + float thresh = find_float_arg(argc, argv, "-thresh", .2); + int cam_index = find_int_arg(argc, argv, "-c", 0); + int frame_skip = find_int_arg(argc, argv, 
"-s", 0); + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + int avg = find_int_arg(argc, argv, "-avg", 1); + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + char *filename = (argc > 5) ? argv[5]: 0; + if(0==strcmp(argv[2], "test")) test_yolo(cfg, weights, filename, thresh); + else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights); + else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights); + else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix, avg, .5, 0,0,0,0); +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/include/darknet.h b/workloads/realworld/uvm_prefetch_async/darknet/include/darknet.h new file mode 100644 index 0000000000000000000000000000000000000000..7be8225e2d39f079ca0a15da6980b42f8966af40 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/include/darknet.h @@ -0,0 +1,810 @@ +#ifndef DARKNET_API +#define DARKNET_API +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef GPU + #define GPU_DEVICE 7 + #define BLOCK 512 + + #include "cuda_runtime.h" + #include "curand.h" + #include "cublas_v2.h" + + #ifdef CUDNN + #include "cudnn.h" + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define SECRET_NUM -1234 +extern int gpu_index; + +typedef struct{ + int classes; + char **names; +} metadata; + +metadata get_metadata(char *file); + +typedef struct{ + int *leaf; + int n; + int *parent; + int *child; + int *group; + char **name; + + int groups; + int *group_size; + int *group_offset; +} tree; +tree *read_tree(char *filename); + +typedef enum{ + LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU +} ACTIVATION; + +typedef enum{ + PNG, BMP, TGA, JPG +} IMTYPE; + +typedef enum{ + MULT, 
ADD, SUB, DIV +} BINARY_ACTIVATION; + +typedef enum { + CONVOLUTIONAL, + DECONVOLUTIONAL, + CONNECTED, + MAXPOOL, + SOFTMAX, + DETECTION, + DROPOUT, + CROP, + ROUTE, + COST, + NORMALIZATION, + AVGPOOL, + LOCAL, + SHORTCUT, + ACTIVE, + RNN, + GRU, + LSTM, + CRNN, + BATCHNORM, + NETWORK, + XNOR, + REGION, + YOLO, + ISEG, + REORG, + UPSAMPLE, + LOGXENT, + L2NORM, + BLANK +} LAYER_TYPE; + +typedef enum{ + SSE, MASKED, L1, SEG, SMOOTH,WGAN +} COST_TYPE; + +typedef struct{ + int batch; + float learning_rate; + float momentum; + float decay; + int adam; + float B1; + float B2; + float eps; + int t; +} update_args; + +struct network; +typedef struct network network; + +struct layer; +typedef struct layer layer; + +struct layer{ + LAYER_TYPE type; + ACTIVATION activation; + COST_TYPE cost_type; + void (*forward) (struct layer, struct network); + void (*backward) (struct layer, struct network); + void (*update) (struct layer, update_args); + void (*forward_gpu) (struct layer, struct network); + void (*backward_gpu) (struct layer, struct network); + void (*update_gpu) (struct layer, update_args); + int batch_normalize; + int shortcut; + int batch; + int forced; + int flipped; + int inputs; + int outputs; + int nweights; + int nbiases; + int extra; + int truths; + int h,w,c; + int out_h, out_w, out_c; + int n; + int max_boxes; + int groups; + int size; + int side; + int stride; + int reverse; + int flatten; + int spatial; + int pad; + int sqrt; + int flip; + int index; + int binary; + int xnor; + int steps; + int hidden; + int truth; + float smooth; + float dot; + float angle; + float jitter; + float saturation; + float exposure; + float shift; + float ratio; + float learning_rate_scale; + float clip; + int noloss; + int softmax; + int classes; + int coords; + int background; + int rescore; + int objectness; + int joint; + int noadjust; + int reorg; + int log; + int tanh; + int *mask; + int total; + + float alpha; + float beta; + float kappa; + + float coord_scale; + float 
object_scale; + float noobject_scale; + float mask_scale; + float class_scale; + int bias_match; + int random; + float ignore_thresh; + float truth_thresh; + float thresh; + float focus; + int classfix; + int absolute; + + int onlyforward; + int stopbackward; + int dontload; + int dontsave; + int dontloadscales; + int numload; + + float temperature; + float probability; + float scale; + + char * cweights; + int * indexes; + int * input_layers; + int * input_sizes; + int * map; + int * counts; + float ** sums; + float * rand; + float * cost; + float * state; + float * prev_state; + float * forgot_state; + float * forgot_delta; + float * state_delta; + float * combine_cpu; + float * combine_delta_cpu; + + float * concat; + float * concat_delta; + + float * binary_weights; + + float * biases; + float * bias_updates; + + float * scales; + float * scale_updates; + + float * weights; + float * weight_updates; + + float * delta; + float * output; + float * loss; + float * squared; + float * norms; + + float * spatial_mean; + float * mean; + float * variance; + + float * mean_delta; + float * variance_delta; + + float * rolling_mean; + float * rolling_variance; + + float * x; + float * x_norm; + + float * m; + float * v; + + float * bias_m; + float * bias_v; + float * scale_m; + float * scale_v; + + + float *z_cpu; + float *r_cpu; + float *h_cpu; + float * prev_state_cpu; + + float *temp_cpu; + float *temp2_cpu; + float *temp3_cpu; + + float *dh_cpu; + float *hh_cpu; + float *prev_cell_cpu; + float *cell_cpu; + float *f_cpu; + float *i_cpu; + float *g_cpu; + float *o_cpu; + float *c_cpu; + float *dc_cpu; + + float * binary_input; + + struct layer *input_layer; + struct layer *self_layer; + struct layer *output_layer; + + struct layer *reset_layer; + struct layer *update_layer; + struct layer *state_layer; + + struct layer *input_gate_layer; + struct layer *state_gate_layer; + struct layer *input_save_layer; + struct layer *state_save_layer; + struct layer 
*input_state_layer; + struct layer *state_state_layer; + + struct layer *input_z_layer; + struct layer *state_z_layer; + + struct layer *input_r_layer; + struct layer *state_r_layer; + + struct layer *input_h_layer; + struct layer *state_h_layer; + + struct layer *wz; + struct layer *uz; + struct layer *wr; + struct layer *ur; + struct layer *wh; + struct layer *uh; + struct layer *uo; + struct layer *wo; + struct layer *uf; + struct layer *wf; + struct layer *ui; + struct layer *wi; + struct layer *ug; + struct layer *wg; + + tree *softmax_tree; + + size_t workspace_size; + +#ifdef GPU + int *indexes_gpu; + + float *z_gpu; + float *r_gpu; + float *h_gpu; + + float *temp_gpu; + float *temp2_gpu; + float *temp3_gpu; + + float *dh_gpu; + float *hh_gpu; + float *prev_cell_gpu; + float *cell_gpu; + float *f_gpu; + float *i_gpu; + float *g_gpu; + float *o_gpu; + float *c_gpu; + float *dc_gpu; + + float *m_gpu; + float *v_gpu; + float *bias_m_gpu; + float *scale_m_gpu; + float *bias_v_gpu; + float *scale_v_gpu; + + float * combine_gpu; + float * combine_delta_gpu; + + float * prev_state_gpu; + float * forgot_state_gpu; + float * forgot_delta_gpu; + float * state_gpu; + float * state_delta_gpu; + float * gate_gpu; + float * gate_delta_gpu; + float * save_gpu; + float * save_delta_gpu; + float * concat_gpu; + float * concat_delta_gpu; + + float * binary_input_gpu; + float * binary_weights_gpu; + + float * mean_gpu; + float * variance_gpu; + + float * rolling_mean_gpu; + float * rolling_variance_gpu; + + float * variance_delta_gpu; + float * mean_delta_gpu; + + float * x_gpu; + float * x_norm_gpu; + float * weights_gpu; + float * weight_updates_gpu; + float * weight_change_gpu; + + float * biases_gpu; + float * bias_updates_gpu; + float * bias_change_gpu; + + float * scales_gpu; + float * scale_updates_gpu; + float * scale_change_gpu; + + float * output_gpu; + float * loss_gpu; + float * delta_gpu; + float * rand_gpu; + float * squared_gpu; + float * norms_gpu; +#ifdef 
CUDNN + cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc; + cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc; + cudnnTensorDescriptor_t normTensorDesc; + cudnnFilterDescriptor_t weightDesc; + cudnnFilterDescriptor_t dweightDesc; + cudnnConvolutionDescriptor_t convDesc; + cudnnConvolutionFwdAlgo_t fw_algo; + cudnnConvolutionBwdDataAlgo_t bd_algo; + cudnnConvolutionBwdFilterAlgo_t bf_algo; +#endif +#endif +}; + +void free_layer(layer); + +typedef enum { + CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM +} learning_rate_policy; + +typedef struct network{ + int n; + int batch; + size_t *seen; + int *t; + float epoch; + int subdivisions; + layer *layers; + float *output; + learning_rate_policy policy; + + float learning_rate; + float momentum; + float decay; + float gamma; + float scale; + float power; + int time_steps; + int step; + int max_batches; + float *scales; + int *steps; + int num_steps; + int burn_in; + + int adam; + float B1; + float B2; + float eps; + + int inputs; + int outputs; + int truths; + int notruth; + int h, w, c; + int max_crop; + int min_crop; + float max_ratio; + float min_ratio; + int center; + float angle; + float aspect; + float exposure; + float saturation; + float hue; + int random; + + int gpu_index; + tree *hierarchy; + + float *input; + float *truth; + float *delta; + float *workspace; + int train; + int index; + float *cost; + float clip; + +#ifdef GPU + float *input_gpu; + float *truth_gpu; + float *delta_gpu; + float *output_gpu; +#endif + +} network; + +typedef struct { + int w; + int h; + float scale; + float rad; + float dx; + float dy; + float aspect; +} augment_args; + +typedef struct { + int w; + int h; + int c; + float *data; +} image; + +typedef struct{ + float x, y, w, h; +} box; + +typedef struct detection{ + box bbox; + int classes; + float *prob; + float *mask; + float objectness; + int sort_class; +} detection; + +typedef struct matrix{ + int rows, cols; + float **vals; +} matrix; + + +typedef struct{ + int w, h; 
+ matrix X; + matrix y; + int shallow; + int *num_boxes; + box **boxes; +} data; + +typedef enum { + CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA, ISEG_DATA +} data_type; + +typedef struct load_args{ + int threads; + char **paths; + char *path; + int n; + int m; + char **labels; + int h; + int w; + int out_w; + int out_h; + int nh; + int nw; + int num_boxes; + int min, max, size; + int classes; + int background; + int scale; + int center; + int coords; + float jitter; + float angle; + float aspect; + float saturation; + float exposure; + float hue; + data *d; + image *im; + image *resized; + data_type type; + tree *hierarchy; +} load_args; + +typedef struct{ + int id; + float x,y,w,h; + float left, right, top, bottom; +} box_label; + + +network *load_network(char *cfg, char *weights, int clear); +load_args get_base_args(network *net); + +void free_data(data d); + +typedef struct node{ + void *val; + struct node *next; + struct node *prev; +} node; + +typedef struct list{ + int size; + node *front; + node *back; +} list; + +pthread_t load_data(load_args args); +list *read_data_cfg(char *filename); +list *read_cfg(char *filename); +unsigned char *read_file(char *filename); +data resize_data(data orig, int w, int h); +data *tile_data(data orig, int divs, int size); +data select_data(data *orig, int *inds); + +void forward_network(network *net); +void backward_network(network *net); +void update_network(network *net); + + +float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); +void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); +void scal_cpu(int N, float ALPHA, float *X, int INCX); +void fill_cpu(int N, float ALPHA, float * X, int INCX); +void normalize_cpu(float *x, float 
*mean, float *variance, int batch, int filters, int spatial); +void softmax(float *input, int n, float temp, int stride, float *output); + +int best_3d_shift_r(image a, image b, int min, int max); +#ifdef GPU +void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); +void fill_gpu(int N, float ALPHA, float * X, int INCX); +void scal_gpu(int N, float ALPHA, float * X, int INCX); +void copy_gpu(int N, float * X, int INCX, float * Y, int INCY); + +void cuda_set_device(int n); +void cuda_free(float *x_gpu); +float *cuda_make_array(float *x, size_t n); +void cuda_pull_array(float *x_gpu, float *x, size_t n); +float cuda_mag_array(float *x_gpu, size_t n); +void cuda_push_array(float *x_gpu, float *x, size_t n); + +void forward_network_gpu(network *net); +void backward_network_gpu(network *net); +void update_network_gpu(network *net); + +float train_networks(network **nets, int n, data d, int interval); +void sync_nets(network **nets, int n, int interval); +void harmless_update_network_gpu(network *net); +#endif +image get_label(image **characters, char *string, int size); +void draw_label(image a, int r, int c, image label, const float *rgb); +void save_image(image im, const char *name); +void save_image_options(image im, const char *name, IMTYPE f, int quality); +void get_next_batch(data d, int n, int offset, float *X, float *y); +void grayscale_image_3c(image im); +void normalize_image(image p); +void matrix_to_csv(matrix m); +float train_network_sgd(network *net, data d, int n); +void rgbgr_image(image im); +data copy_data(data d); +data concat_data(data d1, data d2); +data load_cifar10_data(char *filename); +float matrix_topk_accuracy(matrix truth, matrix guess, int k); +void matrix_add_matrix(matrix from, matrix to); +void scale_matrix(matrix m, float scale); +matrix csv_to_matrix(char *filename); +float *network_accuracies(network *net, data d, int n); +float train_network_datum(network *net); +image make_random_image(int w, int h, int c); + 
+void denormalize_connected_layer(layer l); +void denormalize_convolutional_layer(layer l); +void statistics_connected_layer(layer l); +void rescale_weights(layer l, float scale, float trans); +void rgbgr_weights(layer l); +image *get_weights(layer l); + +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, int avg, float hier_thresh, int w, int h, int fps, int fullscreen); +void get_detection_detections(layer l, int w, int h, float thresh, detection *dets); + +char *option_find_str(list *l, char *key, char *def); +int option_find_int(list *l, char *key, int def); +int option_find_int_quiet(list *l, char *key, int def); + +network *parse_network_cfg(char *filename); +void save_weights(network *net, char *filename); +void load_weights(network *net, char *filename); +void save_weights_upto(network *net, char *filename, int cutoff); +void load_weights_upto(network *net, char *filename, int start, int cutoff); + +void zero_objectness(layer l); +void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets); +int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets); +void free_network(network *net); +void set_batch_network(network *net, int b); +void set_temp_network(network *net, float t); +image load_image(char *filename, int w, int h, int c); +image load_image_color(char *filename, int w, int h); +image make_image(int w, int h, int c); +image resize_image(image im, int w, int h); +void censor_image(image im, int dx, int dy, int w, int h); +image letterbox_image(image im, int w, int h); +image crop_image(image im, int dx, int dy, int w, int h); +image center_crop_image(image im, int w, int h); +image resize_min(image im, int min); +image resize_max(image im, int max); +image threshold_image(image im, float thresh); +image 
mask_to_rgb(image mask); +int resize_network(network *net, int w, int h); +void free_matrix(matrix m); +void test_resize(char *filename); +int show_image(image p, const char *name, int ms); +image copy_image(image p); +void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b); +float get_current_rate(network *net); +void composite_3d(char *f1, char *f2, char *out, int delta); +data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h); +size_t get_current_batch(network *net); +void constrain_image(image im); +image get_network_image_layer(network *net, int i); +layer get_network_output_layer(network *net); +void top_predictions(network *net, int n, int *index); +void flip_image(image a); +image float_to_image(int w, int h, int c, float *data); +void ghost_image(image source, image dest, int dx, int dy); +float network_accuracy(network *net, data d); +void random_distort_image(image im, float hue, float saturation, float exposure); +void fill_image(image m, float s); +image grayscale_image(image im); +void rotate_image_cw(image im, int times); +double what_time_is_it_now(); +image rotate_image(image m, float rad); +void visualize_network(network *net); +float box_iou(box a, box b); +data load_all_cifar10(); +box_label *read_boxes(char *filename, int *n); +box float_to_box(float *f, int stride); +void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes); + +matrix network_predict_data(network *net, data test); +image **load_alphabet(); +image get_network_image(network *net); +float *network_predict(network *net, float *input); + +int network_width(network *net); +int network_height(network *net); +float *network_predict_image(network *net, image im); +void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets); +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, 
int relative, int *num); +void free_detections(detection *dets, int n); + +void reset_network_state(network *net, int b); + +char **get_labels(char *filename); +void do_nms_obj(detection *dets, int total, int classes, float thresh); +void do_nms_sort(detection *dets, int total, int classes, float thresh); + +matrix make_matrix(int rows, int cols); + +#ifdef OPENCV +void *open_video_stream(const char *f, int c, int w, int h, int fps); +image get_image_from_stream(void *p); +void make_window(char *name, int w, int h, int fullscreen); +#endif + +void free_image(image m); +float train_network(network *net, data d); +pthread_t load_data_in_thread(load_args args); +void load_data_blocking(load_args args); +list *get_paths(char *filename); +void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride); +void change_leaves(tree *t, char *leaf_list); + +int find_int_arg(int argc, char **argv, char *arg, int def); +float find_float_arg(int argc, char **argv, char *arg, float def); +int find_arg(int argc, char* argv[], char *arg); +char *find_char_arg(int argc, char **argv, char *arg, char *def); +char *basecfg(char *cfgfile); +void find_replace(char *str, char *orig, char *rep, char *output); +void free_ptrs(void **ptrs, int n); +char *fgetl(FILE *fp); +void strip(char *s); +float sec(clock_t clocks); +void **list_to_array(list *l); +void top_k(float *a, int n, int k, int *index); +int *read_map(char *filename); +void error(const char *s); +int max_index(float *a, int n); +int max_int_index(int *a, int n); +int sample_array(float *a, int n); +int *random_index_order(int min, int max); +void free_list(list *l); +float mse_array(float *a, int n); +float variance_array(float *a, int n); +float mag_array(float *a, int n); +void scale_array(float *a, int n, float s); +float mean_array(float *a, int n); +float sum_array(float *a, int n); +void normalize_array(float *a, int n); +int *read_intlist(char *s, int *n, int d); +size_t rand_size_t(); 
+float rand_normal(); +float rand_uniform(float min, float max); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/predictions.jpg b/workloads/realworld/uvm_prefetch_async/darknet/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c92d70d77e70e11853e9838ca90b46eb71a18ffa Binary files /dev/null and b/workloads/realworld/uvm_prefetch_async/darknet/predictions.jpg differ diff --git a/workloads/realworld/uvm_prefetch_async/darknet/python/darknet.py b/workloads/realworld/uvm_prefetch_async/darknet/python/darknet.py new file mode 100644 index 0000000000000000000000000000000000000000..b14d24485d86aa69f3991be79ec4f25c2b8e5a59 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/python/darknet.py @@ -0,0 +1,156 @@ +from ctypes import * +import math +import random + +def sample(probs): + s = sum(probs) + probs = [a/s for a in probs] + r = random.uniform(0, 1) + for i in range(len(probs)): + r = r - probs[i] + if r <= 0: + return i + return len(probs)-1 + +def c_array(ctype, values): + arr = (ctype*len(values))() + arr[:] = values + return arr + +class BOX(Structure): + _fields_ = [("x", c_float), + ("y", c_float), + ("w", c_float), + ("h", c_float)] + +class DETECTION(Structure): + _fields_ = [("bbox", BOX), + ("classes", c_int), + ("prob", POINTER(c_float)), + ("mask", POINTER(c_float)), + ("objectness", c_float), + ("sort_class", c_int)] + + +class IMAGE(Structure): + _fields_ = [("w", c_int), + ("h", c_int), + ("c", c_int), + ("data", POINTER(c_float))] + +class METADATA(Structure): + _fields_ = [("classes", c_int), + ("names", POINTER(c_char_p))] + + + +#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL) +lib = CDLL("libdarknet.so", RTLD_GLOBAL) +lib.network_width.argtypes = [c_void_p] +lib.network_width.restype = c_int +lib.network_height.argtypes = [c_void_p] +lib.network_height.restype = c_int + +predict = lib.network_predict +predict.argtypes = 
[c_void_p, POINTER(c_float)] +predict.restype = POINTER(c_float) + +set_gpu = lib.cuda_set_device +set_gpu.argtypes = [c_int] + +make_image = lib.make_image +make_image.argtypes = [c_int, c_int, c_int] +make_image.restype = IMAGE + +get_network_boxes = lib.get_network_boxes +get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)] +get_network_boxes.restype = POINTER(DETECTION) + +make_network_boxes = lib.make_network_boxes +make_network_boxes.argtypes = [c_void_p] +make_network_boxes.restype = POINTER(DETECTION) + +free_detections = lib.free_detections +free_detections.argtypes = [POINTER(DETECTION), c_int] + +free_ptrs = lib.free_ptrs +free_ptrs.argtypes = [POINTER(c_void_p), c_int] + +network_predict = lib.network_predict +network_predict.argtypes = [c_void_p, POINTER(c_float)] + +reset_rnn = lib.reset_rnn +reset_rnn.argtypes = [c_void_p] + +load_net = lib.load_network +load_net.argtypes = [c_char_p, c_char_p, c_int] +load_net.restype = c_void_p + +do_nms_obj = lib.do_nms_obj +do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +do_nms_sort = lib.do_nms_sort +do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +free_image = lib.free_image +free_image.argtypes = [IMAGE] + +letterbox_image = lib.letterbox_image +letterbox_image.argtypes = [IMAGE, c_int, c_int] +letterbox_image.restype = IMAGE + +load_meta = lib.get_metadata +lib.get_metadata.argtypes = [c_char_p] +lib.get_metadata.restype = METADATA + +load_image = lib.load_image_color +load_image.argtypes = [c_char_p, c_int, c_int] +load_image.restype = IMAGE + +rgbgr_image = lib.rgbgr_image +rgbgr_image.argtypes = [IMAGE] + +predict_image = lib.network_predict_image +predict_image.argtypes = [c_void_p, IMAGE] +predict_image.restype = POINTER(c_float) + +def classify(net, meta, im): + out = predict_image(net, im) + res = [] + for i in range(meta.classes): + res.append((meta.names[i], out[i])) + res = sorted(res, key=lambda x: 
-x[1]) + return res + +def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): + im = load_image(image, 0, 0) + num = c_int(0) + pnum = pointer(num) + predict_image(net, im) + dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum) + num = pnum[0] + if (nms): do_nms_obj(dets, num, meta.classes, nms); + + res = [] + for j in range(num): + for i in range(meta.classes): + if dets[j].prob[i] > 0: + b = dets[j].bbox + res.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h))) + res = sorted(res, key=lambda x: -x[1]) + free_image(im) + free_detections(dets, num) + return res + +if __name__ == "__main__": + #net = load_net("cfg/densenet201.cfg", "/home/pjreddie/trained/densenet201.weights", 0) + #im = load_image("data/wolf.jpg", 0, 0) + #meta = load_meta("cfg/imagenet1k.data") + #r = classify(net, meta, im) + #print r[:10] + net = load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0) + meta = load_meta("cfg/coco.data") + r = detect(net, meta, "data/dog.jpg") + print(r) + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/python/proverbot.py b/workloads/realworld/uvm_prefetch_async/darknet/python/proverbot.py new file mode 100644 index 0000000000000000000000000000000000000000..095aae8f8bf8bbe47ea1768a6e2c948bb0ff8f85 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/python/proverbot.py @@ -0,0 +1,37 @@ +from darknet import * + +def predict_tactic(net, s): + prob = 0 + d = c_array(c_float, [0.0]*256) + tac = '' + if not len(s): + s = '\n' + for c in s[:-1]: + d[ord(c)] = 1 + pred = predict(net, d) + d[ord(c)] = 0 + c = s[-1] + while 1: + d[ord(c)] = 1 + pred = predict(net, d) + d[ord(c)] = 0 + pred = [pred[i] for i in range(256)] + ind = sample(pred) + c = chr(ind) + prob += math.log(pred[ind]) + if len(tac) and tac[-1] == '.': + break + tac = tac + c + return (tac, prob) + +def predict_tactics(net, s, n): + tacs = [] + for i in range(n): + reset_rnn(net) + tacs.append(predict_tactic(net, s)) + tacs = 
sorted(tacs, key=lambda x: -x[1]) + return tacs + +net = load_net("cfg/coq.test.cfg", "/home/pjreddie/backup/coq.backup", 0) +t = predict_tactics(net, "+++++\n", 10) +print t diff --git a/workloads/realworld/uvm_prefetch_async/darknet/resnet18/run_resnet18.sh b/workloads/realworld/uvm_prefetch_async/darknet/resnet18/run_resnet18.sh new file mode 100755 index 0000000000000000000000000000000000000000..10257ad02affc17f8287ea42917813fe320f5f23 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/resnet18/run_resnet18.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm_prefetch_async/darknet/resnet18/run_super.sh b/workloads/realworld/uvm_prefetch_async/darknet/resnet18/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..10257ad02affc17f8287ea42917813fe320f5f23 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/resnet18/run_super.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm_prefetch_async/darknet/resnet18_b/run_super.sh b/workloads/realworld/uvm_prefetch_async/darknet/resnet18_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..012635a1ce64ecda462e50097be554185989ae7a --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/resnet18_b/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier test ../cfg/imagenet1k.data ../cfg/resnet18_b.cfg diff --git a/workloads/realworld/uvm_prefetch_async/darknet/resnet18_t/run_super.sh b/workloads/realworld/uvm_prefetch_async/darknet/resnet18_t/run_super.sh new file mode 100755 index 
0000000000000000000000000000000000000000..0eb59b3bd65cf0186c5ed5f36eff5ec34d54298c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/resnet18_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier train ../cfg/imagenet1k.data ../cfg/resnet18_t.cfg \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/darknet/resnet50/run_resnet50.sh b/workloads/realworld/uvm_prefetch_async/darknet/resnet50/run_resnet50.sh new file mode 100755 index 0000000000000000000000000000000000000000..ce88111af06600203c1ae94421134eb3404f51a4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/resnet50/run_resnet50.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet50.cfg ../../../../../data/darknet/resnet50.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm_prefetch_async/darknet/resnet50/run_super.sh b/workloads/realworld/uvm_prefetch_async/darknet/resnet50/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ce88111af06600203c1ae94421134eb3404f51a4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/resnet50/run_super.sh @@ -0,0 +1 @@ +../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet50.cfg ../../../../../data/darknet/resnet50.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm_prefetch_async/darknet/resnet50_b/run_super.sh b/workloads/realworld/uvm_prefetch_async/darknet/resnet50_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..e6f1b1d59b612bef36d04af547bf61808261eb12 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/resnet50_b/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier test 
../cfg/imagenet1k.data ../cfg/resnet50_b.cfg diff --git a/workloads/realworld/uvm_prefetch_async/darknet/resnet50_t/run_super.sh b/workloads/realworld/uvm_prefetch_async/darknet/resnet50_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..4d4c7feebd1bd5bdcded72e3d4cf58045949ac90 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/resnet50_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet classifier predict ../cfg/imagenet1k.data ../cfg/resnet18.cfg ../../../../../data/darknet/resnet18.weights ../data/dog.jpg +../darknet classifier train ../cfg/imagenet1k.data ../cfg/resnet50_t.cfg diff --git a/workloads/realworld/uvm_prefetch_async/darknet/scripts/dice_label.sh b/workloads/realworld/uvm_prefetch_async/darknet/scripts/dice_label.sh new file mode 100644 index 0000000000000000000000000000000000000000..f19f8a49481b46d5a04dd18b1b05af8928b21957 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/scripts/dice_label.sh @@ -0,0 +1,20 @@ +mkdir -p images +mkdir -p images/orig +mkdir -p images/train +mkdir -p images/val + +ffmpeg -i Face1.mp4 images/orig/face1_%6d.jpg +ffmpeg -i Face2.mp4 images/orig/face2_%6d.jpg +ffmpeg -i Face3.mp4 images/orig/face3_%6d.jpg +ffmpeg -i Face4.mp4 images/orig/face4_%6d.jpg +ffmpeg -i Face5.mp4 images/orig/face5_%6d.jpg +ffmpeg -i Face6.mp4 images/orig/face6_%6d.jpg + +mogrify -resize 100x100^ -gravity center -crop 100x100+0+0 +repage images/orig/* + +ls images/orig/* | shuf | head -n 1000 | xargs mv -t images/val +mv images/orig/* images/train + +find `pwd`/images/train > dice.train.list -name \*.jpg +find `pwd`/images/val > dice.val.list -name \*.jpg + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/scripts/gen_tactic.sh b/workloads/realworld/uvm_prefetch_async/darknet/scripts/gen_tactic.sh new file mode 100755 index 0000000000000000000000000000000000000000..ffa30d27754dacdd03bd5996d41cbfab14db0f39 --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch_async/darknet/scripts/gen_tactic.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Usage: +# wget http://pjreddie.com/media/files/peek.weights +# scripts/gen_tactic.sh < data/goal.txt +./darknet rnn generatetactic cfg/gru.cfg peek.weights 2>/dev/null diff --git a/workloads/realworld/uvm_prefetch_async/darknet/scripts/get_coco_dataset.sh b/workloads/realworld/uvm_prefetch_async/darknet/scripts/get_coco_dataset.sh new file mode 100644 index 0000000000000000000000000000000000000000..28463015d1748fd331e071a0a778c6d4500b29ef --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/scripts/get_coco_dataset.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Clone COCO API +git clone https://github.com/pdollar/coco +cd coco + +mkdir images +cd images + +# Download Images +wget -c https://pjreddie.com/media/files/train2014.zip +wget -c https://pjreddie.com/media/files/val2014.zip + +# Unzip +unzip -q train2014.zip +unzip -q val2014.zip + +cd .. + +# Download COCO Metadata +wget -c https://pjreddie.com/media/files/instances_train-val2014.zip +wget -c https://pjreddie.com/media/files/coco/5k.part +wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part +wget -c https://pjreddie.com/media/files/coco/labels.tgz +tar xzf labels.tgz +unzip -q instances_train-val2014.zip + +# Set Up Image Lists +paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt +paste <(awk "{print \"$PWD\"}" trainvalno5k.txt + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/scripts/imagenet_label.sh b/workloads/realworld/uvm_prefetch_async/darknet/scripts/imagenet_label.sh new file mode 100644 index 0000000000000000000000000000000000000000..01e4306ee3cf7322427374f01c766bcdef970922 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/scripts/imagenet_label.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +mkdir -p labelled +wd=`pwd` + +for f in val/*.xml; +do +label=`grep -m1 "" $f | grep -oP '\K[^<]*'` +im=`echo $f | sed 's/val/imgs/; s/xml/JPEG/'` 
def convert(size, box):
    """Convert a VOC box to normalized YOLO coordinates.

    size: (image_width, image_height)
    box:  (xmin, xmax, ymin, ymax) in pixels
    Returns (x_center, y_center, width, height), each divided by the
    image dimension. The "- 1" shifts pixel coordinates toward 0-based
    indexing, matching the rest of the label pipeline.
    """
    inv_w = 1. / size[0]
    inv_h = 1. / size[1]
    # Centers first (with the 1-pixel shift), then extents.
    cx = ((box[0] + box[1]) / 2.0 - 1) * inv_w
    cy = ((box[2] + box[3]) / 2.0 - 1) * inv_h
    bw = (box[1] - box[0]) * inv_w
    bh = (box[3] - box[2]) * inv_h
    return (cx, cy, bw, bh)
getcwd() + +for year, image_set in sets: + if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)): + os.makedirs('VOCdevkit/VOC%s/labels/'%(year)) + image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split() + list_file = open('%s_%s.txt'%(year, image_set), 'w') + for image_id in image_ids: + list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id)) + convert_annotation(year, image_id) + list_file.close() + +os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt") +os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt") + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/activation_kernels.cu b/workloads/realworld/uvm_prefetch_async/darknet/src/activation_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..ee4d511541d8a5f5fd6f6ba9b538e26bede124e3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/activation_kernels.cu @@ -0,0 +1,242 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "activations.h" +#include "cuda_dark.h" +} + + +__device__ float lhtan_activate_kernel(float x) +{ + if(x < 0) return .001f*x; + if(x > 1) return .001f*(x-1.f) + 1.f; + return x; +} +__device__ float lhtan_gradient_kernel(float x) +{ + if(x > 0 && x < 1) return 1; + return .001; +} + +__device__ float hardtan_activate_kernel(float x) +{ + if (x < -1) return -1; + if (x > 1) return 1; + return x; +} +__device__ float linear_activate_kernel(float x){return x;} +__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));} +__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;} +__device__ float relu_activate_kernel(float x){return x*(x>0);} +__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);} +__device__ float selu_activate_kernel(float x){return (x >= 
0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x)-1);} +__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;} +__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;} +__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1f*x;} +__device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);} +__device__ float plse_activate_kernel(float x) +{ + if(x < -4) return .01f * (x + 4); + if(x > 4) return .01f * (x - 4) + 1; + return .125f*x + .5f; +} +__device__ float stair_activate_kernel(float x) +{ + int n = floorf(x); + if (n%2 == 0) return floorf(x/2); + else return (x - n) + floorf(x/2); +} + + +__device__ float hardtan_gradient_kernel(float x) +{ + if (x > -1 && x < 1) return 1; + return 0; +} +__device__ float linear_gradient_kernel(float x){return 1;} +__device__ float logistic_gradient_kernel(float x){return (1-x)*x;} +__device__ float loggy_gradient_kernel(float x) +{ + float y = (x+1)/2; + return 2*(1-y)*y; +} +__device__ float relu_gradient_kernel(float x){return (x>0);} +__device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);} +__device__ float selu_gradient_kernel(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);} +__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;} +__device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;} +__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;} +__device__ float tanh_gradient_kernel(float x){return 1-x*x;} +__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? 
// Dispatch a single activation on the device by activation type.
// Falls through to 0 for any unhandled enum value, matching the
// CPU-side activate() in activations.c.
__device__ float activate_kernel(float x, ACTIVATION a)
{
    switch (a) {
        case LINEAR:   return linear_activate_kernel(x);
        case LOGISTIC: return logistic_activate_kernel(x);
        case LOGGY:    return loggy_activate_kernel(x);
        case RELU:     return relu_activate_kernel(x);
        case ELU:      return elu_activate_kernel(x);
        case SELU:     return selu_activate_kernel(x);
        case RELIE:    return relie_activate_kernel(x);
        case RAMP:     return ramp_activate_kernel(x);
        case LEAKY:    return leaky_activate_kernel(x);
        case TANH:     return tanh_activate_kernel(x);
        case PLSE:     return plse_activate_kernel(x);
        case STAIR:    return stair_activate_kernel(x);
        case HARDTAN:  return hardtan_activate_kernel(x);
        case LHTAN:    return lhtan_activate_kernel(x);
    }
    return 0;
}
// Prefetch the managed buffers to the GPU, then launch the pairwise
// binary-activation kernel over the first n/2 output elements.
extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y)
{
    cudaStream_t stream1;
    cudaStream_t stream2;
    cudaStreamCreate(&stream1);
    cudaStreamCreate(&stream2);

    // NOTE(review): the kernel reads x[0..n) but only n/2 floats are
    // prefetched here; on managed memory that is a prefetch-coverage
    // (performance) question, not a correctness one, so the original
    // sizes are preserved.
    cudaMemPrefetchAsync(x, n / 2 * sizeof(float), GPU_DEVICE, stream1);
    cudaStreamSynchronize(stream1);
    cudaMemPrefetchAsync(y, n / 2 * sizeof(float), GPU_DEVICE, stream2);
    cudaStreamSynchronize(stream2);

    // Fix: launch configuration was garbled to "<<>>"; restore darknet's
    // standard sizing of cuda_gridsize(work) blocks of BLOCK threads.
    binary_activate_array_kernel<<<cuda_gridsize(n / 2), BLOCK>>>(x, n / 2, size, a, y);
    check_error(cudaPeekAtLastError());

    // Fix: the streams were created but never destroyed, leaking one
    // pair of streams per call.
    cudaStreamDestroy(stream1);
    cudaStreamDestroy(stream2);
}
// Apply activation `a` in place to the n managed floats at x, prefetching
// them to the GPU first so the kernel does not fault pages on demand.
extern "C" void activate_array_gpu(float *x, int n, ACTIVATION a)
{
    cudaStream_t stream1;
    cudaStreamCreate(&stream1);

    cudaMemPrefetchAsync(x, n * sizeof(float), GPU_DEVICE, stream1);
    cudaStreamSynchronize(stream1);

    // Fix: launch configuration was garbled to "<<>>"; restore darknet's
    // standard sizing of cuda_gridsize(n) blocks of BLOCK threads.
    activate_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a);
    check_error(cudaPeekAtLastError());

    // Fix: the stream was created but never destroyed, leaking one
    // stream per call.
    cudaStreamDestroy(stream1);
}
activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_activation_layer(layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_activation_layer_gpu(layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_activation_layer_gpu(layer l, network net) +{ + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/activation_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/activation_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..42118a84e83f59a8997e354959404d1283a3004c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/activation_layer.h @@ -0,0 +1,19 @@ +#ifndef ACTIVATION_LAYER_H +#define ACTIVATION_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_activation_layer(int batch, int inputs, ACTIVATION activation); + +void forward_activation_layer(layer l, network net); +void backward_activation_layer(layer l, network net); + +#ifdef GPU +void forward_activation_layer_gpu(layer l, network net); +void backward_activation_layer_gpu(layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/activations.c b/workloads/realworld/uvm_prefetch_async/darknet/src/activations.c new file mode 100644 index 0000000000000000000000000000000000000000..da1a17a89b46b6c41fa80b5dd113e1b30c910712 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/activations.c @@ -0,0 +1,150 @@ +#include "activations.h" + +#include +#include +#include +#include + +char *get_activation_string(ACTIVATION a) +{ + 
switch(a){ + case LOGISTIC: + return "logistic"; + case LOGGY: + return "loggy"; + case RELU: + return "relu"; + case ELU: + return "elu"; + case SELU: + return "selu"; + case RELIE: + return "relie"; + case RAMP: + return "ramp"; + case LINEAR: + return "linear"; + case TANH: + return "tanh"; + case PLSE: + return "plse"; + case LEAKY: + return "leaky"; + case STAIR: + return "stair"; + case HARDTAN: + return "hardtan"; + case LHTAN: + return "lhtan"; + default: + break; + } + return "relu"; +} + +ACTIVATION get_activation(char *s) +{ + if (strcmp(s, "logistic")==0) return LOGISTIC; + if (strcmp(s, "loggy")==0) return LOGGY; + if (strcmp(s, "relu")==0) return RELU; + if (strcmp(s, "elu")==0) return ELU; + if (strcmp(s, "selu")==0) return SELU; + if (strcmp(s, "relie")==0) return RELIE; + if (strcmp(s, "plse")==0) return PLSE; + if (strcmp(s, "hardtan")==0) return HARDTAN; + if (strcmp(s, "lhtan")==0) return LHTAN; + if (strcmp(s, "linear")==0) return LINEAR; + if (strcmp(s, "ramp")==0) return RAMP; + if (strcmp(s, "leaky")==0) return LEAKY; + if (strcmp(s, "tanh")==0) return TANH; + if (strcmp(s, "stair")==0) return STAIR; + fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s); + return RELU; +} + +float activate(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_activate(x); + case LOGISTIC: + return logistic_activate(x); + case LOGGY: + return loggy_activate(x); + case RELU: + return relu_activate(x); + case ELU: + return elu_activate(x); + case SELU: + return selu_activate(x); + case RELIE: + return relie_activate(x); + case RAMP: + return ramp_activate(x); + case LEAKY: + return leaky_activate(x); + case TANH: + return tanh_activate(x); + case PLSE: + return plse_activate(x); + case STAIR: + return stair_activate(x); + case HARDTAN: + return hardtan_activate(x); + case LHTAN: + return lhtan_activate(x); + } + return 0; +} + +void activate_array(float *x, const int n, const ACTIVATION a) +{ + int i; + for(i = 0; i < 
n; ++i){ + x[i] = activate(x[i], a); + } +} + +float gradient(float x, ACTIVATION a) +{ + switch(a){ + case LINEAR: + return linear_gradient(x); + case LOGISTIC: + return logistic_gradient(x); + case LOGGY: + return loggy_gradient(x); + case RELU: + return relu_gradient(x); + case ELU: + return elu_gradient(x); + case SELU: + return selu_gradient(x); + case RELIE: + return relie_gradient(x); + case RAMP: + return ramp_gradient(x); + case LEAKY: + return leaky_gradient(x); + case TANH: + return tanh_gradient(x); + case PLSE: + return plse_gradient(x); + case STAIR: + return stair_gradient(x); + case HARDTAN: + return hardtan_gradient(x); + case LHTAN: + return lhtan_gradient(x); + } + return 0; +} + +void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta) +{ + int i; + for(i = 0; i < n; ++i){ + delta[i] *= gradient(x[i], a); + } +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/activations.h b/workloads/realworld/uvm_prefetch_async/darknet/src/activations.h new file mode 100644 index 0000000000000000000000000000000000000000..eec28d5b692ede3975e01a4d454ace20e8a9fdd8 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/activations.h @@ -0,0 +1,87 @@ +#ifndef ACTIVATIONS_H +#define ACTIVATIONS_H +#include "darknet.h" +#include "cuda_dark.h" +#include "math.h" + +ACTIVATION get_activation(char *s); + +char *get_activation_string(ACTIVATION a); +float activate(float x, ACTIVATION a); +float gradient(float x, ACTIVATION a); +void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta); +void activate_array(float *x, const int n, const ACTIVATION a); +#ifdef GPU +void activate_array_gpu(float *x, int n, ACTIVATION a); +void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta); +#endif + +static inline float stair_activate(float x) +{ + int n = floor(x); + if (n%2 == 0) return floor(x/2.); + else return (x - n) + floor(x/2.); +} +static inline float hardtan_activate(float x) +{ 
/* Piecewise "plse" activation: a .125-slope linear segment on [-4, 4]
 * with shallow .01-slope tails that join it continuously. */
static inline float plse_activate(float x)
{
    if (x > 4) {
        return .01 * (x - 4) + 1;   /* upper tail */
    }
    if (x < -4) {
        return .01 * (x + 4);       /* lower tail */
    }
    return .125 * x + .5;           /* central linear segment */
}
1 : .01;} +static inline float ramp_gradient(float x){return (x>0)+.1;} +static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;} +static inline float tanh_gradient(float x){return 1-x*x;} +static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01 : .125;} + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/avgpool_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/avgpool_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..7d17fa8f829aba43652117c141fb8b54ef4cf5dc --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/avgpool_layer.c @@ -0,0 +1,71 @@ +#include "avgpool_layer.h" +#include "cuda_dark.h" +#include + +avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) +{ + fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c); + avgpool_layer l = {0}; + l.type = AVGPOOL; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.out_w = 1; + l.out_h = 1; + l.out_c = c; + l.outputs = l.out_c; + l.inputs = h*w*c; + int output_size = l.outputs * batch; + l.output = calloc(output_size, sizeof(float)); + l.delta = calloc(output_size, sizeof(float)); + l.forward = forward_avgpool_layer; + l.backward = backward_avgpool_layer; + #ifdef GPU + l.forward_gpu = forward_avgpool_layer_gpu; + l.backward_gpu = backward_avgpool_layer_gpu; + l.output_gpu = cuda_make_array(l.output, output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); + #endif + return l; +} + +void resize_avgpool_layer(avgpool_layer *l, int w, int h) +{ + l->w = w; + l->h = h; + l->inputs = h*w*l->c; +} + +void forward_avgpool_layer(const avgpool_layer l, network net) +{ + int b,i,k; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < l.c; ++k){ + int out_index = k + b*l.c; + l.output[out_index] = 0; + for(i = 0; i < l.h*l.w; ++i){ + int in_index = i + l.h*l.w*(k + b*l.c); + l.output[out_index] += net.input[in_index]; + } + l.output[out_index] /= l.h*l.w; + } + } +} + +void 
backward_avgpool_layer(const avgpool_layer l, network net) +{ + int b,i,k; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < l.c; ++k){ + int out_index = k + b*l.c; + for(i = 0; i < l.h*l.w; ++i){ + int in_index = i + l.h*l.w*(k + b*l.c); + net.delta[in_index] += l.delta[out_index] / (l.h*l.w); + } + } + } +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/avgpool_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/avgpool_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..576ad1db9e9cb87640b0c3f764e2bbfbaae4b2b3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/avgpool_layer.h @@ -0,0 +1,23 @@ +#ifndef AVGPOOL_LAYER_H +#define AVGPOOL_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +typedef layer avgpool_layer; + +image get_avgpool_image(avgpool_layer l); +avgpool_layer make_avgpool_layer(int batch, int w, int h, int c); +void resize_avgpool_layer(avgpool_layer *l, int w, int h); +void forward_avgpool_layer(const avgpool_layer l, network net); +void backward_avgpool_layer(const avgpool_layer l, network net); + +#ifdef GPU +void forward_avgpool_layer_gpu(avgpool_layer l, network net); +void backward_avgpool_layer_gpu(avgpool_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/avgpool_layer_kernels.cu b/workloads/realworld/uvm_prefetch_async/darknet/src/avgpool_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..bd5afbf394bb702332188fa279af7c2e67583b93 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/avgpool_layer_kernels.cu @@ -0,0 +1,81 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "avgpool_layer.h" +#include "cuda_dark.h" +} + +__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) 
// Launch one thread per (batch, channel) pair; each thread averages its
// h*w spatial window. Managed buffers are prefetched to the GPU first.
extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network net)
{
    size_t n = layer.c * layer.batch;

    cudaStream_t stream1;
    cudaStream_t stream2;
    cudaStreamCreate(&stream1);
    cudaStreamCreate(&stream2);

    // NOTE(review): net.input_gpu holds h*w*c*batch floats but only
    // c*batch are prefetched here; UVM keeps this correct (remaining
    // pages fault in on demand), so the original sizes are preserved.
    cudaMemPrefetchAsync(net.input_gpu, n * sizeof(float), GPU_DEVICE, stream1);
    cudaStreamSynchronize(stream1);
    cudaMemPrefetchAsync(layer.output_gpu, n * sizeof(float), GPU_DEVICE, stream2);
    cudaStreamSynchronize(stream2);

    // Fix: launch configuration was garbled to "<<>>"; restore darknet's
    // standard sizing of cuda_gridsize(n) blocks of BLOCK threads.
    forward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.w, layer.h, layer.c, net.input_gpu, layer.output_gpu);
    check_error(cudaPeekAtLastError());

    // Fix: the streams were created but never destroyed, leaking one
    // pair of streams per call.
    cudaStreamDestroy(stream1);
    cudaStreamDestroy(stream2);
}
a/workloads/realworld/uvm_prefetch_async/darknet/src/batchnorm_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/batchnorm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..ebff387cc4b0365173fb6727efd80ebc80bfbd41 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/batchnorm_layer.c @@ -0,0 +1,279 @@ +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "blas.h" +#include + +layer make_batchnorm_layer(int batch, int w, int h, int c) +{ + fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c); + layer l = {0}; + l.type = BATCHNORM; + l.batch = batch; + l.h = l.out_h = h; + l.w = l.out_w = w; + l.c = l.out_c = c; + l.output = calloc(h * w * c * batch, sizeof(float)); + l.delta = calloc(h * w * c * batch, sizeof(float)); + l.inputs = w*h*c; + l.outputs = l.inputs; + + l.scales = calloc(c, sizeof(float)); + l.scale_updates = calloc(c, sizeof(float)); + l.biases = calloc(c, sizeof(float)); + l.bias_updates = calloc(c, sizeof(float)); + int i; + for(i = 0; i < c; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(c, sizeof(float)); + l.variance = calloc(c, sizeof(float)); + + l.rolling_mean = calloc(c, sizeof(float)); + l.rolling_variance = calloc(c, sizeof(float)); + + l.forward = forward_batchnorm_layer; + l.backward = backward_batchnorm_layer; +#ifdef GPU + l.forward_gpu = forward_batchnorm_layer_gpu; + l.backward_gpu = backward_batchnorm_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, h * w * c * batch); + l.delta_gpu = cuda_make_array(l.delta, h * w * c * batch); + + l.biases_gpu = cuda_make_array(l.biases, c); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, c); + + l.scales_gpu = cuda_make_array(l.scales, c); + l.scale_updates_gpu = cuda_make_array(l.scale_updates, c); + + l.mean_gpu = cuda_make_array(l.mean, c); + l.variance_gpu = cuda_make_array(l.variance, c); + + l.rolling_mean_gpu = cuda_make_array(l.mean, c); + l.rolling_variance_gpu = 
cuda_make_array(l.variance, c); + + l.mean_delta_gpu = cuda_make_array(l.mean, c); + l.variance_delta_gpu = cuda_make_array(l.variance, c); + + l.x_gpu = cuda_make_array(l.output, l.batch*l.outputs); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*l.outputs); + #ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); + + #endif +#endif + return l; +} + +void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + int i,b,f; + for(f = 0; f < n; ++f){ + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int index = i + size*(f + n*b); + sum += delta[index] * x_norm[index]; + } + } + scale_updates[f] += sum; + } +} + +void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + + int i,j,k; + for(i = 0; i < filters; ++i){ + mean_delta[i] = 0; + for (j = 0; j < batch; ++j) { + for (k = 0; k < spatial; ++k) { + int index = j*filters*spatial + i*spatial + k; + mean_delta[i] += delta[index]; + } + } + mean_delta[i] *= (-1./sqrt(variance[i] + .00001f)); + } +} +void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + + int i,j,k; + for(i = 0; i < filters; ++i){ + variance_delta[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance_delta[i] += delta[index]*(x[index] - mean[i]); + } + } + variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.)); + } +} +void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float 
*delta) +{ + int f, j, k; + for(j = 0; j < batch; ++j){ + for(f = 0; f < filters; ++f){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + f*spatial + k; + delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f)) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); + } + } + } +} + +void resize_batchnorm_layer(layer *layer, int w, int h) +{ + fprintf(stderr, "Not implemented\n"); +} + +void forward_batchnorm_layer(layer l, network net) +{ + if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1); + if(net.train){ + mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean); + variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance); + + scal_cpu(l.out_c, .99, l.rolling_mean, 1); + axpy_cpu(l.out_c, .01, l.mean, 1, l.rolling_mean, 1); + scal_cpu(l.out_c, .99, l.rolling_variance, 1); + axpy_cpu(l.out_c, .01, l.variance, 1, l.rolling_variance, 1); + + normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w); + copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1); + } else { + normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h*l.out_w); + } + scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h*l.out_w); + add_bias(l.output, l.biases, l.batch, l.out_c, l.out_h*l.out_w); +} + +void backward_batchnorm_layer(layer l, network net) +{ + if(!net.train){ + l.mean = l.rolling_mean; + l.variance = l.rolling_variance; + } + backward_bias(l.bias_updates, l.delta, l.batch, l.out_c, l.out_w*l.out_h); + backward_scale_cpu(l.x_norm, l.delta, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates); + + scale_bias(l.delta, l.scales, l.batch, l.out_c, l.out_h*l.out_w); + + mean_delta_cpu(l.delta, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta); + variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta); 
+ normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.out_c, l.out_w*l.out_h, l.delta); + if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_batchnorm_layer(layer l) +{ + cuda_pull_array(l.scales_gpu, l.scales, l.c); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.c); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.c); +} +void push_batchnorm_layer(layer l) +{ + cuda_push_array(l.scales_gpu, l.scales, l.c); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.c); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.c); +} + +void forward_batchnorm_layer_gpu(layer l, network net) +{ + if(l.type == BATCHNORM) copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); + if (net.train) { +#ifdef CUDNN + float one = 1; + float zero = 0; + cudnnBatchNormalizationForwardTraining(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + l.dstTensorDesc, + l.x_gpu, + l.dstTensorDesc, + l.output_gpu, + l.normTensorDesc, + l.scales_gpu, + l.biases_gpu, + .01, + l.rolling_mean_gpu, + l.rolling_variance_gpu, + .00001, + l.mean_gpu, + l.variance_gpu); +#else + fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu); + fast_variance_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.variance_gpu); + + scal_gpu(l.out_c, .99, l.rolling_mean_gpu, 1); + axpy_gpu(l.out_c, .01, l.mean_gpu, 1, l.rolling_mean_gpu, 1); + scal_gpu(l.out_c, .99, l.rolling_variance_gpu, 1); + axpy_gpu(l.out_c, .01, l.variance_gpu, 1, l.rolling_variance_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); + normalize_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_norm_gpu, 1); + + scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + 
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); +#endif + } else { + normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); + scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); + } + +} + +void backward_batchnorm_layer_gpu(layer l, network net) +{ + if(!net.train){ + l.mean_gpu = l.rolling_mean_gpu; + l.variance_gpu = l.rolling_variance_gpu; + } +#ifdef CUDNN + float one = 1; + float zero = 0; + cudnnBatchNormalizationBackward(cudnn_handle(), + CUDNN_BATCHNORM_SPATIAL, + &one, + &zero, + &one, + &one, + l.dstTensorDesc, + l.x_gpu, + l.dstTensorDesc, + l.delta_gpu, + l.dstTensorDesc, + l.x_norm_gpu, + l.normTensorDesc, + l.scales_gpu, + l.scale_updates_gpu, + l.bias_updates_gpu, + .00001, + l.mean_gpu, + l.variance_gpu); + copy_gpu(l.outputs*l.batch, l.x_norm_gpu, 1, l.delta_gpu, 1); +#else + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h); + backward_scale_gpu(l.x_norm_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates_gpu); + + scale_bias_gpu(l.delta_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); + + fast_mean_delta_gpu(l.delta_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta_gpu); + fast_variance_delta_gpu(l.x_gpu, l.delta_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta_gpu); + normalize_delta_gpu(l.x_gpu, l.mean_gpu, l.variance_gpu, l.mean_delta_gpu, l.variance_delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu); +#endif + if(l.type == BATCHNORM) copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/batchnorm_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/batchnorm_layer.h new file mode 100644 index 
0000000000000000000000000000000000000000..25a18a3c8f2569bab135b088501248159e1cae11 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/batchnorm_layer.h @@ -0,0 +1,19 @@ +#ifndef BATCHNORM_LAYER_H +#define BATCHNORM_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +layer make_batchnorm_layer(int batch, int w, int h, int c); +void forward_batchnorm_layer(layer l, network net); +void backward_batchnorm_layer(layer l, network net); + +#ifdef GPU +void forward_batchnorm_layer_gpu(layer l, network net); +void backward_batchnorm_layer_gpu(layer l, network net); +void pull_batchnorm_layer(layer l); +void push_batchnorm_layer(layer l); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/blas.c b/workloads/realworld/uvm_prefetch_async/darknet/src/blas.c new file mode 100644 index 0000000000000000000000000000000000000000..9e1604449ba9aeb9decdc7f0395a38bd3b478671 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/blas.c @@ -0,0 +1,351 @@ +#include "blas.h" + +#include +#include +#include +#include +#include +#include +void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) +{ + int b,i,j,k; + int out_c = c/(stride*stride); + + for(b = 0; b < batch; ++b){ + for(k = 0; k < c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int in_index = i + w*(j + h*(k + c*b)); + int c2 = k % out_c; + int offset = k / out_c; + int w2 = i*stride + offset % stride; + int h2 = j*stride + offset / stride; + int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); + if(forward) out[out_index] = x[in_index]; + else out[in_index] = x[out_index]; + } + } + } + } +} + +void flatten(float *x, int size, int layers, int batch, int forward) +{ + float *swap = calloc(size*layers*batch, sizeof(float)); + int i,c,b; + for(b = 0; b < batch; ++b){ + for(c = 0; c < layers; ++c){ + for(i = 0; i < size; ++i){ + int i1 = b*layers*size + c*size + i; + int i2 = 
b*layers*size + i*layers + c; + if (forward) swap[i2] = x[i1]; + else swap[i1] = x[i2]; + } + } + } + memcpy(x, swap, size*layers*batch*sizeof(float)); + free(swap); +} + +void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c) +{ + int i; + for(i = 0; i < n; ++i){ + c[i] = s[i]*a[i] + (1-s[i])*(b ? b[i] : 0); + } +} + +void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc) +{ + int i; + for(i = 0; i < n; ++i){ + if(da) da[i] += dc[i] * s[i]; + if(db) db[i] += dc[i] * (1-s[i]); + ds[i] += dc[i] * (a[i] - b[i]); + } +} + +void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) +{ + int stride = w1/w2; + int sample = w2/w1; + assert(stride == h1/h2); + assert(sample == h2/h1); + if(stride < 1) stride = 1; + if(sample < 1) sample = 1; + int minw = (w1 < w2) ? w1 : w2; + int minh = (h1 < h2) ? h1 : h2; + int minc = (c1 < c2) ? c1 : c2; + + int i,j,k,b; + for(b = 0; b < batch; ++b){ + for(k = 0; k < minc; ++k){ + for(j = 0; j < minh; ++j){ + for(i = 0; i < minw; ++i){ + int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); + int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); + out[out_index] = s1*out[out_index] + s2*add[add_index]; + } + } + } + } +} + +void mean_cpu(float *x, int batch, int filters, int spatial, float *mean) +{ + float scale = 1./(batch * spatial); + int i,j,k; + for(i = 0; i < filters; ++i){ + mean[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + mean[i] += x[index]; + } + } + mean[i] *= scale; + } +} + +void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) +{ + float scale = 1./(batch * spatial - 1); + int i,j,k; + for(i = 0; i < filters; ++i){ + variance[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance[i] += pow((x[index] 
- mean[i]), 2); + } + } + variance[i] *= scale; + } +} + +void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial) +{ + int b,f,i; + for(b = 0; b < batch; ++b){ + for(i = 0; i < spatial; ++i){ + float sum = 0; + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + sum += powf(x[index], 2); + } + sum = sqrtf(sum); + for(f = 0; f < filters; ++f){ + int index = b*filters*spatial + f*spatial + i; + x[index] /= sum; + dx[index] = (1 - x[index]) / sum; + } + } + } +} + + +void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) +{ + int b, f, i; + for(b = 0; b < batch; ++b){ + for(f = 0; f < filters; ++f){ + for(i = 0; i < spatial; ++i){ + int index = b*filters*spatial + f*spatial + i; + x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + .000001f); + } + } + } +} + +void const_cpu(int N, float ALPHA, float *X, int INCX) +{ + int i; + for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; +} + +void mul_cpu(int N, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] *= X[i*INCX]; +} + +void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] = pow(X[i*INCX], ALPHA); +} + +void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX]; +} + +void scal_cpu(int N, float ALPHA, float *X, int INCX) +{ + int i; + for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA; +} + +void fill_cpu(int N, float ALPHA, float *X, int INCX) +{ + int i; + for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; +} + +void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +{ + int i, j; + int index = 0; + for(j = 0; j < B; ++j) { + for(i = 0; i < NX; ++i){ + if(X) X[j*NX + i] += OUT[index]; + ++index; + } + for(i = 0; i < NY; ++i){ + if(Y) Y[j*NY + i] += OUT[index]; + ++index; + } + } +} + +void inter_cpu(int NX, float *X, int NY, float *Y, int B, float 
*OUT) +{ + int i, j; + int index = 0; + for(j = 0; j < B; ++j) { + for(i = 0; i < NX; ++i){ + OUT[index++] = X[j*NX + i]; + } + for(i = 0; i < NY; ++i){ + OUT[index++] = Y[j*NY + i]; + } + } +} + +void copy_cpu(int N, float *X, int INCX, float *Y, int INCY) +{ + int i; + for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX]; +} + +void mult_add_into_cpu(int N, float *X, float *Y, float *Z) +{ + int i; + for(i = 0; i < N; ++i) Z[i] += X[i]*Y[i]; +} + +void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float diff = truth[i] - pred[i]; + float abs_val = fabs(diff); + if(abs_val < 1) { + error[i] = diff * diff; + delta[i] = diff; + } + else { + error[i] = 2*abs_val - 1; + delta[i] = (diff < 0) ? 1 : -1; + } + } +} + +void l1_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float diff = truth[i] - pred[i]; + error[i] = fabs(diff); + delta[i] = diff > 0 ? 1 : -1; + } +} + +void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float t = truth[i]; + float p = pred[i]; + error[i] = (t) ? 
-log(p) : 0; + delta[i] = t-p; + } +} + +void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float t = truth[i]; + float p = pred[i]; + error[i] = -t*log(p) - (1-t)*log(1-p); + delta[i] = t-p; + } +} + +void l2_cpu(int n, float *pred, float *truth, float *delta, float *error) +{ + int i; + for(i = 0; i < n; ++i){ + float diff = truth[i] - pred[i]; + error[i] = diff * diff; + delta[i] = diff; + } +} + +float dot_cpu(int N, float *X, int INCX, float *Y, int INCY) +{ + int i; + float dot = 0; + for(i = 0; i < N; ++i) dot += X[i*INCX] * Y[i*INCY]; + return dot; +} + +void softmax(float *input, int n, float temp, int stride, float *output) +{ + int i; + float sum = 0; + float largest = -FLT_MAX; + for(i = 0; i < n; ++i){ + if(input[i*stride] > largest) largest = input[i*stride]; + } + for(i = 0; i < n; ++i){ + float e = exp(input[i*stride]/temp - largest/temp); + sum += e; + output[i*stride] = e; + } + for(i = 0; i < n; ++i){ + output[i*stride] /= sum; + } +} + + +void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) +{ + int g, b; + for(b = 0; b < batch; ++b){ + for(g = 0; g < groups; ++g){ + softmax(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset); + } + } +} + +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + int i, j, k, b; + for(b = 0; b < batch; ++b){ + for(k = 0; k < c; ++k){ + for(j = 0; j < h*stride; ++j){ + for(i = 0; i < w*stride; ++i){ + int in_index = b*w*h*c + k*w*h + (j/stride)*w + i/stride; + int out_index = b*w*h*c*stride*stride + k*w*h*stride*stride + j*w*stride + i; + if(forward) out[out_index] = scale*in[in_index]; + else in[in_index] += scale*out[out_index]; + } + } + } + } +} + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/blas.h 
b/workloads/realworld/uvm_prefetch_async/darknet/src/blas.h new file mode 100644 index 0000000000000000000000000000000000000000..5d24a9aea70d8050b05098aa7a5634576444a32c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/blas.h @@ -0,0 +1,105 @@ +#ifndef BLAS_H +#define BLAS_H +#include "darknet.h" + +void flatten(float *x, int size, int layers, int batch, int forward); +void pm(int M, int N, float *A); +float *random_matrix(int rows, int cols); +void time_random_matrix(int TA, int TB, int m, int k, int n); +void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); + +void test_blas(); + +void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void mult_add_into_cpu(int N, float *X, float *Y, float *Z); + +void const_cpu(int N, float ALPHA, float *X, int INCX); +void constrain_gpu(int N, float ALPHA, float * X, int INCX); +void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void mul_cpu(int N, float *X, int INCX, float *Y, int INCY); + +int test_gpu_blas(); +void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out); + +void mean_cpu(float *x, int batch, int filters, int spatial, float *mean); +void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); + +void scale_bias(float *output, float *scales, int batch, int n, int size); +void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); +void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); +void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); +void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int 
filters, int spatial, float *delta); +void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial); + +void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error); +void l2_cpu(int n, float *pred, float *truth, float *delta, float *error); +void l1_cpu(int n, float *pred, float *truth, float *delta, float *error); +void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); +void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); +void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c); +void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc); + +void softmax(float *input, int n, float temp, int stride, float *output); +void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); + +#ifdef GPU +#include "cuda_dark.h" +#include "tree.h" + +void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); +void axpy_gpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); +void copy_gpu(int N, float * X, int INCX, float * Y, int INCY); +void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); +void add_gpu(int N, float ALPHA, float * X, int INCX); +void supp_gpu(int N, float ALPHA, float * X, int INCX); +void mask_gpu(int N, float * X, float mask_num, float * mask, float val); +void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale); +void const_gpu(int N, float ALPHA, float *X, int INCX); +void pow_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void mul_gpu(int N, float *X, int INCX, float *Y, int INCY); + +void mean_gpu(float *x, int batch, int filters, int spatial, float *mean); +void 
variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); +void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); +void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial); + +void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); + +void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); +void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); + +void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); +void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean); +void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out); +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size); +void add_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); + +void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); +void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); +void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error); +void l2_gpu(int n, float *pred, float *truth, float *delta, float *error); +void l1_gpu(int n, float *pred, float *truth, float *delta, float *error); +void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error); +void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, 
float *ds, int num, float *dc); +void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c); +void mult_add_into_gpu(int num, float *a, float *b, float *c); +void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT); +void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT); + +void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); + +void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); +void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t); +void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t); + +void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out); +void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier); +void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); + +#endif +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/blas_kernels.cu b/workloads/realworld/uvm_prefetch_async/darknet/src/blas_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..c8175cc74a61d5f21f1bcbba355507285322eb4a --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/blas_kernels.cu @@ -0,0 +1,1077 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" +#include + +extern "C" { +#include "blas.h" +#include "cuda_dark.h" +#include "utils.h" +} + +__global__ void scale_bias_kernel(float *output, float *biases, int n, int size) +{ + int offset = blockIdx.x * blockDim.x + threadIdx.x; + int filter = blockIdx.y; + int batch = blockIdx.z; + + if(offset < size) output[(batch*n+filter)*size + offset] *= biases[filter]; +} + +void scale_bias_gpu(float *output, float *biases, int 
batch, int n, int size) +{ + dim3 dimGrid((size-1)/BLOCK + 1, n, batch); + dim3 dimBlock(BLOCK, 1, 1); + + scale_bias_kernel<<<dimGrid, dimBlock>>>(output, biases, n, size); + check_error(cudaPeekAtLastError()); +} + +__global__ void backward_scale_kernel(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + __shared__ float part[BLOCK]; + int i,b; + int filter = blockIdx.x; + int p = threadIdx.x; + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; i += BLOCK){ + int index = p + i + size*(filter + n*b); + sum += (p+i < size) ? delta[index]*x_norm[index] : 0; + } + } + part[p] = sum; + __syncthreads(); + if (p == 0) { + for(i = 0; i < BLOCK; ++i) scale_updates[filter] += part[i]; + } +} + +void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) +{ + backward_scale_kernel<<<n, BLOCK>>>(x_norm, delta, batch, n, size, scale_updates); + check_error(cudaPeekAtLastError()); +} + +__global__ void add_bias_kernel(float *output, float *biases, int batch, int n, int size) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= n*size*batch) return; + int i = index % size; + index /= size; + int j = index % n; + index /= n; + int k = index; + + output[(k*n+j)*size + i] += biases[j]; +} + +void add_bias_gpu(float *output, float *biases, int batch, int n, int size) +{ + int num = n*size*batch; + + add_bias_kernel<<<cuda_gridsize(num), BLOCK>>>(output, biases, batch, n, size); + check_error(cudaPeekAtLastError()); +} + +__global__ void backward_bias_conn_kernel(float *bias_updates, float *delta, int batch, int n) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= n) return; + int b; + float sum = 0; + for(b = 0; b < batch; ++b){ + int i = b*n + index; + sum += delta[i]; + } + bias_updates[index] += sum; +} + +__global__ void backward_bias_kernel(float *bias_updates, float *delta, int batch, int n, int size) +{ + __shared__ float part[BLOCK]; + int i,b; + int 
filter = blockIdx.x; + int p = threadIdx.x; + float sum = 0; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; i += BLOCK){ + int index = p + i + size*(filter + n*b); + sum += (p+i < size) ? delta[index] : 0; + } + } + part[p] = sum; + __syncthreads(); + if (p == 0) { + for(i = 0; i < BLOCK; ++i) bias_updates[filter] += part[i]; + } +} + +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size) +{ + if(size == 1){ + backward_bias_conn_kernel<<<cuda_gridsize(n), BLOCK>>>(bias_updates, delta, batch, n); + }else{ + backward_bias_kernel<<<n, BLOCK>>>(bias_updates, delta, batch, n, size); + } + check_error(cudaPeekAtLastError()); +} + +/* +__global__ void dot_kernel(float *output, float scale, int batch, int n, int size, float *delta) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int f1 = index / n; + int f2 = index % n; + if (f2 <= f1) return; + + float sum = 0; + float norm1 = 0; + float norm2 = 0; + int b, i; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int i1 = b * size * n + f1 * size + i; + int i2 = b * size * n + f2 * size + i; + sum += output[i1] * output[i2]; + norm1 += output[i1] * output[i1]; + norm2 += output[i2] * output[i2]; + } + } + norm1 = sqrt(norm1); + norm2 = sqrt(norm2); + float norm = norm1 * norm2; + sum = sum / norm; + for(b = 0; b < batch; ++b){ + for(i = 0; i < size; ++i){ + int i1 = b * size * n + f1 * size + i; + int i2 = b * size * n + f2 * size + i; + delta[i1] += - scale * sum * output[i2] / norm; + delta[i2] += - scale * sum * output[i1] / norm; + } + } +} + +void dot_error_gpu(layer l) +{ + dot_kernel<<>>(l.output_gpu, l.dot, l.batch, l.n, l.out_w * l.out_h, l.delta_gpu); + check_error(cudaPeekAtLastError()); +} +*/ + + +__global__ void adam_kernel(int N, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + + float mhat = m[index] / (1.f - powf(B1, t)); + 
float vhat = v[index] / (1.f - powf(B2, t)); + + x[index] = x[index] + rate * mhat / (sqrtf(vhat) + eps); +} + +extern "C" void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t) +{ + adam_kernel<<>>(n, x, m, v, B1, B2, rate, eps, t); + check_error(cudaPeekAtLastError()); +} + +extern "C" void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t) +{ + scal_gpu(n, B1, m, 1); + scal_gpu(n, B2, v, 1); + axpy_gpu(n, -decay*batch, w, 1, d, 1); + + axpy_gpu(n, (1-B1), d, 1, m, 1); + mul_gpu(n, d, 1, d, 1); + axpy_gpu(n, (1-B2), d, 1, v, 1); + + adam_gpu(n, w, m, v, B1, B2, rate, eps, t); + fill_gpu(n, 0, d, 1); +} + +__global__ void normalize_kernel(int N, float *x, float *mean, float *variance, int batch, int filters, int spatial) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int f = (index/spatial)%filters; + + x[index] = (x[index] - mean[f])/(sqrtf(variance[f] + .00001f)); +} + +__global__ void normalize_delta_kernel(int N, float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (index >= N) return; + int f = (index/spatial)%filters; + + delta[index] = delta[index] * 1.f/(sqrtf(variance[f] + .00001f)) + variance_delta[f] * 2.f * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); +} + +extern "C" void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) +{ + size_t N = batch*filters*spatial; + normalize_delta_kernel<<>>(N, x, mean, variance, mean_delta, variance_delta, batch, filters, spatial, delta); + check_error(cudaPeekAtLastError()); +} + +__global__ void variance_delta_kernel(float *x, float 
*delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + int j,k; + variance_delta[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + variance_delta[i] += delta[index]*(x[index] - mean[i]); + } + } + variance_delta[i] *= -.5f * powf(variance[i] + .00001f, (float)(-3.f/2.f)); +} + +__global__ void accumulate_kernel(float *x, int n, int groups, float *sum) +{ + int k; + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= groups) return; + sum[i] = 0; + for(k = 0; k < n; ++k){ + sum[i] += x[k*groups + i]; + } +} + +__global__ void fast_mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + local[id] += (i+id < spatial) ? delta[index] : 0; + } + } + + __syncthreads(); + + if(id == 0){ + mean_delta[filter] = 0; + for(i = 0; i < threads; ++i){ + mean_delta[filter] += local[i]; + } + mean_delta[filter] *= (-1.f/sqrtf(variance[filter] + .00001f)); + } +} + +__global__ void fast_variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + const int threads = BLOCK; + __shared__ float local[threads]; + + int id = threadIdx.x; + local[id] = 0; + + int filter = blockIdx.x; + + int i, j; + for(j = 0; j < batch; ++j){ + for(i = 0; i < spatial; i += threads){ + int index = j*spatial*filters + filter*spatial + i + id; + + local[id] += (i+id < spatial) ? 
delta[index]*(x[index] - mean[filter]) : 0; + } + } + + __syncthreads(); + + if(id == 0){ + variance_delta[filter] = 0; + for(i = 0; i < threads; ++i){ + variance_delta[filter] += local[i]; + } + variance_delta[filter] *= -.5f * powf(variance[filter] + .00001f, (float)(-3.f/2.f)); + } +} + + +__global__ void mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + int j,k; + mean_delta[i] = 0; + for (j = 0; j < batch; ++j) { + for (k = 0; k < spatial; ++k) { + int index = j*filters*spatial + i*spatial + k; + mean_delta[i] += delta[index]; + } + } + mean_delta[i] *= (-1.f/sqrtf(variance[i] + .00001f)); +} + +extern "C" void mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + mean_delta_kernel<<>>(delta, variance, batch, filters, spatial, mean_delta); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) +{ + fast_mean_delta_kernel<<>>(delta, variance, batch, filters, spatial, mean_delta); + check_error(cudaPeekAtLastError()); +} + +extern "C" void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) +{ + fast_variance_delta_kernel<<>>(x, delta, mean, variance, batch, filters, spatial, variance_delta); + check_error(cudaPeekAtLastError()); +} + +__global__ void mean_kernel(float *x, int batch, int filters, int spatial, float *mean) +{ + float scale = 1.f/(batch * spatial); + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= filters) return; + int j,k; + mean[i] = 0; + for(j = 0; j < batch; ++j){ + for(k = 0; k < spatial; ++k){ + int index = j*filters*spatial + i*spatial + k; + mean[i] += x[index]; + } + } + mean[i] *= scale; +} + 
// ---------------------------------------------------------------------------
// GPU BLAS-style kernels and their host-side wrappers (darknet-derived CUDA).
//
// NOTE(review): in the text under review every kernel launch configuration
// had been garbled to "<<>>".  The configurations below are reconstructed
// from the conventions visible elsewhere in this file (cuda_gridsize(N)
// blocks of BLOCK threads; one block per filter for the shared-memory
// "fast_*" reductions) -- verify against the upstream darknet source.
//
// Fixes applied in this revision:
//  * The UVM-prefetch wrappers (scale_mask_gpu .. fill_gpu) created a CUDA
//    stream on every call and never destroyed it, leaking a stream per call.
//    cudaStreamDestroy() is now issued once the prefetch has completed.
//  * softmax_device accumulated the running maximum into an `int`,
//    truncating the float activations.  It now uses a float; because the
//    max-shift cancels out of the softmax, results are unchanged while the
//    intended overflow protection is restored.
// ---------------------------------------------------------------------------

// Per-filter unbiased variance over batch*spatial elements (one thread/filter).
__global__ void variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance)
{
    float scale = 1.f/(batch * spatial - 1);
    int j,k;
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i >= filters) return;
    variance[i] = 0;
    for(j = 0; j < batch; ++j){
        for(k = 0; k < spatial; ++k){
            int index = j*filters*spatial + i*spatial + k;
            variance[i] += powf((x[index] - mean[i]), 2);
        }
    }
    variance[i] *= scale;
}

// Space-to-depth / depth-to-space shuffle used by the reorg layer.
// forward != 0 scatters x into out; otherwise gathers back.
__global__ void reorg_kernel(int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i >= N) return;
    int in_index = i;
    int in_w = i%w;
    i = i/w;
    int in_h = i%h;
    i = i/h;
    int in_c = i%c;
    i = i/c;
    int b = i%batch;

    int out_c = c/(stride*stride);

    int c2 = in_c % out_c;
    int offset = in_c / out_c;
    int w2 = in_w*stride + offset % stride;
    int h2 = in_h*stride + offset / stride;
    int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b));

    if(forward) out[out_index] = x[in_index];
    else out[in_index] = x[out_index];
}

// Y[off+i*incy] += ALPHA * X[off+i*incx]  (strided axpy with offsets).
__global__ void axpy_kernel(int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) Y[OFFY+i*INCY] += ALPHA*X[OFFX+i*INCX];
}

// Y = X ** ALPHA, element-wise.  Double-precision pow() kept deliberately to
// preserve the original numeric results.
__global__ void pow_kernel(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) Y[i*INCY] = pow(X[i*INCX], ALPHA);
}

// X[i*incx] = ALPHA for every element.
__global__ void const_kernel(int N, float ALPHA, float *X, int INCX)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) X[i*INCX] = ALPHA;
}

// Clamp each element of X into [-ALPHA, ALPHA].
__global__ void constrain_kernel(int N, float ALPHA, float *X, int INCX)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) X[i*INCX] = fminf(ALPHA, fmaxf(-ALPHA, X[i*INCX]));
}

// Suppress (zero) elements whose magnitude is below |ALPHA|.
__global__ void supp_kernel(int N, float ALPHA, float *X, int INCX)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) {
        if((X[i*INCX] * X[i*INCX]) < (ALPHA * ALPHA)) X[i*INCX] = 0;
    }
}

// X += ALPHA, element-wise.
__global__ void add_kernel(int N, float ALPHA, float *X, int INCX)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) X[i*INCX] += ALPHA;
}

// X *= ALPHA, element-wise.
__global__ void scal_kernel(int N, float ALPHA, float *X, int INCX)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) X[i*INCX] *= ALPHA;
}

// X[i*incx] = ALPHA (fill).
__global__ void fill_kernel(int N, float ALPHA, float *X, int INCX)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) X[i*INCX] = ALPHA;
}

// Strided copy with offsets: Y[offy+i*incy] = X[offx+i*incx].
__global__ void copy_kernel(int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) Y[i*INCY + OFFY] = X[i*INCX + OFFX];
}

// Y *= X, element-wise (Hadamard product).
__global__ void mul_kernel(int N, float *X, int INCX, float *Y, int INCY)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) Y[i*INCY] *= X[i*INCX];
}

// Batch-norm style normalization: x = (x - mean[f]) / sqrt(var[f] + eps).
extern "C" void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial)
{
    size_t N = batch*filters*spatial;
    normalize_kernel<<<cuda_gridsize(N), BLOCK>>>(N, x, mean, variance, batch, filters, spatial);
    check_error(cudaPeekAtLastError());
}

// L2-normalize x across the filter axis for each (batch, spatial) position,
// writing the gradient scale into dx.
__global__ void l2norm_kernel(int N, float *x, float *dx, int batch, int filters, int spatial)
{
    int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (index >= N) return;
    int b = index / spatial;
    int i = index % spatial;
    int f;
    float sum = 0;
    for(f = 0; f < filters; ++f){
        int index = b*filters*spatial + f*spatial + i;
        sum += powf(x[index], 2);
    }
    sum = sqrtf(sum);
    if(sum == 0) sum = 1;   // avoid division by zero on all-zero columns
    for(f = 0; f < filters; ++f){
        int index = b*filters*spatial + f*spatial + i;
        x[index] /= sum;
        dx[index] = (1 - x[index]) / sum;
    }
}

extern "C" void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial)
{
    size_t N = batch*spatial;
    l2norm_kernel<<<cuda_gridsize(N), BLOCK>>>(N, x, dx, batch, filters, spatial);
    check_error(cudaPeekAtLastError());
}

// Shared-memory per-filter mean reduction: one block per filter, BLOCK
// threads stride over batch*spatial, thread 0 folds the partials.
__global__ void fast_mean_kernel(float *x, int batch, int filters, int spatial, float *mean)
{
    const int threads = BLOCK;
    __shared__ float local[threads];

    int id = threadIdx.x;
    local[id] = 0;

    int filter = blockIdx.x;

    int i, j;
    for(j = 0; j < batch; ++j){
        for(i = 0; i < spatial; i += threads){
            int index = j*spatial*filters + filter*spatial + i + id;
            local[id] += (i+id < spatial) ? x[index] : 0;
        }
    }

    __syncthreads();

    if(id == 0){
        mean[filter] = 0;
        for(i = 0; i < threads; ++i){
            mean[filter] += local[i];
        }
        mean[filter] /= spatial * batch;
    }
}

// Shared-memory per-filter (unbiased) variance reduction; same scheme as
// fast_mean_kernel.
__global__ void fast_variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance)
{
    const int threads = BLOCK;
    __shared__ float local[threads];

    int id = threadIdx.x;
    local[id] = 0;

    int filter = blockIdx.x;

    int i, j;
    for(j = 0; j < batch; ++j){
        for(i = 0; i < spatial; i += threads){
            int index = j*spatial*filters + filter*spatial + i + id;

            local[id] += (i+id < spatial) ? powf((x[index] - mean[filter]), 2) : 0;
        }
    }

    __syncthreads();

    if(id == 0){
        variance[filter] = 0;
        for(i = 0; i < threads; ++i){
            variance[filter] += local[i];
        }
        variance[filter] /= (spatial * batch - 1);
    }
}

extern "C" void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean)
{
    // One block per filter -- required by the reduction scheme above.
    fast_mean_kernel<<<filters, BLOCK>>>(x, batch, filters, spatial, mean);
    check_error(cudaPeekAtLastError());
}

extern "C" void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
{
    fast_variance_kernel<<<filters, BLOCK>>>(x, mean, batch, filters, spatial, variance);
    check_error(cudaPeekAtLastError());
}

extern "C" void mean_gpu(float *x, int batch, int filters, int spatial, float *mean)
{
    mean_kernel<<<cuda_gridsize(filters), BLOCK>>>(x, batch, filters, spatial, mean);
    check_error(cudaPeekAtLastError());
}

extern "C" void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
{
    variance_kernel<<<cuda_gridsize(filters), BLOCK>>>(x, mean, batch, filters, spatial, variance);
    check_error(cudaPeekAtLastError());
}

extern "C" void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY)
{
    axpy_gpu_offset(N, ALPHA, X, 0, INCX, Y, 0, INCY);
}

extern "C" void pow_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY)
{
    pow_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX, Y, INCY);
    check_error(cudaPeekAtLastError());
}

extern "C" void axpy_gpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY)
{
    axpy_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, OFFX, INCX, Y, OFFY, INCY);
    check_error(cudaPeekAtLastError());
}

extern "C" void copy_gpu(int N, float * X, int INCX, float * Y, int INCY)
{
    copy_gpu_offset(N, X, 0, INCX, Y, 0, INCY);
}

extern "C" void mul_gpu(int N, float * X, int INCX, float * Y, int INCY)
{
    mul_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, INCX, Y, INCY);
    check_error(cudaPeekAtLastError());
}

extern "C" void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY)
{
    copy_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, OFFX, INCX, Y, OFFY, INCY);
    check_error(cudaPeekAtLastError());
}

// Transpose between channel-major and interleaved layouts per batch item.
__global__ void flatten_kernel(int N, float *x, int spatial, int layers, int batch, int forward, float *out)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i >= N) return;
    int in_s = i%spatial;
    i = i/spatial;
    int in_c = i%layers;
    i = i/layers;
    int b = i;

    int i1 = b*layers*spatial + in_c*spatial + in_s;
    int i2 = b*layers*spatial + in_s*layers + in_c;

    if (forward) out[i2] = x[i1];
    else out[i1] = x[i2];
}

extern "C" void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out)
{
    int size = spatial*batch*layers;
    flatten_kernel<<<cuda_gridsize(size), BLOCK>>>(size, x, spatial, layers, batch, forward, out);
    check_error(cudaPeekAtLastError());
}

extern "C" void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
{
    int size = w*h*c*batch;
    reorg_kernel<<<cuda_gridsize(size), BLOCK>>>(size, x, w, h, c, batch, stride, forward, out);
    check_error(cudaPeekAtLastError());
}

// x[i] = val wherever mask[i] == mask_num.
__global__ void mask_kernel(int n, float *x, float mask_num, float *mask, float val)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n && mask[i] == mask_num) x[i] = val;
}

extern "C" void mask_gpu(int N, float * X, float mask_num, float * mask, float val)
{
    mask_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, mask_num, mask, val);
    check_error(cudaPeekAtLastError());
}

// x[i] *= scale wherever mask[i] == mask_num.
__global__ void scale_mask_kernel(int n, float *x, float mask_num, float *mask, float scale)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n && mask[i] == mask_num) x[i] *= scale;
}

// The wrappers below prefetch X to the GPU (unified-memory benchmark
// instrumentation) before launching.  The temporary stream used for the
// prefetch is now destroyed after the synchronize -- previously it leaked
// on every call.
extern "C" void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale)
{
    cudaStream_t stream1;
    cudaStreamCreate(&stream1);

    cudaMemPrefetchAsync(X, N * sizeof(float), GPU_DEVICE, stream1);
    cudaStreamSynchronize(stream1);
    cudaStreamDestroy(stream1);

    scale_mask_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, mask_num, mask, scale);
    check_error(cudaPeekAtLastError());
}

extern "C" void const_gpu(int N, float ALPHA, float * X, int INCX)
{
    cudaStream_t stream1;
    cudaStreamCreate(&stream1);

    cudaMemPrefetchAsync(X, N * sizeof(float), GPU_DEVICE, stream1);
    cudaStreamSynchronize(stream1);
    cudaStreamDestroy(stream1);

    const_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
    check_error(cudaPeekAtLastError());
}

extern "C" void constrain_gpu(int N, float ALPHA, float * X, int INCX)
{
    cudaStream_t stream1;
    cudaStreamCreate(&stream1);

    cudaMemPrefetchAsync(X, N * sizeof(float), GPU_DEVICE, stream1);
    cudaStreamSynchronize(stream1);
    cudaStreamDestroy(stream1);

    constrain_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
    check_error(cudaPeekAtLastError());
}

extern "C" void add_gpu(int N, float ALPHA, float * X, int INCX)
{
    cudaStream_t stream1;
    cudaStreamCreate(&stream1);

    cudaMemPrefetchAsync(X, N * sizeof(float), GPU_DEVICE, stream1);
    cudaStreamSynchronize(stream1);
    cudaStreamDestroy(stream1);

    add_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
    check_error(cudaPeekAtLastError());
}

extern "C" void scal_gpu(int N, float ALPHA, float * X, int INCX)
{
    cudaStream_t stream1;
    cudaStreamCreate(&stream1);

    cudaMemPrefetchAsync(X, N * sizeof(float), GPU_DEVICE, stream1);
    cudaStreamSynchronize(stream1);
    cudaStreamDestroy(stream1);

    scal_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
    check_error(cudaPeekAtLastError());
}

extern "C" void supp_gpu(int N, float ALPHA, float * X, int INCX)
{
    cudaStream_t stream1;
    cudaStreamCreate(&stream1);

    cudaMemPrefetchAsync(X, N * sizeof(float), GPU_DEVICE, stream1);
    cudaStreamSynchronize(stream1);
    cudaStreamDestroy(stream1);

    supp_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
    check_error(cudaPeekAtLastError());
}

extern "C" void fill_gpu(int N, float ALPHA, float * X, int INCX)
{
    cudaStream_t stream1;
    cudaStreamCreate(&stream1);

    cudaMemPrefetchAsync(X, N * sizeof(float), GPU_DEVICE, stream1);
    cudaStreamSynchronize(stream1);
    cudaStreamDestroy(stream1);

    fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
    check_error(cudaPeekAtLastError());
}

// Weighted residual add between two tensors of possibly different spatial
// size / channel count: out = s1*out + s2*add over the overlapping region.
__global__ void shortcut_kernel(int size, int minw, int minh, int minc, int stride, int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (id >= size) return;
    int i = id % minw;
    id /= minw;
    int j = id % minh;
    id /= minh;
    int k = id % minc;
    id /= minc;
    int b = id % batch;

    int out_index = i*sample + w2*(j*sample + h2*(k + c2*b));
    int add_index = i*stride + w1*(j*stride + h1*(k + c1*b));
    out[out_index] = s1*out[out_index] + s2*add[add_index];
}

extern "C" void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out)
{
    int minw = (w1 < w2) ? w1 : w2;
    int minh = (h1 < h2) ? h1 : h2;
    int minc = (c1 < c2) ? c1 : c2;

    // Width ratios must match height ratios (square scaling only).
    int stride = w1/w2;
    int sample = w2/w1;
    assert(stride == h1/h2);
    assert(sample == h2/h1);
    if(stride < 1) stride = 1;
    if(sample < 1) sample = 1;

    int size = batch * minw * minh * minc;
    shortcut_kernel<<<cuda_gridsize(size), BLOCK>>>(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, s1, s2, out);
    check_error(cudaPeekAtLastError());
}

// Smooth-L1 (Huber-style) loss: quadratic inside |diff| < 1, linear outside.
__global__ void smooth_l1_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        float diff = truth[i] - pred[i];
        float abs_val = fabsf(diff);
        if(abs_val < 1) {
            error[i] = diff * diff;
            delta[i] = diff;
        }
        else {
            error[i] = 2*abs_val - 1;
            delta[i] = (diff > 0) ? 1 : -1;
        }
    }
}

extern "C" void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
    smooth_l1_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
    check_error(cudaPeekAtLastError());
}

// Softmax cross-entropy: error only where truth is nonzero; delta = t - p.
__global__ void softmax_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        float t = truth[i];
        float p = pred[i];
        error[i] = (t) ? -log(p) : 0;
        delta[i] = t-p;
    }
}

extern "C" void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
    softmax_x_ent_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
    check_error(cudaPeekAtLastError());
}

// Binary (logistic) cross-entropy with a small epsilon against log(0).
__global__ void logistic_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        float t = truth[i];
        float p = pred[i];
        error[i] = -t*log(p+.0000001) - (1-t)*log(1-p+.0000001);
        delta[i] = t-p;
    }
}

extern "C" void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
    logistic_x_ent_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
    check_error(cudaPeekAtLastError());
}

// Squared-error loss (no 1/2 factor, matching the original implementation).
__global__ void l2_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        float diff = truth[i] - pred[i];
        error[i] = diff * diff;
        delta[i] = diff;
    }
}

extern "C" void l2_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
    l2_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
    check_error(cudaPeekAtLastError());
}

// Absolute-error loss with sign gradient.
__global__ void l1_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        float diff = truth[i] - pred[i];
        error[i] = fabsf(diff);   // float abs; avoids relying on abs() overload resolution
        delta[i] = (diff > 0) ? 1 : -1;
    }
}

extern "C" void l1_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
    l1_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
    check_error(cudaPeekAtLastError());
}

// Wasserstein-GAN critic loss: minimize pred on real, maximize on fake.
__global__ void wgan_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        error[i] = truth[i] ? -pred[i] : pred[i];
        delta[i] = (truth[i] > 0) ? 1 : -1;
    }
}

extern "C" void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
    wgan_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
    check_error(cudaPeekAtLastError());
}

// c = s*a + (1-s)*b, with b optional (treated as 0 when NULL).
__global__ void weighted_sum_kernel(int n, float *a, float *b, float *s, float *c)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        c[i] = s[i]*a[i] + (1-s[i])*(b ? b[i] : 0);
    }
}

// Scatter-accumulate the interleaved OUT back into X and Y (backward of
// inter_kernel); either destination may be NULL.
__global__ void deinter_kernel(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < (NX+NY)*B){
        int b = i / (NX+NY);
        int j = i % (NX+NY);
        if (j < NX){
            if(X) X[b*NX + j] += OUT[i];
        } else {
            if(Y) Y[b*NY + j - NX] += OUT[i];
        }
    }
}

extern "C" void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
    deinter_kernel<<<cuda_gridsize((NX+NY)*B), BLOCK>>>(NX, X, NY, Y, B, OUT);
    check_error(cudaPeekAtLastError());
}

// Concatenate X and Y per batch item into OUT.
__global__ void inter_kernel(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < (NX+NY)*B){
        int b = i / (NX+NY);
        int j = i % (NX+NY);
        if (j < NX){
            OUT[i] = X[b*NX + j];
        } else {
            OUT[i] = Y[b*NY + j - NX];
        }
    }
}

extern "C" void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
    inter_kernel<<<cuda_gridsize((NX+NY)*B), BLOCK>>>(NX, X, NY, Y, B, OUT);
    check_error(cudaPeekAtLastError());
}

extern "C" void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c)
{
    weighted_sum_kernel<<<cuda_gridsize(num), BLOCK>>>(num, a, b, s, c);
    check_error(cudaPeekAtLastError());
}

// Backward of weighted_sum: distribute dc into da, db (optional) and ds.
__global__ void weighted_delta_kernel(int n, float *a, float *b, float *s, float *da, float *db, float *ds, float *dc)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        if(da) da[i] += dc[i] * s[i];
        if(db) db[i] += dc[i] * (1-s[i]);
        ds[i] += dc[i] * (a[i] - b[i]);
    }
}

extern "C" void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc)
{
    weighted_delta_kernel<<<cuda_gridsize(num), BLOCK>>>(num, a, b, s, da, db, ds, dc);
    check_error(cudaPeekAtLastError());
}

// c += a * b, element-wise (fused multiply-accumulate over arrays).
__global__ void mult_add_into_kernel(int n, float *a, float *b, float *c)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        c[i] += a[i]*b[i];
    }
}

extern "C" void mult_add_into_gpu(int num, float *a, float *b, float *c)
{
    mult_add_into_kernel<<<cuda_gridsize(num), BLOCK>>>(num, a, b, c);
    check_error(cudaPeekAtLastError());
}

// Numerically-stable strided softmax over n elements with temperature temp.
// Fix: the running maximum was previously stored in an int, truncating the
// activations; float preserves the intended overflow protection (the shift
// cancels in the normalized result).
__device__ void softmax_device(float *input, int n, float temp, int stride, float *output)
{
    int i;
    float sum = 0;
    float largest = -INFINITY;
    for(i = 0; i < n; ++i){
        float val = input[i*stride];
        largest = (val>largest) ? val : largest;
    }
    for(i = 0; i < n; ++i){
        float e = expf(input[i*stride]/temp - largest/temp);
        sum += e;
        output[i*stride] = e;
    }
    for(i = 0; i < n; ++i){
        output[i*stride] /= sum;
    }
}

// One softmax per (spatial position, group, batch item) for the tree layout.
__global__ void softmax_tree_kernel(float *input, int spatial, int batch, int stride, float temp, float *output, int groups, int *group_size, int *group_offset)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (id >= spatial*batch*groups) return;
    int s = id % spatial;
    id = id / spatial;
    int g = id % groups;
    int b = id / groups;
    int goff = group_offset[g]*spatial;
    int boff = b*stride;
    softmax_device(input + goff + boff + s, group_size[g], temp, spatial, output + goff + boff + s);
}

// Hierarchical softmax over the tree's groups.  The device-side group
// tables are allocated and freed per call (a static cache was considered
// and rejected upstream).
extern "C" void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier)
{
    int *tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups);
    int *tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups);
    int num = spatial*batch*hier.groups;
    softmax_tree_kernel<<<cuda_gridsize(num), BLOCK>>>(input, spatial, batch, stride, temp, output, hier.groups, tree_groups_size, tree_groups_offset);
    check_error(cudaPeekAtLastError());
    cuda_free((float *)tree_groups_size);
    cuda_free((float *)tree_groups_offset);
}

// One softmax per (batch item, group), n elements each.
__global__ void softmax_kernel(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (id >= batch*groups) return;
    int b = id / groups;
    int g = id % groups;
    softmax_device(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset);
}
+ +extern "C" void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) +{ + softmax_kernel<<>>(input, n, batch, batch_offset, groups, group_offset, stride, temp, output); + check_error(cudaPeekAtLastError()); +} + + +__global__ void upsample_kernel(size_t N, float *x, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + size_t i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(i >= N) return; + int out_index = i; + int out_w = i%(w*stride); + i = i/(w*stride); + int out_h = i%(h*stride); + i = i/(h*stride); + int out_c = i%c; + i = i/c; + int b = i%batch; + + int in_w = out_w / stride; + int in_h = out_h / stride; + int in_c = out_c; + + int in_index = b*w*h*c + in_c*w*h + in_h*w + in_w; + + + if(forward) out[out_index] += scale * x[in_index]; + else atomicAdd(x+in_index, scale * out[out_index]); +} +extern "C" void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) +{ + size_t size = w*h*c*batch*stride*stride; + upsample_kernel<<>>(size, in, w, h, c, batch, stride, forward, scale, out); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/box.c b/workloads/realworld/uvm_prefetch_async/darknet/src/box.c new file mode 100644 index 0000000000000000000000000000000000000000..8a1772c9ae05dede6ddc83d9b6465f64cf974ae8 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/box.c @@ -0,0 +1,357 @@ +#include "box.h" +#include +#include +#include + +int nms_comparator(const void *pa, const void *pb) +{ + detection a = *(detection *)pa; + detection b = *(detection *)pb; + float diff = 0; + if(b.sort_class >= 0){ + diff = a.prob[b.sort_class] - b.prob[b.sort_class]; + } else { + diff = a.objectness - b.objectness; + } + if(diff < 0) return 1; + else if(diff > 0) return -1; + return 0; +} + +void do_nms_obj(detection *dets, 
int total, int classes, float thresh) +{ + int i, j, k; + k = total-1; + for(i = 0; i <= k; ++i){ + if(dets[i].objectness == 0){ + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k+1; + + for(i = 0; i < total; ++i){ + dets[i].sort_class = -1; + } + + qsort(dets, total, sizeof(detection), nms_comparator); + for(i = 0; i < total; ++i){ + if(dets[i].objectness == 0) continue; + box a = dets[i].bbox; + for(j = i+1; j < total; ++j){ + if(dets[j].objectness == 0) continue; + box b = dets[j].bbox; + if (box_iou(a, b) > thresh){ + dets[j].objectness = 0; + for(k = 0; k < classes; ++k){ + dets[j].prob[k] = 0; + } + } + } + } +} + + +void do_nms_sort(detection *dets, int total, int classes, float thresh) +{ + int i, j, k; + k = total-1; + for(i = 0; i <= k; ++i){ + if(dets[i].objectness == 0){ + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k+1; + + for(k = 0; k < classes; ++k){ + for(i = 0; i < total; ++i){ + dets[i].sort_class = k; + } + qsort(dets, total, sizeof(detection), nms_comparator); + for(i = 0; i < total; ++i){ + if(dets[i].prob[k] == 0) continue; + box a = dets[i].bbox; + for(j = i+1; j < total; ++j){ + box b = dets[j].bbox; + if (box_iou(a, b) > thresh){ + dets[j].prob[k] = 0; + } + } + } + } +} + +box float_to_box(float *f, int stride) +{ + box b = {0}; + b.x = f[0]; + b.y = f[1*stride]; + b.w = f[2*stride]; + b.h = f[3*stride]; + return b; +} + +dbox derivative(box a, box b) +{ + dbox d; + d.dx = 0; + d.dw = 0; + float l1 = a.x - a.w/2; + float l2 = b.x - b.w/2; + if (l1 > l2){ + d.dx -= 1; + d.dw += .5; + } + float r1 = a.x + a.w/2; + float r2 = b.x + b.w/2; + if(r1 < r2){ + d.dx += 1; + d.dw += .5; + } + if (l1 > r2) { + d.dx = -1; + d.dw = 0; + } + if (r1 < l2){ + d.dx = 1; + d.dw = 0; + } + + d.dy = 0; + d.dh = 0; + float t1 = a.y - a.h/2; + float t2 = b.y - b.h/2; + if (t1 > t2){ + d.dy -= 1; + d.dh += .5; + } + float b1 = a.y + a.h/2; + float b2 = b.y + 
    b.h/2;
    if(b1 < b2){
        d.dy += 1;
        d.dh += .5;
    }
    if (t1 > b2) {
        d.dy = -1;
        d.dh = 0;
    }
    if (b1 < t2){
        d.dy = 1;
        d.dh = 0;
    }
    return d;
}

/* Length of the 1-D overlap of two segments given by center x and width w;
 * negative when the segments are disjoint. */
float overlap(float x1, float w1, float x2, float w2)
{
    float l1 = x1 - w1/2;
    float l2 = x2 - w2/2;
    float left = l1 > l2 ? l1 : l2;    /* rightmost left edge */
    float r1 = x1 + w1/2;
    float r2 = x2 + w2/2;
    float right = r1 < r2 ? r1 : r2;   /* leftmost right edge */
    return right - left;
}

/* Intersection area of two center/size boxes; 0 when they do not overlap. */
float box_intersection(box a, box b)
{
    float w = overlap(a.x, a.w, b.x, b.w);
    float h = overlap(a.y, a.h, b.y, b.h);
    if(w < 0 || h < 0) return 0;
    float area = w*h;
    return area;
}

/* Union area = sum of the two areas minus their intersection. */
float box_union(box a, box b)
{
    float i = box_intersection(a, b);
    float u = a.w*a.h + b.w*b.h - i;
    return u;
}

/* Intersection-over-union similarity (1 = identical boxes). */
float box_iou(box a, box b)
{
    return box_intersection(a, b)/box_union(a, b);
}

/* Euclidean distance between two boxes in (x, y, w, h) parameter space. */
float box_rmse(box a, box b)
{
    return sqrt(pow(a.x-b.x, 2) +
                pow(a.y-b.y, 2) +
                pow(a.w-b.w, 2) +
                pow(a.h-b.h, 2));
}

/* Gradient of box_intersection w.r.t. a's (x, y, w, h): the 1-D edge
 * derivatives from derivative() scaled by the overlap on the other axis. */
dbox dintersect(box a, box b)
{
    float w = overlap(a.x, a.w, b.x, b.w);
    float h = overlap(a.y, a.h, b.y, b.h);
    dbox dover = derivative(a, b);
    dbox di;

    di.dw = dover.dw*h;
    di.dx = dover.dx*h;
    di.dh = dover.dh*w;
    di.dy = dover.dy*w;

    return di;
}

/* Gradient of box_union w.r.t. a: d(area of a) minus d(intersection). */
dbox dunion(box a, box b)
{
    dbox du;

    dbox di = dintersect(a, b);
    du.dw = a.h - di.dw;
    du.dh = a.w - di.dh;
    du.dx = -di.dx;
    du.dy = -di.dy;

    return du;
}


/* Debug aid: finite-difference check of dunion against the analytic
 * gradient, printing both for manual comparison. */
void test_dunion()
{
    box a = {0, 0, 1, 1};
    box dxa= {0+.0001, 0, 1, 1};
    box dya= {0, 0+.0001, 1, 1};
    box dwa= {0, 0, 1+.0001, 1};
    box dha= {0, 0, 1, 1+.0001};

    box b = {.5, .5, .2, .2};
    dbox di = dunion(a,b);
    printf("Union: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh);
    float inter = box_union(a, b);
    float xinter = box_union(dxa, b);
    float yinter = box_union(dya, b);
    float winter = box_union(dwa, b);
    float hinter = box_union(dha, b);
    xinter = (xinter - inter)/(.0001);
    yinter = (yinter - inter)/(.0001);
    winter = (winter - inter)/(.0001);
    hinter = (hinter -
inter)/(.0001); + printf("Union Manual %f %f %f %f\n", xinter, yinter, winter, hinter); +} +void test_dintersect() +{ + box a = {0, 0, 1, 1}; + box dxa= {0+.0001, 0, 1, 1}; + box dya= {0, 0+.0001, 1, 1}; + box dwa= {0, 0, 1+.0001, 1}; + box dha= {0, 0, 1, 1+.0001}; + + box b = {.5, .5, .2, .2}; + dbox di = dintersect(a,b); + printf("Inter: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); + float inter = box_intersection(a, b); + float xinter = box_intersection(dxa, b); + float yinter = box_intersection(dya, b); + float winter = box_intersection(dwa, b); + float hinter = box_intersection(dha, b); + xinter = (xinter - inter)/(.0001); + yinter = (yinter - inter)/(.0001); + winter = (winter - inter)/(.0001); + hinter = (hinter - inter)/(.0001); + printf("Inter Manual %f %f %f %f\n", xinter, yinter, winter, hinter); +} + +void test_box() +{ + test_dintersect(); + test_dunion(); + box a = {0, 0, 1, 1}; + box dxa= {0+.00001, 0, 1, 1}; + box dya= {0, 0+.00001, 1, 1}; + box dwa= {0, 0, 1+.00001, 1}; + box dha= {0, 0, 1, 1+.00001}; + + box b = {.5, 0, .2, .2}; + + float iou = box_iou(a,b); + iou = (1-iou)*(1-iou); + printf("%f\n", iou); + dbox d = diou(a, b); + printf("%f %f %f %f\n", d.dx, d.dy, d.dw, d.dh); + + float xiou = box_iou(dxa, b); + float yiou = box_iou(dya, b); + float wiou = box_iou(dwa, b); + float hiou = box_iou(dha, b); + xiou = ((1-xiou)*(1-xiou) - iou)/(.00001); + yiou = ((1-yiou)*(1-yiou) - iou)/(.00001); + wiou = ((1-wiou)*(1-wiou) - iou)/(.00001); + hiou = ((1-hiou)*(1-hiou) - iou)/(.00001); + printf("manual %f %f %f %f\n", xiou, yiou, wiou, hiou); +} + +dbox diou(box a, box b) +{ + float u = box_union(a,b); + float i = box_intersection(a,b); + dbox di = dintersect(a,b); + dbox du = dunion(a,b); + dbox dd = {0,0,0,0}; + + if(i <= 0 || 1) { + dd.dx = b.x - a.x; + dd.dy = b.y - a.y; + dd.dw = b.w - a.w; + dd.dh = b.h - a.h; + return dd; + } + + dd.dx = 2*pow((1-(i/u)),1)*(di.dx*u - du.dx*i)/(u*u); + dd.dy = 2*pow((1-(i/u)),1)*(di.dy*u - du.dy*i)/(u*u); + 
dd.dw = 2*pow((1-(i/u)),1)*(di.dw*u - du.dw*i)/(u*u); + dd.dh = 2*pow((1-(i/u)),1)*(di.dh*u - du.dh*i)/(u*u); + return dd; +} + + +void do_nms(box *boxes, float **probs, int total, int classes, float thresh) +{ + int i, j, k; + for(i = 0; i < total; ++i){ + int any = 0; + for(k = 0; k < classes; ++k) any = any || (probs[i][k] > 0); + if(!any) { + continue; + } + for(j = i+1; j < total; ++j){ + if (box_iou(boxes[i], boxes[j]) > thresh){ + for(k = 0; k < classes; ++k){ + if (probs[i][k] < probs[j][k]) probs[i][k] = 0; + else probs[j][k] = 0; + } + } + } + } +} + +box encode_box(box b, box anchor) +{ + box encode; + encode.x = (b.x - anchor.x) / anchor.w; + encode.y = (b.y - anchor.y) / anchor.h; + encode.w = log2(b.w / anchor.w); + encode.h = log2(b.h / anchor.h); + return encode; +} + +box decode_box(box b, box anchor) +{ + box decode; + decode.x = b.x * anchor.w + anchor.x; + decode.y = b.y * anchor.h + anchor.y; + decode.w = pow(2., b.w) * anchor.w; + decode.h = pow(2., b.h) * anchor.h; + return decode; +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/box.h b/workloads/realworld/uvm_prefetch_async/darknet/src/box.h new file mode 100644 index 0000000000000000000000000000000000000000..dda3e59100c3d9e0a6bb05a80070155d9fcbc876 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/box.h @@ -0,0 +1,14 @@ +#ifndef BOX_H +#define BOX_H +#include "darknet.h" + +typedef struct{ + float dx, dy, dw, dh; +} dbox; + +float box_rmse(box a, box b); +dbox diou(box a, box b); +box decode_box(box b, box anchor); +box encode_box(box b, box anchor); + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/classifier.h b/workloads/realworld/uvm_prefetch_async/darknet/src/classifier.h new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/classifier.h @@ -0,0 +1 @@ + diff --git 
a/workloads/realworld/uvm_prefetch_async/darknet/src/col2im.c b/workloads/realworld/uvm_prefetch_async/darknet/src/col2im.c new file mode 100644 index 0000000000000000000000000000000000000000..5c4605e197439f79fe05c41337a5f2b8103f63ba --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/col2im.c @@ -0,0 +1,39 @@ +#include +#include +void col2im_add_pixel(float *im, int height, int width, int channels, + int row, int col, int channel, int pad, float val) +{ + row -= pad; + col -= pad; + + if (row < 0 || col < 0 || + row >= height || col >= width) return; + im[col + width*(row + height*channel)] += val; +} +//This one might be too, can't remember. +void col2im_cpu(float* data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_im) +{ + int c,h,w; + int height_col = (height + 2*pad - ksize) / stride + 1; + int width_col = (width + 2*pad - ksize) / stride + 1; + + int channels_col = channels * ksize * ksize; + for (c = 0; c < channels_col; ++c) { + int w_offset = c % ksize; + int h_offset = (c / ksize) % ksize; + int c_im = c / ksize / ksize; + for (h = 0; h < height_col; ++h) { + for (w = 0; w < width_col; ++w) { + int im_row = h_offset + h * stride; + int im_col = w_offset + w * stride; + int col_index = (c * height_col + h) * width_col + w; + double val = data_col[col_index]; + col2im_add_pixel(data_im, height, width, channels, + im_row, im_col, c_im, pad, val); + } + } + } +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/col2im.h b/workloads/realworld/uvm_prefetch_async/darknet/src/col2im.h new file mode 100644 index 0000000000000000000000000000000000000000..3fbe05307db65a1f511f801670a23734e21b7dff --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/col2im.h @@ -0,0 +1,13 @@ +#ifndef COL2IM_H +#define COL2IM_H + +void col2im_cpu(float* data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_im); + +#ifdef GPU +void col2im_gpu(float 
*data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_im); +#endif +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/col2im_kernels.cu b/workloads/realworld/uvm_prefetch_async/darknet/src/col2im_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..3543ec940a807a2dc42eb11fb31bc370c750a0f5 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/col2im_kernels.cu @@ -0,0 +1,70 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "col2im.h" +#include "cuda_dark.h" +} + +// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu +// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE + +__global__ void col2im_gpu_kernel(const int n, const float* data_col, + const int height, const int width, const int ksize, + const int pad, + const int stride, + const int height_col, const int width_col, + float *data_im) { + int index = blockIdx.x*blockDim.x+threadIdx.x; + for(; index < n; index += blockDim.x*gridDim.x){ + float val = 0; + int w = index % width + pad; + int h = (index / width) % height + pad; + int c = index / (width * height); + // compute the start and end of the output + int w_col_start = (w < ksize) ? 0 : (w - ksize) / stride + 1; + int w_col_end = min(w / stride + 1, width_col); + int h_col_start = (h < ksize) ? 
0 : (h - ksize) / stride + 1; + int h_col_end = min(h / stride + 1, height_col); + // equivalent implementation + int offset = + (c * ksize * ksize + h * ksize + w) * height_col * width_col; + int coeff_h_col = (1 - stride * ksize * height_col) * width_col; + int coeff_w_col = (1 - stride * height_col * width_col); + for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { + for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { + val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col]; + } + } + data_im[index] += val; + } +} + +void col2im_gpu(float *data_col, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_im){ + // We are going to launch channels * height_col * width_col kernels, each + // kernel responsible for copying a single-channel grid. + int height_col = (height + 2 * pad - ksize) / stride + 1; + int width_col = (width + 2 * pad - ksize) / stride + 1; + int num_kernels = channels * height * width; + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(data_col, num_kernels * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(data_im, num_kernels * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + col2im_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, + BLOCK, 0, stream2>>>( + num_kernels, data_col, height, width, ksize, pad, + stride, height_col, + width_col, data_im); + check_error(cudaPeekAtLastError()); +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/compare.c b/workloads/realworld/uvm_prefetch_async/darknet/src/compare.c new file mode 100644 index 0000000000000000000000000000000000000000..d2d2b3bdc675cf808f483d1607550e072e245396 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/compare.c @@ -0,0 +1,352 @@ +#include + +#include "network.h" +#include "detection_layer.h" +#include "cost_layer.h" +#include "utils.h" 
+#include "parser.h" +#include "box.h" + +void train_compare(char *cfgfile, char *weightfile) +{ + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + char *backup_directory = "/home/pjreddie/backup/"; + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + list *plist = get_paths("data/compare.train.list"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + printf("%d\n", N); + clock_t time; + pthread_t load_thread; + data train; + data buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.classes = 20; + args.n = imgs; + args.m = N; + args.d = &buffer; + args.type = COMPARE_DATA; + + load_thread = load_data_in_thread(args); + int epoch = *net.seen/N; + int i = 0; + while(1){ + ++i; + time=clock(); + pthread_join(load_thread, 0); + train = buffer; + + load_thread = load_data_in_thread(args); + printf("Loaded: %lf seconds\n", sec(clock()-time)); + time=clock(); + float loss = train_network(net, train); + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%.3f: %f, %f avg, %lf seconds, %ld images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen); + free_data(train); + if(i%100 == 0){ + char buff[256]; + sprintf(buff, "%s/%s_%d_minor_%d.weights",backup_directory,base, epoch, i); + save_weights(net, buff); + } + if(*net.seen/N > epoch){ + epoch = *net.seen/N; + i = 0; + char buff[256]; + sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); + save_weights(net, buff); + if(epoch%22 == 0) net.learning_rate *= .1; + } + } + pthread_join(load_thread, 0); + free_data(buffer); + free_network(net); + free_ptrs((void**)paths, plist->size); + free_list(plist); + free(base); +} + +void validate_compare(char *filename, char *weightfile) +{ + 
int i = 0; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + + list *plist = get_paths("data/compare.val.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size/2; + free_list(plist); + + clock_t time; + int correct = 0; + int total = 0; + int splits = 10; + int num = (i+1)*N/splits - i*N/splits; + + data val, buffer; + + load_args args = {0}; + args.w = net.w; + args.h = net.h; + args.paths = paths; + args.classes = 20; + args.n = num; + args.m = 0; + args.d = &buffer; + args.type = COMPARE_DATA; + + pthread_t load_thread = load_data_in_thread(args); + for(i = 1; i <= splits; ++i){ + time=clock(); + + pthread_join(load_thread, 0); + val = buffer; + + num = (i+1)*N/splits - i*N/splits; + char **part = paths+(i*N/splits); + if(i != splits){ + args.paths = part; + load_thread = load_data_in_thread(args); + } + printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); + + time=clock(); + matrix pred = network_predict_data(net, val); + int j,k; + for(j = 0; j < val.y.rows; ++j){ + for(k = 0; k < 20; ++k){ + if(val.y.vals[j][k*2] != val.y.vals[j][k*2+1]){ + ++total; + if((val.y.vals[j][k*2] < val.y.vals[j][k*2+1]) == (pred.vals[j][k*2] < pred.vals[j][k*2+1])){ + ++correct; + } + } + } + } + free_matrix(pred); + printf("%d: Acc: %f, %lf seconds, %d images\n", i, (float)correct/total, sec(clock()-time), val.X.rows); + free_data(val); + } +} + +typedef struct { + network net; + char *filename; + int class; + int classes; + float elo; + float *elos; +} sortable_bbox; + +int total_compares = 0; +int current_class = 0; + +int elo_comparator(const void*a, const void *b) +{ + sortable_bbox box1 = *(sortable_bbox*)a; + sortable_bbox box2 = *(sortable_bbox*)b; + if(box1.elos[current_class] == box2.elos[current_class]) return 0; + if(box1.elos[current_class] > box2.elos[current_class]) return -1; + return 1; +} + +int 
bbox_comparator(const void *a, const void *b) +{ + ++total_compares; + sortable_bbox box1 = *(sortable_bbox*)a; + sortable_bbox box2 = *(sortable_bbox*)b; + network net = box1.net; + int class = box1.class; + + image im1 = load_image_color(box1.filename, net.w, net.h); + image im2 = load_image_color(box2.filename, net.w, net.h); + float *X = calloc(net.w*net.h*net.c, sizeof(float)); + memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); + memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); + float *predictions = network_predict(net, X); + + free_image(im1); + free_image(im2); + free(X); + if (predictions[class*2] > predictions[class*2+1]){ + return 1; + } + return -1; +} + +void bbox_update(sortable_bbox *a, sortable_bbox *b, int class, int result) +{ + int k = 32; + float EA = 1./(1+pow(10, (b->elos[class] - a->elos[class])/400.)); + float EB = 1./(1+pow(10, (a->elos[class] - b->elos[class])/400.)); + float SA = result ? 1 : 0; + float SB = result ? 0 : 1; + a->elos[class] += k*(SA - EA); + b->elos[class] += k*(SB - EB); +} + +void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, int class) +{ + image im1 = load_image_color(a->filename, net.w, net.h); + image im2 = load_image_color(b->filename, net.w, net.h); + float *X = calloc(net.w*net.h*net.c, sizeof(float)); + memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); + memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); + float *predictions = network_predict(net, X); + ++total_compares; + + int i; + for(i = 0; i < classes; ++i){ + if(class < 0 || class == i){ + int result = predictions[i*2] > predictions[i*2+1]; + bbox_update(a, b, i, result); + } + } + + free_image(im1); + free_image(im2); + free(X); +} + +void SortMaster3000(char *filename, char *weightfile) +{ + int i = 0; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + set_batch_network(&net, 1); + + list *plist = 
get_paths("data/compare.sort.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + free_list(plist); + sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); + printf("Sorting %d boxes...\n", N); + for(i = 0; i < N; ++i){ + boxes[i].filename = paths[i]; + boxes[i].net = net; + boxes[i].class = 7; + boxes[i].elo = 1500; + } + clock_t time=clock(); + qsort(boxes, N, sizeof(sortable_bbox), bbox_comparator); + for(i = 0; i < N; ++i){ + printf("%s\n", boxes[i].filename); + } + printf("Sorted in %d compares, %f secs\n", total_compares, sec(clock()-time)); +} + +void BattleRoyaleWithCheese(char *filename, char *weightfile) +{ + int classes = 20; + int i,j; + network net = parse_network_cfg(filename); + if(weightfile){ + load_weights(&net, weightfile); + } + srand(time(0)); + set_batch_network(&net, 1); + + list *plist = get_paths("data/compare.sort.list"); + //list *plist = get_paths("data/compare.small.list"); + //list *plist = get_paths("data/compare.cat.list"); + //list *plist = get_paths("data/compare.val.old"); + char **paths = (char **)list_to_array(plist); + int N = plist->size; + int total = N; + free_list(plist); + sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox)); + printf("Battling %d boxes...\n", N); + for(i = 0; i < N; ++i){ + boxes[i].filename = paths[i]; + boxes[i].net = net; + boxes[i].classes = classes; + boxes[i].elos = calloc(classes, sizeof(float));; + for(j = 0; j < classes; ++j){ + boxes[i].elos[j] = 1500; + } + } + int round; + clock_t time=clock(); + for(round = 1; round <= 4; ++round){ + clock_t round_time=clock(); + printf("Round: %d\n", round); + shuffle(boxes, N, sizeof(sortable_bbox)); + for(i = 0; i < N/2; ++i){ + bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, -1); + } + printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); + } + + int class; + + for (class = 0; class < classes; ++class){ + + N = total; + current_class = class; + 
qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + N /= 2; + + for(round = 1; round <= 100; ++round){ + clock_t round_time=clock(); + printf("Round: %d\n", round); + + sorta_shuffle(boxes, N, sizeof(sortable_bbox), 10); + for(i = 0; i < N/2; ++i){ + bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, class); + } + qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); + if(round <= 20) N = (N*9/10)/2*2; + + printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); + } + char buff[256]; + sprintf(buff, "results/battle_%d.log", class); + FILE *outfp = fopen(buff, "w"); + for(i = 0; i < N; ++i){ + fprintf(outfp, "%s %f\n", boxes[i].filename, boxes[i].elos[class]); + } + fclose(outfp); + } + printf("Tournament in %d compares, %f secs\n", total_compares, sec(clock()-time)); +} + +void run_compare(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + //char *filename = (argc > 5) ? 
argv[5]: 0; + if(0==strcmp(argv[2], "train")) train_compare(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_compare(cfg, weights); + else if(0==strcmp(argv[2], "sort")) SortMaster3000(cfg, weights); + else if(0==strcmp(argv[2], "battle")) BattleRoyaleWithCheese(cfg, weights); + /* + else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); + else if(0==strcmp(argv[2], "extract")) extract_boxes(cfg, weights); + else if(0==strcmp(argv[2], "valid")) validate_recall(cfg, weights); + */ +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/connected_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/connected_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..ec25b44d998661c4735cd9a8a86f2355a0ae0080 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/connected_layer.c @@ -0,0 +1,336 @@ +#include "connected_layer.h" +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam) +{ + int i; + layer l = {0}; + l.learning_rate_scale = 1; + l.type = CONNECTED; + + l.inputs = inputs; + l.outputs = outputs; + l.batch=batch; + l.batch_normalize = batch_normalize; + l.h = 1; + l.w = 1; + l.c = inputs; + l.out_h = 1; + l.out_w = 1; + l.out_c = outputs; + + l.output = calloc(batch*outputs, sizeof(float)); + l.delta = calloc(batch*outputs, sizeof(float)); + + l.weight_updates = calloc(inputs*outputs, sizeof(float)); + l.bias_updates = calloc(outputs, sizeof(float)); + + l.weights = calloc(outputs*inputs, sizeof(float)); + l.biases = calloc(outputs, sizeof(float)); + + l.forward = forward_connected_layer; + l.backward = backward_connected_layer; + l.update = update_connected_layer; + + //float scale = 1./sqrt(inputs); + float scale = sqrt(2./inputs); + 
for(i = 0; i < outputs*inputs; ++i){ + l.weights[i] = scale*rand_uniform(-1, 1); + } + + for(i = 0; i < outputs; ++i){ + l.biases[i] = 0; + } + + if(adam){ + l.m = calloc(l.inputs*l.outputs, sizeof(float)); + l.v = calloc(l.inputs*l.outputs, sizeof(float)); + l.bias_m = calloc(l.outputs, sizeof(float)); + l.scale_m = calloc(l.outputs, sizeof(float)); + l.bias_v = calloc(l.outputs, sizeof(float)); + l.scale_v = calloc(l.outputs, sizeof(float)); + } + if(batch_normalize){ + l.scales = calloc(outputs, sizeof(float)); + l.scale_updates = calloc(outputs, sizeof(float)); + for(i = 0; i < outputs; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(outputs, sizeof(float)); + l.mean_delta = calloc(outputs, sizeof(float)); + l.variance = calloc(outputs, sizeof(float)); + l.variance_delta = calloc(outputs, sizeof(float)); + + l.rolling_mean = calloc(outputs, sizeof(float)); + l.rolling_variance = calloc(outputs, sizeof(float)); + + l.x = calloc(batch*outputs, sizeof(float)); + l.x_norm = calloc(batch*outputs, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_connected_layer_gpu; + l.backward_gpu = backward_connected_layer_gpu; + l.update_gpu = update_connected_layer_gpu; + + l.weights_gpu = cuda_make_array(l.weights, outputs*inputs); + l.biases_gpu = cuda_make_array(l.biases, outputs); + + l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs); + + l.output_gpu = cuda_make_array(l.output, outputs*batch); + l.delta_gpu = cuda_make_array(l.delta, outputs*batch); + if (adam) { + l.m_gpu = cuda_make_array(0, inputs*outputs); + l.v_gpu = cuda_make_array(0, inputs*outputs); + l.bias_m_gpu = cuda_make_array(0, outputs); + l.bias_v_gpu = cuda_make_array(0, outputs); + l.scale_m_gpu = cuda_make_array(0, outputs); + l.scale_v_gpu = cuda_make_array(0, outputs); + } + + if(batch_normalize){ + l.mean_gpu = cuda_make_array(l.mean, outputs); + l.variance_gpu = cuda_make_array(l.variance, outputs); + + 
l.rolling_mean_gpu = cuda_make_array(l.mean, outputs); + l.rolling_variance_gpu = cuda_make_array(l.variance, outputs); + + l.mean_delta_gpu = cuda_make_array(l.mean, outputs); + l.variance_delta_gpu = cuda_make_array(l.variance, outputs); + + l.scales_gpu = cuda_make_array(l.scales, outputs); + l.scale_updates_gpu = cuda_make_array(l.scale_updates, outputs); + + l.x_gpu = cuda_make_array(l.output, l.batch*outputs); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*outputs); +#ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); +#endif + } +#endif + l.activation = activation; + fprintf(stderr, "connected %4d -> %4d\n", inputs, outputs); + return l; +} + +void update_connected_layer(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.outputs, momentum, l.bias_updates, 1); + + if(l.batch_normalize){ + axpy_cpu(l.outputs, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.outputs, momentum, l.scale_updates, 1); + } + + axpy_cpu(l.inputs*l.outputs, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(l.inputs*l.outputs, momentum, l.weight_updates, 1); +} + +void forward_connected_layer(layer l, network net) +{ + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + int m = l.batch; + int k = l.inputs; + int n = l.outputs; + float *a = net.input; + float *b = l.weights; + float *c = l.output; + gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); + if(l.batch_normalize){ + forward_batchnorm_layer(l, net); 
+ } else { + add_bias(l.output, l.biases, l.batch, l.outputs, 1); + } + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_connected_layer(layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l.bias_updates, l.delta, l.batch, l.outputs, 1); + } + + int m = l.outputs; + int k = l.batch; + int n = l.inputs; + float *a = l.delta; + float *b = net.input; + float *c = l.weight_updates; + gemm(1,0,m,n,k,1,a,m,b,n,1,c,n); + + m = l.batch; + k = l.outputs; + n = l.inputs; + + a = l.delta; + b = l.weights; + c = net.delta; + + if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); +} + + +void denormalize_connected_layer(layer l) +{ + int i, j; + for(i = 0; i < l.outputs; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .000001); + for(j = 0; j < l.inputs; ++j){ + l.weights[i*l.inputs + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + + +void statistics_connected_layer(layer l) +{ + if(l.batch_normalize){ + printf("Scales "); + print_statistics(l.scales, l.outputs); + /* + printf("Rolling Mean "); + print_statistics(l.rolling_mean, l.outputs); + printf("Rolling Variance "); + print_statistics(l.rolling_variance, l.outputs); + */ + } + printf("Biases "); + print_statistics(l.biases, l.outputs); + printf("Weights "); + print_statistics(l.weights, l.outputs); +} + +#ifdef GPU + +void pull_connected_layer(layer l) +{ + cuda_pull_array(l.weights_gpu, l.weights, l.inputs*l.outputs); + cuda_pull_array(l.biases_gpu, l.biases, l.outputs); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs); + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + if (l.batch_normalize){ + cuda_pull_array(l.scales_gpu, l.scales, l.outputs); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs); + 
cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs); + } +} + +void push_connected_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.inputs*l.outputs); + cuda_push_array(l.biases_gpu, l.biases, l.outputs); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs); + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + if (l.batch_normalize){ + cuda_push_array(l.scales_gpu, l.scales, l.outputs); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs); + } +} + +void update_connected_layer_gpu(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + if(a.adam){ + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.inputs*l.outputs, batch, a.t); + adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t); + if(l.scales_gpu){ + adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t); + } + }else{ + axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1); + + if(l.batch_normalize){ + axpy_gpu(l.outputs, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.outputs, momentum, l.scale_updates_gpu, 1); + } + + axpy_gpu(l.inputs*l.outputs, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.inputs*l.outputs, momentum, l.weight_updates_gpu, 1); + } +} + +void forward_connected_layer_gpu(layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + + int m = 
l.batch; + int k = l.inputs; + int n = l.outputs; + float * a = net.input_gpu; + float * b = l.weights_gpu; + float * c = l.output_gpu; + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (l.batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.outputs, 1); + } + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_connected_layer_gpu(layer l, network net) +{ + constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + if(l.batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.outputs, 1); + } + + int m = l.outputs; + int k = l.batch; + int n = l.inputs; + float * a = l.delta_gpu; + float * b = net.input_gpu; + float * c = l.weight_updates_gpu; + gemm_gpu(1,0,m,n,k,1,a,m,b,n,1,c,n); + + m = l.batch; + k = l.outputs; + n = l.inputs; + + a = l.delta_gpu; + b = l.weights_gpu; + c = net.delta_gpu; + + if(c) gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/connected_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/connected_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..6727a964eaa923906b202ff337aa69ad91817117 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/connected_layer.h @@ -0,0 +1,23 @@ +#ifndef CONNECTED_LAYER_H +#define CONNECTED_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam); + +void forward_connected_layer(layer l, network net); +void backward_connected_layer(layer l, network net); +void update_connected_layer(layer l, update_args a); + +#ifdef GPU +void forward_connected_layer_gpu(layer l, network net); +void 
backward_connected_layer_gpu(layer l, network net); +void update_connected_layer_gpu(layer l, update_args a); +void push_connected_layer(layer l); +void pull_connected_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/convolutional_kernels.cu b/workloads/realworld/uvm_prefetch_async/darknet/src/convolutional_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..3677bc8597d0e37f4d45848a75bf34165cafa729 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/convolutional_kernels.cu @@ -0,0 +1,370 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "gemm.h" +#include "blas.h" +#include "im2col.h" +#include "col2im.h" +#include "utils.h" +#include "cuda_dark.h" +} + +__global__ void binarize_kernel(float *x, int n, float *binary) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i >= n) return; + binary[i] = (x[i] >= 0) ? 1 : -1; +} + +void binarize_gpu(float *x, int n, float *binary) +{ + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(x, n * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(binary, n * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + binarize_kernel<<>>(x, n, binary); + check_error(cudaPeekAtLastError()); +} + +__global__ void binarize_input_kernel(float *input, int n, int size, float *binary) +{ + int s = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (s >= size) return; + int i = 0; + float mean = 0; + for(i = 0; i < n; ++i){ + mean += fabsf(input[i*size + s]); + } + mean = mean / n; + for(i = 0; i < n; ++i){ + binary[i*size + s] = (input[i*size + s] > 0) ? 
mean : -mean; + } +} + +void binarize_input_gpu(float *input, int n, int size, float *binary) +{ + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(input, n * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(binary, n * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + binarize_input_kernel<<>>(input, n, size, binary); + check_error(cudaPeekAtLastError()); +} + + +__global__ void binarize_weights_kernel(float *weights, int n, int size, float *binary) +{ + int f = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (f >= n) return; + int i = 0; + float mean = 0; + for(i = 0; i < size; ++i){ + mean += fabsf(weights[f*size + i]); + } + mean = mean / size; + for(i = 0; i < size; ++i){ + binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean; + //binary[f*size + i] = weights[f*size + i]; + } +} + +void binarize_weights_gpu(float *weights, int n, int size, float *binary) +{ + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(weights, n * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(binary, n * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + binarize_weights_kernel<<>>(weights, n, size, binary); + check_error(cudaPeekAtLastError()); +} + +void forward_convolutional_layer_gpu(convolutional_layer l, network net) +{ + fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); + if(l.binary){ + binarize_weights_gpu(l.weights_gpu, l.n, l.c/l.groups*l.size*l.size, l.binary_weights_gpu); + swap_binary(&l); + } + + if(l.xnor){ + binarize_weights_gpu(l.weights_gpu, l.n, l.c/l.groups*l.size*l.size, l.binary_weights_gpu); + swap_binary(&l); + binarize_gpu(net.input_gpu, l.c*l.h*l.w*l.batch, l.binary_input_gpu); + net.input_gpu = l.binary_input_gpu; + } + +#ifdef 
CUDNN + float one = 1; + cudnnConvolutionForward(cudnn_handle(), + &one, + l.srcTensorDesc, + net.input_gpu, + l.weightDesc, + l.weights_gpu, + l.convDesc, + l.fw_algo, + net.workspace, + l.workspace_size, + &one, + l.dstTensorDesc, + l.output_gpu); + +#else + int i, j; + int m = l.n/l.groups; + int k = l.size*l.size*l.c/l.groups; + int n = l.out_w*l.out_h; + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.weights_gpu + j*l.nweights/l.groups; + float *b = net.workspace; + float *c = l.output_gpu + (i*l.groups + j)*n*m; + float *im = net.input_gpu + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + + if (l.size == 1){ + b = im; + } else { + im2col_gpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + } + gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } +#endif + + if (l.batch_normalize) { + forward_batchnorm_layer_gpu(l, net); + } else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); + } + + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); + //if(l.dot > 0) dot_error_gpu(l); + if(l.binary || l.xnor) swap_binary(&l); +} + +__global__ void smooth_kernel(float *x, int n, int w, int h, int c, int size, float rate, float *delta) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int j = id % w; + id /= w; + int i = id % h; + id /= h; + int k = id % c; + id /= c; + int b = id; + + int w_offset = -(size/2.f); + int h_offset = -(size/2.f); + + int out_index = j + w*(i + h*(k + c*b)); + int l, m; + for(l = 0; l < size; ++l){ + for(m = 0; m < size; ++m){ + int cur_h = h_offset + i + l; + int cur_w = w_offset + j + m; + int index = cur_w + w*(cur_h + h*(k + b*c)); + int valid = (cur_h >= 0 && cur_h < h && + cur_w >= 0 && cur_w < w); + delta[out_index] += valid ? 
rate*(x[index] - x[out_index]) : 0; + } + } +} + +extern "C" void smooth_layer(layer l, int size, float rate) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.out_c; + + size_t n = h*w*c*l.batch; + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(l.output_gpu, n * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(l.delta_gpu, n * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + smooth_kernel<<>>(l.output_gpu, n, l.w, l.h, l.c, size, rate, l.delta_gpu); + check_error(cudaPeekAtLastError()); +} + +void backward_convolutional_layer_gpu(convolutional_layer l, network net) +{ + if(l.smooth){ + smooth_layer(l, 5, l.smooth); + } + //constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + + + if(l.batch_normalize){ + backward_batchnorm_layer_gpu(l, net); + } else { + backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); + } + float *original_input = net.input_gpu; + + if(l.xnor) net.input_gpu = l.binary_input_gpu; +#ifdef CUDNN + float one = 1; + cudnnConvolutionBackwardFilter(cudnn_handle(), + &one, + l.srcTensorDesc, + net.input_gpu, + l.ddstTensorDesc, + l.delta_gpu, + l.convDesc, + l.bf_algo, + net.workspace, + l.workspace_size, + &one, + l.dweightDesc, + l.weight_updates_gpu); + + if(net.delta_gpu){ + if(l.binary || l.xnor) swap_binary(&l); + cudnnConvolutionBackwardData(cudnn_handle(), + &one, + l.weightDesc, + l.weights_gpu, + l.ddstTensorDesc, + l.delta_gpu, + l.convDesc, + l.bd_algo, + net.workspace, + l.workspace_size, + &one, + l.dsrcTensorDesc, + net.delta_gpu); + if(l.binary || l.xnor) swap_binary(&l); + if(l.xnor) gradient_array_gpu(original_input, l.batch*l.c*l.h*l.w, HARDTAN, net.delta_gpu); + } + +#else + int m = l.n/l.groups; + int n = l.size*l.size*l.c/l.groups; + int k = l.out_w*l.out_h; + 
+ int i, j; + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.delta_gpu + (i*l.groups + j)*m*k; + float *b = net.workspace; + float *c = l.weight_updates_gpu + j*l.nweights/l.groups; + + float *im = net.input_gpu+(i*l.groups + j)*l.c/l.groups*l.h*l.w; + float *imd = net.delta_gpu+(i*l.groups + j)*l.c/l.groups*l.h*l.w; + + im2col_gpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (net.delta_gpu) { + if (l.binary || l.xnor) swap_binary(&l); + a = l.weights_gpu + j*l.nweights/l.groups; + b = l.delta_gpu + (i*l.groups + j)*m*k; + c = net.workspace; + if (l.size == 1) { + c = imd; + } + + gemm_gpu(1,0,n,k,m,1,a,n,b,k,0,c,k); + + if (l.size != 1) { + col2im_gpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd); + } + if(l.binary || l.xnor) { + swap_binary(&l); + } + } + if(l.xnor) gradient_array_gpu(original_input + i*l.c*l.h*l.w, l.c*l.h*l.w, HARDTAN, net.delta_gpu + i*l.c*l.h*l.w); + } + } +#endif +} + +void pull_convolutional_layer(layer l) +{ + cuda_pull_array(l.weights_gpu, l.weights, l.nweights); + cuda_pull_array(l.biases_gpu, l.biases, l.n); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_pull_array(l.scales_gpu, l.scales, l.n); + cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + +void push_convolutional_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.nweights); + cuda_push_array(l.biases_gpu, l.biases, l.n); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.batch_normalize){ + cuda_push_array(l.scales_gpu, l.scales, l.n); + cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n); + cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n); + } +} + 
+void update_convolutional_layer_gpu(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + if(a.adam){ + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t); + adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + if(l.scales_gpu){ + adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); + } + }else{ + axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1); + + axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.n, momentum, l.bias_updates_gpu, 1); + + if(l.scales_gpu){ + axpy_gpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.n, momentum, l.scale_updates_gpu, 1); + } + } + if(l.clip){ + constrain_gpu(l.nweights, l.clip, l.weights_gpu, 1); + } +} + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/convolutional_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/convolutional_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..1fb58b0933b06f2b27ec89f9f7c05f0b2b8a87eb --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/convolutional_layer.c @@ -0,0 +1,622 @@ +#include "convolutional_layer.h" +#include "utils.h" +#include "batchnorm_layer.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" +#include +#include + +#ifdef AI2 +#include "xnor_layer.h" +#endif + +void swap_binary(convolutional_layer *l) +{ + float *swap = l->weights; + l->weights = l->binary_weights; + 
l->binary_weights = swap; + +#ifdef GPU + swap = l->weights_gpu; + l->weights_gpu = l->binary_weights_gpu; + l->binary_weights_gpu = swap; +#endif +} + +void binarize_weights(float *weights, int n, int size, float *binary) +{ + int i, f; + for(f = 0; f < n; ++f){ + float mean = 0; + for(i = 0; i < size; ++i){ + mean += fabs(weights[f*size + i]); + } + mean = mean / size; + for(i = 0; i < size; ++i){ + binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean; + } + } +} + +void binarize_cpu(float *input, int n, float *binary) +{ + int i; + for(i = 0; i < n; ++i){ + binary[i] = (input[i] > 0) ? 1 : -1; + } +} + +void binarize_input(float *input, int n, int size, float *binary) +{ + int i, s; + for(s = 0; s < size; ++s){ + float mean = 0; + for(i = 0; i < n; ++i){ + mean += fabs(input[i*size + s]); + } + mean = mean / n; + for(i = 0; i < n; ++i){ + binary[i*size + s] = (input[i*size + s] > 0) ? mean : -mean; + } + } +} + +int convolutional_out_height(convolutional_layer l) +{ + return (l.h + 2*l.pad - l.size) / l.stride + 1; +} + +int convolutional_out_width(convolutional_layer l) +{ + return (l.w + 2*l.pad - l.size) / l.stride + 1; +} + +image get_convolutional_image(convolutional_layer l) +{ + return float_to_image(l.out_w,l.out_h,l.out_c,l.output); +} + +image get_convolutional_delta(convolutional_layer l) +{ + return float_to_image(l.out_w,l.out_h,l.out_c,l.delta); +} + +static size_t get_workspace_size(layer l){ +#ifdef CUDNN + if(gpu_index >= 0){ + size_t most = 0; + size_t s = 0; + cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(), + l.srcTensorDesc, + l.weightDesc, + l.convDesc, + l.dstTensorDesc, + l.fw_algo, + &s); + if (s > most) most = s; + cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnn_handle(), + l.srcTensorDesc, + l.ddstTensorDesc, + l.convDesc, + l.dweightDesc, + l.bf_algo, + &s); + if (s > most) most = s; + cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(), + l.weightDesc, + l.ddstTensorDesc, + l.convDesc, + l.dsrcTensorDesc, 
+ l.bd_algo, + &s); + if (s > most) most = s; + return most; + } +#endif + return (size_t)l.out_h*l.out_w*l.size*l.size*l.c/l.groups*sizeof(float); +} + +#ifdef GPU +#ifdef CUDNN +void cudnn_convolutional_setup(layer *l) +{ + cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); + cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + + cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + + cudnnSetFilter4dDescriptor(l->dweightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size); + cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size); + #if CUDNN_MAJOR >= 6 + cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT); + #else + cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION); + #endif + + #if CUDNN_MAJOR >= 7 + cudnnSetConvolutionGroupCount(l->convDesc, l->groups); + #else + if(l->groups > 1){ + error("CUDNN < 7 doesn't support groups, please upgrade!"); + } + #endif + + cudnnGetConvolutionForwardAlgorithm(cudnn_handle(), + l->srcTensorDesc, + l->weightDesc, + l->convDesc, + l->dstTensorDesc, + CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->fw_algo); + cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(), + l->weightDesc, + l->ddstTensorDesc, + l->convDesc, + l->dsrcTensorDesc, + CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->bd_algo); + 
cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(), + l->srcTensorDesc, + l->ddstTensorDesc, + l->convDesc, + l->dweightDesc, + CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, + 2000000000, + &l->bf_algo); +} +#endif +#endif + +convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam) +{ + int i; + convolutional_layer l = {0}; + l.type = CONVOLUTIONAL; + + l.groups = groups; + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.binary = binary; + l.xnor = xnor; + l.batch = batch; + l.stride = stride; + l.size = size; + l.pad = padding; + l.batch_normalize = batch_normalize; + + l.weights = calloc(c/groups*n*size*size, sizeof(float)); + l.weight_updates = calloc(c/groups*n*size*size, sizeof(float)); + + l.biases = calloc(n, sizeof(float)); + l.bias_updates = calloc(n, sizeof(float)); + + l.nweights = c/groups*n*size*size; + l.nbiases = n; + + // float scale = 1./sqrt(size*size*c); + float scale = sqrt(2./(size*size*c/l.groups)); + //printf("convscale %f\n", scale); + //scale = .02; + //for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1); + for(i = 0; i < l.nweights; ++i) l.weights[i] = scale*rand_normal(); + int out_w = convolutional_out_width(l); + int out_h = convolutional_out_height(l); + l.out_h = out_h; + l.out_w = out_w; + l.out_c = n; + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = l.w * l.h * l.c; + + l.output = calloc(l.batch*l.outputs, sizeof(float)); + l.delta = calloc(l.batch*l.outputs, sizeof(float)); + + l.forward = forward_convolutional_layer; + l.backward = backward_convolutional_layer; + l.update = update_convolutional_layer; + if(binary){ + l.binary_weights = calloc(l.nweights, sizeof(float)); + l.cweights = calloc(l.nweights, sizeof(char)); + l.scales = calloc(n, sizeof(float)); + } + if(xnor){ + l.binary_weights = calloc(l.nweights, sizeof(float)); + 
l.binary_input = calloc(l.inputs*l.batch, sizeof(float)); + } + + if(batch_normalize){ + l.scales = calloc(n, sizeof(float)); + l.scale_updates = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(n, sizeof(float)); + l.variance = calloc(n, sizeof(float)); + + l.mean_delta = calloc(n, sizeof(float)); + l.variance_delta = calloc(n, sizeof(float)); + + l.rolling_mean = calloc(n, sizeof(float)); + l.rolling_variance = calloc(n, sizeof(float)); + l.x = calloc(l.batch*l.outputs, sizeof(float)); + l.x_norm = calloc(l.batch*l.outputs, sizeof(float)); + } + if(adam){ + l.m = calloc(l.nweights, sizeof(float)); + l.v = calloc(l.nweights, sizeof(float)); + l.bias_m = calloc(n, sizeof(float)); + l.scale_m = calloc(n, sizeof(float)); + l.bias_v = calloc(n, sizeof(float)); + l.scale_v = calloc(n, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_convolutional_layer_gpu; + l.backward_gpu = backward_convolutional_layer_gpu; + l.update_gpu = update_convolutional_layer_gpu; + + if(gpu_index >= 0){ + if (adam) { + l.m_gpu = cuda_make_array(l.m, l.nweights); + l.v_gpu = cuda_make_array(l.v, l.nweights); + l.bias_m_gpu = cuda_make_array(l.bias_m, n); + l.bias_v_gpu = cuda_make_array(l.bias_v, n); + l.scale_m_gpu = cuda_make_array(l.scale_m, n); + l.scale_v_gpu = cuda_make_array(l.scale_v, n); + } + + l.weights_gpu = cuda_make_array(l.weights, l.nweights); + l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights); + + l.biases_gpu = cuda_make_array(l.biases, n); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + + if(binary){ + l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); + } + if(xnor){ + l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); + l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch); + } + + if(batch_normalize){ + l.mean_gpu = 
cuda_make_array(l.mean, n); + l.variance_gpu = cuda_make_array(l.variance, n); + + l.rolling_mean_gpu = cuda_make_array(l.mean, n); + l.rolling_variance_gpu = cuda_make_array(l.variance, n); + + l.mean_delta_gpu = cuda_make_array(l.mean, n); + l.variance_delta_gpu = cuda_make_array(l.variance, n); + + l.scales_gpu = cuda_make_array(l.scales, n); + l.scale_updates_gpu = cuda_make_array(l.scale_updates, n); + + l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); + } +#ifdef CUDNN + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnCreateTensorDescriptor(&l.srcTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnCreateFilterDescriptor(&l.weightDesc); + cudnnCreateTensorDescriptor(&l.dsrcTensorDesc); + cudnnCreateTensorDescriptor(&l.ddstTensorDesc); + cudnnCreateFilterDescriptor(&l.dweightDesc); + cudnnCreateConvolutionDescriptor(&l.convDesc); + cudnn_convolutional_setup(&l); +#endif + } +#endif + l.workspace_size = get_workspace_size(l); + l.activation = activation; + + fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BFLOPs\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, (2.0 * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w)/1000000000.); + + return l; +} + +void denormalize_convolutional_layer(convolutional_layer l) +{ + int i, j; + for(i = 0; i < l.n; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001); + for(j = 0; j < l.c/l.groups*l.size*l.size; ++j){ + l.weights[i*l.c/l.groups*l.size*l.size + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + +/* +void test_convolutional_layer() +{ + convolutional_layer l = make_convolutional_layer(1, 5, 5, 3, 2, 5, 2, 1, LEAKY, 1, 0, 0, 0); + l.batch_normalize = 1; + float data[] = {1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 1,1,1,1,1, + 2,2,2,2,2, + 2,2,2,2,2, + 
2,2,2,2,2, + 2,2,2,2,2, + 2,2,2,2,2, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3, + 3,3,3,3,3}; + //net.input = data; + //forward_convolutional_layer(l); +} +*/ + +void resize_convolutional_layer(convolutional_layer *l, int w, int h) +{ + l->w = w; + l->h = h; + int out_w = convolutional_out_width(*l); + int out_h = convolutional_out_height(*l); + + l->out_w = out_w; + l->out_h = out_h; + + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->w * l->h * l->c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + if(l->batch_normalize){ + l->x = realloc(l->x, l->batch*l->outputs*sizeof(float)); + l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float)); + } + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); + + if(l->batch_normalize){ + cuda_free(l->x_gpu); + cuda_free(l->x_norm_gpu); + + l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); + l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); + } +#ifdef CUDNN + cudnn_convolutional_setup(l); +#endif +#endif + l->workspace_size = get_workspace_size(*l); +} + +void add_bias(float *output, float *biases, int batch, int n, int size) +{ + int i,j,b; + for(b = 0; b < batch; ++b){ + for(i = 0; i < n; ++i){ + for(j = 0; j < size; ++j){ + output[(b*n + i)*size + j] += biases[i]; + } + } + } +} + +void scale_bias(float *output, float *scales, int batch, int n, int size) +{ + int i,j,b; + for(b = 0; b < batch; ++b){ + for(i = 0; i < n; ++i){ + for(j = 0; j < size; ++j){ + output[(b*n + i)*size + j] *= scales[i]; + } + } + } +} + +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size) +{ + int i,b; + for(b = 0; b < batch; ++b){ + for(i = 0; i < n; ++i){ + bias_updates[i] += sum_array(delta+size*(i+b*n), size); + } + } +} + 
+void forward_convolutional_layer(convolutional_layer l, network net) +{ + int i, j; + + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + + if(l.xnor){ + binarize_weights(l.weights, l.n, l.c/l.groups*l.size*l.size, l.binary_weights); + swap_binary(&l); + binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input); + net.input = l.binary_input; + } + + int m = l.n/l.groups; + int k = l.size*l.size*l.c/l.groups; + int n = l.out_w*l.out_h; + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.weights + j*l.nweights/l.groups; + float *b = net.workspace; + float *c = l.output + (i*l.groups + j)*n*m; + float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + + if (l.size == 1) { + b = im; + } else { + im2col_cpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); + } + gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } + + if(l.batch_normalize){ + forward_batchnorm_layer(l, net); + } else { + add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w); + } + + activate_array(l.output, l.outputs*l.batch, l.activation); + if(l.binary || l.xnor) swap_binary(&l); +} + +void backward_convolutional_layer(convolutional_layer l, network net) +{ + int i, j; + int m = l.n/l.groups; + int n = l.size*l.size*l.c/l.groups; + int k = l.out_w*l.out_h; + + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l.bias_updates, l.delta, l.batch, l.n, k); + } + + for(i = 0; i < l.batch; ++i){ + for(j = 0; j < l.groups; ++j){ + float *a = l.delta + (i*l.groups + j)*m*k; + float *b = net.workspace; + float *c = l.weight_updates + j*l.nweights/l.groups; + + float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + float *imd = net.delta + (i*l.groups + j)*l.c/l.groups*l.h*l.w; + + if(l.size == 1){ + b = im; + } else { + im2col_cpu(im, l.c/l.groups, l.h, l.w, + l.size, l.stride, l.pad, b); + } + + gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if (net.delta) { + a = 
l.weights + j*l.nweights/l.groups; + b = l.delta + (i*l.groups + j)*m*k; + c = net.workspace; + if (l.size == 1) { + c = imd; + } + + gemm(1,0,n,k,m,1,a,n,b,k,0,c,k); + + if (l.size != 1) { + col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd); + } + } + } + } +} + +void update_convolutional_layer(convolutional_layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.n, momentum, l.bias_updates, 1); + + if(l.scales){ + axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.n, momentum, l.scale_updates, 1); + } + + axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(l.nweights, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(l.nweights, momentum, l.weight_updates, 1); +} + + +image get_convolutional_weight(convolutional_layer l, int i) +{ + int h = l.size; + int w = l.size; + int c = l.c/l.groups; + return float_to_image(w,h,c,l.weights+i*h*w*c); +} + +void rgbgr_weights(convolutional_layer l) +{ + int i; + for(i = 0; i < l.n; ++i){ + image im = get_convolutional_weight(l, i); + if (im.c == 3) { + rgbgr_image(im); + } + } +} + +void rescale_weights(convolutional_layer l, float scale, float trans) +{ + int i; + for(i = 0; i < l.n; ++i){ + image im = get_convolutional_weight(l, i); + if (im.c == 3) { + scale_image(im, scale); + float sum = sum_array(im.data, im.w*im.h*im.c); + l.biases[i] += sum*trans; + } + } +} + +image *get_weights(convolutional_layer l) +{ + image *weights = calloc(l.n, sizeof(image)); + int i; + for(i = 0; i < l.n; ++i){ + weights[i] = copy_image(get_convolutional_weight(l, i)); + normalize_image(weights[i]); + /* + char buff[256]; + sprintf(buff, "filter%d", i); + save_image(weights[i], buff); + */ + } + //error("hey"); + return weights; +} + +image 
*visualize_convolutional_layer(convolutional_layer l, char *window, image *prev_weights) +{ + image *single_weights = get_weights(l); + show_images(single_weights, l.n, window); + + image delta = get_convolutional_image(l); + image dc = collapse_image_layers(delta, 1); + char buff[256]; + sprintf(buff, "%s: Output", window); + //show_image(dc, buff); + //save_image(dc, buff); + free_image(dc); + return single_weights; +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/convolutional_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/convolutional_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..baacf38f4127a42abe009ef8aa3b59543433a286 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/convolutional_layer.h @@ -0,0 +1,50 @@ +#ifndef CONVOLUTIONAL_LAYER_H +#define CONVOLUTIONAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +typedef layer convolutional_layer; + +#ifdef GPU +void forward_convolutional_layer_gpu(convolutional_layer layer, network net); +void backward_convolutional_layer_gpu(convolutional_layer layer, network net); +void update_convolutional_layer_gpu(convolutional_layer layer, update_args a); + +void push_convolutional_layer(convolutional_layer layer); +void pull_convolutional_layer(convolutional_layer layer); + +void add_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); +void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t); +#ifdef CUDNN +void cudnn_convolutional_setup(layer *l); +#endif +#endif + +convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam); +void 
resize_convolutional_layer(convolutional_layer *layer, int w, int h); +void forward_convolutional_layer(const convolutional_layer layer, network net); +void update_convolutional_layer(convolutional_layer layer, update_args a); +image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_weights); +void binarize_weights(float *weights, int n, int size, float *binary); +void swap_binary(convolutional_layer *l); +void binarize_weights2(float *weights, int n, int size, char *binary, float *scales); + +void backward_convolutional_layer(convolutional_layer layer, network net); + +void add_bias(float *output, float *biases, int batch, int n, int size); +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); + +image get_convolutional_image(convolutional_layer layer); +image get_convolutional_delta(convolutional_layer layer); +image get_convolutional_weight(convolutional_layer layer, int i); + +int convolutional_out_height(convolutional_layer layer); +int convolutional_out_width(convolutional_layer layer); + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/cost_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/cost_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..85fa85daf306dda03c113a6bbdc2d92b25d0b00d --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/cost_layer.c @@ -0,0 +1,176 @@ +#include "cost_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include +#include +#include +#include + +COST_TYPE get_cost_type(char *s) +{ + if (strcmp(s, "seg")==0) return SEG; + if (strcmp(s, "sse")==0) return SSE; + if (strcmp(s, "masked")==0) return MASKED; + if (strcmp(s, "smooth")==0) return SMOOTH; + if (strcmp(s, "L1")==0) return L1; + if (strcmp(s, "wgan")==0) return WGAN; + fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s); + return SSE; +} + +char *get_cost_string(COST_TYPE a) +{ + switch(a){ + case 
SEG: + return "seg"; + case SSE: + return "sse"; + case MASKED: + return "masked"; + case SMOOTH: + return "smooth"; + case L1: + return "L1"; + case WGAN: + return "wgan"; + } + return "sse"; +} + +cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) +{ + fprintf(stderr, "cost %4d\n", inputs); + cost_layer l = {0}; + l.type = COST; + + l.scale = scale; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.cost_type = cost_type; + l.delta = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_cost_layer; + l.backward = backward_cost_layer; + #ifdef GPU + l.forward_gpu = forward_cost_layer_gpu; + l.backward_gpu = backward_cost_layer_gpu; + + l.delta_gpu = cuda_make_array(l.output, inputs*batch); + l.output_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void resize_cost_layer(cost_layer *l, int inputs) +{ + l->inputs = inputs; + l->outputs = inputs; + l->delta = realloc(l->delta, inputs*l->batch*sizeof(float)); + l->output = realloc(l->output, inputs*l->batch*sizeof(float)); +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch); + l->output_gpu = cuda_make_array(l->output, inputs*l->batch); +#endif +} + +void forward_cost_layer(cost_layer l, network net) +{ + if (!net.truth) return; + if(l.cost_type == MASKED){ + int i; + for(i = 0; i < l.batch*l.inputs; ++i){ + if(net.truth[i] == SECRET_NUM) net.input[i] = SECRET_NUM; + } + } + if(l.cost_type == SMOOTH){ + smooth_l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + }else if(l.cost_type == L1){ + l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + } else { + l2_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output); + } + l.cost[0] = sum_array(l.output, l.batch*l.inputs); +} + +void backward_cost_layer(const cost_layer l, network net) +{ + 
axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_cost_layer(cost_layer l) +{ + cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +void push_cost_layer(cost_layer l) +{ + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +int float_abs_compare (const void * a, const void * b) +{ + float fa = *(const float*) a; + if(fa < 0) fa = -fa; + float fb = *(const float*) b; + if(fb < 0) fb = -fb; + return (fa > fb) - (fa < fb); +} + +void forward_cost_layer_gpu(cost_layer l, network net) +{ + if (!net.truth) return; + if(l.smooth){ + scal_gpu(l.batch*l.inputs, (1-l.smooth), net.truth_gpu, 1); + add_gpu(l.batch*l.inputs, l.smooth * 1./l.inputs, net.truth_gpu, 1); + } + + if(l.cost_type == SMOOTH){ + smooth_l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } else if (l.cost_type == L1){ + l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } else if (l.cost_type == WGAN){ + wgan_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } else { + l2_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu); + } + + if (l.cost_type == SEG && l.noobject_scale != 1) { + scale_mask_gpu(l.batch*l.inputs, l.delta_gpu, 0, net.truth_gpu, l.noobject_scale); + scale_mask_gpu(l.batch*l.inputs, l.output_gpu, 0, net.truth_gpu, l.noobject_scale); + } + if (l.cost_type == MASKED) { + mask_gpu(l.batch*l.inputs, net.delta_gpu, SECRET_NUM, net.truth_gpu, 0); + } + + if(l.ratio){ + cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); + qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare); + int n = (1-l.ratio) * l.batch*l.inputs; + float thresh = l.delta[n]; + thresh = 0; + printf("%f\n", thresh); + supp_gpu(l.batch*l.inputs, thresh, l.delta_gpu, 1); + } + + if(l.thresh){ + supp_gpu(l.batch*l.inputs, l.thresh*1./l.inputs, l.delta_gpu, 1); + } + + cuda_pull_array(l.output_gpu, l.output, 
l.batch*l.inputs); + l.cost[0] = sum_array(l.output, l.batch*l.inputs); +} + +void backward_cost_layer_gpu(const cost_layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/cost_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/cost_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..ceb64de00bf66839c2f34852a05ea71114608a35 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/cost_layer.h @@ -0,0 +1,20 @@ +#ifndef COST_LAYER_H +#define COST_LAYER_H +#include "layer.h" +#include "network.h" + +typedef layer cost_layer; + +COST_TYPE get_cost_type(char *s); +char *get_cost_string(COST_TYPE a); +cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale); +void forward_cost_layer(const cost_layer l, network net); +void backward_cost_layer(const cost_layer l, network net); +void resize_cost_layer(cost_layer *l, int inputs); + +#ifdef GPU +void forward_cost_layer_gpu(cost_layer l, network net); +void backward_cost_layer_gpu(const cost_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/cpu_timestamps.c b/workloads/realworld/uvm_prefetch_async/darknet/src/cpu_timestamps.c new file mode 100644 index 0000000000000000000000000000000000000000..35114479c7a9cce3debe2204b6886ad5528041d5 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/cpu_timestamps.c @@ -0,0 +1,20 @@ +#include "cpu_timestamps.h" + +void startCPU() { + struct timespec tv; + if(clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + startCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); +} + + + +void endCPU() { + struct timespec tv; + if(clock_gettime(CLOCK_REALTIME, &tv)) + printf("error clock_gettime\n"); + + endCPUTime = (tv.tv_sec * 1.0e9 + tv.tv_nsec); + //endCPUTimestamp1 = std::chrono::system_clock::now(); + 
printf("CPU_Times,%lu,%lu,%lu\n", startCPUTime, endCPUTime, endCPUTime-startCPUTime); +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/cpu_timestamps.h b/workloads/realworld/uvm_prefetch_async/darknet/src/cpu_timestamps.h new file mode 100644 index 0000000000000000000000000000000000000000..e53e995a5603b4610759c02a4a179eb9f0124e48 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/cpu_timestamps.h @@ -0,0 +1,21 @@ +#ifndef CPU_TIMESTAMP_ +#define CPU_TIMESTAMP_ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +static uint64_t startCPUTime; +static uint64_t endCPUTime; + +void startCPU(); +void endCPU(); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/crnn_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/crnn_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..159e17f92d45693461c92d482bf3aa7354a148d8 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/crnn_layer.c @@ -0,0 +1,283 @@ +#include "crnn_layer.h" +#include "convolutional_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize) +{ + fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = CRNN; + l.steps = steps; + l.h = h; + l.w = w; + l.c = c; + l.out_h = h; + l.out_w = w; + l.out_c = output_filters; + l.inputs = h*w*c; 
+ l.hidden = h * w * hidden_filters; + l.outputs = l.out_h * l.out_w * l.out_c; + + l.state = calloc(l.hidden*batch*(steps+1), sizeof(float)); + + l.input_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.input_layer) = make_convolutional_layer(batch*steps, h, w, c, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.input_layer->batch = batch; + + l.self_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.self_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.self_layer->batch = batch; + + l.output_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.output_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, output_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); + l.output_layer->batch = batch; + + l.output = l.output_layer->output; + l.delta = l.output_layer->delta; + + l.forward = forward_crnn_layer; + l.backward = backward_crnn_layer; + l.update = update_crnn_layer; + +#ifdef GPU + l.forward_gpu = forward_crnn_layer_gpu; + l.backward_gpu = backward_crnn_layer_gpu; + l.update_gpu = update_crnn_layer_gpu; + + l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1)); + l.output_gpu = l.output_layer->output_gpu; + l.delta_gpu = l.output_layer->delta_gpu; +#endif + + return l; +} + +void update_crnn_layer(layer l, update_args a) +{ + update_convolutional_layer(*(l.input_layer), a); + update_convolutional_layer(*(l.self_layer), a); + update_convolutional_layer(*(l.output_layer), a); +} + +void forward_crnn_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1); + fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1); + fill_cpu(l.hidden * l.batch * l.steps, 0, 
input_layer.delta, 1); + if(net.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1); + + for (i = 0; i < l.steps; ++i) { + s.input = net.input; + forward_convolutional_layer(input_layer, s); + + s.input = l.state; + forward_convolutional_layer(self_layer, s); + + float *old_state = l.state; + if(net.train) l.state += l.hidden*l.batch; + if(l.shortcut){ + copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1); + }else{ + fill_cpu(l.hidden * l.batch, 0, l.state, 1); + } + axpy_cpu(l.hidden * l.batch, 1, input_layer.output, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + forward_convolutional_layer(output_layer, s); + + net.input += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_crnn_layer(layer l, network net) +{ + network s = net; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + increment_layer(&input_layer, l.steps-1); + increment_layer(&self_layer, l.steps-1); + increment_layer(&output_layer, l.steps-1); + + l.state += l.hidden*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + s.delta = self_layer.delta; + backward_convolutional_layer(output_layer, s); + + l.state -= l.hidden*l.batch; + /* + if(i > 0){ + copy_cpu(l.hidden * l.batch, input_layer.output - l.hidden*l.batch, 1, l.state, 1); + axpy_cpu(l.hidden * l.batch, 1, self_layer.output - l.hidden*l.batch, 1, l.state, 1); + }else{ + fill_cpu(l.hidden * l.batch, 0, l.state, 1); + } + */ + + s.input = l.state; + s.delta = self_layer.delta - l.hidden*l.batch; + if (i == 0) s.delta = 0; + backward_convolutional_layer(self_layer, s); + + copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1); + if 
(i > 0 && l.shortcut) axpy_cpu(l.hidden*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.hidden*l.batch, 1); + s.input = net.input + i*l.inputs*l.batch; + if(net.delta) s.delta = net.delta + i*l.inputs*l.batch; + else s.delta = 0; + backward_convolutional_layer(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} + +#ifdef GPU + +void pull_crnn_layer(layer l) +{ + pull_convolutional_layer(*(l.input_layer)); + pull_convolutional_layer(*(l.self_layer)); + pull_convolutional_layer(*(l.output_layer)); +} + +void push_crnn_layer(layer l) +{ + push_convolutional_layer(*(l.input_layer)); + push_convolutional_layer(*(l.self_layer)); + push_convolutional_layer(*(l.output_layer)); +} + +void update_crnn_layer_gpu(layer l, update_args a) +{ + update_convolutional_layer_gpu(*(l.input_layer), a); + update_convolutional_layer_gpu(*(l.self_layer), a); + update_convolutional_layer_gpu(*(l.output_layer), a); +} + +void forward_crnn_layer_gpu(layer l, network net) +{ + network s = net; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_gpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); + fill_gpu(l.hidden * l.batch * l.steps, 0, self_layer.delta_gpu, 1); + fill_gpu(l.hidden * l.batch * l.steps, 0, input_layer.delta_gpu, 1); + if(net.train) fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1); + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = net.input_gpu; + forward_convolutional_layer_gpu(input_layer, s); + + s.input_gpu = l.state_gpu; + forward_convolutional_layer_gpu(self_layer, s); + + float *old_state = l.state_gpu; + if(net.train) l.state_gpu += l.hidden*l.batch; + if(l.shortcut){ + copy_gpu(l.hidden * l.batch, old_state, 1, l.state_gpu, 1); + }else{ + fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1); + } + axpy_gpu(l.hidden * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + 
axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + forward_convolutional_layer_gpu(output_layer, s); + + net.input_gpu += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_crnn_layer_gpu(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + increment_layer(&input_layer, l.steps - 1); + increment_layer(&self_layer, l.steps - 1); + increment_layer(&output_layer, l.steps - 1); + l.state_gpu += l.hidden*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_gpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu; + backward_convolutional_layer_gpu(output_layer, s); + + l.state_gpu -= l.hidden*l.batch; + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu - l.hidden*l.batch; + if (i == 0) s.delta_gpu = 0; + backward_convolutional_layer_gpu(self_layer, s); + + copy_gpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); + if (i > 0 && l.shortcut) axpy_gpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1); + s.input_gpu = net.input_gpu + i*l.inputs*l.batch; + if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch; + else s.delta_gpu = 0; + backward_convolutional_layer_gpu(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/crnn_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/crnn_layer.h new file mode 100644 index 
0000000000000000000000000000000000000000..515f378354e9cc6149e7a1ac60ffc86ace112991 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/crnn_layer.h @@ -0,0 +1,24 @@ + +#ifndef CRNN_LAYER_H +#define CRNN_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize); + +void forward_crnn_layer(layer l, network net); +void backward_crnn_layer(layer l, network net); +void update_crnn_layer(layer l, update_args a); + +#ifdef GPU +void forward_crnn_layer_gpu(layer l, network net); +void backward_crnn_layer_gpu(layer l, network net); +void update_crnn_layer_gpu(layer l, update_args a); +void push_crnn_layer(layer l); +void pull_crnn_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/crop_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/crop_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..3c8c9650bda6dcf4485ce8da8e2fa1984f2b244d --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/crop_layer.c @@ -0,0 +1,103 @@ +#include "crop_layer.h" +#include "cuda_dark.h" +#include + +image get_crop_image(crop_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.out_c; + return float_to_image(w,h,c,l.output); +} + +void backward_crop_layer(const crop_layer l, network net){} +void backward_crop_layer_gpu(const crop_layer l, network net){} + +crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure) +{ + fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c); + crop_layer l = {0}; + l.type = CROP; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.scale = (float)crop_height / h; + l.flip = flip; + l.angle = angle; + l.saturation = saturation; + l.exposure = 
exposure; + l.out_w = crop_width; + l.out_h = crop_height; + l.out_c = c; + l.inputs = l.w * l.h * l.c; + l.outputs = l.out_w * l.out_h * l.out_c; + l.output = calloc(l.outputs*batch, sizeof(float)); + l.forward = forward_crop_layer; + l.backward = backward_crop_layer; + + #ifdef GPU + l.forward_gpu = forward_crop_layer_gpu; + l.backward_gpu = backward_crop_layer_gpu; + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + l.rand_gpu = cuda_make_array(0, l.batch*8); + #endif + return l; +} + +void resize_crop_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->out_w = l->scale*w; + l->out_h = l->scale*h; + + l->inputs = l->w * l->h * l->c; + l->outputs = l->out_h * l->out_w * l->out_c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + #ifdef GPU + cuda_free(l->output_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + #endif +} + + +void forward_crop_layer(const crop_layer l, network net) +{ + int i,j,c,b,row,col; + int index; + int count = 0; + int flip = (l.flip && rand()%2); + int dh = rand()%(l.h - l.out_h + 1); + int dw = rand()%(l.w - l.out_w + 1); + float scale = 2; + float trans = -1; + if(l.noadjust){ + scale = 1; + trans = 0; + } + if(!net.train){ + flip = 0; + dh = (l.h - l.out_h)/2; + dw = (l.w - l.out_w)/2; + } + for(b = 0; b < l.batch; ++b){ + for(c = 0; c < l.c; ++c){ + for(i = 0; i < l.out_h; ++i){ + for(j = 0; j < l.out_w; ++j){ + if(flip){ + col = l.w - dw - j - 1; + }else{ + col = j + dw; + } + row = i + dh; + index = col+l.w*(row+l.h*(c + l.c*b)); + l.output[count++] = net.input[index]*scale + trans; + } + } + } + } +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/crop_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/crop_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..3b5883c47d6df0987700e1b0434010eebd6312af --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/crop_layer.h @@ -0,0 +1,20 @@ +#ifndef 
CROP_LAYER_H +#define CROP_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +typedef layer crop_layer; + +image get_crop_image(crop_layer l); +crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure); +void forward_crop_layer(const crop_layer l, network net); +void resize_crop_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_crop_layer_gpu(crop_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/crop_layer_kernels.cu b/workloads/realworld/uvm_prefetch_async/darknet/src/crop_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..1b355fe6f9924ef8d53ff323f1f542a223fd4e78 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/crop_layer_kernels.cu @@ -0,0 +1,239 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "crop_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "image.h" +} + +__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c) +{ + if(x < 0 || x >= w || y < 0 || y >= h) return 0; + return image[x + w*(y + c*h)]; +} + +__device__ float3 rgb_to_hsv_kernel(float3 rgb) +{ + float r = rgb.x; + float g = rgb.y; + float b = rgb.z; + + float h, s, v; + float max = (r > g) ? ( (r > b) ? r : b) : ( (g > b) ? g : b); + float min = (r < g) ? ( (r < b) ? r : b) : ( (g < b) ? 
g : b); + float delta = max - min; + v = max; + if(max == 0){ + s = 0; + h = -1; + }else{ + s = delta/max; + if(r == max){ + h = (g - b) / delta; + } else if (g == max) { + h = 2 + (b - r) / delta; + } else { + h = 4 + (r - g) / delta; + } + if (h < 0) h += 6; + } + return make_float3(h, s, v); +} + +__device__ float3 hsv_to_rgb_kernel(float3 hsv) +{ + float h = hsv.x; + float s = hsv.y; + float v = hsv.z; + + float r, g, b; + float f, p, q, t; + + if (s == 0) { + r = g = b = v; + } else { + int index = (int) floorf(h); + f = h - index; + p = v*(1-s); + q = v*(1-s*f); + t = v*(1-s*(1-f)); + if(index == 0){ + r = v; g = t; b = p; + } else if(index == 1){ + r = q; g = v; b = p; + } else if(index == 2){ + r = p; g = v; b = t; + } else if(index == 3){ + r = p; g = q; b = v; + } else if(index == 4){ + r = t; g = p; b = v; + } else { + r = v; g = p; b = q; + } + } + r = (r < 0) ? 0 : ((r > 1) ? 1 : r); + g = (g < 0) ? 0 : ((g > 1) ? 1 : g); + b = (b < 0) ? 0 : ((b > 1) ? 1 : b); + return make_float3(r, g, b); +} + +__device__ float bilinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c) +{ + int ix = (int) floorf(x); + int iy = (int) floorf(y); + + float dx = x - ix; + float dy = y - iy; + + float val = (1-dy) * (1-dx) * get_pixel_kernel(image, w, h, ix, iy, c) + + dy * (1-dx) * get_pixel_kernel(image, w, h, ix, iy+1, c) + + (1-dy) * dx * get_pixel_kernel(image, w, h, ix+1, iy, c) + + dy * dx * get_pixel_kernel(image, w, h, ix+1, iy+1, c); + return val; +} + +__global__ void levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure, float translate, float scale, float shift) +{ + int size = batch * w * h; + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= size) return; + int x = id % w; + id /= w; + int y = id % h; + id /= h; + float rshift = rand[0]; + float gshift = rand[1]; + float bshift = rand[2]; + float r0 = rand[8*id + 0]; + float r1 = rand[8*id + 
1]; + float r2 = rand[8*id + 2]; + float r3 = rand[8*id + 3]; + + saturation = r0*(saturation - 1) + 1; + saturation = (r1 > .5f) ? 1.f/saturation : saturation; + exposure = r2*(exposure - 1) + 1; + exposure = (r3 > .5f) ? 1.f/exposure : exposure; + + size_t offset = id * h * w * 3; + image += offset; + float r = image[x + w*(y + h*0)]; + float g = image[x + w*(y + h*1)]; + float b = image[x + w*(y + h*2)]; + float3 rgb = make_float3(r,g,b); + if(train){ + float3 hsv = rgb_to_hsv_kernel(rgb); + hsv.y *= saturation; + hsv.z *= exposure; + rgb = hsv_to_rgb_kernel(hsv); + } else { + shift = 0; + } + image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5f)*shift; + image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5f)*shift; + image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5f)*shift; +} + +__global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= size) return; + + float cx = w/2.f; + float cy = h/2.f; + + int count = id; + int j = id % crop_width; + id /= crop_width; + int i = id % crop_height; + id /= crop_height; + int k = id % c; + id /= c; + int b = id; + + float r4 = rand[8*b + 4]; + float r5 = rand[8*b + 5]; + float r6 = rand[8*b + 6]; + float r7 = rand[8*b + 7]; + + float dw = (w - crop_width)*r4; + float dh = (h - crop_height)*r5; + flip = (flip && (r6 > .5f)); + angle = 2*angle*r7 - angle; + if(!train){ + dw = (w - crop_width)/2.f; + dh = (h - crop_height)/2.f; + flip = 0; + angle = 0; + } + + input += w*h*c*b; + + float x = (flip) ? 
w - dw - j - 1 : j + dw; + float y = i + dh; + + float rx = cosf(angle)*(x-cx) - sinf(angle)*(y-cy) + cx; + float ry = sinf(angle)*(x-cx) + cosf(angle)*(y-cy) + cy; + + output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k); +} + +extern "C" void forward_crop_layer_gpu(crop_layer layer, network net) +{ + cuda_random(layer.rand_gpu, layer.batch*8); + + float radians = layer.angle*3.14159265f/180.f; + + float scale = 2; + float translate = -1; + if(layer.noadjust){ + scale = 1; + translate = 0; + } + + int size = layer.batch * layer.w * layer.h; + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(net.input_gpu, size * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(layer.rand_gpu, size * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(layer.output_gpu, size * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + levels_image_kernel<<>>(net.input_gpu, layer.rand_gpu, layer.batch, layer.w, layer.h, net.train, layer.saturation, layer.exposure, translate, scale, layer.shift); + check_error(cudaPeekAtLastError()); + + size = layer.batch*layer.c*layer.out_w*layer.out_h; + + forward_crop_layer_kernel<<>>(net.input_gpu, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, net.train, layer.flip, radians, layer.output_gpu); + check_error(cudaPeekAtLastError()); + +/* + cuda_pull_array(layer.output_gpu, layer.output, size); + image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 0*(size/layer.batch)); + image im2 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 1*(size/layer.batch)); + image im3 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 2*(size/layer.batch)); + + translate_image(im, -translate); + 
scale_image(im, 1/scale); + translate_image(im2, -translate); + scale_image(im2, 1/scale); + translate_image(im3, -translate); + scale_image(im3, 1/scale); + + show_image(im, "cropped"); + show_image(im2, "cropped2"); + show_image(im3, "cropped3"); + cvWaitKey(0); + */ +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/cuda_dark.cu b/workloads/realworld/uvm_prefetch_async/darknet/src/cuda_dark.cu new file mode 100644 index 0000000000000000000000000000000000000000..ce415292e8f25ef96de7aa3d4954592fea771195 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/cuda_dark.cu @@ -0,0 +1,435 @@ +int gpu_index = 0; + +#ifdef GPU + +#include "cuda.h" +#include "utils.h" +#include "blas.h" +#include +#include +#include + + +#include + +void cuda_set_device(int n) +{ + gpu_index = n; + cudaError_t status = cudaSetDevice(n); + check_error(status); +} + +int cuda_get_device() +{ + int n = 0; + cudaError_t status = cudaGetDevice(&n); + check_error(status); + return n; +} + +void check_error(cudaError_t status) +{ + cudaDeviceSynchronize(); + cudaError_t status2 = cudaGetLastError(); + if (status != cudaSuccess) + { + const char *s = cudaGetErrorString(status); + char buffer[256]; + printf("CUDA Error: %s\n", s); + assert(0); + snprintf(buffer, 256, "CUDA Error: %s", s); + error(buffer); + } + if (status2 != cudaSuccess) + { + const char *s = cudaGetErrorString(status2); + char buffer[256]; + printf("CUDA Error Prev: %s\n", s); + assert(0); + snprintf(buffer, 256, "CUDA Error Prev: %s", s); + error(buffer); + } +} + +dim3 cuda_gridsize(size_t n){ + size_t k = (n-1) / BLOCK + 1; + size_t x = k; + size_t y = 1; + if(x > 65535){ + x = ceil(sqrt(k)); + y = (n-1)/(x*BLOCK) + 1; + } + dim3 d = {x, y, 1}; + //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); + return d; +} + +#ifdef CUDNN +cudnnHandle_t cudnn_handle() +{ + static int init[16] = {0}; + static cudnnHandle_t handle[16]; + int i = cuda_get_device(); + if(!init[i]) { + 
cudnnCreate(&handle[i]); + init[i] = 1; + } + return handle[i]; +} +#endif + +cublasHandle_t blas_handle() +{ + static int init[16] = {0}; + static cublasHandle_t handle[16]; + int i = cuda_get_device(); + if(!init[i]) { + cublasCreate(&handle[i]); + init[i] = 1; + } + return handle[i]; +} + +float *cuda_make_array(float *x, size_t n) +{ + float *x_gpu; + size_t size = sizeof(float)*n; + // cudaError_t status = cudaMalloc((void **)&x_gpu, size); + cudaError_t status = cudaMallocManaged((void **)&x_gpu, size); + check_error(status); + if(x){ + // status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + // check_error(status); + // for (int i = 0; i < n; i++) { + // x_gpu[i] = x[i]; + // } + memcpy(x_gpu, x, size); + } else { + fill_gpu(n, 0, x_gpu, 1); + } + if(!x_gpu) error("Cuda malloc failed\n"); + return x_gpu; +} + +void cuda_random(float *x_gpu, size_t n) +{ + static curandGenerator_t gen[16]; + static int init[16] = {0}; + int i = cuda_get_device(); + if(!init[i]){ + curandCreateGenerator(&gen[i], CURAND_RNG_PSEUDO_DEFAULT); + curandSetPseudoRandomGeneratorSeed(gen[i], time(0)); + init[i] = 1; + } + curandGenerateUniform(gen[i], x_gpu, n); + check_error(cudaPeekAtLastError()); +} + +float cuda_compare(float *x_gpu, float *x, size_t n, char *s) +{ + float *tmp = (float *) calloc(n, sizeof(float)); + cuda_pull_array(x_gpu, tmp, n); + //int i; + //for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]); + axpy_cpu(n, -1, x, 1, tmp, 1); + float err = dot_cpu(n, tmp, 1, tmp, 1); + printf("Error %s: %f\n", s, sqrt(err/n)); + free(tmp); + return err; +} + +int *cuda_make_int_array(int *x, size_t n) +{ + int *x_gpu; + size_t size = sizeof(int)*n; + // cudaError_t status = cudaMalloc((void **)&x_gpu, size); + cudaError_t status = cudaMallocManaged((void **)&x_gpu, size); + check_error(status); + if(x){ + // status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + // check_error(status); + memcpy(x_gpu, x, size); + // for (int i = 0; i < n; i++) + // 
x_gpu[i] = x[i]; + } + if(!x_gpu) error("Cuda malloc failed\n"); + return x_gpu; +} + +void cuda_free(float *x_gpu) +{ + cudaError_t status = cudaFree(x_gpu); + check_error(status); +} + +void cuda_push_array(float *x_gpu, float *x, size_t n) +{ + size_t size = sizeof(float)*n; + // cudaError_t status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + // check_error(status); + memcpy(x_gpu, x, size); + // for (int i = 0; i < n; i++) + // x_gpu[i] = x[i]; +} + +void cuda_pull_array(float *x_gpu, float *x, size_t n) +{ + size_t size = sizeof(float)*n; + // cudaError_t status = cudaMemcpy(x, x_gpu, size, cudaMemcpyDeviceToHost); + // check_error(status); + memcpy(x, x_gpu, size); + // for (int i = 0; i < n; i++) + // x[i] = x_gpu[i]; +} + +float cuda_mag_array(float *x_gpu, size_t n) +{ + float *temp = (float *) calloc(n, sizeof(float)); + cuda_pull_array(x_gpu, temp, n); + float m = mag_array(temp, n); + free(temp); + return m; +} + +static const char * +getMemcpyKindString(CUpti_ActivityMemcpyKind kind) +{ + switch (kind) + { + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOD: + return "HtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOH: + return "DtoH"; + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOA: + return "HtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOH: + return "AtoH"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOA: + return "AtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_ATOD: + return "AtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOA: + return "DtoA"; + case CUPTI_ACTIVITY_MEMCPY_KIND_DTOD: + return "DtoD"; + case CUPTI_ACTIVITY_MEMCPY_KIND_HTOH: + return "HtoH"; + default: + break; + } + + return ""; +} + +static const char * +getUvmCounterKindString(CUpti_ActivityUnifiedMemoryCounterKind kind) +{ + switch (kind) + { + case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD: + return "BYTES_TRANSFER_HTOD"; + case CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH: + return "BYTES_TRANSFER_DTOH"; + default: + break; + } + return ""; +} + +static void 
+printActivity(CUpti_Activity *record) +{ + switch (record->kind) + { + case CUPTI_ACTIVITY_KIND_KERNEL: + { + int status; + CUpti_ActivityKernel4 *kernel = (CUpti_ActivityKernel4 *)record; + printf("KERNEL %s, %llu, %llu, %llu\n", + abi::__cxa_demangle(kernel->name, 0, 0, &status), + (unsigned long long)(kernel->start), + (unsigned long long)(kernel->end), + (unsigned long long)(kernel->end) - (kernel->start)); + break; + } + case CUPTI_ACTIVITY_KIND_RUNTIME: + { + CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record; + const char *callback_name; + cuptiGetCallbackName(CUPTI_CB_DOMAIN_RUNTIME_API, api->cbid, &callback_name); + // printf("RUNTIME %s (cbid=%u) [ %llu - %llu ] process %u, thread %u, correlation %u\n", + // callback_name, api->cbid, + // (unsigned long long)(api->start - startTimestamp), + // (unsigned long long)(api->end - startTimestamp), + // api->processId, api->threadId, api->correlationId); + printf("RUNTIME %s (cbid=%u), %llu,%llu,%llu, process %u, thread %u, correlation %u\n", + callback_name, api->cbid, + (unsigned long long)(api->start), + (unsigned long long)(api->end), + (unsigned long long)(api->end - api->start), + api->processId, api->threadId, api->correlationId); + break; + } + case CUPTI_ACTIVITY_KIND_MEMCPY: + { + CUpti_ActivityMemcpy4 *memcpy = (CUpti_ActivityMemcpy4 *)record; + printf("MEMCPY %s, size %llu, %llu, %llu, %llu\n", + getMemcpyKindString((CUpti_ActivityMemcpyKind)memcpy->copyKind), + (unsigned long long)memcpy->bytes, + (unsigned long long)(memcpy->start), + (unsigned long long)(memcpy->end), + (unsigned long long)(memcpy->end) - (memcpy->start)); + break; + } + case CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER: + { + CUpti_ActivityUnifiedMemoryCounter2 *uvm = (CUpti_ActivityUnifiedMemoryCounter2 *)record; + printf("UVM MEMCPY %s, size %llu, %llu, %llu, %llu \n", + getUvmCounterKindString(uvm->counterKind), + (unsigned long long)uvm->value, + (unsigned long long)(uvm->start), + (unsigned long long)(uvm->end), + (unsigned 
long long)(uvm->end - uvm->start)); + break; + } + } +} + +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) +{ + uint8_t *bfr = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE); + if (bfr == NULL) + { + printf("Error: out of memory\n"); + exit(-1); + } + + *size = BUF_SIZE; + *buffer = ALIGN_BUFFER(bfr, ALIGN_SIZE); + *maxNumRecords = 0; +} + +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) +{ + CUptiResult status; + CUpti_Activity *record = NULL; + if (validSize > 0) + { + do + { + status = cuptiActivityGetNextRecord(buffer, validSize, &record); + if (status == CUPTI_SUCCESS) + { + printActivity(record); + } + else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) + break; + else + { + CUPTI_CALL(status); + } + } while (1); + + // report any records dropped from the queue + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if (dropped != 0) + { + printf("Dropped %u activity records\n", (unsigned int)dropped); + } + } + + free(buffer); +} + +#ifndef PROFILE +void initTrace() { + printf("not Profile initTrace()\n"); + return; +} + +void finiTrace() { + return; +} + +#else +void initTrace() +{ + printf("Profile initTrace()\n"); + size_t attrValue = 0, attrValueSize = sizeof(size_t); + + CUpti_ActivityUnifiedMemoryCounterConfig config[2]; + + // configure unified memory counters + config[0].scope = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE; + config[0].kind = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD; + config[0].deviceId = 0; + config[0].enable = 1; + + config[1].scope = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_SCOPE_PROCESS_SINGLE_DEVICE; + config[1].kind = CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH; + config[1].deviceId = 0; + config[1].enable = 1; + + CUptiResult res = cuptiActivityConfigureUnifiedMemoryCounter(config, 2); + if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED) + 
{ + printf("Test is waived, unified memory is not supported on the underlying platform.\n"); + } + else if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE) + { + printf("Test is waived, unified memory is not supported on the device.\n"); + } + else if (res == CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES) + { + printf("Test is waived, unified memory is not supported on the non-P2P multi-gpu setup.\n"); + } + else + { + CUPTI_CALL(res); + } + + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER)); + + // Register callbacks for buffer requests and for buffers completed by CUPTI. + CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + + // Optionally get and set activity attributes. + // Attributes can be set by the CUPTI client to change behavior of the activity API. + // Some attributes require to be set before any CUDA context is created to be effective, + // e.g. to be applied to all device buffer allocations (see documentation). 
+ CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + printf("%s = %llu B\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +} + +void finiTrace() +{ + // Force flush any remaining activity buffers before termination of the application + CUPTI_CALL(cuptiActivityFlushAll(1)); +} +#endif + + +void GPU_argv_init() +{ + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, GPU_DEVICE); + printf("setting device %d with name %s\n", GPU_DEVICE, deviceProp.name); + cudaSetDevice(GPU_DEVICE); +} +#else +void cuda_set_device(int n){} + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/cuda_dark.h b/workloads/realworld/uvm_prefetch_async/darknet/src/cuda_dark.h new file mode 100644 index 0000000000000000000000000000000000000000..ac6b60bc3d27ec1ebc8190463648b946f6c809ef --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/cuda_dark.h @@ -0,0 +1,63 @@ +#ifndef CUDA_H +#define CUDA_H + +#include "darknet.h" + +#ifdef GPU + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) (((uintptr_t)(buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t)(buffer) & ((align)-1))) : (buffer)) + +static uint64_t startTimestamp; +// Timestamp at trace initialization time. 
Used to normalized other +// timestamps + +#define CUPTI_CALL(call) \ + do \ + { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) \ + { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + if (_status == CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED) \ + exit(0); \ + else \ + exit(-1); \ + } \ + } while (0) + +#include + +#ifdef __cplusplus +extern "C" { +#endif +void check_error(cudaError_t status); +cublasHandle_t blas_handle(); +int *cuda_make_int_array(int *x, size_t n); +void cuda_random(float *x_gpu, size_t n); +float cuda_compare(float *x_gpu, float *x, size_t n, char *s); +dim3 cuda_gridsize(size_t n); + +void GPU_argv_init(); +void initTrace(); +void finiTrace(); +void startCPU(); +void endCPU(); +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords); +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); +static void printActivity(CUpti_Activity *record); + +#ifdef __cplusplus +} +#endif + +#ifdef CUDNN +cudnnHandle_t cudnn_handle(); +#endif + +#endif +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/cupti_add.cpp b/workloads/realworld/uvm_prefetch_async/darknet/src/cupti_add.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a0d16eb72f41f8e858a59354d2de9d6b470c0e76 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/cupti_add.cpp @@ -0,0 +1,112 @@ +#include "cupti_add.h" + +static void +printActivity(CUpti_Activity *record) +{ + switch (record->kind) + { + case CUPTI_ACTIVITY_KIND_KERNEL: + { + int status; + CUpti_ActivityKernel4 *kernel = (CUpti_ActivityKernel4 *)record; + printf("CUPTI,%s,%llu,%llu,%llu\n", + abi::__cxa_demangle(kernel->name, 0, 0, &status), + (unsigned long long)(kernel->start), + (unsigned long long)(kernel->end), + (unsigned long 
long)(kernel->end - startTimestamp) - (kernel->start - startTimestamp)); + break; + } + case CUPTI_ACTIVITY_KIND_RUNTIME: + { + CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record; + const char *callback_name; + cuptiGetCallbackName(CUPTI_CB_DOMAIN_RUNTIME_API, api->cbid, &callback_name); + printf("RUNTIME %s (cbid=%u) [ %llu - %llu ] process %u, thread %u, correlation %u\n", + callback_name, api->cbid, + (unsigned long long)(api->start - startTimestamp), + (unsigned long long)(api->end - startTimestamp), + api->processId, api->threadId, api->correlationId); + break; + } + } +} + +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) +{ + uint8_t *bfr = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE); + if (bfr == NULL) + { + printf("Error: out of memory\n"); + exit(-1); + } + + *size = BUF_SIZE; + *buffer = ALIGN_BUFFER(bfr, ALIGN_SIZE); + *maxNumRecords = 0; +} + +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) +{ + CUptiResult status; + CUpti_Activity *record = NULL; + if (validSize > 0) + { + do + { + status = cuptiActivityGetNextRecord(buffer, validSize, &record); + if (status == CUPTI_SUCCESS) + { + printActivity(record); + } + else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) + break; + else + { + CUPTI_CALL(status); + } + } while (1); + + // report any records dropped from the queue + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if (dropped != 0) + { + printf("Dropped %u activity records\n", (unsigned int)dropped); + } + } + + free(buffer); +} + +void initTrace() +{ + size_t attrValue = 0, attrValueSize = sizeof(size_t); + + // CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); + + // Register callbacks for buffer requests and for buffers completed by CUPTI. 
+ CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + + // Optionally get and set activity attributes. + // Attributes can be set by the CUPTI client to change behavior of the activity API. + // Some attributes require to be set before any CUDA context is created to be effective, + // e.g. to be applied to all device buffer allocations (see documentation). + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + printf("%s = %llu B\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue); + attrValue *= 2; + CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue)); + + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +} + +void finiTrace() +{ + // Force flush any remaining activity buffers before termination of the application + CUPTI_CALL(cuptiActivityFlushAll(1)); +} \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/cupti_add.h b/workloads/realworld/uvm_prefetch_async/darknet/src/cupti_add.h new file mode 100644 index 0000000000000000000000000000000000000000..a30b7b847ad13381032d2f60eac2955d30146485 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/cupti_add.h @@ -0,0 +1,36 @@ +#include +#include +#include + + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) \ + (((uintptr_t) (buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer)) + +static uint64_t startTimestamp; +// Timestamp at trace initialization time. 
Used to normalized other +// timestamps + +#define CUPTI_CALL(call) \ + do { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + if(_status == CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED) \ + exit(0); \ + else \ + exit(-1); \ + } \ + } while (0) + +void initTrace(); +void finiTrace(); +void startCPU(); +void endCPU(); +void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords); +void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); +static void printActivity(CUpti_Activity *record); diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/data.c b/workloads/realworld/uvm_prefetch_async/darknet/src/data.c new file mode 100644 index 0000000000000000000000000000000000000000..d50f1346c5cdcfe1dbeb2d0f70ec408fb4f33960 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/data.c @@ -0,0 +1,1685 @@ +#include "data.h" +#include "utils.h" +#include "image.h" +#include "cuda_dark.h" + +#include +#include +#include + +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + +list *get_paths(char *filename) +{ + char *path; + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + list *lines = make_list(); + while((path=fgetl(file))){ + list_insert(lines, path); + } + fclose(file); + return lines; +} + +/* +char **get_random_paths_indexes(char **paths, int n, int m, int *indexes) +{ + char **random_paths = calloc(n, sizeof(char*)); + int i; + pthread_mutex_lock(&mutex); + for(i = 0; i < n; ++i){ + int index = rand()%m; + indexes[i] = index; + random_paths[i] = paths[index]; + if(i == 0) printf("%s\n", paths[index]); + } + pthread_mutex_unlock(&mutex); + return random_paths; +} +*/ + +char **get_random_paths(char **paths, int n, int m) +{ + char **random_paths = 
calloc(n, sizeof(char*)); + int i; + pthread_mutex_lock(&mutex); + for(i = 0; i < n; ++i){ + int index = rand()%m; + random_paths[i] = paths[index]; + //if(i == 0) printf("%s\n", paths[index]); + } + pthread_mutex_unlock(&mutex); + return random_paths; +} + +char **find_replace_paths(char **paths, int n, char *find, char *replace) +{ + char **replace_paths = calloc(n, sizeof(char*)); + int i; + for(i = 0; i < n; ++i){ + char replaced[4096]; + find_replace(paths[i], find, replace, replaced); + replace_paths[i] = copy_string(replaced); + } + return replace_paths; +} + +matrix load_image_paths_gray(char **paths, int n, int w, int h) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image(paths[i], w, h, 3); + + image gray = grayscale_image(im); + free_image(im); + im = gray; + + X.vals[i] = im.data; + X.cols = im.h*im.w*im.c; + } + return X; +} + +matrix load_image_paths(char **paths, int n, int w, int h) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], w, h); + X.vals[i] = im.data; + X.cols = im.h*im.w*im.c; + } + return X; +} + +matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center) +{ + int i; + matrix X; + X.rows = n; + X.vals = calloc(X.rows, sizeof(float*)); + X.cols = 0; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], 0, 0); + image crop; + if(center){ + crop = center_crop_image(im, size, size); + } else { + crop = random_augment_image(im, angle, aspect, min, max, size, size); + } + int flip = rand()%2; + if (flip) flip_image(crop); + random_distort_image(crop, hue, saturation, exposure); + + /* + show_image(im, "orig"); + show_image(crop, "crop"); + cvWaitKey(0); + */ + //grayscale_image_3c(crop); + free_image(im); + X.vals[i] = 
crop.data; + X.cols = crop.h*crop.w*crop.c; + } + return X; +} + + +box_label *read_boxes(char *filename, int *n) +{ + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + float x, y, h, w; + int id; + int count = 0; + int size = 64; + box_label *boxes = calloc(size, sizeof(box_label)); + while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){ + if(count == size) { + size = size * 2; + boxes = realloc(boxes, size*sizeof(box_label)); + } + boxes[count].id = id; + boxes[count].x = x; + boxes[count].y = y; + boxes[count].h = h; + boxes[count].w = w; + boxes[count].left = x - w/2; + boxes[count].right = x + w/2; + boxes[count].top = y - h/2; + boxes[count].bottom = y + h/2; + ++count; + } + fclose(file); + *n = count; + return boxes; +} + +void randomize_boxes(box_label *b, int n) +{ + int i; + for(i = 0; i < n; ++i){ + box_label swap = b[i]; + int index = rand()%n; + b[i] = b[index]; + b[index] = swap; + } +} + +void correct_boxes(box_label *boxes, int n, float dx, float dy, float sx, float sy, int flip) +{ + int i; + for(i = 0; i < n; ++i){ + if(boxes[i].x == 0 && boxes[i].y == 0) { + boxes[i].x = 999999; + boxes[i].y = 999999; + boxes[i].w = 999999; + boxes[i].h = 999999; + continue; + } + boxes[i].left = boxes[i].left * sx - dx; + boxes[i].right = boxes[i].right * sx - dx; + boxes[i].top = boxes[i].top * sy - dy; + boxes[i].bottom = boxes[i].bottom* sy - dy; + + if(flip){ + float swap = boxes[i].left; + boxes[i].left = 1. - boxes[i].right; + boxes[i].right = 1. 
- swap; + } + + boxes[i].left = constrain(0, 1, boxes[i].left); + boxes[i].right = constrain(0, 1, boxes[i].right); + boxes[i].top = constrain(0, 1, boxes[i].top); + boxes[i].bottom = constrain(0, 1, boxes[i].bottom); + + boxes[i].x = (boxes[i].left+boxes[i].right)/2; + boxes[i].y = (boxes[i].top+boxes[i].bottom)/2; + boxes[i].w = (boxes[i].right - boxes[i].left); + boxes[i].h = (boxes[i].bottom - boxes[i].top); + + boxes[i].w = constrain(0, 1, boxes[i].w); + boxes[i].h = constrain(0, 1, boxes[i].h); + } +} + +void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + + int count = 0; + box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + float x,y,w,h; + int id; + int i; + + for (i = 0; i < count && i < 90; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if (w < .0 || h < .0) continue; + + int index = (4+classes) * i; + + truth[index++] = x; + truth[index++] = y; + truth[index++] = w; + truth[index++] = h; + + if (id < classes) truth[index+id] = 1; + } + free(boxes); +} + +void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + int count = 0; + 
box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + float x,y,w,h; + int id; + int i; + + for (i = 0; i < count; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if (w < .005 || h < .005) continue; + + int col = (int)(x*num_boxes); + int row = (int)(y*num_boxes); + + x = x*num_boxes - col; + y = y*num_boxes - row; + + int index = (col+row*num_boxes)*(5+classes); + if (truth[index]) continue; + truth[index++] = 1; + + if (id < classes) truth[index+id] = 1; + index += classes; + + truth[index++] = x; + truth[index++] = y; + truth[index++] = w; + truth[index++] = h; + } + free(boxes); +} + +void load_rle(image im, int *rle, int n) +{ + int count = 0; + int curr = 0; + int i,j; + for(i = 0; i < n; ++i){ + for(j = 0; j < rle[i]; ++j){ + im.data[count++] = curr; + } + curr = 1 - curr; + } + for(; count < im.h*im.w*im.c; ++count){ + im.data[count] = curr; + } +} + +void or_image(image src, image dest, int c) +{ + int i; + for(i = 0; i < src.w*src.h; ++i){ + if(src.data[i]) dest.data[dest.w*dest.h*c + i] = 1; + } +} + +void exclusive_image(image src) +{ + int k, j, i; + int s = src.w*src.h; + for(k = 0; k < src.c-1; ++k){ + for(i = 0; i < s; ++i){ + if (src.data[k*s + i]){ + for(j = k+1; j < src.c; ++j){ + src.data[j*s + i] = 0; + } + } + } + } +} + +box bound_image(image im) +{ + int x,y; + int minx = im.w; + int miny = im.h; + int maxx = 0; + int maxy = 0; + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + if(im.data[y*im.w + x]){ + minx = (x < minx) ? x : minx; + miny = (y < miny) ? y : miny; + maxx = (x > maxx) ? x : maxx; + maxy = (y > maxy) ? 
y : maxy; + } + } + } + box b = {minx, miny, maxx-minx + 1, maxy-miny + 1}; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + return b; +} + +void fill_truth_iseg(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + int i = 0; + int j; + image part = make_image(w, h, 1); + while((fscanf(file, "%d %s", &id, buff) == 2) && i < num_boxes){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect); + if(flip) flip_image(sized); + + image mask = resize_image(sized, mw, mh); + truth[i*(mw*mh+1)] = id; + for(j = 0; j < mw*mh; ++j){ + truth[i*(mw*mh + 1) + 1 + j] = mask.data[j]; + } + ++i; + + free_image(mask); + free_image(sized); + free(rle); + } + if(i < num_boxes) truth[i*(mw*mh+1)] = -1; + fclose(file); + free_image(part); +} + +void fill_truth_mask(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + int i = 0; + image part = make_image(w, h, 1); + while((fscanf(file, "%d %s", &id, buff) == 2) && i < num_boxes){ + int n = 0; + int 
*rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect); + if(flip) flip_image(sized); + box b = bound_image(sized); + if(b.w > 0){ + image crop = crop_image(sized, b.x, b.y, b.w, b.h); + image mask = resize_image(crop, mw, mh); + truth[i*(4 + mw*mh + 1) + 0] = (b.x + b.w/2.)/sized.w; + truth[i*(4 + mw*mh + 1) + 1] = (b.y + b.h/2.)/sized.h; + truth[i*(4 + mw*mh + 1) + 2] = b.w/sized.w; + truth[i*(4 + mw*mh + 1) + 3] = b.h/sized.h; + int j; + for(j = 0; j < mw*mh; ++j){ + truth[i*(4 + mw*mh + 1) + 4 + j] = mask.data[j]; + } + truth[i*(4 + mw*mh + 1) + 4 + mw*mh] = id; + free_image(crop); + free_image(mask); + ++i; + } + free_image(sized); + free(rle); + } + fclose(file); + free_image(part); +} + + +void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) +{ + char labelpath[4096]; + find_replace(path, "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + + find_replace(labelpath, "raw", "labels", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + int count = 0; + box_label *boxes = read_boxes(labelpath, &count); + randomize_boxes(boxes, count); + correct_boxes(boxes, count, dx, dy, sx, sy, flip); + if(count > num_boxes) count = num_boxes; + float x,y,w,h; + int id; + int i; + int sub = 0; + + for (i = 0; i < count; ++i) { + x = boxes[i].x; + y = boxes[i].y; + w = boxes[i].w; + h = boxes[i].h; + id = boxes[i].id; + + if ((w < .001 || h < .001)) { + ++sub; + continue; + } + + truth[(i-sub)*5+0] = x; + truth[(i-sub)*5+1] = y; + truth[(i-sub)*5+2] = w; + truth[(i-sub)*5+3] = h; + truth[(i-sub)*5+4] = id; + } + free(boxes); +} + +#define NUMCHARS 37 + +void print_letters(float 
*pred, int n) +{ + int i; + for(i = 0; i < n; ++i){ + int index = max_index(pred+i*NUMCHARS, NUMCHARS); + printf("%c", int_to_alphanum(index)); + } + printf("\n"); +} + +void fill_truth_captcha(char *path, int n, float *truth) +{ + char *begin = strrchr(path, '/'); + ++begin; + int i; + for(i = 0; i < strlen(begin) && i < n && begin[i] != '.'; ++i){ + int index = alphanum_to_int(begin[i]); + if(index > 35) printf("Bad %c\n", begin[i]); + truth[i*NUMCHARS+index] = 1; + } + for(;i < n; ++i){ + truth[i*NUMCHARS + NUMCHARS-1] = 1; + } +} + +data load_data_captcha(char **paths, int n, int m, int k, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.y = make_matrix(n, k*NUMCHARS); + int i; + for(i = 0; i < n; ++i){ + fill_truth_captcha(paths[i], k, d.y.vals[i]); + } + if(m) free(paths); + return d; +} + +data load_data_captcha_encode(char **paths, int n, int m, int w, int h) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_paths(paths, n, w, h); + d.X.cols = 17100; + d.y = d.X; + if(m) free(paths); + return d; +} + +void fill_truth(char *path, char **labels, int k, float *truth) +{ + int i; + memset(truth, 0, k*sizeof(float)); + int count = 0; + for(i = 0; i < k; ++i){ + if(strstr(path, labels[i])){ + truth[i] = 1; + ++count; + //printf("%s %s %d\n", path, labels[i], i); + } + } + if(count != 1 && (k != 1 || count != 0)) printf("Too many or too few labels: %d, %s\n", count, path); +} + +void fill_hierarchy(float *truth, int k, tree *hierarchy) +{ + int j; + for(j = 0; j < k; ++j){ + if(truth[j]){ + int parent = hierarchy->parent[j]; + while(parent >= 0){ + truth[parent] = 1; + parent = hierarchy->parent[parent]; + } + } + } + int i; + int count = 0; + for(j = 0; j < hierarchy->groups; ++j){ + //printf("%d\n", count); + int mask = 1; + for(i = 0; i < hierarchy->group_size[j]; ++i){ + if(truth[count + i]){ + mask = 0; + break; + } 
+ } + if (mask) { + for(i = 0; i < hierarchy->group_size[j]; ++i){ + truth[count + i] = SECRET_NUM; + } + } + count += hierarchy->group_size[j]; + } +} + +matrix load_regression_labels_paths(char **paths, int n, int k) +{ + matrix y = make_matrix(n, k); + int i,j; + for(i = 0; i < n; ++i){ + char labelpath[4096]; + find_replace(paths[i], "images", "labels", labelpath); + find_replace(labelpath, "JPEGImages", "labels", labelpath); + find_replace(labelpath, ".BMP", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPeG", ".txt", labelpath); + find_replace(labelpath, ".Jpeg", ".txt", labelpath); + find_replace(labelpath, ".PNG", ".txt", labelpath); + find_replace(labelpath, ".TIF", ".txt", labelpath); + find_replace(labelpath, ".bmp", ".txt", labelpath); + find_replace(labelpath, ".jpeg", ".txt", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".png", ".txt", labelpath); + find_replace(labelpath, ".tif", ".txt", labelpath); + + FILE *file = fopen(labelpath, "r"); + for(j = 0; j < k; ++j){ + fscanf(file, "%f", &(y.vals[i][j])); + } + fclose(file); + } + return y; +} + +matrix load_labels_paths(char **paths, int n, char **labels, int k, tree *hierarchy) +{ + matrix y = make_matrix(n, k); + int i; + for(i = 0; i < n && labels; ++i){ + fill_truth(paths[i], labels, k, y.vals[i]); + if(hierarchy){ + fill_hierarchy(y.vals[i], k, hierarchy); + } + } + return y; +} + +matrix load_tags_paths(char **paths, int n, int k) +{ + matrix y = make_matrix(n, k); + int i; + //int count = 0; + for(i = 0; i < n; ++i){ + char label[4096]; + find_replace(paths[i], "images", "labels", label); + find_replace(label, ".jpg", ".txt", label); + FILE *file = fopen(label, "r"); + if (!file) continue; + //++count; + int tag; + while(fscanf(file, "%d", &tag) == 1){ + if(tag < k){ + y.vals[i][tag] = 1; + } + } + fclose(file); + } + //printf("%d/%d\n", 
count, n); + return y; +} + +char **get_labels(char *filename) +{ + list *plist = get_paths(filename); + char **labels = (char **)list_to_array(plist); + free_list(plist); + return labels; +} + +void free_data(data d) +{ + if(!d.shallow){ + free_matrix(d.X); + free_matrix(d.y); + }else{ + free(d.X.vals); + free(d.y.vals); + } +} + +image get_segmentation_image(char *path, int w, int h, int classes) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + image mask = make_image(w, h, classes); + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + image part = make_image(w, h, 1); + while(fscanf(file, "%d %s", &id, buff) == 2){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + or_image(part, mask, id); + free(rle); + } + //exclusive_image(mask); + fclose(file); + free_image(part); + return mask; +} + +image get_segmentation_image2(char *path, int w, int h, int classes) +{ + char labelpath[4096]; + find_replace(path, "images", "mask", labelpath); + find_replace(labelpath, "JPEGImages", "mask", labelpath); + find_replace(labelpath, ".jpg", ".txt", labelpath); + find_replace(labelpath, ".JPG", ".txt", labelpath); + find_replace(labelpath, ".JPEG", ".txt", labelpath); + image mask = make_image(w, h, classes+1); + int i; + for(i = 0; i < w*h; ++i){ + mask.data[w*h*classes + i] = 1; + } + FILE *file = fopen(labelpath, "r"); + if(!file) file_error(labelpath); + char buff[32788]; + int id; + image part = make_image(w, h, 1); + while(fscanf(file, "%d %s", &id, buff) == 2){ + int n = 0; + int *rle = read_intlist(buff, &n, 0); + load_rle(part, rle, n); + or_image(part, mask, id); + for(i = 0; i < w*h; ++i){ + if(part.data[i]) mask.data[w*h*classes + i] = 
/*
** Load n randomly chosen images plus their per-pixel segmentation masks,
** applying a shared random augmentation (rotate/crop/flip/distort) to both.
**
**  n, paths, m  sample n paths at random from the m available.
**  w, h         network input size.
**  classes      mask channels.
**  div          downscale factor for the truth mask relative to the input.
**
** Returns a deep (shallow == 0) data; X rows are w*h*3 images, y rows are
** (h/div)*(w/div)*classes masks.  Caller frees with free_data.
*/
data load_data_seg(int n, char **paths, int m, int w, int h, int classes, int min, int max, float angle, float aspect, float hue, float saturation, float exposure, int div)
{
    char **random_paths = get_random_paths(paths, n, m);
    int i;
    data d = {0};
    d.shallow = 0;

    d.X.rows = n;
    d.X.vals = calloc(d.X.rows, sizeof(float*));
    d.X.cols = h*w*3;

    d.y.rows = n;
    d.y.cols = h*w*classes/div/div;
    /* d.X.rows == n here, so this allocates the same row count for y */
    d.y.vals = calloc(d.X.rows, sizeof(float*));

    for(i = 0; i < n; ++i){
        image orig = load_image_color(random_paths[i], 0, 0);
        /* one augmentation sampled per image, reused for the mask below so
           image and truth stay geometrically aligned */
        augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h);
        image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect);

        int flip = rand()%2;
        if(flip) flip_image(sized);
        random_distort_image(sized, hue, saturation, exposure);
        d.X.vals[i] = sized.data;   /* d takes ownership of sized's pixels */

        image mask = get_segmentation_image(random_paths[i], orig.w, orig.h, classes);
        //image mask = make_image(orig.w, orig.h, classes+1);
        /* same transform as the image, scaled down by div for the truth grid */
        image sized_m = rotate_crop_image(mask, a.rad, a.scale/div, a.w/div, a.h/div, a.dx/div, a.dy/div, a.aspect);

        if(flip) flip_image(sized_m);
        d.y.vals[i] = sized_m.data; /* d takes ownership of the mask pixels */

        free_image(orig);
        free_image(mask);

        /*
        image rgb = mask_to_rgb(sized_m, classes);
        show_image(rgb, "part");
        show_image(sized, "orig");
        cvWaitKey(0);
        free_image(rgb);
        */
    }
    free(random_paths);
    return d;
}
/*
** Load n randomly chosen images with per-box instance-mask truth.
**
**  n, paths, m  sample n paths at random from the m available.
**  w, h         network input size.
**  boxes        maximum truth boxes per image; y has (coords+1)*boxes cols.
**  coords       truth values per box (plus one slot, see make_matrix call).
**
** Returns a deep data; caller frees with free_data.
*/
data load_data_mask(int n, char **paths, int m, int w, int h, int classes, int boxes, int coords, int min, int max, float angle, float aspect, float hue, float saturation, float exposure)
{
    char **random_paths = get_random_paths(paths, n, m);
    int i;
    data d = {0};
    d.shallow = 0;

    d.X.rows = n;
    d.X.vals = calloc(d.X.rows, sizeof(float*));
    d.X.cols = h*w*3;

    d.y = make_matrix(n, (coords+1)*boxes);

    for(i = 0; i < n; ++i){
        image orig = load_image_color(random_paths[i], 0, 0);
        augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h);
        image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect);

        int flip = rand()%2;
        if(flip) flip_image(sized);
        random_distort_image(sized, hue, saturation, exposure);
        d.X.vals[i] = sized.data;   /* d takes ownership of the pixel buffer */
        //show_image(sized, "image");

        /* NOTE(review): the 14,14 arguments look like a hard-coded truth
           mask grid size — confirm against fill_truth_mask before reusing
           this loader with a different output resolution. */
        fill_truth_mask(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, 14, 14);

        free_image(orig);

        /*
        image rgb = mask_to_rgb(sized_m, classes);
        show_image(rgb, "part");
        show_image(sized, "orig");
        cvWaitKey(0);
        free_image(rgb);
        */
    }
    free(random_paths);
    return d;
}
+{ + char **random_paths = get_random_paths(paths, n, m); + int i; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + + int k = size*size*(5+classes); + d.y = make_matrix(n, k); + for(i = 0; i < n; ++i){ + image orig = load_image_color(random_paths[i], 0, 0); + + int oh = orig.h; + int ow = orig.w; + + int dw = (ow*jitter); + int dh = (oh*jitter); + + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); + + int swidth = ow - pleft - pright; + int sheight = oh - ptop - pbot; + + float sx = (float)swidth / ow; + float sy = (float)sheight / oh; + + int flip = rand()%2; + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + + float dx = ((float)pleft/ow)/sx; + float dy = ((float)ptop /oh)/sy; + + image sized = resize_image(cropped, w, h); + if(flip) flip_image(sized); + random_distort_image(sized, hue, saturation, exposure); + d.X.vals[i] = sized.data; + + fill_truth_region(random_paths[i], d.y.vals[i], classes, size, flip, dx, dy, 1./sx, 1./sy); + + free_image(orig); + free_image(cropped); + } + free(random_paths); + return d; +} + +data load_data_compare(int n, char **paths, int m, int classes, int w, int h) +{ + if(m) paths = get_random_paths(paths, 2*n, m); + int i,j; + data d = {0}; + d.shallow = 0; + + d.X.rows = n; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*6; + + int k = 2*(classes); + d.y = make_matrix(n, k); + for(i = 0; i < n; ++i){ + image im1 = load_image_color(paths[i*2], w, h); + image im2 = load_image_color(paths[i*2+1], w, h); + + d.X.vals[i] = calloc(d.X.cols, sizeof(float)); + memcpy(d.X.vals[i], im1.data, h*w*3*sizeof(float)); + memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float)); + + int id; + float iou; + + char imlabel1[4096]; + char imlabel2[4096]; + find_replace(paths[i*2], "imgs", "labels", imlabel1); + find_replace(imlabel1, "jpg", "txt", 
imlabel1); + FILE *fp1 = fopen(imlabel1, "r"); + + while(fscanf(fp1, "%d %f", &id, &iou) == 2){ + if (d.y.vals[i][2*id] < iou) d.y.vals[i][2*id] = iou; + } + + find_replace(paths[i*2+1], "imgs", "labels", imlabel2); + find_replace(imlabel2, "jpg", "txt", imlabel2); + FILE *fp2 = fopen(imlabel2, "r"); + + while(fscanf(fp2, "%d %f", &id, &iou) == 2){ + if (d.y.vals[i][2*id + 1] < iou) d.y.vals[i][2*id + 1] = iou; + } + + for (j = 0; j < classes; ++j){ + if (d.y.vals[i][2*j] > .5 && d.y.vals[i][2*j+1] < .5){ + d.y.vals[i][2*j] = 1; + d.y.vals[i][2*j+1] = 0; + } else if (d.y.vals[i][2*j] < .5 && d.y.vals[i][2*j+1] > .5){ + d.y.vals[i][2*j] = 0; + d.y.vals[i][2*j+1] = 1; + } else { + d.y.vals[i][2*j] = SECRET_NUM; + d.y.vals[i][2*j+1] = SECRET_NUM; + } + } + fclose(fp1); + fclose(fp2); + + free_image(im1); + free_image(im2); + } + if(m) free(paths); + return d; +} + +data load_data_swag(char **paths, int n, int classes, float jitter) +{ + int index = rand()%n; + char *random_path = paths[index]; + + image orig = load_image_color(random_path, 0, 0); + int h = orig.h; + int w = orig.w; + + data d = {0}; + d.shallow = 0; + d.w = w; + d.h = h; + + d.X.rows = 1; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + d.X.cols = h*w*3; + + int k = (4+classes)*90; + d.y = make_matrix(1, k); + + int dw = w*jitter; + int dh = h*jitter; + + int pleft = rand_uniform(-dw, dw); + int pright = rand_uniform(-dw, dw); + int ptop = rand_uniform(-dh, dh); + int pbot = rand_uniform(-dh, dh); + + int swidth = w - pleft - pright; + int sheight = h - ptop - pbot; + + float sx = (float)swidth / w; + float sy = (float)sheight / h; + + int flip = rand()%2; + image cropped = crop_image(orig, pleft, ptop, swidth, sheight); + + float dx = ((float)pleft/w)/sx; + float dy = ((float)ptop /h)/sy; + + image sized = resize_image(cropped, w, h); + if(flip) flip_image(sized); + d.X.vals[0] = sized.data; + + fill_truth_swag(random_path, d.y.vals[0], classes, flip, dx, dy, 1./sx, 1./sy); + + free_image(orig); + 
/*
** Load n randomly chosen images with detection truth (5 values per box).
** Each image is randomly jittered in aspect ratio, placed on a gray
** (value .5) canvas, color-distorted, and possibly flipped; the same
** transform parameters are passed to fill_truth_detection so the boxes
** track the pixels.
**
** Returns a deep data (X: h*w*3 per row, y: 5*boxes per row); caller
** frees with free_data.
*/
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure)
{
    char **random_paths = get_random_paths(paths, n, m);
    int i;
    data d = {0};
    d.shallow = 0;

    d.X.rows = n;
    d.X.vals = calloc(d.X.rows, sizeof(float*));
    d.X.cols = h*w*3;

    d.y = make_matrix(n, 5*boxes);
    for(i = 0; i < n; ++i){
        image orig = load_image_color(random_paths[i], 0, 0);
        image sized = make_image(w, h, orig.c);
        fill_image(sized, .5);  /* gray canvas for the letterboxed placement */

        float dw = jitter * orig.w;
        float dh = jitter * orig.h;

        /* jitter the aspect ratio by perturbing both dimensions */
        float new_ar = (orig.w + rand_uniform(-dw, dw)) / (orig.h + rand_uniform(-dh, dh));
        //float scale = rand_uniform(.25, 2);
        float scale = 1;

        float nw, nh;

        /* fit the jittered rectangle inside w x h, preserving new_ar */
        if(new_ar < 1){
            nh = scale * h;
            nw = nh * new_ar;
        } else {
            nw = scale * w;
            nh = nw / new_ar;
        }

        /* random offset of the resized image inside the canvas */
        float dx = rand_uniform(0, w - nw);
        float dy = rand_uniform(0, h - nh);

        place_image(orig, nw, nh, dx, dy, sized);

        random_distort_image(sized, hue, saturation, exposure);

        int flip = rand()%2;
        if(flip) flip_image(sized);
        d.X.vals[i] = sized.data;   /* d takes ownership of the pixels */

        /* truth gets the inverse transform (normalized offsets/scales) */
        fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, -dx/w, -dy/h, nw/w, nh/h);

        free_image(orig);
    }
    free(random_paths);
    return d;
}
/*
** pthread entry point: dispatch on args->type to the matching loader and
** store the result through the pointers inside the load_args.
**
** Takes ownership of ptr (a heap-allocated struct load_args) and frees it
** before returning.  Zero exposure/saturation/aspect are treated as the
** neutral value 1 so callers may leave them unset.
*/
void *load_thread(void *ptr)
{
    //printf("Loading data: %d\n", rand());
    load_args a = *(struct load_args*)ptr;
    if(a.exposure == 0) a.exposure = 1;
    if(a.saturation == 0) a.saturation = 1;
    if(a.aspect == 0) a.aspect = 1;

    if (a.type == OLD_CLASSIFICATION_DATA){
        *a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
    } else if (a.type == REGRESSION_DATA){
        *a.d = load_data_regression(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
    } else if (a.type == CLASSIFICATION_DATA){
        *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.center);
    } else if (a.type == SUPER_DATA){
        *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
    } else if (a.type == WRITING_DATA){
        *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
    } else if (a.type == ISEG_DATA){
        *a.d = load_data_iseg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.scale, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
    } else if (a.type == INSTANCE_DATA){
        *a.d = load_data_mask(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.coords, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
    } else if (a.type == SEGMENTATION_DATA){
        *a.d = load_data_seg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.scale);
    } else if (a.type == REGION_DATA){
        *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
    } else if (a.type == DETECTION_DATA){
        *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
    } else if (a.type == SWAG_DATA){
        *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
    } else if (a.type == COMPARE_DATA){
        *a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h);
    } else if (a.type == IMAGE_DATA){
        /* single-image modes fill a.im / a.resized instead of a.d */
        *(a.im) = load_image_color(a.path, 0, 0);
        *(a.resized) = resize_image(*(a.im), a.w, a.h);
    } else if (a.type == LETTERBOX_DATA){
        *(a.im) = load_image_color(a.path, 0, 0);
        *(a.resized) = letterbox_image(*(a.im), a.w, a.h);
    } else if (a.type == TAG_DATA){
        *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
    }
    free(ptr);
    return 0;
}
/*
** pthread entry point that fans the requested load out over args->threads
** worker threads (each running load_thread via load_data_in_thread), joins
** them, and concatenates the per-thread results into *args->d.
**
** Takes ownership of ptr and frees it.  The concatenated result is made
** deep (shallow = 0) while the per-thread buffers are freed shallowly,
** since concat_datas shares the row pointers rather than copying them.
*/
void *load_threads(void *ptr)
{
    int i;
    load_args args = *(load_args *)ptr;
    if (args.threads == 0) args.threads = 1;
    data *out = args.d;
    int total = args.n;
    free(ptr);
    data *buffers = calloc(args.threads, sizeof(data));
    pthread_t *threads = calloc(args.threads, sizeof(pthread_t));
    for(i = 0; i < args.threads; ++i){
        args.d = buffers + i;
        /* split `total` rows as evenly as integer division allows */
        args.n = (i+1) * total/args.threads - i * total/args.threads;
        threads[i] = load_data_in_thread(args);
    }
    for(i = 0; i < args.threads; ++i){
        pthread_join(threads[i], 0);
    }
    *out = concat_datas(buffers, args.threads);
    out->shallow = 0;
    for(i = 0; i < args.threads; ++i){
        /* rows are now owned by *out; free only the buffer shells */
        buffers[i].shallow = 1;
        free_data(buffers[i]);
    }
    free(buffers);
    free(threads);
    return 0;
}
+ return d; +} + +/* + data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) + { + data d = {0}; + d.indexes = calloc(n, sizeof(int)); + if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes); + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure); + d.y = load_labels_paths(paths, n, labels, k); + if(m) free(paths); + return d; + } + */ + +data load_data_super(char **paths, int n, int m, int w, int h, int scale) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + + int i; + d.X.rows = n; + d.X.vals = calloc(n, sizeof(float*)); + d.X.cols = w*h*3; + + d.y.rows = n; + d.y.vals = calloc(n, sizeof(float*)); + d.y.cols = w*scale * h*scale * 3; + + for(i = 0; i < n; ++i){ + image im = load_image_color(paths[i], 0, 0); + image crop = random_crop_image(im, w*scale, h*scale); + int flip = rand()%2; + if (flip) flip_image(crop); + image resize = resize_image(crop, w, h); + d.X.vals[i] = resize.data; + d.y.vals[i] = crop.data; + free_image(im); + } + + if(m) free(paths); + return d; +} + +data load_data_regression(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0); + d.y = load_regression_labels_paths(paths, n, k); + if(m) free(paths); + return d; +} + +data select_data(data *orig, int *inds) +{ + data d = {0}; + d.shallow = 1; + d.w = orig[0].w; + d.h = orig[0].h; + + d.X.rows = orig[0].X.rows; + d.y.rows = orig[0].X.rows; + + d.X.cols = orig[0].X.cols; + d.y.cols = orig[0].y.cols; + + d.X.vals = calloc(orig[0].X.rows, sizeof(float *)); + d.y.vals = calloc(orig[0].y.rows, sizeof(float *)); + 
int i; + for(i = 0; i < d.X.rows; ++i){ + d.X.vals[i] = orig[inds[i]].X.vals[i]; + d.y.vals[i] = orig[inds[i]].y.vals[i]; + } + return d; +} + +data *tile_data(data orig, int divs, int size) +{ + data *ds = calloc(divs*divs, sizeof(data)); + int i, j; +#pragma omp parallel for + for(i = 0; i < divs*divs; ++i){ + data d; + d.shallow = 0; + d.w = orig.w/divs * size; + d.h = orig.h/divs * size; + d.X.rows = orig.X.rows; + d.X.cols = d.w*d.h*3; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + + d.y = copy_matrix(orig.y); +#pragma omp parallel for + for(j = 0; j < orig.X.rows; ++j){ + int x = (i%divs) * orig.w / divs - (d.w - orig.w/divs)/2; + int y = (i/divs) * orig.h / divs - (d.h - orig.h/divs)/2; + image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[j]); + d.X.vals[j] = crop_image(im, x, y, d.w, d.h).data; + } + ds[i] = d; + } + return ds; +} + +data resize_data(data orig, int w, int h) +{ + data d = {0}; + d.shallow = 0; + d.w = w; + d.h = h; + int i; + d.X.rows = orig.X.rows; + d.X.cols = w*h*3; + d.X.vals = calloc(d.X.rows, sizeof(float*)); + + d.y = copy_matrix(orig.y); +#pragma omp parallel for + for(i = 0; i < orig.X.rows; ++i){ + image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[i]); + d.X.vals[i] = resize_image(im, w, h).data; + } + return d; +} + +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center) +{ + if(m) paths = get_random_paths(paths, n, m); + data d = {0}; + d.shallow = 0; + d.w=size; + d.h=size; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, center); + d.y = load_labels_paths(paths, n, labels, k, hierarchy); + if(m) free(paths); + return d; +} + +data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +{ + if(m) paths = 
get_random_paths(paths, n, m); + data d = {0}; + d.w = size; + d.h = size; + d.shallow = 0; + d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0); + d.y = load_tags_paths(paths, n, k); + if(m) free(paths); + return d; +} + +matrix concat_matrix(matrix m1, matrix m2) +{ + int i, count = 0; + matrix m; + m.cols = m1.cols; + m.rows = m1.rows+m2.rows; + m.vals = calloc(m1.rows + m2.rows, sizeof(float*)); + for(i = 0; i < m1.rows; ++i){ + m.vals[count++] = m1.vals[i]; + } + for(i = 0; i < m2.rows; ++i){ + m.vals[count++] = m2.vals[i]; + } + return m; +} + +data concat_data(data d1, data d2) +{ + data d = {0}; + d.shallow = 1; + d.X = concat_matrix(d1.X, d2.X); + d.y = concat_matrix(d1.y, d2.y); + d.w = d1.w; + d.h = d1.h; + return d; +} + +data concat_datas(data *d, int n) +{ + int i; + data out = {0}; + for(i = 0; i < n; ++i){ + data new = concat_data(d[i], out); + free_data(out); + out = new; + } + return out; +} + +data load_categorical_data_csv(char *filename, int target, int k) +{ + data d = {0}; + d.shallow = 0; + matrix X = csv_to_matrix(filename); + float *truth_1d = pop_column(&X, target); + float **truth = one_hot_encode(truth_1d, X.rows, k); + matrix y; + y.rows = X.rows; + y.cols = k; + y.vals = truth; + d.X = X; + d.y = y; + free(truth_1d); + return d; +} + +data load_cifar10_data(char *filename) +{ + data d = {0}; + d.shallow = 0; + long i,j; + matrix X = make_matrix(10000, 3072); + matrix y = make_matrix(10000, 10); + d.X = X; + d.y = y; + + FILE *fp = fopen(filename, "rb"); + if(!fp) file_error(filename); + for(i = 0; i < 10000; ++i){ + unsigned char bytes[3073]; + fread(bytes, 1, 3073, fp); + int class = bytes[0]; + y.vals[i][class] = 1; + for(j = 0; j < X.cols; ++j){ + X.vals[i][j] = (double)bytes[j+1]; + } + } + scale_data_rows(d, 1./255); + //normalize_data_rows(d); + fclose(fp); + return d; +} + +void get_random_batch(data d, int n, float *X, float *y) +{ + int j; + for(j = 0; j < n; ++j){ + int 
index = rand()%d.X.rows; + memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); + memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); + } +} + +void get_next_batch(data d, int n, int offset, float *X, float *y) +{ + int j; + for(j = 0; j < n; ++j){ + int index = offset + j; + memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); + if(y) memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); + } +} + +void smooth_data(data d) +{ + int i, j; + float scale = 1. / d.y.cols; + float eps = .1; + for(i = 0; i < d.y.rows; ++i){ + for(j = 0; j < d.y.cols; ++j){ + d.y.vals[i][j] = eps * scale + (1-eps) * d.y.vals[i][j]; + } + } +} + +data load_all_cifar10() +{ + data d = {0}; + d.shallow = 0; + int i,j,b; + matrix X = make_matrix(50000, 3072); + matrix y = make_matrix(50000, 10); + d.X = X; + d.y = y; + + + for(b = 0; b < 5; ++b){ + char buff[256]; + sprintf(buff, "data/cifar/cifar-10-batches-bin/data_batch_%d.bin", b+1); + FILE *fp = fopen(buff, "rb"); + if(!fp) file_error(buff); + for(i = 0; i < 10000; ++i){ + unsigned char bytes[3073]; + fread(bytes, 1, 3073, fp); + int class = bytes[0]; + y.vals[i+b*10000][class] = 1; + for(j = 0; j < X.cols; ++j){ + X.vals[i+b*10000][j] = (double)bytes[j+1]; + } + } + fclose(fp); + } + //normalize_data_rows(d); + scale_data_rows(d, 1./255); + smooth_data(d); + return d; +} + +data load_go(char *filename) +{ + FILE *fp = fopen(filename, "rb"); + matrix X = make_matrix(3363059, 361); + matrix y = make_matrix(3363059, 361); + int row, col; + + if(!fp) file_error(filename); + char *label; + int count = 0; + while((label = fgetl(fp))){ + int i; + if(count == X.rows){ + X = resize_matrix(X, count*2); + y = resize_matrix(y, count*2); + } + sscanf(label, "%d %d", &row, &col); + char *board = fgetl(fp); + + int index = row*19 + col; + y.vals[count][index] = 1; + + for(i = 0; i < 19*19; ++i){ + float val = 0; + if(board[i] == '1') val = 1; + else if(board[i] == '2') val = -1; + X.vals[count][i] = val; + } + 
++count; + free(label); + free(board); + } + X = resize_matrix(X, count); + y = resize_matrix(y, count); + + data d = {0}; + d.shallow = 0; + d.X = X; + d.y = y; + + + fclose(fp); + + return d; +} + + +void randomize_data(data d) +{ + int i; + for(i = d.X.rows-1; i > 0; --i){ + int index = rand()%i; + float *swap = d.X.vals[index]; + d.X.vals[index] = d.X.vals[i]; + d.X.vals[i] = swap; + + swap = d.y.vals[index]; + d.y.vals[index] = d.y.vals[i]; + d.y.vals[i] = swap; + } +} + +void scale_data_rows(data d, float s) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + scale_array(d.X.vals[i], d.X.cols, s); + } +} + +void translate_data_rows(data d, float s) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + translate_array(d.X.vals[i], d.X.cols, s); + } +} + +data copy_data(data d) +{ + data c = {0}; + c.w = d.w; + c.h = d.h; + c.shallow = 0; + c.num_boxes = d.num_boxes; + c.boxes = d.boxes; + c.X = copy_matrix(d.X); + c.y = copy_matrix(d.y); + return c; +} + +void normalize_data_rows(data d) +{ + int i; + for(i = 0; i < d.X.rows; ++i){ + normalize_array(d.X.vals[i], d.X.cols); + } +} + +data get_data_part(data d, int part, int total) +{ + data p = {0}; + p.shallow = 1; + p.X.rows = d.X.rows * (part + 1) / total - d.X.rows * part / total; + p.y.rows = d.y.rows * (part + 1) / total - d.y.rows * part / total; + p.X.cols = d.X.cols; + p.y.cols = d.y.cols; + p.X.vals = d.X.vals + d.X.rows * part / total; + p.y.vals = d.y.vals + d.y.rows * part / total; + return p; +} + +data get_random_data(data d, int num) +{ + data r = {0}; + r.shallow = 1; + + r.X.rows = num; + r.y.rows = num; + + r.X.cols = d.X.cols; + r.y.cols = d.y.cols; + + r.X.vals = calloc(num, sizeof(float *)); + r.y.vals = calloc(num, sizeof(float *)); + + int i; + for(i = 0; i < num; ++i){ + int index = rand()%d.X.rows; + r.X.vals[i] = d.X.vals[index]; + r.y.vals[i] = d.y.vals[index]; + } + return r; +} + +data *split_data(data d, int part, int total) +{ + data *split = calloc(2, sizeof(data)); + int i; + int start = 
part*d.X.rows/total; + int end = (part+1)*d.X.rows/total; + data train; + data test; + train.shallow = test.shallow = 1; + + test.X.rows = test.y.rows = end-start; + train.X.rows = train.y.rows = d.X.rows - (end-start); + train.X.cols = test.X.cols = d.X.cols; + train.y.cols = test.y.cols = d.y.cols; + + train.X.vals = calloc(train.X.rows, sizeof(float*)); + test.X.vals = calloc(test.X.rows, sizeof(float*)); + train.y.vals = calloc(train.y.rows, sizeof(float*)); + test.y.vals = calloc(test.y.rows, sizeof(float*)); + + for(i = 0; i < start; ++i){ + train.X.vals[i] = d.X.vals[i]; + train.y.vals[i] = d.y.vals[i]; + } + for(i = start; i < end; ++i){ + test.X.vals[i-start] = d.X.vals[i]; + test.y.vals[i-start] = d.y.vals[i]; + } + for(i = end; i < d.X.rows; ++i){ + train.X.vals[i-(end-start)] = d.X.vals[i]; + train.y.vals[i-(end-start)] = d.y.vals[i]; + } + split[0] = train; + split[1] = test; + return split; +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/data.h b/workloads/realworld/uvm_prefetch_async/darknet/src/data.h new file mode 100644 index 0000000000000000000000000000000000000000..781906f8743c7d88c0fa134403d0ae020b544053 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/data.h @@ -0,0 +1,50 @@ +#ifndef DATA_H +#define DATA_H +#include + +#include "darknet.h" +#include "matrix.h" +#include "list.h" +#include "image.h" +#include "tree.h" + +static inline float distance_from_edge(int x, int max) +{ + int dx = (max/2) - x; + if (dx < 0) dx = -dx; + dx = (max/2) + 1 - dx; + dx *= 2; + float dist = (float)dx/max; + if (dist > 1) dist = 1; + return dist; +} +void load_data_blocking(load_args args); + + +void print_letters(float *pred, int n); +data load_data_captcha(char **paths, int n, int m, int k, int w, int h); +data load_data_captcha_encode(char **paths, int n, int m, int w, int h); +data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float 
exposure); +data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); +matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); +data load_data_super(char **paths, int n, int m, int w, int h, int scale); +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); +data load_data_regression(char **paths, int n, int m, int classes, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); +data load_go(char *filename); + + +data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h); + +void get_random_batch(data d, int n, float *X, float *y); +data get_data_part(data d, int part, int total); +data get_random_data(data d, int num); +data load_categorical_data_csv(char *filename, int target, int k); +void normalize_data_rows(data d); +void scale_data_rows(data d, float s); +void translate_data_rows(data d, float s); +void randomize_data(data d); +data *split_data(data d, int part, int total); +data concat_datas(data *d, int n); +void fill_truth(char *path, char **labels, int k, float *truth); + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/deconvolutional_kernels.cu b/workloads/realworld/uvm_prefetch_async/darknet/src/deconvolutional_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..ed12e7a3dc5148b1cbff746f13901a9653bc0f6d --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/deconvolutional_kernels.cu @@ -0,0 +1,139 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "convolutional_layer.h" +#include "deconvolutional_layer.h" +#include 
/*
** GPU forward pass for a deconvolutional (transposed-convolution) layer:
** per batch item, a GEMM of the (transposed) weights with the input
** produces column data in net.workspace, which col2im scatters into the
** layer output; then batch-norm or bias, then activation.
*/
extern "C" void forward_deconvolutional_layer_gpu(layer l, network net)
{
    int i;

    /* GEMM dims: (size*size*n) x (h*w) output from (size*size*n) x c
       weights (transposed) times c x (h*w) input */
    int m = l.size*l.size*l.n;
    int n = l.h*l.w;
    int k = l.c;

    fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1);

    for(i = 0; i < l.batch; ++i){
        float *a = l.weights_gpu;
        float *b = net.input_gpu + i*l.c*l.h*l.w;
        float *c = net.workspace;

        /* first flag = 1: weights are used transposed */
        gemm_gpu(1,0,m,n,k,1,a,m,b,n,0,c,n);

        /* scatter the column buffer into the spatial output for item i */
        col2im_gpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output_gpu+i*l.outputs);
    }
    if (l.batch_normalize) {
        forward_batchnorm_layer_gpu(l, net);
    } else {
        add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h);
    }
    activate_array_gpu(l.output_gpu, l.batch*l.n*l.out_w*l.out_h, l.activation);
}

/*
** GPU backward pass: propagate the activation gradient, then batch-norm or
** bias gradients, then per batch item accumulate weight updates (via
** im2col of the output delta + GEMM) and, when net.delta_gpu is set,
** the gradient w.r.t. the layer input.
*/
extern "C" void backward_deconvolutional_layer_gpu(layer l, network net)
{
    int i;

    //constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
    gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);

    if(l.batch_normalize){
        backward_batchnorm_layer_gpu(l, net);
    } else {
        backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);
    }

    //if(net.delta_gpu) memset(net.delta_gpu, 0, l.batch*l.h*l.w*l.c*sizeof(float));

    for(i = 0; i < l.batch; ++i){
        int m = l.c;
        int n = l.size*l.size*l.n;
        int k = l.h*l.w;

        float *a = net.input_gpu + i*m*k;
        float *b = net.workspace;
        float *c = l.weight_updates_gpu;

        /* columnize the output delta so the weight-update GEMM lines up */
        im2col_gpu(l.delta_gpu + i*l.outputs, l.out_c, l.out_h, l.out_w,
                l.size, l.stride, l.pad, b);
        gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n);

        if(net.delta_gpu){
            int m = l.c;
            int n = l.h*l.w;
            int k = l.size*l.size*l.n;

            float *a = l.weights_gpu;
            float *b = net.workspace;
            float *c = net.delta_gpu + i*n*m;

            /* accumulate (beta = 1) into the upstream delta */
            gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
        }
    }
}
/*
** Copy the layer's host-side weights, biases, their update buffers, and
** (when batch-normalized) the scale/rolling statistics up to their GPU
** mirrors.  Counterpart of pull_deconvolutional_layer.
*/
extern "C" void push_deconvolutional_layer(layer l)
{
    cuda_push_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size);
    cuda_push_array(l.biases_gpu, l.biases, l.n);
    cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size);
    cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n);
    if (l.batch_normalize){
        cuda_push_array(l.scales_gpu, l.scales, l.n);
        cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n);
        cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n);
    }
}
momentum, l.bias_updates_gpu, 1); + + if(l.scales_gpu){ + axpy_gpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); + scal_gpu(l.n, momentum, l.scale_updates_gpu, 1); + } + } +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/deconvolutional_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/deconvolutional_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..00c0e85771d42f99de969f9fd03e5f0f359d405c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/deconvolutional_layer.c @@ -0,0 +1,312 @@ +#include "deconvolutional_layer.h" +#include "convolutional_layer.h" +#include "batchnorm_layer.h" +#include "utils.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" + +#include +#include + + +static size_t get_workspace_size(layer l){ + return (size_t)l.h*l.w*l.size*l.size*l.n*sizeof(float); +} + +void bilinear_init(layer l) +{ + int i,j,f; + float center = (l.size-1) / 2.; + for(f = 0; f < l.n; ++f){ + for(j = 0; j < l.size; ++j){ + for(i = 0; i < l.size; ++i){ + float val = (1 - fabs(i - center)) * (1 - fabs(j - center)); + int c = f%l.c; + int ind = f*l.size*l.size*l.c + c*l.size*l.size + j*l.size + i; + l.weights[ind] = val; + } + } + } +} + + +layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam) +{ + int i; + layer l = {0}; + l.type = DECONVOLUTIONAL; + + l.h = h; + l.w = w; + l.c = c; + l.n = n; + l.batch = batch; + l.stride = stride; + l.size = size; + + l.nweights = c*n*size*size; + l.nbiases = n; + + l.weights = calloc(c*n*size*size, sizeof(float)); + l.weight_updates = calloc(c*n*size*size, sizeof(float)); + + l.biases = calloc(n, sizeof(float)); + l.bias_updates = calloc(n, sizeof(float)); + //float scale = n/(size*size*c); + //printf("scale: %f\n", scale); + float scale = .02; + for(i = 0; i < c*n*size*size; ++i) l.weights[i] = 
scale*rand_normal(); + //bilinear_init(l); + for(i = 0; i < n; ++i){ + l.biases[i] = 0; + } + l.pad = padding; + + l.out_h = (l.h - 1) * l.stride + l.size - 2*l.pad; + l.out_w = (l.w - 1) * l.stride + l.size - 2*l.pad; + l.out_c = n; + l.outputs = l.out_w * l.out_h * l.out_c; + l.inputs = l.w * l.h * l.c; + + scal_cpu(l.nweights, (float)l.out_w*l.out_h/(l.w*l.h), l.weights, 1); + + l.output = calloc(l.batch*l.outputs, sizeof(float)); + l.delta = calloc(l.batch*l.outputs, sizeof(float)); + + l.forward = forward_deconvolutional_layer; + l.backward = backward_deconvolutional_layer; + l.update = update_deconvolutional_layer; + + l.batch_normalize = batch_normalize; + + if(batch_normalize){ + l.scales = calloc(n, sizeof(float)); + l.scale_updates = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + l.scales[i] = 1; + } + + l.mean = calloc(n, sizeof(float)); + l.variance = calloc(n, sizeof(float)); + + l.mean_delta = calloc(n, sizeof(float)); + l.variance_delta = calloc(n, sizeof(float)); + + l.rolling_mean = calloc(n, sizeof(float)); + l.rolling_variance = calloc(n, sizeof(float)); + l.x = calloc(l.batch*l.outputs, sizeof(float)); + l.x_norm = calloc(l.batch*l.outputs, sizeof(float)); + } + if(adam){ + l.m = calloc(c*n*size*size, sizeof(float)); + l.v = calloc(c*n*size*size, sizeof(float)); + l.bias_m = calloc(n, sizeof(float)); + l.scale_m = calloc(n, sizeof(float)); + l.bias_v = calloc(n, sizeof(float)); + l.scale_v = calloc(n, sizeof(float)); + } + +#ifdef GPU + l.forward_gpu = forward_deconvolutional_layer_gpu; + l.backward_gpu = backward_deconvolutional_layer_gpu; + l.update_gpu = update_deconvolutional_layer_gpu; + + if(gpu_index >= 0){ + + if (adam) { + l.m_gpu = cuda_make_array(l.m, c*n*size*size); + l.v_gpu = cuda_make_array(l.v, c*n*size*size); + l.bias_m_gpu = cuda_make_array(l.bias_m, n); + l.bias_v_gpu = cuda_make_array(l.bias_v, n); + l.scale_m_gpu = cuda_make_array(l.scale_m, n); + l.scale_v_gpu = cuda_make_array(l.scale_v, n); + } + l.weights_gpu = 
cuda_make_array(l.weights, c*n*size*size); + l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size); + + l.biases_gpu = cuda_make_array(l.biases, n); + l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); + + l.delta_gpu = cuda_make_array(l.delta, l.batch*l.out_h*l.out_w*n); + l.output_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n); + + if(batch_normalize){ + l.mean_gpu = cuda_make_array(0, n); + l.variance_gpu = cuda_make_array(0, n); + + l.rolling_mean_gpu = cuda_make_array(0, n); + l.rolling_variance_gpu = cuda_make_array(0, n); + + l.mean_delta_gpu = cuda_make_array(0, n); + l.variance_delta_gpu = cuda_make_array(0, n); + + l.scales_gpu = cuda_make_array(l.scales, n); + l.scale_updates_gpu = cuda_make_array(0, n); + + l.x_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n); + l.x_norm_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n); + } + } + #ifdef CUDNN + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnCreateTensorDescriptor(&l.normTensorDesc); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); + cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1); + #endif +#endif + + l.activation = activation; + l.workspace_size = get_workspace_size(l); + + fprintf(stderr, "deconv%5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); + + return l; +} + +void denormalize_deconvolutional_layer(layer l) +{ + int i, j; + for(i = 0; i < l.n; ++i){ + float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001); + for(j = 0; j < l.c*l.size*l.size; ++j){ + l.weights[i*l.c*l.size*l.size + j] *= scale; + } + l.biases[i] -= l.rolling_mean[i] * scale; + l.scales[i] = 1; + l.rolling_mean[i] = 0; + l.rolling_variance[i] = 1; + } +} + +void resize_deconvolutional_layer(layer *l, int h, int w) +{ + l->h = h; + l->w = w; + l->out_h = (l->h - 1) * l->stride + l->size - 
2*l->pad; + l->out_w = (l->w - 1) * l->stride + l->size - 2*l->pad; + + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->w * l->h * l->c; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + if(l->batch_normalize){ + l->x = realloc(l->x, l->batch*l->outputs*sizeof(float)); + l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float)); + } + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); + + if(l->batch_normalize){ + cuda_free(l->x_gpu); + cuda_free(l->x_norm_gpu); + + l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); + l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); + } + #ifdef CUDNN + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + #endif +#endif + l->workspace_size = get_workspace_size(*l); +} + +void forward_deconvolutional_layer(const layer l, network net) +{ + int i; + + int m = l.size*l.size*l.n; + int n = l.h*l.w; + int k = l.c; + + fill_cpu(l.outputs*l.batch, 0, l.output, 1); + + for(i = 0; i < l.batch; ++i){ + float *a = l.weights; + float *b = net.input + i*l.c*l.h*l.w; + float *c = net.workspace; + + gemm_cpu(1,0,m,n,k,1,a,m,b,n,0,c,n); + + col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output+i*l.outputs); + } + if (l.batch_normalize) { + forward_batchnorm_layer(l, net); + } else { + add_bias(l.output, l.biases, l.batch, l.n, l.out_w*l.out_h); + } + activate_array(l.output, l.batch*l.n*l.out_w*l.out_h, l.activation); +} + +void backward_deconvolutional_layer(layer l, network net) +{ + int i; + + gradient_array(l.output, l.outputs*l.batch, l.activation, 
l.delta); + + if(l.batch_normalize){ + backward_batchnorm_layer(l, net); + } else { + backward_bias(l.bias_updates, l.delta, l.batch, l.n, l.out_w*l.out_h); + } + + //if(net.delta) memset(net.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float)); + + for(i = 0; i < l.batch; ++i){ + int m = l.c; + int n = l.size*l.size*l.n; + int k = l.h*l.w; + + float *a = net.input + i*m*k; + float *b = net.workspace; + float *c = l.weight_updates; + + im2col_cpu(l.delta + i*l.outputs, l.out_c, l.out_h, l.out_w, + l.size, l.stride, l.pad, b); + gemm_cpu(0,1,m,n,k,1,a,k,b,k,1,c,n); + + if(net.delta){ + int m = l.c; + int n = l.h*l.w; + int k = l.size*l.size*l.n; + + float *a = l.weights; + float *b = net.workspace; + float *c = net.delta + i*n*m; + + gemm_cpu(0,0,m,n,k,1,a,k,b,n,1,c,n); + } + } +} + +void update_deconvolutional_layer(layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int size = l.size*l.size*l.c*l.n; + axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.n, momentum, l.bias_updates, 1); + + if(l.scales){ + axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1); + scal_cpu(l.n, momentum, l.scale_updates, 1); + } + + axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(size, momentum, l.weight_updates, 1); +} + + + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/deconvolutional_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/deconvolutional_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..722a1a58feec4ef13dac2b811df98e3f9960d4ef --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/deconvolutional_layer.h @@ -0,0 +1,25 @@ +#ifndef DECONVOLUTIONAL_LAYER_H +#define DECONVOLUTIONAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include 
"activations.h" +#include "layer.h" +#include "network.h" + +#ifdef GPU +void forward_deconvolutional_layer_gpu(layer l, network net); +void backward_deconvolutional_layer_gpu(layer l, network net); +void update_deconvolutional_layer_gpu(layer l, update_args a); +void push_deconvolutional_layer(layer l); +void pull_deconvolutional_layer(layer l); +#endif + +layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam); +void resize_deconvolutional_layer(layer *l, int h, int w); +void forward_deconvolutional_layer(const layer l, network net); +void update_deconvolutional_layer(layer l, update_args a); +void backward_deconvolutional_layer(layer l, network net); + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/demo.c b/workloads/realworld/uvm_prefetch_async/darknet/src/demo.c new file mode 100644 index 0000000000000000000000000000000000000000..b89efb8dc4c044c0240b7442e39222405409a676 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/demo.c @@ -0,0 +1,349 @@ +#include "network.h" +#include "detection_layer.h" +#include "region_layer.h" +#include "cost_layer.h" +#include "utils.h" +#include "parser.h" +#include "box.h" +#include "image.h" +#include "demo.h" +#include + +#define DEMO 1 + +#ifdef OPENCV + +static char **demo_names; +static image **demo_alphabet; +static int demo_classes; + +static network *net; +static image buff [3]; +static image buff_letter[3]; +static int buff_index = 0; +static void * cap; +static float fps = 0; +static float demo_thresh = 0; +static float demo_hier = .5; +static int running = 0; + +static int demo_frame = 3; +static int demo_index = 0; +static float **predictions; +static float *avg; +static int demo_done = 0; +static int demo_total = 0; +double demo_time; + +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num); + +int 
size_network(network *net) +{ + int i; + int count = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + count += l.outputs; + } + } + return count; +} + +void remember_network(network *net) +{ + int i; + int count = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + memcpy(predictions[demo_index] + count, net->layers[i].output, sizeof(float) * l.outputs); + count += l.outputs; + } + } +} + +detection *avg_predictions(network *net, int *nboxes) +{ + int i, j; + int count = 0; + fill_cpu(demo_total, 0, avg, 1); + for(j = 0; j < demo_frame; ++j){ + axpy_cpu(demo_total, 1./demo_frame, predictions[j], 1, avg, 1); + } + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO || l.type == REGION || l.type == DETECTION){ + memcpy(l.output, avg + count, sizeof(float) * l.outputs); + count += l.outputs; + } + } + detection *dets = get_network_boxes(net, buff[0].w, buff[0].h, demo_thresh, demo_hier, 0, 1, nboxes); + return dets; +} + +void *detect_in_thread(void *ptr) +{ + running = 1; + float nms = .4; + + layer l = net->layers[net->n-1]; + float *X = buff_letter[(buff_index+2)%3].data; + network_predict(net, X); + + /* + if(l.type == DETECTION){ + get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0); + } else */ + remember_network(net); + detection *dets = 0; + int nboxes = 0; + dets = avg_predictions(net, &nboxes); + + + /* + int i,j; + box zero = {0}; + int classes = l.classes; + for(i = 0; i < demo_detections; ++i){ + avg[i].objectness = 0; + avg[i].bbox = zero; + memset(avg[i].prob, 0, classes*sizeof(float)); + for(j = 0; j < demo_frame; ++j){ + axpy_cpu(classes, 1./demo_frame, dets[j][i].prob, 1, avg[i].prob, 1); + avg[i].objectness += dets[j][i].objectness * 1./demo_frame; + avg[i].bbox.x += dets[j][i].bbox.x * 1./demo_frame; + avg[i].bbox.y += dets[j][i].bbox.y * 1./demo_frame; + 
avg[i].bbox.w += dets[j][i].bbox.w * 1./demo_frame; + avg[i].bbox.h += dets[j][i].bbox.h * 1./demo_frame; + } + //copy_cpu(classes, dets[0][i].prob, 1, avg[i].prob, 1); + //avg[i].objectness = dets[0][i].objectness; + } + */ + + if (nms > 0) do_nms_obj(dets, nboxes, l.classes, nms); + + printf("\033[2J"); + printf("\033[1;1H"); + printf("\nFPS:%.1f\n",fps); + printf("Objects:\n\n"); + image display = buff[(buff_index+2) % 3]; + draw_detections(display, dets, nboxes, demo_thresh, demo_names, demo_alphabet, demo_classes); + free_detections(dets, nboxes); + + demo_index = (demo_index + 1)%demo_frame; + running = 0; + return 0; +} + +void *fetch_in_thread(void *ptr) +{ + free_image(buff[buff_index]); + buff[buff_index] = get_image_from_stream(cap); + if(buff[buff_index].data == 0) { + demo_done = 1; + return 0; + } + letterbox_image_into(buff[buff_index], net->w, net->h, buff_letter[buff_index]); + return 0; +} + +void *display_in_thread(void *ptr) +{ + int c = show_image(buff[(buff_index + 1)%3], "Demo", 1); + if (c != -1) c = c%256; + if (c == 27) { + demo_done = 1; + return 0; + } else if (c == 82) { + demo_thresh += .02; + } else if (c == 84) { + demo_thresh -= .02; + if(demo_thresh <= .02) demo_thresh = .02; + } else if (c == 83) { + demo_hier += .02; + } else if (c == 81) { + demo_hier -= .02; + if(demo_hier <= .0) demo_hier = .0; + } + return 0; +} + +void *display_loop(void *ptr) +{ + while(1){ + display_in_thread(0); + } +} + +void *detect_loop(void *ptr) +{ + while(1){ + detect_in_thread(0); + } +} + +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) +{ + //demo_frame = avg_frames; + image **alphabet = load_alphabet(); + demo_names = names; + demo_alphabet = alphabet; + demo_classes = classes; + demo_thresh = thresh; + demo_hier = hier; + printf("Demo\n"); + net = load_network(cfgfile, 
weightfile, 0); + set_batch_network(net, 1); + pthread_t detect_thread; + pthread_t fetch_thread; + + srand(2222222); + + int i; + demo_total = size_network(net); + predictions = calloc(demo_frame, sizeof(float*)); + for (i = 0; i < demo_frame; ++i){ + predictions[i] = calloc(demo_total, sizeof(float)); + } + avg = calloc(demo_total, sizeof(float)); + + if(filename){ + printf("video file: %s\n", filename); + cap = open_video_stream(filename, 0, 0, 0, 0); + }else{ + cap = open_video_stream(0, cam_index, w, h, frames); + } + + if(!cap) error("Couldn't connect to webcam.\n"); + + buff[0] = get_image_from_stream(cap); + buff[1] = copy_image(buff[0]); + buff[2] = copy_image(buff[0]); + buff_letter[0] = letterbox_image(buff[0], net->w, net->h); + buff_letter[1] = letterbox_image(buff[0], net->w, net->h); + buff_letter[2] = letterbox_image(buff[0], net->w, net->h); + + int count = 0; + if(!prefix){ + make_window("Demo", 1352, 1013, fullscreen); + } + + demo_time = what_time_is_it_now(); + + while(!demo_done){ + buff_index = (buff_index + 1) %3; + if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); + if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); + if(!prefix){ + fps = 1./(what_time_is_it_now() - demo_time); + demo_time = what_time_is_it_now(); + display_in_thread(0); + }else{ + char name[256]; + sprintf(name, "%s_%08d", prefix, count); + save_image(buff[(buff_index + 1)%3], name); + } + pthread_join(fetch_thread, 0); + pthread_join(detect_thread, 0); + ++count; + } +} + +/* + void demo_compare(char *cfg1, char *weight1, char *cfg2, char *weight2, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) + { + demo_frame = avg_frames; + predictions = calloc(demo_frame, sizeof(float*)); + image **alphabet = load_alphabet(); + demo_names = names; + demo_alphabet = alphabet; + 
demo_classes = classes; + demo_thresh = thresh; + demo_hier = hier; + printf("Demo\n"); + net = load_network(cfg1, weight1, 0); + set_batch_network(net, 1); + pthread_t detect_thread; + pthread_t fetch_thread; + + srand(2222222); + + if(filename){ + printf("video file: %s\n", filename); + cap = cvCaptureFromFile(filename); + }else{ + cap = cvCaptureFromCAM(cam_index); + + if(w){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w); + } + if(h){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h); + } + if(frames){ + cvSetCaptureProperty(cap, CV_CAP_PROP_FPS, frames); + } + } + + if(!cap) error("Couldn't connect to webcam.\n"); + + layer l = net->layers[net->n-1]; + demo_detections = l.n*l.w*l.h; + int j; + + avg = (float *) calloc(l.outputs, sizeof(float)); + for(j = 0; j < demo_frame; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float)); + + boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box)); + probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *)); + for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes+1, sizeof(float)); + + buff[0] = get_image_from_stream(cap); + buff[1] = copy_image(buff[0]); + buff[2] = copy_image(buff[0]); + buff_letter[0] = letterbox_image(buff[0], net->w, net->h); + buff_letter[1] = letterbox_image(buff[0], net->w, net->h); + buff_letter[2] = letterbox_image(buff[0], net->w, net->h); + ipl = cvCreateImage(cvSize(buff[0].w,buff[0].h), IPL_DEPTH_8U, buff[0].c); + + int count = 0; + if(!prefix){ + cvNamedWindow("Demo", CV_WINDOW_NORMAL); + if(fullscreen){ + cvSetWindowProperty("Demo", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); + } else { + cvMoveWindow("Demo", 0, 0); + cvResizeWindow("Demo", 1352, 1013); + } + } + + demo_time = what_time_is_it_now(); + + while(!demo_done){ +buff_index = (buff_index + 1) %3; +if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); +if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); +if(!prefix){ + 
fps = 1./(what_time_is_it_now() - demo_time); + demo_time = what_time_is_it_now(); + display_in_thread(0); +}else{ + char name[256]; + sprintf(name, "%s_%08d", prefix, count); + save_image(buff[(buff_index + 1)%3], name); +} +pthread_join(fetch_thread, 0); +pthread_join(detect_thread, 0); +++count; +} +} +*/ +#else +void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg, float hier, int w, int h, int frames, int fullscreen) +{ + fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); +} +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/demo.h b/workloads/realworld/uvm_prefetch_async/darknet/src/demo.h new file mode 100644 index 0000000000000000000000000000000000000000..86e46541d1a7473b22373b29bc6ff9cc281d4939 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/demo.h @@ -0,0 +1,6 @@ +#ifndef DEMO_H +#define DEMO_H + +#include "image.h" + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/detection_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/detection_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..565fa3c3f7d123736d65661d3be8ea91e26b3d5c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/detection_layer.c @@ -0,0 +1,275 @@ +#include "detection_layer.h" +#include "activations.h" +#include "softmax_layer.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore) +{ + detection_layer l = {0}; + l.type = DETECTION; + + l.n = n; + l.batch = batch; + l.inputs = inputs; + l.classes = classes; + l.coords = coords; + l.rescore = rescore; + l.side = side; + l.w = side; + l.h = side; + assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs); + l.cost = calloc(1, 
sizeof(float)); + l.outputs = l.inputs; + l.truths = l.side*l.side*(1+l.coords+l.classes); + l.output = calloc(batch*l.outputs, sizeof(float)); + l.delta = calloc(batch*l.outputs, sizeof(float)); + + l.forward = forward_detection_layer; + l.backward = backward_detection_layer; +#ifdef GPU + l.forward_gpu = forward_detection_layer_gpu; + l.backward_gpu = backward_detection_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "Detection Layer\n"); + srand(0); + + return l; +} + +void forward_detection_layer(const detection_layer l, network net) +{ + int locations = l.side*l.side; + int i,j; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + //if(l.reorg) reorg(l.output, l.w*l.h, size*l.n, l.batch, 1); + int b; + if (l.softmax){ + for(b = 0; b < l.batch; ++b){ + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + int offset = i*l.classes; + softmax(l.output + index + offset, l.classes, 1, 1, + l.output + index + offset); + } + } + } + if(net.train){ + float avg_iou = 0; + float avg_cat = 0; + float avg_allcat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + *(l.cost) = 0; + int size = l.inputs * l.batch; + memset(l.delta, 0, size * sizeof(float)); + for (b = 0; b < l.batch; ++b){ + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + int truth_index = (b*locations + i)*(1+l.coords+l.classes); + int is_obj = net.truth[truth_index]; + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + l.delta[p_index] = l.noobject_scale*(0 - l.output[p_index]); + *(l.cost) += l.noobject_scale*pow(l.output[p_index], 2); + avg_anyobj += l.output[p_index]; + } + + int best_index = -1; + float best_iou = 0; + float best_rmse = 20; + + if (!is_obj){ + continue; + } + + int class_index = index + i*l.classes; + for(j = 0; j < l.classes; ++j) { + l.delta[class_index+j] = l.class_scale * 
(net.truth[truth_index+1+j] - l.output[class_index+j]); + *(l.cost) += l.class_scale * pow(net.truth[truth_index+1+j] - l.output[class_index+j], 2); + if(net.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j]; + avg_allcat += l.output[class_index+j]; + } + + box truth = float_to_box(net.truth + truth_index + 1 + l.classes, 1); + truth.x /= l.side; + truth.y /= l.side; + + for(j = 0; j < l.n; ++j){ + int box_index = index + locations*(l.classes + l.n) + (i*l.n + j) * l.coords; + box out = float_to_box(l.output + box_index, 1); + out.x /= l.side; + out.y /= l.side; + + if (l.sqrt){ + out.w = out.w*out.w; + out.h = out.h*out.h; + } + + float iou = box_iou(out, truth); + //iou = 0; + float rmse = box_rmse(out, truth); + if(best_iou > 0 || iou > 0){ + if(iou > best_iou){ + best_iou = iou; + best_index = j; + } + }else{ + if(rmse < best_rmse){ + best_rmse = rmse; + best_index = j; + } + } + } + + if(l.forced){ + if(truth.w*truth.h < .1){ + best_index = 1; + }else{ + best_index = 0; + } + } + if(l.random && *(net.seen) < 64000){ + best_index = rand()%l.n; + } + + int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords; + int tbox_index = truth_index + 1 + l.classes; + + box out = float_to_box(l.output + box_index, 1); + out.x /= l.side; + out.y /= l.side; + if (l.sqrt) { + out.w = out.w*out.w; + out.h = out.h*out.h; + } + float iou = box_iou(out, truth); + + //printf("%d,", best_index); + int p_index = index + locations*l.classes + i*l.n + best_index; + *(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2); + *(l.cost) += l.object_scale * pow(1-l.output[p_index], 2); + avg_obj += l.output[p_index]; + l.delta[p_index] = l.object_scale * (1.-l.output[p_index]); + + if(l.rescore){ + l.delta[p_index] = l.object_scale * (iou - l.output[p_index]); + } + + l.delta[box_index+0] = l.coord_scale*(net.truth[tbox_index + 0] - l.output[box_index + 0]); + l.delta[box_index+1] = l.coord_scale*(net.truth[tbox_index + 1] - l.output[box_index + 
1]); + l.delta[box_index+2] = l.coord_scale*(net.truth[tbox_index + 2] - l.output[box_index + 2]); + l.delta[box_index+3] = l.coord_scale*(net.truth[tbox_index + 3] - l.output[box_index + 3]); + if(l.sqrt){ + l.delta[box_index+2] = l.coord_scale*(sqrt(net.truth[tbox_index + 2]) - l.output[box_index + 2]); + l.delta[box_index+3] = l.coord_scale*(sqrt(net.truth[tbox_index + 3]) - l.output[box_index + 3]); + } + + *(l.cost) += pow(1-iou, 2); + avg_iou += iou; + ++count; + } + } + + if(0){ + float *costs = calloc(l.batch*locations*l.n, sizeof(float)); + for (b = 0; b < l.batch; ++b) { + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + costs[b*locations*l.n + i*l.n + j] = l.delta[p_index]*l.delta[p_index]; + } + } + } + int indexes[100]; + top_k(costs, l.batch*locations*l.n, 100, indexes); + float cutoff = costs[indexes[99]]; + for (b = 0; b < l.batch; ++b) { + int index = b*l.inputs; + for (i = 0; i < locations; ++i) { + for (j = 0; j < l.n; ++j) { + int p_index = index + locations*l.classes + i*l.n + j; + if (l.delta[p_index]*l.delta[p_index] < cutoff) l.delta[p_index] = 0; + } + } + } + free(costs); + } + + + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + + + printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count); + //if(l.reorg) reorg(l.delta, l.w*l.h, size*l.n, l.batch, 0); + } +} + +void backward_detection_layer(const detection_layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +void get_detection_detections(layer l, int w, int h, float thresh, detection *dets) +{ + int i,j,n; + float *predictions = l.output; + //int per_cell = 5*num+classes; + for (i = 0; i < l.side*l.side; ++i){ + int row = i / l.side; + int col = i % l.side; + for(n = 0; n < l.n; ++n){ 
+ int index = i*l.n + n; + int p_index = l.side*l.side*l.classes + i*l.n + n; + float scale = predictions[p_index]; + int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n)*4; + box b; + b.x = (predictions[box_index + 0] + col) / l.side * w; + b.y = (predictions[box_index + 1] + row) / l.side * h; + b.w = pow(predictions[box_index + 2], (l.sqrt?2:1)) * w; + b.h = pow(predictions[box_index + 3], (l.sqrt?2:1)) * h; + dets[index].bbox = b; + dets[index].objectness = scale; + for(j = 0; j < l.classes; ++j){ + int class_index = i*l.classes; + float prob = scale*predictions[class_index+j]; + dets[index].prob[j] = (prob > thresh) ? prob : 0; + } + } + } +} + +#ifdef GPU + +void forward_detection_layer_gpu(const detection_layer l, network net) +{ + if(!net.train){ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + return; + } + + cuda_pull_array(net.input_gpu, net.input, l.batch*l.inputs); + forward_detection_layer(l, net); + cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +void backward_detection_layer_gpu(detection_layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); + //copy_gpu(l.batch*l.inputs, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/detection_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/detection_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1c818535700c770c7a5d9387534b199b58876198 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/detection_layer.h @@ -0,0 +1,18 @@ +#ifndef DETECTION_LAYER_H +#define DETECTION_LAYER_H + +#include "layer.h" +#include "network.h" + +typedef layer detection_layer; + +detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore); +void forward_detection_layer(const detection_layer l, network net); +void 
backward_detection_layer(const detection_layer l, network net); + +#ifdef GPU +void forward_detection_layer_gpu(const detection_layer l, network net); +void backward_detection_layer_gpu(detection_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/dropout_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/dropout_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..8fefa22caeddd174b2a7010274fadae854c742c1 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/dropout_layer.c @@ -0,0 +1,60 @@ +#include "dropout_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include +#include + +dropout_layer make_dropout_layer(int batch, int inputs, float probability) +{ + dropout_layer l = {0}; + l.type = DROPOUT; + l.probability = probability; + l.inputs = inputs; + l.outputs = inputs; + l.batch = batch; + l.rand = calloc(inputs*batch, sizeof(float)); + l.scale = 1./(1.-probability); + l.forward = forward_dropout_layer; + l.backward = backward_dropout_layer; + #ifdef GPU + l.forward_gpu = forward_dropout_layer_gpu; + l.backward_gpu = backward_dropout_layer_gpu; + l.rand_gpu = cuda_make_array(l.rand, inputs*batch); + #endif + fprintf(stderr, "dropout p = %.2f %4d -> %4d\n", probability, inputs, inputs); + return l; +} + +void resize_dropout_layer(dropout_layer *l, int inputs) +{ + l->rand = realloc(l->rand, l->inputs*l->batch*sizeof(float)); + #ifdef GPU + cuda_free(l->rand_gpu); + + l->rand_gpu = cuda_make_array(l->rand, inputs*l->batch); + #endif +} + +void forward_dropout_layer(dropout_layer l, network net) +{ + int i; + if (!net.train) return; + for(i = 0; i < l.batch * l.inputs; ++i){ + float r = rand_uniform(0, 1); + l.rand[i] = r; + if(r < l.probability) net.input[i] = 0; + else net.input[i] *= l.scale; + } +} + +void backward_dropout_layer(dropout_layer l, network net) +{ + int i; + if(!net.delta) return; + for(i = 0; i < l.batch * l.inputs; ++i){ + float r = 
l.rand[i]; + if(r < l.probability) net.delta[i] = 0; + else net.delta[i] *= l.scale; + } +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/dropout_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/dropout_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..01f94d4d7d10b732fb0e558089579e95128a70bd --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/dropout_layer.h @@ -0,0 +1,20 @@ +#ifndef DROPOUT_LAYER_H +#define DROPOUT_LAYER_H + +#include "layer.h" +#include "network.h" + +typedef layer dropout_layer; + +dropout_layer make_dropout_layer(int batch, int inputs, float probability); + +void forward_dropout_layer(dropout_layer l, network net); +void backward_dropout_layer(dropout_layer l, network net); +void resize_dropout_layer(dropout_layer *l, int inputs); + +#ifdef GPU +void forward_dropout_layer_gpu(dropout_layer l, network net); +void backward_dropout_layer_gpu(dropout_layer l, network net); + +#endif +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/dropout_layer_kernels.cu b/workloads/realworld/uvm_prefetch_async/darknet/src/dropout_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..30cd67a34f06e5b398e198f186a2ecefcf6df3dc --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/dropout_layer_kernels.cu @@ -0,0 +1,60 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "dropout_layer.h" +#include "cuda_dark.h" +#include "utils.h" +} + +__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id < size) input[id] = (rand[id] < prob) ? 
0 : input[id]*scale; +} + +void forward_dropout_layer_gpu(dropout_layer layer, network net) +{ + if (!net.train) return; + int size = layer.inputs*layer.batch; + cuda_random(layer.rand_gpu, size); + /* + int i; + for(i = 0; i < size; ++i){ + layer.rand[i] = rand_uniform(); + } + cuda_push_array(layer.rand_gpu, layer.rand, size); + */ + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(net.input_gpu, size * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(layer.rand_gpu, size * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + yoloswag420blazeit360noscope<<>>(net.input_gpu, size, layer.rand_gpu, layer.probability, layer.scale); + check_error(cudaPeekAtLastError()); +} + +void backward_dropout_layer_gpu(dropout_layer layer, network net) +{ + if(!net.delta_gpu) return; + int size = layer.inputs*layer.batch; + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(net.delta_gpu, size * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(layer.rand_gpu, size * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + yoloswag420blazeit360noscope<<>>(net.delta_gpu, size, layer.rand_gpu, layer.probability, layer.scale); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/gemm.c b/workloads/realworld/uvm_prefetch_async/darknet/src/gemm.c new file mode 100644 index 0000000000000000000000000000000000000000..756ae595d7348fc2d343a48715b05ea882d6aa7c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/gemm.c @@ -0,0 +1,324 @@ +#include "gemm.h" +#include "utils.h" +#include "cuda_dark.h" +#include +#include +#include + +void gemm_bin(int M, int N, int K, float ALPHA, + char *A, int lda, + float *B, int ldb, + float *C, int 
ldc) +{ + int i,j,k; + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + char A_PART = A[i*lda+k]; + if(A_PART){ + for(j = 0; j < N; ++j){ + C[i*ldc+j] += B[k*ldb+j]; + } + } else { + for(j = 0; j < N; ++j){ + C[i*ldc+j] -= B[k*ldb+j]; + } + } + } + } +} + +float *random_matrix(int rows, int cols) +{ + int i; + float *m = calloc(rows*cols, sizeof(float)); + for(i = 0; i < rows*cols; ++i){ + m[i] = (float)rand()/RAND_MAX; + } + return m; +} + +void time_random_matrix(int TA, int TB, int m, int k, int n) +{ + float *a; + if(!TA) a = random_matrix(m,k); + else a = random_matrix(k,m); + int lda = (!TA)?k:m; + float *b; + if(!TB) b = random_matrix(k,n); + else b = random_matrix(n,k); + int ldb = (!TB)?n:k; + + float *c = random_matrix(m,n); + int i; + clock_t start = clock(), end; + for(i = 0; i<10; ++i){ + gemm_cpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); + } + end = clock(); + printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf ms\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); + free(a); + free(b); + free(c); +} + + +void gemm(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + gemm_cpu( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); +} + +void gemm_nn(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + register float A_PART = ALPHA*A[i*lda+k]; + for(j = 0; j < N; ++j){ + C[i*ldc+j] += A_PART*B[k*ldb+j]; + } + } + } +} + +void gemm_nt(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + register float sum = 0; + for(k = 0; k < K; ++k){ + sum += ALPHA*A[i*lda+k]*B[j*ldb + k]; + } + C[i*ldc+j] += sum; + } + } +} + +void gemm_tn(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, 
int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + register float A_PART = ALPHA*A[k*lda+i]; + for(j = 0; j < N; ++j){ + C[i*ldc+j] += A_PART*B[k*ldb+j]; + } + } + } +} + +void gemm_tt(int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float *C, int ldc) +{ + int i,j,k; + #pragma omp parallel for + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + register float sum = 0; + for(k = 0; k < K; ++k){ + sum += ALPHA*A[i+k*lda]*B[k+j*ldb]; + } + C[i*ldc+j] += sum; + } + } +} + + +void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + //printf("cpu: %d %d %d %d %d %f %d %d %f %d\n",TA, TB, M, N, K, ALPHA, lda, ldb, BETA, ldc); + int i, j; + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + C[i*ldc + j] *= BETA; + } + } + if(!TA && !TB) + gemm_nn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else if(TA && !TB) + gemm_tn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else if(!TA && TB) + gemm_nt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); + else + gemm_tt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); +} + + +// #ifdef GPU + +// #include + +// void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, +// float *A_gpu, int lda, +// float *B_gpu, int ldb, +// float BETA, +// float *C_gpu, int ldc) +// { +// cublasHandle_t handle = blas_handle(); +// cudaError_t status = (cudaError_t) cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), +// (TA ? 
CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); +// check_error(status); +// } + +// #include +// #include +// #include +// #include + +// void time_gpu_random_matrix(int TA, int TB, int m, int k, int n) +// { +// float *a; +// if(!TA) a = random_matrix(m,k); +// else a = random_matrix(k,m); +// int lda = (!TA)?k:m; +// float *b; +// if(!TB) b = random_matrix(k,n); +// else b = random_matrix(n,k); +// int ldb = (!TB)?n:k; + +// float *c = random_matrix(m,n); +// int i; +// clock_t start = clock(), end; +// for(i = 0; i<32; ++i){ +// gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); +// } +// end = clock(); +// printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); +// free(a); +// free(b); +// free(c); +// } + +// void time_gpu(int TA, int TB, int m, int k, int n) +// { +// int iter = 10; +// float *a = random_matrix(m,k); +// float *b = random_matrix(k,n); + +// int lda = (!TA)?k:m; +// int ldb = (!TB)?n:k; + +// float *c = random_matrix(m,n); + +// float *a_cl = cuda_make_array(a, m*k); +// float *b_cl = cuda_make_array(b, k*n); +// float *c_cl = cuda_make_array(c, m*n); + +// int i; +// clock_t start = clock(), end; +// for(i = 0; i +#include +#include +#include +#include + +#include "gemm.h" +#include "utils.h" +#include "cuda_dark.h" + +#include +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK_X 16 +#define DIM_THREAD_BLOCK_Y 16 + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday(&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +__global__ void gemm_kernel(float *a, float *b, float *c, int M, int K, int N, float alpha, float beta) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + // Compute each thread's global 
row and column index + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + + // Statically allocated shared memory + __shared__ float s_a[PREFETCH_COUNT * DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + __shared__ float s_b[PREFETCH_COUNT * DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y]; + // Accumulate in temporary variable + + if (row < M && col < N) + { + float tmp = beta * c[row * N + col]; + + int base_tiles = 0; + int end_tile = base_tiles + (K + blockDim.x - 1) / blockDim.x; + + int fetch = base_tiles; + int tile_size = DIM_THREAD_BLOCK_X; + int mem_size = DIM_THREAD_BLOCK_X * DIM_THREAD_BLOCK_Y; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + if ((fetch * tile_size + threadIdx.x) < K) + memcpy_async(s_a[(fetch % PREFETCH_COUNT) * mem_size + (threadIdx.y * blockDim.x + threadIdx.x)], a[row * K + fetch * tile_size + threadIdx.x], pipe); + if ((fetch * tile_size + threadIdx.y) < K) + memcpy_async(s_b[(fetch % PREFETCH_COUNT) * mem_size + (threadIdx.y * blockDim.x + threadIdx.x)], b[(fetch * tile_size + threadIdx.y) * N + col], pipe); + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + int left = K - compute * blockDim.x; + for (int k = 0; k < blockDim.x && k < left; k++) + { + tmp += alpha * s_a[(compute % PREFETCH_COUNT) * mem_size + threadIdx.y * blockDim.x + k] * s_b[(compute % PREFETCH_COUNT) * mem_size + k * blockDim.x + threadIdx.x]; + } + block.sync(); + } + c[row * N + col] = tmp; + } + block.sync(); +} + +void gemmCuda(float *A, float *B, float *C, int M, int N, int K, float alpha, float beta) +{ + dim3 block(DIM_THREAD_BLOCK_X, DIM_THREAD_BLOCK_Y); + dim3 grid((size_t)(ceil(((float)N) / ((float)block.y))), (size_t)(ceil(((float)M) / ((float)block.x)))); + + cudaStream_t stream1; + cudaStream_t 
stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(A, M * K * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(B, K * N * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(C, M * N * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + gemm_kernel<<>>(A, B, C, M, K, N, alpha, beta); + check_error(cudaPeekAtLastError()); +} + +void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, + float *A_gpu, int lda, + float *B_gpu, int ldb, + float BETA, + float *C_gpu, int ldc) +{ + // printf("TA is %d, TB is %d, M is %d, N is %d, K is %d, lda is %d, ldb is %d, ldc is %d.\n", TA, TB, M, N, K, lda, ldb, ldc); + if (TA == 0 && TB == 0) { + gemmCuda(A_gpu, B_gpu, C_gpu, M, N, K, ALPHA, BETA); + } else { + cublasHandle_t handle = blas_handle(); + cudaError_t status = (cudaError_t) cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), + (TA ? 
CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); + check_error(status); + } +} + + +void time_gpu_random_matrix(int TA, int TB, int m, int k, int n) +{ + float *a; + if(!TA) a = random_matrix(m,k); + else a = random_matrix(k,m); + int lda = (!TA)?k:m; + float *b; + if(!TB) b = random_matrix(k,n); + else b = random_matrix(n,k); + int ldb = (!TB)?n:k; + + float *c = random_matrix(m,n); + int i; + clock_t start = clock(), end; + for(i = 0; i<32; ++i){ + gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); + } + end = clock(); + printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); + free(a); + free(b); + free(c); +} + +void time_gpu(int TA, int TB, int m, int k, int n) +{ + int iter = 10; + float *a = random_matrix(m,k); + float *b = random_matrix(k,n); + + int lda = (!TA)?k:m; + int ldb = (!TB)?n:k; + + float *c = random_matrix(m,n); + + float *a_cl = cuda_make_array(a, m*k); + float *b_cl = cuda_make_array(b, k*n); + float *c_cl = cuda_make_array(c, m*n); + + int i; + clock_t start = clock(), end; + for(i = 0; i +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; +#endif +} + +layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam) +{ + fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = GRU; + l.steps = steps; + l.inputs = inputs; + + l.uz = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uz) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uz->batch = batch; + + l.wz = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wz) = 
make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wz->batch = batch; + + l.ur = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.ur) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.ur->batch = batch; + + l.wr = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wr) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wr->batch = batch; + + + + l.uh = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.uh) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam); + l.uh->batch = batch; + + l.wh = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.wh) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam); + l.wh->batch = batch; + + l.batch_normalize = batch_normalize; + + + l.outputs = outputs; + l.output = calloc(outputs*batch*steps, sizeof(float)); + l.delta = calloc(outputs*batch*steps, sizeof(float)); + l.state = calloc(outputs*batch, sizeof(float)); + l.prev_state = calloc(outputs*batch, sizeof(float)); + l.forgot_state = calloc(outputs*batch, sizeof(float)); + l.forgot_delta = calloc(outputs*batch, sizeof(float)); + + l.r_cpu = calloc(outputs*batch, sizeof(float)); + l.z_cpu = calloc(outputs*batch, sizeof(float)); + l.h_cpu = calloc(outputs*batch, sizeof(float)); + + l.forward = forward_gru_layer; + l.backward = backward_gru_layer; + l.update = update_gru_layer; + +#ifdef GPU + l.forward_gpu = forward_gru_layer_gpu; + l.backward_gpu = backward_gru_layer_gpu; + l.update_gpu = update_gru_layer_gpu; + + l.forgot_state_gpu = cuda_make_array(0, batch*outputs); + l.forgot_delta_gpu = cuda_make_array(0, batch*outputs); + l.prev_state_gpu = cuda_make_array(0, batch*outputs); + l.state_gpu = cuda_make_array(0, batch*outputs); + l.output_gpu = cuda_make_array(0, batch*outputs*steps); + l.delta_gpu = cuda_make_array(0, batch*outputs*steps); + l.r_gpu = cuda_make_array(0, 
batch*outputs); + l.z_gpu = cuda_make_array(0, batch*outputs); + l.h_gpu = cuda_make_array(0, batch*outputs); + +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l.uz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uz->out_c, l.uz->out_h, l.uz->out_w); + cudnnSetTensor4dDescriptor(l.uh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uh->out_c, l.uh->out_h, l.uh->out_w); + cudnnSetTensor4dDescriptor(l.ur->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ur->out_c, l.ur->out_h, l.ur->out_w); + cudnnSetTensor4dDescriptor(l.wz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wz->out_c, l.wz->out_h, l.wz->out_w); + cudnnSetTensor4dDescriptor(l.wh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wh->out_c, l.wh->out_h, l.wh->out_w); + cudnnSetTensor4dDescriptor(l.wr->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wr->out_c, l.wr->out_h, l.wr->out_w); +#endif +#endif + + return l; +} + +void update_gru_layer(layer l, update_args a) +{ + update_connected_layer(*(l.ur), a); + update_connected_layer(*(l.uz), a); + update_connected_layer(*(l.uh), a); + update_connected_layer(*(l.wr), a); + update_connected_layer(*(l.wz), a); + update_connected_layer(*(l.wh), a); +} + +void forward_gru_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + fill_cpu(l.outputs * l.batch * l.steps, 0, uz.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, ur.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, uh.delta, 1); + + fill_cpu(l.outputs * l.batch * l.steps, 0, wz.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wr.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wh.delta, 1); + if(net.train) { + fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1); + copy_cpu(l.outputs*l.batch, l.state, 1, l.prev_state, 1); + } + + for (i = 0; i < 
l.steps; ++i) { + s.input = l.state; + forward_connected_layer(wz, s); + forward_connected_layer(wr, s); + + s.input = net.input; + forward_connected_layer(uz, s); + forward_connected_layer(ur, s); + forward_connected_layer(uh, s); + + + copy_cpu(l.outputs*l.batch, uz.output, 1, l.z_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wz.output, 1, l.z_cpu, 1); + + copy_cpu(l.outputs*l.batch, ur.output, 1, l.r_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wr.output, 1, l.r_cpu, 1); + + activate_array(l.z_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.r_cpu, l.outputs*l.batch, LOGISTIC); + + copy_cpu(l.outputs*l.batch, l.state, 1, l.forgot_state, 1); + mul_cpu(l.outputs*l.batch, l.r_cpu, 1, l.forgot_state, 1); + + s.input = l.forgot_state; + forward_connected_layer(wh, s); + + copy_cpu(l.outputs*l.batch, uh.output, 1, l.h_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, wh.output, 1, l.h_cpu, 1); + + if(l.tanh){ + activate_array(l.h_cpu, l.outputs*l.batch, TANH); + } else { + activate_array(l.h_cpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_sum_cpu(l.state, l.h_cpu, l.z_cpu, l.outputs*l.batch, l.output); + + copy_cpu(l.outputs*l.batch, l.output, 1, l.state, 1); + + net.input += l.inputs*l.batch; + l.output += l.outputs*l.batch; + increment_layer(&uz, 1); + increment_layer(&ur, 1); + increment_layer(&uh, 1); + + increment_layer(&wz, 1); + increment_layer(&wr, 1); + increment_layer(&wh, 1); + } +} + +void backward_gru_layer(layer l, network net) +{ +} + +#ifdef GPU + +void pull_gru_layer(layer l) +{ +} + +void push_gru_layer(layer l) +{ +} + +void update_gru_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.ur), a); + update_connected_layer_gpu(*(l.uz), a); + update_connected_layer_gpu(*(l.uh), a); + update_connected_layer_gpu(*(l.wr), a); + update_connected_layer_gpu(*(l.wz), a); + update_connected_layer_gpu(*(l.wh), a); +} + +void forward_gru_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer 
ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + fill_gpu(l.outputs * l.batch * l.steps, 0, uz.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, ur.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, uh.delta_gpu, 1); + + fill_gpu(l.outputs * l.batch * l.steps, 0, wz.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wr.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wh.delta_gpu, 1); + if(net.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.prev_state_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(wz, s); + forward_connected_layer_gpu(wr, s); + + s.input_gpu = net.input_gpu; + forward_connected_layer_gpu(uz, s); + forward_connected_layer_gpu(ur, s); + forward_connected_layer_gpu(uh, s); + + copy_gpu(l.outputs*l.batch, uz.output_gpu, 1, l.z_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wz.output_gpu, 1, l.z_gpu, 1); + + copy_gpu(l.outputs*l.batch, ur.output_gpu, 1, l.r_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wr.output_gpu, 1, l.r_gpu, 1); + + activate_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.forgot_state_gpu, 1); + mul_gpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1); + + s.input_gpu = l.forgot_state_gpu; + forward_connected_layer_gpu(wh, s); + + copy_gpu(l.outputs*l.batch, uh.output_gpu, 1, l.h_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wh.output_gpu, 1, l.h_gpu, 1); + + if(l.tanh){ + activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH); + } else { + activate_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_sum_gpu(l.state_gpu, l.h_gpu, l.z_gpu, l.outputs*l.batch, l.output_gpu); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.state_gpu, 1); + + net.input_gpu += l.inputs*l.batch; + l.output_gpu += 
l.outputs*l.batch; + increment_layer(&uz, 1); + increment_layer(&ur, 1); + increment_layer(&uh, 1); + + increment_layer(&wz, 1); + increment_layer(&wr, 1); + increment_layer(&wh, 1); + } +} + +void backward_gru_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer uz = *(l.uz); + layer ur = *(l.ur); + layer uh = *(l.uh); + + layer wz = *(l.wz); + layer wr = *(l.wr); + layer wh = *(l.wh); + + increment_layer(&uz, l.steps - 1); + increment_layer(&ur, l.steps - 1); + increment_layer(&uh, l.steps - 1); + + increment_layer(&wz, l.steps - 1); + increment_layer(&wr, l.steps - 1); + increment_layer(&wh, l.steps - 1); + + net.input_gpu += l.inputs*l.batch*(l.steps-1); + if(net.delta_gpu) net.delta_gpu += l.inputs*l.batch*(l.steps-1); + l.output_gpu += l.outputs*l.batch*(l.steps-1); + l.delta_gpu += l.outputs*l.batch*(l.steps-1); + float *end_state = l.output_gpu; + for (i = l.steps-1; i >= 0; --i) { + if(i != 0) copy_gpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + else copy_gpu(l.outputs*l.batch, l.prev_state_gpu, 1, l.state_gpu, 1); + float *prev_delta_gpu = (i == 0) ? 
0 : l.delta_gpu - l.outputs*l.batch; + + copy_gpu(l.outputs*l.batch, uz.output_gpu, 1, l.z_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wz.output_gpu, 1, l.z_gpu, 1); + + copy_gpu(l.outputs*l.batch, ur.output_gpu, 1, l.r_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wr.output_gpu, 1, l.r_gpu, 1); + + activate_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, uh.output_gpu, 1, l.h_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, wh.output_gpu, 1, l.h_gpu, 1); + + if(l.tanh){ + activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH); + } else { + activate_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC); + } + + weighted_delta_gpu(l.state_gpu, l.h_gpu, l.z_gpu, prev_delta_gpu, uh.delta_gpu, uz.delta_gpu, l.outputs*l.batch, l.delta_gpu); + + if(l.tanh){ + gradient_array_gpu(l.h_gpu, l.outputs*l.batch, TANH, uh.delta_gpu); + } else { + gradient_array_gpu(l.h_gpu, l.outputs*l.batch, LOGISTIC, uh.delta_gpu); + } + + copy_gpu(l.outputs*l.batch, uh.delta_gpu, 1, wh.delta_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.forgot_state_gpu, 1); + mul_gpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1); + fill_gpu(l.outputs*l.batch, 0, l.forgot_delta_gpu, 1); + + s.input_gpu = l.forgot_state_gpu; + s.delta_gpu = l.forgot_delta_gpu; + + backward_connected_layer_gpu(wh, s); + if(prev_delta_gpu) mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.r_gpu, prev_delta_gpu); + mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.state_gpu, ur.delta_gpu); + + gradient_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC, ur.delta_gpu); + copy_gpu(l.outputs*l.batch, ur.delta_gpu, 1, wr.delta_gpu, 1); + + gradient_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC, uz.delta_gpu); + copy_gpu(l.outputs*l.batch, uz.delta_gpu, 1, wz.delta_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = prev_delta_gpu; + + backward_connected_layer_gpu(wr, s); + backward_connected_layer_gpu(wz, s); + + 
s.input_gpu = net.input_gpu; + s.delta_gpu = net.delta_gpu; + + backward_connected_layer_gpu(uh, s); + backward_connected_layer_gpu(ur, s); + backward_connected_layer_gpu(uz, s); + + + net.input_gpu -= l.inputs*l.batch; + if(net.delta_gpu) net.delta_gpu -= l.inputs*l.batch; + l.output_gpu -= l.outputs*l.batch; + l.delta_gpu -= l.outputs*l.batch; + increment_layer(&uz, -1); + increment_layer(&ur, -1); + increment_layer(&uh, -1); + + increment_layer(&wz, -1); + increment_layer(&wr, -1); + increment_layer(&wh, -1); + } + copy_gpu(l.outputs*l.batch, end_state, 1, l.state_gpu, 1); +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/gru_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/gru_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9067942e9499d53c8d54f7728d64a5030200f4de --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/gru_layer.h @@ -0,0 +1,24 @@ + +#ifndef GRU_LAYER_H +#define GRU_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" + +layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam); + +void forward_gru_layer(layer l, network state); +void backward_gru_layer(layer l, network state); +void update_gru_layer(layer l, update_args a); + +#ifdef GPU +void forward_gru_layer_gpu(layer l, network state); +void backward_gru_layer_gpu(layer l, network state); +void update_gru_layer_gpu(layer l, update_args a); +void push_gru_layer(layer l); +void pull_gru_layer(layer l); +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/im2col.c b/workloads/realworld/uvm_prefetch_async/darknet/src/im2col.c new file mode 100644 index 0000000000000000000000000000000000000000..69ec98a9d12b2e21a3859611ad709d62fc80dcf3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/im2col.c @@ -0,0 +1,40 @@ +#include "im2col.h" +#include +float im2col_get_pixel(float *im, int height, int width, 
int channels, + int row, int col, int channel, int pad) +{ + row -= pad; + col -= pad; + + if (row < 0 || col < 0 || + row >= height || col >= width) return 0; + return im[col + width*(row + height*channel)]; +} + +//From Berkeley Vision's Caffe! +//https://github.com/BVLC/caffe/blob/master/LICENSE +void im2col_cpu(float* data_im, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_col) +{ + int c,h,w; + int height_col = (height + 2*pad - ksize) / stride + 1; + int width_col = (width + 2*pad - ksize) / stride + 1; + + int channels_col = channels * ksize * ksize; + for (c = 0; c < channels_col; ++c) { + int w_offset = c % ksize; + int h_offset = (c / ksize) % ksize; + int c_im = c / ksize / ksize; + for (h = 0; h < height_col; ++h) { + for (w = 0; w < width_col; ++w) { + int im_row = h_offset + h * stride; + int im_col = w_offset + w * stride; + int col_index = (c * height_col + h) * width_col + w; + data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, + im_row, im_col, c_im, pad); + } + } + } +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/im2col.h b/workloads/realworld/uvm_prefetch_async/darknet/src/im2col.h new file mode 100644 index 0000000000000000000000000000000000000000..02c4247fad9b8428a8e89fc8caec0b5b6ba5b36a --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/im2col.h @@ -0,0 +1,15 @@ +#ifndef IM2COL_H +#define IM2COL_H + +void im2col_cpu(float* data_im, + int channels, int height, int width, + int ksize, int stride, int pad, float* data_col); + +#ifdef GPU + +void im2col_gpu(float *im, + int channels, int height, int width, + int ksize, int stride, int pad,float *data_col); + +#endif +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/im2col_kernels.cu b/workloads/realworld/uvm_prefetch_async/darknet/src/im2col_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..660806208adf57bac0afe8b026de3e97e57cd250 --- /dev/null 
+++ b/workloads/realworld/uvm_prefetch_async/darknet/src/im2col_kernels.cu @@ -0,0 +1,62 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "im2col.h" +#include "cuda_dark.h" +} + +// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu +// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE + +__global__ void im2col_gpu_kernel(const int n, const float* data_im, + const int height, const int width, const int ksize, + const int pad, + const int stride, + const int height_col, const int width_col, + float *data_col) { + int index = blockIdx.x*blockDim.x+threadIdx.x; + for(; index < n; index += blockDim.x*gridDim.x){ + int w_out = index % width_col; + int h_index = index / width_col; + int h_out = h_index % height_col; + int channel_in = h_index / height_col; + int channel_out = channel_in * ksize * ksize; + int h_in = h_out * stride - pad; + int w_in = w_out * stride - pad; + float* data_col_ptr = data_col; + data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; + const float* data_im_ptr = data_im; + data_im_ptr += (channel_in * height + h_in) * width + w_in; + for (int i = 0; i < ksize; ++i) { + for (int j = 0; j < ksize; ++j) { + int h = h_in + i; + int w = w_in + j; + + *data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ? + data_im_ptr[i * width + j] : 0; + + //*data_col_ptr = data_im_ptr[ii * width + jj]; + + data_col_ptr += height_col * width_col; + } + } + } +} + +void im2col_gpu(float *im, + int channels, int height, int width, + int ksize, int stride, int pad, float *data_col){ + // We are going to launch channels * height_col * width_col kernels, each + // kernel responsible for copying a single-channel grid. 
+ int height_col = (height + 2 * pad - ksize) / stride + 1; + int width_col = (width + 2 * pad - ksize) / stride + 1; + int num_kernels = channels * height_col * width_col; + im2col_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, + BLOCK>>>( + num_kernels, im, height, width, ksize, pad, + stride, height_col, + width_col, data_col); + check_error(cudaPeekAtLastError()); +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/image.c b/workloads/realworld/uvm_prefetch_async/darknet/src/image.c new file mode 100644 index 0000000000000000000000000000000000000000..3edf6d1045f4637d7bb108440302fd36d5ef9a18 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/image.c @@ -0,0 +1,1467 @@ +#include "image.h" +#include "utils.h" +#include "blas.h" +#include "cuda_dark.h" +#include +#include + +#define STB_IMAGE_IMPLEMENTATION +#include "stb_image.h" +#define STB_IMAGE_WRITE_IMPLEMENTATION +#include "stb_image_write.h" + +int windows = 0; + +float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} }; + +float get_color(int c, int x, int max) +{ + float ratio = ((float)x/max)*5; + int i = floor(ratio); + int j = ceil(ratio); + ratio -= i; + float r = (1-ratio) * colors[i][c] + ratio*colors[j][c]; + //printf("%f\n", r); + return r; +} + +image mask_to_rgb(image mask) +{ + int n = mask.c; + image im = make_image(mask.w, mask.h, 3); + int i, j; + for(j = 0; j < n; ++j){ + int offset = j*123457 % n; + float red = get_color(2,offset,n); + float green = get_color(1,offset,n); + float blue = get_color(0,offset,n); + for(i = 0; i < im.w*im.h; ++i){ + im.data[i + 0*im.w*im.h] += mask.data[j*im.h*im.w + i]*red; + im.data[i + 1*im.w*im.h] += mask.data[j*im.h*im.w + i]*green; + im.data[i + 2*im.w*im.h] += mask.data[j*im.h*im.w + i]*blue; + } + } + return im; +} + +static float get_pixel(image m, int x, int y, int c) +{ + assert(x < m.w && y < m.h && c < m.c); + return m.data[c*m.h*m.w + y*m.w + x]; +} +static float get_pixel_extend(image m, int x, int 
y, int c) +{ + if(x < 0 || x >= m.w || y < 0 || y >= m.h) return 0; + /* + if(x < 0) x = 0; + if(x >= m.w) x = m.w-1; + if(y < 0) y = 0; + if(y >= m.h) y = m.h-1; + */ + if(c < 0 || c >= m.c) return 0; + return get_pixel(m, x, y, c); +} +static void set_pixel(image m, int x, int y, int c, float val) +{ + if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return; + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] = val; +} +static void add_pixel(image m, int x, int y, int c, float val) +{ + assert(x < m.w && y < m.h && c < m.c); + m.data[c*m.h*m.w + y*m.w + x] += val; +} + +static float bilinear_interpolate(image im, float x, float y, int c) +{ + int ix = (int) floorf(x); + int iy = (int) floorf(y); + + float dx = x - ix; + float dy = y - iy; + + float val = (1-dy) * (1-dx) * get_pixel_extend(im, ix, iy, c) + + dy * (1-dx) * get_pixel_extend(im, ix, iy+1, c) + + (1-dy) * dx * get_pixel_extend(im, ix+1, iy, c) + + dy * dx * get_pixel_extend(im, ix+1, iy+1, c); + return val; +} + + +void composite_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float val = get_pixel(source, x, y, k); + float val2 = get_pixel_extend(dest, dx+x, dy+y, k); + set_pixel(dest, dx+x, dy+y, k, val * val2); + } + } + } +} + +image border_image(image a, int border) +{ + image b = make_image(a.w + 2*border, a.h + 2*border, a.c); + int x,y,k; + for(k = 0; k < b.c; ++k){ + for(y = 0; y < b.h; ++y){ + for(x = 0; x < b.w; ++x){ + float val = get_pixel_extend(a, x - border, y - border, k); + if(x - border < 0 || x - border >= a.w || y - border < 0 || y - border >= a.h) val = 1; + set_pixel(b, x, y, k, val); + } + } + } + return b; +} + +image tile_images(image a, image b, int dx) +{ + if(a.w == 0) return copy_image(b); + image c = make_image(a.w + b.w + dx, (a.h > b.h) ? a.h : b.h, (a.c > b.c) ? 
a.c : b.c); + fill_cpu(c.w*c.h*c.c, 1, c.data, 1); + embed_image(a, c, 0, 0); + composite_image(b, c, a.w + dx, 0); + return c; +} + +image get_label(image **characters, char *string, int size) +{ + size = size/10; + if(size > 7) size = 7; + image label = make_empty_image(0,0,0); + while(*string){ + image l = characters[size][(int)*string]; + image n = tile_images(label, l, -size - 1 + (size+1)/2); + free_image(label); + label = n; + ++string; + } + image b = border_image(label, label.h*.25); + free_image(label); + return b; +} + +void draw_label(image a, int r, int c, image label, const float *rgb) +{ + int w = label.w; + int h = label.h; + if (r - h >= 0) r = r - h; + + int i, j, k; + for(j = 0; j < h && j + r < a.h; ++j){ + for(i = 0; i < w && i + c < a.w; ++i){ + for(k = 0; k < label.c; ++k){ + float val = get_pixel(label, i, j, k); + set_pixel(a, i+c, j+r, k, rgb[k] * val); + } + } + } +} + +void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b) +{ + //normalize_image(a); + int i; + if(x1 < 0) x1 = 0; + if(x1 >= a.w) x1 = a.w-1; + if(x2 < 0) x2 = 0; + if(x2 >= a.w) x2 = a.w-1; + + if(y1 < 0) y1 = 0; + if(y1 >= a.h) y1 = a.h-1; + if(y2 < 0) y2 = 0; + if(y2 >= a.h) y2 = a.h-1; + + for(i = x1; i <= x2; ++i){ + a.data[i + y1*a.w + 0*a.w*a.h] = r; + a.data[i + y2*a.w + 0*a.w*a.h] = r; + + a.data[i + y1*a.w + 1*a.w*a.h] = g; + a.data[i + y2*a.w + 1*a.w*a.h] = g; + + a.data[i + y1*a.w + 2*a.w*a.h] = b; + a.data[i + y2*a.w + 2*a.w*a.h] = b; + } + for(i = y1; i <= y2; ++i){ + a.data[x1 + i*a.w + 0*a.w*a.h] = r; + a.data[x2 + i*a.w + 0*a.w*a.h] = r; + + a.data[x1 + i*a.w + 1*a.w*a.h] = g; + a.data[x2 + i*a.w + 1*a.w*a.h] = g; + + a.data[x1 + i*a.w + 2*a.w*a.h] = b; + a.data[x2 + i*a.w + 2*a.w*a.h] = b; + } +} + +void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b) +{ + int i; + for(i = 0; i < w; ++i){ + draw_box(a, x1+i, y1+i, x2-i, y2-i, r, g, b); + } +} + +void draw_bbox(image a, box bbox, int w, 
float r, float g, float b) +{ + int left = (bbox.x-bbox.w/2)*a.w; + int right = (bbox.x+bbox.w/2)*a.w; + int top = (bbox.y-bbox.h/2)*a.h; + int bot = (bbox.y+bbox.h/2)*a.h; + + int i; + for(i = 0; i < w; ++i){ + draw_box(a, left+i, top+i, right-i, bot-i, r, g, b); + } +} + +image **load_alphabet() +{ + char *value = getenv("UVMAsyncBench_BASE"); + int i, j; + const int nsize = 8; + image **alphabets = calloc(nsize, sizeof(image)); + for(j = 0; j < nsize; ++j){ + alphabets[j] = calloc(128, sizeof(image)); + for(i = 32; i < 127; ++i){ + char buff[256]; + sprintf(buff, "%s/workloads/realworld/standard/darknet/data/labels/%d_%d.png", value, i, j); + alphabets[j][i] = load_image_color(buff, 0, 0); + } + } + return alphabets; +} + +void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes) +{ + int i,j; + + for(i = 0; i < num; ++i){ + char labelstr[4096] = {0}; + int class = -1; + for(j = 0; j < classes; ++j){ + if (dets[i].prob[j] > thresh){ + if (class < 0) { + strcat(labelstr, names[j]); + class = j; + } else { + strcat(labelstr, ", "); + strcat(labelstr, names[j]); + } + printf("%s: %.0f%%\n", names[j], dets[i].prob[j]*100); + } + } + if(class >= 0){ + int width = im.h * .006; + + /* + if(0){ + width = pow(prob, 1./2.)*10+1; + alphabet = 0; + } + */ + + //printf("%d %s: %.0f%%\n", i, names[class], prob*100); + int offset = class*123457 % classes; + float red = get_color(2,offset,classes); + float green = get_color(1,offset,classes); + float blue = get_color(0,offset,classes); + float rgb[3]; + + //width = prob*20+2; + + rgb[0] = red; + rgb[1] = green; + rgb[2] = blue; + box b = dets[i].bbox; + //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); + + int left = (b.x-b.w/2.)*im.w; + int right = (b.x+b.w/2.)*im.w; + int top = (b.y-b.h/2.)*im.h; + int bot = (b.y+b.h/2.)*im.h; + + if(left < 0) left = 0; + if(right > im.w-1) right = im.w-1; + if(top < 0) top = 0; + if(bot > im.h-1) bot = im.h-1; + + draw_box_width(im, 
left, top, right, bot, width, red, green, blue); + if (alphabet) { + image label = get_label(alphabet, labelstr, (im.h*.03)); + draw_label(im, top + width, left, label, rgb); + free_image(label); + } + if (dets[i].mask){ + image mask = float_to_image(14, 14, 1, dets[i].mask); + image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h); + image tmask = threshold_image(resized_mask, .5); + embed_image(tmask, im, left, top); + free_image(mask); + free_image(resized_mask); + free_image(tmask); + } + } + } +} + +void transpose_image(image im) +{ + assert(im.w == im.h); + int n, m; + int c; + for(c = 0; c < im.c; ++c){ + for(n = 0; n < im.w-1; ++n){ + for(m = n + 1; m < im.w; ++m){ + float swap = im.data[m + im.w*(n + im.h*c)]; + im.data[m + im.w*(n + im.h*c)] = im.data[n + im.w*(m + im.h*c)]; + im.data[n + im.w*(m + im.h*c)] = swap; + } + } + } +} + +void rotate_image_cw(image im, int times) +{ + assert(im.w == im.h); + times = (times + 400) % 4; + int i, x, y, c; + int n = im.w; + for(i = 0; i < times; ++i){ + for(c = 0; c < im.c; ++c){ + for(x = 0; x < n/2; ++x){ + for(y = 0; y < (n-1)/2 + 1; ++y){ + float temp = im.data[y + im.w*(x + im.h*c)]; + im.data[y + im.w*(x + im.h*c)] = im.data[n-1-x + im.w*(y + im.h*c)]; + im.data[n-1-x + im.w*(y + im.h*c)] = im.data[n-1-y + im.w*(n-1-x + im.h*c)]; + im.data[n-1-y + im.w*(n-1-x + im.h*c)] = im.data[x + im.w*(n-1-y + im.h*c)]; + im.data[x + im.w*(n-1-y + im.h*c)] = temp; + } + } + } + } +} + +void flip_image(image a) +{ + int i,j,k; + for(k = 0; k < a.c; ++k){ + for(i = 0; i < a.h; ++i){ + for(j = 0; j < a.w/2; ++j){ + int index = j + a.w*(i + a.h*(k)); + int flip = (a.w - j - 1) + a.w*(i + a.h*(k)); + float swap = a.data[flip]; + a.data[flip] = a.data[index]; + a.data[index] = swap; + } + } + } +} + +image image_distance(image a, image b) +{ + int i,j; + image dist = make_image(a.w, a.h, 1); + for(i = 0; i < a.c; ++i){ + for(j = 0; j < a.h*a.w; ++j){ + dist.data[j] += pow(a.data[i*a.h*a.w+j]-b.data[i*a.h*a.w+j],2); + } + } 
+ for(j = 0; j < a.h*a.w; ++j){ + dist.data[j] = sqrt(dist.data[j]); + } + return dist; +} + +void ghost_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + float max_dist = sqrt((-source.w/2. + .5)*(-source.w/2. + .5)); + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float dist = sqrt((x - source.w/2. + .5)*(x - source.w/2. + .5) + (y - source.h/2. + .5)*(y - source.h/2. + .5)); + float alpha = (1 - dist/max_dist); + if(alpha < 0) alpha = 0; + float v1 = get_pixel(source, x,y,k); + float v2 = get_pixel(dest, dx+x,dy+y,k); + float val = alpha*v1 + (1-alpha)*v2; + set_pixel(dest, dx+x, dy+y, k, val); + } + } + } +} + +void blocky_image(image im, int s) +{ + int i,j,k; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + im.data[i + im.w*(j + im.h*k)] = im.data[i/s*s + im.w*(j/s*s + im.h*k)]; + } + } + } +} + +void censor_image(image im, int dx, int dy, int w, int h) +{ + int i,j,k; + int s = 32; + if(dx < 0) dx = 0; + if(dy < 0) dy = 0; + + for(k = 0; k < im.c; ++k){ + for(j = dy; j < dy + h && j < im.h; ++j){ + for(i = dx; i < dx + w && i < im.w; ++i){ + im.data[i + im.w*(j + im.h*k)] = im.data[i/s*s + im.w*(j/s*s + im.h*k)]; + //im.data[i + j*im.w + k*im.w*im.h] = 0; + } + } + } +} + +void embed_image(image source, image dest, int dx, int dy) +{ + int x,y,k; + for(k = 0; k < source.c; ++k){ + for(y = 0; y < source.h; ++y){ + for(x = 0; x < source.w; ++x){ + float val = get_pixel(source, x,y,k); + set_pixel(dest, dx+x, dy+y, k, val); + } + } + } +} + +image collapse_image_layers(image source, int border) +{ + int h = source.h; + h = (h+border)*source.c - border; + image dest = make_image(source.w, h, 1); + int i; + for(i = 0; i < source.c; ++i){ + image layer = get_image_layer(source, i); + int h_offset = i*(source.h+border); + embed_image(layer, dest, 0, h_offset); + free_image(layer); + } + return dest; +} + +void constrain_image(image im) +{ + int i; + for(i = 
0; i < im.w*im.h*im.c; ++i){ + if(im.data[i] < 0) im.data[i] = 0; + if(im.data[i] > 1) im.data[i] = 1; + } +} + +void normalize_image(image p) +{ + int i; + float min = 9999999; + float max = -999999; + + for(i = 0; i < p.h*p.w*p.c; ++i){ + float v = p.data[i]; + if(v < min) min = v; + if(v > max) max = v; + } + if(max - min < .000000001){ + min = 0; + max = 1; + } + for(i = 0; i < p.c*p.w*p.h; ++i){ + p.data[i] = (p.data[i] - min)/(max-min); + } +} + +void normalize_image2(image p) +{ + float *min = calloc(p.c, sizeof(float)); + float *max = calloc(p.c, sizeof(float)); + int i,j; + for(i = 0; i < p.c; ++i) min[i] = max[i] = p.data[i*p.h*p.w]; + + for(j = 0; j < p.c; ++j){ + for(i = 0; i < p.h*p.w; ++i){ + float v = p.data[i+j*p.h*p.w]; + if(v < min[j]) min[j] = v; + if(v > max[j]) max[j] = v; + } + } + for(i = 0; i < p.c; ++i){ + if(max[i] - min[i] < .000000001){ + min[i] = 0; + max[i] = 1; + } + } + for(j = 0; j < p.c; ++j){ + for(i = 0; i < p.w*p.h; ++i){ + p.data[i+j*p.h*p.w] = (p.data[i+j*p.h*p.w] - min[j])/(max[j]-min[j]); + } + } + free(min); + free(max); +} + +void copy_image_into(image src, image dest) +{ + memcpy(dest.data, src.data, src.h*src.w*src.c*sizeof(float)); +} + +image copy_image(image p) +{ + image copy = p; + copy.data = calloc(p.h*p.w*p.c, sizeof(float)); + memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float)); + return copy; +} + +void rgbgr_image(image im) +{ + int i; + for(i = 0; i < im.w*im.h; ++i){ + float swap = im.data[i]; + im.data[i] = im.data[i+im.w*im.h*2]; + im.data[i+im.w*im.h*2] = swap; + } +} + +int show_image(image p, const char *name, int ms) +{ +#ifdef OPENCV + int c = show_image_cv(p, name, ms); + return c; +#else + fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name); + save_image(p, name); + return -1; +#endif +} + +void save_image_options(image im, const char *name, IMTYPE f, int quality) +{ + char buff[256]; + //sprintf(buff, "%s (%d)", name, windows); + if(f == PNG) sprintf(buff, "%s.png", 
name); + else if (f == BMP) sprintf(buff, "%s.bmp", name); + else if (f == TGA) sprintf(buff, "%s.tga", name); + else if (f == JPG) sprintf(buff, "%s.jpg", name); + else sprintf(buff, "%s.png", name); + unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char)); + int i,k; + for(k = 0; k < im.c; ++k){ + for(i = 0; i < im.w*im.h; ++i){ + data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]); + } + } + int success = 0; + if(f == PNG) success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c); + else if (f == BMP) success = stbi_write_bmp(buff, im.w, im.h, im.c, data); + else if (f == TGA) success = stbi_write_tga(buff, im.w, im.h, im.c, data); + else if (f == JPG) success = stbi_write_jpg(buff, im.w, im.h, im.c, data, quality); + free(data); + if(!success) fprintf(stderr, "Failed to write image %s\n", buff); +} + +void save_image(image im, const char *name) +{ + save_image_options(im, name, JPG, 80); +} + +void show_image_layers(image p, char *name) +{ + int i; + char buff[256]; + for(i = 0; i < p.c; ++i){ + sprintf(buff, "%s - Layer %d", name, i); + image layer = get_image_layer(p, i); + show_image(layer, buff, 1); + free_image(layer); + } +} + +void show_image_collapsed(image p, char *name) +{ + image c = collapse_image_layers(p, 1); + show_image(c, name, 1); + free_image(c); +} + +image make_empty_image(int w, int h, int c) +{ + image out; + out.data = 0; + out.h = h; + out.w = w; + out.c = c; + return out; +} + +image make_image(int w, int h, int c) +{ + image out = make_empty_image(w,h,c); + out.data = calloc(h*w*c, sizeof(float)); + return out; +} + +image make_random_image(int w, int h, int c) +{ + image out = make_empty_image(w,h,c); + out.data = calloc(h*w*c, sizeof(float)); + int i; + for(i = 0; i < w*h*c; ++i){ + out.data[i] = (rand_normal() * .25) + .5; + } + return out; +} + +image float_to_image(int w, int h, int c, float *data) +{ + image out = make_empty_image(w,h,c); + out.data = data; + return out; +} + +void place_image(image im, 
int w, int h, int dx, int dy, image canvas) +{ + int x, y, c; + for(c = 0; c < im.c; ++c){ + for(y = 0; y < h; ++y){ + for(x = 0; x < w; ++x){ + float rx = ((float)x / w) * im.w; + float ry = ((float)y / h) * im.h; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(canvas, x + dx, y + dy, c, val); + } + } + } +} + +image center_crop_image(image im, int w, int h) +{ + int m = (im.w < im.h) ? im.w : im.h; + image c = crop_image(im, (im.w - m) / 2, (im.h - m)/2, m, m); + image r = resize_image(c, w, h); + free_image(c); + return r; +} + +image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect) +{ + int x, y, c; + float cx = im.w/2.; + float cy = im.h/2.; + image rot = make_image(w, h, im.c); + for(c = 0; c < im.c; ++c){ + for(y = 0; y < h; ++y){ + for(x = 0; x < w; ++x){ + float rx = cos(rad)*((x - w/2.)/s*aspect + dx/s*aspect) - sin(rad)*((y - h/2.)/s + dy/s) + cx; + float ry = sin(rad)*((x - w/2.)/s*aspect + dx/s*aspect) + cos(rad)*((y - h/2.)/s + dy/s) + cy; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(rot, x, y, c, val); + } + } + } + return rot; +} + +image rotate_image(image im, float rad) +{ + int x, y, c; + float cx = im.w/2.; + float cy = im.h/2.; + image rot = make_image(im.w, im.h, im.c); + for(c = 0; c < im.c; ++c){ + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx; + float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy; + float val = bilinear_interpolate(im, rx, ry, c); + set_pixel(rot, x, y, c, val); + } + } + } + return rot; +} + +void fill_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] = s; +} + +void translate_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s; +} + +void scale_image(image m, float s) +{ + int i; + for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s; +} + +image crop_image(image im, int dx, int dy, int w, int h) +{ + image cropped = 
make_image(w, h, im.c); + int i, j, k; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int r = j + dy; + int c = i + dx; + float val = 0; + r = constrain_int(r, 0, im.h-1); + c = constrain_int(c, 0, im.w-1); + val = get_pixel(im, c, r, k); + set_pixel(cropped, i, j, k, val); + } + } + } + return cropped; +} + +int best_3d_shift_r(image a, image b, int min, int max) +{ + if(min == max) return min; + int mid = floor((min + max) / 2.); + image c1 = crop_image(b, 0, mid, b.w, b.h); + image c2 = crop_image(b, 0, mid+1, b.w, b.h); + float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 10); + float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 10); + free_image(c1); + free_image(c2); + if(d1 < d2) return best_3d_shift_r(a, b, min, mid); + else return best_3d_shift_r(a, b, mid+1, max); +} + +int best_3d_shift(image a, image b, int min, int max) +{ + int i; + int best = 0; + float best_distance = FLT_MAX; + for(i = min; i <= max; i += 2){ + image c = crop_image(b, 0, i, b.w, b.h); + float d = dist_array(c.data, a.data, a.w*a.h*a.c, 100); + if(d < best_distance){ + best_distance = d; + best = i; + } + printf("%d %f\n", i, d); + free_image(c); + } + return best; +} + +void composite_3d(char *f1, char *f2, char *out, int delta) +{ + if(!out) out = "out"; + image a = load_image(f1, 0,0,0); + image b = load_image(f2, 0,0,0); + int shift = best_3d_shift_r(a, b, -a.h/100, a.h/100); + + image c1 = crop_image(b, 10, shift, b.w, b.h); + float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 100); + image c2 = crop_image(b, -10, shift, b.w, b.h); + float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 100); + + if(d2 < d1 && 0){ + image swap = a; + a = b; + b = swap; + shift = -shift; + printf("swapped, %d\n", shift); + } + else{ + printf("%d\n", shift); + } + + image c = crop_image(b, delta, shift, a.w, a.h); + int i; + for(i = 0; i < c.w*c.h; ++i){ + c.data[i] = a.data[i]; + } + save_image(c, out); +} + +void letterbox_image_into(image im, int w, int h, 
image boxed) +{ + int new_w = im.w; + int new_h = im.h; + if (((float)w/im.w) < ((float)h/im.h)) { + new_w = w; + new_h = (im.h * w)/im.w; + } else { + new_h = h; + new_w = (im.w * h)/im.h; + } + image resized = resize_image(im, new_w, new_h); + embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2); + free_image(resized); +} + +image letterbox_image(image im, int w, int h) +{ + int new_w = im.w; + int new_h = im.h; + if (((float)w/im.w) < ((float)h/im.h)) { + new_w = w; + new_h = (im.h * w)/im.w; + } else { + new_h = h; + new_w = (im.w * h)/im.h; + } + image resized = resize_image(im, new_w, new_h); + image boxed = make_image(w, h, im.c); + fill_image(boxed, .5); + //int i; + //for(i = 0; i < boxed.w*boxed.h*boxed.c; ++i) boxed.data[i] = 0; + embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2); + free_image(resized); + return boxed; +} + +image resize_max(image im, int max) +{ + int w = im.w; + int h = im.h; + if(w > h){ + h = (h * max) / w; + w = max; + } else { + w = (w * max) / h; + h = max; + } + if(w == im.w && h == im.h) return im; + image resized = resize_image(im, w, h); + return resized; +} + +image resize_min(image im, int min) +{ + int w = im.w; + int h = im.h; + if(w < h){ + h = (h * min) / w; + w = min; + } else { + w = (w * min) / h; + h = min; + } + if(w == im.w && h == im.h) return im; + image resized = resize_image(im, w, h); + return resized; +} + +image random_crop_image(image im, int w, int h) +{ + int dx = rand_int(0, im.w - w); + int dy = rand_int(0, im.h - h); + image crop = crop_image(im, dx, dy, w, h); + return crop; +} + +augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h) +{ + augment_args a = {0}; + aspect = rand_scale(aspect); + int r = rand_int(low, high); + int min = (im.h < im.w*aspect) ? 
im.h : im.w*aspect; + float scale = (float)r / min; + + float rad = rand_uniform(-angle, angle) * TWO_PI / 360.; + + float dx = (im.w*scale/aspect - w) / 2.; + float dy = (im.h*scale - w) / 2.; + //if(dx < 0) dx = 0; + //if(dy < 0) dy = 0; + dx = rand_uniform(-dx, dx); + dy = rand_uniform(-dy, dy); + + a.rad = rad; + a.scale = scale; + a.w = w; + a.h = h; + a.dx = dx; + a.dy = dy; + a.aspect = aspect; + return a; +} + +image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h) +{ + augment_args a = random_augment_args(im, angle, aspect, low, high, w, h); + image crop = rotate_crop_image(im, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect); + return crop; +} + +float three_way_max(float a, float b, float c) +{ + return (a > b) ? ( (a > c) ? a : c) : ( (b > c) ? b : c) ; +} + +float three_way_min(float a, float b, float c) +{ + return (a < b) ? ( (a < c) ? a : c) : ( (b < c) ? b : c) ; +} + +void yuv_to_rgb(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float y, u, v; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + y = get_pixel(im, i , j, 0); + u = get_pixel(im, i , j, 1); + v = get_pixel(im, i , j, 2); + + r = y + 1.13983*v; + g = y + -.39465*u + -.58060*v; + b = y + 2.03211*u; + + set_pixel(im, i, j, 0, r); + set_pixel(im, i, j, 1, g); + set_pixel(im, i, j, 2, b); + } + } +} + +void rgb_to_yuv(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float y, u, v; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + r = get_pixel(im, i , j, 0); + g = get_pixel(im, i , j, 1); + b = get_pixel(im, i , j, 2); + + y = .299*r + .587*g + .114*b; + u = -.14713*r + -.28886*g + .436*b; + v = .615*r + -.51499*g + -.10001*b; + + set_pixel(im, i, j, 0, y); + set_pixel(im, i, j, 1, u); + set_pixel(im, i, j, 2, v); + } + } +} + +// http://www.cs.rit.edu/~ncs/color/t_convert.html +void rgb_to_hsv(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float h, s, v; + for(j = 0; j < im.h; 
++j){ + for(i = 0; i < im.w; ++i){ + r = get_pixel(im, i , j, 0); + g = get_pixel(im, i , j, 1); + b = get_pixel(im, i , j, 2); + float max = three_way_max(r,g,b); + float min = three_way_min(r,g,b); + float delta = max - min; + v = max; + if(max == 0){ + s = 0; + h = 0; + }else{ + s = delta/max; + if(r == max){ + h = (g - b) / delta; + } else if (g == max) { + h = 2 + (b - r) / delta; + } else { + h = 4 + (r - g) / delta; + } + if (h < 0) h += 6; + h = h/6.; + } + set_pixel(im, i, j, 0, h); + set_pixel(im, i, j, 1, s); + set_pixel(im, i, j, 2, v); + } + } +} + +void hsv_to_rgb(image im) +{ + assert(im.c == 3); + int i, j; + float r, g, b; + float h, s, v; + float f, p, q, t; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + h = 6 * get_pixel(im, i , j, 0); + s = get_pixel(im, i , j, 1); + v = get_pixel(im, i , j, 2); + if (s == 0) { + r = g = b = v; + } else { + int index = floor(h); + f = h - index; + p = v*(1-s); + q = v*(1-s*f); + t = v*(1-s*(1-f)); + if(index == 0){ + r = v; g = t; b = p; + } else if(index == 1){ + r = q; g = v; b = p; + } else if(index == 2){ + r = p; g = v; b = t; + } else if(index == 3){ + r = p; g = q; b = v; + } else if(index == 4){ + r = t; g = p; b = v; + } else { + r = v; g = p; b = q; + } + } + set_pixel(im, i, j, 0, r); + set_pixel(im, i, j, 1, g); + set_pixel(im, i, j, 2, b); + } + } +} + +void grayscale_image_3c(image im) +{ + assert(im.c == 3); + int i, j, k; + float scale[] = {0.299, 0.587, 0.114}; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float val = 0; + for(k = 0; k < 3; ++k){ + val += scale[k]*get_pixel(im, i, j, k); + } + im.data[0*im.h*im.w + im.w*j + i] = val; + im.data[1*im.h*im.w + im.w*j + i] = val; + im.data[2*im.h*im.w + im.w*j + i] = val; + } + } +} + +image grayscale_image(image im) +{ + assert(im.c == 3); + int i, j, k; + image gray = make_image(im.w, im.h, 1); + float scale[] = {0.299, 0.587, 0.114}; + for(k = 0; k < im.c; ++k){ + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ 
+ gray.data[i+im.w*j] += scale[k]*get_pixel(im, i, j, k); + } + } + } + return gray; +} + +image threshold_image(image im, float thresh) +{ + int i; + image t = make_image(im.w, im.h, im.c); + for(i = 0; i < im.w*im.h*im.c; ++i){ + t.data[i] = im.data[i]>thresh ? 1 : 0; + } + return t; +} + +image blend_image(image fore, image back, float alpha) +{ + assert(fore.w == back.w && fore.h == back.h && fore.c == back.c); + image blend = make_image(fore.w, fore.h, fore.c); + int i, j, k; + for(k = 0; k < fore.c; ++k){ + for(j = 0; j < fore.h; ++j){ + for(i = 0; i < fore.w; ++i){ + float val = alpha * get_pixel(fore, i, j, k) + + (1 - alpha)* get_pixel(back, i, j, k); + set_pixel(blend, i, j, k, val); + } + } + } + return blend; +} + +void scale_image_channel(image im, int c, float v) +{ + int i, j; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float pix = get_pixel(im, i, j, c); + pix = pix*v; + set_pixel(im, i, j, c, pix); + } + } +} + +void translate_image_channel(image im, int c, float v) +{ + int i, j; + for(j = 0; j < im.h; ++j){ + for(i = 0; i < im.w; ++i){ + float pix = get_pixel(im, i, j, c); + pix = pix+v; + set_pixel(im, i, j, c, pix); + } + } +} + +image binarize_image(image im) +{ + image c = copy_image(im); + int i; + for(i = 0; i < im.w * im.h * im.c; ++i){ + if(c.data[i] > .5) c.data[i] = 1; + else c.data[i] = 0; + } + return c; +} + +void saturate_image(image im, float sat) +{ + rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + hsv_to_rgb(im); + constrain_image(im); +} + +void hue_image(image im, float hue) +{ + rgb_to_hsv(im); + int i; + for(i = 0; i < im.w*im.h; ++i){ + im.data[i] = im.data[i] + hue; + if (im.data[i] > 1) im.data[i] -= 1; + if (im.data[i] < 0) im.data[i] += 1; + } + hsv_to_rgb(im); + constrain_image(im); +} + +void exposure_image(image im, float sat) +{ + rgb_to_hsv(im); + scale_image_channel(im, 2, sat); + hsv_to_rgb(im); + constrain_image(im); +} + +void distort_image(image im, float hue, float sat, float val) +{ + 
rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + scale_image_channel(im, 2, val); + int i; + for(i = 0; i < im.w*im.h; ++i){ + im.data[i] = im.data[i] + hue; + if (im.data[i] > 1) im.data[i] -= 1; + if (im.data[i] < 0) im.data[i] += 1; + } + hsv_to_rgb(im); + constrain_image(im); +} + +void random_distort_image(image im, float hue, float saturation, float exposure) +{ + float dhue = rand_uniform(-hue, hue); + float dsat = rand_scale(saturation); + float dexp = rand_scale(exposure); + distort_image(im, dhue, dsat, dexp); +} + +void saturate_exposure_image(image im, float sat, float exposure) +{ + rgb_to_hsv(im); + scale_image_channel(im, 1, sat); + scale_image_channel(im, 2, exposure); + hsv_to_rgb(im); + constrain_image(im); +} + +image resize_image(image im, int w, int h) +{ + image resized = make_image(w, h, im.c); + image part = make_image(w, im.h, im.c); + int r, c, k; + float w_scale = (float)(im.w - 1) / (w - 1); + float h_scale = (float)(im.h - 1) / (h - 1); + for(k = 0; k < im.c; ++k){ + for(r = 0; r < im.h; ++r){ + for(c = 0; c < w; ++c){ + float val = 0; + if(c == w-1 || im.w == 1){ + val = get_pixel(im, im.w-1, r, k); + } else { + float sx = c*w_scale; + int ix = (int) sx; + float dx = sx - ix; + val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k); + } + set_pixel(part, c, r, k, val); + } + } + } + for(k = 0; k < im.c; ++k){ + for(r = 0; r < h; ++r){ + float sy = r*h_scale; + int iy = (int) sy; + float dy = sy - iy; + for(c = 0; c < w; ++c){ + float val = (1-dy) * get_pixel(part, c, iy, k); + set_pixel(resized, c, r, k, val); + } + if(r == h-1 || im.h == 1) continue; + for(c = 0; c < w; ++c){ + float val = dy * get_pixel(part, c, iy+1, k); + add_pixel(resized, c, r, k, val); + } + } + } + + free_image(part); + return resized; +} + + +void test_resize(char *filename) +{ + image im = load_image(filename, 0,0, 3); + float mag = mag_array(im.data, im.w*im.h*im.c); + printf("L2 Norm: %f\n", mag); + image gray = grayscale_image(im); + 
+ image c1 = copy_image(im); + image c2 = copy_image(im); + image c3 = copy_image(im); + image c4 = copy_image(im); + distort_image(c1, .1, 1.5, 1.5); + distort_image(c2, -.1, .66666, .66666); + distort_image(c3, .1, 1.5, .66666); + distort_image(c4, .1, .66666, 1.5); + + + show_image(im, "Original", 1); + show_image(gray, "Gray", 1); + show_image(c1, "C1", 1); + show_image(c2, "C2", 1); + show_image(c3, "C3", 1); + show_image(c4, "C4", 1); +#ifdef OPENCV + while(1){ + image aug = random_augment_image(im, 0, .75, 320, 448, 320, 320); + show_image(aug, "aug", 1); + free_image(aug); + + + float exposure = 1.15; + float saturation = 1.15; + float hue = .05; + + image c = copy_image(im); + + float dexp = rand_scale(exposure); + float dsat = rand_scale(saturation); + float dhue = rand_uniform(-hue, hue); + + distort_image(c, dhue, dsat, dexp); + show_image(c, "rand", 1); + printf("%f %f %f\n", dhue, dsat, dexp); + free_image(c); + } +#endif +} + + +image load_image_stb(char *filename, int channels) +{ + int w, h, c; + unsigned char *data = stbi_load(filename, &w, &h, &c, channels); + if (!data) { + fprintf(stderr, "Cannot load image \"%s\"\nSTB Reason: %s\n", filename, stbi_failure_reason()); + exit(0); + } + if(channels) c = channels; + int i,j,k; + image im = make_image(w, h, c); + for(k = 0; k < c; ++k){ + for(j = 0; j < h; ++j){ + for(i = 0; i < w; ++i){ + int dst_index = i + w*j + w*h*k; + int src_index = k + c*i + c*w*j; + im.data[dst_index] = (float)data[src_index]/255.; + } + } + } + free(data); + return im; +} + +image load_image(char *filename, int w, int h, int c) +{ +#ifdef OPENCV + image out = load_image_cv(filename, c); +#else + image out = load_image_stb(filename, c); +#endif + + if((h && w) && (h != out.h || w != out.w)){ + image resized = resize_image(out, w, h); + free_image(out); + out = resized; + } + return out; +} + +image load_image_color(char *filename, int w, int h) +{ + return load_image(filename, w, h, 3); +} + +image get_image_layer(image m, 
int l) +{ + image out = make_image(m.w, m.h, 1); + int i; + for(i = 0; i < m.h*m.w; ++i){ + out.data[i] = m.data[i+l*m.h*m.w]; + } + return out; +} +void print_image(image m) +{ + int i, j, k; + for(i =0 ; i < m.c; ++i){ + for(j =0 ; j < m.h; ++j){ + for(k = 0; k < m.w; ++k){ + printf("%.2lf, ", m.data[i*m.h*m.w + j*m.w + k]); + if(k > 30) break; + } + printf("\n"); + if(j > 30) break; + } + printf("\n"); + } + printf("\n"); +} + +image collapse_images_vert(image *ims, int n) +{ + int color = 1; + int border = 1; + int h,w,c; + w = ims[0].w; + h = (ims[0].h + border) * n - border; + c = ims[0].c; + if(c != 3 || !color){ + w = (w+border)*c - border; + c = 1; + } + + image filters = make_image(w, h, c); + int i,j; + for(i = 0; i < n; ++i){ + int h_offset = i*(ims[0].h+border); + image copy = copy_image(ims[i]); + //normalize_image(copy); + if(c == 3 && color){ + embed_image(copy, filters, 0, h_offset); + } + else{ + for(j = 0; j < copy.c; ++j){ + int w_offset = j*(ims[0].w+border); + image layer = get_image_layer(copy, j); + embed_image(layer, filters, w_offset, h_offset); + free_image(layer); + } + } + free_image(copy); + } + return filters; +} + +image collapse_images_horz(image *ims, int n) +{ + int color = 1; + int border = 1; + int h,w,c; + int size = ims[0].h; + h = size; + w = (ims[0].w + border) * n - border; + c = ims[0].c; + if(c != 3 || !color){ + h = (h+border)*c - border; + c = 1; + } + + image filters = make_image(w, h, c); + int i,j; + for(i = 0; i < n; ++i){ + int w_offset = i*(size+border); + image copy = copy_image(ims[i]); + //normalize_image(copy); + if(c == 3 && color){ + embed_image(copy, filters, w_offset, 0); + } + else{ + for(j = 0; j < copy.c; ++j){ + int h_offset = j*(size+border); + image layer = get_image_layer(copy, j); + embed_image(layer, filters, w_offset, h_offset); + free_image(layer); + } + } + free_image(copy); + } + return filters; +} + +void show_image_normalized(image im, const char *name) +{ + image c = copy_image(im); + 
normalize_image(c); + show_image(c, name, 1); + free_image(c); +} + +void show_images(image *ims, int n, char *window) +{ + image m = collapse_images_vert(ims, n); + /* + int w = 448; + int h = ((float)m.h/m.w) * 448; + if(h > 896){ + h = 896; + w = ((float)m.w/m.h) * 896; + } + image sized = resize_image(m, w, h); + */ + normalize_image(m); + save_image(m, window); + show_image(m, window, 1); + free_image(m); +} + +void free_image(image m) +{ + if(m.data){ + free(m.data); + } +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/image.h b/workloads/realworld/uvm_prefetch_async/darknet/src/image.h new file mode 100644 index 0000000000000000000000000000000000000000..3392bb9787fc542929cda064bcefa0f3f893b76c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/image.h @@ -0,0 +1,69 @@ +#ifndef IMAGE_H +#define IMAGE_H + +#include +#include +#include +#include +#include +#include "box.h" +#include "darknet.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef OPENCV +void *open_video_stream(const char *f, int c, int w, int h, int fps); +image get_image_from_stream(void *p); +image load_image_cv(char *filename, int channels); +int show_image_cv(image im, const char* name, int ms); +#endif + +float get_color(int c, int x, int max); +void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); +void draw_bbox(image a, box bbox, int w, float r, float g, float b); +void write_label(image a, int r, int c, image *characters, char *string, float *rgb); +image image_distance(image a, image b); +void scale_image(image m, float s); +image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect); +image random_crop_image(image im, int w, int h); +image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h); +augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h); +void letterbox_image_into(image im, int 
w, int h, image boxed); +image resize_max(image im, int max); +void translate_image(image m, float s); +void embed_image(image source, image dest, int dx, int dy); +void place_image(image im, int w, int h, int dx, int dy, image canvas); +void saturate_image(image im, float sat); +void exposure_image(image im, float sat); +void distort_image(image im, float hue, float sat, float val); +void saturate_exposure_image(image im, float sat, float exposure); +void rgb_to_hsv(image im); +void hsv_to_rgb(image im); +void yuv_to_rgb(image im); +void rgb_to_yuv(image im); + + +image collapse_image_layers(image source, int border); +image collapse_images_horz(image *ims, int n); +image collapse_images_vert(image *ims, int n); + +void show_image_normalized(image im, const char *name); +void show_images(image *ims, int n, char *window); +void show_image_layers(image p, char *name); +void show_image_collapsed(image p, char *name); + +void print_image(image m); + +image make_empty_image(int w, int h, int c); +void copy_image_into(image src, image dest); + +image get_image_layer(image m, int l); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/image_opencv.cpp b/workloads/realworld/uvm_prefetch_async/darknet/src/image_opencv.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7511280be07ca987fd51fa54aea55910cd34a706 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/image_opencv.cpp @@ -0,0 +1,135 @@ +#ifdef OPENCV + +#include "stdio.h" +#include "stdlib.h" +#include "opencv2/opencv.hpp" +#include "image.h" + +using namespace cv; + +extern "C" { + +IplImage *image_to_ipl(image im) +{ + int x,y,c; + IplImage *disp = cvCreateImage(cvSize(im.w,im.h), IPL_DEPTH_8U, im.c); + int step = disp->widthStep; + for(y = 0; y < im.h; ++y){ + for(x = 0; x < im.w; ++x){ + for(c= 0; c < im.c; ++c){ + float val = im.data[c*im.h*im.w + y*im.w + x]; + disp->imageData[y*step + x*im.c + c] = (unsigned 
char)(val*255); + } + } + } + return disp; +} + +image ipl_to_image(IplImage* src) +{ + int h = src->height; + int w = src->width; + int c = src->nChannels; + image im = make_image(w, h, c); + unsigned char *data = (unsigned char *)src->imageData; + int step = src->widthStep; + int i, j, k; + + for(i = 0; i < h; ++i){ + for(k= 0; k < c; ++k){ + for(j = 0; j < w; ++j){ + im.data[k*w*h + i*w + j] = data[i*step + j*c + k]/255.; + } + } + } + return im; +} + +Mat image_to_mat(image im) +{ + image copy = copy_image(im); + constrain_image(copy); + if(im.c == 3) rgbgr_image(copy); + + IplImage *ipl = image_to_ipl(copy); + Mat m = cvarrToMat(ipl, true); + cvReleaseImage(&ipl); + free_image(copy); + return m; +} + +image mat_to_image(Mat m) +{ + IplImage ipl = m; + image im = ipl_to_image(&ipl); + rgbgr_image(im); + return im; +} + +void *open_video_stream(const char *f, int c, int w, int h, int fps) +{ + VideoCapture *cap; + if(f) cap = new VideoCapture(f); + else cap = new VideoCapture(c); + if(!cap->isOpened()) return 0; + if(w) cap->set(CV_CAP_PROP_FRAME_WIDTH, w); + if(h) cap->set(CV_CAP_PROP_FRAME_HEIGHT, w); + if(fps) cap->set(CV_CAP_PROP_FPS, w); + return (void *) cap; +} + +image get_image_from_stream(void *p) +{ + VideoCapture *cap = (VideoCapture *)p; + Mat m; + *cap >> m; + if(m.empty()) return make_empty_image(0,0,0); + return mat_to_image(m); +} + +image load_image_cv(char *filename, int channels) +{ + int flag = -1; + if (channels == 0) flag = -1; + else if (channels == 1) flag = 0; + else if (channels == 3) flag = 1; + else { + fprintf(stderr, "OpenCV can't force load with %d channels\n", channels); + } + Mat m; + m = imread(filename, flag); + if(!m.data){ + fprintf(stderr, "Cannot load image \"%s\"\n", filename); + char buff[256]; + sprintf(buff, "echo %s >> bad.list", filename); + system(buff); + return make_image(10,10,3); + //exit(0); + } + image im = mat_to_image(m); + return im; +} + +int show_image_cv(image im, const char* name, int ms) +{ + Mat m = 
/*
** Builds an instance-segmentation (ISEG) cost layer.
** Output volume is w*h*(classes+ids): per-pixel class scores followed by
** `ids` embedding channels used for instance discrimination.
** NOTE(review): the constant 90 appears to cap the number of instances per
** image (it also sizes l.truths as 90*(w*h+1)) — confirm against the data
** loader before changing it.
** calloc results are not checked, consistent with the rest of this file.
*/
layer make_iseg_layer(int batch, int w, int h, int classes, int ids)
{
    layer l = {0};
    l.type = ISEG;

    l.h = h;
    l.w = w;
    l.c = classes + ids;          /* class channels + embedding channels */
    l.out_w = l.w;                /* cost layer: output mirrors input shape */
    l.out_h = l.h;
    l.out_c = l.c;
    l.classes = classes;
    l.batch = batch;
    l.extra = ids;                /* embedding dimensionality, read back as `ids` in forward */
    l.cost = calloc(1, sizeof(float));
    l.outputs = h*w*l.c;
    l.inputs = l.outputs;
    l.truths = 90*(l.w*l.h+1);    /* per instance: 1 class id + w*h mask values */
    l.delta = calloc(batch*l.outputs, sizeof(float));
    l.output = calloc(batch*l.outputs, sizeof(float));

    /* per-instance pixel counts and embedding sums, reused every forward pass */
    l.counts = calloc(90, sizeof(int));
    l.sums = calloc(90, sizeof(float*));
    if(ids){
        int i;
        for(i = 0; i < 90; ++i){
            l.sums[i] = calloc(ids, sizeof(float));
        }
    }

    l.forward = forward_iseg_layer;
    l.backward = backward_iseg_layer;
#ifdef GPU
    l.forward_gpu = forward_iseg_layer_gpu;
    l.backward_gpu = backward_iseg_layer_gpu;
    l.output_gpu = cuda_make_array(l.output, batch*l.outputs);
    l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs);
#endif

    fprintf(stderr, "iseg\n");
    srand(0);   /* NOTE(review): reseeds the global RNG deterministically — affects any later rand() use; confirm intended */

    return l;
}
/*
** Forward pass / loss for the ISEG layer (runs on CPU even in GPU builds;
** the GPU wrapper pulls activations to net.input first).
** Per batch item it:
**   1. pushes all class scores and embeddings toward 0 (background prior),
**   2. for each ground-truth instance, sets class deltas toward the mask and
**      accumulates the instance's mean embedding in l.sums/l.counts,
**   3. prints a per-instance diagnostic (count, mse, mean embedding),
**   4. applies a +/-0.1 pull/push embedding loss (same instance vs. others),
**      then scales all embedding deltas by 0.01.
** Total cost is ||delta||^2.
*/
void forward_iseg_layer(const layer l, network net)
{

    double time = what_time_is_it_now();
    int i,b,j,k;
    int ids = l.extra;                       /* embedding dimensionality */
    memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float));
    memset(l.delta, 0, l.outputs * l.batch * sizeof(float));

#ifndef GPU
    /* CPU-only build: class channels still need the logistic activation
       (GPU path applies it before pulling the data back). */
    for (b = 0; b < l.batch; ++b){
        int index = b*l.outputs;
        activate_array(l.output + index, l.classes*l.w*l.h, LOGISTIC);
    }
#endif

    for (b = 0; b < l.batch; ++b){
        // a priori, each pixel has no class
        for(i = 0; i < l.classes; ++i){
            for(k = 0; k < l.w*l.h; ++k){
                int index = b*l.outputs + i*l.w*l.h + k;
                l.delta[index] = 0 - l.output[index];
            }
        }

        // a priori, embedding should be small magnitude
        for(i = 0; i < ids; ++i){
            for(k = 0; k < l.w*l.h; ++k){
                int index = b*l.outputs + (i+l.classes)*l.w*l.h + k;
                l.delta[index] = .1 * (0 - l.output[index]);
            }
        }


        memset(l.counts, 0, 90*sizeof(int));
        for(i = 0; i < 90; ++i){
            fill_cpu(ids, 0, l.sums[i], 1);

            int c = net.truth[b*l.truths + i*(l.w*l.h+1)];   /* instance class id; < 0 terminates the list */
            if(c < 0) break;
            // add up metric embeddings for each instance
            for(k = 0; k < l.w*l.h; ++k){
                int index = b*l.outputs + c*l.w*l.h + k;
                float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k];
                if(v){
                    /* pixel belongs to this instance: supervise its class score
                       and fold its embedding (stride l.w*l.h across channels)
                       into the running sum */
                    l.delta[index] = v - l.output[index];
                    axpy_cpu(ids, 1, l.output + b*l.outputs + l.classes*l.w*l.h + k, l.w*l.h, l.sums[i], 1);
                    ++l.counts[i];
                }
            }
        }

        /* diagnostic only: mean squared distance of each pixel's embedding
           from the instance mean */
        float *mse = calloc(90, sizeof(float));
        for(i = 0; i < 90; ++i){
            int c = net.truth[b*l.truths + i*(l.w*l.h+1)];
            if(c < 0) break;
            for(k = 0; k < l.w*l.h; ++k){
                float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k];
                if(v){
                    int z;
                    float sum = 0;
                    for(z = 0; z < ids; ++z){
                        int index = b*l.outputs + (l.classes + z)*l.w*l.h + k;
                        sum += pow(l.sums[i][z]/l.counts[i] - l.output[index], 2);
                    }
                    mse[i] += sum;
                }
            }
            /* NOTE(review): divides by l.counts[i] without a zero check — an
               instance with an all-zero mask would yield NaN here; confirm the
               data loader guarantees non-empty masks */
            mse[i] /= l.counts[i];
        }

        // Calculate average embedding
        for(i = 0; i < 90; ++i){
            if(!l.counts[i]) continue;
            scal_cpu(ids, 1.f/l.counts[i], l.sums[i], 1);   /* sums[i] now holds the mean embedding */
            if(b == 0 && net.gpu_index == 0){
                printf("%4d, %6.3f, ", l.counts[i], mse[i]);
                for(j = 0; j < ids; ++j){
                    printf("%6.3f,", l.sums[i][j]);
                }
                printf("\n");
            }
        }
        free(mse);

        // Calculate embedding loss
        /* pull each instance pixel toward its own mean (+/-0.1 sign gradient),
           push it away from every other instance's mean */
        for(i = 0; i < 90; ++i){
            if(!l.counts[i]) continue;
            for(k = 0; k < l.w*l.h; ++k){
                float v = net.truth[b*l.truths + i*(l.w*l.h + 1) + 1 + k];
                if(v){
                    for(j = 0; j < 90; ++j){
                        if(!l.counts[j])continue;
                        int z;
                        for(z = 0; z < ids; ++z){
                            int index = b*l.outputs + (l.classes + z)*l.w*l.h + k;
                            float diff = l.sums[j][z] - l.output[index];
                            if (j == i) l.delta[index] += diff < 0? -.1 : .1;
                            else l.delta[index] += -(diff < 0? -.1 : .1);
                        }
                    }
                }
            }
        }

        /* damp all embedding gradients */
        for(i = 0; i < ids; ++i){
            for(k = 0; k < l.w*l.h; ++k){
                int index = b*l.outputs + (i+l.classes)*l.w*l.h + k;
                l.delta[index] *= .01;
            }
        }
    }

    *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
    printf("took %lf sec\n", what_time_is_it_now() - time);
}
int h); +int iseg_num_detections(layer l, float thresh); + +#ifdef GPU +void forward_iseg_layer_gpu(const layer l, network net); +void backward_iseg_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/l2norm_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/l2norm_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..0cf7f844170cb2c3dba15be94d4a435aaa63067c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/l2norm_layer.c @@ -0,0 +1,63 @@ +#include "l2norm_layer.h" +#include "activations.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +layer make_l2norm_layer(int batch, int inputs) +{ + fprintf(stderr, "l2norm %4d\n", inputs); + layer l = {0}; + l.type = L2NORM; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.output = calloc(inputs*batch, sizeof(float)); + l.scales = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + + l.forward = forward_l2norm_layer; + l.backward = backward_l2norm_layer; + #ifdef GPU + l.forward_gpu = forward_l2norm_layer_gpu; + l.backward_gpu = backward_l2norm_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.scales_gpu = cuda_make_array(l.output, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_l2norm_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + l2normalize_cpu(l.output, l.scales, l.batch, l.out_c, l.out_w*l.out_h); +} + +void backward_l2norm_layer(const layer l, network net) +{ + //axpy_cpu(l.inputs*l.batch, 1, l.scales, 1, l.delta, 1); + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_l2norm_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + l2normalize_gpu(l.output_gpu, l.scales_gpu, 
/*
** Releases every heap/device buffer a layer may own.
** Each pointer is guarded, so fields a given layer type never allocated
** (left NULL by the zero-initialized `layer l = {0}` constructors) are
** skipped safely. DROPOUT is special-cased: it owns only l.rand.
** Does NOT free the nested connected sub-layers of RNN/GRU/LSTM layers
** (l.uf, l.wf, ...) — their buffers are not covered here.
*/
void free_layer(layer l)
{
    if(l.type == DROPOUT){
        if(l.rand) free(l.rand);
#ifdef GPU
        if(l.rand_gpu) cuda_free(l.rand_gpu);
#endif
        return;
    }
    /* ---- host-side buffers ---- */
    if(l.cweights) free(l.cweights);
    if(l.indexes) free(l.indexes);
    if(l.input_layers) free(l.input_layers);
    if(l.input_sizes) free(l.input_sizes);
    if(l.map) free(l.map);
    if(l.rand) free(l.rand);
    if(l.cost) free(l.cost);
    if(l.state) free(l.state);
    if(l.prev_state) free(l.prev_state);
    if(l.forgot_state) free(l.forgot_state);
    if(l.forgot_delta) free(l.forgot_delta);
    if(l.state_delta) free(l.state_delta);
    if(l.concat) free(l.concat);
    if(l.concat_delta) free(l.concat_delta);
    if(l.binary_weights) free(l.binary_weights);
    if(l.biases) free(l.biases);
    if(l.bias_updates) free(l.bias_updates);
    if(l.scales) free(l.scales);
    if(l.scale_updates) free(l.scale_updates);
    if(l.weights) free(l.weights);
    if(l.weight_updates) free(l.weight_updates);
    if(l.delta) free(l.delta);
    if(l.output) free(l.output);
    if(l.squared) free(l.squared);
    if(l.norms) free(l.norms);
    if(l.spatial_mean) free(l.spatial_mean);
    if(l.mean) free(l.mean);
    if(l.variance) free(l.variance);
    if(l.mean_delta) free(l.mean_delta);
    if(l.variance_delta) free(l.variance_delta);
    if(l.rolling_mean) free(l.rolling_mean);
    if(l.rolling_variance) free(l.rolling_variance);
    if(l.x) free(l.x);
    if(l.x_norm) free(l.x_norm);
    if(l.m) free(l.m);
    if(l.v) free(l.v);
    if(l.z_cpu) free(l.z_cpu);
    if(l.r_cpu) free(l.r_cpu);
    if(l.h_cpu) free(l.h_cpu);
    if(l.binary_input) free(l.binary_input);

#ifdef GPU
    /* ---- device-side buffers ---- */
    if(l.indexes_gpu) cuda_free((float *)l.indexes_gpu);   /* stored as int*, cast for cuda_free */

    if(l.z_gpu) cuda_free(l.z_gpu);
    if(l.r_gpu) cuda_free(l.r_gpu);
    if(l.h_gpu) cuda_free(l.h_gpu);
    if(l.m_gpu) cuda_free(l.m_gpu);
    if(l.v_gpu) cuda_free(l.v_gpu);
    if(l.prev_state_gpu) cuda_free(l.prev_state_gpu);
    if(l.forgot_state_gpu) cuda_free(l.forgot_state_gpu);
    if(l.forgot_delta_gpu) cuda_free(l.forgot_delta_gpu);
    if(l.state_gpu) cuda_free(l.state_gpu);
    if(l.state_delta_gpu) cuda_free(l.state_delta_gpu);
    if(l.gate_gpu) cuda_free(l.gate_gpu);
    if(l.gate_delta_gpu) cuda_free(l.gate_delta_gpu);
    if(l.save_gpu) cuda_free(l.save_gpu);
    if(l.save_delta_gpu) cuda_free(l.save_delta_gpu);
    if(l.concat_gpu) cuda_free(l.concat_gpu);
    if(l.concat_delta_gpu) cuda_free(l.concat_delta_gpu);
    if(l.binary_input_gpu) cuda_free(l.binary_input_gpu);
    if(l.binary_weights_gpu) cuda_free(l.binary_weights_gpu);
    if(l.mean_gpu) cuda_free(l.mean_gpu);
    if(l.variance_gpu) cuda_free(l.variance_gpu);
    if(l.rolling_mean_gpu) cuda_free(l.rolling_mean_gpu);
    if(l.rolling_variance_gpu) cuda_free(l.rolling_variance_gpu);
    if(l.variance_delta_gpu) cuda_free(l.variance_delta_gpu);
    if(l.mean_delta_gpu) cuda_free(l.mean_delta_gpu);
    if(l.x_gpu) cuda_free(l.x_gpu);
    if(l.x_norm_gpu) cuda_free(l.x_norm_gpu);
    if(l.weights_gpu) cuda_free(l.weights_gpu);
    if(l.weight_updates_gpu) cuda_free(l.weight_updates_gpu);
    if(l.biases_gpu) cuda_free(l.biases_gpu);
    if(l.bias_updates_gpu) cuda_free(l.bias_updates_gpu);
    if(l.scales_gpu) cuda_free(l.scales_gpu);
    if(l.scale_updates_gpu) cuda_free(l.scale_updates_gpu);
    if(l.output_gpu) cuda_free(l.output_gpu);
    if(l.delta_gpu) cuda_free(l.delta_gpu);
    if(l.rand_gpu) cuda_free(l.rand_gpu);
    if(l.squared_gpu) cuda_free(l.squared_gpu);
    if(l.norms_gpu) cuda_free(l.norms_gpu);
#endif
}
l->back; + void *val = b->val; + l->back = b->prev; + if(l->back) l->back->next = 0; + free(b); + --l->size; + + return val; +} + +void list_insert(list *l, void *val) +{ + node *new = malloc(sizeof(node)); + new->val = val; + new->next = 0; + + if(!l->back){ + l->front = new; + new->prev = 0; + }else{ + l->back->next = new; + new->prev = l->back; + } + l->back = new; + ++l->size; +} + +void free_node(node *n) +{ + node *next; + while(n) { + next = n->next; + free(n); + n = next; + } +} + +void free_list(list *l) +{ + free_node(l->front); + free(l); +} + +void free_list_contents(list *l) +{ + node *n = l->front; + while(n){ + free(n->val); + n = n->next; + } +} + +void **list_to_array(list *l) +{ + void **a = calloc(l->size, sizeof(void*)); + int count = 0; + node *n = l->front; + while(n){ + a[count++] = n->val; + n = n->next; + } + return a; +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/list.h b/workloads/realworld/uvm_prefetch_async/darknet/src/list.h new file mode 100644 index 0000000000000000000000000000000000000000..6b445c717c2937b9c90536654ba82a33e14bb4ec --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/list.h @@ -0,0 +1,13 @@ +#ifndef LIST_H +#define LIST_H +#include "darknet.h" + +list *make_list(); +int list_find(list *l, void *val); + +void list_insert(list *, void *); + + +void free_list_contents(list *l); + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/local_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/local_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..74f6910a8fd751ad9f3b41fc67be737399a151d0 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/local_layer.c @@ -0,0 +1,293 @@ +#include "local_layer.h" +#include "utils.h" +#include "im2col.h" +#include "col2im.h" +#include "blas.h" +#include "gemm.h" +#include +#include + +int local_out_height(local_layer l) +{ + int h = l.h; + if (!l.pad) h -= l.size; + else h -= 1; + 
/*
** Builds a locally-connected layer: like a convolution but with an
** independent size*size*c filter bank for every output location, so the
** weight tensor is c*n*size*size*locations floats.
** @param n     number of filters per location
** @param size  filter spatial extent
*/
local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation)
{
    int i;
    local_layer l = {0};
    l.type = LOCAL;

    l.h = h;
    l.w = w;
    l.c = c;
    l.n = n;
    l.batch = batch;
    l.stride = stride;
    l.size = size;
    l.pad = pad;

    int out_h = local_out_height(l);
    int out_w = local_out_width(l);
    int locations = out_h*out_w;       /* one filter bank per output pixel */
    l.out_h = out_h;
    l.out_w = out_w;
    l.out_c = n;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = l.w * l.h * l.c;

    l.weights = calloc(c*n*size*size*locations, sizeof(float));
    l.weight_updates = calloc(c*n*size*size*locations, sizeof(float));

    /* biases are per output element, not per filter */
    l.biases = calloc(l.outputs, sizeof(float));
    l.bias_updates = calloc(l.outputs, sizeof(float));

    // float scale = 1./sqrt(size*size*c);
    float scale = sqrt(2./(size*size*c));   /* He-style init scale */
    /* NOTE(review): this loop randomizes only the first location's
       c*n*size*size weights; the remaining (locations-1) banks stay zero
       from calloc. Looks like an oversight — confirm before relying on
       training this layer from scratch. */
    for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1,1);

    l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
    l.delta  = calloc(l.batch*out_h * out_w * n, sizeof(float));

    l.workspace_size = out_h*out_w*size*size*c;   /* im2col scratch per image */

    l.forward = forward_local_layer;
    l.backward = backward_local_layer;
    l.update = update_local_layer;

#ifdef GPU
    l.forward_gpu = forward_local_layer_gpu;
    l.backward_gpu = backward_local_layer_gpu;
    l.update_gpu = update_local_layer_gpu;

    l.weights_gpu = cuda_make_array(l.weights, c*n*size*size*locations);
    l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size*locations);

    l.biases_gpu = cuda_make_array(l.biases, l.outputs);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, l.outputs);

    l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
    l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);

#endif
    l.activation = activation;

    fprintf(stderr, "Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);

    return l;
}
net.delta+i*l.c*l.h*l.w); + } + } +} + +void update_local_layer(local_layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1); + scal_cpu(l.outputs, momentum, l.bias_updates, 1); + + axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(size, momentum, l.weight_updates, 1); +} + +#ifdef GPU + +void forward_local_layer_gpu(const local_layer l, network net) +{ + int out_h = local_out_height(l); + int out_w = local_out_width(l); + int i, j; + int locations = out_h * out_w; + + for(i = 0; i < l.batch; ++i){ + copy_gpu(l.outputs, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input_gpu + i*l.w*l.h*l.c; + im2col_gpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + float *output = l.output_gpu + i*l.outputs; + for(j = 0; j < locations; ++j){ + float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n; + float *b = net.workspace + j; + float *c = output + j; + + int m = l.n; + int n = 1; + int k = l.size*l.size*l.c; + + gemm_gpu(0,0,m,n,k,1,a,k,b,locations,1,c,locations); + } + } + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_local_layer_gpu(local_layer l, network net) +{ + int i, j; + int locations = l.out_w*l.out_h; + + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + for(i = 0; i < l.batch; ++i){ + axpy_gpu(l.outputs, 1, l.delta_gpu + i*l.outputs, 1, l.bias_updates_gpu, 1); + } + + for(i = 0; i < l.batch; ++i){ + float *input = net.input_gpu + i*l.w*l.h*l.c; + im2col_gpu(input, l.c, l.h, l.w, + l.size, l.stride, l.pad, net.workspace); + + for(j = 0; j < locations; 
++j){ + float *a = l.delta_gpu + i*l.outputs + j; + float *b = net.workspace + j; + float *c = l.weight_updates_gpu + j*l.size*l.size*l.c*l.n; + int m = l.n; + int n = l.size*l.size*l.c; + int k = 1; + + gemm_gpu(0,1,m,n,k,1,a,locations,b,locations,1,c,n); + } + + if(net.delta_gpu){ + for(j = 0; j < locations; ++j){ + float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n; + float *b = l.delta_gpu + i*l.outputs + j; + float *c = net.workspace + j; + + int m = l.size*l.size*l.c; + int n = 1; + int k = l.n; + + gemm_gpu(1,0,m,n,k,1,a,m,b,locations,0,c,locations); + } + + col2im_gpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta_gpu+i*l.c*l.h*l.w); + } + } +} + +void update_local_layer_gpu(local_layer l, update_args a) +{ + float learning_rate = a.learning_rate*l.learning_rate_scale; + float momentum = a.momentum; + float decay = a.decay; + int batch = a.batch; + + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); + scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1); + + axpy_gpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_gpu(size, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + scal_gpu(size, momentum, l.weight_updates_gpu, 1); +} + +void pull_local_layer(local_layer l) +{ + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + cuda_pull_array(l.weights_gpu, l.weights, size); + cuda_pull_array(l.biases_gpu, l.biases, l.outputs); +} + +void push_local_layer(local_layer l) +{ + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + cuda_push_array(l.weights_gpu, l.weights, size); + cuda_push_array(l.biases_gpu, l.biases, l.outputs); +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/local_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/local_layer.h new file mode 100644 index 
0000000000000000000000000000000000000000..56805c4f1cb51fed9ef0e771d2befb430df60df5 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/local_layer.h @@ -0,0 +1,31 @@ +#ifndef LOCAL_LAYER_H +#define LOCAL_LAYER_H + +#include "cuda_dark.h" +#include "image.h" +#include "activations.h" +#include "layer.h" +#include "network.h" + +typedef layer local_layer; + +#ifdef GPU +void forward_local_layer_gpu(local_layer layer, network net); +void backward_local_layer_gpu(local_layer layer, network net); +void update_local_layer_gpu(local_layer layer, update_args a); + +void push_local_layer(local_layer layer); +void pull_local_layer(local_layer layer); +#endif + +local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation); + +void forward_local_layer(const local_layer layer, network net); +void backward_local_layer(local_layer layer, network net); +void update_local_layer(local_layer layer, update_args a); + +void bias_output(float *output, float *biases, int batch, int n, int size); +void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/logistic_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/logistic_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..8d98986f67a17da70df75e3d56a46615cfc8eaf1 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/logistic_layer.c @@ -0,0 +1,71 @@ +#include "logistic_layer.h" +#include "activations.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +layer make_logistic_layer(int batch, int inputs) +{ + fprintf(stderr, "logistic x entropy %4d\n", inputs); + layer l = {0}; + l.type = LOGXENT; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.loss = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.delta = 
calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = forward_logistic_layer; + l.backward = backward_logistic_layer; + #ifdef GPU + l.forward_gpu = forward_logistic_layer_gpu; + l.backward_gpu = backward_logistic_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.loss_gpu = cuda_make_array(l.loss, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_logistic_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + activate_array(l.output, l.outputs*l.batch, LOGISTIC); + if(net.truth){ + logistic_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_logistic_layer(const layer l, network net) +{ + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void forward_logistic_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, LOGISTIC); + if(net.truth){ + logistic_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); + cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_logistic_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/logistic_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/logistic_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9c25bee3c2a6eb1013ed43ce0c4aeaa63b7a293f --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/logistic_layer.h @@ -0,0 +1,15 @@ +#ifndef LOGISTIC_LAYER_H +#define LOGISTIC_LAYER_H +#include "layer.h" +#include "network.h" + +layer make_logistic_layer(int batch, int 
/*
** Builds an LSTM layer as eight internal fully-connected sub-layers:
** u{f,i,g,o} map the input x_t and w{f,i,g,o} map the hidden state h_{t-1}
** to the forget/input/candidate/output gate pre-activations. The sub-layers
** are sized batch*steps so one gemm covers a whole unrolled sequence, then
** their .batch is rewound to the per-step batch.
** NOTE(review): l.backward is never assigned for the CPU path (only
** l.forward/l.update, and backward_lstm_layer_gpu under GPU) — confirm the
** CPU backward pass is intentionally unsupported here.
*/
layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam)
{
    fprintf(stderr, "LSTM Layer: %d inputs, %d outputs\n", inputs, outputs);
    batch = batch / steps;   /* caller passes total rows; convert to per-step batch */
    layer l = { 0 };
    l.batch = batch;
    l.type = LSTM;
    l.steps = steps;
    l.inputs = inputs;

    /* input-to-gate connections (x_t -> gates) */
    l.uf = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.uf) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam);
    l.uf->batch = batch;

    l.ui = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.ui) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam);
    l.ui->batch = batch;

    l.ug = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.ug) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam);
    l.ug->batch = batch;

    l.uo = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.uo) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam);
    l.uo->batch = batch;

    /* recurrent connections (h_{t-1} -> gates) */
    l.wf = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.wf) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam);
    l.wf->batch = batch;

    l.wi = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.wi) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam);
    l.wi->batch = batch;

    l.wg = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.wg) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam);
    l.wg->batch = batch;

    l.wo = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.wo) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam);
    l.wo->batch = batch;

    l.batch_normalize = batch_normalize;
    l.outputs = outputs;

    l.output = calloc(outputs*batch*steps, sizeof(float));   /* h_t for every step */
    l.state = calloc(outputs*batch, sizeof(float));

    l.forward = forward_lstm_layer;
    l.update = update_lstm_layer;

    /* per-step scratch: previous state/cell, gate activations, temporaries */
    l.prev_state_cpu =  calloc(batch*outputs, sizeof(float));
    l.prev_cell_cpu =   calloc(batch*outputs, sizeof(float));
    l.cell_cpu =        calloc(batch*outputs*steps, sizeof(float));

    l.f_cpu =           calloc(batch*outputs, sizeof(float));
    l.i_cpu =           calloc(batch*outputs, sizeof(float));
    l.g_cpu =           calloc(batch*outputs, sizeof(float));
    l.o_cpu =           calloc(batch*outputs, sizeof(float));
    l.c_cpu =           calloc(batch*outputs, sizeof(float));
    l.h_cpu =           calloc(batch*outputs, sizeof(float));
    l.temp_cpu =        calloc(batch*outputs, sizeof(float));
    l.temp2_cpu =       calloc(batch*outputs, sizeof(float));
    l.temp3_cpu =       calloc(batch*outputs, sizeof(float));
    l.dc_cpu =          calloc(batch*outputs, sizeof(float));
    l.dh_cpu =          calloc(batch*outputs, sizeof(float));

#ifdef GPU
    l.forward_gpu = forward_lstm_layer_gpu;
    l.backward_gpu = backward_lstm_layer_gpu;
    l.update_gpu = update_lstm_layer_gpu;

    l.output_gpu = cuda_make_array(0, batch*outputs*steps);
    l.delta_gpu = cuda_make_array(0, batch*l.outputs*steps);

    l.prev_state_gpu = cuda_make_array(0, batch*outputs);
    l.prev_cell_gpu = cuda_make_array(0, batch*outputs);
    l.cell_gpu = cuda_make_array(0, batch*outputs*steps);

    l.f_gpu = cuda_make_array(0, batch*outputs);
    l.i_gpu = cuda_make_array(0, batch*outputs);
    l.g_gpu = cuda_make_array(0, batch*outputs);
    l.o_gpu = cuda_make_array(0, batch*outputs);
    l.c_gpu = cuda_make_array(0, batch*outputs);
    l.h_gpu = cuda_make_array(0, batch*outputs);
    l.temp_gpu =  cuda_make_array(0, batch*outputs);
    l.temp2_gpu = cuda_make_array(0, batch*outputs);
    l.temp3_gpu = cuda_make_array(0, batch*outputs);
    l.dc_gpu = cuda_make_array(0, batch*outputs);
    l.dh_gpu = cuda_make_array(0, batch*outputs);
#ifdef CUDNN
    /* reshape sub-layer output descriptors to the per-step batch */
    cudnnSetTensor4dDescriptor(l.wf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wf->out_c, l.wf->out_h, l.wf->out_w);
    cudnnSetTensor4dDescriptor(l.wi->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wi->out_c, l.wi->out_h, l.wi->out_w);
    cudnnSetTensor4dDescriptor(l.wg->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wg->out_c, l.wg->out_h, l.wg->out_w);
    cudnnSetTensor4dDescriptor(l.wo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wo->out_c, l.wo->out_h, l.wo->out_w);

    cudnnSetTensor4dDescriptor(l.uf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uf->out_c, l.uf->out_h, l.uf->out_w);
    cudnnSetTensor4dDescriptor(l.ui->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ui->out_c, l.ui->out_h, l.ui->out_w);
    cudnnSetTensor4dDescriptor(l.ug->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ug->out_c, l.ug->out_h, l.ug->out_w);
    cudnnSetTensor4dDescriptor(l.uo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uo->out_c, l.uo->out_h, l.uo->out_w);
#endif

#endif

    return l;
}
update_connected_layer(*(l.wi), a); + update_connected_layer(*(l.wg), a); + update_connected_layer(*(l.wo), a); + update_connected_layer(*(l.uf), a); + update_connected_layer(*(l.ui), a); + update_connected_layer(*(l.ug), a); + update_connected_layer(*(l.uo), a); +} + +void forward_lstm_layer(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + fill_cpu(l.outputs * l.batch * l.steps, 0, wf.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wi.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wg.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, wo.delta, 1); + + fill_cpu(l.outputs * l.batch * l.steps, 0, uf.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, ui.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, ug.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, uo.delta, 1); + if (state.train) { + fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input = l.h_cpu; + forward_connected_layer(wf, s); + forward_connected_layer(wi, s); + forward_connected_layer(wg, s); + forward_connected_layer(wo, s); + + s.input = state.input; + forward_connected_layer(uf, s); + forward_connected_layer(ui, s); + forward_connected_layer(ug, s); + forward_connected_layer(uo, s); + + copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); + + copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); + + copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); + + copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); + + activate_array(l.f_cpu, l.outputs*l.batch, 
LOGISTIC); + activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.g_cpu, l.outputs*l.batch, TANH); + activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); + + copy_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.c_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, l.temp_cpu, 1, l.c_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.h_cpu, 1); + activate_array(l.h_cpu, l.outputs*l.batch, TANH); + mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.h_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.cell_cpu, 1); + copy_cpu(l.outputs*l.batch, l.h_cpu, 1, l.output, 1); + + state.input += l.inputs*l.batch; + l.output += l.outputs*l.batch; + l.cell_cpu += l.outputs*l.batch; + + increment_layer(&wf, 1); + increment_layer(&wi, 1); + increment_layer(&wg, 1); + increment_layer(&wo, 1); + + increment_layer(&uf, 1); + increment_layer(&ui, 1); + increment_layer(&ug, 1); + increment_layer(&uo, 1); + } +} + +void backward_lstm_layer(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + increment_layer(&wf, l.steps - 1); + increment_layer(&wi, l.steps - 1); + increment_layer(&wg, l.steps - 1); + increment_layer(&wo, l.steps - 1); + + increment_layer(&uf, l.steps - 1); + increment_layer(&ui, l.steps - 1); + increment_layer(&ug, l.steps - 1); + increment_layer(&uo, l.steps - 1); + + state.input += l.inputs*l.batch*(l.steps - 1); + if (state.delta) state.delta += l.inputs*l.batch*(l.steps - 1); + + l.output += l.outputs*l.batch*(l.steps - 1); + l.cell_cpu += l.outputs*l.batch*(l.steps - 1); + l.delta += l.outputs*l.batch*(l.steps - 1); + + for (i = l.steps - 1; i >= 0; --i) { + if (i != 0) copy_cpu(l.outputs*l.batch, l.cell_cpu - l.outputs*l.batch, 1, 
l.prev_cell_cpu, 1); + copy_cpu(l.outputs*l.batch, l.cell_cpu, 1, l.c_cpu, 1); + if (i != 0) copy_cpu(l.outputs*l.batch, l.output - l.outputs*l.batch, 1, l.prev_state_cpu, 1); + copy_cpu(l.outputs*l.batch, l.output, 1, l.h_cpu, 1); + + l.dh_cpu = (i == 0) ? 0 : l.delta - l.outputs*l.batch; + + copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); + + copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); + + copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); + + copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); + axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); + + activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); + activate_array(l.g_cpu, l.outputs*l.batch, TANH); + activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); + + copy_cpu(l.outputs*l.batch, l.delta, 1, l.temp3_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); + activate_array(l.temp_cpu, l.outputs*l.batch, TANH); + + copy_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp2_cpu, 1); + mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.temp2_cpu, 1); + + gradient_array(l.temp_cpu, l.outputs*l.batch, TANH, l.temp2_cpu); + axpy_cpu(l.outputs*l.batch, 1, l.dc_cpu, 1, l.temp2_cpu, 1); + + copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); + activate_array(l.temp_cpu, l.outputs*l.batch, TANH); + mul_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp_cpu, 1); + gradient_array(l.o_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wo.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wo, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uo.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(uo, s); + + copy_cpu(l.outputs*l.batch, 
l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); + gradient_array(l.g_cpu, l.outputs*l.batch, TANH, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wg.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wg, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ug.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(ug, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); + gradient_array(l.i_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wi.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wi, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ui.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(ui, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.prev_cell_cpu, 1, l.temp_cpu, 1); + gradient_array(l.f_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wf.delta, 1); + s.input = l.prev_state_cpu; + s.delta = l.dh_cpu; + backward_connected_layer(wf, s); + + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uf.delta, 1); + s.input = state.input; + s.delta = state.delta; + backward_connected_layer(uf, s); + + copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); + mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.temp_cpu, 1); + copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, l.dc_cpu, 1); + + state.input -= l.inputs*l.batch; + if (state.delta) state.delta -= l.inputs*l.batch; + l.output -= l.outputs*l.batch; + l.cell_cpu -= l.outputs*l.batch; + l.delta -= l.outputs*l.batch; + + increment_layer(&wf, -1); + increment_layer(&wi, -1); + increment_layer(&wg, -1); + increment_layer(&wo, -1); + + increment_layer(&uf, -1); + increment_layer(&ui, -1); + increment_layer(&ug, -1); + 
increment_layer(&uo, -1); + } +} + +#ifdef GPU +void update_lstm_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.wf), a); + update_connected_layer_gpu(*(l.wi), a); + update_connected_layer_gpu(*(l.wg), a); + update_connected_layer_gpu(*(l.wo), a); + update_connected_layer_gpu(*(l.uf), a); + update_connected_layer_gpu(*(l.ui), a); + update_connected_layer_gpu(*(l.ug), a); + update_connected_layer_gpu(*(l.uo), a); +} + +void forward_lstm_layer_gpu(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + fill_gpu(l.outputs * l.batch * l.steps, 0, wf.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wi.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wg.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, wo.delta_gpu, 1); + + fill_gpu(l.outputs * l.batch * l.steps, 0, uf.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, ui.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, ug.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, uo.delta_gpu, 1); + if (state.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = l.h_gpu; + forward_connected_layer_gpu(wf, s); + forward_connected_layer_gpu(wi, s); + forward_connected_layer_gpu(wg, s); + forward_connected_layer_gpu(wo, s); + + s.input_gpu = state.input_gpu; + forward_connected_layer_gpu(uf, s); + forward_connected_layer_gpu(ui, s); + forward_connected_layer_gpu(ug, s); + forward_connected_layer_gpu(uo, s); + + copy_gpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1); + + copy_gpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1); + + 
copy_gpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1); + + copy_gpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1); + + activate_array_gpu(l.f_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.g_gpu, l.outputs*l.batch, TANH); + activate_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.f_gpu, 1, l.c_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, l.temp_gpu, 1, l.c_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.h_gpu, 1); + activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH); + mul_gpu(l.outputs*l.batch, l.o_gpu, 1, l.h_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.cell_gpu, 1); + copy_gpu(l.outputs*l.batch, l.h_gpu, 1, l.output_gpu, 1); + + state.input_gpu += l.inputs*l.batch; + l.output_gpu += l.outputs*l.batch; + l.cell_gpu += l.outputs*l.batch; + + increment_layer(&wf, 1); + increment_layer(&wi, 1); + increment_layer(&wg, 1); + increment_layer(&wo, 1); + + increment_layer(&uf, 1); + increment_layer(&ui, 1); + increment_layer(&ug, 1); + increment_layer(&uo, 1); + } +} + +void backward_lstm_layer_gpu(layer l, network state) +{ + network s = { 0 }; + s.train = state.train; + int i; + layer wf = *(l.wf); + layer wi = *(l.wi); + layer wg = *(l.wg); + layer wo = *(l.wo); + + layer uf = *(l.uf); + layer ui = *(l.ui); + layer ug = *(l.ug); + layer uo = *(l.uo); + + increment_layer(&wf, l.steps - 1); + increment_layer(&wi, l.steps - 1); + increment_layer(&wg, l.steps - 1); + increment_layer(&wo, l.steps - 1); + + increment_layer(&uf, l.steps - 1); + increment_layer(&ui, l.steps - 1); + increment_layer(&ug, l.steps - 1); + increment_layer(&uo, l.steps - 1); + + state.input_gpu += l.inputs*l.batch*(l.steps - 1); + if 
(state.delta_gpu) state.delta_gpu += l.inputs*l.batch*(l.steps - 1); + + l.output_gpu += l.outputs*l.batch*(l.steps - 1); + l.cell_gpu += l.outputs*l.batch*(l.steps - 1); + l.delta_gpu += l.outputs*l.batch*(l.steps - 1); + + for (i = l.steps - 1; i >= 0; --i) { + if (i != 0) copy_gpu(l.outputs*l.batch, l.cell_gpu - l.outputs*l.batch, 1, l.prev_cell_gpu, 1); + copy_gpu(l.outputs*l.batch, l.cell_gpu, 1, l.c_gpu, 1); + if (i != 0) copy_gpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, 1, l.prev_state_gpu, 1); + copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.h_gpu, 1); + + l.dh_gpu = (i == 0) ? 0 : l.delta_gpu - l.outputs*l.batch; + + copy_gpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1); + + copy_gpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1); + + copy_gpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1); + + copy_gpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1); + axpy_gpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1); + + activate_array_gpu(l.f_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC); + activate_array_gpu(l.g_gpu, l.outputs*l.batch, TANH); + activate_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC); + + copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, l.temp3_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.temp_gpu, 1); + activate_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH); + + copy_gpu(l.outputs*l.batch, l.temp3_gpu, 1, l.temp2_gpu, 1); + mul_gpu(l.outputs*l.batch, l.o_gpu, 1, l.temp2_gpu, 1); + + gradient_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH, l.temp2_gpu); + axpy_gpu(l.outputs*l.batch, 1, l.dc_gpu, 1, l.temp2_gpu, 1); + + copy_gpu(l.outputs*l.batch, l.c_gpu, 1, l.temp_gpu, 1); + activate_array_gpu(l.temp_gpu, l.outputs*l.batch, TANH); + mul_gpu(l.outputs*l.batch, l.temp3_gpu, 1, 
l.temp_gpu, 1); + gradient_array_gpu(l.o_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wo.delta_gpu, 1); + s.input_gpu = l.prev_state_gpu; + s.delta_gpu = l.dh_gpu; + backward_connected_layer_gpu(wo, s); + + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, uo.delta_gpu, 1); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(uo, s); + + copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1); + gradient_array_gpu(l.g_gpu, l.outputs*l.batch, TANH, l.temp_gpu); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wg.delta_gpu, 1); + s.input_gpu = l.prev_state_gpu; + s.delta_gpu = l.dh_gpu; + backward_connected_layer_gpu(wg, s); + + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, ug.delta_gpu, 1); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(ug, s); + + copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1); + gradient_array_gpu(l.i_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wi.delta_gpu, 1); + s.input_gpu = l.prev_state_gpu; + s.delta_gpu = l.dh_gpu; + backward_connected_layer_gpu(wi, s); + + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, ui.delta_gpu, 1); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(ui, s); + + copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.prev_cell_gpu, 1, l.temp_gpu, 1); + gradient_array_gpu(l.f_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, wf.delta_gpu, 1); + s.input_gpu = l.prev_state_gpu; + s.delta_gpu = l.dh_gpu; + backward_connected_layer_gpu(wf, s); + + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, uf.delta_gpu, 1); + s.input_gpu = state.input_gpu; + s.delta_gpu = state.delta_gpu; + backward_connected_layer_gpu(uf, s); 
+ + copy_gpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); + mul_gpu(l.outputs*l.batch, l.f_gpu, 1, l.temp_gpu, 1); + copy_gpu(l.outputs*l.batch, l.temp_gpu, 1, l.dc_gpu, 1); + + state.input_gpu -= l.inputs*l.batch; + if (state.delta_gpu) state.delta_gpu -= l.inputs*l.batch; + l.output_gpu -= l.outputs*l.batch; + l.cell_gpu -= l.outputs*l.batch; + l.delta_gpu -= l.outputs*l.batch; + + increment_layer(&wf, -1); + increment_layer(&wi, -1); + increment_layer(&wg, -1); + increment_layer(&wo, -1); + + increment_layer(&uf, -1); + increment_layer(&ui, -1); + increment_layer(&ug, -1); + increment_layer(&uo, -1); + } +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/lstm_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/lstm_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..b9f07e6424b55c336e692aa6f1028d0bc1cae0b3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/lstm_layer.h @@ -0,0 +1,20 @@ +#ifndef LSTM_LAYER_H +#define LSTM_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" +#define USET + +layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam); + +void forward_lstm_layer(layer l, network net); +void update_lstm_layer(layer l, update_args a); + +#ifdef GPU +void forward_lstm_layer_gpu(layer l, network net); +void backward_lstm_layer_gpu(layer l, network net); +void update_lstm_layer_gpu(layer l, update_args a); + +#endif +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/matrix.c b/workloads/realworld/uvm_prefetch_async/darknet/src/matrix.c new file mode 100644 index 0000000000000000000000000000000000000000..799916bff017180e220ae48748f495007793d168 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/matrix.c @@ -0,0 +1,196 @@ +#include "matrix.h" +#include "utils.h" +#include "blas.h" +#include +#include +#include +#include +#include + +void free_matrix(matrix m) +{ + int 
i; + for(i = 0; i < m.rows; ++i) free(m.vals[i]); + free(m.vals); +} + +float matrix_topk_accuracy(matrix truth, matrix guess, int k) +{ + int *indexes = calloc(k, sizeof(int)); + int n = truth.cols; + int i,j; + int correct = 0; + for(i = 0; i < truth.rows; ++i){ + top_k(guess.vals[i], n, k, indexes); + for(j = 0; j < k; ++j){ + int class = indexes[j]; + if(truth.vals[i][class]){ + ++correct; + break; + } + } + } + free(indexes); + return (float)correct/truth.rows; +} + +void scale_matrix(matrix m, float scale) +{ + int i,j; + for(i = 0; i < m.rows; ++i){ + for(j = 0; j < m.cols; ++j){ + m.vals[i][j] *= scale; + } + } +} + +matrix resize_matrix(matrix m, int size) +{ + int i; + if (m.rows == size) return m; + if (m.rows < size) { + m.vals = realloc(m.vals, size*sizeof(float*)); + for (i = m.rows; i < size; ++i) { + m.vals[i] = calloc(m.cols, sizeof(float)); + } + } else if (m.rows > size) { + for (i = size; i < m.rows; ++i) { + free(m.vals[i]); + } + m.vals = realloc(m.vals, size*sizeof(float*)); + } + m.rows = size; + return m; +} + +void matrix_add_matrix(matrix from, matrix to) +{ + assert(from.rows == to.rows && from.cols == to.cols); + int i,j; + for(i = 0; i < from.rows; ++i){ + for(j = 0; j < from.cols; ++j){ + to.vals[i][j] += from.vals[i][j]; + } + } +} + +matrix copy_matrix(matrix m) +{ + matrix c = {0}; + c.rows = m.rows; + c.cols = m.cols; + c.vals = calloc(c.rows, sizeof(float *)); + int i; + for(i = 0; i < c.rows; ++i){ + c.vals[i] = calloc(c.cols, sizeof(float)); + copy_cpu(c.cols, m.vals[i], 1, c.vals[i], 1); + } + return c; +} + +matrix make_matrix(int rows, int cols) +{ + int i; + matrix m; + m.rows = rows; + m.cols = cols; + m.vals = calloc(m.rows, sizeof(float *)); + for(i = 0; i < m.rows; ++i){ + m.vals[i] = calloc(m.cols, sizeof(float)); + } + return m; +} + +matrix hold_out_matrix(matrix *m, int n) +{ + int i; + matrix h; + h.rows = n; + h.cols = m->cols; + h.vals = calloc(h.rows, sizeof(float *)); + for(i = 0; i < n; ++i){ + int index = 
rand()%m->rows; + h.vals[i] = m->vals[index]; + m->vals[index] = m->vals[--(m->rows)]; + } + return h; +} + +float *pop_column(matrix *m, int c) +{ + float *col = calloc(m->rows, sizeof(float)); + int i, j; + for(i = 0; i < m->rows; ++i){ + col[i] = m->vals[i][c]; + for(j = c; j < m->cols-1; ++j){ + m->vals[i][j] = m->vals[i][j+1]; + } + } + --m->cols; + return col; +} + +matrix csv_to_matrix(char *filename) +{ + FILE *fp = fopen(filename, "r"); + if(!fp) file_error(filename); + + matrix m; + m.cols = -1; + + char *line; + + int n = 0; + int size = 1024; + m.vals = calloc(size, sizeof(float*)); + while((line = fgetl(fp))){ + if(m.cols == -1) m.cols = count_fields(line); + if(n == size){ + size *= 2; + m.vals = realloc(m.vals, size*sizeof(float*)); + } + m.vals[n] = parse_fields(line, m.cols); + free(line); + ++n; + } + m.vals = realloc(m.vals, n*sizeof(float*)); + m.rows = n; + return m; +} + +void matrix_to_csv(matrix m) +{ + int i, j; + + for(i = 0; i < m.rows; ++i){ + for(j = 0; j < m.cols; ++j){ + if(j > 0) printf(","); + printf("%.17g", m.vals[i][j]); + } + printf("\n"); + } +} + +void print_matrix(matrix m) +{ + int i, j; + printf("%d X %d Matrix:\n",m.rows, m.cols); + printf(" __"); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf("__ \n"); + + printf("| "); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf(" |\n"); + + for(i = 0; i < m.rows; ++i){ + printf("| "); + for(j = 0; j < m.cols; ++j){ + printf("%15.7f ", m.vals[i][j]); + } + printf(" |\n"); + } + printf("|__"); + for(j = 0; j < 16*m.cols-1; ++j) printf(" "); + printf("__|\n"); +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/matrix.h b/workloads/realworld/uvm_prefetch_async/darknet/src/matrix.h new file mode 100644 index 0000000000000000000000000000000000000000..879acd70d26c084931b30067ddcc77057068e58c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/matrix.h @@ -0,0 +1,13 @@ +#ifndef MATRIX_H +#define MATRIX_H +#include "darknet.h" + +matrix 
copy_matrix(matrix m); +void print_matrix(matrix m); + +matrix hold_out_matrix(matrix *m, int n); +matrix resize_matrix(matrix m, int size); + +float *pop_column(matrix *m, int c); + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/maxpool_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/maxpool_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..b54be838662ebfc53abc539da22413becc1805a3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/maxpool_layer.c @@ -0,0 +1,127 @@ +#include "maxpool_layer.h" +#include "cuda_dark.h" +#include <stdio.h> + +image get_maxpool_image(maxpool_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.c; + return float_to_image(w,h,c,l.output); +} + +image get_maxpool_delta(maxpool_layer l) +{ + int h = l.out_h; + int w = l.out_w; + int c = l.c; + return float_to_image(w,h,c,l.delta); +} + +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding) +{ + maxpool_layer l = {0}; + l.type = MAXPOOL; + l.batch = batch; + l.h = h; + l.w = w; + l.c = c; + l.pad = padding; + l.out_w = (w + padding - size)/stride + 1; + l.out_h = (h + padding - size)/stride + 1; + l.out_c = c; + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = h*w*c; + l.size = size; + l.stride = stride; + int output_size = l.out_h * l.out_w * l.out_c * batch; + l.indexes = calloc(output_size, sizeof(int)); + l.output = calloc(output_size, sizeof(float)); + l.delta = calloc(output_size, sizeof(float)); + l.forward = forward_maxpool_layer; + l.backward = backward_maxpool_layer; + #ifdef GPU + l.forward_gpu = forward_maxpool_layer_gpu; + l.backward_gpu = backward_maxpool_layer_gpu; + l.indexes_gpu = cuda_make_int_array(0, output_size); + l.output_gpu = cuda_make_array(l.output, output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); + #endif + fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d\n", size, size, stride, w, h, c, l.out_w, 
l.out_h, l.out_c); + return l; +} + +void resize_maxpool_layer(maxpool_layer *l, int w, int h) +{ + l->h = h; + l->w = w; + l->inputs = h*w*l->c; + + l->out_w = (w + l->pad - l->size)/l->stride + 1; + l->out_h = (h + l->pad - l->size)/l->stride + 1; + l->outputs = l->out_w * l->out_h * l->c; + int output_size = l->outputs * l->batch; + + l->indexes = realloc(l->indexes, output_size * sizeof(int)); + l->output = realloc(l->output, output_size * sizeof(float)); + l->delta = realloc(l->delta, output_size * sizeof(float)); + + #ifdef GPU + cuda_free((float *)l->indexes_gpu); + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->indexes_gpu = cuda_make_int_array(0, output_size); + l->output_gpu = cuda_make_array(l->output, output_size); + l->delta_gpu = cuda_make_array(l->delta, output_size); + #endif +} + +void forward_maxpool_layer(const maxpool_layer l, network net) +{ + int b,i,j,k,m,n; + int w_offset = -l.pad/2; + int h_offset = -l.pad/2; + + int h = l.out_h; + int w = l.out_w; + int c = l.c; + + for(b = 0; b < l.batch; ++b){ + for(k = 0; k < c; ++k){ + for(i = 0; i < h; ++i){ + for(j = 0; j < w; ++j){ + int out_index = j + w*(i + h*(k + c*b)); + float max = -FLT_MAX; + int max_i = -1; + for(n = 0; n < l.size; ++n){ + for(m = 0; m < l.size; ++m){ + int cur_h = h_offset + i*l.stride + n; + int cur_w = w_offset + j*l.stride + m; + int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c)); + int valid = (cur_h >= 0 && cur_h < l.h && + cur_w >= 0 && cur_w < l.w); + float val = (valid != 0) ? net.input[index] : -FLT_MAX; + max_i = (val > max) ? index : max_i; + max = (val > max) ? 
val : max; + } + } + l.output[out_index] = max; + l.indexes[out_index] = max_i; + } + } + } + } +} + +void backward_maxpool_layer(const maxpool_layer l, network net) +{ + int i; + int h = l.out_h; + int w = l.out_w; + int c = l.c; + for(i = 0; i < h*w*c*l.batch; ++i){ + int index = l.indexes[i]; + net.delta[index] += l.delta[i]; + } +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/maxpool_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/maxpool_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..f01adb957e8bd8ce01a06e5a1ff14a988ae07149 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/maxpool_layer.h @@ -0,0 +1,23 @@ +#ifndef MAXPOOL_LAYER_H +#define MAXPOOL_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +typedef layer maxpool_layer; + +image get_maxpool_image(maxpool_layer l); +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding); +void resize_maxpool_layer(maxpool_layer *l, int w, int h); +void forward_maxpool_layer(const maxpool_layer l, network net); +void backward_maxpool_layer(const maxpool_layer l, network net); + +#ifdef GPU +void forward_maxpool_layer_gpu(maxpool_layer l, network net); +void backward_maxpool_layer_gpu(maxpool_layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/maxpool_layer_kernels.cu b/workloads/realworld/uvm_prefetch_async/darknet/src/maxpool_layer_kernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..89348b6d9b65c2456592e8d2c2755fec6fc1a9db --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/maxpool_layer_kernels.cu @@ -0,0 +1,135 @@ +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +extern "C" { +#include "maxpool_layer.h" +#include "cuda_dark.h" +} + +__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int 
stride, int size, int pad, float *input, float *output, int *indexes) +{ + int h = (in_h + pad - size)/stride + 1; + int w = (in_w + pad - size)/stride + 1; + int c = in_c; + + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int j = id % w; + id /= w; + int i = id % h; + id /= h; + int k = id % c; + id /= c; + int b = id; + + int w_offset = -pad/2; + int h_offset = -pad/2; + + int out_index = j + w*(i + h*(k + c*b)); + float max = -INFINITY; + int max_i = -1; + int l, m; + for(l = 0; l < size; ++l){ + for(m = 0; m < size; ++m){ + int cur_h = h_offset + i*stride + l; + int cur_w = w_offset + j*stride + m; + int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c)); + int valid = (cur_h >= 0 && cur_h < in_h && + cur_w >= 0 && cur_w < in_w); + float val = (valid != 0) ? input[index] : -INFINITY; + max_i = (val > max) ? index : max_i; + max = (val > max) ? val : max; + } + } + output[out_index] = max; + indexes[out_index] = max_i; +} + +__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes) +{ + int h = (in_h + pad - size)/stride + 1; + int w = (in_w + pad - size)/stride + 1; + int c = in_c; + int area = (size-1)/stride; + + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int index = id; + int j = id % in_w; + id /= in_w; + int i = id % in_h; + id /= in_h; + int k = id % in_c; + id /= in_c; + int b = id; + + int w_offset = -pad/2; + int h_offset = -pad/2; + + float d = 0; + int l, m; + for(l = -area; l < area+1; ++l){ + for(m = -area; m < area+1; ++m){ + int out_w = (j-w_offset)/stride + m; + int out_h = (i-h_offset)/stride + l; + int out_index = out_w + w*(out_h + h*(k + c*b)); + int valid = (out_w >= 0 && out_w < w && + out_h >= 0 && out_h < h); + d += (valid && indexes[out_index] == index) ? 
delta[out_index] : 0; + } + } + prev_delta[index] += d; +} + +extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network net) +{ + int h = layer.out_h; + int w = layer.out_w; + int c = layer.c; + + size_t n = h*w*c*layer.batch; + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(net.input_gpu, n * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(layer.indexes_gpu, n * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(layer.output_gpu, n * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, net.input_gpu, layer.output_gpu, layer.indexes_gpu); + check_error(cudaPeekAtLastError()); +} + +extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network net) +{ + size_t n = layer.h*layer.w*layer.c*layer.batch; + size_t in_n = layer.out_h*layer.out_w*layer.c*layer.batch; + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(layer.delta_gpu, in_n * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(layer.indexes_gpu, in_n * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(net.delta_gpu, n * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, net.delta_gpu, layer.indexes_gpu); + check_error(cudaPeekAtLastError()); +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/network.c b/workloads/realworld/uvm_prefetch_async/darknet/src/network.c 
new file mode 100644 index 0000000000000000000000000000000000000000..aaab7997b5ee7da829289fa153f942a066b43d8c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/network.c @@ -0,0 +1,1129 @@ +#include +#include +#include +#include "network.h" +#include "image.h" +#include "data.h" +#include "utils.h" +#include "blas.h" + +#include "crop_layer.h" +#include "connected_layer.h" +#include "gru_layer.h" +#include "rnn_layer.h" +#include "crnn_layer.h" +#include "local_layer.h" +#include "convolutional_layer.h" +#include "activation_layer.h" +#include "detection_layer.h" +#include "region_layer.h" +#include "yolo_layer.h" +#include "normalization_layer.h" +#include "batchnorm_layer.h" +#include "maxpool_layer.h" +#include "reorg_layer.h" +#include "avgpool_layer.h" +#include "cost_layer.h" +#include "softmax_layer.h" +#include "dropout_layer.h" +#include "route_layer.h" +#include "upsample_layer.h" +#include "shortcut_layer.h" +#include "parser.h" +#include "data.h" + +load_args get_base_args(network *net) +{ + load_args args = {0}; + args.w = net->w; + args.h = net->h; + args.size = net->w; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.center = net->center; + args.saturation = net->saturation; + args.hue = net->hue; + return args; +} + +network *load_network(char *cfg, char *weights, int clear) +{ + network *net = parse_network_cfg(cfg); + if(weights && weights[0] != 0){ + load_weights(net, weights); + } + if(clear) (*net->seen) = 0; + return net; +} + +size_t get_current_batch(network *net) +{ + size_t batch_num = (*net->seen)/(net->batch*net->subdivisions); + return batch_num; +} + +void reset_network_state(network *net, int b) +{ + int i; + for (i = 0; i < net->n; ++i) { + #ifdef GPU + layer l = net->layers[i]; + if(l.state_gpu){ + fill_gpu(l.outputs, 0, l.state_gpu + l.outputs*b, 1); + } + if(l.h_gpu){ + fill_gpu(l.outputs, 0, l.h_gpu + 
l.outputs*b, 1); + } + #endif + } +} + +void reset_rnn(network *net) +{ + reset_network_state(net, 0); +} + +float get_current_rate(network *net) +{ + size_t batch_num = get_current_batch(net); + int i; + float rate; + if (batch_num < net->burn_in) return net->learning_rate * pow((float)batch_num / net->burn_in, net->power); + switch (net->policy) { + case CONSTANT: + return net->learning_rate; + case STEP: + return net->learning_rate * pow(net->scale, batch_num/net->step); + case STEPS: + rate = net->learning_rate; + for(i = 0; i < net->num_steps; ++i){ + if(net->steps[i] > batch_num) return rate; + rate *= net->scales[i]; + } + return rate; + case EXP: + return net->learning_rate * pow(net->gamma, batch_num); + case POLY: + return net->learning_rate * pow(1 - (float)batch_num / net->max_batches, net->power); + case RANDOM: + return net->learning_rate * pow(rand_uniform(0,1), net->power); + case SIG: + return net->learning_rate * (1./(1.+exp(net->gamma*(batch_num - net->step)))); + default: + fprintf(stderr, "Policy is weird!\n"); + return net->learning_rate; + } +} + +char *get_layer_string(LAYER_TYPE a) +{ + switch(a){ + case CONVOLUTIONAL: + return "convolutional"; + case ACTIVE: + return "activation"; + case LOCAL: + return "local"; + case DECONVOLUTIONAL: + return "deconvolutional"; + case CONNECTED: + return "connected"; + case RNN: + return "rnn"; + case GRU: + return "gru"; + case LSTM: + return "lstm"; + case CRNN: + return "crnn"; + case MAXPOOL: + return "maxpool"; + case REORG: + return "reorg"; + case AVGPOOL: + return "avgpool"; + case SOFTMAX: + return "softmax"; + case DETECTION: + return "detection"; + case REGION: + return "region"; + case YOLO: + return "yolo"; + case DROPOUT: + return "dropout"; + case CROP: + return "crop"; + case COST: + return "cost"; + case ROUTE: + return "route"; + case SHORTCUT: + return "shortcut"; + case NORMALIZATION: + return "normalization"; + case BATCHNORM: + return "batchnorm"; + default: + break; + } + return 
"none"; +} + +network *make_network(int n) +{ + network *net = calloc(1, sizeof(network)); + net->n = n; + net->layers = calloc(net->n, sizeof(layer)); + net->seen = calloc(1, sizeof(size_t)); + net->t = calloc(1, sizeof(int)); + net->cost = calloc(1, sizeof(float)); + return net; +} + +void forward_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + forward_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + for(i = 0; i < net.n; ++i){ + net.index = i; + layer l = net.layers[i]; + if(l.delta){ + fill_cpu(l.outputs * l.batch, 0, l.delta, 1); + } + l.forward(l, net); + net.input = l.output; + if(l.truth) { + net.truth = l.output; + } + } + calc_network_cost(netp); +} + +void update_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + update_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + update_args a = {0}; + a.batch = net.batch*net.subdivisions; + a.learning_rate = get_current_rate(netp); + a.momentum = net.momentum; + a.decay = net.decay; + a.adam = net.adam; + a.B1 = net.B1; + a.B2 = net.B2; + a.eps = net.eps; + ++*net.t; + a.t = *net.t; + + for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.update){ + l.update(l, a); + } + } +} + +void calc_network_cost(network *netp) +{ + network net = *netp; + int i; + float sum = 0; + int count = 0; + for(i = 0; i < net.n; ++i){ + if(net.layers[i].cost){ + sum += net.layers[i].cost[0]; + ++count; + } + } + *net.cost = sum/count; +} + +int get_predicted_class_network(network *net) +{ + return max_index(net->output, net->outputs); +} + +void backward_network(network *netp) +{ +#ifdef GPU + if(netp->gpu_index >= 0){ + backward_network_gpu(netp); + return; + } +#endif + network net = *netp; + int i; + network orig = net; + for(i = net.n-1; i >= 0; --i){ + layer l = net.layers[i]; + if(l.stopbackward) break; + if(i == 0){ + net = orig; + }else{ + layer prev = net.layers[i-1]; + net.input = prev.output; + net.delta = prev.delta; + } + 
net.index = i; + l.backward(l, net); + } +} + +float train_network_datum(network *net) +{ + *net->seen += net->batch; + net->train = 1; + forward_network(net); + backward_network(net); + float error = *net->cost; + if(((*net->seen)/net->batch)%net->subdivisions == 0) update_network(net); + return error; +} + +float train_network_sgd(network *net, data d, int n) +{ + int batch = net->batch; + + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + get_random_batch(d, batch, net->input, net->truth); + float err = train_network_datum(net); + sum += err; + } + return (float)sum/(n*batch); +} + +float train_network(network *net, data d) +{ + assert(d.X.rows % net->batch == 0); + int batch = net->batch; + int n = d.X.rows / batch; + + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + get_next_batch(d, batch, i*batch, net->input, net->truth); + float err = train_network_datum(net); + sum += err; + } + return (float)sum/(n*batch); +} + +void set_temp_network(network *net, float t) +{ + int i; + for(i = 0; i < net->n; ++i){ + net->layers[i].temperature = t; + } +} + + +void set_batch_network(network *net, int b) +{ + net->batch = b; + int i; + for(i = 0; i < net->n; ++i){ + net->layers[i].batch = b; +#ifdef CUDNN + if(net->layers[i].type == CONVOLUTIONAL){ + cudnn_convolutional_setup(net->layers + i); + } + if(net->layers[i].type == DECONVOLUTIONAL){ + layer *l = net->layers + i; + cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, l->out_h, l->out_w); + cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); + } +#endif + } +} + +int resize_network(network *net, int w, int h) +{ +#ifdef GPU + cuda_set_device(net->gpu_index); + cuda_free(net->workspace); +#endif + int i; + //if(w == net->w && h == net->h) return 0; + net->w = w; + net->h = h; + int inputs = 0; + size_t workspace_size = 0; + //fprintf(stderr, "Resizing to %d x %d...\n", w, h); + //fflush(stderr); + for (i = 0; i < 
net->n; ++i){ + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + resize_convolutional_layer(&l, w, h); + }else if(l.type == CROP){ + resize_crop_layer(&l, w, h); + }else if(l.type == MAXPOOL){ + resize_maxpool_layer(&l, w, h); + }else if(l.type == REGION){ + resize_region_layer(&l, w, h); + }else if(l.type == YOLO){ + resize_yolo_layer(&l, w, h); + }else if(l.type == ROUTE){ + resize_route_layer(&l, net); + }else if(l.type == SHORTCUT){ + resize_shortcut_layer(&l, w, h); + }else if(l.type == UPSAMPLE){ + resize_upsample_layer(&l, w, h); + }else if(l.type == REORG){ + resize_reorg_layer(&l, w, h); + }else if(l.type == AVGPOOL){ + resize_avgpool_layer(&l, w, h); + }else if(l.type == NORMALIZATION){ + resize_normalization_layer(&l, w, h); + }else if(l.type == COST){ + resize_cost_layer(&l, inputs); + }else{ + error("Cannot resize this type of layer"); + } + if(l.workspace_size > workspace_size) workspace_size = l.workspace_size; + if(l.workspace_size > 2000000000) assert(0); + inputs = l.outputs; + net->layers[i] = l; + w = l.out_w; + h = l.out_h; + if(l.type == AVGPOOL) break; + } + layer out = get_network_output_layer(net); + net->inputs = net->layers[0].inputs; + net->outputs = out.outputs; + net->truths = out.outputs; + if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths; + net->output = out.output; + free(net->input); + free(net->truth); + net->input = calloc(net->inputs*net->batch, sizeof(float)); + net->truth = calloc(net->truths*net->batch, sizeof(float)); +#ifdef GPU + if(gpu_index >= 0){ + cuda_free(net->input_gpu); + cuda_free(net->truth_gpu); + net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch); + net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch); + if(workspace_size){ + net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1); + } + }else { + free(net->workspace); + net->workspace = calloc(1, workspace_size); + } +#else + free(net->workspace); + net->workspace = 
calloc(1, workspace_size); +#endif + //fprintf(stderr, " Done!\n"); + return 0; +} + +layer get_network_detection_layer(network *net) +{ + int i; + for(i = 0; i < net->n; ++i){ + if(net->layers[i].type == DETECTION){ + return net->layers[i]; + } + } + fprintf(stderr, "Detection layer not found!!\n"); + layer l = {0}; + return l; +} + +image get_network_image_layer(network *net, int i) +{ + layer l = net->layers[i]; +#ifdef GPU + //cuda_pull_array(l.output_gpu, l.output, l.outputs); +#endif + if (l.out_w && l.out_h && l.out_c){ + return float_to_image(l.out_w, l.out_h, l.out_c, l.output); + } + image def = {0}; + return def; +} + +image get_network_image(network *net) +{ + int i; + for(i = net->n-1; i >= 0; --i){ + image m = get_network_image_layer(net, i); + if(m.h != 0) return m; + } + image def = {0}; + return def; +} + +void visualize_network(network *net) +{ + image *prev = 0; + int i; + char buff[256]; + for(i = 0; i < net->n; ++i){ + sprintf(buff, "Layer %d", i); + layer l = net->layers[i]; + if(l.type == CONVOLUTIONAL){ + prev = visualize_convolutional_layer(l, buff, prev); + } + } +} + +void top_predictions(network *net, int k, int *index) +{ + top_k(net->output, net->outputs, k, index); +} + + +float *network_predict(network *net, float *input) +{ + network orig = *net; + net->input = input; + net->truth = 0; + net->train = 0; + net->delta = 0; + forward_network(net); + float *out = net->output; + *net = orig; + return out; +} + +int num_detections(network *net, float thresh) +{ + int i; + int s = 0; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + if(l.type == YOLO){ + s += yolo_num_detections(l, thresh); + } + if(l.type == DETECTION || l.type == REGION){ + s += l.w*l.h*l.n; + } + } + return s; +} + +detection *make_network_boxes(network *net, float thresh, int *num) +{ + layer l = net->layers[net->n - 1]; + int i; + int nboxes = num_detections(net, thresh); + if(num) *num = nboxes; + detection *dets = calloc(nboxes, sizeof(detection)); + 
for(i = 0; i < nboxes; ++i){ + dets[i].prob = calloc(l.classes, sizeof(float)); + if(l.coords > 4){ + dets[i].mask = calloc(l.coords-4, sizeof(float)); + } + } + return dets; +} + +void fill_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets) +{ + int j; + for(j = 0; j < net->n; ++j){ + layer l = net->layers[j]; + if(l.type == YOLO){ + int count = get_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets); + dets += count; + } + if(l.type == REGION){ + get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets); + dets += l.w*l.h*l.n; + } + if(l.type == DETECTION){ + get_detection_detections(l, w, h, thresh, dets); + dets += l.w*l.h*l.n; + } + } +} + +detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num) +{ + detection *dets = make_network_boxes(net, thresh, num); + fill_network_boxes(net, w, h, thresh, hier, map, relative, dets); + return dets; +} + +void free_detections(detection *dets, int n) +{ + int i; + for(i = 0; i < n; ++i){ + free(dets[i].prob); + if(dets[i].mask) free(dets[i].mask); + } + free(dets); +} + +float *network_predict_image(network *net, image im) +{ + image imr = letterbox_image(im, net->w, net->h); + set_batch_network(net, 1); + float *p = network_predict(net, imr.data); + free_image(imr); + return p; +} + +int network_width(network *net){return net->w;} +int network_height(network *net){return net->h;} + +matrix network_predict_data_multi(network *net, data test, int n) +{ + int i,j,b,m; + int k = net->outputs; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.rows, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + } + for(m = 0; m < n; ++m){ + float *out = network_predict(net, X); + for(b = 0; b < 
net->batch; ++b){ + if(i+b == test.X.rows) break; + for(j = 0; j < k; ++j){ + pred.vals[i+b][j] += out[j+b*k]/n; + } + } + } + } + free(X); + return pred; +} + +matrix network_predict_data(network *net, data test) +{ + int i,j,b; + int k = net->outputs; + matrix pred = make_matrix(test.X.rows, k); + float *X = calloc(net->batch*test.X.cols, sizeof(float)); + for(i = 0; i < test.X.rows; i += net->batch){ + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); + } + float *out = network_predict(net, X); + for(b = 0; b < net->batch; ++b){ + if(i+b == test.X.rows) break; + for(j = 0; j < k; ++j){ + pred.vals[i+b][j] = out[j+b*k]; + } + } + } + free(X); + return pred; +} + +void print_network(network *net) +{ + int i,j; + for(i = 0; i < net->n; ++i){ + layer l = net->layers[i]; + float *output = l.output; + int n = l.outputs; + float mean = mean_array(output, n); + float vari = variance_array(output, n); + fprintf(stderr, "Layer %d - Mean: %f, Variance: %f\n",i,mean, vari); + if(n > 100) n = 100; + for(j = 0; j < n; ++j) fprintf(stderr, "%f, ", output[j]); + if(n == 100)fprintf(stderr,".....\n"); + fprintf(stderr, "\n"); + } +} + +void compare_networks(network *n1, network *n2, data test) +{ + matrix g1 = network_predict_data(n1, test); + matrix g2 = network_predict_data(n2, test); + int i; + int a,b,c,d; + a = b = c = d = 0; + for(i = 0; i < g1.rows; ++i){ + int truth = max_index(test.y.vals[i], test.y.cols); + int p1 = max_index(g1.vals[i], g1.cols); + int p2 = max_index(g2.vals[i], g2.cols); + if(p1 == truth){ + if(p2 == truth) ++d; + else ++c; + }else{ + if(p2 == truth) ++b; + else ++a; + } + } + printf("%5d %5d\n%5d %5d\n", a, b, c, d); + float num = pow((abs(b - c) - 1.), 2.); + float den = b + c; + printf("%f\n", num/den); +} + +float network_accuracy(network *net, data d) +{ + matrix guess = network_predict_data(net, d); + float acc = matrix_topk_accuracy(d.y, guess,1); + 
free_matrix(guess); + return acc; +} + +float *network_accuracies(network *net, data d, int n) +{ + static float acc[2]; + matrix guess = network_predict_data(net, d); + acc[0] = matrix_topk_accuracy(d.y, guess, 1); + acc[1] = matrix_topk_accuracy(d.y, guess, n); + free_matrix(guess); + return acc; +} + +layer get_network_output_layer(network *net) +{ + int i; + for(i = net->n - 1; i >= 0; --i){ + if(net->layers[i].type != COST) break; + } + return net->layers[i]; +} + +float network_accuracy_multi(network *net, data d, int n) +{ + matrix guess = network_predict_data_multi(net, d, n); + float acc = matrix_topk_accuracy(d.y, guess,1); + free_matrix(guess); + return acc; +} + +void free_network(network *net) +{ + int i; + for(i = 0; i < net->n; ++i){ + free_layer(net->layers[i]); + } + free(net->layers); + if(net->input) free(net->input); + if(net->truth) free(net->truth); +#ifdef GPU + if(net->input_gpu) cuda_free(net->input_gpu); + if(net->truth_gpu) cuda_free(net->truth_gpu); +#endif + free(net); +} + +// Some day... +// ^ What the hell is this comment for? 
+ + +layer network_output_layer(network *net) +{ + int i; + for(i = net->n - 1; i >= 0; --i){ + if(net->layers[i].type != COST) break; + } + return net->layers[i]; +} + +int network_inputs(network *net) +{ + return net->layers[0].inputs; +} + +int network_outputs(network *net) +{ + return network_output_layer(net).outputs; +} + +float *network_output(network *net) +{ + return network_output_layer(net).output; +} + +#ifdef GPU + +void forward_network_gpu(network *netp) +{ + network net = *netp; + cuda_set_device(net.gpu_index); + cuda_push_array(net.input_gpu, net.input, net.inputs*net.batch); + if(net.truth){ + cuda_push_array(net.truth_gpu, net.truth, net.truths*net.batch); + } + + int i; + for(i = 0; i < net.n; ++i){ + net.index = i; + layer l = net.layers[i]; + if(l.delta_gpu){ + fill_gpu(l.outputs * l.batch, 0, l.delta_gpu, 1); + } + l.forward_gpu(l, net); + net.input_gpu = l.output_gpu; + net.input = l.output; + if(l.truth) { + net.truth_gpu = l.output_gpu; + net.truth = l.output; + } + } + pull_network_output(netp); + calc_network_cost(netp); +} + +void backward_network_gpu(network *netp) +{ + int i; + network net = *netp; + network orig = net; + cuda_set_device(net.gpu_index); + for(i = net.n-1; i >= 0; --i){ + layer l = net.layers[i]; + if(l.stopbackward) break; + if(i == 0){ + net = orig; + }else{ + layer prev = net.layers[i-1]; + net.input = prev.output; + net.delta = prev.delta; + net.input_gpu = prev.output_gpu; + net.delta_gpu = prev.delta_gpu; + } + net.index = i; + l.backward_gpu(l, net); + } +} + +void update_network_gpu(network *netp) +{ + network net = *netp; + cuda_set_device(net.gpu_index); + int i; + update_args a = {0}; + a.batch = net.batch*net.subdivisions; + a.learning_rate = get_current_rate(netp); + a.momentum = net.momentum; + a.decay = net.decay; + a.adam = net.adam; + a.B1 = net.B1; + a.B2 = net.B2; + a.eps = net.eps; + ++*net.t; + a.t = (*net.t); + + for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.update_gpu){ + 
l.update_gpu(l, a); + } + } +} + +void harmless_update_network_gpu(network *netp) +{ + network net = *netp; + cuda_set_device(net.gpu_index); + int i; + for(i = 0; i < net.n; ++i){ + layer l = net.layers[i]; + if(l.weight_updates_gpu) fill_gpu(l.nweights, 0, l.weight_updates_gpu, 1); + if(l.bias_updates_gpu) fill_gpu(l.nbiases, 0, l.bias_updates_gpu, 1); + if(l.scale_updates_gpu) fill_gpu(l.nbiases, 0, l.scale_updates_gpu, 1); + } +} + +typedef struct { + network *net; + data d; + float *err; +} train_args; + +void *train_thread(void *ptr) +{ + train_args args = *(train_args*)ptr; + free(ptr); + cuda_set_device(args.net->gpu_index); + *args.err = train_network(args.net, args.d); + return 0; +} + +pthread_t train_network_in_thread(network *net, data d, float *err) +{ + pthread_t thread; + train_args *ptr = (train_args *)calloc(1, sizeof(train_args)); + ptr->net = net; + ptr->d = d; + ptr->err = err; + if(pthread_create(&thread, 0, train_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void merge_weights(layer l, layer base) +{ + if (l.type == CONVOLUTIONAL) { + axpy_cpu(l.n, 1, l.bias_updates, 1, base.biases, 1); + axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weights, 1); + if (l.scales) { + axpy_cpu(l.n, 1, l.scale_updates, 1, base.scales, 1); + } + } else if(l.type == CONNECTED) { + axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.biases, 1); + axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weights, 1); + } +} + +void scale_weights(layer l, float s) +{ + if (l.type == CONVOLUTIONAL) { + scal_cpu(l.n, s, l.biases, 1); + scal_cpu(l.nweights, s, l.weights, 1); + if (l.scales) { + scal_cpu(l.n, s, l.scales, 1); + } + } else if(l.type == CONNECTED) { + scal_cpu(l.outputs, s, l.biases, 1); + scal_cpu(l.outputs*l.inputs, s, l.weights, 1); + } +} + + +void pull_weights(layer l) +{ + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + cuda_pull_array(l.biases_gpu, l.bias_updates, l.n); + cuda_pull_array(l.weights_gpu, 
l.weight_updates, l.nweights); + if(l.scales) cuda_pull_array(l.scales_gpu, l.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_pull_array(l.biases_gpu, l.bias_updates, l.outputs); + cuda_pull_array(l.weights_gpu, l.weight_updates, l.outputs*l.inputs); + } +} + +void push_weights(layer l) +{ + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + cuda_push_array(l.biases_gpu, l.biases, l.n); + cuda_push_array(l.weights_gpu, l.weights, l.nweights); + if(l.scales) cuda_push_array(l.scales_gpu, l.scales, l.n); + } else if(l.type == CONNECTED){ + cuda_push_array(l.biases_gpu, l.biases, l.outputs); + cuda_push_array(l.weights_gpu, l.weights, l.outputs*l.inputs); + } +} + +void distribute_weights(layer l, layer base) +{ + if (l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL) { + cuda_push_array(l.biases_gpu, base.biases, l.n); + cuda_push_array(l.weights_gpu, base.weights, l.nweights); + if (base.scales) cuda_push_array(l.scales_gpu, base.scales, l.n); + } else if (l.type == CONNECTED) { + cuda_push_array(l.biases_gpu, base.biases, l.outputs); + cuda_push_array(l.weights_gpu, base.weights, l.outputs*l.inputs); + } +} + + +/* + + void pull_updates(layer l) + { + if(l.type == CONVOLUTIONAL){ + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + if(l.scale_updates) cuda_pull_array(l.scale_updates_gpu, l.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs); + cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs); + } + } + + void push_updates(layer l) + { + if(l.type == CONVOLUTIONAL){ + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + if(l.scale_updates) cuda_push_array(l.scale_updates_gpu, l.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_push_array(l.bias_updates_gpu, l.bias_updates, 
l.outputs); + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs); + } + } + + void update_layer(layer l, network net) + { + int update_batch = net.batch*net.subdivisions; + float rate = get_current_rate(net); + l.t = get_current_batch(net); + if(l.update_gpu){ + l.update_gpu(l, update_batch, rate*l.learning_rate_scale, net.momentum, net.decay); + } + } + void merge_updates(layer l, layer base) + { + if (l.type == CONVOLUTIONAL) { + axpy_cpu(l.n, 1, l.bias_updates, 1, base.bias_updates, 1); + axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weight_updates, 1); + if (l.scale_updates) { + axpy_cpu(l.n, 1, l.scale_updates, 1, base.scale_updates, 1); + } + } else if(l.type == CONNECTED) { + axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.bias_updates, 1); + axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weight_updates, 1); + } + } + + void distribute_updates(layer l, layer base) + { + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.n); + cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.nweights); + if(base.scale_updates) cuda_push_array(l.scale_updates_gpu, base.scale_updates, l.n); + } else if(l.type == CONNECTED){ + cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.outputs); + cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.outputs*l.inputs); + } + } + */ + +/* + void sync_layer(network *nets, int n, int j) + { + int i; + network net = nets[0]; + layer base = net.layers[j]; + scale_weights(base, 0); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i].gpu_index); + layer l = nets[i].layers[j]; + pull_weights(l); + merge_weights(l, base); + } + scale_weights(base, 1./n); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i].gpu_index); + layer l = nets[i].layers[j]; + distribute_weights(l, base); + } + } + */ + +void sync_layer(network **nets, int n, int j) +{ + int i; + network *net = nets[0]; + layer base = net->layers[j]; + 
scale_weights(base, 0); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i]->gpu_index); + layer l = nets[i]->layers[j]; + pull_weights(l); + merge_weights(l, base); + } + scale_weights(base, 1./n); + for (i = 0; i < n; ++i) { + cuda_set_device(nets[i]->gpu_index); + layer l = nets[i]->layers[j]; + distribute_weights(l, base); + } +} + +typedef struct{ + network **nets; + int n; + int j; +} sync_args; + +void *sync_layer_thread(void *ptr) +{ + sync_args args = *(sync_args*)ptr; + sync_layer(args.nets, args.n, args.j); + free(ptr); + return 0; +} + +pthread_t sync_layer_in_thread(network **nets, int n, int j) +{ + pthread_t thread; + sync_args *ptr = (sync_args *)calloc(1, sizeof(sync_args)); + ptr->nets = nets; + ptr->n = n; + ptr->j = j; + if(pthread_create(&thread, 0, sync_layer_thread, ptr)) error("Thread creation failed"); + return thread; +} + +void sync_nets(network **nets, int n, int interval) +{ + int j; + int layers = nets[0]->n; + pthread_t *threads = (pthread_t *) calloc(layers, sizeof(pthread_t)); + + *(nets[0]->seen) += interval * (n-1) * nets[0]->batch * nets[0]->subdivisions; + for (j = 0; j < n; ++j){ + *(nets[j]->seen) = *(nets[0]->seen); + } + for (j = 0; j < layers; ++j) { + threads[j] = sync_layer_in_thread(nets, n, j); + } + for (j = 0; j < layers; ++j) { + pthread_join(threads[j], 0); + } + free(threads); +} + +float train_networks(network **nets, int n, data d, int interval) +{ + int i; + int batch = nets[0]->batch; + int subdivisions = nets[0]->subdivisions; + assert(batch * subdivisions * n == d.X.rows); + pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t)); + float *errors = (float *) calloc(n, sizeof(float)); + + float sum = 0; + for(i = 0; i < n; ++i){ + data p = get_data_part(d, i, n); + threads[i] = train_network_in_thread(nets[i], p, errors + i); + } + for(i = 0; i < n; ++i){ + pthread_join(threads[i], 0); + //printf("%f\n", errors[i]); + sum += errors[i]; + } + //cudaDeviceSynchronize(); + if (get_current_batch(nets[0]) 
% interval == 0) { + printf("Syncing... "); + fflush(stdout); + sync_nets(nets, n, interval); + printf("Done!\n"); + } + //cudaDeviceSynchronize(); + free(threads); + free(errors); + return (float)sum/(n); +} + +void pull_network_output(network *net) +{ + layer l = get_network_output_layer(net); + cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); +} + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/network.h b/workloads/realworld/uvm_prefetch_async/darknet/src/network.h new file mode 100644 index 0000000000000000000000000000000000000000..1b0dfd1aaa3e090c6ce276d26f24d127de2cb66d --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/network.h @@ -0,0 +1,29 @@ +// Oh boy, why am I about to do this.... +#ifndef NETWORK_H +#define NETWORK_H +#include "darknet.h" + +#include "image.h" +#include "layer.h" +#include "data.h" +#include "tree.h" + + +#ifdef GPU +void pull_network_output(network *net); +#endif + +void compare_networks(network *n1, network *n2, data d); +char *get_layer_string(LAYER_TYPE a); + +network *make_network(int n); + + +float network_accuracy_multi(network *net, data d, int n); +int get_predicted_class_network(network *net); +void print_network(network *net); +int resize_network(network *net, int w, int h); +void calc_network_cost(network *net); + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/normalization_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/normalization_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..424714fe8653f79b57fd4cde625997749d8eff83 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/normalization_layer.c @@ -0,0 +1,151 @@ +#include "normalization_layer.h" +#include "blas.h" + +#include + +layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa) +{ + fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", 
w,h,c,size); + layer layer = {0}; + layer.type = NORMALIZATION; + layer.batch = batch; + layer.h = layer.out_h = h; + layer.w = layer.out_w = w; + layer.c = layer.out_c = c; + layer.kappa = kappa; + layer.size = size; + layer.alpha = alpha; + layer.beta = beta; + layer.output = calloc(h * w * c * batch, sizeof(float)); + layer.delta = calloc(h * w * c * batch, sizeof(float)); + layer.squared = calloc(h * w * c * batch, sizeof(float)); + layer.norms = calloc(h * w * c * batch, sizeof(float)); + layer.inputs = w*h*c; + layer.outputs = layer.inputs; + + layer.forward = forward_normalization_layer; + layer.backward = backward_normalization_layer; + #ifdef GPU + layer.forward_gpu = forward_normalization_layer_gpu; + layer.backward_gpu = backward_normalization_layer_gpu; + + layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch); + layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch); + layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch); + layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch); + #endif + return layer; +} + +void resize_normalization_layer(layer *layer, int w, int h) +{ + int c = layer->c; + int batch = layer->batch; + layer->h = h; + layer->w = w; + layer->out_h = h; + layer->out_w = w; + layer->inputs = w*h*c; + layer->outputs = layer->inputs; + layer->output = realloc(layer->output, h * w * c * batch * sizeof(float)); + layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float)); + layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float)); + layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float)); +#ifdef GPU + cuda_free(layer->output_gpu); + cuda_free(layer->delta_gpu); + cuda_free(layer->squared_gpu); + cuda_free(layer->norms_gpu); + layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch); + layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch); + layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch); + 
layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch); +#endif +} + +void forward_normalization_layer(const layer layer, network net) +{ + int k,b; + int w = layer.w; + int h = layer.h; + int c = layer.c; + scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1); + + for(b = 0; b < layer.batch; ++b){ + float *squared = layer.squared + w*h*c*b; + float *norms = layer.norms + w*h*c*b; + float *input = net.input + w*h*c*b; + pow_cpu(w*h*c, 2, input, 1, squared, 1); + + const_cpu(w*h, layer.kappa, norms, 1); + for(k = 0; k < layer.size/2; ++k){ + axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); + } + + for(k = 1; k < layer.c; ++k){ + copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); + int prev = k - ((layer.size-1)/2) - 1; + int next = k + (layer.size/2); + if(prev >= 0) axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); + if(next < layer.c) axpy_cpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); + } + } + pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1); + mul_cpu(w*h*c*layer.batch, net.input, 1, layer.output, 1); +} + +void backward_normalization_layer(const layer layer, network net) +{ + // TODO This is approximate ;-) + // Also this should add in to delta instead of overwritting. 
+ + int w = layer.w; + int h = layer.h; + int c = layer.c; + pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, net.delta, 1); + mul_cpu(w*h*c*layer.batch, layer.delta, 1, net.delta, 1); +} + +#ifdef GPU +void forward_normalization_layer_gpu(const layer layer, network net) +{ + int k,b; + int w = layer.w; + int h = layer.h; + int c = layer.c; + scal_gpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1); + + for(b = 0; b < layer.batch; ++b){ + float *squared = layer.squared_gpu + w*h*c*b; + float *norms = layer.norms_gpu + w*h*c*b; + float *input = net.input_gpu + w*h*c*b; + pow_gpu(w*h*c, 2, input, 1, squared, 1); + + const_gpu(w*h, layer.kappa, norms, 1); + for(k = 0; k < layer.size/2; ++k){ + axpy_gpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); + } + + for(k = 1; k < layer.c; ++k){ + copy_gpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); + int prev = k - ((layer.size-1)/2) - 1; + int next = k + (layer.size/2); + if(prev >= 0) axpy_gpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); + if(next < layer.c) axpy_gpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); + } + } + pow_gpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1); + mul_gpu(w*h*c*layer.batch, net.input_gpu, 1, layer.output_gpu, 1); +} + +void backward_normalization_layer_gpu(const layer layer, network net) +{ + // TODO This is approximate ;-) + + int w = layer.w; + int h = layer.h; + int c = layer.c; + pow_gpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, net.delta_gpu, 1); + mul_gpu(w*h*c*layer.batch, layer.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/normalization_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/normalization_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..665baa5066282335b6625618ce07c2fcc833d952 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/normalization_layer.h @@ -0,0 +1,19 @@ +#ifndef 
NORMALIZATION_LAYER_H +#define NORMALIZATION_LAYER_H + +#include "image.h" +#include "layer.h" +#include "network.h" + +layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa); +void resize_normalization_layer(layer *layer, int h, int w); +void forward_normalization_layer(const layer layer, network net); +void backward_normalization_layer(const layer layer, network net); +void visualize_normalization_layer(layer layer, char *window); + +#ifdef GPU +void forward_normalization_layer_gpu(const layer layer, network net); +void backward_normalization_layer_gpu(const layer layer, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/option_list.c b/workloads/realworld/uvm_prefetch_async/darknet/src/option_list.c new file mode 100644 index 0000000000000000000000000000000000000000..2f52781f8096fecc5e9d1db3cfbfa10685506b93 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/option_list.c @@ -0,0 +1,140 @@ +#include +#include +#include +#include "option_list.h" +#include "utils.h" + +list *read_data_cfg(char *filename) +{ + FILE *file = fopen(filename, "r"); + if(file == 0) file_error(filename); + char *line; + int nu = 0; + list *options = make_list(); + while((line=fgetl(file)) != 0){ + ++ nu; + strip(line); + switch(line[0]){ + case '\0': + case '#': + case ';': + free(line); + break; + default: + if(!read_option(line, options)){ + fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); + free(line); + } + break; + } + } + fclose(file); + return options; +} + +metadata get_metadata(char *file) +{ + metadata m = {0}; + list *options = read_data_cfg(file); + + char *name_list = option_find_str(options, "names", 0); + if(!name_list) name_list = option_find_str(options, "labels", 0); + if(!name_list) { + fprintf(stderr, "No names or labels found\n"); + } else { + m.names = get_labels(name_list); + } + m.classes = option_find_int(options, 
"classes", 2); + free_list(options); + return m; +} + +int read_option(char *s, list *options) +{ + size_t i; + size_t len = strlen(s); + char *val = 0; + for(i = 0; i < len; ++i){ + if(s[i] == '='){ + s[i] = '\0'; + val = s+i+1; + break; + } + } + if(i == len-1) return 0; + char *key = s; + option_insert(options, key, val); + return 1; +} + +void option_insert(list *l, char *key, char *val) +{ + kvp *p = malloc(sizeof(kvp)); + p->key = key; + p->val = val; + p->used = 0; + list_insert(l, p); +} + +void option_unused(list *l) +{ + node *n = l->front; + while(n){ + kvp *p = (kvp *)n->val; + if(!p->used){ + fprintf(stderr, "Unused field: '%s = %s'\n", p->key, p->val); + } + n = n->next; + } +} + +char *option_find(list *l, char *key) +{ + node *n = l->front; + while(n){ + kvp *p = (kvp *)n->val; + if(strcmp(p->key, key) == 0){ + p->used = 1; + return p->val; + } + n = n->next; + } + return 0; +} +char *option_find_str(list *l, char *key, char *def) +{ + char *v = option_find(l, key); + if(v) return v; + if(def) fprintf(stderr, "%s: Using default '%s'\n", key, def); + return def; +} + +int option_find_int(list *l, char *key, int def) +{ + char *v = option_find(l, key); + if(v) return atoi(v); + fprintf(stderr, "%s: Using default '%d'\n", key, def); + return def; +} + +int option_find_int_quiet(list *l, char *key, int def) +{ + char *v = option_find(l, key); + if(v) return atoi(v); + return def; +} + +float option_find_float_quiet(list *l, char *key, float def) +{ + char *v = option_find(l, key); + if(v) return atof(v); + return def; +} + +float option_find_float(list *l, char *key, float def) +{ + char *v = option_find(l, key); + if(v) return atof(v); + fprintf(stderr, "%s: Using default '%lf'\n", key, def); + return def; +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/option_list.h b/workloads/realworld/uvm_prefetch_async/darknet/src/option_list.h new file mode 100644 index 
0000000000000000000000000000000000000000..844bd8724b77889d9ab6e6e70f62305e3339048c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/option_list.h @@ -0,0 +1,19 @@ +#ifndef OPTION_LIST_H +#define OPTION_LIST_H +#include "list.h" + +typedef struct{ + char *key; + char *val; + int used; +} kvp; + + +int read_option(char *s, list *options); +void option_insert(list *l, char *key, char *val); +char *option_find(list *l, char *key); +float option_find_float(list *l, char *key, float def); +float option_find_float_quiet(list *l, char *key, float def); +void option_unused(list *l); + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/parser.c b/workloads/realworld/uvm_prefetch_async/darknet/src/parser.c new file mode 100644 index 0000000000000000000000000000000000000000..c8141c9f2ddc95941900d11006ff583fadf22290 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/parser.c @@ -0,0 +1,1312 @@ +#include +#include +#include +#include + +#include "activation_layer.h" +#include "logistic_layer.h" +#include "l2norm_layer.h" +#include "activations.h" +#include "avgpool_layer.h" +#include "batchnorm_layer.h" +#include "blas.h" +#include "connected_layer.h" +#include "deconvolutional_layer.h" +#include "convolutional_layer.h" +#include "cost_layer.h" +#include "crnn_layer.h" +#include "crop_layer.h" +#include "detection_layer.h" +#include "dropout_layer.h" +#include "gru_layer.h" +#include "list.h" +#include "local_layer.h" +#include "maxpool_layer.h" +#include "normalization_layer.h" +#include "option_list.h" +#include "parser.h" +#include "region_layer.h" +#include "yolo_layer.h" +#include "iseg_layer.h" +#include "reorg_layer.h" +#include "rnn_layer.h" +#include "route_layer.h" +#include "upsample_layer.h" +#include "shortcut_layer.h" +#include "softmax_layer.h" +#include "lstm_layer.h" +#include "utils.h" + +typedef struct{ + char *type; + list *options; +}section; + +list *read_cfg(char *filename); + +LAYER_TYPE 
string_to_layer_type(char * type) +{ + + if (strcmp(type, "[shortcut]")==0) return SHORTCUT; + if (strcmp(type, "[crop]")==0) return CROP; + if (strcmp(type, "[cost]")==0) return COST; + if (strcmp(type, "[detection]")==0) return DETECTION; + if (strcmp(type, "[region]")==0) return REGION; + if (strcmp(type, "[yolo]")==0) return YOLO; + if (strcmp(type, "[iseg]")==0) return ISEG; + if (strcmp(type, "[local]")==0) return LOCAL; + if (strcmp(type, "[conv]")==0 + || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL; + if (strcmp(type, "[deconv]")==0 + || strcmp(type, "[deconvolutional]")==0) return DECONVOLUTIONAL; + if (strcmp(type, "[activation]")==0) return ACTIVE; + if (strcmp(type, "[logistic]")==0) return LOGXENT; + if (strcmp(type, "[l2norm]")==0) return L2NORM; + if (strcmp(type, "[net]")==0 + || strcmp(type, "[network]")==0) return NETWORK; + if (strcmp(type, "[crnn]")==0) return CRNN; + if (strcmp(type, "[gru]")==0) return GRU; + if (strcmp(type, "[lstm]") == 0) return LSTM; + if (strcmp(type, "[rnn]")==0) return RNN; + if (strcmp(type, "[conn]")==0 + || strcmp(type, "[connected]")==0) return CONNECTED; + if (strcmp(type, "[max]")==0 + || strcmp(type, "[maxpool]")==0) return MAXPOOL; + if (strcmp(type, "[reorg]")==0) return REORG; + if (strcmp(type, "[avg]")==0 + || strcmp(type, "[avgpool]")==0) return AVGPOOL; + if (strcmp(type, "[dropout]")==0) return DROPOUT; + if (strcmp(type, "[lrn]")==0 + || strcmp(type, "[normalization]")==0) return NORMALIZATION; + if (strcmp(type, "[batchnorm]")==0) return BATCHNORM; + if (strcmp(type, "[soft]")==0 + || strcmp(type, "[softmax]")==0) return SOFTMAX; + if (strcmp(type, "[route]")==0) return ROUTE; + if (strcmp(type, "[upsample]")==0) return UPSAMPLE; + return BLANK; +} + +void free_section(section *s) +{ + free(s->type); + node *n = s->options->front; + while(n){ + kvp *pair = (kvp *)n->val; + free(pair->key); + free(pair); + node *next = n->next; + free(n); + n = next; + } + free(s->options); + free(s); +} + 
+void parse_data(char *data, float *a, int n) +{ + int i; + if(!data) return; + char *curr = data; + char *next = data; + int done = 0; + for(i = 0; i < n && !done; ++i){ + while(*++next !='\0' && *next != ','); + if(*next == '\0') done = 1; + *next = '\0'; + sscanf(curr, "%g", &a[i]); + curr = next+1; + } +} + +typedef struct size_params{ + int batch; + int inputs; + int h; + int w; + int c; + int index; + int time_steps; + network *net; +} size_params; + +local_layer parse_local(list *options, size_params params) +{ + int n = option_find_int(options, "filters",1); + int size = option_find_int(options, "size",1); + int stride = option_find_int(options, "stride",1); + int pad = option_find_int(options, "pad",0); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before local layer must output image."); + + local_layer layer = make_local_layer(batch,h,w,c,n,size,stride,pad,activation); + + return layer; +} + +layer parse_deconvolutional(list *options, size_params params) +{ + int n = option_find_int(options, "filters",1); + int size = option_find_int(options, "size",1); + int stride = option_find_int(options, "stride",1); + + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before deconvolutional layer must output image."); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + int pad = option_find_int_quiet(options, "pad",0); + int padding = option_find_int_quiet(options, "padding",0); + if(pad) padding = size/2; + + layer l = make_deconvolutional_layer(batch,h,w,c,n,size,stride,padding, activation, batch_normalize, params.net->adam); + 
+ return l; +} + + +convolutional_layer parse_convolutional(list *options, size_params params) +{ + int n = option_find_int(options, "filters",1); + int size = option_find_int(options, "size",1); + int stride = option_find_int(options, "stride",1); + int pad = option_find_int_quiet(options, "pad",0); + int padding = option_find_int_quiet(options, "padding",0); + int groups = option_find_int_quiet(options, "groups", 1); + if(pad) padding = size/2; + + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before convolutional layer must output image."); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + int binary = option_find_int_quiet(options, "binary", 0); + int xnor = option_find_int_quiet(options, "xnor", 0); + + convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,groups,size,stride,padding,activation, batch_normalize, binary, xnor, params.net->adam); + layer.flipped = option_find_int_quiet(options, "flipped", 0); + layer.dot = option_find_float_quiet(options, "dot", 0); + + return layer; +} + +layer parse_crnn(list *options, size_params params) +{ + int output_filters = option_find_int(options, "output_filters",1); + int hidden_filters = option_find_int(options, "hidden_filters",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, activation, batch_normalize); + + l.shortcut = option_find_int_quiet(options, "shortcut", 0); + + return l; +} + +layer parse_rnn(list *options, size_params params) +{ + int output = option_find_int(options, 
"output",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_rnn_layer(params.batch, params.inputs, output, params.time_steps, activation, batch_normalize, params.net->adam); + + l.shortcut = option_find_int_quiet(options, "shortcut", 0); + + return l; +} + +layer parse_gru(list *options, size_params params) +{ + int output = option_find_int(options, "output",1); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_gru_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net->adam); + l.tanh = option_find_int_quiet(options, "tanh", 0); + + return l; +} + +layer parse_lstm(list *options, size_params params) +{ + int output = option_find_int(options, "output", 1); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_lstm_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize, params.net->adam); + + return l; +} + +layer parse_connected(list *options, size_params params) +{ + int output = option_find_int(options, "output",1); + char *activation_s = option_find_str(options, "activation", "logistic"); + ACTIVATION activation = get_activation(activation_s); + int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); + + layer l = make_connected_layer(params.batch, params.inputs, output, activation, batch_normalize, params.net->adam); + return l; +} + +layer parse_softmax(list *options, size_params params) +{ + int groups = option_find_int_quiet(options, "groups",1); + layer l = make_softmax_layer(params.batch, params.inputs, groups); + l.temperature = option_find_float_quiet(options, "temperature", 1); + char *tree_file = option_find_str(options, "tree", 0); + if (tree_file) l.softmax_tree = read_tree(tree_file); + l.w = params.w; + 
l.h = params.h; + l.c = params.c; + l.spatial = option_find_float_quiet(options, "spatial", 0); + l.noloss = option_find_int_quiet(options, "noloss", 0); + return l; +} + +int *parse_yolo_mask(char *a, int *num) +{ + int *mask = 0; + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + mask = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + int val = atoi(a); + mask[i] = val; + a = strchr(a, ',')+1; + } + *num = n; + } + return mask; +} + +layer parse_yolo(list *options, size_params params) +{ + int classes = option_find_int(options, "classes", 20); + int total = option_find_int(options, "num", 1); + int num = total; + + char *a = option_find_str(options, "mask", 0); + int *mask = parse_yolo_mask(a, &num); + layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes); + assert(l.outputs == params.inputs); + + l.max_boxes = option_find_int_quiet(options, "max",90); + l.jitter = option_find_float(options, "jitter", .2); + + l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); + l.truth_thresh = option_find_float(options, "truth_thresh", 1); + l.random = option_find_int_quiet(options, "random", 0); + + char *map_file = option_find_str(options, "map", 0); + if (map_file) l.map = read_map(map_file); + + a = option_find_str(options, "anchors", 0); + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + for(i = 0; i < n; ++i){ + float bias = atof(a); + l.biases[i] = bias; + a = strchr(a, ',')+1; + } + } + return l; +} + +layer parse_iseg(list *options, size_params params) +{ + int classes = option_find_int(options, "classes", 20); + int ids = option_find_int(options, "ids", 32); + layer l = make_iseg_layer(params.batch, params.w, params.h, classes, ids); + assert(l.outputs == params.inputs); + return l; +} + +layer parse_region(list *options, size_params params) +{ + int coords = option_find_int(options, "coords", 4); 
+ int classes = option_find_int(options, "classes", 20); + int num = option_find_int(options, "num", 1); + + layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords); + assert(l.outputs == params.inputs); + + l.log = option_find_int_quiet(options, "log", 0); + l.sqrt = option_find_int_quiet(options, "sqrt", 0); + + l.softmax = option_find_int(options, "softmax", 0); + l.background = option_find_int_quiet(options, "background", 0); + l.max_boxes = option_find_int_quiet(options, "max",30); + l.jitter = option_find_float(options, "jitter", .2); + l.rescore = option_find_int_quiet(options, "rescore",0); + + l.thresh = option_find_float(options, "thresh", .5); + l.classfix = option_find_int_quiet(options, "classfix", 0); + l.absolute = option_find_int_quiet(options, "absolute", 0); + l.random = option_find_int_quiet(options, "random", 0); + + l.coord_scale = option_find_float(options, "coord_scale", 1); + l.object_scale = option_find_float(options, "object_scale", 1); + l.noobject_scale = option_find_float(options, "noobject_scale", 1); + l.mask_scale = option_find_float(options, "mask_scale", 1); + l.class_scale = option_find_float(options, "class_scale", 1); + l.bias_match = option_find_int_quiet(options, "bias_match",0); + + char *tree_file = option_find_str(options, "tree", 0); + if (tree_file) l.softmax_tree = read_tree(tree_file); + char *map_file = option_find_str(options, "map", 0); + if (map_file) l.map = read_map(map_file); + + char *a = option_find_str(options, "anchors", 0); + if(a){ + int len = strlen(a); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (a[i] == ',') ++n; + } + for(i = 0; i < n; ++i){ + float bias = atof(a); + l.biases[i] = bias; + a = strchr(a, ',')+1; + } + } + return l; +} + +detection_layer parse_detection(list *options, size_params params) +{ + int coords = option_find_int(options, "coords", 1); + int classes = option_find_int(options, "classes", 1); + int rescore = option_find_int(options, "rescore", 
0); + int num = option_find_int(options, "num", 1); + int side = option_find_int(options, "side", 7); + detection_layer layer = make_detection_layer(params.batch, params.inputs, num, side, classes, coords, rescore); + + layer.softmax = option_find_int(options, "softmax", 0); + layer.sqrt = option_find_int(options, "sqrt", 0); + + layer.max_boxes = option_find_int_quiet(options, "max",90); + layer.coord_scale = option_find_float(options, "coord_scale", 1); + layer.forced = option_find_int(options, "forced", 0); + layer.object_scale = option_find_float(options, "object_scale", 1); + layer.noobject_scale = option_find_float(options, "noobject_scale", 1); + layer.class_scale = option_find_float(options, "class_scale", 1); + layer.jitter = option_find_float(options, "jitter", .2); + layer.random = option_find_int_quiet(options, "random", 0); + layer.reorg = option_find_int_quiet(options, "reorg", 0); + return layer; +} + +cost_layer parse_cost(list *options, size_params params) +{ + char *type_s = option_find_str(options, "type", "sse"); + COST_TYPE type = get_cost_type(type_s); + float scale = option_find_float_quiet(options, "scale",1); + cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale); + layer.ratio = option_find_float_quiet(options, "ratio",0); + layer.noobject_scale = option_find_float_quiet(options, "noobj", 1); + layer.thresh = option_find_float_quiet(options, "thresh",0); + return layer; +} + +crop_layer parse_crop(list *options, size_params params) +{ + int crop_height = option_find_int(options, "crop_height",1); + int crop_width = option_find_int(options, "crop_width",1); + int flip = option_find_int(options, "flip",0); + float angle = option_find_float(options, "angle",0); + float saturation = option_find_float(options, "saturation",1); + float exposure = option_find_float(options, "exposure",1); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before crop 
layer must output image."); + + int noadjust = option_find_int_quiet(options, "noadjust",0); + + crop_layer l = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip, angle, saturation, exposure); + l.shift = option_find_float(options, "shift", 0); + l.noadjust = noadjust; + return l; +} + +layer parse_reorg(list *options, size_params params) +{ + int stride = option_find_int(options, "stride",1); + int reverse = option_find_int_quiet(options, "reverse",0); + int flatten = option_find_int_quiet(options, "flatten",0); + int extra = option_find_int_quiet(options, "extra",0); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before reorg layer must output image."); + + layer layer = make_reorg_layer(batch,w,h,c,stride,reverse, flatten, extra); + return layer; +} + +maxpool_layer parse_maxpool(list *options, size_params params) +{ + int stride = option_find_int(options, "stride",1); + int size = option_find_int(options, "size",stride); + int padding = option_find_int_quiet(options, "padding", size-1); + + int batch,h,w,c; + h = params.h; + w = params.w; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before maxpool layer must output image."); + + maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding); + return layer; +} + +avgpool_layer parse_avgpool(list *options, size_params params) +{ + int batch,w,h,c; + w = params.w; + h = params.h; + c = params.c; + batch=params.batch; + if(!(h && w && c)) error("Layer before avgpool layer must output image."); + + avgpool_layer layer = make_avgpool_layer(batch,w,h,c); + return layer; +} + +dropout_layer parse_dropout(list *options, size_params params) +{ + float probability = option_find_float(options, "probability", .5); + dropout_layer layer = make_dropout_layer(params.batch, params.inputs, probability); + layer.out_w = params.w; + layer.out_h = params.h; + layer.out_c = params.c; + return layer; +} + +layer 
parse_normalization(list *options, size_params params) +{ + float alpha = option_find_float(options, "alpha", .0001); + float beta = option_find_float(options, "beta" , .75); + float kappa = option_find_float(options, "kappa", 1); + int size = option_find_int(options, "size", 5); + layer l = make_normalization_layer(params.batch, params.w, params.h, params.c, size, alpha, beta, kappa); + return l; +} + +layer parse_batchnorm(list *options, size_params params) +{ + layer l = make_batchnorm_layer(params.batch, params.w, params.h, params.c); + return l; +} + +layer parse_shortcut(list *options, size_params params, network *net) +{ + char *l = option_find(options, "from"); + int index = atoi(l); + if(index < 0) index = params.index + index; + + int batch = params.batch; + layer from = net->layers[index]; + + layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c); + + char *activation_s = option_find_str(options, "activation", "linear"); + ACTIVATION activation = get_activation(activation_s); + s.activation = activation; + s.alpha = option_find_float_quiet(options, "alpha", 1); + s.beta = option_find_float_quiet(options, "beta", 1); + return s; +} + + +layer parse_l2norm(list *options, size_params params) +{ + layer l = make_l2norm_layer(params.batch, params.inputs); + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; + return l; +} + + +layer parse_logistic(list *options, size_params params) +{ + layer l = make_logistic_layer(params.batch, params.inputs); + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = l.out_c = params.c; + return l; +} + +layer parse_activation(list *options, size_params params) +{ + char *activation_s = option_find_str(options, "activation", "linear"); + ACTIVATION activation = get_activation(activation_s); + + layer l = make_activation_layer(params.batch, params.inputs, activation); + + l.h = l.out_h = params.h; + l.w = l.out_w = params.w; + l.c = 
l.out_c = params.c; + + return l; +} + +layer parse_upsample(list *options, size_params params, network *net) +{ + + int stride = option_find_int(options, "stride",2); + layer l = make_upsample_layer(params.batch, params.w, params.h, params.c, stride); + l.scale = option_find_float_quiet(options, "scale", 1); + return l; +} + +route_layer parse_route(list *options, size_params params, network *net) +{ + char *l = option_find(options, "layers"); + int len = strlen(l); + if(!l) error("Route Layer must specify input layers"); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (l[i] == ',') ++n; + } + + int *layers = calloc(n, sizeof(int)); + int *sizes = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + int index = atoi(l); + l = strchr(l, ',')+1; + if(index < 0) index = params.index + index; + layers[i] = index; + sizes[i] = net->layers[index].outputs; + } + int batch = params.batch; + + route_layer layer = make_route_layer(batch, n, layers, sizes); + + convolutional_layer first = net->layers[layers[0]]; + layer.out_w = first.out_w; + layer.out_h = first.out_h; + layer.out_c = first.out_c; + for(i = 1; i < n; ++i){ + int index = layers[i]; + convolutional_layer next = net->layers[index]; + if(next.out_w == first.out_w && next.out_h == first.out_h){ + layer.out_c += next.out_c; + }else{ + layer.out_h = layer.out_w = layer.out_c = 0; + } + } + + return layer; +} + +learning_rate_policy get_policy(char *s) +{ + if (strcmp(s, "random")==0) return RANDOM; + if (strcmp(s, "poly")==0) return POLY; + if (strcmp(s, "constant")==0) return CONSTANT; + if (strcmp(s, "step")==0) return STEP; + if (strcmp(s, "exp")==0) return EXP; + if (strcmp(s, "sigmoid")==0) return SIG; + if (strcmp(s, "steps")==0) return STEPS; + fprintf(stderr, "Couldn't find policy %s, going with constant\n", s); + return CONSTANT; +} + +void parse_net_options(list *options, network *net) +{ + net->batch = option_find_int(options, "batch",1); + net->learning_rate = option_find_float(options, 
"learning_rate", .001); + net->momentum = option_find_float(options, "momentum", .9); + net->decay = option_find_float(options, "decay", .0001); + int subdivs = option_find_int(options, "subdivisions",1); + net->time_steps = option_find_int_quiet(options, "time_steps",1); + net->notruth = option_find_int_quiet(options, "notruth",0); + net->batch /= subdivs; + net->batch *= net->time_steps; + net->subdivisions = subdivs; + net->random = option_find_int_quiet(options, "random", 0); + + net->adam = option_find_int_quiet(options, "adam", 0); + if(net->adam){ + net->B1 = option_find_float(options, "B1", .9); + net->B2 = option_find_float(options, "B2", .999); + net->eps = option_find_float(options, "eps", .0000001); + } + + net->h = option_find_int_quiet(options, "height",0); + net->w = option_find_int_quiet(options, "width",0); + net->c = option_find_int_quiet(options, "channels",0); + net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c); + net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2); + net->min_crop = option_find_int_quiet(options, "min_crop",net->w); + net->max_ratio = option_find_float_quiet(options, "max_ratio", (float) net->max_crop / net->w); + net->min_ratio = option_find_float_quiet(options, "min_ratio", (float) net->min_crop / net->w); + net->center = option_find_int_quiet(options, "center",0); + net->clip = option_find_float_quiet(options, "clip", 0); + + net->angle = option_find_float_quiet(options, "angle", 0); + net->aspect = option_find_float_quiet(options, "aspect", 1); + net->saturation = option_find_float_quiet(options, "saturation", 1); + net->exposure = option_find_float_quiet(options, "exposure", 1); + net->hue = option_find_float_quiet(options, "hue", 0); + + if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied"); + + char *policy_s = option_find_str(options, "policy", "constant"); + net->policy = get_policy(policy_s); + net->burn_in = 
option_find_int_quiet(options, "burn_in", 0); + net->power = option_find_float_quiet(options, "power", 4); + if(net->policy == STEP){ + net->step = option_find_int(options, "step", 1); + net->scale = option_find_float(options, "scale", 1); + } else if (net->policy == STEPS){ + char *l = option_find(options, "steps"); + char *p = option_find(options, "scales"); + if(!l || !p) error("STEPS policy must have steps and scales in cfg file"); + + int len = strlen(l); + int n = 1; + int i; + for(i = 0; i < len; ++i){ + if (l[i] == ',') ++n; + } + int *steps = calloc(n, sizeof(int)); + float *scales = calloc(n, sizeof(float)); + for(i = 0; i < n; ++i){ + int step = atoi(l); + float scale = atof(p); + l = strchr(l, ',')+1; + p = strchr(p, ',')+1; + steps[i] = step; + scales[i] = scale; + } + net->scales = scales; + net->steps = steps; + net->num_steps = n; + } else if (net->policy == EXP){ + net->gamma = option_find_float(options, "gamma", 1); + } else if (net->policy == SIG){ + net->gamma = option_find_float(options, "gamma", 1); + net->step = option_find_int(options, "step", 1); + } else if (net->policy == POLY || net->policy == RANDOM){ + } + net->max_batches = option_find_int(options, "max_batches", 0); +} + +int is_network(section *s) +{ + return (strcmp(s->type, "[net]")==0 + || strcmp(s->type, "[network]")==0); +} + +network *parse_network_cfg(char *filename) +{ + list *sections = read_cfg(filename); + node *n = sections->front; + if(!n) error("Config file has no sections"); + network *net = make_network(sections->size - 1); + net->gpu_index = gpu_index; + size_params params; + + section *s = (section *)n->val; + list *options = s->options; + if(!is_network(s)) error("First section must be [net] or [network]"); + parse_net_options(options, net); + + params.h = net->h; + params.w = net->w; + params.c = net->c; + params.inputs = net->inputs; + params.batch = net->batch; + params.time_steps = net->time_steps; + params.net = net; + + size_t workspace_size = 0; + n = 
n->next; + int count = 0; + free_section(s); + fprintf(stderr, "layer filters size input output\n"); + while(n){ + params.index = count; + fprintf(stderr, "%5d ", count); + s = (section *)n->val; + options = s->options; + layer l = {0}; + LAYER_TYPE lt = string_to_layer_type(s->type); + if(lt == CONVOLUTIONAL){ + l = parse_convolutional(options, params); + }else if(lt == DECONVOLUTIONAL){ + l = parse_deconvolutional(options, params); + }else if(lt == LOCAL){ + l = parse_local(options, params); + }else if(lt == ACTIVE){ + l = parse_activation(options, params); + }else if(lt == LOGXENT){ + l = parse_logistic(options, params); + }else if(lt == L2NORM){ + l = parse_l2norm(options, params); + }else if(lt == RNN){ + l = parse_rnn(options, params); + }else if(lt == GRU){ + l = parse_gru(options, params); + }else if (lt == LSTM) { + l = parse_lstm(options, params); + }else if(lt == CRNN){ + l = parse_crnn(options, params); + }else if(lt == CONNECTED){ + l = parse_connected(options, params); + }else if(lt == CROP){ + l = parse_crop(options, params); + }else if(lt == COST){ + l = parse_cost(options, params); + }else if(lt == REGION){ + l = parse_region(options, params); + }else if(lt == YOLO){ + l = parse_yolo(options, params); + }else if(lt == ISEG){ + l = parse_iseg(options, params); + }else if(lt == DETECTION){ + l = parse_detection(options, params); + }else if(lt == SOFTMAX){ + l = parse_softmax(options, params); + net->hierarchy = l.softmax_tree; + }else if(lt == NORMALIZATION){ + l = parse_normalization(options, params); + }else if(lt == BATCHNORM){ + l = parse_batchnorm(options, params); + }else if(lt == MAXPOOL){ + l = parse_maxpool(options, params); + }else if(lt == REORG){ + l = parse_reorg(options, params); + }else if(lt == AVGPOOL){ + l = parse_avgpool(options, params); + }else if(lt == ROUTE){ + l = parse_route(options, params, net); + }else if(lt == UPSAMPLE){ + l = parse_upsample(options, params, net); + }else if(lt == SHORTCUT){ + l = parse_shortcut(options, 
params, net); + }else if(lt == DROPOUT){ + l = parse_dropout(options, params); + l.output = net->layers[count-1].output; + l.delta = net->layers[count-1].delta; +#ifdef GPU + l.output_gpu = net->layers[count-1].output_gpu; + l.delta_gpu = net->layers[count-1].delta_gpu; +#endif + }else{ + fprintf(stderr, "Type not recognized: %s\n", s->type); + } + l.clip = net->clip; + l.truth = option_find_int_quiet(options, "truth", 0); + l.onlyforward = option_find_int_quiet(options, "onlyforward", 0); + l.stopbackward = option_find_int_quiet(options, "stopbackward", 0); + l.dontsave = option_find_int_quiet(options, "dontsave", 0); + l.dontload = option_find_int_quiet(options, "dontload", 0); + l.numload = option_find_int_quiet(options, "numload", 0); + l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0); + l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1); + l.smooth = option_find_float_quiet(options, "smooth", 0); + option_unused(options); + net->layers[count] = l; + if (l.workspace_size > workspace_size) workspace_size = l.workspace_size; + free_section(s); + n = n->next; + ++count; + if(n){ + params.h = l.out_h; + params.w = l.out_w; + params.c = l.out_c; + params.inputs = l.outputs; + } + } + free_list(sections); + layer out = get_network_output_layer(net); + net->outputs = out.outputs; + net->truths = out.outputs; + if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths; + net->output = out.output; + net->input = calloc(net->inputs*net->batch, sizeof(float)); + net->truth = calloc(net->truths*net->batch, sizeof(float)); +#ifdef GPU + net->output_gpu = out.output_gpu; + net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch); + net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch); +#endif + if(workspace_size){ + //printf("%ld\n", workspace_size); +#ifdef GPU + if(gpu_index >= 0){ + net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1); + }else { + 
net->workspace = calloc(1, workspace_size); + } +#else + net->workspace = calloc(1, workspace_size); +#endif + } + return net; +} + +list *read_cfg(char *filename) +{ + FILE *file = fopen(filename, "r"); + if(file == 0) file_error(filename); + char *line; + int nu = 0; + list *options = make_list(); + section *current = 0; + while((line=fgetl(file)) != 0){ + ++ nu; + strip(line); + switch(line[0]){ + case '[': + current = malloc(sizeof(section)); + list_insert(options, current); + current->options = make_list(); + current->type = line; + break; + case '\0': + case '#': + case ';': + free(line); + break; + default: + if(!read_option(line, current->options)){ + fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); + free(line); + } + break; + } + } + fclose(file); + return options; +} + +void save_convolutional_weights_binary(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_convolutional_layer(l); + } +#endif + binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.binary_weights); + int size = l.c*l.size*l.size; + int i, j, k; + fwrite(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.n, fp); + fwrite(l.rolling_mean, sizeof(float), l.n, fp); + fwrite(l.rolling_variance, sizeof(float), l.n, fp); + } + for(i = 0; i < l.n; ++i){ + float mean = l.binary_weights[i*size]; + if(mean < 0) mean = -mean; + fwrite(&mean, sizeof(float), 1, fp); + for(j = 0; j < size/8; ++j){ + int index = i*size + j*8; + unsigned char c = 0; + for(k = 0; k < 8; ++k){ + if (j*8 + k >= size) break; + if (l.binary_weights[index + k] > 0) c = (c | 1<= 0){ + pull_convolutional_layer(l); + } +#endif + int num = l.nweights; + fwrite(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.n, fp); + fwrite(l.rolling_mean, sizeof(float), l.n, fp); + fwrite(l.rolling_variance, sizeof(float), l.n, fp); + } + fwrite(l.weights, sizeof(float), num, fp); +} + +void 
save_batchnorm_weights(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_batchnorm_layer(l); + } +#endif + fwrite(l.scales, sizeof(float), l.c, fp); + fwrite(l.rolling_mean, sizeof(float), l.c, fp); + fwrite(l.rolling_variance, sizeof(float), l.c, fp); +} + +void save_connected_weights(layer l, FILE *fp) +{ +#ifdef GPU + if(gpu_index >= 0){ + pull_connected_layer(l); + } +#endif + fwrite(l.biases, sizeof(float), l.outputs, fp); + fwrite(l.weights, sizeof(float), l.outputs*l.inputs, fp); + if (l.batch_normalize){ + fwrite(l.scales, sizeof(float), l.outputs, fp); + fwrite(l.rolling_mean, sizeof(float), l.outputs, fp); + fwrite(l.rolling_variance, sizeof(float), l.outputs, fp); + } +} + +void save_weights_upto(network *net, char *filename, int cutoff) +{ +#ifdef GPU + if(net->gpu_index >= 0){ + cuda_set_device(net->gpu_index); + } +#endif + fprintf(stderr, "Saving weights to %s\n", filename); + FILE *fp = fopen(filename, "wb"); + if(!fp) file_error(filename); + + int major = 0; + int minor = 2; + int revision = 0; + fwrite(&major, sizeof(int), 1, fp); + fwrite(&minor, sizeof(int), 1, fp); + fwrite(&revision, sizeof(int), 1, fp); + fwrite(net->seen, sizeof(size_t), 1, fp); + + int i; + for(i = 0; i < net->n && i < cutoff; ++i){ + layer l = net->layers[i]; + if (l.dontsave) continue; + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + save_convolutional_weights(l, fp); + } if(l.type == CONNECTED){ + save_connected_weights(l, fp); + } if(l.type == BATCHNORM){ + save_batchnorm_weights(l, fp); + } if(l.type == RNN){ + save_connected_weights(*(l.input_layer), fp); + save_connected_weights(*(l.self_layer), fp); + save_connected_weights(*(l.output_layer), fp); + } if (l.type == LSTM) { + save_connected_weights(*(l.wi), fp); + save_connected_weights(*(l.wf), fp); + save_connected_weights(*(l.wo), fp); + save_connected_weights(*(l.wg), fp); + save_connected_weights(*(l.ui), fp); + save_connected_weights(*(l.uf), fp); + save_connected_weights(*(l.uo), fp); 
+ save_connected_weights(*(l.ug), fp); + } if (l.type == GRU) { + if(1){ + save_connected_weights(*(l.wz), fp); + save_connected_weights(*(l.wr), fp); + save_connected_weights(*(l.wh), fp); + save_connected_weights(*(l.uz), fp); + save_connected_weights(*(l.ur), fp); + save_connected_weights(*(l.uh), fp); + }else{ + save_connected_weights(*(l.reset_layer), fp); + save_connected_weights(*(l.update_layer), fp); + save_connected_weights(*(l.state_layer), fp); + } + } if(l.type == CRNN){ + save_convolutional_weights(*(l.input_layer), fp); + save_convolutional_weights(*(l.self_layer), fp); + save_convolutional_weights(*(l.output_layer), fp); + } if(l.type == LOCAL){ +#ifdef GPU + if(gpu_index >= 0){ + pull_local_layer(l); + } +#endif + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + fwrite(l.biases, sizeof(float), l.outputs, fp); + fwrite(l.weights, sizeof(float), size, fp); + } + } + fclose(fp); +} +void save_weights(network *net, char *filename) +{ + save_weights_upto(net, filename, net->n); +} + +void transpose_matrix(float *a, int rows, int cols) +{ + float *transpose = calloc(rows*cols, sizeof(float)); + int x, y; + for(x = 0; x < rows; ++x){ + for(y = 0; y < cols; ++y){ + transpose[y*rows + x] = a[x*cols + y]; + } + } + memcpy(a, transpose, rows*cols*sizeof(float)); + free(transpose); +} + +void load_connected_weights(layer l, FILE *fp, int transpose) +{ + fread(l.biases, sizeof(float), l.outputs, fp); + fread(l.weights, sizeof(float), l.outputs*l.inputs, fp); + if(transpose){ + transpose_matrix(l.weights, l.inputs, l.outputs); + } + //printf("Biases: %f mean %f variance\n", mean_array(l.biases, l.outputs), variance_array(l.biases, l.outputs)); + //printf("Weights: %f mean %f variance\n", mean_array(l.weights, l.outputs*l.inputs), variance_array(l.weights, l.outputs*l.inputs)); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.outputs, fp); + fread(l.rolling_mean, sizeof(float), l.outputs, fp); + 
fread(l.rolling_variance, sizeof(float), l.outputs, fp); + //printf("Scales: %f mean %f variance\n", mean_array(l.scales, l.outputs), variance_array(l.scales, l.outputs)); + //printf("rolling_mean: %f mean %f variance\n", mean_array(l.rolling_mean, l.outputs), variance_array(l.rolling_mean, l.outputs)); + //printf("rolling_variance: %f mean %f variance\n", mean_array(l.rolling_variance, l.outputs), variance_array(l.rolling_variance, l.outputs)); + } +#ifdef GPU + if(gpu_index >= 0){ + push_connected_layer(l); + } +#endif +} + +void load_batchnorm_weights(layer l, FILE *fp) +{ + fread(l.scales, sizeof(float), l.c, fp); + fread(l.rolling_mean, sizeof(float), l.c, fp); + fread(l.rolling_variance, sizeof(float), l.c, fp); +#ifdef GPU + if(gpu_index >= 0){ + push_batchnorm_layer(l); + } +#endif +} + +void load_convolutional_weights_binary(layer l, FILE *fp) +{ + fread(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.n, fp); + fread(l.rolling_mean, sizeof(float), l.n, fp); + fread(l.rolling_variance, sizeof(float), l.n, fp); + } + int size = l.c*l.size*l.size; + int i, j, k; + for(i = 0; i < l.n; ++i){ + float mean = 0; + fread(&mean, sizeof(float), 1, fp); + for(j = 0; j < size/8; ++j){ + int index = i*size + j*8; + unsigned char c = 0; + fread(&c, sizeof(char), 1, fp); + for(k = 0; k < 8; ++k){ + if (j*8 + k >= size) break; + l.weights[index + k] = (c & 1<= 0){ + push_convolutional_layer(l); + } +#endif +} + +void load_convolutional_weights(layer l, FILE *fp) +{ + if(l.binary){ + //load_convolutional_weights_binary(l, fp); + //return; + } + if(l.numload) l.n = l.numload; + int num = l.c/l.groups*l.n*l.size*l.size; + fread(l.biases, sizeof(float), l.n, fp); + if (l.batch_normalize && (!l.dontloadscales)){ + fread(l.scales, sizeof(float), l.n, fp); + fread(l.rolling_mean, sizeof(float), l.n, fp); + fread(l.rolling_variance, sizeof(float), l.n, fp); + if(0){ + int i; + for(i = 0; i < l.n; ++i){ + 
printf("%g, ", l.rolling_mean[i]); + } + printf("\n"); + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_variance[i]); + } + printf("\n"); + } + if(0){ + fill_cpu(l.n, 0, l.rolling_mean, 1); + fill_cpu(l.n, 0, l.rolling_variance, 1); + } + if(0){ + int i; + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_mean[i]); + } + printf("\n"); + for(i = 0; i < l.n; ++i){ + printf("%g, ", l.rolling_variance[i]); + } + printf("\n"); + } + } + fread(l.weights, sizeof(float), num, fp); + //if(l.c == 3) scal_cpu(num, 1./256, l.weights, 1); + if (l.flipped) { + transpose_matrix(l.weights, l.c*l.size*l.size, l.n); + } + //if (l.binary) binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.weights); +#ifdef GPU + if(gpu_index >= 0){ + push_convolutional_layer(l); + } +#endif +} + + +void load_weights_upto(network *net, char *filename, int start, int cutoff) +{ +#ifdef GPU + if(net->gpu_index >= 0){ + cuda_set_device(net->gpu_index); + } +#endif + fprintf(stderr, "Loading weights from %s...", filename); + fflush(stdout); + FILE *fp = fopen(filename, "rb"); + if(!fp) file_error(filename); + + int major; + int minor; + int revision; + fread(&major, sizeof(int), 1, fp); + fread(&minor, sizeof(int), 1, fp); + fread(&revision, sizeof(int), 1, fp); + if ((major*10 + minor) >= 2 && major < 1000 && minor < 1000){ + fread(net->seen, sizeof(size_t), 1, fp); + } else { + int iseen = 0; + fread(&iseen, sizeof(int), 1, fp); + *net->seen = iseen; + } + int transpose = (major > 1000) || (minor > 1000); + + int i; + for(i = start; i < net->n && i < cutoff; ++i){ + layer l = net->layers[i]; + if (l.dontload) continue; + if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){ + load_convolutional_weights(l, fp); + } + if(l.type == CONNECTED){ + load_connected_weights(l, fp, transpose); + } + if(l.type == BATCHNORM){ + load_batchnorm_weights(l, fp); + } + if(l.type == CRNN){ + load_convolutional_weights(*(l.input_layer), fp); + load_convolutional_weights(*(l.self_layer), fp); + 
load_convolutional_weights(*(l.output_layer), fp); + } + if(l.type == RNN){ + load_connected_weights(*(l.input_layer), fp, transpose); + load_connected_weights(*(l.self_layer), fp, transpose); + load_connected_weights(*(l.output_layer), fp, transpose); + } + if (l.type == LSTM) { + load_connected_weights(*(l.wi), fp, transpose); + load_connected_weights(*(l.wf), fp, transpose); + load_connected_weights(*(l.wo), fp, transpose); + load_connected_weights(*(l.wg), fp, transpose); + load_connected_weights(*(l.ui), fp, transpose); + load_connected_weights(*(l.uf), fp, transpose); + load_connected_weights(*(l.uo), fp, transpose); + load_connected_weights(*(l.ug), fp, transpose); + } + if (l.type == GRU) { + if(1){ + load_connected_weights(*(l.wz), fp, transpose); + load_connected_weights(*(l.wr), fp, transpose); + load_connected_weights(*(l.wh), fp, transpose); + load_connected_weights(*(l.uz), fp, transpose); + load_connected_weights(*(l.ur), fp, transpose); + load_connected_weights(*(l.uh), fp, transpose); + }else{ + load_connected_weights(*(l.reset_layer), fp, transpose); + load_connected_weights(*(l.update_layer), fp, transpose); + load_connected_weights(*(l.state_layer), fp, transpose); + } + } + if(l.type == LOCAL){ + int locations = l.out_w*l.out_h; + int size = l.size*l.size*l.c*l.n*locations; + fread(l.biases, sizeof(float), l.outputs, fp); + fread(l.weights, sizeof(float), size, fp); +#ifdef GPU + if(gpu_index >= 0){ + push_local_layer(l); + } +#endif + } + } + fprintf(stderr, "Done!\n"); + fclose(fp); +} + +void load_weights(network *net, char *filename) +{ + load_weights_upto(net, filename, 0, net->n); +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/parser.h b/workloads/realworld/uvm_prefetch_async/darknet/src/parser.h new file mode 100644 index 0000000000000000000000000000000000000000..81aef2c86f3e6cb362f8bde9695ce9d5699ca77f --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/parser.h @@ -0,0 +1,9 @@ +#ifndef PARSER_H 
+#define PARSER_H +#include "darknet.h" +#include "network.h" + +void save_network(network net, char *filename); +void save_weights_double(network net, char *filename); + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/region_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/region_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..9df1b8bc252239ca520fa3dabeff55e5eb5959b8 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/region_layer.c @@ -0,0 +1,507 @@ +#include "region_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_region_layer(int batch, int w, int h, int n, int classes, int coords) +{ + layer l = {0}; + l.type = REGION; + + l.n = n; + l.batch = batch; + l.h = h; + l.w = w; + l.c = n*(classes + coords + 1); + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.coords = coords; + l.cost = calloc(1, sizeof(float)); + l.biases = calloc(n*2, sizeof(float)); + l.bias_updates = calloc(n*2, sizeof(float)); + l.outputs = h*w*n*(classes + coords + 1); + l.inputs = l.outputs; + l.truths = 30*(l.coords + 1); + l.delta = calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + int i; + for(i = 0; i < n*2; ++i){ + l.biases[i] = .5; + } + + l.forward = forward_region_layer; + l.backward = backward_region_layer; +#ifdef GPU + l.forward_gpu = forward_region_layer_gpu; + l.backward_gpu = backward_region_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "detection\n"); + srand(0); + + return l; +} + +void resize_region_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->n*(l->classes + l->coords + 1); + l->inputs = l->outputs; + + l->output = realloc(l->output, 
l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride) +{ + box b; + b.x = (i + x[index + 0*stride]) / w; + b.y = (j + x[index + 1*stride]) / h; + b.w = exp(x[index + 2*stride]) * biases[2*n] / w; + b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h; + return b; +} + +float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale, int stride) +{ + box pred = get_region_box(x, biases, n, index, i, j, w, h, stride); + float iou = box_iou(pred, truth); + + float tx = (truth.x*w - i); + float ty = (truth.y*h - j); + float tw = log(truth.w*w / biases[2*n]); + float th = log(truth.h*h / biases[2*n + 1]); + + delta[index + 0*stride] = scale * (tx - x[index + 0*stride]); + delta[index + 1*stride] = scale * (ty - x[index + 1*stride]); + delta[index + 2*stride] = scale * (tw - x[index + 2*stride]); + delta[index + 3*stride] = scale * (th - x[index + 3*stride]); + return iou; +} + +void delta_region_mask(float *truth, float *x, int n, int index, float *delta, int stride, int scale) +{ + int i; + for(i = 0; i < n; ++i){ + delta[index + i*stride] = scale*(truth[i] - x[index + i*stride]); + } +} + + +void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, int stride, float *avg_cat, int tag) +{ + int i, n; + if(hier){ + float pred = 1; + while(class >= 0){ + pred *= output[index + stride*class]; + int g = hier->group[class]; + int offset = hier->group_offset[g]; + for(i = 0; i < hier->group_size[g]; ++i){ + delta[index + stride*(offset + i)] = scale * (0 - output[index + stride*(offset + i)]); + 
} + delta[index + stride*class] = scale * (1 - output[index + stride*class]); + + class = hier->parent[class]; + } + *avg_cat += pred; + } else { + if (delta[index] && tag){ + delta[index + stride*class] = scale * (1 - output[index + stride*class]); + return; + } + for(n = 0; n < classes; ++n){ + delta[index + stride*n] = scale * (((n == class)?1 : 0) - output[index + stride*n]); + if(n == class) *avg_cat += output[index + stride*n]; + } + } +} + +float logit(float x) +{ + return log(x/(1.-x)); +} + +float tisnan(float x) +{ + return (x != x); +} + +int entry_index(layer l, int batch, int location, int entry) +{ + int n = location / (l.w*l.h); + int loc = location % (l.w*l.h); + return batch*l.outputs + n*l.w*l.h*(l.coords+l.classes+1) + entry*l.w*l.h + loc; +} + +void forward_region_layer(const layer l, network net) +{ + int i,j,b,t,n; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) activate_array(l.output + index, l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords + 1); + if(!l.softmax && !l.softmax_tree) activate_array(l.output + index, l.classes*l.w*l.h, LOGISTIC); + } + } + if (l.softmax_tree){ + int i; + int count = l.coords + 1; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_cpu(net.input + count, group_size, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + count); + count += group_size; + } + } else if (l.softmax){ + int index = entry_index(l, 0, 0, l.coords + !l.background); + softmax_cpu(net.input + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output + index); + } +#endif + + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + if(!net.train) 
return; + float avg_iou = 0; + float recall = 0; + float avg_cat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + int class_count = 0; + *(l.cost) = 0; + for (b = 0; b < l.batch; ++b) { + if(l.softmax_tree){ + int onlyclass = 0; + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + if(!truth.x) break; + int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; + float maxp = 0; + int maxi = 0; + if(truth.x > 100000 && truth.y > 100000){ + for(n = 0; n < l.n*l.w*l.h; ++n){ + int class_index = entry_index(l, b, n, l.coords + 1); + int obj_index = entry_index(l, b, n, l.coords); + float scale = l.output[obj_index]; + l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]); + float p = scale*get_hierarchy_probability(l.output + class_index, l.softmax_tree, class, l.w*l.h); + if(p > maxp){ + maxp = p; + maxi = n; + } + } + int class_index = entry_index(l, b, maxi, l.coords + 1); + int obj_index = entry_index(l, b, maxi, l.coords); + delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax); + if(l.output[obj_index] < .3) l.delta[obj_index] = l.object_scale * (.3 - l.output[obj_index]); + else l.delta[obj_index] = 0; + l.delta[obj_index] = 0; + ++class_count; + onlyclass = 1; + break; + } + } + if(onlyclass) continue; + } + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h); + float best_iou = 0; + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + if(!truth.x) break; + float iou = box_iou(pred, truth); + if (iou > best_iou) { + best_iou = iou; + } + } + int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, l.coords); + avg_anyobj += l.output[obj_index]; + l.delta[obj_index] = 
l.noobject_scale * (0 - l.output[obj_index]); + if(l.background) l.delta[obj_index] = l.noobject_scale * (1 - l.output[obj_index]); + if (best_iou > l.thresh) { + l.delta[obj_index] = 0; + } + + if(*(net.seen) < 12800){ + box truth = {0}; + truth.x = (i + .5)/l.w; + truth.y = (j + .5)/l.h; + truth.w = l.biases[2*n]/l.w; + truth.h = l.biases[2*n+1]/l.h; + delta_region_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, l.delta, .01, l.w*l.h); + } + } + } + } + for(t = 0; t < 30; ++t){ + box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1); + + if(!truth.x) break; + float best_iou = 0; + int best_n = 0; + i = (truth.x * l.w); + j = (truth.y * l.h); + box truth_shift = truth; + truth_shift.x = 0; + truth_shift.y = 0; + for(n = 0; n < l.n; ++n){ + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h); + if(l.bias_match){ + pred.w = l.biases[2*n]/l.w; + pred.h = l.biases[2*n+1]/l.h; + } + pred.x = 0; + pred.y = 0; + float iou = box_iou(pred, truth_shift); + if (iou > best_iou){ + best_iou = iou; + best_n = n; + } + } + + int box_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 0); + float iou = delta_region_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, l.delta, l.coord_scale * (2 - truth.w*truth.h), l.w*l.h); + if(l.coords > 4){ + int mask_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 4); + delta_region_mask(net.truth + t*(l.coords + 1) + b*l.truths + 5, l.output, l.coords - 4, mask_index, l.delta, l.w*l.h, l.mask_scale); + } + if(iou > .5) recall += 1; + avg_iou += iou; + + int obj_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords); + avg_obj += l.output[obj_index]; + l.delta[obj_index] = l.object_scale * (1 - l.output[obj_index]); + if (l.rescore) { + l.delta[obj_index] = l.object_scale * (iou - l.output[obj_index]); + } + if(l.background){ + l.delta[obj_index] = l.object_scale * (0 - 
l.output[obj_index]); + } + + int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords + 1); + delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax); + ++count; + ++class_count; + } + } + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count); +} + +void backward_region_layer(const layer l, network net) +{ + /* + int b; + int size = l.coords + l.classes + 1; + for (b = 0; b < l.batch*l.n; ++b){ + int index = (b*size + 4)*l.w*l.h; + gradient_array(l.output + index, l.w*l.h, LOGISTIC, l.delta + index); + } + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); + */ +} + +void correct_region_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +{ + int i; + int new_w=0; + int new_h=0; + if (((float)netw/w) < ((float)neth/h)) { + new_w = netw; + new_h = (h * netw)/w; + } else { + new_h = neth; + new_w = (w * neth)/h; + } + for (i = 0; i < n; ++i){ + box b = dets[i].bbox; + b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); + b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); + b.w *= (float)netw/new_w; + b.h *= (float)neth/new_h; + if(!relative){ + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + dets[i].bbox = b; + } +} + +void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets) +{ + int i,j,n,z; + float *predictions = l.output; + if (l.batch == 2) { + float *flip = l.output + l.outputs; + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w/2; ++i) { + for (n = 0; n < l.n; ++n) { + for(z = 0; z < l.classes + l.coords + 1; ++z){ + int i1 = 
z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; + int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); + float swap = flip[i1]; + flip[i1] = flip[i2]; + flip[i2] = swap; + if(z == 0){ + flip[i1] = -flip[i1]; + flip[i2] = -flip[i2]; + } + } + } + } + } + for(i = 0; i < l.outputs; ++i){ + l.output[i] = (l.output[i] + flip[i])/2.; + } + } + for (i = 0; i < l.w*l.h; ++i){ + int row = i / l.w; + int col = i % l.w; + for(n = 0; n < l.n; ++n){ + int index = n*l.w*l.h + i; + for(j = 0; j < l.classes; ++j){ + dets[index].prob[j] = 0; + } + int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); + int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); + int mask_index = entry_index(l, 0, n*l.w*l.h + i, 4); + float scale = l.background ? 1 : predictions[obj_index]; + dets[index].bbox = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h); + dets[index].objectness = scale > thresh ? scale : 0; + if(dets[index].mask){ + for(j = 0; j < l.coords - 4; ++j){ + dets[index].mask[j] = l.output[mask_index + j*l.w*l.h]; + } + } + + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + !l.background); + if(l.softmax_tree){ + + hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0, l.w*l.h); + if(map){ + for(j = 0; j < 200; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + map[j]); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? prob : 0; + } + } else { + int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh, l.w*l.h); + dets[index].prob[j] = (scale > thresh) ? scale : 0; + } + } else { + if(dets[index].objectness){ + for(j = 0; j < l.classes; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + j); + float prob = scale*predictions[class_index]; + dets[index].prob[j] = (prob > thresh) ? 
prob : 0; + } + } + } + } + } + correct_region_boxes(dets, l.w*l.h*l.n, w, h, netw, neth, relative); +} + +#ifdef GPU + +void forward_region_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + if(l.coords > 4){ + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC); + } + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) activate_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, l.coords + 1); + if(!l.softmax && !l.softmax_tree) activate_array_gpu(l.output_gpu + index, l.classes*l.w*l.h, LOGISTIC); + } + } + if (l.softmax_tree){ + int index = entry_index(l, 0, 0, l.coords + 1); + softmax_tree(net.input_gpu + index, l.w*l.h, l.batch*l.n, l.inputs/l.n, 1, l.output_gpu + index, *l.softmax_tree); + } else if (l.softmax) { + int index = entry_index(l, 0, 0, l.coords + !l.background); + softmax_gpu(net.input_gpu + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index); + } + if(!net.train || l.onlyforward){ + cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + return; + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_region_layer(l, net); + //cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs); + if(!net.train) return; + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_region_layer_gpu(const layer l, network net) +{ + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + gradient_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC, l.delta_gpu + index); + if(l.coords > 4){ + index = entry_index(l, b, n*l.w*l.h, 4); + 
gradient_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC, l.delta_gpu + index); + } + index = entry_index(l, b, n*l.w*l.h, l.coords); + if(!l.background) gradient_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC, l.delta_gpu + index); + } + } + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + +void zero_objectness(layer l) +{ + int i, n; + for (i = 0; i < l.w*l.h; ++i){ + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); + l.output[obj_index] = 0; + } + } +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/region_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/region_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..9f12fd187fd490d10cbc21af8251e0e2a870b7cb --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/region_layer.h @@ -0,0 +1,18 @@ +#ifndef REGION_LAYER_H +#define REGION_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_region_layer(int batch, int w, int h, int n, int classes, int coords); +void forward_region_layer(const layer l, network net); +void backward_region_layer(const layer l, network net); +void resize_region_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_region_layer_gpu(const layer l, network net); +void backward_region_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/reorg_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/reorg_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..b3634d53a5e01a8bbcf00e62a90f70f40108e1d7 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/reorg_layer.c @@ -0,0 +1,173 @@ +#include "reorg_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + + +layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra) +{ + layer l = {0}; + l.type = 
REORG; + l.batch = batch; + l.stride = stride; + l.extra = extra; + l.h = h; + l.w = w; + l.c = c; + l.flatten = flatten; + if(reverse){ + l.out_w = w*stride; + l.out_h = h*stride; + l.out_c = c/(stride*stride); + }else{ + l.out_w = w/stride; + l.out_h = h/stride; + l.out_c = c*(stride*stride); + } + l.reverse = reverse; + + l.outputs = l.out_h * l.out_w * l.out_c; + l.inputs = h*w*c; + if(l.extra){ + l.out_w = l.out_h = l.out_c = 0; + l.outputs = l.inputs + l.extra; + } + + if(extra){ + fprintf(stderr, "reorg %4d -> %4d\n", l.inputs, l.outputs); + } else { + fprintf(stderr, "reorg /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); + } + int output_size = l.outputs * batch; + l.output = calloc(output_size, sizeof(float)); + l.delta = calloc(output_size, sizeof(float)); + + l.forward = forward_reorg_layer; + l.backward = backward_reorg_layer; +#ifdef GPU + l.forward_gpu = forward_reorg_layer_gpu; + l.backward_gpu = backward_reorg_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); +#endif + return l; +} + +void resize_reorg_layer(layer *l, int w, int h) +{ + int stride = l->stride; + int c = l->c; + + l->h = h; + l->w = w; + + if(l->reverse){ + l->out_w = w*stride; + l->out_h = h*stride; + l->out_c = c/(stride*stride); + }else{ + l->out_w = w/stride; + l->out_h = h/stride; + l->out_c = c*(stride*stride); + } + + l->outputs = l->out_h * l->out_w * l->out_c; + l->inputs = l->outputs; + int output_size = l->outputs * l->batch; + + l->output = realloc(l->output, output_size * sizeof(float)); + l->delta = realloc(l->delta, output_size * sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, output_size); + l->delta_gpu = cuda_make_array(l->delta, output_size); +#endif +} + +void forward_reorg_layer(const layer l, network net) +{ + int i; + if(l.flatten){ + memcpy(l.output, net.input, 
l.outputs*l.batch*sizeof(float)); + if(l.reverse){ + flatten(l.output, l.w*l.h, l.c, l.batch, 0); + }else{ + flatten(l.output, l.w*l.h, l.c, l.batch, 1); + } + } else if (l.extra) { + for(i = 0; i < l.batch; ++i){ + copy_cpu(l.inputs, net.input + i*l.inputs, 1, l.output + i*l.outputs, 1); + } + } else if (l.reverse){ + reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output); + } else { + reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output); + } +} + +void backward_reorg_layer(const layer l, network net) +{ + int i; + if(l.flatten){ + memcpy(net.delta, l.delta, l.outputs*l.batch*sizeof(float)); + if(l.reverse){ + flatten(net.delta, l.w*l.h, l.c, l.batch, 1); + }else{ + flatten(net.delta, l.w*l.h, l.c, l.batch, 0); + } + } else if(l.reverse){ + reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta); + } else if (l.extra) { + for(i = 0; i < l.batch; ++i){ + copy_cpu(l.inputs, l.delta + i*l.outputs, 1, net.delta + i*l.inputs, 1); + } + }else{ + reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta); + } +} + +#ifdef GPU +void forward_reorg_layer_gpu(layer l, network net) +{ + int i; + if(l.flatten){ + if(l.reverse){ + flatten_gpu(net.input_gpu, l.w*l.h, l.c, l.batch, 0, l.output_gpu); + }else{ + flatten_gpu(net.input_gpu, l.w*l.h, l.c, l.batch, 1, l.output_gpu); + } + } else if (l.extra) { + for(i = 0; i < l.batch; ++i){ + copy_gpu(l.inputs, net.input_gpu + i*l.inputs, 1, l.output_gpu + i*l.outputs, 1); + } + } else if (l.reverse) { + reorg_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu); + }else { + reorg_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu); + } +} + +void backward_reorg_layer_gpu(layer l, network net) +{ + if(l.flatten){ + if(l.reverse){ + flatten_gpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 1, net.delta_gpu); + }else{ + flatten_gpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 0, net.delta_gpu); + } + } else if (l.extra) { + int i; + for(i = 0; i < l.batch; ++i){ + 
copy_gpu(l.inputs, l.delta_gpu + i*l.outputs, 1, net.delta_gpu + i*l.inputs, 1); + } + } else if(l.reverse){ + reorg_gpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta_gpu); + } else { + reorg_gpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta_gpu); + } +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/reorg_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/reorg_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1d1445f17d2874835ee19d033b50e09761374de3 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/reorg_layer.h @@ -0,0 +1,20 @@ +#ifndef REORG_LAYER_H +#define REORG_LAYER_H + +#include "image.h" +#include "cuda_dark.h" +#include "layer.h" +#include "network.h" + +layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra); +void resize_reorg_layer(layer *l, int w, int h); +void forward_reorg_layer(const layer l, network net); +void backward_reorg_layer(const layer l, network net); + +#ifdef GPU +void forward_reorg_layer_gpu(layer l, network net); +void backward_reorg_layer_gpu(layer l, network net); +#endif + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/rnn_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/rnn_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..c07e338caee5418657eb1127058419566d9ef787 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/rnn_layer.c @@ -0,0 +1,292 @@ +#include "rnn_layer.h" +#include "connected_layer.h" +#include "utils.h" +#include "cuda_dark.h" +#include "blas.h" +#include "gemm.h" + +#include +#include +#include +#include + +static void increment_layer(layer *l, int steps) +{ + int num = l->outputs*l->batch*steps; + l->output += num; + l->delta += num; + l->x += num; + l->x_norm += num; + +#ifdef GPU + l->output_gpu += num; + l->delta_gpu += num; + l->x_gpu += num; + l->x_norm_gpu += num; 
+#endif +} + +layer make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTIVATION activation, int batch_normalize, int adam) +{ + fprintf(stderr, "RNN Layer: %d inputs, %d outputs\n", inputs, outputs); + batch = batch / steps; + layer l = {0}; + l.batch = batch; + l.type = RNN; + l.steps = steps; + l.inputs = inputs; + + l.state = calloc(batch*outputs, sizeof(float)); + l.prev_state = calloc(batch*outputs, sizeof(float)); + + l.input_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.input_layer) = make_connected_layer(batch*steps, inputs, outputs, activation, batch_normalize, adam); + l.input_layer->batch = batch; + + l.self_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.self_layer) = make_connected_layer(batch*steps, outputs, outputs, activation, batch_normalize, adam); + l.self_layer->batch = batch; + + l.output_layer = malloc(sizeof(layer)); + fprintf(stderr, "\t\t"); + *(l.output_layer) = make_connected_layer(batch*steps, outputs, outputs, activation, batch_normalize, adam); + l.output_layer->batch = batch; + + l.outputs = outputs; + l.output = l.output_layer->output; + l.delta = l.output_layer->delta; + + l.forward = forward_rnn_layer; + l.backward = backward_rnn_layer; + l.update = update_rnn_layer; +#ifdef GPU + l.forward_gpu = forward_rnn_layer_gpu; + l.backward_gpu = backward_rnn_layer_gpu; + l.update_gpu = update_rnn_layer_gpu; + l.state_gpu = cuda_make_array(0, batch*outputs); + l.prev_state_gpu = cuda_make_array(0, batch*outputs); + l.output_gpu = l.output_layer->output_gpu; + l.delta_gpu = l.output_layer->delta_gpu; +#ifdef CUDNN + cudnnSetTensor4dDescriptor(l.input_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.input_layer->out_c, l.input_layer->out_h, l.input_layer->out_w); + cudnnSetTensor4dDescriptor(l.self_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.self_layer->out_c, l.self_layer->out_h, l.self_layer->out_w); + 
cudnnSetTensor4dDescriptor(l.output_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.output_layer->out_c, l.output_layer->out_h, l.output_layer->out_w); +#endif +#endif + + return l; +} + +void update_rnn_layer(layer l, update_args a) +{ + update_connected_layer(*(l.input_layer), a); + update_connected_layer(*(l.self_layer), a); + update_connected_layer(*(l.output_layer), a); +} + +void forward_rnn_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, self_layer.delta, 1); + fill_cpu(l.outputs * l.batch * l.steps, 0, input_layer.delta, 1); + if(net.train) fill_cpu(l.outputs * l.batch, 0, l.state, 1); + + for (i = 0; i < l.steps; ++i) { + s.input = net.input; + forward_connected_layer(input_layer, s); + + s.input = l.state; + forward_connected_layer(self_layer, s); + + float *old_state = l.state; + if(net.train) l.state += l.outputs*l.batch; + if(l.shortcut){ + copy_cpu(l.outputs * l.batch, old_state, 1, l.state, 1); + }else{ + fill_cpu(l.outputs * l.batch, 0, l.state, 1); + } + axpy_cpu(l.outputs * l.batch, 1, input_layer.output, 1, l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + forward_connected_layer(output_layer, s); + + net.input += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_rnn_layer(layer l, network net) +{ + network s = net; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + + increment_layer(&input_layer, l.steps-1); + increment_layer(&self_layer, l.steps-1); + increment_layer(&output_layer, l.steps-1); + + 
l.state += l.outputs*l.batch*l.steps; + for (i = l.steps-1; i >= 0; --i) { + copy_cpu(l.outputs * l.batch, input_layer.output, 1, l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, self_layer.output, 1, l.state, 1); + + s.input = l.state; + s.delta = self_layer.delta; + backward_connected_layer(output_layer, s); + + l.state -= l.outputs*l.batch; + /* + if(i > 0){ + copy_cpu(l.outputs * l.batch, input_layer.output - l.outputs*l.batch, 1, l.state, 1); + axpy_cpu(l.outputs * l.batch, 1, self_layer.output - l.outputs*l.batch, 1, l.state, 1); + }else{ + fill_cpu(l.outputs * l.batch, 0, l.state, 1); + } + */ + + s.input = l.state; + s.delta = self_layer.delta - l.outputs*l.batch; + if (i == 0) s.delta = 0; + backward_connected_layer(self_layer, s); + + copy_cpu(l.outputs*l.batch, self_layer.delta, 1, input_layer.delta, 1); + if (i > 0 && l.shortcut) axpy_cpu(l.outputs*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.outputs*l.batch, 1); + s.input = net.input + i*l.inputs*l.batch; + if(net.delta) s.delta = net.delta + i*l.inputs*l.batch; + else s.delta = 0; + backward_connected_layer(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } +} + +#ifdef GPU + +void pull_rnn_layer(layer l) +{ + pull_connected_layer(*(l.input_layer)); + pull_connected_layer(*(l.self_layer)); + pull_connected_layer(*(l.output_layer)); +} + +void push_rnn_layer(layer l) +{ + push_connected_layer(*(l.input_layer)); + push_connected_layer(*(l.self_layer)); + push_connected_layer(*(l.output_layer)); +} + +void update_rnn_layer_gpu(layer l, update_args a) +{ + update_connected_layer_gpu(*(l.input_layer), a); + update_connected_layer_gpu(*(l.self_layer), a); + update_connected_layer_gpu(*(l.output_layer), a); +} + +void forward_rnn_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = 
*(l.output_layer); + + fill_gpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, self_layer.delta_gpu, 1); + fill_gpu(l.outputs * l.batch * l.steps, 0, input_layer.delta_gpu, 1); + + if(net.train) { + fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); + copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.prev_state_gpu, 1); + } + + for (i = 0; i < l.steps; ++i) { + s.input_gpu = net.input_gpu; + forward_connected_layer_gpu(input_layer, s); + + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(self_layer, s); + + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + forward_connected_layer_gpu(output_layer, s); + + net.input_gpu += l.inputs*l.batch; + increment_layer(&input_layer, 1); + increment_layer(&self_layer, 1); + increment_layer(&output_layer, 1); + } +} + +void backward_rnn_layer_gpu(layer l, network net) +{ + network s = {0}; + s.train = net.train; + int i; + layer input_layer = *(l.input_layer); + layer self_layer = *(l.self_layer); + layer output_layer = *(l.output_layer); + increment_layer(&input_layer, l.steps - 1); + increment_layer(&self_layer, l.steps - 1); + increment_layer(&output_layer, l.steps - 1); + float *last_input = input_layer.output_gpu; + float *last_self = self_layer.output_gpu; + for (i = l.steps-1; i >= 0; --i) { + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = self_layer.delta_gpu; + backward_connected_layer_gpu(output_layer, s); + + if(i != 0) { + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu - l.outputs*l.batch, 1, 
l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1); + }else { + copy_gpu(l.outputs*l.batch, l.prev_state_gpu, 1, l.state_gpu, 1); + } + + copy_gpu(l.outputs*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); + + s.input_gpu = l.state_gpu; + s.delta_gpu = (i > 0) ? self_layer.delta_gpu - l.outputs*l.batch : 0; + if (i == 0) s.delta_gpu = 0; + backward_connected_layer_gpu(self_layer, s); + + s.input_gpu = net.input_gpu + i*l.inputs*l.batch; + if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch; + else s.delta_gpu = 0; + backward_connected_layer_gpu(input_layer, s); + + increment_layer(&input_layer, -1); + increment_layer(&self_layer, -1); + increment_layer(&output_layer, -1); + } + fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, last_input, 1, l.state_gpu, 1); + axpy_gpu(l.outputs * l.batch, 1, last_self, 1, l.state_gpu, 1); +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/rnn_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/rnn_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..270a63ffafca9a9adb7b995ed674f93c70bdeb51 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/rnn_layer.h @@ -0,0 +1,25 @@ + +#ifndef RNN_LAYER_H +#define RNN_LAYER_H + +#include "activations.h" +#include "layer.h" +#include "network.h" +#define USET + +layer make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTIVATION activation, int batch_normalize, int adam); + +void forward_rnn_layer(layer l, network net); +void backward_rnn_layer(layer l, network net); +void update_rnn_layer(layer l, update_args a); + +#ifdef GPU +void forward_rnn_layer_gpu(layer l, network net); +void backward_rnn_layer_gpu(layer l, network net); +void update_rnn_layer_gpu(layer l, update_args a); +void push_rnn_layer(layer l); +void pull_rnn_layer(layer l); +#endif + +#endif + diff --git 
a/workloads/realworld/uvm_prefetch_async/darknet/src/route_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/route_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..608abe9a1c729eb6bdfd5e0d65c58196b51da496 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/route_layer.c @@ -0,0 +1,134 @@ +#include "route_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + +route_layer make_route_layer(int batch, int n, int *input_layers, int *input_sizes) +{ + fprintf(stderr,"route "); + route_layer l = {0}; + l.type = ROUTE; + l.batch = batch; + l.n = n; + l.input_layers = input_layers; + l.input_sizes = input_sizes; + int i; + int outputs = 0; + for(i = 0; i < n; ++i){ + fprintf(stderr," %d", input_layers[i]); + outputs += input_sizes[i]; + } + fprintf(stderr, "\n"); + l.outputs = outputs; + l.inputs = outputs; + l.delta = calloc(outputs*batch, sizeof(float)); + l.output = calloc(outputs*batch, sizeof(float));; + + l.forward = forward_route_layer; + l.backward = backward_route_layer; + #ifdef GPU + l.forward_gpu = forward_route_layer_gpu; + l.backward_gpu = backward_route_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, outputs*batch); + l.output_gpu = cuda_make_array(l.output, outputs*batch); + #endif + return l; +} + +void resize_route_layer(route_layer *l, network *net) +{ + int i; + layer first = net->layers[l->input_layers[0]]; + l->out_w = first.out_w; + l->out_h = first.out_h; + l->out_c = first.out_c; + l->outputs = first.outputs; + l->input_sizes[0] = first.outputs; + for(i = 1; i < l->n; ++i){ + int index = l->input_layers[i]; + layer next = net->layers[index]; + l->outputs += next.outputs; + l->input_sizes[i] = next.outputs; + if(next.out_w == first.out_w && next.out_h == first.out_h){ + l->out_c += next.out_c; + }else{ + printf("%d %d, %d %d\n", next.out_w, next.out_h, first.out_w, first.out_h); + l->out_h = l->out_w = l->out_c = 0; + } + } + l->inputs = l->outputs; + l->delta = 
realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + +void forward_route_layer(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *input = net.layers[index].output; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1); + } + offset += input_size; + } +} + +void backward_route_layer(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *delta = net.layers[index].delta; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1); + } + offset += input_size; + } +} + +#ifdef GPU +void forward_route_layer_gpu(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *input = net.layers[index].output_gpu; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + copy_gpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1); + } + offset += input_size; + } +} + +void backward_route_layer_gpu(const route_layer l, network net) +{ + int i, j; + int offset = 0; + for(i = 0; i < l.n; ++i){ + int index = l.input_layers[i]; + float *delta = net.layers[index].delta_gpu; + int input_size = l.input_sizes[i]; + for(j = 0; j < l.batch; ++j){ + axpy_gpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1); + } + offset += input_size; + } +} +#endif diff --git 
a/workloads/realworld/uvm_prefetch_async/darknet/src/route_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/route_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..1d40330ff30c9c93a2180a696d5f67f628ea481c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/route_layer.h @@ -0,0 +1,18 @@ +#ifndef ROUTE_LAYER_H +#define ROUTE_LAYER_H +#include "network.h" +#include "layer.h" + +typedef layer route_layer; + +route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size); +void forward_route_layer(const route_layer l, network net); +void backward_route_layer(const route_layer l, network net); +void resize_route_layer(route_layer *l, network *net); + +#ifdef GPU +void forward_route_layer_gpu(const route_layer l, network net); +void backward_route_layer_gpu(const route_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/shortcut_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/shortcut_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..e5b9e14301c0a6b7e03b270824352f1ba40163cd --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/shortcut_layer.c @@ -0,0 +1,90 @@ +#include "shortcut_layer.h" +#include "cuda_dark.h" +#include "blas.h" +#include "activations.h" + +#include +#include + +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) +{ + fprintf(stderr, "res %3d %4d x%4d x%4d -> %4d x%4d x%4d\n",index, w2,h2,c2, w,h,c); + layer l = {0}; + l.type = SHORTCUT; + l.batch = batch; + l.w = w2; + l.h = h2; + l.c = c2; + l.out_w = w; + l.out_h = h; + l.out_c = c; + l.outputs = w*h*c; + l.inputs = l.outputs; + + l.index = index; + + l.delta = calloc(l.outputs*batch, sizeof(float)); + l.output = calloc(l.outputs*batch, sizeof(float));; + + l.forward = forward_shortcut_layer; + l.backward = backward_shortcut_layer; + #ifdef GPU + l.forward_gpu = 
forward_shortcut_layer_gpu; + l.backward_gpu = backward_shortcut_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + #endif + return l; +} + +void resize_shortcut_layer(layer *l, int w, int h) +{ + assert(l->w == l->out_w); + assert(l->h == l->out_h); + l->w = l->out_w = w; + l->h = l->out_h = h; + l->outputs = w*h*l->out_c; + l->inputs = l->outputs; + l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); + l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); + +#ifdef GPU + cuda_free(l->output_gpu); + cuda_free(l->delta_gpu); + l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); +#endif + +} + + +void forward_shortcut_layer(const layer l, network net) +{ + copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); + shortcut_cpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output); + activate_array(l.output, l.outputs*l.batch, l.activation); +} + +void backward_shortcut_layer(const layer l, network net) +{ + gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + axpy_cpu(l.outputs*l.batch, l.alpha, l.delta, 1, net.delta, 1); + shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta); +} + +#ifdef GPU +void forward_shortcut_layer_gpu(const layer l, network net) +{ + copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); + shortcut_gpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output_gpu); + activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); +} + +void backward_shortcut_layer_gpu(const layer l, network net) +{ + gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + axpy_gpu(l.outputs*l.batch, l.alpha, l.delta_gpu, 1, net.delta_gpu, 1); + 
shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta_gpu); +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/shortcut_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/shortcut_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..5f684fc1eadea2c6902be96bf4a4bf9a3b533da9 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/shortcut_layer.h @@ -0,0 +1,17 @@ +#ifndef SHORTCUT_LAYER_H +#define SHORTCUT_LAYER_H + +#include "layer.h" +#include "network.h" + +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); +void forward_shortcut_layer(const layer l, network net); +void backward_shortcut_layer(const layer l, network net); +void resize_shortcut_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_shortcut_layer_gpu(const layer l, network net); +void backward_shortcut_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/softmax_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/softmax_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..569b62b14097ed226d9939d8e1f1fd2899083ee6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/softmax_layer.c @@ -0,0 +1,107 @@ +#include "softmax_layer.h" +#include "blas.h" +#include "cuda_dark.h" + +#include +#include +#include +#include +#include + +softmax_layer make_softmax_layer(int batch, int inputs, int groups) +{ + assert(inputs%groups == 0); + fprintf(stderr, "softmax %4d\n", inputs); + softmax_layer l = {0}; + l.type = SOFTMAX; + l.batch = batch; + l.groups = groups; + l.inputs = inputs; + l.outputs = inputs; + l.loss = calloc(inputs*batch, sizeof(float)); + l.output = calloc(inputs*batch, sizeof(float)); + l.delta = calloc(inputs*batch, sizeof(float)); + l.cost = calloc(1, sizeof(float)); + + l.forward = 
forward_softmax_layer; + l.backward = backward_softmax_layer; + #ifdef GPU + l.forward_gpu = forward_softmax_layer_gpu; + l.backward_gpu = backward_softmax_layer_gpu; + + l.output_gpu = cuda_make_array(l.output, inputs*batch); + l.loss_gpu = cuda_make_array(l.loss, inputs*batch); + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + #endif + return l; +} + +void forward_softmax_layer(const softmax_layer l, network net) +{ + if(l.softmax_tree){ + int i; + int count = 0; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_cpu(net.input + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output + count); + count += group_size; + } + } else { + softmax_cpu(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output); + } + + if(net.truth && !l.noloss){ + softmax_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_softmax_layer(const softmax_layer l, network net) +{ + axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); +} + +#ifdef GPU + +void pull_softmax_layer_output(const softmax_layer layer) +{ + cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch); +} + +void forward_softmax_layer_gpu(const softmax_layer l, network net) +{ + if(l.softmax_tree){ + softmax_tree(net.input_gpu, 1, l.batch, l.inputs, l.temperature, l.output_gpu, *l.softmax_tree); + /* + int i; + int count = 0; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_gpu(net.input_gpu + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count); + count += group_size; + } + */ + } else { + if(l.spatial){ + softmax_gpu(net.input_gpu, l.c, l.batch*l.c, l.inputs/l.c, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu); + }else{ + softmax_gpu(net.input_gpu, l.inputs/l.groups, l.batch, l.inputs, l.groups, 
l.inputs/l.groups, 1, l.temperature, l.output_gpu); + } + } + if(net.truth && !l.noloss){ + softmax_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); + if(l.softmax_tree){ + mask_gpu(l.batch*l.inputs, l.delta_gpu, SECRET_NUM, net.truth_gpu, 0); + mask_gpu(l.batch*l.inputs, l.loss_gpu, SECRET_NUM, net.truth_gpu, 0); + } + cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); + l.cost[0] = sum_array(l.loss, l.batch*l.inputs); + } +} + +void backward_softmax_layer_gpu(const softmax_layer layer, network net) +{ + axpy_gpu(layer.batch*layer.inputs, 1, layer.delta_gpu, 1, net.delta_gpu, 1); +} + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/softmax_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/softmax_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..2e3ffe01a6c5d273a9f6139bc9f265cd7e2bc860 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/softmax_layer.h @@ -0,0 +1,19 @@ +#ifndef SOFTMAX_LAYER_H +#define SOFTMAX_LAYER_H +#include "layer.h" +#include "network.h" + +typedef layer softmax_layer; + +void softmax_array(float *input, int n, float temp, float *output); +softmax_layer make_softmax_layer(int batch, int inputs, int groups); +void forward_softmax_layer(const softmax_layer l, network net); +void backward_softmax_layer(const softmax_layer l, network net); + +#ifdef GPU +void pull_softmax_layer_output(const softmax_layer l); +void forward_softmax_layer_gpu(const softmax_layer l, network net); +void backward_softmax_layer_gpu(const softmax_layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/stb_image.h b/workloads/realworld/uvm_prefetch_async/darknet/src/stb_image.h new file mode 100644 index 0000000000000000000000000000000000000000..d9c21bc813f1f24de2a25ee3cc82bdce9413eaa5 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/stb_image.h @@ -0,0 +1,7462 @@ +/* stb_image - v2.19 
- public domain image loader - http://nothings.org/stb + no warranty implied; use at your own risk + + Do this: + #define STB_IMAGE_IMPLEMENTATION + before you include this file in *one* C or C++ file to create the implementation. + + // i.e. it should look like this: + #include ... + #include ... + #include ... + #define STB_IMAGE_IMPLEMENTATION + #include "stb_image.h" + + You can #define STBI_ASSERT(x) before the #include to avoid using assert.h. + And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free + + + QUICK NOTES: + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface + + JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) + PNG 1/2/4/8/16-bit-per-channel + + TGA (not sure what subset, if a subset) + BMP non-1bpp, non-RLE + PSD (composited view only, no extra channels, 8/16 bit-per-channel) + + GIF (*comp always reports as 4-channel) + HDR (radiance rgbE format) + PIC (Softimage PIC) + PNM (PPM and PGM binary only) + + Animated GIF still needs a proper API, but here's one way to do it: + http://gist.github.com/urraka/685d9a6340b26b830d49 + + - decode from memory or through FILE (define STBI_NO_STDIO to remove code) + - decode from arbitrary I/O callbacks + - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) + + Full documentation under "DOCUMENTATION" below. + + +LICENSE + + See end of file for license information. 
+ +RECENT REVISION HISTORY: + + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings + 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes + 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 + RGB-format JPEG; remove white matting in PSD; + allocate large structures on the stack; + correct channel count for PNG & BMP + 2.10 (2016-01-22) avoid warning introduced in 2.09 + 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED + + See end of file for full revision history. + + + ============================ Contributors ========================= + + Image formats Extensions, features + Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) + Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) + Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) + Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) + Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) + Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) + Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) + github:urraka (animated gif) Junggon Kim (PNM comments) + Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) + socks-the-fox (16-bit PNG) + Jeremy Sawicki (handle all ImageNet JPGs) + Optimizations & bugfixes Mikhail Morozov (1-bit BMP) + Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query) + Arseny Kapoulkine + John-Mark Allen + + Bug & warning fixes + Marc LeBlanc David Woo Guillaume George Martins Mozeiko + Christpher Lloyd Jerry Jansson Joseph Thomson Phil Jordan + Dave Moore Roy Eltham Hayaki Saito Nathan Reed + Won Chun Luke Graham Johan Duparc Nick 
Verigakis + the Horde3D community Thomas Ruf Ronny Chevalier github:rlyeh + Janez Zemva John Bartholomew Michal Cichon github:romigrou + Jonathan Blow Ken Hamada Tero Hanninen github:svdijk + Laurent Gomila Cort Stratton Sergio Gonzalez github:snagar + Aruelien Pocheville Thibault Reuille Cass Everitt github:Zelex + Ryamond Barbiero Paul Du Bois Engin Manap github:grim210 + Aldo Culquicondor Philipp Wiesemann Dale Weiler github:sammyhw + Oriol Ferrer Mesia Josh Tobin Matthew Gregan github:phprus + Julian Raschke Gregory Mullen Baldur Karlsson github:poppolopoppo + Christian Floisand Kevin Schmidt github:darealshinji + Blazej Dariusz Roszkowski github:Michaelangel007 +*/ + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +// DOCUMENTATION +// +// Limitations: +// - no 12-bit-per-channel JPEG +// - no JPEGs with arithmetic coding +// - GIF always returns *comp=4 +// +// Basic usage (see HDR discussion below for HDR usage): +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// // ... but 'n' will always be the number that it would have been if you said 0 +// stbi_image_free(data) +// +// Standard parameters: +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *channels_in_file -- outputs # of image components in image file +// int desired_channels -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data, or NULL on an allocation failure or if the image is +// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. 
There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'desired_channels' if desired_channels is non-zero, or +// *channels_in_file otherwise. If desired_channels is non-zero, +// *channels_in_file has the number of components that _would_ have been +// output otherwise. E.g. if you set desired_channels to 4, you will always +// get RGBA output, but you can check *channels_in_file to see if it's trivially +// opaque because e.g. there were only 3 channels in the source image. +// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *channels_in_file will be unchanged. The function +// stbi_failure_reason() can be queried for an extremely brief, end-user +// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS +// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. +// +// =========================================================================== +// +// Philosophy +// +// stb libraries are designed with the following priorities: +// +// 1. easy to use +// 2. easy to maintain +// 3. good performance +// +// Sometimes I let "good performance" creep up in priority over "easy to maintain", +// and for best performance I may provide less-easy-to-use APIs that give higher +// performance, in addition to the easy to use ones. Nevertheless, it's important +// to keep in mind that from the standpoint of you, a client of this library, +// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. 
+// +// Some secondary priorities arise directly from the first two, some of which +// make more explicit reasons why performance can't be emphasized. +// +// - Portable ("ease of use") +// - Small source code footprint ("easy to maintain") +// - No dependencies ("ease of use") +// +// =========================================================================== +// +// I/O callbacks +// +// I/O callbacks allow you to read from arbitrary sources, like packaged +// files or some other source. Data read from callbacks are processed +// through a small internal buffer (currently 128 bytes) to try to reduce +// overhead. +// +// The three functions you must define are "read" (reads some bytes of data), +// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end). +// +// =========================================================================== +// +// SIMD support +// +// The JPEG decoder will try to automatically use SIMD kernels on x86 when +// supported by the compiler. For ARM Neon support, you must explicitly +// request it. +// +// (The old do-it-yourself SIMD API is no longer supported in the current +// code.) +// +// On x86, SSE2 will automatically be used when available based on a run-time +// test; if not, the generic C versions are used as a fall-back. On ARM targets, +// the typical path is to have separate builds for NEON and non-NEON devices +// (at least this is true for iOS and Android). Therefore, the NEON support is +// toggled by a build flag: define STBI_NEON to get NEON loops. +// +// If for some reason you do not want to use any of SIMD code, or if +// you have issues compiling it, you can disable it entirely by +// defining STBI_NO_SIMD. 
+// +// =========================================================================== +// +// HDR image support (disable by defining STBI_NO_HDR) +// +// stb_image now supports loading HDR images in general, and currently +// the Radiance .HDR file format, although the support is provided +// generically. You can still load any file through the existing interface; +// if you attempt to load an HDR file, it will be automatically remapped to +// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). +// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +// float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +// stbi_ldr_to_hdr_scale(1.0f); +// stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +// stbi_is_hdr(char *filename); +// +// =========================================================================== +// +// iPhone PNG support: +// +// By default we convert iphone-formatted PNGs back to RGB, even though +// they are internally encoded differently. You can disable this conversion +// by by calling stbi_convert_iphone_png_to_rgb(0), in which case +// you will always just get the native iphone "format" through (which +// is BGR stored in RGB). 
+// +// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per +// pixel to remove any premultiplied alpha *only* if the image file explicitly +// says there's premultiplied data (currently only happens in iPhone images, +// and only if iPhone convert-to-rgb processing is on). +// +// =========================================================================== +// +// ADDITIONAL CONFIGURATION +// +// - You can suppress implementation of any of the decoders to reduce +// your code footprint by #defining one or more of the following +// symbols before creating the implementation. +// +// STBI_NO_JPEG +// STBI_NO_PNG +// STBI_NO_BMP +// STBI_NO_PSD +// STBI_NO_TGA +// STBI_NO_GIF +// STBI_NO_HDR +// STBI_NO_PIC +// STBI_NO_PNM (.ppm and .pgm) +// +// - You can request *only* certain decoders and suppress all other ones +// (this will be more forward-compatible, as addition of new decoders +// doesn't require you to disable them explicitly): +// +// STBI_ONLY_JPEG +// STBI_ONLY_PNG +// STBI_ONLY_BMP +// STBI_ONLY_PSD +// STBI_ONLY_TGA +// STBI_ONLY_GIF +// STBI_ONLY_HDR +// STBI_ONLY_PIC +// STBI_ONLY_PNM (.ppm and .pgm) +// +// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still +// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB +// + + +#ifndef STBI_NO_STDIO +#include +#endif // STBI_NO_STDIO + +#define STBI_VERSION 1 + +enum +{ + STBI_default = 0, // only used for desired_channels + + STBI_grey = 1, + STBI_grey_alpha = 2, + STBI_rgb = 3, + STBI_rgb_alpha = 4 +}; + +typedef unsigned char stbi_uc; +typedef unsigned short stbi_us; + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef STB_IMAGE_STATIC +#define STBIDEF static +#else +#define STBIDEF extern +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// PRIMARY API - works on images of any type +// + +// +// load image by filename, open file, or memory buffer +// + +typedef struct +{ + int (*read) (void *user,char *data,int 
size); // fill 'data' with 'size' bytes. return number of bytes actually read + void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative + int (*eof) (void *user); // returns nonzero if we are at end of file/data +} stbi_io_callbacks; + +//////////////////////////////////// +// +// 8-bits-per-channel interface +// + +STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels); +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +#endif + + +#ifndef STBI_NO_STDIO +STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +// for stbi_load_from_file, file pointer is left pointing immediately after image +#endif + +//////////////////////////////////// +// +// 16-bits-per-channel interface +// + +STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +#endif + +//////////////////////////////////// +// +// float-per-channel interface +// +#ifndef STBI_NO_LINEAR + STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int 
*channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + + #ifndef STBI_NO_STDIO + STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); + #endif +#endif + +#ifndef STBI_NO_HDR + STBIDEF void stbi_hdr_to_ldr_gamma(float gamma); + STBIDEF void stbi_hdr_to_ldr_scale(float scale); +#endif // STBI_NO_HDR + +#ifndef STBI_NO_LINEAR + STBIDEF void stbi_ldr_to_hdr_gamma(float gamma); + STBIDEF void stbi_ldr_to_hdr_scale(float scale); +#endif // STBI_NO_LINEAR + +// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename); +STBIDEF int stbi_is_hdr_from_file(FILE *f); +#endif // STBI_NO_STDIO + + +// get a VERY brief reason for failure +// NOT THREADSAFE +STBIDEF const char *stbi_failure_reason (void); + +// free the loaded image -- this is just free() +STBIDEF void stbi_image_free (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit (char const 
*filename); +STBIDEF int stbi_is_16_bit_from_file(FILE *f); +#endif + + + +// for image formats that explicitly notate that they have premultiplied alpha, +// we just return the colors as stored in the file. set this flag to force +// unpremultiplication. results are undefined if the unpremultiply overflow. +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); + +// indicate whether we should process iphone images back to canonical format, +// or just pass them through "as-is" +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); + +// flip the image vertically, so the first pixel in the output array is the bottom left +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); + +// ZLIB client - used by PNG, available for other purposes + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); +STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + + +#ifdef __cplusplus +} +#endif + +// +// +//// end header file ///////////////////////////////////////////////////// +#endif // STBI_INCLUDE_STB_IMAGE_H + +#ifdef STB_IMAGE_IMPLEMENTATION + +#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \ + || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \ + || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \ + || defined(STBI_ONLY_ZLIB) + #ifndef STBI_ONLY_JPEG + #define STBI_NO_JPEG + #endif + #ifndef STBI_ONLY_PNG + 
#define STBI_NO_PNG + #endif + #ifndef STBI_ONLY_BMP + #define STBI_NO_BMP + #endif + #ifndef STBI_ONLY_PSD + #define STBI_NO_PSD + #endif + #ifndef STBI_ONLY_TGA + #define STBI_NO_TGA + #endif + #ifndef STBI_ONLY_GIF + #define STBI_NO_GIF + #endif + #ifndef STBI_ONLY_HDR + #define STBI_NO_HDR + #endif + #ifndef STBI_ONLY_PIC + #define STBI_NO_PIC + #endif + #ifndef STBI_ONLY_PNM + #define STBI_NO_PNM + #endif +#endif + +#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB) +#define STBI_NO_ZLIB +#endif + + +#include +#include // ptrdiff_t on osx +#include +#include +#include + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +#include // ldexp, pow +#endif + +#ifndef STBI_NO_STDIO +#include +#endif + +#ifndef STBI_ASSERT +#include +#define STBI_ASSERT(x) assert(x) +#endif + + +#ifndef _MSC_VER + #ifdef __cplusplus + #define stbi_inline inline + #else + #define stbi_inline + #endif +#else + #define stbi_inline __forceinline +#endif + + +#ifdef _MSC_VER +typedef unsigned short stbi__uint16; +typedef signed short stbi__int16; +typedef unsigned int stbi__uint32; +typedef signed int stbi__int32; +#else +#include +typedef uint16_t stbi__uint16; +typedef int16_t stbi__int16; +typedef uint32_t stbi__uint32; +typedef int32_t stbi__int32; +#endif + +// should produce compiler error if size is wrong +typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 
1 : -1]; + +#ifdef _MSC_VER +#define STBI_NOTUSED(v) (void)(v) +#else +#define STBI_NOTUSED(v) (void)sizeof(v) +#endif + +#ifdef _MSC_VER +#define STBI_HAS_LROTL +#endif + +#ifdef STBI_HAS_LROTL + #define stbi_lrot(x,y) _lrotl(x,y) +#else + #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y)))) +#endif + +#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) +// ok +#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)." +#endif + +#ifndef STBI_MALLOC +#define STBI_MALLOC(sz) malloc(sz) +#define STBI_REALLOC(p,newsz) realloc(p,newsz) +#define STBI_FREE(p) free(p) +#endif + +#ifndef STBI_REALLOC_SIZED +#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz) +#endif + +// x86/x64 detection +#if defined(__x86_64__) || defined(_M_X64) +#define STBI__X64_TARGET +#elif defined(__i386) || defined(_M_IX86) +#define STBI__X86_TARGET +#endif + +#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) +// gcc doesn't support sse2 intrinsics unless you compile with -msse2, +// which in turn means it gets to use SSE2 everywhere. This is unfortunate, +// but previous attempts to provide the SSE2 functions with runtime +// detection caused numerous issues. The way architecture extensions are +// exposed in GCC/Clang is, sadly, not really suited for one-file libs. +// New behavior: if compiled with -msse2, we use SSE2 without any +// detection; if not, we don't use it at all. 
+#define STBI_NO_SIMD +#endif + +#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD) +// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET +// +// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the +// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant. +// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not +// simultaneously enabling "-mstackrealign". +// +// See https://github.com/nothings/stb/issues/81 for more information. +// +// So default to no SSE2 on 32-bit MinGW. If you've read this far and added +// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2. +#define STBI_NO_SIMD +#endif + +#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) +#define STBI_SSE2 +#include + +#ifdef _MSC_VER + +#if _MSC_VER >= 1400 // not VC6 +#include // __cpuid +static int stbi__cpuid3(void) +{ + int info[4]; + __cpuid(info,1); + return info[3]; +} +#else +static int stbi__cpuid3(void) +{ + int res; + __asm { + mov eax,1 + cpuid + mov res,edx + } + return res; +} +#endif + +#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name + +static int stbi__sse2_available(void) +{ + int info3 = stbi__cpuid3(); + return ((info3 >> 26) & 1) != 0; +} +#else // assume GCC-style if not VC++ +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) + +static int stbi__sse2_available(void) +{ + // If we're even attempting to compile this on GCC/Clang, that means + // -msse2 is on, which means the compiler is allowed to use SSE2 + // instructions at will, and so are we. 
+ return 1; +} +#endif +#endif + +// ARM NEON +#if defined(STBI_NO_SIMD) && defined(STBI_NEON) +#undef STBI_NEON +#endif + +#ifdef STBI_NEON +#include +// assume GCC or Clang on ARM targets +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) +#endif + +#ifndef STBI_SIMD_ALIGN +#define STBI_SIMD_ALIGN(type, name) type name +#endif + +/////////////////////////////////////////////// +// +// stbi__context struct and start_xxx functions + +// stbi__context structure is our basic context used by all images, so it +// contains all the IO context, plus some basic image information +typedef struct +{ + stbi__uint32 img_x, img_y; + int img_n, img_out_n; + + stbi_io_callbacks io; + void *io_user_data; + + int read_from_callbacks; + int buflen; + stbi_uc buffer_start[128]; + + stbi_uc *img_buffer, *img_buffer_end; + stbi_uc *img_buffer_original, *img_buffer_original_end; +} stbi__context; + + +static void stbi__refill_buffer(stbi__context *s); + +// initialize a memory-decode context +static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len) +{ + s->io.read = NULL; + s->read_from_callbacks = 0; + s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer; + s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len; +} + +// initialize a callback-based context +static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user) +{ + s->io = *c; + s->io_user_data = user; + s->buflen = sizeof(s->buffer_start); + s->read_from_callbacks = 1; + s->img_buffer_original = s->buffer_start; + stbi__refill_buffer(s); + s->img_buffer_original_end = s->img_buffer_end; +} + +#ifndef STBI_NO_STDIO + +static int stbi__stdio_read(void *user, char *data, int size) +{ + return (int) fread(data,1,size,(FILE*) user); +} + +static void stbi__stdio_skip(void *user, int n) +{ + fseek((FILE*) user, n, SEEK_CUR); +} + +static int stbi__stdio_eof(void *user) +{ + return feof((FILE*) user); +} + +static stbi_io_callbacks 
stbi__stdio_callbacks = +{ + stbi__stdio_read, + stbi__stdio_skip, + stbi__stdio_eof, +}; + +static void stbi__start_file(stbi__context *s, FILE *f) +{ + stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); +} + +//static void stop_file(stbi__context *s) { } + +#endif // !STBI_NO_STDIO + +static void stbi__rewind(stbi__context *s) +{ + // conceptually rewind SHOULD rewind to the beginning of the stream, + // but we just rewind to the beginning of the initial buffer, because + // we only use it after doing 'test', which only ever looks at at most 92 bytes + s->img_buffer = s->img_buffer_original; + s->img_buffer_end = s->img_buffer_original_end; +} + +enum +{ + STBI_ORDER_RGB, + STBI_ORDER_BGR +}; + +typedef struct +{ + int bits_per_channel; + int num_channels; + int channel_order; +} stbi__result_info; + +#ifndef STBI_NO_JPEG +static int stbi__jpeg_test(stbi__context *s); +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNG +static int stbi__png_test(stbi__context *s); +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__png_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_BMP +static int stbi__bmp_test(stbi__context *s); +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_TGA +static int stbi__tga_test(stbi__context *s); +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s); +static 
void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__psd_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_HDR +static int stbi__hdr_test(stbi__context *s); +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_test(stbi__context *s); +static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_GIF +static int stbi__gif_test(stbi__context *s); +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNM +static int stbi__pnm_test(stbi__context *s); +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +// this is not threadsafe +static const char *stbi__g_failure_reason; + +STBIDEF const char *stbi_failure_reason(void) +{ + return stbi__g_failure_reason; +} + +static int stbi__err(const char *str) +{ + stbi__g_failure_reason = str; + return 0; +} + +static void *stbi__malloc(size_t size) +{ + return STBI_MALLOC(size); +} + +// stb_image uses ints pervasively, including for offset calculations. +// therefore the largest decoded image size we can support with the +// current code, even on 64-bit targets, is INT_MAX. this is not a +// significant limitation for the intended use case. 
+// +// we do, however, need to make sure our size calculations don't +// overflow. hence a few helper functions for size calculations that +// multiply integers together, making sure that they're non-negative +// and no overflow occurs. + +// return 1 if the sum is valid, 0 on overflow. +// negative terms are considered invalid. +static int stbi__addsizes_valid(int a, int b) +{ + if (b < 0) return 0; + // now 0 <= b <= INT_MAX, hence also + // 0 <= INT_MAX - b <= INTMAX. + // And "a + b <= INT_MAX" (which might overflow) is the + // same as a <= INT_MAX - b (no overflow) + return a <= INT_MAX - b; +} + +// returns 1 if the product is valid, 0 on overflow. +// negative factors are considered invalid. +static int stbi__mul2sizes_valid(int a, int b) +{ + if (a < 0 || b < 0) return 0; + if (b == 0) return 1; // mul-by-0 is always safe + // portable way to check for no overflows in a*b + return a <= INT_MAX/b; +} + +// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow +static int stbi__mad2sizes_valid(int a, int b, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add); +} + +// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow +static int stbi__mad3sizes_valid(int a, int b, int c, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__addsizes_valid(a*b*c, add); +} + +// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add); +} +#endif + +// mallocs with size overflow checking +static void *stbi__malloc_mad2(int a, int b, int add) +{ + if (!stbi__mad2sizes_valid(a, b, add)) return NULL; + return stbi__malloc(a*b + add); +} + +static void 
*stbi__malloc_mad3(int a, int b, int c, int add) +{ + if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL; + return stbi__malloc(a*b*c + add); +} + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +static void *stbi__malloc_mad4(int a, int b, int c, int d, int add) +{ + if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; + return stbi__malloc(a*b*c*d + add); +} +#endif + +// stbi__err - error +// stbi__errpf - error returning pointer to float +// stbi__errpuc - error returning pointer to unsigned char + +#ifdef STBI_NO_FAILURE_STRINGS + #define stbi__err(x,y) 0 +#elif defined(STBI_FAILURE_USERMSG) + #define stbi__err(x,y) stbi__err(y) +#else + #define stbi__err(x,y) stbi__err(x) +#endif + +#define stbi__errpf(x,y) ((float *)(size_t) (stbi__err(x,y)?NULL:NULL)) +#define stbi__errpuc(x,y) ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL)) + +STBIDEF void stbi_image_free(void *retval_from_stbi_load) +{ + STBI_FREE(retval_from_stbi_load); +} + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); +#endif + +#ifndef STBI_NO_HDR +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp); +#endif + +static int stbi__vertically_flip_on_load = 0; + +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) +{ + stbi__vertically_flip_on_load = flag_true_if_should_flip; +} + +static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields + ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed + ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order + ri->num_channels = 0; + + #ifndef STBI_NO_JPEG + if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PNG + if (stbi__png_test(s)) return 
stbi__png_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_BMP + if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_GIF + if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PSD + if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc); + #endif + #ifndef STBI_NO_PIC + if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PNM + if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri); + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); + return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + + #ifndef STBI_NO_TGA + // test tga last because it's a crappy test! + if (stbi__tga_test(s)) + return stbi__tga_load(s,x,y,comp,req_comp, ri); + #endif + + return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); +} + +static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi_uc *reduced; + + reduced = (stbi_uc *) stbi__malloc(img_len); + if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling + + STBI_FREE(orig); + return reduced; +} + +static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi__uint16 *enlarged; + + enlarged = (stbi__uint16 *) stbi__malloc(img_len*2); + if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff + + STBI_FREE(orig); + return enlarged; +} + +static void 
stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) +{ + int row; + size_t bytes_per_row = (size_t)w * bytes_per_pixel; + stbi_uc temp[2048]; + stbi_uc *bytes = (stbi_uc *)image; + + for (row = 0; row < (h>>1); row++) { + stbi_uc *row0 = bytes + row*bytes_per_row; + stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; + // swap row0 with row1 + size_t bytes_left = bytes_per_row; + while (bytes_left) { + size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp); + memcpy(temp, row0, bytes_copy); + memcpy(row0, row1, bytes_copy); + memcpy(row1, temp, bytes_copy); + row0 += bytes_copy; + row1 += bytes_copy; + bytes_left -= bytes_copy; + } + } +} + +static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel) +{ + int slice; + int slice_size = w * h * bytes_per_pixel; + + stbi_uc *bytes = (stbi_uc *)image; + for (slice = 0; slice < z; ++slice) { + stbi__vertical_flip(bytes, w, h, bytes_per_pixel); + bytes += slice_size; + } +} + +static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); + + if (result == NULL) + return NULL; + + if (ri.bits_per_channel != 8) { + STBI_ASSERT(ri.bits_per_channel == 16); + result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 8; + } + + // @TODO: move stbi__convert_format to here + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? 
req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); + } + + return (unsigned char *) result; +} + +static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); + + if (result == NULL) + return NULL; + + if (ri.bits_per_channel != 16) { + STBI_ASSERT(ri.bits_per_channel == 8); + result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 16; + } + + // @TODO: move stbi__convert_format16 to here + // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); + } + + return (stbi__uint16 *) result; +} + +#if !defined(STBI_NO_HDR) || !defined(STBI_NO_LINEAR) +static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) +{ + if (stbi__vertically_flip_on_load && result != NULL) { + int channels = req_comp ? 
req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); + } +} +#endif + +#ifndef STBI_NO_STDIO + +static FILE *stbi__fopen(char const *filename, char const *mode) +{ + FILE *f; +#if defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != fopen_s(&f, filename, mode)) + f=0; +#else + f = fopen(filename, mode); +#endif + return f; +} + + +STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + unsigned char *result; + if (!f) return stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__uint16 *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + stbi__uint16 *result; + if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file_16(f,x,y,comp,req_comp); + fclose(f); + return result; +} + + +#endif //!STBI_NO_STDIO + +STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + 
stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_mem(&s,buffer,len); + + result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp); + if (stbi__vertically_flip_on_load) { + stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); + } + + return result; +} +#endif + +#ifndef STBI_NO_LINEAR +static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + stbi__result_info ri; + float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); + if (hdr_data) + stbi__float_postprocess(hdr_data,x,y,comp,req_comp); + return hdr_data; + } + #endif + data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); + if (data) + return stbi__ldr_to_hdr(data, *x, *y, req_comp ? 
req_comp : *comp); + return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); +} + +STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + float *result; + FILE *f = stbi__fopen(filename, "rb"); + if (!f) return stbi__errpf("can't fopen", "Unable to open file"); + result = stbi_loadf_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_file(&s,f); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} +#endif // !STBI_NO_STDIO + +#endif // !STBI_NO_LINEAR + +// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is +// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always +// reports false! 
+ +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(buffer); + STBI_NOTUSED(len); + return 0; + #endif +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result=0; + if (f) { + result = stbi_is_hdr_from_file(f); + fclose(f); + } + return result; +} + +STBIDEF int stbi_is_hdr_from_file(FILE *f) +{ + #ifndef STBI_NO_HDR + long pos = ftell(f); + int res; + stbi__context s; + stbi__start_file(&s,f); + res = stbi__hdr_test(&s); + fseek(f, pos, SEEK_SET); + return res; + #else + STBI_NOTUSED(f); + return 0; + #endif +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(clbk); + STBI_NOTUSED(user); + return 0; + #endif +} + +#ifndef STBI_NO_LINEAR +static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f; + +STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } +STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } +#endif + +static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f; + +STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; } +STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; } + + +////////////////////////////////////////////////////////////////////////////// +// +// Common code used by all image loaders +// + +enum +{ + STBI__SCAN_load=0, + STBI__SCAN_type, + STBI__SCAN_header +}; + +static void stbi__refill_buffer(stbi__context *s) +{ + int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); + if (n == 0) { + // at end of file, treat same as if from memory, but need to handle case + // where 
s->img_buffer isn't pointing to safe memory, e.g. 0-byte file + s->read_from_callbacks = 0; + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start+1; + *s->img_buffer = 0; + } else { + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start + n; + } +} + +stbi_inline static stbi_uc stbi__get8(stbi__context *s) +{ + if (s->img_buffer < s->img_buffer_end) + return *s->img_buffer++; + if (s->read_from_callbacks) { + stbi__refill_buffer(s); + return *s->img_buffer++; + } + return 0; +} + +stbi_inline static int stbi__at_eof(stbi__context *s) +{ + if (s->io.read) { + if (!(s->io.eof)(s->io_user_data)) return 0; + // if feof() is true, check if buffer = end + // special case: we've only got the special 0 character at the end + if (s->read_from_callbacks == 0) return 1; + } + + return s->img_buffer >= s->img_buffer_end; +} + +static void stbi__skip(stbi__context *s, int n) +{ + if (n < 0) { + s->img_buffer = s->img_buffer_end; + return; + } + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + s->img_buffer = s->img_buffer_end; + (s->io.skip)(s->io_user_data, n - blen); + return; + } + } + s->img_buffer += n; +} + +static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) +{ + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + int res, count; + + memcpy(buffer, s->img_buffer, blen); + + count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); + res = (count == (n-blen)); + s->img_buffer = s->img_buffer_end; + return res; + } + } + + if (s->img_buffer+n <= s->img_buffer_end) { + memcpy(buffer, s->img_buffer, n); + s->img_buffer += n; + return 1; + } else + return 0; +} + +static int stbi__get16be(stbi__context *s) +{ + int z = stbi__get8(s); + return (z << 8) + stbi__get8(s); +} + +static stbi__uint32 stbi__get32be(stbi__context *s) +{ + stbi__uint32 z = stbi__get16be(s); + return (z << 16) + stbi__get16be(s); +} + +#if 
defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) +// nothing +#else +static int stbi__get16le(stbi__context *s) +{ + int z = stbi__get8(s); + return z + (stbi__get8(s) << 8); +} +#endif + +#ifndef STBI_NO_BMP +static stbi__uint32 stbi__get32le(stbi__context *s) +{ + stbi__uint32 z = stbi__get16le(s); + return z + (stbi__get16le(s) << 16); +} +#endif + +#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings + + +////////////////////////////////////////////////////////////////////////////// +// +// generic converter from built-in img_n to req_comp +// individual types do this automatically as much as possible (e.g. jpeg +// does all cases internally since it needs to colorspace convert anyway, +// and it never has alpha, so very few cases ). png can automatically +// interleave an alpha=255 channel, but falls back to this for other cases +// +// assume data buffer is malloced, so malloc a new one and free that one +// only failure mode is malloc failing + +static stbi_uc stbi__compute_y(int r, int g, int b) +{ + return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + unsigned char *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); + if (good == NULL) { + STBI_FREE(data); + return stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, 
req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; + default: STBI_ASSERT(0); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} + +static stbi__uint16 stbi__compute_y_16(int r, int g, int b) +{ + return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + stbi__uint16 *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); + if (good == NULL) { + STBI_FREE(data); + return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + stbi__uint16 *src = data + j * x * img_n ; + stbi__uint16 *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + 
STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; + default: STBI_ASSERT(0); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) +{ + int i,k,n; + float *output; + if (!data) return NULL; + output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); + } + if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f; + } + STBI_FREE(data); + return output; +} +#endif + +#ifndef STBI_NO_HDR +#define stbi__float2int(x) ((int) (x)) +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) +{ + int i,k,n; + stbi_uc *output; + if (!data) return NULL; + output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of 
memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + if (k < comp) { + float z = data[i*comp+k] * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + } + STBI_FREE(data); + return output; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// "baseline" JPEG/JFIF decoder +// +// simple implementation +// - doesn't support delayed output of y-dimension +// - simple interface (only one output format: 8-bit interleaved RGB) +// - doesn't try to recover corrupt jpegs +// - doesn't allow partial loading, loading multiple at once +// - still fast on x86 (copying globals into locals doesn't help x86) +// - allocates lots of intermediate memory (full size of all components) +// - non-interleaved case requires this anyway +// - allows good upsampling (see next) +// high-quality +// - upsampled channels are bilinearly interpolated, even across blocks +// - quality integer IDCT derived from IJG's 'slow' +// performance +// - fast huffman; reasonable integer IDCT +// - some SIMD kernels for common paths on targets with SSE2/NEON +// - uses a lot of intermediate memory, could cache poorly + +#ifndef STBI_NO_JPEG + +// huffman decoding acceleration +#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache + +typedef struct +{ + stbi_uc fast[1 << FAST_BITS]; + // weirdly, repacking this into AoS is a 10% speed loss, instead of a win + stbi__uint16 code[256]; + stbi_uc values[256]; + stbi_uc size[257]; + unsigned int maxcode[18]; + int delta[17]; // old 'firstsymbol' - old 'firstcode' +} stbi__huffman; + +typedef struct +{ + stbi__context *s; + stbi__huffman 
huff_dc[4]; + stbi__huffman huff_ac[4]; + stbi__uint16 dequant[4][64]; + stbi__int16 fast_ac[4][1 << FAST_BITS]; + +// sizes for components, interleaved MCUs + int img_h_max, img_v_max; + int img_mcu_x, img_mcu_y; + int img_mcu_w, img_mcu_h; + +// definition of jpeg image component + struct + { + int id; + int h,v; + int tq; + int hd,ha; + int dc_pred; + + int x,y,w2,h2; + stbi_uc *data; + void *raw_data, *raw_coeff; + stbi_uc *linebuf; + short *coeff; // progressive only + int coeff_w, coeff_h; // number of 8x8 coefficient blocks + } img_comp[4]; + + stbi__uint32 code_buffer; // jpeg entropy-coded buffer + int code_bits; // number of valid bits + unsigned char marker; // marker seen while filling entropy buffer + int nomore; // flag if we saw a marker so must stop + + int progressive; + int spec_start; + int spec_end; + int succ_high; + int succ_low; + int eob_run; + int jfif; + int app14_color_transform; // Adobe APP14 tag + int rgb; + + int scan_n, order[4]; + int restart_interval, todo; + +// kernels + void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]); + void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step); + stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs); +} stbi__jpeg; + +static int stbi__build_huffman(stbi__huffman *h, int *count) +{ + int i,j,k=0; + unsigned int code; + // build size list for each symbol (from JPEG spec) + for (i=0; i < 16; ++i) + for (j=0; j < count[i]; ++j) + h->size[k++] = (stbi_uc) (i+1); + h->size[k] = 0; + + // compute actual symbols (from jpeg spec) + code = 0; + k = 0; + for(j=1; j <= 16; ++j) { + // compute delta to add to code to compute symbol id + h->delta[j] = k - code; + if (h->size[k] == j) { + while (h->size[k] == j) + h->code[k++] = (stbi__uint16) (code++); + if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); + } + // compute largest code + 1 for this 
size, preshifted as needed later + h->maxcode[j] = code << (16-j); + code <<= 1; + } + h->maxcode[j] = 0xffffffff; + + // build non-spec acceleration table; 255 is flag for not-accelerated + memset(h->fast, 255, 1 << FAST_BITS); + for (i=0; i < k; ++i) { + int s = h->size[i]; + if (s <= FAST_BITS) { + int c = h->code[i] << (FAST_BITS-s); + int m = 1 << (FAST_BITS-s); + for (j=0; j < m; ++j) { + h->fast[c+j] = (stbi_uc) i; + } + } + } + return 1; +} + +// build a table that decodes both magnitude and value of small ACs in +// one go. +static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) +{ + int i; + for (i=0; i < (1 << FAST_BITS); ++i) { + stbi_uc fast = h->fast[i]; + fast_ac[i] = 0; + if (fast < 255) { + int rs = h->values[fast]; + int run = (rs >> 4) & 15; + int magbits = rs & 15; + int len = h->size[fast]; + + if (magbits && len + magbits <= FAST_BITS) { + // magnitude code followed by receive_extend code + int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); + int m = 1 << (magbits - 1); + if (k < m) k += (~0U << magbits) + 1; + // if the result is small enough, we can fit it in fast_ac table + if (k >= -128 && k <= 127) + fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); + } + } + } +} + +static void stbi__grow_buffer_unsafe(stbi__jpeg *j) +{ + do { + unsigned int b = j->nomore ? 
0 : stbi__get8(j->s); + if (b == 0xff) { + int c = stbi__get8(j->s); + while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes + if (c != 0) { + j->marker = (unsigned char) c; + j->nomore = 1; + return; + } + } + j->code_buffer |= b << (24 - j->code_bits); + j->code_bits += 8; + } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) +{ + unsigned int temp; + int c,k; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + // look at the top FAST_BITS and determine what symbol ID it is, + // if the code is <= FAST_BITS + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + k = h->fast[c]; + if (k < 255) { + int s = h->size[k]; + if (s > j->code_bits) + return -1; + j->code_buffer <<= s; + j->code_bits -= s; + return h->values[k]; + } + + // naive test is to shift the code_buffer down so k bits are + // valid, then test against maxcode. To speed this up, we've + // preshifted maxcode left so that it has (16-k) 0s at the + // end; in other words, regardless of the number of bits, it + // wants to be compared against something shifted to have 16; + // that way we don't need to shift inside the loop. + temp = j->code_buffer >> 16; + for (k=FAST_BITS+1 ; ; ++k) + if (temp < h->maxcode[k]) + break; + if (k == 17) { + // error! 
code not found + j->code_bits -= 16; + return -1; + } + + if (k > j->code_bits) + return -1; + + // convert the huffman code to the symbol id + c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; + STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]); + + // convert the id to a symbol + j->code_bits -= k; + j->code_buffer <<= k; + return h->values[c]; +} + +// bias[n] = (-1<code_bits < n) stbi__grow_buffer_unsafe(j); + + sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB + k = stbi_lrot(j->code_buffer, n); + STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask))); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k + (stbi__jbias[n] & ~sgn); +} + +// get some unsigned bits +stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n) +{ + unsigned int k; + if (j->code_bits < n) stbi__grow_buffer_unsafe(j); + k = stbi_lrot(j->code_buffer, n); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k; +} + +stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) +{ + unsigned int k; + if (j->code_bits < 1) stbi__grow_buffer_unsafe(j); + k = j->code_buffer; + j->code_buffer <<= 1; + --j->code_bits; + return k & 0x80000000; +} + +// given a value that's at position X in the zigzag stream, +// where does it appear in the 8x8 matrix coded as row-major? 
+static const stbi_uc stbi__jpeg_dezigzag[64+15] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // let corrupt input sample past end + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63 +}; + +// decode one 64-entry block-- +static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant) +{ + int diff,dc,k; + int t; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + t = stbi__jpeg_huff_decode(j, hdc); + if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + + // 0 all the ac values now so we can do it 32-bits at a time + memset(data,0,64*sizeof(data[0])); + + diff = t ? stbi__extend_receive(j, t) : 0; + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc * dequant[0]); + + // decode AC components, see JPEG spec + k = 1; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) * dequant[zig]); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (rs != 0xf0) break; // end block + k += 16; + } else { + k += r; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); + } + } + } while (k < 64); + return 1; +} + +static int 
stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) +{ + int diff,dc; + int t; + if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + if (j->succ_high == 0) { + // first scan for DC coefficient, must be first + memset(data,0,64*sizeof(data[0])); // 0 all the ac values now + t = stbi__jpeg_huff_decode(j, hdc); + diff = t ? stbi__extend_receive(j, t) : 0; + + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc << j->succ_low); + } else { + // refinement scan for DC coefficient + if (stbi__jpeg_get_bit(j)) + data[0] += (short) (1 << j->succ_low); + } + return 1; +} + +// @OPTIMIZE: store non-zigzagged during the decode passes, +// and only de-zigzag when dequantizing +static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac) +{ + int k; + if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->succ_high == 0) { + int shift = j->succ_low; + + if (j->eob_run) { + --j->eob_run; + return 1; + } + + k = j->spec_start; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) << shift); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r); + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + --j->eob_run; + break; + } + k += 16; + } else { + k += r; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) << shift); + } + } + } while 
(k <= j->spec_end); + } else { + // refinement scan for these AC coefficients + + short bit = (short) (1 << j->succ_low); + + if (j->eob_run) { + --j->eob_run; + for (k = j->spec_start; k <= j->spec_end; ++k) { + short *p = &data[stbi__jpeg_dezigzag[k]]; + if (*p != 0) + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } + } else { + k = j->spec_start; + do { + int r,s; + int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r) - 1; + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + r = 64; // force end of block + } else { + // r=15 s=0 should write 16 0s, so we just do + // a run of 15 0s and then write s (which is 0), + // so we don't have to do anything special here + } + } else { + if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); + // sign bit + if (stbi__jpeg_get_bit(j)) + s = bit; + else + s = -bit; + } + + // advance by r + while (k <= j->spec_end) { + short *p = &data[stbi__jpeg_dezigzag[k++]]; + if (*p != 0) { + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } else { + if (r == 0) { + *p = (short) s; + break; + } + --r; + } + } + } while (k <= j->spec_end); + } + } + return 1; +} + +// take a -128..127 value and stbi__clamp it and convert to 0..255 +stbi_inline static stbi_uc stbi__clamp(int x) +{ + // trick to use a single test to catch both cases + if ((unsigned int) x > 255) { + if (x < 0) return 0; + if (x > 255) return 255; + } + return (stbi_uc) x; +} + +#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5))) +#define stbi__fsh(x) ((x) * 4096) + +// derived from jidctint -- DCT_ISLOW +#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ + int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = 
(p2+p3) * stbi__f2f(0.5411961f); \ + t2 = p1 + p3*stbi__f2f(-1.847759065f); \ + t3 = p1 + p2*stbi__f2f( 0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = stbi__fsh(p2+p3); \ + t1 = stbi__fsh(p2-p3); \ + x0 = t0+t3; \ + x3 = t0-t3; \ + x1 = t1+t2; \ + x2 = t1-t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0+t2; \ + p4 = t1+t3; \ + p1 = t0+t3; \ + p2 = t1+t2; \ + p5 = (p3+p4)*stbi__f2f( 1.175875602f); \ + t0 = t0*stbi__f2f( 0.298631336f); \ + t1 = t1*stbi__f2f( 2.053119869f); \ + t2 = t2*stbi__f2f( 3.072711026f); \ + t3 = t3*stbi__f2f( 1.501321110f); \ + p1 = p5 + p1*stbi__f2f(-0.899976223f); \ + p2 = p5 + p2*stbi__f2f(-2.562915447f); \ + p3 = p3*stbi__f2f(-1.961570560f); \ + p4 = p4*stbi__f2f(-0.390180644f); \ + t3 += p1+p4; \ + t2 += p2+p3; \ + t1 += p2+p4; \ + t0 += p1+p3; + +static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) +{ + int i,val[64],*v=val; + stbi_uc *o; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0]*4; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + 
STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + x0 += 65536 + (128<<17); + x1 += 65536 + (128<<17); + x2 += 65536 + (128<<17); + x3 += 65536 + (128<<17); + // tried computing the shifts into temps, or'ing the temps to see + // if any were out of range, but that was slower + o[0] = stbi__clamp((x0+t3) >> 17); + o[7] = stbi__clamp((x0-t3) >> 17); + o[1] = stbi__clamp((x1+t2) >> 17); + o[6] = stbi__clamp((x1-t2) >> 17); + o[2] = stbi__clamp((x2+t1) >> 17); + o[5] = stbi__clamp((x2-t1) >> 17); + o[3] = stbi__clamp((x3+t0) >> 17); + o[4] = stbi__clamp((x3-t0) >> 17); + } +} + +#ifdef STBI_SSE2 +// sse2 integer IDCT. not the fastest possible implementation but it +// produces bit-identical results to the generic C version so it's +// fully "transparent". +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + // This is constructed to match our regular (generic) integer IDCT exactly. 
   __m128i row0, row1, row2, row3, row4, row5, row6, row7;
   __m128i tmp;

   // dot product constant: even elems=x, odd elems=y
   #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))

   // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
   // out(1) = c1[even]*x + c1[odd]*y
   #define dct_rot(out0,out1, x,y,c0,c1) \
      __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
      __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
      __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
      __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
      __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
      __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)

   // out = in << 12  (in 16-bit, out 32-bit)
   // implemented as unpack-into-high-half then arithmetic shift right by 4
   #define dct_widen(out, in) \
      __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
      __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)

   // wide add
   #define dct_wadd(out, a, b) \
      __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_add_epi32(a##_h, b##_h)

   // wide sub
   #define dct_wsub(out, a, b) \
      __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)

   // butterfly a/b, add bias, then shift by "s" and pack
   #define dct_bfly32o(out0, out1, a,b,bias,s) \
      { \
         __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
         __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
         dct_wadd(sum, abiased, b); \
         dct_wsub(dif, abiased, b); \
         out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
         out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
      }

   // 8-bit interleave step (for transposes)
   #define dct_interleave8(a, b) \
      tmp = a; \
      a = _mm_unpacklo_epi8(a, b); \
      b = _mm_unpackhi_epi8(tmp, b)

   // 16-bit interleave step (for transposes)
   #define dct_interleave16(a, b) \
      tmp = a; \
      a = _mm_unpacklo_epi16(a, b); \
      b = _mm_unpackhi_epi16(tmp, b)

   // one full 1D IDCT pass over all eight rows (even/odd decomposition
   // mirroring STBI__IDCT_1D in the scalar kernel)
   #define dct_pass(bias,shift) \
      { \
         /* even part */ \
         dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
         __m128i sum04 = _mm_add_epi16(row0, row4); \
         __m128i dif04 = _mm_sub_epi16(row0, row4); \
         dct_widen(t0e, sum04); \
         dct_widen(t1e, dif04); \
         dct_wadd(x0, t0e, t3e); \
         dct_wsub(x3, t0e, t3e); \
         dct_wadd(x1, t1e, t2e); \
         dct_wsub(x2, t1e, t2e); \
         /* odd part */ \
         dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
         dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
         __m128i sum17 = _mm_add_epi16(row1, row7); \
         __m128i sum35 = _mm_add_epi16(row3, row5); \
         dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
         dct_wadd(x4, y0o, y4o); \
         dct_wadd(x5, y1o, y5o); \
         dct_wadd(x6, y2o, y5o); \
         dct_wadd(x7, y3o, y4o); \
         dct_bfly32o(row0,row7, x0,x7,bias,shift); \
         dct_bfly32o(row1,row6, x1,x6,bias,shift); \
         dct_bfly32o(row2,row5, x2,x5,bias,shift); \
         dct_bfly32o(row3,row4, x3,x4,bias,shift); \
      }

   __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
   __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
   __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
   __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
   __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
   __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
   __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
   __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));

   // rounding biases in column/row passes, see stbi__idct_block for explanation.
   __m128i bias_0 = _mm_set1_epi32(512);
   __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));

   // load
   row0 = _mm_load_si128((const __m128i *) (data + 0*8));
   row1 = _mm_load_si128((const __m128i *) (data + 1*8));
   row2 = _mm_load_si128((const __m128i *) (data + 2*8));
   row3 = _mm_load_si128((const __m128i *) (data + 3*8));
   row4 = _mm_load_si128((const __m128i *) (data + 4*8));
   row5 = _mm_load_si128((const __m128i *) (data + 5*8));
   row6 = _mm_load_si128((const __m128i *) (data + 6*8));
   row7 = _mm_load_si128((const __m128i *) (data + 7*8));

   // column pass
   dct_pass(bias_0, 10);

   {
      // 16bit 8x8 transpose pass 1
      dct_interleave16(row0, row4);
      dct_interleave16(row1, row5);
      dct_interleave16(row2, row6);
      dct_interleave16(row3, row7);

      // transpose pass 2
      dct_interleave16(row0, row2);
      dct_interleave16(row1, row3);
      dct_interleave16(row4, row6);
      dct_interleave16(row5, row7);

      // transpose pass 3
      dct_interleave16(row0, row1);
      dct_interleave16(row2, row3);
      dct_interleave16(row4, row5);
      dct_interleave16(row6, row7);
   }

   // row pass
   dct_pass(bias_1, 17);

   {
      // pack
      __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
      __m128i p1 = _mm_packus_epi16(row2, row3);
      __m128i p2 = _mm_packus_epi16(row4, row5);
      __m128i p3 = _mm_packus_epi16(row6, row7);

      // 8bit 8x8 transpose pass 1
      dct_interleave8(p0, p2); // a0e0a1e1...
      dct_interleave8(p1, p3); // c0g0c1g1...

      // transpose pass 2
      dct_interleave8(p0, p1); // a0c0e0g0...
      dct_interleave8(p2, p3); // b0d0f0h0...

      // transpose pass 3
      dct_interleave8(p0, p2); // a0b0c0d0...
      dct_interleave8(p1, p3); // a4b4c4d4...
      // store: each packed register holds two output rows (low/high 8 bytes);
      // the 0x4e shuffle swaps the 64-bit halves to reach the second row
      _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
   }

#undef dct_const
#undef dct_rot
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_interleave8
#undef dct_interleave16
#undef dct_pass
}

#endif // STBI_SSE2

#ifdef STBI_NEON

// NEON integer IDCT. should produce bit-identical
// results to the generic C version.
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;

   // rotation constants, same stbi__f2f fixed-point values as the scalar IDCT
   int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
   int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
   int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
   int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
   int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
   int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
   int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
   int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
   int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
   int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
   int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
   int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));

// widening 16x16->32 multiply of both vector halves
#define dct_long_mul(out, inq, coeff) \
   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)

// widening multiply-accumulate onto an existing wide accumulator
#define dct_long_mac(out, acc, inq, coeff) \
   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)

// out = in << 12 (in 16-bit, out 32-bit)
#define dct_widen(out, inq) \
   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)

// wide add
#define dct_wadd(out, a, b) \
   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vaddq_s32(a##_h, b##_h)

// wide sub
#define dct_wsub(out, a, b) \
   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vsubq_s32(a##_h, b##_h)

// butterfly a/b, then shift using "shiftop" by "s" and pack
#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
   { \
      dct_wadd(sum, a, b); \
      dct_wsub(dif, a, b); \
      out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
      out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
   }

// one full 1D IDCT pass over all eight rows (mirrors STBI__IDCT_1D)
#define dct_pass(shiftop, shift) \
   { \
      /* even part */ \
      int16x8_t sum26 = vaddq_s16(row2, row6); \
      dct_long_mul(p1e, sum26, rot0_0); \
      dct_long_mac(t2e, p1e, row6, rot0_1); \
      dct_long_mac(t3e, p1e, row2, rot0_2); \
      int16x8_t sum04 = vaddq_s16(row0, row4); \
      int16x8_t dif04 = vsubq_s16(row0, row4); \
      dct_widen(t0e, sum04); \
      dct_widen(t1e, dif04); \
      dct_wadd(x0, t0e, t3e); \
      dct_wsub(x3, t0e, t3e); \
      dct_wadd(x1, t1e, t2e); \
      dct_wsub(x2, t1e, t2e); \
      /* odd part */ \
      int16x8_t sum15 = vaddq_s16(row1, row5); \
      int16x8_t sum17 = vaddq_s16(row1, row7); \
      int16x8_t sum35 = vaddq_s16(row3, row5); \
      int16x8_t sum37 = vaddq_s16(row3, row7); \
      int16x8_t sumodd = vaddq_s16(sum17, sum35); \
      dct_long_mul(p5o, sumodd, rot1_0); \
      dct_long_mac(p1o, p5o, sum17, rot1_1); \
      dct_long_mac(p2o, p5o, sum35, rot1_2); \
      dct_long_mul(p3o, sum37, rot2_0); \
      dct_long_mul(p4o, sum15, rot2_1); \
      dct_wadd(sump13o, p1o, p3o); \
      dct_wadd(sump24o, p2o, p4o); \
      dct_wadd(sump23o, p2o, p3o); \
      dct_wadd(sump14o, p1o, p4o); \
      dct_long_mac(x4, sump13o, row7, rot3_0); \
      dct_long_mac(x5, sump24o, row5, rot3_1); \
      dct_long_mac(x6, sump23o, row3, rot3_2); \
      dct_long_mac(x7, sump14o, row1, rot3_3); \
      dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
      dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
      dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
      dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
   }

   // load
   row0 = vld1q_s16(data + 0*8);
   row1 = vld1q_s16(data + 1*8);
   row2 = vld1q_s16(data + 2*8);
   row3 = vld1q_s16(data + 3*8);
   row4 = vld1q_s16(data + 4*8);
   row5 = vld1q_s16(data + 5*8);
   row6 = vld1q_s16(data + 6*8);
   row7 = vld1q_s16(data + 7*8);

   // add DC bias
   row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));

   // column pass
   dct_pass(vrshrn_n_s32, 10);

   // 16bit 8x8 transpose
   {
// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
// whether compilers actually get this is another story, sadly.
#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }

      // pass 1
      dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
      dct_trn16(row2, row3);
      dct_trn16(row4, row5);
      dct_trn16(row6, row7);

      // pass 2
      dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
      dct_trn32(row1, row3);
      dct_trn32(row4, row6);
      dct_trn32(row5, row7);

      // pass 3
      dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
      dct_trn64(row1, row5);
      dct_trn64(row2, row6);
      dct_trn64(row3, row7);

#undef dct_trn16
#undef dct_trn32
#undef dct_trn64
   }

   // row pass
   // vrshrn_n_s32 only supports shifts up to 16, we need
   // 17. so do a non-rounding shift of 16 first then follow
   // up with a rounding shift by 1.
   dct_pass(vshrn_n_s32, 16);

   {
      // pack and round (the deferred rounding shift by 1, saturating to 0..255)
      uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
      uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
      uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
      uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
      uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
      uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
      uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
      uint8x8_t p7 = vqrshrun_n_s16(row7, 1);

      // again, these can translate into one instruction, but often don't.
#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }

      // sadly can't use interleaved stores here since we only write
      // 8 bytes to each scan line!
+ + // 8x8 8-bit transpose pass 1 + dct_trn8_8(p0, p1); + dct_trn8_8(p2, p3); + dct_trn8_8(p4, p5); + dct_trn8_8(p6, p7); + + // pass 2 + dct_trn8_16(p0, p2); + dct_trn8_16(p1, p3); + dct_trn8_16(p4, p6); + dct_trn8_16(p5, p7); + + // pass 3 + dct_trn8_32(p0, p4); + dct_trn8_32(p1, p5); + dct_trn8_32(p2, p6); + dct_trn8_32(p3, p7); + + // store + vst1_u8(out, p0); out += out_stride; + vst1_u8(out, p1); out += out_stride; + vst1_u8(out, p2); out += out_stride; + vst1_u8(out, p3); out += out_stride; + vst1_u8(out, p4); out += out_stride; + vst1_u8(out, p5); out += out_stride; + vst1_u8(out, p6); out += out_stride; + vst1_u8(out, p7); + +#undef dct_trn8_8 +#undef dct_trn8_16 +#undef dct_trn8_32 + } + +#undef dct_long_mul +#undef dct_long_mac +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_pass +} + +#endif // STBI_NEON + +#define STBI__MARKER_none 0xff +// if there's a pending marker from the entropy stream, return that +// otherwise, fetch from the stream and get a marker. if there's no +// marker, return 0xff, which is never a valid marker value +static stbi_uc stbi__get_marker(stbi__jpeg *j) +{ + stbi_uc x; + if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; } + x = stbi__get8(j->s); + if (x != 0xff) return STBI__MARKER_none; + while (x == 0xff) + x = stbi__get8(j->s); // consume repeated 0xff fill bytes + return x; +} + +// in each scan, we'll have scan_n components, and the order +// of the components is specified by order[] +#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) + +// after a restart interval, stbi__jpeg_reset the entropy decoder and +// the dc prediction +static void stbi__jpeg_reset(stbi__jpeg *j) +{ + j->code_bits = 0; + j->code_buffer = 0; + j->nomore = 0; + j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0; + j->marker = STBI__MARKER_none; + j->todo = j->restart_interval ? 
j->restart_interval : 0x7fffffff; + j->eob_run = 0; + // no more than 1<<31 MCUs if no restart_interal? that's plenty safe, + // since we don't even allow 1<<30 pixels +} + +static int stbi__parse_entropy_coded_data(stbi__jpeg *z) +{ + stbi__jpeg_reset(z); + if (!z->progressive) { + if (z->scan_n == 1) { + int i,j; + STBI_SIMD_ALIGN(short, data[64]); + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int i,j,k,x,y; + STBI_SIMD_ALIGN(short, data[64]); + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... 
process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x)*8; + int y2 = (j*z->img_comp[n].v + y)*8; + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data); + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } else { + if (z->scan_n == 1) { + int i,j; + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + if (z->spec_start == 0) { + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } else { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha])) + return 0; + } + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int 
i,j,k,x,y; + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x); + int y2 = (j*z->img_comp[n].v + y); + short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } +} + +static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) +{ + int i; + for (i=0; i < 64; ++i) + data[i] *= dequant[i]; +} + +static void stbi__jpeg_finish(stbi__jpeg *z) +{ + if (z->progressive) { + // dequantize and idct the data + int i,j,n; + for (n=0; n < z->s->img_n; ++n) { + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + } + } + } + } +} + +static int stbi__process_marker(stbi__jpeg *z, int m) +{ + int L; + switch (m) { + case STBI__MARKER_none: // no marker found + return stbi__err("expected marker","Corrupt JPEG"); + + case 0xDD: // DRI - specify restart interval + if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); + z->restart_interval = 
stbi__get16be(z->s); + return 1; + + case 0xDB: // DQT - define quantization table + L = stbi__get16be(z->s)-2; + while (L > 0) { + int q = stbi__get8(z->s); + int p = q >> 4, sixteen = (p != 0); + int t = q & 15,i; + if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); + if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); + + for (i=0; i < 64; ++i) + z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); + L -= (sixteen ? 129 : 65); + } + return L==0; + + case 0xC4: // DHT - define huffman table + L = stbi__get16be(z->s)-2; + while (L > 0) { + stbi_uc *v; + int sizes[16],i,n=0; + int q = stbi__get8(z->s); + int tc = q >> 4; + int th = q & 15; + if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG"); + for (i=0; i < 16; ++i) { + sizes[i] = stbi__get8(z->s); + n += sizes[i]; + } + L -= 17; + if (tc == 0) { + if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; + v = z->huff_dc[th].values; + } else { + if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0; + v = z->huff_ac[th].values; + } + for (i=0; i < n; ++i) + v[i] = stbi__get8(z->s); + if (tc != 0) + stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); + L -= n; + } + return L==0; + } + + // check for comment block or APP blocks + if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { + L = stbi__get16be(z->s); + if (L < 2) { + if (m == 0xFE) + return stbi__err("bad COM len","Corrupt JPEG"); + else + return stbi__err("bad APP len","Corrupt JPEG"); + } + L -= 2; + + if (m == 0xE0 && L >= 5) { // JFIF APP0 segment + static const unsigned char tag[5] = {'J','F','I','F','\0'}; + int ok = 1; + int i; + for (i=0; i < 5; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 5; + if (ok) + z->jfif = 1; + } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment + static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; + int ok = 1; + int i; + for (i=0; i < 6; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 6; 
         if (ok) {
            stbi__get8(z->s); // version
            stbi__get16be(z->s); // flags0
            stbi__get16be(z->s); // flags1
            z->app14_color_transform = stbi__get8(z->s); // color transform
            L -= 6;
         }
      }

      stbi__skip(z->s, L);
      return 1;
   }

   return stbi__err("unknown marker","Corrupt JPEG");
}

// after we see SOS
static int stbi__process_scan_header(stbi__jpeg *z)
{
   int i;
   int Ls = stbi__get16be(z->s);
   z->scan_n = stbi__get8(z->s);
   if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
   if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
   for (i=0; i < z->scan_n; ++i) {
      int id = stbi__get8(z->s), which;
      int q = stbi__get8(z->s);
      // match the scan's component id against the frame's components
      for (which = 0; which < z->s->img_n; ++which)
         if (z->img_comp[which].id == id)
            break;
      if (which == z->s->img_n) return 0; // no match
      z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
      z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
      z->order[i] = which;
   }

   {
      int aa;
      z->spec_start = stbi__get8(z->s);
      z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
      aa = stbi__get8(z->s);
      z->succ_high = (aa >> 4);
      z->succ_low  = (aa & 15);
      if (z->progressive) {
         if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
            return stbi__err("bad SOS", "Corrupt JPEG");
      } else {
         if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
         if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
         z->spec_end = 63;
      }
   }

   return 1;
}

// free the per-component buffers for components [0, ncomp); 'why' is
// passed through as the return value so error paths can free-and-return
static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
{
   int i;
   for (i=0; i < ncomp; ++i) {
      if (z->img_comp[i].raw_data) {
         STBI_FREE(z->img_comp[i].raw_data);
         z->img_comp[i].raw_data = NULL;
         z->img_comp[i].data = NULL;
      }
      if (z->img_comp[i].raw_coeff) {
         STBI_FREE(z->img_comp[i].raw_coeff);
         z->img_comp[i].raw_coeff = 0;
         z->img_comp[i].coeff = 0;
      }
      if (z->img_comp[i].linebuf) {
         STBI_FREE(z->img_comp[i].linebuf);
         z->img_comp[i].linebuf = NULL;
      }
   }
   return why;
}

// Parse the SOF segment: image dimensions, component count, per-component
// sampling factors and quant-table ids; when scan==STBI__SCAN_load also
// compute MCU geometry and allocate the component planes (and, for
// progressive images, the coefficient buffers).
static int stbi__process_frame_header(stbi__jpeg *z, int scan)
{
   stbi__context *s = z->s;
   int Lf,p,i,q, h_max=1,v_max=1,c;
   Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
   p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
   s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
   s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
   c = stbi__get8(s);
   if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
   s->img_n = c;
   for (i=0; i < c; ++i) {
      z->img_comp[i].data = NULL;
      z->img_comp[i].linebuf = NULL;
   }

   if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");

   z->rgb = 0;
   for (i=0; i < s->img_n; ++i) {
      static const unsigned char rgb[3] = { 'R', 'G', 'B' };
      z->img_comp[i].id = stbi__get8(s);
      if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
         ++z->rgb;   // components named 'R','G','B': data is RGB, not YCbCr
      q = stbi__get8(s);
      z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
      z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
      z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
   }

   if (scan != STBI__SCAN_load) return 1;

   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");

   for (i=0; i < s->img_n; ++i) {
      if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
      if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
   }

   // compute interleaved mcu info
   z->img_h_max = h_max;
   z->img_v_max = v_max;
   z->img_mcu_w = h_max * 8;
   z->img_mcu_h = v_max * 8;
   // these sizes can't be more than 17 bits
   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;

   for (i=0; i < s->img_n; ++i) {
      // number of effective pixels (e.g. for non-interleaved MCU)
      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
      // to simplify generation, we'll allocate enough memory to decode
      // the bogus oversized data from using interleaved MCUs and their
      // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
      // discard the extra data until colorspace conversion
      //
      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
      // so these muls can't overflow with 32-bit ints (which we require)
      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
      z->img_comp[i].coeff = 0;
      z->img_comp[i].raw_coeff = 0;
      z->img_comp[i].linebuf = NULL;
      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
      if (z->img_comp[i].raw_data == NULL)
         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
      // align blocks for idct using mmx/sse
      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
      if (z->progressive) {
         // w2, h2 are multiples of 8 (see above)
         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
         if (z->img_comp[i].raw_coeff == NULL)
            return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
         z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
      }
   }

   return 1;
}

// use comparisons since in some cases we handle more than one case (e.g. SOF)
#define stbi__DNL(x)         ((x) == 0xdc)
#define stbi__SOI(x)         ((x) == 0xd8)
#define stbi__EOI(x)         ((x) == 0xd9)
#define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
#define stbi__SOS(x)         ((x) == 0xda)

#define stbi__SOF_progressive(x)   ((x) == 0xc2)

// Read markers from SOI through SOF, dispatching every intermediate
// segment to stbi__process_marker. Returns 1 on success, 0 on error.
static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
{
   int m;
   z->jfif = 0;
   z->app14_color_transform = -1; // valid values are 0,1,2
   z->marker = STBI__MARKER_none; // initialize cached marker to empty
   m = stbi__get_marker(z);
   if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
   if (scan == STBI__SCAN_type) return 1;
   m = stbi__get_marker(z);
   while (!stbi__SOF(m)) {
      if (!stbi__process_marker(z,m)) return 0;
      m = stbi__get_marker(z);
      while (m == STBI__MARKER_none) {
         // some files have extra padding after their blocks, so ok, we'll scan
         if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
         m = stbi__get_marker(z);
      }
   }
   z->progressive = stbi__SOF_progressive(m);
   if (!stbi__process_frame_header(z, scan)) return 0;
   return 1;
}

// decode image to YCbCr format
static int stbi__decode_jpeg_image(stbi__jpeg *j)
{
   int m;
   for (m = 0; m < 4; m++) {
      j->img_comp[m].raw_data = NULL;
      j->img_comp[m].raw_coeff = NULL;
   }
   j->restart_interval = 0;
   if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
   m = stbi__get_marker(j);
   while (!stbi__EOI(m)) {
      if (stbi__SOS(m)) {
         if (!stbi__process_scan_header(j)) return 0;
         if (!stbi__parse_entropy_coded_data(j)) return 0;
         if (j->marker == STBI__MARKER_none ) {
            // handle 0s at the end of image data from IP Kamera 9060
            while (!stbi__at_eof(j->s)) {
               int x = stbi__get8(j->s);
               if (x == 255) {
                  j->marker = stbi__get8(j->s);
                  break;
               }
            }
            // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
         }
      } else if (stbi__DNL(m)) {
         int Ld = stbi__get16be(j->s);
         stbi__uint32 NL = stbi__get16be(j->s);
         if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
         if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
      } else {
         if (!stbi__process_marker(j, m)) return 0;
      }
      m = stbi__get_marker(j);
   }
   if (j->progressive)
      stbi__jpeg_finish(j);
   return 1;
}

// static jfif-centered resampling (across block boundaries)

typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
                                    int w, int hs);

#define stbi__div4(x) ((stbi_uc) ((x) >> 2))

static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   STBI_NOTUSED(out);
   STBI_NOTUSED(in_far);
   STBI_NOTUSED(w);
   STBI_NOTUSED(hs);
   return in_near;
}

static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate two samples vertically for every one in input
   int i;
   STBI_NOTUSED(hs);
   for (i=0; i < w; ++i)
      out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2);
   return out;
}

static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate two samples horizontally for every one in input
   int i;
   stbi_uc *input = in_near;

   if (w == 1) {
      // if only one sample, can't do any interpolation
      out[0] = out[1] = input[0];
      return out;
   }

   out[0] = input[0];
   out[1] = stbi__div4(input[0]*3 + input[1] + 2);
   for (i=1; i < w-1; ++i) {
      int n = 3*input[i]+2;
      out[i*2+0] = stbi__div4(n+input[i-1]);
      out[i*2+1] = stbi__div4(n+input[i+1]);
   }
   out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2);
   out[i*2+1] = input[w-1];

   STBI_NOTUSED(in_far);
   STBI_NOTUSED(hs);

   return out;
}

#define stbi__div16(x) ((stbi_uc) ((x) >> 4))

static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate 2x2 samples for every one in input
   int i,t0,t1;
   if (w == 1) {
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   t1 = 3*in_near[0] + in_far[0];
   out[0] = stbi__div4(t1+2);
   for (i=1; i < w; ++i) {
      t0 = t1;
      t1 = 3*in_near[i]+in_far[i];
      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
   }
   out[w*2-1] = stbi__div4(t1+2);

   STBI_NOTUSED(hs);

   return out;
}

#if defined(STBI_SSE2) || defined(STBI_NEON)
static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate 2x2 samples for every one in input
   int i=0,t0,t1;

   if (w == 1) {
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   t1 = 3*in_near[0] + in_far[0];
   // process groups of 8 pixels for as long as we can.
   // note we can't handle the last pixel in a row in this loop
   // because we need to handle the filter boundary conditions.
   for (; i < ((w-1) & ~7); i += 8) {
#if defined(STBI_SSE2)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      __m128i zero  = _mm_setzero_si128();
      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i));
      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
      __m128i farw  = _mm_unpacklo_epi8(farb, zero);
      __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
      __m128i diff  = _mm_sub_epi16(farw, nearw);
      __m128i nears = _mm_slli_epi16(nearw, 2);
      __m128i curr  = _mm_add_epi16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
+ __m128i prv0 = _mm_slli_si128(curr, 2); + __m128i nxt0 = _mm_srli_si128(curr, 2); + __m128i prev = _mm_insert_epi16(prv0, t1, 0); + __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + __m128i bias = _mm_set1_epi16(8); + __m128i curs = _mm_slli_epi16(curr, 2); + __m128i prvd = _mm_sub_epi16(prev, curr); + __m128i nxtd = _mm_sub_epi16(next, curr); + __m128i curb = _mm_add_epi16(curs, bias); + __m128i even = _mm_add_epi16(prvd, curb); + __m128i odd = _mm_add_epi16(nxtd, curb); + + // interleave even and odd pixels, then undo scaling. + __m128i int0 = _mm_unpacklo_epi16(even, odd); + __m128i int1 = _mm_unpackhi_epi16(even, odd); + __m128i de0 = _mm_srli_epi16(int0, 4); + __m128i de1 = _mm_srli_epi16(int1, 4); + + // pack and write output + __m128i outv = _mm_packus_epi16(de0, de1); + _mm_storeu_si128((__m128i *) (out + i*2), outv); +#elif defined(STBI_NEON) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + uint8x8_t farb = vld1_u8(in_far + i); + uint8x8_t nearb = vld1_u8(in_near + i); + int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); + int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); + int16x8_t curr = vaddq_s16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. 
+ int16x8_t prv0 = vextq_s16(curr, curr, 7); + int16x8_t nxt0 = vextq_s16(curr, curr, 1); + int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); + int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + int16x8_t curs = vshlq_n_s16(curr, 2); + int16x8_t prvd = vsubq_s16(prev, curr); + int16x8_t nxtd = vsubq_s16(next, curr); + int16x8_t even = vaddq_s16(curs, prvd); + int16x8_t odd = vaddq_s16(curs, nxtd); + + // undo scaling and round, then store with even/odd phases interleaved + uint8x8x2_t o; + o.val[0] = vqrshrun_n_s16(even, 4); + o.val[1] = vqrshrun_n_s16(odd, 4); + vst2_u8(out + i*2, o); +#endif + + // "previous" value for next iter + t1 = 3*in_near[i+7] + in_far[i+7]; + } + + t0 = t1; + t1 = 3*in_near[i] + in_far[i]; + out[i*2] = stbi__div16(3*t1 + t0 + 8); + + for (++i; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} +#endif + +static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // resample with nearest-neighbor + int i,j; + STBI_NOTUSED(in_far); + for (i=0; i < w; ++i) + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; + return out; +} + +// this is a reduced-precision calculation of YCbCr-to-RGB introduced +// to make sure the code produces the same results in both SIMD and scalar +#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8) +static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) +{ + int i; + for (i=0; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int 
cb = pcb[i] - 128;
      r = y_fixed + cr* stbi__float2fixed(1.40200f);
      g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      b = y_fixed + cb* stbi__float2fixed(1.77200f);
      r >>= 20;
      g >>= 20;
      b >>= 20;
      // clamp to [0,255]; the unsigned compare catches both <0 and >255 in one test
      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255;
      out += step;
   }
}

#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD YCbCr->RGBA conversion; only step==4 is vectorized, the scalar tail
// below handles step==3 and any leftover pixels
static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
{
   int i = 0;

#ifdef STBI_SSE2
   // step == 3 is pretty ugly on the final interleave, and i'm not convinced
   // it's useful in practice (you wouldn't use it for textures, for example).
   // so just accelerate step == 4 case.
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      __m128i signflip  = _mm_set1_epi8(-0x80);
      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
      __m128i xw = _mm_set1_epi16(255); // alpha channel

      for (; i+7 < count; i += 8) {
         // load
         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128

         // unpack to short (and left-shift cr, cb by 8)
         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes);
         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);

         // color transform
         __m128i yws = _mm_srli_epi16(yw, 4);
         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
         __m128i rws = _mm_add_epi16(cr0, yws);
         __m128i gwt = _mm_add_epi16(cb0, yws);
         __m128i bws = _mm_add_epi16(yws, cb1);
         __m128i gws = _mm_add_epi16(gwt, cr1);

         // descale
         __m128i rw = _mm_srai_epi16(rws, 4);
         __m128i bw = _mm_srai_epi16(bws, 4);
         __m128i gw = _mm_srai_epi16(gws, 4);

         // back to byte, set up for transpose
         __m128i brb = _mm_packus_epi16(rw, bw);
         __m128i gxb = _mm_packus_epi16(gw, xw);

         // transpose to interleave channels
         __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
         __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
         __m128i o0 = _mm_unpacklo_epi16(t0, t1);
         __m128i o1 = _mm_unpackhi_epi16(t0, t1);

         // store
         _mm_storeu_si128((__m128i *) (out + 0), o0);
         _mm_storeu_si128((__m128i *) (out + 16), o1);
         out += 32;
      }
   }
#endif

#ifdef STBI_NEON
   // in this version, step=3 support would be easy to add. but is there demand?
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      uint8x8_t signflip = vdup_n_u8(0x80);
      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));

      for (; i+7 < count; i += 8) {
         // load
         uint8x8_t y_bytes  = vld1_u8(y + i);
         uint8x8_t cr_bytes = vld1_u8(pcr + i);
         uint8x8_t cb_bytes = vld1_u8(pcb + i);
         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));

         // expand to s16
         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
         int16x8_t crw = vshll_n_s8(cr_biased, 7);
         int16x8_t cbw = vshll_n_s8(cb_biased, 7);

         // color transform
         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
         int16x8_t rws = vaddq_s16(yws, cr0);
         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
         int16x8_t bws = vaddq_s16(yws, cb1);

         // undo scaling, round, convert to byte
         uint8x8x4_t o;
         o.val[0] = vqrshrun_n_s16(rws, 4);
         o.val[1] = vqrshrun_n_s16(gws, 4);
         o.val[2] = vqrshrun_n_s16(bws, 4);
         o.val[3] = vdup_n_u8(255);

         // store, interleaving r/g/b/a
         vst4_u8(out, o);
         out += 8*4;
      }
   }
#endif

   // scalar tail: must match stbi__YCbCr_to_RGB_row exactly
   for (; i < count; ++i) {
      int y_fixed = (y[i] << 20) + (1<<19); // rounding
      int r,g,b;
      int cr = pcr[i] - 128;
      int cb = pcb[i] - 128;
      r = y_fixed + cr* stbi__float2fixed(1.40200f);
      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) &
0xffff0000);
      b = y_fixed + cb* stbi__float2fixed(1.77200f);
      r >>= 20;
      g >>= 20;
      b >>= 20;
      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255;
      out += step;
   }
}
#endif

// set up the kernels: pick scalar or SIMD implementations at runtime
static void stbi__setup_jpeg(stbi__jpeg *j)
{
   j->idct_block_kernel = stbi__idct_block;
   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;

#ifdef STBI_SSE2
   if (stbi__sse2_available()) {
      j->idct_block_kernel = stbi__idct_simd;
      j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
      j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
   }
#endif

#ifdef STBI_NEON
   j->idct_block_kernel = stbi__idct_simd;
   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
#endif
}

// clean up the temporary component buffers
static void stbi__cleanup_jpeg(stbi__jpeg *j)
{
   stbi__free_jpeg_components(j, j->s->img_n, 0);
}

// per-component upsampling state used by load_jpeg_image
typedef struct
{
   resample_row_func resample;
   stbi_uc *line0,*line1;
   int hs,vs;   // expansion factor in each axis
   int w_lores; // horizontal pixels pre-expansion
   int ystep;   // how far through vertical expansion we are
   int ypos;    // which pre-expansion row we're on
} stbi__resample;

// fast 0..255 * 0..255 => 0..255 rounded multiplication
static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
{
   unsigned int t = x*y + 128;
   return (stbi_uc) ((t + (t >>8)) >> 8);
}

static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
{
   int n, decode_n, is_rgb;
   z->s->img_n = 0; // make stbi__cleanup_jpeg safe

   // validate req_comp
   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");

   // load a jpeg image from whichever source, but leave in YCbCr format
// ... whichever source, but leave in YCbCr format
   if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }

   // determine actual number of components to generate
   n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;

   is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));

   // grayscale output from a YCbCr image only needs the luma plane decoded
   if (z->s->img_n == 3 && n < 3 && !is_rgb)
      decode_n = 1;
   else
      decode_n = z->s->img_n;

   // resample and color-convert
   {
      int k;
      unsigned int i,j;
      stbi_uc *output;
      stbi_uc *coutput[4];

      stbi__resample res_comp[4];

      for (k=0; k < decode_n; ++k) {
         stbi__resample *r = &res_comp[k];

         // allocate line buffer big enough for upsampling off the edges
         // with upsample factor of 4
         z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

         r->hs      = z->img_h_max / z->img_comp[k].h;
         r->vs      = z->img_v_max / z->img_comp[k].v;
         r->ystep   = r->vs >> 1;
         r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
         r->ypos    = 0;
         r->line0   = r->line1 = z->img_comp[k].data;

         // pick the fastest row resampler that matches this component's factors
         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
         else                               r->resample = stbi__resample_row_generic;
      }

      // can't error after this so, this is safe
      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
      if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

      // now go ahead and resample
      for (j=0; j < z->s->img_y; ++j) {
         stbi_uc *out = output + n * z->s->img_x * j;
         for (k=0; k < decode_n; ++k) {
            stbi__resample *r = &res_comp[k];
            // y_bot selects which of the two source rows is "nearer" this output row
            int y_bot = r->ystep >= (r->vs >> 1);
            coutput[k] = r->resample(z->img_comp[k].linebuf,
                                     y_bot ? r->line1 : r->line0,
                                     y_bot ? r->line0 : r->line1,
                                     r->w_lores, r->hs);
            if (++r->ystep >= r->vs) {
               r->ystep = 0;
               r->line0 = r->line1;
               if (++r->ypos < z->img_comp[k].y)
                  r->line1 += z->img_comp[k].w2;
            }
         }
         if (n >= 3) {
            stbi_uc *y = coutput[0];
            if (z->s->img_n == 3) {
               if (is_rgb) {
                  for (i=0; i < z->s->img_x; ++i) {
                     out[0] = y[i];
                     out[1] = coutput[1][i];
                     out[2] = coutput[2][i];
                     out[3] = 255;
                     out += n;
                  }
               } else {
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else if (z->s->img_n == 4) {
               if (z->app14_color_transform == 0) { // CMYK
                  for (i=0; i < z->s->img_x; ++i) {
                     stbi_uc m = coutput[3][i];
                     out[0] = stbi__blinn_8x8(coutput[0][i], m);
                     out[1] = stbi__blinn_8x8(coutput[1][i], m);
                     out[2] = stbi__blinn_8x8(coutput[2][i], m);
                     out[3] = 255;
                     out += n;
                  }
               } else if (z->app14_color_transform == 2) { // YCCK
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
                  for (i=0; i < z->s->img_x; ++i) {
                     stbi_uc m = coutput[3][i];
                     out[0] = stbi__blinn_8x8(255 - out[0], m);
                     out[1] = stbi__blinn_8x8(255 - out[1], m);
                     out[2] = stbi__blinn_8x8(255 - out[2], m);
                     out += n;
                  }
               } else { // YCbCr + alpha?  Ignore the fourth channel for now
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = out[1] = out[2] = y[i];
                  out[3] = 255; // not used if n==3
                  out += n;
               }
         } else {
            // 1- or 2-channel (gray / gray+alpha) output
            if (is_rgb) {
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i)
                     *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
               else {
                  for (i=0; i < z->s->img_x; ++i, out += 2) {
                     out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
                     out[1] = 255;
                  }
               }
            } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
               for (i=0; i < z->s->img_x; ++i) {
                  stbi_uc m = coutput[3][i];
                  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
                  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
                  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
                  out[0] = stbi__compute_y(r, g, b);
                  out[1] = 255;
                  out += n;
               }
            } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
                  out[1] = 255;
                  out += n;
               }
            } else {
               stbi_uc *y = coutput[0];
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
               else
                  for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
            }
         }
      }
      stbi__cleanup_jpeg(z);
      *out_x = z->s->img_x;
      *out_y = z->s->img_y;
      if (comp) *comp = z->s->img_n >= 3 ?
3 : 1; // report original components, not output + return output; + } +} + +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + unsigned char* result; + stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg)); + STBI_NOTUSED(ri); + j->s = s; + stbi__setup_jpeg(j); + result = load_jpeg_image(j, x,y,comp,req_comp); + STBI_FREE(j); + return result; +} + +static int stbi__jpeg_test(stbi__context *s) +{ + int r; + stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg)); + j->s = s; + stbi__setup_jpeg(j); + r = stbi__decode_jpeg_header(j, STBI__SCAN_type); + stbi__rewind(s); + STBI_FREE(j); + return r; +} + +static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) +{ + if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) { + stbi__rewind( j->s ); + return 0; + } + if (x) *x = j->s->img_x; + if (y) *y = j->s->img_y; + if (comp) *comp = j->s->img_n >= 3 ? 3 : 1; + return 1; +} + +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) +{ + int result; + stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg))); + j->s = s; + result = stbi__jpeg_info_raw(j, x, y, comp); + STBI_FREE(j); + return result; +} +#endif + +// public domain zlib decode v0.2 Sean Barrett 2006-11-18 +// simple implementation +// - all input must be provided in an upfront buffer +// - all output is written to a single output buffer (can malloc/realloc) +// performance +// - fast huffman + +#ifndef STBI_NO_ZLIB + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables +#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1) + +// zlib-style huffman encoding +// (jpegs packs from left, zlib from right, so can't share code) +typedef struct +{ + stbi__uint16 fast[1 << STBI__ZFAST_BITS]; + stbi__uint16 firstcode[16]; + int maxcode[17]; + stbi__uint16 firstsymbol[16]; + stbi_uc size[288]; + stbi__uint16 
value[288];
} stbi__zhuffman;

stbi_inline static int stbi__bitreverse16(int n)
{
  // classic 16-bit bit reversal by swapping progressively larger groups
  n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
  n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
  n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
  n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
  return n;
}

stbi_inline static int stbi__bit_reverse(int v, int bits)
{
   STBI_ASSERT(bits <= 16);
   // to bit reverse n bits, reverse 16 and shift
   // e.g. 11 bits, bit reverse and shift away 5
   return stbi__bitreverse16(v) >> (16-bits);
}

// build canonical huffman decode tables (fast lookup + per-length ranges)
// from a list of code lengths, per the DEFLATE spec
static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
{
   int i,k=0;
   int code, next_code[16], sizes[17];

   // DEFLATE spec for generating codes
   memset(sizes, 0, sizeof(sizes));
   memset(z->fast, 0, sizeof(z->fast));
   for (i=0; i < num; ++i)
      ++sizes[sizelist[i]];
   sizes[0] = 0;
   for (i=1; i < 16; ++i)
      if (sizes[i] > (1 << i))
         return stbi__err("bad sizes", "Corrupt PNG");
   code = 0;
   for (i=1; i < 16; ++i) {
      next_code[i] = code;
      z->firstcode[i] = (stbi__uint16) code;
      z->firstsymbol[i] = (stbi__uint16) k;
      code = (code + sizes[i]);
      if (sizes[i])
         if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
      z->maxcode[i] = code << (16-i); // preshift for inner loop
      code <<= 1;
      k += sizes[i];
   }
   z->maxcode[16] = 0x10000; // sentinel
   for (i=0; i < num; ++i) {
      int s = sizelist[i];
      if (s) {
         int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
         stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
         z->size [c] = (stbi_uc     ) s;
         z->value[c] = (stbi__uint16) i;
         if (s <= STBI__ZFAST_BITS) {
            // replicate the entry for every high-bit pattern so any
            // STBI__ZFAST_BITS-wide peek resolves short codes in one lookup
            int j = stbi__bit_reverse(next_code[s],s);
            while (j < (1 << STBI__ZFAST_BITS)) {
               z->fast[j] = fastv;
               j += (1 << s);
            }
         }
         ++next_code[s];
      }
   }
   return 1;
}

// zlib-from-memory implementation for PNG reading
//    because PNG allows splitting the zlib stream arbitrarily,
//    and it's annoying structurally to have PNG call ZLIB call PNG,
//    we require PNG read all the IDATs and combine them into a single
//    memory buffer

typedef struct
{
   stbi_uc *zbuffer, *zbuffer_end;
   int num_bits;
   stbi__uint32 code_buffer;

   char *zout;
   char *zout_start;
   char *zout_end;
   int   z_expandable;

   stbi__zhuffman z_length, z_distance;
} stbi__zbuf;

stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
{
   // past-the-end reads return 0 so a truncated stream fails later, not here
   if (z->zbuffer >= z->zbuffer_end) return 0;
   return *z->zbuffer++;
}

static void stbi__fill_bits(stbi__zbuf *z)
{
   // top up the bit buffer to at least 25 bits (LSB-first, per DEFLATE)
   do {
      STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
      z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
      z->num_bits += 8;
   } while (z->num_bits <= 24);
}

stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
{
   // read n bits, LSB-first
   unsigned int k;
   if (z->num_bits < n) stbi__fill_bits(z);
   k = z->code_buffer & ((1 << n) - 1);
   z->code_buffer >>= n;
   z->num_bits -= n;
   return k;
}

static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
{
   int b,s,k;
   // not resolved by fast table, so compute it the slow way
   // use jpeg approach, which requires MSbits at top
   k = stbi__bit_reverse(a->code_buffer, 16);
   for (s=STBI__ZFAST_BITS+1; ; ++s)
      if (k < z->maxcode[s])
         break;
   if (s == 16) return -1; // invalid code!
   // code size is s, so:
   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
   STBI_ASSERT(z->size[b] == s);
   a->code_buffer >>= s;
   a->num_bits -= s;
   return z->value[b];
}

stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
{
   int b,s;
   if (a->num_bits < 16) stbi__fill_bits(a);
   b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
   if (b) {
      // fast table hit: entry packs (length << 9) | symbol
      s = b >> 9;
      a->code_buffer >>= s;
      a->num_bits -= s;
      return b & 511;
   }
   return stbi__zhuffman_decode_slowpath(a, z);
}

static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
{
   char *q;
   int cur, limit, old_limit;
   z->zout = zout;
   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
   cur   = (int) (z->zout - z->zout_start);
   limit = old_limit = (int) (z->zout_end - z->zout_start);
   // NOTE(review): doubling with int arithmetic could overflow for very large
   // outputs before REALLOC is reached — TODO confirm upstream hardening applies
   while (cur + n > limit)
      limit *= 2;
   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
   STBI_NOTUSED(old_limit);
   if (q == NULL) return stbi__err("outofmem", "Out of memory");
   z->zout_start = q;
   z->zout       = q + cur;
   z->zout_end   = q + limit;
   return 1;
}

// base lengths/distances and extra-bit counts for DEFLATE length/distance codes
static const int stbi__zlength_base[31] = {
   3,4,5,6,7,8,9,10,11,13,
   15,17,19,23,27,31,35,43,51,59,
   67,83,99,115,131,163,195,227,258,0,0 };

static const int stbi__zlength_extra[31]=
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};

static const int stbi__zdist_extra[32] =
{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};

// decode one compressed DEFLATE block (literal/length+distance stream)
static int stbi__parse_huffman_block(stbi__zbuf *a)
{
   char *zout = a->zout;
   for(;;) {
      int z = stbi__zhuffman_decode(a, &a->z_length);
      if (z < 256) {
         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
         if (zout >= a->zout_end) {
            if (!stbi__zexpand(a, zout, 1)) return
0;
            zout = a->zout;
         }
         *zout++ = (char) z;
      } else {
         stbi_uc *p;
         int len,dist;
         if (z == 256) {
            // end-of-block symbol
            a->zout = zout;
            return 1;
         }
         z -= 257;
         len = stbi__zlength_base[z];
         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
         z = stbi__zhuffman_decode(a, &a->z_distance);
         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG");
         dist = stbi__zdist_base[z];
         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
         if (zout + len > a->zout_end) {
            if (!stbi__zexpand(a, zout, len)) return 0;
            zout = a->zout;
         }
         p = (stbi_uc *) (zout - dist);
         if (dist == 1) { // run of one byte; common in images.
            stbi_uc v = *p;
            if (len) { do *zout++ = v; while (--len); }
         } else {
            // byte-by-byte copy is required: source and dest may overlap
            if (len) { do *zout++ = *p++; while (--len); }
         }
      }
   }
}

// read a dynamic-huffman block header: decode the code-length code, then
// the literal/length and distance code lengths themselves
static int stbi__compute_huffman_codes(stbi__zbuf *a)
{
   static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
   stbi__zhuffman z_codelength;
   stbi_uc lencodes[286+32+137];//padding for maximum single op
   stbi_uc codelength_sizes[19];
   int i,n;

   int hlit  = stbi__zreceive(a,5) + 257;
   int hdist = stbi__zreceive(a,5) + 1;
   int hclen = stbi__zreceive(a,4) + 4;
   int ntot  = hlit + hdist;

   memset(codelength_sizes, 0, sizeof(codelength_sizes));
   for (i=0; i < hclen; ++i) {
      int s = stbi__zreceive(a,3);
      codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
   }
   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;

   n = 0;
   while (n < ntot) {
      int c = stbi__zhuffman_decode(a, &z_codelength);
      if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
      if (c < 16)
         lencodes[n++] = (stbi_uc) c;
      else {
         // 16/17/18 are repeat codes (repeat previous / repeat zero)
         stbi_uc fill = 0;
         if (c == 16) {
            c = stbi__zreceive(a,2)+3;
            if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
            fill = lencodes[n-1];
         } else if (c == 17)
            c = stbi__zreceive(a,3)+3;
         else {
            STBI_ASSERT(c == 18);
            c = stbi__zreceive(a,7)+11;
         }
         if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
         memset(lencodes+n, fill, c);
         n += c;
      }
   }
   if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
   return 1;
}

// stored (uncompressed) DEFLATE block: byte-align, read LEN/NLEN, memcpy
static int stbi__parse_uncompressed_block(stbi__zbuf *a)
{
   stbi_uc header[4];
   int len,nlen,k;
   if (a->num_bits & 7)
      stbi__zreceive(a, a->num_bits & 7); // discard
   // drain the bit-packed data into header
   k = 0;
   while (a->num_bits > 0) {
      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
      a->code_buffer >>= 8;
      a->num_bits -= 8;
   }
   STBI_ASSERT(a->num_bits == 0);
   // now fill header the normal way
   while (k < 4)
      header[k++] = stbi__zget8(a);
   len  = header[1] * 256 + header[0];
   nlen = header[3] * 256 + header[2];
   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
   if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
   if (a->zout + len > a->zout_end)
      if (!stbi__zexpand(a, a->zout, len)) return 0;
   memcpy(a->zout, a->zbuffer, len);
   a->zbuffer += len;
   a->zout += len;
   return 1;
}

static int stbi__parse_zlib_header(stbi__zbuf *a)
{
   int cmf   = stbi__zget8(a);
   int cm    = cmf & 15;
   /* int cinfo = cmf >> 4; */
   int flg   = stbi__zget8(a);
   if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
   if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
   if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
   return 1;
}

// fixed-huffman code lengths from the DEFLATE spec (see init algorithm below)
static const stbi_uc stbi__zdefault_length[288] =
{
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
};
static const stbi_uc stbi__zdefault_distance[32] =
{
   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
};
/*
Init algorithm:
{
   int i;   // use <= to match clearly with spec
   for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
   for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
   for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
   for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;

   for (i=0; i <= 31; ++i)      stbi__zdefault_distance[i] = 5;
}
*/

// top-level DEFLATE loop: optional zlib header, then blocks until BFINAL
static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
{
   int final, type;
   if (parse_header)
      if (!stbi__parse_zlib_header(a)) return 0;
   a->num_bits = 0;
   a->code_buffer = 0;
   do {
      final = stbi__zreceive(a,1);
      type = stbi__zreceive(a,2);
      if (type == 0) {
         if (!stbi__parse_uncompressed_block(a)) return 0;
      } else if (type == 3) {
         // BTYPE 3 is reserved / invalid
         return 0;
      } else {
         if (type == 1) {
            // use fixed code lengths
            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
         } else {
            if (!stbi__compute_huffman_codes(a)) return 0;
         }
         if (!stbi__parse_huffman_block(a)) return 0;
      }
   } while (!final);
   return 1;
}

static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
{
   // wire up the output window (fixed buffer, optionally growable) and decode
   a->zout_start = obuf;
   a->zout       = obuf;
   a->zout_end   = obuf + olen;
   a->z_expandable = exp;

   return stbi__parse_zlib(a, parse_header);
}

STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
{
   stbi__zbuf a;
   char *p = (char *) stbi__malloc(initial_size);
   if (p == NULL) return NULL;
   a.zbuffer = (stbi_uc *) buffer;
   a.zbuffer_end = (stbi_uc *) buffer + len;
   if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
      if (outlen) *outlen = (int) (a.zout - a.zout_start);
      return a.zout_start;
   } else {
      STBI_FREE(a.zout_start);
      return NULL;
   }
}

STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
{
   return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
}

STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
{
   stbi__zbuf a;
   char *p = (char *) stbi__malloc(initial_size);
   if (p == NULL) return NULL;
   a.zbuffer = (stbi_uc *) buffer;
   a.zbuffer_end = (stbi_uc *) buffer + len;
   if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
      if (outlen) *outlen = (int) (a.zout - a.zout_start);
      return a.zout_start;
   } else {
      STBI_FREE(a.zout_start);
      return NULL;
   }
}

STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
{
   // decode into a caller-provided, non-growable buffer; -1 on failure
   stbi__zbuf a;
   a.zbuffer = (stbi_uc *) ibuffer;
   a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
   if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
      return (int) (a.zout - a.zout_start);
   else
      return -1;
}

STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
{
   stbi__zbuf a;
   char *p = (char *) stbi__malloc(16384);
   if (p == NULL) return NULL;
   a.zbuffer = (stbi_uc *) buffer;
   a.zbuffer_end = (stbi_uc *) buffer+len;
   if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
      if (outlen) *outlen = (int) (a.zout -
a.zout_start);
      return a.zout_start;
   } else {
      STBI_FREE(a.zout_start);
      return NULL;
   }
}

// Decode a raw (headerless) DEFLATE stream into a fixed caller buffer.
// Returns the number of bytes written, or -1 on any decode failure.
STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
{
   stbi__zbuf a;
   a.zbuffer     = (stbi_uc *) ibuffer;
   a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
   if (!stbi__do_zlib(&a, obuffer, olen, 0, 0))
      return -1;
   return (int) (a.zout - a.zout_start);
}
#endif

// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
//    simple implementation
//      - only 8-bit samples
//      - no CRC checking
//      - allocates lots of intermediate memory
//        - avoids problem of streaming data between subsystems
//        - avoids explicit window management
//    performance
//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding

#ifndef STBI_NO_PNG
// A PNG chunk header: big-endian payload length followed by a 4-byte type code.
typedef struct
{
   stbi__uint32 length;
   stbi__uint32 type;
} stbi__pngchunk;

static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
{
   // read the two big-endian words that start every chunk
   stbi__pngchunk chunk;
   chunk.length = stbi__get32be(s);
   chunk.type   = stbi__get32be(s);
   return chunk;
}

static int stbi__check_png_header(stbi__context *s)
{
   // the fixed 8-byte PNG file signature; every byte must match exactly
   static const stbi_uc expected[8] = { 137,80,78,71,13,10,26,10 };
   int k = 0;
   while (k < 8) {
      if (stbi__get8(s) != expected[k])
         return stbi__err("bad png sig","Not a PNG");
      ++k;
   }
   return 1;
}

// Decoder state: the input context plus the three working buffers
// (concatenated IDAT data, inflated filter data, final pixel output).
typedef struct
{
   stbi__context *s;
   stbi_uc *idata, *expanded, *out;
   int depth;
} stbi__png;


enum {
   STBI__F_none=0,
   STBI__F_sub=1,
   STBI__F_up=2,
   STBI__F_avg=3,
   STBI__F_paeth=4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first,
   STBI__F_paeth_first
};

static stbi_uc first_row_filter[5] =
{
   STBI__F_none,
   STBI__F_sub,
   STBI__F_none,
   STBI__F_avg_first,
   STBI__F_paeth_first
};

static int stbi__paeth(int a, int b, int c)
{
   // Paeth predictor from the PNG spec: choose whichever of a, b, c is
   // closest to p = a + b - c, with ties broken in the order a, b, c.
   int p  = a + b - c;
   int da = abs(p - a);
   int db = abs(p - b);
   int dc = abs(p - c);
   if (da > db || da > dc) {
      // a lost; fall back to whichever of b, c is nearer (b wins ties)
      return (db <= dc) ? b : c;
   }
   return a;
}

+static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; + +// create the png data from post-deflated data +static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) +{ + int bytes = (depth == 16? 2 : 1); + stbi__context *s = a->s; + stbi__uint32 i,j,stride = x*out_n*bytes; + stbi__uint32 img_len, img_width_bytes; + int k; + int img_n = s->img_n; // copy it into a local for later + + int output_bytes = out_n*bytes; + int filter_bytes = img_n*bytes; + int width = x; + + STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1); + a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into + if (!a->out) return stbi__err("outofmem", "Out of memory"); + + if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); + img_width_bytes = (((img_n * x * depth) + 7) >> 3); + img_len = (img_width_bytes + 1) * y; + + // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, + // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros), + // so just check for raw_len < img_len always. 
+ if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *prior; + int filter = *raw++; + + if (filter > 4) + return stbi__err("invalid filter","Corrupt PNG"); + + if (depth < 8) { + STBI_ASSERT(img_width_bytes <= x); + cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place + filter_bytes = 1; + width = img_width_bytes; + } + prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above + + // if first row, use special filter that doesn't sample previous row + if (j == 0) filter = first_row_filter[filter]; + + // handle first byte explicitly + for (k=0; k < filter_bytes; ++k) { + switch (filter) { + case STBI__F_none : cur[k] = raw[k]; break; + case STBI__F_sub : cur[k] = raw[k]; break; + case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; + case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; + case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; + case STBI__F_avg_first : cur[k] = raw[k]; break; + case STBI__F_paeth_first: cur[k] = raw[k]; break; + } + } + + if (depth == 8) { + if (img_n != out_n) + cur[img_n] = 255; // first pixel + raw += img_n; + cur += out_n; + prior += out_n; + } else if (depth == 16) { + if (img_n != out_n) { + cur[filter_bytes] = 255; // first pixel top byte + cur[filter_bytes+1] = 255; // first pixel bottom byte + } + raw += filter_bytes; + cur += output_bytes; + prior += output_bytes; + } else { + raw += 1; + cur += 1; + prior += 1; + } + + // this is a little gross, so that we don't switch per-pixel or per-component + if (depth < 8 || img_n == out_n) { + int nk = (width - 1)*filter_bytes; + #define STBI__CASE(f) \ + case f: \ + for (k=0; k < nk; ++k) + switch (filter) { + // "none" filter turns into a memcpy here; make that explicit. 
+ case STBI__F_none: memcpy(cur, raw, nk); break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; + } + #undef STBI__CASE + raw += nk; + } else { + STBI_ASSERT(img_n+1 == out_n); + #define STBI__CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ + for (k=0; k < filter_bytes; ++k) + switch (filter) { + STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; + } + #undef STBI__CASE + + // the loop above sets the high byte of the pixels' alpha, but for + // 16 bit png files we also need the low byte set. we'll do that here. 
+ if (depth == 16) { + cur = a->out + stride*j; // start at the beginning of the row again + for (i=0; i < x; ++i,cur+=output_bytes) { + cur[filter_bytes+1] = 255; + } + } + } + } + + // we make a separate pass to expand bits to pixels; for performance, + // this could run two scanlines behind the above code, so it won't + // intefere with filtering but will still be in the cache. + if (depth < 8) { + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; + // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit + // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop + stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range + + // note that the final byte might overshoot and write more data than desired. + // we can allocate enough data that this never writes out of memory, but it + // could also overwrite the next scanline. can it overwrite non-empty data + // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. 
+ // so we need to explicitly clamp the final ones + + if (depth == 4) { + for (k=x*img_n; k >= 2; k-=2, ++in) { + *cur++ = scale * ((*in >> 4) ); + *cur++ = scale * ((*in ) & 0x0f); + } + if (k > 0) *cur++ = scale * ((*in >> 4) ); + } else if (depth == 2) { + for (k=x*img_n; k >= 4; k-=4, ++in) { + *cur++ = scale * ((*in >> 6) ); + *cur++ = scale * ((*in >> 4) & 0x03); + *cur++ = scale * ((*in >> 2) & 0x03); + *cur++ = scale * ((*in ) & 0x03); + } + if (k > 0) *cur++ = scale * ((*in >> 6) ); + if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); + if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); + } else if (depth == 1) { + for (k=x*img_n; k >= 8; k-=8, ++in) { + *cur++ = scale * ((*in >> 7) ); + *cur++ = scale * ((*in >> 6) & 0x01); + *cur++ = scale * ((*in >> 5) & 0x01); + *cur++ = scale * ((*in >> 4) & 0x01); + *cur++ = scale * ((*in >> 3) & 0x01); + *cur++ = scale * ((*in >> 2) & 0x01); + *cur++ = scale * ((*in >> 1) & 0x01); + *cur++ = scale * ((*in ) & 0x01); + } + if (k > 0) *cur++ = scale * ((*in >> 7) ); + if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); + if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); + if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); + if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); + if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); + if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); + } + if (img_n != out_n) { + int q; + // insert alpha = 255 + cur = a->out + stride*j; + if (img_n == 1) { + for (q=x-1; q >= 0; --q) { + cur[q*2+1] = 255; + cur[q*2+0] = cur[q]; + } + } else { + STBI_ASSERT(img_n == 3); + for (q=x-1; q >= 0; --q) { + cur[q*4+3] = 255; + cur[q*4+2] = cur[q*3+2]; + cur[q*4+1] = cur[q*3+1]; + cur[q*4+0] = cur[q*3+0]; + } + } + } + } + } else if (depth == 16) { + // force the image data from big-endian to platform-native. + // this is done in a separate pass due to the decoding relying + // on the data being untouched, but could probably be done + // per-line during decode if care is taken. 
// Decode the post-inflate PNG data into a->out. Non-interlaced images go
// straight to stbi__create_png_image_raw; Adam7-interlaced images are decoded
// one sub-image per pass and scattered into a final full-size buffer.
// Returns 1 on success, 0 on error (stbi error string set).
static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
{
   int bytes = (depth == 16 ? 2 : 1);
   int out_bytes = out_n * bytes;
   stbi_uc *final;
   int p;
   if (!interlaced)
      return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);

   // de-interlacing: allocate the full-size destination up front
   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
   // BUG FIX: the allocation was previously used unchecked; a failed
   // stbi__malloc_mad3 would make the memcpy below write through NULL.
   if (!final) return stbi__err("outofmem", "Out of memory");
   for (p=0; p < 7; ++p) {
      // Adam7 pass geometry: origin and spacing of each pass within the image
      int xorig[] = { 0,4,0,2,0,1,0 };
      int yorig[] = { 0,0,4,0,2,0,1 };
      int xspc[]  = { 8,8,4,4,2,2,1 };
      int yspc[]  = { 8,8,8,4,4,2,2 };
      int i,j,x,y;
      // number of pixels this pass contributes in each dimension
      x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
      y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
      if (x && y) {
         // bytes consumed by this pass: filter byte + packed scanline, per row
         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
            STBI_FREE(final);
            return 0;
         }
         // scatter the pass's pixels to their interleaved positions
         for (j=0; j < y; ++j) {
            for (i=0; i < x; ++i) {
               int out_y = j*yspc[p]+yorig[p];
               int out_x = i*xspc[p]+xorig[p];
               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
                      a->out + (j*x+i)*out_bytes, out_bytes);
            }
         }
         STBI_FREE(a->out);
         image_data += img_len;
         image_data_len -= img_len;
      }
   }
   a->out = final;

   return 1;
}
? 0 : 255); + p += 2; + } + } else { + for (i=0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi__uint16 *p = (stbi__uint16*) z->out; + + // compute color-based transparency, assuming we've + // already got 65535 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i = 0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 65535); + p += 2; + } + } else { + for (i = 0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n) +{ + stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y; + stbi_uc *p, *temp_out, *orig = a->out; + + p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0); + if (p == NULL) return stbi__err("outofmem", "Out of memory"); + + // between here and free(out) below, exitting would leak + temp_out = p; + + if (pal_img_n == 3) { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p += 3; + } + } else { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p[3] = palette[n+3]; + p += 4; + } + } + STBI_FREE(a->out); + a->out = temp_out; + + STBI_NOTUSED(len); + + return 1; +} + +static int stbi__unpremultiply_on_load = 0; +static int stbi__de_iphone_flag = 0; + +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) +{ + stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply; +} + +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) +{ + stbi__de_iphone_flag = 
flag_true_if_should_convert; +} + +static void stbi__de_iphone(stbi__png *z) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + if (s->img_out_n == 3) { // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 3; + } + } else { + STBI_ASSERT(s->img_out_n == 4); + if (stbi__unpremultiply_on_load) { + // convert bgr to rgb and unpremultiply + for (i=0; i < pixel_count; ++i) { + stbi_uc a = p[3]; + stbi_uc t = p[0]; + if (a) { + stbi_uc half = a / 2; + p[0] = (p[2] * 255 + half) / a; + p[1] = (p[1] * 255 + half) / a; + p[2] = ( t * 255 + half) / a; + } else { + p[0] = p[2]; + p[2] = t; + } + p += 4; + } + } else { + // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 4; + } + } + } +} + +#define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) + +static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) +{ + stbi_uc palette[1024], pal_img_n=0; + stbi_uc has_trans=0, tc[3]; + stbi__uint16 tc16[3]; + stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; + int first=1,k,interlace=0, color=0, is_iphone=0; + stbi__context *s = z->s; + + z->expanded = NULL; + z->idata = NULL; + z->out = NULL; + + if (!stbi__check_png_header(s)) return 0; + + if (scan == STBI__SCAN_type) return 1; + + for (;;) { + stbi__pngchunk c = stbi__get_chunk_header(s); + switch (c.type) { + case STBI__PNG_TYPE('C','g','B','I'): + is_iphone = 1; + stbi__skip(s, c.length); + break; + case STBI__PNG_TYPE('I','H','D','R'): { + int comp,filter; + if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); + first = 0; + if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); + s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)"); + s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) 
return stbi__err("too large","Very large image (corrupt?)"); + z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); + color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); + comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG"); + filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG"); + interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); + if (!pal_img_n) { + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); + if (scan == STBI__SCAN_header) return 1; + } else { + // if paletted, then pal_n is our final components, and + // img_n is # components to decompress/filter. 
+ s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); + // if SCAN_header, have to scan to see if we have a tRNS + } + break; + } + + case STBI__PNG_TYPE('P','L','T','E'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); + pal_len = c.length / 3; + if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); + for (i=0; i < pal_len; ++i) { + palette[i*4+0] = stbi__get8(s); + palette[i*4+1] = stbi__get8(s); + palette[i*4+2] = stbi__get8(s); + palette[i*4+3] = 255; + } + break; + } + + case STBI__PNG_TYPE('t','R','N','S'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); + if (pal_img_n) { + if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } + if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); + if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); + pal_img_n = 4; + for (i=0; i < c.length; ++i) + palette[i*4+3] = stbi__get8(s); + } else { + if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); + if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); + has_trans = 1; + if (z->depth == 16) { + for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is + } else { + for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger + } + } + break; + } + + case STBI__PNG_TYPE('I','D','A','T'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); + if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; } + if ((int)(ioff + c.length) < (int)ioff) return 0; + if (ioff + c.length > idata_limit) { + stbi__uint32 idata_limit_old = 
idata_limit; + stbi_uc *p; + if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096; + while (ioff + c.length > idata_limit) + idata_limit *= 2; + STBI_NOTUSED(idata_limit_old); + p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); + z->idata = p; + } + if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); + ioff += c.length; + break; + } + + case STBI__PNG_TYPE('I','E','N','D'): { + stbi__uint32 raw_len, bpl; + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (scan != STBI__SCAN_load) return 1; + if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); + // initial guess for decoded data size to avoid unnecessary reallocs + bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component + raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; + z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); + if (z->expanded == NULL) return 0; // zlib should set error + STBI_FREE(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; + else + s->img_out_n = s->img_n; + if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; + if (has_trans) { + if (z->depth == 16) { + if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; + } else { + if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; + } + } + if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) + stbi__de_iphone(z); + if (pal_img_n) { + // pal_img_n == 3 or 4 + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) + return 0; + } else if 
// Top-level PNG decode driver: parses the file via stbi__parse_png_file,
// then transfers ownership of the decoded buffer (p->out) to the caller,
// converting channel count to req_comp when requested.
// Returns the pixel buffer (caller frees) or NULL on failure; *x/*y and
// optionally *n receive the image dimensions and source channel count.
static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
{
   void *result=NULL;
   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
      // sub-8-bit depths were already expanded to 8-bit samples during decode
      if (p->depth < 8)
         ri->bits_per_channel = 8;
      else
         ri->bits_per_channel = p->depth;
      // take ownership of the decoded buffer; clearing p->out prevents the
      // cleanup below from freeing what we are about to return
      result = p->out;
      p->out = NULL;
      if (req_comp && req_comp != p->s->img_out_n) {
         if (ri->bits_per_channel == 8)
            result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
         else
            result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
         p->s->img_out_n = req_comp;
         // conversion frees its input on failure, so returning NULL here leaks
         // nothing (p->out/expanded/idata are already NULL after a full parse)
         if (result == NULL) return result;
      }
      *x = p->s->img_x;
      *y = p->s->img_y;
      if (n) *n = p->s->img_n;
   }
   // release any intermediate buffers left over (no-ops on success paths
   // where the parser already freed/NULLed them)
   STBI_FREE(p->out); p->out = NULL;
   STBI_FREE(p->expanded); p->expanded = NULL;
   STBI_FREE(p->idata); p->idata = NULL;

   return result;
}
// Index (0..31) of the highest set bit of z; -1 when z is zero.
static int stbi__high_bit(unsigned int z)
{
   int pos = -1;
   while (z) {
      ++pos;
      z >>= 1;
   }
   return pos;
}
// Extract an arbitrarily-aligned N-bit field (N = bits) from v using the
// given right-shift (negative means left-shift), then stretch it to the full
// 0..255 range by fractional bit replication: multiply by a repeating-bit
// pattern and shift back, so e.g. a 5-bit 31 maps to 255.
static int stbi__shiftsigned(int v, int shift, int bits)
{
   // mul_table[N] repeats the top N bits across a byte; shift_table[N]
   // drops the excess bits the multiply produced.
   static unsigned int mul_table[9] = {
      0,
      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
   };
   static unsigned int shift_table[9] = {
      0, 0,0,1,0,2,4,6,0,
   };
   int field = (shift < 0) ? (v << -shift) : (v >> shift);
   STBI_ASSERT(field >= 0 && field < 256);
   field >>= (8-bits);
   STBI_ASSERT(bits >= 0 && bits <= 8);
   return (int) ((unsigned) field * mul_table[bits]) >> shift_table[bits];
}
discard colorsused + stbi__get32le(s); // discard max important + if (hsz == 40 || hsz == 56) { + if (hsz == 56) { + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + } + if (info->bpp == 16 || info->bpp == 32) { + if (compress == 0) { + if (info->bpp == 32) { + info->mr = 0xffu << 16; + info->mg = 0xffu << 8; + info->mb = 0xffu << 0; + info->ma = 0xffu << 24; + info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 + } else { + info->mr = 31u << 10; + info->mg = 31u << 5; + info->mb = 31u << 0; + } + } else if (compress == 3) { + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + // not documented, but generated by photoshop and handled by mspaint + if (info->mr == info->mg && info->mg == info->mb) { + // ?!?!? + return stbi__errpuc("bad BMP", "bad BMP"); + } + } else + return stbi__errpuc("bad BMP", "bad BMP"); + } + } else { + int i; + if (hsz != 108 && hsz != 124) + return stbi__errpuc("bad BMP", "bad BMP"); + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + info->ma = stbi__get32le(s); + stbi__get32le(s); // discard color space + for (i=0; i < 12; ++i) + stbi__get32le(s); // discard color space parameters + if (hsz == 124) { + stbi__get32le(s); // discard rendering intent + stbi__get32le(s); // discard offset of profile data + stbi__get32le(s); // discard size of profile data + stbi__get32le(s); // discard reserved + } + } + } + return (void *) 1; +} + + +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + unsigned int mr=0,mg=0,mb=0,ma=0, all_a; + stbi_uc pal[256][4]; + int psize=0,i,j,width; + int flip_vertically, pad, target; + stbi__bmp_data info; + STBI_NOTUSED(ri); + + info.all_a = 255; + if (stbi__bmp_parse_header(s, &info) == NULL) + return NULL; // error code already set + + flip_vertically = ((int) s->img_y) > 0; + s->img_y = 
abs((int) s->img_y); + + mr = info.mr; + mg = info.mg; + mb = info.mb; + ma = info.ma; + all_a = info.all_a; + + if (info.hsz == 12) { + if (info.bpp < 24) + psize = (info.offset - 14 - 24) / 3; + } else { + if (info.bpp < 16) + psize = (info.offset - 14 - info.hsz) >> 2; + } + + s->img_n = ma ? 4 : 3; + if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 + target = req_comp; + else + target = s->img_n; // if they want monochrome, we'll post-convert + + // sanity-check size + if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "Corrupt BMP"); + + out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + if (info.bpp < 16) { + int z=0; + if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); } + for (i=0; i < psize; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + if (info.hsz != 12) stbi__get8(s); + pal[i][3] = 255; + } + stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 
3 : 4)); + if (info.bpp == 1) width = (s->img_x + 7) >> 3; + else if (info.bpp == 4) width = (s->img_x + 1) >> 1; + else if (info.bpp == 8) width = s->img_x; + else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); } + pad = (-width)&3; + if (info.bpp == 1) { + for (j=0; j < (int) s->img_y; ++j) { + int bit_offset = 7, v = stbi__get8(s); + for (i=0; i < (int) s->img_x; ++i) { + int color = (v>>bit_offset)&0x1; + out[z++] = pal[color][0]; + out[z++] = pal[color][1]; + out[z++] = pal[color][2]; + if((--bit_offset) < 0) { + bit_offset = 7; + v = stbi__get8(s); + } + } + stbi__skip(s, pad); + } + } else { + for (j=0; j < (int) s->img_y; ++j) { + for (i=0; i < (int) s->img_x; i += 2) { + int v=stbi__get8(s),v2=0; + if (info.bpp == 4) { + v2 = v & 15; + v >>= 4; + } + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + v = (info.bpp == 8) ? stbi__get8(s) : v2; + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + } + stbi__skip(s, pad); + } + } + } else { + int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; + int z = 0; + int easy=0; + stbi__skip(s, info.offset - 14 - info.hsz); + if (info.bpp == 24) width = 3 * s->img_x; + else if (info.bpp == 16) width = 2*s->img_x; + else /* bpp = 32 and pad = 0 */ width=0; + pad = (-width) & 3; + if (info.bpp == 24) { + easy = 1; + } else if (info.bpp == 32) { + if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000) + easy = 2; + } + if (!easy) { + if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } + // right shift amt to put high bit in position #7 + rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr); + gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg); + bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb); + ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma); 
+ } + for (j=0; j < (int) s->img_y; ++j) { + if (easy) { + for (i=0; i < (int) s->img_x; ++i) { + unsigned char a; + out[z+2] = stbi__get8(s); + out[z+1] = stbi__get8(s); + out[z+0] = stbi__get8(s); + z += 3; + a = (easy == 2 ? stbi__get8(s) : 255); + all_a |= a; + if (target == 4) out[z++] = a; + } + } else { + int bpp = info.bpp; + for (i=0; i < (int) s->img_x; ++i) { + stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s)); + unsigned int a; + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount)); + a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255); + all_a |= a; + if (target == 4) out[z++] = STBI__BYTECAST(a); + } + } + stbi__skip(s, pad); + } + } + + // if alpha channel is all 0s, replace with all 255s + if (target == 4 && all_a == 0) + for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) + out[i] = 255; + + if (flip_vertically) { + stbi_uc t; + for (j=0; j < (int) s->img_y>>1; ++j) { + stbi_uc *p1 = out + j *s->img_x*target; + stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; + for (i=0; i < (int) s->img_x*target; ++i) { + t = p1[i], p1[i] = p2[i], p2[i] = t; + } + } + } + + if (req_comp && req_comp != target) { + out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + return out; +} +#endif + +// Targa Truevision - TGA +// by Jonathan Dummer +#ifndef STBI_NO_TGA +// returns STBI_rgb or whatever, 0 on error +static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) +{ + // only RGB or RGBA (incl. 
16bit) or grey allowed + if (is_rgb16) *is_rgb16 = 0; + switch(bits_per_pixel) { + case 8: return STBI_grey; + case 16: if(is_grey) return STBI_grey_alpha; + // fallthrough + case 15: if(is_rgb16) *is_rgb16 = 1; + return STBI_rgb; + case 24: // fallthrough + case 32: return bits_per_pixel/8; + default: return 0; + } +} + +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp) +{ + int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp; + int sz, tga_colormap_type; + stbi__get8(s); // discard Offset + tga_colormap_type = stbi__get8(s); // colormap type + if( tga_colormap_type > 1 ) { + stbi__rewind(s); + return 0; // only RGB or indexed allowed + } + tga_image_type = stbi__get8(s); // image type + if ( tga_colormap_type == 1 ) { // colormapped (paletted) image + if (tga_image_type != 1 && tga_image_type != 9) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip image x and y origin + tga_colormap_bpp = sz; + } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE + if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) { + stbi__rewind(s); + return 0; // only RGB or grey allowed, +/- RLE + } + stbi__skip(s,9); // skip colormap specification and image x/y origin + tga_colormap_bpp = 0; + } + tga_w = stbi__get16le(s); + if( tga_w < 1 ) { + stbi__rewind(s); + return 0; // test width + } + tga_h = stbi__get16le(s); + if( tga_h < 1 ) { + stbi__rewind(s); + return 0; // test height + } + tga_bits_per_pixel = stbi__get8(s); // bits per pixel + stbi__get8(s); // ignore alpha bits + if (tga_colormap_bpp != 0) { + if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) { + // when using a colormap, 
tga_bits_per_pixel is the size of the indexes + // I don't think anything but 8 or 16bit indexes makes sense + stbi__rewind(s); + return 0; + } + tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); + } else { + tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); + } + if(!tga_comp) { + stbi__rewind(s); + return 0; + } + if (x) *x = tga_w; + if (y) *y = tga_h; + if (comp) *comp = tga_comp; + return 1; // seems to have passed everything +} + +static int stbi__tga_test(stbi__context *s) +{ + int res = 0; + int sz, tga_color_type; + stbi__get8(s); // discard Offset + tga_color_type = stbi__get8(s); // color type + if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed + sz = stbi__get8(s); // image type + if ( tga_color_type == 1 ) { // colormapped (paletted) image + if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9 + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + stbi__skip(s,4); // skip image x and y origin + } else { // "normal" image w/o colormap + if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE + stbi__skip(s,9); // skip colormap specification and image x/y origin + } + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height + sz = stbi__get8(s); // bits per pixel + if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + + res = 1; // if we got this far, everything's good and we can return 1 instead of 0 + +errorEnd: + stbi__rewind(s); + return res; +} + +// read 16bit value and convert to 24bit RGB 
+static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) +{ + stbi__uint16 px = (stbi__uint16)stbi__get16le(s); + stbi__uint16 fiveBitMask = 31; + // we have 3 channels with 5bits each + int r = (px >> 10) & fiveBitMask; + int g = (px >> 5) & fiveBitMask; + int b = px & fiveBitMask; + // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later + out[0] = (stbi_uc)((r * 255)/31); + out[1] = (stbi_uc)((g * 255)/31); + out[2] = (stbi_uc)((b * 255)/31); + + // some people claim that the most significant bit might be used for alpha + // (possibly if an alpha-bit is set in the "image descriptor byte") + // but that only made 16bit test images completely translucent.. + // so let's treat all 15 and 16bit TGAs as RGB with no alpha. +} + +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + // read in the TGA header stuff + int tga_offset = stbi__get8(s); + int tga_indexed = stbi__get8(s); + int tga_image_type = stbi__get8(s); + int tga_is_RLE = 0; + int tga_palette_start = stbi__get16le(s); + int tga_palette_len = stbi__get16le(s); + int tga_palette_bits = stbi__get8(s); + int tga_x_origin = stbi__get16le(s); + int tga_y_origin = stbi__get16le(s); + int tga_width = stbi__get16le(s); + int tga_height = stbi__get16le(s); + int tga_bits_per_pixel = stbi__get8(s); + int tga_comp, tga_rgb16=0; + int tga_inverted = stbi__get8(s); + // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?) 
+ // image data + unsigned char *tga_data; + unsigned char *tga_palette = NULL; + int i, j; + unsigned char raw_data[4] = {0}; + int RLE_count = 0; + int RLE_repeating = 0; + int read_next_pixel = 1; + STBI_NOTUSED(ri); + + // do a tiny bit of precessing + if ( tga_image_type >= 8 ) + { + tga_image_type -= 8; + tga_is_RLE = 1; + } + tga_inverted = 1 - ((tga_inverted >> 5) & 1); + + // If I'm paletted, then I'll use the number of bits from the palette + if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16); + else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16); + + if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency + return stbi__errpuc("bad format", "Can't find out TGA pixelformat"); + + // tga info + *x = tga_width; + *y = tga_height; + if (comp) *comp = tga_comp; + + if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0)) + return stbi__errpuc("too large", "Corrupt TGA"); + + tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0); + if (!tga_data) return stbi__errpuc("outofmem", "Out of memory"); + + // skip to the data's starting position (offset usually = 0) + stbi__skip(s, tga_offset ); + + if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) { + for (i=0; i < tga_height; ++i) { + int row = tga_inverted ? tga_height -i - 1 : i; + stbi_uc *tga_row = tga_data + row*tga_width*tga_comp; + stbi__getn(s, tga_row, tga_width * tga_comp); + } + } else { + // do I need to load a palette? + if ( tga_indexed) + { + // any data to skip? 
(offset usually = 0) + stbi__skip(s, tga_palette_start ); + // load the palette + tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0); + if (!tga_palette) { + STBI_FREE(tga_data); + return stbi__errpuc("outofmem", "Out of memory"); + } + if (tga_rgb16) { + stbi_uc *pal_entry = tga_palette; + STBI_ASSERT(tga_comp == STBI_rgb); + for (i=0; i < tga_palette_len; ++i) { + stbi__tga_read_rgb16(s, pal_entry); + pal_entry += tga_comp; + } + } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) { + STBI_FREE(tga_data); + STBI_FREE(tga_palette); + return stbi__errpuc("bad palette", "Corrupt TGA"); + } + } + // load the data + for (i=0; i < tga_width * tga_height; ++i) + { + // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk? + if ( tga_is_RLE ) + { + if ( RLE_count == 0 ) + { + // yep, get the next byte as a RLE command + int RLE_cmd = stbi__get8(s); + RLE_count = 1 + (RLE_cmd & 127); + RLE_repeating = RLE_cmd >> 7; + read_next_pixel = 1; + } else if ( !RLE_repeating ) + { + read_next_pixel = 1; + } + } else + { + read_next_pixel = 1; + } + // OK, if I need to read a pixel, do it now + if ( read_next_pixel ) + { + // load however much data we did have + if ( tga_indexed ) + { + // read in index, then perform the lookup + int pal_idx = (tga_bits_per_pixel == 8) ? 
stbi__get8(s) : stbi__get16le(s); + if ( pal_idx >= tga_palette_len ) { + // invalid index + pal_idx = 0; + } + pal_idx *= tga_comp; + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = tga_palette[pal_idx+j]; + } + } else if(tga_rgb16) { + STBI_ASSERT(tga_comp == STBI_rgb); + stbi__tga_read_rgb16(s, raw_data); + } else { + // read in the data raw + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = stbi__get8(s); + } + } + // clear the reading flag for the next pixel + read_next_pixel = 0; + } // end of reading a pixel + + // copy data + for (j = 0; j < tga_comp; ++j) + tga_data[i*tga_comp+j] = raw_data[j]; + + // in case we're in RLE mode, keep counting down + --RLE_count; + } + // do I need to invert the image? + if ( tga_inverted ) + { + for (j = 0; j*2 < tga_height; ++j) + { + int index1 = j * tga_width * tga_comp; + int index2 = (tga_height - 1 - j) * tga_width * tga_comp; + for (i = tga_width * tga_comp; i > 0; --i) + { + unsigned char temp = tga_data[index1]; + tga_data[index1] = tga_data[index2]; + tga_data[index2] = temp; + ++index1; + ++index2; + } + } + } + // clear my palette, if I had one + if ( tga_palette != NULL ) + { + STBI_FREE( tga_palette ); + } + } + + // swap RGB - if the source data was RGB16, it already is in the right order + if (tga_comp >= 3 && !tga_rgb16) + { + unsigned char* tga_pixel = tga_data; + for (i=0; i < tga_width * tga_height; ++i) + { + unsigned char temp = tga_pixel[0]; + tga_pixel[0] = tga_pixel[2]; + tga_pixel[2] = temp; + tga_pixel += tga_comp; + } + } + + // convert to target component count + if (req_comp && req_comp != tga_comp) + tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); + + // the things I do to get rid of an error message, and yet keep + // Microsoft's C compilers happy... 
[8^( + tga_palette_start = tga_palette_len = tga_palette_bits = + tga_x_origin = tga_y_origin = 0; + // OK, done + return tga_data; +} +#endif + +// ************************************************************************************************* +// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s) +{ + int r = (stbi__get32be(s) == 0x38425053); + stbi__rewind(s); + return r; +} + +static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount) +{ + int count, nleft, len; + + count = 0; + while ((nleft = pixelCount - count) > 0) { + len = stbi__get8(s); + if (len == 128) { + // No-op. + } else if (len < 128) { + // Copy next len+1 bytes literally. + len++; + if (len > nleft) return 0; // corrupt data + count += len; + while (len) { + *p = stbi__get8(s); + p += 4; + len--; + } + } else if (len > 128) { + stbi_uc val; + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) + len = 257 - len; + if (len > nleft) return 0; // corrupt data + val = stbi__get8(s); + count += len; + while (len) { + *p = val; + p += 4; + len--; + } + } + } + + return 1; +} + +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + int pixelCount; + int channelCount, compression; + int channel, i; + int bitdepth; + int w,h; + stbi_uc *out; + STBI_NOTUSED(ri); + + // Check identifier + if (stbi__get32be(s) != 0x38425053) // "8BPS" + return stbi__errpuc("not PSD", "Corrupt PSD image"); + + // Check file type version. + if (stbi__get16be(s) != 1) + return stbi__errpuc("wrong version", "Unsupported version of PSD image"); + + // Skip 6 reserved bytes. + stbi__skip(s, 6 ); + + // Read the number of channels (R, G, B, A, etc). 
+ channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) + return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image"); + + // Read the rows and columns of the image. + h = stbi__get32be(s); + w = stbi__get32be(s); + + // Make sure the depth is 8 bits. + bitdepth = stbi__get16be(s); + if (bitdepth != 8 && bitdepth != 16) + return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit"); + + // Make sure the color mode is RGB. + // Valid options are: + // 0: Bitmap + // 1: Grayscale + // 2: Indexed color + // 3: RGB color + // 4: CMYK color + // 7: Multichannel + // 8: Duotone + // 9: Lab color + if (stbi__get16be(s) != 3) + return stbi__errpuc("wrong color format", "PSD is not in RGB color format"); + + // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) + stbi__skip(s,stbi__get32be(s) ); + + // Skip the image resources. (resolution, pen tool paths, etc) + stbi__skip(s, stbi__get32be(s) ); + + // Skip the reserved data. + stbi__skip(s, stbi__get32be(s) ); + + // Find out if the data is compressed. + // Known values: + // 0: no compression + // 1: RLE compressed + compression = stbi__get16be(s); + if (compression > 1) + return stbi__errpuc("bad compression", "PSD has an unknown compression format"); + + // Check size + if (!stbi__mad3sizes_valid(4, w, h, 0)) + return stbi__errpuc("too large", "Corrupt PSD"); + + // Create the destination image. + + if (!compression && bitdepth == 16 && bpc == 16) { + out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); + ri->bits_per_channel = 16; + } else + out = (stbi_uc *) stbi__malloc(4 * w*h); + + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + pixelCount = w*h; + + // Initialize the data to zero. + //memset( out, 0, pixelCount * 4 ); + + // Finally, the image data. 
+ if (compression) { + // RLE as used by .PSD and .TIFF + // Loop until you get the number of unpacked bytes you are expecting: + // Read the next source byte into n. + // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. + // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. + // Else if n is 128, noop. + // Endloop + + // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data, + // which we're going to just skip. + stbi__skip(s, h * channelCount * 2 ); + + // Read the RLE data by channel. + for (channel = 0; channel < 4; channel++) { + stbi_uc *p; + + p = out+channel; + if (channel >= channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++, p += 4) + *p = (channel == 3 ? 255 : 0); + } else { + // Read the RLE data. + if (!stbi__psd_decode_rle(s, p, pixelCount)) { + STBI_FREE(out); + return stbi__errpuc("corrupt", "bad RLE data"); + } + } + } + + } else { + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. + + // Read the data by channel. + for (channel = 0; channel < 4; channel++) { + if (channel >= channelCount) { + // Fill this channel with default data. + if (bitdepth == 16 && bpc == 16) { + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + stbi__uint16 val = channel == 3 ? 65535 : 0; + for (i = 0; i < pixelCount; i++, q += 4) + *q = val; + } else { + stbi_uc *p = out+channel; + stbi_uc val = channel == 3 ? 
255 : 0; + for (i = 0; i < pixelCount; i++, p += 4) + *p = val; + } + } else { + if (ri->bits_per_channel == 16) { // output bpc + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + for (i = 0; i < pixelCount; i++, q += 4) + *q = (stbi__uint16) stbi__get16be(s); + } else { + stbi_uc *p = out+channel; + if (bitdepth == 16) { // input bpc + for (i = 0; i < pixelCount; i++, p += 4) + *p = (stbi_uc) (stbi__get16be(s) >> 8); + } else { + for (i = 0; i < pixelCount; i++, p += 4) + *p = stbi__get8(s); + } + } + } + } + } + + // remove weird white matte from PSD + if (channelCount >= 4) { + if (ri->bits_per_channel == 16) { + for (i=0; i < w*h; ++i) { + stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; + if (pixel[3] != 0 && pixel[3] != 65535) { + float a = pixel[3] / 65535.0f; + float ra = 1.0f / a; + float inv_a = 65535.0f * (1 - ra); + pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); + pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); + pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); + } + } + } else { + for (i=0; i < w*h; ++i) { + unsigned char *pixel = out + 4*i; + if (pixel[3] != 0 && pixel[3] != 255) { + float a = pixel[3] / 255.0f; + float ra = 1.0f / a; + float inv_a = 255.0f * (1 - ra); + pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); + pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); + pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); + } + } + } + } + + // convert to desired output format + if (req_comp && req_comp != 4) { + if (ri->bits_per_channel == 16) + out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h); + else + out = stbi__convert_format(out, 4, req_comp, w, h); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + if (comp) *comp = 4; + *y = h; + *x = w; + + return out; +} +#endif + +// ************************************************************************************************* +// Softimage PIC loader +// by Tom Seddon +// +// See 
http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format +// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/ + +#ifndef STBI_NO_PIC +static int stbi__pic_is4(stbi__context *s,const char *str) +{ + int i; + for (i=0; i<4; ++i) + if (stbi__get8(s) != (stbi_uc)str[i]) + return 0; + + return 1; +} + +static int stbi__pic_test_core(stbi__context *s) +{ + int i; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) + return 0; + + for(i=0;i<84;++i) + stbi__get8(s); + + if (!stbi__pic_is4(s,"PICT")) + return 0; + + return 1; +} + +typedef struct +{ + stbi_uc size,type,channel; +} stbi__pic_packet; + +static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest) +{ + int mask=0x80, i; + + for (i=0; i<4; ++i, mask>>=1) { + if (channel & mask) { + if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short"); + dest[i]=stbi__get8(s); + } + } + + return dest; +} + +static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src) +{ + int mask=0x80,i; + + for (i=0;i<4; ++i, mask>>=1) + if (channel&mask) + dest[i]=src[i]; +} + +static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result) +{ + int act_comp=0,num_packets=0,y,chained; + stbi__pic_packet packets[10]; + + // this will (should...) cater for even some bizarre stuff like having data + // for the same channel in multiple packets. + do { + stbi__pic_packet *packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return stbi__errpuc("bad format","too many packets"); + + packet = &packets[num_packets++]; + + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + + act_comp |= packet->channel; + + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)"); + if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp"); + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel? 
+ + for(y=0; ytype) { + default: + return stbi__errpuc("bad format","packet has bad compression type"); + + case 0: {//uncompressed + int x; + + for(x=0;xchannel,dest)) + return 0; + break; + } + + case 1://Pure RLE + { + int left=width, i; + + while (left>0) { + stbi_uc count,value[4]; + + count=stbi__get8(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)"); + + if (count > left) + count = (stbi_uc) left; + + if (!stbi__readval(s,packet->channel,value)) return 0; + + for(i=0; ichannel,dest,value); + left -= count; + } + } + break; + + case 2: {//Mixed RLE + int left=width; + while (left>0) { + int count = stbi__get8(s), i; + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)"); + + if (count >= 128) { // Repeated + stbi_uc value[4]; + + if (count==128) + count = stbi__get16be(s); + else + count -= 127; + if (count > left) + return stbi__errpuc("bad file","scanline overrun"); + + if (!stbi__readval(s,packet->channel,value)) + return 0; + + for(i=0;ichannel,dest,value); + } else { // Raw + ++count; + if (count>left) return stbi__errpuc("bad file","scanline overrun"); + + for(i=0;ichannel,dest)) + return 0; + } + left-=count; + } + break; + } + } + } + } + + return result; +} + +static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri) +{ + stbi_uc *result; + int i, x,y, internal_comp; + STBI_NOTUSED(ri); + + if (!comp) comp = &internal_comp; + + for (i=0; i<92; ++i) + stbi__get8(s); + + x = stbi__get16be(s); + y = stbi__get16be(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)"); + if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode"); + + stbi__get32be(s); //skip `ratio' + stbi__get16be(s); //skip `fields' + stbi__get16be(s); //skip `pad' + + // intermediate buffer is RGBA + result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0); + memset(result, 0xff, 
x*y*4); + + if (!stbi__pic_load_core(s,x,y,comp, result)) { + STBI_FREE(result); + result=0; + } + *px = x; + *py = y; + if (req_comp == 0) req_comp = *comp; + result=stbi__convert_format(result,4,req_comp,x,y); + + return result; +} + +static int stbi__pic_test(stbi__context *s) +{ + int r = stbi__pic_test_core(s); + stbi__rewind(s); + return r; +} +#endif + +// ************************************************************************************************* +// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb + +#ifndef STBI_NO_GIF +typedef struct +{ + stbi__int16 prefix; + stbi_uc first; + stbi_uc suffix; +} stbi__gif_lzw; + +typedef struct +{ + int w,h; + stbi_uc *out; // output buffer (always 4 components) + stbi_uc *background; // The current "background" as far as a gif is concerned + stbi_uc *history; + int flags, bgindex, ratio, transparent, eflags; + stbi_uc pal[256][4]; + stbi_uc lpal[256][4]; + stbi__gif_lzw codes[8192]; + stbi_uc *color_table; + int parse, step; + int lflags; + int start_x, start_y; + int max_x, max_y; + int cur_x, cur_y; + int line_size; + int delay; +} stbi__gif; + +static int stbi__gif_test_raw(stbi__context *s) +{ + int sz; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0; + sz = stbi__get8(s); + if (sz != '9' && sz != '7') return 0; + if (stbi__get8(s) != 'a') return 0; + return 1; +} + +static int stbi__gif_test(stbi__context *s) +{ + int r = stbi__gif_test_raw(s); + stbi__rewind(s); + return r; +} + +static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp) +{ + int i; + for (i=0; i < num_entries; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + pal[i][3] = transp == i ? 
0 : 255; + } +} + +static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info) +{ + stbi_uc version; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') + return stbi__err("not GIF", "Corrupt GIF"); + + version = stbi__get8(s); + if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF"); + if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF"); + + stbi__g_failure_reason = ""; + g->w = stbi__get16le(s); + g->h = stbi__get16le(s); + g->flags = stbi__get8(s); + g->bgindex = stbi__get8(s); + g->ratio = stbi__get8(s); + g->transparent = -1; + + if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments + + if (is_info) return 1; + + if (g->flags & 0x80) + stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); + + return 1; +} + +static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); + if (!stbi__gif_header(s, g, comp, 1)) { + STBI_FREE(g); + stbi__rewind( s ); + return 0; + } + if (x) *x = g->w; + if (y) *y = g->h; + STBI_FREE(g); + return 1; +} + +static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) +{ + stbi_uc *p, *c; + int idx; + + // recurse to decode the prefixes, since the linked-list is backwards, + // and working backwards through an interleaved image would be nasty + if (g->codes[code].prefix >= 0) + stbi__out_gif_code(g, g->codes[code].prefix); + + if (g->cur_y >= g->max_y) return; + + idx = g->cur_x + g->cur_y; + p = &g->out[idx]; + g->history[idx / 4] = 1; + + c = &g->color_table[g->codes[code].suffix * 4]; + if (c[3] > 128) { // don't render transparent pixels; + p[0] = c[2]; + p[1] = c[1]; + p[2] = c[0]; + p[3] = c[3]; + } + g->cur_x += 4; + + if (g->cur_x >= g->max_x) { + g->cur_x = g->start_x; + g->cur_y += g->step; + + while (g->cur_y >= g->max_y && g->parse > 0) { + g->step = (1 << g->parse) * 
// Decode one GIF image block's LZW-compressed raster into g->out (via
// stbi__out_gif_code). Returns g->out on success, NULL/error on corrupt data.
// The code table lives in g->codes: each entry is a (prefix, first, suffix)
// triple forming a backwards linked list of output bytes.
static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
{
   stbi_uc lzw_cs;
   stbi__int32 len, init_code;
   stbi__uint32 first;
   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
   stbi__gif_lzw *p;

   lzw_cs = stbi__get8(s);          // minimum LZW code size from the stream
   if (lzw_cs > 12) return NULL;    // GIF codes are at most 12 bits
   clear = 1 << lzw_cs;             // the clear code; clear+1 is end-of-stream
   first = 1;                       // set until we output anything; used to require a leading clear
   codesize = lzw_cs + 1;
   codemask = (1 << codesize) - 1;
   bits = 0;                        // little-endian bit accumulator
   valid_bits = 0;
   // seed the table with the single-byte root codes [0, clear)
   for (init_code = 0; init_code < clear; init_code++) {
      g->codes[init_code].prefix = -1;
      g->codes[init_code].first = (stbi_uc) init_code;
      g->codes[init_code].suffix = (stbi_uc) init_code;
   }

   // support no starting clear code
   avail = clear+2;
   oldcode = -1;

   len = 0;                         // bytes left in the current sub-block
   for(;;) {
      if (valid_bits < codesize) {
         // refill the bit accumulator one sub-block byte at a time
         if (len == 0) {
            len = stbi__get8(s); // start new block
            if (len == 0)        // zero-length sub-block terminates the data
               return g->out;
         }
         --len;
         bits |= (stbi__int32) stbi__get8(s) << valid_bits;
         valid_bits += 8;
      } else {
         stbi__int32 code = bits & codemask;
         bits >>= codesize;
         valid_bits -= codesize;
         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
         if (code == clear) {  // clear code: reset code size and table
            codesize = lzw_cs + 1;
            codemask = (1 << codesize) - 1;
            avail = clear + 2;
            oldcode = -1;
            first = 0;
         } else if (code == clear + 1) { // end of stream code
            // drain any remaining sub-blocks so the caller is positioned
            // just past this image's data
            stbi__skip(s, len);
            while ((len = stbi__get8(s)) > 0)
               stbi__skip(s,len);
            return g->out;
         } else if (code <= avail) {
            if (first) {
               // stream emitted data before any clear code
               return stbi__errpuc("no clear code", "Corrupt GIF");
            }

            if (oldcode >= 0) {
               // grow the table: new entry = oldcode + first byte of `code`
               p = &g->codes[avail++];
               if (avail > 8192) {   // table bound: 2^13 entries max
                  return stbi__errpuc("too many codes", "Corrupt GIF");
               }

               p->prefix = (stbi__int16) oldcode;
               p->first = g->codes[oldcode].first;
               // code == avail is the just-created entry (the classic
               // "KwKwK" LZW case): its suffix is its own first byte
               p->suffix = (code == avail) ? p->first : g->codes[code].first;
            } else if (code == avail)
               return stbi__errpuc("illegal code in raster", "Corrupt GIF");

            stbi__out_gif_code(g, (stbi__uint16) code);

            // widen the code size when the table fills the current range
            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
               codesize++;
               codemask = (1 << codesize) - 1;
            }

            oldcode = code;
         } else {
            // code beyond the next free table slot
            return stbi__errpuc("illegal code in raster", "Corrupt GIF");
         }
      }
   }
}
+ dispose = (g->eflags & 0x1C) >> 2; + pcount = g->w * g->h; + + if ((dispose == 3) && (two_back == 0)) { + dispose = 2; // if I don't have an image to revert back to, default to the old background + } + + if (dispose == 3) { // use previous graphic + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); + } + } + } else if (dispose == 2) { + // restore what was changed last frame to background before that frame; + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); + } + } + } else { + // This is a non-disposal case eithe way, so just + // leave the pixels as is, and they will become the new background + // 1: do not dispose + // 0: not specified. + } + + // background is what out is after the undoing of the previou frame; + memcpy( g->background, g->out, 4 * g->w * g->h ); + } + + // clear my history; + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + + for (;;) { + int tag = stbi__get8(s); + switch (tag) { + case 0x2C: /* Image Descriptor */ + { + stbi__int32 x, y, w, h; + stbi_uc *o; + + x = stbi__get16le(s); + y = stbi__get16le(s); + w = stbi__get16le(s); + h = stbi__get16le(s); + if (((x + w) > (g->w)) || ((y + h) > (g->h))) + return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); + + g->line_size = g->w * 4; + g->start_x = x * 4; + g->start_y = y * g->line_size; + g->max_x = g->start_x + w * 4; + g->max_y = g->start_y + h * g->line_size; + g->cur_x = g->start_x; + g->cur_y = g->start_y; + + g->lflags = stbi__get8(s); + + if (g->lflags & 0x40) { + g->step = 8 * g->line_size; // first interlaced spacing + g->parse = 3; + } else { + g->step = g->line_size; + g->parse = 0; + } + + if (g->lflags & 0x80) { + stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? 
g->transparent : -1); + g->color_table = (stbi_uc *) g->lpal; + } else if (g->flags & 0x80) { + g->color_table = (stbi_uc *) g->pal; + } else + return stbi__errpuc("missing color table", "Corrupt GIF"); + + o = stbi__process_gif_raster(s, g); + if (o == NULL) return NULL; + + // if this was the first frame, + pcount = g->w * g->h; + if (first_frame && (g->bgindex > 0)) { + // if first frame, any pixel not drawn to gets the background color + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi] == 0) { + g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; + memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); + } + } + } + + return o; + } + + case 0x21: // Comment Extension. + { + int len; + int ext = stbi__get8(s); + if (ext == 0xF9) { // Graphic Control Extension. + len = stbi__get8(s); + if (len == 4) { + g->eflags = stbi__get8(s); + g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths. 
+ + // unset old transparent + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 255; + } + if (g->eflags & 0x01) { + g->transparent = stbi__get8(s); + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 0; + } + } else { + // don't need transparent + stbi__skip(s, 1); + g->transparent = -1; + } + } else { + stbi__skip(s, len); + break; + } + } + while ((len = stbi__get8(s)) != 0) { + stbi__skip(s, len); + } + break; + } + + case 0x3B: // gif stream termination code + return (stbi_uc *) s; // using '1' causes warning on some compilers + + default: + return stbi__errpuc("unknown code", "Corrupt GIF"); + } + } +} + +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + if (stbi__gif_test(s)) { + int layers = 0; + stbi_uc *u = 0; + stbi_uc *out = 0; + stbi_uc *two_back = 0; + stbi__gif g; + int stride; + memset(&g, 0, sizeof(g)); + if (delays) { + *delays = 0; + } + + do { + u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + + if (u) { + *x = g.w; + *y = g.h; + ++layers; + stride = g.w * g.h * 4; + + if (out) { + out = (stbi_uc*) STBI_REALLOC( out, layers * stride ); + if (delays) { + *delays = (int*) STBI_REALLOC( *delays, sizeof(int) * layers ); + } + } else { + out = (stbi_uc*)stbi__malloc( layers * stride ); + if (delays) { + *delays = (int*) stbi__malloc( layers * sizeof(int) ); + } + } + memcpy( out + ((layers - 1) * stride), u, stride ); + if (layers >= 2) { + two_back = out - 2 * stride; + } + + if (delays) { + (*delays)[layers - 1U] = g.delay; + } + } + } while (u != 0); + + // free temp buffer; + STBI_FREE(g.out); + STBI_FREE(g.history); + STBI_FREE(g.background); + + // do the final conversion after loading everything; + if (req_comp && req_comp != 4) + out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); + + *z = layers; + return out; + } else { + return stbi__errpuc("not GIF", "Image was not as a 
gif type."); + } +} + +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *u = 0; + stbi__gif g; + memset(&g, 0, sizeof(g)); + + u = stbi__gif_load_next(s, &g, comp, req_comp, 0); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + if (u) { + *x = g.w; + *y = g.h; + + // moved conversion to after successful load so that the same + // can be done for multiple frames. + if (req_comp && req_comp != 4) + u = stbi__convert_format(u, 4, req_comp, g.w, g.h); + } + + // free buffers needed for multiple frame loading; + STBI_FREE(g.history); + STBI_FREE(g.background); + + return u; +} + +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp) +{ + return stbi__gif_info_raw(s,x,y,comp); +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int stbi__hdr_test_core(stbi__context *s, const char *signature) +{ + int i; + for (i=0; signature[i]; ++i) + if (stbi__get8(s) != signature[i]) + return 0; + stbi__rewind(s); + return 1; +} + +static int stbi__hdr_test(stbi__context* s) +{ + int r = stbi__hdr_test_core(s, "#?RADIANCE\n"); + stbi__rewind(s); + if(!r) { + r = stbi__hdr_test_core(s, "#?RGBE\n"); + stbi__rewind(s); + } + return r; +} + +#define STBI__HDR_BUFLEN 1024 +static char *stbi__hdr_gettoken(stbi__context *z, char *buffer) +{ + int len=0; + char c = '\0'; + + c = (char) stbi__get8(z); + + while (!stbi__at_eof(z) && c != '\n') { + buffer[len++] = c; + if (len == STBI__HDR_BUFLEN-1) { + // flush to end of line + while (!stbi__at_eof(z) && stbi__get8(z) != '\n') + ; + break; + } + c = (char) stbi__get8(z); + } + + buffer[len] = 0; + return buffer; +} + +static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) +{ + if ( input[3] != 0 ) { + float f1; + // Exponent + f1 = (float) ldexp(1.0f, input[3] - 
(int)(128 + 8)); + if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; + } else { + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } + } +} + +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + float *hdr_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + const char *headerToken; + STBI_NOTUSED(ri); + + // Check identifier + headerToken = stbi__hdr_gettoken(s,buffer); + if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) + return stbi__errpf("not HDR", "Corrupt HDR image"); + + // Parse header + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! 
+ token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + height = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + width = (int) strtol(token, NULL, 10); + + *x = width; + *y = height; + + if (comp) *comp = 3; + if (req_comp == 0) req_comp = 3; + + if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0)) + return stbi__errpf("too large", "HDR image is too large"); + + // Read data + hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0); + if (!hdr_data) + return stbi__errpf("outofmem", "Out of memory"); + + // Load image data + // image data is stored as some number of sca + if ( width < 8 || width >= 32768) { + // Read flat data + for (j=0; j < height; ++j) { + for (i=0; i < width; ++i) { + stbi_uc rgbe[4]; + main_decode_loop: + stbi__getn(s, rgbe, 4); + stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); + } + } + } else { + // Read RLE-encoded data + scanline = NULL; + + for (j = 0; j < height; ++j) { + c1 = stbi__get8(s); + c2 = stbi__get8(s); + len = stbi__get8(s); + if (c1 != 2 || c2 != 2 || (len & 0x80)) { + // not run-length encoded, so we have to actually use THIS data as a decoded + // pixel (note this can't be a valid pixel--one of RGB must be >= 128) + stbi_uc rgbe[4]; + rgbe[0] = (stbi_uc) c1; + rgbe[1] = (stbi_uc) c2; + rgbe[2] = (stbi_uc) len; + rgbe[3] = (stbi_uc) stbi__get8(s); + stbi__hdr_convert(hdr_data, rgbe, req_comp); + i = 1; + j = 0; + STBI_FREE(scanline); + goto main_decode_loop; // yes, this makes no sense + } + len <<= 8; + len |= stbi__get8(s); + if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); } + if (scanline == NULL) { + scanline = (stbi_uc *) 
stbi__malloc_mad2(width, 4, 0); + if (!scanline) { + STBI_FREE(hdr_data); + return stbi__errpf("outofmem", "Out of memory"); + } + } + + for (k = 0; k < 4; ++k) { + int nleft; + i = 0; + while ((nleft = width - i) > 0) { + count = stbi__get8(s); + if (count > 128) { + // Run + value = stbi__get8(s); + count -= 128; + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = value; + } else { + // Dump + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = stbi__get8(s); + } + } + } + for (i=0; i < width; ++i) + stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); + } + if (scanline) + STBI_FREE(scanline); + } + + return hdr_data; +} + +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int dummy; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (stbi__hdr_test(s) == 0) { + stbi__rewind( s ); + return 0; + } + + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) { + stbi__rewind( s ); + return 0; + } + token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *y = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *x = (int) strtol(token, NULL, 10); + *comp = 3; + return 1; +} +#endif // STBI_NO_HDR + +#ifndef STBI_NO_BMP +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) +{ + void *p; + stbi__bmp_data info; + + info.all_a = 255; + p = stbi__bmp_parse_header(s, &info); + stbi__rewind( s ); + 
if (p == NULL) + return 0; + if (x) *x = s->img_x; + if (y) *y = s->img_y; + if (comp) *comp = info.ma ? 4 : 3; + return 1; +} +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) +{ + int channelCount, dummy, depth; + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + *y = stbi__get32be(s); + *x = stbi__get32be(s); + depth = stbi__get16be(s); + if (depth != 8 && depth != 16) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 3) { + stbi__rewind( s ); + return 0; + } + *comp = 4; + return 1; +} + +static int stbi__psd_is16(stbi__context *s) +{ + int channelCount, depth; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + (void) stbi__get32be(s); + (void) stbi__get32be(s); + depth = stbi__get16be(s); + if (depth != 16) { + stbi__rewind( s ); + return 0; + } + return 1; +} +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) +{ + int act_comp=0,num_packets=0,chained,dummy; + stbi__pic_packet packets[10]; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) { + stbi__rewind(s); + return 0; + } + + stbi__skip(s, 88); + + *x = stbi__get16be(s); + *y = stbi__get16be(s); + if (stbi__at_eof(s)) { + stbi__rewind( s); + return 0; + } + if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) { + stbi__rewind( s ); + return 0; + } + + stbi__skip(s, 8); + + do { + stbi__pic_packet 
*packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return 0; + + packet = &packets[num_packets++]; + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + act_comp |= packet->channel; + + if (stbi__at_eof(s)) { + stbi__rewind( s ); + return 0; + } + if (packet->size != 8) { + stbi__rewind( s ); + return 0; + } + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); + + return 1; +} +#endif + +// ************************************************************************************************* +// Portable Gray Map and Portable Pixel Map loader +// by Ken Miller +// +// PGM: http://netpbm.sourceforge.net/doc/pgm.html +// PPM: http://netpbm.sourceforge.net/doc/ppm.html +// +// Known limitations: +// Does not support comments in the header section +// Does not support ASCII image data (formats P2 and P3) +// Does not support 16-bit-per-channel + +#ifndef STBI_NO_PNM + +static int stbi__pnm_test(stbi__context *s) +{ + char p, t; + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind( s ); + return 0; + } + return 1; +} + +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + STBI_NOTUSED(ri); + + if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n)) + return 0; + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + + if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "PNM too large"); + + out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + stbi__getn(s, out, s->img_n * s->img_x * s->img_y); + + if (req_comp && req_comp != s->img_n) { + out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure 
+ } + return out; +} + +static int stbi__pnm_isspace(char c) +{ + return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; +} + +static void stbi__pnm_skip_whitespace(stbi__context *s, char *c) +{ + for (;;) { + while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) + *c = (char) stbi__get8(s); + + if (stbi__at_eof(s) || *c != '#') + break; + + while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) + *c = (char) stbi__get8(s); + } +} + +static int stbi__pnm_isdigit(char c) +{ + return c >= '0' && c <= '9'; +} + +static int stbi__pnm_getinteger(stbi__context *s, char *c) +{ + int value = 0; + + while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { + value = value*10 + (*c - '0'); + *c = (char) stbi__get8(s); + } + + return value; +} + +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) +{ + int maxv, dummy; + char c, p, t; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + stbi__rewind(s); + + // Get identifier + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind(s); + return 0; + } + + *comp = (t == '6') ? 
3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm + + c = (char) stbi__get8(s); + stbi__pnm_skip_whitespace(s, &c); + + *x = stbi__pnm_getinteger(s, &c); // read width + stbi__pnm_skip_whitespace(s, &c); + + *y = stbi__pnm_getinteger(s, &c); // read height + stbi__pnm_skip_whitespace(s, &c); + + maxv = stbi__pnm_getinteger(s, &c); // read max value + + if (maxv > 255) + return stbi__err("max value > 255", "PPM image not 8-bit"); + else + return 1; +} +#endif + +static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) +{ + #ifndef STBI_NO_JPEG + if (stbi__jpeg_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNG + if (stbi__png_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_GIF + if (stbi__gif_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_BMP + if (stbi__bmp_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PIC + if (stbi__pic_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNM + if (stbi__pnm_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_info(s, x, y, comp)) return 1; + #endif + + // test tga last because it's a crappy test! 
+ #ifndef STBI_NO_TGA + if (stbi__tga_info(s, x, y, comp)) + return 1; + #endif + return stbi__err("unknown image type", "Image not of any known type, or corrupt"); +} + +static int stbi__is_16_main(stbi__context *s) +{ + #ifndef STBI_NO_PNG + if (stbi__png_is16(s)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_is16(s)) return 1; + #endif + + return 0; +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_info_from_file(f, x, y, comp); + fclose(f); + return result; +} + +STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__info_main(&s,x,y,comp); + fseek(f,pos,SEEK_SET); + return r; +} + +STBIDEF int stbi_is_16_bit(char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_is_16_bit_from_file(f); + fclose(f); + return result; +} + +STBIDEF int stbi_is_16_bit_from_file(FILE *f) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__is_16_main(&s); + fseek(f,pos,SEEK_SET); + return r; +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__is_16_main(&s); +} + +STBIDEF int 
stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__is_16_main(&s); +} + +#endif // STB_IMAGE_IMPLEMENTATION + +/* + revision history: + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug + 1-bit BMP + *_is_16_bit api + avoid warnings + 2.16 (2017-07-23) all functions have 16-bit variants; + STBI_NO_STDIO works again; + compilation fixes; + fix rounding in unpremultiply; + optimize vertical flip; + disable raw_len validation; + documentation fixes + 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; + warning fixes; disable run-time SSE detection on gcc; + uniform handling of optional "return" values; + thread-safe initialization of zlib tables + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) allocate large structures on the stack + remove white matting for transparent PSD + fix reported channel count for PNG & BMP + re-enable SSE2 in non-gcc 64-bit + support RGB-formatted JPEG + read 16-bit PNGs (only as 8-bit) + 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED + 2.09 (2016-01-16) allow comments in PNM files + 16-bit-per-pixel TGA (not bit-per-component) + info() for TGA could break due to .hdr handling + info() for BMP to shares code instead of sloppy parse + can use STBI_REALLOC_SIZED if allocator doesn't support realloc + code cleanup + 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA + 2.07 (2015-09-13) fix compiler warnings + partial animated GIF support + limited 16-bpc PSD support + #ifdef unused functions + bug with < 92 byte PIC,PNM,HDR,TGA + 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value + 2.05 (2015-04-19) fix bug 
in progressive JPEG handling, fix warning + 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit + 2.03 (2015-04-12) extra corruption checking (mmozeiko) + stbi_set_flip_vertically_on_load (nguillemot) + fix NEON support; fix mingw support + 2.02 (2015-01-19) fix incorrect assert, fix warning + 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 + 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG + 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) + progressive JPEG (stb) + PGM/PPM support (Ken Miller) + STBI_MALLOC,STBI_REALLOC,STBI_FREE + GIF bugfix -- seemingly never worked + STBI_NO_*, STBI_ONLY_* + 1.48 (2014-12-14) fix incorrectly-named assert() + 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) + optimize PNG (ryg) + fix bug in interlaced PNG with user-specified channel count (stb) + 1.46 (2014-08-26) + fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG + 1.45 (2014-08-16) + fix MSVC-ARM internal compiler error by wrapping malloc + 1.44 (2014-08-07) + various warning fixes from Ronny Chevalier + 1.43 (2014-07-15) + fix MSVC-only compiler problem in code changed in 1.42 + 1.42 (2014-07-09) + don't define _CRT_SECURE_NO_WARNINGS (affects user code) + fixes to stbi__cleanup_jpeg path + added STBI_ASSERT to avoid requiring assert.h + 1.41 (2014-06-25) + fix search&replace from 1.36 that messed up comments/error messages + 1.40 (2014-06-22) + fix gcc struct-initialization warning + 1.39 (2014-06-15) + fix to TGA optimization when req_comp != number of components in TGA; + fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite) + add support for BMP version 5 (more ignored fields) + 1.38 (2014-06-06) + suppress MSVC warnings on integer casts truncating values + fix accidental rename of 'skip' field of I/O + 1.37 (2014-06-04) + remove duplicate typedef + 1.36 (2014-06-03) + convert to header file single-file library + if 
de-iphone isn't set, load iphone images color-swapped instead of returning NULL + 1.35 (2014-05-27) + various warnings + fix broken STBI_SIMD path + fix bug where stbi_load_from_file no longer left file pointer in correct place + fix broken non-easy path for 32-bit BMP (possibly never used) + TGA optimization by Arseny Kapoulkine + 1.34 (unknown) + use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case + 1.33 (2011-07-14) + make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements + 1.32 (2011-07-13) + support for "info" function for all supported filetypes (SpartanJ) + 1.31 (2011-06-20) + a few more leak fixes, bug in PNG handling (SpartanJ) + 1.30 (2011-06-11) + added ability to load files via callbacks to accomidate custom input streams (Ben Wenger) + removed deprecated format-specific test/load functions + removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway + error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) + fix inefficiency in decoding 32-bit BMP (David Woo) + 1.29 (2010-08-16) + various warning fixes from Aurelien Pocheville + 1.28 (2010-08-01) + fix bug in GIF palette transparency (SpartanJ) + 1.27 (2010-08-01) + cast-to-stbi_uc to fix warnings + 1.26 (2010-07-24) + fix bug in file buffering for PNG reported by SpartanJ + 1.25 (2010-07-17) + refix trans_data warning (Won Chun) + 1.24 (2010-07-12) + perf improvements reading from files on platforms with lock-heavy fgetc() + minor perf improvements for jpeg + deprecated type-specific functions so we'll get feedback if they're needed + attempt to fix trans_data warning (Won Chun) + 1.23 fixed bug in iPhone support + 1.22 (2010-07-10) + removed image *writing* support + stbi_info support from Jetro Lauha + GIF support from Jean-Marc Lienher + iPhone PNG-extensions from James Brown + warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. 
Janez (U+017D)emva) + 1.21 fix use of 'stbi_uc' in header (reported by jon blow) + 1.20 added support for Softimage PIC, by Tom Seddon + 1.19 bug in interlaced PNG corruption check (found by ryg) + 1.18 (2008-08-02) + fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - stbi__convert_format converted one too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug + header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) + 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... 
not sure + fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available + 0.56 fix bug: zlib uncompressed mode len vs. nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant + 0.50 (2006-11-19) + first released version +*/ + + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/stb_image_write.h b/workloads/realworld/uvm_prefetch_async/darknet/src/stb_image_write.h new file mode 100644 index 0000000000000000000000000000000000000000..c05e95812b96232abd3617f98255832cc3fe4716 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/stb_image_write.h @@ -0,0 +1,1568 @@ +/* stb_image_write - v1.09 - public domain - http://nothings.org/stb/stb_image_write.h + writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 + no warranty implied; use at your own risk + + Before #including, + + #define STB_IMAGE_WRITE_IMPLEMENTATION + + in the file that you want to have the implementation. + + Will probably not work correctly with strict-aliasing optimizations. + + If using a modern Microsoft Compiler, non-safe versions of CRT calls may cause + compilation warnings or even errors. To avoid this, also before #including, + + #define STBI_MSC_SECURE_CRT + +ABOUT: + + This header file is a library for writing images to C stdio. It could be + adapted to write to memory or a general streaming interface; let me know. 
+ + The PNG output is not optimal; it is 20-50% larger than the file + written by a decent optimizing implementation; though providing a custom + zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that. + This library is designed for source code compactness and simplicity, + not optimal image file size or run-time performance. + +BUILDING: + + You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h. + You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace + malloc,realloc,free. + You can #define STBIW_MEMMOVE() to replace memmove() + You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function + for PNG compression (instead of the builtin one), it must have the following signature: + unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality); + The returned data will be freed with STBIW_FREE() (free() by default), + so it must be heap allocated with STBIW_MALLOC() (malloc() by default), + +USAGE: + + There are five functions, one for each image file format: + + int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality); + int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); + + void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically + + There are also five equivalent functions that use an arbitrary write function. 
You are + expected to open/close your file-equivalent before and after calling these: + + int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); + int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + + where the callback is: + void stbi_write_func(void *context, void *data, int size); + + You can configure it with these global variables: + int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE + int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression + int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode + + + You can define STBI_WRITE_NO_STDIO to disable the file variant of these + functions, so the library will not use stdio.h at all. However, this will + also disable HDR writing, because it requires stdio for formatted output. + + Each function returns 0 on failure and non-0 on success. + + The functions create an image file defined by the parameters. The image + is a rectangle of pixels stored from left-to-right, top-to-bottom. + Each pixel contains 'comp' channels of data stored interleaved with 8-bits + per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is + monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall. + The *data pointer points to the first byte of the top-left-most pixel. + For PNG, "stride_in_bytes" is the distance in bytes from the first byte of + a row of pixels to the first byte of the next row of pixels. 
+ + PNG creates output files with the same number of components as the input. + The BMP format expands Y to RGB in the file format and does not + output alpha. + + PNG supports writing rectangles of data even when the bytes storing rows of + data are not consecutive in memory (e.g. sub-rectangles of a larger image), + by supplying the stride between the beginning of adjacent rows. The other + formats do not. (Thus you cannot write a native-format BMP through the BMP + writer, both because it is in BGR order and because it may have padding + at the end of the line.) + + PNG allows you to set the deflate compression level by setting the global + variable 'stbi_write_png_compression_level' (it defaults to 8). + + HDR expects linear float data. Since the format is always 32-bit rgb(e) + data, alpha (if provided) is discarded, and for monochrome data it is + replicated across all three channels. + + TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed + data, set the global variable 'stbi_write_tga_with_rle' to 0. + + JPEG does ignore alpha channels in input data; quality is between 1 and 100. + Higher quality looks better but results in a bigger image. + JPEG baseline (no JPEG progressive). + +CREDITS: + + + Sean Barrett - PNG/BMP/TGA + Baldur Karlsson - HDR + Jean-Sebastien Guay - TGA monochrome + Tim Kelsey - misc enhancements + Alan Hickman - TGA RLE + Emmanuel Julien - initial file IO callback implementation + Jon Olick - original jo_jpeg.cpp code + Daniel Gibson - integrate JPEG, allow external zlib + Aarni Koskela - allow choosing PNG filter + + bugfixes: + github:Chribba + Guillaume Chereau + github:jry2 + github:romigrou + Sergio Gonzalez + Jonas Karlsson + Filip Wasil + Thatcher Ulrich + github:poppolopoppo + Patrick Boettcher + github:xeekworx + Cap Petschulat + Simon Rodriguez + Ivan Tikhonov + github:ignotion + Adam Schackart + +LICENSE + + See end of file for license information. 
+ +*/ + +#ifndef INCLUDE_STB_IMAGE_WRITE_H +#define INCLUDE_STB_IMAGE_WRITE_H + +// if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline' +#ifndef STBIWDEF +#ifdef STB_IMAGE_WRITE_STATIC +#define STBIWDEF static +#else +#ifdef __cplusplus +#define STBIWDEF extern "C" +#else +#define STBIWDEF extern +#endif +#endif +#endif + +#ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations +extern int stbi_write_tga_with_rle; +extern int stbi_write_png_compression_level; +extern int stbi_write_force_png_filter; +#endif + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); +#endif + +typedef void stbi_write_func(void *context, void *data, int size); + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + +STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); + +#endif//INCLUDE_STB_IMAGE_WRITE_H + +#ifdef STB_IMAGE_WRITE_IMPLEMENTATION + +#ifdef _WIN32 + #ifndef 
_CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS + #endif + #ifndef _CRT_NONSTDC_NO_DEPRECATE + #define _CRT_NONSTDC_NO_DEPRECATE + #endif +#endif + +#ifndef STBI_WRITE_NO_STDIO +#include +#endif // STBI_WRITE_NO_STDIO + +#include +#include +#include +#include + +#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED)) +// ok +#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)." +#endif + +#ifndef STBIW_MALLOC +#define STBIW_MALLOC(sz) malloc(sz) +#define STBIW_REALLOC(p,newsz) realloc(p,newsz) +#define STBIW_FREE(p) free(p) +#endif + +#ifndef STBIW_REALLOC_SIZED +#define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz) +#endif + + +#ifndef STBIW_MEMMOVE +#define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz) +#endif + + +#ifndef STBIW_ASSERT +#include +#define STBIW_ASSERT(x) assert(x) +#endif + +#define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff) + +#ifdef STB_IMAGE_WRITE_STATIC +static int stbi__flip_vertically_on_write=0; +static int stbi_write_png_compression_level = 8; +static int stbi_write_tga_with_rle = 1; +static int stbi_write_force_png_filter = -1; +#else +int stbi_write_png_compression_level = 8; +int stbi__flip_vertically_on_write=0; +int stbi_write_tga_with_rle = 1; +int stbi_write_force_png_filter = -1; +#endif + +STBIWDEF void stbi_flip_vertically_on_write(int flag) +{ + stbi__flip_vertically_on_write = flag; +} + +typedef struct +{ + stbi_write_func *func; + void *context; +} stbi__write_context; + +// initialize a callback-based context +static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context) +{ + s->func = c; + s->context = context; +} + +#ifndef STBI_WRITE_NO_STDIO + +static void stbi__stdio_write(void *context, void *data, int size) +{ + fwrite(data,1,size,(FILE*) 
context); +} + +static int stbi__start_write_file(stbi__write_context *s, const char *filename) +{ + FILE *f; +#ifdef STBI_MSC_SECURE_CRT + if (fopen_s(&f, filename, "wb")) + f = NULL; +#else + f = fopen(filename, "wb"); +#endif + stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f); + return f != NULL; +} + +static void stbi__end_write_file(stbi__write_context *s) +{ + fclose((FILE *)s->context); +} + +#endif // !STBI_WRITE_NO_STDIO + +typedef unsigned int stbiw_uint32; +typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1]; + +static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) +{ + while (*fmt) { + switch (*fmt++) { + case ' ': break; + case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); + s->func(s->context,&x,1); + break; } + case '2': { int x = va_arg(v,int); + unsigned char b[2]; + b[0] = STBIW_UCHAR(x); + b[1] = STBIW_UCHAR(x>>8); + s->func(s->context,b,2); + break; } + case '4': { stbiw_uint32 x = va_arg(v,int); + unsigned char b[4]; + b[0]=STBIW_UCHAR(x); + b[1]=STBIW_UCHAR(x>>8); + b[2]=STBIW_UCHAR(x>>16); + b[3]=STBIW_UCHAR(x>>24); + s->func(s->context,b,4); + break; } + default: + STBIW_ASSERT(0); + return; + } + } +} + +static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) 
+{ + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); +} + +static void stbiw__putc(stbi__write_context *s, unsigned char c) +{ + s->func(s->context, &c, 1); +} + +static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) +{ + unsigned char arr[3]; + arr[0] = a, arr[1] = b, arr[2] = c; + s->func(s->context, arr, 3); +} + +static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d) +{ + unsigned char bg[3] = { 255, 0, 255}, px[3]; + int k; + + if (write_alpha < 0) + s->func(s->context, &d[comp - 1], 1); + + switch (comp) { + case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case + case 1: + if (expand_mono) + stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp + else + s->func(s->context, d, 1); // monochrome TGA + break; + case 4: + if (!write_alpha) { + // composite against pink background + for (k = 0; k < 3; ++k) + px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255; + stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]); + break; + } + /* FALLTHROUGH */ + case 3: + stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); + break; + } + if (write_alpha > 0) + s->func(s->context, &d[comp - 1], 1); +} + +static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) +{ + stbiw_uint32 zero = 0; + int i,j, j_end; + + if (y <= 0) + return; + + if (stbi__flip_vertically_on_write) + vdir *= -1; + + if (vdir < 0) + j_end = -1, j = y-1; + else + j_end = y, j = 0; + + for (; j != j_end; j += vdir) { + for (i=0; i < x; ++i) { + unsigned char *d = (unsigned char *) data + (j*x+i)*comp; + stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); + } + s->func(s->context, &zero, scanline_pad); + } +} + +static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int 
comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) +{ + if (y < 0 || x < 0) { + return 0; + } else { + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); + stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono); + return 1; + } +} + +static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data) +{ + int pad = (-x*3) & 3; + return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad, + "11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header +} + +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_bmp_core(&s, x, y, comp, data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_bmp_core(&s, x, y, comp, data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif //!STBI_WRITE_NO_STDIO + +static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data) +{ + int has_alpha = (comp == 2 || comp == 4); + int colorbytes = has_alpha ? comp-1 : comp; + int format = colorbytes < 2 ? 
3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3 + + if (y < 0 || x < 0) + return 0; + + if (!stbi_write_tga_with_rle) { + return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0, + "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); + } else { + int i,j,k; + int jend, jdir; + + stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); + + if (stbi__flip_vertically_on_write) { + j = 0; + jend = y; + jdir = 1; + } else { + j = y-1; + jend = -1; + jdir = -1; + } + for (; j != jend; j += jdir) { + unsigned char *row = (unsigned char *) data + j * x * comp; + int len; + + for (i = 0; i < x; i += len) { + unsigned char *begin = row + i * comp; + int diff = 1; + len = 1; + + if (i < x - 1) { + ++len; + diff = memcmp(begin, row + (i + 1) * comp, comp); + if (diff) { + const unsigned char *prev = begin; + for (k = i + 2; k < x && len < 128; ++k) { + if (memcmp(prev, row + k * comp, comp)) { + prev += comp; + ++len; + } else { + --len; + break; + } + } + } else { + for (k = i + 2; k < x && len < 128; ++k) { + if (!memcmp(begin, row + k * comp, comp)) { + ++len; + } else { + break; + } + } + } + } + + if (diff) { + unsigned char header = STBIW_UCHAR(len - 1); + s->func(s->context, &header, 1); + for (k = 0; k < len; ++k) { + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); + } + } else { + unsigned char header = STBIW_UCHAR(len - 129); + s->func(s->context, &header, 1); + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); + } + } + } + } + return 1; +} + +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_tga_core(&s, x, y, comp, (void *) data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, 
const void *data)
+{
+   stbi__write_context s;
+   if (stbi__start_write_file(&s,filename)) {
+      int r = stbi_write_tga_core(&s, x, y, comp, (void *) data);
+      stbi__end_write_file(&s);
+      return r;
+   } else
+      return 0;
+}
+#endif
+
+// *************************************************************************************************
+// Radiance RGBE HDR writer
+// by Baldur Karlsson
+
+#define stbiw__max(a, b)  ((a) > (b) ? (a) : (b))
+
+// Convert one linear-light RGB triple to the 4-byte shared-exponent RGBE
+// encoding: frexp() of the largest component selects the exponent, the three
+// mantissas are scaled into 0..255, and the exponent is stored biased by 128.
+// Components smaller than 1e-32 times nothing to encode: all four bytes zero.
+void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear)
+{
+   int exponent;
+   float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2]));
+
+   if (maxcomp < 1e-32f) {
+      rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0;
+   } else {
+      float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp;
+
+      rgbe[0] = (unsigned char)(linear[0] * normalize);
+      rgbe[1] = (unsigned char)(linear[1] * normalize);
+      rgbe[2] = (unsigned char)(linear[2] * normalize);
+      rgbe[3] = (unsigned char)(exponent + 128);
+   }
+}
+
+// Emit one RLE "run" packet: a count byte with the high bit set (length+128)
+// followed by the single byte value that repeats. The assert enforces the
+// caller's invariant length <= 127 so length+128 fits in one byte.
+void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte)
+{
+   unsigned char lengthbyte = STBIW_UCHAR(length+128);
+   STBIW_ASSERT(length+128 <= 255);
+   s->func(s->context, &lengthbyte, 1);
+   s->func(s->context, &databyte, 1);
+}
+
+// Emit one RLE "dump" (literal) packet: a count byte with the high bit clear
+// followed by 'length' verbatim bytes.
+void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data)
+{
+   unsigned char lengthbyte = STBIW_UCHAR(length);
+   STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code
+   s->func(s->context, &lengthbyte, 1);
+   s->func(s->context, data, length);
+}
+
+// Write one HDR scanline. 'scanline' holds 'width' pixels of 'ncomp' floats;
+// for 1- or 2-component input the first channel is replicated to R, G and B
+// (see the default: branch below). 'scratch' must hold width*4 bytes and is
+// used to de-interleave R, G, B and E so each plane can be RLE'd separately.
+void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline)
+{
+   // RLE scanline header: bytes {2, 2, width_hi, width_lo} (width big-endian).
+   unsigned char scanlineheader[4] = { 2, 2, 0, 0 };
+   unsigned char rgbe[4];
+   float linear[3];
+   int x;
+
+   scanlineheader[2] = (width&0xff00)>>8;
+   scanlineheader[3] = (width&0x00ff);
+
+   /* skip RLE for images too small or large */
+   if (width < 8 || width >= 32768) {
+      for (x=0; x < width; x++) {
+         switch (ncomp) {
+            case 4: /*
fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + s->func(s->context, rgbe, 4); + } + } else { + int c,r; + /* encode into scratch buffer */ + for (x=0; x < width; x++) { + switch(ncomp) { + case 4: /* fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + scratch[x + width*0] = rgbe[0]; + scratch[x + width*1] = rgbe[1]; + scratch[x + width*2] = rgbe[2]; + scratch[x + width*3] = rgbe[3]; + } + + s->func(s->context, scanlineheader, 4); + + /* RLE each component separately */ + for (c=0; c < 4; c++) { + unsigned char *comp = &scratch[width*c]; + + x = 0; + while (x < width) { + // find first run + r = x; + while (r+2 < width) { + if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) + break; + ++r; + } + if (r+2 >= width) + r = width; + // dump up to first run + while (x < r) { + int len = r-x; + if (len > 128) len = 128; + stbiw__write_dump_data(s, len, &comp[x]); + x += len; + } + // if there's a run, output it + if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd + // find next byte after run + while (r < width && comp[r] == comp[x]) + ++r; + // output run up to r + while (x < r) { + int len = r-x; + if (len > 127) len = 127; + stbiw__write_run_data(s, len, comp[x]); + x += len; + } + } + } + } + } +} + +static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data) +{ + if (y <= 0 || x <= 0 || data == NULL) + return 0; + else { + // Each component is stored separately. Allocate scratch space for full output scanline. 
+ unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); + int i, len; + char buffer[128]; + char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; + s->func(s->context, header, sizeof(header)-1); + +#ifdef STBI_MSC_SECURE_CRT + len = sprintf_s(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#else + len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#endif + s->func(s->context, buffer, len); + + for(i=0; i < y; i++) + stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i)*x); + STBIW_FREE(scratch); + return 1; + } +} + +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_hdr_core(&s, x, y, comp, (float *) data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif // STBI_WRITE_NO_STDIO + + +////////////////////////////////////////////////////////////////////////////// +// +// PNG writer +// + +#ifndef STBIW_ZLIB_COMPRESS +// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() +#define stbiw__sbraw(a) ((int *) (a) - 2) +#define stbiw__sbm(a) stbiw__sbraw(a)[0] +#define stbiw__sbn(a) stbiw__sbraw(a)[1] + +#define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) +#define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0) +#define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) + +#define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) +#define stbiw__sbcount(a) ((a) ? 
stbiw__sbn(a) : 0)
+#define stbiw__sbfree(a)         ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0)
+
+// Grow a stretchy buffer to hold 'increment' more items of 'itemsize' bytes.
+// The buffer's two-int header {capacity, count} sits just before the payload;
+// capacity doubles on each growth. Returns the (possibly moved) payload ptr.
+static void *stbiw__sbgrowf(void **arr, int increment, int itemsize)
+{
+   int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1;
+   void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2);
+   STBIW_ASSERT(p);
+   if (p) {
+      if (!*arr) ((int *) p)[1] = 0;   // fresh buffer: count starts at 0
+      *arr = (void *) ((int *) p + 2); // payload begins after the 2-int header
+      stbiw__sbm(*arr) = m;
+   }
+   return *arr;
+}
+
+// Drain every complete byte from the bit accumulator into the output buffer.
+// Leftover (<8) bits stay in *bitbuffer/*bitcount for the next add.
+static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount)
+{
+   while (*bitcount >= 8) {
+      stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer));
+      *bitbuffer >>= 8;
+      *bitcount -= 8;
+   }
+   return data;
+}
+
+// Reverse the low 'codebits' bits of 'code' (DEFLATE stores Huffman codes
+// most-significant-bit first while the packer above emits LSB-first).
+static int stbiw__zlib_bitrev(int code, int codebits)
+{
+   int res=0;
+   while (codebits--) {
+      res = (res << 1) | (code & 1);
+      code >>= 1;
+   }
+   return res;
+}
+
+// Length of the common prefix of a and b, capped at min(limit, 258)
+// (258 is the longest match DEFLATE can encode).
+static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit)
+{
+   int i;
+   for (i=0; i < limit && i < 258; ++i)
+      if (a[i] != b[i]) break;
+   return i;
+}
+
+// Hash the next 3 input bytes (the minimum DEFLATE match length) with an
+// integer avalanche mix; caller masks the result down to the table size.
+static unsigned int stbiw__zhash(unsigned char *data)
+{
+   stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16);
+   hash ^= hash << 3;
+   hash += hash >> 5;
+   hash ^= hash << 4;
+   hash += hash >> 17;
+   hash ^= hash << 25;
+   hash += hash >> 6;
+   return hash;
+}
+
+// Append 'codebits' bits of 'code' to the stream and flush whole bytes.
+#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount))
+#define stbiw__zlib_add(code,codebits) \
+      (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush())
+#define stbiw__zlib_huffa(b,c)  stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c)
+// default huffman tables
+#define stbiw__zlib_huff1(n)  stbiw__zlib_huffa(0x30 + (n), 8)
+#define stbiw__zlib_huff2(n)  stbiw__zlib_huffa(0x190 + (n)-144, 9)
+#define stbiw__zlib_huff3(n)  stbiw__zlib_huffa(0 + (n)-256,7)
+#define stbiw__zlib_huff4(n)  stbiw__zlib_huffa(0xc0 + (n)-280,8)
+// Dispatch a literal/length symbol to the fixed-Huffman code range it falls in.
+#define stbiw__zlib_huff(n)  ((n) <= 143 ?
stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) +#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) + +#define stbiw__ZHASH 16384 + +#endif // STBIW_ZLIB_COMPRESS + +unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality) +{ +#ifdef STBIW_ZLIB_COMPRESS + // user provided a zlib compress implementation, use that + return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality); +#else // use builtin + static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; + static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; + static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; + static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; + unsigned int bitbuf=0; + int i,j, bitcount=0; + unsigned char *out = NULL; + unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(char**)); + if (hash_table == NULL) + return NULL; + if (quality < 5) quality = 5; + + stbiw__sbpush(out, 0x78); // DEFLATE 32K window + stbiw__sbpush(out, 0x5e); // FLEVEL = 1 + stbiw__zlib_add(1,1); // BFINAL = 1 + stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman + + for (i=0; i < stbiw__ZHASH; ++i) + hash_table[i] = NULL; + + i=0; + while (i < data_len-3) { + // hash next 3 bytes of data to be compressed + int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3; + unsigned char *bestloc = 0; + unsigned char **hlist = hash_table[h]; + int n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32768) { // if entry lies within window + int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i); + if (d >= best) best=d,bestloc=hlist[j]; + } + 
} + // when hash table entry is too long, delete half the entries + if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) { + STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality); + stbiw__sbn(hash_table[h]) = quality; + } + stbiw__sbpush(hash_table[h],data+i); + + if (bestloc) { + // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal + h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1); + hlist = hash_table[h]; + n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32767) { + int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1); + if (e > best) { // if next match is better, bail on current match + bestloc = NULL; + break; + } + } + } + } + + if (bestloc) { + int d = (int) (data+i - bestloc); // distance back + STBIW_ASSERT(d <= 32767 && best <= 258); + for (j=0; best > lengthc[j+1]-1; ++j); + stbiw__zlib_huff(j+257); + if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); + for (j=0; d > distc[j+1]-1; ++j); + stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); + if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]); + i += best; + } else { + stbiw__zlib_huffb(data[i]); + ++i; + } + } + // write out final bytes + for (;i < data_len; ++i) + stbiw__zlib_huffb(data[i]); + stbiw__zlib_huff(256); // end of block + // pad with 0 bits to byte boundary + while (bitcount) + stbiw__zlib_add(0,1); + + for (i=0; i < stbiw__ZHASH; ++i) + (void) stbiw__sbfree(hash_table[i]); + STBIW_FREE(hash_table); + + { + // compute adler32 on input + unsigned int s1=1, s2=0; + int blocklen = (int) (data_len % 5552); + j=0; + while (j < data_len) { + for (i=0; i < blocklen; ++i) s1 += data[j+i], s2 += s1; + s1 %= 65521, s2 %= 65521; + j += blocklen; + blocklen = 5552; + } + stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s2)); + stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s1)); + } + *out_len = stbiw__sbn(out); + // make 
returned pointer freeable + STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len); + return (unsigned char *) stbiw__sbraw(out); +#endif // STBIW_ZLIB_COMPRESS +} + +static unsigned int stbiw__crc32(unsigned char *buffer, int len) +{ + static unsigned int crc_table[256] = + { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 
0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + }; + + unsigned int crc = ~0u; + int i; + for (i=0; i < len; ++i) + crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; + return ~crc; +} + +#define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4) +#define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v)); +#define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3]) + +static void stbiw__wpcrc(unsigned char **data, int len) +{ + unsigned int crc = stbiw__crc32(*data - len - 4, len+4); + stbiw__wp32(*data, crc); +} + +static unsigned 
char stbiw__paeth(int a, int b, int c) +{ + int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c); + if (pa <= pb && pa <= pc) return STBIW_UCHAR(a); + if (pb <= pc) return STBIW_UCHAR(b); + return STBIW_UCHAR(c); +} + +// @OPTIMIZE: provide an option that always forces left-predict or paeth predict +static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer) +{ + static int mapping[] = { 0,1,2,3,4 }; + static int firstmap[] = { 0,1,0,5,6 }; + int *mymap = (y != 0) ? mapping : firstmap; + int i; + int type = mymap[filter_type]; + unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y); + int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes; + for (i = 0; i < n; ++i) { + switch (type) { + case 0: line_buffer[i] = z[i]; break; + case 1: line_buffer[i] = z[i]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break; + case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break; + case 5: line_buffer[i] = z[i]; break; + case 6: line_buffer[i] = z[i]; break; + } + } + for (i=n; i < width*n; ++i) { + switch (type) { + case 0: line_buffer[i] = z[i]; break; + case 1: line_buffer[i] = z[i] - z[i-n]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break; + case 4: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break; + case 5: line_buffer[i] = z[i] - (z[i-n]>>1); break; + case 6: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; + } + } +} + +unsigned char *stbi_write_png_to_mem(unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len) +{ + int force_filter = stbi_write_force_png_filter; + int ctype[5] = { -1, 0, 4, 2, 6 }; + unsigned char sig[8] 
= { 137,80,78,71,13,10,26,10 }; + unsigned char *out,*o, *filt, *zlib; + signed char *line_buffer; + int j,zlen; + + if (stride_bytes == 0) + stride_bytes = x * n; + + if (force_filter >= 5) { + force_filter = -1; + } + + filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; + line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; } + for (j=0; j < y; ++j) { + int filter_type; + if (force_filter > -1) { + filter_type = force_filter; + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, force_filter, line_buffer); + } else { // Estimate the best filter by running through all of them: + int best_filter = 0, best_filter_val = 0x7fffffff, est, i; + for (filter_type = 0; filter_type < 5; filter_type++) { + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, filter_type, line_buffer); + + // Estimate the entropy of the line using this filter; the less, the better. + est = 0; + for (i = 0; i < x*n; ++i) { + est += abs((signed char) line_buffer[i]); + } + if (est < best_filter_val) { + best_filter_val = est; + best_filter = filter_type; + } + } + if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, best_filter, line_buffer); + filter_type = best_filter; + } + } + // when we get here, filter_type contains the filter type, and line_buffer contains the data + filt[j*(x*n+1)] = (unsigned char) filter_type; + STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); + } + STBIW_FREE(line_buffer); + zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level); + STBIW_FREE(filt); + if (!zlib) return 0; + + // each tag requires 12 bytes of overhead + out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12); + if (!out) return 0; + *out_len = 8 + 12+13 + 12+zlen + 12; + + o=out; + STBIW_MEMMOVE(o,sig,8); o+= 8; + stbiw__wp32(o, 13); // header length + stbiw__wptag(o, "IHDR"); + 
stbiw__wp32(o, x); + stbiw__wp32(o, y); + *o++ = 8; + *o++ = STBIW_UCHAR(ctype[n]); + *o++ = 0; + *o++ = 0; + *o++ = 0; + stbiw__wpcrc(&o,13); + + stbiw__wp32(o, zlen); + stbiw__wptag(o, "IDAT"); + STBIW_MEMMOVE(o, zlib, zlen); + o += zlen; + STBIW_FREE(zlib); + stbiw__wpcrc(&o, zlen); + + stbiw__wp32(o,0); + stbiw__wptag(o, "IEND"); + stbiw__wpcrc(&o,0); + + STBIW_ASSERT(o == out + *out_len); + + return out; +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes) +{ + FILE *f; + int len; + unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; +#ifdef STBI_MSC_SECURE_CRT + if (fopen_s(&f, filename, "wb")) + f = NULL; +#else + f = fopen(filename, "wb"); +#endif + if (!f) { STBIW_FREE(png); return 0; } + fwrite(png, 1, len, f); + fclose(f); + STBIW_FREE(png); + return 1; +} +#endif + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes) +{ + int len; + unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; + func(context, png, len); + STBIW_FREE(png); + return 1; +} + + +/* *************************************************************************** + * + * JPEG writer + * + * This is based on Jon Olick's jo_jpeg.cpp: + * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html + */ + +static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18, + 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; + +static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) { + int bitBuf = *bitBufP, bitCnt = *bitCntP; + bitCnt += bs[1]; + bitBuf |= bs[0] << (24 - 
bitCnt);
   // Flush out full bytes from the bit buffer, MSB first.
   while(bitCnt >= 8) {
      unsigned char c = (bitBuf >> 16) & 255;
      stbiw__putc(s, c);
      // A 0xFF data byte is followed by a 0x00 stuffing byte so it cannot be
      // read back as a marker (JPEG entropy-stream escaping).
      if(c == 255) {
         stbiw__putc(s, 0);
      }
      bitBuf <<= 8;
      bitCnt -= 8;
   }
   *bitBufP = bitBuf;
   *bitCntP = bitCnt;
}

// In-place 8-point forward DCT over the eight values *d0p..*d7p.
// Looks like the scaled AAN butterfly (constants 0.707…, 0.541…, 1.306…, 0.382…);
// the final descaling is assumed to happen in the caller's quantization table —
// TODO confirm against the fdtbl setup.
static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) {
   float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p;
   float z1, z2, z3, z4, z5, z11, z13;

   float tmp0 = d0 + d7;
   float tmp7 = d0 - d7;
   float tmp1 = d1 + d6;
   float tmp6 = d1 - d6;
   float tmp2 = d2 + d5;
   float tmp5 = d2 - d5;
   float tmp3 = d3 + d4;
   float tmp4 = d3 - d4;

   // Even part
   float tmp10 = tmp0 + tmp3;   // phase 2
   float tmp13 = tmp0 - tmp3;
   float tmp11 = tmp1 + tmp2;
   float tmp12 = tmp1 - tmp2;

   d0 = tmp10 + tmp11;          // phase 3
   d4 = tmp10 - tmp11;

   z1 = (tmp12 + tmp13) * 0.707106781f; // c4
   d2 = tmp13 + z1;             // phase 5
   d6 = tmp13 - z1;

   // Odd part
   tmp10 = tmp4 + tmp5;         // phase 2
   tmp11 = tmp5 + tmp6;
   tmp12 = tmp6 + tmp7;

   // The rotator is modified from fig 4-8 to avoid extra negations.
   z5 = (tmp10 - tmp12) * 0.382683433f; // c6
   z2 = tmp10 * 0.541196100f + z5;      // c2-c6
   z4 = tmp12 * 1.306562965f + z5;      // c2+c6
   z3 = tmp11 * 0.707106781f;           // c4

   z11 = tmp7 + z3;             // phase 5
   z13 = tmp7 - z3;

   *d5p = z13 + z2;             // phase 6
   *d3p = z13 - z2;
   *d1p = z11 + z4;
   *d7p = z11 - z4;

   *d0p = d0;  *d2p = d2;  *d4p = d4;  *d6p = d6;
}

// Computes the (amplitude, bit-length) pair for a coefficient: bits[1] is the
// number of bits of |val|, bits[0] is val encoded in JPEG's one's-complement
// style for negatives (val-1 masked to bits[1] bits).
static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) {
   int tmp1 = val < 0 ? -val : val;
   val = val < 0 ?
val-1 : val; + bits[1] = 1; + while(tmp1 >>= 1) { + ++bits[1]; + } + bits[0] = val & ((1<0)&&(DU[end0pos]==0); --end0pos) { + } + // end0pos = first element in reverse order !=0 + if(end0pos == 0) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + return DU[0]; + } + for(i = 1; i <= end0pos; ++i) { + int startpos = i; + int nrzeroes; + unsigned short bits[2]; + for (; DU[i]==0 && i<=end0pos; ++i) { + } + nrzeroes = i-startpos; + if ( nrzeroes >= 16 ) { + int lng = nrzeroes>>4; + int nrmarker; + for (nrmarker=1; nrmarker <= lng; ++nrmarker) + stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); + nrzeroes &= 15; + } + stbiw__jpg_calcBits(DU[i], bits); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); + } + if(end0pos != 63) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + } + return DU[0]; +} + +static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) { + // Constants that don't pollute global namespace + static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0}; + static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d}; + static const unsigned char std_ac_luminance_values[] = { + 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, + 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, + 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, + 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, + 
0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, + 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, + 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0}; + static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77}; + static const unsigned char std_ac_chrominance_values[] = { + 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, + 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26, + 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, + 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, + 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, + 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, + 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + // Huffman tables + static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}}; + static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}}; + static const unsigned short YAC_HT[256][2] = { + {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const unsigned short UVAC_HT[256][2] = { + {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22, + 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99}; + static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99, + 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99}; + static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, + 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; + + int row, col, i, k; + float fdtbl_Y[64], fdtbl_UV[64]; + unsigned char YTable[64], UVTable[64]; + + if(!data || !width || !height || comp > 4 || comp < 1) { + return 0; + } + + quality = quality ? quality : 90; + quality = quality < 1 ? 1 : quality > 100 ? 100 : quality; + quality = quality < 50 ? 5000 / quality : 200 - quality * 2; + + for(i = 0; i < 64; ++i) { + int uvti, yti = (YQT[i]*quality+50)/100; + YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 
255 : yti); + uvti = (UVQT[i]*quality+50)/100; + UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti); + } + + for(row = 0, k = 0; row < 8; ++row) { + for(col = 0; col < 8; ++col, ++k) { + fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + } + } + + // Write Headers + { + static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; + static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; + const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width), + 3,1,0x11,0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; + s->func(s->context, (void*)head0, sizeof(head0)); + s->func(s->context, (void*)YTable, sizeof(YTable)); + stbiw__putc(s, 1); + s->func(s->context, UVTable, sizeof(UVTable)); + s->func(s->context, (void*)head1, sizeof(head1)); + s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1); + s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); + stbiw__putc(s, 0x10); // HTYACinfo + s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1); + s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); + stbiw__putc(s, 1); // HTUDCinfo + s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values)); + stbiw__putc(s, 0x11); // HTUACinfo + s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); + s->func(s->context, (void*)head2, sizeof(head2)); + } + + // Encode 8x8 macroblocks 
+ { + static const unsigned short fillBits[] = {0x7F, 7}; + const unsigned char *imageData = (const unsigned char *)data; + int DCY=0, DCU=0, DCV=0; + int bitBuf=0, bitCnt=0; + // comp == 2 is grey+alpha (alpha is ignored) + int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; + int x, y, pos; + for(y = 0; y < height; y += 8) { + for(x = 0; x < width; x += 8) { + float YDU[64], UDU[64], VDU[64]; + for(row = y, pos = 0; row < y+8; ++row) { + for(col = x; col < x+8; ++col, ++pos) { + int p = (stbi__flip_vertically_on_write ? height-1-row : row)*width*comp + col*comp; + float r, g, b; + if(row >= height) { + p -= width*comp*(row+1 - height); + } + if(col >= width) { + p -= comp*(col+1 - width); + } + + r = imageData[p+0]; + g = imageData[p+ofsG]; + b = imageData[p+ofsB]; + YDU[pos]=+0.29900f*r+0.58700f*g+0.11400f*b-128; + UDU[pos]=-0.16874f*r-0.33126f*g+0.50000f*b; + VDU[pos]=+0.50000f*r-0.41869f*g-0.08131f*b; + } + } + + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, YDU, fdtbl_Y, DCY, YDC_HT, YAC_HT); + DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, UDU, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); + DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, VDU, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); + } + } + + // Do the bit alignment of the EOI marker + stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); + } + + // EOI + stbiw__putc(s, 0xFF); + stbiw__putc(s, 0xD9); + + return 1; +} + +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality); +} + + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); + stbi__end_write_file(&s); + return r; + } else + return 
0; +} +#endif + +#endif // STB_IMAGE_WRITE_IMPLEMENTATION + +/* Revision history + 1.09 (2018-02-11) + fix typo in zlib quality API, improve STB_I_W_STATIC in C++ + 1.08 (2018-01-29) + add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter + 1.07 (2017-07-24) + doc fix + 1.06 (2017-07-23) + writing JPEG (using Jon Olick's code) + 1.05 ??? + 1.04 (2017-03-03) + monochrome BMP expansion + 1.03 ??? + 1.02 (2016-04-02) + avoid allocating large structures on the stack + 1.01 (2016-01-16) + STBIW_REALLOC_SIZED: support allocators with no realloc support + avoid race-condition in crc initialization + minor compile issues + 1.00 (2015-09-14) + installable file IO function + 0.99 (2015-09-13) + warning fixes; TGA rle support + 0.98 (2015-04-08) + added STBIW_MALLOC, STBIW_ASSERT etc + 0.97 (2015-01-18) + fixed HDR asserts, rewrote HDR rle logic + 0.96 (2015-01-17) + add HDR output + fix monochrome BMP + 0.95 (2014-08-17) + add monochrome TGA output + 0.94 (2014-05-31) + rename private functions to avoid conflicts with stb_image.h + 0.93 (2014-05-27) + warning fixes + 0.92 (2010-08-01) + casts to unsigned char to fix warnings + 0.91 (2010-07-17) + first public release + 0.90 first internal release +*/ + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/tree.c b/workloads/realworld/uvm_prefetch_async/darknet/src/tree.c new file mode 100644 index 0000000000000000000000000000000000000000..67b6d431f6f7e92ede234c71ecae9bd9146dc71f --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/tree.c @@ -0,0 +1,139 @@ +#include +#include +#include "tree.h" +#include "utils.h" +#include "data.h" + +void change_leaves(tree *t, char *leaf_list) +{ + list *llist = get_paths(leaf_list); + char **leaves = (char **)list_to_array(llist); + int n = llist->size; + int i,j; + int found = 0; + for(i = 0; i < t->n; ++i){ + t->leaf[i] = 0; + for(j = 0; j < n; ++j){ + if (0==strcmp(t->name[i], leaves[j])){ + t->leaf[i] = 1; + ++found; + break; + } + } + } + fprintf(stderr, "Found %d leaves.\n", found); +} + +float get_hierarchy_probability(float *x, tree *hier, int c, int stride) +{ + float p = 1; + while(c >= 0){ + p = p * x[c*stride]; + c = hier->parent[c]; + } + return p; +} + +void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride) +{ + int j; + for(j = 0; j < n; ++j){ + int parent = hier->parent[j]; + if(parent >= 0){ + predictions[j*stride] *= predictions[parent*stride]; + } + } + if(only_leaves){ + for(j = 0; j < n; ++j){ + if(!hier->leaf[j]) 
predictions[j*stride] = 0; + } + } +} + +int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride) +{ + float p = 1; + int group = 0; + int i; + while(1){ + float max = 0; + int max_i = 0; + + for(i = 0; i < hier->group_size[group]; ++i){ + int index = i + hier->group_offset[group]; + float val = predictions[(i + hier->group_offset[group])*stride]; + if(val > max){ + max_i = index; + max = val; + } + } + if(p*max > thresh){ + p = p*max; + group = hier->child[max_i]; + if(hier->child[max_i] < 0) return max_i; + } else if (group == 0){ + return max_i; + } else { + return hier->parent[hier->group_offset[group]]; + } + } + return 0; +} + +tree *read_tree(char *filename) +{ + tree t = {0}; + FILE *fp = fopen(filename, "r"); + + char *line; + int last_parent = -1; + int group_size = 0; + int groups = 0; + int n = 0; + while((line=fgetl(fp)) != 0){ + char *id = calloc(256, sizeof(char)); + int parent = -1; + sscanf(line, "%s %d", id, &parent); + t.parent = realloc(t.parent, (n+1)*sizeof(int)); + t.parent[n] = parent; + + t.child = realloc(t.child, (n+1)*sizeof(int)); + t.child[n] = -1; + + t.name = realloc(t.name, (n+1)*sizeof(char *)); + t.name[n] = id; + if(parent != last_parent){ + ++groups; + t.group_offset = realloc(t.group_offset, groups * sizeof(int)); + t.group_offset[groups - 1] = n - group_size; + t.group_size = realloc(t.group_size, groups * sizeof(int)); + t.group_size[groups - 1] = group_size; + group_size = 0; + last_parent = parent; + } + t.group = realloc(t.group, (n+1)*sizeof(int)); + t.group[n] = groups; + if (parent >= 0) { + t.child[parent] = groups; + } + ++n; + ++group_size; + } + ++groups; + t.group_offset = realloc(t.group_offset, groups * sizeof(int)); + t.group_offset[groups - 1] = n - group_size; + t.group_size = realloc(t.group_size, groups * sizeof(int)); + t.group_size[groups - 1] = group_size; + t.n = n; + t.groups = groups; + t.leaf = calloc(n, sizeof(int)); + int i; + for(i = 0; i < n; ++i) t.leaf[i] = 1; + 
for(i = 0; i < n; ++i) if(t.parent[i] >= 0) t.leaf[t.parent[i]] = 0; + + fclose(fp); + tree *tree_ptr = calloc(1, sizeof(tree)); + *tree_ptr = t; + //error(0); + return tree_ptr; +} diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/tree.h b/workloads/realworld/uvm_prefetch_async/darknet/src/tree.h new file mode 100644 index 0000000000000000000000000000000000000000..3802b8ead806266edd291de5407b08c2d7ed5dd1 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/tree.h @@ -0,0 +1,8 @@ +#ifndef TREE_H +#define TREE_H +#include "darknet.h" + +int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride); +float get_hierarchy_probability(float *x, tree *hier, int c, int stride); + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/upsample_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/upsample_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..83f32ea5f41b4c787c38e5324e3e7dd4909ca928 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/upsample_layer.c @@ -0,0 +1,106 @@ +#include "upsample_layer.h" +#include "cuda_dark.h" +#include "blas.h" + +#include + +layer make_upsample_layer(int batch, int w, int h, int c, int stride) +{ + layer l = {0}; + l.type = UPSAMPLE; + l.batch = batch; + l.w = w; + l.h = h; + l.c = c; + l.out_w = w*stride; + l.out_h = h*stride; + l.out_c = c; + if(stride < 0){ + stride = -stride; + l.reverse=1; + l.out_w = w/stride; + l.out_h = h/stride; + } + l.stride = stride; + l.outputs = l.out_w*l.out_h*l.out_c; + l.inputs = l.w*l.h*l.c; + l.delta = calloc(l.outputs*batch, sizeof(float)); + l.output = calloc(l.outputs*batch, sizeof(float));; + + l.forward = forward_upsample_layer; + l.backward = backward_upsample_layer; + #ifdef GPU + l.forward_gpu = forward_upsample_layer_gpu; + l.backward_gpu = backward_upsample_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = 
cuda_make_array(l.output, l.outputs*batch);
    #endif
    if(l.reverse) fprintf(stderr, "downsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c);
    else fprintf(stderr, "upsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c);
    return l;
}

/* Recompute dimensions and regrow the host (and GPU) buffers for a new
 * input size. NOTE(review): realloc/calloc results are unchecked here,
 * matching the rest of the file. */
void resize_upsample_layer(layer *l, int w, int h)
{
    l->w = w;
    l->h = h;
    l->out_w = w*l->stride;
    l->out_h = h*l->stride;
    if(l->reverse){
        l->out_w = w/l->stride;
        l->out_h = h/l->stride;
    }
    l->outputs = l->out_w*l->out_h*l->out_c;
    l->inputs = l->h*l->w*l->c;
    l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float));
    l->output = realloc(l->output, l->outputs*l->batch*sizeof(float));

#ifdef GPU
    /* GPU buffers cannot be realloc'd: free and re-create from host copies */
    cuda_free(l->output_gpu);
    cuda_free(l->delta_gpu);
    l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch);
    l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch);
#endif

}

/* CPU forward pass: zero the output, then scatter (reverse) or gather
 * (normal) via upsample_cpu. */
void forward_upsample_layer(const layer l, network net)
{
    fill_cpu(l.outputs*l.batch, 0, l.output, 1);
    if(l.reverse){
        upsample_cpu(l.output, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input);
    }else{
        upsample_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output);
    }
}

/* CPU backward pass: route gradients through the same mapping, with the
 * forward flag inverted relative to forward_upsample_layer. */
void backward_upsample_layer(const layer l, network net)
{
    if(l.reverse){
        upsample_cpu(l.delta, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, net.delta);
    }else{
        upsample_cpu(net.delta, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta);
    }
}

#ifdef GPU
/* GPU forward pass — mirrors forward_upsample_layer on device buffers. */
void forward_upsample_layer_gpu(const layer l, network net)
{
    fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1);
    if(l.reverse){
        upsample_gpu(l.output_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input_gpu);
    }else{
        upsample_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output_gpu);
    }
}

/* GPU backward pass — mirrors backward_upsample_layer on device buffers. */
void backward_upsample_layer_gpu(const layer l, network net)
{
    if(l.reverse){
        upsample_gpu(l.delta_gpu,
l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, net.delta_gpu); + }else{ + upsample_gpu(net.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta_gpu); + } +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/upsample_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/upsample_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..86790d1088354ea9c46a4b20fbe1dacf36925ca8 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/upsample_layer.h @@ -0,0 +1,15 @@ +#ifndef UPSAMPLE_LAYER_H +#define UPSAMPLE_LAYER_H +#include "darknet.h" + +layer make_upsample_layer(int batch, int w, int h, int c, int stride); +void forward_upsample_layer(const layer l, network net); +void backward_upsample_layer(const layer l, network net); +void resize_upsample_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_upsample_layer_gpu(const layer l, network net); +void backward_upsample_layer_gpu(const layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/utils.c b/workloads/realworld/uvm_prefetch_async/darknet/src/utils.c new file mode 100644 index 0000000000000000000000000000000000000000..626b4678c1e2779552ed9d34f19ce4b0f57d9ded --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/utils.c @@ -0,0 +1,726 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + + +/* +// old timing. is it better? who knows!! 
+double get_wall_time() +{ + struct timeval time; + if (gettimeofday(&time,NULL)){ + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * .000001; +} +*/ + +double what_time_is_it_now() +{ + struct timeval time; + if (gettimeofday(&time,NULL)){ + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * .000001; +} + +int *read_intlist(char *gpu_list, int *ngpus, int d) +{ + int *gpus = 0; + if(gpu_list){ + int len = strlen(gpu_list); + *ngpus = 1; + int i; + for(i = 0; i < len; ++i){ + if (gpu_list[i] == ',') ++*ngpus; + } + gpus = calloc(*ngpus, sizeof(int)); + for(i = 0; i < *ngpus; ++i){ + gpus[i] = atoi(gpu_list); + gpu_list = strchr(gpu_list, ',')+1; + } + } else { + gpus = calloc(1, sizeof(float)); + *gpus = d; + *ngpus = 1; + } + return gpus; +} + +int *read_map(char *filename) +{ + int n = 0; + int *map = 0; + char *str; + FILE *file = fopen(filename, "r"); + if(!file) file_error(filename); + while((str=fgetl(file))){ + ++n; + map = realloc(map, n*sizeof(int)); + map[n-1] = atoi(str); + } + return map; +} + +void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections) +{ + size_t i; + for(i = 0; i < sections; ++i){ + size_t start = n*i/sections; + size_t end = n*(i+1)/sections; + size_t num = end-start; + shuffle(arr+(start*size), num, size); + } +} + +void shuffle(void *arr, size_t n, size_t size) +{ + size_t i; + void *swp = calloc(1, size); + for(i = 0; i < n-1; ++i){ + size_t j = i + rand()/(RAND_MAX / (n-i)+1); + memcpy(swp, arr+(j*size), size); + memcpy(arr+(j*size), arr+(i*size), size); + memcpy(arr+(i*size), swp, size); + } +} + +int *random_index_order(int min, int max) +{ + int *inds = calloc(max-min, sizeof(int)); + int i; + for(i = min; i < max; ++i){ + inds[i] = i; + } + for(i = min; i < max-1; ++i){ + int swap = inds[i]; + int index = i + rand()%(max-i); + inds[i] = inds[index]; + inds[index] = swap; + } + return inds; +} + +void del_arg(int argc, char **argv, int index) +{ + int i; + for(i = index; i < 
argc-1; ++i) argv[i] = argv[i+1]; + argv[i] = 0; +} + +int find_arg(int argc, char* argv[], char *arg) +{ + int i; + for(i = 0; i < argc; ++i) { + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)) { + del_arg(argc, argv, i); + return 1; + } + } + return 0; +} + +int find_int_arg(int argc, char **argv, char *arg, int def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = atoi(argv[i+1]); + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + +float find_float_arg(int argc, char **argv, char *arg, float def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = atof(argv[i+1]); + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + +char *find_char_arg(int argc, char **argv, char *arg, char *def) +{ + int i; + for(i = 0; i < argc-1; ++i){ + if(!argv[i]) continue; + if(0==strcmp(argv[i], arg)){ + def = argv[i+1]; + del_arg(argc, argv, i); + del_arg(argc, argv, i); + break; + } + } + return def; +} + + +char *basecfg(char *cfgfile) +{ + char *c = cfgfile; + char *next; + while((next = strchr(c, '/'))) + { + c = next+1; + } + c = copy_string(c); + next = strchr(c, '.'); + if (next) *next = 0; + return c; +} + +int alphanum_to_int(char c) +{ + return (c < 58) ? c - 48 : c-87; +} +char int_to_alphanum(int i) +{ + if (i == 36) return '.'; + return (i < 10) ? i + 48 : i + 87; +} + +void pm(int M, int N, float *A) +{ + int i,j; + for(i =0 ; i < M; ++i){ + printf("%d ", i+1); + for(j = 0; j < N; ++j){ + printf("%2.4f, ", A[i*N+j]); + } + printf("\n"); + } + printf("\n"); +} + +void find_replace(char *str, char *orig, char *rep, char *output) +{ + char buffer[4096] = {0}; + char *p; + + sprintf(buffer, "%s", str); + if(!(p = strstr(buffer, orig))){ // Is 'orig' even in 'str'? 
+ sprintf(output, "%s", str); + return; + } + + *p = '\0'; + + sprintf(output, "%s%s%s", buffer, rep, p+strlen(orig)); +} + +float sec(clock_t clocks) +{ + return (float)clocks/CLOCKS_PER_SEC; +} + +void top_k(float *a, int n, int k, int *index) +{ + int i,j; + for(j = 0; j < k; ++j) index[j] = -1; + for(i = 0; i < n; ++i){ + int curr = i; + for(j = 0; j < k; ++j){ + if((index[j] < 0) || a[curr] > a[index[j]]){ + int swap = curr; + curr = index[j]; + index[j] = swap; + } + } + } +} + +void error(const char *s) +{ + perror(s); + assert(0); + exit(-1); +} + +unsigned char *read_file(char *filename) +{ + FILE *fp = fopen(filename, "rb"); + size_t size; + + fseek(fp, 0, SEEK_END); + size = ftell(fp); + fseek(fp, 0, SEEK_SET); + + unsigned char *text = calloc(size+1, sizeof(char)); + fread(text, 1, size, fp); + fclose(fp); + return text; +} + +void malloc_error() +{ + fprintf(stderr, "Malloc error\n"); + exit(-1); +} + +void file_error(char *s) +{ + fprintf(stderr, "Couldn't open file: %s\n", s); + exit(0); +} + +list *split_str(char *s, char delim) +{ + size_t i; + size_t len = strlen(s); + list *l = make_list(); + list_insert(l, s); + for(i = 0; i < len; ++i){ + if(s[i] == delim){ + s[i] = '\0'; + list_insert(l, &(s[i+1])); + } + } + return l; +} + +void strip(char *s) +{ + size_t i; + size_t len = strlen(s); + size_t offset = 0; + for(i = 0; i < len; ++i){ + char c = s[i]; + if(c==' '||c=='\t'||c=='\n') ++offset; + else s[i-offset] = c; + } + s[len-offset] = '\0'; +} + +void strip_char(char *s, char bad) +{ + size_t i; + size_t len = strlen(s); + size_t offset = 0; + for(i = 0; i < len; ++i){ + char c = s[i]; + if(c==bad) ++offset; + else s[i-offset] = c; + } + s[len-offset] = '\0'; +} + +void free_ptrs(void **ptrs, int n) +{ + int i; + for(i = 0; i < n; ++i) free(ptrs[i]); + free(ptrs); +} + +char *fgetl(FILE *fp) +{ + if(feof(fp)) return 0; + size_t size = 512; + char *line = malloc(size*sizeof(char)); + if(!fgets(line, size, fp)){ + free(line); + return 0; + } + + 
size_t curr = strlen(line); + + while((line[curr-1] != '\n') && !feof(fp)){ + if(curr == size-1){ + size *= 2; + line = realloc(line, size*sizeof(char)); + if(!line) { + printf("%ld\n", size); + malloc_error(); + } + } + size_t readsize = size-curr; + if(readsize > INT_MAX) readsize = INT_MAX-1; + fgets(&line[curr], readsize, fp); + curr = strlen(line); + } + if(line[curr-1] == '\n') line[curr-1] = '\0'; + + return line; +} + +int read_int(int fd) +{ + int n = 0; + int next = read(fd, &n, sizeof(int)); + if(next <= 0) return -1; + return n; +} + +void write_int(int fd, int n) +{ + int next = write(fd, &n, sizeof(int)); + if(next <= 0) error("read failed"); +} + +int read_all_fail(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + int next = read(fd, buffer + n, bytes-n); + if(next <= 0) return 1; + n += next; + } + return 0; +} + +int write_all_fail(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + size_t next = write(fd, buffer + n, bytes-n); + if(next <= 0) return 1; + n += next; + } + return 0; +} + +void read_all(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + int next = read(fd, buffer + n, bytes-n); + if(next <= 0) error("read failed"); + n += next; + } +} + +void write_all(int fd, char *buffer, size_t bytes) +{ + size_t n = 0; + while(n < bytes){ + size_t next = write(fd, buffer + n, bytes-n); + if(next <= 0) error("write failed"); + n += next; + } +} + + +char *copy_string(char *s) +{ + char *copy = malloc(strlen(s)+1); + strncpy(copy, s, strlen(s)+1); + return copy; +} + +list *parse_csv_line(char *line) +{ + list *l = make_list(); + char *c, *p; + int in = 0; + for(c = line, p = line; *c != '\0'; ++c){ + if(*c == '"') in = !in; + else if(*c == ',' && !in){ + *c = '\0'; + list_insert(l, copy_string(p)); + p = c+1; + } + } + list_insert(l, copy_string(p)); + return l; +} + +int count_fields(char *line) +{ + int count = 0; + int done = 0; + char *c; + for(c = line; !done; ++c){ 
+ done = (*c == '\0'); + if(*c == ',' || done) ++count; + } + return count; +} + +float *parse_fields(char *line, int n) +{ + float *field = calloc(n, sizeof(float)); + char *c, *p, *end; + int count = 0; + int done = 0; + for(c = line, p = line; !done; ++c){ + done = (*c == '\0'); + if(*c == ',' || done){ + *c = '\0'; + field[count] = strtod(p, &end); + if(p == c) field[count] = nan(""); + if(end != c && (end != c-1 || *end != '\r')) field[count] = nan(""); //DOS file formats! + p = c+1; + ++count; + } + } + return field; +} + +float sum_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i) sum += a[i]; + return sum; +} + +float mean_array(float *a, int n) +{ + return sum_array(a,n)/n; +} + +void mean_arrays(float **a, int n, int els, float *avg) +{ + int i; + int j; + memset(avg, 0, els*sizeof(float)); + for(j = 0; j < n; ++j){ + for(i = 0; i < els; ++i){ + avg[i] += a[j][i]; + } + } + for(i = 0; i < els; ++i){ + avg[i] /= n; + } +} + +void print_statistics(float *a, int n) +{ + float m = mean_array(a, n); + float v = variance_array(a, n); + printf("MSE: %.6f, Mean: %.6f, Variance: %.6f\n", mse_array(a, n), m, v); +} + +float variance_array(float *a, int n) +{ + int i; + float sum = 0; + float mean = mean_array(a, n); + for(i = 0; i < n; ++i) sum += (a[i] - mean)*(a[i]-mean); + float variance = sum/n; + return variance; +} + +int constrain_int(int a, int min, int max) +{ + if (a < min) return min; + if (a > max) return max; + return a; +} + +float constrain(float min, float max, float a) +{ + if (a < min) return min; + if (a > max) return max; + return a; +} + +float dist_array(float *a, float *b, int n, int sub) +{ + int i; + float sum = 0; + for(i = 0; i < n; i += sub) sum += pow(a[i]-b[i], 2); + return sqrt(sum); +} + +float mse_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i) sum += a[i]*a[i]; + return sqrt(sum/n); +} + +void normalize_array(float *a, int n) +{ + int i; + float mu = mean_array(a,n); + float 
sigma = sqrt(variance_array(a,n)); + for(i = 0; i < n; ++i){ + a[i] = (a[i] - mu)/sigma; + } + mu = mean_array(a,n); + sigma = sqrt(variance_array(a,n)); +} + +void translate_array(float *a, int n, float s) +{ + int i; + for(i = 0; i < n; ++i){ + a[i] += s; + } +} + +float mag_array(float *a, int n) +{ + int i; + float sum = 0; + for(i = 0; i < n; ++i){ + sum += a[i]*a[i]; + } + return sqrt(sum); +} + +void scale_array(float *a, int n, float s) +{ + int i; + for(i = 0; i < n; ++i){ + a[i] *= s; + } +} + +int sample_array(float *a, int n) +{ + float sum = sum_array(a, n); + scale_array(a, n, 1./sum); + float r = rand_uniform(0, 1); + int i; + for(i = 0; i < n; ++i){ + r = r - a[i]; + if (r <= 0) return i; + } + return n-1; +} + +int max_int_index(int *a, int n) +{ + if(n <= 0) return -1; + int i, max_i = 0; + int max = a[0]; + for(i = 1; i < n; ++i){ + if(a[i] > max){ + max = a[i]; + max_i = i; + } + } + return max_i; +} + +int max_index(float *a, int n) +{ + if(n <= 0) return -1; + int i, max_i = 0; + float max = a[0]; + for(i = 1; i < n; ++i){ + if(a[i] > max){ + max = a[i]; + max_i = i; + } + } + return max_i; +} + +int int_index(int *a, int val, int n) +{ + int i; + for(i = 0; i < n; ++i){ + if(a[i] == val) return i; + } + return -1; +} + +int rand_int(int min, int max) +{ + if (max < min){ + int s = min; + min = max; + max = s; + } + int r = (rand()%(max - min + 1)) + min; + return r; +} + +// From http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform +float rand_normal() +{ + static int haveSpare = 0; + static double rand1, rand2; + + if(haveSpare) + { + haveSpare = 0; + return sqrt(rand1) * sin(rand2); + } + + haveSpare = 1; + + rand1 = rand() / ((double) RAND_MAX); + if(rand1 < 1e-100) rand1 = 1e-100; + rand1 = -2 * log(rand1); + rand2 = (rand() / ((double) RAND_MAX)) * TWO_PI; + + return sqrt(rand1) * cos(rand2); +} + +/* + float rand_normal() + { + int n = 12; + int i; + float sum= 0; + for(i = 0; i < n; ++i) sum += (float)rand()/RAND_MAX; + return 
sum-n/2.; + } + */ + +size_t rand_size_t() +{ + return ((size_t)(rand()&0xff) << 56) | + ((size_t)(rand()&0xff) << 48) | + ((size_t)(rand()&0xff) << 40) | + ((size_t)(rand()&0xff) << 32) | + ((size_t)(rand()&0xff) << 24) | + ((size_t)(rand()&0xff) << 16) | + ((size_t)(rand()&0xff) << 8) | + ((size_t)(rand()&0xff) << 0); +} + +float rand_uniform(float min, float max) +{ + if(max < min){ + float swap = min; + min = max; + max = swap; + } + return ((float)rand()/RAND_MAX * (max - min)) + min; +} + +float rand_scale(float s) +{ + float scale = rand_uniform(1, s); + if(rand()%2) return scale; + return 1./scale; +} + +float **one_hot_encode(float *a, int n, int k) +{ + int i; + float **t = calloc(n, sizeof(float*)); + for(i = 0; i < n; ++i){ + t[i] = calloc(k, sizeof(float)); + int index = (int)a[i]; + t[i][index] = 1; + } + return t; +} + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/utils.h b/workloads/realworld/uvm_prefetch_async/darknet/src/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..ef24da79888612f5b48fbb4dc233c483590e0c34 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/utils.h @@ -0,0 +1,53 @@ +#ifndef UTILS_H +#define UTILS_H +#include +#include +#include "darknet.h" +#include "list.h" + +#define TIME(a) \ + do { \ + double start = what_time_is_it_now(); \ + a; \ + printf("%s took: %f seconds\n", #a, what_time_is_it_now() - start); \ + } while (0) + +#define TWO_PI 6.2831853071795864769252866f + +double what_time_is_it_now(); +void shuffle(void *arr, size_t n, size_t size); +void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections); +void free_ptrs(void **ptrs, int n); +int alphanum_to_int(char c); +char int_to_alphanum(int i); +int read_int(int fd); +void write_int(int fd, int n); +void read_all(int fd, char *buffer, size_t bytes); +void write_all(int fd, char *buffer, size_t bytes); +int read_all_fail(int fd, char *buffer, size_t bytes); +int write_all_fail(int fd, char 
*buffer, size_t bytes); +void find_replace(char *str, char *orig, char *rep, char *output); +void malloc_error(); +void file_error(char *s); +void strip(char *s); +void strip_char(char *s, char bad); +list *split_str(char *s, char delim); +char *fgetl(FILE *fp); +list *parse_csv_line(char *line); +char *copy_string(char *s); +int count_fields(char *line); +float *parse_fields(char *line, int n); +void translate_array(float *a, int n, float s); +float constrain(float min, float max, float a); +int constrain_int(int a, int min, int max); +float rand_scale(float s); +int rand_int(int min, int max); +void mean_arrays(float **a, int n, int els, float *avg); +float dist_array(float *a, float *b, int n, int sub); +float **one_hot_encode(float *a, int n, int k); +float sec(clock_t clocks); +void print_statistics(float *a, int n); +int int_index(int *a, int val, int n); + +#endif + diff --git a/workloads/realworld/uvm_prefetch_async/darknet/src/yolo_layer.c b/workloads/realworld/uvm_prefetch_async/darknet/src/yolo_layer.c new file mode 100644 index 0000000000000000000000000000000000000000..049a4d6a92cf7fea667b8de2340822834408bb05 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/yolo_layer.c @@ -0,0 +1,374 @@ +#include "yolo_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "cuda_dark.h" +#include "utils.h" + +#include +#include +#include +#include + +layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes) +{ + int i; + layer l = {0}; + l.type = YOLO; + + l.n = n; + l.total = total; + l.batch = batch; + l.h = h; + l.w = w; + l.c = n*(classes + 4 + 1); + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.cost = calloc(1, sizeof(float)); + l.biases = calloc(total*2, sizeof(float)); + if(mask) l.mask = mask; + else{ + l.mask = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + l.mask[i] = i; + } + } + l.bias_updates = calloc(n*2, sizeof(float)); + l.outputs = 
h*w*n*(classes + 4 + 1); + l.inputs = l.outputs; + l.truths = 90*(4 + 1); + l.delta = calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + for(i = 0; i < total*2; ++i){ + l.biases[i] = .5; + } + + l.forward = forward_yolo_layer; + l.backward = backward_yolo_layer; +#ifdef GPU + l.forward_gpu = forward_yolo_layer_gpu; + l.backward_gpu = backward_yolo_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "yolo\n"); + srand(0); + + return l; +} + +void resize_yolo_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->n*(l->classes + 4 + 1); + l->inputs = l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride) +{ + box b; + b.x = (i + x[index + 0*stride]) / lw; + b.y = (j + x[index + 1*stride]) / lh; + b.w = exp(x[index + 2*stride]) * biases[2*n] / w; + b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h; + return b; +} + +float delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride) +{ + box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); + float iou = box_iou(pred, truth); + + float tx = (truth.x*lw - i); + float ty = (truth.y*lh - j); + float tw = log(truth.w*w / biases[2*n]); + float th = log(truth.h*h / biases[2*n + 1]); + + delta[index + 0*stride] = scale * (tx - x[index + 0*stride]); + delta[index + 1*stride] = scale * (ty - x[index + 1*stride]); + 
delta[index + 2*stride] = scale * (tw - x[index + 2*stride]); + delta[index + 3*stride] = scale * (th - x[index + 3*stride]); + return iou; +} + + +void delta_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat) +{ + int n; + if (delta[index]){ + delta[index + stride*class] = 1 - output[index + stride*class]; + if(avg_cat) *avg_cat += output[index + stride*class]; + return; + } + for(n = 0; n < classes; ++n){ + delta[index + stride*n] = ((n == class)?1 : 0) - output[index + stride*n]; + if(n == class && avg_cat) *avg_cat += output[index + stride*n]; + } +} + +static int entry_index(layer l, int batch, int location, int entry) +{ + int n = location / (l.w*l.h); + int loc = location % (l.w*l.h); + return batch*l.outputs + n*l.w*l.h*(4+l.classes+1) + entry*l.w*l.h + loc; +} + +void forward_yolo_layer(const layer l, network net) +{ + int i,j,b,t,n; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array(l.output + index, (1+l.classes)*l.w*l.h, LOGISTIC); + } + } +#endif + + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + if(!net.train) return; + float avg_iou = 0; + float recall = 0; + float recall75 = 0; + float avg_cat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + int class_count = 0; + *(l.cost) = 0; + for (b = 0; b < l.batch; ++b) { + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.w*l.h); + float best_iou = 0; + int best_t = 0; + for(t = 0; t < l.max_boxes; ++t){ + box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1); + 
if(!truth.x) break; + float iou = box_iou(pred, truth); + if (iou > best_iou) { + best_iou = iou; + best_t = t; + } + } + int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4); + avg_anyobj += l.output[obj_index]; + l.delta[obj_index] = 0 - l.output[obj_index]; + if (best_iou > l.ignore_thresh) { + l.delta[obj_index] = 0; + } + if (best_iou > l.truth_thresh) { + l.delta[obj_index] = 1 - l.output[obj_index]; + + int class = net.truth[best_t*(4 + 1) + b*l.truths + 4]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); + delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, 0); + box truth = float_to_box(net.truth + best_t*(4 + 1) + b*l.truths, 1); + delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + } + } + } + } + for(t = 0; t < l.max_boxes; ++t){ + box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1); + + if(!truth.x) break; + float best_iou = 0; + int best_n = 0; + i = (truth.x * l.w); + j = (truth.y * l.h); + box truth_shift = truth; + truth_shift.x = truth_shift.y = 0; + for(n = 0; n < l.total; ++n){ + box pred = {0}; + pred.w = l.biases[2*n]/net.w; + pred.h = l.biases[2*n+1]/net.h; + float iou = box_iou(pred, truth_shift); + if (iou > best_iou){ + best_iou = iou; + best_n = n; + } + } + + int mask_n = int_index(l.mask, best_n, l.n); + if(mask_n >= 0){ + int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); + float iou = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + + int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4); + avg_obj += l.output[obj_index]; + l.delta[obj_index] = 1 - l.output[obj_index]; + + int class = net.truth[t*(4 + 1) + b*l.truths + 4]; + if (l.map) class = l.map[class]; + int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4 + 1); + 
delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, &avg_cat); + + ++count; + ++class_count; + if(iou > .5) recall += 1; + if(iou > .75) recall75 += 1; + avg_iou += iou; + } + } + } + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", net.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count); +} + +void backward_yolo_layer(const layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +{ + int i; + int new_w=0; + int new_h=0; + if (((float)netw/w) < ((float)neth/h)) { + new_w = netw; + new_h = (h * netw)/w; + } else { + new_h = neth; + new_w = (w * neth)/h; + } + for (i = 0; i < n; ++i){ + box b = dets[i].bbox; + b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); + b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); + b.w *= (float)netw/new_w; + b.h *= (float)neth/new_h; + if(!relative){ + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + dets[i].bbox = b; + } +} + +int yolo_num_detections(layer l, float thresh) +{ + int i, n; + int count = 0; + for (i = 0; i < l.w*l.h; ++i){ + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4); + if(l.output[obj_index] > thresh){ + ++count; + } + } + } + return count; +} + +void avg_flipped_yolo(layer l) +{ + int i,j,n,z; + float *flip = l.output + l.outputs; + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w/2; ++i) { + for (n = 0; n < l.n; ++n) { + for(z = 0; z < l.classes + 4 + 1; ++z){ + int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; + int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); + float swap = flip[i1]; + flip[i1] = flip[i2]; + flip[i2] = swap; + if(z == 0){ + flip[i1] = -flip[i1]; + flip[i2] = -flip[i2]; + } + } + } + } + } + for(i 
= 0; i < l.outputs; ++i){ + l.output[i] = (l.output[i] + flip[i])/2.; + } +} + +int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets) +{ + int i,j,n; + float *predictions = l.output; + if (l.batch == 2) avg_flipped_yolo(l); + int count = 0; + for (i = 0; i < l.w*l.h; ++i){ + int row = i / l.w; + int col = i % l.w; + for(n = 0; n < l.n; ++n){ + int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4); + float objectness = predictions[obj_index]; + if(objectness <= thresh) continue; + int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); + dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); + dets[count].objectness = objectness; + dets[count].classes = l.classes; + for(j = 0; j < l.classes; ++j){ + int class_index = entry_index(l, 0, n*l.w*l.h + i, 4 + 1 + j); + float prob = objectness*predictions[class_index]; + dets[count].prob[j] = (prob > thresh) ? prob : 0; + } + ++count; + } + } + correct_yolo_boxes(dets, count, w, h, netw, neth, relative); + return count; +} + +#ifdef GPU + +void forward_yolo_layer_gpu(const layer l, network net) +{ + copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b, n; + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + int index = entry_index(l, b, n*l.w*l.h, 0); + activate_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + index = entry_index(l, b, n*l.w*l.h, 4); + activate_array_gpu(l.output_gpu + index, (1+l.classes)*l.w*l.h, LOGISTIC); + } + } + if(!net.train || l.onlyforward){ + cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + return; + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_yolo_layer(l, net); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_yolo_layer_gpu(const layer l, network net) +{ + axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif + diff --git 
a/workloads/realworld/uvm_prefetch_async/darknet/src/yolo_layer.h b/workloads/realworld/uvm_prefetch_async/darknet/src/yolo_layer.h new file mode 100644 index 0000000000000000000000000000000000000000..d2a0243268146e00ebff2b4b11bce23f830689d1 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/src/yolo_layer.h @@ -0,0 +1,19 @@ +#ifndef YOLO_LAYER_H +#define YOLO_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes); +void forward_yolo_layer(const layer l, network net); +void backward_yolo_layer(const layer l, network net); +void resize_yolo_layer(layer *l, int w, int h); +int yolo_num_detections(layer l, float thresh); + +#ifdef GPU +void forward_yolo_layer_gpu(const layer l, network net); +void backward_yolo_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/darknet/yolov3-tiny/predictions.jpg b/workloads/realworld/uvm_prefetch_async/darknet/yolov3-tiny/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9ac52ad293314b2654fa1bb577fc1a3249e51228 Binary files /dev/null and b/workloads/realworld/uvm_prefetch_async/darknet/yolov3-tiny/predictions.jpg differ diff --git a/workloads/realworld/uvm_prefetch_async/darknet/yolov3-tiny/run_super.sh b/workloads/realworld/uvm_prefetch_async/darknet/yolov3-tiny/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..d890a7c581dc59167fab1b93778d143fc7e679a9 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/yolov3-tiny/run_super.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm_prefetch_async/darknet/yolov3-tiny/run_yolov3-tiny.sh b/workloads/realworld/uvm_prefetch_async/darknet/yolov3-tiny/run_yolov3-tiny.sh new file mode 100755 index 
0000000000000000000000000000000000000000..d890a7c581dc59167fab1b93778d143fc7e679a9 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/yolov3-tiny/run_yolov3-tiny.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm_prefetch_async/darknet/yolov3-tiny_b/run_super.sh b/workloads/realworld/uvm_prefetch_async/darknet/yolov3-tiny_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..31669e62fb94142e7dc24b3f905c8f1d25950367 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/yolov3-tiny_b/run_super.sh @@ -0,0 +1,2 @@ +#../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg +../darknet detector infer ../cfg/coco.data ../cfg/yolov3-tiny_b.cfg diff --git a/workloads/realworld/uvm_prefetch_async/darknet/yolov3-tiny_t/run_super.sh b/workloads/realworld/uvm_prefetch_async/darknet/yolov3-tiny_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..6cc56bc601476fa212f615b5aec964f12e044473 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/yolov3-tiny_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg +../darknet detector train ../cfg/coco.data ../cfg/yolov3-tiny_t.cfg diff --git a/workloads/realworld/uvm_prefetch_async/darknet/yolov3/predictions.jpg b/workloads/realworld/uvm_prefetch_async/darknet/yolov3/predictions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f54b422e8b61dce98f8448474dbd38cca87c1cb4 Binary files /dev/null and b/workloads/realworld/uvm_prefetch_async/darknet/yolov3/predictions.jpg differ diff --git a/workloads/realworld/uvm_prefetch_async/darknet/yolov3/run_super.sh b/workloads/realworld/uvm_prefetch_async/darknet/yolov3/run_super.sh new file mode 100755 index 
0000000000000000000000000000000000000000..440a4b456a80a434243dffa9614d5a501790990f --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/yolov3/run_super.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm_prefetch_async/darknet/yolov3/run_yolov3.sh b/workloads/realworld/uvm_prefetch_async/darknet/yolov3/run_yolov3.sh new file mode 100755 index 0000000000000000000000000000000000000000..440a4b456a80a434243dffa9614d5a501790990f --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/yolov3/run_yolov3.sh @@ -0,0 +1 @@ +../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg diff --git a/workloads/realworld/uvm_prefetch_async/darknet/yolov3_b/run_super.sh b/workloads/realworld/uvm_prefetch_async/darknet/yolov3_b/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..913790c1ee53a0d442a89306fbd8bda93faa2581 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/yolov3_b/run_super.sh @@ -0,0 +1,2 @@ +#../darknet detect ../cfg/yolov3.cfg ../../../../../data/darknet/yolov3.weights ../data/dog.jpg +../darknet detector infer ../cfg/coco.data ../cfg/yolov3_b.cfg diff --git a/workloads/realworld/uvm_prefetch_async/darknet/yolov3_t/run_super.sh b/workloads/realworld/uvm_prefetch_async/darknet/yolov3_t/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ee7df07c1a4172f25c1129d8027095d2e3861e28 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/darknet/yolov3_t/run_super.sh @@ -0,0 +1,2 @@ +# ../darknet detect ../cfg/yolov3-tiny.cfg ../../../../../data/darknet/yolov3-tiny.weights ../data/dog.jpg +../darknet detector train ../cfg/coco.data ../cfg/yolov3_t.cfg diff --git a/workloads/realworld/uvm_prefetch_async/hotspot/Makefile b/workloads/realworld/uvm_prefetch_async/hotspot/Makefile new file mode 100644 index 
0000000000000000000000000000000000000000..ca348f25bcda1ae4de926a3b112cd792a7848251 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/hotspot/Makefile @@ -0,0 +1,25 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include + +SRC = hotspot.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = hotspot + +release: $(SRC) + $(CC) $(KERNEL_DIM) $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +enum: $(SRC) + $(CC) $(KERNEL_DIM) -deviceemu $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +debug: $(SRC) + $(CC) $(KERNEL_DIM) -g $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +debugenum: $(SRC) + $(CC) $(KERNEL_DIM) -g -deviceemu $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt + diff --git a/workloads/realworld/uvm_prefetch_async/hotspot/Makefile_nvidia b/workloads/realworld/uvm_prefetch_async/hotspot/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..1f7ae25a90c968563e96208c693b927b6765490a --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/hotspot/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
+# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := hotspot +# CUDA source files (compiled with cudacc) +CUFILES := hotspot.cu +# CUDA dependency files +# CU_DEPS := needle_kernel.cu +# C/C++ source files (compiled with gcc / c++) +# CCFILES := + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/uvm_prefetch_async/hotspot/README b/workloads/realworld/uvm_prefetch_async/hotspot/README new file mode 100644 index 0000000000000000000000000000000000000000..f24239abebe938fe3e8d3c1a7a97f915bd09a90b --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/hotspot/README @@ -0,0 +1,8 @@ +******Adjustable work group size***** +The kernel has square shape +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe one dimension +The actually dimension = RD_WG_SIZE_0 * RD_WG_SIZE_0 + +USAGE: +make clean +make 
KERNEL_DIM="-DRD_WG_SIZE_0=16" \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/hotspot/hotspot.cu b/workloads/realworld/uvm_prefetch_async/hotspot/hotspot.cu new file mode 100644 index 0000000000000000000000000000000000000000..2aec900f5f1af1a83bb1281c0380ee79ab1b6e05 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/hotspot/hotspot.cu @@ -0,0 +1,435 @@ +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#ifdef RD_WG_SIZE_0_0 +#define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) +#define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) +#define BLOCK_SIZE RD_WG_SIZE +#else +#define BLOCK_SIZE 16 +#endif + +#define STR_SIZE 256 + +/* maximum power density possible (say 300W for a 10mm x 10mm chip) */ +#define MAX_PD (3.0e6) +/* required precision in degrees */ +#define PRECISION 0.001 +#define SPEC_HEAT_SI 1.75e6 +#define K_SI 100 +/* capacitance fitting factor */ +#define FACTOR_CHIP 0.5 + +/* chip parameters */ +float t_chip = 0.0005; +float chip_height = 0.016; +float chip_width = 0.016; +/* ambient temperature, assuming no package at all */ +float amb_temp = 80.0; + +void run(int argc, char **argv); + +/* define timer macros */ +#define pin_stats_reset() startCycle() +#define pin_stats_pause(cycles) stopCycle(cycles) +#define pin_stats_dump(cycles) printf("timer: %Lu\n", cycles) + +void fatal(char *s) +{ + fprintf(stderr, "error: %s\n", s); +} + +void writeoutput(float *vect, int grid_rows, int grid_cols, char *file) +{ + + int i, j, index = 0; + FILE *fp; + char str[STR_SIZE]; + + if ((fp = fopen(file, "w")) == 0) + printf("The file was not opened\n"); + + for (i = 0; i < grid_rows; i++) + for (j = 0; j < grid_cols; j++) + { + + sprintf(str, "%d\t%g\n", index, vect[i * grid_cols + j]); + fputs(str, fp); + index++; + } + + fclose(fp); +} 
+ +void readinput(float *vect, int grid_rows, int grid_cols, char *file) +{ + + int i, j; + FILE *fp; + char str[STR_SIZE]; + float val; + + if ((fp = fopen(file, "r")) == 0) + printf("The file was not opened\n"); + + for (i = 0; i <= grid_rows - 1; i++) + for (j = 0; j <= grid_cols - 1; j++) + { + fgets(str, STR_SIZE, fp); + if (feof(fp)) + fatal("not enough lines in file"); + // if ((sscanf(str, "%d%f", &index, &val) != 2) || (index != ((i-1)*(grid_cols-2)+j-1))) + if ((sscanf(str, "%f", &val) != 1)) + fatal("invalid file format"); + vect[i * grid_cols + j] = val; + } + + fclose(fp); +} + +#define IN_RANGE(x, min, max) ((x) >= (min) && (x) <= (max)) +#define CLAMP_RANGE(x, min, max) x = (x < (min)) ? min : ((x > (max)) ? max : x) +#define MIN(a, b) ((a) <= (b) ? (a) : (b)) + +__global__ void calculate_temp(int iteration, // number of iteration + float *power, // power input + float *temp_src, // temperature input/output + float *temp_dst, // temperature input/output + int grid_cols, // Col of grid + int grid_rows, // Row of grid + int border_cols, // border offset + int border_rows, // border offset + float Cap, // Capacitance + float Rx, + float Ry, + float Rz, + float step, + int batch_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + __shared__ float temp_on_cuda[PREFETCH_COUNT][BLOCK_SIZE][BLOCK_SIZE]; + __shared__ float power_on_cuda[PREFETCH_COUNT][BLOCK_SIZE][BLOCK_SIZE]; + __shared__ float temp_t[BLOCK_SIZE][BLOCK_SIZE]; // saving temparary temperature result + + float amb_temp = 80.0; + float step_div_Cap; + float Rx_1, Ry_1, Rz_1; + + // int bx = blockIdx.x; + // int by = blockIdx.y; + + int tx = threadIdx.x; + int ty = threadIdx.y; + + step_div_Cap = step / Cap; + + Rx_1 = 1 / Rx; + Ry_1 = 1 / Ry; + Rz_1 = 1 / Rz; + + // each block finally computes result for a small block + // after N iterations. 
+ // it is the non-overlapping small blocks that cover + // all the input data + + // calculate the small block size + int small_block_rows = BLOCK_SIZE - iteration * 2; // EXPAND_RATE + int small_block_cols = BLOCK_SIZE - iteration * 2; // EXPAND_RATE + + // if (bx == 0 && by == 0 && tx == 0 && ty == 0) + // printf("iteration is %d, small_block_rows is %d\n", iteration, small_block_rows); + + int tile_dim_x = gridDim.x * batch_size; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = batch_size * batch_size; + int tiles_this_block_x = batch_size; + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int fetch = base_tile; + int end_tile = fetch + tiles_this_block; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + // block id + int offset = fetch - base_tile; + int block_id = fetch / tiles_this_block; + int bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + // calculate the boundary for the block according to + // the boundary of its small block + int blkY = small_block_rows * by - border_rows; + int blkX = small_block_cols * bx - border_cols; + int blkYmax = blkY + BLOCK_SIZE - 1; + int blkXmax = blkX + BLOCK_SIZE - 1; + + // calculate the global thread coordination + int yidx = blkY + ty; + int xidx = blkX + tx; + + // load data if it is within the valid input range + int loadYidx = yidx, loadXidx = xidx; + int index = grid_cols * loadYidx + loadXidx; + + if (IN_RANGE(loadYidx, 0, grid_rows - 1) && IN_RANGE(loadXidx, 0, grid_cols - 1)) + { + memcpy_async(temp_on_cuda[fetch % PREFETCH_COUNT][ty][tx], temp_src[index], pipe); // Load the temperature data from global memory to shared memory + memcpy_async(power_on_cuda[fetch % PREFETCH_COUNT][ty][tx], power[index], pipe); // Load the power data from global memory to 
shared memory + } + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + // block id + int offset = compute - base_tile; + int block_id = compute / tiles_this_block; + int bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + // calculate the boundary for the block according to + // the boundary of its small block + int blkY = small_block_rows * by - border_rows; + int blkX = small_block_cols * bx - border_cols; + int blkYmax = blkY + BLOCK_SIZE - 1; + int blkXmax = blkX + BLOCK_SIZE - 1; + + // calculate the global thread coordination + int yidx = blkY + ty; + int xidx = blkX + tx; + + // load data if it is within the valid input range + int loadYidx = yidx, loadXidx = xidx; + int index = grid_cols * loadYidx + loadXidx; + + // effective range within this block that falls within + // the valid range of the input data + // used to rule out computation outside the boundary. + int validYmin = (blkY < 0) ? -blkY : 0; + int validYmax = (blkYmax > grid_rows - 1) ? BLOCK_SIZE - 1 - (blkYmax - grid_rows + 1) : BLOCK_SIZE - 1; + int validXmin = (blkX < 0) ? -blkX : 0; + int validXmax = (blkXmax > grid_cols - 1) ? BLOCK_SIZE - 1 - (blkXmax - grid_cols + 1) : BLOCK_SIZE - 1; + + int N = ty - 1; + int S = ty + 1; + int W = tx - 1; + int E = tx + 1; + + N = (N < validYmin) ? validYmin : N; + S = (S > validYmax) ? validYmax : S; + W = (W < validXmin) ? validXmin : W; + E = (E > validXmax) ? 
validXmax : E; + + bool computed; + for (int i = 0; i < iteration; i++) + { + computed = false; + if (IN_RANGE(tx, i + 1, BLOCK_SIZE - i - 2) && + IN_RANGE(ty, i + 1, BLOCK_SIZE - i - 2) && + IN_RANGE(tx, validXmin, validXmax) && + IN_RANGE(ty, validYmin, validYmax)) + { + computed = true; + temp_t[ty][tx] = temp_on_cuda[compute % PREFETCH_COUNT][ty][tx] + step_div_Cap * (power_on_cuda[compute % PREFETCH_COUNT][ty][tx] + + (temp_on_cuda[compute % PREFETCH_COUNT][S][tx] + temp_on_cuda[compute % PREFETCH_COUNT][N][tx] - 2.0 * temp_on_cuda[compute % PREFETCH_COUNT][ty][tx]) * Ry_1 + + (temp_on_cuda[compute % PREFETCH_COUNT][ty][E] + temp_on_cuda[compute % PREFETCH_COUNT][ty][W] - 2.0 * temp_on_cuda[compute % PREFETCH_COUNT][ty][tx]) * Rx_1 + + (amb_temp - temp_on_cuda[compute % PREFETCH_COUNT][ty][tx]) * Rz_1); + } + block.sync(); + if (i == iteration - 1) + break; + if (computed) // Assign the computation range + temp_on_cuda[compute % PREFETCH_COUNT][ty][tx] = temp_t[ty][tx]; + block.sync(); + } + + // update the global memory + // after the last iteration, only threads coordinated within the + // small block perform the calculation and switch on ``computed'' + if (computed) + { + temp_dst[index] = temp_t[ty][tx]; + } + } +} + +/* + compute N time steps +*/ + +int compute_tran_temp(float *MatrixPower, float *MatrixTemp[2], int col, int row, + int total_iterations, int num_iterations, int blockCols, int blockRows, int borderCols, int borderRows, int batch_size) +{ + dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE); + dim3 dimGrid(blockCols, blockRows); + + float grid_height = chip_height / row; + float grid_width = chip_width / col; + + float Cap = FACTOR_CHIP * SPEC_HEAT_SI * t_chip * grid_width * grid_height; + float Rx = grid_width / (2.0 * K_SI * t_chip * grid_height); + float Ry = grid_height / (2.0 * K_SI * t_chip * grid_width); + float Rz = t_chip / (K_SI * grid_height * grid_width); + + float max_slope = MAX_PD / (FACTOR_CHIP * t_chip * SPEC_HEAT_SI); + float step = 
PRECISION / max_slope; + float t; + + int src = 1, dst = 0; + + for (t = 0; t < total_iterations; t += num_iterations) + { + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(MatrixPower, col * row * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(MatrixTemp[src], col * row * sizeof(float), GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(MatrixTemp[dst], col * row * sizeof(float), GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + int temp = src; + src = dst; + dst = temp; + calculate_temp<<>>(MIN(num_iterations, total_iterations - t), MatrixPower, MatrixTemp[src], MatrixTemp[dst], + col, row, borderCols, borderRows, Cap, Rx, Ry, Rz, step, batch_size); + } + return dst; +} + +void usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "\t - number of rows/cols in the grid (positive integer)\n"); + fprintf(stderr, "\t - pyramid heigh(positive integer)\n"); + fprintf(stderr, "\t - number of iterations\n"); + fprintf(stderr, "\t - name of the file containing the initial temperature values of each cell\n"); + fprintf(stderr, "\t - name of the file containing the dissipated power values of each cell\n"); + fprintf(stderr, "\t - name of the output file\n"); + fprintf(stderr, "\t - batch_size * batch_size per block\n"); + exit(1); +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE); + + run(argc, argv); + + return EXIT_SUCCESS; +} + +void run(int argc, char **argv) +{ + int size; + int grid_rows, grid_cols; + float *FilesavingTemp, *FilesavingPower, *MatrixOut; + char *tfile, *pfile, *ofile; + + int total_iterations = 60; + int 
pyramid_height = 1; // number of iterations + + if (argc != 8) + usage(argc, argv); + if ((grid_rows = atoi(argv[1])) <= 0 || + (grid_cols = atoi(argv[1])) <= 0 || + (pyramid_height = atoi(argv[2])) <= 0 || + (total_iterations = atoi(argv[3])) <= 0) + usage(argc, argv); + + tfile = argv[4]; + pfile = argv[5]; + ofile = argv[6]; + + int batch_size = atoi(argv[7]); + + size = grid_rows * grid_cols; + +/* --------------- pyramid parameters --------------- */ +#define EXPAND_RATE 2 // add one iteration will extend the pyramid base by 2 per each borderline + int borderCols = (pyramid_height)*EXPAND_RATE / 2; + int borderRows = (pyramid_height)*EXPAND_RATE / 2; + int smallBlockCol = BLOCK_SIZE - (pyramid_height)*EXPAND_RATE; + int smallBlockRow = BLOCK_SIZE - (pyramid_height)*EXPAND_RATE; + // int blockCols = grid_cols / smallBlockCol + ((grid_cols % smallBlockCol == 0) ? 0 : 1); + // int blockRows = grid_rows / smallBlockRow + ((grid_rows % smallBlockRow == 0) ? 0 : 1); + + int blockCols = (grid_cols + smallBlockCol * batch_size - 1) / (smallBlockCol * batch_size); + int blockRows = (grid_rows + smallBlockRow * batch_size - 1) / (smallBlockRow * batch_size); + + // printf("borderCols is %d, smallBlockCol is %d, blockCols is %d, grid_cols is %d \n", borderCols, smallBlockCol, blockCols, grid_cols); + + FilesavingTemp = (float *)malloc(size * sizeof(float)); + FilesavingPower = (float *)malloc(size * sizeof(float)); + MatrixOut = (float *)calloc(size, sizeof(float)); + + if (!FilesavingPower || !FilesavingTemp || !MatrixOut) + fatal("unable to allocate memory"); + + printf("pyramidHeight: %d\ngridSize: [%d, %d]\nborder:[%d, %d]\nblockGrid:[%d, %d]\ntargetBlock:[%d, %d]\n", + pyramid_height, grid_cols, grid_rows, borderCols, borderRows, blockCols, blockRows, smallBlockCol, smallBlockRow); + + readinput(FilesavingTemp, grid_rows, grid_cols, tfile); + readinput(FilesavingPower, grid_rows, grid_cols, pfile); + + GPU_argv_init(); + + initTrace(); + startCPU(); + float 
*MatrixTemp[2], *MatrixPower; + cudaMallocManaged((void **)&MatrixTemp[0], sizeof(float) * size); + cudaMallocManaged((void **)&MatrixTemp[1], sizeof(float) * size); + memcpy(MatrixTemp[0], FilesavingTemp, sizeof(float) * size); + + cudaMallocManaged((void **)&MatrixPower, sizeof(float) * size); + memcpy(MatrixPower, FilesavingPower, sizeof(float) * size); + // printf("Start computing the transient temperature\n"); + int ret = compute_tran_temp(MatrixPower, MatrixTemp, grid_cols, grid_rows, + total_iterations, pyramid_height, blockCols, blockRows, borderCols, borderRows, batch_size); + // printf("Ending simulation\n"); + memcpy(MatrixOut, MatrixTemp[ret], sizeof(float) * size); + + cudaFree(MatrixPower); + cudaFree(MatrixTemp[0]); + cudaFree(MatrixTemp[1]); + + endCPU(); + finiTrace(); + + writeoutput(MatrixOut, grid_rows, grid_cols, ofile); + free(MatrixOut); +} diff --git a/workloads/realworld/uvm_prefetch_async/hotspot/run.sh b/workloads/realworld/uvm_prefetch_async/hotspot/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..66b814725286fb6699d2b893740518ad43dc307a --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/hotspot/run.sh @@ -0,0 +1 @@ +./hotspot 512 2 2 ../../../../data/hotspot/temp_512 ../../../../data/hotspot/power_512 output.out 4 diff --git a/workloads/realworld/uvm_prefetch_async/hotspot/run_mega.sh b/workloads/realworld/uvm_prefetch_async/hotspot/run_mega.sh new file mode 100755 index 0000000000000000000000000000000000000000..68ff2e14db1e57289f0c85c6472049c5b99c62f4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/hotspot/run_mega.sh @@ -0,0 +1 @@ +./hotspot 16384 2 2 ../../../../data/hotspot/temp_16384 ../../../../data/hotspot/power_16384 output.out 8 diff --git a/workloads/realworld/uvm_prefetch_async/hotspot/run_super.sh b/workloads/realworld/uvm_prefetch_async/hotspot/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..ad31b9dcd0ce3e1d53df5aaf445082ab86cf2366 --- /dev/null 
+++ b/workloads/realworld/uvm_prefetch_async/hotspot/run_super.sh @@ -0,0 +1 @@ +./hotspot 8192 2 2 ../../../../data/hotspot/temp_8192.txt ../../../../data/hotspot/power_8192.txt output.out 8 diff --git a/workloads/realworld/uvm_prefetch_async/kmeans/Makefile b/workloads/realworld/uvm_prefetch_async/kmeans/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..e473b45be17e5805399c15b04dda57742451d913 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/kmeans/Makefile @@ -0,0 +1,33 @@ +include ../../../common/make.config + +# C compiler +CC = gcc +CC_FLAGS = -g -fopenmp -O2 + +# CUDA compiler +NVCC = $(CUDA_DIR)/bin/nvcc +NVCC_FLAGS = -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) + +# 'make dbg=1' enables NVCC debugging + + +# 'make emu=1' compiles the CUDA kernels for emulation +ifeq ($(emu),1) + NVCC_FLAGS += -deviceemu +endif + + +kmeans: cluster.o getopt.o kmeans.o kmeans_clustering.o kmeans_cuda.o rmse.o $(CUPTI_ADD_COMMON)/cpu_timestamps.o + $(CC) $(CC_FLAGS) cluster.o getopt.o kmeans.o kmeans_clustering.o kmeans_cuda.o rmse.o $(CUPTI_ADD_COMMON)/cpu_timestamps.o $(CUPTI_ADD_COMMON)/cupti_add.cpp -o kmeans $(NVCC_FLAGS) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lm -lstdc++ + +kmeans.o: kmeans.c + $(CC) $(CC_FLAGS) $< -c $(NVCC_FLAGS) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +%.o: %.[ch] + $(CC) $(CC_FLAGS) $< -c -I$(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcudart -lcupti -lstdc++ + +kmeans_cuda.o: kmeans_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp + $(NVCC) -O2 -c kmeans_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(NVCC_FLAGS) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +clean: + rm -f *.o *~ kmeans kmeans_cuda.linkinfo diff --git a/workloads/realworld/uvm_prefetch_async/kmeans/Makefile_nvidia b/workloads/realworld/uvm_prefetch_async/kmeans/Makefile_nvidia new file mode 100755 index 
0000000000000000000000000000000000000000..5d612b9dc8b5c19310162e1f721e1d2d4e33fb72 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/kmeans/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. 
+# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := kmeans +# CUDA source files (compiled with cudacc) +CUFILES := kmeans_cuda.cu +# CUDA dependency files +CU_DEPS := kmeans_cuda_kernel.cu +# C/C++ source files (compiled with gcc / c++) +CFILES := cluster.c getopt.c kmeans.c kmeans_clustering.c rmse.c + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/uvm_prefetch_async/kmeans/README b/workloads/realworld/uvm_prefetch_async/kmeans/README new file mode 100755 index 0000000000000000000000000000000000000000..bebae52d716986889b30a435214e095346424190 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/kmeans/README @@ -0,0 +1,10 @@ +Usage: ./kmeans [switches] -i filename + + -i filename :file containing data to be clustered + -m max_nclusters :maximum number of clusters allowed [default=5] + -n min_nclusters :minimum number of clusters allowed [default=5] + -t threshold :threshold value [default=0.001] + -l nloops :iteration for each number of clusters [default=1] + -b :input file is in binary format + -r :calculate RMSE [default=off] + -o :output cluster center coordinates [default=off] \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/kmeans/cluster.c b/workloads/realworld/uvm_prefetch_async/kmeans/cluster.c new file mode 100755 index 0000000000000000000000000000000000000000..c4010b11c0306acae331279b89b1a63f3ad5637d --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/kmeans/cluster.c @@ -0,0 +1,165 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
*/ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. 
*/ +/******************************************************************************/ + +/*************************************************************************/ +/** File: cluster.c **/ +/** Description: Takes as input a file, containing 1 data point per **/ +/** per line, and performs a fuzzy c-means clustering **/ +/** on the data. Fuzzy clustering is performed using **/ +/** min to max clusters and the clustering that gets **/ +/** the best score according to a compactness and **/ +/** separation criterion are returned. **/ +/** Author: Brendan McCane **/ +/** James Cook University of North Queensland. **/ +/** Australia. email: mccane@cs.jcu.edu.au **/ +/** **/ +/** Edited by: Jay Pisharath, Wei-keng Liao **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "kmeans.h" +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) \ + (((uintptr_t) (buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer)) + +// Timestamp at trace initialization time. 
Used to normalized other +// timestamps + +extern double wtime(void); +float min_rmse_ref = FLT_MAX; /* reference min_rmse value */ + +/*---< cluster() >-----------------------------------------------------------*/ +int cluster(int npoints, /* number of data points */ + int nfeatures, /* number of attributes for each point */ + float **features, /* array: [npoints][nfeatures] */ + int min_nclusters, /* range of min to max number of clusters */ + int max_nclusters, + float threshold, /* loop terminating factor */ + int *best_nclusters, /* out: number between min and max with lowest RMSE */ + float ***cluster_centres, /* out: [best_nclusters][nfeatures] */ + float *min_rmse, /* out: minimum RMSE */ + int isRMSE, /* calculate RMSE */ + int nloops /* number of iteration for each number of clusters */ + ) +{ + + + int nclusters; /* number of clusters k */ + int index =0; /* number of iteration to reach the best RMSE */ + int rmse; /* RMSE for each clustering */ + int *membership; /* which cluster a data point belongs to */ + float **tmp_cluster_centres; /* hold coordinates of cluster centers */ + int i; + + /* allocate memory for membership */ + membership = (int*) malloc(npoints * sizeof(int)); + + /* sweep k from min to max_nclusters to find the best number of clusters */ + for(nclusters = min_nclusters; nclusters <= max_nclusters; nclusters++) + { + if (nclusters > npoints) break; /* cannot have more clusters than points */ + + /* allocate device memory, invert data array (@ kmeans_cuda.cu) */ + allocateMemory(npoints, nfeatures, nclusters, features); + + /* iterate nloops times for each number of clusters */ + for(i = 0; i < nloops; i++) + { + /* initialize initial cluster centers, CUDA calls (@ kmeans_cuda.cu) */ + tmp_cluster_centres = kmeans_clustering(features, + nfeatures, + npoints, + nclusters, + threshold, + membership); + + if (*cluster_centres) { + free((*cluster_centres)[0]); + free(*cluster_centres); + } + *cluster_centres = tmp_cluster_centres; + + + /* 
find the number of clusters with the best RMSE */ + if(isRMSE) + { + rmse = rms_err(features, + nfeatures, + npoints, + tmp_cluster_centres, + nclusters); + + if(rmse < min_rmse_ref){ + min_rmse_ref = rmse; //update reference min RMSE + *min_rmse = min_rmse_ref; //update return min RMSE + *best_nclusters = nclusters; //update optimum number of clusters + index = i; //update number of iteration to reach best RMSE + } + } + } + + deallocateMemory(); /* free device memory (@ kmeans_cuda.cu) */ + } + + free(membership); + + return index; +} + diff --git a/workloads/realworld/uvm_prefetch_async/kmeans/cp.sh b/workloads/realworld/uvm_prefetch_async/kmeans/cp.sh new file mode 100755 index 0000000000000000000000000000000000000000..243885047459789eb8eae5d6c5b2b4822eb3aabf --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/kmeans/cp.sh @@ -0,0 +1,27 @@ +cp super_0.log super_3.log +cp super_0.log super_4.log +cp super_0.log super_5.log +cp super_0.log super_6.log +cp super_0.log super_7.log +cp super_0.log super_8.log +cp super_0.log super_9.log +cp super_0.log super_10.log +cp super_0.log super_11.log +cp super_0.log super_12.log +cp super_0.log super_13.log +cp super_0.log super_14.log +cp super_0.log super_15.log +cp super_0.log super_17.log +cp super_0.log super_17.log +cp super_0.log super_18.log +cp super_0.log super_19.log +cp super_0.log super_20.log +cp super_0.log super_21.log +cp super_0.log super_22.log +cp super_0.log super_23.log +cp super_0.log super_24.log +cp super_0.log super_25.log +cp super_0.log super_26.log +cp super_0.log super_27.log +cp super_0.log super_28.log +cp super_0.log super_29.log diff --git a/workloads/realworld/uvm_prefetch_async/kmeans/getopt.c b/workloads/realworld/uvm_prefetch_async/kmeans/getopt.c new file mode 100755 index 0000000000000000000000000000000000000000..fa2f31378fb2978f65267ba2e810aae3ff1ee016 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/kmeans/getopt.c @@ -0,0 +1,1184 @@ +/* Getopt for GNU. 
+ NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to drepper@gnu.org + before changing it! + Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . */ +#ifndef _NO_PROTO +# define _NO_PROTO +#endif + +#ifdef HAVE_CONFIG_H +# include +#endif + +#if !defined __STDC__ || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +# ifndef const +# define const +# endif +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. 
*/ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +# include +# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +# define ELIDE_CODE +# endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +# include +# include +#endif /* GNU C library. */ + +#ifdef VMS +# include +# if HAVE_STRING_H - 0 +# include +# endif +#endif + +#ifndef _ +/* This is for other GNU distributions with internationalized messages. */ +# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include +# ifndef _ +# define _(msgid) gettext (msgid) +# endif +# else +# define _(msgid) (msgid) +# endif +# if defined _LIBC && defined USE_IN_LIBIO +# include +# endif +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg; + +/* Index in ARGV of the next element to be scanned. 
+ This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. */ +int optind = 1; + +/* Formerly, initialization of getopt depended on optind==0, which + causes problems with re-calling getopt as programs generally don't + know that. */ + +int __getopt_initialized; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. 
This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +/* Value of POSIXLY_CORRECT environment variable. */ +static char *posixly_correct; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +# include +# define my_index strchr +#else + +//# if HAVE_STRING_H || WIN32 /* Pete Wilson mod 7/28/02 */ +# include +//# else +//# include +//# endif + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +#ifndef getenv +extern char *getenv (); +#endif + +static char * +my_index (str, chr) + const char *str; + int chr; +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. */ +#ifdef __GNUC__ +/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. + That was relevant to code that was here before. */ +# if (!defined __STDC__ || !__STDC__) && !defined strlen +/* gcc with -traditional declares the built-in strlen to return int, + and has done so at least since version 2.4.5. -- rms. 
*/ +extern int strlen (const char *); +# endif /* not __STDC__ */ +#endif /* __GNUC__ */ + +#endif /* not __GNU_LIBRARY__ */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +#ifdef _LIBC +/* Stored original parameters. + XXX This is no good solution. We should rather copy the args so + that we can compare them later. But we must not use malloc(3). */ +extern int __libc_argc; +extern char **__libc_argv; + +/* Bash 2.0 gives us an environment variable containing flags + indicating ARGV elements that should not be considered arguments. */ + +# ifdef USE_NONOPTION_FLAGS +/* Defined in getopt_init.c */ +extern char *__getopt_nonoption_flags; + +static int nonoption_flags_max_len; +static int nonoption_flags_len; +# endif + +# ifdef USE_NONOPTION_FLAGS +# define SWAP_FLAGS(ch1, ch2) \ + if (nonoption_flags_len > 0) \ + { \ + char __tmp = __getopt_nonoption_flags[ch1]; \ + __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ + __getopt_nonoption_flags[ch2] = __tmp; \ + } +# else +# define SWAP_FLAGS(ch1, ch2) +# endif +#else /* !_LIBC */ +# define SWAP_FLAGS(ch1, ch2) +#endif /* _LIBC */ + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. 
*/ + +#if defined __STDC__ && __STDC__ +static void exchange (char **); +#endif + +static void +exchange (argv) + char **argv; +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + /* First make sure the handling of the `__getopt_nonoption_flags' + string can work normally. Our top argument must be in the range + of the string. */ + if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) + { + /* We must extend the array. The user plays games with us and + presents new arguments. */ + char *new_str = malloc (top + 1); + if (new_str == NULL) + nonoption_flags_len = nonoption_flags_max_len = 0; + else + { + memset (__mempcpy (new_str, __getopt_nonoption_flags, + nonoption_flags_max_len), + '\0', top + 1 - nonoption_flags_max_len); + nonoption_flags_max_len = top + 1; + __getopt_nonoption_flags = new_str; + } + } +#endif + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. 
*/ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + SWAP_FLAGS (bottom + i, middle + i); + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Initialize the internal data when the first call is made. */ + +#if defined __STDC__ && __STDC__ +static const char *_getopt_initialize (int, char *const *, const char *); +#endif +static const char * +_getopt_initialize (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + first_nonopt = last_nonopt = optind; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. 
*/ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + if (posixly_correct == NULL + && argc == __libc_argc && argv == __libc_argv) + { + if (nonoption_flags_max_len == 0) + { + if (__getopt_nonoption_flags == NULL + || __getopt_nonoption_flags[0] == '\0') + nonoption_flags_max_len = -1; + else + { + const char *orig_str = __getopt_nonoption_flags; + int len = nonoption_flags_max_len = strlen (orig_str); + if (nonoption_flags_max_len < argc) + nonoption_flags_max_len = argc; + __getopt_nonoption_flags = + (char *) malloc (nonoption_flags_max_len); + if (__getopt_nonoption_flags == NULL) + nonoption_flags_max_len = -1; + else + memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), + '\0', nonoption_flags_max_len - len); + } + } + nonoption_flags_len = nonoption_flags_max_len; + } + else + nonoption_flags_len = 0; +#endif + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns -1. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) 
+ + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. 
*/ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + int print_errors = opterr; + if (optstring[0] == ':') + print_errors = 0; + + if (argc < 1) + return -1; + + optarg = NULL; + + if (optind == 0 || !__getopt_initialized) + { + if (optind == 0) + optind = 1; /* Don't scan ARGV[0], the program name. */ + optstring = _getopt_initialize (argc, argv, optstring); + __getopt_initialized = 1; + } + + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The later information + is only used when the used in the GNU libc. */ +#if defined _LIBC && defined USE_NONOPTION_FLAGS +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ + || (optind < nonoption_flags_len \ + && __getopt_nonoption_flags[optind] == '1')) +#else +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') +#endif + + if (nextchar == NULL || *nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT and LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (last_nonopt > optind) + last_nonopt = optind; + if (first_nonopt > optind) + first_nonopt = optind; + + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. 
*/ + + while (optind < argc && NONOPTION_P) + optind++; + last_nonopt = optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (ordering == REQUIRE_ORDER) + return -1; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. 
*/ + + if (longopts != NULL + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) + == (unsigned int) strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else if (long_only + || pfound->has_arg != p->has_arg + || pfound->flag != p->flag + || pfound->val != p->val) + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + optopt = 0; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind - 1][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#else + fprintf (stderr, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], + pfound->name); +#else + fprintf (stderr, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + + nextchar += strlen (nextchar); + + optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. 
If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' + || my_index (optstring, *nextchar) == NULL) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + nextchar = (char *) ""; + optind++; + optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (posixly_correct) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: illegal option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); +#endif + } + else + { +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: invalid option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + optopt = c; + return '?'; + } + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';') + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) + /* Do nothing. 
*/ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = NULL; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. 
*/ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#endif /* Not ELIDE_CODE. */ + + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/kmeans/getopt.h b/workloads/realworld/uvm_prefetch_async/kmeans/getopt.h new file mode 100755 index 0000000000000000000000000000000000000000..bae04bf7d418206d73892a94ff94923a36549362 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/kmeans/getopt.h @@ -0,0 +1,191 @@ + + +/* getopt.h */ +/* Declarations for getopt. + Copyright (C) 1989-1994, 1996-1999, 2001 Free Software + Foundation, Inc. This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute + it and/or modify it under the terms of the GNU Lesser + General Public License as published by the Free Software + Foundation; either version 2.1 of the License, or + (at your option) any later version. + + The GNU C Library is distributed in the hope that it will + be useful, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A + PARTICULAR PURPOSE. 
See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General + Public License along with the GNU C Library; if not, write + to the Free Software Foundation, Inc., 59 Temple Place, + Suite 330, Boston, MA 02111-1307 USA. */ + + + + + +#ifndef _GETOPT_H + +#ifndef __need_getopt +# define _GETOPT_H 1 +#endif + +/* If __GNU_LIBRARY__ is not already defined, either we are being used + standalone, or this is the first header included in the source file. + If we are being used with glibc, we need to include , but + that does not exist if we are standalone. So: if __GNU_LIBRARY__ is + not defined, include , which will pull in for us + if it's from glibc. (Why ctype.h? It's guaranteed to exist and it + doesn't flood the namespace with stuff the way some other headers do.) */ +#if !defined __GNU_LIBRARY__ +# include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. 
*/ + +extern int optopt; + +#ifndef __need_getopt +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +# if (defined __STDC__ && __STDC__) || defined __cplusplus + const char *name; +# else + char *name; +# endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +# define no_argument 0 +# define required_argument 1 +# define optional_argument 2 +#endif /* need getopt */ + + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, `optopt' is set to the option letter, and '?' is + returned. 
+ + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in `optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU `getopt'. + + The argument `--' causes premature termination of argument + scanning, explicitly telling `getopt' that there are no more + options. + + If OPTS begins with `--', then non-option arguments are treated as + arguments to the option '\0'. This behavior is specific to the GNU + `getopt'. */ + +#if (defined __STDC__ && __STDC__) || defined __cplusplus +# ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int ___argc, char *const *___argv, const char *__shortopts); +# else /* not __GNU_LIBRARY__ */ +extern int getopt (); +# endif /* __GNU_LIBRARY__ */ + +# ifndef __need_getopt +extern int getopt_long (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); +extern int getopt_long_only (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); + +/* Internal only. Users should not call this directly. */ +extern int _getopt_internal (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only); +# endif +#else /* not __STDC__ */ +extern int getopt (); +# ifndef __need_getopt +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +# endif +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +/* Make sure we later can get all the definitions and declarations. 
*/ +#undef __need_getopt + +#endif /* getopt.h */ + diff --git a/workloads/realworld/uvm_prefetch_async/kmeans/kmeans.c b/workloads/realworld/uvm_prefetch_async/kmeans/kmeans.c new file mode 100755 index 0000000000000000000000000000000000000000..b2668674074c4b29ede04ec09a06d29486a1ec6a --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/kmeans/kmeans.c @@ -0,0 +1,309 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: example.c **/ +/** Description: Takes as input a file: **/ +/** ascii file: containing 1 data point per line **/ +/** binary file: first int is the number of objects **/ +/** 2nd int is the no. of features of each **/ +/** object **/ +/** This example performs a fuzzy c-means clustering **/ +/** on the data. Fuzzy clustering is performed using **/ +/** min to max clusters and the clustering that gets **/ +/** the best score according to a compactness and **/ +/** separation criterion are returned. **/ +/** Author: Wei-keng Liao **/ +/** ECE Department Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. 
**/ +/** **/ +/*************************************************************************/ +#define _CRT_SECURE_NO_DEPRECATE 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#define _POSIX_C_SOURCE 200809L +#include +#include +#include "kmeans.h" + +extern double wtime(void); + + + +/*---< usage() >------------------------------------------------------------*/ +void usage(char *argv0) { + char *help = + "\nUsage: %s [switches] -i filename\n\n" + " -i filename :file containing data to be clustered\n" + " -m max_nclusters :maximum number of clusters allowed [default=5]\n" + " -n min_nclusters :minimum number of clusters allowed [default=5]\n" + " -t threshold :threshold value [default=0.001]\n" + " -l nloops :iteration for each number of clusters [default=1]\n" + " -b :input file is in binary format\n" + " -r :calculate RMSE [default=off]\n" + " -o :output cluster center coordinates [default=off]\n"; + fprintf(stderr, help, argv0); + exit(-1); +} + +/*---< main() >-------------------------------------------------------------*/ +int setup(int argc, char **argv) { + int opt; + extern char *optarg; + char *filename = 0; + float *buf; + char line[1024]; + int isBinaryFile = 0; + + float threshold = 0.001; /* default value */ + int max_nclusters=5; /* default value */ + int min_nclusters=5; /* default value */ + int best_nclusters = 0; + int nfeatures = 0; + int npoints = 0; + float len; + + float **features; + float **cluster_centres=NULL; + int i, j, index; + int nloops = 1; /* default value */ + + int isRMSE = 0; + float rmse; + + int isOutput = 0; + //float cluster_timing, io_timing; + + /* obtain command line arguments and change appropriate options */ + while ( (opt=getopt(argc,argv,"i:t:m:n:l:bro"))!= EOF) { + switch (opt) { + case 'i': filename=optarg; + break; + case 'b': isBinaryFile = 1; + break; + case 't': threshold=atof(optarg); + break; + case 'm': max_nclusters = atoi(optarg); + break; + case 'n': min_nclusters = 
atoi(optarg); + break; + case 'r': isRMSE = 1; + break; + case 'o': isOutput = 1; + break; + case 'l': nloops = atoi(optarg); + break; + case '?': usage(argv[0]); + break; + default: usage(argv[0]); + break; + } + } + + if (filename == 0) usage(argv[0]); + + /* ============== I/O begin ==============*/ + /* get nfeatures and npoints */ + //io_timing = omp_get_wtime(); + if (isBinaryFile) { //Binary file input + int infile; + if ((infile = open(filename, O_RDONLY, "0600")) == -1) { + fprintf(stderr, "Error: no such file (%s)\n", filename); + exit(1); + } + read(infile, &npoints, sizeof(int)); + read(infile, &nfeatures, sizeof(int)); + + /* allocate space for features[][] and read attributes of all objects */ + buf = (float*) malloc(npoints*nfeatures*sizeof(float)); + features = (float**)malloc(npoints* sizeof(float*)); + features[0] = (float*) malloc(npoints*nfeatures*sizeof(float)); + for (i=1; i npoints(%d) -- cannot proceed\n", min_nclusters, npoints); + exit(0); + } + + srand(7); /* seed for future random number generator */ + memcpy(features[0], buf, npoints*nfeatures*sizeof(float)); /* now features holds 2-dimensional array of features */ + free(buf); + + /* ======================= core of the clustering ===================*/ + + //cluster_timing = omp_get_wtime(); /* Total clustering time */ + cluster_centres = NULL; + index = cluster(npoints, /* number of data points */ + nfeatures, /* number of features for each point */ + features, /* array: [npoints][nfeatures] */ + min_nclusters, /* range of min to max number of clusters */ + max_nclusters, + threshold, /* loop termination factor */ + &best_nclusters, /* return: number between min and max */ + &cluster_centres, /* return: [best_nclusters][nfeatures] */ + &rmse, /* Root Mean Squared Error */ + isRMSE, /* calculate RMSE */ + nloops); /* number of iteration for each number of clusters */ + + //cluster_timing = omp_get_wtime() - cluster_timing; + + + /* =============== Command Line Output =============== */ 
+ + /* cluster center coordinates + :displayed only for when k=1*/ + if((min_nclusters == max_nclusters) && (isOutput == 1)) { + printf("\n================= Centroid Coordinates =================\n"); + for(i = 0; i < max_nclusters; i++){ + printf("%d:", i); + for(j = 0; j < nfeatures; j++){ + printf(" %.2f", cluster_centres[i][j]); + } + printf("\n\n"); + } + } + + len = (float) ((max_nclusters - min_nclusters + 1)*nloops); + + printf("Number of Iteration: %d\n", nloops); + //printf("Time for I/O: %.5fsec\n", io_timing); + //printf("Time for Entire Clustering: %.5fsec\n", cluster_timing); + + if(min_nclusters != max_nclusters){ + if(nloops != 1){ //range of k, multiple iteration + //printf("Average Clustering Time: %fsec\n", + // cluster_timing / len); + printf("Best number of clusters is %d\n", best_nclusters); + } + else{ //range of k, single iteration + //printf("Average Clustering Time: %fsec\n", + // cluster_timing / len); + printf("Best number of clusters is %d\n", best_nclusters); + } + } + else{ + if(nloops != 1){ // single k, multiple iteration + //printf("Average Clustering Time: %.5fsec\n", + // cluster_timing / nloops); + if(isRMSE) // if calculated RMSE + printf("Number of trials to approach the best RMSE of %.3f is %d\n", rmse, index + 1); + } + else{ // single k, single iteration + if(isRMSE) // if calculated RMSE + printf("Root Mean Squared Error: %.3f\n", rmse); + } + } + + + /* free up memory */ + free(features[0]); + free(features); + return(0); +} + diff --git a/workloads/realworld/uvm_prefetch_async/kmeans/kmeans.h b/workloads/realworld/uvm_prefetch_async/kmeans/kmeans.h new file mode 100755 index 0000000000000000000000000000000000000000..28b6c34732313f04c02b59b095361bc8142d4b05 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/kmeans/kmeans.h @@ -0,0 +1,60 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
*/ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. 
*/ +/******************************************************************************/ + +#ifndef _H_FUZZY_KMEANS +#define _H_FUZZY_KMEANS + +#ifndef FLT_MAX +#define FLT_MAX 3.40282347e+38 +#endif + +#include + +/* rmse.c */ +float euclid_dist_2 (float*, float*, int); +int find_nearest_point (float* , int, float**, int); +float rms_err(float**, int, int, float**, int); + +/* cluster.c */ +int cluster(int, int, float**, int, int, float, int*, float***, float*, int, int); + +/* kmeans_clustering.c */ +float **kmeans_clustering(float**, int, int, int, float, int*); + +#endif diff --git a/workloads/realworld/uvm_prefetch_async/kmeans/kmeans_clustering.c b/workloads/realworld/uvm_prefetch_async/kmeans/kmeans_clustering.c new file mode 100755 index 0000000000000000000000000000000000000000..54ddcd6d8ff6f0d075ff16e6b6aef84fda4716d2 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/kmeans/kmeans_clustering.c @@ -0,0 +1,178 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. 
*/ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: kmeans_clustering.c **/ +/** Description: Implementation of regular k-means clustering **/ +/** algorithm **/ +/** Author: Wei-keng Liao **/ +/** ECE Department, Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. 
**/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include + +#include "kmeans.h" + +#define RANDOM_MAX 2147483647 + +extern double wtime(void); + +/*----< kmeans_clustering() >---------------------------------------------*/ +float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */ + int nfeatures, + int npoints, + int nclusters, + float threshold, + int *membership) /* out: [npoints] */ +{ + int i, j, n = 0; /* counters */ + int loop=0, temp; + int *new_centers_len; /* [nclusters]: no. of points in each cluster */ + float delta; /* if the point moved */ + float **clusters; /* out: [nclusters][nfeatures] */ + float **new_centers; /* [nclusters][nfeatures] */ + + int *initial; /* used to hold the index of points not yet selected + prevents the "birthday problem" of dual selection (?) 
+ considered holding initial cluster indices, but changed due to + possible, though unlikely, infinite loops */ + int initial_points; + int c = 0; + + /* nclusters should never be > npoints + that would guarantee a cluster without points */ + if (nclusters > npoints) + nclusters = npoints; + + /* allocate space for and initialize returning variable clusters[] */ + clusters = (float**) malloc(nclusters * sizeof(float*)); + clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float)); + for (i=1; i= 0; i++) { + //n = (int)rand() % initial_points; + + for (j=0; j 0) + clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */ + new_centers[i][j] = 0.0; /* set back to 0 */ + } + new_centers_len[i] = 0; /* set back to 0 */ + } + c++; + } while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */ + printf("iterated %d times\n", c); + free(new_centers[0]); + free(new_centers); + free(new_centers_len); + + return clusters; +} + diff --git a/workloads/realworld/uvm_prefetch_async/kmeans/kmeans_cuda.cu b/workloads/realworld/uvm_prefetch_async/kmeans/kmeans_cuda.cu new file mode 100755 index 0000000000000000000000000000000000000000..493d1b58293293a9c58cc5414db1e96044547ca6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/kmeans/kmeans_cuda.cu @@ -0,0 +1,323 @@ +#include +#include +#include +#include +#include +#include + +#include + +#define THREADS_PER_DIM 16 +#define BLOCKS_PER_DIM 64 +#define THREADS_PER_BLOCK THREADS_PER_DIM*THREADS_PER_DIM +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" +#include "kmeans_cuda_kernel.cu" + + +//#define BLOCK_DELTA_REDUCE +//#define BLOCK_CENTER_REDUCE + +#define CPU_DELTA_REDUCE +#define CPU_CENTER_REDUCE + +extern "C" +int setup(int argc, char** argv); /* function prototype */ + +// GLOBAL!!!!! 
+unsigned int num_threads_perdim = THREADS_PER_DIM; /* sqrt(256) -- see references for this choice */ +unsigned int num_blocks_perdim = BLOCKS_PER_DIM; /* temporary */ +unsigned int num_threads = num_threads_perdim*num_threads_perdim; /* number of threads */ +unsigned int num_blocks = num_blocks_perdim*num_blocks_perdim; /* number of blocks */ + +/* _d denotes it resides on the device */ +int *membership_new; /* newly assignment membership */ +float *feature_d; /* inverted data array */ +float *feature_flipped_d; /* original (not inverted) data array */ +int *membership_d; /* membership on the device */ +float *block_new_centers; /* sum of points in a cluster (per block) */ +float *clusters_d; /* cluster centers on the device */ +float *block_clusters_d; /* per block calculation of cluster centers */ +int *block_deltas_d; /* per block calculation of deltas */ + + +/* -------------- allocateMemory() ------------------- */ +/* allocate device memory, calculate number of blocks and threads, and invert the data array */ +extern "C" +void allocateMemory(int npoints, int nfeatures, int nclusters, float **features) +{ + // printf("npoints is %d, num_threads is %d\n", npoints, num_threads); + // num_blocks = npoints / num_threads; + // if (npoints % num_threads > 0) /* defeat truncation */ + // num_blocks++; + + // num_blocks_perdim = sqrt((double) num_blocks); + // while (num_blocks_perdim * num_blocks_perdim < num_blocks) // defeat truncation (should run once) + // num_blocks_perdim++; + + num_blocks = num_blocks_perdim*num_blocks_perdim; + + /* allocate memory for memory_new[] and initialize to -1 (host) */ + membership_new = (int*) malloc(npoints * sizeof(int)); + for(int i=0;i>>(feature_flipped_d, feature_d, npoints, (num_blocks_perdim * num_blocks_perdim * num_threads_perdim * num_threads_perdim), nfeatures); + + /* allocate memory for membership_d[] and clusters_d[][] (device) */ + cudaMallocManaged((void **)&membership_d, npoints * sizeof(int)); + 
cudaMallocManaged((void **)&clusters_d, nclusters * nfeatures * sizeof(float)); + +#ifdef BLOCK_DELTA_REDUCE + // allocate array to hold the per block deltas on the gpu side + + cudaMallocManaged((void**) &block_deltas_d, num_blocks_perdim * num_blocks_perdim * sizeof(int)); +#endif + +#ifdef BLOCK_CENTER_REDUCE + // allocate memory and copy to card cluster array in which to accumulate center points for the next iteration + cudaMallocManaged((void**) &block_clusters_d, + num_blocks_perdim * num_blocks_perdim * + nclusters * nfeatures * sizeof(float)); +#endif + +} +/* -------------- allocateMemory() end ------------------- */ + +/* -------------- deallocateMemory() ------------------- */ +/* free host and device memory */ +extern "C" +void deallocateMemory() +{ + free(membership_new); + free(block_new_centers); + cudaFree(feature_d); + cudaFree(feature_flipped_d); + cudaFree(membership_d); + + cudaFree(clusters_d); +#ifdef BLOCK_CENTER_REDUCE + cudaFree(block_clusters_d); +#endif +#ifdef BLOCK_DELTA_REDUCE + cudaFree(block_deltas_d); +#endif +endCPU(); +} +/* -------------- deallocateMemory() end ------------------- */ + +//////////////////////////////////////////////////////////////////////////////// +// Program main // + +int main(int argc, char **argv) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + // make sure we're running on the big card + GPU_argv_init(); + // as done in the CUDA start/help document provided + initTrace(); + setup(argc, argv); + finiTrace(); +} + +// // +//////////////////////////////////////////////////////////////////////////////// +/* ------------------- kmeansCuda() ------------------------ */ +extern "C" +int // delta -- had problems when return value was of float type +kmeansCuda(float **feature, /* in: [npoints][nfeatures] */ + int nfeatures, /* number of attributes for each point */ + int npoints, /* number of data points */ + int nclusters, /* 
number of clusters */ + int *membership, /* which cluster the point belongs to */ + float **clusters, /* coordinates of cluster centers */ + int *new_centers_len, /* number of elements in each cluster */ + float **new_centers /* sum of elements in each cluster */ + ) +{ + int delta = 0; /* if point has moved */ + int i,j; /* counters */ + + // cudaSetDevice(1); + + + /* copy membership (host to device) */ + memcpy(membership_d, membership_new, npoints*sizeof(int)); + + // /* copy clusters (host to device) */ + // memcpy(clusters_d, clusters[0], nclusters * nfeatures * sizeof(float)); + + // /* set up texture */ + // cudaChannelFormatDesc chDesc0 = cudaCreateChannelDesc(); + // t_features.filterMode = cudaFilterModePoint; + // t_features.normalized = false; + // t_features.channelDesc = chDesc0; + + // if(cudaBindTexture(NULL, &t_features, feature_d, &chDesc0, npoints*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind features array to texture!\n"); + + // cudaChannelFormatDesc chDesc1 = cudaCreateChannelDesc(); + // t_features_flipped.filterMode = cudaFilterModePoint; + // t_features_flipped.normalized = false; + // t_features_flipped.channelDesc = chDesc1; + + // if(cudaBindTexture(NULL, &t_features_flipped, feature_flipped_d, &chDesc1, npoints*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind features_flipped array to texture!\n"); + + // cudaChannelFormatDesc chDesc2 = cudaCreateChannelDesc(); + // t_clusters.filterMode = cudaFilterModePoint; + // t_clusters.normalized = false; + // t_clusters.channelDesc = chDesc2; + + // if(cudaBindTexture(NULL, &t_clusters, clusters_d, &chDesc2, nclusters*nfeatures*sizeof(float)) != CUDA_SUCCESS) + // printf("Couldn't bind clusters array to texture!\n"); + + // /* copy clusters to constant memory */ + // cudaMemcpyToSymbol("c_clusters",clusters[0],nclusters*nfeatures*sizeof(float),0,cudaMemcpyHostToDevice); + + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + 
cudaMemPrefetchAsync(membership_d, npoints * sizeof(int), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + /* setup execution parameters. + changed to 2d (source code on NVIDIA CUDA Programming Guide) */ + dim3 grid( num_blocks_perdim, num_blocks_perdim ); + dim3 threads( num_threads_perdim*num_threads_perdim ); + static uint64_t startKernel2; + CUPTI_CALL(cuptiGetTimestamp(&startKernel2)); + /* execute the kernel */ + kmeansPoint<<>>(feature_d, + nfeatures, + npoints, + (num_blocks_perdim * num_blocks_perdim * num_threads_perdim * num_threads_perdim), + nclusters, + membership_d); + static uint64_t endKernel2; + CUPTI_CALL(cuptiGetTimestamp(&endKernel2)); + // cudaThreadSynchronize(); + cudaDeviceSynchronize(); + printf("CUPTI,kmeansPoint,%lu,%lu\n", startKernel2, endKernel2); + + /* copy back membership (device to host) */ + memcpy(membership_new, membership_d, npoints * sizeof(int)); + +#ifdef BLOCK_CENTER_REDUCE + /*** Copy back arrays of per block sums ***/ + float * block_clusters_h = (float *) malloc( + num_blocks_perdim * num_blocks_perdim * + nclusters * nfeatures * sizeof(float)); + + // cudaMemcpy(block_clusters_h, block_clusters_d, + // num_blocks_perdim * num_blocks_perdim * + // nclusters * nfeatures * sizeof(float), + // cudaMemcpyDeviceToHost); +#endif +#ifdef BLOCK_DELTA_REDUCE + int * block_deltas_h = (int *) malloc( + num_blocks_perdim * num_blocks_perdim * sizeof(int)); + + // cudaMemcpy(block_deltas_h, block_deltas_d, + // num_blocks_perdim * num_blocks_perdim * sizeof(int), + // cudaMemcpyDeviceToHost); +#endif + + /* for each point, sum data points in each cluster + and see if membership has changed: + if so, increase delta and change old membership, and update new_centers; + otherwise, update new_centers */ + delta = 0; + for (i = 0; i < npoints; i++) + { + int cluster_id = membership_d[i]; + new_centers_len[cluster_id]++; + if (membership_d[i] != membership[i]) + { +#ifdef CPU_DELTA_REDUCE + delta++; +#endif + membership[i] = 
membership_d[i]; + } +#ifdef CPU_CENTER_REDUCE + for (j = 0; j < nfeatures; j++) + { + new_centers[cluster_id][j] += feature[i][j]; + } +#endif + } + + +#ifdef BLOCK_DELTA_REDUCE + /*** calculate global sums from per block sums for delta and the new centers ***/ + + //debug + //printf("\t \t reducing %d block sums to global sum \n",num_blocks_perdim * num_blocks_perdim); + for(i = 0; i < num_blocks_perdim * num_blocks_perdim; i++) { + //printf("block %d delta is %d \n",i,block_deltas_h[i]); + delta += block_deltas_h[i]; + } + +#endif +#ifdef BLOCK_CENTER_REDUCE + + for(int j = 0; j < nclusters;j++) { + for(int k = 0; k < nfeatures;k++) { + block_new_centers[j*nfeatures + k] = 0.f; + } + } + + for(i = 0; i < num_blocks_perdim * num_blocks_perdim; i++) { + for(int j = 0; j < nclusters;j++) { + for(int k = 0; k < nfeatures;k++) { + block_new_centers[j*nfeatures + k] += block_clusters_h[i * nclusters*nfeatures + j * nfeatures + k]; + } + } + } + + +#ifdef CPU_CENTER_REDUCE + //debug + /*for(int j = 0; j < nclusters;j++) { + for(int k = 0; k < nfeatures;k++) { + if(new_centers[j][k] > 1.001 * block_new_centers[j*nfeatures + k] || new_centers[j][k] < 0.999 * block_new_centers[j*nfeatures + k]) { + printf("\t \t for %d:%d, normal value is %e and gpu reduced value id %e \n",j,k,new_centers[j][k],block_new_centers[j*nfeatures + k]); + } + } + }*/ +#endif + +#ifdef BLOCK_CENTER_REDUCE + for(int j = 0; j < nclusters;j++) { + for(int k = 0; k < nfeatures;k++) + new_centers[j][k]= block_new_centers[j*nfeatures + k]; + } +#endif + +#endif + finiTrace(); + return delta; + +} +/* ------------------- kmeansCuda() end ------------------------ */ + diff --git a/workloads/realworld/uvm_prefetch_async/kmeans/kmeans_cuda_kernel.cu b/workloads/realworld/uvm_prefetch_async/kmeans/kmeans_cuda_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..4150c36621af95fb81ffe1e965b5e08941fff234 --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch_async/kmeans/kmeans_cuda_kernel.cu @@ -0,0 +1,150 @@ +#ifndef _KMEANS_CUDA_KERNEL_H_ +#define _KMEANS_CUDA_KERNEL_H_ + +#include +#include + +#include "kmeans.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +// FIXME: Make this a runtime selectable variable! +#define ASSUMED_NR_CLUSTERS 32 + +#define SDATA(index) CUT_BANK_CHECKER(sdata, index) + +// t_features has the layout dim0[points 0-m-1]dim1[ points 0-m-1]... +texture t_features; +// t_features_flipped has the layout point0[dim 0-n-1]point1[dim 0-n-1] +texture t_features_flipped; +texture t_clusters; + +__constant__ float c_clusters[ASSUMED_NR_CLUSTERS * 34]; /* constant memory for cluster centers */ + +/* ----------------- invert_mapping() --------------------- */ +/* inverts data array from row-major to column-major. + + [p0,dim0][p0,dim1][p0,dim2] ... + [p1,dim0][p1,dim1][p1,dim2] ... + [p2,dim0][p2,dim1][p2,dim2] ... + to + [dim0,p0][dim0,p1][dim0,p2] ... + [dim1,p0][dim1,p1][dim1,p2] ... + [dim2,p0][dim2,p1][dim2,p2] ... 
+*/ +__global__ void invert_mapping(float *input, /* original */ + float *output, /* inverted */ + int npoints, /* npoints */ + int batch_size, + int nfeatures) /* nfeatures */ +{ + int point_id = threadIdx.x + blockDim.x * blockIdx.x; /* id of thread */ + + int batches = npoints / batch_size; + + for (int b = 0; b < batches; b++) + { + for (int i = 0; i < nfeatures; i++) + { + output[b * batch_size + point_id + npoints * i] = input[(b * batch_size + point_id) * nfeatures + i]; + } + } + + + + return; +} +/* ----------------- invert_mapping() end --------------------- */ + +/* to turn on the GPU delta and center reduction */ +// #define GPU_DELTA_REDUCTION +// #define GPU_NEW_CENTER_REDUCTION + +/* ----------------- kmeansPoint() --------------------- */ +/* find the index of nearest cluster centers and change membership*/ +__global__ void +kmeansPoint(float *features, /* in: [npoints*nfeatures] */ + int nfeatures, + int npoints, + int batch_size, + int nclusters, + int *membership) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + // block ID + const unsigned int block_id = gridDim.x * blockIdx.y + blockIdx.x; + // point/thread ID + const unsigned int point_id = block_id * blockDim.x * blockDim.y + threadIdx.x; + + __shared__ float tmp_features[PREFETCH_COUNT * THREADS_PER_DIM][THREADS_PER_DIM][16]; + + int batches = npoints / batch_size; + int fetch = 0; + int end_tile = fetch + batches; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + for (int i = 0; i < 16; i++) + { + int addr = fetch * batch_size + point_id + i * npoints; + memcpy_async(tmp_features[(fetch % PREFETCH_COUNT) * THREADS_PER_DIM + threadIdx.y][threadIdx.x][i], features[addr], pipe); + } + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + 
block.sync(); + + int index = -1; + + float min_dist = FLT_MAX; + float dist; /* distance square between a point to cluster center */ + + /* find the cluster center id with min distance to pt */ + for (int i = 0; i < nclusters; i++) + { + int cluster_base_index = i * nfeatures; /* base index of cluster centers for inverted array */ + float ans = 0.0; /* Euclidean distance sqaure */ + + for (int j = 0; j < nfeatures; j++) + { + // int addr = point_id + j * npoints; /* appropriate index of data point */ + // float diff = (tex1Dfetch(t_features,addr) - c_clusters[cluster_base_index + j]); /* distance between a data point to cluster centers */ + + // int addr = point_id + j * npoints; /* appropriate index of data point */ + // float diff = features[addr] - c_clusters[cluster_base_index + j]; /* distance between a data point to cluster centers */ + float diff = tmp_features[(compute % PREFETCH_COUNT) * THREADS_PER_DIM + threadIdx.y][threadIdx.x][j] - c_clusters[cluster_base_index + j]; /* distance between a data point to cluster centers */ + ans += diff * diff; /* sum of squares */ + } + dist = ans; + block.sync(); + + /* see if distance is smaller than previous ones: + if so, change minimum distance and save index of cluster center */ + if (dist < min_dist) + { + min_dist = dist; + index = i; + } + } + membership[compute * batch_size + point_id] = index; + block.sync(); + } + +} +#endif // #ifndef _KMEANS_CUDA_KERNEL_H_ diff --git a/workloads/realworld/uvm_prefetch_async/kmeans/kmeans_cuda_kernel.cu.old b/workloads/realworld/uvm_prefetch_async/kmeans/kmeans_cuda_kernel.cu.old new file mode 100755 index 0000000000000000000000000000000000000000..dd0dec27eebf197d811a58063b13a86c8f72e1ed --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/kmeans/kmeans_cuda_kernel.cu.old @@ -0,0 +1,185 @@ +#ifndef _KMEANS_CUDA_KERNEL_H_ +#define _KMEANS_CUDA_KERNEL_H_ + +#include +#include + +#include "kmeans.h" + +// FIXME: Make this a runtime selectable variable! 
+#define ASSUMED_NR_CLUSTERS 32 + +#define SDATA( index) CUT_BANK_CHECKER(sdata, index) + +// t_features has the layout dim0[points 0-m-1]dim1[ points 0-m-1]... +texture t_features; +// t_features_flipped has the layout point0[dim 0-n-1]point1[dim 0-n-1] +texture t_features_flipped; +texture t_clusters; + + +__constant__ float c_clusters[ASSUMED_NR_CLUSTERS*34]; /* constant memory for cluster centers */ + +/* ----------------- invert_mapping() --------------------- */ +/* inverts data array from row-major to column-major. + + [p0,dim0][p0,dim1][p0,dim2] ... + [p1,dim0][p1,dim1][p1,dim2] ... + [p2,dim0][p2,dim1][p2,dim2] ... + to + [dim0,p0][dim0,p1][dim0,p2] ... + [dim1,p0][dim1,p1][dim1,p2] ... + [dim2,p0][dim2,p1][dim2,p2] ... +*/ +__global__ void invert_mapping(float *input, /* original */ + float *output, /* inverted */ + int npoints, /* npoints */ + int nfeatures) /* nfeatures */ +{ + int point_id = threadIdx.x + blockDim.x*blockIdx.x; /* id of thread */ + int i; + + if(point_id < npoints){ + for(i=0;i 1; threadids_participating /= 2) { + if(threadIdx.x < threadids_participating) { + deltas[threadIdx.x] += deltas[threadIdx.x + threadids_participating]; + } + __syncthreads(); + } + if(threadIdx.x < 1) {deltas[threadIdx.x] += deltas[threadIdx.x + 1];} + __syncthreads(); + // propagate number of changes to global counter + if(threadIdx.x == 0) { + block_deltas[blockIdx.y * gridDim.x + blockIdx.x] = deltas[0]; + //printf("original id: %d, modified: %d\n", blockIdx.y*gridDim.x+blockIdx.x, blockIdx.x); + + } + +#endif + + +#ifdef GPU_NEW_CENTER_REDUCTION + int center_id = threadIdx.x / nfeatures; + int dim_id = threadIdx.x - nfeatures*center_id; + + __shared__ int new_center_ids[THREADS_PER_BLOCK]; + + new_center_ids[threadIdx.x] = index; + __syncthreads(); + + /*** + determine which dimension calculte the sum for + mapping of threads is + center0[dim0,dim1,dim2,...]center1[dim0,dim1,dim2,...]... 
+ ***/ + + int new_base_index = (point_id - threadIdx.x)*nfeatures + dim_id; + float accumulator = 0.f; + + if(threadIdx.x < nfeatures * nclusters) { + // accumulate over all the elements of this threadblock + for(int i = 0; i< (THREADS_PER_BLOCK); i++) { + float val = tex1Dfetch(t_features_flipped,new_base_index+i*nfeatures); + if(new_center_ids[i] == center_id) + accumulator += val; + } + + // now store the sum for this threadblock + /*** + mapping to global array is + block0[center0[dim0,dim1,dim2,...]center1[dim0,dim1,dim2,...]...]block1[...]... + ***/ + block_clusters[(blockIdx.y*gridDim.x + blockIdx.x) * nclusters * nfeatures + threadIdx.x] = accumulator; + } +#endif + +} +#endif // #ifndef _KMEANS_CUDA_KERNEL_H_ diff --git a/workloads/realworld/uvm_prefetch_async/kmeans/rmse.c b/workloads/realworld/uvm_prefetch_async/kmeans/rmse.c new file mode 100755 index 0000000000000000000000000000000000000000..fe7786342bf77cab12958e630cb8d99834312f0d --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/kmeans/rmse.c @@ -0,0 +1,95 @@ +/*************************************************************************/ +/** File: rmse.c **/ +/** Description: calculate root mean squared error of particular **/ +/** clustering. **/ +/** Author: Sang-Ha Lee **/ +/** University of Virginia. **/ +/** **/ +/** Note: euclid_dist_2() and find_nearest_point() adopted from **/ +/** Minebench code. 
**/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include + +#include "kmeans.h" + +extern double wtime(void); + +/*----< euclid_dist_2() >----------------------------------------------------*/ +/* multi-dimensional spatial Euclid distance square */ +__inline +float euclid_dist_2(float *pt1, + float *pt2, + int numdims) +{ + int i; + float ans=0.0; + + for (i=0; i-----------------------------------------------*/ +__inline +int find_nearest_point(float *pt, /* [nfeatures] */ + int nfeatures, + float **pts, /* [npts][nfeatures] */ + int npts) +{ + int index, i; + float max_dist=FLT_MAX; + + /* find the cluster center id with min distance to pt */ + for (i=0; i-------------------------------------*/ +float rms_err (float **feature, /* [npoints][nfeatures] */ + int nfeatures, + int npoints, + float **cluster_centres, /* [nclusters][nfeatures] */ + int nclusters) +{ + int i; + int nearest_cluster_index; /* cluster center id with min distance to pt */ + float sum_euclid = 0.0; /* sum of Euclidean distance squares */ + float ret; /* return value */ + + /* calculate and sum the sqaure of euclidean distance*/ + #pragma omp parallel for \ + shared(feature,cluster_centres) \ + firstprivate(npoints,nfeatures,nclusters) \ + private(i, nearest_cluster_index) \ + schedule (static) + for (i=0; i +#endif + +#include + +#ifndef _H_TYPES +#include +#endif + +#include + +#ifndef _H_ACCESS +#include /* for the "access" function */ +#endif + +/* + * POSIX requires that certain values be included in unistd.h. It also + * requires that when _POSIX_SOURCE is defined only those standard + * specific values are present. This header includes all the POSIX + * required entries. 
+ */ + +#ifdef _POSIX_SOURCE +#ifdef _LARGE_FILES +#define lseek lseek64 +#endif + + +/* Symbolic constants for the "lseek" function: */ +#ifndef SEEK_SET +#define SEEK_SET 0 /* Set file pointer to "offset" */ +#define SEEK_CUR 1 /* Set file pointer to current plus "offset" */ +#define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif /* SEEK_SET */ + +#ifdef _NO_PROTO + +#ifndef _KERNEL +extern int access(); +extern unsigned int alarm(); +extern int chdir(); +extern int chown(); +extern int close(); +extern char *ctermid(); +extern int dup(); +extern int dup2(); +extern int execl(); +extern int execv(); +extern int execle(); +extern int execve(); +extern int execlp(); +extern int execvp(); +extern void _exit(); +extern pid_t fork(); +extern long fpathconf(); +extern char *getcwd(); +extern gid_t getegid(); +extern uid_t geteuid(); +extern gid_t getgid(); +extern int getgroups(); +extern char *getlogin(); +extern pid_t getpgrp(); +extern pid_t getpid(); +extern pid_t getppid(); +extern uid_t getuid(); +extern int isatty(); +extern int link(); +extern off_t lseek(); +extern long pathconf(); +extern int pause(); +extern int pipe(); +#if defined(_XOPEN_SOURCE) && ( _XOPEN_SOURCE >= 500 ) +extern int pthread_atfork(); +#endif +extern int read(); +extern int rmdir(); +extern int setgid(); +extern int setpgid(); +extern int setsid(); +extern int setuid(); +extern unsigned int sleep(); +extern long sysconf(); +extern pid_t tcgetpgrp(); +extern int tcsetpgrp(); +extern char *ttyname(); +extern int unlink(); +extern int write(); +#endif /* !_KERNEL */ + +#else /* POSIX required prototypes */ + +#ifndef _KERNEL +extern int access(const char *, int); +extern unsigned int alarm(unsigned int); +extern int chdir(const char *); +extern int chown(const char *, uid_t, gid_t); +extern int close(int); +extern char *ctermid(char *); +extern int dup(int); +extern int dup2(int, int); +extern int execl(const char *, const char *, ...); +extern int execv(const char *, char 
*const []); +extern int execle(const char *, const char *, ...); +extern int execve(const char *, char *const [], char *const []); +extern int execlp(const char *, const char *, ...); +extern int execvp(const char *, char *const []); +extern void _exit(int); +extern pid_t fork(void); +extern long fpathconf(int, int); +extern char *getcwd(char *, size_t); +extern gid_t getegid(void); +extern uid_t geteuid(void); +extern gid_t getgid(void); +extern int getgroups(int, gid_t []); +extern char *getlogin(void); +#ifndef _BSD +extern pid_t getpgrp(void); +#endif /* _BSD */ +extern pid_t getpid(void); +extern pid_t getppid(void); +extern uid_t getuid(void); +extern int isatty(int); +extern int link(const char *, const char *); +extern off_t lseek(int, off_t, int); +#ifdef _LARGE_FILE_API +extern off64_t lseek64(int, off64_t, int); +#endif +extern long pathconf(const char *, int); +extern int pause(void); +extern int pipe(int []); +#if defined(_XOPEN_SOURCE) && ( _XOPEN_SOURCE >= 500 ) +extern int pthread_atfork(void (*)(void), void (*)(void), void (*)(void)); +#endif +extern ssize_t read(int, void *, size_t); +extern int rmdir(const char *); +extern int setgid(gid_t); +extern int setpgid(pid_t, pid_t); +extern pid_t setsid(void); +extern int setuid(uid_t); +extern unsigned int sleep(unsigned int); +extern long sysconf(int); +extern pid_t tcgetpgrp(int); +extern int tcsetpgrp(int, pid_t); +extern char *ttyname(int); +extern int unlink(const char *); +extern ssize_t write(int, const void *, size_t); +#endif /* !_KERNEL */ +#endif /* !_NO_PROTO */ + +#define STDIN_FILENO 0 +#define STDOUT_FILENO 1 +#define STDERR_FILENO 2 + +#define _POSIX_JOB_CONTROL 1 +#define _POSIX_SAVED_IDS 1 + +#define _POSIX_VERSION 200112L +#define _POSIX2_VERSION 200112L +#define _POSIX2_C_VERSION 200112L + + +#ifdef _XOPEN_SOURCE + +#define _XOPEN_VERSION 600 +#define _XOPEN_XCU_VERSION 4 +#define _XOPEN_XPG3 1 +#define _XOPEN_XPG4 1 +#define _XOPEN_UNIX 1 + +#define _XOPEN_REALTIME (-1) +#define 
_XOPEN_REALTIME_THREADS (-1) + +#if (_XOPEN_SOURCE >= 600) +#define _XOPEN_STREAMS 1 +#endif + +#define _XBS5_ILP32_OFF32 1 +#define _XBS5_ILP32_OFFBIG 1 +#define _XBS5_LP64_OFF64 1 +#define _XBS5_LPBIG_OFFBIG 1 + +#define _POSIX2_C_BIND 200112L +#define _POSIX2_C_DEV 200112L +#define _POSIX2_CHAR_TERM 1 +#define _POSIX2_LOCALEDEF 200112L +#define _POSIX2_UPE 200112L +#define _POSIX2_FORT_DEV (-1) +#define _POSIX2_FORT_RUN (-1) +#define _POSIX2_SW_DEV (-1) + +#if (_POSIX_C_SOURCE >= 200112L) +#define _POSIX_REGEXP 1 +#define _POSIX_SHELL 1 +#define _POSIX2_PBS (-1) +#define _POSIX2_PBS_ACCOUNTING (-1) +#define _POSIX2_PBS_CHECKPOINT (-1) +#define _POSIX2_PBS_LOCATE (-1) +#define _POSIX2_PBS_MESSAGE (-1) +#define _POSIX2_PBS_TRACK (-1) +#define _V6_ILP32_OFF32 1 +#define _V6_ILP32_OFFBIG 1 +#define _V6_LP64_OFF64 1 +#define _V6_LPBIG_OFFBIG 1 + +#define _POSIX_ADVISORY_INFO 200112L +#define _POSIX_BARRIERS 200112L +#define _POSIX_CLOCK_SELECTION 200112L +#define _POSIX_CPUTIME 200112L +#define _POSIX_MONOTONIC_CLOCK 200112L + +#ifdef _POSIX_RAW_SOCKETS +#undef _POSIX_RAW_SOCKETS +#endif + +#define _POSIX_SPAWN 200112L +#define _POSIX_SPIN_LOCKS 200112L +#define _POSIX_SPORADIC_SERVER (-1) +#define _POSIX_THREAD_CPUTIME 200112L +#define _POSIX_THREAD_SPORADIC_SERVER (-1) +#define _POSIX_TIMEOUTS 200112L +#define _POSIX_TRACE (-1) +#define _POSIX_TRACE_EVENT_FILTER (-1) +#define _POSIX_TRACE_INHERIT (-1) +#define _POSIX_TRACE_LOG (-1) +#define _POSIX_TYPED_MEMORY_OBJECTS (-1) + +#endif /* _POSIX_C_SOURCE >= 200112L */ + +#define _XOPEN_CRYPT 1 +#define _XOPEN_SHM 1 +#define _XOPEN_ENH_I18N 1 +#define _XOPEN_LEGACY (-1) +#ifndef __64BIT__ +#define _UNIX_ABI (-1) +#define _UNIX_ABI_IA64 (-1) +#define _UNIX_ABI_BIG_ENDIAN (-1) +#define _UNIX_ABI_LITTLE_ENDIAN (-1) +#endif /* __64BIT__ */ + +extern char *optarg; +extern int optind, opterr, optopt; + +#ifdef _NO_PROTO + extern size_t confstr(); + extern char *crypt(); + extern void encrypt(); + extern int fsync(); + extern 
int getopt(); + extern int nice(); + extern void swab(); +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern char *getpass(); + extern int chroot(); +#endif +#else + extern size_t confstr(int, char*, size_t); + extern char *crypt(const char *, const char *); + extern void encrypt(char *, int); + extern int fsync(int); + extern int getopt(int, char* const*, const char*); + extern int nice(int); + extern void swab(const void *, void *, ssize_t); + extern int fdatasync(int); +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern char *getpass(const char *); + extern int chroot(const char *); +#endif +#endif + +#endif /* _XOPEN _SOURCE */ + +/* Threads options for 1003.1c and XPG UNIX98 */ +#define _POSIX_THREADS 200112L +#define _POSIX_THREAD_ATTR_STACKADDR 200112L +#define _POSIX_THREAD_ATTR_STACKSIZE 200112L +#define _POSIX_THREAD_PROCESS_SHARED 200112L +#define _POSIX_THREAD_SAFE_FUNCTIONS 200112L +#ifdef _ALL_SOURCE +#define _POSIX_REENTRANT_FUNCTIONS _POSIX_THREAD_SAFE_FUNCTIONS +#endif + +/* Realtime threads options for 1003.1c and XPG UNIX98 */ +#define _POSIX_THREAD_PRIORITY_SCHEDULING (-1) +#define _POSIX_THREAD_PRIO_INHERIT (-1) +#define _POSIX_THREAD_PRIO_PROTECT (-1) + +#undef _POSIX_THREAD_FORKALL + +/* Realtime options for 1003.1c and XPG UNIX98 */ +#define _POSIX_ASYNCHRONOUS_IO 200112L +#define _POSIX_FSYNC 200112L +#define _POSIX_MAPPED_FILES 200112L +#define _POSIX_MEMLOCK 200112L +#define _POSIX_MEMLOCK_RANGE 200112L +#define _POSIX_MEMORY_PROTECTION 200112L +#define _POSIX_MESSAGE_PASSING 200112L +#define _POSIX_PRIORITIZED_IO 200112L +#define _POSIX_PRIORITY_SCHEDULING 200112L +#define _POSIX_REALTIME_SIGNALS 200112L +#define _POSIX_SEMAPHORES 200112L +#define _POSIX_SHARED_MEMORY_OBJECTS 200112L +#define _POSIX_SYNCHRONIZED_IO 200112L +#define _POSIX_TIMERS 200112L + +#define _POSIX_ASYNC_IO (-1) +#undef _POSIX_SYNC_IO +#define _POSIX_PRIO_IO (-1) + +#define 
_POSIX_CHOWN_RESTRICTED 0 +#define _POSIX_VDISABLE 0xFF +#define _POSIX_NO_TRUNC 0 + + /* UNIX03 and POSIX01 */ + /* Always enabled */ +#define _POSIX_IPV6 200112L +#define _POSIX_RAW_SOCKETS 200112L + + +#ifndef NULL +#define NULL 0 +#endif + +#if (_POSIX_C_SOURCE >= 200112L) +#define _POSIX_READER_WRITER_LOCKS 200112L +#endif + +/* arguments for the confstr() function */ + +#define _CS_PATH 1 + + /* compile,link,lib,lint flags for 32bit, no_LARGE_FILES system */ +#define _CS_XBS5_ILP32_OFF32_CFLAGS 2 +#define _CS_XBS5_ILP32_OFF32_LDFLAGS 3 +#define _CS_XBS5_ILP32_OFF32_LIBS 4 +#define _CS_XBS5_ILP32_OFF32_LINTFLAGS 5 + + /* compile,link,lib,lint flags for 32bit, _LARGE_FILES system */ +#define _CS_XBS5_ILP32_OFFBIG_CFLAGS 6 +#define _CS_XBS5_ILP32_OFFBIG_LDFLAGS 7 +#define _CS_XBS5_ILP32_OFFBIG_LIBS 8 +#define _CS_XBS5_ILP32_OFFBIG_LINTFLAGS 9 + + /* compile,link,lib,lint flags for LP64 64bit system */ +#define _CS_XBS5_LP64_OFF64_CFLAGS 10 +#define _CS_XBS5_LP64_OFF64_LDFLAGS 11 +#define _CS_XBS5_LP64_OFF64_LIBS 12 +#define _CS_XBS5_LP64_OFF64_LINTFLAGS 13 + + /* compile,link,lib,lint flags for ILP64 64bit system */ + /* AIX does not currently support this */ +#define _CS_XBS5_LPBIG_OFFBIG_CFLAGS 14 +#define _CS_XBS5_LPBIG_OFFBIG_LDFLAGS 15 +#define _CS_XBS5_LPBIG_OFFBIG_LIBS 16 +#define _CS_XBS5_LPBIG_OFFBIG_LINTFLAGS 17 + +#define _CS_AIX_BOOTDEV 24 +#define _CS_AIX_MODEL_CODE 25 +#define _CS_AIX_ARCHITECTURE 26 +#define _CS_AIX_MODEL_CLASS 40 + +#if (_POSIX_C_SOURCE >= 200112L) +#define _CS_POSIX_V6_ILP32_OFF32_CFLAGS 27 +#define _CS_POSIX_V6_ILP32_OFF32_LDFLAGS 28 +#define _CS_POSIX_V6_ILP32_OFF32_LIBS 29 +#define _CS_POSIX_V6_ILP32_OFFBIG_CFLAGS 30 +#define _CS_POSIX_V6_ILP32_OFFBIG_LDFLAGS 31 +#define _CS_POSIX_V6_ILP32_OFFBIG_LIBS 32 +#define _CS_POSIX_V6_LP64_OFF64_CFLAGS 33 +#define _CS_POSIX_V6_LP64_OFF64_LDFLAGS 34 +#define _CS_POSIX_V6_LP64_OFF64_LIBS 35 +#define _CS_POSIX_V6_LPBIG_OFFBIG_CFLAGS 36 +#define _CS_POSIX_V6_LPBIG_OFFBIG_LDFLAGS 37 
+#define _CS_POSIX_V6_LPBIG_OFFBIG_LIBS 38 +#define _CS_POSIX_V6_WIDTH_RESTRICTED_ENVS 39 +#endif + + /* Values for the above */ +#define _CSPATH "/usr/bin:/usr/vac/bin" + + /* ILP32_OFF32 */ +#define _CSPOSIX_V6_ILP32_OFF32_CFLAGS "-q32" +#define _CSXBS5_ILP32_OFF32_CFLAGS _CSPOSIX_V6_ILP32_OFF32_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_ILP32_OFF32_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_ILP32_OFF32_LDFLAGS "-b32" +#define _CSXBS5_ILP32_OFF32_LDFLAGS _CSPOSIX_V6_ILP32_OFF32_LDFLAGS +#endif + +#define _CSPOSIX_V6_ILP32_OFF32_LIBS "-lc -lpthread -lm" +#define _CSXBS5_ILP32_OFF32_LIBS _CSPOSIX_V6_ILP32_OFF32_LIBS + +#define _CSXBS5_ILP32_OFF32_LINTFLAGS "" + + /* ILP32_OFFOFFBIG */ +#define _CSPOSIX_V6_ILP32_OFFBIG_CFLAGS "-q32 -D_LARGE_FILES -qlonglong" +#define _CSXBS5_ILP32_OFFBIG_CFLAGS _CSPOSIX_V6_ILP32_OFFBIG_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_ILP32_OFFBIG_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_ILP32_OFFBIG_LDFLAGS "-b32" +#define _CSXBS5_ILP32_OFFBIG_LDFLAGS _CSPOSIX_V6_ILP32_OFFBIG_LDFLAGS +#endif + +#define _CSPOSIX_V6_ILP32_OFFBIG_LIBS "-lc -lpthread -lm" +#define _CSXBS5_ILP32_OFFBIG_LIBS _CSPOSIX_V6_ILP32_OFFBIG_LIBS + +#define _CSXBS5_ILP32_OFFBIG_LINTFLAGS "-D_LARGE_FILES -qlonglong" + + /* LP64_OFF64 */ +#define _CSPOSIX_V6_LP64_OFF64_CFLAGS "-q64" +#define _CSXBS5_LP64_OFF64_CFLAGS _CSPOSIX_V6_LP64_OFF64_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_LP64_OFF64_LDFLAGS "" +#else /* POWER */ +#define _CSPOSIX_V6_LP64_OFF64_LDFLAGS "-b64" +#define _CSXBS5_LP64_OFF64_LDFLAGS _CSPOSIX_V6_LP64_OFF64_LDFLAGS +#endif + +#define _CSPOSIX_V6_LP64_OFF64_LIBS "-lc -lpthread -lm" +#define _CSXBS5_LP64_OFF64_LIBS _CSPOSIX_V6_LP64_OFF64_LIBS + +#define _CSXBS5_LP64_OFF64_LINTFLAGS "-D__64BIT__" + + /* LPBIG_OFFBIG */ +#define _CSPOSIX_V6_LPBIG_OFFBIG_CFLAGS "-q64" +#define _CSXBS5_LPBIG_OFFBIG_CFLAGS _CSPOSIX_V6_LPBIG_OFFBIG_CFLAGS + +#ifdef __ia64 +#define _CSXBS5_LPBIG_OFFBIG_LDFLAGS "" +#else /* POWER */ +#define 
_CSPOSIX_V6_LPBIG_OFFBIG_LDFLAGS "-b64" +#define _CSXBS5_LPBIG_OFFBIG_LDFLAGS _CSPOSIX_V6_LPBIG_OFFBIG_LDFLAGS +#endif + +#define _CSPOSIX_V6_LPBIG_OFFBIG_LIBS "-lc -lpthread -lm" +#define _CSXBS5_LPBIG_OFFBIG_LIBS _CSPOSIX_V6_LPBIG_OFFBIG_LIBS + +#define _CSXBS5_LPBIG_OFFBIG_LINTFLAGS "-D__64BIT__" + +#if (_POSIX_C_SOURCE >= 200112L) +#define _CSPOSIX_V6_WIDTH_RESTRICTED_ENVS \ + "POSIX_V6_ILP32_OFF32\n" \ + "POSIX_V6_ILP32_OFFBIG\n" \ + "POSIX_V6_LP64_OFF64\n" \ + "POSIX_V6_LPBIG_OFFBIG" +#endif + +/* arguments for the pathconf() function */ + +#define _PC_CHOWN_RESTRICTED 10 +#define _PC_LINK_MAX 11 +#define _PC_MAX_CANON 12 +#define _PC_MAX_INPUT 13 +#define _PC_NAME_MAX 14 +#define _PC_NO_TRUNC 15 +#define _PC_PATH_MAX 16 +#define _PC_PIPE_BUF 17 +#define _PC_VDISABLE 18 +#define _PC_ASYNC_IO 19 +#define _PC_SYNC_IO 20 +#define _PC_PRIO_IO 21 +#define _PC_FILESIZEBITS 22 /* # bits needed to hold offset */ +#define _PC_AIX_DISK_PARTITION 23 +#define _PC_AIX_DISK_SIZE 24 +#if (_POSIX_C_SOURCE >= 200112L) +#define _PC_SYMLINK_MAX 25 +#define _PC_ALLOC_SIZE_MIN 26 +#define _PC_REC_INCR_XFER_SIZE 27 +#define _PC_REC_MAX_XFER_SIZE 28 +#define _PC_REC_MIN_XFER_SIZE 29 +#define _PC_REC_XFER_ALIGN 30 +#define _PC_2_SYMLINKS 31 +#endif + +/* arguments for the sysconf() function, the defined numbers are used as + * array index in sysconf(). 
+ * + * POSIX.1(1990), Table 4-2 + */ +#define _SC_ARG_MAX 0 +#define _SC_CHILD_MAX 1 +#define _SC_CLK_TCK 2 +#define _SC_NGROUPS_MAX 3 +#define _SC_OPEN_MAX 4 +#define _SC_STREAM_MAX 5 +#define _SC_TZNAME_MAX 6 +#define _SC_JOB_CONTROL 7 +#define _SC_SAVED_IDS 8 +#define _SC_VERSION 9 + +/* POSIX.1(1990), Table 2-3, required by command getconf */ + +#define _SC_POSIX_ARG_MAX 10 +#define _SC_POSIX_CHILD_MAX 11 +#define _SC_POSIX_LINK_MAX 12 +#define _SC_POSIX_MAX_CANON 13 +#define _SC_POSIX_MAX_INPUT 14 +#define _SC_POSIX_NAME_MAX 15 +#define _SC_POSIX_NGROUPS_MAX 16 +#define _SC_POSIX_OPEN_MAX 17 +#define _SC_POSIX_PATH_MAX 18 +#define _SC_POSIX_PIPE_BUF 19 +#define _SC_POSIX_SSIZE_MAX 20 +#define _SC_POSIX_STREAM_MAX 21 +#define _SC_POSIX_TZNAME_MAX 22 + +/* POSIX.2 (Draft 10), Table 41) */ + +#define _SC_BC_BASE_MAX 23 +#define _SC_BC_DIM_MAX 24 +#define _SC_BC_SCALE_MAX 25 +#define _SC_BC_STRING_MAX 26 +#define _SC_EQUIV_CLASS_MAX 27 +#define _SC_EXPR_NEST_MAX 28 +#define _SC_LINE_MAX 29 +#define _SC_RE_DUP_MAX 30 +#define _SC_2_VERSION 31 +#define _SC_2_C_DEV 32 +#define _SC_2_FORT_DEV 33 +#define _SC_2_FORT_RUN 34 +#define _SC_2_LOCALEDEF 35 +#define _SC_2_SW_DEV 36 + +/* POSIX.2 (Draft 10), Table 13) */ + +#define _SC_POSIX2_BC_BASE_MAX 37 +#define _SC_POSIX2_BC_DIM_MAX 38 +#define _SC_POSIX2_BC_SCALE_MAX 39 +#define _SC_POSIX2_BC_STRING_MAX 40 +#define _SC_POSIX2_EQUIV_CLASS_MAX 41 +#define _SC_POSIX2_EXPR_NEST_MAX 42 +#define _SC_POSIX2_LINE_MAX 43 +#define _SC_POSIX2_RE_DUP_MAX 44 +#define _SC_PASS_MAX 45 +#define _SC_XOPEN_VERSION 46 +#define _SC_ATEXIT_MAX 47 +#if _XOPEN_SOURCE_EXTENDED==1 +#define _SC_PAGE_SIZE 48 +#endif /* _XOPEN_SOURCE_EXTENDED */ +#define _SC_AES_OS_VERSION 49 +#define _SC_COLL_WEIGHTS_MAX 50 +#define _SC_2_C_BIND 51 +#define _SC_2_C_VERSION 52 +#define _SC_2_UPE 53 +#define _SC_2_CHAR_TERM 54 +#define _SC_XOPEN_SHM 55 +#define _SC_XOPEN_CRYPT 56 +#define _SC_XOPEN_ENH_I18N 57 +#if _XOPEN_SOURCE_EXTENDED==1 +#define _SC_PAGESIZE 
_SC_PAGE_SIZE +#define _SC_IOV_MAX 58 +#endif /* _XOPEN_SOURCE_EXTENDED */ +#define _SC_THREAD_SAFE_FUNCTIONS 59 +#define _SC_THREADS 60 +#define _SC_THREAD_ATTR_STACKADDR 61 +#define _SC_THREAD_ATTR_STACKSIZE 62 +#define _SC_THREAD_FORKALL 63 +#define _SC_THREAD_PRIORITY_SCHEDULING 64 +#define _SC_THREAD_PRIO_INHERIT 65 +#define _SC_THREAD_PRIO_PROTECT 66 +#define _SC_THREAD_PROCESS_SHARED 67 +#define _SC_THREAD_KEYS_MAX 68 +#define _SC_THREAD_DATAKEYS_MAX _SC_THREAD_KEYS_MAX +#define _SC_THREAD_STACK_MIN 69 +#define _SC_THREAD_THREADS_MAX 70 +#ifdef _ALL_SOURCE +#define _SC_NPROCESSORS_CONF 71 +#define _SC_NPROCESSORS_ONLN 72 +#endif /* _ALL_SOURCE */ +#define _SC_XOPEN_UNIX 73 + +#if (_XOPEN_SOURCE >= 500) + +/* POSIX 1003.1c and XPG UNIX98 */ +/* look to defines above for meanings */ +#define _SC_AIO_LISTIO_MAX 75 +#define _SC_AIO_MAX 76 +#define _SC_AIO_PRIO_DELTA_MAX 77 +#define _SC_ASYNCHRONOUS_IO 78 +#define _SC_DELAYTIMER_MAX 79 +#define _SC_FSYNC 80 +#define _SC_GETGR_R_SIZE_MAX 81 +#define _SC_GETPW_R_SIZE_MAX 82 +#define _SC_LOGIN_NAME_MAX 83 +#define _SC_MAPPED_FILES 84 +#define _SC_MEMLOCK 85 +#define _SC_MEMLOCK_RANGE 86 +#define _SC_MEMORY_PROTECTION 87 +#define _SC_MESSAGE_PASSING 88 +#define _SC_MQ_OPEN_MAX 89 +#define _SC_MQ_PRIO_MAX 90 +#define _SC_PRIORITIZED_IO 91 +#define _SC_PRIORITY_SCHEDULING 92 +#define _SC_REALTIME_SIGNALS 93 +#define _SC_RTSIG_MAX 94 +#define _SC_SEMAPHORES 95 +#define _SC_SEM_NSEMS_MAX 96 +#define _SC_SEM_VALUE_MAX 97 +#define _SC_SHARED_MEMORY_OBJECTS 98 +#define _SC_SIGQUEUE_MAX 99 +#define _SC_SYNCHRONIZED_IO 100 +#define _SC_THREAD_DESTRUCTOR_ITERATIONS 101 +#define _SC_TIMERS 102 +#define _SC_TIMER_MAX 103 +#define _SC_TTY_NAME_MAX 104 +#define _SC_XBS5_ILP32_OFF32 105 +#define _SC_XBS5_ILP32_OFFBIG 106 +#define _SC_XBS5_LP64_OFF64 107 +#define _SC_XBS5_LPBIG_OFFBIG 108 +#define _SC_XOPEN_XCU_VERSION 109 +#define _SC_XOPEN_REALTIME 110 +#define _SC_XOPEN_REALTIME_THREADS 111 +#define _SC_XOPEN_LEGACY 112 +#endif 
/* _XOPEN_SOURCE >= 500 */ + +#ifdef _ALL_SOURCE +#define _SC_REENTRANT_FUNCTIONS _SC_THREAD_SAFE_FUNCTIONS +#define _SC_PHYS_PAGES 113 +#define _SC_AVPHYS_PAGES 114 +#define _SC_LPAR_ENABLED 115 +#define _SC_LARGE_PAGESIZE 116 +#endif /* _ALL_SOURCE */ + +#define _SC_AIX_KERNEL_BITMODE 117 +#define _SC_AIX_REALMEM 118 +#define _SC_AIX_HARDWARE_BITMODE 119 +#define _SC_AIX_MP_CAPABLE 120 + +#define _SC_V6_ILP32_OFF32 121 +#define _SC_V6_ILP32_OFFBIG 122 +#define _SC_V6_LP64_OFF64 123 +#define _SC_V6_LPBIG_OFFBIG 124 + +#define _SC_XOPEN_STREAMS 125 + +#if (_POSIX_C_SOURCE >= 200112L) +#define _SC_HOST_NAME_MAX 126 +#define _SC_REGEXP 127 +#define _SC_SHELL 128 +#define _SC_SYMLOOP_MAX 129 +#define _SC_ADVISORY_INFO 130 +#define _SC_FILE_LOCKING 131 +#define _SC_2_PBS 132 +#define _SC_2_PBS_ACCOUNTING 133 +#define _SC_2_PBS_CHECKPOINT 134 +#define _SC_2_PBS_LOCATE 135 +#define _SC_2_PBS_MESSAGE 136 +#define _SC_2_PBS_TRACK 137 +#define _SC_BARRIERS 138 +#define _SC_CLOCK_SELECTION 139 +#define _SC_CPUTIME 140 +#define _SC_MONOTONIC_CLOCK 141 +#define _SC_READER_WRITER_LOCKS 142 +#define _SC_SPAWN 143 +#define _SC_SPIN_LOCKS 144 +#define _SC_SPORADIC_SERVER 145 +#define _SC_THREAD_CPUTIME 146 +#define _SC_THREAD_SPORADIC_SERVER 147 +#define _SC_TIMEOUTS 148 +#define _SC_TRACE 149 +#define _SC_TRACE_EVENT_FILTER 150 +#define _SC_TRACE_INHERIT 151 +#define _SC_TRACE_LOG 152 +#define _SC_TYPED_MEMORY_OBJECTS 153 +#define _SC_IPV6 154 +#define _SC_RAW_SOCKETS 155 +#define _SC_SS_REPL_MAX 156 +#define _SC_TRACE_EVENT_NAME_MAX 157 +#define _SC_TRACE_NAME_MAX 158 +#define _SC_TRACE_SYS_MAX 159 +#define _SC_TRACE_USER_EVENT_MAX 160 +#endif /* _POSIX_C_SOURCE >= 200112L */ + +#ifdef _ALL_SOURCE +#define _SC_AIX_UKEYS 161 +#endif /* _ALL_SOURCE */ + +#endif /* _POSIX_SOURCE */ + + +#if _XOPEN_SOURCE_EXTENDED==1 +#ifdef _LARGE_FILES +#define ftruncate ftruncate64 +#define truncate truncate64 +#endif + +#ifndef _H_LOCKF +#include /* lockf definitions for portability */ +#endif + 
+#ifdef _NO_PROTO +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern int brk(); + extern int getpagesize(); +#ifndef _MSGQSUPPORT + extern int __fd_getdtablesize(); + static int getdtablesize() + { + return __fd_getdtablesize(); + } +#else + extern int getdtablesize(); +#endif /* _MSGQSUPPORT */ + + extern void *sbrk(); +#endif /* _POSIX_C_SOURCE<200112L */ + extern int fchdir(); + extern int fchown(); + extern int ftruncate(); + extern long gethostid(); + extern int gethostname(); + extern pid_t getpgid(); + extern pid_t getsid(); + extern char *getwd(); + extern int lchown(); + extern int readlink(); + extern pid_t setpgrp(); + extern int setregid(); + extern int setreuid(); + extern int symlink(); + extern void sync(); + extern int truncate(); + extern useconds_t ualarm(); + extern int usleep(); + extern pid_t vfork(); +#else /* _NO_PROTO */ +#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE<200112L) || defined(_ALL_SOURCE) + extern int brk(void *); + extern int getpagesize(void); +#ifndef _MSGQSUPPORT + extern int __fd_getdtablesize(void); + static int getdtablesize() + { + return __fd_getdtablesize(); + } +#else + extern int getdtablesize(void); +#endif /* _MSGQSUPPORT */ +#ifdef _LINUX_SOURCE_COMPAT + extern void *sbrk(ptrdiff_t); +#elif (_XOPEN_SOURCE >= 500) || defined(__64BIT__) + extern void *sbrk(intptr_t); +#else + extern void *sbrk(int); +#endif +#endif /* _POSIX_C_SOURCE<200112L */ + extern int fchdir(int); + extern int fchown(int, uid_t, gid_t); + extern int ftruncate(int, off_t); +#ifdef _LARGE_FILE_API + extern int ftruncate64(int, off64_t); +#endif + extern int gethostname(char *, size_t); + extern long gethostid(void); + extern pid_t getpgid(pid_t); + extern pid_t getsid(pid_t); + extern char *getwd(char *); + extern int lchown(const char *, uid_t, gid_t); + +#if (defined(_SUSV3_READLINK) || \ + (!defined(_ALL_SOURCE) && (_POSIX_C_SOURCE >= 200112L))) + /* If SUSV3 readlink specifically requested or if 
strict SUSv3 + * environment requested */ +#ifdef __64BIT__ +static ssize_t readlink(const char *__restrict__ __path, + char *__restrict__ __buf, size_t __bufsize) +{ + extern ssize_t __readlink64(const char *__restrict__, char *__restrict__, size_t); + return __readlink64(__path, __buf, __bufsize); +} +#else + extern ssize_t readlink(const char *__restrict__, char *__restrict__, size_t); +#endif /* __64BIT__ */ +#else + extern int readlink(const char *, char *, size_t); +#endif /* _SUSV3_READLINK || !_ALL_SOURCE && _POSIX_C_SOURCE >= 200112L */ + +#ifndef _BSD + extern pid_t setpgrp(void); +#endif /* _BSD */ + extern int setregid(gid_t, gid_t); + extern int setreuid(uid_t, uid_t); + extern int symlink(const char *, const char *); + extern void sync(void); + extern int truncate(const char *, off_t); +#ifdef _LARGE_FILE_API + extern int truncate64(const char *, off64_t); +#endif + extern useconds_t ualarm(useconds_t, useconds_t); + extern int usleep(useconds_t); + extern pid_t vfork(void); +#if _XOPEN_SOURCE>=500 + extern int getlogin_r(char *, size_t); + extern int ttyname_r(int, char *, size_t); + +#ifdef _LARGE_FILES +#define pread pread64 +#define pwrite pwrite64 +#endif /* _LARGE_FILES */ + + extern ssize_t pread(int, void *, size_t, off_t); + extern ssize_t pwrite(int, const void *, size_t, off_t); +#ifdef _LARGE_FILE_API + extern ssize_t pread64(int, void *, size_t, off64_t); + extern ssize_t pwrite64(int, const void *, size_t, off64_t); +#endif /* _LARGE_FILE_API */ +#endif /* _XOPEN_SOURCE>=500 */ + +#endif /* _NO_PROTO */ + +#endif /* _XOPEN_SOURCE_EXTENDED */ + +#ifdef _ALL_SOURCE + +extern char **environ; + +#ifndef _KERNEL +#ifdef _NO_PROTO + extern pid_t f_fork(); +#else /* _NO_PROTO */ + extern pid_t f_fork(void); +#endif /* _NO_PROTO */ +#endif /* _KERNEL */ + +#ifdef _NO_PROTO + extern char * cuserid(); + extern int ioctl(); +#ifdef __64BIT__ + extern int ioctlx(); + extern int ioctl32(); + extern int ioctl32x(); +#endif /* __64BIT__ */ + extern int 
readx(); + extern int setgroups(); + extern int writex(); + extern int setegid(); + extern int seteuid(); + extern int setrgid(); + extern int setruid(); + extern offset_t llseek(); + extern char * getusershell(); + extern void setusershell(); + extern void endusershell(); + extern char * get_current_dir_name(); + extern int sysfs(); +#else + extern char * cuserid(char *); + extern int setegid(gid_t); + extern int seteuid(uid_t); + extern int setrgid(gid_t); + extern int setruid(uid_t); +#ifndef _BSD + extern int ioctl(int, int, ...); +#endif /* _BSD */ +#ifdef __64BIT__ + extern int ioctlx(int, int, void *, long); + extern int ioctl32(int, int, ...); + extern int ioctl32x(int, int, unsigned int, unsigned int); +#endif /* __64BIT__ */ + extern int setgroups(int, gid_t []); +#ifndef _KERNEL + extern int readx(int, char*, unsigned, long); + extern int writex(int, char*, unsigned, long); + +#ifdef _LARGE_FILES +#define fclear fclear64 +#define fsync_range fsync_range64 +#endif + extern off_t fclear(int, off_t); + extern int fsync_range(int, int, off_t, off_t); +#ifdef _LARGE_FILE_API + extern off64_t fclear64(int, off64_t); + extern int fsync_range64(int, int, off64_t, off64_t); +#endif + extern offset_t llseek(int, offset_t, int); + extern char * getusershell(void); + extern void setusershell(void); + extern void endusershell(void); + extern char * get_current_dir_name(void); + extern int sysfs(int, ...); + extern int finfo(const char *, int, void *, int32long64_t); + extern int ffinfo(int, int, void *, int32long64_t); + +#endif /* ndef _KERNEL */ + +#endif /* _NO_PROTO */ + +#define _AES_OS_VERSION 1 /* OSF, AES version */ + +#endif /* _ALL_SOURCE */ + +#ifdef __cplusplus +} +#endif + +#endif /* _H_UNISTD */ diff --git a/workloads/realworld/uvm_prefetch_async/knn/Makefile b/workloads/realworld/uvm_prefetch_async/knn/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..6ebd397ea3d2a2082eb070de41b7f1eb452dbf53 --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch_async/knn/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := knn +CUFILES := knn_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o knn + diff --git a/workloads/realworld/uvm_prefetch_async/knn/knn_cuda.cu b/workloads/realworld/uvm_prefetch_async/knn/knn_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..c3dc2e3d445be2dbc9c5e00db93931102b53cbff --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/knn/knn_cuda.cu @@ -0,0 +1,630 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. 
Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +//-----------------------------------------------------------------------------------------------// +// KERNELS // +//-----------------------------------------------------------------------------------------------// +__global__ void extract_with_interpolation(int nthreads, float *data, + float *n_xy_coords, + float *extracted_data, + int n_max_coord, int channels, + int height, int width) { + + int x0, x1, y0, y1, nc; + float wx0, wx1, wy0, wy1; + int n, nd; + float x, y; + + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + n = (index / n_max_coord); + nd = n * n_max_coord * channels; + x = n_xy_coords[index * 2]; + y = n_xy_coords[index * 2 + 1]; + + x0 = static_cast(floor(x)); + x1 = x0 + 1; + y0 = static_cast(floor(y)); + y1 = y0 + 1; + + x0 = x0 <= 0 ? 0 : (x0 >= (width - 1) ? (width - 1) : x0); + y0 = y0 <= 0 ? 0 : (y0 >= (height - 1) ? (height - 1) : y0); + x1 = x1 <= 0 ? 0 : (x1 >= (width - 1) ? (width - 1) : x1); + y1 = y1 <= 0 ? 0 : (y1 >= (height - 1) ? 
(height - 1) : y1); + + wx0 = static_cast(x1) - x; + wx1 = x - x0; + wy0 = static_cast(y1) - y; + wy1 = y - y0; + + if (x0 == x1) { + wx0 = 1; + wx1 = 0; + } + if (y0 == y1) { + wy0 = 1; + wy1 = 0; + } + for (int c = 0; c < channels; c++) { + nc = (n * channels + c) * height; + // extracted_data[index * channels + c] = wy0 * wx0 * data[(nc + y0) * + // width + x0] + // extracted_data[nd + index % n_max_coord + n_max_coord * c] = index; + extracted_data[nd + index % n_max_coord + n_max_coord * c] = + wy0 * wx0 * data[(nc + y0) * width + x0] + + wy1 * wx0 * data[(nc + y1) * width + x0] + + wy0 * wx1 * data[(nc + y0) * width + x1] + + wy1 * wx1 * data[(nc + y1) * width + x1]; + } + } +} + +/** + * Computes the distance between two matrix A (reference points) and + * B (query points) containing respectively wA and wB points. + * + * @param A pointer on the matrix A + * @param wA width of the matrix A = number of points in A + * @param B pointer on the matrix B + * @param wB width of the matrix B = number of points in B + * @param dim dimension of points = height of matrices A and B + * @param AB pointer on the matrix containing the wA*wB distances computed + */ +__global__ void cuComputeDistanceGlobal(float *A, int wA, float *B, int wB, + int dim, float *AB) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + // Declaration of the shared memory arrays As and Bs used to store the + // sub-matrix of A and B + __shared__ float shared_A[PREFETCH_COUNT][BLOCK_DIM][BLOCK_DIM]; + __shared__ float shared_B[PREFETCH_COUNT][BLOCK_DIM][BLOCK_DIM]; + + // Sub-matrix of A (begin, step, end) and Sub-matrix of B (begin, step) + __shared__ int begin_A; + __shared__ int begin_B; + __shared__ int step_A; + __shared__ int step_B; + __shared__ int end_A; + + // Thread index + int tx = threadIdx.x; + int ty = threadIdx.y; + + // Other variables + float tmp; + float ssd = 0; + + // Loop parameters + begin_A = BLOCK_DIM * blockIdx.y; + 
begin_B = BLOCK_DIM * blockIdx.x; + step_A = BLOCK_DIM * wA; + step_B = BLOCK_DIM * wB; + end_A = begin_A + (dim - 1) * wA; + + // if (blockIdx.x == 0 && blockIdx.y == 0 && tx == 0 && ty == 0) + // printf("begin_A is %d, end_A is %d, step_A is %d, begin_B is %d, step_B is %d\n", begin_A, end_A, step_A, begin_B, step_B); + + // Conditions + int cond0 = (begin_A + tx < wA); // used to write in shared memory + int cond1 = (begin_B + tx < wB); // used to write in shared memory & to + // computations and to write in output matrix + int cond2 = + (begin_A + ty < wA); // used to computations and to write in output matrix + // Loop over all the sub-matrices of A and B required to compute the block + // sub-matrix + + // int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int fetch = 0; + int end_tile = fetch + (end_A - begin_A + 1) / step_A; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + int a = begin_A + step_A * fetch; + int b = begin_B + step_B * fetch; + if (a / wA + ty < dim) + { + // shared_A[fetch % PREFETCH_COUNT][ty][tx] = (cond0) ? A[a + wA * ty + tx] : 0; + // shared_B[fetch % PREFETCH_COUNT][ty][tx] = (cond1) ? 
B[b + wB * ty + tx] : 0; + if (cond0) + memcpy_async(shared_A[fetch % PREFETCH_COUNT][ty][tx], A[a + wA * ty + tx], pipe); + else + shared_A[fetch % PREFETCH_COUNT][ty][tx] = 0; + if (cond1) + memcpy_async(shared_B[fetch % PREFETCH_COUNT][ty][tx], B[b + wB * ty + tx], pipe); + else + shared_B[fetch % PREFETCH_COUNT][ty][tx] = 0; + } + else + { + shared_A[fetch % PREFETCH_COUNT][ty][tx] = 0; + shared_B[fetch % PREFETCH_COUNT][ty][tx] = 0; + } + // block.sync(); + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + if (cond2 && cond1) + { + for (int k = 0; k < BLOCK_DIM; ++k) + { + tmp = shared_A[compute % PREFETCH_COUNT][k][ty] - shared_B[compute % PREFETCH_COUNT][k][tx]; + ssd += tmp * tmp; + } + } + + // Synchronize to make sure that the preceding computation is done before + // loading two new sub-matrices of A and B in the next iteration + block.sync(); + } + + // Write the block sub-matrix to device memory; each thread writes one element + if (cond2 && cond1) + { + AB[(begin_A + ty) * wB + begin_B + tx] = ssd; + } +} +/** + * Gathers k-th smallest distances for each column of the distance matrix in + * the top. 
 *
 * @param dist     distance matrix
 * @param ind      index matrix
 * @param width    width of the distance matrix and of the index matrix
 * @param height   height of the distance matrix and of the index matrix
 * @param k        number of neighbors to consider
 */
__global__ void cuInsertionSort(float *dist, int *ind, int width, int height,
                                int k) {
  // printf("test2\n");
  // Variables
  int l, i, j;
  float *p_dist;
  int *p_ind;
  float curr_dist, max_dist;
  int curr_row, max_row;
  unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x; // one thread per column

  if (xIndex < width) {
    // Pointer shift, initialization, and max value
    p_dist = dist + xIndex;
    p_ind = ind + xIndex;
    max_dist = p_dist[0];
    p_ind[0] = 0;

    // Part 1 : sort the k first elements (insertion sort on rows 1..k-1;
    // max_dist tracks the value of the current k-th smallest element)
    for (l = 1; l < k; l++) {
      curr_row = l * width;
      curr_dist = p_dist[curr_row];
      if (curr_dist < max_dist) {
        // Find the insertion position i inside the already-sorted prefix.
        i = l - 1;
        for (int a = 0; a < l - 1; a++) {
          if (p_dist[a * width] > curr_dist) {
            i = a;
            break;
          }
        }
        // Shift larger entries one row down, then insert value and row index.
        for (j = l; j > i; j--) {
          p_dist[j * width] = p_dist[(j - 1) * width];
          p_ind[j * width] = p_ind[(j - 1) * width];
        }
        p_dist[i * width] = curr_dist;
        p_ind[i * width] = l;
      } else {
        p_ind[l * width] = l;
      }
      max_dist = p_dist[curr_row];
    }

    // Part 2 : insert element in the k-th first lines
    max_row = (k - 1) * width;
    for (l = k; l < height; l++) {
      curr_dist = p_dist[l * width];
      if (curr_dist < max_dist) {
        i = k - 1;
        for (int a = 0; a < k - 1; a++) {
          if (p_dist[a * width] > curr_dist) {
            i = a;
            break;
          }
        }
        for (j = k - 1; j > i; j--) {
          p_dist[j * width] = p_dist[(j - 1) * width];
          p_ind[j * width] = p_ind[(j - 1) * width];
        }
        p_dist[i * width] = curr_dist;
        p_ind[i * width] = l;
        max_dist = p_dist[max_row];
      }
    }
  }
}

/**
 * Computes the square root of the first line (width-th first element)
 * of the distance matrix.
 *
 * @param dist    distance matrix
 * @param width   width of the distance matrix
 * @param k       number of neighbors to consider
 */
__global__ void cuParallelSqrt(float *dist, int width, int k) {
  unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x;
  unsigned int yIndex = blockIdx.y * blockDim.y + threadIdx.y;
  // printf("test3\n");
  if (xIndex < width && yIndex < k)
    dist[yIndex * width + xIndex] = sqrt(dist[yIndex * width + xIndex]);
}

//-----------------------------------------------------------------------------------------------//
//                                  K-th NEAREST NEIGHBORS                                       //
//-----------------------------------------------------------------------------------------------//

/**
 * Prints the error message return during the memory allocation.
 *
 * @param error        error value return by the memory allocation function
 * @param memorySize   size of memory tried to be allocated
 */
void printErrorMessage(cudaError_t error, int memorySize) {
  printf("==================================================\n");
  printf("MEMORY ALLOCATION ERROR : %s\n", cudaGetErrorString(error));
  printf("Whished allocated memory : %d\n", memorySize);
  printf("==================================================\n");
}

/**
 * K nearest neighbor algorithm
 * - Initialize CUDA
 * - Allocate device memory
 * - Copy point sets (reference and query points) from host to device memory
 * - Compute the distances + indexes to the k nearest neighbors for each query
 *   point
 * - Copy distances from device to host memory
 *
 * @param ref_host      reference points ; pointer to linear matrix
 * @param ref_width     number of reference points ; width of the matrix
 * @param query_host    query points ; pointer to linear matrix
 * @param query_width   number of query points ; width of the matrix
 * @param height        dimension of points ; height of the matrices
 * @param k             number of neighbor to consider
 * @param dist_host     distances to k nearest neighbors ; pointer to linear
 *                      matrix
 *
@param ind_host      indexes of the k nearest neighbors ; pointer to linear
 *                      matrix
 *
 */
void knn_cuda(float *ref_host, int ref_width, float *query_host,
              int query_width, int height, int k, float *dist_host,
              int *ind_host) {
  // Grids and threads
  dim3 g_16x16(query_width / 16, ref_width / 16, 1);
  dim3 t_16x16(16, 16, 1);
  if (query_width % 16 != 0)
    g_16x16.x += 1;
  if (ref_width % 16 != 0)
    g_16x16.y += 1;
  //
  dim3 g_256x1(query_width / 256, 1, 1);
  dim3 t_256x1(256, 1, 1);
  if (query_width % 256 != 0)
    g_256x1.x += 1;

  dim3 g_k_16x16(query_width / 16, k / 16, 1);
  dim3 t_k_16x16(16, 16, 1);
  if (query_width % 16 != 0)
    g_k_16x16.x += 1;
  if (k % 16 != 0)
    g_k_16x16.y += 1;

  // printf("ref_width is %d, query_width is %d, height is %d\n", ref_width, query_width, height);

  cudaStream_t stream1;
  cudaStream_t stream2;
  cudaStream_t stream3;
  cudaStream_t stream4;
  cudaStreamCreate(&stream1);
  cudaStreamCreate(&stream2);
  cudaStreamCreate(&stream3);
  cudaStreamCreate(&stream4);

  // memcpy(ref_device, ref, ref_nb * dim * sizeof(float));
  // memcpy(query_device, query, query_nb * dim * sizeof(float));
  // memcpy(dist, dist_device, query_nb * ref_nb * sizeof(float));
  // memcpy(ind, ind_device, query_nb * k * sizeof(int));

  // Prefetch each managed buffer to the GPU, one stream per buffer, each
  // synchronized immediately after issue.
  cudaMemPrefetchAsync(ref_host, ref_width * height * sizeof(float), GPU_DEVICE, stream1);
  cudaStreamSynchronize(stream1);
  cudaMemPrefetchAsync(query_host, query_width * height * sizeof(float), GPU_DEVICE, stream2);
  cudaStreamSynchronize(stream2);
  cudaMemPrefetchAsync(dist_host, query_width * ref_width * sizeof(float), GPU_DEVICE, stream3);
  cudaStreamSynchronize(stream3);
  cudaMemPrefetchAsync(ind_host, query_width * k * sizeof(int), GPU_DEVICE, stream4);
  cudaStreamSynchronize(stream4);

  // Kernel 1: Compute all the distances
  // NOTE(review): the `<<<grid, block>>>` launch configurations below appear
  // stripped to `<<>>` in this copy; restore from the original source.
  cuComputeDistanceGlobal<<>>(ref_host, ref_width, query_host,
                              query_width, height, dist_host);
  // Kernel 2: Sort each column
  cuInsertionSort<<>>(dist_host,
ind_host, query_width,
                  ref_width, k);
  // Kernel 3: Compute square root of k first elements
  cuParallelSqrt<<>>(dist_host, query_width, k);
  cudaDeviceSynchronize();
}

/**
 * Euclidean distance between reference point `ref_index` and query point
 * `query_index`; both matrices are laid out dimension-major (dim rows of
 * ref_nb / query_nb columns).
 */
float compute_distance(const float *ref, int ref_nb, const float *query,
                       int query_nb, int dim, int ref_index, int query_index) {
  float sum = 0.f;
  for (int d = 0; d < dim; ++d) {
    const float diff =
        ref[d * ref_nb + ref_index] - query[d * query_nb + query_index];
    sum += diff * diff;
  }
  return sqrtf(sum);
}

/**
 * Partial insertion sort: on return the k smallest distances (and their
 * original indexes) occupy dist[0..k-1] / index[0..k-1] in ascending order.
 */
void modified_insertion_sort(float *dist, int *index, int length, int k) {

  // Initialise the first index
  index[0] = 0;

  // Go through all points
  for (int i = 1; i < length; ++i) {

    // Store current distance and associated index
    float curr_dist = dist[i];
    int curr_index = i;

    // Skip the current value if its index is >= k and if it's higher than the
    // k-th already sorted smallest value
    if (i >= k && curr_dist >= dist[k - 1]) {
      continue;
    }

    // Shift values (and indexes) higher than the current distance to the right
    int j = min(i, k - 1);
    while (j > 0 && dist[j - 1] > curr_dist) {
      dist[j] = dist[j - 1];
      index[j] = index[j - 1];
      --j;
    }

    // Write the current distance and index at their position
    dist[j] = curr_dist;
    index[j] = curr_index;
  }
}

/**
 * CPU reference implementation of the kNN search (used for validation).
 * Returns false on allocation failure, true otherwise.
 */
bool knn_c(const float *ref, int ref_nb, const float *query, int query_nb,
           int dim, int k, float *knn_dist, int *knn_index) {
  // Allocate local array to store all the distances / indexes for a given query
  // point
  float *dist = (float *)malloc(ref_nb * sizeof(float));
  int *index = (int *)malloc(ref_nb * sizeof(int));

  // Allocation checks
  if (!dist || !index) {
    printf("Memory allocation error\n");
    free(dist);
    free(index);
    return false;
  }

  // Process one query point at the time
  for (int i = 0; i < query_nb; ++i) {

    // Compute all distances / indexes
    for (int j = 0; j < ref_nb; ++j) {
      dist[j] = compute_distance(ref, ref_nb, query, query_nb, dim, j, i);
index[j] = j;
    }

    // Sort distances / indexes
    modified_insertion_sort(dist, index, ref_nb, k);

    // Copy k smallest distances and their associated index
    for (int j = 0; j < k; ++j) {
      knn_dist[j * query_nb + i] = dist[j];
      knn_index[j * query_nb + i] = index[j];
    }
  }

  // Memory clean-up
  free(dist);
  free(index);
  return true;
}

/**
 * Example of use of kNN search CUDA.
 */
int main(int argc, char *argv[])
{
  uint64_t start_tsc = rdtsc();
  uint64_t start_tsp = rdtsp();
  printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp);
  // Variables and parameters
  float *ref;           // Pointer to reference point array
  float *query;         // Pointer to query point array
  float *dist, *dist_c; // Pointer to distance array
  int *ind, *ind_c;     // Pointer to index array
  int ref_nb = 4096;    // Reference point number, max=65535
  int query_nb = 4096;  // Query point number, max=65535
  int dim = 128;        // Dimension of points
  int k = 20;           // Nearest neighbors to consider
  int iterations = 100;

  if (argc >= 4) {
    ref_nb = atoi(argv[1]);
    query_nb = atoi(argv[2]);
    dim = atoi(argv[3]);
  }

  int c_iterations = 10;
  int i;
  const float precision = 0.001f; // distance error max
  int nb_correct_precisions = 0;
  int nb_correct_indexes = 0;
  float *knn_dist = (float *)malloc(query_nb * k * sizeof(float));
  int *knn_index = (int *)malloc(query_nb * k * sizeof(int));

  // Memory allocation
  ref = (float *)malloc(ref_nb * dim * sizeof(float));
  query = (float *)malloc(query_nb * dim * sizeof(float));
  dist = (float *)malloc(query_nb * ref_nb * sizeof(float));
  ind = (int *)malloc(query_nb * k * sizeof(int));

  dist_c = (float *)malloc(query_nb * k * sizeof(float));
  // Fix: ind_c is an int array -- size it with sizeof(int), not sizeof(float)
  // (same byte count on common ABIs, but the float element size was a latent
  // bug).
  ind_c = (int *)malloc(query_nb * k * sizeof(int));

  // Init
  srand(time(NULL));
  for (i = 0; i < ref_nb * dim; i++)
    ref[i] = (float)rand() / (float)RAND_MAX;
  for (i = 0; i < query_nb * dim; i++)
    query[i] = (float)rand() / (float)RAND_MAX;

  // printf("Ground
truth computation in progress...\n\n");
  // if (!knn_c(ref, ref_nb, query, query_nb, dim, k, knn_dist, knn_index)) {
  //   free(knn_dist);
  //   free(knn_index);
  //   return EXIT_FAILURE;
  // }

  // Variables for duration evaluation
  float elapsed_time;

  // Display informations
  printf("Number of reference points : %6d\n", ref_nb);
  printf("Number of query points : %6d\n", query_nb);
  printf("Dimension of points : %4d\n", dim);
  printf("Number of neighbors to consider : %4d\n", k);
  printf("Processing kNN search :\n");

  float precision_accuracy = 0.0f;
  float index_accuracy = 0.0f;
  /*
  printf("On CPU: \n");
  struct timeval tic;
  gettimeofday(&tic, NULL);
  for (i = 0; i < c_iterations; i++) {
    knn_c(ref, ref_nb, query, query_nb, dim, k, dist_c, ind_c);
  }

  for (int i = 0; i < query_nb * k; ++i) {
    if (fabs(dist_c[i] - knn_dist[i]) <= precision) {
      nb_correct_precisions++;
    }
    if (ind_c[i] == knn_index[i]) {
      nb_correct_indexes++;
    }
  }

  struct timeval toc;
  gettimeofday(&toc, NULL);
  elapsed_time = toc.tv_sec - tic.tv_sec;
  elapsed_time += (toc.tv_usec - tic.tv_usec) / 1000000.;
  precision_accuracy = nb_correct_precisions / ((float)query_nb * k);
  index_accuracy = nb_correct_indexes / ((float)query_nb * k);
  printf("%f, %f\n", precision_accuracy, index_accuracy);
  printf(" done in %f s for %d iterations (%f s by iteration)\n", elapsed_time,
         c_iterations, elapsed_time / (c_iterations));
  */
  printf("on GPU: \n");

  // Call kNN search CUDA
  GPU_argv_init();

  initTrace();
  startCPU();

  float *ref_device;
  float *query_device;
  float *dist_device;
  int *ind_device;

  // Managed (unified memory) allocations: the same pointers are used by the
  // host memcpy below and by the kernels inside knn_cuda.
  cudaMallocManaged(&ref_device, ref_nb * dim * sizeof(float));
  cudaMallocManaged(&query_device, query_nb * dim * sizeof(float));
  cudaMallocManaged(&dist_device, query_nb * ref_nb * sizeof(float));
  cudaMallocManaged(&ind_device, query_nb * k * sizeof(int));

  memcpy(ref_device, ref, ref_nb * dim * sizeof(float));
  memcpy(query_device,
query, query_nb * dim * sizeof(float)); + + for (i = 0; i < iterations; i++) { + // knn_cuda(ref, ref_nb, query, query_nb, dim, k, dist, ind); + knn_cuda(ref_device, ref_nb, query_device, query_nb, dim, k, dist_device, ind_device); + } + + memcpy(dist, dist_device, query_nb * ref_nb * sizeof(float)); + memcpy(ind, ind_device, query_nb * k * sizeof(int)); + + cudaFree(ind_device); + cudaFree(dist_device); + cudaFree(query_device); + cudaFree(ref_device); + + endCPU(); + finiTrace(); + + nb_correct_precisions = 0; + nb_correct_indexes = 0; + for (int i = 0; i < query_nb * k; ++i) { + if (fabs(dist[i] - knn_dist[i]) <= precision) { + nb_correct_precisions++; + } + if (ind[i] == knn_index[i]) { + nb_correct_indexes++; + } + } + + precision_accuracy = nb_correct_precisions / ((float)query_nb * k); + index_accuracy = nb_correct_indexes / ((float)query_nb * k); + printf("%f, %f\n", precision_accuracy, index_accuracy); + + + // Destroy cuda event object and free memory + free(ind); + free(dist); + free(query); + free(ref); + free(dist_c); + free(ind_c); +} \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/knn/run.sh b/workloads/realworld/uvm_prefetch_async/knn/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..541db1387ce3ebe87b1338f079609b8b4a2736c6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/knn/run.sh @@ -0,0 +1 @@ +./knn 4096 4096 128 \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/knn/run_super.sh b/workloads/realworld/uvm_prefetch_async/knn/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..86ad9321b470072e5e84e706e1619ee200cf2b31 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/knn/run_super.sh @@ -0,0 +1 @@ +./knn 32768 32768 128 \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/lavaMD/README b/workloads/realworld/uvm_prefetch_async/lavaMD/README new file mode 100755 index 
0000000000000000000000000000000000000000..27b526ff669e9632b11193634307bfe778a2dfff --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/lavaMD/README @@ -0,0 +1,50 @@ +//======================================================================================================================================================150 +// DESCRIPTION +//======================================================================================================================================================150 + +This is the CUDA version of the code. + +The code calculates particle potential and relocation due to mutual forces between particles within a large 3D space. This space is +divided into cubes, or large boxes, that are allocated to individual cluster nodes. The large box at each node is further divided into +cubes, called boxes. 26 neighbor boxes surround each box (the home box). Home boxes at the boundaries of the particle space have fewer neighbors. +Particles only interact with those other particles that are within a cutoff radius since ones at larger distances exert negligible forces. Thus the +box size s chosen so that cutoff radius does not span beyond any neighbor box for any particle in a home box, thus limiting the reference space to +a finite number of boxes. + +This code [1] was derived from the ddcMD application [2] by rewriting the front end and structuring it for parallelization. This code represents MPI +task that runs on a single cluster node. While the details of the code are somewhat different than the original, the code retains the structure of the +MPI task in the original code. Since the rest of MPI code is not included here, the application first emulates MPI partitioning of the particle space +into boxes. Then, for every particle in the home box, the nested loop processes interactions first with other particles in the home box and then with +particles in all neighbor boxes. 
The processing of each particle consists of a single stage of calculation that is enclosed in the innermost loop. The +nested loops in the application were parallelized in such a way that at any point of time GPU warp/wavefront accesses adjacent memory locations. The +speedup depends on the number of boxes, particles (fixed) and the actualcal culation for each particle (fixed). The application is memory bound, and +GPU speedup seems to saturate at about 16x when compared to single-core CPU. + +More information about the parallel version of this code can be found in: +[1] L. G. Szafaryn, T. Gamblin, B. deSupinski and K. Skadron. "Experiences with Achieving Portability across Heterogeneous Architectures." Submitted to +WOLFHPC workshop at 25th International Conference on Supercomputing (ICS). Tucson, AZ. 2010. +More about the original ddcMD application can be found in: +[2] F. H. Streitz, J. N. Glosli, M. V. Patel, B. Chan, R. K. Yates, B. R. de Supinski, J. Sexton, J and A. Gunnels. "100+ TFlop Solidification Simulations +on BlueGene/L." In Proceedings of the 2005 Supercomputing Conference (SC 05). Seattle, WA. 2005. 
+ +//======================================================================================================================================================150 +// USE +//======================================================================================================================================================150 + +The code takes the followint parameters: +-boxes1d (number of boxes in one dimension, the total number of boxes will be that^3) + +The code can be run as follows: +./lavaMD -boxes1d 10 + +******Adjustable work group size***** +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=128" + +######OUTPUT FOR VALIDATION######## +USAGE: +make clean +make OUTPUT=Y \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/lavaMD/kernel/kernel_gpu_cuda.cu b/workloads/realworld/uvm_prefetch_async/lavaMD/kernel/kernel_gpu_cuda.cu new file mode 100755 index 0000000000000000000000000000000000000000..c931ae7f7eb429c844ac7722059faeedae8c9fbe --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/lavaMD/kernel/kernel_gpu_cuda.cu @@ -0,0 +1,210 @@ +//----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------200 +// plasmaKernel_gpu_2 +//----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------200 + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +__global__ void kernel_gpu_cuda(par_str d_par_gpu, + dim_str d_dim_gpu, + box_str *d_box_gpu, + FOUR_VECTOR *d_rv_gpu, + fp *d_qv_gpu, + FOUR_VECTOR *d_fv_gpu, + int boxes_per_block) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + 
//--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + // THREAD PARAMETERS + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + int bx = blockIdx.x; // get current horizontal block index (0-n) + int tx = threadIdx.x; // get current horizontal thread index (0-n) + int wtx = tx; + + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + // Extract input parameters + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + + // parameters + fp a2 = 2.0 * d_par_gpu.alpha * d_par_gpu.alpha; + + // home box + int first_i; + FOUR_VECTOR *rA; + FOUR_VECTOR *fA; + __shared__ FOUR_VECTOR rA_shared[100]; + + // nei box + int pointer; + int k = 0; + int first_j; + FOUR_VECTOR *rB; + fp *qB; + int j = 0; + __shared__ FOUR_VECTOR rB_shared[NUMBER_PAR_PER_BOX * PREFETCH_COUNT]; + __shared__ double qB_shared[NUMBER_PAR_PER_BOX * PREFETCH_COUNT]; + + // common + fp r2; + fp u2; + fp vij; + fp fs; + fp fxij; + fp fyij; + fp fzij; + THREE_VECTOR d; + + int box = bx * boxes_per_block; + int end_box = box + boxes_per_block; + + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + // DO FOR THE NUMBER OF BOXES + //--------------------------------------------------------------------------------------------------------------------------------------------------------------------------180 + for (; box < end_box; box++) + { + 
//------------------------------------------------------------------------------------------------------------------------------------------------------160 + // Home box + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Setup parameters + //----------------------------------------------------------------------------------------------------------------------------------140 + + // home box - box parameters + first_i = d_box_gpu[box].offset; + + // home box - distance, force, charge and type parameters + rA = &d_rv_gpu[first_i]; + fA = &d_fv_gpu[first_i]; + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Copy to shared memory + //----------------------------------------------------------------------------------------------------------------------------------140 + + // home box - shared memory + while (wtx < NUMBER_PAR_PER_BOX) + { + rA_shared[wtx] = rA[wtx]; + wtx = wtx + NUMBER_THREADS; + } + wtx = tx; + + // synchronize threads - not needed, but just to be safe + block.sync(); + + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + // nei box loop + //------------------------------------------------------------------------------------------------------------------------------------------------------160 + + // if (wtx == 0) + // printf("d_box_gpu[%d].nn is %d\n", bx, d_box_gpu[bx].nn); + + int fetch = 0; + int end_tile = 1 + d_box_gpu[box].nn; + + // loop over neiing boxes of home box + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { 
+ //----------------------------------------50 + // nei box - get pointer to the right box + //----------------------------------------50 + + if (fetch == 0) + { + pointer = box; // set first box to be processed to home box + } + else + { + pointer = d_box_gpu[box].nei[fetch - 1].number; // remaining boxes are nei boxes + } + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Setup parameters + //----------------------------------------------------------------------------------------------------------------------------------140 + + // nei box - box parameters + first_j = d_box_gpu[pointer].offset; + + // nei box - distance, (force), charge and (type) parameters + rB = &d_rv_gpu[first_j]; + qB = &d_qv_gpu[first_j]; + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Setup parameters + //----------------------------------------------------------------------------------------------------------------------------------140 + + // nei box - shared memory + while (wtx < NUMBER_PAR_PER_BOX) + { + memcpy_async(rB_shared[(fetch % PREFETCH_COUNT) * NUMBER_PAR_PER_BOX + wtx], rB[wtx], pipe); + memcpy_async(qB_shared[(fetch % PREFETCH_COUNT) * NUMBER_PAR_PER_BOX + wtx], qB[wtx], pipe); + wtx = wtx + NUMBER_THREADS; + } + wtx = tx; + + // synchronize threads because in next section each thread accesses data brought in by different threads here + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + //----------------------------------------------------------------------------------------------------------------------------------140 + // Calculation + 
//----------------------------------------------------------------------------------------------------------------------------------140 + + // loop for the number of particles in the home box + // for (int i=0; i +#include "../../../../common/cupti_add.h" +#include "../../../../common/cpu_timestamps.h" + +void +kernel_gpu_cuda_wrapper(par_str par_cpu, + dim_str dim_cpu, + box_str* box_cpu, + FOUR_VECTOR* rv_cpu, + fp* qv_cpu, + FOUR_VECTOR* fv_cpu, + int nblocks) +{ + + //======================================================================================================================================================150 + // CPU VARIABLES + //======================================================================================================================================================150 + + // timer + long long time0; + long long time1; + long long time2; + long long time3; + long long time4; + long long time5; + long long time6; + + time0 = get_time(); + + //======================================================================================================================================================150 + // GPU SETUP + //======================================================================================================================================================150 + + //====================================================================================================100 + // INITIAL DRIVER OVERHEAD + //====================================================================================================100 + GPU_argv_init(); + + initTrace(); + startCPU(); + + cudaThreadSynchronize(); + + //====================================================================================================100 + // VARIABLES + //====================================================================================================100 + + box_str* d_box_gpu; + FOUR_VECTOR* d_rv_gpu; + fp* d_qv_gpu; + FOUR_VECTOR* d_fv_gpu; + + dim3 threads; + dim3 blocks; + + 
//====================================================================================================100 + // EXECUTION PARAMETERS + //====================================================================================================100 + + // blocks.x = dim_cpu.number_boxes; + blocks.x = nblocks * nblocks * nblocks; + blocks.y = 1; + threads.x = NUMBER_THREADS; // define the number of threads in the block + threads.y = 1; + + int boxes_per_block = 1; + if (dim_cpu.number_boxes >= blocks.x) + { + boxes_per_block = (dim_cpu.number_boxes + blocks.x - 1) / blocks.x; + } + + time1 = get_time(); + + //======================================================================================================================================================150 + // GPU MEMORY (MALLOC) + //======================================================================================================================================================150 + + //====================================================================================================100 + // GPU MEMORY (MALLOC) COPY IN + //====================================================================================================100 + + //==================================================50 + // boxes + //==================================================50 + + cudaMallocManaged( (void **)&d_box_gpu, + dim_cpu.box_mem); + + //==================================================50 + // rv + //==================================================50 + + cudaMallocManaged((void **)&d_rv_gpu, + dim_cpu.space_mem); + + //==================================================50 + // qv + //==================================================50 + + cudaMallocManaged((void **)&d_qv_gpu, + dim_cpu.space_mem2); + + //====================================================================================================100 + // GPU MEMORY (MALLOC) COPY + 
//====================================================================================================100 + + //==================================================50 + // fv + //==================================================50 + + cudaMallocManaged((void **)&d_fv_gpu, + dim_cpu.space_mem); + + time2 = get_time(); + + //======================================================================================================================================================150 + // GPU MEMORY COPY + //======================================================================================================================================================150 + + //====================================================================================================100 + // GPU MEMORY (MALLOC) COPY IN + //====================================================================================================100 + + //==================================================50 + // boxes + //==================================================50 + + memcpy(d_box_gpu, + box_cpu, + dim_cpu.box_mem); + + //==================================================50 + // rv + //==================================================50 + + memcpy(d_rv_gpu, + rv_cpu, + dim_cpu.space_mem); + + //==================================================50 + // qv + //==================================================50 + + memcpy(d_qv_gpu, + qv_cpu, + dim_cpu.space_mem2); + + //====================================================================================================100 + // GPU MEMORY (MALLOC) COPY + //====================================================================================================100 + + //==================================================50 + // fv + //==================================================50 + + memcpy(d_fv_gpu, + fv_cpu, + dim_cpu.space_mem); + + time3 = get_time(); + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStream_t stream4; + 
cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + cudaStreamCreate(&stream4); + + cudaMemPrefetchAsync(d_box_gpu, dim_cpu.box_mem, GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(d_rv_gpu, dim_cpu.space_mem, GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(d_qv_gpu, dim_cpu.space_mem2, GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + cudaMemPrefetchAsync(d_fv_gpu, dim_cpu.space_mem, GPU_DEVICE, stream4); + cudaStreamSynchronize(stream4); + + //======================================================================================================================================================150 + // KERNEL + //======================================================================================================================================================150 + // launch kernel - all boxes + kernel_gpu_cuda<<>>(par_cpu, + dim_cpu, + d_box_gpu, + d_rv_gpu, + d_qv_gpu, + d_fv_gpu, + boxes_per_block); + + checkCUDAError("Start"); + cudaDeviceSynchronize(); + + time4 = get_time(); + + //======================================================================================================================================================150 + // GPU MEMORY COPY (CONTD.) 
+ //======================================================================================================================================================150 + + memcpy(fv_cpu, + d_fv_gpu, + dim_cpu.space_mem); + + time5 = get_time(); + + //======================================================================================================================================================150 + // GPU MEMORY DEALLOCATION + //======================================================================================================================================================150 + + cudaFree(d_rv_gpu); + cudaFree(d_qv_gpu); + cudaFree(d_fv_gpu); + cudaFree(d_box_gpu); + + endCPU(); + finiTrace(); + + time6 = get_time(); + + //======================================================================================================================================================150 + // DISPLAY TIMING + //======================================================================================================================================================150 + + printf("Time spent in different stages of GPU_CUDA KERNEL:\n"); + + printf("%15.12f s, %15.12f % : GPU: SET DEVICE / DRIVER INIT\n", (float)(time1 - time0) / 1000000, (float)(time1 - time0) / (float)(time6 - time0) * 100); + printf("%15.12f s, %15.12f % : GPU MEM: ALO\n", (float)(time2 - time1) / 1000000, (float)(time2 - time1) / (float)(time6 - time0) * 100); + printf("%15.12f s, %15.12f % : GPU MEM: COPY IN\n", (float)(time3 - time2) / 1000000, (float)(time3 - time2) / (float)(time6 - time0) * 100); + + printf("%15.12f s, %15.12f % : GPU: KERNEL\n", (float)(time4 - time3) / 1000000, (float)(time4 - time3) / (float)(time6 - time0) * 100); + + printf("%15.12f s, %15.12f % : GPU MEM: COPY OUT\n", (float)(time5 - time4) / 1000000, (float)(time5 - time4) / (float)(time6 - time0) * 100); + printf("%15.12f s, %15.12f % : GPU MEM: FRE\n", (float)(time6 - time5) / 1000000, (float)(time6 - time5) / (float)(time6 - time0) * 
100); + + printf("Total time:\n"); + printf("%.12f s\n", (float)(time6 - time0) / 1000000); +} diff --git a/workloads/realworld/uvm_prefetch_async/lavaMD/kernel/kernel_gpu_cuda_wrapper.h b/workloads/realworld/uvm_prefetch_async/lavaMD/kernel/kernel_gpu_cuda_wrapper.h new file mode 100755 index 0000000000000000000000000000000000000000..cf499f1480469569c649eccf174cc8ba0655ddbd --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/lavaMD/kernel/kernel_gpu_cuda_wrapper.h @@ -0,0 +1,19 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//========================================================================================================================================================================================================200 +// KERNEL_GPU_CUDA_WRAPPER HEADER +//========================================================================================================================================================================================================200 + +void kernel_gpu_cuda_wrapper( par_str parms_cpu, + dim_str dim_cpu, + box_str* box_cpu, + FOUR_VECTOR* rv_cpu, + fp* qv_cpu, + FOUR_VECTOR* fv_cpu, + int nblocks); + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/lavaMD/main.c b/workloads/realworld/uvm_prefetch_async/lavaMD/main.c new file mode 100755 index 0000000000000000000000000000000000000000..a7c88472e3939414bbdc314e2bfb1c46bc345bea --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/lavaMD/main.c @@ -0,0 +1,318 @@ +//========================================================================================================================================================================================================200 +//======================================================================================================================================================150 +//====================================================================================================100 
+//==================================================50 + +//========================================================================================================================================================================================================200 +// UPDATE +//========================================================================================================================================================================================================200 + +// 14 APR 2011 Lukasz G. Szafaryn + +//========================================================================================================================================================================================================200 +// DEFINE/INCLUDE +//========================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// LIBRARIES +//======================================================================================================================================================150 + +#include // (in path known to compiler) needed by printf +#include // (in path known to compiler) needed by malloc +#include // (in path known to compiler) needed by true/false + +//======================================================================================================================================================150 +// UTILITIES +//======================================================================================================================================================150 + +#include "./util/timer/timer.h" // (in path specified here) +#include "./util/num/num.h" // (in path specified here) + 
+//======================================================================================================================================================150 +// MAIN FUNCTION HEADER +//======================================================================================================================================================150 + +#include "./main.h" // (in the current directory) + +//======================================================================================================================================================150 +// KERNEL +//======================================================================================================================================================150 + +#include "./kernel/kernel_gpu_cuda_wrapper.h" // (in library path specified here) + +//========================================================================================================================================================================================================200 +// MAIN FUNCTION +//========================================================================================================================================================================================================200 +#define _POSIX_C_SOURCE 200809L +#include +#include +#include +extern inline __attribute__((always_inline)) unsigned long rdtsc() +{ + unsigned long a, d; + + __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); + + return (a | (d << 32)); +} + +extern inline __attribute__((always_inline)) unsigned long rdtsp() { + struct timespec tms; + if (clock_gettime(CLOCK_REALTIME, &tms)) { + return -1; + } + unsigned long ns = tms.tv_sec * 1000000000; + ns += tms.tv_nsec; + return ns; +} + + +int +main( int argc, + char *argv []) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + + printf("thread block size of kernel = %d \n", NUMBER_THREADS); + 
//======================================================================================================================================================150 + // CPU/MCPU VARIABLES + //======================================================================================================================================================150 + + // timer + long long time0; + + time0 = get_time(); + + // timer + long long time1; + long long time2; + long long time3; + long long time4; + long long time5; + long long time6; + long long time7; + + // counters + int i, j, k, l, m, n; + + // system memory + par_str par_cpu; + dim_str dim_cpu; + box_str* box_cpu; + FOUR_VECTOR* rv_cpu; + fp* qv_cpu; + FOUR_VECTOR* fv_cpu; + int nh; + + time1 = get_time(); + + //======================================================================================================================================================150 + // CHECK INPUT ARGUMENTS + //======================================================================================================================================================150 + + // assing default values + dim_cpu.boxes1d_arg = 1; + + // go through arguments + dim_cpu.boxes1d_arg = atoi(argv[1]); + int nblocks = atoi(argv[2]); + + // Print configuration + printf("Configuration used: boxes1d = %d\n", dim_cpu.boxes1d_arg); + + time2 = get_time(); + + //======================================================================================================================================================150 + // INPUTS + //======================================================================================================================================================150 + + par_cpu.alpha = 0.5; + + time3 = get_time(); + + //======================================================================================================================================================150 + // DIMENSIONS + 
//======================================================================================================================================================150 + + // total number of boxes + dim_cpu.number_boxes = dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg; + + // how many particles space has in each direction + dim_cpu.space_elem = dim_cpu.number_boxes * NUMBER_PAR_PER_BOX; + dim_cpu.space_mem = dim_cpu.space_elem * sizeof(FOUR_VECTOR); + dim_cpu.space_mem2 = dim_cpu.space_elem * sizeof(fp); + + // box array + dim_cpu.box_mem = dim_cpu.number_boxes * sizeof(box_str); + + time4 = get_time(); + + //======================================================================================================================================================150 + // SYSTEM MEMORY + //======================================================================================================================================================150 + + //====================================================================================================100 + // BOX + //====================================================================================================100 + + // allocate boxes + box_cpu = (box_str*)malloc(dim_cpu.box_mem); + + // initialize number of home boxes + nh = 0; + + // home boxes in z direction + for(i=0; i=0 && (j+m)>=0 && (k+n)>=0)==true && ((i+l) 1) { + + // variables + int max_multiprocessors; + int max_device; + cudaDeviceProp properties; + + // initialize variables + max_multiprocessors = 0; + max_device = 0; + + for (device = 0; device < num_devices; device++) { + cudaGetDeviceProperties(&properties, device); + if (max_multiprocessors < properties.multiProcessorCount) { + max_multiprocessors = properties.multiProcessorCount; + max_device = device; + } + } + cudaSetDevice(max_device); + } + +} + +//====================================================================================================100 +// GET LAST ERROR 
+//====================================================================================================100 + +void checkCUDAError(const char *msg) +{ + cudaError_t err = cudaGetLastError(); + if( cudaSuccess != err) { + // fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + printf("Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + fflush(NULL); + exit(EXIT_FAILURE); + } +} + +//===============================================================================================================================================================================================================200 +// END SET_DEVICE CODE +//===============================================================================================================================================================================================================200 diff --git a/workloads/realworld/uvm_prefetch_async/lavaMD/util/device/device.h b/workloads/realworld/uvm_prefetch_async/lavaMD/util/device/device.h new file mode 100755 index 0000000000000000000000000000000000000000..23bb31d26c1bc0e607c9b2faf7bddaa5a5c06d98 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/lavaMD/util/device/device.h @@ -0,0 +1,29 @@ +//===============================================================================================================================================================================================================200 +// SET_DEVICE HEADER +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// INCLUDE/DEFINE 
+//======================================================================================================================================================150 + +#include // (in library path known to compiler) needed by printf + +//======================================================================================================================================================150 +// FUNCTION PROTOTYPES +//======================================================================================================================================================150 + +//====================================================================================================100 +// SET DEVICE +//====================================================================================================100 + +void setdevice(void); + +//====================================================================================================100 +// GET LAST ERROR +//====================================================================================================100 + +void checkCUDAError(const char *msg); + +//===============================================================================================================================================================================================================200 +// END SET_DEVICE HEADER +//===============================================================================================================================================================================================================200 diff --git a/workloads/realworld/uvm_prefetch_async/lavaMD/util/num/num.c b/workloads/realworld/uvm_prefetch_async/lavaMD/util/num/num.c new file mode 100755 index 0000000000000000000000000000000000000000..980ff7498832c784eab718a8b886e82891047599 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/lavaMD/util/num/num.c @@ -0,0 +1,53 @@ +#ifdef __cplusplus +extern "C" { +#endif + 
//===============================================================================================================================================================================================================200
// DESCRIPTION
//===============================================================================================================================================================================================================200

// Returns: 0 if string does not represent integer
//          1 if string represents integer

//===============================================================================================================================================================================================================200
// NUM CODE
//===============================================================================================================================================================================================================200

//======================================================================================================================================================150
// ISINTEGER FUNCTION
//======================================================================================================================================================150

// Check whether a NUL-terminated string consists solely of decimal digits
// (i.e. an unsigned integer literal).  Signs, whitespace and '.' are all
// rejected, so "-1" and "1.5" are NOT integers by this test.
//
// str : candidate string (must not be NULL)
// Returns 1 when the string is non-empty and every character is a digit,
// 0 otherwise.
int isInteger(char *str){

	//====================================================================================================100
	// make sure it's not empty
	//====================================================================================================100

	if (*str == '\0'){
		return 0;
	}

	//====================================================================================================100
	// if any digit is not a number, return false
	//====================================================================================================100

	// character literals replace the former magic ASCII codes 48/57
	// (need to also accept '.' here if checking for float)
	for(; *str != '\0'; str++){
		if (*str < '0' || *str > '9'){
			return 0;
		}
	}

	//====================================================================================================100
	// it got past all my checks so I think it's a number
	//====================================================================================================100

	return 1;
}

//===============================================================================================================================================================================================================200
// END NUM CODE
//===============================================================================================================================================================================================================200
+int isInteger(char *str); + +//===============================================================================================================================================================================================================200 +// END FILE HEADER +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/lavaMD/util/timer/timer.c b/workloads/realworld/uvm_prefetch_async/lavaMD/util/timer/timer.c new file mode 100755 index 0000000000000000000000000000000000000000..c7cc252b4e67b3a868722b7b2c58f5b863ae0cfc --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/lavaMD/util/timer/timer.c @@ -0,0 +1,36 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// TIMER CODE +//===============================================================================================================================================================================================================200 + +//======================================================================================================================================================150 +// INCLUDE/DEFINE +//======================================================================================================================================================150 + +#include + +//======================================================================================================================================================150 +// FUNCTIONS 
+//======================================================================================================================================================150 + +//====================================================================================================100 +// DISPLAY TIME +//====================================================================================================100 + + // Returns the current system time in microseconds +long long get_time() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000000) + tv.tv_usec; +} + +//===============================================================================================================================================================================================================200 +// END TIMER CODE +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/lavaMD/util/timer/timer.h b/workloads/realworld/uvm_prefetch_async/lavaMD/util/timer/timer.h new file mode 100755 index 0000000000000000000000000000000000000000..1744df4b8607f95c057ac4db6e9ced5ff84c4ab7 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/lavaMD/util/timer/timer.h @@ -0,0 +1,21 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//===============================================================================================================================================================================================================200 +// TIMER HEADER +//===============================================================================================================================================================================================================200 + 
+//======================================================================================================================================================150 +// FUNCTION PROTOTYPES +//======================================================================================================================================================150 + +long long get_time(); + +//===============================================================================================================================================================================================================200 +// END TIMER HEADER +//===============================================================================================================================================================================================================200 + +#ifdef __cplusplus +} +#endif diff --git a/workloads/realworld/uvm_prefetch_async/lud/Makefile b/workloads/realworld/uvm_prefetch_async/lud/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1dfc37ac7fa0db46535d0583970dba1cb5cfb80e --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/lud/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := lud +CUFILES := lud_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o lud + diff --git a/workloads/realworld/uvm_prefetch_async/lud/lud_cuda.cu b/workloads/realworld/uvm_prefetch_async/lud/lud_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..cbd14d205614c74b908029f2942b6ef98d8c0345 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/lud/lud_cuda.cu @@ -0,0 +1,317 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture 
memory usage + * removed split query KNN computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK 256 + +#ifndef SIZE +#define SIZE 4096 +#endif + +__global__ void add(float *a, float *b, float *c) +{ + int tid = blockIdx.x; // Handle the data at the index + + c[tid] = a[tid] + b[tid]; +} + +__global__ void scale(float *a, int size, int index) +{ + int i; + int start = (index * size + index); + int end = (index * size + size); + + for (i = start + 1; i < end; i++) + { + a[i] = (a[i] / a[start]); + } +} + +__global__ void reduce(float *a, int size, int index, int b_size) +{ + extern __shared__ float pivot[SIZE]; + int i; + + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = b_size; + + int pivot_start = (index * size + index); + int pivot_end = (index * size + size); + + int start; + int end; + int pivot_row; + int my_row; + + if (tid == 0) + { + for (i = index; i < size; i++) + pivot[i] = a[(index * size) + i]; + } + + __syncthreads(); + + pivot_row = (index * size); + my_row = (((block_size * bid) + tid) * size); + start = my_row + index; + end = my_row + size; + + if (my_row > pivot_row) + { + for (i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(i - my_row)]); + } + } +} + +void initCPU(float *a, int N) +{ + srand((unsigned)2); + // fill the arrays 'a' on the CPU + for (int i = 0; i < (N * N); i++) + { + a[i] = ((rand() % 10) + 1); + // a[i] = 1.0f; + } +} + +void initGPU(float *a_dev, float *a, int N) +{ + for (int i = 0; i < (N * N); i++) + { + a_dev[i] = a[i]; + // a_dev[i] = 1.0f; + } +} + +__global__ void lud_kernel(float *a, int 
N) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + // extern __shared__ float pivot[]; + __shared__ float pivot[SIZE * PREFETCH_COUNT]; + + int fetch = 0; + int end_tile = fetch + N; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = blockDim.x; + + if (tid == 0 && bid == 0) + { + int start = (compute * N + compute); + int end = (compute * N + N); + + for (int i = start + 1; i < end; i++) + a[i] = (a[i] / a[start]); + } + block.sync(); + + if (tid == 0) + { + for (int i = compute; i < N; i++) + memcpy_async(pivot[(fetch % PREFETCH_COUNT) * N + i], a[(compute * N) + i], pipe); + } + // block.sync(); + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = blockDim.x; + + int pivot_row = (compute * N); + int my_row = (((block_size * bid) + tid) * N); + int start = my_row + compute; + int end = my_row + N; + + if (my_row > pivot_row) + { + for (int i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(compute % PREFETCH_COUNT) * N + (i - my_row)]); + } + } + block.sync(); + } +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + float *a; + float *a_gpu; + float *c; + float error; + int N; + int flag = 0; + + float **result; + float **a_ref; + int blocks; + + int i; + int j; + int k; + float l1; + float u1; + + N = SIZE; + // allocate memory on CPU + a = (float *)malloc(sizeof(float) * N * N); + c = (float *)malloc(sizeof(float) * N * N); + + result = (float **)malloc(sizeof(float *) * N); + a_ref = (float **)malloc(sizeof(float *) * N); + + 
for (i = 0; i < N; i++) + { + result[i] = (float *)malloc(sizeof(float) * N); + a_ref[i] = (float *)malloc(sizeof(float) * N); + } + initCPU(a, N); + + GPU_argv_init(); + initTrace(); + startCPU(); + // allocate the memory on the GPU + // cudaMalloc((void **)&dev_a, N * N * sizeof(float)); + + cudaMallocManaged(&a_gpu, N * N * sizeof(float)); + memcpy(a_gpu, a, N * N * sizeof(float)); + + // cudaMemcpy(dev_a, a, N * N * sizeof(float), cudaMemcpyHostToDevice); // copy array to device memory + + /*Perform LU Decomposition*/ + // for (i = 0; i < N; i++) + // { + // scale<<<1, 1>>>(a_gpu, N, i); + // // blocks= ((N-i-1)/512)+1; + // blocks = ((N / DIM_THREAD_BLOCK)); + // // printf("Number of blocks rxd : %d \n",blocks); + // reduce<<>>(a_gpu, N, i, DIM_THREAD_BLOCK); + // cudaDeviceSynchronize(); + // } + + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(a_gpu, N * N * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + blocks = ((N / DIM_THREAD_BLOCK)); + lud_kernel<<>>(a_gpu, N); + cudaDeviceSynchronize(); + /*LU decomposition ends here*/ + + // cudaMemcpy(c, dev_a, N * N * sizeof(float), cudaMemcpyDeviceToHost); // copy array back to host + memcpy(c, a_gpu, N * N * sizeof(float)); + // free the memory allocated on the GPU + cudaFree(a_gpu); + + endCPU(); + finiTrace(); + + /*copy the result matrix into explicit 2D matrix for verification*/ + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + // c[i * N + j] = a_gpu[i * N + j]; + result[i][j] = c[i * N + j]; + // printf("result %d %d Error is %lf \n ", i, j, result[i][j]); + } + } + + printf("======================================================="); + printf("\n Performing inplace verification \n"); + + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + a_ref[i][j] = 0; + for (k = 0; k < N; k++) + { + if (i >= k) + l1 = result[i][k]; + else + l1 = 0; + + if (k == j) + u1 = 1; + else if (k < j) + u1 = result[k][j]; // figured it out + else + u1 = 
0.0; + + a_ref[i][j] = a_ref[i][j] + (l1 * u1); + } + } + } + + // for (i = 0; i < N; i++) + // { + // for (j = 0; j < N; j++) + // { + // error = abs(a[(i * N + j)] - a_ref[i][j]); + // if (error > 1) + // { + // // printf("No match occured at %d %d Error is %lf \n ", i, j, abs(a[(i * N + j)] - a_ref[i][j])); + // // printf("No match occured at %d %d Error is %lf, %lf \n ", i, j, a[(i * N + j)], a_ref[i][j]); + // flag = flag + 1; + // } + // } + // } + + // if (flag == 0) + // printf("Match \n"); + // else + // printf("No Matchs %d \n", flag); + + + + return 0; +} diff --git a/workloads/realworld/uvm_prefetch_async/lud/run.sh b/workloads/realworld/uvm_prefetch_async/lud/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..ea7db937489e328f5e923d2b18774e4256eef123 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/lud/run.sh @@ -0,0 +1 @@ +./lud 1024 diff --git a/workloads/realworld/uvm_prefetch_async/lud/run_super.sh b/workloads/realworld/uvm_prefetch_async/lud/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2791fe07c43d75b40894206ba79ed441f207ee26 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/lud/run_super.sh @@ -0,0 +1 @@ +./lud 4096 diff --git a/workloads/realworld/uvm_prefetch_async/lud_perf/Makefile b/workloads/realworld/uvm_prefetch_async/lud_perf/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1dfc37ac7fa0db46535d0583970dba1cb5cfb80e --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/lud_perf/Makefile @@ -0,0 +1,13 @@ +include ../../../common/make.config + +NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3 +NVCC = $(CUDA_DIR)/bin/nvcc + +EXECUTABLE := lud +CUFILES := lud_cuda.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +all: + $(NVCC) ${NVCCCFLAGS} ${CUFILES} ${DEF} -o ${EXECUTABLE} +clean: + rm -f *.o lud + diff --git 
a/workloads/realworld/uvm_prefetch_async/lud_perf/lud b/workloads/realworld/uvm_prefetch_async/lud_perf/lud new file mode 100755 index 0000000000000000000000000000000000000000..912dadc0edf579e33db2584b21fdb1c24c81cc14 Binary files /dev/null and b/workloads/realworld/uvm_prefetch_async/lud_perf/lud differ diff --git a/workloads/realworld/uvm_prefetch_async/lud_perf/lud_cuda.cu b/workloads/realworld/uvm_prefetch_async/lud_perf/lud_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..6ffdc92a933afad653731a0731d8ffab983fdc14 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/lud_perf/lud_cuda.cu @@ -0,0 +1,317 @@ +/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA + * The modifications are + * removed texture memory usage + * removed split query KNN computation + * added feature extraction with bilinear interpolation + * + * Last modified by Christopher B. Choy 12/23/2016 + */ + +// Includes +#include "cuda.h" +#include +#include +#include +// Constants used by the program +#define BLOCK_DIM 16 + +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define DIM_THREAD_BLOCK 256 + +#ifndef SIZE +#define SIZE 4096 +#endif + +__global__ void add(float *a, float *b, float *c) +{ + int tid = blockIdx.x; // Handle the data at the index + + c[tid] = a[tid] + b[tid]; +} + +__global__ void scale(float *a, int size, int index) +{ + int i; + int start = (index * size + index); + int end = (index * size + size); + + for (i = start + 1; i < end; i++) + { + a[i] = (a[i] / a[start]); + } +} + +__global__ void reduce(float *a, int size, int index, int b_size) +{ + extern __shared__ float pivot[SIZE]; + int i; + + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = b_size; + + int pivot_start = (index * size + index); + int pivot_end = (index * size + size); + + int start; + int end; + 
int pivot_row; + int my_row; + + if (tid == 0) + { + for (i = index; i < size; i++) + pivot[i] = a[(index * size) + i]; + } + + __syncthreads(); + + pivot_row = (index * size); + my_row = (((block_size * bid) + tid) * size); + start = my_row + index; + end = my_row + size; + + if (my_row > pivot_row) + { + for (i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(i - my_row)]); + } + } +} + +void initCPU(float *a, int N) +{ + srand((unsigned)2); + // fill the arrays 'a' on the CPU + for (int i = 0; i < (N * N); i++) + { + a[i] = ((rand() % 10) + 1); + // a[i] = 1.0f; + } +} + +void initGPU(float *a_dev, float *a, int N) +{ + for (int i = 0; i < (N * N); i++) + { + a_dev[i] = a[i]; + // a_dev[i] = 1.0f; + } +} + +__global__ void lud_kernel(float *a, int N) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + // extern __shared__ float pivot[]; + __shared__ float pivot[SIZE * PREFETCH_COUNT]; + + int fetch = 0; + int end_tile = fetch + N; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = blockDim.x; + + if (tid == 0 && bid == 0) + { + int start = (compute * N + compute); + int end = (compute * N + N); + + for (int i = start + 1; i < end; i++) + a[i] = (a[i] / a[start]); + } + block.sync(); + + if (tid == 0) + { + for (int i = compute; i < N; i++) + memcpy_async(pivot[(fetch % PREFETCH_COUNT) * N + i], a[(compute * N) + i], pipe); + } + // block.sync(); + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + int tid = threadIdx.x; + int bid = blockIdx.x; + int block_size = blockDim.x; + + int pivot_row = (compute * N); + int my_row = (((block_size * bid) + tid) * N); + int start = my_row + compute; + int end = my_row + N; + + 
if (my_row > pivot_row) + { + for (int i = start + 1; i < end; i++) + { + a[i] = a[i] - (a[start] * pivot[(compute % PREFETCH_COUNT) * N + (i - my_row)]); + } + } + block.sync(); + } +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + float *a; + float *a_gpu; + float *c; + float error; + int N; + int flag = 0; + + float **result; + float **a_ref; + int blocks; + + int i; + int j; + int k; + float l1; + float u1; + + N = SIZE; + // allocate memory on CPU + a = (float *)malloc(sizeof(float) * N * N); + c = (float *)malloc(sizeof(float) * N * N); + + result = (float **)malloc(sizeof(float *) * N); + a_ref = (float **)malloc(sizeof(float *) * N); + + for (i = 0; i < N; i++) + { + result[i] = (float *)malloc(sizeof(float) * N); + a_ref[i] = (float *)malloc(sizeof(float) * N); + } + initCPU(a, N); + + GPU_argv_init(); + // initTrace(); + startCPU(); + // allocate the memory on the GPU + // cudaMalloc((void **)&dev_a, N * N * sizeof(float)); + + cudaMallocManaged(&a_gpu, N * N * sizeof(float)); + memcpy(a_gpu, a, N * N * sizeof(float)); + + // cudaMemcpy(dev_a, a, N * N * sizeof(float), cudaMemcpyHostToDevice); // copy array to device memory + + /*Perform LU Decomposition*/ + // for (i = 0; i < N; i++) + // { + // scale<<<1, 1>>>(a_gpu, N, i); + // // blocks= ((N-i-1)/512)+1; + // blocks = ((N / DIM_THREAD_BLOCK)); + // // printf("Number of blocks rxd : %d \n",blocks); + // reduce<<>>(a_gpu, N, i, DIM_THREAD_BLOCK); + // cudaDeviceSynchronize(); + // } + + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(a_gpu, N * N * sizeof(float), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + blocks = ((N / DIM_THREAD_BLOCK)); + lud_kernel<<>>(a_gpu, N); + cudaDeviceSynchronize(); + /*LU decomposition ends here*/ + + // cudaMemcpy(c, dev_a, N * N * sizeof(float), cudaMemcpyDeviceToHost); // copy array back to host + 
memcpy(c, a_gpu, N * N * sizeof(float)); + // free the memory allocated on the GPU + cudaFree(a_gpu); + + endCPU(); + // finiTrace(); + + /*copy the result matrix into explicit 2D matrix for verification*/ + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + // c[i * N + j] = a_gpu[i * N + j]; + result[i][j] = c[i * N + j]; + // printf("result %d %d Error is %lf \n ", i, j, result[i][j]); + } + } + + printf("======================================================="); + printf("\n Performing inplace verification \n"); + + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + a_ref[i][j] = 0; + for (k = 0; k < N; k++) + { + if (i >= k) + l1 = result[i][k]; + else + l1 = 0; + + if (k == j) + u1 = 1; + else if (k < j) + u1 = result[k][j]; // figured it out + else + u1 = 0.0; + + a_ref[i][j] = a_ref[i][j] + (l1 * u1); + } + } + } + + // for (i = 0; i < N; i++) + // { + // for (j = 0; j < N; j++) + // { + // error = abs(a[(i * N + j)] - a_ref[i][j]); + // if (error > 1) + // { + // // printf("No match occured at %d %d Error is %lf \n ", i, j, abs(a[(i * N + j)] - a_ref[i][j])); + // // printf("No match occured at %d %d Error is %lf, %lf \n ", i, j, a[(i * N + j)], a_ref[i][j]); + // flag = flag + 1; + // } + // } + // } + + // if (flag == 0) + // printf("Match \n"); + // else + // printf("No Matchs %d \n", flag); + + + + return 0; +} diff --git a/workloads/realworld/uvm_prefetch_async/lud_perf/run.sh b/workloads/realworld/uvm_prefetch_async/lud_perf/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..ea7db937489e328f5e923d2b18774e4256eef123 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/lud_perf/run.sh @@ -0,0 +1 @@ +./lud 1024 diff --git a/workloads/realworld/uvm_prefetch_async/lud_perf/run_super.sh b/workloads/realworld/uvm_prefetch_async/lud_perf/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2791fe07c43d75b40894206ba79ed441f207ee26 --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch_async/lud_perf/run_super.sh @@ -0,0 +1 @@ +./lud 4096 diff --git a/workloads/realworld/uvm_prefetch_async/nw/Makefile b/workloads/realworld/uvm_prefetch_async/nw/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..b33ae6462826357f9665bfd7fc9929ed176f9b35 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/nw/Makefile @@ -0,0 +1,15 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include + +SRC = needle.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = needle + +release: $(SRC) + $(CC) ${KERNEL_DIM} $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt diff --git a/workloads/realworld/uvm_prefetch_async/nw/Makefile_nvidia b/workloads/realworld/uvm_prefetch_async/nw/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..2fd0b98d07beea56ae69a96a0c8cb3af87d602f6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/nw/Makefile_nvidia @@ -0,0 +1,50 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
+# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := needle +# CUDA source files (compiled with cudacc) +CUFILES := needle.cu +# CUDA dependency files +CU_DEPS := needle_kernel.cu +# C/C++ source files (compiled with gcc / c++) +# CCFILES := BlackScholes_gold.cpp + + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/uvm_prefetch_async/nw/README b/workloads/realworld/uvm_prefetch_async/nw/README new file mode 100755 index 0000000000000000000000000000000000000000..683cbd53db81f0ece4f926fa01316582ea0d5fc9 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/nw/README @@ -0,0 +1,12 @@ +Note: This program generate two sequences randomly. Please specify your own sequences for different uses. 
+ At the current stage, the program only supports two sequences with the same lengh, which can be divided by 16. +Usage: needle 32 10 + 32 //the length of both sequences + 10 //penalty value + +******Adjustable work group size***** +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" diff --git a/workloads/realworld/uvm_prefetch_async/nw/needle.cu b/workloads/realworld/uvm_prefetch_async/nw/needle.cu new file mode 100755 index 0000000000000000000000000000000000000000..6ca18de92670def6bc2bac66dacbe2f3b8d99db5 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/nw/needle.cu @@ -0,0 +1,293 @@ +#define LIMIT -999 +#include +#include +#include +#include +#include "needle.h" +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +// includes, kernels +#include "needle_kernel.cu" + +#ifdef TIMING +#include "timing.h" + +struct timeval tv; +struct timeval tv_total_start, tv_total_end; +struct timeval tv_h2d_start, tv_h2d_end; +struct timeval tv_d2h_start, tv_d2h_end; +struct timeval tv_kernel_start, tv_kernel_end; +struct timeval tv_mem_alloc_start, tv_mem_alloc_end; +struct timeval tv_close_start, tv_close_end; +float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, + d2h_time = 0, close_time = 0, total_time = 0; +#endif + +//////////////////////////////////////////////////////////////////////////////// +// declaration, forward +void runTest( int argc, char** argv); + + +int blosum62[24][24] = { +{ 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0, -4}, +{-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1, -4}, +{-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1, -4}, +{-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1, -4}, +{ 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4}, 
+{-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1, -4}, +{-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4}, +{ 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -4}, +{-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1, -4}, +{-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1, -4}, +{-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1, -4}, +{-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1, -4}, +{-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1, -4}, +{-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1, -4}, +{-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2, -4}, +{ 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0, -4}, +{ 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0, -4}, +{-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2, -4}, +{-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1, -4}, +{ 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1, -4}, +{-2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1, -4}, +{-1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4}, +{ 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1, -4}, +{-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1} +}; + +double gettime() { + struct timeval t; + gettimeofday(&t,NULL); + return t.tv_sec+t.tv_usec*1e-6; +} + +//////////////////////////////////////////////////////////////////////////////// +// Program main 
+//////////////////////////////////////////////////////////////////////////////// +int +main( int argc, char** argv) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %llu start_tsp %llu\n", start_tsc, start_tsp); + + printf("WG size of kernel = %d \n", BLOCK_SIZE); + + runTest( argc, argv); + + return EXIT_SUCCESS; +} + +void usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "\t - x and y dimensions\n"); + fprintf(stderr, "\t - penalty(positive integer)\n"); + exit(1); +} + +void runTest( int argc, char** argv) +{ + int max_rows, max_cols, penalty, nblocks; + int *input_itemsets, *output_itemsets, *referrence; + int *matrix_cuda, *referrence_cuda; + int size; + + + // the lengths of the two sequences should be able to divided by 16. + // And at current stage max_rows needs to equal max_cols + if (argc == 4) + { + max_rows = atoi(argv[1]); + max_cols = atoi(argv[1]); + penalty = atoi(argv[2]); + nblocks = atoi(argv[3]); + } + else{ + usage(argc, argv); + } + + if(atoi(argv[1])%16!=0){ + fprintf(stderr,"The dimension values must be a multiple of 16\n"); + exit(1); + } + + + max_rows = max_rows + 1; + max_cols = max_cols + 1; + referrence = (int *)malloc( max_rows * max_cols * sizeof(int) ); + input_itemsets = (int *)malloc( max_rows * max_cols * sizeof(int) ); + output_itemsets = (int *)malloc( max_rows * max_cols * sizeof(int) ); + + + if (!input_itemsets) + fprintf(stderr, "error: can not allocate memory"); + + srand ( 7 ); + + + for (int i = 0 ; i < max_cols; i++){ + for (int j = 0 ; j < max_rows; j++){ + input_itemsets[i*max_cols+j] = 0; + } + } + + printf("Start Needleman-Wunsch\n"); + + for( int i=1; i< max_rows ; i++){ //please define your own sequence. + input_itemsets[i*max_cols] = rand() % 10 + 1; + } + for( int j=1; j< max_cols ; j++){ //please define your own sequence. 
+ input_itemsets[j] = rand() % 10 + 1; + } + + + for (int i = 1 ; i < max_cols; i++){ + for (int j = 1 ; j < max_rows; j++){ + referrence[i*max_cols+j] = blosum62[input_itemsets[i*max_cols]][input_itemsets[j]]; + } + } + + for( int i = 1; i< max_rows ; i++) + input_itemsets[i*max_cols] = -i * penalty; + for( int j = 1; j< max_cols ; j++) + input_itemsets[j] = -j * penalty; + + + size = max_cols * max_rows; + + GPU_argv_init(); + initTrace(); + startCPU(); + + cudaMallocManaged((void**)& referrence_cuda, sizeof(int)*size); + cudaMallocManaged((void **)&matrix_cuda, sizeof(int) * size); + + memcpy(referrence_cuda, referrence, sizeof(int) * size); + memcpy(matrix_cuda, input_itemsets, sizeof(int) * size); + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + + cudaMemPrefetchAsync(referrence_cuda, sizeof(int) * size, GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(matrix_cuda, sizeof(int) * size, GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + + dim3 dimGrid; + dim3 dimBlock(BLOCK_SIZE, 1); + // int block_width = ( max_cols - 1 )/BLOCK_SIZE; + int block_width = nblocks - 1; + int block_size = (max_cols - 1) / (nblocks * BLOCK_SIZE); + +#ifdef TIMING + gettimeofday(&tv_kernel_start, NULL); +#endif + + //printf("Processing top-left matrix\n"); + //process top-left matrix + for( int i = 1 ; i <= block_width ; i++) { + dimGrid.x = i; + dimGrid.y = 1; + needle_cuda_shared_1<<>>(referrence_cuda, matrix_cuda + ,max_cols, penalty, i, block_width, block_size); + } + //printf("Processing bottom-right matrix\n"); + //process bottom-right matrix + for( int i = block_width - 1 ; i >= 1 ; i--){ + dimGrid.x = i; + dimGrid.y = 1; + needle_cuda_shared_2<<>>(referrence_cuda, matrix_cuda + ,max_cols, penalty, i, block_width, block_size); + } + cudaDeviceSynchronize(); + memcpy(output_itemsets, matrix_cuda, sizeof(int) * size); + + cudaFree(referrence_cuda); + cudaFree(matrix_cuda); + 
+#ifdef TIMING + gettimeofday(&tv_kernel_end, NULL); + tvsub(&tv_kernel_end, &tv_kernel_start, &tv); + kernel_time += tv.tv_sec * 1000.0 + (float) tv.tv_usec / 1000.0; +#endif + + // cudaMemcpy(output_itemsets, matrix_cuda, sizeof(int) * size, cudaMemcpyDeviceToHost); + +//#define TRACEBACK +#ifdef TRACEBACK + + FILE *fpo = fopen("result.txt","w"); + fprintf(fpo, "print traceback value GPU:\n"); + + for (int i = max_rows - 2, j = max_rows - 2; i>=0, j>=0;){ + int nw, n, w, traceback; + if ( i == max_rows - 2 && j == max_rows - 2 ) + fprintf(fpo, "%d ", output_itemsets[ i * max_cols + j]); //print the first element + if ( i == 0 && j == 0 ) + break; + if ( i > 0 && j > 0 ){ + nw = output_itemsets[(i - 1) * max_cols + j - 1]; + w = output_itemsets[ i * max_cols + j - 1 ]; + n = output_itemsets[(i - 1) * max_cols + j]; + } + else if ( i == 0 ){ + nw = n = LIMIT; + w = output_itemsets[ i * max_cols + j - 1 ]; + } + else if ( j == 0 ){ + nw = w = LIMIT; + n = output_itemsets[(i - 1) * max_cols + j]; + } + else{ + } + + //traceback = maximum(nw, w, n); + int new_nw, new_w, new_n; + new_nw = nw + referrence[i * max_cols + j]; + new_w = w - penalty; + new_n = n - penalty; + + traceback = maximum(new_nw, new_w, new_n); + if(traceback == new_nw) + traceback = nw; + if(traceback == new_w) + traceback = w; + if(traceback == new_n) + traceback = n; + + fprintf(fpo, "%d ", traceback); + + if(traceback == nw ) + {i--; j--; continue;} + + else if(traceback == w ) + {j--; continue;} + + else if(traceback == n ) + {i--; continue;} + + else + ; + } + + fclose(fpo); + +#endif + endCPU(); + finiTrace(); + free(referrence); + free(input_itemsets); + free(output_itemsets); + +#ifdef TIMING + printf("Exec: %f\n", kernel_time); +#endif +} + diff --git a/workloads/realworld/uvm_prefetch_async/nw/needle.h b/workloads/realworld/uvm_prefetch_async/nw/needle.h new file mode 100755 index 0000000000000000000000000000000000000000..e73320d6496262665592117d242e9bc383298b5b --- /dev/null +++ 
b/workloads/realworld/uvm_prefetch_async/nw/needle.h @@ -0,0 +1,11 @@ +#ifdef RD_WG_SIZE_0_0 + #define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) + #define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) + #define BLOCK_SIZE RD_WG_SIZE +#else + #define BLOCK_SIZE 16 +#endif +//#define TRACE + diff --git a/workloads/realworld/uvm_prefetch_async/nw/needle_kernel.cu b/workloads/realworld/uvm_prefetch_async/nw/needle_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..5b909b72964d9f33031db6fe506ec0ca9d4f54b6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/nw/needle_kernel.cu @@ -0,0 +1,239 @@ + +#include "needle.h" +#include + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define SDATA(index) CUT_BANK_CHECKER(sdata, index) + +__device__ __host__ int +maximum(int a, + int b, + int c) +{ + + int k; + if (a <= b) + k = b; + else + k = a; + + if (k <= c) + return (c); + else + return (k); +} + +__global__ void +needle_cuda_shared_1(int *referrence, + int *matrix_cuda, + int cols, + int penalty, + int i, + int block_width, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + int bx = blockIdx.x; + int tx = threadIdx.x; + + int b_index_x = bx; + int b_index_y = i - 1 - bx; + + __shared__ int temp[PREFETCH_COUNT * (BLOCK_SIZE + 1)][BLOCK_SIZE + 1]; + __shared__ int ref[PREFETCH_COUNT * BLOCK_SIZE][BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (b_index_y * gridDim.x + b_index_x) * tiles_this_block; + int fetch = base_tile; + int end_tile = fetch + tiles_this_block; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + 
int offset = fetch - base_tile; + int block_id = fetch / tiles_this_block; + int b_index_x = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int b_index_y = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + int index = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + (cols + 1); + int index_n = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + (1); + int index_w = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + (cols); + int index_nw = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x; + + if (tx == 0) + memcpy_async(temp[(fetch % PREFETCH_COUNT) * (BLOCK_SIZE + 1) + tx][0], matrix_cuda[index_nw], pipe); + + for (int ty = 0; ty < BLOCK_SIZE; ty++) + memcpy_async(ref[(fetch % PREFETCH_COUNT) * (BLOCK_SIZE) + ty][tx], referrence[index + cols * ty], pipe); + block.sync(); + + memcpy_async(temp[(fetch % PREFETCH_COUNT) * (BLOCK_SIZE + 1) + tx + 1][0], matrix_cuda[index_w + cols * tx], pipe); + block.sync(); + + memcpy_async(temp[(fetch % PREFETCH_COUNT) * (BLOCK_SIZE + 1) + 0][tx + 1], matrix_cuda[index_n], pipe); + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + int b_index_x = compute % tile_dim_x; + int b_index_y = compute / tile_dim_x; + + int index = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + (cols + 1); + + for (int m = 0; m < BLOCK_SIZE; m++) + { + if (tx <= m) + { + int t_index_x = tx + 1; + int t_index_y = (compute % PREFETCH_COUNT) * (BLOCK_SIZE + 1) + m - tx + 1; + + temp[t_index_y][t_index_x] = maximum(temp[t_index_y - 1][t_index_x - 1] + ref[(compute % PREFETCH_COUNT) * BLOCK_SIZE + m - tx][t_index_x - 1], + temp[t_index_y][t_index_x - 1] - penalty, + temp[t_index_y - 1][t_index_x] - penalty); + } + block.sync(); + } + + for (int m = BLOCK_SIZE - 2; m >= 0; m--) + { + if (tx <= m) + { + int t_index_x = tx + BLOCK_SIZE - 
m; + int t_index_y = (compute % PREFETCH_COUNT) * (BLOCK_SIZE + 1) + BLOCK_SIZE - tx; + temp[t_index_y][t_index_x] = maximum(temp[t_index_y - 1][t_index_x - 1] + ref[(compute % PREFETCH_COUNT) * BLOCK_SIZE + m - tx][t_index_x - 1], + temp[t_index_y][t_index_x - 1] - penalty, + temp[t_index_y - 1][t_index_x] - penalty); + } + block.sync(); + } + + for (int ty = 0; ty < BLOCK_SIZE; ty++) + matrix_cuda[index + ty * cols] = temp[(compute % PREFETCH_COUNT) * (BLOCK_SIZE + 1) + ty + 1][tx + 1]; + } +} + +__global__ void +needle_cuda_shared_2(int *referrence, + int *matrix_cuda, + int cols, + int penalty, + int i, + int block_width, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + int bx = blockIdx.x; + int tx = threadIdx.x; + + int b_index_x = bx + block_width - i; + int b_index_y = block_width - bx - 1; + + __shared__ int temp[PREFETCH_COUNT * (BLOCK_SIZE + 1)][BLOCK_SIZE + 1]; + __shared__ int ref[PREFETCH_COUNT * BLOCK_SIZE][BLOCK_SIZE]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (b_index_y * gridDim.x + b_index_x) * tiles_this_block; + int fetch = base_tile; + int end_tile = fetch + tiles_this_block; + + for (int compute = fetch; compute < end_tile; compute++) + { + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + int offset = fetch - base_tile; + int block_id = fetch / tiles_this_block; + int b_index_x = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int b_index_y = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + int index = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + (cols + 1); + int index_n = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + (1); + int index_w = cols * BLOCK_SIZE * 
b_index_y + BLOCK_SIZE * b_index_x + (cols); + int index_nw = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x; + + for (int ty = 0; ty < BLOCK_SIZE; ty++) + ref[(fetch % PREFETCH_COUNT) * BLOCK_SIZE + ty][tx] = referrence[index + cols * ty]; + block.sync(); + + if (tx == 0) + temp[(fetch % PREFETCH_COUNT) * (BLOCK_SIZE + 1) + tx][0] = matrix_cuda[index_nw]; + temp[(fetch % PREFETCH_COUNT) * (BLOCK_SIZE + 1) + tx + 1][0] = matrix_cuda[index_w + cols * tx]; + block.sync(); + + temp[(fetch % PREFETCH_COUNT) * (BLOCK_SIZE + 1) + 0][tx + 1] = matrix_cuda[index_n]; + block.sync(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + int b_index_x = compute % tile_dim_x; + int b_index_y = compute / tile_dim_x; + + int index = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + (cols + 1); + + for (int m = 0; m < BLOCK_SIZE; m++) + { + if (tx <= m) + { + int t_index_x = tx + 1; + int t_index_y = (compute % PREFETCH_COUNT) * (BLOCK_SIZE + 1) + m - tx + 1; + temp[t_index_y][t_index_x] = maximum(temp[t_index_y - 1][t_index_x - 1] + ref[(compute % PREFETCH_COUNT) * BLOCK_SIZE + m - tx][t_index_x - 1], + temp[t_index_y][t_index_x - 1] - penalty, + temp[t_index_y - 1][t_index_x] - penalty); + } + block.sync(); + } + + for (int m = BLOCK_SIZE - 2; m >= 0; m--) + { + if (tx <= m) + { + int t_index_x = tx + BLOCK_SIZE - m; + int t_index_y = (compute % PREFETCH_COUNT) * (BLOCK_SIZE + 1) + BLOCK_SIZE - tx; + + temp[t_index_y][t_index_x] = maximum(temp[t_index_y - 1][t_index_x - 1] + ref[(compute % PREFETCH_COUNT) * BLOCK_SIZE + BLOCK_SIZE - tx - 1][t_index_x - 1], + temp[t_index_y][t_index_x - 1] - penalty, + temp[t_index_y - 1][t_index_x] - penalty); + } + block.sync(); + } + + for (int ty = 0; ty < BLOCK_SIZE; ty++) + matrix_cuda[index + ty * cols] = temp[(compute % PREFETCH_COUNT) * (BLOCK_SIZE + 1) + ty + 1][tx + 1]; + } +} diff --git 
a/workloads/realworld/uvm_prefetch_async/nw/run.sh b/workloads/realworld/uvm_prefetch_async/nw/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..e3d20f9f4402de57c2a49c4db5c04d917907d741 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/nw/run.sh @@ -0,0 +1 @@ +./needle 32768 10 256 diff --git a/workloads/realworld/uvm_prefetch_async/nw/run_super.sh b/workloads/realworld/uvm_prefetch_async/nw/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..23b570be1ac96cce67094a9469de1c6d24c03b08 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/nw/run_super.sh @@ -0,0 +1 @@ +./needle 32768 10 64 diff --git a/workloads/realworld/uvm_prefetch_async/pathfinder/Makefile b/workloads/realworld/uvm_prefetch_async/pathfinder/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d740e79027f3651c458e229179bbbd46fb4fcbec --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/pathfinder/Makefile @@ -0,0 +1,14 @@ +include ../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc +INCLUDE := $(CUDA_DIR)/include + +SRC = pathfinder.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = pathfinder + +release: + $(CC) $(SRC) -o $(EXE) -I$(INCLUDE) -I$(CUPTI_INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcupti + +clean: + rm -f pathfinder diff --git a/workloads/realworld/uvm_prefetch_async/pathfinder/README b/workloads/realworld/uvm_prefetch_async/pathfinder/README new file mode 100644 index 0000000000000000000000000000000000000000..9af75abe201eb95c5c89a038c6d79f54b276f94e --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/pathfinder/README @@ -0,0 +1,6 @@ +To compile the program: + +nvcc -cuda dynproc.cu +nvcc -o dynproc dynproc.cu.cpp + +Usage: dynproc row_len col_len pyramid_height diff --git a/workloads/realworld/uvm_prefetch_async/pathfinder/pathfinder.cu b/workloads/realworld/uvm_prefetch_async/pathfinder/pathfinder.cu new file mode 100644 index 
0000000000000000000000000000000000000000..743c2384e24e58bd8dd37de5627d3eb5ab0de7d6 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/pathfinder/pathfinder.cu @@ -0,0 +1,353 @@ +#include +#include +#include +#include +#include +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +#define GPU_DEVICE 6 + +#ifdef TIMING +#include "timing.h" + +struct timeval tv; +struct timeval tv_total_start, tv_total_end; +struct timeval tv_h2d_start, tv_h2d_end; +struct timeval tv_d2h_start, tv_d2h_end; +struct timeval tv_kernel_start, tv_kernel_end; +struct timeval tv_mem_alloc_start, tv_mem_alloc_end; +struct timeval tv_close_start, tv_close_end; +float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, + d2h_time = 0, close_time = 0, total_time = 0; +#endif + +#define BLOCK_SIZE 256 +#define STR_SIZE 256 +#define DEVICE 0 +#define HALO 1 // halo width along one direction when advancing to the next iteration + +// #define BENCH_PRINT + +void run(int argc, char **argv); + +int rows, cols; +int *data; +int **wall; +int *result; +#define M_SEED 9 +int pyramid_height; +int nblocks; + +void init(int argc, char **argv) +{ + if (argc == 5) + { + cols = atoi(argv[1]); + rows = atoi(argv[2]); + pyramid_height = atoi(argv[3]); + nblocks = atoi(argv[4]); + } + else + { + printf("Usage: dynproc row_len col_len pyramid_height\n"); + exit(0); + } + data = new int[rows * cols]; + wall = new int *[rows]; + for (int n = 0; n < rows; n++) + wall[n] = data + cols * n; + result = new int[cols]; + + int seed = M_SEED; + srand(seed); + + for (int i = 0; i < rows; i++) + { + for (int j = 0; j < cols; j++) + { + wall[i][j] = rand() % 10; + } + } +#ifdef BENCH_PRINT + for (int i = 0; i < rows; i++) + { + for (int j = 0; j < cols; j++) + { + printf("%d ", wall[i][j]); + } + printf("\n"); + } +#endif +} + +void fatal(char *s) +{ + fprintf(stderr, "error: %s\n", 
s); +} + +#define IN_RANGE(x, min, max) ((x) >= (min) && (x) <= (max)) +#define CLAMP_RANGE(x, min, max) x = (x < (min)) ? min : ((x > (max)) ? max : x) +#define MIN(a, b) ((a) <= (b) ? (a) : (b)) + +__global__ void dynproc_kernel( + int iteration, + int *gpuWall, + int *gpuSrc, + int *gpuResults, + int cols, + int rows, + int startStep, + int border, + int small_block_cols, + int tile_size, + int batches) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + __shared__ int prev[BLOCK_SIZE * PREFETCH_COUNT]; + __shared__ int result[BLOCK_SIZE]; + + int bx = blockIdx.x; + int tx = threadIdx.x; + + int fetch = 0; + + for (int compute = fetch; compute < batches; compute++) + { + for (; fetch < batches && fetch < compute + PREFETCH_COUNT; fetch++) + { + // each block finally computes result for a small block + // after N iterations. + // it is the non-overlapping small blocks that cover + // all the input data + + // calculate the boundary for the block according to + // the boundary of its small block + int blkX = bx * tile_size + small_block_cols * fetch - border; + int blkXmax = blkX + BLOCK_SIZE - 1; + + // calculate the global thread coordination + int xidx = blkX + tx; + + // effective range within this block that falls within + // the valid range of the input data + // used to rule out computation outside the boundary. + int validXmin = (blkX < 0) ? -blkX : 0; + int validXmax = (blkXmax > cols - 1) ? BLOCK_SIZE - 1 - (blkXmax - cols + 1) : BLOCK_SIZE - 1; + + int W = tx - 1; + int E = tx + 1; + + W = (W < validXmin) ? validXmin : W; + E = (E > validXmax) ? validXmax : E; + + bool isValid = IN_RANGE(tx, validXmin, validXmax); + + if (IN_RANGE(xidx, 0, cols - 1)) + { + memcpy_async(prev[(fetch % PREFETCH_COUNT) * BLOCK_SIZE + tx], gpuSrc[xidx], pipe); + } + // block.sync(); // [Ronny] Added sync to avoid race on prev Aug. 
14 2012 + pipe.commit(); + } + if (fetch == batches) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + // calculate the boundary for the block according to + // the boundary of its small block + int blkX = bx * tile_size + small_block_cols * compute - border; + int blkXmax = blkX + BLOCK_SIZE - 1; + + // calculate the global thread coordination + int xidx = blkX + tx; + + // effective range within this block that falls within + // the valid range of the input data + // used to rule out computation outside the boundary. + int validXmin = (blkX < 0) ? -blkX : 0; + int validXmax = (blkXmax > cols - 1) ? BLOCK_SIZE - 1 - (blkXmax - cols + 1) : BLOCK_SIZE - 1; + + int W = tx - 1; + int E = tx + 1; + + W = (W < validXmin) ? validXmin : W; + E = (E > validXmax) ? validXmax : E; + + bool isValid = IN_RANGE(tx, validXmin, validXmax); + + bool computed; + for (int i = 0; i < iteration; i++) + { + computed = false; + if (IN_RANGE(tx, i + 1, BLOCK_SIZE - i - 2) && + isValid) + { + computed = true; + int left = prev[(compute % PREFETCH_COUNT) * BLOCK_SIZE + W]; + int up = prev[(compute % PREFETCH_COUNT) * BLOCK_SIZE + tx]; + int right = prev[(compute % PREFETCH_COUNT) * BLOCK_SIZE + E]; + int shortest = MIN(left, up); + shortest = MIN(shortest, right); + int index = cols * (startStep + i) + xidx; + result[tx] = shortest + gpuWall[index]; + } + block.sync(); + if (i == iteration - 1) + break; + if (computed) // Assign the computation range + prev[(compute % PREFETCH_COUNT) * BLOCK_SIZE + tx] = result[tx]; + block.sync(); // [Ronny] Added sync to avoid race on prev Aug. 
14 2012 + } + + // update the global memory + // after the last iteration, only threads coordinated within the + // small block perform the calculation and switch on ``computed'' + if (computed) + { + gpuResults[xidx] = result[tx]; + } + } +} + +/* + compute N time steps +*/ +int calc_path(int *gpuWall, int *gpuResult[2], int rows, int cols, + int pyramid_height, int blockCols, int borderCols, int tile_size, int batches) +{ + dim3 dimBlock(BLOCK_SIZE); + dim3 dimGrid(nblocks); + + int src = 1, dst = 0; + for (int t = 0; t < rows - 1; t += pyramid_height) + { + int temp = src; + src = dst; + dst = temp; + + cudaStream_t stream1; + cudaStream_t stream2; + cudaStream_t stream3; + cudaStreamCreate(&stream1); + cudaStreamCreate(&stream2); + cudaStreamCreate(&stream3); + + cudaMemPrefetchAsync(gpuWall, sizeof(int) * (rows * cols - cols), GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + cudaMemPrefetchAsync(gpuResult[src], sizeof(int) * cols, GPU_DEVICE, stream2); + cudaStreamSynchronize(stream2); + cudaMemPrefetchAsync(gpuResult[dst], sizeof(int) * cols, GPU_DEVICE, stream3); + cudaStreamSynchronize(stream3); + + int iteration = MIN(pyramid_height, rows - t - 1); + int small_block_cols = BLOCK_SIZE - iteration * HALO * 2; + dynproc_kernel<<>>( + iteration, gpuWall, gpuResult[src], gpuResult[dst], + cols, rows, t, borderCols, small_block_cols, tile_size, batches); + + // for the measurement fairness + cudaDeviceSynchronize(); + } + return dst; +} + +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + GPU_argv_init(); + + run(argc, argv); + + return EXIT_SUCCESS; +} + +void run(int argc, char **argv) +{ + init(argc, argv); + + /* --------------- pyramid parameters --------------- */ + int borderCols = (pyramid_height)*HALO; + int smallBlockCol = BLOCK_SIZE - (pyramid_height)*HALO * 2; + int blockCols = cols / smallBlockCol + ((cols % smallBlockCol == 
0) ? 0 : 1); + + //ruihao + int cols_per_block = cols / nblocks; + if (cols_per_block < BLOCK_SIZE) cols_per_block = BLOCK_SIZE; + int batches = cols_per_block / smallBlockCol + ((cols_per_block % smallBlockCol == 0) ? 0 : 1); + + // printf("pyramidHeight: %d\ngridSize: [%d]\nborder:[%d]\nblockSize: %d\nblockGrid:[%d]\ntargetBlock:[%d]\n", + // pyramid_height, cols, borderCols, BLOCK_SIZE, blockCols, smallBlockCol); + printf("pyramidHeight: %d\ngridSize: [%d]\nborder:[%d]\nblockSize: %d\nblockGrid:[%d]\ntargetBlock:[%d]\n", + pyramid_height, cols, borderCols, BLOCK_SIZE, nblocks, smallBlockCol); + + int *gpuWall, *gpuResult[2]; + int size = rows * cols; + + initTrace(); + startCPU(); + + cudaMallocManaged((void **)&gpuResult[0], sizeof(int) * cols); + cudaMallocManaged((void **)&gpuResult[1], sizeof(int) * cols); + memcpy(gpuResult[0], data, sizeof(int) * cols); + cudaMallocManaged((void **)&gpuWall, sizeof(int) * (size - cols)); + memcpy(gpuWall, data + cols, sizeof(int) * (size - cols)); + +#ifdef TIMING + gettimeofday(&tv_kernel_start, NULL); +#endif + + // int final_ret = calc_path(gpuWall, gpuResult, rows, cols, + // pyramid_height, blockCols, borderCols); + int final_ret = calc_path(gpuWall, gpuResult, rows, cols, + pyramid_height, blockCols, borderCols, cols_per_block, batches); + +#ifdef TIMING + gettimeofday(&tv_kernel_end, NULL); + tvsub(&tv_kernel_end, &tv_kernel_start, &tv); + kernel_time += tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; +#endif + + memcpy(result, gpuResult[final_ret], sizeof(int) * cols); + +#ifdef BENCH_PRINT + for (int i = 0; i < cols; i++) + printf("%d ", data[i]); + printf("\n"); + for (int i = 0; i < cols; i++) + printf("%d ", result[i]); + printf("\n"); +#endif + + cudaFree(gpuWall); + cudaFree(gpuResult[0]); + cudaFree(gpuResult[1]); + + endCPU(); + finiTrace(); + + delete[] data; + delete[] wall; + delete[] result; + +#ifdef TIMING + printf("Exec: %f\n", kernel_time); +#endif +} diff --git 
a/workloads/realworld/uvm_prefetch_async/pathfinder/result.txt b/workloads/realworld/uvm_prefetch_async/pathfinder/result.txt new file mode 100644 index 0000000000000000000000000000000000000000..fa67c591d071682e1842a455f4477b397825e250 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/pathfinder/result.txt @@ -0,0 +1,11 @@ +pyramidHeight: 20 +gridSize: [100000] +border:[20] +blockSize: 256 +blockGrid:[463] +targetBlock:[216] +CUPTI dynproc_kernel iter 0 start: 1679530155077329959 end: 1679530155078946951 +CUPTI dynproc_kernel iter 20 start: 1679530155078953603 end: 1679530155081441509 +CUPTI dynproc_kernel iter 40 start: 1679530155081441880 end: 1679530155083936228 +CUPTI dynproc_kernel iter 60 start: 1679530155083936508 end: 1679530155086433891 +CUPTI dynproc_kernel iter 80 start: 1679530155086434172 end: 1679530155088929332 diff --git a/workloads/realworld/uvm_prefetch_async/pathfinder/run.sh b/workloads/realworld/uvm_prefetch_async/pathfinder/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..11a9e6199ea4b2ff7fd3e0ebf893dc96fa89ff45 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/pathfinder/run.sh @@ -0,0 +1,2 @@ +#./pathfinder 100000 100 20 1024 > result.txt +./pathfinder 10000000 100 20 1024 diff --git a/workloads/realworld/uvm_prefetch_async/pathfinder/run_super.sh b/workloads/realworld/uvm_prefetch_async/pathfinder/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..b35cc0b44511def3912323c1e0d58c2daa280722 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/pathfinder/run_super.sh @@ -0,0 +1 @@ +./pathfinder 10000000 100 20 1024 diff --git a/workloads/realworld/uvm_prefetch_async/srad/Makefile b/workloads/realworld/uvm_prefetch_async/srad/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..47da520a663446e36c04461d54cfbb3d12cfa328 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/srad/Makefile @@ -0,0 +1,15 @@ +include 
../../../common/make.config + +CC := $(CUDA_DIR)/bin/nvcc + +INCLUDE := $(CUDA_DIR)/include -I$(CUPTI_INCLUDE) -std=c++11 -arch=sm_80 -O3 + +SRC = srad.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp + +EXE = srad + +release: $(SRC) + $(CC) $(KERNEL_DIM) $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) -L$(CUPTI_LIB_DIR) -lcuda -lcupti + +clean: $(SRC) + rm -f $(EXE) $(EXE).linkinfo result.txt diff --git a/workloads/realworld/uvm_prefetch_async/srad/Makefile_nvidia b/workloads/realworld/uvm_prefetch_async/srad/Makefile_nvidia new file mode 100755 index 0000000000000000000000000000000000000000..e1f345c41c0f838dcf159958f628276455ef4dd7 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/srad/Makefile_nvidia @@ -0,0 +1,22 @@ +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := srad +# CUDA source files (compiled with cudacc) +CUFILES := srad.cu +# CUDA dependency files +CU_DEPS := \ + srad_kernel.cu \ + +# C/C++ source files (compiled with gcc / c++) +CCFILES := \ + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/workloads/realworld/uvm_prefetch_async/srad/README b/workloads/realworld/uvm_prefetch_async/srad/README new file mode 100755 index 0000000000000000000000000000000000000000..91e803b576bdeebe232c00a5112dadd836ffc33f --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/srad/README @@ -0,0 +1,24 @@ +In srad.h, define either GPU or CPU computation +Currently, the GPU implementation can only support x-, y-dimensions that can be divided by 16. 
+ +Usage: +srad 128 128 0 31 0 31 0.5 2 + +128 //number of rows in the domain +128 //number of cols in the domain +0 //y1 position of the speckle +31 //y2 position of the speckle +0 //x1 position of the speckle +31 //x2 position of the speckle +0.5 //Lambda value +2 //number of iterations + + +******Adjustable work group size***** +The kernel has square shape +RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe one dimesion +The total thread number for one block is RD_WG_SIZE_0*RD_WG_SIZE_0 + +USAGE: +make clean +make KERNEL_DIM="-DRD_WG_SIZE_0=16" \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/srad/run.sh b/workloads/realworld/uvm_prefetch_async/srad/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..982fd1345383490dd093950055b359dc475480cc --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/srad/run.sh @@ -0,0 +1,3 @@ +# ./srad 2048 2048 0 127 0 127 0.5 2 32 + +./srad 16384 16384 0 127 0 127 0.5 2 32 \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/srad/run_super.sh b/workloads/realworld/uvm_prefetch_async/srad/run_super.sh new file mode 100755 index 0000000000000000000000000000000000000000..2d0f1b8ebd049e33bbb74a15722fc7172167641f --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/srad/run_super.sh @@ -0,0 +1 @@ +./srad 32768 32768 0 127 0 127 0.5 2 8 \ No newline at end of file diff --git a/workloads/realworld/uvm_prefetch_async/srad/srad.cu b/workloads/realworld/uvm_prefetch_async/srad/srad.cu new file mode 100755 index 0000000000000000000000000000000000000000..f7755e15ea422b5424c22ea3a2b18b3d956bf84c --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/srad/srad.cu @@ -0,0 +1,304 @@ +// includes, system +#include +#include +#include +#include +#include "srad.h" +#include "../../../common/cupti_add.h" +#include "../../../common/cpu_timestamps.h" + +// includes, project +#include + +// includes, kernels +#include "srad_kernel.cu" + +void random_matrix(float *I, int 
rows, int cols); +void runTest( int argc, char** argv); +void usage(int argc, char **argv) +{ + fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "\t - number of rows\n"); + fprintf(stderr, "\t - number of cols\n"); + fprintf(stderr, "\t - y1 value of the speckle\n"); + fprintf(stderr, "\t - y2 value of the speckle\n"); + fprintf(stderr, "\t - x1 value of the speckle\n"); + fprintf(stderr, "\t - x2 value of the speckle\n"); + fprintf(stderr, "\t - lambda (0,1)\n"); + fprintf(stderr, "\t - number of iterations\n"); + + exit(1); +} +//////////////////////////////////////////////////////////////////////////////// +// Program main +//////////////////////////////////////////////////////////////////////////////// +int main(int argc, char *argv[]) +{ + uint64_t start_tsc = rdtsc(); + uint64_t start_tsp = rdtsp(); + printf("start_tsc %lu start_tsp %lu\n", start_tsc, start_tsp); + printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE); + runTest( argc, argv); + + return EXIT_SUCCESS; +} + +void +runTest( int argc, char** argv) +{ + int rows, cols, size_I, size_R, niter = 10, iter, nblocks; + float *I, *J, lambda, q0sqr, sum, sum2, tmp, meanROI,varROI ; + +#ifdef CPU + float Jc, G2, L, num, den, qsqr; + int *iN,*iS,*jE,*jW, k; + float *dN,*dS,*dW,*dE; + float cN,cS,cW,cE,D; +#endif + +#ifdef GPU + + float *J_cuda; + float *C_cuda; + float *E_C, *W_C, *N_C, *S_C; + +#endif + + unsigned int r1, r2, c1, c2; + float *c; + + + + if (argc == 10) + { + rows = atoi(argv[1]); //number of rows in the domain + cols = atoi(argv[2]); //number of cols in the domain + if ((rows%16!=0) || (cols%16!=0)){ + fprintf(stderr, "rows and cols must be multiples of 16\n"); + exit(1); + } + r1 = atoi(argv[3]); //y1 position of the speckle + r2 = atoi(argv[4]); //y2 position of the speckle + c1 = atoi(argv[5]); //x1 position of the speckle + c2 = atoi(argv[6]); //x2 position of the speckle + lambda = atof(argv[7]); //Lambda value + niter = atoi(argv[8]); //number of iterations + 
nblocks = atoi(argv[9]); // number of blocks + } + else{ + usage(argc, argv); + } + + size_I = cols * rows; + size_R = (r2-r1+1)*(c2-c1+1); + + I = (float *)malloc( size_I * sizeof(float) ); + J = (float *)malloc( size_I * sizeof(float) ); + c = (float *)malloc(sizeof(float)* size_I) ; + + +#ifdef CPU + + iN = (int *)malloc(sizeof(unsigned int*) * rows) ; + iS = (int *)malloc(sizeof(unsigned int*) * rows) ; + jW = (int *)malloc(sizeof(unsigned int*) * cols) ; + jE = (int *)malloc(sizeof(unsigned int*) * cols) ; + + + dN = (float *)malloc(sizeof(float)* size_I) ; + dS = (float *)malloc(sizeof(float)* size_I) ; + dW = (float *)malloc(sizeof(float)* size_I) ; + dE = (float *)malloc(sizeof(float)* size_I) ; + + + for (int i=0; i< rows; i++) { + iN[i] = i-1; + iS[i] = i+1; + } + for (int j=0; j< cols; j++) { + jW[j] = j-1; + jE[j] = j+1; + } + iN[0] = 0; + iS[rows-1] = rows-1; + jW[0] = 0; + jE[cols-1] = cols-1; + +#endif + GPU_argv_init(); + initTrace(); + startCPU(); + +#ifdef GPU + + //Allocate device memory + cudaMallocManaged((void**)& J_cuda, sizeof(float)* size_I); + cudaMallocManaged((void **)&C_cuda, sizeof(float) * size_I); + cudaMallocManaged((void **)&E_C, sizeof(float) * size_I); + cudaMallocManaged((void **)&W_C, sizeof(float) * size_I); + cudaMallocManaged((void **)&S_C, sizeof(float) * size_I); + cudaMallocManaged((void **)&N_C, sizeof(float) * size_I); + +#endif + + printf("Randomizing the input matrix\n"); + //Generate a random matrix + random_matrix(I, rows, cols); + + for (int k = 0; k < size_I; k++ ) { + J[k] = (float)exp(I[k]) ; + } + printf("Start the SRAD main loop\n"); + for (iter=0; iter< niter; iter++){ + sum=0; sum2=0; + for (int i=r1; i<=r2; i++) { + for (int j=c1; j<=c2; j++) { + tmp = J[i * cols + j]; + sum += tmp ; + sum2 += tmp*tmp; + } + } + meanROI = sum / size_R; + varROI = (sum2 / size_R) - meanROI*meanROI; + q0sqr = varROI / (meanROI*meanROI); + +#ifdef CPU + + for (int i = 0 ; i < rows ; i++) { + for (int j = 0; j < cols; j++) { + 
+ k = i * cols + j; + Jc = J[k]; + + // directional derivates + dN[k] = J[iN[i] * cols + j] - Jc; + dS[k] = J[iS[i] * cols + j] - Jc; + dW[k] = J[i * cols + jW[j]] - Jc; + dE[k] = J[i * cols + jE[j]] - Jc; + + G2 = (dN[k]*dN[k] + dS[k]*dS[k] + + dW[k]*dW[k] + dE[k]*dE[k]) / (Jc*Jc); + + L = (dN[k] + dS[k] + dW[k] + dE[k]) / Jc; + + num = (0.5*G2) - ((1.0/16.0)*(L*L)) ; + den = 1 + (.25*L); + qsqr = num/(den*den); + + // diffusion coefficent (equ 33) + den = (qsqr-q0sqr) / (q0sqr * (1+q0sqr)) ; + c[k] = 1.0 / (1.0+den) ; + + // saturate diffusion coefficent + if (c[k] < 0) {c[k] = 0;} + else if (c[k] > 1) {c[k] = 1;} + } + } + + for (int i = 0; i < rows; i++) { + for (int j = 0; j < cols; j++) { + + // current index + k = i * cols + j; + + // diffusion coefficent + cN = c[k]; + cS = c[iS[i] * cols + j]; + cW = c[k]; + cE = c[i * cols + jE[j]]; + + // divergence (equ 58) + D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k]; + + // image update (equ 61) + J[k] = J[k] + 0.25*lambda*D; + } + } + +#endif // CPU + + +#ifdef GPU + + //Currently the input size must be divided by 16 - the block size + + // ruihao + int block_x = cols/BLOCK_SIZE ; + int block_y = rows/BLOCK_SIZE ; + + if (nblocks > block_x) nblocks = block_x; + + dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE); + // dim3 dimGrid(block_x, block_y); + dim3 dimGrid(nblocks, nblocks); + // ruihao + + //Copy data from main memory to device memory + memcpy(J_cuda, J, sizeof(float) * size_I); + + cudaStream_t stream1; + cudaStreamCreate(&stream1); + + cudaMemPrefetchAsync(J_cuda, sizeof(float) * size_I, GPU_DEVICE, stream1); + cudaStreamSynchronize(stream1); + + //Run kernels + // srad_cuda_1<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr); + // srad_cuda_2<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, lambda, q0sqr); + srad_cuda_1<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr, cols / nblocks); + srad_cuda_2<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, lambda, q0sqr, cols / nblocks); + 
//Copy data from device memory to main memory + cudaDeviceSynchronize(); + memcpy(J, J_cuda, sizeof(float) * size_I); + +#endif +} + + cudaThreadSynchronize(); +#ifdef GPU + cudaFree(C_cuda); + cudaFree(J_cuda); + cudaFree(E_C); + cudaFree(W_C); + cudaFree(N_C); + cudaFree(S_C); +#endif + endCPU(); + finiTrace(); + +#ifdef OUTPUT + //Printing output + printf("Printing Output:\n"); + for( int i = 0 ; i < rows ; i++){ + for ( int j = 0 ; j < cols ; j++){ + printf("%.5f ", J[i * cols + j]); + } + printf("\n"); + } +#endif + + printf("Computation Done\n"); + + free(I); + free(J); +#ifdef CPU + free(iN); free(iS); free(jW); free(jE); + free(dN); free(dS); free(dW); free(dE); +#endif + free(c); + +} + + +void random_matrix(float *I, int rows, int cols){ + + srand(7); + + for( int i = 0 ; i < rows ; i++){ + for ( int j = 0 ; j < cols ; j++){ + I[i * cols + j] = rand()/(float)RAND_MAX ; + } + } + +} + diff --git a/workloads/realworld/uvm_prefetch_async/srad/srad.h b/workloads/realworld/uvm_prefetch_async/srad/srad.h new file mode 100755 index 0000000000000000000000000000000000000000..2b2adb6d956b697c5b0ace9bccb89162ef98be50 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/srad/srad.h @@ -0,0 +1,16 @@ +#define STR_SIZE 256 + +#ifdef RD_WG_SIZE_0_0 + #define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) + #define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) + #define BLOCK_SIZE RD_WG_SIZE +#else + #define BLOCK_SIZE 16 +#endif + +#define GPU +#define TIMER +//#define OUTPUT + diff --git a/workloads/realworld/uvm_prefetch_async/srad/srad_kernel.cu b/workloads/realworld/uvm_prefetch_async/srad/srad_kernel.cu new file mode 100755 index 0000000000000000000000000000000000000000..559e2ae28157fb4edd281e82993883bf1bf96dc4 --- /dev/null +++ b/workloads/realworld/uvm_prefetch_async/srad/srad_kernel.cu @@ -0,0 +1,375 @@ +#include "srad.h" +#include + +#include +#include + +using namespace nvcuda::experimental; + +#define PREFETCH_COUNT 2 + +__global__ void 
+srad_cuda_1( + float *E_C, + float *W_C, + float *N_C, + float *S_C, + float *J_cuda, + float *C_cuda, + int cols, + int rows, + float q0sqr, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + // shared memory allocation + __shared__ float temp[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + __shared__ float temp_result[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + + __shared__ float north[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + __shared__ float south[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + __shared__ float east[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + __shared__ float west[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int fetch = base_tile; + int end_tile = fetch + tiles_this_block; + + for (int compute = fetch; compute < end_tile; compute++) + { + // thread id + int tx = threadIdx.x; + int ty = threadIdx.y; + + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + // block id + int bx = fetch % tile_dim_x; + int by = fetch / tile_dim_x; + + // indices + int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx; + int index_n = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + tx - cols; + int index_s = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * BLOCK_SIZE + tx; + int index_w = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty - 1; + int index_e = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + BLOCK_SIZE; + + if (index_n < 0) + index_n = 0; + if (index_s >= (cols * rows)) + index_s = cols * rows - 1; + if (index_w < 0) + index_w = 0; + if (index_e >= (cols * rows)) + index_e = cols * rows - 1; + + // load data to shared memory + memcpy_async(north[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE 
+ ty * BLOCK_SIZE + tx], J_cuda[index_n], pipe); + memcpy_async(south[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], J_cuda[index_s], pipe); + if (by == 0) + { + memcpy_async(north[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], J_cuda[BLOCK_SIZE * bx + tx], pipe); + } + else if (by == tile_dim_x - 1) + { + memcpy_async(south[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], + J_cuda[cols * BLOCK_SIZE * (tile_dim_x - 1) + BLOCK_SIZE * bx + cols * (BLOCK_SIZE - 1) + tx], pipe); + } + block.sync(); + + memcpy_async(west[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], J_cuda[index_w], pipe); + memcpy_async(east[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], J_cuda[index_e], pipe); + + if (bx == 0) + { + memcpy_async(west[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], J_cuda[cols * BLOCK_SIZE * by + cols * ty], pipe); + } + else if (bx == tile_dim_x - 1) + { + memcpy_async(east[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], + J_cuda[cols * BLOCK_SIZE * by + BLOCK_SIZE * (tile_dim_x - 1) + cols * ty + BLOCK_SIZE - 1], pipe); + } + block.sync(); + memcpy_async(temp[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx], J_cuda[index], pipe); + pipe.commit(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + // block id + int bx = compute % tile_dim_x; + int by = compute / tile_dim_x; + + // indices + int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx; + int index_n = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + tx - cols; + int index_s = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * BLOCK_SIZE + tx; + int index_w = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty - 1; + int index_e = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx 
+ cols * ty + BLOCK_SIZE; + + if (index_n < 0) + index_n = 0; + if (index_s >= (cols * rows)) + index_s = cols * rows - 1; + if (index_w < 0) + index_w = 0; + if (index_e >= (cols * rows)) + index_e = cols * rows - 1; + + float n, w, e, s, jc, g2, l, num, den, qsqr, c; + jc = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx]; + + if (ty == 0 && tx == 0) + { // nw + n = north[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + s = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty + 1) * BLOCK_SIZE + tx] - jc; + w = west[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + e = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx + 1] - jc; + } + else if (ty == 0 && tx == BLOCK_SIZE - 1) + { // ne + n = north[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + s = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx - 1] - jc; + e = east[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + } + else if (ty == BLOCK_SIZE - 1 && tx == BLOCK_SIZE - 1) + { // se + n = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty - 1) * BLOCK_SIZE + tx] - jc; + s = south[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + w = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx - 1] - jc; + e = east[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + } + else if (ty == BLOCK_SIZE - 1 && tx == 0) + { // sw + n = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty - 1) * BLOCK_SIZE + tx] - jc; + s = south[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + w = west[(compute % PREFETCH_COUNT) * BLOCK_SIZE * 
BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + e = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx + 1] - jc; + } + + else if (ty == 0) + { // n + n = north[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + s = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx - 1] - jc; + e = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx + 1] - jc; + } + else if (tx == BLOCK_SIZE - 1) + { // e + n = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty - 1) * BLOCK_SIZE + tx] - jc; + s = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx - 1] - jc; + e = east[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + } + else if (ty == BLOCK_SIZE - 1) + { // s + n = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty - 1) * BLOCK_SIZE + tx] - jc; + s = south[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + w = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx - 1] - jc; + e = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx + 1] - jc; + } + else if (tx == 0) + { // w + n = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty - 1) * BLOCK_SIZE + tx] - jc; + s = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty + 1) * BLOCK_SIZE + tx] - jc; + w = west[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] - jc; + e = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx + 1] - jc; + } + else + { // the data elements which are not on the borders + n = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty 
- 1) * BLOCK_SIZE + tx] - jc; + s = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty + 1) * BLOCK_SIZE + tx] - jc; + w = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx - 1] - jc; + e = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx + 1] - jc; + } + + g2 = (n * n + s * s + w * w + e * e) / (jc * jc); + + l = (n + s + w + e) / jc; + + num = (0.5 * g2) - ((1.0 / 16.0) * (l * l)); + den = 1 + (.25 * l); + qsqr = num / (den * den); + + // diffusion coefficent (equ 33) + den = (qsqr - q0sqr) / (q0sqr * (1 + q0sqr)); + c = 1.0 / (1.0 + den); + + // saturate diffusion coefficent + if (c < 0) + { + temp_result[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = 0; + } + else if (c > 1) + { + temp_result[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = 1; + } + else + { + temp_result[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = c; + } + block.sync(); + + C_cuda[index] = temp_result[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx]; + E_C[index] = e; + W_C[index] = w; + S_C[index] = s; + N_C[index] = n; + } +} + +__global__ void +srad_cuda_2( + float *E_C, + float *W_C, + float *N_C, + float *S_C, + float *J_cuda, + float *C_cuda, + int cols, + int rows, + float lambda, + float q0sqr, + int block_size) +{ + cooperative_groups::thread_block block = cooperative_groups::this_thread_block(); + pipeline pipe; + // shared memory allocation + __shared__ float south_c[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + __shared__ float east_c[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + + __shared__ float c_cuda_temp[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + __shared__ float c_cuda_result[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + __shared__ float temp[BLOCK_SIZE * BLOCK_SIZE * PREFETCH_COUNT]; + + int tile_dim_x = cols / BLOCK_SIZE; + + int total_tiles = tile_dim_x * tile_dim_x; + int 
tiles_this_block = (block_size / BLOCK_SIZE) * (block_size / BLOCK_SIZE); + int tiles_this_block_x = (block_size / BLOCK_SIZE); + + int base_tile = (blockIdx.y * gridDim.x + blockIdx.x) * tiles_this_block; + int fetch = base_tile; + int end_tile = fetch + tiles_this_block; + + for (int compute = fetch; compute < end_tile; compute++) + { + // thread id + int tx = threadIdx.x; + int ty = threadIdx.y; + + for (; fetch < end_tile && fetch < compute + PREFETCH_COUNT; fetch++) + { + // block id + int offset = fetch - base_tile; + int block_id = fetch / tiles_this_block; + int bx = block_id % gridDim.x * tiles_this_block_x + offset % tiles_this_block_x; + int by = block_id / gridDim.x * tiles_this_block_x + offset / tiles_this_block_x; + + // indices + int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx; + int index_s = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * BLOCK_SIZE + tx; + int index_e = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + BLOCK_SIZE; + + if (index_s >= (cols * rows)) + index_s = cols * rows - 1; + if (index_e >= (cols * rows)) + index_e = cols * rows - 1; + + // load data to shared memory + temp[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = J_cuda[index]; + block.sync(); + + south_c[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = C_cuda[index_s]; + if (by == tile_dim_x - 1) + { + south_c[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = C_cuda[cols * BLOCK_SIZE * (tile_dim_x - 1) + BLOCK_SIZE * bx + cols * (BLOCK_SIZE - 1) + tx]; + } + block.sync(); + + east_c[ty * BLOCK_SIZE + tx] = C_cuda[index_e]; + if (bx == tile_dim_x - 1) + { + east_c[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = C_cuda[cols * BLOCK_SIZE * by + BLOCK_SIZE * (tile_dim_x - 1) + cols * ty + BLOCK_SIZE - 1]; + } + block.sync(); + + c_cuda_temp[(fetch % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = C_cuda[index]; + 
block.sync(); + } + if (fetch == end_tile) + { + for (int i = 0; i < PREFETCH_COUNT - 1; ++i) + { + pipe.commit(); + } + ++fetch; + } + pipe.wait_prior(); + block.sync(); + + // block id + int bx = compute % tile_dim_x; + int by = compute / tile_dim_x; + + // indices + int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx; + + float cc, cn, cs, ce, cw, d_sum; + cc = c_cuda_temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx]; + + if (ty == BLOCK_SIZE - 1 && tx == BLOCK_SIZE - 1) + { // se + cn = cc; + cs = south_c[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx]; + cw = cc; + ce = east_c[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx]; + } + else if (tx == BLOCK_SIZE - 1) + { // e + cn = cc; + cs = c_cuda_temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty + 1) * BLOCK_SIZE + tx]; + cw = cc; + ce = east_c[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx]; + } + else if (ty == BLOCK_SIZE - 1) + { // s + cn = cc; + cs = south_c[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx]; + cw = cc; + ce = c_cuda_temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx + 1]; + } + else + { // the data elements which are not on the borders + cn = cc; + cs = c_cuda_temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + (ty + 1) * BLOCK_SIZE + tx]; + cw = cc; + ce = c_cuda_temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx + 1]; + } + + // divergence (equ 58) + d_sum = cn * N_C[index] + cs * S_C[index] + cw * W_C[index] + ce * E_C[index]; + + // image update (equ 61) + c_cuda_result[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] = temp[(compute % PREFETCH_COUNT) * BLOCK_SIZE * BLOCK_SIZE + ty * BLOCK_SIZE + tx] + 0.25 * lambda * d_sum; + + block.sync(); + + J_cuda[index] = c_cuda_result[(compute % PREFETCH_COUNT) * BLOCK_SIZE * 
BLOCK_SIZE + ty * BLOCK_SIZE + tx]; + } +} \ No newline at end of file diff --git a/workloads/super_profile_control.csv b/workloads/super_profile_control.csv new file mode 100644 index 0000000000000000000000000000000000000000..51ba41809d937ff2bfd4ab160543c116f8a2c424 --- /dev/null +++ b/workloads/super_profile_control.csv @@ -0,0 +1,4 @@ +group,standard,async,uvm,uvm_prefetch,uvm_prefetch_async +gemm,687463202815.9999,962341109760.0,687463202815.9999,687463202815.9999,962341109760.0 +lud,1667324715.0,1726481842.0000002,1667324715.0,1667324715.0,1726481842.0000002 +yolov3,28935313386.999996,37651191937.200005,28935406289.79999,28935465310.8,37651191937.200005 diff --git a/workloads/super_profile_control.pdf b/workloads/super_profile_control.pdf new file mode 100644 index 0000000000000000000000000000000000000000..98678235f3ac267e997b805c28ff276800e3e75d Binary files /dev/null and b/workloads/super_profile_control.pdf differ diff --git a/workloads/super_profile_fp.csv b/workloads/super_profile_fp.csv new file mode 100644 index 0000000000000000000000000000000000000000..0f0256beeebd8a6bd7eb9e9712fa501eba3b115c --- /dev/null +++ b/workloads/super_profile_fp.csv @@ -0,0 +1,4 @@ +group,standard,async,uvm,uvm_prefetch,uvm_prefetch_async +gemm,20485114953728.008,20967225032703.99,20485114953728.008,20485114953728.008,20967225032703.99 +lud,37800778104.99999,38633714334.0,37800778104.99999,37800778104.99999,38633714334.0 +yolov3,355093680427.0334,490202011190.59973,355093756324.43335,355093807029.2667,490202011190.59973 diff --git a/workloads/super_profile_fp.pdf b/workloads/super_profile_fp.pdf new file mode 100644 index 0000000000000000000000000000000000000000..ff2f9dfd1faab48252519497a96b16b470277785 Binary files /dev/null and b/workloads/super_profile_fp.pdf differ diff --git a/workloads/super_profile_int.csv b/workloads/super_profile_int.csv new file mode 100644 index 0000000000000000000000000000000000000000..5cfd7e1f3570a480e1122ac152dbf77a4d08ed0f --- /dev/null +++ 
b/workloads/super_profile_int.csv @@ -0,0 +1,4 @@ +group,standard,async,uvm,uvm_prefetch,uvm_prefetch_async +gemm,137707388928.00006,268435455.99999997,137707388928.00006,137707388928.00006,268435455.99999997 +lud,15794518.000000006,32768270.999999985,15794518.000000006,15794518.000000006,32768270.999999985 +yolov3,9570445802.000002,9582275049.999998,9570445802.000002,9570445802.000002,9582275049.999998 diff --git a/workloads/super_profile_int.pdf b/workloads/super_profile_int.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c216a7dbf6e8c87d0c75d100260efb1aa58e74cb Binary files /dev/null and b/workloads/super_profile_int.pdf differ diff --git a/workloads/super_profile_load_miss_rate.csv b/workloads/super_profile_load_miss_rate.csv new file mode 100644 index 0000000000000000000000000000000000000000..b4ecae7727e4381088bde0747f06e683dd316656 --- /dev/null +++ b/workloads/super_profile_load_miss_rate.csv @@ -0,0 +1,4 @@ +group,standard,async,uvm,uvm_prefetch,uvm_prefetch_async +gemm,0.9999859226079493,0.999995192442483,0.9999859554011646,0.999985966019514,0.9999952086782068 +lud,0.9574898199390721,0.6129163624699273,0.9574918204561332,0.9574904557809533,0.612846786487996 +yolov3,0.8267956932887867,0.8751196120010926,0.8268134431292528,0.8268051864470075,0.8751196120010926 diff --git a/workloads/super_profile_load_miss_rate.pdf b/workloads/super_profile_load_miss_rate.pdf new file mode 100644 index 0000000000000000000000000000000000000000..93d16b9460ab804d74986ce88b9247b217d467b6 Binary files /dev/null and b/workloads/super_profile_load_miss_rate.pdf differ diff --git a/workloads/super_profile_memory.csv b/workloads/super_profile_memory.csv new file mode 100644 index 0000000000000000000000000000000000000000..efe286bdd7fc5f8b3aeb196c51a4a01702c9cc80 --- /dev/null +++ b/workloads/super_profile_memory.csv @@ -0,0 +1,4 @@ +group,standard,async,uvm,uvm_prefetch,uvm_prefetch_async 
+gemm,9346385707007.996,9209215188991.994,9346385707007.996,9346385707007.996,9209215188991.994 +lud,91879781375.99995,91762389995.00002,91879781375.99995,91879781375.99995,91762389995.00002 +yolov3,160621857455.0,156312653522.00003,160621857455.0,160621857455.0,156312653522.00003 diff --git a/workloads/super_profile_memory.pdf b/workloads/super_profile_memory.pdf new file mode 100644 index 0000000000000000000000000000000000000000..6bd1f9b0e46117cf3890d6ded519eaf5a7eae25d Binary files /dev/null and b/workloads/super_profile_memory.pdf differ diff --git a/workloads/super_profile_store_miss_rate.csv b/workloads/super_profile_store_miss_rate.csv new file mode 100644 index 0000000000000000000000000000000000000000..a8280f1d520a3158f254bddde4ee37322271f85c --- /dev/null +++ b/workloads/super_profile_store_miss_rate.csv @@ -0,0 +1,4 @@ +group,standard,async,uvm,uvm_prefetch,uvm_prefetch_async +gemm,0.9999999999999999,0.9999999999999999,0.9999999999999999,0.9999999999999999,0.9999999999999999 +lud,0.30541689093206636,0.09156675835925585,0.30541537583167344,0.30541165942073206,0.0916704313219938 +yolov3,0.5813169829674152,0.5850946442445588,0.581286703025981,0.5812848967598933,0.5850946442445588 diff --git a/workloads/super_profile_store_miss_rate.pdf b/workloads/super_profile_store_miss_rate.pdf new file mode 100644 index 0000000000000000000000000000000000000000..2fc454cac0f37bb28488071c193b11b6908d68bc Binary files /dev/null and b/workloads/super_profile_store_miss_rate.pdf differ